# diff --git a/fluid/neural_machine_translation/transformer_nist_base/config.py b/fluid/neural_machine_translation/transformer_nist_base/config.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..cd7e6d5454bd47270805d7804a88dc5106b7af60
# --- /dev/null
# +++ b/fluid/neural_machine_translation/transformer_nist_base/config.py
# @@ -0,0 +1,120 @@
class TrainTaskConfig(object):
    """Settings read by the training task (optimizer, schedule, paths)."""
    use_gpu = True
    # the epoch number to train.
    pass_num = 50

    # the number of sequences contained in a mini-batch.
    batch_size = 56

    # the hyper parameters for Adam optimizer.
    learning_rate = 0.001
    beta1 = 0.9
    beta2 = 0.98
    eps = 1e-9

    # the parameters for learning rate scheduling.
    warmup_steps = 15000

    # the flag indicating to use average loss or sum loss when training.
    use_avg_cost = False

    # the directory for saving trained models.
    model_dir = "trained_models"


class InferTaskConfig(object):
    """Settings read by the inference (beam search) task."""
    use_gpu = True
    # the number of examples in one run for sequence generation.
    batch_size = 1

    # the parameters for beam search.
    beam_size = 5
    max_length = 30
    # the number of decoded sentences to output.
    n_best = 1

    # the flags indicating whether to output the special tokens.
    output_bos = False
    output_eos = False
    output_unk = False

    # the directory for loading the trained model.
    model_path = "trained_models/pass_20.infer.model"


class ModelHyperParams(object):
    """Vocabulary sizes, special-token indices and network dimensions."""
    # This model directly uses paddle.dataset.wmt16 in which <s>, <e> and
    # <unk> token has already been added. As for the <pad> token, any token
    # included in dict can be used to pad, since the paddings' loss will be
    # masked out and make no effect on parameter gradients.
    # NOTE(review): the inference script shipped with this config reads its
    # data through nist_data_provider, not paddle.dataset.wmt16 -- confirm
    # that the statement above also holds for the NIST dictionaries.

    # size of source word dictionary.
    src_vocab_size = 30001

    # size of target word dictionary
    trg_vocab_size = 30001

    # index for <bos> token
    bos_idx = 0
    # index for <eos> token
    eos_idx = 1
    # index for <unk> token
    unk_idx = 2

    # max length of sequences.
    # The size of position encoding table should be at least this value plus
    # 1, since the sinusoid position encoding starts from 1 and 0 can be used
    # as the padding token for position encoding.
    max_length = 150

    # the dimension for word embeddings, which is also the last dimension of
    # the input and output of multi-head attention, position-wise feed-forward
    # networks, encoder and decoder.
    d_model = 512
    # size of the hidden layer in position-wise feed-forward networks.
    d_inner_hid = 2048
    # the dimension that keys are projected to for dot-product attention.
    d_key = 64
    # the dimension that values are projected to for dot-product attention.
    d_value = 64
    # number of head used in multi-head attention.
    n_head = 8
    # number of sub-layers to be stacked in the encoder and decoder.
    n_layer = 6
    # dropout rate used by all dropout layers.
    dropout = 0.1


# Names of position encoding table which will be initialized externally.
pos_enc_param_names = (
    "src_pos_enc_table",
    "trg_pos_enc_table", )

# Names of all data layers in encoder listed in order.
encoder_input_data_names = (
    "src_word",
    "src_pos",
    "src_slf_attn_bias",
    "src_data_shape",
    "src_slf_attn_pre_softmax_shape",
    "src_slf_attn_post_softmax_shape", )

# Names of all data layers in decoder listed in order.
decoder_input_data_names = (
    "trg_word",
    "trg_pos",
    "trg_slf_attn_bias",
    "trg_src_attn_bias",
    "trg_data_shape",
    "trg_slf_attn_pre_softmax_shape",
    "trg_slf_attn_post_softmax_shape",
    "trg_src_attn_pre_softmax_shape",
    "trg_src_attn_post_softmax_shape",
    "enc_output", )

# Names of label related data layers listed in order.
+label_data_names = ( + "lbl_word", + "lbl_weight", ) + diff --git a/fluid/neural_machine_translation/transformer_nist_base/infer.py b/fluid/neural_machine_translation/transformer_nist_base/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..b35a74a552f9daec31704aba19ebb53d0cc4d591 --- /dev/null +++ b/fluid/neural_machine_translation/transformer_nist_base/infer.py @@ -0,0 +1,366 @@ +import numpy as np + +import paddle +import paddle.fluid as fluid + +import model +from model import wrap_encoder as encoder +from model import wrap_decoder as decoder +from config import InferTaskConfig, ModelHyperParams, \ + encoder_input_data_names, decoder_input_data_names +from train import pad_batch_data +import nist_data_provider + + +def translate_batch(exe, + src_words, + encoder, + enc_in_names, + enc_out_names, + decoder, + dec_in_names, + dec_out_names, + beam_size, + max_length, + n_best, + batch_size, + n_head, + d_model, + src_pad_idx, + trg_pad_idx, + bos_idx, + eos_idx, + unk_idx, + output_unk=True): + """ + Run the encoder program once and run the decoder program multiple times to + implement beam search externally. + """ + # Prepare data for encoder and run the encoder. + enc_in_data = pad_batch_data( + src_words, + src_pad_idx, + n_head, + is_target=False, + is_label=False, + return_attn_bias=True, + return_max_len=False) + # Append the data shape input to reshape the output of embedding layer. + enc_in_data = enc_in_data + [ + np.array( + [-1, enc_in_data[2].shape[-1], d_model], dtype="int32") + ] + # Append the shape inputs to reshape before and after softmax in encoder + # self attention. + enc_in_data = enc_in_data + [ + np.array( + [-1, enc_in_data[2].shape[-1]], dtype="int32"), np.array( + enc_in_data[2].shape, dtype="int32") + ] + enc_output = exe.run(encoder, + feed=dict(zip(enc_in_names, enc_in_data)), + fetch_list=enc_out_names)[0] + + # Beam Search. + # To store the beam info. 
+ scores = np.zeros((batch_size, beam_size), dtype="float32") + prev_branchs = [[] for i in range(batch_size)] + next_ids = [[] for i in range(batch_size)] + # Use beam_inst_map to map beam idx to the instance idx in batch, since the + # size of feeded batch is changing. + beam_inst_map = { + beam_idx: inst_idx + for inst_idx, beam_idx in enumerate(range(batch_size)) + } + # Use active_beams to recode the alive. + active_beams = range(batch_size) + + def beam_backtrace(prev_branchs, next_ids, n_best=beam_size): + """ + Decode and select n_best sequences for one instance by backtrace. + """ + seqs = [] + for i in range(n_best): + k = i + seq = [] + for j in range(len(prev_branchs) - 1, -1, -1): + seq.append(next_ids[j][k]) + k = prev_branchs[j][k] + seq = seq[::-1] + # Add the , since next_ids don't include the . + seq = [bos_idx] + seq + seqs.append(seq) + return seqs + + def init_dec_in_data(batch_size, beam_size, enc_in_data, enc_output): + """ + Initialize the input data for decoder. + """ + trg_words = np.array( + [[bos_idx]] * batch_size * beam_size, dtype="int64") + trg_pos = np.array([[1]] * batch_size * beam_size, dtype="int64") + src_max_length, src_slf_attn_bias, trg_max_len = enc_in_data[2].shape[ + -1], enc_in_data[2], 1 + # This is used to remove attention on subsequent words. + trg_slf_attn_bias = np.ones((batch_size * beam_size, trg_max_len, + trg_max_len)) + trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape( + [-1, 1, trg_max_len, trg_max_len]) + trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) * + [-1e9]).astype("float32") + # This is used to remove attention on the paddings of source sequences. + trg_src_attn_bias = np.tile( + src_slf_attn_bias[:, :, ::src_max_length, :][:, np.newaxis], + [1, beam_size, 1, trg_max_len, 1]).reshape([ + -1, src_slf_attn_bias.shape[1], trg_max_len, + src_slf_attn_bias.shape[-1] + ]) + # Append the shape input to reshape the output of embedding layer. 
+ trg_data_shape = np.array( + [batch_size * beam_size, trg_max_len, d_model], dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # decoder self attention. + trg_slf_attn_pre_softmax_shape = np.array( + [-1, trg_slf_attn_bias.shape[-1]], dtype="int32") + trg_slf_attn_post_softmax_shape = np.array( + trg_slf_attn_bias.shape, dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # encoder-decoder attention. + trg_src_attn_pre_softmax_shape = np.array( + [-1, trg_src_attn_bias.shape[-1]], dtype="int32") + trg_src_attn_post_softmax_shape = np.array( + trg_src_attn_bias.shape, dtype="int32") + enc_output = np.tile( + enc_output[:, np.newaxis], [1, beam_size, 1, 1]).reshape( + [-1, enc_output.shape[-2], enc_output.shape[-1]]) + return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, trg_slf_attn_pre_softmax_shape, \ + trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \ + trg_src_attn_post_softmax_shape, enc_output + + def update_dec_in_data(dec_in_data, next_ids, active_beams, beam_inst_map): + """ + Update the input data of decoder mainly by slicing from the previous + input data and dropping the finished instance beams. 
+ """ + trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, trg_slf_attn_pre_softmax_shape, \ + trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \ + trg_src_attn_post_softmax_shape, enc_output = dec_in_data + trg_cur_len = trg_slf_attn_bias.shape[-1] + 1 + trg_words = np.array( + [ + beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx]) + for beam_idx in active_beams + ], + dtype="int64") + trg_words = trg_words.reshape([-1, 1]) + trg_pos = np.array( + [range(1, trg_cur_len + 1)] * len(active_beams) * beam_size, + dtype="int64").reshape([-1, 1]) + active_beams = [beam_inst_map[beam_idx] for beam_idx in active_beams] + active_beams_indice = ( + (np.array(active_beams) * beam_size)[:, np.newaxis] + + np.array(range(beam_size))[np.newaxis, :]).flatten() + # This is used to remove attention on subsequent words. + trg_slf_attn_bias = np.ones((len(active_beams) * beam_size, trg_cur_len, + trg_cur_len)) + trg_slf_attn_bias = np.triu(trg_slf_attn_bias, 1).reshape( + [-1, 1, trg_cur_len, trg_cur_len]) + trg_slf_attn_bias = (np.tile(trg_slf_attn_bias, [1, n_head, 1, 1]) * + [-1e9]).astype("float32") + # This is used to remove attention on the paddings of source sequences. + trg_src_attn_bias = np.tile(trg_src_attn_bias[ + active_beams_indice, :, ::trg_src_attn_bias.shape[2], :], + [1, 1, trg_cur_len, 1]) + # Append the shape input to reshape the output of embedding layer. + trg_data_shape = np.array( + [len(active_beams) * beam_size, trg_cur_len, d_model], + dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # decoder self attention. + trg_slf_attn_pre_softmax_shape = np.array( + [-1, trg_slf_attn_bias.shape[-1]], dtype="int32") + trg_slf_attn_post_softmax_shape = np.array( + trg_slf_attn_bias.shape, dtype="int32") + # Append the shape inputs to reshape before and after softmax in + # encoder-decoder attention. 
+ trg_src_attn_pre_softmax_shape = np.array( + [-1, trg_src_attn_bias.shape[-1]], dtype="int32") + trg_src_attn_post_softmax_shape = np.array( + trg_src_attn_bias.shape, dtype="int32") + enc_output = enc_output[active_beams_indice, :, :] + return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, trg_slf_attn_pre_softmax_shape, \ + trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \ + trg_src_attn_post_softmax_shape, enc_output + + dec_in_data = init_dec_in_data(batch_size, beam_size, enc_in_data, + enc_output) + for i in range(max_length): + predict_all = exe.run(decoder, + feed=dict(zip(dec_in_names, dec_in_data)), + fetch_list=dec_out_names)[0] + predict_all = np.log( + predict_all.reshape([len(beam_inst_map) * beam_size, i + 1, -1]) + [:, -1, :]) + predict_all = (predict_all + scores[active_beams].reshape( + [len(beam_inst_map) * beam_size, -1])).reshape( + [len(beam_inst_map), beam_size, -1]) + if not output_unk: # To exclude the token. + predict_all[:, :, unk_idx] = -1e9 + active_beams = [] + for beam_idx in range(batch_size): + if not beam_inst_map.has_key(beam_idx): + continue + inst_idx = beam_inst_map[beam_idx] + predict = (predict_all[inst_idx, :, :] + if i != 0 else predict_all[inst_idx, 0, :]).flatten() + top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:] + top_scores_ids = top_k_indice[np.argsort(predict[top_k_indice])[:: + -1]] + top_scores = predict[top_scores_ids] + scores[beam_idx] = top_scores + prev_branchs[beam_idx].append(top_scores_ids / + predict_all.shape[-1]) + next_ids[beam_idx].append(top_scores_ids % predict_all.shape[-1]) + if next_ids[beam_idx][-1][0] != eos_idx: + active_beams.append(beam_idx) + if len(active_beams) == 0: + break + dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams, + beam_inst_map) + beam_inst_map = { + beam_idx: inst_idx + for inst_idx, beam_idx in enumerate(active_beams) + } + + # Decode beams and select n_best sequences for each instance by 
backtrace. + seqs = [ + beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best) + for beam_idx in range(batch_size) + ] + + return seqs, scores[:, :n_best].tolist() + + +def main(): + place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + encoder_program = fluid.Program() + with fluid.program_guard(main_program=encoder_program): + enc_output = encoder( + ModelHyperParams.src_vocab_size, ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, ModelHyperParams.n_head, + ModelHyperParams.d_key, ModelHyperParams.d_value, + ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, + ModelHyperParams.dropout) + + decoder_program = fluid.Program() + with fluid.program_guard(main_program=decoder_program): + predict = decoder( + ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, ModelHyperParams.n_head, + ModelHyperParams.d_key, ModelHyperParams.d_value, + ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, + ModelHyperParams.dropout) + + # Load model parameters of encoder and decoder separately from the saved + # transformer model. 
+ encoder_var_names = [] + for op in encoder_program.block(0).ops: + encoder_var_names += op.input_arg_names + encoder_param_names = filter( + lambda var_name: isinstance(encoder_program.block(0).var(var_name), + fluid.framework.Parameter), + encoder_var_names) + encoder_params = map(encoder_program.block(0).var, encoder_param_names) + decoder_var_names = [] + for op in decoder_program.block(0).ops: + decoder_var_names += op.input_arg_names + decoder_param_names = filter( + lambda var_name: isinstance(decoder_program.block(0).var(var_name), + fluid.framework.Parameter), + decoder_var_names) + decoder_params = map(decoder_program.block(0).var, decoder_param_names) + fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=encoder_params) + fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=decoder_params) + + # This is used here to set dropout to the test mode. + encoder_program = fluid.io.get_inference_program( + target_vars=[enc_output], main_program=encoder_program) + decoder_program = fluid.io.get_inference_program( + target_vars=[predict], main_program=decoder_program) + + '''test_data = paddle.batch( + paddle.dataset.wmt16.test(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=InferTaskConfig.batch_size) + + trg_idx2word = paddle.dataset.wmt16.get_dict( + "de", dict_size=ModelHyperParams.trg_vocab_size, reverse=True)''' + test_data = paddle.batch( + nist_data_provider.test("nist06n.test", ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=InferTaskConfig.batch_size) + + trg_idx2word = nist_data_provider.get_dict( + "data", + dict_size=ModelHyperParams.trg_vocab_size, + lang="en", + reverse=True) + + def post_process_seq(seq, + bos_idx=ModelHyperParams.bos_idx, + eos_idx=ModelHyperParams.eos_idx, + output_bos=InferTaskConfig.output_bos, + output_eos=InferTaskConfig.output_eos): + """ + Post-process the beam-search decoded sequence. Truncate from the first + and remove the and tokens currently. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = seq[:eos_pos + 1] + return filter( + lambda idx: (output_bos or idx != bos_idx) and \ + (output_eos or idx != eos_idx), + seq) + + for batch_id, data in enumerate(test_data()): + batch_seqs, batch_scores = translate_batch( + exe, + [item[0] for item in data], + encoder_program, + encoder_input_data_names, + [enc_output.name], + decoder_program, + decoder_input_data_names, + [predict.name], + InferTaskConfig.beam_size, + InferTaskConfig.max_length, + InferTaskConfig.n_best, + len(data), + ModelHyperParams.n_head, + ModelHyperParams.d_model, + ModelHyperParams.eos_idx, # Use eos_idx to pad. + ModelHyperParams.eos_idx, # Use eos_idx to pad. + ModelHyperParams.bos_idx, + ModelHyperParams.eos_idx, + ModelHyperParams.unk_idx, + output_unk=InferTaskConfig.output_unk) + for i in range(len(batch_seqs)): + # Post-process the beam-search decoded sequences. + seqs = map(post_process_seq, batch_seqs[i]) + scores = batch_scores[i] + for seq in seqs: + print(" ".join([trg_idx2word[idx] for idx in seq])) + + +if __name__ == "__main__": + main() + diff --git a/fluid/neural_machine_translation/transformer_nist_base/model.py b/fluid/neural_machine_translation/transformer_nist_base/model.py new file mode 100644 index 0000000000000000000000000000000000000000..de7d03f7c892aa827886c9006a70704a695c1307 --- /dev/null +++ b/fluid/neural_machine_translation/transformer_nist_base/model.py @@ -0,0 +1,719 @@ +from functools import partial +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from config import TrainTaskConfig, pos_enc_param_names, \ + encoder_input_data_names, decoder_input_data_names, label_data_names + + +def position_encoding_init(n_position, d_pos_vec): + """ + Generate the initial values for the sinusoid position encoding table. 
+ """ + position_enc = np.array([[ + pos / np.power(10000, 2 * (j // 2) / d_pos_vec) + for j in range(d_pos_vec) + ] if pos != 0 else np.zeros(d_pos_vec) for pos in range(n_position)]) + position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i + position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1 + return position_enc.astype("float32") + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + pre_softmax_shape=None, + post_softmax_shape=None): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError( + "Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc(input=queries, + size=d_key * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_key, + fan_out=n_head * d_key), + bias_attr=False, + num_flatten_dims=2) + k = layers.fc(input=keys, + size=d_key * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_key, + fan_out=n_head * d_key), + bias_attr=False, + num_flatten_dims=2) + v = layers.fc(input=values, + size=d_value * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_value, + fan_out=n_head * d_value), + bias_attr=False, + num_flatten_dims=2) + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. 
+ """ + if n_head == 1: + return x + + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape( + x=x, shape=[0, -1, n_head, hidden_size // n_head]) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape( + x=trans_x, + shape=map(int, [0, -1, trans_x.shape[2] * trans_x.shape[3]])) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_model**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + weights = layers.reshape( + x=layers.elementwise_add( + x=product, y=attn_bias) if attn_bias else product, + shape=[-1, product.shape[-1]], + actual_shape=pre_softmax_shape, + act="softmax") + weights = layers.reshape( + x=weights, shape=product.shape, actual_shape=post_softmax_shape) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model, + dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project 
back to the model size. + proj_out = layers.fc(input=out, + size=d_model, + param_attr=fluid.initializer.Xavier(uniform=False), + bias_attr=False, + num_flatten_dims=2) + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with a ReLU activation + in between, which is applied to each position separately and identically. + """ + hidden = layers.fc(input=x, + size=d_inner_hid, + num_flatten_dims=2, + param_attr=fluid.initializer.Uniform( + low=-(d_hid**-0.5), high=(d_hid**-0.5)), + act="relu") + out = layers.fc(input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.initializer.Uniform( + low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5))) + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.): + """ + Add residual connection, layer normalization and droput to the out tensor + optionally according to the value of process_cmd. + + This will be used before or after multi-head attention and position-wise + feed-forward networks. + """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.initializer.Constant(1.), + bias_attr=fluid.initializer.Constant(0.)) + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def prepare_encoder(src_word, + src_pos, + src_vocab_size, + src_emb_dim, + src_max_len, + dropout_rate=0., + src_data_shape=None, + pos_enc_param_name=None): + """Add word embeddings and position encodings. + The output tensor has a shape of: + [batch_size, max_src_length_in_batch, d_model]. 
+ + This module is used at the bottom of the encoder stacks. + """ + src_word_emb = layers.embedding( + src_word, + size=[src_vocab_size, src_emb_dim], + param_attr=fluid.initializer.Normal(0., 1.)) + src_pos_enc = layers.embedding( + src_pos, + size=[src_max_len, src_emb_dim], + param_attr=fluid.ParamAttr( + name=pos_enc_param_name, trainable=False)) + enc_input = src_word_emb + src_pos_enc + enc_input = layers.reshape( + x=enc_input, + shape=[-1, src_max_len, src_emb_dim], + actual_shape=src_data_shape) + return layers.dropout( + enc_input, dropout_prob=dropout_rate, + is_test=False) if dropout_rate else enc_input + + +prepare_encoder = partial( + prepare_encoder, pos_enc_param_name=pos_enc_param_names[0]) +prepare_decoder = partial( + prepare_encoder, pos_enc_param_name=pos_enc_param_names[1]) + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + pre_softmax_shape=None, + post_softmax_shape=None): + """The encoder layers that can be stacked to form a deep encoder. + + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. + """ + attn_output = multi_head_attention( + enc_input, enc_input, enc_input, attn_bias, d_key, d_value, d_model, + n_head, dropout_rate, pre_softmax_shape, post_softmax_shape) + attn_output = post_process_layer(enc_input, attn_output, "dan", + dropout_rate) + ffd_output = positionwise_feed_forward(attn_output, d_inner_hid, d_model) + return post_process_layer(attn_output, ffd_output, "dan", dropout_rate) + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + pre_softmax_shape=None, + post_softmax_shape=None): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. 
+ """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + pre_softmax_shape, + post_softmax_shape, ) + enc_input = enc_output + return enc_output + + +def decoder_layer(dec_input, + enc_output, + slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + slf_attn_pre_softmax_shape=None, + slf_attn_post_softmax_shape=None, + src_attn_pre_softmax_shape=None, + src_attn_post_softmax_shape=None): + """ The layer to be stacked in decoder part. + + The structure of this module is similar to that in the encoder part except + a multi-head attention is added to implement encoder-decoder attention. + """ + slf_attn_output = multi_head_attention( + dec_input, + dec_input, + dec_input, + slf_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, ) + slf_attn_output = post_process_layer( + dec_input, + slf_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + enc_attn_output = multi_head_attention( + slf_attn_output, + enc_output, + enc_output, + dec_enc_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + enc_attn_output = post_process_layer( + slf_attn_output, + enc_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + ffd_output = positionwise_feed_forward( + enc_attn_output, + d_inner_hid, + d_model, ) + dec_output = post_process_layer( + enc_attn_output, + ffd_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + return dec_output + + +def decoder(dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0., + slf_attn_pre_softmax_shape=None, + 
slf_attn_post_softmax_shape=None, + src_attn_pre_softmax_shape=None, + src_attn_post_softmax_shape=None): + """ + The decoder is composed of a stack of identical decoder_layer layers. + """ + for i in range(n_layer): + dec_output = decoder_layer( + dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + dec_input = dec_output + return dec_output + + +def make_inputs(input_data_names, + n_head, + d_model, + max_length, + is_pos, + slf_attn_bias_flag, + src_attn_bias_flag, + enc_output_flag=False, + data_shape_flag=True, + slf_attn_shape_flag=True, + src_attn_shape_flag=True): + """ + Define the input data layers for the transformer model. + """ + input_layers = [] + batch_size = 1 # Only for the infer-shape in compile time. + # The shapes here act as placeholder and are set to pass the infer-shape in + # compile time. + # The actual data shape of word is: + # [batch_size * max_len_in_batch, 1] + word = layers.data( + name=input_data_names[len(input_layers)], + shape=[batch_size * max_length, 1], + dtype="int64", + append_batch_size=False) + input_layers += [word] + # This is used for position data or label weight. + # The actual data shape of pos is: + # [batch_size * max_len_in_batch, 1] + pos = layers.data( + name=input_data_names[len(input_layers)], + shape=[batch_size * max_length, 1], + dtype="int64" if is_pos else "float32", + append_batch_size=False) + input_layers += [pos] + if slf_attn_bias_flag: + # This input is used to remove attention weights on paddings for the + # encoder and to remove attention weights on subsequent words for the + # decoder. 
+ # The actual data shape of slf_attn_bias_flag is: + # [batch_size, n_head, max_len_in_batch, max_len_in_batch] + slf_attn_bias = layers.data( + name=input_data_names[len(input_layers)], + shape=[batch_size, n_head, max_length, max_length], + dtype="float32", + append_batch_size=False) + input_layers += [slf_attn_bias] + if src_attn_bias_flag: + # This input is used to remove attention weights on paddings. It's used + # in encoder-decoder attention. + # The actual data shape of slf_attn_bias_flag is: + # [batch_size, n_head, trg_max_len_in_batch, src_max_len_in_batch] + src_attn_bias = layers.data( + name=input_data_names[len(input_layers)], + shape=[batch_size, n_head, max_length, max_length], + dtype="float32", + append_batch_size=False) + input_layers += [src_attn_bias] + if data_shape_flag: + # This input is used to reshape the output of embedding layer. + data_shape = layers.data( + name=input_data_names[len(input_layers)], + shape=[3], + dtype="int32", + append_batch_size=False) + input_layers += [data_shape] + if slf_attn_shape_flag: + # This shape input is used to reshape before softmax in self attention. + slf_attn_pre_softmax_shape = layers.data( + name=input_data_names[len(input_layers)], + shape=[2], + dtype="int32", + append_batch_size=False) + input_layers += [slf_attn_pre_softmax_shape] + # This shape input is used to reshape after softmax in self attention. + slf_attn_post_softmax_shape = layers.data( + name=input_data_names[len(input_layers)], + shape=[4], + dtype="int32", + append_batch_size=False) + input_layers += [slf_attn_post_softmax_shape] + if src_attn_shape_flag: + # This shape input is used to reshape before softmax in encoder-decoder + # attention. + src_attn_pre_softmax_shape = layers.data( + name=input_data_names[len(input_layers)], + shape=[2], + dtype="int32", + append_batch_size=False) + input_layers += [src_attn_pre_softmax_shape] + # This shape input is used to reshape after softmax in encoder-decoder + # attention. 
+ src_attn_post_softmax_shape = layers.data( + name=input_data_names[len(input_layers)], + shape=[4], + dtype="int32", + append_batch_size=False) + input_layers += [src_attn_post_softmax_shape] + if enc_output_flag: + # This input is used in independent decoder program for inference. + # The actual data shape of slf_attn_bias_flag is: + # [batch_size, max_len_in_batch, d_model] + enc_output = layers.data( + name=input_data_names[len(input_layers)], + shape=[batch_size, max_length, d_model], + dtype="float32", + append_batch_size=False) + input_layers += [enc_output] + + return input_layers + + +def transformer( + src_vocab_size, + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, ): + enc_inputs = make_inputs( + encoder_input_data_names, + n_head, + d_model, + max_length, + is_pos=True, + slf_attn_bias_flag=True, + src_attn_bias_flag=False, + enc_output_flag=False, + data_shape_flag=True, + slf_attn_shape_flag=True, + src_attn_shape_flag=False) + + enc_output = wrap_encoder( + src_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + enc_inputs, ) + + dec_inputs = make_inputs( + decoder_input_data_names, + n_head, + d_model, + max_length, + is_pos=True, + slf_attn_bias_flag=True, + src_attn_bias_flag=True, + enc_output_flag=False, + data_shape_flag=True, + slf_attn_shape_flag=True, + src_attn_shape_flag=True) + + predict = wrap_decoder( + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + dec_inputs, + enc_output, ) + + # Padding index do not contribute to the total loss. The weights is used to + # cancel padding index in calculating the loss. 
def wrap_encoder(src_vocab_size,
                 max_length,
                 n_layer,
                 n_head,
                 d_key,
                 d_value,
                 d_model,
                 d_inner_hid,
                 dropout_rate,
                 enc_inputs=None):
    """
    Assemble every layer the encoder needs and return the encoder output.

    If `enc_inputs` is None the six encoder data layers are declared here
    through `make_inputs` (this is what allows a stand-alone encoder program
    at inference time); otherwise `enc_inputs` must be a sequence of exactly
    six items in the order: source words, source positions, self-attention
    bias, data shape, pre-softmax shape, post-softmax shape.
    """
    if enc_inputs is None:
        # No inputs were handed in: create the encoder's own data layers.
        enc_inputs = make_inputs(
            encoder_input_data_names,
            n_head,
            d_model,
            max_length,
            is_pos=True,
            slf_attn_bias_flag=True,
            src_attn_bias_flag=False,
            enc_output_flag=False,
            data_shape_flag=True,
            slf_attn_shape_flag=True,
            src_attn_shape_flag=False)

    # Single unpacking point for both the created and the supplied inputs.
    (src_word, src_pos, src_slf_attn_bias, src_data_shape,
     slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape) = enc_inputs

    # Prepare the encoder input from the word and position layers.
    enc_input = prepare_encoder(
        src_word,
        src_pos,
        src_vocab_size,
        d_model,
        max_length,
        dropout_rate,
        src_data_shape, )

    # Run the encoder with n_layer layers on the prepared input.
    return encoder(
        enc_input,
        src_slf_attn_bias,
        n_layer,
        n_head,
        d_key,
        d_value,
        d_model,
        d_inner_hid,
        dropout_rate,
        slf_attn_pre_softmax_shape,
        slf_attn_post_softmax_shape, )
+ """ + if dec_inputs is None: + # This is used to implement independent decoder program in inference. + trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, slf_attn_pre_softmax_shape, \ + slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \ + src_attn_post_softmax_shape, enc_output = make_inputs( + decoder_input_data_names, + n_head, + d_model, + max_length, + is_pos=True, + slf_attn_bias_flag=True, + src_attn_bias_flag=True, + enc_output_flag=True, + data_shape_flag=True, + slf_attn_shape_flag=True, + src_attn_shape_flag=True) + else: + trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \ + trg_data_shape, slf_attn_pre_softmax_shape, \ + slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \ + src_attn_post_softmax_shape = dec_inputs + + dec_input = prepare_decoder( + trg_word, + trg_pos, + trg_vocab_size, + d_model, + max_length, + dropout_rate, + trg_data_shape, ) + dec_output = decoder( + dec_input, + enc_output, + trg_slf_attn_bias, + trg_src_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + slf_attn_pre_softmax_shape, + slf_attn_post_softmax_shape, + src_attn_pre_softmax_shape, + src_attn_post_softmax_shape, ) + # Return logits for training and probs for inference. 
def __build_dict(data_file, dict_size, save_path, lang="cn"):
    """
    Build a frequency-ranked vocabulary file from a parallel corpus.

    Every corpus line is expected to hold exactly two tab-separated
    sentences; lines with any other number of fields are ignored. Words are
    obtained by whitespace-splitting the first sentence when `lang` is "cn"
    and the second sentence otherwise.

    Args:
        data_file: path of a corpus file, or of a directory whose entries
            are all read as corpus files.
        dict_size: total number of lines to write, including the three
            special marks. Values below 3 yield only the three marks.
        save_path: path of the dictionary file to (over)write.
        lang: "cn" selects the first column, anything else the second.

    The output contains START_MARK, END_MARK and UNK_MARK on the first three
    lines, followed by at most `dict_size - 3` words in descending frequency
    (ties keep the order in which the words were first seen).
    """
    if os.path.isdir(data_file):
        data_files = [
            os.path.join(data_file, f) for f in os.listdir(data_file)
        ]
    else:
        data_files = [data_file]

    column = 0 if lang == "cn" else 1
    word_dict = defaultdict(int)
    for file_path in data_files:
        with open(file_path, mode="r") as f:
            # Stream the file instead of readlines() so that a large corpus
            # is never held in memory as a whole.
            for line in f:
                line_split = line.strip().split("\t")
                if len(line_split) != 2:
                    continue
                for w in line_split[column].split():
                    word_dict[w] += 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # sorted() is stable, so equally frequent words keep their first-seen
    # order.
    ranked = sorted(word_dict.items(), key=lambda x: x[1], reverse=True)

    # Three entries are reserved for the special marks. Clamp at zero so a
    # dict_size below 3 cannot fall through to writing the whole vocabulary.
    word_budget = max(dict_size - 3, 0)

    with open(save_path, "w") as fout:
        fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
        for word, _ in ranked[:word_budget]:
            fout.write("%s\n" % word)
def reader_creator(data_file,
                   src_lang,
                   src_dict_size,
                   trg_dict_size,
                   src_dict_file=None,
                   trg_dict_file=None,
                   len_filter=200):
    """
    Create a reader over a tab-separated parallel corpus.

    Each corpus line must hold exactly two tab-separated sentences; the
    first column is treated as "cn" and the second as "en". Lines with any
    other number of fields are skipped.

    Args:
        data_file: corpus file or directory, relative to DATA_HOME.
        src_lang: "cn" to read the source from the first column; any other
            value reads it from the second column.
        src_dict_size: size of the source-language dictionary.
        trg_dict_size: size of the target-language dictionary.
        src_dict_file: optional dictionary file name for the source side.
        trg_dict_file: optional dictionary file name for the target side.
        len_filter: a pair is yielded only when the number of source words
            plus the number of target words is strictly below this value.

    Returns:
        A no-argument generator function yielding
        `(src_ids, trg_ids, trg_ids_next)` where `src_ids` is wrapped in the
        start and end ids, `trg_ids` starts with the start id and
        `trg_ids_next` ends with the end id.
    """

    def reader():
        src_col = 0 if src_lang == "cn" else 1
        trg_col = 1 - src_col

        # The dictionary language has to follow the column being read.
        # Always loading "cn" for the source (as before) looked English
        # words up in the Chinese vocabulary whenever src_lang != "cn".
        if src_col == 0:
            src_dict_lang, trg_dict_lang = "cn", "en"
        else:
            src_dict_lang, trg_dict_lang = "en", "cn"
        src_dict = __load_dict(data_file, src_dict_size, src_dict_lang,
                               src_dict_file)
        trg_dict = __load_dict(data_file, trg_dict_size, trg_dict_lang,
                               trg_dict_file)

        # The indices of the start mark, end mark and unk are the same in
        # both dictionaries, so the source dictionary is used to find them.
        start_id = src_dict[START_MARK]
        end_id = src_dict[END_MARK]
        unk_id = src_dict[UNK_MARK]

        data_path = os.path.join(DATA_HOME, data_file)
        if os.path.isdir(data_path):
            data_files = [
                os.path.join(data_path, f) for f in os.listdir(data_path)
            ]
        else:
            data_files = [data_path]

        for file_path in data_files:
            with open(file_path, mode="r") as f:
                # Iterate lazily rather than loading the file with
                # readlines().
                for line in f:
                    line_split = line.strip().split("\t")
                    if len(line_split) != 2:
                        continue
                    src_words = line_split[src_col].split()
                    trg_words = line_split[trg_col].split()
                    # Filter first so no id lists are built for pairs that
                    # would be dropped anyway.
                    if len(src_words) + len(trg_words) >= len_filter:
                        continue

                    src_ids = [start_id] + [
                        src_dict.get(w, unk_id) for w in src_words
                    ] + [end_id]
                    trg_body = [trg_dict.get(w, unk_id) for w in trg_words]
                    trg_ids = [start_id] + trg_body
                    trg_ids_next = trg_body + [end_id]
                    yield src_ids, trg_ids, trg_ids_next

    return reader
class LearningRateScheduler(object):
    """
    Learning rate schedule as described in the Transformer paper, computed
    outside the program.

    The scheduler owns a persistable global variable for the learning rate.
    On every call to `update_learning_rate` it advances its step counter,
    recomputes the rate and places it in the feed dict under that variable's
    name, so the value reaches the main_program as input data.
    """

    def __init__(self,
                 d_model,
                 warmup_steps,
                 place,
                 learning_rate=0.001,
                 current_steps=0,
                 name="learning_rate"):
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        self.current_steps = current_steps
        self.place = place
        # Global variable of shape [1] that carries the learning rate.
        self.learning_rate = layers.create_global_var(
            name=name,
            shape=[1],
            value=float(learning_rate),
            dtype="float32",
            persistable=True)

    def update_learning_rate(self, data_input):
        """
        Advance one step and store the new learning rate in `data_input`.

        The rate is
        d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        and is written as a float32 LoDTensor of one element, keyed by the
        name of the learning rate variable.
        """
        self.current_steps += 1
        step = self.current_steps

        decay_term = np.power(step, -0.5)
        warmup_term = np.power(self.warmup_steps, -1.5) * step
        scale = np.power(self.d_model, -0.5)
        lr_value = scale * np.min([decay_term, warmup_term])

        lr_tensor = fluid.LoDTensor()
        lr_tensor.set(np.array([lr_value], dtype="float32"), self.place)
        data_input[self.learning_rate.name] = lr_tensor
loss: 15684.426758, avg loss: 10.311918, ppl: 30089.099609 +epoch: 0, batch: 11, sum loss: 16326.007812, avg loss: 10.313334, ppl: 30131.712891 +epoch: 0, batch: 12, sum loss: 19040.072266, avg loss: 10.308648, ppl: 29990.865234 +epoch: 0, batch: 13, sum loss: 17394.527344, avg loss: 10.304815, ppl: 29876.134766 +epoch: 0, batch: 14, sum loss: 17366.382812, avg loss: 10.312579, ppl: 30108.992188 +epoch: 0, batch: 15, sum loss: 17109.820312, avg loss: 10.313334, ppl: 30131.712891 +epoch: 0, batch: 16, sum loss: 17632.037109, avg loss: 10.311132, ppl: 30065.464844 +epoch: 0, batch: 17, sum loss: 18952.136719, avg loss: 10.305676, ppl: 29901.845703 +epoch: 0, batch: 18, sum loss: 16547.919922, avg loss: 10.310230, ppl: 30038.351562 +epoch: 0, batch: 19, sum loss: 17472.691406, avg loss: 10.302295, ppl: 29800.925781 +epoch: 0, batch: 20, sum loss: 16424.042969, avg loss: 10.310134, ppl: 30035.458984 +epoch: 0, batch: 21, sum loss: 19581.789062, avg loss: 10.306206, ppl: 29917.705078 +epoch: 0, batch: 22, sum loss: 18407.675781, avg loss: 10.300882, ppl: 29758.865234 +epoch: 0, batch: 23, sum loss: 18321.349609, avg loss: 10.304472, ppl: 29865.880859 +epoch: 0, batch: 24, sum loss: 15336.279297, avg loss: 10.292806, ppl: 29519.480469 +epoch: 0, batch: 25, sum loss: 16892.414062, avg loss: 10.300253, ppl: 29740.138672 +epoch: 0, batch: 26, sum loss: 15955.625977, avg loss: 10.300598, ppl: 29750.408203 +epoch: 0, batch: 27, sum loss: 18613.611328, avg loss: 10.300836, ppl: 29757.474609 +epoch: 0, batch: 28, sum loss: 18228.568359, avg loss: 10.298627, ppl: 29691.820312 +epoch: 0, batch: 29, sum loss: 17414.699219, avg loss: 10.298463, ppl: 29686.951172 +epoch: 0, batch: 30, sum loss: 16385.468750, avg loss: 10.292379, ppl: 29506.898438 +epoch: 0, batch: 31, sum loss: 19446.626953, avg loss: 10.294667, ppl: 29574.484375 +epoch: 0, batch: 32, sum loss: 16833.849609, avg loss: 10.289639, ppl: 29426.164062 +epoch: 0, batch: 33, sum loss: 17823.677734, avg loss: 10.284868, 
ppl: 29286.099609 +epoch: 0, batch: 34, sum loss: 15476.041992, avg loss: 10.289921, ppl: 29434.443359 +epoch: 0, batch: 35, sum loss: 16805.248047, avg loss: 10.284730, ppl: 29282.048828 +epoch: 0, batch: 36, sum loss: 18518.216797, avg loss: 10.287898, ppl: 29374.964844 +epoch: 0, batch: 37, sum loss: 18246.220703, avg loss: 10.285355, ppl: 29300.345703 +epoch: 0, batch: 38, sum loss: 16609.126953, avg loss: 10.284289, ppl: 29269.150391 +epoch: 0, batch: 39, sum loss: 17553.316406, avg loss: 10.277117, ppl: 29059.966797 +epoch: 0, batch: 40, sum loss: 16919.937500, avg loss: 10.279428, ppl: 29127.193359 +epoch: 0, batch: 41, sum loss: 18260.501953, avg loss: 10.276028, ppl: 29028.333984 +epoch: 0, batch: 42, sum loss: 16274.455078, avg loss: 10.261321, ppl: 28604.548828 +epoch: 0, batch: 43, sum loss: 18909.234375, avg loss: 10.276757, ppl: 29049.519531 +epoch: 0, batch: 44, sum loss: 14218.058594, avg loss: 10.265746, ppl: 28731.406250 +epoch: 0, batch: 45, sum loss: 16650.093750, avg loss: 10.265162, ppl: 28714.642578 +epoch: 0, batch: 46, sum loss: 19669.738281, avg loss: 10.266043, ppl: 28739.929688 +epoch: 0, batch: 47, sum loss: 15732.839844, avg loss: 10.262779, ppl: 28646.291016 +epoch: 0, batch: 48, sum loss: 19289.884766, avg loss: 10.260576, ppl: 28583.251953 +epoch: 0, batch: 49, sum loss: 17743.123047, avg loss: 10.256141, ppl: 28456.751953 +epoch: 0, batch: 50, sum loss: 16088.282227, avg loss: 10.253844, ppl: 28391.476562 +epoch: 0, batch: 51, sum loss: 17251.476562, avg loss: 10.262627, ppl: 28641.919922 +epoch: 0, batch: 52, sum loss: 17476.626953, avg loss: 10.256237, ppl: 28459.492188 +epoch: 0, batch: 53, sum loss: 18781.789062, avg loss: 10.246475, ppl: 28183.027344 +epoch: 0, batch: 54, sum loss: 17430.316406, avg loss: 10.241078, ppl: 28031.337891 +epoch: 0, batch: 55, sum loss: 16161.534180, avg loss: 10.241783, ppl: 28051.099609 +epoch: 0, batch: 56, sum loss: 18744.242188, avg loss: 10.242756, ppl: 28078.400391 +epoch: 0, batch: 57, sum 
loss: 16048.675781, avg loss: 10.228601, ppl: 27683.765625 +epoch: 0, batch: 58, sum loss: 14569.031250, avg loss: 10.231061, ppl: 27751.939453 +epoch: 0, batch: 59, sum loss: 16899.085938, avg loss: 10.235667, ppl: 27880.066406 +epoch: 0, batch: 60, sum loss: 18782.113281, avg loss: 10.229909, ppl: 27719.986328 +epoch: 0, batch: 61, sum loss: 16924.703125, avg loss: 10.238780, ppl: 27966.986328 +epoch: 0, batch: 62, sum loss: 18315.830078, avg loss: 10.220888, ppl: 27471.054688 +epoch: 0, batch: 63, sum loss: 16671.101562, avg loss: 10.215137, ppl: 27313.503906 +epoch: 0, batch: 64, sum loss: 18088.339844, avg loss: 10.213631, ppl: 27272.406250 +epoch: 0, batch: 65, sum loss: 18115.646484, avg loss: 10.217511, ppl: 27378.441406 +epoch: 0, batch: 66, sum loss: 17379.099609, avg loss: 10.204991, ppl: 27037.804688 +epoch: 0, batch: 67, sum loss: 17479.105469, avg loss: 10.203797, ppl: 27005.541016 +epoch: 0, batch: 68, sum loss: 16555.761719, avg loss: 10.207005, ppl: 27092.292969 +epoch: 0, batch: 69, sum loss: 19051.195312, avg loss: 10.193255, ppl: 26722.345703 +epoch: 0, batch: 70, sum loss: 16632.558594, avg loss: 10.191519, ppl: 26675.978516 +epoch: 0, batch: 71, sum loss: 17580.753906, avg loss: 10.197654, ppl: 26840.138672 +epoch: 0, batch: 72, sum loss: 20388.974609, avg loss: 10.189393, ppl: 26619.333984 +epoch: 0, batch: 73, sum loss: 16480.386719, avg loss: 10.185653, ppl: 26519.955078 +epoch: 0, batch: 74, sum loss: 20409.837891, avg loss: 10.184550, ppl: 26490.734375 +epoch: 0, batch: 75, sum loss: 19735.121094, avg loss: 10.177989, ppl: 26317.490234 +epoch: 0, batch: 76, sum loss: 18194.488281, avg loss: 10.170200, ppl: 26113.308594 +epoch: 0, batch: 77, sum loss: 18844.496094, avg loss: 10.169723, ppl: 26100.833984 +epoch: 0, batch: 78, sum loss: 16387.880859, avg loss: 10.166179, ppl: 26008.500000 +epoch: 0, batch: 79, sum loss: 16727.695312, avg loss: 10.162634, ppl: 25916.468750 +epoch: 0, batch: 80, sum loss: 17574.869141, avg loss: 10.158884, 
ppl: 25819.466797 +epoch: 0, batch: 81, sum loss: 18394.568359, avg loss: 10.151527, ppl: 25630.220703 +epoch: 0, batch: 82, sum loss: 17733.800781, avg loss: 10.151002, ppl: 25616.755859 +epoch: 0, batch: 83, sum loss: 16349.505859, avg loss: 10.148669, ppl: 25557.068359 +epoch: 0, batch: 84, sum loss: 16046.363281, avg loss: 10.143086, ppl: 25414.787109 +epoch: 0, batch: 85, sum loss: 16688.326172, avg loss: 10.120271, ppl: 24841.496094 +epoch: 0, batch: 86, sum loss: 18667.199219, avg loss: 10.117723, ppl: 24778.273438 +epoch: 0, batch: 87, sum loss: 15629.372070, avg loss: 10.116099, ppl: 24738.087891 +epoch: 0, batch: 88, sum loss: 17402.865234, avg loss: 10.135623, ppl: 25225.810547 +epoch: 0, batch: 89, sum loss: 17432.927734, avg loss: 10.117776, ppl: 24779.597656 +epoch: 0, batch: 90, sum loss: 16895.244141, avg loss: 10.086714, ppl: 24021.722656 +epoch: 0, batch: 91, sum loss: 17096.398438, avg loss: 10.110230, ppl: 24593.328125 +epoch: 0, batch: 92, sum loss: 17680.355469, avg loss: 10.080020, ppl: 23861.460938 +epoch: 0, batch: 93, sum loss: 18438.351562, avg loss: 10.081111, ppl: 23887.507812 +epoch: 0, batch: 94, sum loss: 15412.826172, avg loss: 10.093534, ppl: 24186.103516 +epoch: 0, batch: 95, sum loss: 18932.517578, avg loss: 10.086584, ppl: 24018.607422 +epoch: 0, batch: 96, sum loss: 17940.908203, avg loss: 10.079162, ppl: 23840.990234 +epoch: 0, batch: 97, sum loss: 13909.125977, avg loss: 10.057214, ppl: 23323.431641 +epoch: 0, batch: 98, sum loss: 15578.993164, avg loss: 10.090022, ppl: 24101.324219 +epoch: 0, batch: 99, sum loss: 17072.457031, avg loss: 10.048533, ppl: 23121.851562 +epoch: 0, batch: 100, sum loss: 17527.394531, avg loss: 10.055878, ppl: 23292.291016 +epoch: 0, batch: 101, sum loss: 17914.800781, avg loss: 10.053200, ppl: 23229.998047 +epoch: 0, batch: 102, sum loss: 16642.652344, avg loss: 10.037788, ppl: 22874.736328 +epoch: 0, batch: 103, sum loss: 14763.207031, avg loss: 10.029353, ppl: 22682.593750 +epoch: 0, batch: 104, 
sum loss: 16957.082031, avg loss: 10.039718, ppl: 22918.912109 +epoch: 0, batch: 105, sum loss: 17652.894531, avg loss: 10.041464, ppl: 22958.966797 +epoch: 0, batch: 106, sum loss: 17779.177734, avg loss: 10.033397, ppl: 22774.498047 +epoch: 0, batch: 107, sum loss: 19039.607422, avg loss: 10.010309, ppl: 22254.716797 +epoch: 0, batch: 108, sum loss: 18596.259766, avg loss: 10.024938, ppl: 22582.660156 +epoch: 0, batch: 109, sum loss: 16632.761719, avg loss: 10.001660, ppl: 22063.068359 +epoch: 0, batch: 110, sum loss: 17766.804688, avg loss: 10.009467, ppl: 22235.982422 +epoch: 0, batch: 111, sum loss: 19654.441406, avg loss: 10.017553, ppl: 22416.517578 +epoch: 0, batch: 112, sum loss: 18079.576172, avg loss: 9.983201, ppl: 21659.535156 +epoch: 0, batch: 113, sum loss: 16270.853516, avg loss: 9.982119, ppl: 21636.101562 +epoch: 0, batch: 114, sum loss: 18160.113281, avg loss: 9.961664, ppl: 21198.044922 +epoch: 0, batch: 115, sum loss: 17992.498047, avg loss: 9.979200, ppl: 21573.054688 +epoch: 0, batch: 116, sum loss: 15851.249023, avg loss: 9.956815, ppl: 21095.494141 +epoch: 0, batch: 117, sum loss: 16149.617188, avg loss: 9.968900, ppl: 21351.978516 +epoch: 0, batch: 118, sum loss: 16450.443359, avg loss: 9.933843, ppl: 20616.410156 +epoch: 0, batch: 119, sum loss: 18124.521484, avg loss: 9.942141, ppl: 20788.195312 +epoch: 0, batch: 120, sum loss: 17684.167969, avg loss: 9.946101, ppl: 20870.693359 +epoch: 0, batch: 121, sum loss: 17653.873047, avg loss: 9.934649, ppl: 20633.050781 +epoch: 0, batch: 122, sum loss: 14868.475586, avg loss: 9.952126, ppl: 20996.804688 +epoch: 0, batch: 123, sum loss: 17410.552734, avg loss: 9.926199, ppl: 20459.425781 +epoch: 0, batch: 124, sum loss: 15289.935547, avg loss: 9.928530, ppl: 20507.167969 +epoch: 0, batch: 125, sum loss: 18445.177734, avg loss: 9.895482, ppl: 19840.529297 +epoch: 0, batch: 126, sum loss: 17258.894531, avg loss: 9.890484, ppl: 19741.609375 +epoch: 0, batch: 127, sum loss: 18111.085938, avg loss: 
9.923882, ppl: 20412.085938 +epoch: 0, batch: 128, sum loss: 16286.631836, avg loss: 9.870686, ppl: 19354.603516 +epoch: 0, batch: 129, sum loss: 15277.259766, avg loss: 9.901011, ppl: 19950.539062 +epoch: 0, batch: 130, sum loss: 16546.152344, avg loss: 9.907876, ppl: 20087.962891 +epoch: 0, batch: 131, sum loss: 16815.906250, avg loss: 9.891710, ppl: 19765.835938 +epoch: 0, batch: 132, sum loss: 19152.542969, avg loss: 9.862278, ppl: 19192.560547 +epoch: 0, batch: 133, sum loss: 16138.133789, avg loss: 9.876459, ppl: 19466.671875 +epoch: 0, batch: 134, sum loss: 15195.428711, avg loss: 9.854363, ppl: 19041.259766 +epoch: 0, batch: 135, sum loss: 15818.170898, avg loss: 9.843293, ppl: 18831.630859 +epoch: 0, batch: 136, sum loss: 16101.520508, avg loss: 9.872177, ppl: 19383.494141 +epoch: 0, batch: 137, sum loss: 15450.732422, avg loss: 9.847503, ppl: 18911.068359 +epoch: 0, batch: 138, sum loss: 17129.287109, avg loss: 9.838763, ppl: 18746.517578 +epoch: 0, batch: 139, sum loss: 18644.849609, avg loss: 9.828598, ppl: 18556.919922 +epoch: 0, batch: 140, sum loss: 15120.088867, avg loss: 9.824619, ppl: 18483.232422 +epoch: 0, batch: 141, sum loss: 15233.888672, avg loss: 9.847375, ppl: 18908.652344 +epoch: 0, batch: 142, sum loss: 16399.283203, avg loss: 9.831705, ppl: 18614.666016 +epoch: 0, batch: 143, sum loss: 15894.869141, avg loss: 9.848123, ppl: 18922.794922 +epoch: 0, batch: 144, sum loss: 18169.656250, avg loss: 9.816130, ppl: 18326.982422 +epoch: 0, batch: 145, sum loss: 18442.189453, avg loss: 9.820122, ppl: 18400.291016 +epoch: 0, batch: 146, sum loss: 17373.902344, avg loss: 9.810222, ppl: 18219.025391 +epoch: 0, batch: 147, sum loss: 17094.693359, avg loss: 9.790775, ppl: 17868.154297 +epoch: 0, batch: 148, sum loss: 16458.845703, avg loss: 9.802767, ppl: 18083.708984 +epoch: 0, batch: 149, sum loss: 17633.919922, avg loss: 9.802068, ppl: 18071.072266 +epoch: 0, batch: 150, sum loss: 16797.125000, avg loss: 9.805677, ppl: 18136.421875 +epoch: 0, 
batch: 151, sum loss: 18675.156250, avg loss: 9.813535, ppl: 18279.486328 +epoch: 0, batch: 152, sum loss: 17401.156250, avg loss: 9.786927, ppl: 17799.527344 +epoch: 0, batch: 153, sum loss: 16465.183594, avg loss: 9.783235, ppl: 17733.921875 +epoch: 0, batch: 154, sum loss: 15828.977539, avg loss: 9.783051, ppl: 17730.675781 +epoch: 0, batch: 155, sum loss: 16844.533203, avg loss: 9.781959, ppl: 17711.308594 +epoch: 0, batch: 156, sum loss: 16560.484375, avg loss: 9.775965, ppl: 17605.466797 +epoch: 0, batch: 157, sum loss: 16164.111328, avg loss: 9.755046, ppl: 17241.005859 +epoch: 0, batch: 158, sum loss: 17298.191406, avg loss: 9.756454, ppl: 17265.291016 +epoch: 0, batch: 159, sum loss: 13428.293945, avg loss: 9.751847, ppl: 17185.947266 +epoch: 0, batch: 160, sum loss: 14442.484375, avg loss: 9.725578, ppl: 16740.367188 +epoch: 0, batch: 161, sum loss: 16111.308594, avg loss: 9.758515, ppl: 17300.927734 +epoch: 0, batch: 162, sum loss: 18994.666016, avg loss: 9.770920, ppl: 17516.871094 +epoch: 0, batch: 163, sum loss: 14921.886719, avg loss: 9.759246, ppl: 17313.570312 +epoch: 0, batch: 164, sum loss: 15860.679688, avg loss: 9.724512, ppl: 16722.529297 +epoch: 0, batch: 165, sum loss: 15028.316406, avg loss: 9.745990, ppl: 17085.574219 +epoch: 0, batch: 166, sum loss: 15529.936523, avg loss: 9.754985, ppl: 17239.953125 +epoch: 0, batch: 167, sum loss: 15301.804688, avg loss: 9.733973, ppl: 16881.482422 +epoch: 0, batch: 168, sum loss: 16270.163086, avg loss: 9.707735, ppl: 16444.314453 +epoch: 0, batch: 169, sum loss: 14739.288086, avg loss: 9.716076, ppl: 16582.046875 +epoch: 0, batch: 170, sum loss: 15592.464844, avg loss: 9.696806, ppl: 16265.571289 +epoch: 0, batch: 171, sum loss: 15424.695312, avg loss: 9.713284, ppl: 16535.824219 +epoch: 0, batch: 172, sum loss: 17851.886719, avg loss: 9.681067, ppl: 16011.564453 +epoch: 0, batch: 173, sum loss: 16513.599609, avg loss: 9.708171, ppl: 16451.482422 +epoch: 0, batch: 174, sum loss: 17314.773438, avg 
loss: 9.667656, ppl: 15798.273438 +epoch: 0, batch: 175, sum loss: 16817.871094, avg loss: 9.698887, ppl: 16299.454102 +epoch: 0, batch: 176, sum loss: 14732.503906, avg loss: 9.686065, ppl: 16091.793945 +epoch: 0, batch: 177, sum loss: 15237.389648, avg loss: 9.699166, ppl: 16304.008789 +epoch: 0, batch: 178, sum loss: 15039.850586, avg loss: 9.696873, ppl: 16266.657227 +epoch: 0, batch: 179, sum loss: 16587.976562, avg loss: 9.672289, ppl: 15871.635742 +epoch: 0, batch: 180, sum loss: 16279.054688, avg loss: 9.678391, ppl: 15968.790039 +epoch: 0, batch: 181, sum loss: 15693.358398, avg loss: 9.687259, ppl: 16111.018555 +epoch: 0, batch: 182, sum loss: 16148.351562, avg loss: 9.669672, ppl: 15830.156250 +epoch: 0, batch: 183, sum loss: 18067.707031, avg loss: 9.687778, ppl: 16119.394531 +epoch: 0, batch: 184, sum loss: 16392.335938, avg loss: 9.693871, ppl: 16217.910156 +epoch: 0, batch: 185, sum loss: 16197.346680, avg loss: 9.658525, ppl: 15654.668945 +epoch: 0, batch: 186, sum loss: 18513.253906, avg loss: 9.657410, ppl: 15637.226562 +epoch: 0, batch: 187, sum loss: 16106.554688, avg loss: 9.633107, ppl: 15261.784180 +epoch: 0, batch: 188, sum loss: 15632.165039, avg loss: 9.613877, ppl: 14971.105469 +epoch: 0, batch: 189, sum loss: 18898.939453, avg loss: 9.662034, ppl: 15709.706055 +epoch: 0, batch: 190, sum loss: 18404.755859, avg loss: 9.651156, ppl: 15539.748047 +epoch: 0, batch: 191, sum loss: 13627.621094, avg loss: 9.664980, ppl: 15756.053711 +epoch: 0, batch: 192, sum loss: 19435.560547, avg loss: 9.621565, ppl: 15086.639648 +epoch: 0, batch: 193, sum loss: 16343.649414, avg loss: 9.619571, ppl: 15056.584961 +epoch: 0, batch: 194, sum loss: 15054.208984, avg loss: 9.637778, ppl: 15333.239258 +epoch: 0, batch: 195, sum loss: 14759.023438, avg loss: 9.659047, ppl: 15662.852539 +epoch: 0, batch: 196, sum loss: 18738.878906, avg loss: 9.649269, ppl: 15510.447266 +epoch: 0, batch: 197, sum loss: 17947.341797, avg loss: 9.623240, ppl: 15111.925781 +epoch: 0, 
batch: 198, sum loss: 17576.193359, avg loss: 9.625517, ppl: 15146.380859 +epoch: 0, batch: 199, sum loss: 15658.958984, avg loss: 9.636282, ppl: 15310.313477 +epoch: 0, batch: 200, sum loss: 15413.962891, avg loss: 9.579840, ppl: 14470.099609 +epoch: 0, batch: 201, sum loss: 16586.974609, avg loss: 9.604502, ppl: 14831.398438 +epoch: 0, batch: 202, sum loss: 14226.750977, avg loss: 9.586760, ppl: 14570.578125 +epoch: 0, batch: 203, sum loss: 17350.265625, avg loss: 9.596386, ppl: 14711.517578 +epoch: 0, batch: 204, sum loss: 14342.899414, avg loss: 9.613204, ppl: 14961.028320 +epoch: 0, batch: 205, sum loss: 17778.894531, avg loss: 9.625823, ppl: 15151.018555 +epoch: 0, batch: 206, sum loss: 17338.919922, avg loss: 9.574224, ppl: 14389.074219 +epoch: 0, batch: 207, sum loss: 18079.921875, avg loss: 9.561037, ppl: 14200.565430 +epoch: 0, batch: 208, sum loss: 16403.845703, avg loss: 9.604125, ppl: 14825.812500 +epoch: 0, batch: 209, sum loss: 15177.222656, avg loss: 9.551431, ppl: 14064.802734 +epoch: 0, batch: 210, sum loss: 15963.509766, avg loss: 9.570450, ppl: 14334.863281 +epoch: 0, batch: 211, sum loss: 15788.051758, avg loss: 9.591769, ppl: 14643.754883 +epoch: 0, batch: 212, sum loss: 16810.546875, avg loss: 9.600541, ppl: 14772.773438 +epoch: 0, batch: 213, sum loss: 17685.132812, avg loss: 9.554367, ppl: 14106.163086 +epoch: 0, batch: 214, sum loss: 13978.935547, avg loss: 9.554980, ppl: 14114.815430 +epoch: 0, batch: 215, sum loss: 14163.330078, avg loss: 9.556904, ppl: 14141.992188 +epoch: 0, batch: 216, sum loss: 17412.521484, avg loss: 9.556818, ppl: 14140.778320 +epoch: 0, batch: 217, sum loss: 15093.185547, avg loss: 9.540572, ppl: 13912.906250 +epoch: 0, batch: 218, sum loss: 14731.558594, avg loss: 9.559740, ppl: 14182.159180 +epoch: 0, batch: 219, sum loss: 16000.010742, avg loss: 9.552246, ppl: 14076.275391 +epoch: 0, batch: 220, sum loss: 17673.019531, avg loss: 9.573684, ppl: 14381.295898 +epoch: 0, batch: 221, sum loss: 15960.870117, avg 
loss: 9.545976, ppl: 13988.288086 +epoch: 0, batch: 222, sum loss: 16076.058594, avg loss: 9.552025, ppl: 14073.162109 +epoch: 0, batch: 223, sum loss: 16796.732422, avg loss: 9.538178, ppl: 13879.641602 +epoch: 0, batch: 224, sum loss: 14951.507812, avg loss: 9.565904, ppl: 14269.842773 +epoch: 0, batch: 225, sum loss: 17529.820312, avg loss: 9.547832, ppl: 14014.285156 +epoch: 0, batch: 226, sum loss: 16713.246094, avg loss: 9.534082, ppl: 13822.907227 +epoch: 0, batch: 227, sum loss: 16654.746094, avg loss: 9.495294, ppl: 13296.999023 +epoch: 0, batch: 228, sum loss: 17491.072266, avg loss: 9.547528, ppl: 14010.022461 +epoch: 0, batch: 229, sum loss: 16665.060547, avg loss: 9.528337, ppl: 13743.709961 +epoch: 0, batch: 230, sum loss: 16102.392578, avg loss: 9.449761, ppl: 12705.133789 +epoch: 0, batch: 231, sum loss: 17380.232422, avg loss: 9.518200, ppl: 13605.098633 +epoch: 0, batch: 232, sum loss: 15695.958984, avg loss: 9.472516, ppl: 12997.548828 +epoch: 0, batch: 233, sum loss: 16604.414062, avg loss: 9.531811, ppl: 13791.541992 +epoch: 0, batch: 234, sum loss: 16106.936523, avg loss: 9.530731, ppl: 13776.661133 +epoch: 0, batch: 235, sum loss: 15393.824219, avg loss: 9.490644, ppl: 13235.309570 +epoch: 0, batch: 236, sum loss: 15090.064453, avg loss: 9.496579, ppl: 13314.103516 +epoch: 0, batch: 237, sum loss: 17312.976562, avg loss: 9.476178, ppl: 13045.234375 +epoch: 0, batch: 238, sum loss: 15448.063477, avg loss: 9.500654, ppl: 13368.469727 +epoch: 0, batch: 239, sum loss: 15770.309570, avg loss: 9.477350, ppl: 13060.533203 +epoch: 0, batch: 240, sum loss: 13710.527344, avg loss: 9.488255, ppl: 13203.728516 +epoch: 0, batch: 241, sum loss: 16766.332031, avg loss: 9.467155, ppl: 12928.060547 +epoch: 0, batch: 242, sum loss: 15569.064453, avg loss: 9.470234, ppl: 12967.919922 +epoch: 0, batch: 243, sum loss: 15552.077148, avg loss: 9.482974, ppl: 13134.190430 +epoch: 0, batch: 244, sum loss: 14815.266602, avg loss: 9.442490, ppl: 12613.080078 +epoch: 0, 
batch: 245, sum loss: 15450.740234, avg loss: 9.496460, ppl: 13312.516602 +epoch: 0, batch: 246, sum loss: 17496.537109, avg loss: 9.437182, ppl: 12546.316406 +epoch: 0, batch: 247, sum loss: 16367.941406, avg loss: 9.433972, ppl: 12506.107422 +epoch: 0, batch: 248, sum loss: 16405.539062, avg loss: 9.444756, ppl: 12641.692383 +epoch: 0, batch: 249, sum loss: 17081.562500, avg loss: 9.442545, ppl: 12613.777344 +epoch: 0, batch: 250, sum loss: 17576.294922, avg loss: 9.424287, ppl: 12385.563477 +epoch: 0, batch: 251, sum loss: 15391.080078, avg loss: 9.442380, ppl: 12611.696289 +epoch: 0, batch: 252, sum loss: 16275.705078, avg loss: 9.424265, ppl: 12385.291992 +epoch: 0, batch: 253, sum loss: 15561.576172, avg loss: 9.402765, ppl: 12121.854492 +epoch: 0, batch: 254, sum loss: 14752.690430, avg loss: 9.414608, ppl: 12266.263672 +epoch: 0, batch: 255, sum loss: 15745.730469, avg loss: 9.422939, ppl: 12368.884766 +epoch: 0, batch: 256, sum loss: 19073.300781, avg loss: 9.437556, ppl: 12551.007812 +epoch: 0, batch: 257, sum loss: 14851.093750, avg loss: 9.399426, ppl: 12081.449219 +epoch: 0, batch: 258, sum loss: 15420.519531, avg loss: 9.443061, ppl: 12620.287109 +epoch: 0, batch: 259, sum loss: 14587.508789, avg loss: 9.368984, ppl: 11719.205078 +epoch: 0, batch: 260, sum loss: 16233.552734, avg loss: 9.438112, ppl: 12557.988281 +epoch: 0, batch: 261, sum loss: 14608.442383, avg loss: 9.406595, ppl: 12168.370117 +epoch: 0, batch: 262, sum loss: 12440.339844, avg loss: 9.360678, ppl: 11622.262695 +epoch: 0, batch: 263, sum loss: 16676.982422, avg loss: 9.416703, ppl: 12291.991211 +epoch: 0, batch: 264, sum loss: 15087.040039, avg loss: 9.376657, ppl: 11809.463867 +epoch: 0, batch: 265, sum loss: 15568.287109, avg loss: 9.401139, ppl: 12102.160156 +epoch: 0, batch: 266, sum loss: 14223.279297, avg loss: 9.419391, ppl: 12325.070312 +epoch: 0, batch: 267, sum loss: 13001.372070, avg loss: 9.407650, ppl: 12181.210938 +epoch: 0, batch: 268, sum loss: 18487.480469, avg 
loss: 9.356012, ppl: 11568.166992 +epoch: 0, batch: 269, sum loss: 17089.800781, avg loss: 9.364274, ppl: 11664.134766 +epoch: 0, batch: 270, sum loss: 14250.638672, avg loss: 9.406363, ppl: 12165.539062 +epoch: 0, batch: 271, sum loss: 16210.605469, avg loss: 9.364879, ppl: 11671.189453 +epoch: 0, batch: 272, sum loss: 17767.207031, avg loss: 9.375834, ppl: 11799.759766 +epoch: 0, batch: 273, sum loss: 15524.196289, avg loss: 9.385851, ppl: 11918.545898 +epoch: 0, batch: 274, sum loss: 15446.602539, avg loss: 9.333295, ppl: 11308.330078 +epoch: 0, batch: 275, sum loss: 15009.995117, avg loss: 9.352021, ppl: 11522.088867 +epoch: 0, batch: 276, sum loss: 14897.293945, avg loss: 9.316631, ppl: 11121.454102 +epoch: 0, batch: 277, sum loss: 16014.478516, avg loss: 9.332447, ppl: 11298.746094 +epoch: 0, batch: 278, sum loss: 15110.352539, avg loss: 9.362053, ppl: 11638.256836 +epoch: 0, batch: 279, sum loss: 16095.222656, avg loss: 9.325158, ppl: 11216.689453 +epoch: 0, batch: 280, sum loss: 15535.214844, avg loss: 9.347301, ppl: 11467.835938 +epoch: 0, batch: 281, sum loss: 16772.888672, avg loss: 9.307930, ppl: 11025.102539 +epoch: 0, batch: 282, sum loss: 16248.632812, avg loss: 9.343665, ppl: 11426.209961 +epoch: 0, batch: 283, sum loss: 14979.626953, avg loss: 9.292572, ppl: 10857.073242 +epoch: 0, batch: 284, sum loss: 18357.458984, avg loss: 9.299624, ppl: 10933.912109 +epoch: 0, batch: 285, sum loss: 15884.118164, avg loss: 9.299834, ppl: 10936.206055 +epoch: 0, batch: 286, sum loss: 15628.636719, avg loss: 9.324963, ppl: 11214.497070 +epoch: 0, batch: 287, sum loss: 17561.527344, avg loss: 9.301657, ppl: 10956.155273 +epoch: 0, batch: 288, sum loss: 16483.220703, avg loss: 9.291555, ppl: 10846.041016 +epoch: 0, batch: 289, sum loss: 14208.036133, avg loss: 9.244005, ppl: 10342.378906 +epoch: 0, batch: 290, sum loss: 15527.899414, avg loss: 9.292580, ppl: 10857.155273 +epoch: 0, batch: 291, sum loss: 15363.986328, avg loss: 9.260992, ppl: 10519.564453 +epoch: 0, 
batch: 292, sum loss: 15835.787109, avg loss: 9.298759, ppl: 10924.458984 +epoch: 0, batch: 293, sum loss: 15921.249023, avg loss: 9.272714, ppl: 10643.595703 +epoch: 0, batch: 294, sum loss: 16101.526367, avg loss: 9.264400, ppl: 10555.470703 +epoch: 0, batch: 295, sum loss: 13786.807617, avg loss: 9.271558, ppl: 10631.300781 +epoch: 0, batch: 296, sum loss: 14038.416016, avg loss: 9.278530, ppl: 10705.684570 +epoch: 0, batch: 297, sum loss: 15868.202148, avg loss: 9.263398, ppl: 10544.906250 +epoch: 0, batch: 298, sum loss: 15706.215820, avg loss: 9.233519, ppl: 10234.489258 +epoch: 0, batch: 299, sum loss: 16459.191406, avg loss: 9.262347, ppl: 10533.829102 +epoch: 0, batch: 300, sum loss: 18771.416016, avg loss: 9.233357, ppl: 10232.839844 +epoch: 0, batch: 301, sum loss: 15357.575195, avg loss: 9.240418, ppl: 10305.349609 +epoch: 0, batch: 302, sum loss: 13791.014648, avg loss: 9.261930, ppl: 10529.440430 +epoch: 0, batch: 303, sum loss: 15259.582031, avg loss: 9.225866, ppl: 10156.470703 +epoch: 0, batch: 304, sum loss: 17465.839844, avg loss: 9.250975, ppl: 10414.711914 +epoch: 0, batch: 305, sum loss: 14230.378906, avg loss: 9.234509, ppl: 10244.635742 +epoch: 0, batch: 306, sum loss: 15846.218750, avg loss: 9.202217, ppl: 9919.096680 +epoch: 0, batch: 307, sum loss: 16603.177734, avg loss: 9.223988, ppl: 10137.407227 +epoch: 0, batch: 308, sum loss: 16172.098633, avg loss: 9.209623, ppl: 9992.832031 +epoch: 0, batch: 309, sum loss: 16334.926758, avg loss: 9.218356, ppl: 10080.479492 +epoch: 0, batch: 310, sum loss: 14543.867188, avg loss: 9.199157, ppl: 9888.787109 +epoch: 0, batch: 311, sum loss: 15456.531250, avg loss: 9.200316, ppl: 9900.261719 +epoch: 0, batch: 312, sum loss: 17196.126953, avg loss: 9.181061, ppl: 9711.449219 +epoch: 0, batch: 313, sum loss: 14824.757812, avg loss: 9.219377, ppl: 10090.771484 +epoch: 0, batch: 314, sum loss: 16172.589844, avg loss: 9.183753, ppl: 9737.629883 +epoch: 0, batch: 315, sum loss: 15356.598633, avg loss: 
9.179079, ppl: 9692.222656 +epoch: 0, batch: 316, sum loss: 16169.975586, avg loss: 9.166653, ppl: 9572.528320 +epoch: 0, batch: 317, sum loss: 14735.345703, avg loss: 9.124053, ppl: 9173.305664 +epoch: 0, batch: 318, sum loss: 15091.082031, avg loss: 9.173909, ppl: 9642.244141 +epoch: 0, batch: 319, sum loss: 14800.994141, avg loss: 9.170381, ppl: 9608.281250 +epoch: 0, batch: 320, sum loss: 16321.981445, avg loss: 9.133734, ppl: 9262.541016 +epoch: 0, batch: 321, sum loss: 14548.482422, avg loss: 9.115590, ppl: 9096.000977 +epoch: 0, batch: 322, sum loss: 15777.794922, avg loss: 9.151854, ppl: 9431.907227 +epoch: 0, batch: 323, sum loss: 14776.096680, avg loss: 9.149286, ppl: 9407.723633 +epoch: 0, batch: 324, sum loss: 15156.813477, avg loss: 9.136114, ppl: 9284.616211 +epoch: 0, batch: 325, sum loss: 16415.320312, avg loss: 9.114558, ppl: 9086.619141 +epoch: 0, batch: 326, sum loss: 16577.326172, avg loss: 9.123460, ppl: 9167.866211 +epoch: 0, batch: 327, sum loss: 16429.990234, avg loss: 9.062323, ppl: 8624.158203 +epoch: 0, batch: 328, sum loss: 14923.648438, avg loss: 9.083170, ppl: 8805.835938 +epoch: 0, batch: 329, sum loss: 14912.773438, avg loss: 9.143332, ppl: 9351.869141 +epoch: 0, batch: 330, sum loss: 14149.399414, avg loss: 9.111011, ppl: 9054.440430 +epoch: 0, batch: 331, sum loss: 14094.408203, avg loss: 9.075602, ppl: 8739.441406 +epoch: 0, batch: 332, sum loss: 13647.798828, avg loss: 9.135073, ppl: 9274.952148 +epoch: 0, batch: 333, sum loss: 15718.046875, avg loss: 9.090831, ppl: 8873.554688 +epoch: 0, batch: 334, sum loss: 16695.990234, avg loss: 9.108560, ppl: 9032.275391 +epoch: 0, batch: 335, sum loss: 14076.398438, avg loss: 9.069845, ppl: 8689.278320 +epoch: 0, batch: 336, sum loss: 16774.982422, avg loss: 9.097062, ppl: 8929.021484 +epoch: 0, batch: 337, sum loss: 16476.578125, avg loss: 9.098056, ppl: 8937.899414 +epoch: 0, batch: 338, sum loss: 13482.208008, avg loss: 9.121927, ppl: 9153.826172 +epoch: 0, batch: 339, sum loss: 
14550.789062, avg loss: 9.071565, ppl: 8704.232422 +epoch: 0, batch: 340, sum loss: 14411.323242, avg loss: 9.075141, ppl: 8735.416992 +epoch: 0, batch: 341, sum loss: 15725.564453, avg loss: 9.042878, ppl: 8458.085938 +epoch: 0, batch: 342, sum loss: 15182.386719, avg loss: 9.058703, ppl: 8593.001953 +epoch: 0, batch: 343, sum loss: 13689.798828, avg loss: 9.012376, ppl: 8203.989258 +epoch: 0, batch: 344, sum loss: 14643.041016, avg loss: 9.033338, ppl: 8377.774414 +epoch: 0, batch: 345, sum loss: 13729.978516, avg loss: 9.015088, ppl: 8226.270508 +epoch: 0, batch: 346, sum loss: 13244.455078, avg loss: 9.028258, ppl: 8335.329102 +epoch: 0, batch: 347, sum loss: 14140.815430, avg loss: 9.053019, ppl: 8544.291016 +epoch: 0, batch: 348, sum loss: 13357.664062, avg loss: 8.982962, ppl: 7966.190430 +epoch: 0, batch: 349, sum loss: 14262.928711, avg loss: 9.050081, ppl: 8519.230469 +epoch: 0, batch: 350, sum loss: 13767.748047, avg loss: 9.016207, ppl: 8235.478516 +epoch: 0, batch: 351, sum loss: 14276.409180, avg loss: 8.990183, ppl: 8023.924316 +epoch: 0, batch: 352, sum loss: 15267.072266, avg loss: 8.985917, ppl: 7989.768555 +epoch: 0, batch: 353, sum loss: 18049.390625, avg loss: 8.997703, ppl: 8084.489258 +epoch: 0, batch: 354, sum loss: 15310.473633, avg loss: 9.022082, ppl: 8284.008789 +epoch: 0, batch: 355, sum loss: 13919.218750, avg loss: 8.997556, ppl: 8083.302246 +epoch: 0, batch: 356, sum loss: 14290.011719, avg loss: 8.976138, ppl: 7912.017578 +epoch: 0, batch: 357, sum loss: 14525.976562, avg loss: 9.005566, ppl: 8148.308594 +epoch: 0, batch: 358, sum loss: 16305.395508, avg loss: 8.973801, ppl: 7893.545410 +epoch: 0, batch: 359, sum loss: 13751.706055, avg loss: 8.918097, ppl: 7465.872070 +epoch: 0, batch: 360, sum loss: 13291.224609, avg loss: 8.926275, ppl: 7527.176270 +epoch: 0, batch: 361, sum loss: 14307.962891, avg loss: 8.981772, ppl: 7956.722168 +epoch: 0, batch: 362, sum loss: 18018.505859, avg loss: 8.889248, ppl: 7253.561523 +epoch: 0, 
batch: 363, sum loss: 13711.170898, avg loss: 8.932359, ppl: 7573.107422 +epoch: 0, batch: 364, sum loss: 14057.253906, avg loss: 8.919578, ppl: 7476.930176 +epoch: 0, batch: 365, sum loss: 14250.660156, avg loss: 8.878916, ppl: 7179.002930 +epoch: 0, batch: 366, sum loss: 16216.779297, avg loss: 8.915216, ppl: 7444.393066 +epoch: 0, batch: 367, sum loss: 15672.493164, avg loss: 8.940384, ppl: 7634.127441 +epoch: 0, batch: 368, sum loss: 14432.490234, avg loss: 8.930996, ppl: 7562.793945 +epoch: 0, batch: 369, sum loss: 15639.893555, avg loss: 8.921788, ppl: 7493.477051 +epoch: 0, batch: 370, sum loss: 16028.537109, avg loss: 8.944496, ppl: 7665.585449 +epoch: 0, batch: 371, sum loss: 15157.022461, avg loss: 8.900189, ppl: 7333.362305 +epoch: 0, batch: 372, sum loss: 14094.496094, avg loss: 8.920568, ppl: 7484.335449 +epoch: 0, batch: 373, sum loss: 15911.381836, avg loss: 8.884076, ppl: 7216.144531 +epoch: 0, batch: 374, sum loss: 16339.725586, avg loss: 8.880285, ppl: 7188.841309 +epoch: 0, batch: 375, sum loss: 14571.592773, avg loss: 8.863500, ppl: 7069.179199 +epoch: 0, batch: 376, sum loss: 14716.277344, avg loss: 8.828000, ppl: 6822.628418 +epoch: 0, batch: 377, sum loss: 14815.119141, avg loss: 8.850131, ppl: 6975.302734 +epoch: 0, batch: 378, sum loss: 17175.458984, avg loss: 8.885390, ppl: 7225.634277 +epoch: 0, batch: 379, sum loss: 15278.026367, avg loss: 8.841451, ppl: 6915.017090 +epoch: 0, batch: 380, sum loss: 12397.344727, avg loss: 8.855247, ppl: 7011.076660 +epoch: 0, batch: 381, sum loss: 15318.178711, avg loss: 8.864687, ppl: 7077.577637 +epoch: 0, batch: 382, sum loss: 12752.564453, avg loss: 8.825304, ppl: 6804.259277 +epoch: 0, batch: 383, sum loss: 16571.748047, avg loss: 8.852430, ppl: 6991.359863 +epoch: 0, batch: 384, sum loss: 16725.367188, avg loss: 8.788947, ppl: 6561.320312 +epoch: 0, batch: 385, sum loss: 15273.508789, avg loss: 8.828618, ppl: 6826.845703 +epoch: 0, batch: 386, sum loss: 13468.542969, avg loss: 8.849240, ppl: 
6969.092773 +epoch: 0, batch: 387, sum loss: 14626.250977, avg loss: 8.805690, ppl: 6672.099121 +epoch: 0, batch: 388, sum loss: 14957.893555, avg loss: 8.819512, ppl: 6764.964844 +epoch: 0, batch: 389, sum loss: 16218.675781, avg loss: 8.862664, ppl: 7063.275879 +epoch: 0, batch: 390, sum loss: 13162.441406, avg loss: 8.740001, ppl: 6247.900391 +epoch: 0, batch: 391, sum loss: 13267.458984, avg loss: 8.780581, ppl: 6506.659668 +epoch: 0, batch: 392, sum loss: 14095.144531, avg loss: 8.792978, ppl: 6587.823242 +epoch: 0, batch: 393, sum loss: 13665.897461, avg loss: 8.765810, ppl: 6411.252930 +epoch: 0, batch: 394, sum loss: 12467.498047, avg loss: 8.810952, ppl: 6707.302734 +epoch: 0, batch: 395, sum loss: 14723.919922, avg loss: 8.748615, ppl: 6301.955566 +epoch: 0, batch: 396, sum loss: 16782.550781, avg loss: 8.704643, ppl: 6030.850098 +epoch: 0, batch: 397, sum loss: 16161.369141, avg loss: 8.764300, ppl: 6401.581543 +epoch: 0, batch: 398, sum loss: 15397.066406, avg loss: 8.728496, ppl: 6176.429199 +epoch: 0, batch: 399, sum loss: 13388.330078, avg loss: 8.705026, ppl: 6033.156738 +epoch: 0, batch: 400, sum loss: 15996.978516, avg loss: 8.751082, ppl: 6317.522461 +epoch: 0, batch: 401, sum loss: 14561.880859, avg loss: 8.683292, ppl: 5903.451172 +epoch: 0, batch: 402, sum loss: 13331.966797, avg loss: 8.748010, ppl: 6298.140137 +epoch: 0, batch: 403, sum loss: 16577.078125, avg loss: 8.724778, ppl: 6153.511719 +epoch: 0, batch: 404, sum loss: 15629.073242, avg loss: 8.716717, ppl: 6104.104980 +epoch: 0, batch: 405, sum loss: 13573.458984, avg loss: 8.706516, ppl: 6042.156738 +epoch: 0, batch: 406, sum loss: 12513.215820, avg loss: 8.659665, ppl: 5765.603516 +epoch: 0, batch: 407, sum loss: 14256.152344, avg loss: 8.655829, ppl: 5743.530762 +epoch: 0, batch: 408, sum loss: 13275.888672, avg loss: 8.677052, ppl: 5866.723145 +epoch: 0, batch: 409, sum loss: 15895.291016, avg loss: 8.685951, ppl: 5919.168457 +epoch: 0, batch: 410, sum loss: 14908.116211, avg 
loss: 8.657442, ppl: 5752.800781 +epoch: 0, batch: 411, sum loss: 14491.964844, avg loss: 8.657088, ppl: 5750.765625 +epoch: 0, batch: 412, sum loss: 15158.772461, avg loss: 8.593409, ppl: 5395.975098 +epoch: 0, batch: 413, sum loss: 17066.685547, avg loss: 8.685336, ppl: 5915.528320 +epoch: 0, batch: 414, sum loss: 14918.740234, avg loss: 8.678732, ppl: 5876.589844 +epoch: 0, batch: 415, sum loss: 16293.550781, avg loss: 8.657572, ppl: 5753.546875 +epoch: 0, batch: 416, sum loss: 15900.789062, avg loss: 8.618314, ppl: 5532.050293 +epoch: 0, batch: 417, sum loss: 15314.360352, avg loss: 8.661969, ppl: 5778.903320 +epoch: 0, batch: 418, sum loss: 16590.753906, avg loss: 8.587347, ppl: 5363.365723 +epoch: 0, batch: 419, sum loss: 17108.179688, avg loss: 8.601397, ppl: 5439.250488 +epoch: 0, batch: 420, sum loss: 13492.130859, avg loss: 8.626682, ppl: 5578.539551 +epoch: 0, batch: 421, sum loss: 12977.379883, avg loss: 8.605689, ppl: 5462.648438 +epoch: 0, batch: 422, sum loss: 20057.181641, avg loss: 8.626745, ppl: 5578.890625 +epoch: 0, batch: 423, sum loss: 13484.777344, avg loss: 8.583564, ppl: 5343.113281 +epoch: 0, batch: 424, sum loss: 13407.673828, avg loss: 8.605696, ppl: 5462.685059 +epoch: 0, batch: 425, sum loss: 15074.585938, avg loss: 8.628841, ppl: 5590.597168 +epoch: 0, batch: 426, sum loss: 13199.601562, avg loss: 8.610308, ppl: 5487.937012 +epoch: 0, batch: 427, sum loss: 17071.265625, avg loss: 8.617498, ppl: 5527.541504 +epoch: 0, batch: 428, sum loss: 15880.302734, avg loss: 8.565427, ppl: 5247.079102 +epoch: 0, batch: 429, sum loss: 13964.487305, avg loss: 8.588245, ppl: 5368.186523 +epoch: 0, batch: 430, sum loss: 16268.886719, avg loss: 8.603325, ppl: 5449.749512 +epoch: 0, batch: 431, sum loss: 14125.414062, avg loss: 8.571246, ppl: 5277.702637 +epoch: 0, batch: 432, sum loss: 15602.747070, avg loss: 8.587092, ppl: 5362.000488 +epoch: 0, batch: 433, sum loss: 13236.000000, avg loss: 8.561449, ppl: 5226.249023 +epoch: 0, batch: 434, sum loss: 
13211.863281, avg loss: 8.523783, ppl: 5033.056641 +epoch: 0, batch: 435, sum loss: 12231.484375, avg loss: 8.583497, ppl: 5342.756836 +epoch: 0, batch: 436, sum loss: 14921.268555, avg loss: 8.497306, ppl: 4901.545410 +epoch: 0, batch: 437, sum loss: 14752.737305, avg loss: 8.522667, ppl: 5027.443848 +epoch: 0, batch: 438, sum loss: 13507.161133, avg loss: 8.516495, ppl: 4996.508789 +epoch: 0, batch: 439, sum loss: 13645.058594, avg loss: 8.528161, ppl: 5055.141113 +epoch: 0, batch: 440, sum loss: 15149.825195, avg loss: 8.487297, ppl: 4852.731445 +epoch: 0, batch: 441, sum loss: 13945.162109, avg loss: 8.503148, ppl: 4930.265137 +epoch: 0, batch: 442, sum loss: 12620.662109, avg loss: 8.481627, ppl: 4825.296387 +epoch: 0, batch: 443, sum loss: 14354.790039, avg loss: 8.458921, ppl: 4716.967773 +epoch: 0, batch: 444, sum loss: 12708.450195, avg loss: 8.466656, ppl: 4753.591797 +epoch: 0, batch: 445, sum loss: 13299.239258, avg loss: 8.492490, ppl: 4877.996094 +epoch: 0, batch: 446, sum loss: 14417.316406, avg loss: 8.495768, ppl: 4894.011719 +epoch: 0, batch: 447, sum loss: 14394.010742, avg loss: 8.502074, ppl: 4924.973633 +epoch: 0, batch: 448, sum loss: 14807.252930, avg loss: 8.446808, ppl: 4660.172852 +epoch: 0, batch: 449, sum loss: 13938.555664, avg loss: 8.381573, ppl: 4365.869629 +epoch: 0, batch: 450, sum loss: 14109.154297, avg loss: 8.479059, ppl: 4812.919922 +epoch: 0, batch: 451, sum loss: 12805.685547, avg loss: 8.458181, ppl: 4713.478027 +epoch: 0, batch: 452, sum loss: 13548.434570, avg loss: 8.483679, ppl: 4835.205078 +epoch: 0, batch: 453, sum loss: 13671.519531, avg loss: 8.454867, ppl: 4697.883301 +epoch: 0, batch: 454, sum loss: 16610.748047, avg loss: 8.440421, ppl: 4630.504395 +epoch: 0, batch: 455, sum loss: 14717.750000, avg loss: 8.439076, ppl: 4624.282227 +epoch: 0, batch: 456, sum loss: 16471.216797, avg loss: 8.399397, ppl: 4444.385742 +epoch: 0, batch: 457, sum loss: 15183.429688, avg loss: 8.430554, ppl: 4585.041504 +epoch: 0, 
batch: 458, sum loss: 16531.777344, avg loss: 8.421690, ppl: 4544.577148 +epoch: 0, batch: 459, sum loss: 14598.586914, avg loss: 8.423882, ppl: 4554.547852 +epoch: 0, batch: 460, sum loss: 11430.474609, avg loss: 8.355610, ppl: 4253.978516 +epoch: 0, batch: 461, sum loss: 14667.748047, avg loss: 8.352932, ppl: 4242.601562 +epoch: 0, batch: 462, sum loss: 16929.302734, avg loss: 8.409986, ppl: 4491.699707 +epoch: 0, batch: 463, sum loss: 13462.904297, avg loss: 8.367249, ppl: 4303.782227 +epoch: 0, batch: 464, sum loss: 14609.231445, avg loss: 8.396110, ppl: 4429.799316 +epoch: 0, batch: 465, sum loss: 13973.916992, avg loss: 8.337660, ppl: 4178.300293 +epoch: 0, batch: 466, sum loss: 14800.986328, avg loss: 8.395341, ppl: 4426.395508 +epoch: 0, batch: 467, sum loss: 13741.652344, avg loss: 8.358669, ppl: 4267.012695 +epoch: 0, batch: 468, sum loss: 14963.367188, avg loss: 8.336138, ppl: 4171.945801 +epoch: 0, batch: 469, sum loss: 12434.112305, avg loss: 8.333856, ppl: 4162.435547 +epoch: 0, batch: 470, sum loss: 13980.961914, avg loss: 8.331920, ppl: 4154.384766 +epoch: 0, batch: 471, sum loss: 13658.287109, avg loss: 8.318080, ppl: 4097.285645 +epoch: 0, batch: 472, sum loss: 14959.915039, avg loss: 8.334215, ppl: 4163.932129 +epoch: 0, batch: 473, sum loss: 15910.098633, avg loss: 8.321181, ppl: 4110.012207 +epoch: 0, batch: 474, sum loss: 15111.044922, avg loss: 8.330235, ppl: 4147.390137 +epoch: 0, batch: 475, sum loss: 13512.570312, avg loss: 8.315428, ppl: 4086.433105 +epoch: 0, batch: 476, sum loss: 14281.076172, avg loss: 8.327158, ppl: 4134.649902 +epoch: 0, batch: 477, sum loss: 14999.127930, avg loss: 8.295978, ppl: 4007.719238 +epoch: 0, batch: 478, sum loss: 14346.456055, avg loss: 8.292749, ppl: 3994.802490 +epoch: 0, batch: 479, sum loss: 15600.136719, avg loss: 8.293533, ppl: 3997.935303 +epoch: 0, batch: 480, sum loss: 13031.128906, avg loss: 8.247550, ppl: 3818.259766 +epoch: 0, batch: 481, sum loss: 13616.439453, avg loss: 8.227456, ppl: 
3742.301514 +epoch: 0, batch: 482, sum loss: 15910.557617, avg loss: 8.291067, ppl: 3988.087646 +epoch: 0, batch: 483, sum loss: 17051.833984, avg loss: 8.253550, ppl: 3841.236328 +epoch: 0, batch: 484, sum loss: 14579.472656, avg loss: 8.232339, ppl: 3760.619141 +epoch: 0, batch: 485, sum loss: 16068.886719, avg loss: 8.282931, ppl: 3955.772949 +epoch: 0, batch: 486, sum loss: 13749.061523, avg loss: 8.242843, ppl: 3800.328125 +epoch: 0, batch: 487, sum loss: 14067.515625, avg loss: 8.221809, ppl: 3721.229492 +epoch: 0, batch: 488, sum loss: 14576.947266, avg loss: 8.203121, ppl: 3652.332031 +epoch: 0, batch: 489, sum loss: 13963.135742, avg loss: 8.223284, ppl: 3726.719971 +epoch: 0, batch: 490, sum loss: 12412.192383, avg loss: 8.176675, ppl: 3557.007324 +epoch: 0, batch: 491, sum loss: 13784.195312, avg loss: 8.199997, ppl: 3640.939209 +epoch: 0, batch: 492, sum loss: 14296.707031, avg loss: 8.249686, ppl: 3826.425049 +epoch: 0, batch: 493, sum loss: 15049.740234, avg loss: 8.250954, ppl: 3831.277832 +epoch: 0, batch: 494, sum loss: 13741.293945, avg loss: 8.189091, ppl: 3601.446045 +epoch: 0, batch: 495, sum loss: 13850.738281, avg loss: 8.142703, ppl: 3438.198975 +epoch: 0, batch: 496, sum loss: 11550.876953, avg loss: 8.180508, ppl: 3570.666992 +epoch: 0, batch: 497, sum loss: 14212.404297, avg loss: 8.158670, ppl: 3493.538574 +epoch: 0, batch: 498, sum loss: 13200.658203, avg loss: 8.188993, ppl: 3601.095703 +epoch: 0, batch: 499, sum loss: 12895.083008, avg loss: 8.145978, ppl: 3449.477295 +epoch: 0, batch: 500, sum loss: 14595.247070, avg loss: 8.167458, ppl: 3524.372070 +epoch: 0, batch: 501, sum loss: 13421.599609, avg loss: 8.154071, ppl: 3477.506592 +epoch: 0, batch: 502, sum loss: 13345.312500, avg loss: 8.132426, ppl: 3403.046143 +epoch: 0, batch: 503, sum loss: 13842.645508, avg loss: 8.147526, ppl: 3454.820557 +epoch: 0, batch: 504, sum loss: 14082.388672, avg loss: 8.121331, ppl: 3365.498047 +epoch: 0, batch: 505, sum loss: 12694.270508, avg 
loss: 8.090676, ppl: 3263.894287 +epoch: 0, batch: 506, sum loss: 14888.207031, avg loss: 8.104631, ppl: 3309.761475 +epoch: 0, batch: 507, sum loss: 14600.158203, avg loss: 8.124741, ppl: 3376.991943 +epoch: 0, batch: 508, sum loss: 12019.400391, avg loss: 8.137712, ppl: 3421.083008 +epoch: 0, batch: 509, sum loss: 13245.076172, avg loss: 8.100964, ppl: 3297.644043 +epoch: 0, batch: 510, sum loss: 14215.095703, avg loss: 8.164903, ppl: 3515.379150 +epoch: 0, batch: 511, sum loss: 15115.520508, avg loss: 8.083166, ppl: 3239.473633 +epoch: 0, batch: 512, sum loss: 15128.071289, avg loss: 8.142126, ppl: 3436.215820 +epoch: 0, batch: 513, sum loss: 14290.296875, avg loss: 8.110271, ppl: 3328.481445 +epoch: 0, batch: 514, sum loss: 15543.311523, avg loss: 8.116611, ppl: 3349.651123 +epoch: 0, batch: 515, sum loss: 11343.636719, avg loss: 8.011043, ppl: 3014.057861 +epoch: 0, batch: 516, sum loss: 11819.997070, avg loss: 8.040814, ppl: 3105.140869 +epoch: 0, batch: 517, sum loss: 13630.408203, avg loss: 8.074886, ppl: 3212.762207 +epoch: 0, batch: 518, sum loss: 14230.645508, avg loss: 8.085594, ppl: 3247.348877 +epoch: 0, batch: 519, sum loss: 11908.220703, avg loss: 8.111867, ppl: 3333.796143 +epoch: 0, batch: 520, sum loss: 14269.093750, avg loss: 8.116663, ppl: 3349.823730 +epoch: 0, batch: 521, sum loss: 12885.187500, avg loss: 8.119211, ppl: 3358.370605 +epoch: 0, batch: 522, sum loss: 15157.219727, avg loss: 8.040965, ppl: 3105.608887 +epoch: 0, batch: 523, sum loss: 13049.343750, avg loss: 8.030365, ppl: 3072.863037 +epoch: 0, batch: 524, sum loss: 13999.000000, avg loss: 7.999428, ppl: 2979.254150 +epoch: 0, batch: 525, sum loss: 13407.438477, avg loss: 8.038033, ppl: 3096.514893 +epoch: 0, batch: 526, sum loss: 14015.509766, avg loss: 8.041027, ppl: 3105.801514 +epoch: 0, batch: 527, sum loss: 13095.326172, avg loss: 8.038874, ppl: 3099.120605 +epoch: 0, batch: 528, sum loss: 13290.042969, avg loss: 8.020545, ppl: 3042.835205 +epoch: 0, batch: 529, sum loss: 
13453.666992, avg loss: 7.960750, ppl: 2866.220703 +epoch: 0, batch: 530, sum loss: 16705.939453, avg loss: 7.962793, ppl: 2872.083008 +epoch: 0, batch: 531, sum loss: 14099.852539, avg loss: 8.029530, ppl: 3070.296875 +epoch: 0, batch: 532, sum loss: 12626.077148, avg loss: 7.955941, ppl: 2852.470459 +epoch: 0, batch: 533, sum loss: 13143.003906, avg loss: 7.979966, ppl: 2921.830811 +epoch: 0, batch: 534, sum loss: 12793.937500, avg loss: 8.041444, ppl: 3107.095947 +epoch: 0, batch: 535, sum loss: 13519.616211, avg loss: 7.952716, ppl: 2843.286377 +epoch: 0, batch: 536, sum loss: 14121.688477, avg loss: 7.991901, ppl: 2956.912598 +epoch: 0, batch: 537, sum loss: 12948.323242, avg loss: 7.938886, ppl: 2804.235352 +epoch: 0, batch: 538, sum loss: 13559.043945, avg loss: 7.938550, ppl: 2803.292725 +epoch: 0, batch: 539, sum loss: 14901.362305, avg loss: 7.981447, ppl: 2926.162842 +epoch: 0, batch: 540, sum loss: 14168.834961, avg loss: 7.862839, ppl: 2598.887451 +epoch: 0, batch: 541, sum loss: 12576.257812, avg loss: 7.909596, ppl: 2723.290039 +epoch: 0, batch: 542, sum loss: 13512.625000, avg loss: 7.943930, ppl: 2818.414307 +epoch: 0, batch: 543, sum loss: 13523.624023, avg loss: 7.908552, ppl: 2720.448975 +epoch: 0, batch: 544, sum loss: 15252.537109, avg loss: 7.906966, ppl: 2716.136475 +epoch: 0, batch: 545, sum loss: 13334.208984, avg loss: 7.904096, ppl: 2708.352051 +epoch: 0, batch: 546, sum loss: 13729.884766, avg loss: 7.904367, ppl: 2709.086914 +epoch: 0, batch: 547, sum loss: 11530.892578, avg loss: 7.881677, ppl: 2648.309082 +epoch: 0, batch: 548, sum loss: 14259.810547, avg loss: 7.852318, ppl: 2571.689209 +epoch: 0, batch: 549, sum loss: 11705.762695, avg loss: 7.893300, ppl: 2679.269775 +epoch: 0, batch: 550, sum loss: 12704.639648, avg loss: 7.799042, ppl: 2438.265625 +epoch: 0, batch: 551, sum loss: 15301.986328, avg loss: 7.928491, ppl: 2775.234863 +epoch: 0, batch: 552, sum loss: 13796.097656, avg loss: 7.820917, ppl: 2492.189941 +epoch: 0, 
batch: 553, sum loss: 12698.312500, avg loss: 7.862732, ppl: 2598.611084 +epoch: 0, batch: 554, sum loss: 15097.945312, avg loss: 7.871713, ppl: 2622.053711 +epoch: 0, batch: 555, sum loss: 13253.460938, avg loss: 7.805336, ppl: 2453.660889 +epoch: 0, batch: 556, sum loss: 12340.848633, avg loss: 7.845422, ppl: 2554.014648 +epoch: 0, batch: 557, sum loss: 13659.498047, avg loss: 7.792070, ppl: 2421.324219 +epoch: 0, batch: 558, sum loss: 13067.733398, avg loss: 7.876873, ppl: 2635.619385 +epoch: 0, batch: 559, sum loss: 13053.513672, avg loss: 7.811798, ppl: 2469.566895 +epoch: 0, batch: 560, sum loss: 13920.017578, avg loss: 7.846684, ppl: 2557.239258 +epoch: 0, batch: 561, sum loss: 13626.386719, avg loss: 7.831257, ppl: 2518.092285 +epoch: 0, batch: 562, sum loss: 14046.816406, avg loss: 7.812467, ppl: 2471.219727 +epoch: 0, batch: 563, sum loss: 12819.871094, avg loss: 7.764913, ppl: 2356.453613 +epoch: 0, batch: 564, sum loss: 13645.563477, avg loss: 7.784121, ppl: 2402.153809 +epoch: 0, batch: 565, sum loss: 13383.187500, avg loss: 7.735946, ppl: 2289.173584 +epoch: 0, batch: 566, sum loss: 15101.138672, avg loss: 7.792126, ppl: 2421.459473 +epoch: 0, batch: 567, sum loss: 13292.207031, avg loss: 7.823548, ppl: 2498.754883 +epoch: 0, batch: 568, sum loss: 14693.151367, avg loss: 7.757736, ppl: 2339.601074 +epoch: 0, batch: 569, sum loss: 11711.871094, avg loss: 7.761346, ppl: 2348.063721 +epoch: 0, batch: 570, sum loss: 12955.390625, avg loss: 7.767021, ppl: 2361.425293 +epoch: 0, batch: 571, sum loss: 13218.779297, avg loss: 7.739332, ppl: 2296.936768 +epoch: 0, batch: 572, sum loss: 12146.626953, avg loss: 7.746573, ppl: 2313.629883 +epoch: 0, batch: 573, sum loss: 12106.941406, avg loss: 7.815972, ppl: 2479.896973 +epoch: 0, batch: 574, sum loss: 14617.793945, avg loss: 7.771288, ppl: 2371.524902 +epoch: 0, batch: 575, sum loss: 13176.442383, avg loss: 7.759978, ppl: 2344.853760 +epoch: 0, batch: 576, sum loss: 12856.677734, avg loss: 7.680214, ppl: 
2165.083984 +epoch: 0, batch: 577, sum loss: 12214.648438, avg loss: 7.696692, ppl: 2201.053711 +epoch: 0, batch: 578, sum loss: 13992.702148, avg loss: 7.688298, ppl: 2182.656982 +epoch: 0, batch: 579, sum loss: 11611.683594, avg loss: 7.700055, ppl: 2208.469727 +epoch: 0, batch: 580, sum loss: 10602.103516, avg loss: 7.643910, ppl: 2087.891357 +epoch: 0, batch: 581, sum loss: 10844.233398, avg loss: 7.647555, ppl: 2095.516602 +epoch: 0, batch: 582, sum loss: 14474.625977, avg loss: 7.674776, ppl: 2153.340332 +epoch: 0, batch: 583, sum loss: 14353.044922, avg loss: 7.691878, ppl: 2190.484131 +epoch: 0, batch: 584, sum loss: 11368.035156, avg loss: 7.686298, ppl: 2178.295410 +epoch: 0, batch: 585, sum loss: 12781.803711, avg loss: 7.713822, ppl: 2239.084473 +epoch: 0, batch: 586, sum loss: 13811.006836, avg loss: 7.651527, ppl: 2103.856689 +epoch: 0, batch: 587, sum loss: 14831.431641, avg loss: 7.664822, ppl: 2132.013428 +epoch: 0, batch: 588, sum loss: 12904.413086, avg loss: 7.708730, ppl: 2227.710693 +epoch: 0, batch: 589, sum loss: 13721.087891, avg loss: 7.686884, ppl: 2179.573242 +epoch: 0, batch: 590, sum loss: 13545.054688, avg loss: 7.643936, ppl: 2087.946045 +epoch: 0, batch: 591, sum loss: 14878.079102, avg loss: 7.618064, ppl: 2034.619141 +epoch: 0, batch: 592, sum loss: 13289.394531, avg loss: 7.598281, ppl: 1994.764771 +epoch: 0, batch: 593, sum loss: 13341.901367, avg loss: 7.703176, ppl: 2215.374023 +epoch: 0, batch: 594, sum loss: 13390.997070, avg loss: 7.591268, ppl: 1980.823730 +epoch: 0, batch: 595, sum loss: 13849.155273, avg loss: 7.689703, ppl: 2185.725342 +epoch: 0, batch: 596, sum loss: 13341.308594, avg loss: 7.610559, ppl: 2019.405640 +epoch: 0, batch: 597, sum loss: 13955.908203, avg loss: 7.597120, ppl: 1992.448975 +epoch: 0, batch: 598, sum loss: 12712.081055, avg loss: 7.602920, ppl: 2004.038330 +epoch: 0, batch: 599, sum loss: 12668.626953, avg loss: 7.636303, ppl: 2072.069092 +epoch: 0, batch: 600, sum loss: 13952.105469, avg 
loss: 7.682877, ppl: 2170.856445 +epoch: 0, batch: 601, sum loss: 14262.976562, avg loss: 7.610980, ppl: 2020.257080 +epoch: 0, batch: 602, sum loss: 10912.316406, avg loss: 7.557005, ppl: 1914.104980 +epoch: 0, batch: 603, sum loss: 13366.813477, avg loss: 7.590467, ppl: 1979.238525 +epoch: 0, batch: 604, sum loss: 11792.760742, avg loss: 7.583769, ppl: 1966.025635 +epoch: 0, batch: 605, sum loss: 13634.455078, avg loss: 7.516238, ppl: 1837.640625 +epoch: 0, batch: 606, sum loss: 13661.157227, avg loss: 7.535112, ppl: 1872.653931 +epoch: 0, batch: 607, sum loss: 12738.996094, avg loss: 7.605371, ppl: 2008.958008 +epoch: 0, batch: 608, sum loss: 12623.498047, avg loss: 7.522943, ppl: 1850.003052 +epoch: 0, batch: 609, sum loss: 13664.338867, avg loss: 7.520275, ppl: 1845.073975 +epoch: 0, batch: 610, sum loss: 12083.660156, avg loss: 7.528760, ppl: 1860.796631 +epoch: 0, batch: 611, sum loss: 14444.168945, avg loss: 7.519089, ppl: 1842.887207 +epoch: 0, batch: 612, sum loss: 10838.210938, avg loss: 7.558027, ppl: 1916.061890 +epoch: 0, batch: 613, sum loss: 12849.227539, avg loss: 7.474827, ppl: 1763.097168 +epoch: 0, batch: 614, sum loss: 12440.556641, avg loss: 7.449435, ppl: 1718.892090 +epoch: 0, batch: 615, sum loss: 13204.742188, avg loss: 7.580219, ppl: 1959.058472 +epoch: 0, batch: 616, sum loss: 12661.777344, avg loss: 7.478900, ppl: 1770.292358 +epoch: 0, batch: 617, sum loss: 11981.607422, avg loss: 7.573709, ppl: 1946.344971 +epoch: 0, batch: 618, sum loss: 14499.817383, avg loss: 7.489575, ppl: 1789.291260 +epoch: 0, batch: 619, sum loss: 13827.583008, avg loss: 7.490565, ppl: 1791.063477 +epoch: 0, batch: 620, sum loss: 12419.946289, avg loss: 7.481895, ppl: 1775.603149 +epoch: 0, batch: 621, sum loss: 11881.317383, avg loss: 7.411926, ppl: 1655.611694 +epoch: 0, batch: 622, sum loss: 10813.046875, avg loss: 7.588103, ppl: 1974.563843 +epoch: 0, batch: 623, sum loss: 11570.592773, avg loss: 7.436113, ppl: 1696.145142 +epoch: 0, batch: 624, sum loss: 
14285.148438, avg loss: 7.471312, ppl: 1756.909424 +epoch: 0, batch: 625, sum loss: 12299.349609, avg loss: 7.463198, ppl: 1742.711792 +epoch: 0, batch: 626, sum loss: 12099.634766, avg loss: 7.455104, ppl: 1728.664429 +epoch: 0, batch: 627, sum loss: 12578.552734, avg loss: 7.469450, ppl: 1753.641113 +epoch: 0, batch: 628, sum loss: 13502.491211, avg loss: 7.447596, ppl: 1715.733643 +epoch: 0, batch: 629, sum loss: 12051.849609, avg loss: 7.467069, ppl: 1749.471680 +epoch: 0, batch: 630, sum loss: 14249.367188, avg loss: 7.444810, ppl: 1710.960815 +epoch: 0, batch: 631, sum loss: 11767.305664, avg loss: 7.419487, ppl: 1668.176758 +epoch: 0, batch: 632, sum loss: 15041.412109, avg loss: 7.479569, ppl: 1771.477051 +epoch: 0, batch: 633, sum loss: 12981.920898, avg loss: 7.517036, ppl: 1839.108032 +epoch: 0, batch: 634, sum loss: 12826.903320, avg loss: 7.431578, ppl: 1688.469360 +epoch: 0, batch: 635, sum loss: 10416.904297, avg loss: 7.488788, ppl: 1787.884033 +epoch: 0, batch: 636, sum loss: 13943.250000, avg loss: 7.456284, ppl: 1730.704102 +epoch: 0, batch: 637, sum loss: 13503.439453, avg loss: 7.374899, ppl: 1595.430542 +epoch: 0, batch: 638, sum loss: 11398.167969, avg loss: 7.396605, ppl: 1630.439697 +epoch: 0, batch: 639, sum loss: 13057.939453, avg loss: 7.394076, ppl: 1626.322021 +epoch: 0, batch: 640, sum loss: 13548.747070, avg loss: 7.460763, ppl: 1738.474854 +epoch: 0, batch: 641, sum loss: 11595.703125, avg loss: 7.348354, ppl: 1553.637695 +epoch: 0, batch: 642, sum loss: 13692.115234, avg loss: 7.409153, ppl: 1651.028076 +epoch: 0, batch: 643, sum loss: 13561.025391, avg loss: 7.426630, ppl: 1680.135254 +epoch: 0, batch: 644, sum loss: 11744.130859, avg loss: 7.381603, ppl: 1606.162720 +epoch: 0, batch: 645, sum loss: 12588.577148, avg loss: 7.413768, ppl: 1658.664185 +epoch: 0, batch: 646, sum loss: 12339.111328, avg loss: 7.292619, ppl: 1469.414429 +epoch: 0, batch: 647, sum loss: 10445.690430, avg loss: 7.345774, ppl: 1549.633423 +epoch: 0, 
batch: 648, sum loss: 11885.074219, avg loss: 7.340997, ppl: 1542.249268 +epoch: 0, batch: 649, sum loss: 12091.411133, avg loss: 7.314828, ppl: 1502.413208 +epoch: 0, batch: 650, sum loss: 13411.045898, avg loss: 7.324439, ppl: 1516.921997 +epoch: 0, batch: 651, sum loss: 12358.969727, avg loss: 7.374087, ppl: 1594.136230 +epoch: 0, batch: 652, sum loss: 11712.315430, avg loss: 7.261200, ppl: 1423.964233 +epoch: 0, batch: 653, sum loss: 11063.195312, avg loss: 7.321771, ppl: 1512.880371 +epoch: 0, batch: 654, sum loss: 13332.950195, avg loss: 7.269875, ppl: 1436.370239 +epoch: 0, batch: 655, sum loss: 12098.973633, avg loss: 7.288538, ppl: 1463.430298 +epoch: 0, batch: 656, sum loss: 13222.891602, avg loss: 7.366514, ppl: 1582.108521 +epoch: 0, batch: 657, sum loss: 11279.366211, avg loss: 7.281708, ppl: 1453.468750 +epoch: 0, batch: 658, sum loss: 11965.865234, avg loss: 7.314099, ppl: 1501.318237 +epoch: 0, batch: 659, sum loss: 10949.539062, avg loss: 7.314321, ppl: 1501.651855 +epoch: 0, batch: 660, sum loss: 13118.118164, avg loss: 7.283797, ppl: 1456.507568 +epoch: 0, batch: 661, sum loss: 13437.121094, avg loss: 7.322682, ppl: 1514.260376 +epoch: 0, batch: 662, sum loss: 12206.376953, avg loss: 7.209910, ppl: 1352.770386 +epoch: 0, batch: 663, sum loss: 12978.956055, avg loss: 7.316209, ppl: 1504.490112 +epoch: 0, batch: 664, sum loss: 12446.906250, avg loss: 7.308812, ppl: 1493.401489 +epoch: 0, batch: 665, sum loss: 12979.227539, avg loss: 7.283517, ppl: 1456.100708 +epoch: 0, batch: 666, sum loss: 11772.870117, avg loss: 7.231493, ppl: 1382.284058 +epoch: 0, batch: 667, sum loss: 11787.129883, avg loss: 7.182894, ppl: 1316.713623 +epoch: 0, batch: 668, sum loss: 11710.884766, avg loss: 7.136432, ppl: 1256.935303 +epoch: 0, batch: 669, sum loss: 13537.763672, avg loss: 7.189466, ppl: 1325.394653 +epoch: 0, batch: 670, sum loss: 11034.024414, avg loss: 7.331578, ppl: 1527.791138 +epoch: 0, batch: 671, sum loss: 12589.763672, avg loss: 7.218901, ppl: 
1364.987671 +epoch: 0, batch: 672, sum loss: 12327.083008, avg loss: 7.238451, ppl: 1391.936890 +epoch: 0, batch: 673, sum loss: 11791.133789, avg loss: 7.296494, ppl: 1475.118408 +epoch: 0, batch: 674, sum loss: 11476.264648, avg loss: 7.217776, ppl: 1363.453735 +epoch: 0, batch: 675, sum loss: 13084.914062, avg loss: 7.221255, ppl: 1368.204834 +epoch: 0, batch: 676, sum loss: 11033.890625, avg loss: 7.216410, ppl: 1361.592407 +epoch: 0, batch: 677, sum loss: 9798.366211, avg loss: 7.220609, ppl: 1367.321777 +epoch: 0, batch: 678, sum loss: 13068.959961, avg loss: 7.220420, ppl: 1367.063599 +epoch: 0, batch: 679, sum loss: 13730.363281, avg loss: 7.211326, ppl: 1354.686890 +epoch: 0, batch: 680, sum loss: 13219.945312, avg loss: 7.204330, ppl: 1345.243652 +epoch: 0, batch: 681, sum loss: 11476.017578, avg loss: 7.254119, ppl: 1413.917358 +epoch: 0, batch: 682, sum loss: 13078.706055, avg loss: 7.194008, ppl: 1331.428711 +epoch: 0, batch: 683, sum loss: 11959.009766, avg loss: 7.239110, ppl: 1392.854492 +epoch: 0, batch: 684, sum loss: 11730.957031, avg loss: 7.188087, ppl: 1323.568115 +epoch: 0, batch: 685, sum loss: 12914.579102, avg loss: 7.194752, ppl: 1332.419556 +epoch: 0, batch: 686, sum loss: 11398.631836, avg loss: 7.182503, ppl: 1316.198853 +epoch: 0, batch: 687, sum loss: 10720.432617, avg loss: 7.057559, ppl: 1161.606201 +epoch: 0, batch: 688, sum loss: 13309.134766, avg loss: 7.163151, ppl: 1290.972168 +epoch: 0, batch: 689, sum loss: 11734.939453, avg loss: 7.125039, ppl: 1242.696167 +epoch: 0, batch: 690, sum loss: 12447.305664, avg loss: 7.100574, ppl: 1212.662964 +epoch: 0, batch: 691, sum loss: 12395.562500, avg loss: 7.152661, ppl: 1277.501221 +epoch: 0, batch: 692, sum loss: 12552.521484, avg loss: 7.164681, ppl: 1292.949707 +epoch: 0, batch: 693, sum loss: 14139.906250, avg loss: 7.105481, ppl: 1218.627686 +epoch: 0, batch: 694, sum loss: 11897.891602, avg loss: 7.111711, ppl: 1226.243286 +epoch: 0, batch: 695, sum loss: 12069.647461, avg loss: 
7.066538, ppl: 1172.083130 +epoch: 0, batch: 696, sum loss: 12110.665039, avg loss: 7.069857, ppl: 1175.979492 +epoch: 0, batch: 697, sum loss: 11963.242188, avg loss: 7.062126, ppl: 1166.923584 +epoch: 0, batch: 698, sum loss: 11396.892578, avg loss: 7.123058, ppl: 1240.237061 +epoch: 0, batch: 699, sum loss: 12348.675781, avg loss: 7.101021, ppl: 1213.205444 +epoch: 0, batch: 700, sum loss: 12959.616211, avg loss: 7.089506, ppl: 1199.314819 +epoch: 0, batch: 701, sum loss: 13571.208008, avg loss: 7.150268, ppl: 1274.447510 +epoch: 0, batch: 702, sum loss: 10682.562500, avg loss: 7.093335, ppl: 1203.916382 +epoch: 0, batch: 703, sum loss: 12392.098633, avg loss: 7.109638, ppl: 1223.704224 +epoch: 0, batch: 704, sum loss: 11537.041016, avg loss: 7.112849, ppl: 1227.639771 +epoch: 0, batch: 705, sum loss: 11629.063477, avg loss: 7.069339, ppl: 1175.371216 +epoch: 0, batch: 706, sum loss: 12315.783203, avg loss: 7.118950, ppl: 1235.152710 +epoch: 0, batch: 707, sum loss: 13186.687500, avg loss: 7.078200, ppl: 1185.832520 +epoch: 0, batch: 708, sum loss: 12162.317383, avg loss: 7.001910, ppl: 1098.729980 +epoch: 0, batch: 709, sum loss: 11333.876953, avg loss: 7.096980, ppl: 1208.312622 +epoch: 0, batch: 710, sum loss: 11759.041992, avg loss: 7.041343, ppl: 1142.921265 +epoch: 0, batch: 711, sum loss: 12107.818359, avg loss: 7.072324, ppl: 1178.884399 +epoch: 0, batch: 712, sum loss: 11848.938477, avg loss: 6.998783, ppl: 1095.298950 +epoch: 0, batch: 713, sum loss: 11109.335938, avg loss: 7.017900, ppl: 1116.439575 +epoch: 0, batch: 714, sum loss: 12616.167969, avg loss: 6.981830, ppl: 1076.887451 +epoch: 0, batch: 715, sum loss: 12736.940430, avg loss: 7.099744, ppl: 1211.656616 +epoch: 0, batch: 716, sum loss: 11837.958008, avg loss: 7.008856, ppl: 1106.388428 +epoch: 0, batch: 717, sum loss: 10400.970703, avg loss: 7.004020, ppl: 1101.050781 +epoch: 0, batch: 718, sum loss: 12228.407227, avg loss: 7.007683, ppl: 1105.091309 +epoch: 0, batch: 719, sum loss: 
11152.416992, avg loss: 6.983355, ppl: 1078.530273 +epoch: 0, batch: 720, sum loss: 11541.510742, avg loss: 7.011854, ppl: 1109.709717 +epoch: 0, batch: 721, sum loss: 12012.191406, avg loss: 7.008280, ppl: 1105.750732 +epoch: 0, batch: 722, sum loss: 11405.023438, avg loss: 7.035795, ppl: 1136.597900 +epoch: 0, batch: 723, sum loss: 13769.234375, avg loss: 7.043087, ppl: 1144.916016 +epoch: 0, batch: 724, sum loss: 12662.468750, avg loss: 7.003578, ppl: 1100.563599 +epoch: 0, batch: 725, sum loss: 12025.712891, avg loss: 6.995761, ppl: 1091.994263 +epoch: 0, batch: 726, sum loss: 11890.757812, avg loss: 6.921279, ppl: 1013.616028 +epoch: 0, batch: 727, sum loss: 11231.777344, avg loss: 7.033048, ppl: 1133.480347 +epoch: 0, batch: 728, sum loss: 12715.774414, avg loss: 7.044750, ppl: 1146.822388 +epoch: 0, batch: 729, sum loss: 13112.129883, avg loss: 6.893865, ppl: 986.205383 +epoch: 0, batch: 730, sum loss: 10495.488281, avg loss: 6.987675, ppl: 1083.200317 +epoch: 0, batch: 731, sum loss: 10685.988281, avg loss: 6.966094, ppl: 1060.073975 +epoch: 0, batch: 732, sum loss: 10685.766602, avg loss: 7.002469, ppl: 1099.343628 +epoch: 0, batch: 733, sum loss: 13482.770508, avg loss: 6.971443, ppl: 1065.759155 +epoch: 0, batch: 734, sum loss: 11640.036133, avg loss: 6.831007, ppl: 926.122925 +epoch: 0, batch: 735, sum loss: 11686.440430, avg loss: 6.910964, ppl: 1003.214355 +epoch: 0, batch: 736, sum loss: 10771.536133, avg loss: 6.999049, ppl: 1095.590454 +epoch: 0, batch: 737, sum loss: 11881.565430, avg loss: 6.956420, ppl: 1049.868286 +epoch: 0, batch: 738, sum loss: 11585.213867, avg loss: 6.962268, ppl: 1056.026245 +epoch: 0, batch: 739, sum loss: 12745.603516, avg loss: 6.930726, ppl: 1023.236145 +epoch: 0, batch: 740, sum loss: 12356.058594, avg loss: 6.984771, ppl: 1080.058838 +epoch: 0, batch: 741, sum loss: 11470.660156, avg loss: 6.864549, ppl: 957.713501 +epoch: 0, batch: 742, sum loss: 8921.735352, avg loss: 6.953808, ppl: 1047.129395 +epoch: 0, batch: 
743, sum loss: 10484.634766, avg loss: 6.884199, ppl: 976.719116 +epoch: 0, batch: 744, sum loss: 11716.224609, avg loss: 6.859616, ppl: 953.000854 +epoch: 0, batch: 745, sum loss: 10284.476562, avg loss: 6.856318, ppl: 949.862793 +epoch: 0, batch: 746, sum loss: 11005.333008, avg loss: 6.912898, ppl: 1005.155579 +epoch: 0, batch: 747, sum loss: 13253.453125, avg loss: 6.888489, ppl: 980.918396 +epoch: 0, batch: 748, sum loss: 11854.900391, avg loss: 7.023046, ppl: 1122.199097 +epoch: 0, batch: 749, sum loss: 11209.645508, avg loss: 6.851862, ppl: 945.640015 +epoch: 0, batch: 750, sum loss: 12893.832031, avg loss: 6.876710, ppl: 969.432068 +epoch: 0, batch: 751, sum loss: 12817.139648, avg loss: 6.828524, ppl: 923.826355 +epoch: 0, batch: 752, sum loss: 12414.098633, avg loss: 6.962479, ppl: 1056.248413 +epoch: 0, batch: 753, sum loss: 12440.928711, avg loss: 6.877241, ppl: 969.946716 +epoch: 0, batch: 754, sum loss: 13286.730469, avg loss: 6.967347, ppl: 1061.403320 +epoch: 0, batch: 755, sum loss: 11372.455078, avg loss: 6.947132, ppl: 1040.162354 +epoch: 0, batch: 756, sum loss: 11237.040039, avg loss: 6.864411, ppl: 957.581543 +epoch: 0, batch: 757, sum loss: 10910.656250, avg loss: 6.840537, ppl: 934.991150 +epoch: 0, batch: 758, sum loss: 10747.683594, avg loss: 6.819596, ppl: 915.614868 +epoch: 0, batch: 759, sum loss: 12050.878906, avg loss: 6.894096, ppl: 986.433472 +epoch: 0, batch: 760, sum loss: 11548.857422, avg loss: 6.837689, ppl: 932.332397 +epoch: 0, batch: 761, sum loss: 11489.677734, avg loss: 6.814755, ppl: 911.193665 +epoch: 0, batch: 762, sum loss: 11781.986328, avg loss: 6.842036, ppl: 936.393921 +epoch: 0, batch: 763, sum loss: 12587.268555, avg loss: 6.781933, ppl: 881.771790 +epoch: 0, batch: 764, sum loss: 11727.906250, avg loss: 6.870478, ppl: 963.409119 +epoch: 0, batch: 765, sum loss: 11318.862305, avg loss: 6.927088, ppl: 1019.521057 +epoch: 0, batch: 766, sum loss: 11134.400391, avg loss: 6.839312, ppl: 933.846497 +epoch: 0, batch: 
767, sum loss: 12226.424805, avg loss: 6.758666, ppl: 861.491821 +epoch: 0, batch: 768, sum loss: 12086.884766, avg loss: 6.840343, ppl: 934.809265 +epoch: 0, batch: 769, sum loss: 11606.958008, avg loss: 6.819599, ppl: 915.617920 +epoch: 0, batch: 770, sum loss: 13244.871094, avg loss: 6.816712, ppl: 912.978516 +epoch: 0, batch: 771, sum loss: 12746.295898, avg loss: 6.923572, ppl: 1015.942505 +epoch: 0, batch: 772, sum loss: 11234.277344, avg loss: 6.833502, ppl: 928.436340 +epoch: 0, batch: 773, sum loss: 11047.653320, avg loss: 6.732269, ppl: 839.048767 +epoch: 0, batch: 774, sum loss: 9675.275391, avg loss: 6.925752, ppl: 1018.159302 +epoch: 0, batch: 775, sum loss: 11930.279297, avg loss: 6.825103, ppl: 920.671021 +epoch: 0, batch: 776, sum loss: 10730.443359, avg loss: 6.740228, ppl: 845.753723 +epoch: 0, batch: 777, sum loss: 12850.689453, avg loss: 6.846398, ppl: 940.487488 +epoch: 0, batch: 778, sum loss: 13373.830078, avg loss: 6.781861, ppl: 881.707886 +epoch: 0, batch: 779, sum loss: 10698.291016, avg loss: 6.818541, ppl: 914.649597 +epoch: 0, batch: 780, sum loss: 12296.281250, avg loss: 6.861764, ppl: 955.050720 +epoch: 0, batch: 781, sum loss: 11986.246094, avg loss: 6.749012, ppl: 853.215759 +epoch: 0, batch: 782, sum loss: 10373.826172, avg loss: 6.749399, ppl: 853.545776 +epoch: 0, batch: 783, sum loss: 10654.226562, avg loss: 6.794787, ppl: 893.179382 +epoch: 0, batch: 784, sum loss: 11098.785156, avg loss: 6.893655, ppl: 985.998962 +epoch: 0, batch: 785, sum loss: 11312.162109, avg loss: 6.757565, ppl: 860.543823 +epoch: 0, batch: 786, sum loss: 11149.251953, avg loss: 6.732640, ppl: 839.360107 +epoch: 0, batch: 787, sum loss: 10684.144531, avg loss: 6.766399, ppl: 868.180298 +epoch: 0, batch: 788, sum loss: 11536.129883, avg loss: 6.750222, ppl: 854.248535 +epoch: 0, batch: 789, sum loss: 11307.364258, avg loss: 6.686792, ppl: 801.746033 +epoch: 0, batch: 790, sum loss: 12029.681641, avg loss: 6.765851, ppl: 867.703918 +epoch: 0, batch: 791, 
sum loss: 11129.039062, avg loss: 6.790140, ppl: 889.037720 +epoch: 0, batch: 792, sum loss: 10554.097656, avg loss: 6.726640, ppl: 834.338989 +epoch: 0, batch: 793, sum loss: 9021.453125, avg loss: 6.657899, ppl: 778.912659 +epoch: 0, batch: 794, sum loss: 10503.812500, avg loss: 6.715992, ppl: 825.502625 +epoch: 0, batch: 795, sum loss: 13106.993164, avg loss: 6.742281, ppl: 847.491638 +epoch: 0, batch: 796, sum loss: 11847.670898, avg loss: 6.708760, ppl: 819.553955 +epoch: 0, batch: 797, sum loss: 12012.348633, avg loss: 6.759903, ppl: 862.558899 +epoch: 0, batch: 798, sum loss: 10435.666016, avg loss: 6.843060, ppl: 937.352600 +epoch: 0, batch: 799, sum loss: 11103.763672, avg loss: 6.717340, ppl: 826.615784 +epoch: 0, batch: 800, sum loss: 12550.737305, avg loss: 6.758609, ppl: 861.442932 +epoch: 0, batch: 801, sum loss: 11684.455078, avg loss: 6.692128, ppl: 806.035828 +epoch: 0, batch: 802, sum loss: 13361.238281, avg loss: 6.700721, ppl: 812.992004 +epoch: 0, batch: 803, sum loss: 11891.169922, avg loss: 6.691710, ppl: 805.699219 +epoch: 0, batch: 804, sum loss: 13790.711914, avg loss: 6.697772, ppl: 810.597412 +epoch: 0, batch: 805, sum loss: 11153.453125, avg loss: 6.727053, ppl: 834.683594 +epoch: 0, batch: 806, sum loss: 10476.861328, avg loss: 6.548038, ppl: 697.873596 +epoch: 0, batch: 807, sum loss: 10748.212891, avg loss: 6.781207, ppl: 881.131287 +epoch: 0, batch: 808, sum loss: 11552.509766, avg loss: 6.677751, ppl: 794.530273 +epoch: 0, batch: 809, sum loss: 9893.624023, avg loss: 6.762559, ppl: 864.852905 +epoch: 0, batch: 810, sum loss: 13354.048828, avg loss: 6.700476, ppl: 812.792786 +epoch: 0, batch: 811, sum loss: 13375.257812, avg loss: 6.654357, ppl: 776.159058 +epoch: 0, batch: 812, sum loss: 11719.507812, avg loss: 6.786050, ppl: 885.409119 +epoch: 0, batch: 813, sum loss: 10393.007812, avg loss: 6.683607, ppl: 799.196350 +epoch: 0, batch: 814, sum loss: 11602.772461, avg loss: 6.805145, ppl: 902.478882 +epoch: 0, batch: 815, sum loss: 
11204.893555, avg loss: 6.677529, ppl: 794.353699 +epoch: 0, batch: 816, sum loss: 9662.484375, avg loss: 6.586560, ppl: 725.281799 +epoch: 0, batch: 817, sum loss: 10473.742188, avg loss: 6.679682, ppl: 796.066101 +epoch: 0, batch: 818, sum loss: 11199.064453, avg loss: 6.646329, ppl: 769.952576 +epoch: 0, batch: 819, sum loss: 10647.796875, avg loss: 6.671552, ppl: 789.619934 +epoch: 0, batch: 820, sum loss: 12551.831055, avg loss: 6.683616, ppl: 799.203918 +epoch: 0, batch: 821, sum loss: 11057.015625, avg loss: 6.701221, ppl: 813.398743 +epoch: 0, batch: 822, sum loss: 11438.574219, avg loss: 6.756394, ppl: 859.537048 +epoch: 0, batch: 823, sum loss: 9770.750000, avg loss: 6.633232, ppl: 759.934021 +epoch: 0, batch: 824, sum loss: 11136.100586, avg loss: 6.644452, ppl: 768.508484 +epoch: 0, batch: 825, sum loss: 9082.747070, avg loss: 6.492314, ppl: 660.048889 +epoch: 0, batch: 826, sum loss: 11882.798828, avg loss: 6.694534, ppl: 807.977600 +epoch: 0, batch: 827, sum loss: 11542.247070, avg loss: 6.710609, ppl: 821.070496 +epoch: 0, batch: 828, sum loss: 13307.962891, avg loss: 6.727989, ppl: 835.465210 +epoch: 0, batch: 829, sum loss: 13666.264648, avg loss: 6.682770, ppl: 798.527771 +epoch: 0, batch: 830, sum loss: 10688.387695, avg loss: 6.743462, ppl: 848.493225 +epoch: 0, batch: 831, sum loss: 11604.542969, avg loss: 6.650168, ppl: 772.914124 +epoch: 0, batch: 832, sum loss: 11258.224609, avg loss: 6.603065, ppl: 737.351379 +epoch: 0, batch: 833, sum loss: 9520.607422, avg loss: 6.723593, ppl: 831.800598 +epoch: 0, batch: 834, sum loss: 9750.702148, avg loss: 6.496138, ppl: 662.577576 +epoch: 0, batch: 835, sum loss: 10393.380859, avg loss: 6.603164, ppl: 737.424866 +epoch: 0, batch: 836, sum loss: 10731.246094, avg loss: 6.648851, ppl: 771.897217 +epoch: 0, batch: 837, sum loss: 11912.383789, avg loss: 6.570538, ppl: 713.753418 +epoch: 0, batch: 838, sum loss: 12356.476562, avg loss: 6.629011, ppl: 756.733521 +epoch: 0, batch: 839, sum loss: 
11374.068359, avg loss: 6.655394, ppl: 776.964050 +epoch: 0, batch: 840, sum loss: 11002.265625, avg loss: 6.664001, ppl: 783.680542 +epoch: 0, batch: 841, sum loss: 8280.604492, avg loss: 6.582356, ppl: 722.239258 +epoch: 0, batch: 842, sum loss: 9662.954102, avg loss: 6.600379, ppl: 735.373474 +epoch: 0, batch: 843, sum loss: 11103.277344, avg loss: 6.700831, ppl: 813.081177 +epoch: 0, batch: 844, sum loss: 9911.431641, avg loss: 6.651968, ppl: 774.306641 +epoch: 0, batch: 845, sum loss: 9156.507812, avg loss: 6.644781, ppl: 768.761414 +epoch: 0, batch: 846, sum loss: 9918.424805, avg loss: 6.634398, ppl: 760.820557 +epoch: 0, batch: 847, sum loss: 13133.041992, avg loss: 6.619477, ppl: 749.552795 +epoch: 0, batch: 848, sum loss: 11451.277344, avg loss: 6.736046, ppl: 842.223877 +epoch: 0, batch: 849, sum loss: 10308.204102, avg loss: 6.586712, ppl: 725.391785 +epoch: 0, batch: 850, sum loss: 11873.804688, avg loss: 6.578285, ppl: 719.304810 +epoch: 0, batch: 851, sum loss: 10464.209961, avg loss: 6.515697, ppl: 675.664734 +epoch: 0, batch: 852, sum loss: 10960.896484, avg loss: 6.638944, ppl: 764.287598 +epoch: 0, batch: 853, sum loss: 9688.085938, avg loss: 6.586054, ppl: 724.914917 +epoch: 0, batch: 854, sum loss: 12955.928711, avg loss: 6.737352, ppl: 843.324951 +epoch: 0, batch: 855, sum loss: 10444.476562, avg loss: 6.686605, ppl: 801.596191 +epoch: 0, batch: 856, sum loss: 12390.091797, avg loss: 6.521101, ppl: 679.326233 +epoch: 0, batch: 857, sum loss: 10930.894531, avg loss: 6.714309, ppl: 824.113892 +epoch: 0, batch: 858, sum loss: 10799.783203, avg loss: 6.466936, ppl: 643.509033 +epoch: 0, batch: 859, sum loss: 10867.911133, avg loss: 6.570684, ppl: 713.857910 +epoch: 0, batch: 860, sum loss: 10444.700195, avg loss: 6.577268, ppl: 718.573242 +epoch: 0, batch: 861, sum loss: 11077.790039, avg loss: 6.661329, ppl: 781.588806 +epoch: 0, batch: 862, sum loss: 11371.369141, avg loss: 6.603582, ppl: 737.733276 +epoch: 0, batch: 863, sum loss: 11825.078125, 
avg loss: 6.680835, ppl: 796.984497 +epoch: 0, batch: 864, sum loss: 10408.197266, avg loss: 6.754184, ppl: 857.639404 +epoch: 0, batch: 865, sum loss: 10756.359375, avg loss: 6.623374, ppl: 752.479675 +epoch: 0, batch: 866, sum loss: 10688.413086, avg loss: 6.497515, ppl: 663.490967 +epoch: 0, batch: 867, sum loss: 13455.348633, avg loss: 6.608717, ppl: 741.531372 +epoch: 0, batch: 868, sum loss: 10625.676758, avg loss: 6.645201, ppl: 769.084778 +epoch: 0, batch: 869, sum loss: 11671.556641, avg loss: 6.571823, ppl: 714.671265 +epoch: 0, batch: 870, sum loss: 10453.766602, avg loss: 6.591278, ppl: 728.711304 +epoch: 0, batch: 871, sum loss: 11173.827148, avg loss: 6.538226, ppl: 691.059326 +epoch: 0, batch: 872, sum loss: 10668.461914, avg loss: 6.569250, ppl: 712.834778 +epoch: 0, batch: 873, sum loss: 8575.892578, avg loss: 6.607005, ppl: 740.262329 +epoch: 0, batch: 874, sum loss: 9319.240234, avg loss: 6.590693, ppl: 728.285767 +epoch: 0, batch: 875, sum loss: 9857.236328, avg loss: 6.455296, ppl: 636.061707 +epoch: 0, batch: 876, sum loss: 11020.768555, avg loss: 6.675209, ppl: 792.512756 +epoch: 0, batch: 877, sum loss: 10397.812500, avg loss: 6.531289, ppl: 686.282349 +epoch: 0, batch: 878, sum loss: 11217.302734, avg loss: 6.540701, ppl: 692.771973 +epoch: 0, batch: 879, sum loss: 12116.926758, avg loss: 6.563882, ppl: 709.019043 +epoch: 0, batch: 880, sum loss: 10790.701172, avg loss: 6.587729, ppl: 726.130310 +epoch: 0, batch: 881, sum loss: 8690.958008, avg loss: 6.564167, ppl: 709.221252 +epoch: 0, batch: 882, sum loss: 10541.849609, avg loss: 6.568130, ppl: 712.037415 +epoch: 0, batch: 883, sum loss: 10770.318359, avg loss: 6.399476, ppl: 601.529785 +epoch: 0, batch: 884, sum loss: 10756.726562, avg loss: 6.571000, ppl: 714.083618 +epoch: 0, batch: 885, sum loss: 10836.589844, avg loss: 6.532001, ppl: 686.771057 +epoch: 0, batch: 886, sum loss: 10241.636719, avg loss: 6.624603, ppl: 753.405273 +epoch: 0, batch: 887, sum loss: 9190.927734, avg loss: 
6.472485, ppl: 647.089478 +epoch: 0, batch: 888, sum loss: 11609.784180, avg loss: 6.611495, ppl: 743.593506 +epoch: 0, batch: 889, sum loss: 12598.906250, avg loss: 6.624031, ppl: 752.974304 +epoch: 0, batch: 890, sum loss: 10102.535156, avg loss: 6.607283, ppl: 740.468506 +epoch: 0, batch: 891, sum loss: 10328.756836, avg loss: 6.583019, ppl: 722.717773 +epoch: 0, batch: 892, sum loss: 11899.304688, avg loss: 6.643944, ppl: 768.118713 +epoch: 0, batch: 893, sum loss: 10960.663086, avg loss: 6.504845, ppl: 668.371826 +epoch: 0, batch: 894, sum loss: 10836.579102, avg loss: 6.477334, ppl: 650.235107 +epoch: 0, batch: 895, sum loss: 9612.994141, avg loss: 6.499658, ppl: 664.913940 +epoch: 0, batch: 896, sum loss: 11696.538086, avg loss: 6.534379, ppl: 688.405823 +epoch: 0, batch: 897, sum loss: 10805.913086, avg loss: 6.486142, ppl: 655.987793 +epoch: 0, batch: 898, sum loss: 11735.563477, avg loss: 6.596719, ppl: 732.687500 +epoch: 0, batch: 899, sum loss: 11450.024414, avg loss: 6.653123, ppl: 775.201416 +epoch: 0, batch: 900, sum loss: 10474.287109, avg loss: 6.591748, ppl: 729.053955 +epoch: 0, batch: 901, sum loss: 9210.041992, avg loss: 6.485945, ppl: 655.858582 +epoch: 0, batch: 902, sum loss: 12605.948242, avg loss: 6.572444, ppl: 715.115784 +epoch: 0, batch: 903, sum loss: 10930.319336, avg loss: 6.475308, ppl: 648.919006 +epoch: 0, batch: 904, sum loss: 11249.080078, avg loss: 6.417045, ppl: 612.191467 +epoch: 0, batch: 905, sum loss: 10916.856445, avg loss: 6.540957, ppl: 692.949402 +epoch: 0, batch: 906, sum loss: 10684.016602, avg loss: 6.570736, ppl: 713.895020 +epoch: 0, batch: 907, sum loss: 11278.809570, avg loss: 6.553637, ppl: 701.791992 +epoch: 0, batch: 908, sum loss: 10889.415039, avg loss: 6.477939, ppl: 650.628418 +epoch: 0, batch: 909, sum loss: 10455.590820, avg loss: 6.518448, ppl: 677.526306 +epoch: 0, batch: 910, sum loss: 11167.131836, avg loss: 6.526670, ppl: 683.119629 +epoch: 0, batch: 911, sum loss: 11121.070312, avg loss: 6.495952, 
ppl: 662.454651 +epoch: 0, batch: 912, sum loss: 10830.339844, avg loss: 6.536113, ppl: 689.601074 +epoch: 0, batch: 913, sum loss: 9806.428711, avg loss: 6.455845, ppl: 636.411194 +epoch: 0, batch: 914, sum loss: 10547.398438, avg loss: 6.575685, ppl: 717.436890 +epoch: 0, batch: 915, sum loss: 12011.472656, avg loss: 6.545762, ppl: 696.287109 +epoch: 0, batch: 916, sum loss: 11874.880859, avg loss: 6.439740, ppl: 626.244080 +epoch: 0, batch: 917, sum loss: 12589.257812, avg loss: 6.419816, ppl: 613.889893 +epoch: 0, batch: 918, sum loss: 9817.554688, avg loss: 6.441965, ppl: 627.638977 +epoch: 0, batch: 919, sum loss: 11687.456055, avg loss: 6.471460, ppl: 646.426758 +epoch: 0, batch: 920, sum loss: 9635.324219, avg loss: 6.351565, ppl: 573.389282 +epoch: 0, batch: 921, sum loss: 10772.980469, avg loss: 6.521175, ppl: 679.376099 +epoch: 0, batch: 922, sum loss: 9629.890625, avg loss: 6.373190, ppl: 585.924194 +epoch: 0, batch: 923, sum loss: 11253.810547, avg loss: 6.460281, ppl: 639.240906 +epoch: 0, batch: 924, sum loss: 8663.053711, avg loss: 6.523384, ppl: 680.878662 +epoch: 0, batch: 925, sum loss: 12027.138672, avg loss: 6.473164, ppl: 647.528992 +epoch: 0, batch: 926, sum loss: 10419.605469, avg loss: 6.467788, ppl: 644.057312 +epoch: 0, batch: 927, sum loss: 9952.443359, avg loss: 6.560608, ppl: 706.701538 +epoch: 0, batch: 928, sum loss: 10362.522461, avg loss: 6.600333, ppl: 735.339844 +epoch: 0, batch: 929, sum loss: 11345.796875, avg loss: 6.439158, ppl: 625.879883 +epoch: 0, batch: 930, sum loss: 11284.730469, avg loss: 6.549466, ppl: 698.870972 +epoch: 0, batch: 931, sum loss: 11994.805664, avg loss: 6.483679, ppl: 654.373840 +epoch: 0, batch: 932, sum loss: 11492.612305, avg loss: 6.406138, ppl: 605.550781 +epoch: 0, batch: 933, sum loss: 11314.146484, avg loss: 6.570352, ppl: 713.621033 +epoch: 0, batch: 934, sum loss: 9714.316406, avg loss: 6.458987, ppl: 638.413879 +epoch: 0, batch: 935, sum loss: 11781.074219, avg loss: 6.526911, ppl: 
683.284119 +epoch: 0, batch: 936, sum loss: 11002.583984, avg loss: 6.545261, ppl: 695.938232 +epoch: 0, batch: 937, sum loss: 11149.554688, avg loss: 6.531667, ppl: 686.541565 +epoch: 0, batch: 938, sum loss: 11134.809570, avg loss: 6.503977, ppl: 667.792358 +epoch: 0, batch: 939, sum loss: 9652.541016, avg loss: 6.388181, ppl: 594.773560 +epoch: 0, batch: 940, sum loss: 10202.597656, avg loss: 6.502612, ppl: 666.880981 +epoch: 0, batch: 941, sum loss: 10006.937500, avg loss: 6.456089, ppl: 636.566589 +epoch: 0, batch: 942, sum loss: 10038.455078, avg loss: 6.414348, ppl: 610.542664 +epoch: 0, batch: 943, sum loss: 11396.342773, avg loss: 6.359567, ppl: 577.995850 +epoch: 0, batch: 944, sum loss: 10649.261719, avg loss: 6.509329, ppl: 671.375671 +epoch: 0, batch: 945, sum loss: 10882.353516, avg loss: 6.539876, ppl: 692.200745 +epoch: 0, batch: 946, sum loss: 10102.678711, avg loss: 6.337941, ppl: 565.630554 +epoch: 0, batch: 947, sum loss: 10408.686523, avg loss: 6.377872, ppl: 588.673645 +epoch: 0, batch: 948, sum loss: 11932.263672, avg loss: 6.499054, ppl: 664.512695 +epoch: 0, batch: 949, sum loss: 11407.916016, avg loss: 6.437876, ppl: 625.077881 +epoch: 0, batch: 950, sum loss: 11760.569336, avg loss: 6.573823, ppl: 716.102234 +epoch: 0, batch: 951, sum loss: 9662.687500, avg loss: 6.463336, ppl: 641.196533 +epoch: 0, batch: 952, sum loss: 10964.038086, avg loss: 6.533992, ppl: 688.139648 +epoch: 0, batch: 953, sum loss: 11890.370117, avg loss: 6.476236, ppl: 649.521790 +epoch: 0, batch: 954, sum loss: 11335.544922, avg loss: 6.540995, ppl: 692.975830 +epoch: 0, batch: 955, sum loss: 10501.057617, avg loss: 6.502203, ppl: 666.608521 +epoch: 0, batch: 956, sum loss: 9352.123047, avg loss: 6.481028, ppl: 652.641235 +epoch: 0, batch: 957, sum loss: 10679.373047, avg loss: 6.515786, ppl: 675.724670 +epoch: 0, batch: 958, sum loss: 11246.164062, avg loss: 6.325177, ppl: 558.456482 +epoch: 0, batch: 959, sum loss: 10486.201172, avg loss: 6.362986, ppl: 579.975647 
+epoch: 0, batch: 960, sum loss: 11536.331055, avg loss: 6.536165, ppl: 689.636597 +epoch: 0, batch: 961, sum loss: 10107.460938, avg loss: 6.437873, ppl: 625.076050 +epoch: 0, batch: 962, sum loss: 11424.808594, avg loss: 6.425652, ppl: 617.483276 +epoch: 0, batch: 963, sum loss: 12254.240234, avg loss: 6.459800, ppl: 638.933105 +epoch: 0, batch: 964, sum loss: 10198.146484, avg loss: 6.562514, ppl: 708.049377 +epoch: 0, batch: 965, sum loss: 11302.506836, avg loss: 6.429184, ppl: 619.668335 +epoch: 0, batch: 966, sum loss: 10390.694336, avg loss: 6.374659, ppl: 586.785339 +epoch: 0, batch: 967, sum loss: 9342.571289, avg loss: 6.412197, ppl: 609.230774 +epoch: 0, batch: 968, sum loss: 10035.055664, avg loss: 6.440986, ppl: 627.024536 +epoch: 0, batch: 969, sum loss: 10376.859375, avg loss: 6.346703, ppl: 570.608337 +epoch: 0, batch: 970, sum loss: 11442.498047, avg loss: 6.385323, ppl: 593.076294 +epoch: 0, batch: 971, sum loss: 11035.623047, avg loss: 6.518384, ppl: 677.482361 +epoch: 0, batch: 972, sum loss: 9783.824219, avg loss: 6.398839, ppl: 601.146729 +epoch: 0, batch: 973, sum loss: 13199.189453, avg loss: 6.357991, ppl: 577.085693 +epoch: 0, batch: 974, sum loss: 11263.625000, avg loss: 6.396152, ppl: 599.533875 +epoch: 0, batch: 975, sum loss: 9668.403320, avg loss: 6.398678, ppl: 601.050110 +epoch: 0, batch: 976, sum loss: 11536.527344, avg loss: 6.473921, ppl: 648.019836 +epoch: 0, batch: 977, sum loss: 10652.776367, avg loss: 6.616631, ppl: 747.422791 +epoch: 0, batch: 978, sum loss: 11164.467773, avg loss: 6.397975, ppl: 600.627808 +epoch: 0, batch: 979, sum loss: 9805.811523, avg loss: 6.388151, ppl: 594.755676 +epoch: 0, batch: 980, sum loss: 11202.440430, avg loss: 6.585797, ppl: 724.728333 +epoch: 0, batch: 981, sum loss: 10239.569336, avg loss: 6.419793, ppl: 613.875793 +epoch: 0, batch: 982, sum loss: 10793.723633, avg loss: 6.341788, ppl: 567.810791 +epoch: 0, batch: 983, sum loss: 10803.463867, avg loss: 6.457540, ppl: 637.490906 +epoch: 0, 
batch: 984, sum loss: 9414.341797, avg loss: 6.421788, ppl: 615.102051 +epoch: 0, batch: 985, sum loss: 12095.325195, avg loss: 6.369313, ppl: 583.656555 +epoch: 0, batch: 986, sum loss: 10509.890625, avg loss: 6.483584, ppl: 654.312073 +epoch: 0, batch: 987, sum loss: 12540.000977, avg loss: 6.420891, ppl: 614.550598 +epoch: 0, batch: 988, sum loss: 11430.774414, avg loss: 6.382342, ppl: 591.310852 +epoch: 0, batch: 989, sum loss: 10186.671875, avg loss: 6.327125, ppl: 559.545654 +epoch: 0, batch: 990, sum loss: 10551.863281, avg loss: 6.422315, ppl: 615.425964 +epoch: 0, batch: 991, sum loss: 11212.142578, avg loss: 6.374157, ppl: 586.491089 +epoch: 0, batch: 992, sum loss: 11730.663086, avg loss: 6.427761, ppl: 618.786682 +epoch: 0, batch: 993, sum loss: 10972.625977, avg loss: 6.346227, ppl: 570.336609 +epoch: 0, batch: 994, sum loss: 11282.996094, avg loss: 6.356617, ppl: 576.293457 +epoch: 0, batch: 995, sum loss: 12305.054688, avg loss: 6.455957, ppl: 636.482483 +epoch: 0, batch: 996, sum loss: 11301.564453, avg loss: 6.370667, ppl: 584.447815 +epoch: 0, batch: 997, sum loss: 10733.587891, avg loss: 6.373865, ppl: 586.319397 +epoch: 0, batch: 998, sum loss: 9949.264648, avg loss: 6.349244, ppl: 572.060120 +epoch: 0, batch: 999, sum loss: 9565.287109, avg loss: 6.428284, ppl: 619.110718 +epoch: 0, batch: 1000, sum loss: 9779.925781, avg loss: 6.404666, ppl: 604.660095 +epoch: 0, batch: 1001, sum loss: 9577.694336, avg loss: 6.385129, ppl: 592.961487 +epoch: 0, batch: 1002, sum loss: 10196.596680, avg loss: 6.425077, ppl: 617.128296 +epoch: 0, batch: 1003, sum loss: 10354.170898, avg loss: 6.317370, ppl: 554.114014 +epoch: 0, batch: 1004, sum loss: 9988.012695, avg loss: 6.289681, ppl: 538.981323 +epoch: 0, batch: 1005, sum loss: 12085.056641, avg loss: 6.469516, ppl: 645.171570 +epoch: 0, batch: 1006, sum loss: 10500.006836, avg loss: 6.422022, ppl: 615.246094 +epoch: 0, batch: 1007, sum loss: 10186.408203, avg loss: 6.350629, ppl: 572.852844 +epoch: 0, 
batch: 1008, sum loss: 10492.964844, avg loss: 6.386467, ppl: 593.754822 +epoch: 0, batch: 1009, sum loss: 9967.775391, avg loss: 6.410145, ppl: 607.981995 +epoch: 0, batch: 1010, sum loss: 10585.064453, avg loss: 6.458245, ppl: 637.940674 +epoch: 0, batch: 1011, sum loss: 11781.529297, avg loss: 6.347807, ppl: 571.238586 +epoch: 0, batch: 1012, sum loss: 10885.656250, avg loss: 6.270539, ppl: 528.762451 +epoch: 0, batch: 1013, sum loss: 10837.530273, avg loss: 6.348876, ppl: 571.849304 +epoch: 0, batch: 1014, sum loss: 10528.836914, avg loss: 6.354156, ppl: 574.876953 +epoch: 0, batch: 1015, sum loss: 11540.152344, avg loss: 6.479592, ppl: 651.704895 +epoch: 0, batch: 1016, sum loss: 10762.454102, avg loss: 6.330856, ppl: 561.637085 +epoch: 0, batch: 1017, sum loss: 10662.552734, avg loss: 6.312938, ppl: 551.663452 +epoch: 0, batch: 1018, sum loss: 11720.148438, avg loss: 6.345506, ppl: 569.925537 +epoch: 0, batch: 1019, sum loss: 10518.928711, avg loss: 6.394485, ppl: 598.535278 +epoch: 0, batch: 1020, sum loss: 11619.393555, avg loss: 6.398344, ppl: 600.849243 +epoch: 0, batch: 1021, sum loss: 9377.861328, avg loss: 6.214620, ppl: 500.006012 +epoch: 0, batch: 1022, sum loss: 9864.766602, avg loss: 6.389098, ppl: 595.319458 +epoch: 0, batch: 1023, sum loss: 12243.605469, avg loss: 6.481528, ppl: 652.967773 +epoch: 0, batch: 1024, sum loss: 11179.534180, avg loss: 6.447251, ppl: 630.965576 +epoch: 0, batch: 1025, sum loss: 10485.894531, avg loss: 6.362800, ppl: 579.867798 +epoch: 0, batch: 1026, sum loss: 10078.548828, avg loss: 6.283385, ppl: 535.598511 +epoch: 0, batch: 1027, sum loss: 13050.692383, avg loss: 6.457542, ppl: 637.492432 +epoch: 0, batch: 1028, sum loss: 11187.939453, avg loss: 6.295970, ppl: 542.381958 +epoch: 0, batch: 1029, sum loss: 11211.530273, avg loss: 6.476909, ppl: 649.958923 +epoch: 0, batch: 1030, sum loss: 9809.453125, avg loss: 6.328680, ppl: 560.416138 +epoch: 0, batch: 1031, sum loss: 10113.270508, avg loss: 6.336636, ppl: 
564.892578 +epoch: 0, batch: 1032, sum loss: 10902.002930, avg loss: 6.412943, ppl: 609.685547 +epoch: 0, batch: 1033, sum loss: 10227.947266, avg loss: 6.305763, ppl: 547.719238 +epoch: 0, batch: 1034, sum loss: 10895.001953, avg loss: 6.423940, ppl: 616.426880 +epoch: 0, batch: 1035, sum loss: 9816.849609, avg loss: 6.313087, ppl: 551.745544 +epoch: 0, batch: 1036, sum loss: 11649.615234, avg loss: 6.379855, ppl: 589.842285 +epoch: 0, batch: 1037, sum loss: 10066.679688, avg loss: 6.172091, ppl: 479.186829 +epoch: 0, batch: 1038, sum loss: 9785.118164, avg loss: 6.395502, ppl: 599.144043 +epoch: 0, batch: 1039, sum loss: 10293.724609, avg loss: 6.292008, ppl: 540.237000 +epoch: 0, batch: 1040, sum loss: 9414.774414, avg loss: 6.243219, ppl: 514.512024 +epoch: 0, batch: 1041, sum loss: 9851.162109, avg loss: 6.322954, ppl: 557.216675 +epoch: 0, batch: 1042, sum loss: 9110.336914, avg loss: 6.309097, ppl: 549.548645 +epoch: 0, batch: 1043, sum loss: 11289.843750, avg loss: 6.378443, ppl: 589.009766 +epoch: 0, batch: 1044, sum loss: 9078.952148, avg loss: 6.322390, ppl: 556.902466 +epoch: 0, batch: 1045, sum loss: 11742.228516, avg loss: 6.323225, ppl: 557.367371 +epoch: 0, batch: 1046, sum loss: 8993.927734, avg loss: 6.293861, ppl: 541.239258 +epoch: 0, batch: 1047, sum loss: 10566.610352, avg loss: 6.334898, ppl: 563.912170 +epoch: 0, batch: 1048, sum loss: 10354.095703, avg loss: 6.532552, ppl: 687.149719 +epoch: 0, batch: 1049, sum loss: 10092.997070, avg loss: 6.268942, ppl: 527.918762 +epoch: 0, batch: 1050, sum loss: 11908.546875, avg loss: 6.419702, ppl: 613.820190 +epoch: 0, batch: 1051, sum loss: 11052.243164, avg loss: 6.293988, ppl: 541.307861 +epoch: 0, batch: 1052, sum loss: 9823.589844, avg loss: 6.325557, ppl: 558.669006 +epoch: 0, batch: 1053, sum loss: 10619.073242, avg loss: 6.389334, ppl: 595.460022 +epoch: 0, batch: 1054, sum loss: 11928.080078, avg loss: 6.304482, ppl: 547.018127 +epoch: 0, batch: 1055, sum loss: 10502.751953, avg loss: 
6.349911, ppl: 572.441589 +epoch: 0, batch: 1056, sum loss: 9773.220703, avg loss: 6.309374, ppl: 549.700928 +epoch: 0, batch: 1057, sum loss: 11241.935547, avg loss: 6.468318, ppl: 644.398621 +epoch: 0, batch: 1058, sum loss: 11538.835938, avg loss: 6.371528, ppl: 584.950745 +epoch: 0, batch: 1059, sum loss: 10354.145508, avg loss: 6.263851, ppl: 525.237549 +epoch: 0, batch: 1060, sum loss: 11173.963867, avg loss: 6.359684, ppl: 578.063354 +epoch: 0, batch: 1061, sum loss: 9404.314453, avg loss: 6.423712, ppl: 616.286377 +epoch: 0, batch: 1062, sum loss: 9753.221680, avg loss: 6.180749, ppl: 483.353821 +epoch: 0, batch: 1063, sum loss: 10315.726562, avg loss: 6.270958, ppl: 528.983887 +epoch: 0, batch: 1064, sum loss: 12795.595703, avg loss: 6.442898, ppl: 628.224915 +epoch: 0, batch: 1065, sum loss: 11692.615234, avg loss: 6.246056, ppl: 515.973572 +epoch: 0, batch: 1066, sum loss: 9880.128906, avg loss: 6.341547, ppl: 567.673584 +epoch: 0, batch: 1067, sum loss: 11295.076172, avg loss: 6.303056, ppl: 546.238770 +epoch: 0, batch: 1068, sum loss: 10985.749023, avg loss: 6.295558, ppl: 542.158264 +epoch: 0, batch: 1069, sum loss: 13079.653320, avg loss: 6.309529, ppl: 549.785828 +epoch: 0, batch: 1070, sum loss: 11685.260742, avg loss: 6.374938, ppl: 586.949036 +epoch: 0, batch: 1071, sum loss: 11706.451172, avg loss: 6.287031, ppl: 537.554810 +epoch: 0, batch: 1072, sum loss: 11995.493164, avg loss: 6.270514, ppl: 528.748840 +epoch: 0, batch: 1073, sum loss: 9553.005859, avg loss: 6.256062, ppl: 521.162354 +epoch: 0, batch: 1074, sum loss: 11294.498047, avg loss: 6.257339, ppl: 521.828735 +epoch: 0, batch: 1075, sum loss: 10222.909180, avg loss: 6.409348, ppl: 607.497192 +epoch: 0, batch: 1076, sum loss: 10085.376953, avg loss: 6.145873, ppl: 466.786774 +epoch: 0, batch: 1077, sum loss: 10675.502930, avg loss: 6.213913, ppl: 499.652802 +epoch: 0, batch: 1078, sum loss: 11377.646484, avg loss: 6.388348, ppl: 594.872803 +epoch: 0, batch: 1079, sum loss: 8882.373047, 
avg loss: 6.381015, ppl: 590.526978 +epoch: 0, batch: 1080, sum loss: 8605.339844, avg loss: 6.177559, ppl: 481.814362 +epoch: 0, batch: 1081, sum loss: 8295.646484, avg loss: 6.172356, ppl: 479.314117 +epoch: 0, batch: 1082, sum loss: 9204.692383, avg loss: 6.215188, ppl: 500.289795 +epoch: 0, batch: 1083, sum loss: 11365.400391, avg loss: 6.282698, ppl: 535.230835 +epoch: 0, batch: 1084, sum loss: 11663.270508, avg loss: 6.247065, ppl: 516.494751 +epoch: 0, batch: 1085, sum loss: 10013.275391, avg loss: 6.211709, ppl: 498.552551 +epoch: 0, batch: 1086, sum loss: 12221.363281, avg loss: 6.325758, ppl: 558.780945 +epoch: 0, batch: 1087, sum loss: 9715.539062, avg loss: 6.337599, ppl: 565.436951 +epoch: 0, batch: 1088, sum loss: 10205.455078, avg loss: 6.196390, ppl: 490.973511 +epoch: 0, batch: 1089, sum loss: 10242.010742, avg loss: 6.295028, ppl: 541.871155 +epoch: 0, batch: 1090, sum loss: 10191.792969, avg loss: 6.291231, ppl: 539.817261 +epoch: 0, batch: 1091, sum loss: 10636.685547, avg loss: 6.238525, ppl: 512.102783 +epoch: 0, batch: 1092, sum loss: 11810.405273, avg loss: 6.235695, ppl: 510.655334 +epoch: 0, batch: 1093, sum loss: 8497.024414, avg loss: 6.117368, ppl: 453.669159 +epoch: 0, batch: 1094, sum loss: 10048.333984, avg loss: 6.315735, ppl: 553.208435 +epoch: 0, batch: 1095, sum loss: 10642.226562, avg loss: 6.260134, ppl: 523.288940 +epoch: 0, batch: 1096, sum loss: 10499.437500, avg loss: 6.234821, ppl: 510.209198 +epoch: 0, batch: 1097, sum loss: 10283.804688, avg loss: 6.293638, ppl: 541.118469 +epoch: 0, batch: 1098, sum loss: 9964.738281, avg loss: 6.286901, ppl: 537.485107 +epoch: 0, batch: 1099, sum loss: 9535.163086, avg loss: 6.277263, ppl: 532.329773 +epoch: 0, batch: 1100, sum loss: 10171.434570, avg loss: 6.274790, ppl: 531.014771 +epoch: 0, batch: 1101, sum loss: 11599.734375, avg loss: 6.440719, ppl: 626.857117 +epoch: 0, batch: 1102, sum loss: 10756.076172, avg loss: 6.253532, ppl: 519.845886 +epoch: 0, batch: 1103, sum loss: 
11409.431641, avg loss: 6.221064, ppl: 503.238434 +epoch: 0, batch: 1104, sum loss: 11358.135742, avg loss: 6.310076, ppl: 550.086609 +epoch: 0, batch: 1105, sum loss: 10035.189453, avg loss: 6.186923, ppl: 486.347321 +epoch: 0, batch: 1106, sum loss: 10306.998047, avg loss: 6.186673, ppl: 486.225830 +epoch: 0, batch: 1107, sum loss: 10698.162109, avg loss: 6.383152, ppl: 591.790100 +epoch: 0, batch: 1108, sum loss: 9895.181641, avg loss: 6.278668, ppl: 533.078369 +epoch: 0, batch: 1109, sum loss: 10122.204102, avg loss: 6.116136, ppl: 453.110321 +epoch: 0, batch: 1110, sum loss: 12613.096680, avg loss: 6.319187, ppl: 555.121338 +epoch: 0, batch: 1111, sum loss: 9914.025391, avg loss: 6.192396, ppl: 489.016235 +epoch: 0, batch: 1112, sum loss: 9679.882812, avg loss: 6.142057, ppl: 465.009308 +epoch: 0, batch: 1113, sum loss: 10688.277344, avg loss: 6.181768, ppl: 483.846832 +epoch: 0, batch: 1114, sum loss: 9070.733398, avg loss: 6.247062, ppl: 516.492981 +epoch: 0, batch: 1115, sum loss: 9982.091797, avg loss: 6.165591, ppl: 476.082550 +epoch: 0, batch: 1116, sum loss: 9822.320312, avg loss: 6.264235, ppl: 525.439209 +epoch: 0, batch: 1117, sum loss: 10267.403320, avg loss: 6.357525, ppl: 576.817139 +epoch: 0, batch: 1118, sum loss: 10021.836914, avg loss: 6.334916, ppl: 563.921814 +epoch: 0, batch: 1119, sum loss: 8955.975586, avg loss: 6.241098, ppl: 513.421875 +epoch: 0, batch: 1120, sum loss: 9780.782227, avg loss: 6.112989, ppl: 451.686768 +epoch: 0, batch: 1121, sum loss: 11459.999023, avg loss: 6.245231, ppl: 515.548401 +epoch: 0, batch: 1122, sum loss: 9643.702148, avg loss: 6.193771, ppl: 489.689423 +epoch: 0, batch: 1123, sum loss: 10095.511719, avg loss: 6.278303, ppl: 532.883667 +epoch: 0, batch: 1124, sum loss: 9784.347656, avg loss: 6.208342, ppl: 496.876770 +epoch: 0, batch: 1125, sum loss: 9516.470703, avg loss: 6.187562, ppl: 486.658173 +epoch: 0, batch: 1126, sum loss: 9879.748047, avg loss: 6.225424, ppl: 505.437439 +epoch: 0, batch: 1127, sum 
loss: 10564.395508, avg loss: 6.199763, ppl: 492.632172 +epoch: 0, batch: 1128, sum loss: 11377.463867, avg loss: 6.313798, ppl: 552.137939 +epoch: 0, batch: 1129, sum loss: 11991.638672, avg loss: 6.443654, ppl: 628.699585 +epoch: 0, batch: 1130, sum loss: 11945.076172, avg loss: 6.253967, ppl: 520.071777 +epoch: 0, batch: 1131, sum loss: 10743.749023, avg loss: 6.246366, ppl: 516.133545 +epoch: 0, batch: 1132, sum loss: 9040.940430, avg loss: 6.230834, ppl: 508.179138 +epoch: 0, batch: 1133, sum loss: 10452.737305, avg loss: 6.236717, ppl: 511.177673 +epoch: 0, batch: 1134, sum loss: 9441.940430, avg loss: 6.379690, ppl: 589.744690 +epoch: 0, batch: 1135, sum loss: 9120.431641, avg loss: 6.217063, ppl: 501.228943 +epoch: 0, batch: 1136, sum loss: 9550.614258, avg loss: 6.185631, ppl: 485.719482 +epoch: 0, batch: 1137, sum loss: 11499.719727, avg loss: 6.311592, ppl: 550.921082 +epoch: 0, batch: 1138, sum loss: 10989.343750, avg loss: 6.191179, ppl: 488.421753 +epoch: 0, batch: 1139, sum loss: 11155.464844, avg loss: 6.149650, ppl: 468.553406 +epoch: 0, batch: 1140, sum loss: 11655.074219, avg loss: 6.317113, ppl: 553.971558 +epoch: 0, batch: 1141, sum loss: 11537.515625, avg loss: 6.236495, ppl: 511.064087 +epoch: 0, batch: 1142, sum loss: 9653.620117, avg loss: 6.272658, ppl: 529.883850 +epoch: 0, batch: 1143, sum loss: 11232.013672, avg loss: 6.271364, ppl: 529.198853 +epoch: 0, batch: 1144, sum loss: 11009.683594, avg loss: 6.160987, ppl: 473.895538 +epoch: 0, batch: 1145, sum loss: 11371.149414, avg loss: 6.234183, ppl: 509.883789 +epoch: 0, batch: 1146, sum loss: 10378.684570, avg loss: 6.240941, ppl: 513.341370 +epoch: 0, batch: 1147, sum loss: 11019.972656, avg loss: 6.384688, ppl: 592.700012 +epoch: 0, batch: 1148, sum loss: 9857.638672, avg loss: 6.164877, ppl: 475.742584 +epoch: 0, batch: 1149, sum loss: 11131.919922, avg loss: 6.296335, ppl: 542.579834 +epoch: 0, batch: 1150, sum loss: 10858.786133, avg loss: 6.208569, ppl: 496.989349 +epoch: 0, batch: 
1151, sum loss: 9514.493164, avg loss: 6.271914, ppl: 529.489624 +epoch: 0, batch: 1152, sum loss: 9342.406250, avg loss: 6.150366, ppl: 468.888885 +epoch: 0, batch: 1153, sum loss: 10881.528320, avg loss: 6.151232, ppl: 469.295105 +epoch: 0, batch: 1154, sum loss: 12597.474609, avg loss: 6.375240, ppl: 587.126526 +epoch: 0, batch: 1155, sum loss: 10495.873047, avg loss: 6.281193, ppl: 534.425964 +epoch: 0, batch: 1156, sum loss: 10286.328125, avg loss: 6.181688, ppl: 483.807861 +epoch: 0, batch: 1157, sum loss: 10047.933594, avg loss: 6.194780, ppl: 490.183777 +epoch: 0, batch: 1158, sum loss: 9559.183594, avg loss: 6.131612, ppl: 460.177521 +epoch: 0, batch: 1159, sum loss: 10902.953125, avg loss: 6.223147, ppl: 504.287689 +epoch: 0, batch: 1160, sum loss: 11415.004883, avg loss: 6.183643, ppl: 484.754639 +epoch: 0, batch: 1161, sum loss: 10972.429688, avg loss: 6.353463, ppl: 574.478516 +epoch: 0, batch: 1162, sum loss: 9811.466797, avg loss: 6.265304, ppl: 526.001526 +epoch: 0, batch: 1163, sum loss: 7797.839844, avg loss: 6.159431, ppl: 473.158997 +epoch: 0, batch: 1164, sum loss: 11748.737305, avg loss: 6.167316, ppl: 476.904358 +epoch: 0, batch: 1165, sum loss: 11187.488281, avg loss: 6.120070, ppl: 454.896759 +epoch: 0, batch: 1166, sum loss: 9547.002930, avg loss: 6.096426, ppl: 444.266907 +epoch: 0, batch: 1167, sum loss: 10018.088867, avg loss: 6.187825, ppl: 486.786285 +epoch: 0, batch: 1168, sum loss: 10685.981445, avg loss: 6.216394, ppl: 500.893951 +epoch: 0, batch: 1169, sum loss: 8962.755859, avg loss: 6.023357, ppl: 412.962738 +epoch: 0, batch: 1170, sum loss: 9662.783203, avg loss: 6.202044, ppl: 493.757019 +epoch: 0, batch: 1171, sum loss: 9755.675781, avg loss: 6.155000, ppl: 471.066956 +epoch: 0, batch: 1172, sum loss: 10435.926758, avg loss: 6.237853, ppl: 511.758362 +epoch: 0, batch: 1173, sum loss: 10604.570312, avg loss: 6.080602, ppl: 437.292450 +epoch: 0, batch: 1174, sum loss: 11621.187500, avg loss: 6.103565, ppl: 447.450195 +epoch: 0, 
batch: 1175, sum loss: 10281.123047, avg loss: 6.167440, ppl: 476.963470 +epoch: 0, batch: 1176, sum loss: 10733.933594, avg loss: 6.193845, ppl: 489.725372 +epoch: 0, batch: 1177, sum loss: 10478.820312, avg loss: 6.237393, ppl: 511.523438 +epoch: 0, batch: 1178, sum loss: 9942.943359, avg loss: 6.316990, ppl: 553.903442 +epoch: 0, batch: 1179, sum loss: 11455.093750, avg loss: 6.328782, ppl: 560.473328 +epoch: 0, batch: 1180, sum loss: 10119.064453, avg loss: 6.077517, ppl: 435.945190 +epoch: 0, batch: 1181, sum loss: 9206.534180, avg loss: 6.284324, ppl: 536.101624 +epoch: 0, batch: 1182, sum loss: 10719.396484, avg loss: 6.287036, ppl: 537.557861 +epoch: 0, batch: 1183, sum loss: 9969.683594, avg loss: 6.101398, ppl: 446.481293 +epoch: 0, batch: 1184, sum loss: 11304.191406, avg loss: 6.113678, ppl: 451.998322 +epoch: 0, batch: 1185, sum loss: 9785.876953, avg loss: 6.213255, ppl: 499.323883 +epoch: 0, batch: 1186, sum loss: 9712.115234, avg loss: 6.335366, ppl: 564.175720 +epoch: 0, batch: 1187, sum loss: 10400.759766, avg loss: 6.139763, ppl: 463.943756 +epoch: 0, batch: 1188, sum loss: 10579.416992, avg loss: 6.256309, ppl: 521.291077 +epoch: 0, batch: 1189, sum loss: 9493.390625, avg loss: 6.221095, ppl: 503.254028 +epoch: 0, batch: 1190, sum loss: 10198.212891, avg loss: 6.326435, ppl: 559.159424 +epoch: 0, batch: 1191, sum loss: 8846.343750, avg loss: 5.989400, ppl: 399.175201 +epoch: 0, batch: 1192, sum loss: 9753.882812, avg loss: 6.280672, ppl: 534.147278 +epoch: 0, batch: 1193, sum loss: 10890.998047, avg loss: 6.146162, ppl: 466.921692 +epoch: 0, batch: 1194, sum loss: 10024.451172, avg loss: 6.237991, ppl: 511.829132 +epoch: 0, batch: 1195, sum loss: 10068.508789, avg loss: 6.211295, ppl: 498.346008 +epoch: 0, batch: 1196, sum loss: 8930.172852, avg loss: 6.171509, ppl: 478.908142 +epoch: 0, batch: 1197, sum loss: 10434.909180, avg loss: 6.174502, ppl: 480.343933 +epoch: 0, batch: 1198, sum loss: 9413.331055, avg loss: 6.238125, ppl: 511.897980 
+epoch: 0, batch: 1199, sum loss: 10072.229492, avg loss: 6.160385, ppl: 473.610229 +epoch: 0, batch: 1200, sum loss: 10607.171875, avg loss: 6.306285, ppl: 548.005554 +epoch: 0, batch: 1201, sum loss: 8558.771484, avg loss: 6.044330, ppl: 421.715179 +epoch: 0, batch: 1202, sum loss: 9928.105469, avg loss: 6.208946, ppl: 497.177063 +epoch: 0, batch: 1203, sum loss: 9873.546875, avg loss: 6.198083, ppl: 491.805542 +epoch: 0, batch: 1204, sum loss: 10452.715820, avg loss: 6.145041, ppl: 466.398987 +epoch: 0, batch: 1205, sum loss: 10221.206055, avg loss: 6.138863, ppl: 463.526062 +epoch: 0, batch: 1206, sum loss: 10725.498047, avg loss: 6.111395, ppl: 450.967316 +epoch: 0, batch: 1207, sum loss: 10714.003906, avg loss: 6.178780, ppl: 482.402863 +epoch: 0, batch: 1208, sum loss: 10072.212891, avg loss: 6.137850, ppl: 463.057068 +epoch: 0, batch: 1209, sum loss: 11283.443359, avg loss: 6.122324, ppl: 455.923035 +epoch: 0, batch: 1210, sum loss: 10725.296875, avg loss: 6.149826, ppl: 468.635864 +epoch: 0, batch: 1211, sum loss: 9597.880859, avg loss: 6.021255, ppl: 412.095459 +epoch: 0, batch: 1212, sum loss: 9495.603516, avg loss: 6.106498, ppl: 448.764282 +epoch: 0, batch: 1213, sum loss: 10083.460938, avg loss: 6.353788, ppl: 574.665344 +epoch: 0, batch: 1214, sum loss: 11307.501953, avg loss: 6.115469, ppl: 452.808350 +epoch: 0, batch: 1215, sum loss: 9948.832031, avg loss: 6.126128, ppl: 457.660767 +epoch: 0, batch: 1216, sum loss: 9355.515625, avg loss: 6.175258, ppl: 480.707092 +epoch: 0, batch: 1217, sum loss: 10670.399414, avg loss: 6.200116, ppl: 492.806061 +epoch: 0, batch: 1218, sum loss: 11379.142578, avg loss: 6.160879, ppl: 473.844452 +epoch: 0, batch: 1219, sum loss: 10378.212891, avg loss: 6.159177, ppl: 473.038513 +epoch: 0, batch: 1220, sum loss: 9012.506836, avg loss: 6.069028, ppl: 432.260284 +epoch: 0, batch: 1221, sum loss: 10927.076172, avg loss: 6.176980, ppl: 481.535309 +epoch: 0, batch: 1222, sum loss: 9839.235352, avg loss: 6.107533, ppl: 
449.229095 +epoch: 0, batch: 1223, sum loss: 11204.652344, avg loss: 6.183583, ppl: 484.725525 +epoch: 0, batch: 1224, sum loss: 10058.670898, avg loss: 6.170964, ppl: 478.647186 +epoch: 0, batch: 1225, sum loss: 10002.486328, avg loss: 6.080539, ppl: 437.264709 +epoch: 0, batch: 1226, sum loss: 10315.750000, avg loss: 6.096779, ppl: 444.423920 +epoch: 0, batch: 1227, sum loss: 10834.755859, avg loss: 6.163115, ppl: 474.904877 +epoch: 0, batch: 1228, sum loss: 9156.868164, avg loss: 6.032193, ppl: 416.627563 +epoch: 0, batch: 1229, sum loss: 10891.730469, avg loss: 6.125833, ppl: 457.525696 +epoch: 0, batch: 1230, sum loss: 9766.604492, avg loss: 6.032492, ppl: 416.752350 +epoch: 0, batch: 1231, sum loss: 9138.874023, avg loss: 6.008464, ppl: 406.858032 +epoch: 0, batch: 1232, sum loss: 11820.833984, avg loss: 6.287677, ppl: 537.902466 +epoch: 0, batch: 1233, sum loss: 10766.248047, avg loss: 6.183945, ppl: 484.901215 +epoch: 0, batch: 1234, sum loss: 9539.258789, avg loss: 6.118832, ppl: 454.333771 +epoch: 0, batch: 1235, sum loss: 10700.035156, avg loss: 6.224570, ppl: 505.005737 +epoch: 0, batch: 1236, sum loss: 11462.248047, avg loss: 6.172454, ppl: 479.361176 +epoch: 0, batch: 1237, sum loss: 9973.326172, avg loss: 6.288352, ppl: 538.265564 +epoch: 0, batch: 1238, sum loss: 10658.223633, avg loss: 6.118383, ppl: 454.129730 +epoch: 0, batch: 1239, sum loss: 10588.615234, avg loss: 6.239608, ppl: 512.657654 +epoch: 0, batch: 1240, sum loss: 10270.519531, avg loss: 6.172187, ppl: 479.233215 +epoch: 0, batch: 1241, sum loss: 9741.494141, avg loss: 6.080832, ppl: 437.393158 +epoch: 0, batch: 1242, sum loss: 9885.750000, avg loss: 6.144033, ppl: 465.929077 +epoch: 0, batch: 1243, sum loss: 10718.384766, avg loss: 6.224381, ppl: 504.910645 +epoch: 0, batch: 1244, sum loss: 10665.766602, avg loss: 6.119201, ppl: 454.501282 +epoch: 0, batch: 1245, sum loss: 10125.568359, avg loss: 6.281370, ppl: 534.520264 +epoch: 0, batch: 1246, sum loss: 9091.196289, avg loss: 
5.988930, ppl: 398.987549 +epoch: 0, batch: 1247, sum loss: 9033.608398, avg loss: 6.038508, ppl: 419.267181 +epoch: 0, batch: 1248, sum loss: 9819.245117, avg loss: 6.061263, ppl: 428.916656 +epoch: 0, batch: 1249, sum loss: 9535.886719, avg loss: 6.120595, ppl: 455.135406 +epoch: 0, batch: 1250, sum loss: 10215.157227, avg loss: 6.240170, ppl: 512.945435 +epoch: 0, batch: 1251, sum loss: 10394.320312, avg loss: 6.194470, ppl: 490.031616 +epoch: 0, batch: 1252, sum loss: 9942.611328, avg loss: 6.245359, ppl: 515.614258 +epoch: 0, batch: 1253, sum loss: 9861.098633, avg loss: 6.140161, ppl: 464.128296 +epoch: 0, batch: 1254, sum loss: 10561.650391, avg loss: 6.180018, ppl: 483.000610 +epoch: 0, batch: 1255, sum loss: 10697.731445, avg loss: 6.169395, ppl: 477.896881 +epoch: 0, batch: 1256, sum loss: 9350.885742, avg loss: 6.099730, ppl: 445.737396 +epoch: 0, batch: 1257, sum loss: 10527.658203, avg loss: 6.214674, ppl: 500.032959 +epoch: 0, batch: 1258, sum loss: 9206.502930, avg loss: 6.084933, ppl: 439.190308 +epoch: 0, batch: 1259, sum loss: 10798.878906, avg loss: 6.097616, ppl: 444.796204 +epoch: 0, batch: 1260, sum loss: 9631.854492, avg loss: 6.218112, ppl: 501.754791 +epoch: 0, batch: 1261, sum loss: 11361.291016, avg loss: 6.218550, ppl: 501.974945 +epoch: 0, batch: 1262, sum loss: 10737.663086, avg loss: 6.125307, ppl: 457.285126 +epoch: 0, batch: 1263, sum loss: 9550.798828, avg loss: 6.185751, ppl: 485.777618 +epoch: 0, batch: 1264, sum loss: 10529.259766, avg loss: 6.132359, ppl: 460.521057 +epoch: 0, batch: 1265, sum loss: 10851.217773, avg loss: 6.035160, ppl: 417.865479 +epoch: 0, batch: 1266, sum loss: 10444.275391, avg loss: 6.082863, ppl: 438.282135 +epoch: 0, batch: 1267, sum loss: 9041.535156, avg loss: 6.146523, ppl: 467.090485 +epoch: 0, batch: 1268, sum loss: 10751.858398, avg loss: 5.973255, ppl: 392.781982 +epoch: 0, batch: 1269, sum loss: 10188.242188, avg loss: 6.163486, ppl: 475.081085 +epoch: 0, batch: 1270, sum loss: 9119.677734, avg 
loss: 6.059587, ppl: 428.198547 +epoch: 0, batch: 1271, sum loss: 11437.497070, avg loss: 6.132706, ppl: 460.681152 +epoch: 0, batch: 1272, sum loss: 9819.454102, avg loss: 6.102830, ppl: 447.121521 +epoch: 0, batch: 1273, sum loss: 9661.962891, avg loss: 6.154117, ppl: 470.650909 +epoch: 0, batch: 1274, sum loss: 11660.234375, avg loss: 6.124073, ppl: 456.721161 +epoch: 0, batch: 1275, sum loss: 9450.189453, avg loss: 6.015398, ppl: 409.688690 +epoch: 0, batch: 1276, sum loss: 11996.076172, avg loss: 6.222032, ppl: 503.725555 +epoch: 0, batch: 1277, sum loss: 11994.259766, avg loss: 6.169887, ppl: 478.132111 +epoch: 0, batch: 1278, sum loss: 10386.754883, avg loss: 6.099092, ppl: 445.453339 +epoch: 0, batch: 1279, sum loss: 9222.862305, avg loss: 6.095746, ppl: 443.964935 +epoch: 0, batch: 1280, sum loss: 12311.316406, avg loss: 6.049787, ppl: 424.022736 +epoch: 0, batch: 1281, sum loss: 11097.910156, avg loss: 6.015127, ppl: 409.577911 +epoch: 0, batch: 1282, sum loss: 9544.679688, avg loss: 6.122309, ppl: 455.916290 +epoch: 0, batch: 1283, sum loss: 9925.127930, avg loss: 5.989818, ppl: 399.341766 +epoch: 0, batch: 1284, sum loss: 12516.801758, avg loss: 6.199505, ppl: 492.505341 +epoch: 0, batch: 1285, sum loss: 11604.333984, avg loss: 6.056542, ppl: 426.896637 +epoch: 0, batch: 1286, sum loss: 10110.805664, avg loss: 6.021921, ppl: 412.370056 +epoch: 0, batch: 1287, sum loss: 9733.886719, avg loss: 6.121941, ppl: 455.748474 +epoch: 0, batch: 1288, sum loss: 9402.390625, avg loss: 6.081753, ppl: 437.796082 +epoch: 0, batch: 1289, sum loss: 10015.916992, avg loss: 6.011955, ppl: 408.280640 +epoch: 0, batch: 1290, sum loss: 10161.618164, avg loss: 6.226482, ppl: 505.972290 +epoch: 0, batch: 1291, sum loss: 10338.547852, avg loss: 6.235554, ppl: 510.583282 +epoch: 0, batch: 1292, sum loss: 10517.785156, avg loss: 6.197870, ppl: 491.700714 +epoch: 0, batch: 1293, sum loss: 10308.078125, avg loss: 5.972235, ppl: 392.381744 +epoch: 0, batch: 1294, sum loss: 
8379.912109, avg loss: 6.085630, ppl: 439.496582 +epoch: 0, batch: 1295, sum loss: 10098.369141, avg loss: 6.043309, ppl: 421.284851 +epoch: 0, batch: 1296, sum loss: 9153.795898, avg loss: 6.058105, ppl: 427.564240 +epoch: 0, batch: 1297, sum loss: 10035.659180, avg loss: 6.093296, ppl: 442.878754 +epoch: 0, batch: 1298, sum loss: 10807.828125, avg loss: 6.109570, ppl: 450.144897 +epoch: 0, batch: 1299, sum loss: 10007.500000, avg loss: 6.135807, ppl: 462.111664 +epoch: 0, batch: 1300, sum loss: 9193.900391, avg loss: 5.935378, ppl: 378.182953 +epoch: 0, batch: 1301, sum loss: 11075.628906, avg loss: 5.977134, ppl: 394.308563 +epoch: 0, batch: 1302, sum loss: 9570.342773, avg loss: 5.985205, ppl: 397.503876 +epoch: 0, batch: 1303, sum loss: 11061.828125, avg loss: 6.091315, ppl: 442.002167 +epoch: 0, batch: 1304, sum loss: 10432.510742, avg loss: 6.140383, ppl: 464.231445 +epoch: 0, batch: 1305, sum loss: 10297.836914, avg loss: 5.987114, ppl: 398.263733 +epoch: 0, batch: 1306, sum loss: 9004.645508, avg loss: 6.063734, ppl: 429.977997 +epoch: 0, batch: 1307, sum loss: 11400.194336, avg loss: 6.185672, ppl: 485.739410 +epoch: 0, batch: 1308, sum loss: 11190.892578, avg loss: 6.068814, ppl: 432.167755 +epoch: 0, batch: 1309, sum loss: 10487.662109, avg loss: 6.006680, ppl: 406.132538 +epoch: 0, batch: 1310, sum loss: 9256.110352, avg loss: 6.113679, ppl: 451.998749 +epoch: 0, batch: 1311, sum loss: 10691.501953, avg loss: 6.030176, ppl: 415.788269 +epoch: 0, batch: 1312, sum loss: 11811.138672, avg loss: 6.279181, ppl: 533.351440 +epoch: 0, batch: 1313, sum loss: 11445.956055, avg loss: 5.970765, ppl: 391.805328 +epoch: 0, batch: 1314, sum loss: 12490.462891, avg loss: 6.022402, ppl: 412.568512 +epoch: 0, batch: 1315, sum loss: 9880.592773, avg loss: 5.980989, ppl: 395.831451 +epoch: 0, batch: 1316, sum loss: 10329.166016, avg loss: 5.943133, ppl: 381.127258 +epoch: 0, batch: 1317, sum loss: 10944.306641, avg loss: 6.006754, ppl: 406.162933 +epoch: 0, batch: 1318, 
sum loss: 10607.079102, avg loss: 6.117116, ppl: 453.554749 +epoch: 0, batch: 1319, sum loss: 9397.478516, avg loss: 6.122136, ppl: 455.837158 +epoch: 0, batch: 1320, sum loss: 9880.135742, avg loss: 6.076344, ppl: 435.434326 +epoch: 0, batch: 1321, sum loss: 10156.208984, avg loss: 6.067031, ppl: 431.397919 +epoch: 0, batch: 1322, sum loss: 9061.577148, avg loss: 5.922600, ppl: 373.381165 +epoch: 0, batch: 1323, sum loss: 10281.275391, avg loss: 6.079998, ppl: 437.028107 +epoch: 0, batch: 1324, sum loss: 10575.323242, avg loss: 6.177175, ppl: 481.629211 +epoch: 0, batch: 1325, sum loss: 11324.815430, avg loss: 6.148108, ppl: 467.831635 +epoch: 0, batch: 1326, sum loss: 9084.046875, avg loss: 6.000031, ppl: 403.441284 +epoch: 0, batch: 1327, sum loss: 10873.986328, avg loss: 6.021033, ppl: 412.004089 +epoch: 0, batch: 1328, sum loss: 11843.455078, avg loss: 5.969483, ppl: 391.303467 +epoch: 0, batch: 1329, sum loss: 9863.802734, avg loss: 6.040296, ppl: 420.017181 +epoch: 0, batch: 1330, sum loss: 9656.837891, avg loss: 5.946329, ppl: 382.347015 +epoch: 0, batch: 1331, sum loss: 10608.560547, avg loss: 6.031018, ppl: 416.138550 +epoch: 0, batch: 1332, sum loss: 9093.099609, avg loss: 6.086412, ppl: 439.840424 +epoch: 0, batch: 1333, sum loss: 10238.166016, avg loss: 6.250406, ppl: 518.223328 +epoch: 0, batch: 1334, sum loss: 10786.002930, avg loss: 5.985573, ppl: 397.650421 +epoch: 0, batch: 1335, sum loss: 11133.815430, avg loss: 6.070782, ppl: 433.019257 +epoch: 0, batch: 1336, sum loss: 9582.106445, avg loss: 6.041681, ppl: 420.599579 +epoch: 0, batch: 1337, sum loss: 10481.042969, avg loss: 6.114961, ppl: 452.578461 +epoch: 0, batch: 1338, sum loss: 9862.291992, avg loss: 6.072840, ppl: 433.911133 +epoch: 0, batch: 1339, sum loss: 11240.253906, avg loss: 6.033416, ppl: 417.137451 +epoch: 0, batch: 1340, sum loss: 9755.096680, avg loss: 5.984722, ppl: 397.311920 +epoch: 0, batch: 1341, sum loss: 11833.164062, avg loss: 6.071403, ppl: 433.288177 +epoch: 0, batch: 
1342, sum loss: 9131.457031, avg loss: 6.031346, ppl: 416.274902 +epoch: 0, batch: 1343, sum loss: 9275.164062, avg loss: 6.102082, ppl: 446.786926 +epoch: 0, batch: 1344, sum loss: 9820.264648, avg loss: 6.050687, ppl: 424.404419 +epoch: 0, batch: 1345, sum loss: 11279.922852, avg loss: 6.123737, ppl: 456.567627 +epoch: 0, batch: 1346, sum loss: 8706.596680, avg loss: 6.118480, ppl: 454.173706 +epoch: 0, batch: 1347, sum loss: 11329.822266, avg loss: 6.032919, ppl: 416.930237 +epoch: 0, batch: 1348, sum loss: 10367.124023, avg loss: 5.985637, ppl: 397.675842 +epoch: 0, batch: 1349, sum loss: 9101.515625, avg loss: 5.960390, ppl: 387.761353 +epoch: 0, batch: 1350, sum loss: 11847.952148, avg loss: 5.971750, ppl: 392.191315 +epoch: 0, batch: 1351, sum loss: 10082.595703, avg loss: 6.005119, ppl: 405.499359 +epoch: 0, batch: 1352, sum loss: 10409.706055, avg loss: 5.982590, ppl: 396.465790 +epoch: 0, batch: 1353, sum loss: 9998.049805, avg loss: 6.070461, ppl: 432.880310 +epoch: 0, batch: 1354, sum loss: 10241.839844, avg loss: 6.203416, ppl: 494.435333 +epoch: 0, batch: 1355, sum loss: 9225.045898, avg loss: 5.872086, ppl: 354.988556 +epoch: 0, batch: 1356, sum loss: 10334.809570, avg loss: 5.994669, ppl: 401.284027 +epoch: 0, batch: 1357, sum loss: 11054.623047, avg loss: 6.168875, ppl: 477.648315 +epoch: 0, batch: 1358, sum loss: 9711.654297, avg loss: 6.009687, ppl: 407.355774 +epoch: 0, batch: 1359, sum loss: 8307.212891, avg loss: 6.072524, ppl: 433.774170 +epoch: 0, batch: 1360, sum loss: 8344.173828, avg loss: 5.794565, ppl: 328.509308 +epoch: 0, batch: 1361, sum loss: 11079.828125, avg loss: 6.135010, ppl: 461.743835 +epoch: 0, batch: 1362, sum loss: 9880.554688, avg loss: 5.909423, ppl: 368.493439 +epoch: 0, batch: 1363, sum loss: 11274.835938, avg loss: 6.035779, ppl: 418.124603 +epoch: 0, batch: 1364, sum loss: 10355.050781, avg loss: 6.105573, ppl: 448.349335 +epoch: 0, batch: 1365, sum loss: 11069.131836, avg loss: 6.071932, ppl: 433.517578 +epoch: 0, 
batch: 1366, sum loss: 8904.675781, avg loss: 5.960292, ppl: 387.723267 +epoch: 0, batch: 1367, sum loss: 9573.838867, avg loss: 5.909777, ppl: 368.623993 +epoch: 0, batch: 1368, sum loss: 9495.921875, avg loss: 6.118507, ppl: 454.186035 +epoch: 0, batch: 1369, sum loss: 8938.498047, avg loss: 5.800453, ppl: 330.449280 +epoch: 0, batch: 1370, sum loss: 10790.530273, avg loss: 6.038350, ppl: 419.200623 +epoch: 0, batch: 1371, sum loss: 10264.652344, avg loss: 5.943632, ppl: 381.317413 +epoch: 0, batch: 1372, sum loss: 9474.873047, avg loss: 5.974069, ppl: 393.101807 +epoch: 0, batch: 1373, sum loss: 10707.496094, avg loss: 5.951915, ppl: 384.489044 +epoch: 0, batch: 1374, sum loss: 9030.501953, avg loss: 6.056675, ppl: 426.953430 +epoch: 0, batch: 1375, sum loss: 8995.965820, avg loss: 6.049741, ppl: 424.003326 +epoch: 0, batch: 1376, sum loss: 10413.868164, avg loss: 5.943989, ppl: 381.453430 +epoch: 0, batch: 1377, sum loss: 8895.035156, avg loss: 6.030532, ppl: 415.936401 +epoch: 0, batch: 1378, sum loss: 9750.726562, avg loss: 5.963747, ppl: 389.065247 +epoch: 0, batch: 1379, sum loss: 10077.504883, avg loss: 6.118703, ppl: 454.275269 +epoch: 0, batch: 1380, sum loss: 10206.940430, avg loss: 5.986475, ppl: 398.009338 +epoch: 0, batch: 1381, sum loss: 11251.738281, avg loss: 5.956452, ppl: 386.237457 +epoch: 0, batch: 1382, sum loss: 10908.523438, avg loss: 6.010206, ppl: 407.567169 +epoch: 0, batch: 1383, sum loss: 10920.054688, avg loss: 6.033180, ppl: 417.039001 +epoch: 0, batch: 1384, sum loss: 10582.058594, avg loss: 5.958366, ppl: 386.977448 +epoch: 0, batch: 1385, sum loss: 9351.446289, avg loss: 5.979186, ppl: 395.118439 +epoch: 0, batch: 1386, sum loss: 10284.529297, avg loss: 6.046166, ppl: 422.490265 +epoch: 0, batch: 1387, sum loss: 11390.721680, avg loss: 5.895819, ppl: 363.514313 +epoch: 0, batch: 1388, sum loss: 8726.681641, avg loss: 6.030879, ppl: 416.080597 +epoch: 0, batch: 1389, sum loss: 9622.197266, avg loss: 6.017634, ppl: 410.605927 
+epoch: 0, batch: 1390, sum loss: 10310.890625, avg loss: 6.001683, ppl: 404.108246 +epoch: 0, batch: 1391, sum loss: 8508.548828, avg loss: 5.912820, ppl: 369.747498 +epoch: 0, batch: 1392, sum loss: 13454.303711, avg loss: 6.096196, ppl: 444.164825 +epoch: 0, batch: 1393, sum loss: 10920.600586, avg loss: 6.046844, ppl: 422.776855 +epoch: 0, batch: 1394, sum loss: 9528.820312, avg loss: 5.842318, ppl: 344.577179 +epoch: 0, batch: 1395, sum loss: 10379.996094, avg loss: 6.048949, ppl: 423.667419 +epoch: 0, batch: 1396, sum loss: 10293.875000, avg loss: 6.083850, ppl: 438.715179 +epoch: 0, batch: 1397, sum loss: 10067.493164, avg loss: 5.894317, ppl: 362.968719 +epoch: 0, batch: 1398, sum loss: 10208.511719, avg loss: 6.065663, ppl: 430.808350 +epoch: 0, batch: 1399, sum loss: 10721.618164, avg loss: 5.996431, ppl: 401.991486 +epoch: 0, batch: 1400, sum loss: 10152.536133, avg loss: 6.028822, ppl: 415.225769 +epoch: 0, batch: 1401, sum loss: 9960.242188, avg loss: 6.011009, ppl: 407.894775 +epoch: 0, batch: 1402, sum loss: 9462.987305, avg loss: 5.892271, ppl: 362.226990 +epoch: 0, batch: 1403, sum loss: 9746.734375, avg loss: 5.843366, ppl: 344.938507 +epoch: 0, batch: 1404, sum loss: 10972.160156, avg loss: 6.048600, ppl: 423.519775 +epoch: 0, batch: 1405, sum loss: 10354.611328, avg loss: 5.988786, ppl: 398.930115 +epoch: 0, batch: 1406, sum loss: 9498.228516, avg loss: 5.947545, ppl: 382.812225 +epoch: 0, batch: 1407, sum loss: 11473.941406, avg loss: 6.074082, ppl: 434.450653 +epoch: 0, batch: 1408, sum loss: 9874.461914, avg loss: 6.069122, ppl: 432.301086 +epoch: 0, batch: 1409, sum loss: 10135.005859, avg loss: 6.083436, ppl: 438.533630 +epoch: 0, batch: 1410, sum loss: 10697.973633, avg loss: 5.999986, ppl: 403.423035 +epoch: 0, batch: 1411, sum loss: 9200.629883, avg loss: 5.924424, ppl: 374.062805 +epoch: 0, batch: 1412, sum loss: 8450.575195, avg loss: 5.868455, ppl: 353.702057 +epoch: 0, batch: 1413, sum loss: 8078.875000, avg loss: 5.871275, ppl: 
354.701080 +epoch: 0, batch: 1414, sum loss: 9948.180664, avg loss: 5.903965, ppl: 366.487549 +epoch: 0, batch: 1415, sum loss: 9031.414062, avg loss: 6.037042, ppl: 418.652679 +epoch: 0, batch: 1416, sum loss: 9812.330078, avg loss: 6.094615, ppl: 443.463257 +epoch: 0, batch: 1417, sum loss: 9212.627930, avg loss: 5.932149, ppl: 376.963898 +epoch: 0, batch: 1418, sum loss: 9571.191406, avg loss: 5.900858, ppl: 365.350769 +epoch: 0, batch: 1419, sum loss: 9972.340820, avg loss: 6.054852, ppl: 426.175629 +epoch: 0, batch: 1420, sum loss: 10590.278320, avg loss: 6.089867, ppl: 441.362762 +epoch: 0, batch: 1421, sum loss: 9417.943359, avg loss: 5.856930, ppl: 349.648987 +epoch: 0, batch: 1422, sum loss: 10366.600586, avg loss: 5.883429, ppl: 359.038116 +epoch: 0, batch: 1423, sum loss: 10365.916016, avg loss: 6.009227, ppl: 407.168365 +epoch: 0, batch: 1424, sum loss: 10094.017578, avg loss: 5.990515, ppl: 399.620270 +epoch: 0, batch: 1425, sum loss: 9627.668945, avg loss: 5.852686, ppl: 348.168274 +epoch: 0, batch: 1426, sum loss: 11263.621094, avg loss: 5.881786, ppl: 358.448975 +epoch: 0, batch: 1427, sum loss: 9445.489258, avg loss: 5.929372, ppl: 375.918304 +epoch: 0, batch: 1428, sum loss: 9888.513672, avg loss: 6.018572, ppl: 410.991211 +epoch: 0, batch: 1429, sum loss: 10154.839844, avg loss: 5.819392, ppl: 336.767303 +epoch: 0, batch: 1430, sum loss: 9343.676758, avg loss: 5.962780, ppl: 388.689362 +epoch: 0, batch: 1431, sum loss: 9522.066406, avg loss: 5.823894, ppl: 338.286621 +epoch: 0, batch: 1432, sum loss: 9735.708008, avg loss: 5.950922, ppl: 384.107330 +epoch: 0, batch: 1433, sum loss: 10853.131836, avg loss: 5.888840, ppl: 360.986206 +epoch: 0, batch: 1434, sum loss: 8733.690430, avg loss: 5.990185, ppl: 399.488617 +epoch: 0, batch: 1435, sum loss: 9457.400391, avg loss: 5.955542, ppl: 385.886047 +epoch: 0, batch: 1436, sum loss: 10037.672852, avg loss: 5.974805, ppl: 393.391510 +epoch: 0, batch: 1437, sum loss: 11240.764648, avg loss: 6.059711, 
ppl: 428.251862 +epoch: 0, batch: 1438, sum loss: 9123.720703, avg loss: 6.002448, ppl: 404.417633 +epoch: 0, batch: 1439, sum loss: 11627.365234, avg loss: 5.905213, ppl: 366.945496 +epoch: 0, batch: 1440, sum loss: 11974.188477, avg loss: 5.910261, ppl: 368.802460 +epoch: 0, batch: 1441, sum loss: 9210.007812, avg loss: 5.992198, ppl: 400.293488 +epoch: 0, batch: 1442, sum loss: 10135.658203, avg loss: 5.896253, ppl: 363.672089 +epoch: 0, batch: 1443, sum loss: 8928.120117, avg loss: 5.967995, ppl: 390.721375 +epoch: 0, batch: 1444, sum loss: 7784.844727, avg loss: 5.983739, ppl: 396.921661 +epoch: 0, batch: 1445, sum loss: 8814.298828, avg loss: 5.845026, ppl: 345.511353 +epoch: 0, batch: 1446, sum loss: 9263.706055, avg loss: 6.007591, ppl: 406.502960 +epoch: 0, batch: 1447, sum loss: 10784.400391, avg loss: 5.954943, ppl: 385.654816 +epoch: 0, batch: 1448, sum loss: 10156.203125, avg loss: 6.056173, ppl: 426.739319 +epoch: 0, batch: 1449, sum loss: 10476.050781, avg loss: 5.905327, ppl: 366.987335 +epoch: 0, batch: 1450, sum loss: 8605.100586, avg loss: 5.901989, ppl: 365.764404 +epoch: 0, batch: 1451, sum loss: 8892.405273, avg loss: 5.984122, ppl: 397.073669 +epoch: 0, batch: 1452, sum loss: 10090.568359, avg loss: 5.963693, ppl: 389.044098 +epoch: 0, batch: 1453, sum loss: 9527.227539, avg loss: 5.988201, ppl: 398.696777 +epoch: 0, batch: 1454, sum loss: 9811.462891, avg loss: 5.986249, ppl: 397.919403 +epoch: 0, batch: 1455, sum loss: 10035.502930, avg loss: 6.005687, ppl: 405.729736 +epoch: 0, batch: 1456, sum loss: 9923.519531, avg loss: 5.920955, ppl: 372.767426 +epoch: 0, batch: 1457, sum loss: 10655.933594, avg loss: 5.989844, ppl: 399.352448 +epoch: 0, batch: 1458, sum loss: 10864.091797, avg loss: 5.917261, ppl: 371.393127 +epoch: 0, batch: 1459, sum loss: 10131.950195, avg loss: 5.949471, ppl: 383.550568 +epoch: 0, batch: 1460, sum loss: 8933.808594, avg loss: 6.044526, ppl: 421.797607 +epoch: 0, batch: 1461, sum loss: 10216.629883, avg loss: 
5.915825, ppl: 370.860260 +epoch: 0, batch: 1462, sum loss: 8838.054688, avg loss: 6.045181, ppl: 422.074249 +epoch: 0, batch: 1463, sum loss: 10682.533203, avg loss: 5.924866, ppl: 374.228180 +epoch: 0, batch: 1464, sum loss: 10550.496094, avg loss: 5.954005, ppl: 385.293274 +epoch: 0, batch: 1465, sum loss: 9157.984375, avg loss: 5.840551, ppl: 343.968933 +epoch: 0, batch: 1466, sum loss: 10145.712891, avg loss: 6.039115, ppl: 419.521576 +epoch: 0, batch: 1467, sum loss: 10537.092773, avg loss: 5.946441, ppl: 382.389862 +epoch: 0, batch: 1468, sum loss: 8477.248047, avg loss: 5.982533, ppl: 396.443085 +epoch: 0, batch: 1469, sum loss: 8620.820312, avg loss: 5.900630, ppl: 365.267517 +epoch: 0, batch: 1470, sum loss: 11666.203125, avg loss: 5.801196, ppl: 330.694702 +epoch: 0, batch: 1471, sum loss: 11200.898438, avg loss: 5.970628, ppl: 391.751709 +epoch: 0, batch: 1472, sum loss: 10434.304688, avg loss: 6.031390, ppl: 416.293152 +epoch: 0, batch: 1473, sum loss: 10272.216797, avg loss: 5.979172, ppl: 395.112976 +epoch: 0, batch: 1474, sum loss: 10085.094727, avg loss: 5.799365, ppl: 330.089752 +epoch: 0, batch: 1475, sum loss: 10248.693359, avg loss: 5.735139, ppl: 309.555969 +epoch: 0, batch: 1476, sum loss: 8877.392578, avg loss: 5.879068, ppl: 357.475891 +epoch: 0, batch: 1477, sum loss: 11165.921875, avg loss: 5.842973, ppl: 344.803009 +epoch: 0, batch: 1478, sum loss: 8396.635742, avg loss: 5.934018, ppl: 377.669006 +epoch: 0, batch: 1479, sum loss: 10996.877930, avg loss: 6.035608, ppl: 418.053040 +epoch: 0, batch: 1480, sum loss: 10811.319336, avg loss: 5.950093, ppl: 383.789124 +epoch: 0, batch: 1481, sum loss: 9201.737305, avg loss: 5.772734, ppl: 321.415192 +epoch: 0, batch: 1482, sum loss: 11699.901367, avg loss: 5.820847, ppl: 337.257446 +epoch: 0, batch: 1483, sum loss: 9407.625000, avg loss: 5.757421, ppl: 316.530792 +epoch: 0, batch: 1484, sum loss: 9358.722656, avg loss: 5.838255, ppl: 343.179932 +epoch: 0, batch: 1485, sum loss: 9135.700195, avg 
loss: 5.947721, ppl: 382.879944 +epoch: 0, batch: 1486, sum loss: 10324.002930, avg loss: 5.967632, ppl: 390.579620 +epoch: 0, batch: 1487, sum loss: 9654.334961, avg loss: 5.879620, ppl: 357.673157 +epoch: 0, batch: 1488, sum loss: 10845.445312, avg loss: 5.821495, ppl: 337.476227 +epoch: 0, batch: 1489, sum loss: 10577.184570, avg loss: 5.965699, ppl: 389.825500 +epoch: 0, batch: 1490, sum loss: 9379.303711, avg loss: 5.778992, ppl: 323.432922 +epoch: 0, batch: 1491, sum loss: 9087.728516, avg loss: 5.744455, ppl: 312.453400 +epoch: 0, batch: 1492, sum loss: 10394.483398, avg loss: 5.875909, ppl: 356.348541 +epoch: 0, batch: 1493, sum loss: 10307.699219, avg loss: 5.934196, ppl: 377.736359 +epoch: 0, batch: 1494, sum loss: 10213.733398, avg loss: 5.979938, ppl: 395.415680 +epoch: 0, batch: 1495, sum loss: 8390.283203, avg loss: 5.855048, ppl: 348.991547 +epoch: 0, batch: 1496, sum loss: 10159.581055, avg loss: 5.920502, ppl: 372.598602 +epoch: 0, batch: 1497, sum loss: 8046.176270, avg loss: 5.739070, ppl: 310.775391 +epoch: 0, batch: 1498, sum loss: 10013.097656, avg loss: 6.050210, ppl: 424.202301 +epoch: 0, batch: 1499, sum loss: 11048.214844, avg loss: 5.959123, ppl: 387.270386 +epoch: 0, batch: 1500, sum loss: 11158.087891, avg loss: 6.005429, ppl: 405.625061 +epoch: 0, batch: 1501, sum loss: 10630.809570, avg loss: 5.915865, ppl: 370.875122 +epoch: 0, batch: 1502, sum loss: 7648.151367, avg loss: 5.856165, ppl: 349.381683 +epoch: 0, batch: 1503, sum loss: 9684.683594, avg loss: 5.869505, ppl: 354.073822 +epoch: 0, batch: 1504, sum loss: 10414.536133, avg loss: 5.750710, ppl: 314.413971 +epoch: 0, batch: 1505, sum loss: 10065.819336, avg loss: 5.788280, ppl: 326.451202 +epoch: 0, batch: 1506, sum loss: 9346.098633, avg loss: 5.945355, ppl: 381.975098 +epoch: 0, batch: 1507, sum loss: 9990.938477, avg loss: 6.073519, ppl: 434.205872 +epoch: 0, batch: 1508, sum loss: 10691.507812, avg loss: 5.916717, ppl: 371.190948 +epoch: 0, batch: 1509, sum loss: 
8817.645508, avg loss: 6.027099, ppl: 414.510834 +epoch: 0, batch: 1510, sum loss: 10163.176758, avg loss: 5.884874, ppl: 359.557404 +epoch: 0, batch: 1511, sum loss: 10364.101562, avg loss: 5.862049, ppl: 351.443390 +epoch: 0, batch: 1512, sum loss: 9367.700195, avg loss: 5.951525, ppl: 384.338898 +epoch: 0, batch: 1513, sum loss: 9180.878906, avg loss: 5.961609, ppl: 388.234436 +epoch: 0, batch: 1514, sum loss: 10293.987305, avg loss: 5.946844, ppl: 382.544159 +epoch: 0, batch: 1515, sum loss: 10243.038086, avg loss: 5.803421, ppl: 331.431458 +epoch: 0, batch: 1516, sum loss: 11038.332031, avg loss: 5.909171, ppl: 368.400665 +epoch: 0, batch: 1517, sum loss: 9157.982422, avg loss: 5.829397, ppl: 340.153564 +epoch: 0, batch: 1518, sum loss: 11518.331055, avg loss: 6.146388, ppl: 467.027466 +epoch: 0, batch: 1519, sum loss: 8501.378906, avg loss: 5.953347, ppl: 385.039795 +epoch: 0, batch: 1520, sum loss: 9370.387695, avg loss: 5.805693, ppl: 332.185211 +epoch: 0, batch: 1521, sum loss: 10497.890625, avg loss: 5.874589, ppl: 355.878357 +epoch: 0, batch: 1522, sum loss: 10070.980469, avg loss: 5.862038, ppl: 351.439514 +epoch: 0, batch: 1523, sum loss: 10587.907227, avg loss: 5.849673, ppl: 347.120789 +epoch: 0, batch: 1524, sum loss: 10793.218750, avg loss: 5.849983, ppl: 347.228394 +epoch: 0, batch: 1525, sum loss: 10682.116211, avg loss: 6.024882, ppl: 413.592957 +epoch: 0, batch: 1526, sum loss: 8995.554688, avg loss: 5.860296, ppl: 350.828064 +epoch: 0, batch: 1527, sum loss: 10791.864258, avg loss: 5.995480, ppl: 401.609436 +epoch: 0, batch: 1528, sum loss: 8601.876953, avg loss: 5.819944, ppl: 336.953156 +epoch: 0, batch: 1529, sum loss: 10869.041992, avg loss: 5.796823, ppl: 329.251709 +epoch: 0, batch: 1530, sum loss: 9683.753906, avg loss: 5.854749, ppl: 348.887238 +epoch: 0, batch: 1531, sum loss: 10228.397461, avg loss: 5.922639, ppl: 373.395752 +epoch: 0, batch: 1532, sum loss: 10066.511719, avg loss: 5.812074, ppl: 334.311676 +epoch: 0, batch: 1533, 
sum loss: 9108.540039, avg loss: 5.891682, ppl: 362.013550 +epoch: 0, batch: 1534, sum loss: 11534.626953, avg loss: 5.799209, ppl: 330.038269 +epoch: 0, batch: 1535, sum loss: 11290.329102, avg loss: 5.942279, ppl: 380.801727 +epoch: 0, batch: 1536, sum loss: 9800.404297, avg loss: 5.990468, ppl: 399.601410 +epoch: 0, batch: 1537, sum loss: 10119.324219, avg loss: 5.829104, ppl: 340.053833 +epoch: 0, batch: 1538, sum loss: 9686.674805, avg loss: 5.845911, ppl: 345.817444 +epoch: 0, batch: 1539, sum loss: 10083.764648, avg loss: 5.838891, ppl: 343.398315 +epoch: 0, batch: 1540, sum loss: 8386.276367, avg loss: 5.918332, ppl: 371.791077 +epoch: 0, batch: 1541, sum loss: 11826.644531, avg loss: 5.931116, ppl: 376.574402 +epoch: 0, batch: 1542, sum loss: 10995.820312, avg loss: 5.937268, ppl: 378.898285 +epoch: 0, batch: 1543, sum loss: 9012.055664, avg loss: 5.765871, ppl: 319.216827 +epoch: 0, batch: 1544, sum loss: 11062.529297, avg loss: 6.002458, ppl: 404.421478 +epoch: 0, batch: 1545, sum loss: 9768.696289, avg loss: 5.877676, ppl: 356.978485 +epoch: 0, batch: 1546, sum loss: 11304.307617, avg loss: 5.984282, ppl: 397.137085 +epoch: 0, batch: 1547, sum loss: 10875.784180, avg loss: 5.757430, ppl: 316.533661 +epoch: 0, batch: 1548, sum loss: 10643.228516, avg loss: 5.926074, ppl: 374.680634 +epoch: 0, batch: 1549, sum loss: 10162.392578, avg loss: 5.956853, ppl: 386.392395 +epoch: 0, batch: 1550, sum loss: 10458.814453, avg loss: 5.932396, ppl: 377.056824 +epoch: 0, batch: 1551, sum loss: 8283.666016, avg loss: 6.007010, ppl: 406.266571 +epoch: 0, batch: 1552, sum loss: 9724.609375, avg loss: 5.865265, ppl: 352.575531 +epoch: 0, batch: 1553, sum loss: 9807.341797, avg loss: 5.810037, ppl: 333.631348 +epoch: 0, batch: 1554, sum loss: 11736.510742, avg loss: 5.882963, ppl: 358.870911 +epoch: 0, batch: 1555, sum loss: 10633.395508, avg loss: 5.897613, ppl: 364.166992 +epoch: 0, batch: 1556, sum loss: 10140.650391, avg loss: 5.716263, ppl: 303.767578 +epoch: 0, 
batch: 1557, sum loss: 9371.949219, avg loss: 5.795887, ppl: 328.943817 +epoch: 0, batch: 1558, sum loss: 10497.000977, avg loss: 5.960818, ppl: 387.927429 +epoch: 0, batch: 1559, sum loss: 8729.878906, avg loss: 5.781377, ppl: 324.205261 +epoch: 0, batch: 1560, sum loss: 10460.898438, avg loss: 5.700762, ppl: 299.095306 +epoch: 0, batch: 1561, sum loss: 10512.380859, avg loss: 5.986550, ppl: 398.038940 +epoch: 0, batch: 1562, sum loss: 8870.862305, avg loss: 5.847635, ppl: 346.414062 +epoch: 0, batch: 1563, sum loss: 13138.033203, avg loss: 6.043253, ppl: 421.261353 +epoch: 0, batch: 1564, sum loss: 8104.132324, avg loss: 5.855587, ppl: 349.179810 +epoch: 0, batch: 1565, sum loss: 9245.394531, avg loss: 5.699996, ppl: 298.866333 +epoch: 0, batch: 1566, sum loss: 8786.580078, avg loss: 5.683428, ppl: 293.955475 +epoch: 0, batch: 1567, sum loss: 9303.309570, avg loss: 5.818205, ppl: 336.367676 +epoch: 0, batch: 1568, sum loss: 10637.568359, avg loss: 5.989622, ppl: 399.263702 +epoch: 0, batch: 1569, sum loss: 10491.067383, avg loss: 5.950690, ppl: 384.018311 +epoch: 0, batch: 1570, sum loss: 9321.871094, avg loss: 5.979392, ppl: 395.199860 +epoch: 0, batch: 1571, sum loss: 9242.150391, avg loss: 5.845762, ppl: 345.765991 +epoch: 0, batch: 1572, sum loss: 11723.936523, avg loss: 5.990770, ppl: 399.722229 +epoch: 0, batch: 1573, sum loss: 11284.318359, avg loss: 5.895673, ppl: 363.461304 +epoch: 0, batch: 1574, sum loss: 8366.182617, avg loss: 5.683548, ppl: 293.990662 +epoch: 0, batch: 1575, sum loss: 9695.358398, avg loss: 5.795194, ppl: 328.715851 +epoch: 0, batch: 1576, sum loss: 11489.525391, avg loss: 6.040760, ppl: 420.212280 +epoch: 0, batch: 1577, sum loss: 9560.635742, avg loss: 5.804879, ppl: 331.914948 +epoch: 0, batch: 1578, sum loss: 8531.432617, avg loss: 5.768379, ppl: 320.018463 +epoch: 0, batch: 1579, sum loss: 8370.395508, avg loss: 5.602674, ppl: 271.150635 +epoch: 0, batch: 1580, sum loss: 12568.025391, avg loss: 5.805093, ppl: 331.985992 +epoch: 
0, batch: 1581, sum loss: 11517.599609, avg loss: 5.915563, ppl: 370.762848 +epoch: 0, batch: 1582, sum loss: 9394.286133, avg loss: 5.749257, ppl: 313.957336 +epoch: 0, batch: 1583, sum loss: 9712.085938, avg loss: 6.043613, ppl: 421.412842 +epoch: 0, batch: 1584, sum loss: 11849.552734, avg loss: 6.033378, ppl: 417.121552 +epoch: 0, batch: 1585, sum loss: 10579.382812, avg loss: 5.851429, ppl: 347.730774 +epoch: 0, batch: 1586, sum loss: 9462.969727, avg loss: 5.819785, ppl: 336.899506 +epoch: 0, batch: 1587, sum loss: 10353.749023, avg loss: 5.755280, ppl: 315.853973 +epoch: 0, batch: 1588, sum loss: 10568.518555, avg loss: 5.897611, ppl: 364.166473 +epoch: 0, batch: 1589, sum loss: 10590.875000, avg loss: 5.919997, ppl: 372.410675 +epoch: 0, batch: 1590, sum loss: 10856.192383, avg loss: 5.890501, ppl: 361.586243 +epoch: 0, batch: 1591, sum loss: 9530.013672, avg loss: 5.843050, ppl: 344.829315 +epoch: 0, batch: 1592, sum loss: 11283.076172, avg loss: 5.935338, ppl: 378.167633 +epoch: 0, batch: 1593, sum loss: 10545.594727, avg loss: 5.874983, ppl: 356.018707 +epoch: 0, batch: 1594, sum loss: 10136.993164, avg loss: 5.832562, ppl: 341.231934 +epoch: 0, batch: 1595, sum loss: 10330.708984, avg loss: 5.940603, ppl: 380.164185 +epoch: 0, batch: 1596, sum loss: 9554.709961, avg loss: 5.801281, ppl: 330.722931 +epoch: 0, batch: 1597, sum loss: 8260.199219, avg loss: 5.792566, ppl: 327.853149 +epoch: 0, batch: 1598, sum loss: 10697.875000, avg loss: 5.714677, ppl: 303.286316 +epoch: 0, batch: 1599, sum loss: 10230.643555, avg loss: 5.802974, ppl: 331.283417 +epoch: 0, batch: 1600, sum loss: 8932.301758, avg loss: 5.696621, ppl: 297.859222 +epoch: 0, batch: 1601, sum loss: 8671.234375, avg loss: 5.819621, ppl: 336.844238 +epoch: 0, batch: 1602, sum loss: 11579.152344, avg loss: 5.798274, ppl: 329.729797 +epoch: 0, batch: 1603, sum loss: 9662.777344, avg loss: 5.835011, ppl: 342.068329 +epoch: 0, batch: 1604, sum loss: 10247.726562, avg loss: 5.680558, ppl: 293.112854 
+epoch: 0, batch: 1605, sum loss: 10975.217773, avg loss: 5.900655, ppl: 365.276581 +epoch: 0, batch: 1606, sum loss: 8802.311523, avg loss: 5.657012, ppl: 286.292053 +epoch: 0, batch: 1607, sum loss: 10603.545898, avg loss: 5.759666, ppl: 317.242493 +epoch: 0, batch: 1608, sum loss: 9536.181641, avg loss: 5.706872, ppl: 300.928131 +epoch: 0, batch: 1609, sum loss: 9496.588867, avg loss: 5.847653, ppl: 346.420349 +epoch: 0, batch: 1610, sum loss: 11133.081055, avg loss: 5.871879, ppl: 354.915100 +epoch: 0, batch: 1611, sum loss: 6935.139160, avg loss: 5.548111, ppl: 256.752197 +epoch: 0, batch: 1612, sum loss: 10028.739258, avg loss: 5.599519, ppl: 270.296417 +epoch: 0, batch: 1613, sum loss: 10096.340820, avg loss: 5.893952, ppl: 362.836517 +epoch: 0, batch: 1614, sum loss: 9415.758789, avg loss: 5.877502, ppl: 356.916718 +epoch: 0, batch: 1615, sum loss: 8861.499023, avg loss: 5.757959, ppl: 316.701233 +epoch: 0, batch: 1616, sum loss: 10997.797852, avg loss: 5.874892, ppl: 355.986298 +epoch: 0, batch: 1617, sum loss: 10947.482422, avg loss: 5.978964, ppl: 395.030853 +epoch: 0, batch: 1618, sum loss: 9151.177734, avg loss: 5.759080, ppl: 317.056488 +epoch: 0, batch: 1619, sum loss: 9282.290039, avg loss: 5.856334, ppl: 349.440826 +epoch: 0, batch: 1620, sum loss: 10415.386719, avg loss: 6.023937, ppl: 413.202057 +epoch: 0, batch: 1621, sum loss: 8934.678711, avg loss: 5.889702, ppl: 361.297729 +epoch: 0, batch: 1622, sum loss: 8664.929688, avg loss: 5.967583, ppl: 390.560425 +epoch: 0, batch: 1623, sum loss: 8445.505859, avg loss: 5.873092, ppl: 355.345917 +epoch: 0, batch: 1624, sum loss: 9869.125000, avg loss: 5.959617, ppl: 387.461578 +epoch: 0, batch: 1625, sum loss: 9656.795898, avg loss: 5.884702, ppl: 359.495514 +epoch: 0, batch: 1626, sum loss: 9652.056641, avg loss: 5.814492, ppl: 335.121185 +epoch: 0, batch: 1627, sum loss: 8753.142578, avg loss: 5.796783, ppl: 329.238831 +epoch: 0, batch: 1628, sum loss: 10186.112305, avg loss: 6.038004, ppl: 
419.055725 +epoch: 0, batch: 1629, sum loss: 9682.137695, avg loss: 5.561251, ppl: 260.148010 +epoch: 0, batch: 1630, sum loss: 10006.519531, avg loss: 5.879271, ppl: 357.548492 +epoch: 0, batch: 1631, sum loss: 10744.581055, avg loss: 5.628382, ppl: 278.211670 +epoch: 0, batch: 1632, sum loss: 9832.405273, avg loss: 5.739874, ppl: 311.025330 +epoch: 0, batch: 1633, sum loss: 9933.108398, avg loss: 5.853334, ppl: 348.394135 +epoch: 0, batch: 1634, sum loss: 9402.226562, avg loss: 5.935749, ppl: 378.323273 +epoch: 0, batch: 1635, sum loss: 8823.582031, avg loss: 5.699988, ppl: 298.863922 +epoch: 0, batch: 1636, sum loss: 10430.405273, avg loss: 5.859778, ppl: 350.646423 +epoch: 0, batch: 1637, sum loss: 9762.506836, avg loss: 5.749415, ppl: 314.007050 +epoch: 0, batch: 1638, sum loss: 8046.675781, avg loss: 5.694746, ppl: 297.301147 +epoch: 0, batch: 1639, sum loss: 11046.067383, avg loss: 5.906988, ppl: 367.597351 +epoch: 0, batch: 1640, sum loss: 10323.535156, avg loss: 5.922854, ppl: 373.476257 +epoch: 0, batch: 1641, sum loss: 9900.456055, avg loss: 5.722807, ppl: 305.761963 +epoch: 0, batch: 1642, sum loss: 9362.239258, avg loss: 5.754296, ppl: 315.543274 +epoch: 0, batch: 1643, sum loss: 9598.593750, avg loss: 5.744222, ppl: 312.380402 +epoch: 0, batch: 1644, sum loss: 10480.803711, avg loss: 5.730347, ppl: 308.076050 +epoch: 0, batch: 1645, sum loss: 8258.338867, avg loss: 5.811639, ppl: 334.166321 +epoch: 0, batch: 1646, sum loss: 9747.119141, avg loss: 5.760709, ppl: 317.573334 +epoch: 0, batch: 1647, sum loss: 11840.982422, avg loss: 5.905727, ppl: 367.134003 +epoch: 0, batch: 1648, sum loss: 8841.249023, avg loss: 5.801344, ppl: 330.743896 +epoch: 0, batch: 1649, sum loss: 9731.212891, avg loss: 5.887001, ppl: 360.323059 +epoch: 0, batch: 1650, sum loss: 7515.878906, avg loss: 5.530448, ppl: 252.256882 +epoch: 0, batch: 1651, sum loss: 11018.693359, avg loss: 5.774996, ppl: 322.143250 +epoch: 0, batch: 1652, sum loss: 10231.216797, avg loss: 5.637034, 
ppl: 280.629242 +epoch: 0, batch: 1653, sum loss: 9406.880859, avg loss: 5.543242, ppl: 255.504883 +epoch: 0, batch: 1654, sum loss: 9643.833984, avg loss: 5.736962, ppl: 310.120941 +epoch: 0, batch: 1655, sum loss: 10331.087891, avg loss: 5.765116, ppl: 318.976105 +epoch: 0, batch: 1656, sum loss: 9440.972656, avg loss: 5.683909, ppl: 294.096802 +epoch: 0, batch: 1657, sum loss: 10930.827148, avg loss: 5.798847, ppl: 329.919006 +epoch: 0, batch: 1658, sum loss: 10458.437500, avg loss: 5.752716, ppl: 315.045197 +epoch: 0, batch: 1659, sum loss: 8670.206055, avg loss: 5.652025, ppl: 284.867676 +epoch: 0, batch: 1660, sum loss: 8975.919922, avg loss: 5.920792, ppl: 372.706635 +epoch: 0, batch: 1661, sum loss: 6525.278320, avg loss: 5.501921, ppl: 245.162476 +epoch: 0, batch: 1662, sum loss: 9965.576172, avg loss: 5.548762, ppl: 256.919250 +epoch: 0, batch: 1663, sum loss: 11687.496094, avg loss: 5.806009, ppl: 332.290405 +epoch: 0, batch: 1664, sum loss: 9700.300781, avg loss: 5.833013, ppl: 341.385742 +epoch: 0, batch: 1665, sum loss: 10296.688477, avg loss: 5.948404, ppl: 383.141479 +epoch: 0, batch: 1666, sum loss: 10879.362305, avg loss: 5.774609, ppl: 322.018372 +epoch: 0, batch: 1667, sum loss: 9368.383789, avg loss: 5.729898, ppl: 307.937836 +epoch: 0, batch: 1668, sum loss: 9427.579102, avg loss: 5.669019, ppl: 289.750214 +epoch: 0, batch: 1669, sum loss: 8142.025391, avg loss: 5.721733, ppl: 305.433655 +epoch: 0, batch: 1670, sum loss: 9689.517578, avg loss: 5.826529, ppl: 339.179352 +epoch: 0, batch: 1671, sum loss: 9323.605469, avg loss: 5.912242, ppl: 369.533691 +epoch: 0, batch: 1672, sum loss: 9501.444336, avg loss: 5.748000, ppl: 313.562958 +epoch: 0, batch: 1673, sum loss: 9727.958984, avg loss: 5.938924, ppl: 379.526459 +epoch: 0, batch: 1674, sum loss: 11709.739258, avg loss: 5.726034, ppl: 306.750183 +epoch: 0, batch: 1675, sum loss: 8882.592773, avg loss: 5.614787, ppl: 274.454803 +epoch: 0, batch: 1676, sum loss: 11455.700195, avg loss: 5.929451, 
ppl: 375.948242 +epoch: 0, batch: 1677, sum loss: 9625.899414, avg loss: 5.678997, ppl: 292.655609 +epoch: 0, batch: 1678, sum loss: 10106.144531, avg loss: 5.944791, ppl: 381.759674 +epoch: 0, batch: 1679, sum loss: 10997.605469, avg loss: 5.818839, ppl: 336.581085 +epoch: 0, batch: 1680, sum loss: 10968.949219, avg loss: 5.822160, ppl: 337.700775 +epoch: 0, batch: 1681, sum loss: 9885.420898, avg loss: 5.808120, ppl: 332.992584 +epoch: 0, batch: 1682, sum loss: 10059.037109, avg loss: 5.791040, ppl: 327.353424 +epoch: 0, batch: 1683, sum loss: 8449.412109, avg loss: 5.670747, ppl: 290.251221 +epoch: 0, batch: 1684, sum loss: 9417.372070, avg loss: 5.690255, ppl: 295.969147 +epoch: 0, batch: 1685, sum loss: 9097.979492, avg loss: 5.809693, ppl: 333.516663 +epoch: 0, batch: 1686, sum loss: 8493.045898, avg loss: 5.643220, ppl: 282.370483 +epoch: 0, batch: 1687, sum loss: 9850.182617, avg loss: 5.811317, ppl: 334.058777 +epoch: 0, batch: 1688, sum loss: 9678.519531, avg loss: 5.524270, ppl: 250.703156 +epoch: 0, batch: 1689, sum loss: 9496.898438, avg loss: 5.679964, ppl: 292.938751 +epoch: 0, batch: 1690, sum loss: 10592.302734, avg loss: 5.842418, ppl: 344.611511 +epoch: 0, batch: 1691, sum loss: 7797.652832, avg loss: 5.704208, ppl: 300.127808 +epoch: 0, batch: 1692, sum loss: 9521.706055, avg loss: 5.681209, ppl: 293.303833 +epoch: 0, batch: 1693, sum loss: 9377.356445, avg loss: 5.806413, ppl: 332.424469 +epoch: 0, batch: 1694, sum loss: 9553.056641, avg loss: 5.656043, ppl: 286.014648 +epoch: 0, batch: 1695, sum loss: 9360.849609, avg loss: 5.711318, ppl: 302.269043 +epoch: 0, batch: 1696, sum loss: 10219.239258, avg loss: 5.812992, ppl: 334.618683 +epoch: 0, batch: 1697, sum loss: 9305.956055, avg loss: 5.819860, ppl: 336.924866 +epoch: 0, batch: 1698, sum loss: 11212.958984, avg loss: 5.756139, ppl: 316.125488 +epoch: 0, batch: 1699, sum loss: 10523.819336, avg loss: 5.827143, ppl: 339.387573 +epoch: 0, batch: 1700, sum loss: 9404.096680, avg loss: 5.769384, 
ppl: 320.340454 +epoch: 0, batch: 1701, sum loss: 9434.079102, avg loss: 5.780686, ppl: 323.981323 +epoch: 0, batch: 1702, sum loss: 8193.081055, avg loss: 5.666031, ppl: 288.885773 +epoch: 0, batch: 1703, sum loss: 8240.940430, avg loss: 5.571968, ppl: 262.950958 +epoch: 0, batch: 1704, sum loss: 9325.865234, avg loss: 5.753155, ppl: 315.183563 +epoch: 0, batch: 1705, sum loss: 8297.595703, avg loss: 5.691081, ppl: 296.213531 +epoch: 0, batch: 1706, sum loss: 11084.155273, avg loss: 5.737141, ppl: 310.176239 +epoch: 0, batch: 1707, sum loss: 8922.869141, avg loss: 5.749272, ppl: 313.961975 +epoch: 0, batch: 1708, sum loss: 9840.848633, avg loss: 5.826435, ppl: 339.147491 +epoch: 0, batch: 1709, sum loss: 9575.574219, avg loss: 5.692969, ppl: 296.773529 +epoch: 0, batch: 1710, sum loss: 10511.713867, avg loss: 5.712888, ppl: 302.744049 +epoch: 0, batch: 1711, sum loss: 10620.214844, avg loss: 5.883776, ppl: 359.162781 +epoch: 0, batch: 1712, sum loss: 10467.651367, avg loss: 5.741992, ppl: 311.684662 +epoch: 0, batch: 1713, sum loss: 9604.305664, avg loss: 5.580654, ppl: 265.245056 +epoch: 0, batch: 1714, sum loss: 9670.436523, avg loss: 5.685148, ppl: 294.461365 +epoch: 0, batch: 1715, sum loss: 9192.713867, avg loss: 5.720419, ppl: 305.032837 +epoch: 0, batch: 1716, sum loss: 10490.274414, avg loss: 5.943498, ppl: 381.266327 +epoch: 0, batch: 1717, sum loss: 11616.150391, avg loss: 5.796482, ppl: 329.139648 +epoch: 0, batch: 1718, sum loss: 10054.182617, avg loss: 5.518212, ppl: 249.189163 +epoch: 0, batch: 1719, sum loss: 10703.260742, avg loss: 5.801225, ppl: 330.704498 +epoch: 0, batch: 1720, sum loss: 9831.843750, avg loss: 5.618196, ppl: 275.392273 +epoch: 0, batch: 1721, sum loss: 10261.722656, avg loss: 5.804142, ppl: 331.670502 +epoch: 0, batch: 1722, sum loss: 10688.092773, avg loss: 5.834112, ppl: 341.761017 +epoch: 0, batch: 1723, sum loss: 10285.241211, avg loss: 5.726749, ppl: 306.969666 +epoch: 0, batch: 1724, sum loss: 8206.047852, avg loss: 
5.574761, ppl: 263.686493 +epoch: 0, batch: 1725, sum loss: 9447.542969, avg loss: 5.577062, ppl: 264.293884 +epoch: 0, batch: 1726, sum loss: 9082.065430, avg loss: 5.740875, ppl: 311.336792 +epoch: 0, batch: 1727, sum loss: 9383.580078, avg loss: 5.588791, ppl: 267.412109 +epoch: 0, batch: 1728, sum loss: 8256.811523, avg loss: 5.647614, ppl: 283.613953 +epoch: 0, batch: 1729, sum loss: 11704.243164, avg loss: 5.667914, ppl: 289.430267 +epoch: 0, batch: 1730, sum loss: 8270.623047, avg loss: 5.874022, ppl: 355.676636 +epoch: 0, batch: 1731, sum loss: 10215.733398, avg loss: 5.864370, ppl: 352.260284 +epoch: 0, batch: 1732, sum loss: 8429.761719, avg loss: 5.627344, ppl: 277.923004 +epoch: 0, batch: 1733, sum loss: 9687.332031, avg loss: 5.545124, ppl: 255.986328 +epoch: 0, batch: 1734, sum loss: 9997.591797, avg loss: 5.758981, ppl: 317.025177 +epoch: 0, batch: 1735, sum loss: 9384.664062, avg loss: 5.796581, ppl: 329.172119 +epoch: 0, batch: 1736, sum loss: 10148.974609, avg loss: 5.829394, ppl: 340.152435 +epoch: 0, batch: 1737, sum loss: 9803.745117, avg loss: 5.676749, ppl: 291.998535 +epoch: 0, batch: 1738, sum loss: 9046.860352, avg loss: 5.672013, ppl: 290.618896 +epoch: 0, batch: 1739, sum loss: 9469.585938, avg loss: 5.633305, ppl: 279.584625 +epoch: 0, batch: 1740, sum loss: 8567.932617, avg loss: 5.655401, ppl: 285.831146 +epoch: 0, batch: 1741, sum loss: 8863.686523, avg loss: 5.634893, ppl: 280.028931 +epoch: 0, batch: 1742, sum loss: 8003.733887, avg loss: 5.628505, ppl: 278.245758 +epoch: 0, batch: 1743, sum loss: 11175.258789, avg loss: 5.805329, ppl: 332.064362 +epoch: 0, batch: 1744, sum loss: 10983.537109, avg loss: 5.738525, ppl: 310.605896 +epoch: 0, batch: 1745, sum loss: 9862.572266, avg loss: 5.704206, ppl: 300.127075 +epoch: 0, batch: 1746, sum loss: 10825.930664, avg loss: 5.902906, ppl: 366.099976 +epoch: 0, batch: 1747, sum loss: 10139.205078, avg loss: 5.636023, ppl: 280.345428 +epoch: 0, batch: 1748, sum loss: 9696.887695, avg loss: 
5.848545, ppl: 346.729553 +epoch: 0, batch: 1749, sum loss: 9629.382812, avg loss: 5.681052, ppl: 293.257690 +epoch: 0, batch: 1750, sum loss: 11081.513672, avg loss: 5.814015, ppl: 334.961426 +epoch: 0, batch: 1751, sum loss: 9197.019531, avg loss: 5.910681, ppl: 368.957245 +epoch: 0, batch: 1752, sum loss: 9463.455078, avg loss: 5.596366, ppl: 269.445557 +epoch: 0, batch: 1753, sum loss: 10974.560547, avg loss: 5.709970, ppl: 301.861877 +epoch: 0, batch: 1754, sum loss: 10217.840820, avg loss: 5.746817, ppl: 313.192230 +epoch: 0, batch: 1755, sum loss: 9437.425781, avg loss: 5.740527, ppl: 311.228271 +epoch: 0, batch: 1756, sum loss: 10890.583984, avg loss: 5.987127, ppl: 398.268677 +epoch: 0, batch: 1757, sum loss: 9559.299805, avg loss: 5.600059, ppl: 270.442230 +epoch: 0, batch: 1758, sum loss: 9093.630859, avg loss: 5.773734, ppl: 321.736725 +epoch: 0, batch: 1759, sum loss: 8998.299805, avg loss: 5.568255, ppl: 261.976532 +epoch: 0, batch: 1760, sum loss: 9053.125977, avg loss: 5.616083, ppl: 274.810883 +epoch: 0, batch: 1761, sum loss: 8886.996094, avg loss: 5.782041, ppl: 324.420532 +epoch: 0, batch: 1762, sum loss: 9125.191406, avg loss: 5.608599, ppl: 272.761871 +epoch: 0, batch: 1763, sum loss: 10069.681641, avg loss: 5.760688, ppl: 317.566681 +epoch: 0, batch: 1764, sum loss: 9151.266602, avg loss: 5.492957, ppl: 242.974655 +epoch: 0, batch: 1765, sum loss: 12041.302734, avg loss: 5.650541, ppl: 284.445251 +epoch: 0, batch: 1766, sum loss: 8775.445312, avg loss: 5.639746, ppl: 281.391296 +epoch: 0, batch: 1767, sum loss: 8396.703125, avg loss: 5.794827, ppl: 328.595184 +epoch: 0, batch: 1768, sum loss: 8978.634766, avg loss: 5.668330, ppl: 289.550629 +epoch: 0, batch: 1769, sum loss: 8756.732422, avg loss: 5.549260, ppl: 257.047302 +epoch: 0, batch: 1770, sum loss: 8961.990234, avg loss: 5.693768, ppl: 297.010529 +epoch: 0, batch: 1771, sum loss: 9753.467773, avg loss: 5.677222, ppl: 292.136810 +epoch: 0, batch: 1772, sum loss: 11503.617188, avg loss: 
5.708992, ppl: 301.566803 +epoch: 0, batch: 1773, sum loss: 9700.039062, avg loss: 5.797991, ppl: 329.636749 +epoch: 0, batch: 1774, sum loss: 10380.278320, avg loss: 5.722314, ppl: 305.611389 +epoch: 0, batch: 1775, sum loss: 8489.079102, avg loss: 5.755308, ppl: 315.862701 +epoch: 0, batch: 1776, sum loss: 9530.636719, avg loss: 5.703553, ppl: 299.931213 +epoch: 0, batch: 1777, sum loss: 9900.529297, avg loss: 5.647764, ppl: 283.656433 +epoch: 0, batch: 1778, sum loss: 9633.602539, avg loss: 5.555711, ppl: 258.710785 +epoch: 0, batch: 1779, sum loss: 10598.460938, avg loss: 5.791509, ppl: 327.506897 +epoch: 0, batch: 1780, sum loss: 10067.074219, avg loss: 5.779032, ppl: 323.446014 +epoch: 0, batch: 1781, sum loss: 10067.370117, avg loss: 5.703892, ppl: 300.032928 +epoch: 0, batch: 1782, sum loss: 9545.832031, avg loss: 5.592169, ppl: 268.316925 +epoch: 0, batch: 1783, sum loss: 9183.596680, avg loss: 5.830855, ppl: 340.649780 +epoch: 0, batch: 1784, sum loss: 9829.120117, avg loss: 5.678290, ppl: 292.448883 +epoch: 0, batch: 1785, sum loss: 10655.719727, avg loss: 5.819618, ppl: 336.843262 +epoch: 0, batch: 1786, sum loss: 9601.430664, avg loss: 5.790971, ppl: 327.330811 +epoch: 0, batch: 1787, sum loss: 8663.001953, avg loss: 5.718153, ppl: 304.342438 +epoch: 0, batch: 1788, sum loss: 8481.938477, avg loss: 5.536513, ppl: 253.791565 +epoch: 0, batch: 1789, sum loss: 10550.526367, avg loss: 5.672326, ppl: 290.709961 +epoch: 0, batch: 1790, sum loss: 8911.574219, avg loss: 5.828368, ppl: 339.803741 +epoch: 0, batch: 1791, sum loss: 10260.017578, avg loss: 5.671652, ppl: 290.514008 +epoch: 0, batch: 1792, sum loss: 10145.195312, avg loss: 5.410771, ppl: 223.804047 +epoch: 0, batch: 1793, sum loss: 8750.723633, avg loss: 5.708235, ppl: 301.338684 +epoch: 0, batch: 1794, sum loss: 9785.111328, avg loss: 5.695641, ppl: 297.567474 +epoch: 0, batch: 1795, sum loss: 9868.975586, avg loss: 5.597831, ppl: 269.840424 +epoch: 0, batch: 1796, sum loss: 9534.185547, avg loss: 
5.638194, ppl: 280.954865 +epoch: 0, batch: 1797, sum loss: 8761.369141, avg loss: 5.779267, ppl: 323.522064 +epoch: 0, batch: 1798, sum loss: 9012.665039, avg loss: 5.502237, ppl: 245.240005 +epoch: 0, batch: 1799, sum loss: 9116.583984, avg loss: 5.562284, ppl: 260.416962 +epoch: 0, batch: 1800, sum loss: 10882.562500, avg loss: 5.767124, ppl: 319.617096 +epoch: 0, batch: 1801, sum loss: 10087.578125, avg loss: 5.699197, ppl: 298.627441 +epoch: 0, batch: 1802, sum loss: 8825.970703, avg loss: 5.593137, ppl: 268.576904 +epoch: 0, batch: 1803, sum loss: 9495.562500, avg loss: 5.765368, ppl: 319.056580 +epoch: 0, batch: 1804, sum loss: 10285.589844, avg loss: 5.759009, ppl: 317.033966 +epoch: 0, batch: 1805, sum loss: 9089.582031, avg loss: 5.760191, ppl: 317.409088 +epoch: 0, batch: 1806, sum loss: 7839.990723, avg loss: 5.656559, ppl: 286.162262 +epoch: 0, batch: 1807, sum loss: 8777.880859, avg loss: 5.612456, ppl: 273.815857 +epoch: 0, batch: 1808, sum loss: 10085.990234, avg loss: 5.701521, ppl: 299.322296 +epoch: 0, batch: 1809, sum loss: 9263.273438, avg loss: 5.563528, ppl: 260.740997 +epoch: 0, batch: 1810, sum loss: 9144.638672, avg loss: 5.824611, ppl: 338.529327 +epoch: 0, batch: 1811, sum loss: 10923.895508, avg loss: 5.651266, ppl: 284.651489 +epoch: 0, batch: 1812, sum loss: 8697.186523, avg loss: 5.632893, ppl: 279.469482 +epoch: 0, batch: 1813, sum loss: 11496.454102, avg loss: 5.877532, ppl: 356.927429 +epoch: 0, batch: 1814, sum loss: 9228.783203, avg loss: 5.400107, ppl: 221.430191 +epoch: 0, batch: 1815, sum loss: 8941.081055, avg loss: 5.680483, ppl: 293.091064 +epoch: 0, batch: 1816, sum loss: 9791.509766, avg loss: 5.563358, ppl: 260.696747 +epoch: 0, batch: 1817, sum loss: 9406.327148, avg loss: 5.599004, ppl: 270.157257 +epoch: 0, batch: 1818, sum loss: 9228.505859, avg loss: 5.623708, ppl: 276.914337 +epoch: 0, batch: 1819, sum loss: 10529.124023, avg loss: 5.716137, ppl: 303.729340 +epoch: 0, batch: 1820, sum loss: 10445.880859, avg loss: 
5.631203, ppl: 278.997467 +epoch: 0, batch: 1821, sum loss: 8267.459961, avg loss: 5.697767, ppl: 298.200714 +epoch: 0, batch: 1822, sum loss: 9108.333984, avg loss: 5.646828, ppl: 283.391174 +epoch: 0, batch: 1823, sum loss: 8932.359375, avg loss: 5.660557, ppl: 287.308716 +epoch: 0, batch: 1824, sum loss: 9168.060547, avg loss: 5.755217, ppl: 315.834106 +epoch: 0, batch: 1825, sum loss: 10033.792969, avg loss: 5.704260, ppl: 300.143250 +epoch: 0, batch: 1826, sum loss: 9971.561523, avg loss: 5.770580, ppl: 320.723633 +epoch: 0, batch: 1827, sum loss: 7957.527832, avg loss: 5.529901, ppl: 252.118958 +epoch: 0, batch: 1828, sum loss: 10332.382812, avg loss: 5.801450, ppl: 330.778931 +epoch: 0, batch: 1829, sum loss: 9130.842773, avg loss: 5.605183, ppl: 271.831573 +epoch: 0, batch: 1830, sum loss: 9125.474609, avg loss: 5.639972, ppl: 281.454773 +epoch: 0, batch: 1831, sum loss: 10810.288086, avg loss: 5.692622, ppl: 296.670380 +epoch: 0, batch: 1832, sum loss: 8650.993164, avg loss: 5.574094, ppl: 263.510651 +epoch: 0, batch: 1833, sum loss: 9854.474609, avg loss: 5.689651, ppl: 295.790222 +epoch: 0, batch: 1834, sum loss: 10026.388672, avg loss: 5.667829, ppl: 289.405426 +epoch: 0, batch: 1835, sum loss: 9694.779297, avg loss: 5.770702, ppl: 320.762939 +epoch: 0, batch: 1836, sum loss: 9002.948242, avg loss: 5.680093, ppl: 292.976746 +epoch: 0, batch: 1837, sum loss: 9998.033203, avg loss: 5.651800, ppl: 284.803558 +epoch: 0, batch: 1838, sum loss: 9114.560547, avg loss: 5.671786, ppl: 290.553101 +epoch: 0, batch: 1839, sum loss: 11414.890625, avg loss: 5.753473, ppl: 315.283691 +epoch: 0, batch: 1840, sum loss: 10290.618164, avg loss: 5.679149, ppl: 292.700287 +epoch: 0, batch: 1841, sum loss: 9748.667969, avg loss: 5.710995, ppl: 302.171509 +epoch: 0, batch: 1842, sum loss: 10463.640625, avg loss: 5.809906, ppl: 333.587769 +epoch: 0, batch: 1843, sum loss: 9217.478516, avg loss: 5.707417, ppl: 301.092346 +epoch: 0, batch: 1844, sum loss: 11396.503906, avg loss: 
5.703956, ppl: 300.051971 +epoch: 0, batch: 1845, sum loss: 10077.180664, avg loss: 5.706218, ppl: 300.731476 +epoch: 0, batch: 1846, sum loss: 10171.069336, avg loss: 5.653735, ppl: 285.355316 +epoch: 0, batch: 1847, sum loss: 9176.469727, avg loss: 5.738880, ppl: 310.716248 +epoch: 0, batch: 1848, sum loss: 9373.226562, avg loss: 5.626186, ppl: 277.601440 +epoch: 0, batch: 1849, sum loss: 8672.498047, avg loss: 5.541532, ppl: 255.068481 +epoch: 0, batch: 1850, sum loss: 9903.208984, avg loss: 5.626823, ppl: 277.778198 +epoch: 0, batch: 1851, sum loss: 10263.533203, avg loss: 5.654840, ppl: 285.670624 +epoch: 0, batch: 1852, sum loss: 10367.231445, avg loss: 5.827561, ppl: 339.529510 +epoch: 0, batch: 1853, sum loss: 10270.875000, avg loss: 5.585033, ppl: 266.409058 +epoch: 0, batch: 1854, sum loss: 9594.754883, avg loss: 5.694217, ppl: 297.143951 +epoch: 0, batch: 1855, sum loss: 9669.495117, avg loss: 5.759080, ppl: 317.056488 +epoch: 0, batch: 1856, sum loss: 9090.636719, avg loss: 5.731801, ppl: 308.524445 +epoch: 0, batch: 1857, sum loss: 9525.649414, avg loss: 5.693754, ppl: 297.006409 +epoch: 0, batch: 1858, sum loss: 8575.207031, avg loss: 5.694029, ppl: 297.088135 +epoch: 0, batch: 1859, sum loss: 9193.888672, avg loss: 5.515230, ppl: 248.447159 +epoch: 0, batch: 1860, sum loss: 8303.541016, avg loss: 5.695158, ppl: 297.423920 +epoch: 0, batch: 1861, sum loss: 9063.337891, avg loss: 5.650460, ppl: 284.422333 +epoch: 0, batch: 1862, sum loss: 7792.772461, avg loss: 5.542512, ppl: 255.318665 +epoch: 0, batch: 1863, sum loss: 9639.672852, avg loss: 5.697206, ppl: 298.033539 +epoch: 0, batch: 1864, sum loss: 9950.821289, avg loss: 5.562225, ppl: 260.401672 +epoch: 0, batch: 1865, sum loss: 9864.106445, avg loss: 5.744966, ppl: 312.613007 +epoch: 0, batch: 1866, sum loss: 9509.681641, avg loss: 5.647079, ppl: 283.462402 +epoch: 0, batch: 1867, sum loss: 10198.035156, avg loss: 5.668724, ppl: 289.664703 +epoch: 0, batch: 1868, sum loss: 8734.625000, avg loss: 
5.624356, ppl: 277.093842 +epoch: 0, batch: 1869, sum loss: 10247.292969, avg loss: 5.670887, ppl: 290.291901 +epoch: 0, batch: 1870, sum loss: 8641.286133, avg loss: 5.404181, ppl: 222.334167 +epoch: 0, batch: 1871, sum loss: 8105.199219, avg loss: 5.652161, ppl: 284.906525 +epoch: 0, batch: 1872, sum loss: 9789.111328, avg loss: 5.539961, ppl: 254.668152 +epoch: 0, batch: 1873, sum loss: 10486.250000, avg loss: 5.730191, ppl: 308.028168 +epoch: 0, batch: 1874, sum loss: 11090.439453, avg loss: 5.696168, ppl: 297.724457 +epoch: 0, batch: 1875, sum loss: 7834.014648, avg loss: 5.451646, ppl: 233.141571 +epoch: 0, batch: 1876, sum loss: 9476.820312, avg loss: 5.667955, ppl: 289.441986 +epoch: 0, batch: 1877, sum loss: 9334.863281, avg loss: 5.845250, ppl: 345.588806 +epoch: 0, batch: 1878, sum loss: 9963.570312, avg loss: 5.588094, ppl: 267.225739 +epoch: 0, batch: 1879, sum loss: 8987.796875, avg loss: 5.670534, ppl: 290.189484 +epoch: 0, batch: 1880, sum loss: 9347.152344, avg loss: 5.593748, ppl: 268.740997 +epoch: 0, batch: 1881, sum loss: 8467.466797, avg loss: 5.701998, ppl: 299.465057 +epoch: 0, batch: 1882, sum loss: 8116.992188, avg loss: 5.473360, ppl: 238.259308 +epoch: 0, batch: 1883, sum loss: 8973.246094, avg loss: 5.711805, ppl: 302.416534 +epoch: 0, batch: 1884, sum loss: 8092.203125, avg loss: 5.588538, ppl: 267.344513 +epoch: 0, batch: 1885, sum loss: 10264.888672, avg loss: 5.728175, ppl: 307.407654 +epoch: 0, batch: 1886, sum loss: 8664.328125, avg loss: 5.463006, ppl: 235.805313 +epoch: 0, batch: 1887, sum loss: 7861.243164, avg loss: 5.627232, ppl: 277.891876 +epoch: 0, batch: 1888, sum loss: 11435.935547, avg loss: 5.959320, ppl: 387.346680 +epoch: 0, batch: 1889, sum loss: 8643.908203, avg loss: 5.701787, ppl: 299.401825 +epoch: 0, batch: 1890, sum loss: 9980.044922, avg loss: 5.559914, ppl: 259.800385 +epoch: 0, batch: 1891, sum loss: 9398.309570, avg loss: 5.783575, ppl: 324.918732 +epoch: 0, batch: 1892, sum loss: 9618.201172, avg loss: 
5.818634, ppl: 336.511902 +epoch: 0, batch: 1893, sum loss: 9643.733398, avg loss: 5.482509, ppl: 240.449265 +epoch: 0, batch: 1894, sum loss: 9817.589844, avg loss: 5.540401, ppl: 254.780014 +epoch: 0, batch: 1895, sum loss: 8259.509766, avg loss: 5.480763, ppl: 240.029770 +epoch: 0, batch: 1896, sum loss: 8923.440430, avg loss: 5.535633, ppl: 253.568268 +epoch: 0, batch: 1897, sum loss: 11663.806641, avg loss: 5.607599, ppl: 272.489288 +epoch: 0, batch: 1898, sum loss: 11463.611328, avg loss: 5.697620, ppl: 298.157074 +epoch: 0, batch: 1899, sum loss: 8605.170898, avg loss: 5.498512, ppl: 244.328049 +epoch: 0, batch: 1900, sum loss: 9608.146484, avg loss: 5.641894, ppl: 281.996399 +epoch: 0, batch: 1901, sum loss: 10326.490234, avg loss: 5.948439, ppl: 383.154816 +epoch: 0, batch: 1902, sum loss: 10231.771484, avg loss: 5.690640, ppl: 296.083038 +epoch: 0, batch: 1903, sum loss: 8763.695312, avg loss: 5.792264, ppl: 327.754211 +epoch: 0, batch: 1904, sum loss: 6979.041992, avg loss: 5.482358, ppl: 240.413040 +epoch: 0, batch: 1905, sum loss: 8595.888672, avg loss: 5.475089, ppl: 238.671616 +epoch: 0, batch: 1906, sum loss: 9641.139648, avg loss: 5.560057, ppl: 259.837677 +epoch: 0, batch: 1907, sum loss: 8234.186523, avg loss: 5.582500, ppl: 265.734985 +epoch: 0, batch: 1908, sum loss: 9744.134766, avg loss: 5.629194, ppl: 278.437561 +epoch: 0, batch: 1909, sum loss: 9972.049805, avg loss: 5.580330, ppl: 265.159180 +epoch: 0, batch: 1910, sum loss: 7576.565918, avg loss: 5.566911, ppl: 261.624634 +epoch: 0, batch: 1911, sum loss: 10052.476562, avg loss: 5.787263, ppl: 326.119354 +epoch: 0, batch: 1912, sum loss: 9073.783203, avg loss: 5.590747, ppl: 267.935791 +epoch: 0, batch: 1913, sum loss: 9904.753906, avg loss: 5.567596, ppl: 261.804077 +epoch: 0, batch: 1914, sum loss: 9910.439453, avg loss: 5.624540, ppl: 277.144867 +epoch: 0, batch: 1915, sum loss: 9792.212891, avg loss: 5.630945, ppl: 278.925507 +epoch: 0, batch: 1916, sum loss: 9505.498047, avg loss: 
5.591470, ppl: 268.129425 +epoch: 0, batch: 1917, sum loss: 8965.947266, avg loss: 5.681842, ppl: 293.489624 +epoch: 0, batch: 1918, sum loss: 9950.347656, avg loss: 5.488333, ppl: 241.853760 +epoch: 0, batch: 1919, sum loss: 10299.031250, avg loss: 5.609494, ppl: 273.006134 +epoch: 0, batch: 1920, sum loss: 9692.711914, avg loss: 5.738729, ppl: 310.669159 +epoch: 0, batch: 1921, sum loss: 7352.472656, avg loss: 5.478743, ppl: 239.545319 +epoch: 0, batch: 1922, sum loss: 7538.020508, avg loss: 5.458378, ppl: 234.716476 +epoch: 0, batch: 1923, sum loss: 9380.448242, avg loss: 5.495283, ppl: 243.540466 +epoch: 0, batch: 1924, sum loss: 9877.725586, avg loss: 5.667083, ppl: 289.189667 +epoch: 0, batch: 1925, sum loss: 11582.188477, avg loss: 5.677543, ppl: 292.230591 +epoch: 0, batch: 1926, sum loss: 11976.753906, avg loss: 5.708653, ppl: 301.464722 +epoch: 0, batch: 1927, sum loss: 8326.747070, avg loss: 5.445878, ppl: 231.800720 +epoch: 0, batch: 1928, sum loss: 10013.733398, avg loss: 5.538569, ppl: 254.313934 +epoch: 0, batch: 1929, sum loss: 9374.779297, avg loss: 5.570279, ppl: 262.507233 +epoch: 0, batch: 1930, sum loss: 7861.106934, avg loss: 5.659544, ppl: 287.017731 +epoch: 0, batch: 1931, sum loss: 9914.019531, avg loss: 5.501676, ppl: 245.102280 +epoch: 0, batch: 1932, sum loss: 9382.448242, avg loss: 5.522336, ppl: 250.218872 +epoch: 0, batch: 1933, sum loss: 9729.697266, avg loss: 5.693211, ppl: 296.845276 +epoch: 0, batch: 1934, sum loss: 9617.565430, avg loss: 5.598117, ppl: 269.917755 +epoch: 0, batch: 1935, sum loss: 10077.324219, avg loss: 5.564508, ppl: 260.996887 +epoch: 0, batch: 1936, sum loss: 10105.979492, avg loss: 5.661613, ppl: 287.612335 +epoch: 0, batch: 1937, sum loss: 9589.390625, avg loss: 5.624276, ppl: 277.071655 +epoch: 0, batch: 1938, sum loss: 9336.435547, avg loss: 5.600741, ppl: 270.626831 +epoch: 0, batch: 1939, sum loss: 9845.538086, avg loss: 5.760993, ppl: 317.663757 +epoch: 0, batch: 1940, sum loss: 11099.853516, avg loss: 
5.663190, ppl: 288.066223 +epoch: 0, batch: 1941, sum loss: 10811.744141, avg loss: 5.696388, ppl: 297.789917 +epoch: 0, batch: 1942, sum loss: 9322.462891, avg loss: 5.747511, ppl: 313.409576 +epoch: 0, batch: 1943, sum loss: 8619.644531, avg loss: 5.630075, ppl: 278.683014 +epoch: 0, batch: 1944, sum loss: 10088.395508, avg loss: 5.595339, ppl: 269.168823 +epoch: 0, batch: 1945, sum loss: 9620.411133, avg loss: 5.596516, ppl: 269.485931 +epoch: 0, batch: 1946, sum loss: 8845.960938, avg loss: 5.577529, ppl: 264.417419 +epoch: 0, batch: 1947, sum loss: 9719.298828, avg loss: 5.826918, ppl: 339.311340 +epoch: 0, batch: 1948, sum loss: 10215.678711, avg loss: 5.784642, ppl: 325.265656 +epoch: 0, batch: 1949, sum loss: 9564.950195, avg loss: 5.649705, ppl: 284.207581 +epoch: 0, batch: 1950, sum loss: 9205.174805, avg loss: 5.619764, ppl: 275.824371 +epoch: 0, batch: 1951, sum loss: 9577.078125, avg loss: 5.707436, ppl: 301.098083 +epoch: 0, batch: 1952, sum loss: 10564.415039, avg loss: 5.625354, ppl: 277.370392 +epoch: 0, batch: 1953, sum loss: 8719.634766, avg loss: 5.607482, ppl: 272.457458 +epoch: 0, batch: 1954, sum loss: 9670.329102, avg loss: 5.749304, ppl: 313.972015 +epoch: 0, batch: 1955, sum loss: 8686.998047, avg loss: 5.648243, ppl: 283.792389 +epoch: 0, batch: 1956, sum loss: 8220.375977, avg loss: 5.415267, ppl: 224.812668 +epoch: 0, batch: 1957, sum loss: 10303.412109, avg loss: 5.847567, ppl: 346.390442 +epoch: 0, batch: 1958, sum loss: 8949.360352, avg loss: 5.603858, ppl: 271.471588 +epoch: 0, batch: 1959, sum loss: 8858.421875, avg loss: 5.617262, ppl: 275.135010 +epoch: 0, batch: 1960, sum loss: 10222.523438, avg loss: 5.795082, ppl: 328.679016 +epoch: 0, batch: 1961, sum loss: 10122.273438, avg loss: 5.510220, ppl: 247.205521 +epoch: 0, batch: 1962, sum loss: 9314.812500, avg loss: 5.704110, ppl: 300.098328 +epoch: 0, batch: 1963, sum loss: 8901.377930, avg loss: 5.563361, ppl: 260.697601 +epoch: 0, batch: 1964, sum loss: 10127.131836, avg loss: 
5.626184, ppl: 277.600891 +epoch: 0, batch: 1965, sum loss: 9018.542969, avg loss: 5.522684, ppl: 250.305862 +epoch: 0, batch: 1966, sum loss: 8324.005859, avg loss: 5.553040, ppl: 258.020630 +epoch: 0, batch: 1967, sum loss: 8176.102539, avg loss: 5.382556, ppl: 217.577682 +epoch: 0, batch: 1968, sum loss: 8696.143555, avg loss: 5.445300, ppl: 231.666687 +epoch: 0, batch: 1969, sum loss: 9897.291016, avg loss: 5.658828, ppl: 286.812378 +epoch: 0, batch: 1970, sum loss: 9362.630859, avg loss: 5.640139, ppl: 281.501862 +epoch: 0, batch: 1971, sum loss: 8744.964844, avg loss: 5.573591, ppl: 263.378265 +epoch: 0, batch: 1972, sum loss: 9862.931641, avg loss: 5.610313, ppl: 273.229858 +epoch: 0, batch: 1973, sum loss: 10518.226562, avg loss: 5.772902, ppl: 321.469147 +epoch: 0, batch: 1974, sum loss: 8357.606445, avg loss: 5.612899, ppl: 273.937195 +epoch: 0, batch: 1975, sum loss: 8958.753906, avg loss: 5.750163, ppl: 314.241913 +epoch: 0, batch: 1976, sum loss: 8593.223633, avg loss: 5.705992, ppl: 300.663513 +epoch: 0, batch: 1977, sum loss: 8742.618164, avg loss: 5.644040, ppl: 282.602173 +epoch: 0, batch: 1978, sum loss: 7784.872559, avg loss: 5.493912, ppl: 243.206711 +epoch: 0, batch: 1979, sum loss: 10497.277344, avg loss: 5.513276, ppl: 247.962143 +epoch: 0, batch: 1980, sum loss: 8980.005859, avg loss: 5.370817, ppl: 215.038528 +epoch: 0, batch: 1981, sum loss: 9004.283203, avg loss: 5.385337, ppl: 218.183701 +epoch: 0, batch: 1982, sum loss: 9026.500000, avg loss: 5.487234, ppl: 241.587967 +epoch: 0, batch: 1983, sum loss: 9226.580078, avg loss: 5.585097, ppl: 266.426208 +epoch: 0, batch: 1984, sum loss: 8310.910156, avg loss: 5.533229, ppl: 252.959366 +epoch: 0, batch: 1985, sum loss: 10251.332031, avg loss: 5.629507, ppl: 278.524780 +epoch: 0, batch: 1986, sum loss: 9401.091797, avg loss: 5.465751, ppl: 236.453400 +epoch: 0, batch: 1987, sum loss: 7236.497559, avg loss: 5.408443, ppl: 223.283768 +epoch: 0, batch: 1988, sum loss: 8058.309082, avg loss: 
5.643074, ppl: 282.329132 +epoch: 0, batch: 1989, sum loss: 10395.584961, avg loss: 5.482904, ppl: 240.544220 +epoch: 0, batch: 1990, sum loss: 8564.744141, avg loss: 5.390021, ppl: 219.208054 +epoch: 0, batch: 1991, sum loss: 8196.684570, avg loss: 5.587379, ppl: 267.034668 +epoch: 0, batch: 1992, sum loss: 9182.164062, avg loss: 5.575084, ppl: 263.771759 +epoch: 0, batch: 1993, sum loss: 10098.781250, avg loss: 5.648088, ppl: 283.748413 +epoch: 0, batch: 1994, sum loss: 9532.189453, avg loss: 5.548422, ppl: 256.832031 +epoch: 0, batch: 1995, sum loss: 9915.201172, avg loss: 5.617678, ppl: 275.249420 +epoch: 0, batch: 1996, sum loss: 8598.243164, avg loss: 5.547254, ppl: 256.532166 +epoch: 0, batch: 1997, sum loss: 9350.037109, avg loss: 5.516246, ppl: 248.699631 +epoch: 0, batch: 1998, sum loss: 10351.865234, avg loss: 5.482979, ppl: 240.562347 +epoch: 0, batch: 1999, sum loss: 10296.164062, avg loss: 5.541531, ppl: 255.068115 +epoch: 0, batch: 2000, sum loss: 9779.755859, avg loss: 5.463551, ppl: 235.933640 +epoch: 0, batch: 2001, sum loss: 9166.588867, avg loss: 5.562251, ppl: 260.408264 +epoch: 0, batch: 2002, sum loss: 8573.035156, avg loss: 5.595976, ppl: 269.340363 +epoch: 0, batch: 2003, sum loss: 9835.438477, avg loss: 5.491590, ppl: 242.642700 +epoch: 0, batch: 2004, sum loss: 9036.049805, avg loss: 5.526636, ppl: 251.297043 +epoch: 0, batch: 2005, sum loss: 9159.755859, avg loss: 5.501355, ppl: 245.023758 +epoch: 0, batch: 2006, sum loss: 10182.912109, avg loss: 5.522187, ppl: 250.181519 +epoch: 0, batch: 2007, sum loss: 8465.369141, avg loss: 5.540163, ppl: 254.719528 +epoch: 0, batch: 2008, sum loss: 9037.355469, avg loss: 5.627245, ppl: 277.895447 +epoch: 0, batch: 2009, sum loss: 9313.646484, avg loss: 5.374291, ppl: 215.786819 +epoch: 0, batch: 2010, sum loss: 9359.416016, avg loss: 5.658656, ppl: 286.762878 +epoch: 0, batch: 2011, sum loss: 9465.740234, avg loss: 5.775314, ppl: 322.245697 +epoch: 0, batch: 2012, sum loss: 9316.849609, avg loss: 
5.705358, ppl: 300.473053 +epoch: 0, batch: 2013, sum loss: 9931.796875, avg loss: 5.627080, ppl: 277.849731 +epoch: 0, batch: 2014, sum loss: 9318.336914, avg loss: 5.616839, ppl: 275.018799 +epoch: 0, batch: 2015, sum loss: 8542.981445, avg loss: 5.616688, ppl: 274.977081 +epoch: 0, batch: 2016, sum loss: 11047.526367, avg loss: 5.622151, ppl: 276.483429 +epoch: 0, batch: 2017, sum loss: 9669.019531, avg loss: 5.544162, ppl: 255.740250 +epoch: 0, batch: 2018, sum loss: 10843.205078, avg loss: 5.783043, ppl: 324.745850 +epoch: 0, batch: 2019, sum loss: 8839.211914, avg loss: 5.597981, ppl: 269.880951 +epoch: 0, batch: 2020, sum loss: 6862.557129, avg loss: 5.332212, ppl: 206.895218 +epoch: 0, batch: 2021, sum loss: 9754.664062, avg loss: 5.526722, ppl: 251.318726 +epoch: 0, batch: 2022, sum loss: 9491.604492, avg loss: 5.550646, ppl: 257.403748 +epoch: 0, batch: 2023, sum loss: 9369.241211, avg loss: 5.437749, ppl: 229.924026 +epoch: 0, batch: 2024, sum loss: 9920.461914, avg loss: 5.545255, ppl: 256.019897 +epoch: 0, batch: 2025, sum loss: 9175.002930, avg loss: 5.530442, ppl: 252.255325 +epoch: 0, batch: 2026, sum loss: 8787.428711, avg loss: 5.431044, ppl: 228.387466 +epoch: 0, batch: 2027, sum loss: 10136.705078, avg loss: 5.545244, ppl: 256.016968 +epoch: 0, batch: 2028, sum loss: 8728.527344, avg loss: 5.534894, ppl: 253.380920 +epoch: 0, batch: 2029, sum loss: 8560.538086, avg loss: 5.672988, ppl: 290.902435 +epoch: 0, batch: 2030, sum loss: 10401.584961, avg loss: 5.646898, ppl: 283.410889 +epoch: 0, batch: 2031, sum loss: 7779.167969, avg loss: 5.361246, ppl: 212.990189 +epoch: 0, batch: 2032, sum loss: 9177.994141, avg loss: 5.654956, ppl: 285.704010 +epoch: 0, batch: 2033, sum loss: 10246.666016, avg loss: 5.503043, ppl: 245.437714 +epoch: 0, batch: 2034, sum loss: 9769.701172, avg loss: 5.743505, ppl: 312.156464 +epoch: 0, batch: 2035, sum loss: 9841.476562, avg loss: 5.698597, ppl: 298.448364 +epoch: 0, batch: 2036, sum loss: 9192.765625, avg loss: 
5.622487, ppl: 276.576263 +epoch: 0, batch: 2037, sum loss: 8038.499023, avg loss: 5.431418, ppl: 228.473083 +epoch: 0, batch: 2038, sum loss: 10525.253906, avg loss: 5.574817, ppl: 263.701202 +epoch: 0, batch: 2039, sum loss: 10151.472656, avg loss: 5.617859, ppl: 275.299438 +epoch: 0, batch: 2040, sum loss: 9733.407227, avg loss: 5.404446, ppl: 222.392899 +epoch: 0, batch: 2041, sum loss: 8518.902344, avg loss: 5.446868, ppl: 232.030411 +epoch: 0, batch: 2042, sum loss: 9441.707031, avg loss: 5.534412, ppl: 253.258804 +epoch: 0, batch: 2043, sum loss: 8613.935547, avg loss: 5.704593, ppl: 300.243317 +epoch: 0, batch: 2044, sum loss: 9632.886719, avg loss: 5.649787, ppl: 284.230896 +epoch: 0, batch: 2045, sum loss: 9384.199219, avg loss: 5.322858, ppl: 204.968918 +epoch: 0, batch: 2046, sum loss: 9231.788086, avg loss: 5.482059, ppl: 240.341171 +epoch: 0, batch: 2047, sum loss: 10021.772461, avg loss: 5.420104, ppl: 225.902618 +epoch: 0, batch: 2048, sum loss: 8268.743164, avg loss: 5.348475, ppl: 210.287460 +epoch: 0, batch: 2049, sum loss: 9433.538086, avg loss: 5.552406, ppl: 257.857300 +epoch: 0, batch: 2050, sum loss: 8409.208008, avg loss: 5.503408, ppl: 245.527374 +epoch: 0, batch: 2051, sum loss: 9953.721680, avg loss: 5.639503, ppl: 281.322723 +epoch: 0, batch: 2052, sum loss: 8654.707031, avg loss: 5.583682, ppl: 266.049408 +epoch: 0, batch: 2053, sum loss: 8692.904297, avg loss: 5.526322, ppl: 251.218201 +epoch: 0, batch: 2054, sum loss: 8490.665039, avg loss: 5.442734, ppl: 231.073135 +epoch: 0, batch: 2055, sum loss: 9188.126953, avg loss: 5.678694, ppl: 292.567017 +epoch: 0, batch: 2056, sum loss: 8175.527344, avg loss: 5.657804, ppl: 286.518890 +epoch: 0, batch: 2057, sum loss: 8883.179688, avg loss: 5.400109, ppl: 221.430511 +epoch: 0, batch: 2058, sum loss: 7759.385742, avg loss: 5.472064, ppl: 237.950714 +epoch: 0, batch: 2059, sum loss: 7763.391602, avg loss: 5.299243, ppl: 200.185303 +epoch: 0, batch: 2060, sum loss: 8329.753906, avg loss: 
5.426550, ppl: 227.363464 +epoch: 0, batch: 2061, sum loss: 8161.669434, avg loss: 5.555935, ppl: 258.768890 +epoch: 0, batch: 2062, sum loss: 9318.113281, avg loss: 5.503906, ppl: 245.649628 +epoch: 0, batch: 2063, sum loss: 10849.648438, avg loss: 5.604157, ppl: 271.552887 +epoch: 0, batch: 2064, sum loss: 9326.958008, avg loss: 5.535286, ppl: 253.480377 +epoch: 0, batch: 2065, sum loss: 8749.406250, avg loss: 5.357872, ppl: 212.272858 +epoch: 0, batch: 2066, sum loss: 8531.640625, avg loss: 5.413477, ppl: 224.410599 +epoch: 0, batch: 2067, sum loss: 8976.768555, avg loss: 5.349683, ppl: 210.541611 +epoch: 0, batch: 2068, sum loss: 9179.778320, avg loss: 5.550047, ppl: 257.249756 +epoch: 0, batch: 2069, sum loss: 9800.802734, avg loss: 5.524691, ppl: 250.808853 +epoch: 0, batch: 2070, sum loss: 10819.246094, avg loss: 5.600024, ppl: 270.432831 +epoch: 0, batch: 2071, sum loss: 10644.316406, avg loss: 5.646852, ppl: 283.397919 +epoch: 0, batch: 2072, sum loss: 8524.705078, avg loss: 5.503360, ppl: 245.515549 +epoch: 0, batch: 2073, sum loss: 9124.705078, avg loss: 5.664001, ppl: 288.299835 +epoch: 0, batch: 2074, sum loss: 8889.924805, avg loss: 5.484222, ppl: 240.861588 +epoch: 0, batch: 2075, sum loss: 8239.372070, avg loss: 5.544665, ppl: 255.868805 +epoch: 0, batch: 2076, sum loss: 7864.774414, avg loss: 5.499843, ppl: 244.653427 +epoch: 0, batch: 2077, sum loss: 8351.814453, avg loss: 5.312859, ppl: 202.929581 +epoch: 0, batch: 2078, sum loss: 8327.343750, avg loss: 5.456975, ppl: 234.387329 +epoch: 0, batch: 2079, sum loss: 9175.929688, avg loss: 5.481440, ppl: 240.192352 +epoch: 0, batch: 2080, sum loss: 9247.728516, avg loss: 5.564217, ppl: 260.920837 +epoch: 0, batch: 2081, sum loss: 9175.793945, avg loss: 5.325475, ppl: 205.505997 +epoch: 0, batch: 2082, sum loss: 10271.751953, avg loss: 5.725615, ppl: 306.621643 +epoch: 0, batch: 2083, sum loss: 10045.148438, avg loss: 5.630689, ppl: 278.854095 +epoch: 0, batch: 2084, sum loss: 8918.806641, avg loss: 
5.491876, ppl: 242.712143 +epoch: 0, batch: 2085, sum loss: 10487.476562, avg loss: 5.650580, ppl: 284.456390 +epoch: 0, batch: 2086, sum loss: 9314.110352, avg loss: 5.466027, ppl: 236.518692 +epoch: 0, batch: 2087, sum loss: 7784.930176, avg loss: 5.357832, ppl: 212.264343 +epoch: 0, batch: 2088, sum loss: 8799.659180, avg loss: 5.479239, ppl: 239.664368 +epoch: 0, batch: 2089, sum loss: 8632.123047, avg loss: 5.480713, ppl: 240.017746 +epoch: 0, batch: 2090, sum loss: 8067.375977, avg loss: 5.458306, ppl: 234.699463 +epoch: 0, batch: 2091, sum loss: 10530.561523, avg loss: 5.428125, ppl: 227.721848 +epoch: 0, batch: 2092, sum loss: 9475.992188, avg loss: 5.732603, ppl: 308.771820 +epoch: 0, batch: 2093, sum loss: 8885.586914, avg loss: 5.539643, ppl: 254.587051 +epoch: 0, batch: 2094, sum loss: 8589.853516, avg loss: 5.332001, ppl: 206.851517 +epoch: 0, batch: 2095, sum loss: 9626.176758, avg loss: 5.538652, ppl: 254.334915 +epoch: 0, batch: 2096, sum loss: 10650.091797, avg loss: 5.506769, ppl: 246.353912 +epoch: 0, batch: 2097, sum loss: 11179.430664, avg loss: 5.634793, ppl: 280.001038 +epoch: 0, batch: 2098, sum loss: 7509.023926, avg loss: 5.501117, ppl: 244.965347 +epoch: 0, batch: 2099, sum loss: 9251.461914, avg loss: 5.539798, ppl: 254.626511 +epoch: 0, batch: 2100, sum loss: 11036.156250, avg loss: 5.766016, ppl: 319.263397 +epoch: 0, batch: 2101, sum loss: 7913.951172, avg loss: 5.472995, ppl: 238.172516 +epoch: 0, batch: 2102, sum loss: 8763.373047, avg loss: 5.460045, ppl: 235.108078 +epoch: 0, batch: 2103, sum loss: 9235.275391, avg loss: 5.624407, ppl: 277.107849 +epoch: 0, batch: 2104, sum loss: 9695.885742, avg loss: 5.598086, ppl: 269.909393 +epoch: 0, batch: 2105, sum loss: 8662.250977, avg loss: 5.472047, ppl: 237.946854 +epoch: 0, batch: 2106, sum loss: 8853.178711, avg loss: 5.431398, ppl: 228.468399 +epoch: 0, batch: 2107, sum loss: 9199.211914, avg loss: 5.565161, ppl: 261.167297 +epoch: 0, batch: 2108, sum loss: 8636.599609, avg loss: 
5.455843, ppl: 234.122147 +epoch: 0, batch: 2109, sum loss: 8882.929688, avg loss: 5.503674, ppl: 245.592590 +epoch: 0, batch: 2110, sum loss: 9408.348633, avg loss: 5.492322, ppl: 242.820496 +epoch: 0, batch: 2111, sum loss: 10162.995117, avg loss: 5.463976, ppl: 236.034012 +epoch: 0, batch: 2112, sum loss: 8370.816406, avg loss: 5.671285, ppl: 290.407501 +epoch: 0, batch: 2113, sum loss: 9238.608398, avg loss: 5.575503, ppl: 263.882233 +epoch: 0, batch: 2114, sum loss: 10513.619141, avg loss: 5.475843, ppl: 238.851837 +epoch: 0, batch: 2115, sum loss: 9415.219727, avg loss: 5.401732, ppl: 221.790329 +epoch: 0, batch: 2116, sum loss: 10041.880859, avg loss: 5.502400, ppl: 245.279999 +epoch: 0, batch: 2117, sum loss: 9045.750977, avg loss: 5.667764, ppl: 289.386658 +epoch: 0, batch: 2118, sum loss: 8504.677734, avg loss: 5.427363, ppl: 227.548492 +epoch: 0, batch: 2119, sum loss: 9277.547852, avg loss: 5.422296, ppl: 226.398346 +epoch: 0, batch: 2120, sum loss: 8899.382812, avg loss: 5.496840, ppl: 243.919815 +epoch: 0, batch: 2121, sum loss: 10170.286133, avg loss: 5.453237, ppl: 233.512726 +epoch: 0, batch: 2122, sum loss: 9376.728516, avg loss: 5.658858, ppl: 286.820984 +epoch: 0, batch: 2123, sum loss: 9764.593750, avg loss: 5.436856, ppl: 229.718872 +epoch: 0, batch: 2124, sum loss: 9649.791016, avg loss: 5.549046, ppl: 256.992279 +epoch: 0, batch: 2125, sum loss: 7636.806152, avg loss: 5.263133, ppl: 193.085495 +epoch: 0, batch: 2126, sum loss: 10468.238281, avg loss: 5.466443, ppl: 236.617065 +epoch: 0, batch: 2127, sum loss: 10630.973633, avg loss: 5.633796, ppl: 279.721985 +epoch: 0, batch: 2128, sum loss: 11158.441406, avg loss: 5.649844, ppl: 284.247040 +epoch: 0, batch: 2129, sum loss: 9357.396484, avg loss: 5.459391, ppl: 234.954315 +epoch: 0, batch: 2130, sum loss: 10597.389648, avg loss: 5.442933, ppl: 231.119080 +epoch: 0, batch: 2131, sum loss: 10012.438477, avg loss: 5.441543, ppl: 230.797943 +epoch: 0, batch: 2132, sum loss: 10631.875977, avg 
loss: 5.688537, ppl: 295.461090 +epoch: 0, batch: 2133, sum loss: 10264.356445, avg loss: 5.439510, ppl: 230.329376 +epoch: 0, batch: 2134, sum loss: 7900.798828, avg loss: 5.445072, ppl: 231.613892 +epoch: 0, batch: 2135, sum loss: 9898.227539, avg loss: 5.480746, ppl: 240.025757 +epoch: 0, batch: 2136, sum loss: 10726.160156, avg loss: 5.455829, ppl: 234.118912 +epoch: 0, batch: 2137, sum loss: 8060.792480, avg loss: 5.487265, ppl: 241.595444 +epoch: 0, batch: 2138, sum loss: 10323.729492, avg loss: 5.433542, ppl: 228.958847 +epoch: 0, batch: 2139, sum loss: 9279.019531, avg loss: 5.366697, ppl: 214.154419 +epoch: 0, batch: 2140, sum loss: 11192.261719, avg loss: 5.658372, ppl: 286.681671 +epoch: 0, batch: 2141, sum loss: 9434.278320, avg loss: 5.572521, ppl: 263.096588 +epoch: 0, batch: 2142, sum loss: 9413.399414, avg loss: 5.560189, ppl: 259.871887 +epoch: 0, batch: 2143, sum loss: 9643.557617, avg loss: 5.577535, ppl: 264.418915 +epoch: 0, batch: 2144, sum loss: 8739.340820, avg loss: 5.434913, ppl: 229.272934 +epoch: 0, batch: 2145, sum loss: 8358.001953, avg loss: 5.326961, ppl: 205.811569 +epoch: 0, batch: 2146, sum loss: 8904.019531, avg loss: 5.631891, ppl: 279.189514 +epoch: 0, batch: 2147, sum loss: 6902.215820, avg loss: 5.313484, ppl: 203.056381 +epoch: 0, batch: 2148, sum loss: 8265.233398, avg loss: 5.391542, ppl: 219.541641 +epoch: 0, batch: 2149, sum loss: 9059.882812, avg loss: 5.418591, ppl: 225.560974 +epoch: 0, batch: 2150, sum loss: 9850.586914, avg loss: 5.600106, ppl: 270.455139 +epoch: 0, batch: 2151, sum loss: 7978.415039, avg loss: 5.358237, ppl: 212.350296 +epoch: 0, batch: 2152, sum loss: 8337.114258, avg loss: 5.492170, ppl: 242.783554 +epoch: 0, batch: 2153, sum loss: 9321.240234, avg loss: 5.638984, ppl: 281.176819 +epoch: 0, batch: 2154, sum loss: 6843.452637, avg loss: 5.439947, ppl: 230.429886 +epoch: 0, batch: 2155, sum loss: 7508.253906, avg loss: 5.366872, ppl: 214.191895 +epoch: 0, batch: 2156, sum loss: 8018.830566, avg 
loss: 5.436495, ppl: 229.635971 +epoch: 0, batch: 2157, sum loss: 8170.342285, avg loss: 5.542973, ppl: 255.436295 +epoch: 0, batch: 2158, sum loss: 10092.087891, avg loss: 5.428772, ppl: 227.869247 +epoch: 0, batch: 2159, sum loss: 10107.754883, avg loss: 5.553712, ppl: 258.194153 +epoch: 0, batch: 2160, sum loss: 10203.453125, avg loss: 5.631045, ppl: 278.953430 +epoch: 0, batch: 2161, sum loss: 8859.178711, avg loss: 5.506015, ppl: 246.168259 +epoch: 0, batch: 2162, sum loss: 9848.006836, avg loss: 5.563846, ppl: 260.823944 +epoch: 0, batch: 2163, sum loss: 8904.921875, avg loss: 5.520720, ppl: 249.814957 +epoch: 0, batch: 2164, sum loss: 9203.916992, avg loss: 5.561279, ppl: 260.155426 +epoch: 0, batch: 2165, sum loss: 8281.449219, avg loss: 5.374075, ppl: 215.740204 +epoch: 0, batch: 2166, sum loss: 10664.903320, avg loss: 5.583719, ppl: 266.059296 +epoch: 0, batch: 2167, sum loss: 8616.785156, avg loss: 5.402373, ppl: 221.932510 +epoch: 0, batch: 2168, sum loss: 8907.183594, avg loss: 5.444489, ppl: 231.478958 +epoch: 0, batch: 2169, sum loss: 9768.511719, avg loss: 5.692606, ppl: 296.665710 +epoch: 0, batch: 2170, sum loss: 7085.488770, avg loss: 5.359674, ppl: 212.655609 +epoch: 0, batch: 2171, sum loss: 10255.799805, avg loss: 5.493197, ppl: 243.033051 +epoch: 0, batch: 2172, sum loss: 9127.037109, avg loss: 5.504848, ppl: 245.880966 +epoch: 0, batch: 2173, sum loss: 8516.983398, avg loss: 5.438686, ppl: 230.139664 +epoch: 0, batch: 2174, sum loss: 8200.462891, avg loss: 5.373829, ppl: 215.687134 +epoch: 0, batch: 2175, sum loss: 8995.912109, avg loss: 5.354710, ppl: 211.602524 +epoch: 0, batch: 2176, sum loss: 8583.832031, avg loss: 5.354855, ppl: 211.633194 +epoch: 0, batch: 2177, sum loss: 8946.720703, avg loss: 5.376635, ppl: 216.293243 +epoch: 0, batch: 2178, sum loss: 9244.046875, avg loss: 5.460158, ppl: 235.134537 +epoch: 0, batch: 2179, sum loss: 8405.153320, avg loss: 5.391375, ppl: 219.505005 +epoch: 0, batch: 2180, sum loss: 10555.783203, avg 
loss: 5.503537, ppl: 245.558868 +epoch: 0, batch: 2181, sum loss: 9602.718750, avg loss: 5.632093, ppl: 279.246094 +epoch: 0, batch: 2182, sum loss: 8992.482422, avg loss: 5.476542, ppl: 239.018860 +epoch: 0, batch: 2183, sum loss: 8922.188477, avg loss: 5.413949, ppl: 224.516571 +epoch: 0, batch: 2184, sum loss: 9534.900391, avg loss: 5.737004, ppl: 310.133789 +epoch: 0, batch: 2185, sum loss: 8092.669922, avg loss: 5.227823, ppl: 186.386642 +epoch: 0, batch: 2186, sum loss: 9711.866211, avg loss: 5.518106, ppl: 249.162552 +epoch: 0, batch: 2187, sum loss: 8696.624023, avg loss: 5.312538, ppl: 202.864380 +epoch: 0, batch: 2188, sum loss: 8404.057617, avg loss: 5.467832, ppl: 236.945953 +epoch: 0, batch: 2189, sum loss: 8881.660156, avg loss: 5.389357, ppl: 219.062500 +epoch: 0, batch: 2190, sum loss: 10010.210938, avg loss: 5.494079, ppl: 243.247299 +epoch: 0, batch: 2191, sum loss: 9958.205078, avg loss: 5.588219, ppl: 267.259125 +epoch: 0, batch: 2192, sum loss: 8234.364258, avg loss: 5.500577, ppl: 244.833160 +epoch: 0, batch: 2193, sum loss: 9015.766602, avg loss: 5.500772, ppl: 244.881027 +epoch: 0, batch: 2194, sum loss: 7438.817383, avg loss: 5.465700, ppl: 236.441223 +epoch: 0, batch: 2195, sum loss: 8378.749023, avg loss: 5.272970, ppl: 194.994186 +epoch: 0, batch: 2196, sum loss: 9466.769531, avg loss: 5.384965, ppl: 218.102554 +epoch: 0, batch: 2197, sum loss: 9011.112305, avg loss: 5.596964, ppl: 269.606598 +epoch: 0, batch: 2198, sum loss: 10534.391602, avg loss: 5.550260, ppl: 257.304352 +epoch: 0, batch: 2199, sum loss: 8804.453125, avg loss: 5.365297, ppl: 213.854813 +epoch: 0, batch: 2200, sum loss: 10555.344727, avg loss: 5.617533, ppl: 275.209656 +epoch: 0, batch: 2201, sum loss: 9139.539062, avg loss: 5.539114, ppl: 254.452576 +epoch: 0, batch: 2202, sum loss: 10397.168945, avg loss: 5.443544, ppl: 231.260300 +epoch: 0, batch: 2203, sum loss: 10406.419922, avg loss: 5.462687, ppl: 235.730103 +epoch: 0, batch: 2204, sum loss: 8950.220703, avg 
loss: 5.375508, ppl: 216.049561 +epoch: 0, batch: 2205, sum loss: 9185.511719, avg loss: 5.393724, ppl: 220.021210 +epoch: 0, batch: 2206, sum loss: 11403.761719, avg loss: 5.710447, ppl: 302.005981 +epoch: 0, batch: 2207, sum loss: 9000.048828, avg loss: 5.405435, ppl: 222.613052 +epoch: 0, batch: 2208, sum loss: 9581.022461, avg loss: 5.456163, ppl: 234.197174 +epoch: 0, batch: 2209, sum loss: 9683.794922, avg loss: 5.458734, ppl: 234.799988 +epoch: 0, batch: 2210, sum loss: 10172.919922, avg loss: 5.507808, ppl: 246.609894 +epoch: 0, batch: 2211, sum loss: 7319.707031, avg loss: 5.232100, ppl: 187.185394 +epoch: 0, batch: 2212, sum loss: 9167.012695, avg loss: 5.280537, ppl: 196.475372 +epoch: 0, batch: 2213, sum loss: 9708.366211, avg loss: 5.602057, ppl: 270.983368 +epoch: 0, batch: 2214, sum loss: 9461.297852, avg loss: 5.487992, ppl: 241.771317 +epoch: 0, batch: 2215, sum loss: 8315.112305, avg loss: 5.427619, ppl: 227.606659 +epoch: 0, batch: 2216, sum loss: 9734.166016, avg loss: 5.565561, ppl: 261.271698 +epoch: 0, batch: 2217, sum loss: 10095.763672, avg loss: 5.752572, ppl: 314.999664 +epoch: 0, batch: 2218, sum loss: 8701.073242, avg loss: 5.427993, ppl: 227.691879 +epoch: 0, batch: 2219, sum loss: 8272.294922, avg loss: 5.589388, ppl: 267.571930 +epoch: 0, batch: 2220, sum loss: 9019.389648, avg loss: 5.423566, ppl: 226.686020 +epoch: 0, batch: 2221, sum loss: 8891.781250, avg loss: 5.346832, ppl: 209.942108 +epoch: 0, batch: 2222, sum loss: 9575.751953, avg loss: 5.255627, ppl: 191.641541 +epoch: 0, batch: 2223, sum loss: 8711.620117, avg loss: 5.527678, ppl: 251.558990 +epoch: 0, batch: 2224, sum loss: 9958.558594, avg loss: 5.575901, ppl: 263.987305 +epoch: 0, batch: 2225, sum loss: 10870.989258, avg loss: 5.440936, ppl: 230.657883 +epoch: 0, batch: 2226, sum loss: 9683.519531, avg loss: 5.306038, ppl: 201.550079 +epoch: 0, batch: 2227, sum loss: 9585.288086, avg loss: 5.375933, ppl: 216.141479 +epoch: 0, batch: 2228, sum loss: 9207.250977, avg 
loss: 5.526561, ppl: 251.278336 +epoch: 0, batch: 2229, sum loss: 9790.821289, avg loss: 5.591560, ppl: 268.153595 +epoch: 0, batch: 2230, sum loss: 9445.911133, avg loss: 5.507820, ppl: 246.612839 +epoch: 0, batch: 2231, sum loss: 9125.740234, avg loss: 5.451458, ppl: 233.097763 +epoch: 0, batch: 2232, sum loss: 9325.958984, avg loss: 5.422069, ppl: 226.346970 +epoch: 0, batch: 2233, sum loss: 8007.318848, avg loss: 5.226709, ppl: 186.179153 +epoch: 0, batch: 2234, sum loss: 8726.506836, avg loss: 5.526603, ppl: 251.288895 +epoch: 0, batch: 2235, sum loss: 9010.238281, avg loss: 5.541352, ppl: 255.022507 +epoch: 0, batch: 2236, sum loss: 9589.272461, avg loss: 5.405452, ppl: 222.616882 +epoch: 0, batch: 2237, sum loss: 9402.064453, avg loss: 5.469497, ppl: 237.340714 +epoch: 0, batch: 2238, sum loss: 8053.536133, avg loss: 5.561834, ppl: 260.299866 +epoch: 0, batch: 2239, sum loss: 7652.487793, avg loss: 5.167109, ppl: 175.406921 +epoch: 0, batch: 2240, sum loss: 10241.537109, avg loss: 5.244003, ppl: 189.426834 +epoch: 0, batch: 2241, sum loss: 8385.303711, avg loss: 5.553181, ppl: 258.057190 +epoch: 0, batch: 2242, sum loss: 8839.309570, avg loss: 5.416244, ppl: 225.032318 +epoch: 0, batch: 2243, sum loss: 9126.993164, avg loss: 5.544953, ppl: 255.942642 +epoch: 0, batch: 2244, sum loss: 9968.318359, avg loss: 5.612792, ppl: 273.907928 +epoch: 0, batch: 2245, sum loss: 9909.933594, avg loss: 5.583061, ppl: 265.884155 +epoch: 0, batch: 2246, sum loss: 8873.579102, avg loss: 5.423948, ppl: 226.772720 +epoch: 0, batch: 2247, sum loss: 10702.966797, avg loss: 5.648004, ppl: 283.724457 +epoch: 0, batch: 2248, sum loss: 8920.681641, avg loss: 5.503196, ppl: 245.475281 +epoch: 0, batch: 2249, sum loss: 10273.361328, avg loss: 5.526283, ppl: 251.208496 +epoch: 0, batch: 2250, sum loss: 8753.555664, avg loss: 5.433617, ppl: 228.975876 +epoch: 0, batch: 2251, sum loss: 9033.816406, avg loss: 5.478361, ppl: 239.453842 +epoch: 0, batch: 2252, sum loss: 9803.664062, avg 
loss: 5.476907, ppl: 239.106064 +epoch: 0, batch: 2253, sum loss: 9852.720703, avg loss: 5.464626, ppl: 236.187469 +epoch: 0, batch: 2254, sum loss: 10483.232422, avg loss: 5.523305, ppl: 250.461426 +epoch: 0, batch: 2255, sum loss: 8302.133789, avg loss: 5.429780, ppl: 228.099167 +epoch: 0, batch: 2256, sum loss: 8765.667969, avg loss: 5.461475, ppl: 235.444534 +epoch: 0, batch: 2257, sum loss: 9649.618164, avg loss: 5.488975, ppl: 242.009033 +epoch: 0, batch: 2258, sum loss: 9036.793945, avg loss: 5.379044, ppl: 216.814911 +epoch: 0, batch: 2259, sum loss: 8118.913086, avg loss: 5.234631, ppl: 187.659851 +epoch: 0, batch: 2260, sum loss: 9502.117188, avg loss: 5.534139, ppl: 253.189743 +epoch: 0, batch: 2261, sum loss: 8644.041016, avg loss: 5.283644, ppl: 197.086700 +epoch: 0, batch: 2262, sum loss: 10285.250977, avg loss: 5.506023, ppl: 246.170135 +epoch: 0, batch: 2263, sum loss: 9175.825195, avg loss: 5.578009, ppl: 264.544403 +epoch: 0, batch: 2264, sum loss: 8817.531250, avg loss: 5.510957, ppl: 247.387711 +epoch: 0, batch: 2265, sum loss: 9487.763672, avg loss: 5.516141, ppl: 248.673660 +epoch: 0, batch: 2266, sum loss: 8930.195312, avg loss: 5.259243, ppl: 192.335938 +epoch: 0, batch: 2267, sum loss: 9166.122070, avg loss: 5.508487, ppl: 246.777527 +epoch: 0, batch: 2268, sum loss: 9437.077148, avg loss: 5.518758, ppl: 249.325256 +epoch: 0, batch: 2269, sum loss: 9913.533203, avg loss: 5.455990, ppl: 234.156540 +epoch: 0, batch: 2270, sum loss: 8278.750977, avg loss: 5.054182, ppl: 156.676331 +epoch: 0, batch: 2271, sum loss: 9197.665039, avg loss: 5.359945, ppl: 212.713211 +epoch: 0, batch: 2272, sum loss: 8509.396484, avg loss: 5.282059, ppl: 196.774567 +epoch: 0, batch: 2273, sum loss: 9636.720703, avg loss: 5.315345, ppl: 203.434738 +epoch: 0, batch: 2274, sum loss: 9305.625000, avg loss: 5.388318, ppl: 218.835007 +epoch: 0, batch: 2275, sum loss: 7964.405273, avg loss: 5.281436, ppl: 196.652054 +epoch: 0, batch: 2276, sum loss: 8092.831543, avg loss: 
5.442388, ppl: 230.993149 +epoch: 0, batch: 2277, sum loss: 8383.400391, avg loss: 5.339746, ppl: 208.459656 +epoch: 0, batch: 2278, sum loss: 9660.360352, avg loss: 5.366867, ppl: 214.190765 +epoch: 0, batch: 2279, sum loss: 9092.424805, avg loss: 5.329674, ppl: 206.370743 +epoch: 0, batch: 2280, sum loss: 9196.572266, avg loss: 5.359308, ppl: 212.577744 +epoch: 0, batch: 2281, sum loss: 8133.635742, avg loss: 5.254287, ppl: 191.384933 +epoch: 0, batch: 2282, sum loss: 8830.481445, avg loss: 5.410834, ppl: 223.818237 +epoch: 0, batch: 2283, sum loss: 9618.303711, avg loss: 5.331654, ppl: 206.779724 +epoch: 0, batch: 2284, sum loss: 9313.438477, avg loss: 5.481718, ppl: 240.259140 +epoch: 0, batch: 2285, sum loss: 8459.120117, avg loss: 5.401737, ppl: 221.791275 +epoch: 0, batch: 2286, sum loss: 9008.775391, avg loss: 5.459864, ppl: 235.065491 +epoch: 0, batch: 2287, sum loss: 8788.287109, avg loss: 5.428219, ppl: 227.743347 +epoch: 0, batch: 2288, sum loss: 7605.534180, avg loss: 5.337217, ppl: 207.933197 +epoch: 0, batch: 2289, sum loss: 7902.733887, avg loss: 5.268489, ppl: 194.122498 +epoch: 0, batch: 2290, sum loss: 8545.716797, avg loss: 5.239557, ppl: 188.586502 +epoch: 0, batch: 2291, sum loss: 7511.733398, avg loss: 5.560128, ppl: 259.856140 +epoch: 0, batch: 2292, sum loss: 8641.611328, avg loss: 5.380829, ppl: 217.202225 +epoch: 0, batch: 2293, sum loss: 11553.798828, avg loss: 5.605919, ppl: 272.031769 +epoch: 0, batch: 2294, sum loss: 9331.448242, avg loss: 5.295941, ppl: 199.525360 +epoch: 0, batch: 2295, sum loss: 10261.480469, avg loss: 5.235449, ppl: 187.813385 +epoch: 0, batch: 2296, sum loss: 7752.013672, avg loss: 5.202694, ppl: 181.761230 +epoch: 0, batch: 2297, sum loss: 9374.285156, avg loss: 5.517531, ppl: 249.019547 +epoch: 0, batch: 2298, sum loss: 10064.937500, avg loss: 5.493962, ppl: 243.218887 +epoch: 0, batch: 2299, sum loss: 8263.933594, avg loss: 5.394213, ppl: 220.128769 +epoch: 0, batch: 2300, sum loss: 8020.004883, avg loss: 
5.581075, ppl: 265.356750 +epoch: 0, batch: 2301, sum loss: 7898.460449, avg loss: 5.432229, ppl: 228.658264 +epoch: 0, batch: 2302, sum loss: 7675.055664, avg loss: 5.238946, ppl: 188.471344 +epoch: 0, batch: 2303, sum loss: 9848.400391, avg loss: 5.456177, ppl: 234.200424 +epoch: 0, batch: 2304, sum loss: 9329.753906, avg loss: 5.462385, ppl: 235.658844 +epoch: 0, batch: 2305, sum loss: 9809.131836, avg loss: 5.265234, ppl: 193.491669 +epoch: 0, batch: 2306, sum loss: 9969.572266, avg loss: 5.138955, ppl: 170.537491 +epoch: 0, batch: 2307, sum loss: 9128.875977, avg loss: 5.255542, ppl: 191.625275 +epoch: 0, batch: 2308, sum loss: 10483.468750, avg loss: 5.426226, ppl: 227.289871 +epoch: 0, batch: 2309, sum loss: 9219.094727, avg loss: 5.451860, ppl: 233.191483 +epoch: 0, batch: 2310, sum loss: 8941.362305, avg loss: 5.338127, ppl: 208.122559 +epoch: 0, batch: 2311, sum loss: 8429.159180, avg loss: 5.274818, ppl: 195.354904 +epoch: 0, batch: 2312, sum loss: 8216.653320, avg loss: 5.398590, ppl: 221.094467 +epoch: 0, batch: 2313, sum loss: 8024.920898, avg loss: 5.342824, ppl: 209.102280 +epoch: 0, batch: 2314, sum loss: 9361.693359, avg loss: 5.510120, ppl: 247.180771 +epoch: 0, batch: 2315, sum loss: 8932.146484, avg loss: 5.319921, ppl: 204.367645 +epoch: 0, batch: 2316, sum loss: 9946.417969, avg loss: 5.441148, ppl: 230.706833 +epoch: 0, batch: 2317, sum loss: 7247.065918, avg loss: 5.444828, ppl: 231.557449 +epoch: 0, batch: 2318, sum loss: 9330.813477, avg loss: 5.472618, ppl: 238.082703 +epoch: 0, batch: 2319, sum loss: 10682.581055, avg loss: 5.555164, ppl: 258.569458 +epoch: 0, batch: 2320, sum loss: 9003.301758, avg loss: 5.687493, ppl: 295.152710 +epoch: 0, batch: 2321, sum loss: 9237.914062, avg loss: 5.437266, ppl: 229.812988 +epoch: 0, batch: 2322, sum loss: 9558.611328, avg loss: 5.534807, ppl: 253.358810 +epoch: 0, batch: 2323, sum loss: 9974.335938, avg loss: 5.435606, ppl: 229.431732 +epoch: 0, batch: 2324, sum loss: 6973.484375, avg loss: 
5.082715, ppl: 161.211075 +epoch: 0, batch: 2325, sum loss: 8562.987305, avg loss: 5.416184, ppl: 225.018799 +epoch: 0, batch: 2326, sum loss: 10027.006836, avg loss: 5.344886, ppl: 209.534058 +epoch: 0, batch: 2327, sum loss: 8156.631348, avg loss: 5.345106, ppl: 209.580124 +epoch: 0, batch: 2328, sum loss: 9185.935547, avg loss: 5.381333, ppl: 217.311829 +epoch: 0, batch: 2329, sum loss: 9709.564453, avg loss: 5.427370, ppl: 227.549911 +epoch: 0, batch: 2330, sum loss: 8233.918945, avg loss: 5.445714, ppl: 231.762695 +epoch: 0, batch: 2331, sum loss: 10321.554688, avg loss: 5.624826, ppl: 277.224030 +epoch: 0, batch: 2332, sum loss: 9755.301758, avg loss: 5.465155, ppl: 236.312515 +epoch: 0, batch: 2333, sum loss: 8532.339844, avg loss: 5.283183, ppl: 196.995834 +epoch: 0, batch: 2334, sum loss: 8744.334961, avg loss: 5.465209, ppl: 236.325241 +epoch: 0, batch: 2335, sum loss: 8421.543945, avg loss: 5.475646, ppl: 238.804688 +epoch: 0, batch: 2336, sum loss: 8618.720703, avg loss: 5.400201, ppl: 221.450882 +epoch: 0, batch: 2337, sum loss: 8229.242188, avg loss: 5.288716, ppl: 198.088882 +epoch: 0, batch: 2338, sum loss: 8769.375000, avg loss: 5.270057, ppl: 194.427078 +epoch: 0, batch: 2339, sum loss: 8307.470703, avg loss: 5.443952, ppl: 231.354706 +epoch: 0, batch: 2340, sum loss: 8555.212891, avg loss: 5.387414, ppl: 218.637253 +epoch: 0, batch: 2341, sum loss: 8474.150391, avg loss: 5.279844, ppl: 196.339294 +epoch: 0, batch: 2342, sum loss: 10001.580078, avg loss: 5.571911, ppl: 262.936066 +epoch: 0, batch: 2343, sum loss: 7832.526855, avg loss: 5.361073, ppl: 212.953232 +epoch: 0, batch: 2344, sum loss: 8176.700195, avg loss: 5.271889, ppl: 194.783600 +epoch: 0, batch: 2345, sum loss: 9860.253906, avg loss: 5.414747, ppl: 224.695633 +epoch: 0, batch: 2346, sum loss: 9067.958008, avg loss: 5.459337, ppl: 234.941544 +epoch: 0, batch: 2347, sum loss: 8849.647461, avg loss: 5.184328, ppl: 178.453415 +epoch: 0, batch: 2348, sum loss: 8904.618164, avg loss: 
5.297215, ppl: 199.779556 +epoch: 0, batch: 2349, sum loss: 8336.089844, avg loss: 5.282693, ppl: 196.899399 +epoch: 0, batch: 2350, sum loss: 8033.657227, avg loss: 5.233653, ppl: 187.476334 +epoch: 0, batch: 2351, sum loss: 9694.443359, avg loss: 5.428020, ppl: 227.697952 +epoch: 0, batch: 2352, sum loss: 9976.199219, avg loss: 5.378005, ppl: 216.589752 +epoch: 0, batch: 2353, sum loss: 8401.718750, avg loss: 5.167109, ppl: 175.406921 +epoch: 0, batch: 2354, sum loss: 8583.249023, avg loss: 5.463558, ppl: 235.935318 +epoch: 0, batch: 2355, sum loss: 9469.311523, avg loss: 5.380291, ppl: 217.085327 +epoch: 0, batch: 2356, sum loss: 9514.826172, avg loss: 5.506265, ppl: 246.229782 +epoch: 0, batch: 2357, sum loss: 8343.349609, avg loss: 5.507162, ppl: 246.450607 +epoch: 0, batch: 2358, sum loss: 7933.553223, avg loss: 5.281992, ppl: 196.761520 +epoch: 0, batch: 2359, sum loss: 10000.490234, avg loss: 5.402750, ppl: 222.016129 +epoch: 0, batch: 2360, sum loss: 9047.194336, avg loss: 5.347042, ppl: 209.986160 +epoch: 0, batch: 2361, sum loss: 8960.919922, avg loss: 5.517808, ppl: 249.088425 +epoch: 0, batch: 2362, sum loss: 11307.199219, avg loss: 5.451880, ppl: 233.196152 +epoch: 0, batch: 2363, sum loss: 10189.432617, avg loss: 5.374174, ppl: 215.761505 +epoch: 0, batch: 2364, sum loss: 7112.704102, avg loss: 5.206958, ppl: 182.537903 +epoch: 0, batch: 2365, sum loss: 7997.400391, avg loss: 5.363783, ppl: 213.531281 +epoch: 0, batch: 2366, sum loss: 8124.347656, avg loss: 5.282411, ppl: 196.843826 +epoch: 0, batch: 2367, sum loss: 7564.418945, avg loss: 5.149366, ppl: 172.322189 +epoch: 0, batch: 2368, sum loss: 8842.740234, avg loss: 5.295054, ppl: 199.348389 +epoch: 0, batch: 2369, sum loss: 8821.662109, avg loss: 5.408744, ppl: 223.350952 +epoch: 0, batch: 2370, sum loss: 9439.599609, avg loss: 5.484950, ppl: 241.036911 +epoch: 0, batch: 2371, sum loss: 9568.105469, avg loss: 5.260091, ppl: 192.498978 +epoch: 0, batch: 2372, sum loss: 8908.375977, avg loss: 
5.412136, ppl: 224.109787 +epoch: 0, batch: 2373, sum loss: 7908.445801, avg loss: 5.409334, ppl: 223.482635 +epoch: 0, batch: 2374, sum loss: 9958.576172, avg loss: 5.441845, ppl: 230.867722 +epoch: 0, batch: 2375, sum loss: 9110.625000, avg loss: 5.248056, ppl: 190.196152 +epoch: 0, batch: 2376, sum loss: 8587.861328, avg loss: 5.265396, ppl: 193.522949 +epoch: 0, batch: 2377, sum loss: 8707.565430, avg loss: 5.507631, ppl: 246.566391 +epoch: 0, batch: 2378, sum loss: 9222.136719, avg loss: 5.374206, ppl: 215.768402 +epoch: 0, batch: 2379, sum loss: 9505.857422, avg loss: 5.266403, ppl: 193.717941 +epoch: 0, batch: 2380, sum loss: 8799.261719, avg loss: 5.458599, ppl: 234.768311 +epoch: 0, batch: 2381, sum loss: 8410.957031, avg loss: 5.326762, ppl: 205.770554 +epoch: 0, batch: 2382, sum loss: 8951.810547, avg loss: 5.303205, ppl: 200.980011 +epoch: 0, batch: 2383, sum loss: 11113.034180, avg loss: 5.684417, ppl: 294.246185 +epoch: 0, batch: 2384, sum loss: 7884.477051, avg loss: 5.259825, ppl: 192.447769 +epoch: 0, batch: 2385, sum loss: 8457.160156, avg loss: 5.352633, ppl: 211.163559 +epoch: 0, batch: 2386, sum loss: 8435.537109, avg loss: 5.288738, ppl: 198.093323 +epoch: 0, batch: 2387, sum loss: 9000.541016, avg loss: 5.392775, ppl: 219.812531 +epoch: 0, batch: 2388, sum loss: 7305.748535, avg loss: 5.010801, ppl: 150.024902 +epoch: 0, batch: 2389, sum loss: 9697.360352, avg loss: 5.378458, ppl: 216.687988 +epoch: 0, batch: 2390, sum loss: 8520.807617, avg loss: 5.338852, ppl: 208.273453 +epoch: 0, batch: 2391, sum loss: 9069.335938, avg loss: 5.366471, ppl: 214.105911 +epoch: 0, batch: 2392, sum loss: 8754.230469, avg loss: 5.176955, ppl: 177.142624 +epoch: 0, batch: 2393, sum loss: 9219.371094, avg loss: 5.347663, ppl: 210.116760 +epoch: 0, batch: 2394, sum loss: 8996.572266, avg loss: 5.435996, ppl: 229.521240 +epoch: 0, batch: 2395, sum loss: 8167.856445, avg loss: 5.162993, ppl: 174.686584 +epoch: 0, batch: 2396, sum loss: 9194.992188, avg loss: 
5.522518, ppl: 250.264450 +epoch: 0, batch: 2397, sum loss: 8595.225586, avg loss: 5.409204, ppl: 223.453751 +epoch: 0, batch: 2398, sum loss: 8603.965820, avg loss: 5.340761, ppl: 208.671494 +epoch: 0, batch: 2399, sum loss: 9976.955078, avg loss: 5.301251, ppl: 200.587677 +epoch: 0, batch: 2400, sum loss: 8310.868164, avg loss: 5.246760, ppl: 189.949905 +epoch: 0, batch: 2401, sum loss: 8838.507812, avg loss: 5.321197, ppl: 204.628586 +epoch: 0, batch: 2402, sum loss: 8085.412109, avg loss: 5.305388, ppl: 201.419235 +epoch: 0, batch: 2403, sum loss: 7587.972168, avg loss: 5.075567, ppl: 160.062881 +epoch: 0, batch: 2404, sum loss: 9059.401367, avg loss: 5.218549, ppl: 184.666092 +epoch: 0, batch: 2405, sum loss: 9640.404297, avg loss: 5.566053, ppl: 261.400421 +epoch: 0, batch: 2406, sum loss: 8916.826172, avg loss: 5.504214, ppl: 245.725189 +epoch: 0, batch: 2407, sum loss: 9776.892578, avg loss: 5.363079, ppl: 213.380951 +epoch: 0, batch: 2408, sum loss: 8641.053711, avg loss: 5.252920, ppl: 191.123566 +epoch: 0, batch: 2409, sum loss: 7806.558594, avg loss: 5.335994, ppl: 207.679031 +epoch: 0, batch: 2410, sum loss: 9080.460938, avg loss: 5.350891, ppl: 210.795959 +epoch: 0, batch: 2411, sum loss: 9205.375000, avg loss: 5.392721, ppl: 219.800690 +epoch: 0, batch: 2412, sum loss: 9781.957031, avg loss: 5.250648, ppl: 190.689713 +epoch: 0, batch: 2413, sum loss: 8807.059570, avg loss: 5.409742, ppl: 223.573868 +epoch: 0, batch: 2414, sum loss: 9440.510742, avg loss: 5.388420, ppl: 218.857239 +epoch: 0, batch: 2415, sum loss: 8699.843750, avg loss: 5.269439, ppl: 194.306961 +epoch: 0, batch: 2416, sum loss: 9326.122070, avg loss: 5.521683, ppl: 250.055466 +epoch: 0, batch: 2417, sum loss: 7251.859375, avg loss: 5.304945, ppl: 201.329926 +epoch: 0, batch: 2418, sum loss: 8397.956055, avg loss: 5.359258, ppl: 212.567093 +epoch: 0, batch: 2419, sum loss: 8322.240234, avg loss: 5.188429, ppl: 179.186890 +epoch: 0, batch: 2420, sum loss: 8750.721680, avg loss: 
5.265176, ppl: 193.480316 +epoch: 0, batch: 2421, sum loss: 9004.422852, avg loss: 5.453921, ppl: 233.672562 +epoch: 0, batch: 2422, sum loss: 8200.986328, avg loss: 5.216912, ppl: 184.364044 +epoch: 0, batch: 2423, sum loss: 7954.031250, avg loss: 5.212340, ppl: 183.523071 +epoch: 0, batch: 2424, sum loss: 7793.232422, avg loss: 5.216353, ppl: 184.261032 +epoch: 0, batch: 2425, sum loss: 9419.809570, avg loss: 5.518342, ppl: 249.221497 +epoch: 0, batch: 2426, sum loss: 9055.487305, avg loss: 5.277091, ppl: 195.799377 +epoch: 0, batch: 2427, sum loss: 8367.623047, avg loss: 5.282590, ppl: 196.879211 +epoch: 0, batch: 2428, sum loss: 8362.254883, avg loss: 5.322887, ppl: 204.974777 +epoch: 0, batch: 2429, sum loss: 9644.839844, avg loss: 5.308112, ppl: 201.968582 +epoch: 0, batch: 2430, sum loss: 8623.201172, avg loss: 5.270905, ppl: 194.592087 +epoch: 0, batch: 2431, sum loss: 9366.249023, avg loss: 5.420283, ppl: 225.943024 +epoch: 0, batch: 2432, sum loss: 8718.240234, avg loss: 5.365071, ppl: 213.806381 +epoch: 0, batch: 2433, sum loss: 9178.296875, avg loss: 5.287037, ppl: 197.756683 +epoch: 0, batch: 2434, sum loss: 9478.159180, avg loss: 5.274435, ppl: 195.280121 +epoch: 0, batch: 2435, sum loss: 9534.828125, avg loss: 5.368710, ppl: 214.585785 +epoch: 0, batch: 2436, sum loss: 9624.734375, avg loss: 5.459293, ppl: 234.931351 +epoch: 0, batch: 2437, sum loss: 9193.564453, avg loss: 5.436762, ppl: 229.697296 +epoch: 0, batch: 2438, sum loss: 11243.255859, avg loss: 5.544012, ppl: 255.701843 +epoch: 0, batch: 2439, sum loss: 10311.413086, avg loss: 5.277079, ppl: 195.797134 +epoch: 0, batch: 2440, sum loss: 8047.649902, avg loss: 5.270236, ppl: 194.461761 +epoch: 0, batch: 2441, sum loss: 9289.722656, avg loss: 5.410439, ppl: 223.729675 +epoch: 0, batch: 2442, sum loss: 8827.483398, avg loss: 5.047160, ppl: 155.580017 +epoch: 0, batch: 2443, sum loss: 10420.656250, avg loss: 5.352161, ppl: 211.063904 +epoch: 0, batch: 2444, sum loss: 7621.363770, avg loss: 
5.270653, ppl: 194.542908 +epoch: 0, batch: 2445, sum loss: 8621.350586, avg loss: 5.122609, ppl: 167.772461 +epoch: 0, batch: 2446, sum loss: 9697.793945, avg loss: 5.411716, ppl: 224.015564 +epoch: 0, batch: 2447, sum loss: 9608.203125, avg loss: 5.515616, ppl: 248.543015 +epoch: 0, batch: 2448, sum loss: 9215.396484, avg loss: 5.404925, ppl: 222.499496 +epoch: 0, batch: 2449, sum loss: 8165.976562, avg loss: 5.087836, ppl: 162.038803 +epoch: 0, batch: 2450, sum loss: 9515.321289, avg loss: 5.277494, ppl: 195.878372 +epoch: 0, batch: 2451, sum loss: 8360.947266, avg loss: 5.215812, ppl: 184.161331 +epoch: 0, batch: 2452, sum loss: 8483.562500, avg loss: 5.236767, ppl: 188.061172 +epoch: 0, batch: 2453, sum loss: 9057.509766, avg loss: 5.397801, ppl: 220.920059 +epoch: 0, batch: 2454, sum loss: 8751.832031, avg loss: 5.297719, ppl: 199.880463 +epoch: 0, batch: 2455, sum loss: 11058.989258, avg loss: 5.394629, ppl: 220.220428 +epoch: 0, batch: 2456, sum loss: 8638.392578, avg loss: 5.355482, ppl: 211.766037 +epoch: 0, batch: 2457, sum loss: 9845.778320, avg loss: 5.472917, ppl: 238.153900 +epoch: 0, batch: 2458, sum loss: 9794.786133, avg loss: 5.426474, ppl: 227.346222 +epoch: 0, batch: 2459, sum loss: 7786.494629, avg loss: 5.160037, ppl: 174.170822 +epoch: 0, batch: 2460, sum loss: 8491.605469, avg loss: 5.357480, ppl: 212.189468 +epoch: 0, batch: 2461, sum loss: 7798.609375, avg loss: 5.280034, ppl: 196.376465 +epoch: 0, batch: 2462, sum loss: 9193.949219, avg loss: 5.401850, ppl: 221.816452 +epoch: 0, batch: 2463, sum loss: 7492.188965, avg loss: 5.272476, ppl: 194.897964 +epoch: 0, batch: 2464, sum loss: 9478.946289, avg loss: 5.304391, ppl: 201.218399 +epoch: 0, batch: 2465, sum loss: 9103.251953, avg loss: 5.192956, ppl: 179.999924 +epoch: 0, batch: 2466, sum loss: 9662.485352, avg loss: 5.362090, ppl: 213.169922 +epoch: 0, batch: 2467, sum loss: 9450.241211, avg loss: 5.481579, ppl: 240.225677 +epoch: 0, batch: 2468, sum loss: 8681.509766, avg loss: 
5.220391, ppl: 185.006470 +epoch: 0, batch: 2469, sum loss: 9217.061523, avg loss: 5.333948, ppl: 207.254532 +epoch: 0, batch: 2470, sum loss: 9967.082031, avg loss: 5.350017, ppl: 210.611893 +epoch: 0, batch: 2471, sum loss: 9890.498047, avg loss: 5.485579, ppl: 241.188446 +epoch: 0, batch: 2472, sum loss: 9367.848633, avg loss: 5.462302, ppl: 235.639297 +epoch: 0, batch: 2473, sum loss: 11077.618164, avg loss: 5.494851, ppl: 243.435165 +epoch: 0, batch: 2474, sum loss: 9621.264648, avg loss: 5.357052, ppl: 212.098724 +epoch: 0, batch: 2475, sum loss: 10478.659180, avg loss: 5.376429, ppl: 216.248581 +epoch: 0, batch: 2476, sum loss: 7456.944336, avg loss: 5.153382, ppl: 173.015686 +epoch: 0, batch: 2477, sum loss: 7622.253418, avg loss: 5.112175, ppl: 166.031158 +epoch: 0, batch: 2478, sum loss: 9012.480469, avg loss: 5.176611, ppl: 177.081741 +epoch: 0, batch: 2479, sum loss: 8990.689453, avg loss: 5.393335, ppl: 219.935623 +epoch: 0, batch: 2480, sum loss: 9410.919922, avg loss: 5.331966, ppl: 206.844208 +epoch: 0, batch: 2481, sum loss: 7753.962891, avg loss: 5.321869, ppl: 204.766205 +epoch: 0, batch: 2482, sum loss: 9784.521484, avg loss: 5.610391, ppl: 273.250977 +epoch: 0, batch: 2483, sum loss: 7845.076172, avg loss: 5.178268, ppl: 177.375412 +epoch: 0, batch: 2484, sum loss: 8906.190430, avg loss: 5.273056, ppl: 195.010925 +epoch: 0, batch: 2485, sum loss: 8060.274902, avg loss: 5.345010, ppl: 209.559937 +epoch: 0, batch: 2486, sum loss: 9722.666016, avg loss: 5.449925, ppl: 232.740692 +epoch: 0, batch: 2487, sum loss: 9129.042969, avg loss: 5.332385, ppl: 206.930832 +epoch: 0, batch: 2488, sum loss: 7867.666992, avg loss: 5.186333, ppl: 178.811600 +epoch: 0, batch: 2489, sum loss: 8056.539062, avg loss: 5.191069, ppl: 179.660522 +epoch: 0, batch: 2490, sum loss: 8965.521484, avg loss: 5.336620, ppl: 207.809189 +epoch: 0, batch: 2491, sum loss: 10094.520508, avg loss: 5.433003, ppl: 228.835510 +epoch: 0, batch: 2492, sum loss: 9488.083008, avg loss: 
5.244933, ppl: 189.603134 +epoch: 0, batch: 2493, sum loss: 8490.177734, avg loss: 5.397443, ppl: 220.840958 +epoch: 0, batch: 2494, sum loss: 8236.279297, avg loss: 5.320594, ppl: 204.505295 +epoch: 0, batch: 2495, sum loss: 8353.238281, avg loss: 5.276840, ppl: 195.750366 +epoch: 0, batch: 2496, sum loss: 7607.166992, avg loss: 5.199704, ppl: 181.218628 +epoch: 0, batch: 2497, sum loss: 7769.729492, avg loss: 5.145516, ppl: 171.660110 +epoch: 0, batch: 2498, sum loss: 7257.769043, avg loss: 5.251642, ppl: 190.879395 +epoch: 0, batch: 2499, sum loss: 8979.013672, avg loss: 5.379877, ppl: 216.995499 +epoch: 0, batch: 2500, sum loss: 8310.412109, avg loss: 5.273104, ppl: 195.020401 +epoch: 0, batch: 2501, sum loss: 9009.671875, avg loss: 5.271897, ppl: 194.785095 +epoch: 0, batch: 2502, sum loss: 7993.586914, avg loss: 5.279780, ppl: 196.326752 +epoch: 0, batch: 2503, sum loss: 9279.343750, avg loss: 5.360683, ppl: 212.870377 +epoch: 0, batch: 2504, sum loss: 8480.567383, avg loss: 5.260898, ppl: 192.654343 +epoch: 0, batch: 2505, sum loss: 9342.107422, avg loss: 5.195833, ppl: 180.518402 +epoch: 0, batch: 2506, sum loss: 9111.428711, avg loss: 5.343946, ppl: 209.337219 +epoch: 0, batch: 2507, sum loss: 8620.035156, avg loss: 5.134029, ppl: 169.699524 +epoch: 0, batch: 2508, sum loss: 8876.398438, avg loss: 5.283571, ppl: 197.072327 +epoch: 0, batch: 2509, sum loss: 9145.800781, avg loss: 5.351551, ppl: 210.935211 +epoch: 0, batch: 2510, sum loss: 9092.450195, avg loss: 5.373788, ppl: 215.678391 +epoch: 0, batch: 2511, sum loss: 8943.858398, avg loss: 5.261094, ppl: 192.692108 +epoch: 0, batch: 2512, sum loss: 9033.151367, avg loss: 5.194452, ppl: 180.269287 +epoch: 0, batch: 2513, sum loss: 7988.575195, avg loss: 5.372276, ppl: 215.352524 +epoch: 0, batch: 2514, sum loss: 11138.619141, avg loss: 5.401852, ppl: 221.816864 +epoch: 0, batch: 2515, sum loss: 7677.927246, avg loss: 5.237331, ppl: 188.167191 +epoch: 0, batch: 2516, sum loss: 8206.748047, avg loss: 
5.171234, ppl: 176.132080 +epoch: 0, batch: 2517, sum loss: 8819.912109, avg loss: 5.335700, ppl: 207.618042 +epoch: 0, batch: 2518, sum loss: 8450.081055, avg loss: 5.420193, ppl: 225.922760 +epoch: 0, batch: 2519, sum loss: 8017.288086, avg loss: 5.413429, ppl: 224.399689 +epoch: 0, batch: 2520, sum loss: 8965.986328, avg loss: 5.483784, ppl: 240.755936 +epoch: 0, batch: 2521, sum loss: 9199.224609, avg loss: 5.296042, ppl: 199.545441 +epoch: 0, batch: 2522, sum loss: 8289.216797, avg loss: 5.142194, ppl: 171.090698 +epoch: 0, batch: 2523, sum loss: 7670.873047, avg loss: 5.257624, ppl: 192.024628 +epoch: 0, batch: 2524, sum loss: 8922.875000, avg loss: 5.122201, ppl: 167.704147 +epoch: 0, batch: 2525, sum loss: 13157.742188, avg loss: 5.728229, ppl: 307.424347 +epoch: 0, batch: 2526, sum loss: 7583.739258, avg loss: 5.262831, ppl: 193.027222 +epoch: 0, batch: 2527, sum loss: 10510.003906, avg loss: 5.488253, ppl: 241.834259 +epoch: 0, batch: 2528, sum loss: 9736.465820, avg loss: 5.311765, ppl: 202.707733 +epoch: 0, batch: 2529, sum loss: 11313.559570, avg loss: 5.575929, ppl: 263.994751 +epoch: 0, batch: 2530, sum loss: 8301.058594, avg loss: 5.369378, ppl: 214.729279 +epoch: 0, batch: 2531, sum loss: 8228.517578, avg loss: 5.214523, ppl: 183.924133 +epoch: 0, batch: 2532, sum loss: 9628.287109, avg loss: 5.224247, ppl: 185.721268 +epoch: 0, batch: 2533, sum loss: 8714.924805, avg loss: 5.436634, ppl: 229.667831 +epoch: 0, batch: 2534, sum loss: 8887.829102, avg loss: 5.221991, ppl: 185.302765 +epoch: 0, batch: 2535, sum loss: 10052.132812, avg loss: 5.472038, ppl: 237.944580 +epoch: 0, batch: 2536, sum loss: 8311.219727, avg loss: 5.307292, ppl: 201.803009 +epoch: 0, batch: 2537, sum loss: 9408.751953, avg loss: 5.438585, ppl: 230.116287 +epoch: 0, batch: 2538, sum loss: 7733.193359, avg loss: 5.282236, ppl: 196.809372 +epoch: 0, batch: 2539, sum loss: 8168.204590, avg loss: 4.968494, ppl: 143.810211 +epoch: 0, batch: 2540, sum loss: 8700.638672, avg loss: 
5.250838, ppl: 190.725998 +epoch: 0, batch: 2541, sum loss: 9144.369141, avg loss: 5.350713, ppl: 210.758575 +epoch: 0, batch: 2542, sum loss: 9099.505859, avg loss: 5.343221, ppl: 209.185455 +epoch: 0, batch: 2543, sum loss: 9214.606445, avg loss: 5.445985, ppl: 231.825485 +epoch: 0, batch: 2544, sum loss: 9643.577148, avg loss: 5.348629, ppl: 210.319656 +epoch: 0, batch: 2545, sum loss: 9369.032227, avg loss: 5.353733, ppl: 211.395874 +epoch: 0, batch: 2546, sum loss: 8547.079102, avg loss: 5.328603, ppl: 206.149841 +epoch: 0, batch: 2547, sum loss: 8973.478516, avg loss: 5.405710, ppl: 222.674316 +epoch: 0, batch: 2548, sum loss: 9120.604492, avg loss: 5.358757, ppl: 212.460693 +epoch: 0, batch: 2549, sum loss: 9718.476562, avg loss: 5.562952, ppl: 260.590851 +epoch: 0, batch: 2550, sum loss: 10491.562500, avg loss: 5.317569, ppl: 203.887589 +epoch: 0, batch: 2551, sum loss: 8797.196289, avg loss: 5.309111, ppl: 202.170349 +epoch: 0, batch: 2552, sum loss: 9686.558594, avg loss: 5.227500, ppl: 186.326492 +epoch: 0, batch: 2553, sum loss: 8182.377930, avg loss: 5.272151, ppl: 194.834595 +epoch: 0, batch: 2554, sum loss: 9239.987305, avg loss: 5.292089, ppl: 198.758194 +epoch: 0, batch: 2555, sum loss: 8218.714844, avg loss: 5.336828, ppl: 207.852310 +epoch: 0, batch: 2556, sum loss: 10163.623047, avg loss: 5.332436, ppl: 206.941391 +epoch: 0, batch: 2557, sum loss: 9054.131836, avg loss: 5.341671, ppl: 208.861328 +epoch: 0, batch: 2558, sum loss: 9690.532227, avg loss: 5.419761, ppl: 225.825073 +epoch: 0, batch: 2559, sum loss: 9169.033203, avg loss: 5.396724, ppl: 220.682220 +epoch: 0, batch: 2560, sum loss: 7350.149414, avg loss: 5.194452, ppl: 180.269287 +epoch: 0, batch: 2561, sum loss: 9535.248047, avg loss: 5.420835, ppl: 226.067810 +epoch: 0, batch: 2562, sum loss: 8417.322266, avg loss: 5.300581, ppl: 200.453232 +epoch: 0, batch: 2563, sum loss: 7845.722168, avg loss: 5.355442, ppl: 211.757462 +epoch: 0, batch: 2564, sum loss: 9692.347656, avg loss: 
5.196969, ppl: 180.723648 +epoch: 0, batch: 2565, sum loss: 8140.461914, avg loss: 5.204899, ppl: 182.162445 +epoch: 0, batch: 2566, sum loss: 9464.914062, avg loss: 5.281760, ppl: 196.715836 +epoch: 0, batch: 2567, sum loss: 9898.033203, avg loss: 5.373525, ppl: 215.621628 +epoch: 0, batch: 2568, sum loss: 8913.959961, avg loss: 5.376333, ppl: 216.227859 +epoch: 0, batch: 2569, sum loss: 8646.000000, avg loss: 5.217864, ppl: 184.539505 +epoch: 0, batch: 2570, sum loss: 8421.806641, avg loss: 5.176280, ppl: 177.022980 +epoch: 0, batch: 2571, sum loss: 9260.719727, avg loss: 5.229091, ppl: 186.623032 +epoch: 0, batch: 2572, sum loss: 9656.055664, avg loss: 5.373431, ppl: 215.601379 +epoch: 0, batch: 2573, sum loss: 9688.515625, avg loss: 5.150726, ppl: 172.556686 +epoch: 0, batch: 2574, sum loss: 9144.240234, avg loss: 5.258333, ppl: 192.160843 +epoch: 0, batch: 2575, sum loss: 8338.507812, avg loss: 5.147227, ppl: 171.954056 +epoch: 0, batch: 2576, sum loss: 8245.054688, avg loss: 5.467543, ppl: 236.877380 +epoch: 0, batch: 2577, sum loss: 9927.054688, avg loss: 5.314269, ppl: 203.215927 +epoch: 0, batch: 2578, sum loss: 10700.865234, avg loss: 5.171999, ppl: 176.266754 +epoch: 0, batch: 2579, sum loss: 9326.404297, avg loss: 5.184216, ppl: 178.433426 +epoch: 0, batch: 2580, sum loss: 8124.878906, avg loss: 5.324298, ppl: 205.264297 +epoch: 0, batch: 2581, sum loss: 9114.464844, avg loss: 5.172795, ppl: 176.407257 +epoch: 0, batch: 2582, sum loss: 9582.567383, avg loss: 5.551893, ppl: 257.724915 +epoch: 0, batch: 2583, sum loss: 8144.427246, avg loss: 5.217443, ppl: 184.461823 +epoch: 0, batch: 2584, sum loss: 8552.338867, avg loss: 5.176961, ppl: 177.143646 +epoch: 0, batch: 2585, sum loss: 8935.294922, avg loss: 5.246797, ppl: 189.956787 +epoch: 0, batch: 2586, sum loss: 7880.924316, avg loss: 5.267998, ppl: 194.027084 +epoch: 0, batch: 2587, sum loss: 8827.477539, avg loss: 5.263851, ppl: 193.224106 +epoch: 0, batch: 2588, sum loss: 8659.482422, avg loss: 
5.415561, ppl: 224.878708 +epoch: 0, batch: 2589, sum loss: 10353.478516, avg loss: 5.492561, ppl: 242.878510 +epoch: 0, batch: 2590, sum loss: 8433.816406, avg loss: 5.469401, ppl: 237.317963 +epoch: 0, batch: 2591, sum loss: 8550.179688, avg loss: 5.313971, ppl: 203.155365 +epoch: 0, batch: 2592, sum loss: 9412.501953, avg loss: 5.348012, ppl: 210.190125 +epoch: 0, batch: 2593, sum loss: 9426.390625, avg loss: 5.292752, ppl: 198.889969 +epoch: 0, batch: 2594, sum loss: 9398.944336, avg loss: 5.373896, ppl: 215.701630 +epoch: 0, batch: 2595, sum loss: 8888.648438, avg loss: 5.383797, ppl: 217.847916 +epoch: 0, batch: 2596, sum loss: 9534.969727, avg loss: 5.276685, ppl: 195.719940 +epoch: 0, batch: 2597, sum loss: 8756.994141, avg loss: 5.268950, ppl: 194.211838 +epoch: 0, batch: 2598, sum loss: 8648.094727, avg loss: 5.358175, ppl: 212.337143 +epoch: 0, batch: 2599, sum loss: 9301.586914, avg loss: 5.336539, ppl: 207.792252 +epoch: 0, batch: 2600, sum loss: 9820.910156, avg loss: 5.405014, ppl: 222.519348 +epoch: 0, batch: 2601, sum loss: 9354.171875, avg loss: 5.240433, ppl: 188.751770 +epoch: 0, batch: 2602, sum loss: 8913.222656, avg loss: 5.305490, ppl: 201.439682 +epoch: 0, batch: 2603, sum loss: 11386.793945, avg loss: 5.406835, ppl: 222.924820 +epoch: 0, batch: 2604, sum loss: 9362.012695, avg loss: 5.361978, ppl: 213.146042 +epoch: 0, batch: 2605, sum loss: 9546.781250, avg loss: 5.312622, ppl: 202.881500 +epoch: 0, batch: 2606, sum loss: 9348.719727, avg loss: 5.451149, ppl: 233.025650 +epoch: 0, batch: 2607, sum loss: 9007.041016, avg loss: 5.267275, ppl: 193.886963 +epoch: 0, batch: 2608, sum loss: 8733.164062, avg loss: 5.286419, ppl: 197.634415 +epoch: 0, batch: 2609, sum loss: 7824.713867, avg loss: 5.168239, ppl: 175.605347 +epoch: 0, batch: 2610, sum loss: 8863.764648, avg loss: 5.414639, ppl: 224.671524 +epoch: 0, batch: 2611, sum loss: 8027.880859, avg loss: 5.165946, ppl: 175.203201 +epoch: 0, batch: 2612, sum loss: 8507.839844, avg loss: 
5.190872, ppl: 179.625153 +epoch: 0, batch: 2613, sum loss: 9086.537109, avg loss: 5.228157, ppl: 186.448868 +epoch: 0, batch: 2614, sum loss: 9490.950195, avg loss: 5.229174, ppl: 186.638519 +epoch: 0, batch: 2615, sum loss: 10613.590820, avg loss: 5.304143, ppl: 201.168610 +epoch: 0, batch: 2616, sum loss: 9030.243164, avg loss: 5.201753, ppl: 181.590317 +epoch: 0, batch: 2617, sum loss: 8449.177734, avg loss: 5.402287, ppl: 221.913452 +epoch: 0, batch: 2618, sum loss: 10207.521484, avg loss: 5.380876, ppl: 217.212479 +epoch: 0, batch: 2619, sum loss: 8601.323242, avg loss: 5.280125, ppl: 196.394363 +epoch: 0, batch: 2620, sum loss: 9192.247070, avg loss: 5.319587, ppl: 204.299530 +epoch: 0, batch: 2621, sum loss: 8064.491699, avg loss: 5.065635, ppl: 158.481079 +epoch: 0, batch: 2622, sum loss: 9321.802734, avg loss: 5.272513, ppl: 194.905121 +epoch: 0, batch: 2623, sum loss: 8198.146484, avg loss: 5.088856, ppl: 162.204147 +epoch: 0, batch: 2624, sum loss: 9044.171875, avg loss: 5.174012, ppl: 176.621979 +epoch: 0, batch: 2625, sum loss: 8751.795898, avg loss: 5.209403, ppl: 182.984711 +epoch: 0, batch: 2626, sum loss: 7459.747559, avg loss: 5.180380, ppl: 177.750412 +epoch: 0, batch: 2627, sum loss: 8084.128418, avg loss: 5.175498, ppl: 176.884766 +epoch: 0, batch: 2628, sum loss: 7259.460938, avg loss: 4.921669, ppl: 137.231400 +epoch: 0, batch: 2629, sum loss: 7879.145996, avg loss: 5.211076, ppl: 183.291138 +epoch: 0, batch: 2630, sum loss: 9100.312500, avg loss: 5.423309, ppl: 226.627762 +epoch: 0, batch: 2631, sum loss: 8246.954102, avg loss: 5.283123, ppl: 196.984009 +epoch: 0, batch: 2632, sum loss: 9524.557617, avg loss: 5.387194, ppl: 218.589096 +epoch: 0, batch: 2633, sum loss: 8120.611328, avg loss: 5.123414, ppl: 167.907639 +epoch: 0, batch: 2634, sum loss: 8476.119141, avg loss: 5.137042, ppl: 170.211548 +epoch: 0, batch: 2635, sum loss: 8212.181641, avg loss: 5.142255, ppl: 171.101212 +epoch: 0, batch: 2636, sum loss: 9056.920898, avg loss: 
5.175383, ppl: 176.864349 +epoch: 0, batch: 2637, sum loss: 9124.795898, avg loss: 5.412097, ppl: 224.101135 +epoch: 0, batch: 2638, sum loss: 8687.048828, avg loss: 5.158580, ppl: 173.917374 +epoch: 0, batch: 2639, sum loss: 8941.564453, avg loss: 5.253563, ppl: 191.246460 +epoch: 0, batch: 2640, sum loss: 8703.410156, avg loss: 5.134755, ppl: 169.822739 +epoch: 0, batch: 2641, sum loss: 9877.618164, avg loss: 5.204225, ppl: 182.039749 +epoch: 0, batch: 2642, sum loss: 8239.195312, avg loss: 5.301928, ppl: 200.723343 +epoch: 0, batch: 2643, sum loss: 10063.895508, avg loss: 5.434069, ppl: 229.079407 +epoch: 0, batch: 2644, sum loss: 9392.685547, avg loss: 5.376465, ppl: 216.256531 +epoch: 0, batch: 2645, sum loss: 8456.742188, avg loss: 5.236373, ppl: 187.987030 +epoch: 0, batch: 2646, sum loss: 7387.311523, avg loss: 5.130077, ppl: 169.030197 +epoch: 0, batch: 2647, sum loss: 8697.849609, avg loss: 5.316534, ppl: 203.676727 +epoch: 0, batch: 2648, sum loss: 8859.326172, avg loss: 5.547481, ppl: 256.590271 +epoch: 0, batch: 2649, sum loss: 9081.440430, avg loss: 5.415289, ppl: 224.817596 +epoch: 0, batch: 2650, sum loss: 7880.463379, avg loss: 5.260656, ppl: 192.607864 +epoch: 0, batch: 2651, sum loss: 8817.475586, avg loss: 5.356911, ppl: 212.068893 +epoch: 0, batch: 2652, sum loss: 8219.897461, avg loss: 5.033617, ppl: 153.487244 +epoch: 0, batch: 2653, sum loss: 8482.875000, avg loss: 5.138023, ppl: 170.378586 +epoch: 0, batch: 2654, sum loss: 8147.601562, avg loss: 5.232885, ppl: 187.332458 +epoch: 0, batch: 2655, sum loss: 8446.454102, avg loss: 5.201018, ppl: 181.456924 +epoch: 0, batch: 2656, sum loss: 7813.296387, avg loss: 5.261479, ppl: 192.766357 +epoch: 0, batch: 2657, sum loss: 8811.119141, avg loss: 5.170845, ppl: 176.063477 +epoch: 0, batch: 2658, sum loss: 9089.047852, avg loss: 5.296648, ppl: 199.666412 +epoch: 0, batch: 2659, sum loss: 8362.929688, avg loss: 5.233373, ppl: 187.423859 +epoch: 0, batch: 2660, sum loss: 7867.267578, avg loss: 
5.227420, ppl: 186.311554 +epoch: 0, batch: 2661, sum loss: 9728.510742, avg loss: 5.247309, ppl: 190.054092 +epoch: 0, batch: 2662, sum loss: 8434.790039, avg loss: 5.242256, ppl: 189.096252 +epoch: 0, batch: 2663, sum loss: 9205.071289, avg loss: 5.171389, ppl: 176.159286 +epoch: 0, batch: 2664, sum loss: 9922.250000, avg loss: 5.413121, ppl: 224.330582 +epoch: 0, batch: 2665, sum loss: 8746.037109, avg loss: 5.252875, ppl: 191.114990 +epoch: 0, batch: 2666, sum loss: 7883.199219, avg loss: 5.301412, ppl: 200.619812 +epoch: 0, batch: 2667, sum loss: 9499.822266, avg loss: 5.254326, ppl: 191.392517 +epoch: 0, batch: 2668, sum loss: 7806.240234, avg loss: 4.991202, ppl: 147.113205 +epoch: 0, batch: 2669, sum loss: 10071.037109, avg loss: 5.234427, ppl: 187.621567 +epoch: 0, batch: 2670, sum loss: 7577.238281, avg loss: 5.186337, ppl: 178.812363 +epoch: 0, batch: 2671, sum loss: 8203.419922, avg loss: 5.289116, ppl: 198.168243 +epoch: 0, batch: 2672, sum loss: 9399.808594, avg loss: 5.245429, ppl: 189.697189 +epoch: 0, batch: 2673, sum loss: 9646.750977, avg loss: 5.197603, ppl: 180.838211 +epoch: 0, batch: 2674, sum loss: 8773.265625, avg loss: 5.237771, ppl: 188.250031 +epoch: 0, batch: 2675, sum loss: 8965.988281, avg loss: 5.155830, ppl: 173.439682 +epoch: 0, batch: 2676, sum loss: 9257.907227, avg loss: 5.395051, ppl: 220.313385 +epoch: 0, batch: 2677, sum loss: 8327.367188, avg loss: 5.293940, ppl: 199.126358 +epoch: 0, batch: 2678, sum loss: 11333.499023, avg loss: 5.286147, ppl: 197.580704 +epoch: 0, batch: 2679, sum loss: 7990.200195, avg loss: 5.154968, ppl: 173.290314 +epoch: 0, batch: 2680, sum loss: 8363.863281, avg loss: 5.293584, ppl: 199.055634 +epoch: 0, batch: 2681, sum loss: 8328.951172, avg loss: 5.157245, ppl: 173.685318 +epoch: 0, batch: 2682, sum loss: 8172.937500, avg loss: 5.296784, ppl: 199.693542 +epoch: 0, batch: 2683, sum loss: 7912.916016, avg loss: 5.404997, ppl: 222.515625 +epoch: 0, batch: 2684, sum loss: 9743.444336, avg loss: 
5.286730, ppl: 197.695953 +epoch: 0, batch: 2685, sum loss: 9239.162109, avg loss: 5.261482, ppl: 192.767014 +epoch: 0, batch: 2686, sum loss: 8650.865234, avg loss: 5.091740, ppl: 162.672607 +epoch: 0, batch: 2687, sum loss: 8330.892578, avg loss: 5.148883, ppl: 172.238968 +epoch: 0, batch: 2688, sum loss: 9102.392578, avg loss: 5.231260, ppl: 187.028366 +epoch: 0, batch: 2689, sum loss: 7441.670898, avg loss: 4.931525, ppl: 138.590729 +epoch: 0, batch: 2690, sum loss: 8193.484375, avg loss: 5.124130, ppl: 168.027939 +epoch: 0, batch: 2691, sum loss: 8733.219727, avg loss: 5.410917, ppl: 223.836716 +epoch: 0, batch: 2692, sum loss: 7806.055664, avg loss: 4.965684, ppl: 143.406662 +epoch: 0, batch: 2693, sum loss: 8274.039062, avg loss: 5.276811, ppl: 195.744583 +epoch: 0, batch: 2694, sum loss: 10185.080078, avg loss: 5.349307, ppl: 210.462402 +epoch: 0, batch: 2695, sum loss: 9597.253906, avg loss: 5.230112, ppl: 186.813644 +epoch: 0, batch: 2696, sum loss: 8171.898438, avg loss: 5.362138, ppl: 213.180298 +epoch: 0, batch: 2697, sum loss: 8125.245117, avg loss: 5.188534, ppl: 179.205688 +epoch: 0, batch: 2698, sum loss: 8775.838867, avg loss: 5.270774, ppl: 194.566574 +epoch: 0, batch: 2699, sum loss: 10668.836914, avg loss: 5.263363, ppl: 193.129868 +epoch: 0, batch: 2700, sum loss: 9093.687500, avg loss: 5.238299, ppl: 188.349518 +epoch: 0, batch: 2701, sum loss: 8245.272461, avg loss: 5.278664, ppl: 196.107635 +epoch: 0, batch: 2702, sum loss: 9152.808594, avg loss: 5.343145, ppl: 209.169586 +epoch: 0, batch: 2703, sum loss: 9670.271484, avg loss: 5.387338, ppl: 218.620682 +epoch: 0, batch: 2704, sum loss: 9897.035156, avg loss: 5.384676, ppl: 218.039444 +epoch: 0, batch: 2705, sum loss: 8108.838867, avg loss: 5.231509, ppl: 187.074921 +epoch: 0, batch: 2706, sum loss: 7805.964844, avg loss: 5.108616, ppl: 165.441208 +epoch: 0, batch: 2707, sum loss: 8663.200195, avg loss: 5.165892, ppl: 175.193604 +epoch: 0, batch: 2708, sum loss: 8312.361328, avg loss: 
5.001421, ppl: 148.624207 +epoch: 0, batch: 2709, sum loss: 8955.109375, avg loss: 5.239971, ppl: 188.664566 +epoch: 0, batch: 2710, sum loss: 10438.119141, avg loss: 5.177639, ppl: 177.263718 +epoch: 0, batch: 2711, sum loss: 9522.481445, avg loss: 5.158441, ppl: 173.893066 +epoch: 0, batch: 2712, sum loss: 7770.551758, avg loss: 5.036003, ppl: 153.853775 +epoch: 0, batch: 2713, sum loss: 9863.882812, avg loss: 5.260737, ppl: 192.623489 +epoch: 0, batch: 2714, sum loss: 8502.953125, avg loss: 5.168968, ppl: 175.733429 +epoch: 0, batch: 2715, sum loss: 9172.456055, avg loss: 5.220521, ppl: 185.030640 +epoch: 0, batch: 2716, sum loss: 7248.026855, avg loss: 5.252193, ppl: 190.984726 +epoch: 0, batch: 2717, sum loss: 8426.069336, avg loss: 5.279492, ppl: 196.270126 +epoch: 0, batch: 2718, sum loss: 8484.297852, avg loss: 5.253435, ppl: 191.222015 +epoch: 0, batch: 2719, sum loss: 8802.651367, avg loss: 5.290055, ppl: 198.354385 +epoch: 0, batch: 2720, sum loss: 9339.865234, avg loss: 5.255974, ppl: 191.708084 +epoch: 0, batch: 2721, sum loss: 7845.939453, avg loss: 5.032675, ppl: 153.342621 +epoch: 0, batch: 2722, sum loss: 8818.103516, avg loss: 5.416525, ppl: 225.095642 +epoch: 0, batch: 2723, sum loss: 8736.152344, avg loss: 5.090998, ppl: 162.552032 +epoch: 0, batch: 2724, sum loss: 7342.242676, avg loss: 5.301259, ppl: 200.589111 +epoch: 0, batch: 2725, sum loss: 9884.282227, avg loss: 5.249220, ppl: 190.417755 +epoch: 0, batch: 2726, sum loss: 8105.439453, avg loss: 5.143045, ppl: 171.236465 +epoch: 0, batch: 2727, sum loss: 8284.659180, avg loss: 5.113987, ppl: 166.332275 +epoch: 0, batch: 2728, sum loss: 8597.628906, avg loss: 5.313738, ppl: 203.108093 +epoch: 0, batch: 2729, sum loss: 9000.290039, avg loss: 5.175555, ppl: 176.894806 +epoch: 0, batch: 2730, sum loss: 8939.569336, avg loss: 5.185365, ppl: 178.638672 +epoch: 0, batch: 2731, sum loss: 9253.168945, avg loss: 5.357944, ppl: 212.288040 +epoch: 0, batch: 2732, sum loss: 10010.416992, avg loss: 
5.205625, ppl: 182.294785 +epoch: 0, batch: 2733, sum loss: 8127.872559, avg loss: 5.167115, ppl: 175.408096 +epoch: 0, batch: 2734, sum loss: 9320.467773, avg loss: 5.221551, ppl: 185.221222 +epoch: 0, batch: 2735, sum loss: 9811.983398, avg loss: 5.318148, ppl: 204.005737 +epoch: 0, batch: 2736, sum loss: 9527.338867, avg loss: 5.407116, ppl: 222.987656 +epoch: 0, batch: 2737, sum loss: 9182.141602, avg loss: 5.326068, ppl: 205.627930 +epoch: 0, batch: 2738, sum loss: 10070.112305, avg loss: 5.275072, ppl: 195.404572 +epoch: 0, batch: 2739, sum loss: 9379.688477, avg loss: 5.213835, ppl: 183.797531 +epoch: 0, batch: 2740, sum loss: 8599.707031, avg loss: 5.189926, ppl: 179.455215 +epoch: 0, batch: 2741, sum loss: 6648.803711, avg loss: 4.976650, ppl: 144.987823 +epoch: 0, batch: 2742, sum loss: 9269.360352, avg loss: 5.333349, ppl: 207.130447 +epoch: 0, batch: 2743, sum loss: 9080.350586, avg loss: 5.363468, ppl: 213.463898 +epoch: 0, batch: 2744, sum loss: 8116.282227, avg loss: 5.270313, ppl: 194.476868 +epoch: 0, batch: 2745, sum loss: 9526.576172, avg loss: 5.194425, ppl: 180.264481 +epoch: 0, batch: 2746, sum loss: 9420.074219, avg loss: 5.173023, ppl: 176.447388 +epoch: 0, batch: 2747, sum loss: 8853.290039, avg loss: 5.141284, ppl: 170.935028 +epoch: 0, batch: 2748, sum loss: 10130.227539, avg loss: 5.137032, ppl: 170.209839 +epoch: 0, batch: 2749, sum loss: 8347.212891, avg loss: 5.423790, ppl: 226.736816 +epoch: 0, batch: 2750, sum loss: 9536.594727, avg loss: 5.205565, ppl: 182.283829 +epoch: 0, batch: 2751, sum loss: 8167.781738, avg loss: 5.222367, ppl: 185.372406 +epoch: 0, batch: 2752, sum loss: 9444.502930, avg loss: 5.212198, ppl: 183.496994 +epoch: 0, batch: 2753, sum loss: 9871.066406, avg loss: 5.165393, ppl: 175.106323 +epoch: 0, batch: 2754, sum loss: 8224.739258, avg loss: 5.351164, ppl: 210.853653 +epoch: 0, batch: 2755, sum loss: 9064.629883, avg loss: 5.215552, ppl: 184.113480 +epoch: 0, batch: 2756, sum loss: 9008.619141, avg loss: 
5.449860, ppl: 232.725601 +epoch: 0, batch: 2757, sum loss: 8180.008789, avg loss: 5.138196, ppl: 170.408157 +epoch: 0, batch: 2758, sum loss: 9761.311523, avg loss: 5.328227, ppl: 206.072296 +epoch: 0, batch: 2759, sum loss: 8692.556641, avg loss: 5.242796, ppl: 189.198441 +epoch: 0, batch: 2760, sum loss: 8346.406250, avg loss: 5.326360, ppl: 205.687958 +epoch: 0, batch: 2761, sum loss: 8848.397461, avg loss: 5.150406, ppl: 172.501495 +epoch: 0, batch: 2762, sum loss: 9007.014648, avg loss: 5.233593, ppl: 187.465240 +epoch: 0, batch: 2763, sum loss: 8184.230957, avg loss: 5.249667, ppl: 190.502853 +epoch: 0, batch: 2764, sum loss: 8983.997070, avg loss: 5.353991, ppl: 211.450516 +epoch: 0, batch: 2765, sum loss: 7794.451660, avg loss: 5.144853, ppl: 171.546204 +epoch: 0, batch: 2766, sum loss: 8135.173340, avg loss: 5.031029, ppl: 153.090424 +epoch: 0, batch: 2767, sum loss: 9018.740234, avg loss: 5.130114, ppl: 169.036316 +epoch: 0, batch: 2768, sum loss: 9770.921875, avg loss: 5.383428, ppl: 217.767532 +epoch: 0, batch: 2769, sum loss: 9050.834961, avg loss: 5.139599, ppl: 170.647385 +epoch: 0, batch: 2770, sum loss: 8034.441406, avg loss: 4.926083, ppl: 137.838547 +epoch: 0, batch: 2771, sum loss: 8574.324219, avg loss: 5.196560, ppl: 180.649811 +epoch: 0, batch: 2772, sum loss: 9323.408203, avg loss: 5.252624, ppl: 191.066971 +epoch: 0, batch: 2773, sum loss: 9867.132812, avg loss: 5.558948, ppl: 259.549530 +epoch: 0, batch: 2774, sum loss: 8536.012695, avg loss: 5.065883, ppl: 158.520309 +epoch: 0, batch: 2775, sum loss: 8173.364746, avg loss: 5.121156, ppl: 167.528885 +epoch: 0, batch: 2776, sum loss: 9540.574219, avg loss: 5.063999, ppl: 158.222015 +epoch: 0, batch: 2777, sum loss: 7770.151367, avg loss: 4.949141, ppl: 141.053757 +epoch: 0, batch: 2778, sum loss: 6802.419434, avg loss: 5.122304, ppl: 167.721344 +epoch: 0, batch: 2779, sum loss: 9998.966797, avg loss: 5.393186, ppl: 219.902802 +epoch: 0, batch: 2780, sum loss: 7954.970703, avg loss: 
5.115737, ppl: 166.623535 +epoch: 0, batch: 2781, sum loss: 9656.048828, avg loss: 5.239310, ppl: 188.540009 +epoch: 0, batch: 2782, sum loss: 8997.839844, avg loss: 5.106606, ppl: 165.109100 +epoch: 0, batch: 2783, sum loss: 9920.056641, avg loss: 5.350624, ppl: 210.739670 +epoch: 0, batch: 2784, sum loss: 9183.360352, avg loss: 5.262671, ppl: 192.996201 +epoch: 0, batch: 2785, sum loss: 8824.800781, avg loss: 5.265395, ppl: 193.522675 +epoch: 0, batch: 2786, sum loss: 8243.885742, avg loss: 5.353172, ppl: 211.277466 +epoch: 0, batch: 2787, sum loss: 9346.804688, avg loss: 5.055059, ppl: 156.813843 +epoch: 0, batch: 2788, sum loss: 8506.236328, avg loss: 5.186729, ppl: 178.882553 +epoch: 0, batch: 2789, sum loss: 10240.535156, avg loss: 5.358731, ppl: 212.455231 +epoch: 0, batch: 2790, sum loss: 8588.783203, avg loss: 5.208480, ppl: 182.815948 +epoch: 0, batch: 2791, sum loss: 9007.238281, avg loss: 5.123571, ppl: 167.933975 +epoch: 0, batch: 2792, sum loss: 8970.807617, avg loss: 5.132041, ppl: 169.362518 +epoch: 0, batch: 2793, sum loss: 8364.774414, avg loss: 5.224719, ppl: 185.808868 +epoch: 0, batch: 2794, sum loss: 9068.077148, avg loss: 5.232589, ppl: 187.277084 +epoch: 0, batch: 2795, sum loss: 9664.468750, avg loss: 5.286909, ppl: 197.731216 +epoch: 0, batch: 2796, sum loss: 9861.986328, avg loss: 5.215223, ppl: 184.052826 +epoch: 0, batch: 2797, sum loss: 9238.830078, avg loss: 5.542190, ppl: 255.236252 +epoch: 0, batch: 2798, sum loss: 7951.721191, avg loss: 5.130143, ppl: 169.041229 +epoch: 0, batch: 2799, sum loss: 9888.196289, avg loss: 5.344971, ppl: 209.551849 +epoch: 0, batch: 2800, sum loss: 8793.704102, avg loss: 5.249973, ppl: 190.561096 +epoch: 0, batch: 2801, sum loss: 8707.755859, avg loss: 5.033385, ppl: 153.451538 +epoch: 0, batch: 2802, sum loss: 8084.051758, avg loss: 5.283694, ppl: 197.096664 +epoch: 0, batch: 2803, sum loss: 7167.854492, avg loss: 5.012486, ppl: 150.277863 +epoch: 0, batch: 2804, sum loss: 9997.692383, avg loss: 
5.228919, ppl: 186.590912 +epoch: 0, batch: 2805, sum loss: 8615.895508, avg loss: 5.190299, ppl: 179.522141 +epoch: 0, batch: 2806, sum loss: 8853.980469, avg loss: 5.282805, ppl: 196.921463 +epoch: 0, batch: 2807, sum loss: 8683.374023, avg loss: 4.973296, ppl: 144.502335 +epoch: 0, batch: 2808, sum loss: 8450.027344, avg loss: 5.177713, ppl: 177.276901 +epoch: 0, batch: 2809, sum loss: 8236.669922, avg loss: 5.246287, ppl: 189.859985 +epoch: 0, batch: 2810, sum loss: 8574.332031, avg loss: 5.143570, ppl: 171.326385 +epoch: 0, batch: 2811, sum loss: 7075.107422, avg loss: 5.007153, ppl: 149.478577 +epoch: 0, batch: 2812, sum loss: 9289.041992, avg loss: 5.308024, ppl: 201.950760 +epoch: 0, batch: 2813, sum loss: 9292.815430, avg loss: 5.409090, ppl: 223.428085 +epoch: 0, batch: 2814, sum loss: 7431.851074, avg loss: 5.128952, ppl: 168.840088 +epoch: 0, batch: 2815, sum loss: 7322.045898, avg loss: 5.248779, ppl: 190.333694 +epoch: 0, batch: 2816, sum loss: 7938.583008, avg loss: 5.292388, ppl: 198.817719 +epoch: 0, batch: 2817, sum loss: 8194.453125, avg loss: 5.481239, ppl: 240.144135 +epoch: 0, batch: 2818, sum loss: 9704.634766, avg loss: 5.274258, ppl: 195.245483 +epoch: 0, batch: 2819, sum loss: 7771.834961, avg loss: 5.212499, ppl: 183.552124 +epoch: 0, batch: 2820, sum loss: 8482.597656, avg loss: 5.103849, ppl: 164.654510 +epoch: 0, batch: 2821, sum loss: 9648.351562, avg loss: 5.354246, ppl: 211.504471 +epoch: 0, batch: 2822, sum loss: 7945.642578, avg loss: 5.193231, ppl: 180.049286 +epoch: 0, batch: 2823, sum loss: 8672.307617, avg loss: 5.313914, ppl: 203.143845 +epoch: 0, batch: 2824, sum loss: 9507.611328, avg loss: 5.432921, ppl: 228.816635 +epoch: 0, batch: 2825, sum loss: 8548.360352, avg loss: 5.007827, ppl: 149.579315 +epoch: 0, batch: 2826, sum loss: 7933.959961, avg loss: 5.209429, ppl: 182.989502 +epoch: 0, batch: 2827, sum loss: 8086.690430, avg loss: 5.016557, ppl: 150.890930 +epoch: 0, batch: 2828, sum loss: 9351.916992, avg loss: 
5.233305, ppl: 187.411255 +epoch: 0, batch: 2829, sum loss: 7429.065918, avg loss: 5.127030, ppl: 168.515869 +epoch: 0, batch: 2830, sum loss: 8774.880859, avg loss: 5.383362, ppl: 217.753189 +epoch: 0, batch: 2831, sum loss: 9297.817383, avg loss: 5.165454, ppl: 175.117004 +epoch: 0, batch: 2832, sum loss: 9586.556641, avg loss: 5.162389, ppl: 174.580994 +epoch: 0, batch: 2833, sum loss: 7960.142578, avg loss: 5.092862, ppl: 162.855225 +epoch: 0, batch: 2834, sum loss: 9594.140625, avg loss: 5.200076, ppl: 181.286041 +epoch: 0, batch: 2835, sum loss: 8011.926758, avg loss: 5.168985, ppl: 175.736435 +epoch: 0, batch: 2836, sum loss: 9373.205078, avg loss: 5.289619, ppl: 198.267960 +epoch: 0, batch: 2837, sum loss: 8931.510742, avg loss: 5.386919, ppl: 218.528961 +epoch: 0, batch: 2838, sum loss: 8460.555664, avg loss: 5.212912, ppl: 183.627930 +epoch: 0, batch: 2839, sum loss: 8219.099609, avg loss: 5.261908, ppl: 192.849014 +epoch: 0, batch: 2840, sum loss: 8225.526367, avg loss: 5.170035, ppl: 175.921051 +epoch: 0, batch: 2841, sum loss: 9402.613281, avg loss: 5.342394, ppl: 209.012466 +epoch: 0, batch: 2842, sum loss: 8709.957031, avg loss: 5.014368, ppl: 150.560959 +epoch: 0, batch: 2843, sum loss: 7104.454102, avg loss: 5.020815, ppl: 151.534805 +epoch: 0, batch: 2844, sum loss: 9198.399414, avg loss: 5.360373, ppl: 212.804214 +epoch: 0, batch: 2845, sum loss: 8833.946289, avg loss: 5.172100, ppl: 176.284576 +epoch: 0, batch: 2846, sum loss: 7266.808594, avg loss: 5.246793, ppl: 189.956146 +epoch: 0, batch: 2847, sum loss: 7974.626953, avg loss: 5.144921, ppl: 171.557907 +epoch: 0, batch: 2848, sum loss: 8751.183594, avg loss: 5.035203, ppl: 153.730865 +epoch: 0, batch: 2849, sum loss: 7117.784180, avg loss: 4.956674, ppl: 142.120331 +epoch: 0, batch: 2850, sum loss: 7675.497559, avg loss: 5.103389, ppl: 164.578766 +epoch: 0, batch: 2851, sum loss: 7957.119629, avg loss: 5.077932, ppl: 160.441971 +epoch: 0, batch: 2852, sum loss: 8777.505859, avg loss: 
5.221598, ppl: 185.229889 +epoch: 0, batch: 2853, sum loss: 8092.737793, avg loss: 5.099394, ppl: 163.922516 +epoch: 0, batch: 2854, sum loss: 9761.686523, avg loss: 5.302383, ppl: 200.814774 +epoch: 0, batch: 2855, sum loss: 8753.416992, avg loss: 5.204172, ppl: 182.030029 +epoch: 0, batch: 2856, sum loss: 10567.953125, avg loss: 5.302536, ppl: 200.845413 +epoch: 0, batch: 2857, sum loss: 8601.837891, avg loss: 5.339440, ppl: 208.396042 +epoch: 0, batch: 2858, sum loss: 7627.525879, avg loss: 5.146779, ppl: 171.876999 +epoch: 0, batch: 2859, sum loss: 9114.681641, avg loss: 5.223313, ppl: 185.547943 +epoch: 0, batch: 2860, sum loss: 7241.625977, avg loss: 5.139550, ppl: 170.638992 +epoch: 0, batch: 2861, sum loss: 9926.418945, avg loss: 5.252074, ppl: 190.961868 +epoch: 0, batch: 2862, sum loss: 8824.011719, avg loss: 5.187543, ppl: 179.028214 +epoch: 0, batch: 2863, sum loss: 8910.405273, avg loss: 5.278677, ppl: 196.110153 +epoch: 0, batch: 2864, sum loss: 9859.822266, avg loss: 5.321005, ppl: 204.589371 +epoch: 0, batch: 2865, sum loss: 9514.485352, avg loss: 5.279959, ppl: 196.361771 +epoch: 0, batch: 2866, sum loss: 8878.257812, avg loss: 5.140856, ppl: 170.861923 +epoch: 0, batch: 2867, sum loss: 7876.571289, avg loss: 5.029739, ppl: 152.893082 +epoch: 0, batch: 2868, sum loss: 8328.085938, avg loss: 5.318062, ppl: 203.988235 +epoch: 0, batch: 2869, sum loss: 7964.363281, avg loss: 4.829814, ppl: 125.187668 +epoch: 0, batch: 2870, sum loss: 8735.508789, avg loss: 5.090623, ppl: 162.491043 +epoch: 0, batch: 2871, sum loss: 9611.940430, avg loss: 5.204083, ppl: 182.013885 +epoch: 0, batch: 2872, sum loss: 8709.047852, avg loss: 5.141114, ppl: 170.906097 +epoch: 0, batch: 2873, sum loss: 8082.592773, avg loss: 5.158005, ppl: 173.817307 +epoch: 0, batch: 2874, sum loss: 7825.769043, avg loss: 5.179199, ppl: 177.540497 +epoch: 0, batch: 2875, sum loss: 10159.123047, avg loss: 5.302257, ppl: 200.789398 +epoch: 0, batch: 2876, sum loss: 8384.051758, avg loss: 
5.017386, ppl: 151.016022 +epoch: 0, batch: 2877, sum loss: 8133.865723, avg loss: 4.929615, ppl: 138.326309 +epoch: 0, batch: 2878, sum loss: 9446.776367, avg loss: 5.184839, ppl: 178.544662 +epoch: 0, batch: 2879, sum loss: 8762.332031, avg loss: 5.187882, ppl: 179.088837 +epoch: 0, batch: 2880, sum loss: 8408.251953, avg loss: 5.161603, ppl: 174.443939 +epoch: 0, batch: 2881, sum loss: 7956.906738, avg loss: 5.187032, ppl: 178.936630 +epoch: 0, batch: 2882, sum loss: 8082.624023, avg loss: 5.167918, ppl: 175.548996 +epoch: 0, batch: 2883, sum loss: 8678.291016, avg loss: 5.199695, ppl: 181.216980 +epoch: 0, batch: 2884, sum loss: 9037.047852, avg loss: 5.214684, ppl: 183.953690 +epoch: 0, batch: 2885, sum loss: 8082.336914, avg loss: 5.026329, ppl: 152.372635 +epoch: 0, batch: 2886, sum loss: 8757.290039, avg loss: 5.194122, ppl: 180.209900 +epoch: 0, batch: 2887, sum loss: 7359.286133, avg loss: 5.064891, ppl: 158.363159 +epoch: 0, batch: 2888, sum loss: 8624.250000, avg loss: 5.317047, ppl: 203.781158 +epoch: 0, batch: 2889, sum loss: 7824.057129, avg loss: 5.064115, ppl: 158.240341 +epoch: 0, batch: 2890, sum loss: 7851.086914, avg loss: 5.068487, ppl: 158.933701 +epoch: 0, batch: 2891, sum loss: 8087.900879, avg loss: 5.221369, ppl: 185.187576 +epoch: 0, batch: 2892, sum loss: 9240.510742, avg loss: 5.142188, ppl: 171.089630 +epoch: 0, batch: 2893, sum loss: 9177.811523, avg loss: 5.176431, ppl: 177.049744 +epoch: 0, batch: 2894, sum loss: 10132.512695, avg loss: 5.263643, ppl: 193.183945 +epoch: 0, batch: 2895, sum loss: 8730.106445, avg loss: 5.105325, ppl: 164.897690 +epoch: 0, batch: 2896, sum loss: 7408.013672, avg loss: 5.098426, ppl: 163.763992 +epoch: 0, batch: 2897, sum loss: 8648.466797, avg loss: 5.072415, ppl: 159.559174 +epoch: 0, batch: 2898, sum loss: 8162.275391, avg loss: 5.104612, ppl: 164.780182 +epoch: 0, batch: 2899, sum loss: 9380.580078, avg loss: 5.234699, ppl: 187.672562 +epoch: 0, batch: 2900, sum loss: 8232.032227, avg loss: 
5.157915, ppl: 173.801727 +epoch: 0, batch: 2901, sum loss: 9120.596680, avg loss: 5.287302, ppl: 197.809113 +epoch: 0, batch: 2902, sum loss: 7701.726074, avg loss: 5.027236, ppl: 152.510956 +epoch: 0, batch: 2903, sum loss: 8886.908203, avg loss: 5.072436, ppl: 159.562607 +epoch: 0, batch: 2904, sum loss: 8198.662109, avg loss: 5.146681, ppl: 171.860107 +epoch: 0, batch: 2905, sum loss: 7717.440918, avg loss: 5.083953, ppl: 161.410919 +epoch: 0, batch: 2906, sum loss: 8509.716797, avg loss: 5.071345, ppl: 159.388535 +epoch: 0, batch: 2907, sum loss: 8537.746094, avg loss: 5.161878, ppl: 174.491867 +epoch: 0, batch: 2908, sum loss: 9882.694336, avg loss: 5.256752, ppl: 191.857330 +epoch: 0, batch: 2909, sum loss: 7669.018555, avg loss: 5.206394, ppl: 182.435043 +epoch: 0, batch: 2910, sum loss: 9301.222656, avg loss: 5.254928, ppl: 191.507721 +epoch: 0, batch: 2911, sum loss: 8144.879395, avg loss: 5.109711, ppl: 165.622513 +epoch: 0, batch: 2912, sum loss: 8458.515625, avg loss: 5.173404, ppl: 176.514633 +epoch: 0, batch: 2913, sum loss: 8551.126953, avg loss: 5.163724, ppl: 174.814240 +epoch: 0, batch: 2914, sum loss: 8186.359375, avg loss: 5.174690, ppl: 176.741776 +epoch: 0, batch: 2915, sum loss: 8868.462891, avg loss: 5.174132, ppl: 176.643280 +epoch: 0, batch: 2916, sum loss: 9568.669922, avg loss: 5.166668, ppl: 175.329742 +epoch: 0, batch: 2917, sum loss: 10538.951172, avg loss: 5.328084, ppl: 206.042816 +epoch: 0, batch: 2918, sum loss: 7318.996094, avg loss: 5.125348, ppl: 168.232605 +epoch: 0, batch: 2919, sum loss: 7860.784180, avg loss: 5.045433, ppl: 155.311539 +epoch: 0, batch: 2920, sum loss: 9148.156250, avg loss: 5.278797, ppl: 196.133820 +epoch: 0, batch: 2921, sum loss: 9408.443359, avg loss: 5.256114, ppl: 191.734863 +epoch: 0, batch: 2922, sum loss: 9441.519531, avg loss: 5.089768, ppl: 162.352188 +epoch: 0, batch: 2923, sum loss: 8700.629883, avg loss: 5.121030, ppl: 167.507874 +epoch: 0, batch: 2924, sum loss: 9066.746094, avg loss: 
5.076566, ppl: 160.222870 +epoch: 0, batch: 2925, sum loss: 8656.203125, avg loss: 5.317078, ppl: 203.787567 +epoch: 0, batch: 2926, sum loss: 7616.050781, avg loss: 5.132110, ppl: 169.374054 +epoch: 0, batch: 2927, sum loss: 9439.619141, avg loss: 5.075064, ppl: 159.982452 +epoch: 0, batch: 2928, sum loss: 9100.533203, avg loss: 5.275671, ppl: 195.521729 +epoch: 0, batch: 2929, sum loss: 8573.621094, avg loss: 5.221450, ppl: 185.202591 +epoch: 0, batch: 2930, sum loss: 9151.936523, avg loss: 5.247670, ppl: 190.122711 +epoch: 0, batch: 2931, sum loss: 7485.025391, avg loss: 4.960256, ppl: 142.630249 +epoch: 0, batch: 2932, sum loss: 8311.792969, avg loss: 5.105524, ppl: 164.930481 +epoch: 0, batch: 2933, sum loss: 7799.097656, avg loss: 4.917464, ppl: 136.655579 +epoch: 0, batch: 2934, sum loss: 8896.263672, avg loss: 5.276550, ppl: 195.693619 +epoch: 0, batch: 2935, sum loss: 7773.614258, avg loss: 5.090775, ppl: 162.515839 +epoch: 0, batch: 2936, sum loss: 7877.222656, avg loss: 5.039810, ppl: 154.440628 +epoch: 0, batch: 2937, sum loss: 8144.040039, avg loss: 5.213854, ppl: 183.801117 +epoch: 0, batch: 2938, sum loss: 8988.104492, avg loss: 5.183451, ppl: 178.297089 +epoch: 0, batch: 2939, sum loss: 9104.796875, avg loss: 5.268980, ppl: 194.217667 +epoch: 0, batch: 2940, sum loss: 9007.031250, avg loss: 5.138067, ppl: 170.386139 +epoch: 0, batch: 2941, sum loss: 9142.807617, avg loss: 5.260534, ppl: 192.584274 +epoch: 0, batch: 2942, sum loss: 8736.981445, avg loss: 5.121325, ppl: 167.557312 +epoch: 0, batch: 2943, sum loss: 8745.877930, avg loss: 5.175076, ppl: 176.809967 +epoch: 0, batch: 2944, sum loss: 7124.598633, avg loss: 5.027946, ppl: 152.619141 +epoch: 0, batch: 2945, sum loss: 7601.310547, avg loss: 5.153431, ppl: 173.024109 +epoch: 0, batch: 2946, sum loss: 10411.900391, avg loss: 5.229483, ppl: 186.696198 +epoch: 0, batch: 2947, sum loss: 8091.540527, avg loss: 5.076249, ppl: 160.172058 +epoch: 0, batch: 2948, sum loss: 7799.428711, avg loss: 
4.961469, ppl: 142.803375 +epoch: 0, batch: 2949, sum loss: 8340.417969, avg loss: 5.161150, ppl: 174.364777 +epoch: 0, batch: 2950, sum loss: 9001.469727, avg loss: 5.221271, ppl: 185.169296 +epoch: 0, batch: 2951, sum loss: 7911.680176, avg loss: 4.847844, ppl: 127.465233 +epoch: 0, batch: 2952, sum loss: 8133.571289, avg loss: 5.093031, ppl: 162.882874 +epoch: 0, batch: 2953, sum loss: 9023.500000, avg loss: 5.286175, ppl: 197.586166 +epoch: 0, batch: 2954, sum loss: 8261.046875, avg loss: 5.166383, ppl: 175.279755 +epoch: 0, batch: 2955, sum loss: 9148.625000, avg loss: 5.099568, ppl: 163.951126 +epoch: 0, batch: 2956, sum loss: 9365.826172, avg loss: 5.154555, ppl: 173.218674 +epoch: 0, batch: 2957, sum loss: 8481.465820, avg loss: 4.974467, ppl: 144.671661 +epoch: 0, batch: 2958, sum loss: 7854.299805, avg loss: 5.157124, ppl: 173.664291 +epoch: 0, batch: 2959, sum loss: 9605.561523, avg loss: 5.098494, ppl: 163.775009 +epoch: 0, batch: 2960, sum loss: 9110.741211, avg loss: 5.138602, ppl: 170.477325 +epoch: 0, batch: 2961, sum loss: 9012.880859, avg loss: 5.071964, ppl: 159.487289 +epoch: 0, batch: 2962, sum loss: 9275.467773, avg loss: 5.210937, ppl: 183.265701 +epoch: 0, batch: 2963, sum loss: 9539.487305, avg loss: 5.338269, ppl: 208.152130 +epoch: 0, batch: 2964, sum loss: 9328.117188, avg loss: 5.237573, ppl: 188.212784 +epoch: 0, batch: 2965, sum loss: 8159.345215, avg loss: 5.213639, ppl: 183.761597 +epoch: 0, batch: 2966, sum loss: 9383.601562, avg loss: 5.152994, ppl: 172.948471 +epoch: 0, batch: 2967, sum loss: 8782.974609, avg loss: 5.041891, ppl: 154.762405 +epoch: 0, batch: 2968, sum loss: 9381.429688, avg loss: 5.154632, ppl: 173.232056 +epoch: 0, batch: 2969, sum loss: 10279.618164, avg loss: 5.170834, ppl: 176.061630 +epoch: 0, batch: 2970, sum loss: 8368.089844, avg loss: 5.093177, ppl: 162.906570 +epoch: 0, batch: 2971, sum loss: 7888.586426, avg loss: 5.213871, ppl: 183.804184 +epoch: 0, batch: 2972, sum loss: 8506.927734, avg loss: 
5.161971, ppl: 174.508011 +epoch: 0, batch: 2973, sum loss: 8584.776367, avg loss: 5.253841, ppl: 191.299713 +epoch: 0, batch: 2974, sum loss: 9038.223633, avg loss: 5.071955, ppl: 159.485855 +epoch: 0, batch: 2975, sum loss: 7994.154297, avg loss: 5.134332, ppl: 169.750839 +epoch: 0, batch: 2976, sum loss: 10336.502930, avg loss: 5.170837, ppl: 176.062134 +epoch: 0, batch: 2977, sum loss: 8324.006836, avg loss: 4.931284, ppl: 138.557297 +epoch: 0, batch: 2978, sum loss: 9198.843750, avg loss: 5.197087, ppl: 180.744934 +epoch: 0, batch: 2979, sum loss: 7194.324707, avg loss: 5.190711, ppl: 179.596115 +epoch: 0, batch: 2980, sum loss: 7770.833984, avg loss: 4.850708, ppl: 127.830803 +epoch: 0, batch: 2981, sum loss: 6761.723633, avg loss: 4.957275, ppl: 142.205811 +epoch: 0, batch: 2982, sum loss: 7925.221680, avg loss: 5.106458, ppl: 165.084534 +epoch: 0, batch: 2983, sum loss: 9123.006836, avg loss: 5.186474, ppl: 178.836838 +epoch: 0, batch: 2984, sum loss: 8438.376953, avg loss: 5.028830, ppl: 152.754196 +epoch: 0, batch: 2985, sum loss: 8138.954102, avg loss: 5.027149, ppl: 152.497650 +epoch: 0, batch: 2986, sum loss: 8972.956055, avg loss: 5.262731, ppl: 193.007889 +epoch: 0, batch: 2987, sum loss: 6754.922852, avg loss: 4.756988, ppl: 116.394821 +epoch: 0, batch: 2988, sum loss: 7779.083496, avg loss: 5.015528, ppl: 150.735657 +epoch: 0, batch: 2989, sum loss: 9149.189453, avg loss: 5.282442, ppl: 196.850021 +epoch: 0, batch: 2990, sum loss: 7276.787598, avg loss: 5.117291, ppl: 166.882660 +epoch: 0, batch: 2991, sum loss: 6570.245117, avg loss: 5.030816, ppl: 153.057800 +epoch: 0, batch: 2992, sum loss: 8118.690430, avg loss: 5.067847, ppl: 158.831955 +epoch: 0, batch: 2993, sum loss: 7125.483887, avg loss: 5.057121, ppl: 157.137512 +epoch: 0, batch: 2994, sum loss: 8252.171875, avg loss: 4.935510, ppl: 139.144043 +epoch: 0, batch: 2995, sum loss: 8397.784180, avg loss: 4.922499, ppl: 137.345428 +epoch: 0, batch: 2996, sum loss: 9427.375977, avg loss: 
5.065758, ppl: 158.500580 +epoch: 0, batch: 2997, sum loss: 8701.007812, avg loss: 5.166870, ppl: 175.365021 +epoch: 0, batch: 2998, sum loss: 7839.000977, avg loss: 4.939509, ppl: 139.701691 +epoch: 0, batch: 2999, sum loss: 7773.243164, avg loss: 5.027971, ppl: 152.623062 +epoch: 0, batch: 3000, sum loss: 9535.439453, avg loss: 5.436396, ppl: 229.613190 +epoch: 0, batch: 3001, sum loss: 9737.160156, avg loss: 5.146491, ppl: 171.827408 +epoch: 0, batch: 3002, sum loss: 9332.834961, avg loss: 5.179154, ppl: 177.532547 +epoch: 0, batch: 3003, sum loss: 7260.947266, avg loss: 4.980073, ppl: 145.485062 +epoch: 0, batch: 3004, sum loss: 7825.766602, avg loss: 5.084969, ppl: 161.574860 +epoch: 0, batch: 3005, sum loss: 7953.779297, avg loss: 5.239644, ppl: 188.602951 +epoch: 0, batch: 3006, sum loss: 9487.215820, avg loss: 5.136555, ppl: 170.128616 +epoch: 0, batch: 3007, sum loss: 7159.612793, avg loss: 5.041981, ppl: 154.776352 +epoch: 0, batch: 3008, sum loss: 7993.584961, avg loss: 4.877111, ppl: 131.250931 +epoch: 0, batch: 3009, sum loss: 7159.423340, avg loss: 4.907076, ppl: 135.243362 +epoch: 0, batch: 3010, sum loss: 9696.153320, avg loss: 5.371830, ppl: 215.256424 +epoch: 0, batch: 3011, sum loss: 8777.718750, avg loss: 5.303758, ppl: 201.091019 +epoch: 0, batch: 3012, sum loss: 8913.047852, avg loss: 5.194084, ppl: 180.203033 +epoch: 0, batch: 3013, sum loss: 7800.312988, avg loss: 5.193284, ppl: 180.058899 +epoch: 0, batch: 3014, sum loss: 8927.394531, avg loss: 5.095545, ppl: 163.292862 +epoch: 0, batch: 3015, sum loss: 7795.063965, avg loss: 5.088162, ppl: 162.091660 +epoch: 0, batch: 3016, sum loss: 9347.853516, avg loss: 5.335533, ppl: 207.583389 +epoch: 0, batch: 3017, sum loss: 8333.990234, avg loss: 5.195755, ppl: 180.504288 +epoch: 0, batch: 3018, sum loss: 8376.382812, avg loss: 5.052101, ppl: 156.350555 +epoch: 0, batch: 3019, sum loss: 9588.292969, avg loss: 5.199725, ppl: 181.222427 +epoch: 0, batch: 3020, sum loss: 8724.577148, avg loss: 
5.078334, ppl: 160.506409 +epoch: 0, batch: 3021, sum loss: 8422.841797, avg loss: 5.151585, ppl: 172.705032 +epoch: 0, batch: 3022, sum loss: 8554.600586, avg loss: 5.043986, ppl: 155.087006 +epoch: 0, batch: 3023, sum loss: 7849.983887, avg loss: 5.205560, ppl: 182.282867 +epoch: 0, batch: 3024, sum loss: 9266.795898, avg loss: 5.229569, ppl: 186.712311 +epoch: 0, batch: 3025, sum loss: 7347.684082, avg loss: 5.063876, ppl: 158.202545 +epoch: 0, batch: 3026, sum loss: 9817.452148, avg loss: 5.175251, ppl: 176.841080 +epoch: 0, batch: 3027, sum loss: 8705.779297, avg loss: 5.106029, ppl: 165.013794 +epoch: 0, batch: 3028, sum loss: 7542.047852, avg loss: 5.123674, ppl: 167.951279 +epoch: 0, batch: 3029, sum loss: 9080.059570, avg loss: 5.272973, ppl: 194.994827 +epoch: 0, batch: 3030, sum loss: 6986.597656, avg loss: 5.019107, ppl: 151.276199 +epoch: 0, batch: 3031, sum loss: 9560.059570, avg loss: 5.187227, ppl: 178.971619 +epoch: 0, batch: 3032, sum loss: 8588.906250, avg loss: 4.874521, ppl: 130.911469 +epoch: 0, batch: 3033, sum loss: 8294.544922, avg loss: 4.966794, ppl: 143.565811 +epoch: 0, batch: 3034, sum loss: 7733.070801, avg loss: 4.969840, ppl: 144.003845 +epoch: 0, batch: 3035, sum loss: 8805.111328, avg loss: 5.002904, ppl: 148.844772 +epoch: 0, batch: 3036, sum loss: 8238.858398, avg loss: 5.066948, ppl: 158.689331 +epoch: 0, batch: 3037, sum loss: 8532.607422, avg loss: 5.199639, ppl: 181.206787 +epoch: 0, batch: 3038, sum loss: 8374.920898, avg loss: 4.883336, ppl: 132.070465 +epoch: 0, batch: 3039, sum loss: 9413.458008, avg loss: 5.116010, ppl: 166.668991 +epoch: 0, batch: 3040, sum loss: 9110.217773, avg loss: 5.080992, ppl: 160.933578 +epoch: 0, batch: 3041, sum loss: 8793.334961, avg loss: 5.121336, ppl: 167.559158 +epoch: 0, batch: 3042, sum loss: 8856.906250, avg loss: 5.194666, ppl: 180.307983 +epoch: 0, batch: 3043, sum loss: 9512.108398, avg loss: 5.299225, ppl: 200.181580 +epoch: 0, batch: 3044, sum loss: 10208.330078, avg loss: 
5.111833, ppl: 165.974319 +epoch: 0, batch: 3045, sum loss: 8548.710938, avg loss: 5.228569, ppl: 186.525696 +epoch: 0, batch: 3046, sum loss: 7770.485352, avg loss: 4.936776, ppl: 139.320312 +epoch: 0, batch: 3047, sum loss: 8665.333984, avg loss: 5.103260, ppl: 164.557419 +epoch: 0, batch: 3048, sum loss: 8106.776367, avg loss: 5.089000, ppl: 162.227509 +epoch: 0, batch: 3049, sum loss: 6873.364258, avg loss: 5.160183, ppl: 174.196411 +epoch: 0, batch: 3050, sum loss: 8904.642578, avg loss: 5.300383, ppl: 200.413483 +epoch: 0, batch: 3051, sum loss: 10251.041992, avg loss: 5.164253, ppl: 174.906708 +epoch: 0, batch: 3052, sum loss: 8829.944336, avg loss: 5.136675, ppl: 170.149139 +epoch: 0, batch: 3053, sum loss: 7231.078125, avg loss: 4.776142, ppl: 118.645691 +epoch: 0, batch: 3054, sum loss: 8978.734375, avg loss: 5.107358, ppl: 165.233154 +epoch: 0, batch: 3055, sum loss: 7983.708008, avg loss: 5.101411, ppl: 164.253479 +epoch: 0, batch: 3056, sum loss: 9402.832031, avg loss: 5.288432, ppl: 198.032684 +epoch: 0, batch: 3057, sum loss: 9001.609375, avg loss: 5.111646, ppl: 165.943298 +epoch: 0, batch: 3058, sum loss: 8535.740234, avg loss: 5.068729, ppl: 158.972214 +epoch: 0, batch: 3059, sum loss: 8578.140625, avg loss: 5.004750, ppl: 149.119766 +epoch: 0, batch: 3060, sum loss: 8914.896484, avg loss: 5.195161, ppl: 180.397247 +epoch: 0, batch: 3061, sum loss: 8350.992188, avg loss: 5.088965, ppl: 162.221939 +epoch: 0, batch: 3062, sum loss: 7812.444336, avg loss: 5.170380, ppl: 175.981720 +epoch: 0, batch: 3063, sum loss: 8288.400391, avg loss: 5.157686, ppl: 173.761856 +epoch: 0, batch: 3064, sum loss: 8571.981445, avg loss: 5.015788, ppl: 150.774918 +epoch: 0, batch: 3065, sum loss: 7334.202148, avg loss: 5.030317, ppl: 152.981476 +epoch: 0, batch: 3066, sum loss: 9183.554688, avg loss: 5.004662, ppl: 149.106689 +epoch: 0, batch: 3067, sum loss: 8589.238281, avg loss: 5.097471, ppl: 163.607574 +epoch: 0, batch: 3068, sum loss: 8827.310547, avg loss: 
5.254352, ppl: 191.397354 +epoch: 0, batch: 3069, sum loss: 8671.938477, avg loss: 5.056524, ppl: 157.043655 +epoch: 0, batch: 3070, sum loss: 8555.638672, avg loss: 5.012091, ppl: 150.218460 +epoch: 0, batch: 3071, sum loss: 8688.256836, avg loss: 5.066039, ppl: 158.545029 +epoch: 0, batch: 3072, sum loss: 8721.662109, avg loss: 5.154647, ppl: 173.234619 +epoch: 0, batch: 3073, sum loss: 7899.983398, avg loss: 5.146569, ppl: 171.840851 +epoch: 0, batch: 3074, sum loss: 8317.378906, avg loss: 5.307836, ppl: 201.912827 +epoch: 0, batch: 3075, sum loss: 9850.000000, avg loss: 5.327204, ppl: 205.861526 +epoch: 0, batch: 3076, sum loss: 9014.399414, avg loss: 4.972090, ppl: 144.328247 +epoch: 0, batch: 3077, sum loss: 8490.209961, avg loss: 5.102290, ppl: 164.397888 +epoch: 0, batch: 3078, sum loss: 8978.192383, avg loss: 5.265802, ppl: 193.601593 +epoch: 0, batch: 3079, sum loss: 6500.202148, avg loss: 5.027225, ppl: 152.509216 +epoch: 0, batch: 3080, sum loss: 8879.639648, avg loss: 5.168591, ppl: 175.667145 +epoch: 0, batch: 3081, sum loss: 7649.138672, avg loss: 4.819873, ppl: 123.949387 +epoch: 0, batch: 3082, sum loss: 7517.343262, avg loss: 4.890920, ppl: 133.075912 +epoch: 0, batch: 3083, sum loss: 7569.913086, avg loss: 4.883815, ppl: 132.133835 +epoch: 0, batch: 3084, sum loss: 7775.036133, avg loss: 4.942807, ppl: 140.163162 +epoch: 0, batch: 3085, sum loss: 7741.994629, avg loss: 4.953291, ppl: 141.640396 +epoch: 0, batch: 3086, sum loss: 6735.377930, avg loss: 4.797278, ppl: 121.180107 +epoch: 0, batch: 3087, sum loss: 9315.599609, avg loss: 5.332341, ppl: 206.921753 +epoch: 0, batch: 3088, sum loss: 9914.704102, avg loss: 5.265377, ppl: 193.519165 +epoch: 0, batch: 3089, sum loss: 8747.546875, avg loss: 5.030217, ppl: 152.966156 +epoch: 0, batch: 3090, sum loss: 8547.995117, avg loss: 5.143198, ppl: 171.262589 +epoch: 0, batch: 3091, sum loss: 9115.541992, avg loss: 5.208881, ppl: 182.889267 +epoch: 0, batch: 3092, sum loss: 7420.214844, avg loss: 
5.061538, ppl: 157.833115 +epoch: 0, batch: 3093, sum loss: 8676.981445, avg loss: 5.233403, ppl: 187.429489 +epoch: 0, batch: 3094, sum loss: 9113.231445, avg loss: 5.270811, ppl: 194.573624 +epoch: 0, batch: 3095, sum loss: 8007.530273, avg loss: 5.001581, ppl: 148.647949 +epoch: 0, batch: 3096, sum loss: 10146.589844, avg loss: 5.086010, ppl: 161.743210 +epoch: 0, batch: 3097, sum loss: 8689.924805, avg loss: 5.017278, ppl: 150.999680 +epoch: 0, batch: 3098, sum loss: 7733.913574, avg loss: 4.882521, ppl: 131.962936 +epoch: 0, batch: 3099, sum loss: 7852.402344, avg loss: 5.011106, ppl: 150.070618 +epoch: 0, batch: 3100, sum loss: 8893.217773, avg loss: 5.191604, ppl: 179.756683 +epoch: 0, batch: 3101, sum loss: 9196.031250, avg loss: 4.987002, ppl: 146.496552 +epoch: 0, batch: 3102, sum loss: 8910.366211, avg loss: 5.112086, ppl: 166.016357 +epoch: 0, batch: 3103, sum loss: 7769.627441, avg loss: 4.996545, ppl: 147.901321 +epoch: 0, batch: 3104, sum loss: 9229.188477, avg loss: 5.196615, ppl: 180.659714 +epoch: 0, batch: 3105, sum loss: 9720.839844, avg loss: 5.192757, ppl: 179.963974 +epoch: 0, batch: 3106, sum loss: 9333.943359, avg loss: 5.282368, ppl: 196.835373 +epoch: 0, batch: 3107, sum loss: 9225.811523, avg loss: 5.156966, ppl: 173.636795 +epoch: 0, batch: 3108, sum loss: 8212.811523, avg loss: 5.047825, ppl: 155.683456 +epoch: 0, batch: 3109, sum loss: 9928.142578, avg loss: 5.410432, ppl: 223.728287 +epoch: 0, batch: 3110, sum loss: 9217.117188, avg loss: 5.236998, ppl: 188.104584 +epoch: 0, batch: 3111, sum loss: 7487.432617, avg loss: 5.052249, ppl: 156.373749 +epoch: 0, batch: 3112, sum loss: 8162.645020, avg loss: 5.146687, ppl: 171.861099 +epoch: 0, batch: 3113, sum loss: 8342.542969, avg loss: 5.108722, ppl: 165.458725 +epoch: 0, batch: 3114, sum loss: 8436.990234, avg loss: 5.119533, ppl: 167.257248 +epoch: 0, batch: 3115, sum loss: 8041.268066, avg loss: 4.858772, ppl: 128.865891 +epoch: 0, batch: 3116, sum loss: 8882.340820, avg loss: 
5.035341, ppl: 153.751984 +epoch: 0, batch: 3117, sum loss: 8100.989258, avg loss: 4.827765, ppl: 124.931366 +epoch: 0, batch: 3118, sum loss: 8017.437500, avg loss: 4.992177, ppl: 147.256729 +epoch: 0, batch: 3119, sum loss: 8909.125000, avg loss: 5.234503, ppl: 187.635880 +epoch: 0, batch: 3120, sum loss: 8241.150391, avg loss: 5.176602, ppl: 177.080048 +epoch: 0, batch: 3121, sum loss: 9469.626953, avg loss: 5.074827, ppl: 159.944473 +epoch: 0, batch: 3122, sum loss: 7838.451172, avg loss: 4.892916, ppl: 133.341797 +epoch: 0, batch: 3123, sum loss: 7021.042969, avg loss: 4.944396, ppl: 140.386108 +epoch: 0, batch: 3124, sum loss: 9318.400391, avg loss: 4.980439, ppl: 145.538223 +epoch: 0, batch: 3125, sum loss: 10420.230469, avg loss: 5.100455, ppl: 164.096603 +epoch: 0, batch: 3126, sum loss: 7859.030762, avg loss: 5.197772, ppl: 180.868820 +epoch: 0, batch: 3127, sum loss: 8637.328125, avg loss: 5.024624, ppl: 152.113037 +epoch: 0, batch: 3128, sum loss: 6792.701660, avg loss: 4.908021, ppl: 135.371307 +epoch: 0, batch: 3129, sum loss: 7693.526855, avg loss: 5.031738, ppl: 153.199005 +epoch: 0, batch: 3130, sum loss: 7796.279785, avg loss: 4.978467, ppl: 145.251541 +epoch: 0, batch: 3131, sum loss: 9218.524414, avg loss: 5.054016, ppl: 156.650253 +epoch: 0, batch: 3132, sum loss: 9569.700195, avg loss: 5.361177, ppl: 212.975464 +epoch: 0, batch: 3133, sum loss: 9242.350586, avg loss: 5.134639, ppl: 169.803055 +epoch: 0, batch: 3134, sum loss: 8466.037109, avg loss: 5.137158, ppl: 170.231354 +epoch: 0, batch: 3135, sum loss: 7960.300293, avg loss: 5.175748, ppl: 176.928879 +epoch: 0, batch: 3136, sum loss: 8116.203613, avg loss: 4.883396, ppl: 132.078400 +epoch: 0, batch: 3137, sum loss: 7956.183594, avg loss: 4.985077, ppl: 146.214890 +epoch: 0, batch: 3138, sum loss: 8622.821289, avg loss: 5.063313, ppl: 158.113403 +epoch: 0, batch: 3139, sum loss: 8493.564453, avg loss: 5.007998, ppl: 149.604919 +epoch: 0, batch: 3140, sum loss: 9362.621094, avg loss: 
5.082856, ppl: 161.233917 +epoch: 0, batch: 3141, sum loss: 9697.401367, avg loss: 5.258894, ppl: 192.268814 +epoch: 0, batch: 3142, sum loss: 7888.644531, avg loss: 4.839659, ppl: 126.426262 +epoch: 0, batch: 3143, sum loss: 7864.395508, avg loss: 5.034824, ppl: 153.672607 +epoch: 0, batch: 3144, sum loss: 7854.027344, avg loss: 4.983520, ppl: 145.987289 +epoch: 0, batch: 3145, sum loss: 8134.807129, avg loss: 5.258440, ppl: 192.181549 +epoch: 0, batch: 3146, sum loss: 10766.069336, avg loss: 5.153695, ppl: 173.069824 +epoch: 0, batch: 3147, sum loss: 9139.612305, avg loss: 5.077562, ppl: 160.382614 +epoch: 0, batch: 3148, sum loss: 7513.842773, avg loss: 5.199891, ppl: 181.252502 +epoch: 0, batch: 3149, sum loss: 10554.749023, avg loss: 5.256349, ppl: 191.780045 +epoch: 0, batch: 3150, sum loss: 10250.400391, avg loss: 5.161329, ppl: 174.396118 +epoch: 0, batch: 3151, sum loss: 8206.066406, avg loss: 5.037487, ppl: 154.082321 +epoch: 0, batch: 3152, sum loss: 7515.417969, avg loss: 4.915251, ppl: 136.353500 +epoch: 0, batch: 3153, sum loss: 8310.910156, avg loss: 4.973615, ppl: 144.548508 +epoch: 0, batch: 3154, sum loss: 8047.593750, avg loss: 5.103103, ppl: 164.531677 +epoch: 0, batch: 3155, sum loss: 7204.427734, avg loss: 5.020507, ppl: 151.488068 +epoch: 0, batch: 3156, sum loss: 8239.770508, avg loss: 5.033458, ppl: 153.462799 +epoch: 0, batch: 3157, sum loss: 7588.639648, avg loss: 4.982692, ppl: 145.866486 +epoch: 0, batch: 3158, sum loss: 9961.513672, avg loss: 5.193698, ppl: 180.133530 +epoch: 0, batch: 3159, sum loss: 7774.698242, avg loss: 4.986978, ppl: 146.493057 +epoch: 0, batch: 3160, sum loss: 8759.602539, avg loss: 5.045854, ppl: 155.376953 +epoch: 0, batch: 3161, sum loss: 7612.302734, avg loss: 5.051296, ppl: 156.224762 +epoch: 0, batch: 3162, sum loss: 9284.931641, avg loss: 5.266552, ppl: 193.746857 +epoch: 0, batch: 3163, sum loss: 8638.474609, avg loss: 5.075484, ppl: 160.049683 +epoch: 0, batch: 3164, sum loss: 9379.208008, avg loss: 
5.119655, ppl: 167.277664 +epoch: 0, batch: 3165, sum loss: 8455.275391, avg loss: 5.118206, ppl: 167.035370 +epoch: 0, batch: 3166, sum loss: 8972.083008, avg loss: 4.937855, ppl: 139.470795 +epoch: 0, batch: 3167, sum loss: 9370.218750, avg loss: 5.137181, ppl: 170.235245 +epoch: 0, batch: 3168, sum loss: 7429.113770, avg loss: 5.023065, ppl: 151.876099 +epoch: 0, batch: 3169, sum loss: 9097.893555, avg loss: 5.128463, ppl: 168.757507 +epoch: 0, batch: 3170, sum loss: 9338.931641, avg loss: 4.951714, ppl: 141.417084 +epoch: 0, batch: 3171, sum loss: 7435.401855, avg loss: 4.907856, ppl: 135.348907 +epoch: 0, batch: 3172, sum loss: 9509.204102, avg loss: 5.201972, ppl: 181.629974 +epoch: 0, batch: 3173, sum loss: 9179.163086, avg loss: 5.321254, ppl: 204.640396 +epoch: 0, batch: 3174, sum loss: 9040.520508, avg loss: 5.096122, ppl: 163.387024 +epoch: 0, batch: 3175, sum loss: 10051.115234, avg loss: 5.278947, ppl: 196.163269 +epoch: 0, batch: 3176, sum loss: 9426.073242, avg loss: 5.179162, ppl: 177.533890 +epoch: 0, batch: 3177, sum loss: 6367.950195, avg loss: 4.795144, ppl: 120.921745 +epoch: 0, batch: 3178, sum loss: 9363.865234, avg loss: 5.153476, ppl: 173.031860 +epoch: 0, batch: 3179, sum loss: 8644.310547, avg loss: 5.025762, ppl: 152.286270 +epoch: 0, batch: 3180, sum loss: 7808.415527, avg loss: 5.116917, ppl: 166.820297 +epoch: 0, batch: 3181, sum loss: 8904.650391, avg loss: 5.150174, ppl: 172.461441 +epoch: 0, batch: 3182, sum loss: 9962.691406, avg loss: 5.257357, ppl: 191.973465 +epoch: 0, batch: 3183, sum loss: 8559.487305, avg loss: 5.171896, ppl: 176.248688 +epoch: 0, batch: 3184, sum loss: 9113.481445, avg loss: 4.982768, ppl: 145.877548 +epoch: 0, batch: 3185, sum loss: 8979.974609, avg loss: 5.056292, ppl: 157.007263 +epoch: 0, batch: 3186, sum loss: 8540.438477, avg loss: 5.071519, ppl: 159.416351 +epoch: 0, batch: 3187, sum loss: 8371.928711, avg loss: 5.196728, ppl: 180.680130 +epoch: 0, batch: 3188, sum loss: 7438.959961, avg loss: 
4.949408, ppl: 141.091354 +epoch: 0, batch: 3189, sum loss: 7449.185059, avg loss: 4.907237, ppl: 135.265106 +epoch: 0, batch: 3190, sum loss: 8684.210938, avg loss: 5.114377, ppl: 166.397003 +epoch: 0, batch: 3191, sum loss: 8613.289062, avg loss: 5.019399, ppl: 151.320358 +epoch: 0, batch: 3192, sum loss: 8337.159180, avg loss: 4.904212, ppl: 134.856537 +epoch: 0, batch: 3193, sum loss: 8631.993164, avg loss: 5.150354, ppl: 172.492523 +epoch: 0, batch: 3194, sum loss: 8524.027344, avg loss: 5.020040, ppl: 151.417358 +epoch: 0, batch: 3195, sum loss: 7660.249023, avg loss: 4.799655, ppl: 121.468498 +epoch: 0, batch: 3196, sum loss: 10152.526367, avg loss: 5.003709, ppl: 148.964691 +epoch: 0, batch: 3197, sum loss: 9559.246094, avg loss: 5.263902, ppl: 193.233963 +epoch: 0, batch: 3198, sum loss: 8677.326172, avg loss: 5.092328, ppl: 162.768280 +epoch: 0, batch: 3199, sum loss: 9201.096680, avg loss: 5.097560, ppl: 163.622253 +epoch: 0, batch: 3200, sum loss: 9758.725586, avg loss: 5.058956, ppl: 157.426025 +epoch: 0, batch: 3201, sum loss: 8680.581055, avg loss: 5.100224, ppl: 164.058655 +epoch: 0, batch: 3202, sum loss: 8570.207031, avg loss: 5.003040, ppl: 148.864990 +epoch: 0, batch: 3203, sum loss: 10214.145508, avg loss: 5.303295, ppl: 200.997940 +epoch: 0, batch: 3204, sum loss: 7775.166992, avg loss: 5.145710, ppl: 171.693268 +epoch: 0, batch: 3205, sum loss: 8062.736328, avg loss: 4.880591, ppl: 131.708527 +epoch: 0, batch: 3206, sum loss: 8936.157227, avg loss: 5.195440, ppl: 180.447571 +epoch: 0, batch: 3207, sum loss: 8978.801758, avg loss: 5.064186, ppl: 158.251587 +epoch: 0, batch: 3208, sum loss: 8397.601562, avg loss: 5.298171, ppl: 199.970734 +epoch: 0, batch: 3209, sum loss: 8994.505859, avg loss: 5.022058, ppl: 151.723160 +epoch: 0, batch: 3210, sum loss: 8505.227539, avg loss: 4.997196, ppl: 147.997543 +epoch: 0, batch: 3211, sum loss: 7651.259766, avg loss: 4.815142, ppl: 123.364288 +epoch: 0, batch: 3212, sum loss: 8818.960938, avg loss: 
4.988101, ppl: 146.657593 +epoch: 0, batch: 3213, sum loss: 6759.619141, avg loss: 4.870042, ppl: 130.326431 +epoch: 0, batch: 3214, sum loss: 8007.469727, avg loss: 4.927674, ppl: 138.057983 +epoch: 0, batch: 3215, sum loss: 7615.130371, avg loss: 5.013252, ppl: 150.393051 +epoch: 0, batch: 3216, sum loss: 8105.187012, avg loss: 5.133114, ppl: 169.544235 +epoch: 0, batch: 3217, sum loss: 7813.270020, avg loss: 4.898602, ppl: 134.102173 +epoch: 0, batch: 3218, sum loss: 9132.886719, avg loss: 5.145288, ppl: 171.620895 +epoch: 0, batch: 3219, sum loss: 7813.436035, avg loss: 4.999000, ppl: 148.264755 +epoch: 0, batch: 3220, sum loss: 7475.011719, avg loss: 5.173018, ppl: 176.446625 +epoch: 0, batch: 3221, sum loss: 7647.961426, avg loss: 4.822170, ppl: 124.234360 +epoch: 0, batch: 3222, sum loss: 9203.786133, avg loss: 5.194010, ppl: 180.189713 +epoch: 0, batch: 3223, sum loss: 8416.246094, avg loss: 5.233984, ppl: 187.538467 +epoch: 0, batch: 3224, sum loss: 8803.485352, avg loss: 4.926405, ppl: 137.882919 +epoch: 0, batch: 3225, sum loss: 8930.239258, avg loss: 4.925670, ppl: 137.781647 +epoch: 0, batch: 3226, sum loss: 7829.558594, avg loss: 5.002913, ppl: 148.846039 +epoch: 0, batch: 3227, sum loss: 8082.103516, avg loss: 5.045008, ppl: 155.245575 +epoch: 0, batch: 3228, sum loss: 9245.273438, avg loss: 5.032811, ppl: 153.363541 +epoch: 0, batch: 3229, sum loss: 7832.674805, avg loss: 5.079556, ppl: 160.702606 +epoch: 0, batch: 3230, sum loss: 7424.773438, avg loss: 4.913814, ppl: 136.157745 +epoch: 0, batch: 3231, sum loss: 9013.406250, avg loss: 5.058028, ppl: 157.280090 +epoch: 0, batch: 3232, sum loss: 8704.802734, avg loss: 5.224971, ppl: 185.855835 +epoch: 0, batch: 3233, sum loss: 9321.523438, avg loss: 5.388164, ppl: 218.801208 +epoch: 0, batch: 3234, sum loss: 8720.331055, avg loss: 4.915632, ppl: 136.405457 +epoch: 0, batch: 3235, sum loss: 8295.032227, avg loss: 5.079628, ppl: 160.714188 +epoch: 0, batch: 3236, sum loss: 9355.644531, avg loss: 
5.132005, ppl: 169.356293 +epoch: 0, batch: 3237, sum loss: 9441.191406, avg loss: 5.201758, ppl: 181.591263 +epoch: 0, batch: 3238, sum loss: 8014.999023, avg loss: 5.009374, ppl: 149.810944 +epoch: 0, batch: 3239, sum loss: 7846.229004, avg loss: 4.819551, ppl: 123.909439 +epoch: 0, batch: 3240, sum loss: 7956.942383, avg loss: 4.905636, ppl: 135.048813 +epoch: 0, batch: 3241, sum loss: 7105.331055, avg loss: 4.640974, ppl: 103.645256 +epoch: 0, batch: 3242, sum loss: 7858.110352, avg loss: 4.899072, ppl: 134.165176 +epoch: 0, batch: 3243, sum loss: 7496.109375, avg loss: 5.113308, ppl: 166.219299 +epoch: 0, batch: 3244, sum loss: 10212.718750, avg loss: 5.170997, ppl: 176.090256 +epoch: 0, batch: 3245, sum loss: 9293.326172, avg loss: 5.206346, ppl: 182.426178 +epoch: 0, batch: 3246, sum loss: 9049.886719, avg loss: 5.276902, ppl: 195.762405 +epoch: 0, batch: 3247, sum loss: 8149.999023, avg loss: 5.027760, ppl: 152.590836 +epoch: 0, batch: 3248, sum loss: 8433.095703, avg loss: 5.077120, ppl: 160.311661 +epoch: 0, batch: 3249, sum loss: 8913.738281, avg loss: 4.718760, ppl: 112.029305 +epoch: 0, batch: 3250, sum loss: 7703.706055, avg loss: 4.995918, ppl: 147.808609 +epoch: 0, batch: 3251, sum loss: 9347.222656, avg loss: 4.950860, ppl: 141.296356 +epoch: 0, batch: 3252, sum loss: 7851.623535, avg loss: 4.938128, ppl: 139.508850 +epoch: 0, batch: 3253, sum loss: 9639.080078, avg loss: 5.255769, ppl: 191.668777 +epoch: 0, batch: 3254, sum loss: 7990.510742, avg loss: 4.947685, ppl: 140.848495 +epoch: 0, batch: 3255, sum loss: 8487.176758, avg loss: 4.874886, ppl: 130.959229 +epoch: 0, batch: 3256, sum loss: 8723.494141, avg loss: 5.027951, ppl: 152.619934 +epoch: 0, batch: 3257, sum loss: 9392.164062, avg loss: 5.194781, ppl: 180.328613 +epoch: 0, batch: 3258, sum loss: 8626.412109, avg loss: 5.059479, ppl: 157.508469 +epoch: 0, batch: 3259, sum loss: 7772.099609, avg loss: 4.891189, ppl: 133.111755 +epoch: 0, batch: 3260, sum loss: 8351.742188, avg loss: 
4.980168, ppl: 145.498871 +epoch: 0, batch: 3261, sum loss: 8495.838867, avg loss: 4.982897, ppl: 145.896408 +epoch: 0, batch: 3262, sum loss: 8090.350098, avg loss: 5.253474, ppl: 191.229401 +epoch: 0, batch: 3263, sum loss: 8100.757812, avg loss: 4.957624, ppl: 142.255463 +epoch: 0, batch: 3264, sum loss: 9374.577148, avg loss: 5.254808, ppl: 191.484711 +epoch: 0, batch: 3265, sum loss: 8398.559570, avg loss: 5.029078, ppl: 152.792007 +epoch: 0, batch: 3266, sum loss: 8697.700195, avg loss: 5.110282, ppl: 165.717072 +epoch: 0, batch: 3267, sum loss: 8747.070312, avg loss: 5.151396, ppl: 172.672424 +epoch: 0, batch: 3268, sum loss: 9792.608398, avg loss: 5.124337, ppl: 168.062714 +epoch: 0, batch: 3269, sum loss: 9648.318359, avg loss: 4.950395, ppl: 141.230682 +epoch: 0, batch: 3270, sum loss: 9411.407227, avg loss: 5.168263, ppl: 175.609619 +epoch: 0, batch: 3271, sum loss: 7411.723633, avg loss: 4.994423, ppl: 147.587753 +epoch: 0, batch: 3272, sum loss: 10302.526367, avg loss: 5.203296, ppl: 181.870651 +epoch: 0, batch: 3273, sum loss: 7613.516602, avg loss: 4.992470, ppl: 147.299774 +epoch: 0, batch: 3274, sum loss: 8127.591797, avg loss: 5.004674, ppl: 149.108536 +epoch: 0, batch: 3275, sum loss: 7827.877441, avg loss: 4.886315, ppl: 132.464584 +epoch: 0, batch: 3276, sum loss: 9780.919922, avg loss: 5.015856, ppl: 150.785187 +epoch: 0, batch: 3277, sum loss: 9124.001953, avg loss: 5.038102, ppl: 154.177139 +epoch: 0, batch: 3278, sum loss: 7971.134277, avg loss: 5.016447, ppl: 150.874298 +epoch: 0, batch: 3279, sum loss: 8640.391602, avg loss: 5.121750, ppl: 167.628448 +epoch: 0, batch: 3280, sum loss: 7905.538574, avg loss: 5.064407, ppl: 158.286530 +epoch: 0, batch: 3281, sum loss: 7771.964844, avg loss: 4.894185, ppl: 133.511093 +epoch: 0, batch: 3282, sum loss: 8641.535156, avg loss: 4.929570, ppl: 138.319977 +epoch: 0, batch: 3283, sum loss: 7232.511230, avg loss: 4.730223, ppl: 113.320847 +epoch: 0, batch: 3284, sum loss: 7597.131836, avg loss: 
4.914057, ppl: 136.190796 +epoch: 0, batch: 3285, sum loss: 8151.686035, avg loss: 5.104374, ppl: 164.740982 +epoch: 0, batch: 3286, sum loss: 8879.570312, avg loss: 5.033770, ppl: 153.510666 +epoch: 0, batch: 3287, sum loss: 8494.921875, avg loss: 4.732547, ppl: 113.584526 +epoch: 0, batch: 3288, sum loss: 8166.785156, avg loss: 5.010298, ppl: 149.949387 +epoch: 0, batch: 3289, sum loss: 8623.500000, avg loss: 4.993341, ppl: 147.428162 +epoch: 0, batch: 3290, sum loss: 8032.109863, avg loss: 4.927675, ppl: 138.058121 +epoch: 0, batch: 3291, sum loss: 10044.412109, avg loss: 5.032270, ppl: 153.280624 +epoch: 0, batch: 3292, sum loss: 7787.451172, avg loss: 5.113231, ppl: 166.206528 +epoch: 0, batch: 3293, sum loss: 7809.815430, avg loss: 4.999882, ppl: 148.395615 +epoch: 0, batch: 3294, sum loss: 7521.548828, avg loss: 4.938640, ppl: 139.580307 +epoch: 0, batch: 3295, sum loss: 8006.580078, avg loss: 4.864265, ppl: 129.575668 +epoch: 0, batch: 3296, sum loss: 8897.370117, avg loss: 5.292903, ppl: 198.920044 +epoch: 0, batch: 3297, sum loss: 9543.848633, avg loss: 5.189695, ppl: 179.413803 +epoch: 0, batch: 3298, sum loss: 7589.030762, avg loss: 5.100155, ppl: 164.047318 +epoch: 0, batch: 3299, sum loss: 7803.450684, avg loss: 4.957719, ppl: 142.268951 +epoch: 0, batch: 3300, sum loss: 8454.824219, avg loss: 5.002854, ppl: 148.837387 +epoch: 0, batch: 3301, sum loss: 8292.626953, avg loss: 5.071943, ppl: 159.483871 +epoch: 0, batch: 3302, sum loss: 9638.315430, avg loss: 5.025190, ppl: 152.199219 +epoch: 0, batch: 3303, sum loss: 9658.390625, avg loss: 5.260561, ppl: 192.589508 +epoch: 0, batch: 3304, sum loss: 7949.402832, avg loss: 4.931391, ppl: 138.572159 +epoch: 0, batch: 3305, sum loss: 8562.544922, avg loss: 5.136499, ppl: 170.119202 +epoch: 0, batch: 3306, sum loss: 9370.540039, avg loss: 5.008306, ppl: 149.651016 +epoch: 0, batch: 3307, sum loss: 9681.740234, avg loss: 5.076948, ppl: 160.284149 +epoch: 0, batch: 3308, sum loss: 8991.577148, avg loss: 
4.981483, ppl: 145.690277 +epoch: 0, batch: 3309, sum loss: 9266.683594, avg loss: 5.165376, ppl: 175.103241 +epoch: 0, batch: 3310, sum loss: 8982.468750, avg loss: 5.040668, ppl: 154.573166 +epoch: 0, batch: 3311, sum loss: 8085.833496, avg loss: 4.865123, ppl: 129.686859 +epoch: 0, batch: 3312, sum loss: 7989.241211, avg loss: 4.931631, ppl: 138.605347 +epoch: 0, batch: 3313, sum loss: 7678.350586, avg loss: 5.081635, ppl: 161.037140 +epoch: 0, batch: 3314, sum loss: 8264.517578, avg loss: 4.904758, ppl: 134.930252 +epoch: 0, batch: 3315, sum loss: 8847.794922, avg loss: 5.004409, ppl: 149.069000 +epoch: 0, batch: 3316, sum loss: 7233.615234, avg loss: 4.854775, ppl: 128.351868 +epoch: 0, batch: 3317, sum loss: 8319.580078, avg loss: 5.005764, ppl: 149.271149 +epoch: 0, batch: 3318, sum loss: 8812.465820, avg loss: 5.055918, ppl: 156.948578 +epoch: 0, batch: 3319, sum loss: 8726.613281, avg loss: 4.897089, ppl: 133.899490 +epoch: 0, batch: 3320, sum loss: 7802.561523, avg loss: 4.846312, ppl: 127.270096 +epoch: 0, batch: 3321, sum loss: 7065.651855, avg loss: 4.958352, ppl: 142.359009 +epoch: 0, batch: 3322, sum loss: 9197.247070, avg loss: 5.106744, ppl: 165.131851 +epoch: 0, batch: 3323, sum loss: 8198.653320, avg loss: 5.079711, ppl: 160.727600 +epoch: 0, batch: 3324, sum loss: 7615.966309, avg loss: 5.111387, ppl: 165.900269 +epoch: 0, batch: 3325, sum loss: 8449.666992, avg loss: 5.199795, ppl: 181.235046 +epoch: 0, batch: 3326, sum loss: 8435.123047, avg loss: 4.831113, ppl: 125.350380 +epoch: 0, batch: 3327, sum loss: 7522.426758, avg loss: 5.065607, ppl: 158.476624 +epoch: 0, batch: 3328, sum loss: 8938.980469, avg loss: 4.906137, ppl: 135.116379 +epoch: 0, batch: 3329, sum loss: 7631.583008, avg loss: 4.778699, ppl: 118.949547 +epoch: 0, batch: 3330, sum loss: 8306.596680, avg loss: 5.077382, ppl: 160.353714 +epoch: 0, batch: 3331, sum loss: 8088.689453, avg loss: 4.803260, ppl: 121.907173 +epoch: 0, batch: 3332, sum loss: 7060.988281, avg loss: 
5.000700, ppl: 148.517090 +epoch: 0, batch: 3333, sum loss: 9570.500000, avg loss: 5.112447, ppl: 166.076218 +epoch: 0, batch: 3334, sum loss: 9004.266602, avg loss: 5.256431, ppl: 191.795776 +epoch: 0, batch: 3335, sum loss: 8029.394531, avg loss: 5.059480, ppl: 157.508621 +epoch: 0, batch: 3336, sum loss: 7768.331543, avg loss: 5.070713, ppl: 159.287796 +epoch: 0, batch: 3337, sum loss: 8781.651367, avg loss: 4.870578, ppl: 130.396240 +epoch: 0, batch: 3338, sum loss: 8933.482422, avg loss: 5.035785, ppl: 153.820251 +epoch: 0, batch: 3339, sum loss: 8412.080078, avg loss: 4.905003, ppl: 134.963257 +epoch: 0, batch: 3340, sum loss: 7267.513184, avg loss: 4.947252, ppl: 140.787521 +epoch: 0, batch: 3341, sum loss: 9055.720703, avg loss: 5.122014, ppl: 167.672729 +epoch: 0, batch: 3342, sum loss: 7786.069824, avg loss: 4.857186, ppl: 128.661682 +epoch: 0, batch: 3343, sum loss: 10379.267578, avg loss: 5.189634, ppl: 179.402847 +epoch: 0, batch: 3344, sum loss: 8331.984375, avg loss: 4.852641, ppl: 128.078217 +epoch: 0, batch: 3345, sum loss: 9253.025391, avg loss: 5.140570, ppl: 170.813049 +epoch: 0, batch: 3346, sum loss: 7558.476074, avg loss: 4.879584, ppl: 131.575958 +epoch: 0, batch: 3347, sum loss: 9745.139648, avg loss: 4.982178, ppl: 145.791534 +epoch: 0, batch: 3348, sum loss: 8766.837891, avg loss: 4.986825, ppl: 146.470566 +epoch: 0, batch: 3349, sum loss: 8265.278320, avg loss: 4.867655, ppl: 130.015701 +epoch: 0, batch: 3350, sum loss: 8702.491211, avg loss: 4.842789, ppl: 126.822525 +epoch: 0, batch: 3351, sum loss: 7588.929199, avg loss: 4.986156, ppl: 146.372681 +epoch: 0, batch: 3352, sum loss: 7264.097656, avg loss: 4.881786, ppl: 131.865952 +epoch: 0, batch: 3353, sum loss: 9033.238281, avg loss: 5.010116, ppl: 149.922073 +epoch: 0, batch: 3354, sum loss: 7942.457031, avg loss: 5.017344, ppl: 151.009766 +epoch: 0, batch: 3355, sum loss: 8431.639648, avg loss: 5.054940, ppl: 156.795090 +epoch: 0, batch: 3356, sum loss: 8020.662598, avg loss: 
5.009783, ppl: 149.872253 +epoch: 0, batch: 3357, sum loss: 10077.759766, avg loss: 4.969310, ppl: 143.927521 +epoch: 0, batch: 3358, sum loss: 6475.238281, avg loss: 4.775249, ppl: 118.539886 +epoch: 0, batch: 3359, sum loss: 7602.419922, avg loss: 5.021414, ppl: 151.625519 +epoch: 0, batch: 3360, sum loss: 8345.255859, avg loss: 5.167341, ppl: 175.447739 +epoch: 0, batch: 3361, sum loss: 8585.642578, avg loss: 4.937115, ppl: 139.367554 +epoch: 0, batch: 3362, sum loss: 9507.254883, avg loss: 5.024976, ppl: 152.166641 +epoch: 0, batch: 3363, sum loss: 7399.668945, avg loss: 4.752517, ppl: 115.875603 +epoch: 0, batch: 3364, sum loss: 8210.053711, avg loss: 4.858020, ppl: 128.768967 +epoch: 0, batch: 3365, sum loss: 7924.155762, avg loss: 4.894475, ppl: 133.549942 +epoch: 0, batch: 3366, sum loss: 9039.437500, avg loss: 5.286221, ppl: 197.595215 +epoch: 0, batch: 3367, sum loss: 8477.760742, avg loss: 4.946185, ppl: 140.637360 +epoch: 0, batch: 3368, sum loss: 7288.798828, avg loss: 4.961741, ppl: 142.842331 +epoch: 0, batch: 3369, sum loss: 8547.261719, avg loss: 5.066545, ppl: 158.625412 +epoch: 0, batch: 3370, sum loss: 8295.423828, avg loss: 5.036687, ppl: 153.959091 +epoch: 0, batch: 3371, sum loss: 8754.812500, avg loss: 4.985657, ppl: 146.299622 +epoch: 0, batch: 3372, sum loss: 7619.542969, avg loss: 4.865608, ppl: 129.749832 +epoch: 0, batch: 3373, sum loss: 8743.160156, avg loss: 4.914649, ppl: 136.271408 +epoch: 0, batch: 3374, sum loss: 8028.737305, avg loss: 4.706176, ppl: 110.628342 +epoch: 0, batch: 3375, sum loss: 8192.538086, avg loss: 4.738310, ppl: 114.241013 +epoch: 0, batch: 3376, sum loss: 8660.875000, avg loss: 4.937785, ppl: 139.461029 +epoch: 0, batch: 3377, sum loss: 8458.917969, avg loss: 4.993458, ppl: 147.445374 +epoch: 0, batch: 3378, sum loss: 7072.423340, avg loss: 5.044525, ppl: 155.170517 +epoch: 0, batch: 3379, sum loss: 8507.605469, avg loss: 5.091326, ppl: 162.605377 +epoch: 0, batch: 3380, sum loss: 9112.056641, avg loss: 
5.116259, ppl: 166.710480 +epoch: 0, batch: 3381, sum loss: 8037.318359, avg loss: 4.918799, ppl: 136.838226 +epoch: 0, batch: 3382, sum loss: 9959.394531, avg loss: 5.179092, ppl: 177.521530 +epoch: 0, batch: 3383, sum loss: 8485.003906, avg loss: 4.967801, ppl: 143.710464 +epoch: 0, batch: 3384, sum loss: 9018.839844, avg loss: 4.810048, ppl: 122.737518 +epoch: 0, batch: 3385, sum loss: 8813.037109, avg loss: 5.067876, ppl: 158.836655 +epoch: 0, batch: 3386, sum loss: 8915.782227, avg loss: 4.923127, ppl: 137.431656 +epoch: 0, batch: 3387, sum loss: 7195.359863, avg loss: 4.822628, ppl: 124.291245 +epoch: 0, batch: 3388, sum loss: 8872.743164, avg loss: 5.225409, ppl: 185.937119 +epoch: 0, batch: 3389, sum loss: 7344.381836, avg loss: 4.674973, ppl: 107.229622 +epoch: 0, batch: 3390, sum loss: 8809.120117, avg loss: 4.993832, ppl: 147.500580 +epoch: 0, batch: 3391, sum loss: 9383.446289, avg loss: 5.085879, ppl: 161.722000 +epoch: 0, batch: 3392, sum loss: 8367.007812, avg loss: 4.980362, ppl: 145.527039 +epoch: 0, batch: 3393, sum loss: 8590.473633, avg loss: 4.889285, ppl: 132.858490 +epoch: 0, batch: 3394, sum loss: 7876.337402, avg loss: 4.852950, ppl: 128.117737 +epoch: 0, batch: 3395, sum loss: 8103.735352, avg loss: 4.829401, ppl: 125.135986 +epoch: 0, batch: 3396, sum loss: 8703.172852, avg loss: 4.824375, ppl: 124.508644 +epoch: 0, batch: 3397, sum loss: 8477.136719, avg loss: 4.835788, ppl: 125.937813 +epoch: 0, batch: 3398, sum loss: 7287.204590, avg loss: 5.025659, ppl: 152.270508 +epoch: 0, batch: 3399, sum loss: 8612.780273, avg loss: 5.163538, ppl: 174.781647 +epoch: 0, batch: 3400, sum loss: 8696.154297, avg loss: 4.918640, ppl: 136.816376 +epoch: 0, batch: 3401, sum loss: 8891.761719, avg loss: 4.945362, ppl: 140.521729 +epoch: 0, batch: 3402, sum loss: 8599.196289, avg loss: 4.922265, ppl: 137.313217 +epoch: 0, batch: 3403, sum loss: 6680.599121, avg loss: 4.758262, ppl: 116.543159 +epoch: 0, batch: 3404, sum loss: 8372.177734, avg loss: 
5.161638, ppl: 174.450012 +epoch: 0, batch: 3405, sum loss: 8516.244141, avg loss: 4.911329, ppl: 135.819778 +epoch: 0, batch: 3406, sum loss: 7536.742188, avg loss: 5.017804, ppl: 151.079193 +epoch: 0, batch: 3407, sum loss: 8184.526855, avg loss: 5.108943, ppl: 165.495331 +epoch: 0, batch: 3408, sum loss: 8153.819336, avg loss: 4.891313, ppl: 133.128265 +epoch: 0, batch: 3409, sum loss: 8382.006836, avg loss: 4.907498, ppl: 135.300446 +epoch: 0, batch: 3410, sum loss: 8975.640625, avg loss: 5.137745, ppl: 170.331223 +epoch: 0, batch: 3411, sum loss: 9712.826172, avg loss: 4.912911, ppl: 136.034897 +epoch: 0, batch: 3412, sum loss: 9284.650391, avg loss: 5.051496, ppl: 156.256058 +epoch: 0, batch: 3413, sum loss: 8396.679688, avg loss: 4.927629, ppl: 138.051804 +epoch: 0, batch: 3414, sum loss: 6498.514648, avg loss: 4.781836, ppl: 119.323174 +epoch: 0, batch: 3415, sum loss: 8700.875977, avg loss: 4.907432, ppl: 135.291489 +epoch: 0, batch: 3416, sum loss: 7852.889648, avg loss: 4.666007, ppl: 106.272552 +epoch: 0, batch: 3417, sum loss: 7569.311035, avg loss: 5.029443, ppl: 152.847824 +epoch: 0, batch: 3418, sum loss: 7408.373047, avg loss: 4.870726, ppl: 130.415512 +epoch: 0, batch: 3419, sum loss: 6847.922363, avg loss: 4.795464, ppl: 120.960503 +epoch: 0, batch: 3420, sum loss: 7816.270020, avg loss: 4.906635, ppl: 135.183792 +epoch: 0, batch: 3421, sum loss: 8227.235352, avg loss: 4.995286, ppl: 147.715118 +epoch: 0, batch: 3422, sum loss: 8689.355469, avg loss: 4.976722, ppl: 144.998260 +epoch: 0, batch: 3423, sum loss: 8741.647461, avg loss: 4.800466, ppl: 121.567001 +epoch: 0, batch: 3424, sum loss: 8599.340820, avg loss: 4.863881, ppl: 129.525864 +epoch: 0, batch: 3425, sum loss: 7569.336914, avg loss: 5.022785, ppl: 151.833603 +epoch: 0, batch: 3426, sum loss: 7693.007812, avg loss: 4.881350, ppl: 131.808487 +epoch: 0, batch: 3427, sum loss: 10235.035156, avg loss: 5.177053, ppl: 177.160034 +epoch: 0, batch: 3428, sum loss: 7338.875977, avg loss: 
5.078807, ppl: 160.582352 +epoch: 0, batch: 3429, sum loss: 7870.375977, avg loss: 4.981251, ppl: 145.656448 +epoch: 0, batch: 3430, sum loss: 9301.150391, avg loss: 4.950054, ppl: 141.182541 +epoch: 0, batch: 3431, sum loss: 8921.554688, avg loss: 5.063312, ppl: 158.113251 +epoch: 0, batch: 3432, sum loss: 8611.261719, avg loss: 4.940483, ppl: 139.837784 +epoch: 0, batch: 3433, sum loss: 9166.609375, avg loss: 5.144001, ppl: 171.400177 +epoch: 0, batch: 3434, sum loss: 9016.673828, avg loss: 5.065547, ppl: 158.467102 +epoch: 0, batch: 3435, sum loss: 8589.023438, avg loss: 5.127776, ppl: 168.641586 +epoch: 0, batch: 3436, sum loss: 7799.973633, avg loss: 4.924226, ppl: 137.582779 +epoch: 0, batch: 3437, sum loss: 6994.993652, avg loss: 4.771483, ppl: 118.094238 +epoch: 0, batch: 3438, sum loss: 7475.755371, avg loss: 4.692879, ppl: 109.166992 +epoch: 0, batch: 3439, sum loss: 7352.224121, avg loss: 4.950993, ppl: 141.315155 +epoch: 0, batch: 3440, sum loss: 7978.438477, avg loss: 5.005294, ppl: 149.200912 +epoch: 0, batch: 3441, sum loss: 7864.973145, avg loss: 4.921761, ppl: 137.244034 +epoch: 0, batch: 3442, sum loss: 7260.430664, avg loss: 4.895773, ppl: 133.723328 +epoch: 0, batch: 3443, sum loss: 9503.327148, avg loss: 5.253360, ppl: 191.207611 +epoch: 0, batch: 3444, sum loss: 8591.494141, avg loss: 4.934804, ppl: 139.045944 +epoch: 0, batch: 3445, sum loss: 8303.672852, avg loss: 5.116250, ppl: 166.708969 +epoch: 0, batch: 3446, sum loss: 9571.772461, avg loss: 5.102224, ppl: 164.387161 +epoch: 0, batch: 3447, sum loss: 8561.676758, avg loss: 4.903595, ppl: 134.773422 +epoch: 0, batch: 3448, sum loss: 7948.791992, avg loss: 4.931012, ppl: 138.519638 +epoch: 0, batch: 3449, sum loss: 8335.893555, avg loss: 4.976653, ppl: 144.988312 +epoch: 0, batch: 3450, sum loss: 9379.661133, avg loss: 5.050975, ppl: 156.174637 +epoch: 0, batch: 3451, sum loss: 6853.010742, avg loss: 4.788966, ppl: 120.177063 +epoch: 0, batch: 3452, sum loss: 8433.998047, avg loss: 
4.978747, ppl: 145.292267 +epoch: 0, batch: 3453, sum loss: 10147.576172, avg loss: 5.033520, ppl: 153.472321 +epoch: 0, batch: 3454, sum loss: 8218.738281, avg loss: 5.002275, ppl: 148.751175 +epoch: 0, batch: 3455, sum loss: 9727.308594, avg loss: 4.960382, ppl: 142.648270 +epoch: 0, batch: 3456, sum loss: 7457.835938, avg loss: 4.845897, ppl: 127.217369 +epoch: 0, batch: 3457, sum loss: 7467.538086, avg loss: 4.938848, ppl: 139.609329 +epoch: 0, batch: 3458, sum loss: 8726.456055, avg loss: 5.041280, ppl: 154.667908 +epoch: 0, batch: 3459, sum loss: 8421.129883, avg loss: 4.856476, ppl: 128.570358 +epoch: 0, batch: 3460, sum loss: 8512.166016, avg loss: 4.951813, ppl: 141.431107 +epoch: 0, batch: 3461, sum loss: 7318.945312, avg loss: 4.850196, ppl: 127.765411 +epoch: 0, batch: 3462, sum loss: 9444.904297, avg loss: 5.061578, ppl: 157.839355 +epoch: 0, batch: 3463, sum loss: 8816.913086, avg loss: 5.117187, ppl: 166.865326 +epoch: 0, batch: 3464, sum loss: 9521.994141, avg loss: 4.956790, ppl: 142.136871 +epoch: 0, batch: 3465, sum loss: 7273.956055, avg loss: 4.836407, ppl: 126.015724 +epoch: 0, batch: 3466, sum loss: 7123.637695, avg loss: 4.793834, ppl: 120.763512 +epoch: 0, batch: 3467, sum loss: 7763.405273, avg loss: 4.733784, ppl: 113.725052 +epoch: 0, batch: 3468, sum loss: 7399.295898, avg loss: 4.810986, ppl: 122.852638 +epoch: 0, batch: 3469, sum loss: 9298.249023, avg loss: 4.959066, ppl: 142.460724 +epoch: 0, batch: 3470, sum loss: 8704.282227, avg loss: 4.879082, ppl: 131.509903 +epoch: 0, batch: 3471, sum loss: 9767.252930, avg loss: 5.231523, ppl: 187.077423 +epoch: 0, batch: 3472, sum loss: 9641.957031, avg loss: 5.161647, ppl: 174.451599 +epoch: 0, batch: 3473, sum loss: 9766.513672, avg loss: 5.021344, ppl: 151.614960 +epoch: 0, batch: 3474, sum loss: 8879.181641, avg loss: 5.010825, ppl: 150.028488 +epoch: 0, batch: 3475, sum loss: 8955.985352, avg loss: 5.191876, ppl: 179.805542 +epoch: 0, batch: 3476, sum loss: 7986.672852, avg loss: 
4.951440, ppl: 141.378448 +epoch: 0, batch: 3477, sum loss: 7848.939941, avg loss: 4.948890, ppl: 141.018372 +epoch: 0, batch: 3478, sum loss: 7489.838867, avg loss: 4.943788, ppl: 140.300705 +epoch: 0, batch: 3479, sum loss: 7874.079102, avg loss: 4.933633, ppl: 138.883209 +epoch: 0, batch: 3480, sum loss: 9527.919922, avg loss: 4.941867, ppl: 140.031494 +epoch: 0, batch: 3481, sum loss: 8766.041992, avg loss: 5.037955, ppl: 154.154495 +epoch: 0, batch: 3482, sum loss: 7935.661621, avg loss: 4.847686, ppl: 127.445114 +epoch: 0, batch: 3483, sum loss: 9208.828125, avg loss: 5.167693, ppl: 175.509415 +epoch: 0, batch: 3484, sum loss: 7310.761230, avg loss: 4.781401, ppl: 119.271294 +epoch: 0, batch: 3485, sum loss: 8100.420898, avg loss: 4.945312, ppl: 140.514618 +epoch: 0, batch: 3486, sum loss: 9807.906250, avg loss: 4.950987, ppl: 141.314423 +epoch: 0, batch: 3487, sum loss: 7364.039062, avg loss: 4.854343, ppl: 128.296432 +epoch: 0, batch: 3488, sum loss: 8133.444824, avg loss: 4.923393, ppl: 137.468216 +epoch: 0, batch: 3489, sum loss: 8677.618164, avg loss: 5.015964, ppl: 150.801437 +epoch: 0, batch: 3490, sum loss: 7857.218262, avg loss: 4.761950, ppl: 116.973862 +epoch: 0, batch: 3491, sum loss: 8127.890137, avg loss: 5.111880, ppl: 165.982162 +epoch: 0, batch: 3492, sum loss: 7536.832031, avg loss: 4.903599, ppl: 134.774002 +epoch: 0, batch: 3493, sum loss: 8242.597656, avg loss: 5.007654, ppl: 149.553436 +epoch: 0, batch: 3494, sum loss: 6300.211426, avg loss: 4.772888, ppl: 118.260254 +epoch: 0, batch: 3495, sum loss: 9723.559570, avg loss: 5.183134, ppl: 178.240555 +epoch: 0, batch: 3496, sum loss: 8559.385742, avg loss: 4.882708, ppl: 131.987549 +epoch: 0, batch: 3497, sum loss: 7212.193359, avg loss: 4.973927, ppl: 144.593521 +epoch: 0, batch: 3498, sum loss: 7145.215332, avg loss: 4.958512, ppl: 142.381744 +epoch: 0, batch: 3499, sum loss: 8698.054688, avg loss: 4.928076, ppl: 138.113556 +epoch: 0, batch: 3500, sum loss: 7729.684570, avg loss: 
4.961287, ppl: 142.777374 +epoch: 0, batch: 3501, sum loss: 7706.724121, avg loss: 4.877674, ppl: 131.324799 +epoch: 0, batch: 3502, sum loss: 8800.731445, avg loss: 5.134616, ppl: 169.799088 +epoch: 0, batch: 3503, sum loss: 7280.507812, avg loss: 4.915941, ppl: 136.447617 +epoch: 0, batch: 3504, sum loss: 7547.974121, avg loss: 4.926876, ppl: 137.947891 +epoch: 0, batch: 3505, sum loss: 8017.923828, avg loss: 4.766899, ppl: 117.554100 +epoch: 0, batch: 3506, sum loss: 6952.671875, avg loss: 4.875647, ppl: 131.058868 +epoch: 0, batch: 3507, sum loss: 9103.126953, avg loss: 5.048878, ppl: 155.847534 +epoch: 0, batch: 3508, sum loss: 9450.486328, avg loss: 5.238629, ppl: 188.411591 +epoch: 0, batch: 3509, sum loss: 7981.962891, avg loss: 4.982499, ppl: 145.838318 +epoch: 0, batch: 3510, sum loss: 9594.842773, avg loss: 5.073952, ppl: 159.804581 +epoch: 0, batch: 3511, sum loss: 8095.411621, avg loss: 5.081865, ppl: 161.074234 +epoch: 0, batch: 3512, sum loss: 7794.659668, avg loss: 4.877759, ppl: 131.336075 +epoch: 0, batch: 3513, sum loss: 8199.950195, avg loss: 4.984772, ppl: 146.170212 +epoch: 0, batch: 3514, sum loss: 7665.395508, avg loss: 4.913715, ppl: 136.144302 +epoch: 0, batch: 3515, sum loss: 7824.500000, avg loss: 4.779780, ppl: 119.078194 +epoch: 0, batch: 3516, sum loss: 8534.268555, avg loss: 4.818898, ppl: 123.828583 +epoch: 0, batch: 3517, sum loss: 7811.806152, avg loss: 4.991570, ppl: 147.167221 +epoch: 0, batch: 3518, sum loss: 7710.141602, avg loss: 4.951921, ppl: 141.446487 +epoch: 0, batch: 3519, sum loss: 7564.935547, avg loss: 4.757821, ppl: 116.491821 +epoch: 0, batch: 3520, sum loss: 8646.651367, avg loss: 5.202558, ppl: 181.736542 +epoch: 0, batch: 3521, sum loss: 8994.653320, avg loss: 4.955732, ppl: 141.986481 +epoch: 0, batch: 3522, sum loss: 8004.489258, avg loss: 5.088677, ppl: 162.175156 +epoch: 0, batch: 3523, sum loss: 8260.937500, avg loss: 4.949633, ppl: 141.123184 +epoch: 0, batch: 3524, sum loss: 9454.463867, avg loss: 
5.010314, ppl: 149.951889 +epoch: 0, batch: 3525, sum loss: 8024.614746, avg loss: 4.932154, ppl: 138.677933 +epoch: 0, batch: 3526, sum loss: 9664.162109, avg loss: 4.900691, ppl: 134.382553 +epoch: 0, batch: 3527, sum loss: 8224.879883, avg loss: 5.018230, ppl: 151.143616 +epoch: 0, batch: 3528, sum loss: 8185.989258, avg loss: 4.890078, ppl: 132.963898 +epoch: 0, batch: 3529, sum loss: 8209.814453, avg loss: 4.936749, ppl: 139.316666 +epoch: 0, batch: 3530, sum loss: 7644.369141, avg loss: 4.881462, ppl: 131.823196 +epoch: 0, batch: 3531, sum loss: 9656.785156, avg loss: 4.838069, ppl: 126.225372 +epoch: 0, batch: 3532, sum loss: 8320.455078, avg loss: 4.911721, ppl: 135.873016 +epoch: 0, batch: 3533, sum loss: 8658.123047, avg loss: 5.212596, ppl: 183.570068 +epoch: 0, batch: 3534, sum loss: 8525.930664, avg loss: 5.179788, ppl: 177.645172 +epoch: 0, batch: 3535, sum loss: 8700.779297, avg loss: 5.188300, ppl: 179.163742 +epoch: 0, batch: 3536, sum loss: 8359.785156, avg loss: 4.932026, ppl: 138.660202 +epoch: 0, batch: 3537, sum loss: 7850.316895, avg loss: 4.906448, ppl: 135.158463 +epoch: 0, batch: 3538, sum loss: 8401.138672, avg loss: 4.950583, ppl: 141.257294 +epoch: 0, batch: 3539, sum loss: 9341.121094, avg loss: 4.867702, ppl: 130.021851 +epoch: 0, batch: 3540, sum loss: 8334.005859, avg loss: 5.032612, ppl: 153.333038 +epoch: 0, batch: 3541, sum loss: 7763.679199, avg loss: 5.067676, ppl: 158.804764 +epoch: 0, batch: 3542, sum loss: 10350.506836, avg loss: 5.106318, ppl: 165.061478 +epoch: 0, batch: 3543, sum loss: 9668.988281, avg loss: 4.966096, ppl: 143.465698 +epoch: 0, batch: 3544, sum loss: 7659.965332, avg loss: 4.866560, ppl: 129.873322 +epoch: 0, batch: 3545, sum loss: 8830.006836, avg loss: 4.974651, ppl: 144.698364 +epoch: 0, batch: 3546, sum loss: 8069.458496, avg loss: 4.941493, ppl: 139.979080 +epoch: 0, batch: 3547, sum loss: 8129.668457, avg loss: 4.912187, ppl: 135.936325 +epoch: 0, batch: 3548, sum loss: 8458.292969, avg loss: 
5.031703, ppl: 153.193680 +epoch: 0, batch: 3549, sum loss: 9611.341797, avg loss: 5.186909, ppl: 178.914627 +epoch: 0, batch: 3550, sum loss: 7429.871094, avg loss: 4.720376, ppl: 112.210381 +epoch: 0, batch: 3551, sum loss: 8914.964844, avg loss: 5.028181, ppl: 152.655090 +epoch: 0, batch: 3552, sum loss: 7826.650391, avg loss: 4.894715, ppl: 133.581970 +epoch: 0, batch: 3553, sum loss: 7661.246094, avg loss: 4.842760, ppl: 126.818840 +epoch: 0, batch: 3554, sum loss: 8312.274414, avg loss: 5.156498, ppl: 173.555588 +epoch: 0, batch: 3555, sum loss: 8649.998047, avg loss: 4.962707, ppl: 142.980331 +epoch: 0, batch: 3556, sum loss: 8888.061523, avg loss: 4.899704, ppl: 134.249969 +epoch: 0, batch: 3557, sum loss: 8277.251953, avg loss: 4.915233, ppl: 136.351089 +epoch: 0, batch: 3558, sum loss: 8276.541992, avg loss: 4.789666, ppl: 120.261162 +epoch: 0, batch: 3559, sum loss: 9534.410156, avg loss: 5.047332, ppl: 155.606796 +epoch: 0, batch: 3560, sum loss: 9395.796875, avg loss: 4.853200, ppl: 128.149872 +epoch: 0, batch: 3561, sum loss: 8361.059570, avg loss: 5.048949, ppl: 155.858536 +epoch: 0, batch: 3562, sum loss: 7967.277344, avg loss: 4.893905, ppl: 133.473724 +epoch: 0, batch: 3563, sum loss: 8793.977539, avg loss: 4.934892, ppl: 139.058151 +epoch: 0, batch: 3564, sum loss: 7591.606934, avg loss: 4.878925, ppl: 131.489212 +epoch: 0, batch: 3565, sum loss: 7951.737305, avg loss: 4.758670, ppl: 116.590797 +epoch: 0, batch: 3566, sum loss: 7712.894531, avg loss: 4.875407, ppl: 131.027496 +epoch: 0, batch: 3567, sum loss: 7738.358398, avg loss: 4.979639, ppl: 145.421814 +epoch: 0, batch: 3568, sum loss: 8290.457031, avg loss: 5.098682, ppl: 163.805862 +epoch: 0, batch: 3569, sum loss: 8092.052734, avg loss: 4.880611, ppl: 131.711105 +epoch: 0, batch: 3570, sum loss: 8557.883789, avg loss: 5.072842, ppl: 159.627289 +epoch: 0, batch: 3571, sum loss: 8006.943848, avg loss: 4.894220, ppl: 133.515869 +epoch: 0, batch: 3572, sum loss: 8426.591797, avg loss: 
5.064058, ppl: 158.231293 +epoch: 0, batch: 3573, sum loss: 7663.510742, avg loss: 4.678578, ppl: 107.616920 +epoch: 0, batch: 3574, sum loss: 7946.629883, avg loss: 4.845506, ppl: 127.167633 +epoch: 0, batch: 3575, sum loss: 7531.474609, avg loss: 4.840279, ppl: 126.504654 +epoch: 0, batch: 3576, sum loss: 8068.236816, avg loss: 4.819736, ppl: 123.932312 +epoch: 0, batch: 3577, sum loss: 7799.239258, avg loss: 4.999513, ppl: 148.340851 +epoch: 0, batch: 3578, sum loss: 9183.303711, avg loss: 5.079261, ppl: 160.655258 +epoch: 0, batch: 3579, sum loss: 9545.028320, avg loss: 4.847653, ppl: 127.440926 +epoch: 0, batch: 3580, sum loss: 7214.228516, avg loss: 4.684564, ppl: 108.263069 +epoch: 0, batch: 3581, sum loss: 7965.115234, avg loss: 5.109118, ppl: 165.524368 +epoch: 0, batch: 3582, sum loss: 8159.333984, avg loss: 4.853857, ppl: 128.234039 +epoch: 0, batch: 3583, sum loss: 7950.854492, avg loss: 4.848082, ppl: 127.495628 +epoch: 0, batch: 3584, sum loss: 8259.272461, avg loss: 4.904556, ppl: 134.903030 +epoch: 0, batch: 3585, sum loss: 7961.965820, avg loss: 5.074548, ppl: 159.899857 +epoch: 0, batch: 3586, sum loss: 8460.748047, avg loss: 5.018238, ppl: 151.144684 +epoch: 0, batch: 3587, sum loss: 7975.902344, avg loss: 4.819276, ppl: 123.875412 +epoch: 0, batch: 3588, sum loss: 8095.883789, avg loss: 4.833364, ppl: 125.632881 +epoch: 0, batch: 3589, sum loss: 8681.172852, avg loss: 4.901848, ppl: 134.538162 +epoch: 0, batch: 3590, sum loss: 9088.674805, avg loss: 4.920777, ppl: 137.109146 +epoch: 0, batch: 3591, sum loss: 7975.558105, avg loss: 4.886984, ppl: 132.553238 +epoch: 0, batch: 3592, sum loss: 8250.494141, avg loss: 4.741663, ppl: 114.624718 +epoch: 0, batch: 3593, sum loss: 7589.785156, avg loss: 4.896636, ppl: 133.838791 +epoch: 0, batch: 3594, sum loss: 8553.305664, avg loss: 4.882024, ppl: 131.897385 +epoch: 0, batch: 3595, sum loss: 8511.123047, avg loss: 4.962754, ppl: 142.987015 +epoch: 0, batch: 3596, sum loss: 6721.737305, avg loss: 
4.924350, ppl: 137.599838 +epoch: 0, batch: 3597, sum loss: 7493.908691, avg loss: 4.819234, ppl: 123.870216 +epoch: 0, batch: 3598, sum loss: 8766.005859, avg loss: 4.916437, ppl: 136.515289 +epoch: 0, batch: 3599, sum loss: 9481.055664, avg loss: 4.894711, ppl: 133.581406 +epoch: 0, batch: 3600, sum loss: 8640.408203, avg loss: 4.945855, ppl: 140.590958 +epoch: 0, batch: 3601, sum loss: 8296.992188, avg loss: 5.153411, ppl: 173.020721 +epoch: 0, batch: 3602, sum loss: 7666.962402, avg loss: 4.803861, ppl: 121.980492 +epoch: 0, batch: 3603, sum loss: 9364.998047, avg loss: 5.008020, ppl: 149.608276 +epoch: 0, batch: 3604, sum loss: 7381.545898, avg loss: 4.940794, ppl: 139.881210 +epoch: 0, batch: 3605, sum loss: 8877.173828, avg loss: 4.782960, ppl: 119.457413 +epoch: 0, batch: 3606, sum loss: 7077.501465, avg loss: 4.908115, ppl: 135.383957 +epoch: 0, batch: 3607, sum loss: 7927.313965, avg loss: 4.804433, ppl: 122.050255 +epoch: 0, batch: 3608, sum loss: 11461.207031, avg loss: 5.075823, ppl: 160.103882 +epoch: 0, batch: 3609, sum loss: 8923.379883, avg loss: 4.935498, ppl: 139.142380 +epoch: 0, batch: 3610, sum loss: 8243.225586, avg loss: 4.854668, ppl: 128.338028 +epoch: 0, batch: 3611, sum loss: 8562.304688, avg loss: 4.887160, ppl: 132.576569 +epoch: 0, batch: 3612, sum loss: 7804.719727, avg loss: 4.808823, ppl: 122.587257 +epoch: 0, batch: 3613, sum loss: 8639.175781, avg loss: 5.055106, ppl: 156.821106 +epoch: 0, batch: 3614, sum loss: 7278.830566, avg loss: 4.779272, ppl: 119.017624 +epoch: 0, batch: 3615, sum loss: 8790.794922, avg loss: 5.046381, ppl: 155.458771 +epoch: 0, batch: 3616, sum loss: 8706.161133, avg loss: 5.026652, ppl: 152.421829 +epoch: 0, batch: 3617, sum loss: 7455.427734, avg loss: 4.927580, ppl: 138.045029 +epoch: 0, batch: 3618, sum loss: 8653.009766, avg loss: 4.953068, ppl: 141.608719 +epoch: 0, batch: 3619, sum loss: 7663.542480, avg loss: 4.841151, ppl: 126.615028 +epoch: 0, batch: 3620, sum loss: 9009.563477, avg loss: 
4.797425, ppl: 121.197906 +epoch: 0, batch: 3621, sum loss: 8520.221680, avg loss: 4.965164, ppl: 143.332077 +epoch: 0, batch: 3622, sum loss: 7420.054688, avg loss: 4.784045, ppl: 119.587128 +epoch: 0, batch: 3623, sum loss: 7442.721680, avg loss: 4.731546, ppl: 113.470840 +epoch: 0, batch: 3624, sum loss: 7768.373047, avg loss: 4.840107, ppl: 126.482941 +epoch: 0, batch: 3625, sum loss: 8710.041992, avg loss: 5.215594, ppl: 184.121216 +epoch: 0, batch: 3626, sum loss: 6779.978516, avg loss: 4.669407, ppl: 106.634476 +epoch: 0, batch: 3627, sum loss: 8834.863281, avg loss: 4.919189, ppl: 136.891541 +epoch: 0, batch: 3628, sum loss: 8117.703613, avg loss: 4.940781, ppl: 139.879471 +epoch: 0, batch: 3629, sum loss: 8807.320312, avg loss: 4.768446, ppl: 117.736191 +epoch: 0, batch: 3630, sum loss: 8760.860352, avg loss: 4.837582, ppl: 126.163933 +epoch: 0, batch: 3631, sum loss: 8841.129883, avg loss: 4.947471, ppl: 140.818329 +epoch: 0, batch: 3632, sum loss: 8650.981445, avg loss: 4.641085, ppl: 103.656723 +epoch: 0, batch: 3633, sum loss: 10242.003906, avg loss: 5.110780, ppl: 165.799667 +epoch: 0, batch: 3634, sum loss: 8875.784180, avg loss: 4.972428, ppl: 144.377060 +epoch: 0, batch: 3635, sum loss: 8046.270020, avg loss: 4.942426, ppl: 140.109772 +epoch: 0, batch: 3636, sum loss: 8272.246094, avg loss: 5.189615, ppl: 179.399429 +epoch: 0, batch: 3637, sum loss: 9114.087891, avg loss: 4.905322, ppl: 135.006317 +epoch: 0, batch: 3638, sum loss: 8463.069336, avg loss: 5.019614, ppl: 151.352905 +epoch: 0, batch: 3639, sum loss: 7397.153809, avg loss: 4.723598, ppl: 112.572510 +epoch: 0, batch: 3640, sum loss: 7887.413086, avg loss: 4.992033, ppl: 147.235519 +epoch: 0, batch: 3641, sum loss: 7957.754883, avg loss: 5.027009, ppl: 152.476196 +epoch: 0, batch: 3642, sum loss: 8156.343262, avg loss: 4.982494, ppl: 145.837692 +epoch: 0, batch: 3643, sum loss: 9263.850586, avg loss: 5.106864, ppl: 165.151627 +epoch: 0, batch: 3644, sum loss: 8704.578125, avg loss: 
4.945783, ppl: 140.580841 +epoch: 0, batch: 3645, sum loss: 9364.781250, avg loss: 4.744063, ppl: 114.900139 +epoch: 0, batch: 3646, sum loss: 7345.150879, avg loss: 4.775781, ppl: 118.602867 +epoch: 0, batch: 3647, sum loss: 7893.877930, avg loss: 4.825109, ppl: 124.600052 +epoch: 0, batch: 3648, sum loss: 8649.157227, avg loss: 4.905931, ppl: 135.088684 +epoch: 0, batch: 3649, sum loss: 7735.128906, avg loss: 4.471173, ppl: 87.459236 +epoch: 0, batch: 3650, sum loss: 7340.770508, avg loss: 4.579395, ppl: 97.455444 +epoch: 0, batch: 3651, sum loss: 7412.890137, avg loss: 4.748809, ppl: 115.446686 +epoch: 0, batch: 3652, sum loss: 8771.717773, avg loss: 4.981100, ppl: 145.634506 +epoch: 0, batch: 3653, sum loss: 9583.403320, avg loss: 4.820625, ppl: 124.042633 +epoch: 0, batch: 3654, sum loss: 8854.147461, avg loss: 4.897205, ppl: 133.915009 +epoch: 0, batch: 3655, sum loss: 7756.043945, avg loss: 4.608463, ppl: 100.329849 +epoch: 0, batch: 3656, sum loss: 7100.085938, avg loss: 4.739710, ppl: 114.401001 +epoch: 0, batch: 3657, sum loss: 8461.253906, avg loss: 4.956798, ppl: 142.137955 +epoch: 0, batch: 3658, sum loss: 7992.370117, avg loss: 4.846798, ppl: 127.332008 +epoch: 0, batch: 3659, sum loss: 8563.894531, avg loss: 5.155867, ppl: 173.446060 +epoch: 0, batch: 3660, sum loss: 7207.738770, avg loss: 4.808365, ppl: 122.531097 +epoch: 0, batch: 3661, sum loss: 7054.826172, avg loss: 4.786178, ppl: 119.842468 +epoch: 0, batch: 3662, sum loss: 7444.138184, avg loss: 5.002781, ppl: 148.826462 +epoch: 0, batch: 3663, sum loss: 7553.902832, avg loss: 4.697701, ppl: 109.694641 +epoch: 0, batch: 3664, sum loss: 9321.298828, avg loss: 4.908530, ppl: 135.440201 +epoch: 0, batch: 3665, sum loss: 8344.912109, avg loss: 5.094573, ppl: 163.134247 +epoch: 0, batch: 3666, sum loss: 8239.214844, avg loss: 4.843748, ppl: 126.944199 +epoch: 0, batch: 3667, sum loss: 8305.805664, avg loss: 4.684606, ppl: 108.267563 +epoch: 0, batch: 3668, sum loss: 7444.428223, avg loss: 5.102418, 
ppl: 164.419067 +epoch: 0, batch: 3669, sum loss: 8249.372070, avg loss: 4.722022, ppl: 112.395241 +epoch: 0, batch: 3670, sum loss: 7754.728027, avg loss: 4.904951, ppl: 134.956314 +epoch: 0, batch: 3671, sum loss: 8058.352539, avg loss: 4.860285, ppl: 129.061020 +epoch: 0, batch: 3672, sum loss: 8026.072266, avg loss: 5.057387, ppl: 157.179245 +epoch: 0, batch: 3673, sum loss: 8229.634766, avg loss: 4.927925, ppl: 138.092682 +epoch: 0, batch: 3674, sum loss: 7582.164551, avg loss: 4.774663, ppl: 118.470383 +epoch: 0, batch: 3675, sum loss: 7974.150391, avg loss: 4.757846, ppl: 116.494766 +epoch: 0, batch: 3676, sum loss: 7824.735352, avg loss: 4.708024, ppl: 110.832939 +epoch: 0, batch: 3677, sum loss: 6717.768555, avg loss: 4.598062, ppl: 99.291702 +epoch: 0, batch: 3678, sum loss: 9010.644531, avg loss: 5.039510, ppl: 154.394379 +epoch: 0, batch: 3679, sum loss: 6995.651367, avg loss: 4.831251, ppl: 125.367714 +epoch: 0, batch: 3680, sum loss: 8438.360352, avg loss: 4.926071, ppl: 137.836914 +epoch: 0, batch: 3681, sum loss: 8132.647461, avg loss: 4.887409, ppl: 132.609497 +epoch: 0, batch: 3682, sum loss: 8853.722656, avg loss: 4.843393, ppl: 126.899231 +epoch: 0, batch: 3683, sum loss: 7814.583008, avg loss: 4.808974, ppl: 122.605789 +epoch: 0, batch: 3684, sum loss: 7403.275391, avg loss: 4.736580, ppl: 114.043549 +epoch: 0, batch: 3685, sum loss: 8223.568359, avg loss: 4.701869, ppl: 110.152802 +epoch: 0, batch: 3686, sum loss: 8393.050781, avg loss: 5.040872, ppl: 154.604706 +epoch: 0, batch: 3687, sum loss: 7939.318359, avg loss: 5.050457, ppl: 156.093781 +epoch: 0, batch: 3688, sum loss: 7151.744141, avg loss: 4.777384, ppl: 118.793159 +epoch: 0, batch: 3689, sum loss: 8574.102539, avg loss: 4.849606, ppl: 127.690018 +epoch: 0, batch: 3690, sum loss: 7957.969238, avg loss: 4.708858, ppl: 110.925362 +epoch: 0, batch: 3691, sum loss: 8359.738281, avg loss: 5.138130, ppl: 170.396866 +epoch: 0, batch: 3692, sum loss: 8594.139648, avg loss: 5.115560, ppl: 
166.593979 +epoch: 0, batch: 3693, sum loss: 7164.579590, avg loss: 4.808443, ppl: 122.540680 +epoch: 0, batch: 3694, sum loss: 9011.744141, avg loss: 5.051426, ppl: 156.245178 +epoch: 0, batch: 3695, sum loss: 8315.479492, avg loss: 4.845850, ppl: 127.211426 +epoch: 0, batch: 3696, sum loss: 7799.984375, avg loss: 4.949229, ppl: 141.066132 +epoch: 0, batch: 3697, sum loss: 7918.578613, avg loss: 4.952207, ppl: 141.486832 +epoch: 0, batch: 3698, sum loss: 7689.875488, avg loss: 4.683238, ppl: 108.119553 +epoch: 0, batch: 3699, sum loss: 8259.602539, avg loss: 4.990696, ppl: 147.038788 +epoch: 0, batch: 3700, sum loss: 7133.624512, avg loss: 4.787668, ppl: 120.021126 +epoch: 0, batch: 3701, sum loss: 9400.566406, avg loss: 5.156647, ppl: 173.581406 +epoch: 0, batch: 3702, sum loss: 7408.592773, avg loss: 4.801421, ppl: 121.683228 +epoch: 0, batch: 3703, sum loss: 7734.385254, avg loss: 4.961120, ppl: 142.753540 +epoch: 0, batch: 3704, sum loss: 7862.506348, avg loss: 4.753632, ppl: 116.004799 +epoch: 0, batch: 3705, sum loss: 8310.037109, avg loss: 4.784132, ppl: 119.597511 +epoch: 0, batch: 3706, sum loss: 8418.855469, avg loss: 4.835644, ppl: 125.919617 +epoch: 0, batch: 3707, sum loss: 7818.853516, avg loss: 4.811602, ppl: 122.928406 +epoch: 0, batch: 3708, sum loss: 8036.155762, avg loss: 4.921100, ppl: 137.153351 +epoch: 0, batch: 3709, sum loss: 6871.755859, avg loss: 4.911906, ppl: 135.898163 +epoch: 0, batch: 3710, sum loss: 9772.434570, avg loss: 5.111106, ppl: 165.853668 +epoch: 0, batch: 3711, sum loss: 8744.267578, avg loss: 4.988173, ppl: 146.668289 +epoch: 0, batch: 3712, sum loss: 9205.979492, avg loss: 4.881219, ppl: 131.791275 +epoch: 0, batch: 3713, sum loss: 7402.727539, avg loss: 4.832067, ppl: 125.470100 +epoch: 0, batch: 3714, sum loss: 7342.972656, avg loss: 4.853254, ppl: 128.156708 +epoch: 0, batch: 3715, sum loss: 9090.937500, avg loss: 4.877112, ppl: 131.251114 +epoch: 0, batch: 3716, sum loss: 8227.631836, avg loss: 4.777951, ppl: 
118.860580 +epoch: 0, batch: 3717, sum loss: 7167.815918, avg loss: 4.943322, ppl: 140.235291 +epoch: 0, batch: 3718, sum loss: 10014.126953, avg loss: 4.964862, ppl: 143.288818 +epoch: 0, batch: 3719, sum loss: 8081.390137, avg loss: 5.003957, ppl: 149.001556 +epoch: 0, batch: 3720, sum loss: 9590.164062, avg loss: 5.052774, ppl: 156.455872 +epoch: 0, batch: 3721, sum loss: 7494.053711, avg loss: 4.910913, ppl: 135.763367 +epoch: 0, batch: 3722, sum loss: 7446.463379, avg loss: 4.964309, ppl: 143.209518 +epoch: 0, batch: 3723, sum loss: 7966.416992, avg loss: 4.957323, ppl: 142.212524 +epoch: 0, batch: 3724, sum loss: 6976.641113, avg loss: 4.682310, ppl: 108.019272 +epoch: 0, batch: 3725, sum loss: 8559.276367, avg loss: 4.896612, ppl: 133.835541 +epoch: 0, batch: 3726, sum loss: 7104.694824, avg loss: 4.913344, ppl: 136.093735 +epoch: 0, batch: 3727, sum loss: 9257.547852, avg loss: 4.921610, ppl: 137.223343 +epoch: 0, batch: 3728, sum loss: 8562.127930, avg loss: 4.963552, ppl: 143.101257 +epoch: 0, batch: 3729, sum loss: 10113.973633, avg loss: 4.902556, ppl: 134.633514 +epoch: 0, batch: 3730, sum loss: 7971.619629, avg loss: 4.555211, ppl: 95.126831 +epoch: 0, batch: 3731, sum loss: 8869.621094, avg loss: 4.949565, ppl: 141.113617 +epoch: 0, batch: 3732, sum loss: 8440.999023, avg loss: 4.884838, ppl: 132.268982 +epoch: 0, batch: 3733, sum loss: 7212.705078, avg loss: 4.742081, ppl: 114.672607 +epoch: 0, batch: 3734, sum loss: 7276.099121, avg loss: 4.691231, ppl: 108.987236 +epoch: 0, batch: 3735, sum loss: 7603.441406, avg loss: 4.883392, ppl: 132.077957 +epoch: 0, batch: 3736, sum loss: 7718.422852, avg loss: 4.627352, ppl: 102.242943 +epoch: 0, batch: 3737, sum loss: 8415.209961, avg loss: 4.889721, ppl: 132.916473 +epoch: 0, batch: 3738, sum loss: 9536.430664, avg loss: 4.951418, ppl: 141.375275 +epoch: 0, batch: 3739, sum loss: 7340.465820, avg loss: 4.628289, ppl: 102.338783 +epoch: 0, batch: 3740, sum loss: 8745.144531, avg loss: 5.000082, ppl: 
148.425400 +epoch: 0, batch: 3741, sum loss: 8108.918945, avg loss: 4.983970, ppl: 146.053024 +epoch: 0, batch: 3742, sum loss: 9059.248047, avg loss: 4.857506, ppl: 128.702789 +epoch: 0, batch: 3743, sum loss: 7469.755371, avg loss: 4.751753, ppl: 115.787064 +epoch: 0, batch: 3744, sum loss: 9032.472656, avg loss: 4.979312, ppl: 145.374390 +epoch: 0, batch: 3745, sum loss: 9860.033203, avg loss: 4.871558, ppl: 130.524139 +epoch: 0, batch: 3746, sum loss: 9388.555664, avg loss: 4.905201, ppl: 134.990097 +epoch: 0, batch: 3747, sum loss: 9314.655273, avg loss: 5.048594, ppl: 155.803177 +epoch: 0, batch: 3748, sum loss: 7170.020996, avg loss: 4.649819, ppl: 104.566048 +epoch: 0, batch: 3749, sum loss: 8133.283203, avg loss: 4.867315, ppl: 129.971512 +epoch: 0, batch: 3750, sum loss: 7197.080566, avg loss: 4.939657, ppl: 139.722351 +epoch: 0, batch: 3751, sum loss: 8286.859375, avg loss: 5.016259, ppl: 150.845886 +epoch: 0, batch: 3752, sum loss: 9837.496094, avg loss: 5.099791, ppl: 163.987564 +epoch: 0, batch: 3753, sum loss: 8586.644531, avg loss: 5.065867, ppl: 158.517807 +epoch: 0, batch: 3754, sum loss: 9117.078125, avg loss: 5.023184, ppl: 151.894135 +epoch: 0, batch: 3755, sum loss: 8660.370117, avg loss: 4.857191, ppl: 128.662231 +epoch: 0, batch: 3756, sum loss: 7227.712891, avg loss: 4.684195, ppl: 108.223122 +epoch: 0, batch: 3757, sum loss: 8280.705078, avg loss: 4.896928, ppl: 133.877914 +epoch: 0, batch: 3758, sum loss: 7354.933594, avg loss: 4.922981, ppl: 137.411591 +epoch: 0, batch: 3759, sum loss: 8020.242676, avg loss: 4.831471, ppl: 125.395340 +epoch: 0, batch: 3760, sum loss: 8550.449219, avg loss: 5.110848, ppl: 165.810974 +epoch: 0, batch: 3761, sum loss: 8138.214844, avg loss: 4.998903, ppl: 148.250473 +epoch: 0, batch: 3762, sum loss: 7396.968262, avg loss: 4.702459, ppl: 110.217850 +epoch: 0, batch: 3763, sum loss: 8248.743164, avg loss: 4.524818, ppl: 92.279121 +epoch: 0, batch: 3764, sum loss: 8633.092773, avg loss: 4.944498, ppl: 
140.400299 +epoch: 0, batch: 3765, sum loss: 7307.514648, avg loss: 4.845832, ppl: 127.209061 +epoch: 0, batch: 3766, sum loss: 8276.998047, avg loss: 4.762370, ppl: 117.022896 +epoch: 0, batch: 3767, sum loss: 7737.136230, avg loss: 4.714891, ppl: 111.596642 +epoch: 0, batch: 3768, sum loss: 6350.266602, avg loss: 4.652210, ppl: 104.816399 +epoch: 0, batch: 3769, sum loss: 8051.177246, avg loss: 4.870646, ppl: 130.405075 +epoch: 0, batch: 3770, sum loss: 9242.421875, avg loss: 5.123294, ppl: 167.887466 +epoch: 0, batch: 3771, sum loss: 8254.676758, avg loss: 4.698165, ppl: 109.745651 +epoch: 0, batch: 3772, sum loss: 8591.609375, avg loss: 4.903887, ppl: 134.812759 +epoch: 0, batch: 3773, sum loss: 8424.105469, avg loss: 4.852595, ppl: 128.072342 +epoch: 0, batch: 3774, sum loss: 8855.727539, avg loss: 4.865785, ppl: 129.772720 +epoch: 0, batch: 3775, sum loss: 9096.585938, avg loss: 4.919733, ppl: 136.965973 +epoch: 0, batch: 3776, sum loss: 8429.483398, avg loss: 4.869719, ppl: 130.284241 +epoch: 0, batch: 3777, sum loss: 8208.039062, avg loss: 4.917939, ppl: 136.720505 +epoch: 0, batch: 3778, sum loss: 7797.539062, avg loss: 4.855255, ppl: 128.413391 +epoch: 0, batch: 3779, sum loss: 7121.721680, avg loss: 4.867889, ppl: 130.046158 +epoch: 0, batch: 3780, sum loss: 6856.793457, avg loss: 4.626717, ppl: 102.178024 +epoch: 0, batch: 3781, sum loss: 7919.450684, avg loss: 4.837783, ppl: 126.189323 +epoch: 0, batch: 3782, sum loss: 7445.109863, avg loss: 4.853396, ppl: 128.174927 +epoch: 0, batch: 3783, sum loss: 9068.186523, avg loss: 4.977051, ppl: 145.045975 +epoch: 0, batch: 3784, sum loss: 8253.484375, avg loss: 4.798537, ppl: 121.332809 +epoch: 0, batch: 3785, sum loss: 7670.696289, avg loss: 4.697303, ppl: 109.651077 +epoch: 0, batch: 3786, sum loss: 8921.277344, avg loss: 4.722752, ppl: 112.477325 +epoch: 0, batch: 3787, sum loss: 7635.895020, avg loss: 4.981014, ppl: 145.622009 +epoch: 0, batch: 3788, sum loss: 9708.881836, avg loss: 5.017510, ppl: 
151.034744 +epoch: 0, batch: 3789, sum loss: 9789.644531, avg loss: 5.074984, ppl: 159.969650 +epoch: 0, batch: 3790, sum loss: 6764.348145, avg loss: 4.828228, ppl: 124.989342 +epoch: 0, batch: 3791, sum loss: 8442.384766, avg loss: 4.877172, ppl: 131.258942 +epoch: 0, batch: 3792, sum loss: 8298.726562, avg loss: 4.896004, ppl: 133.754196 +epoch: 0, batch: 3793, sum loss: 8970.922852, avg loss: 4.959051, ppl: 142.458557 +epoch: 0, batch: 3794, sum loss: 8693.253906, avg loss: 5.071910, ppl: 159.478699 +epoch: 0, batch: 3795, sum loss: 7337.031250, avg loss: 4.798582, ppl: 121.338188 +epoch: 0, batch: 3796, sum loss: 9786.643555, avg loss: 4.849675, ppl: 127.698845 +epoch: 0, batch: 3797, sum loss: 8260.145508, avg loss: 5.042824, ppl: 154.906830 +epoch: 0, batch: 3798, sum loss: 8125.369629, avg loss: 4.796558, ppl: 121.092888 +epoch: 0, batch: 3799, sum loss: 7772.818848, avg loss: 4.815873, ppl: 123.454559 +epoch: 0, batch: 3800, sum loss: 8781.161133, avg loss: 4.913912, ppl: 136.171112 +epoch: 0, batch: 3801, sum loss: 8391.290039, avg loss: 4.930253, ppl: 138.414459 +epoch: 0, batch: 3802, sum loss: 6850.831055, avg loss: 4.669960, ppl: 106.693428 +epoch: 0, batch: 3803, sum loss: 8201.286133, avg loss: 4.796074, ppl: 121.034294 +epoch: 0, batch: 3804, sum loss: 7766.589844, avg loss: 4.851087, ppl: 127.879333 +epoch: 0, batch: 3805, sum loss: 9111.924805, avg loss: 4.998313, ppl: 148.163055 +epoch: 0, batch: 3806, sum loss: 8180.773926, avg loss: 4.789680, ppl: 120.262878 +epoch: 0, batch: 3807, sum loss: 9424.186523, avg loss: 4.986342, ppl: 146.399902 +epoch: 0, batch: 3808, sum loss: 7805.525879, avg loss: 4.662799, ppl: 105.932213 +epoch: 0, batch: 3809, sum loss: 8114.802734, avg loss: 4.975354, ppl: 144.800110 +epoch: 0, batch: 3810, sum loss: 8284.557617, avg loss: 4.811009, ppl: 122.855507 +epoch: 0, batch: 3811, sum loss: 8507.758789, avg loss: 4.883903, ppl: 132.145355 +epoch: 0, batch: 3812, sum loss: 8712.587891, avg loss: 5.077266, ppl: 
160.335052 +epoch: 0, batch: 3813, sum loss: 8155.757812, avg loss: 4.979095, ppl: 145.342789 +epoch: 0, batch: 3814, sum loss: 8291.497070, avg loss: 4.920770, ppl: 137.108109 +epoch: 0, batch: 3815, sum loss: 8341.576172, avg loss: 4.977074, ppl: 145.049362 +epoch: 0, batch: 3816, sum loss: 8340.678711, avg loss: 4.779758, ppl: 119.075584 +epoch: 0, batch: 3817, sum loss: 7134.877441, avg loss: 4.669423, ppl: 106.636154 +epoch: 0, batch: 3818, sum loss: 7930.300781, avg loss: 4.751529, ppl: 115.761116 +epoch: 0, batch: 3819, sum loss: 8314.665039, avg loss: 4.893858, ppl: 133.467560 +epoch: 0, batch: 3820, sum loss: 8273.846680, avg loss: 4.793654, ppl: 120.741753 +epoch: 0, batch: 3821, sum loss: 7627.679199, avg loss: 4.797282, ppl: 121.180626 +epoch: 0, batch: 3822, sum loss: 9042.083984, avg loss: 5.028967, ppl: 152.775177 +epoch: 0, batch: 3823, sum loss: 8197.937500, avg loss: 5.026326, ppl: 152.372116 +epoch: 0, batch: 3824, sum loss: 8006.851074, avg loss: 4.939451, ppl: 139.693573 +epoch: 0, batch: 3825, sum loss: 7784.764648, avg loss: 4.905334, ppl: 135.007996 +epoch: 0, batch: 3826, sum loss: 9440.846680, avg loss: 4.772925, ppl: 118.264648 +epoch: 0, batch: 3827, sum loss: 8079.657715, avg loss: 4.700208, ppl: 109.970062 +epoch: 0, batch: 3828, sum loss: 8173.148926, avg loss: 4.899970, ppl: 134.285690 +epoch: 0, batch: 3829, sum loss: 8333.274414, avg loss: 4.896166, ppl: 133.775879 +epoch: 0, batch: 3830, sum loss: 8318.510742, avg loss: 4.858943, ppl: 128.887894 +epoch: 0, batch: 3831, sum loss: 7820.084961, avg loss: 4.833180, ppl: 125.609756 +epoch: 0, batch: 3832, sum loss: 7280.852539, avg loss: 4.640441, ppl: 103.590065 +epoch: 0, batch: 3833, sum loss: 6290.291504, avg loss: 4.676797, ppl: 107.425430 +epoch: 0, batch: 3834, sum loss: 8486.033203, avg loss: 4.748760, ppl: 115.441017 +epoch: 0, batch: 3835, sum loss: 8528.697266, avg loss: 4.821197, ppl: 124.113571 +epoch: 0, batch: 3836, sum loss: 9014.193359, avg loss: 5.058470, ppl: 
157.349625 +epoch: 0, batch: 3837, sum loss: 8337.615234, avg loss: 4.783485, ppl: 119.520203 +epoch: 0, batch: 3838, sum loss: 9198.172852, avg loss: 4.990870, ppl: 147.064240 +epoch: 0, batch: 3839, sum loss: 7090.126465, avg loss: 4.774496, ppl: 118.450554 +epoch: 0, batch: 3840, sum loss: 7514.344238, avg loss: 4.761942, ppl: 116.972855 +epoch: 0, batch: 3841, sum loss: 8975.568359, avg loss: 4.891318, ppl: 133.128967 +epoch: 0, batch: 3842, sum loss: 8296.309570, avg loss: 5.015907, ppl: 150.792816 +epoch: 0, batch: 3843, sum loss: 5752.264160, avg loss: 4.344610, ppl: 77.061996 +epoch: 0, batch: 3844, sum loss: 6689.345703, avg loss: 4.812479, ppl: 123.036247 +epoch: 0, batch: 3845, sum loss: 8011.150391, avg loss: 4.917833, ppl: 136.706100 +epoch: 0, batch: 3846, sum loss: 8576.434570, avg loss: 4.668718, ppl: 106.561028 +epoch: 0, batch: 3847, sum loss: 8756.648438, avg loss: 4.792911, ppl: 120.652084 +epoch: 0, batch: 3848, sum loss: 6649.868652, avg loss: 4.726275, ppl: 112.874321 +epoch: 0, batch: 3849, sum loss: 6889.539551, avg loss: 4.807774, ppl: 122.458733 +epoch: 0, batch: 3850, sum loss: 8480.074219, avg loss: 4.868011, ppl: 130.061905 +epoch: 0, batch: 3851, sum loss: 8057.850586, avg loss: 4.657717, ppl: 105.395218 +epoch: 0, batch: 3852, sum loss: 8032.149902, avg loss: 4.967316, ppl: 143.640854 +epoch: 0, batch: 3853, sum loss: 7610.232422, avg loss: 4.674590, ppl: 107.188568 +epoch: 0, batch: 3854, sum loss: 7472.582520, avg loss: 4.852326, ppl: 128.037903 +epoch: 0, batch: 3855, sum loss: 7582.451660, avg loss: 4.863664, ppl: 129.497772 +epoch: 0, batch: 3856, sum loss: 9365.984375, avg loss: 4.878117, ppl: 131.383041 +epoch: 0, batch: 3857, sum loss: 9076.001953, avg loss: 4.871713, ppl: 130.544373 +epoch: 0, batch: 3858, sum loss: 7003.365723, avg loss: 4.681394, ppl: 107.920418 +epoch: 0, batch: 3859, sum loss: 7460.201660, avg loss: 4.803736, ppl: 121.965256 +epoch: 0, batch: 3860, sum loss: 8993.180664, avg loss: 5.095286, ppl: 
163.250595 +epoch: 0, batch: 3861, sum loss: 7322.950195, avg loss: 4.685189, ppl: 108.330719 +epoch: 0, batch: 3862, sum loss: 9926.580078, avg loss: 4.923899, ppl: 137.537857 +epoch: 0, batch: 3863, sum loss: 8267.223633, avg loss: 4.886066, ppl: 132.431564 +epoch: 0, batch: 3864, sum loss: 9051.501953, avg loss: 4.879516, ppl: 131.566925 +epoch: 0, batch: 3865, sum loss: 8101.048340, avg loss: 4.807744, ppl: 122.455048 +epoch: 0, batch: 3866, sum loss: 8836.175781, avg loss: 4.919920, ppl: 136.991653 +epoch: 0, batch: 3867, sum loss: 8868.393555, avg loss: 5.108521, ppl: 165.425583 +epoch: 0, batch: 3868, sum loss: 9033.890625, avg loss: 4.843909, ppl: 126.964722 +epoch: 0, batch: 3869, sum loss: 9436.138672, avg loss: 4.879079, ppl: 131.509476 +epoch: 0, batch: 3870, sum loss: 7240.558594, avg loss: 4.629513, ppl: 102.464180 +epoch: 0, batch: 3871, sum loss: 7565.337891, avg loss: 4.874574, ppl: 130.918335 +epoch: 0, batch: 3872, sum loss: 9104.342773, avg loss: 4.975051, ppl: 144.756195 +epoch: 0, batch: 3873, sum loss: 6982.966797, avg loss: 4.766530, ppl: 117.510773 +epoch: 0, batch: 3874, sum loss: 7424.029297, avg loss: 4.792789, ppl: 120.637299 +epoch: 0, batch: 3875, sum loss: 7685.026367, avg loss: 4.797145, ppl: 121.163986 +epoch: 0, batch: 3876, sum loss: 9097.281250, avg loss: 4.925437, ppl: 137.749527 +epoch: 0, batch: 3877, sum loss: 7941.018555, avg loss: 4.847996, ppl: 127.484688 +epoch: 0, batch: 3878, sum loss: 6352.342773, avg loss: 4.486118, ppl: 88.776131 +epoch: 0, batch: 3879, sum loss: 6862.590332, avg loss: 4.749197, ppl: 115.491562 +epoch: 0, batch: 3880, sum loss: 7407.990234, avg loss: 4.730517, ppl: 113.354195 +epoch: 0, batch: 3881, sum loss: 9240.480469, avg loss: 4.830361, ppl: 125.256157 +epoch: 0, batch: 3882, sum loss: 9654.068359, avg loss: 4.933095, ppl: 138.808517 +epoch: 0, batch: 3883, sum loss: 8692.554688, avg loss: 4.869779, ppl: 130.292068 +epoch: 0, batch: 3884, sum loss: 7207.109863, avg loss: 5.057621, ppl: 
157.215973 +epoch: 0, batch: 3885, sum loss: 8511.627930, avg loss: 4.942873, ppl: 140.172455 +epoch: 0, batch: 3886, sum loss: 7696.366211, avg loss: 4.589365, ppl: 98.431953 +epoch: 0, batch: 3887, sum loss: 8340.549805, avg loss: 4.849157, ppl: 127.632729 +epoch: 0, batch: 3888, sum loss: 8246.242188, avg loss: 4.808304, ppl: 122.523682 +epoch: 0, batch: 3889, sum loss: 7823.121094, avg loss: 4.895570, ppl: 133.696228 +epoch: 0, batch: 3890, sum loss: 8627.535156, avg loss: 4.958354, ppl: 142.359207 +epoch: 0, batch: 3891, sum loss: 8872.803711, avg loss: 4.981923, ppl: 145.754349 +epoch: 0, batch: 3892, sum loss: 8566.753906, avg loss: 4.796615, ppl: 121.099815 +epoch: 0, batch: 3893, sum loss: 7530.055664, avg loss: 4.597103, ppl: 99.196541 +epoch: 0, batch: 3894, sum loss: 7948.614258, avg loss: 4.753956, ppl: 116.042480 +epoch: 0, batch: 3895, sum loss: 8240.726562, avg loss: 4.616654, ppl: 101.154991 +epoch: 0, batch: 3896, sum loss: 7965.278320, avg loss: 4.671718, ppl: 106.881172 +epoch: 0, batch: 3897, sum loss: 7853.084473, avg loss: 4.773911, ppl: 118.381386 +epoch: 0, batch: 3898, sum loss: 7886.731445, avg loss: 4.794365, ppl: 120.827682 +epoch: 0, batch: 3899, sum loss: 9238.173828, avg loss: 4.966760, ppl: 143.561020 +epoch: 0, batch: 3900, sum loss: 9463.206055, avg loss: 5.036299, ppl: 153.899414 +epoch: 0, batch: 3901, sum loss: 6294.353516, avg loss: 4.614629, ppl: 100.950348 +epoch: 0, batch: 3902, sum loss: 8290.425781, avg loss: 4.800478, ppl: 121.568565 +epoch: 0, batch: 3903, sum loss: 8966.990234, avg loss: 4.881323, ppl: 131.804977 +epoch: 0, batch: 3904, sum loss: 8052.084473, avg loss: 4.643647, ppl: 103.922684 +epoch: 0, batch: 3905, sum loss: 8508.511719, avg loss: 4.867570, ppl: 130.004608 +epoch: 0, batch: 3906, sum loss: 9095.494141, avg loss: 5.027913, ppl: 152.614182 +epoch: 0, batch: 3907, sum loss: 7850.934570, avg loss: 4.778414, ppl: 118.915627 +epoch: 0, batch: 3908, sum loss: 7713.273438, avg loss: 4.960305, ppl: 
142.637253 +epoch: 0, batch: 3909, sum loss: 9656.298828, avg loss: 5.119989, ppl: 167.333511 +epoch: 0, batch: 3910, sum loss: 8606.068359, avg loss: 5.035734, ppl: 153.812408 +epoch: 0, batch: 3911, sum loss: 7841.203125, avg loss: 4.766689, ppl: 117.529434 +epoch: 0, batch: 3912, sum loss: 7754.943848, avg loss: 4.778154, ppl: 118.884674 +epoch: 0, batch: 3913, sum loss: 9883.753906, avg loss: 4.979221, ppl: 145.361145 +epoch: 0, batch: 3914, sum loss: 6853.405762, avg loss: 4.615088, ppl: 100.996712 +epoch: 0, batch: 3915, sum loss: 8108.009277, avg loss: 4.852190, ppl: 128.020447 +epoch: 0, batch: 3916, sum loss: 7134.883301, avg loss: 4.731355, ppl: 113.449150 +epoch: 0, batch: 3917, sum loss: 7484.303223, avg loss: 4.761007, ppl: 116.863586 +epoch: 0, batch: 3918, sum loss: 8944.095703, avg loss: 4.762564, ppl: 117.045670 +epoch: 0, batch: 3919, sum loss: 8163.175781, avg loss: 4.938400, ppl: 139.546829 +epoch: 0, batch: 3920, sum loss: 8883.826172, avg loss: 4.883907, ppl: 132.145935 +epoch: 0, batch: 3921, sum loss: 9521.689453, avg loss: 4.746605, ppl: 115.192535 +epoch: 0, batch: 3922, sum loss: 7529.602539, avg loss: 4.851548, ppl: 127.938309 +epoch: 0, batch: 3923, sum loss: 8818.154297, avg loss: 4.893538, ppl: 133.424728 +epoch: 0, batch: 3924, sum loss: 8034.881348, avg loss: 5.098275, ppl: 163.739166 +epoch: 0, batch: 3925, sum loss: 7680.200684, avg loss: 4.782192, ppl: 119.365738 +epoch: 0, batch: 3926, sum loss: 7633.891602, avg loss: 4.804211, ppl: 122.023193 +epoch: 0, batch: 3927, sum loss: 8468.985352, avg loss: 4.831139, ppl: 125.353607 +epoch: 0, batch: 3928, sum loss: 7971.319824, avg loss: 4.813599, ppl: 123.174080 +epoch: 0, batch: 3929, sum loss: 6884.082520, avg loss: 4.613997, ppl: 100.886536 +epoch: 0, batch: 3930, sum loss: 8168.580078, avg loss: 4.716270, ppl: 111.750694 +epoch: 0, batch: 3931, sum loss: 7678.098633, avg loss: 4.807827, ppl: 122.465149 +epoch: 0, batch: 3932, sum loss: 8080.775391, avg loss: 4.862079, ppl: 
129.292740 +epoch: 0, batch: 3933, sum loss: 9023.718750, avg loss: 5.055305, ppl: 156.852371 +epoch: 0, batch: 3934, sum loss: 7314.065430, avg loss: 4.743233, ppl: 114.804787 +epoch: 0, batch: 3935, sum loss: 7225.884277, avg loss: 4.611286, ppl: 100.613426 +epoch: 0, batch: 3936, sum loss: 9201.991211, avg loss: 4.915594, ppl: 136.400253 +epoch: 0, batch: 3937, sum loss: 8372.616211, avg loss: 4.743692, ppl: 114.857468 +epoch: 0, batch: 3938, sum loss: 9372.791016, avg loss: 5.006833, ppl: 149.430756 +epoch: 0, batch: 3939, sum loss: 7755.356445, avg loss: 4.674717, ppl: 107.202271 +epoch: 0, batch: 3940, sum loss: 9569.316406, avg loss: 5.028543, ppl: 152.710281 +epoch: 0, batch: 3941, sum loss: 8671.515625, avg loss: 4.943851, ppl: 140.309616 +epoch: 0, batch: 3942, sum loss: 8865.050781, avg loss: 4.678127, ppl: 107.568390 +epoch: 0, batch: 3943, sum loss: 8510.206055, avg loss: 4.857424, ppl: 128.692230 +epoch: 0, batch: 3944, sum loss: 8781.593750, avg loss: 4.983879, ppl: 146.039719 +epoch: 0, batch: 3945, sum loss: 7611.172852, avg loss: 4.829424, ppl: 125.138908 +epoch: 0, batch: 3946, sum loss: 7580.592773, avg loss: 4.755704, ppl: 116.245506 +epoch: 0, batch: 3947, sum loss: 8626.372070, avg loss: 4.857192, ppl: 128.662354 +epoch: 0, batch: 3948, sum loss: 8994.294922, avg loss: 4.874957, ppl: 130.968475 +epoch: 0, batch: 3949, sum loss: 8077.762695, avg loss: 4.710065, ppl: 111.059418 +epoch: 0, batch: 3950, sum loss: 7936.109863, avg loss: 4.839091, ppl: 126.354485 +epoch: 0, batch: 3951, sum loss: 8599.827148, avg loss: 4.982519, ppl: 145.841248 +epoch: 0, batch: 3952, sum loss: 7933.485352, avg loss: 4.958428, ppl: 142.369873 +epoch: 0, batch: 3953, sum loss: 7953.640137, avg loss: 4.791349, ppl: 120.463814 +epoch: 0, batch: 3954, sum loss: 7507.449707, avg loss: 4.482060, ppl: 88.416618 +epoch: 0, batch: 3955, sum loss: 6793.941406, avg loss: 4.704946, ppl: 110.492271 +epoch: 0, batch: 3956, sum loss: 8908.997070, avg loss: 4.870966, ppl: 
130.446930 +epoch: 0, batch: 3957, sum loss: 8401.623047, avg loss: 4.733308, ppl: 113.670998 +epoch: 0, batch: 3958, sum loss: 8553.902344, avg loss: 4.860172, ppl: 129.046371 +epoch: 0, batch: 3959, sum loss: 7184.188965, avg loss: 4.704773, ppl: 110.473251 +epoch: 0, batch: 3960, sum loss: 7229.703613, avg loss: 4.622572, ppl: 101.755455 +epoch: 0, batch: 3961, sum loss: 6906.291016, avg loss: 4.573703, ppl: 96.902260 +epoch: 0, batch: 3962, sum loss: 7235.306152, avg loss: 4.955689, ppl: 141.980453 +epoch: 0, batch: 3963, sum loss: 6748.914551, avg loss: 4.746072, ppl: 115.131142 +epoch: 0, batch: 3964, sum loss: 9611.801758, avg loss: 4.813121, ppl: 123.115242 +epoch: 0, batch: 3965, sum loss: 7284.312988, avg loss: 4.776598, ppl: 118.699898 +epoch: 0, batch: 3966, sum loss: 7199.412109, avg loss: 4.924359, ppl: 137.601089 +epoch: 0, batch: 3967, sum loss: 7176.852051, avg loss: 4.842680, ppl: 126.808800 +epoch: 0, batch: 3968, sum loss: 9520.156250, avg loss: 5.140473, ppl: 170.796524 +epoch: 0, batch: 3969, sum loss: 8106.811035, avg loss: 4.892463, ppl: 133.281403 +epoch: 0, batch: 3970, sum loss: 6534.853516, avg loss: 4.684483, ppl: 108.254295 +epoch: 0, batch: 3971, sum loss: 8235.481445, avg loss: 4.850107, ppl: 127.754082 +epoch: 0, batch: 3972, sum loss: 8682.029297, avg loss: 4.804665, ppl: 122.078537 +epoch: 0, batch: 3973, sum loss: 8056.874512, avg loss: 4.804338, ppl: 122.038612 +epoch: 0, batch: 3974, sum loss: 6073.102051, avg loss: 4.498594, ppl: 89.890678 +epoch: 0, batch: 3975, sum loss: 8161.460449, avg loss: 4.781172, ppl: 119.243996 +epoch: 0, batch: 3976, sum loss: 8155.328125, avg loss: 4.831356, ppl: 125.380806 +epoch: 0, batch: 3977, sum loss: 7833.317871, avg loss: 4.853357, ppl: 128.169907 +epoch: 0, batch: 3978, sum loss: 8921.893555, avg loss: 4.788993, ppl: 120.180275 +epoch: 0, batch: 3979, sum loss: 8408.723633, avg loss: 4.750691, ppl: 115.664230 +epoch: 0, batch: 3980, sum loss: 8233.338867, avg loss: 4.809193, ppl: 
122.632629 +epoch: 0, batch: 3981, sum loss: 7130.309082, avg loss: 4.603169, ppl: 99.800079 +epoch: 0, batch: 3982, sum loss: 9512.057617, avg loss: 4.727663, ppl: 113.031105 +epoch: 0, batch: 3983, sum loss: 8067.589844, avg loss: 4.949441, ppl: 141.096130 +epoch: 0, batch: 3984, sum loss: 8210.668945, avg loss: 4.754296, ppl: 116.081886 +epoch: 0, batch: 3985, sum loss: 8672.876953, avg loss: 4.998776, ppl: 148.231674 +epoch: 0, batch: 3986, sum loss: 8703.845703, avg loss: 4.948178, ppl: 140.918015 +epoch: 0, batch: 3987, sum loss: 7865.207031, avg loss: 4.921907, ppl: 137.264114 +epoch: 0, batch: 3988, sum loss: 9416.207031, avg loss: 4.911950, ppl: 135.904114 +epoch: 0, batch: 3989, sum loss: 7443.566406, avg loss: 4.443921, ppl: 85.107964 +epoch: 0, batch: 3990, sum loss: 8135.771484, avg loss: 4.895169, ppl: 133.642624 +epoch: 0, batch: 3991, sum loss: 8639.709961, avg loss: 5.115281, ppl: 166.547592 +epoch: 0, batch: 3992, sum loss: 8026.271484, avg loss: 4.589063, ppl: 98.402199 +epoch: 0, batch: 3993, sum loss: 7687.597168, avg loss: 4.780844, ppl: 119.204887 +epoch: 0, batch: 3994, sum loss: 8219.340820, avg loss: 4.795415, ppl: 120.954559 +epoch: 0, batch: 3995, sum loss: 7041.244629, avg loss: 4.809593, ppl: 122.681702 +epoch: 0, batch: 3996, sum loss: 7371.425293, avg loss: 4.764981, ppl: 117.328865 +epoch: 0, batch: 3997, sum loss: 8181.928711, avg loss: 4.798785, ppl: 121.362900 +epoch: 0, batch: 3998, sum loss: 9107.937500, avg loss: 4.597646, ppl: 99.250427 +epoch: 0, batch: 3999, sum loss: 7393.863770, avg loss: 4.785673, ppl: 119.781906 +epoch: 0, batch: 4000, sum loss: 7741.916016, avg loss: 4.931157, ppl: 138.539658 +epoch: 0, batch: 4001, sum loss: 8428.785156, avg loss: 4.735272, ppl: 113.894478 +epoch: 0, batch: 4002, sum loss: 8324.193359, avg loss: 4.705593, ppl: 110.563789 +epoch: 0, batch: 4003, sum loss: 8100.265137, avg loss: 4.838868, ppl: 126.326286 +epoch: 0, batch: 4004, sum loss: 6951.436523, avg loss: 4.748249, ppl: 115.382080 
+epoch: 0, batch: 4005, sum loss: 7189.466797, avg loss: 4.593908, ppl: 98.880127 +epoch: 0, batch: 4006, sum loss: 7363.942871, avg loss: 4.542840, ppl: 93.957214 +epoch: 0, batch: 4007, sum loss: 8470.984375, avg loss: 4.708718, ppl: 110.909912 +epoch: 0, batch: 4008, sum loss: 7942.911621, avg loss: 4.890955, ppl: 133.080658 +epoch: 0, batch: 4009, sum loss: 6495.851074, avg loss: 4.822458, ppl: 124.270203 +epoch: 0, batch: 4010, sum loss: 8194.722656, avg loss: 4.883625, ppl: 132.108688 +epoch: 0, batch: 4011, sum loss: 8667.674805, avg loss: 4.944481, ppl: 140.397949 +epoch: 0, batch: 4012, sum loss: 7628.652344, avg loss: 4.738293, ppl: 114.239052 +epoch: 0, batch: 4013, sum loss: 8579.608398, avg loss: 4.672989, ppl: 107.017181 +epoch: 0, batch: 4014, sum loss: 8640.513672, avg loss: 4.750145, ppl: 115.601044 +epoch: 0, batch: 4015, sum loss: 7159.612793, avg loss: 4.763548, ppl: 117.160866 +epoch: 0, batch: 4016, sum loss: 6753.164062, avg loss: 4.612817, ppl: 100.767593 +epoch: 0, batch: 4017, sum loss: 7959.760254, avg loss: 4.665744, ppl: 106.244637 +epoch: 0, batch: 4018, sum loss: 8200.039062, avg loss: 5.012249, ppl: 150.242249 +epoch: 0, batch: 4019, sum loss: 7042.680664, avg loss: 4.771464, ppl: 118.091988 +epoch: 0, batch: 4020, sum loss: 9183.789062, avg loss: 4.843771, ppl: 126.947166 +epoch: 0, batch: 4021, sum loss: 7805.973633, avg loss: 4.839413, ppl: 126.395157 +epoch: 0, batch: 4022, sum loss: 8376.316406, avg loss: 4.825067, ppl: 124.594818 +epoch: 0, batch: 4023, sum loss: 7021.328125, avg loss: 4.702832, ppl: 110.258957 +epoch: 0, batch: 4024, sum loss: 6904.338867, avg loss: 4.834971, ppl: 125.834991 +epoch: 0, batch: 4025, sum loss: 7029.766113, avg loss: 4.550011, ppl: 94.633423 +epoch: 0, batch: 4026, sum loss: 8416.765625, avg loss: 4.776825, ppl: 118.726845 +epoch: 0, batch: 4027, sum loss: 7738.122070, avg loss: 4.821259, ppl: 124.121323 +epoch: 0, batch: 4028, sum loss: 9668.368164, avg loss: 4.795818, ppl: 121.003304 +epoch: 0, 
batch: 4029, sum loss: 7545.631348, avg loss: 4.575883, ppl: 97.113747 +epoch: 0, batch: 4030, sum loss: 7488.183594, avg loss: 4.894238, ppl: 133.518219 +epoch: 0, batch: 4031, sum loss: 8512.861328, avg loss: 4.903722, ppl: 134.790512 +epoch: 0, batch: 4032, sum loss: 7808.668457, avg loss: 4.883470, ppl: 132.088165 +epoch: 0, batch: 4033, sum loss: 9345.375000, avg loss: 4.857264, ppl: 128.671616 +epoch: 0, batch: 4034, sum loss: 7376.752930, avg loss: 4.994417, ppl: 147.586838 +epoch: 0, batch: 4035, sum loss: 8771.059570, avg loss: 4.685395, ppl: 108.353088 +epoch: 0, batch: 4036, sum loss: 7245.728027, avg loss: 4.811241, ppl: 122.883980 +epoch: 0, batch: 4037, sum loss: 7977.943359, avg loss: 4.882463, ppl: 131.955200 +epoch: 0, batch: 4038, sum loss: 7049.398438, avg loss: 4.756680, ppl: 116.358917 +epoch: 0, batch: 4039, sum loss: 7747.909180, avg loss: 4.609107, ppl: 100.394463 +epoch: 0, batch: 4040, sum loss: 8624.806641, avg loss: 4.853577, ppl: 128.198151 +epoch: 0, batch: 4041, sum loss: 8503.479492, avg loss: 4.801513, ppl: 121.694366 +epoch: 0, batch: 4042, sum loss: 7279.945801, avg loss: 4.817966, ppl: 123.713142 +epoch: 0, batch: 4043, sum loss: 8930.086914, avg loss: 4.795965, ppl: 121.021133 +epoch: 0, batch: 4044, sum loss: 8591.643555, avg loss: 4.752015, ppl: 115.817436 +epoch: 0, batch: 4045, sum loss: 8761.275391, avg loss: 4.859276, ppl: 128.930862 +epoch: 0, batch: 4046, sum loss: 8393.165039, avg loss: 4.807082, ppl: 122.374031 +epoch: 0, batch: 4047, sum loss: 6942.703613, avg loss: 4.700544, ppl: 110.007042 +epoch: 0, batch: 4048, sum loss: 8256.439453, avg loss: 4.731484, ppl: 113.463814 +epoch: 0, batch: 4049, sum loss: 8952.738281, avg loss: 4.810714, ppl: 122.819252 +epoch: 0, batch: 4050, sum loss: 7674.975098, avg loss: 4.643058, ppl: 103.861450 +epoch: 0, batch: 4051, sum loss: 8069.662109, avg loss: 4.727395, ppl: 113.000763 +epoch: 0, batch: 4052, sum loss: 8281.753906, avg loss: 4.781613, ppl: 119.296608 +epoch: 0, batch: 
4053, sum loss: 7595.768555, avg loss: 4.822710, ppl: 124.301498 +epoch: 0, batch: 4054, sum loss: 7243.529297, avg loss: 4.587416, ppl: 98.240219 +epoch: 0, batch: 4055, sum loss: 7700.271484, avg loss: 4.698152, ppl: 109.744186 +epoch: 0, batch: 4056, sum loss: 7480.481445, avg loss: 4.719547, ppl: 112.117432 +epoch: 0, batch: 4057, sum loss: 8404.259766, avg loss: 4.663851, ppl: 106.043701 +epoch: 0, batch: 4058, sum loss: 7649.621094, avg loss: 4.811082, ppl: 122.864532 +epoch: 0, batch: 4059, sum loss: 8096.002930, avg loss: 4.723455, ppl: 112.556511 +epoch: 0, batch: 4060, sum loss: 8277.906250, avg loss: 4.942034, ppl: 140.054794 +epoch: 0, batch: 4061, sum loss: 8571.168945, avg loss: 4.943004, ppl: 140.190704 +epoch: 0, batch: 4062, sum loss: 8671.712891, avg loss: 4.814943, ppl: 123.339821 +epoch: 0, batch: 4063, sum loss: 8227.235352, avg loss: 4.800021, ppl: 121.512932 +epoch: 0, batch: 4064, sum loss: 7386.145020, avg loss: 4.740787, ppl: 114.524246 +epoch: 0, batch: 4065, sum loss: 8323.201172, avg loss: 4.850350, ppl: 127.785156 +epoch: 0, batch: 4066, sum loss: 8171.736816, avg loss: 4.899123, ppl: 134.172028 +epoch: 0, batch: 4067, sum loss: 8254.546875, avg loss: 4.733111, ppl: 113.648621 +epoch: 0, batch: 4068, sum loss: 8093.649414, avg loss: 4.769387, ppl: 117.846954 +epoch: 0, batch: 4069, sum loss: 8267.915039, avg loss: 4.895154, ppl: 133.640579 +epoch: 0, batch: 4070, sum loss: 8027.977051, avg loss: 4.877264, ppl: 131.271088 +epoch: 0, batch: 4071, sum loss: 8389.785156, avg loss: 4.655819, ppl: 105.195335 +epoch: 0, batch: 4072, sum loss: 7644.510742, avg loss: 4.715922, ppl: 111.711800 +epoch: 0, batch: 4073, sum loss: 7708.511230, avg loss: 4.761280, ppl: 116.895409 +epoch: 0, batch: 4074, sum loss: 7718.073242, avg loss: 4.605056, ppl: 99.988564 +epoch: 0, batch: 4075, sum loss: 9382.009766, avg loss: 4.779424, ppl: 119.035789 +epoch: 0, batch: 4076, sum loss: 8619.933594, avg loss: 4.621948, ppl: 101.691963 +epoch: 0, batch: 4077, sum 
loss: 8233.158203, avg loss: 4.857320, ppl: 128.678925 +epoch: 0, batch: 4078, sum loss: 7863.838379, avg loss: 4.792101, ppl: 120.554436 +epoch: 0, batch: 4079, sum loss: 7841.770508, avg loss: 4.985232, ppl: 146.237549 +epoch: 0, batch: 4080, sum loss: 8000.727539, avg loss: 4.640793, ppl: 103.626526 +epoch: 0, batch: 4081, sum loss: 7101.596191, avg loss: 4.641566, ppl: 103.706657 +epoch: 0, batch: 4082, sum loss: 6798.597656, avg loss: 4.637516, ppl: 103.287415 +epoch: 0, batch: 4083, sum loss: 8329.791992, avg loss: 4.751736, ppl: 115.785080 +epoch: 0, batch: 4084, sum loss: 8946.105469, avg loss: 4.786573, ppl: 119.889793 +epoch: 0, batch: 4085, sum loss: 7633.124023, avg loss: 4.758806, ppl: 116.606583 +epoch: 0, batch: 4086, sum loss: 8353.541992, avg loss: 4.781650, ppl: 119.300987 +epoch: 0, batch: 4087, sum loss: 7554.247559, avg loss: 4.542542, ppl: 93.929260 +epoch: 0, batch: 4088, sum loss: 9184.628906, avg loss: 5.122493, ppl: 167.753021 +epoch: 0, batch: 4089, sum loss: 9824.916992, avg loss: 4.900208, ppl: 134.317719 +epoch: 0, batch: 4090, sum loss: 7022.155273, avg loss: 4.607713, ppl: 100.254631 +epoch: 0, batch: 4091, sum loss: 7139.551758, avg loss: 4.811018, ppl: 122.856682 +epoch: 0, batch: 4092, sum loss: 9293.652344, avg loss: 5.154549, ppl: 173.217682 +epoch: 0, batch: 4093, sum loss: 8875.549805, avg loss: 4.868651, ppl: 130.145218 +epoch: 0, batch: 4094, sum loss: 8298.004883, avg loss: 4.898468, ppl: 134.084213 +epoch: 0, batch: 4095, sum loss: 7051.148926, avg loss: 4.581643, ppl: 97.674751 +epoch: 0, batch: 4096, sum loss: 7839.701660, avg loss: 4.722712, ppl: 112.472870 +epoch: 0, batch: 4097, sum loss: 8060.658203, avg loss: 4.621937, ppl: 101.690796 +epoch: 0, batch: 4098, sum loss: 8591.091797, avg loss: 4.674152, ppl: 107.141663 +epoch: 0, batch: 4099, sum loss: 7647.872070, avg loss: 4.729668, ppl: 113.257919 +epoch: 0, batch: 4100, sum loss: 7052.647461, avg loss: 4.730146, ppl: 113.312096 +epoch: 0, batch: 4101, sum loss: 
7998.181641, avg loss: 4.844447, ppl: 127.033028 +epoch: 0, batch: 4102, sum loss: 8452.321289, avg loss: 4.786139, ppl: 119.837784 +epoch: 0, batch: 4103, sum loss: 6915.365723, avg loss: 4.594928, ppl: 98.980988 +epoch: 0, batch: 4104, sum loss: 8748.730469, avg loss: 4.785958, ppl: 119.816071 +epoch: 0, batch: 4105, sum loss: 7012.748047, avg loss: 4.783594, ppl: 119.533142 +epoch: 0, batch: 4106, sum loss: 7394.384766, avg loss: 4.848777, ppl: 127.584236 +epoch: 0, batch: 4107, sum loss: 9368.705078, avg loss: 4.770216, ppl: 117.944771 +epoch: 0, batch: 4108, sum loss: 6309.656250, avg loss: 4.698181, ppl: 109.747322 +epoch: 0, batch: 4109, sum loss: 8926.934570, avg loss: 4.733263, ppl: 113.665909 +epoch: 0, batch: 4110, sum loss: 7815.630859, avg loss: 4.912402, ppl: 135.965561 +epoch: 0, batch: 4111, sum loss: 8150.528809, avg loss: 4.641531, ppl: 103.703041 +epoch: 0, batch: 4112, sum loss: 9055.292969, avg loss: 4.814085, ppl: 123.233940 +epoch: 0, batch: 4113, sum loss: 8591.602539, avg loss: 4.757255, ppl: 116.425903 +epoch: 0, batch: 4114, sum loss: 8144.924316, avg loss: 4.716227, ppl: 111.745842 +epoch: 0, batch: 4115, sum loss: 7606.274414, avg loss: 4.637972, ppl: 103.334610 +epoch: 0, batch: 4116, sum loss: 7426.577148, avg loss: 4.612781, ppl: 100.763939 +epoch: 0, batch: 4117, sum loss: 7427.016602, avg loss: 4.688773, ppl: 108.719719 +epoch: 0, batch: 4118, sum loss: 8822.089844, avg loss: 4.748165, ppl: 115.372345 +epoch: 0, batch: 4119, sum loss: 7596.717773, avg loss: 4.689332, ppl: 108.780495 +epoch: 0, batch: 4120, sum loss: 9124.011719, avg loss: 5.091525, ppl: 162.637634 +epoch: 0, batch: 4121, sum loss: 9932.255859, avg loss: 4.819144, ppl: 123.859055 +epoch: 0, batch: 4122, sum loss: 8928.335938, avg loss: 4.761779, ppl: 116.953835 +epoch: 0, batch: 4123, sum loss: 7641.108398, avg loss: 4.725485, ppl: 112.785172 +epoch: 0, batch: 4124, sum loss: 8724.742188, avg loss: 4.839014, ppl: 126.344719 +epoch: 0, batch: 4125, sum loss: 
7856.588379, avg loss: 4.852742, ppl: 128.091095 +epoch: 0, batch: 4126, sum loss: 7194.741211, avg loss: 4.761576, ppl: 116.930031 +epoch: 0, batch: 4127, sum loss: 8178.484375, avg loss: 4.746654, ppl: 115.198135 +epoch: 0, batch: 4128, sum loss: 8032.684082, avg loss: 4.569217, ppl: 96.468567 +epoch: 0, batch: 4129, sum loss: 8668.504883, avg loss: 4.859028, ppl: 128.898834 +epoch: 0, batch: 4130, sum loss: 8342.077148, avg loss: 4.745209, ppl: 115.031815 +epoch: 0, batch: 4131, sum loss: 7490.957031, avg loss: 4.756163, ppl: 116.298843 +epoch: 0, batch: 4132, sum loss: 6681.645508, avg loss: 4.614396, ppl: 100.926857 +epoch: 0, batch: 4133, sum loss: 7065.943359, avg loss: 4.458008, ppl: 86.315422 +epoch: 0, batch: 4134, sum loss: 6506.101074, avg loss: 4.496269, ppl: 89.681877 +epoch: 0, batch: 4135, sum loss: 7506.274902, avg loss: 4.735820, ppl: 113.956841 +epoch: 0, batch: 4136, sum loss: 8144.299805, avg loss: 4.859368, ppl: 128.942657 +epoch: 0, batch: 4137, sum loss: 8215.918945, avg loss: 4.787832, ppl: 120.040810 +epoch: 0, batch: 4138, sum loss: 7890.790527, avg loss: 4.666346, ppl: 106.308586 +epoch: 0, batch: 4139, sum loss: 9215.586914, avg loss: 5.005751, ppl: 149.269089 +epoch: 0, batch: 4140, sum loss: 8323.052734, avg loss: 4.957149, ppl: 142.187775 +epoch: 0, batch: 4141, sum loss: 7908.212402, avg loss: 4.630101, ppl: 102.524391 +epoch: 0, batch: 4142, sum loss: 8595.854492, avg loss: 4.788777, ppl: 120.154312 +epoch: 0, batch: 4143, sum loss: 6446.675781, avg loss: 4.421588, ppl: 83.228386 +epoch: 0, batch: 4144, sum loss: 8320.361328, avg loss: 4.669114, ppl: 106.603264 +epoch: 0, batch: 4145, sum loss: 6748.952148, avg loss: 4.448881, ppl: 85.531151 +epoch: 0, batch: 4146, sum loss: 8694.917969, avg loss: 4.766951, ppl: 117.560211 +epoch: 0, batch: 4147, sum loss: 7685.789062, avg loss: 4.613318, ppl: 100.818153 +epoch: 0, batch: 4148, sum loss: 7233.906250, avg loss: 4.679111, ppl: 107.674362 +epoch: 0, batch: 4149, sum loss: 7847.988770, 
avg loss: 4.826561, ppl: 124.781158 +epoch: 0, batch: 4150, sum loss: 8128.628418, avg loss: 4.605455, ppl: 100.028526 +epoch: 0, batch: 4151, sum loss: 8508.792969, avg loss: 4.772178, ppl: 118.176369 +epoch: 0, batch: 4152, sum loss: 8177.646973, avg loss: 4.707914, ppl: 110.820786 +epoch: 0, batch: 4153, sum loss: 8455.464844, avg loss: 4.697481, ppl: 109.670532 +epoch: 0, batch: 4154, sum loss: 8210.771484, avg loss: 4.732433, ppl: 113.571579 +epoch: 0, batch: 4155, sum loss: 6121.938477, avg loss: 4.627316, ppl: 102.239235 +epoch: 0, batch: 4156, sum loss: 10731.892578, avg loss: 4.902647, ppl: 134.645660 +epoch: 0, batch: 4157, sum loss: 7695.940430, avg loss: 4.479593, ppl: 88.198753 +epoch: 0, batch: 4158, sum loss: 7557.284668, avg loss: 4.490365, ppl: 89.153984 +epoch: 0, batch: 4159, sum loss: 7461.550293, avg loss: 4.614441, ppl: 100.931381 +epoch: 0, batch: 4160, sum loss: 8704.102539, avg loss: 4.870790, ppl: 130.423981 +epoch: 0, batch: 4161, sum loss: 7986.126953, avg loss: 4.941910, ppl: 140.037506 +epoch: 0, batch: 4162, sum loss: 8069.924805, avg loss: 4.651254, ppl: 104.716187 +epoch: 0, batch: 4163, sum loss: 8859.913086, avg loss: 4.952439, ppl: 141.519684 +epoch: 0, batch: 4164, sum loss: 7655.142578, avg loss: 4.740026, ppl: 114.437233 +epoch: 0, batch: 4165, sum loss: 8949.890625, avg loss: 4.882647, ppl: 131.979492 +epoch: 0, batch: 4166, sum loss: 8150.668457, avg loss: 4.673548, ppl: 107.077003 +epoch: 0, batch: 4167, sum loss: 7275.198242, avg loss: 4.648689, ppl: 104.447945 +epoch: 0, batch: 4168, sum loss: 7890.889648, avg loss: 4.666405, ppl: 106.314827 +epoch: 0, batch: 4169, sum loss: 8101.790527, avg loss: 4.754572, ppl: 116.113937 +epoch: 0, batch: 4170, sum loss: 7332.750488, avg loss: 4.805210, ppl: 122.145149 +epoch: 0, batch: 4171, sum loss: 8354.454102, avg loss: 4.704084, ppl: 110.397156 +epoch: 0, batch: 4172, sum loss: 8209.537109, avg loss: 4.483636, ppl: 88.556068 +epoch: 0, batch: 4173, sum loss: 6937.147949, avg loss: 
4.421381, ppl: 83.211159 +epoch: 0, batch: 4174, sum loss: 6589.239258, avg loss: 4.611084, ppl: 100.593178 +epoch: 0, batch: 4175, sum loss: 6935.093750, avg loss: 4.704948, ppl: 110.492584 +epoch: 0, batch: 4176, sum loss: 6957.249512, avg loss: 4.994436, ppl: 147.589722 +epoch: 0, batch: 4177, sum loss: 8172.546387, avg loss: 5.165958, ppl: 175.205292 +epoch: 0, batch: 4178, sum loss: 8355.386719, avg loss: 4.886191, ppl: 132.448105 +epoch: 0, batch: 4179, sum loss: 7540.057129, avg loss: 4.657231, ppl: 105.344017 +epoch: 0, batch: 4180, sum loss: 7656.922363, avg loss: 4.797570, ppl: 121.215530 +epoch: 0, batch: 4181, sum loss: 8632.368164, avg loss: 4.787780, ppl: 120.034630 +epoch: 0, batch: 4182, sum loss: 8076.057129, avg loss: 4.604365, ppl: 99.919548 +epoch: 0, batch: 4183, sum loss: 8470.486328, avg loss: 4.742713, ppl: 114.745132 +epoch: 0, batch: 4184, sum loss: 6568.708008, avg loss: 4.658659, ppl: 105.494469 +epoch: 0, batch: 4185, sum loss: 9154.555664, avg loss: 4.983428, ppl: 145.973862 +epoch: 0, batch: 4186, sum loss: 8766.303711, avg loss: 4.814005, ppl: 123.224190 +epoch: 0, batch: 4187, sum loss: 7822.885742, avg loss: 4.718266, ppl: 111.973976 +epoch: 0, batch: 4188, sum loss: 7379.416992, avg loss: 4.832624, ppl: 125.539940 +epoch: 0, batch: 4189, sum loss: 7496.708496, avg loss: 4.679593, ppl: 107.726227 +epoch: 0, batch: 4190, sum loss: 7790.288574, avg loss: 4.808820, ppl: 122.586906 +epoch: 0, batch: 4191, sum loss: 6383.364746, avg loss: 4.501668, ppl: 90.167404 +epoch: 0, batch: 4192, sum loss: 7170.714844, avg loss: 4.605469, ppl: 100.029861 +epoch: 0, batch: 4193, sum loss: 9185.122070, avg loss: 4.919723, ppl: 136.964615 +epoch: 0, batch: 4194, sum loss: 7862.500977, avg loss: 4.765152, ppl: 117.348953 +epoch: 0, batch: 4195, sum loss: 9054.459961, avg loss: 4.674476, ppl: 107.176407 +epoch: 0, batch: 4196, sum loss: 7810.948730, avg loss: 4.713910, ppl: 111.487183 +epoch: 0, batch: 4197, sum loss: 6955.409668, avg loss: 4.597098, 
ppl: 99.196068 +epoch: 0, batch: 4198, sum loss: 7727.882812, avg loss: 4.649749, ppl: 104.558769 +epoch: 0, batch: 4199, sum loss: 7864.880859, avg loss: 4.778178, ppl: 118.887566 +epoch: 0, batch: 4200, sum loss: 6911.359375, avg loss: 4.724101, ppl: 112.629204 +epoch: 0, batch: 4201, sum loss: 7279.841797, avg loss: 4.748755, ppl: 115.440521 +epoch: 0, batch: 4202, sum loss: 7727.541016, avg loss: 4.767144, ppl: 117.582970 +epoch: 0, batch: 4203, sum loss: 8792.142578, avg loss: 4.854856, ppl: 128.362152 +epoch: 0, batch: 4204, sum loss: 8222.123047, avg loss: 4.598503, ppl: 99.335464 +epoch: 0, batch: 4205, sum loss: 8601.688477, avg loss: 4.909640, ppl: 135.590576 +epoch: 0, batch: 4206, sum loss: 9297.629883, avg loss: 4.824925, ppl: 124.577118 +epoch: 0, batch: 4207, sum loss: 7487.339844, avg loss: 4.815010, ppl: 123.347992 +epoch: 0, batch: 4208, sum loss: 7602.416992, avg loss: 4.621530, ppl: 101.649445 +epoch: 0, batch: 4209, sum loss: 8985.808594, avg loss: 4.878289, ppl: 131.405670 +epoch: 0, batch: 4210, sum loss: 6511.472656, avg loss: 4.611525, ppl: 100.637466 +epoch: 0, batch: 4211, sum loss: 7598.278320, avg loss: 4.895798, ppl: 133.726700 +epoch: 0, batch: 4212, sum loss: 6790.702148, avg loss: 4.461697, ppl: 86.634369 +epoch: 0, batch: 4213, sum loss: 8587.463867, avg loss: 4.726177, ppl: 112.863281 +epoch: 0, batch: 4214, sum loss: 8137.603027, avg loss: 4.682165, ppl: 108.003662 +epoch: 0, batch: 4215, sum loss: 7434.953613, avg loss: 4.652662, ppl: 104.863739 +epoch: 0, batch: 4216, sum loss: 9023.030273, avg loss: 4.699495, ppl: 109.891701 +epoch: 0, batch: 4217, sum loss: 7972.718262, avg loss: 4.524812, ppl: 92.278549 +epoch: 0, batch: 4218, sum loss: 9167.275391, avg loss: 4.592824, ppl: 98.772926 +epoch: 0, batch: 4219, sum loss: 8769.558594, avg loss: 4.760890, ppl: 116.849823 +epoch: 0, batch: 4220, sum loss: 7323.541016, avg loss: 4.743227, ppl: 114.804024 +epoch: 0, batch: 4221, sum loss: 7459.297852, avg loss: 4.359613, ppl: 
78.226891 +epoch: 0, batch: 4222, sum loss: 7509.996094, avg loss: 4.735180, ppl: 113.884003 +epoch: 0, batch: 4223, sum loss: 8112.693848, avg loss: 4.711204, ppl: 111.185951 +epoch: 0, batch: 4224, sum loss: 8357.666016, avg loss: 4.617496, ppl: 101.240166 +epoch: 0, batch: 4225, sum loss: 8210.866211, avg loss: 4.815757, ppl: 123.440254 +epoch: 0, batch: 4226, sum loss: 8705.208008, avg loss: 4.994382, ppl: 147.581696 +epoch: 0, batch: 4227, sum loss: 8839.676758, avg loss: 4.672133, ppl: 106.925621 +epoch: 0, batch: 4228, sum loss: 8135.878906, avg loss: 4.593946, ppl: 98.883858 +epoch: 0, batch: 4229, sum loss: 7723.791992, avg loss: 4.910230, ppl: 135.670639 +epoch: 0, batch: 4230, sum loss: 8757.603516, avg loss: 4.715995, ppl: 111.719955 +epoch: 0, batch: 4231, sum loss: 7703.005371, avg loss: 4.601557, ppl: 99.639313 +epoch: 0, batch: 4232, sum loss: 8953.250977, avg loss: 4.795528, ppl: 120.968231 +epoch: 0, batch: 4233, sum loss: 7814.432617, avg loss: 4.924028, ppl: 137.555634 +epoch: 0, batch: 4234, sum loss: 7871.838867, avg loss: 4.680047, ppl: 107.775139 +epoch: 0, batch: 4235, sum loss: 7098.178711, avg loss: 4.710138, ppl: 111.067520 +epoch: 0, batch: 4236, sum loss: 8780.440430, avg loss: 4.741058, ppl: 114.555374 +epoch: 0, batch: 4237, sum loss: 7912.791016, avg loss: 4.905636, ppl: 135.048813 +epoch: 0, batch: 4238, sum loss: 7600.724609, avg loss: 4.747486, ppl: 115.294083 +epoch: 0, batch: 4239, sum loss: 6978.356934, avg loss: 4.534345, ppl: 93.162491 +epoch: 0, batch: 4240, sum loss: 7485.719238, avg loss: 4.807784, ppl: 122.459900 +epoch: 0, batch: 4241, sum loss: 8075.927246, avg loss: 4.681697, ppl: 107.953156 +epoch: 0, batch: 4242, sum loss: 6571.405273, avg loss: 4.443141, ppl: 85.041595 +epoch: 0, batch: 4243, sum loss: 7103.786133, avg loss: 4.720124, ppl: 112.182137 +epoch: 0, batch: 4244, sum loss: 7853.971680, avg loss: 4.751344, ppl: 115.739700 +epoch: 0, batch: 4245, sum loss: 8439.382812, avg loss: 4.712106, ppl: 111.286308 
+epoch: 0, batch: 4246, sum loss: 9026.645508, avg loss: 4.783596, ppl: 119.533424 +epoch: 0, batch: 4247, sum loss: 8047.736328, avg loss: 4.761974, ppl: 116.976646 +epoch: 0, batch: 4248, sum loss: 7719.589844, avg loss: 4.800740, ppl: 121.600342 +epoch: 0, batch: 4249, sum loss: 7588.972656, avg loss: 4.766943, ppl: 117.559258 +epoch: 0, batch: 4250, sum loss: 8514.422852, avg loss: 4.832249, ppl: 125.492897 +epoch: 0, batch: 4251, sum loss: 8154.426758, avg loss: 4.885816, ppl: 132.398468 +epoch: 0, batch: 4252, sum loss: 8103.927734, avg loss: 4.725322, ppl: 112.766830 +epoch: 0, batch: 4253, sum loss: 8428.867188, avg loss: 4.772858, ppl: 118.256699 +epoch: 0, batch: 4254, sum loss: 7921.907715, avg loss: 4.632695, ppl: 102.790680 +epoch: 0, batch: 4255, sum loss: 8064.515137, avg loss: 4.902441, ppl: 134.617920 +epoch: 0, batch: 4256, sum loss: 8771.717773, avg loss: 4.809056, ppl: 122.615791 +epoch: 0, batch: 4257, sum loss: 6898.143555, avg loss: 4.490979, ppl: 89.208755 +epoch: 0, batch: 4258, sum loss: 7254.169922, avg loss: 4.707443, ppl: 110.768585 +epoch: 0, batch: 4259, sum loss: 7977.036621, avg loss: 4.717349, ppl: 111.871292 +epoch: 0, batch: 4260, sum loss: 7616.272461, avg loss: 4.627140, ppl: 102.221298 +epoch: 0, batch: 4261, sum loss: 6791.648438, avg loss: 4.749405, ppl: 115.515518 +epoch: 0, batch: 4262, sum loss: 7840.405273, avg loss: 4.612003, ppl: 100.685654 +epoch: 0, batch: 4263, sum loss: 6040.556152, avg loss: 4.604082, ppl: 99.891251 +epoch: 0, batch: 4264, sum loss: 8654.861328, avg loss: 4.739793, ppl: 114.410500 +epoch: 0, batch: 4265, sum loss: 7656.310547, avg loss: 4.535729, ppl: 93.291496 +epoch: 0, batch: 4266, sum loss: 6462.598633, avg loss: 4.535157, ppl: 93.238129 +epoch: 0, batch: 4267, sum loss: 7959.273926, avg loss: 4.619428, ppl: 101.435959 +epoch: 0, batch: 4268, sum loss: 10853.765625, avg loss: 4.967399, ppl: 143.652710 +epoch: 0, batch: 4269, sum loss: 7727.725098, avg loss: 4.655256, ppl: 105.136162 +epoch: 0, 
batch: 4270, sum loss: 7774.195801, avg loss: 4.501561, ppl: 90.157776 +epoch: 0, batch: 4271, sum loss: 7858.729004, avg loss: 4.606524, ppl: 100.135422 +epoch: 0, batch: 4272, sum loss: 8609.492188, avg loss: 4.653780, ppl: 104.981010 +epoch: 0, batch: 4273, sum loss: 8446.767578, avg loss: 4.748043, ppl: 115.358261 +epoch: 0, batch: 4274, sum loss: 7992.656250, avg loss: 4.636112, ppl: 103.142525 +epoch: 0, batch: 4275, sum loss: 6903.256836, avg loss: 4.614477, ppl: 100.934990 +epoch: 0, batch: 4276, sum loss: 7649.302734, avg loss: 4.730552, ppl: 113.358139 +epoch: 0, batch: 4277, sum loss: 6415.406250, avg loss: 4.406186, ppl: 81.956253 +epoch: 0, batch: 4278, sum loss: 6395.714844, avg loss: 4.658205, ppl: 105.446587 +epoch: 0, batch: 4279, sum loss: 7660.715820, avg loss: 4.601031, ppl: 99.586922 +epoch: 0, batch: 4280, sum loss: 8084.851562, avg loss: 4.719703, ppl: 112.134964 +epoch: 0, batch: 4281, sum loss: 7483.458008, avg loss: 4.790946, ppl: 120.415283 +epoch: 0, batch: 4282, sum loss: 7377.796875, avg loss: 4.902191, ppl: 134.584290 +epoch: 0, batch: 4283, sum loss: 8241.252930, avg loss: 4.687857, ppl: 108.620171 +epoch: 0, batch: 4284, sum loss: 6711.005859, avg loss: 4.400660, ppl: 81.504608 +epoch: 0, batch: 4285, sum loss: 7306.199707, avg loss: 4.554987, ppl: 95.105560 +epoch: 0, batch: 4286, sum loss: 8144.137207, avg loss: 4.653793, ppl: 104.982414 +epoch: 0, batch: 4287, sum loss: 7430.995117, avg loss: 4.641471, ppl: 103.696815 +epoch: 0, batch: 4288, sum loss: 7395.888184, avg loss: 4.642742, ppl: 103.828674 +epoch: 0, batch: 4289, sum loss: 8424.682617, avg loss: 4.667414, ppl: 106.422195 +epoch: 0, batch: 4290, sum loss: 7002.391602, avg loss: 4.779789, ppl: 119.079277 +epoch: 0, batch: 4291, sum loss: 8021.632812, avg loss: 4.888259, ppl: 132.722290 +epoch: 0, batch: 4292, sum loss: 8120.963379, avg loss: 4.921796, ppl: 137.248871 +epoch: 0, batch: 4293, sum loss: 8694.972656, avg loss: 4.769596, ppl: 117.871567 +epoch: 0, batch: 4294, 
sum loss: 7286.577637, avg loss: 4.588525, ppl: 98.349289 +epoch: 0, batch: 4295, sum loss: 7817.395508, avg loss: 4.793008, ppl: 120.663765 +epoch: 0, batch: 4296, sum loss: 7560.418945, avg loss: 4.582072, ppl: 97.716682 +epoch: 0, batch: 4297, sum loss: 8527.095703, avg loss: 4.763741, ppl: 117.183434 +epoch: 0, batch: 4298, sum loss: 8449.753906, avg loss: 4.604771, ppl: 99.960052 +epoch: 0, batch: 4299, sum loss: 7982.083496, avg loss: 4.748414, ppl: 115.401062 +epoch: 0, batch: 4300, sum loss: 8034.273926, avg loss: 4.703907, ppl: 110.377632 +epoch: 0, batch: 4301, sum loss: 6478.250488, avg loss: 4.565363, ppl: 96.097511 +epoch: 0, batch: 4302, sum loss: 7473.477051, avg loss: 4.477818, ppl: 88.042313 +epoch: 0, batch: 4303, sum loss: 8092.254395, avg loss: 4.640054, ppl: 103.549911 +epoch: 0, batch: 4304, sum loss: 7387.456543, avg loss: 4.444920, ppl: 85.193024 +epoch: 0, batch: 4305, sum loss: 8306.217773, avg loss: 4.820788, ppl: 124.062805 +epoch: 0, batch: 4306, sum loss: 7467.537598, avg loss: 4.811558, ppl: 122.923012 +epoch: 0, batch: 4307, sum loss: 7086.141113, avg loss: 4.730401, ppl: 113.341011 +epoch: 0, batch: 4308, sum loss: 7789.596191, avg loss: 4.628399, ppl: 102.350105 +epoch: 0, batch: 4309, sum loss: 8169.714355, avg loss: 4.752597, ppl: 115.884888 +epoch: 0, batch: 4310, sum loss: 9247.016602, avg loss: 4.838837, ppl: 126.322311 +epoch: 0, batch: 4311, sum loss: 7530.519043, avg loss: 4.781282, ppl: 119.257133 +epoch: 0, batch: 4312, sum loss: 9261.640625, avg loss: 4.727739, ppl: 113.039726 +epoch: 0, batch: 4313, sum loss: 7847.887695, avg loss: 4.713446, ppl: 111.435524 +epoch: 0, batch: 4314, sum loss: 9292.656250, avg loss: 4.921958, ppl: 137.271118 +epoch: 0, batch: 4315, sum loss: 8258.789062, avg loss: 4.660716, ppl: 105.711754 +epoch: 0, batch: 4316, sum loss: 6807.183105, avg loss: 4.672054, ppl: 106.917152 +epoch: 0, batch: 4317, sum loss: 8525.481445, avg loss: 4.720643, ppl: 112.240402 +epoch: 0, batch: 4318, sum loss: 
7695.744141, avg loss: 4.785911, ppl: 119.810417 +epoch: 0, batch: 4319, sum loss: 8207.249023, avg loss: 4.673832, ppl: 107.107437 +epoch: 0, batch: 4320, sum loss: 7823.395508, avg loss: 4.880471, ppl: 131.692703 +epoch: 0, batch: 4321, sum loss: 7559.750977, avg loss: 4.635040, ppl: 103.032066 +epoch: 0, batch: 4322, sum loss: 8784.580078, avg loss: 4.667683, ppl: 106.450821 +epoch: 0, batch: 4323, sum loss: 7489.094727, avg loss: 4.704205, ppl: 110.410477 +epoch: 0, batch: 4324, sum loss: 7074.673828, avg loss: 4.623970, ppl: 101.897766 +epoch: 0, batch: 4325, sum loss: 7761.967773, avg loss: 4.687179, ppl: 108.546494 +epoch: 0, batch: 4326, sum loss: 6703.781250, avg loss: 4.493151, ppl: 89.402725 +epoch: 0, batch: 4327, sum loss: 7304.410645, avg loss: 4.667355, ppl: 106.415909 +epoch: 0, batch: 4328, sum loss: 9319.598633, avg loss: 4.853958, ppl: 128.246948 +epoch: 0, batch: 4329, sum loss: 9193.492188, avg loss: 4.848888, ppl: 127.598473 +epoch: 0, batch: 4330, sum loss: 8281.601562, avg loss: 4.616278, ppl: 101.116943 +epoch: 0, batch: 4331, sum loss: 5794.632812, avg loss: 4.276482, ppl: 71.986717 +epoch: 0, batch: 4332, sum loss: 9163.722656, avg loss: 4.630481, ppl: 102.563408 +epoch: 0, batch: 4333, sum loss: 6604.997070, avg loss: 4.322642, ppl: 75.387566 +epoch: 0, batch: 4334, sum loss: 7524.296875, avg loss: 4.593587, ppl: 98.848404 +epoch: 0, batch: 4335, sum loss: 7424.338379, avg loss: 4.654757, ppl: 105.083733 +epoch: 0, batch: 4336, sum loss: 8287.335938, avg loss: 4.622050, ppl: 101.702339 +epoch: 0, batch: 4337, sum loss: 7243.077637, avg loss: 4.672954, ppl: 107.013351 +epoch: 0, batch: 4338, sum loss: 8277.921875, avg loss: 4.550809, ppl: 94.709030 +epoch: 0, batch: 4339, sum loss: 7237.221680, avg loss: 4.919933, ppl: 136.993484 +epoch: 0, batch: 4340, sum loss: 7267.023438, avg loss: 4.605211, ppl: 100.004105 +epoch: 0, batch: 4341, sum loss: 8240.831055, avg loss: 4.749759, ppl: 115.556396 +epoch: 0, batch: 4342, sum loss: 9048.751953, 
avg loss: 4.528905, ppl: 92.657036 +epoch: 0, batch: 4343, sum loss: 7044.479492, avg loss: 4.686945, ppl: 108.521187 +epoch: 0, batch: 4344, sum loss: 8364.635742, avg loss: 4.931979, ppl: 138.653595 +epoch: 0, batch: 4345, sum loss: 7959.972656, avg loss: 4.743726, ppl: 114.861404 +epoch: 0, batch: 4346, sum loss: 8546.802734, avg loss: 4.624893, ppl: 101.991882 +epoch: 0, batch: 4347, sum loss: 8585.757812, avg loss: 4.635938, ppl: 103.124619 +epoch: 0, batch: 4348, sum loss: 7462.782227, avg loss: 4.717309, ppl: 111.866760 +epoch: 0, batch: 4349, sum loss: 9855.470703, avg loss: 4.842983, ppl: 126.847206 +epoch: 0, batch: 4350, sum loss: 8493.078125, avg loss: 4.990057, ppl: 146.944870 +epoch: 0, batch: 4351, sum loss: 8478.087891, avg loss: 4.875267, ppl: 131.009064 +epoch: 0, batch: 4352, sum loss: 8330.025391, avg loss: 4.792880, ppl: 120.648346 +epoch: 0, batch: 4353, sum loss: 6667.535645, avg loss: 4.623811, ppl: 101.881592 +epoch: 0, batch: 4354, sum loss: 6365.620117, avg loss: 4.586182, ppl: 98.119057 +epoch: 0, batch: 4355, sum loss: 8258.218750, avg loss: 4.493046, ppl: 89.393303 +epoch: 0, batch: 4356, sum loss: 8737.443359, avg loss: 4.687469, ppl: 108.578072 +epoch: 0, batch: 4357, sum loss: 8283.084961, avg loss: 4.883894, ppl: 132.144287 +epoch: 0, batch: 4358, sum loss: 7459.375000, avg loss: 4.421680, ppl: 83.236038 +epoch: 0, batch: 4359, sum loss: 8770.155273, avg loss: 4.745755, ppl: 115.094688 +epoch: 0, batch: 4360, sum loss: 7439.565918, avg loss: 4.849782, ppl: 127.712547 +epoch: 0, batch: 4361, sum loss: 6855.274902, avg loss: 4.740854, ppl: 114.531998 +epoch: 0, batch: 4362, sum loss: 8747.653320, avg loss: 4.889689, ppl: 132.912231 +epoch: 0, batch: 4363, sum loss: 8386.470703, avg loss: 4.735444, ppl: 113.913979 +epoch: 0, batch: 4364, sum loss: 7502.835938, avg loss: 4.511627, ppl: 91.069885 +epoch: 0, batch: 4365, sum loss: 8975.255859, avg loss: 4.691718, ppl: 109.040306 +epoch: 0, batch: 4366, sum loss: 7114.442383, avg loss: 
4.699103, ppl: 109.848633 +epoch: 0, batch: 4367, sum loss: 7680.505371, avg loss: 4.732289, ppl: 113.555229 +epoch: 0, batch: 4368, sum loss: 9500.169922, avg loss: 4.747711, ppl: 115.320038 +epoch: 0, batch: 4369, sum loss: 6291.528809, avg loss: 4.744743, ppl: 114.978241 +epoch: 0, batch: 4370, sum loss: 7408.854980, avg loss: 4.309980, ppl: 74.438995 +epoch: 0, batch: 4371, sum loss: 7503.408691, avg loss: 4.689631, ppl: 108.812965 +epoch: 0, batch: 4372, sum loss: 7639.696777, avg loss: 4.754012, ppl: 116.048897 +epoch: 0, batch: 4373, sum loss: 9679.148438, avg loss: 4.758677, ppl: 116.591515 +epoch: 0, batch: 4374, sum loss: 8251.539062, avg loss: 4.772435, ppl: 118.206688 +epoch: 0, batch: 4375, sum loss: 7945.731445, avg loss: 4.566512, ppl: 96.207962 +epoch: 0, batch: 4376, sum loss: 7101.607422, avg loss: 4.534871, ppl: 93.211456 +epoch: 0, batch: 4377, sum loss: 7781.362305, avg loss: 4.962604, ppl: 142.965530 +epoch: 0, batch: 4378, sum loss: 8281.147461, avg loss: 4.590436, ppl: 98.537430 +epoch: 0, batch: 4379, sum loss: 7124.336426, avg loss: 4.489185, ppl: 89.048874 +epoch: 0, batch: 4380, sum loss: 7413.176758, avg loss: 4.715761, ppl: 111.693802 +epoch: 0, batch: 4381, sum loss: 7043.338867, avg loss: 4.418657, ppl: 82.984749 +epoch: 0, batch: 4382, sum loss: 7506.063477, avg loss: 4.685433, ppl: 108.357170 +epoch: 0, batch: 4383, sum loss: 7559.295898, avg loss: 4.733435, ppl: 113.685417 +epoch: 0, batch: 4384, sum loss: 7276.110840, avg loss: 4.730891, ppl: 113.396576 +epoch: 0, batch: 4385, sum loss: 8074.524902, avg loss: 4.513429, ppl: 91.234138 +epoch: 0, batch: 4386, sum loss: 7362.188477, avg loss: 4.575630, ppl: 97.089157 +epoch: 0, batch: 4387, sum loss: 8905.877930, avg loss: 4.811388, ppl: 122.902031 +epoch: 0, batch: 4388, sum loss: 6961.455078, avg loss: 4.729249, ppl: 113.210564 +epoch: 0, batch: 4389, sum loss: 8304.362305, avg loss: 4.769881, ppl: 117.905182 +epoch: 0, batch: 4390, sum loss: 8064.144531, avg loss: 4.683011, ppl: 
108.095009 +epoch: 0, batch: 4391, sum loss: 6660.293945, avg loss: 4.434283, ppl: 84.291687 +epoch: 0, batch: 4392, sum loss: 7175.452148, avg loss: 4.529957, ppl: 92.754601 +epoch: 0, batch: 4393, sum loss: 7895.287109, avg loss: 4.633385, ppl: 102.861633 +epoch: 0, batch: 4394, sum loss: 8242.289062, avg loss: 4.783685, ppl: 119.544029 +epoch: 0, batch: 4395, sum loss: 7179.271973, avg loss: 4.374937, ppl: 79.434837 +epoch: 0, batch: 4396, sum loss: 7612.161133, avg loss: 4.784513, ppl: 119.643143 +epoch: 0, batch: 4397, sum loss: 8672.016602, avg loss: 4.718181, ppl: 111.964363 +epoch: 0, batch: 4398, sum loss: 7526.700195, avg loss: 4.724859, ppl: 112.714577 +epoch: 0, batch: 4399, sum loss: 6866.692383, avg loss: 4.348760, ppl: 77.382462 +epoch: 0, batch: 4400, sum loss: 9952.839844, avg loss: 4.819777, ppl: 123.937508 +epoch: 0, batch: 4401, sum loss: 7905.400879, avg loss: 4.580186, ppl: 97.532524 +epoch: 0, batch: 4402, sum loss: 7523.547363, avg loss: 4.638438, ppl: 103.382759 +epoch: 0, batch: 4403, sum loss: 8403.985352, avg loss: 4.758769, ppl: 116.602249 +epoch: 0, batch: 4404, sum loss: 7981.108887, avg loss: 4.776248, ppl: 118.658249 +epoch: 0, batch: 4405, sum loss: 8727.230469, avg loss: 4.659493, ppl: 105.582535 +epoch: 0, batch: 4406, sum loss: 7844.911133, avg loss: 4.728699, ppl: 113.148232 +epoch: 0, batch: 4407, sum loss: 7646.921875, avg loss: 4.506142, ppl: 90.571686 +epoch: 0, batch: 4408, sum loss: 7994.102539, avg loss: 4.607552, ppl: 100.238472 +epoch: 0, batch: 4409, sum loss: 7273.178711, avg loss: 4.585863, ppl: 98.087814 +epoch: 0, batch: 4410, sum loss: 7285.277344, avg loss: 4.685066, ppl: 108.317444 +epoch: 0, batch: 4411, sum loss: 8182.726562, avg loss: 4.971280, ppl: 144.211380 +epoch: 0, batch: 4412, sum loss: 7265.336914, avg loss: 4.507033, ppl: 90.652443 +epoch: 0, batch: 4413, sum loss: 7515.063477, avg loss: 4.616132, ppl: 101.102242 +epoch: 0, batch: 4414, sum loss: 8395.054688, avg loss: 4.824744, ppl: 124.554604 
+epoch: 0, batch: 4415, sum loss: 7414.300293, avg loss: 4.695567, ppl: 109.460869 +epoch: 0, batch: 4416, sum loss: 9529.925781, avg loss: 4.937786, ppl: 139.461090 +epoch: 0, batch: 4417, sum loss: 8678.448242, avg loss: 4.655820, ppl: 105.195435 +epoch: 0, batch: 4418, sum loss: 7591.932617, avg loss: 4.535205, ppl: 93.242615 +epoch: 0, batch: 4419, sum loss: 8031.591309, avg loss: 4.724466, ppl: 112.670296 +epoch: 0, batch: 4420, sum loss: 6815.875000, avg loss: 4.690898, ppl: 108.951019 +epoch: 0, batch: 4421, sum loss: 7673.402344, avg loss: 4.642107, ppl: 103.762695 +epoch: 0, batch: 4422, sum loss: 7432.427734, avg loss: 4.562571, ppl: 95.829498 +epoch: 0, batch: 4423, sum loss: 8002.287109, avg loss: 4.771787, ppl: 118.130173 +epoch: 0, batch: 4424, sum loss: 9425.933594, avg loss: 4.821449, ppl: 124.144882 +epoch: 0, batch: 4425, sum loss: 7416.444336, avg loss: 4.603628, ppl: 99.845917 +epoch: 0, batch: 4426, sum loss: 8102.271484, avg loss: 4.656478, ppl: 105.264679 +epoch: 0, batch: 4427, sum loss: 8808.437500, avg loss: 4.784594, ppl: 119.652779 +epoch: 0, batch: 4428, sum loss: 7527.411621, avg loss: 4.592685, ppl: 98.759262 +epoch: 0, batch: 4429, sum loss: 8621.638672, avg loss: 4.830050, ppl: 125.217224 +epoch: 0, batch: 4430, sum loss: 7604.997070, avg loss: 4.798105, ppl: 121.280403 +epoch: 0, batch: 4431, sum loss: 6627.273438, avg loss: 4.517569, ppl: 91.612579 +epoch: 0, batch: 4432, sum loss: 7089.355469, avg loss: 4.529940, ppl: 92.752960 +epoch: 0, batch: 4433, sum loss: 9126.556641, avg loss: 4.896221, ppl: 133.783279 +epoch: 0, batch: 4434, sum loss: 7875.363281, avg loss: 4.621692, ppl: 101.665924 +epoch: 0, batch: 4435, sum loss: 7079.095703, avg loss: 4.529172, ppl: 92.681786 +epoch: 0, batch: 4436, sum loss: 7334.547852, avg loss: 4.701633, ppl: 110.126915 +epoch: 0, batch: 4437, sum loss: 7440.366211, avg loss: 4.718051, ppl: 111.949844 +epoch: 0, batch: 4438, sum loss: 6559.339355, avg loss: 4.431986, ppl: 84.098297 +epoch: 0, 
batch: 4439, sum loss: 8054.707031, avg loss: 4.861018, ppl: 129.155640 +epoch: 0, batch: 4440, sum loss: 7119.683594, avg loss: 4.529061, ppl: 92.671486 +epoch: 0, batch: 4441, sum loss: 8486.613281, avg loss: 4.735834, ppl: 113.958420 +epoch: 0, batch: 4442, sum loss: 7554.622070, avg loss: 4.718690, ppl: 112.021400 +epoch: 0, batch: 4443, sum loss: 9213.272461, avg loss: 4.758922, ppl: 116.620094 +epoch: 0, batch: 4444, sum loss: 8526.459961, avg loss: 4.636466, ppl: 103.179024 +epoch: 0, batch: 4445, sum loss: 8285.659180, avg loss: 4.775596, ppl: 118.580986 +epoch: 0, batch: 4446, sum loss: 7218.694336, avg loss: 4.669272, ppl: 106.620087 +epoch: 0, batch: 4447, sum loss: 8683.582031, avg loss: 4.776448, ppl: 118.682014 +epoch: 0, batch: 4448, sum loss: 7211.114258, avg loss: 4.473396, ppl: 87.653877 +epoch: 0, batch: 4449, sum loss: 7574.211426, avg loss: 4.739807, ppl: 114.412132 +epoch: 0, batch: 4450, sum loss: 8082.381836, avg loss: 4.618504, ppl: 101.342316 +epoch: 0, batch: 4451, sum loss: 6505.163574, avg loss: 4.600540, ppl: 99.538071 +epoch: 0, batch: 4452, sum loss: 8069.201172, avg loss: 4.746589, ppl: 115.190720 +epoch: 0, batch: 4453, sum loss: 7382.424805, avg loss: 4.614016, ppl: 100.888466 +epoch: 0, batch: 4454, sum loss: 8564.511719, avg loss: 4.880064, ppl: 131.639023 +epoch: 0, batch: 4455, sum loss: 8262.690430, avg loss: 4.618608, ppl: 101.352898 +epoch: 0, batch: 4456, sum loss: 8496.447266, avg loss: 4.802967, ppl: 121.871422 +epoch: 0, batch: 4457, sum loss: 6928.924805, avg loss: 4.322473, ppl: 75.374802 +epoch: 0, batch: 4458, sum loss: 8228.648438, avg loss: 4.731828, ppl: 113.502823 +epoch: 0, batch: 4459, sum loss: 8518.425781, avg loss: 4.774903, ppl: 118.498802 +epoch: 0, batch: 4460, sum loss: 7701.768555, avg loss: 4.745390, ppl: 115.052711 +epoch: 0, batch: 4461, sum loss: 8218.999023, avg loss: 4.685860, ppl: 108.403473 +epoch: 0, batch: 4462, sum loss: 9527.799805, avg loss: 4.841362, ppl: 126.641724 +epoch: 0, batch: 
4463, sum loss: 9095.604492, avg loss: 4.717638, ppl: 111.903572 +epoch: 0, batch: 4464, sum loss: 7785.697266, avg loss: 4.937031, ppl: 139.355865 +epoch: 0, batch: 4465, sum loss: 8174.760742, avg loss: 4.676637, ppl: 107.408218 +epoch: 0, batch: 4466, sum loss: 8628.508789, avg loss: 4.828488, ppl: 125.021828 +epoch: 0, batch: 4467, sum loss: 8047.281250, avg loss: 4.611622, ppl: 100.647301 +epoch: 0, batch: 4468, sum loss: 6922.293945, avg loss: 4.512578, ppl: 91.156471 +epoch: 0, batch: 4469, sum loss: 6674.270020, avg loss: 4.552708, ppl: 94.889038 +epoch: 0, batch: 4470, sum loss: 7431.405273, avg loss: 4.638830, ppl: 103.423241 +epoch: 0, batch: 4471, sum loss: 6848.443359, avg loss: 4.398487, ppl: 81.327698 +epoch: 0, batch: 4472, sum loss: 8465.033203, avg loss: 4.793337, ppl: 120.703468 +epoch: 0, batch: 4473, sum loss: 7494.718750, avg loss: 4.612134, ppl: 100.698860 +epoch: 0, batch: 4474, sum loss: 8083.614746, avg loss: 4.743906, ppl: 114.882004 +epoch: 0, batch: 4475, sum loss: 7421.879883, avg loss: 4.659059, ppl: 105.536682 +epoch: 0, batch: 4476, sum loss: 6626.442871, avg loss: 4.566811, ppl: 96.236732 +epoch: 0, batch: 4477, sum loss: 8452.553711, avg loss: 4.813528, ppl: 123.165329 +epoch: 0, batch: 4478, sum loss: 7593.241211, avg loss: 4.610347, ppl: 100.519005 +epoch: 0, batch: 4479, sum loss: 7795.886719, avg loss: 4.800423, ppl: 121.561790 +epoch: 0, batch: 4480, sum loss: 8295.362305, avg loss: 4.647262, ppl: 104.299034 +epoch: 0, batch: 4481, sum loss: 7541.860352, avg loss: 4.457364, ppl: 86.259834 +epoch: 0, batch: 4482, sum loss: 7637.931641, avg loss: 4.818884, ppl: 123.826866 +epoch: 0, batch: 4483, sum loss: 7399.459961, avg loss: 4.441453, ppl: 84.898170 +epoch: 0, batch: 4484, sum loss: 8709.879883, avg loss: 4.588978, ppl: 98.393852 +epoch: 0, batch: 4485, sum loss: 7547.057129, avg loss: 4.505706, ppl: 90.532219 +epoch: 0, batch: 4486, sum loss: 7376.261719, avg loss: 4.689295, ppl: 108.776497 +epoch: 0, batch: 4487, sum loss: 
7449.580566, avg loss: 4.598507, ppl: 99.335892 +epoch: 0, batch: 4488, sum loss: 8390.755859, avg loss: 4.503895, ppl: 90.368416 +epoch: 0, batch: 4489, sum loss: 7764.310059, avg loss: 4.459684, ppl: 86.460213 +epoch: 0, batch: 4490, sum loss: 7482.276367, avg loss: 4.702877, ppl: 110.263901 +epoch: 0, batch: 4491, sum loss: 8219.111328, avg loss: 4.604544, ppl: 99.937416 +epoch: 0, batch: 4492, sum loss: 7187.849609, avg loss: 4.560818, ppl: 95.661720 +epoch: 0, batch: 4493, sum loss: 10027.414062, avg loss: 4.884274, ppl: 132.194458 +epoch: 0, batch: 4494, sum loss: 8106.614258, avg loss: 4.746261, ppl: 115.152939 +epoch: 0, batch: 4495, sum loss: 9489.043945, avg loss: 5.042000, ppl: 154.779236 +epoch: 0, batch: 4496, sum loss: 6890.327637, avg loss: 4.729120, ppl: 113.195885 +epoch: 0, batch: 4497, sum loss: 8190.379395, avg loss: 4.661571, ppl: 105.802116 +epoch: 0, batch: 4498, sum loss: 7655.701660, avg loss: 4.648271, ppl: 104.404274 +epoch: 0, batch: 4499, sum loss: 7497.843262, avg loss: 4.544147, ppl: 94.080193 +epoch: 0, batch: 4500, sum loss: 6732.333008, avg loss: 4.573596, ppl: 96.891907 +epoch: 0, batch: 4501, sum loss: 7947.327148, avg loss: 4.577953, ppl: 97.315018 +epoch: 0, batch: 4502, sum loss: 7178.229004, avg loss: 4.661188, ppl: 105.761612 +epoch: 0, batch: 4503, sum loss: 7568.032227, avg loss: 4.564555, ppl: 96.019875 +epoch: 0, batch: 4504, sum loss: 8694.606445, avg loss: 4.614972, ppl: 100.984962 +epoch: 0, batch: 4505, sum loss: 8168.402344, avg loss: 4.716167, ppl: 111.739182 +epoch: 0, batch: 4506, sum loss: 7739.680664, avg loss: 4.687875, ppl: 108.622093 +epoch: 0, batch: 4507, sum loss: 7810.960938, avg loss: 4.654923, ppl: 105.101120 +epoch: 0, batch: 4508, sum loss: 7304.618652, avg loss: 4.740181, ppl: 114.454910 +epoch: 0, batch: 4509, sum loss: 8550.083984, avg loss: 4.846986, ppl: 127.355995 +epoch: 0, batch: 4510, sum loss: 8245.385742, avg loss: 4.692877, ppl: 109.166832 +epoch: 0, batch: 4511, sum loss: 6079.238770, 
avg loss: 4.519880, ppl: 91.824608 +epoch: 0, batch: 4512, sum loss: 7014.020508, avg loss: 4.381025, ppl: 79.919899 +epoch: 0, batch: 4513, sum loss: 8256.292969, avg loss: 4.873845, ppl: 130.822922 +epoch: 0, batch: 4514, sum loss: 8047.963867, avg loss: 4.660083, ppl: 105.644882 +epoch: 0, batch: 4515, sum loss: 7014.502441, avg loss: 4.482110, ppl: 88.421043 +epoch: 0, batch: 4516, sum loss: 7354.250977, avg loss: 4.573539, ppl: 96.886406 +epoch: 0, batch: 4517, sum loss: 7627.300781, avg loss: 4.603078, ppl: 99.791039 +epoch: 0, batch: 4518, sum loss: 8005.891113, avg loss: 4.679072, ppl: 107.670097 +epoch: 0, batch: 4519, sum loss: 8150.450684, avg loss: 4.752449, ppl: 115.867699 +epoch: 0, batch: 4520, sum loss: 7429.434082, avg loss: 4.516373, ppl: 91.503128 +epoch: 0, batch: 4521, sum loss: 7554.234863, avg loss: 4.654489, ppl: 105.055473 +epoch: 0, batch: 4522, sum loss: 7877.579590, avg loss: 4.675121, ppl: 107.245575 +epoch: 0, batch: 4523, sum loss: 6520.750977, avg loss: 4.531446, ppl: 92.892784 +epoch: 0, batch: 4524, sum loss: 9310.053711, avg loss: 4.879483, ppl: 131.562592 +epoch: 0, batch: 4525, sum loss: 7885.425293, avg loss: 4.724641, ppl: 112.690018 +epoch: 0, batch: 4526, sum loss: 7758.569336, avg loss: 4.366106, ppl: 78.736397 +epoch: 0, batch: 4527, sum loss: 8385.275391, avg loss: 5.128609, ppl: 168.782135 +epoch: 0, batch: 4528, sum loss: 6766.803711, avg loss: 4.702435, ppl: 110.215172 +epoch: 0, batch: 4529, sum loss: 6731.153320, avg loss: 4.419667, ppl: 83.068642 +epoch: 0, batch: 4530, sum loss: 6102.621094, avg loss: 4.490523, ppl: 89.168098 +epoch: 0, batch: 4531, sum loss: 9134.258789, avg loss: 4.937438, ppl: 139.412552 +epoch: 0, batch: 4532, sum loss: 7992.851562, avg loss: 4.737909, ppl: 114.195152 +epoch: 0, batch: 4533, sum loss: 8244.927734, avg loss: 4.765854, ppl: 117.431351 +epoch: 0, batch: 4534, sum loss: 7878.450684, avg loss: 4.642576, ppl: 103.811394 +epoch: 0, batch: 4535, sum loss: 7719.553711, avg loss: 
4.462170, ppl: 86.675362 +epoch: 0, batch: 4536, sum loss: 7378.126465, avg loss: 4.608449, ppl: 100.328415 +epoch: 0, batch: 4537, sum loss: 8904.596680, avg loss: 4.818505, ppl: 123.779877 +epoch: 0, batch: 4538, sum loss: 8437.916016, avg loss: 4.546291, ppl: 94.282059 +epoch: 0, batch: 4539, sum loss: 6789.861328, avg loss: 4.523559, ppl: 92.162987 +epoch: 0, batch: 4540, sum loss: 6796.216797, avg loss: 4.651757, ppl: 104.768929 +epoch: 0, batch: 4541, sum loss: 7823.666504, avg loss: 4.645883, ppl: 104.155304 +epoch: 0, batch: 4542, sum loss: 6424.562500, avg loss: 4.291625, ppl: 73.085106 +epoch: 0, batch: 4543, sum loss: 8395.724609, avg loss: 4.404892, ppl: 81.850304 +epoch: 0, batch: 4544, sum loss: 8486.494141, avg loss: 4.706874, ppl: 110.705597 +epoch: 0, batch: 4545, sum loss: 8348.630859, avg loss: 4.836982, ppl: 126.088211 +epoch: 0, batch: 4546, sum loss: 8577.452148, avg loss: 4.813385, ppl: 123.147827 +epoch: 0, batch: 4547, sum loss: 8217.568359, avg loss: 4.542603, ppl: 93.934998 +epoch: 0, batch: 4548, sum loss: 7881.604492, avg loss: 4.716700, ppl: 111.798660 +epoch: 0, batch: 4549, sum loss: 7346.240234, avg loss: 4.721234, ppl: 112.306740 +epoch: 0, batch: 4550, sum loss: 7703.917969, avg loss: 4.878985, ppl: 131.497177 +epoch: 0, batch: 4551, sum loss: 7098.300781, avg loss: 4.657678, ppl: 105.391045 +epoch: 0, batch: 4552, sum loss: 9094.831055, avg loss: 4.799383, ppl: 121.435486 +epoch: 0, batch: 4553, sum loss: 7146.434570, avg loss: 4.534540, ppl: 93.180611 +epoch: 0, batch: 4554, sum loss: 7529.723633, avg loss: 4.659482, ppl: 105.581429 +epoch: 0, batch: 4555, sum loss: 8153.752441, avg loss: 5.002302, ppl: 148.755219 +epoch: 0, batch: 4556, sum loss: 7630.866699, avg loss: 4.580352, ppl: 97.548706 +epoch: 0, batch: 4557, sum loss: 7751.736816, avg loss: 4.644540, ppl: 104.015488 +epoch: 0, batch: 4558, sum loss: 6554.436035, avg loss: 4.281147, ppl: 72.323349 +epoch: 0, batch: 4559, sum loss: 7944.793457, avg loss: 4.665175, ppl: 
106.184212 +epoch: 0, batch: 4560, sum loss: 8469.445312, avg loss: 4.497847, ppl: 89.823494 +epoch: 0, batch: 4561, sum loss: 8072.598633, avg loss: 4.558215, ppl: 95.412987 +epoch: 0, batch: 4562, sum loss: 6945.115234, avg loss: 4.432109, ppl: 84.108643 +epoch: 0, batch: 4563, sum loss: 8803.167969, avg loss: 4.815737, ppl: 123.437782 +epoch: 0, batch: 4564, sum loss: 7281.621582, avg loss: 4.448150, ppl: 85.468697 +epoch: 0, batch: 4565, sum loss: 8068.329102, avg loss: 4.674582, ppl: 107.187752 +epoch: 0, batch: 4566, sum loss: 7658.845215, avg loss: 4.670028, ppl: 106.700699 +epoch: 0, batch: 4567, sum loss: 7299.066406, avg loss: 4.693933, ppl: 109.282196 +epoch: 0, batch: 4568, sum loss: 6816.891602, avg loss: 4.375412, ppl: 79.472572 +epoch: 0, batch: 4569, sum loss: 8011.600098, avg loss: 4.541723, ppl: 93.852394 +epoch: 0, batch: 4570, sum loss: 7424.142578, avg loss: 4.654635, ppl: 105.070854 +epoch: 0, batch: 4571, sum loss: 8019.093750, avg loss: 4.750648, ppl: 115.659210 +epoch: 0, batch: 4572, sum loss: 8110.839844, avg loss: 4.785156, ppl: 119.720070 +epoch: 0, batch: 4573, sum loss: 7257.944336, avg loss: 4.519268, ppl: 91.768402 +epoch: 0, batch: 4574, sum loss: 8260.459961, avg loss: 4.761072, ppl: 116.871162 +epoch: 0, batch: 4575, sum loss: 7899.906250, avg loss: 4.741840, ppl: 114.644997 +epoch: 0, batch: 4576, sum loss: 7807.696289, avg loss: 4.711947, ppl: 111.268585 +epoch: 0, batch: 4577, sum loss: 8539.385742, avg loss: 4.633416, ppl: 102.864868 +epoch: 0, batch: 4578, sum loss: 9490.290039, avg loss: 4.700490, ppl: 110.001114 +epoch: 0, batch: 4579, sum loss: 7923.493652, avg loss: 4.655402, ppl: 105.151451 +epoch: 0, batch: 4580, sum loss: 7341.229004, avg loss: 4.611325, ppl: 100.617355 +epoch: 0, batch: 4581, sum loss: 7175.754395, avg loss: 4.745870, ppl: 115.107864 +epoch: 0, batch: 4582, sum loss: 7254.023926, avg loss: 4.667969, ppl: 106.481285 +epoch: 0, batch: 4583, sum loss: 8099.334961, avg loss: 4.401812, ppl: 81.598595 
+epoch: 0, batch: 4584, sum loss: 6953.563965, avg loss: 4.431844, ppl: 84.086349 +epoch: 0, batch: 4585, sum loss: 7915.263672, avg loss: 4.634229, ppl: 102.948532 +epoch: 0, batch: 4586, sum loss: 7844.424805, avg loss: 4.774452, ppl: 118.445358 +epoch: 0, batch: 4587, sum loss: 9193.571289, avg loss: 4.697788, ppl: 109.704262 +epoch: 0, batch: 4588, sum loss: 8254.168945, avg loss: 4.658109, ppl: 105.436531 +epoch: 0, batch: 4589, sum loss: 8003.721680, avg loss: 4.647922, ppl: 104.367889 +epoch: 0, batch: 4590, sum loss: 8281.791992, avg loss: 4.583172, ppl: 97.824188 +epoch: 0, batch: 4591, sum loss: 7481.717773, avg loss: 4.504345, ppl: 90.409142 +epoch: 0, batch: 4592, sum loss: 8439.119141, avg loss: 4.725151, ppl: 112.747528 +epoch: 0, batch: 4593, sum loss: 8213.493164, avg loss: 4.704177, ppl: 110.407425 +epoch: 0, batch: 4594, sum loss: 8829.072266, avg loss: 4.798409, ppl: 121.317192 +epoch: 0, batch: 4595, sum loss: 7116.150879, avg loss: 4.371100, ppl: 79.130623 +epoch: 0, batch: 4596, sum loss: 8915.906250, avg loss: 4.707448, ppl: 110.769119 +epoch: 0, batch: 4597, sum loss: 7627.326172, avg loss: 4.494594, ppl: 89.531822 +epoch: 0, batch: 4598, sum loss: 7956.893066, avg loss: 4.658602, ppl: 105.488533 +epoch: 0, batch: 4599, sum loss: 6498.613281, avg loss: 4.705730, ppl: 110.578972 +epoch: 0, batch: 4600, sum loss: 7688.046387, avg loss: 4.562639, ppl: 95.836037 +epoch: 0, batch: 4601, sum loss: 7636.446289, avg loss: 4.583701, ppl: 97.875977 +epoch: 0, batch: 4602, sum loss: 7855.465820, avg loss: 4.496546, ppl: 89.706726 +epoch: 0, batch: 4603, sum loss: 6948.845703, avg loss: 4.541729, ppl: 93.852974 +epoch: 0, batch: 4604, sum loss: 7639.276855, avg loss: 4.765613, ppl: 117.403015 +epoch: 0, batch: 4605, sum loss: 6506.956055, avg loss: 4.335081, ppl: 76.331116 +epoch: 0, batch: 4606, sum loss: 7579.271973, avg loss: 4.690144, ppl: 108.868813 +epoch: 0, batch: 4607, sum loss: 8089.863770, avg loss: 4.778419, ppl: 118.916252 +epoch: 0, batch: 
4608, sum loss: 9553.051758, avg loss: 4.762239, ppl: 117.007668 +epoch: 0, batch: 4609, sum loss: 8066.568359, avg loss: 4.506463, ppl: 90.600754 +epoch: 0, batch: 4610, sum loss: 7817.467285, avg loss: 4.636695, ppl: 103.202690 +epoch: 0, batch: 4611, sum loss: 6456.787598, avg loss: 4.531079, ppl: 92.858681 +epoch: 0, batch: 4612, sum loss: 7678.204102, avg loss: 4.513935, ppl: 91.280312 +epoch: 0, batch: 4613, sum loss: 7530.685547, avg loss: 4.564052, ppl: 95.971581 +epoch: 0, batch: 4614, sum loss: 6861.471191, avg loss: 4.455501, ppl: 86.099243 +epoch: 0, batch: 4615, sum loss: 6253.918457, avg loss: 4.508954, ppl: 90.826729 +epoch: 0, batch: 4616, sum loss: 8233.932617, avg loss: 4.697052, ppl: 109.623581 +epoch: 0, batch: 4617, sum loss: 8682.559570, avg loss: 4.847883, ppl: 127.470276 +epoch: 0, batch: 4618, sum loss: 5415.923828, avg loss: 4.517034, ppl: 91.563622 +epoch: 0, batch: 4619, sum loss: 7411.548340, avg loss: 4.655495, ppl: 105.161278 +epoch: 0, batch: 4620, sum loss: 7475.642578, avg loss: 4.970507, ppl: 144.099884 +epoch: 0, batch: 4621, sum loss: 7701.122559, avg loss: 4.365716, ppl: 78.705734 +epoch: 0, batch: 4622, sum loss: 7436.701660, avg loss: 4.680114, ppl: 107.782387 +epoch: 0, batch: 4623, sum loss: 7690.623047, avg loss: 4.658161, ppl: 105.441963 +epoch: 0, batch: 4624, sum loss: 7500.326660, avg loss: 4.762112, ppl: 116.992767 +epoch: 0, batch: 4625, sum loss: 7558.624023, avg loss: 4.575438, ppl: 97.070595 +epoch: 0, batch: 4626, sum loss: 7695.370117, avg loss: 4.481870, ppl: 88.399796 +epoch: 0, batch: 4627, sum loss: 7153.939941, avg loss: 4.530677, ppl: 92.821411 +epoch: 0, batch: 4628, sum loss: 7881.642090, avg loss: 4.493525, ppl: 89.436111 +epoch: 0, batch: 4629, sum loss: 7101.265137, avg loss: 4.391630, ppl: 80.771942 +epoch: 0, batch: 4630, sum loss: 7775.064941, avg loss: 4.520387, ppl: 91.871117 +epoch: 0, batch: 4631, sum loss: 8515.082031, avg loss: 4.592816, ppl: 98.772217 +epoch: 0, batch: 4632, sum loss: 
7199.220703, avg loss: 4.755100, ppl: 116.175247 +epoch: 0, batch: 4633, sum loss: 6942.050293, avg loss: 4.555151, ppl: 95.121117 +epoch: 0, batch: 4634, sum loss: 7428.394043, avg loss: 4.286436, ppl: 72.706886 +epoch: 0, batch: 4635, sum loss: 7540.019043, avg loss: 4.674531, ppl: 107.182335 +epoch: 0, batch: 4636, sum loss: 8115.182617, avg loss: 4.546321, ppl: 94.284889 +epoch: 0, batch: 4637, sum loss: 7444.543945, avg loss: 4.641237, ppl: 103.672539 +epoch: 0, batch: 4638, sum loss: 8880.048828, avg loss: 4.763975, ppl: 117.210876 +epoch: 0, batch: 4639, sum loss: 7960.090332, avg loss: 4.657747, ppl: 105.398384 +epoch: 0, batch: 4640, sum loss: 6632.964844, avg loss: 4.284861, ppl: 72.592461 +epoch: 0, batch: 4641, sum loss: 8797.299805, avg loss: 4.799400, ppl: 121.437515 +epoch: 0, batch: 4642, sum loss: 7700.661133, avg loss: 4.883108, ppl: 132.040421 +epoch: 0, batch: 4643, sum loss: 6774.562988, avg loss: 4.565069, ppl: 96.069244 +epoch: 0, batch: 4644, sum loss: 7376.295410, avg loss: 4.624637, ppl: 101.965714 +epoch: 0, batch: 4645, sum loss: 7516.992188, avg loss: 4.591932, ppl: 98.684891 +epoch: 0, batch: 4646, sum loss: 8010.329102, avg loss: 4.773736, ppl: 118.360611 +epoch: 0, batch: 4647, sum loss: 8075.727539, avg loss: 4.575483, ppl: 97.074898 +epoch: 0, batch: 4648, sum loss: 9001.056641, avg loss: 4.860182, ppl: 129.047668 +epoch: 0, batch: 4649, sum loss: 8940.624023, avg loss: 4.536085, ppl: 93.324730 +epoch: 0, batch: 4650, sum loss: 8036.494629, avg loss: 4.645372, ppl: 104.102127 +epoch: 0, batch: 4651, sum loss: 8013.055664, avg loss: 4.410047, ppl: 82.273338 +epoch: 0, batch: 4652, sum loss: 7771.526855, avg loss: 4.625909, ppl: 102.095520 +epoch: 0, batch: 4653, sum loss: 7159.454102, avg loss: 4.571810, ppl: 96.718994 +epoch: 0, batch: 4654, sum loss: 6482.781250, avg loss: 4.495687, ppl: 89.629768 +epoch: 0, batch: 4655, sum loss: 8299.373047, avg loss: 4.683619, ppl: 108.160805 +epoch: 0, batch: 4656, sum loss: 7012.142090, avg 
loss: 4.544487, ppl: 94.112091 +epoch: 0, batch: 4657, sum loss: 6215.135254, avg loss: 4.433050, ppl: 84.187775 +epoch: 0, batch: 4658, sum loss: 8251.865234, avg loss: 4.612557, ppl: 100.741455 +epoch: 0, batch: 4659, sum loss: 7802.744629, avg loss: 4.711802, ppl: 111.252510 +epoch: 0, batch: 4660, sum loss: 7965.992188, avg loss: 4.798790, ppl: 121.363533 +epoch: 0, batch: 4661, sum loss: 8286.699219, avg loss: 4.302544, ppl: 73.887535 +epoch: 0, batch: 4662, sum loss: 6885.718262, avg loss: 4.566126, ppl: 96.170807 +epoch: 0, batch: 4663, sum loss: 6917.862305, avg loss: 4.715652, ppl: 111.681656 +epoch: 0, batch: 4664, sum loss: 8304.254883, avg loss: 4.585453, ppl: 98.047592 +epoch: 0, batch: 4665, sum loss: 7248.282227, avg loss: 4.712797, ppl: 111.363228 +epoch: 0, batch: 4666, sum loss: 7931.386230, avg loss: 4.630115, ppl: 102.525810 +epoch: 0, batch: 4667, sum loss: 7184.433594, avg loss: 4.620215, ppl: 101.515846 +epoch: 0, batch: 4668, sum loss: 7372.177246, avg loss: 4.648283, ppl: 104.405617 +epoch: 0, batch: 4669, sum loss: 8078.280762, avg loss: 4.642690, ppl: 103.823273 +epoch: 0, batch: 4670, sum loss: 8159.781250, avg loss: 4.716637, ppl: 111.791626 +epoch: 0, batch: 4671, sum loss: 7018.470215, avg loss: 4.501905, ppl: 90.188820 +epoch: 0, batch: 4672, sum loss: 9197.783203, avg loss: 4.676046, ppl: 107.344833 +epoch: 0, batch: 4673, sum loss: 7430.674805, avg loss: 4.866192, ppl: 129.825577 +epoch: 0, batch: 4674, sum loss: 9047.612305, avg loss: 4.771948, ppl: 118.149155 +epoch: 0, batch: 4675, sum loss: 9419.084961, avg loss: 4.773992, ppl: 118.390869 +epoch: 0, batch: 4676, sum loss: 7511.361328, avg loss: 4.465732, ppl: 86.984688 +epoch: 0, batch: 4677, sum loss: 7137.551270, avg loss: 4.506030, ppl: 90.561539 +epoch: 0, batch: 4678, sum loss: 8101.651367, avg loss: 4.713003, ppl: 111.386116 +epoch: 0, batch: 4679, sum loss: 8290.636719, avg loss: 4.503334, ppl: 90.317711 +epoch: 0, batch: 4680, sum loss: 8301.191406, avg loss: 4.526277, 
ppl: 92.413826 +epoch: 0, batch: 4681, sum loss: 7987.303711, avg loss: 4.684636, ppl: 108.270813 +epoch: 0, batch: 4682, sum loss: 7770.304199, avg loss: 4.573458, ppl: 96.878510 +epoch: 0, batch: 4683, sum loss: 7880.178711, avg loss: 4.687792, ppl: 108.613129 +epoch: 0, batch: 4684, sum loss: 7245.994629, avg loss: 4.647848, ppl: 104.360176 +epoch: 0, batch: 4685, sum loss: 8249.619141, avg loss: 4.801874, ppl: 121.738304 +epoch: 0, batch: 4686, sum loss: 7831.915039, avg loss: 4.672980, ppl: 107.016212 +epoch: 0, batch: 4687, sum loss: 7194.237305, avg loss: 4.674618, ppl: 107.191635 +epoch: 0, batch: 4688, sum loss: 8186.136719, avg loss: 4.745587, ppl: 115.075317 +epoch: 0, batch: 4689, sum loss: 8496.253906, avg loss: 4.802857, ppl: 121.858063 +epoch: 0, batch: 4690, sum loss: 7992.772461, avg loss: 4.622772, ppl: 101.775734 +epoch: 0, batch: 4691, sum loss: 7711.809570, avg loss: 4.702323, ppl: 110.202873 +epoch: 0, batch: 4692, sum loss: 7088.276367, avg loss: 4.500493, ppl: 90.061485 +epoch: 0, batch: 4693, sum loss: 7258.893555, avg loss: 4.533975, ppl: 93.128021 +epoch: 0, batch: 4694, sum loss: 8343.326172, avg loss: 4.574192, ppl: 96.949677 +epoch: 0, batch: 4695, sum loss: 6576.296387, avg loss: 4.449457, ppl: 85.580437 +epoch: 0, batch: 4696, sum loss: 7467.643555, avg loss: 4.793096, ppl: 120.674408 +epoch: 0, batch: 4697, sum loss: 7869.186523, avg loss: 4.618067, ppl: 101.298058 +epoch: 0, batch: 4698, sum loss: 8169.289062, avg loss: 4.785758, ppl: 119.792130 +epoch: 0, batch: 4699, sum loss: 7551.777344, avg loss: 4.455326, ppl: 86.084213 +epoch: 0, batch: 4700, sum loss: 7075.361816, avg loss: 4.591409, ppl: 98.633324 +epoch: 0, batch: 4701, sum loss: 8834.757812, avg loss: 4.468770, ppl: 87.249298 +epoch: 0, batch: 4702, sum loss: 8831.064453, avg loss: 4.781302, ppl: 119.259521 +epoch: 0, batch: 4703, sum loss: 7629.958984, avg loss: 4.816893, ppl: 123.580544 +epoch: 0, batch: 4704, sum loss: 7623.089355, avg loss: 4.642564, ppl: 103.810158 
+epoch: 0, batch: 4705, sum loss: 7831.576172, avg loss: 4.614954, ppl: 100.983131 +epoch: 0, batch: 4706, sum loss: 8598.476562, avg loss: 4.696055, ppl: 109.514328 +epoch: 0, batch: 4707, sum loss: 5838.992676, avg loss: 4.296536, ppl: 73.444939 +epoch: 0, batch: 4708, sum loss: 8448.526367, avg loss: 4.520346, ppl: 91.867348 +epoch: 0, batch: 4709, sum loss: 7529.480469, avg loss: 4.659332, ppl: 105.565521 +epoch: 0, batch: 4710, sum loss: 8737.188477, avg loss: 4.657350, ppl: 105.356522 +epoch: 0, batch: 4711, sum loss: 6318.399902, avg loss: 4.581871, ppl: 97.696968 +epoch: 0, batch: 4712, sum loss: 7109.839844, avg loss: 4.522799, ppl: 92.093010 +epoch: 0, batch: 4713, sum loss: 7786.901855, avg loss: 4.384517, ppl: 80.199455 +epoch: 0, batch: 4714, sum loss: 8065.046387, avg loss: 4.755334, ppl: 116.202446 +epoch: 0, batch: 4715, sum loss: 6980.740234, avg loss: 4.577535, ppl: 97.274284 +epoch: 0, batch: 4716, sum loss: 8520.030273, avg loss: 4.510339, ppl: 90.952629 +epoch: 0, batch: 4717, sum loss: 8067.452148, avg loss: 4.649828, ppl: 104.567047 +epoch: 0, batch: 4718, sum loss: 8980.162109, avg loss: 4.591085, ppl: 98.601349 +epoch: 0, batch: 4719, sum loss: 8510.890625, avg loss: 4.894129, ppl: 133.503708 +epoch: 0, batch: 4720, sum loss: 8664.484375, avg loss: 4.779087, ppl: 118.995667 +epoch: 0, batch: 4721, sum loss: 6870.369141, avg loss: 4.353846, ppl: 77.777023 +epoch: 0, batch: 4722, sum loss: 6388.755859, avg loss: 4.547157, ppl: 94.363777 +epoch: 0, batch: 4723, sum loss: 7421.635254, avg loss: 4.495236, ppl: 89.589348 +epoch: 0, batch: 4724, sum loss: 8630.214844, avg loss: 4.749705, ppl: 115.550232 +epoch: 0, batch: 4725, sum loss: 7264.851562, avg loss: 4.509530, ppl: 90.879059 +epoch: 0, batch: 4726, sum loss: 8131.356934, avg loss: 4.791607, ppl: 120.494835 +epoch: 0, batch: 4727, sum loss: 8478.785156, avg loss: 4.635749, ppl: 103.105148 +epoch: 0, batch: 4728, sum loss: 7478.755371, avg loss: 4.568574, ppl: 96.406532 +epoch: 0, batch: 
4729, sum loss: 7268.870117, avg loss: 4.609303, ppl: 100.414085 +epoch: 0, batch: 4730, sum loss: 7011.976562, avg loss: 4.643693, ppl: 103.927444 +epoch: 0, batch: 4731, sum loss: 7988.548340, avg loss: 4.631043, ppl: 102.621040 +epoch: 0, batch: 4732, sum loss: 8222.302734, avg loss: 4.634894, ppl: 103.017036 +epoch: 0, batch: 4733, sum loss: 7920.215332, avg loss: 4.672693, ppl: 106.985497 +epoch: 0, batch: 4734, sum loss: 7569.703125, avg loss: 4.635458, ppl: 103.075111 +epoch: 0, batch: 4735, sum loss: 7532.804688, avg loss: 4.559809, ppl: 95.565247 +epoch: 0, batch: 4736, sum loss: 8653.287109, avg loss: 4.615087, ppl: 100.996567 +epoch: 0, batch: 4737, sum loss: 8445.053711, avg loss: 4.465919, ppl: 87.000992 +epoch: 0, batch: 4738, sum loss: 7622.944336, avg loss: 4.592135, ppl: 98.704987 +epoch: 0, batch: 4739, sum loss: 8373.494141, avg loss: 4.618585, ppl: 101.350533 +epoch: 0, batch: 4740, sum loss: 7709.480957, avg loss: 4.575360, ppl: 97.062958 +epoch: 0, batch: 4741, sum loss: 8294.885742, avg loss: 4.673175, ppl: 107.037033 +epoch: 0, batch: 4742, sum loss: 8308.588867, avg loss: 4.649462, ppl: 104.528755 +epoch: 0, batch: 4743, sum loss: 8335.319336, avg loss: 4.600066, ppl: 99.490852 +epoch: 0, batch: 4744, sum loss: 8176.581055, avg loss: 4.446211, ppl: 85.303108 +epoch: 0, batch: 4745, sum loss: 6320.541016, avg loss: 4.527608, ppl: 92.536987 +epoch: 0, batch: 4746, sum loss: 8271.534180, avg loss: 4.562347, ppl: 95.808075 +epoch: 0, batch: 4747, sum loss: 7795.290527, avg loss: 4.794151, ppl: 120.801811 +epoch: 0, batch: 4748, sum loss: 8495.086914, avg loss: 4.785964, ppl: 119.816811 +epoch: 0, batch: 4749, sum loss: 6682.557129, avg loss: 4.466950, ppl: 87.090683 +epoch: 0, batch: 4750, sum loss: 7913.330566, avg loss: 4.890810, ppl: 133.061310 +epoch: 0, batch: 4751, sum loss: 8378.526367, avg loss: 4.720296, ppl: 112.201500 +epoch: 0, batch: 4752, sum loss: 7857.916992, avg loss: 4.516044, ppl: 91.473030 +epoch: 0, batch: 4753, sum loss: 
7959.617188, avg loss: 4.577123, ppl: 97.234268 +epoch: 0, batch: 4754, sum loss: 7938.253418, avg loss: 4.591240, ppl: 98.616631 +epoch: 0, batch: 4755, sum loss: 6288.627441, avg loss: 4.345976, ppl: 77.167343 +epoch: 0, batch: 4756, sum loss: 7282.634766, avg loss: 4.626832, ppl: 102.189865 +epoch: 0, batch: 4757, sum loss: 7392.127441, avg loss: 4.599955, ppl: 99.479851 +epoch: 0, batch: 4758, sum loss: 6506.957031, avg loss: 4.518720, ppl: 91.718140 +epoch: 0, batch: 4759, sum loss: 8021.558594, avg loss: 4.588993, ppl: 98.395256 +epoch: 0, batch: 4760, sum loss: 7997.469727, avg loss: 4.609493, ppl: 100.433197 +epoch: 0, batch: 4761, sum loss: 7318.399902, avg loss: 4.481567, ppl: 88.373077 +epoch: 0, batch: 4762, sum loss: 7814.341309, avg loss: 4.377782, ppl: 79.661179 +epoch: 0, batch: 4763, sum loss: 8240.505859, avg loss: 4.555283, ppl: 95.133636 +epoch: 0, batch: 4764, sum loss: 6840.663574, avg loss: 4.594132, ppl: 98.902290 +epoch: 0, batch: 4765, sum loss: 7688.458008, avg loss: 4.571022, ppl: 96.642784 +epoch: 0, batch: 4766, sum loss: 6533.976074, avg loss: 4.330004, ppl: 75.944603 +epoch: 0, batch: 4767, sum loss: 7553.818359, avg loss: 4.651366, ppl: 104.727921 +epoch: 0, batch: 4768, sum loss: 7904.821289, avg loss: 4.696863, ppl: 109.602829 +epoch: 0, batch: 4769, sum loss: 8044.935547, avg loss: 4.591858, ppl: 98.677643 +epoch: 0, batch: 4770, sum loss: 6993.535156, avg loss: 4.538310, ppl: 93.532555 +epoch: 0, batch: 4771, sum loss: 6953.345703, avg loss: 4.529867, ppl: 92.746201 +epoch: 0, batch: 4772, sum loss: 7937.071289, avg loss: 4.633433, ppl: 102.866638 +epoch: 0, batch: 4773, sum loss: 7802.137207, avg loss: 4.522978, ppl: 92.109520 +epoch: 0, batch: 4774, sum loss: 8831.257812, avg loss: 4.747988, ppl: 115.351990 +epoch: 0, batch: 4775, sum loss: 7866.796875, avg loss: 4.565756, ppl: 96.135231 +epoch: 0, batch: 4776, sum loss: 8518.645508, avg loss: 4.649916, ppl: 104.576172 +epoch: 0, batch: 4777, sum loss: 8004.219238, avg loss: 
4.504344, ppl: 90.409012 +epoch: 0, batch: 4778, sum loss: 7856.567383, avg loss: 4.523067, ppl: 92.117691 +epoch: 0, batch: 4779, sum loss: 7050.065430, avg loss: 4.434003, ppl: 84.268097 +epoch: 0, batch: 4780, sum loss: 8181.011719, avg loss: 4.611619, ppl: 100.646919 +epoch: 0, batch: 4781, sum loss: 8645.148438, avg loss: 4.934446, ppl: 138.996094 +epoch: 0, batch: 4782, sum loss: 8028.850098, avg loss: 4.736785, ppl: 114.066879 +epoch: 0, batch: 4783, sum loss: 7700.368164, avg loss: 4.706827, ppl: 110.700317 +epoch: 0, batch: 4784, sum loss: 7901.629883, avg loss: 4.530751, ppl: 92.828224 +epoch: 0, batch: 4785, sum loss: 7612.769043, avg loss: 4.616597, ppl: 101.149254 +epoch: 0, batch: 4786, sum loss: 8076.186035, avg loss: 4.604439, ppl: 99.926888 +epoch: 0, batch: 4787, sum loss: 8540.491211, avg loss: 4.825136, ppl: 124.603439 +epoch: 0, batch: 4788, sum loss: 9817.283203, avg loss: 4.774943, ppl: 118.503601 +epoch: 0, batch: 4789, sum loss: 7107.237305, avg loss: 4.606116, ppl: 100.094658 +epoch: 0, batch: 4790, sum loss: 7394.473633, avg loss: 4.500592, ppl: 90.070457 +epoch: 0, batch: 4791, sum loss: 7067.981445, avg loss: 4.527855, ppl: 92.559799 +epoch: 0, batch: 4792, sum loss: 7083.763184, avg loss: 4.611825, ppl: 100.667702 +epoch: 0, batch: 4793, sum loss: 8457.868164, avg loss: 4.693601, ppl: 109.245827 +epoch: 0, batch: 4794, sum loss: 8613.215820, avg loss: 4.435230, ppl: 84.371506 +epoch: 0, batch: 4795, sum loss: 6539.194824, avg loss: 4.412412, ppl: 82.468147 +epoch: 0, batch: 4796, sum loss: 7949.808105, avg loss: 4.595265, ppl: 99.014359 +epoch: 0, batch: 4797, sum loss: 7515.566406, avg loss: 4.747673, ppl: 115.315636 +epoch: 0, batch: 4798, sum loss: 7841.685547, avg loss: 4.517100, ppl: 91.569649 +epoch: 0, batch: 4799, sum loss: 8370.078125, avg loss: 4.753026, ppl: 115.934517 +epoch: 0, batch: 4800, sum loss: 8395.438477, avg loss: 4.615414, ppl: 101.029663 +epoch: 0, batch: 4801, sum loss: 6769.020996, avg loss: 4.500679, ppl: 
90.078232 +epoch: 0, batch: 4802, sum loss: 7615.973145, avg loss: 4.506493, ppl: 90.603523 +epoch: 0, batch: 4803, sum loss: 7732.003418, avg loss: 4.569742, ppl: 96.519226 +epoch: 0, batch: 4804, sum loss: 7594.867188, avg loss: 4.600162, ppl: 99.500435 +epoch: 0, batch: 4805, sum loss: 6823.963379, avg loss: 4.394053, ppl: 80.967880 +epoch: 0, batch: 4806, sum loss: 8011.709961, avg loss: 4.677005, ppl: 107.447815 +epoch: 0, batch: 4807, sum loss: 6206.920410, avg loss: 4.301400, ppl: 73.803062 +epoch: 0, batch: 4808, sum loss: 7411.641113, avg loss: 4.459471, ppl: 86.441788 +epoch: 0, batch: 4809, sum loss: 8778.486328, avg loss: 4.581674, ppl: 97.677734 +epoch: 0, batch: 4810, sum loss: 7889.006348, avg loss: 4.632417, ppl: 102.762161 +epoch: 0, batch: 4811, sum loss: 8206.681641, avg loss: 4.844558, ppl: 127.047142 +epoch: 0, batch: 4812, sum loss: 7388.599121, avg loss: 4.426962, ppl: 83.676811 +epoch: 0, batch: 4813, sum loss: 7693.201660, avg loss: 4.708202, ppl: 110.852654 +epoch: 0, batch: 4814, sum loss: 7311.215332, avg loss: 4.689683, ppl: 108.818672 +epoch: 0, batch: 4815, sum loss: 7413.042969, avg loss: 4.685868, ppl: 108.404305 +epoch: 0, batch: 4816, sum loss: 7349.282715, avg loss: 4.545011, ppl: 94.161469 +epoch: 0, batch: 4817, sum loss: 7657.483398, avg loss: 4.618506, ppl: 101.342560 +epoch: 0, batch: 4818, sum loss: 6257.738281, avg loss: 4.574370, ppl: 96.966919 +epoch: 0, batch: 4819, sum loss: 7856.002930, avg loss: 4.491711, ppl: 89.274078 +epoch: 0, batch: 4820, sum loss: 7880.403809, avg loss: 4.621938, ppl: 101.690895 +epoch: 0, batch: 4821, sum loss: 8596.416016, avg loss: 4.786423, ppl: 119.871841 +epoch: 0, batch: 4822, sum loss: 7826.011719, avg loss: 4.518483, ppl: 91.696358 +epoch: 0, batch: 4823, sum loss: 7626.749023, avg loss: 4.839308, ppl: 126.381836 +epoch: 0, batch: 4824, sum loss: 8367.396484, avg loss: 4.759611, ppl: 116.700539 +epoch: 0, batch: 4825, sum loss: 7065.826172, avg loss: 4.424438, ppl: 83.465843 +epoch: 0, 
batch: 4826, sum loss: 8260.154297, avg loss: 4.695938, ppl: 109.501434 +epoch: 0, batch: 4827, sum loss: 7545.170898, avg loss: 4.435727, ppl: 84.413445 +epoch: 0, batch: 4828, sum loss: 8376.759766, avg loss: 4.664120, ppl: 106.072220 +epoch: 0, batch: 4829, sum loss: 7523.416992, avg loss: 4.661349, ppl: 105.778664 +epoch: 0, batch: 4830, sum loss: 6709.262207, avg loss: 4.542493, ppl: 93.924698 +epoch: 0, batch: 4831, sum loss: 7535.237305, avg loss: 4.742126, ppl: 114.677689 +epoch: 0, batch: 4832, sum loss: 8240.021484, avg loss: 4.605937, ppl: 100.076714 +epoch: 0, batch: 4833, sum loss: 8514.699219, avg loss: 4.699061, ppl: 109.844025 +epoch: 0, batch: 4834, sum loss: 7097.174805, avg loss: 4.514742, ppl: 91.354027 +epoch: 0, batch: 4835, sum loss: 7228.084473, avg loss: 4.375354, ppl: 79.467949 +epoch: 0, batch: 4836, sum loss: 7935.083984, avg loss: 4.483099, ppl: 88.508492 +epoch: 0, batch: 4837, sum loss: 7846.196289, avg loss: 4.514497, ppl: 91.331642 +epoch: 0, batch: 4838, sum loss: 8243.879883, avg loss: 4.480369, ppl: 88.267242 +epoch: 0, batch: 4839, sum loss: 7344.895508, avg loss: 4.446063, ppl: 85.290497 +epoch: 0, batch: 4840, sum loss: 7732.973633, avg loss: 4.570315, ppl: 96.574562 +epoch: 0, batch: 4841, sum loss: 6273.072266, avg loss: 4.506517, ppl: 90.605728 +epoch: 0, batch: 4842, sum loss: 10209.587891, avg loss: 4.985150, ppl: 146.225555 +epoch: 0, batch: 4843, sum loss: 7372.457031, avg loss: 4.454657, ppl: 86.026642 +epoch: 0, batch: 4844, sum loss: 8404.241211, avg loss: 4.821711, ppl: 124.177322 +epoch: 0, batch: 4845, sum loss: 6690.394531, avg loss: 4.554387, ppl: 95.048439 +epoch: 0, batch: 4846, sum loss: 8681.444336, avg loss: 4.552409, ppl: 94.860672 +epoch: 0, batch: 4847, sum loss: 8609.219727, avg loss: 4.753849, ppl: 116.030029 +epoch: 0, batch: 4848, sum loss: 8351.470703, avg loss: 4.691837, ppl: 109.053360 +epoch: 0, batch: 4849, sum loss: 8358.581055, avg loss: 4.703760, ppl: 110.361313 +epoch: 0, batch: 4850, sum 
loss: 7050.078613, avg loss: 4.659669, ppl: 105.601112 +epoch: 0, batch: 4851, sum loss: 8093.398438, avg loss: 4.826117, ppl: 124.725716 +epoch: 0, batch: 4852, sum loss: 7601.865723, avg loss: 4.530313, ppl: 92.787643 +epoch: 0, batch: 4853, sum loss: 7766.273438, avg loss: 4.573777, ppl: 96.909462 +epoch: 0, batch: 4854, sum loss: 9253.580078, avg loss: 4.929984, ppl: 138.377304 +epoch: 0, batch: 4855, sum loss: 6633.309570, avg loss: 4.463869, ppl: 86.822784 +epoch: 0, batch: 4856, sum loss: 8151.550781, avg loss: 4.618443, ppl: 101.336082 +epoch: 0, batch: 4857, sum loss: 7533.132812, avg loss: 4.486678, ppl: 88.825890 +epoch: 0, batch: 4858, sum loss: 7215.668945, avg loss: 4.504163, ppl: 90.392632 +epoch: 0, batch: 4859, sum loss: 8523.187500, avg loss: 4.617111, ppl: 101.201263 +epoch: 0, batch: 4860, sum loss: 8176.625977, avg loss: 4.685745, ppl: 108.391022 +epoch: 0, batch: 4861, sum loss: 6553.289551, avg loss: 4.325604, ppl: 75.611168 +epoch: 0, batch: 4862, sum loss: 8818.671875, avg loss: 4.779768, ppl: 119.076721 +epoch: 0, batch: 4863, sum loss: 7709.939453, avg loss: 4.788782, ppl: 120.154945 +epoch: 0, batch: 4864, sum loss: 7486.218262, avg loss: 4.705354, ppl: 110.537430 +epoch: 0, batch: 4865, sum loss: 7753.494141, avg loss: 4.539516, ppl: 93.645508 +epoch: 0, batch: 4866, sum loss: 7426.527832, avg loss: 4.514606, ppl: 91.341614 +epoch: 0, batch: 4867, sum loss: 8154.865234, avg loss: 4.525453, ppl: 92.337708 +epoch: 0, batch: 4868, sum loss: 8277.803711, avg loss: 4.751896, ppl: 115.803688 +epoch: 0, batch: 4869, sum loss: 8558.323242, avg loss: 4.651262, ppl: 104.717087 +epoch: 0, batch: 4870, sum loss: 5795.366699, avg loss: 4.273869, ppl: 71.798889 +epoch: 0, batch: 4871, sum loss: 7930.983398, avg loss: 4.534582, ppl: 93.184525 +epoch: 0, batch: 4872, sum loss: 6262.063477, avg loss: 4.357734, ppl: 78.079987 +epoch: 0, batch: 4873, sum loss: 7128.751953, avg loss: 4.323076, ppl: 75.420250 +epoch: 0, batch: 4874, sum loss: 7496.520996, 
avg loss: 4.417514, ppl: 82.889954 +epoch: 0, batch: 4875, sum loss: 7051.538086, avg loss: 4.694766, ppl: 109.373161 +epoch: 0, batch: 4876, sum loss: 8772.717773, avg loss: 5.070935, ppl: 159.323257 +epoch: 0, batch: 4877, sum loss: 6665.997559, avg loss: 4.446963, ppl: 85.367317 +epoch: 0, batch: 4878, sum loss: 7900.804199, avg loss: 4.561665, ppl: 95.742767 +epoch: 0, batch: 4879, sum loss: 7731.779297, avg loss: 4.613234, ppl: 100.809601 +epoch: 0, batch: 4880, sum loss: 8978.347656, avg loss: 4.727934, ppl: 113.061775 +epoch: 0, batch: 4881, sum loss: 6327.144531, avg loss: 4.578252, ppl: 97.344070 +epoch: 0, batch: 4882, sum loss: 7576.048828, avg loss: 4.525716, ppl: 92.362061 +epoch: 0, batch: 4883, sum loss: 7472.985352, avg loss: 4.529082, ppl: 92.673477 +epoch: 0, batch: 4884, sum loss: 7888.625488, avg loss: 4.715258, ppl: 111.637573 +epoch: 0, batch: 4885, sum loss: 8570.714844, avg loss: 4.491989, ppl: 89.298851 +epoch: 0, batch: 4886, sum loss: 7232.966797, avg loss: 4.572040, ppl: 96.741226 +epoch: 0, batch: 4887, sum loss: 8768.259766, avg loss: 4.617304, ppl: 101.220757 +epoch: 0, batch: 4888, sum loss: 7743.756836, avg loss: 4.590253, ppl: 98.519386 +epoch: 0, batch: 4889, sum loss: 7013.791992, avg loss: 4.481656, ppl: 88.380920 +epoch: 0, batch: 4890, sum loss: 8256.742188, avg loss: 4.554188, ppl: 95.029541 +epoch: 0, batch: 4891, sum loss: 7587.681641, avg loss: 4.403762, ppl: 81.757851 +epoch: 0, batch: 4892, sum loss: 7590.962402, avg loss: 4.685780, ppl: 108.394737 +epoch: 0, batch: 4893, sum loss: 7714.444336, avg loss: 4.469551, ppl: 87.317474 +epoch: 0, batch: 4894, sum loss: 8334.909180, avg loss: 4.749236, ppl: 115.496025 +epoch: 0, batch: 4895, sum loss: 7748.721191, avg loss: 4.571517, ppl: 96.690674 +epoch: 0, batch: 4896, sum loss: 7378.732422, avg loss: 4.507473, ppl: 90.692307 +epoch: 0, batch: 4897, sum loss: 6925.739258, avg loss: 4.488490, ppl: 88.986938 +epoch: 0, batch: 4898, sum loss: 8190.517090, avg loss: 4.507714, 
ppl: 90.714233 +epoch: 0, batch: 4899, sum loss: 7614.256836, avg loss: 4.578627, ppl: 97.380608 +epoch: 0, batch: 4900, sum loss: 6883.518066, avg loss: 4.421014, ppl: 83.180573 +epoch: 0, batch: 4901, sum loss: 8397.674805, avg loss: 4.655030, ppl: 105.112396 +epoch: 0, batch: 4902, sum loss: 7440.588867, avg loss: 4.490397, ppl: 89.156876 +epoch: 0, batch: 4903, sum loss: 7332.414062, avg loss: 4.484657, ppl: 88.646523 +epoch: 0, batch: 4904, sum loss: 7119.080078, avg loss: 4.298961, ppl: 73.623268 +epoch: 0, batch: 4905, sum loss: 9113.442383, avg loss: 4.502689, ppl: 90.259506 +epoch: 0, batch: 4906, sum loss: 7904.130859, avg loss: 4.526994, ppl: 92.480125 +epoch: 0, batch: 4907, sum loss: 7526.112305, avg loss: 4.424522, ppl: 83.472893 +epoch: 0, batch: 4908, sum loss: 9090.910156, avg loss: 4.593689, ppl: 98.858444 +epoch: 0, batch: 4909, sum loss: 7777.290039, avg loss: 4.393950, ppl: 80.959541 +epoch: 0, batch: 4910, sum loss: 7231.925293, avg loss: 4.489091, ppl: 89.040466 +epoch: 0, batch: 4911, sum loss: 8107.452148, avg loss: 4.580482, ppl: 97.561363 +epoch: 0, batch: 4912, sum loss: 8527.669922, avg loss: 4.492977, ppl: 89.387169 +epoch: 0, batch: 4913, sum loss: 7263.944336, avg loss: 4.772631, ppl: 118.229919 +epoch: 0, batch: 4914, sum loss: 7145.826660, avg loss: 4.471731, ppl: 87.508087 +epoch: 0, batch: 4915, sum loss: 8326.980469, avg loss: 4.680708, ppl: 107.846443 +epoch: 0, batch: 4916, sum loss: 8049.278320, avg loss: 4.834402, ppl: 125.763306 +epoch: 0, batch: 4917, sum loss: 7979.697266, avg loss: 4.531344, ppl: 92.883308 +epoch: 0, batch: 4918, sum loss: 7822.199219, avg loss: 4.645011, ppl: 104.064552 +epoch: 0, batch: 4919, sum loss: 7747.114746, avg loss: 4.347427, ppl: 77.279358 +epoch: 0, batch: 4920, sum loss: 7036.939453, avg loss: 4.496447, ppl: 89.697876 +epoch: 0, batch: 4921, sum loss: 7393.725098, avg loss: 4.318765, ppl: 75.095802 +epoch: 0, batch: 4922, sum loss: 8358.961914, avg loss: 4.506179, ppl: 90.575058 +epoch: 0, 
batch: 4923, sum loss: 7122.869629, avg loss: 4.314276, ppl: 74.759491 +epoch: 0, batch: 4924, sum loss: 7806.564941, avg loss: 4.536063, ppl: 93.322685 +epoch: 0, batch: 4925, sum loss: 7285.076172, avg loss: 4.336355, ppl: 76.428467 +epoch: 0, batch: 4926, sum loss: 6683.127441, avg loss: 4.334064, ppl: 76.253586 +epoch: 0, batch: 4927, sum loss: 6908.333984, avg loss: 4.483020, ppl: 88.501572 +epoch: 0, batch: 4928, sum loss: 8778.567383, avg loss: 4.442595, ppl: 84.995216 +epoch: 0, batch: 4929, sum loss: 7535.521484, avg loss: 4.592030, ppl: 98.694580 +epoch: 0, batch: 4930, sum loss: 6840.053711, avg loss: 4.511909, ppl: 91.095558 +epoch: 0, batch: 4931, sum loss: 6825.265137, avg loss: 4.690904, ppl: 108.951591 +epoch: 0, batch: 4932, sum loss: 7619.085938, avg loss: 4.700238, ppl: 109.973312 +epoch: 0, batch: 4933, sum loss: 9046.503906, avg loss: 4.796662, ppl: 121.105476 +epoch: 0, batch: 4934, sum loss: 7381.214844, avg loss: 4.465344, ppl: 86.950974 +epoch: 0, batch: 4935, sum loss: 7985.404297, avg loss: 4.634593, ppl: 102.986046 +epoch: 0, batch: 4936, sum loss: 7883.337891, avg loss: 4.517672, ppl: 91.622063 +epoch: 0, batch: 4937, sum loss: 8019.404297, avg loss: 4.523071, ppl: 92.118042 +epoch: 0, batch: 4938, sum loss: 6763.574707, avg loss: 4.497057, ppl: 89.752640 +epoch: 0, batch: 4939, sum loss: 8468.687500, avg loss: 4.403893, ppl: 81.768616 +epoch: 0, batch: 4940, sum loss: 7513.921875, avg loss: 4.604119, ppl: 99.894920 +epoch: 0, batch: 4941, sum loss: 7525.157227, avg loss: 4.423961, ppl: 83.426056 +epoch: 0, batch: 4942, sum loss: 7190.568359, avg loss: 4.550992, ppl: 94.726379 +epoch: 0, batch: 4943, sum loss: 8248.481445, avg loss: 4.509831, ppl: 90.906494 +epoch: 0, batch: 4944, sum loss: 6773.437500, avg loss: 4.558168, ppl: 95.408524 +epoch: 0, batch: 4945, sum loss: 8650.270508, avg loss: 4.768617, ppl: 117.756233 +epoch: 0, batch: 4946, sum loss: 6804.139160, avg loss: 4.415405, ppl: 82.715355 +epoch: 0, batch: 4947, sum loss: 
8422.043945, avg loss: 4.681514, ppl: 107.933388 +epoch: 0, batch: 4948, sum loss: 8143.543945, avg loss: 4.511659, ppl: 91.072754 +epoch: 0, batch: 4949, sum loss: 7690.236328, avg loss: 4.596674, ppl: 99.154022 +epoch: 0, batch: 4950, sum loss: 7418.666992, avg loss: 4.315688, ppl: 74.865089 +epoch: 0, batch: 4951, sum loss: 7967.447266, avg loss: 4.438689, ppl: 84.663895 +epoch: 0, batch: 4952, sum loss: 7470.569824, avg loss: 4.444122, ppl: 85.125130 +epoch: 0, batch: 4953, sum loss: 7379.728516, avg loss: 4.392696, ppl: 80.858109 +epoch: 0, batch: 4954, sum loss: 8015.877441, avg loss: 4.515987, ppl: 91.467796 +epoch: 0, batch: 4955, sum loss: 8277.613281, avg loss: 4.820974, ppl: 124.085938 +epoch: 0, batch: 4956, sum loss: 7154.233887, avg loss: 4.513712, ppl: 91.259941 +epoch: 0, batch: 4957, sum loss: 6730.282715, avg loss: 4.375997, ppl: 79.519051 +epoch: 0, batch: 4958, sum loss: 8216.872070, avg loss: 4.603290, ppl: 99.812164 +epoch: 0, batch: 4959, sum loss: 6730.140625, avg loss: 4.474827, ppl: 87.779442 +epoch: 0, batch: 4960, sum loss: 7573.166016, avg loss: 4.526698, ppl: 92.452789 +epoch: 0, batch: 4961, sum loss: 6853.086426, avg loss: 4.432786, ppl: 84.165535 +epoch: 0, batch: 4962, sum loss: 7885.549316, avg loss: 4.506028, ppl: 90.561409 +epoch: 0, batch: 4963, sum loss: 7334.997559, avg loss: 4.550247, ppl: 94.655762 +epoch: 0, batch: 4964, sum loss: 8762.412109, avg loss: 4.700865, ppl: 110.042297 +epoch: 0, batch: 4965, sum loss: 7012.675293, avg loss: 4.421611, ppl: 83.230286 +epoch: 0, batch: 4966, sum loss: 8477.339844, avg loss: 4.516430, ppl: 91.508362 +epoch: 0, batch: 4967, sum loss: 8618.738281, avg loss: 4.812249, ppl: 123.007973 +epoch: 0, batch: 4968, sum loss: 7988.885254, avg loss: 4.612520, ppl: 100.737709 +epoch: 0, batch: 4969, sum loss: 6875.145020, avg loss: 4.715463, ppl: 111.660515 +epoch: 0, batch: 4970, sum loss: 7386.645996, avg loss: 4.692914, ppl: 109.170792 +epoch: 0, batch: 4971, sum loss: 7924.583984, avg loss: 
4.546520, ppl: 94.303635 +epoch: 0, batch: 4972, sum loss: 7127.923828, avg loss: 4.497113, ppl: 89.757607 +epoch: 0, batch: 4973, sum loss: 6967.242676, avg loss: 4.676002, ppl: 107.340073 +epoch: 0, batch: 4974, sum loss: 8044.947266, avg loss: 4.555463, ppl: 95.150787 +epoch: 0, batch: 4975, sum loss: 6169.871094, avg loss: 4.391367, ppl: 80.750763 +epoch: 0, batch: 4976, sum loss: 6157.752441, avg loss: 4.433227, ppl: 84.202705 +epoch: 0, batch: 4977, sum loss: 6944.022461, avg loss: 4.364565, ppl: 78.615189 +epoch: 0, batch: 4978, sum loss: 7212.661133, avg loss: 4.471581, ppl: 87.494980 +epoch: 0, batch: 4979, sum loss: 6614.133301, avg loss: 4.487200, ppl: 88.872276 +epoch: 0, batch: 4980, sum loss: 7033.116699, avg loss: 4.442904, ppl: 85.021446 +epoch: 0, batch: 4981, sum loss: 7564.827637, avg loss: 4.710353, ppl: 111.091408 +epoch: 0, batch: 4982, sum loss: 7380.733398, avg loss: 4.578619, ppl: 97.379776 +epoch: 0, batch: 4983, sum loss: 7783.296387, avg loss: 4.387427, ppl: 80.433228 +epoch: 0, batch: 4984, sum loss: 7904.431152, avg loss: 4.514238, ppl: 91.307999 +epoch: 0, batch: 4985, sum loss: 6074.189941, avg loss: 4.224054, ppl: 68.309875 +epoch: 0, batch: 4986, sum loss: 5907.559570, avg loss: 4.331055, ppl: 76.024460 +epoch: 0, batch: 4987, sum loss: 7363.870117, avg loss: 4.473797, ppl: 87.689072 +epoch: 0, batch: 4988, sum loss: 7901.295898, avg loss: 4.567223, ppl: 96.276382 +epoch: 0, batch: 4989, sum loss: 6492.387695, avg loss: 4.407595, ppl: 82.071854 +epoch: 0, batch: 4990, sum loss: 6952.290527, avg loss: 4.755329, ppl: 116.201836 +epoch: 0, batch: 4991, sum loss: 9365.236328, avg loss: 4.770880, ppl: 118.023026 +epoch: 0, batch: 4992, sum loss: 8955.316406, avg loss: 4.656950, ppl: 105.314430 +epoch: 0, batch: 4993, sum loss: 8441.711914, avg loss: 4.669088, ppl: 106.600517 +epoch: 0, batch: 4994, sum loss: 7509.938965, avg loss: 4.438498, ppl: 84.647705 +epoch: 0, batch: 4995, sum loss: 7359.662109, avg loss: 4.498571, ppl: 89.888626 
+epoch: 0, batch: 4996, sum loss: 7086.887695, avg loss: 4.563354, ppl: 95.904564 +epoch: 0, batch: 4997, sum loss: 8941.198242, avg loss: 4.637551, ppl: 103.291107 +epoch: 0, batch: 4998, sum loss: 7599.633789, avg loss: 4.561605, ppl: 95.737015 +epoch: 0, batch: 4999, sum loss: 7432.187012, avg loss: 4.466459, ppl: 87.047920 +epoch: 0, batch: 5000, sum loss: 7441.271484, avg loss: 4.534595, ppl: 93.185814 +epoch: 0, batch: 5001, sum loss: 8304.884766, avg loss: 4.484279, ppl: 88.613007 +epoch: 0, batch: 5002, sum loss: 7801.471680, avg loss: 4.210185, ppl: 67.368973 +epoch: 0, batch: 5003, sum loss: 5915.544922, avg loss: 4.213351, ppl: 67.582649 +epoch: 0, batch: 5004, sum loss: 7235.778320, avg loss: 4.528021, ppl: 92.575203 +epoch: 0, batch: 5005, sum loss: 7446.639648, avg loss: 4.395891, ppl: 81.116890 +epoch: 0, batch: 5006, sum loss: 6334.954590, avg loss: 4.223303, ppl: 68.258560 +epoch: 0, batch: 5007, sum loss: 8596.883789, avg loss: 4.626956, ppl: 102.202438 +epoch: 0, batch: 5008, sum loss: 8390.038086, avg loss: 4.572228, ppl: 96.759445 +epoch: 0, batch: 5009, sum loss: 8768.581055, avg loss: 4.505951, ppl: 90.554459 +epoch: 0, batch: 5010, sum loss: 7631.919434, avg loss: 4.314257, ppl: 74.758034 +epoch: 0, batch: 5011, sum loss: 7316.441406, avg loss: 4.370635, ppl: 79.093803 +epoch: 0, batch: 5012, sum loss: 8783.372070, avg loss: 4.536866, ppl: 93.397606 +epoch: 0, batch: 5013, sum loss: 7946.007324, avg loss: 4.553586, ppl: 94.972328 +epoch: 0, batch: 5014, sum loss: 6707.573730, avg loss: 4.513845, ppl: 91.272079 +epoch: 0, batch: 5015, sum loss: 6938.718750, avg loss: 4.380504, ppl: 79.878296 +epoch: 0, batch: 5016, sum loss: 6880.739258, avg loss: 4.444922, ppl: 85.193230 +epoch: 0, batch: 5017, sum loss: 6891.226074, avg loss: 4.434508, ppl: 84.310661 +epoch: 0, batch: 5018, sum loss: 7605.272949, avg loss: 4.691717, ppl: 109.040207 +epoch: 0, batch: 5019, sum loss: 8899.993164, avg loss: 4.711484, ppl: 111.217079 +epoch: 0, batch: 5020, sum 
loss: 8604.162109, avg loss: 4.613492, ppl: 100.835609 +epoch: 0, batch: 5021, sum loss: 6601.558594, avg loss: 4.565393, ppl: 96.100395 +epoch: 0, batch: 5022, sum loss: 7997.219727, avg loss: 4.734884, ppl: 113.850281 +epoch: 0, batch: 5023, sum loss: 8273.041992, avg loss: 4.616653, ppl: 101.154900 +epoch: 0, batch: 5024, sum loss: 7810.691406, avg loss: 4.646455, ppl: 104.214920 +epoch: 0, batch: 5025, sum loss: 7149.030273, avg loss: 4.324882, ppl: 75.556602 +epoch: 0, batch: 5026, sum loss: 7773.865234, avg loss: 4.597200, ppl: 99.206192 +epoch: 0, batch: 5027, sum loss: 7873.083984, avg loss: 4.488645, ppl: 89.000778 +epoch: 0, batch: 5028, sum loss: 7607.490234, avg loss: 4.555384, ppl: 95.143250 +epoch: 0, batch: 5029, sum loss: 7826.756836, avg loss: 4.787007, ppl: 119.941887 +epoch: 0, batch: 5030, sum loss: 7622.385254, avg loss: 4.558843, ppl: 95.472969 +epoch: 0, batch: 5031, sum loss: 8810.397461, avg loss: 4.557888, ppl: 95.381821 +epoch: 0, batch: 5032, sum loss: 6845.901367, avg loss: 4.411019, ppl: 82.353363 +epoch: 0, batch: 5033, sum loss: 7429.423828, avg loss: 4.569141, ppl: 96.461250 +epoch: 0, batch: 5034, sum loss: 7853.236328, avg loss: 4.454473, ppl: 86.010811 +epoch: 0, batch: 5035, sum loss: 7634.939941, avg loss: 4.672546, ppl: 106.969734 +epoch: 0, batch: 5036, sum loss: 7774.229980, avg loss: 4.613786, ppl: 100.865326 +epoch: 0, batch: 5037, sum loss: 7699.018066, avg loss: 4.518203, ppl: 91.670738 +epoch: 0, batch: 5038, sum loss: 6752.521484, avg loss: 4.568689, ppl: 96.417610 +epoch: 0, batch: 5039, sum loss: 7398.874023, avg loss: 4.404092, ppl: 81.784836 +epoch: 0, batch: 5040, sum loss: 7724.641113, avg loss: 4.562694, ppl: 95.841339 +epoch: 0, batch: 5041, sum loss: 9082.816406, avg loss: 4.624652, ppl: 101.967323 +epoch: 0, batch: 5042, sum loss: 7683.502441, avg loss: 4.576237, ppl: 97.148155 +epoch: 0, batch: 5043, sum loss: 7636.176270, avg loss: 4.494513, ppl: 89.524521 +epoch: 0, batch: 5044, sum loss: 8382.901367, avg 
loss: 4.644267, ppl: 103.987076 +epoch: 0, batch: 5045, sum loss: 6490.414062, avg loss: 4.267202, ppl: 71.321793 +epoch: 0, batch: 5046, sum loss: 6973.373047, avg loss: 4.328599, ppl: 75.837997 +epoch: 0, batch: 5047, sum loss: 7926.308594, avg loss: 4.468043, ppl: 87.185921 +epoch: 0, batch: 5048, sum loss: 6858.380371, avg loss: 4.476749, ppl: 87.948326 +epoch: 0, batch: 5049, sum loss: 8016.648926, avg loss: 4.652727, ppl: 104.870590 +epoch: 0, batch: 5050, sum loss: 7125.937012, avg loss: 4.269585, ppl: 71.491966 +epoch: 0, batch: 5051, sum loss: 7000.081055, avg loss: 4.452978, ppl: 85.882294 +epoch: 0, batch: 5052, sum loss: 7142.726562, avg loss: 4.532187, ppl: 92.961647 +epoch: 0, batch: 5053, sum loss: 7500.324707, avg loss: 4.445954, ppl: 85.281227 +epoch: 0, batch: 5054, sum loss: 8213.858398, avg loss: 4.606763, ppl: 100.159447 +epoch: 0, batch: 5055, sum loss: 8760.593750, avg loss: 4.630335, ppl: 102.548401 +epoch: 0, batch: 5056, sum loss: 8185.613281, avg loss: 4.688210, ppl: 108.658455 +epoch: 0, batch: 5057, sum loss: 7211.881836, avg loss: 4.262341, ppl: 70.975983 +epoch: 0, batch: 5058, sum loss: 7334.366211, avg loss: 4.304205, ppl: 74.010384 +epoch: 0, batch: 5059, sum loss: 8223.281250, avg loss: 4.523257, ppl: 92.135170 +epoch: 0, batch: 5060, sum loss: 8235.048828, avg loss: 4.663108, ppl: 105.964897 +epoch: 0, batch: 5061, sum loss: 7099.859375, avg loss: 4.496428, ppl: 89.696121 +epoch: 0, batch: 5062, sum loss: 7364.444336, avg loss: 4.455199, ppl: 86.073296 +epoch: 0, batch: 5063, sum loss: 9755.754883, avg loss: 4.831974, ppl: 125.458313 +epoch: 0, batch: 5064, sum loss: 7181.338379, avg loss: 4.737031, ppl: 114.094948 +epoch: 0, batch: 5065, sum loss: 8806.351562, avg loss: 4.714321, ppl: 111.533073 +epoch: 0, batch: 5066, sum loss: 8040.136230, avg loss: 4.693600, ppl: 109.245728 +epoch: 0, batch: 5067, sum loss: 6825.729004, avg loss: 4.333796, ppl: 76.233124 +epoch: 0, batch: 5068, sum loss: 6906.569824, avg loss: 4.368482, ppl: 
78.923706 +epoch: 0, batch: 5069, sum loss: 7196.631348, avg loss: 4.648987, ppl: 104.479073 +epoch: 0, batch: 5070, sum loss: 9448.529297, avg loss: 4.748004, ppl: 115.353859 +epoch: 0, batch: 5071, sum loss: 6446.660645, avg loss: 4.323716, ppl: 75.468559 +epoch: 0, batch: 5072, sum loss: 7716.622070, avg loss: 4.475999, ppl: 87.882385 +epoch: 0, batch: 5073, sum loss: 6985.423828, avg loss: 4.390587, ppl: 80.687759 +epoch: 0, batch: 5074, sum loss: 9777.788086, avg loss: 4.658308, ppl: 105.457451 +epoch: 0, batch: 5075, sum loss: 6410.029297, avg loss: 4.555813, ppl: 95.184135 +epoch: 0, batch: 5076, sum loss: 6047.715332, avg loss: 4.165093, ppl: 64.398697 +epoch: 0, batch: 5077, sum loss: 7907.511719, avg loss: 4.827540, ppl: 124.903374 +epoch: 0, batch: 5078, sum loss: 6217.183594, avg loss: 4.375217, ppl: 79.457077 +epoch: 0, batch: 5079, sum loss: 7598.103027, avg loss: 4.607703, ppl: 100.253624 +epoch: 0, batch: 5080, sum loss: 8161.645020, avg loss: 4.761753, ppl: 116.950768 +epoch: 0, batch: 5081, sum loss: 7936.206055, avg loss: 4.504090, ppl: 90.386040 +epoch: 0, batch: 5082, sum loss: 8365.339844, avg loss: 4.566234, ppl: 96.181175 +epoch: 0, batch: 5083, sum loss: 8435.810547, avg loss: 4.642714, ppl: 103.825699 +epoch: 0, batch: 5084, sum loss: 7198.737305, avg loss: 4.413695, ppl: 82.574036 +epoch: 0, batch: 5085, sum loss: 7317.594238, avg loss: 4.599368, ppl: 99.421471 +epoch: 0, batch: 5086, sum loss: 8297.286133, avg loss: 4.472930, ppl: 87.613091 +epoch: 0, batch: 5087, sum loss: 7642.905762, avg loss: 4.511751, ppl: 91.081131 +epoch: 0, batch: 5088, sum loss: 6762.975098, avg loss: 4.505646, ppl: 90.526825 +epoch: 0, batch: 5089, sum loss: 7294.544922, avg loss: 4.730574, ppl: 113.360573 +epoch: 0, batch: 5090, sum loss: 9310.407227, avg loss: 4.577388, ppl: 97.260002 +epoch: 0, batch: 5091, sum loss: 7780.095703, avg loss: 4.479042, ppl: 88.150192 +epoch: 0, batch: 5092, sum loss: 7649.368652, avg loss: 4.424158, ppl: 83.442490 +epoch: 0, 
batch: 5093, sum loss: 7313.102539, avg loss: 4.453778, ppl: 85.951035 +epoch: 0, batch: 5094, sum loss: 6908.019531, avg loss: 4.468318, ppl: 87.209908 +epoch: 0, batch: 5095, sum loss: 7102.465332, avg loss: 4.338708, ppl: 76.608528 +epoch: 0, batch: 5096, sum loss: 8254.057617, avg loss: 4.668585, ppl: 106.546852 +epoch: 0, batch: 5097, sum loss: 9299.555664, avg loss: 4.631253, ppl: 102.642570 +epoch: 0, batch: 5098, sum loss: 8124.947266, avg loss: 4.518880, ppl: 91.732834 +epoch: 0, batch: 5099, sum loss: 8006.971680, avg loss: 4.552002, ppl: 94.822090 +epoch: 0, batch: 5100, sum loss: 6936.643555, avg loss: 4.639895, ppl: 103.533524 +epoch: 0, batch: 5101, sum loss: 8101.859375, avg loss: 4.746256, ppl: 115.152390 +epoch: 0, batch: 5102, sum loss: 7868.799316, avg loss: 4.617840, ppl: 101.275024 +epoch: 0, batch: 5103, sum loss: 8224.950195, avg loss: 4.636386, ppl: 103.170853 +epoch: 0, batch: 5104, sum loss: 7132.037109, avg loss: 4.505393, ppl: 90.503906 +epoch: 0, batch: 5105, sum loss: 7993.558594, avg loss: 4.508493, ppl: 90.784943 +epoch: 0, batch: 5106, sum loss: 7984.682617, avg loss: 4.575749, ppl: 97.100777 +epoch: 0, batch: 5107, sum loss: 7859.253906, avg loss: 4.556089, ppl: 95.210419 +epoch: 0, batch: 5108, sum loss: 7852.572754, avg loss: 4.699326, ppl: 109.873047 +epoch: 0, batch: 5109, sum loss: 8893.064453, avg loss: 4.516539, ppl: 91.518272 +epoch: 0, batch: 5110, sum loss: 6400.679688, avg loss: 4.267120, ppl: 71.315941 +epoch: 0, batch: 5111, sum loss: 6166.601074, avg loss: 4.458858, ppl: 86.388840 +epoch: 0, batch: 5112, sum loss: 7796.572266, avg loss: 4.646348, ppl: 104.203735 +epoch: 0, batch: 5113, sum loss: 9346.071289, avg loss: 4.528135, ppl: 92.585754 +epoch: 0, batch: 5114, sum loss: 8592.472656, avg loss: 4.468265, ppl: 87.205254 +epoch: 0, batch: 5115, sum loss: 7060.486328, avg loss: 4.192688, ppl: 66.200500 +epoch: 0, batch: 5116, sum loss: 7287.870117, avg loss: 4.572064, ppl: 96.743576 +epoch: 0, batch: 5117, sum loss: 
7824.098145, avg loss: 4.530457, ppl: 92.800919 +epoch: 0, batch: 5118, sum loss: 6938.519531, avg loss: 4.402614, ppl: 81.664032 +epoch: 0, batch: 5119, sum loss: 6656.757812, avg loss: 4.497809, ppl: 89.820160 +epoch: 0, batch: 5120, sum loss: 8428.853516, avg loss: 4.541408, ppl: 93.822815 +epoch: 0, batch: 5121, sum loss: 6913.375488, avg loss: 4.348035, ppl: 77.326355 +epoch: 0, batch: 5122, sum loss: 6960.069824, avg loss: 4.572977, ppl: 96.831909 +epoch: 0, batch: 5123, sum loss: 7963.049316, avg loss: 4.635069, ppl: 103.035065 +epoch: 0, batch: 5124, sum loss: 7189.767578, avg loss: 4.323372, ppl: 75.442581 +epoch: 0, batch: 5125, sum loss: 8252.309570, avg loss: 4.664958, ppl: 106.161079 +epoch: 0, batch: 5126, sum loss: 6404.199219, avg loss: 4.371467, ppl: 79.159683 +epoch: 0, batch: 5127, sum loss: 8348.882812, avg loss: 4.452737, ppl: 85.861656 +epoch: 0, batch: 5128, sum loss: 7180.093262, avg loss: 4.584989, ppl: 98.002113 +epoch: 0, batch: 5129, sum loss: 7517.902344, avg loss: 4.496353, ppl: 89.689453 +epoch: 0, batch: 5130, sum loss: 7957.707031, avg loss: 4.586575, ppl: 98.157623 +epoch: 0, batch: 5131, sum loss: 7606.178711, avg loss: 4.458487, ppl: 86.356758 +epoch: 0, batch: 5132, sum loss: 7862.666016, avg loss: 4.581973, ppl: 97.706985 +epoch: 0, batch: 5133, sum loss: 7543.643555, avg loss: 4.442664, ppl: 85.001099 +epoch: 0, batch: 5134, sum loss: 8013.625000, avg loss: 4.504567, ppl: 90.429192 +epoch: 0, batch: 5135, sum loss: 8460.676758, avg loss: 4.565935, ppl: 96.152420 +epoch: 0, batch: 5136, sum loss: 7695.282227, avg loss: 4.354999, ppl: 77.866714 +epoch: 0, batch: 5137, sum loss: 6339.085938, avg loss: 4.223242, ppl: 68.254395 +epoch: 0, batch: 5138, sum loss: 6911.966309, avg loss: 4.433590, ppl: 84.233269 +epoch: 0, batch: 5139, sum loss: 7977.875977, avg loss: 4.492047, ppl: 89.304092 +epoch: 0, batch: 5140, sum loss: 7444.432617, avg loss: 4.609556, ppl: 100.439514 +epoch: 0, batch: 5141, sum loss: 8393.583008, avg loss: 
4.718147, ppl: 111.960571 +epoch: 0, batch: 5142, sum loss: 8282.573242, avg loss: 4.815450, ppl: 123.402298 +epoch: 0, batch: 5143, sum loss: 8601.541992, avg loss: 4.634451, ppl: 102.971413 +epoch: 0, batch: 5144, sum loss: 7739.466797, avg loss: 4.557990, ppl: 95.391556 +epoch: 0, batch: 5145, sum loss: 7041.639160, avg loss: 4.542993, ppl: 93.971642 +epoch: 0, batch: 5146, sum loss: 6625.740234, avg loss: 4.373426, ppl: 79.314896 +epoch: 0, batch: 5147, sum loss: 7250.452148, avg loss: 4.450861, ppl: 85.700661 +epoch: 0, batch: 5148, sum loss: 6517.941406, avg loss: 4.564385, ppl: 96.003525 +epoch: 0, batch: 5149, sum loss: 8351.632812, avg loss: 4.444722, ppl: 85.176208 +epoch: 0, batch: 5150, sum loss: 8372.267578, avg loss: 4.572511, ppl: 96.786858 +epoch: 0, batch: 5151, sum loss: 6910.258789, avg loss: 4.370815, ppl: 79.108063 +epoch: 0, batch: 5152, sum loss: 8722.036133, avg loss: 4.573695, ppl: 96.901474 +epoch: 0, batch: 5153, sum loss: 7689.956055, avg loss: 4.215985, ppl: 67.760864 +epoch: 0, batch: 5154, sum loss: 7205.651367, avg loss: 4.514819, ppl: 91.361038 +epoch: 0, batch: 5155, sum loss: 8117.137695, avg loss: 4.317626, ppl: 75.010376 +epoch: 0, batch: 5156, sum loss: 7757.328613, avg loss: 4.372789, ppl: 79.264420 +epoch: 0, batch: 5157, sum loss: 8606.441406, avg loss: 4.558496, ppl: 95.439873 +epoch: 0, batch: 5158, sum loss: 8181.514648, avg loss: 4.465892, ppl: 86.998627 +epoch: 0, batch: 5159, sum loss: 8095.820312, avg loss: 4.507695, ppl: 90.712502 +epoch: 0, batch: 5160, sum loss: 8670.869141, avg loss: 4.558817, ppl: 95.470467 +epoch: 0, batch: 5161, sum loss: 7458.950195, avg loss: 4.581665, ppl: 97.676849 +epoch: 0, batch: 5162, sum loss: 9105.705078, avg loss: 4.426692, ppl: 83.654190 +epoch: 0, batch: 5163, sum loss: 6920.260742, avg loss: 4.461806, ppl: 86.643829 +epoch: 0, batch: 5164, sum loss: 5274.805176, avg loss: 4.344979, ppl: 77.090439 +epoch: 0, batch: 5165, sum loss: 7150.278809, avg loss: 4.392063, ppl: 80.806961 
+epoch: 0, batch: 5166, sum loss: 7683.140137, avg loss: 4.543549, ppl: 94.023857 +epoch: 0, batch: 5167, sum loss: 7188.779785, avg loss: 4.380731, ppl: 79.896423 +epoch: 0, batch: 5168, sum loss: 6573.460938, avg loss: 4.499289, ppl: 89.953156 +epoch: 0, batch: 5169, sum loss: 7531.429688, avg loss: 4.507140, ppl: 90.662125 +epoch: 0, batch: 5170, sum loss: 6290.734375, avg loss: 4.185452, ppl: 65.723198 +epoch: 0, batch: 5171, sum loss: 8775.330078, avg loss: 4.611314, ppl: 100.616257 +epoch: 0, batch: 5172, sum loss: 8073.727051, avg loss: 4.490393, ppl: 89.156494 +epoch: 0, batch: 5173, sum loss: 7305.681641, avg loss: 4.330576, ppl: 75.988037 +epoch: 0, batch: 5174, sum loss: 8260.213867, avg loss: 4.630165, ppl: 102.530991 +epoch: 0, batch: 5175, sum loss: 6575.969727, avg loss: 4.395702, ppl: 81.101532 +epoch: 0, batch: 5176, sum loss: 6855.812012, avg loss: 4.206019, ppl: 67.088951 +epoch: 0, batch: 5177, sum loss: 6693.468750, avg loss: 4.421050, ppl: 83.183548 +epoch: 0, batch: 5178, sum loss: 8239.271484, avg loss: 4.801440, ppl: 121.685547 +epoch: 0, batch: 5179, sum loss: 7696.280273, avg loss: 4.503382, ppl: 90.322105 +epoch: 0, batch: 5180, sum loss: 7910.703125, avg loss: 4.580604, ppl: 97.573318 +epoch: 0, batch: 5181, sum loss: 8044.880859, avg loss: 4.696369, ppl: 109.548698 +epoch: 0, batch: 5182, sum loss: 7354.357422, avg loss: 4.536926, ppl: 93.403259 +epoch: 0, batch: 5183, sum loss: 7894.754883, avg loss: 4.693671, ppl: 109.253540 +epoch: 0, batch: 5184, sum loss: 7962.830078, avg loss: 4.632246, ppl: 102.744522 +epoch: 0, batch: 5185, sum loss: 7739.512207, avg loss: 4.445441, ppl: 85.237442 +epoch: 0, batch: 5186, sum loss: 7949.883789, avg loss: 4.547989, ppl: 94.442284 +epoch: 0, batch: 5187, sum loss: 8613.267578, avg loss: 4.533299, ppl: 93.065071 +epoch: 0, batch: 5188, sum loss: 7069.235352, avg loss: 4.352977, ppl: 77.709480 +epoch: 0, batch: 5189, sum loss: 6404.679688, avg loss: 4.339214, ppl: 76.647263 +epoch: 0, batch: 5190, 
sum loss: 6683.725586, avg loss: 4.562270, ppl: 95.800720 +epoch: 0, batch: 5191, sum loss: 8461.385742, avg loss: 4.664491, ppl: 106.111526 +epoch: 0, batch: 5192, sum loss: 7120.499023, avg loss: 4.284296, ppl: 72.551422 +epoch: 0, batch: 5193, sum loss: 8488.095703, avg loss: 4.671489, ppl: 106.856758 +epoch: 0, batch: 5194, sum loss: 7081.003906, avg loss: 4.222423, ppl: 68.198532 +epoch: 0, batch: 5195, sum loss: 8762.609375, avg loss: 4.754536, ppl: 116.109726 +epoch: 0, batch: 5196, sum loss: 7562.986816, avg loss: 4.425387, ppl: 83.545166 +epoch: 0, batch: 5197, sum loss: 8293.251953, avg loss: 4.357988, ppl: 78.099869 +epoch: 0, batch: 5198, sum loss: 8119.468750, avg loss: 4.739912, ppl: 114.424080 +epoch: 0, batch: 5199, sum loss: 8078.940430, avg loss: 4.443861, ppl: 85.102928 +epoch: 0, batch: 5200, sum loss: 6988.952637, avg loss: 4.634584, ppl: 102.985062 +epoch: 0, batch: 5201, sum loss: 8844.087891, avg loss: 4.689336, ppl: 108.780960 +epoch: 0, batch: 5202, sum loss: 6267.884766, avg loss: 4.316725, ppl: 74.942810 +epoch: 0, batch: 5203, sum loss: 7063.517090, avg loss: 4.484773, ppl: 88.656792 +epoch: 0, batch: 5204, sum loss: 8541.097656, avg loss: 4.567432, ppl: 96.296494 +epoch: 0, batch: 5205, sum loss: 6962.458984, avg loss: 4.329887, ppl: 75.935738 +epoch: 0, batch: 5206, sum loss: 7124.027344, avg loss: 4.460881, ppl: 86.563759 +epoch: 0, batch: 5207, sum loss: 6884.432617, avg loss: 4.421601, ppl: 83.229416 +epoch: 0, batch: 5208, sum loss: 6829.956055, avg loss: 4.300980, ppl: 73.772064 +epoch: 0, batch: 5209, sum loss: 7328.368164, avg loss: 4.617750, ppl: 101.265900 +epoch: 0, batch: 5210, sum loss: 7102.299805, avg loss: 4.518003, ppl: 91.652344 +epoch: 0, batch: 5211, sum loss: 7910.632812, avg loss: 4.497233, ppl: 89.768387 +epoch: 0, batch: 5212, sum loss: 6979.872070, avg loss: 4.332633, ppl: 76.144547 +epoch: 0, batch: 5213, sum loss: 7053.393066, avg loss: 4.353947, ppl: 77.784851 +epoch: 0, batch: 5214, sum loss: 6928.202148, 
avg loss: 4.362848, ppl: 78.480316 +epoch: 0, batch: 5215, sum loss: 8051.823730, avg loss: 4.349986, ppl: 77.477386 +epoch: 0, batch: 5216, sum loss: 8228.905273, avg loss: 4.670208, ppl: 106.719887 +epoch: 0, batch: 5217, sum loss: 7125.776855, avg loss: 4.678777, ppl: 107.638321 +epoch: 0, batch: 5218, sum loss: 7587.270996, avg loss: 4.455238, ppl: 86.076668 +epoch: 0, batch: 5219, sum loss: 7304.359863, avg loss: 4.459316, ppl: 86.428352 +epoch: 0, batch: 5220, sum loss: 8046.513672, avg loss: 4.616474, ppl: 101.136765 +epoch: 0, batch: 5221, sum loss: 8653.130859, avg loss: 4.561482, ppl: 95.725235 +epoch: 0, batch: 5222, sum loss: 7762.517090, avg loss: 4.542140, ppl: 93.891510 +epoch: 0, batch: 5223, sum loss: 6774.500000, avg loss: 4.474571, ppl: 87.756920 +epoch: 0, batch: 5224, sum loss: 5600.964844, avg loss: 4.502383, ppl: 90.231918 +epoch: 0, batch: 5225, sum loss: 7277.959473, avg loss: 4.788131, ppl: 120.076767 +epoch: 0, batch: 5226, sum loss: 8240.853516, avg loss: 4.402165, ppl: 81.627434 +epoch: 0, batch: 5227, sum loss: 6345.235352, avg loss: 4.558359, ppl: 95.426727 +epoch: 0, batch: 5228, sum loss: 7801.833496, avg loss: 4.496734, ppl: 89.723625 +epoch: 0, batch: 5229, sum loss: 6975.507812, avg loss: 4.634889, ppl: 103.016495 +epoch: 0, batch: 5230, sum loss: 7304.445312, avg loss: 4.437695, ppl: 84.579720 +epoch: 0, batch: 5231, sum loss: 7738.541992, avg loss: 4.684348, ppl: 108.239639 +epoch: 0, batch: 5232, sum loss: 7504.230469, avg loss: 4.575750, ppl: 97.100868 +epoch: 0, batch: 5233, sum loss: 8364.165039, avg loss: 4.701611, ppl: 110.124390 +epoch: 0, batch: 5234, sum loss: 7655.583008, avg loss: 4.600711, ppl: 99.555107 +epoch: 0, batch: 5235, sum loss: 7718.355469, avg loss: 4.413011, ppl: 82.517517 +epoch: 0, batch: 5236, sum loss: 7500.861328, avg loss: 4.638752, ppl: 103.415207 +epoch: 0, batch: 5237, sum loss: 7704.110352, avg loss: 4.502695, ppl: 90.260063 +epoch: 0, batch: 5238, sum loss: 7275.205078, avg loss: 4.598739, 
ppl: 99.358910 +epoch: 0, batch: 5239, sum loss: 7784.388672, avg loss: 4.568303, ppl: 96.380424 +epoch: 0, batch: 5240, sum loss: 8666.285156, avg loss: 4.513690, ppl: 91.257980 +epoch: 0, batch: 5241, sum loss: 7237.099609, avg loss: 4.423655, ppl: 83.400558 +epoch: 0, batch: 5242, sum loss: 7288.571289, avg loss: 4.444251, ppl: 85.136055 +epoch: 0, batch: 5243, sum loss: 6860.484863, avg loss: 4.460653, ppl: 86.544029 +epoch: 0, batch: 5244, sum loss: 9701.035156, avg loss: 4.493300, ppl: 89.416031 +epoch: 0, batch: 5245, sum loss: 7081.417969, avg loss: 4.428654, ppl: 83.818535 +epoch: 0, batch: 5246, sum loss: 7562.591797, avg loss: 4.648182, ppl: 104.395012 +epoch: 0, batch: 5247, sum loss: 6945.624023, avg loss: 4.300696, ppl: 73.751099 +epoch: 0, batch: 5248, sum loss: 8155.653320, avg loss: 4.626009, ppl: 102.105698 +epoch: 0, batch: 5249, sum loss: 6582.739258, avg loss: 4.524220, ppl: 92.223915 +epoch: 0, batch: 5250, sum loss: 7827.768555, avg loss: 4.729770, ppl: 113.269478 +epoch: 0, batch: 5251, sum loss: 7722.031250, avg loss: 4.507899, ppl: 90.731018 +epoch: 0, batch: 5252, sum loss: 6977.129395, avg loss: 4.510103, ppl: 90.931206 +epoch: 0, batch: 5253, sum loss: 6618.428711, avg loss: 4.242583, ppl: 69.587349 +epoch: 0, batch: 5254, sum loss: 7656.273438, avg loss: 4.282032, ppl: 72.387383 +epoch: 0, batch: 5255, sum loss: 7284.649414, avg loss: 4.367296, ppl: 78.830170 +epoch: 0, batch: 5256, sum loss: 7345.445312, avg loss: 4.813529, ppl: 123.165504 +epoch: 0, batch: 5257, sum loss: 6747.968750, avg loss: 4.519738, ppl: 91.811516 +epoch: 0, batch: 5258, sum loss: 6276.657227, avg loss: 4.115841, ppl: 61.303745 +epoch: 0, batch: 5259, sum loss: 7534.050781, avg loss: 4.610802, ppl: 100.564789 +epoch: 0, batch: 5260, sum loss: 8097.968262, avg loss: 4.614227, ppl: 100.909775 +epoch: 0, batch: 5261, sum loss: 7149.061523, avg loss: 4.639235, ppl: 103.465218 +epoch: 0, batch: 5262, sum loss: 7348.812012, avg loss: 4.480983, ppl: 88.321434 +epoch: 
0, batch: 5263, sum loss: 8828.466797, avg loss: 4.536725, ppl: 93.384468 +epoch: 0, batch: 5264, sum loss: 6214.609375, avg loss: 4.445357, ppl: 85.230324 +epoch: 0, batch: 5265, sum loss: 7527.857910, avg loss: 4.554058, ppl: 95.017212 +epoch: 0, batch: 5266, sum loss: 6370.681152, avg loss: 4.336747, ppl: 76.458389 +epoch: 0, batch: 5267, sum loss: 6829.119141, avg loss: 4.397372, ppl: 81.237114 +epoch: 0, batch: 5268, sum loss: 7965.944336, avg loss: 4.810353, ppl: 122.774925 +epoch: 0, batch: 5269, sum loss: 8385.036133, avg loss: 4.769645, ppl: 117.877411 +epoch: 0, batch: 5270, sum loss: 8402.576172, avg loss: 4.691556, ppl: 109.022636 +epoch: 0, batch: 5271, sum loss: 6893.981445, avg loss: 4.465014, ppl: 86.922241 +epoch: 0, batch: 5272, sum loss: 7954.842773, avg loss: 4.517231, ppl: 91.581612 +epoch: 0, batch: 5273, sum loss: 7213.206543, avg loss: 4.308965, ppl: 74.363464 +epoch: 0, batch: 5274, sum loss: 7107.660156, avg loss: 4.467417, ppl: 87.131348 +epoch: 0, batch: 5275, sum loss: 7774.008789, avg loss: 4.535594, ppl: 93.278954 +epoch: 0, batch: 5276, sum loss: 6788.828125, avg loss: 4.442950, ppl: 85.025421 +epoch: 0, batch: 5277, sum loss: 8742.132812, avg loss: 4.557942, ppl: 95.386963 +epoch: 0, batch: 5278, sum loss: 8389.050781, avg loss: 4.330950, ppl: 76.016487 +epoch: 0, batch: 5279, sum loss: 7535.085449, avg loss: 4.493193, ppl: 89.406479 +epoch: 0, batch: 5280, sum loss: 9296.908203, avg loss: 4.655437, ppl: 105.155212 +epoch: 0, batch: 5281, sum loss: 6424.549316, avg loss: 4.575890, ppl: 97.114395 +epoch: 0, batch: 5282, sum loss: 6798.154297, avg loss: 4.541185, ppl: 93.801880 +epoch: 0, batch: 5283, sum loss: 7173.300781, avg loss: 4.433437, ppl: 84.220375 +epoch: 0, batch: 5284, sum loss: 7400.130859, avg loss: 4.431216, ppl: 84.033562 +epoch: 0, batch: 5285, sum loss: 6378.791504, avg loss: 4.261050, ppl: 70.884354 +epoch: 0, batch: 5286, sum loss: 6761.036133, avg loss: 4.295448, ppl: 73.365097 +epoch: 0, batch: 5287, sum loss: 
6680.155273, avg loss: 4.441593, ppl: 84.910072 +epoch: 0, batch: 5288, sum loss: 8973.214844, avg loss: 4.933048, ppl: 138.801971 +epoch: 0, batch: 5289, sum loss: 7305.182617, avg loss: 4.443542, ppl: 85.075706 +epoch: 0, batch: 5290, sum loss: 7969.293945, avg loss: 4.381140, ppl: 79.929123 +epoch: 0, batch: 5291, sum loss: 7494.399902, avg loss: 4.490354, ppl: 89.152962 +epoch: 0, batch: 5292, sum loss: 8549.548828, avg loss: 4.641449, ppl: 103.694489 +epoch: 0, batch: 5293, sum loss: 6709.300781, avg loss: 4.306355, ppl: 74.169647 +epoch: 0, batch: 5294, sum loss: 6520.773438, avg loss: 4.344286, ppl: 77.037048 +epoch: 0, batch: 5295, sum loss: 7818.408691, avg loss: 4.477898, ppl: 88.049370 +epoch: 0, batch: 5296, sum loss: 7360.147461, avg loss: 4.407274, ppl: 82.045486 +epoch: 0, batch: 5297, sum loss: 7038.802246, avg loss: 4.334238, ppl: 76.266792 +epoch: 0, batch: 5298, sum loss: 7568.922852, avg loss: 4.543171, ppl: 93.988358 +epoch: 0, batch: 5299, sum loss: 8226.303711, avg loss: 4.255719, ppl: 70.507477 +epoch: 0, batch: 5300, sum loss: 7001.208496, avg loss: 4.392226, ppl: 80.820145 +epoch: 0, batch: 5301, sum loss: 7308.938965, avg loss: 4.464838, ppl: 86.906914 +epoch: 0, batch: 5302, sum loss: 7422.641602, avg loss: 4.542620, ppl: 93.936607 +epoch: 0, batch: 5303, sum loss: 7466.044434, avg loss: 4.402149, ppl: 81.626076 +epoch: 0, batch: 5304, sum loss: 7649.439941, avg loss: 4.403823, ppl: 81.762840 +epoch: 0, batch: 5305, sum loss: 7024.752441, avg loss: 4.314959, ppl: 74.810524 +epoch: 0, batch: 5306, sum loss: 7848.427734, avg loss: 4.365088, ppl: 78.656281 +epoch: 0, batch: 5307, sum loss: 7531.857422, avg loss: 4.417512, ppl: 82.889793 +epoch: 0, batch: 5308, sum loss: 5993.127441, avg loss: 4.199809, ppl: 66.673599 +epoch: 0, batch: 5309, sum loss: 7177.984863, avg loss: 4.227317, ppl: 68.533134 +epoch: 0, batch: 5310, sum loss: 7117.488281, avg loss: 4.448430, ppl: 85.492622 +epoch: 0, batch: 5311, sum loss: 7615.921387, avg loss: 
4.332151, ppl: 76.107811 +epoch: 0, batch: 5312, sum loss: 7511.049316, avg loss: 4.433913, ppl: 84.260506 +epoch: 0, batch: 5313, sum loss: 7712.785156, avg loss: 4.607399, ppl: 100.223129 +epoch: 0, batch: 5314, sum loss: 7966.755371, avg loss: 4.485786, ppl: 88.746674 +epoch: 0, batch: 5315, sum loss: 7708.066406, avg loss: 4.432471, ppl: 84.139053 +epoch: 0, batch: 5316, sum loss: 7541.781738, avg loss: 4.570777, ppl: 96.619148 +epoch: 0, batch: 5317, sum loss: 7209.984863, avg loss: 4.307040, ppl: 74.220490 +epoch: 0, batch: 5318, sum loss: 7116.267090, avg loss: 4.339187, ppl: 76.645210 +epoch: 0, batch: 5319, sum loss: 6868.974121, avg loss: 4.336473, ppl: 76.437508 +epoch: 0, batch: 5320, sum loss: 8313.271484, avg loss: 4.562717, ppl: 95.843575 +epoch: 0, batch: 5321, sum loss: 6212.161621, avg loss: 4.459556, ppl: 86.449127 +epoch: 0, batch: 5322, sum loss: 8583.720703, avg loss: 4.790023, ppl: 120.304115 +epoch: 0, batch: 5323, sum loss: 7254.581055, avg loss: 4.349269, ppl: 77.421875 +epoch: 0, batch: 5324, sum loss: 8435.206055, avg loss: 4.709774, ppl: 111.027069 +epoch: 0, batch: 5325, sum loss: 8012.896484, avg loss: 4.496575, ppl: 89.709335 +epoch: 0, batch: 5326, sum loss: 7830.883301, avg loss: 4.487612, ppl: 88.908897 +epoch: 0, batch: 5327, sum loss: 6802.895508, avg loss: 4.523201, ppl: 92.130081 +epoch: 0, batch: 5328, sum loss: 7073.360352, avg loss: 4.423615, ppl: 83.397217 +epoch: 0, batch: 5329, sum loss: 7780.296387, avg loss: 4.595568, ppl: 99.044395 +epoch: 0, batch: 5330, sum loss: 7289.783691, avg loss: 4.469518, ppl: 87.314644 +epoch: 0, batch: 5331, sum loss: 6640.970215, avg loss: 4.334837, ppl: 76.312515 +epoch: 0, batch: 5332, sum loss: 7670.099609, avg loss: 4.438715, ppl: 84.666077 +epoch: 0, batch: 5333, sum loss: 6585.868652, avg loss: 4.471058, ppl: 87.449226 +epoch: 0, batch: 5334, sum loss: 7393.248535, avg loss: 4.469921, ppl: 87.349792 +epoch: 0, batch: 5335, sum loss: 7755.834961, avg loss: 4.594689, ppl: 98.957344 
+epoch: 0, batch: 5336, sum loss: 7953.458008, avg loss: 4.428429, ppl: 83.799637 +epoch: 0, batch: 5337, sum loss: 6309.159180, avg loss: 4.175486, ppl: 65.071465 +epoch: 0, batch: 5338, sum loss: 7576.376465, avg loss: 4.475119, ppl: 87.805061 +epoch: 0, batch: 5339, sum loss: 7653.557617, avg loss: 4.666803, ppl: 106.357216 +epoch: 0, batch: 5340, sum loss: 7829.129395, avg loss: 4.530746, ppl: 92.827782 +epoch: 0, batch: 5341, sum loss: 6820.958984, avg loss: 4.529190, ppl: 92.683418 +epoch: 0, batch: 5342, sum loss: 8179.950195, avg loss: 4.534340, ppl: 93.162041 +epoch: 0, batch: 5343, sum loss: 6759.549805, avg loss: 4.542708, ppl: 93.944893 +epoch: 0, batch: 5344, sum loss: 7744.498047, avg loss: 4.266941, ppl: 71.303192 +epoch: 0, batch: 5345, sum loss: 7352.009766, avg loss: 4.444988, ppl: 85.198837 +epoch: 0, batch: 5346, sum loss: 6985.439453, avg loss: 4.418367, ppl: 82.960732 +epoch: 0, batch: 5347, sum loss: 8184.448242, avg loss: 4.541869, ppl: 93.866089 +epoch: 0, batch: 5348, sum loss: 8995.053711, avg loss: 4.612848, ppl: 100.770714 +epoch: 0, batch: 5349, sum loss: 7055.256836, avg loss: 4.437268, ppl: 84.543671 +epoch: 0, batch: 5350, sum loss: 5891.854492, avg loss: 4.288104, ppl: 72.828224 +epoch: 0, batch: 5351, sum loss: 8008.333008, avg loss: 4.441671, ppl: 84.916710 +epoch: 0, batch: 5352, sum loss: 8557.580078, avg loss: 4.566478, ppl: 96.204704 +epoch: 0, batch: 5353, sum loss: 8125.223145, avg loss: 4.432746, ppl: 84.162201 +epoch: 0, batch: 5354, sum loss: 7574.771484, avg loss: 4.393719, ppl: 80.940895 +epoch: 0, batch: 5355, sum loss: 7726.244141, avg loss: 4.615439, ppl: 101.032166 +epoch: 0, batch: 5356, sum loss: 7583.999023, avg loss: 4.463802, ppl: 86.816948 +epoch: 0, batch: 5357, sum loss: 7839.435547, avg loss: 4.521012, ppl: 91.928612 +epoch: 0, batch: 5358, sum loss: 6839.298340, avg loss: 4.418151, ppl: 82.942810 +epoch: 0, batch: 5359, sum loss: 9047.304688, avg loss: 4.673194, ppl: 107.039124 +epoch: 0, batch: 5360, sum 
loss: 6072.006836, avg loss: 4.261057, ppl: 70.884895 +epoch: 0, batch: 5361, sum loss: 6725.069824, avg loss: 4.480393, ppl: 88.269348 +epoch: 0, batch: 5362, sum loss: 7421.437012, avg loss: 4.495117, ppl: 89.578621 +epoch: 0, batch: 5363, sum loss: 7388.380859, avg loss: 4.510611, ppl: 90.977348 +epoch: 0, batch: 5364, sum loss: 9087.497070, avg loss: 4.500989, ppl: 90.106201 +epoch: 0, batch: 5365, sum loss: 7606.985840, avg loss: 4.359304, ppl: 78.202682 +epoch: 0, batch: 5366, sum loss: 7835.137207, avg loss: 4.490050, ppl: 89.125885 +epoch: 0, batch: 5367, sum loss: 6744.425781, avg loss: 4.257844, ppl: 70.657516 +epoch: 0, batch: 5368, sum loss: 7617.640137, avg loss: 4.542421, ppl: 93.917885 +epoch: 0, batch: 5369, sum loss: 7261.929199, avg loss: 4.364141, ppl: 78.581833 +epoch: 0, batch: 5370, sum loss: 7145.664551, avg loss: 3.909007, ppl: 49.849419 +epoch: 0, batch: 5371, sum loss: 7588.180664, avg loss: 4.615682, ppl: 101.056686 +epoch: 0, batch: 5372, sum loss: 8279.696289, avg loss: 4.475512, ppl: 87.839523 +epoch: 0, batch: 5373, sum loss: 9362.320312, avg loss: 4.707048, ppl: 110.724861 +epoch: 0, batch: 5374, sum loss: 7737.845703, avg loss: 4.467578, ppl: 87.145439 +epoch: 0, batch: 5375, sum loss: 7449.808105, avg loss: 4.539798, ppl: 93.671860 +epoch: 0, batch: 5376, sum loss: 7018.537109, avg loss: 4.258821, ppl: 70.726547 +epoch: 0, batch: 5377, sum loss: 7536.326172, avg loss: 4.420133, ppl: 83.107307 +epoch: 0, batch: 5378, sum loss: 6961.373047, avg loss: 4.383737, ppl: 80.136917 +epoch: 0, batch: 5379, sum loss: 6893.613770, avg loss: 4.544241, ppl: 94.088982 +epoch: 0, batch: 5380, sum loss: 6569.983398, avg loss: 4.527900, ppl: 92.563995 +epoch: 0, batch: 5381, sum loss: 8281.293945, avg loss: 4.665517, ppl: 106.220520 +epoch: 0, batch: 5382, sum loss: 7884.802734, avg loss: 4.459730, ppl: 86.464172 +epoch: 0, batch: 5383, sum loss: 6708.065430, avg loss: 4.381493, ppl: 79.957329 +epoch: 0, batch: 5384, sum loss: 8499.607422, avg loss: 
4.777744, ppl: 118.835991 +epoch: 0, batch: 5385, sum loss: 7151.364258, avg loss: 4.251703, ppl: 70.224892 +epoch: 0, batch: 5386, sum loss: 7929.184082, avg loss: 4.459609, ppl: 86.453659 +epoch: 0, batch: 5387, sum loss: 8083.795898, avg loss: 4.483525, ppl: 88.546272 +epoch: 0, batch: 5388, sum loss: 7095.769531, avg loss: 4.516722, ppl: 91.535027 +epoch: 0, batch: 5389, sum loss: 7771.520508, avg loss: 4.453593, ppl: 85.935173 +epoch: 0, batch: 5390, sum loss: 8505.218750, avg loss: 4.377365, ppl: 79.627907 +epoch: 0, batch: 5391, sum loss: 6731.313477, avg loss: 4.390942, ppl: 80.716385 +epoch: 0, batch: 5392, sum loss: 7096.683594, avg loss: 4.367190, ppl: 78.821823 +epoch: 0, batch: 5393, sum loss: 8413.888672, avg loss: 4.687403, ppl: 108.570877 +epoch: 0, batch: 5394, sum loss: 6757.311035, avg loss: 4.331610, ppl: 76.066635 +epoch: 0, batch: 5395, sum loss: 8492.409180, avg loss: 4.648281, ppl: 104.405365 +epoch: 0, batch: 5396, sum loss: 8159.080078, avg loss: 4.530305, ppl: 92.786896 +epoch: 0, batch: 5397, sum loss: 7037.458008, avg loss: 4.593641, ppl: 98.853729 +epoch: 0, batch: 5398, sum loss: 7470.643066, avg loss: 4.486873, ppl: 88.843170 +epoch: 0, batch: 5399, sum loss: 6998.454102, avg loss: 4.267350, ppl: 71.332367 +epoch: 0, batch: 5400, sum loss: 7347.823730, avg loss: 4.480381, ppl: 88.268257 +epoch: 0, batch: 5401, sum loss: 8123.629883, avg loss: 4.393526, ppl: 80.925262 +epoch: 0, batch: 5402, sum loss: 6712.629883, avg loss: 4.146158, ppl: 63.190769 +epoch: 0, batch: 5403, sum loss: 7199.494629, avg loss: 4.446877, ppl: 85.359985 +epoch: 0, batch: 5404, sum loss: 6646.054688, avg loss: 4.120307, ppl: 61.578140 +epoch: 0, batch: 5405, sum loss: 7982.976074, avg loss: 4.515258, ppl: 91.401176 +epoch: 0, batch: 5406, sum loss: 7415.115234, avg loss: 4.568771, ppl: 96.425522 +epoch: 0, batch: 5407, sum loss: 6974.478516, avg loss: 4.337362, ppl: 76.505470 +epoch: 0, batch: 5408, sum loss: 6680.443359, avg loss: 4.421206, ppl: 83.196602 
+epoch: 0, batch: 5409, sum loss: 8737.053711, avg loss: 4.419349, ppl: 83.042221 +epoch: 0, batch: 5410, sum loss: 7209.189453, avg loss: 4.311717, ppl: 74.568382 +epoch: 0, batch: 5411, sum loss: 8024.063477, avg loss: 4.561719, ppl: 95.747925 +epoch: 0, batch: 5412, sum loss: 8196.705078, avg loss: 4.546148, ppl: 94.268616 +epoch: 0, batch: 5413, sum loss: 6838.392090, avg loss: 4.580303, ppl: 97.543915 +epoch: 0, batch: 5414, sum loss: 7204.927734, avg loss: 4.263271, ppl: 71.041969 +epoch: 0, batch: 5415, sum loss: 7105.382812, avg loss: 4.399618, ppl: 81.419731 +epoch: 0, batch: 5416, sum loss: 7681.457520, avg loss: 4.384394, ppl: 80.189590 +epoch: 0, batch: 5417, sum loss: 6659.621094, avg loss: 4.355540, ppl: 77.908905 +epoch: 0, batch: 5418, sum loss: 6819.089844, avg loss: 4.399413, ppl: 81.403076 +epoch: 0, batch: 5419, sum loss: 8557.082031, avg loss: 4.714646, ppl: 111.569290 +epoch: 0, batch: 5420, sum loss: 6736.764648, avg loss: 4.340699, ppl: 76.761192 +epoch: 0, batch: 5421, sum loss: 7752.106934, avg loss: 4.379722, ppl: 79.815811 +epoch: 0, batch: 5422, sum loss: 6492.432617, avg loss: 4.395689, ppl: 81.100494 +epoch: 0, batch: 5423, sum loss: 7643.267578, avg loss: 4.306066, ppl: 74.148216 +epoch: 0, batch: 5424, sum loss: 6987.334961, avg loss: 4.612103, ppl: 100.695641 +epoch: 0, batch: 5425, sum loss: 7838.250977, avg loss: 4.509926, ppl: 90.915077 +epoch: 0, batch: 5426, sum loss: 7689.935547, avg loss: 4.607511, ppl: 100.234360 +epoch: 0, batch: 5427, sum loss: 8362.166016, avg loss: 4.539721, ppl: 93.664665 +epoch: 0, batch: 5428, sum loss: 6427.124023, avg loss: 4.336791, ppl: 76.461784 +epoch: 0, batch: 5429, sum loss: 8673.294922, avg loss: 4.569703, ppl: 96.515404 +epoch: 0, batch: 5430, sum loss: 6375.498047, avg loss: 4.375771, ppl: 79.501076 +epoch: 0, batch: 5431, sum loss: 7724.904785, avg loss: 4.581794, ppl: 97.689514 +epoch: 0, batch: 5432, sum loss: 7163.749023, avg loss: 4.463395, ppl: 86.781647 +epoch: 0, batch: 5433, sum 
loss: 8582.047852, avg loss: 4.689644, ppl: 108.814423 +epoch: 0, batch: 5434, sum loss: 7633.984863, avg loss: 4.487939, ppl: 88.937988 +epoch: 0, batch: 5435, sum loss: 7704.828613, avg loss: 4.469159, ppl: 87.283257 +epoch: 0, batch: 5436, sum loss: 6994.183105, avg loss: 4.474845, ppl: 87.781029 +epoch: 0, batch: 5437, sum loss: 8357.859375, avg loss: 4.577141, ppl: 97.236023 +epoch: 0, batch: 5438, sum loss: 7424.254395, avg loss: 4.477838, ppl: 88.044075 +epoch: 0, batch: 5439, sum loss: 7425.996582, avg loss: 4.241004, ppl: 69.477539 +epoch: 0, batch: 5440, sum loss: 7604.514648, avg loss: 4.308507, ppl: 74.329430 +epoch: 0, batch: 5441, sum loss: 7225.770996, avg loss: 4.152742, ppl: 63.608170 +epoch: 0, batch: 5442, sum loss: 6640.510742, avg loss: 4.447763, ppl: 85.435646 +epoch: 0, batch: 5443, sum loss: 7304.974609, avg loss: 4.330157, ppl: 75.956192 +epoch: 0, batch: 5444, sum loss: 7985.575684, avg loss: 4.443838, ppl: 85.100906 +epoch: 0, batch: 5445, sum loss: 6336.447754, avg loss: 4.290080, ppl: 72.972313 +epoch: 0, batch: 5446, sum loss: 7786.858398, avg loss: 4.318834, ppl: 75.101036 +epoch: 0, batch: 5447, sum loss: 6937.253906, avg loss: 4.393447, ppl: 80.918900 +epoch: 0, batch: 5448, sum loss: 7069.932617, avg loss: 4.149022, ppl: 63.371967 +epoch: 0, batch: 5449, sum loss: 7531.802734, avg loss: 4.384053, ppl: 80.162254 +epoch: 0, batch: 5450, sum loss: 7689.129395, avg loss: 4.302814, ppl: 73.907516 +epoch: 0, batch: 5451, sum loss: 6636.868652, avg loss: 4.150637, ppl: 63.474430 +epoch: 0, batch: 5452, sum loss: 7512.843262, avg loss: 4.558764, ppl: 95.465408 +epoch: 0, batch: 5453, sum loss: 8101.269043, avg loss: 4.535985, ppl: 93.315384 +epoch: 0, batch: 5454, sum loss: 9088.934570, avg loss: 4.521858, ppl: 92.006409 +epoch: 0, batch: 5455, sum loss: 7870.290039, avg loss: 4.464146, ppl: 86.846840 +epoch: 0, batch: 5456, sum loss: 8827.615234, avg loss: 4.646113, ppl: 104.179291 +epoch: 0, batch: 5457, sum loss: 6643.901855, avg loss: 
4.382521, ppl: 80.039574 +epoch: 0, batch: 5458, sum loss: 7733.505859, avg loss: 4.366745, ppl: 78.786766 +epoch: 0, batch: 5459, sum loss: 8201.212891, avg loss: 4.521066, ppl: 91.933517 +epoch: 0, batch: 5460, sum loss: 7402.437500, avg loss: 4.549747, ppl: 94.608421 +epoch: 0, batch: 5461, sum loss: 7210.559082, avg loss: 4.537797, ppl: 93.484627 +epoch: 0, batch: 5462, sum loss: 7888.031250, avg loss: 4.599435, ppl: 99.428108 +epoch: 0, batch: 5463, sum loss: 6368.788086, avg loss: 4.085176, ppl: 59.452370 +epoch: 0, batch: 5464, sum loss: 6685.900391, avg loss: 4.442459, ppl: 84.983665 +epoch: 0, batch: 5465, sum loss: 6939.973633, avg loss: 4.198411, ppl: 66.580452 +epoch: 0, batch: 5466, sum loss: 6944.084473, avg loss: 4.289120, ppl: 72.902267 +epoch: 0, batch: 5467, sum loss: 7788.206055, avg loss: 4.520143, ppl: 91.848732 +epoch: 0, batch: 5468, sum loss: 6969.085449, avg loss: 4.254631, ppl: 70.430794 +epoch: 0, batch: 5469, sum loss: 7112.406250, avg loss: 4.456395, ppl: 86.176254 +epoch: 0, batch: 5470, sum loss: 7135.545898, avg loss: 4.277905, ppl: 72.089287 +epoch: 0, batch: 5471, sum loss: 6998.626953, avg loss: 4.239023, ppl: 69.340088 +epoch: 0, batch: 5472, sum loss: 8228.160156, avg loss: 4.484011, ppl: 88.589310 +epoch: 0, batch: 5473, sum loss: 7359.254395, avg loss: 4.487350, ppl: 88.885628 +epoch: 0, batch: 5474, sum loss: 6912.630859, avg loss: 4.227909, ppl: 68.573669 +epoch: 0, batch: 5475, sum loss: 7802.636230, avg loss: 4.557614, ppl: 95.355721 +epoch: 0, batch: 5476, sum loss: 6945.252930, avg loss: 4.348938, ppl: 77.396187 +epoch: 0, batch: 5477, sum loss: 7283.959961, avg loss: 4.277134, ppl: 72.033722 +epoch: 0, batch: 5478, sum loss: 8839.764648, avg loss: 4.406662, ppl: 81.995346 +epoch: 0, batch: 5479, sum loss: 8021.385742, avg loss: 4.514004, ppl: 91.286621 +epoch: 0, batch: 5480, sum loss: 7716.223633, avg loss: 4.582081, ppl: 97.717514 +epoch: 0, batch: 5481, sum loss: 7812.752930, avg loss: 4.636649, ppl: 103.197914 
+epoch: 0, batch: 5482, sum loss: 8298.302734, avg loss: 4.633335, ppl: 102.856483 +epoch: 0, batch: 5483, sum loss: 6224.267578, avg loss: 4.277847, ppl: 72.085060 +epoch: 0, batch: 5484, sum loss: 7326.709961, avg loss: 4.227761, ppl: 68.563530 +epoch: 0, batch: 5485, sum loss: 7960.464844, avg loss: 4.660694, ppl: 105.709381 +epoch: 0, batch: 5486, sum loss: 8668.145508, avg loss: 4.554990, ppl: 95.105789 +epoch: 0, batch: 5487, sum loss: 7743.982910, avg loss: 4.523355, ppl: 92.144180 +epoch: 0, batch: 5488, sum loss: 7235.942871, avg loss: 4.461124, ppl: 86.584770 +epoch: 0, batch: 5489, sum loss: 8227.404297, avg loss: 4.371628, ppl: 79.172440 +epoch: 0, batch: 5490, sum loss: 7509.194824, avg loss: 4.355682, ppl: 77.919937 +epoch: 0, batch: 5491, sum loss: 7942.121094, avg loss: 4.434462, ppl: 84.306725 +epoch: 0, batch: 5492, sum loss: 8694.992188, avg loss: 4.507513, ppl: 90.695938 +epoch: 0, batch: 5493, sum loss: 7454.072754, avg loss: 4.468869, ppl: 87.257957 +epoch: 0, batch: 5494, sum loss: 8198.377930, avg loss: 4.629237, ppl: 102.435844 +epoch: 0, batch: 5495, sum loss: 7994.081055, avg loss: 4.594299, ppl: 98.918800 +epoch: 0, batch: 5496, sum loss: 6070.080078, avg loss: 4.265692, ppl: 71.214203 +epoch: 0, batch: 5497, sum loss: 7230.339844, avg loss: 4.334736, ppl: 76.304840 +epoch: 0, batch: 5498, sum loss: 8531.383789, avg loss: 4.506806, ppl: 90.631866 +epoch: 0, batch: 5499, sum loss: 6815.894531, avg loss: 4.313858, ppl: 74.728203 +epoch: 0, batch: 5500, sum loss: 7520.876465, avg loss: 4.329808, ppl: 75.929726 +epoch: 0, batch: 5501, sum loss: 7948.856934, avg loss: 4.308323, ppl: 74.315788 +epoch: 0, batch: 5502, sum loss: 8843.431641, avg loss: 4.679064, ppl: 107.669273 +epoch: 0, batch: 5503, sum loss: 8563.654297, avg loss: 4.526244, ppl: 92.410828 +epoch: 0, batch: 5504, sum loss: 8575.172852, avg loss: 4.610308, ppl: 100.515121 +epoch: 0, batch: 5505, sum loss: 7133.373047, avg loss: 4.523382, ppl: 92.146683 +epoch: 0, batch: 5506, 
sum loss: 7142.116211, avg loss: 4.258865, ppl: 70.729652 +epoch: 0, batch: 5507, sum loss: 7635.763184, avg loss: 4.636165, ppl: 103.147980 +epoch: 0, batch: 5508, sum loss: 7922.028320, avg loss: 4.746572, ppl: 115.188744 +epoch: 0, batch: 5509, sum loss: 7790.268066, avg loss: 4.469460, ppl: 87.309525 +epoch: 0, batch: 5510, sum loss: 7412.821289, avg loss: 4.506274, ppl: 90.583694 +epoch: 0, batch: 5511, sum loss: 7486.204102, avg loss: 4.327285, ppl: 75.738396 +epoch: 0, batch: 5512, sum loss: 6370.759277, avg loss: 4.241518, ppl: 69.513260 +epoch: 0, batch: 5513, sum loss: 5939.778320, avg loss: 4.426064, ppl: 83.601753 +epoch: 0, batch: 5514, sum loss: 7323.903809, avg loss: 4.449516, ppl: 85.585533 +epoch: 0, batch: 5515, sum loss: 7108.860352, avg loss: 4.571615, ppl: 96.700127 +epoch: 0, batch: 5516, sum loss: 8133.935547, avg loss: 4.546638, ppl: 94.314789 +epoch: 0, batch: 5517, sum loss: 6486.012207, avg loss: 4.353028, ppl: 77.713448 +epoch: 0, batch: 5518, sum loss: 7320.695312, avg loss: 4.365352, ppl: 78.677101 +epoch: 0, batch: 5519, sum loss: 7964.088867, avg loss: 4.540529, ppl: 93.740402 +epoch: 0, batch: 5520, sum loss: 5864.550781, avg loss: 4.216068, ppl: 67.766518 +epoch: 0, batch: 5521, sum loss: 6155.172852, avg loss: 4.334629, ppl: 76.296654 +epoch: 0, batch: 5522, sum loss: 7643.761719, avg loss: 4.330743, ppl: 76.000755 +epoch: 0, batch: 5523, sum loss: 8162.724609, avg loss: 4.300698, ppl: 73.751274 +epoch: 0, batch: 5524, sum loss: 7164.166016, avg loss: 4.447031, ppl: 85.373055 +epoch: 0, batch: 5525, sum loss: 7280.755859, avg loss: 4.344127, ppl: 77.024742 +epoch: 0, batch: 5526, sum loss: 6725.014648, avg loss: 4.213668, ppl: 67.604080 +epoch: 0, batch: 5527, sum loss: 7948.374512, avg loss: 4.613102, ppl: 100.796333 +epoch: 0, batch: 5528, sum loss: 7467.493164, avg loss: 4.374630, ppl: 79.410454 +epoch: 0, batch: 5529, sum loss: 8882.720703, avg loss: 4.682509, ppl: 108.040855 +epoch: 0, batch: 5530, sum loss: 6828.063477, avg 
loss: 4.430930, ppl: 84.009521 +epoch: 0, batch: 5531, sum loss: 7853.214844, avg loss: 4.497832, ppl: 89.822212 +epoch: 0, batch: 5532, sum loss: 7181.083496, avg loss: 4.510731, ppl: 90.988281 +epoch: 0, batch: 5533, sum loss: 7286.117676, avg loss: 4.505948, ppl: 90.554153 +epoch: 0, batch: 5534, sum loss: 6481.788574, avg loss: 4.165674, ppl: 64.436081 +epoch: 0, batch: 5535, sum loss: 6720.559082, avg loss: 4.232090, ppl: 68.861031 +epoch: 0, batch: 5536, sum loss: 7632.596680, avg loss: 4.384030, ppl: 80.160454 +epoch: 0, batch: 5537, sum loss: 6512.622070, avg loss: 4.180117, ppl: 65.373512 +epoch: 0, batch: 5538, sum loss: 7388.986328, avg loss: 4.200675, ppl: 66.731331 +epoch: 0, batch: 5539, sum loss: 7043.190430, avg loss: 4.413026, ppl: 82.518814 +epoch: 0, batch: 5540, sum loss: 7815.381348, avg loss: 4.422966, ppl: 83.343155 +epoch: 0, batch: 5541, sum loss: 7143.258301, avg loss: 4.318778, ppl: 75.096809 +epoch: 0, batch: 5542, sum loss: 6572.220215, avg loss: 4.320987, ppl: 75.262856 +epoch: 0, batch: 5543, sum loss: 8424.841797, avg loss: 4.324868, ppl: 75.555519 +epoch: 0, batch: 5544, sum loss: 7050.481445, avg loss: 4.288614, ppl: 72.865395 +epoch: 0, batch: 5545, sum loss: 7791.510254, avg loss: 4.480454, ppl: 88.274780 +epoch: 0, batch: 5546, sum loss: 6226.475586, avg loss: 4.184459, ppl: 65.657982 +epoch: 0, batch: 5547, sum loss: 7378.961914, avg loss: 4.381806, ppl: 79.982384 +epoch: 0, batch: 5548, sum loss: 6658.448242, avg loss: 4.415417, ppl: 82.716301 +epoch: 0, batch: 5549, sum loss: 7152.540527, avg loss: 4.342769, ppl: 76.920212 +epoch: 0, batch: 5550, sum loss: 6317.675293, avg loss: 4.248605, ppl: 70.007668 +epoch: 0, batch: 5551, sum loss: 8365.152344, avg loss: 4.509516, ppl: 90.877846 +epoch: 0, batch: 5552, sum loss: 7863.495117, avg loss: 4.342074, ppl: 76.866791 +epoch: 0, batch: 5553, sum loss: 6209.019043, avg loss: 4.384901, ppl: 80.230247 +epoch: 0, batch: 5554, sum loss: 7776.666016, avg loss: 4.361563, ppl: 78.379524 
+epoch: 0, batch: 5555, sum loss: 7077.862305, avg loss: 4.491029, ppl: 89.213219 +epoch: 0, batch: 5556, sum loss: 7144.246582, avg loss: 4.496065, ppl: 89.663582 +epoch: 0, batch: 5557, sum loss: 6642.130371, avg loss: 4.398762, ppl: 81.350075 +epoch: 0, batch: 5558, sum loss: 7548.099609, avg loss: 4.544311, ppl: 94.095535 +epoch: 0, batch: 5559, sum loss: 7868.153320, avg loss: 4.465467, ppl: 86.961670 +epoch: 0, batch: 5560, sum loss: 6491.932617, avg loss: 4.093274, ppl: 59.935806 +epoch: 0, batch: 5561, sum loss: 7995.178711, avg loss: 4.451659, ppl: 85.769096 +epoch: 0, batch: 5562, sum loss: 7260.958984, avg loss: 4.521145, ppl: 91.940796 +epoch: 0, batch: 5563, sum loss: 6537.613281, avg loss: 4.106541, ppl: 60.736275 +epoch: 0, batch: 5564, sum loss: 7303.317871, avg loss: 4.418220, ppl: 82.948509 +epoch: 0, batch: 5565, sum loss: 7172.397461, avg loss: 4.378753, ppl: 79.738518 +epoch: 0, batch: 5566, sum loss: 6590.965820, avg loss: 4.285413, ppl: 72.632561 +epoch: 0, batch: 5567, sum loss: 8270.822266, avg loss: 4.584713, ppl: 97.975060 +epoch: 0, batch: 5568, sum loss: 7224.452148, avg loss: 4.297711, ppl: 73.531281 +epoch: 0, batch: 5569, sum loss: 7627.771973, avg loss: 4.671018, ppl: 106.806381 +epoch: 0, batch: 5570, sum loss: 7443.930176, avg loss: 4.373637, ppl: 79.331612 +epoch: 0, batch: 5571, sum loss: 7192.914062, avg loss: 4.388599, ppl: 80.527512 +epoch: 0, batch: 5572, sum loss: 6602.152344, avg loss: 4.375184, ppl: 79.454460 +epoch: 0, batch: 5573, sum loss: 8593.946289, avg loss: 4.544657, ppl: 94.128113 +epoch: 0, batch: 5574, sum loss: 7077.634277, avg loss: 4.175595, ppl: 65.078568 +epoch: 0, batch: 5575, sum loss: 7773.657227, avg loss: 4.394380, ppl: 80.994370 +epoch: 0, batch: 5576, sum loss: 7630.689453, avg loss: 4.370384, ppl: 79.073967 +epoch: 0, batch: 5577, sum loss: 7190.191406, avg loss: 4.491063, ppl: 89.216240 +epoch: 0, batch: 5578, sum loss: 6989.230469, avg loss: 4.390220, ppl: 80.658173 +epoch: 0, batch: 5579, sum 
loss: 6117.680176, avg loss: 4.144770, ppl: 63.103085 +epoch: 0, batch: 5580, sum loss: 8407.453125, avg loss: 4.569268, ppl: 96.473442 +epoch: 0, batch: 5581, sum loss: 9781.173828, avg loss: 4.633431, ppl: 102.866440 +epoch: 0, batch: 5582, sum loss: 7382.879395, avg loss: 4.399809, ppl: 81.435303 +epoch: 0, batch: 5583, sum loss: 7016.809570, avg loss: 4.336718, ppl: 76.456207 +epoch: 0, batch: 5584, sum loss: 7280.708008, avg loss: 4.313216, ppl: 74.680252 +epoch: 0, batch: 5585, sum loss: 8126.757812, avg loss: 4.599184, ppl: 99.403175 +epoch: 0, batch: 5586, sum loss: 8174.422852, avg loss: 4.569269, ppl: 96.473579 +epoch: 0, batch: 5587, sum loss: 8340.030273, avg loss: 4.582435, ppl: 97.752098 +epoch: 0, batch: 5588, sum loss: 6632.661621, avg loss: 4.163630, ppl: 64.304497 +epoch: 0, batch: 5589, sum loss: 6743.246094, avg loss: 4.410233, ppl: 82.288635 +epoch: 0, batch: 5590, sum loss: 7263.029297, avg loss: 4.305293, ppl: 74.090927 +epoch: 0, batch: 5591, sum loss: 6409.580078, avg loss: 4.143232, ppl: 63.006149 +epoch: 0, batch: 5592, sum loss: 6975.303711, avg loss: 4.303087, ppl: 73.927635 +epoch: 0, batch: 5593, sum loss: 6783.746094, avg loss: 4.351345, ppl: 77.582710 +epoch: 0, batch: 5594, sum loss: 7079.895996, avg loss: 4.532584, ppl: 92.998535 +epoch: 0, batch: 5595, sum loss: 7169.930664, avg loss: 4.543682, ppl: 94.036415 +epoch: 0, batch: 5596, sum loss: 7384.058105, avg loss: 4.724286, ppl: 112.649994 +epoch: 0, batch: 5597, sum loss: 6861.546387, avg loss: 4.429662, ppl: 83.903030 +epoch: 0, batch: 5598, sum loss: 7790.149414, avg loss: 4.421197, ppl: 83.195847 +epoch: 0, batch: 5599, sum loss: 9160.581055, avg loss: 4.676152, ppl: 107.356140 +epoch: 0, batch: 5600, sum loss: 6745.476562, avg loss: 4.301962, ppl: 73.844528 +epoch: 0, batch: 5601, sum loss: 7028.381836, avg loss: 4.605755, ppl: 100.058487 +epoch: 0, batch: 5602, sum loss: 6140.121094, avg loss: 4.225823, ppl: 68.430824 +epoch: 0, batch: 5603, sum loss: 8544.830078, avg 
loss: 4.411373, ppl: 82.382507 +epoch: 0, batch: 5604, sum loss: 8265.517578, avg loss: 4.516676, ppl: 91.530884 +epoch: 0, batch: 5605, sum loss: 7024.378906, avg loss: 4.244338, ppl: 69.709602 +epoch: 0, batch: 5606, sum loss: 7671.691406, avg loss: 4.419177, ppl: 83.027931 +epoch: 0, batch: 5607, sum loss: 7530.007812, avg loss: 4.429417, ppl: 83.882469 +epoch: 0, batch: 5608, sum loss: 8490.218750, avg loss: 4.677807, ppl: 107.533974 +epoch: 0, batch: 5609, sum loss: 7046.601074, avg loss: 4.349754, ppl: 77.459396 +epoch: 0, batch: 5610, sum loss: 7493.779297, avg loss: 4.400340, ppl: 81.478577 +epoch: 0, batch: 5611, sum loss: 8307.010742, avg loss: 4.541832, ppl: 93.862640 +epoch: 0, batch: 5612, sum loss: 7521.381348, avg loss: 4.089930, ppl: 59.735714 +epoch: 0, batch: 5613, sum loss: 7735.170898, avg loss: 4.412533, ppl: 82.478142 +epoch: 0, batch: 5614, sum loss: 6432.530273, avg loss: 4.402827, ppl: 81.681480 +epoch: 0, batch: 5615, sum loss: 6921.034668, avg loss: 4.246034, ppl: 69.827904 +epoch: 0, batch: 5616, sum loss: 6979.546387, avg loss: 4.445571, ppl: 85.248535 +epoch: 0, batch: 5617, sum loss: 6159.127930, avg loss: 4.253541, ppl: 70.354126 +epoch: 0, batch: 5618, sum loss: 6536.946289, avg loss: 4.150442, ppl: 63.462051 +epoch: 0, batch: 5619, sum loss: 8687.890625, avg loss: 4.653396, ppl: 104.940773 +epoch: 0, batch: 5620, sum loss: 7784.600098, avg loss: 4.341662, ppl: 76.835167 +epoch: 0, batch: 5621, sum loss: 6525.245117, avg loss: 4.106511, ppl: 60.734425 +epoch: 0, batch: 5622, sum loss: 7380.736816, avg loss: 4.607202, ppl: 100.203346 +epoch: 0, batch: 5623, sum loss: 7164.474609, avg loss: 4.427982, ppl: 83.762199 +epoch: 0, batch: 5624, sum loss: 8459.177734, avg loss: 4.509157, ppl: 90.845177 +epoch: 0, batch: 5625, sum loss: 5714.854492, avg loss: 4.047348, ppl: 57.245415 +epoch: 0, batch: 5626, sum loss: 7927.773438, avg loss: 4.212420, ppl: 67.519707 +epoch: 0, batch: 5627, sum loss: 6602.453125, avg loss: 4.251419, ppl: 
70.204933 +epoch: 0, batch: 5628, sum loss: 6828.171387, avg loss: 4.349154, ppl: 77.412941 +epoch: 0, batch: 5629, sum loss: 6150.782227, avg loss: 4.244846, ppl: 69.745041 +epoch: 0, batch: 5630, sum loss: 8506.003906, avg loss: 4.462751, ppl: 86.725754 +epoch: 0, batch: 5631, sum loss: 6863.885254, avg loss: 4.445522, ppl: 85.244347 +epoch: 0, batch: 5632, sum loss: 7051.352539, avg loss: 4.291754, ppl: 73.094582 +epoch: 0, batch: 5633, sum loss: 7677.414062, avg loss: 4.382086, ppl: 80.004730 +epoch: 0, batch: 5634, sum loss: 7314.164062, avg loss: 4.451713, ppl: 85.773712 +epoch: 0, batch: 5635, sum loss: 7699.588379, avg loss: 4.672081, ppl: 106.919960 +epoch: 0, batch: 5636, sum loss: 8183.876953, avg loss: 4.339278, ppl: 76.652161 +epoch: 0, batch: 5637, sum loss: 5797.362305, avg loss: 4.037160, ppl: 56.665207 +epoch: 0, batch: 5638, sum loss: 8200.558594, avg loss: 4.498387, ppl: 89.872040 +epoch: 0, batch: 5639, sum loss: 6596.718750, avg loss: 4.294739, ppl: 73.313057 +epoch: 0, batch: 5640, sum loss: 8672.596680, avg loss: 4.295491, ppl: 73.368248 +epoch: 0, batch: 5641, sum loss: 8659.970703, avg loss: 4.470816, ppl: 87.428047 +epoch: 0, batch: 5642, sum loss: 8033.993164, avg loss: 4.424005, ppl: 83.429756 +epoch: 0, batch: 5643, sum loss: 6733.856934, avg loss: 4.243136, ppl: 69.625885 +epoch: 0, batch: 5644, sum loss: 8085.964844, avg loss: 4.373156, ppl: 79.293457 +epoch: 0, batch: 5645, sum loss: 6257.149414, avg loss: 4.409549, ppl: 82.232346 +epoch: 0, batch: 5646, sum loss: 6708.784180, avg loss: 4.322670, ppl: 75.389687 +epoch: 0, batch: 5647, sum loss: 7506.166016, avg loss: 4.538190, ppl: 93.521362 +epoch: 0, batch: 5648, sum loss: 7527.770996, avg loss: 4.351313, ppl: 77.580231 +epoch: 0, batch: 5649, sum loss: 7141.229492, avg loss: 4.286452, ppl: 72.708061 +epoch: 0, batch: 5650, sum loss: 7770.624023, avg loss: 4.345987, ppl: 77.168152 +epoch: 0, batch: 5651, sum loss: 7892.149414, avg loss: 4.275270, ppl: 71.899582 +epoch: 0, batch: 
5652, sum loss: 6517.300293, avg loss: 4.256891, ppl: 70.590195 +epoch: 0, batch: 5653, sum loss: 7724.967285, avg loss: 4.244488, ppl: 69.720039 +epoch: 0, batch: 5654, sum loss: 7812.158691, avg loss: 4.406181, ppl: 81.955864 +epoch: 0, batch: 5655, sum loss: 5647.183594, avg loss: 4.059802, ppl: 57.962811 +epoch: 0, batch: 5656, sum loss: 6608.981445, avg loss: 4.297127, ppl: 73.488373 +epoch: 0, batch: 5657, sum loss: 7340.953125, avg loss: 4.152123, ppl: 63.568813 +epoch: 0, batch: 5658, sum loss: 7039.213867, avg loss: 4.148034, ppl: 63.309418 +epoch: 0, batch: 5659, sum loss: 6697.125000, avg loss: 4.411808, ppl: 82.418381 +epoch: 0, batch: 5660, sum loss: 6116.360840, avg loss: 4.253380, ppl: 70.342789 +epoch: 0, batch: 5661, sum loss: 6757.539551, avg loss: 4.436993, ppl: 84.520378 +epoch: 0, batch: 5662, sum loss: 6985.142578, avg loss: 4.254045, ppl: 70.389595 +epoch: 0, batch: 5663, sum loss: 6197.384766, avg loss: 4.061196, ppl: 58.043682 +epoch: 0, batch: 5664, sum loss: 8880.838867, avg loss: 4.568333, ppl: 96.383316 +epoch: 0, batch: 5665, sum loss: 6853.097656, avg loss: 4.526484, ppl: 92.433037 +epoch: 0, batch: 5666, sum loss: 7327.270508, avg loss: 4.408707, ppl: 82.163132 +epoch: 0, batch: 5667, sum loss: 7870.752441, avg loss: 4.482205, ppl: 88.429482 +epoch: 0, batch: 5668, sum loss: 6331.914062, avg loss: 4.345857, ppl: 77.158142 +epoch: 0, batch: 5669, sum loss: 6432.959961, avg loss: 3.949024, ppl: 51.884701 +epoch: 0, batch: 5670, sum loss: 7233.634277, avg loss: 4.619179, ppl: 101.410713 +epoch: 0, batch: 5671, sum loss: 7715.632324, avg loss: 4.337061, ppl: 76.482460 +epoch: 0, batch: 5672, sum loss: 7599.621582, avg loss: 4.269450, ppl: 71.482323 +epoch: 0, batch: 5673, sum loss: 8926.704102, avg loss: 4.591927, ppl: 98.684372 +epoch: 0, batch: 5674, sum loss: 7433.559570, avg loss: 4.469970, ppl: 87.354080 +epoch: 0, batch: 5675, sum loss: 7291.299316, avg loss: 4.319490, ppl: 75.150322 +epoch: 0, batch: 5676, sum loss: 8474.249023, 
avg loss: 4.439104, ppl: 84.699020 +epoch: 0, batch: 5677, sum loss: 7428.600586, avg loss: 4.274224, ppl: 71.824371 +epoch: 0, batch: 5678, sum loss: 7292.548828, avg loss: 4.552153, ppl: 94.836334 +epoch: 0, batch: 5679, sum loss: 6706.191895, avg loss: 4.134520, ppl: 62.459606 +epoch: 0, batch: 5680, sum loss: 7335.769043, avg loss: 4.511543, ppl: 91.062202 +epoch: 0, batch: 5681, sum loss: 7590.626465, avg loss: 4.526313, ppl: 92.417175 +epoch: 0, batch: 5682, sum loss: 8081.317871, avg loss: 4.401589, ppl: 81.580391 +epoch: 0, batch: 5683, sum loss: 7025.464844, avg loss: 4.196813, ppl: 66.474144 +epoch: 0, batch: 5684, sum loss: 6775.347168, avg loss: 4.422550, ppl: 83.308472 +epoch: 0, batch: 5685, sum loss: 8726.401367, avg loss: 4.454518, ppl: 86.014709 +epoch: 0, batch: 5686, sum loss: 6934.736328, avg loss: 4.165007, ppl: 64.393143 +epoch: 0, batch: 5687, sum loss: 5428.794922, avg loss: 4.150455, ppl: 63.462868 +epoch: 0, batch: 5688, sum loss: 9135.251953, avg loss: 4.419570, ppl: 83.060600 +epoch: 0, batch: 5689, sum loss: 6037.198730, avg loss: 4.445654, ppl: 85.255653 +epoch: 0, batch: 5690, sum loss: 6892.956543, avg loss: 4.441338, ppl: 84.888451 +epoch: 0, batch: 5691, sum loss: 6651.896973, avg loss: 4.327845, ppl: 75.780777 +epoch: 0, batch: 5692, sum loss: 7045.283203, avg loss: 4.373236, ppl: 79.299843 +epoch: 0, batch: 5693, sum loss: 7301.153320, avg loss: 4.294796, ppl: 73.317291 +epoch: 0, batch: 5694, sum loss: 8508.896484, avg loss: 4.399636, ppl: 81.421249 +epoch: 0, batch: 5695, sum loss: 6320.133789, avg loss: 4.111993, ppl: 61.068325 +epoch: 0, batch: 5696, sum loss: 7406.509766, avg loss: 4.395555, ppl: 81.089584 +epoch: 0, batch: 5697, sum loss: 6755.679688, avg loss: 4.369780, ppl: 79.026253 +epoch: 0, batch: 5698, sum loss: 6634.379883, avg loss: 4.446635, ppl: 85.339317 +epoch: 0, batch: 5699, sum loss: 8507.249023, avg loss: 4.433167, ppl: 84.197685 +epoch: 0, batch: 5700, sum loss: 6658.083496, avg loss: 4.184842, ppl: 
65.683128 +epoch: 0, batch: 5701, sum loss: 6818.031738, avg loss: 4.317943, ppl: 75.034096 +epoch: 0, batch: 5702, sum loss: 6613.436035, avg loss: 4.169884, ppl: 64.707962 +epoch: 0, batch: 5703, sum loss: 5999.042480, avg loss: 4.215771, ppl: 67.746391 +epoch: 0, batch: 5704, sum loss: 7145.008301, avg loss: 4.025357, ppl: 56.000286 +epoch: 0, batch: 5705, sum loss: 7227.164062, avg loss: 4.442019, ppl: 84.946236 +epoch: 0, batch: 5706, sum loss: 6882.396973, avg loss: 4.237929, ppl: 69.264244 +epoch: 0, batch: 5707, sum loss: 7703.720703, avg loss: 4.526275, ppl: 92.413696 +epoch: 0, batch: 5708, sum loss: 6846.193848, avg loss: 4.166886, ppl: 64.514229 +epoch: 0, batch: 5709, sum loss: 8420.589844, avg loss: 4.657406, ppl: 105.362404 +epoch: 0, batch: 5710, sum loss: 8373.306641, avg loss: 4.487302, ppl: 88.881348 +epoch: 0, batch: 5711, sum loss: 7514.872559, avg loss: 4.473139, ppl: 87.631348 +epoch: 0, batch: 5712, sum loss: 6314.656250, avg loss: 4.292764, ppl: 73.168442 +epoch: 0, batch: 5713, sum loss: 7877.509766, avg loss: 4.248926, ppl: 70.030174 +epoch: 0, batch: 5714, sum loss: 8017.597168, avg loss: 4.539976, ppl: 93.688515 +epoch: 0, batch: 5715, sum loss: 7963.575684, avg loss: 4.481472, ppl: 88.364655 +epoch: 0, batch: 5716, sum loss: 7648.023438, avg loss: 4.433637, ppl: 84.237244 +epoch: 0, batch: 5717, sum loss: 7792.941895, avg loss: 4.608481, ppl: 100.331673 +epoch: 0, batch: 5718, sum loss: 6213.203613, avg loss: 4.360143, ppl: 78.268303 +epoch: 0, batch: 5719, sum loss: 6036.425293, avg loss: 4.148746, ppl: 63.354504 +epoch: 0, batch: 5720, sum loss: 7787.638184, avg loss: 4.450079, ppl: 85.633705 +epoch: 0, batch: 5721, sum loss: 8044.270508, avg loss: 4.267518, ppl: 71.344307 +epoch: 0, batch: 5722, sum loss: 8628.354492, avg loss: 4.338036, ppl: 76.557037 +epoch: 0, batch: 5723, sum loss: 7754.402344, avg loss: 4.456553, ppl: 86.189896 +epoch: 0, batch: 5724, sum loss: 8746.265625, avg loss: 4.344891, ppl: 77.083641 +epoch: 0, batch: 
5725, sum loss: 6646.160645, avg loss: 4.102569, ppl: 60.495480 +epoch: 0, batch: 5726, sum loss: 6310.243164, avg loss: 4.198432, ppl: 66.581848 +epoch: 0, batch: 5727, sum loss: 7442.344238, avg loss: 4.264954, ppl: 71.161621 +epoch: 0, batch: 5728, sum loss: 9104.642578, avg loss: 4.498342, ppl: 89.867966 +epoch: 0, batch: 5729, sum loss: 7728.543945, avg loss: 4.509069, ppl: 90.837204 +epoch: 0, batch: 5730, sum loss: 7679.432617, avg loss: 4.297388, ppl: 73.507545 +epoch: 0, batch: 5731, sum loss: 6519.916504, avg loss: 4.323552, ppl: 75.456184 +epoch: 0, batch: 5732, sum loss: 7537.805176, avg loss: 4.402924, ppl: 81.689346 +epoch: 0, batch: 5733, sum loss: 7815.109375, avg loss: 4.185918, ppl: 65.753860 +epoch: 0, batch: 5734, sum loss: 8046.838867, avg loss: 4.366163, ppl: 78.740944 +epoch: 0, batch: 5735, sum loss: 6747.678711, avg loss: 4.413132, ppl: 82.527550 +epoch: 0, batch: 5736, sum loss: 8032.687500, avg loss: 4.396655, ppl: 81.178841 +epoch: 0, batch: 5737, sum loss: 6158.878418, avg loss: 4.195421, ppl: 66.381683 +epoch: 0, batch: 5738, sum loss: 6883.686035, avg loss: 4.315791, ppl: 74.872795 +epoch: 0, batch: 5739, sum loss: 6758.775391, avg loss: 4.195392, ppl: 66.379723 +epoch: 0, batch: 5740, sum loss: 6576.267578, avg loss: 4.449437, ppl: 85.578758 +epoch: 0, batch: 5741, sum loss: 6448.135742, avg loss: 4.099260, ppl: 60.295643 +epoch: 0, batch: 5742, sum loss: 5730.006348, avg loss: 3.943569, ppl: 51.602463 +epoch: 0, batch: 5743, sum loss: 7623.616211, avg loss: 4.376358, ppl: 79.547798 +epoch: 0, batch: 5744, sum loss: 7042.707031, avg loss: 4.355416, ppl: 77.899208 +epoch: 0, batch: 5745, sum loss: 6840.086426, avg loss: 4.427241, ppl: 83.700157 +epoch: 0, batch: 5746, sum loss: 7967.698242, avg loss: 4.610937, ppl: 100.578308 +epoch: 0, batch: 5747, sum loss: 8670.973633, avg loss: 4.798546, ppl: 121.333908 +epoch: 0, batch: 5748, sum loss: 8419.812500, avg loss: 4.284892, ppl: 72.594711 +epoch: 0, batch: 5749, sum loss: 7562.668457, 
avg loss: 4.351363, ppl: 77.584114 +epoch: 0, batch: 5750, sum loss: 7826.806152, avg loss: 4.457179, ppl: 86.243881 +epoch: 0, batch: 5751, sum loss: 5823.019043, avg loss: 4.177202, ppl: 65.183197 +epoch: 0, batch: 5752, sum loss: 7551.535156, avg loss: 4.360009, ppl: 78.257820 +epoch: 0, batch: 5753, sum loss: 6572.554199, avg loss: 4.089953, ppl: 59.737080 +epoch: 0, batch: 5754, sum loss: 8359.122070, avg loss: 4.494152, ppl: 89.492210 +epoch: 0, batch: 5755, sum loss: 5901.804688, avg loss: 4.224627, ppl: 68.349037 +epoch: 0, batch: 5756, sum loss: 7473.569824, avg loss: 4.300098, ppl: 73.707016 +epoch: 0, batch: 5757, sum loss: 8205.584961, avg loss: 4.216642, ppl: 67.805405 +epoch: 0, batch: 5758, sum loss: 6580.436523, avg loss: 4.275787, ppl: 71.936760 +epoch: 0, batch: 5759, sum loss: 8228.217773, avg loss: 4.612230, ppl: 100.708458 +epoch: 0, batch: 5760, sum loss: 6646.483887, avg loss: 4.107839, ppl: 60.815159 +epoch: 0, batch: 5761, sum loss: 8134.044922, avg loss: 4.493948, ppl: 89.473946 +epoch: 0, batch: 5762, sum loss: 6915.916992, avg loss: 4.087421, ppl: 59.586044 +epoch: 0, batch: 5763, sum loss: 7515.562012, avg loss: 4.374599, ppl: 79.408028 +epoch: 0, batch: 5764, sum loss: 6787.569824, avg loss: 4.106213, ppl: 60.716324 +epoch: 0, batch: 5765, sum loss: 5789.880371, avg loss: 3.995777, ppl: 54.368061 +epoch: 0, batch: 5766, sum loss: 6743.183594, avg loss: 4.222407, ppl: 68.197426 +epoch: 0, batch: 5767, sum loss: 6443.426270, avg loss: 4.301353, ppl: 73.799538 +epoch: 0, batch: 5768, sum loss: 7150.160156, avg loss: 4.400098, ppl: 81.458878 +epoch: 0, batch: 5769, sum loss: 6982.251953, avg loss: 4.410772, ppl: 82.332985 +epoch: 0, batch: 5770, sum loss: 7730.671875, avg loss: 4.596119, ppl: 99.098961 +epoch: 0, batch: 5771, sum loss: 6985.137695, avg loss: 4.341291, ppl: 76.806664 +epoch: 0, batch: 5772, sum loss: 7494.099609, avg loss: 4.260432, ppl: 70.840561 +epoch: 0, batch: 5773, sum loss: 7874.734375, avg loss: 4.456556, ppl: 
86.190147 +epoch: 0, batch: 5774, sum loss: 7475.787598, avg loss: 4.267002, ppl: 71.307541 +epoch: 0, batch: 5775, sum loss: 8423.876953, avg loss: 4.487947, ppl: 88.938705 +epoch: 0, batch: 5776, sum loss: 7478.451660, avg loss: 4.173244, ppl: 64.925697 +epoch: 0, batch: 5777, sum loss: 6663.321289, avg loss: 4.149017, ppl: 63.371666 +epoch: 0, batch: 5778, sum loss: 8526.861328, avg loss: 4.418063, ppl: 82.935493 +epoch: 0, batch: 5779, sum loss: 6688.011230, avg loss: 4.300972, ppl: 73.771461 +epoch: 0, batch: 5780, sum loss: 7023.386719, avg loss: 4.173135, ppl: 64.918671 +epoch: 0, batch: 5781, sum loss: 7856.693359, avg loss: 4.326373, ppl: 75.669304 +epoch: 0, batch: 5782, sum loss: 6882.247070, avg loss: 4.158458, ppl: 63.972786 +epoch: 0, batch: 5783, sum loss: 7632.094238, avg loss: 4.559196, ppl: 95.506706 +epoch: 0, batch: 5784, sum loss: 6841.795898, avg loss: 4.265459, ppl: 71.197594 +epoch: 0, batch: 5785, sum loss: 7523.234863, avg loss: 4.361296, ppl: 78.358597 +epoch: 0, batch: 5786, sum loss: 7845.233398, avg loss: 4.427333, ppl: 83.707863 +epoch: 0, batch: 5787, sum loss: 8432.415039, avg loss: 4.319885, ppl: 75.179962 +epoch: 0, batch: 5788, sum loss: 8015.855469, avg loss: 4.598884, ppl: 99.373367 +epoch: 0, batch: 5789, sum loss: 7131.563477, avg loss: 4.375192, ppl: 79.455109 +epoch: 0, batch: 5790, sum loss: 7263.082031, avg loss: 4.409886, ppl: 82.260078 +epoch: 0, batch: 5791, sum loss: 7317.507812, avg loss: 4.437542, ppl: 84.566856 +epoch: 0, batch: 5792, sum loss: 6914.564453, avg loss: 4.170425, ppl: 64.742958 +epoch: 0, batch: 5793, sum loss: 7785.974121, avg loss: 4.383995, ppl: 80.157593 +epoch: 0, batch: 5794, sum loss: 7530.084961, avg loss: 4.403558, ppl: 81.741211 +epoch: 0, batch: 5795, sum loss: 6264.589844, avg loss: 4.075856, ppl: 58.900864 +epoch: 0, batch: 5796, sum loss: 7446.615234, avg loss: 4.557292, ppl: 95.324989 +epoch: 0, batch: 5797, sum loss: 7833.905273, avg loss: 4.383831, ppl: 80.144440 +epoch: 0, batch: 
5798, sum loss: 6620.106934, avg loss: 4.265533, ppl: 71.202858 +epoch: 0, batch: 5799, sum loss: 8040.574219, avg loss: 4.290595, ppl: 73.009865 +epoch: 0, batch: 5800, sum loss: 6926.094727, avg loss: 4.120223, ppl: 61.572975 +epoch: 0, batch: 5801, sum loss: 7403.976562, avg loss: 4.262508, ppl: 70.987823 +epoch: 0, batch: 5802, sum loss: 7642.926758, avg loss: 4.464326, ppl: 86.862495 +epoch: 0, batch: 5803, sum loss: 8101.424805, avg loss: 4.523408, ppl: 92.149147 +epoch: 0, batch: 5804, sum loss: 6303.799316, avg loss: 4.362491, ppl: 78.452324 +epoch: 0, batch: 5805, sum loss: 7664.148438, avg loss: 4.592061, ppl: 98.697594 +epoch: 0, batch: 5806, sum loss: 7899.285156, avg loss: 4.286101, ppl: 72.682549 +epoch: 0, batch: 5807, sum loss: 7360.837402, avg loss: 4.350377, ppl: 77.507645 +epoch: 0, batch: 5808, sum loss: 7602.591797, avg loss: 4.202649, ppl: 66.863228 +epoch: 0, batch: 5809, sum loss: 6323.548828, avg loss: 4.287152, ppl: 72.758942 +epoch: 0, batch: 5810, sum loss: 8067.101562, avg loss: 4.628285, ppl: 102.338448 +epoch: 0, batch: 5811, sum loss: 7899.687500, avg loss: 4.805163, ppl: 122.139389 +epoch: 0, batch: 5812, sum loss: 8442.439453, avg loss: 4.573369, ppl: 96.869919 +epoch: 0, batch: 5813, sum loss: 6307.255859, avg loss: 4.395300, ppl: 81.068939 +epoch: 0, batch: 5814, sum loss: 7869.951660, avg loss: 4.421321, ppl: 83.206161 +epoch: 0, batch: 5815, sum loss: 8123.714355, avg loss: 4.551101, ppl: 94.736633 +epoch: 0, batch: 5816, sum loss: 7256.881348, avg loss: 4.266244, ppl: 71.253502 +epoch: 0, batch: 5817, sum loss: 7075.664062, avg loss: 4.600562, ppl: 99.540207 +epoch: 0, batch: 5818, sum loss: 6901.523926, avg loss: 4.376363, ppl: 79.548172 +epoch: 0, batch: 5819, sum loss: 6173.308594, avg loss: 4.205251, ppl: 67.037437 +epoch: 0, batch: 5820, sum loss: 7802.155273, avg loss: 4.520368, ppl: 91.869408 +epoch: 0, batch: 5821, sum loss: 7090.029297, avg loss: 4.431268, ppl: 84.037926 +epoch: 0, batch: 5822, sum loss: 7716.359375, 
avg loss: 4.460323, ppl: 86.515480 +epoch: 0, batch: 5823, sum loss: 6762.324219, avg loss: 4.218543, ppl: 67.934402 +epoch: 0, batch: 5824, sum loss: 6584.858398, avg loss: 4.413444, ppl: 82.553291 +epoch: 0, batch: 5825, sum loss: 7830.608887, avg loss: 4.276684, ppl: 72.001305 +epoch: 0, batch: 5826, sum loss: 6791.066406, avg loss: 4.210208, ppl: 67.370583 +epoch: 0, batch: 5827, sum loss: 7665.992188, avg loss: 4.493547, ppl: 89.438164 +epoch: 0, batch: 5828, sum loss: 7068.300293, avg loss: 4.197328, ppl: 66.508392 +epoch: 0, batch: 5829, sum loss: 7129.972656, avg loss: 4.254160, ppl: 70.397690 +epoch: 0, batch: 5830, sum loss: 6684.085938, avg loss: 4.429481, ppl: 83.887833 +epoch: 0, batch: 5831, sum loss: 8131.454590, avg loss: 4.485083, ppl: 88.684280 +epoch: 0, batch: 5832, sum loss: 8691.457031, avg loss: 4.343557, ppl: 76.980865 +epoch: 0, batch: 5833, sum loss: 7927.461914, avg loss: 4.566510, ppl: 96.207779 +epoch: 0, batch: 5834, sum loss: 7019.908203, avg loss: 4.398439, ppl: 81.323814 +epoch: 0, batch: 5835, sum loss: 7253.167480, avg loss: 4.369378, ppl: 78.994492 +epoch: 0, batch: 5836, sum loss: 7388.971191, avg loss: 4.486321, ppl: 88.794167 +epoch: 0, batch: 5837, sum loss: 8610.757812, avg loss: 4.592404, ppl: 98.731529 +epoch: 0, batch: 5838, sum loss: 7156.106934, avg loss: 4.646822, ppl: 104.253189 +epoch: 0, batch: 5839, sum loss: 7320.644043, avg loss: 4.278576, ppl: 72.137672 +epoch: 0, batch: 5840, sum loss: 7552.906738, avg loss: 4.196059, ppl: 66.424049 +epoch: 0, batch: 5841, sum loss: 5818.526367, avg loss: 4.170987, ppl: 64.779335 +epoch: 0, batch: 5842, sum loss: 7883.235352, avg loss: 4.249722, ppl: 70.085960 +epoch: 0, batch: 5843, sum loss: 6947.833984, avg loss: 4.358741, ppl: 78.158691 +epoch: 0, batch: 5844, sum loss: 7740.415527, avg loss: 4.360797, ppl: 78.319565 +epoch: 0, batch: 5845, sum loss: 6578.904297, avg loss: 4.263710, ppl: 71.073181 +epoch: 0, batch: 5846, sum loss: 8301.153320, avg loss: 4.396797, ppl: 
81.190414 +epoch: 0, batch: 5847, sum loss: 6130.583496, avg loss: 4.087056, ppl: 59.564259 +epoch: 0, batch: 5848, sum loss: 7105.730957, avg loss: 4.540403, ppl: 93.728554 +epoch: 0, batch: 5849, sum loss: 6914.954102, avg loss: 4.346294, ppl: 77.191887 +epoch: 0, batch: 5850, sum loss: 6999.894043, avg loss: 4.146857, ppl: 63.234928 +epoch: 0, batch: 5851, sum loss: 7246.268555, avg loss: 4.262511, ppl: 70.988029 +epoch: 0, batch: 5852, sum loss: 6472.651367, avg loss: 4.361625, ppl: 78.384384 +epoch: 0, batch: 5853, sum loss: 7557.238281, avg loss: 4.318422, ppl: 75.070061 +epoch: 0, batch: 5854, sum loss: 6150.888184, avg loss: 4.340782, ppl: 76.767563 +epoch: 0, batch: 5855, sum loss: 7988.343750, avg loss: 4.374778, ppl: 79.422188 +epoch: 0, batch: 5856, sum loss: 7821.700195, avg loss: 4.444148, ppl: 85.127281 +epoch: 0, batch: 5857, sum loss: 7009.797852, avg loss: 4.394857, ppl: 81.033073 +epoch: 0, batch: 5858, sum loss: 6711.425781, avg loss: 3.947898, ppl: 51.826298 +epoch: 0, batch: 5859, sum loss: 8294.182617, avg loss: 4.473669, ppl: 87.677826 +epoch: 0, batch: 5860, sum loss: 6609.018555, avg loss: 4.385547, ppl: 80.282097 +epoch: 0, batch: 5861, sum loss: 6261.336914, avg loss: 3.848394, ppl: 46.917648 +epoch: 0, batch: 5862, sum loss: 8492.211914, avg loss: 4.379686, ppl: 79.812996 +epoch: 0, batch: 5863, sum loss: 6815.039551, avg loss: 4.032568, ppl: 56.405575 +epoch: 0, batch: 5864, sum loss: 6991.471680, avg loss: 4.216811, ppl: 67.816849 +epoch: 0, batch: 5865, sum loss: 7035.043945, avg loss: 4.220182, ppl: 68.045898 +epoch: 0, batch: 5866, sum loss: 7971.334473, avg loss: 4.413806, ppl: 82.583214 +epoch: 0, batch: 5867, sum loss: 7163.500977, avg loss: 4.392091, ppl: 80.809235 +epoch: 0, batch: 5868, sum loss: 6710.234375, avg loss: 4.315263, ppl: 74.833321 +epoch: 0, batch: 5869, sum loss: 7293.982422, avg loss: 4.245624, ppl: 69.799309 +epoch: 0, batch: 5870, sum loss: 5699.954590, avg loss: 4.166634, ppl: 64.497963 +epoch: 0, batch: 
5871, sum loss: 6934.974609, avg loss: 4.392004, ppl: 80.802185 +epoch: 0, batch: 5872, sum loss: 9556.170898, avg loss: 4.682102, ppl: 107.996864 +epoch: 0, batch: 5873, sum loss: 8671.101562, avg loss: 4.363916, ppl: 78.564186 +epoch: 0, batch: 5874, sum loss: 8132.029297, avg loss: 4.174553, ppl: 65.010796 +epoch: 0, batch: 5875, sum loss: 6447.766602, avg loss: 4.281385, ppl: 72.340591 +epoch: 0, batch: 5876, sum loss: 7555.113281, avg loss: 4.540333, ppl: 93.721985 +epoch: 0, batch: 5877, sum loss: 6795.729980, avg loss: 4.303819, ppl: 73.981766 +epoch: 0, batch: 5878, sum loss: 5875.860352, avg loss: 4.164323, ppl: 64.349121 +epoch: 0, batch: 5879, sum loss: 6035.706055, avg loss: 4.108717, ppl: 60.868599 +epoch: 0, batch: 5880, sum loss: 6877.343750, avg loss: 4.380474, ppl: 79.875854 +epoch: 0, batch: 5881, sum loss: 7949.240723, avg loss: 4.550223, ppl: 94.653549 +epoch: 0, batch: 5882, sum loss: 5421.909668, avg loss: 4.107508, ppl: 60.795010 +epoch: 0, batch: 5883, sum loss: 7132.405762, avg loss: 4.127550, ppl: 62.025753 +epoch: 0, batch: 5884, sum loss: 8574.767578, avg loss: 4.377115, ppl: 79.608017 +epoch: 0, batch: 5885, sum loss: 7582.604492, avg loss: 4.489405, ppl: 89.068405 +epoch: 0, batch: 5886, sum loss: 6102.752930, avg loss: 4.334342, ppl: 76.274719 +epoch: 0, batch: 5887, sum loss: 6914.617188, avg loss: 4.198310, ppl: 66.573753 +epoch: 0, batch: 5888, sum loss: 7761.132812, avg loss: 4.292662, ppl: 73.160973 +epoch: 0, batch: 5889, sum loss: 7849.294434, avg loss: 4.286890, ppl: 72.739861 +epoch: 0, batch: 5890, sum loss: 7523.680176, avg loss: 4.122564, ppl: 61.717300 +epoch: 0, batch: 5891, sum loss: 6059.926270, avg loss: 4.439507, ppl: 84.733116 +epoch: 0, batch: 5892, sum loss: 7147.364258, avg loss: 4.360808, ppl: 78.320381 +epoch: 0, batch: 5893, sum loss: 7397.742188, avg loss: 4.311038, ppl: 74.517838 +epoch: 0, batch: 5894, sum loss: 8067.904297, avg loss: 4.182428, ppl: 65.524780 +epoch: 0, batch: 5895, sum loss: 8060.332031, 
avg loss: 4.328857, ppl: 75.857567 +epoch: 0, batch: 5896, sum loss: 6977.468750, avg loss: 4.374588, ppl: 79.407158 +epoch: 0, batch: 5897, sum loss: 7435.669922, avg loss: 4.182042, ppl: 65.499443 +epoch: 0, batch: 5898, sum loss: 7526.565430, avg loss: 4.416998, ppl: 82.847237 +epoch: 0, batch: 5899, sum loss: 8785.606445, avg loss: 4.575837, ppl: 97.109299 +epoch: 0, batch: 5900, sum loss: 7004.395020, avg loss: 4.307746, ppl: 74.272881 +epoch: 0, batch: 5901, sum loss: 8120.762695, avg loss: 4.771306, ppl: 118.073349 +epoch: 0, batch: 5902, sum loss: 6042.827148, avg loss: 3.965110, ppl: 52.726048 +epoch: 0, batch: 5903, sum loss: 5841.438965, avg loss: 4.266939, ppl: 71.303017 +epoch: 0, batch: 5904, sum loss: 8038.760742, avg loss: 4.475925, ppl: 87.875847 +epoch: 0, batch: 5905, sum loss: 6047.262207, avg loss: 4.347421, ppl: 77.278877 +epoch: 0, batch: 5906, sum loss: 8282.554688, avg loss: 4.391599, ppl: 80.769440 +epoch: 0, batch: 5907, sum loss: 6693.500977, avg loss: 4.239076, ppl: 69.343758 +epoch: 0, batch: 5908, sum loss: 7400.779297, avg loss: 4.280381, ppl: 72.267982 +epoch: 0, batch: 5909, sum loss: 8404.670898, avg loss: 4.489675, ppl: 89.092491 +epoch: 0, batch: 5910, sum loss: 7018.303223, avg loss: 4.073304, ppl: 58.750767 +epoch: 0, batch: 5911, sum loss: 7153.605957, avg loss: 4.245463, ppl: 69.788094 +epoch: 0, batch: 5912, sum loss: 8406.081055, avg loss: 4.391892, ppl: 80.793129 +epoch: 0, batch: 5913, sum loss: 6905.849121, avg loss: 4.528426, ppl: 92.612648 +epoch: 0, batch: 5914, sum loss: 7214.517578, avg loss: 4.369787, ppl: 79.026779 +epoch: 0, batch: 5915, sum loss: 7011.180176, avg loss: 4.226148, ppl: 68.453049 +epoch: 0, batch: 5916, sum loss: 6937.612793, avg loss: 4.161735, ppl: 64.182785 +epoch: 0, batch: 5917, sum loss: 6881.049316, avg loss: 4.300656, ppl: 73.748146 +epoch: 0, batch: 5918, sum loss: 6665.636719, avg loss: 4.150459, ppl: 63.463112 +epoch: 0, batch: 5919, sum loss: 6231.193359, avg loss: 4.168022, ppl: 
64.587578 +epoch: 0, batch: 5920, sum loss: 7225.570801, avg loss: 4.077636, ppl: 59.005802 +epoch: 0, batch: 5921, sum loss: 8211.361328, avg loss: 4.261215, ppl: 70.896088 +epoch: 0, batch: 5922, sum loss: 7515.790039, avg loss: 4.436712, ppl: 84.496643 +epoch: 0, batch: 5923, sum loss: 5925.583008, avg loss: 3.982247, ppl: 53.637402 +epoch: 0, batch: 5924, sum loss: 8241.987305, avg loss: 4.377051, ppl: 79.602928 +epoch: 0, batch: 5925, sum loss: 6891.828613, avg loss: 4.251591, ppl: 70.217018 +epoch: 0, batch: 5926, sum loss: 6880.976074, avg loss: 4.229241, ppl: 68.665123 +epoch: 0, batch: 5927, sum loss: 7706.985352, avg loss: 4.590223, ppl: 98.516426 +epoch: 0, batch: 5928, sum loss: 6841.076660, avg loss: 4.371295, ppl: 79.146057 +epoch: 0, batch: 5929, sum loss: 7806.067383, avg loss: 4.463161, ppl: 86.761284 +epoch: 0, batch: 5930, sum loss: 8541.597656, avg loss: 4.427992, ppl: 83.763077 +epoch: 0, batch: 5931, sum loss: 7670.457031, avg loss: 4.363172, ppl: 78.505768 +epoch: 0, batch: 5932, sum loss: 6468.077148, avg loss: 4.197325, ppl: 66.508163 +epoch: 0, batch: 5933, sum loss: 7551.909668, avg loss: 4.375382, ppl: 79.470184 +epoch: 0, batch: 5934, sum loss: 8318.449219, avg loss: 4.479509, ppl: 88.191391 +epoch: 0, batch: 5935, sum loss: 7175.949707, avg loss: 4.107584, ppl: 60.799648 +epoch: 0, batch: 5936, sum loss: 7784.056152, avg loss: 4.478744, ppl: 88.123878 +epoch: 0, batch: 5937, sum loss: 7397.646973, avg loss: 4.130456, ppl: 62.206280 +epoch: 0, batch: 5938, sum loss: 6998.387695, avg loss: 4.384954, ppl: 80.234566 +epoch: 0, batch: 5939, sum loss: 6932.160156, avg loss: 4.443693, ppl: 85.088570 +epoch: 0, batch: 5940, sum loss: 6591.416016, avg loss: 4.336458, ppl: 76.436340 +epoch: 0, batch: 5941, sum loss: 6846.079590, avg loss: 4.231199, ppl: 68.799660 +epoch: 0, batch: 5942, sum loss: 6959.807129, avg loss: 4.363515, ppl: 78.532722 +epoch: 0, batch: 5943, sum loss: 6717.526367, avg loss: 4.164617, ppl: 64.367996 +epoch: 0, batch: 
5944, sum loss: 7161.358398, avg loss: 4.260178, ppl: 70.822563 +epoch: 0, batch: 5945, sum loss: 7307.652832, avg loss: 4.447750, ppl: 85.434464 +epoch: 0, batch: 5946, sum loss: 7828.246094, avg loss: 4.298872, ppl: 73.616707 +epoch: 0, batch: 5947, sum loss: 6455.898926, avg loss: 4.167785, ppl: 64.572273 +epoch: 0, batch: 5948, sum loss: 7782.833984, avg loss: 4.229801, ppl: 68.703537 +epoch: 0, batch: 5949, sum loss: 7028.210449, avg loss: 4.190942, ppl: 66.085030 +epoch: 0, batch: 5950, sum loss: 7487.043457, avg loss: 4.467210, ppl: 87.113319 +epoch: 0, batch: 5951, sum loss: 7001.713867, avg loss: 4.330064, ppl: 75.949173 +epoch: 0, batch: 5952, sum loss: 6783.426270, avg loss: 4.239641, ppl: 69.382950 +epoch: 0, batch: 5953, sum loss: 7107.277344, avg loss: 4.315287, ppl: 74.835068 +epoch: 0, batch: 5954, sum loss: 7606.063477, avg loss: 4.383898, ppl: 80.149872 +epoch: 0, batch: 5955, sum loss: 7225.404785, avg loss: 4.518702, ppl: 91.716476 +epoch: 0, batch: 5956, sum loss: 7270.749023, avg loss: 4.107768, ppl: 60.810841 +epoch: 0, batch: 5957, sum loss: 7680.923340, avg loss: 4.381588, ppl: 79.964912 +epoch: 0, batch: 5958, sum loss: 6731.371582, avg loss: 4.273887, ppl: 71.800156 +epoch: 0, batch: 5959, sum loss: 7830.993164, avg loss: 4.288605, ppl: 72.864769 +epoch: 0, batch: 5960, sum loss: 7272.131348, avg loss: 4.362406, ppl: 78.445671 +epoch: 0, batch: 5961, sum loss: 6704.289551, avg loss: 4.102993, ppl: 60.521130 +epoch: 0, batch: 5962, sum loss: 6241.718750, avg loss: 4.439345, ppl: 84.719421 +epoch: 0, batch: 5963, sum loss: 8014.333496, avg loss: 4.449935, ppl: 85.621376 +epoch: 0, batch: 5964, sum loss: 6462.339844, avg loss: 4.190882, ppl: 66.081032 +epoch: 0, batch: 5965, sum loss: 6888.191406, avg loss: 4.223293, ppl: 68.257912 +epoch: 0, batch: 5966, sum loss: 8808.017578, avg loss: 4.360405, ppl: 78.288834 +epoch: 0, batch: 5967, sum loss: 7927.168945, avg loss: 4.606141, ppl: 100.097137 +epoch: 0, batch: 5968, sum loss: 7908.167969, 
avg loss: 4.235762, ppl: 69.114334 +epoch: 0, batch: 5969, sum loss: 6759.519043, avg loss: 4.182871, ppl: 65.553780 +epoch: 0, batch: 5970, sum loss: 6762.917480, avg loss: 4.277620, ppl: 72.068703 +epoch: 0, batch: 5971, sum loss: 9868.352539, avg loss: 4.385934, ppl: 80.313232 +epoch: 0, batch: 5972, sum loss: 6924.146484, avg loss: 4.271528, ppl: 71.630989 +epoch: 0, batch: 5973, sum loss: 6700.404785, avg loss: 4.425631, ppl: 83.565483 +epoch: 0, batch: 5974, sum loss: 6412.133789, avg loss: 4.002580, ppl: 54.739204 +epoch: 0, batch: 5975, sum loss: 6500.420898, avg loss: 4.223795, ppl: 68.292191 +epoch: 0, batch: 5976, sum loss: 7225.239258, avg loss: 4.298179, ppl: 73.565720 +epoch: 0, batch: 5977, sum loss: 6338.651367, avg loss: 4.256986, ppl: 70.596863 +epoch: 0, batch: 5978, sum loss: 7384.922363, avg loss: 4.273682, ppl: 71.785469 +epoch: 0, batch: 5979, sum loss: 7581.500977, avg loss: 4.167950, ppl: 64.582932 +epoch: 0, batch: 5980, sum loss: 7363.738281, avg loss: 4.460169, ppl: 86.502113 +epoch: 0, batch: 5981, sum loss: 7032.683105, avg loss: 4.216237, ppl: 67.777962 +epoch: 0, batch: 5982, sum loss: 7509.566895, avg loss: 4.137503, ppl: 62.646179 +epoch: 0, batch: 5983, sum loss: 8214.374023, avg loss: 4.515873, ppl: 91.457413 +epoch: 0, batch: 5984, sum loss: 7627.115234, avg loss: 4.385921, ppl: 80.312119 +epoch: 0, batch: 5985, sum loss: 6279.534180, avg loss: 4.205984, ppl: 67.086586 +epoch: 0, batch: 5986, sum loss: 7866.049805, avg loss: 4.459212, ppl: 86.419373 +epoch: 0, batch: 5987, sum loss: 6576.753906, avg loss: 4.002893, ppl: 54.756355 +epoch: 0, batch: 5988, sum loss: 8002.702148, avg loss: 4.463303, ppl: 86.773659 +epoch: 0, batch: 5989, sum loss: 8411.690430, avg loss: 4.467175, ppl: 87.110291 +epoch: 0, batch: 5990, sum loss: 7541.458984, avg loss: 4.361746, ppl: 78.393913 +epoch: 0, batch: 5991, sum loss: 6446.392578, avg loss: 4.263487, ppl: 71.057350 +epoch: 0, batch: 5992, sum loss: 7708.507812, avg loss: 4.435275, ppl: 
84.375328 +epoch: 0, batch: 5993, sum loss: 7633.377930, avg loss: 4.425147, ppl: 83.525085 +epoch: 0, batch: 5994, sum loss: 7456.573730, avg loss: 4.342792, ppl: 76.922012 +epoch: 0, batch: 5995, sum loss: 5516.540039, avg loss: 4.157151, ppl: 63.889256 +epoch: 0, batch: 5996, sum loss: 7067.762207, avg loss: 4.330737, ppl: 76.000252 +epoch: 0, batch: 5997, sum loss: 5735.047363, avg loss: 4.238764, ppl: 69.322105 +epoch: 0, batch: 5998, sum loss: 7632.193359, avg loss: 4.326640, ppl: 75.689552 +epoch: 0, batch: 5999, sum loss: 6147.300781, avg loss: 4.095470, ppl: 60.067589 +epoch: 0, batch: 6000, sum loss: 7014.839355, avg loss: 4.246271, ppl: 69.844482 +epoch: 0, batch: 6001, sum loss: 8121.263672, avg loss: 4.462233, ppl: 86.680855 +epoch: 0, batch: 6002, sum loss: 6887.112793, avg loss: 4.011132, ppl: 55.209320 +epoch: 0, batch: 6003, sum loss: 6931.257812, avg loss: 4.223801, ppl: 68.292580 +epoch: 0, batch: 6004, sum loss: 6251.053711, avg loss: 4.350072, ppl: 77.484070 +epoch: 0, batch: 6005, sum loss: 7851.035645, avg loss: 4.540796, ppl: 93.765388 +epoch: 0, batch: 6006, sum loss: 6891.016602, avg loss: 4.380811, ppl: 79.902824 +epoch: 0, batch: 6007, sum loss: 6803.444824, avg loss: 4.347249, ppl: 77.265617 +epoch: 0, batch: 6008, sum loss: 7171.605469, avg loss: 4.462729, ppl: 86.723854 +epoch: 0, batch: 6009, sum loss: 6945.377441, avg loss: 4.219549, ppl: 68.002823 +epoch: 0, batch: 6010, sum loss: 6340.729492, avg loss: 4.144268, ppl: 63.071407 +epoch: 0, batch: 6011, sum loss: 7449.312500, avg loss: 4.239791, ppl: 69.393341 +epoch: 0, batch: 6012, sum loss: 7303.995117, avg loss: 4.494766, ppl: 89.547234 +epoch: 0, batch: 6013, sum loss: 6722.145508, avg loss: 4.342472, ppl: 76.897362 +epoch: 0, batch: 6014, sum loss: 7294.630859, avg loss: 4.329158, ppl: 75.880356 +epoch: 0, batch: 6015, sum loss: 9197.746094, avg loss: 4.497675, ppl: 89.808121 +epoch: 0, batch: 6016, sum loss: 8191.450684, avg loss: 4.498325, ppl: 89.866508 +epoch: 0, batch: 
6017, sum loss: 6369.553223, avg loss: 4.218247, ppl: 67.914352 +epoch: 0, batch: 6018, sum loss: 7031.310059, avg loss: 4.438958, ppl: 84.686668 +epoch: 0, batch: 6019, sum loss: 8121.163574, avg loss: 4.349846, ppl: 77.466560 +epoch: 0, batch: 6020, sum loss: 6823.141113, avg loss: 4.137745, ppl: 62.661354 +epoch: 0, batch: 6021, sum loss: 6498.476074, avg loss: 4.329431, ppl: 75.901093 +epoch: 0, batch: 6022, sum loss: 7434.739258, avg loss: 4.375950, ppl: 79.515373 +epoch: 0, batch: 6023, sum loss: 7651.541504, avg loss: 4.425415, ppl: 83.547432 +epoch: 0, batch: 6024, sum loss: 7860.396973, avg loss: 4.640140, ppl: 103.558800 +epoch: 0, batch: 6025, sum loss: 7758.675781, avg loss: 4.380958, ppl: 79.914520 +epoch: 0, batch: 6026, sum loss: 7710.252930, avg loss: 4.304999, ppl: 74.069130 +epoch: 0, batch: 6027, sum loss: 6736.842773, avg loss: 4.363240, ppl: 78.511116 +epoch: 0, batch: 6028, sum loss: 7743.197266, avg loss: 4.359909, ppl: 78.249977 +epoch: 0, batch: 6029, sum loss: 7230.973145, avg loss: 4.382408, ppl: 80.030525 +epoch: 0, batch: 6030, sum loss: 7489.494629, avg loss: 4.356891, ppl: 78.014183 +epoch: 0, batch: 6031, sum loss: 8480.641602, avg loss: 4.513381, ppl: 91.229706 +epoch: 0, batch: 6032, sum loss: 6987.583496, avg loss: 4.430934, ppl: 84.009880 +epoch: 0, batch: 6033, sum loss: 7419.171875, avg loss: 4.193992, ppl: 66.286888 +epoch: 0, batch: 6034, sum loss: 6164.005859, avg loss: 4.181822, ppl: 65.485077 +epoch: 0, batch: 6035, sum loss: 6668.963379, avg loss: 4.434151, ppl: 84.280556 +epoch: 0, batch: 6036, sum loss: 8517.815430, avg loss: 4.450269, ppl: 85.650002 +epoch: 0, batch: 6037, sum loss: 6502.106445, avg loss: 4.211209, ppl: 67.438042 +epoch: 0, batch: 6038, sum loss: 8050.177734, avg loss: 4.360877, ppl: 78.325760 +epoch: 0, batch: 6039, sum loss: 5989.765625, avg loss: 4.239041, ppl: 69.341347 +epoch: 0, batch: 6040, sum loss: 7133.672852, avg loss: 4.281916, ppl: 72.378998 +epoch: 0, batch: 6041, sum loss: 6021.607422, 
avg loss: 4.115932, ppl: 61.309299 +epoch: 0, batch: 6042, sum loss: 8296.760742, avg loss: 4.382864, ppl: 80.066978 +epoch: 0, batch: 6043, sum loss: 8101.391602, avg loss: 4.341581, ppl: 76.828934 +epoch: 0, batch: 6044, sum loss: 6753.408691, avg loss: 4.454755, ppl: 86.035095 +epoch: 0, batch: 6045, sum loss: 6528.587402, avg loss: 4.306456, ppl: 74.177147 +epoch: 0, batch: 6046, sum loss: 6949.516113, avg loss: 4.292475, ppl: 73.147263 +epoch: 0, batch: 6047, sum loss: 7700.055664, avg loss: 4.407588, ppl: 82.071228 +epoch: 0, batch: 6048, sum loss: 6323.531250, avg loss: 4.352052, ppl: 77.637589 +epoch: 0, batch: 6049, sum loss: 7891.382812, avg loss: 4.450865, ppl: 85.701027 +epoch: 0, batch: 6050, sum loss: 6623.459961, avg loss: 4.343253, ppl: 76.957451 +epoch: 0, batch: 6051, sum loss: 7078.393066, avg loss: 4.248735, ppl: 70.016815 +epoch: 0, batch: 6052, sum loss: 6832.597656, avg loss: 4.249128, ppl: 70.044296 +epoch: 0, batch: 6053, sum loss: 7194.989258, avg loss: 4.360600, ppl: 78.304062 +epoch: 0, batch: 6054, sum loss: 7690.932617, avg loss: 4.256188, ppl: 70.540596 +epoch: 0, batch: 6055, sum loss: 7587.581543, avg loss: 4.468541, ppl: 87.229332 +epoch: 0, batch: 6056, sum loss: 7168.199219, avg loss: 4.346998, ppl: 77.246201 +epoch: 0, batch: 6057, sum loss: 8003.596191, avg loss: 4.330950, ppl: 76.016487 +epoch: 0, batch: 6058, sum loss: 6220.621094, avg loss: 4.095208, ppl: 60.051838 +epoch: 0, batch: 6059, sum loss: 6963.999023, avg loss: 4.207855, ppl: 67.212196 +epoch: 0, batch: 6060, sum loss: 7161.348633, avg loss: 4.242505, ppl: 69.581940 +epoch: 0, batch: 6061, sum loss: 6975.761719, avg loss: 4.276985, ppl: 72.022942 +epoch: 0, batch: 6062, sum loss: 7116.987793, avg loss: 4.310714, ppl: 74.493645 +epoch: 0, batch: 6063, sum loss: 7986.033691, avg loss: 4.400019, ppl: 81.452393 +epoch: 0, batch: 6064, sum loss: 7332.848633, avg loss: 4.290725, ppl: 73.019371 +epoch: 0, batch: 6065, sum loss: 6451.111816, avg loss: 4.269432, ppl: 
71.481026 +epoch: 0, batch: 6066, sum loss: 6591.494141, avg loss: 4.153430, ppl: 63.651981 +epoch: 0, batch: 6067, sum loss: 8035.302734, avg loss: 4.451691, ppl: 85.771873 +epoch: 0, batch: 6068, sum loss: 6775.622559, avg loss: 4.425619, ppl: 83.564529 +epoch: 0, batch: 6069, sum loss: 7090.572754, avg loss: 4.276582, ppl: 71.993927 +epoch: 0, batch: 6070, sum loss: 7027.498535, avg loss: 4.386703, ppl: 80.374985 +epoch: 0, batch: 6071, sum loss: 6594.849121, avg loss: 4.299119, ppl: 73.634926 +epoch: 0, batch: 6072, sum loss: 7271.916016, avg loss: 4.024303, ppl: 55.941303 +epoch: 0, batch: 6073, sum loss: 7035.366211, avg loss: 4.287243, ppl: 72.765572 +epoch: 0, batch: 6074, sum loss: 8970.311523, avg loss: 4.354520, ppl: 77.829445 +epoch: 0, batch: 6075, sum loss: 7473.654297, avg loss: 4.243983, ppl: 69.684837 +epoch: 0, batch: 6076, sum loss: 5393.883789, avg loss: 4.145952, ppl: 63.177753 +epoch: 0, batch: 6077, sum loss: 7108.559570, avg loss: 4.186431, ppl: 65.787575 +epoch: 0, batch: 6078, sum loss: 5547.229980, avg loss: 4.130476, ppl: 62.207558 +epoch: 0, batch: 6079, sum loss: 7118.115723, avg loss: 4.133633, ppl: 62.404205 +epoch: 0, batch: 6080, sum loss: 6752.093750, avg loss: 4.238603, ppl: 69.310928 +epoch: 0, batch: 6081, sum loss: 7526.667480, avg loss: 4.567153, ppl: 96.269592 +epoch: 0, batch: 6082, sum loss: 7393.676270, avg loss: 4.419412, ppl: 83.047447 +epoch: 0, batch: 6083, sum loss: 7878.479980, avg loss: 4.486606, ppl: 88.819489 +epoch: 0, batch: 6084, sum loss: 6471.559570, avg loss: 4.260408, ppl: 70.838875 +epoch: 0, batch: 6085, sum loss: 7548.101562, avg loss: 4.274123, ppl: 71.817139 +epoch: 0, batch: 6086, sum loss: 8171.613770, avg loss: 4.650890, ppl: 104.678093 +epoch: 0, batch: 6087, sum loss: 8098.414062, avg loss: 4.601371, ppl: 99.620834 +epoch: 0, batch: 6088, sum loss: 6373.250488, avg loss: 4.251668, ppl: 70.222443 +epoch: 0, batch: 6089, sum loss: 7490.854492, avg loss: 4.322478, ppl: 75.375160 +epoch: 0, batch: 
6090, sum loss: 6483.615723, avg loss: 4.265537, ppl: 71.203133 +epoch: 0, batch: 6091, sum loss: 7419.567871, avg loss: 4.196589, ppl: 66.459221 +epoch: 0, batch: 6092, sum loss: 8143.910645, avg loss: 4.385520, ppl: 80.279953 +epoch: 0, batch: 6093, sum loss: 6583.909668, avg loss: 4.188238, ppl: 65.906540 +epoch: 0, batch: 6094, sum loss: 7584.075195, avg loss: 4.533219, ppl: 93.057617 +epoch: 0, batch: 6095, sum loss: 6755.464355, avg loss: 4.375301, ppl: 79.463745 +epoch: 0, batch: 6096, sum loss: 6947.847656, avg loss: 4.106293, ppl: 60.721191 +epoch: 0, batch: 6097, sum loss: 7095.296387, avg loss: 4.284599, ppl: 72.573463 +epoch: 0, batch: 6098, sum loss: 7662.353516, avg loss: 4.314388, ppl: 74.767876 +epoch: 0, batch: 6099, sum loss: 6946.259766, avg loss: 4.301090, ppl: 73.780151 +epoch: 0, batch: 6100, sum loss: 6313.768555, avg loss: 4.234587, ppl: 69.033142 +epoch: 0, batch: 6101, sum loss: 7144.887695, avg loss: 4.141964, ppl: 62.926285 +epoch: 0, batch: 6102, sum loss: 6562.287109, avg loss: 4.252941, ppl: 70.311874 +epoch: 0, batch: 6103, sum loss: 8395.573242, avg loss: 4.706039, ppl: 110.613144 +epoch: 0, batch: 6104, sum loss: 7234.813477, avg loss: 4.411472, ppl: 82.390640 +epoch: 0, batch: 6105, sum loss: 7209.811523, avg loss: 4.141190, ppl: 62.877605 +epoch: 0, batch: 6106, sum loss: 8672.495117, avg loss: 4.498182, ppl: 89.853653 +epoch: 0, batch: 6107, sum loss: 7186.396484, avg loss: 4.508404, ppl: 90.776848 +epoch: 0, batch: 6108, sum loss: 7591.640625, avg loss: 4.395854, ppl: 81.113914 +epoch: 0, batch: 6109, sum loss: 6802.020508, avg loss: 4.340792, ppl: 76.768295 +epoch: 0, batch: 6110, sum loss: 7039.255859, avg loss: 4.097355, ppl: 60.180893 +epoch: 0, batch: 6111, sum loss: 7813.895996, avg loss: 4.119081, ppl: 61.502666 +epoch: 0, batch: 6112, sum loss: 8966.759766, avg loss: 4.490115, ppl: 89.131714 +epoch: 0, batch: 6113, sum loss: 7318.632324, avg loss: 4.427485, ppl: 83.720551 +epoch: 0, batch: 6114, sum loss: 5902.409180, 
avg loss: 4.177218, ppl: 65.184258 +epoch: 0, batch: 6115, sum loss: 7531.839844, avg loss: 4.260091, ppl: 70.816414 +epoch: 0, batch: 6116, sum loss: 7551.788086, avg loss: 4.450081, ppl: 85.633911 +epoch: 0, batch: 6117, sum loss: 6373.410156, avg loss: 4.016012, ppl: 55.479397 +epoch: 0, batch: 6118, sum loss: 9644.091797, avg loss: 4.739111, ppl: 114.332565 +epoch: 0, batch: 6119, sum loss: 6562.091309, avg loss: 4.185007, ppl: 65.693970 +epoch: 0, batch: 6120, sum loss: 7348.835449, avg loss: 4.206546, ppl: 67.124283 +epoch: 0, batch: 6121, sum loss: 7395.137695, avg loss: 4.279593, ppl: 72.211075 +epoch: 0, batch: 6122, sum loss: 5935.140625, avg loss: 4.185572, ppl: 65.731064 +epoch: 0, batch: 6123, sum loss: 7826.506348, avg loss: 4.352895, ppl: 77.703110 +epoch: 0, batch: 6124, sum loss: 5748.246094, avg loss: 4.114707, ppl: 61.234268 +epoch: 0, batch: 6125, sum loss: 7986.982422, avg loss: 4.374032, ppl: 79.362984 +epoch: 0, batch: 6126, sum loss: 6169.548340, avg loss: 4.140636, ppl: 62.842804 +epoch: 0, batch: 6127, sum loss: 7747.510742, avg loss: 4.301783, ppl: 73.831291 +epoch: 0, batch: 6128, sum loss: 7511.232422, avg loss: 4.476301, ppl: 87.908867 +epoch: 0, batch: 6129, sum loss: 6250.288086, avg loss: 4.208948, ppl: 67.285728 +epoch: 0, batch: 6130, sum loss: 6753.085938, avg loss: 4.282236, ppl: 72.402161 +epoch: 0, batch: 6131, sum loss: 7256.213867, avg loss: 4.253349, ppl: 70.340607 +epoch: 0, batch: 6132, sum loss: 7107.874512, avg loss: 4.355315, ppl: 77.891373 +epoch: 0, batch: 6133, sum loss: 7369.976074, avg loss: 4.101267, ppl: 60.416779 +epoch: 0, batch: 6134, sum loss: 8056.644531, avg loss: 4.376233, ppl: 79.537857 +epoch: 0, batch: 6135, sum loss: 6785.983398, avg loss: 4.170856, ppl: 64.770905 +epoch: 0, batch: 6136, sum loss: 6433.797363, avg loss: 4.255157, ppl: 70.467880 +epoch: 0, batch: 6137, sum loss: 7927.161133, avg loss: 4.205390, ppl: 67.046768 +epoch: 0, batch: 6138, sum loss: 6393.041992, avg loss: 4.087623, ppl: 
59.598038 +epoch: 0, batch: 6139, sum loss: 7208.650391, avg loss: 4.347799, ppl: 77.308105 +epoch: 0, batch: 6140, sum loss: 8164.045898, avg loss: 4.214789, ppl: 67.679878 +epoch: 0, batch: 6141, sum loss: 6900.041504, avg loss: 4.154149, ppl: 63.697739 +epoch: 0, batch: 6142, sum loss: 6840.714844, avg loss: 4.123396, ppl: 61.768677 +epoch: 0, batch: 6143, sum loss: 7583.046875, avg loss: 4.447535, ppl: 85.416138 +epoch: 0, batch: 6144, sum loss: 6811.467773, avg loss: 4.289338, ppl: 72.918152 +epoch: 0, batch: 6145, sum loss: 8223.578125, avg loss: 4.341910, ppl: 76.854218 +epoch: 0, batch: 6146, sum loss: 7554.052246, avg loss: 4.208386, ppl: 67.247910 +epoch: 0, batch: 6147, sum loss: 6991.553223, avg loss: 4.234739, ppl: 69.043648 +epoch: 0, batch: 6148, sum loss: 6386.805176, avg loss: 4.068029, ppl: 58.441658 +epoch: 0, batch: 6149, sum loss: 7370.665527, avg loss: 4.400398, ppl: 81.483276 +epoch: 0, batch: 6150, sum loss: 7724.722656, avg loss: 4.256046, ppl: 70.530540 +epoch: 0, batch: 6151, sum loss: 6973.700684, avg loss: 4.422131, ppl: 83.273560 +epoch: 0, batch: 6152, sum loss: 6635.824707, avg loss: 4.186640, ppl: 65.801346 +epoch: 0, batch: 6153, sum loss: 6492.675781, avg loss: 4.117106, ppl: 61.381344 +epoch: 0, batch: 6154, sum loss: 6380.328613, avg loss: 4.293626, ppl: 73.231544 +epoch: 0, batch: 6155, sum loss: 6933.362793, avg loss: 4.232822, ppl: 68.911423 +epoch: 0, batch: 6156, sum loss: 7457.422852, avg loss: 4.191918, ppl: 66.149567 +epoch: 0, batch: 6157, sum loss: 6534.143066, avg loss: 4.435943, ppl: 84.431679 +epoch: 0, batch: 6158, sum loss: 7654.949707, avg loss: 4.432513, ppl: 84.142578 +epoch: 0, batch: 6159, sum loss: 7480.273926, avg loss: 4.343946, ppl: 77.010788 +epoch: 0, batch: 6160, sum loss: 6520.537109, avg loss: 3.961444, ppl: 52.533150 +epoch: 0, batch: 6161, sum loss: 6895.116211, avg loss: 4.497793, ppl: 89.818657 +epoch: 0, batch: 6162, sum loss: 9055.759766, avg loss: 4.378994, ppl: 79.757759 +epoch: 0, batch: 
6163, sum loss: 6681.733887, avg loss: 4.223599, ppl: 68.278778 +epoch: 0, batch: 6164, sum loss: 6327.095703, avg loss: 4.218064, ppl: 67.901886 +epoch: 0, batch: 6165, sum loss: 7126.896484, avg loss: 4.219595, ppl: 68.005966 +epoch: 0, batch: 6166, sum loss: 7731.373535, avg loss: 4.510720, ppl: 90.987282 +epoch: 0, batch: 6167, sum loss: 7887.660156, avg loss: 4.413912, ppl: 82.591919 +epoch: 0, batch: 6168, sum loss: 5912.187500, avg loss: 3.989331, ppl: 54.018715 +epoch: 0, batch: 6169, sum loss: 6729.580078, avg loss: 4.272749, ppl: 71.718513 +epoch: 0, batch: 6170, sum loss: 6810.004883, avg loss: 4.242993, ppl: 69.615929 +epoch: 0, batch: 6171, sum loss: 7486.286133, avg loss: 4.362638, ppl: 78.463814 +epoch: 0, batch: 6172, sum loss: 6729.962891, avg loss: 4.203600, ppl: 66.926826 +epoch: 0, batch: 6173, sum loss: 6799.996582, avg loss: 4.040402, ppl: 56.849190 +epoch: 0, batch: 6174, sum loss: 6405.289062, avg loss: 4.087613, ppl: 59.597469 +epoch: 0, batch: 6175, sum loss: 7485.260742, avg loss: 4.564183, ppl: 95.984169 +epoch: 0, batch: 6176, sum loss: 7394.855469, avg loss: 4.194473, ppl: 66.318756 +epoch: 0, batch: 6177, sum loss: 7769.341797, avg loss: 4.170339, ppl: 64.737404 +epoch: 0, batch: 6178, sum loss: 7127.229004, avg loss: 4.244925, ppl: 69.750526 +epoch: 0, batch: 6179, sum loss: 7850.228027, avg loss: 4.506445, ppl: 90.599205 +epoch: 0, batch: 6180, sum loss: 6835.667480, avg loss: 4.427246, ppl: 83.700592 +epoch: 0, batch: 6181, sum loss: 7980.390625, avg loss: 4.297464, ppl: 73.513123 +epoch: 0, batch: 6182, sum loss: 6918.427734, avg loss: 4.145254, ppl: 63.133633 +epoch: 0, batch: 6183, sum loss: 6541.325195, avg loss: 4.132233, ppl: 62.316929 +epoch: 0, batch: 6184, sum loss: 7521.083984, avg loss: 4.439837, ppl: 84.761124 +epoch: 0, batch: 6185, sum loss: 7244.589844, avg loss: 4.202199, ppl: 66.833099 +epoch: 0, batch: 6186, sum loss: 6304.918945, avg loss: 4.169920, ppl: 64.710274 +epoch: 0, batch: 6187, sum loss: 5896.609863, 
avg loss: 4.226961, ppl: 68.508698 +epoch: 0, batch: 6188, sum loss: 6289.927246, avg loss: 4.173807, ppl: 64.962303 +epoch: 0, batch: 6189, sum loss: 6081.395996, avg loss: 4.125778, ppl: 61.915943 +epoch: 0, batch: 6190, sum loss: 6689.011230, avg loss: 4.141803, ppl: 62.916145 +epoch: 0, batch: 6191, sum loss: 7185.297852, avg loss: 4.122374, ppl: 61.705563 +epoch: 0, batch: 6192, sum loss: 7439.752930, avg loss: 4.241592, ppl: 69.518463 +epoch: 0, batch: 6193, sum loss: 7416.693848, avg loss: 4.209248, ppl: 67.305878 +epoch: 0, batch: 6194, sum loss: 7922.020508, avg loss: 4.513972, ppl: 91.283661 +epoch: 0, batch: 6195, sum loss: 6211.659668, avg loss: 4.146635, ppl: 63.220905 +epoch: 0, batch: 6196, sum loss: 7724.464844, avg loss: 4.296143, ppl: 73.416054 +epoch: 0, batch: 6197, sum loss: 7799.028809, avg loss: 4.396296, ppl: 81.149696 +epoch: 0, batch: 6198, sum loss: 8189.046875, avg loss: 4.365164, ppl: 78.662285 +epoch: 0, batch: 6199, sum loss: 8065.692383, avg loss: 4.338726, ppl: 76.609879 +epoch: 0, batch: 6200, sum loss: 7616.178711, avg loss: 4.150506, ppl: 63.466137 +epoch: 0, batch: 6201, sum loss: 6710.537109, avg loss: 4.354664, ppl: 77.840652 +epoch: 0, batch: 6202, sum loss: 8860.327148, avg loss: 4.423528, ppl: 83.389984 +epoch: 0, batch: 6203, sum loss: 6609.760742, avg loss: 4.231601, ppl: 68.827354 +epoch: 0, batch: 6204, sum loss: 7263.369629, avg loss: 4.497443, ppl: 89.787224 +epoch: 0, batch: 6205, sum loss: 8140.263672, avg loss: 4.282095, ppl: 72.391975 +epoch: 0, batch: 6206, sum loss: 6774.882812, avg loss: 4.323473, ppl: 75.450249 +epoch: 0, batch: 6207, sum loss: 7840.350098, avg loss: 4.254124, ppl: 70.395103 +epoch: 0, batch: 6208, sum loss: 6341.662109, avg loss: 3.996006, ppl: 54.380547 +epoch: 0, batch: 6209, sum loss: 8241.855469, avg loss: 4.198602, ppl: 66.593185 +epoch: 0, batch: 6210, sum loss: 7483.336914, avg loss: 4.420164, ppl: 83.109886 +epoch: 0, batch: 6211, sum loss: 7161.152832, avg loss: 4.431406, ppl: 
84.049553 +epoch: 0, batch: 6212, sum loss: 6164.820801, avg loss: 4.026663, ppl: 56.073471 +epoch: 0, batch: 6213, sum loss: 6892.218750, avg loss: 4.291543, ppl: 73.079178 +epoch: 0, batch: 6214, sum loss: 7192.660156, avg loss: 4.218569, ppl: 67.936218 +epoch: 0, batch: 6215, sum loss: 8284.665039, avg loss: 4.638670, ppl: 103.406769 +epoch: 0, batch: 6216, sum loss: 6353.229980, avg loss: 4.128155, ppl: 62.063293 +epoch: 0, batch: 6217, sum loss: 7303.340332, avg loss: 4.268464, ppl: 71.411835 +epoch: 0, batch: 6218, sum loss: 6444.001465, avg loss: 4.316143, ppl: 74.899185 +epoch: 0, batch: 6219, sum loss: 8202.784180, avg loss: 4.438736, ppl: 84.667854 +epoch: 0, batch: 6220, sum loss: 8113.168945, avg loss: 4.190686, ppl: 66.068115 +epoch: 0, batch: 6221, sum loss: 7785.480469, avg loss: 4.436171, ppl: 84.450966 +epoch: 0, batch: 6222, sum loss: 6779.224121, avg loss: 4.136195, ppl: 62.564323 +epoch: 0, batch: 6223, sum loss: 7304.935547, avg loss: 4.517586, ppl: 91.614151 +epoch: 0, batch: 6224, sum loss: 5690.029297, avg loss: 3.855033, ppl: 47.230194 +epoch: 0, batch: 6225, sum loss: 6602.165039, avg loss: 4.105824, ppl: 60.692734 +epoch: 0, batch: 6226, sum loss: 6587.007324, avg loss: 4.263435, ppl: 71.053658 +epoch: 0, batch: 6227, sum loss: 6414.002930, avg loss: 4.151458, ppl: 63.526569 +epoch: 0, batch: 6228, sum loss: 7377.145996, avg loss: 4.234871, ppl: 69.052795 +epoch: 0, batch: 6229, sum loss: 6103.099121, avg loss: 4.140501, ppl: 62.834324 +epoch: 0, batch: 6230, sum loss: 6166.011230, avg loss: 4.110674, ppl: 60.987804 +epoch: 0, batch: 6231, sum loss: 6475.645508, avg loss: 4.161726, ppl: 64.182205 +epoch: 0, batch: 6232, sum loss: 7592.551270, avg loss: 4.306609, ppl: 74.188461 +epoch: 0, batch: 6233, sum loss: 7031.412109, avg loss: 3.897679, ppl: 49.287899 +epoch: 0, batch: 6234, sum loss: 7421.441406, avg loss: 4.317302, ppl: 74.986023 +epoch: 0, batch: 6235, sum loss: 6848.023438, avg loss: 4.127802, ppl: 62.041401 +epoch: 0, batch: 
6236, sum loss: 7473.497559, avg loss: 4.165829, ppl: 64.446098 +epoch: 0, batch: 6237, sum loss: 6986.810059, avg loss: 4.144016, ppl: 63.055531 +epoch: 0, batch: 6238, sum loss: 7441.399414, avg loss: 4.308859, ppl: 74.355591 +epoch: 0, batch: 6239, sum loss: 7670.727539, avg loss: 4.363326, ppl: 78.517860 +epoch: 0, batch: 6240, sum loss: 8529.254883, avg loss: 4.482005, ppl: 88.411728 +epoch: 0, batch: 6241, sum loss: 6643.964355, avg loss: 4.147294, ppl: 63.262554 +epoch: 0, batch: 6242, sum loss: 7217.443359, avg loss: 4.446977, ppl: 85.368454 +epoch: 0, batch: 6243, sum loss: 8115.199219, avg loss: 4.325799, ppl: 75.625946 +epoch: 0, batch: 6244, sum loss: 7553.167480, avg loss: 4.360951, ppl: 78.331589 +epoch: 0, batch: 6245, sum loss: 6900.872070, avg loss: 4.409503, ppl: 82.228584 +epoch: 0, batch: 6246, sum loss: 6112.281738, avg loss: 4.113245, ppl: 61.144783 +epoch: 0, batch: 6247, sum loss: 6844.423340, avg loss: 4.356730, ppl: 78.001686 +epoch: 0, batch: 6248, sum loss: 6405.415527, avg loss: 4.119238, ppl: 61.512375 +epoch: 0, batch: 6249, sum loss: 5945.445801, avg loss: 4.169317, ppl: 64.671249 +epoch: 0, batch: 6250, sum loss: 6817.021484, avg loss: 4.187359, ppl: 65.848679 +epoch: 0, batch: 6251, sum loss: 7046.074707, avg loss: 4.096555, ppl: 60.132786 +epoch: 0, batch: 6252, sum loss: 7168.630859, avg loss: 4.323662, ppl: 75.464462 +epoch: 0, batch: 6253, sum loss: 7905.685547, avg loss: 4.187334, ppl: 65.846985 +epoch: 0, batch: 6254, sum loss: 7100.792969, avg loss: 4.229180, ppl: 68.660896 +epoch: 0, batch: 6255, sum loss: 7739.076172, avg loss: 4.271013, ppl: 71.594139 +epoch: 0, batch: 6256, sum loss: 7058.441895, avg loss: 4.094224, ppl: 59.992764 +epoch: 0, batch: 6257, sum loss: 8339.507812, avg loss: 4.252681, ppl: 70.293633 +epoch: 0, batch: 6258, sum loss: 6984.738281, avg loss: 4.298300, ppl: 73.574631 +epoch: 0, batch: 6259, sum loss: 8774.156250, avg loss: 4.335058, ppl: 76.329369 +epoch: 0, batch: 6260, sum loss: 6831.471191, 
avg loss: 4.165531, ppl: 64.426895 +epoch: 0, batch: 6261, sum loss: 7049.356445, avg loss: 4.168750, ppl: 64.634628 +epoch: 0, batch: 6262, sum loss: 8280.570312, avg loss: 4.587574, ppl: 98.255730 +epoch: 0, batch: 6263, sum loss: 7910.471680, avg loss: 4.454094, ppl: 85.978256 +epoch: 0, batch: 6264, sum loss: 8025.804688, avg loss: 4.257721, ppl: 70.648819 +epoch: 0, batch: 6265, sum loss: 7726.337891, avg loss: 4.290027, ppl: 72.968414 +epoch: 0, batch: 6266, sum loss: 6306.494141, avg loss: 4.207134, ppl: 67.163788 +epoch: 0, batch: 6267, sum loss: 7521.163574, avg loss: 4.300265, ppl: 73.719315 +epoch: 0, batch: 6268, sum loss: 7757.868164, avg loss: 4.269603, ppl: 71.493233 +epoch: 0, batch: 6269, sum loss: 7726.258789, avg loss: 4.245197, ppl: 69.769524 +epoch: 0, batch: 6270, sum loss: 6345.272461, avg loss: 4.152665, ppl: 63.603287 +epoch: 0, batch: 6271, sum loss: 6090.681641, avg loss: 4.247337, ppl: 69.918991 +epoch: 0, batch: 6272, sum loss: 8055.319824, avg loss: 4.452913, ppl: 85.876762 +epoch: 0, batch: 6273, sum loss: 6770.953125, avg loss: 4.081346, ppl: 59.225136 +epoch: 0, batch: 6274, sum loss: 7348.509766, avg loss: 4.177663, ppl: 65.213295 +epoch: 0, batch: 6275, sum loss: 6338.719238, avg loss: 4.037401, ppl: 56.678825 +epoch: 0, batch: 6276, sum loss: 7775.723633, avg loss: 4.324651, ppl: 75.539162 +epoch: 0, batch: 6277, sum loss: 8917.186523, avg loss: 4.503629, ppl: 90.344414 +epoch: 0, batch: 6278, sum loss: 6620.860352, avg loss: 4.364443, ppl: 78.605629 +epoch: 0, batch: 6279, sum loss: 6396.321289, avg loss: 4.408216, ppl: 82.122826 +epoch: 0, batch: 6280, sum loss: 7399.879883, avg loss: 4.265060, ppl: 71.169220 +epoch: 0, batch: 6281, sum loss: 7619.736328, avg loss: 4.096632, ppl: 60.137432 +epoch: 0, batch: 6282, sum loss: 7536.130859, avg loss: 4.245708, ppl: 69.805130 +epoch: 0, batch: 6283, sum loss: 9268.610352, avg loss: 4.436865, ppl: 84.509575 +epoch: 0, batch: 6284, sum loss: 7014.724609, avg loss: 4.246202, ppl: 
69.839622 +epoch: 0, batch: 6285, sum loss: 6644.849609, avg loss: 4.306448, ppl: 74.176582 +epoch: 0, batch: 6286, sum loss: 7223.977539, avg loss: 4.139815, ppl: 62.791225 +epoch: 0, batch: 6287, sum loss: 7153.508301, avg loss: 4.059880, ppl: 57.967369 +epoch: 0, batch: 6288, sum loss: 7955.158203, avg loss: 4.186925, ppl: 65.820107 +epoch: 0, batch: 6289, sum loss: 6883.670410, avg loss: 4.315781, ppl: 74.872086 +epoch: 0, batch: 6290, sum loss: 6765.083496, avg loss: 4.265501, ppl: 71.200554 +epoch: 0, batch: 6291, sum loss: 6928.487793, avg loss: 3.970480, ppl: 53.009968 +epoch: 0, batch: 6292, sum loss: 7159.497070, avg loss: 4.307760, ppl: 74.273949 +epoch: 0, batch: 6293, sum loss: 7215.989258, avg loss: 4.287575, ppl: 72.789757 +epoch: 0, batch: 6294, sum loss: 8404.262695, avg loss: 4.477498, ppl: 88.014145 +epoch: 0, batch: 6295, sum loss: 7805.444824, avg loss: 4.314785, ppl: 74.797508 +epoch: 0, batch: 6296, sum loss: 6285.101562, avg loss: 4.407505, ppl: 82.064423 +epoch: 0, batch: 6297, sum loss: 6996.637695, avg loss: 4.250691, ppl: 70.153900 +epoch: 0, batch: 6298, sum loss: 6811.756836, avg loss: 4.423219, ppl: 83.364182 +epoch: 0, batch: 6299, sum loss: 7678.841797, avg loss: 4.410593, ppl: 82.318230 +epoch: 0, batch: 6300, sum loss: 6315.568359, avg loss: 4.111698, ppl: 61.050301 +epoch: 0, batch: 6301, sum loss: 7210.038574, avg loss: 4.286586, ppl: 72.717804 +epoch: 0, batch: 6302, sum loss: 7150.965820, avg loss: 4.341813, ppl: 76.846703 +epoch: 0, batch: 6303, sum loss: 8020.011719, avg loss: 4.277339, ppl: 72.048500 +epoch: 0, batch: 6304, sum loss: 7553.217285, avg loss: 4.602814, ppl: 99.764633 +epoch: 0, batch: 6305, sum loss: 6696.601562, avg loss: 4.217004, ppl: 67.829948 +epoch: 0, batch: 6306, sum loss: 6582.187988, avg loss: 4.307714, ppl: 74.270546 +epoch: 0, batch: 6307, sum loss: 7220.712891, avg loss: 4.334161, ppl: 76.260971 +epoch: 0, batch: 6308, sum loss: 7051.442871, avg loss: 4.207305, ppl: 67.175255 +epoch: 0, batch: 
6309, sum loss: 7709.414551, avg loss: 4.245272, ppl: 69.774750 +epoch: 0, batch: 6310, sum loss: 6810.792480, avg loss: 4.183533, ppl: 65.597214 +epoch: 0, batch: 6311, sum loss: 5717.518066, avg loss: 3.962244, ppl: 52.575161 +epoch: 0, batch: 6312, sum loss: 7255.481445, avg loss: 4.238015, ppl: 69.270195 +epoch: 0, batch: 6313, sum loss: 6980.760254, avg loss: 4.192649, ppl: 66.197906 +epoch: 0, batch: 6314, sum loss: 7203.686035, avg loss: 4.212682, ppl: 67.537415 +epoch: 0, batch: 6315, sum loss: 6672.546875, avg loss: 4.266334, ppl: 71.259918 +epoch: 0, batch: 6316, sum loss: 7668.047363, avg loss: 4.171952, ppl: 64.841888 +epoch: 0, batch: 6317, sum loss: 7966.862305, avg loss: 4.341615, ppl: 76.831497 +epoch: 0, batch: 6318, sum loss: 7172.744629, avg loss: 4.433093, ppl: 84.191422 +epoch: 0, batch: 6319, sum loss: 8065.027344, avg loss: 4.296765, ppl: 73.461746 +epoch: 0, batch: 6320, sum loss: 8229.542969, avg loss: 4.546709, ppl: 94.321487 +epoch: 0, batch: 6321, sum loss: 7489.340820, avg loss: 4.209860, ppl: 67.347099 +epoch: 0, batch: 6322, sum loss: 6730.444336, avg loss: 4.152032, ppl: 63.563053 +epoch: 0, batch: 6323, sum loss: 7394.050293, avg loss: 4.089630, ppl: 59.717770 +epoch: 0, batch: 6324, sum loss: 7519.281250, avg loss: 4.732084, ppl: 113.531891 +epoch: 0, batch: 6325, sum loss: 7337.141602, avg loss: 4.493044, ppl: 89.393181 +epoch: 0, batch: 6326, sum loss: 5712.785156, avg loss: 4.133708, ppl: 62.408936 +epoch: 0, batch: 6327, sum loss: 9587.585938, avg loss: 4.520314, ppl: 91.864418 +epoch: 0, batch: 6328, sum loss: 7622.727539, avg loss: 4.411301, ppl: 82.376541 +epoch: 0, batch: 6329, sum loss: 8016.312500, avg loss: 4.300597, ppl: 73.743782 +epoch: 0, batch: 6330, sum loss: 6515.607910, avg loss: 4.233664, ppl: 68.969444 +epoch: 0, batch: 6331, sum loss: 6434.196289, avg loss: 4.067128, ppl: 58.389011 +epoch: 0, batch: 6332, sum loss: 7287.437500, avg loss: 4.366350, ppl: 78.755623 +epoch: 0, batch: 6333, sum loss: 7357.198242, 
avg loss: 4.363700, ppl: 78.547218 +epoch: 0, batch: 6334, sum loss: 7278.537109, avg loss: 4.492924, ppl: 89.382439 +epoch: 0, batch: 6335, sum loss: 7458.667969, avg loss: 4.173849, ppl: 64.965027 +epoch: 0, batch: 6336, sum loss: 6433.775879, avg loss: 3.988702, ppl: 53.984760 +epoch: 0, batch: 6337, sum loss: 6458.112305, avg loss: 4.166524, ppl: 64.490921 +epoch: 0, batch: 6338, sum loss: 7164.789062, avg loss: 4.390190, ppl: 80.655716 +epoch: 0, batch: 6339, sum loss: 7259.720703, avg loss: 4.210975, ppl: 67.422256 +epoch: 0, batch: 6340, sum loss: 7116.591797, avg loss: 4.310474, ppl: 74.475777 +epoch: 0, batch: 6341, sum loss: 7884.097656, avg loss: 4.231936, ppl: 68.850426 +epoch: 0, batch: 6342, sum loss: 6740.331543, avg loss: 4.236538, ppl: 69.167976 +epoch: 0, batch: 6343, sum loss: 7494.938965, avg loss: 4.236823, ppl: 69.187668 +epoch: 0, batch: 6344, sum loss: 7269.889648, avg loss: 4.376815, ppl: 79.584145 +epoch: 0, batch: 6345, sum loss: 8040.355469, avg loss: 4.432390, ppl: 84.132271 +epoch: 0, batch: 6346, sum loss: 6778.539062, avg loss: 4.207659, ppl: 67.199059 +epoch: 0, batch: 6347, sum loss: 6694.014160, avg loss: 4.324299, ppl: 75.512550 +epoch: 0, batch: 6348, sum loss: 7534.562012, avg loss: 4.416508, ppl: 82.806595 +epoch: 0, batch: 6349, sum loss: 7365.753906, avg loss: 4.477662, ppl: 88.028587 +epoch: 0, batch: 6350, sum loss: 7369.252930, avg loss: 4.357926, ppl: 78.095024 +epoch: 0, batch: 6351, sum loss: 7569.418945, avg loss: 4.345246, ppl: 77.111031 +epoch: 0, batch: 6352, sum loss: 7445.019043, avg loss: 4.288605, ppl: 72.864769 +epoch: 0, batch: 6353, sum loss: 7643.560059, avg loss: 4.133889, ppl: 62.420185 +epoch: 0, batch: 6354, sum loss: 8093.881348, avg loss: 4.396459, ppl: 81.162971 +epoch: 0, batch: 6355, sum loss: 8278.966797, avg loss: 4.494553, ppl: 89.528152 +epoch: 0, batch: 6356, sum loss: 6411.903809, avg loss: 4.226700, ppl: 68.490829 +epoch: 0, batch: 6357, sum loss: 7590.702148, avg loss: 4.087615, ppl: 
59.597610 +epoch: 0, batch: 6358, sum loss: 7213.989746, avg loss: 4.226122, ppl: 68.451256 +epoch: 0, batch: 6359, sum loss: 6741.628906, avg loss: 4.250712, ppl: 70.155342 +epoch: 0, batch: 6360, sum loss: 7708.275391, avg loss: 4.287138, ppl: 72.757973 +epoch: 0, batch: 6361, sum loss: 7218.202148, avg loss: 4.324867, ppl: 75.555443 +epoch: 0, batch: 6362, sum loss: 6017.716309, avg loss: 4.121724, ppl: 61.665440 +epoch: 0, batch: 6363, sum loss: 6716.528320, avg loss: 4.125631, ppl: 61.906879 +epoch: 0, batch: 6364, sum loss: 7863.068848, avg loss: 4.213863, ppl: 67.617233 +epoch: 0, batch: 6365, sum loss: 6127.323730, avg loss: 4.134497, ppl: 62.458149 +epoch: 0, batch: 6366, sum loss: 7040.015137, avg loss: 4.233323, ppl: 68.945930 +epoch: 0, batch: 6367, sum loss: 6751.450684, avg loss: 4.214388, ppl: 67.652771 +epoch: 0, batch: 6368, sum loss: 6961.569824, avg loss: 4.224254, ppl: 68.323494 +epoch: 0, batch: 6369, sum loss: 7548.378906, avg loss: 4.274280, ppl: 71.828407 +epoch: 0, batch: 6370, sum loss: 7729.007812, avg loss: 4.162094, ppl: 64.205803 +epoch: 0, batch: 6371, sum loss: 7402.308594, avg loss: 3.950005, ppl: 51.935604 +epoch: 0, batch: 6372, sum loss: 6899.812988, avg loss: 4.285598, ppl: 72.645996 +epoch: 0, batch: 6373, sum loss: 6658.934082, avg loss: 4.001763, ppl: 54.694511 +epoch: 0, batch: 6374, sum loss: 8000.745117, avg loss: 4.184490, ppl: 65.660019 +epoch: 0, batch: 6375, sum loss: 8340.068359, avg loss: 4.433848, ppl: 84.254997 +epoch: 0, batch: 6376, sum loss: 6547.021484, avg loss: 4.026458, ppl: 56.062004 +epoch: 0, batch: 6377, sum loss: 7286.670410, avg loss: 4.102855, ppl: 60.512817 +epoch: 0, batch: 6378, sum loss: 7557.961914, avg loss: 4.440636, ppl: 84.828850 +epoch: 0, batch: 6379, sum loss: 5811.896484, avg loss: 3.905844, ppl: 49.692024 +epoch: 0, batch: 6380, sum loss: 6851.428711, avg loss: 4.013725, ppl: 55.352692 +epoch: 0, batch: 6381, sum loss: 7036.708984, avg loss: 4.181051, ppl: 65.434578 +epoch: 0, batch: 
6382, sum loss: 6705.590332, avg loss: 4.126517, ppl: 61.961723 +epoch: 0, batch: 6383, sum loss: 5356.977539, avg loss: 4.046055, ppl: 57.171490 +epoch: 0, batch: 6384, sum loss: 7463.286621, avg loss: 4.153193, ppl: 63.636837 +epoch: 0, batch: 6385, sum loss: 6258.979980, avg loss: 4.098874, ppl: 60.272358 +epoch: 0, batch: 6386, sum loss: 7377.855469, avg loss: 4.098809, ppl: 60.268452 +epoch: 0, batch: 6387, sum loss: 6997.523438, avg loss: 4.138098, ppl: 62.683498 +epoch: 0, batch: 6388, sum loss: 6525.951172, avg loss: 4.130349, ppl: 62.199608 +epoch: 0, batch: 6389, sum loss: 6887.973145, avg loss: 4.259724, ppl: 70.790421 +epoch: 0, batch: 6390, sum loss: 8394.009766, avg loss: 4.365059, ppl: 78.654068 +epoch: 0, batch: 6391, sum loss: 7525.918457, avg loss: 4.117023, ppl: 61.376282 +epoch: 0, batch: 6392, sum loss: 7793.690918, avg loss: 4.294045, ppl: 73.262215 +epoch: 0, batch: 6393, sum loss: 6082.917969, avg loss: 3.986185, ppl: 53.849052 +epoch: 0, batch: 6394, sum loss: 6568.981934, avg loss: 4.085188, ppl: 59.453110 +epoch: 0, batch: 6395, sum loss: 6064.277832, avg loss: 4.139439, ppl: 62.767574 +epoch: 0, batch: 6396, sum loss: 6607.520508, avg loss: 4.142646, ppl: 62.969238 +epoch: 0, batch: 6397, sum loss: 7049.154785, avg loss: 4.129558, ppl: 62.150421 +epoch: 0, batch: 6398, sum loss: 6658.148926, avg loss: 4.284523, ppl: 72.567924 +epoch: 0, batch: 6399, sum loss: 8245.536133, avg loss: 4.452233, ppl: 85.818390 +epoch: 0, batch: 6400, sum loss: 6885.351074, avg loss: 4.287267, ppl: 72.767342 +epoch: 0, batch: 6401, sum loss: 7069.560059, avg loss: 4.100673, ppl: 60.380920 +epoch: 0, batch: 6402, sum loss: 7608.376465, avg loss: 4.255244, ppl: 70.474030 +epoch: 0, batch: 6403, sum loss: 8386.099609, avg loss: 4.404464, ppl: 81.815300 +epoch: 0, batch: 6404, sum loss: 7513.932617, avg loss: 4.328302, ppl: 75.815475 +epoch: 0, batch: 6405, sum loss: 6673.804199, avg loss: 4.147796, ppl: 63.294357 +epoch: 0, batch: 6406, sum loss: 7296.244141, 
avg loss: 4.371626, ppl: 79.172295 +epoch: 0, batch: 6407, sum loss: 7235.760254, avg loss: 4.261343, ppl: 70.905144 +epoch: 0, batch: 6408, sum loss: 7313.461426, avg loss: 4.108686, ppl: 60.866714 +epoch: 0, batch: 6409, sum loss: 6659.440918, avg loss: 3.985303, ppl: 53.801586 +epoch: 0, batch: 6410, sum loss: 8293.209961, avg loss: 4.470733, ppl: 87.420792 +epoch: 0, batch: 6411, sum loss: 6683.360840, avg loss: 3.968742, ppl: 52.917900 +epoch: 0, batch: 6412, sum loss: 7318.940430, avg loss: 4.262633, ppl: 70.996658 +epoch: 0, batch: 6413, sum loss: 6103.991699, avg loss: 3.984329, ppl: 53.749199 +epoch: 0, batch: 6414, sum loss: 6652.350098, avg loss: 4.205025, ppl: 67.022285 +epoch: 0, batch: 6415, sum loss: 6924.133789, avg loss: 4.025659, ppl: 56.017216 +epoch: 0, batch: 6416, sum loss: 6810.449219, avg loss: 4.167962, ppl: 64.583672 +epoch: 0, batch: 6417, sum loss: 7858.299805, avg loss: 4.088606, ppl: 59.656666 +epoch: 0, batch: 6418, sum loss: 6688.949219, avg loss: 4.198964, ppl: 66.617256 +epoch: 0, batch: 6419, sum loss: 7257.226074, avg loss: 4.419748, ppl: 83.075371 +epoch: 0, batch: 6420, sum loss: 6825.301270, avg loss: 4.144081, ppl: 63.059620 +epoch: 0, batch: 6421, sum loss: 7358.706543, avg loss: 4.243775, ppl: 69.670387 +epoch: 0, batch: 6422, sum loss: 7345.353516, avg loss: 4.419587, ppl: 83.061981 +epoch: 0, batch: 6423, sum loss: 6580.117188, avg loss: 4.275580, ppl: 71.921837 +epoch: 0, batch: 6424, sum loss: 7379.463867, avg loss: 4.416196, ppl: 82.780815 +epoch: 0, batch: 6425, sum loss: 5845.948730, avg loss: 3.894703, ppl: 49.141441 +epoch: 0, batch: 6426, sum loss: 8241.141602, avg loss: 4.376602, ppl: 79.567177 +epoch: 0, batch: 6427, sum loss: 7126.889160, avg loss: 4.167771, ppl: 64.571381 +epoch: 0, batch: 6428, sum loss: 7125.362305, avg loss: 4.130645, ppl: 62.218060 +epoch: 0, batch: 6429, sum loss: 7767.636719, avg loss: 4.235353, ppl: 69.086029 +epoch: 0, batch: 6430, sum loss: 6004.052734, avg loss: 4.103932, ppl: 
60.578037 +epoch: 0, batch: 6431, sum loss: 7287.313477, avg loss: 4.205028, ppl: 67.022446 +epoch: 0, batch: 6432, sum loss: 6489.106445, avg loss: 4.314565, ppl: 74.781097 +epoch: 0, batch: 6433, sum loss: 6123.482910, avg loss: 3.796332, ppl: 44.537506 +epoch: 0, batch: 6434, sum loss: 5534.529785, avg loss: 3.984543, ppl: 53.760693 +epoch: 0, batch: 6435, sum loss: 8463.760742, avg loss: 4.313843, ppl: 74.727135 +epoch: 0, batch: 6436, sum loss: 7076.109863, avg loss: 4.265286, ppl: 71.185272 +epoch: 0, batch: 6437, sum loss: 7177.230469, avg loss: 4.416757, ppl: 82.827248 +epoch: 0, batch: 6438, sum loss: 6901.550781, avg loss: 4.239282, ppl: 69.358009 +epoch: 0, batch: 6439, sum loss: 8032.902344, avg loss: 4.372838, ppl: 79.268242 +epoch: 0, batch: 6440, sum loss: 6883.337891, avg loss: 4.176783, ppl: 65.155884 +epoch: 0, batch: 6441, sum loss: 7548.949219, avg loss: 4.378741, ppl: 79.737602 +epoch: 0, batch: 6442, sum loss: 7532.202148, avg loss: 4.131762, ppl: 62.287579 +epoch: 0, batch: 6443, sum loss: 7888.605469, avg loss: 4.466934, ppl: 87.089272 +epoch: 0, batch: 6444, sum loss: 6663.709473, avg loss: 4.118485, ppl: 61.466080 +epoch: 0, batch: 6445, sum loss: 6371.328125, avg loss: 4.081568, ppl: 59.238297 +epoch: 0, batch: 6446, sum loss: 5259.881348, avg loss: 4.090110, ppl: 59.746452 +epoch: 0, batch: 6447, sum loss: 6743.053711, avg loss: 4.273165, ppl: 71.748344 +epoch: 0, batch: 6448, sum loss: 7253.265137, avg loss: 4.261613, ppl: 70.924286 +epoch: 0, batch: 6449, sum loss: 6566.145020, avg loss: 4.238957, ppl: 69.335526 +epoch: 0, batch: 6450, sum loss: 6838.666504, avg loss: 4.260851, ppl: 70.870293 +epoch: 0, batch: 6451, sum loss: 6663.160156, avg loss: 4.077821, ppl: 59.016747 +epoch: 0, batch: 6452, sum loss: 6749.789551, avg loss: 4.363148, ppl: 78.503853 +epoch: 0, batch: 6453, sum loss: 6974.631836, avg loss: 4.211734, ppl: 67.473457 +epoch: 0, batch: 6454, sum loss: 6785.406738, avg loss: 4.188523, ppl: 65.925339 +epoch: 0, batch: 
6455, sum loss: 6912.663086, avg loss: 4.259189, ppl: 70.752556 +epoch: 0, batch: 6456, sum loss: 7105.297363, avg loss: 4.085853, ppl: 59.492641 +epoch: 0, batch: 6457, sum loss: 7925.625488, avg loss: 4.452599, ppl: 85.849739 +epoch: 0, batch: 6458, sum loss: 7560.221680, avg loss: 4.276144, ppl: 71.962387 +epoch: 0, batch: 6459, sum loss: 6446.314453, avg loss: 4.044112, ppl: 57.060478 +epoch: 0, batch: 6460, sum loss: 9349.572266, avg loss: 4.562993, ppl: 95.869949 +epoch: 0, batch: 6461, sum loss: 8046.010254, avg loss: 4.014975, ppl: 55.421913 +epoch: 0, batch: 6462, sum loss: 7387.273438, avg loss: 4.272570, ppl: 71.705688 +epoch: 0, batch: 6463, sum loss: 6425.033203, avg loss: 4.232565, ppl: 68.893715 +epoch: 0, batch: 6464, sum loss: 6584.186523, avg loss: 4.228765, ppl: 68.632385 +epoch: 0, batch: 6465, sum loss: 7279.082031, avg loss: 4.142904, ppl: 62.985455 +epoch: 0, batch: 6466, sum loss: 5941.176270, avg loss: 4.249769, ppl: 70.089203 +epoch: 0, batch: 6467, sum loss: 7425.069336, avg loss: 4.104516, ppl: 60.613403 +epoch: 0, batch: 6468, sum loss: 8118.286621, avg loss: 4.257098, ppl: 70.604805 +epoch: 0, batch: 6469, sum loss: 7244.122070, avg loss: 4.311977, ppl: 74.587830 +epoch: 0, batch: 6470, sum loss: 7181.362793, avg loss: 4.320916, ppl: 75.257545 +epoch: 0, batch: 6471, sum loss: 7075.268555, avg loss: 4.386404, ppl: 80.350922 +epoch: 0, batch: 6472, sum loss: 7868.213867, avg loss: 4.450347, ppl: 85.656700 +epoch: 0, batch: 6473, sum loss: 8494.697266, avg loss: 4.606669, ppl: 100.149986 +epoch: 0, batch: 6474, sum loss: 8411.518555, avg loss: 4.291591, ppl: 73.082626 +epoch: 0, batch: 6475, sum loss: 6218.915039, avg loss: 4.283000, ppl: 72.457451 +epoch: 0, batch: 6476, sum loss: 7105.276367, avg loss: 4.290626, ppl: 73.012131 +epoch: 0, batch: 6477, sum loss: 6618.631836, avg loss: 4.033292, ppl: 56.446419 +epoch: 0, batch: 6478, sum loss: 7049.391602, avg loss: 3.960332, ppl: 52.474766 +epoch: 0, batch: 6479, sum loss: 7039.332520, 
avg loss: 4.145661, ppl: 63.159378 +epoch: 0, batch: 6480, sum loss: 5665.306641, avg loss: 3.989653, ppl: 54.036129 +epoch: 0, batch: 6481, sum loss: 6207.810059, avg loss: 4.084086, ppl: 59.387630 +epoch: 0, batch: 6482, sum loss: 6990.788086, avg loss: 4.260078, ppl: 70.815506 +epoch: 0, batch: 6483, sum loss: 7219.012207, avg loss: 4.500631, ppl: 90.073982 +epoch: 0, batch: 6484, sum loss: 5970.597656, avg loss: 4.210577, ppl: 67.395386 +epoch: 0, batch: 6485, sum loss: 6458.645508, avg loss: 4.113787, ppl: 61.177971 +epoch: 0, batch: 6486, sum loss: 7463.738770, avg loss: 4.411193, ppl: 82.367661 +epoch: 0, batch: 6487, sum loss: 8448.985352, avg loss: 4.295366, ppl: 73.359047 +epoch: 0, batch: 6488, sum loss: 7110.763672, avg loss: 4.317403, ppl: 74.993637 +epoch: 0, batch: 6489, sum loss: 6545.631836, avg loss: 4.158597, ppl: 63.981693 +epoch: 0, batch: 6490, sum loss: 7984.301758, avg loss: 4.304206, ppl: 74.010422 +epoch: 0, batch: 6491, sum loss: 6342.993164, avg loss: 4.053030, ppl: 57.571663 +epoch: 0, batch: 6492, sum loss: 6606.747070, avg loss: 4.038354, ppl: 56.732880 +epoch: 0, batch: 6493, sum loss: 6930.754395, avg loss: 4.027167, ppl: 56.101742 +epoch: 0, batch: 6494, sum loss: 7177.820312, avg loss: 4.209866, ppl: 67.347488 +epoch: 0, batch: 6495, sum loss: 6292.727539, avg loss: 4.051982, ppl: 57.511326 +epoch: 0, batch: 6496, sum loss: 7902.985352, avg loss: 4.500561, ppl: 90.067665 +epoch: 0, batch: 6497, sum loss: 7225.318359, avg loss: 4.077494, ppl: 58.997417 +epoch: 0, batch: 6498, sum loss: 6528.676270, avg loss: 4.317908, ppl: 75.031487 +epoch: 0, batch: 6499, sum loss: 6714.513184, avg loss: 4.044888, ppl: 57.104763 +epoch: 0, batch: 6500, sum loss: 6682.940430, avg loss: 4.047814, ppl: 57.272118 +epoch: 0, batch: 6501, sum loss: 7866.812988, avg loss: 4.261546, ppl: 70.919518 +epoch: 0, batch: 6502, sum loss: 9593.868164, avg loss: 4.258264, ppl: 70.687172 +epoch: 0, batch: 6503, sum loss: 6948.381348, avg loss: 4.305069, ppl: 
74.074326 +epoch: 0, batch: 6504, sum loss: 6343.937012, avg loss: 4.027896, ppl: 56.142685 +epoch: 0, batch: 6505, sum loss: 6526.044922, avg loss: 4.339126, ppl: 76.640495 +epoch: 0, batch: 6506, sum loss: 6359.165039, avg loss: 3.913332, ppl: 50.065502 +epoch: 0, batch: 6507, sum loss: 6732.969727, avg loss: 4.192385, ppl: 66.180428 +epoch: 0, batch: 6508, sum loss: 6820.842285, avg loss: 4.038391, ppl: 56.734989 +epoch: 0, batch: 6509, sum loss: 6195.667969, avg loss: 4.062733, ppl: 58.132980 +epoch: 0, batch: 6510, sum loss: 6917.675293, avg loss: 4.033630, ppl: 56.465504 +epoch: 0, batch: 6511, sum loss: 6724.662109, avg loss: 4.234674, ppl: 69.039169 +epoch: 0, batch: 6512, sum loss: 6285.829590, avg loss: 3.991003, ppl: 54.109123 +epoch: 0, batch: 6513, sum loss: 5407.745117, avg loss: 4.075166, ppl: 58.860237 +epoch: 0, batch: 6514, sum loss: 8053.832031, avg loss: 4.227733, ppl: 68.561638 +epoch: 0, batch: 6515, sum loss: 7270.909180, avg loss: 4.117163, ppl: 61.384827 +epoch: 0, batch: 6516, sum loss: 6088.354492, avg loss: 4.204665, ppl: 66.998131 +epoch: 0, batch: 6517, sum loss: 6863.845703, avg loss: 4.105171, ppl: 60.653130 +epoch: 0, batch: 6518, sum loss: 8088.210938, avg loss: 4.111952, ppl: 61.065819 +epoch: 0, batch: 6519, sum loss: 7804.125977, avg loss: 4.262221, ppl: 70.967415 +epoch: 0, batch: 6520, sum loss: 7086.841309, avg loss: 4.248706, ppl: 70.014748 +epoch: 0, batch: 6521, sum loss: 7080.164062, avg loss: 4.232017, ppl: 68.855980 +epoch: 0, batch: 6522, sum loss: 7142.877930, avg loss: 4.279735, ppl: 72.221306 +epoch: 0, batch: 6523, sum loss: 7742.856445, avg loss: 4.187591, ppl: 65.863937 +epoch: 0, batch: 6524, sum loss: 6973.867676, avg loss: 4.221470, ppl: 68.133560 +epoch: 0, batch: 6525, sum loss: 7812.667969, avg loss: 4.357316, ppl: 78.047409 +epoch: 0, batch: 6526, sum loss: 6946.785645, avg loss: 4.120276, ppl: 61.576263 +epoch: 0, batch: 6527, sum loss: 6350.508789, avg loss: 3.991520, ppl: 54.137138 +epoch: 0, batch: 
6528, sum loss: 6668.997070, avg loss: 4.302579, ppl: 73.890106 +epoch: 0, batch: 6529, sum loss: 7145.851562, avg loss: 4.281517, ppl: 72.350113 +epoch: 0, batch: 6530, sum loss: 6913.078125, avg loss: 4.172045, ppl: 64.847916 +epoch: 0, batch: 6531, sum loss: 6917.426758, avg loss: 4.238619, ppl: 69.312088 +epoch: 0, batch: 6532, sum loss: 5824.892578, avg loss: 4.202664, ppl: 66.864243 +epoch: 0, batch: 6533, sum loss: 8191.992188, avg loss: 4.369062, ppl: 78.969559 +epoch: 0, batch: 6534, sum loss: 7738.840332, avg loss: 4.357455, ppl: 78.058243 +epoch: 0, batch: 6535, sum loss: 7607.639160, avg loss: 4.367187, ppl: 78.821556 +epoch: 0, batch: 6536, sum loss: 6813.019043, avg loss: 4.208165, ppl: 67.233063 +epoch: 0, batch: 6537, sum loss: 6659.911621, avg loss: 4.019258, ppl: 55.659821 +epoch: 0, batch: 6538, sum loss: 6742.076660, avg loss: 4.224359, ppl: 68.330658 +epoch: 0, batch: 6539, sum loss: 5617.996094, avg loss: 3.882513, ppl: 48.546047 +epoch: 0, batch: 6540, sum loss: 7620.481934, avg loss: 4.198613, ppl: 66.593880 +epoch: 0, batch: 6541, sum loss: 7785.710938, avg loss: 4.299123, ppl: 73.635170 +epoch: 0, batch: 6542, sum loss: 6642.780273, avg loss: 4.108089, ppl: 60.830387 +epoch: 0, batch: 6543, sum loss: 7444.449219, avg loss: 4.161235, ppl: 64.150688 +epoch: 0, batch: 6544, sum loss: 5910.434570, avg loss: 4.200736, ppl: 66.735405 +epoch: 0, batch: 6545, sum loss: 7069.525391, avg loss: 4.287159, ppl: 72.759460 +epoch: 0, batch: 6546, sum loss: 6508.534668, avg loss: 4.067834, ppl: 58.430260 +epoch: 0, batch: 6547, sum loss: 6764.698242, avg loss: 4.127333, ppl: 62.012295 +epoch: 0, batch: 6548, sum loss: 8495.710938, avg loss: 4.226722, ppl: 68.492363 +epoch: 0, batch: 6549, sum loss: 7180.940430, avg loss: 4.122239, ppl: 61.697235 +epoch: 0, batch: 6550, sum loss: 7399.990234, avg loss: 4.099718, ppl: 60.323250 +epoch: 0, batch: 6551, sum loss: 7058.119141, avg loss: 4.161627, ppl: 64.175842 +epoch: 0, batch: 6552, sum loss: 6185.518555, 
avg loss: 4.109979, ppl: 60.945446 +epoch: 0, batch: 6553, sum loss: 8125.094727, avg loss: 4.314973, ppl: 74.811630 +epoch: 0, batch: 6554, sum loss: 7229.812988, avg loss: 4.164639, ppl: 64.369438 +epoch: 0, batch: 6555, sum loss: 7661.197266, avg loss: 4.313737, ppl: 74.719223 +epoch: 0, batch: 6556, sum loss: 6340.770020, avg loss: 4.141587, ppl: 62.902584 +epoch: 0, batch: 6557, sum loss: 7259.899414, avg loss: 4.233177, ppl: 68.935905 +epoch: 0, batch: 6558, sum loss: 6559.252441, avg loss: 4.250974, ppl: 70.173706 +epoch: 0, batch: 6559, sum loss: 6299.817871, avg loss: 4.183146, ppl: 65.571815 +epoch: 0, batch: 6560, sum loss: 6907.857910, avg loss: 4.119176, ppl: 61.508533 +epoch: 0, batch: 6561, sum loss: 5027.803711, avg loss: 4.019028, ppl: 55.646973 +epoch: 0, batch: 6562, sum loss: 7392.720703, avg loss: 4.325758, ppl: 75.622849 +epoch: 0, batch: 6563, sum loss: 7531.620605, avg loss: 4.243166, ppl: 69.627975 +epoch: 0, batch: 6564, sum loss: 7281.813965, avg loss: 4.241010, ppl: 69.477966 +epoch: 0, batch: 6565, sum loss: 5860.603516, avg loss: 4.153511, ppl: 63.657082 +epoch: 0, batch: 6566, sum loss: 7912.416504, avg loss: 4.400676, ppl: 81.505966 +epoch: 0, batch: 6567, sum loss: 8124.470703, avg loss: 4.446891, ppl: 85.361168 +epoch: 0, batch: 6568, sum loss: 7376.031738, avg loss: 4.390495, ppl: 80.680367 +epoch: 0, batch: 6569, sum loss: 7355.237305, avg loss: 4.109071, ppl: 60.890110 +epoch: 0, batch: 6570, sum loss: 7220.939453, avg loss: 4.230193, ppl: 68.730507 +epoch: 0, batch: 6571, sum loss: 6329.980469, avg loss: 4.081225, ppl: 59.217991 +epoch: 0, batch: 6572, sum loss: 5901.283203, avg loss: 3.913318, ppl: 50.064777 +epoch: 0, batch: 6573, sum loss: 7045.402344, avg loss: 4.108106, ppl: 60.831402 +epoch: 0, batch: 6574, sum loss: 7651.851562, avg loss: 4.332871, ppl: 76.162666 +epoch: 0, batch: 6575, sum loss: 6758.472168, avg loss: 4.288371, ppl: 72.847672 +epoch: 0, batch: 6576, sum loss: 6871.366211, avg loss: 4.220741, ppl: 
68.083900 +epoch: 0, batch: 6577, sum loss: 5541.343262, avg loss: 3.845485, ppl: 46.781357 +epoch: 0, batch: 6578, sum loss: 5986.686035, avg loss: 4.145905, ppl: 63.174740 +epoch: 0, batch: 6579, sum loss: 6648.408691, avg loss: 4.026898, ppl: 56.086681 +epoch: 0, batch: 6580, sum loss: 6972.869629, avg loss: 4.269975, ppl: 71.519859 +epoch: 0, batch: 6581, sum loss: 5961.106445, avg loss: 4.116786, ppl: 61.361736 +epoch: 0, batch: 6582, sum loss: 7842.774414, avg loss: 4.581059, ppl: 97.617760 +epoch: 0, batch: 6583, sum loss: 6066.200195, avg loss: 4.036061, ppl: 56.602959 +epoch: 0, batch: 6584, sum loss: 8254.724609, avg loss: 4.553075, ppl: 94.923836 +epoch: 0, batch: 6585, sum loss: 7527.982910, avg loss: 4.361519, ppl: 78.376122 +epoch: 0, batch: 6586, sum loss: 7773.428223, avg loss: 4.352424, ppl: 77.666473 +epoch: 0, batch: 6587, sum loss: 8218.226562, avg loss: 4.447092, ppl: 85.378311 +epoch: 0, batch: 6588, sum loss: 6671.295410, avg loss: 4.174778, ppl: 65.025398 +epoch: 0, batch: 6589, sum loss: 5931.209473, avg loss: 3.941003, ppl: 51.470203 +epoch: 0, batch: 6590, sum loss: 6820.501465, avg loss: 4.091483, ppl: 59.828529 +epoch: 0, batch: 6591, sum loss: 6900.844727, avg loss: 4.291570, ppl: 73.081131 +epoch: 0, batch: 6592, sum loss: 8312.951172, avg loss: 4.577616, ppl: 97.282219 +epoch: 0, batch: 6593, sum loss: 6286.940430, avg loss: 4.024930, ppl: 55.976391 +epoch: 0, batch: 6594, sum loss: 6592.109375, avg loss: 4.190788, ppl: 66.074821 +epoch: 0, batch: 6595, sum loss: 7582.129395, avg loss: 4.193655, ppl: 66.264580 +epoch: 0, batch: 6596, sum loss: 5686.237305, avg loss: 4.129439, ppl: 62.143044 +epoch: 0, batch: 6597, sum loss: 7271.378418, avg loss: 4.098860, ppl: 60.271553 +epoch: 0, batch: 6598, sum loss: 7711.631836, avg loss: 4.291392, ppl: 73.068130 +epoch: 0, batch: 6599, sum loss: 8092.096191, avg loss: 4.290613, ppl: 73.011223 +epoch: 0, batch: 6600, sum loss: 7100.355957, avg loss: 4.434951, ppl: 84.347977 +epoch: 0, batch: 
6601, sum loss: 7685.644531, avg loss: 4.143205, ppl: 63.004406 +epoch: 0, batch: 6602, sum loss: 7535.683594, avg loss: 4.330853, ppl: 76.009056 +epoch: 0, batch: 6603, sum loss: 7120.801758, avg loss: 4.027603, ppl: 56.126225 +epoch: 0, batch: 6604, sum loss: 6332.538574, avg loss: 3.916227, ppl: 50.210636 +epoch: 0, batch: 6605, sum loss: 7124.914551, avg loss: 4.220921, ppl: 68.096176 +epoch: 0, batch: 6606, sum loss: 7006.035645, avg loss: 4.330059, ppl: 75.948769 +epoch: 0, batch: 6607, sum loss: 6932.521484, avg loss: 4.297905, ppl: 73.545586 +epoch: 0, batch: 6608, sum loss: 8063.241211, avg loss: 4.246046, ppl: 69.828766 +epoch: 0, batch: 6609, sum loss: 5877.090332, avg loss: 3.979073, ppl: 53.467438 +epoch: 0, batch: 6610, sum loss: 6714.277344, avg loss: 3.890079, ppl: 48.914776 +epoch: 0, batch: 6611, sum loss: 6363.124023, avg loss: 4.205634, ppl: 67.063110 +epoch: 0, batch: 6612, sum loss: 6877.117676, avg loss: 4.135368, ppl: 62.512611 +epoch: 0, batch: 6613, sum loss: 7600.611328, avg loss: 4.040729, ppl: 56.867786 +epoch: 0, batch: 6614, sum loss: 7324.241699, avg loss: 4.265720, ppl: 71.216202 +epoch: 0, batch: 6615, sum loss: 6603.937988, avg loss: 3.997541, ppl: 54.464081 +epoch: 0, batch: 6616, sum loss: 6952.062500, avg loss: 4.414008, ppl: 82.599831 +epoch: 0, batch: 6617, sum loss: 8152.929199, avg loss: 4.447861, ppl: 85.444000 +epoch: 0, batch: 6618, sum loss: 7620.782715, avg loss: 4.198779, ppl: 66.604935 +epoch: 0, batch: 6619, sum loss: 6423.924805, avg loss: 4.120542, ppl: 61.592621 +epoch: 0, batch: 6620, sum loss: 6457.806152, avg loss: 4.011060, ppl: 55.205345 +epoch: 0, batch: 6621, sum loss: 6617.190430, avg loss: 4.115168, ppl: 61.262512 +epoch: 0, batch: 6622, sum loss: 6749.067383, avg loss: 4.304252, ppl: 74.013840 +epoch: 0, batch: 6623, sum loss: 7663.115234, avg loss: 4.247847, ppl: 69.954613 +epoch: 0, batch: 6624, sum loss: 7599.122070, avg loss: 4.065876, ppl: 58.315945 +epoch: 0, batch: 6625, sum loss: 8419.585938, 
avg loss: 4.389774, ppl: 80.622185 +epoch: 0, batch: 6626, sum loss: 6620.036133, avg loss: 4.150493, ppl: 63.465290 +epoch: 0, batch: 6627, sum loss: 6707.808105, avg loss: 4.221402, ppl: 68.128944 +epoch: 0, batch: 6628, sum loss: 6735.574707, avg loss: 4.082167, ppl: 59.273758 +epoch: 0, batch: 6629, sum loss: 7458.001953, avg loss: 4.476592, ppl: 87.934441 +epoch: 0, batch: 6630, sum loss: 6613.590820, avg loss: 4.175247, ppl: 65.055885 +epoch: 0, batch: 6631, sum loss: 6140.175293, avg loss: 4.305873, ppl: 74.133934 +epoch: 0, batch: 6632, sum loss: 7736.975586, avg loss: 4.227856, ppl: 68.570038 +epoch: 0, batch: 6633, sum loss: 6863.193359, avg loss: 4.097429, ppl: 60.185368 +epoch: 0, batch: 6634, sum loss: 8218.890625, avg loss: 4.210497, ppl: 67.390053 +epoch: 0, batch: 6635, sum loss: 7557.790039, avg loss: 4.331111, ppl: 76.028740 +epoch: 0, batch: 6636, sum loss: 7688.728516, avg loss: 4.072420, ppl: 58.698822 +epoch: 0, batch: 6637, sum loss: 8407.981445, avg loss: 4.434590, ppl: 84.317535 +epoch: 0, batch: 6638, sum loss: 6440.309082, avg loss: 4.209352, ppl: 67.312943 +epoch: 0, batch: 6639, sum loss: 7018.275879, avg loss: 4.326927, ppl: 75.711281 +epoch: 0, batch: 6640, sum loss: 7090.139160, avg loss: 4.153567, ppl: 63.660694 +epoch: 0, batch: 6641, sum loss: 6551.706055, avg loss: 4.056784, ppl: 57.788174 +epoch: 0, batch: 6642, sum loss: 7174.430176, avg loss: 4.039657, ppl: 56.806835 +epoch: 0, batch: 6643, sum loss: 7746.522461, avg loss: 4.045181, ppl: 57.121540 +epoch: 0, batch: 6644, sum loss: 8431.740234, avg loss: 4.444776, ppl: 85.180801 +epoch: 0, batch: 6645, sum loss: 6255.582520, avg loss: 4.129097, ppl: 62.121830 +epoch: 0, batch: 6646, sum loss: 6951.275391, avg loss: 4.243758, ppl: 69.669159 +epoch: 0, batch: 6647, sum loss: 8079.472656, avg loss: 4.197128, ppl: 66.495102 +epoch: 0, batch: 6648, sum loss: 6930.080078, avg loss: 4.162210, ppl: 64.213310 +epoch: 0, batch: 6649, sum loss: 6265.562500, avg loss: 3.983193, ppl: 
53.688183 +epoch: 0, batch: 6650, sum loss: 6769.251953, avg loss: 4.005475, ppl: 54.897869 +epoch: 0, batch: 6651, sum loss: 6239.087402, avg loss: 3.911654, ppl: 49.981529 +epoch: 0, batch: 6652, sum loss: 7202.005859, avg loss: 3.990031, ppl: 54.056564 +epoch: 0, batch: 6653, sum loss: 7274.115723, avg loss: 4.306759, ppl: 74.199608 +epoch: 0, batch: 6654, sum loss: 7680.449707, avg loss: 4.322144, ppl: 75.350006 +epoch: 0, batch: 6655, sum loss: 7094.570801, avg loss: 4.240628, ppl: 69.451469 +epoch: 0, batch: 6656, sum loss: 8050.869141, avg loss: 4.246239, ppl: 69.842255 +epoch: 0, batch: 6657, sum loss: 7034.850586, avg loss: 4.026818, ppl: 56.082161 +epoch: 0, batch: 6658, sum loss: 5941.463867, avg loss: 4.019935, ppl: 55.697495 +epoch: 0, batch: 6659, sum loss: 6917.586426, avg loss: 4.081172, ppl: 59.214828 +epoch: 0, batch: 6660, sum loss: 6133.850586, avg loss: 3.970130, ppl: 52.991417 +epoch: 0, batch: 6661, sum loss: 7316.275879, avg loss: 4.318935, ppl: 75.108589 +epoch: 0, batch: 6662, sum loss: 7474.669922, avg loss: 4.180464, ppl: 65.396210 +epoch: 0, batch: 6663, sum loss: 6819.037109, avg loss: 4.054124, ppl: 57.634674 +epoch: 0, batch: 6664, sum loss: 7224.730469, avg loss: 4.318429, ppl: 75.070564 +epoch: 0, batch: 6665, sum loss: 7965.175781, avg loss: 4.280052, ppl: 72.244179 +epoch: 0, batch: 6666, sum loss: 6479.795410, avg loss: 4.032231, ppl: 56.386589 +epoch: 0, batch: 6667, sum loss: 6986.316406, avg loss: 3.768239, ppl: 43.303761 +epoch: 0, batch: 6668, sum loss: 7305.321289, avg loss: 3.902416, ppl: 49.521927 +epoch: 0, batch: 6669, sum loss: 6873.319336, avg loss: 4.014789, ppl: 55.411610 +epoch: 0, batch: 6670, sum loss: 8268.256836, avg loss: 4.450084, ppl: 85.634155 +epoch: 0, batch: 6671, sum loss: 7769.333984, avg loss: 4.186064, ppl: 65.763420 +epoch: 0, batch: 6672, sum loss: 8280.275391, avg loss: 4.241945, ppl: 69.542969 +epoch: 0, batch: 6673, sum loss: 6103.343750, avg loss: 3.989114, ppl: 54.006996 +epoch: 0, batch: 
6674, sum loss: 8312.295898, avg loss: 4.076653, ppl: 58.947868 +epoch: 0, batch: 6675, sum loss: 6254.465332, avg loss: 3.963539, ppl: 52.643322 +epoch: 0, batch: 6676, sum loss: 6009.726562, avg loss: 3.943390, ppl: 51.593212 +epoch: 0, batch: 6677, sum loss: 7155.474609, avg loss: 4.315727, ppl: 74.868011 +epoch: 0, batch: 6678, sum loss: 5828.601562, avg loss: 4.081654, ppl: 59.243382 +epoch: 0, batch: 6679, sum loss: 7276.072266, avg loss: 4.150640, ppl: 63.474644 +epoch: 0, batch: 6680, sum loss: 7140.256348, avg loss: 4.094183, ppl: 59.990334 +epoch: 0, batch: 6681, sum loss: 7498.936523, avg loss: 4.426763, ppl: 83.660172 +epoch: 0, batch: 6682, sum loss: 7537.619141, avg loss: 4.282738, ppl: 72.438522 +epoch: 0, batch: 6683, sum loss: 6762.189453, avg loss: 4.277160, ppl: 72.035545 +epoch: 0, batch: 6684, sum loss: 6816.302734, avg loss: 4.096336, ppl: 60.119598 +epoch: 0, batch: 6685, sum loss: 7881.301758, avg loss: 4.368793, ppl: 78.948250 +epoch: 0, batch: 6686, sum loss: 6779.628906, avg loss: 3.997423, ppl: 54.457615 +epoch: 0, batch: 6687, sum loss: 7197.984863, avg loss: 4.167912, ppl: 64.580498 +epoch: 0, batch: 6688, sum loss: 7572.899414, avg loss: 4.372344, ppl: 79.229088 +epoch: 0, batch: 6689, sum loss: 8242.715820, avg loss: 4.386757, ppl: 80.379318 +epoch: 0, batch: 6690, sum loss: 7121.717773, avg loss: 4.279879, ppl: 72.231674 +epoch: 0, batch: 6691, sum loss: 7099.492676, avg loss: 4.284546, ppl: 72.569588 +epoch: 0, batch: 6692, sum loss: 6901.171875, avg loss: 4.042866, ppl: 56.989426 +epoch: 0, batch: 6693, sum loss: 6982.193848, avg loss: 4.153595, ppl: 63.662483 +epoch: 0, batch: 6694, sum loss: 7419.872070, avg loss: 4.316389, ppl: 74.917580 +epoch: 0, batch: 6695, sum loss: 6647.676758, avg loss: 4.302704, ppl: 73.899338 +epoch: 0, batch: 6696, sum loss: 6950.051270, avg loss: 4.181740, ppl: 65.479675 +epoch: 0, batch: 6697, sum loss: 8169.080078, avg loss: 4.525806, ppl: 92.370338 +epoch: 0, batch: 6698, sum loss: 7325.597168, 
avg loss: 4.183665, ppl: 65.605873 +epoch: 0, batch: 6699, sum loss: 6890.270996, avg loss: 4.221980, ppl: 68.168327 +epoch: 0, batch: 6700, sum loss: 7267.952637, avg loss: 4.290409, ppl: 72.996292 +epoch: 0, batch: 6701, sum loss: 6923.773926, avg loss: 4.178500, ppl: 65.267860 +epoch: 0, batch: 6702, sum loss: 6474.798340, avg loss: 3.940839, ppl: 51.461750 +epoch: 0, batch: 6703, sum loss: 6824.200195, avg loss: 3.969867, ppl: 52.977459 +epoch: 0, batch: 6704, sum loss: 7113.189941, avg loss: 4.083346, ppl: 59.343723 +epoch: 0, batch: 6705, sum loss: 5623.534668, avg loss: 4.057384, ppl: 57.822876 +epoch: 0, batch: 6706, sum loss: 7224.601562, avg loss: 4.180904, ppl: 65.424965 +epoch: 0, batch: 6707, sum loss: 6922.862305, avg loss: 4.147911, ppl: 63.301598 +epoch: 0, batch: 6708, sum loss: 6512.678711, avg loss: 4.156145, ppl: 63.825008 +epoch: 0, batch: 6709, sum loss: 6663.883301, avg loss: 3.973693, ppl: 53.180557 +epoch: 0, batch: 6710, sum loss: 6551.336426, avg loss: 3.958512, ppl: 52.379307 +epoch: 0, batch: 6711, sum loss: 6845.369141, avg loss: 4.204772, ppl: 67.005318 +epoch: 0, batch: 6712, sum loss: 7883.092773, avg loss: 4.268053, ppl: 71.382492 +epoch: 0, batch: 6713, sum loss: 7038.310547, avg loss: 4.239946, ppl: 69.404129 +epoch: 0, batch: 6714, sum loss: 6853.916992, avg loss: 4.126380, ppl: 61.953243 +epoch: 0, batch: 6715, sum loss: 7194.247070, avg loss: 4.153722, ppl: 63.670559 +epoch: 0, batch: 6716, sum loss: 7911.881348, avg loss: 4.186181, ppl: 65.771103 +epoch: 0, batch: 6717, sum loss: 7401.923340, avg loss: 4.146736, ppl: 63.227299 +epoch: 0, batch: 6718, sum loss: 6731.579590, avg loss: 4.163005, ppl: 64.264366 +epoch: 0, batch: 6719, sum loss: 8259.134766, avg loss: 4.235454, ppl: 69.093010 +epoch: 0, batch: 6720, sum loss: 7338.791504, avg loss: 4.337347, ppl: 76.504303 +epoch: 0, batch: 6721, sum loss: 7417.859863, avg loss: 4.118745, ppl: 61.482025 +epoch: 0, batch: 6722, sum loss: 7667.215820, avg loss: 4.093548, ppl: 
59.952244 +epoch: 0, batch: 6723, sum loss: 6763.350586, avg loss: 4.357829, ppl: 78.087433 +epoch: 0, batch: 6724, sum loss: 8518.108398, avg loss: 4.352636, ppl: 77.682953 +epoch: 0, batch: 6725, sum loss: 7100.997559, avg loss: 4.216744, ppl: 67.812355 +epoch: 0, batch: 6726, sum loss: 6577.144043, avg loss: 4.149618, ppl: 63.409752 +epoch: 0, batch: 6727, sum loss: 7523.483887, avg loss: 4.238582, ppl: 69.309509 +epoch: 0, batch: 6728, sum loss: 5540.074219, avg loss: 3.826018, ppl: 45.879498 +epoch: 0, batch: 6729, sum loss: 6854.609863, avg loss: 4.270785, ppl: 71.577789 +epoch: 0, batch: 6730, sum loss: 6987.673828, avg loss: 4.237522, ppl: 69.236084 +epoch: 0, batch: 6731, sum loss: 7242.391113, avg loss: 4.255224, ppl: 70.472580 +epoch: 0, batch: 6732, sum loss: 7240.999023, avg loss: 4.209883, ppl: 67.348671 +epoch: 0, batch: 6733, sum loss: 6575.848145, avg loss: 4.079310, ppl: 59.104698 +epoch: 0, batch: 6734, sum loss: 8183.907715, avg loss: 4.207665, ppl: 67.199440 +epoch: 0, batch: 6735, sum loss: 8986.160156, avg loss: 4.264907, ppl: 71.158325 +epoch: 0, batch: 6736, sum loss: 7308.254883, avg loss: 4.091968, ppl: 59.857578 +epoch: 0, batch: 6737, sum loss: 6891.882812, avg loss: 4.109650, ppl: 60.925369 +epoch: 0, batch: 6738, sum loss: 7411.880859, avg loss: 4.267059, ppl: 71.311592 +epoch: 0, batch: 6739, sum loss: 7014.907715, avg loss: 4.190506, ppl: 66.056236 +epoch: 0, batch: 6740, sum loss: 7433.800781, avg loss: 4.267394, ppl: 71.335495 +epoch: 0, batch: 6741, sum loss: 7679.268555, avg loss: 4.254442, ppl: 70.417526 +epoch: 0, batch: 6742, sum loss: 6918.546387, avg loss: 4.265441, ppl: 71.196342 +epoch: 0, batch: 6743, sum loss: 5692.682617, avg loss: 4.057508, ppl: 57.829990 +epoch: 0, batch: 6744, sum loss: 7557.574219, avg loss: 4.152514, ppl: 63.593643 +epoch: 0, batch: 6745, sum loss: 6644.372070, avg loss: 4.073802, ppl: 58.780048 +epoch: 0, batch: 6746, sum loss: 6435.534180, avg loss: 4.078285, ppl: 59.044136 +epoch: 0, batch: 
6747, sum loss: 8267.981445, avg loss: 4.226985, ppl: 68.510330 +epoch: 0, batch: 6748, sum loss: 5315.148926, avg loss: 3.916838, ppl: 50.241325 +epoch: 0, batch: 6749, sum loss: 5161.926758, avg loss: 3.803926, ppl: 44.877003 +epoch: 0, batch: 6750, sum loss: 7219.063477, avg loss: 3.927673, ppl: 50.788658 +epoch: 0, batch: 6751, sum loss: 7815.876953, avg loss: 4.299162, ppl: 73.638084 +epoch: 0, batch: 6752, sum loss: 6903.884766, avg loss: 4.317626, ppl: 75.010376 +epoch: 0, batch: 6753, sum loss: 7231.275391, avg loss: 4.062514, ppl: 58.120258 +epoch: 0, batch: 6754, sum loss: 5654.467285, avg loss: 3.857072, ppl: 47.326569 +epoch: 0, batch: 6755, sum loss: 7302.425781, avg loss: 4.243129, ppl: 69.625389 +epoch: 0, batch: 6756, sum loss: 7516.614746, avg loss: 4.322378, ppl: 75.367615 +epoch: 0, batch: 6757, sum loss: 6386.603516, avg loss: 4.086119, ppl: 59.508472 +epoch: 0, batch: 6758, sum loss: 8807.028320, avg loss: 4.252549, ppl: 70.284317 +epoch: 0, batch: 6759, sum loss: 6922.938965, avg loss: 4.160420, ppl: 64.098465 +epoch: 0, batch: 6760, sum loss: 7312.358398, avg loss: 4.103456, ppl: 60.549187 +epoch: 0, batch: 6761, sum loss: 7536.726074, avg loss: 4.241264, ppl: 69.495628 +epoch: 0, batch: 6762, sum loss: 7930.823242, avg loss: 4.079642, ppl: 59.124290 +epoch: 0, batch: 6763, sum loss: 6372.580078, avg loss: 4.176003, ppl: 65.105103 +epoch: 0, batch: 6764, sum loss: 6274.023926, avg loss: 4.106037, ppl: 60.705643 +epoch: 0, batch: 6765, sum loss: 6513.013672, avg loss: 4.119553, ppl: 61.531738 +epoch: 0, batch: 6766, sum loss: 6971.629883, avg loss: 4.266603, ppl: 71.279121 +epoch: 0, batch: 6767, sum loss: 7632.011719, avg loss: 4.214253, ppl: 67.643616 +epoch: 0, batch: 6768, sum loss: 6271.639648, avg loss: 4.069850, ppl: 58.548206 +epoch: 0, batch: 6769, sum loss: 7139.773926, avg loss: 3.977590, ppl: 53.388218 +epoch: 0, batch: 6770, sum loss: 7131.849609, avg loss: 4.207581, ppl: 67.193802 +epoch: 0, batch: 6771, sum loss: 7406.931641, 
avg loss: 4.156528, ppl: 63.849422 +epoch: 0, batch: 6772, sum loss: 7291.379883, avg loss: 4.064314, ppl: 58.224976 +epoch: 0, batch: 6773, sum loss: 6907.733398, avg loss: 4.176381, ppl: 65.129700 +epoch: 0, batch: 6774, sum loss: 7062.878418, avg loss: 4.211615, ppl: 67.465416 +epoch: 0, batch: 6775, sum loss: 7530.459961, avg loss: 4.235354, ppl: 69.086159 +epoch: 0, batch: 6776, sum loss: 6778.243652, avg loss: 4.163540, ppl: 64.298759 +epoch: 0, batch: 6777, sum loss: 7346.605957, avg loss: 4.205269, ppl: 67.038651 +epoch: 0, batch: 6778, sum loss: 6998.330078, avg loss: 4.143476, ppl: 63.021473 +epoch: 0, batch: 6779, sum loss: 7054.784180, avg loss: 4.211812, ppl: 67.478668 +epoch: 0, batch: 6780, sum loss: 5488.035156, avg loss: 4.104738, ppl: 60.626873 +epoch: 0, batch: 6781, sum loss: 6628.355957, avg loss: 4.044146, ppl: 57.062439 +epoch: 0, batch: 6782, sum loss: 6062.714355, avg loss: 3.908907, ppl: 49.844429 +epoch: 0, batch: 6783, sum loss: 6620.436523, avg loss: 4.096805, ppl: 60.147785 +epoch: 0, batch: 6784, sum loss: 6180.200195, avg loss: 4.139451, ppl: 62.768353 +epoch: 0, batch: 6785, sum loss: 7045.799316, avg loss: 4.023872, ppl: 55.917194 +epoch: 0, batch: 6786, sum loss: 6390.644043, avg loss: 3.880172, ppl: 48.432556 +epoch: 0, batch: 6787, sum loss: 6761.912109, avg loss: 4.015388, ppl: 55.444778 +epoch: 0, batch: 6788, sum loss: 7769.197266, avg loss: 4.136953, ppl: 62.611744 +epoch: 0, batch: 6789, sum loss: 5840.799805, avg loss: 3.886094, ppl: 48.720230 +epoch: 0, batch: 6790, sum loss: 6665.962891, avg loss: 4.030208, ppl: 56.272594 +epoch: 0, batch: 6791, sum loss: 7100.360352, avg loss: 4.324215, ppl: 75.506210 +epoch: 0, batch: 6792, sum loss: 6876.672852, avg loss: 4.078691, ppl: 59.068100 +epoch: 0, batch: 6793, sum loss: 6255.379883, avg loss: 4.126240, ppl: 61.944588 +epoch: 0, batch: 6794, sum loss: 7565.079590, avg loss: 4.195829, ppl: 66.408752 +epoch: 0, batch: 6795, sum loss: 7351.796875, avg loss: 4.450241, ppl: 
85.647552 +epoch: 0, batch: 6796, sum loss: 6180.188477, avg loss: 4.044626, ppl: 57.089844 +epoch: 0, batch: 6797, sum loss: 7883.149414, avg loss: 4.263466, ppl: 71.055862 +epoch: 0, batch: 6798, sum loss: 6638.418457, avg loss: 4.260859, ppl: 70.870834 +epoch: 0, batch: 6799, sum loss: 8560.607422, avg loss: 4.433251, ppl: 84.204712 +epoch: 0, batch: 6800, sum loss: 7025.154297, avg loss: 4.098690, ppl: 60.261295 +epoch: 0, batch: 6801, sum loss: 5907.514648, avg loss: 4.265354, ppl: 71.190094 +epoch: 0, batch: 6802, sum loss: 7722.017578, avg loss: 4.259249, ppl: 70.756805 +epoch: 0, batch: 6803, sum loss: 6078.746094, avg loss: 3.988678, ppl: 53.983501 +epoch: 0, batch: 6804, sum loss: 6190.547852, avg loss: 3.955621, ppl: 52.228142 +epoch: 0, batch: 6805, sum loss: 6321.020020, avg loss: 4.262320, ppl: 70.974419 +epoch: 0, batch: 6806, sum loss: 5902.013672, avg loss: 3.870173, ppl: 47.950680 +epoch: 0, batch: 6807, sum loss: 6983.162109, avg loss: 4.232220, ppl: 68.869934 +epoch: 0, batch: 6808, sum loss: 7948.629883, avg loss: 4.391509, ppl: 80.762161 +epoch: 0, batch: 6809, sum loss: 7867.132324, avg loss: 4.180198, ppl: 65.378777 +epoch: 0, batch: 6810, sum loss: 7016.755859, avg loss: 4.237171, ppl: 69.211784 +epoch: 0, batch: 6811, sum loss: 6782.628418, avg loss: 4.199770, ppl: 66.670990 +epoch: 0, batch: 6812, sum loss: 7023.518555, avg loss: 4.168260, ppl: 64.602951 +epoch: 0, batch: 6813, sum loss: 6720.268066, avg loss: 4.130466, ppl: 62.206905 +epoch: 0, batch: 6814, sum loss: 7091.572266, avg loss: 4.115829, ppl: 61.302982 +epoch: 0, batch: 6815, sum loss: 6490.331543, avg loss: 4.046341, ppl: 57.187847 +epoch: 0, batch: 6816, sum loss: 6937.763672, avg loss: 4.266767, ppl: 71.290779 +epoch: 0, batch: 6817, sum loss: 7841.774902, avg loss: 4.405491, ppl: 81.899376 +epoch: 0, batch: 6818, sum loss: 6929.793457, avg loss: 4.174574, ppl: 65.012161 +epoch: 0, batch: 6819, sum loss: 7946.363281, avg loss: 4.279140, ppl: 72.178337 +epoch: 0, batch: 
6820, sum loss: 6669.518555, avg loss: 4.396518, ppl: 81.167770 +epoch: 0, batch: 6821, sum loss: 6094.136230, avg loss: 3.903995, ppl: 49.600208 +epoch: 0, batch: 6822, sum loss: 6670.204102, avg loss: 4.168878, ppl: 64.642853 +epoch: 0, batch: 6823, sum loss: 6773.287598, avg loss: 4.117500, ppl: 61.405525 +epoch: 0, batch: 6824, sum loss: 6506.061035, avg loss: 3.877271, ppl: 48.292255 +epoch: 0, batch: 6825, sum loss: 5926.399414, avg loss: 3.789258, ppl: 44.223576 +epoch: 0, batch: 6826, sum loss: 7608.987305, avg loss: 4.210839, ppl: 67.413094 +epoch: 0, batch: 6827, sum loss: 6972.655273, avg loss: 4.175243, ppl: 65.055641 +epoch: 0, batch: 6828, sum loss: 6500.997070, avg loss: 4.068209, ppl: 58.452164 +epoch: 0, batch: 6829, sum loss: 6286.668457, avg loss: 4.019609, ppl: 55.679329 +epoch: 0, batch: 6830, sum loss: 7609.003906, avg loss: 4.272321, ppl: 71.687843 +epoch: 0, batch: 6831, sum loss: 7546.675293, avg loss: 4.218376, ppl: 67.923096 +epoch: 0, batch: 6832, sum loss: 6631.791016, avg loss: 4.194681, ppl: 66.332581 +epoch: 0, batch: 6833, sum loss: 7472.587891, avg loss: 4.393055, ppl: 80.887184 +epoch: 0, batch: 6834, sum loss: 7906.438477, avg loss: 4.264530, ppl: 71.131462 +epoch: 0, batch: 6835, sum loss: 6491.282227, avg loss: 4.014399, ppl: 55.389973 +epoch: 0, batch: 6836, sum loss: 6953.884766, avg loss: 4.078525, ppl: 59.058300 +epoch: 0, batch: 6837, sum loss: 7972.374023, avg loss: 4.198196, ppl: 66.566132 +epoch: 0, batch: 6838, sum loss: 7054.037598, avg loss: 4.166591, ppl: 64.495193 +epoch: 0, batch: 6839, sum loss: 7752.454102, avg loss: 4.080239, ppl: 59.159626 +epoch: 0, batch: 6840, sum loss: 6384.013672, avg loss: 3.992504, ppl: 54.190392 +epoch: 0, batch: 6841, sum loss: 8918.285156, avg loss: 4.578175, ppl: 97.336601 +epoch: 0, batch: 6842, sum loss: 7097.289062, avg loss: 4.157756, ppl: 63.927898 +epoch: 0, batch: 6843, sum loss: 6426.625000, avg loss: 3.918674, ppl: 50.333645 +epoch: 0, batch: 6844, sum loss: 6471.780273, 
avg loss: 4.116909, ppl: 61.369228 +epoch: 0, batch: 6845, sum loss: 6586.748047, avg loss: 3.963146, ppl: 52.622604 +epoch: 0, batch: 6846, sum loss: 7764.091309, avg loss: 4.031200, ppl: 56.328487 +epoch: 0, batch: 6847, sum loss: 6303.473633, avg loss: 4.144296, ppl: 63.073185 +epoch: 0, batch: 6848, sum loss: 6632.033203, avg loss: 4.251304, ppl: 70.196869 +epoch: 0, batch: 6849, sum loss: 6907.066406, avg loss: 4.140927, ppl: 62.861088 +epoch: 0, batch: 6850, sum loss: 9449.699219, avg loss: 4.411624, ppl: 82.403214 +epoch: 0, batch: 6851, sum loss: 7650.459473, avg loss: 4.484443, ppl: 88.627548 +epoch: 0, batch: 6852, sum loss: 8049.065918, avg loss: 4.429866, ppl: 83.920158 +epoch: 0, batch: 6853, sum loss: 7528.188965, avg loss: 4.316622, ppl: 74.935051 +epoch: 0, batch: 6854, sum loss: 8159.500977, avg loss: 4.505523, ppl: 90.515648 +epoch: 0, batch: 6855, sum loss: 7433.858398, avg loss: 4.297028, ppl: 73.481087 +epoch: 0, batch: 6856, sum loss: 6766.831543, avg loss: 4.231915, ppl: 68.848915 +epoch: 0, batch: 6857, sum loss: 7509.974609, avg loss: 4.467564, ppl: 87.144150 +epoch: 0, batch: 6858, sum loss: 8305.654297, avg loss: 4.341691, ppl: 76.837326 +epoch: 0, batch: 6859, sum loss: 6170.039062, avg loss: 4.132645, ppl: 62.342609 +epoch: 0, batch: 6860, sum loss: 7553.650879, avg loss: 4.304074, ppl: 74.000679 +epoch: 0, batch: 6861, sum loss: 6463.512207, avg loss: 4.065102, ppl: 58.270859 +epoch: 0, batch: 6862, sum loss: 7132.153320, avg loss: 4.022647, ppl: 55.848763 +epoch: 0, batch: 6863, sum loss: 7463.786621, avg loss: 3.987066, ppl: 53.896515 +epoch: 0, batch: 6864, sum loss: 6716.289551, avg loss: 4.063091, ppl: 58.153774 +epoch: 0, batch: 6865, sum loss: 6489.959473, avg loss: 4.258503, ppl: 70.704094 +epoch: 0, batch: 6866, sum loss: 6674.503906, avg loss: 3.989542, ppl: 54.030151 +epoch: 0, batch: 6867, sum loss: 7982.196289, avg loss: 4.252635, ppl: 70.290421 +epoch: 0, batch: 6868, sum loss: 6052.041016, avg loss: 4.114236, ppl: 
61.205429 +epoch: 0, batch: 6869, sum loss: 7258.649902, avg loss: 4.126578, ppl: 61.965504 +epoch: 0, batch: 6870, sum loss: 6960.525391, avg loss: 4.070483, ppl: 58.585236 +epoch: 0, batch: 6871, sum loss: 7053.656250, avg loss: 4.233887, ppl: 68.984871 +epoch: 0, batch: 6872, sum loss: 7439.442871, avg loss: 3.874710, ppl: 48.168732 +epoch: 0, batch: 6873, sum loss: 7260.529297, avg loss: 4.106635, ppl: 60.741955 +epoch: 0, batch: 6874, sum loss: 5354.458984, avg loss: 4.013837, ppl: 55.358894 +epoch: 0, batch: 6875, sum loss: 8399.620117, avg loss: 4.131638, ppl: 62.279858 +epoch: 0, batch: 6876, sum loss: 6154.477051, avg loss: 3.741323, ppl: 42.153732 +epoch: 0, batch: 6877, sum loss: 8256.443359, avg loss: 4.275734, ppl: 71.932953 +epoch: 0, batch: 6878, sum loss: 6642.198242, avg loss: 4.141021, ppl: 62.866993 +epoch: 0, batch: 6879, sum loss: 7837.114746, avg loss: 4.135681, ppl: 62.532139 +epoch: 0, batch: 6880, sum loss: 7474.452148, avg loss: 4.251679, ppl: 70.223213 +epoch: 0, batch: 6881, sum loss: 6040.762207, avg loss: 3.899782, ppl: 49.391678 +epoch: 0, batch: 6882, sum loss: 6078.260254, avg loss: 3.881392, ppl: 48.491680 +epoch: 0, batch: 6883, sum loss: 8397.840820, avg loss: 4.360250, ppl: 78.276703 +epoch: 0, batch: 6884, sum loss: 6649.207520, avg loss: 4.208359, ppl: 67.246117 +epoch: 0, batch: 6885, sum loss: 6302.549316, avg loss: 4.040096, ppl: 56.831787 +epoch: 0, batch: 6886, sum loss: 7014.947266, avg loss: 3.954311, ppl: 52.159725 +epoch: 0, batch: 6887, sum loss: 8152.614746, avg loss: 4.226343, ppl: 68.466370 +epoch: 0, batch: 6888, sum loss: 7134.110840, avg loss: 4.326325, ppl: 75.665733 +epoch: 0, batch: 6889, sum loss: 7334.546387, avg loss: 4.242074, ppl: 69.551987 +epoch: 0, batch: 6890, sum loss: 6567.304688, avg loss: 4.130380, ppl: 62.201565 +epoch: 0, batch: 6891, sum loss: 6088.272949, avg loss: 4.116479, ppl: 61.342899 +epoch: 0, batch: 6892, sum loss: 7587.626953, avg loss: 4.189744, ppl: 66.005920 +epoch: 0, batch: 
6893, sum loss: 9097.691406, avg loss: 4.200227, ppl: 66.701454 +epoch: 0, batch: 6894, sum loss: 6278.029297, avg loss: 4.300020, ppl: 73.701286 +epoch: 0, batch: 6895, sum loss: 6516.693359, avg loss: 4.177368, ppl: 65.194016 +epoch: 0, batch: 6896, sum loss: 6788.150391, avg loss: 4.079417, ppl: 59.111012 +epoch: 0, batch: 6897, sum loss: 6524.799805, avg loss: 4.137476, ppl: 62.644505 +epoch: 0, batch: 6898, sum loss: 6762.428711, avg loss: 4.105907, ppl: 60.697769 +epoch: 0, batch: 6899, sum loss: 6688.732910, avg loss: 4.041531, ppl: 56.913387 +epoch: 0, batch: 6900, sum loss: 5932.731934, avg loss: 3.864972, ppl: 47.701931 +epoch: 0, batch: 6901, sum loss: 6530.099609, avg loss: 4.096675, ppl: 60.139984 +epoch: 0, batch: 6902, sum loss: 6013.108887, avg loss: 4.014091, ppl: 55.372940 +epoch: 0, batch: 6903, sum loss: 7717.822754, avg loss: 4.156070, ppl: 63.820232 +epoch: 0, batch: 6904, sum loss: 6443.453125, avg loss: 4.267188, ppl: 71.320770 +epoch: 0, batch: 6905, sum loss: 7269.529785, avg loss: 4.209340, ppl: 67.312103 +epoch: 0, batch: 6906, sum loss: 5824.136230, avg loss: 3.914070, ppl: 50.102463 +epoch: 0, batch: 6907, sum loss: 6693.320312, avg loss: 3.946533, ppl: 51.755630 +epoch: 0, batch: 6908, sum loss: 7429.158691, avg loss: 4.086446, ppl: 59.527943 +epoch: 0, batch: 6909, sum loss: 6562.944336, avg loss: 4.081433, ppl: 59.230274 +epoch: 0, batch: 6910, sum loss: 5663.584961, avg loss: 3.980031, ppl: 53.518719 +epoch: 0, batch: 6911, sum loss: 6276.121582, avg loss: 3.915235, ppl: 50.160847 +epoch: 0, batch: 6912, sum loss: 6564.822266, avg loss: 4.333216, ppl: 76.188934 +epoch: 0, batch: 6913, sum loss: 8098.826172, avg loss: 4.273787, ppl: 71.793007 +epoch: 0, batch: 6914, sum loss: 7315.295898, avg loss: 4.154058, ppl: 63.691936 +epoch: 0, batch: 6915, sum loss: 6830.833984, avg loss: 4.237490, ppl: 69.233871 +epoch: 0, batch: 6916, sum loss: 6950.290527, avg loss: 4.033831, ppl: 56.476841 +epoch: 0, batch: 6917, sum loss: 6430.461914, 
avg loss: 4.036699, ppl: 56.639084 +epoch: 0, batch: 6918, sum loss: 5579.080078, avg loss: 3.951190, ppl: 51.997192 +epoch: 0, batch: 6919, sum loss: 6003.081543, avg loss: 4.015439, ppl: 55.447632 +epoch: 0, batch: 6920, sum loss: 7384.358887, avg loss: 4.164895, ppl: 64.385925 +epoch: 0, batch: 6921, sum loss: 6324.393555, avg loss: 4.048908, ppl: 57.334801 +epoch: 0, batch: 6922, sum loss: 7296.828613, avg loss: 4.348527, ppl: 77.364418 +epoch: 0, batch: 6923, sum loss: 6913.113770, avg loss: 3.982208, ppl: 53.635357 +epoch: 0, batch: 6924, sum loss: 7761.200195, avg loss: 4.357777, ppl: 78.083336 +epoch: 0, batch: 6925, sum loss: 6691.884766, avg loss: 4.065544, ppl: 58.296593 +epoch: 0, batch: 6926, sum loss: 6779.541016, avg loss: 3.852012, ppl: 47.087704 +epoch: 0, batch: 6927, sum loss: 6885.146484, avg loss: 3.954708, ppl: 52.180447 +epoch: 0, batch: 6928, sum loss: 7129.466797, avg loss: 4.060061, ppl: 57.977848 +epoch: 0, batch: 6929, sum loss: 7629.925293, avg loss: 4.327808, ppl: 75.777992 +epoch: 0, batch: 6930, sum loss: 5545.901367, avg loss: 3.933264, ppl: 51.073387 +epoch: 0, batch: 6931, sum loss: 7312.657715, avg loss: 4.164384, ppl: 64.353020 +epoch: 0, batch: 6932, sum loss: 6694.782227, avg loss: 4.264193, ppl: 71.107483 +epoch: 0, batch: 6933, sum loss: 7913.142578, avg loss: 4.272755, ppl: 71.718964 +epoch: 0, batch: 6934, sum loss: 7569.431641, avg loss: 4.131786, ppl: 62.289062 +epoch: 0, batch: 6935, sum loss: 6262.450684, avg loss: 3.958565, ppl: 52.382114 +epoch: 0, batch: 6936, sum loss: 7259.156250, avg loss: 4.101218, ppl: 60.413841 +epoch: 0, batch: 6937, sum loss: 7313.094727, avg loss: 3.968038, ppl: 52.880695 +epoch: 0, batch: 6938, sum loss: 5638.608887, avg loss: 4.001852, ppl: 54.699337 +epoch: 0, batch: 6939, sum loss: 6831.738281, avg loss: 4.044842, ppl: 57.102177 +epoch: 0, batch: 6940, sum loss: 7316.631836, avg loss: 4.234162, ppl: 69.003822 +epoch: 0, batch: 6941, sum loss: 6925.313965, avg loss: 4.261732, ppl: 
70.932709 +epoch: 0, batch: 6942, sum loss: 7632.356445, avg loss: 4.193603, ppl: 66.261070 +epoch: 0, batch: 6943, sum loss: 5986.202637, avg loss: 3.922807, ppl: 50.542091 +epoch: 0, batch: 6944, sum loss: 7512.320312, avg loss: 4.239459, ppl: 69.370285 +epoch: 0, batch: 6945, sum loss: 7289.631348, avg loss: 4.170270, ppl: 64.732925 +epoch: 0, batch: 6946, sum loss: 6407.665039, avg loss: 3.669911, ppl: 39.248428 +epoch: 0, batch: 6947, sum loss: 6521.131836, avg loss: 4.022907, ppl: 55.863251 +epoch: 0, batch: 6948, sum loss: 7737.510742, avg loss: 4.144355, ppl: 63.076912 +epoch: 0, batch: 6949, sum loss: 6444.308105, avg loss: 4.025177, ppl: 55.990219 +epoch: 0, batch: 6950, sum loss: 7433.426270, avg loss: 4.296778, ppl: 73.462730 +epoch: 0, batch: 6951, sum loss: 6297.116699, avg loss: 4.036613, ppl: 56.634224 +epoch: 0, batch: 6952, sum loss: 5943.313477, avg loss: 3.999538, ppl: 54.572929 +epoch: 0, batch: 6953, sum loss: 7648.033691, avg loss: 4.397949, ppl: 81.284004 +epoch: 0, batch: 6954, sum loss: 6763.473145, avg loss: 4.256433, ppl: 70.557892 +epoch: 0, batch: 6955, sum loss: 7490.817383, avg loss: 4.173157, ppl: 64.920097 +epoch: 0, batch: 6956, sum loss: 6598.643555, avg loss: 4.108744, ppl: 60.870224 +epoch: 0, batch: 6957, sum loss: 6281.906738, avg loss: 4.029447, ppl: 56.229786 +epoch: 0, batch: 6958, sum loss: 7460.830078, avg loss: 4.101611, ppl: 60.437553 +epoch: 0, batch: 6959, sum loss: 7195.631836, avg loss: 4.324298, ppl: 75.512474 +epoch: 0, batch: 6960, sum loss: 7985.096191, avg loss: 4.209329, ppl: 67.311333 +epoch: 0, batch: 6961, sum loss: 8024.155273, avg loss: 4.081462, ppl: 59.232025 +epoch: 0, batch: 6962, sum loss: 6513.565918, avg loss: 4.040674, ppl: 56.864643 +epoch: 0, batch: 6963, sum loss: 7054.273438, avg loss: 4.142263, ppl: 62.945103 +epoch: 0, batch: 6964, sum loss: 6459.333008, avg loss: 4.451643, ppl: 85.767700 +epoch: 0, batch: 6965, sum loss: 8349.814453, avg loss: 4.380805, ppl: 79.902328 +epoch: 0, batch: 
6966, sum loss: 7635.090820, avg loss: 4.272575, ppl: 71.706032 +epoch: 0, batch: 6967, sum loss: 7301.459473, avg loss: 4.198654, ppl: 66.596642 +epoch: 0, batch: 6968, sum loss: 8030.171875, avg loss: 4.275917, ppl: 71.946053 +epoch: 0, batch: 6969, sum loss: 6955.321289, avg loss: 4.220462, ppl: 68.064911 +epoch: 0, batch: 6970, sum loss: 6669.833008, avg loss: 4.091922, ppl: 59.854839 +epoch: 0, batch: 6971, sum loss: 6440.483887, avg loss: 4.104834, ppl: 60.632656 +epoch: 0, batch: 6972, sum loss: 7227.471191, avg loss: 4.151333, ppl: 63.518635 +epoch: 0, batch: 6973, sum loss: 7504.126465, avg loss: 4.093904, ppl: 59.973602 +epoch: 0, batch: 6974, sum loss: 7703.752441, avg loss: 4.239820, ppl: 69.395363 +epoch: 0, batch: 6975, sum loss: 6483.479980, avg loss: 4.075098, ppl: 58.856224 +epoch: 0, batch: 6976, sum loss: 6324.638184, avg loss: 3.889691, ppl: 48.895794 +epoch: 0, batch: 6977, sum loss: 7166.435547, avg loss: 4.288711, ppl: 72.872482 +epoch: 0, batch: 6978, sum loss: 7546.077148, avg loss: 4.218042, ppl: 67.900398 +epoch: 0, batch: 6979, sum loss: 8541.171875, avg loss: 4.359965, ppl: 78.254417 +epoch: 0, batch: 6980, sum loss: 6430.701172, avg loss: 3.876251, ppl: 48.243034 +epoch: 0, batch: 6981, sum loss: 7286.363770, avg loss: 3.994717, ppl: 54.310474 +epoch: 0, batch: 6982, sum loss: 6345.186523, avg loss: 4.051843, ppl: 57.503349 +epoch: 0, batch: 6983, sum loss: 7234.601562, avg loss: 4.213513, ppl: 67.593575 +epoch: 0, batch: 6984, sum loss: 7027.873047, avg loss: 4.124339, ppl: 61.826904 +epoch: 0, batch: 6985, sum loss: 6700.230957, avg loss: 3.811280, ppl: 45.208279 +epoch: 0, batch: 6986, sum loss: 8238.940430, avg loss: 4.472824, ppl: 87.603737 +epoch: 0, batch: 6987, sum loss: 8390.638672, avg loss: 4.413803, ppl: 82.582901 +epoch: 0, batch: 6988, sum loss: 6184.852539, avg loss: 3.642434, ppl: 38.184662 +epoch: 0, batch: 6989, sum loss: 5507.457031, avg loss: 4.011258, ppl: 55.216270 +epoch: 0, batch: 6990, sum loss: 6746.556641, 
avg loss: 4.286249, ppl: 72.693298 +epoch: 0, batch: 6991, sum loss: 6606.666504, avg loss: 3.881708, ppl: 48.506977 +epoch: 0, batch: 6992, sum loss: 7097.123047, avg loss: 3.971529, ppl: 53.065632 +epoch: 0, batch: 6993, sum loss: 7364.295898, avg loss: 4.111835, ppl: 61.058628 +epoch: 0, batch: 6994, sum loss: 6134.773926, avg loss: 3.848666, ppl: 46.930424 +epoch: 0, batch: 6995, sum loss: 6658.198730, avg loss: 3.998918, ppl: 54.539124 +epoch: 0, batch: 6996, sum loss: 7320.407715, avg loss: 4.126498, ppl: 61.960541 +epoch: 0, batch: 6997, sum loss: 6332.439941, avg loss: 4.516719, ppl: 91.534767 +epoch: 0, batch: 6998, sum loss: 5978.302246, avg loss: 3.788531, ppl: 44.191437 +epoch: 0, batch: 6999, sum loss: 6753.428711, avg loss: 4.143208, ppl: 63.004589 +epoch: 0, batch: 7000, sum loss: 8560.901367, avg loss: 4.378978, ppl: 79.756462 +epoch: 0, batch: 7001, sum loss: 6275.179199, avg loss: 4.009699, ppl: 55.130268 +epoch: 0, batch: 7002, sum loss: 6713.625000, avg loss: 4.141656, ppl: 62.906933 +epoch: 0, batch: 7003, sum loss: 6690.513184, avg loss: 4.011099, ppl: 55.207527 +epoch: 0, batch: 7004, sum loss: 8051.526367, avg loss: 4.321807, ppl: 75.324608 +epoch: 0, batch: 7005, sum loss: 6956.760254, avg loss: 4.296949, ppl: 73.475273 +epoch: 0, batch: 7006, sum loss: 8685.778320, avg loss: 4.206188, ppl: 67.100281 +epoch: 0, batch: 7007, sum loss: 6231.228027, avg loss: 4.102191, ppl: 60.472637 +epoch: 0, batch: 7008, sum loss: 6764.616699, avg loss: 4.316922, ppl: 74.957565 +epoch: 0, batch: 7009, sum loss: 7434.182617, avg loss: 4.267613, ppl: 71.351143 +epoch: 0, batch: 7010, sum loss: 7502.471191, avg loss: 4.217241, ppl: 67.846024 +epoch: 0, batch: 7011, sum loss: 7128.660645, avg loss: 4.137354, ppl: 62.636860 +epoch: 0, batch: 7012, sum loss: 7299.586914, avg loss: 4.114761, ppl: 61.237598 +epoch: 0, batch: 7013, sum loss: 6982.576172, avg loss: 4.312894, ppl: 74.656258 +epoch: 0, batch: 7014, sum loss: 6238.860840, avg loss: 4.137176, ppl: 
62.625690 +epoch: 0, batch: 7015, sum loss: 5995.863770, avg loss: 4.123703, ppl: 61.787590 +epoch: 0, batch: 7016, sum loss: 7118.391602, avg loss: 4.217057, ppl: 67.833542 +epoch: 0, batch: 7017, sum loss: 7504.751953, avg loss: 4.032645, ppl: 56.409904 +epoch: 0, batch: 7018, sum loss: 7504.285156, avg loss: 4.134593, ppl: 62.464134 +epoch: 0, batch: 7019, sum loss: 7602.601562, avg loss: 4.339385, ppl: 76.660347 +epoch: 0, batch: 7020, sum loss: 7555.967773, avg loss: 4.228298, ppl: 68.600357 +epoch: 0, batch: 7021, sum loss: 7160.958984, avg loss: 4.168195, ppl: 64.598763 +epoch: 0, batch: 7022, sum loss: 6862.083008, avg loss: 4.091880, ppl: 59.852299 +epoch: 0, batch: 7023, sum loss: 6601.955566, avg loss: 4.032960, ppl: 56.427689 +epoch: 0, batch: 7024, sum loss: 6698.557617, avg loss: 3.977766, ppl: 53.397614 +epoch: 0, batch: 7025, sum loss: 6909.377930, avg loss: 4.038211, ppl: 56.724766 +epoch: 0, batch: 7026, sum loss: 6679.874023, avg loss: 3.973750, ppl: 53.183601 +epoch: 0, batch: 7027, sum loss: 5704.503418, avg loss: 3.772820, ppl: 43.502567 +epoch: 0, batch: 7028, sum loss: 6742.607910, avg loss: 4.237968, ppl: 69.266991 +epoch: 0, batch: 7029, sum loss: 6880.510742, avg loss: 4.064094, ppl: 58.212147 +epoch: 0, batch: 7030, sum loss: 6926.662598, avg loss: 4.137792, ppl: 62.664284 +epoch: 0, batch: 7031, sum loss: 6510.097656, avg loss: 4.089257, ppl: 59.695538 +epoch: 0, batch: 7032, sum loss: 6941.233398, avg loss: 4.204260, ppl: 66.971046 +epoch: 0, batch: 7033, sum loss: 7843.122070, avg loss: 4.352454, ppl: 77.668846 +epoch: 0, batch: 7034, sum loss: 7060.966797, avg loss: 3.962383, ppl: 52.582497 +epoch: 0, batch: 7035, sum loss: 6622.850098, avg loss: 4.001722, ppl: 54.692268 +epoch: 0, batch: 7036, sum loss: 5208.764648, avg loss: 3.752712, ppl: 42.636566 +epoch: 0, batch: 7037, sum loss: 8376.685547, avg loss: 4.474725, ppl: 87.770485 +epoch: 0, batch: 7038, sum loss: 6379.476562, avg loss: 3.928249, ppl: 50.817924 +epoch: 0, batch: 
7039, sum loss: 6599.456055, avg loss: 4.187472, ppl: 65.856087 +epoch: 0, batch: 7040, sum loss: 7003.714355, avg loss: 3.837652, ppl: 46.416348 +epoch: 0, batch: 7041, sum loss: 7302.450195, avg loss: 4.063690, ppl: 58.188614 +epoch: 0, batch: 7042, sum loss: 6494.012207, avg loss: 4.056222, ppl: 57.755722 +epoch: 0, batch: 7043, sum loss: 5885.002930, avg loss: 3.992539, ppl: 54.192329 +epoch: 0, batch: 7044, sum loss: 6617.381836, avg loss: 4.128123, ppl: 62.061340 +epoch: 0, batch: 7045, sum loss: 7533.442383, avg loss: 4.069931, ppl: 58.552925 +epoch: 0, batch: 7046, sum loss: 6525.771973, avg loss: 4.135470, ppl: 62.518993 +epoch: 0, batch: 7047, sum loss: 7515.965820, avg loss: 4.053919, ppl: 57.622829 +epoch: 0, batch: 7048, sum loss: 7226.976562, avg loss: 4.046460, ppl: 57.194611 +epoch: 0, batch: 7049, sum loss: 6502.240234, avg loss: 3.969621, ppl: 52.964478 +epoch: 0, batch: 7050, sum loss: 7488.928223, avg loss: 4.423466, ppl: 83.384811 +epoch: 0, batch: 7051, sum loss: 8000.240723, avg loss: 4.280492, ppl: 72.276016 +epoch: 0, batch: 7052, sum loss: 9587.476562, avg loss: 4.434541, ppl: 84.313393 +epoch: 0, batch: 7053, sum loss: 6903.781250, avg loss: 4.156401, ppl: 63.841324 +epoch: 0, batch: 7054, sum loss: 7321.280762, avg loss: 3.991974, ppl: 54.161705 +epoch: 0, batch: 7055, sum loss: 6389.984375, avg loss: 4.093520, ppl: 59.950527 +epoch: 0, batch: 7056, sum loss: 8227.331055, avg loss: 4.155218, ppl: 63.765842 +epoch: 0, batch: 7057, sum loss: 5967.586914, avg loss: 4.121262, ppl: 61.636982 +epoch: 0, batch: 7058, sum loss: 5417.930664, avg loss: 4.101386, ppl: 60.423954 +epoch: 0, batch: 7059, sum loss: 7038.836914, avg loss: 4.068692, ppl: 58.480404 +epoch: 0, batch: 7060, sum loss: 8364.396484, avg loss: 4.245887, ppl: 69.817650 +epoch: 0, batch: 7061, sum loss: 7229.261230, avg loss: 4.260025, ppl: 70.811722 +epoch: 0, batch: 7062, sum loss: 6685.020996, avg loss: 4.098725, ppl: 60.263420 +epoch: 0, batch: 7063, sum loss: 6814.781250, 
avg loss: 4.204060, ppl: 66.957634 +epoch: 0, batch: 7064, sum loss: 6820.919922, avg loss: 4.271083, ppl: 71.599159 +epoch: 0, batch: 7065, sum loss: 6784.704590, avg loss: 4.159843, ppl: 64.061493 +epoch: 0, batch: 7066, sum loss: 6606.226074, avg loss: 4.047933, ppl: 57.278919 +epoch: 0, batch: 7067, sum loss: 7427.722168, avg loss: 4.090155, ppl: 59.749161 +epoch: 0, batch: 7068, sum loss: 7429.965820, avg loss: 4.066758, ppl: 58.367409 +epoch: 0, batch: 7069, sum loss: 6407.627930, avg loss: 3.883411, ppl: 48.589668 +epoch: 0, batch: 7070, sum loss: 7280.376465, avg loss: 4.310466, ppl: 74.475174 +epoch: 0, batch: 7071, sum loss: 6872.648438, avg loss: 4.145144, ppl: 63.126709 +epoch: 0, batch: 7072, sum loss: 7031.129395, avg loss: 4.092625, ppl: 59.896896 +epoch: 0, batch: 7073, sum loss: 7417.109375, avg loss: 3.879241, ppl: 48.387486 +epoch: 0, batch: 7074, sum loss: 7994.344238, avg loss: 4.174592, ppl: 65.013344 +epoch: 0, batch: 7075, sum loss: 6770.796875, avg loss: 4.081252, ppl: 59.219543 +epoch: 0, batch: 7076, sum loss: 8108.180176, avg loss: 4.175170, ppl: 65.050926 +epoch: 0, batch: 7077, sum loss: 6148.782227, avg loss: 3.859876, ppl: 47.459461 +epoch: 0, batch: 7078, sum loss: 6731.773926, avg loss: 4.191640, ppl: 66.131149 +epoch: 0, batch: 7079, sum loss: 7280.967285, avg loss: 4.198943, ppl: 66.615891 +epoch: 0, batch: 7080, sum loss: 7512.354980, avg loss: 4.189825, ppl: 66.011208 +epoch: 0, batch: 7081, sum loss: 7311.289551, avg loss: 4.077685, ppl: 59.008728 +epoch: 0, batch: 7082, sum loss: 6489.794434, avg loss: 3.918958, ppl: 50.347954 +epoch: 0, batch: 7083, sum loss: 6578.798828, avg loss: 4.093839, ppl: 59.969654 +epoch: 0, batch: 7084, sum loss: 7535.390137, avg loss: 4.018875, ppl: 55.638458 +epoch: 0, batch: 7085, sum loss: 6075.739258, avg loss: 3.958136, ppl: 52.359653 +epoch: 0, batch: 7086, sum loss: 6393.103027, avg loss: 4.008215, ppl: 55.048519 +epoch: 0, batch: 7087, sum loss: 6948.712891, avg loss: 4.009644, ppl: 
55.127216 +epoch: 0, batch: 7088, sum loss: 7581.269043, avg loss: 4.049823, ppl: 57.387314 +epoch: 0, batch: 7089, sum loss: 8057.815918, avg loss: 4.144968, ppl: 63.115574 +epoch: 0, batch: 7090, sum loss: 7018.897949, avg loss: 4.285041, ppl: 72.605545 +epoch: 0, batch: 7091, sum loss: 6075.756836, avg loss: 3.811642, ppl: 45.224625 +epoch: 0, batch: 7092, sum loss: 6627.520020, avg loss: 4.043636, ppl: 57.033360 +epoch: 0, batch: 7093, sum loss: 6957.830078, avg loss: 4.199052, ppl: 66.623161 +epoch: 0, batch: 7094, sum loss: 6991.592773, avg loss: 4.219428, ppl: 67.994583 +epoch: 0, batch: 7095, sum loss: 8090.739746, avg loss: 4.218321, ppl: 67.919342 +epoch: 0, batch: 7096, sum loss: 7613.878418, avg loss: 4.301626, ppl: 73.819740 +epoch: 0, batch: 7097, sum loss: 5940.133789, avg loss: 3.895170, ppl: 49.164398 +epoch: 0, batch: 7098, sum loss: 7127.709473, avg loss: 4.110559, ppl: 60.980797 +epoch: 0, batch: 7099, sum loss: 8151.835449, avg loss: 4.167605, ppl: 64.560638 +epoch: 0, batch: 7100, sum loss: 6839.431152, avg loss: 3.994995, ppl: 54.325550 +epoch: 0, batch: 7101, sum loss: 7778.146484, avg loss: 4.287843, ppl: 72.809235 +epoch: 0, batch: 7102, sum loss: 7093.817383, avg loss: 4.039760, ppl: 56.812687 +epoch: 0, batch: 7103, sum loss: 5558.598633, avg loss: 3.854784, ppl: 47.218430 +epoch: 0, batch: 7104, sum loss: 7480.759766, avg loss: 4.248018, ppl: 69.966583 +epoch: 0, batch: 7105, sum loss: 6918.899902, avg loss: 4.249938, ppl: 70.101097 +epoch: 0, batch: 7106, sum loss: 6229.720215, avg loss: 4.026968, ppl: 56.090614 +epoch: 0, batch: 7107, sum loss: 5848.068359, avg loss: 4.153458, ppl: 63.653713 +epoch: 0, batch: 7108, sum loss: 8892.623047, avg loss: 4.464169, ppl: 86.848785 +epoch: 0, batch: 7109, sum loss: 6645.172363, avg loss: 4.069303, ppl: 58.516163 +epoch: 0, batch: 7110, sum loss: 6491.829102, avg loss: 4.095791, ppl: 60.086868 +epoch: 0, batch: 7111, sum loss: 6035.436523, avg loss: 3.989053, ppl: 54.003712 +epoch: 0, batch: 
7112, sum loss: 8496.061523, avg loss: 4.436585, ppl: 84.485962 +epoch: 0, batch: 7113, sum loss: 7017.485352, avg loss: 4.056350, ppl: 57.763077 +epoch: 0, batch: 7114, sum loss: 6559.153809, avg loss: 4.043868, ppl: 57.046577 +epoch: 0, batch: 7115, sum loss: 7092.208984, avg loss: 4.290507, ppl: 73.003494 +epoch: 0, batch: 7116, sum loss: 8593.277344, avg loss: 4.153348, ppl: 63.646732 +epoch: 0, batch: 7117, sum loss: 7020.093750, avg loss: 3.831929, ppl: 46.151478 +epoch: 0, batch: 7118, sum loss: 6529.393555, avg loss: 4.132528, ppl: 62.335297 +epoch: 0, batch: 7119, sum loss: 7406.214355, avg loss: 4.461575, ppl: 86.623840 +epoch: 0, batch: 7120, sum loss: 6967.520508, avg loss: 3.999725, ppl: 54.583118 +epoch: 0, batch: 7121, sum loss: 6603.228516, avg loss: 4.031275, ppl: 56.332680 +epoch: 0, batch: 7122, sum loss: 6945.479980, avg loss: 4.049842, ppl: 57.388409 +epoch: 0, batch: 7123, sum loss: 6149.431641, avg loss: 4.110583, ppl: 60.982250 +epoch: 0, batch: 7124, sum loss: 6660.585449, avg loss: 4.199613, ppl: 66.660500 +epoch: 0, batch: 7125, sum loss: 6900.087402, avg loss: 3.970131, ppl: 52.991467 +epoch: 0, batch: 7126, sum loss: 6232.729004, avg loss: 4.081682, ppl: 59.245049 +epoch: 0, batch: 7127, sum loss: 6555.382812, avg loss: 3.925379, ppl: 50.672279 +epoch: 0, batch: 7128, sum loss: 7378.124023, avg loss: 4.173147, ppl: 64.919449 +epoch: 0, batch: 7129, sum loss: 5944.380859, avg loss: 3.952381, ppl: 52.059155 +epoch: 0, batch: 7130, sum loss: 7219.836914, avg loss: 4.277155, ppl: 72.035202 +epoch: 0, batch: 7131, sum loss: 6012.492188, avg loss: 3.899152, ppl: 49.360565 +epoch: 0, batch: 7132, sum loss: 6808.875000, avg loss: 4.131599, ppl: 62.277420 +epoch: 0, batch: 7133, sum loss: 8566.901367, avg loss: 4.395537, ppl: 81.088158 +epoch: 0, batch: 7134, sum loss: 5688.472656, avg loss: 3.964092, ppl: 52.672436 +epoch: 0, batch: 7135, sum loss: 8410.368164, avg loss: 4.378120, ppl: 79.688072 +epoch: 0, batch: 7136, sum loss: 6658.459961, 
avg loss: 4.120334, ppl: 61.579815 +epoch: 0, batch: 7137, sum loss: 5843.108398, avg loss: 3.786849, ppl: 44.117191 +epoch: 0, batch: 7138, sum loss: 6811.166504, avg loss: 4.100642, ppl: 60.379051 +epoch: 0, batch: 7139, sum loss: 5925.909668, avg loss: 3.868087, ppl: 47.850773 +epoch: 0, batch: 7140, sum loss: 6332.501953, avg loss: 4.074969, ppl: 58.848675 +epoch: 0, batch: 7141, sum loss: 6615.486816, avg loss: 4.165924, ppl: 64.452217 +epoch: 0, batch: 7142, sum loss: 5770.689453, avg loss: 3.821649, ppl: 45.679455 +epoch: 0, batch: 7143, sum loss: 7119.982910, avg loss: 4.173495, ppl: 64.942047 +epoch: 0, batch: 7144, sum loss: 6248.056152, avg loss: 3.768430, ppl: 43.311989 +epoch: 0, batch: 7145, sum loss: 6732.999512, avg loss: 4.070737, ppl: 58.600155 +epoch: 0, batch: 7146, sum loss: 7018.815430, avg loss: 4.218038, ppl: 67.900139 +epoch: 0, batch: 7147, sum loss: 8210.049805, avg loss: 4.307476, ppl: 74.252838 +epoch: 0, batch: 7148, sum loss: 6899.795898, avg loss: 4.230408, ppl: 68.745285 +epoch: 0, batch: 7149, sum loss: 7152.584961, avg loss: 4.195065, ppl: 66.358017 +epoch: 0, batch: 7150, sum loss: 6609.404297, avg loss: 4.231373, ppl: 68.811638 +epoch: 0, batch: 7151, sum loss: 7044.111816, avg loss: 4.090657, ppl: 59.779167 +epoch: 0, batch: 7152, sum loss: 5653.952148, avg loss: 4.029902, ppl: 56.255398 +epoch: 0, batch: 7153, sum loss: 5501.496094, avg loss: 3.817832, ppl: 45.505455 +epoch: 0, batch: 7154, sum loss: 6482.300781, avg loss: 3.989108, ppl: 54.006699 +epoch: 0, batch: 7155, sum loss: 7233.085449, avg loss: 4.126119, ppl: 61.937057 +epoch: 0, batch: 7156, sum loss: 7092.881836, avg loss: 4.083409, ppl: 59.347458 +epoch: 0, batch: 7157, sum loss: 7077.885742, avg loss: 4.072432, ppl: 58.699520 +epoch: 0, batch: 7158, sum loss: 6695.412598, avg loss: 4.048013, ppl: 57.283504 +epoch: 0, batch: 7159, sum loss: 6061.808105, avg loss: 3.790999, ppl: 44.300652 +epoch: 0, batch: 7160, sum loss: 7663.192871, avg loss: 4.093586, ppl: 
59.954502 +epoch: 0, batch: 7161, sum loss: 7065.923828, avg loss: 3.918982, ppl: 50.349140 +epoch: 0, batch: 7162, sum loss: 7351.254883, avg loss: 4.139220, ppl: 62.753868 +epoch: 0, batch: 7163, sum loss: 6873.433105, avg loss: 4.128188, ppl: 62.065369 +epoch: 0, batch: 7164, sum loss: 6855.858398, avg loss: 4.051926, ppl: 57.508091 +epoch: 0, batch: 7165, sum loss: 7736.834961, avg loss: 4.232404, ppl: 68.882645 +epoch: 0, batch: 7166, sum loss: 7248.682129, avg loss: 3.941643, ppl: 51.503178 +epoch: 0, batch: 7167, sum loss: 7221.579102, avg loss: 4.270597, ppl: 71.564346 +epoch: 0, batch: 7168, sum loss: 6392.891602, avg loss: 4.025751, ppl: 56.022346 +epoch: 0, batch: 7169, sum loss: 8228.119141, avg loss: 4.153518, ppl: 63.657536 +epoch: 0, batch: 7170, sum loss: 7168.637695, avg loss: 4.105749, ppl: 60.688190 +epoch: 0, batch: 7171, sum loss: 7119.527344, avg loss: 4.250464, ppl: 70.137978 +epoch: 0, batch: 7172, sum loss: 7309.491699, avg loss: 4.088083, ppl: 59.625469 +epoch: 0, batch: 7173, sum loss: 7485.250488, avg loss: 4.306818, ppl: 74.204033 +epoch: 0, batch: 7174, sum loss: 7929.769531, avg loss: 4.153887, ppl: 63.681034 +epoch: 0, batch: 7175, sum loss: 6781.527344, avg loss: 4.085258, ppl: 59.457249 +epoch: 0, batch: 7176, sum loss: 5240.354492, avg loss: 3.830668, ppl: 46.093338 +epoch: 0, batch: 7177, sum loss: 8013.308105, avg loss: 4.355059, ppl: 77.871391 +epoch: 0, batch: 7178, sum loss: 6327.079590, avg loss: 4.068861, ppl: 58.490334 +epoch: 0, batch: 7179, sum loss: 6568.385254, avg loss: 3.895839, ppl: 49.197334 +epoch: 0, batch: 7180, sum loss: 6835.713379, avg loss: 4.180864, ppl: 65.422379 +epoch: 0, batch: 7181, sum loss: 8015.908203, avg loss: 4.129783, ppl: 62.164413 +epoch: 0, batch: 7182, sum loss: 6754.139648, avg loss: 4.166650, ppl: 64.499008 +epoch: 0, batch: 7183, sum loss: 7820.406738, avg loss: 4.031137, ppl: 56.324944 +epoch: 0, batch: 7184, sum loss: 6711.832031, avg loss: 3.824406, ppl: 45.805569 +epoch: 0, batch: 
7185, sum loss: 6715.589844, avg loss: 4.122523, ppl: 61.714741 +epoch: 0, batch: 7186, sum loss: 7629.688965, avg loss: 4.210645, ppl: 67.400009 +epoch: 0, batch: 7187, sum loss: 6268.864258, avg loss: 4.070691, ppl: 58.597446 +epoch: 0, batch: 7188, sum loss: 5623.484863, avg loss: 3.766567, ppl: 43.231407 +epoch: 0, batch: 7189, sum loss: 6422.862793, avg loss: 3.628736, ppl: 37.665180 +epoch: 0, batch: 7190, sum loss: 5205.343750, avg loss: 3.931529, ppl: 50.984852 +epoch: 0, batch: 7191, sum loss: 6183.368652, avg loss: 3.802810, ppl: 44.826958 +epoch: 0, batch: 7192, sum loss: 6289.385254, avg loss: 3.721530, ppl: 41.327564 +epoch: 0, batch: 7193, sum loss: 6339.877930, avg loss: 3.997401, ppl: 54.456432 +epoch: 0, batch: 7194, sum loss: 5956.427734, avg loss: 3.772279, ppl: 43.479019 +epoch: 0, batch: 7195, sum loss: 5822.460938, avg loss: 4.034969, ppl: 56.541187 +epoch: 0, batch: 7196, sum loss: 7897.105469, avg loss: 4.158560, ppl: 63.979343 +epoch: 0, batch: 7197, sum loss: 6089.207031, avg loss: 3.966910, ppl: 52.821068 +epoch: 0, batch: 7198, sum loss: 6437.967285, avg loss: 4.046491, ppl: 57.196411 +epoch: 0, batch: 7199, sum loss: 6708.507324, avg loss: 4.075643, ppl: 58.888309 +epoch: 0, batch: 7200, sum loss: 5848.980469, avg loss: 3.907134, ppl: 49.756161 +epoch: 0, batch: 7201, sum loss: 5814.172363, avg loss: 4.111862, ppl: 61.060287 +epoch: 0, batch: 7202, sum loss: 5661.544922, avg loss: 3.736994, ppl: 41.971615 +epoch: 0, batch: 7203, sum loss: 7225.357422, avg loss: 4.282962, ppl: 72.454758 +epoch: 0, batch: 7204, sum loss: 5555.435547, avg loss: 3.799888, ppl: 44.696163 +epoch: 0, batch: 7205, sum loss: 6769.023926, avg loss: 4.036389, ppl: 56.621506 +epoch: 0, batch: 7206, sum loss: 6967.576172, avg loss: 4.154786, ppl: 63.738331 +epoch: 0, batch: 7207, sum loss: 5823.593262, avg loss: 3.851583, ppl: 47.067513 +epoch: 0, batch: 7208, sum loss: 5756.628418, avg loss: 3.794745, ppl: 44.466896 +epoch: 0, batch: 7209, sum loss: 8145.158691, 
avg loss: 4.205038, ppl: 67.023148 +epoch: 0, batch: 7210, sum loss: 6955.755859, avg loss: 4.046397, ppl: 57.191010 +epoch: 0, batch: 7211, sum loss: 6358.094238, avg loss: 3.936900, ppl: 51.259468 +epoch: 0, batch: 7212, sum loss: 7359.663086, avg loss: 4.158002, ppl: 63.943630 +epoch: 0, batch: 7213, sum loss: 8119.921387, avg loss: 4.396276, ppl: 81.148109 +epoch: 0, batch: 7214, sum loss: 7291.154297, avg loss: 3.899013, ppl: 49.353714 +epoch: 0, batch: 7215, sum loss: 7033.091797, avg loss: 3.944527, ppl: 51.651920 +epoch: 0, batch: 7216, sum loss: 5772.713867, avg loss: 3.863932, ppl: 47.652336 +epoch: 0, batch: 7217, sum loss: 7463.250977, avg loss: 4.056115, ppl: 57.749500 +epoch: 0, batch: 7218, sum loss: 5820.305176, avg loss: 4.139619, ppl: 62.778919 +epoch: 0, batch: 7219, sum loss: 8204.008789, avg loss: 4.366157, ppl: 78.740456 +epoch: 0, batch: 7220, sum loss: 7119.658203, avg loss: 4.222810, ppl: 68.224945 +epoch: 0, batch: 7221, sum loss: 7820.249023, avg loss: 4.264040, ppl: 71.096603 +epoch: 0, batch: 7222, sum loss: 7938.745117, avg loss: 4.128313, ppl: 62.073093 +epoch: 0, batch: 7223, sum loss: 7147.539062, avg loss: 4.199494, ppl: 66.652588 +epoch: 0, batch: 7224, sum loss: 7599.208008, avg loss: 4.136749, ppl: 62.598999 +epoch: 0, batch: 7225, sum loss: 7405.176758, avg loss: 4.127747, ppl: 62.037968 +epoch: 0, batch: 7226, sum loss: 8449.185547, avg loss: 4.095582, ppl: 60.074291 +epoch: 0, batch: 7227, sum loss: 7170.050781, avg loss: 3.978941, ppl: 53.460377 +epoch: 0, batch: 7228, sum loss: 7324.925781, avg loss: 4.000506, ppl: 54.625778 +epoch: 0, batch: 7229, sum loss: 7084.618164, avg loss: 4.090426, ppl: 59.765343 +epoch: 0, batch: 7230, sum loss: 6842.865234, avg loss: 4.369646, ppl: 79.015625 +epoch: 0, batch: 7231, sum loss: 7476.924316, avg loss: 4.076840, ppl: 58.958862 +epoch: 0, batch: 7232, sum loss: 7802.413574, avg loss: 4.199361, ppl: 66.643753 +epoch: 0, batch: 7233, sum loss: 8142.453125, avg loss: 4.225456, ppl: 
68.405670 +epoch: 0, batch: 7234, sum loss: 7705.683594, avg loss: 4.087896, ppl: 59.614323 +epoch: 0, batch: 7235, sum loss: 6894.800293, avg loss: 4.008605, ppl: 55.069996 +epoch: 0, batch: 7236, sum loss: 5776.771484, avg loss: 3.874428, ppl: 48.155136 +epoch: 0, batch: 7237, sum loss: 6866.466797, avg loss: 3.868432, ppl: 47.867264 +epoch: 0, batch: 7238, sum loss: 7276.705566, avg loss: 4.083449, ppl: 59.349808 +epoch: 0, batch: 7239, sum loss: 6004.749023, avg loss: 3.884055, ppl: 48.620979 +epoch: 0, batch: 7240, sum loss: 6526.280762, avg loss: 4.051075, ppl: 57.459164 +epoch: 0, batch: 7241, sum loss: 6770.811523, avg loss: 4.329164, ppl: 75.880791 +epoch: 0, batch: 7242, sum loss: 6953.804688, avg loss: 4.214427, ppl: 67.655388 +epoch: 0, batch: 7243, sum loss: 6622.748047, avg loss: 4.118624, ppl: 61.474609 +epoch: 0, batch: 7244, sum loss: 6576.724609, avg loss: 4.207757, ppl: 67.205658 +epoch: 0, batch: 7245, sum loss: 5773.766602, avg loss: 3.831298, ppl: 46.122383 +epoch: 0, batch: 7246, sum loss: 7192.891113, avg loss: 4.160145, ppl: 64.080833 +epoch: 0, batch: 7247, sum loss: 7840.145508, avg loss: 4.436981, ppl: 84.519409 +epoch: 0, batch: 7248, sum loss: 6571.475586, avg loss: 4.004555, ppl: 54.847424 +epoch: 0, batch: 7249, sum loss: 6419.983398, avg loss: 3.919404, ppl: 50.370392 +epoch: 0, batch: 7250, sum loss: 7058.599609, avg loss: 3.969966, ppl: 52.982738 +epoch: 0, batch: 7251, sum loss: 7244.594238, avg loss: 4.095305, ppl: 60.057652 +epoch: 0, batch: 7252, sum loss: 6648.862793, avg loss: 4.064097, ppl: 58.212341 +epoch: 0, batch: 7253, sum loss: 7984.757812, avg loss: 4.147926, ppl: 63.302567 +epoch: 0, batch: 7254, sum loss: 7192.375977, avg loss: 4.332757, ppl: 76.153915 +epoch: 0, batch: 7255, sum loss: 6796.061523, avg loss: 4.131344, ppl: 62.261566 +epoch: 0, batch: 7256, sum loss: 7301.732422, avg loss: 4.189175, ppl: 65.968353 +epoch: 0, batch: 7257, sum loss: 6867.286133, avg loss: 3.969529, ppl: 52.959602 +epoch: 0, batch: 
7258, sum loss: 5793.800781, avg loss: 3.811711, ppl: 45.227772 +epoch: 0, batch: 7259, sum loss: 7972.581055, avg loss: 4.171942, ppl: 64.841232 +epoch: 0, batch: 7260, sum loss: 7172.045410, avg loss: 3.762878, ppl: 43.072197 +epoch: 0, batch: 7261, sum loss: 6692.666992, avg loss: 4.019620, ppl: 55.679939 +epoch: 0, batch: 7262, sum loss: 6950.426270, avg loss: 3.964875, ppl: 52.713703 +epoch: 0, batch: 7263, sum loss: 6684.929688, avg loss: 4.014973, ppl: 55.421783 +epoch: 0, batch: 7264, sum loss: 6220.351074, avg loss: 4.031336, ppl: 56.336117 +epoch: 0, batch: 7265, sum loss: 6338.397949, avg loss: 4.156326, ppl: 63.836575 +epoch: 0, batch: 7266, sum loss: 8394.347656, avg loss: 4.287205, ppl: 72.762825 +epoch: 0, batch: 7267, sum loss: 6881.197266, avg loss: 3.907551, ppl: 49.776890 +epoch: 0, batch: 7268, sum loss: 6844.046875, avg loss: 4.209131, ppl: 67.298019 +epoch: 0, batch: 7269, sum loss: 7130.667969, avg loss: 4.119392, ppl: 61.521820 +epoch: 0, batch: 7270, sum loss: 6674.155273, avg loss: 4.234870, ppl: 69.052696 +epoch: 0, batch: 7271, sum loss: 6935.983887, avg loss: 4.106563, ppl: 60.737579 +epoch: 0, batch: 7272, sum loss: 7366.523926, avg loss: 4.173668, ppl: 64.953255 +epoch: 0, batch: 7273, sum loss: 6961.524414, avg loss: 4.104672, ppl: 60.622883 +epoch: 0, batch: 7274, sum loss: 5815.961914, avg loss: 4.058592, ppl: 57.892731 +epoch: 0, batch: 7275, sum loss: 6261.644531, avg loss: 4.141300, ppl: 62.884502 +epoch: 0, batch: 7276, sum loss: 6353.967773, avg loss: 4.205141, ppl: 67.030022 +epoch: 0, batch: 7277, sum loss: 5357.690918, avg loss: 3.885200, ppl: 48.676678 +epoch: 0, batch: 7278, sum loss: 6528.888184, avg loss: 4.057730, ppl: 57.842869 +epoch: 0, batch: 7279, sum loss: 7100.340820, avg loss: 3.949021, ppl: 51.884563 +epoch: 0, batch: 7280, sum loss: 6892.800781, avg loss: 4.045071, ppl: 57.115250 +epoch: 0, batch: 7281, sum loss: 7219.095215, avg loss: 3.997284, ppl: 54.450073 +epoch: 0, batch: 7282, sum loss: 7835.961914, 
avg loss: 4.188115, ppl: 65.898430 +epoch: 0, batch: 7283, sum loss: 6913.208008, avg loss: 4.016972, ppl: 55.532677 +epoch: 0, batch: 7284, sum loss: 7062.465332, avg loss: 4.229021, ppl: 68.649994 +epoch: 0, batch: 7285, sum loss: 6323.401367, avg loss: 4.025081, ppl: 55.984825 +epoch: 0, batch: 7286, sum loss: 6409.541504, avg loss: 4.059241, ppl: 57.930313 +epoch: 0, batch: 7287, sum loss: 6403.263672, avg loss: 3.992060, ppl: 54.166340 +epoch: 0, batch: 7288, sum loss: 7311.775879, avg loss: 4.168629, ppl: 64.626801 +epoch: 0, batch: 7289, sum loss: 5848.277832, avg loss: 3.652891, ppl: 38.586044 +epoch: 0, batch: 7290, sum loss: 7100.881348, avg loss: 3.897300, ppl: 49.269268 +epoch: 0, batch: 7291, sum loss: 6679.452637, avg loss: 3.872147, ppl: 48.045410 +epoch: 0, batch: 7292, sum loss: 6515.513672, avg loss: 3.880592, ppl: 48.452873 +epoch: 0, batch: 7293, sum loss: 6861.589844, avg loss: 4.128514, ppl: 62.085583 +epoch: 0, batch: 7294, sum loss: 6551.616699, avg loss: 4.009557, ppl: 55.122459 +epoch: 0, batch: 7295, sum loss: 8608.025391, avg loss: 4.289001, ppl: 72.893646 +epoch: 0, batch: 7296, sum loss: 6632.031250, avg loss: 4.163234, ppl: 64.279053 +epoch: 0, batch: 7297, sum loss: 7542.229980, avg loss: 4.160083, ppl: 64.076828 +epoch: 0, batch: 7298, sum loss: 6557.586426, avg loss: 3.742915, ppl: 42.220882 +epoch: 0, batch: 7299, sum loss: 8214.791992, avg loss: 4.319029, ppl: 75.115646 +epoch: 0, batch: 7300, sum loss: 6903.381348, avg loss: 4.301172, ppl: 73.786240 +epoch: 0, batch: 7301, sum loss: 7122.331055, avg loss: 4.116954, ppl: 61.372040 +epoch: 0, batch: 7302, sum loss: 8062.658203, avg loss: 4.256948, ppl: 70.594170 +epoch: 0, batch: 7303, sum loss: 6685.980469, avg loss: 3.914508, ppl: 50.124420 +epoch: 0, batch: 7304, sum loss: 5981.820312, avg loss: 3.993204, ppl: 54.228378 +epoch: 0, batch: 7305, sum loss: 6673.293945, avg loss: 4.269542, ppl: 71.488869 +epoch: 0, batch: 7306, sum loss: 5999.824219, avg loss: 4.160766, ppl: 
64.120628 +epoch: 0, batch: 7307, sum loss: 7241.245605, avg loss: 4.328300, ppl: 75.815292 +epoch: 0, batch: 7308, sum loss: 6311.682617, avg loss: 3.896101, ppl: 49.210182 +epoch: 0, batch: 7309, sum loss: 6890.868164, avg loss: 4.146130, ppl: 63.188992 +epoch: 0, batch: 7310, sum loss: 6399.539062, avg loss: 3.974869, ppl: 53.243141 +epoch: 0, batch: 7311, sum loss: 7736.406738, avg loss: 4.086850, ppl: 59.551991 +epoch: 0, batch: 7312, sum loss: 6303.725586, avg loss: 4.010004, ppl: 55.147068 +epoch: 0, batch: 7313, sum loss: 7205.737305, avg loss: 3.985474, ppl: 53.810795 +epoch: 0, batch: 7314, sum loss: 7310.166016, avg loss: 4.047711, ppl: 57.266220 +epoch: 0, batch: 7315, sum loss: 6228.475586, avg loss: 4.111205, ppl: 61.020210 +epoch: 0, batch: 7316, sum loss: 6562.853516, avg loss: 3.894869, ppl: 49.149597 +epoch: 0, batch: 7317, sum loss: 7619.750000, avg loss: 4.195898, ppl: 66.413315 +epoch: 0, batch: 7318, sum loss: 5453.685547, avg loss: 3.881627, ppl: 48.503059 +epoch: 0, batch: 7319, sum loss: 6055.173828, avg loss: 3.901530, ppl: 49.478081 +epoch: 0, batch: 7320, sum loss: 6376.741699, avg loss: 4.043590, ppl: 57.030720 +epoch: 0, batch: 7321, sum loss: 7739.853027, avg loss: 4.302309, ppl: 73.870132 +epoch: 0, batch: 7322, sum loss: 6039.111328, avg loss: 4.047662, ppl: 57.263405 +epoch: 0, batch: 7323, sum loss: 5812.434082, avg loss: 3.844203, ppl: 46.721420 +epoch: 0, batch: 7324, sum loss: 9083.778320, avg loss: 4.409601, ppl: 82.236626 +epoch: 0, batch: 7325, sum loss: 6841.540039, avg loss: 3.975328, ppl: 53.267597 +epoch: 0, batch: 7326, sum loss: 6588.618164, avg loss: 4.212671, ppl: 67.536705 +epoch: 0, batch: 7327, sum loss: 7161.425293, avg loss: 3.904812, ppl: 49.640739 +epoch: 0, batch: 7328, sum loss: 7198.827637, avg loss: 4.090243, ppl: 59.754402 +epoch: 0, batch: 7329, sum loss: 6344.689941, avg loss: 3.852271, ppl: 47.099888 +epoch: 0, batch: 7330, sum loss: 7044.226074, avg loss: 4.246068, ppl: 69.830269 +epoch: 0, batch: 
7331, sum loss: 6243.287109, avg loss: 3.865813, ppl: 47.742050 +epoch: 0, batch: 7332, sum loss: 7769.135742, avg loss: 3.931749, ppl: 50.996086 +epoch: 0, batch: 7333, sum loss: 7136.863281, avg loss: 4.312304, ppl: 74.612236 +epoch: 0, batch: 7334, sum loss: 6663.015625, avg loss: 4.075239, ppl: 58.864529 +epoch: 0, batch: 7335, sum loss: 6856.485352, avg loss: 4.229787, ppl: 68.702621 +epoch: 0, batch: 7336, sum loss: 7030.723633, avg loss: 3.895138, ppl: 49.162827 +epoch: 0, batch: 7337, sum loss: 7397.055664, avg loss: 3.885008, ppl: 48.667347 +epoch: 0, batch: 7338, sum loss: 6569.881836, avg loss: 3.996279, ppl: 54.395344 +epoch: 0, batch: 7339, sum loss: 8180.171387, avg loss: 4.116845, ppl: 61.365337 +epoch: 0, batch: 7340, sum loss: 7297.109375, avg loss: 4.129660, ppl: 62.156796 +epoch: 0, batch: 7341, sum loss: 6589.354492, avg loss: 3.998395, ppl: 54.510574 +epoch: 0, batch: 7342, sum loss: 6701.902344, avg loss: 4.193932, ppl: 66.282875 +epoch: 0, batch: 7343, sum loss: 5878.013672, avg loss: 3.792267, ppl: 44.356846 +epoch: 0, batch: 7344, sum loss: 7408.902832, avg loss: 4.176383, ppl: 65.129822 +epoch: 0, batch: 7345, sum loss: 6973.359863, avg loss: 4.325906, ppl: 75.633987 +epoch: 0, batch: 7346, sum loss: 6435.723633, avg loss: 3.905172, ppl: 49.658627 +epoch: 0, batch: 7347, sum loss: 8268.280273, avg loss: 4.144501, ppl: 63.086147 +epoch: 0, batch: 7348, sum loss: 6527.169434, avg loss: 3.862230, ppl: 47.571331 +epoch: 0, batch: 7349, sum loss: 7883.169434, avg loss: 4.275038, ppl: 71.882851 +epoch: 0, batch: 7350, sum loss: 7280.131836, avg loss: 4.076222, ppl: 58.922436 +epoch: 0, batch: 7351, sum loss: 7079.831543, avg loss: 4.031795, ppl: 56.361992 +epoch: 0, batch: 7352, sum loss: 7852.444824, avg loss: 4.374621, ppl: 79.409767 +epoch: 0, batch: 7353, sum loss: 7149.920410, avg loss: 4.081005, ppl: 59.204948 +epoch: 0, batch: 7354, sum loss: 6211.485352, avg loss: 3.896791, ppl: 49.244183 +epoch: 0, batch: 7355, sum loss: 6423.818359, 
avg loss: 4.073442, ppl: 58.758888 +epoch: 0, batch: 7356, sum loss: 6738.267578, avg loss: 3.733112, ppl: 41.809021 +epoch: 0, batch: 7357, sum loss: 6624.132812, avg loss: 4.044037, ppl: 57.056206 +epoch: 0, batch: 7358, sum loss: 6135.019043, avg loss: 3.897725, ppl: 49.290180 +epoch: 0, batch: 7359, sum loss: 7455.757812, avg loss: 4.110120, ppl: 60.954048 +epoch: 0, batch: 7360, sum loss: 6586.640625, avg loss: 4.011353, ppl: 55.221508 +epoch: 0, batch: 7361, sum loss: 7234.959961, avg loss: 4.160414, ppl: 64.098068 +epoch: 0, batch: 7362, sum loss: 7899.301758, avg loss: 4.237823, ppl: 69.256912 +epoch: 0, batch: 7363, sum loss: 6837.704590, avg loss: 4.106730, ppl: 60.747719 +epoch: 0, batch: 7364, sum loss: 7279.018555, avg loss: 4.080168, ppl: 59.155392 +epoch: 0, batch: 7365, sum loss: 6756.115234, avg loss: 4.094615, ppl: 60.016254 +epoch: 0, batch: 7366, sum loss: 7602.556641, avg loss: 4.319634, ppl: 75.161148 +epoch: 0, batch: 7367, sum loss: 8551.634766, avg loss: 4.212628, ppl: 67.533775 +epoch: 0, batch: 7368, sum loss: 7658.903320, avg loss: 4.322180, ppl: 75.352737 +epoch: 0, batch: 7369, sum loss: 6827.952637, avg loss: 4.351786, ppl: 77.616974 +epoch: 0, batch: 7370, sum loss: 7065.020020, avg loss: 4.263741, ppl: 71.075417 +epoch: 0, batch: 7371, sum loss: 8052.992676, avg loss: 4.042667, ppl: 56.978123 +epoch: 0, batch: 7372, sum loss: 5467.901855, avg loss: 3.847925, ppl: 46.895664 +epoch: 0, batch: 7373, sum loss: 7075.651367, avg loss: 3.966172, ppl: 52.782120 +epoch: 0, batch: 7374, sum loss: 7166.774902, avg loss: 4.065102, ppl: 58.270859 +epoch: 0, batch: 7375, sum loss: 7308.818359, avg loss: 4.241914, ppl: 69.540848 +epoch: 0, batch: 7376, sum loss: 6791.185547, avg loss: 4.061714, ppl: 58.073772 +epoch: 0, batch: 7377, sum loss: 7975.304199, avg loss: 4.081527, ppl: 59.235867 +epoch: 0, batch: 7378, sum loss: 7121.115234, avg loss: 4.052997, ppl: 57.569714 +epoch: 0, batch: 7379, sum loss: 5998.456055, avg loss: 4.069509, ppl: 
58.528221 +epoch: 0, batch: 7380, sum loss: 5434.565430, avg loss: 3.835262, ppl: 46.305538 +epoch: 0, batch: 7381, sum loss: 6685.979492, avg loss: 4.111918, ppl: 61.063751 +epoch: 0, batch: 7382, sum loss: 7538.123047, avg loss: 3.928152, ppl: 50.812969 +epoch: 0, batch: 7383, sum loss: 6410.951172, avg loss: 4.029510, ppl: 56.233379 +epoch: 0, batch: 7384, sum loss: 7779.510742, avg loss: 3.985405, ppl: 53.807087 +epoch: 0, batch: 7385, sum loss: 6633.155762, avg loss: 3.818743, ppl: 45.546909 +epoch: 0, batch: 7386, sum loss: 6174.205078, avg loss: 3.905253, ppl: 49.662640 +epoch: 0, batch: 7387, sum loss: 6784.768555, avg loss: 4.126988, ppl: 61.990921 +epoch: 0, batch: 7388, sum loss: 7860.770996, avg loss: 4.089891, ppl: 59.733406 +epoch: 0, batch: 7389, sum loss: 5766.426758, avg loss: 3.672883, ppl: 39.365242 +epoch: 0, batch: 7390, sum loss: 6360.359375, avg loss: 3.935866, ppl: 51.206470 +epoch: 0, batch: 7391, sum loss: 6529.449219, avg loss: 3.962044, ppl: 52.564682 +epoch: 0, batch: 7392, sum loss: 6266.344727, avg loss: 3.816288, ppl: 45.435238 +epoch: 0, batch: 7393, sum loss: 6054.132812, avg loss: 4.052298, ppl: 57.529484 +epoch: 0, batch: 7394, sum loss: 8001.478027, avg loss: 4.381970, ppl: 79.995499 +epoch: 0, batch: 7395, sum loss: 7656.474609, avg loss: 4.116385, ppl: 61.337078 +epoch: 0, batch: 7396, sum loss: 7732.088867, avg loss: 3.920938, ppl: 50.447720 +epoch: 0, batch: 7397, sum loss: 7074.367188, avg loss: 4.284899, ppl: 72.595200 +epoch: 0, batch: 7398, sum loss: 6972.849121, avg loss: 4.077690, ppl: 59.008980 +epoch: 0, batch: 7399, sum loss: 7963.722656, avg loss: 4.489133, ppl: 89.044243 +epoch: 0, batch: 7400, sum loss: 6352.065430, avg loss: 3.794544, ppl: 44.457935 +epoch: 0, batch: 7401, sum loss: 7953.154297, avg loss: 4.150916, ppl: 63.492107 +epoch: 0, batch: 7402, sum loss: 5924.370117, avg loss: 3.663804, ppl: 39.009438 +epoch: 0, batch: 7403, sum loss: 7923.839844, avg loss: 4.118420, ppl: 61.462063 +epoch: 0, batch: 
7404, sum loss: 7898.738281, avg loss: 4.302145, ppl: 73.858047 +epoch: 0, batch: 7405, sum loss: 6949.818359, avg loss: 4.033557, ppl: 56.461384 +epoch: 0, batch: 7406, sum loss: 7573.967285, avg loss: 4.005271, ppl: 54.886696 +epoch: 0, batch: 7407, sum loss: 6158.366211, avg loss: 3.988580, ppl: 53.978161 +epoch: 0, batch: 7408, sum loss: 7048.966797, avg loss: 3.986972, ppl: 53.891476 +epoch: 0, batch: 7409, sum loss: 6879.106445, avg loss: 4.039405, ppl: 56.792534 +epoch: 0, batch: 7410, sum loss: 6708.019043, avg loss: 3.978659, ppl: 53.445324 +epoch: 0, batch: 7411, sum loss: 6051.283691, avg loss: 3.999527, ppl: 54.572315 +epoch: 0, batch: 7412, sum loss: 7708.931641, avg loss: 4.113624, ppl: 61.167995 +epoch: 0, batch: 7413, sum loss: 6400.039062, avg loss: 4.107856, ppl: 60.816174 +epoch: 0, batch: 7414, sum loss: 6276.808105, avg loss: 3.952650, ppl: 52.073181 +epoch: 0, batch: 7415, sum loss: 5683.494141, avg loss: 3.654980, ppl: 38.666763 +epoch: 0, batch: 7416, sum loss: 7120.479492, avg loss: 4.147047, ppl: 63.246960 +epoch: 0, batch: 7417, sum loss: 5621.297852, avg loss: 3.895563, ppl: 49.183743 +epoch: 0, batch: 7418, sum loss: 7549.608398, avg loss: 3.921874, ppl: 50.494999 +epoch: 0, batch: 7419, sum loss: 6619.379395, avg loss: 4.016614, ppl: 55.512817 +epoch: 0, batch: 7420, sum loss: 7362.081055, avg loss: 4.067448, ppl: 58.407722 +epoch: 0, batch: 7421, sum loss: 7299.087891, avg loss: 4.144854, ppl: 63.108410 +epoch: 0, batch: 7422, sum loss: 4946.044434, avg loss: 3.732864, ppl: 41.798634 +epoch: 0, batch: 7423, sum loss: 7163.193359, avg loss: 4.291907, ppl: 73.105774 +epoch: 0, batch: 7424, sum loss: 7573.571289, avg loss: 4.030640, ppl: 56.296909 +epoch: 0, batch: 7425, sum loss: 7332.068359, avg loss: 4.173061, ppl: 64.913841 +epoch: 0, batch: 7426, sum loss: 6719.758301, avg loss: 3.897772, ppl: 49.292496 +epoch: 0, batch: 7427, sum loss: 6100.889648, avg loss: 3.992729, ppl: 54.202602 +epoch: 0, batch: 7428, sum loss: 8202.308594, 
avg loss: 4.010909, ppl: 55.196999 +epoch: 0, batch: 7429, sum loss: 8046.647949, avg loss: 4.235078, ppl: 69.067085 +epoch: 0, batch: 7430, sum loss: 7698.650879, avg loss: 3.995148, ppl: 54.333904 +epoch: 0, batch: 7431, sum loss: 7411.225586, avg loss: 4.056500, ppl: 57.771751 +epoch: 0, batch: 7432, sum loss: 5981.900391, avg loss: 3.948449, ppl: 51.854885 +epoch: 0, batch: 7433, sum loss: 8407.623047, avg loss: 4.300574, ppl: 73.742134 +epoch: 0, batch: 7434, sum loss: 7771.653809, avg loss: 4.251452, ppl: 70.207275 +epoch: 0, batch: 7435, sum loss: 7295.695801, avg loss: 4.138228, ppl: 62.691631 +epoch: 0, batch: 7436, sum loss: 6585.408691, avg loss: 3.979099, ppl: 53.468838 +epoch: 0, batch: 7437, sum loss: 5913.504883, avg loss: 3.952878, ppl: 52.085041 +epoch: 0, batch: 7438, sum loss: 7058.613281, avg loss: 4.042734, ppl: 56.981926 +epoch: 0, batch: 7439, sum loss: 7417.946777, avg loss: 4.096050, ppl: 60.102428 +epoch: 0, batch: 7440, sum loss: 6683.053711, avg loss: 3.970917, ppl: 53.033154 +epoch: 0, batch: 7441, sum loss: 6903.858398, avg loss: 4.196874, ppl: 66.478203 +epoch: 0, batch: 7442, sum loss: 7689.740234, avg loss: 4.051497, ppl: 57.483418 +epoch: 0, batch: 7443, sum loss: 7686.337402, avg loss: 4.223262, ppl: 68.255791 +epoch: 0, batch: 7444, sum loss: 6244.905273, avg loss: 4.044628, ppl: 57.089954 +epoch: 0, batch: 7445, sum loss: 6345.030762, avg loss: 3.983070, ppl: 53.681591 +epoch: 0, batch: 7446, sum loss: 5716.668945, avg loss: 3.899501, ppl: 49.377819 +epoch: 0, batch: 7447, sum loss: 6858.804199, avg loss: 4.129322, ppl: 62.135784 +epoch: 0, batch: 7448, sum loss: 7896.217773, avg loss: 4.396558, ppl: 81.170982 +epoch: 0, batch: 7449, sum loss: 6630.912598, avg loss: 3.999344, ppl: 54.562351 +epoch: 0, batch: 7450, sum loss: 6766.309570, avg loss: 4.032366, ppl: 56.394173 +epoch: 0, batch: 7451, sum loss: 6599.791992, avg loss: 3.861786, ppl: 47.550194 +epoch: 0, batch: 7452, sum loss: 7405.878418, avg loss: 4.051356, ppl: 
57.475330 +epoch: 0, batch: 7453, sum loss: 6962.759277, avg loss: 4.050471, ppl: 57.424488 +epoch: 0, batch: 7454, sum loss: 6114.028320, avg loss: 4.017102, ppl: 55.539906 +epoch: 0, batch: 7455, sum loss: 7497.596680, avg loss: 4.048378, ppl: 57.304432 +epoch: 0, batch: 7456, sum loss: 6855.310547, avg loss: 3.937571, ppl: 51.293846 +epoch: 0, batch: 7457, sum loss: 6419.291504, avg loss: 3.862390, ppl: 47.578918 +epoch: 0, batch: 7458, sum loss: 6037.841309, avg loss: 3.910519, ppl: 49.924862 +epoch: 0, batch: 7459, sum loss: 6026.763672, avg loss: 3.661460, ppl: 38.918137 +epoch: 0, batch: 7460, sum loss: 6941.200195, avg loss: 3.752000, ppl: 42.606213 +epoch: 0, batch: 7461, sum loss: 6877.862305, avg loss: 4.096404, ppl: 60.123699 +epoch: 0, batch: 7462, sum loss: 6342.685059, avg loss: 4.055425, ppl: 57.709694 +epoch: 0, batch: 7463, sum loss: 6630.257812, avg loss: 4.193711, ppl: 66.268272 +epoch: 0, batch: 7464, sum loss: 6036.078613, avg loss: 3.861855, ppl: 47.553459 +epoch: 0, batch: 7465, sum loss: 6271.704590, avg loss: 3.927179, ppl: 50.763565 +epoch: 0, batch: 7466, sum loss: 5804.833008, avg loss: 3.885430, ppl: 48.687889 +epoch: 0, batch: 7467, sum loss: 6320.469238, avg loss: 4.082991, ppl: 59.322617 +epoch: 0, batch: 7468, sum loss: 6325.456543, avg loss: 3.723047, ppl: 41.390320 +epoch: 0, batch: 7469, sum loss: 7230.500977, avg loss: 4.374169, ppl: 79.373840 +epoch: 0, batch: 7470, sum loss: 6122.873535, avg loss: 4.009740, ppl: 55.132553 +epoch: 0, batch: 7471, sum loss: 6746.748047, avg loss: 4.128977, ppl: 62.114338 +epoch: 0, batch: 7472, sum loss: 7137.817383, avg loss: 4.106915, ppl: 60.758957 +epoch: 0, batch: 7473, sum loss: 7039.544922, avg loss: 3.977144, ppl: 53.364407 +epoch: 0, batch: 7474, sum loss: 7848.331055, avg loss: 4.336095, ppl: 76.408569 +epoch: 0, batch: 7475, sum loss: 6570.198242, avg loss: 3.955568, ppl: 52.225342 +epoch: 0, batch: 7476, sum loss: 5971.205078, avg loss: 3.822795, ppl: 45.731834 +epoch: 0, batch: 
7477, sum loss: 6812.730469, avg loss: 3.935719, ppl: 51.198975 +epoch: 0, batch: 7478, sum loss: 7707.451660, avg loss: 4.101891, ppl: 60.454472 +epoch: 0, batch: 7479, sum loss: 7304.854492, avg loss: 3.812555, ppl: 45.265926 +epoch: 0, batch: 7480, sum loss: 7280.379395, avg loss: 4.184126, ppl: 65.636101 +epoch: 0, batch: 7481, sum loss: 6546.016113, avg loss: 3.843815, ppl: 46.703289 +epoch: 0, batch: 7482, sum loss: 7362.958008, avg loss: 4.205002, ppl: 67.020721 +epoch: 0, batch: 7483, sum loss: 7194.449219, avg loss: 3.957343, ppl: 52.318123 +epoch: 0, batch: 7484, sum loss: 7709.891113, avg loss: 4.190158, ppl: 66.033218 +epoch: 0, batch: 7485, sum loss: 6930.052246, avg loss: 4.107915, ppl: 60.819771 +epoch: 0, batch: 7486, sum loss: 5607.625488, avg loss: 4.037167, ppl: 56.665585 +epoch: 0, batch: 7487, sum loss: 6957.106445, avg loss: 4.099650, ppl: 60.319195 +epoch: 0, batch: 7488, sum loss: 7535.571289, avg loss: 4.161000, ppl: 64.135643 +epoch: 0, batch: 7489, sum loss: 7078.734863, avg loss: 4.208522, ppl: 67.257080 +epoch: 0, batch: 7490, sum loss: 6091.568848, avg loss: 4.034152, ppl: 56.494968 +epoch: 0, batch: 7491, sum loss: 5949.513184, avg loss: 3.782271, ppl: 43.915676 +epoch: 0, batch: 7492, sum loss: 7714.808594, avg loss: 4.075440, ppl: 58.876404 +epoch: 0, batch: 7493, sum loss: 6797.802734, avg loss: 3.996356, ppl: 54.399582 +epoch: 0, batch: 7494, sum loss: 6631.492676, avg loss: 4.026407, ppl: 56.059143 +epoch: 0, batch: 7495, sum loss: 5341.276367, avg loss: 3.907298, ppl: 49.764313 +epoch: 0, batch: 7496, sum loss: 6749.520508, avg loss: 3.926423, ppl: 50.725201 +epoch: 0, batch: 7497, sum loss: 7198.804688, avg loss: 3.893350, ppl: 49.075020 +epoch: 0, batch: 7498, sum loss: 6904.633789, avg loss: 3.810504, ppl: 45.173210 +epoch: 0, batch: 7499, sum loss: 7074.546875, avg loss: 3.893532, ppl: 49.083923 +epoch: 0, batch: 7500, sum loss: 6657.597168, avg loss: 4.069436, ppl: 58.523949 +epoch: 0, batch: 7501, sum loss: 7484.109863, 
avg loss: 3.912237, ppl: 50.010708 +epoch: 0, batch: 7502, sum loss: 7580.638672, avg loss: 4.008799, ppl: 55.080685 +epoch: 0, batch: 7503, sum loss: 6128.857422, avg loss: 3.928755, ppl: 50.843628 +epoch: 0, batch: 7504, sum loss: 5548.456543, avg loss: 3.918401, ppl: 50.319942 +epoch: 0, batch: 7505, sum loss: 6919.168945, avg loss: 3.940301, ppl: 51.434101 +epoch: 0, batch: 7506, sum loss: 7143.901367, avg loss: 4.049831, ppl: 57.387753 +epoch: 0, batch: 7507, sum loss: 6125.646973, avg loss: 4.256877, ppl: 70.589218 +epoch: 0, batch: 7508, sum loss: 7575.592773, avg loss: 4.042472, ppl: 56.966984 +epoch: 0, batch: 7509, sum loss: 6493.381836, avg loss: 4.048243, ppl: 57.296700 +epoch: 0, batch: 7510, sum loss: 6704.693359, avg loss: 4.080763, ppl: 59.190605 +epoch: 0, batch: 7511, sum loss: 9453.380859, avg loss: 4.186617, ppl: 65.799805 +epoch: 0, batch: 7512, sum loss: 7542.648438, avg loss: 4.001405, ppl: 54.674927 +epoch: 0, batch: 7513, sum loss: 6291.275879, avg loss: 3.792210, ppl: 44.354298 +epoch: 0, batch: 7514, sum loss: 7040.334961, avg loss: 4.107547, ppl: 60.797386 +epoch: 0, batch: 7515, sum loss: 5876.604980, avg loss: 4.011334, ppl: 55.220509 +epoch: 0, batch: 7516, sum loss: 6842.382812, avg loss: 4.223693, ppl: 68.285225 +epoch: 0, batch: 7517, sum loss: 6719.699707, avg loss: 4.189339, ppl: 65.979172 +epoch: 0, batch: 7518, sum loss: 7522.388184, avg loss: 3.969598, ppl: 52.963238 +epoch: 0, batch: 7519, sum loss: 7193.110352, avg loss: 4.105657, ppl: 60.682575 +epoch: 0, batch: 7520, sum loss: 6421.900879, avg loss: 4.013688, ppl: 55.350632 +epoch: 0, batch: 7521, sum loss: 6773.972168, avg loss: 3.681506, ppl: 39.706161 +epoch: 0, batch: 7522, sum loss: 6274.302246, avg loss: 4.004022, ppl: 54.818192 +epoch: 0, batch: 7523, sum loss: 7522.315430, avg loss: 4.235538, ppl: 69.098846 +epoch: 0, batch: 7524, sum loss: 5726.577148, avg loss: 3.845922, ppl: 46.801792 +epoch: 0, batch: 7525, sum loss: 7118.873047, avg loss: 4.019691, ppl: 
55.683895 +epoch: 0, batch: 7526, sum loss: 5171.114258, avg loss: 3.649340, ppl: 38.449268 +epoch: 0, batch: 7527, sum loss: 6777.195312, avg loss: 4.053347, ppl: 57.589867 +epoch: 0, batch: 7528, sum loss: 6699.804688, avg loss: 4.197873, ppl: 66.544617 +epoch: 0, batch: 7529, sum loss: 7421.192383, avg loss: 4.097843, ppl: 60.210255 +epoch: 0, batch: 7530, sum loss: 7022.523926, avg loss: 4.126042, ppl: 61.932331 +epoch: 0, batch: 7531, sum loss: 7017.754395, avg loss: 4.159902, ppl: 64.065247 +epoch: 0, batch: 7532, sum loss: 6053.017578, avg loss: 3.783136, ppl: 43.953659 +epoch: 0, batch: 7533, sum loss: 7151.003418, avg loss: 3.918358, ppl: 50.317760 +epoch: 0, batch: 7534, sum loss: 7184.293945, avg loss: 3.989058, ppl: 54.004009 +epoch: 0, batch: 7535, sum loss: 6738.378418, avg loss: 3.949811, ppl: 51.925575 +epoch: 0, batch: 7536, sum loss: 6645.511230, avg loss: 4.057088, ppl: 57.805756 +epoch: 0, batch: 7537, sum loss: 7853.603516, avg loss: 4.242898, ppl: 69.609253 +epoch: 0, batch: 7538, sum loss: 6761.824707, avg loss: 4.192080, ppl: 66.160263 +epoch: 0, batch: 7539, sum loss: 6872.765137, avg loss: 4.047565, ppl: 57.257862 +epoch: 0, batch: 7540, sum loss: 6626.923828, avg loss: 4.043272, ppl: 57.012611 +epoch: 0, batch: 7541, sum loss: 6416.673828, avg loss: 3.721969, ppl: 41.345718 +epoch: 0, batch: 7542, sum loss: 7075.618652, avg loss: 4.022523, ppl: 55.841839 +epoch: 0, batch: 7543, sum loss: 7201.092285, avg loss: 4.007286, ppl: 54.997383 +epoch: 0, batch: 7544, sum loss: 8046.202637, avg loss: 4.206065, ppl: 67.092026 +epoch: 0, batch: 7545, sum loss: 7022.961426, avg loss: 3.905985, ppl: 49.699017 +epoch: 0, batch: 7546, sum loss: 7101.274414, avg loss: 4.097677, ppl: 60.200264 +epoch: 0, batch: 7547, sum loss: 7326.211914, avg loss: 4.102023, ppl: 60.462460 +epoch: 0, batch: 7548, sum loss: 6147.760742, avg loss: 3.767010, ppl: 43.250572 +epoch: 0, batch: 7549, sum loss: 7467.384277, avg loss: 3.930202, ppl: 50.917286 +epoch: 0, batch: 
7550, sum loss: 8056.990723, avg loss: 3.992563, ppl: 54.193584 +epoch: 0, batch: 7551, sum loss: 7736.497070, avg loss: 4.200053, ppl: 66.689850 +epoch: 0, batch: 7552, sum loss: 6153.153809, avg loss: 3.995554, ppl: 54.355968 +epoch: 0, batch: 7553, sum loss: 6789.190430, avg loss: 4.129678, ppl: 62.157890 +epoch: 0, batch: 7554, sum loss: 6766.580078, avg loss: 4.098474, ppl: 60.248280 +epoch: 0, batch: 7555, sum loss: 6480.289062, avg loss: 3.997710, ppl: 54.473289 +epoch: 0, batch: 7556, sum loss: 5835.407715, avg loss: 3.841611, ppl: 46.600506 +epoch: 0, batch: 7557, sum loss: 6475.315430, avg loss: 3.753806, ppl: 42.683239 +epoch: 0, batch: 7558, sum loss: 6308.724121, avg loss: 3.832761, ppl: 46.189884 +epoch: 0, batch: 7559, sum loss: 6589.930664, avg loss: 3.977025, ppl: 53.358059 +epoch: 0, batch: 7560, sum loss: 6190.357422, avg loss: 3.930386, ppl: 50.926609 +epoch: 0, batch: 7561, sum loss: 7561.126953, avg loss: 4.069498, ppl: 58.527550 +epoch: 0, batch: 7562, sum loss: 6697.186523, avg loss: 4.241410, ppl: 69.505806 +epoch: 0, batch: 7563, sum loss: 7227.764648, avg loss: 4.192439, ppl: 66.184021 +epoch: 0, batch: 7564, sum loss: 6567.876465, avg loss: 3.829665, ppl: 46.047131 +epoch: 0, batch: 7565, sum loss: 7962.891113, avg loss: 4.184388, ppl: 65.653320 +epoch: 0, batch: 7566, sum loss: 6843.514160, avg loss: 3.988062, ppl: 53.950226 +epoch: 0, batch: 7567, sum loss: 7093.948242, avg loss: 4.107671, ppl: 60.804955 +epoch: 0, batch: 7568, sum loss: 5574.175293, avg loss: 3.684187, ppl: 39.812759 +epoch: 0, batch: 7569, sum loss: 6124.898926, avg loss: 4.130074, ppl: 62.182495 +epoch: 0, batch: 7570, sum loss: 5375.159668, avg loss: 3.958144, ppl: 52.360065 +epoch: 0, batch: 7571, sum loss: 6219.334473, avg loss: 3.989310, ppl: 54.017620 +epoch: 0, batch: 7572, sum loss: 7443.492676, avg loss: 4.167689, ppl: 64.566086 +epoch: 0, batch: 7573, sum loss: 6352.972168, avg loss: 3.978067, ppl: 53.413670 +epoch: 0, batch: 7574, sum loss: 5488.230469, 
avg loss: 3.829889, ppl: 46.057419 +epoch: 0, batch: 7575, sum loss: 8296.180664, avg loss: 4.415211, ppl: 82.699265 +epoch: 0, batch: 7576, sum loss: 7358.206055, avg loss: 4.110730, ppl: 60.991207 +epoch: 0, batch: 7577, sum loss: 6521.178711, avg loss: 3.858685, ppl: 47.403000 +epoch: 0, batch: 7578, sum loss: 6346.433105, avg loss: 3.900696, ppl: 49.436859 +epoch: 0, batch: 7579, sum loss: 6694.750488, avg loss: 4.057425, ppl: 57.825191 +epoch: 0, batch: 7580, sum loss: 7079.743164, avg loss: 4.066481, ppl: 58.351269 +epoch: 0, batch: 7581, sum loss: 7076.275391, avg loss: 4.076196, ppl: 58.920891 +epoch: 0, batch: 7582, sum loss: 6291.293457, avg loss: 3.747048, ppl: 42.395741 +epoch: 0, batch: 7583, sum loss: 7184.937500, avg loss: 4.138789, ppl: 62.726795 +epoch: 0, batch: 7584, sum loss: 5578.470703, avg loss: 3.852535, ppl: 47.112354 +epoch: 0, batch: 7585, sum loss: 7465.810547, avg loss: 4.066346, ppl: 58.343369 +epoch: 0, batch: 7586, sum loss: 6487.195312, avg loss: 4.111023, ppl: 61.009125 +epoch: 0, batch: 7587, sum loss: 8481.928711, avg loss: 4.249464, ppl: 70.067818 +epoch: 0, batch: 7588, sum loss: 5973.501953, avg loss: 4.014450, ppl: 55.392826 +epoch: 0, batch: 7589, sum loss: 7763.917480, avg loss: 3.931098, ppl: 50.962879 +epoch: 0, batch: 7590, sum loss: 7696.920898, avg loss: 4.120407, ppl: 61.584309 +epoch: 0, batch: 7591, sum loss: 7952.497070, avg loss: 4.357533, ppl: 78.064270 +epoch: 0, batch: 7592, sum loss: 6761.488281, avg loss: 3.982031, ppl: 53.625832 +epoch: 0, batch: 7593, sum loss: 6408.116699, avg loss: 3.682826, ppl: 39.758575 +epoch: 0, batch: 7594, sum loss: 7545.175781, avg loss: 4.123047, ppl: 61.747093 +epoch: 0, batch: 7595, sum loss: 7354.124512, avg loss: 4.124579, ppl: 61.841793 +epoch: 0, batch: 7596, sum loss: 7801.807129, avg loss: 4.038202, ppl: 56.724277 +epoch: 0, batch: 7597, sum loss: 7338.215332, avg loss: 4.138870, ppl: 62.731880 +epoch: 0, batch: 7598, sum loss: 6846.748047, avg loss: 3.964533, ppl: 
52.695648 +epoch: 0, batch: 7599, sum loss: 6485.716797, avg loss: 3.609191, ppl: 36.936176 +epoch: 0, batch: 7600, sum loss: 6537.512695, avg loss: 3.845596, ppl: 46.786552 +epoch: 0, batch: 7601, sum loss: 7714.241211, avg loss: 4.257308, ppl: 70.619583 +epoch: 0, batch: 7602, sum loss: 6679.343750, avg loss: 3.795082, ppl: 44.481865 +epoch: 0, batch: 7603, sum loss: 6713.742188, avg loss: 3.903338, ppl: 49.567650 +epoch: 0, batch: 7604, sum loss: 6467.039062, avg loss: 4.044427, ppl: 57.078465 +epoch: 0, batch: 7605, sum loss: 5898.900391, avg loss: 3.880856, ppl: 48.465675 +epoch: 0, batch: 7606, sum loss: 6402.653809, avg loss: 3.731150, ppl: 41.727081 +epoch: 0, batch: 7607, sum loss: 7610.041016, avg loss: 3.990583, ppl: 54.086395 +epoch: 0, batch: 7608, sum loss: 6789.953125, avg loss: 3.922561, ppl: 50.529682 +epoch: 0, batch: 7609, sum loss: 6795.729492, avg loss: 3.872211, ppl: 48.048481 +epoch: 0, batch: 7610, sum loss: 6685.125977, avg loss: 3.965081, ppl: 52.724525 +epoch: 0, batch: 7611, sum loss: 6342.060547, avg loss: 3.966267, ppl: 52.787090 +epoch: 0, batch: 7612, sum loss: 6544.551758, avg loss: 3.952024, ppl: 52.040588 +epoch: 0, batch: 7613, sum loss: 6527.249023, avg loss: 4.016768, ppl: 55.521397 +epoch: 0, batch: 7614, sum loss: 6555.652832, avg loss: 3.872211, ppl: 48.048504 +epoch: 0, batch: 7615, sum loss: 7186.169922, avg loss: 4.010139, ppl: 55.154564 +epoch: 0, batch: 7616, sum loss: 5791.222656, avg loss: 3.787589, ppl: 44.149807 +epoch: 0, batch: 7617, sum loss: 7186.793945, avg loss: 4.113791, ppl: 61.178234 +epoch: 0, batch: 7618, sum loss: 7172.888184, avg loss: 3.943314, ppl: 51.589291 +epoch: 0, batch: 7619, sum loss: 7421.117676, avg loss: 3.893556, ppl: 49.085106 +epoch: 0, batch: 7620, sum loss: 7553.120605, avg loss: 4.293986, ppl: 73.257881 +epoch: 0, batch: 7621, sum loss: 6951.141602, avg loss: 3.988033, ppl: 53.948669 +epoch: 0, batch: 7622, sum loss: 6976.848145, avg loss: 4.125871, ppl: 61.921730 +epoch: 0, batch: 
7623, sum loss: 6014.992676, avg loss: 3.873144, ppl: 48.093361 +epoch: 0, batch: 7624, sum loss: 6559.711426, avg loss: 4.130801, ppl: 62.227730 +epoch: 0, batch: 7625, sum loss: 6798.734375, avg loss: 4.103038, ppl: 60.523899 +epoch: 0, batch: 7626, sum loss: 6515.749512, avg loss: 4.054605, ppl: 57.662354 +epoch: 0, batch: 7627, sum loss: 6951.390137, avg loss: 3.983605, ppl: 53.710293 +epoch: 0, batch: 7628, sum loss: 6747.645020, avg loss: 4.175523, ppl: 65.073853 +epoch: 0, batch: 7629, sum loss: 6557.615234, avg loss: 4.052914, ppl: 57.564964 +epoch: 0, batch: 7630, sum loss: 5902.530762, avg loss: 3.985504, ppl: 53.812386 +epoch: 0, batch: 7631, sum loss: 6752.823242, avg loss: 4.002859, ppl: 54.754475 +epoch: 0, batch: 7632, sum loss: 6768.200195, avg loss: 3.914517, ppl: 50.124863 +epoch: 0, batch: 7633, sum loss: 6123.517578, avg loss: 4.002299, ppl: 54.723831 +epoch: 0, batch: 7634, sum loss: 7867.744629, avg loss: 4.158427, ppl: 63.970833 +epoch: 0, batch: 7635, sum loss: 7385.962891, avg loss: 4.027242, ppl: 56.105968 +epoch: 0, batch: 7636, sum loss: 6030.987793, avg loss: 3.802640, ppl: 44.819370 +epoch: 0, batch: 7637, sum loss: 7258.220703, avg loss: 4.315232, ppl: 74.831001 +epoch: 0, batch: 7638, sum loss: 6893.326172, avg loss: 4.047755, ppl: 57.268730 +epoch: 0, batch: 7639, sum loss: 8114.352051, avg loss: 4.266221, ppl: 71.251869 +epoch: 0, batch: 7640, sum loss: 6391.222656, avg loss: 3.984553, ppl: 53.761246 +epoch: 0, batch: 7641, sum loss: 6656.682129, avg loss: 3.986037, ppl: 53.841106 +epoch: 0, batch: 7642, sum loss: 7075.302734, avg loss: 4.216509, ppl: 67.796417 +epoch: 0, batch: 7643, sum loss: 6553.702148, avg loss: 4.055509, ppl: 57.714508 +epoch: 0, batch: 7644, sum loss: 6203.287598, avg loss: 3.810373, ppl: 45.167286 +epoch: 0, batch: 7645, sum loss: 6279.996094, avg loss: 3.751491, ppl: 42.584541 +epoch: 0, batch: 7646, sum loss: 6162.878418, avg loss: 4.136160, ppl: 62.562145 +epoch: 0, batch: 7647, sum loss: 7427.157715, 
avg loss: 4.229589, ppl: 68.688995 +epoch: 0, batch: 7648, sum loss: 7107.490723, avg loss: 4.052161, ppl: 57.521641 +epoch: 0, batch: 7649, sum loss: 6467.282227, avg loss: 3.948280, ppl: 51.846096 +epoch: 0, batch: 7650, sum loss: 7445.107422, avg loss: 4.127000, ppl: 61.991661 +epoch: 0, batch: 7651, sum loss: 5752.254883, avg loss: 3.860574, ppl: 47.492603 +epoch: 0, batch: 7652, sum loss: 5849.074219, avg loss: 3.954749, ppl: 52.182602 +epoch: 0, batch: 7653, sum loss: 6890.224609, avg loss: 3.847138, ppl: 46.858768 +epoch: 0, batch: 7654, sum loss: 6909.568359, avg loss: 3.930357, ppl: 50.925167 +epoch: 0, batch: 7655, sum loss: 6633.989258, avg loss: 4.057486, ppl: 57.828724 +epoch: 0, batch: 7656, sum loss: 6798.861328, avg loss: 3.927707, ppl: 50.790390 +epoch: 0, batch: 7657, sum loss: 7819.327148, avg loss: 4.310544, ppl: 74.481033 +epoch: 0, batch: 7658, sum loss: 6470.143555, avg loss: 3.935610, ppl: 51.193382 +epoch: 0, batch: 7659, sum loss: 8297.494141, avg loss: 4.326118, ppl: 75.650040 +epoch: 0, batch: 7660, sum loss: 6548.653809, avg loss: 4.110894, ppl: 61.001213 +epoch: 0, batch: 7661, sum loss: 6365.885254, avg loss: 3.834871, ppl: 46.287449 +epoch: 0, batch: 7662, sum loss: 7018.208496, avg loss: 4.068527, ppl: 58.470757 +epoch: 0, batch: 7663, sum loss: 6501.037598, avg loss: 4.130265, ppl: 62.194416 +epoch: 0, batch: 7664, sum loss: 6760.048828, avg loss: 4.072319, ppl: 58.692886 +epoch: 0, batch: 7665, sum loss: 7565.844727, avg loss: 4.224369, ppl: 68.331345 +epoch: 0, batch: 7666, sum loss: 6394.627441, avg loss: 4.052362, ppl: 57.533188 +epoch: 0, batch: 7667, sum loss: 6295.344238, avg loss: 3.941982, ppl: 51.520592 +epoch: 0, batch: 7668, sum loss: 7144.024414, avg loss: 4.295866, ppl: 73.395714 +epoch: 0, batch: 7669, sum loss: 6305.892090, avg loss: 4.011382, ppl: 55.223114 +epoch: 0, batch: 7670, sum loss: 6447.835938, avg loss: 4.112140, ppl: 61.077293 +epoch: 0, batch: 7671, sum loss: 7335.498047, avg loss: 4.223085, ppl: 
68.243683 +epoch: 0, batch: 7672, sum loss: 5368.488281, avg loss: 3.728117, ppl: 41.600700 +epoch: 0, batch: 7673, sum loss: 6322.059570, avg loss: 4.026790, ppl: 56.080585 +epoch: 0, batch: 7674, sum loss: 7709.957031, avg loss: 4.304833, ppl: 74.056877 +epoch: 0, batch: 7675, sum loss: 5720.842773, avg loss: 3.910351, ppl: 49.916458 +epoch: 0, batch: 7676, sum loss: 5763.938477, avg loss: 4.059112, ppl: 57.922829 +epoch: 0, batch: 7677, sum loss: 7320.687500, avg loss: 4.076107, ppl: 58.915638 +epoch: 0, batch: 7678, sum loss: 5851.978027, avg loss: 3.872917, ppl: 48.082447 +epoch: 0, batch: 7679, sum loss: 7479.985840, avg loss: 4.181099, ppl: 65.437729 +epoch: 0, batch: 7680, sum loss: 6151.395508, avg loss: 3.856674, ppl: 47.307766 +epoch: 0, batch: 7681, sum loss: 5724.565918, avg loss: 3.899568, ppl: 49.381115 +epoch: 0, batch: 7682, sum loss: 5136.652344, avg loss: 3.599616, ppl: 36.584194 +epoch: 0, batch: 7683, sum loss: 6780.175293, avg loss: 4.081984, ppl: 59.262932 +epoch: 0, batch: 7684, sum loss: 7127.581055, avg loss: 3.959767, ppl: 52.445122 +epoch: 0, batch: 7685, sum loss: 6980.857422, avg loss: 3.906468, ppl: 49.723003 +epoch: 0, batch: 7686, sum loss: 6804.927246, avg loss: 4.164582, ppl: 64.365791 +epoch: 0, batch: 7687, sum loss: 6884.187988, avg loss: 4.273239, ppl: 71.753677 +epoch: 0, batch: 7688, sum loss: 6827.885742, avg loss: 4.028251, ppl: 56.162605 +epoch: 0, batch: 7689, sum loss: 6394.250488, avg loss: 3.939772, ppl: 51.406898 +epoch: 0, batch: 7690, sum loss: 6198.378906, avg loss: 3.873987, ppl: 48.133900 +epoch: 0, batch: 7691, sum loss: 5909.481445, avg loss: 3.929176, ppl: 50.865063 +epoch: 0, batch: 7692, sum loss: 8495.817383, avg loss: 4.148348, ppl: 63.329315 +epoch: 0, batch: 7693, sum loss: 6910.127441, avg loss: 4.038649, ppl: 56.749599 +epoch: 0, batch: 7694, sum loss: 5594.246582, avg loss: 3.785011, ppl: 44.036167 +epoch: 0, batch: 7695, sum loss: 7632.442383, avg loss: 4.132346, ppl: 62.323975 +epoch: 0, batch: 
7696, sum loss: 6267.718750, avg loss: 3.997270, ppl: 54.449280 +epoch: 0, batch: 7697, sum loss: 6239.580078, avg loss: 4.115818, ppl: 61.302341 +epoch: 0, batch: 7698, sum loss: 8485.177734, avg loss: 3.978049, ppl: 53.412712 +epoch: 0, batch: 7699, sum loss: 7107.616211, avg loss: 4.006548, ppl: 54.956829 +epoch: 0, batch: 7700, sum loss: 7851.580566, avg loss: 3.937603, ppl: 51.295509 +epoch: 0, batch: 7701, sum loss: 6749.648926, avg loss: 3.919657, ppl: 50.383148 +epoch: 0, batch: 7702, sum loss: 5862.775879, avg loss: 3.819398, ppl: 45.576759 +epoch: 0, batch: 7703, sum loss: 7506.826172, avg loss: 4.222062, ppl: 68.173920 +epoch: 0, batch: 7704, sum loss: 8434.342773, avg loss: 4.150759, ppl: 63.482178 +epoch: 0, batch: 7705, sum loss: 6536.295898, avg loss: 3.992850, ppl: 54.209183 +epoch: 0, batch: 7706, sum loss: 7863.794434, avg loss: 4.478243, ppl: 88.079773 +epoch: 0, batch: 7707, sum loss: 6853.768066, avg loss: 3.945750, ppl: 51.715122 +epoch: 0, batch: 7708, sum loss: 8476.061523, avg loss: 4.229572, ppl: 68.687813 +epoch: 0, batch: 7709, sum loss: 5727.291992, avg loss: 3.711790, ppl: 40.927002 +epoch: 0, batch: 7710, sum loss: 7252.471191, avg loss: 4.095128, ppl: 60.047001 +epoch: 0, batch: 7711, sum loss: 7007.941895, avg loss: 4.093424, ppl: 59.944782 +epoch: 0, batch: 7712, sum loss: 6389.898438, avg loss: 3.898657, ppl: 49.336128 +epoch: 0, batch: 7713, sum loss: 6801.388672, avg loss: 3.938268, ppl: 51.329620 +epoch: 0, batch: 7714, sum loss: 6635.018555, avg loss: 4.126255, ppl: 61.945503 +epoch: 0, batch: 7715, sum loss: 6554.312988, avg loss: 3.901377, ppl: 49.470520 +epoch: 0, batch: 7716, sum loss: 6962.376465, avg loss: 3.992188, ppl: 54.173302 +epoch: 0, batch: 7717, sum loss: 7284.839844, avg loss: 4.069743, ppl: 58.541897 +epoch: 0, batch: 7718, sum loss: 6551.187500, avg loss: 4.031500, ppl: 56.345360 +epoch: 0, batch: 7719, sum loss: 7024.819336, avg loss: 4.065289, ppl: 58.281750 +epoch: 0, batch: 7720, sum loss: 7278.231445, 
avg loss: 3.955560, ppl: 52.224957 +epoch: 0, batch: 7721, sum loss: 5612.873047, avg loss: 3.828699, ppl: 46.002666 +epoch: 0, batch: 7722, sum loss: 7538.899902, avg loss: 4.121870, ppl: 61.674469 +epoch: 0, batch: 7723, sum loss: 5866.879883, avg loss: 3.929591, ppl: 50.886169 +epoch: 0, batch: 7724, sum loss: 4763.395996, avg loss: 3.798561, ppl: 44.636921 +epoch: 0, batch: 7725, sum loss: 6994.364746, avg loss: 4.188242, ppl: 65.906853 +epoch: 0, batch: 7726, sum loss: 7391.193848, avg loss: 4.092577, ppl: 59.894039 +epoch: 0, batch: 7727, sum loss: 6274.780273, avg loss: 4.130863, ppl: 62.231586 +epoch: 0, batch: 7728, sum loss: 7095.457031, avg loss: 4.127666, ppl: 62.032940 +epoch: 0, batch: 7729, sum loss: 7000.143066, avg loss: 4.125011, ppl: 61.868488 +epoch: 0, batch: 7730, sum loss: 6039.480469, avg loss: 3.839466, ppl: 46.500641 +epoch: 0, batch: 7731, sum loss: 6178.679199, avg loss: 3.797590, ppl: 44.593586 +epoch: 0, batch: 7732, sum loss: 7076.360840, avg loss: 4.189675, ppl: 66.001328 +epoch: 0, batch: 7733, sum loss: 7449.971680, avg loss: 4.064360, ppl: 58.227638 +epoch: 0, batch: 7734, sum loss: 5348.383789, avg loss: 3.753252, ppl: 42.659565 +epoch: 0, batch: 7735, sum loss: 7332.543945, avg loss: 4.187633, ppl: 65.866669 +epoch: 0, batch: 7736, sum loss: 6688.010254, avg loss: 4.284440, ppl: 72.561867 +epoch: 0, batch: 7737, sum loss: 6979.559082, avg loss: 4.258425, ppl: 70.698563 +epoch: 0, batch: 7738, sum loss: 7476.780762, avg loss: 4.224170, ppl: 68.317787 +epoch: 0, batch: 7739, sum loss: 6830.946289, avg loss: 4.175395, ppl: 65.065536 +epoch: 0, batch: 7740, sum loss: 5767.645020, avg loss: 3.980432, ppl: 53.540134 +epoch: 0, batch: 7741, sum loss: 7534.387695, avg loss: 4.237564, ppl: 69.238953 +epoch: 0, batch: 7742, sum loss: 6399.143066, avg loss: 4.193410, ppl: 66.248306 +epoch: 0, batch: 7743, sum loss: 6454.523438, avg loss: 4.019006, ppl: 55.645756 +epoch: 0, batch: 7744, sum loss: 6196.438477, avg loss: 3.877621, ppl: 
48.309158 +epoch: 0, batch: 7745, sum loss: 6521.422363, avg loss: 3.752257, ppl: 42.617153 +epoch: 0, batch: 7746, sum loss: 6437.212402, avg loss: 3.859240, ppl: 47.429295 +epoch: 0, batch: 7747, sum loss: 7397.150391, avg loss: 4.073320, ppl: 58.751717 +epoch: 0, batch: 7748, sum loss: 6217.113281, avg loss: 3.593707, ppl: 36.368649 +epoch: 0, batch: 7749, sum loss: 6271.681152, avg loss: 3.775846, ppl: 43.634430 +epoch: 0, batch: 7750, sum loss: 7583.493652, avg loss: 3.843636, ppl: 46.694950 +epoch: 0, batch: 7751, sum loss: 6143.653320, avg loss: 3.976475, ppl: 53.328705 +epoch: 0, batch: 7752, sum loss: 6477.140625, avg loss: 3.801139, ppl: 44.752121 +epoch: 0, batch: 7753, sum loss: 6957.192383, avg loss: 4.005292, ppl: 54.887871 +epoch: 0, batch: 7754, sum loss: 6790.304199, avg loss: 3.909214, ppl: 49.859737 +epoch: 0, batch: 7755, sum loss: 7010.938965, avg loss: 3.882026, ppl: 48.522419 +epoch: 0, batch: 7756, sum loss: 7273.572266, avg loss: 3.877171, ppl: 48.287407 +epoch: 0, batch: 7757, sum loss: 7283.028320, avg loss: 4.068731, ppl: 58.482693 +epoch: 0, batch: 7758, sum loss: 7805.684570, avg loss: 4.054901, ppl: 57.679459 +epoch: 0, batch: 7759, sum loss: 7084.594238, avg loss: 4.189589, ppl: 65.995628 +epoch: 0, batch: 7760, sum loss: 6589.502441, avg loss: 4.173213, ppl: 64.923683 +epoch: 0, batch: 7761, sum loss: 7480.540527, avg loss: 4.112447, ppl: 61.096024 +epoch: 0, batch: 7762, sum loss: 6064.439453, avg loss: 3.704606, ppl: 40.634026 +epoch: 0, batch: 7763, sum loss: 6495.062988, avg loss: 3.915047, ppl: 50.151424 +epoch: 0, batch: 7764, sum loss: 6024.546875, avg loss: 3.904438, ppl: 49.622162 +epoch: 0, batch: 7765, sum loss: 7163.975586, avg loss: 3.979986, ppl: 53.516308 +epoch: 0, batch: 7766, sum loss: 5230.719727, avg loss: 3.834838, ppl: 46.285946 +epoch: 0, batch: 7767, sum loss: 6693.075195, avg loss: 3.976872, ppl: 53.349880 +epoch: 0, batch: 7768, sum loss: 6850.308594, avg loss: 4.126692, ppl: 61.972569 +epoch: 0, batch: 
7769, sum loss: 7370.287109, avg loss: 4.096880, ppl: 60.152317 +epoch: 0, batch: 7770, sum loss: 6510.125488, avg loss: 4.076472, ppl: 58.937160 +epoch: 0, batch: 7771, sum loss: 7555.626953, avg loss: 4.259090, ppl: 70.745605 +epoch: 0, batch: 7772, sum loss: 6694.207031, avg loss: 3.968113, ppl: 52.884666 +epoch: 0, batch: 7773, sum loss: 7377.839844, avg loss: 4.082922, ppl: 59.318516 +epoch: 0, batch: 7774, sum loss: 5277.059570, avg loss: 3.843452, ppl: 46.686344 +epoch: 0, batch: 7775, sum loss: 7430.799805, avg loss: 4.193454, ppl: 66.251213 +epoch: 0, batch: 7776, sum loss: 6503.846191, avg loss: 3.862142, ppl: 47.567112 +epoch: 0, batch: 7777, sum loss: 6684.722656, avg loss: 4.046443, ppl: 57.193630 +epoch: 0, batch: 7778, sum loss: 6433.637695, avg loss: 4.041230, ppl: 56.896267 +epoch: 0, batch: 7779, sum loss: 6007.239258, avg loss: 3.612291, ppl: 37.050823 +epoch: 0, batch: 7780, sum loss: 7293.272949, avg loss: 3.734395, ppl: 41.862682 +epoch: 0, batch: 7781, sum loss: 7015.966797, avg loss: 4.067227, ppl: 58.394829 +epoch: 0, batch: 7782, sum loss: 5518.584961, avg loss: 3.811177, ppl: 45.203636 +epoch: 0, batch: 7783, sum loss: 7277.709961, avg loss: 4.236153, ppl: 69.141327 +epoch: 0, batch: 7784, sum loss: 5955.941406, avg loss: 4.032459, ppl: 56.399414 +epoch: 0, batch: 7785, sum loss: 6473.696777, avg loss: 3.722655, ppl: 41.374100 +epoch: 0, batch: 7786, sum loss: 6484.919922, avg loss: 4.048015, ppl: 57.283615 +epoch: 0, batch: 7787, sum loss: 6971.040039, avg loss: 4.139573, ppl: 62.775986 +epoch: 0, batch: 7788, sum loss: 7894.629883, avg loss: 4.398123, ppl: 81.298111 +epoch: 0, batch: 7789, sum loss: 7387.430176, avg loss: 3.942065, ppl: 51.524891 +epoch: 0, batch: 7790, sum loss: 7511.055664, avg loss: 4.012316, ppl: 55.274750 +epoch: 0, batch: 7791, sum loss: 6529.774902, avg loss: 4.030725, ppl: 56.301743 +epoch: 0, batch: 7792, sum loss: 6494.333496, avg loss: 3.902845, ppl: 49.543194 +epoch: 0, batch: 7793, sum loss: 6716.502930, 
avg loss: 3.898144, ppl: 49.310867 +epoch: 0, batch: 7794, sum loss: 7431.943848, avg loss: 4.052314, ppl: 57.530418 +epoch: 0, batch: 7795, sum loss: 6551.700684, avg loss: 3.847152, ppl: 46.859440 +epoch: 0, batch: 7796, sum loss: 8909.200195, avg loss: 4.380137, ppl: 79.849007 +epoch: 0, batch: 7797, sum loss: 7244.692383, avg loss: 3.897091, ppl: 49.258942 +epoch: 0, batch: 7798, sum loss: 6538.462891, avg loss: 4.008868, ppl: 55.084465 +epoch: 0, batch: 7799, sum loss: 6271.199219, avg loss: 3.929323, ppl: 50.872513 +epoch: 0, batch: 7800, sum loss: 6586.723633, avg loss: 3.909035, ppl: 49.850811 +epoch: 0, batch: 7801, sum loss: 6430.698730, avg loss: 3.771671, ppl: 43.452618 +epoch: 0, batch: 7802, sum loss: 6816.545410, avg loss: 3.897396, ppl: 49.273964 +epoch: 0, batch: 7803, sum loss: 7705.168945, avg loss: 4.091964, ppl: 59.857349 +epoch: 0, batch: 7804, sum loss: 7173.518066, avg loss: 4.110898, ppl: 61.001503 +epoch: 0, batch: 7805, sum loss: 7501.711914, avg loss: 3.977578, ppl: 53.387581 +epoch: 0, batch: 7806, sum loss: 5543.827148, avg loss: 3.934583, ppl: 51.140816 +epoch: 0, batch: 7807, sum loss: 7491.736328, avg loss: 4.107312, ppl: 60.783096 +epoch: 0, batch: 7808, sum loss: 7542.012207, avg loss: 3.928132, ppl: 50.811951 +epoch: 0, batch: 7809, sum loss: 7109.113770, avg loss: 4.027826, ppl: 56.138752 +epoch: 0, batch: 7810, sum loss: 5962.248535, avg loss: 4.067018, ppl: 58.382607 +epoch: 0, batch: 7811, sum loss: 6047.752441, avg loss: 4.156531, ppl: 63.849636 +epoch: 0, batch: 7812, sum loss: 7197.695312, avg loss: 3.935318, ppl: 51.178398 +epoch: 0, batch: 7813, sum loss: 6797.909180, avg loss: 3.849326, ppl: 46.961384 +epoch: 0, batch: 7814, sum loss: 6992.367676, avg loss: 4.297706, ppl: 73.530930 +epoch: 0, batch: 7815, sum loss: 7312.779785, avg loss: 4.002616, ppl: 54.741188 +epoch: 0, batch: 7816, sum loss: 6174.744141, avg loss: 4.001779, ppl: 54.695343 +epoch: 0, batch: 7817, sum loss: 6745.979492, avg loss: 4.161616, ppl: 
64.175133 +epoch: 0, batch: 7818, sum loss: 6867.505371, avg loss: 3.958216, ppl: 52.363850 +epoch: 0, batch: 7819, sum loss: 6969.807617, avg loss: 3.998742, ppl: 54.529514 +epoch: 0, batch: 7820, sum loss: 6216.709473, avg loss: 4.103439, ppl: 60.548145 +epoch: 0, batch: 7821, sum loss: 6764.808105, avg loss: 3.960660, ppl: 52.491970 +epoch: 0, batch: 7822, sum loss: 6789.897949, avg loss: 4.085378, ppl: 59.464420 +epoch: 0, batch: 7823, sum loss: 6954.773926, avg loss: 4.071881, ppl: 58.667229 +epoch: 0, batch: 7824, sum loss: 7535.515625, avg loss: 4.117768, ppl: 61.422012 +epoch: 0, batch: 7825, sum loss: 6970.691406, avg loss: 3.958371, ppl: 52.371937 +epoch: 0, batch: 7826, sum loss: 5818.433105, avg loss: 3.792981, ppl: 44.388531 +epoch: 0, batch: 7827, sum loss: 6768.542969, avg loss: 4.092227, ppl: 59.873081 +epoch: 0, batch: 7828, sum loss: 5955.533203, avg loss: 3.892505, ppl: 49.033581 +epoch: 0, batch: 7829, sum loss: 7370.679688, avg loss: 4.138506, ppl: 62.709091 +epoch: 0, batch: 7830, sum loss: 6348.342285, avg loss: 3.840497, ppl: 46.548615 +epoch: 0, batch: 7831, sum loss: 6395.224609, avg loss: 3.994519, ppl: 54.299717 +epoch: 0, batch: 7832, sum loss: 6080.021484, avg loss: 3.743855, ppl: 42.260612 +epoch: 0, batch: 7833, sum loss: 7704.438477, avg loss: 4.040083, ppl: 56.831055 +epoch: 0, batch: 7834, sum loss: 6202.514160, avg loss: 4.040726, ppl: 56.867596 +epoch: 0, batch: 7835, sum loss: 7510.709473, avg loss: 4.077476, ppl: 58.996349 +epoch: 0, batch: 7836, sum loss: 7795.139160, avg loss: 4.243408, ppl: 69.644775 +epoch: 0, batch: 7837, sum loss: 6683.225586, avg loss: 3.847568, ppl: 46.878918 +epoch: 0, batch: 7838, sum loss: 7080.489258, avg loss: 3.984518, ppl: 53.759388 +epoch: 0, batch: 7839, sum loss: 6761.540039, avg loss: 3.991464, ppl: 54.134079 +epoch: 0, batch: 7840, sum loss: 6070.693848, avg loss: 3.813250, ppl: 45.297409 +epoch: 0, batch: 7841, sum loss: 6274.501465, avg loss: 3.667155, ppl: 39.140385 +epoch: 0, batch: 
7842, sum loss: 7217.074707, avg loss: 3.863530, ppl: 47.633221 +epoch: 0, batch: 7843, sum loss: 7618.041992, avg loss: 3.939008, ppl: 51.367630 +epoch: 0, batch: 7844, sum loss: 6494.166992, avg loss: 3.860979, ppl: 47.511845 +epoch: 0, batch: 7845, sum loss: 6130.526367, avg loss: 3.812516, ppl: 45.264191 +epoch: 0, batch: 7846, sum loss: 7458.332031, avg loss: 4.093486, ppl: 59.948528 +epoch: 0, batch: 7847, sum loss: 6402.728516, avg loss: 3.733369, ppl: 41.819775 +epoch: 0, batch: 7848, sum loss: 6319.399414, avg loss: 3.841580, ppl: 46.599037 +epoch: 0, batch: 7849, sum loss: 6235.807129, avg loss: 3.842149, ppl: 46.625546 +epoch: 0, batch: 7850, sum loss: 6446.293457, avg loss: 3.964510, ppl: 52.694443 +epoch: 0, batch: 7851, sum loss: 5709.891113, avg loss: 3.913565, ppl: 50.077156 +epoch: 0, batch: 7852, sum loss: 6173.696289, avg loss: 3.970223, ppl: 52.996346 +epoch: 0, batch: 7853, sum loss: 6289.924316, avg loss: 3.909213, ppl: 49.859711 +epoch: 0, batch: 7854, sum loss: 6352.330078, avg loss: 3.871011, ppl: 47.990906 +epoch: 0, batch: 7855, sum loss: 5726.192383, avg loss: 3.971007, ppl: 53.037933 +epoch: 0, batch: 7856, sum loss: 7374.947266, avg loss: 4.041067, ppl: 56.887016 +epoch: 0, batch: 7857, sum loss: 6871.106934, avg loss: 3.783649, ppl: 43.976215 +epoch: 0, batch: 7858, sum loss: 8449.701172, avg loss: 4.158318, ppl: 63.963818 +epoch: 0, batch: 7859, sum loss: 5539.956055, avg loss: 3.954287, ppl: 52.158482 +epoch: 0, batch: 7860, sum loss: 7551.689941, avg loss: 4.195384, ppl: 66.379189 +epoch: 0, batch: 7861, sum loss: 6726.540039, avg loss: 4.359391, ppl: 78.209503 +epoch: 0, batch: 7862, sum loss: 6432.074707, avg loss: 3.905328, ppl: 49.666348 +epoch: 0, batch: 7863, sum loss: 7314.399902, avg loss: 3.932473, ppl: 51.033035 +epoch: 0, batch: 7864, sum loss: 6590.242188, avg loss: 4.083174, ppl: 59.333481 +epoch: 0, batch: 7865, sum loss: 6355.175781, avg loss: 3.989439, ppl: 54.024559 +epoch: 0, batch: 7866, sum loss: 7043.358887, 
avg loss: 3.912977, ppl: 50.047733 +epoch: 0, batch: 7867, sum loss: 7829.102539, avg loss: 4.351919, ppl: 77.627304 +epoch: 0, batch: 7868, sum loss: 6455.481934, avg loss: 3.992259, ppl: 54.177113 +epoch: 0, batch: 7869, sum loss: 8018.973145, avg loss: 4.133492, ppl: 62.395397 +epoch: 0, batch: 7870, sum loss: 7304.275391, avg loss: 4.087451, ppl: 59.587837 +epoch: 0, batch: 7871, sum loss: 6243.248047, avg loss: 4.009793, ppl: 55.135471 +epoch: 0, batch: 7872, sum loss: 8429.832031, avg loss: 4.128223, ppl: 62.067558 +epoch: 0, batch: 7873, sum loss: 6812.925781, avg loss: 3.808231, ppl: 45.070656 +epoch: 0, batch: 7874, sum loss: 6226.317383, avg loss: 3.789603, ppl: 44.238811 +epoch: 0, batch: 7875, sum loss: 6698.422852, avg loss: 3.924091, ppl: 50.607048 +epoch: 0, batch: 7876, sum loss: 6660.255859, avg loss: 3.899447, ppl: 49.375134 +epoch: 0, batch: 7877, sum loss: 6027.084961, avg loss: 4.053184, ppl: 57.580505 +epoch: 0, batch: 7878, sum loss: 7028.700684, avg loss: 4.208803, ppl: 67.275940 +epoch: 0, batch: 7879, sum loss: 7746.911621, avg loss: 4.003572, ppl: 54.793495 +epoch: 0, batch: 7880, sum loss: 6075.667969, avg loss: 3.748099, ppl: 42.440311 +epoch: 0, batch: 7881, sum loss: 5675.671875, avg loss: 3.840103, ppl: 46.530262 +epoch: 0, batch: 7882, sum loss: 5979.165039, avg loss: 3.840183, ppl: 46.533989 +epoch: 0, batch: 7883, sum loss: 7527.824219, avg loss: 4.193774, ppl: 66.272446 +epoch: 0, batch: 7884, sum loss: 7210.302734, avg loss: 3.870264, ppl: 47.955059 +epoch: 0, batch: 7885, sum loss: 4928.745117, avg loss: 3.725431, ppl: 41.489109 +epoch: 0, batch: 7886, sum loss: 7923.255859, avg loss: 4.086259, ppl: 59.516815 +epoch: 0, batch: 7887, sum loss: 7227.558105, avg loss: 3.995334, ppl: 54.343971 +epoch: 0, batch: 7888, sum loss: 6143.380371, avg loss: 4.128616, ppl: 62.091919 +epoch: 0, batch: 7889, sum loss: 6872.822266, avg loss: 3.905013, ppl: 49.650707 +epoch: 0, batch: 7890, sum loss: 5312.612793, avg loss: 3.762474, ppl: 
43.054794 +epoch: 0, batch: 7891, sum loss: 6341.219727, avg loss: 3.953379, ppl: 52.111149 +epoch: 0, batch: 7892, sum loss: 7292.108398, avg loss: 4.217530, ppl: 67.865631 +epoch: 0, batch: 7893, sum loss: 5244.280762, avg loss: 3.641862, ppl: 38.162819 +epoch: 0, batch: 7894, sum loss: 6005.647949, avg loss: 3.796237, ppl: 44.533310 +epoch: 0, batch: 7895, sum loss: 7542.087891, avg loss: 4.134917, ppl: 62.484390 +epoch: 0, batch: 7896, sum loss: 6807.187500, avg loss: 3.841528, ppl: 46.596619 +epoch: 0, batch: 7897, sum loss: 6483.198242, avg loss: 3.795783, ppl: 44.513088 +epoch: 0, batch: 7898, sum loss: 5162.537109, avg loss: 3.727464, ppl: 41.573532 +epoch: 0, batch: 7899, sum loss: 6678.100586, avg loss: 4.037546, ppl: 56.687042 +epoch: 0, batch: 7900, sum loss: 6569.478027, avg loss: 3.891871, ppl: 49.002483 +epoch: 0, batch: 7901, sum loss: 7778.204102, avg loss: 4.030158, ppl: 56.269775 +epoch: 0, batch: 7902, sum loss: 6133.008301, avg loss: 3.849974, ppl: 46.991837 +epoch: 0, batch: 7903, sum loss: 6707.389648, avg loss: 3.945524, ppl: 51.703400 +epoch: 0, batch: 7904, sum loss: 6470.868164, avg loss: 4.009212, ppl: 55.103432 +epoch: 0, batch: 7905, sum loss: 7538.546875, avg loss: 3.897904, ppl: 49.299019 +epoch: 0, batch: 7906, sum loss: 6401.788086, avg loss: 3.998619, ppl: 54.522781 +epoch: 0, batch: 7907, sum loss: 7799.013672, avg loss: 4.184020, ppl: 65.629158 +epoch: 0, batch: 7908, sum loss: 7180.909180, avg loss: 3.943388, ppl: 51.593090 +epoch: 0, batch: 7909, sum loss: 6915.766602, avg loss: 4.072890, ppl: 58.726425 +epoch: 0, batch: 7910, sum loss: 7895.097168, avg loss: 4.177300, ppl: 65.189606 +epoch: 0, batch: 7911, sum loss: 6989.936523, avg loss: 4.097266, ppl: 60.175556 +epoch: 0, batch: 7912, sum loss: 7274.487305, avg loss: 4.052639, ppl: 57.549156 +epoch: 0, batch: 7913, sum loss: 6256.152832, avg loss: 3.947099, ppl: 51.784943 +epoch: 0, batch: 7914, sum loss: 7367.481445, avg loss: 3.933519, ppl: 51.086452 +epoch: 0, batch: 
7915, sum loss: 6836.536133, avg loss: 3.834289, ppl: 46.260506 +epoch: 0, batch: 7916, sum loss: 7768.104980, avg loss: 4.035379, ppl: 56.564350 +epoch: 0, batch: 7917, sum loss: 7026.310059, avg loss: 4.040431, ppl: 56.850868 +epoch: 0, batch: 7918, sum loss: 6080.464844, avg loss: 3.920351, ppl: 50.418152 +epoch: 0, batch: 7919, sum loss: 6756.483398, avg loss: 4.129880, ppl: 62.170460 +epoch: 0, batch: 7920, sum loss: 8242.822266, avg loss: 4.088702, ppl: 59.662384 +epoch: 0, batch: 7921, sum loss: 7485.669922, avg loss: 4.193653, ppl: 66.264389 +epoch: 0, batch: 7922, sum loss: 5845.630859, avg loss: 3.923242, ppl: 50.564125 +epoch: 0, batch: 7923, sum loss: 7050.887207, avg loss: 4.113703, ppl: 61.172836 +epoch: 0, batch: 7924, sum loss: 6582.770508, avg loss: 3.851826, ppl: 47.078949 +epoch: 0, batch: 7925, sum loss: 7802.776367, avg loss: 4.055497, ppl: 57.713848 +epoch: 0, batch: 7926, sum loss: 7463.251465, avg loss: 4.025486, ppl: 56.007523 +epoch: 0, batch: 7927, sum loss: 6166.684082, avg loss: 4.025251, ppl: 55.994358 +epoch: 0, batch: 7928, sum loss: 6124.676270, avg loss: 3.682908, ppl: 39.761864 +epoch: 0, batch: 7929, sum loss: 5569.691406, avg loss: 3.604978, ppl: 36.780888 +epoch: 0, batch: 7930, sum loss: 5826.604980, avg loss: 3.742200, ppl: 42.190704 +epoch: 0, batch: 7931, sum loss: 5512.710938, avg loss: 3.600726, ppl: 36.624809 +epoch: 0, batch: 7932, sum loss: 7612.941406, avg loss: 4.257797, ppl: 70.654175 +epoch: 0, batch: 7933, sum loss: 6574.062500, avg loss: 4.008574, ppl: 55.068314 +epoch: 0, batch: 7934, sum loss: 5802.596680, avg loss: 3.700636, ppl: 40.473022 +epoch: 0, batch: 7935, sum loss: 6328.702148, avg loss: 3.805594, ppl: 44.951931 +epoch: 0, batch: 7936, sum loss: 7051.428711, avg loss: 3.851135, ppl: 47.046455 +epoch: 0, batch: 7937, sum loss: 6845.180664, avg loss: 3.869520, ppl: 47.919376 +epoch: 0, batch: 7938, sum loss: 5670.362793, avg loss: 3.844314, ppl: 46.726612 +epoch: 0, batch: 7939, sum loss: 6202.011230, 
avg loss: 3.779409, ppl: 43.790173 +epoch: 0, batch: 7940, sum loss: 6923.621094, avg loss: 3.947332, ppl: 51.797009 +epoch: 0, batch: 7941, sum loss: 5354.126465, avg loss: 4.004582, ppl: 54.848915 +epoch: 0, batch: 7942, sum loss: 6009.026367, avg loss: 3.914675, ppl: 50.132774 +epoch: 0, batch: 7943, sum loss: 6303.016602, avg loss: 3.855056, ppl: 47.231262 +epoch: 0, batch: 7944, sum loss: 6434.416016, avg loss: 3.947495, ppl: 51.805405 +epoch: 0, batch: 7945, sum loss: 5805.282227, avg loss: 3.949172, ppl: 51.892357 +epoch: 0, batch: 7946, sum loss: 5340.752930, avg loss: 3.648055, ppl: 38.399910 +epoch: 0, batch: 7947, sum loss: 6663.518066, avg loss: 3.949922, ppl: 51.931309 +epoch: 0, batch: 7948, sum loss: 6520.050781, avg loss: 3.927742, ppl: 50.792137 +epoch: 0, batch: 7949, sum loss: 5906.960938, avg loss: 3.850692, ppl: 47.025574 +epoch: 0, batch: 7950, sum loss: 5750.860840, avg loss: 3.960648, ppl: 52.491333 +epoch: 0, batch: 7951, sum loss: 6106.910156, avg loss: 3.962953, ppl: 52.612453 +epoch: 0, batch: 7952, sum loss: 6401.835449, avg loss: 3.865843, ppl: 47.743484 +epoch: 0, batch: 7953, sum loss: 7084.304688, avg loss: 4.179531, ppl: 65.335182 +epoch: 0, batch: 7954, sum loss: 6416.300781, avg loss: 4.136880, ppl: 62.607178 +epoch: 0, batch: 7955, sum loss: 7356.461426, avg loss: 4.059857, ppl: 57.966042 +epoch: 0, batch: 7956, sum loss: 6798.077148, avg loss: 4.137600, ppl: 62.652271 +epoch: 0, batch: 7957, sum loss: 6347.116211, avg loss: 4.019706, ppl: 55.684746 +epoch: 0, batch: 7958, sum loss: 7315.796875, avg loss: 4.178068, ppl: 65.239700 +epoch: 0, batch: 7959, sum loss: 5180.874023, avg loss: 3.732618, ppl: 41.788372 +epoch: 0, batch: 7960, sum loss: 6770.741211, avg loss: 3.728382, ppl: 41.611710 +epoch: 0, batch: 7961, sum loss: 6236.907227, avg loss: 3.888346, ppl: 48.830063 +epoch: 0, batch: 7962, sum loss: 6843.814453, avg loss: 4.013968, ppl: 55.366100 +epoch: 0, batch: 7963, sum loss: 6080.721680, avg loss: 4.089254, ppl: 
59.695366 +epoch: 0, batch: 7964, sum loss: 6608.518555, avg loss: 4.032043, ppl: 56.375969 +epoch: 0, batch: 7965, sum loss: 7294.342285, avg loss: 4.070504, ppl: 58.586468 +epoch: 0, batch: 7966, sum loss: 5948.932617, avg loss: 3.968601, ppl: 52.910458 +epoch: 0, batch: 7967, sum loss: 7215.314453, avg loss: 4.083370, ppl: 59.345112 +epoch: 0, batch: 7968, sum loss: 7155.822266, avg loss: 4.239232, ppl: 69.354538 +epoch: 0, batch: 7969, sum loss: 6446.128906, avg loss: 3.949834, ppl: 51.926750 +epoch: 0, batch: 7970, sum loss: 7293.798340, avg loss: 4.099943, ppl: 60.336830 +epoch: 0, batch: 7971, sum loss: 6397.011719, avg loss: 3.789699, ppl: 44.243084 +epoch: 0, batch: 7972, sum loss: 6402.443359, avg loss: 3.944820, ppl: 51.667046 +epoch: 0, batch: 7973, sum loss: 5758.492188, avg loss: 3.783504, ppl: 43.969822 +epoch: 0, batch: 7974, sum loss: 7512.148438, avg loss: 3.962104, ppl: 52.567802 +epoch: 0, batch: 7975, sum loss: 7540.685547, avg loss: 4.219746, ppl: 68.016182 +epoch: 0, batch: 7976, sum loss: 5248.961426, avg loss: 3.605056, ppl: 36.783745 +epoch: 0, batch: 7977, sum loss: 6168.823242, avg loss: 3.906791, ppl: 49.739094 +epoch: 0, batch: 7978, sum loss: 7854.383301, avg loss: 4.175642, ppl: 65.081642 +epoch: 0, batch: 7979, sum loss: 5954.244629, avg loss: 3.730730, ppl: 41.709526 +epoch: 0, batch: 7980, sum loss: 7046.541992, avg loss: 4.103985, ppl: 60.581215 +epoch: 0, batch: 7981, sum loss: 6448.967773, avg loss: 4.015546, ppl: 55.453583 +epoch: 0, batch: 7982, sum loss: 6883.938965, avg loss: 3.990690, ppl: 54.092175 +epoch: 0, batch: 7983, sum loss: 7430.243652, avg loss: 4.160271, ppl: 64.088898 +epoch: 0, batch: 7984, sum loss: 7977.791992, avg loss: 3.984911, ppl: 53.780502 +epoch: 0, batch: 7985, sum loss: 7625.943359, avg loss: 4.362668, ppl: 78.466209 +epoch: 0, batch: 7986, sum loss: 6264.048828, avg loss: 4.038716, ppl: 56.753414 +epoch: 0, batch: 7987, sum loss: 6324.401855, avg loss: 3.872873, ppl: 48.080326 +epoch: 0, batch: 
7988, sum loss: 7078.784668, avg loss: 3.880913, ppl: 48.468437 +epoch: 0, batch: 7989, sum loss: 6217.427734, avg loss: 3.866560, ppl: 47.777725 +epoch: 0, batch: 7990, sum loss: 7554.995117, avg loss: 3.941051, ppl: 51.472694 +epoch: 0, batch: 7991, sum loss: 7069.667480, avg loss: 3.809088, ppl: 45.109280 +epoch: 0, batch: 7992, sum loss: 5848.870605, avg loss: 3.935983, ppl: 51.212463 +epoch: 0, batch: 7993, sum loss: 6994.161133, avg loss: 4.131224, ppl: 62.254055 +epoch: 0, batch: 7994, sum loss: 5988.762695, avg loss: 3.960822, ppl: 52.500469 +epoch: 0, batch: 7995, sum loss: 6894.928711, avg loss: 3.924262, ppl: 50.615711 +epoch: 0, batch: 7996, sum loss: 5538.253906, avg loss: 3.958723, ppl: 52.390396 +epoch: 0, batch: 7997, sum loss: 6068.689941, avg loss: 3.734578, ppl: 41.870369 +epoch: 0, batch: 7998, sum loss: 7307.302246, avg loss: 4.295886, ppl: 73.397217 +epoch: 0, batch: 7999, sum loss: 6236.083008, avg loss: 3.984718, ppl: 53.770103 +epoch: 0, batch: 8000, sum loss: 7616.900879, avg loss: 4.019473, ppl: 55.671738 +epoch: 0, batch: 8001, sum loss: 8164.718750, avg loss: 4.313111, ppl: 74.672424 +epoch: 0, batch: 8002, sum loss: 7695.305664, avg loss: 4.121749, ppl: 61.667000 +epoch: 0, batch: 8003, sum loss: 6494.741699, avg loss: 3.728325, ppl: 41.609341 +epoch: 0, batch: 8004, sum loss: 7686.573242, avg loss: 3.978558, ppl: 53.439896 +epoch: 0, batch: 8005, sum loss: 6926.379883, avg loss: 3.978392, ppl: 53.431030 +epoch: 0, batch: 8006, sum loss: 6175.542480, avg loss: 3.981652, ppl: 53.605518 +epoch: 0, batch: 8007, sum loss: 7510.601562, avg loss: 4.138072, ppl: 62.681885 +epoch: 0, batch: 8008, sum loss: 6544.822266, avg loss: 4.124022, ppl: 61.807331 +epoch: 0, batch: 8009, sum loss: 6331.862305, avg loss: 3.925519, ppl: 50.679386 +epoch: 0, batch: 8010, sum loss: 7052.748047, avg loss: 4.044007, ppl: 57.054493 +epoch: 0, batch: 8011, sum loss: 7523.697266, avg loss: 4.040654, ppl: 56.863503 +epoch: 0, batch: 8012, sum loss: 6837.931641, 
avg loss: 4.058120, ppl: 57.865410 +epoch: 0, batch: 8013, sum loss: 6754.542480, avg loss: 3.835629, ppl: 46.322544 +epoch: 0, batch: 8014, sum loss: 5328.450684, avg loss: 3.852820, ppl: 47.125790 +epoch: 0, batch: 8015, sum loss: 6813.044922, avg loss: 3.897623, ppl: 49.285164 +epoch: 0, batch: 8016, sum loss: 6012.621094, avg loss: 3.881615, ppl: 48.502468 +epoch: 0, batch: 8017, sum loss: 7617.340332, avg loss: 4.160208, ppl: 64.084831 +epoch: 0, batch: 8018, sum loss: 7018.749512, avg loss: 3.983399, ppl: 53.699268 +epoch: 0, batch: 8019, sum loss: 6541.052246, avg loss: 4.072884, ppl: 58.726089 +epoch: 0, batch: 8020, sum loss: 5980.719238, avg loss: 3.629077, ppl: 37.678032 +epoch: 0, batch: 8021, sum loss: 6178.580078, avg loss: 3.932896, ppl: 51.054638 +epoch: 0, batch: 8022, sum loss: 7707.140625, avg loss: 4.099543, ppl: 60.312695 +epoch: 0, batch: 8023, sum loss: 7050.239746, avg loss: 3.756121, ppl: 42.782169 +epoch: 0, batch: 8024, sum loss: 6882.477051, avg loss: 4.055673, ppl: 57.724007 +epoch: 0, batch: 8025, sum loss: 6151.449219, avg loss: 3.568126, ppl: 35.450104 +epoch: 0, batch: 8026, sum loss: 6738.999023, avg loss: 3.994665, ppl: 54.307625 +epoch: 0, batch: 8027, sum loss: 6734.002930, avg loss: 3.823965, ppl: 45.785400 +epoch: 0, batch: 8028, sum loss: 7092.573242, avg loss: 3.984592, ppl: 53.763336 +epoch: 0, batch: 8029, sum loss: 6180.212891, avg loss: 3.966761, ppl: 52.813171 +epoch: 0, batch: 8030, sum loss: 7494.629395, avg loss: 4.224706, ppl: 68.354416 +epoch: 0, batch: 8031, sum loss: 6896.485840, avg loss: 4.169580, ppl: 64.688309 +epoch: 0, batch: 8032, sum loss: 7183.804688, avg loss: 4.038114, ppl: 56.719273 +epoch: 0, batch: 8033, sum loss: 6308.197754, avg loss: 3.576076, ppl: 35.733040 +epoch: 0, batch: 8034, sum loss: 7914.899414, avg loss: 4.098860, ppl: 60.271553 +epoch: 0, batch: 8035, sum loss: 6865.171387, avg loss: 4.266732, ppl: 71.288269 +epoch: 0, batch: 8036, sum loss: 7070.108398, avg loss: 4.195910, ppl: 
66.414139 +epoch: 0, batch: 8037, sum loss: 6905.092773, avg loss: 4.059431, ppl: 57.941338 +epoch: 0, batch: 8038, sum loss: 6742.449219, avg loss: 3.815761, ppl: 45.411293 +epoch: 0, batch: 8039, sum loss: 7066.041016, avg loss: 4.037738, ppl: 56.697937 +epoch: 0, batch: 8040, sum loss: 5779.190918, avg loss: 3.678670, ppl: 39.593716 +epoch: 0, batch: 8041, sum loss: 7311.148438, avg loss: 3.995163, ppl: 54.334709 +epoch: 0, batch: 8042, sum loss: 7749.405273, avg loss: 3.992481, ppl: 54.189152 +epoch: 0, batch: 8043, sum loss: 7254.891602, avg loss: 4.122097, ppl: 61.688499 +epoch: 0, batch: 8044, sum loss: 5493.270508, avg loss: 3.711669, ppl: 40.922066 +epoch: 0, batch: 8045, sum loss: 6891.090332, avg loss: 4.141280, ppl: 62.883270 +epoch: 0, batch: 8046, sum loss: 7419.463867, avg loss: 4.006190, ppl: 54.937153 +epoch: 0, batch: 8047, sum loss: 7414.718262, avg loss: 3.914846, ppl: 50.141357 +epoch: 0, batch: 8048, sum loss: 6312.002930, avg loss: 3.715128, ppl: 41.063862 +epoch: 0, batch: 8049, sum loss: 7443.313477, avg loss: 4.025589, ppl: 56.013264 +epoch: 0, batch: 8050, sum loss: 5859.629883, avg loss: 3.664559, ppl: 39.038910 +epoch: 0, batch: 8051, sum loss: 6147.931641, avg loss: 3.813853, ppl: 45.324753 +epoch: 0, batch: 8052, sum loss: 5816.235352, avg loss: 3.831512, ppl: 46.132248 +epoch: 0, batch: 8053, sum loss: 6388.228516, avg loss: 3.611209, ppl: 37.010777 +epoch: 0, batch: 8054, sum loss: 7183.415039, avg loss: 3.973128, ppl: 53.150517 +epoch: 0, batch: 8055, sum loss: 7372.773926, avg loss: 3.953230, ppl: 52.103386 +epoch: 0, batch: 8056, sum loss: 6700.737305, avg loss: 3.864324, ppl: 47.671017 +epoch: 0, batch: 8057, sum loss: 7516.265625, avg loss: 4.256096, ppl: 70.534073 +epoch: 0, batch: 8058, sum loss: 6421.435547, avg loss: 3.875338, ppl: 48.199001 +epoch: 0, batch: 8059, sum loss: 6719.171387, avg loss: 3.924750, ppl: 50.640396 +epoch: 0, batch: 8060, sum loss: 6575.722656, avg loss: 4.002265, ppl: 54.721981 +epoch: 0, batch: 
8061, sum loss: 5613.946777, avg loss: 3.903996, ppl: 49.600281 +epoch: 0, batch: 8062, sum loss: 7630.477051, avg loss: 4.144746, ppl: 63.101582 +epoch: 0, batch: 8063, sum loss: 6803.802246, avg loss: 3.992842, ppl: 54.208717 +epoch: 0, batch: 8064, sum loss: 7078.519531, avg loss: 4.003688, ppl: 54.799873 +epoch: 0, batch: 8065, sum loss: 6605.562500, avg loss: 3.972076, ppl: 53.094650 +epoch: 0, batch: 8066, sum loss: 5126.365723, avg loss: 3.630571, ppl: 37.734344 +epoch: 0, batch: 8067, sum loss: 6340.175781, avg loss: 3.904049, ppl: 49.602894 +epoch: 0, batch: 8068, sum loss: 5723.429199, avg loss: 3.864571, ppl: 47.682804 +epoch: 0, batch: 8069, sum loss: 6894.626465, avg loss: 3.951075, ppl: 51.991245 +epoch: 0, batch: 8070, sum loss: 5160.178223, avg loss: 3.822354, ppl: 45.711700 +epoch: 0, batch: 8071, sum loss: 5699.322754, avg loss: 3.737261, ppl: 41.982834 +epoch: 0, batch: 8072, sum loss: 6176.961914, avg loss: 3.929365, ppl: 50.874672 +epoch: 0, batch: 8073, sum loss: 6403.187500, avg loss: 3.885429, ppl: 48.687843 +epoch: 0, batch: 8074, sum loss: 6304.986328, avg loss: 3.957932, ppl: 52.348980 +epoch: 0, batch: 8075, sum loss: 6903.669434, avg loss: 3.848199, ppl: 46.908524 +epoch: 0, batch: 8076, sum loss: 6935.612793, avg loss: 3.958683, ppl: 52.388298 +epoch: 0, batch: 8077, sum loss: 6659.591309, avg loss: 3.961684, ppl: 52.545750 +epoch: 0, batch: 8078, sum loss: 5543.003906, avg loss: 3.680614, ppl: 39.670727 +epoch: 0, batch: 8079, sum loss: 6253.455566, avg loss: 3.985631, ppl: 53.819252 +epoch: 0, batch: 8080, sum loss: 6829.563965, avg loss: 4.062798, ppl: 58.136753 +epoch: 0, batch: 8081, sum loss: 6753.945312, avg loss: 3.783723, ppl: 43.979477 +epoch: 0, batch: 8082, sum loss: 5703.784668, avg loss: 3.750023, ppl: 42.522057 +epoch: 0, batch: 8083, sum loss: 5873.589844, avg loss: 3.920954, ppl: 50.448563 +epoch: 0, batch: 8084, sum loss: 7233.752930, avg loss: 3.994342, ppl: 54.290112 +epoch: 0, batch: 8085, sum loss: 8507.306641, 
avg loss: 4.314050, ppl: 74.742561 +epoch: 0, batch: 8086, sum loss: 6052.803711, avg loss: 3.922750, ppl: 50.539261 +epoch: 0, batch: 8087, sum loss: 6851.126465, avg loss: 3.990173, ppl: 54.064220 +epoch: 0, batch: 8088, sum loss: 6873.589844, avg loss: 3.870265, ppl: 47.955070 +epoch: 0, batch: 8089, sum loss: 6356.638184, avg loss: 3.940879, ppl: 51.463837 +epoch: 0, batch: 8090, sum loss: 7024.884277, avg loss: 4.178991, ppl: 65.299950 +epoch: 0, batch: 8091, sum loss: 5882.958984, avg loss: 3.883141, ppl: 48.576569 +epoch: 0, batch: 8092, sum loss: 5967.496094, avg loss: 3.791293, ppl: 44.313679 +epoch: 0, batch: 8093, sum loss: 7817.673340, avg loss: 3.922566, ppl: 50.529922 +epoch: 0, batch: 8094, sum loss: 7527.416016, avg loss: 4.066675, ppl: 58.362595 +epoch: 0, batch: 8095, sum loss: 5923.166016, avg loss: 3.964636, ppl: 52.701077 +epoch: 0, batch: 8096, sum loss: 6447.780273, avg loss: 3.812999, ppl: 45.286026 +epoch: 0, batch: 8097, sum loss: 6821.517578, avg loss: 3.843108, ppl: 46.670307 +epoch: 0, batch: 8098, sum loss: 7093.377930, avg loss: 4.060319, ppl: 57.992832 +epoch: 0, batch: 8099, sum loss: 6584.474609, avg loss: 3.799466, ppl: 44.677315 +epoch: 0, batch: 8100, sum loss: 6539.314453, avg loss: 4.079422, ppl: 59.111320 +epoch: 0, batch: 8101, sum loss: 7859.331543, avg loss: 4.099808, ppl: 60.328716 +epoch: 0, batch: 8102, sum loss: 5892.333496, avg loss: 3.675816, ppl: 39.480873 +epoch: 0, batch: 8103, sum loss: 7271.053711, avg loss: 3.930299, ppl: 50.922215 +epoch: 0, batch: 8104, sum loss: 6890.625488, avg loss: 4.070068, ppl: 58.560966 +epoch: 0, batch: 8105, sum loss: 6293.068848, avg loss: 4.123899, ppl: 61.799698 +epoch: 0, batch: 8106, sum loss: 6944.517578, avg loss: 4.268296, ppl: 71.399849 +epoch: 0, batch: 8107, sum loss: 7271.377441, avg loss: 3.941126, ppl: 51.476524 +epoch: 0, batch: 8108, sum loss: 6232.485352, avg loss: 3.883168, ppl: 48.577877 +epoch: 0, batch: 8109, sum loss: 6091.378906, avg loss: 3.667296, ppl: 
39.145920 +epoch: 0, batch: 8110, sum loss: 6712.548340, avg loss: 4.130799, ppl: 62.227612 +epoch: 0, batch: 8111, sum loss: 7573.946289, avg loss: 3.998916, ppl: 54.538994 +epoch: 0, batch: 8112, sum loss: 8273.131836, avg loss: 4.242631, ppl: 69.590736 +epoch: 0, batch: 8113, sum loss: 7642.993164, avg loss: 4.046052, ppl: 57.171326 +epoch: 0, batch: 8114, sum loss: 6003.155273, avg loss: 3.850645, ppl: 47.023388 +epoch: 0, batch: 8115, sum loss: 6273.948242, avg loss: 3.790905, ppl: 44.296490 +epoch: 0, batch: 8116, sum loss: 6162.618652, avg loss: 3.827713, ppl: 45.957336 +epoch: 0, batch: 8117, sum loss: 6858.321289, avg loss: 3.939300, ppl: 51.382626 +epoch: 0, batch: 8118, sum loss: 7287.711914, avg loss: 3.960713, ppl: 52.494736 +epoch: 0, batch: 8119, sum loss: 6824.305664, avg loss: 3.974552, ppl: 53.226261 +epoch: 0, batch: 8120, sum loss: 5214.523926, avg loss: 3.751456, ppl: 42.583038 +epoch: 0, batch: 8121, sum loss: 6346.446777, avg loss: 3.761972, ppl: 43.033211 +epoch: 0, batch: 8122, sum loss: 5645.712402, avg loss: 3.814671, ppl: 45.361809 +epoch: 0, batch: 8123, sum loss: 7543.432617, avg loss: 3.963969, ppl: 52.665932 +epoch: 0, batch: 8124, sum loss: 7853.607422, avg loss: 3.982559, ppl: 53.654182 +epoch: 0, batch: 8125, sum loss: 6575.028320, avg loss: 3.838312, ppl: 46.447002 +epoch: 0, batch: 8126, sum loss: 7648.696289, avg loss: 4.006651, ppl: 54.962490 +epoch: 0, batch: 8127, sum loss: 6549.270020, avg loss: 3.801085, ppl: 44.749733 +epoch: 0, batch: 8128, sum loss: 7017.342285, avg loss: 4.179477, ppl: 65.331688 +epoch: 0, batch: 8129, sum loss: 6044.533691, avg loss: 3.840237, ppl: 46.536510 +epoch: 0, batch: 8130, sum loss: 5842.696289, avg loss: 3.674652, ppl: 39.434917 +epoch: 0, batch: 8131, sum loss: 6396.805176, avg loss: 3.814434, ppl: 45.351063 +epoch: 0, batch: 8132, sum loss: 7044.329590, avg loss: 3.920050, ppl: 50.402962 +epoch: 0, batch: 8133, sum loss: 6943.174805, avg loss: 4.155102, ppl: 63.758453 +epoch: 0, batch: 
8134, sum loss: 7190.114746, avg loss: 3.888650, ppl: 48.844921 +epoch: 0, batch: 8135, sum loss: 6454.784180, avg loss: 3.895464, ppl: 49.178875 +epoch: 0, batch: 8136, sum loss: 8250.369141, avg loss: 4.135523, ppl: 62.522301 +epoch: 0, batch: 8137, sum loss: 7314.174805, avg loss: 4.077021, ppl: 58.969517 +epoch: 0, batch: 8138, sum loss: 5576.538086, avg loss: 3.899677, ppl: 49.386497 +epoch: 0, batch: 8139, sum loss: 6457.746582, avg loss: 3.794211, ppl: 44.443142 +epoch: 0, batch: 8140, sum loss: 6961.182617, avg loss: 4.058999, ppl: 57.916283 +epoch: 0, batch: 8141, sum loss: 6709.115723, avg loss: 4.090924, ppl: 59.795132 +epoch: 0, batch: 8142, sum loss: 6056.591797, avg loss: 3.816378, ppl: 45.439335 +epoch: 0, batch: 8143, sum loss: 7224.899414, avg loss: 3.865650, ppl: 47.734276 +epoch: 0, batch: 8144, sum loss: 7550.548828, avg loss: 4.237121, ppl: 69.208290 +epoch: 0, batch: 8145, sum loss: 6954.667969, avg loss: 4.122506, ppl: 61.713711 +epoch: 0, batch: 8146, sum loss: 6990.904297, avg loss: 3.951896, ppl: 52.033951 +epoch: 0, batch: 8147, sum loss: 6275.527832, avg loss: 3.852381, ppl: 47.105064 +epoch: 0, batch: 8148, sum loss: 5616.298340, avg loss: 3.581823, ppl: 35.938992 +epoch: 0, batch: 8149, sum loss: 6305.404297, avg loss: 3.901859, ppl: 49.494377 +epoch: 0, batch: 8150, sum loss: 6597.673340, avg loss: 3.927187, ppl: 50.763966 +epoch: 0, batch: 8151, sum loss: 6174.723145, avg loss: 3.915487, ppl: 50.173515 +epoch: 0, batch: 8152, sum loss: 7039.319336, avg loss: 4.059584, ppl: 57.950207 +epoch: 0, batch: 8153, sum loss: 6844.575684, avg loss: 3.843108, ppl: 46.670319 +epoch: 0, batch: 8154, sum loss: 4831.561035, avg loss: 3.834572, ppl: 46.273632 +epoch: 0, batch: 8155, sum loss: 6171.852051, avg loss: 3.869500, ppl: 47.918407 +epoch: 0, batch: 8156, sum loss: 7401.251465, avg loss: 4.172070, ppl: 64.849525 +epoch: 0, batch: 8157, sum loss: 6086.974609, avg loss: 3.968041, ppl: 52.880821 +epoch: 0, batch: 8158, sum loss: 6407.792480, 
avg loss: 4.032594, ppl: 56.407055 +epoch: 0, batch: 8159, sum loss: 7380.675781, avg loss: 4.155786, ppl: 63.802097 +epoch: 0, batch: 8160, sum loss: 7013.687988, avg loss: 4.028540, ppl: 56.178837 +epoch: 0, batch: 8161, sum loss: 7698.328125, avg loss: 3.921716, ppl: 50.487003 +epoch: 0, batch: 8162, sum loss: 7418.209473, avg loss: 4.091677, ppl: 59.840141 +epoch: 0, batch: 8163, sum loss: 8883.756836, avg loss: 4.335655, ppl: 76.374947 +epoch: 0, batch: 8164, sum loss: 7599.289551, avg loss: 4.214803, ppl: 67.680817 +epoch: 0, batch: 8165, sum loss: 8485.072266, avg loss: 4.322503, ppl: 75.377029 +epoch: 0, batch: 8166, sum loss: 5619.452148, avg loss: 3.809798, ppl: 45.141319 +epoch: 0, batch: 8167, sum loss: 5861.828613, avg loss: 4.009459, ppl: 55.117046 +epoch: 0, batch: 8168, sum loss: 7809.579102, avg loss: 4.000809, ppl: 54.642349 +epoch: 0, batch: 8169, sum loss: 5806.481445, avg loss: 3.758241, ppl: 42.872932 +epoch: 0, batch: 8170, sum loss: 6455.272949, avg loss: 3.812920, ppl: 45.282463 +epoch: 0, batch: 8171, sum loss: 6452.054688, avg loss: 3.953465, ppl: 52.115635 +epoch: 0, batch: 8172, sum loss: 7782.068359, avg loss: 3.922413, ppl: 50.522224 +epoch: 0, batch: 8173, sum loss: 5786.384277, avg loss: 3.616490, ppl: 37.206749 +epoch: 0, batch: 8174, sum loss: 5949.551270, avg loss: 3.937493, ppl: 51.289837 +epoch: 0, batch: 8175, sum loss: 6053.102539, avg loss: 3.912801, ppl: 50.038906 +epoch: 0, batch: 8176, sum loss: 6375.678223, avg loss: 3.945345, ppl: 51.694191 +epoch: 0, batch: 8177, sum loss: 5197.315430, avg loss: 3.717679, ppl: 41.168743 +epoch: 0, batch: 8178, sum loss: 6014.489258, avg loss: 3.658448, ppl: 38.801094 +epoch: 0, batch: 8179, sum loss: 6449.307617, avg loss: 3.838874, ppl: 46.473099 +epoch: 0, batch: 8180, sum loss: 7607.100586, avg loss: 4.005846, ppl: 54.918240 +epoch: 0, batch: 8181, sum loss: 6406.230469, avg loss: 3.976555, ppl: 53.333004 +epoch: 0, batch: 8182, sum loss: 4803.895996, avg loss: 3.481084, ppl: 
32.494930 +epoch: 0, batch: 8183, sum loss: 5701.034668, avg loss: 3.798158, ppl: 44.618908 +epoch: 0, batch: 8184, sum loss: 6332.532715, avg loss: 3.840226, ppl: 46.535988 +epoch: 0, batch: 8185, sum loss: 6921.448242, avg loss: 4.100384, ppl: 60.363476 +epoch: 0, batch: 8186, sum loss: 6357.931641, avg loss: 3.764317, ppl: 43.134247 +epoch: 0, batch: 8187, sum loss: 6717.813477, avg loss: 4.270701, ppl: 71.571815 +epoch: 0, batch: 8188, sum loss: 5983.766602, avg loss: 3.589542, ppl: 36.217480 +epoch: 0, batch: 8189, sum loss: 6065.492676, avg loss: 3.885646, ppl: 48.698372 +epoch: 0, batch: 8190, sum loss: 6592.104492, avg loss: 3.711771, ppl: 40.926205 +epoch: 0, batch: 8191, sum loss: 6099.969727, avg loss: 3.922810, ppl: 50.542286 +epoch: 0, batch: 8192, sum loss: 6309.994141, avg loss: 3.988618, ppl: 53.980244 +epoch: 0, batch: 8193, sum loss: 7538.654297, avg loss: 4.068351, ppl: 58.460468 +epoch: 0, batch: 8194, sum loss: 6029.482910, avg loss: 3.773143, ppl: 43.516636 +epoch: 0, batch: 8195, sum loss: 7437.292969, avg loss: 3.953904, ppl: 52.138500 +epoch: 0, batch: 8196, sum loss: 6752.656250, avg loss: 3.854256, ppl: 47.193501 +epoch: 0, batch: 8197, sum loss: 6581.415039, avg loss: 3.795510, ppl: 44.500938 +epoch: 0, batch: 8198, sum loss: 7060.875488, avg loss: 4.168167, ppl: 64.596947 +epoch: 0, batch: 8199, sum loss: 7403.139648, avg loss: 4.083364, ppl: 59.344799 +epoch: 0, batch: 8200, sum loss: 6356.257812, avg loss: 3.817572, ppl: 45.493633 +epoch: 0, batch: 8201, sum loss: 6694.270508, avg loss: 3.896549, ppl: 49.232243 +epoch: 0, batch: 8202, sum loss: 6625.189453, avg loss: 3.867594, ppl: 47.827198 +epoch: 0, batch: 8203, sum loss: 5967.471191, avg loss: 3.842544, ppl: 46.644001 +epoch: 0, batch: 8204, sum loss: 6626.594727, avg loss: 3.960906, ppl: 52.504848 +epoch: 0, batch: 8205, sum loss: 7601.564941, avg loss: 4.036943, ppl: 56.652916 +epoch: 0, batch: 8206, sum loss: 6135.047852, avg loss: 3.920158, ppl: 50.408428 +epoch: 0, batch: 
8207, sum loss: 6722.267090, avg loss: 4.106455, ppl: 60.731064 +epoch: 0, batch: 8208, sum loss: 6519.246094, avg loss: 3.880504, ppl: 48.448624 +epoch: 0, batch: 8209, sum loss: 5365.839844, avg loss: 3.757591, ppl: 42.845089 +epoch: 0, batch: 8210, sum loss: 6042.469238, avg loss: 3.858537, ppl: 47.395969 +epoch: 0, batch: 8211, sum loss: 8749.889648, avg loss: 4.037789, ppl: 56.700859 +epoch: 0, batch: 8212, sum loss: 5835.234863, avg loss: 3.742935, ppl: 42.221718 +epoch: 0, batch: 8213, sum loss: 5292.631836, avg loss: 3.843596, ppl: 46.693092 +epoch: 0, batch: 8214, sum loss: 7075.913574, avg loss: 4.164752, ppl: 64.376717 +epoch: 0, batch: 8215, sum loss: 5796.065918, avg loss: 3.851207, ppl: 47.049809 +epoch: 0, batch: 8216, sum loss: 6641.184570, avg loss: 4.020088, ppl: 55.705994 +epoch: 0, batch: 8217, sum loss: 6570.688477, avg loss: 3.710157, ppl: 40.860226 +epoch: 0, batch: 8218, sum loss: 6980.937988, avg loss: 3.991388, ppl: 54.129974 +epoch: 0, batch: 8219, sum loss: 6324.088379, avg loss: 3.753168, ppl: 42.656006 +epoch: 0, batch: 8220, sum loss: 6160.265137, avg loss: 3.840564, ppl: 46.551746 +epoch: 0, batch: 8221, sum loss: 5788.368652, avg loss: 3.744094, ppl: 42.270679 +epoch: 0, batch: 8222, sum loss: 6720.257812, avg loss: 3.835764, ppl: 46.328808 +epoch: 0, batch: 8223, sum loss: 4740.395020, avg loss: 3.607607, ppl: 36.877686 +epoch: 0, batch: 8224, sum loss: 6547.086914, avg loss: 3.929824, ppl: 50.898014 +epoch: 0, batch: 8225, sum loss: 6236.772461, avg loss: 3.857002, ppl: 47.323277 +epoch: 0, batch: 8226, sum loss: 6237.668945, avg loss: 3.937922, ppl: 51.311878 +epoch: 0, batch: 8227, sum loss: 7152.369629, avg loss: 4.151114, ppl: 63.504704 +epoch: 0, batch: 8228, sum loss: 7301.730469, avg loss: 4.155794, ppl: 63.802612 +epoch: 0, batch: 8229, sum loss: 5978.806641, avg loss: 3.930840, ppl: 50.949734 +epoch: 0, batch: 8230, sum loss: 7603.656250, avg loss: 4.048805, ppl: 57.328922 +epoch: 0, batch: 8231, sum loss: 5066.907227, 
avg loss: 3.645257, ppl: 38.292614 +epoch: 0, batch: 8232, sum loss: 6321.011230, avg loss: 3.904269, ppl: 49.613796 +epoch: 0, batch: 8233, sum loss: 6255.932129, avg loss: 4.046528, ppl: 57.198509 +epoch: 0, batch: 8234, sum loss: 6881.839355, avg loss: 3.848903, ppl: 46.941559 +epoch: 0, batch: 8235, sum loss: 6145.986328, avg loss: 4.014361, ppl: 55.387886 +epoch: 0, batch: 8236, sum loss: 7409.985352, avg loss: 4.062492, ppl: 58.118958 +epoch: 0, batch: 8237, sum loss: 6654.062500, avg loss: 3.951344, ppl: 52.005203 +epoch: 0, batch: 8238, sum loss: 7054.505859, avg loss: 3.945473, ppl: 51.700798 +epoch: 0, batch: 8239, sum loss: 6655.306152, avg loss: 4.031076, ppl: 56.321480 +epoch: 0, batch: 8240, sum loss: 6688.133301, avg loss: 4.026570, ppl: 56.068283 +epoch: 0, batch: 8241, sum loss: 6402.604492, avg loss: 4.162942, ppl: 64.260292 +epoch: 0, batch: 8242, sum loss: 7731.337891, avg loss: 3.952627, ppl: 52.071964 +epoch: 0, batch: 8243, sum loss: 5778.956055, avg loss: 3.767246, ppl: 43.260780 +epoch: 0, batch: 8244, sum loss: 7358.569336, avg loss: 4.106345, ppl: 60.724346 +epoch: 0, batch: 8245, sum loss: 7315.746094, avg loss: 4.219000, ppl: 67.965477 +epoch: 0, batch: 8246, sum loss: 6336.399902, avg loss: 3.896925, ppl: 49.250755 +epoch: 0, batch: 8247, sum loss: 6561.867188, avg loss: 3.915195, ppl: 50.158875 +epoch: 0, batch: 8248, sum loss: 7296.779297, avg loss: 3.996046, ppl: 54.382687 +epoch: 0, batch: 8249, sum loss: 7368.860352, avg loss: 4.167908, ppl: 64.580193 +epoch: 0, batch: 8250, sum loss: 7030.673828, avg loss: 3.976626, ppl: 53.336754 +epoch: 0, batch: 8251, sum loss: 6218.161133, avg loss: 3.632103, ppl: 37.792229 +epoch: 0, batch: 8252, sum loss: 6464.003906, avg loss: 3.896325, ppl: 49.221245 +epoch: 0, batch: 8253, sum loss: 7230.413086, avg loss: 3.899899, ppl: 49.397461 +epoch: 0, batch: 8254, sum loss: 7143.134766, avg loss: 4.067844, ppl: 58.430874 +epoch: 0, batch: 8255, sum loss: 7265.956543, avg loss: 3.877245, ppl: 
48.290977 +epoch: 0, batch: 8256, sum loss: 6978.422852, avg loss: 3.844861, ppl: 46.752197 +epoch: 0, batch: 8257, sum loss: 5776.292480, avg loss: 3.712270, ppl: 40.946659 +epoch: 0, batch: 8258, sum loss: 7176.733398, avg loss: 4.000409, ppl: 54.620491 +epoch: 0, batch: 8259, sum loss: 5797.728027, avg loss: 3.839555, ppl: 46.504776 +epoch: 0, batch: 8260, sum loss: 6363.228516, avg loss: 3.877653, ppl: 48.310692 +epoch: 0, batch: 8261, sum loss: 5729.794434, avg loss: 3.742518, ppl: 42.204113 +epoch: 0, batch: 8262, sum loss: 5789.613281, avg loss: 3.803951, ppl: 44.878159 +epoch: 0, batch: 8263, sum loss: 5957.898438, avg loss: 3.785196, ppl: 44.044296 +epoch: 0, batch: 8264, sum loss: 6670.724609, avg loss: 4.001634, ppl: 54.687443 +epoch: 0, batch: 8265, sum loss: 6792.518555, avg loss: 4.038358, ppl: 56.733124 +epoch: 0, batch: 8266, sum loss: 6715.091309, avg loss: 3.813226, ppl: 45.296329 +epoch: 0, batch: 8267, sum loss: 6863.653320, avg loss: 3.767098, ppl: 43.254375 +epoch: 0, batch: 8268, sum loss: 6388.628906, avg loss: 3.926631, ppl: 50.735760 +epoch: 0, batch: 8269, sum loss: 6869.887695, avg loss: 4.038734, ppl: 56.754471 +epoch: 0, batch: 8270, sum loss: 6323.328613, avg loss: 3.881724, ppl: 48.507774 +epoch: 0, batch: 8271, sum loss: 7533.031738, avg loss: 4.087375, ppl: 59.583263 +epoch: 0, batch: 8272, sum loss: 5632.760254, avg loss: 3.693613, ppl: 40.189804 +epoch: 0, batch: 8273, sum loss: 7573.794922, avg loss: 4.127409, ppl: 62.017029 +epoch: 0, batch: 8274, sum loss: 6498.018555, avg loss: 3.874787, ppl: 48.172451 +epoch: 0, batch: 8275, sum loss: 6582.662598, avg loss: 3.785315, ppl: 44.049545 +epoch: 0, batch: 8276, sum loss: 7984.331055, avg loss: 4.222280, ppl: 68.188782 +epoch: 0, batch: 8277, sum loss: 6424.327637, avg loss: 3.893532, ppl: 49.083946 +epoch: 0, batch: 8278, sum loss: 7047.845215, avg loss: 3.911124, ppl: 49.955070 +epoch: 0, batch: 8279, sum loss: 6914.766602, avg loss: 4.247399, ppl: 69.923325 +epoch: 0, batch: 
8280, sum loss: 6213.136719, avg loss: 3.861490, ppl: 47.536106 +epoch: 0, batch: 8281, sum loss: 5397.529297, avg loss: 3.560376, ppl: 35.176411 +epoch: 0, batch: 8282, sum loss: 7341.029297, avg loss: 4.024687, ppl: 55.962807 +epoch: 0, batch: 8283, sum loss: 5716.580078, avg loss: 3.878277, ppl: 48.340843 +epoch: 0, batch: 8284, sum loss: 7833.600586, avg loss: 4.160170, ppl: 64.082420 +epoch: 0, batch: 8285, sum loss: 6976.714355, avg loss: 3.622386, ppl: 37.426743 +epoch: 0, batch: 8286, sum loss: 5616.418945, avg loss: 3.810325, ppl: 45.165112 +epoch: 0, batch: 8287, sum loss: 7225.225586, avg loss: 3.937453, ppl: 51.287796 +epoch: 0, batch: 8288, sum loss: 5549.788574, avg loss: 3.589773, ppl: 36.225842 +epoch: 0, batch: 8289, sum loss: 7552.413574, avg loss: 3.905074, ppl: 49.653759 +epoch: 0, batch: 8290, sum loss: 7638.547852, avg loss: 3.881376, ppl: 48.490906 +epoch: 0, batch: 8291, sum loss: 7691.272949, avg loss: 4.037414, ppl: 56.679558 +epoch: 0, batch: 8292, sum loss: 5794.008301, avg loss: 3.653221, ppl: 38.598789 +epoch: 0, batch: 8293, sum loss: 6980.546387, avg loss: 3.921655, ppl: 50.483948 +epoch: 0, batch: 8294, sum loss: 5774.974609, avg loss: 3.772028, ppl: 43.468128 +epoch: 0, batch: 8295, sum loss: 6687.987305, avg loss: 3.808649, ppl: 45.089485 +epoch: 0, batch: 8296, sum loss: 7145.215820, avg loss: 4.082981, ppl: 59.322025 +epoch: 0, batch: 8297, sum loss: 6966.217285, avg loss: 4.112289, ppl: 61.086380 +epoch: 0, batch: 8298, sum loss: 7111.916992, avg loss: 4.080274, ppl: 59.161655 +epoch: 0, batch: 8299, sum loss: 7647.599609, avg loss: 4.008176, ppl: 55.046368 +epoch: 0, batch: 8300, sum loss: 6529.792480, avg loss: 3.926514, ppl: 50.729820 +epoch: 0, batch: 8301, sum loss: 5923.172852, avg loss: 3.708937, ppl: 40.810417 +epoch: 0, batch: 8302, sum loss: 7103.021973, avg loss: 4.008478, ppl: 55.062984 +epoch: 0, batch: 8303, sum loss: 6647.299805, avg loss: 3.860220, ppl: 47.475815 +epoch: 0, batch: 8304, sum loss: 6524.834961, 
avg loss: 3.930624, ppl: 50.938728 +epoch: 0, batch: 8305, sum loss: 6876.744629, avg loss: 3.889562, ppl: 48.889450 +epoch: 0, batch: 8306, sum loss: 6514.786621, avg loss: 4.107684, ppl: 60.805706 +epoch: 0, batch: 8307, sum loss: 5852.783203, avg loss: 3.798042, ppl: 44.613758 +epoch: 0, batch: 8308, sum loss: 6473.079102, avg loss: 3.871459, ppl: 48.012386 +epoch: 0, batch: 8309, sum loss: 6136.304688, avg loss: 3.928492, ppl: 50.830273 +epoch: 0, batch: 8310, sum loss: 7253.944336, avg loss: 3.968241, ppl: 52.891411 +epoch: 0, batch: 8311, sum loss: 6697.722656, avg loss: 3.956127, ppl: 52.254547 +epoch: 0, batch: 8312, sum loss: 7737.460938, avg loss: 4.124446, ppl: 61.833569 +epoch: 0, batch: 8313, sum loss: 7301.411621, avg loss: 3.914966, ppl: 50.147362 +epoch: 0, batch: 8314, sum loss: 5950.001465, avg loss: 3.797066, ppl: 44.570213 +epoch: 0, batch: 8315, sum loss: 6211.473145, avg loss: 3.848496, ppl: 46.922459 +epoch: 0, batch: 8316, sum loss: 6451.911621, avg loss: 4.019883, ppl: 55.694572 +epoch: 0, batch: 8317, sum loss: 7338.433594, avg loss: 4.115779, ppl: 61.299942 +epoch: 0, batch: 8318, sum loss: 6899.314453, avg loss: 4.041778, ppl: 56.927448 +epoch: 0, batch: 8319, sum loss: 6612.647461, avg loss: 3.746542, ppl: 42.374317 +epoch: 0, batch: 8320, sum loss: 6442.866699, avg loss: 3.774380, ppl: 43.570484 +epoch: 0, batch: 8321, sum loss: 7169.992188, avg loss: 3.987760, ppl: 53.933918 +epoch: 0, batch: 8322, sum loss: 5679.540039, avg loss: 3.756310, ppl: 42.790226 +epoch: 0, batch: 8323, sum loss: 7157.891602, avg loss: 3.840070, ppl: 46.528755 +epoch: 0, batch: 8324, sum loss: 6211.370117, avg loss: 3.679722, ppl: 39.635368 +epoch: 0, batch: 8325, sum loss: 7631.585938, avg loss: 4.065842, ppl: 58.313995 +epoch: 0, batch: 8326, sum loss: 8518.493164, avg loss: 4.212904, ppl: 67.552460 +epoch: 0, batch: 8327, sum loss: 6438.949219, avg loss: 4.083037, ppl: 59.325363 +epoch: 0, batch: 8328, sum loss: 6953.695801, avg loss: 3.886918, ppl: 
48.760357 +epoch: 0, batch: 8329, sum loss: 7136.939453, avg loss: 3.987117, ppl: 53.899265 +epoch: 0, batch: 8330, sum loss: 6217.984375, avg loss: 3.821748, ppl: 45.684006 +epoch: 0, batch: 8331, sum loss: 7260.705078, avg loss: 4.177621, ppl: 65.210526 +epoch: 0, batch: 8332, sum loss: 7633.908691, avg loss: 4.082304, ppl: 59.281898 +epoch: 0, batch: 8333, sum loss: 7138.532227, avg loss: 3.937415, ppl: 51.285839 +epoch: 0, batch: 8334, sum loss: 7183.410645, avg loss: 4.081483, ppl: 59.233269 +epoch: 0, batch: 8335, sum loss: 7792.125000, avg loss: 3.993914, ppl: 54.266857 +epoch: 0, batch: 8336, sum loss: 6669.827148, avg loss: 3.798307, ppl: 44.625576 +epoch: 0, batch: 8337, sum loss: 6304.746094, avg loss: 3.894222, ppl: 49.117847 +epoch: 0, batch: 8338, sum loss: 6922.130859, avg loss: 3.982814, ppl: 53.667847 +epoch: 0, batch: 8339, sum loss: 6183.966309, avg loss: 3.989656, ppl: 54.036282 +epoch: 0, batch: 8340, sum loss: 6692.025391, avg loss: 4.055773, ppl: 57.729759 +epoch: 0, batch: 8341, sum loss: 6916.791992, avg loss: 3.901180, ppl: 49.460781 +epoch: 0, batch: 8342, sum loss: 6765.205566, avg loss: 4.012578, ppl: 55.289196 +epoch: 0, batch: 8343, sum loss: 6318.533691, avg loss: 3.939236, ppl: 51.379318 +epoch: 0, batch: 8344, sum loss: 6845.529785, avg loss: 3.977647, ppl: 53.391235 +epoch: 0, batch: 8345, sum loss: 6764.000488, avg loss: 4.035800, ppl: 56.588173 +epoch: 0, batch: 8346, sum loss: 5816.061035, avg loss: 3.811311, ppl: 45.209690 +epoch: 0, batch: 8347, sum loss: 5921.238770, avg loss: 3.761905, ppl: 43.030319 +epoch: 0, batch: 8348, sum loss: 6539.521973, avg loss: 4.056776, ppl: 57.787678 +epoch: 0, batch: 8349, sum loss: 6308.618164, avg loss: 3.925711, ppl: 50.689125 +epoch: 0, batch: 8350, sum loss: 7338.361816, avg loss: 3.979589, ppl: 53.495041 +epoch: 0, batch: 8351, sum loss: 6441.155273, avg loss: 3.854671, ppl: 47.213093 +epoch: 0, batch: 8352, sum loss: 6349.629395, avg loss: 3.793088, ppl: 44.393284 +epoch: 0, batch: 
8353, sum loss: 6714.925293, avg loss: 3.843689, ppl: 46.697433 +epoch: 0, batch: 8354, sum loss: 7001.986328, avg loss: 3.836705, ppl: 46.372425 +epoch: 0, batch: 8355, sum loss: 6443.403320, avg loss: 3.860637, ppl: 47.495605 +epoch: 0, batch: 8356, sum loss: 6584.519043, avg loss: 3.680558, ppl: 39.668541 +epoch: 0, batch: 8357, sum loss: 7395.665039, avg loss: 3.995497, ppl: 54.352859 +epoch: 0, batch: 8358, sum loss: 6030.217773, avg loss: 3.865524, ppl: 47.728291 +epoch: 0, batch: 8359, sum loss: 6973.233887, avg loss: 3.816767, ppl: 45.457027 +epoch: 0, batch: 8360, sum loss: 7455.450195, avg loss: 3.984741, ppl: 53.771358 +epoch: 0, batch: 8361, sum loss: 6117.968750, avg loss: 3.710108, ppl: 40.858231 +epoch: 0, batch: 8362, sum loss: 6950.360352, avg loss: 4.151948, ppl: 63.557690 +epoch: 0, batch: 8363, sum loss: 6793.494141, avg loss: 4.092466, ppl: 59.887413 +epoch: 0, batch: 8364, sum loss: 6789.864258, avg loss: 4.090280, ppl: 59.756596 +epoch: 0, batch: 8365, sum loss: 5765.054688, avg loss: 3.903219, ppl: 49.561756 +epoch: 0, batch: 8366, sum loss: 5722.806152, avg loss: 3.747745, ppl: 42.425297 +epoch: 0, batch: 8367, sum loss: 6069.000488, avg loss: 3.897881, ppl: 49.297878 +epoch: 0, batch: 8368, sum loss: 6461.607910, avg loss: 3.717841, ppl: 41.175407 +epoch: 0, batch: 8369, sum loss: 6807.291016, avg loss: 3.992546, ppl: 54.192707 +epoch: 0, batch: 8370, sum loss: 5903.263184, avg loss: 3.682634, ppl: 39.750973 +epoch: 0, batch: 8371, sum loss: 7794.053223, avg loss: 3.873784, ppl: 48.124157 +epoch: 0, batch: 8372, sum loss: 6625.512207, avg loss: 4.022776, ppl: 55.855953 +epoch: 0, batch: 8373, sum loss: 6430.170410, avg loss: 4.072306, ppl: 58.692131 +epoch: 0, batch: 8374, sum loss: 6099.695801, avg loss: 3.885157, ppl: 48.674564 +epoch: 0, batch: 8375, sum loss: 6916.756836, avg loss: 3.945669, ppl: 51.710907 +epoch: 0, batch: 8376, sum loss: 7043.446777, avg loss: 3.972615, ppl: 53.123291 +epoch: 0, batch: 8377, sum loss: 6281.985352, 
avg loss: 3.677977, ppl: 39.566284 +epoch: 0, batch: 8378, sum loss: 7266.898926, avg loss: 3.855119, ppl: 47.234226 +epoch: 0, batch: 8379, sum loss: 7331.052246, avg loss: 4.034701, ppl: 56.526035 +epoch: 0, batch: 8380, sum loss: 7007.485840, avg loss: 3.961270, ppl: 52.523968 +epoch: 0, batch: 8381, sum loss: 7330.941406, avg loss: 4.030204, ppl: 56.272408 +epoch: 0, batch: 8382, sum loss: 5400.070801, avg loss: 3.724187, ppl: 41.437527 +epoch: 0, batch: 8383, sum loss: 6977.665039, avg loss: 4.150901, ppl: 63.491199 +epoch: 0, batch: 8384, sum loss: 6535.261230, avg loss: 3.994659, ppl: 54.307304 +epoch: 0, batch: 8385, sum loss: 5294.490723, avg loss: 3.646343, ppl: 38.334240 +epoch: 0, batch: 8386, sum loss: 7716.410156, avg loss: 4.124218, ppl: 61.819416 +epoch: 0, batch: 8387, sum loss: 5839.896973, avg loss: 3.698478, ppl: 40.385788 +epoch: 0, batch: 8388, sum loss: 5193.299805, avg loss: 3.629140, ppl: 37.680412 +epoch: 0, batch: 8389, sum loss: 6288.673340, avg loss: 3.923065, ppl: 50.555157 +epoch: 0, batch: 8390, sum loss: 6261.006348, avg loss: 3.769420, ppl: 43.354908 +epoch: 0, batch: 8391, sum loss: 6065.688477, avg loss: 3.793426, ppl: 44.408283 +epoch: 0, batch: 8392, sum loss: 6638.750977, avg loss: 3.864232, ppl: 47.666664 +epoch: 0, batch: 8393, sum loss: 6554.151367, avg loss: 3.775433, ppl: 43.616383 +epoch: 0, batch: 8394, sum loss: 6490.706055, avg loss: 4.059228, ppl: 57.929569 +epoch: 0, batch: 8395, sum loss: 5560.974609, avg loss: 3.699916, ppl: 40.443920 +epoch: 0, batch: 8396, sum loss: 5988.470215, avg loss: 3.669406, ppl: 39.228596 +epoch: 0, batch: 8397, sum loss: 7776.847656, avg loss: 4.012822, ppl: 55.302696 +epoch: 0, batch: 8398, sum loss: 5574.813965, avg loss: 3.687047, ppl: 39.926750 +epoch: 0, batch: 8399, sum loss: 5226.268066, avg loss: 3.854180, ppl: 47.189899 +epoch: 0, batch: 8400, sum loss: 6319.091309, avg loss: 3.777102, ppl: 43.689247 +epoch: 0, batch: 8401, sum loss: 5691.649902, avg loss: 3.655524, ppl: 
38.687771 +epoch: 0, batch: 8402, sum loss: 7657.273438, avg loss: 3.877100, ppl: 48.284012 +epoch: 0, batch: 8403, sum loss: 6540.008789, avg loss: 3.980529, ppl: 53.545330 +epoch: 0, batch: 8404, sum loss: 5798.011230, avg loss: 3.801975, ppl: 44.789536 +epoch: 0, batch: 8405, sum loss: 6356.556641, avg loss: 4.030791, ppl: 56.305420 +epoch: 0, batch: 8406, sum loss: 7118.735352, avg loss: 4.010555, ppl: 55.177475 +epoch: 0, batch: 8407, sum loss: 6269.464355, avg loss: 3.792779, ppl: 44.379570 +epoch: 0, batch: 8408, sum loss: 6426.032227, avg loss: 3.766725, ppl: 43.238209 +epoch: 0, batch: 8409, sum loss: 6890.675293, avg loss: 3.955611, ppl: 52.227619 +epoch: 0, batch: 8410, sum loss: 7378.411621, avg loss: 4.016555, ppl: 55.509563 +epoch: 0, batch: 8411, sum loss: 7102.266602, avg loss: 3.843218, ppl: 46.675426 +epoch: 0, batch: 8412, sum loss: 6396.005859, avg loss: 4.092134, ppl: 59.867542 +epoch: 0, batch: 8413, sum loss: 6973.738770, avg loss: 4.126472, ppl: 61.958977 +epoch: 0, batch: 8414, sum loss: 5430.395508, avg loss: 3.739942, ppl: 42.095543 +epoch: 0, batch: 8415, sum loss: 6786.383789, avg loss: 4.068575, ppl: 58.473602 +epoch: 0, batch: 8416, sum loss: 6127.616211, avg loss: 3.851424, ppl: 47.060051 +epoch: 0, batch: 8417, sum loss: 5490.721191, avg loss: 3.842352, ppl: 46.635036 +epoch: 0, batch: 8418, sum loss: 6481.289062, avg loss: 3.826026, ppl: 45.879868 +epoch: 0, batch: 8419, sum loss: 7839.255371, avg loss: 4.185401, ppl: 65.719849 +epoch: 0, batch: 8420, sum loss: 6280.024414, avg loss: 3.655427, ppl: 38.684052 +epoch: 0, batch: 8421, sum loss: 5875.467773, avg loss: 3.785740, ppl: 44.068253 +epoch: 0, batch: 8422, sum loss: 7132.946289, avg loss: 3.880819, ppl: 48.463905 +epoch: 0, batch: 8423, sum loss: 5148.633789, avg loss: 3.744461, ppl: 42.286201 +epoch: 0, batch: 8424, sum loss: 5837.203125, avg loss: 4.034004, ppl: 56.486618 +epoch: 0, batch: 8425, sum loss: 7143.375000, avg loss: 3.867556, ppl: 47.825352 +epoch: 0, batch: 
8426, sum loss: 7543.206055, avg loss: 4.014479, ppl: 55.394436 +epoch: 0, batch: 8427, sum loss: 6102.935059, avg loss: 4.023029, ppl: 55.870071 +epoch: 0, batch: 8428, sum loss: 6778.514648, avg loss: 3.797487, ppl: 44.589005 +epoch: 0, batch: 8429, sum loss: 7160.441895, avg loss: 3.841439, ppl: 46.592461 +epoch: 0, batch: 8430, sum loss: 6138.623047, avg loss: 3.834243, ppl: 46.258400 +epoch: 0, batch: 8431, sum loss: 7794.911621, avg loss: 3.987167, ppl: 53.901974 +epoch: 0, batch: 8432, sum loss: 6223.282227, avg loss: 3.899300, ppl: 49.367863 +epoch: 0, batch: 8433, sum loss: 6217.267578, avg loss: 3.990544, ppl: 54.084309 +epoch: 0, batch: 8434, sum loss: 6050.439941, avg loss: 3.760373, ppl: 42.964443 +epoch: 0, batch: 8435, sum loss: 7139.015137, avg loss: 3.922536, ppl: 50.528416 +epoch: 0, batch: 8436, sum loss: 6544.430664, avg loss: 4.049771, ppl: 57.384331 +epoch: 0, batch: 8437, sum loss: 8130.979004, avg loss: 4.156942, ppl: 63.875916 +epoch: 0, batch: 8438, sum loss: 6192.567383, avg loss: 3.817859, ppl: 45.506672 +epoch: 0, batch: 8439, sum loss: 6289.500488, avg loss: 3.616734, ppl: 37.215824 +epoch: 0, batch: 8440, sum loss: 7103.785156, avg loss: 3.894619, ppl: 49.137329 +epoch: 0, batch: 8441, sum loss: 5594.284668, avg loss: 3.549673, ppl: 34.801929 +epoch: 0, batch: 8442, sum loss: 5881.512207, avg loss: 3.758155, ppl: 42.869244 +epoch: 0, batch: 8443, sum loss: 7215.002930, avg loss: 3.864490, ppl: 47.678951 +epoch: 0, batch: 8444, sum loss: 5530.638184, avg loss: 3.724335, ppl: 41.443680 +epoch: 0, batch: 8445, sum loss: 6667.213867, avg loss: 3.847209, ppl: 46.862099 +epoch: 0, batch: 8446, sum loss: 7272.883789, avg loss: 4.065335, ppl: 58.284420 +epoch: 0, batch: 8447, sum loss: 7263.808594, avg loss: 3.995494, ppl: 54.352703 +epoch: 0, batch: 8448, sum loss: 5509.850586, avg loss: 3.584809, ppl: 36.046459 +epoch: 0, batch: 8449, sum loss: 4776.305664, avg loss: 3.436191, ppl: 31.068396 +epoch: 0, batch: 8450, sum loss: 6981.500977, 
avg loss: 4.054298, ppl: 57.644676 +epoch: 0, batch: 8451, sum loss: 5907.505371, avg loss: 3.912255, ppl: 50.011616 +epoch: 0, batch: 8452, sum loss: 6627.864258, avg loss: 4.004752, ppl: 54.858227 +epoch: 0, batch: 8453, sum loss: 6332.157227, avg loss: 3.782651, ppl: 43.932350 +epoch: 0, batch: 8454, sum loss: 6002.695312, avg loss: 3.862738, ppl: 47.595509 +epoch: 0, batch: 8455, sum loss: 6159.278809, avg loss: 3.748800, ppl: 42.470089 +epoch: 0, batch: 8456, sum loss: 6062.582520, avg loss: 3.908822, ppl: 49.840183 +epoch: 0, batch: 8457, sum loss: 6236.640625, avg loss: 3.777493, ppl: 43.706333 +epoch: 0, batch: 8458, sum loss: 6854.849609, avg loss: 3.727488, ppl: 41.574532 +epoch: 0, batch: 8459, sum loss: 6118.046387, avg loss: 3.891887, ppl: 49.003265 +epoch: 0, batch: 8460, sum loss: 5806.405273, avg loss: 3.787610, ppl: 44.150734 +epoch: 0, batch: 8461, sum loss: 6345.741211, avg loss: 3.626138, ppl: 37.567440 +epoch: 0, batch: 8462, sum loss: 6277.574707, avg loss: 3.802287, ppl: 44.803516 +epoch: 0, batch: 8463, sum loss: 6041.437012, avg loss: 3.806829, ppl: 45.007481 +epoch: 0, batch: 8464, sum loss: 7586.109375, avg loss: 4.364850, ppl: 78.637604 +epoch: 0, batch: 8465, sum loss: 5402.456543, avg loss: 3.783233, ppl: 43.957924 +epoch: 0, batch: 8466, sum loss: 6514.787598, avg loss: 3.987018, ppl: 53.893944 +epoch: 0, batch: 8467, sum loss: 7383.144531, avg loss: 3.871602, ppl: 48.019241 +epoch: 0, batch: 8468, sum loss: 6308.601562, avg loss: 3.667792, ppl: 39.165318 +epoch: 0, batch: 8469, sum loss: 6570.386719, avg loss: 4.233497, ppl: 68.957932 +epoch: 0, batch: 8470, sum loss: 6211.391602, avg loss: 3.865210, ppl: 47.713272 +epoch: 0, batch: 8471, sum loss: 6518.919922, avg loss: 4.056578, ppl: 57.776245 +epoch: 0, batch: 8472, sum loss: 7152.008301, avg loss: 4.000005, ppl: 54.598412 +epoch: 0, batch: 8473, sum loss: 7309.887695, avg loss: 3.998845, ppl: 54.535107 +epoch: 0, batch: 8474, sum loss: 7169.822266, avg loss: 3.777567, ppl: 
43.709583 +epoch: 0, batch: 8475, sum loss: 5133.303711, avg loss: 3.492043, ppl: 32.853016 +epoch: 0, batch: 8476, sum loss: 6111.199707, avg loss: 3.885060, ppl: 48.669865 +epoch: 0, batch: 8477, sum loss: 8050.089844, avg loss: 4.031092, ppl: 56.322365 +epoch: 0, batch: 8478, sum loss: 6493.081055, avg loss: 3.761924, ppl: 43.031139 +epoch: 0, batch: 8479, sum loss: 6514.995117, avg loss: 4.173604, ppl: 64.949074 +epoch: 0, batch: 8480, sum loss: 6464.967773, avg loss: 4.000599, ppl: 54.630859 +epoch: 0, batch: 8481, sum loss: 6428.022461, avg loss: 3.796824, ppl: 44.559425 +epoch: 0, batch: 8482, sum loss: 5893.503906, avg loss: 3.602386, ppl: 36.685680 +epoch: 0, batch: 8483, sum loss: 5970.780762, avg loss: 3.889759, ppl: 48.899117 +epoch: 0, batch: 8484, sum loss: 7667.108887, avg loss: 4.155614, ppl: 63.791145 +epoch: 0, batch: 8485, sum loss: 6552.590332, avg loss: 4.007700, ppl: 55.020203 +epoch: 0, batch: 8486, sum loss: 6992.980469, avg loss: 3.880677, ppl: 48.457031 +epoch: 0, batch: 8487, sum loss: 6196.203613, avg loss: 3.730406, ppl: 41.696014 +epoch: 0, batch: 8488, sum loss: 6426.065430, avg loss: 3.655327, ppl: 38.680180 +epoch: 0, batch: 8489, sum loss: 6700.201660, avg loss: 4.050908, ppl: 57.449604 +epoch: 0, batch: 8490, sum loss: 7671.899902, avg loss: 4.111414, ppl: 61.032982 +epoch: 0, batch: 8491, sum loss: 5596.100586, avg loss: 3.878102, ppl: 48.332375 +epoch: 0, batch: 8492, sum loss: 7133.166016, avg loss: 3.828860, ppl: 46.010048 +epoch: 0, batch: 8493, sum loss: 6118.877441, avg loss: 4.114914, ppl: 61.246944 +epoch: 0, batch: 8494, sum loss: 7932.148438, avg loss: 4.148613, ppl: 63.346077 +epoch: 0, batch: 8495, sum loss: 5976.581543, avg loss: 3.693808, ppl: 40.197632 +epoch: 0, batch: 8496, sum loss: 6285.809570, avg loss: 3.828142, ppl: 45.977051 +epoch: 0, batch: 8497, sum loss: 7157.546875, avg loss: 4.094707, ppl: 60.021721 +epoch: 0, batch: 8498, sum loss: 6363.326660, avg loss: 3.765282, ppl: 43.175877 +epoch: 0, batch: 
8499, sum loss: 6459.759277, avg loss: 3.773224, ppl: 43.520134 +epoch: 0, batch: 8500, sum loss: 7329.992676, avg loss: 3.768634, ppl: 43.320843 +epoch: 0, batch: 8501, sum loss: 6679.465820, avg loss: 4.004476, ppl: 54.843082 +epoch: 0, batch: 8502, sum loss: 7327.482910, avg loss: 3.868787, ppl: 47.884281 +epoch: 0, batch: 8503, sum loss: 7628.896484, avg loss: 4.180217, ppl: 65.380058 +epoch: 0, batch: 8504, sum loss: 5762.739258, avg loss: 3.705942, ppl: 40.688354 +epoch: 0, batch: 8505, sum loss: 7802.405762, avg loss: 4.194842, ppl: 66.343239 +epoch: 0, batch: 8506, sum loss: 7432.460938, avg loss: 4.166178, ppl: 64.468567 +epoch: 0, batch: 8507, sum loss: 6984.206543, avg loss: 3.934764, ppl: 51.150085 +epoch: 0, batch: 8508, sum loss: 6971.104004, avg loss: 4.060049, ppl: 57.977154 +epoch: 0, batch: 8509, sum loss: 6085.540039, avg loss: 3.812995, ppl: 45.285866 +epoch: 0, batch: 8510, sum loss: 6417.944336, avg loss: 3.773042, ppl: 43.512215 +epoch: 0, batch: 8511, sum loss: 7608.190430, avg loss: 3.821291, ppl: 45.663120 +epoch: 0, batch: 8512, sum loss: 6673.473145, avg loss: 3.809060, ppl: 45.108032 +epoch: 0, batch: 8513, sum loss: 6412.035645, avg loss: 3.776228, ppl: 43.651096 +epoch: 0, batch: 8514, sum loss: 6168.547852, avg loss: 3.872284, ppl: 48.051998 +epoch: 0, batch: 8515, sum loss: 5934.102051, avg loss: 3.660766, ppl: 38.891129 +epoch: 0, batch: 8516, sum loss: 5356.579590, avg loss: 3.681498, ppl: 39.705822 +epoch: 0, batch: 8517, sum loss: 6420.742188, avg loss: 3.912701, ppl: 50.033894 +epoch: 0, batch: 8518, sum loss: 5984.942871, avg loss: 3.824244, ppl: 45.798187 +epoch: 0, batch: 8519, sum loss: 6369.364258, avg loss: 3.755522, ppl: 42.756512 +epoch: 0, batch: 8520, sum loss: 6019.570312, avg loss: 3.499750, ppl: 33.107178 +epoch: 0, batch: 8521, sum loss: 6892.534180, avg loss: 3.968068, ppl: 52.882271 +epoch: 0, batch: 8522, sum loss: 6328.157227, avg loss: 3.918364, ppl: 50.318035 +epoch: 0, batch: 8523, sum loss: 6928.932617, 
avg loss: 3.857980, ppl: 47.369591 +epoch: 0, batch: 8524, sum loss: 6692.591309, avg loss: 3.753557, ppl: 42.672588 +epoch: 0, batch: 8525, sum loss: 7104.937988, avg loss: 3.731585, ppl: 41.745232 +epoch: 0, batch: 8526, sum loss: 7027.013672, avg loss: 4.020031, ppl: 55.702831 +epoch: 0, batch: 8527, sum loss: 6048.821289, avg loss: 3.889917, ppl: 48.906845 +epoch: 0, batch: 8528, sum loss: 6254.254883, avg loss: 3.935969, ppl: 51.211758 +epoch: 0, batch: 8529, sum loss: 6868.069824, avg loss: 3.935857, ppl: 51.205994 +epoch: 0, batch: 8530, sum loss: 6809.295410, avg loss: 3.864527, ppl: 47.680691 +epoch: 0, batch: 8531, sum loss: 7560.139160, avg loss: 3.985313, ppl: 53.802135 +epoch: 0, batch: 8532, sum loss: 5649.524414, avg loss: 3.707037, ppl: 40.732933 +epoch: 0, batch: 8533, sum loss: 6524.365723, avg loss: 3.788830, ppl: 44.204662 +epoch: 0, batch: 8534, sum loss: 5805.496582, avg loss: 3.885875, ppl: 48.709522 +epoch: 0, batch: 8535, sum loss: 6190.940918, avg loss: 3.823929, ppl: 45.783745 +epoch: 0, batch: 8536, sum loss: 7831.794922, avg loss: 4.201607, ppl: 66.793564 +epoch: 0, batch: 8537, sum loss: 5990.422852, avg loss: 3.767561, ppl: 43.274410 +epoch: 0, batch: 8538, sum loss: 6791.936523, avg loss: 3.916918, ppl: 50.245365 +epoch: 0, batch: 8539, sum loss: 7206.149414, avg loss: 3.920647, ppl: 50.433060 +epoch: 0, batch: 8540, sum loss: 6856.161621, avg loss: 3.931285, ppl: 50.972443 +epoch: 0, batch: 8541, sum loss: 5903.695312, avg loss: 3.696741, ppl: 40.315704 +epoch: 0, batch: 8542, sum loss: 6440.628418, avg loss: 4.010354, ppl: 55.166370 +epoch: 0, batch: 8543, sum loss: 6921.410645, avg loss: 3.877541, ppl: 48.305290 +epoch: 0, batch: 8544, sum loss: 6286.610352, avg loss: 3.787115, ppl: 44.128899 +epoch: 0, batch: 8545, sum loss: 6716.103516, avg loss: 3.888885, ppl: 48.856358 +epoch: 0, batch: 8546, sum loss: 5918.894043, avg loss: 3.906861, ppl: 49.742558 +epoch: 0, batch: 8547, sum loss: 5946.078125, avg loss: 4.050462, ppl: 
57.423969 +epoch: 0, batch: 8548, sum loss: 6562.708984, avg loss: 3.901729, ppl: 49.487957 +epoch: 0, batch: 8549, sum loss: 5793.446777, avg loss: 3.824057, ppl: 45.789616 +epoch: 0, batch: 8550, sum loss: 5976.260742, avg loss: 3.811391, ppl: 45.213272 +epoch: 0, batch: 8551, sum loss: 5197.986816, avg loss: 3.587293, ppl: 36.136112 +epoch: 0, batch: 8552, sum loss: 5384.104004, avg loss: 3.587011, ppl: 36.125946 +epoch: 0, batch: 8553, sum loss: 7055.174805, avg loss: 4.125833, ppl: 61.919369 +epoch: 0, batch: 8554, sum loss: 6261.061035, avg loss: 3.801494, ppl: 44.768024 +epoch: 0, batch: 8555, sum loss: 6055.470215, avg loss: 3.810869, ppl: 45.189671 +epoch: 0, batch: 8556, sum loss: 6324.120605, avg loss: 3.762118, ppl: 43.039490 +epoch: 0, batch: 8557, sum loss: 7364.280762, avg loss: 4.013232, ppl: 55.325405 +epoch: 0, batch: 8558, sum loss: 6284.447266, avg loss: 3.822656, ppl: 45.725513 +epoch: 0, batch: 8559, sum loss: 5975.965820, avg loss: 3.850494, ppl: 47.016270 +epoch: 0, batch: 8560, sum loss: 6723.440430, avg loss: 3.777214, ppl: 43.694134 +epoch: 0, batch: 8561, sum loss: 6460.750000, avg loss: 3.816155, ppl: 45.429195 +epoch: 0, batch: 8562, sum loss: 5368.305176, avg loss: 3.772527, ppl: 43.489803 +epoch: 0, batch: 8563, sum loss: 6368.904297, avg loss: 3.960761, ppl: 52.497276 +epoch: 0, batch: 8564, sum loss: 6487.340332, avg loss: 3.840936, ppl: 46.569031 +epoch: 0, batch: 8565, sum loss: 7465.883789, avg loss: 3.994587, ppl: 54.303417 +epoch: 0, batch: 8566, sum loss: 7247.542969, avg loss: 3.788574, ppl: 44.193356 +epoch: 0, batch: 8567, sum loss: 6976.352051, avg loss: 4.025592, ppl: 56.013477 +epoch: 0, batch: 8568, sum loss: 6894.099121, avg loss: 3.730573, ppl: 41.703003 +epoch: 0, batch: 8569, sum loss: 6463.348145, avg loss: 3.795272, ppl: 44.490330 +epoch: 0, batch: 8570, sum loss: 6551.545898, avg loss: 3.693092, ppl: 40.168861 +epoch: 0, batch: 8571, sum loss: 6823.055664, avg loss: 3.881146, ppl: 48.479763 +epoch: 0, batch: 
8572, sum loss: 6878.729492, avg loss: 3.840720, ppl: 46.558971 +epoch: 0, batch: 8573, sum loss: 6181.295898, avg loss: 4.042705, ppl: 56.980270 +epoch: 0, batch: 8574, sum loss: 6064.695801, avg loss: 3.680034, ppl: 39.647739 +epoch: 0, batch: 8575, sum loss: 6177.175781, avg loss: 3.827246, ppl: 45.935875 +epoch: 0, batch: 8576, sum loss: 7039.702637, avg loss: 3.897953, ppl: 49.301414 +epoch: 0, batch: 8577, sum loss: 6586.021484, avg loss: 3.901672, ppl: 49.485100 +epoch: 0, batch: 8578, sum loss: 6057.807617, avg loss: 3.826789, ppl: 45.914875 +epoch: 0, batch: 8579, sum loss: 6236.931641, avg loss: 3.752666, ppl: 42.634613 +epoch: 0, batch: 8580, sum loss: 5516.373047, avg loss: 3.577415, ppl: 35.780910 +epoch: 0, batch: 8581, sum loss: 7416.813477, avg loss: 4.095425, ppl: 60.064842 +epoch: 0, batch: 8582, sum loss: 6155.394043, avg loss: 3.774000, ppl: 43.553940 +epoch: 0, batch: 8583, sum loss: 5766.660645, avg loss: 3.917568, ppl: 50.278042 +epoch: 0, batch: 8584, sum loss: 5781.514648, avg loss: 3.734829, ppl: 41.880852 +epoch: 0, batch: 8585, sum loss: 7537.940430, avg loss: 3.899607, ppl: 49.383045 +epoch: 0, batch: 8586, sum loss: 6283.597168, avg loss: 3.792153, ppl: 44.351772 +epoch: 0, batch: 8587, sum loss: 5918.013672, avg loss: 3.878122, ppl: 48.333351 +epoch: 0, batch: 8588, sum loss: 5753.729492, avg loss: 3.690654, ppl: 40.071045 +epoch: 0, batch: 8589, sum loss: 6547.120117, avg loss: 4.001907, ppl: 54.702389 +epoch: 0, batch: 8590, sum loss: 7318.218750, avg loss: 4.146300, ppl: 63.199718 +epoch: 0, batch: 8591, sum loss: 6276.870117, avg loss: 3.905955, ppl: 49.697533 +epoch: 0, batch: 8592, sum loss: 6972.799805, avg loss: 3.921710, ppl: 50.486691 +epoch: 0, batch: 8593, sum loss: 6106.500488, avg loss: 3.891970, ppl: 49.007320 +epoch: 0, batch: 8594, sum loss: 5995.747070, avg loss: 3.923918, ppl: 50.598312 +epoch: 0, batch: 8595, sum loss: 5906.787109, avg loss: 3.796136, ppl: 44.528778 +epoch: 0, batch: 8596, sum loss: 5753.397949, 
avg loss: 3.866531, ppl: 47.776356 +epoch: 0, batch: 8597, sum loss: 5978.754395, avg loss: 3.910239, ppl: 49.910854 +epoch: 0, batch: 8598, sum loss: 6111.407227, avg loss: 3.890138, ppl: 48.917656 +epoch: 0, batch: 8599, sum loss: 6372.837891, avg loss: 3.822938, ppl: 45.738388 +epoch: 0, batch: 8600, sum loss: 5057.956055, avg loss: 3.564451, ppl: 35.320065 +epoch: 0, batch: 8601, sum loss: 5536.338867, avg loss: 3.656763, ppl: 38.735744 +epoch: 0, batch: 8602, sum loss: 6533.748047, avg loss: 4.025723, ppl: 56.020771 +epoch: 0, batch: 8603, sum loss: 6950.523438, avg loss: 3.789816, ppl: 44.248276 +epoch: 0, batch: 8604, sum loss: 5456.832031, avg loss: 3.709607, ppl: 40.837761 +epoch: 0, batch: 8605, sum loss: 6151.119629, avg loss: 3.915417, ppl: 50.169987 +epoch: 0, batch: 8606, sum loss: 6320.958984, avg loss: 3.854243, ppl: 47.192890 +epoch: 0, batch: 8607, sum loss: 7239.216797, avg loss: 4.017323, ppl: 55.552219 +epoch: 0, batch: 8608, sum loss: 7028.394043, avg loss: 3.750477, ppl: 42.541374 +epoch: 0, batch: 8609, sum loss: 6729.621582, avg loss: 3.819309, ppl: 45.572697 +epoch: 0, batch: 8610, sum loss: 5765.902832, avg loss: 3.568009, ppl: 35.445953 +epoch: 0, batch: 8611, sum loss: 6716.895508, avg loss: 3.911995, ppl: 49.998608 +epoch: 0, batch: 8612, sum loss: 6020.652832, avg loss: 3.817789, ppl: 45.503483 +epoch: 0, batch: 8613, sum loss: 5533.447266, avg loss: 3.718714, ppl: 41.211380 +epoch: 0, batch: 8614, sum loss: 7173.454102, avg loss: 3.875448, ppl: 48.204288 +epoch: 0, batch: 8615, sum loss: 6160.287598, avg loss: 3.987241, ppl: 53.905972 +epoch: 0, batch: 8616, sum loss: 6114.228027, avg loss: 3.855125, ppl: 47.234520 +epoch: 0, batch: 8617, sum loss: 6458.348633, avg loss: 3.644666, ppl: 38.270004 +epoch: 0, batch: 8618, sum loss: 5895.930664, avg loss: 3.597273, ppl: 36.498569 +epoch: 0, batch: 8619, sum loss: 7860.189453, avg loss: 4.039152, ppl: 56.778183 +epoch: 0, batch: 8620, sum loss: 7567.955566, avg loss: 3.915135, ppl: 
50.155838 +epoch: 0, batch: 8621, sum loss: 6095.515625, avg loss: 3.860364, ppl: 47.482651 +epoch: 0, batch: 8622, sum loss: 6530.733398, avg loss: 4.167667, ppl: 64.564644 +epoch: 0, batch: 8623, sum loss: 6228.264648, avg loss: 3.786179, ppl: 44.087612 +epoch: 0, batch: 8624, sum loss: 6265.642578, avg loss: 3.611321, ppl: 37.014935 +epoch: 0, batch: 8625, sum loss: 6179.225586, avg loss: 3.981460, ppl: 53.595230 +epoch: 0, batch: 8626, sum loss: 6164.579102, avg loss: 4.029137, ppl: 56.212360 +epoch: 0, batch: 8627, sum loss: 6267.064941, avg loss: 3.882940, ppl: 48.566795 +epoch: 0, batch: 8628, sum loss: 6417.172852, avg loss: 4.079576, ppl: 59.120369 +epoch: 0, batch: 8629, sum loss: 6242.334961, avg loss: 3.841437, ppl: 46.592373 +epoch: 0, batch: 8630, sum loss: 6983.753906, avg loss: 3.910277, ppl: 49.912758 +epoch: 0, batch: 8631, sum loss: 5141.025391, avg loss: 3.391178, ppl: 29.700916 +epoch: 0, batch: 8632, sum loss: 6203.804688, avg loss: 3.824787, ppl: 45.823036 +epoch: 0, batch: 8633, sum loss: 7449.628418, avg loss: 4.020307, ppl: 55.718185 +epoch: 0, batch: 8634, sum loss: 6094.260742, avg loss: 3.931781, ppl: 50.997738 +epoch: 0, batch: 8635, sum loss: 5820.611328, avg loss: 3.888184, ppl: 48.822136 +epoch: 0, batch: 8636, sum loss: 5043.725098, avg loss: 3.607815, ppl: 36.885361 +epoch: 0, batch: 8637, sum loss: 7371.465332, avg loss: 4.025923, ppl: 56.032017 +epoch: 0, batch: 8638, sum loss: 6297.529785, avg loss: 3.775498, ppl: 43.619209 +epoch: 0, batch: 8639, sum loss: 7426.601562, avg loss: 3.902576, ppl: 49.529861 +epoch: 0, batch: 8640, sum loss: 6148.745605, avg loss: 3.833383, ppl: 46.218613 +epoch: 0, batch: 8641, sum loss: 6513.877441, avg loss: 3.877308, ppl: 48.294037 +epoch: 0, batch: 8642, sum loss: 7260.556641, avg loss: 3.905625, ppl: 49.681103 +epoch: 0, batch: 8643, sum loss: 5961.110352, avg loss: 3.998062, ppl: 54.492447 +epoch: 0, batch: 8644, sum loss: 6622.772461, avg loss: 3.804005, ppl: 44.880569 +epoch: 0, batch: 
8645, sum loss: 5366.948242, avg loss: 3.737429, ppl: 41.989902 +epoch: 0, batch: 8646, sum loss: 6454.632812, avg loss: 3.808043, ppl: 45.062164 +epoch: 0, batch: 8647, sum loss: 6529.233398, avg loss: 3.995859, ppl: 54.372520 +epoch: 0, batch: 8648, sum loss: 6484.353516, avg loss: 3.934680, ppl: 51.145805 +epoch: 0, batch: 8649, sum loss: 6827.561035, avg loss: 3.978765, ppl: 53.450996 +epoch: 0, batch: 8650, sum loss: 7216.267578, avg loss: 3.890171, ppl: 48.919266 +epoch: 0, batch: 8651, sum loss: 7449.092773, avg loss: 3.994152, ppl: 54.279770 +epoch: 0, batch: 8652, sum loss: 7357.791992, avg loss: 3.913719, ppl: 50.084869 +epoch: 0, batch: 8653, sum loss: 6251.065918, avg loss: 3.793123, ppl: 44.394810 +epoch: 0, batch: 8654, sum loss: 6775.526367, avg loss: 3.766274, ppl: 43.218719 +epoch: 0, batch: 8655, sum loss: 5647.780762, avg loss: 3.691360, ppl: 40.099346 +epoch: 0, batch: 8656, sum loss: 5581.999512, avg loss: 3.708970, ppl: 40.811741 +epoch: 0, batch: 8657, sum loss: 5686.829102, avg loss: 3.697548, ppl: 40.348255 +epoch: 0, batch: 8658, sum loss: 6625.230957, avg loss: 4.000743, ppl: 54.638752 +epoch: 0, batch: 8659, sum loss: 7244.709961, avg loss: 4.033803, ppl: 56.475277 +epoch: 0, batch: 8660, sum loss: 6733.942383, avg loss: 3.977521, ppl: 53.384514 +epoch: 0, batch: 8661, sum loss: 6563.928711, avg loss: 3.963725, ppl: 52.653099 +epoch: 0, batch: 8662, sum loss: 5864.666504, avg loss: 3.756993, ppl: 42.819485 +epoch: 0, batch: 8663, sum loss: 7218.261230, avg loss: 4.117662, ppl: 61.415482 +epoch: 0, batch: 8664, sum loss: 6445.952148, avg loss: 3.935257, ppl: 51.175323 +epoch: 0, batch: 8665, sum loss: 6801.805664, avg loss: 3.970698, ppl: 53.021519 +epoch: 0, batch: 8666, sum loss: 6751.486328, avg loss: 3.840436, ppl: 46.545753 +epoch: 0, batch: 8667, sum loss: 6651.125000, avg loss: 3.887274, ppl: 48.777725 +epoch: 0, batch: 8668, sum loss: 8065.373047, avg loss: 4.178950, ppl: 65.297241 +epoch: 0, batch: 8669, sum loss: 6052.080078, 
avg loss: 3.578995, ppl: 35.837505 +epoch: 0, batch: 8670, sum loss: 6807.574707, avg loss: 3.798870, ppl: 44.650703 +epoch: 0, batch: 8671, sum loss: 5972.991699, avg loss: 3.735454, ppl: 41.907059 +epoch: 0, batch: 8672, sum loss: 7680.268066, avg loss: 4.162747, ppl: 64.247765 +epoch: 0, batch: 8673, sum loss: 5994.144043, avg loss: 3.739329, ppl: 42.069736 +epoch: 0, batch: 8674, sum loss: 8081.390625, avg loss: 4.189420, ppl: 65.984489 +epoch: 0, batch: 8675, sum loss: 5930.904297, avg loss: 3.951302, ppl: 52.003033 +epoch: 0, batch: 8676, sum loss: 6516.408691, avg loss: 3.927914, ppl: 50.800880 +epoch: 0, batch: 8677, sum loss: 5773.167969, avg loss: 3.554906, ppl: 34.984543 +epoch: 0, batch: 8678, sum loss: 7475.310547, avg loss: 3.913775, ppl: 50.087688 +epoch: 0, batch: 8679, sum loss: 7201.627441, avg loss: 3.985405, ppl: 53.807087 +epoch: 0, batch: 8680, sum loss: 7111.555176, avg loss: 4.013293, ppl: 55.328785 +epoch: 0, batch: 8681, sum loss: 5613.811035, avg loss: 3.745038, ppl: 42.310596 +epoch: 0, batch: 8682, sum loss: 6431.352539, avg loss: 3.960192, ppl: 52.467422 +epoch: 0, batch: 8683, sum loss: 5297.334961, avg loss: 3.382717, ppl: 29.450676 +epoch: 0, batch: 8684, sum loss: 5550.663574, avg loss: 3.656564, ppl: 38.728035 +epoch: 0, batch: 8685, sum loss: 6122.138672, avg loss: 3.746719, ppl: 42.381794 +epoch: 0, batch: 8686, sum loss: 5891.942871, avg loss: 3.894212, ppl: 49.117344 +epoch: 0, batch: 8687, sum loss: 6335.660156, avg loss: 3.846788, ppl: 46.842361 +epoch: 0, batch: 8688, sum loss: 7088.834473, avg loss: 4.050763, ppl: 57.441250 +epoch: 0, batch: 8689, sum loss: 6856.479492, avg loss: 3.897942, ppl: 49.300877 +epoch: 0, batch: 8690, sum loss: 7373.706055, avg loss: 4.000926, ppl: 54.648705 +epoch: 0, batch: 8691, sum loss: 6752.088379, avg loss: 3.780565, ppl: 43.840794 +epoch: 0, batch: 8692, sum loss: 7131.414551, avg loss: 3.961897, ppl: 52.556938 +epoch: 0, batch: 8693, sum loss: 7354.219727, avg loss: 3.918071, ppl: 
50.303329 +epoch: 0, batch: 8694, sum loss: 6692.576172, avg loss: 3.936810, ppl: 51.254826 +epoch: 0, batch: 8695, sum loss: 6148.641113, avg loss: 3.610476, ppl: 36.983662 +epoch: 0, batch: 8696, sum loss: 6765.951660, avg loss: 3.954385, ppl: 52.163582 +epoch: 0, batch: 8697, sum loss: 7222.871582, avg loss: 3.900038, ppl: 49.404350 +epoch: 0, batch: 8698, sum loss: 5574.471680, avg loss: 3.781867, ppl: 43.897915 +epoch: 0, batch: 8699, sum loss: 6372.505859, avg loss: 3.975362, ppl: 53.269413 +epoch: 0, batch: 8700, sum loss: 6714.058105, avg loss: 3.793253, ppl: 44.400608 +epoch: 0, batch: 8701, sum loss: 7401.140625, avg loss: 4.048764, ppl: 57.326572 +epoch: 0, batch: 8702, sum loss: 6579.869629, avg loss: 3.898027, ppl: 49.305084 +epoch: 0, batch: 8703, sum loss: 6514.163086, avg loss: 3.910062, ppl: 49.902039 +epoch: 0, batch: 8704, sum loss: 6917.561035, avg loss: 3.914862, ppl: 50.142147 +epoch: 0, batch: 8705, sum loss: 7693.695312, avg loss: 3.911385, ppl: 49.968124 +epoch: 0, batch: 8706, sum loss: 7587.234375, avg loss: 3.935288, ppl: 51.176872 +epoch: 0, batch: 8707, sum loss: 7738.487305, avg loss: 3.810186, ppl: 45.158833 +epoch: 0, batch: 8708, sum loss: 7701.180176, avg loss: 3.887521, ppl: 48.789799 +epoch: 0, batch: 8709, sum loss: 6461.804688, avg loss: 3.663155, ppl: 38.984127 +epoch: 0, batch: 8710, sum loss: 6794.821289, avg loss: 3.887198, ppl: 48.774017 +epoch: 0, batch: 8711, sum loss: 5909.126465, avg loss: 3.758986, ppl: 42.904907 +epoch: 0, batch: 8712, sum loss: 6048.155273, avg loss: 3.715083, ppl: 41.062000 +epoch: 0, batch: 8713, sum loss: 6511.584961, avg loss: 3.794630, ppl: 44.461784 +epoch: 0, batch: 8714, sum loss: 4975.023438, avg loss: 3.650054, ppl: 38.476742 +epoch: 0, batch: 8715, sum loss: 5240.192383, avg loss: 3.594096, ppl: 36.382801 +epoch: 0, batch: 8716, sum loss: 6818.520020, avg loss: 3.950475, ppl: 51.960041 +epoch: 0, batch: 8717, sum loss: 6764.854004, avg loss: 3.652729, ppl: 38.579800 +epoch: 0, batch: 
8718, sum loss: 6871.146484, avg loss: 3.960315, ppl: 52.473850 +epoch: 0, batch: 8719, sum loss: 6560.883301, avg loss: 3.909942, ppl: 49.896076 +epoch: 0, batch: 8720, sum loss: 6251.866699, avg loss: 3.688417, ppl: 39.981503 +epoch: 0, batch: 8721, sum loss: 7192.852051, avg loss: 4.152917, ppl: 63.619301 +epoch: 0, batch: 8722, sum loss: 7661.702148, avg loss: 3.975974, ppl: 53.302025 +epoch: 0, batch: 8723, sum loss: 5579.598633, avg loss: 3.623116, ppl: 37.454094 +epoch: 0, batch: 8724, sum loss: 5673.999512, avg loss: 3.792780, ppl: 44.379623 +epoch: 0, batch: 8725, sum loss: 5587.837891, avg loss: 3.994166, ppl: 54.280548 +epoch: 0, batch: 8726, sum loss: 7644.961426, avg loss: 4.096978, ppl: 60.158226 +epoch: 0, batch: 8727, sum loss: 6888.490234, avg loss: 4.035437, ppl: 56.567616 +epoch: 0, batch: 8728, sum loss: 7237.372559, avg loss: 4.038713, ppl: 56.753227 +epoch: 0, batch: 8729, sum loss: 5937.796387, avg loss: 3.779629, ppl: 43.799778 +epoch: 0, batch: 8730, sum loss: 6182.001465, avg loss: 3.783355, ppl: 43.963280 +epoch: 0, batch: 8731, sum loss: 6251.727051, avg loss: 3.701437, ppl: 40.505470 +epoch: 0, batch: 8732, sum loss: 6779.500000, avg loss: 3.907493, ppl: 49.774006 +epoch: 0, batch: 8733, sum loss: 6318.345703, avg loss: 3.736455, ppl: 41.949036 +epoch: 0, batch: 8734, sum loss: 6824.757324, avg loss: 3.884324, ppl: 48.634048 +epoch: 0, batch: 8735, sum loss: 5595.070312, avg loss: 3.586584, ppl: 36.110497 +epoch: 0, batch: 8736, sum loss: 7040.881348, avg loss: 4.105470, ppl: 60.671265 +epoch: 0, batch: 8737, sum loss: 6304.726562, avg loss: 3.925732, ppl: 50.690189 +epoch: 0, batch: 8738, sum loss: 6700.403320, avg loss: 4.024266, ppl: 55.939247 +epoch: 0, batch: 8739, sum loss: 7870.535645, avg loss: 4.025849, ppl: 56.027874 +epoch: 0, batch: 8740, sum loss: 5831.495117, avg loss: 3.601912, ppl: 36.668270 +epoch: 0, batch: 8741, sum loss: 6716.469238, avg loss: 4.095408, ppl: 60.063839 +epoch: 0, batch: 8742, sum loss: 6446.716797, 
avg loss: 3.916596, ppl: 50.229172 +epoch: 0, batch: 8743, sum loss: 6002.889648, avg loss: 3.719263, ppl: 41.233978 +epoch: 0, batch: 8744, sum loss: 7618.764648, avg loss: 3.921135, ppl: 50.457680 +epoch: 0, batch: 8745, sum loss: 6509.332031, avg loss: 4.101659, ppl: 60.440464 +epoch: 0, batch: 8746, sum loss: 6403.313965, avg loss: 3.987119, ppl: 53.899406 +epoch: 0, batch: 8747, sum loss: 5827.812500, avg loss: 3.839139, ppl: 46.485420 +epoch: 0, batch: 8748, sum loss: 6642.002930, avg loss: 3.902469, ppl: 49.524593 +epoch: 0, batch: 8749, sum loss: 6576.815918, avg loss: 3.784129, ppl: 43.997330 +epoch: 0, batch: 8750, sum loss: 6697.555664, avg loss: 3.875900, ppl: 48.226093 +epoch: 0, batch: 8751, sum loss: 7296.987793, avg loss: 4.106352, ppl: 60.724808 +epoch: 0, batch: 8752, sum loss: 6201.548828, avg loss: 3.967722, ppl: 52.863953 +epoch: 0, batch: 8753, sum loss: 6315.833008, avg loss: 3.930201, ppl: 50.917213 +epoch: 0, batch: 8754, sum loss: 5948.398438, avg loss: 3.944561, ppl: 51.653671 +epoch: 0, batch: 8755, sum loss: 7083.133301, avg loss: 3.747690, ppl: 42.422958 +epoch: 0, batch: 8756, sum loss: 5609.161133, avg loss: 3.777213, ppl: 43.694080 +epoch: 0, batch: 8757, sum loss: 7182.708496, avg loss: 3.698614, ppl: 40.391289 +epoch: 0, batch: 8758, sum loss: 6393.147949, avg loss: 3.900639, ppl: 49.434040 +epoch: 0, batch: 8759, sum loss: 5824.069336, avg loss: 3.557770, ppl: 35.084869 +epoch: 0, batch: 8760, sum loss: 5697.387695, avg loss: 3.711653, ppl: 40.921402 +epoch: 0, batch: 8761, sum loss: 6459.274414, avg loss: 3.870147, ppl: 47.949409 +epoch: 0, batch: 8762, sum loss: 5158.244141, avg loss: 3.525799, ppl: 33.980923 +epoch: 0, batch: 8763, sum loss: 7221.754395, avg loss: 3.882664, ppl: 48.553375 +epoch: 0, batch: 8764, sum loss: 6322.983398, avg loss: 3.832111, ppl: 46.159885 +epoch: 0, batch: 8765, sum loss: 6042.835449, avg loss: 3.788612, ppl: 44.195000 +epoch: 0, batch: 8766, sum loss: 6758.473145, avg loss: 3.989653, ppl: 
54.036156 +epoch: 0, batch: 8767, sum loss: 6870.191406, avg loss: 3.758311, ppl: 42.875927 +epoch: 0, batch: 8768, sum loss: 5935.936035, avg loss: 3.900090, ppl: 49.406883 +epoch: 0, batch: 8769, sum loss: 5940.509277, avg loss: 3.521345, ppl: 33.829910 +epoch: 0, batch: 8770, sum loss: 5906.750000, avg loss: 3.906581, ppl: 49.728634 +epoch: 0, batch: 8771, sum loss: 6912.880859, avg loss: 3.957001, ppl: 52.300240 +epoch: 0, batch: 8772, sum loss: 5809.073242, avg loss: 3.730940, ppl: 41.718300 +epoch: 0, batch: 8773, sum loss: 7091.483887, avg loss: 3.798331, ppl: 44.626633 +epoch: 0, batch: 8774, sum loss: 7019.891602, avg loss: 4.055397, ppl: 57.708042 +epoch: 0, batch: 8775, sum loss: 6654.024414, avg loss: 3.702852, ppl: 40.562817 +epoch: 0, batch: 8776, sum loss: 6088.543457, avg loss: 3.885478, ppl: 48.690201 +epoch: 0, batch: 8777, sum loss: 5988.214844, avg loss: 3.921555, ppl: 50.478893 +epoch: 0, batch: 8778, sum loss: 6421.894531, avg loss: 3.983805, ppl: 53.721077 +epoch: 0, batch: 8779, sum loss: 7139.708008, avg loss: 3.935892, ppl: 51.207813 +epoch: 0, batch: 8780, sum loss: 6910.610352, avg loss: 3.858521, ppl: 47.395180 +epoch: 0, batch: 8781, sum loss: 5967.910156, avg loss: 3.860227, ppl: 47.476109 +epoch: 0, batch: 8782, sum loss: 6164.832031, avg loss: 3.838625, ppl: 46.461544 +epoch: 0, batch: 8783, sum loss: 6889.808594, avg loss: 3.968784, ppl: 52.920132 +epoch: 0, batch: 8784, sum loss: 7184.806152, avg loss: 3.949866, ppl: 51.928410 +epoch: 0, batch: 8785, sum loss: 5444.701660, avg loss: 3.721601, ppl: 41.330502 +epoch: 0, batch: 8786, sum loss: 5574.513672, avg loss: 3.716342, ppl: 41.113743 +epoch: 0, batch: 8787, sum loss: 7073.258789, avg loss: 3.748415, ppl: 42.453728 +epoch: 0, batch: 8788, sum loss: 6269.367676, avg loss: 3.945480, ppl: 51.701141 +epoch: 0, batch: 8789, sum loss: 7014.472656, avg loss: 3.600859, ppl: 36.629673 +epoch: 0, batch: 8790, sum loss: 6688.048340, avg loss: 3.850345, ppl: 47.009262 +epoch: 0, batch: 
8791, sum loss: 6798.844238, avg loss: 3.865176, ppl: 47.711666 +epoch: 0, batch: 8792, sum loss: 6387.740234, avg loss: 3.890219, ppl: 48.921619 +epoch: 0, batch: 8793, sum loss: 6215.721680, avg loss: 3.760267, ppl: 42.959896 +epoch: 0, batch: 8794, sum loss: 5912.415039, avg loss: 3.660938, ppl: 38.897812 +epoch: 0, batch: 8795, sum loss: 8088.987305, avg loss: 4.077110, ppl: 58.974804 +epoch: 0, batch: 8796, sum loss: 5961.560547, avg loss: 3.826419, ppl: 45.897877 +epoch: 0, batch: 8797, sum loss: 6758.060059, avg loss: 4.027449, ppl: 56.117580 +epoch: 0, batch: 8798, sum loss: 7638.950684, avg loss: 4.149349, ppl: 63.392700 +epoch: 0, batch: 8799, sum loss: 4593.148926, avg loss: 3.642465, ppl: 38.185863 +epoch: 0, batch: 8800, sum loss: 7353.312988, avg loss: 3.896827, ppl: 49.245953 +epoch: 0, batch: 8801, sum loss: 7411.238770, avg loss: 4.016932, ppl: 55.530453 +epoch: 0, batch: 8802, sum loss: 6311.907715, avg loss: 3.827718, ppl: 45.957565 +epoch: 0, batch: 8803, sum loss: 7324.151855, avg loss: 3.978355, ppl: 53.429081 +epoch: 0, batch: 8804, sum loss: 6897.230957, avg loss: 3.838192, ppl: 46.441444 +epoch: 0, batch: 8805, sum loss: 7523.830078, avg loss: 4.055973, ppl: 57.741322 +epoch: 0, batch: 8806, sum loss: 6613.010742, avg loss: 3.867258, ppl: 47.811089 +epoch: 0, batch: 8807, sum loss: 5787.531250, avg loss: 3.557179, ppl: 35.064156 +epoch: 0, batch: 8808, sum loss: 7653.015625, avg loss: 3.996353, ppl: 54.399376 +epoch: 0, batch: 8809, sum loss: 6241.341309, avg loss: 3.876609, ppl: 48.260303 +epoch: 0, batch: 8810, sum loss: 6256.860352, avg loss: 3.826826, ppl: 45.916561 +epoch: 0, batch: 8811, sum loss: 6498.145996, avg loss: 4.071520, ppl: 58.646027 +epoch: 0, batch: 8812, sum loss: 5531.301758, avg loss: 3.559396, ppl: 35.141975 +epoch: 0, batch: 8813, sum loss: 5740.025879, avg loss: 3.686594, ppl: 39.908669 +epoch: 0, batch: 8814, sum loss: 6122.053711, avg loss: 3.726143, ppl: 41.518677 +epoch: 0, batch: 8815, sum loss: 7428.309082, 
avg loss: 3.993715, ppl: 54.256054 +epoch: 0, batch: 8816, sum loss: 6019.552734, avg loss: 3.841451, ppl: 46.593018 +epoch: 0, batch: 8817, sum loss: 5768.451660, avg loss: 3.810074, ppl: 45.153774 +epoch: 0, batch: 8818, sum loss: 7481.881836, avg loss: 3.967064, ppl: 52.829178 +epoch: 0, batch: 8819, sum loss: 7240.211426, avg loss: 4.026814, ppl: 56.081921 +epoch: 0, batch: 8820, sum loss: 6552.530762, avg loss: 3.701995, ppl: 40.528072 +epoch: 0, batch: 8821, sum loss: 7381.192383, avg loss: 4.009339, ppl: 55.110424 +epoch: 0, batch: 8822, sum loss: 7448.948730, avg loss: 3.857560, ppl: 47.349686 +epoch: 0, batch: 8823, sum loss: 6070.671387, avg loss: 3.751960, ppl: 42.604507 +epoch: 0, batch: 8824, sum loss: 6333.384277, avg loss: 3.808409, ppl: 45.078659 +epoch: 0, batch: 8825, sum loss: 5400.082031, avg loss: 3.474956, ppl: 32.296417 +epoch: 0, batch: 8826, sum loss: 5637.845703, avg loss: 3.761071, ppl: 42.994457 +epoch: 0, batch: 8827, sum loss: 6091.940430, avg loss: 3.663223, ppl: 38.986805 +epoch: 0, batch: 8828, sum loss: 5813.615234, avg loss: 3.784906, ppl: 44.031528 +epoch: 0, batch: 8829, sum loss: 6901.539551, avg loss: 3.959575, ppl: 52.435047 +epoch: 0, batch: 8830, sum loss: 8140.810547, avg loss: 3.928962, ppl: 50.854187 +epoch: 0, batch: 8831, sum loss: 7163.263672, avg loss: 3.910079, ppl: 49.902870 +epoch: 0, batch: 8832, sum loss: 6685.100586, avg loss: 3.951005, ppl: 51.987587 +epoch: 0, batch: 8833, sum loss: 6686.045898, avg loss: 4.089324, ppl: 59.699551 +epoch: 0, batch: 8834, sum loss: 6744.983887, avg loss: 3.759746, ppl: 42.937511 +epoch: 0, batch: 8835, sum loss: 7083.415039, avg loss: 3.855969, ppl: 47.274414 +epoch: 0, batch: 8836, sum loss: 7762.096680, avg loss: 4.281355, ppl: 72.338417 +epoch: 0, batch: 8837, sum loss: 7748.209961, avg loss: 3.917194, ppl: 50.259201 +epoch: 0, batch: 8838, sum loss: 7607.364258, avg loss: 3.903214, ppl: 49.561493 +epoch: 0, batch: 8839, sum loss: 5549.383301, avg loss: 3.795748, ppl: 
44.511517 +epoch: 0, batch: 8840, sum loss: 6998.735352, avg loss: 3.916472, ppl: 50.222942 +epoch: 0, batch: 8841, sum loss: 5679.696289, avg loss: 3.783942, ppl: 43.989094 +epoch: 0, batch: 8842, sum loss: 6671.976074, avg loss: 3.832267, ppl: 46.167061 +epoch: 0, batch: 8843, sum loss: 5696.589844, avg loss: 3.675219, ppl: 39.457310 +epoch: 0, batch: 8844, sum loss: 5895.802734, avg loss: 3.784212, ppl: 44.000999 +epoch: 0, batch: 8845, sum loss: 8275.971680, avg loss: 4.099045, ppl: 60.282677 +epoch: 0, batch: 8846, sum loss: 7836.262695, avg loss: 3.914217, ppl: 50.109821 +epoch: 0, batch: 8847, sum loss: 5896.393066, avg loss: 3.545636, ppl: 34.661728 +epoch: 0, batch: 8848, sum loss: 6749.870605, avg loss: 3.729211, ppl: 41.646221 +epoch: 0, batch: 8849, sum loss: 6986.276367, avg loss: 4.102335, ppl: 60.481346 +epoch: 0, batch: 8850, sum loss: 5922.793945, avg loss: 3.463622, ppl: 31.932430 +epoch: 0, batch: 8851, sum loss: 6398.679199, avg loss: 4.039570, ppl: 56.801933 +epoch: 0, batch: 8852, sum loss: 5986.174316, avg loss: 3.822589, ppl: 45.722427 +epoch: 0, batch: 8853, sum loss: 5840.939453, avg loss: 3.842724, ppl: 46.652363 +epoch: 0, batch: 8854, sum loss: 5118.270996, avg loss: 3.515296, ppl: 33.625889 +epoch: 0, batch: 8855, sum loss: 6891.169434, avg loss: 3.728988, ppl: 41.636936 +epoch: 0, batch: 8856, sum loss: 5571.603027, avg loss: 3.510777, ppl: 33.474274 +epoch: 0, batch: 8857, sum loss: 6363.579102, avg loss: 3.833482, ppl: 46.223186 +epoch: 0, batch: 8858, sum loss: 4431.729980, avg loss: 3.475867, ppl: 32.325829 +epoch: 0, batch: 8859, sum loss: 6509.421387, avg loss: 3.860867, ppl: 47.506512 +epoch: 0, batch: 8860, sum loss: 6874.615723, avg loss: 3.948659, ppl: 51.865776 +epoch: 0, batch: 8861, sum loss: 6902.331055, avg loss: 4.138088, ppl: 62.682873 +epoch: 0, batch: 8862, sum loss: 7554.636230, avg loss: 4.003517, ppl: 54.790493 +epoch: 0, batch: 8863, sum loss: 7048.153809, avg loss: 3.779171, ppl: 43.779732 +epoch: 0, batch: 
8864, sum loss: 5342.799805, avg loss: 3.871594, ppl: 48.018875 +epoch: 0, batch: 8865, sum loss: 5957.100586, avg loss: 3.541677, ppl: 34.524761 +epoch: 0, batch: 8866, sum loss: 5837.360352, avg loss: 3.788034, ppl: 44.169476 +epoch: 0, batch: 8867, sum loss: 6898.791016, avg loss: 3.955729, ppl: 52.233734 +epoch: 0, batch: 8868, sum loss: 5813.920410, avg loss: 4.071373, ppl: 58.637417 +epoch: 0, batch: 8869, sum loss: 6128.177734, avg loss: 3.895854, ppl: 49.198029 +epoch: 0, batch: 8870, sum loss: 6991.320801, avg loss: 4.081331, ppl: 59.224258 +epoch: 0, batch: 8871, sum loss: 6908.374023, avg loss: 3.863744, ppl: 47.643383 +epoch: 0, batch: 8872, sum loss: 7365.667480, avg loss: 3.957909, ppl: 52.347733 +epoch: 0, batch: 8873, sum loss: 6775.630371, avg loss: 3.948503, ppl: 51.857655 +epoch: 0, batch: 8874, sum loss: 6604.092773, avg loss: 3.880195, ppl: 48.433678 +epoch: 0, batch: 8875, sum loss: 5800.696289, avg loss: 3.806231, ppl: 44.980598 +epoch: 0, batch: 8876, sum loss: 5780.304199, avg loss: 3.630844, ppl: 37.744675 +epoch: 0, batch: 8877, sum loss: 6032.255859, avg loss: 3.730523, ppl: 41.700928 +epoch: 0, batch: 8878, sum loss: 5838.003418, avg loss: 3.568462, ppl: 35.462006 +epoch: 0, batch: 8879, sum loss: 6570.403809, avg loss: 3.668567, ppl: 39.195698 +epoch: 0, batch: 8880, sum loss: 5462.541504, avg loss: 3.547105, ppl: 34.712673 +epoch: 0, batch: 8881, sum loss: 6509.041504, avg loss: 3.704634, ppl: 40.635162 +epoch: 0, batch: 8882, sum loss: 6639.429688, avg loss: 3.985252, ppl: 53.798840 +epoch: 0, batch: 8883, sum loss: 6950.272461, avg loss: 3.789680, ppl: 44.242218 +epoch: 0, batch: 8884, sum loss: 5364.604980, avg loss: 3.506278, ppl: 33.323997 +epoch: 0, batch: 8885, sum loss: 5901.993164, avg loss: 3.742545, ppl: 42.205261 +epoch: 0, batch: 8886, sum loss: 5816.255371, avg loss: 3.877504, ppl: 48.303482 +epoch: 0, batch: 8887, sum loss: 6119.175781, avg loss: 3.388248, ppl: 29.614021 +epoch: 0, batch: 8888, sum loss: 6333.974609, 
avg loss: 3.917115, ppl: 50.255249 +epoch: 0, batch: 8889, sum loss: 6803.983398, avg loss: 3.910335, ppl: 49.915688 +epoch: 0, batch: 8890, sum loss: 6368.513672, avg loss: 3.936041, ppl: 51.215420 +epoch: 0, batch: 8891, sum loss: 5720.616211, avg loss: 3.888930, ppl: 48.858585 +epoch: 0, batch: 8892, sum loss: 6993.664551, avg loss: 3.994097, ppl: 54.276833 +epoch: 0, batch: 8893, sum loss: 6895.842773, avg loss: 3.780616, ppl: 43.843021 +epoch: 0, batch: 8894, sum loss: 6643.713867, avg loss: 3.838078, ppl: 46.436150 +epoch: 0, batch: 8895, sum loss: 5707.696777, avg loss: 3.711116, ppl: 40.899437 +epoch: 0, batch: 8896, sum loss: 5457.663086, avg loss: 3.727912, ppl: 41.592171 +epoch: 0, batch: 8897, sum loss: 5727.125000, avg loss: 3.657168, ppl: 38.751438 +epoch: 0, batch: 8898, sum loss: 7591.396484, avg loss: 3.909061, ppl: 49.852127 +epoch: 0, batch: 8899, sum loss: 5640.933105, avg loss: 3.733245, ppl: 41.814571 +epoch: 0, batch: 8900, sum loss: 7154.627441, avg loss: 4.145207, ppl: 63.130684 +epoch: 0, batch: 8901, sum loss: 6784.751465, avg loss: 3.786134, ppl: 44.085625 +epoch: 0, batch: 8902, sum loss: 6588.823242, avg loss: 4.092437, ppl: 59.885643 +epoch: 0, batch: 8903, sum loss: 5632.726562, avg loss: 3.674316, ppl: 39.421688 +epoch: 0, batch: 8904, sum loss: 6296.112793, avg loss: 3.721107, ppl: 41.310089 +epoch: 0, batch: 8905, sum loss: 8134.021484, avg loss: 3.948554, ppl: 51.860325 +epoch: 0, batch: 8906, sum loss: 5766.209961, avg loss: 3.640284, ppl: 38.102657 +epoch: 0, batch: 8907, sum loss: 7082.478027, avg loss: 3.885068, ppl: 48.670238 +epoch: 0, batch: 8908, sum loss: 6554.833496, avg loss: 3.760662, ppl: 42.976860 +epoch: 0, batch: 8909, sum loss: 6435.601074, avg loss: 3.853653, ppl: 47.165062 +epoch: 0, batch: 8910, sum loss: 5842.323730, avg loss: 3.692998, ppl: 40.165096 +epoch: 0, batch: 8911, sum loss: 6973.946289, avg loss: 3.915748, ppl: 50.186581 +epoch: 0, batch: 8912, sum loss: 6929.171387, avg loss: 4.126963, ppl: 
61.989384 +epoch: 0, batch: 8913, sum loss: 5798.259277, avg loss: 3.904552, ppl: 49.627815 +epoch: 0, batch: 8914, sum loss: 6755.333496, avg loss: 3.732229, ppl: 41.772106 +epoch: 0, batch: 8915, sum loss: 6669.114258, avg loss: 3.721604, ppl: 41.330639 +epoch: 0, batch: 8916, sum loss: 6911.359375, avg loss: 3.895918, ppl: 49.201218 +epoch: 0, batch: 8917, sum loss: 5415.590820, avg loss: 3.742634, ppl: 42.209003 +epoch: 0, batch: 8918, sum loss: 6068.244141, avg loss: 3.716010, ppl: 41.100071 +epoch: 0, batch: 8919, sum loss: 6352.932617, avg loss: 3.704334, ppl: 40.622974 +epoch: 0, batch: 8920, sum loss: 6254.052734, avg loss: 3.509570, ppl: 33.433876 +epoch: 0, batch: 8921, sum loss: 6460.742676, avg loss: 4.017875, ppl: 55.582851 +epoch: 0, batch: 8922, sum loss: 6259.199707, avg loss: 3.615944, ppl: 37.186440 +epoch: 0, batch: 8923, sum loss: 6869.449219, avg loss: 3.829124, ppl: 46.022224 +epoch: 0, batch: 8924, sum loss: 6174.240234, avg loss: 3.827799, ppl: 45.961281 +epoch: 0, batch: 8925, sum loss: 6060.851562, avg loss: 3.716034, ppl: 41.101070 +epoch: 0, batch: 8926, sum loss: 7157.656250, avg loss: 3.952323, ppl: 52.056152 +epoch: 0, batch: 8927, sum loss: 7950.893066, avg loss: 3.953701, ppl: 52.127960 +epoch: 0, batch: 8928, sum loss: 4437.961426, avg loss: 3.611034, ppl: 37.004311 +epoch: 0, batch: 8929, sum loss: 6154.965820, avg loss: 3.945491, ppl: 51.701721 +epoch: 0, batch: 8930, sum loss: 6824.854004, avg loss: 4.069680, ppl: 58.538239 +epoch: 0, batch: 8931, sum loss: 5916.152344, avg loss: 3.649693, ppl: 38.462849 +epoch: 0, batch: 8932, sum loss: 5710.650391, avg loss: 3.727579, ppl: 41.578312 +epoch: 0, batch: 8933, sum loss: 6842.227539, avg loss: 3.957332, ppl: 52.317577 +epoch: 0, batch: 8934, sum loss: 7107.953125, avg loss: 4.111020, ppl: 61.008892 +epoch: 0, batch: 8935, sum loss: 6653.635254, avg loss: 3.850483, ppl: 47.015785 +epoch: 0, batch: 8936, sum loss: 6854.867188, avg loss: 3.679478, ppl: 39.625690 +epoch: 0, batch: 
8937, sum loss: 6653.947266, avg loss: 4.092218, ppl: 59.872566 +epoch: 0, batch: 8938, sum loss: 6840.429688, avg loss: 3.983943, ppl: 53.728455 +epoch: 0, batch: 8939, sum loss: 6572.618652, avg loss: 3.973772, ppl: 53.184769 +epoch: 0, batch: 8940, sum loss: 7142.782715, avg loss: 3.918148, ppl: 50.307167 +epoch: 0, batch: 8941, sum loss: 5932.398926, avg loss: 3.771391, ppl: 43.440464 +epoch: 0, batch: 8942, sum loss: 6195.471680, avg loss: 3.703211, ppl: 40.577381 +epoch: 0, batch: 8943, sum loss: 6402.201660, avg loss: 4.018959, ppl: 55.643154 +epoch: 0, batch: 8944, sum loss: 7233.841309, avg loss: 4.096173, ppl: 60.109795 +epoch: 0, batch: 8945, sum loss: 6943.348145, avg loss: 3.861706, ppl: 47.546410 +epoch: 0, batch: 8946, sum loss: 6828.392578, avg loss: 3.944767, ppl: 51.664322 +epoch: 0, batch: 8947, sum loss: 7137.393555, avg loss: 3.845578, ppl: 46.785740 +epoch: 0, batch: 8948, sum loss: 5425.368164, avg loss: 3.665789, ppl: 39.086979 +epoch: 0, batch: 8949, sum loss: 6745.346680, avg loss: 3.981905, ppl: 53.619068 +epoch: 0, batch: 8950, sum loss: 6428.040039, avg loss: 3.938750, ppl: 51.354382 +epoch: 0, batch: 8951, sum loss: 6617.100586, avg loss: 3.842683, ppl: 46.650475 +epoch: 0, batch: 8952, sum loss: 5859.251465, avg loss: 3.706041, ppl: 40.692398 +epoch: 0, batch: 8953, sum loss: 4695.074219, avg loss: 3.334570, ppl: 28.066311 +epoch: 0, batch: 8954, sum loss: 7289.020020, avg loss: 3.976552, ppl: 53.332840 +epoch: 0, batch: 8955, sum loss: 6505.401367, avg loss: 4.013203, ppl: 55.323772 +epoch: 0, batch: 8956, sum loss: 7083.441895, avg loss: 4.043061, ppl: 57.000542 +epoch: 0, batch: 8957, sum loss: 7199.630859, avg loss: 3.990926, ppl: 54.104980 +epoch: 0, batch: 8958, sum loss: 6839.359375, avg loss: 3.835872, ppl: 46.333809 +epoch: 0, batch: 8959, sum loss: 6582.648926, avg loss: 3.911259, ppl: 49.961811 +epoch: 0, batch: 8960, sum loss: 7296.256836, avg loss: 4.060243, ppl: 57.988407 +epoch: 0, batch: 8961, sum loss: 5911.841309, 
avg loss: 3.718139, ppl: 41.187679 +epoch: 0, batch: 8962, sum loss: 6178.358887, avg loss: 3.940280, ppl: 51.432987 +epoch: 0, batch: 8963, sum loss: 6531.696289, avg loss: 3.612664, ppl: 37.064651 +epoch: 0, batch: 8964, sum loss: 6981.182617, avg loss: 4.007567, ppl: 55.012882 +epoch: 0, batch: 8965, sum loss: 7107.651367, avg loss: 3.713506, ppl: 40.997311 +epoch: 0, batch: 8966, sum loss: 6662.089844, avg loss: 3.678680, ppl: 39.594112 +epoch: 0, batch: 8967, sum loss: 6163.489746, avg loss: 3.710710, ppl: 40.882835 +epoch: 0, batch: 8968, sum loss: 6405.867188, avg loss: 3.810748, ppl: 45.184219 +epoch: 0, batch: 8969, sum loss: 6108.826172, avg loss: 3.825189, ppl: 45.841446 +epoch: 0, batch: 8970, sum loss: 6910.205078, avg loss: 4.062437, ppl: 58.115742 +epoch: 0, batch: 8971, sum loss: 5783.408203, avg loss: 3.762790, ppl: 43.068428 +epoch: 0, batch: 8972, sum loss: 5458.534668, avg loss: 3.751570, ppl: 42.587902 +epoch: 0, batch: 8973, sum loss: 6482.480957, avg loss: 3.833519, ppl: 46.224937 +epoch: 0, batch: 8974, sum loss: 5278.876953, avg loss: 3.704475, ppl: 40.628708 +epoch: 0, batch: 8975, sum loss: 8012.328613, avg loss: 3.912270, ppl: 50.012344 +epoch: 0, batch: 8976, sum loss: 6329.995605, avg loss: 3.644212, ppl: 38.252609 +epoch: 0, batch: 8977, sum loss: 7061.188477, avg loss: 4.163437, ppl: 64.292107 +epoch: 0, batch: 8978, sum loss: 6862.254883, avg loss: 3.973512, ppl: 53.170937 +epoch: 0, batch: 8979, sum loss: 5733.365234, avg loss: 3.832464, ppl: 46.176155 +epoch: 0, batch: 8980, sum loss: 6064.006836, avg loss: 3.835551, ppl: 46.318954 +epoch: 0, batch: 8981, sum loss: 6316.609863, avg loss: 3.800608, ppl: 44.728355 +epoch: 0, batch: 8982, sum loss: 5218.716797, avg loss: 3.709109, ppl: 40.817436 +epoch: 0, batch: 8983, sum loss: 6763.175293, avg loss: 4.126403, ppl: 61.954693 +epoch: 0, batch: 8984, sum loss: 6846.740234, avg loss: 3.964528, ppl: 52.695408 +epoch: 0, batch: 8985, sum loss: 6761.777344, avg loss: 4.113003, ppl: 
61.130032 +epoch: 0, batch: 8986, sum loss: 6334.875488, avg loss: 3.839319, ppl: 46.493778 +epoch: 0, batch: 8987, sum loss: 6681.451660, avg loss: 3.732654, ppl: 41.789886 +epoch: 0, batch: 8988, sum loss: 5762.764648, avg loss: 3.966115, ppl: 52.779072 +epoch: 0, batch: 8989, sum loss: 6509.235840, avg loss: 4.008150, ppl: 55.044949 +epoch: 0, batch: 8990, sum loss: 6678.451660, avg loss: 3.792420, ppl: 44.363628 +epoch: 0, batch: 8991, sum loss: 7027.572754, avg loss: 3.990672, ppl: 54.091206 +epoch: 0, batch: 8992, sum loss: 6208.697754, avg loss: 3.912223, ppl: 50.010006 +epoch: 0, batch: 8993, sum loss: 5917.397461, avg loss: 3.952837, ppl: 52.082928 +epoch: 0, batch: 8994, sum loss: 6598.266113, avg loss: 4.003802, ppl: 54.806145 +epoch: 0, batch: 8995, sum loss: 6400.100098, avg loss: 3.716667, ppl: 41.127075 +epoch: 0, batch: 8996, sum loss: 5664.329102, avg loss: 3.680526, ppl: 39.667244 +epoch: 0, batch: 8997, sum loss: 6592.503906, avg loss: 3.769299, ppl: 43.349667 +epoch: 0, batch: 8998, sum loss: 5496.796875, avg loss: 3.550902, ppl: 34.844746 +epoch: 0, batch: 8999, sum loss: 6593.216797, avg loss: 3.733418, ppl: 41.821819 +epoch: 0, batch: 9000, sum loss: 6797.040039, avg loss: 4.142011, ppl: 62.929226 +epoch: 0, batch: 9001, sum loss: 6515.924316, avg loss: 3.946653, ppl: 51.761848 +epoch: 0, batch: 9002, sum loss: 5424.083496, avg loss: 3.725332, ppl: 41.485016 +epoch: 0, batch: 9003, sum loss: 7696.718262, avg loss: 3.867697, ppl: 47.832123 +epoch: 0, batch: 9004, sum loss: 5751.165527, avg loss: 3.679569, ppl: 39.629292 +epoch: 0, batch: 9005, sum loss: 6975.934570, avg loss: 4.368149, ppl: 78.897476 +epoch: 0, batch: 9006, sum loss: 7762.604492, avg loss: 3.982865, ppl: 53.670597 +epoch: 0, batch: 9007, sum loss: 6455.092773, avg loss: 3.718371, ppl: 41.197247 +epoch: 0, batch: 9008, sum loss: 7244.506348, avg loss: 4.035937, ppl: 56.595917 +epoch: 0, batch: 9009, sum loss: 6832.603516, avg loss: 3.904345, ppl: 49.617561 +epoch: 0, batch: 
9010, sum loss: 6853.770508, avg loss: 3.843955, ppl: 46.709827 +epoch: 0, batch: 9011, sum loss: 7213.920410, avg loss: 4.223607, ppl: 68.279297 +epoch: 0, batch: 9012, sum loss: 6596.526855, avg loss: 3.782412, ppl: 43.921856 +epoch: 0, batch: 9013, sum loss: 6153.101074, avg loss: 3.585723, ppl: 36.079449 +epoch: 0, batch: 9014, sum loss: 6397.133789, avg loss: 3.774120, ppl: 43.559174 +epoch: 0, batch: 9015, sum loss: 6947.377930, avg loss: 3.965399, ppl: 52.741283 +epoch: 0, batch: 9016, sum loss: 6495.378418, avg loss: 3.889448, ppl: 48.883904 +epoch: 0, batch: 9017, sum loss: 6214.398438, avg loss: 3.859875, ppl: 47.459404 +epoch: 0, batch: 9018, sum loss: 5970.229980, avg loss: 3.743091, ppl: 42.228310 +epoch: 0, batch: 9019, sum loss: 5808.489258, avg loss: 3.911440, ppl: 49.970879 +epoch: 0, batch: 9020, sum loss: 6822.570312, avg loss: 3.809364, ppl: 45.121716 +epoch: 0, batch: 9021, sum loss: 5141.626465, avg loss: 3.758499, ppl: 42.884007 +epoch: 0, batch: 9022, sum loss: 6352.265625, avg loss: 3.909086, ppl: 49.853390 +epoch: 0, batch: 9023, sum loss: 6508.947266, avg loss: 3.819805, ppl: 45.595314 +epoch: 0, batch: 9024, sum loss: 7174.258789, avg loss: 3.857128, ppl: 47.329243 +epoch: 0, batch: 9025, sum loss: 6198.646973, avg loss: 3.864493, ppl: 47.679111 +epoch: 0, batch: 9026, sum loss: 7749.327637, avg loss: 4.102344, ppl: 60.481892 +epoch: 0, batch: 9027, sum loss: 8773.910156, avg loss: 3.986329, ppl: 53.856823 +epoch: 0, batch: 9028, sum loss: 6126.022461, avg loss: 3.498585, ppl: 33.068634 +epoch: 0, batch: 9029, sum loss: 6690.108887, avg loss: 4.066936, ppl: 58.377792 +epoch: 0, batch: 9030, sum loss: 6237.517090, avg loss: 3.552117, ppl: 34.887093 +epoch: 0, batch: 9031, sum loss: 7729.165527, avg loss: 4.085183, ppl: 59.452797 +epoch: 0, batch: 9032, sum loss: 7420.779785, avg loss: 3.976838, ppl: 53.348072 +epoch: 0, batch: 9033, sum loss: 5873.124023, avg loss: 3.603143, ppl: 36.713459 +epoch: 0, batch: 9034, sum loss: 7390.839844, 
avg loss: 4.085595, ppl: 59.477295 +epoch: 0, batch: 9035, sum loss: 5522.754883, avg loss: 3.840581, ppl: 46.552498 +epoch: 0, batch: 9036, sum loss: 5427.408203, avg loss: 3.625523, ppl: 37.544357 +epoch: 0, batch: 9037, sum loss: 8720.359375, avg loss: 4.127004, ppl: 61.991924 +epoch: 0, batch: 9038, sum loss: 6144.278809, avg loss: 3.735124, ppl: 41.893204 +epoch: 0, batch: 9039, sum loss: 5443.397461, avg loss: 3.525517, ppl: 33.971325 +epoch: 0, batch: 9040, sum loss: 5645.031250, avg loss: 3.750851, ppl: 42.557301 +epoch: 0, batch: 9041, sum loss: 6547.111328, avg loss: 3.743345, ppl: 42.239063 +epoch: 0, batch: 9042, sum loss: 6936.840820, avg loss: 3.961645, ppl: 52.543709 +epoch: 0, batch: 9043, sum loss: 6616.976562, avg loss: 3.842611, ppl: 46.647114 +epoch: 0, batch: 9044, sum loss: 6501.340820, avg loss: 3.533337, ppl: 34.238041 +epoch: 0, batch: 9045, sum loss: 7568.558594, avg loss: 3.964672, ppl: 52.702984 +epoch: 0, batch: 9046, sum loss: 7503.783691, avg loss: 3.600664, ppl: 36.622547 +epoch: 0, batch: 9047, sum loss: 6074.965820, avg loss: 3.624681, ppl: 37.512772 +epoch: 0, batch: 9048, sum loss: 5761.538574, avg loss: 3.637335, ppl: 37.990459 +epoch: 0, batch: 9049, sum loss: 5326.766602, avg loss: 3.722409, ppl: 41.363899 +epoch: 0, batch: 9050, sum loss: 7095.260742, avg loss: 3.957201, ppl: 52.310692 +epoch: 0, batch: 9051, sum loss: 6615.749023, avg loss: 4.053768, ppl: 57.614147 +epoch: 0, batch: 9052, sum loss: 6095.262695, avg loss: 3.889766, ppl: 48.899429 +epoch: 0, batch: 9053, sum loss: 6349.666504, avg loss: 3.726330, ppl: 41.526440 +epoch: 0, batch: 9054, sum loss: 6710.726074, avg loss: 3.977905, ppl: 53.405048 +epoch: 0, batch: 9055, sum loss: 6129.301758, avg loss: 3.755700, ppl: 42.764130 +epoch: 0, batch: 9056, sum loss: 6974.680664, avg loss: 3.936050, ppl: 51.215908 +epoch: 0, batch: 9057, sum loss: 6861.942383, avg loss: 3.894406, ppl: 49.126842 +epoch: 0, batch: 9058, sum loss: 5743.318848, avg loss: 3.674548, ppl: 
39.430836 +epoch: 0, batch: 9059, sum loss: 6002.149902, avg loss: 3.628870, ppl: 37.670208 +epoch: 0, batch: 9060, sum loss: 7088.552734, avg loss: 3.989056, ppl: 54.003891 +epoch: 0, batch: 9061, sum loss: 5756.979004, avg loss: 3.723790, ppl: 41.421082 +epoch: 0, batch: 9062, sum loss: 4920.609863, avg loss: 3.497235, ppl: 33.024025 +epoch: 0, batch: 9063, sum loss: 6933.156738, avg loss: 3.687849, ppl: 39.958813 +epoch: 0, batch: 9064, sum loss: 6763.086914, avg loss: 3.836124, ppl: 46.345501 +epoch: 0, batch: 9065, sum loss: 8022.396973, avg loss: 4.043547, ppl: 57.028244 +epoch: 0, batch: 9066, sum loss: 7157.408691, avg loss: 3.950005, ppl: 51.935616 +epoch: 0, batch: 9067, sum loss: 6374.911621, avg loss: 3.817312, ppl: 45.481812 +epoch: 0, batch: 9068, sum loss: 7103.729004, avg loss: 4.020220, ppl: 55.713379 +epoch: 0, batch: 9069, sum loss: 7017.623047, avg loss: 3.973739, ppl: 53.183006 +epoch: 0, batch: 9070, sum loss: 5547.220703, avg loss: 3.768492, ppl: 43.314716 +epoch: 0, batch: 9071, sum loss: 6404.812012, avg loss: 3.874659, ppl: 48.166286 +epoch: 0, batch: 9072, sum loss: 6487.372070, avg loss: 3.825102, ppl: 45.837460 +epoch: 0, batch: 9073, sum loss: 6636.301270, avg loss: 3.707431, ppl: 40.748981 +epoch: 0, batch: 9074, sum loss: 7052.591797, avg loss: 3.951032, ppl: 51.988987 +epoch: 0, batch: 9075, sum loss: 6216.004395, avg loss: 3.794874, ppl: 44.472641 +epoch: 0, batch: 9076, sum loss: 7219.391113, avg loss: 3.973248, ppl: 53.156891 +epoch: 0, batch: 9077, sum loss: 7036.097656, avg loss: 3.932978, ppl: 51.058800 +epoch: 0, batch: 9078, sum loss: 6540.601562, avg loss: 3.870178, ppl: 47.950943 +epoch: 0, batch: 9079, sum loss: 6596.960449, avg loss: 3.851115, ppl: 47.045502 +epoch: 0, batch: 9080, sum loss: 6121.845703, avg loss: 3.934348, ppl: 51.128807 +epoch: 0, batch: 9081, sum loss: 6168.710938, avg loss: 3.611657, ppl: 37.027370 +epoch: 0, batch: 9082, sum loss: 5480.210938, avg loss: 3.579498, ppl: 35.855530 +epoch: 0, batch: 
9083, sum loss: 6542.692383, avg loss: 3.725907, ppl: 41.508869 +epoch: 0, batch: 9084, sum loss: 6018.702148, avg loss: 3.641078, ppl: 38.132923 +epoch: 0, batch: 9085, sum loss: 7546.662598, avg loss: 3.898069, ppl: 49.307163 +epoch: 0, batch: 9086, sum loss: 7270.955078, avg loss: 4.008245, ppl: 55.050144 +epoch: 0, batch: 9087, sum loss: 8245.703125, avg loss: 3.898678, ppl: 49.337162 +epoch: 0, batch: 9088, sum loss: 5950.422363, avg loss: 3.951144, ppl: 51.994801 +epoch: 0, batch: 9089, sum loss: 6285.694824, avg loss: 3.948301, ppl: 51.847195 +epoch: 0, batch: 9090, sum loss: 7847.975586, avg loss: 4.121836, ppl: 61.672379 +epoch: 0, batch: 9091, sum loss: 6393.510742, avg loss: 4.013504, ppl: 55.340420 +epoch: 0, batch: 9092, sum loss: 6602.039062, avg loss: 3.678016, ppl: 39.567822 +epoch: 0, batch: 9093, sum loss: 6427.803711, avg loss: 3.752366, ppl: 42.621819 +epoch: 0, batch: 9094, sum loss: 6038.892090, avg loss: 3.666601, ppl: 39.118713 +epoch: 0, batch: 9095, sum loss: 6503.690918, avg loss: 3.985105, ppl: 53.790928 +epoch: 0, batch: 9096, sum loss: 6442.049805, avg loss: 3.850598, ppl: 47.021156 +epoch: 0, batch: 9097, sum loss: 6193.017578, avg loss: 3.647243, ppl: 38.368721 +epoch: 0, batch: 9098, sum loss: 5537.542480, avg loss: 3.696624, ppl: 40.310966 +epoch: 0, batch: 9099, sum loss: 6080.926758, avg loss: 3.613147, ppl: 37.082569 +epoch: 0, batch: 9100, sum loss: 5993.208984, avg loss: 3.708669, ppl: 40.799465 +epoch: 0, batch: 9101, sum loss: 6277.776367, avg loss: 3.988422, ppl: 53.969654 +epoch: 0, batch: 9102, sum loss: 5766.854492, avg loss: 3.682538, ppl: 39.747135 +epoch: 0, batch: 9103, sum loss: 6862.587891, avg loss: 4.082444, ppl: 59.290180 +epoch: 0, batch: 9104, sum loss: 6357.656250, avg loss: 3.748618, ppl: 42.462364 +epoch: 0, batch: 9105, sum loss: 6279.333984, avg loss: 3.573895, ppl: 35.655209 +epoch: 0, batch: 9106, sum loss: 5855.891113, avg loss: 3.880644, ppl: 48.455391 +epoch: 0, batch: 9107, sum loss: 6518.764160, 
avg loss: 3.735681, ppl: 41.916573 +epoch: 0, batch: 9108, sum loss: 7116.302734, avg loss: 3.731674, ppl: 41.748947 +epoch: 0, batch: 9109, sum loss: 8069.743164, avg loss: 3.890908, ppl: 48.955318 +epoch: 0, batch: 9110, sum loss: 6586.251953, avg loss: 3.585331, ppl: 36.065285 +epoch: 0, batch: 9111, sum loss: 6588.893066, avg loss: 3.964436, ppl: 52.690559 +epoch: 0, batch: 9112, sum loss: 5564.131836, avg loss: 3.537274, ppl: 34.373085 +epoch: 0, batch: 9113, sum loss: 8173.689453, avg loss: 4.136483, ppl: 62.582314 +epoch: 0, batch: 9114, sum loss: 6945.249023, avg loss: 3.975529, ppl: 53.278278 +epoch: 0, batch: 9115, sum loss: 6210.670898, avg loss: 3.798575, ppl: 44.637550 +epoch: 0, batch: 9116, sum loss: 7212.434570, avg loss: 3.973793, ppl: 53.185898 +epoch: 0, batch: 9117, sum loss: 6841.011719, avg loss: 3.713904, ppl: 41.013626 +epoch: 0, batch: 9118, sum loss: 7566.180664, avg loss: 4.020287, ppl: 55.717098 +epoch: 0, batch: 9119, sum loss: 5747.478516, avg loss: 3.583216, ppl: 35.989101 +epoch: 0, batch: 9120, sum loss: 5215.709961, avg loss: 3.730837, ppl: 41.714005 +epoch: 0, batch: 9121, sum loss: 5711.592773, avg loss: 3.682523, ppl: 39.746529 +epoch: 0, batch: 9122, sum loss: 6201.327148, avg loss: 4.063779, ppl: 58.193832 +epoch: 0, batch: 9123, sum loss: 6001.584961, avg loss: 3.964059, ppl: 52.670677 +epoch: 0, batch: 9124, sum loss: 5513.920410, avg loss: 3.564267, ppl: 35.313549 +epoch: 0, batch: 9125, sum loss: 5687.630371, avg loss: 3.592944, ppl: 36.340904 +epoch: 0, batch: 9126, sum loss: 7297.126953, avg loss: 3.838573, ppl: 46.459118 +epoch: 0, batch: 9127, sum loss: 6088.325195, avg loss: 3.589814, ppl: 36.227352 +epoch: 0, batch: 9128, sum loss: 6416.359863, avg loss: 3.772110, ppl: 43.471703 +epoch: 0, batch: 9129, sum loss: 6570.335449, avg loss: 3.844550, ppl: 46.737633 +epoch: 0, batch: 9130, sum loss: 6292.412598, avg loss: 3.898645, ppl: 49.335552 +epoch: 0, batch: 9131, sum loss: 6516.723633, avg loss: 3.775622, ppl: 
43.624630 +epoch: 0, batch: 9132, sum loss: 6055.442383, avg loss: 3.613033, ppl: 37.078327 +epoch: 0, batch: 9133, sum loss: 5929.317871, avg loss: 3.624278, ppl: 37.497623 +epoch: 0, batch: 9134, sum loss: 6475.725586, avg loss: 3.773733, ppl: 43.542301 +epoch: 0, batch: 9135, sum loss: 6559.750000, avg loss: 3.845105, ppl: 46.763615 +epoch: 0, batch: 9136, sum loss: 6271.226562, avg loss: 3.828588, ppl: 45.997520 +epoch: 0, batch: 9137, sum loss: 6967.554688, avg loss: 3.801176, ppl: 44.753765 +epoch: 0, batch: 9138, sum loss: 7092.013672, avg loss: 3.852262, ppl: 47.099472 +epoch: 0, batch: 9139, sum loss: 6472.720703, avg loss: 3.679773, ppl: 39.637398 +epoch: 0, batch: 9140, sum loss: 6966.781738, avg loss: 4.055170, ppl: 57.694973 +epoch: 0, batch: 9141, sum loss: 5787.285156, avg loss: 3.667481, ppl: 39.153152 +epoch: 0, batch: 9142, sum loss: 5885.146973, avg loss: 3.689747, ppl: 40.034729 +epoch: 0, batch: 9143, sum loss: 7543.308594, avg loss: 4.077464, ppl: 58.995674 +epoch: 0, batch: 9144, sum loss: 5209.475586, avg loss: 3.440869, ppl: 31.214075 +epoch: 0, batch: 9145, sum loss: 7060.516602, avg loss: 3.818559, ppl: 45.538559 +epoch: 0, batch: 9146, sum loss: 5170.773438, avg loss: 3.366389, ppl: 28.973715 +epoch: 0, batch: 9147, sum loss: 6183.218262, avg loss: 3.751953, ppl: 42.604202 +epoch: 0, batch: 9148, sum loss: 5814.744629, avg loss: 3.622894, ppl: 37.445770 +epoch: 0, batch: 9149, sum loss: 6509.670410, avg loss: 3.895674, ppl: 49.189182 +epoch: 0, batch: 9150, sum loss: 5544.736816, avg loss: 3.631131, ppl: 37.755493 +epoch: 0, batch: 9151, sum loss: 6914.446289, avg loss: 4.103529, ppl: 60.553604 +epoch: 0, batch: 9152, sum loss: 6278.090332, avg loss: 3.658561, ppl: 38.805462 +epoch: 0, batch: 9153, sum loss: 6827.755371, avg loss: 3.795306, ppl: 44.491848 +epoch: 0, batch: 9154, sum loss: 6787.569336, avg loss: 3.785594, ppl: 44.061825 +epoch: 0, batch: 9155, sum loss: 6164.064941, avg loss: 3.688848, ppl: 39.998753 +epoch: 0, batch: 
9156, sum loss: 6557.804688, avg loss: 4.023193, ppl: 55.879238 +epoch: 0, batch: 9157, sum loss: 6799.115234, avg loss: 3.943803, ppl: 51.614498 +epoch: 0, batch: 9158, sum loss: 6426.896484, avg loss: 3.841540, ppl: 46.597183 +epoch: 0, batch: 9159, sum loss: 4586.923340, avg loss: 3.415431, ppl: 30.430056 +epoch: 0, batch: 9160, sum loss: 6553.383301, avg loss: 3.814542, ppl: 45.355980 +epoch: 0, batch: 9161, sum loss: 5713.872559, avg loss: 3.705495, ppl: 40.670158 +epoch: 0, batch: 9162, sum loss: 6466.896973, avg loss: 3.936030, ppl: 51.214859 +epoch: 0, batch: 9163, sum loss: 5940.256836, avg loss: 3.622108, ppl: 37.416348 +epoch: 0, batch: 9164, sum loss: 6548.694824, avg loss: 3.868101, ppl: 47.851437 +epoch: 0, batch: 9165, sum loss: 6471.751465, avg loss: 3.637859, ppl: 38.010372 +epoch: 0, batch: 9166, sum loss: 6258.786133, avg loss: 3.885032, ppl: 48.668495 +epoch: 0, batch: 9167, sum loss: 5268.413086, avg loss: 3.666258, ppl: 39.105312 +epoch: 0, batch: 9168, sum loss: 6716.657227, avg loss: 3.946332, ppl: 51.745216 +epoch: 0, batch: 9169, sum loss: 6262.580566, avg loss: 3.686039, ppl: 39.886562 +epoch: 0, batch: 9170, sum loss: 6042.036133, avg loss: 3.878072, ppl: 48.330944 +epoch: 0, batch: 9171, sum loss: 7012.633789, avg loss: 3.995803, ppl: 54.369461 +epoch: 0, batch: 9172, sum loss: 6280.847656, avg loss: 3.734154, ppl: 41.852612 +epoch: 0, batch: 9173, sum loss: 8105.873047, avg loss: 4.197759, ppl: 66.537064 +epoch: 0, batch: 9174, sum loss: 7752.993164, avg loss: 3.778262, ppl: 43.739952 +epoch: 0, batch: 9175, sum loss: 6334.620605, avg loss: 3.589020, ppl: 36.198593 +epoch: 0, batch: 9176, sum loss: 6181.187500, avg loss: 3.664012, ppl: 39.017555 +epoch: 0, batch: 9177, sum loss: 5855.549316, avg loss: 3.708391, ppl: 40.788124 +epoch: 0, batch: 9178, sum loss: 6349.339844, avg loss: 3.951052, ppl: 51.990017 +epoch: 0, batch: 9179, sum loss: 6852.963867, avg loss: 3.631671, ppl: 37.775894 +epoch: 0, batch: 9180, sum loss: 4516.905273, 
avg loss: 3.416721, ppl: 30.469345 +epoch: 0, batch: 9181, sum loss: 7478.831055, avg loss: 3.990838, ppl: 54.100224 +epoch: 0, batch: 9182, sum loss: 6507.395996, avg loss: 3.752823, ppl: 42.641304 +epoch: 0, batch: 9183, sum loss: 6662.339355, avg loss: 3.711610, ppl: 40.919628 +epoch: 0, batch: 9184, sum loss: 6223.563477, avg loss: 3.746877, ppl: 42.388515 +epoch: 0, batch: 9185, sum loss: 6901.189453, avg loss: 3.821257, ppl: 45.661552 +epoch: 0, batch: 9186, sum loss: 5543.082520, avg loss: 3.889882, ppl: 48.905132 +epoch: 0, batch: 9187, sum loss: 6635.402344, avg loss: 3.828853, ppl: 46.009731 +epoch: 0, batch: 9188, sum loss: 6766.162598, avg loss: 4.001279, ppl: 54.668018 +epoch: 0, batch: 9189, sum loss: 7633.925781, avg loss: 3.736625, ppl: 41.956165 +epoch: 0, batch: 9190, sum loss: 6513.717773, avg loss: 3.905107, ppl: 49.655380 +epoch: 0, batch: 9191, sum loss: 6199.676758, avg loss: 3.621306, ppl: 37.386379 +epoch: 0, batch: 9192, sum loss: 6531.637207, avg loss: 3.937093, ppl: 51.269344 +epoch: 0, batch: 9193, sum loss: 5193.112305, avg loss: 3.680448, ppl: 39.664162 +epoch: 0, batch: 9194, sum loss: 7343.574707, avg loss: 4.030502, ppl: 56.289181 +epoch: 0, batch: 9195, sum loss: 6269.654785, avg loss: 3.698911, ppl: 40.403297 +epoch: 0, batch: 9196, sum loss: 7575.448242, avg loss: 3.894832, ppl: 49.147804 +epoch: 0, batch: 9197, sum loss: 6157.599609, avg loss: 3.704934, ppl: 40.647350 +epoch: 0, batch: 9198, sum loss: 6167.730469, avg loss: 3.823764, ppl: 45.776169 +epoch: 0, batch: 9199, sum loss: 6902.537109, avg loss: 3.908571, ppl: 49.827709 +epoch: 0, batch: 9200, sum loss: 5383.201660, avg loss: 3.796334, ppl: 44.537621 +epoch: 0, batch: 9201, sum loss: 6209.706055, avg loss: 3.903021, ppl: 49.551914 +epoch: 0, batch: 9202, sum loss: 7085.523926, avg loss: 3.793107, ppl: 44.394119 +epoch: 0, batch: 9203, sum loss: 6379.362305, avg loss: 3.838365, ppl: 46.449459 +epoch: 0, batch: 9204, sum loss: 7320.869141, avg loss: 3.838946, ppl: 
46.476444 +epoch: 0, batch: 9205, sum loss: 5451.047363, avg loss: 3.553486, ppl: 34.934883 +epoch: 0, batch: 9206, sum loss: 7616.363281, avg loss: 3.870104, ppl: 47.947353 +epoch: 0, batch: 9207, sum loss: 5655.197266, avg loss: 3.563452, ppl: 35.284775 +epoch: 0, batch: 9208, sum loss: 6893.222656, avg loss: 3.814733, ppl: 45.364632 +epoch: 0, batch: 9209, sum loss: 6973.060547, avg loss: 4.150631, ppl: 63.474068 +epoch: 0, batch: 9210, sum loss: 6805.729492, avg loss: 3.780961, ppl: 43.858170 +epoch: 0, batch: 9211, sum loss: 8294.938477, avg loss: 4.042368, ppl: 56.961063 +epoch: 0, batch: 9212, sum loss: 6848.445312, avg loss: 3.808924, ppl: 45.101871 +epoch: 0, batch: 9213, sum loss: 6001.737305, avg loss: 3.659596, ppl: 38.845646 +epoch: 0, batch: 9214, sum loss: 6760.107422, avg loss: 4.031072, ppl: 56.321236 +epoch: 0, batch: 9215, sum loss: 5678.771973, avg loss: 3.626291, ppl: 37.573200 +epoch: 0, batch: 9216, sum loss: 5907.025391, avg loss: 3.558449, ppl: 35.108704 +epoch: 0, batch: 9217, sum loss: 6637.283203, avg loss: 3.932040, ppl: 51.010933 +epoch: 0, batch: 9218, sum loss: 5519.952148, avg loss: 3.554380, ppl: 34.966141 +epoch: 0, batch: 9219, sum loss: 6272.406738, avg loss: 3.888659, ppl: 48.845341 +epoch: 0, batch: 9220, sum loss: 5736.216797, avg loss: 3.907505, ppl: 49.774586 +epoch: 0, batch: 9221, sum loss: 6713.752441, avg loss: 4.167444, ppl: 64.550262 +epoch: 0, batch: 9222, sum loss: 7390.867188, avg loss: 4.094663, ppl: 60.019119 +epoch: 0, batch: 9223, sum loss: 7080.207031, avg loss: 3.896647, ppl: 49.237068 +epoch: 0, batch: 9224, sum loss: 6905.744629, avg loss: 3.838657, ppl: 46.463051 +epoch: 0, batch: 9225, sum loss: 6507.172852, avg loss: 3.785441, ppl: 44.055092 +epoch: 0, batch: 9226, sum loss: 6991.871582, avg loss: 3.801997, ppl: 44.790527 +epoch: 0, batch: 9227, sum loss: 6532.750000, avg loss: 3.856405, ppl: 47.295010 +epoch: 0, batch: 9228, sum loss: 6686.455078, avg loss: 3.965869, ppl: 52.766102 +epoch: 0, batch: 
9229, sum loss: 7134.480957, avg loss: 3.961400, ppl: 52.530796 +epoch: 0, batch: 9230, sum loss: 5277.442871, avg loss: 3.599893, ppl: 36.594315 +epoch: 0, batch: 9231, sum loss: 6434.894043, avg loss: 3.888154, ppl: 48.820660 +epoch: 0, batch: 9232, sum loss: 6096.041504, avg loss: 3.802895, ppl: 44.830795 +epoch: 0, batch: 9233, sum loss: 6265.895996, avg loss: 3.844108, ppl: 46.716988 +epoch: 0, batch: 9234, sum loss: 7597.500977, avg loss: 4.056327, ppl: 57.761784 +epoch: 0, batch: 9235, sum loss: 6908.236328, avg loss: 3.934076, ppl: 51.114876 +epoch: 0, batch: 9236, sum loss: 6722.793945, avg loss: 4.071953, ppl: 58.671425 +epoch: 0, batch: 9237, sum loss: 6290.319336, avg loss: 3.983736, ppl: 53.717350 +epoch: 0, batch: 9238, sum loss: 6989.043945, avg loss: 3.812899, ppl: 45.281525 +epoch: 0, batch: 9239, sum loss: 7981.514160, avg loss: 4.057709, ppl: 57.841656 +epoch: 0, batch: 9240, sum loss: 6578.787109, avg loss: 3.752874, ppl: 42.643448 +epoch: 0, batch: 9241, sum loss: 8214.360352, avg loss: 3.907878, ppl: 49.793198 +epoch: 0, batch: 9242, sum loss: 6159.994141, avg loss: 3.462616, ppl: 31.900318 +epoch: 0, batch: 9243, sum loss: 6634.279785, avg loss: 3.960764, ppl: 52.497440 +epoch: 0, batch: 9244, sum loss: 7717.055664, avg loss: 4.164628, ppl: 64.368736 +epoch: 0, batch: 9245, sum loss: 5932.696289, avg loss: 3.726568, ppl: 41.536308 +epoch: 0, batch: 9246, sum loss: 6771.485352, avg loss: 3.817072, ppl: 45.470860 +epoch: 0, batch: 9247, sum loss: 6653.366699, avg loss: 3.637707, ppl: 38.004612 +epoch: 0, batch: 9248, sum loss: 7112.996094, avg loss: 3.793598, ppl: 44.415920 +epoch: 0, batch: 9249, sum loss: 6319.707520, avg loss: 3.682813, ppl: 39.758091 +epoch: 0, batch: 9250, sum loss: 5751.098633, avg loss: 3.712782, ppl: 40.967606 +epoch: 0, batch: 9251, sum loss: 4627.103516, avg loss: 3.382386, ppl: 29.440926 +epoch: 0, batch: 9252, sum loss: 7433.618652, avg loss: 3.935214, ppl: 51.173088 +epoch: 0, batch: 9253, sum loss: 5373.399414, 
avg loss: 3.383753, ppl: 29.481209 +epoch: 0, batch: 9254, sum loss: 6112.128906, avg loss: 3.893076, ppl: 49.061554 +epoch: 0, batch: 9255, sum loss: 4886.603516, avg loss: 3.458318, ppl: 31.763506 +epoch: 0, batch: 9256, sum loss: 6304.204590, avg loss: 3.889084, ppl: 48.866085 +epoch: 0, batch: 9257, sum loss: 5168.386230, avg loss: 3.652570, ppl: 38.573666 +epoch: 0, batch: 9258, sum loss: 5998.987793, avg loss: 3.806464, ppl: 44.991077 +epoch: 0, batch: 9259, sum loss: 6437.492676, avg loss: 3.742728, ppl: 42.213001 +epoch: 0, batch: 9260, sum loss: 6512.901855, avg loss: 3.822126, ppl: 45.701252 +epoch: 0, batch: 9261, sum loss: 7019.859375, avg loss: 3.910785, ppl: 49.938148 +epoch: 0, batch: 9262, sum loss: 6838.008789, avg loss: 3.925378, ppl: 50.672234 +epoch: 0, batch: 9263, sum loss: 7050.395508, avg loss: 4.209192, ppl: 67.302124 +epoch: 0, batch: 9264, sum loss: 6024.213379, avg loss: 3.581578, ppl: 35.930176 +epoch: 0, batch: 9265, sum loss: 7501.958496, avg loss: 4.163129, ppl: 64.272339 +epoch: 0, batch: 9266, sum loss: 5783.602539, avg loss: 3.473635, ppl: 32.253777 +epoch: 0, batch: 9267, sum loss: 6665.424316, avg loss: 3.988884, ppl: 53.994583 +epoch: 0, batch: 9268, sum loss: 6683.027344, avg loss: 3.933506, ppl: 51.085796 +epoch: 0, batch: 9269, sum loss: 7725.643066, avg loss: 4.142436, ppl: 62.955997 +epoch: 0, batch: 9270, sum loss: 6743.527344, avg loss: 3.833728, ppl: 46.234585 +epoch: 0, batch: 9271, sum loss: 7016.140137, avg loss: 3.525698, ppl: 33.977497 +epoch: 0, batch: 9272, sum loss: 7379.428711, avg loss: 4.131819, ppl: 62.291142 +epoch: 0, batch: 9273, sum loss: 5569.478516, avg loss: 3.545181, ppl: 34.645939 +epoch: 0, batch: 9274, sum loss: 4938.938965, avg loss: 3.420318, ppl: 30.579126 +epoch: 0, batch: 9275, sum loss: 6480.889648, avg loss: 3.724649, ppl: 41.456688 +epoch: 0, batch: 9276, sum loss: 5890.918457, avg loss: 3.629648, ppl: 37.699535 +epoch: 0, batch: 9277, sum loss: 6047.433105, avg loss: 3.612565, ppl: 
37.060986 +epoch: 0, batch: 9278, sum loss: 7078.802734, avg loss: 3.900167, ppl: 49.410698 +epoch: 0, batch: 9279, sum loss: 6812.125977, avg loss: 3.646748, ppl: 38.349762 +epoch: 0, batch: 9280, sum loss: 6804.554199, avg loss: 4.040709, ppl: 56.866676 +epoch: 0, batch: 9281, sum loss: 5845.313477, avg loss: 3.957558, ppl: 52.329403 +epoch: 0, batch: 9282, sum loss: 6958.612793, avg loss: 4.115088, ppl: 61.257576 +epoch: 0, batch: 9283, sum loss: 7440.036133, avg loss: 3.839028, ppl: 46.480278 +epoch: 0, batch: 9284, sum loss: 5835.559570, avg loss: 3.731176, ppl: 41.728157 +epoch: 0, batch: 9285, sum loss: 8078.067871, avg loss: 4.016941, ppl: 55.530979 +epoch: 0, batch: 9286, sum loss: 6097.845215, avg loss: 3.804021, ppl: 44.881275 +epoch: 0, batch: 9287, sum loss: 7145.960449, avg loss: 3.704489, ppl: 40.629299 +epoch: 0, batch: 9288, sum loss: 7284.475586, avg loss: 3.941816, ppl: 51.512054 +epoch: 0, batch: 9289, sum loss: 6439.681641, avg loss: 3.837713, ppl: 46.419170 +epoch: 0, batch: 9290, sum loss: 7047.082031, avg loss: 3.904201, ppl: 49.610405 +epoch: 0, batch: 9291, sum loss: 7484.619629, avg loss: 3.753571, ppl: 42.673176 +epoch: 0, batch: 9292, sum loss: 5571.736328, avg loss: 3.533124, ppl: 34.230736 +epoch: 0, batch: 9293, sum loss: 7749.503418, avg loss: 3.982273, ppl: 53.638836 +epoch: 0, batch: 9294, sum loss: 6411.820312, avg loss: 3.791733, ppl: 44.333164 +epoch: 0, batch: 9295, sum loss: 6588.902344, avg loss: 3.709968, ppl: 40.852482 +epoch: 0, batch: 9296, sum loss: 6287.165527, avg loss: 4.071999, ppl: 58.674110 +epoch: 0, batch: 9297, sum loss: 8295.985352, avg loss: 4.088706, ppl: 59.662666 +epoch: 0, batch: 9298, sum loss: 6004.213379, avg loss: 3.704018, ppl: 40.610153 +epoch: 0, batch: 9299, sum loss: 6963.014648, avg loss: 3.792491, ppl: 44.366798 +epoch: 0, batch: 9300, sum loss: 5276.519043, avg loss: 3.426311, ppl: 30.762949 +epoch: 0, batch: 9301, sum loss: 6424.423340, avg loss: 3.846960, ppl: 46.850426 +epoch: 0, batch: 
9302, sum loss: 7166.591797, avg loss: 3.834452, ppl: 46.268085 +epoch: 0, batch: 9303, sum loss: 7171.900391, avg loss: 3.938441, ppl: 51.338493 +epoch: 0, batch: 9304, sum loss: 7144.570312, avg loss: 4.173230, ppl: 64.924835 +epoch: 0, batch: 9305, sum loss: 5903.510742, avg loss: 3.774623, ppl: 43.581081 +epoch: 0, batch: 9306, sum loss: 6108.079102, avg loss: 3.992209, ppl: 54.174412 +epoch: 0, batch: 9307, sum loss: 6354.108398, avg loss: 3.853310, ppl: 47.148872 +epoch: 0, batch: 9308, sum loss: 6311.134766, avg loss: 3.795030, ppl: 44.479565 +epoch: 0, batch: 9309, sum loss: 6183.590820, avg loss: 3.994568, ppl: 54.302368 +epoch: 0, batch: 9310, sum loss: 7516.750000, avg loss: 4.036923, ppl: 56.651752 +epoch: 0, batch: 9311, sum loss: 5778.142578, avg loss: 3.713459, ppl: 40.995377 +epoch: 0, batch: 9312, sum loss: 7014.916016, avg loss: 3.992553, ppl: 54.193069 +epoch: 0, batch: 9313, sum loss: 6308.194336, avg loss: 3.732659, ppl: 41.790096 +epoch: 0, batch: 9314, sum loss: 8318.818359, avg loss: 4.091893, ppl: 59.853069 +epoch: 0, batch: 9315, sum loss: 5543.668945, avg loss: 3.745722, ppl: 42.339577 +epoch: 0, batch: 9316, sum loss: 7338.007324, avg loss: 4.027446, ppl: 56.117420 +epoch: 0, batch: 9317, sum loss: 5593.044922, avg loss: 3.535426, ppl: 34.309639 +epoch: 0, batch: 9318, sum loss: 6337.237305, avg loss: 3.729981, ppl: 41.678314 +epoch: 0, batch: 9319, sum loss: 7163.898926, avg loss: 3.839174, ppl: 46.487061 +epoch: 0, batch: 9320, sum loss: 6347.139160, avg loss: 3.915570, ppl: 50.177666 +epoch: 0, batch: 9321, sum loss: 5632.024902, avg loss: 3.638259, ppl: 38.025581 +epoch: 0, batch: 9322, sum loss: 6389.828613, avg loss: 4.001145, ppl: 54.660721 +epoch: 0, batch: 9323, sum loss: 5820.688965, avg loss: 3.893437, ppl: 49.079300 +epoch: 0, batch: 9324, sum loss: 6086.254883, avg loss: 3.677495, ppl: 39.547222 +epoch: 0, batch: 9325, sum loss: 5539.795898, avg loss: 3.763448, ppl: 43.096786 +epoch: 0, batch: 9326, sum loss: 5615.956055, 
avg loss: 3.697140, ppl: 40.331799 +epoch: 0, batch: 9327, sum loss: 7667.618652, avg loss: 4.074186, ppl: 58.802616 +epoch: 0, batch: 9328, sum loss: 6744.972656, avg loss: 3.724447, ppl: 41.448296 +epoch: 0, batch: 9329, sum loss: 6271.828613, avg loss: 3.944546, ppl: 51.652893 +epoch: 0, batch: 9330, sum loss: 5297.057617, avg loss: 3.623159, ppl: 37.455692 +epoch: 0, batch: 9331, sum loss: 6243.724609, avg loss: 3.668463, ppl: 39.191631 +epoch: 0, batch: 9332, sum loss: 7391.671875, avg loss: 3.944329, ppl: 51.641651 +epoch: 0, batch: 9333, sum loss: 7853.069336, avg loss: 4.025151, ppl: 55.988777 +epoch: 0, batch: 9334, sum loss: 6374.050781, avg loss: 3.623679, ppl: 37.475174 +epoch: 0, batch: 9335, sum loss: 7299.975586, avg loss: 3.743577, ppl: 42.248844 +epoch: 0, batch: 9336, sum loss: 7105.827637, avg loss: 3.866065, ppl: 47.754105 +epoch: 0, batch: 9337, sum loss: 6334.110352, avg loss: 3.697671, ppl: 40.353210 +epoch: 0, batch: 9338, sum loss: 6329.236328, avg loss: 3.892519, ppl: 49.034260 +epoch: 0, batch: 9339, sum loss: 6747.866211, avg loss: 3.719882, ppl: 41.259537 +epoch: 0, batch: 9340, sum loss: 7230.065430, avg loss: 4.039143, ppl: 56.777641 +epoch: 0, batch: 9341, sum loss: 6807.887695, avg loss: 3.863728, ppl: 47.642612 +epoch: 0, batch: 9342, sum loss: 6217.673828, avg loss: 3.687826, ppl: 39.957870 +epoch: 0, batch: 9343, sum loss: 7156.240234, avg loss: 3.770411, ppl: 43.397907 +epoch: 0, batch: 9344, sum loss: 7406.661133, avg loss: 3.869729, ppl: 47.929386 +epoch: 0, batch: 9345, sum loss: 5390.548340, avg loss: 3.622680, ppl: 37.437782 +epoch: 0, batch: 9346, sum loss: 5853.144043, avg loss: 3.651369, ppl: 38.527359 +epoch: 0, batch: 9347, sum loss: 6002.642090, avg loss: 3.616050, ppl: 37.190357 +epoch: 0, batch: 9348, sum loss: 7088.329590, avg loss: 3.875522, ppl: 48.207863 +epoch: 0, batch: 9349, sum loss: 6076.730469, avg loss: 3.638761, ppl: 38.044662 +epoch: 0, batch: 9350, sum loss: 6913.385254, avg loss: 3.821661, ppl: 
45.680042 +epoch: 0, batch: 9351, sum loss: 6733.602539, avg loss: 3.986739, ppl: 53.878925 +epoch: 0, batch: 9352, sum loss: 6813.808594, avg loss: 3.804471, ppl: 44.901512 +epoch: 0, batch: 9353, sum loss: 6398.036133, avg loss: 3.812894, ppl: 45.281288 +epoch: 0, batch: 9354, sum loss: 7216.456543, avg loss: 3.875648, ppl: 48.213932 +epoch: 0, batch: 9355, sum loss: 6830.801758, avg loss: 3.928006, ppl: 50.805553 +epoch: 0, batch: 9356, sum loss: 7044.270508, avg loss: 3.725156, ppl: 41.477715 +epoch: 0, batch: 9357, sum loss: 7330.296387, avg loss: 4.049888, ppl: 57.391010 +epoch: 0, batch: 9358, sum loss: 6894.355469, avg loss: 3.860222, ppl: 47.475872 +epoch: 0, batch: 9359, sum loss: 6122.308594, avg loss: 3.788557, ppl: 44.192596 +epoch: 0, batch: 9360, sum loss: 6822.570312, avg loss: 3.852383, ppl: 47.105175 +epoch: 0, batch: 9361, sum loss: 6627.885254, avg loss: 3.891888, ppl: 49.003311 +epoch: 0, batch: 9362, sum loss: 6721.834961, avg loss: 4.013036, ppl: 55.314564 +epoch: 0, batch: 9363, sum loss: 6428.431152, avg loss: 3.611478, ppl: 37.020741 +epoch: 0, batch: 9364, sum loss: 7263.421875, avg loss: 4.131639, ppl: 62.279915 +epoch: 0, batch: 9365, sum loss: 6576.208984, avg loss: 3.643329, ppl: 38.218861 +epoch: 0, batch: 9366, sum loss: 6670.078613, avg loss: 3.652836, ppl: 38.583939 +epoch: 0, batch: 9367, sum loss: 6676.201660, avg loss: 3.990557, ppl: 54.085003 +epoch: 0, batch: 9368, sum loss: 5778.179688, avg loss: 3.857263, ppl: 47.335598 +epoch: 0, batch: 9369, sum loss: 4978.250000, avg loss: 3.523178, ppl: 33.891953 +epoch: 0, batch: 9370, sum loss: 5973.456543, avg loss: 3.707919, ppl: 40.768864 +epoch: 0, batch: 9371, sum loss: 5690.659180, avg loss: 3.971151, ppl: 53.045559 +epoch: 0, batch: 9372, sum loss: 5954.291992, avg loss: 3.907016, ppl: 49.750267 +epoch: 0, batch: 9373, sum loss: 6620.338379, avg loss: 3.717203, ppl: 41.149143 +epoch: 0, batch: 9374, sum loss: 5097.639648, avg loss: 3.605120, ppl: 36.786095 +epoch: 0, batch: 
9375, sum loss: 8296.310547, avg loss: 3.958163, ppl: 52.361076 +epoch: 0, batch: 9376, sum loss: 7278.195312, avg loss: 3.913008, ppl: 50.049286 +epoch: 0, batch: 9377, sum loss: 7479.188965, avg loss: 3.849300, ppl: 46.960163 +epoch: 0, batch: 9378, sum loss: 6967.818848, avg loss: 3.941074, ppl: 51.473862 +epoch: 0, batch: 9379, sum loss: 8054.077148, avg loss: 4.113420, ppl: 61.155540 +epoch: 0, batch: 9380, sum loss: 6840.147949, avg loss: 4.066675, ppl: 58.362595 +epoch: 0, batch: 9381, sum loss: 6164.308105, avg loss: 3.786430, ppl: 44.098682 +epoch: 0, batch: 9382, sum loss: 5551.559570, avg loss: 3.740943, ppl: 42.137707 +epoch: 0, batch: 9383, sum loss: 6699.211426, avg loss: 3.954670, ppl: 52.178482 +epoch: 0, batch: 9384, sum loss: 6342.923828, avg loss: 4.017051, ppl: 55.537098 +epoch: 0, batch: 9385, sum loss: 7021.835938, avg loss: 3.947069, ppl: 51.783363 +epoch: 0, batch: 9386, sum loss: 7255.599121, avg loss: 4.030889, ppl: 56.310925 +epoch: 0, batch: 9387, sum loss: 6764.936523, avg loss: 3.828487, ppl: 45.992905 +epoch: 0, batch: 9388, sum loss: 5948.559570, avg loss: 3.685601, ppl: 39.869068 +epoch: 0, batch: 9389, sum loss: 6648.960449, avg loss: 3.836676, ppl: 46.371098 +epoch: 0, batch: 9390, sum loss: 6458.791992, avg loss: 3.989371, ppl: 54.020916 +epoch: 0, batch: 9391, sum loss: 5764.019043, avg loss: 3.531875, ppl: 34.188000 +epoch: 0, batch: 9392, sum loss: 6790.781738, avg loss: 3.896031, ppl: 49.206753 +epoch: 0, batch: 9393, sum loss: 5781.343262, avg loss: 3.687081, ppl: 39.928131 +epoch: 0, batch: 9394, sum loss: 5882.529785, avg loss: 3.792733, ppl: 44.377537 +epoch: 0, batch: 9395, sum loss: 7510.092773, avg loss: 4.048567, ppl: 57.315281 +epoch: 0, batch: 9396, sum loss: 6078.566895, avg loss: 3.538165, ppl: 34.403717 +epoch: 0, batch: 9397, sum loss: 6514.337891, avg loss: 3.829710, ppl: 46.049206 +epoch: 0, batch: 9398, sum loss: 6910.909180, avg loss: 3.962677, ppl: 52.597942 +epoch: 0, batch: 9399, sum loss: 6214.774414, 
avg loss: 3.668698, ppl: 39.200825 +epoch: 0, batch: 9400, sum loss: 6442.278320, avg loss: 3.805244, ppl: 44.936222 +epoch: 0, batch: 9401, sum loss: 6461.064941, avg loss: 3.911057, ppl: 49.951698 +epoch: 0, batch: 9402, sum loss: 8294.161133, avg loss: 3.957138, ppl: 52.307411 +epoch: 0, batch: 9403, sum loss: 6145.697266, avg loss: 3.791300, ppl: 44.313965 +epoch: 0, batch: 9404, sum loss: 5589.726562, avg loss: 3.537802, ppl: 34.391235 +epoch: 0, batch: 9405, sum loss: 6354.151855, avg loss: 3.750975, ppl: 42.562565 +epoch: 0, batch: 9406, sum loss: 5705.166504, avg loss: 3.654815, ppl: 38.660378 +epoch: 0, batch: 9407, sum loss: 6408.251953, avg loss: 3.745326, ppl: 42.322792 +epoch: 0, batch: 9408, sum loss: 6049.687500, avg loss: 3.900508, ppl: 49.427536 +epoch: 0, batch: 9409, sum loss: 5755.773438, avg loss: 3.640590, ppl: 38.114326 +epoch: 0, batch: 9410, sum loss: 6984.565430, avg loss: 3.937184, ppl: 51.274029 +epoch: 0, batch: 9411, sum loss: 7035.925781, avg loss: 4.036675, ppl: 56.637733 +epoch: 0, batch: 9412, sum loss: 5170.290039, avg loss: 3.643615, ppl: 38.229805 +epoch: 0, batch: 9413, sum loss: 5705.247070, avg loss: 3.773312, ppl: 43.523972 +epoch: 0, batch: 9414, sum loss: 6348.119629, avg loss: 3.621289, ppl: 37.385735 +epoch: 0, batch: 9415, sum loss: 6523.623535, avg loss: 3.691921, ppl: 40.121826 +epoch: 0, batch: 9416, sum loss: 6866.648438, avg loss: 3.866356, ppl: 47.768009 +epoch: 0, batch: 9417, sum loss: 5701.324219, avg loss: 3.678274, ppl: 39.578022 +epoch: 0, batch: 9418, sum loss: 6091.719727, avg loss: 3.683023, ppl: 39.766426 +epoch: 0, batch: 9419, sum loss: 7817.519531, avg loss: 3.996687, ppl: 54.417549 +epoch: 0, batch: 9420, sum loss: 5116.705566, avg loss: 3.670521, ppl: 39.272343 +epoch: 0, batch: 9421, sum loss: 6980.172363, avg loss: 4.041790, ppl: 56.928181 +epoch: 0, batch: 9422, sum loss: 7317.717773, avg loss: 3.892403, ppl: 49.028557 +epoch: 0, batch: 9423, sum loss: 6244.517090, avg loss: 3.721405, ppl: 
41.322403 +epoch: 0, batch: 9424, sum loss: 5585.390625, avg loss: 3.799586, ppl: 44.682663 +epoch: 0, batch: 9425, sum loss: 6260.056152, avg loss: 3.579220, ppl: 35.845581 +epoch: 0, batch: 9426, sum loss: 5704.203125, avg loss: 3.898977, ppl: 49.351940 +epoch: 0, batch: 9427, sum loss: 8367.865234, avg loss: 4.224061, ppl: 68.310333 +epoch: 0, batch: 9428, sum loss: 5894.161133, avg loss: 3.761431, ppl: 43.009907 +epoch: 0, batch: 9429, sum loss: 7368.441406, avg loss: 4.013312, ppl: 55.329838 +epoch: 0, batch: 9430, sum loss: 5775.884766, avg loss: 3.733604, ppl: 41.829578 +epoch: 0, batch: 9431, sum loss: 7197.074219, avg loss: 3.658909, ppl: 38.818962 +epoch: 0, batch: 9432, sum loss: 7273.674316, avg loss: 3.914787, ppl: 50.138393 +epoch: 0, batch: 9433, sum loss: 5475.685547, avg loss: 3.623882, ppl: 37.482788 +epoch: 0, batch: 9434, sum loss: 6810.349121, avg loss: 3.943456, ppl: 51.596634 +epoch: 0, batch: 9435, sum loss: 6394.591797, avg loss: 3.774848, ppl: 43.590862 +epoch: 0, batch: 9436, sum loss: 7017.308105, avg loss: 3.774776, ppl: 43.587723 +epoch: 0, batch: 9437, sum loss: 5943.396484, avg loss: 3.520970, ppl: 33.817204 +epoch: 0, batch: 9438, sum loss: 6289.657227, avg loss: 3.835157, ppl: 46.300694 +epoch: 0, batch: 9439, sum loss: 6012.638184, avg loss: 3.718391, ppl: 41.198059 +epoch: 0, batch: 9440, sum loss: 6778.074219, avg loss: 3.795115, ppl: 44.483330 +epoch: 0, batch: 9441, sum loss: 5927.804688, avg loss: 3.654627, ppl: 38.653084 +epoch: 0, batch: 9442, sum loss: 6535.662598, avg loss: 3.808661, ppl: 45.090034 +epoch: 0, batch: 9443, sum loss: 6826.437988, avg loss: 3.668156, ppl: 39.179588 +epoch: 0, batch: 9444, sum loss: 5190.509766, avg loss: 3.660444, ppl: 38.878613 +epoch: 0, batch: 9445, sum loss: 6453.497559, avg loss: 3.741158, ppl: 42.146778 +epoch: 0, batch: 9446, sum loss: 6441.001953, avg loss: 3.760071, ppl: 42.951477 +epoch: 0, batch: 9447, sum loss: 5849.220703, avg loss: 3.610630, ppl: 36.989357 +epoch: 0, batch: 
9448, sum loss: 5044.559082, avg loss: 3.464670, ppl: 31.965900 +epoch: 0, batch: 9449, sum loss: 5724.701660, avg loss: 3.761302, ppl: 43.004391 +epoch: 0, batch: 9450, sum loss: 6611.593750, avg loss: 3.914502, ppl: 50.124100 +epoch: 0, batch: 9451, sum loss: 6417.096191, avg loss: 3.700747, ppl: 40.477551 +epoch: 0, batch: 9452, sum loss: 5955.681641, avg loss: 3.817745, ppl: 45.501476 +epoch: 0, batch: 9453, sum loss: 6144.214355, avg loss: 3.694657, ppl: 40.231766 +epoch: 0, batch: 9454, sum loss: 6424.974609, avg loss: 3.671414, ppl: 39.307453 +epoch: 0, batch: 9455, sum loss: 6351.725586, avg loss: 3.877732, ppl: 48.314514 +epoch: 0, batch: 9456, sum loss: 6772.490234, avg loss: 3.905704, ppl: 49.685024 +epoch: 0, batch: 9457, sum loss: 5902.872070, avg loss: 3.588372, ppl: 36.175133 +epoch: 0, batch: 9458, sum loss: 6984.761719, avg loss: 3.741168, ppl: 42.147209 +epoch: 0, batch: 9459, sum loss: 6282.117188, avg loss: 3.757247, ppl: 42.830372 +epoch: 0, batch: 9460, sum loss: 6255.455078, avg loss: 3.926839, ppl: 50.746334 +epoch: 0, batch: 9461, sum loss: 6050.670410, avg loss: 3.712068, ppl: 40.938362 +epoch: 0, batch: 9462, sum loss: 5764.612305, avg loss: 3.620988, ppl: 37.374462 +epoch: 0, batch: 9463, sum loss: 6877.927734, avg loss: 3.777006, ppl: 43.685047 +epoch: 0, batch: 9464, sum loss: 6216.828613, avg loss: 3.818691, ppl: 45.544540 +epoch: 0, batch: 9465, sum loss: 7523.403320, avg loss: 4.099947, ppl: 60.337116 +epoch: 0, batch: 9466, sum loss: 5808.043457, avg loss: 3.669010, ppl: 39.213074 +epoch: 0, batch: 9467, sum loss: 7187.298828, avg loss: 3.743385, ppl: 42.240738 +epoch: 0, batch: 9468, sum loss: 5940.181152, avg loss: 3.606667, ppl: 36.843060 +epoch: 0, batch: 9469, sum loss: 6308.711426, avg loss: 3.730758, ppl: 41.710712 +epoch: 0, batch: 9470, sum loss: 7364.933594, avg loss: 3.915435, ppl: 50.170895 +epoch: 0, batch: 9471, sum loss: 7370.017578, avg loss: 4.016358, ppl: 55.498631 +epoch: 0, batch: 9472, sum loss: 6545.971191, 
avg loss: 3.667211, ppl: 39.142578 +epoch: 0, batch: 9473, sum loss: 5651.816406, avg loss: 3.636948, ppl: 37.975742 +epoch: 0, batch: 9474, sum loss: 5479.995605, avg loss: 3.650897, ppl: 38.509174 +epoch: 0, batch: 9475, sum loss: 6548.448730, avg loss: 3.752693, ppl: 42.635735 +epoch: 0, batch: 9476, sum loss: 6707.610352, avg loss: 3.824179, ppl: 45.795197 +epoch: 0, batch: 9477, sum loss: 6278.774902, avg loss: 3.557379, ppl: 35.071171 +epoch: 0, batch: 9478, sum loss: 5732.689453, avg loss: 3.356375, ppl: 28.685032 +epoch: 0, batch: 9479, sum loss: 6121.503906, avg loss: 3.769399, ppl: 43.353996 +epoch: 0, batch: 9480, sum loss: 7059.405273, avg loss: 3.952635, ppl: 52.072376 +epoch: 0, batch: 9481, sum loss: 6809.762695, avg loss: 3.927199, ppl: 50.764568 +epoch: 0, batch: 9482, sum loss: 7066.392090, avg loss: 3.844609, ppl: 46.740417 +epoch: 0, batch: 9483, sum loss: 7322.229492, avg loss: 3.837646, ppl: 46.416092 +epoch: 0, batch: 9484, sum loss: 5797.414062, avg loss: 3.740267, ppl: 42.109245 +epoch: 0, batch: 9485, sum loss: 7335.732422, avg loss: 3.912391, ppl: 50.018387 +epoch: 0, batch: 9486, sum loss: 6910.687012, avg loss: 4.031906, ppl: 56.368256 +epoch: 0, batch: 9487, sum loss: 5957.731445, avg loss: 3.909273, ppl: 49.862671 +epoch: 0, batch: 9488, sum loss: 7179.707520, avg loss: 4.024500, ppl: 55.952320 +epoch: 0, batch: 9489, sum loss: 7080.262695, avg loss: 3.798424, ppl: 44.630802 +epoch: 0, batch: 9490, sum loss: 7717.282227, avg loss: 4.133520, ppl: 62.397182 +epoch: 0, batch: 9491, sum loss: 7205.711914, avg loss: 3.802486, ppl: 44.812469 +epoch: 0, batch: 9492, sum loss: 6688.909180, avg loss: 3.770524, ppl: 43.402782 +epoch: 0, batch: 9493, sum loss: 5516.186035, avg loss: 3.648271, ppl: 38.408215 +epoch: 0, batch: 9494, sum loss: 5112.944824, avg loss: 3.504417, ppl: 33.262062 +epoch: 0, batch: 9495, sum loss: 5938.742188, avg loss: 3.821584, ppl: 45.676525 +epoch: 0, batch: 9496, sum loss: 6366.125977, avg loss: 3.862941, ppl: 
47.605129 +epoch: 0, batch: 9497, sum loss: 6547.848633, avg loss: 3.451686, ppl: 31.553551 +epoch: 0, batch: 9498, sum loss: 6114.176758, avg loss: 3.575542, ppl: 35.713970 +epoch: 0, batch: 9499, sum loss: 7547.846191, avg loss: 3.777701, ppl: 43.715408 +epoch: 0, batch: 9500, sum loss: 6127.118164, avg loss: 3.798585, ppl: 44.637997 +epoch: 0, batch: 9501, sum loss: 5721.139160, avg loss: 3.712615, ppl: 40.960770 +epoch: 0, batch: 9502, sum loss: 6134.292480, avg loss: 3.744989, ppl: 42.308548 +epoch: 0, batch: 9503, sum loss: 7123.481934, avg loss: 3.918307, ppl: 50.315182 +epoch: 0, batch: 9504, sum loss: 6426.104492, avg loss: 3.620340, ppl: 37.350277 +epoch: 0, batch: 9505, sum loss: 7093.345703, avg loss: 3.518525, ppl: 33.734631 +epoch: 0, batch: 9506, sum loss: 7746.737793, avg loss: 4.018018, ppl: 55.590801 +epoch: 0, batch: 9507, sum loss: 5942.822266, avg loss: 3.654872, ppl: 38.662579 +epoch: 0, batch: 9508, sum loss: 5946.371582, avg loss: 3.765910, ppl: 43.202988 +epoch: 0, batch: 9509, sum loss: 6127.470703, avg loss: 3.693472, ppl: 40.184139 +epoch: 0, batch: 9510, sum loss: 5294.461426, avg loss: 3.945202, ppl: 51.686794 +epoch: 0, batch: 9511, sum loss: 6545.902832, avg loss: 3.642684, ppl: 38.194210 +epoch: 0, batch: 9512, sum loss: 5245.706055, avg loss: 3.670893, ppl: 39.286972 +epoch: 0, batch: 9513, sum loss: 6876.103516, avg loss: 3.765665, ppl: 43.192410 +epoch: 0, batch: 9514, sum loss: 5674.027344, avg loss: 3.859883, ppl: 47.459778 +epoch: 0, batch: 9515, sum loss: 6211.369141, avg loss: 3.719383, ppl: 41.238934 +epoch: 0, batch: 9516, sum loss: 6686.213379, avg loss: 3.975157, ppl: 53.258453 +epoch: 0, batch: 9517, sum loss: 6453.086426, avg loss: 3.864124, ppl: 47.661484 +epoch: 0, batch: 9518, sum loss: 5731.006836, avg loss: 3.448259, ppl: 31.445608 +epoch: 0, batch: 9519, sum loss: 5727.030273, avg loss: 3.718851, ppl: 41.217003 +epoch: 0, batch: 9520, sum loss: 6811.846191, avg loss: 3.896937, ppl: 49.251381 +epoch: 0, batch: 
9521, sum loss: 5602.530762, avg loss: 3.577606, ppl: 35.787754 +epoch: 0, batch: 9522, sum loss: 6813.938477, avg loss: 3.721430, ppl: 41.323456 +epoch: 0, batch: 9523, sum loss: 6022.974121, avg loss: 3.604413, ppl: 36.760109 +epoch: 0, batch: 9524, sum loss: 6143.417480, avg loss: 3.698626, ppl: 40.391762 +epoch: 0, batch: 9525, sum loss: 6114.610352, avg loss: 3.635321, ppl: 37.914028 +epoch: 0, batch: 9526, sum loss: 5387.494141, avg loss: 3.718078, ppl: 41.185146 +epoch: 0, batch: 9527, sum loss: 6219.081543, avg loss: 3.792123, ppl: 44.350449 +epoch: 0, batch: 9528, sum loss: 5729.523926, avg loss: 3.384243, ppl: 29.495670 +epoch: 0, batch: 9529, sum loss: 6238.755371, avg loss: 3.758286, ppl: 42.874897 +epoch: 0, batch: 9530, sum loss: 5604.908203, avg loss: 3.668134, ppl: 39.178719 +epoch: 0, batch: 9531, sum loss: 7391.716309, avg loss: 4.162003, ppl: 64.199959 +epoch: 0, batch: 9532, sum loss: 6051.768555, avg loss: 3.617316, ppl: 37.237473 +epoch: 0, batch: 9533, sum loss: 6270.287598, avg loss: 3.804786, ppl: 44.915646 +epoch: 0, batch: 9534, sum loss: 7209.135742, avg loss: 3.969788, ppl: 52.973316 +epoch: 0, batch: 9535, sum loss: 6559.763672, avg loss: 3.716580, ppl: 41.123508 +epoch: 0, batch: 9536, sum loss: 7300.171875, avg loss: 3.885137, ppl: 48.673603 +epoch: 0, batch: 9537, sum loss: 6493.153809, avg loss: 4.008120, ppl: 55.043270 +epoch: 0, batch: 9538, sum loss: 5825.934570, avg loss: 3.652624, ppl: 38.575745 +epoch: 0, batch: 9539, sum loss: 4996.238281, avg loss: 3.513529, ppl: 33.566517 +epoch: 0, batch: 9540, sum loss: 6688.116211, avg loss: 3.658707, ppl: 38.811123 +epoch: 0, batch: 9541, sum loss: 5088.930176, avg loss: 3.593877, ppl: 36.374840 +epoch: 0, batch: 9542, sum loss: 7314.935547, avg loss: 3.762827, ppl: 43.070019 +epoch: 0, batch: 9543, sum loss: 7724.028320, avg loss: 3.906944, ppl: 49.746696 +epoch: 0, batch: 9544, sum loss: 6430.230957, avg loss: 3.932863, ppl: 51.052921 +epoch: 0, batch: 9545, sum loss: 7557.359375, 
avg loss: 3.962957, ppl: 52.612679 +epoch: 0, batch: 9546, sum loss: 5884.741699, avg loss: 3.853793, ppl: 47.171642 +epoch: 0, batch: 9547, sum loss: 5425.171387, avg loss: 3.655776, ppl: 38.697529 +epoch: 0, batch: 9548, sum loss: 5982.896973, avg loss: 3.755742, ppl: 42.765942 +epoch: 0, batch: 9549, sum loss: 7142.517578, avg loss: 3.787125, ppl: 44.129353 +epoch: 0, batch: 9550, sum loss: 6273.738281, avg loss: 3.664567, ppl: 39.039219 +epoch: 0, batch: 9551, sum loss: 5584.015625, avg loss: 3.685819, ppl: 39.877766 +epoch: 0, batch: 9552, sum loss: 6188.562500, avg loss: 3.447667, ppl: 31.426991 +epoch: 0, batch: 9553, sum loss: 7456.749512, avg loss: 3.684165, ppl: 39.811867 +epoch: 0, batch: 9554, sum loss: 7539.583008, avg loss: 3.840847, ppl: 46.564911 +epoch: 0, batch: 9555, sum loss: 7001.832520, avg loss: 3.911638, ppl: 49.980766 +epoch: 0, batch: 9556, sum loss: 6959.543945, avg loss: 3.859980, ppl: 47.464409 +epoch: 0, batch: 9557, sum loss: 6685.875000, avg loss: 3.946797, ppl: 51.769302 +epoch: 0, batch: 9558, sum loss: 6479.444336, avg loss: 3.683596, ppl: 39.789204 +epoch: 0, batch: 9559, sum loss: 6366.503906, avg loss: 3.714413, ppl: 41.034500 +epoch: 0, batch: 9560, sum loss: 6620.933594, avg loss: 3.860603, ppl: 47.493965 +epoch: 0, batch: 9561, sum loss: 5922.979004, avg loss: 3.500579, ppl: 33.134621 +epoch: 0, batch: 9562, sum loss: 5916.833984, avg loss: 3.787986, ppl: 44.167358 +epoch: 0, batch: 9563, sum loss: 6224.084473, avg loss: 3.738189, ppl: 42.021820 +epoch: 0, batch: 9564, sum loss: 5760.666992, avg loss: 3.521190, ppl: 33.824661 +epoch: 0, batch: 9565, sum loss: 5827.449707, avg loss: 3.577317, ppl: 35.777431 +epoch: 0, batch: 9566, sum loss: 6659.387695, avg loss: 3.856044, ppl: 47.277939 +epoch: 0, batch: 9567, sum loss: 6761.774414, avg loss: 3.892789, ppl: 49.047497 +epoch: 0, batch: 9568, sum loss: 7461.495605, avg loss: 3.954158, ppl: 52.151741 +epoch: 0, batch: 9569, sum loss: 6406.267578, avg loss: 3.737612, ppl: 
41.997601 +epoch: 0, batch: 9570, sum loss: 5177.193359, avg loss: 3.517115, ppl: 33.687103 +epoch: 0, batch: 9571, sum loss: 6393.502930, avg loss: 3.919990, ppl: 50.399921 +epoch: 0, batch: 9572, sum loss: 7278.042480, avg loss: 4.043357, ppl: 57.017422 +epoch: 0, batch: 9573, sum loss: 7282.819824, avg loss: 3.783283, ppl: 43.960117 +epoch: 0, batch: 9574, sum loss: 6003.818359, avg loss: 3.548356, ppl: 34.756134 +epoch: 0, batch: 9575, sum loss: 5695.125977, avg loss: 3.555010, ppl: 34.988163 +epoch: 0, batch: 9576, sum loss: 5610.505371, avg loss: 3.629046, ppl: 37.676865 +epoch: 0, batch: 9577, sum loss: 7525.379883, avg loss: 4.002861, ppl: 54.754608 +epoch: 0, batch: 9578, sum loss: 6695.480469, avg loss: 3.674797, ppl: 39.440662 +epoch: 0, batch: 9579, sum loss: 6158.957031, avg loss: 3.723674, ppl: 41.416290 +epoch: 0, batch: 9580, sum loss: 5683.024414, avg loss: 3.850288, ppl: 47.006584 +epoch: 0, batch: 9581, sum loss: 7312.834961, avg loss: 3.991722, ppl: 54.148056 +epoch: 0, batch: 9582, sum loss: 7048.423340, avg loss: 4.048491, ppl: 57.310936 +epoch: 0, batch: 9583, sum loss: 6704.542480, avg loss: 3.700079, ppl: 40.450489 +epoch: 0, batch: 9584, sum loss: 5749.473633, avg loss: 3.726166, ppl: 41.519608 +epoch: 0, batch: 9585, sum loss: 7012.739258, avg loss: 3.993587, ppl: 54.249161 +epoch: 0, batch: 9586, sum loss: 6441.518066, avg loss: 3.920583, ppl: 50.429836 +epoch: 0, batch: 9587, sum loss: 5981.534180, avg loss: 3.851600, ppl: 47.068298 +epoch: 0, batch: 9588, sum loss: 5864.658203, avg loss: 3.697767, ppl: 40.357079 +epoch: 0, batch: 9589, sum loss: 5333.461914, avg loss: 3.801470, ppl: 44.766933 +epoch: 0, batch: 9590, sum loss: 5281.053223, avg loss: 3.383122, ppl: 29.462608 +epoch: 0, batch: 9591, sum loss: 5903.408691, avg loss: 3.781812, ppl: 43.895515 +epoch: 0, batch: 9592, sum loss: 6358.453125, avg loss: 3.839646, ppl: 46.508991 +epoch: 0, batch: 9593, sum loss: 5892.912598, avg loss: 3.902591, ppl: 49.530628 +epoch: 0, batch: 
9594, sum loss: 7532.089355, avg loss: 4.040821, ppl: 56.872993 +epoch: 0, batch: 9595, sum loss: 7843.772949, avg loss: 3.684252, ppl: 39.815338 +epoch: 0, batch: 9596, sum loss: 6932.445801, avg loss: 3.729126, ppl: 41.642704 +epoch: 0, batch: 9597, sum loss: 5739.088867, avg loss: 3.755948, ppl: 42.774765 +epoch: 0, batch: 9598, sum loss: 5990.632812, avg loss: 3.675235, ppl: 39.457920 +epoch: 0, batch: 9599, sum loss: 7291.145508, avg loss: 3.739049, ppl: 42.057972 +epoch: 0, batch: 9600, sum loss: 7503.635742, avg loss: 4.127412, ppl: 62.017235 +epoch: 0, batch: 9601, sum loss: 7408.192383, avg loss: 3.864472, ppl: 47.678089 +epoch: 0, batch: 9602, sum loss: 6441.312988, avg loss: 3.606558, ppl: 36.839046 +epoch: 0, batch: 9603, sum loss: 6714.148926, avg loss: 3.769876, ppl: 43.374687 +epoch: 0, batch: 9604, sum loss: 5854.956055, avg loss: 3.533468, ppl: 34.242508 +epoch: 0, batch: 9605, sum loss: 6642.850586, avg loss: 3.848697, ppl: 46.931858 +epoch: 0, batch: 9606, sum loss: 5641.987305, avg loss: 3.566364, ppl: 35.387672 +epoch: 0, batch: 9607, sum loss: 6260.941895, avg loss: 3.625328, ppl: 37.537045 +epoch: 0, batch: 9608, sum loss: 7286.149414, avg loss: 3.692930, ppl: 40.162331 +epoch: 0, batch: 9609, sum loss: 7124.418945, avg loss: 3.783547, ppl: 43.971729 +epoch: 0, batch: 9610, sum loss: 6249.509277, avg loss: 3.616614, ppl: 37.211361 +epoch: 0, batch: 9611, sum loss: 5464.589355, avg loss: 3.611758, ppl: 37.031086 +epoch: 0, batch: 9612, sum loss: 6513.833984, avg loss: 3.669765, ppl: 39.242702 +epoch: 0, batch: 9613, sum loss: 6491.157227, avg loss: 3.675627, ppl: 39.473400 +epoch: 0, batch: 9614, sum loss: 5553.196777, avg loss: 3.737010, ppl: 41.972294 +epoch: 0, batch: 9615, sum loss: 6171.426270, avg loss: 3.760772, ppl: 42.981575 +epoch: 0, batch: 9616, sum loss: 5845.430664, avg loss: 3.637480, ppl: 37.995975 +epoch: 0, batch: 9617, sum loss: 5982.693848, avg loss: 3.743864, ppl: 42.260952 +epoch: 0, batch: 9618, sum loss: 5988.688965, 
avg loss: 3.692163, ppl: 40.131565 +epoch: 0, batch: 9619, sum loss: 6309.402832, avg loss: 3.837836, ppl: 46.424915 +epoch: 0, batch: 9620, sum loss: 5238.662109, avg loss: 3.544426, ppl: 34.619823 +epoch: 0, batch: 9621, sum loss: 7244.196289, avg loss: 3.903123, ppl: 49.556969 +epoch: 0, batch: 9622, sum loss: 7352.761719, avg loss: 3.652639, ppl: 38.576332 +epoch: 0, batch: 9623, sum loss: 5922.864258, avg loss: 3.811367, ppl: 45.212204 +epoch: 0, batch: 9624, sum loss: 5821.572754, avg loss: 3.802464, ppl: 44.811485 +epoch: 0, batch: 9625, sum loss: 5899.823730, avg loss: 3.774679, ppl: 43.583523 +epoch: 0, batch: 9626, sum loss: 5343.524902, avg loss: 3.603186, ppl: 36.715027 +epoch: 0, batch: 9627, sum loss: 5306.549805, avg loss: 3.682547, ppl: 39.747498 +epoch: 0, batch: 9628, sum loss: 5271.446289, avg loss: 3.403129, ppl: 30.057993 +epoch: 0, batch: 9629, sum loss: 6640.069336, avg loss: 3.602859, ppl: 36.703018 +epoch: 0, batch: 9630, sum loss: 6064.835938, avg loss: 3.711650, ppl: 40.921268 +epoch: 0, batch: 9631, sum loss: 7255.822266, avg loss: 4.022074, ppl: 55.816761 +epoch: 0, batch: 9632, sum loss: 6868.197266, avg loss: 3.708530, ppl: 40.793785 +epoch: 0, batch: 9633, sum loss: 6772.581055, avg loss: 3.865629, ppl: 47.733273 +epoch: 0, batch: 9634, sum loss: 6150.099609, avg loss: 3.877743, ppl: 48.315022 +epoch: 0, batch: 9635, sum loss: 6307.566406, avg loss: 3.903197, ppl: 49.560642 +epoch: 0, batch: 9636, sum loss: 5487.968262, avg loss: 3.547491, ppl: 34.726074 +epoch: 0, batch: 9637, sum loss: 6472.225586, avg loss: 3.965825, ppl: 52.763760 +epoch: 0, batch: 9638, sum loss: 6340.273926, avg loss: 3.835616, ppl: 46.321968 +epoch: 0, batch: 9639, sum loss: 6856.053711, avg loss: 3.583928, ppl: 36.014717 +epoch: 0, batch: 9640, sum loss: 6497.298828, avg loss: 3.691647, ppl: 40.110859 +epoch: 0, batch: 9641, sum loss: 6502.766602, avg loss: 3.957861, ppl: 52.345261 +epoch: 0, batch: 9642, sum loss: 6734.627930, avg loss: 3.739383, ppl: 
42.072002 +epoch: 0, batch: 9643, sum loss: 6575.332031, avg loss: 3.575493, ppl: 35.712234 +epoch: 0, batch: 9644, sum loss: 7381.621094, avg loss: 3.994384, ppl: 54.292377 +epoch: 0, batch: 9645, sum loss: 7438.823242, avg loss: 3.826555, ppl: 45.904148 +epoch: 0, batch: 9646, sum loss: 4951.430664, avg loss: 3.246840, ppl: 25.708965 +epoch: 0, batch: 9647, sum loss: 5594.437988, avg loss: 3.632752, ppl: 37.816742 +epoch: 0, batch: 9648, sum loss: 5603.716309, avg loss: 3.566974, ppl: 35.409286 +epoch: 0, batch: 9649, sum loss: 6355.411133, avg loss: 3.695006, ppl: 40.245831 +epoch: 0, batch: 9650, sum loss: 5991.967285, avg loss: 3.447622, ppl: 31.425575 +epoch: 0, batch: 9651, sum loss: 6437.604980, avg loss: 3.704031, ppl: 40.610657 +epoch: 0, batch: 9652, sum loss: 5671.964355, avg loss: 3.509879, ppl: 33.444218 +epoch: 0, batch: 9653, sum loss: 6695.658203, avg loss: 3.697216, ppl: 40.334866 +epoch: 0, batch: 9654, sum loss: 6682.402832, avg loss: 3.735273, ppl: 41.899448 +epoch: 0, batch: 9655, sum loss: 5978.915039, avg loss: 3.656829, ppl: 38.738297 +epoch: 0, batch: 9656, sum loss: 6574.526367, avg loss: 3.626325, ppl: 37.574463 +epoch: 0, batch: 9657, sum loss: 5972.384766, avg loss: 3.900970, ppl: 49.450394 +epoch: 0, batch: 9658, sum loss: 5886.393555, avg loss: 3.704464, ppl: 40.628273 +epoch: 0, batch: 9659, sum loss: 6621.861328, avg loss: 3.537319, ppl: 34.374645 +epoch: 0, batch: 9660, sum loss: 5765.609375, avg loss: 3.530685, ppl: 34.147366 +epoch: 0, batch: 9661, sum loss: 6765.362793, avg loss: 3.910614, ppl: 49.929611 +epoch: 0, batch: 9662, sum loss: 7256.024414, avg loss: 3.984637, ppl: 53.765781 +epoch: 0, batch: 9663, sum loss: 6927.956055, avg loss: 3.696882, ppl: 40.321365 +epoch: 0, batch: 9664, sum loss: 5896.053223, avg loss: 3.678137, ppl: 39.572586 +epoch: 0, batch: 9665, sum loss: 6458.623047, avg loss: 3.952646, ppl: 52.072960 +epoch: 0, batch: 9666, sum loss: 6835.356445, avg loss: 3.990284, ppl: 54.070240 +epoch: 0, batch: 
9667, sum loss: 5632.988281, avg loss: 3.725522, ppl: 41.492867 +epoch: 0, batch: 9668, sum loss: 5628.956543, avg loss: 3.605994, ppl: 36.818264 +epoch: 0, batch: 9669, sum loss: 5591.842773, avg loss: 3.703207, ppl: 40.577236 +epoch: 0, batch: 9670, sum loss: 7631.115234, avg loss: 3.688311, ppl: 39.977264 +epoch: 0, batch: 9671, sum loss: 5100.912598, avg loss: 3.772864, ppl: 43.504498 +epoch: 0, batch: 9672, sum loss: 6206.405762, avg loss: 3.625237, ppl: 37.533619 +epoch: 0, batch: 9673, sum loss: 6974.491699, avg loss: 3.661150, ppl: 38.906078 +epoch: 0, batch: 9674, sum loss: 6485.662109, avg loss: 4.001025, ppl: 54.654152 +epoch: 0, batch: 9675, sum loss: 4114.207031, avg loss: 3.568263, ppl: 35.454948 +epoch: 0, batch: 9676, sum loss: 5617.923340, avg loss: 3.502446, ppl: 33.196556 +epoch: 0, batch: 9677, sum loss: 6585.283691, avg loss: 3.728926, ppl: 41.634377 +epoch: 0, batch: 9678, sum loss: 6595.291016, avg loss: 3.594164, ppl: 36.385273 +epoch: 0, batch: 9679, sum loss: 5697.943848, avg loss: 3.746183, ppl: 42.359085 +epoch: 0, batch: 9680, sum loss: 5868.541992, avg loss: 3.726058, ppl: 41.515144 +epoch: 0, batch: 9681, sum loss: 6900.124512, avg loss: 3.902786, ppl: 49.540253 +epoch: 0, batch: 9682, sum loss: 6532.222656, avg loss: 3.767141, ppl: 43.256214 +epoch: 0, batch: 9683, sum loss: 5727.978516, avg loss: 3.385330, ppl: 29.527740 +epoch: 0, batch: 9684, sum loss: 5412.151855, avg loss: 3.507552, ppl: 33.366474 +epoch: 0, batch: 9685, sum loss: 6837.776855, avg loss: 3.885100, ppl: 48.671825 +epoch: 0, batch: 9686, sum loss: 7282.972168, avg loss: 3.695064, ppl: 40.248161 +epoch: 0, batch: 9687, sum loss: 5957.021973, avg loss: 3.952901, ppl: 52.086258 +epoch: 0, batch: 9688, sum loss: 6519.282715, avg loss: 3.994659, ppl: 54.307304 +epoch: 0, batch: 9689, sum loss: 6020.219238, avg loss: 3.807855, ppl: 45.053699 +epoch: 0, batch: 9690, sum loss: 7365.448242, avg loss: 3.945071, ppl: 51.680019 +epoch: 0, batch: 9691, sum loss: 6253.626465, 
avg loss: 3.563320, ppl: 35.280128 +epoch: 0, batch: 9692, sum loss: 6667.799316, avg loss: 3.838687, ppl: 46.464436 +epoch: 0, batch: 9693, sum loss: 5733.049316, avg loss: 3.545485, ppl: 34.656498 +epoch: 0, batch: 9694, sum loss: 6409.952148, avg loss: 3.838295, ppl: 46.446205 +epoch: 0, batch: 9695, sum loss: 5839.713867, avg loss: 3.767557, ppl: 43.274235 +epoch: 0, batch: 9696, sum loss: 5759.864746, avg loss: 3.675728, ppl: 39.477371 +epoch: 0, batch: 9697, sum loss: 5737.540527, avg loss: 3.711216, ppl: 40.903534 +epoch: 0, batch: 9698, sum loss: 6355.895996, avg loss: 3.703902, ppl: 40.605450 +epoch: 0, batch: 9699, sum loss: 6471.190430, avg loss: 3.749241, ppl: 42.488827 +epoch: 0, batch: 9700, sum loss: 6303.430176, avg loss: 3.947045, ppl: 51.782104 +epoch: 0, batch: 9701, sum loss: 7273.215820, avg loss: 3.798024, ppl: 44.612938 +epoch: 0, batch: 9702, sum loss: 7584.513184, avg loss: 4.082085, ppl: 59.268898 +epoch: 0, batch: 9703, sum loss: 6494.593750, avg loss: 3.891309, ppl: 48.974930 +epoch: 0, batch: 9704, sum loss: 7283.985840, avg loss: 4.178994, ppl: 65.300110 +epoch: 0, batch: 9705, sum loss: 5481.382324, avg loss: 3.728832, ppl: 41.630436 +epoch: 0, batch: 9706, sum loss: 5206.702148, avg loss: 3.573577, ppl: 35.643867 +epoch: 0, batch: 9707, sum loss: 4951.965820, avg loss: 3.494683, ppl: 32.939846 +epoch: 0, batch: 9708, sum loss: 6870.924316, avg loss: 3.935237, ppl: 51.174297 +epoch: 0, batch: 9709, sum loss: 6077.411621, avg loss: 3.678821, ppl: 39.599674 +epoch: 0, batch: 9710, sum loss: 6509.331055, avg loss: 3.817790, ppl: 45.503513 +epoch: 0, batch: 9711, sum loss: 7247.501465, avg loss: 3.828580, ppl: 45.997162 +epoch: 0, batch: 9712, sum loss: 6317.560547, avg loss: 3.817257, ppl: 45.479294 +epoch: 0, batch: 9713, sum loss: 6074.874023, avg loss: 3.854615, ppl: 47.210457 +epoch: 0, batch: 9714, sum loss: 7195.065918, avg loss: 4.008393, ppl: 55.058338 +epoch: 0, batch: 9715, sum loss: 5645.061035, avg loss: 3.670391, ppl: 
39.267250 +epoch: 0, batch: 9716, sum loss: 5933.665039, avg loss: 3.825703, ppl: 45.865017 +epoch: 0, batch: 9717, sum loss: 5126.633789, avg loss: 3.567595, ppl: 35.431278 +epoch: 0, batch: 9718, sum loss: 6998.024414, avg loss: 3.881322, ppl: 48.488293 +epoch: 0, batch: 9719, sum loss: 7207.782715, avg loss: 3.947307, ppl: 51.795712 +epoch: 0, batch: 9720, sum loss: 6617.072266, avg loss: 3.896980, ppl: 49.253494 +epoch: 0, batch: 9721, sum loss: 5949.158203, avg loss: 3.772453, ppl: 43.486610 +epoch: 0, batch: 9722, sum loss: 6201.061523, avg loss: 3.645539, ppl: 38.303406 +epoch: 0, batch: 9723, sum loss: 6309.625488, avg loss: 3.589093, ppl: 36.201225 +epoch: 0, batch: 9724, sum loss: 6892.332520, avg loss: 3.988618, ppl: 53.980259 +epoch: 0, batch: 9725, sum loss: 6351.538574, avg loss: 3.720878, ppl: 41.300636 +epoch: 0, batch: 9726, sum loss: 6320.240234, avg loss: 3.780048, ppl: 43.818150 +epoch: 0, batch: 9727, sum loss: 7620.174805, avg loss: 3.981282, ppl: 53.585712 +epoch: 0, batch: 9728, sum loss: 5337.480469, avg loss: 3.331760, ppl: 27.987568 +epoch: 0, batch: 9729, sum loss: 6088.615234, avg loss: 3.710308, ppl: 40.866386 +epoch: 0, batch: 9730, sum loss: 5684.223633, avg loss: 3.771880, ppl: 43.461712 +epoch: 0, batch: 9731, sum loss: 6139.294922, avg loss: 3.577678, ppl: 35.790329 +epoch: 0, batch: 9732, sum loss: 5312.587891, avg loss: 3.712500, ppl: 40.956081 +epoch: 0, batch: 9733, sum loss: 6512.269043, avg loss: 3.792818, ppl: 44.381306 +epoch: 0, batch: 9734, sum loss: 6857.786621, avg loss: 3.973225, ppl: 53.155689 +epoch: 0, batch: 9735, sum loss: 6472.717773, avg loss: 3.622114, ppl: 37.416588 +epoch: 0, batch: 9736, sum loss: 5716.296875, avg loss: 3.400533, ppl: 29.980089 +epoch: 0, batch: 9737, sum loss: 5918.141113, avg loss: 3.875665, ppl: 48.214771 +epoch: 0, batch: 9738, sum loss: 8050.985352, avg loss: 3.971872, ppl: 53.083828 +epoch: 0, batch: 9739, sum loss: 5982.328125, avg loss: 3.715732, ppl: 41.088638 +epoch: 0, batch: 
9740, sum loss: 5117.423828, avg loss: 3.548838, ppl: 34.772877 +epoch: 0, batch: 9741, sum loss: 5588.868164, avg loss: 3.662430, ppl: 38.955894 +epoch: 0, batch: 9742, sum loss: 7303.758789, avg loss: 4.059899, ppl: 57.968475 +epoch: 0, batch: 9743, sum loss: 5960.754883, avg loss: 3.608205, ppl: 36.899769 +epoch: 0, batch: 9744, sum loss: 7448.402832, avg loss: 3.873324, ppl: 48.102020 +epoch: 0, batch: 9745, sum loss: 8195.379883, avg loss: 3.906282, ppl: 49.713783 +epoch: 0, batch: 9746, sum loss: 5778.472168, avg loss: 3.737692, ppl: 42.000954 +epoch: 0, batch: 9747, sum loss: 6421.790527, avg loss: 3.555809, ppl: 35.016129 +epoch: 0, batch: 9748, sum loss: 7391.608887, avg loss: 3.859848, ppl: 47.458138 +epoch: 0, batch: 9749, sum loss: 5765.237305, avg loss: 3.674466, ppl: 39.427601 +epoch: 0, batch: 9750, sum loss: 6490.382324, avg loss: 3.700332, ppl: 40.460732 +epoch: 0, batch: 9751, sum loss: 6019.292480, avg loss: 3.606526, ppl: 36.837868 +epoch: 0, batch: 9752, sum loss: 6382.326172, avg loss: 3.778760, ppl: 43.761761 +epoch: 0, batch: 9753, sum loss: 5422.629395, avg loss: 3.482742, ppl: 32.548851 +epoch: 0, batch: 9754, sum loss: 6040.280273, avg loss: 3.747072, ppl: 42.396763 +epoch: 0, batch: 9755, sum loss: 6013.013184, avg loss: 3.686703, ppl: 39.913055 +epoch: 0, batch: 9756, sum loss: 6365.165527, avg loss: 3.951065, ppl: 51.990711 +epoch: 0, batch: 9757, sum loss: 7920.447266, avg loss: 3.905546, ppl: 49.677193 +epoch: 0, batch: 9758, sum loss: 5891.307129, avg loss: 3.677470, ppl: 39.546215 +epoch: 0, batch: 9759, sum loss: 6852.166992, avg loss: 3.851696, ppl: 47.072819 +epoch: 0, batch: 9760, sum loss: 6226.847168, avg loss: 3.618156, ppl: 37.268787 +epoch: 0, batch: 9761, sum loss: 6385.986816, avg loss: 3.760888, ppl: 42.986565 +epoch: 0, batch: 9762, sum loss: 5549.625977, avg loss: 3.566598, ppl: 35.395962 +epoch: 0, batch: 9763, sum loss: 6113.788086, avg loss: 3.705326, ppl: 40.663303 +epoch: 0, batch: 9764, sum loss: 5384.529785, 
avg loss: 3.810707, ppl: 45.182388 +epoch: 0, batch: 9765, sum loss: 6330.568359, avg loss: 3.674155, ppl: 39.415318 +epoch: 0, batch: 9766, sum loss: 6793.871582, avg loss: 3.866745, ppl: 47.786598 +epoch: 0, batch: 9767, sum loss: 7067.722168, avg loss: 3.505815, ppl: 33.308571 +epoch: 0, batch: 9768, sum loss: 7271.637695, avg loss: 3.886498, ppl: 48.739910 +epoch: 0, batch: 9769, sum loss: 6187.126465, avg loss: 3.864539, ppl: 47.681282 +epoch: 0, batch: 9770, sum loss: 5554.319824, avg loss: 3.535532, ppl: 34.313248 +epoch: 0, batch: 9771, sum loss: 7390.217773, avg loss: 3.780163, ppl: 43.823166 +epoch: 0, batch: 9772, sum loss: 5760.748535, avg loss: 3.523394, ppl: 33.899277 +epoch: 0, batch: 9773, sum loss: 6450.048828, avg loss: 3.673149, ppl: 39.375690 +epoch: 0, batch: 9774, sum loss: 6322.524902, avg loss: 3.716946, ppl: 41.138550 +epoch: 0, batch: 9775, sum loss: 6917.673340, avg loss: 3.950699, ppl: 51.971676 +epoch: 0, batch: 9776, sum loss: 5314.955078, avg loss: 3.729793, ppl: 41.670475 +epoch: 0, batch: 9777, sum loss: 7009.563965, avg loss: 3.975930, ppl: 53.299648 +epoch: 0, batch: 9778, sum loss: 6011.410156, avg loss: 3.715334, ppl: 41.072292 +epoch: 0, batch: 9779, sum loss: 6493.847168, avg loss: 3.786500, ppl: 44.101761 +epoch: 0, batch: 9780, sum loss: 7071.831543, avg loss: 3.649036, ppl: 38.437599 +epoch: 0, batch: 9781, sum loss: 6393.170410, avg loss: 3.785181, ppl: 44.043633 +epoch: 0, batch: 9782, sum loss: 6666.466309, avg loss: 3.840130, ppl: 46.531540 +epoch: 0, batch: 9783, sum loss: 6253.356445, avg loss: 3.623034, ppl: 37.451012 +epoch: 0, batch: 9784, sum loss: 6857.613281, avg loss: 3.874358, ppl: 48.151772 +epoch: 0, batch: 9785, sum loss: 7032.416992, avg loss: 3.868216, ppl: 47.856934 +epoch: 0, batch: 9786, sum loss: 5734.612793, avg loss: 3.846152, ppl: 46.812595 +epoch: 0, batch: 9787, sum loss: 6225.091309, avg loss: 3.814394, ppl: 45.349277 +epoch: 0, batch: 9788, sum loss: 5445.542969, avg loss: 3.712026, ppl: 
40.936676 +epoch: 0, batch: 9789, sum loss: 6296.955078, avg loss: 3.648294, ppl: 38.409073 +epoch: 0, batch: 9790, sum loss: 5855.854492, avg loss: 3.594754, ppl: 36.406742 +epoch: 0, batch: 9791, sum loss: 5609.892090, avg loss: 3.477925, ppl: 32.392426 +epoch: 0, batch: 9792, sum loss: 6828.457520, avg loss: 3.860067, ppl: 47.468513 +epoch: 0, batch: 9793, sum loss: 5741.069336, avg loss: 3.842751, ppl: 46.653622 +epoch: 0, batch: 9794, sum loss: 5920.019531, avg loss: 3.695393, ppl: 40.261387 +epoch: 0, batch: 9795, sum loss: 6423.154297, avg loss: 3.604464, ppl: 36.761967 +epoch: 0, batch: 9796, sum loss: 6221.503906, avg loss: 3.600407, ppl: 36.613148 +epoch: 0, batch: 9797, sum loss: 5723.066406, avg loss: 3.640627, ppl: 38.115742 +epoch: 0, batch: 9798, sum loss: 5909.650879, avg loss: 3.668312, ppl: 39.185719 +epoch: 0, batch: 9799, sum loss: 6127.158691, avg loss: 3.686618, ppl: 39.909637 +epoch: 0, batch: 9800, sum loss: 5825.068359, avg loss: 3.551871, ppl: 34.878517 +epoch: 0, batch: 9801, sum loss: 5485.417480, avg loss: 3.627922, ppl: 37.634525 +epoch: 0, batch: 9802, sum loss: 4671.991211, avg loss: 3.445421, ppl: 31.356489 +epoch: 0, batch: 9803, sum loss: 7336.313965, avg loss: 3.952755, ppl: 52.078671 +epoch: 0, batch: 9804, sum loss: 6328.395996, avg loss: 3.787191, ppl: 44.132267 +epoch: 0, batch: 9805, sum loss: 7170.988770, avg loss: 3.824528, ppl: 45.811150 +epoch: 0, batch: 9806, sum loss: 7595.437500, avg loss: 3.730569, ppl: 41.702816 +epoch: 0, batch: 9807, sum loss: 6554.334961, avg loss: 3.799615, ppl: 44.683964 +epoch: 0, batch: 9808, sum loss: 5280.908691, avg loss: 3.599801, ppl: 36.590961 +epoch: 0, batch: 9809, sum loss: 6955.743164, avg loss: 3.701833, ppl: 40.521492 +epoch: 0, batch: 9810, sum loss: 5536.972168, avg loss: 3.654767, ppl: 38.658524 +epoch: 0, batch: 9811, sum loss: 6642.546875, avg loss: 3.729673, ppl: 41.665466 +epoch: 0, batch: 9812, sum loss: 6194.760742, avg loss: 4.116120, ppl: 61.320877 +epoch: 0, batch: 
9813, sum loss: 6192.124512, avg loss: 3.583405, ppl: 35.995914 +epoch: 0, batch: 9814, sum loss: 6583.673340, avg loss: 4.066506, ppl: 58.352745 +epoch: 0, batch: 9815, sum loss: 6290.842773, avg loss: 3.866529, ppl: 47.776264 +epoch: 0, batch: 9816, sum loss: 6447.380371, avg loss: 3.916999, ppl: 50.249424 +epoch: 0, batch: 9817, sum loss: 7062.692383, avg loss: 3.853078, ppl: 47.137947 +epoch: 0, batch: 9818, sum loss: 6488.762207, avg loss: 3.676353, ppl: 39.502048 +epoch: 0, batch: 9819, sum loss: 7056.398926, avg loss: 3.853850, ppl: 47.174332 +epoch: 0, batch: 9820, sum loss: 6598.762207, avg loss: 3.790214, ppl: 44.265865 +epoch: 0, batch: 9821, sum loss: 5670.845703, avg loss: 3.618919, ppl: 37.297222 +epoch: 0, batch: 9822, sum loss: 7931.857422, avg loss: 4.264440, ppl: 71.125046 +epoch: 0, batch: 9823, sum loss: 6459.995605, avg loss: 3.764566, ppl: 43.144985 +epoch: 0, batch: 9824, sum loss: 6444.952148, avg loss: 3.913146, ppl: 50.056194 +epoch: 0, batch: 9825, sum loss: 6187.001953, avg loss: 3.729356, ppl: 41.652287 +epoch: 0, batch: 9826, sum loss: 6202.424316, avg loss: 3.908270, ppl: 49.812706 +epoch: 0, batch: 9827, sum loss: 5556.556641, avg loss: 3.774835, ppl: 43.590298 +epoch: 0, batch: 9828, sum loss: 5067.332031, avg loss: 3.571059, ppl: 35.554214 +epoch: 0, batch: 9829, sum loss: 7623.727539, avg loss: 3.842604, ppl: 46.646805 +epoch: 0, batch: 9830, sum loss: 6697.313477, avg loss: 3.523048, ppl: 33.887550 +epoch: 0, batch: 9831, sum loss: 7286.150391, avg loss: 3.853067, ppl: 47.137432 +epoch: 0, batch: 9832, sum loss: 6926.125000, avg loss: 3.780636, ppl: 43.843922 +epoch: 0, batch: 9833, sum loss: 7275.216797, avg loss: 3.702400, ppl: 40.544514 +epoch: 0, batch: 9834, sum loss: 5975.522949, avg loss: 3.661473, ppl: 38.918610 +epoch: 0, batch: 9835, sum loss: 6366.386230, avg loss: 4.011586, ppl: 55.234386 +epoch: 0, batch: 9836, sum loss: 6363.726074, avg loss: 3.781180, ppl: 43.867771 +epoch: 0, batch: 9837, sum loss: 5981.355469, 
avg loss: 3.778493, ppl: 43.750080 +epoch: 0, batch: 9838, sum loss: 6002.062500, avg loss: 3.644239, ppl: 38.253666 +epoch: 0, batch: 9839, sum loss: 6144.896484, avg loss: 3.819078, ppl: 45.562180 +epoch: 0, batch: 9840, sum loss: 6705.808105, avg loss: 3.876189, ppl: 48.240044 +epoch: 0, batch: 9841, sum loss: 6092.342285, avg loss: 3.592183, ppl: 36.313267 +epoch: 0, batch: 9842, sum loss: 5851.382324, avg loss: 3.734131, ppl: 41.851624 +epoch: 0, batch: 9843, sum loss: 5844.974609, avg loss: 3.741981, ppl: 42.181480 +epoch: 0, batch: 9844, sum loss: 5420.114258, avg loss: 3.623071, ppl: 37.452415 +epoch: 0, batch: 9845, sum loss: 4913.637207, avg loss: 3.688917, ppl: 40.001499 +epoch: 0, batch: 9846, sum loss: 6615.819336, avg loss: 3.725124, ppl: 41.476360 +epoch: 0, batch: 9847, sum loss: 7266.607422, avg loss: 3.824530, ppl: 45.811279 +epoch: 0, batch: 9848, sum loss: 6393.729492, avg loss: 3.526602, ppl: 34.008221 +epoch: 0, batch: 9849, sum loss: 5694.426270, avg loss: 3.659657, ppl: 38.848015 +epoch: 0, batch: 9850, sum loss: 5536.795898, avg loss: 3.454021, ppl: 31.627310 +epoch: 0, batch: 9851, sum loss: 5634.978027, avg loss: 3.630785, ppl: 37.742432 +epoch: 0, batch: 9852, sum loss: 5582.013672, avg loss: 3.619983, ppl: 37.336933 +epoch: 0, batch: 9853, sum loss: 6072.898438, avg loss: 3.601956, ppl: 36.669903 +epoch: 0, batch: 9854, sum loss: 5000.877930, avg loss: 3.378972, ppl: 29.340582 +epoch: 0, batch: 9855, sum loss: 6816.181641, avg loss: 3.664614, ppl: 39.041061 +epoch: 0, batch: 9856, sum loss: 6072.625977, avg loss: 3.545024, ppl: 34.640514 +epoch: 0, batch: 9857, sum loss: 6197.239258, avg loss: 3.578083, ppl: 35.804829 +epoch: 0, batch: 9858, sum loss: 5927.414062, avg loss: 3.816751, ppl: 45.456280 +epoch: 0, batch: 9859, sum loss: 6433.950195, avg loss: 3.868882, ppl: 47.888802 +epoch: 0, batch: 9860, sum loss: 6887.483398, avg loss: 4.111931, ppl: 61.064510 +epoch: 0, batch: 9861, sum loss: 7361.019531, avg loss: 3.872183, ppl: 
48.047150 +epoch: 0, batch: 9862, sum loss: 5369.132324, avg loss: 3.728564, ppl: 41.619312 +epoch: 0, batch: 9863, sum loss: 7315.942383, avg loss: 3.812372, ppl: 45.257671 +epoch: 0, batch: 9864, sum loss: 6249.788086, avg loss: 3.771749, ppl: 43.456005 +epoch: 0, batch: 9865, sum loss: 6630.368652, avg loss: 3.806182, ppl: 44.978367 +epoch: 0, batch: 9866, sum loss: 5926.293945, avg loss: 3.565761, ppl: 35.366344 +epoch: 0, batch: 9867, sum loss: 7993.442383, avg loss: 3.880312, ppl: 48.439312 +epoch: 0, batch: 9868, sum loss: 6523.204590, avg loss: 3.704262, ppl: 40.620041 +epoch: 0, batch: 9869, sum loss: 5486.865234, avg loss: 3.444360, ppl: 31.323223 +epoch: 0, batch: 9870, sum loss: 7402.613281, avg loss: 3.859548, ppl: 47.443905 +epoch: 0, batch: 9871, sum loss: 6768.412598, avg loss: 3.869876, ppl: 47.936424 +epoch: 0, batch: 9872, sum loss: 6508.448730, avg loss: 3.702189, ppl: 40.535950 +epoch: 0, batch: 9873, sum loss: 5763.281738, avg loss: 3.852461, ppl: 47.108860 +epoch: 0, batch: 9874, sum loss: 6998.920898, avg loss: 3.943054, ppl: 51.575859 +epoch: 0, batch: 9875, sum loss: 7581.017090, avg loss: 3.891693, ppl: 48.993744 +epoch: 0, batch: 9876, sum loss: 5508.955566, avg loss: 3.581896, ppl: 35.941608 +epoch: 0, batch: 9877, sum loss: 5965.030273, avg loss: 3.695806, ppl: 40.278015 +epoch: 0, batch: 9878, sum loss: 5579.183594, avg loss: 3.871745, ppl: 48.026100 +epoch: 0, batch: 9879, sum loss: 6345.233887, avg loss: 3.880877, ppl: 48.466702 +epoch: 0, batch: 9880, sum loss: 5503.603027, avg loss: 3.669069, ppl: 39.215363 +epoch: 0, batch: 9881, sum loss: 5855.364258, avg loss: 3.755846, ppl: 42.770401 +epoch: 0, batch: 9882, sum loss: 6936.521484, avg loss: 3.501525, ppl: 33.165989 +epoch: 0, batch: 9883, sum loss: 6527.362305, avg loss: 3.488703, ppl: 32.743435 +epoch: 0, batch: 9884, sum loss: 5419.181641, avg loss: 3.727085, ppl: 41.557796 +epoch: 0, batch: 9885, sum loss: 5939.838379, avg loss: 3.703141, ppl: 40.574558 +epoch: 0, batch: 
9886, sum loss: 5931.583008, avg loss: 3.804736, ppl: 44.913387 +epoch: 0, batch: 9887, sum loss: 6954.103516, avg loss: 3.683318, ppl: 39.778145 +epoch: 0, batch: 9888, sum loss: 6054.330566, avg loss: 3.856261, ppl: 47.288235 +epoch: 0, batch: 9889, sum loss: 6087.632324, avg loss: 3.904832, ppl: 49.641724 +epoch: 0, batch: 9890, sum loss: 7116.013672, avg loss: 3.991034, ppl: 54.110825 +epoch: 0, batch: 9891, sum loss: 8710.112305, avg loss: 3.951957, ppl: 52.037079 +epoch: 0, batch: 9892, sum loss: 5736.889160, avg loss: 3.554454, ppl: 34.968735 +epoch: 0, batch: 9893, sum loss: 5774.364258, avg loss: 3.732621, ppl: 41.788490 +epoch: 0, batch: 9894, sum loss: 6663.732910, avg loss: 3.890095, ppl: 48.915531 +epoch: 0, batch: 9895, sum loss: 6997.382324, avg loss: 3.915715, ppl: 50.184940 +epoch: 0, batch: 9896, sum loss: 6637.222656, avg loss: 3.732971, ppl: 41.803127 +epoch: 0, batch: 9897, sum loss: 6060.586914, avg loss: 3.631269, ppl: 37.760685 +epoch: 0, batch: 9898, sum loss: 7963.208984, avg loss: 3.801054, ppl: 44.748344 +epoch: 0, batch: 9899, sum loss: 5464.113770, avg loss: 3.659822, ppl: 38.854408 +epoch: 0, batch: 9900, sum loss: 8244.166992, avg loss: 3.830933, ppl: 46.105518 +epoch: 0, batch: 9901, sum loss: 6222.181641, avg loss: 3.723628, ppl: 41.414364 +epoch: 0, batch: 9902, sum loss: 7253.538574, avg loss: 3.617725, ppl: 37.252716 +epoch: 0, batch: 9903, sum loss: 7067.147949, avg loss: 3.667435, ppl: 39.151371 +epoch: 0, batch: 9904, sum loss: 7205.903320, avg loss: 3.965825, ppl: 52.763760 +epoch: 0, batch: 9905, sum loss: 5500.034180, avg loss: 3.698745, ppl: 40.396576 +epoch: 0, batch: 9906, sum loss: 6518.099121, avg loss: 3.659797, ppl: 38.853470 +epoch: 0, batch: 9907, sum loss: 4380.795410, avg loss: 3.493457, ppl: 32.899494 +epoch: 0, batch: 9908, sum loss: 4983.064941, avg loss: 3.584939, ppl: 36.051151 +epoch: 0, batch: 9909, sum loss: 7465.570312, avg loss: 3.759099, ppl: 42.909737 +epoch: 0, batch: 9910, sum loss: 6954.042480, 
avg loss: 3.781426, ppl: 43.878578 +epoch: 0, batch: 9911, sum loss: 6067.679688, avg loss: 3.695298, ppl: 40.257584 +epoch: 0, batch: 9912, sum loss: 4509.869141, avg loss: 3.225944, ppl: 25.177319 +epoch: 0, batch: 9913, sum loss: 6636.151367, avg loss: 3.849276, ppl: 46.959057 +epoch: 0, batch: 9914, sum loss: 6272.885254, avg loss: 3.998015, ppl: 54.489864 +epoch: 0, batch: 9915, sum loss: 6825.611328, avg loss: 3.934070, ppl: 51.114586 +epoch: 0, batch: 9916, sum loss: 5867.484863, avg loss: 3.725387, ppl: 41.487289 +epoch: 0, batch: 9917, sum loss: 6324.581543, avg loss: 3.729116, ppl: 41.642300 +epoch: 0, batch: 9918, sum loss: 5478.883789, avg loss: 3.687001, ppl: 39.924942 +epoch: 0, batch: 9919, sum loss: 6659.110352, avg loss: 3.829276, ppl: 46.029194 +epoch: 0, batch: 9920, sum loss: 7397.448242, avg loss: 3.891346, ppl: 48.976753 +epoch: 0, batch: 9921, sum loss: 6177.122070, avg loss: 3.705532, ppl: 40.671680 +epoch: 0, batch: 9922, sum loss: 6362.936035, avg loss: 3.801037, ppl: 44.747578 +epoch: 0, batch: 9923, sum loss: 6449.283203, avg loss: 3.758324, ppl: 42.876492 +epoch: 0, batch: 9924, sum loss: 5485.425781, avg loss: 3.439138, ppl: 31.160099 +epoch: 0, batch: 9925, sum loss: 6616.284668, avg loss: 3.917279, ppl: 50.263504 +epoch: 0, batch: 9926, sum loss: 6795.444824, avg loss: 3.826264, ppl: 45.890778 +epoch: 0, batch: 9927, sum loss: 6751.736816, avg loss: 3.829686, ppl: 46.048084 +epoch: 0, batch: 9928, sum loss: 6598.879883, avg loss: 3.888556, ppl: 48.840321 +epoch: 0, batch: 9929, sum loss: 6315.828125, avg loss: 3.741605, ppl: 42.165592 +epoch: 0, batch: 9930, sum loss: 6892.628418, avg loss: 3.846333, ppl: 46.821056 +epoch: 0, batch: 9931, sum loss: 5771.920410, avg loss: 3.704699, ppl: 40.637806 +epoch: 0, batch: 9932, sum loss: 6978.700684, avg loss: 3.801035, ppl: 44.747482 +epoch: 0, batch: 9933, sum loss: 6306.815918, avg loss: 3.792433, ppl: 44.364208 +epoch: 0, batch: 9934, sum loss: 6082.030273, avg loss: 3.683846, ppl: 
39.799187 +epoch: 0, batch: 9935, sum loss: 6492.153809, avg loss: 3.458793, ppl: 31.778587 +epoch: 0, batch: 9936, sum loss: 5254.539062, avg loss: 3.572086, ppl: 35.590771 +epoch: 0, batch: 9937, sum loss: 6352.037109, avg loss: 3.957655, ppl: 52.334480 +epoch: 0, batch: 9938, sum loss: 5832.661621, avg loss: 3.819687, ppl: 45.589920 +epoch: 0, batch: 9939, sum loss: 5845.535156, avg loss: 3.653459, ppl: 38.607994 +epoch: 0, batch: 9940, sum loss: 6036.060059, avg loss: 3.937417, ppl: 51.285961 +epoch: 0, batch: 9941, sum loss: 4518.498047, avg loss: 3.232116, ppl: 25.333204 +epoch: 0, batch: 9942, sum loss: 6114.600586, avg loss: 3.498055, ppl: 33.051121 +epoch: 0, batch: 9943, sum loss: 6494.737305, avg loss: 3.955382, ppl: 52.215641 +epoch: 0, batch: 9944, sum loss: 6801.327637, avg loss: 3.868787, ppl: 47.884270 +epoch: 0, batch: 9945, sum loss: 5383.430664, avg loss: 3.518582, ppl: 33.736568 +epoch: 0, batch: 9946, sum loss: 6293.020996, avg loss: 3.476807, ppl: 32.356255 +epoch: 0, batch: 9947, sum loss: 6908.061523, avg loss: 3.742178, ppl: 42.189796 +epoch: 0, batch: 9948, sum loss: 5922.555176, avg loss: 3.646894, ppl: 38.355331 +epoch: 0, batch: 9949, sum loss: 5949.824219, avg loss: 3.471309, ppl: 32.178852 +epoch: 0, batch: 9950, sum loss: 6720.443848, avg loss: 3.678404, ppl: 39.583164 +epoch: 0, batch: 9951, sum loss: 7588.670410, avg loss: 3.940120, ppl: 51.424759 +epoch: 0, batch: 9952, sum loss: 7527.269531, avg loss: 3.790166, ppl: 44.263744 +epoch: 0, batch: 9953, sum loss: 6619.111328, avg loss: 3.658989, ppl: 38.822090 +epoch: 0, batch: 9954, sum loss: 7568.841797, avg loss: 3.885442, ppl: 48.688480 +epoch: 0, batch: 9955, sum loss: 5773.720215, avg loss: 3.682220, ppl: 39.734486 +epoch: 0, batch: 9956, sum loss: 7160.736328, avg loss: 4.219645, ppl: 68.009308 +epoch: 0, batch: 9957, sum loss: 6774.902832, avg loss: 3.869162, ppl: 47.902241 +epoch: 0, batch: 9958, sum loss: 7101.651855, avg loss: 4.030449, ppl: 56.286201 +epoch: 0, batch: 
9959, sum loss: 7035.057129, avg loss: 4.052452, ppl: 57.538372 +epoch: 0, batch: 9960, sum loss: 6363.126953, avg loss: 3.849442, ppl: 46.966827 +epoch: 0, batch: 9961, sum loss: 6780.091797, avg loss: 3.806902, ppl: 45.010765 +epoch: 0, batch: 9962, sum loss: 6973.355469, avg loss: 3.739064, ppl: 42.058624 +epoch: 0, batch: 9963, sum loss: 6894.683105, avg loss: 3.838911, ppl: 46.474815 +epoch: 0, batch: 9964, sum loss: 6242.804199, avg loss: 3.785812, ppl: 44.071449 +epoch: 0, batch: 9965, sum loss: 6165.221191, avg loss: 3.707289, ppl: 40.743191 +epoch: 0, batch: 9966, sum loss: 6523.929688, avg loss: 3.864887, ppl: 47.697906 +epoch: 0, batch: 9967, sum loss: 7873.130859, avg loss: 3.982363, ppl: 53.643620 +epoch: 0, batch: 9968, sum loss: 5828.020020, avg loss: 3.631165, ppl: 37.756779 +epoch: 0, batch: 9969, sum loss: 6629.779297, avg loss: 3.610991, ppl: 37.002697 +epoch: 0, batch: 9970, sum loss: 7116.186523, avg loss: 3.608614, ppl: 36.914845 +epoch: 0, batch: 9971, sum loss: 5975.184570, avg loss: 3.610384, ppl: 36.980232 +epoch: 0, batch: 9972, sum loss: 5914.713379, avg loss: 3.635349, ppl: 37.915092 +epoch: 0, batch: 9973, sum loss: 6736.333496, avg loss: 4.012110, ppl: 55.263367 +epoch: 0, batch: 9974, sum loss: 6631.561523, avg loss: 3.690351, ppl: 40.058926 +epoch: 0, batch: 9975, sum loss: 7359.597168, avg loss: 3.898092, ppl: 49.308270 +epoch: 0, batch: 9976, sum loss: 5804.056641, avg loss: 3.680442, ppl: 39.663918 +epoch: 0, batch: 9977, sum loss: 6944.280273, avg loss: 3.981812, ppl: 53.614094 +epoch: 0, batch: 9978, sum loss: 6438.109375, avg loss: 3.736570, ppl: 41.953827 +epoch: 0, batch: 9979, sum loss: 6575.940430, avg loss: 3.845579, ppl: 46.785774 +epoch: 0, batch: 9980, sum loss: 7586.401855, avg loss: 3.986548, ppl: 53.868637 +epoch: 0, batch: 9981, sum loss: 5853.177734, avg loss: 3.626504, ppl: 37.581211 +epoch: 0, batch: 9982, sum loss: 6497.467773, avg loss: 3.788611, ppl: 44.194958 +epoch: 0, batch: 9983, sum loss: 5856.327148, 
avg loss: 3.651077, ppl: 38.516117 +epoch: 0, batch: 9984, sum loss: 4815.661133, avg loss: 3.312009, ppl: 27.440201 +epoch: 0, batch: 9985, sum loss: 5835.287109, avg loss: 3.586532, ppl: 36.108620 +epoch: 0, batch: 9986, sum loss: 6826.783203, avg loss: 3.852587, ppl: 47.114769 +epoch: 0, batch: 9987, sum loss: 5946.904785, avg loss: 3.576010, ppl: 35.730698 +epoch: 0, batch: 9988, sum loss: 6323.011719, avg loss: 3.822861, ppl: 45.734867 +epoch: 0, batch: 9989, sum loss: 6771.876465, avg loss: 3.745507, ppl: 42.330452 +epoch: 0, batch: 9990, sum loss: 5653.948242, avg loss: 3.626651, ppl: 37.586720 +epoch: 0, batch: 9991, sum loss: 5930.340332, avg loss: 3.578962, ppl: 35.836327 +epoch: 0, batch: 9992, sum loss: 6528.489746, avg loss: 3.715703, ppl: 41.087440 +epoch: 0, batch: 9993, sum loss: 8950.260742, avg loss: 3.709184, ppl: 40.820473 +epoch: 0, batch: 9994, sum loss: 6652.444824, avg loss: 3.775508, ppl: 43.619659 +epoch: 0, batch: 9995, sum loss: 7551.665527, avg loss: 3.766417, ppl: 43.224892 +epoch: 0, batch: 9996, sum loss: 6967.902832, avg loss: 3.720183, ppl: 41.271954 +epoch: 0, batch: 9997, sum loss: 6343.924805, avg loss: 3.865890, ppl: 47.745728 +epoch: 0, batch: 9998, sum loss: 6936.355469, avg loss: 3.851391, ppl: 47.058479 +epoch: 0, batch: 9999, sum loss: 5874.952637, avg loss: 3.560577, ppl: 35.183506 +epoch: 0, batch: 10000, sum loss: 6061.686523, avg loss: 3.595306, ppl: 36.426861 +epoch: 0, batch: 10001, sum loss: 5384.104980, avg loss: 3.491637, ppl: 32.839676 +epoch: 0, batch: 10002, sum loss: 6593.129883, avg loss: 3.666924, ppl: 39.131363 +epoch: 0, batch: 10003, sum loss: 6570.223633, avg loss: 3.528584, ppl: 34.075691 +epoch: 0, batch: 10004, sum loss: 6059.781250, avg loss: 3.889462, ppl: 48.884567 +epoch: 0, batch: 10005, sum loss: 5702.717773, avg loss: 3.674432, ppl: 39.426247 +epoch: 0, batch: 10006, sum loss: 6015.791504, avg loss: 3.762221, ppl: 43.043915 +epoch: 0, batch: 10007, sum loss: 5491.305176, avg loss: 3.856254, 
ppl: 47.287861 +epoch: 0, batch: 10008, sum loss: 6572.688477, avg loss: 3.794855, ppl: 44.471771 +epoch: 0, batch: 10009, sum loss: 6120.614258, avg loss: 3.727536, ppl: 41.576546 +epoch: 0, batch: 10010, sum loss: 5216.692383, avg loss: 3.570632, ppl: 35.539028 +epoch: 0, batch: 10011, sum loss: 6567.202637, avg loss: 3.829273, ppl: 46.029041 +epoch: 0, batch: 10012, sum loss: 6041.240234, avg loss: 3.823570, ppl: 45.767296 +epoch: 0, batch: 10013, sum loss: 6644.187500, avg loss: 3.491428, ppl: 32.832798 +epoch: 0, batch: 10014, sum loss: 5820.778320, avg loss: 3.842098, ppl: 46.623188 +epoch: 0, batch: 10015, sum loss: 6488.434082, avg loss: 3.651342, ppl: 38.526321 +epoch: 0, batch: 10016, sum loss: 6420.518066, avg loss: 3.870113, ppl: 47.947823 +epoch: 0, batch: 10017, sum loss: 6597.581055, avg loss: 3.789535, ppl: 44.235840 +epoch: 0, batch: 10018, sum loss: 6558.794434, avg loss: 3.507377, ppl: 33.360638 +epoch: 0, batch: 10019, sum loss: 6581.551270, avg loss: 3.857885, ppl: 47.365051 +epoch: 0, batch: 10020, sum loss: 6556.420898, avg loss: 3.668954, ppl: 39.210876 +epoch: 0, batch: 10021, sum loss: 6032.359375, avg loss: 3.758479, ppl: 42.883167 +epoch: 0, batch: 10022, sum loss: 7320.139648, avg loss: 4.075802, ppl: 58.897690 +epoch: 0, batch: 10023, sum loss: 7012.972656, avg loss: 3.857521, ppl: 47.347809 +epoch: 0, batch: 10024, sum loss: 7771.562500, avg loss: 3.979295, ppl: 53.479305 +epoch: 0, batch: 10025, sum loss: 5529.150879, avg loss: 3.807955, ppl: 45.058212 +epoch: 0, batch: 10026, sum loss: 6840.917969, avg loss: 3.920297, ppl: 50.415398 +epoch: 0, batch: 10027, sum loss: 7843.674316, avg loss: 3.651618, ppl: 38.536949 +epoch: 0, batch: 10028, sum loss: 6563.429688, avg loss: 3.765594, ppl: 43.189331 +epoch: 0, batch: 10029, sum loss: 6668.207520, avg loss: 3.760975, ppl: 42.990307 +epoch: 0, batch: 10030, sum loss: 6655.393555, avg loss: 3.798741, ppl: 44.644936 +epoch: 0, batch: 10031, sum loss: 5680.895996, avg loss: 3.730070, ppl: 
41.682011 +epoch: 0, batch: 10032, sum loss: 7450.210938, avg loss: 3.777997, ppl: 43.728378 +epoch: 0, batch: 10033, sum loss: 6653.583008, avg loss: 3.848226, ppl: 46.909786 +epoch: 0, batch: 10034, sum loss: 5902.499023, avg loss: 3.953449, ppl: 52.114788 +epoch: 0, batch: 10035, sum loss: 6988.728516, avg loss: 3.739287, ppl: 42.068001 +epoch: 0, batch: 10036, sum loss: 6037.492188, avg loss: 3.663527, ppl: 38.998638 +epoch: 0, batch: 10037, sum loss: 5864.411133, avg loss: 3.813011, ppl: 45.286598 +epoch: 0, batch: 10038, sum loss: 6033.907227, avg loss: 3.792525, ppl: 44.368290 +epoch: 0, batch: 10039, sum loss: 6597.370605, avg loss: 3.983919, ppl: 53.727200 +epoch: 0, batch: 10040, sum loss: 5227.118652, avg loss: 3.597466, ppl: 36.505630 +epoch: 0, batch: 10041, sum loss: 6206.663574, avg loss: 3.593899, ppl: 36.375629 +epoch: 0, batch: 10042, sum loss: 5316.756836, avg loss: 3.484113, ppl: 32.593517 +epoch: 0, batch: 10043, sum loss: 6582.926758, avg loss: 3.965619, ppl: 52.752892 +epoch: 0, batch: 10044, sum loss: 6738.874023, avg loss: 3.719025, ppl: 41.224197 +epoch: 0, batch: 10045, sum loss: 6893.593262, avg loss: 3.620585, ppl: 37.359417 +epoch: 0, batch: 10046, sum loss: 7967.324219, avg loss: 3.957936, ppl: 52.349155 +epoch: 0, batch: 10047, sum loss: 6980.182129, avg loss: 3.758849, ppl: 42.899006 +epoch: 0, batch: 10048, sum loss: 6064.423340, avg loss: 3.835815, ppl: 46.331158 +epoch: 0, batch: 10049, sum loss: 6288.537109, avg loss: 3.808926, ppl: 45.101990 +epoch: 0, batch: 10050, sum loss: 5111.007812, avg loss: 3.534584, ppl: 34.280735 +epoch: 0, batch: 10051, sum loss: 5887.671875, avg loss: 3.572617, ppl: 35.609646 +epoch: 0, batch: 10052, sum loss: 6487.855957, avg loss: 3.673758, ppl: 39.399673 +epoch: 0, batch: 10053, sum loss: 5183.833496, avg loss: 3.493149, ppl: 32.889362 +epoch: 0, batch: 10054, sum loss: 4885.796875, avg loss: 3.635266, ppl: 37.911919 +epoch: 0, batch: 10055, sum loss: 5680.983398, avg loss: 3.627703, ppl: 
37.626297 +epoch: 0, batch: 10056, sum loss: 5854.020508, avg loss: 3.796382, ppl: 44.539734 +epoch: 0, batch: 10057, sum loss: 5293.957520, avg loss: 3.699481, ppl: 40.426319 +epoch: 0, batch: 10058, sum loss: 5975.588379, avg loss: 3.654794, ppl: 38.659557 +epoch: 0, batch: 10059, sum loss: 7242.408203, avg loss: 3.927553, ppl: 50.782581 +epoch: 0, batch: 10060, sum loss: 5665.408691, avg loss: 3.538669, ppl: 34.421070 +epoch: 0, batch: 10061, sum loss: 5394.866211, avg loss: 3.482806, ppl: 32.550922 +epoch: 0, batch: 10062, sum loss: 6641.810547, avg loss: 3.812750, ppl: 45.274769 +epoch: 0, batch: 10063, sum loss: 5281.773926, avg loss: 3.660273, ppl: 38.871948 +epoch: 0, batch: 10064, sum loss: 6822.763672, avg loss: 3.941516, ppl: 51.496593 +epoch: 0, batch: 10065, sum loss: 5700.351562, avg loss: 3.805308, ppl: 44.939083 +epoch: 0, batch: 10066, sum loss: 5828.110840, avg loss: 3.752808, ppl: 42.640652 +epoch: 0, batch: 10067, sum loss: 6014.807129, avg loss: 3.845785, ppl: 46.795387 +epoch: 0, batch: 10068, sum loss: 4698.281738, avg loss: 3.346354, ppl: 28.399015 +epoch: 0, batch: 10069, sum loss: 5805.247070, avg loss: 3.692905, ppl: 40.161354 +epoch: 0, batch: 10070, sum loss: 6554.948242, avg loss: 3.951144, ppl: 51.994827 +epoch: 0, batch: 10071, sum loss: 7640.127930, avg loss: 4.012672, ppl: 55.294445 +epoch: 0, batch: 10072, sum loss: 7757.402344, avg loss: 3.929788, ppl: 50.896191 +epoch: 0, batch: 10073, sum loss: 6426.540039, avg loss: 3.616511, ppl: 37.207520 +epoch: 0, batch: 10074, sum loss: 6953.910645, avg loss: 3.852582, ppl: 47.114555 +epoch: 0, batch: 10075, sum loss: 5423.604004, avg loss: 3.672041, ppl: 39.332100 +epoch: 0, batch: 10076, sum loss: 6890.970703, avg loss: 3.686983, ppl: 39.924198 +epoch: 0, batch: 10077, sum loss: 7321.797852, avg loss: 3.791713, ppl: 44.332275 +epoch: 0, batch: 10078, sum loss: 6828.181152, avg loss: 3.778739, ppl: 43.760815 +epoch: 0, batch: 10079, sum loss: 6183.184570, avg loss: 3.747385, ppl: 
42.410015 +epoch: 0, batch: 10080, sum loss: 6926.389648, avg loss: 3.768438, ppl: 43.312374 +epoch: 0, batch: 10081, sum loss: 6832.645508, avg loss: 3.667550, ppl: 39.155849 +epoch: 0, batch: 10082, sum loss: 6174.867676, avg loss: 3.776677, ppl: 43.670700 +epoch: 0, batch: 10083, sum loss: 6731.727539, avg loss: 4.077364, ppl: 58.989765 +epoch: 0, batch: 10084, sum loss: 5928.099609, avg loss: 3.733060, ppl: 41.806858 +epoch: 0, batch: 10085, sum loss: 5851.631836, avg loss: 3.428021, ppl: 30.815598 +epoch: 0, batch: 10086, sum loss: 5482.585938, avg loss: 3.487650, ppl: 32.708996 +epoch: 0, batch: 10087, sum loss: 5730.759277, avg loss: 3.423393, ppl: 30.673306 +epoch: 0, batch: 10088, sum loss: 5783.306641, avg loss: 3.583214, ppl: 35.989010 +epoch: 0, batch: 10089, sum loss: 6964.452148, avg loss: 3.835051, ppl: 46.295769 +epoch: 0, batch: 10090, sum loss: 5680.125977, avg loss: 3.645781, ppl: 38.312668 +epoch: 0, batch: 10091, sum loss: 5923.310547, avg loss: 3.627257, ppl: 37.609509 +epoch: 0, batch: 10092, sum loss: 5259.558594, avg loss: 3.504036, ppl: 33.249390 +epoch: 0, batch: 10093, sum loss: 5764.383789, avg loss: 3.573704, ppl: 35.648373 +epoch: 0, batch: 10094, sum loss: 7883.144043, avg loss: 4.090889, ppl: 59.793053 +epoch: 0, batch: 10095, sum loss: 5944.083496, avg loss: 3.740770, ppl: 42.130413 +epoch: 0, batch: 10096, sum loss: 6773.304688, avg loss: 3.986642, ppl: 53.873684 +epoch: 0, batch: 10097, sum loss: 4798.647461, avg loss: 3.457239, ppl: 31.729240 +epoch: 0, batch: 10098, sum loss: 6368.553223, avg loss: 3.666409, ppl: 39.111225 +epoch: 0, batch: 10099, sum loss: 6566.868164, avg loss: 3.876545, ppl: 48.257217 +epoch: 0, batch: 10100, sum loss: 6165.638672, avg loss: 3.689790, ppl: 40.036438 +epoch: 0, batch: 10101, sum loss: 7095.190430, avg loss: 3.835238, ppl: 46.304459 +epoch: 0, batch: 10102, sum loss: 5907.140625, avg loss: 3.669031, ppl: 39.213905 +epoch: 0, batch: 10103, sum loss: 6502.807617, avg loss: 3.785104, ppl: 
44.040230 +epoch: 0, batch: 10104, sum loss: 4889.685547, avg loss: 3.530459, ppl: 34.139633 +epoch: 0, batch: 10105, sum loss: 5809.100586, avg loss: 3.702422, ppl: 40.545403 +epoch: 0, batch: 10106, sum loss: 7359.324219, avg loss: 3.914534, ppl: 50.125713 +epoch: 0, batch: 10107, sum loss: 6041.323242, avg loss: 3.624069, ppl: 37.489803 +epoch: 0, batch: 10108, sum loss: 6150.780273, avg loss: 3.526823, ppl: 34.015736 +epoch: 0, batch: 10109, sum loss: 6824.687012, avg loss: 3.840567, ppl: 46.551846 +epoch: 0, batch: 10110, sum loss: 5474.916016, avg loss: 3.443343, ppl: 31.291403 +epoch: 0, batch: 10111, sum loss: 8122.440430, avg loss: 3.815144, ppl: 45.383274 +epoch: 0, batch: 10112, sum loss: 6207.466797, avg loss: 3.785041, ppl: 44.037460 +epoch: 0, batch: 10113, sum loss: 5862.935059, avg loss: 3.731977, ppl: 41.761578 +epoch: 0, batch: 10114, sum loss: 8063.000977, avg loss: 4.084600, ppl: 59.418163 +epoch: 0, batch: 10115, sum loss: 5478.633789, avg loss: 3.594904, ppl: 36.412201 +epoch: 0, batch: 10116, sum loss: 5884.444824, avg loss: 3.738529, ppl: 42.036110 +epoch: 0, batch: 10117, sum loss: 7827.732910, avg loss: 4.026612, ppl: 56.070610 +epoch: 0, batch: 10118, sum loss: 6271.028809, avg loss: 4.048437, ppl: 57.307823 +epoch: 0, batch: 10119, sum loss: 5277.953125, avg loss: 3.525687, ppl: 33.977100 +epoch: 0, batch: 10120, sum loss: 6309.678711, avg loss: 3.674828, ppl: 39.441856 +epoch: 0, batch: 10121, sum loss: 5658.541992, avg loss: 3.499408, ppl: 33.095844 +epoch: 0, batch: 10122, sum loss: 5028.009277, avg loss: 3.749448, ppl: 42.497601 +epoch: 0, batch: 10123, sum loss: 5185.245605, avg loss: 3.389050, ppl: 29.637770 +epoch: 0, batch: 10124, sum loss: 6666.669922, avg loss: 3.639012, ppl: 38.054226 +epoch: 0, batch: 10125, sum loss: 6411.758789, avg loss: 3.550254, ppl: 34.822166 +epoch: 0, batch: 10126, sum loss: 5827.121582, avg loss: 3.535875, ppl: 34.325031 +epoch: 0, batch: 10127, sum loss: 6977.603516, avg loss: 3.767604, ppl: 
43.276264 +epoch: 0, batch: 10128, sum loss: 4534.130859, avg loss: 3.461169, ppl: 31.854177 +epoch: 0, batch: 10129, sum loss: 6768.201660, avg loss: 3.682373, ppl: 39.740589 +epoch: 0, batch: 10130, sum loss: 6351.964355, avg loss: 3.661075, ppl: 38.903126 +epoch: 0, batch: 10131, sum loss: 5856.865723, avg loss: 3.579991, ppl: 35.873230 +epoch: 0, batch: 10132, sum loss: 5768.760254, avg loss: 3.571988, ppl: 35.587265 +epoch: 0, batch: 10133, sum loss: 6199.917480, avg loss: 3.860471, ppl: 47.487736 +epoch: 0, batch: 10134, sum loss: 5695.774902, avg loss: 3.611779, ppl: 37.031864 +epoch: 0, batch: 10135, sum loss: 5570.136719, avg loss: 3.554650, ppl: 34.975586 +epoch: 0, batch: 10136, sum loss: 7453.985352, avg loss: 3.890389, ppl: 48.929928 +epoch: 0, batch: 10137, sum loss: 5501.331543, avg loss: 3.449111, ppl: 31.472393 +epoch: 0, batch: 10138, sum loss: 7138.056641, avg loss: 3.733293, ppl: 41.816586 +epoch: 0, batch: 10139, sum loss: 6990.248535, avg loss: 3.813556, ppl: 45.311287 +epoch: 0, batch: 10140, sum loss: 7294.142090, avg loss: 3.693237, ppl: 40.174667 +epoch: 0, batch: 10141, sum loss: 5460.033691, avg loss: 3.575661, ppl: 35.718212 +epoch: 0, batch: 10142, sum loss: 6266.687012, avg loss: 3.520611, ppl: 33.805069 +epoch: 0, batch: 10143, sum loss: 5255.583496, avg loss: 3.392888, ppl: 29.751753 +epoch: 0, batch: 10144, sum loss: 6110.674805, avg loss: 3.355670, ppl: 28.664803 +epoch: 0, batch: 10145, sum loss: 5555.920898, avg loss: 3.503103, ppl: 33.218361 +epoch: 0, batch: 10146, sum loss: 5936.899414, avg loss: 3.703618, ppl: 40.593899 +epoch: 0, batch: 10147, sum loss: 6014.931641, avg loss: 3.790127, ppl: 44.262035 +epoch: 0, batch: 10148, sum loss: 7238.547852, avg loss: 3.774008, ppl: 43.554295 +epoch: 0, batch: 10149, sum loss: 7432.608887, avg loss: 3.989591, ppl: 54.032806 +epoch: 0, batch: 10150, sum loss: 7603.132812, avg loss: 3.743541, ppl: 42.247345 +epoch: 0, batch: 10151, sum loss: 6025.298828, avg loss: 3.751743, ppl: 
42.595245 +epoch: 0, batch: 10152, sum loss: 5224.252441, avg loss: 3.291905, ppl: 26.894039 +epoch: 0, batch: 10153, sum loss: 5935.100098, avg loss: 3.668171, ppl: 39.180168 +epoch: 0, batch: 10154, sum loss: 5407.820312, avg loss: 3.698920, ppl: 40.403637 +epoch: 0, batch: 10155, sum loss: 6655.092773, avg loss: 3.607096, ppl: 36.858868 +epoch: 0, batch: 10156, sum loss: 6440.057129, avg loss: 3.844810, ppl: 46.749825 +epoch: 0, batch: 10157, sum loss: 5459.758789, avg loss: 3.462117, ppl: 31.884411 +epoch: 0, batch: 10158, sum loss: 6053.295898, avg loss: 3.780947, ppl: 43.857555 +epoch: 0, batch: 10159, sum loss: 5006.563965, avg loss: 3.550755, ppl: 34.839603 +epoch: 0, batch: 10160, sum loss: 6757.941406, avg loss: 3.739868, ppl: 42.092419 +epoch: 0, batch: 10161, sum loss: 7205.204590, avg loss: 3.967623, ppl: 52.858761 +epoch: 0, batch: 10162, sum loss: 6031.263184, avg loss: 3.725302, ppl: 41.483738 +epoch: 0, batch: 10163, sum loss: 4581.042480, avg loss: 3.615661, ppl: 37.175907 +epoch: 0, batch: 10164, sum loss: 6083.706055, avg loss: 3.826230, ppl: 45.889210 +epoch: 0, batch: 10165, sum loss: 7152.661133, avg loss: 3.914976, ppl: 50.147873 +epoch: 0, batch: 10166, sum loss: 5979.592773, avg loss: 3.915909, ppl: 50.194668 +epoch: 0, batch: 10167, sum loss: 5960.008301, avg loss: 3.647496, ppl: 38.378445 +epoch: 0, batch: 10168, sum loss: 6241.022461, avg loss: 3.576517, ppl: 35.748814 +epoch: 0, batch: 10169, sum loss: 6329.172852, avg loss: 3.740646, ppl: 42.125191 +epoch: 0, batch: 10170, sum loss: 5941.195801, avg loss: 3.416444, ppl: 30.460897 +epoch: 0, batch: 10171, sum loss: 6206.205078, avg loss: 3.436437, ppl: 31.076033 +epoch: 0, batch: 10172, sum loss: 6029.785645, avg loss: 3.761563, ppl: 43.015610 +epoch: 0, batch: 10173, sum loss: 6305.112305, avg loss: 3.805137, ppl: 44.931412 +epoch: 0, batch: 10174, sum loss: 6818.546875, avg loss: 3.911960, ppl: 49.996868 +epoch: 0, batch: 10175, sum loss: 5830.083496, avg loss: 3.583333, ppl: 
35.993317 +epoch: 0, batch: 10176, sum loss: 6178.412598, avg loss: 3.565154, ppl: 35.344906 +epoch: 0, batch: 10177, sum loss: 8062.209473, avg loss: 3.950127, ppl: 51.941956 +epoch: 0, batch: 10178, sum loss: 6751.663574, avg loss: 3.836172, ppl: 46.347733 +epoch: 0, batch: 10179, sum loss: 5314.876953, avg loss: 3.913753, ppl: 50.086590 +epoch: 0, batch: 10180, sum loss: 6546.875000, avg loss: 3.643225, ppl: 38.214878 +epoch: 0, batch: 10181, sum loss: 5340.909668, avg loss: 3.611163, ppl: 37.009056 +epoch: 0, batch: 10182, sum loss: 5571.862305, avg loss: 3.687533, ppl: 39.946175 +epoch: 0, batch: 10183, sum loss: 6379.068848, avg loss: 3.852095, ppl: 47.091602 +epoch: 0, batch: 10184, sum loss: 6109.598633, avg loss: 3.896428, ppl: 49.226280 +epoch: 0, batch: 10185, sum loss: 6406.527344, avg loss: 3.701056, ppl: 40.490021 +epoch: 0, batch: 10186, sum loss: 7277.219727, avg loss: 3.768628, ppl: 43.320572 +epoch: 0, batch: 10187, sum loss: 6884.645996, avg loss: 3.818439, ppl: 45.533089 +epoch: 0, batch: 10188, sum loss: 6966.075195, avg loss: 3.794158, ppl: 44.440823 +epoch: 0, batch: 10189, sum loss: 6445.307129, avg loss: 3.444846, ppl: 31.338469 +epoch: 0, batch: 10190, sum loss: 5036.159668, avg loss: 3.302400, ppl: 27.177784 +epoch: 0, batch: 10191, sum loss: 6490.041992, avg loss: 3.613609, ppl: 37.099716 +epoch: 0, batch: 10192, sum loss: 5760.220215, avg loss: 3.510189, ppl: 33.454582 +epoch: 0, batch: 10193, sum loss: 6201.321289, avg loss: 3.827976, ppl: 45.969414 +epoch: 0, batch: 10194, sum loss: 6368.823730, avg loss: 3.850559, ppl: 47.019318 +epoch: 0, batch: 10195, sum loss: 5695.809570, avg loss: 3.665257, ppl: 39.066185 +epoch: 0, batch: 10196, sum loss: 6994.351562, avg loss: 3.851515, ppl: 47.064316 +epoch: 0, batch: 10197, sum loss: 5353.757812, avg loss: 3.689702, ppl: 40.032917 +epoch: 0, batch: 10198, sum loss: 7333.577637, avg loss: 3.865882, ppl: 47.745350 +epoch: 0, batch: 10199, sum loss: 6184.611328, avg loss: 3.589444, ppl: 
36.213924 +epoch: 0, batch: 10200, sum loss: 5783.829102, avg loss: 3.691021, ppl: 40.085743 +epoch: 0, batch: 10201, sum loss: 6019.193359, avg loss: 3.530319, ppl: 34.134846 +epoch: 0, batch: 10202, sum loss: 6111.116699, avg loss: 3.781631, ppl: 43.887585 +epoch: 0, batch: 10203, sum loss: 6247.374023, avg loss: 3.809374, ppl: 45.122200 +epoch: 0, batch: 10204, sum loss: 5827.365723, avg loss: 3.551106, ppl: 34.851856 +epoch: 0, batch: 10205, sum loss: 5927.812500, avg loss: 3.650131, ppl: 38.479698 +epoch: 0, batch: 10206, sum loss: 6127.066406, avg loss: 3.958053, ppl: 52.355309 +epoch: 0, batch: 10207, sum loss: 6230.647461, avg loss: 3.722012, ppl: 41.347492 +epoch: 0, batch: 10208, sum loss: 6537.499023, avg loss: 3.706065, ppl: 40.693371 +epoch: 0, batch: 10209, sum loss: 6629.875000, avg loss: 3.779860, ppl: 43.809917 +epoch: 0, batch: 10210, sum loss: 5389.571289, avg loss: 3.732390, ppl: 41.778839 +epoch: 0, batch: 10211, sum loss: 5807.590332, avg loss: 3.763831, ppl: 43.113262 +epoch: 0, batch: 10212, sum loss: 6268.956543, avg loss: 3.627868, ppl: 37.632515 +epoch: 0, batch: 10213, sum loss: 6882.532227, avg loss: 3.906091, ppl: 49.704288 +epoch: 0, batch: 10214, sum loss: 6715.679688, avg loss: 3.844121, ppl: 46.717602 +epoch: 0, batch: 10215, sum loss: 6384.766602, avg loss: 3.617432, ppl: 37.241795 +epoch: 0, batch: 10216, sum loss: 7849.769531, avg loss: 3.907302, ppl: 49.764500 +epoch: 0, batch: 10217, sum loss: 6535.078125, avg loss: 3.576945, ppl: 35.764107 +epoch: 0, batch: 10218, sum loss: 5591.851074, avg loss: 3.666788, ppl: 39.126015 +epoch: 0, batch: 10219, sum loss: 5772.104492, avg loss: 3.605312, ppl: 36.793167 +epoch: 0, batch: 10220, sum loss: 5779.310059, avg loss: 3.660108, ppl: 38.865524 +epoch: 0, batch: 10221, sum loss: 5115.099121, avg loss: 3.472573, ppl: 32.219524 +epoch: 0, batch: 10222, sum loss: 6375.049805, avg loss: 3.687131, ppl: 39.930130 +epoch: 0, batch: 10223, sum loss: 6584.487793, avg loss: 3.803864, ppl: 
44.874222 +epoch: 0, batch: 10224, sum loss: 7391.988281, avg loss: 3.761826, ppl: 43.026924 +epoch: 0, batch: 10225, sum loss: 6711.233398, avg loss: 3.904150, ppl: 49.607895 +epoch: 0, batch: 10226, sum loss: 6903.415527, avg loss: 3.791003, ppl: 44.300789 +epoch: 0, batch: 10227, sum loss: 6571.472168, avg loss: 3.738039, ppl: 42.015507 +epoch: 0, batch: 10228, sum loss: 6005.818359, avg loss: 3.686813, ppl: 39.917423 +epoch: 0, batch: 10229, sum loss: 6113.794922, avg loss: 3.840323, ppl: 46.540527 +epoch: 0, batch: 10230, sum loss: 6048.687500, avg loss: 3.619801, ppl: 37.330151 +epoch: 0, batch: 10231, sum loss: 5957.640137, avg loss: 3.612880, ppl: 37.072685 +epoch: 0, batch: 10232, sum loss: 5953.457520, avg loss: 3.643487, ppl: 38.224895 +epoch: 0, batch: 10233, sum loss: 6813.483887, avg loss: 3.933882, ppl: 51.104996 +epoch: 0, batch: 10234, sum loss: 6140.893555, avg loss: 3.739887, ppl: 42.093212 +epoch: 0, batch: 10235, sum loss: 5714.429688, avg loss: 3.730046, ppl: 41.681007 +epoch: 0, batch: 10236, sum loss: 6655.013672, avg loss: 3.785559, ppl: 44.060310 +epoch: 0, batch: 10237, sum loss: 5070.305664, avg loss: 3.703657, ppl: 40.595478 +epoch: 0, batch: 10238, sum loss: 5434.945801, avg loss: 3.536074, ppl: 34.331871 +epoch: 0, batch: 10239, sum loss: 7008.086914, avg loss: 3.680718, ppl: 39.674877 +epoch: 0, batch: 10240, sum loss: 6663.659180, avg loss: 3.639355, ppl: 38.067291 +epoch: 0, batch: 10241, sum loss: 5936.052246, avg loss: 3.693872, ppl: 40.200199 +epoch: 0, batch: 10242, sum loss: 6566.154297, avg loss: 3.496355, ppl: 32.994961 +epoch: 0, batch: 10243, sum loss: 6398.509766, avg loss: 3.783862, ppl: 43.985569 +epoch: 0, batch: 10244, sum loss: 6521.590332, avg loss: 4.060766, ppl: 58.018723 +epoch: 0, batch: 10245, sum loss: 7096.234375, avg loss: 4.015979, ppl: 55.477570 +epoch: 0, batch: 10246, sum loss: 7878.750000, avg loss: 4.202000, ppl: 66.819847 +epoch: 0, batch: 10247, sum loss: 5636.925781, avg loss: 3.674658, ppl: 
39.435181 +epoch: 0, batch: 10248, sum loss: 6048.932617, avg loss: 3.668243, ppl: 39.182999 +epoch: 0, batch: 10249, sum loss: 7332.326172, avg loss: 3.879538, ppl: 48.401840 +epoch: 0, batch: 10250, sum loss: 5402.375488, avg loss: 3.628190, ppl: 37.644630 +epoch: 0, batch: 10251, sum loss: 5829.204102, avg loss: 3.785197, ppl: 44.044369 +epoch: 0, batch: 10252, sum loss: 5440.345215, avg loss: 3.588618, ppl: 36.184044 +epoch: 0, batch: 10253, sum loss: 5859.655273, avg loss: 3.559937, ppl: 35.160965 +epoch: 0, batch: 10254, sum loss: 6507.940918, avg loss: 3.832710, ppl: 46.187527 +epoch: 0, batch: 10255, sum loss: 5165.608398, avg loss: 3.448337, ppl: 31.448038 +epoch: 0, batch: 10256, sum loss: 7336.092285, avg loss: 3.923044, ppl: 50.554096 +epoch: 0, batch: 10257, sum loss: 5614.813477, avg loss: 3.596934, ppl: 36.486191 +epoch: 0, batch: 10258, sum loss: 6277.342285, avg loss: 4.081497, ppl: 59.234058 +epoch: 0, batch: 10259, sum loss: 6344.386230, avg loss: 3.606814, ppl: 36.848480 +epoch: 0, batch: 10260, sum loss: 6813.707520, avg loss: 3.817203, ppl: 45.476845 +epoch: 0, batch: 10261, sum loss: 6484.691895, avg loss: 3.823521, ppl: 45.765072 +epoch: 0, batch: 10262, sum loss: 5737.774902, avg loss: 3.815010, ppl: 45.377201 +epoch: 0, batch: 10263, sum loss: 5179.559082, avg loss: 3.720948, ppl: 41.303513 +epoch: 0, batch: 10264, sum loss: 5959.466309, avg loss: 3.555768, ppl: 35.014687 +epoch: 0, batch: 10265, sum loss: 5490.814941, avg loss: 3.584083, ppl: 36.020306 +epoch: 0, batch: 10266, sum loss: 6040.937500, avg loss: 3.623838, ppl: 37.481133 +epoch: 0, batch: 10267, sum loss: 6563.304199, avg loss: 3.681046, ppl: 39.687878 +epoch: 0, batch: 10268, sum loss: 5696.295898, avg loss: 3.372585, ppl: 29.153793 +epoch: 0, batch: 10269, sum loss: 6516.213379, avg loss: 3.493948, ppl: 32.915649 +epoch: 0, batch: 10270, sum loss: 6639.789551, avg loss: 3.703173, ppl: 40.575863 +epoch: 0, batch: 10271, sum loss: 6750.638672, avg loss: 3.723463, ppl: 
41.407551 +epoch: 0, batch: 10272, sum loss: 6647.604004, avg loss: 3.768483, ppl: 43.314316 +epoch: 0, batch: 10273, sum loss: 6159.062988, avg loss: 3.537658, ppl: 34.386299 +epoch: 0, batch: 10274, sum loss: 5843.413086, avg loss: 3.344827, ppl: 28.355675 +epoch: 0, batch: 10275, sum loss: 6805.127930, avg loss: 3.732928, ppl: 41.801334 +epoch: 0, batch: 10276, sum loss: 6332.262695, avg loss: 3.753564, ppl: 42.672913 +epoch: 0, batch: 10277, sum loss: 6490.719727, avg loss: 3.706865, ppl: 40.725914 +epoch: 0, batch: 10278, sum loss: 7110.753906, avg loss: 3.772283, ppl: 43.479229 +epoch: 0, batch: 10279, sum loss: 6168.402344, avg loss: 3.691444, ppl: 40.102711 +epoch: 0, batch: 10280, sum loss: 7012.407227, avg loss: 4.030119, ppl: 56.267605 +epoch: 0, batch: 10281, sum loss: 7054.641602, avg loss: 3.788744, ppl: 44.200859 +epoch: 0, batch: 10282, sum loss: 5348.576660, avg loss: 3.504965, ppl: 33.280281 +epoch: 0, batch: 10283, sum loss: 5981.422363, avg loss: 3.644986, ppl: 38.282242 +epoch: 0, batch: 10284, sum loss: 5596.090820, avg loss: 3.365058, ppl: 28.935167 +epoch: 0, batch: 10285, sum loss: 6910.461426, avg loss: 4.020048, ppl: 55.703762 +epoch: 0, batch: 10286, sum loss: 4911.537598, avg loss: 3.377949, ppl: 29.310587 +epoch: 0, batch: 10287, sum loss: 7033.933594, avg loss: 3.757443, ppl: 42.838764 +epoch: 0, batch: 10288, sum loss: 5546.700195, avg loss: 3.634797, ppl: 37.894165 +epoch: 0, batch: 10289, sum loss: 6586.549316, avg loss: 3.665303, ppl: 39.067970 +epoch: 0, batch: 10290, sum loss: 5668.200195, avg loss: 3.822118, ppl: 45.700882 +epoch: 0, batch: 10291, sum loss: 5616.748047, avg loss: 3.530326, ppl: 34.135082 +epoch: 0, batch: 10292, sum loss: 5620.299805, avg loss: 3.675801, ppl: 39.480278 +epoch: 0, batch: 10293, sum loss: 5503.282715, avg loss: 3.465544, ppl: 31.993843 +epoch: 0, batch: 10294, sum loss: 5705.428711, avg loss: 3.606466, ppl: 36.835629 +epoch: 0, batch: 10295, sum loss: 5779.240234, avg loss: 3.350284, ppl: 
28.510839 +epoch: 0, batch: 10296, sum loss: 6557.663086, avg loss: 3.962334, ppl: 52.579926 +epoch: 0, batch: 10297, sum loss: 6812.247559, avg loss: 3.652680, ppl: 38.577904 +epoch: 0, batch: 10298, sum loss: 6330.890625, avg loss: 3.721864, ppl: 41.341373 +epoch: 0, batch: 10299, sum loss: 6789.559570, avg loss: 3.881967, ppl: 48.519539 +epoch: 0, batch: 10300, sum loss: 6108.811035, avg loss: 3.839605, ppl: 46.507084 +epoch: 0, batch: 10301, sum loss: 6093.301270, avg loss: 3.540559, ppl: 34.486176 +epoch: 0, batch: 10302, sum loss: 5694.599121, avg loss: 3.491477, ppl: 32.834408 +epoch: 0, batch: 10303, sum loss: 6275.888184, avg loss: 3.838464, ppl: 46.454044 +epoch: 0, batch: 10304, sum loss: 7903.140137, avg loss: 4.088536, ppl: 59.652512 +epoch: 0, batch: 10305, sum loss: 5546.992188, avg loss: 3.484292, ppl: 32.599323 +epoch: 0, batch: 10306, sum loss: 7062.951172, avg loss: 3.861647, ppl: 47.543575 +epoch: 0, batch: 10307, sum loss: 6004.290039, avg loss: 3.681355, ppl: 39.700150 +epoch: 0, batch: 10308, sum loss: 6481.391602, avg loss: 3.790287, ppl: 44.269127 +epoch: 0, batch: 10309, sum loss: 6159.922363, avg loss: 3.735550, ppl: 41.911076 +epoch: 0, batch: 10310, sum loss: 7090.105957, avg loss: 3.906395, ppl: 49.719376 +epoch: 0, batch: 10311, sum loss: 6389.721191, avg loss: 3.691347, ppl: 40.098808 +epoch: 0, batch: 10312, sum loss: 5665.708008, avg loss: 3.527838, ppl: 34.050270 +epoch: 0, batch: 10313, sum loss: 6177.791992, avg loss: 3.753215, ppl: 42.658020 +epoch: 0, batch: 10314, sum loss: 7466.557617, avg loss: 3.737016, ppl: 41.972546 +epoch: 0, batch: 10315, sum loss: 5760.696289, avg loss: 3.553792, ppl: 34.945564 +epoch: 0, batch: 10316, sum loss: 7093.050293, avg loss: 3.927492, ppl: 50.779457 +epoch: 0, batch: 10317, sum loss: 6197.686523, avg loss: 3.695698, ppl: 40.273693 +epoch: 0, batch: 10318, sum loss: 6081.748535, avg loss: 3.801093, ppl: 44.750065 +epoch: 0, batch: 10319, sum loss: 6667.453125, avg loss: 3.700030, ppl: 
40.448502 +epoch: 0, batch: 10320, sum loss: 5408.100586, avg loss: 3.451245, ppl: 31.539637 +epoch: 0, batch: 10321, sum loss: 5794.521973, avg loss: 3.572455, ppl: 35.603889 +epoch: 0, batch: 10322, sum loss: 4834.369629, avg loss: 3.433501, ppl: 30.984940 +epoch: 0, batch: 10323, sum loss: 6480.041504, avg loss: 3.636387, ppl: 37.954464 +epoch: 0, batch: 10324, sum loss: 7673.677734, avg loss: 3.846455, ppl: 46.826759 +epoch: 0, batch: 10325, sum loss: 4901.937500, avg loss: 3.378317, ppl: 29.321371 +epoch: 0, batch: 10326, sum loss: 5330.853516, avg loss: 3.376095, ppl: 29.256290 +epoch: 0, batch: 10327, sum loss: 5889.374023, avg loss: 3.872041, ppl: 48.040337 +epoch: 0, batch: 10328, sum loss: 5196.846680, avg loss: 3.598924, ppl: 36.558884 +epoch: 0, batch: 10329, sum loss: 5226.992188, avg loss: 3.536531, ppl: 34.347553 +epoch: 0, batch: 10330, sum loss: 6263.657227, avg loss: 3.351341, ppl: 28.540989 +epoch: 0, batch: 10331, sum loss: 5169.558594, avg loss: 3.572604, ppl: 35.609215 +epoch: 0, batch: 10332, sum loss: 5847.995605, avg loss: 3.758352, ppl: 42.877697 +epoch: 0, batch: 10333, sum loss: 6904.073242, avg loss: 3.874340, ppl: 48.150898 +epoch: 0, batch: 10334, sum loss: 5634.990234, avg loss: 3.640175, ppl: 38.098488 +epoch: 0, batch: 10335, sum loss: 6817.566895, avg loss: 3.891305, ppl: 48.974777 +epoch: 0, batch: 10336, sum loss: 6585.545898, avg loss: 3.733303, ppl: 41.816994 +epoch: 0, batch: 10337, sum loss: 6417.626465, avg loss: 3.669312, ppl: 39.224899 +epoch: 0, batch: 10338, sum loss: 6705.236328, avg loss: 3.708648, ppl: 40.798626 +epoch: 0, batch: 10339, sum loss: 6032.895996, avg loss: 3.582480, ppl: 35.962616 +epoch: 0, batch: 10340, sum loss: 6392.989258, avg loss: 3.716854, ppl: 41.134785 +epoch: 0, batch: 10341, sum loss: 5870.366211, avg loss: 3.755832, ppl: 42.769810 +epoch: 0, batch: 10342, sum loss: 6518.050293, avg loss: 3.811725, ppl: 45.228409 +epoch: 0, batch: 10343, sum loss: 5756.615723, avg loss: 3.645735, ppl: 
38.310921 +epoch: 0, batch: 10344, sum loss: 6462.696777, avg loss: 3.926304, ppl: 50.719189 +epoch: 0, batch: 10345, sum loss: 6118.804199, avg loss: 3.703877, ppl: 40.604412 +epoch: 0, batch: 10346, sum loss: 6311.947266, avg loss: 3.862881, ppl: 47.602295 +epoch: 0, batch: 10347, sum loss: 6249.735840, avg loss: 3.631456, ppl: 37.767761 +epoch: 0, batch: 10348, sum loss: 7099.360840, avg loss: 3.734540, ppl: 41.868771 +epoch: 0, batch: 10349, sum loss: 7108.508789, avg loss: 3.651006, ppl: 38.513401 +epoch: 0, batch: 10350, sum loss: 6651.652344, avg loss: 3.796605, ppl: 44.549698 +epoch: 0, batch: 10351, sum loss: 5353.033691, avg loss: 3.566312, ppl: 35.385834 +epoch: 0, batch: 10352, sum loss: 5941.008301, avg loss: 3.850297, ppl: 47.007030 +epoch: 0, batch: 10353, sum loss: 5527.806641, avg loss: 3.639109, ppl: 38.057907 +epoch: 0, batch: 10354, sum loss: 5840.218750, avg loss: 3.453707, ppl: 31.617388 +epoch: 0, batch: 10355, sum loss: 6459.126953, avg loss: 3.576482, ppl: 35.747570 +epoch: 0, batch: 10356, sum loss: 6660.470215, avg loss: 3.836676, ppl: 46.371098 +epoch: 0, batch: 10357, sum loss: 7110.872559, avg loss: 3.800573, ppl: 44.726822 +epoch: 0, batch: 10358, sum loss: 5678.953613, avg loss: 3.626407, ppl: 37.577564 +epoch: 0, batch: 10359, sum loss: 6526.496582, avg loss: 3.710345, ppl: 40.867905 +epoch: 0, batch: 10360, sum loss: 6486.873535, avg loss: 4.014154, ppl: 55.376450 +epoch: 0, batch: 10361, sum loss: 6886.690918, avg loss: 3.767336, ppl: 43.264671 +epoch: 0, batch: 10362, sum loss: 5568.695801, avg loss: 3.848442, ppl: 46.919910 +epoch: 0, batch: 10363, sum loss: 6536.505371, avg loss: 3.682538, ppl: 39.747147 +epoch: 0, batch: 10364, sum loss: 5069.862305, avg loss: 3.650009, ppl: 38.474998 +epoch: 0, batch: 10365, sum loss: 5268.192871, avg loss: 3.502788, ppl: 33.207893 +epoch: 0, batch: 10366, sum loss: 6041.875488, avg loss: 3.472342, ppl: 32.212105 +epoch: 0, batch: 10367, sum loss: 6021.345215, avg loss: 3.662619, ppl: 
38.963242 +epoch: 0, batch: 10368, sum loss: 6417.130371, avg loss: 3.735233, ppl: 41.897789 +epoch: 0, batch: 10369, sum loss: 7192.511719, avg loss: 3.711307, ppl: 40.907219 +epoch: 0, batch: 10370, sum loss: 6729.753906, avg loss: 3.720152, ppl: 41.270653 +epoch: 0, batch: 10371, sum loss: 6325.595703, avg loss: 3.541767, ppl: 34.527882 +epoch: 0, batch: 10372, sum loss: 6017.761719, avg loss: 3.818377, ppl: 45.530231 +epoch: 0, batch: 10373, sum loss: 7387.503418, avg loss: 3.845655, ppl: 46.789330 +epoch: 0, batch: 10374, sum loss: 7296.077148, avg loss: 3.733919, ppl: 41.842754 +epoch: 0, batch: 10375, sum loss: 5818.940430, avg loss: 3.437059, ppl: 31.095369 +epoch: 0, batch: 10376, sum loss: 4911.901855, avg loss: 3.481149, ppl: 32.497055 +epoch: 0, batch: 10377, sum loss: 6601.997070, avg loss: 3.688267, ppl: 39.975491 +epoch: 0, batch: 10378, sum loss: 6276.858887, avg loss: 3.801853, ppl: 44.784088 +epoch: 0, batch: 10379, sum loss: 5851.583008, avg loss: 3.578950, ppl: 35.835884 +epoch: 0, batch: 10380, sum loss: 6053.930664, avg loss: 3.797949, ppl: 44.609589 +epoch: 0, batch: 10381, sum loss: 5000.679688, avg loss: 3.618437, ppl: 37.279247 +epoch: 0, batch: 10382, sum loss: 6597.467773, avg loss: 3.844678, ppl: 46.743637 +epoch: 0, batch: 10383, sum loss: 6179.851562, avg loss: 3.553681, ppl: 34.941715 +epoch: 0, batch: 10384, sum loss: 6477.839844, avg loss: 3.799320, ppl: 44.670788 +epoch: 0, batch: 10385, sum loss: 6007.281250, avg loss: 3.715078, ppl: 41.061794 +epoch: 0, batch: 10386, sum loss: 6407.630371, avg loss: 3.888125, ppl: 48.819275 +epoch: 0, batch: 10387, sum loss: 5634.179688, avg loss: 3.649080, ppl: 38.439297 +epoch: 0, batch: 10388, sum loss: 6067.113770, avg loss: 3.400849, ppl: 29.989540 +epoch: 0, batch: 10389, sum loss: 6988.231445, avg loss: 3.789713, ppl: 44.243717 +epoch: 0, batch: 10390, sum loss: 7061.372070, avg loss: 3.738154, ppl: 42.020336 +epoch: 0, batch: 10391, sum loss: 6756.938965, avg loss: 3.521073, ppl: 
33.820694 +epoch: 0, batch: 10392, sum loss: 5286.209473, avg loss: 3.395125, ppl: 29.818378 +epoch: 0, batch: 10393, sum loss: 7426.621582, avg loss: 3.878131, ppl: 48.333813 +epoch: 0, batch: 10394, sum loss: 6963.228516, avg loss: 3.743671, ppl: 42.252834 +epoch: 0, batch: 10395, sum loss: 5671.003906, avg loss: 3.430734, ppl: 30.899319 +epoch: 0, batch: 10396, sum loss: 6270.838867, avg loss: 3.732642, ppl: 41.789387 +epoch: 0, batch: 10397, sum loss: 6953.346191, avg loss: 3.803800, ppl: 44.871365 +epoch: 0, batch: 10398, sum loss: 6475.159180, avg loss: 3.693759, ppl: 40.195656 +epoch: 0, batch: 10399, sum loss: 6581.695801, avg loss: 3.806649, ppl: 44.999393 +epoch: 0, batch: 10400, sum loss: 5705.344238, avg loss: 3.519645, ppl: 33.772427 +epoch: 0, batch: 10401, sum loss: 5412.899414, avg loss: 3.699863, ppl: 40.441761 +epoch: 0, batch: 10402, sum loss: 6846.596680, avg loss: 3.649572, ppl: 38.458199 +epoch: 0, batch: 10403, sum loss: 6036.453613, avg loss: 3.789362, ppl: 44.228172 +epoch: 0, batch: 10404, sum loss: 6521.443359, avg loss: 4.081003, ppl: 59.204834 +epoch: 0, batch: 10405, sum loss: 6353.550781, avg loss: 3.709020, ppl: 40.813774 +epoch: 0, batch: 10406, sum loss: 5770.216309, avg loss: 3.652036, ppl: 38.553070 +epoch: 0, batch: 10407, sum loss: 4989.206543, avg loss: 3.530932, ppl: 34.155777 +epoch: 0, batch: 10408, sum loss: 5590.531250, avg loss: 3.461629, ppl: 31.868860 +epoch: 0, batch: 10409, sum loss: 6417.577637, avg loss: 4.056623, ppl: 57.778862 +epoch: 0, batch: 10410, sum loss: 6588.106445, avg loss: 3.694956, ppl: 40.243805 +epoch: 0, batch: 10411, sum loss: 6084.465332, avg loss: 3.719111, ppl: 41.227726 +epoch: 0, batch: 10412, sum loss: 5653.594727, avg loss: 3.671165, ppl: 39.297668 +epoch: 0, batch: 10413, sum loss: 6011.584473, avg loss: 3.759590, ppl: 42.930817 +epoch: 0, batch: 10414, sum loss: 5900.380859, avg loss: 3.789583, ppl: 44.237957 +epoch: 0, batch: 10415, sum loss: 6998.409668, avg loss: 3.776800, ppl: 
43.676041 +epoch: 0, batch: 10416, sum loss: 5688.442871, avg loss: 3.698597, ppl: 40.390614 +epoch: 0, batch: 10417, sum loss: 6400.187988, avg loss: 3.650992, ppl: 38.512840 +epoch: 0, batch: 10418, sum loss: 6266.196289, avg loss: 3.666587, ppl: 39.118153 +epoch: 0, batch: 10419, sum loss: 6181.842285, avg loss: 3.470995, ppl: 32.168739 +epoch: 0, batch: 10420, sum loss: 6004.663574, avg loss: 3.851612, ppl: 47.068893 +epoch: 0, batch: 10421, sum loss: 5342.475586, avg loss: 3.573562, ppl: 35.643333 +epoch: 0, batch: 10422, sum loss: 5402.103516, avg loss: 3.535408, ppl: 34.309010 +epoch: 0, batch: 10423, sum loss: 6587.105469, avg loss: 3.631260, ppl: 37.760361 +epoch: 0, batch: 10424, sum loss: 5165.573242, avg loss: 3.469156, ppl: 32.109631 +epoch: 0, batch: 10425, sum loss: 7269.377441, avg loss: 3.762618, ppl: 43.061005 +epoch: 0, batch: 10426, sum loss: 6147.729004, avg loss: 3.694549, ppl: 40.227421 +epoch: 0, batch: 10427, sum loss: 5971.872559, avg loss: 3.986564, ppl: 53.869457 +epoch: 0, batch: 10428, sum loss: 6773.646973, avg loss: 3.723830, ppl: 41.422749 +epoch: 0, batch: 10429, sum loss: 5958.285645, avg loss: 3.783038, ppl: 43.949375 +epoch: 0, batch: 10430, sum loss: 5821.963379, avg loss: 3.652424, ppl: 38.568027 +epoch: 0, batch: 10431, sum loss: 5154.365723, avg loss: 3.463955, ppl: 31.943075 +epoch: 0, batch: 10432, sum loss: 7205.510742, avg loss: 3.659477, ppl: 38.841034 +epoch: 0, batch: 10433, sum loss: 7224.391113, avg loss: 3.806318, ppl: 44.984501 +epoch: 0, batch: 10434, sum loss: 7283.967285, avg loss: 3.884783, ppl: 48.656361 +epoch: 0, batch: 10435, sum loss: 8257.849609, avg loss: 3.953016, ppl: 52.092216 +epoch: 0, batch: 10436, sum loss: 6128.668457, avg loss: 3.620005, ppl: 37.337761 +epoch: 0, batch: 10437, sum loss: 7200.326660, avg loss: 3.900502, ppl: 49.427254 +epoch: 0, batch: 10438, sum loss: 4783.313477, avg loss: 3.433821, ppl: 30.994862 +epoch: 0, batch: 10439, sum loss: 6237.802246, avg loss: 3.616117, ppl: 
37.192886 +epoch: 0, batch: 10440, sum loss: 5806.293945, avg loss: 3.644880, ppl: 38.278183 +epoch: 0, batch: 10441, sum loss: 6340.309082, avg loss: 3.810282, ppl: 45.163174 +epoch: 0, batch: 10442, sum loss: 6288.515137, avg loss: 3.641294, ppl: 38.141171 +epoch: 0, batch: 10443, sum loss: 6554.089355, avg loss: 3.538925, ppl: 34.429886 +epoch: 0, batch: 10444, sum loss: 6654.620605, avg loss: 3.835516, ppl: 46.317329 +epoch: 0, batch: 10445, sum loss: 7803.273926, avg loss: 3.634501, ppl: 37.882954 +epoch: 0, batch: 10446, sum loss: 6114.118652, avg loss: 3.680987, ppl: 39.685532 +epoch: 0, batch: 10447, sum loss: 7181.333496, avg loss: 3.928519, ppl: 50.831665 +epoch: 0, batch: 10448, sum loss: 5370.041992, avg loss: 3.675594, ppl: 39.472080 +epoch: 0, batch: 10449, sum loss: 6012.797852, avg loss: 3.652976, ppl: 38.589329 +epoch: 0, batch: 10450, sum loss: 5803.375000, avg loss: 3.677677, ppl: 39.554417 +epoch: 0, batch: 10451, sum loss: 5294.938965, avg loss: 3.541765, ppl: 34.527813 +epoch: 0, batch: 10452, sum loss: 5306.948242, avg loss: 3.590628, ppl: 36.256844 +epoch: 0, batch: 10453, sum loss: 6913.071289, avg loss: 3.885931, ppl: 48.712273 +epoch: 0, batch: 10454, sum loss: 6135.363281, avg loss: 3.676072, ppl: 39.490955 +epoch: 0, batch: 10455, sum loss: 5831.559570, avg loss: 3.721481, ppl: 41.325535 +epoch: 0, batch: 10456, sum loss: 5577.171875, avg loss: 3.503249, ppl: 33.223209 +epoch: 0, batch: 10457, sum loss: 4308.668457, avg loss: 3.322026, ppl: 27.716461 +epoch: 0, batch: 10458, sum loss: 5569.519531, avg loss: 3.522783, ppl: 33.878567 +epoch: 0, batch: 10459, sum loss: 5942.213379, avg loss: 3.638832, ppl: 38.047382 +epoch: 0, batch: 10460, sum loss: 5952.887695, avg loss: 3.720555, ppl: 41.287296 +epoch: 0, batch: 10461, sum loss: 6441.846680, avg loss: 3.944793, ppl: 51.665630 +epoch: 0, batch: 10462, sum loss: 6497.901367, avg loss: 3.537235, ppl: 34.371765 +epoch: 0, batch: 10463, sum loss: 5963.584961, avg loss: 3.708697, ppl: 
40.800613 +epoch: 0, batch: 10464, sum loss: 7412.618164, avg loss: 3.887057, ppl: 48.767170 +epoch: 0, batch: 10465, sum loss: 6267.077148, avg loss: 3.564890, ppl: 35.335571 +epoch: 0, batch: 10466, sum loss: 5311.581543, avg loss: 3.693728, ppl: 40.194431 +epoch: 0, batch: 10467, sum loss: 6565.931641, avg loss: 3.664025, ppl: 39.018059 +epoch: 0, batch: 10468, sum loss: 6380.753418, avg loss: 3.771131, ppl: 43.429146 +epoch: 0, batch: 10469, sum loss: 7894.608398, avg loss: 3.775518, ppl: 43.620106 +epoch: 0, batch: 10470, sum loss: 5006.771484, avg loss: 3.472102, ppl: 32.204376 +epoch: 0, batch: 10471, sum loss: 6687.030273, avg loss: 3.883293, ppl: 48.583923 +epoch: 0, batch: 10472, sum loss: 6242.940430, avg loss: 3.687502, ppl: 39.944935 +epoch: 0, batch: 10473, sum loss: 6221.989258, avg loss: 3.557455, ppl: 35.073830 +epoch: 0, batch: 10474, sum loss: 6228.883301, avg loss: 3.770511, ppl: 43.402222 +epoch: 0, batch: 10475, sum loss: 5577.566895, avg loss: 3.488159, ppl: 32.725658 +epoch: 0, batch: 10476, sum loss: 7138.042480, avg loss: 3.823269, ppl: 45.753517 +epoch: 0, batch: 10477, sum loss: 5502.539062, avg loss: 3.575399, ppl: 35.708862 +epoch: 0, batch: 10478, sum loss: 7050.529297, avg loss: 3.748288, ppl: 42.448334 +epoch: 0, batch: 10479, sum loss: 5655.753906, avg loss: 3.526031, ppl: 33.988808 +epoch: 0, batch: 10480, sum loss: 6235.651367, avg loss: 3.420544, ppl: 30.586046 +epoch: 0, batch: 10481, sum loss: 7927.567383, avg loss: 3.649893, ppl: 38.470543 +epoch: 0, batch: 10482, sum loss: 6564.710449, avg loss: 3.657220, ppl: 38.753471 +epoch: 0, batch: 10483, sum loss: 6646.157715, avg loss: 3.635754, ppl: 37.930428 +epoch: 0, batch: 10484, sum loss: 5855.283203, avg loss: 3.842049, ppl: 46.620918 +epoch: 0, batch: 10485, sum loss: 5550.556641, avg loss: 3.702840, ppl: 40.562332 +epoch: 0, batch: 10486, sum loss: 5308.150879, avg loss: 3.761978, ppl: 43.033459 +epoch: 0, batch: 10487, sum loss: 5798.891113, avg loss: 3.658606, ppl: 
38.807220 +epoch: 0, batch: 10488, sum loss: 6117.627441, avg loss: 3.558829, ppl: 35.122055 +epoch: 0, batch: 10489, sum loss: 6215.099609, avg loss: 3.936099, ppl: 51.218388 +epoch: 0, batch: 10490, sum loss: 5829.109863, avg loss: 3.379194, ppl: 29.347115 +epoch: 0, batch: 10491, sum loss: 6201.700195, avg loss: 3.844824, ppl: 46.750439 +epoch: 0, batch: 10492, sum loss: 5954.629395, avg loss: 3.595791, ppl: 36.444504 +epoch: 0, batch: 10493, sum loss: 5149.352539, avg loss: 3.744984, ppl: 42.308315 +epoch: 0, batch: 10494, sum loss: 6300.021973, avg loss: 3.509762, ppl: 33.440292 +epoch: 0, batch: 10495, sum loss: 4817.772461, avg loss: 3.275168, ppl: 26.447674 +epoch: 0, batch: 10496, sum loss: 4910.959473, avg loss: 3.460860, ppl: 31.844336 +epoch: 0, batch: 10497, sum loss: 7271.452148, avg loss: 4.082792, ppl: 59.310822 +epoch: 0, batch: 10498, sum loss: 6368.545898, avg loss: 3.852720, ppl: 47.121048 +epoch: 0, batch: 10499, sum loss: 4734.923828, avg loss: 3.411328, ppl: 30.305473 +epoch: 0, batch: 10500, sum loss: 6525.405273, avg loss: 3.605196, ppl: 36.788910 +epoch: 0, batch: 10501, sum loss: 5983.819824, avg loss: 3.456857, ppl: 31.717138 +epoch: 0, batch: 10502, sum loss: 6831.895508, avg loss: 3.626272, ppl: 37.572475 +epoch: 0, batch: 10503, sum loss: 6864.345703, avg loss: 4.117784, ppl: 61.422951 +epoch: 0, batch: 10504, sum loss: 7206.265625, avg loss: 3.920710, ppl: 50.436256 +epoch: 0, batch: 10505, sum loss: 7116.498047, avg loss: 3.821965, ppl: 45.693897 +epoch: 0, batch: 10506, sum loss: 5944.454590, avg loss: 3.685341, ppl: 39.858707 +epoch: 0, batch: 10507, sum loss: 4825.766113, avg loss: 3.289547, ppl: 26.830711 +epoch: 0, batch: 10508, sum loss: 6127.115723, avg loss: 3.789187, ppl: 44.220455 +epoch: 0, batch: 10509, sum loss: 5641.489258, avg loss: 3.721299, ppl: 41.318039 +epoch: 0, batch: 10510, sum loss: 7234.587402, avg loss: 3.883300, ppl: 48.584259 +epoch: 0, batch: 10511, sum loss: 6231.221191, avg loss: 3.776498, ppl: 
43.662849 +epoch: 0, batch: 10512, sum loss: 6876.512695, avg loss: 3.638366, ppl: 38.029663 +epoch: 0, batch: 10513, sum loss: 6743.983887, avg loss: 3.990523, ppl: 54.083172 +epoch: 0, batch: 10514, sum loss: 5632.579590, avg loss: 3.417827, ppl: 30.503071 +epoch: 0, batch: 10515, sum loss: 4750.203613, avg loss: 3.319499, ppl: 27.646503 +epoch: 0, batch: 10516, sum loss: 6032.909668, avg loss: 3.489248, ppl: 32.761303 +epoch: 0, batch: 10517, sum loss: 5791.908203, avg loss: 3.510247, ppl: 33.456547 +epoch: 0, batch: 10518, sum loss: 6284.420898, avg loss: 3.749655, ppl: 42.506393 +epoch: 0, batch: 10519, sum loss: 6193.258301, avg loss: 3.649534, ppl: 38.456722 +epoch: 0, batch: 10520, sum loss: 7027.806641, avg loss: 3.804985, ppl: 44.924580 +epoch: 0, batch: 10521, sum loss: 6536.481934, avg loss: 3.737268, ppl: 41.983135 +epoch: 0, batch: 10522, sum loss: 6679.321289, avg loss: 3.727300, ppl: 41.566723 +epoch: 0, batch: 10523, sum loss: 6590.214355, avg loss: 4.116312, ppl: 61.332603 +epoch: 0, batch: 10524, sum loss: 6489.397461, avg loss: 3.766336, ppl: 43.221432 +epoch: 0, batch: 10525, sum loss: 6034.323242, avg loss: 3.596140, ppl: 36.457241 +epoch: 0, batch: 10526, sum loss: 5718.730469, avg loss: 3.711052, ppl: 40.896793 +epoch: 0, batch: 10527, sum loss: 6662.039551, avg loss: 3.907355, ppl: 49.767136 +epoch: 0, batch: 10528, sum loss: 5656.688477, avg loss: 3.511290, ppl: 33.491455 +epoch: 0, batch: 10529, sum loss: 6003.601562, avg loss: 3.826387, ppl: 45.896423 +epoch: 0, batch: 10530, sum loss: 6203.581055, avg loss: 3.550991, ppl: 34.847828 +epoch: 0, batch: 10531, sum loss: 6293.815918, avg loss: 3.518064, ppl: 33.719070 +epoch: 0, batch: 10532, sum loss: 6206.050781, avg loss: 4.009077, ppl: 55.095997 +epoch: 0, batch: 10533, sum loss: 6629.474121, avg loss: 3.743351, ppl: 42.239285 +epoch: 0, batch: 10534, sum loss: 5467.732910, avg loss: 3.467174, ppl: 32.046047 +epoch: 0, batch: 10535, sum loss: 5930.537109, avg loss: 3.695039, ppl: 
40.247124 +epoch: 0, batch: 10536, sum loss: 5446.131348, avg loss: 3.625920, ppl: 37.559246 +epoch: 0, batch: 10537, sum loss: 6478.238281, avg loss: 3.744646, ppl: 42.294044 +epoch: 0, batch: 10538, sum loss: 6186.365723, avg loss: 3.630496, ppl: 37.731537 +epoch: 0, batch: 10539, sum loss: 5185.627930, avg loss: 3.429648, ppl: 30.865788 +epoch: 0, batch: 10540, sum loss: 6775.945312, avg loss: 3.747757, ppl: 42.425831 +epoch: 0, batch: 10541, sum loss: 6429.166504, avg loss: 3.575732, ppl: 35.720760 +epoch: 0, batch: 10542, sum loss: 6019.039551, avg loss: 3.582762, ppl: 35.972752 +epoch: 0, batch: 10543, sum loss: 5601.073242, avg loss: 3.576675, ppl: 35.754456 +epoch: 0, batch: 10544, sum loss: 6790.103516, avg loss: 3.909098, ppl: 49.853981 +epoch: 0, batch: 10545, sum loss: 5779.596680, avg loss: 3.583135, ppl: 35.986176 +epoch: 0, batch: 10546, sum loss: 5923.417969, avg loss: 3.596489, ppl: 36.469971 +epoch: 0, batch: 10547, sum loss: 6780.648438, avg loss: 3.631842, ppl: 37.782333 +epoch: 0, batch: 10548, sum loss: 5518.557129, avg loss: 3.609259, ppl: 36.938675 +epoch: 0, batch: 10549, sum loss: 5206.088379, avg loss: 3.723955, ppl: 41.427906 +epoch: 0, batch: 10550, sum loss: 5679.541016, avg loss: 3.549713, ppl: 34.803333 +epoch: 0, batch: 10551, sum loss: 6061.753418, avg loss: 3.786230, ppl: 44.089851 +epoch: 0, batch: 10552, sum loss: 5516.331055, avg loss: 3.665336, ppl: 39.069275 +epoch: 0, batch: 10553, sum loss: 4930.522461, avg loss: 3.499306, ppl: 33.092484 +epoch: 0, batch: 10554, sum loss: 6924.723145, avg loss: 3.821591, ppl: 45.676830 +epoch: 0, batch: 10555, sum loss: 5349.160645, avg loss: 3.556623, ppl: 35.044643 +epoch: 0, batch: 10556, sum loss: 6295.914062, avg loss: 3.654042, ppl: 38.630486 +epoch: 0, batch: 10557, sum loss: 6369.196289, avg loss: 3.647879, ppl: 38.393143 +epoch: 0, batch: 10558, sum loss: 6112.679688, avg loss: 3.898393, ppl: 49.323109 +epoch: 0, batch: 10559, sum loss: 5709.568359, avg loss: 3.437428, ppl: 
31.106855 +epoch: 0, batch: 10560, sum loss: 6311.350098, avg loss: 3.569768, ppl: 35.508362 +epoch: 0, batch: 10561, sum loss: 6574.815430, avg loss: 4.106693, ppl: 60.745518 +epoch: 0, batch: 10562, sum loss: 6431.692871, avg loss: 3.611282, ppl: 37.013470 +epoch: 0, batch: 10563, sum loss: 6334.390137, avg loss: 3.636274, ppl: 37.950184 +epoch: 0, batch: 10564, sum loss: 6088.278320, avg loss: 3.503037, ppl: 33.216175 +epoch: 0, batch: 10565, sum loss: 5125.397461, avg loss: 3.489038, ppl: 32.754429 +epoch: 0, batch: 10566, sum loss: 5433.791992, avg loss: 3.600923, ppl: 36.632015 +epoch: 0, batch: 10567, sum loss: 5884.208984, avg loss: 3.666174, ppl: 39.102005 +epoch: 0, batch: 10568, sum loss: 6041.750488, avg loss: 3.659449, ppl: 38.839951 +epoch: 0, batch: 10569, sum loss: 6282.541016, avg loss: 3.515692, ppl: 33.639191 +epoch: 0, batch: 10570, sum loss: 4629.723145, avg loss: 3.153762, ppl: 23.424028 +epoch: 0, batch: 10571, sum loss: 6239.937012, avg loss: 3.632094, ppl: 37.791859 +epoch: 0, batch: 10572, sum loss: 6151.316406, avg loss: 3.622684, ppl: 37.437901 +epoch: 0, batch: 10573, sum loss: 6592.998047, avg loss: 3.756694, ppl: 42.806667 +epoch: 0, batch: 10574, sum loss: 6314.219238, avg loss: 3.831444, ppl: 46.129089 +epoch: 0, batch: 10575, sum loss: 7557.877930, avg loss: 3.811335, ppl: 45.210770 +epoch: 0, batch: 10576, sum loss: 6354.859375, avg loss: 3.821322, ppl: 45.664558 +epoch: 0, batch: 10577, sum loss: 6668.192871, avg loss: 3.810396, ppl: 45.168320 +epoch: 0, batch: 10578, sum loss: 7380.774414, avg loss: 3.602135, ppl: 36.676453 +epoch: 0, batch: 10579, sum loss: 5433.775879, avg loss: 3.441277, ppl: 31.226795 +epoch: 0, batch: 10580, sum loss: 6350.962891, avg loss: 3.749093, ppl: 42.482513 +epoch: 0, batch: 10581, sum loss: 5733.108398, avg loss: 3.651661, ppl: 38.538639 +epoch: 0, batch: 10582, sum loss: 6617.071289, avg loss: 3.643762, ppl: 38.235394 +epoch: 0, batch: 10583, sum loss: 5565.390625, avg loss: 3.720181, ppl: 
41.271862 +epoch: 0, batch: 10584, sum loss: 6519.524414, avg loss: 3.777245, ppl: 43.695477 +epoch: 0, batch: 10585, sum loss: 6528.773438, avg loss: 3.789190, ppl: 44.220547 +epoch: 0, batch: 10586, sum loss: 5014.648926, avg loss: 3.370060, ppl: 29.080263 +epoch: 0, batch: 10587, sum loss: 5557.780273, avg loss: 3.558118, ppl: 35.097084 +epoch: 0, batch: 10588, sum loss: 5540.500000, avg loss: 3.520013, ppl: 33.784855 +epoch: 0, batch: 10589, sum loss: 6499.572754, avg loss: 3.889631, ppl: 48.892830 +epoch: 0, batch: 10590, sum loss: 6522.635742, avg loss: 3.790026, ppl: 44.257568 +epoch: 0, batch: 10591, sum loss: 7083.848633, avg loss: 3.814673, ppl: 45.361931 +epoch: 0, batch: 10592, sum loss: 5526.963867, avg loss: 3.593605, ppl: 36.364922 +epoch: 0, batch: 10593, sum loss: 6362.850098, avg loss: 3.915600, ppl: 50.179173 +epoch: 0, batch: 10594, sum loss: 5193.015625, avg loss: 3.429997, ppl: 30.876556 +epoch: 0, batch: 10595, sum loss: 5947.049805, avg loss: 3.584720, ppl: 36.043251 +epoch: 0, batch: 10596, sum loss: 5597.320312, avg loss: 3.531432, ppl: 34.172882 +epoch: 0, batch: 10597, sum loss: 6577.266602, avg loss: 3.986222, ppl: 53.851070 +epoch: 0, batch: 10598, sum loss: 6461.241211, avg loss: 3.711224, ppl: 40.903847 +epoch: 0, batch: 10599, sum loss: 4764.265625, avg loss: 3.285701, ppl: 26.727701 +epoch: 0, batch: 10600, sum loss: 7210.126953, avg loss: 4.003402, ppl: 54.784195 +epoch: 0, batch: 10601, sum loss: 7610.943359, avg loss: 3.836161, ppl: 46.347202 +epoch: 0, batch: 10602, sum loss: 6755.995605, avg loss: 3.867198, ppl: 47.808250 +epoch: 0, batch: 10603, sum loss: 6375.558594, avg loss: 3.536083, ppl: 34.332191 +epoch: 0, batch: 10604, sum loss: 5911.393555, avg loss: 3.694621, ppl: 40.230316 +epoch: 0, batch: 10605, sum loss: 6481.533203, avg loss: 3.682689, ppl: 39.753155 +epoch: 0, batch: 10606, sum loss: 6834.927246, avg loss: 3.684597, ppl: 39.829067 +epoch: 0, batch: 10607, sum loss: 5650.254883, avg loss: 3.389475, ppl: 
29.650377 +epoch: 0, batch: 10608, sum loss: 6540.557129, avg loss: 3.564337, ppl: 35.316013 +epoch: 0, batch: 10609, sum loss: 6224.910645, avg loss: 3.479548, ppl: 32.445038 +epoch: 0, batch: 10610, sum loss: 4596.813477, avg loss: 3.255534, ppl: 25.933451 +epoch: 0, batch: 10611, sum loss: 6102.964844, avg loss: 3.552366, ppl: 34.895782 +epoch: 0, batch: 10612, sum loss: 5754.171387, avg loss: 3.481047, ppl: 32.493736 +epoch: 0, batch: 10613, sum loss: 4959.318359, avg loss: 3.297419, ppl: 27.042753 +epoch: 0, batch: 10614, sum loss: 5602.383789, avg loss: 3.432833, ppl: 30.964254 +epoch: 0, batch: 10615, sum loss: 5111.571777, avg loss: 3.439819, ppl: 31.181324 +epoch: 0, batch: 10616, sum loss: 6687.261230, avg loss: 3.603050, ppl: 36.710037 +epoch: 0, batch: 10617, sum loss: 7030.020508, avg loss: 4.010280, ppl: 55.162323 +epoch: 0, batch: 10618, sum loss: 6627.010254, avg loss: 3.932944, ppl: 51.057060 +epoch: 0, batch: 10619, sum loss: 7246.554688, avg loss: 3.680322, ppl: 39.659180 +epoch: 0, batch: 10620, sum loss: 4666.807617, avg loss: 3.277253, ppl: 26.502861 +epoch: 0, batch: 10621, sum loss: 6581.734375, avg loss: 3.833276, ppl: 46.213676 +epoch: 0, batch: 10622, sum loss: 5626.839844, avg loss: 3.552298, ppl: 34.893406 +epoch: 0, batch: 10623, sum loss: 5839.788086, avg loss: 3.705449, ppl: 40.668308 +epoch: 0, batch: 10624, sum loss: 7238.550293, avg loss: 3.856447, ppl: 47.296993 +epoch: 0, batch: 10625, sum loss: 6142.658203, avg loss: 3.508086, ppl: 33.384308 +epoch: 0, batch: 10626, sum loss: 6207.367188, avg loss: 3.625798, ppl: 37.554699 +epoch: 0, batch: 10627, sum loss: 5993.482422, avg loss: 3.914750, ppl: 50.136551 +epoch: 0, batch: 10628, sum loss: 6061.375977, avg loss: 3.666894, ppl: 39.130177 +epoch: 0, batch: 10629, sum loss: 6028.446777, avg loss: 3.571355, ppl: 35.564747 +epoch: 0, batch: 10630, sum loss: 6757.789551, avg loss: 3.702899, ppl: 40.564713 +epoch: 0, batch: 10631, sum loss: 5771.651367, avg loss: 3.760033, ppl: 
42.949860 +epoch: 0, batch: 10632, sum loss: 5407.222656, avg loss: 3.379514, ppl: 29.356501 +epoch: 0, batch: 10633, sum loss: 5211.838867, avg loss: 3.404206, ppl: 30.090389 +epoch: 0, batch: 10634, sum loss: 6876.187500, avg loss: 3.902490, ppl: 49.525620 +epoch: 0, batch: 10635, sum loss: 5893.718750, avg loss: 3.681273, ppl: 39.696915 +epoch: 0, batch: 10636, sum loss: 6588.318359, avg loss: 3.625932, ppl: 37.559711 +epoch: 0, batch: 10637, sum loss: 6058.492676, avg loss: 3.716867, ppl: 41.135303 +epoch: 0, batch: 10638, sum loss: 6062.196777, avg loss: 3.460158, ppl: 31.822008 +epoch: 0, batch: 10639, sum loss: 6826.950684, avg loss: 3.894439, ppl: 49.128471 +epoch: 0, batch: 10640, sum loss: 5876.234375, avg loss: 3.855797, ppl: 47.266266 +epoch: 0, batch: 10641, sum loss: 6721.447754, avg loss: 3.709408, ppl: 40.829639 +epoch: 0, batch: 10642, sum loss: 6865.667969, avg loss: 3.903166, ppl: 49.559086 +epoch: 0, batch: 10643, sum loss: 6337.434570, avg loss: 3.619323, ppl: 37.312309 +epoch: 0, batch: 10644, sum loss: 6204.773926, avg loss: 3.632771, ppl: 37.817482 +epoch: 0, batch: 10645, sum loss: 8071.065430, avg loss: 3.900950, ppl: 49.449402 +epoch: 0, batch: 10646, sum loss: 5951.405273, avg loss: 3.839617, ppl: 46.507637 +epoch: 0, batch: 10647, sum loss: 6531.578125, avg loss: 3.653008, ppl: 38.590572 +epoch: 0, batch: 10648, sum loss: 5560.831055, avg loss: 3.519513, ppl: 33.767990 +epoch: 0, batch: 10649, sum loss: 6064.131836, avg loss: 3.639935, ppl: 38.089363 +epoch: 0, batch: 10650, sum loss: 6022.302734, avg loss: 3.649880, ppl: 38.470066 +epoch: 0, batch: 10651, sum loss: 6229.331055, avg loss: 3.634382, ppl: 37.878445 +epoch: 0, batch: 10652, sum loss: 5827.584961, avg loss: 3.487484, ppl: 32.703552 +epoch: 0, batch: 10653, sum loss: 6733.165039, avg loss: 3.659329, ppl: 38.835262 +epoch: 0, batch: 10654, sum loss: 6002.273926, avg loss: 3.387288, ppl: 29.585604 +epoch: 0, batch: 10655, sum loss: 5455.731445, avg loss: 3.627481, ppl: 
37.617935 +epoch: 0, batch: 10656, sum loss: 5788.891113, avg loss: 3.504172, ppl: 33.253883 +epoch: 0, batch: 10657, sum loss: 6764.895508, avg loss: 3.878954, ppl: 48.373573 +epoch: 0, batch: 10658, sum loss: 5686.036133, avg loss: 3.623987, ppl: 37.486736 +epoch: 0, batch: 10659, sum loss: 4892.473633, avg loss: 3.252974, ppl: 25.867161 +epoch: 0, batch: 10660, sum loss: 7400.675781, avg loss: 3.799115, ppl: 44.661640 +epoch: 0, batch: 10661, sum loss: 7211.097656, avg loss: 3.817415, ppl: 45.486496 +epoch: 0, batch: 10662, sum loss: 5164.077637, avg loss: 3.390727, ppl: 29.687536 +epoch: 0, batch: 10663, sum loss: 5713.301270, avg loss: 3.618304, ppl: 37.274281 +epoch: 0, batch: 10664, sum loss: 5486.087402, avg loss: 3.454715, ppl: 31.649275 +epoch: 0, batch: 10665, sum loss: 6197.045898, avg loss: 3.748969, ppl: 42.477268 +epoch: 0, batch: 10666, sum loss: 6435.849609, avg loss: 3.595447, ppl: 36.431965 +epoch: 0, batch: 10667, sum loss: 6139.500000, avg loss: 3.571553, ppl: 35.571812 +epoch: 0, batch: 10668, sum loss: 5374.042969, avg loss: 3.442693, ppl: 31.271042 +epoch: 0, batch: 10669, sum loss: 6939.571777, avg loss: 3.995148, ppl: 54.333878 +epoch: 0, batch: 10670, sum loss: 6643.442383, avg loss: 3.781128, ppl: 43.865501 +epoch: 0, batch: 10671, sum loss: 6885.748047, avg loss: 3.752451, ppl: 42.625446 +epoch: 0, batch: 10672, sum loss: 6937.453613, avg loss: 3.847728, ppl: 46.886417 +epoch: 0, batch: 10673, sum loss: 5612.687012, avg loss: 3.736809, ppl: 41.963860 +epoch: 0, batch: 10674, sum loss: 6268.028320, avg loss: 3.627331, ppl: 37.612305 +epoch: 0, batch: 10675, sum loss: 5922.937988, avg loss: 3.638168, ppl: 38.022129 +epoch: 0, batch: 10676, sum loss: 6157.375977, avg loss: 3.532631, ppl: 34.213871 +epoch: 0, batch: 10677, sum loss: 5550.455078, avg loss: 3.737680, ppl: 42.000435 +epoch: 0, batch: 10678, sum loss: 5898.943848, avg loss: 3.268113, ppl: 26.261734 +epoch: 0, batch: 10679, sum loss: 6513.281738, avg loss: 3.704938, ppl: 
40.647545 +epoch: 0, batch: 10680, sum loss: 6954.072266, avg loss: 3.531779, ppl: 34.184723 +epoch: 0, batch: 10681, sum loss: 6579.276367, avg loss: 3.675573, ppl: 39.471283 +epoch: 0, batch: 10682, sum loss: 5931.782715, avg loss: 3.432745, ppl: 30.961508 +epoch: 0, batch: 10683, sum loss: 5413.767578, avg loss: 3.660424, ppl: 38.877834 +epoch: 0, batch: 10684, sum loss: 5725.863281, avg loss: 3.288836, ppl: 26.811636 +epoch: 0, batch: 10685, sum loss: 5524.851562, avg loss: 3.651587, ppl: 38.535793 +epoch: 0, batch: 10686, sum loss: 5902.618652, avg loss: 3.579514, ppl: 35.856110 +epoch: 0, batch: 10687, sum loss: 6779.875000, avg loss: 3.562730, ppl: 35.259319 +epoch: 0, batch: 10688, sum loss: 6525.027344, avg loss: 3.463391, ppl: 31.925037 +epoch: 0, batch: 10689, sum loss: 6734.055176, avg loss: 3.722529, ppl: 41.368900 +epoch: 0, batch: 10690, sum loss: 7077.372070, avg loss: 3.925331, ppl: 50.669853 +epoch: 0, batch: 10691, sum loss: 6356.818359, avg loss: 3.510115, ppl: 33.452126 +epoch: 0, batch: 10692, sum loss: 5541.620605, avg loss: 3.612530, ppl: 37.059685 +epoch: 0, batch: 10693, sum loss: 7575.227051, avg loss: 3.652472, ppl: 38.569893 +epoch: 0, batch: 10694, sum loss: 7904.908691, avg loss: 3.917200, ppl: 50.259502 +epoch: 0, batch: 10695, sum loss: 7375.316895, avg loss: 3.772541, ppl: 43.490414 +epoch: 0, batch: 10696, sum loss: 5303.619629, avg loss: 3.578691, ppl: 35.826614 +epoch: 0, batch: 10697, sum loss: 6502.160156, avg loss: 3.622373, ppl: 37.426289 +epoch: 0, batch: 10698, sum loss: 5048.177734, avg loss: 3.415547, ppl: 30.433582 +epoch: 0, batch: 10699, sum loss: 7339.157715, avg loss: 3.667745, ppl: 39.163498 +epoch: 0, batch: 10700, sum loss: 6946.549316, avg loss: 3.906946, ppl: 49.746777 +epoch: 0, batch: 10701, sum loss: 6134.376465, avg loss: 3.682099, ppl: 39.729683 +epoch: 0, batch: 10702, sum loss: 5859.003906, avg loss: 3.412349, ppl: 30.336435 +epoch: 0, batch: 10703, sum loss: 5561.314941, avg loss: 3.737443, ppl: 
41.990471 +epoch: 0, batch: 10704, sum loss: 6563.388672, avg loss: 3.733441, ppl: 41.822758 +epoch: 0, batch: 10705, sum loss: 7783.779297, avg loss: 4.049833, ppl: 57.387890 +epoch: 0, batch: 10706, sum loss: 6533.008789, avg loss: 3.833925, ppl: 46.243710 +epoch: 0, batch: 10707, sum loss: 6091.956543, avg loss: 3.710083, ppl: 40.857208 +epoch: 0, batch: 10708, sum loss: 6615.436035, avg loss: 3.624897, ppl: 37.520840 +epoch: 0, batch: 10709, sum loss: 7264.223633, avg loss: 3.769706, ppl: 43.367325 +epoch: 0, batch: 10710, sum loss: 5576.919434, avg loss: 3.514127, ppl: 33.586594 +epoch: 0, batch: 10711, sum loss: 5758.113770, avg loss: 3.714912, ppl: 41.054981 +epoch: 0, batch: 10712, sum loss: 5740.195312, avg loss: 3.554301, ppl: 34.963356 +epoch: 0, batch: 10713, sum loss: 7257.513672, avg loss: 3.779955, ppl: 43.814075 +epoch: 0, batch: 10714, sum loss: 8011.918945, avg loss: 4.011978, ppl: 55.256042 +epoch: 0, batch: 10715, sum loss: 6720.798828, avg loss: 3.797062, ppl: 44.570030 +epoch: 0, batch: 10716, sum loss: 5529.333984, avg loss: 3.676419, ppl: 39.504665 +epoch: 0, batch: 10717, sum loss: 7000.722656, avg loss: 3.865667, ppl: 47.735107 +epoch: 0, batch: 10718, sum loss: 5628.608887, avg loss: 3.626681, ppl: 37.587860 +epoch: 0, batch: 10719, sum loss: 6592.185547, avg loss: 3.722296, ppl: 41.359226 +epoch: 0, batch: 10720, sum loss: 6603.271484, avg loss: 3.861562, ppl: 47.539562 +epoch: 0, batch: 10721, sum loss: 5693.715332, avg loss: 3.706846, ppl: 40.725155 +epoch: 0, batch: 10722, sum loss: 6575.235352, avg loss: 3.689807, ppl: 40.037106 +epoch: 0, batch: 10723, sum loss: 5781.707520, avg loss: 3.613567, ppl: 37.098152 +epoch: 0, batch: 10724, sum loss: 6154.587402, avg loss: 3.628884, ppl: 37.670757 +epoch: 0, batch: 10725, sum loss: 5530.208984, avg loss: 3.859183, ppl: 47.426590 +epoch: 0, batch: 10726, sum loss: 5127.567383, avg loss: 3.621163, ppl: 37.381031 +epoch: 0, batch: 10727, sum loss: 6644.272949, avg loss: 3.711884, ppl: 
40.930859 +epoch: 0, batch: 10728, sum loss: 5824.279785, avg loss: 3.613077, ppl: 37.079960 +epoch: 0, batch: 10729, sum loss: 5303.355469, avg loss: 3.605272, ppl: 36.791702 +epoch: 0, batch: 10730, sum loss: 7003.029297, avg loss: 3.664589, ppl: 39.040073 +epoch: 0, batch: 10731, sum loss: 4945.232910, avg loss: 3.405808, ppl: 30.138630 +epoch: 0, batch: 10732, sum loss: 6027.547363, avg loss: 3.718413, ppl: 41.198956 +epoch: 0, batch: 10733, sum loss: 6553.192383, avg loss: 3.582937, ppl: 35.979073 +epoch: 0, batch: 10734, sum loss: 5874.137207, avg loss: 3.794662, ppl: 44.463226 +epoch: 0, batch: 10735, sum loss: 5761.527344, avg loss: 3.554304, ppl: 34.963482 +epoch: 0, batch: 10736, sum loss: 5360.794922, avg loss: 3.605108, ppl: 36.785641 +epoch: 0, batch: 10737, sum loss: 5578.450684, avg loss: 3.585123, ppl: 36.057777 +epoch: 0, batch: 10738, sum loss: 7113.060059, avg loss: 3.958297, ppl: 52.368080 +epoch: 0, batch: 10739, sum loss: 7843.566406, avg loss: 4.216971, ppl: 67.827751 +epoch: 0, batch: 10740, sum loss: 5983.143555, avg loss: 3.519496, ppl: 33.767422 +epoch: 0, batch: 10741, sum loss: 6349.720215, avg loss: 3.724176, ppl: 41.437080 +epoch: 0, batch: 10742, sum loss: 8059.333008, avg loss: 3.946784, ppl: 51.768612 +epoch: 0, batch: 10743, sum loss: 6387.850586, avg loss: 3.507881, ppl: 33.377457 +epoch: 0, batch: 10744, sum loss: 6311.633789, avg loss: 3.765891, ppl: 43.202194 +epoch: 0, batch: 10745, sum loss: 6705.067871, avg loss: 3.831467, ppl: 46.130180 +epoch: 0, batch: 10746, sum loss: 6377.192871, avg loss: 3.696923, ppl: 40.323059 +epoch: 0, batch: 10747, sum loss: 5410.589355, avg loss: 3.631268, ppl: 37.760677 +epoch: 0, batch: 10748, sum loss: 5893.223145, avg loss: 3.571651, ppl: 35.575260 +epoch: 0, batch: 10749, sum loss: 5567.799316, avg loss: 3.629595, ppl: 37.697556 +epoch: 0, batch: 10750, sum loss: 7055.447754, avg loss: 3.900192, ppl: 49.411961 +epoch: 0, batch: 10751, sum loss: 6387.978027, avg loss: 3.848180, ppl: 
46.907593 +epoch: 0, batch: 10752, sum loss: 6094.700684, avg loss: 3.498680, ppl: 33.071766 +epoch: 0, batch: 10753, sum loss: 6644.409180, avg loss: 4.005069, ppl: 54.875599 +epoch: 0, batch: 10754, sum loss: 6374.260742, avg loss: 3.716770, ppl: 41.131321 +epoch: 0, batch: 10755, sum loss: 6255.194824, avg loss: 3.729991, ppl: 41.678730 +epoch: 0, batch: 10756, sum loss: 7388.788574, avg loss: 3.842324, ppl: 46.633705 +epoch: 0, batch: 10757, sum loss: 4962.484375, avg loss: 3.415337, ppl: 30.427197 +epoch: 0, batch: 10758, sum loss: 6377.318359, avg loss: 3.537060, ppl: 34.365719 +epoch: 0, batch: 10759, sum loss: 6450.248535, avg loss: 3.792033, ppl: 44.346474 +epoch: 0, batch: 10760, sum loss: 6145.708984, avg loss: 3.653810, ppl: 38.621544 +epoch: 0, batch: 10761, sum loss: 6153.997070, avg loss: 3.637114, ppl: 37.982063 +epoch: 0, batch: 10762, sum loss: 6206.297363, avg loss: 3.740987, ppl: 42.139553 +epoch: 0, batch: 10763, sum loss: 7020.123535, avg loss: 3.867837, ppl: 47.838783 +epoch: 0, batch: 10764, sum loss: 7167.084961, avg loss: 3.988361, ppl: 53.966385 +epoch: 0, batch: 10765, sum loss: 5217.628906, avg loss: 3.687370, ppl: 39.939671 +epoch: 0, batch: 10766, sum loss: 5792.730957, avg loss: 3.577969, ppl: 35.800743 +epoch: 0, batch: 10767, sum loss: 5621.692871, avg loss: 3.520159, ppl: 33.789783 +epoch: 0, batch: 10768, sum loss: 6277.115234, avg loss: 3.626294, ppl: 37.573318 +epoch: 0, batch: 10769, sum loss: 6112.097168, avg loss: 3.686428, ppl: 39.902054 +epoch: 0, batch: 10770, sum loss: 7051.992676, avg loss: 3.686352, ppl: 39.899048 +epoch: 0, batch: 10771, sum loss: 4958.501465, avg loss: 3.477210, ppl: 32.369286 +epoch: 0, batch: 10772, sum loss: 4573.617188, avg loss: 3.088195, ppl: 21.937452 +epoch: 0, batch: 10773, sum loss: 7198.918945, avg loss: 3.961981, ppl: 52.561337 +epoch: 0, batch: 10774, sum loss: 5431.007324, avg loss: 3.632781, ppl: 37.817833 +epoch: 0, batch: 10775, sum loss: 7190.521484, avg loss: 4.101838, ppl: 
60.451275 +epoch: 0, batch: 10776, sum loss: 6114.645996, avg loss: 3.644008, ppl: 38.244831 +epoch: 0, batch: 10777, sum loss: 6533.377930, avg loss: 3.629654, ppl: 37.699787 +epoch: 0, batch: 10778, sum loss: 6390.785156, avg loss: 3.810844, ppl: 45.188549 +epoch: 0, batch: 10779, sum loss: 5762.592773, avg loss: 3.597124, ppl: 36.493134 +epoch: 0, batch: 10780, sum loss: 6758.661133, avg loss: 3.950124, ppl: 51.941795 +epoch: 0, batch: 10781, sum loss: 5974.819336, avg loss: 3.554324, ppl: 34.964191 +epoch: 0, batch: 10782, sum loss: 7061.881836, avg loss: 3.905908, ppl: 49.695187 +epoch: 0, batch: 10783, sum loss: 6003.879395, avg loss: 3.729118, ppl: 41.642349 +epoch: 0, batch: 10784, sum loss: 5766.454590, avg loss: 3.507576, ppl: 33.367279 +epoch: 0, batch: 10785, sum loss: 5504.333496, avg loss: 3.408256, ppl: 30.212509 +epoch: 0, batch: 10786, sum loss: 4949.425293, avg loss: 3.323993, ppl: 27.771011 +epoch: 0, batch: 10787, sum loss: 5781.926270, avg loss: 3.597963, ppl: 36.523754 +epoch: 0, batch: 10788, sum loss: 6766.891113, avg loss: 3.533625, ppl: 34.247879 +epoch: 0, batch: 10789, sum loss: 6233.955566, avg loss: 3.721765, ppl: 41.337280 +epoch: 0, batch: 10790, sum loss: 6927.336914, avg loss: 3.756690, ppl: 42.806515 +epoch: 0, batch: 10791, sum loss: 7288.985352, avg loss: 4.169900, ppl: 64.709007 +epoch: 0, batch: 10792, sum loss: 6389.158203, avg loss: 3.712468, ppl: 40.954773 +epoch: 0, batch: 10793, sum loss: 6402.026367, avg loss: 3.906056, ppl: 49.702545 +epoch: 0, batch: 10794, sum loss: 6085.193359, avg loss: 3.403352, ppl: 30.064709 +epoch: 0, batch: 10795, sum loss: 6274.206055, avg loss: 3.432279, ppl: 30.947094 +epoch: 0, batch: 10796, sum loss: 6456.971191, avg loss: 3.715173, ppl: 41.065701 +epoch: 0, batch: 10797, sum loss: 6154.119629, avg loss: 3.601006, ppl: 36.635082 +epoch: 0, batch: 10798, sum loss: 6780.516602, avg loss: 3.828637, ppl: 45.999802 +epoch: 0, batch: 10799, sum loss: 6066.576172, avg loss: 3.581214, ppl: 
35.917107 +epoch: 0, batch: 10800, sum loss: 7213.712891, avg loss: 3.672970, ppl: 39.368660 +epoch: 0, batch: 10801, sum loss: 7322.554199, avg loss: 3.872319, ppl: 48.053669 +epoch: 0, batch: 10802, sum loss: 6078.877930, avg loss: 3.495617, ppl: 32.970631 +epoch: 0, batch: 10803, sum loss: 7881.651367, avg loss: 3.869245, ppl: 47.906181 +epoch: 0, batch: 10804, sum loss: 6191.765625, avg loss: 3.879552, ppl: 48.402542 +epoch: 0, batch: 10805, sum loss: 6474.261230, avg loss: 3.984161, ppl: 53.740166 +epoch: 0, batch: 10806, sum loss: 6626.726562, avg loss: 3.875279, ppl: 48.196129 +epoch: 0, batch: 10807, sum loss: 7235.569336, avg loss: 3.619595, ppl: 37.322433 +epoch: 0, batch: 10808, sum loss: 5521.902344, avg loss: 3.544225, ppl: 34.612850 +epoch: 0, batch: 10809, sum loss: 5377.748535, avg loss: 3.643461, ppl: 38.223911 +epoch: 0, batch: 10810, sum loss: 6210.216797, avg loss: 3.674685, ppl: 39.436214 +epoch: 0, batch: 10811, sum loss: 5697.108887, avg loss: 3.562920, ppl: 35.266018 +epoch: 0, batch: 10812, sum loss: 5407.628906, avg loss: 3.569392, ppl: 35.495014 +epoch: 0, batch: 10813, sum loss: 6214.808105, avg loss: 3.683941, ppl: 39.802944 +epoch: 0, batch: 10814, sum loss: 8060.835449, avg loss: 3.901663, ppl: 49.484665 +epoch: 0, batch: 10815, sum loss: 6076.071289, avg loss: 3.658080, ppl: 38.786816 +epoch: 0, batch: 10816, sum loss: 6697.566406, avg loss: 3.932805, ppl: 51.049950 +epoch: 0, batch: 10817, sum loss: 6146.330566, avg loss: 3.628294, ppl: 37.648540 +epoch: 0, batch: 10818, sum loss: 5898.678223, avg loss: 3.439462, ppl: 31.170197 +epoch: 0, batch: 10819, sum loss: 6650.080566, avg loss: 3.522289, ppl: 33.861835 +epoch: 0, batch: 10820, sum loss: 5902.004883, avg loss: 3.486122, ppl: 32.659061 +epoch: 0, batch: 10821, sum loss: 4987.012207, avg loss: 3.460800, ppl: 31.842430 +epoch: 0, batch: 10822, sum loss: 7255.040039, avg loss: 3.906860, ppl: 49.742531 +epoch: 0, batch: 10823, sum loss: 6359.929199, avg loss: 3.619766, ppl: 
37.328831 +epoch: 0, batch: 10824, sum loss: 6281.265137, avg loss: 3.666821, ppl: 39.127342 +epoch: 0, batch: 10825, sum loss: 5839.195312, avg loss: 3.667836, ppl: 39.167065 +epoch: 0, batch: 10826, sum loss: 6690.771484, avg loss: 3.686376, ppl: 39.899971 +epoch: 0, batch: 10827, sum loss: 6700.130859, avg loss: 3.802572, ppl: 44.816284 +epoch: 0, batch: 10828, sum loss: 6067.029297, avg loss: 3.731260, ppl: 41.731670 +epoch: 0, batch: 10829, sum loss: 6546.258789, avg loss: 3.624728, ppl: 37.514515 +epoch: 0, batch: 10830, sum loss: 5115.018555, avg loss: 3.449102, ppl: 31.472130 +epoch: 0, batch: 10831, sum loss: 7047.249023, avg loss: 3.746544, ppl: 42.374378 +epoch: 0, batch: 10832, sum loss: 6130.084473, avg loss: 3.648860, ppl: 38.430828 +epoch: 0, batch: 10833, sum loss: 5695.141113, avg loss: 3.599962, ppl: 36.596863 +epoch: 0, batch: 10834, sum loss: 6715.260742, avg loss: 3.895163, ppl: 49.164070 +epoch: 0, batch: 10835, sum loss: 5672.442383, avg loss: 3.603839, ppl: 36.738995 +epoch: 0, batch: 10836, sum loss: 6816.063965, avg loss: 3.544495, ppl: 34.622192 +epoch: 0, batch: 10837, sum loss: 6555.183594, avg loss: 3.394709, ppl: 29.805988 +epoch: 0, batch: 10838, sum loss: 6044.006836, avg loss: 3.909448, ppl: 49.871422 +epoch: 0, batch: 10839, sum loss: 5380.366211, avg loss: 3.496014, ppl: 32.983727 +epoch: 0, batch: 10840, sum loss: 7601.196289, avg loss: 3.908070, ppl: 49.802757 +epoch: 0, batch: 10841, sum loss: 6769.922852, avg loss: 3.732041, ppl: 41.764278 +epoch: 0, batch: 10842, sum loss: 6680.770508, avg loss: 3.906883, ppl: 49.743671 +epoch: 0, batch: 10843, sum loss: 5478.916016, avg loss: 3.606923, ppl: 36.852478 +epoch: 0, batch: 10844, sum loss: 5434.327148, avg loss: 3.644753, ppl: 38.273335 +epoch: 0, batch: 10845, sum loss: 6447.556641, avg loss: 3.806114, ppl: 44.975311 +epoch: 0, batch: 10846, sum loss: 6548.323242, avg loss: 3.716415, ppl: 41.116734 +epoch: 0, batch: 10847, sum loss: 6212.381348, avg loss: 3.566235, ppl: 
35.383118 +epoch: 0, batch: 10848, sum loss: 6644.069824, avg loss: 3.887695, ppl: 48.798256 +epoch: 0, batch: 10849, sum loss: 5694.995117, avg loss: 3.546074, ppl: 34.676914 +epoch: 0, batch: 10850, sum loss: 6140.070801, avg loss: 3.984472, ppl: 53.756874 +epoch: 0, batch: 10851, sum loss: 5698.653320, avg loss: 3.606743, ppl: 36.845837 +epoch: 0, batch: 10852, sum loss: 4962.898438, avg loss: 3.614638, ppl: 37.137901 +epoch: 0, batch: 10853, sum loss: 6164.297852, avg loss: 3.916326, ppl: 50.215626 +epoch: 0, batch: 10854, sum loss: 6694.847168, avg loss: 3.799573, ppl: 44.682098 +epoch: 0, batch: 10855, sum loss: 5201.644531, avg loss: 3.404218, ppl: 30.090748 +epoch: 0, batch: 10856, sum loss: 6034.936523, avg loss: 3.592224, ppl: 36.314762 +epoch: 0, batch: 10857, sum loss: 5456.311523, avg loss: 3.639968, ppl: 38.090607 +epoch: 0, batch: 10858, sum loss: 5588.907227, avg loss: 3.686615, ppl: 39.909504 +epoch: 0, batch: 10859, sum loss: 7011.668945, avg loss: 3.865308, ppl: 47.717979 +epoch: 0, batch: 10860, sum loss: 7352.045898, avg loss: 3.772215, ppl: 43.476242 +epoch: 0, batch: 10861, sum loss: 5671.137695, avg loss: 3.542247, ppl: 34.544464 +epoch: 0, batch: 10862, sum loss: 6059.107910, avg loss: 3.578918, ppl: 35.834747 +epoch: 0, batch: 10863, sum loss: 6101.448730, avg loss: 3.633978, ppl: 37.863125 +epoch: 0, batch: 10864, sum loss: 6141.427734, avg loss: 3.529556, ppl: 34.108829 +epoch: 0, batch: 10865, sum loss: 6601.259766, avg loss: 3.915338, ppl: 50.166027 +epoch: 0, batch: 10866, sum loss: 6449.786621, avg loss: 3.758617, ppl: 42.889065 +epoch: 0, batch: 10867, sum loss: 6204.283691, avg loss: 3.464145, ppl: 31.949129 +epoch: 0, batch: 10868, sum loss: 6940.433594, avg loss: 3.717426, ppl: 41.158298 +epoch: 0, batch: 10869, sum loss: 6382.664062, avg loss: 3.666091, ppl: 39.098759 +epoch: 0, batch: 10870, sum loss: 6480.761230, avg loss: 3.821204, ppl: 45.659134 +epoch: 0, batch: 10871, sum loss: 5589.637695, avg loss: 3.410395, ppl: 
30.277206 +epoch: 0, batch: 10872, sum loss: 7608.684082, avg loss: 3.885947, ppl: 48.713051 +epoch: 0, batch: 10873, sum loss: 5456.545898, avg loss: 3.460080, ppl: 31.819519 +epoch: 0, batch: 10874, sum loss: 5683.406738, avg loss: 3.497481, ppl: 33.032143 +epoch: 0, batch: 10875, sum loss: 7345.987305, avg loss: 3.769106, ppl: 43.341297 +epoch: 0, batch: 10876, sum loss: 7002.854004, avg loss: 3.775123, ppl: 43.602886 +epoch: 0, batch: 10877, sum loss: 6181.215820, avg loss: 3.625347, ppl: 37.537735 +epoch: 0, batch: 10878, sum loss: 7585.076172, avg loss: 3.844438, ppl: 46.732418 +epoch: 0, batch: 10879, sum loss: 6442.489746, avg loss: 3.611261, ppl: 37.012711 +epoch: 0, batch: 10880, sum loss: 5757.379883, avg loss: 3.657802, ppl: 38.776005 +epoch: 0, batch: 10881, sum loss: 6973.059082, avg loss: 3.795895, ppl: 44.518066 +epoch: 0, batch: 10882, sum loss: 5816.626465, avg loss: 3.516703, ppl: 33.673222 +epoch: 0, batch: 10883, sum loss: 6309.927734, avg loss: 3.585186, ppl: 36.060066 +epoch: 0, batch: 10884, sum loss: 6223.610840, avg loss: 3.713372, ppl: 40.991787 +epoch: 0, batch: 10885, sum loss: 5839.696777, avg loss: 3.576054, ppl: 35.732273 +epoch: 0, batch: 10886, sum loss: 6628.479492, avg loss: 3.738567, ppl: 42.037724 +epoch: 0, batch: 10887, sum loss: 6606.881348, avg loss: 3.749649, ppl: 42.506161 +epoch: 0, batch: 10888, sum loss: 5383.636719, avg loss: 3.500414, ppl: 33.129162 +epoch: 0, batch: 10889, sum loss: 5769.827148, avg loss: 3.615180, ppl: 37.158035 +epoch: 0, batch: 10890, sum loss: 6661.517090, avg loss: 3.702900, ppl: 40.564770 +epoch: 0, batch: 10891, sum loss: 5405.926270, avg loss: 3.570625, ppl: 35.538799 +epoch: 0, batch: 10892, sum loss: 6453.481934, avg loss: 3.613372, ppl: 37.090908 +epoch: 0, batch: 10893, sum loss: 6536.365723, avg loss: 3.745768, ppl: 42.341526 +epoch: 0, batch: 10894, sum loss: 7705.831055, avg loss: 3.729831, ppl: 41.672073 +epoch: 0, batch: 10895, sum loss: 7059.233887, avg loss: 3.684360, ppl: 
39.819641 +epoch: 0, batch: 10896, sum loss: 5956.526855, avg loss: 3.560387, ppl: 35.176796 +epoch: 0, batch: 10897, sum loss: 6796.458984, avg loss: 3.695736, ppl: 40.275219 +epoch: 0, batch: 10898, sum loss: 5643.836914, avg loss: 3.581115, ppl: 35.913555 +epoch: 0, batch: 10899, sum loss: 6188.840332, avg loss: 3.571172, ppl: 35.558228 +epoch: 0, batch: 10900, sum loss: 5175.941895, avg loss: 3.290491, ppl: 26.856037 +epoch: 0, batch: 10901, sum loss: 7243.921387, avg loss: 3.782727, ppl: 43.935669 +epoch: 0, batch: 10902, sum loss: 6614.191895, avg loss: 3.906788, ppl: 49.738937 +epoch: 0, batch: 10903, sum loss: 4904.031738, avg loss: 3.482977, ppl: 32.556503 +epoch: 0, batch: 10904, sum loss: 6309.416992, avg loss: 3.713606, ppl: 41.001408 +epoch: 0, batch: 10905, sum loss: 6163.331543, avg loss: 3.548262, ppl: 34.752876 +epoch: 0, batch: 10906, sum loss: 6875.592285, avg loss: 3.809192, ppl: 45.113991 +epoch: 0, batch: 10907, sum loss: 6423.064941, avg loss: 3.653620, ppl: 38.614216 +epoch: 0, batch: 10908, sum loss: 7055.166992, avg loss: 3.680317, ppl: 39.658962 +epoch: 0, batch: 10909, sum loss: 6774.883301, avg loss: 3.567606, ppl: 35.431660 +epoch: 0, batch: 10910, sum loss: 7476.998047, avg loss: 4.068007, ppl: 58.440403 +epoch: 0, batch: 10911, sum loss: 6078.261230, avg loss: 3.633151, ppl: 37.831829 +epoch: 0, batch: 10912, sum loss: 6605.008789, avg loss: 3.780772, ppl: 43.849880 +epoch: 0, batch: 10913, sum loss: 6227.323730, avg loss: 3.663132, ppl: 38.983234 +epoch: 0, batch: 10914, sum loss: 6427.787109, avg loss: 3.607064, ppl: 36.857662 +epoch: 0, batch: 10915, sum loss: 5464.927246, avg loss: 3.528036, ppl: 34.057003 +epoch: 0, batch: 10916, sum loss: 5928.984863, avg loss: 3.694072, ppl: 40.208221 +epoch: 0, batch: 10917, sum loss: 7984.821289, avg loss: 4.000412, ppl: 54.620621 +epoch: 0, batch: 10918, sum loss: 6058.592773, avg loss: 3.939267, ppl: 51.380932 +epoch: 0, batch: 10919, sum loss: 6722.222656, avg loss: 3.910543, ppl: 
49.926041 +epoch: 0, batch: 10920, sum loss: 6131.038086, avg loss: 3.564557, ppl: 35.323803 +epoch: 0, batch: 10921, sum loss: 6770.698730, avg loss: 3.936453, ppl: 51.236523 +epoch: 0, batch: 10922, sum loss: 8289.818359, avg loss: 3.936286, ppl: 51.227985 +epoch: 0, batch: 10923, sum loss: 5655.079102, avg loss: 3.684091, ppl: 39.808903 +epoch: 0, batch: 10924, sum loss: 5942.506836, avg loss: 3.977582, ppl: 53.387760 +epoch: 0, batch: 10925, sum loss: 5415.376465, avg loss: 3.773782, ppl: 43.544418 +epoch: 0, batch: 10926, sum loss: 5671.198242, avg loss: 3.548935, ppl: 34.776268 +epoch: 0, batch: 10927, sum loss: 5608.151367, avg loss: 3.372310, ppl: 29.145773 +epoch: 0, batch: 10928, sum loss: 7108.797852, avg loss: 3.916693, ppl: 50.234055 +epoch: 0, batch: 10929, sum loss: 4486.526367, avg loss: 3.223079, ppl: 25.105305 +epoch: 0, batch: 10930, sum loss: 6131.428223, avg loss: 3.754702, ppl: 42.721481 +epoch: 0, batch: 10931, sum loss: 7111.527344, avg loss: 3.677108, ppl: 39.531914 +epoch: 0, batch: 10932, sum loss: 6525.142578, avg loss: 3.483792, ppl: 32.583046 +epoch: 0, batch: 10933, sum loss: 6356.174316, avg loss: 3.609412, ppl: 36.944313 +epoch: 0, batch: 10934, sum loss: 4988.331543, avg loss: 3.756274, ppl: 42.788696 +epoch: 0, batch: 10935, sum loss: 5735.871582, avg loss: 3.591654, ppl: 36.294067 +epoch: 0, batch: 10936, sum loss: 5869.050293, avg loss: 3.576508, ppl: 35.748501 +epoch: 0, batch: 10937, sum loss: 7255.810547, avg loss: 3.943375, ppl: 51.592438 +epoch: 0, batch: 10938, sum loss: 6954.413574, avg loss: 4.026875, ppl: 56.085400 +epoch: 0, batch: 10939, sum loss: 5783.016113, avg loss: 3.481647, ppl: 32.513237 +epoch: 0, batch: 10940, sum loss: 6146.010742, avg loss: 3.520052, ppl: 33.786190 +epoch: 0, batch: 10941, sum loss: 6598.430664, avg loss: 3.641518, ppl: 38.149708 +epoch: 0, batch: 10942, sum loss: 6728.384766, avg loss: 3.937030, ppl: 51.266132 +epoch: 0, batch: 10943, sum loss: 6286.414062, avg loss: 3.741913, ppl: 
42.178616 +epoch: 0, batch: 10944, sum loss: 6425.005371, avg loss: 3.833536, ppl: 46.225689 +epoch: 0, batch: 10945, sum loss: 6089.574707, avg loss: 3.624747, ppl: 37.515232 +epoch: 0, batch: 10946, sum loss: 6984.136230, avg loss: 3.808144, ppl: 45.066711 +epoch: 0, batch: 10947, sum loss: 6601.731445, avg loss: 3.723481, ppl: 41.408283 +epoch: 0, batch: 10948, sum loss: 5555.685547, avg loss: 3.491946, ppl: 32.849804 +epoch: 0, batch: 10949, sum loss: 6420.080566, avg loss: 3.876860, ppl: 48.272408 +epoch: 0, batch: 10950, sum loss: 6145.165039, avg loss: 3.867316, ppl: 47.813881 +epoch: 0, batch: 10951, sum loss: 4917.098633, avg loss: 3.360970, ppl: 28.817123 +epoch: 0, batch: 10952, sum loss: 5928.596191, avg loss: 3.947134, ppl: 51.786758 +epoch: 0, batch: 10953, sum loss: 6004.966309, avg loss: 3.580779, ppl: 35.901493 +epoch: 0, batch: 10954, sum loss: 5894.437500, avg loss: 3.354831, ppl: 28.640749 +epoch: 0, batch: 10955, sum loss: 6471.002930, avg loss: 3.653869, ppl: 38.623829 +epoch: 0, batch: 10956, sum loss: 6019.911133, avg loss: 3.764797, ppl: 43.154964 +epoch: 0, batch: 10957, sum loss: 7904.603027, avg loss: 4.006388, ppl: 54.948025 +epoch: 0, batch: 10958, sum loss: 6310.549805, avg loss: 3.747358, ppl: 42.408875 +epoch: 0, batch: 10959, sum loss: 6996.757812, avg loss: 3.790226, ppl: 44.266415 +epoch: 0, batch: 10960, sum loss: 6784.192383, avg loss: 3.559387, ppl: 35.141655 +epoch: 0, batch: 10961, sum loss: 6471.822754, avg loss: 3.668834, ppl: 39.206154 +epoch: 0, batch: 10962, sum loss: 5366.953613, avg loss: 3.405427, ppl: 30.127171 +epoch: 0, batch: 10963, sum loss: 5725.880371, avg loss: 3.633173, ppl: 37.832661 +epoch: 0, batch: 10964, sum loss: 5256.597656, avg loss: 3.391353, ppl: 29.706129 +epoch: 0, batch: 10965, sum loss: 7308.551270, avg loss: 3.623476, ppl: 37.467590 +epoch: 0, batch: 10966, sum loss: 5457.192871, avg loss: 3.597358, ppl: 36.501686 +epoch: 0, batch: 10967, sum loss: 6698.916992, avg loss: 3.617126, ppl: 
37.230404 +epoch: 0, batch: 10968, sum loss: 6201.449219, avg loss: 3.785988, ppl: 44.079212 +epoch: 0, batch: 10969, sum loss: 5129.836426, avg loss: 3.617656, ppl: 37.250160 +epoch: 0, batch: 10970, sum loss: 6324.804199, avg loss: 3.511829, ppl: 33.509499 +epoch: 0, batch: 10971, sum loss: 7111.457520, avg loss: 3.903105, ppl: 49.556095 +epoch: 0, batch: 10972, sum loss: 5321.729980, avg loss: 3.324004, ppl: 27.771322 +epoch: 0, batch: 10973, sum loss: 7319.960938, avg loss: 3.663644, ppl: 39.003216 +epoch: 0, batch: 10974, sum loss: 6743.812500, avg loss: 3.795055, ppl: 44.480679 +epoch: 0, batch: 10975, sum loss: 7166.534180, avg loss: 3.983621, ppl: 53.711163 +epoch: 0, batch: 10976, sum loss: 7431.444824, avg loss: 4.001855, ppl: 54.699516 +epoch: 0, batch: 10977, sum loss: 5894.390625, avg loss: 3.535927, ppl: 34.326824 +epoch: 0, batch: 10978, sum loss: 6092.375977, avg loss: 3.594322, ppl: 36.391026 +epoch: 0, batch: 10979, sum loss: 6267.931641, avg loss: 3.791852, ppl: 44.338440 +epoch: 0, batch: 10980, sum loss: 5753.495117, avg loss: 3.598183, ppl: 36.531803 +epoch: 0, batch: 10981, sum loss: 5472.432129, avg loss: 3.732901, ppl: 41.800179 +epoch: 0, batch: 10982, sum loss: 5649.974121, avg loss: 3.580465, ppl: 35.890228 +epoch: 0, batch: 10983, sum loss: 6722.322266, avg loss: 3.815166, ppl: 45.384289 +epoch: 0, batch: 10984, sum loss: 6650.908691, avg loss: 3.662395, ppl: 38.954510 +epoch: 0, batch: 10985, sum loss: 7213.075195, avg loss: 3.752901, ppl: 42.644608 +epoch: 0, batch: 10986, sum loss: 5996.305176, avg loss: 3.699139, ppl: 40.412510 +epoch: 0, batch: 10987, sum loss: 5986.119141, avg loss: 3.448225, ppl: 31.444536 +epoch: 0, batch: 10988, sum loss: 5855.766113, avg loss: 3.302745, ppl: 27.187155 +epoch: 0, batch: 10989, sum loss: 5853.844727, avg loss: 3.617951, ppl: 37.261139 +epoch: 0, batch: 10990, sum loss: 5807.244629, avg loss: 3.624997, ppl: 37.524597 +epoch: 0, batch: 10991, sum loss: 5353.192383, avg loss: 3.377409, ppl: 
29.294756 +epoch: 0, batch: 10992, sum loss: 6233.679688, avg loss: 3.743952, ppl: 42.264683 +epoch: 0, batch: 10993, sum loss: 6410.235352, avg loss: 3.766296, ppl: 43.219669 +epoch: 0, batch: 10994, sum loss: 5657.794434, avg loss: 3.576355, ppl: 35.743027 +epoch: 0, batch: 10995, sum loss: 7027.056641, avg loss: 3.780020, ppl: 43.816898 +epoch: 0, batch: 10996, sum loss: 6252.321777, avg loss: 3.616149, ppl: 37.194065 +epoch: 0, batch: 10997, sum loss: 7045.132812, avg loss: 3.757404, ppl: 42.837090 +epoch: 0, batch: 10998, sum loss: 7964.683105, avg loss: 3.834705, ppl: 46.279789 +epoch: 0, batch: 10999, sum loss: 5557.088867, avg loss: 3.517145, ppl: 33.688110 +epoch: 0, batch: 11000, sum loss: 6140.483398, avg loss: 3.518902, ppl: 33.747341 +epoch: 0, batch: 11001, sum loss: 6083.582031, avg loss: 3.446789, ppl: 31.399401 +epoch: 0, batch: 11002, sum loss: 6759.043945, avg loss: 3.782341, ppl: 43.918755 +epoch: 0, batch: 11003, sum loss: 6478.329102, avg loss: 3.563437, ppl: 35.284267 +epoch: 0, batch: 11004, sum loss: 6981.183105, avg loss: 3.581931, ppl: 35.942875 +epoch: 0, batch: 11005, sum loss: 6774.193359, avg loss: 3.838070, ppl: 46.435764 +epoch: 0, batch: 11006, sum loss: 6456.891602, avg loss: 3.811624, ppl: 45.223835 +epoch: 0, batch: 11007, sum loss: 5812.307129, avg loss: 3.492973, ppl: 32.883568 +epoch: 0, batch: 11008, sum loss: 5321.533203, avg loss: 3.779498, ppl: 43.794056 +epoch: 0, batch: 11009, sum loss: 6452.130859, avg loss: 3.852019, ppl: 47.088020 +epoch: 0, batch: 11010, sum loss: 6385.243652, avg loss: 3.922140, ppl: 50.508400 +epoch: 0, batch: 11011, sum loss: 6639.866211, avg loss: 3.835855, ppl: 46.333050 +epoch: 0, batch: 11012, sum loss: 8094.776855, avg loss: 3.869396, ppl: 47.913448 +epoch: 0, batch: 11013, sum loss: 6439.130859, avg loss: 3.652371, ppl: 38.566013 +epoch: 0, batch: 11014, sum loss: 5205.745117, avg loss: 3.303138, ppl: 27.197845 +epoch: 0, batch: 11015, sum loss: 6901.455078, avg loss: 3.769228, ppl: 
43.346565 +epoch: 0, batch: 11016, sum loss: 6494.922852, avg loss: 3.576499, ppl: 35.748177 +epoch: 0, batch: 11017, sum loss: 7443.021973, avg loss: 3.896870, ppl: 49.248070 +epoch: 0, batch: 11018, sum loss: 6280.387207, avg loss: 3.594955, ppl: 36.414078 +epoch: 0, batch: 11019, sum loss: 6643.849609, avg loss: 3.896686, ppl: 49.239006 +epoch: 0, batch: 11020, sum loss: 7029.855469, avg loss: 3.627377, ppl: 37.614010 +epoch: 0, batch: 11021, sum loss: 6265.181641, avg loss: 3.744879, ppl: 42.303867 +epoch: 0, batch: 11022, sum loss: 7210.505859, avg loss: 3.625191, ppl: 37.531898 +epoch: 0, batch: 11023, sum loss: 8336.914062, avg loss: 3.915883, ppl: 50.193363 +epoch: 0, batch: 11024, sum loss: 5255.879883, avg loss: 3.592536, ppl: 36.326080 +epoch: 0, batch: 11025, sum loss: 6750.900879, avg loss: 3.635380, ppl: 37.916260 +epoch: 0, batch: 11026, sum loss: 5892.749512, avg loss: 3.601925, ppl: 36.668766 +epoch: 0, batch: 11027, sum loss: 6082.845703, avg loss: 3.695532, ppl: 40.266991 +epoch: 0, batch: 11028, sum loss: 6477.130371, avg loss: 3.819063, ppl: 45.561485 +epoch: 0, batch: 11029, sum loss: 6651.666992, avg loss: 3.794448, ppl: 44.453686 +epoch: 0, batch: 11030, sum loss: 6195.292480, avg loss: 3.777617, ppl: 43.711773 +epoch: 0, batch: 11031, sum loss: 5784.473633, avg loss: 3.493040, ppl: 32.885757 +epoch: 0, batch: 11032, sum loss: 6407.860352, avg loss: 3.563882, ppl: 35.299969 +epoch: 0, batch: 11033, sum loss: 5148.747559, avg loss: 3.425647, ppl: 30.742529 +epoch: 0, batch: 11034, sum loss: 6328.187988, avg loss: 3.503980, ppl: 33.247520 +epoch: 0, batch: 11035, sum loss: 5311.212402, avg loss: 3.545536, ppl: 34.658241 +epoch: 0, batch: 11036, sum loss: 5235.912109, avg loss: 3.533004, ppl: 34.226639 +epoch: 0, batch: 11037, sum loss: 6849.791016, avg loss: 3.859037, ppl: 47.419659 +epoch: 0, batch: 11038, sum loss: 5572.929688, avg loss: 3.379581, ppl: 29.358480 +epoch: 0, batch: 11039, sum loss: 6056.728516, avg loss: 3.718065, ppl: 
41.184635 +epoch: 0, batch: 11040, sum loss: 7020.079102, avg loss: 3.712363, ppl: 40.950478 +epoch: 0, batch: 11041, sum loss: 6304.172852, avg loss: 3.467642, ppl: 32.061047 +epoch: 0, batch: 11042, sum loss: 5829.070801, avg loss: 3.661477, ppl: 38.918770 +epoch: 0, batch: 11043, sum loss: 5762.667969, avg loss: 3.668153, ppl: 39.179466 +epoch: 0, batch: 11044, sum loss: 6217.132324, avg loss: 3.711721, ppl: 40.924175 +epoch: 0, batch: 11045, sum loss: 6440.489746, avg loss: 3.636640, ppl: 37.964066 +epoch: 0, batch: 11046, sum loss: 5666.356934, avg loss: 3.679452, ppl: 39.624691 +epoch: 0, batch: 11047, sum loss: 5832.596680, avg loss: 3.647653, ppl: 38.384457 +epoch: 0, batch: 11048, sum loss: 5878.804199, avg loss: 3.597800, ppl: 36.517792 +epoch: 0, batch: 11049, sum loss: 6205.406250, avg loss: 3.605698, ppl: 36.807362 +epoch: 0, batch: 11050, sum loss: 6577.309570, avg loss: 3.728634, ppl: 41.622200 +epoch: 0, batch: 11051, sum loss: 5606.812012, avg loss: 3.550863, ppl: 34.843357 +epoch: 0, batch: 11052, sum loss: 6722.149414, avg loss: 3.772250, ppl: 43.477787 +epoch: 0, batch: 11053, sum loss: 7967.512207, avg loss: 3.979776, ppl: 53.505054 +epoch: 0, batch: 11054, sum loss: 6019.836426, avg loss: 3.572603, ppl: 35.609173 +epoch: 0, batch: 11055, sum loss: 6083.835449, avg loss: 3.781128, ppl: 43.865501 +epoch: 0, batch: 11056, sum loss: 6282.092285, avg loss: 3.633368, ppl: 37.840031 +epoch: 0, batch: 11057, sum loss: 7062.234863, avg loss: 3.606862, ppl: 36.850220 +epoch: 0, batch: 11058, sum loss: 6688.274902, avg loss: 3.789391, ppl: 44.229446 +epoch: 0, batch: 11059, sum loss: 7019.963867, avg loss: 3.734023, ppl: 41.847134 +epoch: 0, batch: 11060, sum loss: 6698.729004, avg loss: 3.793165, ppl: 44.396683 +epoch: 0, batch: 11061, sum loss: 5098.213867, avg loss: 3.567679, ppl: 35.434269 +epoch: 0, batch: 11062, sum loss: 5592.699219, avg loss: 3.371127, ppl: 29.111307 +epoch: 0, batch: 11063, sum loss: 5951.749512, avg loss: 3.724499, ppl: 
41.450470 +epoch: 0, batch: 11064, sum loss: 5406.671387, avg loss: 3.494940, ppl: 32.948296 +epoch: 0, batch: 11065, sum loss: 6920.882812, avg loss: 3.738997, ppl: 42.055775 +epoch: 0, batch: 11066, sum loss: 6859.484375, avg loss: 3.703825, ppl: 40.602322 +epoch: 0, batch: 11067, sum loss: 6482.330566, avg loss: 3.656137, ppl: 38.711510 +epoch: 0, batch: 11068, sum loss: 6303.423828, avg loss: 3.498016, ppl: 33.049805 +epoch: 0, batch: 11069, sum loss: 6174.194336, avg loss: 3.940137, ppl: 51.425655 +epoch: 0, batch: 11070, sum loss: 6810.641113, avg loss: 3.964285, ppl: 52.682571 +epoch: 0, batch: 11071, sum loss: 5252.521484, avg loss: 3.645053, ppl: 38.284809 +epoch: 0, batch: 11072, sum loss: 5999.529297, avg loss: 3.566902, ppl: 35.406731 +epoch: 0, batch: 11073, sum loss: 6240.552246, avg loss: 3.459286, ppl: 31.794273 +epoch: 0, batch: 11074, sum loss: 5393.459473, avg loss: 3.605254, ppl: 36.791016 +epoch: 0, batch: 11075, sum loss: 7260.692383, avg loss: 3.831500, ppl: 46.131676 +epoch: 0, batch: 11076, sum loss: 6223.806152, avg loss: 3.783469, ppl: 43.968292 +epoch: 0, batch: 11077, sum loss: 7001.543945, avg loss: 3.756193, ppl: 42.785240 +epoch: 0, batch: 11078, sum loss: 6679.794434, avg loss: 3.638232, ppl: 38.024559 +epoch: 0, batch: 11079, sum loss: 5457.454590, avg loss: 3.771565, ppl: 43.448006 +epoch: 0, batch: 11080, sum loss: 5887.058594, avg loss: 3.544286, ppl: 34.614952 +epoch: 0, batch: 11081, sum loss: 7231.985840, avg loss: 3.859117, ppl: 47.423470 +epoch: 0, batch: 11082, sum loss: 5328.978027, avg loss: 3.590955, ppl: 36.268715 +epoch: 0, batch: 11083, sum loss: 6469.721680, avg loss: 3.853318, ppl: 47.149265 +epoch: 0, batch: 11084, sum loss: 6386.867676, avg loss: 3.596209, ppl: 36.459763 +epoch: 0, batch: 11085, sum loss: 6712.479004, avg loss: 3.648086, ppl: 38.401108 +epoch: 0, batch: 11086, sum loss: 5981.425293, avg loss: 3.522630, ppl: 33.873398 +epoch: 0, batch: 11087, sum loss: 7046.271973, avg loss: 3.671846, ppl: 
39.324417 +epoch: 0, batch: 11088, sum loss: 5905.395996, avg loss: 3.688567, ppl: 39.987518 +epoch: 0, batch: 11089, sum loss: 6318.712402, avg loss: 3.874134, ppl: 48.140995 +epoch: 0, batch: 11090, sum loss: 6872.343262, avg loss: 3.745146, ppl: 42.315197 +epoch: 0, batch: 11091, sum loss: 5247.065430, avg loss: 3.646327, ppl: 38.333626 +epoch: 0, batch: 11092, sum loss: 6388.004395, avg loss: 3.667052, ppl: 39.136353 +epoch: 0, batch: 11093, sum loss: 6564.138184, avg loss: 3.753081, ppl: 42.652275 +epoch: 0, batch: 11094, sum loss: 6422.108398, avg loss: 3.561902, ppl: 35.230125 +epoch: 0, batch: 11095, sum loss: 6379.590820, avg loss: 3.770444, ppl: 43.399315 +epoch: 0, batch: 11096, sum loss: 6976.516602, avg loss: 3.671851, ppl: 39.324635 +epoch: 0, batch: 11097, sum loss: 6406.896484, avg loss: 3.707695, ppl: 40.759739 +epoch: 0, batch: 11098, sum loss: 5051.393555, avg loss: 3.252668, ppl: 25.859243 +epoch: 0, batch: 11099, sum loss: 5715.837891, avg loss: 3.360281, ppl: 28.797276 +epoch: 0, batch: 11100, sum loss: 5951.647949, avg loss: 3.708192, ppl: 40.779995 +epoch: 0, batch: 11101, sum loss: 7382.735352, avg loss: 4.001482, ppl: 54.679150 +epoch: 0, batch: 11102, sum loss: 5902.331055, avg loss: 3.498714, ppl: 33.072906 +epoch: 0, batch: 11103, sum loss: 5836.380859, avg loss: 3.656880, ppl: 38.740292 +epoch: 0, batch: 11104, sum loss: 6242.687012, avg loss: 3.579522, ppl: 35.856403 +epoch: 0, batch: 11105, sum loss: 7324.082520, avg loss: 3.941917, ppl: 51.517288 +epoch: 0, batch: 11106, sum loss: 7299.915527, avg loss: 3.848137, ppl: 46.905594 +epoch: 0, batch: 11107, sum loss: 6370.247070, avg loss: 3.828274, ppl: 45.983082 +epoch: 0, batch: 11108, sum loss: 6408.065918, avg loss: 3.616290, ppl: 37.199306 +epoch: 0, batch: 11109, sum loss: 6416.456055, avg loss: 3.592641, ppl: 36.329884 +epoch: 0, batch: 11110, sum loss: 6442.526367, avg loss: 3.585157, ppl: 36.059006 +epoch: 0, batch: 11111, sum loss: 5772.093750, avg loss: 3.500360, ppl: 
33.127377 +epoch: 0, batch: 11112, sum loss: 5712.811035, avg loss: 3.563825, ppl: 35.297943 +epoch: 0, batch: 11113, sum loss: 5693.799805, avg loss: 3.682924, ppl: 39.762470 +epoch: 0, batch: 11114, sum loss: 6841.088379, avg loss: 3.794281, ppl: 44.446259 +epoch: 0, batch: 11115, sum loss: 7220.790527, avg loss: 3.768680, ppl: 43.322834 +epoch: 0, batch: 11116, sum loss: 6317.954590, avg loss: 3.709897, ppl: 40.849602 +epoch: 0, batch: 11117, sum loss: 6515.695801, avg loss: 3.770657, ppl: 43.408588 +epoch: 0, batch: 11118, sum loss: 5629.805176, avg loss: 3.874608, ppl: 48.163803 +epoch: 0, batch: 11119, sum loss: 5142.765625, avg loss: 3.539412, ppl: 34.446659 +epoch: 0, batch: 11120, sum loss: 7251.067383, avg loss: 3.641922, ppl: 38.165127 +epoch: 0, batch: 11121, sum loss: 6092.819336, avg loss: 3.413344, ppl: 30.366632 +epoch: 0, batch: 11122, sum loss: 6007.670898, avg loss: 3.694754, ppl: 40.235687 +epoch: 0, batch: 11123, sum loss: 5331.083984, avg loss: 3.402096, ppl: 30.026972 +epoch: 0, batch: 11124, sum loss: 6019.171875, avg loss: 3.641362, ppl: 38.143761 +epoch: 0, batch: 11125, sum loss: 5218.364746, avg loss: 3.435395, ppl: 31.043665 +epoch: 0, batch: 11126, sum loss: 5435.473633, avg loss: 3.252827, ppl: 25.863350 +epoch: 0, batch: 11127, sum loss: 5842.624023, avg loss: 3.660792, ppl: 38.892128 +epoch: 0, batch: 11128, sum loss: 5796.354004, avg loss: 3.519341, ppl: 33.762154 +epoch: 0, batch: 11129, sum loss: 6767.785156, avg loss: 3.648402, ppl: 38.413223 +epoch: 0, batch: 11130, sum loss: 6495.200684, avg loss: 3.582571, ppl: 35.965881 +epoch: 0, batch: 11131, sum loss: 6375.774414, avg loss: 3.604169, ppl: 36.751125 +epoch: 0, batch: 11132, sum loss: 5953.106445, avg loss: 3.545626, ppl: 34.661381 +epoch: 0, batch: 11133, sum loss: 4468.078125, avg loss: 3.344370, ppl: 28.342710 +epoch: 0, batch: 11134, sum loss: 5855.973633, avg loss: 3.628237, ppl: 37.646370 +epoch: 0, batch: 11135, sum loss: 5662.356445, avg loss: 3.461098, ppl: 
31.851936 +epoch: 0, batch: 11136, sum loss: 7270.950195, avg loss: 3.792880, ppl: 44.384033 +epoch: 0, batch: 11137, sum loss: 5586.919922, avg loss: 3.576773, ppl: 35.757977 +epoch: 0, batch: 11138, sum loss: 5528.570312, avg loss: 3.446740, ppl: 31.397858 +epoch: 0, batch: 11139, sum loss: 7604.806152, avg loss: 3.787254, ppl: 44.135044 +epoch: 0, batch: 11140, sum loss: 7495.390625, avg loss: 3.883622, ppl: 48.599934 +epoch: 0, batch: 11141, sum loss: 7367.211914, avg loss: 3.841091, ppl: 46.576248 +epoch: 0, batch: 11142, sum loss: 5983.943848, avg loss: 3.576775, ppl: 35.758022 +epoch: 0, batch: 11143, sum loss: 5594.435059, avg loss: 3.593086, ppl: 36.346077 +epoch: 0, batch: 11144, sum loss: 6015.306152, avg loss: 3.336276, ppl: 28.114243 +epoch: 0, batch: 11145, sum loss: 6123.091309, avg loss: 3.557868, ppl: 35.088318 +epoch: 0, batch: 11146, sum loss: 6351.978516, avg loss: 3.560526, ppl: 35.181709 +epoch: 0, batch: 11147, sum loss: 6936.336914, avg loss: 3.759532, ppl: 42.928341 +epoch: 0, batch: 11148, sum loss: 5815.090820, avg loss: 3.509409, ppl: 33.428505 +epoch: 0, batch: 11149, sum loss: 6426.537598, avg loss: 3.513690, ppl: 33.571903 +epoch: 0, batch: 11150, sum loss: 6573.383789, avg loss: 3.639747, ppl: 38.082214 +epoch: 0, batch: 11151, sum loss: 6977.505859, avg loss: 3.741290, ppl: 42.152325 +epoch: 0, batch: 11152, sum loss: 5947.441406, avg loss: 3.502616, ppl: 33.202194 +epoch: 0, batch: 11153, sum loss: 6523.561523, avg loss: 3.779584, ppl: 43.797806 +epoch: 0, batch: 11154, sum loss: 5865.532715, avg loss: 3.638668, ppl: 38.041126 +epoch: 0, batch: 11155, sum loss: 6044.254883, avg loss: 3.645510, ppl: 38.302292 +epoch: 0, batch: 11156, sum loss: 7302.556152, avg loss: 3.718206, ppl: 41.190430 +epoch: 0, batch: 11157, sum loss: 6659.158203, avg loss: 3.701589, ppl: 40.511612 +epoch: 0, batch: 11158, sum loss: 6733.265137, avg loss: 3.629792, ppl: 37.704990 +epoch: 0, batch: 11159, sum loss: 6729.598633, avg loss: 3.720066, ppl: 
41.267109 +epoch: 0, batch: 11160, sum loss: 5325.913574, avg loss: 3.492402, ppl: 32.864803 +epoch: 0, batch: 11161, sum loss: 5288.490723, avg loss: 3.713828, ppl: 41.010487 +epoch: 0, batch: 11162, sum loss: 5058.375000, avg loss: 3.385793, ppl: 29.541416 +epoch: 0, batch: 11163, sum loss: 5929.543945, avg loss: 3.687527, ppl: 39.945946 +epoch: 0, batch: 11164, sum loss: 7057.743164, avg loss: 3.691288, ppl: 40.096466 +epoch: 0, batch: 11165, sum loss: 5083.940918, avg loss: 3.380280, ppl: 29.378990 +epoch: 0, batch: 11166, sum loss: 7361.028320, avg loss: 3.829880, ppl: 46.056999 +epoch: 0, batch: 11167, sum loss: 4695.949707, avg loss: 3.400398, ppl: 29.976028 +epoch: 0, batch: 11168, sum loss: 7056.474609, avg loss: 3.572899, ppl: 35.619690 +epoch: 0, batch: 11169, sum loss: 6746.365234, avg loss: 3.815818, ppl: 45.413872 +epoch: 0, batch: 11170, sum loss: 5903.830078, avg loss: 3.503757, ppl: 33.240093 +epoch: 0, batch: 11171, sum loss: 4902.053711, avg loss: 3.496472, ppl: 32.998829 +epoch: 0, batch: 11172, sum loss: 6764.288086, avg loss: 3.722778, ppl: 41.379200 +epoch: 0, batch: 11173, sum loss: 6047.344727, avg loss: 3.685158, ppl: 39.851429 +epoch: 0, batch: 11174, sum loss: 6301.108887, avg loss: 3.580175, ppl: 35.879833 +epoch: 0, batch: 11175, sum loss: 5809.470703, avg loss: 3.564092, ppl: 35.307392 +epoch: 0, batch: 11176, sum loss: 6579.408691, avg loss: 3.731939, ppl: 41.760006 +epoch: 0, batch: 11177, sum loss: 6769.163086, avg loss: 3.965532, ppl: 52.748329 +epoch: 0, batch: 11178, sum loss: 6470.230957, avg loss: 3.853622, ppl: 47.163567 +epoch: 0, batch: 11179, sum loss: 6094.275879, avg loss: 3.580656, ppl: 35.897068 +epoch: 0, batch: 11180, sum loss: 6683.721680, avg loss: 3.612823, ppl: 37.070538 +epoch: 0, batch: 11181, sum loss: 5479.573730, avg loss: 3.597882, ppl: 36.520794 +epoch: 0, batch: 11182, sum loss: 7263.387695, avg loss: 3.779078, ppl: 43.775661 +epoch: 0, batch: 11183, sum loss: 7480.977051, avg loss: 4.009098, ppl: 
55.097153 +epoch: 0, batch: 11184, sum loss: 6208.060059, avg loss: 3.474012, ppl: 32.265945 +epoch: 0, batch: 11185, sum loss: 6854.814453, avg loss: 3.549878, ppl: 34.809074 +epoch: 0, batch: 11186, sum loss: 5890.545898, avg loss: 3.663275, ppl: 38.988811 +epoch: 0, batch: 11187, sum loss: 6117.216309, avg loss: 3.579413, ppl: 35.852478 +epoch: 0, batch: 11188, sum loss: 6405.699707, avg loss: 3.920257, ppl: 50.413391 +epoch: 0, batch: 11189, sum loss: 5582.082031, avg loss: 3.191585, ppl: 24.326950 +epoch: 0, batch: 11190, sum loss: 6392.881348, avg loss: 3.787252, ppl: 44.134960 +epoch: 0, batch: 11191, sum loss: 5636.690430, avg loss: 3.441203, ppl: 31.224487 +epoch: 0, batch: 11192, sum loss: 6289.448242, avg loss: 3.442500, ppl: 31.265034 +epoch: 0, batch: 11193, sum loss: 6434.276367, avg loss: 3.775984, ppl: 43.640423 +epoch: 0, batch: 11194, sum loss: 5668.092773, avg loss: 3.645076, ppl: 38.285683 +epoch: 0, batch: 11195, sum loss: 4996.654297, avg loss: 3.599895, ppl: 36.594391 +epoch: 0, batch: 11196, sum loss: 5686.813477, avg loss: 3.608384, ppl: 36.906368 +epoch: 0, batch: 11197, sum loss: 5439.606445, avg loss: 3.440611, ppl: 31.206022 +epoch: 0, batch: 11198, sum loss: 5286.954590, avg loss: 3.555450, ppl: 35.003574 +epoch: 0, batch: 11199, sum loss: 6858.455566, avg loss: 3.877024, ppl: 48.280327 +epoch: 0, batch: 11200, sum loss: 6909.012207, avg loss: 3.702579, ppl: 40.551743 +epoch: 0, batch: 11201, sum loss: 5883.965820, avg loss: 3.721673, ppl: 41.333496 +epoch: 0, batch: 11202, sum loss: 5108.677246, avg loss: 3.487152, ppl: 32.692696 +epoch: 0, batch: 11203, sum loss: 6073.901367, avg loss: 3.676696, ppl: 39.515610 +epoch: 0, batch: 11204, sum loss: 7260.111328, avg loss: 3.759768, ppl: 42.938454 +epoch: 0, batch: 11205, sum loss: 6542.542480, avg loss: 3.434406, ppl: 31.012972 +epoch: 0, batch: 11206, sum loss: 7076.864746, avg loss: 3.649750, ppl: 38.465038 +epoch: 0, batch: 11207, sum loss: 6527.832031, avg loss: 3.679725, ppl: 
39.635479 +epoch: 0, batch: 11208, sum loss: 5411.454590, avg loss: 3.498032, ppl: 33.050331 +epoch: 0, batch: 11209, sum loss: 6361.812500, avg loss: 3.924622, ppl: 50.633938 +epoch: 0, batch: 11210, sum loss: 5955.637207, avg loss: 3.689986, ppl: 40.044285 +epoch: 0, batch: 11211, sum loss: 6562.933594, avg loss: 3.689114, ppl: 40.009377 +epoch: 0, batch: 11212, sum loss: 6293.288086, avg loss: 3.763928, ppl: 43.117477 +epoch: 0, batch: 11213, sum loss: 5499.146973, avg loss: 3.411381, ppl: 30.307083 +epoch: 0, batch: 11214, sum loss: 6431.372070, avg loss: 3.886026, ppl: 48.716885 +epoch: 0, batch: 11215, sum loss: 7836.765137, avg loss: 3.936095, ppl: 51.218204 +epoch: 0, batch: 11216, sum loss: 6288.865723, avg loss: 3.697158, ppl: 40.332512 +epoch: 0, batch: 11217, sum loss: 5980.335938, avg loss: 3.622251, ppl: 37.421711 +epoch: 0, batch: 11218, sum loss: 5792.859375, avg loss: 3.458424, ppl: 31.766869 +epoch: 0, batch: 11219, sum loss: 7393.277344, avg loss: 3.683746, ppl: 39.795174 +epoch: 0, batch: 11220, sum loss: 5989.285156, avg loss: 3.625475, ppl: 37.542568 +epoch: 0, batch: 11221, sum loss: 5502.284180, avg loss: 3.527105, ppl: 34.025333 +epoch: 0, batch: 11222, sum loss: 6194.614746, avg loss: 3.680698, ppl: 39.674072 +epoch: 0, batch: 11223, sum loss: 6522.798828, avg loss: 3.704031, ppl: 40.610676 +epoch: 0, batch: 11224, sum loss: 6553.210938, avg loss: 3.656926, ppl: 38.742062 +epoch: 0, batch: 11225, sum loss: 6434.331055, avg loss: 3.666286, ppl: 39.106377 +epoch: 0, batch: 11226, sum loss: 6359.430664, avg loss: 3.654845, ppl: 38.661537 +epoch: 0, batch: 11227, sum loss: 8052.956543, avg loss: 4.138210, ppl: 62.690525 +epoch: 0, batch: 11228, sum loss: 6601.194336, avg loss: 3.727382, ppl: 41.570152 +epoch: 0, batch: 11229, sum loss: 6011.602051, avg loss: 3.527936, ppl: 34.053593 +epoch: 0, batch: 11230, sum loss: 6507.447754, avg loss: 3.639512, ppl: 38.073265 +epoch: 0, batch: 11231, sum loss: 6761.666504, avg loss: 3.666847, ppl: 
39.128357 +epoch: 0, batch: 11232, sum loss: 6117.329102, avg loss: 3.626158, ppl: 37.568218 +epoch: 0, batch: 11233, sum loss: 7073.193848, avg loss: 3.819219, ppl: 45.568611 +epoch: 0, batch: 11234, sum loss: 7151.042480, avg loss: 3.582687, ppl: 35.970051 +epoch: 0, batch: 11235, sum loss: 5514.494629, avg loss: 3.842853, ppl: 46.658417 +epoch: 0, batch: 11236, sum loss: 6510.104004, avg loss: 3.984152, ppl: 53.739689 +epoch: 0, batch: 11237, sum loss: 4916.654297, avg loss: 3.211401, ppl: 24.813820 +epoch: 0, batch: 11238, sum loss: 5197.905762, avg loss: 3.368701, ppl: 29.040785 +epoch: 0, batch: 11239, sum loss: 6564.031250, avg loss: 3.644659, ppl: 38.269722 +epoch: 0, batch: 11240, sum loss: 6744.684082, avg loss: 3.874029, ppl: 48.135933 +epoch: 0, batch: 11241, sum loss: 5591.277832, avg loss: 3.501113, ppl: 33.152344 +epoch: 0, batch: 11242, sum loss: 5859.185059, avg loss: 3.592388, ppl: 36.320713 +epoch: 0, batch: 11243, sum loss: 6254.234375, avg loss: 3.573848, ppl: 35.653534 +epoch: 0, batch: 11244, sum loss: 5445.750000, avg loss: 3.416405, ppl: 30.459721 +epoch: 0, batch: 11245, sum loss: 6901.043457, avg loss: 3.874814, ppl: 48.173740 +epoch: 0, batch: 11246, sum loss: 6374.861328, avg loss: 3.615917, ppl: 37.185421 +epoch: 0, batch: 11247, sum loss: 5016.636230, avg loss: 3.493479, ppl: 32.900215 +epoch: 0, batch: 11248, sum loss: 6514.977051, avg loss: 3.478365, ppl: 32.406685 +epoch: 0, batch: 11249, sum loss: 5285.163574, avg loss: 3.470232, ppl: 32.144207 +epoch: 0, batch: 11250, sum loss: 5739.130371, avg loss: 3.815911, ppl: 45.418118 +epoch: 0, batch: 11251, sum loss: 6472.003906, avg loss: 3.565842, ppl: 35.369236 +epoch: 0, batch: 11252, sum loss: 5560.373535, avg loss: 3.665375, ppl: 39.070774 +epoch: 0, batch: 11253, sum loss: 7601.217773, avg loss: 4.093278, ppl: 59.936035 +epoch: 0, batch: 11254, sum loss: 6343.391113, avg loss: 3.809844, ppl: 45.143417 +epoch: 0, batch: 11255, sum loss: 7009.136719, avg loss: 3.605523, ppl: 
36.800938 +epoch: 0, batch: 11256, sum loss: 5890.089844, avg loss: 3.481141, ppl: 32.496769 +epoch: 0, batch: 11257, sum loss: 6402.484375, avg loss: 4.011582, ppl: 55.234177 +epoch: 0, batch: 11258, sum loss: 5777.679688, avg loss: 3.682396, ppl: 39.741508 +epoch: 0, batch: 11259, sum loss: 6259.409668, avg loss: 3.285779, ppl: 26.729805 +epoch: 0, batch: 11260, sum loss: 7286.393555, avg loss: 3.972952, ppl: 53.141155 +epoch: 0, batch: 11261, sum loss: 5739.052734, avg loss: 3.693084, ppl: 40.168545 +epoch: 0, batch: 11262, sum loss: 5780.162109, avg loss: 3.561406, ppl: 35.212677 +epoch: 0, batch: 11263, sum loss: 6455.779297, avg loss: 3.708087, ppl: 40.775726 +epoch: 0, batch: 11264, sum loss: 6038.787598, avg loss: 3.709329, ppl: 40.826397 +epoch: 0, batch: 11265, sum loss: 6139.213867, avg loss: 3.711738, ppl: 40.924858 +epoch: 0, batch: 11266, sum loss: 6053.258789, avg loss: 3.887771, ppl: 48.801968 +epoch: 0, batch: 11267, sum loss: 5994.284180, avg loss: 3.466908, ppl: 32.037537 +epoch: 0, batch: 11268, sum loss: 6950.292480, avg loss: 3.831473, ppl: 46.130455 +epoch: 0, batch: 11269, sum loss: 5640.263672, avg loss: 3.567529, ppl: 35.428940 +epoch: 0, batch: 11270, sum loss: 7504.736816, avg loss: 3.838740, ppl: 46.466896 +epoch: 0, batch: 11271, sum loss: 7388.724609, avg loss: 3.672329, ppl: 39.343418 +epoch: 0, batch: 11272, sum loss: 6269.690430, avg loss: 3.714272, ppl: 41.028698 +epoch: 0, batch: 11273, sum loss: 5640.243652, avg loss: 3.722933, ppl: 41.385612 +epoch: 0, batch: 11274, sum loss: 5598.745117, avg loss: 3.602796, ppl: 36.700710 +epoch: 0, batch: 11275, sum loss: 6366.505371, avg loss: 3.723103, ppl: 41.392620 +epoch: 0, batch: 11276, sum loss: 8093.565918, avg loss: 3.872519, ppl: 48.063328 +epoch: 0, batch: 11277, sum loss: 6573.416992, avg loss: 3.709603, ppl: 40.837605 +epoch: 0, batch: 11278, sum loss: 6552.984375, avg loss: 3.604502, ppl: 36.763363 +epoch: 0, batch: 11279, sum loss: 5569.011719, avg loss: 3.506934, ppl: 
33.345886 +epoch: 0, batch: 11280, sum loss: 6075.126953, avg loss: 3.983690, ppl: 53.714867 +epoch: 0, batch: 11281, sum loss: 5414.314453, avg loss: 3.495361, ppl: 32.962189 +epoch: 0, batch: 11282, sum loss: 5816.387695, avg loss: 3.484954, ppl: 32.620922 +epoch: 0, batch: 11283, sum loss: 6716.664062, avg loss: 3.529513, ppl: 34.107365 +epoch: 0, batch: 11284, sum loss: 5117.891602, avg loss: 3.364820, ppl: 28.928303 +epoch: 0, batch: 11285, sum loss: 5536.201660, avg loss: 3.501709, ppl: 33.172085 +epoch: 0, batch: 11286, sum loss: 6271.910645, avg loss: 3.717789, ppl: 41.173275 +epoch: 0, batch: 11287, sum loss: 6316.338867, avg loss: 3.617605, ppl: 37.248268 +epoch: 0, batch: 11288, sum loss: 6452.626953, avg loss: 3.641438, ppl: 38.146633 +epoch: 0, batch: 11289, sum loss: 6826.677246, avg loss: 3.788389, ppl: 44.185169 +epoch: 0, batch: 11290, sum loss: 6704.509766, avg loss: 3.733023, ppl: 41.805313 +epoch: 0, batch: 11291, sum loss: 6619.182129, avg loss: 3.727017, ppl: 41.554970 +epoch: 0, batch: 11292, sum loss: 5985.476074, avg loss: 3.531254, ppl: 34.166798 +epoch: 0, batch: 11293, sum loss: 5306.372070, avg loss: 3.568508, ppl: 35.463657 +epoch: 0, batch: 11294, sum loss: 6052.841797, avg loss: 3.517049, ppl: 33.684879 +epoch: 0, batch: 11295, sum loss: 5999.891602, avg loss: 3.944702, ppl: 51.660950 +epoch: 0, batch: 11296, sum loss: 6661.945801, avg loss: 3.599106, ppl: 36.565540 +epoch: 0, batch: 11297, sum loss: 6051.748047, avg loss: 3.796580, ppl: 44.548550 +epoch: 0, batch: 11298, sum loss: 7385.004883, avg loss: 3.771708, ppl: 43.454243 +epoch: 0, batch: 11299, sum loss: 5741.649902, avg loss: 3.458825, ppl: 31.779625 +epoch: 0, batch: 11300, sum loss: 6159.053223, avg loss: 3.395288, ppl: 29.823256 +epoch: 0, batch: 11301, sum loss: 6159.658203, avg loss: 3.560496, ppl: 35.180645 +epoch: 0, batch: 11302, sum loss: 6630.075195, avg loss: 3.743690, ppl: 42.253620 +epoch: 0, batch: 11303, sum loss: 6190.974609, avg loss: 3.661132, ppl: 
38.905373 +epoch: 0, batch: 11304, sum loss: 5785.959473, avg loss: 3.327176, ppl: 27.859564 +epoch: 0, batch: 11305, sum loss: 6553.708984, avg loss: 3.797050, ppl: 44.569519 +epoch: 0, batch: 11306, sum loss: 6928.215332, avg loss: 3.792127, ppl: 44.350620 +epoch: 0, batch: 11307, sum loss: 6755.475098, avg loss: 3.761400, ppl: 43.008617 +epoch: 0, batch: 11308, sum loss: 6200.078613, avg loss: 3.555091, ppl: 34.991001 +epoch: 0, batch: 11309, sum loss: 6713.745605, avg loss: 3.799516, ppl: 44.679573 +epoch: 0, batch: 11310, sum loss: 6350.572266, avg loss: 3.764418, ppl: 43.138577 +epoch: 0, batch: 11311, sum loss: 6051.229004, avg loss: 3.791497, ppl: 44.322689 +epoch: 0, batch: 11312, sum loss: 5809.310547, avg loss: 3.236385, ppl: 25.441580 +epoch: 0, batch: 11313, sum loss: 7327.734863, avg loss: 3.642016, ppl: 38.168697 +epoch: 0, batch: 11314, sum loss: 5239.879395, avg loss: 3.456385, ppl: 31.702162 +epoch: 0, batch: 11315, sum loss: 5562.333984, avg loss: 3.609561, ppl: 36.949844 +epoch: 0, batch: 11316, sum loss: 6781.502930, avg loss: 3.857510, ppl: 47.347313 +epoch: 0, batch: 11317, sum loss: 6562.735352, avg loss: 3.597991, ppl: 36.524780 +epoch: 0, batch: 11318, sum loss: 6061.611328, avg loss: 3.612402, ppl: 37.054970 +epoch: 0, batch: 11319, sum loss: 6305.195312, avg loss: 3.791458, ppl: 44.320988 +epoch: 0, batch: 11320, sum loss: 6663.586914, avg loss: 3.701993, ppl: 40.527985 +epoch: 0, batch: 11321, sum loss: 7017.822754, avg loss: 3.973852, ppl: 53.189018 +epoch: 0, batch: 11322, sum loss: 6336.069336, avg loss: 3.668830, ppl: 39.206013 +epoch: 0, batch: 11323, sum loss: 7906.559570, avg loss: 3.914139, ppl: 50.105888 +epoch: 0, batch: 11324, sum loss: 5953.955566, avg loss: 3.657221, ppl: 38.753490 +epoch: 0, batch: 11325, sum loss: 5927.699219, avg loss: 3.887016, ppl: 48.765144 +epoch: 0, batch: 11326, sum loss: 4600.712402, avg loss: 3.235381, ppl: 25.416063 +epoch: 0, batch: 11327, sum loss: 7493.622559, avg loss: 3.925418, ppl: 
50.674252 +epoch: 0, batch: 11328, sum loss: 6512.763184, avg loss: 3.669162, ppl: 39.219028 +epoch: 0, batch: 11329, sum loss: 5525.852051, avg loss: 3.349001, ppl: 28.474277 +epoch: 0, batch: 11330, sum loss: 6451.075684, avg loss: 3.812692, ppl: 45.272167 +epoch: 0, batch: 11331, sum loss: 5594.334961, avg loss: 3.522881, ppl: 33.881905 +epoch: 0, batch: 11332, sum loss: 6636.526855, avg loss: 3.726293, ppl: 41.524872 +epoch: 0, batch: 11333, sum loss: 7296.765625, avg loss: 3.897845, ppl: 49.296104 +epoch: 0, batch: 11334, sum loss: 4980.970215, avg loss: 3.283434, ppl: 26.667202 +epoch: 0, batch: 11335, sum loss: 6141.554199, avg loss: 3.367080, ppl: 28.993748 +epoch: 0, batch: 11336, sum loss: 7398.743652, avg loss: 3.922982, ppl: 50.550961 +epoch: 0, batch: 11337, sum loss: 5720.889648, avg loss: 3.650855, ppl: 38.507580 +epoch: 0, batch: 11338, sum loss: 5505.824219, avg loss: 3.438991, ppl: 31.155500 +epoch: 0, batch: 11339, sum loss: 5318.818359, avg loss: 3.550613, ppl: 34.834663 +epoch: 0, batch: 11340, sum loss: 5371.699707, avg loss: 3.421465, ppl: 30.614229 +epoch: 0, batch: 11341, sum loss: 6206.562988, avg loss: 3.583466, ppl: 35.998089 +epoch: 0, batch: 11342, sum loss: 6184.118164, avg loss: 3.515701, ppl: 33.639511 +epoch: 0, batch: 11343, sum loss: 5651.851562, avg loss: 3.658157, ppl: 38.789772 +epoch: 0, batch: 11344, sum loss: 6011.349609, avg loss: 3.488885, ppl: 32.749424 +epoch: 0, batch: 11345, sum loss: 6741.940430, avg loss: 3.688151, ppl: 39.970886 +epoch: 0, batch: 11346, sum loss: 7082.938477, avg loss: 3.723943, ppl: 41.427402 +epoch: 0, batch: 11347, sum loss: 6936.355957, avg loss: 3.757506, ppl: 42.841442 +epoch: 0, batch: 11348, sum loss: 6266.063965, avg loss: 3.522239, ppl: 33.860172 +epoch: 0, batch: 11349, sum loss: 6822.099121, avg loss: 3.677681, ppl: 39.554577 +epoch: 0, batch: 11350, sum loss: 8042.989258, avg loss: 3.872407, ppl: 48.057919 +epoch: 0, batch: 11351, sum loss: 5431.036621, avg loss: 3.632800, ppl: 
37.818573 +epoch: 0, batch: 11352, sum loss: 8091.537109, avg loss: 3.816763, ppl: 45.456821 +epoch: 0, batch: 11353, sum loss: 5651.430176, avg loss: 3.297217, ppl: 27.037292 +epoch: 0, batch: 11354, sum loss: 5560.520020, avg loss: 3.757108, ppl: 42.824409 +epoch: 0, batch: 11355, sum loss: 6628.664062, avg loss: 3.768428, ppl: 43.311909 +epoch: 0, batch: 11356, sum loss: 5582.315430, avg loss: 3.542078, ppl: 34.538624 +epoch: 0, batch: 11357, sum loss: 6236.748047, avg loss: 3.632352, ppl: 37.801617 +epoch: 0, batch: 11358, sum loss: 6168.348633, avg loss: 3.510728, ppl: 33.472614 +epoch: 0, batch: 11359, sum loss: 7200.431641, avg loss: 3.906908, ppl: 49.744904 +epoch: 0, batch: 11360, sum loss: 5462.465820, avg loss: 3.610354, ppl: 36.979149 +epoch: 0, batch: 11361, sum loss: 5141.104004, avg loss: 3.358004, ppl: 28.731787 +epoch: 0, batch: 11362, sum loss: 5483.380859, avg loss: 3.418567, ppl: 30.525631 +epoch: 0, batch: 11363, sum loss: 6133.686523, avg loss: 3.835951, ppl: 46.337490 +epoch: 0, batch: 11364, sum loss: 5542.951660, avg loss: 3.488327, ppl: 32.731144 +epoch: 0, batch: 11365, sum loss: 8580.257812, avg loss: 3.815144, ppl: 45.383274 +epoch: 0, batch: 11366, sum loss: 6067.351562, avg loss: 3.469040, ppl: 32.105919 +epoch: 0, batch: 11367, sum loss: 6454.558594, avg loss: 3.976931, ppl: 53.353024 +epoch: 0, batch: 11368, sum loss: 6487.869629, avg loss: 3.586440, ppl: 36.105316 +epoch: 0, batch: 11369, sum loss: 6640.198242, avg loss: 3.502214, ppl: 33.188866 +epoch: 0, batch: 11370, sum loss: 6250.854980, avg loss: 3.442101, ppl: 31.252537 +epoch: 0, batch: 11371, sum loss: 5627.633789, avg loss: 3.557291, ppl: 35.068054 +epoch: 0, batch: 11372, sum loss: 6340.949707, avg loss: 3.950747, ppl: 51.974190 +epoch: 0, batch: 11373, sum loss: 7835.755859, avg loss: 3.781735, ppl: 43.892147 +epoch: 0, batch: 11374, sum loss: 6067.654785, avg loss: 3.887031, ppl: 48.765865 +epoch: 0, batch: 11375, sum loss: 6665.094727, avg loss: 3.767719, ppl: 
43.281227 +epoch: 0, batch: 11376, sum loss: 6607.706055, avg loss: 3.672988, ppl: 39.369381 +epoch: 0, batch: 11377, sum loss: 6100.697754, avg loss: 3.849020, ppl: 46.947056 +epoch: 0, batch: 11378, sum loss: 6886.909668, avg loss: 3.913017, ppl: 50.049713 +epoch: 0, batch: 11379, sum loss: 5713.465332, avg loss: 3.479577, ppl: 32.445995 +epoch: 0, batch: 11380, sum loss: 6165.522949, avg loss: 3.851045, ppl: 47.042202 +epoch: 0, batch: 11381, sum loss: 7068.833984, avg loss: 3.890388, ppl: 48.929844 +epoch: 0, batch: 11382, sum loss: 6546.250000, avg loss: 3.616713, ppl: 37.215034 +epoch: 0, batch: 11383, sum loss: 5507.935059, avg loss: 3.408376, ppl: 30.216118 +epoch: 0, batch: 11384, sum loss: 6271.401367, avg loss: 3.902552, ppl: 49.528702 +epoch: 0, batch: 11385, sum loss: 6371.429688, avg loss: 3.772309, ppl: 43.480347 +epoch: 0, batch: 11386, sum loss: 6912.339844, avg loss: 3.756706, ppl: 42.807198 +epoch: 0, batch: 11387, sum loss: 5861.367188, avg loss: 3.580554, ppl: 35.893429 +epoch: 0, batch: 11388, sum loss: 6367.270996, avg loss: 3.736661, ppl: 41.957676 +epoch: 0, batch: 11389, sum loss: 5841.646484, avg loss: 3.583832, ppl: 36.011280 +epoch: 0, batch: 11390, sum loss: 6051.804688, avg loss: 3.663320, ppl: 38.990589 +epoch: 0, batch: 11391, sum loss: 7272.664062, avg loss: 3.625456, ppl: 37.541824 +epoch: 0, batch: 11392, sum loss: 5853.746094, avg loss: 3.455576, ppl: 31.676535 +epoch: 0, batch: 11393, sum loss: 6398.364746, avg loss: 3.786015, ppl: 44.080368 +epoch: 0, batch: 11394, sum loss: 6345.572266, avg loss: 3.626041, ppl: 37.563812 +epoch: 0, batch: 11395, sum loss: 5989.846680, avg loss: 3.444420, ppl: 31.325121 +epoch: 0, batch: 11396, sum loss: 5947.955078, avg loss: 3.568059, ppl: 35.447731 +epoch: 0, batch: 11397, sum loss: 5911.596680, avg loss: 3.613446, ppl: 37.093639 +epoch: 0, batch: 11398, sum loss: 7054.697266, avg loss: 3.770549, ppl: 43.403900 +epoch: 0, batch: 11399, sum loss: 5902.715820, avg loss: 3.581745, ppl: 
35.936199 +epoch: 0, batch: 11400, sum loss: 5925.308594, avg loss: 3.624042, ppl: 37.488785 +epoch: 0, batch: 11401, sum loss: 5753.972168, avg loss: 3.517098, ppl: 33.686527 +epoch: 0, batch: 11402, sum loss: 7916.858398, avg loss: 3.865654, ppl: 47.734459 +epoch: 0, batch: 11403, sum loss: 6220.812012, avg loss: 3.560854, ppl: 35.193237 +epoch: 0, batch: 11404, sum loss: 6338.908691, avg loss: 3.589416, ppl: 36.212921 +epoch: 0, batch: 11405, sum loss: 6485.476562, avg loss: 3.881195, ppl: 48.482121 +epoch: 0, batch: 11406, sum loss: 6565.110352, avg loss: 3.917130, ppl: 50.256027 +epoch: 0, batch: 11407, sum loss: 6606.993652, avg loss: 3.588807, ppl: 36.190880 +epoch: 0, batch: 11408, sum loss: 5747.160156, avg loss: 3.658282, ppl: 38.794621 +epoch: 0, batch: 11409, sum loss: 6084.580566, avg loss: 3.798115, ppl: 44.617004 +epoch: 0, batch: 11410, sum loss: 6457.849609, avg loss: 3.542430, ppl: 34.550774 +epoch: 0, batch: 11411, sum loss: 5367.570312, avg loss: 3.342198, ppl: 28.281225 +epoch: 0, batch: 11412, sum loss: 5160.109863, avg loss: 3.451578, ppl: 31.550152 +epoch: 0, batch: 11413, sum loss: 6257.306641, avg loss: 3.380501, ppl: 29.385483 +epoch: 0, batch: 11414, sum loss: 7405.340820, avg loss: 3.704523, ppl: 40.630657 +epoch: 0, batch: 11415, sum loss: 5694.701172, avg loss: 3.420241, ppl: 30.576786 +epoch: 0, batch: 11416, sum loss: 5997.358887, avg loss: 3.561377, ppl: 35.211651 +epoch: 0, batch: 11417, sum loss: 5477.117188, avg loss: 3.651412, ppl: 38.529011 +epoch: 0, batch: 11418, sum loss: 6882.326172, avg loss: 3.618468, ppl: 37.280422 +epoch: 0, batch: 11419, sum loss: 5449.139160, avg loss: 3.568526, ppl: 35.464283 +epoch: 0, batch: 11420, sum loss: 7088.898926, avg loss: 3.786805, ppl: 44.115234 +epoch: 0, batch: 11421, sum loss: 5943.721191, avg loss: 3.694047, ppl: 40.207226 +epoch: 0, batch: 11422, sum loss: 6700.135742, avg loss: 3.718166, ppl: 41.188801 +epoch: 0, batch: 11423, sum loss: 6890.828613, avg loss: 3.712731, ppl: 
40.965527 +epoch: 0, batch: 11424, sum loss: 5600.076660, avg loss: 3.537635, ppl: 34.385506 +epoch: 0, batch: 11425, sum loss: 6079.836914, avg loss: 3.766937, ppl: 43.247417 +epoch: 0, batch: 11426, sum loss: 7173.050781, avg loss: 3.687944, ppl: 39.962597 +epoch: 0, batch: 11427, sum loss: 6781.628418, avg loss: 3.711893, ppl: 40.931221 +epoch: 0, batch: 11428, sum loss: 6616.422363, avg loss: 3.498901, ppl: 33.079090 +epoch: 0, batch: 11429, sum loss: 7013.435547, avg loss: 3.748496, ppl: 42.457172 +epoch: 0, batch: 11430, sum loss: 5808.880371, avg loss: 3.590161, ppl: 36.239906 +epoch: 0, batch: 11431, sum loss: 6634.887207, avg loss: 3.902875, ppl: 49.544682 +epoch: 0, batch: 11432, sum loss: 6564.875000, avg loss: 3.698521, ppl: 40.387524 +epoch: 0, batch: 11433, sum loss: 6619.864746, avg loss: 3.591896, ppl: 36.302853 +epoch: 0, batch: 11434, sum loss: 6323.822754, avg loss: 3.715524, ppl: 41.080124 +epoch: 0, batch: 11435, sum loss: 5856.482422, avg loss: 3.612882, ppl: 37.072758 +epoch: 0, batch: 11436, sum loss: 6628.903320, avg loss: 3.715753, ppl: 41.089520 +epoch: 0, batch: 11437, sum loss: 6010.921875, avg loss: 3.785215, ppl: 44.045155 +epoch: 0, batch: 11438, sum loss: 6066.456543, avg loss: 3.613137, ppl: 37.082180 +epoch: 0, batch: 11439, sum loss: 6573.679688, avg loss: 3.760687, ppl: 42.977924 +epoch: 0, batch: 11440, sum loss: 6445.474121, avg loss: 3.670543, ppl: 39.273243 +epoch: 0, batch: 11441, sum loss: 6231.643555, avg loss: 3.637854, ppl: 38.010166 +epoch: 0, batch: 11442, sum loss: 5570.990723, avg loss: 3.424088, ppl: 30.694632 +epoch: 0, batch: 11443, sum loss: 5571.939453, avg loss: 3.418368, ppl: 30.519562 +epoch: 0, batch: 11444, sum loss: 6316.442383, avg loss: 3.655349, ppl: 38.681000 +epoch: 0, batch: 11445, sum loss: 6593.190918, avg loss: 3.695735, ppl: 40.275162 +epoch: 0, batch: 11446, sum loss: 5171.054199, avg loss: 3.472837, ppl: 32.228043 +epoch: 0, batch: 11447, sum loss: 5839.854004, avg loss: 3.670556, ppl: 
39.273720 +epoch: 0, batch: 11448, sum loss: 5262.797852, avg loss: 3.360663, ppl: 28.808275 +epoch: 0, batch: 11449, sum loss: 7170.918945, avg loss: 3.603477, ppl: 36.725700 +epoch: 0, batch: 11450, sum loss: 6652.852539, avg loss: 3.698084, ppl: 40.369865 +epoch: 0, batch: 11451, sum loss: 5230.690430, avg loss: 3.592507, ppl: 36.325043 +epoch: 0, batch: 11452, sum loss: 5939.580566, avg loss: 3.435269, ppl: 31.039772 +epoch: 0, batch: 11453, sum loss: 5778.697754, avg loss: 3.498001, ppl: 33.049324 +epoch: 0, batch: 11454, sum loss: 6459.867188, avg loss: 3.727563, ppl: 41.577667 +epoch: 0, batch: 11455, sum loss: 6325.983398, avg loss: 3.705907, ppl: 40.686939 +epoch: 0, batch: 11456, sum loss: 5400.394531, avg loss: 3.562266, ppl: 35.242954 +epoch: 0, batch: 11457, sum loss: 5565.386719, avg loss: 3.632759, ppl: 37.817013 +epoch: 0, batch: 11458, sum loss: 5989.384277, avg loss: 3.474121, ppl: 32.269455 +epoch: 0, batch: 11459, sum loss: 6469.379395, avg loss: 3.539048, ppl: 34.434120 +epoch: 0, batch: 11460, sum loss: 6764.219727, avg loss: 3.815127, ppl: 45.382515 +epoch: 0, batch: 11461, sum loss: 6002.890137, avg loss: 3.590245, ppl: 36.242973 +epoch: 0, batch: 11462, sum loss: 6590.090820, avg loss: 3.700220, ppl: 40.456188 +epoch: 0, batch: 11463, sum loss: 6039.193359, avg loss: 3.734814, ppl: 41.880222 +epoch: 0, batch: 11464, sum loss: 5757.082520, avg loss: 3.441173, ppl: 31.223564 +epoch: 0, batch: 11465, sum loss: 6101.915039, avg loss: 3.553823, ppl: 34.946678 +epoch: 0, batch: 11466, sum loss: 6908.759766, avg loss: 3.796022, ppl: 44.523712 +epoch: 0, batch: 11467, sum loss: 7078.492188, avg loss: 3.830353, ppl: 46.078804 +epoch: 0, batch: 11468, sum loss: 6126.536133, avg loss: 3.480986, ppl: 32.491756 +epoch: 0, batch: 11469, sum loss: 5808.438965, avg loss: 3.574424, ppl: 35.674068 +epoch: 0, batch: 11470, sum loss: 6607.175781, avg loss: 3.475632, ppl: 32.318237 +epoch: 0, batch: 11471, sum loss: 4915.849121, avg loss: 3.428068, ppl: 
30.817038 +epoch: 0, batch: 11472, sum loss: 5913.073730, avg loss: 3.490598, ppl: 32.805573 +epoch: 0, batch: 11473, sum loss: 6120.005859, avg loss: 3.974030, ppl: 53.198479 +epoch: 0, batch: 11474, sum loss: 7429.006836, avg loss: 3.847233, ppl: 46.863216 +epoch: 0, batch: 11475, sum loss: 5938.975586, avg loss: 3.539318, ppl: 34.443424 +epoch: 0, batch: 11476, sum loss: 5720.482422, avg loss: 3.582018, ppl: 35.946003 +epoch: 0, batch: 11477, sum loss: 6895.164062, avg loss: 3.671546, ppl: 39.312645 +epoch: 0, batch: 11478, sum loss: 7238.579102, avg loss: 3.931874, ppl: 51.002445 +epoch: 0, batch: 11479, sum loss: 6020.553223, avg loss: 3.680045, ppl: 39.648174 +epoch: 0, batch: 11480, sum loss: 6950.727539, avg loss: 3.812796, ppl: 45.276871 +epoch: 0, batch: 11481, sum loss: 6923.371094, avg loss: 3.839917, ppl: 46.521633 +epoch: 0, batch: 11482, sum loss: 6242.347656, avg loss: 3.540753, ppl: 34.492886 +epoch: 0, batch: 11483, sum loss: 6197.614258, avg loss: 3.729010, ppl: 41.637859 +epoch: 0, batch: 11484, sum loss: 7158.030273, avg loss: 3.771354, ppl: 43.438862 +epoch: 0, batch: 11485, sum loss: 5333.847656, avg loss: 3.474819, ppl: 32.291988 +epoch: 0, batch: 11486, sum loss: 5892.208496, avg loss: 3.338362, ppl: 28.172934 +epoch: 0, batch: 11487, sum loss: 6280.616211, avg loss: 3.834320, ppl: 46.261951 +epoch: 0, batch: 11488, sum loss: 5939.976562, avg loss: 3.400101, ppl: 29.967125 +epoch: 0, batch: 11489, sum loss: 7285.475098, avg loss: 3.950908, ppl: 51.982567 +epoch: 0, batch: 11490, sum loss: 6773.652832, avg loss: 3.671356, ppl: 39.305183 +epoch: 0, batch: 11491, sum loss: 5848.726562, avg loss: 3.466939, ppl: 32.038536 +epoch: 0, batch: 11492, sum loss: 5730.250000, avg loss: 3.645197, ppl: 38.290321 +epoch: 0, batch: 11493, sum loss: 5978.647461, avg loss: 3.619036, ppl: 37.301598 +epoch: 0, batch: 11494, sum loss: 6933.047363, avg loss: 3.683872, ppl: 39.800201 +epoch: 0, batch: 11495, sum loss: 6572.166504, avg loss: 3.749097, ppl: 
42.482697 +epoch: 0, batch: 11496, sum loss: 6652.081543, avg loss: 3.659011, ppl: 38.822914 +epoch: 0, batch: 11497, sum loss: 6124.238770, avg loss: 3.487608, ppl: 32.707607 +epoch: 0, batch: 11498, sum loss: 6249.891602, avg loss: 3.771811, ppl: 43.458717 +epoch: 0, batch: 11499, sum loss: 6463.334473, avg loss: 3.627012, ppl: 37.600281 +epoch: 0, batch: 11500, sum loss: 5867.973633, avg loss: 3.405673, ppl: 30.134556 +epoch: 0, batch: 11501, sum loss: 6650.974609, avg loss: 3.873602, ppl: 48.115383 +epoch: 0, batch: 11502, sum loss: 6219.146484, avg loss: 3.651877, ppl: 38.546947 +epoch: 0, batch: 11503, sum loss: 6788.207031, avg loss: 3.960448, ppl: 52.480808 +epoch: 0, batch: 11504, sum loss: 7401.922852, avg loss: 3.653466, ppl: 38.608269 +epoch: 0, batch: 11505, sum loss: 5781.988281, avg loss: 3.652551, ppl: 38.572937 +epoch: 0, batch: 11506, sum loss: 5302.970215, avg loss: 3.405890, ppl: 30.141102 +epoch: 0, batch: 11507, sum loss: 6580.839844, avg loss: 3.786444, ppl: 44.099312 +epoch: 0, batch: 11508, sum loss: 7010.405273, avg loss: 4.049916, ppl: 57.392651 +epoch: 0, batch: 11509, sum loss: 5720.923828, avg loss: 3.465127, ppl: 31.980513 +epoch: 0, batch: 11510, sum loss: 6722.128906, avg loss: 3.812892, ppl: 45.281212 +epoch: 0, batch: 11511, sum loss: 5130.253418, avg loss: 3.525947, ppl: 33.985954 +epoch: 0, batch: 11512, sum loss: 6418.025879, avg loss: 3.692765, ppl: 40.155735 +epoch: 0, batch: 11513, sum loss: 6356.811523, avg loss: 3.571243, ppl: 35.560753 +epoch: 0, batch: 11514, sum loss: 5565.730469, avg loss: 3.435636, ppl: 31.051163 +epoch: 0, batch: 11515, sum loss: 5544.498535, avg loss: 3.443788, ppl: 31.305313 +epoch: 0, batch: 11516, sum loss: 5211.954102, avg loss: 3.654947, ppl: 38.665462 +epoch: 0, batch: 11517, sum loss: 5976.070312, avg loss: 3.650623, ppl: 38.498657 +epoch: 0, batch: 11518, sum loss: 6593.266602, avg loss: 3.894428, ppl: 49.127956 +epoch: 0, batch: 11519, sum loss: 6044.997070, avg loss: 3.579039, ppl: 
35.839085 +epoch: 0, batch: 11520, sum loss: 6054.008789, avg loss: 3.750935, ppl: 42.560852 +epoch: 0, batch: 11521, sum loss: 6430.538574, avg loss: 3.704227, ppl: 40.618645 +epoch: 0, batch: 11522, sum loss: 5226.979492, avg loss: 3.470770, ppl: 32.161503 +epoch: 0, batch: 11523, sum loss: 5967.838867, avg loss: 3.569282, ppl: 35.491096 +epoch: 0, batch: 11524, sum loss: 6259.590332, avg loss: 3.626646, ppl: 37.586525 +epoch: 0, batch: 11525, sum loss: 6273.192383, avg loss: 3.615673, ppl: 37.176350 +epoch: 0, batch: 11526, sum loss: 5596.430176, avg loss: 3.517555, ppl: 33.701935 +epoch: 0, batch: 11527, sum loss: 5688.355469, avg loss: 3.230185, ppl: 25.284328 +epoch: 0, batch: 11528, sum loss: 5610.781738, avg loss: 3.539925, ppl: 34.464344 +epoch: 0, batch: 11529, sum loss: 6760.590332, avg loss: 3.710533, ppl: 40.875572 +epoch: 0, batch: 11530, sum loss: 4736.673828, avg loss: 3.615782, ppl: 37.180393 +epoch: 0, batch: 11531, sum loss: 6142.948242, avg loss: 3.652169, ppl: 38.558208 +epoch: 0, batch: 11532, sum loss: 6049.122559, avg loss: 3.657269, ppl: 38.755356 +epoch: 0, batch: 11533, sum loss: 5650.183105, avg loss: 3.424353, ppl: 30.702785 +epoch: 0, batch: 11534, sum loss: 5812.278809, avg loss: 3.327006, ppl: 27.854809 +epoch: 0, batch: 11535, sum loss: 6542.654297, avg loss: 3.653073, ppl: 38.593082 +epoch: 0, batch: 11536, sum loss: 5870.177734, avg loss: 3.531996, ppl: 34.192154 +epoch: 0, batch: 11537, sum loss: 6838.394531, avg loss: 3.680514, ppl: 39.666763 +epoch: 0, batch: 11538, sum loss: 6346.478516, avg loss: 3.616227, ppl: 37.196957 +epoch: 0, batch: 11539, sum loss: 6067.572754, avg loss: 3.631103, ppl: 37.754436 +epoch: 0, batch: 11540, sum loss: 6554.115234, avg loss: 3.713380, ppl: 40.992119 +epoch: 0, batch: 11541, sum loss: 6597.640625, avg loss: 3.759339, ppl: 42.920059 +epoch: 0, batch: 11542, sum loss: 5878.558594, avg loss: 3.457976, ppl: 31.752640 +epoch: 0, batch: 11543, sum loss: 6723.708008, avg loss: 3.745798, ppl: 
42.342796 +epoch: 0, batch: 11544, sum loss: 6973.672852, avg loss: 3.735229, ppl: 41.897621 +epoch: 0, batch: 11545, sum loss: 6502.383789, avg loss: 3.715648, ppl: 41.085197 +epoch: 0, batch: 11546, sum loss: 6479.414551, avg loss: 3.721663, ppl: 41.333054 +epoch: 0, batch: 11547, sum loss: 5789.659180, avg loss: 3.496171, ppl: 32.988895 +epoch: 0, batch: 11548, sum loss: 6575.966797, avg loss: 3.923608, ppl: 50.582623 +epoch: 0, batch: 11549, sum loss: 6359.147949, avg loss: 3.574563, ppl: 35.679028 +epoch: 0, batch: 11550, sum loss: 6399.850586, avg loss: 3.549557, ppl: 34.797890 +epoch: 0, batch: 11551, sum loss: 5215.701660, avg loss: 3.474818, ppl: 32.291950 +epoch: 0, batch: 11552, sum loss: 5991.207520, avg loss: 3.602650, ppl: 36.695362 +epoch: 0, batch: 11553, sum loss: 5392.843262, avg loss: 3.590441, ppl: 36.250076 +epoch: 0, batch: 11554, sum loss: 7219.143555, avg loss: 3.613185, ppl: 37.083973 +epoch: 0, batch: 11555, sum loss: 6053.931152, avg loss: 3.633812, ppl: 37.856861 +epoch: 0, batch: 11556, sum loss: 6688.122070, avg loss: 3.734295, ppl: 41.858490 +epoch: 0, batch: 11557, sum loss: 6951.884766, avg loss: 3.963446, ppl: 52.638412 +epoch: 0, batch: 11558, sum loss: 6642.115723, avg loss: 3.793327, ppl: 44.403900 +epoch: 0, batch: 11559, sum loss: 6013.125488, avg loss: 3.594217, ppl: 36.387207 +epoch: 0, batch: 11560, sum loss: 6498.258301, avg loss: 3.717539, ppl: 41.162979 +epoch: 0, batch: 11561, sum loss: 5228.504883, avg loss: 3.513780, ppl: 33.574944 +epoch: 0, batch: 11562, sum loss: 5578.939941, avg loss: 3.473811, ppl: 32.259438 +epoch: 0, batch: 11563, sum loss: 5634.056152, avg loss: 3.704179, ppl: 40.616688 +epoch: 0, batch: 11564, sum loss: 6162.081055, avg loss: 3.707630, ppl: 40.757103 +epoch: 0, batch: 11565, sum loss: 5977.522461, avg loss: 3.836664, ppl: 46.370533 +epoch: 0, batch: 11566, sum loss: 5470.542480, avg loss: 3.683867, ppl: 39.799992 +epoch: 0, batch: 11567, sum loss: 6553.348633, avg loss: 3.476578, ppl: 
32.348820 +epoch: 0, batch: 11568, sum loss: 6498.182129, avg loss: 3.640438, ppl: 38.108536 +epoch: 0, batch: 11569, sum loss: 5804.535156, avg loss: 3.494603, ppl: 32.937199 +epoch: 0, batch: 11570, sum loss: 5752.289062, avg loss: 3.796890, ppl: 44.562401 +epoch: 0, batch: 11571, sum loss: 7009.785156, avg loss: 3.772758, ppl: 43.499882 +epoch: 0, batch: 11572, sum loss: 6874.596191, avg loss: 3.859964, ppl: 47.463661 +epoch: 0, batch: 11573, sum loss: 6116.576172, avg loss: 3.525404, ppl: 33.967503 +epoch: 0, batch: 11574, sum loss: 5411.959473, avg loss: 3.397338, ppl: 29.884447 +epoch: 0, batch: 11575, sum loss: 5993.258301, avg loss: 3.411075, ppl: 30.297787 +epoch: 0, batch: 11576, sum loss: 5936.471191, avg loss: 3.525221, ppl: 33.961266 +epoch: 0, batch: 11577, sum loss: 6404.612305, avg loss: 3.712819, ppl: 40.969131 +epoch: 0, batch: 11578, sum loss: 6413.732422, avg loss: 3.587099, ppl: 36.129097 +epoch: 0, batch: 11579, sum loss: 7489.734863, avg loss: 3.823244, ppl: 45.752392 +epoch: 0, batch: 11580, sum loss: 7130.170898, avg loss: 3.845831, ppl: 46.797554 +epoch: 0, batch: 11581, sum loss: 4886.570801, avg loss: 3.367726, ppl: 29.012487 +epoch: 0, batch: 11582, sum loss: 7081.211426, avg loss: 3.614707, ppl: 37.140472 +epoch: 0, batch: 11583, sum loss: 5180.312012, avg loss: 3.531228, ppl: 34.165901 +epoch: 0, batch: 11584, sum loss: 5724.924316, avg loss: 3.593801, ppl: 36.372047 +epoch: 0, batch: 11585, sum loss: 6705.699219, avg loss: 3.765132, ppl: 43.169392 +epoch: 0, batch: 11586, sum loss: 6380.588379, avg loss: 3.827587, ppl: 45.951542 +epoch: 0, batch: 11587, sum loss: 6652.997070, avg loss: 3.702280, ppl: 40.539631 +epoch: 0, batch: 11588, sum loss: 7107.576172, avg loss: 3.854434, ppl: 47.201893 +epoch: 0, batch: 11589, sum loss: 6419.864746, avg loss: 3.485269, ppl: 32.631199 +epoch: 0, batch: 11590, sum loss: 5912.231445, avg loss: 3.600628, ppl: 36.621239 +epoch: 0, batch: 11591, sum loss: 5754.334473, avg loss: 3.600960, ppl: 
36.633396 +epoch: 0, batch: 11592, sum loss: 6319.437988, avg loss: 3.613172, ppl: 37.083496 +epoch: 0, batch: 11593, sum loss: 5481.795410, avg loss: 3.679057, ppl: 39.609039 +epoch: 0, batch: 11594, sum loss: 6145.283691, avg loss: 3.444666, ppl: 31.332813 +epoch: 0, batch: 11595, sum loss: 4848.224121, avg loss: 3.316159, ppl: 27.554312 +epoch: 0, batch: 11596, sum loss: 7407.291016, avg loss: 3.933771, ppl: 51.099304 +epoch: 0, batch: 11597, sum loss: 5909.721191, avg loss: 3.523984, ppl: 33.919292 +epoch: 0, batch: 11598, sum loss: 5152.529785, avg loss: 3.590613, ppl: 36.256298 +epoch: 0, batch: 11599, sum loss: 6262.762695, avg loss: 3.743433, ppl: 42.242752 +epoch: 0, batch: 11600, sum loss: 5423.031738, avg loss: 3.296676, ppl: 27.022657 +epoch: 0, batch: 11601, sum loss: 6070.016113, avg loss: 3.598113, ppl: 36.529232 +epoch: 0, batch: 11602, sum loss: 6333.521484, avg loss: 3.412458, ppl: 30.339718 +epoch: 0, batch: 11603, sum loss: 5494.323242, avg loss: 3.519746, ppl: 33.775841 +epoch: 0, batch: 11604, sum loss: 5909.698242, avg loss: 3.583807, ppl: 36.010380 +epoch: 0, batch: 11605, sum loss: 6601.292969, avg loss: 3.509459, ppl: 33.430187 +epoch: 0, batch: 11606, sum loss: 4983.749023, avg loss: 3.320286, ppl: 27.668257 +epoch: 0, batch: 11607, sum loss: 6328.122070, avg loss: 3.618137, ppl: 37.268078 +epoch: 0, batch: 11608, sum loss: 6774.733398, avg loss: 3.742947, ppl: 42.222229 +epoch: 0, batch: 11609, sum loss: 6660.692383, avg loss: 3.541038, ppl: 34.502708 +epoch: 0, batch: 11610, sum loss: 6660.813965, avg loss: 3.606288, ppl: 36.829098 +epoch: 0, batch: 11611, sum loss: 5461.109375, avg loss: 3.317806, ppl: 27.599743 +epoch: 0, batch: 11612, sum loss: 4828.069336, avg loss: 3.300116, ppl: 27.115780 +epoch: 0, batch: 11613, sum loss: 5423.892578, avg loss: 3.354294, ppl: 28.625376 +epoch: 0, batch: 11614, sum loss: 6189.839844, avg loss: 3.626151, ppl: 37.567944 +epoch: 0, batch: 11615, sum loss: 5862.842773, avg loss: 3.422559, ppl: 
30.647728 +epoch: 0, batch: 11616, sum loss: 6304.469238, avg loss: 3.837169, ppl: 46.393955 +epoch: 0, batch: 11617, sum loss: 6412.222168, avg loss: 3.616595, ppl: 37.210640 +epoch: 0, batch: 11618, sum loss: 6693.035645, avg loss: 3.732870, ppl: 41.798893 +epoch: 0, batch: 11619, sum loss: 6136.374023, avg loss: 3.931053, ppl: 50.960632 +epoch: 0, batch: 11620, sum loss: 6155.111328, avg loss: 3.696764, ppl: 40.316620 +epoch: 0, batch: 11621, sum loss: 6165.836914, avg loss: 3.665777, ppl: 39.086494 +epoch: 0, batch: 11622, sum loss: 6552.164551, avg loss: 3.720707, ppl: 41.293568 +epoch: 0, batch: 11623, sum loss: 5138.055664, avg loss: 3.507205, ppl: 33.354919 +epoch: 0, batch: 11624, sum loss: 6028.015625, avg loss: 3.624784, ppl: 37.516617 +epoch: 0, batch: 11625, sum loss: 6003.469727, avg loss: 3.470214, ppl: 32.143608 +epoch: 0, batch: 11626, sum loss: 6994.453613, avg loss: 3.870754, ppl: 47.978573 +epoch: 0, batch: 11627, sum loss: 7602.749023, avg loss: 3.786230, ppl: 44.089851 +epoch: 0, batch: 11628, sum loss: 5306.292480, avg loss: 3.390602, ppl: 29.683821 +epoch: 0, batch: 11629, sum loss: 7530.989746, avg loss: 3.930579, ppl: 50.936470 +epoch: 0, batch: 11630, sum loss: 5026.710449, avg loss: 3.515182, ppl: 33.622055 +epoch: 0, batch: 11631, sum loss: 7682.619629, avg loss: 3.718596, ppl: 41.206509 +epoch: 0, batch: 11632, sum loss: 6067.272461, avg loss: 3.470980, ppl: 32.168251 +epoch: 0, batch: 11633, sum loss: 5302.717773, avg loss: 3.434403, ppl: 31.012890 +epoch: 0, batch: 11634, sum loss: 5467.558105, avg loss: 3.484741, ppl: 32.613972 +epoch: 0, batch: 11635, sum loss: 6045.140625, avg loss: 3.450423, ppl: 31.513720 +epoch: 0, batch: 11636, sum loss: 5333.641602, avg loss: 3.577224, ppl: 35.774113 +epoch: 0, batch: 11637, sum loss: 6060.563477, avg loss: 3.684233, ppl: 39.814571 +epoch: 0, batch: 11638, sum loss: 5041.382812, avg loss: 3.753822, ppl: 42.683914 +epoch: 0, batch: 11639, sum loss: 5132.178223, avg loss: 3.407821, ppl: 
30.199366 +epoch: 0, batch: 11640, sum loss: 5080.588867, avg loss: 3.487020, ppl: 32.688408 +epoch: 0, batch: 11641, sum loss: 6579.391602, avg loss: 3.688000, ppl: 39.964836 +epoch: 0, batch: 11642, sum loss: 5460.259277, avg loss: 3.671997, ppl: 39.330353 +epoch: 0, batch: 11643, sum loss: 6033.295898, avg loss: 3.660980, ppl: 38.899464 +epoch: 0, batch: 11644, sum loss: 6172.566406, avg loss: 3.454150, ppl: 31.631397 +epoch: 0, batch: 11645, sum loss: 6114.952148, avg loss: 3.644191, ppl: 38.251808 +epoch: 0, batch: 11646, sum loss: 6039.799805, avg loss: 3.597260, ppl: 36.498093 +epoch: 0, batch: 11647, sum loss: 7762.841309, avg loss: 3.837291, ppl: 46.399632 +epoch: 0, batch: 11648, sum loss: 5623.724609, avg loss: 3.611898, ppl: 37.036270 +epoch: 0, batch: 11649, sum loss: 5927.164551, avg loss: 3.756124, ppl: 42.782303 +epoch: 0, batch: 11650, sum loss: 6300.786621, avg loss: 3.797942, ppl: 44.609303 +epoch: 0, batch: 11651, sum loss: 7557.502441, avg loss: 3.803474, ppl: 44.856735 +epoch: 0, batch: 11652, sum loss: 5294.857422, avg loss: 3.427092, ppl: 30.786993 +epoch: 0, batch: 11653, sum loss: 6279.697266, avg loss: 3.531888, ppl: 34.188473 +epoch: 0, batch: 11654, sum loss: 5953.152832, avg loss: 3.439141, ppl: 31.160173 +epoch: 0, batch: 11655, sum loss: 6409.912598, avg loss: 3.685976, ppl: 39.884041 +epoch: 0, batch: 11656, sum loss: 7655.286621, avg loss: 3.747081, ppl: 42.397144 +epoch: 0, batch: 11657, sum loss: 6638.936035, avg loss: 3.706832, ppl: 40.724575 +epoch: 0, batch: 11658, sum loss: 5811.575195, avg loss: 3.632234, ppl: 37.797173 +epoch: 0, batch: 11659, sum loss: 5525.051270, avg loss: 3.463982, ppl: 31.943928 +epoch: 0, batch: 11660, sum loss: 4932.737305, avg loss: 3.411298, ppl: 30.304562 +epoch: 0, batch: 11661, sum loss: 6183.233398, avg loss: 3.555626, ppl: 35.009727 +epoch: 0, batch: 11662, sum loss: 6725.434570, avg loss: 3.759326, ppl: 42.919498 +epoch: 0, batch: 11663, sum loss: 5351.268066, avg loss: 3.408451, ppl: 
30.218403 +epoch: 0, batch: 11664, sum loss: 6211.106445, avg loss: 3.611108, ppl: 37.007046 +epoch: 0, batch: 11665, sum loss: 5966.393555, avg loss: 3.497300, ppl: 33.026150 +epoch: 0, batch: 11666, sum loss: 6624.287598, avg loss: 3.742535, ppl: 42.204861 +epoch: 0, batch: 11667, sum loss: 7216.563965, avg loss: 3.892429, ppl: 49.029842 +epoch: 0, batch: 11668, sum loss: 5908.291504, avg loss: 3.563505, ppl: 35.286667 +epoch: 0, batch: 11669, sum loss: 5935.144043, avg loss: 3.562511, ppl: 35.251610 +epoch: 0, batch: 11670, sum loss: 5926.872070, avg loss: 3.796843, ppl: 44.560299 +epoch: 0, batch: 11671, sum loss: 5996.121582, avg loss: 3.870963, ppl: 47.988571 +epoch: 0, batch: 11672, sum loss: 6330.799805, avg loss: 3.761616, ppl: 43.017887 +epoch: 0, batch: 11673, sum loss: 6127.227539, avg loss: 3.503275, ppl: 33.224072 +epoch: 0, batch: 11674, sum loss: 6280.438965, avg loss: 3.574524, ppl: 35.677639 +epoch: 0, batch: 11675, sum loss: 6437.727539, avg loss: 3.592482, ppl: 36.324123 +epoch: 0, batch: 11676, sum loss: 6268.850098, avg loss: 3.537726, ppl: 34.388626 +epoch: 0, batch: 11677, sum loss: 4805.648438, avg loss: 3.500108, ppl: 33.119030 +epoch: 0, batch: 11678, sum loss: 6263.039062, avg loss: 3.522519, ppl: 33.869652 +epoch: 0, batch: 11679, sum loss: 5657.044922, avg loss: 3.594056, ppl: 36.381351 +epoch: 0, batch: 11680, sum loss: 6337.633789, avg loss: 3.594801, ppl: 36.408443 +epoch: 0, batch: 11681, sum loss: 6202.441406, avg loss: 3.702950, ppl: 40.566811 +epoch: 0, batch: 11682, sum loss: 5368.604980, avg loss: 3.783372, ppl: 43.964046 +epoch: 0, batch: 11683, sum loss: 5846.123047, avg loss: 3.816007, ppl: 45.422489 +epoch: 0, batch: 11684, sum loss: 6525.763672, avg loss: 3.613380, ppl: 37.091198 +epoch: 0, batch: 11685, sum loss: 6749.576660, avg loss: 3.783395, ppl: 43.965061 +epoch: 0, batch: 11686, sum loss: 6765.610352, avg loss: 3.777560, ppl: 43.709259 +epoch: 0, batch: 11687, sum loss: 5528.634766, avg loss: 3.893405, ppl: 
49.077709 +epoch: 0, batch: 11688, sum loss: 5579.010742, avg loss: 3.320840, ppl: 27.683592 +epoch: 0, batch: 11689, sum loss: 6846.762695, avg loss: 3.636093, ppl: 37.943291 +epoch: 0, batch: 11690, sum loss: 5377.007812, avg loss: 3.601479, ppl: 36.652397 +epoch: 0, batch: 11691, sum loss: 7182.787109, avg loss: 3.891001, ppl: 48.959846 +epoch: 0, batch: 11692, sum loss: 4826.657227, avg loss: 3.497578, ppl: 33.035339 +epoch: 0, batch: 11693, sum loss: 5501.627930, avg loss: 3.429943, ppl: 30.874870 +epoch: 0, batch: 11694, sum loss: 5355.597168, avg loss: 3.488988, ppl: 32.752800 +epoch: 0, batch: 11695, sum loss: 6832.692871, avg loss: 3.832133, ppl: 46.160896 +epoch: 0, batch: 11696, sum loss: 5385.135742, avg loss: 3.538197, ppl: 34.404842 +epoch: 0, batch: 11697, sum loss: 6160.325195, avg loss: 3.767783, ppl: 43.283993 +epoch: 0, batch: 11698, sum loss: 6291.358887, avg loss: 3.642941, ppl: 38.204029 +epoch: 0, batch: 11699, sum loss: 5685.018555, avg loss: 3.623339, ppl: 37.462437 +epoch: 0, batch: 11700, sum loss: 5548.154297, avg loss: 3.595693, ppl: 36.440948 +epoch: 0, batch: 11701, sum loss: 5308.670898, avg loss: 3.565259, ppl: 35.348614 +epoch: 0, batch: 11702, sum loss: 6537.393555, avg loss: 3.658307, ppl: 38.795589 +epoch: 0, batch: 11703, sum loss: 6558.283203, avg loss: 3.734786, ppl: 41.879044 +epoch: 0, batch: 11704, sum loss: 5883.379883, avg loss: 3.550622, ppl: 34.834961 +epoch: 0, batch: 11705, sum loss: 4864.369141, avg loss: 3.359371, ppl: 28.771093 +epoch: 0, batch: 11706, sum loss: 6470.647949, avg loss: 3.947924, ppl: 51.827667 +epoch: 0, batch: 11707, sum loss: 7513.124512, avg loss: 3.904950, ppl: 49.647617 +epoch: 0, batch: 11708, sum loss: 6383.476562, avg loss: 3.633168, ppl: 37.832489 +epoch: 0, batch: 11709, sum loss: 6387.598633, avg loss: 3.909179, ppl: 49.858013 +epoch: 0, batch: 11710, sum loss: 6297.716797, avg loss: 3.598695, ppl: 36.550514 +epoch: 0, batch: 11711, sum loss: 5228.002930, avg loss: 3.392604, ppl: 
29.743307 +epoch: 0, batch: 11712, sum loss: 6216.551758, avg loss: 3.820868, ppl: 45.643787 +epoch: 0, batch: 11713, sum loss: 6215.311035, avg loss: 3.617760, ppl: 37.254013 +epoch: 0, batch: 11714, sum loss: 5722.097168, avg loss: 3.682173, ppl: 39.732651 +epoch: 0, batch: 11715, sum loss: 6316.177734, avg loss: 3.388507, ppl: 29.621704 +epoch: 0, batch: 11716, sum loss: 5745.486816, avg loss: 3.503346, ppl: 33.226433 +epoch: 0, batch: 11717, sum loss: 6086.528320, avg loss: 3.642447, ppl: 38.185154 +epoch: 0, batch: 11718, sum loss: 6903.195801, avg loss: 3.976495, ppl: 53.329800 +epoch: 0, batch: 11719, sum loss: 6123.223633, avg loss: 3.485045, ppl: 32.623886 +epoch: 0, batch: 11720, sum loss: 6322.356445, avg loss: 3.384559, ppl: 29.504982 +epoch: 0, batch: 11721, sum loss: 5881.506836, avg loss: 3.713073, ppl: 40.979523 +epoch: 0, batch: 11722, sum loss: 6774.635742, avg loss: 3.767873, ppl: 43.287884 +epoch: 0, batch: 11723, sum loss: 6349.721680, avg loss: 3.555275, ppl: 34.997459 +epoch: 0, batch: 11724, sum loss: 7204.678711, avg loss: 3.721425, ppl: 41.323231 +epoch: 0, batch: 11725, sum loss: 6428.980957, avg loss: 3.686342, ppl: 39.898640 +epoch: 0, batch: 11726, sum loss: 7685.782715, avg loss: 3.951559, ppl: 52.016415 +epoch: 0, batch: 11727, sum loss: 6669.773438, avg loss: 3.772496, ppl: 43.488499 +epoch: 0, batch: 11728, sum loss: 5657.142578, avg loss: 3.436903, ppl: 31.090528 +epoch: 0, batch: 11729, sum loss: 4917.945312, avg loss: 3.366150, ppl: 28.966795 +epoch: 0, batch: 11730, sum loss: 6794.431641, avg loss: 3.830006, ppl: 46.062832 +epoch: 0, batch: 11731, sum loss: 7393.169922, avg loss: 4.132571, ppl: 62.338001 +epoch: 0, batch: 11732, sum loss: 6102.591309, avg loss: 3.714298, ppl: 41.029766 +epoch: 0, batch: 11733, sum loss: 5211.449219, avg loss: 3.557303, ppl: 35.068497 +epoch: 0, batch: 11734, sum loss: 5602.158203, avg loss: 3.554669, ppl: 34.976238 +epoch: 0, batch: 11735, sum loss: 5563.580566, avg loss: 3.701650, ppl: 
40.514114 +epoch: 0, batch: 11736, sum loss: 5072.901367, avg loss: 3.455655, ppl: 31.679028 +epoch: 0, batch: 11737, sum loss: 5627.790039, avg loss: 3.666313, ppl: 39.107437 +epoch: 0, batch: 11738, sum loss: 6558.819336, avg loss: 3.701365, ppl: 40.502571 +epoch: 0, batch: 11739, sum loss: 6135.221680, avg loss: 3.757025, ppl: 42.820835 +epoch: 0, batch: 11740, sum loss: 6605.193359, avg loss: 3.929324, ppl: 50.872559 +epoch: 0, batch: 11741, sum loss: 6247.211914, avg loss: 3.707544, ppl: 40.753597 +epoch: 0, batch: 11742, sum loss: 6239.004395, avg loss: 3.414890, ppl: 30.413612 +epoch: 0, batch: 11743, sum loss: 6105.443848, avg loss: 3.531199, ppl: 34.164921 +epoch: 0, batch: 11744, sum loss: 7016.854492, avg loss: 3.677597, ppl: 39.551220 +epoch: 0, batch: 11745, sum loss: 6442.931641, avg loss: 3.673279, ppl: 39.380825 +epoch: 0, batch: 11746, sum loss: 6913.223633, avg loss: 3.628989, ppl: 37.674698 +epoch: 0, batch: 11747, sum loss: 5884.020020, avg loss: 3.677512, ppl: 39.547894 +epoch: 0, batch: 11748, sum loss: 7314.644531, avg loss: 3.557707, ppl: 35.082645 +epoch: 0, batch: 11749, sum loss: 6474.786621, avg loss: 3.762223, ppl: 43.044025 +epoch: 0, batch: 11750, sum loss: 5996.343262, avg loss: 3.508685, ppl: 33.404324 +epoch: 0, batch: 11751, sum loss: 5180.027832, avg loss: 3.547964, ppl: 34.742523 +epoch: 0, batch: 11752, sum loss: 6818.094238, avg loss: 3.771070, ppl: 43.426495 +epoch: 0, batch: 11753, sum loss: 5004.811523, avg loss: 3.468338, ppl: 32.083366 +epoch: 0, batch: 11754, sum loss: 5800.376953, avg loss: 3.536815, ppl: 34.357323 +epoch: 0, batch: 11755, sum loss: 4895.636230, avg loss: 3.472083, ppl: 32.203739 +epoch: 0, batch: 11756, sum loss: 4227.200684, avg loss: 3.065410, ppl: 21.443254 +epoch: 0, batch: 11757, sum loss: 5854.361816, avg loss: 3.679674, ppl: 39.633488 +epoch: 0, batch: 11758, sum loss: 5995.579590, avg loss: 3.770805, ppl: 43.414986 +epoch: 0, batch: 11759, sum loss: 7700.479492, avg loss: 3.861825, ppl: 
47.552067 +epoch: 0, batch: 11760, sum loss: 6020.771973, avg loss: 3.620428, ppl: 37.353554 +epoch: 0, batch: 11761, sum loss: 7101.845703, avg loss: 3.810003, ppl: 45.150578 +epoch: 0, batch: 11762, sum loss: 6167.686035, avg loss: 3.938497, ppl: 51.341370 +epoch: 0, batch: 11763, sum loss: 5611.010254, avg loss: 3.542305, ppl: 34.546440 +epoch: 0, batch: 11764, sum loss: 5054.797363, avg loss: 3.481266, ppl: 32.500828 +epoch: 0, batch: 11765, sum loss: 6188.122070, avg loss: 3.576949, ppl: 35.764263 +epoch: 0, batch: 11766, sum loss: 7067.242188, avg loss: 3.880968, ppl: 48.471104 +epoch: 0, batch: 11767, sum loss: 6447.617188, avg loss: 3.703399, ppl: 40.585007 +epoch: 0, batch: 11768, sum loss: 7087.741211, avg loss: 3.827074, ppl: 45.927959 +epoch: 0, batch: 11769, sum loss: 5950.340332, avg loss: 3.838929, ppl: 46.475693 +epoch: 0, batch: 11770, sum loss: 5758.083496, avg loss: 3.502484, ppl: 33.197807 +epoch: 0, batch: 11771, sum loss: 6780.469238, avg loss: 3.639543, ppl: 38.074444 +epoch: 0, batch: 11772, sum loss: 6130.852539, avg loss: 3.572758, ppl: 35.614681 +epoch: 0, batch: 11773, sum loss: 6001.362305, avg loss: 3.479051, ppl: 32.428928 +epoch: 0, batch: 11774, sum loss: 6743.318359, avg loss: 3.641101, ppl: 38.133785 +epoch: 0, batch: 11775, sum loss: 6686.268555, avg loss: 3.635818, ppl: 37.932861 +epoch: 0, batch: 11776, sum loss: 5888.670410, avg loss: 3.492687, ppl: 32.874168 +epoch: 0, batch: 11777, sum loss: 6097.212402, avg loss: 3.425400, ppl: 30.734943 +epoch: 0, batch: 11778, sum loss: 6258.549316, avg loss: 3.653561, ppl: 38.611904 +epoch: 0, batch: 11779, sum loss: 7139.594727, avg loss: 3.705031, ppl: 40.651314 +epoch: 0, batch: 11780, sum loss: 6945.267090, avg loss: 3.655404, ppl: 38.683140 +epoch: 0, batch: 11781, sum loss: 5534.269531, avg loss: 3.579735, ppl: 35.864021 +epoch: 0, batch: 11782, sum loss: 5595.701660, avg loss: 3.904886, ppl: 49.644432 +epoch: 0, batch: 11783, sum loss: 6854.922852, avg loss: 3.604061, ppl: 
36.747150 +epoch: 0, batch: 11784, sum loss: 5144.704590, avg loss: 3.459788, ppl: 31.810226 +epoch: 0, batch: 11785, sum loss: 4853.045898, avg loss: 3.483881, ppl: 32.585934 +epoch: 0, batch: 11786, sum loss: 6491.988281, avg loss: 3.561157, ppl: 35.203896 +epoch: 0, batch: 11787, sum loss: 5687.391113, avg loss: 3.399517, ppl: 29.949619 +epoch: 0, batch: 11788, sum loss: 5442.667969, avg loss: 3.513666, ppl: 33.571102 +epoch: 0, batch: 11789, sum loss: 5933.277344, avg loss: 3.655747, ppl: 38.696415 +epoch: 0, batch: 11790, sum loss: 6638.412598, avg loss: 3.681870, ppl: 39.720619 +epoch: 0, batch: 11791, sum loss: 6555.493652, avg loss: 3.778383, ppl: 43.745228 +epoch: 0, batch: 11792, sum loss: 5418.830566, avg loss: 3.451484, ppl: 31.547188 +epoch: 0, batch: 11793, sum loss: 5944.215820, avg loss: 3.620107, ppl: 37.341568 +epoch: 0, batch: 11794, sum loss: 6860.555176, avg loss: 3.765398, ppl: 43.180889 +epoch: 0, batch: 11795, sum loss: 6104.141113, avg loss: 3.528405, ppl: 34.069592 +epoch: 0, batch: 11796, sum loss: 5952.676758, avg loss: 3.448828, ppl: 31.463488 +epoch: 0, batch: 11797, sum loss: 6297.839844, avg loss: 3.578318, ppl: 35.813248 +epoch: 0, batch: 11798, sum loss: 6418.800293, avg loss: 3.708146, ppl: 40.778118 +epoch: 0, batch: 11799, sum loss: 5320.703613, avg loss: 3.558999, ppl: 35.128017 +epoch: 0, batch: 11800, sum loss: 6266.752930, avg loss: 3.616130, ppl: 37.193336 +epoch: 0, batch: 11801, sum loss: 6521.989746, avg loss: 3.678505, ppl: 39.587185 +epoch: 0, batch: 11802, sum loss: 5426.199219, avg loss: 3.541906, ppl: 34.532665 +epoch: 0, batch: 11803, sum loss: 5728.214355, avg loss: 3.580134, ppl: 35.878345 +epoch: 0, batch: 11804, sum loss: 5479.630859, avg loss: 3.372081, ppl: 29.139090 +epoch: 0, batch: 11805, sum loss: 5564.292969, avg loss: 3.292481, ppl: 26.909542 +epoch: 0, batch: 11806, sum loss: 6622.524902, avg loss: 3.662901, ppl: 38.974239 +epoch: 0, batch: 11807, sum loss: 6518.598633, avg loss: 3.796505, ppl: 
44.545216 +epoch: 0, batch: 11808, sum loss: 6801.414062, avg loss: 3.684406, ppl: 39.821472 +epoch: 0, batch: 11809, sum loss: 6123.727051, avg loss: 3.731704, ppl: 41.750198 +epoch: 0, batch: 11810, sum loss: 6200.281250, avg loss: 3.706086, ppl: 40.694202 +epoch: 0, batch: 11811, sum loss: 7155.294922, avg loss: 3.855224, ppl: 47.239182 +epoch: 0, batch: 11812, sum loss: 6537.409180, avg loss: 3.754974, ppl: 42.733093 +epoch: 0, batch: 11813, sum loss: 6736.947754, avg loss: 3.782677, ppl: 43.933502 +epoch: 0, batch: 11814, sum loss: 6598.167969, avg loss: 3.748959, ppl: 42.476845 +epoch: 0, batch: 11815, sum loss: 6758.290039, avg loss: 3.784037, ppl: 43.993290 +epoch: 0, batch: 11816, sum loss: 5962.458008, avg loss: 3.717243, ppl: 41.150791 +epoch: 0, batch: 11817, sum loss: 5486.591309, avg loss: 3.483550, ppl: 32.575153 +epoch: 0, batch: 11818, sum loss: 5448.170898, avg loss: 3.338340, ppl: 28.172323 +epoch: 0, batch: 11819, sum loss: 5996.011230, avg loss: 3.583988, ppl: 36.016880 +epoch: 0, batch: 11820, sum loss: 5672.028320, avg loss: 3.286227, ppl: 26.741783 +epoch: 0, batch: 11821, sum loss: 5684.464355, avg loss: 3.572888, ppl: 35.619301 +epoch: 0, batch: 11822, sum loss: 5876.465820, avg loss: 3.585397, ppl: 36.067673 +epoch: 0, batch: 11823, sum loss: 6166.462891, avg loss: 3.599803, ppl: 36.591034 +epoch: 0, batch: 11824, sum loss: 6706.672852, avg loss: 3.795514, ppl: 44.501099 +epoch: 0, batch: 11825, sum loss: 6661.170898, avg loss: 3.698596, ppl: 40.390537 +epoch: 0, batch: 11826, sum loss: 6330.588867, avg loss: 3.890958, ppl: 48.957767 +epoch: 0, batch: 11827, sum loss: 5098.589355, avg loss: 3.442667, ppl: 31.270237 +epoch: 0, batch: 11828, sum loss: 5156.424805, avg loss: 3.412591, ppl: 30.343756 +epoch: 0, batch: 11829, sum loss: 6005.103516, avg loss: 3.542834, ppl: 34.564732 +epoch: 0, batch: 11830, sum loss: 5664.100098, avg loss: 3.489895, ppl: 32.782516 +epoch: 0, batch: 11831, sum loss: 5694.514160, avg loss: 3.457507, ppl: 
31.737753 +epoch: 0, batch: 11832, sum loss: 6811.490234, avg loss: 3.660124, ppl: 38.866154 +epoch: 0, batch: 11833, sum loss: 5699.760254, avg loss: 3.696342, ppl: 40.299637 +epoch: 0, batch: 11834, sum loss: 5965.041016, avg loss: 3.621761, ppl: 37.403389 +epoch: 0, batch: 11835, sum loss: 7047.637207, avg loss: 3.750738, ppl: 42.552490 +epoch: 0, batch: 11836, sum loss: 6437.767090, avg loss: 3.457448, ppl: 31.735868 +epoch: 0, batch: 11837, sum loss: 6122.395996, avg loss: 3.701570, ppl: 40.510838 +epoch: 0, batch: 11838, sum loss: 7003.072266, avg loss: 4.085806, ppl: 59.489891 +epoch: 0, batch: 11839, sum loss: 6499.406250, avg loss: 3.783123, ppl: 43.953114 +epoch: 0, batch: 11840, sum loss: 8033.590332, avg loss: 4.043075, ppl: 57.001358 +epoch: 0, batch: 11841, sum loss: 5773.545410, avg loss: 3.715280, ppl: 41.070099 +epoch: 0, batch: 11842, sum loss: 6663.919922, avg loss: 3.769186, ppl: 43.344749 +epoch: 0, batch: 11843, sum loss: 5610.059082, avg loss: 3.707904, ppl: 40.768280 +epoch: 0, batch: 11844, sum loss: 6987.049805, avg loss: 3.805583, ppl: 44.951427 +epoch: 0, batch: 11845, sum loss: 5643.114746, avg loss: 3.617381, ppl: 37.239922 +epoch: 0, batch: 11846, sum loss: 6539.761230, avg loss: 3.686449, ppl: 39.902912 +epoch: 0, batch: 11847, sum loss: 5957.045898, avg loss: 3.672655, ppl: 39.356243 +epoch: 0, batch: 11848, sum loss: 5788.442871, avg loss: 3.720079, ppl: 41.267651 +epoch: 0, batch: 11849, sum loss: 6314.788086, avg loss: 3.587948, ppl: 36.159786 +epoch: 0, batch: 11850, sum loss: 7399.335938, avg loss: 3.718259, ppl: 41.192631 +epoch: 0, batch: 11851, sum loss: 4590.933105, avg loss: 3.523356, ppl: 33.897999 +epoch: 0, batch: 11852, sum loss: 5570.480957, avg loss: 3.369922, ppl: 29.076258 +epoch: 0, batch: 11853, sum loss: 7310.188965, avg loss: 3.787663, ppl: 44.153084 +epoch: 0, batch: 11854, sum loss: 5540.999512, avg loss: 3.640604, ppl: 38.114861 +epoch: 0, batch: 11855, sum loss: 5867.702637, avg loss: 3.332029, ppl: 
27.995075 +epoch: 0, batch: 11856, sum loss: 5468.014160, avg loss: 3.362862, ppl: 28.871714 +epoch: 0, batch: 11857, sum loss: 5817.027832, avg loss: 3.772392, ppl: 43.483936 +epoch: 0, batch: 11858, sum loss: 6739.085938, avg loss: 3.712995, ppl: 40.976349 +epoch: 0, batch: 11859, sum loss: 5578.206543, avg loss: 3.728748, ppl: 41.626953 +epoch: 0, batch: 11860, sum loss: 7381.634277, avg loss: 3.932677, ppl: 51.043427 +epoch: 0, batch: 11861, sum loss: 6003.423828, avg loss: 3.605660, ppl: 36.805965 +epoch: 0, batch: 11862, sum loss: 6136.103027, avg loss: 3.590464, ppl: 36.250896 +epoch: 0, batch: 11863, sum loss: 7000.335938, avg loss: 3.792165, ppl: 44.352299 +epoch: 0, batch: 11864, sum loss: 5799.226074, avg loss: 3.551271, ppl: 34.857601 +epoch: 0, batch: 11865, sum loss: 5173.658691, avg loss: 3.708716, ppl: 40.801392 +epoch: 0, batch: 11866, sum loss: 6491.806152, avg loss: 3.759008, ppl: 42.905830 +epoch: 0, batch: 11867, sum loss: 6617.504883, avg loss: 3.573167, ppl: 35.629238 +epoch: 0, batch: 11868, sum loss: 6579.486328, avg loss: 3.603224, ppl: 36.716412 +epoch: 0, batch: 11869, sum loss: 6643.043945, avg loss: 3.884821, ppl: 48.658230 +epoch: 0, batch: 11870, sum loss: 5777.014648, avg loss: 3.705590, ppl: 40.674038 +epoch: 0, batch: 11871, sum loss: 6243.753906, avg loss: 3.844676, ppl: 46.743538 +epoch: 0, batch: 11872, sum loss: 5365.066406, avg loss: 3.627496, ppl: 37.618492 +epoch: 0, batch: 11873, sum loss: 4893.345215, avg loss: 3.200357, ppl: 24.541283 +epoch: 0, batch: 11874, sum loss: 5993.747559, avg loss: 3.634777, ppl: 37.893406 +epoch: 0, batch: 11875, sum loss: 6224.841797, avg loss: 3.653076, ppl: 38.593204 +epoch: 0, batch: 11876, sum loss: 6276.245117, avg loss: 3.594642, ppl: 36.402672 +epoch: 0, batch: 11877, sum loss: 6450.333984, avg loss: 3.466058, ppl: 32.010319 +epoch: 0, batch: 11878, sum loss: 5908.004883, avg loss: 3.535610, ppl: 34.315956 +epoch: 0, batch: 11879, sum loss: 5489.609375, avg loss: 3.472239, ppl: 
32.208763 +epoch: 0, batch: 11880, sum loss: 5564.443359, avg loss: 3.499650, ppl: 33.103855 +epoch: 0, batch: 11881, sum loss: 4957.445312, avg loss: 3.331616, ppl: 27.983538 +epoch: 0, batch: 11882, sum loss: 5514.187988, avg loss: 3.608762, ppl: 36.920311 +epoch: 0, batch: 11883, sum loss: 6188.303711, avg loss: 3.413295, ppl: 30.365133 +epoch: 0, batch: 11884, sum loss: 7006.364746, avg loss: 3.974115, ppl: 53.203007 +epoch: 0, batch: 11885, sum loss: 5099.385742, avg loss: 3.561024, ppl: 35.199211 +epoch: 0, batch: 11886, sum loss: 3909.348877, avg loss: 3.170599, ppl: 23.821760 +epoch: 0, batch: 11887, sum loss: 7746.799316, avg loss: 3.958508, ppl: 52.379093 +epoch: 0, batch: 11888, sum loss: 6457.878906, avg loss: 3.772125, ppl: 43.472366 +epoch: 0, batch: 11889, sum loss: 4682.983398, avg loss: 3.267958, ppl: 26.257664 +epoch: 0, batch: 11890, sum loss: 5454.227539, avg loss: 3.753770, ppl: 42.681683 +epoch: 0, batch: 11891, sum loss: 6602.177246, avg loss: 3.627570, ppl: 37.621292 +epoch: 0, batch: 11892, sum loss: 6158.606445, avg loss: 3.599420, ppl: 36.577000 +epoch: 0, batch: 11893, sum loss: 6261.414551, avg loss: 3.687523, ppl: 39.945793 +epoch: 0, batch: 11894, sum loss: 6233.851562, avg loss: 3.624332, ppl: 37.499672 +epoch: 0, batch: 11895, sum loss: 5675.700684, avg loss: 3.603619, ppl: 36.730934 +epoch: 0, batch: 11896, sum loss: 6554.143066, avg loss: 3.753805, ppl: 42.683178 +epoch: 0, batch: 11897, sum loss: 5832.450195, avg loss: 3.357772, ppl: 28.725130 +epoch: 0, batch: 11898, sum loss: 5707.824707, avg loss: 3.640194, ppl: 38.099236 +epoch: 0, batch: 11899, sum loss: 5596.706055, avg loss: 3.624810, ppl: 37.517586 +epoch: 0, batch: 11900, sum loss: 5791.492188, avg loss: 3.544365, ppl: 34.617695 +epoch: 0, batch: 11901, sum loss: 6505.168945, avg loss: 3.753704, ppl: 42.678864 +epoch: 0, batch: 11902, sum loss: 6011.423828, avg loss: 3.678962, ppl: 39.605263 +epoch: 0, batch: 11903, sum loss: 7557.315918, avg loss: 3.726487, ppl: 
41.532951 +epoch: 0, batch: 11904, sum loss: 6583.869629, avg loss: 3.515147, ppl: 33.620861 +epoch: 0, batch: 11905, sum loss: 7273.275391, avg loss: 3.680808, ppl: 39.678425 +epoch: 0, batch: 11906, sum loss: 6327.431152, avg loss: 3.649037, ppl: 38.437645 +epoch: 0, batch: 11907, sum loss: 6212.434570, avg loss: 3.675997, ppl: 39.487999 +epoch: 0, batch: 11908, sum loss: 7522.177246, avg loss: 3.841766, ppl: 46.607704 +epoch: 0, batch: 11909, sum loss: 6521.487305, avg loss: 3.831661, ppl: 46.139111 +epoch: 0, batch: 11910, sum loss: 6290.157227, avg loss: 3.456131, ppl: 31.694099 +epoch: 0, batch: 11911, sum loss: 6732.506836, avg loss: 3.893873, ppl: 49.100708 +epoch: 0, batch: 11912, sum loss: 6514.871094, avg loss: 3.647744, ppl: 38.387974 +epoch: 0, batch: 11913, sum loss: 6856.292969, avg loss: 3.949477, ppl: 51.908234 +epoch: 0, batch: 11914, sum loss: 5961.161133, avg loss: 3.575981, ppl: 35.729668 +epoch: 0, batch: 11915, sum loss: 5909.050781, avg loss: 3.553248, ppl: 34.926563 +epoch: 0, batch: 11916, sum loss: 6839.346680, avg loss: 3.823000, ppl: 45.741215 +epoch: 0, batch: 11917, sum loss: 4952.308594, avg loss: 3.305947, ppl: 27.274353 +epoch: 0, batch: 11918, sum loss: 7205.287109, avg loss: 3.922312, ppl: 50.517120 +epoch: 0, batch: 11919, sum loss: 5869.826172, avg loss: 3.632318, ppl: 37.800339 +epoch: 0, batch: 11920, sum loss: 5738.525879, avg loss: 3.421900, ppl: 30.627546 +epoch: 0, batch: 11921, sum loss: 6409.294922, avg loss: 3.629272, ppl: 37.685379 +epoch: 0, batch: 11922, sum loss: 7006.156738, avg loss: 3.894473, ppl: 49.130146 +epoch: 0, batch: 11923, sum loss: 6379.574219, avg loss: 3.660111, ppl: 38.865673 +epoch: 0, batch: 11924, sum loss: 6892.779297, avg loss: 3.744041, ppl: 42.268452 +epoch: 0, batch: 11925, sum loss: 5095.006348, avg loss: 3.380893, ppl: 29.397024 +epoch: 0, batch: 11926, sum loss: 5032.207031, avg loss: 3.193025, ppl: 24.362001 +epoch: 0, batch: 11927, sum loss: 6685.304688, avg loss: 3.697624, ppl: 
40.351322 +epoch: 0, batch: 11928, sum loss: 6810.279785, avg loss: 3.573074, ppl: 35.625942 +epoch: 0, batch: 11929, sum loss: 5353.960449, avg loss: 3.294745, ppl: 26.970528 +epoch: 0, batch: 11930, sum loss: 6828.828125, avg loss: 3.795902, ppl: 44.518387 +epoch: 0, batch: 11931, sum loss: 6534.446289, avg loss: 3.772775, ppl: 43.500610 +epoch: 0, batch: 11932, sum loss: 7078.536621, avg loss: 3.828305, ppl: 45.984539 +epoch: 0, batch: 11933, sum loss: 7055.601562, avg loss: 3.775068, ppl: 43.600452 +epoch: 0, batch: 11934, sum loss: 6913.561523, avg loss: 3.693142, ppl: 40.170864 +epoch: 0, batch: 11935, sum loss: 6506.367188, avg loss: 3.813814, ppl: 45.322979 +epoch: 0, batch: 11936, sum loss: 4752.526855, avg loss: 3.461418, ppl: 31.862122 +epoch: 0, batch: 11937, sum loss: 6087.240234, avg loss: 3.419798, ppl: 30.563238 +epoch: 0, batch: 11938, sum loss: 5981.989258, avg loss: 3.502336, ppl: 33.192886 +epoch: 0, batch: 11939, sum loss: 7607.002930, avg loss: 3.849698, ppl: 46.978867 +epoch: 0, batch: 11940, sum loss: 5970.804199, avg loss: 3.667570, ppl: 39.156643 +epoch: 0, batch: 11941, sum loss: 7249.936523, avg loss: 3.495630, ppl: 32.971054 +epoch: 0, batch: 11942, sum loss: 5330.225586, avg loss: 3.371427, ppl: 29.120039 +epoch: 0, batch: 11943, sum loss: 6637.220703, avg loss: 3.897370, ppl: 49.272686 +epoch: 0, batch: 11944, sum loss: 5880.300781, avg loss: 3.574651, ppl: 35.682156 +epoch: 0, batch: 11945, sum loss: 6432.301758, avg loss: 3.513000, ppl: 33.548748 +epoch: 0, batch: 11946, sum loss: 7248.684570, avg loss: 3.736435, ppl: 41.948196 +epoch: 0, batch: 11947, sum loss: 5372.170898, avg loss: 3.269733, ppl: 26.304308 +epoch: 0, batch: 11948, sum loss: 6129.135254, avg loss: 3.502363, ppl: 33.193794 +epoch: 0, batch: 11949, sum loss: 5247.532227, avg loss: 3.400864, ppl: 29.989990 +epoch: 0, batch: 11950, sum loss: 5246.192383, avg loss: 3.397793, ppl: 29.898050 +epoch: 0, batch: 11951, sum loss: 6078.024414, avg loss: 3.566916, ppl: 
35.407219 +epoch: 0, batch: 11952, sum loss: 6867.927734, avg loss: 3.660942, ppl: 38.897980 +epoch: 0, batch: 11953, sum loss: 5919.435059, avg loss: 3.718238, ppl: 41.191753 +epoch: 0, batch: 11954, sum loss: 6195.966309, avg loss: 3.712383, ppl: 40.951260 +epoch: 0, batch: 11955, sum loss: 6678.738770, avg loss: 3.726975, ppl: 41.553219 +epoch: 0, batch: 11956, sum loss: 6875.513184, avg loss: 3.716494, ppl: 41.119957 +epoch: 0, batch: 11957, sum loss: 4708.261719, avg loss: 3.372680, ppl: 29.156574 +epoch: 0, batch: 11958, sum loss: 6502.525391, avg loss: 3.707255, ppl: 40.741814 +epoch: 0, batch: 11959, sum loss: 5924.616211, avg loss: 3.634734, ppl: 37.891769 +epoch: 0, batch: 11960, sum loss: 7003.656250, avg loss: 3.719414, ppl: 41.240211 +epoch: 0, batch: 11961, sum loss: 6225.514160, avg loss: 3.584061, ppl: 36.019524 +epoch: 0, batch: 11962, sum loss: 6802.226074, avg loss: 3.774820, ppl: 43.589676 +epoch: 0, batch: 11963, sum loss: 4699.594238, avg loss: 3.432867, ppl: 30.965281 +epoch: 0, batch: 11964, sum loss: 6033.876465, avg loss: 3.587322, ppl: 36.137188 +epoch: 0, batch: 11965, sum loss: 6320.790527, avg loss: 3.539077, ppl: 34.435104 +epoch: 0, batch: 11966, sum loss: 6388.981445, avg loss: 3.642521, ppl: 38.187973 +epoch: 0, batch: 11967, sum loss: 6302.000000, avg loss: 3.657574, ppl: 38.767178 +epoch: 0, batch: 11968, sum loss: 5819.280762, avg loss: 3.497164, ppl: 33.021671 +epoch: 0, batch: 11969, sum loss: 6312.326660, avg loss: 3.708770, ppl: 40.803589 +epoch: 0, batch: 11970, sum loss: 5971.157227, avg loss: 3.586281, ppl: 36.099556 +epoch: 0, batch: 11971, sum loss: 6607.791016, avg loss: 3.554487, ppl: 34.969868 +epoch: 0, batch: 11972, sum loss: 5896.180664, avg loss: 3.604023, ppl: 36.745747 +epoch: 0, batch: 11973, sum loss: 5770.724609, avg loss: 3.579854, ppl: 35.868305 +epoch: 0, batch: 11974, sum loss: 6246.144531, avg loss: 3.751438, ppl: 42.582279 +epoch: 0, batch: 11975, sum loss: 6691.737793, avg loss: 3.603520, ppl: 
36.727303 +epoch: 0, batch: 11976, sum loss: 6379.647949, avg loss: 3.797410, ppl: 44.585537 +epoch: 0, batch: 11977, sum loss: 6578.156250, avg loss: 3.548089, ppl: 34.746838 +epoch: 0, batch: 11978, sum loss: 5872.390137, avg loss: 3.704978, ppl: 40.649151 +epoch: 0, batch: 11979, sum loss: 6139.671875, avg loss: 3.674250, ppl: 39.419094 +epoch: 0, batch: 11980, sum loss: 5009.082520, avg loss: 3.550023, ppl: 34.814121 +epoch: 0, batch: 11981, sum loss: 5258.291504, avg loss: 3.461680, ppl: 31.870464 +epoch: 0, batch: 11982, sum loss: 6289.019043, avg loss: 3.480365, ppl: 32.471558 +epoch: 0, batch: 11983, sum loss: 6659.926758, avg loss: 3.856356, ppl: 47.292709 +epoch: 0, batch: 11984, sum loss: 6614.455078, avg loss: 3.726453, ppl: 41.531548 +epoch: 0, batch: 11985, sum loss: 6162.583008, avg loss: 3.460181, ppl: 31.822750 +epoch: 0, batch: 11986, sum loss: 5590.177246, avg loss: 3.457129, ppl: 31.725761 +epoch: 0, batch: 11987, sum loss: 5482.972656, avg loss: 3.446243, ppl: 31.382269 +epoch: 0, batch: 11988, sum loss: 6905.446289, avg loss: 3.734692, ppl: 41.875141 +epoch: 0, batch: 11989, sum loss: 7054.894531, avg loss: 3.582983, ppl: 35.980728 +epoch: 0, batch: 11990, sum loss: 6832.660645, avg loss: 3.542074, ppl: 34.538475 +epoch: 0, batch: 11991, sum loss: 5995.753906, avg loss: 3.685159, ppl: 39.851456 +epoch: 0, batch: 11992, sum loss: 6251.777832, avg loss: 3.611657, ppl: 37.027344 +epoch: 0, batch: 11993, sum loss: 6125.253906, avg loss: 3.456690, ppl: 31.711824 +epoch: 0, batch: 11994, sum loss: 6255.673340, avg loss: 3.546300, ppl: 34.684753 +epoch: 0, batch: 11995, sum loss: 6890.252441, avg loss: 3.708424, ppl: 40.789486 +epoch: 0, batch: 11996, sum loss: 5456.413574, avg loss: 3.416665, ppl: 30.467630 +epoch: 0, batch: 11997, sum loss: 6336.464844, avg loss: 3.598220, ppl: 36.533134 +epoch: 0, batch: 11998, sum loss: 4541.520020, avg loss: 2.972199, ppl: 19.534828 +epoch: 0, batch: 11999, sum loss: 7162.120117, avg loss: 3.595442, ppl: 
36.431801 +epoch: 0, batch: 12000, sum loss: 7010.743164, avg loss: 3.871200, ppl: 47.999966 +epoch: 0, batch: 12001, sum loss: 6356.250488, avg loss: 3.623860, ppl: 37.481964 +epoch: 0, batch: 12002, sum loss: 6925.202148, avg loss: 3.743353, ppl: 42.239368 +epoch: 0, batch: 12003, sum loss: 5373.781738, avg loss: 3.526103, ppl: 33.991261 +epoch: 0, batch: 12004, sum loss: 5712.875000, avg loss: 3.481338, ppl: 32.503166 +epoch: 0, batch: 12005, sum loss: 6870.705078, avg loss: 3.983018, ppl: 53.678776 +epoch: 0, batch: 12006, sum loss: 6209.057129, avg loss: 3.747168, ppl: 42.400826 +epoch: 0, batch: 12007, sum loss: 5773.952148, avg loss: 3.808676, ppl: 45.090687 +epoch: 0, batch: 12008, sum loss: 6595.085449, avg loss: 3.863553, ppl: 47.634308 +epoch: 0, batch: 12009, sum loss: 7331.193359, avg loss: 3.782866, ppl: 43.941788 +epoch: 0, batch: 12010, sum loss: 5833.464844, avg loss: 3.594248, ppl: 36.388329 +epoch: 0, batch: 12011, sum loss: 7076.826172, avg loss: 3.782377, ppl: 43.920296 +epoch: 0, batch: 12012, sum loss: 5897.074219, avg loss: 3.608981, ppl: 36.928391 +epoch: 0, batch: 12013, sum loss: 6781.809570, avg loss: 3.530354, ppl: 34.136051 +epoch: 0, batch: 12014, sum loss: 7274.148438, avg loss: 3.747629, ppl: 42.420391 +epoch: 0, batch: 12015, sum loss: 7326.736816, avg loss: 3.960398, ppl: 52.478230 +epoch: 0, batch: 12016, sum loss: 6116.938965, avg loss: 3.598200, ppl: 36.532402 +epoch: 0, batch: 12017, sum loss: 5999.444336, avg loss: 3.500259, ppl: 33.124035 +epoch: 0, batch: 12018, sum loss: 5877.436035, avg loss: 3.326223, ppl: 27.833021 +epoch: 0, batch: 12019, sum loss: 5441.616211, avg loss: 3.352813, ppl: 28.583038 +epoch: 0, batch: 12020, sum loss: 5924.797363, avg loss: 3.675433, ppl: 39.465729 +epoch: 0, batch: 12021, sum loss: 6040.628906, avg loss: 3.663207, ppl: 38.986183 +epoch: 0, batch: 12022, sum loss: 6304.113770, avg loss: 3.602351, ppl: 36.684368 +epoch: 0, batch: 12023, sum loss: 6175.565918, avg loss: 3.691313, ppl: 
40.097450 +epoch: 0, batch: 12024, sum loss: 5332.794434, avg loss: 3.506111, ppl: 33.318436 +epoch: 0, batch: 12025, sum loss: 6497.089844, avg loss: 3.556152, ppl: 35.028152 +epoch: 0, batch: 12026, sum loss: 7879.448242, avg loss: 3.746766, ppl: 42.383785 +epoch: 0, batch: 12027, sum loss: 7428.295898, avg loss: 3.805480, ppl: 44.946808 +epoch: 0, batch: 12028, sum loss: 6159.611328, avg loss: 3.799883, ppl: 44.695972 +epoch: 0, batch: 12029, sum loss: 5707.911621, avg loss: 3.621771, ppl: 37.403763 +epoch: 0, batch: 12030, sum loss: 5871.307617, avg loss: 3.541199, ppl: 34.508259 +epoch: 0, batch: 12031, sum loss: 5891.082031, avg loss: 3.742746, ppl: 42.213734 +epoch: 0, batch: 12032, sum loss: 5898.322266, avg loss: 3.618603, ppl: 37.285427 +epoch: 0, batch: 12033, sum loss: 5705.696289, avg loss: 3.707405, ppl: 40.747932 +epoch: 0, batch: 12034, sum loss: 5658.343750, avg loss: 3.585769, ppl: 36.081100 +epoch: 0, batch: 12035, sum loss: 5800.658203, avg loss: 3.416171, ppl: 30.452591 +epoch: 0, batch: 12036, sum loss: 6147.004395, avg loss: 3.709719, ppl: 40.842327 +epoch: 0, batch: 12037, sum loss: 5571.624023, avg loss: 3.513004, ppl: 33.548893 +epoch: 0, batch: 12038, sum loss: 5142.780273, avg loss: 3.328660, ppl: 27.900944 +epoch: 0, batch: 12039, sum loss: 6472.305664, avg loss: 3.498544, ppl: 33.067261 +epoch: 0, batch: 12040, sum loss: 6112.969727, avg loss: 3.551987, ppl: 34.882557 +epoch: 0, batch: 12041, sum loss: 5463.903320, avg loss: 3.419214, ppl: 30.545383 +epoch: 0, batch: 12042, sum loss: 6494.401855, avg loss: 3.762689, ppl: 43.064083 +epoch: 0, batch: 12043, sum loss: 7725.796875, avg loss: 3.857113, ppl: 47.328510 +epoch: 0, batch: 12044, sum loss: 5142.892578, avg loss: 3.421751, ppl: 30.623005 +epoch: 0, batch: 12045, sum loss: 5059.671875, avg loss: 3.237154, ppl: 25.461157 +epoch: 0, batch: 12046, sum loss: 7352.258301, avg loss: 4.019825, ppl: 55.691330 +epoch: 0, batch: 12047, sum loss: 6223.441406, avg loss: 3.643701, ppl: 
38.233070 +epoch: 0, batch: 12048, sum loss: 5349.577148, avg loss: 3.308335, ppl: 27.339563 +epoch: 0, batch: 12049, sum loss: 5383.106934, avg loss: 3.248707, ppl: 25.757004 +epoch: 0, batch: 12050, sum loss: 6223.853516, avg loss: 3.538291, ppl: 34.408066 +epoch: 0, batch: 12051, sum loss: 5667.750488, avg loss: 3.531309, ppl: 34.168652 +epoch: 0, batch: 12052, sum loss: 6620.437988, avg loss: 3.815814, ppl: 45.413731 +epoch: 0, batch: 12053, sum loss: 6892.457031, avg loss: 3.627609, ppl: 37.622761 +epoch: 0, batch: 12054, sum loss: 4972.595215, avg loss: 3.396581, ppl: 29.861839 +epoch: 0, batch: 12055, sum loss: 6641.707031, avg loss: 3.848034, ppl: 46.900772 +epoch: 0, batch: 12056, sum loss: 6969.316895, avg loss: 3.775361, ppl: 43.613262 +epoch: 0, batch: 12057, sum loss: 5703.234375, avg loss: 3.429486, ppl: 30.860764 +epoch: 0, batch: 12058, sum loss: 6148.173340, avg loss: 3.582852, ppl: 35.975986 +epoch: 0, batch: 12059, sum loss: 5062.196289, avg loss: 3.406592, ppl: 30.162287 +epoch: 0, batch: 12060, sum loss: 5460.172852, avg loss: 3.482253, ppl: 32.532940 +epoch: 0, batch: 12061, sum loss: 6127.517090, avg loss: 3.407963, ppl: 30.203644 +epoch: 0, batch: 12062, sum loss: 6251.945312, avg loss: 3.823820, ppl: 45.778732 +epoch: 0, batch: 12063, sum loss: 5469.927246, avg loss: 3.453237, ppl: 31.602518 +epoch: 0, batch: 12064, sum loss: 5719.580566, avg loss: 3.588194, ppl: 36.168682 +epoch: 0, batch: 12065, sum loss: 5174.028809, avg loss: 3.340238, ppl: 28.225847 +epoch: 0, batch: 12066, sum loss: 6444.318848, avg loss: 3.546681, ppl: 34.697952 +epoch: 0, batch: 12067, sum loss: 4908.526367, avg loss: 3.531314, ppl: 34.168831 +epoch: 0, batch: 12068, sum loss: 6167.267578, avg loss: 3.542371, ppl: 34.548729 +epoch: 0, batch: 12069, sum loss: 4800.989258, avg loss: 3.407373, ppl: 30.185854 +epoch: 0, batch: 12070, sum loss: 5846.367676, avg loss: 3.439040, ppl: 31.157032 +epoch: 0, batch: 12071, sum loss: 6251.666016, avg loss: 3.675288, ppl: 
39.460037 +epoch: 0, batch: 12072, sum loss: 6179.184082, avg loss: 3.495014, ppl: 32.950741 +epoch: 0, batch: 12073, sum loss: 5286.119141, avg loss: 3.576535, ppl: 35.749470 +epoch: 0, batch: 12074, sum loss: 5969.545898, avg loss: 3.696313, ppl: 40.298466 +epoch: 0, batch: 12075, sum loss: 6778.076660, avg loss: 3.940742, ppl: 51.456779 +epoch: 0, batch: 12076, sum loss: 6832.992188, avg loss: 3.623008, ppl: 37.450039 +epoch: 0, batch: 12077, sum loss: 7068.932617, avg loss: 3.617673, ppl: 37.250771 +epoch: 0, batch: 12078, sum loss: 5998.321289, avg loss: 3.602596, ppl: 36.693352 +epoch: 0, batch: 12079, sum loss: 5893.460938, avg loss: 3.522691, ppl: 33.875450 +epoch: 0, batch: 12080, sum loss: 5503.833008, avg loss: 3.386974, ppl: 29.576323 +epoch: 0, batch: 12081, sum loss: 6690.571289, avg loss: 3.646088, ppl: 38.324451 +epoch: 0, batch: 12082, sum loss: 5510.006836, avg loss: 3.529793, ppl: 34.116905 +epoch: 0, batch: 12083, sum loss: 5911.371582, avg loss: 3.606694, ppl: 36.844044 +epoch: 0, batch: 12084, sum loss: 6892.011719, avg loss: 3.648498, ppl: 38.416904 +epoch: 0, batch: 12085, sum loss: 6539.871094, avg loss: 3.647446, ppl: 38.376541 +epoch: 0, batch: 12086, sum loss: 5589.155762, avg loss: 3.546419, ppl: 34.688862 +epoch: 0, batch: 12087, sum loss: 5755.752930, avg loss: 3.403757, ppl: 30.076889 +epoch: 0, batch: 12088, sum loss: 5669.408691, avg loss: 3.486721, ppl: 32.678619 +epoch: 0, batch: 12089, sum loss: 4835.062500, avg loss: 3.042834, ppl: 20.964563 +epoch: 0, batch: 12090, sum loss: 6270.329102, avg loss: 3.673304, ppl: 39.381794 +epoch: 0, batch: 12091, sum loss: 7929.338867, avg loss: 3.972615, ppl: 53.123253 +epoch: 0, batch: 12092, sum loss: 6085.172363, avg loss: 3.719543, ppl: 41.245548 +epoch: 0, batch: 12093, sum loss: 7719.573242, avg loss: 3.871401, ppl: 48.009594 +epoch: 0, batch: 12094, sum loss: 5915.580566, avg loss: 3.473623, ppl: 32.253395 +epoch: 0, batch: 12095, sum loss: 7259.596680, avg loss: 3.755611, ppl: 
42.760357 +epoch: 0, batch: 12096, sum loss: 6011.181641, avg loss: 3.724400, ppl: 41.446358 +epoch: 0, batch: 12097, sum loss: 6701.772949, avg loss: 3.638313, ppl: 38.027641 +epoch: 0, batch: 12098, sum loss: 6684.716309, avg loss: 3.531281, ppl: 34.167725 +epoch: 0, batch: 12099, sum loss: 6256.295898, avg loss: 3.597640, ppl: 36.511967 +epoch: 0, batch: 12100, sum loss: 6149.342773, avg loss: 3.507897, ppl: 33.377987 +epoch: 0, batch: 12101, sum loss: 5533.085938, avg loss: 3.495316, ppl: 32.960712 +epoch: 0, batch: 12102, sum loss: 5572.614746, avg loss: 3.476366, ppl: 32.341980 +epoch: 0, batch: 12103, sum loss: 6878.387695, avg loss: 3.754579, ppl: 42.716213 +epoch: 0, batch: 12104, sum loss: 6090.092773, avg loss: 3.704436, ppl: 40.627129 +epoch: 0, batch: 12105, sum loss: 5791.328125, avg loss: 3.601572, ppl: 36.655815 +epoch: 0, batch: 12106, sum loss: 5771.300293, avg loss: 3.721019, ppl: 41.306454 +epoch: 0, batch: 12107, sum loss: 4829.430664, avg loss: 3.434872, ppl: 31.027452 +epoch: 0, batch: 12108, sum loss: 5896.702148, avg loss: 3.412443, ppl: 30.339285 +epoch: 0, batch: 12109, sum loss: 7355.870117, avg loss: 3.821231, ppl: 45.660389 +epoch: 0, batch: 12110, sum loss: 6820.874023, avg loss: 3.772607, ppl: 43.493317 +epoch: 0, batch: 12111, sum loss: 6079.874512, avg loss: 3.490169, ppl: 32.791492 +epoch: 0, batch: 12112, sum loss: 6882.321777, avg loss: 3.616564, ppl: 37.209507 +epoch: 0, batch: 12113, sum loss: 6804.516113, avg loss: 3.848708, ppl: 46.932407 +epoch: 0, batch: 12114, sum loss: 5663.310059, avg loss: 3.387147, ppl: 29.581448 +epoch: 0, batch: 12115, sum loss: 7049.620605, avg loss: 3.646984, ppl: 38.358814 +epoch: 0, batch: 12116, sum loss: 6100.313477, avg loss: 3.666054, ppl: 39.097324 +epoch: 0, batch: 12117, sum loss: 6144.838867, avg loss: 3.574659, ppl: 35.682453 +epoch: 0, batch: 12118, sum loss: 6764.880859, avg loss: 3.686584, ppl: 39.908276 +epoch: 0, batch: 12119, sum loss: 5927.707031, avg loss: 3.704817, ppl: 
40.642601 +epoch: 0, batch: 12120, sum loss: 5681.969727, avg loss: 3.542375, ppl: 34.548885 +epoch: 0, batch: 12121, sum loss: 6429.628906, avg loss: 3.646982, ppl: 38.358715 +epoch: 0, batch: 12122, sum loss: 6614.281250, avg loss: 3.703405, ppl: 40.585266 +epoch: 0, batch: 12123, sum loss: 5445.374512, avg loss: 3.513145, ppl: 33.553627 +epoch: 0, batch: 12124, sum loss: 6547.170410, avg loss: 3.542841, ppl: 34.564987 +epoch: 0, batch: 12125, sum loss: 5969.025391, avg loss: 3.650780, ppl: 38.504688 +epoch: 0, batch: 12126, sum loss: 7239.623047, avg loss: 3.804321, ppl: 44.894760 +epoch: 0, batch: 12127, sum loss: 4922.365723, avg loss: 3.411203, ppl: 30.301664 +epoch: 0, batch: 12128, sum loss: 6954.589844, avg loss: 3.838074, ppl: 46.435951 +epoch: 0, batch: 12129, sum loss: 5469.692383, avg loss: 3.446561, ppl: 31.392260 +epoch: 0, batch: 12130, sum loss: 6229.513672, avg loss: 3.555659, ppl: 35.010880 +epoch: 0, batch: 12131, sum loss: 5791.277344, avg loss: 3.490824, ppl: 32.812981 +epoch: 0, batch: 12132, sum loss: 5977.349121, avg loss: 3.499619, ppl: 33.102829 +epoch: 0, batch: 12133, sum loss: 6226.268066, avg loss: 3.710529, ppl: 40.875439 +epoch: 0, batch: 12134, sum loss: 6321.621094, avg loss: 3.718601, ppl: 41.206696 +epoch: 0, batch: 12135, sum loss: 6429.503906, avg loss: 3.636597, ppl: 37.962444 +epoch: 0, batch: 12136, sum loss: 5519.759766, avg loss: 3.460665, ppl: 31.838127 +epoch: 0, batch: 12137, sum loss: 6191.575195, avg loss: 3.757024, ppl: 42.820793 +epoch: 0, batch: 12138, sum loss: 5809.326660, avg loss: 3.470327, ppl: 32.147251 +epoch: 0, batch: 12139, sum loss: 5614.136719, avg loss: 3.419085, ppl: 30.541443 +epoch: 0, batch: 12140, sum loss: 6076.174805, avg loss: 3.578431, ppl: 35.817287 +epoch: 0, batch: 12141, sum loss: 6105.943848, avg loss: 3.705063, ppl: 40.652611 +epoch: 0, batch: 12142, sum loss: 6495.090820, avg loss: 3.677854, ppl: 39.561417 +epoch: 0, batch: 12143, sum loss: 6407.221191, avg loss: 3.573464, ppl: 
35.639843 +epoch: 0, batch: 12144, sum loss: 6189.625977, avg loss: 3.647393, ppl: 38.374500 +epoch: 0, batch: 12145, sum loss: 6499.411621, avg loss: 3.628929, ppl: 37.672436 +epoch: 0, batch: 12146, sum loss: 6334.880371, avg loss: 3.698120, ppl: 40.371349 +epoch: 0, batch: 12147, sum loss: 5577.304199, avg loss: 3.409110, ppl: 30.238329 +epoch: 0, batch: 12148, sum loss: 5384.542969, avg loss: 3.442802, ppl: 31.274481 +epoch: 0, batch: 12149, sum loss: 5793.517578, avg loss: 3.521895, ppl: 33.848507 +epoch: 0, batch: 12150, sum loss: 5873.110352, avg loss: 3.672989, ppl: 39.369419 +epoch: 0, batch: 12151, sum loss: 6718.958496, avg loss: 3.627947, ppl: 37.635483 +epoch: 0, batch: 12152, sum loss: 5368.952637, avg loss: 3.493138, ppl: 32.888985 +epoch: 0, batch: 12153, sum loss: 6857.492188, avg loss: 3.511261, ppl: 33.490463 +epoch: 0, batch: 12154, sum loss: 4928.275391, avg loss: 3.238026, ppl: 25.483372 +epoch: 0, batch: 12155, sum loss: 6042.937500, avg loss: 3.560953, ppl: 35.196728 +epoch: 0, batch: 12156, sum loss: 7167.766602, avg loss: 3.698538, ppl: 40.388218 +epoch: 0, batch: 12157, sum loss: 6776.750977, avg loss: 3.892447, ppl: 49.030743 +epoch: 0, batch: 12158, sum loss: 6757.896484, avg loss: 3.743987, ppl: 42.266163 +epoch: 0, batch: 12159, sum loss: 5320.687500, avg loss: 3.677047, ppl: 39.529499 +epoch: 0, batch: 12160, sum loss: 6508.101562, avg loss: 3.519795, ppl: 33.777508 +epoch: 0, batch: 12161, sum loss: 5199.515625, avg loss: 3.494298, ppl: 32.927174 +epoch: 0, batch: 12162, sum loss: 6571.462891, avg loss: 3.638684, ppl: 38.041733 +epoch: 0, batch: 12163, sum loss: 5565.455566, avg loss: 3.551663, ppl: 34.871258 +epoch: 0, batch: 12164, sum loss: 6075.973145, avg loss: 3.364326, ppl: 28.914009 +epoch: 0, batch: 12165, sum loss: 6167.252930, avg loss: 3.370084, ppl: 29.080965 +epoch: 0, batch: 12166, sum loss: 6529.162109, avg loss: 3.639444, ppl: 38.070660 +epoch: 0, batch: 12167, sum loss: 6821.602051, avg loss: 3.713447, ppl: 
40.994877 +epoch: 0, batch: 12168, sum loss: 6105.530762, avg loss: 3.570486, ppl: 35.533852 +epoch: 0, batch: 12169, sum loss: 7210.056152, avg loss: 3.510251, ppl: 33.456673 +epoch: 0, batch: 12170, sum loss: 6689.217285, avg loss: 3.535527, ppl: 34.313091 +epoch: 0, batch: 12171, sum loss: 5025.584961, avg loss: 3.432776, ppl: 30.962490 +epoch: 0, batch: 12172, sum loss: 5537.731445, avg loss: 3.593596, ppl: 36.364609 +epoch: 0, batch: 12173, sum loss: 6359.272461, avg loss: 3.600947, ppl: 36.632904 +epoch: 0, batch: 12174, sum loss: 5346.213867, avg loss: 3.542885, ppl: 34.566509 +epoch: 0, batch: 12175, sum loss: 6523.365234, avg loss: 3.524238, ppl: 33.927921 +epoch: 0, batch: 12176, sum loss: 6398.614258, avg loss: 3.741880, ppl: 42.177197 +epoch: 0, batch: 12177, sum loss: 6050.954590, avg loss: 3.606052, ppl: 36.820389 +epoch: 0, batch: 12178, sum loss: 6362.955078, avg loss: 3.572687, ppl: 35.612152 +epoch: 0, batch: 12179, sum loss: 5058.393555, avg loss: 3.434076, ppl: 31.002748 +epoch: 0, batch: 12180, sum loss: 7029.933594, avg loss: 3.509702, ppl: 33.438309 +epoch: 0, batch: 12181, sum loss: 6885.792969, avg loss: 3.829696, ppl: 46.048523 +epoch: 0, batch: 12182, sum loss: 5744.988770, avg loss: 3.340110, ppl: 28.222225 +epoch: 0, batch: 12183, sum loss: 5504.678711, avg loss: 3.226658, ppl: 25.195316 +epoch: 0, batch: 12184, sum loss: 5550.197754, avg loss: 3.415506, ppl: 30.432356 +epoch: 0, batch: 12185, sum loss: 7061.920898, avg loss: 3.710941, ppl: 40.892273 +epoch: 0, batch: 12186, sum loss: 5550.913574, avg loss: 3.569720, ppl: 35.506634 +epoch: 0, batch: 12187, sum loss: 5498.674805, avg loss: 3.598609, ppl: 36.547371 +epoch: 0, batch: 12188, sum loss: 5766.176270, avg loss: 3.661064, ppl: 38.902718 +epoch: 0, batch: 12189, sum loss: 5336.867188, avg loss: 3.399278, ppl: 29.942486 +epoch: 0, batch: 12190, sum loss: 5554.211426, avg loss: 3.477903, ppl: 32.391739 +epoch: 0, batch: 12191, sum loss: 5598.369629, avg loss: 3.370481, ppl: 
29.092531 +epoch: 0, batch: 12192, sum loss: 6408.750977, avg loss: 3.576312, ppl: 35.741486 +epoch: 0, batch: 12193, sum loss: 4671.708008, avg loss: 3.180196, ppl: 24.051462 +epoch: 0, batch: 12194, sum loss: 5094.975586, avg loss: 3.371923, ppl: 29.134499 +epoch: 0, batch: 12195, sum loss: 7045.692383, avg loss: 3.594741, ppl: 36.406265 +epoch: 0, batch: 12196, sum loss: 5538.881836, avg loss: 3.350806, ppl: 28.525702 +epoch: 0, batch: 12197, sum loss: 5395.062500, avg loss: 3.494212, ppl: 32.924324 +epoch: 0, batch: 12198, sum loss: 5658.269043, avg loss: 3.587996, ppl: 36.161526 +epoch: 0, batch: 12199, sum loss: 6410.577148, avg loss: 3.818092, ppl: 45.517296 +epoch: 0, batch: 12200, sum loss: 5849.469727, avg loss: 3.521656, ppl: 33.840408 +epoch: 0, batch: 12201, sum loss: 6285.811523, avg loss: 3.631318, ppl: 37.762550 +epoch: 0, batch: 12202, sum loss: 7337.596680, avg loss: 3.827646, ppl: 45.954224 +epoch: 0, batch: 12203, sum loss: 5605.022949, avg loss: 3.538525, ppl: 34.416103 +epoch: 0, batch: 12204, sum loss: 6020.015625, avg loss: 3.622151, ppl: 37.417980 +epoch: 0, batch: 12205, sum loss: 6963.489258, avg loss: 3.995117, ppl: 54.332207 +epoch: 0, batch: 12206, sum loss: 6942.449219, avg loss: 3.893690, ppl: 49.091705 +epoch: 0, batch: 12207, sum loss: 6526.931641, avg loss: 3.794728, ppl: 44.466129 +epoch: 0, batch: 12208, sum loss: 7130.824219, avg loss: 3.731462, ppl: 41.740097 +epoch: 0, batch: 12209, sum loss: 6604.793945, avg loss: 3.718916, ppl: 41.219677 +epoch: 0, batch: 12210, sum loss: 7691.975586, avg loss: 3.533291, ppl: 34.236465 +epoch: 0, batch: 12211, sum loss: 5876.091797, avg loss: 3.464677, ppl: 31.966129 +epoch: 0, batch: 12212, sum loss: 6725.056641, avg loss: 3.939693, ppl: 51.402840 +epoch: 0, batch: 12213, sum loss: 5619.122070, avg loss: 3.185443, ppl: 24.178007 +epoch: 0, batch: 12214, sum loss: 6634.990723, avg loss: 3.546227, ppl: 34.682220 +epoch: 0, batch: 12215, sum loss: 5156.105957, avg loss: 3.341611, ppl: 
28.264635 +epoch: 0, batch: 12216, sum loss: 6848.890625, avg loss: 3.662508, ppl: 38.958950 +epoch: 0, batch: 12217, sum loss: 6498.062988, avg loss: 3.738816, ppl: 42.048195 +epoch: 0, batch: 12218, sum loss: 5572.308594, avg loss: 3.623087, ppl: 37.453022 +epoch: 0, batch: 12219, sum loss: 7419.582520, avg loss: 3.854328, ppl: 47.196907 +epoch: 0, batch: 12220, sum loss: 6369.153320, avg loss: 3.622954, ppl: 37.448021 +epoch: 0, batch: 12221, sum loss: 5984.877930, avg loss: 3.396639, ppl: 29.863564 +epoch: 0, batch: 12222, sum loss: 5534.946777, avg loss: 3.783286, ppl: 43.960251 +epoch: 0, batch: 12223, sum loss: 5949.764648, avg loss: 3.588519, ppl: 36.180458 +epoch: 0, batch: 12224, sum loss: 5681.041016, avg loss: 3.566253, ppl: 35.383759 +epoch: 0, batch: 12225, sum loss: 5915.036621, avg loss: 3.552575, ppl: 34.903065 +epoch: 0, batch: 12226, sum loss: 5760.182617, avg loss: 3.525204, ppl: 33.960690 +epoch: 0, batch: 12227, sum loss: 6274.833496, avg loss: 3.631269, ppl: 37.760723 +epoch: 0, batch: 12228, sum loss: 5830.647949, avg loss: 3.531586, ppl: 34.178127 +epoch: 0, batch: 12229, sum loss: 5508.597168, avg loss: 3.306481, ppl: 27.288923 +epoch: 0, batch: 12230, sum loss: 5837.039062, avg loss: 3.687327, ppl: 39.937958 +epoch: 0, batch: 12231, sum loss: 6417.441406, avg loss: 3.468887, ppl: 32.101006 +epoch: 0, batch: 12232, sum loss: 5983.166016, avg loss: 3.608665, ppl: 36.916725 +epoch: 0, batch: 12233, sum loss: 6021.093750, avg loss: 3.566999, ppl: 35.410156 +epoch: 0, batch: 12234, sum loss: 7021.559570, avg loss: 3.750833, ppl: 42.556530 +epoch: 0, batch: 12235, sum loss: 5131.853027, avg loss: 3.566263, ppl: 35.384121 +epoch: 0, batch: 12236, sum loss: 5968.607422, avg loss: 3.458057, ppl: 31.755228 +epoch: 0, batch: 12237, sum loss: 6130.300781, avg loss: 3.872584, ppl: 48.066433 +epoch: 0, batch: 12238, sum loss: 4933.814941, avg loss: 3.583017, ppl: 35.981949 +epoch: 0, batch: 12239, sum loss: 5185.583984, avg loss: 3.422828, ppl: 
30.655979 +epoch: 0, batch: 12240, sum loss: 6097.729980, avg loss: 3.671120, ppl: 39.295879 +epoch: 0, batch: 12241, sum loss: 5472.895996, avg loss: 3.492595, ppl: 32.871136 +epoch: 0, batch: 12242, sum loss: 6214.566895, avg loss: 3.653478, ppl: 38.608730 +epoch: 0, batch: 12243, sum loss: 6161.350586, avg loss: 3.685019, ppl: 39.845860 +epoch: 0, batch: 12244, sum loss: 6129.672363, avg loss: 3.432067, ppl: 30.940542 +epoch: 0, batch: 12245, sum loss: 6223.719727, avg loss: 3.654562, ppl: 38.650608 +epoch: 0, batch: 12246, sum loss: 5612.604004, avg loss: 3.714496, ppl: 41.037914 +epoch: 0, batch: 12247, sum loss: 6167.657227, avg loss: 3.811902, ppl: 45.236389 +epoch: 0, batch: 12248, sum loss: 4874.029785, avg loss: 3.331531, ppl: 27.981150 +epoch: 0, batch: 12249, sum loss: 6672.383789, avg loss: 3.763330, ppl: 43.091682 +epoch: 0, batch: 12250, sum loss: 6908.148926, avg loss: 3.457532, ppl: 31.738546 +epoch: 0, batch: 12251, sum loss: 6705.145508, avg loss: 3.733377, ppl: 41.820114 +epoch: 0, batch: 12252, sum loss: 6318.844238, avg loss: 3.434154, ppl: 31.005180 +epoch: 0, batch: 12253, sum loss: 4822.629883, avg loss: 3.384301, ppl: 29.497379 +epoch: 0, batch: 12254, sum loss: 5277.419922, avg loss: 3.367850, ppl: 29.016064 +epoch: 0, batch: 12255, sum loss: 6041.624023, avg loss: 3.720212, ppl: 41.273132 +epoch: 0, batch: 12256, sum loss: 5020.209961, avg loss: 3.095074, ppl: 22.088871 +epoch: 0, batch: 12257, sum loss: 6558.302246, avg loss: 3.310602, ppl: 27.401608 +epoch: 0, batch: 12258, sum loss: 6272.985840, avg loss: 3.672708, ppl: 39.358356 +epoch: 0, batch: 12259, sum loss: 4924.813477, avg loss: 3.482895, ppl: 32.553833 +epoch: 0, batch: 12260, sum loss: 6187.706055, avg loss: 3.610097, ppl: 36.969635 +epoch: 0, batch: 12261, sum loss: 6172.197266, avg loss: 3.747539, ppl: 42.416576 +epoch: 0, batch: 12262, sum loss: 6144.033203, avg loss: 3.668080, ppl: 39.176609 +epoch: 0, batch: 12263, sum loss: 5652.970703, avg loss: 3.670760, ppl: 
39.281754 +epoch: 0, batch: 12264, sum loss: 6587.519043, avg loss: 3.643539, ppl: 38.226891 +epoch: 0, batch: 12265, sum loss: 6508.687500, avg loss: 3.510619, ppl: 33.468967 +epoch: 0, batch: 12266, sum loss: 6588.873047, avg loss: 3.824070, ppl: 45.790207 +epoch: 0, batch: 12267, sum loss: 6822.274414, avg loss: 3.607760, ppl: 36.883350 +epoch: 0, batch: 12268, sum loss: 5772.557617, avg loss: 3.707487, ppl: 40.751286 +epoch: 0, batch: 12269, sum loss: 6263.306641, avg loss: 3.717096, ppl: 41.144730 +epoch: 0, batch: 12270, sum loss: 5215.775391, avg loss: 3.424672, ppl: 30.712574 +epoch: 0, batch: 12271, sum loss: 6796.340332, avg loss: 3.740418, ppl: 42.115608 +epoch: 0, batch: 12272, sum loss: 6138.207520, avg loss: 3.552203, ppl: 34.890110 +epoch: 0, batch: 12273, sum loss: 6026.454102, avg loss: 3.670192, ppl: 39.259426 +epoch: 0, batch: 12274, sum loss: 5987.399902, avg loss: 3.576703, ppl: 35.755447 +epoch: 0, batch: 12275, sum loss: 6673.037109, avg loss: 3.701074, ppl: 40.490784 +epoch: 0, batch: 12276, sum loss: 6593.109375, avg loss: 3.983752, ppl: 53.718208 +epoch: 0, batch: 12277, sum loss: 5104.020020, avg loss: 3.444008, ppl: 31.312218 +epoch: 0, batch: 12278, sum loss: 5707.817871, avg loss: 3.426061, ppl: 30.755264 +epoch: 0, batch: 12279, sum loss: 5200.426270, avg loss: 3.416837, ppl: 30.472891 +epoch: 0, batch: 12280, sum loss: 5652.508301, avg loss: 3.429920, ppl: 30.874178 +epoch: 0, batch: 12281, sum loss: 6692.123535, avg loss: 3.629134, ppl: 37.680180 +epoch: 0, batch: 12282, sum loss: 7275.646484, avg loss: 3.781521, ppl: 43.882740 +epoch: 0, batch: 12283, sum loss: 5131.825195, avg loss: 3.289632, ppl: 26.832977 +epoch: 0, batch: 12284, sum loss: 6481.294922, avg loss: 3.565069, ppl: 35.341881 +epoch: 0, batch: 12285, sum loss: 5699.783203, avg loss: 3.433604, ppl: 30.988138 +epoch: 0, batch: 12286, sum loss: 6576.370117, avg loss: 3.725989, ppl: 41.512264 +epoch: 0, batch: 12287, sum loss: 5923.818848, avg loss: 3.507294, ppl: 
33.357868 +epoch: 0, batch: 12288, sum loss: 4310.614746, avg loss: 3.185968, ppl: 24.190691 +epoch: 0, batch: 12289, sum loss: 5493.934082, avg loss: 3.508259, ppl: 33.390095 +epoch: 0, batch: 12290, sum loss: 6386.426758, avg loss: 3.482239, ppl: 32.532482 +epoch: 0, batch: 12291, sum loss: 5800.467285, avg loss: 3.786206, ppl: 44.088799 +epoch: 0, batch: 12292, sum loss: 6325.411133, avg loss: 3.709919, ppl: 40.850487 +epoch: 0, batch: 12293, sum loss: 7053.210449, avg loss: 3.812546, ppl: 45.265549 +epoch: 0, batch: 12294, sum loss: 6277.415039, avg loss: 3.653909, ppl: 38.625347 +epoch: 0, batch: 12295, sum loss: 5592.188477, avg loss: 3.466949, ppl: 32.038841 +epoch: 0, batch: 12296, sum loss: 6051.891113, avg loss: 3.456249, ppl: 31.697847 +epoch: 0, batch: 12297, sum loss: 5692.966309, avg loss: 3.684768, ppl: 39.835876 +epoch: 0, batch: 12298, sum loss: 6146.224609, avg loss: 3.846198, ppl: 46.814739 +epoch: 0, batch: 12299, sum loss: 6393.308105, avg loss: 3.798757, ppl: 44.645649 +epoch: 0, batch: 12300, sum loss: 5776.398926, avg loss: 3.374065, ppl: 29.196962 +epoch: 0, batch: 12301, sum loss: 6544.458008, avg loss: 3.731162, ppl: 41.727562 +epoch: 0, batch: 12302, sum loss: 6077.547363, avg loss: 3.523216, ppl: 33.893253 +epoch: 0, batch: 12303, sum loss: 5918.723633, avg loss: 3.407440, ppl: 30.187878 +epoch: 0, batch: 12304, sum loss: 6024.884766, avg loss: 3.627264, ppl: 37.609756 +epoch: 0, batch: 12305, sum loss: 5436.768066, avg loss: 3.626930, ppl: 37.597218 +epoch: 0, batch: 12306, sum loss: 6458.922852, avg loss: 3.902673, ppl: 49.534657 +epoch: 0, batch: 12307, sum loss: 5690.173828, avg loss: 3.351104, ppl: 28.534212 +epoch: 0, batch: 12308, sum loss: 5994.800781, avg loss: 3.471222, ppl: 32.176052 +epoch: 0, batch: 12309, sum loss: 6233.571289, avg loss: 3.710459, ppl: 40.872574 +epoch: 0, batch: 12310, sum loss: 6029.029785, avg loss: 3.601571, ppl: 36.655777 +epoch: 0, batch: 12311, sum loss: 6342.380859, avg loss: 3.668236, ppl: 
39.182747 +epoch: 0, batch: 12312, sum loss: 6567.821289, avg loss: 3.392470, ppl: 29.739307 +epoch: 0, batch: 12313, sum loss: 6403.869629, avg loss: 3.655177, ppl: 38.674362 +epoch: 0, batch: 12314, sum loss: 6501.973633, avg loss: 3.614215, ppl: 37.122208 +epoch: 0, batch: 12315, sum loss: 6363.442383, avg loss: 3.401092, ppl: 29.996834 +epoch: 0, batch: 12316, sum loss: 5896.354980, avg loss: 3.543483, ppl: 34.587170 +epoch: 0, batch: 12317, sum loss: 6821.605469, avg loss: 3.628513, ppl: 37.656792 +epoch: 0, batch: 12318, sum loss: 5939.745605, avg loss: 3.626218, ppl: 37.570457 +epoch: 0, batch: 12319, sum loss: 5634.810059, avg loss: 3.366076, ppl: 28.964632 +epoch: 0, batch: 12320, sum loss: 5646.501953, avg loss: 3.712362, ppl: 40.950409 +epoch: 0, batch: 12321, sum loss: 5610.177734, avg loss: 3.548499, ppl: 34.761112 +epoch: 0, batch: 12322, sum loss: 5477.825195, avg loss: 3.592016, ppl: 36.307213 +epoch: 0, batch: 12323, sum loss: 6694.689453, avg loss: 3.481378, ppl: 32.504471 +epoch: 0, batch: 12324, sum loss: 5279.023438, avg loss: 3.303519, ppl: 27.208216 +epoch: 0, batch: 12325, sum loss: 5730.098633, avg loss: 3.517556, ppl: 33.701958 +epoch: 0, batch: 12326, sum loss: 6539.703125, avg loss: 3.705214, ppl: 40.658756 +epoch: 0, batch: 12327, sum loss: 6474.645508, avg loss: 3.662130, ppl: 38.944202 +epoch: 0, batch: 12328, sum loss: 6443.880859, avg loss: 3.667547, ppl: 39.155746 +epoch: 0, batch: 12329, sum loss: 6783.106445, avg loss: 3.633158, ppl: 37.832111 +epoch: 0, batch: 12330, sum loss: 6586.505371, avg loss: 3.647013, ppl: 38.359913 +epoch: 0, batch: 12331, sum loss: 5475.740234, avg loss: 3.555676, ppl: 35.011463 +epoch: 0, batch: 12332, sum loss: 6304.056641, avg loss: 3.535646, ppl: 34.317177 +epoch: 0, batch: 12333, sum loss: 5067.247070, avg loss: 3.435422, ppl: 31.044502 +epoch: 0, batch: 12334, sum loss: 6606.141602, avg loss: 3.678253, ppl: 39.577179 +epoch: 0, batch: 12335, sum loss: 6026.694824, avg loss: 3.613127, ppl: 
37.081810 +epoch: 0, batch: 12336, sum loss: 5418.769531, avg loss: 3.523257, ppl: 33.894653 +epoch: 0, batch: 12337, sum loss: 5714.437988, avg loss: 3.771906, ppl: 43.462841 +epoch: 0, batch: 12338, sum loss: 5235.507812, avg loss: 3.330476, ppl: 27.951637 +epoch: 0, batch: 12339, sum loss: 6498.244141, avg loss: 3.535497, ppl: 34.312069 +epoch: 0, batch: 12340, sum loss: 6159.308594, avg loss: 3.448661, ppl: 31.458244 +epoch: 0, batch: 12341, sum loss: 5719.357910, avg loss: 3.384235, ppl: 29.495432 +epoch: 0, batch: 12342, sum loss: 4981.819336, avg loss: 3.433370, ppl: 30.980862 +epoch: 0, batch: 12343, sum loss: 6314.434082, avg loss: 3.656302, ppl: 38.717918 +epoch: 0, batch: 12344, sum loss: 5561.939941, avg loss: 3.515765, ppl: 33.641644 +epoch: 0, batch: 12345, sum loss: 5554.707520, avg loss: 3.473863, ppl: 32.261139 +epoch: 0, batch: 12346, sum loss: 6316.446289, avg loss: 3.524803, ppl: 33.947075 +epoch: 0, batch: 12347, sum loss: 5855.722656, avg loss: 3.506421, ppl: 33.328766 +epoch: 0, batch: 12348, sum loss: 6864.542969, avg loss: 3.663043, ppl: 38.979778 +epoch: 0, batch: 12349, sum loss: 5804.122559, avg loss: 3.654989, ppl: 38.667107 +epoch: 0, batch: 12350, sum loss: 5404.657227, avg loss: 3.516368, ppl: 33.661942 +epoch: 0, batch: 12351, sum loss: 7068.064941, avg loss: 4.002302, ppl: 54.723965 +epoch: 0, batch: 12352, sum loss: 6863.763672, avg loss: 3.538023, ppl: 34.398838 +epoch: 0, batch: 12353, sum loss: 5961.107422, avg loss: 3.695665, ppl: 40.272339 +epoch: 0, batch: 12354, sum loss: 5183.869141, avg loss: 3.495529, ppl: 32.967720 +epoch: 0, batch: 12355, sum loss: 7171.836914, avg loss: 3.749000, ppl: 42.478577 +epoch: 0, batch: 12356, sum loss: 6716.280273, avg loss: 3.644211, ppl: 38.252563 +epoch: 0, batch: 12357, sum loss: 5769.843750, avg loss: 3.621999, ppl: 37.412262 +epoch: 0, batch: 12358, sum loss: 6775.071289, avg loss: 3.607599, ppl: 36.877403 +epoch: 0, batch: 12359, sum loss: 6809.332031, avg loss: 3.490175, ppl: 
32.791702 +epoch: 0, batch: 12360, sum loss: 5966.673340, avg loss: 3.520161, ppl: 33.789875 +epoch: 0, batch: 12361, sum loss: 6730.000488, avg loss: 3.651655, ppl: 38.538403 +epoch: 0, batch: 12362, sum loss: 6845.505859, avg loss: 3.629643, ppl: 37.699345 +epoch: 0, batch: 12363, sum loss: 6982.624023, avg loss: 3.569849, ppl: 35.511215 +epoch: 0, batch: 12364, sum loss: 5406.402344, avg loss: 3.410979, ppl: 30.294897 +epoch: 0, batch: 12365, sum loss: 5991.128906, avg loss: 3.483214, ppl: 32.564228 +epoch: 0, batch: 12366, sum loss: 6971.136719, avg loss: 3.522555, ppl: 33.870861 +epoch: 0, batch: 12367, sum loss: 5688.826172, avg loss: 3.635033, ppl: 37.903091 +epoch: 0, batch: 12368, sum loss: 7110.128418, avg loss: 3.603714, ppl: 36.734428 +epoch: 0, batch: 12369, sum loss: 6284.937500, avg loss: 3.725511, ppl: 41.492443 +epoch: 0, batch: 12370, sum loss: 6249.093262, avg loss: 3.665158, ppl: 39.062290 +epoch: 0, batch: 12371, sum loss: 5952.941895, avg loss: 3.439019, ppl: 31.156378 +epoch: 0, batch: 12372, sum loss: 5697.202148, avg loss: 3.501661, ppl: 33.170498 +epoch: 0, batch: 12373, sum loss: 5854.045898, avg loss: 3.488704, ppl: 32.743500 +epoch: 0, batch: 12374, sum loss: 5571.494629, avg loss: 3.562337, ppl: 35.245457 +epoch: 0, batch: 12375, sum loss: 5964.578125, avg loss: 3.599625, ppl: 36.584499 +epoch: 0, batch: 12376, sum loss: 4859.745117, avg loss: 3.396048, ppl: 29.845924 +epoch: 0, batch: 12377, sum loss: 6029.457031, avg loss: 3.473190, ppl: 32.239410 +epoch: 0, batch: 12378, sum loss: 6005.277832, avg loss: 3.723049, ppl: 41.390400 +epoch: 0, batch: 12379, sum loss: 5765.862793, avg loss: 3.520062, ppl: 33.786537 +epoch: 0, batch: 12380, sum loss: 6362.217773, avg loss: 3.332749, ppl: 28.015253 +epoch: 0, batch: 12381, sum loss: 5617.438477, avg loss: 3.633531, ppl: 37.846210 +epoch: 0, batch: 12382, sum loss: 6719.508789, avg loss: 3.604887, ppl: 36.777519 +epoch: 0, batch: 12383, sum loss: 4634.432617, avg loss: 3.390221, ppl: 
29.672520 +epoch: 0, batch: 12384, sum loss: 5455.918945, avg loss: 3.637279, ppl: 37.988338 +epoch: 0, batch: 12385, sum loss: 5287.649414, avg loss: 3.400418, ppl: 29.976622 +epoch: 0, batch: 12386, sum loss: 5413.061523, avg loss: 3.387398, ppl: 29.588856 +epoch: 0, batch: 12387, sum loss: 6579.268066, avg loss: 3.455498, ppl: 31.674065 +epoch: 0, batch: 12388, sum loss: 8224.774414, avg loss: 3.986803, ppl: 53.882355 +epoch: 0, batch: 12389, sum loss: 5419.270996, avg loss: 3.482822, ppl: 32.551453 +epoch: 0, batch: 12390, sum loss: 6170.491211, avg loss: 3.764790, ppl: 43.154644 +epoch: 0, batch: 12391, sum loss: 6402.645508, avg loss: 3.815641, ppl: 45.405849 +epoch: 0, batch: 12392, sum loss: 7626.375977, avg loss: 3.622982, ppl: 37.449059 +epoch: 0, batch: 12393, sum loss: 5990.353516, avg loss: 3.597810, ppl: 36.518173 +epoch: 0, batch: 12394, sum loss: 6332.850586, avg loss: 3.508504, ppl: 33.398281 +epoch: 0, batch: 12395, sum loss: 6395.151367, avg loss: 3.550889, ppl: 34.844280 +epoch: 0, batch: 12396, sum loss: 5960.146484, avg loss: 3.431288, ppl: 30.916430 +epoch: 0, batch: 12397, sum loss: 6865.413086, avg loss: 3.770133, ppl: 43.385857 +epoch: 0, batch: 12398, sum loss: 5782.348633, avg loss: 3.409404, ppl: 30.247206 +epoch: 0, batch: 12399, sum loss: 7683.344238, avg loss: 3.985137, ppl: 53.792671 +epoch: 0, batch: 12400, sum loss: 6905.723633, avg loss: 3.702801, ppl: 40.560745 +epoch: 0, batch: 12401, sum loss: 6317.738281, avg loss: 3.711950, ppl: 40.933533 +epoch: 0, batch: 12402, sum loss: 7298.131836, avg loss: 3.904832, ppl: 49.641758 +epoch: 0, batch: 12403, sum loss: 6340.792969, avg loss: 3.472505, ppl: 32.217335 +epoch: 0, batch: 12404, sum loss: 4901.648438, avg loss: 3.237549, ppl: 25.471205 +epoch: 0, batch: 12405, sum loss: 6676.714844, avg loss: 3.699011, ppl: 40.407314 +epoch: 0, batch: 12406, sum loss: 5815.327148, avg loss: 3.699317, ppl: 40.419708 +epoch: 0, batch: 12407, sum loss: 5034.329102, avg loss: 3.693565, ppl: 
40.187866 +epoch: 0, batch: 12408, sum loss: 6967.565430, avg loss: 3.569450, ppl: 35.497051 +epoch: 0, batch: 12409, sum loss: 6125.620605, avg loss: 3.502356, ppl: 33.193565 +epoch: 0, batch: 12410, sum loss: 5479.131836, avg loss: 3.498807, ppl: 33.075966 +epoch: 0, batch: 12411, sum loss: 5312.819336, avg loss: 3.364673, ppl: 28.924049 +epoch: 0, batch: 12412, sum loss: 6028.325195, avg loss: 3.466547, ppl: 32.025967 +epoch: 0, batch: 12413, sum loss: 5782.383789, avg loss: 3.395410, ppl: 29.826889 +epoch: 0, batch: 12414, sum loss: 6836.675781, avg loss: 3.725709, ppl: 41.500645 +epoch: 0, batch: 12415, sum loss: 4623.087891, avg loss: 3.271824, ppl: 26.359385 +epoch: 0, batch: 12416, sum loss: 6417.437012, avg loss: 3.595203, ppl: 36.423092 +epoch: 0, batch: 12417, sum loss: 5627.368652, avg loss: 3.541453, ppl: 34.517033 +epoch: 0, batch: 12418, sum loss: 5640.524902, avg loss: 3.728040, ppl: 41.597515 +epoch: 0, batch: 12419, sum loss: 6147.729492, avg loss: 3.787880, ppl: 44.162685 +epoch: 0, batch: 12420, sum loss: 6865.015137, avg loss: 3.850261, ppl: 47.005329 +epoch: 0, batch: 12421, sum loss: 4968.806152, avg loss: 3.445774, ppl: 31.367556 +epoch: 0, batch: 12422, sum loss: 5689.096191, avg loss: 3.382340, ppl: 29.439585 +epoch: 0, batch: 12423, sum loss: 5134.488770, avg loss: 3.471595, ppl: 32.188034 +epoch: 0, batch: 12424, sum loss: 4878.213867, avg loss: 3.289423, ppl: 26.827374 +epoch: 0, batch: 12425, sum loss: 5665.283203, avg loss: 3.540802, ppl: 34.494572 +epoch: 0, batch: 12426, sum loss: 6491.773438, avg loss: 3.451235, ppl: 31.539322 +epoch: 0, batch: 12427, sum loss: 6000.789062, avg loss: 3.505134, ppl: 33.285892 +epoch: 0, batch: 12428, sum loss: 6015.564941, avg loss: 3.495389, ppl: 32.963116 +epoch: 0, batch: 12429, sum loss: 4800.006348, avg loss: 3.342623, ppl: 28.293236 +epoch: 0, batch: 12430, sum loss: 6278.449219, avg loss: 3.395592, ppl: 29.832300 +epoch: 0, batch: 12431, sum loss: 7076.874512, avg loss: 3.829478, ppl: 
46.038479 +epoch: 0, batch: 12432, sum loss: 6173.250977, avg loss: 3.439137, ppl: 31.160063 +epoch: 0, batch: 12433, sum loss: 6603.947266, avg loss: 3.602808, ppl: 36.701157 +epoch: 0, batch: 12434, sum loss: 5162.138672, avg loss: 3.439133, ppl: 31.159937 +epoch: 0, batch: 12435, sum loss: 6553.104492, avg loss: 3.573121, ppl: 35.627617 +epoch: 0, batch: 12436, sum loss: 6935.493652, avg loss: 3.693021, ppl: 40.166008 +epoch: 0, batch: 12437, sum loss: 5615.126953, avg loss: 3.411377, ppl: 30.306961 +epoch: 0, batch: 12438, sum loss: 6721.802734, avg loss: 3.586874, ppl: 36.121002 +epoch: 0, batch: 12439, sum loss: 6777.328125, avg loss: 3.859526, ppl: 47.442879 +epoch: 0, batch: 12440, sum loss: 5920.112305, avg loss: 3.716329, ppl: 41.113194 +epoch: 0, batch: 12441, sum loss: 6473.282227, avg loss: 3.518088, ppl: 33.719898 +epoch: 0, batch: 12442, sum loss: 4644.962891, avg loss: 3.458647, ppl: 31.773958 +epoch: 0, batch: 12443, sum loss: 6854.035645, avg loss: 3.753579, ppl: 42.673553 +epoch: 0, batch: 12444, sum loss: 6564.895508, avg loss: 3.419217, ppl: 30.545477 +epoch: 0, batch: 12445, sum loss: 4733.445312, avg loss: 3.585944, ppl: 36.087399 +epoch: 0, batch: 12446, sum loss: 6796.691895, avg loss: 3.712011, ppl: 40.936050 +epoch: 0, batch: 12447, sum loss: 5616.019531, avg loss: 3.714299, ppl: 41.029812 +epoch: 0, batch: 12448, sum loss: 6256.251953, avg loss: 3.508835, ppl: 33.409309 +epoch: 0, batch: 12449, sum loss: 6358.041504, avg loss: 3.387342, ppl: 29.587217 +epoch: 0, batch: 12450, sum loss: 6849.450684, avg loss: 3.565565, ppl: 35.359440 +epoch: 0, batch: 12451, sum loss: 6480.805664, avg loss: 3.774494, ppl: 43.575439 +epoch: 0, batch: 12452, sum loss: 6235.312988, avg loss: 3.489263, ppl: 32.761795 +epoch: 0, batch: 12453, sum loss: 5859.097168, avg loss: 3.434406, ppl: 31.012995 +epoch: 0, batch: 12454, sum loss: 7509.117188, avg loss: 3.970977, ppl: 53.036312 +epoch: 0, batch: 12455, sum loss: 7094.059082, avg loss: 3.820172, ppl: 
45.612034 +epoch: 0, batch: 12456, sum loss: 5576.811523, avg loss: 3.593307, ppl: 36.354084 +epoch: 0, batch: 12457, sum loss: 7032.858887, avg loss: 3.615866, ppl: 37.183533 +epoch: 0, batch: 12458, sum loss: 7114.601562, avg loss: 3.717138, ppl: 41.146446 +epoch: 0, batch: 12459, sum loss: 6216.858398, avg loss: 3.863803, ppl: 47.646191 +epoch: 0, batch: 12460, sum loss: 5955.411133, avg loss: 3.566114, ppl: 35.378857 +epoch: 0, batch: 12461, sum loss: 7034.645996, avg loss: 3.616785, ppl: 37.217712 +epoch: 0, batch: 12462, sum loss: 5858.991211, avg loss: 3.542316, ppl: 34.546852 +epoch: 0, batch: 12463, sum loss: 6173.553711, avg loss: 3.610265, ppl: 36.975861 +epoch: 0, batch: 12464, sum loss: 7229.112793, avg loss: 3.922470, ppl: 50.525105 +epoch: 0, batch: 12465, sum loss: 6115.152344, avg loss: 3.580300, ppl: 35.884300 +epoch: 0, batch: 12466, sum loss: 5673.192871, avg loss: 3.602027, ppl: 36.672501 +epoch: 0, batch: 12467, sum loss: 5446.285156, avg loss: 3.347440, ppl: 28.429859 +epoch: 0, batch: 12468, sum loss: 6240.374023, avg loss: 3.754738, ppl: 42.723019 +epoch: 0, batch: 12469, sum loss: 6941.622070, avg loss: 3.895411, ppl: 49.176262 +epoch: 0, batch: 12470, sum loss: 6028.015625, avg loss: 3.464377, ppl: 31.956535 +epoch: 0, batch: 12471, sum loss: 6634.576172, avg loss: 3.563145, ppl: 35.273964 +epoch: 0, batch: 12472, sum loss: 5740.973145, avg loss: 3.448032, ppl: 31.438457 +epoch: 0, batch: 12473, sum loss: 6584.610840, avg loss: 3.695068, ppl: 40.248306 +epoch: 0, batch: 12474, sum loss: 5730.166504, avg loss: 3.550289, ppl: 34.823387 +epoch: 0, batch: 12475, sum loss: 6883.358398, avg loss: 3.673083, ppl: 39.373119 +epoch: 0, batch: 12476, sum loss: 6495.951172, avg loss: 3.707735, ppl: 40.761372 +epoch: 0, batch: 12477, sum loss: 6913.918945, avg loss: 3.809322, ppl: 45.119831 +epoch: 0, batch: 12478, sum loss: 6505.653320, avg loss: 3.458614, ppl: 31.772913 +epoch: 0, batch: 12479, sum loss: 5914.075684, avg loss: 3.595183, ppl: 
36.422352 +epoch: 0, batch: 12480, sum loss: 6828.209473, avg loss: 3.785038, ppl: 44.037365 +epoch: 0, batch: 12481, sum loss: 5628.295410, avg loss: 3.425621, ppl: 30.741730 +epoch: 0, batch: 12482, sum loss: 6964.536621, avg loss: 3.602968, ppl: 36.707020 +epoch: 0, batch: 12483, sum loss: 6967.556641, avg loss: 3.604530, ppl: 36.764412 +epoch: 0, batch: 12484, sum loss: 6432.416016, avg loss: 3.623896, ppl: 37.483322 +epoch: 0, batch: 12485, sum loss: 5898.923828, avg loss: 3.858027, ppl: 47.371815 +epoch: 0, batch: 12486, sum loss: 5288.645508, avg loss: 3.309541, ppl: 27.372545 +epoch: 0, batch: 12487, sum loss: 6482.745605, avg loss: 3.668787, ppl: 39.204304 +epoch: 0, batch: 12488, sum loss: 6042.113770, avg loss: 3.653031, ppl: 38.591473 +epoch: 0, batch: 12489, sum loss: 5934.552734, avg loss: 3.631917, ppl: 37.785191 +epoch: 0, batch: 12490, sum loss: 6371.338379, avg loss: 3.618023, ppl: 37.263821 +epoch: 0, batch: 12491, sum loss: 5493.166016, avg loss: 3.374181, ppl: 29.200346 +epoch: 0, batch: 12492, sum loss: 6289.476562, avg loss: 3.958135, ppl: 52.359592 +epoch: 0, batch: 12493, sum loss: 6862.919434, avg loss: 3.810616, ppl: 45.178249 +epoch: 0, batch: 12494, sum loss: 4884.521484, avg loss: 3.432552, ppl: 30.955536 +epoch: 0, batch: 12495, sum loss: 7085.766113, avg loss: 3.741165, ppl: 42.147060 +epoch: 0, batch: 12496, sum loss: 5722.813477, avg loss: 3.554543, ppl: 34.971817 +epoch: 0, batch: 12497, sum loss: 7722.703613, avg loss: 3.819339, ppl: 45.574066 +epoch: 0, batch: 12498, sum loss: 6175.727539, avg loss: 3.598909, ppl: 36.558323 +epoch: 0, batch: 12499, sum loss: 5675.110840, avg loss: 3.605534, ppl: 36.801334 +epoch: 0, batch: 12500, sum loss: 5429.002930, avg loss: 3.477901, ppl: 32.391651 +epoch: 0, batch: 12501, sum loss: 6131.972168, avg loss: 3.698415, ppl: 40.383247 +epoch: 0, batch: 12502, sum loss: 5708.281250, avg loss: 3.666205, ppl: 39.103233 +epoch: 0, batch: 12503, sum loss: 6601.636719, avg loss: 3.696325, ppl: 
40.298946 +epoch: 0, batch: 12504, sum loss: 6157.968262, avg loss: 3.607480, ppl: 36.873016 +epoch: 0, batch: 12505, sum loss: 6037.185547, avg loss: 3.825846, ppl: 45.871613 +epoch: 0, batch: 12506, sum loss: 6621.958008, avg loss: 3.507393, ppl: 33.361187 +epoch: 0, batch: 12507, sum loss: 6656.322266, avg loss: 3.769152, ppl: 43.343292 +epoch: 0, batch: 12508, sum loss: 5398.772949, avg loss: 3.416945, ppl: 30.476168 +epoch: 0, batch: 12509, sum loss: 5990.843750, avg loss: 3.474968, ppl: 32.296787 +epoch: 0, batch: 12510, sum loss: 5151.244629, avg loss: 3.504248, ppl: 33.256432 +epoch: 0, batch: 12511, sum loss: 6644.316895, avg loss: 3.555012, ppl: 34.988224 +epoch: 0, batch: 12512, sum loss: 6633.075195, avg loss: 3.838585, ppl: 46.459694 +epoch: 0, batch: 12513, sum loss: 5609.162598, avg loss: 3.441204, ppl: 31.224531 +epoch: 0, batch: 12514, sum loss: 7404.667969, avg loss: 3.774041, ppl: 43.555706 +epoch: 0, batch: 12515, sum loss: 6603.972656, avg loss: 3.756526, ppl: 42.799473 +epoch: 0, batch: 12516, sum loss: 5617.038086, avg loss: 3.347460, ppl: 28.430429 +epoch: 0, batch: 12517, sum loss: 5403.382812, avg loss: 3.448234, ppl: 31.444822 +epoch: 0, batch: 12518, sum loss: 5879.812012, avg loss: 3.609461, ppl: 36.946136 +epoch: 0, batch: 12519, sum loss: 6446.316895, avg loss: 3.581287, ppl: 35.919746 +epoch: 0, batch: 12520, sum loss: 5958.466309, avg loss: 3.613382, ppl: 37.091270 +epoch: 0, batch: 12521, sum loss: 5767.645508, avg loss: 3.427003, ppl: 30.784233 +epoch: 0, batch: 12522, sum loss: 7162.296387, avg loss: 3.646791, ppl: 38.351379 +epoch: 0, batch: 12523, sum loss: 6702.036621, avg loss: 3.501586, ppl: 33.168030 +epoch: 0, batch: 12524, sum loss: 5690.150391, avg loss: 3.384979, ppl: 29.517380 +epoch: 0, batch: 12525, sum loss: 6669.222168, avg loss: 3.828486, ppl: 45.992840 +epoch: 0, batch: 12526, sum loss: 6676.297363, avg loss: 3.526834, ppl: 34.016109 +epoch: 0, batch: 12527, sum loss: 5621.447266, avg loss: 3.470029, ppl: 
32.137688 +epoch: 0, batch: 12528, sum loss: 6859.043457, avg loss: 3.445024, ppl: 31.344044 +epoch: 0, batch: 12529, sum loss: 4969.692383, avg loss: 3.399243, ppl: 29.941416 +epoch: 0, batch: 12530, sum loss: 6479.327637, avg loss: 3.660637, ppl: 38.886112 +epoch: 0, batch: 12531, sum loss: 5052.138672, avg loss: 3.203639, ppl: 24.621967 +epoch: 0, batch: 12532, sum loss: 5585.311523, avg loss: 3.356558, ppl: 28.690258 +epoch: 0, batch: 12533, sum loss: 5393.919434, avg loss: 3.301052, ppl: 27.141186 +epoch: 0, batch: 12534, sum loss: 6015.170410, avg loss: 3.715362, ppl: 41.073437 +epoch: 0, batch: 12535, sum loss: 4988.658203, avg loss: 3.368439, ppl: 29.033176 +epoch: 0, batch: 12536, sum loss: 6533.312500, avg loss: 3.568166, ppl: 35.451534 +epoch: 0, batch: 12537, sum loss: 5641.450684, avg loss: 3.423210, ppl: 30.667713 +epoch: 0, batch: 12538, sum loss: 7189.351074, avg loss: 3.636495, ppl: 37.958561 +epoch: 0, batch: 12539, sum loss: 5958.429688, avg loss: 3.673508, ppl: 39.389832 +epoch: 0, batch: 12540, sum loss: 5252.712891, avg loss: 3.236422, ppl: 25.442520 +epoch: 0, batch: 12541, sum loss: 6882.935547, avg loss: 3.704486, ppl: 40.629166 +epoch: 0, batch: 12542, sum loss: 6279.050781, avg loss: 3.216727, ppl: 24.946337 +epoch: 0, batch: 12543, sum loss: 5647.078125, avg loss: 3.631562, ppl: 37.771759 +epoch: 0, batch: 12544, sum loss: 6305.394043, avg loss: 3.456905, ppl: 31.718643 +epoch: 0, batch: 12545, sum loss: 5547.270996, avg loss: 3.329694, ppl: 27.929802 +epoch: 0, batch: 12546, sum loss: 5291.015137, avg loss: 3.458180, ppl: 31.759121 +epoch: 0, batch: 12547, sum loss: 5956.578125, avg loss: 3.323983, ppl: 27.770754 +epoch: 0, batch: 12548, sum loss: 5682.939453, avg loss: 3.497193, ppl: 33.022640 +epoch: 0, batch: 12549, sum loss: 6147.541504, avg loss: 3.508871, ppl: 33.410522 +epoch: 0, batch: 12550, sum loss: 5979.100586, avg loss: 3.468156, ppl: 32.077541 +epoch: 0, batch: 12551, sum loss: 5360.019043, avg loss: 3.364733, ppl: 
28.925766 +epoch: 0, batch: 12552, sum loss: 6324.854004, avg loss: 3.346484, ppl: 28.402685 +epoch: 0, batch: 12553, sum loss: 7147.170410, avg loss: 3.771594, ppl: 43.449261 +epoch: 0, batch: 12554, sum loss: 5063.442383, avg loss: 3.391455, ppl: 29.709146 +epoch: 0, batch: 12555, sum loss: 7040.557617, avg loss: 3.684227, ppl: 39.814323 +epoch: 0, batch: 12556, sum loss: 5262.990234, avg loss: 3.160955, ppl: 23.593121 +epoch: 0, batch: 12557, sum loss: 5875.368164, avg loss: 3.524516, ppl: 33.937340 +epoch: 0, batch: 12558, sum loss: 6115.529297, avg loss: 3.494588, ppl: 32.936718 +epoch: 0, batch: 12559, sum loss: 5408.601562, avg loss: 3.442776, ppl: 31.273666 +epoch: 0, batch: 12560, sum loss: 5137.934570, avg loss: 3.375778, ppl: 29.247042 +epoch: 0, batch: 12561, sum loss: 5687.294434, avg loss: 3.440589, ppl: 31.205345 +epoch: 0, batch: 12562, sum loss: 6105.083984, avg loss: 3.822845, ppl: 45.734158 +epoch: 0, batch: 12563, sum loss: 6691.601074, avg loss: 3.565051, ppl: 35.341267 +epoch: 0, batch: 12564, sum loss: 6148.338379, avg loss: 3.367108, ppl: 28.994543 +epoch: 0, batch: 12565, sum loss: 5853.185059, avg loss: 3.331352, ppl: 27.976133 +epoch: 0, batch: 12566, sum loss: 6085.395508, avg loss: 3.676976, ppl: 39.526691 +epoch: 0, batch: 12567, sum loss: 6592.071777, avg loss: 3.640018, ppl: 38.092514 +epoch: 0, batch: 12568, sum loss: 7018.913574, avg loss: 3.800170, ppl: 44.708794 +epoch: 0, batch: 12569, sum loss: 5976.211914, avg loss: 3.413028, ppl: 30.357027 +epoch: 0, batch: 12570, sum loss: 5246.204590, avg loss: 3.391212, ppl: 29.701921 +epoch: 0, batch: 12571, sum loss: 4966.473633, avg loss: 3.487692, ppl: 32.710369 +epoch: 0, batch: 12572, sum loss: 5260.761719, avg loss: 3.433918, ppl: 30.997841 +epoch: 0, batch: 12573, sum loss: 6100.064941, avg loss: 3.534221, ppl: 34.268299 +epoch: 0, batch: 12574, sum loss: 5436.635742, avg loss: 3.612383, ppl: 37.054234 +epoch: 0, batch: 12575, sum loss: 6759.173340, avg loss: 3.585768, ppl: 
36.081074 +epoch: 0, batch: 12576, sum loss: 5429.734863, avg loss: 3.548846, ppl: 34.773182 +epoch: 0, batch: 12577, sum loss: 4736.574219, avg loss: 3.373628, ppl: 29.184227 +epoch: 0, batch: 12578, sum loss: 6645.484375, avg loss: 3.790921, ppl: 44.297176 +epoch: 0, batch: 12579, sum loss: 7114.557129, avg loss: 3.812731, ppl: 45.273926 +epoch: 0, batch: 12580, sum loss: 5794.421875, avg loss: 3.406480, ppl: 30.158886 +epoch: 0, batch: 12581, sum loss: 5378.031738, avg loss: 3.277289, ppl: 26.503828 +epoch: 0, batch: 12582, sum loss: 6604.065918, avg loss: 3.636600, ppl: 37.962543 +epoch: 0, batch: 12583, sum loss: 5652.707031, avg loss: 3.591300, ppl: 36.281227 +epoch: 0, batch: 12584, sum loss: 5678.085449, avg loss: 3.557698, ppl: 35.082329 +epoch: 0, batch: 12585, sum loss: 6977.765625, avg loss: 3.670576, ppl: 39.274536 +epoch: 0, batch: 12586, sum loss: 6440.357422, avg loss: 3.638620, ppl: 38.039310 +epoch: 0, batch: 12587, sum loss: 7163.838867, avg loss: 3.445810, ppl: 31.368685 +epoch: 0, batch: 12588, sum loss: 5251.446289, avg loss: 3.401196, ppl: 29.999958 +epoch: 0, batch: 12589, sum loss: 6350.352539, avg loss: 3.569619, ppl: 35.503071 +epoch: 0, batch: 12590, sum loss: 6573.418457, avg loss: 3.940898, ppl: 51.464817 +epoch: 0, batch: 12591, sum loss: 6485.420410, avg loss: 3.473712, ppl: 32.256256 +epoch: 0, batch: 12592, sum loss: 6285.876953, avg loss: 3.352468, ppl: 28.573158 +epoch: 0, batch: 12593, sum loss: 6913.894043, avg loss: 3.721148, ppl: 41.311813 +epoch: 0, batch: 12594, sum loss: 6018.051758, avg loss: 3.586443, ppl: 36.105427 +epoch: 0, batch: 12595, sum loss: 5621.894531, avg loss: 3.866503, ppl: 47.775002 +epoch: 0, batch: 12596, sum loss: 5599.008789, avg loss: 3.659483, ppl: 38.841255 +epoch: 0, batch: 12597, sum loss: 6932.108398, avg loss: 3.689254, ppl: 40.014996 +epoch: 0, batch: 12598, sum loss: 5398.937988, avg loss: 3.573089, ppl: 35.626488 +epoch: 0, batch: 12599, sum loss: 5915.716797, avg loss: 3.475744, ppl: 
32.321877 +epoch: 0, batch: 12600, sum loss: 6825.345215, avg loss: 3.620873, ppl: 37.370167 +epoch: 0, batch: 12601, sum loss: 6817.072266, avg loss: 3.374788, ppl: 29.218098 +epoch: 0, batch: 12602, sum loss: 6270.488281, avg loss: 3.589289, ppl: 36.208328 +epoch: 0, batch: 12603, sum loss: 5789.057617, avg loss: 3.675592, ppl: 39.472015 +epoch: 0, batch: 12604, sum loss: 4674.672852, avg loss: 3.083557, ppl: 21.835943 +epoch: 0, batch: 12605, sum loss: 5635.339844, avg loss: 3.482905, ppl: 32.554146 +epoch: 0, batch: 12606, sum loss: 5958.209473, avg loss: 3.633054, ppl: 37.828186 +epoch: 0, batch: 12607, sum loss: 7512.128906, avg loss: 3.874228, ppl: 48.145538 +epoch: 0, batch: 12608, sum loss: 6500.375488, avg loss: 3.525149, ppl: 33.958843 +epoch: 0, batch: 12609, sum loss: 6448.164062, avg loss: 3.910348, ppl: 49.916328 +epoch: 0, batch: 12610, sum loss: 6955.019531, avg loss: 3.487974, ppl: 32.719582 +epoch: 0, batch: 12611, sum loss: 6759.964844, avg loss: 3.634390, ppl: 37.878735 +epoch: 0, batch: 12612, sum loss: 6215.785645, avg loss: 3.535714, ppl: 34.319515 +epoch: 0, batch: 12613, sum loss: 5898.990234, avg loss: 3.639106, ppl: 38.057781 +epoch: 0, batch: 12614, sum loss: 5826.356445, avg loss: 3.383482, ppl: 29.473225 +epoch: 0, batch: 12615, sum loss: 6449.669922, avg loss: 3.583150, ppl: 35.986725 +epoch: 0, batch: 12616, sum loss: 5213.445312, avg loss: 3.398595, ppl: 29.922041 +epoch: 0, batch: 12617, sum loss: 5700.332520, avg loss: 3.518724, ppl: 33.741344 +epoch: 0, batch: 12618, sum loss: 4649.161621, avg loss: 3.330345, ppl: 27.947987 +epoch: 0, batch: 12619, sum loss: 6342.366211, avg loss: 3.702490, ppl: 40.548157 +epoch: 0, batch: 12620, sum loss: 5882.353516, avg loss: 3.454113, ppl: 31.630205 +epoch: 0, batch: 12621, sum loss: 5520.385254, avg loss: 3.416080, ppl: 30.449818 +epoch: 0, batch: 12622, sum loss: 5956.036621, avg loss: 3.387962, ppl: 29.605543 +epoch: 0, batch: 12623, sum loss: 5012.238770, avg loss: 3.304047, ppl: 
27.222576 +epoch: 0, batch: 12624, sum loss: 6448.011230, avg loss: 3.663643, ppl: 39.003159 +epoch: 0, batch: 12625, sum loss: 6124.116211, avg loss: 3.645307, ppl: 38.294540 +epoch: 0, batch: 12626, sum loss: 7066.260254, avg loss: 3.636779, ppl: 37.969326 +epoch: 0, batch: 12627, sum loss: 7125.905273, avg loss: 3.760372, ppl: 42.964413 +epoch: 0, batch: 12628, sum loss: 4670.906250, avg loss: 3.056876, ppl: 21.261032 +epoch: 0, batch: 12629, sum loss: 6317.809082, avg loss: 3.624675, ppl: 37.512539 +epoch: 0, batch: 12630, sum loss: 5339.168457, avg loss: 3.370687, ppl: 29.098518 +epoch: 0, batch: 12631, sum loss: 6601.935059, avg loss: 3.578285, ppl: 35.812054 +epoch: 0, batch: 12632, sum loss: 6872.577148, avg loss: 3.822345, ppl: 45.711288 +epoch: 0, batch: 12633, sum loss: 4677.956055, avg loss: 3.273587, ppl: 26.405893 +epoch: 0, batch: 12634, sum loss: 5660.149902, avg loss: 3.349201, ppl: 28.479975 +epoch: 0, batch: 12635, sum loss: 5358.946777, avg loss: 3.567874, ppl: 35.441162 +epoch: 0, batch: 12636, sum loss: 5545.344238, avg loss: 3.314611, ppl: 27.511696 +epoch: 0, batch: 12637, sum loss: 5411.016602, avg loss: 3.506816, ppl: 33.341934 +epoch: 0, batch: 12638, sum loss: 6059.072266, avg loss: 3.570461, ppl: 35.532970 +epoch: 0, batch: 12639, sum loss: 5972.571289, avg loss: 3.538253, ppl: 34.406769 +epoch: 0, batch: 12640, sum loss: 6130.159668, avg loss: 3.512985, ppl: 33.548275 +epoch: 0, batch: 12641, sum loss: 4822.374023, avg loss: 3.316626, ppl: 27.567177 +epoch: 0, batch: 12642, sum loss: 4827.455078, avg loss: 3.368776, ppl: 29.042946 +epoch: 0, batch: 12643, sum loss: 5061.395996, avg loss: 3.325490, ppl: 27.812630 +epoch: 0, batch: 12644, sum loss: 6420.353027, avg loss: 3.602892, ppl: 36.704227 +epoch: 0, batch: 12645, sum loss: 6400.716309, avg loss: 3.771783, ppl: 43.457497 +epoch: 0, batch: 12646, sum loss: 6009.034180, avg loss: 3.555641, ppl: 35.010269 +epoch: 0, batch: 12647, sum loss: 7224.826172, avg loss: 3.871825, ppl: 
48.029972 +epoch: 0, batch: 12648, sum loss: 6633.629395, avg loss: 3.607194, ppl: 36.862469 +epoch: 0, batch: 12649, sum loss: 6257.167969, avg loss: 3.535123, ppl: 34.299236 +epoch: 0, batch: 12650, sum loss: 6224.918945, avg loss: 3.606558, ppl: 36.839027 +epoch: 0, batch: 12651, sum loss: 5079.925293, avg loss: 3.437027, ppl: 31.094368 +epoch: 0, batch: 12652, sum loss: 5474.164062, avg loss: 3.599056, ppl: 36.563702 +epoch: 0, batch: 12653, sum loss: 6747.551270, avg loss: 3.558835, ppl: 35.122265 +epoch: 0, batch: 12654, sum loss: 5529.266602, avg loss: 3.693565, ppl: 40.187847 +epoch: 0, batch: 12655, sum loss: 6103.980957, avg loss: 3.565409, ppl: 35.353924 +epoch: 0, batch: 12656, sum loss: 5370.040039, avg loss: 3.446752, ppl: 31.398254 +epoch: 0, batch: 12657, sum loss: 5708.564453, avg loss: 3.438894, ppl: 31.152493 +epoch: 0, batch: 12658, sum loss: 5494.741699, avg loss: 3.486511, ppl: 32.671764 +epoch: 0, batch: 12659, sum loss: 5531.564453, avg loss: 3.527783, ppl: 34.048412 +epoch: 0, batch: 12660, sum loss: 6431.859863, avg loss: 3.599250, ppl: 36.570801 +epoch: 0, batch: 12661, sum loss: 5801.519043, avg loss: 3.655652, ppl: 38.692734 +epoch: 0, batch: 12662, sum loss: 6116.102539, avg loss: 3.644876, ppl: 38.278034 +epoch: 0, batch: 12663, sum loss: 5668.283203, avg loss: 3.573949, ppl: 35.657127 +epoch: 0, batch: 12664, sum loss: 4561.976562, avg loss: 3.226292, ppl: 25.186090 +epoch: 0, batch: 12665, sum loss: 5467.022461, avg loss: 3.625346, ppl: 37.537724 +epoch: 0, batch: 12666, sum loss: 5656.699219, avg loss: 3.434547, ppl: 31.017357 +epoch: 0, batch: 12667, sum loss: 5036.668457, avg loss: 3.655057, ppl: 38.669724 +epoch: 0, batch: 12668, sum loss: 5987.312012, avg loss: 3.456877, ppl: 31.717781 +epoch: 0, batch: 12669, sum loss: 5611.674316, avg loss: 3.558449, ppl: 35.108711 +epoch: 0, batch: 12670, sum loss: 5701.392578, avg loss: 3.422204, ppl: 30.636871 +epoch: 0, batch: 12671, sum loss: 5624.731934, avg loss: 3.366087, ppl: 
28.964977 +epoch: 0, batch: 12672, sum loss: 5608.419922, avg loss: 3.459852, ppl: 31.812260 +epoch: 0, batch: 12673, sum loss: 5547.051270, avg loss: 3.477775, ppl: 32.387581 +epoch: 0, batch: 12674, sum loss: 4448.400879, avg loss: 3.078478, ppl: 21.725313 +epoch: 0, batch: 12675, sum loss: 6450.008789, avg loss: 3.507346, ppl: 33.359612 +epoch: 0, batch: 12676, sum loss: 7092.186523, avg loss: 3.642623, ppl: 38.191879 +epoch: 0, batch: 12677, sum loss: 6440.190430, avg loss: 3.587850, ppl: 36.156250 +epoch: 0, batch: 12678, sum loss: 5065.463379, avg loss: 3.422611, ppl: 30.649321 +epoch: 0, batch: 12679, sum loss: 6910.029297, avg loss: 3.671641, ppl: 39.316383 +epoch: 0, batch: 12680, sum loss: 5197.125488, avg loss: 3.571908, ppl: 35.584408 +epoch: 0, batch: 12681, sum loss: 5765.680664, avg loss: 3.746381, ppl: 42.367489 +epoch: 0, batch: 12682, sum loss: 6066.057617, avg loss: 3.612899, ppl: 37.073376 +epoch: 0, batch: 12683, sum loss: 7040.052734, avg loss: 3.784975, ppl: 44.034550 +epoch: 0, batch: 12684, sum loss: 5471.590332, avg loss: 3.441252, ppl: 31.226021 +epoch: 0, batch: 12685, sum loss: 7160.074219, avg loss: 3.895579, ppl: 49.184517 +epoch: 0, batch: 12686, sum loss: 5900.791016, avg loss: 3.493660, ppl: 32.906155 +epoch: 0, batch: 12687, sum loss: 7212.199219, avg loss: 3.696668, ppl: 40.312763 +epoch: 0, batch: 12688, sum loss: 6299.599121, avg loss: 3.605953, ppl: 36.816734 +epoch: 0, batch: 12689, sum loss: 4387.618652, avg loss: 3.145247, ppl: 23.225399 +epoch: 0, batch: 12690, sum loss: 5702.081543, avg loss: 3.622669, ppl: 37.437363 +epoch: 0, batch: 12691, sum loss: 5442.094727, avg loss: 3.448729, ppl: 31.460390 +epoch: 0, batch: 12692, sum loss: 5358.953613, avg loss: 3.167230, ppl: 23.741638 +epoch: 0, batch: 12693, sum loss: 6119.571289, avg loss: 3.488923, ppl: 32.750660 +epoch: 0, batch: 12694, sum loss: 5832.293945, avg loss: 3.661201, ppl: 38.908062 +epoch: 0, batch: 12695, sum loss: 5731.612793, avg loss: 3.432104, ppl: 
30.941664 +epoch: 0, batch: 12696, sum loss: 6028.369629, avg loss: 3.521244, ppl: 33.826477 +epoch: 0, batch: 12697, sum loss: 6919.149414, avg loss: 3.983391, ppl: 53.698807 +epoch: 0, batch: 12698, sum loss: 6749.194336, avg loss: 3.749552, ppl: 42.502056 +epoch: 0, batch: 12699, sum loss: 6657.262695, avg loss: 3.717064, ppl: 41.143436 +epoch: 0, batch: 12700, sum loss: 5842.997070, avg loss: 3.521999, ppl: 33.852043 +epoch: 0, batch: 12701, sum loss: 7467.325195, avg loss: 3.769472, ppl: 43.357182 +epoch: 0, batch: 12702, sum loss: 5696.240723, avg loss: 3.460657, ppl: 31.837875 +epoch: 0, batch: 12703, sum loss: 6199.078125, avg loss: 3.542330, ppl: 34.547329 +epoch: 0, batch: 12704, sum loss: 5858.356445, avg loss: 3.315425, ppl: 27.534105 +epoch: 0, batch: 12705, sum loss: 5711.646973, avg loss: 3.432480, ppl: 30.953321 +epoch: 0, batch: 12706, sum loss: 6550.129395, avg loss: 3.616858, ppl: 37.220436 +epoch: 0, batch: 12707, sum loss: 5845.143555, avg loss: 3.603664, ppl: 36.732582 +epoch: 0, batch: 12708, sum loss: 5741.611816, avg loss: 3.539834, ppl: 34.461216 +epoch: 0, batch: 12709, sum loss: 6101.238281, avg loss: 3.563807, ppl: 35.297329 +epoch: 0, batch: 12710, sum loss: 5215.254883, avg loss: 3.401993, ppl: 30.023872 +epoch: 0, batch: 12711, sum loss: 5517.440430, avg loss: 3.487636, ppl: 32.708538 +epoch: 0, batch: 12712, sum loss: 5343.489746, avg loss: 3.438539, ppl: 31.141428 +epoch: 0, batch: 12713, sum loss: 5166.425781, avg loss: 3.363559, ppl: 28.891821 +epoch: 0, batch: 12714, sum loss: 6422.556152, avg loss: 3.691124, ppl: 40.089890 +epoch: 0, batch: 12715, sum loss: 6231.729492, avg loss: 3.911946, ppl: 49.996140 +epoch: 0, batch: 12716, sum loss: 6745.932617, avg loss: 3.561738, ppl: 35.224380 +epoch: 0, batch: 12717, sum loss: 6365.442871, avg loss: 3.422281, ppl: 30.639231 +epoch: 0, batch: 12718, sum loss: 4480.659668, avg loss: 3.124588, ppl: 22.750526 +epoch: 0, batch: 12719, sum loss: 5594.936523, avg loss: 3.637800, ppl: 
38.008125 +epoch: 0, batch: 12720, sum loss: 5996.087402, avg loss: 3.404933, ppl: 30.112284 +epoch: 0, batch: 12721, sum loss: 6450.750977, avg loss: 3.526928, ppl: 34.019299 +epoch: 0, batch: 12722, sum loss: 5518.772461, avg loss: 3.336622, ppl: 28.123962 +epoch: 0, batch: 12723, sum loss: 4894.275879, avg loss: 3.382361, ppl: 29.440189 +epoch: 0, batch: 12724, sum loss: 6391.529297, avg loss: 3.859619, ppl: 47.447277 +epoch: 0, batch: 12725, sum loss: 6779.379883, avg loss: 3.768416, ppl: 43.311390 +epoch: 0, batch: 12726, sum loss: 5282.985840, avg loss: 3.417197, ppl: 30.483833 +epoch: 0, batch: 12727, sum loss: 5661.195801, avg loss: 3.578505, ppl: 35.819962 +epoch: 0, batch: 12728, sum loss: 5455.295898, avg loss: 3.409560, ppl: 30.251930 +epoch: 0, batch: 12729, sum loss: 5270.313965, avg loss: 3.378407, ppl: 29.324007 +epoch: 0, batch: 12730, sum loss: 6590.175781, avg loss: 3.593334, ppl: 36.355099 +epoch: 0, batch: 12731, sum loss: 5648.240723, avg loss: 3.559068, ppl: 35.130440 +epoch: 0, batch: 12732, sum loss: 6277.092773, avg loss: 3.538383, ppl: 34.411247 +epoch: 0, batch: 12733, sum loss: 6637.359863, avg loss: 3.471422, ppl: 32.182487 +epoch: 0, batch: 12734, sum loss: 4403.117188, avg loss: 3.392232, ppl: 29.732239 +epoch: 0, batch: 12735, sum loss: 6734.968262, avg loss: 3.731284, ppl: 41.732674 +epoch: 0, batch: 12736, sum loss: 5170.491211, avg loss: 3.495937, ppl: 32.981182 +epoch: 0, batch: 12737, sum loss: 6159.504395, avg loss: 3.404922, ppl: 30.111961 +epoch: 0, batch: 12738, sum loss: 6880.540527, avg loss: 3.801404, ppl: 44.763977 +epoch: 0, batch: 12739, sum loss: 6351.067383, avg loss: 3.738121, ppl: 42.018963 +epoch: 0, batch: 12740, sum loss: 5929.229004, avg loss: 3.520920, ppl: 33.815525 +epoch: 0, batch: 12741, sum loss: 5891.895996, avg loss: 3.808595, ppl: 45.087032 +epoch: 0, batch: 12742, sum loss: 5979.172852, avg loss: 3.492507, ppl: 32.868259 +epoch: 0, batch: 12743, sum loss: 6224.929199, avg loss: 3.606564, ppl: 
36.839249 +epoch: 0, batch: 12744, sum loss: 5864.758789, avg loss: 3.292958, ppl: 26.922396 +epoch: 0, batch: 12745, sum loss: 7204.489746, avg loss: 3.721327, ppl: 41.319202 +epoch: 0, batch: 12746, sum loss: 6727.696289, avg loss: 3.603479, ppl: 36.725796 +epoch: 0, batch: 12747, sum loss: 5367.814453, avg loss: 3.410301, ppl: 30.274363 +epoch: 0, batch: 12748, sum loss: 6208.372559, avg loss: 3.697661, ppl: 40.352795 +epoch: 0, batch: 12749, sum loss: 6201.501953, avg loss: 3.507637, ppl: 33.369324 +epoch: 0, batch: 12750, sum loss: 6110.628418, avg loss: 3.366738, ppl: 28.983816 +epoch: 0, batch: 12751, sum loss: 5836.315918, avg loss: 3.350353, ppl: 28.512810 +epoch: 0, batch: 12752, sum loss: 5634.430176, avg loss: 3.412738, ppl: 30.348227 +epoch: 0, batch: 12753, sum loss: 5849.011719, avg loss: 3.706598, ppl: 40.715061 +epoch: 0, batch: 12754, sum loss: 5392.125000, avg loss: 3.404119, ppl: 30.087784 +epoch: 0, batch: 12755, sum loss: 6645.770508, avg loss: 3.788923, ppl: 44.208740 +epoch: 0, batch: 12756, sum loss: 5907.307617, avg loss: 3.637505, ppl: 37.996899 +epoch: 0, batch: 12757, sum loss: 4951.143066, avg loss: 3.287612, ppl: 26.778831 +epoch: 0, batch: 12758, sum loss: 4653.843262, avg loss: 3.452406, ppl: 31.576271 +epoch: 0, batch: 12759, sum loss: 6312.222656, avg loss: 3.370114, ppl: 29.081831 +epoch: 0, batch: 12760, sum loss: 6033.503906, avg loss: 3.708361, ppl: 40.786919 +epoch: 0, batch: 12761, sum loss: 5418.698730, avg loss: 3.500452, ppl: 33.130409 +epoch: 0, batch: 12762, sum loss: 6040.905273, avg loss: 3.412941, ppl: 30.354385 +epoch: 0, batch: 12763, sum loss: 5463.741699, avg loss: 3.462447, ppl: 31.894934 +epoch: 0, batch: 12764, sum loss: 5778.902344, avg loss: 3.498125, ppl: 33.053421 +epoch: 0, batch: 12765, sum loss: 7159.495605, avg loss: 3.903760, ppl: 49.588539 +epoch: 0, batch: 12766, sum loss: 6088.521973, avg loss: 3.457423, ppl: 31.735088 +epoch: 0, batch: 12767, sum loss: 6870.736328, avg loss: 3.576646, ppl: 
35.753418 +epoch: 0, batch: 12768, sum loss: 4871.373047, avg loss: 3.136750, ppl: 23.028906 +epoch: 0, batch: 12769, sum loss: 5040.607910, avg loss: 3.371644, ppl: 29.126371 +epoch: 0, batch: 12770, sum loss: 5565.183594, avg loss: 3.846015, ppl: 46.806168 +epoch: 0, batch: 12771, sum loss: 6254.790527, avg loss: 3.566015, ppl: 35.375340 +epoch: 0, batch: 12772, sum loss: 6067.346680, avg loss: 3.688356, ppl: 39.979084 +epoch: 0, batch: 12773, sum loss: 6015.234375, avg loss: 3.457031, ppl: 31.722660 +epoch: 0, batch: 12774, sum loss: 6835.128906, avg loss: 3.587994, ppl: 36.161469 +epoch: 0, batch: 12775, sum loss: 6436.678711, avg loss: 3.554213, ppl: 34.960281 +epoch: 0, batch: 12776, sum loss: 6095.143555, avg loss: 3.698509, ppl: 40.387062 +epoch: 0, batch: 12777, sum loss: 8204.797852, avg loss: 4.000389, ppl: 54.619400 +epoch: 0, batch: 12778, sum loss: 6985.332031, avg loss: 3.802576, ppl: 44.816486 +epoch: 0, batch: 12779, sum loss: 5366.703613, avg loss: 3.276376, ppl: 26.479624 +epoch: 0, batch: 12780, sum loss: 4976.739258, avg loss: 3.331151, ppl: 27.970510 +epoch: 0, batch: 12781, sum loss: 6672.923340, avg loss: 3.455683, ppl: 31.679911 +epoch: 0, batch: 12782, sum loss: 7047.868164, avg loss: 3.494233, ppl: 32.925037 +epoch: 0, batch: 12783, sum loss: 5597.372070, avg loss: 3.390292, ppl: 29.674622 +epoch: 0, batch: 12784, sum loss: 5431.212891, avg loss: 3.454970, ppl: 31.657343 +epoch: 0, batch: 12785, sum loss: 6093.751465, avg loss: 3.588782, ppl: 36.189964 +epoch: 0, batch: 12786, sum loss: 6209.277344, avg loss: 3.595412, ppl: 36.430725 +epoch: 0, batch: 12787, sum loss: 6587.761230, avg loss: 3.511600, ppl: 33.501820 +epoch: 0, batch: 12788, sum loss: 5327.311523, avg loss: 3.691830, ppl: 40.118214 +epoch: 0, batch: 12789, sum loss: 5923.132812, avg loss: 3.583262, ppl: 35.990768 +epoch: 0, batch: 12790, sum loss: 5489.968750, avg loss: 3.395157, ppl: 29.819338 +epoch: 0, batch: 12791, sum loss: 5746.543457, avg loss: 3.329399, ppl: 
27.921566 +epoch: 0, batch: 12792, sum loss: 6027.008789, avg loss: 3.792957, ppl: 44.387463 +epoch: 0, batch: 12793, sum loss: 5126.593262, avg loss: 3.461576, ppl: 31.867144 +epoch: 0, batch: 12794, sum loss: 6209.386719, avg loss: 3.641869, ppl: 38.163101 +epoch: 0, batch: 12795, sum loss: 6163.375000, avg loss: 3.515901, ppl: 33.646240 +epoch: 0, batch: 12796, sum loss: 5587.498047, avg loss: 3.464041, ppl: 31.945808 +epoch: 0, batch: 12797, sum loss: 6041.859863, avg loss: 3.657300, ppl: 38.756569 +epoch: 0, batch: 12798, sum loss: 5737.066895, avg loss: 3.312394, ppl: 27.450768 +epoch: 0, batch: 12799, sum loss: 5492.080078, avg loss: 3.536433, ppl: 34.344193 +epoch: 0, batch: 12800, sum loss: 5327.466309, avg loss: 3.437075, ppl: 31.095875 +epoch: 0, batch: 12801, sum loss: 6302.294922, avg loss: 3.731377, ppl: 41.736526 +epoch: 0, batch: 12802, sum loss: 6998.945312, avg loss: 3.565433, ppl: 35.354767 +epoch: 0, batch: 12803, sum loss: 6059.673340, avg loss: 3.470603, ppl: 32.156143 +epoch: 0, batch: 12804, sum loss: 5835.427246, avg loss: 3.705033, ppl: 40.651390 +epoch: 0, batch: 12805, sum loss: 5302.555176, avg loss: 3.445455, ppl: 31.357550 +epoch: 0, batch: 12806, sum loss: 5846.053711, avg loss: 3.467410, ppl: 32.053627 +epoch: 0, batch: 12807, sum loss: 5093.821777, avg loss: 3.357826, ppl: 28.726664 +epoch: 0, batch: 12808, sum loss: 4941.420898, avg loss: 3.382218, ppl: 29.435999 +epoch: 0, batch: 12809, sum loss: 4723.140625, avg loss: 3.319143, ppl: 27.636658 +epoch: 0, batch: 12810, sum loss: 5340.589844, avg loss: 3.135989, ppl: 23.011393 +epoch: 0, batch: 12811, sum loss: 5921.264648, avg loss: 3.456663, ppl: 31.710993 +epoch: 0, batch: 12812, sum loss: 4943.279785, avg loss: 3.576903, ppl: 35.762608 +epoch: 0, batch: 12813, sum loss: 6597.609375, avg loss: 3.526248, ppl: 33.996166 +epoch: 0, batch: 12814, sum loss: 5992.214355, avg loss: 3.356983, ppl: 28.702463 +epoch: 0, batch: 12815, sum loss: 4706.225586, avg loss: 3.349627, ppl: 
28.492098 +epoch: 0, batch: 12816, sum loss: 6232.027344, avg loss: 3.727289, ppl: 41.566277 +epoch: 0, batch: 12817, sum loss: 4769.455078, avg loss: 3.527704, ppl: 34.045692 +epoch: 0, batch: 12818, sum loss: 5887.563477, avg loss: 3.603160, ppl: 36.714073 +epoch: 0, batch: 12819, sum loss: 4887.732422, avg loss: 3.513826, ppl: 33.576500 +epoch: 0, batch: 12820, sum loss: 5088.104492, avg loss: 3.226445, ppl: 25.189959 +epoch: 0, batch: 12821, sum loss: 5128.120117, avg loss: 3.391614, ppl: 29.713871 +epoch: 0, batch: 12822, sum loss: 5028.073242, avg loss: 3.397347, ppl: 29.884710 +epoch: 0, batch: 12823, sum loss: 5125.690918, avg loss: 3.374385, ppl: 29.206312 +epoch: 0, batch: 12824, sum loss: 6641.932617, avg loss: 3.657452, ppl: 38.762436 +epoch: 0, batch: 12825, sum loss: 5392.087891, avg loss: 3.401948, ppl: 30.022533 +epoch: 0, batch: 12826, sum loss: 5458.029785, avg loss: 3.574348, ppl: 35.671364 +epoch: 0, batch: 12827, sum loss: 6027.387695, avg loss: 3.562286, ppl: 35.243668 +epoch: 0, batch: 12828, sum loss: 5820.764160, avg loss: 3.615381, ppl: 37.165520 +epoch: 0, batch: 12829, sum loss: 6038.948242, avg loss: 3.644507, ppl: 38.263901 +epoch: 0, batch: 12830, sum loss: 7087.207031, avg loss: 3.664533, ppl: 39.037907 +epoch: 0, batch: 12831, sum loss: 5900.947754, avg loss: 3.706625, ppl: 40.716175 +epoch: 0, batch: 12832, sum loss: 6155.161133, avg loss: 3.551737, ppl: 34.873852 +epoch: 0, batch: 12833, sum loss: 5270.791016, avg loss: 3.509182, ppl: 33.420910 +epoch: 0, batch: 12834, sum loss: 5618.360352, avg loss: 3.524693, ppl: 33.943344 +epoch: 0, batch: 12835, sum loss: 5545.604004, avg loss: 3.516553, ppl: 33.668171 +epoch: 0, batch: 12836, sum loss: 5431.786133, avg loss: 3.262334, ppl: 26.110411 +epoch: 0, batch: 12837, sum loss: 5468.446777, avg loss: 3.439275, ppl: 31.164341 +epoch: 0, batch: 12838, sum loss: 6156.044922, avg loss: 3.719665, ppl: 41.250565 +epoch: 0, batch: 12839, sum loss: 7493.347168, avg loss: 3.933516, ppl: 
51.086258 +epoch: 0, batch: 12840, sum loss: 5103.198730, avg loss: 3.331070, ppl: 27.968243 +epoch: 0, batch: 12841, sum loss: 6559.986328, avg loss: 3.628311, ppl: 37.649170 +epoch: 0, batch: 12842, sum loss: 6648.129883, avg loss: 3.489832, ppl: 32.780437 +epoch: 0, batch: 12843, sum loss: 7038.492188, avg loss: 3.739900, ppl: 42.093784 +epoch: 0, batch: 12844, sum loss: 4435.671875, avg loss: 3.449201, ppl: 31.475222 +epoch: 0, batch: 12845, sum loss: 6050.171875, avg loss: 3.457241, ppl: 31.729317 +epoch: 0, batch: 12846, sum loss: 4580.331055, avg loss: 3.454247, ppl: 31.634451 +epoch: 0, batch: 12847, sum loss: 5793.165039, avg loss: 3.502518, ppl: 33.198948 +epoch: 0, batch: 12848, sum loss: 4869.481934, avg loss: 3.233388, ppl: 25.365448 +epoch: 0, batch: 12849, sum loss: 6537.796875, avg loss: 3.433717, ppl: 30.991625 +epoch: 0, batch: 12850, sum loss: 6476.246094, avg loss: 3.487478, ppl: 32.703350 +epoch: 0, batch: 12851, sum loss: 5622.961914, avg loss: 3.367043, ppl: 28.992670 +epoch: 0, batch: 12852, sum loss: 5130.820312, avg loss: 3.487981, ppl: 32.719830 +epoch: 0, batch: 12853, sum loss: 5901.936035, avg loss: 3.517244, ppl: 33.691460 +epoch: 0, batch: 12854, sum loss: 5067.975586, avg loss: 3.157617, ppl: 23.514496 +epoch: 0, batch: 12855, sum loss: 5448.751465, avg loss: 3.392747, ppl: 29.747547 +epoch: 0, batch: 12856, sum loss: 5464.903320, avg loss: 3.352701, ppl: 28.579836 +epoch: 0, batch: 12857, sum loss: 5324.703125, avg loss: 3.521629, ppl: 33.839520 +epoch: 0, batch: 12858, sum loss: 5240.532227, avg loss: 3.400735, ppl: 29.986122 +epoch: 0, batch: 12859, sum loss: 6775.937988, avg loss: 3.698656, ppl: 40.392982 +epoch: 0, batch: 12860, sum loss: 5912.803711, avg loss: 3.711741, ppl: 40.925003 +epoch: 0, batch: 12861, sum loss: 5572.336426, avg loss: 3.412331, ppl: 30.335871 +epoch: 0, batch: 12862, sum loss: 5388.124023, avg loss: 3.465032, ppl: 31.977470 +epoch: 0, batch: 12863, sum loss: 6602.107422, avg loss: 3.534319, ppl: 
34.271656 +epoch: 0, batch: 12864, sum loss: 7219.658691, avg loss: 3.736883, ppl: 41.966991 +epoch: 0, batch: 12865, sum loss: 6253.233398, avg loss: 3.689223, ppl: 40.013756 +epoch: 0, batch: 12866, sum loss: 5698.529785, avg loss: 3.535068, ppl: 34.297348 +epoch: 0, batch: 12867, sum loss: 7175.013184, avg loss: 3.746743, ppl: 42.382824 +epoch: 0, batch: 12868, sum loss: 5556.183105, avg loss: 3.538970, ppl: 34.431446 +epoch: 0, batch: 12869, sum loss: 5389.910156, avg loss: 3.375022, ppl: 29.224932 +epoch: 0, batch: 12870, sum loss: 6575.219727, avg loss: 3.757268, ppl: 42.831268 +epoch: 0, batch: 12871, sum loss: 5867.658691, avg loss: 3.354865, ppl: 28.641733 +epoch: 0, batch: 12872, sum loss: 7042.616699, avg loss: 3.833760, ppl: 46.236061 +epoch: 0, batch: 12873, sum loss: 6271.487305, avg loss: 3.728589, ppl: 41.620354 +epoch: 0, batch: 12874, sum loss: 5213.520508, avg loss: 3.314380, ppl: 27.505346 +epoch: 0, batch: 12875, sum loss: 6649.044922, avg loss: 3.946021, ppl: 51.729107 +epoch: 0, batch: 12876, sum loss: 6098.710938, avg loss: 3.469119, ppl: 32.108437 +epoch: 0, batch: 12877, sum loss: 6055.705078, avg loss: 3.602442, ppl: 36.687717 +epoch: 0, batch: 12878, sum loss: 6061.929688, avg loss: 3.524378, ppl: 33.932655 +epoch: 0, batch: 12879, sum loss: 4987.186035, avg loss: 3.360637, ppl: 28.807549 +epoch: 0, batch: 12880, sum loss: 6028.232910, avg loss: 3.466494, ppl: 32.024269 +epoch: 0, batch: 12881, sum loss: 5485.165527, avg loss: 3.557176, ppl: 35.064041 +epoch: 0, batch: 12882, sum loss: 6156.639648, avg loss: 3.680000, ppl: 39.646389 +epoch: 0, batch: 12883, sum loss: 5450.062012, avg loss: 3.597401, ppl: 36.503227 +epoch: 0, batch: 12884, sum loss: 5669.809570, avg loss: 3.554739, ppl: 34.978706 +epoch: 0, batch: 12885, sum loss: 5937.964844, avg loss: 3.743988, ppl: 42.266212 +epoch: 0, batch: 12886, sum loss: 5530.604492, avg loss: 3.331690, ppl: 27.985586 +epoch: 0, batch: 12887, sum loss: 5625.454102, avg loss: 3.457562, ppl: 
31.739515 +epoch: 0, batch: 12888, sum loss: 6426.442383, avg loss: 3.760353, ppl: 42.963573 +epoch: 0, batch: 12889, sum loss: 5585.614258, avg loss: 3.555452, ppl: 35.003624 +epoch: 0, batch: 12890, sum loss: 6253.114746, avg loss: 3.528846, ppl: 34.084602 +epoch: 0, batch: 12891, sum loss: 4974.553711, avg loss: 3.358916, ppl: 28.757988 +epoch: 0, batch: 12892, sum loss: 4625.330078, avg loss: 3.358991, ppl: 28.760147 +epoch: 0, batch: 12893, sum loss: 5647.368652, avg loss: 3.516419, ppl: 33.663654 +epoch: 0, batch: 12894, sum loss: 6678.507324, avg loss: 3.476579, ppl: 32.348858 +epoch: 0, batch: 12895, sum loss: 4817.393066, avg loss: 3.390143, ppl: 29.670193 +epoch: 0, batch: 12896, sum loss: 7937.869629, avg loss: 3.832868, ppl: 46.194839 +epoch: 0, batch: 12897, sum loss: 6189.053711, avg loss: 3.834606, ppl: 46.275188 +epoch: 0, batch: 12898, sum loss: 6668.658203, avg loss: 3.526525, ppl: 34.005585 +epoch: 0, batch: 12899, sum loss: 7289.272949, avg loss: 3.759295, ppl: 42.918156 +epoch: 0, batch: 12900, sum loss: 7066.502441, avg loss: 3.667100, ppl: 39.138256 +epoch: 0, batch: 12901, sum loss: 5813.609375, avg loss: 3.399771, ppl: 29.957253 +epoch: 0, batch: 12902, sum loss: 5894.516602, avg loss: 3.498230, ppl: 33.056873 +epoch: 0, batch: 12903, sum loss: 7779.863770, avg loss: 4.020601, ppl: 55.734608 +epoch: 0, batch: 12904, sum loss: 5664.182617, avg loss: 3.359539, ppl: 28.775923 +epoch: 0, batch: 12905, sum loss: 6232.347656, avg loss: 3.523091, ppl: 33.889019 +epoch: 0, batch: 12906, sum loss: 6128.167480, avg loss: 3.558750, ppl: 35.119267 +epoch: 0, batch: 12907, sum loss: 7051.103027, avg loss: 3.562963, ppl: 35.267525 +epoch: 0, batch: 12908, sum loss: 5685.265137, avg loss: 3.694129, ppl: 40.210552 +epoch: 0, batch: 12909, sum loss: 5943.904785, avg loss: 3.800451, ppl: 44.721329 +epoch: 0, batch: 12910, sum loss: 5516.716797, avg loss: 3.420159, ppl: 30.574286 +epoch: 0, batch: 12911, sum loss: 5437.194336, avg loss: 3.532940, ppl: 
34.224438 +epoch: 0, batch: 12912, sum loss: 5777.607422, avg loss: 3.522931, ppl: 33.883606 +epoch: 0, batch: 12913, sum loss: 5465.647461, avg loss: 3.450535, ppl: 31.517252 +epoch: 0, batch: 12914, sum loss: 5878.769531, avg loss: 3.537166, ppl: 34.369373 +epoch: 0, batch: 12915, sum loss: 7109.384766, avg loss: 3.618007, ppl: 37.263245 +epoch: 0, batch: 12916, sum loss: 6416.871094, avg loss: 3.504572, ppl: 33.267208 +epoch: 0, batch: 12917, sum loss: 5670.138672, avg loss: 3.534999, ppl: 34.294994 +epoch: 0, batch: 12918, sum loss: 6202.243164, avg loss: 3.709476, ppl: 40.832394 +epoch: 0, batch: 12919, sum loss: 5773.001465, avg loss: 3.520123, ppl: 33.788578 +epoch: 0, batch: 12920, sum loss: 5788.910156, avg loss: 3.476823, ppl: 32.356766 +epoch: 0, batch: 12921, sum loss: 6048.408203, avg loss: 3.615307, ppl: 37.162746 +epoch: 0, batch: 12922, sum loss: 7255.092773, avg loss: 3.753282, ppl: 42.660847 +epoch: 0, batch: 12923, sum loss: 5017.075684, avg loss: 3.324769, ppl: 27.792564 +epoch: 0, batch: 12924, sum loss: 6052.122070, avg loss: 3.617527, ppl: 37.245338 +epoch: 0, batch: 12925, sum loss: 6071.745117, avg loss: 3.752624, ppl: 42.632786 +epoch: 0, batch: 12926, sum loss: 6297.926758, avg loss: 3.481441, ppl: 32.506538 +epoch: 0, batch: 12927, sum loss: 7227.236816, avg loss: 3.809824, ppl: 45.142513 +epoch: 0, batch: 12928, sum loss: 6616.833496, avg loss: 3.523340, ppl: 33.897472 +epoch: 0, batch: 12929, sum loss: 6365.286621, avg loss: 3.643553, ppl: 38.227402 +epoch: 0, batch: 12930, sum loss: 4330.229492, avg loss: 2.976103, ppl: 19.611235 +epoch: 0, batch: 12931, sum loss: 4711.452148, avg loss: 3.145161, ppl: 23.223423 +epoch: 0, batch: 12932, sum loss: 5458.433594, avg loss: 3.465672, ppl: 31.997955 +epoch: 0, batch: 12933, sum loss: 5441.479492, avg loss: 3.625236, ppl: 37.533588 +epoch: 0, batch: 12934, sum loss: 6739.476562, avg loss: 3.668741, ppl: 39.202511 +epoch: 0, batch: 12935, sum loss: 6901.614746, avg loss: 3.722554, ppl: 
41.369907 +epoch: 0, batch: 12936, sum loss: 5587.643555, avg loss: 3.447035, ppl: 31.407127 +epoch: 0, batch: 12937, sum loss: 5488.273926, avg loss: 3.603594, ppl: 36.730015 +epoch: 0, batch: 12938, sum loss: 6248.471680, avg loss: 3.481043, ppl: 32.493591 +epoch: 0, batch: 12939, sum loss: 6933.362305, avg loss: 3.693853, ppl: 40.199444 +epoch: 0, batch: 12940, sum loss: 6982.778320, avg loss: 3.936177, ppl: 51.222404 +epoch: 0, batch: 12941, sum loss: 5345.572754, avg loss: 3.334730, ppl: 28.070814 +epoch: 0, batch: 12942, sum loss: 6823.625000, avg loss: 3.765797, ppl: 43.198139 +epoch: 0, batch: 12943, sum loss: 6410.562500, avg loss: 3.806747, ppl: 45.003822 +epoch: 0, batch: 12944, sum loss: 7266.907227, avg loss: 3.592144, ppl: 36.311836 +epoch: 0, batch: 12945, sum loss: 6732.168945, avg loss: 3.880213, ppl: 48.434521 +epoch: 0, batch: 12946, sum loss: 6144.397949, avg loss: 3.455792, ppl: 31.683371 +epoch: 0, batch: 12947, sum loss: 5672.571289, avg loss: 3.709988, ppl: 40.853310 +epoch: 0, batch: 12948, sum loss: 6411.405273, avg loss: 3.649064, ppl: 38.438663 +epoch: 0, batch: 12949, sum loss: 5795.723145, avg loss: 3.348193, ppl: 28.451288 +epoch: 0, batch: 12950, sum loss: 6087.753418, avg loss: 3.608627, ppl: 36.915329 +epoch: 0, batch: 12951, sum loss: 5652.593262, avg loss: 3.482805, ppl: 32.550915 +epoch: 0, batch: 12952, sum loss: 5112.191406, avg loss: 3.374384, ppl: 29.206285 +epoch: 0, batch: 12953, sum loss: 6453.474121, avg loss: 3.609326, ppl: 36.941132 +epoch: 0, batch: 12954, sum loss: 5035.120117, avg loss: 3.436942, ppl: 31.091738 +epoch: 0, batch: 12955, sum loss: 6246.414062, avg loss: 3.625313, ppl: 37.536465 +epoch: 0, batch: 12956, sum loss: 4994.381348, avg loss: 3.624370, ppl: 37.501076 +epoch: 0, batch: 12957, sum loss: 5733.313477, avg loss: 3.563277, ppl: 35.278633 +epoch: 0, batch: 12958, sum loss: 7216.673828, avg loss: 3.869530, ppl: 47.919868 +epoch: 0, batch: 12959, sum loss: 6222.732910, avg loss: 3.744123, ppl: 
42.271938 +epoch: 0, batch: 12960, sum loss: 6170.703125, avg loss: 3.625560, ppl: 37.545746 +epoch: 0, batch: 12961, sum loss: 5974.354492, avg loss: 3.545611, ppl: 34.660854 +epoch: 0, batch: 12962, sum loss: 5565.798340, avg loss: 3.387583, ppl: 29.594322 +epoch: 0, batch: 12963, sum loss: 7099.400391, avg loss: 3.772264, ppl: 43.478378 +epoch: 0, batch: 12964, sum loss: 5488.900391, avg loss: 3.409255, ppl: 30.242699 +epoch: 0, batch: 12965, sum loss: 6080.697266, avg loss: 3.617309, ppl: 37.237240 +epoch: 0, batch: 12966, sum loss: 5680.755371, avg loss: 3.861832, ppl: 47.552406 +epoch: 0, batch: 12967, sum loss: 5379.118164, avg loss: 3.766890, ppl: 43.245342 +epoch: 0, batch: 12968, sum loss: 6329.135742, avg loss: 3.718646, ppl: 41.208561 +epoch: 0, batch: 12969, sum loss: 6213.503906, avg loss: 3.528395, ppl: 34.069248 +epoch: 0, batch: 12970, sum loss: 6323.083496, avg loss: 3.489560, ppl: 32.771538 +epoch: 0, batch: 12971, sum loss: 5384.090820, avg loss: 3.409811, ppl: 30.259510 +epoch: 0, batch: 12972, sum loss: 5229.442383, avg loss: 3.138921, ppl: 23.078947 +epoch: 0, batch: 12973, sum loss: 6003.386719, avg loss: 3.603473, ppl: 36.725567 +epoch: 0, batch: 12974, sum loss: 5423.214844, avg loss: 3.530739, ppl: 34.149189 +epoch: 0, batch: 12975, sum loss: 5511.759277, avg loss: 3.462160, ppl: 31.885786 +epoch: 0, batch: 12976, sum loss: 4809.403809, avg loss: 3.182928, ppl: 24.117256 +epoch: 0, batch: 12977, sum loss: 6028.462891, avg loss: 3.594790, ppl: 36.408054 +epoch: 0, batch: 12978, sum loss: 7295.022461, avg loss: 3.397775, ppl: 29.897503 +epoch: 0, batch: 12979, sum loss: 6353.016602, avg loss: 3.443369, ppl: 31.292217 +epoch: 0, batch: 12980, sum loss: 7264.055176, avg loss: 3.924395, ppl: 50.622448 +epoch: 0, batch: 12981, sum loss: 5581.630859, avg loss: 3.322399, ppl: 27.726797 +epoch: 0, batch: 12982, sum loss: 5108.732422, avg loss: 3.520836, ppl: 33.812672 +epoch: 0, batch: 12983, sum loss: 5792.609863, avg loss: 3.401415, ppl: 
30.006533 +epoch: 0, batch: 12984, sum loss: 6170.099121, avg loss: 3.666131, ppl: 39.100346 +epoch: 0, batch: 12985, sum loss: 6141.835938, avg loss: 3.606480, ppl: 36.836174 +epoch: 0, batch: 12986, sum loss: 6332.608398, avg loss: 3.610381, ppl: 36.980137 +epoch: 0, batch: 12987, sum loss: 5331.695312, avg loss: 3.332309, ppl: 28.002939 +epoch: 0, batch: 12988, sum loss: 5707.444824, avg loss: 3.425837, ppl: 30.748371 +epoch: 0, batch: 12989, sum loss: 6972.854004, avg loss: 3.804067, ppl: 44.883339 +epoch: 0, batch: 12990, sum loss: 7059.355469, avg loss: 3.533211, ppl: 34.233715 +epoch: 0, batch: 12991, sum loss: 5613.602051, avg loss: 3.463049, ppl: 31.914125 +epoch: 0, batch: 12992, sum loss: 6200.159180, avg loss: 3.569464, ppl: 35.497570 +epoch: 0, batch: 12993, sum loss: 6119.770508, avg loss: 3.599865, ppl: 36.593300 +epoch: 0, batch: 12994, sum loss: 6474.069336, avg loss: 3.695245, ppl: 40.255444 +epoch: 0, batch: 12995, sum loss: 6126.128906, avg loss: 3.516721, ppl: 33.673847 +epoch: 0, batch: 12996, sum loss: 6866.274414, avg loss: 3.745922, ppl: 42.348026 +epoch: 0, batch: 12997, sum loss: 5622.493652, avg loss: 3.738360, ppl: 42.029003 +epoch: 0, batch: 12998, sum loss: 6494.328125, avg loss: 3.414474, ppl: 30.400940 +epoch: 0, batch: 12999, sum loss: 5439.960938, avg loss: 3.614592, ppl: 37.136192 +epoch: 0, batch: 13000, sum loss: 5553.753906, avg loss: 3.512811, ppl: 33.542412 +epoch: 0, batch: 13001, sum loss: 6441.543457, avg loss: 3.566746, ppl: 35.401218 +epoch: 0, batch: 13002, sum loss: 6012.664062, avg loss: 3.810307, ppl: 45.164291 +epoch: 0, batch: 13003, sum loss: 6582.825684, avg loss: 3.663231, ppl: 38.987103 +epoch: 0, batch: 13004, sum loss: 6037.720215, avg loss: 3.473947, ppl: 32.263847 +epoch: 0, batch: 13005, sum loss: 5562.320801, avg loss: 3.260446, ppl: 26.061153 +epoch: 0, batch: 13006, sum loss: 6892.024902, avg loss: 3.595214, ppl: 36.423500 +epoch: 0, batch: 13007, sum loss: 4834.991699, avg loss: 3.385849, ppl: 
29.543051 +epoch: 0, batch: 13008, sum loss: 6126.896973, avg loss: 3.644793, ppl: 38.274841 +epoch: 0, batch: 13009, sum loss: 4913.844727, avg loss: 3.300097, ppl: 27.115274 +epoch: 0, batch: 13010, sum loss: 6336.667480, avg loss: 3.671302, ppl: 39.303047 +epoch: 0, batch: 13011, sum loss: 5737.830078, avg loss: 3.483807, ppl: 32.583527 +epoch: 0, batch: 13012, sum loss: 4533.805176, avg loss: 3.163856, ppl: 23.661655 +epoch: 0, batch: 13013, sum loss: 5812.437012, avg loss: 3.559361, ppl: 35.140743 +epoch: 0, batch: 13014, sum loss: 5119.513184, avg loss: 3.461469, ppl: 31.863756 +epoch: 0, batch: 13015, sum loss: 6398.613770, avg loss: 3.637643, ppl: 38.002155 +epoch: 0, batch: 13016, sum loss: 6050.411621, avg loss: 3.666916, ppl: 39.131046 +epoch: 0, batch: 13017, sum loss: 5547.989258, avg loss: 3.476184, ppl: 32.336082 +epoch: 0, batch: 13018, sum loss: 5313.241699, avg loss: 3.432327, ppl: 30.948570 +epoch: 0, batch: 13019, sum loss: 6165.413086, avg loss: 3.531165, ppl: 34.163734 +epoch: 0, batch: 13020, sum loss: 5890.845215, avg loss: 3.436899, ppl: 31.090403 +epoch: 0, batch: 13021, sum loss: 6446.886719, avg loss: 3.553962, ppl: 34.951523 +epoch: 0, batch: 13022, sum loss: 5613.200195, avg loss: 3.399879, ppl: 29.960482 +epoch: 0, batch: 13023, sum loss: 5153.306641, avg loss: 3.363777, ppl: 28.898138 +epoch: 0, batch: 13024, sum loss: 5499.824707, avg loss: 3.289369, ppl: 26.825933 +epoch: 0, batch: 13025, sum loss: 6885.776367, avg loss: 3.789640, ppl: 44.240490 +epoch: 0, batch: 13026, sum loss: 5027.597656, avg loss: 3.325131, ppl: 27.802639 +epoch: 0, batch: 13027, sum loss: 5800.697754, avg loss: 3.428308, ppl: 30.824459 +epoch: 0, batch: 13028, sum loss: 6699.213379, avg loss: 3.693062, ppl: 40.167637 +epoch: 0, batch: 13029, sum loss: 4613.068848, avg loss: 3.241791, ppl: 25.579500 +epoch: 0, batch: 13030, sum loss: 5215.031250, avg loss: 3.319562, ppl: 27.648230 +epoch: 0, batch: 13031, sum loss: 5609.290039, avg loss: 3.393400, ppl: 
29.766979 +epoch: 0, batch: 13032, sum loss: 6684.512695, avg loss: 3.617161, ppl: 37.231701 +epoch: 0, batch: 13033, sum loss: 5405.382324, avg loss: 3.596395, ppl: 36.466545 +epoch: 0, batch: 13034, sum loss: 5417.409180, avg loss: 3.263499, ppl: 26.140858 +epoch: 0, batch: 13035, sum loss: 6467.778809, avg loss: 3.833894, ppl: 46.242256 +epoch: 0, batch: 13036, sum loss: 6896.794434, avg loss: 3.524167, ppl: 33.925495 +epoch: 0, batch: 13037, sum loss: 6157.715820, avg loss: 3.613683, ppl: 37.102448 +epoch: 0, batch: 13038, sum loss: 6028.913086, avg loss: 3.726151, ppl: 41.519012 +epoch: 0, batch: 13039, sum loss: 6185.074219, avg loss: 3.801521, ppl: 44.769207 +epoch: 0, batch: 13040, sum loss: 6731.704590, avg loss: 3.886665, ppl: 48.748058 +epoch: 0, batch: 13041, sum loss: 7030.110840, avg loss: 3.659610, ppl: 38.846199 +epoch: 0, batch: 13042, sum loss: 6699.525879, avg loss: 3.728173, ppl: 41.603012 +epoch: 0, batch: 13043, sum loss: 6045.688965, avg loss: 3.502717, ppl: 33.205532 +epoch: 0, batch: 13044, sum loss: 6183.967285, avg loss: 3.485889, ppl: 32.651440 +epoch: 0, batch: 13045, sum loss: 5557.233887, avg loss: 3.455991, ppl: 31.689678 +epoch: 0, batch: 13046, sum loss: 5304.392090, avg loss: 3.591329, ppl: 36.282249 +epoch: 0, batch: 13047, sum loss: 5600.894043, avg loss: 3.431921, ppl: 30.935999 +epoch: 0, batch: 13048, sum loss: 6413.607422, avg loss: 3.583021, ppl: 35.982067 +epoch: 0, batch: 13049, sum loss: 6267.941406, avg loss: 3.826582, ppl: 45.905354 +epoch: 0, batch: 13050, sum loss: 6556.988281, avg loss: 3.510165, ppl: 33.453785 +epoch: 0, batch: 13051, sum loss: 5324.845703, avg loss: 3.590591, ppl: 36.255486 +epoch: 0, batch: 13052, sum loss: 5897.620605, avg loss: 3.578653, ppl: 35.825264 +epoch: 0, batch: 13053, sum loss: 6698.334961, avg loss: 3.628567, ppl: 37.658821 +epoch: 0, batch: 13054, sum loss: 6560.146973, avg loss: 3.748655, ppl: 42.463943 +epoch: 0, batch: 13055, sum loss: 6875.453125, avg loss: 3.684594, ppl: 
39.828957 +epoch: 0, batch: 13056, sum loss: 6869.387207, avg loss: 3.642305, ppl: 38.179745 +epoch: 0, batch: 13057, sum loss: 5011.051758, avg loss: 3.191753, ppl: 24.331034 +epoch: 0, batch: 13058, sum loss: 5853.469727, avg loss: 3.415093, ppl: 30.419785 +epoch: 0, batch: 13059, sum loss: 5363.709473, avg loss: 3.268561, ppl: 26.273514 +epoch: 0, batch: 13060, sum loss: 7273.690430, avg loss: 3.902194, ppl: 49.510983 +epoch: 0, batch: 13061, sum loss: 5718.546875, avg loss: 3.371785, ppl: 29.130470 +epoch: 0, batch: 13062, sum loss: 4870.875977, avg loss: 3.476714, ppl: 32.353222 +epoch: 0, batch: 13063, sum loss: 5327.207031, avg loss: 3.570514, ppl: 35.534859 +epoch: 0, batch: 13064, sum loss: 6822.838379, avg loss: 3.937010, ppl: 51.265079 +epoch: 0, batch: 13065, sum loss: 5702.528320, avg loss: 3.406528, ppl: 30.160360 +epoch: 0, batch: 13066, sum loss: 5217.125977, avg loss: 3.566047, ppl: 35.376465 +epoch: 0, batch: 13067, sum loss: 6285.852539, avg loss: 3.612559, ppl: 37.060764 +epoch: 0, batch: 13068, sum loss: 4934.835938, avg loss: 3.497403, ppl: 33.029568 +epoch: 0, batch: 13069, sum loss: 6482.861328, avg loss: 3.540613, ppl: 34.488045 +epoch: 0, batch: 13070, sum loss: 4437.978516, avg loss: 3.377457, ppl: 29.296181 +epoch: 0, batch: 13071, sum loss: 5389.546387, avg loss: 3.612297, ppl: 37.051044 +epoch: 0, batch: 13072, sum loss: 5498.877441, avg loss: 3.375615, ppl: 29.242273 +epoch: 0, batch: 13073, sum loss: 4836.375977, avg loss: 3.439812, ppl: 31.181101 +epoch: 0, batch: 13074, sum loss: 6048.754883, avg loss: 3.838042, ppl: 46.434479 +epoch: 0, batch: 13075, sum loss: 6001.543945, avg loss: 3.501484, ppl: 33.164639 +epoch: 0, batch: 13076, sum loss: 5201.332520, avg loss: 3.476827, ppl: 32.356880 +epoch: 0, batch: 13077, sum loss: 5040.220703, avg loss: 3.407857, ppl: 30.200453 +epoch: 0, batch: 13078, sum loss: 5494.623047, avg loss: 3.332094, ppl: 27.996904 +epoch: 0, batch: 13079, sum loss: 5498.840332, avg loss: 3.261471, ppl: 
26.087891 +epoch: 0, batch: 13080, sum loss: 6156.071289, avg loss: 3.762880, ppl: 43.072289 +epoch: 0, batch: 13081, sum loss: 5645.740234, avg loss: 3.573253, ppl: 35.632332 +epoch: 0, batch: 13082, sum loss: 5927.842773, avg loss: 3.570990, ppl: 35.551758 +epoch: 0, batch: 13083, sum loss: 4511.466797, avg loss: 3.009651, ppl: 20.280319 +epoch: 0, batch: 13084, sum loss: 6202.146484, avg loss: 3.807333, ppl: 45.030205 +epoch: 0, batch: 13085, sum loss: 5086.497559, avg loss: 3.366312, ppl: 28.971483 +epoch: 0, batch: 13086, sum loss: 5992.236328, avg loss: 3.618500, ppl: 37.281612 +epoch: 0, batch: 13087, sum loss: 6794.497070, avg loss: 3.666755, ppl: 39.124737 +epoch: 0, batch: 13088, sum loss: 5925.683594, avg loss: 3.666883, ppl: 39.129768 +epoch: 0, batch: 13089, sum loss: 6244.027832, avg loss: 3.672958, ppl: 39.368172 +epoch: 0, batch: 13090, sum loss: 4481.231934, avg loss: 3.244918, ppl: 25.659609 +epoch: 0, batch: 13091, sum loss: 5609.122070, avg loss: 3.586395, ppl: 36.103695 +epoch: 0, batch: 13092, sum loss: 6076.792969, avg loss: 3.630104, ppl: 37.716724 +epoch: 0, batch: 13093, sum loss: 5583.327148, avg loss: 3.410707, ppl: 30.286650 +epoch: 0, batch: 13094, sum loss: 6205.918457, avg loss: 3.552329, ppl: 34.894485 +epoch: 0, batch: 13095, sum loss: 6209.432129, avg loss: 3.394988, ppl: 29.814283 +epoch: 0, batch: 13096, sum loss: 5544.651367, avg loss: 3.518180, ppl: 33.722984 +epoch: 0, batch: 13097, sum loss: 5977.989258, avg loss: 3.594702, ppl: 36.404850 +epoch: 0, batch: 13098, sum loss: 5587.272949, avg loss: 3.567863, ppl: 35.440758 +epoch: 0, batch: 13099, sum loss: 5088.144531, avg loss: 3.218307, ppl: 24.985796 +epoch: 0, batch: 13100, sum loss: 5664.411621, avg loss: 3.496551, ppl: 33.001419 +epoch: 0, batch: 13101, sum loss: 6787.065918, avg loss: 3.772688, ppl: 43.496834 +epoch: 0, batch: 13102, sum loss: 6453.054688, avg loss: 3.695908, ppl: 40.282124 +epoch: 0, batch: 13103, sum loss: 6727.000977, avg loss: 3.540527, ppl: 
34.485092 +epoch: 0, batch: 13104, sum loss: 5674.583008, avg loss: 3.291522, ppl: 26.883743 +epoch: 0, batch: 13105, sum loss: 6904.729492, avg loss: 3.850937, ppl: 47.037109 +epoch: 0, batch: 13106, sum loss: 6860.140137, avg loss: 3.732394, ppl: 41.779007 +epoch: 0, batch: 13107, sum loss: 5726.170410, avg loss: 3.441208, ppl: 31.224667 +epoch: 0, batch: 13108, sum loss: 5401.710938, avg loss: 3.429658, ppl: 30.866076 +epoch: 0, batch: 13109, sum loss: 6947.599609, avg loss: 3.639392, ppl: 38.068699 +epoch: 0, batch: 13110, sum loss: 6464.537598, avg loss: 3.611473, ppl: 37.020557 +epoch: 0, batch: 13111, sum loss: 5155.575195, avg loss: 3.248630, ppl: 25.755026 +epoch: 0, batch: 13112, sum loss: 7045.496094, avg loss: 3.985009, ppl: 53.785786 +epoch: 0, batch: 13113, sum loss: 6250.087891, avg loss: 3.753807, ppl: 42.683250 +epoch: 0, batch: 13114, sum loss: 6031.807129, avg loss: 3.689178, ppl: 40.011963 +epoch: 0, batch: 13115, sum loss: 5501.788086, avg loss: 3.236346, ppl: 25.440592 +epoch: 0, batch: 13116, sum loss: 6927.734375, avg loss: 3.481273, ppl: 32.501083 +epoch: 0, batch: 13117, sum loss: 7284.549805, avg loss: 3.724207, ppl: 41.438377 +epoch: 0, batch: 13118, sum loss: 5886.125488, avg loss: 3.359661, ppl: 28.779428 +epoch: 0, batch: 13119, sum loss: 7075.154297, avg loss: 3.775429, ppl: 43.616215 +epoch: 0, batch: 13120, sum loss: 6185.723145, avg loss: 3.699595, ppl: 40.430946 +epoch: 0, batch: 13121, sum loss: 6538.518555, avg loss: 3.764260, ppl: 43.131760 +epoch: 0, batch: 13122, sum loss: 4906.945801, avg loss: 3.354030, ppl: 28.617836 +epoch: 0, batch: 13123, sum loss: 7232.155762, avg loss: 3.747231, ppl: 42.403503 +epoch: 0, batch: 13124, sum loss: 6044.620117, avg loss: 3.570360, ppl: 35.529396 +epoch: 0, batch: 13125, sum loss: 6247.098145, avg loss: 3.476404, ppl: 32.343212 +epoch: 0, batch: 13126, sum loss: 6054.842285, avg loss: 3.604073, ppl: 36.747597 +epoch: 0, batch: 13127, sum loss: 6431.393555, avg loss: 3.502938, ppl: 
33.212872 +epoch: 0, batch: 13128, sum loss: 6191.449219, avg loss: 3.474439, ppl: 32.279705 +epoch: 0, batch: 13129, sum loss: 5988.572266, avg loss: 3.556159, ppl: 35.028412 +epoch: 0, batch: 13130, sum loss: 6707.352539, avg loss: 3.675262, ppl: 39.458984 +epoch: 0, batch: 13131, sum loss: 5618.779785, avg loss: 3.161947, ppl: 23.616533 +epoch: 0, batch: 13132, sum loss: 6205.674316, avg loss: 3.363509, ppl: 28.890388 +epoch: 0, batch: 13133, sum loss: 6407.502441, avg loss: 3.883335, ppl: 48.585972 +epoch: 0, batch: 13134, sum loss: 5356.552734, avg loss: 3.449165, ppl: 31.474096 +epoch: 0, batch: 13135, sum loss: 4804.907715, avg loss: 3.419863, ppl: 30.565233 +epoch: 0, batch: 13136, sum loss: 6766.142090, avg loss: 3.977744, ppl: 53.396416 +epoch: 0, batch: 13137, sum loss: 6124.652832, avg loss: 3.468093, ppl: 32.075520 +epoch: 0, batch: 13138, sum loss: 5192.506348, avg loss: 3.343533, ppl: 28.318996 +epoch: 0, batch: 13139, sum loss: 6114.916992, avg loss: 3.435347, ppl: 31.042170 +epoch: 0, batch: 13140, sum loss: 6321.605469, avg loss: 3.508105, ppl: 33.384953 +epoch: 0, batch: 13141, sum loss: 5467.889160, avg loss: 3.465075, ppl: 31.978874 +epoch: 0, batch: 13142, sum loss: 5967.593750, avg loss: 3.348818, ppl: 28.469072 +epoch: 0, batch: 13143, sum loss: 5291.135742, avg loss: 3.424684, ppl: 30.712925 +epoch: 0, batch: 13144, sum loss: 5537.266113, avg loss: 3.563234, ppl: 35.277111 +epoch: 0, batch: 13145, sum loss: 6240.464355, avg loss: 3.559877, ppl: 35.158878 +epoch: 0, batch: 13146, sum loss: 4535.476562, avg loss: 3.185026, ppl: 24.167910 +epoch: 0, batch: 13147, sum loss: 7214.486328, avg loss: 3.783160, ppl: 43.954727 +epoch: 0, batch: 13148, sum loss: 6636.770996, avg loss: 3.840724, ppl: 46.559170 +epoch: 0, batch: 13149, sum loss: 6287.025879, avg loss: 3.526094, ppl: 33.990955 +epoch: 0, batch: 13150, sum loss: 6110.553711, avg loss: 3.299435, ppl: 27.097322 +epoch: 0, batch: 13151, sum loss: 5363.924805, avg loss: 3.311065, ppl: 
27.414305 +epoch: 0, batch: 13152, sum loss: 6849.167480, avg loss: 3.802980, ppl: 44.834602 +epoch: 0, batch: 13153, sum loss: 5763.924805, avg loss: 3.735532, ppl: 41.910297 +epoch: 0, batch: 13154, sum loss: 5022.693359, avg loss: 3.253040, ppl: 25.868858 +epoch: 0, batch: 13155, sum loss: 5663.842773, avg loss: 3.296765, ppl: 27.025080 +epoch: 0, batch: 13156, sum loss: 5375.645996, avg loss: 3.406620, ppl: 30.163115 +epoch: 0, batch: 13157, sum loss: 5507.737305, avg loss: 3.455293, ppl: 31.667572 +epoch: 0, batch: 13158, sum loss: 5581.889648, avg loss: 3.352486, ppl: 28.573690 +epoch: 0, batch: 13159, sum loss: 5165.966309, avg loss: 3.345833, ppl: 28.384218 +epoch: 0, batch: 13160, sum loss: 5336.591309, avg loss: 3.252036, ppl: 25.842905 +epoch: 0, batch: 13161, sum loss: 6428.130859, avg loss: 3.585126, ppl: 36.057907 +epoch: 0, batch: 13162, sum loss: 6449.157227, avg loss: 3.729993, ppl: 41.678802 +epoch: 0, batch: 13163, sum loss: 5667.076172, avg loss: 3.470347, ppl: 32.147884 +epoch: 0, batch: 13164, sum loss: 4947.136719, avg loss: 3.135068, ppl: 22.990192 +epoch: 0, batch: 13165, sum loss: 5023.093262, avg loss: 3.213751, ppl: 24.872215 +epoch: 0, batch: 13166, sum loss: 6516.393066, avg loss: 3.458807, ppl: 31.779057 +epoch: 0, batch: 13167, sum loss: 5770.624512, avg loss: 3.349173, ppl: 28.479160 +epoch: 0, batch: 13168, sum loss: 5978.006836, avg loss: 3.717666, ppl: 41.168190 +epoch: 0, batch: 13169, sum loss: 6910.912109, avg loss: 3.499196, ppl: 33.088840 +epoch: 0, batch: 13170, sum loss: 5257.770020, avg loss: 3.215761, ppl: 24.922260 +epoch: 0, batch: 13171, sum loss: 6316.101074, avg loss: 3.403072, ppl: 30.056280 +epoch: 0, batch: 13172, sum loss: 5722.791992, avg loss: 3.420677, ppl: 30.590109 +epoch: 0, batch: 13173, sum loss: 5719.755371, avg loss: 3.750659, ppl: 42.549122 +epoch: 0, batch: 13174, sum loss: 4337.630859, avg loss: 3.175425, ppl: 23.936998 +epoch: 0, batch: 13175, sum loss: 5877.681641, avg loss: 3.451369, ppl: 
31.543547 +epoch: 0, batch: 13176, sum loss: 6996.688965, avg loss: 3.610263, ppl: 36.975773 +epoch: 0, batch: 13177, sum loss: 6412.168457, avg loss: 3.769646, ppl: 43.364719 +epoch: 0, batch: 13178, sum loss: 6648.309082, avg loss: 3.890175, ppl: 48.919453 +epoch: 0, batch: 13179, sum loss: 6159.313477, avg loss: 3.517598, ppl: 33.703373 +epoch: 0, batch: 13180, sum loss: 5728.720215, avg loss: 3.749162, ppl: 42.485481 +epoch: 0, batch: 13181, sum loss: 5896.104492, avg loss: 3.507498, ppl: 33.364685 +epoch: 0, batch: 13182, sum loss: 6468.472656, avg loss: 3.782732, ppl: 43.935932 +epoch: 0, batch: 13183, sum loss: 6607.698242, avg loss: 3.733163, ppl: 41.811142 +epoch: 0, batch: 13184, sum loss: 6560.380371, avg loss: 3.900345, ppl: 49.419498 +epoch: 0, batch: 13185, sum loss: 5243.374512, avg loss: 3.385006, ppl: 29.518175 +epoch: 0, batch: 13186, sum loss: 6387.962891, avg loss: 3.527313, ppl: 34.032391 +epoch: 0, batch: 13187, sum loss: 4893.909180, avg loss: 3.403275, ppl: 30.062387 +epoch: 0, batch: 13188, sum loss: 6827.434082, avg loss: 3.600968, ppl: 36.633667 +epoch: 0, batch: 13189, sum loss: 5679.905273, avg loss: 3.376876, ppl: 29.279156 +epoch: 0, batch: 13190, sum loss: 5732.640625, avg loss: 3.580663, ppl: 35.897324 +epoch: 0, batch: 13191, sum loss: 6597.849609, avg loss: 3.479879, ppl: 32.455784 +epoch: 0, batch: 13192, sum loss: 6821.367188, avg loss: 3.483845, ppl: 32.584755 +epoch: 0, batch: 13193, sum loss: 4781.988281, avg loss: 3.503288, ppl: 33.224525 +epoch: 0, batch: 13194, sum loss: 6082.159180, avg loss: 3.343683, ppl: 28.323250 +epoch: 0, batch: 13195, sum loss: 5745.741211, avg loss: 3.428247, ppl: 30.822556 +epoch: 0, batch: 13196, sum loss: 7014.924316, avg loss: 3.661234, ppl: 38.909332 +epoch: 0, batch: 13197, sum loss: 6276.677246, avg loss: 3.700871, ppl: 40.482548 +epoch: 0, batch: 13198, sum loss: 4842.344727, avg loss: 3.241194, ppl: 25.564240 +epoch: 0, batch: 13199, sum loss: 5923.845703, avg loss: 3.442095, ppl: 
31.252365 +epoch: 0, batch: 13200, sum loss: 5587.321777, avg loss: 3.637579, ppl: 37.999744 +epoch: 0, batch: 13201, sum loss: 6056.153320, avg loss: 3.344094, ppl: 28.334887 +epoch: 0, batch: 13202, sum loss: 5690.333984, avg loss: 3.652333, ppl: 38.564514 +epoch: 0, batch: 13203, sum loss: 5669.422852, avg loss: 3.442272, ppl: 31.257908 +epoch: 0, batch: 13204, sum loss: 5421.416016, avg loss: 3.181582, ppl: 24.084831 +epoch: 0, batch: 13205, sum loss: 6401.763184, avg loss: 3.635300, ppl: 37.913223 +epoch: 0, batch: 13206, sum loss: 5194.860352, avg loss: 3.419921, ppl: 30.567005 +epoch: 0, batch: 13207, sum loss: 5608.664551, avg loss: 3.432475, ppl: 30.953167 +epoch: 0, batch: 13208, sum loss: 5433.050293, avg loss: 3.567334, ppl: 35.422047 +epoch: 0, batch: 13209, sum loss: 6316.372070, avg loss: 3.493569, ppl: 32.903160 +epoch: 0, batch: 13210, sum loss: 5956.320312, avg loss: 3.505780, ppl: 33.307423 +epoch: 0, batch: 13211, sum loss: 6171.803223, avg loss: 3.504715, ppl: 33.271957 +epoch: 0, batch: 13212, sum loss: 5057.828125, avg loss: 3.231839, ppl: 25.326187 +epoch: 0, batch: 13213, sum loss: 6809.248535, avg loss: 3.580047, ppl: 35.875214 +epoch: 0, batch: 13214, sum loss: 6860.949707, avg loss: 3.747105, ppl: 42.398178 +epoch: 0, batch: 13215, sum loss: 5432.306152, avg loss: 3.405835, ppl: 30.139442 +epoch: 0, batch: 13216, sum loss: 6098.954102, avg loss: 3.585511, ppl: 36.071777 +epoch: 0, batch: 13217, sum loss: 6547.165527, avg loss: 3.565994, ppl: 35.374607 +epoch: 0, batch: 13218, sum loss: 5775.228027, avg loss: 3.562756, ppl: 35.260250 +epoch: 0, batch: 13219, sum loss: 6421.262207, avg loss: 3.625783, ppl: 37.554123 +epoch: 0, batch: 13220, sum loss: 5072.121094, avg loss: 3.278682, ppl: 26.540775 +epoch: 0, batch: 13221, sum loss: 5191.812012, avg loss: 3.429202, ppl: 30.852022 +epoch: 0, batch: 13222, sum loss: 5433.142090, avg loss: 3.406359, ppl: 30.155241 +epoch: 0, batch: 13223, sum loss: 7089.908691, avg loss: 3.615456, ppl: 
37.168285 +epoch: 0, batch: 13224, sum loss: 5644.529297, avg loss: 3.527831, ppl: 34.050026 +epoch: 0, batch: 13225, sum loss: 5648.021484, avg loss: 3.392205, ppl: 29.731445 +epoch: 0, batch: 13226, sum loss: 5681.746094, avg loss: 3.553312, ppl: 34.928814 +epoch: 0, batch: 13227, sum loss: 6182.288086, avg loss: 3.502713, ppl: 33.205406 +epoch: 0, batch: 13228, sum loss: 5199.767090, avg loss: 3.305637, ppl: 27.265902 +epoch: 0, batch: 13229, sum loss: 4850.198730, avg loss: 3.410829, ppl: 30.290339 +epoch: 0, batch: 13230, sum loss: 5086.431152, avg loss: 3.427514, ppl: 30.799980 +epoch: 0, batch: 13231, sum loss: 5017.625977, avg loss: 3.153756, ppl: 23.423883 +epoch: 0, batch: 13232, sum loss: 5093.997070, avg loss: 3.353520, ppl: 28.603245 +epoch: 0, batch: 13233, sum loss: 4929.265625, avg loss: 3.513376, ppl: 33.561363 +epoch: 0, batch: 13234, sum loss: 7383.935547, avg loss: 3.688279, ppl: 39.976006 +epoch: 0, batch: 13235, sum loss: 5468.570312, avg loss: 3.188671, ppl: 24.256161 +epoch: 0, batch: 13236, sum loss: 5579.396973, avg loss: 3.515688, ppl: 33.639069 +epoch: 0, batch: 13237, sum loss: 5551.603027, avg loss: 3.205313, ppl: 24.663229 +epoch: 0, batch: 13238, sum loss: 5758.340820, avg loss: 3.677101, ppl: 39.531639 +epoch: 0, batch: 13239, sum loss: 6571.512207, avg loss: 3.546418, ppl: 34.688828 +epoch: 0, batch: 13240, sum loss: 5929.586914, avg loss: 3.600235, ppl: 36.606827 +epoch: 0, batch: 13241, sum loss: 6189.990234, avg loss: 3.624116, ppl: 37.491566 +epoch: 0, batch: 13242, sum loss: 6717.580078, avg loss: 3.898770, ppl: 49.341702 +epoch: 0, batch: 13243, sum loss: 5700.791992, avg loss: 3.543065, ppl: 34.572735 +epoch: 0, batch: 13244, sum loss: 5605.798828, avg loss: 3.577409, ppl: 35.780697 +epoch: 0, batch: 13245, sum loss: 7295.095703, avg loss: 3.760359, ppl: 42.963840 +epoch: 0, batch: 13246, sum loss: 5056.758789, avg loss: 3.548603, ppl: 34.764702 +epoch: 0, batch: 13247, sum loss: 8528.407227, avg loss: 4.082531, ppl: 
59.295357 +epoch: 0, batch: 13248, sum loss: 5226.139648, avg loss: 3.435989, ppl: 31.062130 +epoch: 0, batch: 13249, sum loss: 6013.421875, avg loss: 3.607332, ppl: 36.867542 +epoch: 0, batch: 13250, sum loss: 8095.319824, avg loss: 3.767017, ppl: 43.250870 +epoch: 0, batch: 13251, sum loss: 6298.805176, avg loss: 3.713918, ppl: 41.014194 +epoch: 0, batch: 13252, sum loss: 5732.266113, avg loss: 3.401938, ppl: 30.022240 +epoch: 0, batch: 13253, sum loss: 5595.609375, avg loss: 3.492890, ppl: 32.880825 +epoch: 0, batch: 13254, sum loss: 5624.549316, avg loss: 3.544140, ppl: 34.609894 +epoch: 0, batch: 13255, sum loss: 6568.848145, avg loss: 3.682090, ppl: 39.729343 +epoch: 0, batch: 13256, sum loss: 6553.729004, avg loss: 3.723710, ppl: 41.417751 +epoch: 0, batch: 13257, sum loss: 4023.245605, avg loss: 3.341566, ppl: 28.263355 +epoch: 0, batch: 13258, sum loss: 4801.018555, avg loss: 3.239554, ppl: 25.522335 +epoch: 0, batch: 13259, sum loss: 6110.558594, avg loss: 3.583905, ppl: 36.013916 +epoch: 0, batch: 13260, sum loss: 6768.939941, avg loss: 3.619754, ppl: 37.328388 +epoch: 0, batch: 13261, sum loss: 6598.590332, avg loss: 3.643617, ppl: 38.229870 +epoch: 0, batch: 13262, sum loss: 5214.626953, avg loss: 3.340568, ppl: 28.235168 +epoch: 0, batch: 13263, sum loss: 5140.760742, avg loss: 3.183134, ppl: 24.122225 +epoch: 0, batch: 13264, sum loss: 5876.861328, avg loss: 3.438772, ppl: 31.148684 +epoch: 0, batch: 13265, sum loss: 5831.767090, avg loss: 3.479575, ppl: 32.445919 +epoch: 0, batch: 13266, sum loss: 5594.492676, avg loss: 3.661317, ppl: 38.912571 +epoch: 0, batch: 13267, sum loss: 7029.415039, avg loss: 3.570043, ppl: 35.518131 +epoch: 0, batch: 13268, sum loss: 4945.037109, avg loss: 3.412724, ppl: 30.347807 +epoch: 0, batch: 13269, sum loss: 6166.940430, avg loss: 3.755749, ppl: 42.766258 +epoch: 0, batch: 13270, sum loss: 6227.496094, avg loss: 3.740238, ppl: 42.108009 +epoch: 0, batch: 13271, sum loss: 6287.049316, avg loss: 3.663782, ppl: 
39.008579 +epoch: 0, batch: 13272, sum loss: 5604.169922, avg loss: 3.562727, ppl: 35.259224 +epoch: 0, batch: 13273, sum loss: 4713.433594, avg loss: 3.333404, ppl: 28.033615 +epoch: 0, batch: 13274, sum loss: 5263.267578, avg loss: 3.373890, ppl: 29.191854 +epoch: 0, batch: 13275, sum loss: 5547.399902, avg loss: 3.460636, ppl: 31.837223 +epoch: 0, batch: 13276, sum loss: 6505.790039, avg loss: 3.665234, ppl: 39.065262 +epoch: 0, batch: 13277, sum loss: 5530.861816, avg loss: 3.636333, ppl: 37.952400 +epoch: 0, batch: 13278, sum loss: 5651.261230, avg loss: 3.617965, ppl: 37.261662 +epoch: 0, batch: 13279, sum loss: 6076.140137, avg loss: 3.678051, ppl: 39.569199 +epoch: 0, batch: 13280, sum loss: 6398.934570, avg loss: 3.596928, ppl: 36.485966 +epoch: 0, batch: 13281, sum loss: 6672.284668, avg loss: 3.658051, ppl: 38.785667 +epoch: 0, batch: 13282, sum loss: 4658.621094, avg loss: 3.269208, ppl: 26.290501 +epoch: 0, batch: 13283, sum loss: 5663.537598, avg loss: 3.373161, ppl: 29.170591 +epoch: 0, batch: 13284, sum loss: 5921.372070, avg loss: 3.657426, ppl: 38.761429 +epoch: 0, batch: 13285, sum loss: 5872.452148, avg loss: 3.503850, ppl: 33.243191 +epoch: 0, batch: 13286, sum loss: 5674.729004, avg loss: 3.324387, ppl: 27.781971 +epoch: 0, batch: 13287, sum loss: 4989.168457, avg loss: 3.339470, ppl: 28.204178 +epoch: 0, batch: 13288, sum loss: 6264.845703, avg loss: 3.470829, ppl: 32.163380 +epoch: 0, batch: 13289, sum loss: 4647.074707, avg loss: 3.163427, ppl: 23.651514 +epoch: 0, batch: 13290, sum loss: 5544.316406, avg loss: 3.531412, ppl: 34.172180 +epoch: 0, batch: 13291, sum loss: 5076.479492, avg loss: 3.446354, ppl: 31.385756 +epoch: 0, batch: 13292, sum loss: 5822.747559, avg loss: 3.397169, ppl: 29.879389 +epoch: 0, batch: 13293, sum loss: 5587.993652, avg loss: 3.475120, ppl: 32.301716 +epoch: 0, batch: 13294, sum loss: 5618.102051, avg loss: 3.427762, ppl: 30.807619 +epoch: 0, batch: 13295, sum loss: 6259.492188, avg loss: 3.512622, ppl: 
33.536087 +epoch: 0, batch: 13296, sum loss: 5290.538574, avg loss: 3.478329, ppl: 32.405533 +epoch: 0, batch: 13297, sum loss: 4557.525391, avg loss: 3.527496, ppl: 34.038639 +epoch: 0, batch: 13298, sum loss: 6532.809082, avg loss: 3.703407, ppl: 40.585327 +epoch: 0, batch: 13299, sum loss: 6992.453125, avg loss: 3.632443, ppl: 37.805061 +epoch: 0, batch: 13300, sum loss: 5905.256836, avg loss: 3.429301, ppl: 30.855076 +epoch: 0, batch: 13301, sum loss: 6063.014160, avg loss: 3.452742, ppl: 31.586872 +epoch: 0, batch: 13302, sum loss: 6006.821777, avg loss: 3.680651, ppl: 39.672199 +epoch: 0, batch: 13303, sum loss: 6378.533203, avg loss: 3.725779, ppl: 41.503532 +epoch: 0, batch: 13304, sum loss: 6791.155762, avg loss: 3.631634, ppl: 37.774498 +epoch: 0, batch: 13305, sum loss: 5618.613281, avg loss: 3.556084, ppl: 35.025780 +epoch: 0, batch: 13306, sum loss: 5513.276367, avg loss: 3.685345, ppl: 39.858879 +epoch: 0, batch: 13307, sum loss: 6280.640137, avg loss: 3.657915, ppl: 38.780407 +epoch: 0, batch: 13308, sum loss: 5396.529297, avg loss: 3.339436, ppl: 28.203224 +epoch: 0, batch: 13309, sum loss: 5679.413086, avg loss: 3.290506, ppl: 26.856440 +epoch: 0, batch: 13310, sum loss: 4841.208984, avg loss: 3.354961, ppl: 28.644491 +epoch: 0, batch: 13311, sum loss: 5703.132812, avg loss: 3.419144, ppl: 30.543270 +epoch: 0, batch: 13312, sum loss: 6948.197754, avg loss: 3.544999, ppl: 34.639637 +epoch: 0, batch: 13313, sum loss: 5336.601562, avg loss: 3.333293, ppl: 28.030487 +epoch: 0, batch: 13314, sum loss: 6097.903809, avg loss: 3.547355, ppl: 34.721371 +epoch: 0, batch: 13315, sum loss: 6669.340820, avg loss: 3.622673, ppl: 37.437508 +epoch: 0, batch: 13316, sum loss: 5870.694336, avg loss: 3.542966, ppl: 34.569286 +epoch: 0, batch: 13317, sum loss: 5531.356445, avg loss: 3.498644, ppl: 33.070583 +epoch: 0, batch: 13318, sum loss: 6169.128906, avg loss: 3.529250, ppl: 34.098389 +epoch: 0, batch: 13319, sum loss: 5464.121094, avg loss: 3.571321, ppl: 
35.563541 +epoch: 0, batch: 13320, sum loss: 6098.196777, avg loss: 3.386006, ppl: 29.547699 +epoch: 0, batch: 13321, sum loss: 6663.964355, avg loss: 3.685821, ppl: 39.877850 +epoch: 0, batch: 13322, sum loss: 6080.678711, avg loss: 3.492636, ppl: 32.872475 +epoch: 0, batch: 13323, sum loss: 6534.887207, avg loss: 3.642635, ppl: 38.192345 +epoch: 0, batch: 13324, sum loss: 6608.667969, avg loss: 3.667408, ppl: 39.150276 +epoch: 0, batch: 13325, sum loss: 5062.313477, avg loss: 3.136502, ppl: 23.023180 +epoch: 0, batch: 13326, sum loss: 6325.065430, avg loss: 3.683789, ppl: 39.796890 +epoch: 0, batch: 13327, sum loss: 5932.012695, avg loss: 3.512145, ppl: 33.520092 +epoch: 0, batch: 13328, sum loss: 5833.970703, avg loss: 3.605668, ppl: 36.806255 +epoch: 0, batch: 13329, sum loss: 5887.499512, avg loss: 3.426950, ppl: 30.782604 +epoch: 0, batch: 13330, sum loss: 5571.377441, avg loss: 3.185464, ppl: 24.178514 +epoch: 0, batch: 13331, sum loss: 5684.072754, avg loss: 3.541478, ppl: 34.517906 +epoch: 0, batch: 13332, sum loss: 6120.555664, avg loss: 3.292391, ppl: 26.907129 +epoch: 0, batch: 13333, sum loss: 5375.121582, avg loss: 3.458894, ppl: 31.781815 +epoch: 0, batch: 13334, sum loss: 5658.168457, avg loss: 3.636355, ppl: 37.953243 +epoch: 0, batch: 13335, sum loss: 6160.550781, avg loss: 3.751858, ppl: 42.600170 +epoch: 0, batch: 13336, sum loss: 5852.145508, avg loss: 3.396486, ppl: 29.858992 +epoch: 0, batch: 13337, sum loss: 6133.533691, avg loss: 3.362683, ppl: 28.866537 +epoch: 0, batch: 13338, sum loss: 5116.618164, avg loss: 3.461853, ppl: 31.875980 +epoch: 0, batch: 13339, sum loss: 5869.922852, avg loss: 3.519139, ppl: 33.755337 +epoch: 0, batch: 13340, sum loss: 5651.146484, avg loss: 3.599456, ppl: 36.578342 +epoch: 0, batch: 13341, sum loss: 6513.164062, avg loss: 3.831273, ppl: 46.121227 +epoch: 0, batch: 13342, sum loss: 6309.813965, avg loss: 3.468837, ppl: 32.099380 +epoch: 0, batch: 13343, sum loss: 4905.050781, avg loss: 3.350444, ppl: 
28.515400 +epoch: 0, batch: 13344, sum loss: 5862.535156, avg loss: 3.390709, ppl: 29.686983 +epoch: 0, batch: 13345, sum loss: 5174.405273, avg loss: 3.355645, ppl: 28.664099 +epoch: 0, batch: 13346, sum loss: 4967.927734, avg loss: 3.253391, ppl: 25.877937 +epoch: 0, batch: 13347, sum loss: 4756.976562, avg loss: 3.364198, ppl: 28.910315 +epoch: 0, batch: 13348, sum loss: 5440.155762, avg loss: 3.698270, ppl: 40.377403 +epoch: 0, batch: 13349, sum loss: 6631.937012, avg loss: 3.704993, ppl: 40.649742 +epoch: 0, batch: 13350, sum loss: 6438.197754, avg loss: 3.465122, ppl: 31.980375 +epoch: 0, batch: 13351, sum loss: 5542.465820, avg loss: 3.457558, ppl: 31.739380 +epoch: 0, batch: 13352, sum loss: 6017.536133, avg loss: 3.554363, ppl: 34.965530 +epoch: 0, batch: 13353, sum loss: 5187.933105, avg loss: 3.543670, ppl: 34.593643 +epoch: 0, batch: 13354, sum loss: 5949.839355, avg loss: 3.497848, ppl: 33.044258 +epoch: 0, batch: 13355, sum loss: 6426.689941, avg loss: 3.727779, ppl: 41.586628 +epoch: 0, batch: 13356, sum loss: 5836.378906, avg loss: 3.369734, ppl: 29.070787 +epoch: 0, batch: 13357, sum loss: 5466.513672, avg loss: 3.515443, ppl: 33.630825 +epoch: 0, batch: 13358, sum loss: 6103.738770, avg loss: 3.613818, ppl: 37.107464 +epoch: 0, batch: 13359, sum loss: 6699.350098, avg loss: 3.723930, ppl: 41.426876 +epoch: 0, batch: 13360, sum loss: 5567.525879, avg loss: 3.492802, ppl: 32.877934 +epoch: 0, batch: 13361, sum loss: 6310.941895, avg loss: 3.604193, ppl: 36.752022 +epoch: 0, batch: 13362, sum loss: 5463.358398, avg loss: 3.406084, ppl: 30.146952 +epoch: 0, batch: 13363, sum loss: 6009.339844, avg loss: 3.644233, ppl: 38.253410 +epoch: 0, batch: 13364, sum loss: 5450.081055, avg loss: 3.702501, ppl: 40.548584 +epoch: 0, batch: 13365, sum loss: 5764.918945, avg loss: 3.316984, ppl: 27.577065 +epoch: 0, batch: 13366, sum loss: 6169.381836, avg loss: 3.696454, ppl: 40.304142 +epoch: 0, batch: 13367, sum loss: 7031.433594, avg loss: 3.784410, ppl: 
44.009686 +epoch: 0, batch: 13368, sum loss: 5247.671875, avg loss: 3.409793, ppl: 30.258991 +epoch: 0, batch: 13369, sum loss: 5633.686523, avg loss: 3.488351, ppl: 32.731922 +epoch: 0, batch: 13370, sum loss: 5925.177734, avg loss: 3.724185, ppl: 41.437439 +epoch: 0, batch: 13371, sum loss: 5393.131836, avg loss: 3.483935, ppl: 32.587715 +epoch: 0, batch: 13372, sum loss: 6539.018066, avg loss: 3.463463, ppl: 31.927351 +epoch: 0, batch: 13373, sum loss: 6116.500488, avg loss: 3.684639, ppl: 39.830742 +epoch: 0, batch: 13374, sum loss: 5898.062988, avg loss: 3.594188, ppl: 36.386150 +epoch: 0, batch: 13375, sum loss: 5631.054199, avg loss: 3.285329, ppl: 26.717775 +epoch: 0, batch: 13376, sum loss: 5293.302246, avg loss: 3.441679, ppl: 31.239365 +epoch: 0, batch: 13377, sum loss: 6181.571777, avg loss: 3.455322, ppl: 31.668493 +epoch: 0, batch: 13378, sum loss: 5128.193359, avg loss: 3.312787, ppl: 27.461542 +epoch: 0, batch: 13379, sum loss: 6138.455566, avg loss: 3.791511, ppl: 44.323303 +epoch: 0, batch: 13380, sum loss: 6576.293945, avg loss: 3.450311, ppl: 31.510204 +epoch: 0, batch: 13381, sum loss: 6106.367676, avg loss: 3.652134, ppl: 38.556858 +epoch: 0, batch: 13382, sum loss: 6619.034180, avg loss: 3.638831, ppl: 38.047348 +epoch: 0, batch: 13383, sum loss: 5455.947754, avg loss: 3.589439, ppl: 36.213768 +epoch: 0, batch: 13384, sum loss: 6341.946289, avg loss: 3.578977, ppl: 35.836849 +epoch: 0, batch: 13385, sum loss: 6122.571289, avg loss: 3.490633, ppl: 32.806725 +epoch: 0, batch: 13386, sum loss: 6700.130859, avg loss: 3.745182, ppl: 42.316711 +epoch: 0, batch: 13387, sum loss: 5305.120117, avg loss: 3.270727, ppl: 26.330484 +epoch: 0, batch: 13388, sum loss: 6193.836914, avg loss: 3.747028, ppl: 42.394882 +epoch: 0, batch: 13389, sum loss: 5777.175781, avg loss: 3.608480, ppl: 36.909897 +epoch: 0, batch: 13390, sum loss: 6247.402344, avg loss: 3.590461, ppl: 36.250786 +epoch: 0, batch: 13391, sum loss: 6068.420898, avg loss: 3.444053, ppl: 
31.313614 +epoch: 0, batch: 13392, sum loss: 5752.967285, avg loss: 3.555604, ppl: 35.008957 +epoch: 0, batch: 13393, sum loss: 6735.336914, avg loss: 3.550520, ppl: 34.831432 +epoch: 0, batch: 13394, sum loss: 5352.182129, avg loss: 3.511930, ppl: 33.512901 +epoch: 0, batch: 13395, sum loss: 5919.016602, avg loss: 3.445295, ppl: 31.352526 +epoch: 0, batch: 13396, sum loss: 6008.151855, avg loss: 3.717916, ppl: 41.178478 +epoch: 0, batch: 13397, sum loss: 5686.324219, avg loss: 3.394821, ppl: 29.809307 +epoch: 0, batch: 13398, sum loss: 7003.946289, avg loss: 3.619610, ppl: 37.323021 +epoch: 0, batch: 13399, sum loss: 6242.778320, avg loss: 3.579575, ppl: 35.858284 +epoch: 0, batch: 13400, sum loss: 5796.554199, avg loss: 3.276741, ppl: 26.489298 +epoch: 0, batch: 13401, sum loss: 5896.154297, avg loss: 3.511706, ppl: 33.505375 +epoch: 0, batch: 13402, sum loss: 5590.189453, avg loss: 3.463562, ppl: 31.930519 +epoch: 0, batch: 13403, sum loss: 7056.838379, avg loss: 3.519620, ppl: 33.771591 +epoch: 0, batch: 13404, sum loss: 4668.491211, avg loss: 3.294631, ppl: 26.967449 +epoch: 0, batch: 13405, sum loss: 5914.367188, avg loss: 3.393211, ppl: 29.761366 +epoch: 0, batch: 13406, sum loss: 4958.533691, avg loss: 3.154284, ppl: 23.436239 +epoch: 0, batch: 13407, sum loss: 5173.264648, avg loss: 3.444251, ppl: 31.319803 +epoch: 0, batch: 13408, sum loss: 6444.169434, avg loss: 3.712079, ppl: 40.938831 +epoch: 0, batch: 13409, sum loss: 6459.267578, avg loss: 3.439440, ppl: 31.169483 +epoch: 0, batch: 13410, sum loss: 5243.532715, avg loss: 3.400475, ppl: 29.978344 +epoch: 0, batch: 13411, sum loss: 5897.808594, avg loss: 3.298551, ppl: 27.073370 +epoch: 0, batch: 13412, sum loss: 5470.629883, avg loss: 3.522621, ppl: 33.873093 +epoch: 0, batch: 13413, sum loss: 6835.174316, avg loss: 3.441679, ppl: 31.239365 +epoch: 0, batch: 13414, sum loss: 5579.714355, avg loss: 3.498254, ppl: 33.057671 +epoch: 0, batch: 13415, sum loss: 5972.229980, avg loss: 3.632743, ppl: 
37.816418 +epoch: 0, batch: 13416, sum loss: 6014.654785, avg loss: 3.306572, ppl: 27.291416 +epoch: 0, batch: 13417, sum loss: 4625.498047, avg loss: 3.311022, ppl: 27.413128 +epoch: 0, batch: 13418, sum loss: 5132.973633, avg loss: 3.328777, ppl: 27.904182 +epoch: 0, batch: 13419, sum loss: 6283.893555, avg loss: 3.548218, ppl: 34.751320 +epoch: 0, batch: 13420, sum loss: 5506.629395, avg loss: 3.359749, ppl: 28.781975 +epoch: 0, batch: 13421, sum loss: 5100.989258, avg loss: 3.297343, ppl: 27.040689 +epoch: 0, batch: 13422, sum loss: 6038.495605, avg loss: 3.670818, ppl: 39.284019 +epoch: 0, batch: 13423, sum loss: 5786.155273, avg loss: 3.774400, ppl: 43.571369 +epoch: 0, batch: 13424, sum loss: 7107.411133, avg loss: 3.744685, ppl: 42.295670 +epoch: 0, batch: 13425, sum loss: 5421.434082, avg loss: 3.311811, ppl: 27.434757 +epoch: 0, batch: 13426, sum loss: 5007.916016, avg loss: 3.325310, ppl: 27.807604 +epoch: 0, batch: 13427, sum loss: 5926.001465, avg loss: 3.334835, ppl: 28.073746 +epoch: 0, batch: 13428, sum loss: 6074.891113, avg loss: 3.481313, ppl: 32.502369 +epoch: 0, batch: 13429, sum loss: 5816.267578, avg loss: 3.518613, ppl: 33.737606 +epoch: 0, batch: 13430, sum loss: 6039.144043, avg loss: 3.686901, ppl: 39.920933 +epoch: 0, batch: 13431, sum loss: 6057.267090, avg loss: 3.686711, ppl: 39.913368 +epoch: 0, batch: 13432, sum loss: 6503.400391, avg loss: 3.602992, ppl: 36.707893 +epoch: 0, batch: 13433, sum loss: 6336.429688, avg loss: 3.443712, ppl: 31.302925 +epoch: 0, batch: 13434, sum loss: 4706.876953, avg loss: 3.241651, ppl: 25.575909 +epoch: 0, batch: 13435, sum loss: 5383.073730, avg loss: 3.400552, ppl: 29.980639 +epoch: 0, batch: 13436, sum loss: 6522.084961, avg loss: 3.722651, ppl: 41.373951 +epoch: 0, batch: 13437, sum loss: 4766.981445, avg loss: 3.335886, ppl: 28.103279 +epoch: 0, batch: 13438, sum loss: 7119.844238, avg loss: 3.638142, ppl: 38.021133 +epoch: 0, batch: 13439, sum loss: 5302.518066, avg loss: 3.263088, ppl: 
26.130102 +epoch: 0, batch: 13440, sum loss: 5543.308594, avg loss: 3.224729, ppl: 25.146753 +epoch: 0, batch: 13441, sum loss: 6614.049805, avg loss: 3.602424, ppl: 36.687042 +epoch: 0, batch: 13442, sum loss: 7430.089355, avg loss: 3.839839, ppl: 46.517994 +epoch: 0, batch: 13443, sum loss: 6267.099121, avg loss: 3.603852, ppl: 36.739494 +epoch: 0, batch: 13444, sum loss: 6815.767090, avg loss: 3.767699, ppl: 43.280361 +epoch: 0, batch: 13445, sum loss: 6057.952148, avg loss: 3.725678, ppl: 41.499348 +epoch: 0, batch: 13446, sum loss: 6745.979004, avg loss: 3.508049, ppl: 33.383083 +epoch: 0, batch: 13447, sum loss: 5295.174805, avg loss: 3.621871, ppl: 37.407482 +epoch: 0, batch: 13448, sum loss: 5971.267578, avg loss: 3.562809, ppl: 35.262108 +epoch: 0, batch: 13449, sum loss: 5714.930664, avg loss: 3.527735, ppl: 34.046772 +epoch: 0, batch: 13450, sum loss: 6657.907227, avg loss: 3.721580, ppl: 41.329655 +epoch: 0, batch: 13451, sum loss: 5644.660156, avg loss: 3.373975, ppl: 29.194345 +epoch: 0, batch: 13452, sum loss: 5816.062012, avg loss: 3.791435, ppl: 44.319977 +epoch: 0, batch: 13453, sum loss: 6208.555664, avg loss: 3.691175, ppl: 40.091908 +epoch: 0, batch: 13454, sum loss: 5350.017578, avg loss: 3.416358, ppl: 30.458298 +epoch: 0, batch: 13455, sum loss: 5172.983398, avg loss: 3.567575, ppl: 35.430569 +epoch: 0, batch: 13456, sum loss: 6309.981934, avg loss: 3.599534, ppl: 36.581169 +epoch: 0, batch: 13457, sum loss: 6712.318359, avg loss: 3.768848, ppl: 43.330128 +epoch: 0, batch: 13458, sum loss: 7444.296875, avg loss: 3.926317, ppl: 50.719845 +epoch: 0, batch: 13459, sum loss: 5915.718750, avg loss: 3.288337, ppl: 26.798267 +epoch: 0, batch: 13460, sum loss: 6385.460449, avg loss: 3.642590, ppl: 38.190632 +epoch: 0, batch: 13461, sum loss: 6344.363770, avg loss: 3.615022, ppl: 37.152161 +epoch: 0, batch: 13462, sum loss: 5196.448730, avg loss: 3.523016, ppl: 33.886482 +epoch: 0, batch: 13463, sum loss: 5949.494141, avg loss: 3.641061, ppl: 
38.132286 +epoch: 0, batch: 13464, sum loss: 5903.085938, avg loss: 3.446051, ppl: 31.376253 +epoch: 0, batch: 13465, sum loss: 5644.049805, avg loss: 3.527531, ppl: 34.039825 +epoch: 0, batch: 13466, sum loss: 6162.538086, avg loss: 3.710137, ppl: 40.859421 +epoch: 0, batch: 13467, sum loss: 5001.984375, avg loss: 3.286455, ppl: 26.747879 +epoch: 0, batch: 13468, sum loss: 5291.931152, avg loss: 3.472396, ppl: 32.213825 +epoch: 0, batch: 13469, sum loss: 6059.361328, avg loss: 3.329319, ppl: 27.919336 +epoch: 0, batch: 13470, sum loss: 6923.574707, avg loss: 3.760768, ppl: 42.981441 +epoch: 0, batch: 13471, sum loss: 5797.426758, avg loss: 3.648475, ppl: 38.416035 +epoch: 0, batch: 13472, sum loss: 6234.089844, avg loss: 3.482732, ppl: 32.548512 +epoch: 0, batch: 13473, sum loss: 5001.908691, avg loss: 3.314718, ppl: 27.514622 +epoch: 0, batch: 13474, sum loss: 5627.625488, avg loss: 3.478137, ppl: 32.399300 +epoch: 0, batch: 13475, sum loss: 5571.102539, avg loss: 3.409487, ppl: 30.249737 +epoch: 0, batch: 13476, sum loss: 5913.406250, avg loss: 3.645750, ppl: 38.311489 +epoch: 0, batch: 13477, sum loss: 5676.391602, avg loss: 3.440237, ppl: 31.194359 +epoch: 0, batch: 13478, sum loss: 4939.249023, avg loss: 3.664131, ppl: 39.022228 +epoch: 0, batch: 13479, sum loss: 6296.871094, avg loss: 3.531616, ppl: 34.179157 +epoch: 0, batch: 13480, sum loss: 5856.865234, avg loss: 3.409118, ppl: 30.238575 +epoch: 0, batch: 13481, sum loss: 6517.979004, avg loss: 3.414342, ppl: 30.396946 +epoch: 0, batch: 13482, sum loss: 4252.153320, avg loss: 3.358731, ppl: 28.752682 +epoch: 0, batch: 13483, sum loss: 5733.762207, avg loss: 3.364884, ppl: 28.930138 +epoch: 0, batch: 13484, sum loss: 5243.177246, avg loss: 3.373988, ppl: 29.194721 +epoch: 0, batch: 13485, sum loss: 6285.759766, avg loss: 3.418031, ppl: 30.509296 +epoch: 0, batch: 13486, sum loss: 6425.958008, avg loss: 3.697329, ppl: 40.339417 +epoch: 0, batch: 13487, sum loss: 6764.641113, avg loss: 3.605886, ppl: 
36.814270 +epoch: 0, batch: 13488, sum loss: 6852.045410, avg loss: 3.550283, ppl: 34.823162 +epoch: 0, batch: 13489, sum loss: 6122.662109, avg loss: 3.557619, ppl: 35.079567 +epoch: 0, batch: 13490, sum loss: 6729.561523, avg loss: 3.587186, ppl: 36.132267 +epoch: 0, batch: 13491, sum loss: 7041.215820, avg loss: 3.655875, ppl: 38.701385 +epoch: 0, batch: 13492, sum loss: 6693.417480, avg loss: 3.618064, ppl: 37.265339 +epoch: 0, batch: 13493, sum loss: 4865.538086, avg loss: 3.334845, ppl: 28.074020 +epoch: 0, batch: 13494, sum loss: 6294.836914, avg loss: 3.817366, ppl: 45.484249 +epoch: 0, batch: 13495, sum loss: 6462.941406, avg loss: 3.491595, ppl: 32.838268 +epoch: 0, batch: 13496, sum loss: 5879.863770, avg loss: 3.625070, ppl: 37.527355 +epoch: 0, batch: 13497, sum loss: 5808.518066, avg loss: 3.505442, ppl: 33.296169 +epoch: 0, batch: 13498, sum loss: 5134.804688, avg loss: 3.416370, ppl: 30.458662 +epoch: 0, batch: 13499, sum loss: 5479.836914, avg loss: 3.108246, ppl: 22.381744 +epoch: 0, batch: 13500, sum loss: 6292.530762, avg loss: 3.517345, ppl: 33.694855 +epoch: 0, batch: 13501, sum loss: 6504.662109, avg loss: 3.615710, ppl: 37.177723 +epoch: 0, batch: 13502, sum loss: 6168.080078, avg loss: 3.473018, ppl: 32.233883 +epoch: 0, batch: 13503, sum loss: 6350.143555, avg loss: 3.470024, ppl: 32.137508 +epoch: 0, batch: 13504, sum loss: 5185.356445, avg loss: 3.338929, ppl: 28.188919 +epoch: 0, batch: 13505, sum loss: 5604.772461, avg loss: 3.451215, ppl: 31.538675 +epoch: 0, batch: 13506, sum loss: 6044.825195, avg loss: 3.808964, ppl: 45.103676 +epoch: 0, batch: 13507, sum loss: 6710.673340, avg loss: 3.767925, ppl: 43.290127 +epoch: 0, batch: 13508, sum loss: 5174.251465, avg loss: 3.419862, ppl: 30.565205 +epoch: 0, batch: 13509, sum loss: 4685.120117, avg loss: 3.258081, ppl: 25.999588 +epoch: 0, batch: 13510, sum loss: 5514.856934, avg loss: 3.562569, ppl: 35.253651 +epoch: 0, batch: 13511, sum loss: 6266.052734, avg loss: 3.456179, ppl: 
31.695648 +epoch: 0, batch: 13512, sum loss: 5853.384766, avg loss: 3.401153, ppl: 29.998671 +epoch: 0, batch: 13513, sum loss: 6350.636230, avg loss: 3.673011, ppl: 39.370285 +epoch: 0, batch: 13514, sum loss: 4878.379395, avg loss: 3.332226, ppl: 28.000610 +epoch: 0, batch: 13515, sum loss: 6444.635254, avg loss: 3.588327, ppl: 36.173512 +epoch: 0, batch: 13516, sum loss: 7067.711914, avg loss: 3.808034, ppl: 45.061779 +epoch: 0, batch: 13517, sum loss: 5574.772461, avg loss: 3.424307, ppl: 30.701372 +epoch: 0, batch: 13518, sum loss: 6572.447266, avg loss: 3.783792, ppl: 43.982529 +epoch: 0, batch: 13519, sum loss: 6168.945312, avg loss: 3.389531, ppl: 29.652033 +epoch: 0, batch: 13520, sum loss: 5038.528320, avg loss: 3.446326, ppl: 31.384872 +epoch: 0, batch: 13521, sum loss: 6443.909180, avg loss: 3.624246, ppl: 37.496445 +epoch: 0, batch: 13522, sum loss: 6169.321777, avg loss: 3.588902, ppl: 36.194305 +epoch: 0, batch: 13523, sum loss: 4513.371094, avg loss: 3.237712, ppl: 25.475378 +epoch: 0, batch: 13524, sum loss: 5708.186035, avg loss: 3.484851, ppl: 32.617561 +epoch: 0, batch: 13525, sum loss: 4634.815430, avg loss: 3.067383, ppl: 21.485598 +epoch: 0, batch: 13526, sum loss: 6277.700195, avg loss: 3.522840, ppl: 33.880505 +epoch: 0, batch: 13527, sum loss: 5984.329102, avg loss: 3.664623, ppl: 39.041405 +epoch: 0, batch: 13528, sum loss: 4623.891113, avg loss: 3.105367, ppl: 22.317402 +epoch: 0, batch: 13529, sum loss: 6430.217285, avg loss: 3.504206, ppl: 33.255020 +epoch: 0, batch: 13530, sum loss: 5315.176758, avg loss: 3.522317, ppl: 33.862812 +epoch: 0, batch: 13531, sum loss: 5495.580566, avg loss: 3.350964, ppl: 28.530218 +epoch: 0, batch: 13532, sum loss: 6307.041504, avg loss: 3.649908, ppl: 38.471138 +epoch: 0, batch: 13533, sum loss: 7240.451172, avg loss: 3.747646, ppl: 42.421089 +epoch: 0, batch: 13534, sum loss: 5395.815430, avg loss: 3.242678, ppl: 25.602184 +epoch: 0, batch: 13535, sum loss: 5508.452637, avg loss: 3.263301, ppl: 
26.135679 +epoch: 0, batch: 13536, sum loss: 5651.306152, avg loss: 3.396218, ppl: 29.850985 +epoch: 0, batch: 13537, sum loss: 6345.538574, avg loss: 3.433733, ppl: 30.992121 +epoch: 0, batch: 13538, sum loss: 4396.881836, avg loss: 3.325932, ppl: 27.824921 +epoch: 0, batch: 13539, sum loss: 4952.834961, avg loss: 3.258444, ppl: 26.009043 +epoch: 0, batch: 13540, sum loss: 6337.477539, avg loss: 3.468789, ppl: 32.097851 +epoch: 0, batch: 13541, sum loss: 5209.285645, avg loss: 3.461319, ppl: 31.858986 +epoch: 0, batch: 13542, sum loss: 4759.406250, avg loss: 3.314350, ppl: 27.504501 +epoch: 0, batch: 13543, sum loss: 5913.962402, avg loss: 3.507688, ppl: 33.371033 +epoch: 0, batch: 13544, sum loss: 7348.897461, avg loss: 3.780297, ppl: 43.829071 +epoch: 0, batch: 13545, sum loss: 7404.103516, avg loss: 3.604724, ppl: 36.771549 +epoch: 0, batch: 13546, sum loss: 6168.248535, avg loss: 3.536840, ppl: 34.358166 +epoch: 0, batch: 13547, sum loss: 5071.820312, avg loss: 3.349947, ppl: 28.501236 +epoch: 0, batch: 13548, sum loss: 7513.146484, avg loss: 3.850921, ppl: 47.036362 +epoch: 0, batch: 13549, sum loss: 5031.425781, avg loss: 3.273537, ppl: 26.404558 +epoch: 0, batch: 13550, sum loss: 4870.718262, avg loss: 3.293251, ppl: 26.930273 +epoch: 0, batch: 13551, sum loss: 5287.162598, avg loss: 3.327352, ppl: 27.864460 +epoch: 0, batch: 13552, sum loss: 6639.103027, avg loss: 3.763664, ppl: 43.106079 +epoch: 0, batch: 13553, sum loss: 6077.618164, avg loss: 3.602619, ppl: 36.694218 +epoch: 0, batch: 13554, sum loss: 5374.960449, avg loss: 3.384736, ppl: 29.510202 +epoch: 0, batch: 13555, sum loss: 5923.607422, avg loss: 3.629662, ppl: 37.700054 +epoch: 0, batch: 13556, sum loss: 5305.429688, avg loss: 3.458559, ppl: 31.771162 +epoch: 0, batch: 13557, sum loss: 5556.770508, avg loss: 3.442857, ppl: 31.276173 +epoch: 0, batch: 13558, sum loss: 6563.279297, avg loss: 3.632141, ppl: 37.793652 +epoch: 0, batch: 13559, sum loss: 6247.717773, avg loss: 3.476749, ppl: 
32.354366 +epoch: 0, batch: 13560, sum loss: 5710.894531, avg loss: 3.395300, ppl: 29.823597 +epoch: 0, batch: 13561, sum loss: 6082.393066, avg loss: 3.399884, ppl: 29.960630 +epoch: 0, batch: 13562, sum loss: 5603.503418, avg loss: 3.476119, ppl: 32.333984 +epoch: 0, batch: 13563, sum loss: 5684.863281, avg loss: 3.485508, ppl: 32.639004 +epoch: 0, batch: 13564, sum loss: 6438.918457, avg loss: 3.654324, ppl: 38.641384 +epoch: 0, batch: 13565, sum loss: 6423.449219, avg loss: 3.560670, ppl: 35.186779 +epoch: 0, batch: 13566, sum loss: 5393.817871, avg loss: 3.398751, ppl: 29.926706 +epoch: 0, batch: 13567, sum loss: 5322.847168, avg loss: 3.354031, ppl: 28.617863 +epoch: 0, batch: 13568, sum loss: 6360.504883, avg loss: 3.490947, ppl: 32.817001 +epoch: 0, batch: 13569, sum loss: 6004.241699, avg loss: 3.480720, ppl: 32.483105 +epoch: 0, batch: 13570, sum loss: 5758.765137, avg loss: 3.423760, ppl: 30.684586 +epoch: 0, batch: 13571, sum loss: 5725.661133, avg loss: 3.761933, ppl: 43.031509 +epoch: 0, batch: 13572, sum loss: 5968.244141, avg loss: 3.713904, ppl: 41.013626 +epoch: 0, batch: 13573, sum loss: 5958.791016, avg loss: 3.591797, ppl: 36.299244 +epoch: 0, batch: 13574, sum loss: 4222.914062, avg loss: 3.201603, ppl: 24.571886 +epoch: 0, batch: 13575, sum loss: 5553.856934, avg loss: 3.447459, ppl: 31.420465 +epoch: 0, batch: 13576, sum loss: 5399.896484, avg loss: 3.479315, ppl: 32.437496 +epoch: 0, batch: 13577, sum loss: 6135.558594, avg loss: 3.532273, ppl: 34.201630 +epoch: 0, batch: 13578, sum loss: 6486.180664, avg loss: 3.679059, ppl: 39.609097 +epoch: 0, batch: 13579, sum loss: 6491.581543, avg loss: 3.669634, ppl: 39.237526 +epoch: 0, batch: 13580, sum loss: 6166.020508, avg loss: 3.462111, ppl: 31.884228 +epoch: 0, batch: 13581, sum loss: 6929.166016, avg loss: 3.817723, ppl: 45.500465 +epoch: 0, batch: 13582, sum loss: 5243.983398, avg loss: 3.454535, ppl: 31.643557 +epoch: 0, batch: 13583, sum loss: 6132.601562, avg loss: 3.426034, ppl: 
30.754435 +epoch: 0, batch: 13584, sum loss: 6558.288086, avg loss: 3.751881, ppl: 42.601154 +epoch: 0, batch: 13585, sum loss: 4970.888672, avg loss: 3.347400, ppl: 28.428715 +epoch: 0, batch: 13586, sum loss: 6623.403809, avg loss: 3.771870, ppl: 43.461266 +epoch: 0, batch: 13587, sum loss: 6832.984863, avg loss: 3.636501, ppl: 37.958778 +epoch: 0, batch: 13588, sum loss: 6242.918945, avg loss: 3.509229, ppl: 33.422497 +epoch: 0, batch: 13589, sum loss: 5697.822754, avg loss: 3.525880, ppl: 33.983677 +epoch: 0, batch: 13590, sum loss: 6580.220703, avg loss: 3.526377, ppl: 34.000576 +epoch: 0, batch: 13591, sum loss: 5510.323242, avg loss: 3.205540, ppl: 24.668821 +epoch: 0, batch: 13592, sum loss: 6511.300781, avg loss: 3.748590, ppl: 42.461170 +epoch: 0, batch: 13593, sum loss: 6757.821777, avg loss: 3.670734, ppl: 39.280743 +epoch: 0, batch: 13594, sum loss: 6181.767090, avg loss: 3.387270, ppl: 29.585068 +epoch: 0, batch: 13595, sum loss: 4351.923340, avg loss: 3.238038, ppl: 25.483681 +epoch: 0, batch: 13596, sum loss: 6293.001465, avg loss: 3.701766, ppl: 40.518787 +epoch: 0, batch: 13597, sum loss: 5900.041016, avg loss: 3.383051, ppl: 29.460516 +epoch: 0, batch: 13598, sum loss: 6664.789062, avg loss: 3.663985, ppl: 39.016525 +epoch: 0, batch: 13599, sum loss: 6709.391113, avg loss: 3.553703, ppl: 34.942463 +epoch: 0, batch: 13600, sum loss: 6604.284180, avg loss: 3.533592, ppl: 34.246769 +epoch: 0, batch: 13601, sum loss: 6276.399902, avg loss: 3.600918, ppl: 36.631840 +epoch: 0, batch: 13602, sum loss: 6320.032227, avg loss: 3.554574, ppl: 34.972919 +epoch: 0, batch: 13603, sum loss: 4829.737793, avg loss: 3.425346, ppl: 30.733280 +epoch: 0, batch: 13604, sum loss: 5997.318359, avg loss: 3.584769, ppl: 36.045033 +epoch: 0, batch: 13605, sum loss: 7308.371094, avg loss: 3.559850, ppl: 35.157906 +epoch: 0, batch: 13606, sum loss: 6368.458984, avg loss: 3.510727, ppl: 33.472599 +epoch: 0, batch: 13607, sum loss: 4836.378906, avg loss: 3.274461, ppl: 
26.428984 +epoch: 0, batch: 13608, sum loss: 5044.412109, avg loss: 3.692835, ppl: 40.158520 +epoch: 0, batch: 13609, sum loss: 5232.367676, avg loss: 3.328478, ppl: 27.895855 +epoch: 0, batch: 13610, sum loss: 5551.929199, avg loss: 3.446263, ppl: 31.382891 +epoch: 0, batch: 13611, sum loss: 7053.424316, avg loss: 3.784026, ppl: 43.992798 +epoch: 0, batch: 13612, sum loss: 5805.150879, avg loss: 3.482394, ppl: 32.537521 +epoch: 0, batch: 13613, sum loss: 6797.213379, avg loss: 3.763684, ppl: 43.106941 +epoch: 0, batch: 13614, sum loss: 6523.409668, avg loss: 3.830540, ppl: 46.087418 +epoch: 0, batch: 13615, sum loss: 5669.451172, avg loss: 3.517029, ppl: 33.684212 +epoch: 0, batch: 13616, sum loss: 6543.808105, avg loss: 3.651679, ppl: 38.539310 +epoch: 0, batch: 13617, sum loss: 6371.129883, avg loss: 3.686996, ppl: 39.924751 +epoch: 0, batch: 13618, sum loss: 5477.967285, avg loss: 3.538739, ppl: 34.423473 +epoch: 0, batch: 13619, sum loss: 5789.932617, avg loss: 3.616448, ppl: 37.205170 +epoch: 0, batch: 13620, sum loss: 5780.695312, avg loss: 3.430680, ppl: 30.897640 +epoch: 0, batch: 13621, sum loss: 5504.520020, avg loss: 3.638149, ppl: 38.021412 +epoch: 0, batch: 13622, sum loss: 6036.854492, avg loss: 3.401045, ppl: 29.995417 +epoch: 0, batch: 13623, sum loss: 6474.235352, avg loss: 3.572978, ppl: 35.622501 +epoch: 0, batch: 13624, sum loss: 5713.595215, avg loss: 3.588942, ppl: 36.195744 +epoch: 0, batch: 13625, sum loss: 6931.349609, avg loss: 3.502450, ppl: 33.196701 +epoch: 0, batch: 13626, sum loss: 4802.336914, avg loss: 3.275810, ppl: 26.464647 +epoch: 0, batch: 13627, sum loss: 6501.793945, avg loss: 3.516384, ppl: 33.662479 +epoch: 0, batch: 13628, sum loss: 6446.450195, avg loss: 3.644121, ppl: 38.249142 +epoch: 0, batch: 13629, sum loss: 6120.742188, avg loss: 3.283660, ppl: 26.673216 +epoch: 0, batch: 13630, sum loss: 6620.693359, avg loss: 3.635746, ppl: 37.930138 +epoch: 0, batch: 13631, sum loss: 6641.666016, avg loss: 3.546004, ppl: 
34.674492 +epoch: 0, batch: 13632, sum loss: 6009.929688, avg loss: 3.479983, ppl: 32.459156 +epoch: 0, batch: 13633, sum loss: 6771.337402, avg loss: 3.603692, ppl: 36.733616 +epoch: 0, batch: 13634, sum loss: 6258.522949, avg loss: 3.690167, ppl: 40.051533 +epoch: 0, batch: 13635, sum loss: 5146.987305, avg loss: 3.340031, ppl: 28.219992 +epoch: 0, batch: 13636, sum loss: 5605.368652, avg loss: 3.556706, ppl: 35.047558 +epoch: 0, batch: 13637, sum loss: 5925.397949, avg loss: 3.657653, ppl: 38.770256 +epoch: 0, batch: 13638, sum loss: 7304.416992, avg loss: 3.866817, ppl: 47.790028 +epoch: 0, batch: 13639, sum loss: 6119.407227, avg loss: 3.441736, ppl: 31.241159 +epoch: 0, batch: 13640, sum loss: 5613.091797, avg loss: 3.516975, ppl: 33.682373 +epoch: 0, batch: 13641, sum loss: 6601.137207, avg loss: 3.562405, ppl: 35.247879 +epoch: 0, batch: 13642, sum loss: 4503.251953, avg loss: 3.024347, ppl: 20.580553 +epoch: 0, batch: 13643, sum loss: 6201.359375, avg loss: 3.697889, ppl: 40.361996 +epoch: 0, batch: 13644, sum loss: 6245.489746, avg loss: 3.558684, ppl: 35.116940 +epoch: 0, batch: 13645, sum loss: 6129.415039, avg loss: 3.494535, ppl: 32.934975 +epoch: 0, batch: 13646, sum loss: 6441.443848, avg loss: 3.572626, ppl: 35.609970 +epoch: 0, batch: 13647, sum loss: 6098.763672, avg loss: 3.589620, ppl: 36.220306 +epoch: 0, batch: 13648, sum loss: 5898.905273, avg loss: 3.504994, ppl: 33.281250 +epoch: 0, batch: 13649, sum loss: 6538.246582, avg loss: 3.528466, ppl: 34.071644 +epoch: 0, batch: 13650, sum loss: 6604.531250, avg loss: 3.416726, ppl: 30.469498 +epoch: 0, batch: 13651, sum loss: 5804.392578, avg loss: 3.539264, ppl: 34.441551 +epoch: 0, batch: 13652, sum loss: 5672.489746, avg loss: 3.545306, ppl: 34.650284 +epoch: 0, batch: 13653, sum loss: 5202.731445, avg loss: 3.332948, ppl: 28.020819 +epoch: 0, batch: 13654, sum loss: 6200.423828, avg loss: 3.463924, ppl: 31.942070 +epoch: 0, batch: 13655, sum loss: 6062.120117, avg loss: 3.510203, ppl: 
33.455055 +epoch: 0, batch: 13656, sum loss: 6267.307617, avg loss: 3.780041, ppl: 43.817825 +epoch: 0, batch: 13657, sum loss: 6031.169434, avg loss: 3.650829, ppl: 38.506577 +epoch: 0, batch: 13658, sum loss: 5613.219238, avg loss: 3.685633, ppl: 39.870361 +epoch: 0, batch: 13659, sum loss: 6803.020508, avg loss: 3.703332, ppl: 40.582298 +epoch: 0, batch: 13660, sum loss: 6106.769531, avg loss: 3.417331, ppl: 30.487919 +epoch: 0, batch: 13661, sum loss: 4776.060059, avg loss: 3.001923, ppl: 20.124205 +epoch: 0, batch: 13662, sum loss: 4910.698242, avg loss: 3.239247, ppl: 25.514503 +epoch: 0, batch: 13663, sum loss: 6359.737305, avg loss: 3.819662, ppl: 45.588802 +epoch: 0, batch: 13664, sum loss: 6081.926758, avg loss: 3.670444, ppl: 39.269356 +epoch: 0, batch: 13665, sum loss: 5618.155762, avg loss: 3.364165, ppl: 28.909349 +epoch: 0, batch: 13666, sum loss: 6595.643555, avg loss: 3.764637, ppl: 43.148041 +epoch: 0, batch: 13667, sum loss: 6205.570801, avg loss: 3.417164, ppl: 30.482853 +epoch: 0, batch: 13668, sum loss: 6832.411133, avg loss: 3.603593, ppl: 36.729954 +epoch: 0, batch: 13669, sum loss: 5738.305664, avg loss: 3.511815, ppl: 33.509033 +epoch: 0, batch: 13670, sum loss: 6757.395020, avg loss: 3.402515, ppl: 30.039560 +epoch: 0, batch: 13671, sum loss: 6352.449707, avg loss: 3.667696, ppl: 39.161575 +epoch: 0, batch: 13672, sum loss: 5094.072754, avg loss: 3.530196, ppl: 34.130653 +epoch: 0, batch: 13673, sum loss: 5918.232422, avg loss: 3.326719, ppl: 27.846815 +epoch: 0, batch: 13674, sum loss: 5913.038086, avg loss: 3.505061, ppl: 33.283478 +epoch: 0, batch: 13675, sum loss: 6787.157715, avg loss: 3.520310, ppl: 33.794910 +epoch: 0, batch: 13676, sum loss: 6071.468750, avg loss: 3.550566, ppl: 34.833042 +epoch: 0, batch: 13677, sum loss: 6076.009766, avg loss: 3.658043, ppl: 38.785370 +epoch: 0, batch: 13678, sum loss: 4858.562988, avg loss: 3.183855, ppl: 24.139639 +epoch: 0, batch: 13679, sum loss: 6631.924805, avg loss: 3.804891, ppl: 
44.920349 +epoch: 0, batch: 13680, sum loss: 4761.701660, avg loss: 3.210858, ppl: 24.800346 +epoch: 0, batch: 13681, sum loss: 4891.006348, avg loss: 3.343135, ppl: 28.307735 +epoch: 0, batch: 13682, sum loss: 5469.165039, avg loss: 3.340968, ppl: 28.246466 +epoch: 0, batch: 13683, sum loss: 6139.000488, avg loss: 3.546505, ppl: 34.691856 +epoch: 0, batch: 13684, sum loss: 5139.924316, avg loss: 3.417503, ppl: 30.493176 +epoch: 0, batch: 13685, sum loss: 6088.267578, avg loss: 3.687624, ppl: 39.949833 +epoch: 0, batch: 13686, sum loss: 5556.413574, avg loss: 3.483645, ppl: 32.578251 +epoch: 0, batch: 13687, sum loss: 5507.925293, avg loss: 3.551209, ppl: 34.855431 +epoch: 0, batch: 13688, sum loss: 6177.051270, avg loss: 3.824800, ppl: 45.823612 +epoch: 0, batch: 13689, sum loss: 5644.415039, avg loss: 3.363775, ppl: 28.898083 +epoch: 0, batch: 13690, sum loss: 5205.359863, avg loss: 3.451830, ppl: 31.558096 +epoch: 0, batch: 13691, sum loss: 4958.043457, avg loss: 3.257585, ppl: 25.986692 +epoch: 0, batch: 13692, sum loss: 6027.231934, avg loss: 3.566409, ppl: 35.389294 +epoch: 0, batch: 13693, sum loss: 5570.375000, avg loss: 3.563900, ppl: 35.300583 +epoch: 0, batch: 13694, sum loss: 5735.297852, avg loss: 3.371721, ppl: 29.128622 +epoch: 0, batch: 13695, sum loss: 6500.299316, avg loss: 3.808025, ppl: 45.061371 +epoch: 0, batch: 13696, sum loss: 5247.104980, avg loss: 3.359222, ppl: 28.766806 +epoch: 0, batch: 13697, sum loss: 5783.262695, avg loss: 3.532842, ppl: 34.221100 +epoch: 0, batch: 13698, sum loss: 5210.397949, avg loss: 3.359380, ppl: 28.771334 +epoch: 0, batch: 13699, sum loss: 7626.099609, avg loss: 3.845739, ppl: 46.793236 +epoch: 0, batch: 13700, sum loss: 5870.140137, avg loss: 3.406930, ppl: 30.172472 +epoch: 0, batch: 13701, sum loss: 6133.928711, avg loss: 3.765456, ppl: 43.183411 +epoch: 0, batch: 13702, sum loss: 5564.694336, avg loss: 3.258018, ppl: 25.997946 +epoch: 0, batch: 13703, sum loss: 5406.738770, avg loss: 3.253152, ppl: 
25.871763 +epoch: 0, batch: 13704, sum loss: 5978.418945, avg loss: 3.487993, ppl: 32.720196 +epoch: 0, batch: 13705, sum loss: 7051.722656, avg loss: 3.438188, ppl: 31.130486 +epoch: 0, batch: 13706, sum loss: 5370.777832, avg loss: 3.350454, ppl: 28.515673 +epoch: 0, batch: 13707, sum loss: 6355.502441, avg loss: 3.872945, ppl: 48.083763 +epoch: 0, batch: 13708, sum loss: 5880.987793, avg loss: 3.544899, ppl: 34.636196 +epoch: 0, batch: 13709, sum loss: 5270.739746, avg loss: 3.338024, ppl: 28.163418 +epoch: 0, batch: 13710, sum loss: 6113.116211, avg loss: 3.720704, ppl: 41.293438 +epoch: 0, batch: 13711, sum loss: 6017.703125, avg loss: 3.636075, ppl: 37.942600 +epoch: 0, batch: 13712, sum loss: 5360.215820, avg loss: 3.609573, ppl: 36.950268 +epoch: 0, batch: 13713, sum loss: 6355.212402, avg loss: 3.323856, ppl: 27.767212 +epoch: 0, batch: 13714, sum loss: 6067.920410, avg loss: 3.681991, ppl: 39.725395 +epoch: 0, batch: 13715, sum loss: 6681.978027, avg loss: 3.623632, ppl: 37.473442 +epoch: 0, batch: 13716, sum loss: 5258.556641, avg loss: 3.383885, ppl: 29.485088 +epoch: 0, batch: 13717, sum loss: 6682.720703, avg loss: 3.669808, ppl: 39.244377 +epoch: 0, batch: 13718, sum loss: 5758.771973, avg loss: 3.608253, ppl: 36.901531 +epoch: 0, batch: 13719, sum loss: 6226.811523, avg loss: 3.504114, ppl: 33.251984 +epoch: 0, batch: 13720, sum loss: 7045.348145, avg loss: 3.667542, ppl: 39.155552 +epoch: 0, batch: 13721, sum loss: 5594.754883, avg loss: 3.304640, ppl: 27.238729 +epoch: 0, batch: 13722, sum loss: 5673.841797, avg loss: 3.453342, ppl: 31.605856 +epoch: 0, batch: 13723, sum loss: 5669.416016, avg loss: 3.484583, ppl: 32.608814 +epoch: 0, batch: 13724, sum loss: 5144.135742, avg loss: 3.119549, ppl: 22.636158 +epoch: 0, batch: 13725, sum loss: 6177.963379, avg loss: 3.655600, ppl: 38.690712 +epoch: 0, batch: 13726, sum loss: 6079.259766, avg loss: 3.620762, ppl: 37.366035 +epoch: 0, batch: 13727, sum loss: 6806.144043, avg loss: 3.727352, ppl: 
41.568874 +epoch: 0, batch: 13728, sum loss: 5077.554199, avg loss: 3.248595, ppl: 25.754135 +epoch: 0, batch: 13729, sum loss: 5767.544434, avg loss: 3.406701, ppl: 30.165560 +epoch: 0, batch: 13730, sum loss: 5764.509277, avg loss: 3.562737, ppl: 35.259586 +epoch: 0, batch: 13731, sum loss: 5808.416016, avg loss: 3.639358, ppl: 38.067402 +epoch: 0, batch: 13732, sum loss: 6705.945801, avg loss: 3.721391, ppl: 41.321823 +epoch: 0, batch: 13733, sum loss: 6159.812500, avg loss: 3.612793, ppl: 37.069458 +epoch: 0, batch: 13734, sum loss: 5461.395508, avg loss: 3.421927, ppl: 30.628378 +epoch: 0, batch: 13735, sum loss: 5386.873047, avg loss: 3.298759, ppl: 27.079006 +epoch: 0, batch: 13736, sum loss: 6830.441406, avg loss: 3.748870, ppl: 42.473064 +epoch: 0, batch: 13737, sum loss: 5737.437988, avg loss: 3.572502, ppl: 35.605556 +epoch: 0, batch: 13738, sum loss: 5651.263672, avg loss: 3.379943, ppl: 29.369087 +epoch: 0, batch: 13739, sum loss: 6146.250488, avg loss: 3.494173, ppl: 32.923058 +epoch: 0, batch: 13740, sum loss: 5593.240234, avg loss: 3.301795, ppl: 27.161337 +epoch: 0, batch: 13741, sum loss: 5946.661133, avg loss: 3.471489, ppl: 32.184635 +epoch: 0, batch: 13742, sum loss: 5795.570312, avg loss: 3.651903, ppl: 38.547970 +epoch: 0, batch: 13743, sum loss: 5140.658203, avg loss: 3.318695, ppl: 27.624273 +epoch: 0, batch: 13744, sum loss: 6093.991699, avg loss: 3.610185, ppl: 36.972889 +epoch: 0, batch: 13745, sum loss: 5436.703125, avg loss: 3.393697, ppl: 29.775839 +epoch: 0, batch: 13746, sum loss: 6400.082520, avg loss: 3.573469, ppl: 35.640003 +epoch: 0, batch: 13747, sum loss: 6004.635742, avg loss: 3.480948, ppl: 32.490524 +epoch: 0, batch: 13748, sum loss: 5454.245117, avg loss: 3.569532, ppl: 35.499981 +epoch: 0, batch: 13749, sum loss: 6639.572266, avg loss: 3.905631, ppl: 49.681423 +epoch: 0, batch: 13750, sum loss: 7206.164062, avg loss: 3.447925, ppl: 31.435106 +epoch: 0, batch: 13751, sum loss: 5140.984863, avg loss: 3.760779, ppl: 
42.981903 +epoch: 0, batch: 13752, sum loss: 6404.421875, avg loss: 3.661762, ppl: 38.929878 +epoch: 0, batch: 13753, sum loss: 6346.234375, avg loss: 3.634728, ppl: 37.891533 +epoch: 0, batch: 13754, sum loss: 4572.636230, avg loss: 3.394682, ppl: 29.805164 +epoch: 0, batch: 13755, sum loss: 5683.837891, avg loss: 3.335586, ppl: 28.094831 +epoch: 0, batch: 13756, sum loss: 6837.768555, avg loss: 3.704100, ppl: 40.613476 +epoch: 0, batch: 13757, sum loss: 6059.169434, avg loss: 3.740228, ppl: 42.107597 +epoch: 0, batch: 13758, sum loss: 5163.797852, avg loss: 3.191470, ppl: 24.324150 +epoch: 0, batch: 13759, sum loss: 4784.095703, avg loss: 3.241257, ppl: 25.565849 +epoch: 0, batch: 13760, sum loss: 5368.835938, avg loss: 3.265715, ppl: 26.198847 +epoch: 0, batch: 13761, sum loss: 5519.186523, avg loss: 3.351054, ppl: 28.532797 +epoch: 0, batch: 13762, sum loss: 5422.597656, avg loss: 3.260732, ppl: 26.068617 +epoch: 0, batch: 13763, sum loss: 5118.821777, avg loss: 3.215340, ppl: 24.911770 +epoch: 0, batch: 13764, sum loss: 7300.966309, avg loss: 3.650483, ppl: 38.493267 +epoch: 0, batch: 13765, sum loss: 5826.965820, avg loss: 3.628247, ppl: 37.646782 +epoch: 0, batch: 13766, sum loss: 5885.416504, avg loss: 3.775123, ppl: 43.602867 +epoch: 0, batch: 13767, sum loss: 5996.989258, avg loss: 3.337223, ppl: 28.140865 +epoch: 0, batch: 13768, sum loss: 5578.020020, avg loss: 3.288927, ppl: 26.814072 +epoch: 0, batch: 13769, sum loss: 5025.421875, avg loss: 3.602453, ppl: 36.688129 +epoch: 0, batch: 13770, sum loss: 6171.604004, avg loss: 3.569464, ppl: 35.497578 +epoch: 0, batch: 13771, sum loss: 6318.094727, avg loss: 3.709979, ppl: 40.852962 +epoch: 0, batch: 13772, sum loss: 5584.181152, avg loss: 3.323917, ppl: 27.768919 +epoch: 0, batch: 13773, sum loss: 5733.388184, avg loss: 3.400586, ppl: 29.981668 +epoch: 0, batch: 13774, sum loss: 6327.272949, avg loss: 3.693679, ppl: 40.192455 +epoch: 0, batch: 13775, sum loss: 6283.077148, avg loss: 3.477076, ppl: 
32.364960 +epoch: 0, batch: 13776, sum loss: 5261.608398, avg loss: 3.383671, ppl: 29.478798 +epoch: 0, batch: 13777, sum loss: 6969.348145, avg loss: 3.537740, ppl: 34.389118 +epoch: 0, batch: 13778, sum loss: 6667.849121, avg loss: 3.872154, ppl: 48.045753 +epoch: 0, batch: 13779, sum loss: 5871.349121, avg loss: 3.437558, ppl: 31.110891 +epoch: 0, batch: 13780, sum loss: 6762.480469, avg loss: 3.544277, ppl: 34.614647 +epoch: 0, batch: 13781, sum loss: 6827.601074, avg loss: 3.660912, ppl: 38.896801 +epoch: 0, batch: 13782, sum loss: 6346.145508, avg loss: 3.415579, ppl: 30.434561 +epoch: 0, batch: 13783, sum loss: 6300.569824, avg loss: 3.551618, ppl: 34.869678 +epoch: 0, batch: 13784, sum loss: 5320.807617, avg loss: 3.323428, ppl: 27.755323 +epoch: 0, batch: 13785, sum loss: 6107.005859, avg loss: 3.491713, ppl: 32.842152 +epoch: 0, batch: 13786, sum loss: 6107.355957, avg loss: 3.487925, ppl: 32.717979 +epoch: 0, batch: 13787, sum loss: 5025.054199, avg loss: 3.215006, ppl: 24.903439 +epoch: 0, batch: 13788, sum loss: 5199.700684, avg loss: 3.398497, ppl: 29.919107 +epoch: 0, batch: 13789, sum loss: 5325.175781, avg loss: 3.480507, ppl: 32.476189 +epoch: 0, batch: 13790, sum loss: 6471.815918, avg loss: 3.615540, ppl: 37.171398 +epoch: 0, batch: 13791, sum loss: 6203.903809, avg loss: 3.657962, ppl: 38.782238 +epoch: 0, batch: 13792, sum loss: 6795.816895, avg loss: 3.783863, ppl: 43.985611 +epoch: 0, batch: 13793, sum loss: 5726.290039, avg loss: 3.445421, ppl: 31.356489 +epoch: 0, batch: 13794, sum loss: 5700.659668, avg loss: 3.231667, ppl: 25.321823 +epoch: 0, batch: 13795, sum loss: 6097.758789, avg loss: 3.671137, ppl: 39.296566 +epoch: 0, batch: 13796, sum loss: 4966.285156, avg loss: 3.288931, ppl: 26.814175 +epoch: 0, batch: 13797, sum loss: 6371.250488, avg loss: 3.672190, ppl: 39.337978 +epoch: 0, batch: 13798, sum loss: 5626.332031, avg loss: 3.597399, ppl: 36.503166 +epoch: 0, batch: 13799, sum loss: 5520.827637, avg loss: 3.317805, ppl: 
27.599710 +epoch: 0, batch: 13800, sum loss: 5678.847168, avg loss: 3.414821, ppl: 30.411503 +epoch: 0, batch: 13801, sum loss: 5264.083008, avg loss: 3.663245, ppl: 38.987652 +epoch: 0, batch: 13802, sum loss: 5315.752930, avg loss: 3.648423, ppl: 38.414055 +epoch: 0, batch: 13803, sum loss: 4868.908203, avg loss: 3.590640, ppl: 36.257286 +epoch: 0, batch: 13804, sum loss: 7015.518555, avg loss: 3.775844, ppl: 43.634323 +epoch: 0, batch: 13805, sum loss: 4896.302246, avg loss: 3.253357, ppl: 25.877068 +epoch: 0, batch: 13806, sum loss: 5973.325684, avg loss: 3.794997, ppl: 44.478104 +epoch: 0, batch: 13807, sum loss: 5632.337402, avg loss: 3.346606, ppl: 28.406147 +epoch: 0, batch: 13808, sum loss: 6607.023926, avg loss: 3.600558, ppl: 36.618671 +epoch: 0, batch: 13809, sum loss: 6069.966797, avg loss: 3.564279, ppl: 35.313976 +epoch: 0, batch: 13810, sum loss: 5637.252441, avg loss: 3.402083, ppl: 30.026592 +epoch: 0, batch: 13811, sum loss: 6892.525879, avg loss: 3.764351, ppl: 43.135696 +epoch: 0, batch: 13812, sum loss: 4605.421875, avg loss: 3.327617, ppl: 27.871841 +epoch: 0, batch: 13813, sum loss: 5714.332520, avg loss: 3.473758, ppl: 32.257748 +epoch: 0, batch: 13814, sum loss: 5877.663086, avg loss: 3.465603, ppl: 31.995758 +epoch: 0, batch: 13815, sum loss: 5627.866211, avg loss: 3.345937, ppl: 28.387163 +epoch: 0, batch: 13816, sum loss: 5724.147949, avg loss: 3.345499, ppl: 28.374725 +epoch: 0, batch: 13817, sum loss: 5789.748047, avg loss: 3.554173, ppl: 34.958897 +epoch: 0, batch: 13818, sum loss: 4673.307617, avg loss: 3.088769, ppl: 21.950045 +epoch: 0, batch: 13819, sum loss: 6346.933594, avg loss: 3.600076, ppl: 36.601006 +epoch: 0, batch: 13820, sum loss: 5855.708984, avg loss: 3.744059, ppl: 42.269226 +epoch: 0, batch: 13821, sum loss: 5271.750000, avg loss: 3.625688, ppl: 37.550545 +epoch: 0, batch: 13822, sum loss: 5970.848633, avg loss: 3.477489, ppl: 32.378319 +epoch: 0, batch: 13823, sum loss: 5630.292969, avg loss: 3.235801, ppl: 
25.426718 +epoch: 0, batch: 13824, sum loss: 5253.671875, avg loss: 3.474651, ppl: 32.286552 +epoch: 0, batch: 13825, sum loss: 5172.048828, avg loss: 3.180842, ppl: 24.067001 +epoch: 0, batch: 13826, sum loss: 4711.893066, avg loss: 3.681166, ppl: 39.692665 +epoch: 0, batch: 13827, sum loss: 7310.879395, avg loss: 3.829691, ppl: 46.048294 +epoch: 0, batch: 13828, sum loss: 5751.476562, avg loss: 3.541549, ppl: 34.520367 +epoch: 0, batch: 13829, sum loss: 5358.770020, avg loss: 3.439519, ppl: 31.171951 +epoch: 0, batch: 13830, sum loss: 5991.270508, avg loss: 3.369668, ppl: 29.068888 +epoch: 0, batch: 13831, sum loss: 7261.917480, avg loss: 3.836195, ppl: 46.348782 +epoch: 0, batch: 13832, sum loss: 5663.258789, avg loss: 3.267893, ppl: 26.255962 +epoch: 0, batch: 13833, sum loss: 6628.964844, avg loss: 3.563960, ppl: 35.302704 +epoch: 0, batch: 13834, sum loss: 6736.420898, avg loss: 3.725897, ppl: 41.508434 +epoch: 0, batch: 13835, sum loss: 6474.790527, avg loss: 3.515087, ppl: 33.618866 +epoch: 0, batch: 13836, sum loss: 6786.802734, avg loss: 3.516478, ppl: 33.665661 +epoch: 0, batch: 13837, sum loss: 6456.318359, avg loss: 3.443370, ppl: 31.292231 +epoch: 0, batch: 13838, sum loss: 5499.474609, avg loss: 3.485092, ppl: 32.625420 +epoch: 0, batch: 13839, sum loss: 5361.173340, avg loss: 3.291082, ppl: 26.871933 +epoch: 0, batch: 13840, sum loss: 5309.791504, avg loss: 3.521082, ppl: 33.820999 +epoch: 0, batch: 13841, sum loss: 5264.078125, avg loss: 3.525839, ppl: 33.982285 +epoch: 0, batch: 13842, sum loss: 5641.810547, avg loss: 3.450649, ppl: 31.520828 +epoch: 0, batch: 13843, sum loss: 6036.205566, avg loss: 3.640655, ppl: 38.116787 +epoch: 0, batch: 13844, sum loss: 6889.054199, avg loss: 3.513031, ppl: 33.549820 +epoch: 0, batch: 13845, sum loss: 5406.390625, avg loss: 3.413125, ppl: 30.359980 +epoch: 0, batch: 13846, sum loss: 5550.513672, avg loss: 3.466904, ppl: 32.037415 +epoch: 0, batch: 13847, sum loss: 4896.704102, avg loss: 3.472840, ppl: 
32.228142 +epoch: 0, batch: 13848, sum loss: 8118.579590, avg loss: 3.871521, ppl: 48.015373 +epoch: 0, batch: 13849, sum loss: 4391.975586, avg loss: 3.073461, ppl: 21.616590 +epoch: 0, batch: 13850, sum loss: 5456.077637, avg loss: 3.437982, ppl: 31.124096 +epoch: 0, batch: 13851, sum loss: 5183.250000, avg loss: 3.217412, ppl: 24.963425 +epoch: 0, batch: 13852, sum loss: 5528.483887, avg loss: 3.244416, ppl: 25.646715 +epoch: 0, batch: 13853, sum loss: 6725.152344, avg loss: 3.590578, ppl: 36.255020 +epoch: 0, batch: 13854, sum loss: 6456.983887, avg loss: 3.447402, ppl: 31.418661 +epoch: 0, batch: 13855, sum loss: 6285.063965, avg loss: 3.509248, ppl: 33.423134 +epoch: 0, batch: 13856, sum loss: 6867.758789, avg loss: 3.607016, ppl: 36.855923 +epoch: 0, batch: 13857, sum loss: 5983.254395, avg loss: 3.490813, ppl: 32.812630 +epoch: 0, batch: 13858, sum loss: 5647.926270, avg loss: 3.534372, ppl: 34.273479 +epoch: 0, batch: 13859, sum loss: 5416.255859, avg loss: 3.637512, ppl: 37.997189 +epoch: 0, batch: 13860, sum loss: 4769.219727, avg loss: 3.194387, ppl: 24.395214 +epoch: 0, batch: 13861, sum loss: 5117.128906, avg loss: 3.473951, ppl: 32.263954 +epoch: 0, batch: 13862, sum loss: 6163.991211, avg loss: 3.381235, ppl: 29.407070 +epoch: 0, batch: 13863, sum loss: 6806.071777, avg loss: 3.665090, ppl: 39.059635 +epoch: 0, batch: 13864, sum loss: 5622.420898, avg loss: 3.382925, ppl: 29.456800 +epoch: 0, batch: 13865, sum loss: 6994.417969, avg loss: 3.793068, ppl: 44.392406 +epoch: 0, batch: 13866, sum loss: 6583.686523, avg loss: 3.496382, ppl: 32.995842 +epoch: 0, batch: 13867, sum loss: 5622.428711, avg loss: 3.527245, ppl: 34.030087 +epoch: 0, batch: 13868, sum loss: 5660.535645, avg loss: 3.804123, ppl: 44.885887 +epoch: 0, batch: 13869, sum loss: 5138.454590, avg loss: 3.500310, ppl: 33.125710 +epoch: 0, batch: 13870, sum loss: 7632.687988, avg loss: 3.701595, ppl: 40.511852 +epoch: 0, batch: 13871, sum loss: 5055.091797, avg loss: 3.401811, ppl: 
30.018425 +epoch: 0, batch: 13872, sum loss: 6304.689453, avg loss: 3.492903, ppl: 32.881256 +epoch: 0, batch: 13873, sum loss: 6349.149414, avg loss: 3.570950, ppl: 35.550358 +epoch: 0, batch: 13874, sum loss: 5826.428223, avg loss: 3.413256, ppl: 30.363955 +epoch: 0, batch: 13875, sum loss: 5381.770020, avg loss: 3.421341, ppl: 30.610449 +epoch: 0, batch: 13876, sum loss: 5819.691406, avg loss: 3.275009, ppl: 26.443468 +epoch: 0, batch: 13877, sum loss: 6087.287598, avg loss: 3.417904, ppl: 30.505421 +epoch: 0, batch: 13878, sum loss: 4729.065430, avg loss: 3.361099, ppl: 28.820833 +epoch: 0, batch: 13879, sum loss: 4878.036133, avg loss: 3.350300, ppl: 28.511274 +epoch: 0, batch: 13880, sum loss: 6458.087891, avg loss: 3.517477, ppl: 33.699299 +epoch: 0, batch: 13881, sum loss: 6871.002441, avg loss: 3.698064, ppl: 40.369057 +epoch: 0, batch: 13882, sum loss: 6949.018555, avg loss: 3.645865, ppl: 38.315891 +epoch: 0, batch: 13883, sum loss: 5690.012207, avg loss: 3.529784, ppl: 34.116604 +epoch: 0, batch: 13884, sum loss: 6633.089844, avg loss: 3.640554, ppl: 38.112953 +epoch: 0, batch: 13885, sum loss: 5504.122070, avg loss: 3.546471, ppl: 34.690666 +epoch: 0, batch: 13886, sum loss: 5643.366699, avg loss: 3.349179, ppl: 28.479343 +epoch: 0, batch: 13887, sum loss: 5226.863770, avg loss: 3.327094, ppl: 27.857260 +epoch: 0, batch: 13888, sum loss: 6427.405762, avg loss: 3.554981, ppl: 34.987156 +epoch: 0, batch: 13889, sum loss: 6160.670410, avg loss: 3.445565, ppl: 31.361004 +epoch: 0, batch: 13890, sum loss: 4813.477539, avg loss: 3.237039, ppl: 25.458237 +epoch: 0, batch: 13891, sum loss: 5980.378906, avg loss: 3.485069, ppl: 32.624687 +epoch: 0, batch: 13892, sum loss: 5953.491211, avg loss: 3.508245, ppl: 33.389610 +epoch: 0, batch: 13893, sum loss: 6246.064941, avg loss: 3.753645, ppl: 42.676353 +epoch: 0, batch: 13894, sum loss: 5646.312500, avg loss: 3.432409, ppl: 30.951101 +epoch: 0, batch: 13895, sum loss: 5299.493652, avg loss: 3.335112, ppl: 
28.081537 +epoch: 0, batch: 13896, sum loss: 5743.099609, avg loss: 3.547313, ppl: 34.719898 +epoch: 0, batch: 13897, sum loss: 6734.880859, avg loss: 3.513240, ppl: 33.556820 +epoch: 0, batch: 13898, sum loss: 6284.869141, avg loss: 3.348359, ppl: 28.455988 +epoch: 0, batch: 13899, sum loss: 7704.765137, avg loss: 3.814240, ppl: 45.342293 +epoch: 0, batch: 13900, sum loss: 6157.755859, avg loss: 3.453593, ppl: 31.613777 +epoch: 0, batch: 13901, sum loss: 6127.096191, avg loss: 3.331754, ppl: 27.987402 +epoch: 0, batch: 13902, sum loss: 6326.651855, avg loss: 3.272971, ppl: 26.389618 +epoch: 0, batch: 13903, sum loss: 5607.867188, avg loss: 3.677290, ppl: 39.539097 +epoch: 0, batch: 13904, sum loss: 5386.928223, avg loss: 3.262828, ppl: 26.123301 +epoch: 0, batch: 13905, sum loss: 4951.701660, avg loss: 3.279273, ppl: 26.556454 +epoch: 0, batch: 13906, sum loss: 6082.902832, avg loss: 3.657789, ppl: 38.775505 +epoch: 0, batch: 13907, sum loss: 6137.923340, avg loss: 3.702004, ppl: 40.528461 +epoch: 0, batch: 13908, sum loss: 6039.200195, avg loss: 3.490867, ppl: 32.814388 +epoch: 0, batch: 13909, sum loss: 6653.746094, avg loss: 3.476356, ppl: 32.341663 +epoch: 0, batch: 13910, sum loss: 4948.715820, avg loss: 3.255734, ppl: 25.938652 +epoch: 0, batch: 13911, sum loss: 5898.896484, avg loss: 3.439590, ppl: 31.174166 +epoch: 0, batch: 13912, sum loss: 6121.438965, avg loss: 3.462353, ppl: 31.891914 +epoch: 0, batch: 13913, sum loss: 5843.798828, avg loss: 3.381828, ppl: 29.424513 +epoch: 0, batch: 13914, sum loss: 5678.989258, avg loss: 3.388418, ppl: 29.619070 +epoch: 0, batch: 13915, sum loss: 6220.932129, avg loss: 3.423738, ppl: 30.683905 +epoch: 0, batch: 13916, sum loss: 8407.511719, avg loss: 3.819860, ppl: 45.597824 +epoch: 0, batch: 13917, sum loss: 6001.892578, avg loss: 3.509879, ppl: 33.444210 +epoch: 0, batch: 13918, sum loss: 6801.635742, avg loss: 3.728967, ppl: 41.636074 +epoch: 0, batch: 13919, sum loss: 5314.572266, avg loss: 3.329932, ppl: 
27.936455 +epoch: 0, batch: 13920, sum loss: 5469.774902, avg loss: 3.538018, ppl: 34.398655 +epoch: 0, batch: 13921, sum loss: 6247.528320, avg loss: 3.501978, ppl: 33.181015 +epoch: 0, batch: 13922, sum loss: 5542.336426, avg loss: 3.440308, ppl: 31.196575 +epoch: 0, batch: 13923, sum loss: 5507.462891, avg loss: 3.364363, ppl: 28.915085 +epoch: 0, batch: 13924, sum loss: 5750.140625, avg loss: 3.573735, ppl: 35.649513 +epoch: 0, batch: 13925, sum loss: 5928.653809, avg loss: 3.345742, ppl: 28.381613 +epoch: 0, batch: 13926, sum loss: 5688.393555, avg loss: 3.414402, ppl: 30.398758 +epoch: 0, batch: 13927, sum loss: 6691.389648, avg loss: 3.510697, ppl: 33.471603 +epoch: 0, batch: 13928, sum loss: 5670.097168, avg loss: 3.407511, ppl: 30.189993 +epoch: 0, batch: 13929, sum loss: 4947.076172, avg loss: 3.271876, ppl: 26.360743 +epoch: 0, batch: 13930, sum loss: 5322.217285, avg loss: 3.464985, ppl: 31.975992 +epoch: 0, batch: 13931, sum loss: 5846.526367, avg loss: 3.490464, ppl: 32.801155 +epoch: 0, batch: 13932, sum loss: 6498.614746, avg loss: 3.747759, ppl: 42.425915 +epoch: 0, batch: 13933, sum loss: 6506.767578, avg loss: 3.659600, ppl: 38.845783 +epoch: 0, batch: 13934, sum loss: 6452.652832, avg loss: 3.592791, ppl: 36.335358 +epoch: 0, batch: 13935, sum loss: 6241.870117, avg loss: 3.566783, ppl: 35.402519 +epoch: 0, batch: 13936, sum loss: 5532.543945, avg loss: 3.451369, ppl: 31.543531 +epoch: 0, batch: 13937, sum loss: 4770.911133, avg loss: 3.369287, ppl: 29.057816 +epoch: 0, batch: 13938, sum loss: 6550.061035, avg loss: 3.538661, ppl: 34.420792 +epoch: 0, batch: 13939, sum loss: 5539.874512, avg loss: 3.526337, ppl: 33.999187 +epoch: 0, batch: 13940, sum loss: 6179.629883, avg loss: 3.427415, ppl: 30.796942 +epoch: 0, batch: 13941, sum loss: 6203.782227, avg loss: 3.855676, ppl: 47.260540 +epoch: 0, batch: 13942, sum loss: 7040.773926, avg loss: 3.573997, ppl: 35.658836 +epoch: 0, batch: 13943, sum loss: 5746.003418, avg loss: 3.381992, ppl: 
29.429325 +epoch: 0, batch: 13944, sum loss: 5474.184082, avg loss: 3.647025, ppl: 38.360371 +epoch: 0, batch: 13945, sum loss: 6248.995117, avg loss: 3.544523, ppl: 34.623184 +epoch: 0, batch: 13946, sum loss: 5328.273438, avg loss: 3.430955, ppl: 30.906157 +epoch: 0, batch: 13947, sum loss: 5842.488770, avg loss: 3.436758, ppl: 31.086023 +epoch: 0, batch: 13948, sum loss: 5263.570312, avg loss: 3.125636, ppl: 22.774366 +epoch: 0, batch: 13949, sum loss: 5985.814941, avg loss: 3.406838, ppl: 30.169703 +epoch: 0, batch: 13950, sum loss: 5938.068848, avg loss: 3.722927, ppl: 41.385365 +epoch: 0, batch: 13951, sum loss: 5934.225586, avg loss: 3.430188, ppl: 30.882454 +epoch: 0, batch: 13952, sum loss: 6184.716797, avg loss: 3.441690, ppl: 31.239708 +epoch: 0, batch: 13953, sum loss: 5888.858398, avg loss: 3.708349, ppl: 40.786423 +epoch: 0, batch: 13954, sum loss: 5363.656250, avg loss: 3.545047, ppl: 34.641323 +epoch: 0, batch: 13955, sum loss: 5689.757324, avg loss: 3.471481, ppl: 32.184368 +epoch: 0, batch: 13956, sum loss: 6115.714844, avg loss: 3.666496, ppl: 39.114601 +epoch: 0, batch: 13957, sum loss: 6413.045410, avg loss: 3.590731, ppl: 36.260578 +epoch: 0, batch: 13958, sum loss: 5619.201172, avg loss: 3.574555, ppl: 35.678753 +epoch: 0, batch: 13959, sum loss: 5166.677246, avg loss: 3.399130, ppl: 29.938038 +epoch: 0, batch: 13960, sum loss: 6248.628906, avg loss: 3.605671, ppl: 36.806389 +epoch: 0, batch: 13961, sum loss: 5460.133789, avg loss: 3.238514, ppl: 25.495800 +epoch: 0, batch: 13962, sum loss: 5716.921875, avg loss: 3.469006, ppl: 32.104816 +epoch: 0, batch: 13963, sum loss: 5371.414062, avg loss: 3.384634, ppl: 29.507191 +epoch: 0, batch: 13964, sum loss: 6114.618164, avg loss: 3.472242, ppl: 32.208870 +epoch: 0, batch: 13965, sum loss: 5132.984863, avg loss: 3.350512, ppl: 28.517338 +epoch: 0, batch: 13966, sum loss: 6019.258301, avg loss: 3.364594, ppl: 28.921745 +epoch: 0, batch: 13967, sum loss: 6690.290039, avg loss: 3.706532, ppl: 
40.712360 +epoch: 0, batch: 13968, sum loss: 6078.841797, avg loss: 3.626994, ppl: 37.599628 +epoch: 0, batch: 13969, sum loss: 5139.633789, avg loss: 3.328779, ppl: 27.904236 +epoch: 0, batch: 13970, sum loss: 4571.835938, avg loss: 3.361644, ppl: 28.836559 +epoch: 0, batch: 13971, sum loss: 6808.720703, avg loss: 3.566643, ppl: 35.397556 +epoch: 0, batch: 13972, sum loss: 7332.534668, avg loss: 3.635367, ppl: 37.915764 +epoch: 0, batch: 13973, sum loss: 6319.137695, avg loss: 3.391915, ppl: 29.722813 +epoch: 0, batch: 13974, sum loss: 5066.727539, avg loss: 3.523454, ppl: 33.901337 +epoch: 0, batch: 13975, sum loss: 6000.851562, avg loss: 3.552902, ppl: 34.914501 +epoch: 0, batch: 13976, sum loss: 6357.801758, avg loss: 3.591979, ppl: 36.305840 +epoch: 0, batch: 13977, sum loss: 5348.966797, avg loss: 3.406985, ppl: 30.174141 +epoch: 0, batch: 13978, sum loss: 6024.612793, avg loss: 3.454480, ppl: 31.641821 +epoch: 0, batch: 13979, sum loss: 5755.910645, avg loss: 3.496908, ppl: 33.013222 +epoch: 0, batch: 13980, sum loss: 5349.481445, avg loss: 3.377198, ppl: 29.288589 +epoch: 0, batch: 13981, sum loss: 6266.490723, avg loss: 3.727835, ppl: 41.588966 +epoch: 0, batch: 13982, sum loss: 5680.424805, avg loss: 3.395353, ppl: 29.825169 +epoch: 0, batch: 13983, sum loss: 5992.214355, avg loss: 3.658250, ppl: 38.793407 +epoch: 0, batch: 13984, sum loss: 6141.964355, avg loss: 3.481839, ppl: 32.519478 +epoch: 0, batch: 13985, sum loss: 5365.143066, avg loss: 3.525061, ppl: 33.955856 +epoch: 0, batch: 13986, sum loss: 5516.303223, avg loss: 3.465014, ppl: 31.976923 +epoch: 0, batch: 13987, sum loss: 4302.261719, avg loss: 3.284169, ppl: 26.686804 +epoch: 0, batch: 13988, sum loss: 5255.705078, avg loss: 3.568028, ppl: 35.446621 +epoch: 0, batch: 13989, sum loss: 6697.833008, avg loss: 3.836102, ppl: 46.344448 +epoch: 0, batch: 13990, sum loss: 5636.192871, avg loss: 3.652750, ppl: 38.580616 +epoch: 0, batch: 13991, sum loss: 6188.882324, avg loss: 3.283227, ppl: 
26.661663 +epoch: 0, batch: 13992, sum loss: 6083.448730, avg loss: 3.408095, ppl: 30.207634 +epoch: 0, batch: 13993, sum loss: 5757.094238, avg loss: 3.582510, ppl: 35.963715 +epoch: 0, batch: 13994, sum loss: 6153.551758, avg loss: 3.675957, ppl: 39.486427 +epoch: 0, batch: 13995, sum loss: 5595.705566, avg loss: 3.737946, ppl: 42.011612 +epoch: 0, batch: 13996, sum loss: 5686.645996, avg loss: 3.352975, ppl: 28.587666 +epoch: 0, batch: 13997, sum loss: 6106.705078, avg loss: 3.497540, ppl: 33.034096 +epoch: 0, batch: 13998, sum loss: 5674.925293, avg loss: 3.612301, ppl: 37.051224 +epoch: 0, batch: 13999, sum loss: 6812.991211, avg loss: 3.546586, ppl: 34.694660 +epoch: 0, batch: 14000, sum loss: 5584.164551, avg loss: 3.563602, ppl: 35.290100 +epoch: 0, batch: 14001, sum loss: 5454.778809, avg loss: 3.430678, ppl: 30.897596 +epoch: 0, batch: 14002, sum loss: 5206.000488, avg loss: 3.233541, ppl: 25.369320 +epoch: 0, batch: 14003, sum loss: 5247.696777, avg loss: 3.459260, ppl: 31.793425 +epoch: 0, batch: 14004, sum loss: 6556.252930, avg loss: 3.584611, ppl: 36.039333 +epoch: 0, batch: 14005, sum loss: 6474.992676, avg loss: 3.567489, ppl: 35.427528 +epoch: 0, batch: 14006, sum loss: 5411.778320, avg loss: 3.446993, ppl: 31.405802 +epoch: 0, batch: 14007, sum loss: 6139.187988, avg loss: 3.806068, ppl: 44.973263 +epoch: 0, batch: 14008, sum loss: 6412.966797, avg loss: 3.681381, ppl: 39.701202 +epoch: 0, batch: 14009, sum loss: 5104.375977, avg loss: 3.515410, ppl: 33.629726 +epoch: 0, batch: 14010, sum loss: 5757.038574, avg loss: 3.466007, ppl: 32.008690 +epoch: 0, batch: 14011, sum loss: 7168.744141, avg loss: 3.704777, ppl: 40.640995 +epoch: 0, batch: 14012, sum loss: 6264.681641, avg loss: 3.573692, ppl: 35.647957 +epoch: 0, batch: 14013, sum loss: 5509.380371, avg loss: 3.326921, ppl: 27.852438 +epoch: 0, batch: 14014, sum loss: 6146.784180, avg loss: 3.565420, ppl: 35.354313 +epoch: 0, batch: 14015, sum loss: 6124.585938, avg loss: 3.647758, ppl: 
38.388504 +epoch: 0, batch: 14016, sum loss: 6371.541016, avg loss: 3.287689, ppl: 26.780901 +epoch: 0, batch: 14017, sum loss: 5448.479980, avg loss: 3.446224, ppl: 31.381662 +epoch: 0, batch: 14018, sum loss: 5128.637695, avg loss: 3.544325, ppl: 34.616299 +epoch: 0, batch: 14019, sum loss: 6517.859375, avg loss: 3.684488, ppl: 39.824738 +epoch: 0, batch: 14020, sum loss: 6062.407227, avg loss: 3.442593, ppl: 31.267941 +epoch: 0, batch: 14021, sum loss: 5695.239258, avg loss: 3.390023, ppl: 29.666649 +epoch: 0, batch: 14022, sum loss: 6757.247070, avg loss: 3.737415, ppl: 41.989319 +epoch: 0, batch: 14023, sum loss: 6526.321289, avg loss: 3.533471, ppl: 34.242630 +epoch: 0, batch: 14024, sum loss: 5542.267090, avg loss: 3.687470, ppl: 39.943649 +epoch: 0, batch: 14025, sum loss: 5170.990234, avg loss: 3.223809, ppl: 25.123646 +epoch: 0, batch: 14026, sum loss: 6320.251953, avg loss: 3.530867, ppl: 34.153561 +epoch: 0, batch: 14027, sum loss: 5742.204102, avg loss: 3.367862, ppl: 29.016417 +epoch: 0, batch: 14028, sum loss: 5381.293945, avg loss: 3.378088, ppl: 29.314669 +epoch: 0, batch: 14029, sum loss: 4937.078613, avg loss: 3.428527, ppl: 30.831192 +epoch: 0, batch: 14030, sum loss: 6540.384766, avg loss: 3.686801, ppl: 39.916939 +epoch: 0, batch: 14031, sum loss: 5921.541016, avg loss: 3.565046, ppl: 35.341072 +epoch: 0, batch: 14032, sum loss: 5355.312988, avg loss: 3.253532, ppl: 25.881584 +epoch: 0, batch: 14033, sum loss: 5306.494141, avg loss: 3.318633, ppl: 27.622561 +epoch: 0, batch: 14034, sum loss: 6440.327148, avg loss: 3.650979, ppl: 38.512352 +epoch: 0, batch: 14035, sum loss: 6813.329590, avg loss: 3.533885, ppl: 34.256790 +epoch: 0, batch: 14036, sum loss: 6310.087402, avg loss: 3.431260, ppl: 30.915583 +epoch: 0, batch: 14037, sum loss: 5407.745117, avg loss: 3.315601, ppl: 27.538937 +epoch: 0, batch: 14038, sum loss: 5814.055176, avg loss: 3.297819, ppl: 27.053574 +epoch: 0, batch: 14039, sum loss: 5250.894043, avg loss: 3.461367, ppl: 
31.860504 +epoch: 0, batch: 14040, sum loss: 5057.616699, avg loss: 3.442898, ppl: 31.277454 +epoch: 0, batch: 14041, sum loss: 5060.370117, avg loss: 3.497146, ppl: 33.021072 +epoch: 0, batch: 14042, sum loss: 5385.052734, avg loss: 3.547466, ppl: 34.725197 +epoch: 0, batch: 14043, sum loss: 5687.165039, avg loss: 3.474139, ppl: 32.270023 +epoch: 0, batch: 14044, sum loss: 5667.648438, avg loss: 3.418365, ppl: 30.519461 +epoch: 0, batch: 14045, sum loss: 4849.773438, avg loss: 3.252699, ppl: 25.860033 +epoch: 0, batch: 14046, sum loss: 6112.480469, avg loss: 3.553768, ppl: 34.944729 +epoch: 0, batch: 14047, sum loss: 5902.108398, avg loss: 3.351566, ppl: 28.547413 +epoch: 0, batch: 14048, sum loss: 5070.801758, avg loss: 3.338250, ppl: 28.169785 +epoch: 0, batch: 14049, sum loss: 5424.615723, avg loss: 3.319838, ppl: 27.655878 +epoch: 0, batch: 14050, sum loss: 6309.291504, avg loss: 3.597087, ppl: 36.491795 +epoch: 0, batch: 14051, sum loss: 6216.905273, avg loss: 3.400933, ppl: 29.992064 +epoch: 0, batch: 14052, sum loss: 5690.145508, avg loss: 3.523310, ppl: 33.896439 +epoch: 0, batch: 14053, sum loss: 5875.810547, avg loss: 3.325303, ppl: 27.807425 +epoch: 0, batch: 14054, sum loss: 5986.633789, avg loss: 3.617301, ppl: 37.236946 +epoch: 0, batch: 14055, sum loss: 6650.402344, avg loss: 3.686476, ppl: 39.903969 +epoch: 0, batch: 14056, sum loss: 6075.234375, avg loss: 3.457731, ppl: 31.744881 +epoch: 0, batch: 14057, sum loss: 6540.757812, avg loss: 3.560565, ppl: 35.183079 +epoch: 0, batch: 14058, sum loss: 5691.757812, avg loss: 3.550691, ppl: 34.837379 +epoch: 0, batch: 14059, sum loss: 4613.746094, avg loss: 3.149315, ppl: 23.320076 +epoch: 0, batch: 14060, sum loss: 5688.368652, avg loss: 3.483386, ppl: 32.569801 +epoch: 0, batch: 14061, sum loss: 6908.943359, avg loss: 3.628647, ppl: 37.661819 +epoch: 0, batch: 14062, sum loss: 6983.097656, avg loss: 3.716391, ppl: 41.115723 +epoch: 0, batch: 14063, sum loss: 5981.100098, avg loss: 3.532841, ppl: 
34.221058 +epoch: 0, batch: 14064, sum loss: 5955.473145, avg loss: 3.474605, ppl: 32.285076 +epoch: 0, batch: 14065, sum loss: 5814.779297, avg loss: 3.365034, ppl: 28.934490 +epoch: 0, batch: 14066, sum loss: 5795.476074, avg loss: 3.472424, ppl: 32.214745 +epoch: 0, batch: 14067, sum loss: 6390.143555, avg loss: 3.567919, ppl: 35.442768 +epoch: 0, batch: 14068, sum loss: 6893.070801, avg loss: 3.756442, ppl: 42.795891 +epoch: 0, batch: 14069, sum loss: 5008.691406, avg loss: 3.361538, ppl: 28.833500 +epoch: 0, batch: 14070, sum loss: 6544.204590, avg loss: 3.670334, ppl: 39.265003 +epoch: 0, batch: 14071, sum loss: 5479.067871, avg loss: 3.574082, ppl: 35.661873 +epoch: 0, batch: 14072, sum loss: 5617.829102, avg loss: 3.298784, ppl: 27.079691 +epoch: 0, batch: 14073, sum loss: 5537.108887, avg loss: 3.363979, ppl: 28.903967 +epoch: 0, batch: 14074, sum loss: 5478.051758, avg loss: 3.282236, ppl: 26.635265 +epoch: 0, batch: 14075, sum loss: 6623.708008, avg loss: 3.635405, ppl: 37.917210 +epoch: 0, batch: 14076, sum loss: 5576.037109, avg loss: 3.606751, ppl: 36.846142 +epoch: 0, batch: 14077, sum loss: 6708.437988, avg loss: 3.669824, ppl: 39.244995 +epoch: 0, batch: 14078, sum loss: 5984.883301, avg loss: 3.594525, ppl: 36.398392 +epoch: 0, batch: 14079, sum loss: 6017.042969, avg loss: 3.178575, ppl: 24.012512 +epoch: 0, batch: 14080, sum loss: 6125.934082, avg loss: 3.496538, ppl: 33.001003 +epoch: 0, batch: 14081, sum loss: 5266.603516, avg loss: 3.511069, ppl: 33.484047 +epoch: 0, batch: 14082, sum loss: 8885.231445, avg loss: 3.844756, ppl: 46.747284 +epoch: 0, batch: 14083, sum loss: 5012.173828, avg loss: 3.286671, ppl: 26.753662 +epoch: 0, batch: 14084, sum loss: 4875.717773, avg loss: 3.228952, ppl: 25.253181 +epoch: 0, batch: 14085, sum loss: 6365.061523, avg loss: 3.585950, ppl: 36.087620 +epoch: 0, batch: 14086, sum loss: 5954.945312, avg loss: 3.660077, ppl: 38.864330 +epoch: 0, batch: 14087, sum loss: 5189.829590, avg loss: 3.257897, ppl: 
25.994810 +epoch: 0, batch: 14088, sum loss: 6924.188965, avg loss: 3.629030, ppl: 37.676243 +epoch: 0, batch: 14089, sum loss: 5955.826660, avg loss: 3.534615, ppl: 34.281822 +epoch: 0, batch: 14090, sum loss: 6318.120605, avg loss: 3.441242, ppl: 31.225716 +epoch: 0, batch: 14091, sum loss: 5836.985352, avg loss: 3.539712, ppl: 34.457001 +epoch: 0, batch: 14092, sum loss: 6301.833984, avg loss: 3.698259, ppl: 40.376961 +epoch: 0, batch: 14093, sum loss: 6940.023438, avg loss: 3.683664, ppl: 39.791939 +epoch: 0, batch: 14094, sum loss: 6761.279785, avg loss: 3.545506, ppl: 34.657219 +epoch: 0, batch: 14095, sum loss: 4987.666016, avg loss: 3.287848, ppl: 26.785166 +epoch: 0, batch: 14096, sum loss: 5795.296875, avg loss: 3.379182, ppl: 29.346745 +epoch: 0, batch: 14097, sum loss: 6021.823242, avg loss: 3.492937, ppl: 32.882385 +epoch: 0, batch: 14098, sum loss: 4939.809082, avg loss: 3.495972, ppl: 32.982346 +epoch: 0, batch: 14099, sum loss: 5984.273926, avg loss: 3.461119, ppl: 31.852591 +epoch: 0, batch: 14100, sum loss: 5441.768555, avg loss: 3.208590, ppl: 24.744173 +epoch: 0, batch: 14101, sum loss: 5637.796875, avg loss: 3.384032, ppl: 29.489420 +epoch: 0, batch: 14102, sum loss: 5992.030273, avg loss: 3.371992, ppl: 29.136518 +epoch: 0, batch: 14103, sum loss: 5500.935059, avg loss: 3.427374, ppl: 30.795656 +epoch: 0, batch: 14104, sum loss: 5160.330078, avg loss: 3.541750, ppl: 34.527294 +epoch: 0, batch: 14105, sum loss: 5361.617188, avg loss: 3.525061, ppl: 33.955833 +epoch: 0, batch: 14106, sum loss: 5611.054688, avg loss: 3.578479, ppl: 35.819004 +epoch: 0, batch: 14107, sum loss: 4386.350586, avg loss: 3.162473, ppl: 23.628969 +epoch: 0, batch: 14108, sum loss: 5967.337402, avg loss: 3.296872, ppl: 27.027954 +epoch: 0, batch: 14109, sum loss: 5738.623047, avg loss: 3.535812, ppl: 34.322880 +epoch: 0, batch: 14110, sum loss: 4751.138672, avg loss: 3.338819, ppl: 28.185806 +epoch: 0, batch: 14111, sum loss: 6186.596191, avg loss: 3.724622, ppl: 
41.455540 +epoch: 0, batch: 14112, sum loss: 6391.355957, avg loss: 3.786348, ppl: 44.095085 +epoch: 0, batch: 14113, sum loss: 6740.481934, avg loss: 3.645474, ppl: 38.300930 +epoch: 0, batch: 14114, sum loss: 4718.064453, avg loss: 3.265097, ppl: 26.182642 +epoch: 0, batch: 14115, sum loss: 5758.247070, avg loss: 3.421418, ppl: 30.612799 +epoch: 0, batch: 14116, sum loss: 5712.478516, avg loss: 3.426802, ppl: 30.778048 +epoch: 0, batch: 14117, sum loss: 5692.467773, avg loss: 3.548920, ppl: 34.775745 +epoch: 0, batch: 14118, sum loss: 5451.686035, avg loss: 3.639310, ppl: 38.065548 +epoch: 0, batch: 14119, sum loss: 5630.153320, avg loss: 3.521046, ppl: 33.819798 +epoch: 0, batch: 14120, sum loss: 4711.991699, avg loss: 3.196738, ppl: 24.452635 +epoch: 0, batch: 14121, sum loss: 5248.094238, avg loss: 3.325788, ppl: 27.820927 +epoch: 0, batch: 14122, sum loss: 5543.171387, avg loss: 3.404896, ppl: 30.111172 +epoch: 0, batch: 14123, sum loss: 4946.270020, avg loss: 3.348863, ppl: 28.470341 +epoch: 0, batch: 14124, sum loss: 5906.889648, avg loss: 3.516006, ppl: 33.649761 +epoch: 0, batch: 14125, sum loss: 5725.006836, avg loss: 3.316922, ppl: 27.575335 +epoch: 0, batch: 14126, sum loss: 5950.908203, avg loss: 3.371619, ppl: 29.125656 +epoch: 0, batch: 14127, sum loss: 5240.324219, avg loss: 3.318761, ppl: 27.626110 +epoch: 0, batch: 14128, sum loss: 5487.872559, avg loss: 3.419235, ppl: 30.546045 +epoch: 0, batch: 14129, sum loss: 5003.396484, avg loss: 3.396739, ppl: 29.866547 +epoch: 0, batch: 14130, sum loss: 5370.326172, avg loss: 3.530787, ppl: 34.150818 +epoch: 0, batch: 14131, sum loss: 5157.823242, avg loss: 3.306297, ppl: 27.283907 +epoch: 0, batch: 14132, sum loss: 5328.117676, avg loss: 3.370093, ppl: 29.081242 +epoch: 0, batch: 14133, sum loss: 6095.375977, avg loss: 3.501078, ppl: 33.151157 +epoch: 0, batch: 14134, sum loss: 6847.195312, avg loss: 3.529482, ppl: 34.106308 +epoch: 0, batch: 14135, sum loss: 6692.366211, avg loss: 3.603859, ppl: 
36.739738 +epoch: 0, batch: 14136, sum loss: 5778.594727, avg loss: 3.435550, ppl: 31.048477 +epoch: 0, batch: 14137, sum loss: 5876.534180, avg loss: 3.270192, ppl: 26.316383 +epoch: 0, batch: 14138, sum loss: 6713.219727, avg loss: 3.640575, ppl: 38.113735 +epoch: 0, batch: 14139, sum loss: 5422.258789, avg loss: 3.270361, ppl: 26.320845 +epoch: 0, batch: 14140, sum loss: 5330.340820, avg loss: 3.310771, ppl: 27.406240 +epoch: 0, batch: 14141, sum loss: 6417.760254, avg loss: 3.581340, ppl: 35.921631 +epoch: 0, batch: 14142, sum loss: 5109.579590, avg loss: 3.260740, ppl: 26.068829 +epoch: 0, batch: 14143, sum loss: 6372.740234, avg loss: 3.400608, ppl: 29.982332 +epoch: 0, batch: 14144, sum loss: 5528.286133, avg loss: 3.459503, ppl: 31.801172 +epoch: 0, batch: 14145, sum loss: 6715.211914, avg loss: 3.757813, ppl: 42.854610 +epoch: 0, batch: 14146, sum loss: 6328.387695, avg loss: 3.683578, ppl: 39.788521 +epoch: 0, batch: 14147, sum loss: 6226.328125, avg loss: 3.451401, ppl: 31.544571 +epoch: 0, batch: 14148, sum loss: 6476.245117, avg loss: 3.679685, ppl: 39.633892 +epoch: 0, batch: 14149, sum loss: 6037.524414, avg loss: 3.729169, ppl: 41.644482 +epoch: 0, batch: 14150, sum loss: 6374.340820, avg loss: 3.535408, ppl: 34.309010 +epoch: 0, batch: 14151, sum loss: 5708.223633, avg loss: 3.530132, ppl: 34.128483 +epoch: 0, batch: 14152, sum loss: 6062.548828, avg loss: 3.730799, ppl: 41.712433 +epoch: 0, batch: 14153, sum loss: 5540.018066, avg loss: 3.131723, ppl: 22.913429 +epoch: 0, batch: 14154, sum loss: 6452.122559, avg loss: 3.822348, ppl: 45.711395 +epoch: 0, batch: 14155, sum loss: 4329.808105, avg loss: 2.947453, ppl: 19.057344 +epoch: 0, batch: 14156, sum loss: 6267.447266, avg loss: 3.387810, ppl: 29.601040 +epoch: 0, batch: 14157, sum loss: 6500.657715, avg loss: 3.710421, ppl: 40.871021 +epoch: 0, batch: 14158, sum loss: 5543.544922, avg loss: 3.351599, ppl: 28.548351 +epoch: 0, batch: 14159, sum loss: 5673.384766, avg loss: 3.389119, ppl: 
29.639826 +epoch: 0, batch: 14160, sum loss: 5574.186523, avg loss: 3.519057, ppl: 33.752586 +epoch: 0, batch: 14161, sum loss: 5947.021484, avg loss: 3.666474, ppl: 39.113762 +epoch: 0, batch: 14162, sum loss: 6522.634766, avg loss: 3.631757, ppl: 37.779118 +epoch: 0, batch: 14163, sum loss: 6460.308594, avg loss: 3.593052, ppl: 36.344845 +epoch: 0, batch: 14164, sum loss: 5448.647461, avg loss: 3.306218, ppl: 27.281755 +epoch: 0, batch: 14165, sum loss: 6905.299316, avg loss: 3.640116, ppl: 38.096245 +epoch: 0, batch: 14166, sum loss: 5795.820312, avg loss: 3.487257, ppl: 32.696125 +epoch: 0, batch: 14167, sum loss: 6123.266113, avg loss: 3.519119, ppl: 33.754662 +epoch: 0, batch: 14168, sum loss: 6006.731934, avg loss: 3.550078, ppl: 34.816029 +epoch: 0, batch: 14169, sum loss: 5074.164062, avg loss: 3.447122, ppl: 31.409876 +epoch: 0, batch: 14170, sum loss: 6686.239746, avg loss: 3.547077, ppl: 34.711697 +epoch: 0, batch: 14171, sum loss: 6040.920410, avg loss: 3.543062, ppl: 34.572617 +epoch: 0, batch: 14172, sum loss: 5379.856934, avg loss: 3.375067, ppl: 29.226242 +epoch: 0, batch: 14173, sum loss: 6424.989258, avg loss: 3.623796, ppl: 37.479553 +epoch: 0, batch: 14174, sum loss: 6258.706055, avg loss: 3.763503, ppl: 43.099152 +epoch: 0, batch: 14175, sum loss: 5432.192383, avg loss: 3.176721, ppl: 23.968023 +epoch: 0, batch: 14176, sum loss: 6505.025391, avg loss: 3.601897, ppl: 36.667717 +epoch: 0, batch: 14177, sum loss: 4779.410645, avg loss: 3.361048, ppl: 28.819384 +epoch: 0, batch: 14178, sum loss: 5470.963867, avg loss: 3.315736, ppl: 27.542652 +epoch: 0, batch: 14179, sum loss: 7146.846191, avg loss: 3.735936, ppl: 41.927258 +epoch: 0, batch: 14180, sum loss: 4925.766113, avg loss: 3.454254, ppl: 31.634678 +epoch: 0, batch: 14181, sum loss: 5996.055176, avg loss: 3.347881, ppl: 28.442402 +epoch: 0, batch: 14182, sum loss: 6237.300781, avg loss: 3.326560, ppl: 27.842413 +epoch: 0, batch: 14183, sum loss: 5916.693359, avg loss: 3.394546, ppl: 
29.801115 +epoch: 0, batch: 14184, sum loss: 4926.416016, avg loss: 3.123916, ppl: 22.735245 +epoch: 0, batch: 14185, sum loss: 5343.715332, avg loss: 3.483517, ppl: 32.574089 +epoch: 0, batch: 14186, sum loss: 5064.053223, avg loss: 3.235817, ppl: 25.427130 +epoch: 0, batch: 14187, sum loss: 5301.632812, avg loss: 3.451584, ppl: 31.550323 +epoch: 0, batch: 14188, sum loss: 5792.965332, avg loss: 3.513017, ppl: 33.549339 +epoch: 0, batch: 14189, sum loss: 6303.750000, avg loss: 3.553410, ppl: 34.932243 +epoch: 0, batch: 14190, sum loss: 5500.333008, avg loss: 3.669335, ppl: 39.225807 +epoch: 0, batch: 14191, sum loss: 5395.370117, avg loss: 3.467462, ppl: 32.055271 +epoch: 0, batch: 14192, sum loss: 4788.133301, avg loss: 3.065386, ppl: 21.442743 +epoch: 0, batch: 14193, sum loss: 5500.018555, avg loss: 3.613679, ppl: 37.102291 +epoch: 0, batch: 14194, sum loss: 5018.014648, avg loss: 3.294822, ppl: 26.972624 +epoch: 0, batch: 14195, sum loss: 5364.411621, avg loss: 3.255104, ppl: 25.922312 +epoch: 0, batch: 14196, sum loss: 6167.993164, avg loss: 3.774782, ppl: 43.587994 +epoch: 0, batch: 14197, sum loss: 6860.917969, avg loss: 3.728760, ppl: 41.627438 +epoch: 0, batch: 14198, sum loss: 5876.839844, avg loss: 3.510657, ppl: 33.470261 +epoch: 0, batch: 14199, sum loss: 5619.910156, avg loss: 3.367232, ppl: 28.998152 +epoch: 0, batch: 14200, sum loss: 6080.235840, avg loss: 3.564030, ppl: 35.305199 +epoch: 0, batch: 14201, sum loss: 5165.370117, avg loss: 3.371652, ppl: 29.126595 +epoch: 0, batch: 14202, sum loss: 6673.251953, avg loss: 3.768070, ppl: 43.296421 +epoch: 0, batch: 14203, sum loss: 5683.586426, avg loss: 3.581340, ppl: 35.921646 +epoch: 0, batch: 14204, sum loss: 5903.536133, avg loss: 3.388942, ppl: 29.634569 +epoch: 0, batch: 14205, sum loss: 5932.628906, avg loss: 3.700954, ppl: 40.485897 +epoch: 0, batch: 14206, sum loss: 6023.522461, avg loss: 3.734360, ppl: 41.861225 +epoch: 0, batch: 14207, sum loss: 5667.291504, avg loss: 3.401736, ppl: 
30.016150 +epoch: 0, batch: 14208, sum loss: 7230.856445, avg loss: 3.817770, ppl: 45.502636 +epoch: 0, batch: 14209, sum loss: 5893.344238, avg loss: 3.318325, ppl: 27.614046 +epoch: 0, batch: 14210, sum loss: 5636.829102, avg loss: 3.418332, ppl: 30.518463 +epoch: 0, batch: 14211, sum loss: 6096.597656, avg loss: 3.598936, ppl: 36.559307 +epoch: 0, batch: 14212, sum loss: 5221.128906, avg loss: 3.327679, ppl: 27.873583 +epoch: 0, batch: 14213, sum loss: 4790.719727, avg loss: 3.174765, ppl: 23.921188 +epoch: 0, batch: 14214, sum loss: 6489.805176, avg loss: 3.369577, ppl: 29.066235 +epoch: 0, batch: 14215, sum loss: 6287.921387, avg loss: 3.709688, ppl: 40.841068 +epoch: 0, batch: 14216, sum loss: 6295.015137, avg loss: 3.530575, ppl: 34.143597 +epoch: 0, batch: 14217, sum loss: 6001.736328, avg loss: 3.499555, ppl: 33.100708 +epoch: 0, batch: 14218, sum loss: 5992.527832, avg loss: 3.453907, ppl: 31.623690 +epoch: 0, batch: 14219, sum loss: 5121.105469, avg loss: 3.418628, ppl: 30.527510 +epoch: 0, batch: 14220, sum loss: 5326.188965, avg loss: 3.416414, ppl: 30.459982 +epoch: 0, batch: 14221, sum loss: 5370.810547, avg loss: 3.494347, ppl: 32.928764 +epoch: 0, batch: 14222, sum loss: 4666.015625, avg loss: 3.373836, ppl: 29.190294 +epoch: 0, batch: 14223, sum loss: 6271.959473, avg loss: 3.612880, ppl: 37.072670 +epoch: 0, batch: 14224, sum loss: 6109.132324, avg loss: 3.411017, ppl: 30.296053 +epoch: 0, batch: 14225, sum loss: 6474.358398, avg loss: 3.535969, ppl: 34.328255 +epoch: 0, batch: 14226, sum loss: 6756.148438, avg loss: 3.601358, ppl: 36.647984 +epoch: 0, batch: 14227, sum loss: 5827.145996, avg loss: 3.470605, ppl: 32.156197 +epoch: 0, batch: 14228, sum loss: 6096.898438, avg loss: 3.646471, ppl: 38.339111 +epoch: 0, batch: 14229, sum loss: 4819.537598, avg loss: 3.321528, ppl: 27.702654 +epoch: 0, batch: 14230, sum loss: 6409.344238, avg loss: 3.574648, ppl: 35.682072 +epoch: 0, batch: 14231, sum loss: 5870.577148, avg loss: 3.498556, ppl: 
33.067673 +epoch: 0, batch: 14232, sum loss: 5565.028320, avg loss: 3.567326, ppl: 35.421753 +epoch: 0, batch: 14233, sum loss: 5947.835938, avg loss: 3.766837, ppl: 43.243076 +epoch: 0, batch: 14234, sum loss: 6210.620117, avg loss: 3.540832, ppl: 34.495617 +epoch: 0, batch: 14235, sum loss: 5787.063965, avg loss: 3.416212, ppl: 30.453846 +epoch: 0, batch: 14236, sum loss: 6311.453125, avg loss: 3.621029, ppl: 37.375996 +epoch: 0, batch: 14237, sum loss: 6240.042969, avg loss: 3.779554, ppl: 43.796501 +epoch: 0, batch: 14238, sum loss: 6595.262207, avg loss: 3.662000, ppl: 38.939140 +epoch: 0, batch: 14239, sum loss: 6178.593750, avg loss: 3.419255, ppl: 30.546642 +epoch: 0, batch: 14240, sum loss: 6546.917969, avg loss: 3.486112, ppl: 32.658710 +epoch: 0, batch: 14241, sum loss: 5917.884277, avg loss: 3.501707, ppl: 33.172016 +epoch: 0, batch: 14242, sum loss: 5953.170410, avg loss: 3.654494, ppl: 38.647953 +epoch: 0, batch: 14243, sum loss: 6337.605957, avg loss: 3.693244, ppl: 40.174953 +epoch: 0, batch: 14244, sum loss: 6128.713867, avg loss: 3.672087, ppl: 39.333927 +epoch: 0, batch: 14245, sum loss: 7585.321289, avg loss: 3.860214, ppl: 47.475533 +epoch: 0, batch: 14246, sum loss: 6117.648438, avg loss: 3.624199, ppl: 37.494694 +epoch: 0, batch: 14247, sum loss: 4747.262207, avg loss: 3.238242, ppl: 25.488865 +epoch: 0, batch: 14248, sum loss: 6337.459961, avg loss: 3.619338, ppl: 37.312843 +epoch: 0, batch: 14249, sum loss: 6143.299316, avg loss: 3.397843, ppl: 29.899527 +epoch: 0, batch: 14250, sum loss: 5965.715332, avg loss: 3.540484, ppl: 34.483604 +epoch: 0, batch: 14251, sum loss: 6041.715820, avg loss: 3.377147, ppl: 29.287094 +epoch: 0, batch: 14252, sum loss: 5255.753418, avg loss: 3.169936, ppl: 23.805954 +epoch: 0, batch: 14253, sum loss: 4759.124023, avg loss: 3.358592, ppl: 28.748671 +epoch: 0, batch: 14254, sum loss: 5684.113281, avg loss: 3.457490, ppl: 31.737207 +epoch: 0, batch: 14255, sum loss: 6357.789551, avg loss: 3.637180, ppl: 
37.984554 +epoch: 0, batch: 14256, sum loss: 6295.239746, avg loss: 3.632567, ppl: 37.809765 +epoch: 0, batch: 14257, sum loss: 5920.372070, avg loss: 3.536662, ppl: 34.352062 +epoch: 0, batch: 14258, sum loss: 5699.946289, avg loss: 3.696463, ppl: 40.304501 +epoch: 0, batch: 14259, sum loss: 6104.076172, avg loss: 3.538595, ppl: 34.418526 +epoch: 0, batch: 14260, sum loss: 5175.772949, avg loss: 3.319932, ppl: 27.658457 +epoch: 0, batch: 14261, sum loss: 6570.655762, avg loss: 3.592486, ppl: 36.324253 +epoch: 0, batch: 14262, sum loss: 6367.751465, avg loss: 3.401577, ppl: 30.011383 +epoch: 0, batch: 14263, sum loss: 6481.902832, avg loss: 3.517039, ppl: 33.684544 +epoch: 0, batch: 14264, sum loss: 5511.617676, avg loss: 3.537624, ppl: 34.385120 +epoch: 0, batch: 14265, sum loss: 5578.259766, avg loss: 3.360398, ppl: 28.800638 +epoch: 0, batch: 14266, sum loss: 6270.402832, avg loss: 3.647704, ppl: 38.386425 +epoch: 0, batch: 14267, sum loss: 5965.633789, avg loss: 3.466376, ppl: 32.020500 +epoch: 0, batch: 14268, sum loss: 6403.074707, avg loss: 3.777625, ppl: 43.712105 +epoch: 0, batch: 14269, sum loss: 6500.547363, avg loss: 3.573693, ppl: 35.647991 +epoch: 0, batch: 14270, sum loss: 5352.743164, avg loss: 3.480327, ppl: 32.470341 +epoch: 0, batch: 14271, sum loss: 4923.983398, avg loss: 3.197392, ppl: 24.468626 +epoch: 0, batch: 14272, sum loss: 5561.880371, avg loss: 3.471835, ppl: 32.195778 +epoch: 0, batch: 14273, sum loss: 5512.303711, avg loss: 3.316669, ppl: 27.568367 +epoch: 0, batch: 14274, sum loss: 5523.701172, avg loss: 3.366058, ppl: 28.964121 +epoch: 0, batch: 14275, sum loss: 5672.052734, avg loss: 3.462791, ppl: 31.905916 +epoch: 0, batch: 14276, sum loss: 5513.077148, avg loss: 3.469526, ppl: 32.121521 +epoch: 0, batch: 14277, sum loss: 5805.945312, avg loss: 3.612910, ppl: 37.073765 +epoch: 0, batch: 14278, sum loss: 6013.511719, avg loss: 3.640141, ppl: 38.097191 +epoch: 0, batch: 14279, sum loss: 5588.585449, avg loss: 3.239760, ppl: 
25.527592 +epoch: 0, batch: 14280, sum loss: 6314.570801, avg loss: 3.579689, ppl: 35.862377 +epoch: 0, batch: 14281, sum loss: 4937.760742, avg loss: 3.343102, ppl: 28.306784 +epoch: 0, batch: 14282, sum loss: 5118.483887, avg loss: 3.414599, ppl: 30.404760 +epoch: 0, batch: 14283, sum loss: 5258.143555, avg loss: 3.589176, ppl: 36.204247 +epoch: 0, batch: 14284, sum loss: 6782.175293, avg loss: 3.437494, ppl: 31.108902 +epoch: 0, batch: 14285, sum loss: 6466.118652, avg loss: 3.699153, ppl: 40.413048 +epoch: 0, batch: 14286, sum loss: 4917.453613, avg loss: 3.254436, ppl: 25.905012 +epoch: 0, batch: 14287, sum loss: 6069.223633, avg loss: 3.673864, ppl: 39.403885 +epoch: 0, batch: 14288, sum loss: 5327.722656, avg loss: 3.130272, ppl: 22.880194 +epoch: 0, batch: 14289, sum loss: 4049.877441, avg loss: 3.173885, ppl: 23.900164 +epoch: 0, batch: 14290, sum loss: 4963.790039, avg loss: 3.418588, ppl: 30.526287 +epoch: 0, batch: 14291, sum loss: 6440.590332, avg loss: 3.588073, ppl: 36.164310 +epoch: 0, batch: 14292, sum loss: 6466.615234, avg loss: 3.630890, ppl: 37.746403 +epoch: 0, batch: 14293, sum loss: 5243.619629, avg loss: 3.389541, ppl: 29.652336 +epoch: 0, batch: 14294, sum loss: 5992.204102, avg loss: 3.491961, ppl: 32.850288 +epoch: 0, batch: 14295, sum loss: 6636.367188, avg loss: 3.509449, ppl: 33.429829 +epoch: 0, batch: 14296, sum loss: 6382.540527, avg loss: 3.587713, ppl: 36.151287 +epoch: 0, batch: 14297, sum loss: 5965.411133, avg loss: 3.389438, ppl: 29.649290 +epoch: 0, batch: 14298, sum loss: 5963.769043, avg loss: 3.533039, ppl: 34.227814 +epoch: 0, batch: 14299, sum loss: 5928.952637, avg loss: 3.610812, ppl: 36.996071 +epoch: 0, batch: 14300, sum loss: 5650.993164, avg loss: 3.599359, ppl: 36.574776 +epoch: 0, batch: 14301, sum loss: 6393.250000, avg loss: 3.670063, ppl: 39.254379 +epoch: 0, batch: 14302, sum loss: 5986.381348, avg loss: 3.597585, ppl: 36.509956 +epoch: 0, batch: 14303, sum loss: 5889.925293, avg loss: 3.556718, ppl: 
35.047985 +epoch: 0, batch: 14304, sum loss: 7255.742676, avg loss: 3.859437, ppl: 47.438660 +epoch: 0, batch: 14305, sum loss: 5094.299316, avg loss: 3.129176, ppl: 22.855148 +epoch: 0, batch: 14306, sum loss: 6652.646973, avg loss: 3.551867, ppl: 34.878384 +epoch: 0, batch: 14307, sum loss: 5834.319336, avg loss: 3.491514, ppl: 32.835621 +epoch: 0, batch: 14308, sum loss: 6968.996094, avg loss: 3.584875, ppl: 36.048847 +epoch: 0, batch: 14309, sum loss: 4946.780273, avg loss: 3.538470, ppl: 34.414211 +epoch: 0, batch: 14310, sum loss: 6030.731445, avg loss: 3.469926, ppl: 32.134369 +epoch: 0, batch: 14311, sum loss: 5355.554199, avg loss: 3.368273, ppl: 29.028353 +epoch: 0, batch: 14312, sum loss: 6460.130371, avg loss: 3.804553, ppl: 44.905163 +epoch: 0, batch: 14313, sum loss: 5303.452637, avg loss: 3.367271, ppl: 28.999292 +epoch: 0, batch: 14314, sum loss: 6197.796875, avg loss: 3.383077, ppl: 29.461281 +epoch: 0, batch: 14315, sum loss: 6308.740234, avg loss: 3.576383, ppl: 35.744034 +epoch: 0, batch: 14316, sum loss: 5911.669922, avg loss: 3.550553, ppl: 34.832561 +epoch: 0, batch: 14317, sum loss: 5657.550293, avg loss: 3.231040, ppl: 25.305956 +epoch: 0, batch: 14318, sum loss: 4728.452637, avg loss: 3.192743, ppl: 24.355150 +epoch: 0, batch: 14319, sum loss: 5808.447754, avg loss: 3.457410, ppl: 31.734665 +epoch: 0, batch: 14320, sum loss: 5984.738281, avg loss: 3.575113, ppl: 35.698647 +epoch: 0, batch: 14321, sum loss: 5368.243164, avg loss: 3.130171, ppl: 22.877888 +epoch: 0, batch: 14322, sum loss: 5570.875488, avg loss: 3.434572, ppl: 31.018126 +epoch: 0, batch: 14323, sum loss: 5413.668457, avg loss: 3.463639, ppl: 31.932985 +epoch: 0, batch: 14324, sum loss: 5317.585449, avg loss: 3.315203, ppl: 27.527981 +epoch: 0, batch: 14325, sum loss: 5729.082031, avg loss: 3.428535, ppl: 30.831448 +epoch: 0, batch: 14326, sum loss: 5672.708008, avg loss: 3.516868, ppl: 33.678783 +epoch: 0, batch: 14327, sum loss: 5332.372559, avg loss: 3.422575, ppl: 
30.648247 +epoch: 0, batch: 14328, sum loss: 5611.365234, avg loss: 3.378305, ppl: 29.321043 +epoch: 0, batch: 14329, sum loss: 5057.409180, avg loss: 3.215136, ppl: 24.906681 +epoch: 0, batch: 14330, sum loss: 6025.185547, avg loss: 3.548402, ppl: 34.757717 +epoch: 0, batch: 14331, sum loss: 6968.436035, avg loss: 3.671463, ppl: 39.309364 +epoch: 0, batch: 14332, sum loss: 6276.122070, avg loss: 3.429575, ppl: 30.863522 +epoch: 0, batch: 14333, sum loss: 6070.134766, avg loss: 3.535314, ppl: 34.305798 +epoch: 0, batch: 14334, sum loss: 4485.414551, avg loss: 3.229240, ppl: 25.260450 +epoch: 0, batch: 14335, sum loss: 6121.720703, avg loss: 3.319805, ppl: 27.654961 +epoch: 0, batch: 14336, sum loss: 6330.683594, avg loss: 3.489903, ppl: 32.782768 +epoch: 0, batch: 14337, sum loss: 4993.072754, avg loss: 3.217186, ppl: 24.957796 +epoch: 0, batch: 14338, sum loss: 4943.281738, avg loss: 3.383492, ppl: 29.473513 +epoch: 0, batch: 14339, sum loss: 5361.296875, avg loss: 3.465609, ppl: 31.995934 +epoch: 0, batch: 14340, sum loss: 6383.056641, avg loss: 3.647461, ppl: 38.377102 +epoch: 0, batch: 14341, sum loss: 6309.985352, avg loss: 3.438684, ppl: 31.145950 +epoch: 0, batch: 14342, sum loss: 5859.970215, avg loss: 3.481860, ppl: 32.520149 +epoch: 0, batch: 14343, sum loss: 6189.624023, avg loss: 3.506869, ppl: 33.343708 +epoch: 0, batch: 14344, sum loss: 5195.344238, avg loss: 3.183422, ppl: 24.129179 +epoch: 0, batch: 14345, sum loss: 6084.849609, avg loss: 3.529495, ppl: 34.106754 +epoch: 0, batch: 14346, sum loss: 6852.755859, avg loss: 3.576595, ppl: 35.751602 +epoch: 0, batch: 14347, sum loss: 4723.945312, avg loss: 3.340838, ppl: 28.242796 +epoch: 0, batch: 14348, sum loss: 6066.527344, avg loss: 3.209803, ppl: 24.774202 +epoch: 0, batch: 14349, sum loss: 5106.821777, avg loss: 3.311817, ppl: 27.434927 +epoch: 0, batch: 14350, sum loss: 5386.529785, avg loss: 3.282468, ppl: 26.641439 +epoch: 0, batch: 14351, sum loss: 6409.071777, avg loss: 3.689736, ppl: 
40.034290 +epoch: 0, batch: 14352, sum loss: 5239.512207, avg loss: 3.574019, ppl: 35.659630 +epoch: 0, batch: 14353, sum loss: 6045.279297, avg loss: 3.419276, ppl: 30.547283 +epoch: 0, batch: 14354, sum loss: 5953.269043, avg loss: 3.300038, ppl: 27.113678 +epoch: 0, batch: 14355, sum loss: 6082.949707, avg loss: 3.631612, ppl: 37.773663 +epoch: 0, batch: 14356, sum loss: 7494.654297, avg loss: 3.618858, ppl: 37.294949 +epoch: 0, batch: 14357, sum loss: 5762.005859, avg loss: 3.456512, ppl: 31.706200 +epoch: 0, batch: 14358, sum loss: 4818.807617, avg loss: 3.131129, ppl: 22.899820 +epoch: 0, batch: 14359, sum loss: 4748.325684, avg loss: 3.148757, ppl: 23.307076 +epoch: 0, batch: 14360, sum loss: 6619.101562, avg loss: 3.706104, ppl: 40.694950 +epoch: 0, batch: 14361, sum loss: 5874.346680, avg loss: 3.423279, ppl: 30.669819 +epoch: 0, batch: 14362, sum loss: 6591.434082, avg loss: 3.568725, ppl: 35.471325 +epoch: 0, batch: 14363, sum loss: 6658.935547, avg loss: 3.866978, ppl: 47.797707 +epoch: 0, batch: 14364, sum loss: 6182.898926, avg loss: 3.410314, ppl: 30.274752 +epoch: 0, batch: 14365, sum loss: 5479.732910, avg loss: 3.450714, ppl: 31.522881 +epoch: 0, batch: 14366, sum loss: 5320.511719, avg loss: 3.493442, ppl: 32.898987 +epoch: 0, batch: 14367, sum loss: 6382.913086, avg loss: 3.569862, ppl: 35.511688 +epoch: 0, batch: 14368, sum loss: 5505.252441, avg loss: 3.610002, ppl: 36.966110 +epoch: 0, batch: 14369, sum loss: 5151.709961, avg loss: 3.635646, ppl: 37.926342 +epoch: 0, batch: 14370, sum loss: 4726.685059, avg loss: 3.132329, ppl: 22.927326 +epoch: 0, batch: 14371, sum loss: 6225.650879, avg loss: 3.571802, ppl: 35.580647 +epoch: 0, batch: 14372, sum loss: 5801.257324, avg loss: 3.473807, ppl: 32.259315 +epoch: 0, batch: 14373, sum loss: 5069.817871, avg loss: 3.434836, ppl: 31.026321 +epoch: 0, batch: 14374, sum loss: 5561.936035, avg loss: 3.441792, ppl: 31.242895 +epoch: 0, batch: 14375, sum loss: 5311.764160, avg loss: 3.545904, ppl: 
34.671013 +epoch: 0, batch: 14376, sum loss: 6443.586914, avg loss: 3.617960, ppl: 37.261486 +epoch: 0, batch: 14377, sum loss: 5799.129395, avg loss: 3.381416, ppl: 29.412399 +epoch: 0, batch: 14378, sum loss: 5705.495117, avg loss: 3.455782, ppl: 31.683046 +epoch: 0, batch: 14379, sum loss: 7148.733887, avg loss: 3.800496, ppl: 44.723377 +epoch: 0, batch: 14380, sum loss: 5922.240234, avg loss: 3.692170, ppl: 40.131836 +epoch: 0, batch: 14381, sum loss: 5133.857422, avg loss: 3.255458, ppl: 25.931492 +epoch: 0, batch: 14382, sum loss: 6280.028809, avg loss: 3.313999, ppl: 27.494869 +epoch: 0, batch: 14383, sum loss: 5823.873047, avg loss: 3.502028, ppl: 33.182686 +epoch: 0, batch: 14384, sum loss: 5789.504395, avg loss: 3.496078, ppl: 32.985813 +epoch: 0, batch: 14385, sum loss: 4860.163086, avg loss: 3.322053, ppl: 27.717194 +epoch: 0, batch: 14386, sum loss: 5758.187988, avg loss: 3.468788, ppl: 32.097813 +epoch: 0, batch: 14387, sum loss: 5843.237305, avg loss: 3.606937, ppl: 36.852989 +epoch: 0, batch: 14388, sum loss: 6101.605469, avg loss: 3.582857, ppl: 35.976173 +epoch: 0, batch: 14389, sum loss: 5621.223633, avg loss: 3.457087, ppl: 31.724430 +epoch: 0, batch: 14390, sum loss: 6147.953613, avg loss: 3.337651, ppl: 28.152931 +epoch: 0, batch: 14391, sum loss: 5439.926758, avg loss: 3.436466, ppl: 31.076952 +epoch: 0, batch: 14392, sum loss: 6685.467773, avg loss: 3.635382, ppl: 37.916351 +epoch: 0, batch: 14393, sum loss: 4993.657227, avg loss: 3.221714, ppl: 25.071068 +epoch: 0, batch: 14394, sum loss: 4790.171387, avg loss: 3.187073, ppl: 24.217445 +epoch: 0, batch: 14395, sum loss: 4863.096191, avg loss: 3.337746, ppl: 28.155596 +epoch: 0, batch: 14396, sum loss: 5319.586914, avg loss: 3.281670, ppl: 26.620188 +epoch: 0, batch: 14397, sum loss: 5736.044434, avg loss: 3.368200, ppl: 29.026228 +epoch: 0, batch: 14398, sum loss: 5517.711914, avg loss: 3.521195, ppl: 33.824821 +epoch: 0, batch: 14399, sum loss: 7006.301270, avg loss: 3.639637, ppl: 
38.078003 +epoch: 0, batch: 14400, sum loss: 6263.916504, avg loss: 3.656694, ppl: 38.733067 +epoch: 0, batch: 14401, sum loss: 5935.580078, avg loss: 3.382097, ppl: 29.432419 +epoch: 0, batch: 14402, sum loss: 5934.063965, avg loss: 3.478349, ppl: 32.406181 +epoch: 0, batch: 14403, sum loss: 5615.994629, avg loss: 3.288053, ppl: 26.790653 +epoch: 0, batch: 14404, sum loss: 5342.437988, avg loss: 3.429036, ppl: 30.846897 +epoch: 0, batch: 14405, sum loss: 6863.211914, avg loss: 3.810778, ppl: 45.185596 +epoch: 0, batch: 14406, sum loss: 5228.557129, avg loss: 3.245535, ppl: 25.675453 +epoch: 0, batch: 14407, sum loss: 5095.147461, avg loss: 3.127776, ppl: 22.823168 +epoch: 0, batch: 14408, sum loss: 5949.732910, avg loss: 3.323873, ppl: 27.767689 +epoch: 0, batch: 14409, sum loss: 6553.910645, avg loss: 3.451243, ppl: 31.539577 +epoch: 0, batch: 14410, sum loss: 5687.453613, avg loss: 3.472194, ppl: 32.207317 +epoch: 0, batch: 14411, sum loss: 6328.284668, avg loss: 3.696428, ppl: 40.303078 +epoch: 0, batch: 14412, sum loss: 5570.894531, avg loss: 3.047535, ppl: 21.063368 +epoch: 0, batch: 14413, sum loss: 6160.484375, avg loss: 3.504257, ppl: 33.256733 +epoch: 0, batch: 14414, sum loss: 6020.969727, avg loss: 3.635851, ppl: 37.934128 +epoch: 0, batch: 14415, sum loss: 5334.927246, avg loss: 3.433029, ppl: 30.970316 +epoch: 0, batch: 14416, sum loss: 5940.797852, avg loss: 3.680792, ppl: 39.677799 +epoch: 0, batch: 14417, sum loss: 5541.270020, avg loss: 3.412112, ppl: 30.329231 +epoch: 0, batch: 14418, sum loss: 4982.172363, avg loss: 3.183497, ppl: 24.130985 +epoch: 0, batch: 14419, sum loss: 5014.514648, avg loss: 3.066981, ppl: 21.476973 +epoch: 0, batch: 14420, sum loss: 5990.044922, avg loss: 3.736772, ppl: 41.962299 +epoch: 0, batch: 14421, sum loss: 6768.422363, avg loss: 3.756061, ppl: 42.779598 +epoch: 0, batch: 14422, sum loss: 6337.712891, avg loss: 3.648655, ppl: 38.422939 +epoch: 0, batch: 14423, sum loss: 4762.013672, avg loss: 3.290956, ppl: 
26.868546 +epoch: 0, batch: 14424, sum loss: 5052.853027, avg loss: 3.368569, ppl: 29.036936 +epoch: 0, batch: 14425, sum loss: 6148.751953, avg loss: 3.599972, ppl: 36.597202 +epoch: 0, batch: 14426, sum loss: 4740.306152, avg loss: 3.080121, ppl: 21.761036 +epoch: 0, batch: 14427, sum loss: 5711.624512, avg loss: 3.311087, ppl: 27.414906 +epoch: 0, batch: 14428, sum loss: 6314.933594, avg loss: 3.454559, ppl: 31.644325 +epoch: 0, batch: 14429, sum loss: 4982.126953, avg loss: 3.366302, ppl: 28.971193 +epoch: 0, batch: 14430, sum loss: 5665.204102, avg loss: 3.376164, ppl: 29.258333 +epoch: 0, batch: 14431, sum loss: 6142.486816, avg loss: 3.395515, ppl: 29.830025 +epoch: 0, batch: 14432, sum loss: 6795.018066, avg loss: 3.618220, ppl: 37.271179 +epoch: 0, batch: 14433, sum loss: 5635.730957, avg loss: 3.415595, ppl: 30.435040 +epoch: 0, batch: 14434, sum loss: 6672.010742, avg loss: 3.676039, ppl: 39.489674 +epoch: 0, batch: 14435, sum loss: 5505.023438, avg loss: 3.302353, ppl: 27.176521 +epoch: 0, batch: 14436, sum loss: 6085.376465, avg loss: 3.567044, ppl: 35.411743 +epoch: 0, batch: 14437, sum loss: 5738.383789, avg loss: 3.393486, ppl: 29.769535 +epoch: 0, batch: 14438, sum loss: 5761.727051, avg loss: 3.283035, ppl: 26.656561 +epoch: 0, batch: 14439, sum loss: 5635.588867, avg loss: 3.354517, ppl: 28.631779 +epoch: 0, batch: 14440, sum loss: 5222.856934, avg loss: 3.248045, ppl: 25.739975 +epoch: 0, batch: 14441, sum loss: 5669.701172, avg loss: 3.604387, ppl: 36.759155 +epoch: 0, batch: 14442, sum loss: 6315.589844, avg loss: 3.582297, ppl: 35.956039 +epoch: 0, batch: 14443, sum loss: 5873.543945, avg loss: 3.436831, ppl: 31.088284 +epoch: 0, batch: 14444, sum loss: 6764.264160, avg loss: 3.541500, ppl: 34.518646 +epoch: 0, batch: 14445, sum loss: 5929.719727, avg loss: 3.591593, ppl: 36.291843 +epoch: 0, batch: 14446, sum loss: 6096.210449, avg loss: 3.353251, ppl: 28.595547 +epoch: 0, batch: 14447, sum loss: 6107.624023, avg loss: 3.476166, ppl: 
32.335510 +epoch: 0, batch: 14448, sum loss: 5664.133789, avg loss: 3.612330, ppl: 37.052292 +epoch: 0, batch: 14449, sum loss: 5643.894531, avg loss: 3.363465, ppl: 28.889120 +epoch: 0, batch: 14450, sum loss: 6487.562988, avg loss: 3.650851, ppl: 38.507442 +epoch: 0, batch: 14451, sum loss: 5551.724121, avg loss: 3.502665, ppl: 33.203823 +epoch: 0, batch: 14452, sum loss: 5291.340332, avg loss: 3.462919, ppl: 31.909986 +epoch: 0, batch: 14453, sum loss: 5216.867676, avg loss: 3.346291, ppl: 28.397215 +epoch: 0, batch: 14454, sum loss: 5167.122070, avg loss: 3.213384, ppl: 24.863091 +epoch: 0, batch: 14455, sum loss: 5847.035156, avg loss: 3.497031, ppl: 33.017269 +epoch: 0, batch: 14456, sum loss: 6151.318359, avg loss: 3.509024, ppl: 33.415627 +epoch: 0, batch: 14457, sum loss: 6133.471680, avg loss: 3.591025, ppl: 36.271248 +epoch: 0, batch: 14458, sum loss: 5850.244629, avg loss: 3.511551, ppl: 33.500198 +epoch: 0, batch: 14459, sum loss: 5359.708008, avg loss: 3.300313, ppl: 27.121120 +epoch: 0, batch: 14460, sum loss: 5228.489258, avg loss: 3.349449, ppl: 28.487024 +epoch: 0, batch: 14461, sum loss: 6380.336426, avg loss: 3.608788, ppl: 36.921268 +epoch: 0, batch: 14462, sum loss: 6300.220703, avg loss: 3.690815, ppl: 40.077488 +epoch: 0, batch: 14463, sum loss: 5555.781738, avg loss: 3.470195, ppl: 32.143005 +epoch: 0, batch: 14464, sum loss: 5831.436523, avg loss: 3.337972, ppl: 28.161947 +epoch: 0, batch: 14465, sum loss: 5728.673340, avg loss: 3.497358, ppl: 33.028088 +epoch: 0, batch: 14466, sum loss: 5528.482422, avg loss: 3.282947, ppl: 26.654209 +epoch: 0, batch: 14467, sum loss: 5434.934570, avg loss: 3.356970, ppl: 28.702093 +epoch: 0, batch: 14468, sum loss: 5189.945312, avg loss: 3.341884, ppl: 28.272339 +epoch: 0, batch: 14469, sum loss: 6092.003418, avg loss: 3.422474, ppl: 30.645134 +epoch: 0, batch: 14470, sum loss: 5501.263672, avg loss: 3.131055, ppl: 22.898121 +epoch: 0, batch: 14471, sum loss: 6544.537109, avg loss: 3.811612, ppl: 
45.223263 +epoch: 0, batch: 14472, sum loss: 4915.890137, avg loss: 3.303690, ppl: 27.212862 +epoch: 0, batch: 14473, sum loss: 7605.266113, avg loss: 3.884202, ppl: 48.628098 +epoch: 0, batch: 14474, sum loss: 4411.112305, avg loss: 3.189524, ppl: 24.276878 +epoch: 0, batch: 14475, sum loss: 5428.203125, avg loss: 3.442107, ppl: 31.252745 +epoch: 0, batch: 14476, sum loss: 6293.760742, avg loss: 3.452420, ppl: 31.576708 +epoch: 0, batch: 14477, sum loss: 5471.171387, avg loss: 3.245060, ppl: 25.663256 +epoch: 0, batch: 14478, sum loss: 5166.150879, avg loss: 3.439514, ppl: 31.171818 +epoch: 0, batch: 14479, sum loss: 6043.007812, avg loss: 3.433527, ppl: 30.985737 +epoch: 0, batch: 14480, sum loss: 5157.202637, avg loss: 3.175617, ppl: 23.941593 +epoch: 0, batch: 14481, sum loss: 6507.894531, avg loss: 3.733732, ppl: 41.834934 +epoch: 0, batch: 14482, sum loss: 5651.901367, avg loss: 3.350268, ppl: 28.510363 +epoch: 0, batch: 14483, sum loss: 6701.909668, avg loss: 3.652267, ppl: 38.561985 +epoch: 0, batch: 14484, sum loss: 6652.979004, avg loss: 3.653476, ppl: 38.608620 +epoch: 0, batch: 14485, sum loss: 5415.497070, avg loss: 3.365753, ppl: 28.955297 +epoch: 0, batch: 14486, sum loss: 6977.597168, avg loss: 3.893749, ppl: 49.094574 +epoch: 0, batch: 14487, sum loss: 6336.640625, avg loss: 3.441956, ppl: 31.248013 +epoch: 0, batch: 14488, sum loss: 5779.157227, avg loss: 3.456434, ppl: 31.703720 +epoch: 0, batch: 14489, sum loss: 5018.833008, avg loss: 3.101875, ppl: 22.239603 +epoch: 0, batch: 14490, sum loss: 5621.810547, avg loss: 3.567139, ppl: 35.415115 +epoch: 0, batch: 14491, sum loss: 6433.308594, avg loss: 3.795462, ppl: 44.498806 +epoch: 0, batch: 14492, sum loss: 5948.785156, avg loss: 3.607511, ppl: 36.874142 +epoch: 0, batch: 14493, sum loss: 7664.841309, avg loss: 3.447972, ppl: 31.436562 +epoch: 0, batch: 14494, sum loss: 5556.943359, avg loss: 3.345541, ppl: 28.375916 +epoch: 0, batch: 14495, sum loss: 5870.507812, avg loss: 3.206176, ppl: 
24.684513 +epoch: 0, batch: 14496, sum loss: 5584.699219, avg loss: 3.290925, ppl: 26.867699 +epoch: 0, batch: 14497, sum loss: 6502.457031, avg loss: 3.570817, ppl: 35.545612 +epoch: 0, batch: 14498, sum loss: 5335.112305, avg loss: 3.239291, ppl: 25.515623 +epoch: 0, batch: 14499, sum loss: 6428.006836, avg loss: 3.715611, ppl: 41.083672 +epoch: 0, batch: 14500, sum loss: 5687.543457, avg loss: 3.506500, ppl: 33.331413 +epoch: 0, batch: 14501, sum loss: 5976.937500, avg loss: 3.564065, ppl: 35.306435 +epoch: 0, batch: 14502, sum loss: 5800.698242, avg loss: 3.473472, ppl: 32.248520 +epoch: 0, batch: 14503, sum loss: 5522.419922, avg loss: 3.408901, ppl: 30.232016 +epoch: 0, batch: 14504, sum loss: 5479.826660, avg loss: 3.273493, ppl: 26.403400 +epoch: 0, batch: 14505, sum loss: 5614.369141, avg loss: 3.519981, ppl: 33.783775 +epoch: 0, batch: 14506, sum loss: 5911.555664, avg loss: 3.567626, ppl: 35.432362 +epoch: 0, batch: 14507, sum loss: 5539.099121, avg loss: 3.550705, ppl: 34.837860 +epoch: 0, batch: 14508, sum loss: 5499.895020, avg loss: 3.260163, ppl: 26.053785 +epoch: 0, batch: 14509, sum loss: 5893.476074, avg loss: 3.352375, ppl: 28.570517 +epoch: 0, batch: 14510, sum loss: 6239.442871, avg loss: 3.507275, ppl: 33.357258 +epoch: 0, batch: 14511, sum loss: 5000.585938, avg loss: 3.342638, ppl: 28.293661 +epoch: 0, batch: 14512, sum loss: 5799.299316, avg loss: 3.510472, ppl: 33.464054 +epoch: 0, batch: 14513, sum loss: 5666.696289, avg loss: 3.421918, ppl: 30.628101 +epoch: 0, batch: 14514, sum loss: 6057.534180, avg loss: 3.556978, ppl: 35.057110 +epoch: 0, batch: 14515, sum loss: 3986.825684, avg loss: 2.859990, ppl: 17.461346 +epoch: 0, batch: 14516, sum loss: 5610.924805, avg loss: 3.245185, ppl: 25.666462 +epoch: 0, batch: 14517, sum loss: 5747.094727, avg loss: 3.517194, ppl: 33.689754 +epoch: 0, batch: 14518, sum loss: 5927.173828, avg loss: 3.375384, ppl: 29.235504 +epoch: 0, batch: 14519, sum loss: 5778.132324, avg loss: 3.518960, ppl: 
33.749317 +epoch: 0, batch: 14520, sum loss: 5298.142578, avg loss: 3.651373, ppl: 38.527534 +epoch: 0, batch: 14521, sum loss: 5734.336914, avg loss: 3.403167, ppl: 30.059155 +epoch: 0, batch: 14522, sum loss: 4103.717773, avg loss: 3.277730, ppl: 26.515507 +epoch: 0, batch: 14523, sum loss: 5906.613281, avg loss: 3.643808, ppl: 38.237171 +epoch: 0, batch: 14524, sum loss: 5976.253906, avg loss: 3.527895, ppl: 34.052204 +epoch: 0, batch: 14525, sum loss: 7164.831543, avg loss: 3.526000, ppl: 33.987736 +epoch: 0, batch: 14526, sum loss: 6498.125977, avg loss: 3.745318, ppl: 42.322453 +epoch: 0, batch: 14527, sum loss: 6552.021973, avg loss: 3.625911, ppl: 37.558933 +epoch: 0, batch: 14528, sum loss: 6131.809570, avg loss: 3.613323, ppl: 37.089111 +epoch: 0, batch: 14529, sum loss: 5023.386719, avg loss: 3.371401, ppl: 29.119284 +epoch: 0, batch: 14530, sum loss: 6460.463867, avg loss: 3.597140, ppl: 36.493725 +epoch: 0, batch: 14531, sum loss: 5836.625488, avg loss: 3.354383, ppl: 28.627922 +epoch: 0, batch: 14532, sum loss: 5390.305664, avg loss: 3.555611, ppl: 35.009193 +epoch: 0, batch: 14533, sum loss: 4992.588867, avg loss: 3.630974, ppl: 37.749550 +epoch: 0, batch: 14534, sum loss: 5865.315430, avg loss: 3.446131, ppl: 31.378759 +epoch: 0, batch: 14535, sum loss: 5977.151367, avg loss: 3.559947, ppl: 35.161343 +epoch: 0, batch: 14536, sum loss: 4079.127930, avg loss: 3.062408, ppl: 21.378984 +epoch: 0, batch: 14537, sum loss: 5847.776855, avg loss: 3.345410, ppl: 28.372215 +epoch: 0, batch: 14538, sum loss: 5427.170410, avg loss: 3.492388, ppl: 32.864326 +epoch: 0, batch: 14539, sum loss: 5816.423340, avg loss: 3.574937, ppl: 35.692383 +epoch: 0, batch: 14540, sum loss: 5938.938477, avg loss: 3.605913, ppl: 36.815269 +epoch: 0, batch: 14541, sum loss: 5464.893555, avg loss: 3.550938, ppl: 34.845985 +epoch: 0, batch: 14542, sum loss: 5087.429199, avg loss: 3.267456, ppl: 26.244495 +epoch: 0, batch: 14543, sum loss: 6201.854980, avg loss: 3.654599, ppl: 
38.652035 +epoch: 0, batch: 14544, sum loss: 5887.256836, avg loss: 3.531648, ppl: 34.180241 +epoch: 0, batch: 14545, sum loss: 6496.820312, avg loss: 3.725241, ppl: 41.481228 +epoch: 0, batch: 14546, sum loss: 4672.621094, avg loss: 3.285950, ppl: 26.734375 +epoch: 0, batch: 14547, sum loss: 5157.104492, avg loss: 3.381708, ppl: 29.420977 +epoch: 0, batch: 14548, sum loss: 6590.529297, avg loss: 3.575979, ppl: 35.729584 +epoch: 0, batch: 14549, sum loss: 5585.129395, avg loss: 3.473339, ppl: 32.244228 +epoch: 0, batch: 14550, sum loss: 6691.808105, avg loss: 3.476264, ppl: 32.338673 +epoch: 0, batch: 14551, sum loss: 5378.670898, avg loss: 3.585781, ppl: 36.081512 +epoch: 0, batch: 14552, sum loss: 4743.941895, avg loss: 3.224977, ppl: 25.153008 +epoch: 0, batch: 14553, sum loss: 7419.980957, avg loss: 3.760761, ppl: 42.981113 +epoch: 0, batch: 14554, sum loss: 5689.521484, avg loss: 3.654156, ppl: 38.634918 +epoch: 0, batch: 14555, sum loss: 6710.428711, avg loss: 3.697206, ppl: 40.334454 +epoch: 0, batch: 14556, sum loss: 6334.286621, avg loss: 3.632045, ppl: 37.790020 +epoch: 0, batch: 14557, sum loss: 6699.547852, avg loss: 3.843688, ppl: 46.697365 +epoch: 0, batch: 14558, sum loss: 4565.036133, avg loss: 3.409288, ppl: 30.243694 +epoch: 0, batch: 14559, sum loss: 6274.708008, avg loss: 3.569231, ppl: 35.489285 +epoch: 0, batch: 14560, sum loss: 7272.163086, avg loss: 3.663558, ppl: 38.999866 +epoch: 0, batch: 14561, sum loss: 6511.954102, avg loss: 3.759789, ppl: 42.939354 +epoch: 0, batch: 14562, sum loss: 6144.749512, avg loss: 3.400525, ppl: 29.979845 +epoch: 0, batch: 14563, sum loss: 6609.961914, avg loss: 3.625871, ppl: 37.557411 +epoch: 0, batch: 14564, sum loss: 5728.620117, avg loss: 3.273497, ppl: 26.403515 +epoch: 0, batch: 14565, sum loss: 5047.458008, avg loss: 3.419687, ppl: 30.559849 +epoch: 0, batch: 14566, sum loss: 6322.304688, avg loss: 3.473794, ppl: 32.258900 +epoch: 0, batch: 14567, sum loss: 6420.919922, avg loss: 3.524105, ppl: 
33.923409 +epoch: 0, batch: 14568, sum loss: 6225.441406, avg loss: 3.477900, ppl: 32.391628 +epoch: 0, batch: 14569, sum loss: 5831.122070, avg loss: 3.358941, ppl: 28.758728 +epoch: 0, batch: 14570, sum loss: 6725.037598, avg loss: 3.611728, ppl: 37.029991 +epoch: 0, batch: 14571, sum loss: 5730.089844, avg loss: 3.382580, ppl: 29.446632 +epoch: 0, batch: 14572, sum loss: 5925.262207, avg loss: 3.615169, ppl: 37.157627 +epoch: 0, batch: 14573, sum loss: 7153.332031, avg loss: 3.731524, ppl: 41.742695 +epoch: 0, batch: 14574, sum loss: 5457.687500, avg loss: 3.315728, ppl: 27.542429 +epoch: 0, batch: 14575, sum loss: 6830.325684, avg loss: 3.824371, ppl: 45.803963 +epoch: 0, batch: 14576, sum loss: 5698.586426, avg loss: 3.472630, ppl: 32.221382 +epoch: 0, batch: 14577, sum loss: 4634.791992, avg loss: 3.310566, ppl: 27.400621 +epoch: 0, batch: 14578, sum loss: 4879.927734, avg loss: 3.567199, ppl: 35.417240 +epoch: 0, batch: 14579, sum loss: 5526.638184, avg loss: 3.458472, ppl: 31.768398 +epoch: 0, batch: 14580, sum loss: 5350.185059, avg loss: 3.219125, ppl: 25.006220 +epoch: 0, batch: 14581, sum loss: 6199.436035, avg loss: 3.685753, ppl: 39.875122 +epoch: 0, batch: 14582, sum loss: 5403.031250, avg loss: 3.485827, ppl: 32.649406 +epoch: 0, batch: 14583, sum loss: 5535.245117, avg loss: 3.501104, ppl: 33.152020 +epoch: 0, batch: 14584, sum loss: 5264.010254, avg loss: 3.398328, ppl: 29.914043 +epoch: 0, batch: 14585, sum loss: 6225.116699, avg loss: 3.646817, ppl: 38.352394 +epoch: 0, batch: 14586, sum loss: 5827.675781, avg loss: 3.619674, ppl: 37.325413 +epoch: 0, batch: 14587, sum loss: 5824.927734, avg loss: 3.436536, ppl: 31.079102 +epoch: 0, batch: 14588, sum loss: 6818.865234, avg loss: 3.707920, ppl: 40.768932 +epoch: 0, batch: 14589, sum loss: 5111.784668, avg loss: 3.200867, ppl: 24.553814 +epoch: 0, batch: 14590, sum loss: 5034.244629, avg loss: 3.358402, ppl: 28.743223 +epoch: 0, batch: 14591, sum loss: 6412.168945, avg loss: 3.399878, ppl: 
29.960438 +epoch: 0, batch: 14592, sum loss: 5359.915039, avg loss: 3.302474, ppl: 27.179792 +epoch: 0, batch: 14593, sum loss: 5024.826172, avg loss: 3.422906, ppl: 30.658384 +epoch: 0, batch: 14594, sum loss: 6757.180176, avg loss: 3.725017, ppl: 41.471931 +epoch: 0, batch: 14595, sum loss: 5572.546875, avg loss: 3.429260, ppl: 30.853788 +epoch: 0, batch: 14596, sum loss: 5970.634277, avg loss: 3.371335, ppl: 29.117374 +epoch: 0, batch: 14597, sum loss: 6816.379395, avg loss: 3.712625, ppl: 40.961182 +epoch: 0, batch: 14598, sum loss: 4900.720215, avg loss: 3.398558, ppl: 29.920919 +epoch: 0, batch: 14599, sum loss: 5849.179199, avg loss: 3.291603, ppl: 26.885937 +epoch: 0, batch: 14600, sum loss: 5714.941406, avg loss: 3.225136, ppl: 25.157001 +epoch: 0, batch: 14601, sum loss: 4532.130371, avg loss: 3.228013, ppl: 25.229477 +epoch: 0, batch: 14602, sum loss: 4780.610840, avg loss: 3.417163, ppl: 30.482809 +epoch: 0, batch: 14603, sum loss: 5683.147461, avg loss: 3.585582, ppl: 36.074348 +epoch: 0, batch: 14604, sum loss: 4869.651367, avg loss: 3.078161, ppl: 21.718431 +epoch: 0, batch: 14605, sum loss: 6089.426758, avg loss: 3.479672, ppl: 32.449093 +epoch: 0, batch: 14606, sum loss: 5326.356445, avg loss: 3.255719, ppl: 25.938263 +epoch: 0, batch: 14607, sum loss: 6088.059570, avg loss: 3.490860, ppl: 32.814148 +epoch: 0, batch: 14608, sum loss: 7161.816895, avg loss: 3.586288, ppl: 36.099823 +epoch: 0, batch: 14609, sum loss: 6357.539551, avg loss: 3.485493, ppl: 32.638519 +epoch: 0, batch: 14610, sum loss: 5168.031250, avg loss: 3.355864, ppl: 28.670372 +epoch: 0, batch: 14611, sum loss: 5983.516113, avg loss: 3.415249, ppl: 30.424528 +epoch: 0, batch: 14612, sum loss: 6546.995605, avg loss: 3.546585, ppl: 34.694626 +epoch: 0, batch: 14613, sum loss: 5989.854004, avg loss: 3.432581, ppl: 30.956436 +epoch: 0, batch: 14614, sum loss: 6386.749512, avg loss: 3.548194, ppl: 34.750507 +epoch: 0, batch: 14615, sum loss: 5029.485840, avg loss: 3.317603, ppl: 
27.594124 +epoch: 0, batch: 14616, sum loss: 5703.711426, avg loss: 3.596287, ppl: 36.462605 +epoch: 0, batch: 14617, sum loss: 5560.302734, avg loss: 3.278481, ppl: 26.535421 +epoch: 0, batch: 14618, sum loss: 6141.979004, avg loss: 3.669044, ppl: 39.214390 +epoch: 0, batch: 14619, sum loss: 5285.305664, avg loss: 3.544806, ppl: 34.632973 +epoch: 0, batch: 14620, sum loss: 5764.611816, avg loss: 3.655429, ppl: 38.684128 +epoch: 0, batch: 14621, sum loss: 5946.481445, avg loss: 3.451237, ppl: 31.539381 +epoch: 0, batch: 14622, sum loss: 6907.008301, avg loss: 3.495450, ppl: 32.965103 +epoch: 0, batch: 14623, sum loss: 5539.719238, avg loss: 3.449389, ppl: 31.481159 +epoch: 0, batch: 14624, sum loss: 7056.130859, avg loss: 3.644696, ppl: 38.271118 +epoch: 0, batch: 14625, sum loss: 5228.405762, avg loss: 3.742595, ppl: 42.207386 +epoch: 0, batch: 14626, sum loss: 5837.436035, avg loss: 3.347154, ppl: 28.421721 +epoch: 0, batch: 14627, sum loss: 5775.954102, avg loss: 3.539188, ppl: 34.438931 +epoch: 0, batch: 14628, sum loss: 5990.849609, avg loss: 3.286259, ppl: 26.742630 +epoch: 0, batch: 14629, sum loss: 5796.331055, avg loss: 3.510800, ppl: 33.475048 +epoch: 0, batch: 14630, sum loss: 5764.886230, avg loss: 3.211636, ppl: 24.819654 +epoch: 0, batch: 14631, sum loss: 5410.994141, avg loss: 3.475269, ppl: 32.306526 +epoch: 0, batch: 14632, sum loss: 5420.934570, avg loss: 3.545412, ppl: 34.653954 +epoch: 0, batch: 14633, sum loss: 6162.727539, avg loss: 3.507528, ppl: 33.365696 +epoch: 0, batch: 14634, sum loss: 5118.516602, avg loss: 3.513052, ppl: 33.550499 +epoch: 0, batch: 14635, sum loss: 6388.663574, avg loss: 3.669537, ppl: 39.233730 +epoch: 0, batch: 14636, sum loss: 5649.851074, avg loss: 3.502697, ppl: 33.204899 +epoch: 0, batch: 14637, sum loss: 7239.123047, avg loss: 3.610535, ppl: 36.985840 +epoch: 0, batch: 14638, sum loss: 5951.249023, avg loss: 3.519367, ppl: 33.763042 +epoch: 0, batch: 14639, sum loss: 5410.264648, avg loss: 3.396274, ppl: 
29.852665 +epoch: 0, batch: 14640, sum loss: 6365.805176, avg loss: 3.450301, ppl: 31.509872 +epoch: 0, batch: 14641, sum loss: 6654.109375, avg loss: 3.587121, ppl: 36.129910 +epoch: 0, batch: 14642, sum loss: 6392.682129, avg loss: 3.632206, ppl: 37.796093 +epoch: 0, batch: 14643, sum loss: 7512.555664, avg loss: 3.507262, ppl: 33.356819 +epoch: 0, batch: 14644, sum loss: 5674.558105, avg loss: 3.238903, ppl: 25.505733 +epoch: 0, batch: 14645, sum loss: 6605.755371, avg loss: 3.709015, ppl: 40.813580 +epoch: 0, batch: 14646, sum loss: 4826.493652, avg loss: 3.423045, ppl: 30.662653 +epoch: 0, batch: 14647, sum loss: 5547.035645, avg loss: 3.398919, ppl: 29.931730 +epoch: 0, batch: 14648, sum loss: 5885.974609, avg loss: 3.291932, ppl: 26.894777 +epoch: 0, batch: 14649, sum loss: 5410.730469, avg loss: 3.303254, ppl: 27.201010 +epoch: 0, batch: 14650, sum loss: 5657.327148, avg loss: 3.466500, ppl: 32.024448 +epoch: 0, batch: 14651, sum loss: 6551.900879, avg loss: 3.623839, ppl: 37.481178 +epoch: 0, batch: 14652, sum loss: 4297.117676, avg loss: 3.275242, ppl: 26.449629 +epoch: 0, batch: 14653, sum loss: 6588.712402, avg loss: 3.565320, ppl: 35.350780 +epoch: 0, batch: 14654, sum loss: 6207.073730, avg loss: 3.450291, ppl: 31.509565 +epoch: 0, batch: 14655, sum loss: 5266.195801, avg loss: 3.299621, ppl: 27.102375 +epoch: 0, batch: 14656, sum loss: 5843.912109, avg loss: 3.645609, ppl: 38.306110 +epoch: 0, batch: 14657, sum loss: 5198.647461, avg loss: 3.294453, ppl: 26.962671 +epoch: 0, batch: 14658, sum loss: 4703.323730, avg loss: 3.020760, ppl: 20.506878 +epoch: 0, batch: 14659, sum loss: 6269.245117, avg loss: 3.410906, ppl: 30.292671 +epoch: 0, batch: 14660, sum loss: 5972.837891, avg loss: 3.761233, ppl: 43.001419 +epoch: 0, batch: 14661, sum loss: 6217.264160, avg loss: 3.538568, ppl: 34.417599 +epoch: 0, batch: 14662, sum loss: 7239.024902, avg loss: 3.693380, ppl: 40.180424 +epoch: 0, batch: 14663, sum loss: 6292.251465, avg loss: 3.468716, ppl: 
32.095516 +epoch: 0, batch: 14664, sum loss: 5134.911133, avg loss: 3.423274, ppl: 30.669664 +epoch: 0, batch: 14665, sum loss: 5833.071777, avg loss: 3.647950, ppl: 38.395863 +epoch: 0, batch: 14666, sum loss: 6856.129883, avg loss: 3.606591, ppl: 36.840260 +epoch: 0, batch: 14667, sum loss: 6078.207031, avg loss: 3.337840, ppl: 28.158249 +epoch: 0, batch: 14668, sum loss: 5750.580078, avg loss: 3.476772, ppl: 32.355099 +epoch: 0, batch: 14669, sum loss: 4381.257812, avg loss: 3.425534, ppl: 30.739056 +epoch: 0, batch: 14670, sum loss: 5747.728027, avg loss: 3.524052, ppl: 33.921589 +epoch: 0, batch: 14671, sum loss: 5733.088867, avg loss: 3.335130, ppl: 28.082047 +epoch: 0, batch: 14672, sum loss: 6710.387695, avg loss: 3.680959, ppl: 39.684422 +epoch: 0, batch: 14673, sum loss: 6853.322754, avg loss: 3.569439, ppl: 35.496681 +epoch: 0, batch: 14674, sum loss: 5917.289062, avg loss: 3.484858, ppl: 32.617805 +epoch: 0, batch: 14675, sum loss: 5948.861816, avg loss: 3.376199, ppl: 29.259338 +epoch: 0, batch: 14676, sum loss: 6595.335449, avg loss: 3.478553, ppl: 32.412773 +epoch: 0, batch: 14677, sum loss: 4676.841309, avg loss: 3.379220, ppl: 29.347885 +epoch: 0, batch: 14678, sum loss: 5800.336914, avg loss: 3.293774, ppl: 26.944370 +epoch: 0, batch: 14679, sum loss: 5622.833496, avg loss: 3.418136, ppl: 30.512476 +epoch: 0, batch: 14680, sum loss: 6111.852539, avg loss: 3.506513, ppl: 33.331841 +epoch: 0, batch: 14681, sum loss: 6047.007812, avg loss: 3.501452, ppl: 33.163578 +epoch: 0, batch: 14682, sum loss: 4630.074707, avg loss: 3.099113, ppl: 22.178270 +epoch: 0, batch: 14683, sum loss: 4806.955078, avg loss: 3.223981, ppl: 25.127947 +epoch: 0, batch: 14684, sum loss: 6136.483398, avg loss: 3.516609, ppl: 33.670074 +epoch: 0, batch: 14685, sum loss: 5499.566895, avg loss: 3.463203, ppl: 31.919064 +epoch: 0, batch: 14686, sum loss: 7299.936523, avg loss: 3.768682, ppl: 43.322918 +epoch: 0, batch: 14687, sum loss: 5383.772949, avg loss: 3.416100, ppl: 
30.450413 +epoch: 0, batch: 14688, sum loss: 6425.146973, avg loss: 3.473053, ppl: 32.234989 +epoch: 0, batch: 14689, sum loss: 4854.097656, avg loss: 3.333858, ppl: 28.046343 +epoch: 0, batch: 14690, sum loss: 5813.958984, avg loss: 3.462751, ppl: 31.904623 +epoch: 0, batch: 14691, sum loss: 5800.174805, avg loss: 3.393900, ppl: 29.781874 +epoch: 0, batch: 14692, sum loss: 5898.731445, avg loss: 3.104596, ppl: 22.300200 +epoch: 0, batch: 14693, sum loss: 5979.231445, avg loss: 3.630377, ppl: 37.727047 +epoch: 0, batch: 14694, sum loss: 6016.715820, avg loss: 3.737091, ppl: 41.975689 +epoch: 0, batch: 14695, sum loss: 5750.153809, avg loss: 3.540735, ppl: 34.492264 +epoch: 0, batch: 14696, sum loss: 6590.696289, avg loss: 3.684011, ppl: 39.805744 +epoch: 0, batch: 14697, sum loss: 6507.105469, avg loss: 3.487195, ppl: 32.694096 +epoch: 0, batch: 14698, sum loss: 6008.010254, avg loss: 3.846357, ppl: 46.822197 +epoch: 0, batch: 14699, sum loss: 5859.791992, avg loss: 3.342722, ppl: 28.296055 +epoch: 0, batch: 14700, sum loss: 5838.487793, avg loss: 3.557884, ppl: 35.088871 +epoch: 0, batch: 14701, sum loss: 6428.609375, avg loss: 3.565507, ppl: 35.357380 +epoch: 0, batch: 14702, sum loss: 5696.123047, avg loss: 3.445930, ppl: 31.372454 +epoch: 0, batch: 14703, sum loss: 5212.447754, avg loss: 3.255745, ppl: 25.938936 +epoch: 0, batch: 14704, sum loss: 5306.145020, avg loss: 3.488590, ppl: 32.739754 +epoch: 0, batch: 14705, sum loss: 5865.654785, avg loss: 3.557098, ppl: 35.061306 +epoch: 0, batch: 14706, sum loss: 5563.849609, avg loss: 3.223551, ppl: 25.117161 +epoch: 0, batch: 14707, sum loss: 7069.277832, avg loss: 3.833665, ppl: 46.231663 +epoch: 0, batch: 14708, sum loss: 4976.177246, avg loss: 3.135588, ppl: 23.002150 +epoch: 0, batch: 14709, sum loss: 5180.673828, avg loss: 3.266503, ppl: 26.219492 +epoch: 0, batch: 14710, sum loss: 5516.836426, avg loss: 3.450179, ppl: 31.506035 +epoch: 0, batch: 14711, sum loss: 5625.512207, avg loss: 3.276361, ppl: 
26.479246 +epoch: 0, batch: 14712, sum loss: 6141.598633, avg loss: 3.519541, ppl: 33.768917 +epoch: 0, batch: 14713, sum loss: 6013.120117, avg loss: 3.232860, ppl: 25.352068 +epoch: 0, batch: 14714, sum loss: 6753.078613, avg loss: 3.722756, ppl: 41.378262 +epoch: 0, batch: 14715, sum loss: 6147.940918, avg loss: 3.849681, ppl: 46.978092 +epoch: 0, batch: 14716, sum loss: 6253.136719, avg loss: 3.512998, ppl: 33.548698 +epoch: 0, batch: 14717, sum loss: 6405.714844, avg loss: 3.735111, ppl: 41.892654 +epoch: 0, batch: 14718, sum loss: 6778.292969, avg loss: 3.650131, ppl: 38.479698 +epoch: 0, batch: 14719, sum loss: 6061.525879, avg loss: 3.653723, ppl: 38.618156 +epoch: 0, batch: 14720, sum loss: 6393.377930, avg loss: 3.622310, ppl: 37.423931 +epoch: 0, batch: 14721, sum loss: 5274.234863, avg loss: 3.619928, ppl: 37.334866 +epoch: 0, batch: 14722, sum loss: 6502.503906, avg loss: 3.305798, ppl: 27.270283 +epoch: 0, batch: 14723, sum loss: 6004.540039, avg loss: 3.371443, ppl: 29.120518 +epoch: 0, batch: 14724, sum loss: 5460.889160, avg loss: 3.555266, ppl: 34.997143 +epoch: 0, batch: 14725, sum loss: 5402.096191, avg loss: 3.336687, ppl: 28.125793 +epoch: 0, batch: 14726, sum loss: 5618.633789, avg loss: 3.238406, ppl: 25.493046 +epoch: 0, batch: 14727, sum loss: 5344.964355, avg loss: 3.461765, ppl: 31.873177 +epoch: 0, batch: 14728, sum loss: 4984.934570, avg loss: 3.203686, ppl: 24.623112 +epoch: 0, batch: 14729, sum loss: 5195.504883, avg loss: 3.393537, ppl: 29.771069 +epoch: 0, batch: 14730, sum loss: 6794.739258, avg loss: 3.678798, ppl: 39.598766 +epoch: 0, batch: 14731, sum loss: 6212.286621, avg loss: 3.555974, ppl: 35.021915 +epoch: 0, batch: 14732, sum loss: 5364.826660, avg loss: 3.730756, ppl: 41.710621 +epoch: 0, batch: 14733, sum loss: 5351.101074, avg loss: 3.151414, ppl: 23.369089 +epoch: 0, batch: 14734, sum loss: 6160.650391, avg loss: 3.713472, ppl: 40.995903 +epoch: 0, batch: 14735, sum loss: 5151.835938, avg loss: 3.464584, ppl: 
31.963148 +epoch: 0, batch: 14736, sum loss: 5220.470703, avg loss: 3.262794, ppl: 26.122421 +epoch: 0, batch: 14737, sum loss: 5655.171387, avg loss: 3.352206, ppl: 28.565680 +epoch: 0, batch: 14738, sum loss: 5365.698242, avg loss: 3.468454, ppl: 32.087101 +epoch: 0, batch: 14739, sum loss: 5611.293945, avg loss: 3.440401, ppl: 31.199469 +epoch: 0, batch: 14740, sum loss: 5286.096680, avg loss: 3.227165, ppl: 25.208090 +epoch: 0, batch: 14741, sum loss: 5640.654785, avg loss: 3.503512, ppl: 33.231964 +epoch: 0, batch: 14742, sum loss: 5692.624512, avg loss: 3.392506, ppl: 29.740377 +epoch: 0, batch: 14743, sum loss: 6380.496094, avg loss: 3.475216, ppl: 32.304794 +epoch: 0, batch: 14744, sum loss: 6454.813477, avg loss: 3.669593, ppl: 39.235928 +epoch: 0, batch: 14745, sum loss: 5647.463867, avg loss: 3.316185, ppl: 27.555040 +epoch: 0, batch: 14746, sum loss: 5846.988281, avg loss: 3.587109, ppl: 36.129486 +epoch: 0, batch: 14747, sum loss: 5616.542969, avg loss: 3.460593, ppl: 31.835857 +epoch: 0, batch: 14748, sum loss: 5465.856445, avg loss: 3.261251, ppl: 26.082146 +epoch: 0, batch: 14749, sum loss: 5373.302246, avg loss: 3.448846, ppl: 31.464066 +epoch: 0, batch: 14750, sum loss: 6022.092285, avg loss: 3.383198, ppl: 29.464849 +epoch: 0, batch: 14751, sum loss: 5770.460938, avg loss: 3.212952, ppl: 24.852333 +epoch: 0, batch: 14752, sum loss: 7822.083984, avg loss: 3.693146, ppl: 40.171036 +epoch: 0, batch: 14753, sum loss: 6013.799316, avg loss: 3.448279, ppl: 31.446239 +epoch: 0, batch: 14754, sum loss: 5316.851562, avg loss: 3.230165, ppl: 25.283829 +epoch: 0, batch: 14755, sum loss: 6682.612305, avg loss: 3.583170, ppl: 35.987446 +epoch: 0, batch: 14756, sum loss: 6467.793945, avg loss: 3.637680, ppl: 38.003551 +epoch: 0, batch: 14757, sum loss: 6134.287109, avg loss: 3.455936, ppl: 31.687941 +epoch: 0, batch: 14758, sum loss: 5959.381836, avg loss: 3.355508, ppl: 28.660156 +epoch: 0, batch: 14759, sum loss: 6442.680176, avg loss: 3.583248, ppl: 
35.990246 +epoch: 0, batch: 14760, sum loss: 6050.565430, avg loss: 3.431971, ppl: 30.937571 +epoch: 0, batch: 14761, sum loss: 5240.806152, avg loss: 3.321170, ppl: 27.692728 +epoch: 0, batch: 14762, sum loss: 6095.338867, avg loss: 3.390066, ppl: 29.667908 +epoch: 0, batch: 14763, sum loss: 6610.791504, avg loss: 3.739136, ppl: 42.061623 +epoch: 0, batch: 14764, sum loss: 6027.526367, avg loss: 3.528996, ppl: 34.089741 +epoch: 0, batch: 14765, sum loss: 5552.808594, avg loss: 3.446809, ppl: 31.400028 +epoch: 0, batch: 14766, sum loss: 5725.844238, avg loss: 3.556425, ppl: 35.037708 +epoch: 0, batch: 14767, sum loss: 6163.812988, avg loss: 3.464763, ppl: 31.968872 +epoch: 0, batch: 14768, sum loss: 5946.099609, avg loss: 3.417299, ppl: 30.486944 +epoch: 0, batch: 14769, sum loss: 5133.504395, avg loss: 3.572376, ppl: 35.601089 +epoch: 0, batch: 14770, sum loss: 5254.686035, avg loss: 3.304834, ppl: 27.244015 +epoch: 0, batch: 14771, sum loss: 4996.070801, avg loss: 3.107009, ppl: 22.354088 +epoch: 0, batch: 14772, sum loss: 5927.876953, avg loss: 3.530600, ppl: 34.144436 +epoch: 0, batch: 14773, sum loss: 5605.433105, avg loss: 3.372704, ppl: 29.157249 +epoch: 0, batch: 14774, sum loss: 4919.241211, avg loss: 3.219399, ppl: 25.013077 +epoch: 0, batch: 14775, sum loss: 4978.144531, avg loss: 3.279410, ppl: 26.560101 +epoch: 0, batch: 14776, sum loss: 6206.081055, avg loss: 3.449739, ppl: 31.492178 +epoch: 0, batch: 14777, sum loss: 5689.285156, avg loss: 3.338782, ppl: 28.184786 +epoch: 0, batch: 14778, sum loss: 6266.794922, avg loss: 3.508844, ppl: 33.409622 +epoch: 0, batch: 14779, sum loss: 4270.783691, avg loss: 3.260140, ppl: 26.053188 +epoch: 0, batch: 14780, sum loss: 6247.324707, avg loss: 3.638512, ppl: 38.035194 +epoch: 0, batch: 14781, sum loss: 5436.767090, avg loss: 3.485107, ppl: 32.625923 +epoch: 0, batch: 14782, sum loss: 5502.984375, avg loss: 3.534351, ppl: 34.272758 +epoch: 0, batch: 14783, sum loss: 5854.249023, avg loss: 3.535174, ppl: 
34.300995 +epoch: 0, batch: 14784, sum loss: 5506.631836, avg loss: 3.610906, ppl: 36.999565 +epoch: 0, batch: 14785, sum loss: 5721.248047, avg loss: 3.520768, ppl: 33.810383 +epoch: 0, batch: 14786, sum loss: 6685.024902, avg loss: 3.514735, ppl: 33.607014 +epoch: 0, batch: 14787, sum loss: 5138.101074, avg loss: 3.314904, ppl: 27.519751 +epoch: 0, batch: 14788, sum loss: 6040.637207, avg loss: 3.549140, ppl: 34.783405 +epoch: 0, batch: 14789, sum loss: 4075.112549, avg loss: 3.159002, ppl: 23.547085 +epoch: 0, batch: 14790, sum loss: 6202.356445, avg loss: 3.426717, ppl: 30.775427 +epoch: 0, batch: 14791, sum loss: 5474.597168, avg loss: 3.413091, ppl: 30.358931 +epoch: 0, batch: 14792, sum loss: 6659.582031, avg loss: 3.726683, ppl: 41.541084 +epoch: 0, batch: 14793, sum loss: 5860.060547, avg loss: 3.387318, ppl: 29.586498 +epoch: 0, batch: 14794, sum loss: 5205.028320, avg loss: 3.442479, ppl: 31.264370 +epoch: 0, batch: 14795, sum loss: 5257.243652, avg loss: 3.370028, ppl: 29.079342 +epoch: 0, batch: 14796, sum loss: 5843.999023, avg loss: 3.441696, ppl: 31.239887 +epoch: 0, batch: 14797, sum loss: 5874.329590, avg loss: 3.360601, ppl: 28.806490 +epoch: 0, batch: 14798, sum loss: 7231.546875, avg loss: 3.643097, ppl: 38.209976 +epoch: 0, batch: 14799, sum loss: 5090.279297, avg loss: 3.307524, ppl: 27.317404 +epoch: 0, batch: 14800, sum loss: 5514.562012, avg loss: 3.226777, ppl: 25.198315 +epoch: 0, batch: 14801, sum loss: 5732.527832, avg loss: 3.428545, ppl: 30.831764 +epoch: 0, batch: 14802, sum loss: 6524.199219, avg loss: 3.626570, ppl: 37.583675 +epoch: 0, batch: 14803, sum loss: 6609.200195, avg loss: 3.794030, ppl: 44.435112 +epoch: 0, batch: 14804, sum loss: 6904.208984, avg loss: 3.622355, ppl: 37.425602 +epoch: 0, batch: 14805, sum loss: 6672.753906, avg loss: 3.484467, ppl: 32.605038 +epoch: 0, batch: 14806, sum loss: 6292.959961, avg loss: 3.469107, ppl: 32.108063 +epoch: 0, batch: 14807, sum loss: 6286.929199, avg loss: 3.731115, ppl: 
41.725620 +epoch: 0, batch: 14808, sum loss: 5354.484375, avg loss: 3.247110, ppl: 25.715910 +epoch: 0, batch: 14809, sum loss: 5812.805176, avg loss: 3.415279, ppl: 30.425442 +epoch: 0, batch: 14810, sum loss: 6110.839844, avg loss: 3.408165, ppl: 30.209764 +epoch: 0, batch: 14811, sum loss: 6055.859375, avg loss: 3.417528, ppl: 30.493938 +epoch: 0, batch: 14812, sum loss: 5794.097656, avg loss: 3.428460, ppl: 30.829134 +epoch: 0, batch: 14813, sum loss: 5802.746582, avg loss: 3.413381, ppl: 30.367733 +epoch: 0, batch: 14814, sum loss: 6563.743164, avg loss: 3.662803, ppl: 38.970432 +epoch: 0, batch: 14815, sum loss: 5281.867676, avg loss: 3.351439, ppl: 28.543772 +epoch: 0, batch: 14816, sum loss: 6172.769043, avg loss: 3.572204, ppl: 35.594971 +epoch: 0, batch: 14817, sum loss: 7203.491699, avg loss: 3.505349, ppl: 33.293049 +epoch: 0, batch: 14818, sum loss: 5127.068359, avg loss: 3.216480, ppl: 24.940165 +epoch: 0, batch: 14819, sum loss: 5130.381348, avg loss: 3.373032, ppl: 29.166822 +epoch: 0, batch: 14820, sum loss: 5388.354980, avg loss: 3.554324, ppl: 34.964172 +epoch: 0, batch: 14821, sum loss: 5832.919434, avg loss: 3.451432, ppl: 31.545517 +epoch: 0, batch: 14822, sum loss: 5436.380371, avg loss: 3.296774, ppl: 27.025312 +epoch: 0, batch: 14823, sum loss: 5769.396973, avg loss: 3.432122, ppl: 30.942225 +epoch: 0, batch: 14824, sum loss: 3984.435059, avg loss: 3.048535, ppl: 21.084431 +epoch: 0, batch: 14825, sum loss: 5979.541992, avg loss: 3.589161, ppl: 36.203678 +epoch: 0, batch: 14826, sum loss: 6054.554199, avg loss: 3.540675, ppl: 34.490189 +epoch: 0, batch: 14827, sum loss: 6794.017578, avg loss: 3.789190, ppl: 44.220570 +epoch: 0, batch: 14828, sum loss: 5908.689941, avg loss: 3.506641, ppl: 33.336102 +epoch: 0, batch: 14829, sum loss: 5792.325684, avg loss: 3.593254, ppl: 36.352177 +epoch: 0, batch: 14830, sum loss: 5231.193848, avg loss: 3.359791, ppl: 28.783161 +epoch: 0, batch: 14831, sum loss: 7605.041504, avg loss: 3.715213, ppl: 
41.067345 +epoch: 0, batch: 14832, sum loss: 5452.213379, avg loss: 3.258944, ppl: 26.022045 +epoch: 0, batch: 14833, sum loss: 6148.911621, avg loss: 3.495686, ppl: 32.972900 +epoch: 0, batch: 14834, sum loss: 4864.038086, avg loss: 3.352197, ppl: 28.565428 +epoch: 0, batch: 14835, sum loss: 6241.941895, avg loss: 3.354080, ppl: 28.619255 +epoch: 0, batch: 14836, sum loss: 5513.853516, avg loss: 3.333648, ppl: 28.040445 +epoch: 0, batch: 14837, sum loss: 6261.591797, avg loss: 3.569893, ppl: 35.512779 +epoch: 0, batch: 14838, sum loss: 6306.881836, avg loss: 3.455826, ppl: 31.684435 +epoch: 0, batch: 14839, sum loss: 5224.634277, avg loss: 3.435000, ppl: 31.031403 +epoch: 0, batch: 14840, sum loss: 5233.392578, avg loss: 3.331249, ppl: 27.973272 +epoch: 0, batch: 14841, sum loss: 6092.816895, avg loss: 3.542335, ppl: 34.547504 +epoch: 0, batch: 14842, sum loss: 5512.058594, avg loss: 3.410927, ppl: 30.293322 +epoch: 0, batch: 14843, sum loss: 5382.232422, avg loss: 3.497227, ppl: 33.023750 +epoch: 0, batch: 14844, sum loss: 4944.473633, avg loss: 3.238031, ppl: 25.483500 +epoch: 0, batch: 14845, sum loss: 5071.395508, avg loss: 3.327687, ppl: 27.873802 +epoch: 0, batch: 14846, sum loss: 5523.743652, avg loss: 3.243537, ppl: 25.624199 +epoch: 0, batch: 14847, sum loss: 6437.168945, avg loss: 3.337050, ppl: 28.135996 +epoch: 0, batch: 14848, sum loss: 5411.106445, avg loss: 3.181133, ppl: 24.074003 +epoch: 0, batch: 14849, sum loss: 5300.897949, avg loss: 3.292483, ppl: 26.909599 +epoch: 0, batch: 14850, sum loss: 5230.920898, avg loss: 3.355305, ppl: 28.654354 +epoch: 0, batch: 14851, sum loss: 6170.670898, avg loss: 3.608579, ppl: 36.913574 +epoch: 0, batch: 14852, sum loss: 5717.923828, avg loss: 3.446609, ppl: 31.393749 +epoch: 0, batch: 14853, sum loss: 5425.479004, avg loss: 3.346995, ppl: 28.417208 +epoch: 0, batch: 14854, sum loss: 5855.640137, avg loss: 3.619061, ppl: 37.302505 +epoch: 0, batch: 14855, sum loss: 5099.825195, avg loss: 3.406697, ppl: 
30.165438 +epoch: 0, batch: 14856, sum loss: 5254.288086, avg loss: 3.519282, ppl: 33.760174 +epoch: 0, batch: 14857, sum loss: 5286.423828, avg loss: 3.461967, ppl: 31.879629 +epoch: 0, batch: 14858, sum loss: 7333.213867, avg loss: 3.635704, ppl: 37.928528 +epoch: 0, batch: 14859, sum loss: 5565.543457, avg loss: 3.320730, ppl: 27.680555 +epoch: 0, batch: 14860, sum loss: 6542.478027, avg loss: 3.819310, ppl: 45.572750 +epoch: 0, batch: 14861, sum loss: 5494.976074, avg loss: 3.396153, ppl: 29.849056 +epoch: 0, batch: 14862, sum loss: 4796.720703, avg loss: 3.161978, ppl: 23.617266 +epoch: 0, batch: 14863, sum loss: 5794.270020, avg loss: 3.707147, ppl: 40.737404 +epoch: 0, batch: 14864, sum loss: 6806.343262, avg loss: 3.608878, ppl: 36.924595 +epoch: 0, batch: 14865, sum loss: 5152.404785, avg loss: 3.172663, ppl: 23.870968 +epoch: 0, batch: 14866, sum loss: 6670.020508, avg loss: 3.464946, ppl: 31.974718 +epoch: 0, batch: 14867, sum loss: 6450.358887, avg loss: 3.709235, ppl: 40.822556 +epoch: 0, batch: 14868, sum loss: 6491.343262, avg loss: 3.866196, ppl: 47.760357 +epoch: 0, batch: 14869, sum loss: 5259.397949, avg loss: 3.590033, ppl: 36.235256 +epoch: 0, batch: 14870, sum loss: 6198.389160, avg loss: 3.494019, ppl: 32.917965 +epoch: 0, batch: 14871, sum loss: 6694.054688, avg loss: 3.649975, ppl: 38.473705 +epoch: 0, batch: 14872, sum loss: 5668.334961, avg loss: 3.603519, ppl: 36.727242 +epoch: 0, batch: 14873, sum loss: 6912.027344, avg loss: 3.506863, ppl: 33.343510 +epoch: 0, batch: 14874, sum loss: 5654.696777, avg loss: 3.479813, ppl: 32.453663 +epoch: 0, batch: 14875, sum loss: 6024.216309, avg loss: 3.492300, ppl: 32.861427 +epoch: 0, batch: 14876, sum loss: 5397.733887, avg loss: 3.457869, ppl: 31.749256 +epoch: 0, batch: 14877, sum loss: 6412.744629, avg loss: 3.696106, ppl: 40.290127 +epoch: 0, batch: 14878, sum loss: 5058.632812, avg loss: 3.259429, ppl: 26.034655 +epoch: 0, batch: 14879, sum loss: 6175.069824, avg loss: 3.579751, ppl: 
35.864601 +epoch: 0, batch: 14880, sum loss: 6866.799805, avg loss: 3.646734, ppl: 38.349205 +epoch: 0, batch: 14881, sum loss: 6955.062988, avg loss: 3.552126, ppl: 34.887424 +epoch: 0, batch: 14882, sum loss: 6236.182617, avg loss: 3.439704, ppl: 31.177719 +epoch: 0, batch: 14883, sum loss: 5400.273438, avg loss: 3.595388, ppl: 36.429848 +epoch: 0, batch: 14884, sum loss: 5584.432617, avg loss: 3.333990, ppl: 28.050034 +epoch: 0, batch: 14885, sum loss: 5415.508789, avg loss: 3.359497, ppl: 28.774708 +epoch: 0, batch: 14886, sum loss: 5949.853516, avg loss: 3.463244, ppl: 31.920364 +epoch: 0, batch: 14887, sum loss: 5652.706055, avg loss: 3.532941, ppl: 34.224476 +epoch: 0, batch: 14888, sum loss: 5839.970703, avg loss: 3.526552, ppl: 34.006519 +epoch: 0, batch: 14889, sum loss: 6013.974121, avg loss: 3.494465, ppl: 32.932659 +epoch: 0, batch: 14890, sum loss: 5858.874023, avg loss: 3.502017, ppl: 33.182304 +epoch: 0, batch: 14891, sum loss: 6110.774414, avg loss: 3.448518, ppl: 31.453753 +epoch: 0, batch: 14892, sum loss: 6017.713379, avg loss: 3.358099, ppl: 28.734522 +epoch: 0, batch: 14893, sum loss: 6284.863281, avg loss: 3.285344, ppl: 26.718178 +epoch: 0, batch: 14894, sum loss: 5446.886719, avg loss: 3.291170, ppl: 26.874298 +epoch: 0, batch: 14895, sum loss: 6223.253418, avg loss: 3.406269, ppl: 30.152536 +epoch: 0, batch: 14896, sum loss: 5496.567383, avg loss: 3.271766, ppl: 26.357857 +epoch: 0, batch: 14897, sum loss: 6315.315430, avg loss: 3.625324, ppl: 37.536900 +epoch: 0, batch: 14898, sum loss: 5854.574219, avg loss: 3.355057, ppl: 28.647230 +epoch: 0, batch: 14899, sum loss: 5728.943359, avg loss: 3.461597, ppl: 31.867844 +epoch: 0, batch: 14900, sum loss: 6545.109863, avg loss: 3.516986, ppl: 33.682743 +epoch: 0, batch: 14901, sum loss: 5063.092773, avg loss: 3.294140, ppl: 26.954220 +epoch: 0, batch: 14902, sum loss: 5719.717285, avg loss: 3.327352, ppl: 27.864447 +epoch: 0, batch: 14903, sum loss: 5838.581543, avg loss: 3.586352, ppl: 
36.102146 +epoch: 0, batch: 14904, sum loss: 6496.720703, avg loss: 3.643702, ppl: 38.233124 +epoch: 0, batch: 14905, sum loss: 4738.992676, avg loss: 3.022317, ppl: 20.538818 +epoch: 0, batch: 14906, sum loss: 6882.252930, avg loss: 3.484685, ppl: 32.612152 +epoch: 0, batch: 14907, sum loss: 4747.931641, avg loss: 3.178000, ppl: 23.998701 +epoch: 0, batch: 14908, sum loss: 6295.133789, avg loss: 3.609595, ppl: 36.951077 +epoch: 0, batch: 14909, sum loss: 5013.095703, avg loss: 3.304611, ppl: 27.237957 +epoch: 0, batch: 14910, sum loss: 4840.173828, avg loss: 3.328868, ppl: 27.906731 +epoch: 0, batch: 14911, sum loss: 5223.151367, avg loss: 3.331091, ppl: 27.968851 +epoch: 0, batch: 14912, sum loss: 6485.078613, avg loss: 3.750768, ppl: 42.553768 +epoch: 0, batch: 14913, sum loss: 5392.105469, avg loss: 3.326407, ppl: 27.838131 +epoch: 0, batch: 14914, sum loss: 6048.756836, avg loss: 3.539355, ppl: 34.444679 +epoch: 0, batch: 14915, sum loss: 6358.700195, avg loss: 3.568294, ppl: 35.456062 +epoch: 0, batch: 14916, sum loss: 5260.400879, avg loss: 3.456243, ppl: 31.697651 +epoch: 0, batch: 14917, sum loss: 6477.167969, avg loss: 3.495503, ppl: 32.966881 +epoch: 0, batch: 14918, sum loss: 6346.629883, avg loss: 3.529827, ppl: 34.118076 +epoch: 0, batch: 14919, sum loss: 6791.847168, avg loss: 3.603102, ppl: 36.711937 +epoch: 0, batch: 14920, sum loss: 7065.650879, avg loss: 3.550578, ppl: 34.833458 +epoch: 0, batch: 14921, sum loss: 5485.451172, avg loss: 3.441312, ppl: 31.227898 +epoch: 0, batch: 14922, sum loss: 5619.208008, avg loss: 3.420090, ppl: 30.572165 +epoch: 0, batch: 14923, sum loss: 5376.801758, avg loss: 3.642820, ppl: 38.199394 +epoch: 0, batch: 14924, sum loss: 6093.358887, avg loss: 3.548840, ppl: 34.772968 +epoch: 0, batch: 14925, sum loss: 6060.656738, avg loss: 3.244463, ppl: 25.647932 +epoch: 0, batch: 14926, sum loss: 5518.529297, avg loss: 3.669235, ppl: 39.221878 +epoch: 0, batch: 14927, sum loss: 4761.503906, avg loss: 3.099937, ppl: 
22.196562 +epoch: 0, batch: 14928, sum loss: 6089.697266, avg loss: 3.344150, ppl: 28.336481 +epoch: 0, batch: 14929, sum loss: 4465.895996, avg loss: 3.286163, ppl: 26.740074 +epoch: 0, batch: 14930, sum loss: 5100.155273, avg loss: 3.309640, ppl: 27.375267 +epoch: 0, batch: 14931, sum loss: 6326.817383, avg loss: 3.689106, ppl: 40.009071 +epoch: 0, batch: 14932, sum loss: 6337.467285, avg loss: 3.652719, ppl: 38.579422 +epoch: 0, batch: 14933, sum loss: 6861.482422, avg loss: 3.520514, ppl: 33.801815 +epoch: 0, batch: 14934, sum loss: 5132.257324, avg loss: 3.321850, ppl: 27.711557 +epoch: 0, batch: 14935, sum loss: 5655.220703, avg loss: 3.324645, ppl: 27.789120 +epoch: 0, batch: 14936, sum loss: 5546.747070, avg loss: 3.432393, ppl: 30.950613 +epoch: 0, batch: 14937, sum loss: 4928.177734, avg loss: 3.263694, ppl: 26.145937 +epoch: 0, batch: 14938, sum loss: 5899.543945, avg loss: 3.468280, ppl: 32.081501 +epoch: 0, batch: 14939, sum loss: 4538.921875, avg loss: 3.064768, ppl: 21.429497 +epoch: 0, batch: 14940, sum loss: 6157.753418, avg loss: 3.547093, ppl: 34.712257 +epoch: 0, batch: 14941, sum loss: 4835.851562, avg loss: 3.142204, ppl: 23.154839 +epoch: 0, batch: 14942, sum loss: 6225.873535, avg loss: 3.468453, ppl: 32.087078 +epoch: 0, batch: 14943, sum loss: 5929.775879, avg loss: 3.512901, ppl: 33.545429 +epoch: 0, batch: 14944, sum loss: 5964.821777, avg loss: 3.561088, ppl: 35.201469 +epoch: 0, batch: 14945, sum loss: 4884.960938, avg loss: 3.222270, ppl: 25.084993 +epoch: 0, batch: 14946, sum loss: 5522.599121, avg loss: 3.345003, ppl: 28.360651 +epoch: 0, batch: 14947, sum loss: 6284.513672, avg loss: 3.495280, ppl: 32.959515 +epoch: 0, batch: 14948, sum loss: 5739.087891, avg loss: 3.438639, ppl: 31.144531 +epoch: 0, batch: 14949, sum loss: 4606.201660, avg loss: 3.170132, ppl: 23.810625 +epoch: 0, batch: 14950, sum loss: 4896.813477, avg loss: 3.072028, ppl: 21.585642 +epoch: 0, batch: 14951, sum loss: 6136.676758, avg loss: 3.447571, ppl: 
31.423979 +epoch: 0, batch: 14952, sum loss: 4952.170898, avg loss: 3.378016, ppl: 29.312550 +epoch: 0, batch: 14953, sum loss: 6197.532715, avg loss: 3.607411, ppl: 36.870487 +epoch: 0, batch: 14954, sum loss: 5097.585938, avg loss: 3.251011, ppl: 25.816437 +epoch: 0, batch: 14955, sum loss: 7197.131836, avg loss: 3.534937, ppl: 34.292858 +epoch: 0, batch: 14956, sum loss: 7530.264648, avg loss: 3.984267, ppl: 53.745880 +epoch: 0, batch: 14957, sum loss: 4910.185059, avg loss: 3.293216, ppl: 26.929335 +epoch: 0, batch: 14958, sum loss: 6125.414062, avg loss: 3.530498, ppl: 34.140968 +epoch: 0, batch: 14959, sum loss: 4432.050293, avg loss: 3.097170, ppl: 22.135221 +epoch: 0, batch: 14960, sum loss: 6500.333496, avg loss: 3.555981, ppl: 35.022167 +epoch: 0, batch: 14961, sum loss: 5989.248047, avg loss: 3.470016, ppl: 32.137264 +epoch: 0, batch: 14962, sum loss: 6670.781250, avg loss: 3.481619, ppl: 32.512306 +epoch: 0, batch: 14963, sum loss: 6446.846191, avg loss: 3.466046, ppl: 32.009933 +epoch: 0, batch: 14964, sum loss: 5812.627441, avg loss: 3.552951, ppl: 34.916199 +epoch: 0, batch: 14965, sum loss: 5872.219727, avg loss: 3.402213, ppl: 30.030479 +epoch: 0, batch: 14966, sum loss: 6087.642090, avg loss: 3.583074, ppl: 35.983982 +epoch: 0, batch: 14967, sum loss: 6336.679199, avg loss: 3.485522, ppl: 32.639462 +epoch: 0, batch: 14968, sum loss: 6169.633789, avg loss: 3.578674, ppl: 35.826008 +epoch: 0, batch: 14969, sum loss: 5022.582031, avg loss: 3.330625, ppl: 27.955797 +epoch: 0, batch: 14970, sum loss: 6810.219727, avg loss: 3.766714, ppl: 43.237766 +epoch: 0, batch: 14971, sum loss: 6249.711914, avg loss: 3.604217, ppl: 36.752880 +epoch: 0, batch: 14972, sum loss: 4917.723633, avg loss: 3.267591, ppl: 26.248018 +epoch: 0, batch: 14973, sum loss: 6300.985352, avg loss: 3.531943, ppl: 34.190323 +epoch: 0, batch: 14974, sum loss: 5161.966309, avg loss: 3.184433, ppl: 24.153595 +epoch: 0, batch: 14975, sum loss: 5318.463867, avg loss: 3.387557, ppl: 
29.593554 +epoch: 0, batch: 14976, sum loss: 4737.682617, avg loss: 3.139617, ppl: 23.095030 +epoch: 0, batch: 14977, sum loss: 5811.384277, avg loss: 3.402450, ppl: 30.037590 +epoch: 0, batch: 14978, sum loss: 5725.388672, avg loss: 3.538559, ppl: 34.417294 +epoch: 0, batch: 14979, sum loss: 5980.864258, avg loss: 3.455150, ppl: 31.663027 +epoch: 0, batch: 14980, sum loss: 5451.715820, avg loss: 3.210669, ppl: 24.795670 +epoch: 0, batch: 14981, sum loss: 6869.803223, avg loss: 3.608090, ppl: 36.895519 +epoch: 0, batch: 14982, sum loss: 6232.409180, avg loss: 3.707560, ppl: 40.754257 +epoch: 0, batch: 14983, sum loss: 4911.426758, avg loss: 3.256914, ppl: 25.969276 +epoch: 0, batch: 14984, sum loss: 4877.749512, avg loss: 3.403873, ppl: 30.080368 +epoch: 0, batch: 14985, sum loss: 6468.119141, avg loss: 3.397122, ppl: 29.877977 +epoch: 0, batch: 14986, sum loss: 6424.074219, avg loss: 3.242844, ppl: 25.606440 +epoch: 0, batch: 14987, sum loss: 6988.744141, avg loss: 3.773620, ppl: 43.537380 +epoch: 0, batch: 14988, sum loss: 5103.673828, avg loss: 3.169984, ppl: 23.807095 +epoch: 0, batch: 14989, sum loss: 5415.866699, avg loss: 3.262570, ppl: 26.116581 +epoch: 0, batch: 14990, sum loss: 5908.930664, avg loss: 3.546777, ppl: 34.701294 +epoch: 0, batch: 14991, sum loss: 6718.964844, avg loss: 3.819764, ppl: 45.593452 +epoch: 0, batch: 14992, sum loss: 5139.208984, avg loss: 3.387745, ppl: 29.599129 +epoch: 0, batch: 14993, sum loss: 5619.532715, avg loss: 3.625505, ppl: 37.543686 +epoch: 0, batch: 14994, sum loss: 5241.928711, avg loss: 3.307210, ppl: 27.308840 +epoch: 0, batch: 14995, sum loss: 6336.855469, avg loss: 3.506837, ppl: 33.342651 +epoch: 0, batch: 14996, sum loss: 6271.940918, avg loss: 3.425419, ppl: 30.735508 +epoch: 0, batch: 14997, sum loss: 5870.922852, avg loss: 3.423279, ppl: 30.669804 +epoch: 0, batch: 14998, sum loss: 7035.118652, avg loss: 3.609604, ppl: 36.951431 +epoch: 0, batch: 14999, sum loss: 7251.687988, avg loss: 3.570501, ppl: 
35.534405 +epoch: 0, batch: 15000, sum loss: 5958.068848, avg loss: 3.461981, ppl: 31.880054 +epoch: 0, batch: 15001, sum loss: 7276.462402, avg loss: 3.614736, ppl: 37.141525 +epoch: 0, batch: 15002, sum loss: 6022.245117, avg loss: 3.606135, ppl: 36.823452 +epoch: 0, batch: 15003, sum loss: 5291.716309, avg loss: 3.264476, ppl: 26.166405 +epoch: 0, batch: 15004, sum loss: 4850.130371, avg loss: 3.067761, ppl: 21.493723 +epoch: 0, batch: 15005, sum loss: 6347.764648, avg loss: 3.480134, ppl: 32.464081 +epoch: 0, batch: 15006, sum loss: 5453.975098, avg loss: 3.419420, ppl: 30.551697 +epoch: 0, batch: 15007, sum loss: 5838.828125, avg loss: 3.568966, ppl: 35.479885 +epoch: 0, batch: 15008, sum loss: 4643.199707, avg loss: 3.391673, ppl: 29.715614 +epoch: 0, batch: 15009, sum loss: 5865.895020, avg loss: 3.616458, ppl: 37.205551 +epoch: 0, batch: 15010, sum loss: 5866.592285, avg loss: 3.434773, ppl: 31.024368 +epoch: 0, batch: 15011, sum loss: 4737.284668, avg loss: 3.002082, ppl: 20.127392 +epoch: 0, batch: 15012, sum loss: 5696.402344, avg loss: 3.362693, ppl: 28.866827 +epoch: 0, batch: 15013, sum loss: 6890.955566, avg loss: 3.411364, ppl: 30.306564 +epoch: 0, batch: 15014, sum loss: 6101.207520, avg loss: 3.484413, ppl: 32.603294 +epoch: 0, batch: 15015, sum loss: 6493.035156, avg loss: 3.810467, ppl: 45.171520 +epoch: 0, batch: 15016, sum loss: 4888.020996, avg loss: 3.364089, ppl: 28.907145 +epoch: 0, batch: 15017, sum loss: 6240.884766, avg loss: 3.574390, ppl: 35.672859 +epoch: 0, batch: 15018, sum loss: 5562.324219, avg loss: 3.169415, ppl: 23.793571 +epoch: 0, batch: 15019, sum loss: 6619.913086, avg loss: 3.588029, ppl: 36.162716 +epoch: 0, batch: 15020, sum loss: 6492.791992, avg loss: 3.365885, ppl: 28.959108 +epoch: 0, batch: 15021, sum loss: 6013.821289, avg loss: 3.376654, ppl: 29.272673 +epoch: 0, batch: 15022, sum loss: 5800.062500, avg loss: 3.586928, ppl: 36.122940 +epoch: 0, batch: 15023, sum loss: 6041.002930, avg loss: 3.553531, ppl: 
34.936474 +epoch: 0, batch: 15024, sum loss: 4668.968262, avg loss: 3.073712, ppl: 21.622007 +epoch: 0, batch: 15025, sum loss: 5914.584473, avg loss: 3.452764, ppl: 31.587574 +epoch: 0, batch: 15026, sum loss: 4010.761230, avg loss: 2.914797, ppl: 18.445074 +epoch: 0, batch: 15027, sum loss: 6006.686523, avg loss: 3.516795, ppl: 33.676327 +epoch: 0, batch: 15028, sum loss: 5528.441406, avg loss: 3.499014, ppl: 33.082806 +epoch: 0, batch: 15029, sum loss: 6257.942383, avg loss: 3.486319, ppl: 32.665485 +epoch: 0, batch: 15030, sum loss: 6009.472168, avg loss: 3.418357, ppl: 30.519236 +epoch: 0, batch: 15031, sum loss: 6100.622559, avg loss: 3.344640, ppl: 28.350355 +epoch: 0, batch: 15032, sum loss: 6277.043945, avg loss: 3.694552, ppl: 40.227554 +epoch: 0, batch: 15033, sum loss: 5661.310547, avg loss: 3.318470, ppl: 27.618069 +epoch: 0, batch: 15034, sum loss: 6228.726074, avg loss: 3.531024, ppl: 34.158928 +epoch: 0, batch: 15035, sum loss: 4815.577148, avg loss: 3.234102, ppl: 25.383554 +epoch: 0, batch: 15036, sum loss: 5746.325684, avg loss: 3.564718, ppl: 35.329498 +epoch: 0, batch: 15037, sum loss: 6042.571289, avg loss: 3.466765, ppl: 32.032948 +epoch: 0, batch: 15038, sum loss: 5975.054688, avg loss: 3.647774, ppl: 38.389126 +epoch: 0, batch: 15039, sum loss: 6303.589355, avg loss: 3.654255, ppl: 38.638721 +epoch: 0, batch: 15040, sum loss: 5133.098145, avg loss: 3.246741, ppl: 25.706434 +epoch: 0, batch: 15041, sum loss: 5755.642578, avg loss: 3.541934, ppl: 34.533634 +epoch: 0, batch: 15042, sum loss: 6759.921875, avg loss: 3.580467, ppl: 35.890297 +epoch: 0, batch: 15043, sum loss: 5803.829590, avg loss: 3.519606, ppl: 33.771107 +epoch: 0, batch: 15044, sum loss: 6111.983398, avg loss: 3.547292, ppl: 34.719154 +epoch: 0, batch: 15045, sum loss: 5287.376953, avg loss: 3.306677, ppl: 27.294279 +epoch: 0, batch: 15046, sum loss: 7080.031738, avg loss: 3.672216, ppl: 39.338982 +epoch: 0, batch: 15047, sum loss: 5371.514648, avg loss: 3.214551, ppl: 
24.892118 +epoch: 0, batch: 15048, sum loss: 5900.435547, avg loss: 3.548067, ppl: 34.746090 +epoch: 0, batch: 15049, sum loss: 5283.617188, avg loss: 3.433150, ppl: 30.974045 +epoch: 0, batch: 15050, sum loss: 6231.916504, avg loss: 3.530831, ppl: 34.152332 +epoch: 0, batch: 15051, sum loss: 6843.238281, avg loss: 3.669297, ppl: 39.224312 +epoch: 0, batch: 15052, sum loss: 5432.720703, avg loss: 3.129447, ppl: 22.861343 +epoch: 0, batch: 15053, sum loss: 6869.662109, avg loss: 3.695353, ppl: 40.259792 +epoch: 0, batch: 15054, sum loss: 6561.099609, avg loss: 3.792543, ppl: 44.369083 +epoch: 0, batch: 15055, sum loss: 6431.506836, avg loss: 3.452231, ppl: 31.570753 +epoch: 0, batch: 15056, sum loss: 5537.531250, avg loss: 3.399344, ppl: 29.944448 +epoch: 0, batch: 15057, sum loss: 5698.025879, avg loss: 3.405873, ppl: 30.140606 +epoch: 0, batch: 15058, sum loss: 5320.492188, avg loss: 3.093309, ppl: 22.049929 +epoch: 0, batch: 15059, sum loss: 5034.383301, avg loss: 3.345105, ppl: 28.363565 +epoch: 0, batch: 15060, sum loss: 6622.477539, avg loss: 3.498403, ppl: 33.062618 +epoch: 0, batch: 15061, sum loss: 4161.742676, avg loss: 2.994060, ppl: 19.966574 +epoch: 0, batch: 15062, sum loss: 4079.368408, avg loss: 3.191994, ppl: 24.336906 +epoch: 0, batch: 15063, sum loss: 4954.700684, avg loss: 3.153852, ppl: 23.426123 +epoch: 0, batch: 15064, sum loss: 6441.243652, avg loss: 3.590437, ppl: 36.249912 +epoch: 0, batch: 15065, sum loss: 5458.442871, avg loss: 3.417936, ppl: 30.506372 +epoch: 0, batch: 15066, sum loss: 4417.313965, avg loss: 3.128409, ppl: 22.837626 +epoch: 0, batch: 15067, sum loss: 5694.344727, avg loss: 3.383449, ppl: 29.472240 +epoch: 0, batch: 15068, sum loss: 5482.033203, avg loss: 3.245727, ppl: 25.680380 +epoch: 0, batch: 15069, sum loss: 6660.476562, avg loss: 3.685930, ppl: 39.882206 +epoch: 0, batch: 15070, sum loss: 5302.731445, avg loss: 3.251215, ppl: 25.821693 +epoch: 0, batch: 15071, sum loss: 5495.085938, avg loss: 3.451687, ppl: 
31.553581 +epoch: 0, batch: 15072, sum loss: 5201.815918, avg loss: 3.234960, ppl: 25.405357 +epoch: 0, batch: 15073, sum loss: 5646.801270, avg loss: 3.399640, ppl: 29.953302 +epoch: 0, batch: 15074, sum loss: 6924.639648, avg loss: 3.808933, ppl: 45.102280 +epoch: 0, batch: 15075, sum loss: 6223.178711, avg loss: 3.374826, ppl: 29.219191 +epoch: 0, batch: 15076, sum loss: 6813.336426, avg loss: 3.584080, ppl: 36.020210 +epoch: 0, batch: 15077, sum loss: 4946.999023, avg loss: 3.246062, ppl: 25.688984 +epoch: 0, batch: 15078, sum loss: 6554.850098, avg loss: 3.663974, ppl: 39.016098 +epoch: 0, batch: 15079, sum loss: 5660.897949, avg loss: 3.391790, ppl: 29.719114 +epoch: 0, batch: 15080, sum loss: 5655.843262, avg loss: 3.193587, ppl: 24.375713 +epoch: 0, batch: 15081, sum loss: 5222.444336, avg loss: 3.481630, ppl: 32.512661 +epoch: 0, batch: 15082, sum loss: 6680.068359, avg loss: 3.521386, ppl: 33.831276 +epoch: 0, batch: 15083, sum loss: 4722.292480, avg loss: 3.286216, ppl: 26.741484 +epoch: 0, batch: 15084, sum loss: 4637.943848, avg loss: 3.148638, ppl: 23.304308 +epoch: 0, batch: 15085, sum loss: 5486.707520, avg loss: 3.424911, ppl: 30.719912 +epoch: 0, batch: 15086, sum loss: 6062.137695, avg loss: 3.481986, ppl: 32.524254 +epoch: 0, batch: 15087, sum loss: 6035.727539, avg loss: 3.542094, ppl: 34.539162 +epoch: 0, batch: 15088, sum loss: 6285.829590, avg loss: 3.392245, ppl: 29.732622 +epoch: 0, batch: 15089, sum loss: 5194.283691, avg loss: 3.419542, ppl: 30.555405 +epoch: 0, batch: 15090, sum loss: 6103.076172, avg loss: 3.432551, ppl: 30.955521 +epoch: 0, batch: 15091, sum loss: 5299.839355, avg loss: 3.324868, ppl: 27.795322 +epoch: 0, batch: 15092, sum loss: 6119.966309, avg loss: 3.438183, ppl: 31.130352 +epoch: 0, batch: 15093, sum loss: 6119.305176, avg loss: 3.418606, ppl: 30.526833 +epoch: 0, batch: 15094, sum loss: 5459.112793, avg loss: 3.437729, ppl: 31.116201 +epoch: 0, batch: 15095, sum loss: 6454.264648, avg loss: 3.623955, ppl: 
37.485550 +epoch: 0, batch: 15096, sum loss: 5552.114746, avg loss: 3.393713, ppl: 29.776314 +epoch: 0, batch: 15097, sum loss: 5555.101562, avg loss: 3.372861, ppl: 29.161831 +epoch: 0, batch: 15098, sum loss: 6951.094238, avg loss: 3.812998, ppl: 45.285984 +epoch: 0, batch: 15099, sum loss: 4699.499023, avg loss: 3.243271, ppl: 25.617371 +epoch: 0, batch: 15100, sum loss: 4937.910645, avg loss: 3.601686, ppl: 36.659973 +epoch: 0, batch: 15101, sum loss: 5708.958496, avg loss: 3.451607, ppl: 31.551069 +epoch: 0, batch: 15102, sum loss: 4549.232422, avg loss: 3.446388, ppl: 31.386826 +epoch: 0, batch: 15103, sum loss: 6129.307129, avg loss: 3.462885, ppl: 31.908913 +epoch: 0, batch: 15104, sum loss: 5744.400391, avg loss: 3.265719, ppl: 26.198952 +epoch: 0, batch: 15105, sum loss: 5678.921875, avg loss: 3.332701, ppl: 28.013897 +epoch: 0, batch: 15106, sum loss: 6043.864746, avg loss: 3.324458, ppl: 27.783939 +epoch: 0, batch: 15107, sum loss: 5167.915039, avg loss: 3.287478, ppl: 26.775244 +epoch: 0, batch: 15108, sum loss: 5763.990723, avg loss: 3.657354, ppl: 38.758667 +epoch: 0, batch: 15109, sum loss: 5205.125977, avg loss: 3.239033, ppl: 25.509043 +epoch: 0, batch: 15110, sum loss: 6734.589355, avg loss: 3.906375, ppl: 49.718380 +epoch: 0, batch: 15111, sum loss: 5691.350586, avg loss: 3.098177, ppl: 22.157520 +epoch: 0, batch: 15112, sum loss: 6738.030273, avg loss: 3.384244, ppl: 29.495691 +epoch: 0, batch: 15113, sum loss: 5781.893555, avg loss: 3.480971, ppl: 32.491268 +epoch: 0, batch: 15114, sum loss: 5542.165039, avg loss: 3.324634, ppl: 27.788828 +epoch: 0, batch: 15115, sum loss: 5793.494629, avg loss: 3.438276, ppl: 31.133232 +epoch: 0, batch: 15116, sum loss: 5531.312500, avg loss: 3.410180, ppl: 30.270702 +epoch: 0, batch: 15117, sum loss: 5867.955566, avg loss: 3.224152, ppl: 25.132244 +epoch: 0, batch: 15118, sum loss: 6438.121094, avg loss: 3.639413, ppl: 38.069477 +epoch: 0, batch: 15119, sum loss: 5931.973633, avg loss: 3.554209, ppl: 
34.960140 +epoch: 0, batch: 15120, sum loss: 5186.319824, avg loss: 3.223319, ppl: 25.111317 +epoch: 0, batch: 15121, sum loss: 5291.221191, avg loss: 3.336205, ppl: 28.112238 +epoch: 0, batch: 15122, sum loss: 4757.083008, avg loss: 3.123495, ppl: 22.725670 +epoch: 0, batch: 15123, sum loss: 5686.341797, avg loss: 3.529697, ppl: 34.113636 +epoch: 0, batch: 15124, sum loss: 5197.218750, avg loss: 3.460199, ppl: 31.823305 +epoch: 0, batch: 15125, sum loss: 7474.407227, avg loss: 3.807645, ppl: 45.044239 +epoch: 0, batch: 15126, sum loss: 5642.874023, avg loss: 3.533422, ppl: 34.240925 +epoch: 0, batch: 15127, sum loss: 5962.024414, avg loss: 3.460258, ppl: 31.825186 +epoch: 0, batch: 15128, sum loss: 5767.312012, avg loss: 3.376646, ppl: 29.272434 +epoch: 0, batch: 15129, sum loss: 6127.663086, avg loss: 3.686921, ppl: 39.921753 +epoch: 0, batch: 15130, sum loss: 6080.477539, avg loss: 3.656331, ppl: 38.719006 +epoch: 0, batch: 15131, sum loss: 5101.640625, avg loss: 3.496670, ppl: 33.005344 +epoch: 0, batch: 15132, sum loss: 6411.225098, avg loss: 3.439498, ppl: 31.171320 +epoch: 0, batch: 15133, sum loss: 5227.946777, avg loss: 3.383785, ppl: 29.482136 +epoch: 0, batch: 15134, sum loss: 7013.608887, avg loss: 3.517357, ppl: 33.695240 +epoch: 0, batch: 15135, sum loss: 6029.122070, avg loss: 3.213818, ppl: 24.873869 +epoch: 0, batch: 15136, sum loss: 6823.754883, avg loss: 3.637396, ppl: 37.992779 +epoch: 0, batch: 15137, sum loss: 6224.569336, avg loss: 3.465796, ppl: 32.001923 +epoch: 0, batch: 15138, sum loss: 6234.848145, avg loss: 3.465730, ppl: 31.999809 +epoch: 0, batch: 15139, sum loss: 5373.788086, avg loss: 3.356520, ppl: 28.689177 +epoch: 0, batch: 15140, sum loss: 5763.240723, avg loss: 3.230516, ppl: 25.292709 +epoch: 0, batch: 15141, sum loss: 5989.261719, avg loss: 3.379945, ppl: 29.369143 +epoch: 0, batch: 15142, sum loss: 5439.063477, avg loss: 3.224104, ppl: 25.131050 +epoch: 0, batch: 15143, sum loss: 5774.320801, avg loss: 3.573218, ppl: 
35.631081 +epoch: 0, batch: 15144, sum loss: 5956.085938, avg loss: 3.620721, ppl: 37.364494 +epoch: 0, batch: 15145, sum loss: 5426.143555, avg loss: 3.548819, ppl: 34.772221 +epoch: 0, batch: 15146, sum loss: 4922.634277, avg loss: 3.227957, ppl: 25.228064 +epoch: 0, batch: 15147, sum loss: 5298.609375, avg loss: 3.334556, ppl: 28.065916 +epoch: 0, batch: 15148, sum loss: 5900.602051, avg loss: 3.651363, ppl: 38.527130 +epoch: 0, batch: 15149, sum loss: 6237.190430, avg loss: 3.619960, ppl: 37.336060 +epoch: 0, batch: 15150, sum loss: 5648.277344, avg loss: 3.803554, ppl: 44.860317 +epoch: 0, batch: 15151, sum loss: 6132.999023, avg loss: 3.375344, ppl: 29.234325 +epoch: 0, batch: 15152, sum loss: 6774.168457, avg loss: 3.509932, ppl: 33.445995 +epoch: 0, batch: 15153, sum loss: 6055.002930, avg loss: 3.352715, ppl: 28.580217 +epoch: 0, batch: 15154, sum loss: 5314.892578, avg loss: 3.338500, ppl: 28.176838 +epoch: 0, batch: 15155, sum loss: 5452.225586, avg loss: 3.422615, ppl: 30.649460 +epoch: 0, batch: 15156, sum loss: 6283.238281, avg loss: 3.570022, ppl: 35.517361 +epoch: 0, batch: 15157, sum loss: 5825.123047, avg loss: 3.394594, ppl: 29.802549 +epoch: 0, batch: 15158, sum loss: 5500.134766, avg loss: 3.422611, ppl: 30.649321 +epoch: 0, batch: 15159, sum loss: 6146.482910, avg loss: 3.367936, ppl: 29.018568 +epoch: 0, batch: 15160, sum loss: 4852.438477, avg loss: 3.219933, ppl: 25.026432 +epoch: 0, batch: 15161, sum loss: 5305.401367, avg loss: 3.305546, ppl: 27.263418 +epoch: 0, batch: 15162, sum loss: 5254.058594, avg loss: 3.449809, ppl: 31.494371 +epoch: 0, batch: 15163, sum loss: 5882.102051, avg loss: 3.443854, ppl: 31.307373 +epoch: 0, batch: 15164, sum loss: 6367.517578, avg loss: 3.405090, ppl: 30.116995 +epoch: 0, batch: 15165, sum loss: 5911.939453, avg loss: 3.697273, ppl: 40.337147 +epoch: 0, batch: 15166, sum loss: 5692.694824, avg loss: 3.551275, ppl: 34.857750 +epoch: 0, batch: 15167, sum loss: 3702.989014, avg loss: 2.924952, ppl: 
18.633327 +epoch: 0, batch: 15168, sum loss: 4710.789062, avg loss: 3.312791, ppl: 27.461668 +epoch: 0, batch: 15169, sum loss: 5459.714355, avg loss: 3.221070, ppl: 25.054922 +epoch: 0, batch: 15170, sum loss: 5619.294922, avg loss: 3.387158, ppl: 29.581758 +epoch: 0, batch: 15171, sum loss: 5748.857422, avg loss: 3.577385, ppl: 35.779854 +epoch: 0, batch: 15172, sum loss: 4736.027344, avg loss: 3.270737, ppl: 26.330742 +epoch: 0, batch: 15173, sum loss: 6534.417969, avg loss: 3.486882, ppl: 32.683895 +epoch: 0, batch: 15174, sum loss: 5517.317871, avg loss: 3.414182, ppl: 30.392069 +epoch: 0, batch: 15175, sum loss: 5026.262695, avg loss: 3.445005, ppl: 31.343452 +epoch: 0, batch: 15176, sum loss: 5532.202148, avg loss: 3.671003, ppl: 39.291309 +epoch: 0, batch: 15177, sum loss: 6845.270020, avg loss: 3.583911, ppl: 36.014114 +epoch: 0, batch: 15178, sum loss: 5030.458984, avg loss: 3.232943, ppl: 25.354160 +epoch: 0, batch: 15179, sum loss: 6680.393555, avg loss: 3.676606, ppl: 39.512077 +epoch: 0, batch: 15180, sum loss: 5916.255859, avg loss: 3.353887, ppl: 28.613728 +epoch: 0, batch: 15181, sum loss: 6238.309082, avg loss: 3.431413, ppl: 30.920307 +epoch: 0, batch: 15182, sum loss: 5984.826660, avg loss: 3.522558, ppl: 33.870975 +epoch: 0, batch: 15183, sum loss: 6015.475586, avg loss: 3.619420, ppl: 37.315910 +epoch: 0, batch: 15184, sum loss: 5079.875977, avg loss: 3.266801, ppl: 26.227314 +epoch: 0, batch: 15185, sum loss: 6855.340820, avg loss: 3.591064, ppl: 36.272648 +epoch: 0, batch: 15186, sum loss: 5231.585449, avg loss: 3.571048, ppl: 35.553833 +epoch: 0, batch: 15187, sum loss: 6928.331055, avg loss: 3.644572, ppl: 38.266384 +epoch: 0, batch: 15188, sum loss: 6108.660645, avg loss: 3.508708, ppl: 33.405079 +epoch: 0, batch: 15189, sum loss: 6297.335938, avg loss: 3.619159, ppl: 37.306160 +epoch: 0, batch: 15190, sum loss: 5906.563477, avg loss: 3.398483, ppl: 29.918680 +epoch: 0, batch: 15191, sum loss: 5066.410156, avg loss: 3.458300, ppl: 
31.762945 +epoch: 0, batch: 15192, sum loss: 4922.708008, avg loss: 3.306050, ppl: 27.277157 +epoch: 0, batch: 15193, sum loss: 6726.896484, avg loss: 3.589592, ppl: 36.219311 +epoch: 0, batch: 15194, sum loss: 5746.630371, avg loss: 3.480697, ppl: 32.482353 +epoch: 0, batch: 15195, sum loss: 5230.476562, avg loss: 3.312525, ppl: 27.454355 +epoch: 0, batch: 15196, sum loss: 5812.925293, avg loss: 3.260194, ppl: 26.054586 +epoch: 0, batch: 15197, sum loss: 6771.150879, avg loss: 3.619001, ppl: 37.300293 +epoch: 0, batch: 15198, sum loss: 6244.341797, avg loss: 3.662371, ppl: 38.953579 +epoch: 0, batch: 15199, sum loss: 5734.289062, avg loss: 3.615567, ppl: 37.172417 +epoch: 0, batch: 15200, sum loss: 5931.100586, avg loss: 3.555816, ppl: 35.016373 +epoch: 0, batch: 15201, sum loss: 5023.527832, avg loss: 3.193597, ppl: 24.375946 +epoch: 0, batch: 15202, sum loss: 6418.855957, avg loss: 3.585953, ppl: 36.087734 +epoch: 0, batch: 15203, sum loss: 5313.921875, avg loss: 3.486825, ppl: 32.682034 +epoch: 0, batch: 15204, sum loss: 5501.020508, avg loss: 3.433846, ppl: 30.995607 +epoch: 0, batch: 15205, sum loss: 6288.020996, avg loss: 3.756285, ppl: 42.789177 +epoch: 0, batch: 15206, sum loss: 5836.031738, avg loss: 3.432960, ppl: 30.968174 +epoch: 0, batch: 15207, sum loss: 6162.762207, avg loss: 3.878390, ppl: 48.346329 +epoch: 0, batch: 15208, sum loss: 6111.413086, avg loss: 3.532609, ppl: 34.213104 +epoch: 0, batch: 15209, sum loss: 6031.251465, avg loss: 3.600747, ppl: 36.625595 +epoch: 0, batch: 15210, sum loss: 5336.782227, avg loss: 3.325098, ppl: 27.801718 +epoch: 0, batch: 15211, sum loss: 6797.795898, avg loss: 3.698474, ppl: 40.385635 +epoch: 0, batch: 15212, sum loss: 5458.525391, avg loss: 3.334469, ppl: 28.063473 +epoch: 0, batch: 15213, sum loss: 5154.095703, avg loss: 3.282864, ppl: 26.651985 +epoch: 0, batch: 15214, sum loss: 6123.840332, avg loss: 3.509364, ppl: 33.426998 +epoch: 0, batch: 15215, sum loss: 5432.873535, avg loss: 3.507343, ppl: 
33.359501 +epoch: 0, batch: 15216, sum loss: 4710.729492, avg loss: 3.176487, ppl: 23.962420 +epoch: 0, batch: 15217, sum loss: 6142.380859, avg loss: 3.443039, ppl: 31.281870 +epoch: 0, batch: 15218, sum loss: 6144.450195, avg loss: 3.396601, ppl: 29.862417 +epoch: 0, batch: 15219, sum loss: 5943.696777, avg loss: 3.604425, ppl: 36.760540 +epoch: 0, batch: 15220, sum loss: 6287.010254, avg loss: 3.526086, ppl: 33.990654 +epoch: 0, batch: 15221, sum loss: 6392.061523, avg loss: 3.585004, ppl: 36.053497 +epoch: 0, batch: 15222, sum loss: 5538.711914, avg loss: 3.442332, ppl: 31.259764 +epoch: 0, batch: 15223, sum loss: 4921.044434, avg loss: 3.087230, ppl: 21.916285 +epoch: 0, batch: 15224, sum loss: 5058.102051, avg loss: 3.240296, ppl: 25.541285 +epoch: 0, batch: 15225, sum loss: 6118.892578, avg loss: 3.524708, ppl: 33.943855 +epoch: 0, batch: 15226, sum loss: 5516.307617, avg loss: 3.384238, ppl: 29.495502 +epoch: 0, batch: 15227, sum loss: 5398.774414, avg loss: 3.299984, ppl: 27.112217 +epoch: 0, batch: 15228, sum loss: 5495.942383, avg loss: 3.532097, ppl: 34.195587 +epoch: 0, batch: 15229, sum loss: 5713.792969, avg loss: 3.250167, ppl: 25.794638 +epoch: 0, batch: 15230, sum loss: 6634.375000, avg loss: 3.605639, ppl: 36.805176 +epoch: 0, batch: 15231, sum loss: 5137.509277, avg loss: 3.251588, ppl: 25.831329 +epoch: 0, batch: 15232, sum loss: 6000.653809, avg loss: 3.517382, ppl: 33.696095 +epoch: 0, batch: 15233, sum loss: 5161.966797, avg loss: 3.562434, ppl: 35.248894 +epoch: 0, batch: 15234, sum loss: 6425.762695, avg loss: 3.350241, ppl: 28.509609 +epoch: 0, batch: 15235, sum loss: 5907.174805, avg loss: 3.530888, ppl: 34.154270 +epoch: 0, batch: 15236, sum loss: 6267.847656, avg loss: 3.585725, ppl: 36.079517 +epoch: 0, batch: 15237, sum loss: 5377.908203, avg loss: 3.646039, ppl: 38.322586 +epoch: 0, batch: 15238, sum loss: 6532.905762, avg loss: 3.438372, ppl: 31.136217 +epoch: 0, batch: 15239, sum loss: 4902.877441, avg loss: 3.527250, ppl: 
34.030251 +epoch: 0, batch: 15240, sum loss: 6755.371582, avg loss: 3.651552, ppl: 38.534431 +epoch: 0, batch: 15241, sum loss: 4382.205566, avg loss: 3.173212, ppl: 23.884073 +epoch: 0, batch: 15242, sum loss: 5335.816406, avg loss: 3.387820, ppl: 29.601343 +epoch: 0, batch: 15243, sum loss: 5676.620117, avg loss: 3.700535, ppl: 40.468933 +epoch: 0, batch: 15244, sum loss: 6065.462891, avg loss: 3.649496, ppl: 38.455292 +epoch: 0, batch: 15245, sum loss: 6350.501953, avg loss: 3.475918, ppl: 32.327484 +epoch: 0, batch: 15246, sum loss: 6478.551270, avg loss: 3.565521, ppl: 35.357861 +epoch: 0, batch: 15247, sum loss: 4920.597656, avg loss: 3.050588, ppl: 21.127756 +epoch: 0, batch: 15248, sum loss: 6299.181641, avg loss: 3.603651, ppl: 36.732101 +epoch: 0, batch: 15249, sum loss: 6711.610840, avg loss: 3.706025, ppl: 40.691738 +epoch: 0, batch: 15250, sum loss: 6443.392578, avg loss: 3.619884, ppl: 37.333221 +epoch: 0, batch: 15251, sum loss: 5806.844727, avg loss: 3.536446, ppl: 34.344654 +epoch: 0, batch: 15252, sum loss: 7400.490234, avg loss: 3.899099, ppl: 49.357975 +epoch: 0, batch: 15253, sum loss: 5071.741211, avg loss: 3.240729, ppl: 25.552345 +epoch: 0, batch: 15254, sum loss: 5189.124023, avg loss: 3.382740, ppl: 29.451372 +epoch: 0, batch: 15255, sum loss: 6745.429688, avg loss: 3.557716, ppl: 35.082989 +epoch: 0, batch: 15256, sum loss: 5655.150391, avg loss: 3.380245, ppl: 29.377981 +epoch: 0, batch: 15257, sum loss: 6776.047852, avg loss: 3.590910, ppl: 36.267071 +epoch: 0, batch: 15258, sum loss: 5872.265137, avg loss: 3.485024, ppl: 32.623203 +epoch: 0, batch: 15259, sum loss: 5925.273438, avg loss: 3.537477, ppl: 34.380070 +epoch: 0, batch: 15260, sum loss: 5714.022949, avg loss: 3.351333, ppl: 28.540756 +epoch: 0, batch: 15261, sum loss: 5171.310059, avg loss: 3.122772, ppl: 22.709236 +epoch: 0, batch: 15262, sum loss: 5729.207031, avg loss: 3.487040, ppl: 32.689045 +epoch: 0, batch: 15263, sum loss: 5163.966309, avg loss: 3.370735, ppl: 
29.099913 +epoch: 0, batch: 15264, sum loss: 7134.826172, avg loss: 3.547900, ppl: 34.740284 +epoch: 0, batch: 15265, sum loss: 5193.964355, avg loss: 3.346627, ppl: 28.406750 +epoch: 0, batch: 15266, sum loss: 5658.601562, avg loss: 3.301401, ppl: 27.150654 +epoch: 0, batch: 15267, sum loss: 7393.812500, avg loss: 3.704315, ppl: 40.622208 +epoch: 0, batch: 15268, sum loss: 7111.977539, avg loss: 3.727451, ppl: 41.573017 +epoch: 0, batch: 15269, sum loss: 5689.082031, avg loss: 3.400527, ppl: 29.979902 +epoch: 0, batch: 15270, sum loss: 5504.363281, avg loss: 3.497054, ppl: 33.018040 +epoch: 0, batch: 15271, sum loss: 5527.425781, avg loss: 3.360137, ppl: 28.793135 +epoch: 0, batch: 15272, sum loss: 4420.688477, avg loss: 3.361740, ppl: 28.839331 +epoch: 0, batch: 15273, sum loss: 5377.191406, avg loss: 3.652983, ppl: 38.589622 +epoch: 0, batch: 15274, sum loss: 5963.541992, avg loss: 3.376864, ppl: 29.278807 +epoch: 0, batch: 15275, sum loss: 6444.189453, avg loss: 3.433239, ppl: 30.976814 +epoch: 0, batch: 15276, sum loss: 6766.803223, avg loss: 3.677610, ppl: 39.551769 +epoch: 0, batch: 15277, sum loss: 4626.912598, avg loss: 3.333510, ppl: 28.036589 +epoch: 0, batch: 15278, sum loss: 5393.044922, avg loss: 3.278447, ppl: 26.534523 +epoch: 0, batch: 15279, sum loss: 5661.588867, avg loss: 3.441695, ppl: 31.239872 +epoch: 0, batch: 15280, sum loss: 5602.173340, avg loss: 3.426406, ppl: 30.765862 +epoch: 0, batch: 15281, sum loss: 6355.660156, avg loss: 3.511415, ppl: 33.495625 +epoch: 0, batch: 15282, sum loss: 5239.574707, avg loss: 3.369502, ppl: 29.064037 +epoch: 0, batch: 15283, sum loss: 5368.077148, avg loss: 3.363457, ppl: 28.888880 +epoch: 0, batch: 15284, sum loss: 5658.802246, avg loss: 3.376374, ppl: 29.264460 +epoch: 0, batch: 15285, sum loss: 5138.821289, avg loss: 3.510124, ppl: 33.452408 +epoch: 0, batch: 15286, sum loss: 5581.115723, avg loss: 3.252398, ppl: 25.852272 +epoch: 0, batch: 15287, sum loss: 5960.544434, avg loss: 3.463419, ppl: 
31.925951 +epoch: 0, batch: 15288, sum loss: 5504.819336, avg loss: 3.296299, ppl: 27.012480 +epoch: 0, batch: 15289, sum loss: 6902.357910, avg loss: 3.726975, ppl: 41.553219 +epoch: 0, batch: 15290, sum loss: 6186.035156, avg loss: 3.647426, ppl: 38.375774 +epoch: 0, batch: 15291, sum loss: 5458.443848, avg loss: 3.162482, ppl: 23.629166 +epoch: 0, batch: 15292, sum loss: 5318.745605, avg loss: 3.362039, ppl: 28.847948 +epoch: 0, batch: 15293, sum loss: 5794.616211, avg loss: 3.587998, ppl: 36.161594 +epoch: 0, batch: 15294, sum loss: 6270.037109, avg loss: 3.491112, ppl: 32.822426 +epoch: 0, batch: 15295, sum loss: 4700.062012, avg loss: 3.014793, ppl: 20.384869 +epoch: 0, batch: 15296, sum loss: 5465.009277, avg loss: 3.354825, ppl: 28.640579 +epoch: 0, batch: 15297, sum loss: 5928.123047, avg loss: 3.343555, ppl: 28.319630 +epoch: 0, batch: 15298, sum loss: 5675.953125, avg loss: 3.398774, ppl: 29.927399 +epoch: 0, batch: 15299, sum loss: 6572.079102, avg loss: 3.585422, ppl: 36.068584 +epoch: 0, batch: 15300, sum loss: 4628.518555, avg loss: 3.280311, ppl: 26.584036 +epoch: 0, batch: 15301, sum loss: 5885.014160, avg loss: 3.364788, ppl: 28.927366 +epoch: 0, batch: 15302, sum loss: 4796.399414, avg loss: 3.051145, ppl: 21.139526 +epoch: 0, batch: 15303, sum loss: 5879.934082, avg loss: 3.348482, ppl: 28.459496 +epoch: 0, batch: 15304, sum loss: 5576.267090, avg loss: 3.504882, ppl: 33.277512 +epoch: 0, batch: 15305, sum loss: 5391.139160, avg loss: 3.160105, ppl: 23.573071 +epoch: 0, batch: 15306, sum loss: 6274.982910, avg loss: 3.404766, ppl: 30.107239 +epoch: 0, batch: 15307, sum loss: 6170.240234, avg loss: 3.493907, ppl: 32.914299 +epoch: 0, batch: 15308, sum loss: 7540.894531, avg loss: 3.567121, ppl: 35.414497 +epoch: 0, batch: 15309, sum loss: 7140.942383, avg loss: 3.798373, ppl: 44.628536 +epoch: 0, batch: 15310, sum loss: 6642.270020, avg loss: 3.702492, ppl: 40.548214 +epoch: 0, batch: 15311, sum loss: 4690.228516, avg loss: 3.091779, ppl: 
22.016205 +epoch: 0, batch: 15312, sum loss: 7045.161621, avg loss: 3.515550, ppl: 33.634411 +epoch: 0, batch: 15313, sum loss: 6621.238770, avg loss: 3.658143, ppl: 38.789257 +epoch: 0, batch: 15314, sum loss: 5901.428223, avg loss: 3.417156, ppl: 30.482599 +epoch: 0, batch: 15315, sum loss: 6351.038574, avg loss: 3.447904, ppl: 31.434433 +epoch: 0, batch: 15316, sum loss: 5232.369629, avg loss: 3.408710, ppl: 30.226221 +epoch: 0, batch: 15317, sum loss: 6614.856445, avg loss: 3.463276, ppl: 31.921370 +epoch: 0, batch: 15318, sum loss: 5511.419434, avg loss: 3.340254, ppl: 28.226303 +epoch: 0, batch: 15319, sum loss: 6367.166016, avg loss: 3.521663, ppl: 33.840649 +epoch: 0, batch: 15320, sum loss: 5053.113770, avg loss: 3.270624, ppl: 26.327761 +epoch: 0, batch: 15321, sum loss: 5592.530273, avg loss: 3.544062, ppl: 34.607212 +epoch: 0, batch: 15322, sum loss: 6887.418945, avg loss: 3.796813, ppl: 44.558960 +epoch: 0, batch: 15323, sum loss: 5610.862793, avg loss: 3.329889, ppl: 27.935236 +epoch: 0, batch: 15324, sum loss: 5892.270020, avg loss: 3.425738, ppl: 30.745337 +epoch: 0, batch: 15325, sum loss: 4208.000977, avg loss: 3.051487, ppl: 21.146770 +epoch: 0, batch: 15326, sum loss: 5730.499512, avg loss: 3.479356, ppl: 32.438828 +epoch: 0, batch: 15327, sum loss: 5643.777344, avg loss: 3.565241, ppl: 35.347984 +epoch: 0, batch: 15328, sum loss: 6017.921875, avg loss: 3.394203, ppl: 29.790907 +epoch: 0, batch: 15329, sum loss: 5314.699219, avg loss: 3.494214, ppl: 32.924400 +epoch: 0, batch: 15330, sum loss: 5779.629395, avg loss: 3.733611, ppl: 41.829880 +epoch: 0, batch: 15331, sum loss: 5493.597656, avg loss: 3.505806, ppl: 33.308277 +epoch: 0, batch: 15332, sum loss: 6049.068848, avg loss: 3.566668, ppl: 35.398449 +epoch: 0, batch: 15333, sum loss: 6162.821289, avg loss: 3.431415, ppl: 30.920366 +epoch: 0, batch: 15334, sum loss: 5321.362793, avg loss: 3.315491, ppl: 27.535904 +epoch: 0, batch: 15335, sum loss: 5878.414551, avg loss: 3.254936, ppl: 
25.917961 +epoch: 0, batch: 15336, sum loss: 7221.795898, avg loss: 3.667748, ppl: 39.163620 +epoch: 0, batch: 15337, sum loss: 5751.685059, avg loss: 3.469050, ppl: 32.106232 +epoch: 0, batch: 15338, sum loss: 5474.061035, avg loss: 3.297627, ppl: 27.048382 +epoch: 0, batch: 15339, sum loss: 5052.614746, avg loss: 3.302363, ppl: 27.176773 +epoch: 0, batch: 15340, sum loss: 4268.992188, avg loss: 3.214603, ppl: 24.893400 +epoch: 0, batch: 15341, sum loss: 6353.269043, avg loss: 3.430491, ppl: 30.891798 +epoch: 0, batch: 15342, sum loss: 6016.486328, avg loss: 3.412641, ppl: 30.345282 +epoch: 0, batch: 15343, sum loss: 5774.613281, avg loss: 3.242343, ppl: 25.593628 +epoch: 0, batch: 15344, sum loss: 5737.964355, avg loss: 3.391232, ppl: 29.702517 +epoch: 0, batch: 15345, sum loss: 6484.301758, avg loss: 3.620492, ppl: 37.355949 +epoch: 0, batch: 15346, sum loss: 6413.240234, avg loss: 3.409484, ppl: 30.249643 +epoch: 0, batch: 15347, sum loss: 6764.110352, avg loss: 3.650356, ppl: 38.488377 +epoch: 0, batch: 15348, sum loss: 6005.202148, avg loss: 3.413986, ppl: 30.386135 +epoch: 0, batch: 15349, sum loss: 5234.682617, avg loss: 3.327834, ppl: 27.877884 +epoch: 0, batch: 15350, sum loss: 6112.784180, avg loss: 3.451600, ppl: 31.550842 +epoch: 0, batch: 15351, sum loss: 4484.152344, avg loss: 3.216752, ppl: 24.946962 +epoch: 0, batch: 15352, sum loss: 6441.141602, avg loss: 3.596394, ppl: 36.466492 +epoch: 0, batch: 15353, sum loss: 5956.691895, avg loss: 3.322193, ppl: 27.721073 +epoch: 0, batch: 15354, sum loss: 4451.764160, avg loss: 3.173032, ppl: 23.879786 +epoch: 0, batch: 15355, sum loss: 5608.330078, avg loss: 3.481273, ppl: 32.501060 +epoch: 0, batch: 15356, sum loss: 5336.250488, avg loss: 3.251828, ppl: 25.837540 +epoch: 0, batch: 15357, sum loss: 6068.830566, avg loss: 3.382849, ppl: 29.454559 +epoch: 0, batch: 15358, sum loss: 5006.259766, avg loss: 3.410259, ppl: 30.273069 +epoch: 0, batch: 15359, sum loss: 6366.309570, avg loss: 3.465601, ppl: 
31.995697 +epoch: 0, batch: 15360, sum loss: 5945.825195, avg loss: 3.321690, ppl: 27.707132 +epoch: 0, batch: 15361, sum loss: 6113.077148, avg loss: 3.503196, ppl: 33.221458 +epoch: 0, batch: 15362, sum loss: 5888.181152, avg loss: 3.429343, ppl: 30.856348 +epoch: 0, batch: 15363, sum loss: 5819.286621, avg loss: 3.334835, ppl: 28.073738 +epoch: 0, batch: 15364, sum loss: 4734.196777, avg loss: 3.046459, ppl: 21.040701 +epoch: 0, batch: 15365, sum loss: 5203.359375, avg loss: 3.452793, ppl: 31.588507 +epoch: 0, batch: 15366, sum loss: 5971.593750, avg loss: 3.328648, ppl: 27.900583 +epoch: 0, batch: 15367, sum loss: 4766.569336, avg loss: 3.037966, ppl: 20.862776 +epoch: 0, batch: 15368, sum loss: 6465.100098, avg loss: 3.698570, ppl: 40.389507 +epoch: 0, batch: 15369, sum loss: 6720.835449, avg loss: 3.478693, ppl: 32.417332 +epoch: 0, batch: 15370, sum loss: 6481.983398, avg loss: 3.390159, ppl: 29.670660 +epoch: 0, batch: 15371, sum loss: 7166.231445, avg loss: 3.632150, ppl: 37.793983 +epoch: 0, batch: 15372, sum loss: 5220.309570, avg loss: 3.289420, ppl: 26.827303 +epoch: 0, batch: 15373, sum loss: 5329.680176, avg loss: 3.492582, ppl: 32.870712 +epoch: 0, batch: 15374, sum loss: 6053.980469, avg loss: 3.404938, ppl: 30.112442 +epoch: 0, batch: 15375, sum loss: 4214.216797, avg loss: 2.999443, ppl: 20.074345 +epoch: 0, batch: 15376, sum loss: 5837.670898, avg loss: 3.476874, ppl: 32.358398 +epoch: 0, batch: 15377, sum loss: 5184.251953, avg loss: 3.149606, ppl: 23.326883 +epoch: 0, batch: 15378, sum loss: 5043.215820, avg loss: 3.105428, ppl: 22.318779 +epoch: 0, batch: 15379, sum loss: 6172.486328, avg loss: 3.547406, ppl: 34.723125 +epoch: 0, batch: 15380, sum loss: 4885.196289, avg loss: 3.024889, ppl: 20.591728 +epoch: 0, batch: 15381, sum loss: 5995.659180, avg loss: 3.347660, ppl: 28.436110 +epoch: 0, batch: 15382, sum loss: 6413.344727, avg loss: 3.639810, ppl: 38.084595 +epoch: 0, batch: 15383, sum loss: 5103.714355, avg loss: 3.203838, ppl: 
24.626875 +epoch: 0, batch: 15384, sum loss: 6378.562500, avg loss: 3.367773, ppl: 29.013849 +epoch: 0, batch: 15385, sum loss: 6377.995117, avg loss: 3.555181, ppl: 34.994164 +epoch: 0, batch: 15386, sum loss: 5835.256836, avg loss: 3.446697, ppl: 31.396503 +epoch: 0, batch: 15387, sum loss: 6169.637695, avg loss: 3.491589, ppl: 32.838089 +epoch: 0, batch: 15388, sum loss: 5474.448242, avg loss: 3.094657, ppl: 22.079668 +epoch: 0, batch: 15389, sum loss: 6438.545898, avg loss: 3.698188, ppl: 40.374092 +epoch: 0, batch: 15390, sum loss: 5623.633301, avg loss: 3.355390, ppl: 28.656773 +epoch: 0, batch: 15391, sum loss: 5489.545410, avg loss: 3.295045, ppl: 26.978632 +epoch: 0, batch: 15392, sum loss: 5713.467285, avg loss: 3.380750, ppl: 29.392805 +epoch: 0, batch: 15393, sum loss: 5901.162109, avg loss: 3.514688, ppl: 33.605457 +epoch: 0, batch: 15394, sum loss: 5690.267090, avg loss: 3.473911, ppl: 32.262684 +epoch: 0, batch: 15395, sum loss: 5967.630371, avg loss: 3.502131, ppl: 33.186081 +epoch: 0, batch: 15396, sum loss: 5203.854492, avg loss: 3.196471, ppl: 24.446100 +epoch: 0, batch: 15397, sum loss: 5333.526367, avg loss: 3.316869, ppl: 27.573895 +epoch: 0, batch: 15398, sum loss: 6902.646484, avg loss: 3.595129, ppl: 36.420380 +epoch: 0, batch: 15399, sum loss: 5273.504883, avg loss: 3.522715, ppl: 33.876282 +epoch: 0, batch: 15400, sum loss: 6030.291016, avg loss: 3.449823, ppl: 31.494829 +epoch: 0, batch: 15401, sum loss: 4651.262207, avg loss: 3.123749, ppl: 22.731441 +epoch: 0, batch: 15402, sum loss: 4887.335938, avg loss: 3.188086, ppl: 24.241985 +epoch: 0, batch: 15403, sum loss: 6751.455078, avg loss: 3.357263, ppl: 28.710499 +epoch: 0, batch: 15404, sum loss: 6817.719727, avg loss: 3.453758, ppl: 31.619009 +epoch: 0, batch: 15405, sum loss: 6251.731445, avg loss: 3.568340, ppl: 35.457687 +epoch: 0, batch: 15406, sum loss: 5847.666504, avg loss: 3.701055, ppl: 40.489990 +epoch: 0, batch: 15407, sum loss: 6315.329102, avg loss: 3.588255, ppl: 
36.170910 +epoch: 0, batch: 15408, sum loss: 5463.737793, avg loss: 3.385216, ppl: 29.524355 +epoch: 0, batch: 15409, sum loss: 5389.505371, avg loss: 3.366337, ppl: 28.972210 +epoch: 0, batch: 15410, sum loss: 5239.882812, avg loss: 3.433737, ppl: 30.992252 +epoch: 0, batch: 15411, sum loss: 5815.902344, avg loss: 3.678622, ppl: 39.591820 +epoch: 0, batch: 15412, sum loss: 5915.839844, avg loss: 3.427485, ppl: 30.799093 +epoch: 0, batch: 15413, sum loss: 5427.723145, avg loss: 3.402961, ppl: 30.052963 +epoch: 0, batch: 15414, sum loss: 6273.021973, avg loss: 3.528134, ppl: 34.060349 +epoch: 0, batch: 15415, sum loss: 5664.864258, avg loss: 3.416685, ppl: 30.468256 +epoch: 0, batch: 15416, sum loss: 6106.202148, avg loss: 3.360596, ppl: 28.806345 +epoch: 0, batch: 15417, sum loss: 6068.494629, avg loss: 3.392115, ppl: 29.728773 +epoch: 0, batch: 15418, sum loss: 6532.234863, avg loss: 3.657466, ppl: 38.763008 +epoch: 0, batch: 15419, sum loss: 5675.182617, avg loss: 3.441590, ppl: 31.236595 +epoch: 0, batch: 15420, sum loss: 5295.125000, avg loss: 3.516019, ppl: 33.650211 +epoch: 0, batch: 15421, sum loss: 5351.265137, avg loss: 3.454658, ppl: 31.647465 +epoch: 0, batch: 15422, sum loss: 5629.279785, avg loss: 3.411685, ppl: 30.316277 +epoch: 0, batch: 15423, sum loss: 6440.050293, avg loss: 3.405632, ppl: 30.133335 +epoch: 0, batch: 15424, sum loss: 6085.722656, avg loss: 3.624611, ppl: 37.510143 +epoch: 0, batch: 15425, sum loss: 6306.217773, avg loss: 3.482175, ppl: 32.530388 +epoch: 0, batch: 15426, sum loss: 5037.355957, avg loss: 3.054794, ppl: 21.216822 +epoch: 0, batch: 15427, sum loss: 6419.523438, avg loss: 3.540829, ppl: 34.495518 +epoch: 0, batch: 15428, sum loss: 4535.446777, avg loss: 3.529531, ppl: 34.107952 +epoch: 0, batch: 15429, sum loss: 5587.802734, avg loss: 3.314237, ppl: 27.501392 +epoch: 0, batch: 15430, sum loss: 6382.938477, avg loss: 3.472763, ppl: 32.225662 +epoch: 0, batch: 15431, sum loss: 5464.252930, avg loss: 3.712128, ppl: 
40.940853 +epoch: 0, batch: 15432, sum loss: 6154.026367, avg loss: 3.436084, ppl: 31.065062 +epoch: 0, batch: 15433, sum loss: 5170.224121, avg loss: 3.173864, ppl: 23.899647 +epoch: 0, batch: 15434, sum loss: 5507.317871, avg loss: 3.412217, ppl: 30.332415 +epoch: 0, batch: 15435, sum loss: 7541.020020, avg loss: 3.723960, ppl: 41.428143 +epoch: 0, batch: 15436, sum loss: 6939.989746, avg loss: 3.590269, ppl: 36.243828 +epoch: 0, batch: 15437, sum loss: 5561.010254, avg loss: 3.493097, ppl: 32.887638 +epoch: 0, batch: 15438, sum loss: 5543.301758, avg loss: 3.359577, ppl: 28.777014 +epoch: 0, batch: 15439, sum loss: 6608.455078, avg loss: 3.558672, ppl: 35.116547 +epoch: 0, batch: 15440, sum loss: 6318.246582, avg loss: 3.353634, ppl: 28.606512 +epoch: 0, batch: 15441, sum loss: 5979.292480, avg loss: 3.401190, ppl: 29.999781 +epoch: 0, batch: 15442, sum loss: 5712.628418, avg loss: 3.470613, ppl: 32.156441 +epoch: 0, batch: 15443, sum loss: 5365.663086, avg loss: 3.374631, ppl: 29.213499 +epoch: 0, batch: 15444, sum loss: 6547.650391, avg loss: 3.499546, ppl: 33.100422 +epoch: 0, batch: 15445, sum loss: 6329.368652, avg loss: 3.439874, ppl: 31.183035 +epoch: 0, batch: 15446, sum loss: 6648.753418, avg loss: 3.356261, ppl: 28.681749 +epoch: 0, batch: 15447, sum loss: 6459.104492, avg loss: 3.636883, ppl: 37.973301 +epoch: 0, batch: 15448, sum loss: 6840.380859, avg loss: 3.806556, ppl: 44.995209 +epoch: 0, batch: 15449, sum loss: 3924.215332, avg loss: 3.027944, ppl: 20.654720 +epoch: 0, batch: 15450, sum loss: 4879.152344, avg loss: 2.989677, ppl: 19.879255 +epoch: 0, batch: 15451, sum loss: 5399.276855, avg loss: 3.483405, ppl: 32.570423 +epoch: 0, batch: 15452, sum loss: 5353.009766, avg loss: 3.362443, ppl: 28.859615 +epoch: 0, batch: 15453, sum loss: 5566.191895, avg loss: 3.400239, ppl: 29.971277 +epoch: 0, batch: 15454, sum loss: 5044.938477, avg loss: 3.114160, ppl: 22.514502 +epoch: 0, batch: 15455, sum loss: 6119.529297, avg loss: 3.518993, ppl: 
33.750439 +epoch: 0, batch: 15456, sum loss: 5763.890137, avg loss: 3.497506, ppl: 33.032970 +epoch: 0, batch: 15457, sum loss: 5146.588867, avg loss: 3.301212, ppl: 27.145508 +epoch: 0, batch: 15458, sum loss: 5472.606445, avg loss: 3.347160, ppl: 28.421896 +epoch: 0, batch: 15459, sum loss: 6606.931641, avg loss: 3.598547, ppl: 36.545078 +epoch: 0, batch: 15460, sum loss: 6137.046387, avg loss: 3.661722, ppl: 38.928337 +epoch: 0, batch: 15461, sum loss: 5863.854492, avg loss: 3.427151, ppl: 30.788792 +epoch: 0, batch: 15462, sum loss: 6652.750000, avg loss: 3.574826, ppl: 35.688393 +epoch: 0, batch: 15463, sum loss: 5696.028809, avg loss: 3.477429, ppl: 32.376373 +epoch: 0, batch: 15464, sum loss: 5645.496094, avg loss: 3.409116, ppl: 30.238503 +epoch: 0, batch: 15465, sum loss: 6133.511719, avg loss: 3.788457, ppl: 44.188160 +epoch: 0, batch: 15466, sum loss: 6574.919434, avg loss: 3.488021, ppl: 32.721127 +epoch: 0, batch: 15467, sum loss: 4660.595703, avg loss: 3.166166, ppl: 23.716372 +epoch: 0, batch: 15468, sum loss: 6383.969727, avg loss: 3.671058, ppl: 39.293453 +epoch: 0, batch: 15469, sum loss: 6877.018066, avg loss: 3.723345, ppl: 41.402668 +epoch: 0, batch: 15470, sum loss: 7342.559082, avg loss: 3.866540, ppl: 47.776791 +epoch: 0, batch: 15471, sum loss: 5842.926758, avg loss: 3.515600, ppl: 33.636093 +epoch: 0, batch: 15472, sum loss: 6427.386230, avg loss: 3.623104, ppl: 37.453629 +epoch: 0, batch: 15473, sum loss: 5690.629395, avg loss: 3.285583, ppl: 26.724554 +epoch: 0, batch: 15474, sum loss: 5671.713379, avg loss: 3.265235, ppl: 26.186270 +epoch: 0, batch: 15475, sum loss: 5450.583984, avg loss: 3.491726, ppl: 32.842583 +epoch: 0, batch: 15476, sum loss: 6131.736328, avg loss: 3.400852, ppl: 29.989647 +epoch: 0, batch: 15477, sum loss: 4504.826172, avg loss: 3.126181, ppl: 22.786783 +epoch: 0, batch: 15478, sum loss: 6319.523926, avg loss: 3.644477, ppl: 38.262772 +epoch: 0, batch: 15479, sum loss: 4812.265625, avg loss: 3.210317, ppl: 
24.786951 +epoch: 0, batch: 15480, sum loss: 5744.688477, avg loss: 3.496463, ppl: 32.998524 +epoch: 0, batch: 15481, sum loss: 5406.488770, avg loss: 3.235481, ppl: 25.418602 +epoch: 0, batch: 15482, sum loss: 5509.777832, avg loss: 3.347374, ppl: 28.427990 +epoch: 0, batch: 15483, sum loss: 6271.423340, avg loss: 3.593939, ppl: 36.377079 +epoch: 0, batch: 15484, sum loss: 6136.252930, avg loss: 3.371568, ppl: 29.124149 +epoch: 0, batch: 15485, sum loss: 5074.178711, avg loss: 3.277893, ppl: 26.519844 +epoch: 0, batch: 15486, sum loss: 4398.489746, avg loss: 3.135061, ppl: 22.990028 +epoch: 0, batch: 15487, sum loss: 5932.312500, avg loss: 3.586646, ppl: 36.112755 +epoch: 0, batch: 15488, sum loss: 7956.644531, avg loss: 3.687046, ppl: 39.926712 +epoch: 0, batch: 15489, sum loss: 6275.784180, avg loss: 3.596438, ppl: 36.468102 +epoch: 0, batch: 15490, sum loss: 5507.247070, avg loss: 3.343805, ppl: 28.326708 +epoch: 0, batch: 15491, sum loss: 5529.816895, avg loss: 3.272081, ppl: 26.366148 +epoch: 0, batch: 15492, sum loss: 6179.964355, avg loss: 3.529392, ppl: 34.103210 +epoch: 0, batch: 15493, sum loss: 5687.132324, avg loss: 3.367160, ppl: 28.996050 +epoch: 0, batch: 15494, sum loss: 6049.651855, avg loss: 3.460899, ppl: 31.845604 +epoch: 0, batch: 15495, sum loss: 5130.091797, avg loss: 3.282208, ppl: 26.634529 +epoch: 0, batch: 15496, sum loss: 6544.917480, avg loss: 3.373669, ppl: 29.185410 +epoch: 0, batch: 15497, sum loss: 6687.458496, avg loss: 3.642406, ppl: 38.183613 +epoch: 0, batch: 15498, sum loss: 5194.479004, avg loss: 3.319156, ppl: 27.637012 +epoch: 0, batch: 15499, sum loss: 5338.002441, avg loss: 3.332086, ppl: 27.996691 +epoch: 0, batch: 15500, sum loss: 6108.328125, avg loss: 3.779906, ppl: 43.811924 +epoch: 0, batch: 15501, sum loss: 5041.959961, avg loss: 3.112321, ppl: 22.473146 +epoch: 0, batch: 15502, sum loss: 6284.125000, avg loss: 3.353322, ppl: 28.597570 +epoch: 0, batch: 15503, sum loss: 5286.126465, avg loss: 3.223248, ppl: 
25.109537 +epoch: 0, batch: 15504, sum loss: 6976.202637, avg loss: 3.716677, ppl: 41.127499 +epoch: 0, batch: 15505, sum loss: 5049.745605, avg loss: 3.300488, ppl: 27.125860 +epoch: 0, batch: 15506, sum loss: 4937.081543, avg loss: 3.400194, ppl: 29.969912 +epoch: 0, batch: 15507, sum loss: 6065.868164, avg loss: 3.392544, ppl: 29.741512 +epoch: 0, batch: 15508, sum loss: 5669.172852, avg loss: 3.340703, ppl: 28.238972 +epoch: 0, batch: 15509, sum loss: 5831.278320, avg loss: 3.416097, ppl: 30.450348 +epoch: 0, batch: 15510, sum loss: 5287.005371, avg loss: 3.294084, ppl: 26.952723 +epoch: 0, batch: 15511, sum loss: 6404.670898, avg loss: 3.666097, ppl: 39.098991 +epoch: 0, batch: 15512, sum loss: 5298.209961, avg loss: 3.402833, ppl: 30.049093 +epoch: 0, batch: 15513, sum loss: 5311.233887, avg loss: 3.350936, ppl: 28.529430 +epoch: 0, batch: 15514, sum loss: 5932.341797, avg loss: 3.479379, ppl: 32.439583 +epoch: 0, batch: 15515, sum loss: 6281.715332, avg loss: 3.601901, ppl: 36.667866 +epoch: 0, batch: 15516, sum loss: 5067.224121, avg loss: 3.351339, ppl: 28.540920 +epoch: 0, batch: 15517, sum loss: 5138.055176, avg loss: 3.353822, ppl: 28.611879 +epoch: 0, batch: 15518, sum loss: 6268.741211, avg loss: 3.446257, ppl: 31.382704 +epoch: 0, batch: 15519, sum loss: 6925.055664, avg loss: 3.588112, ppl: 36.165726 +epoch: 0, batch: 15520, sum loss: 6016.225098, avg loss: 3.439809, ppl: 31.180990 +epoch: 0, batch: 15521, sum loss: 5469.863770, avg loss: 3.526669, ppl: 34.010490 +epoch: 0, batch: 15522, sum loss: 5230.793945, avg loss: 3.390016, ppl: 29.666414 +epoch: 0, batch: 15523, sum loss: 5109.681641, avg loss: 3.288084, ppl: 26.791470 +epoch: 0, batch: 15524, sum loss: 5038.817383, avg loss: 3.193167, ppl: 24.365471 +epoch: 0, batch: 15525, sum loss: 4822.724121, avg loss: 2.988057, ppl: 19.847084 +epoch: 0, batch: 15526, sum loss: 5453.090332, avg loss: 3.449140, ppl: 31.473309 +epoch: 0, batch: 15527, sum loss: 5054.889648, avg loss: 3.225839, ppl: 
25.174690 +epoch: 0, batch: 15528, sum loss: 5302.795410, avg loss: 3.416750, ppl: 30.470217 +epoch: 0, batch: 15529, sum loss: 6159.409668, avg loss: 3.474005, ppl: 32.265694 +epoch: 0, batch: 15530, sum loss: 5816.361816, avg loss: 3.327438, ppl: 27.866865 +epoch: 0, batch: 15531, sum loss: 5271.905273, avg loss: 3.226380, ppl: 25.188318 +epoch: 0, batch: 15532, sum loss: 5537.540039, avg loss: 3.439466, ppl: 31.170301 +epoch: 0, batch: 15533, sum loss: 5293.984375, avg loss: 3.286148, ppl: 26.739666 +epoch: 0, batch: 15534, sum loss: 6292.728516, avg loss: 3.527314, ppl: 34.032448 +epoch: 0, batch: 15535, sum loss: 5191.357422, avg loss: 3.317161, ppl: 27.581944 +epoch: 0, batch: 15536, sum loss: 5695.963867, avg loss: 3.410757, ppl: 30.288158 +epoch: 0, batch: 15537, sum loss: 6056.935059, avg loss: 3.594620, ppl: 36.401875 +epoch: 0, batch: 15538, sum loss: 5317.806641, avg loss: 3.380678, ppl: 29.390690 +epoch: 0, batch: 15539, sum loss: 4826.219238, avg loss: 3.269796, ppl: 26.305983 +epoch: 0, batch: 15540, sum loss: 6650.965820, avg loss: 3.545291, ppl: 34.649765 +epoch: 0, batch: 15541, sum loss: 6543.801270, avg loss: 3.508741, ppl: 33.406170 +epoch: 0, batch: 15542, sum loss: 6561.816895, avg loss: 3.448143, ppl: 31.441965 +epoch: 0, batch: 15543, sum loss: 5558.142578, avg loss: 3.259908, ppl: 26.047134 +epoch: 0, batch: 15544, sum loss: 6303.502930, avg loss: 3.523479, ppl: 33.902153 +epoch: 0, batch: 15545, sum loss: 5988.814453, avg loss: 3.379692, ppl: 29.361736 +epoch: 0, batch: 15546, sum loss: 6502.591797, avg loss: 3.686277, ppl: 39.896023 +epoch: 0, batch: 15547, sum loss: 6435.360352, avg loss: 3.650233, ppl: 38.483624 +epoch: 0, batch: 15548, sum loss: 5808.071777, avg loss: 3.589661, ppl: 36.221798 +epoch: 0, batch: 15549, sum loss: 5807.938965, avg loss: 3.418446, ppl: 30.521942 +epoch: 0, batch: 15550, sum loss: 5595.156250, avg loss: 3.324513, ppl: 27.785477 +epoch: 0, batch: 15551, sum loss: 5482.496582, avg loss: 3.514421, ppl: 
33.596470 +epoch: 0, batch: 15552, sum loss: 6230.426758, avg loss: 3.455589, ppl: 31.676935 +epoch: 0, batch: 15553, sum loss: 5811.557617, avg loss: 3.332315, ppl: 28.003094 +epoch: 0, batch: 15554, sum loss: 5825.363281, avg loss: 3.528385, ppl: 34.068901 +epoch: 0, batch: 15555, sum loss: 6304.100098, avg loss: 3.671578, ppl: 39.313911 +epoch: 0, batch: 15556, sum loss: 5963.015137, avg loss: 3.432939, ppl: 30.967524 +epoch: 0, batch: 15557, sum loss: 6472.446777, avg loss: 3.439132, ppl: 31.159906 +epoch: 0, batch: 15558, sum loss: 5196.592285, avg loss: 3.295239, ppl: 26.983868 +epoch: 0, batch: 15559, sum loss: 6565.977539, avg loss: 3.747704, ppl: 42.423576 +epoch: 0, batch: 15560, sum loss: 5992.939941, avg loss: 3.603692, ppl: 36.733608 +epoch: 0, batch: 15561, sum loss: 6403.926270, avg loss: 3.474729, ppl: 32.289085 +epoch: 0, batch: 15562, sum loss: 5834.625488, avg loss: 3.370667, ppl: 29.097942 +epoch: 0, batch: 15563, sum loss: 4841.881348, avg loss: 3.007380, ppl: 20.234312 +epoch: 0, batch: 15564, sum loss: 4615.251465, avg loss: 3.315554, ppl: 27.537649 +epoch: 0, batch: 15565, sum loss: 5132.855469, avg loss: 3.279780, ppl: 26.569923 +epoch: 0, batch: 15566, sum loss: 4679.499512, avg loss: 3.220578, ppl: 25.042589 +epoch: 0, batch: 15567, sum loss: 5654.173828, avg loss: 3.363577, ppl: 28.892365 +epoch: 0, batch: 15568, sum loss: 4714.314453, avg loss: 3.211386, ppl: 24.813448 +epoch: 0, batch: 15569, sum loss: 6606.152832, avg loss: 3.605979, ppl: 36.817699 +epoch: 0, batch: 15570, sum loss: 5785.909180, avg loss: 3.634365, ppl: 37.877796 +epoch: 0, batch: 15571, sum loss: 5762.610840, avg loss: 3.358165, ppl: 28.736404 +epoch: 0, batch: 15572, sum loss: 6525.754395, avg loss: 3.664096, ppl: 39.020840 +epoch: 0, batch: 15573, sum loss: 6985.589844, avg loss: 3.576851, ppl: 35.760750 +epoch: 0, batch: 15574, sum loss: 5715.229492, avg loss: 3.428452, ppl: 30.828876 +epoch: 0, batch: 15575, sum loss: 6042.856445, avg loss: 3.351557, ppl: 
28.547134 +epoch: 0, batch: 15576, sum loss: 5963.533203, avg loss: 3.437195, ppl: 31.099604 +epoch: 0, batch: 15577, sum loss: 5974.642578, avg loss: 3.449563, ppl: 31.486616 +epoch: 0, batch: 15578, sum loss: 5504.880371, avg loss: 3.375157, ppl: 29.228863 +epoch: 0, batch: 15579, sum loss: 6380.224609, avg loss: 3.542601, ppl: 34.556686 +epoch: 0, batch: 15580, sum loss: 6567.356445, avg loss: 3.586760, ppl: 36.116852 +epoch: 0, batch: 15581, sum loss: 5831.505859, avg loss: 3.250561, ppl: 25.804806 +epoch: 0, batch: 15582, sum loss: 4406.095215, avg loss: 3.144964, ppl: 23.218851 +epoch: 0, batch: 15583, sum loss: 4461.490723, avg loss: 3.157460, ppl: 23.510796 +epoch: 0, batch: 15584, sum loss: 4875.879883, avg loss: 3.500273, ppl: 33.124485 +epoch: 0, batch: 15585, sum loss: 5669.566406, avg loss: 3.368726, ppl: 29.041512 +epoch: 0, batch: 15586, sum loss: 6681.435059, avg loss: 3.435185, ppl: 31.037161 +epoch: 0, batch: 15587, sum loss: 7442.223633, avg loss: 3.854078, ppl: 47.185074 +epoch: 0, batch: 15588, sum loss: 6708.483887, avg loss: 3.549462, ppl: 34.794605 +epoch: 0, batch: 15589, sum loss: 5909.402344, avg loss: 3.519596, ppl: 33.770794 +epoch: 0, batch: 15590, sum loss: 6924.252930, avg loss: 3.324173, ppl: 27.776031 +epoch: 0, batch: 15591, sum loss: 4961.040039, avg loss: 3.049195, ppl: 21.098349 +epoch: 0, batch: 15592, sum loss: 5499.229492, avg loss: 3.318787, ppl: 27.626808 +epoch: 0, batch: 15593, sum loss: 5233.018555, avg loss: 3.200623, ppl: 24.547813 +epoch: 0, batch: 15594, sum loss: 5270.535645, avg loss: 3.458357, ppl: 31.764732 +epoch: 0, batch: 15595, sum loss: 6194.557617, avg loss: 3.428089, ppl: 30.817698 +epoch: 0, batch: 15596, sum loss: 6528.015625, avg loss: 3.524846, ppl: 33.948555 +epoch: 0, batch: 15597, sum loss: 6569.779785, avg loss: 3.619714, ppl: 37.326874 +epoch: 0, batch: 15598, sum loss: 5557.520508, avg loss: 3.193977, ppl: 24.385223 +epoch: 0, batch: 15599, sum loss: 6634.100098, avg loss: 3.377851, ppl: 
29.307735 +epoch: 0, batch: 15600, sum loss: 5877.494141, avg loss: 3.370123, ppl: 29.082094 +epoch: 0, batch: 15601, sum loss: 6091.504883, avg loss: 3.410697, ppl: 30.286354 +epoch: 0, batch: 15602, sum loss: 6670.384277, avg loss: 3.433034, ppl: 30.970455 +epoch: 0, batch: 15603, sum loss: 4416.769531, avg loss: 3.230995, ppl: 25.304815 +epoch: 0, batch: 15604, sum loss: 6662.993652, avg loss: 3.559292, ppl: 35.138298 +epoch: 0, batch: 15605, sum loss: 5060.825195, avg loss: 3.378388, ppl: 29.323462 +epoch: 0, batch: 15606, sum loss: 5893.555664, avg loss: 3.563214, ppl: 35.276386 +epoch: 0, batch: 15607, sum loss: 5954.028320, avg loss: 3.546175, ppl: 34.680420 +epoch: 0, batch: 15608, sum loss: 7136.368164, avg loss: 3.720734, ppl: 41.294697 +epoch: 0, batch: 15609, sum loss: 6119.686035, avg loss: 3.469210, ppl: 32.111362 +epoch: 0, batch: 15610, sum loss: 5845.386230, avg loss: 3.406402, ppl: 30.156555 +epoch: 0, batch: 15611, sum loss: 5882.922363, avg loss: 3.751864, ppl: 42.600403 +epoch: 0, batch: 15612, sum loss: 4995.466309, avg loss: 3.314842, ppl: 27.518040 +epoch: 0, batch: 15613, sum loss: 6627.395508, avg loss: 3.729542, ppl: 41.660004 +epoch: 0, batch: 15614, sum loss: 5899.733398, avg loss: 3.424105, ppl: 30.695173 +epoch: 0, batch: 15615, sum loss: 4570.287109, avg loss: 3.134628, ppl: 22.980076 +epoch: 0, batch: 15616, sum loss: 5602.469727, avg loss: 3.313111, ppl: 27.470442 +epoch: 0, batch: 15617, sum loss: 5367.343750, avg loss: 3.210134, ppl: 24.782408 +epoch: 0, batch: 15618, sum loss: 6545.199219, avg loss: 3.638243, ppl: 38.024967 +epoch: 0, batch: 15619, sum loss: 6352.548340, avg loss: 3.384416, ppl: 29.500755 +epoch: 0, batch: 15620, sum loss: 6615.712402, avg loss: 3.627035, ppl: 37.601177 +epoch: 0, batch: 15621, sum loss: 4953.105957, avg loss: 3.376350, ppl: 29.263767 +epoch: 0, batch: 15622, sum loss: 6984.152832, avg loss: 3.406904, ppl: 30.171680 +epoch: 0, batch: 15623, sum loss: 5496.209961, avg loss: 3.347266, ppl: 
28.424906 +epoch: 0, batch: 15624, sum loss: 5005.733887, avg loss: 3.254704, ppl: 25.911932 +epoch: 0, batch: 15625, sum loss: 5987.635742, avg loss: 3.491333, ppl: 32.829666 +epoch: 0, batch: 15626, sum loss: 6206.607910, avg loss: 3.701018, ppl: 40.488514 +epoch: 0, batch: 15627, sum loss: 5288.968750, avg loss: 3.289160, ppl: 26.820312 +epoch: 0, batch: 15628, sum loss: 5345.902832, avg loss: 3.251766, ppl: 25.835918 +epoch: 0, batch: 15629, sum loss: 5569.364258, avg loss: 3.309188, ppl: 27.362907 +epoch: 0, batch: 15630, sum loss: 5088.176758, avg loss: 3.208182, ppl: 24.734081 +epoch: 0, batch: 15631, sum loss: 6346.769531, avg loss: 3.510381, ppl: 33.461029 +epoch: 0, batch: 15632, sum loss: 6521.046387, avg loss: 3.713580, ppl: 41.000332 +epoch: 0, batch: 15633, sum loss: 6236.116699, avg loss: 3.679125, ppl: 39.611713 +epoch: 0, batch: 15634, sum loss: 5252.758789, avg loss: 3.504175, ppl: 33.254013 +epoch: 0, batch: 15635, sum loss: 7015.417969, avg loss: 3.570187, ppl: 35.523247 +epoch: 0, batch: 15636, sum loss: 5741.403320, avg loss: 3.417502, ppl: 30.493153 +epoch: 0, batch: 15637, sum loss: 5189.265137, avg loss: 3.396116, ppl: 29.847946 +epoch: 0, batch: 15638, sum loss: 6562.582031, avg loss: 3.511280, ppl: 33.491104 +epoch: 0, batch: 15639, sum loss: 6030.384766, avg loss: 3.426355, ppl: 30.764299 +epoch: 0, batch: 15640, sum loss: 5452.660645, avg loss: 3.603874, ppl: 36.740280 +epoch: 0, batch: 15641, sum loss: 6249.020020, avg loss: 3.352479, ppl: 28.573465 +epoch: 0, batch: 15642, sum loss: 4482.516113, avg loss: 3.063921, ppl: 21.411346 +epoch: 0, batch: 15643, sum loss: 5498.164062, avg loss: 3.340318, ppl: 28.228115 +epoch: 0, batch: 15644, sum loss: 7017.836914, avg loss: 3.787284, ppl: 44.136349 +epoch: 0, batch: 15645, sum loss: 5833.005859, avg loss: 3.340782, ppl: 28.241215 +epoch: 0, batch: 15646, sum loss: 6351.345703, avg loss: 3.420218, ppl: 30.576086 +epoch: 0, batch: 15647, sum loss: 5681.910156, avg loss: 3.388139, ppl: 
29.610809 +epoch: 0, batch: 15648, sum loss: 4895.317871, avg loss: 3.214260, ppl: 24.884872 +epoch: 0, batch: 15649, sum loss: 6023.232422, avg loss: 3.547251, ppl: 34.717754 +epoch: 0, batch: 15650, sum loss: 5267.695312, avg loss: 3.210052, ppl: 24.780369 +epoch: 0, batch: 15651, sum loss: 5888.728516, avg loss: 3.188267, ppl: 24.246372 +epoch: 0, batch: 15652, sum loss: 5589.329590, avg loss: 3.412289, ppl: 30.334599 +epoch: 0, batch: 15653, sum loss: 5769.661133, avg loss: 3.331213, ppl: 27.972252 +epoch: 0, batch: 15654, sum loss: 5967.336914, avg loss: 3.371377, ppl: 29.118589 +epoch: 0, batch: 15655, sum loss: 5024.887695, avg loss: 3.314570, ppl: 27.510561 +epoch: 0, batch: 15656, sum loss: 6528.897461, avg loss: 3.649467, ppl: 38.454182 +epoch: 0, batch: 15657, sum loss: 5026.890137, avg loss: 3.262096, ppl: 26.104198 +epoch: 0, batch: 15658, sum loss: 6653.726562, avg loss: 3.700626, ppl: 40.472649 +epoch: 0, batch: 15659, sum loss: 7624.931152, avg loss: 3.703220, ppl: 40.577770 +epoch: 0, batch: 15660, sum loss: 5662.765137, avg loss: 3.217480, ppl: 24.965134 +epoch: 0, batch: 15661, sum loss: 6076.258301, avg loss: 3.557528, ppl: 35.076382 +epoch: 0, batch: 15662, sum loss: 5255.397461, avg loss: 3.162092, ppl: 23.619968 +epoch: 0, batch: 15663, sum loss: 5326.060059, avg loss: 3.398890, ppl: 29.930851 +epoch: 0, batch: 15664, sum loss: 4610.221680, avg loss: 3.210461, ppl: 24.790504 +epoch: 0, batch: 15665, sum loss: 5616.958984, avg loss: 3.555037, ppl: 34.989124 +epoch: 0, batch: 15666, sum loss: 5913.288086, avg loss: 3.222501, ppl: 25.090782 +epoch: 0, batch: 15667, sum loss: 5949.633301, avg loss: 3.231740, ppl: 25.323681 +epoch: 0, batch: 15668, sum loss: 5075.610840, avg loss: 3.295851, ppl: 27.000387 +epoch: 0, batch: 15669, sum loss: 5739.376953, avg loss: 3.476304, ppl: 32.339958 +epoch: 0, batch: 15670, sum loss: 5589.485352, avg loss: 3.353020, ppl: 28.588955 +epoch: 0, batch: 15671, sum loss: 5723.075684, avg loss: 3.487554, ppl: 
32.705837 +epoch: 0, batch: 15672, sum loss: 6516.753906, avg loss: 3.564964, ppl: 35.338173 +epoch: 0, batch: 15673, sum loss: 5043.936035, avg loss: 3.488199, ppl: 32.726959 +epoch: 0, batch: 15674, sum loss: 4793.666992, avg loss: 3.204323, ppl: 24.638809 +epoch: 0, batch: 15675, sum loss: 5220.223633, avg loss: 3.367886, ppl: 29.017128 +epoch: 0, batch: 15676, sum loss: 5332.643555, avg loss: 3.351756, ppl: 28.552824 +epoch: 0, batch: 15677, sum loss: 6728.520508, avg loss: 3.303152, ppl: 27.198229 +epoch: 0, batch: 15678, sum loss: 6304.154785, avg loss: 3.450550, ppl: 31.517725 +epoch: 0, batch: 15679, sum loss: 5536.990234, avg loss: 3.351689, ppl: 28.550919 +epoch: 0, batch: 15680, sum loss: 5628.360840, avg loss: 3.463607, ppl: 31.931934 +epoch: 0, batch: 15681, sum loss: 4519.838867, avg loss: 3.210113, ppl: 24.781881 +epoch: 0, batch: 15682, sum loss: 4742.425781, avg loss: 3.210850, ppl: 24.800169 +epoch: 0, batch: 15683, sum loss: 5388.508789, avg loss: 3.285676, ppl: 26.727045 +epoch: 0, batch: 15684, sum loss: 5637.372070, avg loss: 3.306377, ppl: 27.286081 +epoch: 0, batch: 15685, sum loss: 5184.109863, avg loss: 3.366305, ppl: 28.971283 +epoch: 0, batch: 15686, sum loss: 6394.767578, avg loss: 3.542808, ppl: 34.563824 +epoch: 0, batch: 15687, sum loss: 5109.242188, avg loss: 3.359134, ppl: 28.764261 +epoch: 0, batch: 15688, sum loss: 6744.218262, avg loss: 3.337070, ppl: 28.136566 +epoch: 0, batch: 15689, sum loss: 5704.739746, avg loss: 3.422159, ppl: 30.635492 +epoch: 0, batch: 15690, sum loss: 5163.616211, avg loss: 3.217206, ppl: 24.958296 +epoch: 0, batch: 15691, sum loss: 6144.561035, avg loss: 3.620837, ppl: 37.368851 +epoch: 0, batch: 15692, sum loss: 5388.547363, avg loss: 3.367842, ppl: 29.015842 +epoch: 0, batch: 15693, sum loss: 6473.165039, avg loss: 3.586241, ppl: 36.098129 +epoch: 0, batch: 15694, sum loss: 5250.601562, avg loss: 3.458894, ppl: 31.781815 +epoch: 0, batch: 15695, sum loss: 5975.623535, avg loss: 3.525442, ppl: 
33.968773 +epoch: 0, batch: 15696, sum loss: 5949.673340, avg loss: 3.321984, ppl: 27.715279 +epoch: 0, batch: 15697, sum loss: 7081.347168, avg loss: 3.447589, ppl: 31.424526 +epoch: 0, batch: 15698, sum loss: 5433.374512, avg loss: 3.498631, ppl: 33.070164 +epoch: 0, batch: 15699, sum loss: 5616.889648, avg loss: 3.503986, ppl: 33.247711 +epoch: 0, batch: 15700, sum loss: 6144.109863, avg loss: 3.655033, ppl: 38.668781 +epoch: 0, batch: 15701, sum loss: 5857.160645, avg loss: 3.425240, ppl: 30.730021 +epoch: 0, batch: 15702, sum loss: 5623.070801, avg loss: 3.286424, ppl: 26.747044 +epoch: 0, batch: 15703, sum loss: 4075.710938, avg loss: 2.911222, ppl: 18.379244 +epoch: 0, batch: 15704, sum loss: 6415.205078, avg loss: 3.706069, ppl: 40.693516 +epoch: 0, batch: 15705, sum loss: 6598.400879, avg loss: 3.657650, ppl: 38.770134 +epoch: 0, batch: 15706, sum loss: 5622.456543, avg loss: 3.492209, ppl: 32.858452 +epoch: 0, batch: 15707, sum loss: 6025.689941, avg loss: 3.485072, ppl: 32.624786 +epoch: 0, batch: 15708, sum loss: 5699.691406, avg loss: 3.503191, ppl: 33.221283 +epoch: 0, batch: 15709, sum loss: 6252.642578, avg loss: 3.522615, ppl: 33.872906 +epoch: 0, batch: 15710, sum loss: 5822.572266, avg loss: 3.587537, ppl: 36.144936 +epoch: 0, batch: 15711, sum loss: 6163.968262, avg loss: 3.426330, ppl: 30.763544 +epoch: 0, batch: 15712, sum loss: 5529.009277, avg loss: 3.235231, ppl: 25.412233 +epoch: 0, batch: 15713, sum loss: 5114.051758, avg loss: 3.086332, ppl: 21.896605 +epoch: 0, batch: 15714, sum loss: 5485.537109, avg loss: 3.476259, ppl: 32.338524 +epoch: 0, batch: 15715, sum loss: 6089.203125, avg loss: 3.487516, ppl: 32.704613 +epoch: 0, batch: 15716, sum loss: 5738.343750, avg loss: 3.369550, ppl: 29.065445 +epoch: 0, batch: 15717, sum loss: 5305.134766, avg loss: 3.374768, ppl: 29.217499 +epoch: 0, batch: 15718, sum loss: 7202.994141, avg loss: 3.612334, ppl: 37.052433 +epoch: 0, batch: 15719, sum loss: 5488.344727, avg loss: 3.522686, ppl: 
33.875294 +epoch: 0, batch: 15720, sum loss: 4927.165039, avg loss: 3.340451, ppl: 28.231857 +epoch: 0, batch: 15721, sum loss: 3975.556152, avg loss: 2.966833, ppl: 19.430285 +epoch: 0, batch: 15722, sum loss: 6294.743164, avg loss: 3.740192, ppl: 42.106071 +epoch: 0, batch: 15723, sum loss: 6065.255859, avg loss: 3.551086, ppl: 34.851162 +epoch: 0, batch: 15724, sum loss: 6024.546875, avg loss: 3.369433, ppl: 29.062056 +epoch: 0, batch: 15725, sum loss: 5613.717285, avg loss: 3.600845, ppl: 36.629177 +epoch: 0, batch: 15726, sum loss: 6408.269043, avg loss: 3.632806, ppl: 37.818771 +epoch: 0, batch: 15727, sum loss: 4843.138672, avg loss: 3.358626, ppl: 28.749666 +epoch: 0, batch: 15728, sum loss: 5588.687012, avg loss: 3.469079, ppl: 32.107174 +epoch: 0, batch: 15729, sum loss: 5240.704102, avg loss: 3.344419, ppl: 28.344103 +epoch: 0, batch: 15730, sum loss: 5076.877441, avg loss: 3.380078, ppl: 29.373064 +epoch: 0, batch: 15731, sum loss: 5617.735352, avg loss: 3.310392, ppl: 27.395866 +epoch: 0, batch: 15732, sum loss: 5779.115234, avg loss: 3.571765, ppl: 35.579327 +epoch: 0, batch: 15733, sum loss: 5355.113770, avg loss: 3.326158, ppl: 27.831196 +epoch: 0, batch: 15734, sum loss: 6626.648926, avg loss: 3.613222, ppl: 37.085335 +epoch: 0, batch: 15735, sum loss: 5499.954590, avg loss: 3.592394, ppl: 36.320919 +epoch: 0, batch: 15736, sum loss: 6315.614746, avg loss: 3.580281, ppl: 35.883606 +epoch: 0, batch: 15737, sum loss: 5585.922363, avg loss: 3.387460, ppl: 29.590710 +epoch: 0, batch: 15738, sum loss: 6187.324219, avg loss: 3.330099, ppl: 27.941111 +epoch: 0, batch: 15739, sum loss: 5995.339355, avg loss: 3.345614, ppl: 28.377987 +epoch: 0, batch: 15740, sum loss: 5260.291992, avg loss: 3.556654, ppl: 35.045753 +epoch: 0, batch: 15741, sum loss: 4934.009766, avg loss: 3.216434, ppl: 24.939030 +epoch: 0, batch: 15742, sum loss: 5915.838379, avg loss: 3.279289, ppl: 26.556896 +epoch: 0, batch: 15743, sum loss: 5557.997070, avg loss: 3.356278, ppl: 
28.682249 +epoch: 0, batch: 15744, sum loss: 5579.759766, avg loss: 3.520353, ppl: 33.796360 +epoch: 0, batch: 15745, sum loss: 6153.558105, avg loss: 3.506301, ppl: 33.324768 +epoch: 0, batch: 15746, sum loss: 6636.144531, avg loss: 3.709416, ppl: 40.829941 +epoch: 0, batch: 15747, sum loss: 5128.899414, avg loss: 3.321826, ppl: 27.710911 +epoch: 0, batch: 15748, sum loss: 5645.094727, avg loss: 3.388412, ppl: 29.618879 +epoch: 0, batch: 15749, sum loss: 6289.101562, avg loss: 3.414279, ppl: 30.395027 +epoch: 0, batch: 15750, sum loss: 4742.614258, avg loss: 3.252822, ppl: 25.863220 +epoch: 0, batch: 15751, sum loss: 6190.906738, avg loss: 3.458607, ppl: 31.772684 +epoch: 0, batch: 15752, sum loss: 5828.268555, avg loss: 3.448680, ppl: 31.458830 +epoch: 0, batch: 15753, sum loss: 5167.963867, avg loss: 3.266728, ppl: 26.225388 +epoch: 0, batch: 15754, sum loss: 5109.630859, avg loss: 3.056000, ppl: 21.242407 +epoch: 0, batch: 15755, sum loss: 6050.658203, avg loss: 3.443744, ppl: 31.303940 +epoch: 0, batch: 15756, sum loss: 4842.516602, avg loss: 3.319066, ppl: 27.634516 +epoch: 0, batch: 15757, sum loss: 5249.287598, avg loss: 3.162221, ppl: 23.623014 +epoch: 0, batch: 15758, sum loss: 5803.389648, avg loss: 3.462643, ppl: 31.901184 +epoch: 0, batch: 15759, sum loss: 6740.987305, avg loss: 3.755425, ppl: 42.752373 +epoch: 0, batch: 15760, sum loss: 5318.171387, avg loss: 3.248730, ppl: 25.757618 +epoch: 0, batch: 15761, sum loss: 5201.906250, avg loss: 3.243084, ppl: 25.612581 +epoch: 0, batch: 15762, sum loss: 5308.412109, avg loss: 3.344935, ppl: 28.358744 +epoch: 0, batch: 15763, sum loss: 6376.279785, avg loss: 3.592270, ppl: 36.316425 +epoch: 0, batch: 15764, sum loss: 5857.361328, avg loss: 3.285116, ppl: 26.712082 +epoch: 0, batch: 15765, sum loss: 5331.427734, avg loss: 3.446301, ppl: 31.384094 +epoch: 0, batch: 15766, sum loss: 6287.814941, avg loss: 3.473931, ppl: 32.263325 +epoch: 0, batch: 15767, sum loss: 6733.565430, avg loss: 3.439002, ppl: 
31.155844 +epoch: 0, batch: 15768, sum loss: 7018.022949, avg loss: 3.697589, ppl: 40.349892 +epoch: 0, batch: 15769, sum loss: 5641.746094, avg loss: 3.557217, ppl: 35.065472 +epoch: 0, batch: 15770, sum loss: 6025.806152, avg loss: 3.343955, ppl: 28.330948 +epoch: 0, batch: 15771, sum loss: 5951.974609, avg loss: 3.332573, ppl: 28.010311 +epoch: 0, batch: 15772, sum loss: 5552.368652, avg loss: 3.287370, ppl: 26.772371 +epoch: 0, batch: 15773, sum loss: 4269.465820, avg loss: 3.214959, ppl: 24.902281 +epoch: 0, batch: 15774, sum loss: 5920.953125, avg loss: 3.530682, ppl: 34.147236 +epoch: 0, batch: 15775, sum loss: 6113.035156, avg loss: 3.405591, ppl: 30.132092 +epoch: 0, batch: 15776, sum loss: 5580.898438, avg loss: 3.485883, ppl: 32.651245 +epoch: 0, batch: 15777, sum loss: 5625.634277, avg loss: 3.284083, ppl: 26.684500 +epoch: 0, batch: 15778, sum loss: 6057.602051, avg loss: 3.567492, ppl: 35.427647 +epoch: 0, batch: 15779, sum loss: 6364.868652, avg loss: 3.614349, ppl: 37.127163 +epoch: 0, batch: 15780, sum loss: 6254.864258, avg loss: 3.666392, ppl: 39.110523 +epoch: 0, batch: 15781, sum loss: 5065.739258, avg loss: 3.020715, ppl: 20.505949 +epoch: 0, batch: 15782, sum loss: 6097.424805, avg loss: 3.408287, ppl: 30.213430 +epoch: 0, batch: 15783, sum loss: 5296.950195, avg loss: 3.139864, ppl: 23.100723 +epoch: 0, batch: 15784, sum loss: 5941.040039, avg loss: 3.257149, ppl: 25.975382 +epoch: 0, batch: 15785, sum loss: 6293.586914, avg loss: 3.396431, ppl: 29.857355 +epoch: 0, batch: 15786, sum loss: 5440.084473, avg loss: 3.213281, ppl: 24.860518 +epoch: 0, batch: 15787, sum loss: 5586.472656, avg loss: 3.469859, ppl: 32.132198 +epoch: 0, batch: 15788, sum loss: 5773.602051, avg loss: 3.239956, ppl: 25.532608 +epoch: 0, batch: 15789, sum loss: 5656.501465, avg loss: 3.385100, ppl: 29.520941 +epoch: 0, batch: 15790, sum loss: 5020.642090, avg loss: 3.424722, ppl: 30.714096 +epoch: 0, batch: 15791, sum loss: 5905.772461, avg loss: 3.601081, ppl: 
36.637806 +epoch: 0, batch: 15792, sum loss: 5437.307617, avg loss: 3.567787, ppl: 35.438087 +epoch: 0, batch: 15793, sum loss: 6027.066406, avg loss: 3.430317, ppl: 30.886423 +epoch: 0, batch: 15794, sum loss: 5141.319824, avg loss: 3.001354, ppl: 20.112755 +epoch: 0, batch: 15795, sum loss: 5653.020020, avg loss: 3.195602, ppl: 24.424877 +epoch: 0, batch: 15796, sum loss: 5978.611816, avg loss: 3.683679, ppl: 39.792534 +epoch: 0, batch: 15797, sum loss: 5407.124512, avg loss: 3.424398, ppl: 30.704153 +epoch: 0, batch: 15798, sum loss: 4346.387695, avg loss: 2.997509, ppl: 20.035561 +epoch: 0, batch: 15799, sum loss: 5643.969238, avg loss: 3.339627, ppl: 28.208597 +epoch: 0, batch: 15800, sum loss: 5947.399414, avg loss: 3.664448, ppl: 39.034584 +epoch: 0, batch: 15801, sum loss: 5743.364258, avg loss: 3.466122, ppl: 32.012356 +epoch: 0, batch: 15802, sum loss: 6523.931641, avg loss: 3.537924, ppl: 34.395435 +epoch: 0, batch: 15803, sum loss: 5575.950195, avg loss: 3.210104, ppl: 24.781656 +epoch: 0, batch: 15804, sum loss: 6067.010742, avg loss: 3.533495, ppl: 34.243439 +epoch: 0, batch: 15805, sum loss: 5888.562988, avg loss: 3.538800, ppl: 34.425583 +epoch: 0, batch: 15806, sum loss: 5650.084961, avg loss: 3.315778, ppl: 27.543802 +epoch: 0, batch: 15807, sum loss: 5437.249512, avg loss: 3.389807, ppl: 29.660213 +epoch: 0, batch: 15808, sum loss: 6679.622559, avg loss: 3.729549, ppl: 41.660313 +epoch: 0, batch: 15809, sum loss: 5872.435059, avg loss: 3.510123, ppl: 33.452374 +epoch: 0, batch: 15810, sum loss: 5909.659180, avg loss: 3.338791, ppl: 28.185020 +epoch: 0, batch: 15811, sum loss: 7097.384766, avg loss: 3.492807, ppl: 32.878120 +epoch: 0, batch: 15812, sum loss: 5992.335938, avg loss: 3.537388, ppl: 34.377022 +epoch: 0, batch: 15813, sum loss: 5480.364258, avg loss: 3.612633, ppl: 37.063511 +epoch: 0, batch: 15814, sum loss: 7077.789062, avg loss: 3.684430, ppl: 39.822403 +epoch: 0, batch: 15815, sum loss: 5689.154297, avg loss: 3.199749, ppl: 
24.526384 +epoch: 0, batch: 15816, sum loss: 5745.516602, avg loss: 3.271935, ppl: 26.362288 +epoch: 0, batch: 15817, sum loss: 7158.347168, avg loss: 3.730248, ppl: 41.689465 +epoch: 0, batch: 15818, sum loss: 5895.372559, avg loss: 3.275207, ppl: 26.448702 +epoch: 0, batch: 15819, sum loss: 5361.158203, avg loss: 3.352819, ppl: 28.583202 +epoch: 0, batch: 15820, sum loss: 5943.514160, avg loss: 3.274663, ppl: 26.434328 +epoch: 0, batch: 15821, sum loss: 5484.135742, avg loss: 3.484203, ppl: 32.596439 +epoch: 0, batch: 15822, sum loss: 5866.097656, avg loss: 3.367450, ppl: 29.004465 +epoch: 0, batch: 15823, sum loss: 5823.688477, avg loss: 3.171943, ppl: 23.853798 +epoch: 0, batch: 15824, sum loss: 6141.299316, avg loss: 3.423244, ppl: 30.668736 +epoch: 0, batch: 15825, sum loss: 5144.470703, avg loss: 3.380073, ppl: 29.372910 +epoch: 0, batch: 15826, sum loss: 6429.028320, avg loss: 3.450901, ppl: 31.528780 +epoch: 0, batch: 15827, sum loss: 5871.349121, avg loss: 3.206635, ppl: 24.695856 +epoch: 0, batch: 15828, sum loss: 6143.875000, avg loss: 3.333627, ppl: 28.039864 +epoch: 0, batch: 15829, sum loss: 6049.736816, avg loss: 3.385415, ppl: 29.530247 +epoch: 0, batch: 15830, sum loss: 5885.259277, avg loss: 3.433640, ppl: 30.989246 +epoch: 0, batch: 15831, sum loss: 6606.866211, avg loss: 3.626162, ppl: 37.568336 +epoch: 0, batch: 15832, sum loss: 6090.878418, avg loss: 3.557756, ppl: 35.084377 +epoch: 0, batch: 15833, sum loss: 5243.675781, avg loss: 3.203223, ppl: 24.611727 +epoch: 0, batch: 15834, sum loss: 4771.278320, avg loss: 3.467499, ppl: 32.056461 +epoch: 0, batch: 15835, sum loss: 5925.737305, avg loss: 3.391950, ppl: 29.723867 +epoch: 0, batch: 15836, sum loss: 5162.293945, avg loss: 3.159299, ppl: 23.554071 +epoch: 0, batch: 15837, sum loss: 6110.831543, avg loss: 3.505927, ppl: 33.312321 +epoch: 0, batch: 15838, sum loss: 5441.896484, avg loss: 3.118565, ppl: 22.613913 +epoch: 0, batch: 15839, sum loss: 6692.630859, avg loss: 3.605943, ppl: 
36.816402 +epoch: 0, batch: 15840, sum loss: 5476.334473, avg loss: 3.271407, ppl: 26.348375 +epoch: 0, batch: 15841, sum loss: 5839.367676, avg loss: 3.367571, ppl: 29.007971 +epoch: 0, batch: 15842, sum loss: 6678.430664, avg loss: 3.762496, ppl: 43.055759 +epoch: 0, batch: 15843, sum loss: 4731.161133, avg loss: 3.086211, ppl: 21.893963 +epoch: 0, batch: 15844, sum loss: 5200.826660, avg loss: 3.355372, ppl: 28.656267 +epoch: 0, batch: 15845, sum loss: 6070.261719, avg loss: 3.251345, ppl: 25.825056 +epoch: 0, batch: 15846, sum loss: 6066.261719, avg loss: 3.562103, ppl: 35.237232 +epoch: 0, batch: 15847, sum loss: 6058.021484, avg loss: 3.234395, ppl: 25.391001 +epoch: 0, batch: 15848, sum loss: 4830.793457, avg loss: 3.317853, ppl: 27.601021 +epoch: 0, batch: 15849, sum loss: 5360.108398, avg loss: 3.323068, ppl: 27.745340 +epoch: 0, batch: 15850, sum loss: 5551.929199, avg loss: 3.406091, ppl: 30.147182 +epoch: 0, batch: 15851, sum loss: 5710.917969, avg loss: 3.463261, ppl: 31.920912 +epoch: 0, batch: 15852, sum loss: 5566.373535, avg loss: 3.204591, ppl: 24.645407 +epoch: 0, batch: 15853, sum loss: 7173.878418, avg loss: 3.608591, ppl: 36.913990 +epoch: 0, batch: 15854, sum loss: 5587.235840, avg loss: 3.474649, ppl: 32.286499 +epoch: 0, batch: 15855, sum loss: 4944.937500, avg loss: 3.213085, ppl: 24.855646 +epoch: 0, batch: 15856, sum loss: 5590.732422, avg loss: 3.224182, ppl: 25.133017 +epoch: 0, batch: 15857, sum loss: 6075.752441, avg loss: 3.503894, ppl: 33.244659 +epoch: 0, batch: 15858, sum loss: 4904.306641, avg loss: 3.241445, ppl: 25.570652 +epoch: 0, batch: 15859, sum loss: 7401.480469, avg loss: 3.476506, ppl: 32.346497 +epoch: 0, batch: 15860, sum loss: 5957.847656, avg loss: 3.559049, ppl: 35.129768 +epoch: 0, batch: 15861, sum loss: 6783.004883, avg loss: 3.696461, ppl: 40.304401 +epoch: 0, batch: 15862, sum loss: 6208.706543, avg loss: 3.572328, ppl: 35.599384 +epoch: 0, batch: 15863, sum loss: 5591.756836, avg loss: 3.407530, ppl: 
30.190577 +epoch: 0, batch: 15864, sum loss: 5303.225098, avg loss: 3.459377, ppl: 31.797171 +epoch: 0, batch: 15865, sum loss: 4292.105957, avg loss: 2.943831, ppl: 18.988457 +epoch: 0, batch: 15866, sum loss: 5154.395020, avg loss: 3.142924, ppl: 23.171518 +epoch: 0, batch: 15867, sum loss: 5699.745117, avg loss: 3.339042, ppl: 28.192110 +epoch: 0, batch: 15868, sum loss: 5331.916016, avg loss: 3.206203, ppl: 24.685184 +epoch: 0, batch: 15869, sum loss: 6799.253418, avg loss: 3.543123, ppl: 34.574726 +epoch: 0, batch: 15870, sum loss: 5449.929688, avg loss: 3.236300, ppl: 25.439434 +epoch: 0, batch: 15871, sum loss: 5992.813477, avg loss: 3.374332, ppl: 29.204767 +epoch: 0, batch: 15872, sum loss: 6667.395996, avg loss: 3.386184, ppl: 29.552961 +epoch: 0, batch: 15873, sum loss: 6200.322266, avg loss: 3.584001, ppl: 36.017368 +epoch: 0, batch: 15874, sum loss: 4529.897461, avg loss: 3.187824, ppl: 24.235622 +epoch: 0, batch: 15875, sum loss: 5707.372559, avg loss: 3.507912, ppl: 33.378490 +epoch: 0, batch: 15876, sum loss: 5996.661621, avg loss: 3.544126, ppl: 34.609432 +epoch: 0, batch: 15877, sum loss: 6106.799316, avg loss: 3.505625, ppl: 33.302261 +epoch: 0, batch: 15878, sum loss: 6644.888184, avg loss: 3.655054, ppl: 38.669613 +epoch: 0, batch: 15879, sum loss: 6049.640137, avg loss: 3.748228, ppl: 42.445805 +epoch: 0, batch: 15880, sum loss: 4444.243652, avg loss: 3.284733, ppl: 26.701849 +epoch: 0, batch: 15881, sum loss: 5425.983398, avg loss: 3.318644, ppl: 27.622877 +epoch: 0, batch: 15882, sum loss: 5907.053223, avg loss: 3.363926, ppl: 28.902431 +epoch: 0, batch: 15883, sum loss: 5291.094238, avg loss: 3.336125, ppl: 28.109985 +epoch: 0, batch: 15884, sum loss: 6094.883789, avg loss: 3.734610, ppl: 41.871704 +epoch: 0, batch: 15885, sum loss: 4444.240234, avg loss: 2.945156, ppl: 19.013630 +epoch: 0, batch: 15886, sum loss: 5883.006348, avg loss: 3.377156, ppl: 29.287367 +epoch: 0, batch: 15887, sum loss: 5745.659180, avg loss: 3.395780, ppl: 
29.837906 +epoch: 0, batch: 15888, sum loss: 6726.302734, avg loss: 3.476125, ppl: 32.334190 +epoch: 0, batch: 15889, sum loss: 5987.905762, avg loss: 3.217574, ppl: 24.967485 +epoch: 0, batch: 15890, sum loss: 5506.330078, avg loss: 3.373977, ppl: 29.194401 +epoch: 0, batch: 15891, sum loss: 5810.575195, avg loss: 3.615791, ppl: 37.180729 +epoch: 0, batch: 15892, sum loss: 5677.021484, avg loss: 3.335500, ppl: 28.092432 +epoch: 0, batch: 15893, sum loss: 5791.545410, avg loss: 3.187422, ppl: 24.225887 +epoch: 0, batch: 15894, sum loss: 6601.593262, avg loss: 3.361300, ppl: 28.826649 +epoch: 0, batch: 15895, sum loss: 4875.999023, avg loss: 3.101781, ppl: 22.237513 +epoch: 0, batch: 15896, sum loss: 4965.728516, avg loss: 3.256215, ppl: 25.951134 +epoch: 0, batch: 15897, sum loss: 5274.387207, avg loss: 3.128344, ppl: 22.836123 +epoch: 0, batch: 15898, sum loss: 5323.401855, avg loss: 3.335465, ppl: 28.091434 +epoch: 0, batch: 15899, sum loss: 6797.370117, avg loss: 3.547688, ppl: 34.732922 +epoch: 0, batch: 15900, sum loss: 5160.317383, avg loss: 3.276392, ppl: 26.480053 +epoch: 0, batch: 15901, sum loss: 5481.428223, avg loss: 3.471455, ppl: 32.183548 +epoch: 0, batch: 15902, sum loss: 5317.835938, avg loss: 3.426441, ppl: 30.766947 +epoch: 0, batch: 15903, sum loss: 6223.157227, avg loss: 3.637146, ppl: 37.983295 +epoch: 0, batch: 15904, sum loss: 5460.473633, avg loss: 3.520615, ppl: 33.805199 +epoch: 0, batch: 15905, sum loss: 5464.226562, avg loss: 3.333878, ppl: 28.046906 +epoch: 0, batch: 15906, sum loss: 6386.096680, avg loss: 3.451944, ppl: 31.561699 +epoch: 0, batch: 15907, sum loss: 6392.354980, avg loss: 3.736035, ppl: 41.931385 +epoch: 0, batch: 15908, sum loss: 4311.397461, avg loss: 2.926950, ppl: 18.670603 +epoch: 0, batch: 15909, sum loss: 6191.508789, avg loss: 3.608105, ppl: 36.896084 +epoch: 0, batch: 15910, sum loss: 5704.845215, avg loss: 3.474327, ppl: 32.276112 +epoch: 0, batch: 15911, sum loss: 6214.715820, avg loss: 3.412804, ppl: 
30.350224 +epoch: 0, batch: 15912, sum loss: 5395.124512, avg loss: 3.169873, ppl: 23.804466 +epoch: 0, batch: 15913, sum loss: 5662.410645, avg loss: 3.392697, ppl: 29.746065 +epoch: 0, batch: 15914, sum loss: 4833.841797, avg loss: 2.934937, ppl: 18.820322 +epoch: 0, batch: 15915, sum loss: 6106.524414, avg loss: 3.347875, ppl: 28.442240 +epoch: 0, batch: 15916, sum loss: 5926.406250, avg loss: 3.604870, ppl: 36.776897 +epoch: 0, batch: 15917, sum loss: 4986.650879, avg loss: 3.248632, ppl: 25.755095 +epoch: 0, batch: 15918, sum loss: 4810.957520, avg loss: 3.272760, ppl: 26.384064 +epoch: 0, batch: 15919, sum loss: 6139.246582, avg loss: 3.369510, ppl: 29.064274 +epoch: 0, batch: 15920, sum loss: 4479.706543, avg loss: 3.041213, ppl: 20.930618 +epoch: 0, batch: 15921, sum loss: 5261.982910, avg loss: 3.351582, ppl: 28.547848 +epoch: 0, batch: 15922, sum loss: 6298.461426, avg loss: 3.619805, ppl: 37.330299 +epoch: 0, batch: 15923, sum loss: 5610.675781, avg loss: 3.265818, ppl: 26.201544 +epoch: 0, batch: 15924, sum loss: 5318.555664, avg loss: 3.291185, ppl: 26.874701 +epoch: 0, batch: 15925, sum loss: 5347.046875, avg loss: 3.343994, ppl: 28.332064 +epoch: 0, batch: 15926, sum loss: 6498.833984, avg loss: 3.362046, ppl: 28.848146 +epoch: 0, batch: 15927, sum loss: 5304.157715, avg loss: 3.411034, ppl: 30.296558 +epoch: 0, batch: 15928, sum loss: 5207.493164, avg loss: 3.437289, ppl: 31.102531 +epoch: 0, batch: 15929, sum loss: 6873.733398, avg loss: 3.636896, ppl: 37.973789 +epoch: 0, batch: 15930, sum loss: 6673.096680, avg loss: 3.609030, ppl: 36.930214 +epoch: 0, batch: 15931, sum loss: 4742.789062, avg loss: 3.368458, ppl: 29.033730 +epoch: 0, batch: 15932, sum loss: 5426.772949, avg loss: 3.370666, ppl: 29.097908 +epoch: 0, batch: 15933, sum loss: 5958.500977, avg loss: 3.302938, ppl: 27.192425 +epoch: 0, batch: 15934, sum loss: 5259.582031, avg loss: 3.160807, ppl: 23.589617 +epoch: 0, batch: 15935, sum loss: 5281.158203, avg loss: 3.411601, ppl: 
30.313740 +epoch: 0, batch: 15936, sum loss: 5164.576172, avg loss: 3.321271, ppl: 27.695528 +epoch: 0, batch: 15937, sum loss: 5793.811523, avg loss: 3.374381, ppl: 29.206188 +epoch: 0, batch: 15938, sum loss: 6212.669434, avg loss: 3.400476, ppl: 29.978365 +epoch: 0, batch: 15939, sum loss: 6398.146484, avg loss: 3.694080, ppl: 40.208557 +epoch: 0, batch: 15940, sum loss: 6087.433594, avg loss: 3.530994, ppl: 34.157909 +epoch: 0, batch: 15941, sum loss: 4944.462402, avg loss: 3.046496, ppl: 21.041479 +epoch: 0, batch: 15942, sum loss: 5849.000000, avg loss: 3.380925, ppl: 29.397943 +epoch: 0, batch: 15943, sum loss: 5242.714355, avg loss: 3.568900, ppl: 35.477551 +epoch: 0, batch: 15944, sum loss: 5336.870605, avg loss: 3.382047, ppl: 29.430960 +epoch: 0, batch: 15945, sum loss: 5789.114746, avg loss: 3.321351, ppl: 27.697746 +epoch: 0, batch: 15946, sum loss: 5691.678223, avg loss: 3.383875, ppl: 29.484808 +epoch: 0, batch: 15947, sum loss: 6461.016113, avg loss: 3.438540, ppl: 31.141451 +epoch: 0, batch: 15948, sum loss: 5619.200195, avg loss: 3.409709, ppl: 30.256437 +epoch: 0, batch: 15949, sum loss: 5034.139160, avg loss: 3.279569, ppl: 26.564331 +epoch: 0, batch: 15950, sum loss: 5320.266602, avg loss: 3.346080, ppl: 28.391211 +epoch: 0, batch: 15951, sum loss: 4981.382324, avg loss: 3.285872, ppl: 26.732298 +epoch: 0, batch: 15952, sum loss: 5296.246094, avg loss: 3.360562, ppl: 28.805378 +epoch: 0, batch: 15953, sum loss: 5939.880371, avg loss: 3.323940, ppl: 27.769543 +epoch: 0, batch: 15954, sum loss: 5454.872070, avg loss: 3.239235, ppl: 25.514206 +epoch: 0, batch: 15955, sum loss: 6532.023438, avg loss: 3.557747, ppl: 35.084061 +epoch: 0, batch: 15956, sum loss: 7763.462891, avg loss: 3.665469, ppl: 39.074448 +epoch: 0, batch: 15957, sum loss: 6141.737305, avg loss: 3.383878, ppl: 29.484879 +epoch: 0, batch: 15958, sum loss: 5763.012695, avg loss: 3.572854, ppl: 35.618088 +epoch: 0, batch: 15959, sum loss: 6592.574219, avg loss: 3.415842, ppl: 
30.442566 +epoch: 0, batch: 15960, sum loss: 5909.804199, avg loss: 3.310815, ppl: 27.407450 +epoch: 0, batch: 15961, sum loss: 5987.796387, avg loss: 3.241904, ppl: 25.582384 +epoch: 0, batch: 15962, sum loss: 6391.987793, avg loss: 3.545196, ppl: 34.646461 +epoch: 0, batch: 15963, sum loss: 4981.008789, avg loss: 3.318460, ppl: 27.617792 +epoch: 0, batch: 15964, sum loss: 5220.635254, avg loss: 3.289625, ppl: 26.832809 +epoch: 0, batch: 15965, sum loss: 6226.174316, avg loss: 3.493925, ppl: 32.914890 +epoch: 0, batch: 15966, sum loss: 5147.552734, avg loss: 3.346913, ppl: 28.414890 +epoch: 0, batch: 15967, sum loss: 5389.611328, avg loss: 3.437252, ppl: 31.101376 +epoch: 0, batch: 15968, sum loss: 5918.659668, avg loss: 3.271785, ppl: 26.358360 +epoch: 0, batch: 15969, sum loss: 6631.041016, avg loss: 3.527149, ppl: 34.026833 +epoch: 0, batch: 15970, sum loss: 5357.904297, avg loss: 3.311436, ppl: 27.424490 +epoch: 0, batch: 15971, sum loss: 6581.287598, avg loss: 3.648164, ppl: 38.404083 +epoch: 0, batch: 15972, sum loss: 4881.446777, avg loss: 3.217829, ppl: 24.973843 +epoch: 0, batch: 15973, sum loss: 4160.398926, avg loss: 3.154207, ppl: 23.434446 +epoch: 0, batch: 15974, sum loss: 6612.309082, avg loss: 3.589745, ppl: 36.224823 +epoch: 0, batch: 15975, sum loss: 7068.179199, avg loss: 3.647151, ppl: 38.365227 +epoch: 0, batch: 15976, sum loss: 6616.156738, avg loss: 3.511761, ppl: 33.507214 +epoch: 0, batch: 15977, sum loss: 5274.950195, avg loss: 3.321757, ppl: 27.709002 +epoch: 0, batch: 15978, sum loss: 4983.431152, avg loss: 3.210974, ppl: 24.803232 +epoch: 0, batch: 15979, sum loss: 4230.041016, avg loss: 2.993659, ppl: 19.958588 +epoch: 0, batch: 15980, sum loss: 6418.005859, avg loss: 3.524440, ppl: 33.934776 +epoch: 0, batch: 15981, sum loss: 5479.420898, avg loss: 3.324891, ppl: 27.795979 +epoch: 0, batch: 15982, sum loss: 6714.814941, avg loss: 3.673312, ppl: 39.382141 +epoch: 0, batch: 15983, sum loss: 5374.416016, avg loss: 3.414495, ppl: 
30.401608 +epoch: 0, batch: 15984, sum loss: 6071.472168, avg loss: 3.631263, ppl: 37.760498 +epoch: 0, batch: 15985, sum loss: 5983.478516, avg loss: 3.382407, ppl: 29.441565 +epoch: 0, batch: 15986, sum loss: 6014.138184, avg loss: 3.586248, ppl: 36.098385 +epoch: 0, batch: 15987, sum loss: 5867.534180, avg loss: 3.419309, ppl: 30.548304 +epoch: 0, batch: 15988, sum loss: 4961.153809, avg loss: 3.274689, ppl: 26.435009 +epoch: 0, batch: 15989, sum loss: 5432.187012, avg loss: 3.536580, ppl: 34.349247 +epoch: 0, batch: 15990, sum loss: 6718.588379, avg loss: 3.653392, ppl: 38.605408 +epoch: 0, batch: 15991, sum loss: 6095.773438, avg loss: 3.566866, ppl: 35.405449 +epoch: 0, batch: 15992, sum loss: 7307.457520, avg loss: 3.745493, ppl: 42.329887 +epoch: 0, batch: 15993, sum loss: 5189.194336, avg loss: 3.284300, ppl: 26.690304 +epoch: 0, batch: 15994, sum loss: 5145.885742, avg loss: 3.094339, ppl: 22.072641 +epoch: 0, batch: 15995, sum loss: 5336.978516, avg loss: 3.184355, ppl: 24.151701 +epoch: 0, batch: 15996, sum loss: 6032.856445, avg loss: 3.586716, ppl: 36.115276 +epoch: 0, batch: 15997, sum loss: 6193.465332, avg loss: 3.463907, ppl: 31.941521 +epoch: 0, batch: 15998, sum loss: 4689.107910, avg loss: 3.070798, ppl: 21.559093 +epoch: 0, batch: 15999, sum loss: 5628.966797, avg loss: 3.426029, ppl: 30.754288 +epoch: 0, batch: 16000, sum loss: 5309.629883, avg loss: 3.243513, ppl: 25.623569 +epoch: 0, batch: 16001, sum loss: 5685.456055, avg loss: 3.263752, ppl: 26.147459 +epoch: 0, batch: 16002, sum loss: 5542.344238, avg loss: 3.496747, ppl: 33.007904 +epoch: 0, batch: 16003, sum loss: 4243.814453, avg loss: 3.066340, ppl: 21.463203 +epoch: 0, batch: 16004, sum loss: 4634.904297, avg loss: 3.104424, ppl: 22.296362 +epoch: 0, batch: 16005, sum loss: 6215.149902, avg loss: 3.267692, ppl: 26.250685 +epoch: 0, batch: 16006, sum loss: 6674.426270, avg loss: 3.592264, ppl: 36.316193 +epoch: 0, batch: 16007, sum loss: 4941.661621, avg loss: 3.489874, ppl: 
32.781822 +epoch: 0, batch: 16008, sum loss: 6258.721680, avg loss: 3.427559, ppl: 30.801353 +epoch: 0, batch: 16009, sum loss: 5215.820801, avg loss: 3.227612, ppl: 25.219357 +epoch: 0, batch: 16010, sum loss: 5148.058105, avg loss: 3.622842, ppl: 37.443817 +epoch: 0, batch: 16011, sum loss: 6311.716797, avg loss: 3.415431, ppl: 30.430069 +epoch: 0, batch: 16012, sum loss: 5274.875488, avg loss: 3.130490, ppl: 22.885187 +epoch: 0, batch: 16013, sum loss: 5225.796875, avg loss: 3.180643, ppl: 24.062223 +epoch: 0, batch: 16014, sum loss: 5981.750488, avg loss: 3.569064, ppl: 35.483353 +epoch: 0, batch: 16015, sum loss: 5350.020020, avg loss: 3.296377, ppl: 27.014585 +epoch: 0, batch: 16016, sum loss: 5923.110352, avg loss: 3.583249, ppl: 35.990280 +epoch: 0, batch: 16017, sum loss: 6284.274902, avg loss: 3.437787, ppl: 31.118027 +epoch: 0, batch: 16018, sum loss: 5220.910156, avg loss: 3.103989, ppl: 22.286684 +epoch: 0, batch: 16019, sum loss: 6090.876953, avg loss: 3.675846, ppl: 39.482048 +epoch: 0, batch: 16020, sum loss: 5426.984375, avg loss: 3.155223, ppl: 23.458277 +epoch: 0, batch: 16021, sum loss: 6060.370605, avg loss: 3.441437, ppl: 31.231806 +epoch: 0, batch: 16022, sum loss: 6121.687500, avg loss: 3.319787, ppl: 27.654467 +epoch: 0, batch: 16023, sum loss: 6074.286133, avg loss: 3.473005, ppl: 32.233467 +epoch: 0, batch: 16024, sum loss: 6007.339844, avg loss: 3.264858, ppl: 26.176407 +epoch: 0, batch: 16025, sum loss: 5337.383789, avg loss: 3.244610, ppl: 25.651707 +epoch: 0, batch: 16026, sum loss: 6239.682129, avg loss: 3.604669, ppl: 36.769505 +epoch: 0, batch: 16027, sum loss: 6799.210449, avg loss: 3.524733, ppl: 33.944710 +epoch: 0, batch: 16028, sum loss: 5537.331543, avg loss: 3.577088, ppl: 35.769215 +epoch: 0, batch: 16029, sum loss: 6861.583496, avg loss: 3.586818, ppl: 36.118969 +epoch: 0, batch: 16030, sum loss: 5268.361816, avg loss: 3.479764, ppl: 32.452045 +epoch: 0, batch: 16031, sum loss: 5149.888672, avg loss: 3.200677, ppl: 
24.549137 +epoch: 0, batch: 16032, sum loss: 6539.317871, avg loss: 3.732488, ppl: 41.782921 +epoch: 0, batch: 16033, sum loss: 5400.159668, avg loss: 3.428673, ppl: 30.835682 +epoch: 0, batch: 16034, sum loss: 6237.320312, avg loss: 3.465178, ppl: 31.982153 +epoch: 0, batch: 16035, sum loss: 6148.821289, avg loss: 3.485726, ppl: 32.646130 +epoch: 0, batch: 16036, sum loss: 5089.390625, avg loss: 3.262430, ppl: 26.112913 +epoch: 0, batch: 16037, sum loss: 6436.317871, avg loss: 3.638394, ppl: 38.030697 +epoch: 0, batch: 16038, sum loss: 6473.716797, avg loss: 3.456336, ppl: 31.700605 +epoch: 0, batch: 16039, sum loss: 5784.621094, avg loss: 3.527208, ppl: 34.028828 +epoch: 0, batch: 16040, sum loss: 5755.978516, avg loss: 3.379905, ppl: 29.367987 +epoch: 0, batch: 16041, sum loss: 7091.213379, avg loss: 3.843476, ppl: 46.687481 +epoch: 0, batch: 16042, sum loss: 5026.666992, avg loss: 3.236747, ppl: 25.450785 +epoch: 0, batch: 16043, sum loss: 5676.655273, avg loss: 3.138008, ppl: 23.057882 +epoch: 0, batch: 16044, sum loss: 4674.069824, avg loss: 3.241380, ppl: 25.568981 +epoch: 0, batch: 16045, sum loss: 7139.532227, avg loss: 3.759627, ppl: 42.932423 +epoch: 0, batch: 16046, sum loss: 5937.177734, avg loss: 3.339245, ppl: 28.197838 +epoch: 0, batch: 16047, sum loss: 5699.941406, avg loss: 3.425446, ppl: 30.736343 +epoch: 0, batch: 16048, sum loss: 5983.018066, avg loss: 3.597726, ppl: 36.515099 +epoch: 0, batch: 16049, sum loss: 7260.618164, avg loss: 3.581953, ppl: 35.943653 +epoch: 0, batch: 16050, sum loss: 5339.179688, avg loss: 3.265553, ppl: 26.194599 +epoch: 0, batch: 16051, sum loss: 7717.898438, avg loss: 3.783283, ppl: 43.960136 +epoch: 0, batch: 16052, sum loss: 6979.430664, avg loss: 3.631338, ppl: 37.763298 +epoch: 0, batch: 16053, sum loss: 5775.066406, avg loss: 3.512814, ppl: 33.542526 +epoch: 0, batch: 16054, sum loss: 5817.569336, avg loss: 3.519401, ppl: 33.764183 +epoch: 0, batch: 16055, sum loss: 4883.553711, avg loss: 3.485763, ppl: 
32.647324 +epoch: 0, batch: 16056, sum loss: 4813.124512, avg loss: 3.160292, ppl: 23.577477 +epoch: 0, batch: 16057, sum loss: 6030.618164, avg loss: 3.465873, ppl: 32.004372 +epoch: 0, batch: 16058, sum loss: 4959.537598, avg loss: 3.262854, ppl: 26.123985 +epoch: 0, batch: 16059, sum loss: 4629.639160, avg loss: 3.128135, ppl: 22.831352 +epoch: 0, batch: 16060, sum loss: 5759.785156, avg loss: 3.469750, ppl: 32.128716 +epoch: 0, batch: 16061, sum loss: 5494.284180, avg loss: 3.325838, ppl: 27.822306 +epoch: 0, batch: 16062, sum loss: 5899.325195, avg loss: 3.211391, ppl: 24.813583 +epoch: 0, batch: 16063, sum loss: 5280.660156, avg loss: 3.150752, ppl: 23.353621 +epoch: 0, batch: 16064, sum loss: 5423.230957, avg loss: 3.406552, ppl: 30.161072 +epoch: 0, batch: 16065, sum loss: 6020.274902, avg loss: 3.522689, ppl: 33.875385 +epoch: 0, batch: 16066, sum loss: 4774.274414, avg loss: 3.012161, ppl: 20.331280 +epoch: 0, batch: 16067, sum loss: 4555.841797, avg loss: 3.135473, ppl: 22.999506 +epoch: 0, batch: 16068, sum loss: 5642.515625, avg loss: 3.411436, ppl: 30.308746 +epoch: 0, batch: 16069, sum loss: 5346.386719, avg loss: 3.130203, ppl: 22.878618 +epoch: 0, batch: 16070, sum loss: 5897.636230, avg loss: 3.343331, ppl: 28.313292 +epoch: 0, batch: 16071, sum loss: 4031.439453, avg loss: 3.134867, ppl: 22.985588 +epoch: 0, batch: 16072, sum loss: 5676.142090, avg loss: 3.613076, ppl: 37.079933 +epoch: 0, batch: 16073, sum loss: 5321.801270, avg loss: 3.260908, ppl: 26.073193 +epoch: 0, batch: 16074, sum loss: 4500.747070, avg loss: 2.998499, ppl: 20.055414 +epoch: 0, batch: 16075, sum loss: 5284.998535, avg loss: 3.294887, ppl: 26.974367 +epoch: 0, batch: 16076, sum loss: 6157.030762, avg loss: 3.426283, ppl: 30.762098 +epoch: 0, batch: 16077, sum loss: 5341.288086, avg loss: 3.229316, ppl: 25.262365 +epoch: 0, batch: 16078, sum loss: 6016.274414, avg loss: 3.395189, ppl: 29.820284 +epoch: 0, batch: 16079, sum loss: 6319.433594, avg loss: 3.633947, ppl: 
37.861958 +epoch: 0, batch: 16080, sum loss: 5342.815918, avg loss: 3.345533, ppl: 28.375692 +epoch: 0, batch: 16081, sum loss: 5293.296875, avg loss: 3.457412, ppl: 31.734726 +epoch: 0, batch: 16082, sum loss: 5300.809082, avg loss: 3.254026, ppl: 25.894392 +epoch: 0, batch: 16083, sum loss: 6642.256836, avg loss: 3.383728, ppl: 29.480457 +epoch: 0, batch: 16084, sum loss: 5756.846191, avg loss: 3.432824, ppl: 30.963966 +epoch: 0, batch: 16085, sum loss: 6854.513672, avg loss: 3.540555, ppl: 34.486038 +epoch: 0, batch: 16086, sum loss: 5182.510254, avg loss: 3.374030, ppl: 29.195953 +epoch: 0, batch: 16087, sum loss: 5949.498047, avg loss: 3.450985, ppl: 31.531441 +epoch: 0, batch: 16088, sum loss: 6318.859375, avg loss: 3.421148, ppl: 30.604523 +epoch: 0, batch: 16089, sum loss: 6420.393066, avg loss: 3.451824, ppl: 31.557907 +epoch: 0, batch: 16090, sum loss: 6496.985840, avg loss: 3.517589, ppl: 33.703060 +epoch: 0, batch: 16091, sum loss: 6196.649414, avg loss: 3.456023, ppl: 31.690699 +epoch: 0, batch: 16092, sum loss: 6496.634766, avg loss: 3.561752, ppl: 35.224842 +epoch: 0, batch: 16093, sum loss: 5099.455078, avg loss: 3.223423, ppl: 25.113932 +epoch: 0, batch: 16094, sum loss: 4620.174316, avg loss: 2.791646, ppl: 16.307844 +epoch: 0, batch: 16095, sum loss: 4889.003418, avg loss: 3.114015, ppl: 22.511242 +epoch: 0, batch: 16096, sum loss: 6478.746094, avg loss: 3.546112, ppl: 34.678219 +epoch: 0, batch: 16097, sum loss: 6236.613281, avg loss: 3.507657, ppl: 33.369976 +epoch: 0, batch: 16098, sum loss: 6699.283691, avg loss: 3.483767, ppl: 32.582222 +epoch: 0, batch: 16099, sum loss: 5629.350586, avg loss: 3.417942, ppl: 30.506569 +epoch: 0, batch: 16100, sum loss: 5991.500977, avg loss: 3.507904, ppl: 33.378242 +epoch: 0, batch: 16101, sum loss: 5315.557617, avg loss: 3.154634, ppl: 23.444443 +epoch: 0, batch: 16102, sum loss: 6878.229492, avg loss: 3.589890, ppl: 36.230099 +epoch: 0, batch: 16103, sum loss: 5688.358398, avg loss: 3.375880, ppl: 
29.250019 +epoch: 0, batch: 16104, sum loss: 5353.931641, avg loss: 3.401481, ppl: 30.008514 +epoch: 0, batch: 16105, sum loss: 4856.047363, avg loss: 3.233054, ppl: 25.356977 +epoch: 0, batch: 16106, sum loss: 5424.637207, avg loss: 3.381943, ppl: 29.427908 +epoch: 0, batch: 16107, sum loss: 4602.341797, avg loss: 3.335030, ppl: 28.079235 +epoch: 0, batch: 16108, sum loss: 5877.103027, avg loss: 3.542557, ppl: 34.555180 +epoch: 0, batch: 16109, sum loss: 6211.259277, avg loss: 3.604910, ppl: 36.778362 +epoch: 0, batch: 16110, sum loss: 6550.143555, avg loss: 3.538705, ppl: 34.422325 +epoch: 0, batch: 16111, sum loss: 5716.748047, avg loss: 3.264848, ppl: 26.176125 +epoch: 0, batch: 16112, sum loss: 6176.337891, avg loss: 3.609783, ppl: 36.958019 +epoch: 0, batch: 16113, sum loss: 6357.391602, avg loss: 3.256861, ppl: 25.967890 +epoch: 0, batch: 16114, sum loss: 5107.304199, avg loss: 3.617071, ppl: 37.228363 +epoch: 0, batch: 16115, sum loss: 5926.280273, avg loss: 3.685498, ppl: 39.864952 +epoch: 0, batch: 16116, sum loss: 6052.681152, avg loss: 3.562496, ppl: 35.251087 +epoch: 0, batch: 16117, sum loss: 4574.992188, avg loss: 3.093301, ppl: 22.049740 +epoch: 0, batch: 16118, sum loss: 5936.670410, avg loss: 3.469708, ppl: 32.127365 +epoch: 0, batch: 16119, sum loss: 5202.476074, avg loss: 2.995093, ppl: 19.987215 +epoch: 0, batch: 16120, sum loss: 5782.154785, avg loss: 3.512852, ppl: 33.543812 +epoch: 0, batch: 16121, sum loss: 5593.856934, avg loss: 3.160371, ppl: 23.579350 +epoch: 0, batch: 16122, sum loss: 5255.070312, avg loss: 3.370796, ppl: 29.101675 +epoch: 0, batch: 16123, sum loss: 5865.367188, avg loss: 3.347812, ppl: 28.440449 +epoch: 0, batch: 16124, sum loss: 5134.643066, avg loss: 3.509667, ppl: 33.437145 +epoch: 0, batch: 16125, sum loss: 5983.935059, avg loss: 3.350468, ppl: 28.516068 +epoch: 0, batch: 16126, sum loss: 5477.595703, avg loss: 3.385412, ppl: 29.530142 +epoch: 0, batch: 16127, sum loss: 6168.048340, avg loss: 3.405880, ppl: 
30.140808 +epoch: 0, batch: 16128, sum loss: 6761.915039, avg loss: 3.670964, ppl: 39.289753 +epoch: 0, batch: 16129, sum loss: 6103.555664, avg loss: 3.450286, ppl: 31.509399 +epoch: 0, batch: 16130, sum loss: 4865.351074, avg loss: 3.325599, ppl: 27.815655 +epoch: 0, batch: 16131, sum loss: 6206.725586, avg loss: 3.602278, ppl: 36.681717 +epoch: 0, batch: 16132, sum loss: 6012.923340, avg loss: 3.514275, ppl: 33.591553 +epoch: 0, batch: 16133, sum loss: 6794.119141, avg loss: 3.302926, ppl: 27.192089 +epoch: 0, batch: 16134, sum loss: 5654.561035, avg loss: 3.285625, ppl: 26.725689 +epoch: 0, batch: 16135, sum loss: 6927.034180, avg loss: 3.602202, ppl: 36.678902 +epoch: 0, batch: 16136, sum loss: 5482.930664, avg loss: 3.474607, ppl: 32.285152 +epoch: 0, batch: 16137, sum loss: 6511.365723, avg loss: 3.450644, ppl: 31.520693 +epoch: 0, batch: 16138, sum loss: 5434.521484, avg loss: 3.244491, ppl: 25.648642 +epoch: 0, batch: 16139, sum loss: 5358.317383, avg loss: 3.241571, ppl: 25.573872 +epoch: 0, batch: 16140, sum loss: 7168.822266, avg loss: 3.777040, ppl: 43.686558 +epoch: 0, batch: 16141, sum loss: 6675.383789, avg loss: 3.775670, ppl: 43.626720 +epoch: 0, batch: 16142, sum loss: 6583.434082, avg loss: 3.345241, ppl: 28.367413 +epoch: 0, batch: 16143, sum loss: 5538.195801, avg loss: 3.364639, ppl: 28.923056 +epoch: 0, batch: 16144, sum loss: 5738.303711, avg loss: 3.618098, ppl: 37.266628 +epoch: 0, batch: 16145, sum loss: 6871.145996, avg loss: 3.606901, ppl: 36.851662 +epoch: 0, batch: 16146, sum loss: 5617.543945, avg loss: 3.245259, ppl: 25.668365 +epoch: 0, batch: 16147, sum loss: 5349.466309, avg loss: 3.383596, ppl: 29.476591 +epoch: 0, batch: 16148, sum loss: 5623.860352, avg loss: 3.367581, ppl: 29.008274 +epoch: 0, batch: 16149, sum loss: 6496.224609, avg loss: 3.522898, ppl: 33.882492 +epoch: 0, batch: 16150, sum loss: 5364.042969, avg loss: 3.451765, ppl: 31.556042 +epoch: 0, batch: 16151, sum loss: 7037.953613, avg loss: 3.731683, ppl: 
41.749313 +epoch: 0, batch: 16152, sum loss: 5246.491699, avg loss: 3.345977, ppl: 28.388292 +epoch: 0, batch: 16153, sum loss: 5707.895020, avg loss: 3.371468, ppl: 29.121248 +epoch: 0, batch: 16154, sum loss: 6327.461426, avg loss: 3.532921, ppl: 34.223785 +epoch: 0, batch: 16155, sum loss: 5910.973633, avg loss: 3.350892, ppl: 28.528172 +epoch: 0, batch: 16156, sum loss: 6705.216797, avg loss: 3.521648, ppl: 33.840141 +epoch: 0, batch: 16157, sum loss: 5302.767090, avg loss: 3.267262, ppl: 26.239410 +epoch: 0, batch: 16158, sum loss: 6044.691406, avg loss: 3.248088, ppl: 25.741074 +epoch: 0, batch: 16159, sum loss: 7295.028320, avg loss: 3.723853, ppl: 41.423706 +epoch: 0, batch: 16160, sum loss: 6878.694824, avg loss: 3.645307, ppl: 38.294540 +epoch: 0, batch: 16161, sum loss: 5322.514648, avg loss: 3.073045, ppl: 21.607609 +epoch: 0, batch: 16162, sum loss: 6500.781250, avg loss: 3.534955, ppl: 34.293457 +epoch: 0, batch: 16163, sum loss: 6505.166992, avg loss: 3.429187, ppl: 30.851538 +epoch: 0, batch: 16164, sum loss: 6736.486328, avg loss: 3.419536, ppl: 30.555244 +epoch: 0, batch: 16165, sum loss: 5742.626953, avg loss: 3.457331, ppl: 31.732176 +epoch: 0, batch: 16166, sum loss: 6249.537109, avg loss: 3.470037, ppl: 32.137939 +epoch: 0, batch: 16167, sum loss: 5633.524414, avg loss: 3.357285, ppl: 28.711134 +epoch: 0, batch: 16168, sum loss: 5595.663574, avg loss: 3.543802, ppl: 34.598213 +epoch: 0, batch: 16169, sum loss: 6158.228516, avg loss: 3.242880, ppl: 25.607355 +epoch: 0, batch: 16170, sum loss: 6489.064941, avg loss: 3.501924, ppl: 33.179222 +epoch: 0, batch: 16171, sum loss: 6642.748047, avg loss: 3.518405, ppl: 33.730576 +epoch: 0, batch: 16172, sum loss: 5587.250000, avg loss: 3.455319, ppl: 31.668379 +epoch: 0, batch: 16173, sum loss: 4944.735352, avg loss: 3.078913, ppl: 21.734774 +epoch: 0, batch: 16174, sum loss: 4958.919922, avg loss: 3.355156, ppl: 28.650064 +epoch: 0, batch: 16175, sum loss: 6082.826660, avg loss: 3.204861, ppl: 
24.652082 +epoch: 0, batch: 16176, sum loss: 5467.546875, avg loss: 3.391778, ppl: 29.718760 +epoch: 0, batch: 16177, sum loss: 5615.088867, avg loss: 3.451192, ppl: 31.537952 +epoch: 0, batch: 16178, sum loss: 5269.821777, avg loss: 3.419742, ppl: 30.561518 +epoch: 0, batch: 16179, sum loss: 5998.141602, avg loss: 3.459136, ppl: 31.789492 +epoch: 0, batch: 16180, sum loss: 4935.068359, avg loss: 3.053879, ppl: 21.197405 +epoch: 0, batch: 16181, sum loss: 6304.453125, avg loss: 3.365965, ppl: 28.961443 +epoch: 0, batch: 16182, sum loss: 4927.108398, avg loss: 3.250072, ppl: 25.792185 +epoch: 0, batch: 16183, sum loss: 5381.337402, avg loss: 3.554384, ppl: 34.966282 +epoch: 0, batch: 16184, sum loss: 4656.646484, avg loss: 3.102363, ppl: 22.250463 +epoch: 0, batch: 16185, sum loss: 5710.104492, avg loss: 3.441895, ppl: 31.246128 +epoch: 0, batch: 16186, sum loss: 5530.176270, avg loss: 3.297660, ppl: 27.049273 +epoch: 0, batch: 16187, sum loss: 6173.480957, avg loss: 3.275056, ppl: 26.444710 +epoch: 0, batch: 16188, sum loss: 6303.315430, avg loss: 3.446318, ppl: 31.384626 +epoch: 0, batch: 16189, sum loss: 6257.269531, avg loss: 3.557288, ppl: 35.067970 +epoch: 0, batch: 16190, sum loss: 5053.500977, avg loss: 3.136872, ppl: 23.031717 +epoch: 0, batch: 16191, sum loss: 6218.393066, avg loss: 3.714691, ppl: 41.045918 +epoch: 0, batch: 16192, sum loss: 5721.600098, avg loss: 3.591714, ppl: 36.296230 +epoch: 0, batch: 16193, sum loss: 6244.320312, avg loss: 3.594888, ppl: 36.411621 +epoch: 0, batch: 16194, sum loss: 6267.723145, avg loss: 3.644025, ppl: 38.245468 +epoch: 0, batch: 16195, sum loss: 5560.380371, avg loss: 3.384285, ppl: 29.496895 +epoch: 0, batch: 16196, sum loss: 5737.316895, avg loss: 3.429359, ppl: 30.856848 +epoch: 0, batch: 16197, sum loss: 6192.397949, avg loss: 3.358133, ppl: 28.735500 +epoch: 0, batch: 16198, sum loss: 5893.958984, avg loss: 3.416788, ppl: 30.471386 +epoch: 0, batch: 16199, sum loss: 4866.323730, avg loss: 3.231291, ppl: 
25.312309 +epoch: 0, batch: 16200, sum loss: 5146.623047, avg loss: 3.174968, ppl: 23.926048 +epoch: 0, batch: 16201, sum loss: 4834.148926, avg loss: 3.320157, ppl: 27.664701 +epoch: 0, batch: 16202, sum loss: 5070.179688, avg loss: 3.268975, ppl: 26.284372 +epoch: 0, batch: 16203, sum loss: 3952.316650, avg loss: 2.851599, ppl: 17.315451 +epoch: 0, batch: 16204, sum loss: 5042.000000, avg loss: 3.102769, ppl: 22.259506 +epoch: 0, batch: 16205, sum loss: 5830.185547, avg loss: 3.413457, ppl: 30.370064 +epoch: 0, batch: 16206, sum loss: 5891.256348, avg loss: 3.467485, ppl: 32.056011 +epoch: 0, batch: 16207, sum loss: 6095.121582, avg loss: 3.455284, ppl: 31.667292 +epoch: 0, batch: 16208, sum loss: 6061.777832, avg loss: 3.336146, ppl: 28.110590 +epoch: 0, batch: 16209, sum loss: 5797.623535, avg loss: 3.632596, ppl: 37.810856 +epoch: 0, batch: 16210, sum loss: 5499.040527, avg loss: 3.390284, ppl: 29.674374 +epoch: 0, batch: 16211, sum loss: 5789.176758, avg loss: 3.313782, ppl: 27.488884 +epoch: 0, batch: 16212, sum loss: 6136.145020, avg loss: 3.472635, ppl: 32.221519 +epoch: 0, batch: 16213, sum loss: 5381.649902, avg loss: 3.324058, ppl: 27.772825 +epoch: 0, batch: 16214, sum loss: 4812.526855, avg loss: 3.309853, ppl: 27.381109 +epoch: 0, batch: 16215, sum loss: 5238.189453, avg loss: 3.392610, ppl: 29.743484 +epoch: 0, batch: 16216, sum loss: 6003.044922, avg loss: 3.316600, ppl: 27.566454 +epoch: 0, batch: 16217, sum loss: 4559.468750, avg loss: 3.367407, ppl: 29.003212 +epoch: 0, batch: 16218, sum loss: 6482.432129, avg loss: 3.546188, ppl: 34.680874 +epoch: 0, batch: 16219, sum loss: 5271.238281, avg loss: 3.620356, ppl: 37.350864 +epoch: 0, batch: 16220, sum loss: 6765.872559, avg loss: 3.750484, ppl: 42.541656 +epoch: 0, batch: 16221, sum loss: 5714.376465, avg loss: 3.347614, ppl: 28.434795 +epoch: 0, batch: 16222, sum loss: 5809.671875, avg loss: 3.429558, ppl: 30.863007 +epoch: 0, batch: 16223, sum loss: 5156.688965, avg loss: 3.399267, ppl: 
29.942158 +epoch: 0, batch: 16224, sum loss: 5517.347656, avg loss: 3.437600, ppl: 31.112188 +epoch: 0, batch: 16225, sum loss: 5909.867188, avg loss: 3.431978, ppl: 30.937792 +epoch: 0, batch: 16226, sum loss: 6679.179688, avg loss: 3.426978, ppl: 30.783470 +epoch: 0, batch: 16227, sum loss: 5258.062012, avg loss: 3.550346, ppl: 34.825363 +epoch: 0, batch: 16228, sum loss: 5072.217285, avg loss: 3.276626, ppl: 26.486267 +epoch: 0, batch: 16229, sum loss: 6470.637207, avg loss: 3.557250, ppl: 35.066624 +epoch: 0, batch: 16230, sum loss: 5947.158203, avg loss: 3.593449, ppl: 36.359268 +epoch: 0, batch: 16231, sum loss: 5969.153809, avg loss: 3.428578, ppl: 30.832756 +epoch: 0, batch: 16232, sum loss: 6097.815918, avg loss: 3.376421, ppl: 29.265833 +epoch: 0, batch: 16233, sum loss: 5851.125000, avg loss: 3.301989, ppl: 27.166628 +epoch: 0, batch: 16234, sum loss: 7394.638184, avg loss: 3.753623, ppl: 42.675434 +epoch: 0, batch: 16235, sum loss: 5181.528320, avg loss: 3.210365, ppl: 24.788122 +epoch: 0, batch: 16236, sum loss: 6576.477539, avg loss: 3.522484, ppl: 33.868446 +epoch: 0, batch: 16237, sum loss: 5821.806152, avg loss: 3.412547, ppl: 30.342440 +epoch: 0, batch: 16238, sum loss: 5241.374023, avg loss: 3.298536, ppl: 27.072983 +epoch: 0, batch: 16239, sum loss: 5446.477539, avg loss: 3.263318, ppl: 26.136114 +epoch: 0, batch: 16240, sum loss: 6261.975586, avg loss: 3.361232, ppl: 28.824682 +epoch: 0, batch: 16241, sum loss: 5647.281738, avg loss: 3.533969, ppl: 34.259663 +epoch: 0, batch: 16242, sum loss: 6917.860352, avg loss: 3.511604, ppl: 33.501972 +epoch: 0, batch: 16243, sum loss: 6109.172363, avg loss: 3.614895, ppl: 37.147442 +epoch: 0, batch: 16244, sum loss: 5081.737305, avg loss: 3.308423, ppl: 27.341969 +epoch: 0, batch: 16245, sum loss: 5902.137695, avg loss: 3.266263, ppl: 26.213198 +epoch: 0, batch: 16246, sum loss: 5866.895020, avg loss: 3.322138, ppl: 27.719540 +epoch: 0, batch: 16247, sum loss: 7488.408203, avg loss: 3.595011, ppl: 
36.416111 +epoch: 0, batch: 16248, sum loss: 6129.366211, avg loss: 3.248207, ppl: 25.744129 +epoch: 0, batch: 16249, sum loss: 6340.421875, avg loss: 3.472301, ppl: 32.210781 +epoch: 0, batch: 16250, sum loss: 6825.178223, avg loss: 3.447059, ppl: 31.407898 +epoch: 0, batch: 16251, sum loss: 5772.539551, avg loss: 3.253968, ppl: 25.892879 +epoch: 0, batch: 16252, sum loss: 5959.851074, avg loss: 3.348231, ppl: 28.452358 +epoch: 0, batch: 16253, sum loss: 5338.219727, avg loss: 3.378620, ppl: 29.330273 +epoch: 0, batch: 16254, sum loss: 6157.696777, avg loss: 3.341127, ppl: 28.250946 +epoch: 0, batch: 16255, sum loss: 4363.643066, avg loss: 2.994951, ppl: 19.984371 +epoch: 0, batch: 16256, sum loss: 4448.343750, avg loss: 3.239872, ppl: 25.530441 +epoch: 0, batch: 16257, sum loss: 5782.734375, avg loss: 3.454441, ppl: 31.640606 +epoch: 0, batch: 16258, sum loss: 5534.211426, avg loss: 3.189747, ppl: 24.282291 +epoch: 0, batch: 16259, sum loss: 6053.462891, avg loss: 3.431668, ppl: 30.928196 +epoch: 0, batch: 16260, sum loss: 6551.032227, avg loss: 3.617357, ppl: 37.238998 +epoch: 0, batch: 16261, sum loss: 4990.401855, avg loss: 3.420426, ppl: 30.582451 +epoch: 0, batch: 16262, sum loss: 6245.271484, avg loss: 3.240930, ppl: 25.557476 +epoch: 0, batch: 16263, sum loss: 5200.365234, avg loss: 3.170954, ppl: 23.830219 +epoch: 0, batch: 16264, sum loss: 5885.883301, avg loss: 3.516060, ppl: 33.651569 +epoch: 0, batch: 16265, sum loss: 6221.661621, avg loss: 3.646929, ppl: 38.356701 +epoch: 0, batch: 16266, sum loss: 4870.258789, avg loss: 3.268630, ppl: 26.275324 +epoch: 0, batch: 16267, sum loss: 5334.691406, avg loss: 3.502752, ppl: 33.206715 +epoch: 0, batch: 16268, sum loss: 6174.979980, avg loss: 3.442018, ppl: 31.249958 +epoch: 0, batch: 16269, sum loss: 5509.473633, avg loss: 3.271659, ppl: 26.355030 +epoch: 0, batch: 16270, sum loss: 5548.181641, avg loss: 3.154168, ppl: 23.433535 +epoch: 0, batch: 16271, sum loss: 6458.949219, avg loss: 3.657389, ppl: 
38.760014 +epoch: 0, batch: 16272, sum loss: 5503.145996, avg loss: 3.301227, ppl: 27.145935 +epoch: 0, batch: 16273, sum loss: 4728.452148, avg loss: 3.068431, ppl: 21.508127 +epoch: 0, batch: 16274, sum loss: 5236.256836, avg loss: 3.406804, ppl: 30.168659 +epoch: 0, batch: 16275, sum loss: 5942.355957, avg loss: 3.430921, ppl: 30.905104 +epoch: 0, batch: 16276, sum loss: 5972.040039, avg loss: 3.395134, ppl: 29.818642 +epoch: 0, batch: 16277, sum loss: 6513.308105, avg loss: 3.466370, ppl: 32.020287 +epoch: 0, batch: 16278, sum loss: 5643.362793, avg loss: 3.395525, ppl: 29.830317 +epoch: 0, batch: 16279, sum loss: 5792.843750, avg loss: 3.327308, ppl: 27.863245 +epoch: 0, batch: 16280, sum loss: 6485.421387, avg loss: 3.668225, ppl: 39.182289 +epoch: 0, batch: 16281, sum loss: 5426.035156, avg loss: 3.500668, ppl: 33.137581 +epoch: 0, batch: 16282, sum loss: 7230.880859, avg loss: 3.549770, ppl: 34.805298 +epoch: 0, batch: 16283, sum loss: 6211.822754, avg loss: 3.320055, ppl: 27.661873 +epoch: 0, batch: 16284, sum loss: 5944.132324, avg loss: 3.513081, ppl: 33.551468 +epoch: 0, batch: 16285, sum loss: 5045.268066, avg loss: 3.299718, ppl: 27.104984 +epoch: 0, batch: 16286, sum loss: 5895.236816, avg loss: 3.284255, ppl: 26.689081 +epoch: 0, batch: 16287, sum loss: 5004.354004, avg loss: 3.349634, ppl: 28.492315 +epoch: 0, batch: 16288, sum loss: 6773.266602, avg loss: 3.551792, ppl: 34.875748 +epoch: 0, batch: 16289, sum loss: 6098.005859, avg loss: 3.533028, ppl: 34.227440 +epoch: 0, batch: 16290, sum loss: 5114.735840, avg loss: 3.200711, ppl: 24.549973 +epoch: 0, batch: 16291, sum loss: 4732.660645, avg loss: 3.130067, ppl: 22.875504 +epoch: 0, batch: 16292, sum loss: 6283.412109, avg loss: 3.414898, ppl: 30.413837 +epoch: 0, batch: 16293, sum loss: 5588.368164, avg loss: 3.382790, ppl: 29.452818 +epoch: 0, batch: 16294, sum loss: 6315.781250, avg loss: 3.710800, ppl: 40.886490 +epoch: 0, batch: 16295, sum loss: 6002.927734, avg loss: 3.592417, ppl: 
36.321743 +epoch: 0, batch: 16296, sum loss: 7450.962891, avg loss: 3.536290, ppl: 34.339283 +epoch: 0, batch: 16297, sum loss: 4961.600586, avg loss: 3.257781, ppl: 25.991798 +epoch: 0, batch: 16298, sum loss: 6236.705078, avg loss: 3.453325, ppl: 31.605307 +epoch: 0, batch: 16299, sum loss: 7014.730469, avg loss: 3.923227, ppl: 50.563366 +epoch: 0, batch: 16300, sum loss: 5574.320801, avg loss: 3.652897, ppl: 38.586292 +epoch: 0, batch: 16301, sum loss: 6650.639648, avg loss: 3.640197, ppl: 38.099335 +epoch: 0, batch: 16302, sum loss: 5640.721680, avg loss: 2.976634, ppl: 19.621660 +epoch: 0, batch: 16303, sum loss: 5620.220215, avg loss: 3.230012, ppl: 25.279953 +epoch: 0, batch: 16304, sum loss: 5040.611816, avg loss: 3.419683, ppl: 30.559711 +epoch: 0, batch: 16305, sum loss: 5582.303711, avg loss: 3.375033, ppl: 29.225239 +epoch: 0, batch: 16306, sum loss: 5279.996094, avg loss: 3.496686, ppl: 33.005898 +epoch: 0, batch: 16307, sum loss: 4974.061035, avg loss: 3.152130, ppl: 23.385820 +epoch: 0, batch: 16308, sum loss: 6570.229004, avg loss: 3.941349, ppl: 51.488014 +epoch: 0, batch: 16309, sum loss: 6405.732422, avg loss: 3.696326, ppl: 40.298954 +epoch: 0, batch: 16310, sum loss: 6439.813477, avg loss: 3.646554, ppl: 38.342293 +epoch: 0, batch: 16311, sum loss: 5154.164062, avg loss: 3.195390, ppl: 24.419695 +epoch: 0, batch: 16312, sum loss: 6531.973145, avg loss: 3.365262, ppl: 28.941072 +epoch: 0, batch: 16313, sum loss: 5483.490723, avg loss: 3.223686, ppl: 25.120556 +epoch: 0, batch: 16314, sum loss: 5742.183105, avg loss: 3.505606, ppl: 33.301624 +epoch: 0, batch: 16315, sum loss: 4999.288086, avg loss: 3.172137, ppl: 23.858416 +epoch: 0, batch: 16316, sum loss: 6344.739746, avg loss: 3.435160, ppl: 31.036375 +epoch: 0, batch: 16317, sum loss: 6065.796387, avg loss: 3.566018, ppl: 35.375443 +epoch: 0, batch: 16318, sum loss: 6497.692383, avg loss: 3.546775, ppl: 34.701237 +epoch: 0, batch: 16319, sum loss: 6162.626953, avg loss: 3.485649, ppl: 
32.643600 +epoch: 0, batch: 16320, sum loss: 5595.297363, avg loss: 3.264467, ppl: 26.166174 +epoch: 0, batch: 16321, sum loss: 6187.671875, avg loss: 3.578758, ppl: 35.829006 +epoch: 0, batch: 16322, sum loss: 5918.956055, avg loss: 3.537930, ppl: 34.395630 +epoch: 0, batch: 16323, sum loss: 6242.086914, avg loss: 3.485252, ppl: 32.630653 +epoch: 0, batch: 16324, sum loss: 6064.693848, avg loss: 3.548680, ppl: 34.767387 +epoch: 0, batch: 16325, sum loss: 6221.572266, avg loss: 3.409081, ppl: 30.237436 +epoch: 0, batch: 16326, sum loss: 5401.389648, avg loss: 3.325979, ppl: 27.826220 +epoch: 0, batch: 16327, sum loss: 5077.519043, avg loss: 3.327339, ppl: 27.864096 +epoch: 0, batch: 16328, sum loss: 5579.117676, avg loss: 3.338790, ppl: 28.185001 +epoch: 0, batch: 16329, sum loss: 6377.650391, avg loss: 3.705782, ppl: 40.681835 +epoch: 0, batch: 16330, sum loss: 5746.331055, avg loss: 3.230090, ppl: 25.281942 +epoch: 0, batch: 16331, sum loss: 6240.333008, avg loss: 3.721129, ppl: 41.310997 +epoch: 0, batch: 16332, sum loss: 5436.716797, avg loss: 3.680919, ppl: 39.682842 +epoch: 0, batch: 16333, sum loss: 4914.070312, avg loss: 3.230816, ppl: 25.300285 +epoch: 0, batch: 16334, sum loss: 6632.931152, avg loss: 3.652495, ppl: 38.570778 +epoch: 0, batch: 16335, sum loss: 5725.883789, avg loss: 3.325136, ppl: 27.802771 +epoch: 0, batch: 16336, sum loss: 6062.305176, avg loss: 3.347491, ppl: 28.431303 +epoch: 0, batch: 16337, sum loss: 5084.158203, avg loss: 3.209696, ppl: 24.771549 +epoch: 0, batch: 16338, sum loss: 5014.983398, avg loss: 3.166025, ppl: 23.713036 +epoch: 0, batch: 16339, sum loss: 6536.905762, avg loss: 3.374758, ppl: 29.217205 +epoch: 0, batch: 16340, sum loss: 6801.099121, avg loss: 3.358567, ppl: 28.747980 +epoch: 0, batch: 16341, sum loss: 5306.144531, avg loss: 3.373264, ppl: 29.173597 +epoch: 0, batch: 16342, sum loss: 7404.860352, avg loss: 3.737940, ppl: 42.011372 +epoch: 0, batch: 16343, sum loss: 5798.875000, avg loss: 3.449658, ppl: 
31.489618 +epoch: 0, batch: 16344, sum loss: 5235.677734, avg loss: 3.125778, ppl: 22.777609 +epoch: 0, batch: 16345, sum loss: 5523.861816, avg loss: 3.333652, ppl: 28.040565 +epoch: 0, batch: 16346, sum loss: 7255.269043, avg loss: 3.705449, ppl: 40.668308 +epoch: 0, batch: 16347, sum loss: 5577.774414, avg loss: 3.147728, ppl: 23.283110 +epoch: 0, batch: 16348, sum loss: 6221.662109, avg loss: 3.503188, ppl: 33.221195 +epoch: 0, batch: 16349, sum loss: 6172.589355, avg loss: 3.262468, ppl: 26.113903 +epoch: 0, batch: 16350, sum loss: 5351.509277, avg loss: 3.399942, ppl: 29.962366 +epoch: 0, batch: 16351, sum loss: 4986.212402, avg loss: 3.258962, ppl: 26.022522 +epoch: 0, batch: 16352, sum loss: 6157.040039, avg loss: 3.407327, ppl: 30.184452 +epoch: 0, batch: 16353, sum loss: 5924.498535, avg loss: 3.381563, ppl: 29.416719 +epoch: 0, batch: 16354, sum loss: 6065.189941, avg loss: 3.493773, ppl: 32.909882 +epoch: 0, batch: 16355, sum loss: 6392.797852, avg loss: 3.699536, ppl: 40.428535 +epoch: 0, batch: 16356, sum loss: 6782.990723, avg loss: 3.596496, ppl: 36.470215 +epoch: 0, batch: 16357, sum loss: 5786.676758, avg loss: 3.366304, ppl: 28.971256 +epoch: 0, batch: 16358, sum loss: 6874.704590, avg loss: 3.529109, ppl: 34.093586 +epoch: 0, batch: 16359, sum loss: 6042.512207, avg loss: 3.366302, ppl: 28.971201 +epoch: 0, batch: 16360, sum loss: 5437.890137, avg loss: 3.443882, ppl: 31.308268 +epoch: 0, batch: 16361, sum loss: 6189.944336, avg loss: 3.596714, ppl: 36.478153 +epoch: 0, batch: 16362, sum loss: 5347.240234, avg loss: 3.358819, ppl: 28.755219 +epoch: 0, batch: 16363, sum loss: 6935.084961, avg loss: 3.692803, ppl: 40.157265 +epoch: 0, batch: 16364, sum loss: 6975.764160, avg loss: 3.665667, ppl: 39.082188 +epoch: 0, batch: 16365, sum loss: 5648.407227, avg loss: 3.497466, ppl: 33.031639 +epoch: 0, batch: 16366, sum loss: 6312.614258, avg loss: 3.601035, ppl: 36.636139 +epoch: 0, batch: 16367, sum loss: 5467.386230, avg loss: 3.469154, ppl: 
32.109554 +epoch: 0, batch: 16368, sum loss: 6763.336914, avg loss: 3.383360, ppl: 29.469627 +epoch: 0, batch: 16369, sum loss: 6173.048828, avg loss: 3.416186, ppl: 30.453049 +epoch: 0, batch: 16370, sum loss: 5177.060059, avg loss: 3.221568, ppl: 25.067404 +epoch: 0, batch: 16371, sum loss: 5865.092773, avg loss: 3.514136, ppl: 33.586899 +epoch: 0, batch: 16372, sum loss: 6026.025391, avg loss: 3.383507, ppl: 29.473949 +epoch: 0, batch: 16373, sum loss: 5229.738281, avg loss: 3.314156, ppl: 27.499176 +epoch: 0, batch: 16374, sum loss: 6314.387695, avg loss: 3.444838, ppl: 31.338200 +epoch: 0, batch: 16375, sum loss: 5164.193848, avg loss: 3.053929, ppl: 21.198471 +epoch: 0, batch: 16376, sum loss: 5837.011719, avg loss: 3.419456, ppl: 30.552797 +epoch: 0, batch: 16377, sum loss: 6149.538574, avg loss: 3.353074, ppl: 28.590494 +epoch: 0, batch: 16378, sum loss: 5085.696289, avg loss: 3.341456, ppl: 28.260256 +epoch: 0, batch: 16379, sum loss: 5674.478516, avg loss: 3.485552, ppl: 32.640434 +epoch: 0, batch: 16380, sum loss: 5462.317383, avg loss: 3.216913, ppl: 24.950966 +epoch: 0, batch: 16381, sum loss: 5918.980957, avg loss: 3.554944, ppl: 34.985844 +epoch: 0, batch: 16382, sum loss: 5714.123535, avg loss: 3.333794, ppl: 28.044552 +epoch: 0, batch: 16383, sum loss: 6253.719727, avg loss: 3.600299, ppl: 36.609192 +epoch: 0, batch: 16384, sum loss: 5047.905273, avg loss: 3.376525, ppl: 29.268890 +epoch: 0, batch: 16385, sum loss: 6350.640137, avg loss: 3.361906, ppl: 28.844110 +epoch: 0, batch: 16386, sum loss: 6511.444336, avg loss: 3.674630, ppl: 39.434071 +epoch: 0, batch: 16387, sum loss: 6697.152832, avg loss: 3.618127, ppl: 37.267696 +epoch: 0, batch: 16388, sum loss: 5900.594727, avg loss: 3.205103, ppl: 24.658043 +epoch: 0, batch: 16389, sum loss: 5577.722168, avg loss: 3.011729, ppl: 20.322508 +epoch: 0, batch: 16390, sum loss: 5706.509766, avg loss: 3.184436, ppl: 24.153669 +epoch: 0, batch: 16391, sum loss: 6050.671875, avg loss: 3.475400, ppl: 
32.310757 +epoch: 0, batch: 16392, sum loss: 5554.829102, avg loss: 3.348300, ppl: 28.454306 +epoch: 0, batch: 16393, sum loss: 5072.716309, avg loss: 3.247578, ppl: 25.727943 +epoch: 0, batch: 16394, sum loss: 5808.202637, avg loss: 3.380793, ppl: 29.394073 +epoch: 0, batch: 16395, sum loss: 5459.692871, avg loss: 3.615691, ppl: 37.177017 +epoch: 0, batch: 16396, sum loss: 6580.500000, avg loss: 3.537903, ppl: 34.394730 +epoch: 0, batch: 16397, sum loss: 5676.601562, avg loss: 3.391041, ppl: 29.696838 +epoch: 0, batch: 16398, sum loss: 5096.266602, avg loss: 3.250170, ppl: 25.794724 +epoch: 0, batch: 16399, sum loss: 5601.108398, avg loss: 3.378232, ppl: 29.318882 +epoch: 0, batch: 16400, sum loss: 6318.913086, avg loss: 3.452958, ppl: 31.593712 +epoch: 0, batch: 16401, sum loss: 5706.257812, avg loss: 3.315664, ppl: 27.540676 +epoch: 0, batch: 16402, sum loss: 5903.424316, avg loss: 3.476693, ppl: 32.352551 +epoch: 0, batch: 16403, sum loss: 4981.833984, avg loss: 3.428654, ppl: 30.835110 +epoch: 0, batch: 16404, sum loss: 6043.511719, avg loss: 3.477279, ppl: 32.371532 +epoch: 0, batch: 16405, sum loss: 6670.180176, avg loss: 3.484942, ppl: 32.620548 +epoch: 0, batch: 16406, sum loss: 5878.638184, avg loss: 3.472320, ppl: 32.211395 +epoch: 0, batch: 16407, sum loss: 6767.402344, avg loss: 3.346885, ppl: 28.414091 +epoch: 0, batch: 16408, sum loss: 5998.099121, avg loss: 3.297471, ppl: 27.044146 +epoch: 0, batch: 16409, sum loss: 5927.620117, avg loss: 3.354624, ppl: 28.634830 +epoch: 0, batch: 16410, sum loss: 6227.014160, avg loss: 3.353266, ppl: 28.595963 +epoch: 0, batch: 16411, sum loss: 6305.943359, avg loss: 3.445871, ppl: 31.370592 +epoch: 0, batch: 16412, sum loss: 6337.111816, avg loss: 3.487679, ppl: 32.709923 +epoch: 0, batch: 16413, sum loss: 5578.185547, avg loss: 3.449713, ppl: 31.491352 +epoch: 0, batch: 16414, sum loss: 5942.534668, avg loss: 3.452954, ppl: 31.593592 +epoch: 0, batch: 16415, sum loss: 6161.686523, avg loss: 3.567856, ppl: 
35.440514 +epoch: 0, batch: 16416, sum loss: 5654.760742, avg loss: 3.400337, ppl: 29.974207 +epoch: 0, batch: 16417, sum loss: 7199.675293, avg loss: 3.658372, ppl: 38.798126 +epoch: 0, batch: 16418, sum loss: 6345.089355, avg loss: 3.461587, ppl: 31.867516 +epoch: 0, batch: 16419, sum loss: 4945.640137, avg loss: 3.085240, ppl: 21.872719 +epoch: 0, batch: 16420, sum loss: 6118.539551, avg loss: 3.478420, ppl: 32.408463 +epoch: 0, batch: 16421, sum loss: 5711.455566, avg loss: 3.506111, ppl: 33.318455 +epoch: 0, batch: 16422, sum loss: 5856.043945, avg loss: 3.410625, ppl: 30.284180 +epoch: 0, batch: 16423, sum loss: 5426.032227, avg loss: 3.312596, ppl: 27.456305 +epoch: 0, batch: 16424, sum loss: 5762.438477, avg loss: 3.513682, ppl: 33.571648 +epoch: 0, batch: 16425, sum loss: 5460.974609, avg loss: 3.478328, ppl: 32.405487 +epoch: 0, batch: 16426, sum loss: 6364.613281, avg loss: 3.691771, ppl: 40.115841 +epoch: 0, batch: 16427, sum loss: 6038.666016, avg loss: 3.682113, ppl: 39.730270 +epoch: 0, batch: 16428, sum loss: 5906.558105, avg loss: 3.541102, ppl: 34.504929 +epoch: 0, batch: 16429, sum loss: 5387.429199, avg loss: 3.253278, ppl: 25.875032 +epoch: 0, batch: 16430, sum loss: 4655.465820, avg loss: 3.171298, ppl: 23.838413 +epoch: 0, batch: 16431, sum loss: 4805.375000, avg loss: 3.277882, ppl: 26.519547 +epoch: 0, batch: 16432, sum loss: 4598.225098, avg loss: 3.229091, ppl: 25.256680 +epoch: 0, batch: 16433, sum loss: 6353.281250, avg loss: 3.408413, ppl: 30.217241 +epoch: 0, batch: 16434, sum loss: 6179.524414, avg loss: 3.276524, ppl: 26.483564 +epoch: 0, batch: 16435, sum loss: 4246.730469, avg loss: 2.992763, ppl: 19.940704 +epoch: 0, batch: 16436, sum loss: 5274.485352, avg loss: 3.363830, ppl: 28.899660 +epoch: 0, batch: 16437, sum loss: 5334.284180, avg loss: 3.311163, ppl: 27.417004 +epoch: 0, batch: 16438, sum loss: 5526.440918, avg loss: 3.590930, ppl: 36.267780 +epoch: 0, batch: 16439, sum loss: 4559.857422, avg loss: 3.048033, ppl: 
21.073851 +epoch: 0, batch: 16440, sum loss: 5620.254883, avg loss: 3.237474, ppl: 25.469303 +epoch: 0, batch: 16441, sum loss: 5646.541504, avg loss: 3.317592, ppl: 27.593822 +epoch: 0, batch: 16442, sum loss: 6341.657227, avg loss: 3.398530, ppl: 29.920086 +epoch: 0, batch: 16443, sum loss: 5911.673828, avg loss: 3.349390, ppl: 28.485359 +epoch: 0, batch: 16444, sum loss: 6485.380371, avg loss: 3.513207, ppl: 33.555714 +epoch: 0, batch: 16445, sum loss: 5553.353516, avg loss: 3.317416, ppl: 27.588961 +epoch: 0, batch: 16446, sum loss: 6641.697266, avg loss: 3.490119, ppl: 32.789864 +epoch: 0, batch: 16447, sum loss: 5770.187012, avg loss: 3.640496, ppl: 38.110752 +epoch: 0, batch: 16448, sum loss: 6243.090820, avg loss: 3.358306, ppl: 28.740461 +epoch: 0, batch: 16449, sum loss: 5671.452637, avg loss: 3.373856, ppl: 29.190880 +epoch: 0, batch: 16450, sum loss: 5374.190430, avg loss: 3.200828, ppl: 24.552853 +epoch: 0, batch: 16451, sum loss: 6501.382812, avg loss: 3.828847, ppl: 46.009476 +epoch: 0, batch: 16452, sum loss: 4754.696777, avg loss: 3.216980, ppl: 24.952662 +epoch: 0, batch: 16453, sum loss: 6733.290039, avg loss: 3.573933, ppl: 35.656567 +epoch: 0, batch: 16454, sum loss: 6032.174316, avg loss: 3.466767, ppl: 32.033009 +epoch: 0, batch: 16455, sum loss: 5835.274414, avg loss: 3.597580, ppl: 36.509762 +epoch: 0, batch: 16456, sum loss: 5501.301270, avg loss: 3.412718, ppl: 30.347612 +epoch: 0, batch: 16457, sum loss: 5085.358887, avg loss: 3.293627, ppl: 26.940388 +epoch: 0, batch: 16458, sum loss: 5459.438965, avg loss: 3.625126, ppl: 37.529438 +epoch: 0, batch: 16459, sum loss: 5027.875488, avg loss: 3.284047, ppl: 26.683540 +epoch: 0, batch: 16460, sum loss: 6299.977539, avg loss: 3.529399, ppl: 34.103477 +epoch: 0, batch: 16461, sum loss: 6616.427734, avg loss: 3.534417, ppl: 34.275017 +epoch: 0, batch: 16462, sum loss: 5324.731445, avg loss: 3.006624, ppl: 20.219030 +epoch: 0, batch: 16463, sum loss: 5710.065918, avg loss: 3.218752, ppl: 
24.996897 +epoch: 0, batch: 16464, sum loss: 6452.282227, avg loss: 3.463383, ppl: 31.924788 +epoch: 0, batch: 16465, sum loss: 5605.607422, avg loss: 3.194078, ppl: 24.387684 +epoch: 0, batch: 16466, sum loss: 5347.854980, avg loss: 3.497616, ppl: 33.036610 +epoch: 0, batch: 16467, sum loss: 5156.808105, avg loss: 3.324828, ppl: 27.794214 +epoch: 0, batch: 16468, sum loss: 5525.266602, avg loss: 3.557802, ppl: 35.085999 +epoch: 0, batch: 16469, sum loss: 5883.337891, avg loss: 3.259467, ppl: 26.035654 +epoch: 0, batch: 16470, sum loss: 6230.667969, avg loss: 3.421564, ppl: 30.617266 +epoch: 0, batch: 16471, sum loss: 5654.423340, avg loss: 3.173077, ppl: 23.880856 +epoch: 0, batch: 16472, sum loss: 6310.966797, avg loss: 3.409491, ppl: 30.249830 +epoch: 0, batch: 16473, sum loss: 5282.049316, avg loss: 3.470466, ppl: 32.151733 +epoch: 0, batch: 16474, sum loss: 6136.214355, avg loss: 3.645998, ppl: 38.320988 +epoch: 0, batch: 16475, sum loss: 5106.061523, avg loss: 3.313473, ppl: 27.480392 +epoch: 0, batch: 16476, sum loss: 5995.743164, avg loss: 3.418325, ppl: 30.518267 +epoch: 0, batch: 16477, sum loss: 5728.173828, avg loss: 3.432100, ppl: 30.941540 +epoch: 0, batch: 16478, sum loss: 6206.821777, avg loss: 3.554881, ppl: 34.983650 +epoch: 0, batch: 16479, sum loss: 5212.141113, avg loss: 3.191758, ppl: 24.331167 +epoch: 0, batch: 16480, sum loss: 5904.731934, avg loss: 3.438982, ppl: 31.155226 +epoch: 0, batch: 16481, sum loss: 4925.756836, avg loss: 3.188192, ppl: 24.244562 +epoch: 0, batch: 16482, sum loss: 6946.755371, avg loss: 3.593769, ppl: 36.370903 +epoch: 0, batch: 16483, sum loss: 5949.393555, avg loss: 3.427070, ppl: 30.786304 +epoch: 0, batch: 16484, sum loss: 5316.905762, avg loss: 3.365130, ppl: 28.937263 +epoch: 0, batch: 16485, sum loss: 5928.094727, avg loss: 3.324787, ppl: 27.793076 +epoch: 0, batch: 16486, sum loss: 5597.694336, avg loss: 3.127203, ppl: 22.810101 +epoch: 0, batch: 16487, sum loss: 8469.382812, avg loss: 3.872603, ppl: 
48.067341 +epoch: 0, batch: 16488, sum loss: 4817.635742, avg loss: 3.007263, ppl: 20.231953 +epoch: 0, batch: 16489, sum loss: 5640.729004, avg loss: 3.268093, ppl: 26.261215 +epoch: 0, batch: 16490, sum loss: 5492.193359, avg loss: 3.117023, ppl: 22.579050 +epoch: 0, batch: 16491, sum loss: 5761.891602, avg loss: 3.660668, ppl: 38.887306 +epoch: 0, batch: 16492, sum loss: 5853.815430, avg loss: 3.348865, ppl: 28.470396 +epoch: 0, batch: 16493, sum loss: 5280.848633, avg loss: 3.398230, ppl: 29.911098 +epoch: 0, batch: 16494, sum loss: 5574.016602, avg loss: 3.640769, ppl: 38.121132 +epoch: 0, batch: 16495, sum loss: 5086.746094, avg loss: 3.377654, ppl: 29.301937 +epoch: 0, batch: 16496, sum loss: 5844.379883, avg loss: 3.261373, ppl: 26.085323 +epoch: 0, batch: 16497, sum loss: 5784.048828, avg loss: 3.310846, ppl: 27.408312 +epoch: 0, batch: 16498, sum loss: 6619.427734, avg loss: 3.714606, ppl: 41.042416 +epoch: 0, batch: 16499, sum loss: 6420.605469, avg loss: 3.464979, ppl: 31.975779 +epoch: 0, batch: 16500, sum loss: 5969.102539, avg loss: 3.671035, ppl: 39.292545 +epoch: 0, batch: 16501, sum loss: 5141.643555, avg loss: 3.277020, ppl: 26.496681 +epoch: 0, batch: 16502, sum loss: 6100.494141, avg loss: 3.499997, ppl: 33.115341 +epoch: 0, batch: 16503, sum loss: 6766.196777, avg loss: 3.496742, ppl: 33.007755 +epoch: 0, batch: 16504, sum loss: 5500.636719, avg loss: 3.366363, ppl: 28.972956 +epoch: 0, batch: 16505, sum loss: 6266.821289, avg loss: 3.601621, ppl: 36.657623 +epoch: 0, batch: 16506, sum loss: 5866.937500, avg loss: 3.405071, ppl: 30.116434 +epoch: 0, batch: 16507, sum loss: 6240.364746, avg loss: 3.367709, ppl: 29.011982 +epoch: 0, batch: 16508, sum loss: 4842.574707, avg loss: 3.102226, ppl: 22.247419 +epoch: 0, batch: 16509, sum loss: 6526.441406, avg loss: 3.607762, ppl: 36.883427 +epoch: 0, batch: 16510, sum loss: 5307.473633, avg loss: 3.278242, ppl: 26.529091 +epoch: 0, batch: 16511, sum loss: 6356.735352, avg loss: 3.324652, ppl: 
27.789331 +epoch: 0, batch: 16512, sum loss: 5609.077148, avg loss: 3.439042, ppl: 31.157091 +epoch: 0, batch: 16513, sum loss: 6491.256348, avg loss: 3.527857, ppl: 34.050903 +epoch: 0, batch: 16514, sum loss: 5338.665527, avg loss: 3.194893, ppl: 24.407558 +epoch: 0, batch: 16515, sum loss: 5623.052246, avg loss: 3.422430, ppl: 30.643784 +epoch: 0, batch: 16516, sum loss: 5378.486816, avg loss: 3.161956, ppl: 23.616735 +epoch: 0, batch: 16517, sum loss: 6020.689453, avg loss: 3.384311, ppl: 29.497662 +epoch: 0, batch: 16518, sum loss: 5887.577148, avg loss: 3.318815, ppl: 27.627579 +epoch: 0, batch: 16519, sum loss: 6767.925781, avg loss: 3.430272, ppl: 30.885031 +epoch: 0, batch: 16520, sum loss: 5348.019531, avg loss: 3.443670, ppl: 31.301634 +epoch: 0, batch: 16521, sum loss: 5950.221680, avg loss: 3.481698, ppl: 32.514885 +epoch: 0, batch: 16522, sum loss: 5026.554688, avg loss: 3.445205, ppl: 31.349724 +epoch: 0, batch: 16523, sum loss: 4994.890625, avg loss: 3.155332, ppl: 23.460821 +epoch: 0, batch: 16524, sum loss: 5649.548828, avg loss: 3.294197, ppl: 26.955769 +epoch: 0, batch: 16525, sum loss: 7313.005859, avg loss: 3.687850, ppl: 39.958832 +epoch: 0, batch: 16526, sum loss: 6102.293457, avg loss: 3.338235, ppl: 28.169361 +epoch: 0, batch: 16527, sum loss: 5445.875000, avg loss: 3.561723, ppl: 35.223850 +epoch: 0, batch: 16528, sum loss: 4799.160645, avg loss: 3.318922, ppl: 27.630543 +epoch: 0, batch: 16529, sum loss: 5454.021973, avg loss: 3.532398, ppl: 34.205894 +epoch: 0, batch: 16530, sum loss: 7208.295410, avg loss: 3.490700, ppl: 32.808907 +epoch: 0, batch: 16531, sum loss: 5612.176270, avg loss: 3.644270, ppl: 38.254845 +epoch: 0, batch: 16532, sum loss: 5121.616699, avg loss: 3.327886, ppl: 27.879345 +epoch: 0, batch: 16533, sum loss: 5506.379883, avg loss: 3.333160, ppl: 28.026764 +epoch: 0, batch: 16534, sum loss: 6287.616699, avg loss: 3.491181, ppl: 32.824680 +epoch: 0, batch: 16535, sum loss: 5204.965820, avg loss: 3.294282, ppl: 
26.958057 +epoch: 0, batch: 16536, sum loss: 6853.374512, avg loss: 3.666867, ppl: 39.129124 +epoch: 0, batch: 16537, sum loss: 6517.104492, avg loss: 3.318281, ppl: 27.612854 +epoch: 0, batch: 16538, sum loss: 6484.946289, avg loss: 3.616813, ppl: 37.218777 +epoch: 0, batch: 16539, sum loss: 5167.159668, avg loss: 3.141130, ppl: 23.129999 +epoch: 0, batch: 16540, sum loss: 5198.618652, avg loss: 3.377920, ppl: 29.309740 +epoch: 0, batch: 16541, sum loss: 5673.223633, avg loss: 3.403253, ppl: 30.061741 +epoch: 0, batch: 16542, sum loss: 5924.837891, avg loss: 3.407037, ppl: 30.175716 +epoch: 0, batch: 16543, sum loss: 5889.901367, avg loss: 3.495490, ppl: 32.966450 +epoch: 0, batch: 16544, sum loss: 5950.721191, avg loss: 3.814565, ppl: 45.357018 +epoch: 0, batch: 16545, sum loss: 5502.319336, avg loss: 3.584573, ppl: 36.037960 +epoch: 0, batch: 16546, sum loss: 5460.765137, avg loss: 3.248522, ppl: 25.752245 +epoch: 0, batch: 16547, sum loss: 7287.987305, avg loss: 3.536141, ppl: 34.334183 +epoch: 0, batch: 16548, sum loss: 6012.150391, avg loss: 3.400538, ppl: 29.980217 +epoch: 0, batch: 16549, sum loss: 6515.864258, avg loss: 3.766395, ppl: 43.223976 +epoch: 0, batch: 16550, sum loss: 6122.484375, avg loss: 3.401380, ppl: 30.005487 +epoch: 0, batch: 16551, sum loss: 4763.272949, avg loss: 3.057300, ppl: 21.270048 +epoch: 0, batch: 16552, sum loss: 6691.116211, avg loss: 3.477711, ppl: 32.385513 +epoch: 0, batch: 16553, sum loss: 6974.064941, avg loss: 3.784083, ppl: 43.995304 +epoch: 0, batch: 16554, sum loss: 6462.198730, avg loss: 3.690576, ppl: 40.067932 +epoch: 0, batch: 16555, sum loss: 5210.168945, avg loss: 3.295489, ppl: 26.990618 +epoch: 0, batch: 16556, sum loss: 5363.676758, avg loss: 3.217562, ppl: 24.967186 +epoch: 0, batch: 16557, sum loss: 4939.858398, avg loss: 3.267102, ppl: 26.235207 +epoch: 0, batch: 16558, sum loss: 5238.139160, avg loss: 3.110534, ppl: 22.433025 +epoch: 0, batch: 16559, sum loss: 6296.074219, avg loss: 3.549083, ppl: 
34.781425 +epoch: 0, batch: 16560, sum loss: 4342.807129, avg loss: 3.064790, ppl: 21.429956 +epoch: 0, batch: 16561, sum loss: 6299.186523, avg loss: 3.511252, ppl: 33.490185 +epoch: 0, batch: 16562, sum loss: 5867.828125, avg loss: 3.549805, ppl: 34.806534 +epoch: 0, batch: 16563, sum loss: 5414.221680, avg loss: 3.446354, ppl: 31.385748 +epoch: 0, batch: 16564, sum loss: 4999.587402, avg loss: 3.269841, ppl: 26.307167 +epoch: 0, batch: 16565, sum loss: 4410.152832, avg loss: 2.912915, ppl: 18.410381 +epoch: 0, batch: 16566, sum loss: 5962.401855, avg loss: 3.383883, ppl: 29.485046 +epoch: 0, batch: 16567, sum loss: 5357.979492, avg loss: 3.159186, ppl: 23.551420 +epoch: 0, batch: 16568, sum loss: 6150.202148, avg loss: 3.313686, ppl: 27.486263 +epoch: 0, batch: 16569, sum loss: 5013.868164, avg loss: 3.247324, ppl: 25.721422 +epoch: 0, batch: 16570, sum loss: 6100.775391, avg loss: 3.400655, ppl: 29.983742 +epoch: 0, batch: 16571, sum loss: 4513.319824, avg loss: 3.261069, ppl: 26.077400 +epoch: 0, batch: 16572, sum loss: 5308.901367, avg loss: 3.315991, ppl: 27.549681 +epoch: 0, batch: 16573, sum loss: 6734.636719, avg loss: 3.614942, ppl: 37.149185 +epoch: 0, batch: 16574, sum loss: 5360.759277, avg loss: 3.221610, ppl: 25.068455 +epoch: 0, batch: 16575, sum loss: 6840.795898, avg loss: 3.441044, ppl: 31.219538 +epoch: 0, batch: 16576, sum loss: 5510.972656, avg loss: 3.385118, ppl: 29.521482 +epoch: 0, batch: 16577, sum loss: 4657.299805, avg loss: 2.938359, ppl: 18.884840 +epoch: 0, batch: 16578, sum loss: 5841.766113, avg loss: 3.376743, ppl: 29.275276 +epoch: 0, batch: 16579, sum loss: 5799.210938, avg loss: 3.391351, ppl: 29.706072 +epoch: 0, batch: 16580, sum loss: 5553.318359, avg loss: 3.176956, ppl: 23.973660 +epoch: 0, batch: 16581, sum loss: 6535.777832, avg loss: 3.548197, ppl: 34.750591 +epoch: 0, batch: 16582, sum loss: 6201.833984, avg loss: 3.574544, ppl: 35.678356 +epoch: 0, batch: 16583, sum loss: 6440.357910, avg loss: 3.538658, ppl: 
34.420708 +epoch: 0, batch: 16584, sum loss: 4524.971680, avg loss: 3.045068, ppl: 21.011465 +epoch: 0, batch: 16585, sum loss: 6303.589355, avg loss: 3.503941, ppl: 33.246204 +epoch: 0, batch: 16586, sum loss: 7303.068848, avg loss: 3.611804, ppl: 37.032818 +epoch: 0, batch: 16587, sum loss: 5497.044922, avg loss: 3.293616, ppl: 26.940105 +epoch: 0, batch: 16588, sum loss: 5840.788086, avg loss: 3.441832, ppl: 31.244133 +epoch: 0, batch: 16589, sum loss: 6558.003906, avg loss: 3.364805, ppl: 28.927841 +epoch: 0, batch: 16590, sum loss: 6539.935059, avg loss: 3.942095, ppl: 51.526413 +epoch: 0, batch: 16591, sum loss: 6517.639648, avg loss: 3.432143, ppl: 30.942881 +epoch: 0, batch: 16592, sum loss: 4777.958496, avg loss: 3.228350, ppl: 25.237989 +epoch: 0, batch: 16593, sum loss: 5221.289062, avg loss: 3.203245, ppl: 24.612267 +epoch: 0, batch: 16594, sum loss: 6075.160156, avg loss: 3.362014, ppl: 28.847239 +epoch: 0, batch: 16595, sum loss: 5705.185547, avg loss: 3.404049, ppl: 30.085661 +epoch: 0, batch: 16596, sum loss: 5446.794922, avg loss: 3.351874, ppl: 28.556187 +epoch: 0, batch: 16597, sum loss: 7257.209961, avg loss: 3.835735, ppl: 46.327446 +epoch: 0, batch: 16598, sum loss: 5577.825684, avg loss: 3.310282, ppl: 27.392855 +epoch: 0, batch: 16599, sum loss: 4182.712402, avg loss: 2.974902, ppl: 19.587706 +epoch: 0, batch: 16600, sum loss: 5137.026367, avg loss: 3.169048, ppl: 23.784821 +epoch: 0, batch: 16601, sum loss: 6178.921875, avg loss: 3.434643, ppl: 31.020323 +epoch: 0, batch: 16602, sum loss: 5931.000488, avg loss: 3.389143, ppl: 29.640539 +epoch: 0, batch: 16603, sum loss: 4975.909180, avg loss: 3.094471, ppl: 22.075552 +epoch: 0, batch: 16604, sum loss: 6098.900879, avg loss: 3.411018, ppl: 30.296082 +epoch: 0, batch: 16605, sum loss: 5111.561035, avg loss: 3.088557, ppl: 21.945377 +epoch: 0, batch: 16606, sum loss: 6505.146484, avg loss: 3.694007, ppl: 40.205624 +epoch: 0, batch: 16607, sum loss: 5565.695312, avg loss: 3.377242, ppl: 
29.289888 +epoch: 0, batch: 16608, sum loss: 5176.685547, avg loss: 3.235428, ppl: 25.417257 +epoch: 0, batch: 16609, sum loss: 5804.149902, avg loss: 3.286608, ppl: 26.751966 +epoch: 0, batch: 16610, sum loss: 5927.636719, avg loss: 3.373726, ppl: 29.187073 +epoch: 0, batch: 16611, sum loss: 6637.666504, avg loss: 3.376229, ppl: 29.260231 +epoch: 0, batch: 16612, sum loss: 6515.004883, avg loss: 3.529255, ppl: 34.098560 +epoch: 0, batch: 16613, sum loss: 6236.456055, avg loss: 3.545455, ppl: 34.655464 +epoch: 0, batch: 16614, sum loss: 5321.218750, avg loss: 3.482473, ppl: 32.540100 +epoch: 0, batch: 16615, sum loss: 5795.267578, avg loss: 3.686557, ppl: 39.907211 +epoch: 0, batch: 16616, sum loss: 5865.945801, avg loss: 3.379001, ppl: 29.341448 +epoch: 0, batch: 16617, sum loss: 5947.425781, avg loss: 3.412178, ppl: 30.331228 +epoch: 0, batch: 16618, sum loss: 4977.155762, avg loss: 3.162107, ppl: 23.620300 +epoch: 0, batch: 16619, sum loss: 5049.638184, avg loss: 3.228669, ppl: 25.246031 +epoch: 0, batch: 16620, sum loss: 6685.932617, avg loss: 3.875903, ppl: 48.226234 +epoch: 0, batch: 16621, sum loss: 6834.150879, avg loss: 3.757092, ppl: 42.823723 +epoch: 0, batch: 16622, sum loss: 5603.734375, avg loss: 3.463371, ppl: 31.924414 +epoch: 0, batch: 16623, sum loss: 5745.647461, avg loss: 3.292634, ppl: 26.913673 +epoch: 0, batch: 16624, sum loss: 4900.760254, avg loss: 3.188523, ppl: 24.252586 +epoch: 0, batch: 16625, sum loss: 6141.671875, avg loss: 3.410145, ppl: 30.269642 +epoch: 0, batch: 16626, sum loss: 6024.021484, avg loss: 3.450184, ppl: 31.506199 +epoch: 0, batch: 16627, sum loss: 5864.990723, avg loss: 3.526753, ppl: 34.013363 +epoch: 0, batch: 16628, sum loss: 7493.017578, avg loss: 3.707579, ppl: 40.755035 +epoch: 0, batch: 16629, sum loss: 6039.141113, avg loss: 3.668980, ppl: 39.211895 +epoch: 0, batch: 16630, sum loss: 5140.169434, avg loss: 3.399583, ppl: 29.951611 +epoch: 0, batch: 16631, sum loss: 5553.492676, avg loss: 3.247657, ppl: 
25.729973 +epoch: 0, batch: 16632, sum loss: 5195.716797, avg loss: 3.473073, ppl: 32.235641 +epoch: 0, batch: 16633, sum loss: 5983.359863, avg loss: 3.548849, ppl: 34.773281 +epoch: 0, batch: 16634, sum loss: 4921.634277, avg loss: 3.237917, ppl: 25.480600 +epoch: 0, batch: 16635, sum loss: 6729.781738, avg loss: 3.659479, ppl: 38.841106 +epoch: 0, batch: 16636, sum loss: 5910.242676, avg loss: 3.436188, ppl: 31.068285 +epoch: 0, batch: 16637, sum loss: 5048.809082, avg loss: 3.080420, ppl: 21.767548 +epoch: 0, batch: 16638, sum loss: 5531.610840, avg loss: 3.290667, ppl: 26.860762 +epoch: 0, batch: 16639, sum loss: 5424.104980, avg loss: 3.198175, ppl: 24.487804 +epoch: 0, batch: 16640, sum loss: 5386.636230, avg loss: 3.302659, ppl: 27.184814 +epoch: 0, batch: 16641, sum loss: 5497.079102, avg loss: 3.177502, ppl: 23.986769 +epoch: 0, batch: 16642, sum loss: 5182.512695, avg loss: 3.294668, ppl: 26.968458 +epoch: 0, batch: 16643, sum loss: 7359.714844, avg loss: 3.711404, ppl: 40.911221 +epoch: 0, batch: 16644, sum loss: 5263.599609, avg loss: 3.241133, ppl: 25.562660 +epoch: 0, batch: 16645, sum loss: 5822.840820, avg loss: 3.217039, ppl: 24.954130 +epoch: 0, batch: 16646, sum loss: 6949.201660, avg loss: 3.626932, ppl: 37.597298 +epoch: 0, batch: 16647, sum loss: 6399.887207, avg loss: 3.476310, ppl: 32.340176 +epoch: 0, batch: 16648, sum loss: 5419.797852, avg loss: 3.349690, ppl: 28.493891 +epoch: 0, batch: 16649, sum loss: 6393.284668, avg loss: 3.706252, ppl: 40.700977 +epoch: 0, batch: 16650, sum loss: 5836.878418, avg loss: 3.266300, ppl: 26.214172 +epoch: 0, batch: 16651, sum loss: 5187.929199, avg loss: 3.279348, ppl: 26.558455 +epoch: 0, batch: 16652, sum loss: 5626.061523, avg loss: 3.220413, ppl: 25.038458 +epoch: 0, batch: 16653, sum loss: 6178.679688, avg loss: 3.304107, ppl: 27.224218 +epoch: 0, batch: 16654, sum loss: 5832.024414, avg loss: 3.193880, ppl: 24.382845 +epoch: 0, batch: 16655, sum loss: 6641.233887, avg loss: 3.540103, ppl: 
34.470486 +epoch: 0, batch: 16656, sum loss: 5829.830078, avg loss: 3.511946, ppl: 33.513420 +epoch: 0, batch: 16657, sum loss: 6333.891113, avg loss: 3.524703, ppl: 33.943691 +epoch: 0, batch: 16658, sum loss: 6045.791992, avg loss: 3.338372, ppl: 28.173237 +epoch: 0, batch: 16659, sum loss: 5159.983887, avg loss: 3.536658, ppl: 34.351925 +epoch: 0, batch: 16660, sum loss: 5359.773438, avg loss: 3.254264, ppl: 25.900555 +epoch: 0, batch: 16661, sum loss: 5764.368164, avg loss: 3.525607, ppl: 33.974403 +epoch: 0, batch: 16662, sum loss: 4671.214844, avg loss: 3.241648, ppl: 25.575842 +epoch: 0, batch: 16663, sum loss: 5082.624023, avg loss: 3.352654, ppl: 28.578493 +epoch: 0, batch: 16664, sum loss: 5004.561523, avg loss: 3.218368, ppl: 24.987303 +epoch: 0, batch: 16665, sum loss: 5397.228516, avg loss: 3.562527, ppl: 35.252174 +epoch: 0, batch: 16666, sum loss: 6377.650879, avg loss: 3.707937, ppl: 40.769592 +epoch: 0, batch: 16667, sum loss: 5987.492188, avg loss: 3.468999, ppl: 32.104588 +epoch: 0, batch: 16668, sum loss: 5111.477539, avg loss: 3.400850, ppl: 29.989574 +epoch: 0, batch: 16669, sum loss: 5139.192383, avg loss: 3.260909, ppl: 26.073223 +epoch: 0, batch: 16670, sum loss: 5371.257812, avg loss: 3.336185, ppl: 28.111675 +epoch: 0, batch: 16671, sum loss: 6557.917969, avg loss: 3.412028, ppl: 30.326694 +epoch: 0, batch: 16672, sum loss: 6479.571289, avg loss: 3.636123, ppl: 37.944447 +epoch: 0, batch: 16673, sum loss: 5699.732910, avg loss: 3.562333, ppl: 35.245331 +epoch: 0, batch: 16674, sum loss: 5717.156250, avg loss: 3.293293, ppl: 26.931396 +epoch: 0, batch: 16675, sum loss: 4548.709473, avg loss: 3.258388, ppl: 26.007580 +epoch: 0, batch: 16676, sum loss: 6018.946777, avg loss: 3.453211, ppl: 31.601706 +epoch: 0, batch: 16677, sum loss: 6779.611328, avg loss: 3.729159, ppl: 41.644077 +epoch: 0, batch: 16678, sum loss: 5999.659180, avg loss: 3.327598, ppl: 27.871317 +epoch: 0, batch: 16679, sum loss: 5253.712402, avg loss: 3.335690, ppl: 
28.097771 +epoch: 0, batch: 16680, sum loss: 6319.527344, avg loss: 3.451408, ppl: 31.544773 +epoch: 0, batch: 16681, sum loss: 6586.402832, avg loss: 3.527800, ppl: 34.048981 +epoch: 0, batch: 16682, sum loss: 6344.304199, avg loss: 3.614988, ppl: 37.150902 +epoch: 0, batch: 16683, sum loss: 3904.925293, avg loss: 2.962766, ppl: 19.351418 +epoch: 0, batch: 16684, sum loss: 5287.044922, avg loss: 3.430918, ppl: 30.905006 +epoch: 0, batch: 16685, sum loss: 6110.507812, avg loss: 3.509769, ppl: 33.440540 +epoch: 0, batch: 16686, sum loss: 6477.231445, avg loss: 3.537538, ppl: 34.382160 +epoch: 0, batch: 16687, sum loss: 6253.267090, avg loss: 3.363780, ppl: 28.898220 +epoch: 0, batch: 16688, sum loss: 5890.857910, avg loss: 3.401188, ppl: 29.999723 +epoch: 0, batch: 16689, sum loss: 5910.588867, avg loss: 3.219275, ppl: 25.009981 +epoch: 0, batch: 16690, sum loss: 5535.146973, avg loss: 3.530068, ppl: 34.126293 +epoch: 0, batch: 16691, sum loss: 5113.236816, avg loss: 3.133111, ppl: 22.945251 +epoch: 0, batch: 16692, sum loss: 6331.426758, avg loss: 3.511607, ppl: 33.502052 +epoch: 0, batch: 16693, sum loss: 6056.626953, avg loss: 3.527447, ppl: 34.036968 +epoch: 0, batch: 16694, sum loss: 4724.794922, avg loss: 3.301744, ppl: 27.159952 +epoch: 0, batch: 16695, sum loss: 6248.044922, avg loss: 3.617861, ppl: 37.257771 +epoch: 0, batch: 16696, sum loss: 7080.164062, avg loss: 3.722484, ppl: 41.367016 +epoch: 0, batch: 16697, sum loss: 6701.618652, avg loss: 3.417450, ppl: 30.491554 +epoch: 0, batch: 16698, sum loss: 5928.022461, avg loss: 3.362463, ppl: 28.860186 +epoch: 0, batch: 16699, sum loss: 6382.496582, avg loss: 3.824144, ppl: 45.793602 +epoch: 0, batch: 16700, sum loss: 6584.752441, avg loss: 3.682747, ppl: 39.755466 +epoch: 0, batch: 16701, sum loss: 7551.420898, avg loss: 3.616581, ppl: 37.210129 +epoch: 0, batch: 16702, sum loss: 6275.059570, avg loss: 3.521358, ppl: 33.830338 +epoch: 0, batch: 16703, sum loss: 5825.798828, avg loss: 3.591738, ppl: 
36.297096 +epoch: 0, batch: 16704, sum loss: 5640.378906, avg loss: 3.273580, ppl: 26.405710 +epoch: 0, batch: 16705, sum loss: 5709.904785, avg loss: 3.251654, ppl: 25.833042 +epoch: 0, batch: 16706, sum loss: 6167.057617, avg loss: 3.437602, ppl: 31.112255 +epoch: 0, batch: 16707, sum loss: 5840.011230, avg loss: 3.431264, ppl: 30.915693 +epoch: 0, batch: 16708, sum loss: 5697.225586, avg loss: 3.436204, ppl: 31.068789 +epoch: 0, batch: 16709, sum loss: 6040.123535, avg loss: 3.374371, ppl: 29.205894 +epoch: 0, batch: 16710, sum loss: 5455.478516, avg loss: 3.435440, ppl: 31.045071 +epoch: 0, batch: 16711, sum loss: 5939.749512, avg loss: 3.363391, ppl: 28.886992 +epoch: 0, batch: 16712, sum loss: 5647.877930, avg loss: 3.441729, ppl: 31.240936 +epoch: 0, batch: 16713, sum loss: 6728.129395, avg loss: 3.932279, ppl: 51.023144 +epoch: 0, batch: 16714, sum loss: 5062.473145, avg loss: 3.255610, ppl: 25.935431 +epoch: 0, batch: 16715, sum loss: 6520.713867, avg loss: 3.536179, ppl: 34.335468 +epoch: 0, batch: 16716, sum loss: 5491.890625, avg loss: 3.346673, ppl: 28.408064 +epoch: 0, batch: 16717, sum loss: 7216.037598, avg loss: 3.518302, ppl: 33.727119 +epoch: 0, batch: 16718, sum loss: 5419.443848, avg loss: 3.042922, ppl: 20.966417 +epoch: 0, batch: 16719, sum loss: 5345.985352, avg loss: 3.396433, ppl: 29.857397 +epoch: 0, batch: 16720, sum loss: 6499.500000, avg loss: 3.496234, ppl: 32.990986 +epoch: 0, batch: 16721, sum loss: 5687.094238, avg loss: 3.365144, ppl: 28.937677 +epoch: 0, batch: 16722, sum loss: 6022.979492, avg loss: 3.487539, ppl: 32.705353 +epoch: 0, batch: 16723, sum loss: 6397.695801, avg loss: 3.756721, ppl: 42.807831 +epoch: 0, batch: 16724, sum loss: 6573.215332, avg loss: 3.333273, ppl: 28.029945 +epoch: 0, batch: 16725, sum loss: 5216.948242, avg loss: 3.461810, ppl: 31.874628 +epoch: 0, batch: 16726, sum loss: 4007.749512, avg loss: 3.126170, ppl: 22.786533 +epoch: 0, batch: 16727, sum loss: 5443.275879, avg loss: 3.414853, ppl: 
30.412481 +epoch: 0, batch: 16728, sum loss: 4913.036621, avg loss: 3.080274, ppl: 21.764362 +epoch: 0, batch: 16729, sum loss: 5621.294434, avg loss: 3.190292, ppl: 24.295523 +epoch: 0, batch: 16730, sum loss: 5867.381836, avg loss: 3.488336, ppl: 32.731449 +epoch: 0, batch: 16731, sum loss: 5121.528320, avg loss: 3.126696, ppl: 22.798525 +epoch: 0, batch: 16732, sum loss: 6535.226562, avg loss: 3.557554, ppl: 35.077301 +epoch: 0, batch: 16733, sum loss: 5556.625977, avg loss: 3.550560, ppl: 34.832809 +epoch: 0, batch: 16734, sum loss: 5710.373047, avg loss: 3.484059, ppl: 32.591747 +epoch: 0, batch: 16735, sum loss: 7515.244141, avg loss: 3.618317, ppl: 37.274776 +epoch: 0, batch: 16736, sum loss: 7582.570801, avg loss: 3.574998, ppl: 35.694546 +epoch: 0, batch: 16737, sum loss: 4689.747070, avg loss: 3.302639, ppl: 27.184284 +epoch: 0, batch: 16738, sum loss: 6751.847656, avg loss: 3.469603, ppl: 32.123997 +epoch: 0, batch: 16739, sum loss: 5655.459961, avg loss: 3.328699, ppl: 27.902027 +epoch: 0, batch: 16740, sum loss: 5155.785156, avg loss: 3.361007, ppl: 28.818203 +epoch: 0, batch: 16741, sum loss: 5802.229492, avg loss: 3.363611, ppl: 28.893351 +epoch: 0, batch: 16742, sum loss: 6188.854492, avg loss: 3.177030, ppl: 23.975443 +epoch: 0, batch: 16743, sum loss: 5950.468262, avg loss: 3.477772, ppl: 32.387497 +epoch: 0, batch: 16744, sum loss: 6251.306641, avg loss: 3.410424, ppl: 30.278072 +epoch: 0, batch: 16745, sum loss: 5977.277344, avg loss: 3.491400, ppl: 32.831886 +epoch: 0, batch: 16746, sum loss: 4869.012207, avg loss: 3.351007, ppl: 28.531437 +epoch: 0, batch: 16747, sum loss: 5518.701172, avg loss: 3.352796, ppl: 28.582527 +epoch: 0, batch: 16748, sum loss: 6620.610352, avg loss: 3.508538, ppl: 33.399387 +epoch: 0, batch: 16749, sum loss: 5306.755371, avg loss: 3.425923, ppl: 30.751026 +epoch: 0, batch: 16750, sum loss: 6074.150391, avg loss: 3.564642, ppl: 35.326813 +epoch: 0, batch: 16751, sum loss: 6204.201660, avg loss: 3.241485, ppl: 
25.571659 +epoch: 0, batch: 16752, sum loss: 4899.630859, avg loss: 3.043249, ppl: 20.973272 +epoch: 0, batch: 16753, sum loss: 5643.949219, avg loss: 3.624887, ppl: 37.520493 +epoch: 0, batch: 16754, sum loss: 4367.965820, avg loss: 3.151490, ppl: 23.370872 +epoch: 0, batch: 16755, sum loss: 5945.633789, avg loss: 3.240128, ppl: 25.536980 +epoch: 0, batch: 16756, sum loss: 6120.491699, avg loss: 3.577143, ppl: 35.771202 +epoch: 0, batch: 16757, sum loss: 6017.077637, avg loss: 3.420738, ppl: 30.591976 +epoch: 0, batch: 16758, sum loss: 5722.550293, avg loss: 3.378129, ppl: 29.315863 +epoch: 0, batch: 16759, sum loss: 6632.876465, avg loss: 3.766540, ppl: 43.230209 +epoch: 0, batch: 16760, sum loss: 5335.231934, avg loss: 3.383153, ppl: 29.463522 +epoch: 0, batch: 16761, sum loss: 6434.827637, avg loss: 3.584862, ppl: 36.048389 +epoch: 0, batch: 16762, sum loss: 6071.745117, avg loss: 3.479510, ppl: 32.443825 +epoch: 0, batch: 16763, sum loss: 4883.436035, avg loss: 2.943602, ppl: 18.984106 +epoch: 0, batch: 16764, sum loss: 5217.218262, avg loss: 3.568549, ppl: 35.465092 +epoch: 0, batch: 16765, sum loss: 6179.645508, avg loss: 3.467815, ppl: 32.066589 +epoch: 0, batch: 16766, sum loss: 6436.552734, avg loss: 3.376995, ppl: 29.282640 +epoch: 0, batch: 16767, sum loss: 5257.024902, avg loss: 3.189942, ppl: 24.287027 +epoch: 0, batch: 16768, sum loss: 6080.687988, avg loss: 3.464779, ppl: 31.969406 +epoch: 0, batch: 16769, sum loss: 4542.449707, avg loss: 3.050671, ppl: 21.129524 +epoch: 0, batch: 16770, sum loss: 6781.227539, avg loss: 3.511770, ppl: 33.507515 +epoch: 0, batch: 16771, sum loss: 5066.407715, avg loss: 3.198490, ppl: 24.495506 +epoch: 0, batch: 16772, sum loss: 5307.291504, avg loss: 3.340020, ppl: 28.219683 +epoch: 0, batch: 16773, sum loss: 5692.206543, avg loss: 3.338538, ppl: 28.177893 +epoch: 0, batch: 16774, sum loss: 5727.907715, avg loss: 3.448469, ppl: 31.452215 +epoch: 0, batch: 16775, sum loss: 7077.838379, avg loss: 3.663477, ppl: 
38.996716 +epoch: 0, batch: 16776, sum loss: 5952.318848, avg loss: 3.301342, ppl: 27.149036 +epoch: 0, batch: 16777, sum loss: 5983.378906, avg loss: 3.407391, ppl: 30.186394 +epoch: 0, batch: 16778, sum loss: 5914.342773, avg loss: 3.432584, ppl: 30.956539 +epoch: 0, batch: 16779, sum loss: 6577.639160, avg loss: 3.707801, ppl: 40.764061 +epoch: 0, batch: 16780, sum loss: 7018.343262, avg loss: 3.663018, ppl: 38.978821 +epoch: 0, batch: 16781, sum loss: 5602.458008, avg loss: 3.350753, ppl: 28.524193 +epoch: 0, batch: 16782, sum loss: 7087.719727, avg loss: 3.995332, ppl: 54.343891 +epoch: 0, batch: 16783, sum loss: 6350.193359, avg loss: 3.569530, ppl: 35.499889 +epoch: 0, batch: 16784, sum loss: 6028.900391, avg loss: 3.575860, ppl: 35.725342 +epoch: 0, batch: 16785, sum loss: 5300.350586, avg loss: 3.249755, ppl: 25.784025 +epoch: 0, batch: 16786, sum loss: 5423.142578, avg loss: 3.284762, ppl: 26.702639 +epoch: 0, batch: 16787, sum loss: 5990.066406, avg loss: 3.296679, ppl: 27.022760 +epoch: 0, batch: 16788, sum loss: 6227.182617, avg loss: 3.408420, ppl: 30.217451 +epoch: 0, batch: 16789, sum loss: 5002.099121, avg loss: 3.175936, ppl: 23.949221 +epoch: 0, batch: 16790, sum loss: 5704.543457, avg loss: 3.501868, ppl: 33.177376 +epoch: 0, batch: 16791, sum loss: 5512.667969, avg loss: 3.462731, ppl: 31.903992 +epoch: 0, batch: 16792, sum loss: 5285.302246, avg loss: 3.191607, ppl: 24.327501 +epoch: 0, batch: 16793, sum loss: 5606.244629, avg loss: 3.454248, ppl: 31.634489 +epoch: 0, batch: 16794, sum loss: 5170.742676, avg loss: 3.422067, ppl: 30.632658 +epoch: 0, batch: 16795, sum loss: 7776.543945, avg loss: 3.542845, ppl: 34.565109 +epoch: 0, batch: 16796, sum loss: 4903.334961, avg loss: 3.058849, ppl: 21.303020 +epoch: 0, batch: 16797, sum loss: 4955.216797, avg loss: 3.192795, ppl: 24.356398 +epoch: 0, batch: 16798, sum loss: 5221.892090, avg loss: 3.237379, ppl: 25.466881 +epoch: 0, batch: 16799, sum loss: 6688.508789, avg loss: 3.798131, ppl: 
44.617714 +epoch: 0, batch: 16800, sum loss: 5269.080078, avg loss: 3.177974, ppl: 23.998072 +epoch: 0, batch: 16801, sum loss: 6876.834961, avg loss: 3.751683, ppl: 42.592705 +epoch: 0, batch: 16802, sum loss: 5554.199707, avg loss: 3.294306, ppl: 26.958700 +epoch: 0, batch: 16803, sum loss: 5226.388672, avg loss: 3.318342, ppl: 27.614527 +epoch: 0, batch: 16804, sum loss: 5135.527832, avg loss: 3.285686, ppl: 26.727325 +epoch: 0, batch: 16805, sum loss: 5171.333984, avg loss: 3.210015, ppl: 24.779459 +epoch: 0, batch: 16806, sum loss: 6746.090820, avg loss: 3.569360, ppl: 35.493877 +epoch: 0, batch: 16807, sum loss: 5348.577148, avg loss: 3.344951, ppl: 28.359190 +epoch: 0, batch: 16808, sum loss: 4690.482910, avg loss: 3.416229, ppl: 30.454363 +epoch: 0, batch: 16809, sum loss: 6355.843750, avg loss: 3.488389, ppl: 32.733158 +epoch: 0, batch: 16810, sum loss: 6090.362305, avg loss: 3.500208, ppl: 33.122345 +epoch: 0, batch: 16811, sum loss: 6525.301270, avg loss: 3.567688, ppl: 35.434582 +epoch: 0, batch: 16812, sum loss: 5251.275879, avg loss: 3.357593, ppl: 28.719988 +epoch: 0, batch: 16813, sum loss: 5067.754883, avg loss: 3.347262, ppl: 28.424805 +epoch: 0, batch: 16814, sum loss: 5861.701172, avg loss: 3.357217, ppl: 28.709190 +epoch: 0, batch: 16815, sum loss: 4415.278320, avg loss: 3.181036, ppl: 24.071684 +epoch: 0, batch: 16816, sum loss: 5215.702637, avg loss: 3.080746, ppl: 21.774639 +epoch: 0, batch: 16817, sum loss: 6027.798828, avg loss: 3.405536, ppl: 30.130447 +epoch: 0, batch: 16818, sum loss: 5905.550781, avg loss: 3.372673, ppl: 29.156366 +epoch: 0, batch: 16819, sum loss: 5920.667480, avg loss: 3.254902, ppl: 25.917084 +epoch: 0, batch: 16820, sum loss: 5134.025391, avg loss: 3.436429, ppl: 31.075796 +epoch: 0, batch: 16821, sum loss: 6301.488770, avg loss: 3.717692, ppl: 41.169281 +epoch: 0, batch: 16822, sum loss: 6709.867676, avg loss: 3.636785, ppl: 37.969551 +epoch: 0, batch: 16823, sum loss: 6401.842285, avg loss: 3.496364, ppl: 
32.995266 +epoch: 0, batch: 16824, sum loss: 5418.264648, avg loss: 3.435805, ppl: 31.056406 +epoch: 0, batch: 16825, sum loss: 7117.620117, avg loss: 3.906487, ppl: 49.723988 +epoch: 0, batch: 16826, sum loss: 4741.649902, avg loss: 3.227808, ppl: 25.224298 +epoch: 0, batch: 16827, sum loss: 5313.491699, avg loss: 3.343922, ppl: 28.330009 +epoch: 0, batch: 16828, sum loss: 5745.273926, avg loss: 3.247752, ppl: 25.732439 +epoch: 0, batch: 16829, sum loss: 5904.276367, avg loss: 3.565384, ppl: 35.353031 +epoch: 0, batch: 16830, sum loss: 5248.975098, avg loss: 3.515723, ppl: 33.640255 +epoch: 0, batch: 16831, sum loss: 6942.003906, avg loss: 3.483193, ppl: 32.563538 +epoch: 0, batch: 16832, sum loss: 6614.750977, avg loss: 3.457789, ppl: 31.746714 +epoch: 0, batch: 16833, sum loss: 5249.075684, avg loss: 3.311720, ppl: 27.432259 +epoch: 0, batch: 16834, sum loss: 6102.929199, avg loss: 3.489382, ppl: 32.765694 +epoch: 0, batch: 16835, sum loss: 4650.470215, avg loss: 3.293534, ppl: 26.937895 +epoch: 0, batch: 16836, sum loss: 6398.992188, avg loss: 3.568875, ppl: 35.476654 +epoch: 0, batch: 16837, sum loss: 7530.374023, avg loss: 3.495995, ppl: 32.983101 +epoch: 0, batch: 16838, sum loss: 5604.309082, avg loss: 3.312239, ppl: 27.446520 +epoch: 0, batch: 16839, sum loss: 5978.231934, avg loss: 3.690267, ppl: 40.055534 +epoch: 0, batch: 16840, sum loss: 6452.622070, avg loss: 3.641435, ppl: 38.146526 +epoch: 0, batch: 16841, sum loss: 5568.973633, avg loss: 3.273941, ppl: 26.415232 +epoch: 0, batch: 16842, sum loss: 5622.605469, avg loss: 3.309362, ppl: 27.367657 +epoch: 0, batch: 16843, sum loss: 5822.466797, avg loss: 3.513860, ppl: 33.577644 +epoch: 0, batch: 16844, sum loss: 6164.909668, avg loss: 3.391039, ppl: 29.696802 +epoch: 0, batch: 16845, sum loss: 6692.214844, avg loss: 3.361233, ppl: 28.824709 +epoch: 0, batch: 16846, sum loss: 5508.746582, avg loss: 3.246168, ppl: 25.691698 +epoch: 0, batch: 16847, sum loss: 6341.965820, avg loss: 3.476955, ppl: 
32.361031 +epoch: 0, batch: 16848, sum loss: 6318.201172, avg loss: 3.375108, ppl: 29.227427 +epoch: 0, batch: 16849, sum loss: 6560.249512, avg loss: 3.540340, ppl: 34.478630 +epoch: 0, batch: 16850, sum loss: 4638.921387, avg loss: 3.234952, ppl: 25.405157 +epoch: 0, batch: 16851, sum loss: 5601.972656, avg loss: 3.407526, ppl: 30.190460 +epoch: 0, batch: 16852, sum loss: 5877.780273, avg loss: 3.534444, ppl: 34.275948 +epoch: 0, batch: 16853, sum loss: 5272.321777, avg loss: 3.132693, ppl: 22.935652 +epoch: 0, batch: 16854, sum loss: 5345.463867, avg loss: 3.544737, ppl: 34.630589 +epoch: 0, batch: 16855, sum loss: 7200.129395, avg loss: 3.656744, ppl: 38.735027 +epoch: 0, batch: 16856, sum loss: 6116.494141, avg loss: 3.664766, ppl: 39.047001 +epoch: 0, batch: 16857, sum loss: 4083.801514, avg loss: 2.904553, ppl: 18.257080 +epoch: 0, batch: 16858, sum loss: 5538.958008, avg loss: 3.222198, ppl: 25.083193 +epoch: 0, batch: 16859, sum loss: 5145.496094, avg loss: 3.127961, ppl: 22.827385 +epoch: 0, batch: 16860, sum loss: 5452.170410, avg loss: 3.142461, ppl: 23.160803 +epoch: 0, batch: 16861, sum loss: 6399.533203, avg loss: 3.585173, ppl: 36.059593 +epoch: 0, batch: 16862, sum loss: 5580.672363, avg loss: 3.442734, ppl: 31.272348 +epoch: 0, batch: 16863, sum loss: 5516.342773, avg loss: 3.531590, ppl: 34.178249 +epoch: 0, batch: 16864, sum loss: 6120.353027, avg loss: 3.385151, ppl: 29.522448 +epoch: 0, batch: 16865, sum loss: 5593.046875, avg loss: 3.290028, ppl: 26.843611 +epoch: 0, batch: 16866, sum loss: 5402.241211, avg loss: 3.153672, ppl: 23.421923 +epoch: 0, batch: 16867, sum loss: 5591.333496, avg loss: 3.237599, ppl: 25.472492 +epoch: 0, batch: 16868, sum loss: 5765.754395, avg loss: 3.248312, ppl: 25.746849 +epoch: 0, batch: 16869, sum loss: 4947.699219, avg loss: 3.356648, ppl: 28.692858 +epoch: 0, batch: 16870, sum loss: 5637.848633, avg loss: 3.653823, ppl: 38.622032 +epoch: 0, batch: 16871, sum loss: 5179.291016, avg loss: 3.337172, ppl: 
28.139442 +epoch: 0, batch: 16872, sum loss: 5039.931641, avg loss: 3.409967, ppl: 30.264257 +epoch: 0, batch: 16873, sum loss: 5622.459961, avg loss: 3.311225, ppl: 27.418697 +epoch: 0, batch: 16874, sum loss: 5696.848633, avg loss: 3.456826, ppl: 31.716141 +epoch: 0, batch: 16875, sum loss: 6792.097656, avg loss: 3.608979, ppl: 36.928310 +epoch: 0, batch: 16876, sum loss: 6267.981445, avg loss: 3.543234, ppl: 34.578579 +epoch: 0, batch: 16877, sum loss: 5737.654785, avg loss: 3.581557, ppl: 35.929451 +epoch: 0, batch: 16878, sum loss: 6724.619629, avg loss: 3.392845, ppl: 29.750484 +epoch: 0, batch: 16879, sum loss: 6444.830078, avg loss: 3.361936, ppl: 28.844969 +epoch: 0, batch: 16880, sum loss: 6619.110840, avg loss: 3.575965, ppl: 35.729080 +epoch: 0, batch: 16881, sum loss: 5409.805664, avg loss: 3.193510, ppl: 24.373825 +epoch: 0, batch: 16882, sum loss: 4950.400391, avg loss: 3.097873, ppl: 22.150780 +epoch: 0, batch: 16883, sum loss: 4697.426758, avg loss: 3.032554, ppl: 20.750170 +epoch: 0, batch: 16884, sum loss: 4976.571777, avg loss: 3.110357, ppl: 22.429056 +epoch: 0, batch: 16885, sum loss: 5290.638184, avg loss: 3.300460, ppl: 27.125122 +epoch: 0, batch: 16886, sum loss: 4667.712891, avg loss: 3.064815, ppl: 21.430492 +epoch: 0, batch: 16887, sum loss: 5312.421875, avg loss: 3.422952, ppl: 30.659801 +epoch: 0, batch: 16888, sum loss: 6507.957031, avg loss: 3.601526, ppl: 36.654110 +epoch: 0, batch: 16889, sum loss: 5314.228027, avg loss: 3.455285, ppl: 31.667307 +epoch: 0, batch: 16890, sum loss: 6904.415527, avg loss: 3.868020, ppl: 47.847557 +epoch: 0, batch: 16891, sum loss: 6266.281250, avg loss: 3.261989, ppl: 26.101410 +epoch: 0, batch: 16892, sum loss: 5171.158203, avg loss: 3.312721, ppl: 27.459755 +epoch: 0, batch: 16893, sum loss: 5510.438965, avg loss: 3.393127, ppl: 29.758877 +epoch: 0, batch: 16894, sum loss: 4260.492188, avg loss: 3.047562, ppl: 21.063936 +epoch: 0, batch: 16895, sum loss: 5475.536621, avg loss: 3.557854, ppl: 
35.087807 +epoch: 0, batch: 16896, sum loss: 6239.929688, avg loss: 3.394956, ppl: 29.813345 +epoch: 0, batch: 16897, sum loss: 5670.229980, avg loss: 3.247555, ppl: 25.727348 +epoch: 0, batch: 16898, sum loss: 5924.872070, avg loss: 3.336077, ppl: 28.108633 +epoch: 0, batch: 16899, sum loss: 5672.347168, avg loss: 3.332754, ppl: 28.015387 +epoch: 0, batch: 16900, sum loss: 5891.948730, avg loss: 3.260624, ppl: 26.065807 +epoch: 0, batch: 16901, sum loss: 5927.533691, avg loss: 3.341338, ppl: 28.256907 +epoch: 0, batch: 16902, sum loss: 5179.518555, avg loss: 3.446120, ppl: 31.378408 +epoch: 0, batch: 16903, sum loss: 5925.548340, avg loss: 3.368703, ppl: 29.040833 +epoch: 0, batch: 16904, sum loss: 5052.955078, avg loss: 3.171974, ppl: 23.854536 +epoch: 0, batch: 16905, sum loss: 5364.168457, avg loss: 3.392896, ppl: 29.751980 +epoch: 0, batch: 16906, sum loss: 5326.527344, avg loss: 3.412253, ppl: 30.333521 +epoch: 0, batch: 16907, sum loss: 6296.030762, avg loss: 3.591575, ppl: 36.291187 +epoch: 0, batch: 16908, sum loss: 6721.022949, avg loss: 3.648764, ppl: 38.427135 +epoch: 0, batch: 16909, sum loss: 7277.401855, avg loss: 3.563860, ppl: 35.299179 +epoch: 0, batch: 16910, sum loss: 5917.582031, avg loss: 3.391164, ppl: 29.700512 +epoch: 0, batch: 16911, sum loss: 5568.388672, avg loss: 3.275523, ppl: 26.457058 +epoch: 0, batch: 16912, sum loss: 4302.231445, avg loss: 2.940691, ppl: 18.928930 +epoch: 0, batch: 16913, sum loss: 5340.860840, avg loss: 3.474861, ppl: 32.293327 +epoch: 0, batch: 16914, sum loss: 5828.729004, avg loss: 3.365317, ppl: 28.942667 +epoch: 0, batch: 16915, sum loss: 5068.218262, avg loss: 3.151877, ppl: 23.379906 +epoch: 0, batch: 16916, sum loss: 5686.991699, avg loss: 3.361106, ppl: 28.821054 +epoch: 0, batch: 16917, sum loss: 5675.489258, avg loss: 3.230216, ppl: 25.285112 +epoch: 0, batch: 16918, sum loss: 6865.019531, avg loss: 3.575531, ppl: 35.713589 +epoch: 0, batch: 16919, sum loss: 5527.095703, avg loss: 3.364027, ppl: 
28.905346 +epoch: 0, batch: 16920, sum loss: 5505.518066, avg loss: 3.106951, ppl: 22.352797 +epoch: 0, batch: 16921, sum loss: 5729.702148, avg loss: 3.300520, ppl: 27.126734 +epoch: 0, batch: 16922, sum loss: 4434.989258, avg loss: 3.145383, ppl: 23.228561 +epoch: 0, batch: 16923, sum loss: 4420.154785, avg loss: 3.182257, ppl: 24.101082 +epoch: 0, batch: 16924, sum loss: 5742.727051, avg loss: 3.272209, ppl: 26.369530 +epoch: 0, batch: 16925, sum loss: 4668.777344, avg loss: 3.039569, ppl: 20.896229 +epoch: 0, batch: 16926, sum loss: 5635.018066, avg loss: 3.404845, ppl: 30.109629 +epoch: 0, batch: 16927, sum loss: 5786.708496, avg loss: 3.517756, ppl: 33.708694 +epoch: 0, batch: 16928, sum loss: 4881.909180, avg loss: 3.161859, ppl: 23.614445 +epoch: 0, batch: 16929, sum loss: 6356.405273, avg loss: 3.375680, ppl: 29.244156 +epoch: 0, batch: 16930, sum loss: 5552.035156, avg loss: 3.489651, ppl: 32.774513 +epoch: 0, batch: 16931, sum loss: 5763.437988, avg loss: 3.510011, ppl: 33.448643 +epoch: 0, batch: 16932, sum loss: 5864.087402, avg loss: 3.580029, ppl: 35.874573 +epoch: 0, batch: 16933, sum loss: 5134.990234, avg loss: 3.165838, ppl: 23.708614 +epoch: 0, batch: 16934, sum loss: 6819.351074, avg loss: 3.696125, ppl: 40.290886 +epoch: 0, batch: 16935, sum loss: 5292.785156, avg loss: 3.354110, ppl: 28.620113 +epoch: 0, batch: 16936, sum loss: 6011.141113, avg loss: 3.326586, ppl: 27.843122 +epoch: 0, batch: 16937, sum loss: 7152.772949, avg loss: 3.668089, ppl: 39.176956 +epoch: 0, batch: 16938, sum loss: 4830.144043, avg loss: 3.220096, ppl: 25.030525 +epoch: 0, batch: 16939, sum loss: 5596.530273, avg loss: 3.192544, ppl: 24.350307 +epoch: 0, batch: 16940, sum loss: 6327.691406, avg loss: 3.807275, ppl: 45.027584 +epoch: 0, batch: 16941, sum loss: 6043.874512, avg loss: 3.629955, ppl: 37.711113 +epoch: 0, batch: 16942, sum loss: 5594.961426, avg loss: 3.184383, ppl: 24.152386 +epoch: 0, batch: 16943, sum loss: 6013.317383, avg loss: 3.378268, ppl: 
29.319952 +epoch: 0, batch: 16944, sum loss: 6167.617188, avg loss: 3.372125, ppl: 29.140396 +epoch: 0, batch: 16945, sum loss: 5345.023438, avg loss: 3.357427, ppl: 28.715200 +epoch: 0, batch: 16946, sum loss: 5442.493164, avg loss: 3.226137, ppl: 25.182194 +epoch: 0, batch: 16947, sum loss: 5744.823242, avg loss: 3.440014, ppl: 31.187391 +epoch: 0, batch: 16948, sum loss: 5515.166504, avg loss: 3.316396, ppl: 27.560843 +epoch: 0, batch: 16949, sum loss: 6538.414062, avg loss: 3.389535, ppl: 29.652174 +epoch: 0, batch: 16950, sum loss: 5478.711426, avg loss: 3.234186, ppl: 25.385704 +epoch: 0, batch: 16951, sum loss: 5326.366211, avg loss: 3.094925, ppl: 22.085587 +epoch: 0, batch: 16952, sum loss: 5892.731934, avg loss: 3.262864, ppl: 26.124247 +epoch: 0, batch: 16953, sum loss: 5952.478516, avg loss: 3.249170, ppl: 25.768932 +epoch: 0, batch: 16954, sum loss: 5103.532227, avg loss: 3.012711, ppl: 20.342470 +epoch: 0, batch: 16955, sum loss: 5431.297363, avg loss: 3.371383, ppl: 29.118763 +epoch: 0, batch: 16956, sum loss: 5790.666992, avg loss: 3.406275, ppl: 30.152710 +epoch: 0, batch: 16957, sum loss: 6896.631836, avg loss: 3.573385, ppl: 35.637005 +epoch: 0, batch: 16958, sum loss: 5849.320801, avg loss: 3.465238, ppl: 31.984058 +epoch: 0, batch: 16959, sum loss: 4397.756348, avg loss: 3.168412, ppl: 23.769712 +epoch: 0, batch: 16960, sum loss: 5892.531250, avg loss: 3.486705, ppl: 32.678089 +epoch: 0, batch: 16961, sum loss: 5230.044434, avg loss: 3.491351, ppl: 32.830284 +epoch: 0, batch: 16962, sum loss: 5065.067383, avg loss: 3.399374, ppl: 29.945356 +epoch: 0, batch: 16963, sum loss: 4900.609375, avg loss: 3.051438, ppl: 21.145727 +epoch: 0, batch: 16964, sum loss: 6224.061035, avg loss: 3.552547, ppl: 34.902084 +epoch: 0, batch: 16965, sum loss: 6109.697266, avg loss: 3.649760, ppl: 38.465424 +epoch: 0, batch: 16966, sum loss: 5478.788086, avg loss: 3.447947, ppl: 31.435797 +epoch: 0, batch: 16967, sum loss: 5515.390625, avg loss: 3.210356, ppl: 
24.787897 +epoch: 0, batch: 16968, sum loss: 6138.415039, avg loss: 3.415924, ppl: 30.445063 +epoch: 0, batch: 16969, sum loss: 7294.064453, avg loss: 3.607351, ppl: 36.868271 +epoch: 0, batch: 16970, sum loss: 5490.645996, avg loss: 3.352042, ppl: 28.561008 +epoch: 0, batch: 16971, sum loss: 6370.167969, avg loss: 3.675804, ppl: 39.480373 +epoch: 0, batch: 16972, sum loss: 5034.702148, avg loss: 3.174465, ppl: 23.914032 +epoch: 0, batch: 16973, sum loss: 6234.117676, avg loss: 3.708577, ppl: 40.795700 +epoch: 0, batch: 16974, sum loss: 5716.698242, avg loss: 3.390687, ppl: 29.686340 +epoch: 0, batch: 16975, sum loss: 5983.898438, avg loss: 3.164410, ppl: 23.674763 +epoch: 0, batch: 16976, sum loss: 5607.148926, avg loss: 3.508854, ppl: 33.409962 +epoch: 0, batch: 16977, sum loss: 6064.482422, avg loss: 3.374782, ppl: 29.217901 +epoch: 0, batch: 16978, sum loss: 6509.996582, avg loss: 3.513220, ppl: 33.556141 +epoch: 0, batch: 16979, sum loss: 6310.326172, avg loss: 3.614162, ppl: 37.120216 +epoch: 0, batch: 16980, sum loss: 6151.406250, avg loss: 3.332289, ppl: 28.002378 +epoch: 0, batch: 16981, sum loss: 5748.148438, avg loss: 3.631174, ppl: 37.757111 +epoch: 0, batch: 16982, sum loss: 5676.054688, avg loss: 3.340821, ppl: 28.242306 +epoch: 0, batch: 16983, sum loss: 7480.564453, avg loss: 3.654404, ppl: 38.644470 +epoch: 0, batch: 16984, sum loss: 5439.956055, avg loss: 3.068221, ppl: 21.503620 +epoch: 0, batch: 16985, sum loss: 5040.049316, avg loss: 3.243275, ppl: 25.617479 +epoch: 0, batch: 16986, sum loss: 5721.529297, avg loss: 3.379521, ppl: 29.356716 +epoch: 0, batch: 16987, sum loss: 5562.784180, avg loss: 3.274152, ppl: 26.420813 +epoch: 0, batch: 16988, sum loss: 5600.496582, avg loss: 3.132269, ppl: 22.925938 +epoch: 0, batch: 16989, sum loss: 4977.705078, avg loss: 3.184712, ppl: 24.160334 +epoch: 0, batch: 16990, sum loss: 5610.899902, avg loss: 3.440159, ppl: 31.191927 +epoch: 0, batch: 16991, sum loss: 5331.383789, avg loss: 3.293010, ppl: 
26.923796 +epoch: 0, batch: 16992, sum loss: 5778.252930, avg loss: 3.482973, ppl: 32.556381 +epoch: 0, batch: 16993, sum loss: 5324.406250, avg loss: 3.188267, ppl: 24.246378 +epoch: 0, batch: 16994, sum loss: 5553.618164, avg loss: 3.497241, ppl: 33.024204 +epoch: 0, batch: 16995, sum loss: 7778.162109, avg loss: 3.606009, ppl: 36.818825 +epoch: 0, batch: 16996, sum loss: 5328.629395, avg loss: 3.420173, ppl: 30.574701 +epoch: 0, batch: 16997, sum loss: 5130.210938, avg loss: 3.290706, ppl: 26.861832 +epoch: 0, batch: 16998, sum loss: 6444.292480, avg loss: 3.676151, ppl: 39.494091 +epoch: 0, batch: 16999, sum loss: 5228.765625, avg loss: 3.286465, ppl: 26.748140 +epoch: 0, batch: 17000, sum loss: 6307.290039, avg loss: 3.347819, ppl: 28.440626 +epoch: 0, batch: 17001, sum loss: 5119.874512, avg loss: 3.357295, ppl: 28.711416 +epoch: 0, batch: 17002, sum loss: 5619.343750, avg loss: 3.383109, ppl: 29.462223 +epoch: 0, batch: 17003, sum loss: 5465.257812, avg loss: 3.346759, ppl: 28.410507 +epoch: 0, batch: 17004, sum loss: 7102.895996, avg loss: 3.571089, ppl: 35.555283 +epoch: 0, batch: 17005, sum loss: 6072.165527, avg loss: 3.457953, ppl: 31.751921 +epoch: 0, batch: 17006, sum loss: 5709.115234, avg loss: 3.317324, ppl: 27.586435 +epoch: 0, batch: 17007, sum loss: 5352.284180, avg loss: 3.018773, ppl: 20.466162 +epoch: 0, batch: 17008, sum loss: 6469.016602, avg loss: 3.562234, ppl: 35.241829 +epoch: 0, batch: 17009, sum loss: 4304.582031, avg loss: 3.193310, ppl: 24.368956 +epoch: 0, batch: 17010, sum loss: 6031.613770, avg loss: 3.492538, ppl: 32.869278 +epoch: 0, batch: 17011, sum loss: 5762.716797, avg loss: 3.327204, ppl: 27.860329 +epoch: 0, batch: 17012, sum loss: 5948.800781, avg loss: 3.495183, ppl: 32.956303 +epoch: 0, batch: 17013, sum loss: 5491.710449, avg loss: 3.296345, ppl: 27.013716 +epoch: 0, batch: 17014, sum loss: 5014.490234, avg loss: 3.329675, ppl: 27.929262 +epoch: 0, batch: 17015, sum loss: 5087.961426, avg loss: 3.286797, ppl: 
26.757019 +epoch: 0, batch: 17016, sum loss: 5258.372070, avg loss: 3.482366, ppl: 32.536598 +epoch: 0, batch: 17017, sum loss: 6301.762695, avg loss: 3.644744, ppl: 38.272980 +epoch: 0, batch: 17018, sum loss: 6284.575195, avg loss: 3.595295, ppl: 36.426441 +epoch: 0, batch: 17019, sum loss: 5860.160156, avg loss: 3.571091, ppl: 35.555351 +epoch: 0, batch: 17020, sum loss: 6264.896484, avg loss: 3.659402, ppl: 38.838116 +epoch: 0, batch: 17021, sum loss: 5390.993164, avg loss: 3.267269, ppl: 26.239573 +epoch: 0, batch: 17022, sum loss: 6417.483887, avg loss: 3.446554, ppl: 31.392035 +epoch: 0, batch: 17023, sum loss: 4585.248535, avg loss: 3.151373, ppl: 23.368135 +epoch: 0, batch: 17024, sum loss: 5731.638672, avg loss: 3.401566, ppl: 30.011061 +epoch: 0, batch: 17025, sum loss: 5818.049316, avg loss: 3.334126, ppl: 28.053841 +epoch: 0, batch: 17026, sum loss: 6756.680176, avg loss: 3.696215, ppl: 40.294487 +epoch: 0, batch: 17027, sum loss: 4904.623047, avg loss: 3.162233, ppl: 23.623280 +epoch: 0, batch: 17028, sum loss: 6159.054688, avg loss: 3.340051, ppl: 28.220577 +epoch: 0, batch: 17029, sum loss: 5770.601562, avg loss: 3.443080, ppl: 31.283154 +epoch: 0, batch: 17030, sum loss: 5654.477539, avg loss: 3.183828, ppl: 24.138971 +epoch: 0, batch: 17031, sum loss: 5839.948730, avg loss: 3.478230, ppl: 32.402328 +epoch: 0, batch: 17032, sum loss: 6500.409180, avg loss: 3.435734, ppl: 31.054207 +epoch: 0, batch: 17033, sum loss: 5978.226562, avg loss: 3.368015, ppl: 29.020859 +epoch: 0, batch: 17034, sum loss: 5826.364746, avg loss: 3.589874, ppl: 36.229496 +epoch: 0, batch: 17035, sum loss: 5302.684570, avg loss: 3.229406, ppl: 25.264648 +epoch: 0, batch: 17036, sum loss: 5731.050293, avg loss: 3.503087, ppl: 33.217838 +epoch: 0, batch: 17037, sum loss: 6767.986328, avg loss: 3.411283, ppl: 30.304108 +epoch: 0, batch: 17038, sum loss: 5568.220215, avg loss: 3.342269, ppl: 28.283228 +epoch: 0, batch: 17039, sum loss: 6117.459473, avg loss: 3.577462, ppl: 
35.782600 +epoch: 0, batch: 17040, sum loss: 5562.252930, avg loss: 3.431371, ppl: 30.919010 +epoch: 0, batch: 17041, sum loss: 5087.902344, avg loss: 3.046648, ppl: 21.044689 +epoch: 0, batch: 17042, sum loss: 6027.035156, avg loss: 3.557872, ppl: 35.088444 +epoch: 0, batch: 17043, sum loss: 4892.650391, avg loss: 3.351130, ppl: 28.534973 +epoch: 0, batch: 17044, sum loss: 5190.740723, avg loss: 3.113822, ppl: 22.506891 +epoch: 0, batch: 17045, sum loss: 4683.852539, avg loss: 3.075412, ppl: 21.658804 +epoch: 0, batch: 17046, sum loss: 7165.272949, avg loss: 3.728030, ppl: 41.597069 +epoch: 0, batch: 17047, sum loss: 5629.257812, avg loss: 3.321096, ppl: 27.690681 +epoch: 0, batch: 17048, sum loss: 5665.387695, avg loss: 3.364245, ppl: 28.911652 +epoch: 0, batch: 17049, sum loss: 5288.051758, avg loss: 3.300906, ppl: 27.137218 +epoch: 0, batch: 17050, sum loss: 4435.078125, avg loss: 2.944939, ppl: 19.009506 +epoch: 0, batch: 17051, sum loss: 5564.104004, avg loss: 3.261491, ppl: 26.088408 +epoch: 0, batch: 17052, sum loss: 5526.780273, avg loss: 3.297602, ppl: 27.047693 +epoch: 0, batch: 17053, sum loss: 6544.678223, avg loss: 3.586125, ppl: 36.093945 +epoch: 0, batch: 17054, sum loss: 5072.799316, avg loss: 3.420634, ppl: 30.588789 +epoch: 0, batch: 17055, sum loss: 6741.219238, avg loss: 3.591486, ppl: 36.287960 +epoch: 0, batch: 17056, sum loss: 6867.643066, avg loss: 3.413342, ppl: 30.366552 +epoch: 0, batch: 17057, sum loss: 5671.642578, avg loss: 3.255822, ppl: 25.940933 +epoch: 0, batch: 17058, sum loss: 6278.216797, avg loss: 3.386309, ppl: 29.556654 +epoch: 0, batch: 17059, sum loss: 5531.445312, avg loss: 3.144654, ppl: 23.211632 +epoch: 0, batch: 17060, sum loss: 5099.430176, avg loss: 3.262591, ppl: 26.117128 +epoch: 0, batch: 17061, sum loss: 5755.190430, avg loss: 3.320941, ppl: 27.686384 +epoch: 0, batch: 17062, sum loss: 6276.316895, avg loss: 3.467579, ppl: 32.059021 +epoch: 0, batch: 17063, sum loss: 5419.005859, avg loss: 3.367934, ppl: 
29.018513 +epoch: 0, batch: 17064, sum loss: 6616.226074, avg loss: 3.528654, ppl: 34.078064 +epoch: 0, batch: 17065, sum loss: 6241.945801, avg loss: 3.498849, ppl: 33.077347 +epoch: 0, batch: 17066, sum loss: 5884.863281, avg loss: 3.492501, ppl: 32.868034 +epoch: 0, batch: 17067, sum loss: 5548.509277, avg loss: 3.258080, ppl: 25.999557 +epoch: 0, batch: 17068, sum loss: 5504.437988, avg loss: 3.239811, ppl: 25.528889 +epoch: 0, batch: 17069, sum loss: 5880.885254, avg loss: 3.328175, ppl: 27.887403 +epoch: 0, batch: 17070, sum loss: 6407.951660, avg loss: 3.689091, ppl: 40.008480 +epoch: 0, batch: 17071, sum loss: 5734.477051, avg loss: 3.419485, ppl: 30.553686 +epoch: 0, batch: 17072, sum loss: 6100.348633, avg loss: 3.182237, ppl: 24.100616 +epoch: 0, batch: 17073, sum loss: 5570.136230, avg loss: 3.238451, ppl: 25.494207 +epoch: 0, batch: 17074, sum loss: 5187.603027, avg loss: 3.375149, ppl: 29.228624 +epoch: 0, batch: 17075, sum loss: 5886.401855, avg loss: 3.394695, ppl: 29.805571 +epoch: 0, batch: 17076, sum loss: 6114.944336, avg loss: 3.458679, ppl: 31.774973 +epoch: 0, batch: 17077, sum loss: 6257.406250, avg loss: 3.445708, ppl: 31.365484 +epoch: 0, batch: 17078, sum loss: 5889.022461, avg loss: 3.449925, ppl: 31.498043 +epoch: 0, batch: 17079, sum loss: 4433.290527, avg loss: 3.187125, ppl: 24.218693 +epoch: 0, batch: 17080, sum loss: 5236.159668, avg loss: 3.449381, ppl: 31.480888 +epoch: 0, batch: 17081, sum loss: 5683.712402, avg loss: 3.476277, ppl: 32.339088 +epoch: 0, batch: 17082, sum loss: 6840.673828, avg loss: 3.640593, ppl: 38.114426 +epoch: 0, batch: 17083, sum loss: 6150.575684, avg loss: 3.490679, ppl: 32.808208 +epoch: 0, batch: 17084, sum loss: 5325.686035, avg loss: 3.110798, ppl: 22.438940 +epoch: 0, batch: 17085, sum loss: 5314.513184, avg loss: 3.400200, ppl: 29.970104 +epoch: 0, batch: 17086, sum loss: 5527.156738, avg loss: 3.280212, ppl: 26.581400 +epoch: 0, batch: 17087, sum loss: 4715.400879, avg loss: 3.279138, ppl: 
26.552883 +epoch: 0, batch: 17088, sum loss: 5033.138672, avg loss: 3.222240, ppl: 25.084246 +epoch: 0, batch: 17089, sum loss: 5013.865723, avg loss: 3.100721, ppl: 22.213964 +epoch: 0, batch: 17090, sum loss: 5166.783203, avg loss: 3.259800, ppl: 26.044327 +epoch: 0, batch: 17091, sum loss: 5325.126953, avg loss: 3.424519, ppl: 30.707872 +epoch: 0, batch: 17092, sum loss: 5364.846191, avg loss: 3.287283, ppl: 26.770042 +epoch: 0, batch: 17093, sum loss: 5561.104980, avg loss: 3.292543, ppl: 26.911211 +epoch: 0, batch: 17094, sum loss: 6069.263672, avg loss: 3.498135, ppl: 33.053738 +epoch: 0, batch: 17095, sum loss: 5560.425781, avg loss: 3.648573, ppl: 38.419819 +epoch: 0, batch: 17096, sum loss: 6008.705078, avg loss: 3.578740, ppl: 35.828381 +epoch: 0, batch: 17097, sum loss: 6302.695801, avg loss: 3.636870, ppl: 37.972794 +epoch: 0, batch: 17098, sum loss: 6103.521973, avg loss: 3.776932, ppl: 43.681808 +epoch: 0, batch: 17099, sum loss: 5752.749512, avg loss: 3.533630, ppl: 34.248058 +epoch: 0, batch: 17100, sum loss: 5584.560547, avg loss: 3.318218, ppl: 27.611097 +epoch: 0, batch: 17101, sum loss: 5582.091309, avg loss: 3.266291, ppl: 26.213936 +epoch: 0, batch: 17102, sum loss: 5197.794434, avg loss: 3.383981, ppl: 29.487923 +epoch: 0, batch: 17103, sum loss: 7449.777344, avg loss: 3.859988, ppl: 47.464802 +epoch: 0, batch: 17104, sum loss: 5231.051758, avg loss: 3.319195, ppl: 27.638100 +epoch: 0, batch: 17105, sum loss: 5357.563965, avg loss: 3.418994, ppl: 30.538691 +epoch: 0, batch: 17106, sum loss: 5901.487305, avg loss: 3.542309, ppl: 34.546604 +epoch: 0, batch: 17107, sum loss: 6146.171875, avg loss: 3.528227, ppl: 34.063522 +epoch: 0, batch: 17108, sum loss: 6084.135254, avg loss: 3.484614, ppl: 32.609825 +epoch: 0, batch: 17109, sum loss: 6294.707031, avg loss: 3.421036, ppl: 30.601109 +epoch: 0, batch: 17110, sum loss: 6592.305176, avg loss: 3.493537, ppl: 32.902130 +epoch: 0, batch: 17111, sum loss: 4898.663574, avg loss: 3.110262, ppl: 
22.426928 +epoch: 0, batch: 17112, sum loss: 5470.998047, avg loss: 3.543393, ppl: 34.584053 +epoch: 0, batch: 17113, sum loss: 6424.400879, avg loss: 3.506769, ppl: 33.340378 +epoch: 0, batch: 17114, sum loss: 5948.121094, avg loss: 3.319264, ppl: 27.640005 +epoch: 0, batch: 17115, sum loss: 4323.400391, avg loss: 3.061898, ppl: 21.368080 +epoch: 0, batch: 17116, sum loss: 6352.777832, avg loss: 3.437650, ppl: 31.113770 +epoch: 0, batch: 17117, sum loss: 4977.819824, avg loss: 3.186825, ppl: 24.211424 +epoch: 0, batch: 17118, sum loss: 5430.708984, avg loss: 3.194535, ppl: 24.398821 +epoch: 0, batch: 17119, sum loss: 5694.227051, avg loss: 3.411760, ppl: 30.318554 +epoch: 0, batch: 17120, sum loss: 6767.778320, avg loss: 3.493948, ppl: 32.915657 +epoch: 0, batch: 17121, sum loss: 5902.892090, avg loss: 3.388572, ppl: 29.623611 +epoch: 0, batch: 17122, sum loss: 5708.587891, avg loss: 3.502201, ppl: 33.188423 +epoch: 0, batch: 17123, sum loss: 4760.437012, avg loss: 3.152607, ppl: 23.396992 +epoch: 0, batch: 17124, sum loss: 6041.212402, avg loss: 3.422783, ppl: 30.654612 +epoch: 0, batch: 17125, sum loss: 6339.285156, avg loss: 3.486955, ppl: 32.686279 +epoch: 0, batch: 17126, sum loss: 6267.118164, avg loss: 3.160423, ppl: 23.580559 +epoch: 0, batch: 17127, sum loss: 5350.288086, avg loss: 3.260383, ppl: 26.059507 +epoch: 0, batch: 17128, sum loss: 5875.944824, avg loss: 3.408321, ppl: 30.214468 +epoch: 0, batch: 17129, sum loss: 5817.366699, avg loss: 3.536393, ppl: 34.342819 +epoch: 0, batch: 17130, sum loss: 4757.667969, avg loss: 3.138304, ppl: 23.064705 +epoch: 0, batch: 17131, sum loss: 5624.408203, avg loss: 3.400489, ppl: 29.978752 +epoch: 0, batch: 17132, sum loss: 6155.186035, avg loss: 3.624963, ppl: 37.523319 +epoch: 0, batch: 17133, sum loss: 6052.451660, avg loss: 3.407912, ppl: 30.202116 +epoch: 0, batch: 17134, sum loss: 5692.682129, avg loss: 3.309699, ppl: 27.376879 +epoch: 0, batch: 17135, sum loss: 5437.319336, avg loss: 3.257831, ppl: 
25.993093 +epoch: 0, batch: 17136, sum loss: 5638.424316, avg loss: 3.310878, ppl: 27.409168 +epoch: 0, batch: 17137, sum loss: 6477.827148, avg loss: 3.691070, ppl: 40.087704 +epoch: 0, batch: 17138, sum loss: 5468.542969, avg loss: 3.222477, ppl: 25.090185 +epoch: 0, batch: 17139, sum loss: 6541.819336, avg loss: 3.470461, ppl: 32.151566 +epoch: 0, batch: 17140, sum loss: 6272.287598, avg loss: 3.541664, ppl: 34.524334 +epoch: 0, batch: 17141, sum loss: 5714.513184, avg loss: 3.469650, ppl: 32.125481 +epoch: 0, batch: 17142, sum loss: 5998.822754, avg loss: 3.427899, ppl: 30.811829 +epoch: 0, batch: 17143, sum loss: 5972.317383, avg loss: 3.450212, ppl: 31.507078 +epoch: 0, batch: 17144, sum loss: 6582.601074, avg loss: 3.632782, ppl: 37.817879 +epoch: 0, batch: 17145, sum loss: 5763.091797, avg loss: 3.564064, ppl: 35.306400 +epoch: 0, batch: 17146, sum loss: 5640.895996, avg loss: 3.427033, ppl: 30.785166 +epoch: 0, batch: 17147, sum loss: 5582.229492, avg loss: 3.515258, ppl: 33.624603 +epoch: 0, batch: 17148, sum loss: 6091.568359, avg loss: 3.549865, ppl: 34.808617 +epoch: 0, batch: 17149, sum loss: 5534.791016, avg loss: 3.259594, ppl: 26.038975 +epoch: 0, batch: 17150, sum loss: 5662.568359, avg loss: 3.454892, ppl: 31.654875 +epoch: 0, batch: 17151, sum loss: 6234.253906, avg loss: 3.473122, ppl: 32.237232 +epoch: 0, batch: 17152, sum loss: 4912.330078, avg loss: 3.330393, ppl: 27.949333 +epoch: 0, batch: 17153, sum loss: 5476.138672, avg loss: 3.249934, ppl: 25.788637 +epoch: 0, batch: 17154, sum loss: 5869.271484, avg loss: 3.542107, ppl: 34.539619 +epoch: 0, batch: 17155, sum loss: 6202.765625, avg loss: 3.534339, ppl: 34.272358 +epoch: 0, batch: 17156, sum loss: 6555.432129, avg loss: 3.603866, ppl: 36.739990 +epoch: 0, batch: 17157, sum loss: 5812.571289, avg loss: 3.258168, ppl: 26.001858 +epoch: 0, batch: 17158, sum loss: 6051.729004, avg loss: 3.436530, ppl: 31.078917 +epoch: 0, batch: 17159, sum loss: 5386.628418, avg loss: 3.198711, ppl: 
24.500921 +epoch: 0, batch: 17160, sum loss: 6025.764160, avg loss: 3.715021, ppl: 41.059444 +epoch: 0, batch: 17161, sum loss: 5321.901367, avg loss: 3.324111, ppl: 27.774296 +epoch: 0, batch: 17162, sum loss: 5777.669434, avg loss: 3.512261, ppl: 33.523975 +epoch: 0, batch: 17163, sum loss: 5332.383301, avg loss: 3.125664, ppl: 22.775019 +epoch: 0, batch: 17164, sum loss: 5921.013184, avg loss: 3.104884, ppl: 22.306623 +epoch: 0, batch: 17165, sum loss: 6157.601562, avg loss: 3.536819, ppl: 34.357452 +epoch: 0, batch: 17166, sum loss: 5588.000000, avg loss: 3.256410, ppl: 25.956196 +epoch: 0, batch: 17167, sum loss: 5603.527344, avg loss: 3.267363, ppl: 26.242043 +epoch: 0, batch: 17168, sum loss: 6195.750977, avg loss: 3.463248, ppl: 31.920486 +epoch: 0, batch: 17169, sum loss: 6128.729492, avg loss: 3.607257, ppl: 36.864799 +epoch: 0, batch: 17170, sum loss: 5837.426758, avg loss: 3.160491, ppl: 23.582178 +epoch: 0, batch: 17171, sum loss: 5947.125488, avg loss: 3.348607, ppl: 28.463053 +epoch: 0, batch: 17172, sum loss: 5551.244141, avg loss: 3.382842, ppl: 29.454363 +epoch: 0, batch: 17173, sum loss: 5832.084473, avg loss: 3.125447, ppl: 22.770071 +epoch: 0, batch: 17174, sum loss: 6245.460938, avg loss: 3.354168, ppl: 28.621786 +epoch: 0, batch: 17175, sum loss: 6669.248535, avg loss: 3.378545, ppl: 29.328070 +epoch: 0, batch: 17176, sum loss: 5482.094238, avg loss: 3.346822, ppl: 28.412291 +epoch: 0, batch: 17177, sum loss: 4895.299316, avg loss: 3.176703, ppl: 23.967602 +epoch: 0, batch: 17178, sum loss: 5716.754883, avg loss: 3.439684, ppl: 31.177109 +epoch: 0, batch: 17179, sum loss: 5995.346680, avg loss: 3.445601, ppl: 31.362141 +epoch: 0, batch: 17180, sum loss: 5001.432129, avg loss: 3.098781, ppl: 22.170906 +epoch: 0, batch: 17181, sum loss: 5610.568848, avg loss: 3.335653, ppl: 28.096733 +epoch: 0, batch: 17182, sum loss: 5534.574219, avg loss: 3.215906, ppl: 24.925861 +epoch: 0, batch: 17183, sum loss: 5501.956543, avg loss: 3.379580, ppl: 
29.358438 +epoch: 0, batch: 17184, sum loss: 6437.338867, avg loss: 3.345810, ppl: 28.383562 +epoch: 0, batch: 17185, sum loss: 4902.990723, avg loss: 3.200386, ppl: 24.541990 +epoch: 0, batch: 17186, sum loss: 6791.047363, avg loss: 3.271217, ppl: 26.343390 +epoch: 0, batch: 17187, sum loss: 6156.536133, avg loss: 3.295790, ppl: 26.998739 +epoch: 0, batch: 17188, sum loss: 4722.093262, avg loss: 3.169190, ppl: 23.788212 +epoch: 0, batch: 17189, sum loss: 6650.449219, avg loss: 3.384453, ppl: 29.501839 +epoch: 0, batch: 17190, sum loss: 6446.592285, avg loss: 3.307641, ppl: 27.320601 +epoch: 0, batch: 17191, sum loss: 5749.663574, avg loss: 3.338945, ppl: 28.189375 +epoch: 0, batch: 17192, sum loss: 4939.667480, avg loss: 3.251920, ppl: 25.839916 +epoch: 0, batch: 17193, sum loss: 5840.792480, avg loss: 3.470465, ppl: 32.151688 +epoch: 0, batch: 17194, sum loss: 5696.087891, avg loss: 3.319399, ppl: 27.643723 +epoch: 0, batch: 17195, sum loss: 6556.745117, avg loss: 3.600629, ppl: 36.621258 +epoch: 0, batch: 17196, sum loss: 4645.054199, avg loss: 3.086415, ppl: 21.898428 +epoch: 0, batch: 17197, sum loss: 5743.968750, avg loss: 3.468580, ppl: 32.091141 +epoch: 0, batch: 17198, sum loss: 5116.988770, avg loss: 3.133490, ppl: 22.953939 +epoch: 0, batch: 17199, sum loss: 5827.474609, avg loss: 3.502088, ppl: 33.184681 +epoch: 0, batch: 17200, sum loss: 5787.350098, avg loss: 3.432592, ppl: 30.956783 +epoch: 0, batch: 17201, sum loss: 5261.861328, avg loss: 3.416793, ppl: 30.471539 +epoch: 0, batch: 17202, sum loss: 5952.625000, avg loss: 3.319925, ppl: 27.658272 +epoch: 0, batch: 17203, sum loss: 5063.293457, avg loss: 3.225028, ppl: 25.154274 +epoch: 0, batch: 17204, sum loss: 7598.478516, avg loss: 3.579123, ppl: 35.842102 +epoch: 0, batch: 17205, sum loss: 5487.039551, avg loss: 3.264152, ppl: 26.157921 +epoch: 0, batch: 17206, sum loss: 5298.247070, avg loss: 3.368243, ppl: 29.027494 +epoch: 0, batch: 17207, sum loss: 5823.423828, avg loss: 3.192667, ppl: 
24.353279 +epoch: 0, batch: 17208, sum loss: 5277.338867, avg loss: 3.365650, ppl: 28.952301 +epoch: 0, batch: 17209, sum loss: 5372.379395, avg loss: 3.218921, ppl: 25.001135 +epoch: 0, batch: 17210, sum loss: 5511.056641, avg loss: 3.374805, ppl: 29.218584 +epoch: 0, batch: 17211, sum loss: 5576.575195, avg loss: 3.379743, ppl: 29.363213 +epoch: 0, batch: 17212, sum loss: 6150.275391, avg loss: 3.281897, ppl: 26.626236 +epoch: 0, batch: 17213, sum loss: 4787.417480, avg loss: 2.928084, ppl: 18.691786 +epoch: 0, batch: 17214, sum loss: 5242.732422, avg loss: 3.360726, ppl: 28.810102 +epoch: 0, batch: 17215, sum loss: 5161.710938, avg loss: 3.306669, ppl: 27.294071 +epoch: 0, batch: 17216, sum loss: 7176.337402, avg loss: 3.898065, ppl: 49.306942 +epoch: 0, batch: 17217, sum loss: 5630.238281, avg loss: 3.271492, ppl: 26.350632 +epoch: 0, batch: 17218, sum loss: 5614.012695, avg loss: 3.187968, ppl: 24.239136 +epoch: 0, batch: 17219, sum loss: 5831.048340, avg loss: 3.335840, ppl: 28.101986 +epoch: 0, batch: 17220, sum loss: 6030.028320, avg loss: 3.418383, ppl: 30.520035 +epoch: 0, batch: 17221, sum loss: 6271.862793, avg loss: 3.476642, ppl: 32.350918 +epoch: 0, batch: 17222, sum loss: 6300.262207, avg loss: 3.541463, ppl: 34.517368 +epoch: 0, batch: 17223, sum loss: 6358.585449, avg loss: 3.534511, ppl: 34.278252 +epoch: 0, batch: 17224, sum loss: 5761.874023, avg loss: 3.541410, ppl: 34.515553 +epoch: 0, batch: 17225, sum loss: 4910.065430, avg loss: 3.184219, ppl: 24.148418 +epoch: 0, batch: 17226, sum loss: 4977.278320, avg loss: 3.144206, ppl: 23.201248 +epoch: 0, batch: 17227, sum loss: 6048.230957, avg loss: 3.212018, ppl: 24.829153 +epoch: 0, batch: 17228, sum loss: 5711.168945, avg loss: 3.326249, ppl: 27.833731 +epoch: 0, batch: 17229, sum loss: 5504.803711, avg loss: 3.230519, ppl: 25.292770 +epoch: 0, batch: 17230, sum loss: 4353.212891, avg loss: 2.886746, ppl: 17.934853 +epoch: 0, batch: 17231, sum loss: 5903.549805, avg loss: 3.588784, ppl: 
36.190041 +epoch: 0, batch: 17232, sum loss: 7253.931641, avg loss: 3.545421, ppl: 34.654278 +epoch: 0, batch: 17233, sum loss: 5953.749512, avg loss: 3.520846, ppl: 33.813011 +epoch: 0, batch: 17234, sum loss: 5657.693359, avg loss: 3.198244, ppl: 24.489491 +epoch: 0, batch: 17235, sum loss: 5214.924805, avg loss: 3.257292, ppl: 25.979097 +epoch: 0, batch: 17236, sum loss: 5564.293457, avg loss: 3.284707, ppl: 26.701155 +epoch: 0, batch: 17237, sum loss: 5463.555176, avg loss: 3.223336, ppl: 25.111765 +epoch: 0, batch: 17238, sum loss: 6125.217773, avg loss: 3.406684, ppl: 30.165049 +epoch: 0, batch: 17239, sum loss: 5415.225586, avg loss: 3.357239, ppl: 28.709801 +epoch: 0, batch: 17240, sum loss: 6180.469727, avg loss: 3.244341, ppl: 25.644808 +epoch: 0, batch: 17241, sum loss: 6413.462402, avg loss: 3.504624, ppl: 33.268944 +epoch: 0, batch: 17242, sum loss: 5349.572754, avg loss: 3.197593, ppl: 24.473551 +epoch: 0, batch: 17243, sum loss: 5061.734375, avg loss: 3.077042, ppl: 21.694138 +epoch: 0, batch: 17244, sum loss: 5696.935059, avg loss: 3.222249, ppl: 25.084461 +epoch: 0, batch: 17245, sum loss: 6217.252930, avg loss: 3.384460, ppl: 29.502064 +epoch: 0, batch: 17246, sum loss: 5758.586914, avg loss: 3.485828, ppl: 32.649441 +epoch: 0, batch: 17247, sum loss: 6496.310547, avg loss: 3.397652, ppl: 29.893824 +epoch: 0, batch: 17248, sum loss: 5763.995117, avg loss: 3.347268, ppl: 28.424959 +epoch: 0, batch: 17249, sum loss: 5861.541992, avg loss: 3.189087, ppl: 24.266260 +epoch: 0, batch: 17250, sum loss: 5487.893066, avg loss: 3.410748, ppl: 30.287884 +epoch: 0, batch: 17251, sum loss: 5117.311035, avg loss: 3.362228, ppl: 28.853409 +epoch: 0, batch: 17252, sum loss: 5980.610352, avg loss: 3.378876, ppl: 29.337776 +epoch: 0, batch: 17253, sum loss: 7000.055664, avg loss: 3.571457, ppl: 35.568375 +epoch: 0, batch: 17254, sum loss: 5255.628418, avg loss: 3.260315, ppl: 26.057755 +epoch: 0, batch: 17255, sum loss: 5298.490723, avg loss: 3.266640, ppl: 
26.223093 +epoch: 0, batch: 17256, sum loss: 5526.615723, avg loss: 3.325280, ppl: 27.806789 +epoch: 0, batch: 17257, sum loss: 6356.197266, avg loss: 3.281465, ppl: 26.614729 +epoch: 0, batch: 17258, sum loss: 5798.103027, avg loss: 3.422729, ppl: 30.652954 +epoch: 0, batch: 17259, sum loss: 5742.634277, avg loss: 3.279631, ppl: 26.565977 +epoch: 0, batch: 17260, sum loss: 5901.965820, avg loss: 3.289836, ppl: 26.838467 +epoch: 0, batch: 17261, sum loss: 5481.876953, avg loss: 3.375540, ppl: 29.240070 +epoch: 0, batch: 17262, sum loss: 5680.077637, avg loss: 3.608690, ppl: 36.917641 +epoch: 0, batch: 17263, sum loss: 6085.036133, avg loss: 3.459373, ppl: 31.797018 +epoch: 0, batch: 17264, sum loss: 5585.836914, avg loss: 3.104968, ppl: 22.308496 +epoch: 0, batch: 17265, sum loss: 5653.142578, avg loss: 3.245202, ppl: 25.666903 +epoch: 0, batch: 17266, sum loss: 6035.770020, avg loss: 3.351344, ppl: 28.541056 +epoch: 0, batch: 17267, sum loss: 6093.623047, avg loss: 3.522325, ppl: 33.863079 +epoch: 0, batch: 17268, sum loss: 5091.157715, avg loss: 3.138815, ppl: 23.076498 +epoch: 0, batch: 17269, sum loss: 5352.681152, avg loss: 3.242085, ppl: 25.587002 +epoch: 0, batch: 17270, sum loss: 5955.631348, avg loss: 3.261573, ppl: 26.090534 +epoch: 0, batch: 17271, sum loss: 4952.568848, avg loss: 3.245458, ppl: 25.673470 +epoch: 0, batch: 17272, sum loss: 5322.249512, avg loss: 3.299597, ppl: 27.101709 +epoch: 0, batch: 17273, sum loss: 6487.867676, avg loss: 3.429106, ppl: 30.849037 +epoch: 0, batch: 17274, sum loss: 6002.472168, avg loss: 3.201319, ppl: 24.564898 +epoch: 0, batch: 17275, sum loss: 5289.896484, avg loss: 3.139405, ppl: 23.090113 +epoch: 0, batch: 17276, sum loss: 5037.614746, avg loss: 3.119266, ppl: 22.629765 +epoch: 0, batch: 17277, sum loss: 5294.952637, avg loss: 3.205177, ppl: 24.659872 +epoch: 0, batch: 17278, sum loss: 6581.750000, avg loss: 3.532877, ppl: 34.222282 +epoch: 0, batch: 17279, sum loss: 5292.750977, avg loss: 3.161739, ppl: 
23.611624 +epoch: 0, batch: 17280, sum loss: 5611.245605, avg loss: 3.275683, ppl: 26.461304 +epoch: 0, batch: 17281, sum loss: 6140.343750, avg loss: 3.416997, ppl: 30.477758 +epoch: 0, batch: 17282, sum loss: 7075.184570, avg loss: 3.688834, ppl: 39.998199 +epoch: 0, batch: 17283, sum loss: 5178.116211, avg loss: 3.212231, ppl: 24.834429 +epoch: 0, batch: 17284, sum loss: 5852.555176, avg loss: 3.489896, ppl: 32.782524 +epoch: 0, batch: 17285, sum loss: 6722.052734, avg loss: 3.486542, ppl: 32.672768 +epoch: 0, batch: 17286, sum loss: 5742.190430, avg loss: 3.535831, ppl: 34.323540 +epoch: 0, batch: 17287, sum loss: 5448.351562, avg loss: 3.435278, ppl: 31.040054 +epoch: 0, batch: 17288, sum loss: 6330.250977, avg loss: 3.522677, ppl: 33.875004 +epoch: 0, batch: 17289, sum loss: 5471.929199, avg loss: 3.268775, ppl: 26.279133 +epoch: 0, batch: 17290, sum loss: 4617.363770, avg loss: 3.270088, ppl: 26.313648 +epoch: 0, batch: 17291, sum loss: 6162.266602, avg loss: 3.406450, ppl: 30.157980 +epoch: 0, batch: 17292, sum loss: 6857.831543, avg loss: 3.520447, ppl: 33.799549 +epoch: 0, batch: 17293, sum loss: 5661.007812, avg loss: 3.363641, ppl: 28.894203 +epoch: 0, batch: 17294, sum loss: 6093.511230, avg loss: 3.355458, ppl: 28.658714 +epoch: 0, batch: 17295, sum loss: 5857.200195, avg loss: 3.617789, ppl: 37.255108 +epoch: 0, batch: 17296, sum loss: 6242.112305, avg loss: 3.473630, ppl: 32.253601 +epoch: 0, batch: 17297, sum loss: 5269.784668, avg loss: 3.345895, ppl: 28.385965 +epoch: 0, batch: 17298, sum loss: 5430.208984, avg loss: 3.253571, ppl: 25.882591 +epoch: 0, batch: 17299, sum loss: 5471.353516, avg loss: 3.383645, ppl: 29.478018 +epoch: 0, batch: 17300, sum loss: 5239.681641, avg loss: 3.303709, ppl: 27.213375 +epoch: 0, batch: 17301, sum loss: 6315.408691, avg loss: 3.473822, ppl: 32.259808 +epoch: 0, batch: 17302, sum loss: 5648.447266, avg loss: 3.408839, ppl: 30.230141 +epoch: 0, batch: 17303, sum loss: 6849.481445, avg loss: 3.349380, ppl: 
28.485062 +epoch: 0, batch: 17304, sum loss: 5788.382812, avg loss: 3.715265, ppl: 41.069469 +epoch: 0, batch: 17305, sum loss: 4537.269531, avg loss: 3.179586, ppl: 24.036810 +epoch: 0, batch: 17306, sum loss: 6217.834961, avg loss: 3.351933, ppl: 28.557869 +epoch: 0, batch: 17307, sum loss: 6360.041992, avg loss: 3.551112, ppl: 34.852058 +epoch: 0, batch: 17308, sum loss: 5916.866699, avg loss: 3.503177, ppl: 33.220840 +epoch: 0, batch: 17309, sum loss: 6019.571289, avg loss: 3.300204, ppl: 27.118158 +epoch: 0, batch: 17310, sum loss: 5947.718262, avg loss: 3.206317, ppl: 24.687998 +epoch: 0, batch: 17311, sum loss: 5161.466797, avg loss: 3.506431, ppl: 33.329117 +epoch: 0, batch: 17312, sum loss: 5385.019043, avg loss: 3.386804, ppl: 29.571301 +epoch: 0, batch: 17313, sum loss: 5337.928711, avg loss: 3.258809, ppl: 26.018520 +epoch: 0, batch: 17314, sum loss: 4181.635742, avg loss: 2.848526, ppl: 17.262314 +epoch: 0, batch: 17315, sum loss: 5373.426270, avg loss: 3.292541, ppl: 26.911152 +epoch: 0, batch: 17316, sum loss: 5590.086426, avg loss: 3.433714, ppl: 30.991529 +epoch: 0, batch: 17317, sum loss: 5572.452148, avg loss: 3.397837, ppl: 29.899349 +epoch: 0, batch: 17318, sum loss: 5108.526367, avg loss: 3.094202, ppl: 22.069611 +epoch: 0, batch: 17319, sum loss: 5270.425293, avg loss: 3.365533, ppl: 28.948933 +epoch: 0, batch: 17320, sum loss: 5570.093262, avg loss: 3.295913, ppl: 27.002062 +epoch: 0, batch: 17321, sum loss: 6257.523438, avg loss: 3.631761, ppl: 37.779274 +epoch: 0, batch: 17322, sum loss: 5524.096191, avg loss: 3.376587, ppl: 29.270704 +epoch: 0, batch: 17323, sum loss: 5087.894043, avg loss: 3.083572, ppl: 21.836266 +epoch: 0, batch: 17324, sum loss: 6505.976074, avg loss: 3.351868, ppl: 28.556030 +epoch: 0, batch: 17325, sum loss: 6720.540527, avg loss: 3.474943, ppl: 32.296001 +epoch: 0, batch: 17326, sum loss: 5459.778809, avg loss: 3.279146, ppl: 26.553099 +epoch: 0, batch: 17327, sum loss: 5417.450684, avg loss: 3.390144, ppl: 
29.670235 +epoch: 0, batch: 17328, sum loss: 6313.489258, avg loss: 3.589249, ppl: 36.206886 +epoch: 0, batch: 17329, sum loss: 6002.902344, avg loss: 3.444006, ppl: 31.312143 +epoch: 0, batch: 17330, sum loss: 6658.685547, avg loss: 3.386920, ppl: 29.574736 +epoch: 0, batch: 17331, sum loss: 4812.612305, avg loss: 3.262788, ppl: 26.122267 +epoch: 0, batch: 17332, sum loss: 6268.475586, avg loss: 3.463246, ppl: 31.920433 +epoch: 0, batch: 17333, sum loss: 6900.075195, avg loss: 3.542133, ppl: 34.540520 +epoch: 0, batch: 17334, sum loss: 5597.915039, avg loss: 3.474808, ppl: 32.291618 +epoch: 0, batch: 17335, sum loss: 3756.335693, avg loss: 3.135505, ppl: 23.000257 +epoch: 0, batch: 17336, sum loss: 5639.258789, avg loss: 3.432294, ppl: 30.947544 +epoch: 0, batch: 17337, sum loss: 5685.878418, avg loss: 3.269625, ppl: 26.301485 +epoch: 0, batch: 17338, sum loss: 5317.625488, avg loss: 3.455247, ppl: 31.666115 +epoch: 0, batch: 17339, sum loss: 5360.155273, avg loss: 3.339660, ppl: 28.209545 +epoch: 0, batch: 17340, sum loss: 5741.897461, avg loss: 3.618083, ppl: 37.266060 +epoch: 0, batch: 17341, sum loss: 6658.811523, avg loss: 3.484464, ppl: 32.604958 +epoch: 0, batch: 17342, sum loss: 5512.395508, avg loss: 3.304794, ppl: 27.242918 +epoch: 0, batch: 17343, sum loss: 4371.914551, avg loss: 2.853730, ppl: 17.352388 +epoch: 0, batch: 17344, sum loss: 5317.021973, avg loss: 3.174342, ppl: 23.911074 +epoch: 0, batch: 17345, sum loss: 5808.755859, avg loss: 3.474137, ppl: 32.269955 +epoch: 0, batch: 17346, sum loss: 6218.719727, avg loss: 3.286850, ppl: 26.758434 +epoch: 0, batch: 17347, sum loss: 5341.182617, avg loss: 3.229252, ppl: 25.260757 +epoch: 0, batch: 17348, sum loss: 6375.849609, avg loss: 3.357477, ppl: 28.716660 +epoch: 0, batch: 17349, sum loss: 6082.084961, avg loss: 3.399712, ppl: 29.955475 +epoch: 0, batch: 17350, sum loss: 5232.593750, avg loss: 3.110936, ppl: 22.442034 +epoch: 0, batch: 17351, sum loss: 5377.999023, avg loss: 3.269300, ppl: 
26.292934 +epoch: 0, batch: 17352, sum loss: 6167.555664, avg loss: 3.600441, ppl: 36.614376 +epoch: 0, batch: 17353, sum loss: 5163.792480, avg loss: 3.489049, ppl: 32.754780 +epoch: 0, batch: 17354, sum loss: 4719.492676, avg loss: 3.098813, ppl: 22.171625 +epoch: 0, batch: 17355, sum loss: 5694.326172, avg loss: 3.409776, ppl: 30.258472 +epoch: 0, batch: 17356, sum loss: 5098.911133, avg loss: 3.206862, ppl: 24.701456 +epoch: 0, batch: 17357, sum loss: 4862.165527, avg loss: 3.298620, ppl: 27.075243 +epoch: 0, batch: 17358, sum loss: 5681.068359, avg loss: 3.438904, ppl: 31.152782 +epoch: 0, batch: 17359, sum loss: 6153.066406, avg loss: 3.424077, ppl: 30.694309 +epoch: 0, batch: 17360, sum loss: 5525.315918, avg loss: 3.402288, ppl: 30.032736 +epoch: 0, batch: 17361, sum loss: 5305.794922, avg loss: 3.318196, ppl: 27.610485 +epoch: 0, batch: 17362, sum loss: 4957.057617, avg loss: 3.117646, ppl: 22.593138 +epoch: 0, batch: 17363, sum loss: 6160.170898, avg loss: 3.439515, ppl: 31.171825 +epoch: 0, batch: 17364, sum loss: 5223.602051, avg loss: 3.274986, ppl: 26.442842 +epoch: 0, batch: 17365, sum loss: 5288.546875, avg loss: 3.481598, ppl: 32.511623 +epoch: 0, batch: 17366, sum loss: 5798.182129, avg loss: 3.376926, ppl: 29.280630 +epoch: 0, batch: 17367, sum loss: 5781.109863, avg loss: 3.380766, ppl: 29.393274 +epoch: 0, batch: 17368, sum loss: 6850.247559, avg loss: 3.377834, ppl: 29.307226 +epoch: 0, batch: 17369, sum loss: 5635.904785, avg loss: 3.175158, ppl: 23.930590 +epoch: 0, batch: 17370, sum loss: 5124.386230, avg loss: 3.196747, ppl: 24.452864 +epoch: 0, batch: 17371, sum loss: 4955.331055, avg loss: 3.106791, ppl: 22.349205 +epoch: 0, batch: 17372, sum loss: 6400.857422, avg loss: 3.536386, ppl: 34.342564 +epoch: 0, batch: 17373, sum loss: 5194.012695, avg loss: 3.274913, ppl: 26.440933 +epoch: 0, batch: 17374, sum loss: 5617.336914, avg loss: 3.213580, ppl: 24.867947 +epoch: 0, batch: 17375, sum loss: 5365.169922, avg loss: 3.404296, ppl: 
30.093094 +epoch: 0, batch: 17376, sum loss: 4506.272461, avg loss: 3.182396, ppl: 24.104431 +epoch: 0, batch: 17377, sum loss: 5006.292480, avg loss: 2.885471, ppl: 17.912004 +epoch: 0, batch: 17378, sum loss: 6892.083984, avg loss: 3.689552, ppl: 40.026924 +epoch: 0, batch: 17379, sum loss: 5270.045898, avg loss: 3.253115, ppl: 25.870800 +epoch: 0, batch: 17380, sum loss: 5697.990723, avg loss: 3.257856, ppl: 25.993757 +epoch: 0, batch: 17381, sum loss: 6205.677246, avg loss: 3.533985, ppl: 34.260220 +epoch: 0, batch: 17382, sum loss: 5735.325684, avg loss: 3.232991, ppl: 25.355375 +epoch: 0, batch: 17383, sum loss: 5436.234863, avg loss: 3.412577, ppl: 30.343336 +epoch: 0, batch: 17384, sum loss: 6044.020996, avg loss: 3.455701, ppl: 31.680485 +epoch: 0, batch: 17385, sum loss: 4871.826660, avg loss: 3.307418, ppl: 27.314510 +epoch: 0, batch: 17386, sum loss: 5394.149902, avg loss: 3.390415, ppl: 29.678257 +epoch: 0, batch: 17387, sum loss: 5162.683594, avg loss: 3.113802, ppl: 22.506451 +epoch: 0, batch: 17388, sum loss: 6093.455566, avg loss: 3.387135, ppl: 29.581076 +epoch: 0, batch: 17389, sum loss: 5744.572266, avg loss: 3.111903, ppl: 22.463745 +epoch: 0, batch: 17390, sum loss: 6511.115723, avg loss: 3.653825, ppl: 38.622105 +epoch: 0, batch: 17391, sum loss: 5039.868164, avg loss: 3.218307, ppl: 24.985771 +epoch: 0, batch: 17392, sum loss: 5291.967773, avg loss: 3.434113, ppl: 31.003893 +epoch: 0, batch: 17393, sum loss: 6602.380371, avg loss: 3.366844, ppl: 28.986891 +epoch: 0, batch: 17394, sum loss: 6332.520020, avg loss: 3.306799, ppl: 27.297604 +epoch: 0, batch: 17395, sum loss: 5690.493164, avg loss: 3.482554, ppl: 32.542728 +epoch: 0, batch: 17396, sum loss: 5248.485352, avg loss: 3.288525, ppl: 26.803289 +epoch: 0, batch: 17397, sum loss: 5117.047363, avg loss: 3.560924, ppl: 35.195698 +epoch: 0, batch: 17398, sum loss: 5750.712891, avg loss: 3.357100, ppl: 28.705830 +epoch: 0, batch: 17399, sum loss: 5702.854980, avg loss: 3.555396, ppl: 
35.001682 +epoch: 0, batch: 17400, sum loss: 4921.380859, avg loss: 3.179187, ppl: 24.027201 +epoch: 0, batch: 17401, sum loss: 6146.715820, avg loss: 3.353364, ppl: 28.598772 +epoch: 0, batch: 17402, sum loss: 6319.549316, avg loss: 3.276075, ppl: 26.471678 +epoch: 0, batch: 17403, sum loss: 4867.299805, avg loss: 3.359075, ppl: 28.762583 +epoch: 0, batch: 17404, sum loss: 5891.645996, avg loss: 3.315501, ppl: 27.536198 +epoch: 0, batch: 17405, sum loss: 5181.036133, avg loss: 3.212050, ppl: 24.829929 +epoch: 0, batch: 17406, sum loss: 5386.596191, avg loss: 3.250812, ppl: 25.811298 +epoch: 0, batch: 17407, sum loss: 4928.931641, avg loss: 3.033189, ppl: 20.763332 +epoch: 0, batch: 17408, sum loss: 6089.521484, avg loss: 3.446249, ppl: 31.382448 +epoch: 0, batch: 17409, sum loss: 5470.950684, avg loss: 3.423624, ppl: 30.680393 +epoch: 0, batch: 17410, sum loss: 6124.542969, avg loss: 3.546348, ppl: 34.686413 +epoch: 0, batch: 17411, sum loss: 4566.190430, avg loss: 3.114727, ppl: 22.527290 +epoch: 0, batch: 17412, sum loss: 5036.598633, avg loss: 3.449725, ppl: 31.491735 +epoch: 0, batch: 17413, sum loss: 5813.132812, avg loss: 3.405468, ppl: 30.128378 +epoch: 0, batch: 17414, sum loss: 5144.495605, avg loss: 3.310486, ppl: 27.398447 +epoch: 0, batch: 17415, sum loss: 6883.065918, avg loss: 3.636062, ppl: 37.942131 +epoch: 0, batch: 17416, sum loss: 6959.583008, avg loss: 3.474580, ppl: 32.284252 +epoch: 0, batch: 17417, sum loss: 6167.994141, avg loss: 3.565315, ppl: 35.350571 +epoch: 0, batch: 17418, sum loss: 6674.430176, avg loss: 3.345579, ppl: 28.376999 +epoch: 0, batch: 17419, sum loss: 6281.406250, avg loss: 3.350083, ppl: 28.505110 +epoch: 0, batch: 17420, sum loss: 5408.377930, avg loss: 3.108263, ppl: 22.382139 +epoch: 0, batch: 17421, sum loss: 6351.628906, avg loss: 3.646170, ppl: 38.327606 +epoch: 0, batch: 17422, sum loss: 5493.064941, avg loss: 3.289260, ppl: 26.823017 +epoch: 0, batch: 17423, sum loss: 6233.654785, avg loss: 3.387856, ppl: 
29.602409 +epoch: 0, batch: 17424, sum loss: 5643.566895, avg loss: 3.363270, ppl: 28.883480 +epoch: 0, batch: 17425, sum loss: 5484.359863, avg loss: 3.254813, ppl: 25.914766 +epoch: 0, batch: 17426, sum loss: 6509.480469, avg loss: 3.582543, ppl: 35.964878 +epoch: 0, batch: 17427, sum loss: 5950.775391, avg loss: 3.514930, ppl: 33.613560 +epoch: 0, batch: 17428, sum loss: 6382.909668, avg loss: 3.191455, ppl: 24.323790 +epoch: 0, batch: 17429, sum loss: 6988.042480, avg loss: 3.641502, ppl: 38.149090 +epoch: 0, batch: 17430, sum loss: 5737.875977, avg loss: 3.431744, ppl: 30.930542 +epoch: 0, batch: 17431, sum loss: 5829.511719, avg loss: 3.567633, ppl: 35.432613 +epoch: 0, batch: 17432, sum loss: 5913.206543, avg loss: 3.247231, ppl: 25.719013 +epoch: 0, batch: 17433, sum loss: 6125.489258, avg loss: 3.437424, ppl: 31.106722 +epoch: 0, batch: 17434, sum loss: 4734.299316, avg loss: 3.110578, ppl: 22.434004 +epoch: 0, batch: 17435, sum loss: 6013.321289, avg loss: 3.434221, ppl: 31.007257 +epoch: 0, batch: 17436, sum loss: 5275.108398, avg loss: 3.220457, ppl: 25.039551 +epoch: 0, batch: 17437, sum loss: 5730.357910, avg loss: 3.410928, ppl: 30.293329 +epoch: 0, batch: 17438, sum loss: 5571.197266, avg loss: 3.426321, ppl: 30.763243 +epoch: 0, batch: 17439, sum loss: 6410.818359, avg loss: 3.551700, ppl: 34.872547 +epoch: 0, batch: 17440, sum loss: 5697.584473, avg loss: 3.432280, ppl: 30.947117 +epoch: 0, batch: 17441, sum loss: 5967.635742, avg loss: 3.278921, ppl: 26.547110 +epoch: 0, batch: 17442, sum loss: 6463.883789, avg loss: 3.308027, ppl: 27.331135 +epoch: 0, batch: 17443, sum loss: 5837.653809, avg loss: 3.382186, ppl: 29.435059 +epoch: 0, batch: 17444, sum loss: 4959.164551, avg loss: 3.003734, ppl: 20.160679 +epoch: 0, batch: 17445, sum loss: 5400.689453, avg loss: 3.350304, ppl: 28.511391 +epoch: 0, batch: 17446, sum loss: 5781.733398, avg loss: 3.248165, ppl: 25.743055 +epoch: 0, batch: 17447, sum loss: 6620.254395, avg loss: 3.729721, ppl: 
41.667465 +epoch: 0, batch: 17448, sum loss: 6371.499023, avg loss: 3.372948, ppl: 29.164383 +epoch: 0, batch: 17449, sum loss: 6309.891602, avg loss: 3.572985, ppl: 35.622765 +epoch: 0, batch: 17450, sum loss: 5628.314941, avg loss: 3.306883, ppl: 27.299902 +epoch: 0, batch: 17451, sum loss: 6257.876953, avg loss: 3.533527, ppl: 34.244549 +epoch: 0, batch: 17452, sum loss: 6113.492188, avg loss: 3.459815, ppl: 31.811077 +epoch: 0, batch: 17453, sum loss: 5329.840820, avg loss: 3.403474, ppl: 30.068386 +epoch: 0, batch: 17454, sum loss: 4342.647461, avg loss: 3.424801, ppl: 30.716528 +epoch: 0, batch: 17455, sum loss: 6493.306641, avg loss: 3.548255, ppl: 34.752628 +epoch: 0, batch: 17456, sum loss: 6075.294434, avg loss: 3.409256, ppl: 30.242743 +epoch: 0, batch: 17457, sum loss: 5388.959961, avg loss: 3.277956, ppl: 26.521513 +epoch: 0, batch: 17458, sum loss: 5300.509766, avg loss: 3.290199, ppl: 26.848194 +epoch: 0, batch: 17459, sum loss: 6465.409180, avg loss: 3.494816, ppl: 32.944221 +epoch: 0, batch: 17460, sum loss: 6562.650879, avg loss: 3.437743, ppl: 31.116648 +epoch: 0, batch: 17461, sum loss: 5375.775879, avg loss: 3.364065, ppl: 28.906462 +epoch: 0, batch: 17462, sum loss: 5887.483398, avg loss: 3.389455, ppl: 29.649792 +epoch: 0, batch: 17463, sum loss: 4631.452637, avg loss: 3.334379, ppl: 28.060951 +epoch: 0, batch: 17464, sum loss: 5943.308105, avg loss: 3.292691, ppl: 26.915207 +epoch: 0, batch: 17465, sum loss: 6420.280762, avg loss: 3.523755, ppl: 33.911514 +epoch: 0, batch: 17466, sum loss: 6259.943359, avg loss: 3.475815, ppl: 32.324173 +epoch: 0, batch: 17467, sum loss: 4568.910156, avg loss: 3.210759, ppl: 24.797899 +epoch: 0, batch: 17468, sum loss: 5346.538086, avg loss: 3.341586, ppl: 28.263922 +epoch: 0, batch: 17469, sum loss: 5592.464844, avg loss: 3.081248, ppl: 21.785574 +epoch: 0, batch: 17470, sum loss: 5016.693848, avg loss: 3.285327, ppl: 26.717712 +epoch: 0, batch: 17471, sum loss: 6136.275879, avg loss: 3.654721, ppl: 
38.656715 +epoch: 0, batch: 17472, sum loss: 5169.876465, avg loss: 3.167817, ppl: 23.755560 +epoch: 0, batch: 17473, sum loss: 6334.662598, avg loss: 3.365921, ppl: 28.960144 +epoch: 0, batch: 17474, sum loss: 5558.048828, avg loss: 3.467279, ppl: 32.049423 +epoch: 0, batch: 17475, sum loss: 6011.001953, avg loss: 3.367508, ppl: 29.006159 +epoch: 0, batch: 17476, sum loss: 6321.708496, avg loss: 3.652056, ppl: 38.553841 +epoch: 0, batch: 17477, sum loss: 5571.655273, avg loss: 3.342324, ppl: 28.284798 +epoch: 0, batch: 17478, sum loss: 6687.451172, avg loss: 3.738095, ppl: 42.017853 +epoch: 0, batch: 17479, sum loss: 4314.257324, avg loss: 3.429457, ppl: 30.859894 +epoch: 0, batch: 17480, sum loss: 5706.055664, avg loss: 3.262467, ppl: 26.113892 +epoch: 0, batch: 17481, sum loss: 5036.880859, avg loss: 3.224636, ppl: 25.144409 +epoch: 0, batch: 17482, sum loss: 4787.643555, avg loss: 3.084822, ppl: 21.863579 +epoch: 0, batch: 17483, sum loss: 5356.147461, avg loss: 3.195792, ppl: 24.429520 +epoch: 0, batch: 17484, sum loss: 4998.657227, avg loss: 3.220784, ppl: 25.047760 +epoch: 0, batch: 17485, sum loss: 5669.789551, avg loss: 3.374875, ppl: 29.220627 +epoch: 0, batch: 17486, sum loss: 5825.681152, avg loss: 3.494710, ppl: 32.940723 +epoch: 0, batch: 17487, sum loss: 5478.156250, avg loss: 3.578156, ppl: 35.807442 +epoch: 0, batch: 17488, sum loss: 5976.177246, avg loss: 3.414958, ppl: 30.415686 +epoch: 0, batch: 17489, sum loss: 5607.165527, avg loss: 3.019475, ppl: 20.480536 +epoch: 0, batch: 17490, sum loss: 4925.600098, avg loss: 3.107634, ppl: 22.368061 +epoch: 0, batch: 17491, sum loss: 5464.237793, avg loss: 3.184288, ppl: 24.150082 +epoch: 0, batch: 17492, sum loss: 4514.879395, avg loss: 3.319764, ppl: 27.653833 +epoch: 0, batch: 17493, sum loss: 5007.683105, avg loss: 3.096898, ppl: 22.129190 +epoch: 0, batch: 17494, sum loss: 5873.840820, avg loss: 3.494254, ppl: 32.925713 +epoch: 0, batch: 17495, sum loss: 6150.126953, avg loss: 3.472686, ppl: 
32.223179 +epoch: 0, batch: 17496, sum loss: 6369.552734, avg loss: 3.465480, ppl: 31.991816 +epoch: 0, batch: 17497, sum loss: 6379.761230, avg loss: 3.422619, ppl: 30.649570 +epoch: 0, batch: 17498, sum loss: 5629.298340, avg loss: 3.409630, ppl: 30.254044 +epoch: 0, batch: 17499, sum loss: 6183.309082, avg loss: 3.537362, ppl: 34.376125 +epoch: 0, batch: 17500, sum loss: 6418.505859, avg loss: 3.624227, ppl: 37.495720 +epoch: 0, batch: 17501, sum loss: 5691.014648, avg loss: 3.208013, ppl: 24.729889 +epoch: 0, batch: 17502, sum loss: 5124.240234, avg loss: 3.331756, ppl: 27.987434 +epoch: 0, batch: 17503, sum loss: 5007.108398, avg loss: 3.161053, ppl: 23.595432 +epoch: 0, batch: 17504, sum loss: 6557.287109, avg loss: 3.499086, ppl: 33.085194 +epoch: 0, batch: 17505, sum loss: 5887.357422, avg loss: 3.199651, ppl: 24.523958 +epoch: 0, batch: 17506, sum loss: 5085.247070, avg loss: 3.334588, ppl: 28.066826 +epoch: 0, batch: 17507, sum loss: 5771.653809, avg loss: 3.470628, ppl: 32.156925 +epoch: 0, batch: 17508, sum loss: 7600.569824, avg loss: 3.641864, ppl: 38.162907 +epoch: 0, batch: 17509, sum loss: 6976.259766, avg loss: 3.642956, ppl: 38.204586 +epoch: 0, batch: 17510, sum loss: 5292.716309, avg loss: 3.215502, ppl: 24.915796 +epoch: 0, batch: 17511, sum loss: 5578.231445, avg loss: 3.424329, ppl: 30.702024 +epoch: 0, batch: 17512, sum loss: 4883.135742, avg loss: 3.132223, ppl: 22.924889 +epoch: 0, batch: 17513, sum loss: 4355.297852, avg loss: 2.932860, ppl: 18.781275 +epoch: 0, batch: 17514, sum loss: 5072.649902, avg loss: 3.226876, ppl: 25.200819 +epoch: 0, batch: 17515, sum loss: 4938.046875, avg loss: 3.121395, ppl: 22.677990 +epoch: 0, batch: 17516, sum loss: 5889.494141, avg loss: 3.526643, ppl: 34.009613 +epoch: 0, batch: 17517, sum loss: 4950.963867, avg loss: 3.143469, ppl: 23.184156 +epoch: 0, batch: 17518, sum loss: 5382.054688, avg loss: 3.201698, ppl: 24.574223 +epoch: 0, batch: 17519, sum loss: 5620.776367, avg loss: 3.482513, ppl: 
32.541409 +epoch: 0, batch: 17520, sum loss: 5525.095703, avg loss: 3.308441, ppl: 27.342457 +epoch: 0, batch: 17521, sum loss: 5028.021973, avg loss: 3.025284, ppl: 20.599855 +epoch: 0, batch: 17522, sum loss: 5338.569824, avg loss: 3.229625, ppl: 25.270172 +epoch: 0, batch: 17523, sum loss: 6420.518066, avg loss: 3.607033, ppl: 36.856529 +epoch: 0, batch: 17524, sum loss: 4959.903809, avg loss: 3.378681, ppl: 29.332054 +epoch: 0, batch: 17525, sum loss: 5708.502930, avg loss: 3.385826, ppl: 29.542395 +epoch: 0, batch: 17526, sum loss: 5412.134766, avg loss: 3.359488, ppl: 28.774454 +epoch: 0, batch: 17527, sum loss: 5568.320312, avg loss: 3.458584, ppl: 31.771957 +epoch: 0, batch: 17528, sum loss: 6572.293457, avg loss: 3.546839, ppl: 34.703465 +epoch: 0, batch: 17529, sum loss: 5527.250977, avg loss: 3.426690, ppl: 30.774612 +epoch: 0, batch: 17530, sum loss: 4783.244141, avg loss: 3.247281, ppl: 25.720301 +epoch: 0, batch: 17531, sum loss: 6402.524414, avg loss: 3.475855, ppl: 32.325451 +epoch: 0, batch: 17532, sum loss: 4658.826172, avg loss: 3.081234, ppl: 21.785278 +epoch: 0, batch: 17533, sum loss: 5308.737305, avg loss: 3.081101, ppl: 21.782375 +epoch: 0, batch: 17534, sum loss: 4502.478516, avg loss: 3.308213, ppl: 27.336245 +epoch: 0, batch: 17535, sum loss: 5808.417480, avg loss: 3.319096, ppl: 27.635345 +epoch: 0, batch: 17536, sum loss: 5294.122559, avg loss: 3.315042, ppl: 27.523563 +epoch: 0, batch: 17537, sum loss: 5488.874023, avg loss: 3.316540, ppl: 27.564817 +epoch: 0, batch: 17538, sum loss: 5124.747070, avg loss: 3.113455, ppl: 22.498650 +epoch: 0, batch: 17539, sum loss: 4695.770996, avg loss: 3.145191, ppl: 23.224121 +epoch: 0, batch: 17540, sum loss: 5679.712891, avg loss: 3.113878, ppl: 22.508158 +epoch: 0, batch: 17541, sum loss: 5292.810547, avg loss: 3.271205, ppl: 26.343075 +epoch: 0, batch: 17542, sum loss: 5527.793457, avg loss: 3.360361, ppl: 28.799574 +epoch: 0, batch: 17543, sum loss: 5773.775391, avg loss: 3.410381, ppl: 
30.276787 +epoch: 0, batch: 17544, sum loss: 6170.897461, avg loss: 3.278904, ppl: 26.546661 +epoch: 0, batch: 17545, sum loss: 5501.242188, avg loss: 3.446894, ppl: 31.402695 +epoch: 0, batch: 17546, sum loss: 5581.259766, avg loss: 3.281164, ppl: 26.606716 +epoch: 0, batch: 17547, sum loss: 6048.038574, avg loss: 3.456022, ppl: 31.690660 +epoch: 0, batch: 17548, sum loss: 6675.781738, avg loss: 3.486048, ppl: 32.656631 +epoch: 0, batch: 17549, sum loss: 4740.894531, avg loss: 3.116959, ppl: 22.577620 +epoch: 0, batch: 17550, sum loss: 7377.008789, avg loss: 3.605576, ppl: 36.802879 +epoch: 0, batch: 17551, sum loss: 4539.113770, avg loss: 2.915295, ppl: 18.454248 +epoch: 0, batch: 17552, sum loss: 5990.062500, avg loss: 3.584718, ppl: 36.043175 +epoch: 0, batch: 17553, sum loss: 6074.206055, avg loss: 3.566768, ppl: 35.401997 +epoch: 0, batch: 17554, sum loss: 5863.882812, avg loss: 3.445289, ppl: 31.352354 +epoch: 0, batch: 17555, sum loss: 5524.228027, avg loss: 3.360236, ppl: 28.795984 +epoch: 0, batch: 17556, sum loss: 5758.158691, avg loss: 3.379201, ppl: 29.347319 +epoch: 0, batch: 17557, sum loss: 6003.226562, avg loss: 3.329576, ppl: 27.926512 +epoch: 0, batch: 17558, sum loss: 7031.995117, avg loss: 3.416907, ppl: 30.475019 +epoch: 0, batch: 17559, sum loss: 6510.890137, avg loss: 3.459559, ppl: 31.802948 +epoch: 0, batch: 17560, sum loss: 5487.195312, avg loss: 3.075782, ppl: 21.666824 +epoch: 0, batch: 17561, sum loss: 5473.846191, avg loss: 3.356129, ppl: 28.677961 +epoch: 0, batch: 17562, sum loss: 7460.750977, avg loss: 3.735980, ppl: 41.929077 +epoch: 0, batch: 17563, sum loss: 5441.248535, avg loss: 3.459153, ppl: 31.790052 +epoch: 0, batch: 17564, sum loss: 6498.697266, avg loss: 3.547324, ppl: 34.720280 +epoch: 0, batch: 17565, sum loss: 6114.360352, avg loss: 3.489932, ppl: 32.783714 +epoch: 0, batch: 17566, sum loss: 6200.722656, avg loss: 3.561587, ppl: 35.219040 +epoch: 0, batch: 17567, sum loss: 5614.276367, avg loss: 3.533214, ppl: 
34.233807 +epoch: 0, batch: 17568, sum loss: 6092.228516, avg loss: 3.420679, ppl: 30.590166 +epoch: 0, batch: 17569, sum loss: 5212.020996, avg loss: 3.235271, ppl: 25.413252 +epoch: 0, batch: 17570, sum loss: 7068.578125, avg loss: 3.569989, ppl: 35.516193 +epoch: 0, batch: 17571, sum loss: 5472.680664, avg loss: 3.508129, ppl: 33.385735 +epoch: 0, batch: 17572, sum loss: 6095.721191, avg loss: 3.441966, ppl: 31.248320 +epoch: 0, batch: 17573, sum loss: 6928.304688, avg loss: 3.578670, ppl: 35.825844 +epoch: 0, batch: 17574, sum loss: 7363.466797, avg loss: 3.606007, ppl: 36.818745 +epoch: 0, batch: 17575, sum loss: 6157.126953, avg loss: 3.530463, ppl: 34.139755 +epoch: 0, batch: 17576, sum loss: 5386.910645, avg loss: 3.161333, ppl: 23.602028 +epoch: 0, batch: 17577, sum loss: 5643.248535, avg loss: 3.232101, ppl: 25.332830 +epoch: 0, batch: 17578, sum loss: 5366.498047, avg loss: 3.341530, ppl: 28.262344 +epoch: 0, batch: 17579, sum loss: 5443.822754, avg loss: 3.354173, ppl: 28.621923 +epoch: 0, batch: 17580, sum loss: 5657.596680, avg loss: 3.404089, ppl: 30.086887 +epoch: 0, batch: 17581, sum loss: 6396.025391, avg loss: 3.459181, ppl: 31.790924 +epoch: 0, batch: 17582, sum loss: 4973.030762, avg loss: 3.065987, ppl: 21.455626 +epoch: 0, batch: 17583, sum loss: 5006.871582, avg loss: 3.162900, ppl: 23.639061 +epoch: 0, batch: 17584, sum loss: 5215.549805, avg loss: 3.391125, ppl: 29.699337 +epoch: 0, batch: 17585, sum loss: 5905.974121, avg loss: 3.359485, ppl: 28.774359 +epoch: 0, batch: 17586, sum loss: 6227.607422, avg loss: 3.450198, ppl: 31.506628 +epoch: 0, batch: 17587, sum loss: 6133.659180, avg loss: 3.553684, ppl: 34.941814 +epoch: 0, batch: 17588, sum loss: 5964.898438, avg loss: 3.478075, ppl: 32.397289 +epoch: 0, batch: 17589, sum loss: 5378.348145, avg loss: 3.150760, ppl: 23.353817 +epoch: 0, batch: 17590, sum loss: 6006.710449, avg loss: 3.460087, ppl: 31.819731 +epoch: 0, batch: 17591, sum loss: 5117.362793, avg loss: 3.244999, ppl: 
25.661676 +epoch: 0, batch: 17592, sum loss: 5485.040039, avg loss: 3.432441, ppl: 30.952089 +epoch: 0, batch: 17593, sum loss: 6050.350098, avg loss: 3.649186, ppl: 38.443356 +epoch: 0, batch: 17594, sum loss: 6141.958496, avg loss: 3.438947, ppl: 31.154127 +epoch: 0, batch: 17595, sum loss: 6333.411133, avg loss: 3.443943, ppl: 31.310171 +epoch: 0, batch: 17596, sum loss: 5889.594238, avg loss: 3.460396, ppl: 31.829580 +epoch: 0, batch: 17597, sum loss: 5445.161621, avg loss: 3.296103, ppl: 27.007179 +epoch: 0, batch: 17598, sum loss: 5674.374512, avg loss: 3.283782, ppl: 26.676460 +epoch: 0, batch: 17599, sum loss: 5397.895996, avg loss: 3.329979, ppl: 27.937754 +epoch: 0, batch: 17600, sum loss: 5945.681641, avg loss: 3.334651, ppl: 28.068571 +epoch: 0, batch: 17601, sum loss: 4453.003906, avg loss: 3.160400, ppl: 23.580030 +epoch: 0, batch: 17602, sum loss: 5714.301758, avg loss: 3.270922, ppl: 26.335621 +epoch: 0, batch: 17603, sum loss: 4769.305664, avg loss: 3.152218, ppl: 23.387884 +epoch: 0, batch: 17604, sum loss: 5155.846191, avg loss: 3.356671, ppl: 28.693506 +epoch: 0, batch: 17605, sum loss: 5307.096680, avg loss: 3.094517, ppl: 22.076572 +epoch: 0, batch: 17606, sum loss: 4525.601562, avg loss: 3.134073, ppl: 22.967335 +epoch: 0, batch: 17607, sum loss: 5813.205078, avg loss: 3.346693, ppl: 28.408625 +epoch: 0, batch: 17608, sum loss: 6152.227539, avg loss: 3.339972, ppl: 28.218323 +epoch: 0, batch: 17609, sum loss: 5969.659668, avg loss: 3.336869, ppl: 28.130924 +epoch: 0, batch: 17610, sum loss: 6281.427246, avg loss: 3.694957, ppl: 40.243862 +epoch: 0, batch: 17611, sum loss: 5010.097656, avg loss: 3.081241, ppl: 21.785419 +epoch: 0, batch: 17612, sum loss: 5489.812500, avg loss: 3.437578, ppl: 31.111528 +epoch: 0, batch: 17613, sum loss: 6823.785156, avg loss: 3.432487, ppl: 30.953543 +epoch: 0, batch: 17614, sum loss: 5423.817871, avg loss: 3.273276, ppl: 26.397667 +epoch: 0, batch: 17615, sum loss: 6922.699707, avg loss: 3.622553, ppl: 
37.433025 +epoch: 0, batch: 17616, sum loss: 5693.063965, avg loss: 3.317636, ppl: 27.595053 +epoch: 0, batch: 17617, sum loss: 5685.738281, avg loss: 3.179943, ppl: 24.045385 +epoch: 0, batch: 17618, sum loss: 4705.646484, avg loss: 3.279196, ppl: 26.554415 +epoch: 0, batch: 17619, sum loss: 5353.898926, avg loss: 3.397144, ppl: 29.878639 +epoch: 0, batch: 17620, sum loss: 5161.460449, avg loss: 3.050508, ppl: 21.126083 +epoch: 0, batch: 17621, sum loss: 7285.152344, avg loss: 3.668254, ppl: 39.183430 +epoch: 0, batch: 17622, sum loss: 5266.059570, avg loss: 3.205149, ppl: 24.659166 +epoch: 0, batch: 17623, sum loss: 6540.282715, avg loss: 3.635510, ppl: 37.921188 +epoch: 0, batch: 17624, sum loss: 6067.111328, avg loss: 3.348295, ppl: 28.454191 +epoch: 0, batch: 17625, sum loss: 5690.537109, avg loss: 3.310377, ppl: 27.395441 +epoch: 0, batch: 17626, sum loss: 5586.434082, avg loss: 3.365322, ppl: 28.942812 +epoch: 0, batch: 17627, sum loss: 5380.951172, avg loss: 3.212508, ppl: 24.841322 +epoch: 0, batch: 17628, sum loss: 4851.542969, avg loss: 3.128010, ppl: 22.828495 +epoch: 0, batch: 17629, sum loss: 6429.791992, avg loss: 3.369912, ppl: 29.075966 +epoch: 0, batch: 17630, sum loss: 5765.076172, avg loss: 3.222513, ppl: 25.091101 +epoch: 0, batch: 17631, sum loss: 5783.542480, avg loss: 3.197094, ppl: 24.461342 +epoch: 0, batch: 17632, sum loss: 6302.907227, avg loss: 3.556946, ppl: 35.055958 +epoch: 0, batch: 17633, sum loss: 5194.247559, avg loss: 3.196460, ppl: 24.445839 +epoch: 0, batch: 17634, sum loss: 4796.652832, avg loss: 3.066914, ppl: 21.475519 +epoch: 0, batch: 17635, sum loss: 6392.932617, avg loss: 3.497228, ppl: 33.023781 +epoch: 0, batch: 17636, sum loss: 5280.986816, avg loss: 3.449371, ppl: 31.480587 +epoch: 0, batch: 17637, sum loss: 5840.631348, avg loss: 3.499480, ppl: 33.098228 +epoch: 0, batch: 17638, sum loss: 6090.669922, avg loss: 3.357591, ppl: 28.719919 +epoch: 0, batch: 17639, sum loss: 5906.549805, avg loss: 3.357902, ppl: 
28.728863 +epoch: 0, batch: 17640, sum loss: 6462.713867, avg loss: 3.545098, ppl: 34.643089 +epoch: 0, batch: 17641, sum loss: 6442.784180, avg loss: 3.454576, ppl: 31.644869 +epoch: 0, batch: 17642, sum loss: 6459.943359, avg loss: 3.463777, ppl: 31.937363 +epoch: 0, batch: 17643, sum loss: 5405.974609, avg loss: 3.395713, ppl: 29.835907 +epoch: 0, batch: 17644, sum loss: 6579.739746, avg loss: 3.764153, ppl: 43.127182 +epoch: 0, batch: 17645, sum loss: 4608.665527, avg loss: 3.064272, ppl: 21.418867 +epoch: 0, batch: 17646, sum loss: 6072.210938, avg loss: 3.390402, ppl: 29.677889 +epoch: 0, batch: 17647, sum loss: 5771.679199, avg loss: 3.215420, ppl: 24.913759 +epoch: 0, batch: 17648, sum loss: 6045.856934, avg loss: 3.598725, ppl: 36.551586 +epoch: 0, batch: 17649, sum loss: 5821.443848, avg loss: 3.534574, ppl: 34.280411 +epoch: 0, batch: 17650, sum loss: 6360.249023, avg loss: 3.502340, ppl: 33.193020 +epoch: 0, batch: 17651, sum loss: 5815.542480, avg loss: 3.507565, ppl: 33.366913 +epoch: 0, batch: 17652, sum loss: 5544.422852, avg loss: 3.458779, ppl: 31.778154 +epoch: 0, batch: 17653, sum loss: 5955.561035, avg loss: 3.517756, ppl: 33.708710 +epoch: 0, batch: 17654, sum loss: 4954.452148, avg loss: 3.298570, ppl: 27.073893 +epoch: 0, batch: 17655, sum loss: 5318.045898, avg loss: 3.150501, ppl: 23.347765 +epoch: 0, batch: 17656, sum loss: 4911.577148, avg loss: 3.090986, ppl: 21.998764 +epoch: 0, batch: 17657, sum loss: 6076.988281, avg loss: 3.585244, ppl: 36.062145 +epoch: 0, batch: 17658, sum loss: 6019.440430, avg loss: 3.477435, ppl: 32.376572 +epoch: 0, batch: 17659, sum loss: 5565.199707, avg loss: 3.215020, ppl: 24.903788 +epoch: 0, batch: 17660, sum loss: 6142.640625, avg loss: 3.388109, ppl: 29.609900 +epoch: 0, batch: 17661, sum loss: 6684.963867, avg loss: 3.613494, ppl: 37.095444 +epoch: 0, batch: 17662, sum loss: 5907.022461, avg loss: 3.335416, ppl: 28.090075 +epoch: 0, batch: 17663, sum loss: 5568.237793, avg loss: 3.158388, ppl: 
23.532629 +epoch: 0, batch: 17664, sum loss: 5081.009766, avg loss: 3.244578, ppl: 25.650892 +epoch: 0, batch: 17665, sum loss: 5657.586426, avg loss: 3.232907, ppl: 25.353241 +epoch: 0, batch: 17666, sum loss: 6258.026855, avg loss: 3.569896, ppl: 35.512890 +epoch: 0, batch: 17667, sum loss: 5075.000488, avg loss: 3.142415, ppl: 23.159737 +epoch: 0, batch: 17668, sum loss: 5336.244141, avg loss: 3.172559, ppl: 23.868481 +epoch: 0, batch: 17669, sum loss: 5700.202637, avg loss: 3.236912, ppl: 25.455002 +epoch: 0, batch: 17670, sum loss: 4719.595215, avg loss: 3.094816, ppl: 22.083185 +epoch: 0, batch: 17671, sum loss: 7034.834473, avg loss: 3.644992, ppl: 38.282471 +epoch: 0, batch: 17672, sum loss: 5651.719238, avg loss: 3.463063, ppl: 31.914597 +epoch: 0, batch: 17673, sum loss: 4836.057129, avg loss: 3.330618, ppl: 27.955610 +epoch: 0, batch: 17674, sum loss: 5647.070312, avg loss: 3.262317, ppl: 26.109957 +epoch: 0, batch: 17675, sum loss: 5986.650391, avg loss: 3.484663, ppl: 32.611427 +epoch: 0, batch: 17676, sum loss: 6255.175293, avg loss: 3.664426, ppl: 39.033730 +epoch: 0, batch: 17677, sum loss: 5346.009766, avg loss: 3.261751, ppl: 26.095182 +epoch: 0, batch: 17678, sum loss: 5584.274414, avg loss: 3.117964, ppl: 22.600325 +epoch: 0, batch: 17679, sum loss: 6513.277832, avg loss: 3.341856, ppl: 28.271564 +epoch: 0, batch: 17680, sum loss: 5568.614258, avg loss: 3.493484, ppl: 32.900391 +epoch: 0, batch: 17681, sum loss: 5072.601562, avg loss: 3.041128, ppl: 20.928841 +epoch: 0, batch: 17682, sum loss: 5954.657227, avg loss: 3.400718, ppl: 29.985628 +epoch: 0, batch: 17683, sum loss: 6461.229492, avg loss: 3.621766, ppl: 37.403549 +epoch: 0, batch: 17684, sum loss: 6121.597656, avg loss: 3.247532, ppl: 25.726765 +epoch: 0, batch: 17685, sum loss: 6782.247559, avg loss: 3.584697, ppl: 36.042454 +epoch: 0, batch: 17686, sum loss: 4128.886719, avg loss: 2.700384, ppl: 14.885446 +epoch: 0, batch: 17687, sum loss: 6611.082520, avg loss: 3.575491, ppl: 
35.712139 +epoch: 0, batch: 17688, sum loss: 6915.607910, avg loss: 3.752365, ppl: 42.621746 +epoch: 0, batch: 17689, sum loss: 5477.021973, avg loss: 3.475268, ppl: 32.306473 +epoch: 0, batch: 17690, sum loss: 5834.800293, avg loss: 3.292777, ppl: 26.917505 +epoch: 0, batch: 17691, sum loss: 6559.041992, avg loss: 3.202657, ppl: 24.597805 +epoch: 0, batch: 17692, sum loss: 5263.333496, avg loss: 3.159264, ppl: 23.553246 +epoch: 0, batch: 17693, sum loss: 4400.749023, avg loss: 3.127753, ppl: 22.822651 +epoch: 0, batch: 17694, sum loss: 5386.542969, avg loss: 3.430919, ppl: 30.905037 +epoch: 0, batch: 17695, sum loss: 5023.429688, avg loss: 3.133768, ppl: 22.960321 +epoch: 0, batch: 17696, sum loss: 4785.256836, avg loss: 3.055719, ppl: 21.236458 +epoch: 0, batch: 17697, sum loss: 5033.563477, avg loss: 3.080516, ppl: 21.769640 +epoch: 0, batch: 17698, sum loss: 4841.079102, avg loss: 3.180736, ppl: 24.064449 +epoch: 0, batch: 17699, sum loss: 5777.831055, avg loss: 3.761609, ppl: 43.017578 +epoch: 0, batch: 17700, sum loss: 5254.378906, avg loss: 3.465949, ppl: 32.006828 +epoch: 0, batch: 17701, sum loss: 6875.874023, avg loss: 3.444827, ppl: 31.337849 +epoch: 0, batch: 17702, sum loss: 5641.594727, avg loss: 3.149969, ppl: 23.335344 +epoch: 0, batch: 17703, sum loss: 5332.962402, avg loss: 3.084420, ppl: 21.854792 +epoch: 0, batch: 17704, sum loss: 6497.743164, avg loss: 3.318562, ppl: 27.620592 +epoch: 0, batch: 17705, sum loss: 5937.972656, avg loss: 3.227159, ppl: 25.207939 +epoch: 0, batch: 17706, sum loss: 5680.443848, avg loss: 3.554721, ppl: 34.978054 +epoch: 0, batch: 17707, sum loss: 5923.108398, avg loss: 3.034379, ppl: 20.788074 +epoch: 0, batch: 17708, sum loss: 4662.770020, avg loss: 2.994714, ppl: 19.979650 +epoch: 0, batch: 17709, sum loss: 5151.604492, avg loss: 3.030356, ppl: 20.704596 +epoch: 0, batch: 17710, sum loss: 5292.567871, avg loss: 3.104146, ppl: 22.290165 +epoch: 0, batch: 17711, sum loss: 4642.914062, avg loss: 3.177902, ppl: 
23.996344 +epoch: 0, batch: 17712, sum loss: 5548.664551, avg loss: 3.433579, ppl: 30.987362 +epoch: 0, batch: 17713, sum loss: 6070.108398, avg loss: 3.492583, ppl: 32.870728 +epoch: 0, batch: 17714, sum loss: 5856.702148, avg loss: 3.299550, ppl: 27.100449 +epoch: 0, batch: 17715, sum loss: 5847.780273, avg loss: 3.682482, ppl: 39.744900 +epoch: 0, batch: 17716, sum loss: 5516.151367, avg loss: 3.242887, ppl: 25.607552 +epoch: 0, batch: 17717, sum loss: 4717.717773, avg loss: 3.276193, ppl: 26.474789 +epoch: 0, batch: 17718, sum loss: 5591.197266, avg loss: 3.160654, ppl: 23.586023 +epoch: 0, batch: 17719, sum loss: 5888.463379, avg loss: 3.231868, ppl: 25.326923 +epoch: 0, batch: 17720, sum loss: 6806.977051, avg loss: 3.543455, ppl: 34.586212 +epoch: 0, batch: 17721, sum loss: 5735.278320, avg loss: 3.163419, ppl: 23.651321 +epoch: 0, batch: 17722, sum loss: 5225.891113, avg loss: 3.400059, ppl: 29.965881 +epoch: 0, batch: 17723, sum loss: 6194.619629, avg loss: 3.296764, ppl: 27.025047 +epoch: 0, batch: 17724, sum loss: 7180.708496, avg loss: 3.674876, ppl: 39.443783 +epoch: 0, batch: 17725, sum loss: 4743.644043, avg loss: 3.139407, ppl: 23.090168 +epoch: 0, batch: 17726, sum loss: 5980.994629, avg loss: 3.377185, ppl: 29.288204 +epoch: 0, batch: 17727, sum loss: 5738.452148, avg loss: 3.170416, ppl: 23.817381 +epoch: 0, batch: 17728, sum loss: 5350.753906, avg loss: 3.248788, ppl: 25.759098 +epoch: 0, batch: 17729, sum loss: 5134.903809, avg loss: 3.268558, ppl: 26.273415 +epoch: 0, batch: 17730, sum loss: 6450.046875, avg loss: 3.652348, ppl: 38.565113 +epoch: 0, batch: 17731, sum loss: 6006.782227, avg loss: 3.434409, ppl: 31.013090 +epoch: 0, batch: 17732, sum loss: 5768.994141, avg loss: 3.302229, ppl: 27.173138 +epoch: 0, batch: 17733, sum loss: 5808.904297, avg loss: 3.336533, ppl: 28.121469 +epoch: 0, batch: 17734, sum loss: 5530.937012, avg loss: 3.407848, ppl: 30.200172 +epoch: 0, batch: 17735, sum loss: 6230.232422, avg loss: 3.626445, ppl: 
37.578987 +epoch: 0, batch: 17736, sum loss: 5877.223633, avg loss: 3.335541, ppl: 28.093592 +epoch: 0, batch: 17737, sum loss: 4822.237305, avg loss: 3.318815, ppl: 27.627579 +epoch: 0, batch: 17738, sum loss: 6047.587891, avg loss: 3.326506, ppl: 27.840893 +epoch: 0, batch: 17739, sum loss: 6303.341797, avg loss: 3.343948, ppl: 28.330753 +epoch: 0, batch: 17740, sum loss: 6405.702637, avg loss: 3.339782, ppl: 28.212990 +epoch: 0, batch: 17741, sum loss: 5538.563965, avg loss: 3.127365, ppl: 22.813793 +epoch: 0, batch: 17742, sum loss: 5272.720215, avg loss: 3.421623, ppl: 30.619055 +epoch: 0, batch: 17743, sum loss: 6264.829102, avg loss: 3.465060, ppl: 31.978386 +epoch: 0, batch: 17744, sum loss: 5496.245117, avg loss: 3.303032, ppl: 27.194973 +epoch: 0, batch: 17745, sum loss: 5055.274902, avg loss: 3.276264, ppl: 26.476669 +epoch: 0, batch: 17746, sum loss: 5036.074219, avg loss: 2.979926, ppl: 19.686352 +epoch: 0, batch: 17747, sum loss: 6073.321289, avg loss: 3.543361, ppl: 34.582973 +epoch: 0, batch: 17748, sum loss: 4868.317871, avg loss: 3.085119, ppl: 21.870070 +epoch: 0, batch: 17749, sum loss: 6199.351562, avg loss: 3.451755, ppl: 31.555717 +epoch: 0, batch: 17750, sum loss: 4906.485352, avg loss: 2.887867, ppl: 17.954966 +epoch: 0, batch: 17751, sum loss: 6418.663574, avg loss: 3.397916, ppl: 29.901722 +epoch: 0, batch: 17752, sum loss: 6570.231445, avg loss: 3.347036, ppl: 28.418373 +epoch: 0, batch: 17753, sum loss: 5514.970703, avg loss: 3.314286, ppl: 27.502737 +epoch: 0, batch: 17754, sum loss: 6380.976562, avg loss: 3.644190, ppl: 38.251778 +epoch: 0, batch: 17755, sum loss: 5765.151367, avg loss: 3.224358, ppl: 25.137421 +epoch: 0, batch: 17756, sum loss: 6430.492676, avg loss: 3.226539, ppl: 25.192318 +epoch: 0, batch: 17757, sum loss: 6304.992188, avg loss: 3.532209, ppl: 34.199421 +epoch: 0, batch: 17758, sum loss: 6367.238281, avg loss: 3.498483, ppl: 33.065243 +epoch: 0, batch: 17759, sum loss: 6132.422852, avg loss: 3.506245, ppl: 
33.322910 +epoch: 0, batch: 17760, sum loss: 6772.235352, avg loss: 3.581298, ppl: 35.920147 +epoch: 0, batch: 17761, sum loss: 5229.047852, avg loss: 3.061503, ppl: 21.359646 +epoch: 0, batch: 17762, sum loss: 5549.115234, avg loss: 3.338818, ppl: 28.185787 +epoch: 0, batch: 17763, sum loss: 4107.773438, avg loss: 2.866555, ppl: 17.576366 +epoch: 0, batch: 17764, sum loss: 4908.606445, avg loss: 3.263701, ppl: 26.146124 +epoch: 0, batch: 17765, sum loss: 5574.040039, avg loss: 3.081283, ppl: 21.786327 +epoch: 0, batch: 17766, sum loss: 7219.683594, avg loss: 3.556494, ppl: 35.040146 +epoch: 0, batch: 17767, sum loss: 4557.543457, avg loss: 3.385991, ppl: 29.547249 +epoch: 0, batch: 17768, sum loss: 4844.991699, avg loss: 2.945284, ppl: 19.016056 +epoch: 0, batch: 17769, sum loss: 6170.455566, avg loss: 3.337185, ppl: 28.139805 +epoch: 0, batch: 17770, sum loss: 5233.372070, avg loss: 2.988791, ppl: 19.861645 +epoch: 0, batch: 17771, sum loss: 5379.692383, avg loss: 3.351833, ppl: 28.555031 +epoch: 0, batch: 17772, sum loss: 4343.372559, avg loss: 3.196006, ppl: 24.434750 +epoch: 0, batch: 17773, sum loss: 6255.388672, avg loss: 3.421985, ppl: 30.630159 +epoch: 0, batch: 17774, sum loss: 5100.719727, avg loss: 3.292911, ppl: 26.921125 +epoch: 0, batch: 17775, sum loss: 5576.109375, avg loss: 3.480717, ppl: 32.483017 +epoch: 0, batch: 17776, sum loss: 5577.760742, avg loss: 3.374326, ppl: 29.204586 +epoch: 0, batch: 17777, sum loss: 7060.659668, avg loss: 3.462805, ppl: 31.906357 +epoch: 0, batch: 17778, sum loss: 4914.368164, avg loss: 3.071480, ppl: 21.573809 +epoch: 0, batch: 17779, sum loss: 6502.948730, avg loss: 3.569127, ppl: 35.485588 +epoch: 0, batch: 17780, sum loss: 5874.686523, avg loss: 3.453666, ppl: 31.616091 +epoch: 0, batch: 17781, sum loss: 6318.501953, avg loss: 3.443326, ppl: 31.290844 +epoch: 0, batch: 17782, sum loss: 6671.262695, avg loss: 3.544773, ppl: 34.631817 +epoch: 0, batch: 17783, sum loss: 5800.242676, avg loss: 3.556249, ppl: 
35.031559 +epoch: 0, batch: 17784, sum loss: 5498.177734, avg loss: 3.362800, ppl: 28.869904 +epoch: 0, batch: 17785, sum loss: 5519.543457, avg loss: 3.333058, ppl: 28.023897 +epoch: 0, batch: 17786, sum loss: 4560.604004, avg loss: 2.974954, ppl: 19.588715 +epoch: 0, batch: 17787, sum loss: 4789.024902, avg loss: 3.156905, ppl: 23.497757 +epoch: 0, batch: 17788, sum loss: 5708.413574, avg loss: 3.422310, ppl: 30.640123 +epoch: 0, batch: 17789, sum loss: 6072.237305, avg loss: 3.394207, ppl: 29.791035 +epoch: 0, batch: 17790, sum loss: 5923.873535, avg loss: 3.385071, ppl: 29.520075 +epoch: 0, batch: 17791, sum loss: 5899.467285, avg loss: 3.344369, ppl: 28.342691 +epoch: 0, batch: 17792, sum loss: 6006.288086, avg loss: 3.549816, ppl: 34.806900 +epoch: 0, batch: 17793, sum loss: 5819.005859, avg loss: 3.357764, ppl: 28.724897 +epoch: 0, batch: 17794, sum loss: 5439.140625, avg loss: 3.685055, ppl: 39.847305 +epoch: 0, batch: 17795, sum loss: 5485.986328, avg loss: 3.152866, ppl: 23.403034 +epoch: 0, batch: 17796, sum loss: 5360.908203, avg loss: 3.061627, ppl: 21.362278 +epoch: 0, batch: 17797, sum loss: 4916.393066, avg loss: 2.943948, ppl: 18.990675 +epoch: 0, batch: 17798, sum loss: 7143.387207, avg loss: 3.539835, ppl: 34.461239 +epoch: 0, batch: 17799, sum loss: 5373.930664, avg loss: 3.225649, ppl: 25.169895 +epoch: 0, batch: 17800, sum loss: 6086.608398, avg loss: 3.544909, ppl: 34.636524 +epoch: 0, batch: 17801, sum loss: 6732.356934, avg loss: 3.417440, ppl: 30.491262 +epoch: 0, batch: 17802, sum loss: 5188.520508, avg loss: 3.034222, ppl: 20.784809 +epoch: 0, batch: 17803, sum loss: 5540.941406, avg loss: 3.495862, ppl: 32.978703 +epoch: 0, batch: 17804, sum loss: 6236.728516, avg loss: 3.441903, ppl: 31.246367 +epoch: 0, batch: 17805, sum loss: 4843.173340, avg loss: 2.982249, ppl: 19.732155 +epoch: 0, batch: 17806, sum loss: 5235.902344, avg loss: 3.079943, ppl: 21.757156 +epoch: 0, batch: 17807, sum loss: 5567.134277, avg loss: 3.249932, ppl: 
25.788593 +epoch: 0, batch: 17808, sum loss: 5959.931641, avg loss: 3.182024, ppl: 24.095486 +epoch: 0, batch: 17809, sum loss: 7294.884277, avg loss: 3.623887, ppl: 37.482983 +epoch: 0, batch: 17810, sum loss: 6824.382812, avg loss: 3.505076, ppl: 33.283978 +epoch: 0, batch: 17811, sum loss: 5835.631348, avg loss: 2.980404, ppl: 19.695780 +epoch: 0, batch: 17812, sum loss: 5886.632324, avg loss: 3.631482, ppl: 37.768745 +epoch: 0, batch: 17813, sum loss: 5665.095215, avg loss: 3.340268, ppl: 28.226702 +epoch: 0, batch: 17814, sum loss: 6432.838867, avg loss: 3.559955, ppl: 35.161610 +epoch: 0, batch: 17815, sum loss: 5373.636230, avg loss: 3.175908, ppl: 23.948551 +epoch: 0, batch: 17816, sum loss: 5696.097168, avg loss: 3.317471, ppl: 27.590481 +epoch: 0, batch: 17817, sum loss: 5624.012207, avg loss: 3.186409, ppl: 24.201370 +epoch: 0, batch: 17818, sum loss: 6713.820801, avg loss: 3.385689, ppl: 29.538332 +epoch: 0, batch: 17819, sum loss: 6417.541504, avg loss: 3.499205, ppl: 33.089123 +epoch: 0, batch: 17820, sum loss: 4735.125000, avg loss: 3.199409, ppl: 24.518030 +epoch: 0, batch: 17821, sum loss: 5950.021973, avg loss: 3.541680, ppl: 34.524868 +epoch: 0, batch: 17822, sum loss: 5328.672852, avg loss: 3.273140, ppl: 26.394098 +epoch: 0, batch: 17823, sum loss: 6325.466797, avg loss: 3.337977, ppl: 28.162102 +epoch: 0, batch: 17824, sum loss: 4839.989746, avg loss: 3.239618, ppl: 25.523977 +epoch: 0, batch: 17825, sum loss: 5651.025879, avg loss: 3.456285, ppl: 31.698988 +epoch: 0, batch: 17826, sum loss: 5088.034180, avg loss: 3.087399, ppl: 21.920000 +epoch: 0, batch: 17827, sum loss: 5305.773926, avg loss: 3.345381, ppl: 28.371376 +epoch: 0, batch: 17828, sum loss: 7296.537109, avg loss: 3.622908, ppl: 37.446308 +epoch: 0, batch: 17829, sum loss: 6860.937012, avg loss: 3.653321, ppl: 38.602654 +epoch: 0, batch: 17830, sum loss: 5315.114746, avg loss: 3.317799, ppl: 27.599546 +epoch: 0, batch: 17831, sum loss: 5706.471680, avg loss: 3.138873, ppl: 
23.077850 +epoch: 0, batch: 17832, sum loss: 5589.285156, avg loss: 3.289750, ppl: 26.836157 +epoch: 0, batch: 17833, sum loss: 4831.353027, avg loss: 3.210202, ppl: 24.784081 +epoch: 0, batch: 17834, sum loss: 5769.505859, avg loss: 3.461011, ppl: 31.849165 +epoch: 0, batch: 17835, sum loss: 5352.978516, avg loss: 3.174958, ppl: 23.925810 +epoch: 0, batch: 17836, sum loss: 5305.940918, avg loss: 3.392545, ppl: 29.741554 +epoch: 0, batch: 17837, sum loss: 6177.566895, avg loss: 3.468595, ppl: 32.091606 +epoch: 0, batch: 17838, sum loss: 5563.217773, avg loss: 3.264799, ppl: 26.174852 +epoch: 0, batch: 17839, sum loss: 6042.334961, avg loss: 3.119430, ppl: 22.633465 +epoch: 0, batch: 17840, sum loss: 6086.028320, avg loss: 3.724620, ppl: 41.455460 +epoch: 0, batch: 17841, sum loss: 6408.674316, avg loss: 3.534845, ppl: 34.289711 +epoch: 0, batch: 17842, sum loss: 5698.677246, avg loss: 3.194326, ppl: 24.393724 +epoch: 0, batch: 17843, sum loss: 6451.648438, avg loss: 3.525491, ppl: 33.970448 +epoch: 0, batch: 17844, sum loss: 5263.041992, avg loss: 3.238795, ppl: 25.502974 +epoch: 0, batch: 17845, sum loss: 5757.417969, avg loss: 3.364944, ppl: 28.931862 +epoch: 0, batch: 17846, sum loss: 6554.141113, avg loss: 3.546613, ppl: 34.695610 +epoch: 0, batch: 17847, sum loss: 5721.968750, avg loss: 3.296065, ppl: 27.006157 +epoch: 0, batch: 17848, sum loss: 5157.577148, avg loss: 3.436094, ppl: 31.065380 +epoch: 0, batch: 17849, sum loss: 7364.417480, avg loss: 3.636749, ppl: 37.968212 +epoch: 0, batch: 17850, sum loss: 6386.911133, avg loss: 3.584125, ppl: 36.021835 +epoch: 0, batch: 17851, sum loss: 5300.839844, avg loss: 3.404521, ppl: 30.099888 +epoch: 0, batch: 17852, sum loss: 6977.977051, avg loss: 3.565650, ppl: 35.362431 +epoch: 0, batch: 17853, sum loss: 5286.794922, avg loss: 3.415242, ppl: 30.424316 +epoch: 0, batch: 17854, sum loss: 5127.721680, avg loss: 3.360237, ppl: 28.796019 +epoch: 0, batch: 17855, sum loss: 4998.090820, avg loss: 3.139504, ppl: 
23.092415 +epoch: 0, batch: 17856, sum loss: 5701.262207, avg loss: 3.237514, ppl: 25.470324 +epoch: 0, batch: 17857, sum loss: 6259.127930, avg loss: 3.357901, ppl: 28.728836 +epoch: 0, batch: 17858, sum loss: 5122.893555, avg loss: 3.324396, ppl: 27.782204 +epoch: 0, batch: 17859, sum loss: 6213.875000, avg loss: 3.461769, ppl: 31.873306 +epoch: 0, batch: 17860, sum loss: 5718.618164, avg loss: 3.251062, ppl: 25.817747 +epoch: 0, batch: 17861, sum loss: 6092.415039, avg loss: 3.371563, ppl: 29.124018 +epoch: 0, batch: 17862, sum loss: 5619.833496, avg loss: 3.341161, ppl: 28.251915 +epoch: 0, batch: 17863, sum loss: 5113.648438, avg loss: 3.108601, ppl: 22.389690 +epoch: 0, batch: 17864, sum loss: 5837.291504, avg loss: 3.212599, ppl: 24.843559 +epoch: 0, batch: 17865, sum loss: 7034.636719, avg loss: 3.656256, ppl: 38.716125 +epoch: 0, batch: 17866, sum loss: 5033.174805, avg loss: 3.145734, ppl: 23.236727 +epoch: 0, batch: 17867, sum loss: 5786.651367, avg loss: 3.155208, ppl: 23.457912 +epoch: 0, batch: 17868, sum loss: 5479.640137, avg loss: 3.308961, ppl: 27.356697 +epoch: 0, batch: 17869, sum loss: 5377.155273, avg loss: 3.431497, ppl: 30.922895 +epoch: 0, batch: 17870, sum loss: 5057.023438, avg loss: 3.168561, ppl: 23.773249 +epoch: 0, batch: 17871, sum loss: 5137.906250, avg loss: 3.334138, ppl: 28.054180 +epoch: 0, batch: 17872, sum loss: 5785.571289, avg loss: 3.125646, ppl: 22.774611 +epoch: 0, batch: 17873, sum loss: 4491.897461, avg loss: 3.033017, ppl: 20.759764 +epoch: 0, batch: 17874, sum loss: 5176.372559, avg loss: 3.412243, ppl: 30.333202 +epoch: 0, batch: 17875, sum loss: 6118.723633, avg loss: 3.347223, ppl: 28.423693 +epoch: 0, batch: 17876, sum loss: 5986.530273, avg loss: 3.376498, ppl: 29.268087 +epoch: 0, batch: 17877, sum loss: 5790.933594, avg loss: 3.550542, ppl: 34.832188 +epoch: 0, batch: 17878, sum loss: 5571.336914, avg loss: 3.248593, ppl: 25.754074 +epoch: 0, batch: 17879, sum loss: 5975.890137, avg loss: 3.325482, ppl: 
27.812391 +epoch: 0, batch: 17880, sum loss: 5764.320312, avg loss: 3.343574, ppl: 28.320150 +epoch: 0, batch: 17881, sum loss: 5120.729004, avg loss: 3.318684, ppl: 27.623970 +epoch: 0, batch: 17882, sum loss: 5616.880859, avg loss: 3.256163, ppl: 25.949774 +epoch: 0, batch: 17883, sum loss: 5533.719238, avg loss: 3.053929, ppl: 21.198467 +epoch: 0, batch: 17884, sum loss: 6308.187012, avg loss: 3.389676, ppl: 29.656345 +epoch: 0, batch: 17885, sum loss: 4665.702637, avg loss: 3.359037, ppl: 28.761484 +epoch: 0, batch: 17886, sum loss: 5218.292480, avg loss: 3.166440, ppl: 23.722870 +epoch: 0, batch: 17887, sum loss: 6595.651855, avg loss: 3.419208, ppl: 30.545208 +epoch: 0, batch: 17888, sum loss: 4903.992188, avg loss: 3.347435, ppl: 28.429718 +epoch: 0, batch: 17889, sum loss: 5488.434570, avg loss: 3.129096, ppl: 22.853312 +epoch: 0, batch: 17890, sum loss: 5072.668457, avg loss: 3.152684, ppl: 23.398783 +epoch: 0, batch: 17891, sum loss: 6404.290527, avg loss: 3.432096, ppl: 30.941414 +epoch: 0, batch: 17892, sum loss: 6352.932617, avg loss: 3.397290, ppl: 29.883015 +epoch: 0, batch: 17893, sum loss: 5175.831055, avg loss: 3.309355, ppl: 27.367462 +epoch: 0, batch: 17894, sum loss: 5033.235840, avg loss: 3.302648, ppl: 27.184530 +epoch: 0, batch: 17895, sum loss: 5750.873047, avg loss: 3.493848, ppl: 32.912338 +epoch: 0, batch: 17896, sum loss: 5780.988281, avg loss: 3.288389, ppl: 26.799660 +epoch: 0, batch: 17897, sum loss: 5505.576660, avg loss: 3.204643, ppl: 24.646700 +epoch: 0, batch: 17898, sum loss: 5739.780273, avg loss: 3.239154, ppl: 25.512119 +epoch: 0, batch: 17899, sum loss: 5131.538086, avg loss: 3.345201, ppl: 28.366270 +epoch: 0, batch: 17900, sum loss: 5757.298828, avg loss: 3.256391, ppl: 25.955688 +epoch: 0, batch: 17901, sum loss: 5136.281250, avg loss: 3.151093, ppl: 23.361580 +epoch: 0, batch: 17902, sum loss: 4957.049316, avg loss: 3.481074, ppl: 32.494606 +epoch: 0, batch: 17903, sum loss: 5283.653320, avg loss: 3.217816, ppl: 
24.973515 +epoch: 0, batch: 17904, sum loss: 5273.004395, avg loss: 3.277194, ppl: 26.501293 +epoch: 0, batch: 17905, sum loss: 5468.767090, avg loss: 2.988397, ppl: 19.853838 +epoch: 0, batch: 17906, sum loss: 6193.508301, avg loss: 3.429407, ppl: 30.858328 +epoch: 0, batch: 17907, sum loss: 4828.556641, avg loss: 3.334639, ppl: 28.068237 +epoch: 0, batch: 17908, sum loss: 5821.078613, avg loss: 3.416126, ppl: 30.451218 +epoch: 0, batch: 17909, sum loss: 5524.846680, avg loss: 3.195400, ppl: 24.419945 +epoch: 0, batch: 17910, sum loss: 6077.045898, avg loss: 3.188376, ppl: 24.249025 +epoch: 0, batch: 17911, sum loss: 6812.936523, avg loss: 3.499197, ppl: 33.088879 +epoch: 0, batch: 17912, sum loss: 5782.606445, avg loss: 3.407547, ppl: 30.191080 +epoch: 0, batch: 17913, sum loss: 5218.209473, avg loss: 3.115349, ppl: 22.541298 +epoch: 0, batch: 17914, sum loss: 6122.361328, avg loss: 3.405095, ppl: 30.117159 +epoch: 0, batch: 17915, sum loss: 5934.895996, avg loss: 3.458564, ppl: 31.771330 +epoch: 0, batch: 17916, sum loss: 6614.524414, avg loss: 3.554285, ppl: 34.962814 +epoch: 0, batch: 17917, sum loss: 5290.430664, avg loss: 3.194705, ppl: 24.402962 +epoch: 0, batch: 17918, sum loss: 4715.027344, avg loss: 3.227260, ppl: 25.210495 +epoch: 0, batch: 17919, sum loss: 5679.304199, avg loss: 3.350622, ppl: 28.520466 +epoch: 0, batch: 17920, sum loss: 4340.668457, avg loss: 3.039684, ppl: 20.898634 +epoch: 0, batch: 17921, sum loss: 6321.748535, avg loss: 3.290864, ppl: 26.866053 +epoch: 0, batch: 17922, sum loss: 4989.734863, avg loss: 3.106933, ppl: 22.352386 +epoch: 0, batch: 17923, sum loss: 5217.145020, avg loss: 3.331510, ppl: 27.980562 +epoch: 0, batch: 17924, sum loss: 4631.831543, avg loss: 3.146625, ppl: 23.257433 +epoch: 0, batch: 17925, sum loss: 6117.352051, avg loss: 3.509668, ppl: 33.437176 +epoch: 0, batch: 17926, sum loss: 5770.452148, avg loss: 3.261986, ppl: 26.101311 +epoch: 0, batch: 17927, sum loss: 5587.412109, avg loss: 3.244722, ppl: 
25.654587 +epoch: 0, batch: 17928, sum loss: 6019.659668, avg loss: 3.402860, ppl: 30.049932 +epoch: 0, batch: 17929, sum loss: 6299.443359, avg loss: 3.279252, ppl: 26.555910 +epoch: 0, batch: 17930, sum loss: 5483.369629, avg loss: 3.260030, ppl: 26.050306 +epoch: 0, batch: 17931, sum loss: 5963.268555, avg loss: 3.403692, ppl: 30.074940 +epoch: 0, batch: 17932, sum loss: 6385.166016, avg loss: 3.362383, ppl: 28.857887 +epoch: 0, batch: 17933, sum loss: 5265.953613, avg loss: 3.475877, ppl: 32.326168 +epoch: 0, batch: 17934, sum loss: 5424.757324, avg loss: 3.248358, ppl: 25.748020 +epoch: 0, batch: 17935, sum loss: 5627.361328, avg loss: 3.145535, ppl: 23.232090 +epoch: 0, batch: 17936, sum loss: 5730.711914, avg loss: 3.388949, ppl: 29.634781 +epoch: 0, batch: 17937, sum loss: 5640.284668, avg loss: 3.296484, ppl: 27.017490 +epoch: 0, batch: 17938, sum loss: 5129.541992, avg loss: 3.154700, ppl: 23.445997 +epoch: 0, batch: 17939, sum loss: 5311.324219, avg loss: 3.199593, ppl: 24.522549 +epoch: 0, batch: 17940, sum loss: 5560.378906, avg loss: 3.276593, ppl: 26.485395 +epoch: 0, batch: 17941, sum loss: 6375.292480, avg loss: 3.530062, ppl: 34.126099 +epoch: 0, batch: 17942, sum loss: 4755.571289, avg loss: 3.060213, ppl: 21.332108 +epoch: 0, batch: 17943, sum loss: 6502.458496, avg loss: 3.292384, ppl: 26.906937 +epoch: 0, batch: 17944, sum loss: 5107.440430, avg loss: 3.234604, ppl: 25.396322 +epoch: 0, batch: 17945, sum loss: 5720.219727, avg loss: 3.406921, ppl: 30.172192 +epoch: 0, batch: 17946, sum loss: 5402.207520, avg loss: 3.517062, ppl: 33.685322 +epoch: 0, batch: 17947, sum loss: 6237.791016, avg loss: 3.488698, ppl: 32.743271 +epoch: 0, batch: 17948, sum loss: 4304.078613, avg loss: 2.859853, ppl: 17.458961 +epoch: 0, batch: 17949, sum loss: 5736.958008, avg loss: 3.485394, ppl: 32.635277 +epoch: 0, batch: 17950, sum loss: 5249.795898, avg loss: 3.279073, ppl: 26.551155 +epoch: 0, batch: 17951, sum loss: 5838.321289, avg loss: 3.351505, ppl: 
28.545650 +epoch: 0, batch: 17952, sum loss: 5660.819336, avg loss: 3.300769, ppl: 27.133499 +epoch: 0, batch: 17953, sum loss: 5078.318359, avg loss: 3.214126, ppl: 24.881527 +epoch: 0, batch: 17954, sum loss: 6210.544434, avg loss: 3.380808, ppl: 29.394524 +epoch: 0, batch: 17955, sum loss: 6197.495605, avg loss: 3.682410, ppl: 39.742046 +epoch: 0, batch: 17956, sum loss: 5940.439453, avg loss: 3.350502, ppl: 28.517038 +epoch: 0, batch: 17957, sum loss: 5420.543457, avg loss: 3.184808, ppl: 24.162649 +epoch: 0, batch: 17958, sum loss: 6539.911133, avg loss: 3.548514, ppl: 34.761620 +epoch: 0, batch: 17959, sum loss: 4637.550781, avg loss: 3.029099, ppl: 20.678598 +epoch: 0, batch: 17960, sum loss: 5975.145508, avg loss: 3.416321, ppl: 30.457157 +epoch: 0, batch: 17961, sum loss: 5671.570312, avg loss: 3.381974, ppl: 29.428806 +epoch: 0, batch: 17962, sum loss: 6251.863770, avg loss: 3.572494, ppl: 35.605267 +epoch: 0, batch: 17963, sum loss: 4723.606934, avg loss: 3.215525, ppl: 24.916380 +epoch: 0, batch: 17964, sum loss: 6738.116699, avg loss: 3.811152, ppl: 45.202492 +epoch: 0, batch: 17965, sum loss: 4771.120605, avg loss: 3.153418, ppl: 23.415955 +epoch: 0, batch: 17966, sum loss: 6593.914062, avg loss: 3.625021, ppl: 37.525528 +epoch: 0, batch: 17967, sum loss: 5941.456055, avg loss: 3.227298, ppl: 25.211451 +epoch: 0, batch: 17968, sum loss: 6349.686523, avg loss: 3.343700, ppl: 28.323736 +epoch: 0, batch: 17969, sum loss: 5645.604492, avg loss: 3.213207, ppl: 24.858675 +epoch: 0, batch: 17970, sum loss: 5104.615234, avg loss: 3.125912, ppl: 22.780672 +epoch: 0, batch: 17971, sum loss: 5378.660156, avg loss: 3.271691, ppl: 26.355871 +epoch: 0, batch: 17972, sum loss: 4758.334961, avg loss: 3.021165, ppl: 20.515175 +epoch: 0, batch: 17973, sum loss: 5553.212891, avg loss: 3.268519, ppl: 26.272392 +epoch: 0, batch: 17974, sum loss: 5520.413086, avg loss: 3.189147, ppl: 24.267712 +epoch: 0, batch: 17975, sum loss: 4638.365723, avg loss: 3.084020, ppl: 
21.846041 +epoch: 0, batch: 17976, sum loss: 5166.292969, avg loss: 3.335244, ppl: 28.085234 +epoch: 0, batch: 17977, sum loss: 4384.839355, avg loss: 2.827105, ppl: 16.896467 +epoch: 0, batch: 17978, sum loss: 6470.299805, avg loss: 3.531823, ppl: 34.186237 +epoch: 0, batch: 17979, sum loss: 5533.455566, avg loss: 3.454092, ppl: 31.629557 +epoch: 0, batch: 17980, sum loss: 5375.389160, avg loss: 3.143502, ppl: 23.184929 +epoch: 0, batch: 17981, sum loss: 5412.237793, avg loss: 3.258421, ppl: 26.008448 +epoch: 0, batch: 17982, sum loss: 5140.892090, avg loss: 3.036558, ppl: 20.833405 +epoch: 0, batch: 17983, sum loss: 5613.236328, avg loss: 3.280676, ppl: 26.593748 +epoch: 0, batch: 17984, sum loss: 5187.458984, avg loss: 3.115591, ppl: 22.546753 +epoch: 0, batch: 17985, sum loss: 5455.138184, avg loss: 3.099510, ppl: 22.187080 +epoch: 0, batch: 17986, sum loss: 5877.678711, avg loss: 3.349105, ppl: 28.477224 +epoch: 0, batch: 17987, sum loss: 5519.122070, avg loss: 3.250366, ppl: 25.799793 +epoch: 0, batch: 17988, sum loss: 4423.919922, avg loss: 3.009469, ppl: 20.276636 +epoch: 0, batch: 17989, sum loss: 5880.155762, avg loss: 3.283169, ppl: 26.660120 +epoch: 0, batch: 17990, sum loss: 4546.621094, avg loss: 2.999091, ppl: 20.067282 +epoch: 0, batch: 17991, sum loss: 6391.431641, avg loss: 3.303065, ppl: 27.195875 +epoch: 0, batch: 17992, sum loss: 6068.525391, avg loss: 3.160691, ppl: 23.586878 +epoch: 0, batch: 17993, sum loss: 4885.141602, avg loss: 3.157816, ppl: 23.519178 +epoch: 0, batch: 17994, sum loss: 4172.096191, avg loss: 3.204375, ppl: 24.640097 +epoch: 0, batch: 17995, sum loss: 5724.137207, avg loss: 3.215807, ppl: 24.923409 +epoch: 0, batch: 17996, sum loss: 5407.856934, avg loss: 2.873463, ppl: 17.698195 +epoch: 0, batch: 17997, sum loss: 5551.466309, avg loss: 3.286836, ppl: 26.758072 +epoch: 0, batch: 17998, sum loss: 5274.051270, avg loss: 3.321191, ppl: 27.693316 +epoch: 0, batch: 17999, sum loss: 4746.102051, avg loss: 3.134810, ppl: 
22.984268 +epoch: 0, batch: 18000, sum loss: 4551.526855, avg loss: 3.437709, ppl: 31.115593 +epoch: 0, batch: 18001, sum loss: 4569.964844, avg loss: 3.127970, ppl: 22.827602 +epoch: 0, batch: 18002, sum loss: 5506.214844, avg loss: 3.214369, ppl: 24.887590 +epoch: 0, batch: 18003, sum loss: 6177.902832, avg loss: 3.364871, ppl: 28.929758 +epoch: 0, batch: 18004, sum loss: 6304.653320, avg loss: 3.411609, ppl: 30.313978 +epoch: 0, batch: 18005, sum loss: 6148.719727, avg loss: 3.260191, ppl: 26.054512 +epoch: 0, batch: 18006, sum loss: 5972.674316, avg loss: 3.395494, ppl: 29.829391 +epoch: 0, batch: 18007, sum loss: 6329.460449, avg loss: 3.203168, ppl: 24.610382 +epoch: 0, batch: 18008, sum loss: 5812.688477, avg loss: 3.236464, ppl: 25.443583 +epoch: 0, batch: 18009, sum loss: 5872.312500, avg loss: 3.251557, ppl: 25.830536 +epoch: 0, batch: 18010, sum loss: 4600.694824, avg loss: 3.177276, ppl: 23.981331 +epoch: 0, batch: 18011, sum loss: 5748.081055, avg loss: 3.247504, ppl: 25.726036 +epoch: 0, batch: 18012, sum loss: 5989.390137, avg loss: 3.383836, ppl: 29.483662 +epoch: 0, batch: 18013, sum loss: 5463.196289, avg loss: 3.361967, ppl: 28.845871 +epoch: 0, batch: 18014, sum loss: 5547.749023, avg loss: 3.533598, ppl: 34.246975 +epoch: 0, batch: 18015, sum loss: 3729.378906, avg loss: 2.806154, ppl: 16.546164 +epoch: 0, batch: 18016, sum loss: 5909.988770, avg loss: 3.416178, ppl: 30.452816 +epoch: 0, batch: 18017, sum loss: 5202.025391, avg loss: 3.253299, ppl: 25.875563 +epoch: 0, batch: 18018, sum loss: 4682.144531, avg loss: 2.982258, ppl: 19.732315 +epoch: 0, batch: 18019, sum loss: 5499.996582, avg loss: 3.275757, ppl: 26.463259 +epoch: 0, batch: 18020, sum loss: 6192.684570, avg loss: 3.440380, ppl: 31.198822 +epoch: 0, batch: 18021, sum loss: 5939.201172, avg loss: 3.465111, ppl: 31.980024 +epoch: 0, batch: 18022, sum loss: 6652.229492, avg loss: 3.779676, ppl: 43.801834 +epoch: 0, batch: 18023, sum loss: 5501.269531, avg loss: 3.300102, ppl: 
27.115391 +epoch: 0, batch: 18024, sum loss: 5187.911621, avg loss: 3.190598, ppl: 24.302944 +epoch: 0, batch: 18025, sum loss: 4419.053711, avg loss: 3.041331, ppl: 20.933083 +epoch: 0, batch: 18026, sum loss: 5907.291992, avg loss: 3.206999, ppl: 24.704838 +epoch: 0, batch: 18027, sum loss: 6661.669922, avg loss: 3.502455, ppl: 33.196865 +epoch: 0, batch: 18028, sum loss: 6236.763184, avg loss: 3.267032, ppl: 26.233355 +epoch: 0, batch: 18029, sum loss: 6163.401855, avg loss: 3.456759, ppl: 31.714039 +epoch: 0, batch: 18030, sum loss: 5173.720703, avg loss: 3.314363, ppl: 27.504868 +epoch: 0, batch: 18031, sum loss: 5642.823242, avg loss: 3.378936, ppl: 29.339539 +epoch: 0, batch: 18032, sum loss: 5158.608887, avg loss: 3.441367, ppl: 31.229616 +epoch: 0, batch: 18033, sum loss: 6270.449707, avg loss: 3.597504, ppl: 36.507004 +epoch: 0, batch: 18034, sum loss: 6017.665039, avg loss: 3.405583, ppl: 30.131855 +epoch: 0, batch: 18035, sum loss: 5520.471680, avg loss: 3.393037, ppl: 29.756195 +epoch: 0, batch: 18036, sum loss: 6171.462402, avg loss: 3.415308, ppl: 30.426327 +epoch: 0, batch: 18037, sum loss: 5388.264160, avg loss: 3.359267, ppl: 28.768097 +epoch: 0, batch: 18038, sum loss: 6363.461426, avg loss: 3.343910, ppl: 28.329693 +epoch: 0, batch: 18039, sum loss: 5475.447266, avg loss: 3.415750, ppl: 30.439772 +epoch: 0, batch: 18040, sum loss: 5147.606934, avg loss: 3.318895, ppl: 27.629812 +epoch: 0, batch: 18041, sum loss: 5377.544434, avg loss: 3.482866, ppl: 32.552879 +epoch: 0, batch: 18042, sum loss: 5379.980957, avg loss: 3.323027, ppl: 27.744209 +epoch: 0, batch: 18043, sum loss: 6112.981445, avg loss: 3.449764, ppl: 31.492952 +epoch: 0, batch: 18044, sum loss: 4670.459473, avg loss: 3.188027, ppl: 24.240551 +epoch: 0, batch: 18045, sum loss: 5102.093750, avg loss: 2.903867, ppl: 18.244553 +epoch: 0, batch: 18046, sum loss: 5410.573730, avg loss: 3.387961, ppl: 29.605530 +epoch: 0, batch: 18047, sum loss: 5578.498047, avg loss: 3.160622, ppl: 
23.585264 +epoch: 0, batch: 18048, sum loss: 5690.043457, avg loss: 3.257037, ppl: 25.972466 +epoch: 0, batch: 18049, sum loss: 5808.507812, avg loss: 3.380971, ppl: 29.399303 +epoch: 0, batch: 18050, sum loss: 5501.510742, avg loss: 3.282524, ppl: 26.642944 +epoch: 0, batch: 18051, sum loss: 4819.166504, avg loss: 3.238687, ppl: 25.500225 +epoch: 0, batch: 18052, sum loss: 4478.959961, avg loss: 3.250334, ppl: 25.798950 +epoch: 0, batch: 18053, sum loss: 4275.575195, avg loss: 2.886952, ppl: 17.938543 +epoch: 0, batch: 18054, sum loss: 5007.263184, avg loss: 3.191372, ppl: 24.321783 +epoch: 0, batch: 18055, sum loss: 5422.773926, avg loss: 3.266731, ppl: 26.225475 +epoch: 0, batch: 18056, sum loss: 4883.988281, avg loss: 3.349786, ppl: 28.496643 +epoch: 0, batch: 18057, sum loss: 4506.527344, avg loss: 2.932028, ppl: 18.765654 +epoch: 0, batch: 18058, sum loss: 5224.069824, avg loss: 3.432372, ppl: 30.949972 +epoch: 0, batch: 18059, sum loss: 5804.545898, avg loss: 3.345560, ppl: 28.376451 +epoch: 0, batch: 18060, sum loss: 5368.735352, avg loss: 3.207130, ppl: 24.708071 +epoch: 0, batch: 18061, sum loss: 4657.937012, avg loss: 3.115677, ppl: 22.548689 +epoch: 0, batch: 18062, sum loss: 4934.073730, avg loss: 3.034486, ppl: 20.790279 +epoch: 0, batch: 18063, sum loss: 5252.613770, avg loss: 3.537113, ppl: 34.367573 +epoch: 0, batch: 18064, sum loss: 6684.771973, avg loss: 3.470806, ppl: 32.162651 +epoch: 0, batch: 18065, sum loss: 7053.097656, avg loss: 3.761652, ppl: 43.019444 +epoch: 0, batch: 18066, sum loss: 5994.521484, avg loss: 3.409853, ppl: 30.260788 +epoch: 0, batch: 18067, sum loss: 4888.615723, avg loss: 3.097982, ppl: 22.153204 +epoch: 0, batch: 18068, sum loss: 6007.033691, avg loss: 3.494493, ppl: 32.933594 +epoch: 0, batch: 18069, sum loss: 5277.268066, avg loss: 3.285970, ppl: 26.734903 +epoch: 0, batch: 18070, sum loss: 4975.398926, avg loss: 3.181201, ppl: 24.075657 +epoch: 0, batch: 18071, sum loss: 5301.449219, avg loss: 3.163156, ppl: 
23.645103 +epoch: 0, batch: 18072, sum loss: 5697.335938, avg loss: 3.255620, ppl: 25.935701 +epoch: 0, batch: 18073, sum loss: 5993.428711, avg loss: 3.203329, ppl: 24.614344 +epoch: 0, batch: 18074, sum loss: 7052.451660, avg loss: 3.618498, ppl: 37.281513 +epoch: 0, batch: 18075, sum loss: 6490.960449, avg loss: 3.640471, ppl: 38.109798 +epoch: 0, batch: 18076, sum loss: 5312.736816, avg loss: 3.283521, ppl: 26.669510 +epoch: 0, batch: 18077, sum loss: 5069.975098, avg loss: 3.178668, ppl: 24.014740 +epoch: 0, batch: 18078, sum loss: 6521.679688, avg loss: 3.283827, ppl: 26.677662 +epoch: 0, batch: 18079, sum loss: 6106.444824, avg loss: 3.246382, ppl: 25.697199 +epoch: 0, batch: 18080, sum loss: 4374.079102, avg loss: 3.099985, ppl: 22.197620 +epoch: 0, batch: 18081, sum loss: 4837.137207, avg loss: 3.145083, ppl: 23.221596 +epoch: 0, batch: 18082, sum loss: 6151.656738, avg loss: 3.429017, ppl: 30.846315 +epoch: 0, batch: 18083, sum loss: 5607.830078, avg loss: 3.320207, ppl: 27.666086 +epoch: 0, batch: 18084, sum loss: 5992.218262, avg loss: 3.393102, ppl: 29.758118 +epoch: 0, batch: 18085, sum loss: 5969.643555, avg loss: 3.187210, ppl: 24.220747 +epoch: 0, batch: 18086, sum loss: 6142.704590, avg loss: 3.671670, ppl: 39.317528 +epoch: 0, batch: 18087, sum loss: 5916.585938, avg loss: 3.279704, ppl: 26.567904 +epoch: 0, batch: 18088, sum loss: 5827.912109, avg loss: 3.225186, ppl: 25.158262 +epoch: 0, batch: 18089, sum loss: 5766.422852, avg loss: 3.362345, ppl: 28.856794 +epoch: 0, batch: 18090, sum loss: 5839.831543, avg loss: 3.019561, ppl: 20.482304 +epoch: 0, batch: 18091, sum loss: 6169.027832, avg loss: 3.251992, ppl: 25.841753 +epoch: 0, batch: 18092, sum loss: 5727.446777, avg loss: 3.505169, ppl: 33.287083 +epoch: 0, batch: 18093, sum loss: 5708.216797, avg loss: 3.472151, ppl: 32.205952 +epoch: 0, batch: 18094, sum loss: 5364.937012, avg loss: 3.348899, ppl: 28.471380 +epoch: 0, batch: 18095, sum loss: 5672.266602, avg loss: 3.328795, ppl: 
27.904696 +epoch: 0, batch: 18096, sum loss: 5747.158203, avg loss: 3.380682, ppl: 29.390795 +epoch: 0, batch: 18097, sum loss: 6834.371094, avg loss: 3.415478, ppl: 30.431492 +epoch: 0, batch: 18098, sum loss: 6252.712402, avg loss: 3.325911, ppl: 27.824329 +epoch: 0, batch: 18099, sum loss: 5501.142578, avg loss: 3.315939, ppl: 27.548248 +epoch: 0, batch: 18100, sum loss: 5988.459473, avg loss: 3.371881, ppl: 29.133261 +epoch: 0, batch: 18101, sum loss: 4936.558594, avg loss: 3.234967, ppl: 25.405521 +epoch: 0, batch: 18102, sum loss: 5347.206543, avg loss: 3.358798, ppl: 28.754608 +epoch: 0, batch: 18103, sum loss: 6707.069336, avg loss: 3.462607, ppl: 31.900021 +epoch: 0, batch: 18104, sum loss: 5461.500977, avg loss: 3.260598, ppl: 26.065111 +epoch: 0, batch: 18105, sum loss: 5137.051758, avg loss: 3.009403, ppl: 20.275301 +epoch: 0, batch: 18106, sum loss: 5489.364746, avg loss: 3.193348, ppl: 24.369879 +epoch: 0, batch: 18107, sum loss: 5904.774414, avg loss: 3.633707, ppl: 37.852890 +epoch: 0, batch: 18108, sum loss: 5159.718262, avg loss: 3.080429, ppl: 21.767740 +epoch: 0, batch: 18109, sum loss: 5123.273438, avg loss: 3.080742, ppl: 21.774546 +epoch: 0, batch: 18110, sum loss: 5699.839844, avg loss: 3.266384, ppl: 26.216366 +epoch: 0, batch: 18111, sum loss: 5085.596191, avg loss: 3.231001, ppl: 25.304977 +epoch: 0, batch: 18112, sum loss: 6035.825684, avg loss: 3.298265, ppl: 27.065651 +epoch: 0, batch: 18113, sum loss: 5052.645996, avg loss: 3.308871, ppl: 27.354225 +epoch: 0, batch: 18114, sum loss: 5031.053711, avg loss: 3.342893, ppl: 28.300880 +epoch: 0, batch: 18115, sum loss: 5806.775879, avg loss: 3.444114, ppl: 31.315525 +epoch: 0, batch: 18116, sum loss: 4845.307129, avg loss: 3.105966, ppl: 22.330782 +epoch: 0, batch: 18117, sum loss: 3807.285156, avg loss: 2.926430, ppl: 18.660887 +epoch: 0, batch: 18118, sum loss: 4969.445801, avg loss: 3.222728, ppl: 25.096479 +epoch: 0, batch: 18119, sum loss: 4442.496094, avg loss: 3.139573, ppl: 
23.094006 +epoch: 0, batch: 18120, sum loss: 5302.441406, avg loss: 3.273112, ppl: 26.393349 +epoch: 0, batch: 18121, sum loss: 5042.729492, avg loss: 3.278758, ppl: 26.542782 +epoch: 0, batch: 18122, sum loss: 5852.789062, avg loss: 3.465239, ppl: 31.984112 +epoch: 0, batch: 18123, sum loss: 6365.855957, avg loss: 3.415159, ppl: 30.421778 +epoch: 0, batch: 18124, sum loss: 6382.802246, avg loss: 3.571798, ppl: 35.580494 +epoch: 0, batch: 18125, sum loss: 4712.660156, avg loss: 2.963937, ppl: 19.374098 +epoch: 0, batch: 18126, sum loss: 7011.358398, avg loss: 3.532171, ppl: 34.198116 +epoch: 0, batch: 18127, sum loss: 5370.686523, avg loss: 3.288846, ppl: 26.811918 +epoch: 0, batch: 18128, sum loss: 6165.948242, avg loss: 3.450447, ppl: 31.514471 +epoch: 0, batch: 18129, sum loss: 5521.653809, avg loss: 3.208398, ppl: 24.739429 +epoch: 0, batch: 18130, sum loss: 5618.530762, avg loss: 3.356351, ppl: 28.684320 +epoch: 0, batch: 18131, sum loss: 6467.399414, avg loss: 3.447441, ppl: 31.419889 +epoch: 0, batch: 18132, sum loss: 5871.142578, avg loss: 3.322661, ppl: 27.734064 +epoch: 0, batch: 18133, sum loss: 6021.341797, avg loss: 3.367641, ppl: 29.010004 +epoch: 0, batch: 18134, sum loss: 5758.187500, avg loss: 3.523983, ppl: 33.919243 +epoch: 0, batch: 18135, sum loss: 5128.526367, avg loss: 3.315143, ppl: 27.526333 +epoch: 0, batch: 18136, sum loss: 4927.315918, avg loss: 3.162591, ppl: 23.631735 +epoch: 0, batch: 18137, sum loss: 4610.307617, avg loss: 3.063328, ppl: 21.398643 +epoch: 0, batch: 18138, sum loss: 5622.232422, avg loss: 3.264943, ppl: 26.178629 +epoch: 0, batch: 18139, sum loss: 5708.991699, avg loss: 3.303815, ppl: 27.216261 +epoch: 0, batch: 18140, sum loss: 5445.196777, avg loss: 3.435455, ppl: 31.045546 +epoch: 0, batch: 18141, sum loss: 5967.474121, avg loss: 3.451402, ppl: 31.544592 +epoch: 0, batch: 18142, sum loss: 6246.089844, avg loss: 3.507069, ppl: 33.350380 +epoch: 0, batch: 18143, sum loss: 5745.635742, avg loss: 3.399784, ppl: 
29.957638 +epoch: 0, batch: 18144, sum loss: 5986.641602, avg loss: 3.456491, ppl: 31.705511 +epoch: 0, batch: 18145, sum loss: 6729.179199, avg loss: 3.332927, ppl: 28.020229 +epoch: 0, batch: 18146, sum loss: 5296.144531, avg loss: 3.054293, ppl: 21.206190 +epoch: 0, batch: 18147, sum loss: 5233.884766, avg loss: 3.293823, ppl: 26.945681 +epoch: 0, batch: 18148, sum loss: 5519.849609, avg loss: 3.351457, ppl: 28.544289 +epoch: 0, batch: 18149, sum loss: 6094.931641, avg loss: 3.414528, ppl: 30.402586 +epoch: 0, batch: 18150, sum loss: 6015.742188, avg loss: 3.425822, ppl: 30.747894 +epoch: 0, batch: 18151, sum loss: 5285.651855, avg loss: 3.418921, ppl: 30.536448 +epoch: 0, batch: 18152, sum loss: 5380.786621, avg loss: 3.291001, ppl: 26.869743 +epoch: 0, batch: 18153, sum loss: 4926.235840, avg loss: 3.556849, ppl: 35.052574 +epoch: 0, batch: 18154, sum loss: 5368.562500, avg loss: 3.338658, ppl: 28.181284 +epoch: 0, batch: 18155, sum loss: 4737.624512, avg loss: 3.227265, ppl: 25.210602 +epoch: 0, batch: 18156, sum loss: 5271.055176, avg loss: 3.376717, ppl: 29.274508 +epoch: 0, batch: 18157, sum loss: 5609.292480, avg loss: 3.350832, ppl: 28.526457 +epoch: 0, batch: 18158, sum loss: 6203.471680, avg loss: 3.324476, ppl: 27.784422 +epoch: 0, batch: 18159, sum loss: 4853.072754, avg loss: 3.069622, ppl: 21.533762 +epoch: 0, batch: 18160, sum loss: 5414.620117, avg loss: 3.259856, ppl: 26.045773 +epoch: 0, batch: 18161, sum loss: 6790.766602, avg loss: 3.551656, ppl: 34.871017 +epoch: 0, batch: 18162, sum loss: 5546.695801, avg loss: 3.221078, ppl: 25.055107 +epoch: 0, batch: 18163, sum loss: 5513.302246, avg loss: 3.303357, ppl: 27.203806 +epoch: 0, batch: 18164, sum loss: 5509.039062, avg loss: 3.214142, ppl: 24.881929 +epoch: 0, batch: 18165, sum loss: 7068.873535, avg loss: 3.619495, ppl: 37.318714 +epoch: 0, batch: 18166, sum loss: 6106.344727, avg loss: 3.344110, ppl: 28.335346 +epoch: 0, batch: 18167, sum loss: 4752.733398, avg loss: 3.209138, ppl: 
24.757740 +epoch: 0, batch: 18168, sum loss: 5795.682129, avg loss: 3.334685, ppl: 28.069536 +epoch: 0, batch: 18169, sum loss: 5547.334961, avg loss: 3.329733, ppl: 27.930874 +epoch: 0, batch: 18170, sum loss: 6289.223633, avg loss: 3.306637, ppl: 27.293192 +epoch: 0, batch: 18171, sum loss: 5907.838379, avg loss: 3.383642, ppl: 29.477934 +epoch: 0, batch: 18172, sum loss: 6135.636230, avg loss: 3.267112, ppl: 26.235456 +epoch: 0, batch: 18173, sum loss: 5648.767578, avg loss: 3.471891, ppl: 32.197582 +epoch: 0, batch: 18174, sum loss: 5573.214355, avg loss: 3.181059, ppl: 24.072224 +epoch: 0, batch: 18175, sum loss: 5445.870605, avg loss: 3.545489, ppl: 34.656624 +epoch: 0, batch: 18176, sum loss: 6354.247070, avg loss: 3.534064, ppl: 34.262924 +epoch: 0, batch: 18177, sum loss: 5063.401855, avg loss: 3.066870, ppl: 21.474581 +epoch: 0, batch: 18178, sum loss: 5447.250000, avg loss: 3.271622, ppl: 26.354044 +epoch: 0, batch: 18179, sum loss: 6338.181152, avg loss: 3.568796, ppl: 35.473846 +epoch: 0, batch: 18180, sum loss: 4901.501465, avg loss: 3.375690, ppl: 29.244448 +epoch: 0, batch: 18181, sum loss: 5728.799805, avg loss: 3.389822, ppl: 29.660679 +epoch: 0, batch: 18182, sum loss: 5795.612793, avg loss: 3.114247, ppl: 22.516460 +epoch: 0, batch: 18183, sum loss: 6112.668457, avg loss: 3.247964, ppl: 25.737888 +epoch: 0, batch: 18184, sum loss: 6193.071289, avg loss: 3.360321, ppl: 28.798428 +epoch: 0, batch: 18185, sum loss: 5581.929688, avg loss: 3.501838, ppl: 33.176373 +epoch: 0, batch: 18186, sum loss: 4786.150391, avg loss: 3.171737, ppl: 23.848867 +epoch: 0, batch: 18187, sum loss: 5848.669434, avg loss: 3.084741, ppl: 21.861813 +epoch: 0, batch: 18188, sum loss: 6373.256836, avg loss: 3.552540, ppl: 34.901859 +epoch: 0, batch: 18189, sum loss: 3586.192383, avg loss: 2.642736, ppl: 14.051592 +epoch: 0, batch: 18190, sum loss: 4542.616211, avg loss: 3.268069, ppl: 26.260588 +epoch: 0, batch: 18191, sum loss: 5005.615234, avg loss: 2.965412, ppl: 
19.402693 +epoch: 0, batch: 18192, sum loss: 5569.134277, avg loss: 3.450517, ppl: 31.516680 +epoch: 0, batch: 18193, sum loss: 5623.777344, avg loss: 3.357479, ppl: 28.716715 +epoch: 0, batch: 18194, sum loss: 4968.638672, avg loss: 3.314636, ppl: 27.512363 +epoch: 0, batch: 18195, sum loss: 4971.408691, avg loss: 3.323134, ppl: 27.747179 +epoch: 0, batch: 18196, sum loss: 6130.787109, avg loss: 3.392798, ppl: 29.749086 +epoch: 0, batch: 18197, sum loss: 4524.816895, avg loss: 2.938193, ppl: 18.881693 +epoch: 0, batch: 18198, sum loss: 5565.280273, avg loss: 3.260270, ppl: 26.056562 +epoch: 0, batch: 18199, sum loss: 5311.799805, avg loss: 3.344962, ppl: 28.359509 +epoch: 0, batch: 18200, sum loss: 5831.878418, avg loss: 3.593271, ppl: 36.352783 +epoch: 0, batch: 18201, sum loss: 6482.643066, avg loss: 3.517441, ppl: 33.698071 +epoch: 0, batch: 18202, sum loss: 5961.840820, avg loss: 3.474266, ppl: 32.274139 +epoch: 0, batch: 18203, sum loss: 6761.582031, avg loss: 3.456841, ppl: 31.716640 +epoch: 0, batch: 18204, sum loss: 4854.468750, avg loss: 3.127880, ppl: 22.825529 +epoch: 0, batch: 18205, sum loss: 5253.418945, avg loss: 3.097535, ppl: 22.143299 +epoch: 0, batch: 18206, sum loss: 5893.404297, avg loss: 3.520552, ppl: 33.803097 +epoch: 0, batch: 18207, sum loss: 4491.238281, avg loss: 3.014254, ppl: 20.373888 +epoch: 0, batch: 18208, sum loss: 4988.371582, avg loss: 3.183390, ppl: 24.128408 +epoch: 0, batch: 18209, sum loss: 4751.144043, avg loss: 2.886479, ppl: 17.930065 +epoch: 0, batch: 18210, sum loss: 6931.736328, avg loss: 3.683176, ppl: 39.772495 +epoch: 0, batch: 18211, sum loss: 4945.902832, avg loss: 3.023168, ppl: 20.556313 +epoch: 0, batch: 18212, sum loss: 5378.118652, avg loss: 3.189869, ppl: 24.285238 +epoch: 0, batch: 18213, sum loss: 5085.780273, avg loss: 3.052689, ppl: 21.172201 +epoch: 0, batch: 18214, sum loss: 5044.386230, avg loss: 3.316493, ppl: 27.563522 +epoch: 0, batch: 18215, sum loss: 4888.344727, avg loss: 3.057126, ppl: 
21.266350 +epoch: 0, batch: 18216, sum loss: 5341.407227, avg loss: 3.413040, ppl: 30.357382 +epoch: 0, batch: 18217, sum loss: 6081.316406, avg loss: 3.543891, ppl: 34.601280 +epoch: 0, batch: 18218, sum loss: 5637.453613, avg loss: 3.121514, ppl: 22.680683 +epoch: 0, batch: 18219, sum loss: 6618.460449, avg loss: 3.258720, ppl: 26.016220 +epoch: 0, batch: 18220, sum loss: 6733.649902, avg loss: 3.593196, ppl: 36.350071 +epoch: 0, batch: 18221, sum loss: 5287.364746, avg loss: 3.464853, ppl: 31.971746 +epoch: 0, batch: 18222, sum loss: 6212.655762, avg loss: 3.322276, ppl: 27.723373 +epoch: 0, batch: 18223, sum loss: 5308.524902, avg loss: 3.161718, ppl: 23.611128 +epoch: 0, batch: 18224, sum loss: 6700.307129, avg loss: 3.554540, ppl: 34.971718 +epoch: 0, batch: 18225, sum loss: 5735.215820, avg loss: 3.452869, ppl: 31.590902 +epoch: 0, batch: 18226, sum loss: 5535.479004, avg loss: 3.455355, ppl: 31.669535 +epoch: 0, batch: 18227, sum loss: 4696.734375, avg loss: 3.079826, ppl: 21.754614 +epoch: 0, batch: 18228, sum loss: 6554.256836, avg loss: 3.243076, ppl: 25.612392 +epoch: 0, batch: 18229, sum loss: 6575.350098, avg loss: 3.529442, ppl: 34.104927 +epoch: 0, batch: 18230, sum loss: 6200.051758, avg loss: 3.518758, ppl: 33.742504 +epoch: 0, batch: 18231, sum loss: 6361.089844, avg loss: 3.460876, ppl: 31.844852 +epoch: 0, batch: 18232, sum loss: 5790.541504, avg loss: 3.172899, ppl: 23.876614 +epoch: 0, batch: 18233, sum loss: 5254.652832, avg loss: 3.138980, ppl: 23.080322 +epoch: 0, batch: 18234, sum loss: 4515.026855, avg loss: 3.090368, ppl: 21.985157 +epoch: 0, batch: 18235, sum loss: 5501.009766, avg loss: 3.483857, ppl: 32.585152 +epoch: 0, batch: 18236, sum loss: 4284.158203, avg loss: 2.896659, ppl: 18.113522 +epoch: 0, batch: 18237, sum loss: 5693.179199, avg loss: 3.294664, ppl: 26.968349 +epoch: 0, batch: 18238, sum loss: 5887.170898, avg loss: 3.483533, ppl: 32.574612 +epoch: 0, batch: 18239, sum loss: 5909.928223, avg loss: 3.272386, ppl: 
26.374182 +epoch: 0, batch: 18240, sum loss: 6777.914551, avg loss: 3.382193, ppl: 29.435247 +epoch: 0, batch: 18241, sum loss: 5183.420898, avg loss: 3.535758, ppl: 34.321014 +epoch: 0, batch: 18242, sum loss: 5313.949219, avg loss: 3.129534, ppl: 22.863335 +epoch: 0, batch: 18243, sum loss: 5780.463379, avg loss: 3.112797, ppl: 22.483833 +epoch: 0, batch: 18244, sum loss: 5992.380859, avg loss: 3.443897, ppl: 31.308731 +epoch: 0, batch: 18245, sum loss: 5979.240234, avg loss: 3.430430, ppl: 30.889935 +epoch: 0, batch: 18246, sum loss: 4574.061035, avg loss: 3.051408, ppl: 21.145102 +epoch: 0, batch: 18247, sum loss: 5859.013672, avg loss: 3.251395, ppl: 25.826342 +epoch: 0, batch: 18248, sum loss: 5272.510742, avg loss: 3.318131, ppl: 27.608713 +epoch: 0, batch: 18249, sum loss: 5347.171387, avg loss: 3.152813, ppl: 23.401812 +epoch: 0, batch: 18250, sum loss: 6496.177734, avg loss: 3.494447, ppl: 32.932079 +epoch: 0, batch: 18251, sum loss: 5091.229492, avg loss: 3.131137, ppl: 22.900011 +epoch: 0, batch: 18252, sum loss: 5891.177734, avg loss: 3.368312, ppl: 29.029488 +epoch: 0, batch: 18253, sum loss: 5885.840820, avg loss: 3.279020, ppl: 26.549744 +epoch: 0, batch: 18254, sum loss: 4810.480469, avg loss: 3.166873, ppl: 23.733160 +epoch: 0, batch: 18255, sum loss: 5210.527344, avg loss: 3.260655, ppl: 26.066616 +epoch: 0, batch: 18256, sum loss: 5562.867676, avg loss: 3.031536, ppl: 20.729040 +epoch: 0, batch: 18257, sum loss: 4668.142578, avg loss: 3.377816, ppl: 29.306707 +epoch: 0, batch: 18258, sum loss: 5200.892578, avg loss: 3.304252, ppl: 27.228165 +epoch: 0, batch: 18259, sum loss: 5691.909180, avg loss: 3.252520, ppl: 25.855404 +epoch: 0, batch: 18260, sum loss: 5185.685547, avg loss: 3.162003, ppl: 23.617861 +epoch: 0, batch: 18261, sum loss: 5809.387207, avg loss: 3.332982, ppl: 28.021772 +epoch: 0, batch: 18262, sum loss: 5100.131348, avg loss: 3.219780, ppl: 25.022614 +epoch: 0, batch: 18263, sum loss: 5397.440430, avg loss: 3.208942, ppl: 
24.752882 +epoch: 0, batch: 18264, sum loss: 5601.754883, avg loss: 3.129472, ppl: 22.861906 +epoch: 0, batch: 18265, sum loss: 5039.071777, avg loss: 3.381927, ppl: 29.427437 +epoch: 0, batch: 18266, sum loss: 5329.966309, avg loss: 3.352180, ppl: 28.564938 +epoch: 0, batch: 18267, sum loss: 6104.534668, avg loss: 3.412261, ppl: 30.333744 +epoch: 0, batch: 18268, sum loss: 5819.402344, avg loss: 3.256521, ppl: 25.959063 +epoch: 0, batch: 18269, sum loss: 5474.264160, avg loss: 3.252682, ppl: 25.859608 +epoch: 0, batch: 18270, sum loss: 5074.068848, avg loss: 3.314219, ppl: 27.500900 +epoch: 0, batch: 18271, sum loss: 6778.541992, avg loss: 3.534172, ppl: 34.266632 +epoch: 0, batch: 18272, sum loss: 5699.565918, avg loss: 3.319491, ppl: 27.646273 +epoch: 0, batch: 18273, sum loss: 5710.143555, avg loss: 3.276043, ppl: 26.470831 +epoch: 0, batch: 18274, sum loss: 6356.353027, avg loss: 3.308877, ppl: 27.354395 +epoch: 0, batch: 18275, sum loss: 5828.787109, avg loss: 3.321246, ppl: 27.694841 +epoch: 0, batch: 18276, sum loss: 4555.511230, avg loss: 3.203594, ppl: 24.620871 +epoch: 0, batch: 18277, sum loss: 5605.488281, avg loss: 3.322756, ppl: 27.736675 +epoch: 0, batch: 18278, sum loss: 6341.439453, avg loss: 3.422256, ppl: 30.638443 +epoch: 0, batch: 18279, sum loss: 6605.071289, avg loss: 3.581926, ppl: 35.942696 +epoch: 0, batch: 18280, sum loss: 5885.938965, avg loss: 3.466395, ppl: 32.021111 +epoch: 0, batch: 18281, sum loss: 5405.291992, avg loss: 3.289891, ppl: 26.839951 +epoch: 0, batch: 18282, sum loss: 4538.386719, avg loss: 3.248666, ppl: 25.755960 +epoch: 0, batch: 18283, sum loss: 4997.299805, avg loss: 3.043423, ppl: 20.976917 +epoch: 0, batch: 18284, sum loss: 5007.170898, avg loss: 3.117790, ppl: 22.596386 +epoch: 0, batch: 18285, sum loss: 5716.511719, avg loss: 3.352793, ppl: 28.582453 +epoch: 0, batch: 18286, sum loss: 5364.526367, avg loss: 3.449856, ppl: 31.495865 +epoch: 0, batch: 18287, sum loss: 5382.679199, avg loss: 3.258281, ppl: 
26.004784 +epoch: 0, batch: 18288, sum loss: 5147.019531, avg loss: 3.165449, ppl: 23.699375 +epoch: 0, batch: 18289, sum loss: 5409.681152, avg loss: 3.320860, ppl: 27.684153 +epoch: 0, batch: 18290, sum loss: 5442.990234, avg loss: 3.522971, ppl: 33.884956 +epoch: 0, batch: 18291, sum loss: 5592.595215, avg loss: 3.603477, ppl: 36.725689 +epoch: 0, batch: 18292, sum loss: 5263.704102, avg loss: 3.251207, ppl: 25.821491 +epoch: 0, batch: 18293, sum loss: 5934.922852, avg loss: 3.448531, ppl: 31.454165 +epoch: 0, batch: 18294, sum loss: 5988.242188, avg loss: 3.419899, ppl: 30.566313 +epoch: 0, batch: 18295, sum loss: 5110.061035, avg loss: 3.207822, ppl: 24.725183 +epoch: 0, batch: 18296, sum loss: 5781.569336, avg loss: 3.412969, ppl: 30.355232 +epoch: 0, batch: 18297, sum loss: 5628.072266, avg loss: 3.433845, ppl: 30.995594 +epoch: 0, batch: 18298, sum loss: 6267.244629, avg loss: 3.213971, ppl: 24.877689 +epoch: 0, batch: 18299, sum loss: 5540.852539, avg loss: 3.249767, ppl: 25.784332 +epoch: 0, batch: 18300, sum loss: 5524.874023, avg loss: 3.296464, ppl: 27.016951 +epoch: 0, batch: 18301, sum loss: 5445.433105, avg loss: 3.357234, ppl: 28.709663 +epoch: 0, batch: 18302, sum loss: 5400.914551, avg loss: 3.475492, ppl: 32.313725 +epoch: 0, batch: 18303, sum loss: 5004.615723, avg loss: 3.106528, ppl: 22.343325 +epoch: 0, batch: 18304, sum loss: 5958.793945, avg loss: 3.511369, ppl: 33.494106 +epoch: 0, batch: 18305, sum loss: 5766.642578, avg loss: 3.408181, ppl: 30.210247 +epoch: 0, batch: 18306, sum loss: 4541.239258, avg loss: 3.116842, ppl: 22.574982 +epoch: 0, batch: 18307, sum loss: 5741.500977, avg loss: 3.269648, ppl: 26.302069 +epoch: 0, batch: 18308, sum loss: 6382.435059, avg loss: 3.271366, ppl: 26.347309 +epoch: 0, batch: 18309, sum loss: 4878.046875, avg loss: 3.120951, ppl: 22.667936 +epoch: 0, batch: 18310, sum loss: 6679.041016, avg loss: 3.393822, ppl: 29.779551 +epoch: 0, batch: 18311, sum loss: 4846.687012, avg loss: 2.910923, ppl: 
18.373755 +epoch: 0, batch: 18312, sum loss: 5339.299805, avg loss: 3.127885, ppl: 22.825655 +epoch: 0, batch: 18313, sum loss: 5305.354004, avg loss: 3.154194, ppl: 23.434139 +epoch: 0, batch: 18314, sum loss: 4862.662109, avg loss: 3.083489, ppl: 21.834448 +epoch: 0, batch: 18315, sum loss: 4073.084961, avg loss: 3.071708, ppl: 21.578732 +epoch: 0, batch: 18316, sum loss: 5961.665039, avg loss: 3.210374, ppl: 24.788359 +epoch: 0, batch: 18317, sum loss: 4670.755859, avg loss: 3.117995, ppl: 22.601009 +epoch: 0, batch: 18318, sum loss: 5681.272949, avg loss: 3.498321, ppl: 33.059891 +epoch: 0, batch: 18319, sum loss: 5450.552734, avg loss: 3.295377, ppl: 26.987574 +epoch: 0, batch: 18320, sum loss: 5495.623047, avg loss: 3.384004, ppl: 29.488617 +epoch: 0, batch: 18321, sum loss: 4843.313965, avg loss: 3.246189, ppl: 25.692244 +epoch: 0, batch: 18322, sum loss: 5238.125000, avg loss: 3.142246, ppl: 23.155828 +epoch: 0, batch: 18323, sum loss: 5662.486328, avg loss: 3.267447, ppl: 26.244259 +epoch: 0, batch: 18324, sum loss: 5302.285645, avg loss: 3.273016, ppl: 26.390814 +epoch: 0, batch: 18325, sum loss: 5290.206055, avg loss: 3.327173, ppl: 27.859486 +epoch: 0, batch: 18326, sum loss: 5947.569336, avg loss: 3.244719, ppl: 25.654495 +epoch: 0, batch: 18327, sum loss: 6504.606934, avg loss: 3.190097, ppl: 24.290773 +epoch: 0, batch: 18328, sum loss: 6173.451660, avg loss: 3.353314, ppl: 28.597361 +epoch: 0, batch: 18329, sum loss: 5812.810547, avg loss: 3.067446, ppl: 21.486959 +epoch: 0, batch: 18330, sum loss: 6444.486816, avg loss: 3.462916, ppl: 31.909895 +epoch: 0, batch: 18331, sum loss: 4607.020508, avg loss: 2.899321, ppl: 18.161802 +epoch: 0, batch: 18332, sum loss: 5694.171387, avg loss: 3.387371, ppl: 29.588072 +epoch: 0, batch: 18333, sum loss: 5387.562988, avg loss: 3.309314, ppl: 27.366339 +epoch: 0, batch: 18334, sum loss: 4958.067383, avg loss: 3.091064, ppl: 22.000484 +epoch: 0, batch: 18335, sum loss: 5516.513184, avg loss: 3.188736, ppl: 
24.257746 +epoch: 0, batch: 18336, sum loss: 5463.776367, avg loss: 3.321445, ppl: 27.700335 +epoch: 0, batch: 18337, sum loss: 5805.484375, avg loss: 3.369405, ppl: 29.061224 +epoch: 0, batch: 18338, sum loss: 6367.728516, avg loss: 3.651220, ppl: 38.521645 +epoch: 0, batch: 18339, sum loss: 5278.589844, avg loss: 3.147639, ppl: 23.281023 +epoch: 0, batch: 18340, sum loss: 6952.632812, avg loss: 3.653512, ppl: 38.610016 +epoch: 0, batch: 18341, sum loss: 4774.817871, avg loss: 3.513479, ppl: 33.564838 +epoch: 0, batch: 18342, sum loss: 4340.734375, avg loss: 3.050411, ppl: 21.124020 +epoch: 0, batch: 18343, sum loss: 6106.912109, avg loss: 3.432778, ppl: 30.962534 +epoch: 0, batch: 18344, sum loss: 5994.922852, avg loss: 3.193885, ppl: 24.382980 +epoch: 0, batch: 18345, sum loss: 5576.000977, avg loss: 3.454772, ppl: 31.651056 +epoch: 0, batch: 18346, sum loss: 5622.148926, avg loss: 3.466183, ppl: 32.014309 +epoch: 0, batch: 18347, sum loss: 4598.840820, avg loss: 3.023564, ppl: 20.564455 +epoch: 0, batch: 18348, sum loss: 6053.869141, avg loss: 3.244303, ppl: 25.643824 +epoch: 0, batch: 18349, sum loss: 5455.405273, avg loss: 3.247265, ppl: 25.719902 +epoch: 0, batch: 18350, sum loss: 5389.839355, avg loss: 3.404826, ppl: 30.109047 +epoch: 0, batch: 18351, sum loss: 5937.383301, avg loss: 3.515325, ppl: 33.626842 +epoch: 0, batch: 18352, sum loss: 4724.410645, avg loss: 3.280841, ppl: 26.598129 +epoch: 0, batch: 18353, sum loss: 6149.666016, avg loss: 3.454869, ppl: 31.654127 +epoch: 0, batch: 18354, sum loss: 5867.159668, avg loss: 3.513269, ppl: 33.557804 +epoch: 0, batch: 18355, sum loss: 5811.518555, avg loss: 3.225038, ppl: 25.154531 +epoch: 0, batch: 18356, sum loss: 5238.398926, avg loss: 3.241583, ppl: 25.574183 +epoch: 0, batch: 18357, sum loss: 5515.143066, avg loss: 3.366998, ppl: 28.991364 +epoch: 0, batch: 18358, sum loss: 5442.895996, avg loss: 3.168158, ppl: 23.763678 +epoch: 0, batch: 18359, sum loss: 5744.149902, avg loss: 3.324161, ppl: 
27.775681 +epoch: 0, batch: 18360, sum loss: 4852.079590, avg loss: 3.104337, ppl: 22.294443 +epoch: 0, batch: 18361, sum loss: 6915.305664, avg loss: 3.555427, ppl: 35.002773 +epoch: 0, batch: 18362, sum loss: 5409.207031, avg loss: 3.314465, ppl: 27.507681 +epoch: 0, batch: 18363, sum loss: 4670.205078, avg loss: 3.209763, ppl: 24.773211 +epoch: 0, batch: 18364, sum loss: 6625.136230, avg loss: 3.492428, ppl: 32.865658 +epoch: 0, batch: 18365, sum loss: 5433.916992, avg loss: 3.303293, ppl: 27.202061 +epoch: 0, batch: 18366, sum loss: 5440.916016, avg loss: 3.193026, ppl: 24.362032 +epoch: 0, batch: 18367, sum loss: 5325.570312, avg loss: 3.121671, ppl: 22.684248 +epoch: 0, batch: 18368, sum loss: 4826.548340, avg loss: 3.086028, ppl: 21.889965 +epoch: 0, batch: 18369, sum loss: 5573.694336, avg loss: 3.032478, ppl: 20.748581 +epoch: 0, batch: 18370, sum loss: 6522.542480, avg loss: 3.556457, ppl: 35.038837 +epoch: 0, batch: 18371, sum loss: 5333.077637, avg loss: 3.151937, ppl: 23.381315 +epoch: 0, batch: 18372, sum loss: 6387.483887, avg loss: 3.454561, ppl: 31.644402 +epoch: 0, batch: 18373, sum loss: 5815.465820, avg loss: 3.302366, ppl: 27.176851 +epoch: 0, batch: 18374, sum loss: 6372.989746, avg loss: 3.536620, ppl: 34.350632 +epoch: 0, batch: 18375, sum loss: 4986.985352, avg loss: 3.340245, ppl: 28.226034 +epoch: 0, batch: 18376, sum loss: 5058.849609, avg loss: 2.904047, ppl: 18.247841 +epoch: 0, batch: 18377, sum loss: 5411.697266, avg loss: 3.279817, ppl: 26.570900 +epoch: 0, batch: 18378, sum loss: 5223.654785, avg loss: 3.109318, ppl: 22.405769 +epoch: 0, batch: 18379, sum loss: 5454.457031, avg loss: 3.256392, ppl: 25.955732 +epoch: 0, batch: 18380, sum loss: 5581.505859, avg loss: 3.137440, ppl: 23.044796 +epoch: 0, batch: 18381, sum loss: 5931.474121, avg loss: 3.446527, ppl: 31.391197 +epoch: 0, batch: 18382, sum loss: 5471.797852, avg loss: 3.441382, ppl: 31.230093 +epoch: 0, batch: 18383, sum loss: 6224.539062, avg loss: 3.408839, ppl: 
30.230112 +epoch: 0, batch: 18384, sum loss: 5914.687012, avg loss: 3.364441, ppl: 28.917318 +epoch: 0, batch: 18385, sum loss: 6550.396484, avg loss: 3.463985, ppl: 31.944035 +epoch: 0, batch: 18386, sum loss: 5646.850586, avg loss: 3.219413, ppl: 25.013433 +epoch: 0, batch: 18387, sum loss: 5447.426270, avg loss: 3.275662, ppl: 26.460741 +epoch: 0, batch: 18388, sum loss: 5310.354492, avg loss: 3.200937, ppl: 24.555529 +epoch: 0, batch: 18389, sum loss: 4960.259277, avg loss: 3.100162, ppl: 22.201548 +epoch: 0, batch: 18390, sum loss: 5715.690430, avg loss: 3.326944, ppl: 27.853102 +epoch: 0, batch: 18391, sum loss: 5897.107910, avg loss: 3.448601, ppl: 31.456354 +epoch: 0, batch: 18392, sum loss: 6025.446289, avg loss: 3.472880, ppl: 32.229420 +epoch: 0, batch: 18393, sum loss: 5664.956055, avg loss: 3.408518, ppl: 30.220413 +epoch: 0, batch: 18394, sum loss: 5718.492188, avg loss: 3.210832, ppl: 24.799721 +epoch: 0, batch: 18395, sum loss: 5016.802246, avg loss: 3.238736, ppl: 25.501471 +epoch: 0, batch: 18396, sum loss: 6066.013672, avg loss: 3.338478, ppl: 28.176199 +epoch: 0, batch: 18397, sum loss: 5425.261230, avg loss: 3.123351, ppl: 22.722403 +epoch: 0, batch: 18398, sum loss: 6293.394531, avg loss: 3.504117, ppl: 33.252079 +epoch: 0, batch: 18399, sum loss: 7092.421387, avg loss: 3.454662, ppl: 31.647593 +epoch: 0, batch: 18400, sum loss: 6115.137695, avg loss: 3.699418, ppl: 40.423763 +epoch: 0, batch: 18401, sum loss: 5243.603027, avg loss: 3.136127, ppl: 23.014547 +epoch: 0, batch: 18402, sum loss: 5448.794922, avg loss: 3.294314, ppl: 26.958912 +epoch: 0, batch: 18403, sum loss: 5325.021973, avg loss: 3.442160, ppl: 31.254406 +epoch: 0, batch: 18404, sum loss: 4591.195801, avg loss: 3.100065, ppl: 22.199389 +epoch: 0, batch: 18405, sum loss: 6321.449707, avg loss: 3.673126, ppl: 39.374790 +epoch: 0, batch: 18406, sum loss: 5954.950195, avg loss: 3.462180, ppl: 31.886425 +epoch: 0, batch: 18407, sum loss: 5967.600098, avg loss: 3.367720, ppl: 
29.012308 +epoch: 0, batch: 18408, sum loss: 5409.131836, avg loss: 3.363888, ppl: 28.901335 +epoch: 0, batch: 18409, sum loss: 6048.789551, avg loss: 3.379212, ppl: 29.347633 +epoch: 0, batch: 18410, sum loss: 5419.103027, avg loss: 3.276362, ppl: 26.479271 +epoch: 0, batch: 18411, sum loss: 5111.144531, avg loss: 3.312472, ppl: 27.452915 +epoch: 0, batch: 18412, sum loss: 6187.890625, avg loss: 3.350239, ppl: 28.509541 +epoch: 0, batch: 18413, sum loss: 4648.859375, avg loss: 3.099240, ppl: 22.181078 +epoch: 0, batch: 18414, sum loss: 4713.937012, avg loss: 3.228724, ppl: 25.247421 +epoch: 0, batch: 18415, sum loss: 5601.319336, avg loss: 3.258476, ppl: 26.009855 +epoch: 0, batch: 18416, sum loss: 4845.484375, avg loss: 3.013361, ppl: 20.355700 +epoch: 0, batch: 18417, sum loss: 6492.129883, avg loss: 3.397242, ppl: 29.881575 +epoch: 0, batch: 18418, sum loss: 4746.083008, avg loss: 3.181021, ppl: 24.071318 +epoch: 0, batch: 18419, sum loss: 5520.603027, avg loss: 3.315678, ppl: 27.541050 +epoch: 0, batch: 18420, sum loss: 6378.160156, avg loss: 3.487239, ppl: 32.695553 +epoch: 0, batch: 18421, sum loss: 6378.618164, avg loss: 3.468526, ppl: 32.089397 +epoch: 0, batch: 18422, sum loss: 5994.941895, avg loss: 3.467289, ppl: 32.049728 +epoch: 0, batch: 18423, sum loss: 5439.992676, avg loss: 3.399995, ppl: 29.963961 +epoch: 0, batch: 18424, sum loss: 5528.665527, avg loss: 3.229361, ppl: 25.263498 +epoch: 0, batch: 18425, sum loss: 5346.776367, avg loss: 3.264210, ppl: 26.159437 +epoch: 0, batch: 18426, sum loss: 6236.350586, avg loss: 3.234622, ppl: 25.396765 +epoch: 0, batch: 18427, sum loss: 5503.737793, avg loss: 3.299603, ppl: 27.101877 +epoch: 0, batch: 18428, sum loss: 5726.022949, avg loss: 3.154834, ppl: 23.449139 +epoch: 0, batch: 18429, sum loss: 5455.992676, avg loss: 3.034479, ppl: 20.790136 +epoch: 0, batch: 18430, sum loss: 4965.228516, avg loss: 2.941486, ppl: 18.943979 +epoch: 0, batch: 18431, sum loss: 5383.152344, avg loss: 3.172158, ppl: 
23.858923 +epoch: 0, batch: 18432, sum loss: 6182.153320, avg loss: 3.604754, ppl: 36.772636 +epoch: 0, batch: 18433, sum loss: 4227.963867, avg loss: 3.292807, ppl: 26.918314 +epoch: 0, batch: 18434, sum loss: 5812.648438, avg loss: 3.249105, ppl: 25.767262 +epoch: 0, batch: 18435, sum loss: 6509.554199, avg loss: 3.420680, ppl: 30.590210 +epoch: 0, batch: 18436, sum loss: 5708.030762, avg loss: 3.328298, ppl: 27.890820 +epoch: 0, batch: 18437, sum loss: 4986.105957, avg loss: 2.982121, ppl: 19.729618 +epoch: 0, batch: 18438, sum loss: 5065.288086, avg loss: 3.242822, ppl: 25.605885 +epoch: 0, batch: 18439, sum loss: 5622.930176, avg loss: 3.434899, ppl: 31.028288 +epoch: 0, batch: 18440, sum loss: 5492.443848, avg loss: 3.213835, ppl: 24.874296 +epoch: 0, batch: 18441, sum loss: 5798.307617, avg loss: 3.255647, ppl: 25.936394 +epoch: 0, batch: 18442, sum loss: 5186.046875, avg loss: 3.127893, ppl: 22.825840 +epoch: 0, batch: 18443, sum loss: 4791.953125, avg loss: 3.044443, ppl: 20.998325 +epoch: 0, batch: 18444, sum loss: 4991.848145, avg loss: 3.125766, ppl: 22.777336 +epoch: 0, batch: 18445, sum loss: 5994.321289, avg loss: 3.413623, ppl: 30.375090 +epoch: 0, batch: 18446, sum loss: 5086.066406, avg loss: 3.328578, ppl: 27.898628 +epoch: 0, batch: 18447, sum loss: 5925.541016, avg loss: 3.364873, ppl: 28.929813 +epoch: 0, batch: 18448, sum loss: 5817.993164, avg loss: 3.288860, ppl: 26.812283 +epoch: 0, batch: 18449, sum loss: 6572.919922, avg loss: 3.543353, ppl: 34.582684 +epoch: 0, batch: 18450, sum loss: 5738.468262, avg loss: 3.501201, ppl: 33.155235 +epoch: 0, batch: 18451, sum loss: 5111.266602, avg loss: 3.194541, ppl: 24.398983 +epoch: 0, batch: 18452, sum loss: 5480.704102, avg loss: 3.313606, ppl: 27.484055 +epoch: 0, batch: 18453, sum loss: 5463.305176, avg loss: 3.065828, ppl: 21.452213 +epoch: 0, batch: 18454, sum loss: 5466.396484, avg loss: 3.296982, ppl: 27.030945 +epoch: 0, batch: 18455, sum loss: 4315.438477, avg loss: 3.170785, ppl: 
23.826180 +epoch: 0, batch: 18456, sum loss: 5644.816406, avg loss: 3.400492, ppl: 29.978844 +epoch: 0, batch: 18457, sum loss: 5133.227051, avg loss: 3.292641, ppl: 26.913847 +epoch: 0, batch: 18458, sum loss: 5791.989258, avg loss: 3.100637, ppl: 22.212090 +epoch: 0, batch: 18459, sum loss: 6447.044922, avg loss: 3.386053, ppl: 29.549093 +epoch: 0, batch: 18460, sum loss: 6198.735352, avg loss: 3.350668, ppl: 28.521778 +epoch: 0, batch: 18461, sum loss: 6244.296387, avg loss: 3.436597, ppl: 31.081005 +epoch: 0, batch: 18462, sum loss: 5618.993164, avg loss: 3.289809, ppl: 26.837725 +epoch: 0, batch: 18463, sum loss: 5726.703125, avg loss: 3.502571, ppl: 33.200687 +epoch: 0, batch: 18464, sum loss: 5787.314941, avg loss: 3.234944, ppl: 25.404951 +epoch: 0, batch: 18465, sum loss: 5148.668945, avg loss: 3.184087, ppl: 24.145241 +epoch: 0, batch: 18466, sum loss: 5381.140137, avg loss: 3.476189, ppl: 32.336250 +epoch: 0, batch: 18467, sum loss: 5305.751953, avg loss: 3.544256, ppl: 34.613930 +epoch: 0, batch: 18468, sum loss: 4501.069824, avg loss: 3.016803, ppl: 20.425886 +epoch: 0, batch: 18469, sum loss: 5345.319824, avg loss: 3.138767, ppl: 23.075392 +epoch: 0, batch: 18470, sum loss: 4921.472656, avg loss: 3.181301, ppl: 24.078062 +epoch: 0, batch: 18471, sum loss: 5657.537598, avg loss: 3.430890, ppl: 30.904139 +epoch: 0, batch: 18472, sum loss: 5654.503906, avg loss: 3.420753, ppl: 30.592428 +epoch: 0, batch: 18473, sum loss: 4539.885742, avg loss: 2.915791, ppl: 18.463404 +epoch: 0, batch: 18474, sum loss: 6745.294922, avg loss: 3.430974, ppl: 30.906731 +epoch: 0, batch: 18475, sum loss: 5149.256836, avg loss: 3.462849, ppl: 31.907757 +epoch: 0, batch: 18476, sum loss: 4968.711914, avg loss: 3.160758, ppl: 23.588476 +epoch: 0, batch: 18477, sum loss: 4730.644531, avg loss: 3.028582, ppl: 20.667896 +epoch: 0, batch: 18478, sum loss: 5635.021484, avg loss: 3.330391, ppl: 27.949266 +epoch: 0, batch: 18479, sum loss: 6844.794922, avg loss: 3.650557, ppl: 
38.496113 +epoch: 0, batch: 18480, sum loss: 6037.179688, avg loss: 3.382174, ppl: 29.434679 +epoch: 0, batch: 18481, sum loss: 6630.745117, avg loss: 3.322017, ppl: 27.716190 +epoch: 0, batch: 18482, sum loss: 5425.041016, avg loss: 3.246584, ppl: 25.702383 +epoch: 0, batch: 18483, sum loss: 5633.117188, avg loss: 3.303881, ppl: 27.218073 +epoch: 0, batch: 18484, sum loss: 5119.435547, avg loss: 3.394851, ppl: 29.810217 +epoch: 0, batch: 18485, sum loss: 6096.346680, avg loss: 3.322260, ppl: 27.722931 +epoch: 0, batch: 18486, sum loss: 4874.193359, avg loss: 3.120482, ppl: 22.657309 +epoch: 0, batch: 18487, sum loss: 5994.233887, avg loss: 3.148232, ppl: 23.294847 +epoch: 0, batch: 18488, sum loss: 5033.518066, avg loss: 3.050617, ppl: 21.128376 +epoch: 0, batch: 18489, sum loss: 5380.635742, avg loss: 3.396866, ppl: 29.870342 +epoch: 0, batch: 18490, sum loss: 6856.297852, avg loss: 3.574712, ppl: 35.684345 +epoch: 0, batch: 18491, sum loss: 5548.769043, avg loss: 3.440031, ppl: 31.187912 +epoch: 0, batch: 18492, sum loss: 5778.383789, avg loss: 3.215573, ppl: 24.917555 +epoch: 0, batch: 18493, sum loss: 6166.291992, avg loss: 3.295720, ppl: 26.996847 +epoch: 0, batch: 18494, sum loss: 5078.271484, avg loss: 3.003118, ppl: 20.148252 +epoch: 0, batch: 18495, sum loss: 6224.505371, avg loss: 3.627334, ppl: 37.612411 +epoch: 0, batch: 18496, sum loss: 6414.702148, avg loss: 3.334045, ppl: 28.051573 +epoch: 0, batch: 18497, sum loss: 5913.680176, avg loss: 3.356232, ppl: 28.680916 +epoch: 0, batch: 18498, sum loss: 4876.694824, avg loss: 3.393664, ppl: 29.774845 +epoch: 0, batch: 18499, sum loss: 5483.019531, avg loss: 3.208320, ppl: 24.737501 +epoch: 0, batch: 18500, sum loss: 5040.987305, avg loss: 3.378678, ppl: 29.331964 +epoch: 0, batch: 18501, sum loss: 5118.969727, avg loss: 3.201357, ppl: 24.565840 +epoch: 0, batch: 18502, sum loss: 5031.292480, avg loss: 3.064125, ppl: 21.415707 +epoch: 0, batch: 18503, sum loss: 5815.300293, avg loss: 3.379024, ppl: 
29.342113 +epoch: 0, batch: 18504, sum loss: 6048.771484, avg loss: 3.280245, ppl: 26.582279 +epoch: 0, batch: 18505, sum loss: 5726.647949, avg loss: 3.447711, ppl: 31.428370 +epoch: 0, batch: 18506, sum loss: 4629.000977, avg loss: 3.005845, ppl: 20.203278 +epoch: 0, batch: 18507, sum loss: 5610.938477, avg loss: 3.353819, ppl: 28.611784 +epoch: 0, batch: 18508, sum loss: 5537.460449, avg loss: 3.341859, ppl: 28.271631 +epoch: 0, batch: 18509, sum loss: 5865.445801, avg loss: 3.306339, ppl: 27.285053 +epoch: 0, batch: 18510, sum loss: 5679.209961, avg loss: 3.325064, ppl: 27.800783 +epoch: 0, batch: 18511, sum loss: 6002.124023, avg loss: 3.408361, ppl: 30.215679 +epoch: 0, batch: 18512, sum loss: 5182.730469, avg loss: 3.371978, ppl: 29.136110 +epoch: 0, batch: 18513, sum loss: 5307.384766, avg loss: 3.302666, ppl: 27.185030 +epoch: 0, batch: 18514, sum loss: 6151.146484, avg loss: 3.389061, ppl: 29.638123 +epoch: 0, batch: 18515, sum loss: 5895.215820, avg loss: 3.447495, ppl: 31.421568 +epoch: 0, batch: 18516, sum loss: 5138.708008, avg loss: 3.066055, ppl: 21.457088 +epoch: 0, batch: 18517, sum loss: 5279.830078, avg loss: 3.241148, ppl: 25.563051 +epoch: 0, batch: 18518, sum loss: 5931.910645, avg loss: 3.202976, ppl: 24.605637 +epoch: 0, batch: 18519, sum loss: 4863.114746, avg loss: 3.050888, ppl: 21.134094 +epoch: 0, batch: 18520, sum loss: 5799.005859, avg loss: 3.357849, ppl: 28.727343 +epoch: 0, batch: 18521, sum loss: 6512.093750, avg loss: 3.508671, ppl: 33.403854 +epoch: 0, batch: 18522, sum loss: 5268.677246, avg loss: 3.315719, ppl: 27.542187 +epoch: 0, batch: 18523, sum loss: 6381.501465, avg loss: 3.506320, ppl: 33.325397 +epoch: 0, batch: 18524, sum loss: 5202.713379, avg loss: 3.284541, ppl: 26.696737 +epoch: 0, batch: 18525, sum loss: 5126.108887, avg loss: 3.370223, ppl: 29.085014 +epoch: 0, batch: 18526, sum loss: 4645.250000, avg loss: 3.192612, ppl: 24.351944 +epoch: 0, batch: 18527, sum loss: 5876.063965, avg loss: 3.329215, ppl: 
27.916414 +epoch: 0, batch: 18528, sum loss: 5460.823242, avg loss: 3.279774, ppl: 26.569759 +epoch: 0, batch: 18529, sum loss: 5698.678711, avg loss: 3.390053, ppl: 29.667519 +epoch: 0, batch: 18530, sum loss: 6644.895508, avg loss: 3.523275, ppl: 33.895241 +epoch: 0, batch: 18531, sum loss: 5673.248047, avg loss: 3.273657, ppl: 26.407738 +epoch: 0, batch: 18532, sum loss: 4816.816406, avg loss: 3.252408, ppl: 25.852526 +epoch: 0, batch: 18533, sum loss: 5360.150391, avg loss: 3.258450, ppl: 26.009192 +epoch: 0, batch: 18534, sum loss: 5846.249023, avg loss: 3.275210, ppl: 26.448771 +epoch: 0, batch: 18535, sum loss: 5182.641602, avg loss: 3.352291, ppl: 28.568104 +epoch: 0, batch: 18536, sum loss: 6225.009766, avg loss: 3.330663, ppl: 27.956877 +epoch: 0, batch: 18537, sum loss: 5906.418945, avg loss: 3.225789, ppl: 25.173424 +epoch: 0, batch: 18538, sum loss: 5117.039062, avg loss: 3.180260, ppl: 24.053017 +epoch: 0, batch: 18539, sum loss: 6879.311523, avg loss: 3.522433, ppl: 33.866726 +epoch: 0, batch: 18540, sum loss: 5669.423340, avg loss: 3.241523, ppl: 25.572634 +epoch: 0, batch: 18541, sum loss: 5254.520508, avg loss: 3.300578, ppl: 27.128317 +epoch: 0, batch: 18542, sum loss: 5231.987305, avg loss: 3.358143, ppl: 28.735788 +epoch: 0, batch: 18543, sum loss: 4930.816406, avg loss: 3.162807, ppl: 23.636856 +epoch: 0, batch: 18544, sum loss: 4609.380371, avg loss: 3.349840, ppl: 28.498184 +epoch: 0, batch: 18545, sum loss: 4871.133301, avg loss: 3.108573, ppl: 22.389067 +epoch: 0, batch: 18546, sum loss: 4297.494141, avg loss: 3.082851, ppl: 21.820522 +epoch: 0, batch: 18547, sum loss: 5391.675781, avg loss: 3.275623, ppl: 26.459713 +epoch: 0, batch: 18548, sum loss: 5054.967773, avg loss: 3.347661, ppl: 28.436138 +epoch: 0, batch: 18549, sum loss: 6839.002930, avg loss: 3.421212, ppl: 30.606493 +epoch: 0, batch: 18550, sum loss: 4902.497070, avg loss: 2.949758, ppl: 19.101322 +epoch: 0, batch: 18551, sum loss: 6006.283203, avg loss: 3.255438, ppl: 
25.930973 +epoch: 0, batch: 18552, sum loss: 5069.354980, avg loss: 3.289653, ppl: 26.833546 +epoch: 0, batch: 18553, sum loss: 5617.988770, avg loss: 3.213953, ppl: 24.877220 +epoch: 0, batch: 18554, sum loss: 5587.312988, avg loss: 3.056517, ppl: 21.253401 +epoch: 0, batch: 18555, sum loss: 4994.079102, avg loss: 3.360753, ppl: 28.810879 +epoch: 0, batch: 18556, sum loss: 6010.034180, avg loss: 3.357561, ppl: 28.719055 +epoch: 0, batch: 18557, sum loss: 6335.521973, avg loss: 3.475328, ppl: 32.308414 +epoch: 0, batch: 18558, sum loss: 5381.798340, avg loss: 3.188269, ppl: 24.246429 +epoch: 0, batch: 18559, sum loss: 5091.335938, avg loss: 3.166254, ppl: 23.718458 +epoch: 0, batch: 18560, sum loss: 6310.710449, avg loss: 3.257982, ppl: 25.997009 +epoch: 0, batch: 18561, sum loss: 4773.793945, avg loss: 3.136527, ppl: 23.023767 +epoch: 0, batch: 18562, sum loss: 5945.504395, avg loss: 3.434722, ppl: 31.022793 +epoch: 0, batch: 18563, sum loss: 6140.627441, avg loss: 3.601541, ppl: 36.654678 +epoch: 0, batch: 18564, sum loss: 4495.111328, avg loss: 3.062065, ppl: 21.371641 +epoch: 0, batch: 18565, sum loss: 6601.866211, avg loss: 3.363152, ppl: 28.880064 +epoch: 0, batch: 18566, sum loss: 3890.782715, avg loss: 2.976881, ppl: 19.626497 +epoch: 0, batch: 18567, sum loss: 4903.870605, avg loss: 3.125475, ppl: 22.770712 +epoch: 0, batch: 18568, sum loss: 5069.712891, avg loss: 3.129452, ppl: 22.861458 +epoch: 0, batch: 18569, sum loss: 5085.698242, avg loss: 3.295981, ppl: 27.003889 +epoch: 0, batch: 18570, sum loss: 5155.652832, avg loss: 3.298562, ppl: 27.073687 +epoch: 0, batch: 18571, sum loss: 5210.124512, avg loss: 3.210181, ppl: 24.783583 +epoch: 0, batch: 18572, sum loss: 5862.797852, avg loss: 3.269826, ppl: 26.306761 +epoch: 0, batch: 18573, sum loss: 4891.368164, avg loss: 3.235032, ppl: 25.407187 +epoch: 0, batch: 18574, sum loss: 5099.816406, avg loss: 3.541539, ppl: 34.520012 +epoch: 0, batch: 18575, sum loss: 5041.530762, avg loss: 3.089173, ppl: 
21.958918 +epoch: 0, batch: 18576, sum loss: 6362.926758, avg loss: 3.278169, ppl: 26.527161 +epoch: 0, batch: 18577, sum loss: 6177.274414, avg loss: 3.299826, ppl: 27.107925 +epoch: 0, batch: 18578, sum loss: 5497.036621, avg loss: 3.164673, ppl: 23.680996 +epoch: 0, batch: 18579, sum loss: 5505.848633, avg loss: 3.140815, ppl: 23.122705 +epoch: 0, batch: 18580, sum loss: 4909.327637, avg loss: 3.353366, ppl: 28.598833 +epoch: 0, batch: 18581, sum loss: 5882.089355, avg loss: 3.336409, ppl: 28.117975 +epoch: 0, batch: 18582, sum loss: 5127.398926, avg loss: 3.320854, ppl: 27.683994 +epoch: 0, batch: 18583, sum loss: 5651.730957, avg loss: 3.209387, ppl: 24.763903 +epoch: 0, batch: 18584, sum loss: 5293.263672, avg loss: 3.417213, ppl: 30.484350 +epoch: 0, batch: 18585, sum loss: 5468.131348, avg loss: 3.312012, ppl: 27.440279 +epoch: 0, batch: 18586, sum loss: 5613.122070, avg loss: 3.353120, ppl: 28.591797 +epoch: 0, batch: 18587, sum loss: 6405.059082, avg loss: 3.332497, ppl: 28.008194 +epoch: 0, batch: 18588, sum loss: 5833.825195, avg loss: 3.292227, ppl: 26.902697 +epoch: 0, batch: 18589, sum loss: 5503.700195, avg loss: 3.268231, ppl: 26.264828 +epoch: 0, batch: 18590, sum loss: 5152.633301, avg loss: 3.151458, ppl: 23.370102 +epoch: 0, batch: 18591, sum loss: 4628.957520, avg loss: 2.996089, ppl: 20.007141 +epoch: 0, batch: 18592, sum loss: 5029.288574, avg loss: 3.197259, ppl: 24.465378 +epoch: 0, batch: 18593, sum loss: 4761.503906, avg loss: 3.308898, ppl: 27.354956 +epoch: 0, batch: 18594, sum loss: 6245.989746, avg loss: 3.394559, ppl: 29.801519 +epoch: 0, batch: 18595, sum loss: 5559.902344, avg loss: 3.164429, ppl: 23.675226 +epoch: 0, batch: 18596, sum loss: 4760.638184, avg loss: 3.079326, ppl: 21.743746 +epoch: 0, batch: 18597, sum loss: 4589.937012, avg loss: 3.227804, ppl: 25.224195 +epoch: 0, batch: 18598, sum loss: 5566.596680, avg loss: 3.509834, ppl: 33.442719 +epoch: 0, batch: 18599, sum loss: 4535.555664, avg loss: 3.013658, ppl: 
20.361753 +epoch: 0, batch: 18600, sum loss: 6232.775391, avg loss: 3.296021, ppl: 27.004965 +epoch: 0, batch: 18601, sum loss: 6077.468262, avg loss: 3.383891, ppl: 29.485279 +epoch: 0, batch: 18602, sum loss: 5734.658691, avg loss: 3.316749, ppl: 27.570568 +epoch: 0, batch: 18603, sum loss: 6465.502441, avg loss: 3.517684, ppl: 33.706257 +epoch: 0, batch: 18604, sum loss: 4932.036621, avg loss: 3.141425, ppl: 23.136805 +epoch: 0, batch: 18605, sum loss: 5742.948242, avg loss: 3.302443, ppl: 27.178957 +epoch: 0, batch: 18606, sum loss: 6183.973145, avg loss: 3.543824, ppl: 34.598972 +epoch: 0, batch: 18607, sum loss: 5189.321289, avg loss: 3.158442, ppl: 23.533913 +epoch: 0, batch: 18608, sum loss: 5735.435547, avg loss: 3.332618, ppl: 28.011580 +epoch: 0, batch: 18609, sum loss: 5735.360352, avg loss: 3.282977, ppl: 26.654997 +epoch: 0, batch: 18610, sum loss: 4964.088379, avg loss: 3.104496, ppl: 22.297968 +epoch: 0, batch: 18611, sum loss: 5649.288086, avg loss: 3.455222, ppl: 31.665314 +epoch: 0, batch: 18612, sum loss: 5058.740234, avg loss: 3.207825, ppl: 24.725254 +epoch: 0, batch: 18613, sum loss: 5084.376953, avg loss: 3.064724, ppl: 21.428547 +epoch: 0, batch: 18614, sum loss: 5088.633789, avg loss: 3.356619, ppl: 28.692009 +epoch: 0, batch: 18615, sum loss: 4934.876953, avg loss: 3.131267, ppl: 22.902981 +epoch: 0, batch: 18616, sum loss: 4938.762207, avg loss: 3.024349, ppl: 20.580606 +epoch: 0, batch: 18617, sum loss: 6661.520020, avg loss: 3.707023, ppl: 40.732361 +epoch: 0, batch: 18618, sum loss: 5066.145020, avg loss: 3.377430, ppl: 29.295383 +epoch: 0, batch: 18619, sum loss: 5098.935059, avg loss: 3.132024, ppl: 22.920324 +epoch: 0, batch: 18620, sum loss: 5386.272949, avg loss: 3.381213, ppl: 29.406431 +epoch: 0, batch: 18621, sum loss: 4886.168945, avg loss: 3.330722, ppl: 27.958509 +epoch: 0, batch: 18622, sum loss: 5865.389648, avg loss: 3.460407, ppl: 31.829922 +epoch: 0, batch: 18623, sum loss: 5083.637207, avg loss: 3.107358, ppl: 
22.361881 +epoch: 0, batch: 18624, sum loss: 7071.040039, avg loss: 3.535520, ppl: 34.312855 +epoch: 0, batch: 18625, sum loss: 5344.422852, avg loss: 3.231211, ppl: 25.310287 +epoch: 0, batch: 18626, sum loss: 5538.335449, avg loss: 3.520874, ppl: 33.813976 +epoch: 0, batch: 18627, sum loss: 6029.189941, avg loss: 3.580279, ppl: 35.883556 +epoch: 0, batch: 18628, sum loss: 5791.796387, avg loss: 3.175327, ppl: 23.934647 +epoch: 0, batch: 18629, sum loss: 4902.891602, avg loss: 3.210800, ppl: 24.798916 +epoch: 0, batch: 18630, sum loss: 5656.968262, avg loss: 3.275604, ppl: 26.459209 +epoch: 0, batch: 18631, sum loss: 5159.386230, avg loss: 3.435011, ppl: 31.031744 +epoch: 0, batch: 18632, sum loss: 6033.000488, avg loss: 3.584671, ppl: 36.041481 +epoch: 0, batch: 18633, sum loss: 5370.522949, avg loss: 3.323343, ppl: 27.752981 +epoch: 0, batch: 18634, sum loss: 5129.493652, avg loss: 3.277632, ppl: 26.512909 +epoch: 0, batch: 18635, sum loss: 5544.309082, avg loss: 3.223435, ppl: 25.114250 +epoch: 0, batch: 18636, sum loss: 5228.726074, avg loss: 3.082975, ppl: 21.823238 +epoch: 0, batch: 18637, sum loss: 5570.743164, avg loss: 3.351831, ppl: 28.554976 +epoch: 0, batch: 18638, sum loss: 4823.876465, avg loss: 3.106167, ppl: 22.335260 +epoch: 0, batch: 18639, sum loss: 5677.868164, avg loss: 3.371656, ppl: 29.126719 +epoch: 0, batch: 18640, sum loss: 5088.576660, avg loss: 3.330220, ppl: 27.944502 +epoch: 0, batch: 18641, sum loss: 5903.610352, avg loss: 3.426355, ppl: 30.764313 +epoch: 0, batch: 18642, sum loss: 7379.829102, avg loss: 3.647963, ppl: 38.396366 +epoch: 0, batch: 18643, sum loss: 4945.089355, avg loss: 3.004307, ppl: 20.172232 +epoch: 0, batch: 18644, sum loss: 5226.438477, avg loss: 3.218250, ppl: 24.984365 +epoch: 0, batch: 18645, sum loss: 4640.183594, avg loss: 2.887482, ppl: 17.948063 +epoch: 0, batch: 18646, sum loss: 5766.421875, avg loss: 3.352571, ppl: 28.576101 +epoch: 0, batch: 18647, sum loss: 5399.573242, avg loss: 3.103203, ppl: 
22.269167 +epoch: 0, batch: 18648, sum loss: 4815.928223, avg loss: 3.009955, ppl: 20.286489 +epoch: 0, batch: 18649, sum loss: 6353.199219, avg loss: 3.511995, ppl: 33.515076 +epoch: 0, batch: 18650, sum loss: 5395.974121, avg loss: 3.351537, ppl: 28.546570 +epoch: 0, batch: 18651, sum loss: 4881.569824, avg loss: 3.213673, ppl: 24.870275 +epoch: 0, batch: 18652, sum loss: 5003.212402, avg loss: 3.113387, ppl: 22.497110 +epoch: 0, batch: 18653, sum loss: 5992.949707, avg loss: 3.430423, ppl: 30.889715 +epoch: 0, batch: 18654, sum loss: 5461.162109, avg loss: 3.260396, ppl: 26.059843 +epoch: 0, batch: 18655, sum loss: 6011.661133, avg loss: 3.228604, ppl: 25.244400 +epoch: 0, batch: 18656, sum loss: 6178.359375, avg loss: 3.510431, ppl: 33.462696 +epoch: 0, batch: 18657, sum loss: 5597.064941, avg loss: 3.203815, ppl: 24.626301 +epoch: 0, batch: 18658, sum loss: 4412.996094, avg loss: 3.101192, ppl: 22.224426 +epoch: 0, batch: 18659, sum loss: 4686.274414, avg loss: 3.318891, ppl: 27.629700 +epoch: 0, batch: 18660, sum loss: 5125.341797, avg loss: 3.163791, ppl: 23.660131 +epoch: 0, batch: 18661, sum loss: 5780.417969, avg loss: 3.170827, ppl: 23.827190 +epoch: 0, batch: 18662, sum loss: 5126.032715, avg loss: 3.356930, ppl: 28.700958 +epoch: 0, batch: 18663, sum loss: 5188.313477, avg loss: 3.250823, ppl: 25.811575 +epoch: 0, batch: 18664, sum loss: 6444.189453, avg loss: 3.372156, ppl: 29.141279 +epoch: 0, batch: 18665, sum loss: 5177.881836, avg loss: 3.157245, ppl: 23.505747 +epoch: 0, batch: 18666, sum loss: 5519.625488, avg loss: 3.382123, ppl: 29.433205 +epoch: 0, batch: 18667, sum loss: 5980.942383, avg loss: 3.361969, ppl: 28.845926 +epoch: 0, batch: 18668, sum loss: 6103.715820, avg loss: 3.339013, ppl: 28.191292 +epoch: 0, batch: 18669, sum loss: 5650.721680, avg loss: 3.492411, ppl: 32.865101 +epoch: 0, batch: 18670, sum loss: 5904.113281, avg loss: 3.291033, ppl: 26.870607 +epoch: 0, batch: 18671, sum loss: 4920.629395, avg loss: 3.302436, ppl: 
27.178762 +epoch: 0, batch: 18672, sum loss: 6744.982910, avg loss: 3.697907, ppl: 40.362743 +epoch: 0, batch: 18673, sum loss: 5876.323242, avg loss: 3.284697, ppl: 26.700893 +epoch: 0, batch: 18674, sum loss: 5447.445801, avg loss: 3.234825, ppl: 25.401936 +epoch: 0, batch: 18675, sum loss: 5570.630371, avg loss: 3.372052, ppl: 29.138269 +epoch: 0, batch: 18676, sum loss: 5534.602051, avg loss: 3.344171, ppl: 28.337063 +epoch: 0, batch: 18677, sum loss: 6084.177246, avg loss: 3.502693, ppl: 33.204750 +epoch: 0, batch: 18678, sum loss: 5392.147461, avg loss: 3.134969, ppl: 22.987934 +epoch: 0, batch: 18679, sum loss: 5491.079102, avg loss: 3.097055, ppl: 22.132685 +epoch: 0, batch: 18680, sum loss: 5924.001465, avg loss: 3.309498, ppl: 27.371378 +epoch: 0, batch: 18681, sum loss: 5707.702637, avg loss: 3.409620, ppl: 30.253733 +epoch: 0, batch: 18682, sum loss: 5383.474121, avg loss: 3.181722, ppl: 24.088207 +epoch: 0, batch: 18683, sum loss: 5330.958496, avg loss: 3.333933, ppl: 28.048429 +epoch: 0, batch: 18684, sum loss: 5863.978516, avg loss: 3.185214, ppl: 24.172457 +epoch: 0, batch: 18685, sum loss: 5739.252441, avg loss: 3.110706, ppl: 22.436876 +epoch: 0, batch: 18686, sum loss: 5523.867188, avg loss: 3.380580, ppl: 29.387802 +epoch: 0, batch: 18687, sum loss: 5551.642578, avg loss: 3.294743, ppl: 26.970490 +epoch: 0, batch: 18688, sum loss: 7129.238770, avg loss: 3.510211, ppl: 33.455334 +epoch: 0, batch: 18689, sum loss: 5647.592773, avg loss: 3.300756, ppl: 27.133137 +epoch: 0, batch: 18690, sum loss: 4970.605957, avg loss: 3.196531, ppl: 24.447582 +epoch: 0, batch: 18691, sum loss: 5397.886719, avg loss: 3.405607, ppl: 30.132572 +epoch: 0, batch: 18692, sum loss: 5095.714844, avg loss: 3.159154, ppl: 23.550657 +epoch: 0, batch: 18693, sum loss: 5112.120605, avg loss: 3.183139, ppl: 24.122345 +epoch: 0, batch: 18694, sum loss: 5064.698242, avg loss: 3.047352, ppl: 21.059496 +epoch: 0, batch: 18695, sum loss: 5742.964844, avg loss: 3.264903, ppl: 
26.177580 +epoch: 0, batch: 18696, sum loss: 6230.846680, avg loss: 3.397408, ppl: 29.886534 +epoch: 0, batch: 18697, sum loss: 4919.138184, avg loss: 3.038381, ppl: 20.871418 +epoch: 0, batch: 18698, sum loss: 4547.773438, avg loss: 3.125618, ppl: 22.773954 +epoch: 0, batch: 18699, sum loss: 5418.766602, avg loss: 3.336679, ppl: 28.125566 +epoch: 0, batch: 18700, sum loss: 5973.495117, avg loss: 3.361562, ppl: 28.834181 +epoch: 0, batch: 18701, sum loss: 6090.852539, avg loss: 3.380051, ppl: 29.372280 +epoch: 0, batch: 18702, sum loss: 4906.605469, avg loss: 3.206932, ppl: 24.703176 +epoch: 0, batch: 18703, sum loss: 4672.809082, avg loss: 3.070177, ppl: 21.545717 +epoch: 0, batch: 18704, sum loss: 6310.645508, avg loss: 3.734110, ppl: 41.850746 +epoch: 0, batch: 18705, sum loss: 4887.369141, avg loss: 3.122920, ppl: 22.712593 +epoch: 0, batch: 18706, sum loss: 6001.154297, avg loss: 3.358228, ppl: 28.738234 +epoch: 0, batch: 18707, sum loss: 6147.089844, avg loss: 3.310226, ppl: 27.391314 +epoch: 0, batch: 18708, sum loss: 6397.121582, avg loss: 3.484271, ppl: 32.598656 +epoch: 0, batch: 18709, sum loss: 6355.663086, avg loss: 3.544709, ppl: 34.629604 +epoch: 0, batch: 18710, sum loss: 5386.835938, avg loss: 3.166864, ppl: 23.732944 +epoch: 0, batch: 18711, sum loss: 6620.615234, avg loss: 3.657799, ppl: 38.775894 +epoch: 0, batch: 18712, sum loss: 4970.189453, avg loss: 2.977945, ppl: 19.647392 +epoch: 0, batch: 18713, sum loss: 4878.715332, avg loss: 3.303125, ppl: 27.197496 +epoch: 0, batch: 18714, sum loss: 5364.939453, avg loss: 3.311691, ppl: 27.431480 +epoch: 0, batch: 18715, sum loss: 4534.727051, avg loss: 2.981412, ppl: 19.715630 +epoch: 0, batch: 18716, sum loss: 4486.672363, avg loss: 3.170793, ppl: 23.826372 +epoch: 0, batch: 18717, sum loss: 4789.139160, avg loss: 3.062109, ppl: 21.372595 +epoch: 0, batch: 18718, sum loss: 4822.459961, avg loss: 3.243080, ppl: 25.612484 +epoch: 0, batch: 18719, sum loss: 6311.055664, avg loss: 3.608379, ppl: 
36.906193 +epoch: 0, batch: 18720, sum loss: 6158.545898, avg loss: 3.511143, ppl: 33.486534 +epoch: 0, batch: 18721, sum loss: 6461.319336, avg loss: 3.451560, ppl: 31.549557 +epoch: 0, batch: 18722, sum loss: 5477.577148, avg loss: 3.281952, ppl: 26.627691 +epoch: 0, batch: 18723, sum loss: 5420.489258, avg loss: 3.043509, ppl: 20.978729 +epoch: 0, batch: 18724, sum loss: 4474.988770, avg loss: 2.803878, ppl: 16.508535 +epoch: 0, batch: 18725, sum loss: 4289.271484, avg loss: 3.070345, ppl: 21.549330 +epoch: 0, batch: 18726, sum loss: 4573.163086, avg loss: 3.098349, ppl: 22.161335 +epoch: 0, batch: 18727, sum loss: 5219.254883, avg loss: 3.409050, ppl: 30.236498 +epoch: 0, batch: 18728, sum loss: 6349.590820, avg loss: 3.315713, ppl: 27.542028 +epoch: 0, batch: 18729, sum loss: 5856.464355, avg loss: 3.212542, ppl: 24.842161 +epoch: 0, batch: 18730, sum loss: 5592.704102, avg loss: 3.323056, ppl: 27.745022 +epoch: 0, batch: 18731, sum loss: 4831.205078, avg loss: 2.963929, ppl: 19.373951 +epoch: 0, batch: 18732, sum loss: 5570.476562, avg loss: 3.494653, ppl: 32.938847 +epoch: 0, batch: 18733, sum loss: 6547.884766, avg loss: 3.371722, ppl: 29.128656 +epoch: 0, batch: 18734, sum loss: 6844.178711, avg loss: 3.615520, ppl: 37.170650 +epoch: 0, batch: 18735, sum loss: 5643.544434, avg loss: 3.321686, ppl: 27.707026 +epoch: 0, batch: 18736, sum loss: 5559.566406, avg loss: 3.249309, ppl: 25.772514 +epoch: 0, batch: 18737, sum loss: 5551.843750, avg loss: 3.429181, ppl: 30.851360 +epoch: 0, batch: 18738, sum loss: 5067.062500, avg loss: 3.313972, ppl: 27.494108 +epoch: 0, batch: 18739, sum loss: 5067.550293, avg loss: 3.213412, ppl: 24.863771 +epoch: 0, batch: 18740, sum loss: 4932.490723, avg loss: 2.978557, ppl: 19.659430 +epoch: 0, batch: 18741, sum loss: 6053.717773, avg loss: 3.279370, ppl: 26.559044 +epoch: 0, batch: 18742, sum loss: 5076.074219, avg loss: 2.871083, ppl: 17.656126 +epoch: 0, batch: 18743, sum loss: 5584.959961, avg loss: 3.350306, ppl: 
28.511452 +epoch: 0, batch: 18744, sum loss: 5130.714355, avg loss: 3.400076, ppl: 29.966375 +epoch: 0, batch: 18745, sum loss: 5494.126465, avg loss: 2.998977, ppl: 20.065008 +epoch: 0, batch: 18746, sum loss: 6032.058594, avg loss: 3.260572, ppl: 26.064447 +epoch: 0, batch: 18747, sum loss: 5642.187988, avg loss: 3.444559, ppl: 31.329468 +epoch: 0, batch: 18748, sum loss: 5569.663086, avg loss: 3.337126, ppl: 28.138142 +epoch: 0, batch: 18749, sum loss: 5242.199707, avg loss: 3.049564, ppl: 21.106133 +epoch: 0, batch: 18750, sum loss: 6286.909180, avg loss: 3.486916, ppl: 32.684986 +epoch: 0, batch: 18751, sum loss: 5588.230957, avg loss: 3.405381, ppl: 30.125784 +epoch: 0, batch: 18752, sum loss: 4837.729492, avg loss: 3.340974, ppl: 28.246614 +epoch: 0, batch: 18753, sum loss: 5771.211426, avg loss: 3.380909, ppl: 29.397480 +epoch: 0, batch: 18754, sum loss: 5416.757812, avg loss: 3.253308, ppl: 25.875797 +epoch: 0, batch: 18755, sum loss: 4709.684082, avg loss: 3.195172, ppl: 24.414381 +epoch: 0, batch: 18756, sum loss: 5657.906250, avg loss: 3.187553, ppl: 24.229059 +epoch: 0, batch: 18757, sum loss: 6121.321289, avg loss: 3.354149, ppl: 28.621227 +epoch: 0, batch: 18758, sum loss: 5820.446289, avg loss: 3.301444, ppl: 27.151825 +epoch: 0, batch: 18759, sum loss: 6278.738281, avg loss: 3.480453, ppl: 32.474445 +epoch: 0, batch: 18760, sum loss: 5535.185059, avg loss: 3.120172, ppl: 22.650270 +epoch: 0, batch: 18761, sum loss: 4526.342773, avg loss: 3.132417, ppl: 22.929337 +epoch: 0, batch: 18762, sum loss: 5655.009766, avg loss: 3.299306, ppl: 27.093821 +epoch: 0, batch: 18763, sum loss: 6172.558594, avg loss: 3.557671, ppl: 35.081383 +epoch: 0, batch: 18764, sum loss: 5148.229980, avg loss: 3.103213, ppl: 22.269384 +epoch: 0, batch: 18765, sum loss: 4886.363281, avg loss: 3.044463, ppl: 20.998751 +epoch: 0, batch: 18766, sum loss: 5422.837402, avg loss: 3.173106, ppl: 23.881538 +epoch: 0, batch: 18767, sum loss: 5570.592773, avg loss: 3.413353, ppl: 
30.366907 +epoch: 0, batch: 18768, sum loss: 5919.332031, avg loss: 3.318011, ppl: 27.605396 +epoch: 0, batch: 18769, sum loss: 5037.756836, avg loss: 3.056891, ppl: 21.261356 +epoch: 0, batch: 18770, sum loss: 6658.119629, avg loss: 3.664348, ppl: 39.030666 +epoch: 0, batch: 18771, sum loss: 4635.974609, avg loss: 3.117669, ppl: 22.593660 +epoch: 0, batch: 18772, sum loss: 4638.776367, avg loss: 2.958403, ppl: 19.267183 +epoch: 0, batch: 18773, sum loss: 5260.980957, avg loss: 3.385445, ppl: 29.531120 +epoch: 0, batch: 18774, sum loss: 5891.662109, avg loss: 3.364742, ppl: 28.926022 +epoch: 0, batch: 18775, sum loss: 5035.808594, avg loss: 3.076242, ppl: 21.676798 +epoch: 0, batch: 18776, sum loss: 5322.683594, avg loss: 3.082040, ppl: 21.802841 +epoch: 0, batch: 18777, sum loss: 5026.370605, avg loss: 3.355387, ppl: 28.656706 +epoch: 0, batch: 18778, sum loss: 6164.403809, avg loss: 3.259865, ppl: 26.046009 +epoch: 0, batch: 18779, sum loss: 7012.583008, avg loss: 3.283045, ppl: 26.656809 +epoch: 0, batch: 18780, sum loss: 5317.857910, avg loss: 3.422045, ppl: 30.631992 +epoch: 0, batch: 18781, sum loss: 6793.684570, avg loss: 3.330237, ppl: 27.944975 +epoch: 0, batch: 18782, sum loss: 5156.620117, avg loss: 2.965279, ppl: 19.400118 +epoch: 0, batch: 18783, sum loss: 5321.630859, avg loss: 3.169524, ppl: 23.796154 +epoch: 0, batch: 18784, sum loss: 5918.950195, avg loss: 3.315939, ppl: 27.548243 +epoch: 0, batch: 18785, sum loss: 5592.872070, avg loss: 3.373264, ppl: 29.173603 +epoch: 0, batch: 18786, sum loss: 5770.234375, avg loss: 3.378357, ppl: 29.322559 +epoch: 0, batch: 18787, sum loss: 5668.850586, avg loss: 3.368301, ppl: 29.029161 +epoch: 0, batch: 18788, sum loss: 4288.694336, avg loss: 3.144204, ppl: 23.201202 +epoch: 0, batch: 18789, sum loss: 5276.908203, avg loss: 3.247328, ppl: 25.721521 +epoch: 0, batch: 18790, sum loss: 5053.231445, avg loss: 3.214524, ppl: 24.891436 +epoch: 0, batch: 18791, sum loss: 6558.511719, avg loss: 3.494146, ppl: 
32.922157 +epoch: 0, batch: 18792, sum loss: 5770.309082, avg loss: 3.211079, ppl: 24.805840 +epoch: 0, batch: 18793, sum loss: 5211.326660, avg loss: 3.189306, ppl: 24.271589 +epoch: 0, batch: 18794, sum loss: 6060.285645, avg loss: 3.439436, ppl: 31.169365 +epoch: 0, batch: 18795, sum loss: 5804.544922, avg loss: 3.152930, ppl: 23.404551 +epoch: 0, batch: 18796, sum loss: 5229.625977, avg loss: 2.931405, ppl: 18.753958 +epoch: 0, batch: 18797, sum loss: 4873.619629, avg loss: 3.156490, ppl: 23.487999 +epoch: 0, batch: 18798, sum loss: 4532.174805, avg loss: 3.112757, ppl: 22.482954 +epoch: 0, batch: 18799, sum loss: 6197.393555, avg loss: 3.161935, ppl: 23.616257 +epoch: 0, batch: 18800, sum loss: 5556.640625, avg loss: 3.255208, ppl: 25.925013 +epoch: 0, batch: 18801, sum loss: 4794.687500, avg loss: 3.109395, ppl: 22.407490 +epoch: 0, batch: 18802, sum loss: 4152.353027, avg loss: 2.963849, ppl: 19.372398 +epoch: 0, batch: 18803, sum loss: 6813.660156, avg loss: 3.423950, ppl: 30.690393 +epoch: 0, batch: 18804, sum loss: 4649.657227, avg loss: 3.120575, ppl: 22.659414 +epoch: 0, batch: 18805, sum loss: 3632.362061, avg loss: 2.957950, ppl: 19.258444 +epoch: 0, batch: 18806, sum loss: 5311.909668, avg loss: 3.154341, ppl: 23.437580 +epoch: 0, batch: 18807, sum loss: 4754.407227, avg loss: 3.093303, ppl: 22.049799 +epoch: 0, batch: 18808, sum loss: 6377.072266, avg loss: 3.392060, ppl: 29.727114 +epoch: 0, batch: 18809, sum loss: 4698.104004, avg loss: 3.046760, ppl: 21.047043 +epoch: 0, batch: 18810, sum loss: 5551.496582, avg loss: 3.389192, ppl: 29.641989 +epoch: 0, batch: 18811, sum loss: 4734.791992, avg loss: 3.074540, ppl: 21.639929 +epoch: 0, batch: 18812, sum loss: 5921.715820, avg loss: 3.397427, ppl: 29.887112 +epoch: 0, batch: 18813, sum loss: 5920.829102, avg loss: 3.503449, ppl: 33.229870 +epoch: 0, batch: 18814, sum loss: 5504.114258, avg loss: 3.362318, ppl: 28.856003 +epoch: 0, batch: 18815, sum loss: 6004.426270, avg loss: 3.233401, ppl: 
25.365782 +epoch: 0, batch: 18816, sum loss: 5210.386230, avg loss: 3.314495, ppl: 27.508501 +epoch: 0, batch: 18817, sum loss: 5284.955078, avg loss: 3.189472, ppl: 24.275606 +epoch: 0, batch: 18818, sum loss: 5686.412109, avg loss: 3.275583, ppl: 26.458647 +epoch: 0, batch: 18819, sum loss: 6335.143555, avg loss: 3.502014, ppl: 33.182228 +epoch: 0, batch: 18820, sum loss: 5320.530762, avg loss: 3.166983, ppl: 23.735758 +epoch: 0, batch: 18821, sum loss: 6548.489258, avg loss: 3.441140, ppl: 31.222530 +epoch: 0, batch: 18822, sum loss: 4777.459473, avg loss: 3.088209, ppl: 21.937756 +epoch: 0, batch: 18823, sum loss: 4785.616699, avg loss: 3.089488, ppl: 21.965824 +epoch: 0, batch: 18824, sum loss: 5328.573242, avg loss: 3.190763, ppl: 24.306955 +epoch: 0, batch: 18825, sum loss: 5442.571289, avg loss: 3.212852, ppl: 24.849857 +epoch: 0, batch: 18826, sum loss: 5650.663086, avg loss: 3.176314, ppl: 23.958282 +epoch: 0, batch: 18827, sum loss: 6777.051270, avg loss: 3.385140, ppl: 29.522139 +epoch: 0, batch: 18828, sum loss: 6171.570801, avg loss: 3.383537, ppl: 29.474827 +epoch: 0, batch: 18829, sum loss: 5934.452148, avg loss: 3.438269, ppl: 31.133017 +epoch: 0, batch: 18830, sum loss: 5781.926758, avg loss: 3.673397, ppl: 39.385464 +epoch: 0, batch: 18831, sum loss: 5055.620117, avg loss: 3.263796, ppl: 26.148611 +epoch: 0, batch: 18832, sum loss: 5808.298340, avg loss: 3.243047, ppl: 25.611654 +epoch: 0, batch: 18833, sum loss: 4716.270020, avg loss: 3.048656, ppl: 21.086973 +epoch: 0, batch: 18834, sum loss: 5091.201172, avg loss: 3.191976, ppl: 24.336458 +epoch: 0, batch: 18835, sum loss: 4916.531738, avg loss: 3.048067, ppl: 21.074564 +epoch: 0, batch: 18836, sum loss: 5356.983887, avg loss: 3.352305, ppl: 28.568520 +epoch: 0, batch: 18837, sum loss: 5508.738770, avg loss: 3.300622, ppl: 27.129520 +epoch: 0, batch: 18838, sum loss: 5253.077148, avg loss: 3.214858, ppl: 24.899746 +epoch: 0, batch: 18839, sum loss: 6109.100586, avg loss: 3.251251, ppl: 
25.822630 +epoch: 0, batch: 18840, sum loss: 5301.261230, avg loss: 3.214834, ppl: 24.899158 +epoch: 0, batch: 18841, sum loss: 4140.427734, avg loss: 3.131943, ppl: 22.918455 +epoch: 0, batch: 18842, sum loss: 4769.023438, avg loss: 2.986239, ppl: 19.811031 +epoch: 0, batch: 18843, sum loss: 5398.266602, avg loss: 3.319967, ppl: 27.659439 +epoch: 0, batch: 18844, sum loss: 4496.182129, avg loss: 3.069066, ppl: 21.521797 +epoch: 0, batch: 18845, sum loss: 6700.939453, avg loss: 3.571929, ppl: 35.585186 +epoch: 0, batch: 18846, sum loss: 4728.628906, avg loss: 3.175708, ppl: 23.943762 +epoch: 0, batch: 18847, sum loss: 5816.110840, avg loss: 3.199181, ppl: 24.512442 +epoch: 0, batch: 18848, sum loss: 5884.267090, avg loss: 3.372073, ppl: 29.138861 +epoch: 0, batch: 18849, sum loss: 5654.385742, avg loss: 3.207252, ppl: 24.711094 +epoch: 0, batch: 18850, sum loss: 4917.805664, avg loss: 3.150420, ppl: 23.345873 +epoch: 0, batch: 18851, sum loss: 5893.364258, avg loss: 3.279557, ppl: 26.564009 +epoch: 0, batch: 18852, sum loss: 5990.577637, avg loss: 3.363604, ppl: 28.893122 +epoch: 0, batch: 18853, sum loss: 5475.851074, avg loss: 3.284853, ppl: 26.705070 +epoch: 0, batch: 18854, sum loss: 5252.204590, avg loss: 3.100475, ppl: 22.208494 +epoch: 0, batch: 18855, sum loss: 5470.901367, avg loss: 3.335916, ppl: 28.104103 +epoch: 0, batch: 18856, sum loss: 6095.333984, avg loss: 3.473125, ppl: 32.237320 +epoch: 0, batch: 18857, sum loss: 4757.283691, avg loss: 3.111369, ppl: 22.451769 +epoch: 0, batch: 18858, sum loss: 5928.246094, avg loss: 3.422775, ppl: 30.654356 +epoch: 0, batch: 18859, sum loss: 6978.339844, avg loss: 3.454624, ppl: 31.646378 +epoch: 0, batch: 18860, sum loss: 5709.837891, avg loss: 3.555316, ppl: 34.998886 +epoch: 0, batch: 18861, sum loss: 4534.261719, avg loss: 3.292855, ppl: 26.919617 +epoch: 0, batch: 18862, sum loss: 5321.285645, avg loss: 3.223068, ppl: 25.105036 +epoch: 0, batch: 18863, sum loss: 4656.797852, avg loss: 3.029797, ppl: 
20.693033 +epoch: 0, batch: 18864, sum loss: 5877.225586, avg loss: 3.481769, ppl: 32.517189 +epoch: 0, batch: 18865, sum loss: 5317.675781, avg loss: 3.120702, ppl: 22.662279 +epoch: 0, batch: 18866, sum loss: 6594.786621, avg loss: 3.465469, ppl: 31.991449 +epoch: 0, batch: 18867, sum loss: 5590.162598, avg loss: 3.347403, ppl: 28.428802 +epoch: 0, batch: 18868, sum loss: 6060.605469, avg loss: 3.445484, ppl: 31.358448 +epoch: 0, batch: 18869, sum loss: 5547.657227, avg loss: 3.572220, ppl: 35.595524 +epoch: 0, batch: 18870, sum loss: 5260.042969, avg loss: 3.209300, ppl: 24.761747 +epoch: 0, batch: 18871, sum loss: 5030.196289, avg loss: 3.044913, ppl: 21.008205 +epoch: 0, batch: 18872, sum loss: 5611.004395, avg loss: 3.392385, ppl: 29.736784 +epoch: 0, batch: 18873, sum loss: 5189.370605, avg loss: 3.177814, ppl: 23.994246 +epoch: 0, batch: 18874, sum loss: 4199.059570, avg loss: 3.267751, ppl: 26.252224 +epoch: 0, batch: 18875, sum loss: 3898.935791, avg loss: 2.922740, ppl: 18.592169 +epoch: 0, batch: 18876, sum loss: 4486.016113, avg loss: 3.057952, ppl: 21.283926 +epoch: 0, batch: 18877, sum loss: 4305.909668, avg loss: 3.017456, ppl: 20.439230 +epoch: 0, batch: 18878, sum loss: 4885.322754, avg loss: 3.141687, ppl: 23.142874 +epoch: 0, batch: 18879, sum loss: 6849.666504, avg loss: 3.492946, ppl: 32.882675 +epoch: 0, batch: 18880, sum loss: 4773.827637, avg loss: 3.276477, ppl: 26.482321 +epoch: 0, batch: 18881, sum loss: 6197.783203, avg loss: 3.403505, ppl: 30.069319 +epoch: 0, batch: 18882, sum loss: 5459.885254, avg loss: 3.050215, ppl: 21.119890 +epoch: 0, batch: 18883, sum loss: 5296.379395, avg loss: 3.029965, ppl: 20.696516 +epoch: 0, batch: 18884, sum loss: 5984.413086, avg loss: 3.382936, ppl: 29.457123 +epoch: 0, batch: 18885, sum loss: 6295.397461, avg loss: 3.391917, ppl: 29.722870 +epoch: 0, batch: 18886, sum loss: 5916.895508, avg loss: 3.305528, ppl: 27.262938 +epoch: 0, batch: 18887, sum loss: 5966.700684, avg loss: 3.401768, ppl: 
30.017115 +epoch: 0, batch: 18888, sum loss: 6175.038086, avg loss: 3.309238, ppl: 27.364258 +epoch: 0, batch: 18889, sum loss: 5655.183594, avg loss: 3.370193, ppl: 29.084133 +epoch: 0, batch: 18890, sum loss: 5005.018066, avg loss: 3.303642, ppl: 27.211571 +epoch: 0, batch: 18891, sum loss: 5206.758789, avg loss: 3.333392, ppl: 28.033279 +epoch: 0, batch: 18892, sum loss: 5313.329590, avg loss: 3.419131, ppl: 30.542864 +epoch: 0, batch: 18893, sum loss: 5318.831055, avg loss: 3.394277, ppl: 29.793095 +epoch: 0, batch: 18894, sum loss: 5514.787109, avg loss: 3.501452, ppl: 33.163570 +epoch: 0, batch: 18895, sum loss: 4470.582031, avg loss: 3.066243, ppl: 21.461115 +epoch: 0, batch: 18896, sum loss: 5573.946777, avg loss: 3.271096, ppl: 26.340179 +epoch: 0, batch: 18897, sum loss: 5571.695312, avg loss: 3.110941, ppl: 22.442150 +epoch: 0, batch: 18898, sum loss: 4956.539551, avg loss: 3.208116, ppl: 24.732454 +epoch: 0, batch: 18899, sum loss: 5128.856445, avg loss: 3.426090, ppl: 30.756144 +epoch: 0, batch: 18900, sum loss: 5685.871582, avg loss: 3.366413, ppl: 28.974413 +epoch: 0, batch: 18901, sum loss: 4926.295898, avg loss: 3.042802, ppl: 20.963894 +epoch: 0, batch: 18902, sum loss: 5709.292969, avg loss: 3.352491, ppl: 28.573833 +epoch: 0, batch: 18903, sum loss: 5129.357910, avg loss: 3.370143, ppl: 29.082697 +epoch: 0, batch: 18904, sum loss: 6973.900879, avg loss: 3.449011, ppl: 31.469257 +epoch: 0, batch: 18905, sum loss: 4703.701172, avg loss: 2.939813, ppl: 18.912312 +epoch: 0, batch: 18906, sum loss: 6194.977051, avg loss: 3.363180, ppl: 28.880884 +epoch: 0, batch: 18907, sum loss: 5136.926758, avg loss: 3.316286, ppl: 27.557806 +epoch: 0, batch: 18908, sum loss: 6106.511230, avg loss: 3.325986, ppl: 27.826433 +epoch: 0, batch: 18909, sum loss: 5674.512207, avg loss: 3.233340, ppl: 25.364239 +epoch: 0, batch: 18910, sum loss: 5602.745117, avg loss: 3.266907, ppl: 26.230078 +epoch: 0, batch: 18911, sum loss: 5485.770020, avg loss: 3.136518, ppl: 
23.023560 +epoch: 0, batch: 18912, sum loss: 4175.743164, avg loss: 2.932404, ppl: 18.772707 +epoch: 0, batch: 18913, sum loss: 7179.152344, avg loss: 3.444891, ppl: 31.339867 +epoch: 0, batch: 18914, sum loss: 5183.757812, avg loss: 3.178270, ppl: 24.005180 +epoch: 0, batch: 18915, sum loss: 5818.816895, avg loss: 3.306146, ppl: 27.279783 +epoch: 0, batch: 18916, sum loss: 5313.990234, avg loss: 3.270148, ppl: 26.315228 +epoch: 0, batch: 18917, sum loss: 6900.289062, avg loss: 3.504464, ppl: 33.263607 +epoch: 0, batch: 18918, sum loss: 5472.021973, avg loss: 3.402999, ppl: 30.054089 +epoch: 0, batch: 18919, sum loss: 6646.300293, avg loss: 3.542804, ppl: 34.563702 +epoch: 0, batch: 18920, sum loss: 4740.201660, avg loss: 3.056223, ppl: 21.247149 +epoch: 0, batch: 18921, sum loss: 5697.002930, avg loss: 3.327688, ppl: 27.873837 +epoch: 0, batch: 18922, sum loss: 6229.130859, avg loss: 3.217526, ppl: 24.966282 +epoch: 0, batch: 18923, sum loss: 5163.213379, avg loss: 2.994904, ppl: 19.983433 +epoch: 0, batch: 18924, sum loss: 5440.234375, avg loss: 3.015651, ppl: 20.402369 +epoch: 0, batch: 18925, sum loss: 5036.229492, avg loss: 3.384563, ppl: 29.505089 +epoch: 0, batch: 18926, sum loss: 5207.020508, avg loss: 3.316574, ppl: 27.565739 +epoch: 0, batch: 18927, sum loss: 6141.422852, avg loss: 3.487463, ppl: 32.702885 +epoch: 0, batch: 18928, sum loss: 5153.569336, avg loss: 3.235135, ppl: 25.409792 +epoch: 0, batch: 18929, sum loss: 4546.002441, avg loss: 3.181247, ppl: 24.076771 +epoch: 0, batch: 18930, sum loss: 6234.898926, avg loss: 3.492941, ppl: 32.882504 +epoch: 0, batch: 18931, sum loss: 5502.363281, avg loss: 3.255836, ppl: 25.941299 +epoch: 0, batch: 18932, sum loss: 5855.172363, avg loss: 3.315500, ppl: 27.536146 +epoch: 0, batch: 18933, sum loss: 5434.841309, avg loss: 3.379876, ppl: 29.367140 +epoch: 0, batch: 18934, sum loss: 6391.773438, avg loss: 3.483255, ppl: 32.565563 +epoch: 0, batch: 18935, sum loss: 5526.845215, avg loss: 3.325418, ppl: 
27.810635 +epoch: 0, batch: 18936, sum loss: 5231.169922, avg loss: 3.285911, ppl: 26.733316 +epoch: 0, batch: 18937, sum loss: 5348.500000, avg loss: 3.269254, ppl: 26.291731 +epoch: 0, batch: 18938, sum loss: 5023.813477, avg loss: 3.189723, ppl: 24.281694 +epoch: 0, batch: 18939, sum loss: 5344.441895, avg loss: 3.406273, ppl: 30.152645 +epoch: 0, batch: 18940, sum loss: 4089.371094, avg loss: 3.072405, ppl: 21.593775 +epoch: 0, batch: 18941, sum loss: 6002.954102, avg loss: 3.196461, ppl: 24.445868 +epoch: 0, batch: 18942, sum loss: 5132.160156, avg loss: 3.248203, ppl: 25.744024 +epoch: 0, batch: 18943, sum loss: 5662.817383, avg loss: 3.303861, ppl: 27.217520 +epoch: 0, batch: 18944, sum loss: 5065.319336, avg loss: 3.140310, ppl: 23.111021 +epoch: 0, batch: 18945, sum loss: 5697.120117, avg loss: 3.383088, ppl: 29.461611 +epoch: 0, batch: 18946, sum loss: 5422.958984, avg loss: 3.186227, ppl: 24.196968 +epoch: 0, batch: 18947, sum loss: 5355.016113, avg loss: 3.257309, ppl: 25.979530 +epoch: 0, batch: 18948, sum loss: 5441.994629, avg loss: 3.363408, ppl: 28.887482 +epoch: 0, batch: 18949, sum loss: 5359.464355, avg loss: 3.139698, ppl: 23.096891 +epoch: 0, batch: 18950, sum loss: 5776.821289, avg loss: 3.314298, ppl: 27.503078 +epoch: 0, batch: 18951, sum loss: 5288.686523, avg loss: 3.379352, ppl: 29.351755 +epoch: 0, batch: 18952, sum loss: 5846.612793, avg loss: 3.526304, ppl: 33.998096 +epoch: 0, batch: 18953, sum loss: 5712.552734, avg loss: 3.303963, ppl: 27.220310 +epoch: 0, batch: 18954, sum loss: 5886.522461, avg loss: 3.342716, ppl: 28.295866 +epoch: 0, batch: 18955, sum loss: 5099.030273, avg loss: 2.968004, ppl: 19.453047 +epoch: 0, batch: 18956, sum loss: 5374.804688, avg loss: 3.295404, ppl: 26.988327 +epoch: 0, batch: 18957, sum loss: 4529.041504, avg loss: 3.207536, ppl: 24.718117 +epoch: 0, batch: 18958, sum loss: 4560.521484, avg loss: 2.700131, ppl: 14.881683 +epoch: 0, batch: 18959, sum loss: 5882.506348, avg loss: 3.275338, ppl: 
26.452156 +epoch: 0, batch: 18960, sum loss: 5611.019043, avg loss: 3.222871, ppl: 25.100086 +epoch: 0, batch: 18961, sum loss: 5271.256348, avg loss: 3.340466, ppl: 28.232294 +epoch: 0, batch: 18962, sum loss: 6460.209473, avg loss: 3.345526, ppl: 28.375483 +epoch: 0, batch: 18963, sum loss: 5675.179199, avg loss: 3.175814, ppl: 23.946302 +epoch: 0, batch: 18964, sum loss: 5271.029297, avg loss: 3.210128, ppl: 24.782248 +epoch: 0, batch: 18965, sum loss: 5837.379883, avg loss: 3.219735, ppl: 25.021498 +epoch: 0, batch: 18966, sum loss: 5883.508301, avg loss: 3.286876, ppl: 26.759136 +epoch: 0, batch: 18967, sum loss: 6205.769531, avg loss: 3.648307, ppl: 38.409569 +epoch: 0, batch: 18968, sum loss: 5579.064941, avg loss: 3.123777, ppl: 22.732069 +epoch: 0, batch: 18969, sum loss: 6659.538086, avg loss: 3.402932, ppl: 30.052082 +epoch: 0, batch: 18970, sum loss: 4975.388672, avg loss: 3.181195, ppl: 24.075500 +epoch: 0, batch: 18971, sum loss: 6210.524414, avg loss: 3.371620, ppl: 29.125685 +epoch: 0, batch: 18972, sum loss: 4803.327148, avg loss: 3.164247, ppl: 23.670919 +epoch: 0, batch: 18973, sum loss: 6125.464355, avg loss: 3.303918, ppl: 27.219078 +epoch: 0, batch: 18974, sum loss: 6274.009766, avg loss: 3.505033, ppl: 33.282551 +epoch: 0, batch: 18975, sum loss: 5731.443359, avg loss: 3.353683, ppl: 28.607889 +epoch: 0, batch: 18976, sum loss: 4981.882812, avg loss: 3.048888, ppl: 21.091877 +epoch: 0, batch: 18977, sum loss: 5200.303223, avg loss: 3.198218, ppl: 24.488861 +epoch: 0, batch: 18978, sum loss: 5656.645508, avg loss: 3.258436, ppl: 26.008839 +epoch: 0, batch: 18979, sum loss: 5275.509277, avg loss: 3.322109, ppl: 27.718754 +epoch: 0, batch: 18980, sum loss: 4591.447754, avg loss: 3.277265, ppl: 26.503176 +epoch: 0, batch: 18981, sum loss: 4683.465820, avg loss: 3.164504, ppl: 23.676998 +epoch: 0, batch: 18982, sum loss: 4521.576172, avg loss: 3.257620, ppl: 25.987602 +epoch: 0, batch: 18983, sum loss: 6259.111328, avg loss: 3.716812, ppl: 
41.133057 +epoch: 0, batch: 18984, sum loss: 5080.202148, avg loss: 3.279666, ppl: 26.566891 +epoch: 0, batch: 18985, sum loss: 5125.918945, avg loss: 3.140882, ppl: 23.124249 +epoch: 0, batch: 18986, sum loss: 5807.366211, avg loss: 3.135727, ppl: 23.005354 +epoch: 0, batch: 18987, sum loss: 5136.305664, avg loss: 3.200190, ppl: 24.537199 +epoch: 0, batch: 18988, sum loss: 4713.320312, avg loss: 3.161181, ppl: 23.598442 +epoch: 0, batch: 18989, sum loss: 6334.511719, avg loss: 3.396521, ppl: 29.860039 +epoch: 0, batch: 18990, sum loss: 5374.814453, avg loss: 3.463154, ppl: 31.917480 +epoch: 0, batch: 18991, sum loss: 5884.125488, avg loss: 3.415047, ppl: 30.418369 +epoch: 0, batch: 18992, sum loss: 5932.290039, avg loss: 3.247011, ppl: 25.713379 +epoch: 0, batch: 18993, sum loss: 5359.426270, avg loss: 3.096144, ppl: 22.112530 +epoch: 0, batch: 18994, sum loss: 5500.740723, avg loss: 3.429389, ppl: 30.857798 +epoch: 0, batch: 18995, sum loss: 4540.510742, avg loss: 3.097211, ppl: 22.136120 +epoch: 0, batch: 18996, sum loss: 5613.757324, avg loss: 3.375681, ppl: 29.244190 +epoch: 0, batch: 18997, sum loss: 4423.924316, avg loss: 2.971071, ppl: 19.512802 +epoch: 0, batch: 18998, sum loss: 4737.861328, avg loss: 3.015825, ppl: 20.405924 +epoch: 0, batch: 18999, sum loss: 4996.600586, avg loss: 3.344445, ppl: 28.344833 +epoch: 0, batch: 19000, sum loss: 5976.674805, avg loss: 3.470775, ppl: 32.161663 +epoch: 0, batch: 19001, sum loss: 4864.357910, avg loss: 3.026981, ppl: 20.634834 +epoch: 0, batch: 19002, sum loss: 5044.738770, avg loss: 3.137275, ppl: 23.041000 +epoch: 0, batch: 19003, sum loss: 5353.201660, avg loss: 3.362564, ppl: 28.863096 +epoch: 0, batch: 19004, sum loss: 4796.108398, avg loss: 3.247196, ppl: 25.718130 +epoch: 0, batch: 19005, sum loss: 5776.883789, avg loss: 3.327698, ppl: 27.874102 +epoch: 0, batch: 19006, sum loss: 5472.498535, avg loss: 3.230519, ppl: 25.292770 +epoch: 0, batch: 19007, sum loss: 4644.168945, avg loss: 3.202875, ppl: 
24.603167 +epoch: 0, batch: 19008, sum loss: 5709.801758, avg loss: 3.220418, ppl: 25.038595 +epoch: 0, batch: 19009, sum loss: 5952.040527, avg loss: 3.349488, ppl: 28.488152 +epoch: 0, batch: 19010, sum loss: 6347.965332, avg loss: 3.518828, ppl: 33.744846 +epoch: 0, batch: 19011, sum loss: 4753.430664, avg loss: 3.145884, ppl: 23.240211 +epoch: 0, batch: 19012, sum loss: 5145.389160, avg loss: 3.129799, ppl: 22.869375 +epoch: 0, batch: 19013, sum loss: 5502.755859, avg loss: 3.146230, ppl: 23.248247 +epoch: 0, batch: 19014, sum loss: 6009.314453, avg loss: 3.323736, ppl: 27.763882 +epoch: 0, batch: 19015, sum loss: 5847.004395, avg loss: 3.301527, ppl: 27.154072 +epoch: 0, batch: 19016, sum loss: 6133.229980, avg loss: 3.239952, ppl: 25.532505 +epoch: 0, batch: 19017, sum loss: 4920.812988, avg loss: 3.214117, ppl: 24.881313 +epoch: 0, batch: 19018, sum loss: 5966.930664, avg loss: 3.262401, ppl: 26.112154 +epoch: 0, batch: 19019, sum loss: 5597.206055, avg loss: 3.271307, ppl: 26.345751 +epoch: 0, batch: 19020, sum loss: 5373.327637, avg loss: 3.240849, ppl: 25.555410 +epoch: 0, batch: 19021, sum loss: 5812.116211, avg loss: 3.340297, ppl: 28.227501 +epoch: 0, batch: 19022, sum loss: 5151.041016, avg loss: 3.251920, ppl: 25.839899 +epoch: 0, batch: 19023, sum loss: 5536.582520, avg loss: 3.283857, ppl: 26.678471 +epoch: 0, batch: 19024, sum loss: 6883.764648, avg loss: 3.462658, ppl: 31.901665 +epoch: 0, batch: 19025, sum loss: 4731.974609, avg loss: 3.201607, ppl: 24.571980 +epoch: 0, batch: 19026, sum loss: 6895.874023, avg loss: 3.318515, ppl: 27.619301 +epoch: 0, batch: 19027, sum loss: 5841.661133, avg loss: 3.270807, ppl: 26.332582 +epoch: 0, batch: 19028, sum loss: 4756.656250, avg loss: 3.164774, ppl: 23.683401 +epoch: 0, batch: 19029, sum loss: 5250.308105, avg loss: 3.108531, ppl: 22.388121 +epoch: 0, batch: 19030, sum loss: 5541.374023, avg loss: 3.412176, ppl: 30.331177 +epoch: 0, batch: 19031, sum loss: 4959.179688, avg loss: 3.346275, ppl: 
28.396769 +epoch: 0, batch: 19032, sum loss: 5818.663574, avg loss: 3.480062, ppl: 32.461742 +epoch: 0, batch: 19033, sum loss: 5942.693359, avg loss: 3.401656, ppl: 30.013765 +epoch: 0, batch: 19034, sum loss: 5409.486816, avg loss: 3.239214, ppl: 25.513653 +epoch: 0, batch: 19035, sum loss: 5759.637695, avg loss: 3.304439, ppl: 27.233269 +epoch: 0, batch: 19036, sum loss: 6482.805664, avg loss: 3.615620, ppl: 37.174374 +epoch: 0, batch: 19037, sum loss: 5439.622559, avg loss: 3.322922, ppl: 27.741285 +epoch: 0, batch: 19038, sum loss: 5115.363770, avg loss: 2.963710, ppl: 19.369701 +epoch: 0, batch: 19039, sum loss: 6708.932617, avg loss: 3.525451, ppl: 33.969082 +epoch: 0, batch: 19040, sum loss: 6153.010742, avg loss: 3.532153, ppl: 34.197521 +epoch: 0, batch: 19041, sum loss: 6415.159180, avg loss: 3.436079, ppl: 31.064907 +epoch: 0, batch: 19042, sum loss: 5588.429688, avg loss: 3.252869, ppl: 25.864449 +epoch: 0, batch: 19043, sum loss: 5738.508301, avg loss: 3.348021, ppl: 28.446390 +epoch: 0, batch: 19044, sum loss: 4788.008301, avg loss: 3.187755, ppl: 24.233963 +epoch: 0, batch: 19045, sum loss: 4886.203125, avg loss: 3.130175, ppl: 22.877985 +epoch: 0, batch: 19046, sum loss: 4057.389893, avg loss: 2.831396, ppl: 16.969131 +epoch: 0, batch: 19047, sum loss: 5857.690430, avg loss: 3.180071, ppl: 24.048458 +epoch: 0, batch: 19048, sum loss: 6480.046387, avg loss: 3.378544, ppl: 29.328028 +epoch: 0, batch: 19049, sum loss: 4857.277832, avg loss: 3.158178, ppl: 23.527691 +epoch: 0, batch: 19050, sum loss: 4901.540039, avg loss: 3.289624, ppl: 26.832779 +epoch: 0, batch: 19051, sum loss: 5106.517090, avg loss: 3.144407, ppl: 23.205912 +epoch: 0, batch: 19052, sum loss: 5619.729492, avg loss: 3.403834, ppl: 30.079199 +epoch: 0, batch: 19053, sum loss: 4652.408691, avg loss: 2.959548, ppl: 19.289242 +epoch: 0, batch: 19054, sum loss: 6342.780273, avg loss: 3.426678, ppl: 30.774231 +epoch: 0, batch: 19055, sum loss: 5495.589355, avg loss: 3.277036, ppl: 
26.497118 +epoch: 0, batch: 19056, sum loss: 5523.812012, avg loss: 3.158269, ppl: 23.529823 +epoch: 0, batch: 19057, sum loss: 4492.451660, avg loss: 2.940086, ppl: 18.917475 +epoch: 0, batch: 19058, sum loss: 4394.334961, avg loss: 3.026402, ppl: 20.622889 +epoch: 0, batch: 19059, sum loss: 6025.249023, avg loss: 3.705565, ppl: 40.673031 +epoch: 0, batch: 19060, sum loss: 5042.966797, avg loss: 3.201884, ppl: 24.578783 +epoch: 0, batch: 19061, sum loss: 5844.460449, avg loss: 3.370508, ppl: 29.093294 +epoch: 0, batch: 19062, sum loss: 6167.652832, avg loss: 3.308827, ppl: 27.353012 +epoch: 0, batch: 19063, sum loss: 6028.479492, avg loss: 3.206638, ppl: 24.695915 +epoch: 0, batch: 19064, sum loss: 7046.132324, avg loss: 3.589472, ppl: 36.214935 +epoch: 0, batch: 19065, sum loss: 6058.553711, avg loss: 3.405595, ppl: 30.132221 +epoch: 0, batch: 19066, sum loss: 4461.958496, avg loss: 2.968702, ppl: 19.466627 +epoch: 0, batch: 19067, sum loss: 5490.221191, avg loss: 3.277744, ppl: 26.515894 +epoch: 0, batch: 19068, sum loss: 4386.362305, avg loss: 3.075990, ppl: 21.671335 +epoch: 0, batch: 19069, sum loss: 6042.199219, avg loss: 3.396402, ppl: 29.856480 +epoch: 0, batch: 19070, sum loss: 6249.361816, avg loss: 3.473798, ppl: 32.259014 +epoch: 0, batch: 19071, sum loss: 5562.498047, avg loss: 2.958776, ppl: 19.274357 +epoch: 0, batch: 19072, sum loss: 6089.808105, avg loss: 3.358968, ppl: 28.759489 +epoch: 0, batch: 19073, sum loss: 5559.050781, avg loss: 3.180235, ppl: 24.052408 +epoch: 0, batch: 19074, sum loss: 6678.125000, avg loss: 3.745443, ppl: 42.327770 +epoch: 0, batch: 19075, sum loss: 5701.721680, avg loss: 3.410121, ppl: 30.268898 +epoch: 0, batch: 19076, sum loss: 5845.980469, avg loss: 3.295367, ppl: 26.987305 +epoch: 0, batch: 19077, sum loss: 4704.505859, avg loss: 3.103236, ppl: 22.269905 +epoch: 0, batch: 19078, sum loss: 6112.839355, avg loss: 3.367956, ppl: 29.019142 +epoch: 0, batch: 19079, sum loss: 5656.827148, avg loss: 3.245455, ppl: 
25.673378 +epoch: 0, batch: 19080, sum loss: 5541.267090, avg loss: 3.032987, ppl: 20.759146 +epoch: 0, batch: 19081, sum loss: 6255.112305, avg loss: 3.346769, ppl: 28.410799 +epoch: 0, batch: 19082, sum loss: 5744.947754, avg loss: 3.170501, ppl: 23.819414 +epoch: 0, batch: 19083, sum loss: 5687.023438, avg loss: 3.274049, ppl: 26.418097 +epoch: 0, batch: 19084, sum loss: 5976.883301, avg loss: 3.271420, ppl: 26.348715 +epoch: 0, batch: 19085, sum loss: 4787.427734, avg loss: 3.198014, ppl: 24.483864 +epoch: 0, batch: 19086, sum loss: 5168.311035, avg loss: 3.109694, ppl: 22.414179 +epoch: 0, batch: 19087, sum loss: 3998.043945, avg loss: 2.884592, ppl: 17.896257 +epoch: 0, batch: 19088, sum loss: 5315.707031, avg loss: 3.497176, ppl: 33.022064 +epoch: 0, batch: 19089, sum loss: 5384.560059, avg loss: 3.195585, ppl: 24.424446 +epoch: 0, batch: 19090, sum loss: 6328.237305, avg loss: 3.444876, ppl: 31.339403 +epoch: 0, batch: 19091, sum loss: 6205.741699, avg loss: 3.488331, ppl: 32.731285 +epoch: 0, batch: 19092, sum loss: 5334.599609, avg loss: 3.397834, ppl: 29.899277 +epoch: 0, batch: 19093, sum loss: 5930.513184, avg loss: 3.298394, ppl: 27.069136 +epoch: 0, batch: 19094, sum loss: 5539.204102, avg loss: 3.165259, ppl: 23.694889 +epoch: 0, batch: 19095, sum loss: 5577.039062, avg loss: 3.244351, ppl: 25.645060 +epoch: 0, batch: 19096, sum loss: 5191.479492, avg loss: 3.238602, ppl: 25.498055 +epoch: 0, batch: 19097, sum loss: 5371.372070, avg loss: 3.350825, ppl: 28.526247 +epoch: 0, batch: 19098, sum loss: 4979.009766, avg loss: 3.284307, ppl: 26.690495 +epoch: 0, batch: 19099, sum loss: 4838.484375, avg loss: 3.062332, ppl: 21.377350 +epoch: 0, batch: 19100, sum loss: 4682.752930, avg loss: 3.205170, ppl: 24.659689 +epoch: 0, batch: 19101, sum loss: 5656.187988, avg loss: 3.425917, ppl: 30.750820 +epoch: 0, batch: 19102, sum loss: 5002.340332, avg loss: 3.144149, ppl: 23.199915 +epoch: 0, batch: 19103, sum loss: 5906.328613, avg loss: 3.369269, ppl: 
29.057281 +epoch: 0, batch: 19104, sum loss: 5569.910156, avg loss: 3.377750, ppl: 29.304766 +epoch: 0, batch: 19105, sum loss: 3898.647705, avg loss: 2.885750, ppl: 17.916992 +epoch: 0, batch: 19106, sum loss: 5188.082031, avg loss: 3.228427, ppl: 25.239922 +epoch: 0, batch: 19107, sum loss: 5822.022461, avg loss: 3.321177, ppl: 27.692919 +epoch: 0, batch: 19108, sum loss: 6331.571289, avg loss: 3.424322, ppl: 30.701818 +epoch: 0, batch: 19109, sum loss: 5112.051270, avg loss: 2.922842, ppl: 18.594061 +epoch: 0, batch: 19110, sum loss: 5736.876465, avg loss: 3.333455, ppl: 28.035038 +epoch: 0, batch: 19111, sum loss: 5047.854492, avg loss: 3.438593, ppl: 31.143106 +epoch: 0, batch: 19112, sum loss: 5861.500977, avg loss: 3.324731, ppl: 27.791525 +epoch: 0, batch: 19113, sum loss: 5987.486328, avg loss: 3.401981, ppl: 30.023514 +epoch: 0, batch: 19114, sum loss: 5398.675781, avg loss: 3.344904, ppl: 28.357864 +epoch: 0, batch: 19115, sum loss: 5752.412598, avg loss: 3.399771, ppl: 29.957239 +epoch: 0, batch: 19116, sum loss: 5049.325195, avg loss: 3.201855, ppl: 24.578079 +epoch: 0, batch: 19117, sum loss: 5295.976074, avg loss: 3.318281, ppl: 27.612835 +epoch: 0, batch: 19118, sum loss: 6200.275879, avg loss: 3.344270, ppl: 28.339867 +epoch: 0, batch: 19119, sum loss: 6082.577148, avg loss: 3.413343, ppl: 30.366596 +epoch: 0, batch: 19120, sum loss: 4350.954590, avg loss: 2.755513, ppl: 15.729103 +epoch: 0, batch: 19121, sum loss: 6154.169434, avg loss: 3.182094, ppl: 24.097157 +epoch: 0, batch: 19122, sum loss: 5017.463867, avg loss: 3.214263, ppl: 24.884937 +epoch: 0, batch: 19123, sum loss: 5887.299805, avg loss: 3.416889, ppl: 30.474461 +epoch: 0, batch: 19124, sum loss: 5371.295898, avg loss: 3.226004, ppl: 25.178833 +epoch: 0, batch: 19125, sum loss: 5101.476074, avg loss: 3.312647, ppl: 27.457699 +epoch: 0, batch: 19126, sum loss: 5580.800781, avg loss: 3.419608, ppl: 30.557446 +epoch: 0, batch: 19127, sum loss: 5812.263184, avg loss: 3.306179, ppl: 
27.280695 +epoch: 0, batch: 19128, sum loss: 4800.651855, avg loss: 3.254679, ppl: 25.911301 +epoch: 0, batch: 19129, sum loss: 5638.815430, avg loss: 3.180381, ppl: 24.055918 +epoch: 0, batch: 19130, sum loss: 6534.453125, avg loss: 3.247740, ppl: 25.732126 +epoch: 0, batch: 19131, sum loss: 6073.201660, avg loss: 3.237314, ppl: 25.465242 +epoch: 0, batch: 19132, sum loss: 4892.395508, avg loss: 3.078915, ppl: 21.734800 +epoch: 0, batch: 19133, sum loss: 4975.324707, avg loss: 3.061738, ppl: 21.364662 +epoch: 0, batch: 19134, sum loss: 6103.800781, avg loss: 3.364830, ppl: 28.928566 +epoch: 0, batch: 19135, sum loss: 6313.664551, avg loss: 3.511493, ppl: 33.498241 +epoch: 0, batch: 19136, sum loss: 5628.970215, avg loss: 3.546925, ppl: 34.706440 +epoch: 0, batch: 19137, sum loss: 4878.908203, avg loss: 2.807197, ppl: 16.563423 +epoch: 0, batch: 19138, sum loss: 4554.806152, avg loss: 3.191875, ppl: 24.334023 +epoch: 0, batch: 19139, sum loss: 5540.060059, avg loss: 3.490901, ppl: 32.815510 +epoch: 0, batch: 19140, sum loss: 5184.257812, avg loss: 3.210067, ppl: 24.780741 +epoch: 0, batch: 19141, sum loss: 4658.456543, avg loss: 3.294524, ppl: 26.964569 +epoch: 0, batch: 19142, sum loss: 4946.047363, avg loss: 3.209635, ppl: 24.770044 +epoch: 0, batch: 19143, sum loss: 5632.483887, avg loss: 3.509336, ppl: 33.426056 +epoch: 0, batch: 19144, sum loss: 5696.122559, avg loss: 3.126302, ppl: 22.789553 +epoch: 0, batch: 19145, sum loss: 6193.049316, avg loss: 3.322451, ppl: 27.728239 +epoch: 0, batch: 19146, sum loss: 6470.972656, avg loss: 3.393274, ppl: 29.763226 +epoch: 0, batch: 19147, sum loss: 5994.500977, avg loss: 3.110795, ppl: 22.438866 +epoch: 0, batch: 19148, sum loss: 5074.393066, avg loss: 3.073527, ppl: 21.618017 +epoch: 0, batch: 19149, sum loss: 4376.484863, avg loss: 2.959084, ppl: 19.280298 +epoch: 0, batch: 19150, sum loss: 4899.705078, avg loss: 3.246988, ppl: 25.712784 +epoch: 0, batch: 19151, sum loss: 5521.807129, avg loss: 3.318394, ppl: 
27.615963 +epoch: 0, batch: 19152, sum loss: 6139.211914, avg loss: 3.382486, ppl: 29.443880 +epoch: 0, batch: 19153, sum loss: 6324.653320, avg loss: 3.367760, ppl: 29.013462 +epoch: 0, batch: 19154, sum loss: 5015.981934, avg loss: 3.032637, ppl: 20.751890 +epoch: 0, batch: 19155, sum loss: 5335.009766, avg loss: 3.592599, ppl: 36.328369 +epoch: 0, batch: 19156, sum loss: 7145.316406, avg loss: 3.250826, ppl: 25.811661 +epoch: 0, batch: 19157, sum loss: 5193.159668, avg loss: 3.233599, ppl: 25.370794 +epoch: 0, batch: 19158, sum loss: 5897.944336, avg loss: 3.235296, ppl: 25.413893 +epoch: 0, batch: 19159, sum loss: 6143.365723, avg loss: 3.424396, ppl: 30.704088 +epoch: 0, batch: 19160, sum loss: 4490.950684, avg loss: 2.819178, ppl: 16.763067 +epoch: 0, batch: 19161, sum loss: 6022.483398, avg loss: 3.351410, ppl: 28.542948 +epoch: 0, batch: 19162, sum loss: 5732.300293, avg loss: 3.549412, ppl: 34.792854 +epoch: 0, batch: 19163, sum loss: 6380.506348, avg loss: 3.257022, ppl: 25.972080 +epoch: 0, batch: 19164, sum loss: 5116.885742, avg loss: 3.305482, ppl: 27.261677 +epoch: 0, batch: 19165, sum loss: 5368.518555, avg loss: 3.174760, ppl: 23.921076 +epoch: 0, batch: 19166, sum loss: 5596.535156, avg loss: 3.331271, ppl: 27.973879 +epoch: 0, batch: 19167, sum loss: 4841.378418, avg loss: 3.133579, ppl: 22.955982 +epoch: 0, batch: 19168, sum loss: 5321.265625, avg loss: 3.346708, ppl: 28.409052 +epoch: 0, batch: 19169, sum loss: 5754.380859, avg loss: 3.380952, ppl: 29.398756 +epoch: 0, batch: 19170, sum loss: 5864.835938, avg loss: 3.458040, ppl: 31.754677 +epoch: 0, batch: 19171, sum loss: 4986.726562, avg loss: 3.144216, ppl: 23.201479 +epoch: 0, batch: 19172, sum loss: 5186.722656, avg loss: 3.434916, ppl: 31.028799 +epoch: 0, batch: 19173, sum loss: 4823.921387, avg loss: 3.304056, ppl: 27.222830 +epoch: 0, batch: 19174, sum loss: 4904.196777, avg loss: 3.038536, ppl: 20.874657 +epoch: 0, batch: 19175, sum loss: 4156.351074, avg loss: 2.996648, ppl: 
20.018324 +epoch: 0, batch: 19176, sum loss: 5251.672852, avg loss: 3.200288, ppl: 24.539598 +epoch: 0, batch: 19177, sum loss: 5371.880859, avg loss: 3.412885, ppl: 30.352676 +epoch: 0, batch: 19178, sum loss: 6685.602051, avg loss: 3.531749, ppl: 34.183720 +epoch: 0, batch: 19179, sum loss: 5378.212891, avg loss: 3.171116, ppl: 23.834070 +epoch: 0, batch: 19180, sum loss: 4734.589355, avg loss: 3.214250, ppl: 24.884611 +epoch: 0, batch: 19181, sum loss: 6063.665039, avg loss: 3.387522, ppl: 29.592537 +epoch: 0, batch: 19182, sum loss: 6082.151367, avg loss: 3.214668, ppl: 24.895020 +epoch: 0, batch: 19183, sum loss: 4736.482910, avg loss: 3.071649, ppl: 21.577456 +epoch: 0, batch: 19184, sum loss: 5993.457031, avg loss: 3.432679, ppl: 30.959471 +epoch: 0, batch: 19185, sum loss: 5481.697266, avg loss: 3.234040, ppl: 25.381989 +epoch: 0, batch: 19186, sum loss: 6651.722656, avg loss: 3.578119, ppl: 35.806110 +epoch: 0, batch: 19187, sum loss: 5231.851562, avg loss: 3.467099, ppl: 32.043640 +epoch: 0, batch: 19188, sum loss: 4900.542969, avg loss: 3.165725, ppl: 23.705936 +epoch: 0, batch: 19189, sum loss: 5083.631836, avg loss: 3.056904, ppl: 21.261637 +epoch: 0, batch: 19190, sum loss: 5352.901855, avg loss: 3.296122, ppl: 27.007696 +epoch: 0, batch: 19191, sum loss: 6588.418457, avg loss: 3.478574, ppl: 32.413460 +epoch: 0, batch: 19192, sum loss: 4709.947754, avg loss: 2.971576, ppl: 19.522657 +epoch: 0, batch: 19193, sum loss: 5582.311035, avg loss: 3.336707, ppl: 28.126358 +epoch: 0, batch: 19194, sum loss: 5509.934082, avg loss: 3.454504, ppl: 31.642597 +epoch: 0, batch: 19195, sum loss: 4960.818848, avg loss: 3.151727, ppl: 23.376406 +epoch: 0, batch: 19196, sum loss: 5276.443359, avg loss: 3.354382, ppl: 28.627914 +epoch: 0, batch: 19197, sum loss: 4719.599609, avg loss: 3.123494, ppl: 22.725647 +epoch: 0, batch: 19198, sum loss: 4780.035645, avg loss: 3.105936, ppl: 22.330111 +epoch: 0, batch: 19199, sum loss: 4370.196777, avg loss: 2.991237, ppl: 
19.910290 +epoch: 0, batch: 19200, sum loss: 6245.950684, avg loss: 3.506991, ppl: 33.347771 +epoch: 0, batch: 19201, sum loss: 5490.662598, avg loss: 3.067409, ppl: 21.486166 +epoch: 0, batch: 19202, sum loss: 5046.905273, avg loss: 3.260275, ppl: 26.056700 +epoch: 0, batch: 19203, sum loss: 3769.407715, avg loss: 3.089679, ppl: 21.970015 +epoch: 0, batch: 19204, sum loss: 5229.688477, avg loss: 3.262438, ppl: 26.113125 +epoch: 0, batch: 19205, sum loss: 4956.150879, avg loss: 2.881483, ppl: 17.840714 +epoch: 0, batch: 19206, sum loss: 3866.543457, avg loss: 2.748076, ppl: 15.612572 +epoch: 0, batch: 19207, sum loss: 5137.498047, avg loss: 3.436453, ppl: 31.076544 +epoch: 0, batch: 19208, sum loss: 5706.343750, avg loss: 3.218468, ppl: 24.989811 +epoch: 0, batch: 19209, sum loss: 5671.514648, avg loss: 3.284027, ppl: 26.683012 +epoch: 0, batch: 19210, sum loss: 4081.731201, avg loss: 3.092221, ppl: 22.025938 +epoch: 0, batch: 19211, sum loss: 5027.672363, avg loss: 2.987328, ppl: 19.832611 +epoch: 0, batch: 19212, sum loss: 5847.425781, avg loss: 3.362522, ppl: 28.861893 +epoch: 0, batch: 19213, sum loss: 5943.771973, avg loss: 3.404222, ppl: 30.090891 +epoch: 0, batch: 19214, sum loss: 5966.558594, avg loss: 3.093084, ppl: 22.044952 +epoch: 0, batch: 19215, sum loss: 5414.870117, avg loss: 3.194614, ppl: 24.400745 +epoch: 0, batch: 19216, sum loss: 4985.031738, avg loss: 3.205808, ppl: 24.675440 +epoch: 0, batch: 19217, sum loss: 5427.259766, avg loss: 3.337798, ppl: 28.157061 +epoch: 0, batch: 19218, sum loss: 6504.387695, avg loss: 3.357970, ppl: 28.730801 +epoch: 0, batch: 19219, sum loss: 5224.160156, avg loss: 3.065822, ppl: 21.452082 +epoch: 0, batch: 19220, sum loss: 4599.989746, avg loss: 3.174596, ppl: 23.917162 +epoch: 0, batch: 19221, sum loss: 6985.684082, avg loss: 3.621402, ppl: 37.389942 +epoch: 0, batch: 19222, sum loss: 6963.537598, avg loss: 3.647741, ppl: 38.387852 +epoch: 0, batch: 19223, sum loss: 5816.238770, avg loss: 3.423331, ppl: 
30.671406 +epoch: 0, batch: 19224, sum loss: 6200.484375, avg loss: 3.285895, ppl: 26.732903 +epoch: 0, batch: 19225, sum loss: 6536.290039, avg loss: 3.516025, ppl: 33.650387 +epoch: 0, batch: 19226, sum loss: 5659.549316, avg loss: 3.313553, ppl: 27.482607 +epoch: 0, batch: 19227, sum loss: 6900.006836, avg loss: 3.365857, ppl: 28.958302 +epoch: 0, batch: 19228, sum loss: 4690.901367, avg loss: 3.210747, ppl: 24.797604 +epoch: 0, batch: 19229, sum loss: 6218.200195, avg loss: 3.276186, ppl: 26.474600 +epoch: 0, batch: 19230, sum loss: 4724.190918, avg loss: 3.179132, ppl: 24.025902 +epoch: 0, batch: 19231, sum loss: 5047.390625, avg loss: 2.955147, ppl: 19.204536 +epoch: 0, batch: 19232, sum loss: 4416.762207, avg loss: 3.031409, ppl: 20.726406 +epoch: 0, batch: 19233, sum loss: 5136.125977, avg loss: 3.238415, ppl: 25.493277 +epoch: 0, batch: 19234, sum loss: 5693.845703, avg loss: 3.200588, ppl: 24.546953 +epoch: 0, batch: 19235, sum loss: 5074.673828, avg loss: 3.263456, ppl: 26.139723 +epoch: 0, batch: 19236, sum loss: 5418.561523, avg loss: 3.179907, ppl: 24.044519 +epoch: 0, batch: 19237, sum loss: 5450.526367, avg loss: 3.221351, ppl: 25.061966 +epoch: 0, batch: 19238, sum loss: 5542.306641, avg loss: 3.348826, ppl: 28.469290 +epoch: 0, batch: 19239, sum loss: 5266.281250, avg loss: 3.026598, ppl: 20.626949 +epoch: 0, batch: 19240, sum loss: 5808.150391, avg loss: 3.416559, ppl: 30.464413 +epoch: 0, batch: 19241, sum loss: 5359.271973, avg loss: 3.145113, ppl: 23.222294 +epoch: 0, batch: 19242, sum loss: 5104.134766, avg loss: 3.257266, ppl: 25.978403 +epoch: 0, batch: 19243, sum loss: 4422.662109, avg loss: 3.216481, ppl: 24.940212 +epoch: 0, batch: 19244, sum loss: 5494.753906, avg loss: 3.130914, ppl: 22.894890 +epoch: 0, batch: 19245, sum loss: 5599.922852, avg loss: 3.151335, ppl: 23.367245 +epoch: 0, batch: 19246, sum loss: 5538.383789, avg loss: 2.966461, ppl: 19.423067 +epoch: 0, batch: 19247, sum loss: 5748.252930, avg loss: 3.247601, ppl: 
25.728531 +epoch: 0, batch: 19248, sum loss: 5074.087402, avg loss: 3.320738, ppl: 27.680767 +epoch: 0, batch: 19249, sum loss: 6056.711426, avg loss: 3.439359, ppl: 31.166973 +epoch: 0, batch: 19250, sum loss: 5063.959473, avg loss: 3.023260, ppl: 20.558195 +epoch: 0, batch: 19251, sum loss: 4741.021484, avg loss: 3.143914, ppl: 23.194462 +epoch: 0, batch: 19252, sum loss: 5675.424316, avg loss: 3.094561, ppl: 22.077536 +epoch: 0, batch: 19253, sum loss: 6211.532227, avg loss: 3.422332, ppl: 30.640779 +epoch: 0, batch: 19254, sum loss: 6160.363770, avg loss: 3.303144, ppl: 27.198015 +epoch: 0, batch: 19255, sum loss: 6767.112793, avg loss: 3.528213, ppl: 34.063053 +epoch: 0, batch: 19256, sum loss: 5368.360840, avg loss: 3.402003, ppl: 30.024179 +epoch: 0, batch: 19257, sum loss: 5099.069336, avg loss: 3.163194, ppl: 23.646011 +epoch: 0, batch: 19258, sum loss: 4902.540039, avg loss: 2.974842, ppl: 19.586535 +epoch: 0, batch: 19259, sum loss: 5411.049805, avg loss: 3.249880, ppl: 25.787241 +epoch: 0, batch: 19260, sum loss: 4457.471191, avg loss: 3.172577, ppl: 23.868925 +epoch: 0, batch: 19261, sum loss: 5974.104492, avg loss: 3.445273, ppl: 31.351854 +epoch: 0, batch: 19262, sum loss: 5412.749512, avg loss: 3.056324, ppl: 21.249296 +epoch: 0, batch: 19263, sum loss: 4166.154297, avg loss: 2.818779, ppl: 16.756371 +epoch: 0, batch: 19264, sum loss: 5221.346680, avg loss: 3.197395, ppl: 24.468714 +epoch: 0, batch: 19265, sum loss: 6087.680664, avg loss: 3.099634, ppl: 22.189827 +epoch: 0, batch: 19266, sum loss: 5240.887207, avg loss: 3.082875, ppl: 21.821049 +epoch: 0, batch: 19267, sum loss: 5692.192871, avg loss: 3.487863, ppl: 32.715969 +epoch: 0, batch: 19268, sum loss: 6154.555176, avg loss: 3.703102, ppl: 40.572960 +epoch: 0, batch: 19269, sum loss: 5364.011230, avg loss: 3.204308, ppl: 24.638445 +epoch: 0, batch: 19270, sum loss: 6343.476074, avg loss: 3.696665, ppl: 40.312618 +epoch: 0, batch: 19271, sum loss: 4491.249512, avg loss: 3.074093, ppl: 
21.630251 +epoch: 0, batch: 19272, sum loss: 4253.268555, avg loss: 3.271745, ppl: 26.357292 +epoch: 0, batch: 19273, sum loss: 5853.219238, avg loss: 3.486134, ppl: 32.659443 +epoch: 0, batch: 19274, sum loss: 4440.313965, avg loss: 3.066515, ppl: 21.466965 +epoch: 0, batch: 19275, sum loss: 6734.037598, avg loss: 3.593403, ppl: 36.357594 +epoch: 0, batch: 19276, sum loss: 5255.927246, avg loss: 3.160510, ppl: 23.582611 +epoch: 0, batch: 19277, sum loss: 5884.574707, avg loss: 3.110240, ppl: 22.426432 +epoch: 0, batch: 19278, sum loss: 6152.468262, avg loss: 3.408570, ppl: 30.221983 +epoch: 0, batch: 19279, sum loss: 5857.519043, avg loss: 3.283363, ppl: 26.665295 +epoch: 0, batch: 19280, sum loss: 6039.429688, avg loss: 3.234831, ppl: 25.402081 +epoch: 0, batch: 19281, sum loss: 4964.670898, avg loss: 3.042078, ppl: 20.948725 +epoch: 0, batch: 19282, sum loss: 6047.864258, avg loss: 3.436286, ppl: 31.071358 +epoch: 0, batch: 19283, sum loss: 5647.221680, avg loss: 3.132125, ppl: 22.922642 +epoch: 0, batch: 19284, sum loss: 5825.742188, avg loss: 3.285811, ppl: 26.730646 +epoch: 0, batch: 19285, sum loss: 4169.890137, avg loss: 2.825129, ppl: 16.863117 +epoch: 0, batch: 19286, sum loss: 6032.547363, avg loss: 3.381473, ppl: 29.414061 +epoch: 0, batch: 19287, sum loss: 5114.812012, avg loss: 2.825863, ppl: 16.875504 +epoch: 0, batch: 19288, sum loss: 4873.598633, avg loss: 3.076767, ppl: 21.688166 +epoch: 0, batch: 19289, sum loss: 4354.755371, avg loss: 2.962419, ppl: 19.344702 +epoch: 0, batch: 19290, sum loss: 5473.978516, avg loss: 3.297577, ppl: 27.047035 +epoch: 0, batch: 19291, sum loss: 5364.703125, avg loss: 3.346664, ppl: 28.407820 +epoch: 0, batch: 19292, sum loss: 4578.904297, avg loss: 3.164412, ppl: 23.674820 +epoch: 0, batch: 19293, sum loss: 5866.698730, avg loss: 3.360080, ppl: 28.791481 +epoch: 0, batch: 19294, sum loss: 5317.444824, avg loss: 3.256243, ppl: 25.951853 +epoch: 0, batch: 19295, sum loss: 7501.546875, avg loss: 3.533465, ppl: 
34.242424 +epoch: 0, batch: 19296, sum loss: 6252.446777, avg loss: 3.126224, ppl: 22.787760 +epoch: 0, batch: 19297, sum loss: 5134.527344, avg loss: 3.203074, ppl: 24.608053 +epoch: 0, batch: 19298, sum loss: 5382.641602, avg loss: 3.300210, ppl: 27.118320 +epoch: 0, batch: 19299, sum loss: 6422.308594, avg loss: 3.464028, ppl: 31.945398 +epoch: 0, batch: 19300, sum loss: 4828.395996, avg loss: 2.958576, ppl: 19.270510 +epoch: 0, batch: 19301, sum loss: 4632.067383, avg loss: 3.059490, ppl: 21.316677 +epoch: 0, batch: 19302, sum loss: 5877.147461, avg loss: 3.428908, ppl: 30.842932 +epoch: 0, batch: 19303, sum loss: 5403.336426, avg loss: 3.270785, ppl: 26.331999 +epoch: 0, batch: 19304, sum loss: 6023.860840, avg loss: 3.342875, ppl: 28.300381 +epoch: 0, batch: 19305, sum loss: 5211.446777, avg loss: 3.081873, ppl: 21.799192 +epoch: 0, batch: 19306, sum loss: 5511.043945, avg loss: 3.466065, ppl: 32.010548 +epoch: 0, batch: 19307, sum loss: 5257.726562, avg loss: 3.310911, ppl: 27.410089 +epoch: 0, batch: 19308, sum loss: 6594.447266, avg loss: 3.535897, ppl: 34.325775 +epoch: 0, batch: 19309, sum loss: 5885.682617, avg loss: 3.534945, ppl: 34.293114 +epoch: 0, batch: 19310, sum loss: 5699.329102, avg loss: 3.305876, ppl: 27.272409 +epoch: 0, batch: 19311, sum loss: 5250.083984, avg loss: 3.099223, ppl: 22.180714 +epoch: 0, batch: 19312, sum loss: 6655.203613, avg loss: 3.523136, ppl: 33.890541 +epoch: 0, batch: 19313, sum loss: 5486.862793, avg loss: 3.140734, ppl: 23.120836 +epoch: 0, batch: 19314, sum loss: 4967.888184, avg loss: 3.102991, ppl: 22.264442 +epoch: 0, batch: 19315, sum loss: 5397.943359, avg loss: 3.257660, ppl: 25.988663 +epoch: 0, batch: 19316, sum loss: 5018.874023, avg loss: 3.098071, ppl: 22.155165 +epoch: 0, batch: 19317, sum loss: 6096.697266, avg loss: 3.505864, ppl: 33.310207 +epoch: 0, batch: 19318, sum loss: 5811.388672, avg loss: 3.217823, ppl: 24.973700 +epoch: 0, batch: 19319, sum loss: 4879.901367, avg loss: 3.112182, ppl: 
22.470018 +epoch: 0, batch: 19320, sum loss: 4804.587891, avg loss: 3.295328, ppl: 26.986256 +epoch: 0, batch: 19321, sum loss: 5220.277344, avg loss: 3.333510, ppl: 28.036589 +epoch: 0, batch: 19322, sum loss: 5964.082031, avg loss: 3.307866, ppl: 27.326744 +epoch: 0, batch: 19323, sum loss: 5550.374512, avg loss: 3.313657, ppl: 27.485445 +epoch: 0, batch: 19324, sum loss: 5560.969238, avg loss: 3.165036, ppl: 23.689608 +epoch: 0, batch: 19325, sum loss: 5464.721191, avg loss: 3.292001, ppl: 26.896624 +epoch: 0, batch: 19326, sum loss: 5190.039062, avg loss: 3.243774, ppl: 25.630280 +epoch: 0, batch: 19327, sum loss: 5854.119629, avg loss: 3.283298, ppl: 26.663559 +epoch: 0, batch: 19328, sum loss: 5881.654785, avg loss: 3.273041, ppl: 26.391481 +epoch: 0, batch: 19329, sum loss: 4792.391113, avg loss: 3.150816, ppl: 23.355120 +epoch: 0, batch: 19330, sum loss: 5186.711426, avg loss: 3.203651, ppl: 24.622272 +epoch: 0, batch: 19331, sum loss: 5418.589844, avg loss: 3.268148, ppl: 26.262667 +epoch: 0, batch: 19332, sum loss: 5789.172852, avg loss: 3.365798, ppl: 28.956602 +epoch: 0, batch: 19333, sum loss: 5620.800781, avg loss: 3.112293, ppl: 22.472509 +epoch: 0, batch: 19334, sum loss: 5559.127930, avg loss: 3.230173, ppl: 25.284033 +epoch: 0, batch: 19335, sum loss: 4693.042969, avg loss: 2.842546, ppl: 17.159397 +epoch: 0, batch: 19336, sum loss: 5813.078125, avg loss: 3.497640, ppl: 33.037403 +epoch: 0, batch: 19337, sum loss: 4579.149414, avg loss: 3.229301, ppl: 25.262005 +epoch: 0, batch: 19338, sum loss: 5589.346680, avg loss: 3.214115, ppl: 24.881271 +epoch: 0, batch: 19339, sum loss: 6138.551758, avg loss: 3.328933, ppl: 27.908541 +epoch: 0, batch: 19340, sum loss: 5320.941895, avg loss: 3.254399, ppl: 25.904030 +epoch: 0, batch: 19341, sum loss: 4710.584473, avg loss: 3.176389, ppl: 23.960072 +epoch: 0, batch: 19342, sum loss: 6343.858887, avg loss: 3.115844, ppl: 22.552462 +epoch: 0, batch: 19343, sum loss: 6140.888184, avg loss: 3.257766, ppl: 
25.991396 +epoch: 0, batch: 19344, sum loss: 5100.111816, avg loss: 3.223838, ppl: 25.124359 +epoch: 0, batch: 19345, sum loss: 4898.228516, avg loss: 2.934829, ppl: 18.818275 +epoch: 0, batch: 19346, sum loss: 4626.421875, avg loss: 3.059803, ppl: 21.323357 +epoch: 0, batch: 19347, sum loss: 5431.252441, avg loss: 3.299667, ppl: 27.103622 +epoch: 0, batch: 19348, sum loss: 5290.775879, avg loss: 3.194913, ppl: 24.408049 +epoch: 0, batch: 19349, sum loss: 5829.054199, avg loss: 3.202777, ppl: 24.600756 +epoch: 0, batch: 19350, sum loss: 6031.401855, avg loss: 3.317603, ppl: 27.594124 +epoch: 0, batch: 19351, sum loss: 5709.345703, avg loss: 3.054760, ppl: 21.216082 +epoch: 0, batch: 19352, sum loss: 4785.040039, avg loss: 3.322945, ppl: 27.741920 +epoch: 0, batch: 19353, sum loss: 5496.643555, avg loss: 3.046920, ppl: 21.050406 +epoch: 0, batch: 19354, sum loss: 5956.496094, avg loss: 3.151585, ppl: 23.373089 +epoch: 0, batch: 19355, sum loss: 5279.641113, avg loss: 3.120355, ppl: 22.654428 +epoch: 0, batch: 19356, sum loss: 4748.396973, avg loss: 3.267995, ppl: 26.258640 +epoch: 0, batch: 19357, sum loss: 4961.529785, avg loss: 3.051371, ppl: 21.144321 +epoch: 0, batch: 19358, sum loss: 5957.751953, avg loss: 3.615141, ppl: 37.156574 +epoch: 0, batch: 19359, sum loss: 7164.831055, avg loss: 3.503585, ppl: 33.234379 +epoch: 0, batch: 19360, sum loss: 4709.389648, avg loss: 3.243381, ppl: 25.620205 +epoch: 0, batch: 19361, sum loss: 4759.079102, avg loss: 2.887791, ppl: 17.953600 +epoch: 0, batch: 19362, sum loss: 5185.312500, avg loss: 2.976643, ppl: 19.621838 +epoch: 0, batch: 19363, sum loss: 5073.308594, avg loss: 3.210955, ppl: 24.802759 +epoch: 0, batch: 19364, sum loss: 5894.360352, avg loss: 3.345267, ppl: 28.368151 +epoch: 0, batch: 19365, sum loss: 5234.467773, avg loss: 3.141937, ppl: 23.148668 +epoch: 0, batch: 19366, sum loss: 5710.432617, avg loss: 3.337483, ppl: 28.148180 +epoch: 0, batch: 19367, sum loss: 5498.273926, avg loss: 3.389811, ppl: 
29.660355 +epoch: 0, batch: 19368, sum loss: 6766.208008, avg loss: 3.597133, ppl: 36.493473 +epoch: 0, batch: 19369, sum loss: 6015.163086, avg loss: 3.415766, ppl: 30.440243 +epoch: 0, batch: 19370, sum loss: 5466.209961, avg loss: 3.464011, ppl: 31.944864 +epoch: 0, batch: 19371, sum loss: 4958.489258, avg loss: 3.140272, ppl: 23.110149 +epoch: 0, batch: 19372, sum loss: 5500.129883, avg loss: 3.289551, ppl: 26.830828 +epoch: 0, batch: 19373, sum loss: 4476.100098, avg loss: 3.091229, ppl: 22.004114 +epoch: 0, batch: 19374, sum loss: 5665.639648, avg loss: 3.163394, ppl: 23.650742 +epoch: 0, batch: 19375, sum loss: 4365.607910, avg loss: 2.879689, ppl: 17.808729 +epoch: 0, batch: 19376, sum loss: 4997.811523, avg loss: 3.214027, ppl: 24.879070 +epoch: 0, batch: 19377, sum loss: 5348.336426, avg loss: 3.160955, ppl: 23.593126 +epoch: 0, batch: 19378, sum loss: 5419.932617, avg loss: 3.072524, ppl: 21.596350 +epoch: 0, batch: 19379, sum loss: 4327.287109, avg loss: 2.906170, ppl: 18.286625 +epoch: 0, batch: 19380, sum loss: 4827.314941, avg loss: 3.235466, ppl: 25.418207 +epoch: 0, batch: 19381, sum loss: 6321.290039, avg loss: 3.537376, ppl: 34.376587 +epoch: 0, batch: 19382, sum loss: 4930.735352, avg loss: 3.077862, ppl: 21.711939 +epoch: 0, batch: 19383, sum loss: 4890.148438, avg loss: 3.165145, ppl: 23.692171 +epoch: 0, batch: 19384, sum loss: 4946.179688, avg loss: 3.032606, ppl: 20.751232 +epoch: 0, batch: 19385, sum loss: 4504.380371, avg loss: 3.012963, ppl: 20.347607 +epoch: 0, batch: 19386, sum loss: 4928.584473, avg loss: 3.078441, ppl: 21.724516 +epoch: 0, batch: 19387, sum loss: 7234.230469, avg loss: 3.823589, ppl: 45.768181 +epoch: 0, batch: 19388, sum loss: 4743.466309, avg loss: 3.048500, ppl: 21.083702 +epoch: 0, batch: 19389, sum loss: 6398.846680, avg loss: 3.438392, ppl: 31.136841 +epoch: 0, batch: 19390, sum loss: 4416.760254, avg loss: 3.033489, ppl: 20.769577 +epoch: 0, batch: 19391, sum loss: 5882.452637, avg loss: 3.351825, ppl: 
28.554792 +epoch: 0, batch: 19392, sum loss: 4720.201660, avg loss: 3.061090, ppl: 21.350826 +epoch: 0, batch: 19393, sum loss: 5416.444336, avg loss: 3.298687, ppl: 27.077070 +epoch: 0, batch: 19394, sum loss: 4734.589355, avg loss: 2.973988, ppl: 19.569815 +epoch: 0, batch: 19395, sum loss: 6558.557617, avg loss: 3.468301, ppl: 32.082199 +epoch: 0, batch: 19396, sum loss: 6223.812988, avg loss: 3.358777, ppl: 28.753990 +epoch: 0, batch: 19397, sum loss: 5639.806641, avg loss: 3.393386, ppl: 29.766562 +epoch: 0, batch: 19398, sum loss: 6161.381836, avg loss: 3.438271, ppl: 31.133091 +epoch: 0, batch: 19399, sum loss: 5235.833984, avg loss: 3.142757, ppl: 23.167662 +epoch: 0, batch: 19400, sum loss: 4672.932129, avg loss: 3.181029, ppl: 24.071518 +epoch: 0, batch: 19401, sum loss: 6220.437988, avg loss: 3.291237, ppl: 26.876091 +epoch: 0, batch: 19402, sum loss: 5155.689453, avg loss: 3.103967, ppl: 22.286190 +epoch: 0, batch: 19403, sum loss: 5653.776855, avg loss: 3.424456, ppl: 30.705940 +epoch: 0, batch: 19404, sum loss: 4841.402832, avg loss: 2.997773, ppl: 20.040850 +epoch: 0, batch: 19405, sum loss: 5016.200684, avg loss: 3.199107, ppl: 24.510643 +epoch: 0, batch: 19406, sum loss: 5657.434082, avg loss: 3.253269, ppl: 25.874779 +epoch: 0, batch: 19407, sum loss: 5677.861328, avg loss: 3.387746, ppl: 29.599150 +epoch: 0, batch: 19408, sum loss: 5368.722168, avg loss: 3.263661, ppl: 26.145071 +epoch: 0, batch: 19409, sum loss: 5341.467773, avg loss: 3.258980, ppl: 26.022968 +epoch: 0, batch: 19410, sum loss: 5729.020996, avg loss: 3.330826, ppl: 27.961430 +epoch: 0, batch: 19411, sum loss: 5374.321289, avg loss: 3.289058, ppl: 26.817602 +epoch: 0, batch: 19412, sum loss: 5458.663086, avg loss: 3.286371, ppl: 26.745640 +epoch: 0, batch: 19413, sum loss: 5364.891113, avg loss: 3.152110, ppl: 23.385353 +epoch: 0, batch: 19414, sum loss: 5630.739258, avg loss: 3.351631, ppl: 28.549252 +epoch: 0, batch: 19415, sum loss: 4874.611328, avg loss: 3.183940, ppl: 
24.141678 +epoch: 0, batch: 19416, sum loss: 5741.142578, avg loss: 3.341759, ppl: 28.268820 +epoch: 0, batch: 19417, sum loss: 5374.086914, avg loss: 3.420807, ppl: 30.594084 +epoch: 0, batch: 19418, sum loss: 5706.091797, avg loss: 3.161270, ppl: 23.600542 +epoch: 0, batch: 19419, sum loss: 5229.564941, avg loss: 3.012422, ppl: 20.336597 +epoch: 0, batch: 19420, sum loss: 5836.055176, avg loss: 3.565092, ppl: 35.342701 +epoch: 0, batch: 19421, sum loss: 5991.029297, avg loss: 3.433254, ppl: 30.977293 +epoch: 0, batch: 19422, sum loss: 5559.135742, avg loss: 3.137210, ppl: 23.039494 +epoch: 0, batch: 19423, sum loss: 6137.817383, avg loss: 3.062783, ppl: 21.386995 +epoch: 0, batch: 19424, sum loss: 5511.931152, avg loss: 3.340564, ppl: 28.235054 +epoch: 0, batch: 19425, sum loss: 5777.881836, avg loss: 3.236909, ppl: 25.454910 +epoch: 0, batch: 19426, sum loss: 5329.160156, avg loss: 3.017644, ppl: 20.443079 +epoch: 0, batch: 19427, sum loss: 5011.609863, avg loss: 3.132256, ppl: 22.925642 +epoch: 0, batch: 19428, sum loss: 4857.587891, avg loss: 3.103890, ppl: 22.284468 +epoch: 0, batch: 19429, sum loss: 5911.443359, avg loss: 3.379899, ppl: 29.367805 +epoch: 0, batch: 19430, sum loss: 5403.138672, avg loss: 3.333213, ppl: 28.028254 +epoch: 0, batch: 19431, sum loss: 4900.112793, avg loss: 2.966170, ppl: 19.417410 +epoch: 0, batch: 19432, sum loss: 4767.070312, avg loss: 3.046051, ppl: 21.032129 +epoch: 0, batch: 19433, sum loss: 5845.000000, avg loss: 3.404193, ppl: 30.090015 +epoch: 0, batch: 19434, sum loss: 4143.424805, avg loss: 2.692284, ppl: 14.765360 +epoch: 0, batch: 19435, sum loss: 5467.413086, avg loss: 3.182429, ppl: 24.105236 +epoch: 0, batch: 19436, sum loss: 5531.799805, avg loss: 3.534696, ppl: 34.284603 +epoch: 0, batch: 19437, sum loss: 4738.579590, avg loss: 3.037551, ppl: 20.854113 +epoch: 0, batch: 19438, sum loss: 6147.881836, avg loss: 3.551636, ppl: 34.870308 +epoch: 0, batch: 19439, sum loss: 5712.013672, avg loss: 3.656859, ppl: 
38.739468 +epoch: 0, batch: 19440, sum loss: 4730.462402, avg loss: 2.941830, ppl: 18.950487 +epoch: 0, batch: 19441, sum loss: 5651.354004, avg loss: 3.467088, ppl: 32.043304 +epoch: 0, batch: 19442, sum loss: 4872.302734, avg loss: 3.022520, ppl: 20.543001 +epoch: 0, batch: 19443, sum loss: 5367.601074, avg loss: 3.313334, ppl: 27.476585 +epoch: 0, batch: 19444, sum loss: 5568.643555, avg loss: 3.445943, ppl: 31.372843 +epoch: 0, batch: 19445, sum loss: 5888.108398, avg loss: 3.319114, ppl: 27.635853 +epoch: 0, batch: 19446, sum loss: 6228.611328, avg loss: 3.411069, ppl: 30.297606 +epoch: 0, batch: 19447, sum loss: 4735.512695, avg loss: 3.136101, ppl: 23.013966 +epoch: 0, batch: 19448, sum loss: 5015.643066, avg loss: 3.148552, ppl: 23.302298 +epoch: 0, batch: 19449, sum loss: 4412.662109, avg loss: 3.010001, ppl: 20.287428 +epoch: 0, batch: 19450, sum loss: 6938.611328, avg loss: 3.538303, ppl: 34.408466 +epoch: 0, batch: 19451, sum loss: 5151.795410, avg loss: 3.156738, ppl: 23.493824 +epoch: 0, batch: 19452, sum loss: 5780.876953, avg loss: 3.353177, ppl: 28.593433 +epoch: 0, batch: 19453, sum loss: 5883.714844, avg loss: 3.362123, ppl: 28.850368 +epoch: 0, batch: 19454, sum loss: 6644.618164, avg loss: 3.745557, ppl: 42.332573 +epoch: 0, batch: 19455, sum loss: 5028.384277, avg loss: 2.870083, ppl: 17.638475 +epoch: 0, batch: 19456, sum loss: 5335.581055, avg loss: 3.281415, ppl: 26.613403 +epoch: 0, batch: 19457, sum loss: 6270.370605, avg loss: 3.212280, ppl: 24.835653 +epoch: 0, batch: 19458, sum loss: 4423.894531, avg loss: 2.852285, ppl: 17.327332 +epoch: 0, batch: 19459, sum loss: 5237.093262, avg loss: 3.216888, ppl: 24.950346 +epoch: 0, batch: 19460, sum loss: 5627.248535, avg loss: 3.286944, ppl: 26.760962 +epoch: 0, batch: 19461, sum loss: 5725.093750, avg loss: 3.273353, ppl: 26.399700 +epoch: 0, batch: 19462, sum loss: 5286.671875, avg loss: 3.192435, ppl: 24.347631 +epoch: 0, batch: 19463, sum loss: 5082.121094, avg loss: 2.949577, ppl: 
19.097876 +epoch: 0, batch: 19464, sum loss: 4359.157227, avg loss: 3.124844, ppl: 22.756346 +epoch: 0, batch: 19465, sum loss: 6148.920410, avg loss: 3.361903, ppl: 28.844034 +epoch: 0, batch: 19466, sum loss: 5358.130859, avg loss: 3.206542, ppl: 24.693542 +epoch: 0, batch: 19467, sum loss: 5258.294922, avg loss: 3.015077, ppl: 20.390669 +epoch: 0, batch: 19468, sum loss: 5401.112305, avg loss: 3.216862, ppl: 24.949717 +epoch: 0, batch: 19469, sum loss: 4632.522461, avg loss: 3.136441, ppl: 23.021774 +epoch: 0, batch: 19470, sum loss: 5584.286133, avg loss: 3.498926, ppl: 33.079910 +epoch: 0, batch: 19471, sum loss: 4491.534668, avg loss: 2.941411, ppl: 18.942556 +epoch: 0, batch: 19472, sum loss: 5584.226562, avg loss: 3.400869, ppl: 29.990147 +epoch: 0, batch: 19473, sum loss: 6315.810059, avg loss: 3.313646, ppl: 27.485163 +epoch: 0, batch: 19474, sum loss: 5684.711914, avg loss: 3.233624, ppl: 25.371449 +epoch: 0, batch: 19475, sum loss: 5760.116211, avg loss: 3.100170, ppl: 22.201729 +epoch: 0, batch: 19476, sum loss: 4766.027832, avg loss: 3.268881, ppl: 26.281902 +epoch: 0, batch: 19477, sum loss: 6151.878418, avg loss: 3.543709, ppl: 34.594986 +epoch: 0, batch: 19478, sum loss: 5328.240723, avg loss: 3.289038, ppl: 26.817045 +epoch: 0, batch: 19479, sum loss: 5362.324707, avg loss: 3.014235, ppl: 20.373505 +epoch: 0, batch: 19480, sum loss: 6067.488770, avg loss: 3.382101, ppl: 29.432539 +epoch: 0, batch: 19481, sum loss: 4763.014648, avg loss: 2.965762, ppl: 19.409494 +epoch: 0, batch: 19482, sum loss: 5546.503906, avg loss: 3.315304, ppl: 27.530769 +epoch: 0, batch: 19483, sum loss: 6687.610352, avg loss: 3.527221, ppl: 34.029266 +epoch: 0, batch: 19484, sum loss: 6110.064453, avg loss: 3.270912, ppl: 26.335358 +epoch: 0, batch: 19485, sum loss: 5472.996094, avg loss: 3.132797, ppl: 22.938042 +epoch: 0, batch: 19486, sum loss: 5200.495605, avg loss: 3.366017, ppl: 28.962934 +epoch: 0, batch: 19487, sum loss: 6844.687012, avg loss: 3.499329, ppl: 
33.093227 +epoch: 0, batch: 19488, sum loss: 4805.240234, avg loss: 3.150977, ppl: 23.358879 +epoch: 0, batch: 19489, sum loss: 6032.695312, avg loss: 3.449225, ppl: 31.476002 +epoch: 0, batch: 19490, sum loss: 4758.215332, avg loss: 3.038452, ppl: 20.872900 +epoch: 0, batch: 19491, sum loss: 5126.364746, avg loss: 3.139231, ppl: 23.086111 +epoch: 0, batch: 19492, sum loss: 5126.220703, avg loss: 2.937662, ppl: 18.871675 +epoch: 0, batch: 19493, sum loss: 5575.777832, avg loss: 3.270251, ppl: 26.317945 +epoch: 0, batch: 19494, sum loss: 4936.181641, avg loss: 3.006201, ppl: 20.210470 +epoch: 0, batch: 19495, sum loss: 4994.556152, avg loss: 3.290221, ppl: 26.848808 +epoch: 0, batch: 19496, sum loss: 6478.333984, avg loss: 3.413243, ppl: 30.363541 +epoch: 0, batch: 19497, sum loss: 4842.969727, avg loss: 3.335379, ppl: 28.089018 +epoch: 0, batch: 19498, sum loss: 5457.893066, avg loss: 3.176888, ppl: 23.972031 +epoch: 0, batch: 19499, sum loss: 5961.117188, avg loss: 3.330233, ppl: 27.944849 +epoch: 0, batch: 19500, sum loss: 5665.084961, avg loss: 3.376094, ppl: 29.256262 +epoch: 0, batch: 19501, sum loss: 4385.852051, avg loss: 2.971445, ppl: 19.520098 +epoch: 0, batch: 19502, sum loss: 4416.918945, avg loss: 3.090916, ppl: 21.997221 +epoch: 0, batch: 19503, sum loss: 6159.353027, avg loss: 3.399201, ppl: 29.940180 +epoch: 0, batch: 19504, sum loss: 5354.157715, avg loss: 3.248882, ppl: 25.761524 +epoch: 0, batch: 19505, sum loss: 4858.424805, avg loss: 3.116373, ppl: 22.564381 +epoch: 0, batch: 19506, sum loss: 5833.443848, avg loss: 3.377790, ppl: 29.305946 +epoch: 0, batch: 19507, sum loss: 5794.030762, avg loss: 3.446776, ppl: 31.399004 +epoch: 0, batch: 19508, sum loss: 4817.655762, avg loss: 3.053014, ppl: 21.179077 +epoch: 0, batch: 19509, sum loss: 4590.227539, avg loss: 3.035865, ppl: 20.818979 +epoch: 0, batch: 19510, sum loss: 5783.392090, avg loss: 3.396002, ppl: 29.844551 +epoch: 0, batch: 19511, sum loss: 5328.871094, avg loss: 3.215975, ppl: 
24.927591 +epoch: 0, batch: 19512, sum loss: 4229.411133, avg loss: 2.991097, ppl: 19.907509 +epoch: 0, batch: 19513, sum loss: 4785.667969, avg loss: 2.979868, ppl: 19.685217 +epoch: 0, batch: 19514, sum loss: 4892.469238, avg loss: 3.116222, ppl: 22.560991 +epoch: 0, batch: 19515, sum loss: 5178.026367, avg loss: 3.126827, ppl: 22.801527 +epoch: 0, batch: 19516, sum loss: 5537.626465, avg loss: 3.282529, ppl: 26.643070 +epoch: 0, batch: 19517, sum loss: 5165.321777, avg loss: 3.188470, ppl: 24.251303 +epoch: 0, batch: 19518, sum loss: 4708.891602, avg loss: 3.057722, ppl: 21.279026 +epoch: 0, batch: 19519, sum loss: 5535.737305, avg loss: 3.318787, ppl: 27.626827 +epoch: 0, batch: 19520, sum loss: 5169.561523, avg loss: 3.356858, ppl: 28.698877 +epoch: 0, batch: 19521, sum loss: 5976.990234, avg loss: 3.415423, ppl: 30.429815 +epoch: 0, batch: 19522, sum loss: 6307.289551, avg loss: 3.231194, ppl: 25.309847 +epoch: 0, batch: 19523, sum loss: 4818.435059, avg loss: 3.176292, ppl: 23.957752 +epoch: 0, batch: 19524, sum loss: 5241.763672, avg loss: 3.155788, ppl: 23.471523 +epoch: 0, batch: 19525, sum loss: 5328.905762, avg loss: 3.067879, ppl: 21.496260 +epoch: 0, batch: 19526, sum loss: 6067.521484, avg loss: 3.339307, ppl: 28.199587 +epoch: 0, batch: 19527, sum loss: 5103.649414, avg loss: 3.108191, ppl: 22.380522 +epoch: 0, batch: 19528, sum loss: 5780.517578, avg loss: 3.301267, ppl: 27.147003 +epoch: 0, batch: 19529, sum loss: 6491.919922, avg loss: 3.522474, ppl: 33.868126 +epoch: 0, batch: 19530, sum loss: 5296.447266, avg loss: 3.251349, ppl: 25.825148 +epoch: 0, batch: 19531, sum loss: 5098.435059, avg loss: 3.153021, ppl: 23.406672 +epoch: 0, batch: 19532, sum loss: 6183.729492, avg loss: 3.263182, ppl: 26.132551 +epoch: 0, batch: 19533, sum loss: 5940.772461, avg loss: 3.326300, ppl: 27.835171 +epoch: 0, batch: 19534, sum loss: 4827.537109, avg loss: 3.163524, ppl: 23.653797 +epoch: 0, batch: 19535, sum loss: 5718.513672, avg loss: 3.357906, ppl: 
28.728966 +epoch: 0, batch: 19536, sum loss: 5630.431641, avg loss: 3.204571, ppl: 24.644932 +epoch: 0, batch: 19537, sum loss: 4975.993164, avg loss: 3.077300, ppl: 21.699726 +epoch: 0, batch: 19538, sum loss: 5210.493652, avg loss: 3.133189, ppl: 22.947046 +epoch: 0, batch: 19539, sum loss: 6353.265137, avg loss: 3.496569, ppl: 33.002018 +epoch: 0, batch: 19540, sum loss: 5077.543457, avg loss: 3.310002, ppl: 27.385183 +epoch: 0, batch: 19541, sum loss: 5764.827637, avg loss: 3.614312, ppl: 37.125793 +epoch: 0, batch: 19542, sum loss: 5052.489746, avg loss: 3.138192, ppl: 23.062143 +epoch: 0, batch: 19543, sum loss: 4014.842285, avg loss: 3.211874, ppl: 24.825565 +epoch: 0, batch: 19544, sum loss: 5132.488770, avg loss: 3.332785, ppl: 28.016256 +epoch: 0, batch: 19545, sum loss: 4551.511719, avg loss: 3.167371, ppl: 23.744972 +epoch: 0, batch: 19546, sum loss: 5825.796387, avg loss: 3.289552, ppl: 26.830833 +epoch: 0, batch: 19547, sum loss: 5073.447266, avg loss: 3.311650, ppl: 27.430344 +epoch: 0, batch: 19548, sum loss: 5262.755859, avg loss: 3.040298, ppl: 20.911474 +epoch: 0, batch: 19549, sum loss: 4770.130859, avg loss: 3.163217, ppl: 23.646536 +epoch: 0, batch: 19550, sum loss: 5964.937500, avg loss: 3.191513, ppl: 24.325199 +epoch: 0, batch: 19551, sum loss: 6173.468262, avg loss: 3.282014, ppl: 26.629347 +epoch: 0, batch: 19552, sum loss: 5560.538574, avg loss: 3.288314, ppl: 26.797642 +epoch: 0, batch: 19553, sum loss: 5254.871582, avg loss: 3.202237, ppl: 24.587481 +epoch: 0, batch: 19554, sum loss: 5058.362793, avg loss: 3.590038, ppl: 36.235439 +epoch: 0, batch: 19555, sum loss: 5239.315918, avg loss: 3.146736, ppl: 23.260029 +epoch: 0, batch: 19556, sum loss: 4971.650879, avg loss: 3.257963, ppl: 25.996521 +epoch: 0, batch: 19557, sum loss: 4658.509766, avg loss: 3.089197, ppl: 21.959446 +epoch: 0, batch: 19558, sum loss: 5443.212891, avg loss: 3.201890, ppl: 24.578947 +epoch: 0, batch: 19559, sum loss: 5634.655762, avg loss: 3.452608, ppl: 
31.582649 +epoch: 0, batch: 19560, sum loss: 5880.662598, avg loss: 3.254379, ppl: 25.903511 +epoch: 0, batch: 19561, sum loss: 5104.774902, avg loss: 3.149152, ppl: 23.316275 +epoch: 0, batch: 19562, sum loss: 5921.903320, avg loss: 3.407309, ppl: 30.183918 +epoch: 0, batch: 19563, sum loss: 5054.711426, avg loss: 3.213421, ppl: 24.864004 +epoch: 0, batch: 19564, sum loss: 4925.756836, avg loss: 3.121519, ppl: 22.680801 +epoch: 0, batch: 19565, sum loss: 6053.857910, avg loss: 3.479229, ppl: 32.434696 +epoch: 0, batch: 19566, sum loss: 5001.922363, avg loss: 3.126201, ppl: 22.787254 +epoch: 0, batch: 19567, sum loss: 5487.906738, avg loss: 3.274407, ppl: 26.427559 +epoch: 0, batch: 19568, sum loss: 5966.394531, avg loss: 3.507581, ppl: 33.367439 +epoch: 0, batch: 19569, sum loss: 5926.733398, avg loss: 3.439776, ppl: 31.179964 +epoch: 0, batch: 19570, sum loss: 4700.234375, avg loss: 3.074058, ppl: 21.629499 +epoch: 0, batch: 19571, sum loss: 5470.304688, avg loss: 3.020599, ppl: 20.503572 +epoch: 0, batch: 19572, sum loss: 6036.343262, avg loss: 3.642935, ppl: 38.203793 +epoch: 0, batch: 19573, sum loss: 5438.332520, avg loss: 3.216045, ppl: 24.929338 +epoch: 0, batch: 19574, sum loss: 4000.350586, avg loss: 2.803329, ppl: 16.499487 +epoch: 0, batch: 19575, sum loss: 5827.613281, avg loss: 3.273940, ppl: 26.415213 +epoch: 0, batch: 19576, sum loss: 6112.012207, avg loss: 3.490584, ppl: 32.805096 +epoch: 0, batch: 19577, sum loss: 4997.018066, avg loss: 3.318073, ppl: 27.607107 +epoch: 0, batch: 19578, sum loss: 5689.406250, avg loss: 3.282981, ppl: 26.655111 +epoch: 0, batch: 19579, sum loss: 5454.862793, avg loss: 3.214415, ppl: 24.888735 +epoch: 0, batch: 19580, sum loss: 4947.669434, avg loss: 3.198235, ppl: 24.489269 +epoch: 0, batch: 19581, sum loss: 5238.781250, avg loss: 3.144526, ppl: 23.208683 +epoch: 0, batch: 19582, sum loss: 5674.680176, avg loss: 3.248243, ppl: 25.745056 +epoch: 0, batch: 19583, sum loss: 5808.441406, avg loss: 3.212634, ppl: 
24.844431 +epoch: 0, batch: 19584, sum loss: 5789.065918, avg loss: 3.100732, ppl: 22.214197 +epoch: 0, batch: 19585, sum loss: 4440.265625, avg loss: 3.129151, ppl: 22.854576 +epoch: 0, batch: 19586, sum loss: 4745.937500, avg loss: 3.052050, ppl: 21.158678 +epoch: 0, batch: 19587, sum loss: 6156.227539, avg loss: 3.491904, ppl: 32.848450 +epoch: 0, batch: 19588, sum loss: 5716.889160, avg loss: 3.333463, ppl: 28.035259 +epoch: 0, batch: 19589, sum loss: 5856.900879, avg loss: 3.439167, ppl: 31.160984 +epoch: 0, batch: 19590, sum loss: 5974.833496, avg loss: 3.343499, ppl: 28.318050 +epoch: 0, batch: 19591, sum loss: 6147.202148, avg loss: 3.453484, ppl: 31.610340 +epoch: 0, batch: 19592, sum loss: 6473.959961, avg loss: 3.350911, ppl: 28.528709 +epoch: 0, batch: 19593, sum loss: 5556.792969, avg loss: 3.323441, ppl: 27.755695 +epoch: 0, batch: 19594, sum loss: 5054.203125, avg loss: 3.206982, ppl: 24.704407 +epoch: 0, batch: 19595, sum loss: 5288.100586, avg loss: 3.101525, ppl: 22.231836 +epoch: 0, batch: 19596, sum loss: 6187.130371, avg loss: 3.390208, ppl: 29.672138 +epoch: 0, batch: 19597, sum loss: 6035.096191, avg loss: 3.396227, ppl: 29.851269 +epoch: 0, batch: 19598, sum loss: 4262.768066, avg loss: 2.841845, ppl: 17.147379 +epoch: 0, batch: 19599, sum loss: 5409.081055, avg loss: 3.194968, ppl: 24.409397 +epoch: 0, batch: 19600, sum loss: 6836.280273, avg loss: 3.387651, ppl: 29.596361 +epoch: 0, batch: 19601, sum loss: 5867.971680, avg loss: 3.305899, ppl: 27.273060 +epoch: 0, batch: 19602, sum loss: 5498.479004, avg loss: 3.125912, ppl: 22.780661 +epoch: 0, batch: 19603, sum loss: 5400.208984, avg loss: 3.302880, ppl: 27.190838 +epoch: 0, batch: 19604, sum loss: 4588.132812, avg loss: 3.172983, ppl: 23.878601 +epoch: 0, batch: 19605, sum loss: 4933.606934, avg loss: 3.172738, ppl: 23.872755 +epoch: 0, batch: 19606, sum loss: 4794.630371, avg loss: 3.171052, ppl: 23.832542 +epoch: 0, batch: 19607, sum loss: 7272.423828, avg loss: 3.496358, ppl: 
32.995056 +epoch: 0, batch: 19608, sum loss: 7038.881348, avg loss: 3.535350, ppl: 34.307014 +epoch: 0, batch: 19609, sum loss: 4568.919922, avg loss: 2.978435, ppl: 19.657034 +epoch: 0, batch: 19610, sum loss: 4577.499023, avg loss: 2.808282, ppl: 16.581402 +epoch: 0, batch: 19611, sum loss: 4555.791992, avg loss: 3.161549, ppl: 23.607138 +epoch: 0, batch: 19612, sum loss: 4105.016602, avg loss: 2.899023, ppl: 18.156399 +epoch: 0, batch: 19613, sum loss: 5808.033691, avg loss: 3.262940, ppl: 26.126245 +epoch: 0, batch: 19614, sum loss: 5283.124023, avg loss: 3.444018, ppl: 31.312523 +epoch: 0, batch: 19615, sum loss: 5112.458008, avg loss: 3.518553, ppl: 33.735596 +epoch: 0, batch: 19616, sum loss: 5526.371094, avg loss: 3.480083, ppl: 32.462410 +epoch: 0, batch: 19617, sum loss: 4638.342285, avg loss: 3.250415, ppl: 25.801048 +epoch: 0, batch: 19618, sum loss: 4404.503418, avg loss: 3.004436, ppl: 20.174839 +epoch: 0, batch: 19619, sum loss: 6441.438965, avg loss: 3.502686, ppl: 33.204514 +epoch: 0, batch: 19620, sum loss: 5617.147461, avg loss: 3.296448, ppl: 27.016506 +epoch: 0, batch: 19621, sum loss: 4729.678223, avg loss: 3.033790, ppl: 20.775822 +epoch: 0, batch: 19622, sum loss: 6089.052246, avg loss: 3.307470, ppl: 27.315931 +epoch: 0, batch: 19623, sum loss: 3727.794922, avg loss: 2.703259, ppl: 14.928310 +epoch: 0, batch: 19624, sum loss: 5869.819824, avg loss: 3.379286, ppl: 29.349810 +epoch: 0, batch: 19625, sum loss: 4720.143555, avg loss: 3.146762, ppl: 23.260633 +epoch: 0, batch: 19626, sum loss: 5361.767090, avg loss: 2.952515, ppl: 19.154064 +epoch: 0, batch: 19627, sum loss: 4398.497070, avg loss: 3.150786, ppl: 23.354412 +epoch: 0, batch: 19628, sum loss: 6010.863770, avg loss: 3.382591, ppl: 29.446962 +epoch: 0, batch: 19629, sum loss: 5496.758789, avg loss: 3.109027, ppl: 22.399233 +epoch: 0, batch: 19630, sum loss: 5609.518555, avg loss: 3.361006, ppl: 28.818161 +epoch: 0, batch: 19631, sum loss: 4713.677734, avg loss: 2.968311, ppl: 
19.459026 +epoch: 0, batch: 19632, sum loss: 6115.304199, avg loss: 3.239038, ppl: 25.509176 +epoch: 0, batch: 19633, sum loss: 6359.339355, avg loss: 3.296703, ppl: 27.023386 +epoch: 0, batch: 19634, sum loss: 5797.506348, avg loss: 3.353098, ppl: 28.591169 +epoch: 0, batch: 19635, sum loss: 4887.344238, avg loss: 3.262579, ppl: 26.116817 +epoch: 0, batch: 19636, sum loss: 4962.926270, avg loss: 3.026175, ppl: 20.618208 +epoch: 0, batch: 19637, sum loss: 4410.745117, avg loss: 3.137088, ppl: 23.036676 +epoch: 0, batch: 19638, sum loss: 6048.945801, avg loss: 3.396376, ppl: 29.855711 +epoch: 0, batch: 19639, sum loss: 5728.528320, avg loss: 3.010262, ppl: 20.292711 +epoch: 0, batch: 19640, sum loss: 5018.175781, avg loss: 3.336553, ppl: 28.122019 +epoch: 0, batch: 19641, sum loss: 4671.636719, avg loss: 3.015905, ppl: 20.407549 +epoch: 0, batch: 19642, sum loss: 4732.569336, avg loss: 3.431885, ppl: 30.934893 +epoch: 0, batch: 19643, sum loss: 5425.627441, avg loss: 3.167325, ppl: 23.743879 +epoch: 0, batch: 19644, sum loss: 6282.357910, avg loss: 3.488261, ppl: 32.728973 +epoch: 0, batch: 19645, sum loss: 5643.006836, avg loss: 3.129787, ppl: 22.869118 +epoch: 0, batch: 19646, sum loss: 6053.022461, avg loss: 3.324010, ppl: 27.771496 +epoch: 0, batch: 19647, sum loss: 5200.402344, avg loss: 3.348617, ppl: 28.463350 +epoch: 0, batch: 19648, sum loss: 4596.085938, avg loss: 3.154486, ppl: 23.440979 +epoch: 0, batch: 19649, sum loss: 6663.202148, avg loss: 3.279135, ppl: 26.552794 +epoch: 0, batch: 19650, sum loss: 4696.500488, avg loss: 3.214580, ppl: 24.892824 +epoch: 0, batch: 19651, sum loss: 5306.208008, avg loss: 3.154701, ppl: 23.446037 +epoch: 0, batch: 19652, sum loss: 6234.563477, avg loss: 3.469429, ppl: 32.118397 +epoch: 0, batch: 19653, sum loss: 4625.140137, avg loss: 2.947827, ppl: 19.064474 +epoch: 0, batch: 19654, sum loss: 4573.511719, avg loss: 3.165060, ppl: 23.690166 +epoch: 0, batch: 19655, sum loss: 5660.176758, avg loss: 3.430410, ppl: 
30.889309 +epoch: 0, batch: 19656, sum loss: 5813.793457, avg loss: 3.280922, ppl: 26.600285 +epoch: 0, batch: 19657, sum loss: 6090.086914, avg loss: 3.413726, ppl: 30.378225 +epoch: 0, batch: 19658, sum loss: 6070.486328, avg loss: 3.451101, ppl: 31.535088 +epoch: 0, batch: 19659, sum loss: 5131.280273, avg loss: 3.310503, ppl: 27.398916 +epoch: 0, batch: 19660, sum loss: 5037.897461, avg loss: 3.271362, ppl: 26.347195 +epoch: 0, batch: 19661, sum loss: 5771.709961, avg loss: 3.451980, ppl: 31.562820 +epoch: 0, batch: 19662, sum loss: 5952.167480, avg loss: 3.409031, ppl: 30.235922 +epoch: 0, batch: 19663, sum loss: 6325.996094, avg loss: 3.130132, ppl: 22.876999 +epoch: 0, batch: 19664, sum loss: 5700.636230, avg loss: 3.159998, ppl: 23.570547 +epoch: 0, batch: 19665, sum loss: 5354.058105, avg loss: 3.337942, ppl: 28.161102 +epoch: 0, batch: 19666, sum loss: 6045.704102, avg loss: 3.385053, ppl: 29.519554 +epoch: 0, batch: 19667, sum loss: 5935.695312, avg loss: 3.499820, ppl: 33.109509 +epoch: 0, batch: 19668, sum loss: 6608.725586, avg loss: 3.621220, ppl: 37.383133 +epoch: 0, batch: 19669, sum loss: 6400.502930, avg loss: 3.382930, ppl: 29.456942 +epoch: 0, batch: 19670, sum loss: 6136.777832, avg loss: 3.311807, ppl: 27.434647 +epoch: 0, batch: 19671, sum loss: 5768.305664, avg loss: 3.224319, ppl: 25.136438 +epoch: 0, batch: 19672, sum loss: 4845.914551, avg loss: 3.156948, ppl: 23.498758 +epoch: 0, batch: 19673, sum loss: 5919.906250, avg loss: 3.421911, ppl: 30.627890 +epoch: 0, batch: 19674, sum loss: 5923.965820, avg loss: 3.242455, ppl: 25.596491 +epoch: 0, batch: 19675, sum loss: 6027.807617, avg loss: 3.352507, ppl: 28.574276 +epoch: 0, batch: 19676, sum loss: 5628.083984, avg loss: 2.884718, ppl: 17.898514 +epoch: 0, batch: 19677, sum loss: 6278.904297, avg loss: 3.436729, ppl: 31.085117 +epoch: 0, batch: 19678, sum loss: 5450.197754, avg loss: 3.232620, ppl: 25.345984 +epoch: 0, batch: 19679, sum loss: 4277.019043, avg loss: 2.830588, ppl: 
16.955433 +epoch: 0, batch: 19680, sum loss: 5628.989746, avg loss: 3.196473, ppl: 24.446165 +epoch: 0, batch: 19681, sum loss: 3996.646484, avg loss: 2.900324, ppl: 18.180037 +epoch: 0, batch: 19682, sum loss: 5541.408691, avg loss: 3.336188, ppl: 28.111769 +epoch: 0, batch: 19683, sum loss: 6028.427734, avg loss: 3.546134, ppl: 34.678997 +epoch: 0, batch: 19684, sum loss: 5743.144043, avg loss: 3.187094, ppl: 24.217958 +epoch: 0, batch: 19685, sum loss: 6875.179688, avg loss: 3.670678, ppl: 39.278522 +epoch: 0, batch: 19686, sum loss: 6054.264160, avg loss: 3.430178, ppl: 30.882137 +epoch: 0, batch: 19687, sum loss: 5817.296875, avg loss: 3.508623, ppl: 33.402237 +epoch: 0, batch: 19688, sum loss: 4921.881348, avg loss: 3.198104, ppl: 24.486052 +epoch: 0, batch: 19689, sum loss: 6661.466797, avg loss: 3.381455, ppl: 29.413549 +epoch: 0, batch: 19690, sum loss: 5827.515625, avg loss: 3.347223, ppl: 28.423698 +epoch: 0, batch: 19691, sum loss: 5511.163086, avg loss: 3.280454, ppl: 26.587851 +epoch: 0, batch: 19692, sum loss: 6315.657227, avg loss: 3.497042, ppl: 33.017632 +epoch: 0, batch: 19693, sum loss: 5845.340820, avg loss: 3.487674, ppl: 32.709770 +epoch: 0, batch: 19694, sum loss: 5141.096191, avg loss: 2.992489, ppl: 19.935242 +epoch: 0, batch: 19695, sum loss: 5874.791504, avg loss: 3.482390, ppl: 32.537392 +epoch: 0, batch: 19696, sum loss: 4906.033691, avg loss: 3.058625, ppl: 21.298241 +epoch: 0, batch: 19697, sum loss: 4702.234375, avg loss: 3.023945, ppl: 20.572292 +epoch: 0, batch: 19698, sum loss: 5588.042969, avg loss: 3.314379, ppl: 27.505308 +epoch: 0, batch: 19699, sum loss: 6670.521973, avg loss: 3.374063, ppl: 29.196907 +epoch: 0, batch: 19700, sum loss: 6618.908203, avg loss: 3.543313, ppl: 34.581291 +epoch: 0, batch: 19701, sum loss: 7335.162109, avg loss: 3.552137, ppl: 34.887783 +epoch: 0, batch: 19702, sum loss: 6031.550293, avg loss: 3.237547, ppl: 25.471169 +epoch: 0, batch: 19703, sum loss: 5344.375488, avg loss: 3.233137, ppl: 
25.359081 +epoch: 0, batch: 19704, sum loss: 4808.854980, avg loss: 2.979464, ppl: 19.677267 +epoch: 0, batch: 19705, sum loss: 6108.915527, avg loss: 3.660225, ppl: 38.870094 +epoch: 0, batch: 19706, sum loss: 5747.899902, avg loss: 3.243736, ppl: 25.629295 +epoch: 0, batch: 19707, sum loss: 5815.041016, avg loss: 3.402599, ppl: 30.042067 +epoch: 0, batch: 19708, sum loss: 7225.557129, avg loss: 3.462174, ppl: 31.886227 +epoch: 0, batch: 19709, sum loss: 5034.877441, avg loss: 3.395062, ppl: 29.816515 +epoch: 0, batch: 19710, sum loss: 5234.508789, avg loss: 3.032740, ppl: 20.754013 +epoch: 0, batch: 19711, sum loss: 5148.714355, avg loss: 3.101635, ppl: 22.234280 +epoch: 0, batch: 19712, sum loss: 4999.018066, avg loss: 3.373157, ppl: 29.170467 +epoch: 0, batch: 19713, sum loss: 5325.345703, avg loss: 3.162319, ppl: 23.625330 +epoch: 0, batch: 19714, sum loss: 5539.007324, avg loss: 3.194352, ppl: 24.394371 +epoch: 0, batch: 19715, sum loss: 4656.970703, avg loss: 3.041784, ppl: 20.942568 +epoch: 0, batch: 19716, sum loss: 4949.588867, avg loss: 3.418225, ppl: 30.515190 +epoch: 0, batch: 19717, sum loss: 5459.740723, avg loss: 3.363981, ppl: 28.904022 +epoch: 0, batch: 19718, sum loss: 5361.742188, avg loss: 3.380670, ppl: 29.390451 +epoch: 0, batch: 19719, sum loss: 6122.955078, avg loss: 3.445670, ppl: 31.364279 +epoch: 0, batch: 19720, sum loss: 5517.179199, avg loss: 3.446083, ppl: 31.377256 +epoch: 0, batch: 19721, sum loss: 5330.614258, avg loss: 3.013349, ppl: 20.355457 +epoch: 0, batch: 19722, sum loss: 4821.939941, avg loss: 3.214627, ppl: 24.893993 +epoch: 0, batch: 19723, sum loss: 5316.741699, avg loss: 3.170388, ppl: 23.816734 +epoch: 0, batch: 19724, sum loss: 5250.072754, avg loss: 3.240786, ppl: 25.553795 +epoch: 0, batch: 19725, sum loss: 5363.205566, avg loss: 3.196189, ppl: 24.439219 +epoch: 0, batch: 19726, sum loss: 5886.332031, avg loss: 3.221857, ppl: 25.074631 +epoch: 0, batch: 19727, sum loss: 5600.882324, avg loss: 3.448819, ppl: 
31.463217 +epoch: 0, batch: 19728, sum loss: 4322.692871, avg loss: 3.139210, ppl: 23.085632 +epoch: 0, batch: 19729, sum loss: 6125.693359, avg loss: 3.386232, ppl: 29.554386 +epoch: 0, batch: 19730, sum loss: 7056.285156, avg loss: 3.390815, ppl: 29.690126 +epoch: 0, batch: 19731, sum loss: 5911.315430, avg loss: 3.368271, ppl: 29.028290 +epoch: 0, batch: 19732, sum loss: 5399.744629, avg loss: 3.219883, ppl: 25.025204 +epoch: 0, batch: 19733, sum loss: 6100.355957, avg loss: 3.448477, ppl: 31.452461 +epoch: 0, batch: 19734, sum loss: 6257.928711, avg loss: 3.425248, ppl: 30.730276 +epoch: 0, batch: 19735, sum loss: 5377.664551, avg loss: 3.265127, ppl: 26.183441 +epoch: 0, batch: 19736, sum loss: 5150.246094, avg loss: 3.111931, ppl: 22.464388 +epoch: 0, batch: 19737, sum loss: 5921.163086, avg loss: 3.404924, ppl: 30.112011 +epoch: 0, batch: 19738, sum loss: 5685.523926, avg loss: 3.217614, ppl: 24.968479 +epoch: 0, batch: 19739, sum loss: 5594.604004, avg loss: 3.334090, ppl: 28.052856 +epoch: 0, batch: 19740, sum loss: 6458.652832, avg loss: 3.339531, ppl: 28.205893 +epoch: 0, batch: 19741, sum loss: 6257.066895, avg loss: 3.503397, ppl: 33.228134 +epoch: 0, batch: 19742, sum loss: 6093.072266, avg loss: 3.444360, ppl: 31.323223 +epoch: 0, batch: 19743, sum loss: 5792.406250, avg loss: 3.357917, ppl: 28.729280 +epoch: 0, batch: 19744, sum loss: 5576.821289, avg loss: 3.194056, ppl: 24.387136 +epoch: 0, batch: 19745, sum loss: 5298.162109, avg loss: 3.191664, ppl: 24.328876 +epoch: 0, batch: 19746, sum loss: 6851.018066, avg loss: 3.396638, ppl: 29.863520 +epoch: 0, batch: 19747, sum loss: 5190.600586, avg loss: 3.113738, ppl: 22.505001 +epoch: 0, batch: 19748, sum loss: 6194.728516, avg loss: 3.134984, ppl: 22.988274 +epoch: 0, batch: 19749, sum loss: 6215.761230, avg loss: 3.476377, ppl: 32.342319 +epoch: 0, batch: 19750, sum loss: 6270.743652, avg loss: 3.344397, ppl: 28.343468 +epoch: 0, batch: 19751, sum loss: 6393.104492, avg loss: 3.424266, ppl: 
30.700098 +epoch: 0, batch: 19752, sum loss: 5792.760254, avg loss: 3.468719, ppl: 32.095592 +epoch: 0, batch: 19753, sum loss: 6157.100586, avg loss: 3.322774, ppl: 27.737192 +epoch: 0, batch: 19754, sum loss: 5802.574219, avg loss: 3.241662, ppl: 25.576183 +epoch: 0, batch: 19755, sum loss: 5233.612305, avg loss: 3.310318, ppl: 27.393822 +epoch: 0, batch: 19756, sum loss: 5160.949219, avg loss: 3.032285, ppl: 20.744579 +epoch: 0, batch: 19757, sum loss: 5712.567871, avg loss: 3.346554, ppl: 28.404684 +epoch: 0, batch: 19758, sum loss: 5125.431152, avg loss: 3.110092, ppl: 22.423100 +epoch: 0, batch: 19759, sum loss: 5088.076172, avg loss: 3.374056, ppl: 29.196705 +epoch: 0, batch: 19760, sum loss: 5790.197754, avg loss: 3.227535, ppl: 25.217419 +epoch: 0, batch: 19761, sum loss: 5198.354004, avg loss: 3.236833, ppl: 25.452988 +epoch: 0, batch: 19762, sum loss: 5007.354980, avg loss: 3.245208, ppl: 25.667036 +epoch: 0, batch: 19763, sum loss: 4767.241699, avg loss: 3.152938, ppl: 23.404724 +epoch: 0, batch: 19764, sum loss: 5763.223633, avg loss: 3.283888, ppl: 26.679302 +epoch: 0, batch: 19765, sum loss: 5814.897949, avg loss: 3.359271, ppl: 28.768206 +epoch: 0, batch: 19766, sum loss: 5117.654785, avg loss: 3.208561, ppl: 24.743454 +epoch: 0, batch: 19767, sum loss: 6658.456055, avg loss: 3.332561, ppl: 28.009970 +epoch: 0, batch: 19768, sum loss: 5437.408691, avg loss: 2.931218, ppl: 18.750448 +epoch: 0, batch: 19769, sum loss: 5327.750977, avg loss: 3.300961, ppl: 27.138708 +epoch: 0, batch: 19770, sum loss: 5093.492676, avg loss: 3.070219, ppl: 21.546612 +epoch: 0, batch: 19771, sum loss: 5755.492676, avg loss: 3.255369, ppl: 25.929186 +epoch: 0, batch: 19772, sum loss: 5995.413086, avg loss: 3.308727, ppl: 27.350281 +epoch: 0, batch: 19773, sum loss: 5954.092285, avg loss: 3.537785, ppl: 34.390663 +epoch: 0, batch: 19774, sum loss: 5433.257812, avg loss: 3.370507, ppl: 29.093281 +epoch: 0, batch: 19775, sum loss: 4885.180176, avg loss: 3.197108, ppl: 
24.461674 +epoch: 0, batch: 19776, sum loss: 4078.025146, avg loss: 3.098803, ppl: 22.171402 +epoch: 0, batch: 19777, sum loss: 6281.791992, avg loss: 3.573260, ppl: 35.632576 +epoch: 0, batch: 19778, sum loss: 5952.311523, avg loss: 3.462660, ppl: 31.901709 +epoch: 0, batch: 19779, sum loss: 4791.537598, avg loss: 3.046114, ppl: 21.033453 +epoch: 0, batch: 19780, sum loss: 5513.676270, avg loss: 3.125667, ppl: 22.775078 +epoch: 0, batch: 19781, sum loss: 5620.290039, avg loss: 3.379609, ppl: 29.359293 +epoch: 0, batch: 19782, sum loss: 5372.318848, avg loss: 3.283814, ppl: 26.677319 +epoch: 0, batch: 19783, sum loss: 6705.889648, avg loss: 3.611141, ppl: 37.008263 +epoch: 0, batch: 19784, sum loss: 6739.550781, avg loss: 3.499248, ppl: 33.090553 +epoch: 0, batch: 19785, sum loss: 5710.754883, avg loss: 3.417567, ppl: 30.495146 +epoch: 0, batch: 19786, sum loss: 5874.402832, avg loss: 3.478036, ppl: 32.396038 +epoch: 0, batch: 19787, sum loss: 5926.436523, avg loss: 3.301636, ppl: 27.157030 +epoch: 0, batch: 19788, sum loss: 4543.643066, avg loss: 3.202004, ppl: 24.581736 +epoch: 0, batch: 19789, sum loss: 5573.625000, avg loss: 3.251823, ppl: 25.837404 +epoch: 0, batch: 19790, sum loss: 5081.900391, avg loss: 3.372197, ppl: 29.142473 +epoch: 0, batch: 19791, sum loss: 4657.531250, avg loss: 2.949672, ppl: 19.099678 +epoch: 0, batch: 19792, sum loss: 5380.215332, avg loss: 3.097418, ppl: 22.140717 +epoch: 0, batch: 19793, sum loss: 4555.420410, avg loss: 3.055279, ppl: 21.227102 +epoch: 0, batch: 19794, sum loss: 5649.973145, avg loss: 3.309885, ppl: 27.381971 +epoch: 0, batch: 19795, sum loss: 5877.025879, avg loss: 3.551073, ppl: 34.850697 +epoch: 0, batch: 19796, sum loss: 5318.731445, avg loss: 3.123154, ppl: 22.717922 +epoch: 0, batch: 19797, sum loss: 4878.261230, avg loss: 3.137146, ppl: 23.038017 +epoch: 0, batch: 19798, sum loss: 5377.193848, avg loss: 3.166781, ppl: 23.730970 +epoch: 0, batch: 19799, sum loss: 6152.246582, avg loss: 3.159860, ppl: 
23.567293 +epoch: 0, batch: 19800, sum loss: 6343.127930, avg loss: 3.129318, ppl: 22.858391 +epoch: 0, batch: 19801, sum loss: 5481.460938, avg loss: 3.076016, ppl: 21.671898 +epoch: 0, batch: 19802, sum loss: 5788.131348, avg loss: 3.128720, ppl: 22.844715 +epoch: 0, batch: 19803, sum loss: 5410.855957, avg loss: 3.418102, ppl: 30.511457 +epoch: 0, batch: 19804, sum loss: 6115.167969, avg loss: 3.208378, ppl: 24.738916 +epoch: 0, batch: 19805, sum loss: 6815.557129, avg loss: 3.627226, ppl: 37.608341 +epoch: 0, batch: 19806, sum loss: 6158.889160, avg loss: 3.553889, ppl: 34.948956 +epoch: 0, batch: 19807, sum loss: 5686.578125, avg loss: 3.516746, ppl: 33.674675 +epoch: 0, batch: 19808, sum loss: 5298.822754, avg loss: 3.086094, ppl: 21.891396 +epoch: 0, batch: 19809, sum loss: 6733.122070, avg loss: 3.639525, ppl: 38.073765 +epoch: 0, batch: 19810, sum loss: 5276.289062, avg loss: 3.275164, ppl: 26.447565 +epoch: 0, batch: 19811, sum loss: 3522.984375, avg loss: 2.642899, ppl: 14.053887 +epoch: 0, batch: 19812, sum loss: 6387.819336, avg loss: 3.426942, ppl: 30.782354 +epoch: 0, batch: 19813, sum loss: 5817.860352, avg loss: 3.324492, ppl: 27.784866 +epoch: 0, batch: 19814, sum loss: 5980.635742, avg loss: 3.344875, ppl: 28.357019 +epoch: 0, batch: 19815, sum loss: 5097.764648, avg loss: 3.174200, ppl: 23.907677 +epoch: 0, batch: 19816, sum loss: 4867.859863, avg loss: 3.160948, ppl: 23.592947 +epoch: 0, batch: 19817, sum loss: 5088.180176, avg loss: 3.178126, ppl: 24.001740 +epoch: 0, batch: 19818, sum loss: 5347.008789, avg loss: 3.390621, ppl: 29.684372 +epoch: 0, batch: 19819, sum loss: 5783.168945, avg loss: 3.448520, ppl: 31.453819 +epoch: 0, batch: 19820, sum loss: 5839.074219, avg loss: 3.287767, ppl: 26.782995 +epoch: 0, batch: 19821, sum loss: 5137.833984, avg loss: 3.325459, ppl: 27.811762 +epoch: 0, batch: 19822, sum loss: 5194.343750, avg loss: 3.472155, ppl: 32.206074 +epoch: 0, batch: 19823, sum loss: 4866.430664, avg loss: 3.189011, ppl: 
24.264420 +epoch: 0, batch: 19824, sum loss: 5411.522949, avg loss: 2.968471, ppl: 19.462145 +epoch: 0, batch: 19825, sum loss: 5729.917969, avg loss: 3.420847, ppl: 30.595308 +epoch: 0, batch: 19826, sum loss: 6337.958496, avg loss: 3.342805, ppl: 28.298403 +epoch: 0, batch: 19827, sum loss: 5385.853027, avg loss: 3.258229, ppl: 26.003450 +epoch: 0, batch: 19828, sum loss: 5713.374023, avg loss: 3.341154, ppl: 28.251720 +epoch: 0, batch: 19829, sum loss: 6582.726562, avg loss: 3.466417, ppl: 32.021812 +epoch: 0, batch: 19830, sum loss: 6867.294434, avg loss: 3.597326, ppl: 36.500484 +epoch: 0, batch: 19831, sum loss: 5952.162109, avg loss: 3.174486, ppl: 23.914534 +epoch: 0, batch: 19832, sum loss: 5028.898926, avg loss: 3.154893, ppl: 23.450520 +epoch: 0, batch: 19833, sum loss: 5326.606934, avg loss: 3.113154, ppl: 22.491880 +epoch: 0, batch: 19834, sum loss: 5437.853516, avg loss: 3.250361, ppl: 25.799644 +epoch: 0, batch: 19835, sum loss: 5964.833008, avg loss: 3.389110, ppl: 29.639549 +epoch: 0, batch: 19836, sum loss: 6056.736816, avg loss: 3.470909, ppl: 32.165981 +epoch: 0, batch: 19837, sum loss: 5478.566895, avg loss: 3.249447, ppl: 25.776072 +epoch: 0, batch: 19838, sum loss: 5252.433105, avg loss: 2.997964, ppl: 20.044691 +epoch: 0, batch: 19839, sum loss: 6412.028809, avg loss: 3.443624, ppl: 31.300200 +epoch: 0, batch: 19840, sum loss: 5492.121094, avg loss: 3.267175, ppl: 26.237114 +epoch: 0, batch: 19841, sum loss: 4864.001465, avg loss: 3.136042, ppl: 23.012606 +epoch: 0, batch: 19842, sum loss: 5173.405762, avg loss: 3.276381, ppl: 26.479769 +epoch: 0, batch: 19843, sum loss: 4824.482910, avg loss: 2.974404, ppl: 19.577944 +epoch: 0, batch: 19844, sum loss: 5918.324707, avg loss: 3.234057, ppl: 25.382435 +epoch: 0, batch: 19845, sum loss: 6367.756836, avg loss: 3.453230, ppl: 31.602316 +epoch: 0, batch: 19846, sum loss: 4201.568848, avg loss: 2.889662, ppl: 17.987234 +epoch: 0, batch: 19847, sum loss: 5916.285645, avg loss: 3.459816, ppl: 
31.811131 +epoch: 0, batch: 19848, sum loss: 4021.609131, avg loss: 2.895327, ppl: 18.089413 +epoch: 0, batch: 19849, sum loss: 6217.914551, avg loss: 3.282954, ppl: 26.654392 +epoch: 0, batch: 19850, sum loss: 5946.884766, avg loss: 3.463532, ppl: 31.929560 +epoch: 0, batch: 19851, sum loss: 5701.353027, avg loss: 3.332176, ppl: 27.999208 +epoch: 0, batch: 19852, sum loss: 5833.017090, avg loss: 3.244169, ppl: 25.640406 +epoch: 0, batch: 19853, sum loss: 4964.891602, avg loss: 3.066641, ppl: 21.469662 +epoch: 0, batch: 19854, sum loss: 6302.208984, avg loss: 3.457054, ppl: 31.723379 +epoch: 0, batch: 19855, sum loss: 4842.865234, avg loss: 3.148807, ppl: 23.308243 +epoch: 0, batch: 19856, sum loss: 5270.433105, avg loss: 3.025507, ppl: 20.604448 +epoch: 0, batch: 19857, sum loss: 4834.697754, avg loss: 2.942604, ppl: 18.965160 +epoch: 0, batch: 19858, sum loss: 6660.526367, avg loss: 3.452839, ppl: 31.589945 +epoch: 0, batch: 19859, sum loss: 4290.577148, avg loss: 3.143280, ppl: 23.179773 +epoch: 0, batch: 19860, sum loss: 5926.626465, avg loss: 3.427777, ppl: 30.808083 +epoch: 0, batch: 19861, sum loss: 5186.739258, avg loss: 3.017300, ppl: 20.436047 +epoch: 0, batch: 19862, sum loss: 4886.100098, avg loss: 3.283669, ppl: 26.673471 +epoch: 0, batch: 19863, sum loss: 4987.607422, avg loss: 3.026461, ppl: 20.624113 +epoch: 0, batch: 19864, sum loss: 5032.893555, avg loss: 3.224147, ppl: 25.132130 +epoch: 0, batch: 19865, sum loss: 6422.131836, avg loss: 3.367662, ppl: 29.010626 +epoch: 0, batch: 19866, sum loss: 5955.939453, avg loss: 3.301518, ppl: 27.153839 +epoch: 0, batch: 19867, sum loss: 4863.907227, avg loss: 3.299801, ppl: 27.107254 +epoch: 0, batch: 19868, sum loss: 6354.702637, avg loss: 3.449893, ppl: 31.497023 +epoch: 0, batch: 19869, sum loss: 6111.344238, avg loss: 3.368988, ppl: 29.049116 +epoch: 0, batch: 19870, sum loss: 4879.845703, avg loss: 3.168731, ppl: 23.777290 +epoch: 0, batch: 19871, sum loss: 5471.571289, avg loss: 3.091283, ppl: 
22.005301 +epoch: 0, batch: 19872, sum loss: 5557.000000, avg loss: 3.357704, ppl: 28.723171 +epoch: 0, batch: 19873, sum loss: 5553.187012, avg loss: 3.341268, ppl: 28.254927 +epoch: 0, batch: 19874, sum loss: 5803.631836, avg loss: 3.322056, ppl: 27.717274 +epoch: 0, batch: 19875, sum loss: 5294.036621, avg loss: 3.451132, ppl: 31.536074 +epoch: 0, batch: 19876, sum loss: 4507.342285, avg loss: 3.070397, ppl: 21.550449 +epoch: 0, batch: 19877, sum loss: 6387.450195, avg loss: 3.319880, ppl: 27.657045 +epoch: 0, batch: 19878, sum loss: 6133.941406, avg loss: 3.424869, ppl: 30.718630 +epoch: 0, batch: 19879, sum loss: 4638.670410, avg loss: 3.082173, ppl: 21.805737 +epoch: 0, batch: 19880, sum loss: 5192.303223, avg loss: 3.249251, ppl: 25.771034 +epoch: 0, batch: 19881, sum loss: 5318.832520, avg loss: 3.297478, ppl: 27.044359 +epoch: 0, batch: 19882, sum loss: 5411.680176, avg loss: 3.464584, ppl: 31.963163 +epoch: 0, batch: 19883, sum loss: 5846.769531, avg loss: 3.221361, ppl: 25.062199 +epoch: 0, batch: 19884, sum loss: 6606.421387, avg loss: 3.514054, ppl: 33.584137 +epoch: 0, batch: 19885, sum loss: 5708.164062, avg loss: 3.328375, ppl: 27.892988 +epoch: 0, batch: 19886, sum loss: 5946.007324, avg loss: 3.303338, ppl: 27.203281 +epoch: 0, batch: 19887, sum loss: 4680.779297, avg loss: 3.051355, ppl: 21.143982 +epoch: 0, batch: 19888, sum loss: 5835.723145, avg loss: 3.236674, ppl: 25.448940 +epoch: 0, batch: 19889, sum loss: 6950.206055, avg loss: 3.571535, ppl: 35.571140 +epoch: 0, batch: 19890, sum loss: 6004.056641, avg loss: 3.378760, ppl: 29.334377 +epoch: 0, batch: 19891, sum loss: 5519.164062, avg loss: 3.201371, ppl: 24.566198 +epoch: 0, batch: 19892, sum loss: 7314.037109, avg loss: 3.669863, ppl: 39.246529 +epoch: 0, batch: 19893, sum loss: 5242.571777, avg loss: 3.260306, ppl: 26.057501 +epoch: 0, batch: 19894, sum loss: 5126.360352, avg loss: 3.324488, ppl: 27.784767 +epoch: 0, batch: 19895, sum loss: 5289.036621, avg loss: 3.349612, ppl: 
28.491663 +epoch: 0, batch: 19896, sum loss: 5871.423340, avg loss: 3.556283, ppl: 35.032745 +epoch: 0, batch: 19897, sum loss: 4960.124023, avg loss: 3.086574, ppl: 21.901909 +epoch: 0, batch: 19898, sum loss: 4918.549805, avg loss: 3.183527, ppl: 24.131727 +epoch: 0, batch: 19899, sum loss: 7273.637207, avg loss: 3.644107, ppl: 38.248596 +epoch: 0, batch: 19900, sum loss: 6315.521973, avg loss: 3.377285, ppl: 29.291124 +epoch: 0, batch: 19901, sum loss: 5364.193848, avg loss: 3.053041, ppl: 21.179663 +epoch: 0, batch: 19902, sum loss: 5517.023438, avg loss: 3.401371, ppl: 30.005201 +epoch: 0, batch: 19903, sum loss: 5326.850586, avg loss: 3.468002, ppl: 32.072594 +epoch: 0, batch: 19904, sum loss: 5896.151855, avg loss: 3.420042, ppl: 30.570692 +epoch: 0, batch: 19905, sum loss: 6052.589355, avg loss: 3.261093, ppl: 26.078035 +epoch: 0, batch: 19906, sum loss: 4741.915039, avg loss: 2.991745, ppl: 19.920404 +epoch: 0, batch: 19907, sum loss: 5515.414551, avg loss: 3.232951, ppl: 25.354366 +epoch: 0, batch: 19908, sum loss: 5510.069336, avg loss: 3.275903, ppl: 26.467121 +epoch: 0, batch: 19909, sum loss: 6290.952637, avg loss: 3.182070, ppl: 24.096582 +epoch: 0, batch: 19910, sum loss: 4339.990234, avg loss: 3.084570, ppl: 21.858070 +epoch: 0, batch: 19911, sum loss: 5717.467285, avg loss: 3.267124, ppl: 26.235781 +epoch: 0, batch: 19912, sum loss: 5725.090820, avg loss: 3.375643, ppl: 29.243088 +epoch: 0, batch: 19913, sum loss: 6526.455566, avg loss: 3.589910, ppl: 36.230801 +epoch: 0, batch: 19914, sum loss: 5207.980957, avg loss: 2.984516, ppl: 19.776930 +epoch: 0, batch: 19915, sum loss: 4786.621094, avg loss: 3.186831, ppl: 24.211590 +epoch: 0, batch: 19916, sum loss: 4839.153809, avg loss: 3.060818, ppl: 21.345015 +epoch: 0, batch: 19917, sum loss: 5812.711426, avg loss: 3.520722, ppl: 33.808826 +epoch: 0, batch: 19918, sum loss: 5251.309570, avg loss: 3.311040, ppl: 27.413626 +epoch: 0, batch: 19919, sum loss: 4767.473145, avg loss: 3.105846, ppl: 
22.328094 +epoch: 0, batch: 19920, sum loss: 5776.139648, avg loss: 3.397729, ppl: 29.896141 +epoch: 0, batch: 19921, sum loss: 5086.788574, avg loss: 3.305256, ppl: 27.255508 +epoch: 0, batch: 19922, sum loss: 5185.204590, avg loss: 3.165570, ppl: 23.702263 +epoch: 0, batch: 19923, sum loss: 5358.469727, avg loss: 3.161339, ppl: 23.602179 +epoch: 0, batch: 19924, sum loss: 5106.218750, avg loss: 3.134573, ppl: 22.978811 +epoch: 0, batch: 19925, sum loss: 5414.451172, avg loss: 3.242186, ppl: 25.589607 +epoch: 0, batch: 19926, sum loss: 4639.394531, avg loss: 3.177668, ppl: 23.990732 +epoch: 0, batch: 19927, sum loss: 6885.152832, avg loss: 3.518218, ppl: 33.724281 +epoch: 0, batch: 19928, sum loss: 4181.810059, avg loss: 2.786016, ppl: 16.216286 +epoch: 0, batch: 19929, sum loss: 5782.524414, avg loss: 3.257760, ppl: 25.991253 +epoch: 0, batch: 19930, sum loss: 5652.700195, avg loss: 3.254289, ppl: 25.901196 +epoch: 0, batch: 19931, sum loss: 6305.309570, avg loss: 3.384492, ppl: 29.503014 +epoch: 0, batch: 19932, sum loss: 4889.229004, avg loss: 2.936474, ppl: 18.849264 +epoch: 0, batch: 19933, sum loss: 4961.744629, avg loss: 3.253603, ppl: 25.883430 +epoch: 0, batch: 19934, sum loss: 5750.036133, avg loss: 3.457628, ppl: 31.741611 +epoch: 0, batch: 19935, sum loss: 6077.943359, avg loss: 3.533688, ppl: 34.250053 +epoch: 0, batch: 19936, sum loss: 5686.193848, avg loss: 3.300171, ppl: 27.117266 +epoch: 0, batch: 19937, sum loss: 7015.494629, avg loss: 3.382591, ppl: 29.446983 +epoch: 0, batch: 19938, sum loss: 5175.805664, avg loss: 3.121716, ppl: 22.685280 +epoch: 0, batch: 19939, sum loss: 5601.929688, avg loss: 3.395109, ppl: 29.817902 +epoch: 0, batch: 19940, sum loss: 5511.538574, avg loss: 3.117386, ppl: 22.587261 +epoch: 0, batch: 19941, sum loss: 4864.817871, avg loss: 3.128500, ppl: 22.839705 +epoch: 0, batch: 19942, sum loss: 5286.175293, avg loss: 3.171071, ppl: 23.832991 +epoch: 0, batch: 19943, sum loss: 6300.289062, avg loss: 3.376360, ppl: 
29.264069 +epoch: 0, batch: 19944, sum loss: 5785.040527, avg loss: 3.164683, ppl: 23.681238 +epoch: 0, batch: 19945, sum loss: 4873.249023, avg loss: 3.229456, ppl: 25.265913 +epoch: 0, batch: 19946, sum loss: 5483.483398, avg loss: 3.299328, ppl: 27.094435 +epoch: 0, batch: 19947, sum loss: 5188.451172, avg loss: 3.009543, ppl: 20.278124 +epoch: 0, batch: 19948, sum loss: 5951.554199, avg loss: 3.375811, ppl: 29.247984 +epoch: 0, batch: 19949, sum loss: 6055.195801, avg loss: 3.479998, ppl: 32.459644 +epoch: 0, batch: 19950, sum loss: 5451.813477, avg loss: 3.369477, ppl: 29.063316 +epoch: 0, batch: 19951, sum loss: 5818.784180, avg loss: 3.195379, ppl: 24.419416 +epoch: 0, batch: 19952, sum loss: 5764.512207, avg loss: 3.251276, ppl: 25.823269 +epoch: 0, batch: 19953, sum loss: 6463.827637, avg loss: 3.581068, ppl: 35.911865 +epoch: 0, batch: 19954, sum loss: 5952.280273, avg loss: 3.292190, ppl: 26.901722 +epoch: 0, batch: 19955, sum loss: 5034.194336, avg loss: 2.947420, ppl: 19.056730 +epoch: 0, batch: 19956, sum loss: 5629.491211, avg loss: 3.274864, ppl: 26.439634 +epoch: 0, batch: 19957, sum loss: 5797.525391, avg loss: 3.219059, ppl: 25.004580 +epoch: 0, batch: 19958, sum loss: 5648.588867, avg loss: 3.429623, ppl: 30.864994 +epoch: 0, batch: 19959, sum loss: 5239.182617, avg loss: 3.103781, ppl: 22.282045 +epoch: 0, batch: 19960, sum loss: 6753.645020, avg loss: 3.600024, ppl: 36.599113 +epoch: 0, batch: 19961, sum loss: 5914.951660, avg loss: 3.248189, ppl: 25.743670 +epoch: 0, batch: 19962, sum loss: 5245.268555, avg loss: 2.966781, ppl: 19.429274 +epoch: 0, batch: 19963, sum loss: 5912.694824, avg loss: 3.297655, ppl: 27.049131 +epoch: 0, batch: 19964, sum loss: 5873.457031, avg loss: 3.308990, ppl: 27.357473 +epoch: 0, batch: 19965, sum loss: 5309.335449, avg loss: 3.124977, ppl: 22.759369 +epoch: 0, batch: 19966, sum loss: 5329.271484, avg loss: 3.189271, ppl: 24.270727 +epoch: 0, batch: 19967, sum loss: 6905.485352, avg loss: 3.413487, ppl: 
30.370977 +epoch: 0, batch: 19968, sum loss: 4507.974609, avg loss: 3.145830, ppl: 23.238964 +epoch: 0, batch: 19969, sum loss: 5495.286621, avg loss: 3.278811, ppl: 26.544186 +epoch: 0, batch: 19970, sum loss: 4423.729980, avg loss: 3.124103, ppl: 22.739491 +epoch: 0, batch: 19971, sum loss: 4190.495605, avg loss: 2.858455, ppl: 17.434578 +epoch: 0, batch: 19972, sum loss: 4945.132812, avg loss: 3.186297, ppl: 24.198658 +epoch: 0, batch: 19973, sum loss: 5127.050293, avg loss: 3.070090, ppl: 21.543842 +epoch: 0, batch: 19974, sum loss: 6747.569336, avg loss: 3.476336, ppl: 32.341022 +epoch: 0, batch: 19975, sum loss: 5128.266602, avg loss: 3.256042, ppl: 25.946644 +epoch: 0, batch: 19976, sum loss: 5441.856934, avg loss: 3.088455, ppl: 21.943144 +epoch: 0, batch: 19977, sum loss: 4945.761230, avg loss: 2.935170, ppl: 18.824701 +epoch: 0, batch: 19978, sum loss: 5550.949219, avg loss: 3.302171, ppl: 27.171558 +epoch: 0, batch: 19979, sum loss: 4934.410156, avg loss: 3.030964, ppl: 20.717201 +epoch: 0, batch: 19980, sum loss: 5526.110840, avg loss: 3.116814, ppl: 22.574341 +epoch: 0, batch: 19981, sum loss: 5454.438965, avg loss: 3.338090, ppl: 28.165277 +epoch: 0, batch: 19982, sum loss: 6355.195312, avg loss: 3.530664, ppl: 34.146641 +epoch: 0, batch: 19983, sum loss: 6024.553711, avg loss: 3.313836, ppl: 27.490374 +epoch: 0, batch: 19984, sum loss: 5354.067383, avg loss: 3.266667, ppl: 26.223787 +epoch: 0, batch: 19985, sum loss: 5858.507324, avg loss: 3.099739, ppl: 22.192165 +epoch: 0, batch: 19986, sum loss: 5638.307617, avg loss: 3.465462, ppl: 31.991251 +epoch: 0, batch: 19987, sum loss: 5729.304199, avg loss: 3.534426, ppl: 34.275326 +epoch: 0, batch: 19988, sum loss: 5426.403809, avg loss: 3.154886, ppl: 23.450363 +epoch: 0, batch: 19989, sum loss: 5489.173828, avg loss: 3.388379, ppl: 29.617905 +epoch: 0, batch: 19990, sum loss: 5293.635742, avg loss: 3.245638, ppl: 25.678091 +epoch: 0, batch: 19991, sum loss: 5750.099609, avg loss: 3.512584, ppl: 
33.534809 +epoch: 0, batch: 19992, sum loss: 4961.778809, avg loss: 3.321137, ppl: 27.691816 +epoch: 0, batch: 19993, sum loss: 5936.155273, avg loss: 3.417476, ppl: 30.492346 +epoch: 0, batch: 19994, sum loss: 6063.751465, avg loss: 3.490934, ppl: 32.816574 +epoch: 0, batch: 19995, sum loss: 5589.897461, avg loss: 3.371470, ppl: 29.121309 +epoch: 0, batch: 19996, sum loss: 5622.648438, avg loss: 3.223995, ppl: 25.128319 +epoch: 0, batch: 19997, sum loss: 5210.969727, avg loss: 3.298082, ppl: 27.060690 +epoch: 0, batch: 19998, sum loss: 4498.682129, avg loss: 2.854494, ppl: 17.365641 +epoch: 0, batch: 19999, sum loss: 5350.658203, avg loss: 3.188712, ppl: 24.257156 +epoch: 0, batch: 20000, sum loss: 6240.678223, avg loss: 3.505999, ppl: 33.314713 +epoch: 0, batch: 20001, sum loss: 5809.984863, avg loss: 3.225977, ppl: 25.178160 +epoch: 0, batch: 20002, sum loss: 5519.045898, avg loss: 3.348936, ppl: 28.472412 +epoch: 0, batch: 20003, sum loss: 4674.545898, avg loss: 3.013891, ppl: 20.366501 +epoch: 0, batch: 20004, sum loss: 5828.022461, avg loss: 3.483576, ppl: 32.576008 +epoch: 0, batch: 20005, sum loss: 5822.402832, avg loss: 3.437074, ppl: 31.095829 +epoch: 0, batch: 20006, sum loss: 4755.097168, avg loss: 3.182796, ppl: 24.114082 +epoch: 0, batch: 20007, sum loss: 6216.193359, avg loss: 3.311771, ppl: 27.433659 +epoch: 0, batch: 20008, sum loss: 5516.067871, avg loss: 3.186636, ppl: 24.206869 +epoch: 0, batch: 20009, sum loss: 5227.583008, avg loss: 3.183668, ppl: 24.135122 +epoch: 0, batch: 20010, sum loss: 5870.852051, avg loss: 3.183759, ppl: 24.137321 +epoch: 0, batch: 20011, sum loss: 5207.073730, avg loss: 3.210280, ppl: 24.786018 +epoch: 0, batch: 20012, sum loss: 5647.634766, avg loss: 3.242041, ppl: 25.585880 +epoch: 0, batch: 20013, sum loss: 4976.569336, avg loss: 3.259050, ppl: 26.024799 +epoch: 0, batch: 20014, sum loss: 6082.440430, avg loss: 3.240512, ppl: 25.546789 +epoch: 0, batch: 20015, sum loss: 5194.610840, avg loss: 3.296073, ppl: 
27.006369 +epoch: 0, batch: 20016, sum loss: 4905.631836, avg loss: 3.031911, ppl: 20.736820 +epoch: 0, batch: 20017, sum loss: 4659.305176, avg loss: 3.112428, ppl: 22.475554 +epoch: 0, batch: 20018, sum loss: 4991.478516, avg loss: 3.094531, ppl: 22.076883 +epoch: 0, batch: 20019, sum loss: 5451.861816, avg loss: 3.280302, ppl: 26.583801 +epoch: 0, batch: 20020, sum loss: 5656.810547, avg loss: 3.365146, ppl: 28.937719 +epoch: 0, batch: 20021, sum loss: 4694.050781, avg loss: 3.020625, ppl: 20.504095 +epoch: 0, batch: 20022, sum loss: 5474.640137, avg loss: 3.011353, ppl: 20.314873 +epoch: 0, batch: 20023, sum loss: 5633.550781, avg loss: 3.341371, ppl: 28.257830 +epoch: 0, batch: 20024, sum loss: 6055.968750, avg loss: 3.407973, ppl: 30.203968 +epoch: 0, batch: 20025, sum loss: 5449.859375, avg loss: 3.366189, ppl: 28.967913 +epoch: 0, batch: 20026, sum loss: 4546.528320, avg loss: 2.935138, ppl: 18.824097 +epoch: 0, batch: 20027, sum loss: 5277.196289, avg loss: 3.122601, ppl: 22.705366 +epoch: 0, batch: 20028, sum loss: 5479.125977, avg loss: 3.033846, ppl: 20.776991 +epoch: 0, batch: 20029, sum loss: 5558.916016, avg loss: 3.279597, ppl: 26.565054 +epoch: 0, batch: 20030, sum loss: 6508.708008, avg loss: 3.384663, ppl: 29.508057 +epoch: 0, batch: 20031, sum loss: 4486.580566, avg loss: 3.179717, ppl: 24.039946 +epoch: 0, batch: 20032, sum loss: 5795.257324, avg loss: 3.387059, ppl: 29.578833 +epoch: 0, batch: 20033, sum loss: 6625.881348, avg loss: 3.396146, ppl: 29.848850 +epoch: 0, batch: 20034, sum loss: 6419.091309, avg loss: 3.260077, ppl: 26.051542 +epoch: 0, batch: 20035, sum loss: 6207.582031, avg loss: 3.505128, ppl: 33.285709 +epoch: 0, batch: 20036, sum loss: 4925.663086, avg loss: 3.301383, ppl: 27.150156 +epoch: 0, batch: 20037, sum loss: 6791.123047, avg loss: 3.340444, ppl: 28.231661 +epoch: 0, batch: 20038, sum loss: 6098.364746, avg loss: 3.426048, ppl: 30.754847 +epoch: 0, batch: 20039, sum loss: 5322.762695, avg loss: 3.259499, ppl: 
26.036497 +epoch: 0, batch: 20040, sum loss: 6297.094727, avg loss: 3.581965, ppl: 35.944099 +epoch: 0, batch: 20041, sum loss: 5135.692383, avg loss: 3.181966, ppl: 24.094067 +epoch: 0, batch: 20042, sum loss: 5274.794434, avg loss: 2.938604, ppl: 18.889460 +epoch: 0, batch: 20043, sum loss: 4864.321289, avg loss: 3.042102, ppl: 20.949234 +epoch: 0, batch: 20044, sum loss: 5982.742188, avg loss: 3.376266, ppl: 29.261292 +epoch: 0, batch: 20045, sum loss: 6188.229004, avg loss: 3.428382, ppl: 30.826715 +epoch: 0, batch: 20046, sum loss: 5391.050293, avg loss: 3.176812, ppl: 23.970219 +epoch: 0, batch: 20047, sum loss: 5477.668945, avg loss: 3.325846, ppl: 27.822533 +epoch: 0, batch: 20048, sum loss: 5855.753906, avg loss: 3.297159, ppl: 27.035713 +epoch: 0, batch: 20049, sum loss: 5962.117188, avg loss: 3.412775, ppl: 30.349333 +epoch: 0, batch: 20050, sum loss: 4965.581543, avg loss: 2.982331, ppl: 19.733768 +epoch: 0, batch: 20051, sum loss: 6005.187012, avg loss: 3.369914, ppl: 29.076035 +epoch: 0, batch: 20052, sum loss: 4824.442871, avg loss: 2.990975, ppl: 19.905083 +epoch: 0, batch: 20053, sum loss: 5633.918945, avg loss: 3.192022, ppl: 24.337591 +epoch: 0, batch: 20054, sum loss: 6251.067383, avg loss: 3.404721, ppl: 30.105888 +epoch: 0, batch: 20055, sum loss: 5971.913086, avg loss: 3.362564, ppl: 28.863096 +epoch: 0, batch: 20056, sum loss: 4946.544922, avg loss: 3.468825, ppl: 32.099014 +epoch: 0, batch: 20057, sum loss: 4870.894531, avg loss: 3.038612, ppl: 20.876240 +epoch: 0, batch: 20058, sum loss: 6784.718750, avg loss: 3.380528, ppl: 29.386274 +epoch: 0, batch: 20059, sum loss: 5491.380371, avg loss: 3.253188, ppl: 25.872683 +epoch: 0, batch: 20060, sum loss: 5775.250977, avg loss: 3.391222, ppl: 29.702227 +epoch: 0, batch: 20061, sum loss: 5367.087891, avg loss: 3.098781, ppl: 22.170900 +epoch: 0, batch: 20062, sum loss: 5332.201172, avg loss: 3.116424, ppl: 22.565542 +epoch: 0, batch: 20063, sum loss: 5850.751465, avg loss: 3.150647, ppl: 
23.351166 +epoch: 0, batch: 20064, sum loss: 4407.503418, avg loss: 2.972019, ppl: 19.531307 +epoch: 0, batch: 20065, sum loss: 6229.430176, avg loss: 3.480129, ppl: 32.463894 +epoch: 0, batch: 20066, sum loss: 5917.384766, avg loss: 3.328113, ppl: 27.885674 +epoch: 0, batch: 20067, sum loss: 5687.605957, avg loss: 3.383466, ppl: 29.472740 +epoch: 0, batch: 20068, sum loss: 4791.889160, avg loss: 3.152559, ppl: 23.395853 +epoch: 0, batch: 20069, sum loss: 5476.123047, avg loss: 3.093855, ppl: 22.061960 +epoch: 0, batch: 20070, sum loss: 5760.385742, avg loss: 3.306766, ppl: 27.296694 +epoch: 0, batch: 20071, sum loss: 6459.603516, avg loss: 3.454334, ppl: 31.637197 +epoch: 0, batch: 20072, sum loss: 5211.169922, avg loss: 3.164037, ppl: 23.665955 +epoch: 0, batch: 20073, sum loss: 6828.094727, avg loss: 3.607023, ppl: 36.856167 +epoch: 0, batch: 20074, sum loss: 5020.269531, avg loss: 3.382931, ppl: 29.456984 +epoch: 0, batch: 20075, sum loss: 5408.033203, avg loss: 3.257851, ppl: 25.993626 +epoch: 0, batch: 20076, sum loss: 4978.411133, avg loss: 3.069304, ppl: 21.526915 +epoch: 0, batch: 20077, sum loss: 5668.529785, avg loss: 3.165008, ppl: 23.688934 +epoch: 0, batch: 20078, sum loss: 6724.786621, avg loss: 3.404955, ppl: 30.112953 +epoch: 0, batch: 20079, sum loss: 4849.216797, avg loss: 3.269870, ppl: 26.307915 +epoch: 0, batch: 20080, sum loss: 6558.446289, avg loss: 3.335934, ppl: 28.104626 +epoch: 0, batch: 20081, sum loss: 6665.607422, avg loss: 3.593319, ppl: 36.354553 +epoch: 0, batch: 20082, sum loss: 4856.447266, avg loss: 2.887305, ppl: 17.944883 +epoch: 0, batch: 20083, sum loss: 6308.093262, avg loss: 3.654747, ppl: 38.657730 +epoch: 0, batch: 20084, sum loss: 4717.656250, avg loss: 3.026078, ppl: 20.616226 +epoch: 0, batch: 20085, sum loss: 6494.862793, avg loss: 3.402233, ppl: 30.031088 +epoch: 0, batch: 20086, sum loss: 6119.275391, avg loss: 3.270591, ppl: 26.326895 +epoch: 0, batch: 20087, sum loss: 6403.455078, avg loss: 3.309279, ppl: 
27.365387 +epoch: 0, batch: 20088, sum loss: 5590.000488, avg loss: 3.513514, ppl: 33.566006 +epoch: 0, batch: 20089, sum loss: 5658.999023, avg loss: 3.376491, ppl: 29.267899 +epoch: 0, batch: 20090, sum loss: 5330.714355, avg loss: 3.256392, ppl: 25.955732 +epoch: 0, batch: 20091, sum loss: 5711.592773, avg loss: 3.205159, ppl: 24.659414 +epoch: 0, batch: 20092, sum loss: 6004.865234, avg loss: 3.304824, ppl: 27.243750 +epoch: 0, batch: 20093, sum loss: 5849.902344, avg loss: 3.445172, ppl: 31.348684 +epoch: 0, batch: 20094, sum loss: 5340.551758, avg loss: 3.284472, ppl: 26.694891 +epoch: 0, batch: 20095, sum loss: 6257.945801, avg loss: 3.335792, ppl: 28.100632 +epoch: 0, batch: 20096, sum loss: 5221.111328, avg loss: 3.383740, ppl: 29.480837 +epoch: 0, batch: 20097, sum loss: 5046.480469, avg loss: 3.128630, ppl: 22.842667 +epoch: 0, batch: 20098, sum loss: 5245.943848, avg loss: 3.254308, ppl: 25.901672 +epoch: 0, batch: 20099, sum loss: 6535.242188, avg loss: 3.517353, ppl: 33.695122 +epoch: 0, batch: 20100, sum loss: 5156.333008, avg loss: 3.078408, ppl: 21.723791 +epoch: 0, batch: 20101, sum loss: 5288.818848, avg loss: 3.468078, ppl: 32.075031 +epoch: 0, batch: 20102, sum loss: 5675.393555, avg loss: 3.288177, ppl: 26.793974 +epoch: 0, batch: 20103, sum loss: 6411.138672, avg loss: 3.441298, ppl: 31.227472 +epoch: 0, batch: 20104, sum loss: 5070.653320, avg loss: 3.139724, ppl: 23.097481 +epoch: 0, batch: 20105, sum loss: 6652.296875, avg loss: 3.484703, ppl: 32.612728 +epoch: 0, batch: 20106, sum loss: 6554.107910, avg loss: 3.335424, ppl: 28.090290 +epoch: 0, batch: 20107, sum loss: 5432.132324, avg loss: 3.270399, ppl: 26.321836 +epoch: 0, batch: 20108, sum loss: 4979.492676, avg loss: 3.110239, ppl: 22.426409 +epoch: 0, batch: 20109, sum loss: 4490.327148, avg loss: 3.034005, ppl: 20.780291 +epoch: 0, batch: 20110, sum loss: 5058.057617, avg loss: 3.282322, ppl: 26.637545 +epoch: 0, batch: 20111, sum loss: 4578.125000, avg loss: 3.286522, ppl: 
26.749664 +epoch: 0, batch: 20112, sum loss: 5629.171387, avg loss: 3.305444, ppl: 27.260649 +epoch: 0, batch: 20113, sum loss: 6114.539551, avg loss: 3.532374, ppl: 34.205070 +epoch: 0, batch: 20114, sum loss: 4967.438965, avg loss: 3.141960, ppl: 23.149193 +epoch: 0, batch: 20115, sum loss: 4723.448730, avg loss: 3.000920, ppl: 20.104031 +epoch: 0, batch: 20116, sum loss: 5438.726074, avg loss: 3.151058, ppl: 23.360765 +epoch: 0, batch: 20117, sum loss: 4951.575684, avg loss: 3.064094, ppl: 21.415047 +epoch: 0, batch: 20118, sum loss: 4141.789551, avg loss: 3.081689, ppl: 21.795176 +epoch: 0, batch: 20119, sum loss: 6477.536133, avg loss: 3.447332, ppl: 31.416451 +epoch: 0, batch: 20120, sum loss: 5659.964355, avg loss: 3.149674, ppl: 23.328463 +epoch: 0, batch: 20121, sum loss: 4887.801758, avg loss: 3.058700, ppl: 21.299841 +epoch: 0, batch: 20122, sum loss: 6348.605469, avg loss: 3.393162, ppl: 29.759897 +epoch: 0, batch: 20123, sum loss: 6256.816895, avg loss: 3.383892, ppl: 29.485313 +epoch: 0, batch: 20124, sum loss: 4997.475586, avg loss: 3.152981, ppl: 23.405739 +epoch: 0, batch: 20125, sum loss: 5418.112793, avg loss: 3.244379, ppl: 25.645781 +epoch: 0, batch: 20126, sum loss: 5799.803711, avg loss: 3.251011, ppl: 25.816431 +epoch: 0, batch: 20127, sum loss: 5009.710938, avg loss: 3.295863, ppl: 27.000696 +epoch: 0, batch: 20128, sum loss: 5828.297852, avg loss: 3.278008, ppl: 26.522886 +epoch: 0, batch: 20129, sum loss: 4837.218262, avg loss: 3.257386, ppl: 25.981531 +epoch: 0, batch: 20130, sum loss: 6303.477539, avg loss: 3.465353, ppl: 31.987757 +epoch: 0, batch: 20131, sum loss: 4936.699707, avg loss: 3.166581, ppl: 23.726229 +epoch: 0, batch: 20132, sum loss: 5838.087891, avg loss: 3.315212, ppl: 27.528223 +epoch: 0, batch: 20133, sum loss: 5758.983398, avg loss: 3.395627, ppl: 29.833353 +epoch: 0, batch: 20134, sum loss: 5570.494629, avg loss: 3.206963, ppl: 24.703947 +epoch: 0, batch: 20135, sum loss: 5644.750000, avg loss: 3.142957, ppl: 
23.172279 +epoch: 0, batch: 20136, sum loss: 5561.044434, avg loss: 3.091187, ppl: 22.003170 +epoch: 0, batch: 20137, sum loss: 4722.456543, avg loss: 3.006020, ppl: 20.206808 +epoch: 0, batch: 20138, sum loss: 6050.117188, avg loss: 3.306075, ppl: 27.277853 +epoch: 0, batch: 20139, sum loss: 5038.694824, avg loss: 3.191067, ppl: 24.314356 +epoch: 0, batch: 20140, sum loss: 4958.741699, avg loss: 2.930698, ppl: 18.740715 +epoch: 0, batch: 20141, sum loss: 5593.424316, avg loss: 3.176277, ppl: 23.957397 +epoch: 0, batch: 20142, sum loss: 4745.476562, avg loss: 2.953004, ppl: 19.163427 +epoch: 0, batch: 20143, sum loss: 5622.730469, avg loss: 3.212989, ppl: 24.853258 +epoch: 0, batch: 20144, sum loss: 6720.730957, avg loss: 3.428944, ppl: 30.844057 +epoch: 0, batch: 20145, sum loss: 5132.357422, avg loss: 3.217779, ppl: 24.972593 +epoch: 0, batch: 20146, sum loss: 4504.657227, avg loss: 3.438670, ppl: 31.145496 +epoch: 0, batch: 20147, sum loss: 5258.509277, avg loss: 3.345108, ppl: 28.363632 +epoch: 0, batch: 20148, sum loss: 5555.728027, avg loss: 3.220712, ppl: 25.045952 +epoch: 0, batch: 20149, sum loss: 5968.839355, avg loss: 3.268806, ppl: 26.279942 +epoch: 0, batch: 20150, sum loss: 6306.888672, avg loss: 3.523401, ppl: 33.899540 +epoch: 0, batch: 20151, sum loss: 6409.081543, avg loss: 3.261619, ppl: 26.091747 +epoch: 0, batch: 20152, sum loss: 5379.131348, avg loss: 3.158621, ppl: 23.538111 +epoch: 0, batch: 20153, sum loss: 5189.602539, avg loss: 3.052708, ppl: 21.172596 +epoch: 0, batch: 20154, sum loss: 5274.498535, avg loss: 3.130266, ppl: 22.880070 +epoch: 0, batch: 20155, sum loss: 5226.211426, avg loss: 3.192554, ppl: 24.350552 +epoch: 0, batch: 20156, sum loss: 5306.636719, avg loss: 3.001492, ppl: 20.115519 +epoch: 0, batch: 20157, sum loss: 6121.218262, avg loss: 3.466148, ppl: 32.013203 +epoch: 0, batch: 20158, sum loss: 4712.544922, avg loss: 3.158542, ppl: 23.536259 +epoch: 0, batch: 20159, sum loss: 5493.166016, avg loss: 3.376254, ppl: 
29.260963 +epoch: 0, batch: 20160, sum loss: 5405.053223, avg loss: 3.223049, ppl: 25.104546 +epoch: 0, batch: 20161, sum loss: 5316.447266, avg loss: 3.170213, ppl: 23.812555 +epoch: 0, batch: 20162, sum loss: 5840.269043, avg loss: 3.155197, ppl: 23.457649 +epoch: 0, batch: 20163, sum loss: 5469.713867, avg loss: 3.085005, ppl: 21.867577 +epoch: 0, batch: 20164, sum loss: 5338.980469, avg loss: 3.357849, ppl: 28.727343 +epoch: 0, batch: 20165, sum loss: 4797.306641, avg loss: 3.160281, ppl: 23.577225 +epoch: 0, batch: 20166, sum loss: 5643.789551, avg loss: 3.481671, ppl: 32.514019 +epoch: 0, batch: 20167, sum loss: 5882.202637, avg loss: 3.484717, ppl: 32.613209 +epoch: 0, batch: 20168, sum loss: 4572.822754, avg loss: 3.100219, ppl: 22.202808 +epoch: 0, batch: 20169, sum loss: 6346.352539, avg loss: 3.296806, ppl: 27.026188 +epoch: 0, batch: 20170, sum loss: 5577.779297, avg loss: 3.244781, ppl: 25.656103 +epoch: 0, batch: 20171, sum loss: 7237.502930, avg loss: 3.467898, ppl: 32.069267 +epoch: 0, batch: 20172, sum loss: 5779.118164, avg loss: 3.452281, ppl: 31.572327 +epoch: 0, batch: 20173, sum loss: 5585.316895, avg loss: 3.364649, ppl: 28.923338 +epoch: 0, batch: 20174, sum loss: 5294.000977, avg loss: 3.216283, ppl: 24.935253 +epoch: 0, batch: 20175, sum loss: 5922.670898, avg loss: 3.662753, ppl: 38.968460 +epoch: 0, batch: 20176, sum loss: 5971.877441, avg loss: 3.256203, ppl: 25.950825 +epoch: 0, batch: 20177, sum loss: 5265.517090, avg loss: 3.307486, ppl: 27.316355 +epoch: 0, batch: 20178, sum loss: 5258.926758, avg loss: 3.152834, ppl: 23.402292 +epoch: 0, batch: 20179, sum loss: 5845.032227, avg loss: 3.324819, ppl: 27.793970 +epoch: 0, batch: 20180, sum loss: 4580.901367, avg loss: 3.023697, ppl: 20.567196 +epoch: 0, batch: 20181, sum loss: 5214.713867, avg loss: 3.150885, ppl: 23.356718 +epoch: 0, batch: 20182, sum loss: 5478.666992, avg loss: 3.525526, ppl: 33.971630 +epoch: 0, batch: 20183, sum loss: 4778.547363, avg loss: 2.984727, ppl: 
19.781096 +epoch: 0, batch: 20184, sum loss: 6137.099121, avg loss: 3.346292, ppl: 28.397228 +epoch: 0, batch: 20185, sum loss: 5426.011719, avg loss: 3.204969, ppl: 24.654728 +epoch: 0, batch: 20186, sum loss: 5299.208008, avg loss: 3.418844, ppl: 30.534098 +epoch: 0, batch: 20187, sum loss: 6918.901367, avg loss: 3.590504, ppl: 36.252350 +epoch: 0, batch: 20188, sum loss: 4378.666504, avg loss: 2.980712, ppl: 19.701843 +epoch: 0, batch: 20189, sum loss: 6199.637695, avg loss: 3.475133, ppl: 32.302139 +epoch: 0, batch: 20190, sum loss: 5586.246582, avg loss: 3.249707, ppl: 25.782789 +epoch: 0, batch: 20191, sum loss: 5723.367676, avg loss: 3.222617, ppl: 25.093702 +epoch: 0, batch: 20192, sum loss: 6014.516602, avg loss: 3.476599, ppl: 32.349522 +epoch: 0, batch: 20193, sum loss: 5786.458496, avg loss: 3.358362, ppl: 28.742085 +epoch: 0, batch: 20194, sum loss: 5058.365234, avg loss: 3.183364, ppl: 24.127781 +epoch: 0, batch: 20195, sum loss: 5776.931152, avg loss: 3.124354, ppl: 22.745205 +epoch: 0, batch: 20196, sum loss: 6900.339844, avg loss: 3.479748, ppl: 32.451534 +epoch: 0, batch: 20197, sum loss: 6703.324219, avg loss: 3.522504, ppl: 33.869118 +epoch: 0, batch: 20198, sum loss: 4555.493652, avg loss: 2.774357, ppl: 16.028315 +epoch: 0, batch: 20199, sum loss: 5639.462891, avg loss: 3.196975, ppl: 24.458420 +epoch: 0, batch: 20200, sum loss: 5212.679688, avg loss: 3.365190, ppl: 28.939003 +epoch: 0, batch: 20201, sum loss: 4661.390625, avg loss: 3.117987, ppl: 22.600836 +epoch: 0, batch: 20202, sum loss: 5933.594727, avg loss: 3.443758, ppl: 31.304373 +epoch: 0, batch: 20203, sum loss: 4594.422363, avg loss: 3.010762, ppl: 20.302860 +epoch: 0, batch: 20204, sum loss: 6089.813477, avg loss: 3.316892, ppl: 27.574512 +epoch: 0, batch: 20205, sum loss: 5077.720703, avg loss: 3.238342, ppl: 25.491428 +epoch: 0, batch: 20206, sum loss: 5172.093750, avg loss: 3.171118, ppl: 23.834122 +epoch: 0, batch: 20207, sum loss: 4765.006836, avg loss: 3.038907, ppl: 
20.882412 +epoch: 0, batch: 20208, sum loss: 5838.876465, avg loss: 3.178485, ppl: 24.010347 +epoch: 0, batch: 20209, sum loss: 4697.383789, avg loss: 3.034486, ppl: 20.790285 +epoch: 0, batch: 20210, sum loss: 5624.411621, avg loss: 3.190250, ppl: 24.294510 +epoch: 0, batch: 20211, sum loss: 5253.790039, avg loss: 3.067011, ppl: 21.477613 +epoch: 0, batch: 20212, sum loss: 4623.626465, avg loss: 3.274523, ppl: 26.430616 +epoch: 0, batch: 20213, sum loss: 5278.523438, avg loss: 3.351443, ppl: 28.543901 +epoch: 0, batch: 20214, sum loss: 6277.143555, avg loss: 3.233974, ppl: 25.380318 +epoch: 0, batch: 20215, sum loss: 5243.936523, avg loss: 3.304308, ppl: 27.229691 +epoch: 0, batch: 20216, sum loss: 5534.226074, avg loss: 3.364271, ppl: 28.912411 +epoch: 0, batch: 20217, sum loss: 5542.887695, avg loss: 3.055616, ppl: 21.234264 +epoch: 0, batch: 20218, sum loss: 5671.875977, avg loss: 3.128448, ppl: 22.838507 +epoch: 0, batch: 20219, sum loss: 5800.505371, avg loss: 3.521861, ppl: 33.847363 +epoch: 0, batch: 20220, sum loss: 5818.308105, avg loss: 3.296492, ppl: 27.017691 +epoch: 0, batch: 20221, sum loss: 4316.533203, avg loss: 3.134737, ppl: 22.982597 +epoch: 0, batch: 20222, sum loss: 6413.607422, avg loss: 3.299181, ppl: 27.090443 +epoch: 0, batch: 20223, sum loss: 7689.701172, avg loss: 3.622092, ppl: 37.415760 +epoch: 0, batch: 20224, sum loss: 5964.680176, avg loss: 3.347183, ppl: 28.422562 +epoch: 0, batch: 20225, sum loss: 6391.894043, avg loss: 3.513960, ppl: 33.580997 +epoch: 0, batch: 20226, sum loss: 4598.416504, avg loss: 3.069704, ppl: 21.535522 +epoch: 0, batch: 20227, sum loss: 5648.118164, avg loss: 3.088091, ppl: 21.935167 +epoch: 0, batch: 20228, sum loss: 5735.788086, avg loss: 3.418229, ppl: 30.515320 +epoch: 0, batch: 20229, sum loss: 5732.676758, avg loss: 3.279564, ppl: 26.564178 +epoch: 0, batch: 20230, sum loss: 6252.258789, avg loss: 3.214529, ppl: 24.891565 +epoch: 0, batch: 20231, sum loss: 4835.838867, avg loss: 3.047158, ppl: 
21.055410 +epoch: 0, batch: 20232, sum loss: 5927.341797, avg loss: 3.346890, ppl: 28.414221 +epoch: 0, batch: 20233, sum loss: 5095.443848, avg loss: 3.188638, ppl: 24.255375 +epoch: 0, batch: 20234, sum loss: 3969.663086, avg loss: 2.916725, ppl: 18.480667 +epoch: 0, batch: 20235, sum loss: 6315.911621, avg loss: 3.375688, ppl: 29.244392 +epoch: 0, batch: 20236, sum loss: 5285.667969, avg loss: 3.343243, ppl: 28.310801 +epoch: 0, batch: 20237, sum loss: 5694.895508, avg loss: 3.365777, ppl: 28.956001 +epoch: 0, batch: 20238, sum loss: 6096.561523, avg loss: 3.394522, ppl: 29.800411 +epoch: 0, batch: 20239, sum loss: 5706.750000, avg loss: 3.531405, ppl: 34.171936 +epoch: 0, batch: 20240, sum loss: 5603.516602, avg loss: 3.119998, ppl: 22.646339 +epoch: 0, batch: 20241, sum loss: 5881.499512, avg loss: 3.238711, ppl: 25.500834 +epoch: 0, batch: 20242, sum loss: 5054.531250, avg loss: 3.052253, ppl: 21.162975 +epoch: 0, batch: 20243, sum loss: 4847.330078, avg loss: 3.180663, ppl: 24.062698 +epoch: 0, batch: 20244, sum loss: 4718.431641, avg loss: 3.079916, ppl: 21.756580 +epoch: 0, batch: 20245, sum loss: 4432.990723, avg loss: 2.848966, ppl: 17.269909 +epoch: 0, batch: 20246, sum loss: 5584.623047, avg loss: 3.144495, ppl: 23.207952 +epoch: 0, batch: 20247, sum loss: 5648.279785, avg loss: 3.212901, ppl: 24.851072 +epoch: 0, batch: 20248, sum loss: 5635.590332, avg loss: 3.191161, ppl: 24.316641 +epoch: 0, batch: 20249, sum loss: 5054.270020, avg loss: 2.924925, ppl: 18.632826 +epoch: 0, batch: 20250, sum loss: 5877.564453, avg loss: 3.274409, ppl: 26.427610 +epoch: 0, batch: 20251, sum loss: 5330.657715, avg loss: 3.192011, ppl: 24.337313 +epoch: 0, batch: 20252, sum loss: 5494.038086, avg loss: 3.414567, ppl: 30.403774 +epoch: 0, batch: 20253, sum loss: 4896.917969, avg loss: 3.054846, ppl: 21.217915 +epoch: 0, batch: 20254, sum loss: 5132.897461, avg loss: 3.143232, ppl: 23.178650 +epoch: 0, batch: 20255, sum loss: 5216.792480, avg loss: 3.038318, ppl: 
20.870119 +epoch: 0, batch: 20256, sum loss: 5138.702148, avg loss: 3.118145, ppl: 22.604399 +epoch: 0, batch: 20257, sum loss: 4909.131348, avg loss: 2.977035, ppl: 19.629534 +epoch: 0, batch: 20258, sum loss: 5243.766602, avg loss: 3.348510, ppl: 28.460291 +epoch: 0, batch: 20259, sum loss: 5837.458496, avg loss: 3.205633, ppl: 24.671122 +epoch: 0, batch: 20260, sum loss: 4946.458008, avg loss: 3.199520, ppl: 24.520760 +epoch: 0, batch: 20261, sum loss: 4709.928711, avg loss: 3.117094, ppl: 22.580660 +epoch: 0, batch: 20262, sum loss: 6184.132324, avg loss: 3.495835, ppl: 32.977821 +epoch: 0, batch: 20263, sum loss: 5484.115234, avg loss: 3.214604, ppl: 24.893442 +epoch: 0, batch: 20264, sum loss: 6200.421387, avg loss: 3.419979, ppl: 30.568769 +epoch: 0, batch: 20265, sum loss: 5239.764648, avg loss: 3.058823, ppl: 21.302477 +epoch: 0, batch: 20266, sum loss: 4312.312500, avg loss: 3.060548, ppl: 21.339254 +epoch: 0, batch: 20267, sum loss: 4131.744629, avg loss: 2.919961, ppl: 18.540560 +epoch: 0, batch: 20268, sum loss: 6261.988281, avg loss: 3.343293, ppl: 28.312218 +epoch: 0, batch: 20269, sum loss: 5261.063477, avg loss: 2.962311, ppl: 19.342613 +epoch: 0, batch: 20270, sum loss: 5836.567383, avg loss: 3.327575, ppl: 27.870686 +epoch: 0, batch: 20271, sum loss: 5384.195312, avg loss: 3.313351, ppl: 27.477045 +epoch: 0, batch: 20272, sum loss: 5148.372559, avg loss: 3.457604, ppl: 31.740839 +epoch: 0, batch: 20273, sum loss: 5461.064941, avg loss: 2.881829, ppl: 17.846878 +epoch: 0, batch: 20274, sum loss: 4871.222656, avg loss: 2.979341, ppl: 19.674847 +epoch: 0, batch: 20275, sum loss: 5558.277344, avg loss: 3.467422, ppl: 32.053993 +epoch: 0, batch: 20276, sum loss: 5881.631836, avg loss: 3.459784, ppl: 31.810091 +epoch: 0, batch: 20277, sum loss: 6066.576660, avg loss: 3.439102, ppl: 31.158978 +epoch: 0, batch: 20278, sum loss: 4790.715820, avg loss: 3.174762, ppl: 23.921125 +epoch: 0, batch: 20279, sum loss: 4444.691406, avg loss: 2.905027, ppl: 
18.265739 +epoch: 0, batch: 20280, sum loss: 5569.775879, avg loss: 3.381770, ppl: 29.422815 +epoch: 0, batch: 20281, sum loss: 5045.023926, avg loss: 3.061301, ppl: 21.355322 +epoch: 0, batch: 20282, sum loss: 4361.513184, avg loss: 3.022532, ppl: 20.543232 +epoch: 0, batch: 20283, sum loss: 5215.358398, avg loss: 3.317658, ppl: 27.595644 +epoch: 0, batch: 20284, sum loss: 6203.886719, avg loss: 3.609009, ppl: 36.929447 +epoch: 0, batch: 20285, sum loss: 5636.468750, avg loss: 3.307787, ppl: 27.324587 +epoch: 0, batch: 20286, sum loss: 5874.785156, avg loss: 3.419549, ppl: 30.555632 +epoch: 0, batch: 20287, sum loss: 5571.969727, avg loss: 3.422586, ppl: 30.648561 +epoch: 0, batch: 20288, sum loss: 5598.341797, avg loss: 3.447255, ppl: 31.414032 +epoch: 0, batch: 20289, sum loss: 5935.525879, avg loss: 3.102732, ppl: 22.258673 +epoch: 0, batch: 20290, sum loss: 4991.473633, avg loss: 3.264535, ppl: 26.167938 +epoch: 0, batch: 20291, sum loss: 5926.383789, avg loss: 3.502591, ppl: 33.201363 +epoch: 0, batch: 20292, sum loss: 5219.467285, avg loss: 3.229868, ppl: 25.276331 +epoch: 0, batch: 20293, sum loss: 6593.591797, avg loss: 3.288574, ppl: 26.804625 +epoch: 0, batch: 20294, sum loss: 4975.410645, avg loss: 3.092238, ppl: 22.026312 +epoch: 0, batch: 20295, sum loss: 4896.753906, avg loss: 3.103139, ppl: 22.267750 +epoch: 0, batch: 20296, sum loss: 5779.626465, avg loss: 3.419897, ppl: 30.566277 +epoch: 0, batch: 20297, sum loss: 4798.484863, avg loss: 2.991574, ppl: 19.917013 +epoch: 0, batch: 20298, sum loss: 6848.484863, avg loss: 3.353812, ppl: 28.611599 +epoch: 0, batch: 20299, sum loss: 6186.374023, avg loss: 3.510996, ppl: 33.481586 +epoch: 0, batch: 20300, sum loss: 4996.381836, avg loss: 3.134493, ppl: 22.976980 +epoch: 0, batch: 20301, sum loss: 6557.555176, avg loss: 3.626966, ppl: 37.598579 +epoch: 0, batch: 20302, sum loss: 5512.748047, avg loss: 3.369651, ppl: 29.068369 +epoch: 0, batch: 20303, sum loss: 5216.996094, avg loss: 3.293558, ppl: 
26.938543 +epoch: 0, batch: 20304, sum loss: 5729.042480, avg loss: 3.352278, ppl: 28.567730 +epoch: 0, batch: 20305, sum loss: 5638.423828, avg loss: 3.192765, ppl: 24.355690 +epoch: 0, batch: 20306, sum loss: 5747.336426, avg loss: 3.205430, ppl: 24.666111 +epoch: 0, batch: 20307, sum loss: 7447.040527, avg loss: 3.573436, ppl: 35.638840 +epoch: 0, batch: 20308, sum loss: 5075.046387, avg loss: 3.387881, ppl: 29.603165 +epoch: 0, batch: 20309, sum loss: 4267.420410, avg loss: 3.225564, ppl: 25.167753 +epoch: 0, batch: 20310, sum loss: 5334.825684, avg loss: 3.241085, ppl: 25.561441 +epoch: 0, batch: 20311, sum loss: 5029.171875, avg loss: 3.187054, ppl: 24.216990 +epoch: 0, batch: 20312, sum loss: 5479.161133, avg loss: 3.292765, ppl: 26.917191 +epoch: 0, batch: 20313, sum loss: 6864.202637, avg loss: 3.599477, ppl: 36.579102 +epoch: 0, batch: 20314, sum loss: 5887.583008, avg loss: 3.494115, ppl: 32.921127 +epoch: 0, batch: 20315, sum loss: 5018.131348, avg loss: 3.307931, ppl: 27.328524 +epoch: 0, batch: 20316, sum loss: 4626.976562, avg loss: 3.004530, ppl: 20.176735 +epoch: 0, batch: 20317, sum loss: 5246.944824, avg loss: 3.215040, ppl: 24.904282 +epoch: 0, batch: 20318, sum loss: 4509.648438, avg loss: 2.815012, ppl: 16.693367 +epoch: 0, batch: 20319, sum loss: 4899.964844, avg loss: 3.151103, ppl: 23.361818 +epoch: 0, batch: 20320, sum loss: 6697.125977, avg loss: 3.543453, ppl: 34.586140 +epoch: 0, batch: 20321, sum loss: 5112.220703, avg loss: 3.155692, ppl: 23.469269 +epoch: 0, batch: 20322, sum loss: 6566.210938, avg loss: 3.437807, ppl: 31.118635 +epoch: 0, batch: 20323, sum loss: 4831.480957, avg loss: 3.189096, ppl: 24.266491 +epoch: 0, batch: 20324, sum loss: 5603.570312, avg loss: 3.288480, ppl: 26.802101 +epoch: 0, batch: 20325, sum loss: 6381.921875, avg loss: 3.535691, ppl: 34.318714 +epoch: 0, batch: 20326, sum loss: 5191.887695, avg loss: 3.142789, ppl: 23.168402 +epoch: 0, batch: 20327, sum loss: 5152.427734, avg loss: 3.096411, ppl: 
22.118425 +epoch: 0, batch: 20328, sum loss: 5498.500977, avg loss: 3.381612, ppl: 29.418150 +epoch: 0, batch: 20329, sum loss: 6408.744629, avg loss: 3.373024, ppl: 29.166586 +epoch: 0, batch: 20330, sum loss: 5660.817871, avg loss: 3.424572, ppl: 30.709505 +epoch: 0, batch: 20331, sum loss: 6252.201172, avg loss: 3.471516, ppl: 32.185509 +epoch: 0, batch: 20332, sum loss: 4831.541992, avg loss: 3.131265, ppl: 22.902937 +epoch: 0, batch: 20333, sum loss: 5349.456543, avg loss: 3.251949, ppl: 25.840656 +epoch: 0, batch: 20334, sum loss: 5896.999023, avg loss: 3.208378, ppl: 24.738930 +epoch: 0, batch: 20335, sum loss: 4840.853027, avg loss: 3.038828, ppl: 20.880760 +epoch: 0, batch: 20336, sum loss: 5744.818848, avg loss: 3.338070, ppl: 28.164721 +epoch: 0, batch: 20337, sum loss: 5218.925293, avg loss: 3.004563, ppl: 20.177389 +epoch: 0, batch: 20338, sum loss: 6675.178711, avg loss: 3.407442, ppl: 30.187927 +epoch: 0, batch: 20339, sum loss: 5601.871582, avg loss: 3.342406, ppl: 28.287090 +epoch: 0, batch: 20340, sum loss: 5073.224609, avg loss: 3.290029, ppl: 26.843637 +epoch: 0, batch: 20341, sum loss: 5629.215820, avg loss: 3.116952, ppl: 22.577469 +epoch: 0, batch: 20342, sum loss: 4978.358398, avg loss: 3.193302, ppl: 24.368771 +epoch: 0, batch: 20343, sum loss: 6553.218750, avg loss: 3.431005, ppl: 30.907675 +epoch: 0, batch: 20344, sum loss: 6032.650879, avg loss: 3.292932, ppl: 26.921677 +epoch: 0, batch: 20345, sum loss: 5924.866211, avg loss: 3.341718, ppl: 28.267654 +epoch: 0, batch: 20346, sum loss: 5897.239746, avg loss: 3.261748, ppl: 26.095100 +epoch: 0, batch: 20347, sum loss: 5004.520508, avg loss: 2.998514, ppl: 20.055719 +epoch: 0, batch: 20348, sum loss: 4964.538574, avg loss: 3.230019, ppl: 25.280128 +epoch: 0, batch: 20349, sum loss: 5661.979492, avg loss: 3.229880, ppl: 25.276627 +epoch: 0, batch: 20350, sum loss: 4706.316895, avg loss: 2.914128, ppl: 18.432734 +epoch: 0, batch: 20351, sum loss: 6028.273438, avg loss: 3.375293, ppl: 
29.232855 +epoch: 0, batch: 20352, sum loss: 6008.083984, avg loss: 3.343397, ppl: 28.315147 +epoch: 0, batch: 20353, sum loss: 5128.155762, avg loss: 3.293613, ppl: 26.940027 +epoch: 0, batch: 20354, sum loss: 5705.493652, avg loss: 3.166201, ppl: 23.717209 +epoch: 0, batch: 20355, sum loss: 4352.854004, avg loss: 3.089322, ppl: 21.962173 +epoch: 0, batch: 20356, sum loss: 5400.329102, avg loss: 3.265012, ppl: 26.180414 +epoch: 0, batch: 20357, sum loss: 5473.980469, avg loss: 3.117301, ppl: 22.585339 +epoch: 0, batch: 20358, sum loss: 5747.187500, avg loss: 3.388672, ppl: 29.626577 +epoch: 0, batch: 20359, sum loss: 5302.808105, avg loss: 3.126656, ppl: 22.797613 +epoch: 0, batch: 20360, sum loss: 5479.276855, avg loss: 3.215538, ppl: 24.916695 +epoch: 0, batch: 20361, sum loss: 5477.884277, avg loss: 3.197831, ppl: 24.479374 +epoch: 0, batch: 20362, sum loss: 4952.658691, avg loss: 3.049667, ppl: 21.108307 +epoch: 0, batch: 20363, sum loss: 5704.112305, avg loss: 3.349449, ppl: 28.487043 +epoch: 0, batch: 20364, sum loss: 4692.916016, avg loss: 3.149608, ppl: 23.326921 +epoch: 0, batch: 20365, sum loss: 5551.533203, avg loss: 3.188704, ppl: 24.256964 +epoch: 0, batch: 20366, sum loss: 5990.336914, avg loss: 3.382460, ppl: 29.443115 +epoch: 0, batch: 20367, sum loss: 6008.638672, avg loss: 3.246158, ppl: 25.691446 +epoch: 0, batch: 20368, sum loss: 4997.499512, avg loss: 3.162974, ppl: 23.640808 +epoch: 0, batch: 20369, sum loss: 5172.844238, avg loss: 2.954223, ppl: 19.186808 +epoch: 0, batch: 20370, sum loss: 5023.833008, avg loss: 3.089688, ppl: 21.970224 +epoch: 0, batch: 20371, sum loss: 5901.424805, avg loss: 3.107649, ppl: 22.368385 +epoch: 0, batch: 20372, sum loss: 6592.927734, avg loss: 3.602693, ppl: 36.696922 +epoch: 0, batch: 20373, sum loss: 4381.879395, avg loss: 3.038751, ppl: 20.879156 +epoch: 0, batch: 20374, sum loss: 6004.829102, avg loss: 3.279536, ppl: 26.563433 +epoch: 0, batch: 20375, sum loss: 5154.457520, avg loss: 3.082810, ppl: 
21.819622 +epoch: 0, batch: 20376, sum loss: 5266.150391, avg loss: 3.448690, ppl: 31.459167 +epoch: 0, batch: 20377, sum loss: 5169.409668, avg loss: 3.165591, ppl: 23.702744 +epoch: 0, batch: 20378, sum loss: 4195.172852, avg loss: 2.907258, ppl: 18.306524 +epoch: 0, batch: 20379, sum loss: 5273.970215, avg loss: 3.329527, ppl: 27.925121 +epoch: 0, batch: 20380, sum loss: 6209.370117, avg loss: 3.389394, ppl: 29.647989 +epoch: 0, batch: 20381, sum loss: 6243.303223, avg loss: 3.308587, ppl: 27.346447 +epoch: 0, batch: 20382, sum loss: 5507.136230, avg loss: 3.264456, ppl: 26.165861 +epoch: 0, batch: 20383, sum loss: 5902.876465, avg loss: 3.284851, ppl: 26.704994 +epoch: 0, batch: 20384, sum loss: 5764.760254, avg loss: 3.476936, ppl: 32.360428 +epoch: 0, batch: 20385, sum loss: 6526.460938, avg loss: 3.376338, ppl: 29.263412 +epoch: 0, batch: 20386, sum loss: 5300.195801, avg loss: 3.386707, ppl: 29.568411 +epoch: 0, batch: 20387, sum loss: 4506.938965, avg loss: 3.176137, ppl: 23.954050 +epoch: 0, batch: 20388, sum loss: 5948.160645, avg loss: 3.446211, ppl: 31.381266 +epoch: 0, batch: 20389, sum loss: 6266.790039, avg loss: 3.091658, ppl: 22.013538 +epoch: 0, batch: 20390, sum loss: 4732.523438, avg loss: 2.943111, ppl: 18.974792 +epoch: 0, batch: 20391, sum loss: 4503.645508, avg loss: 3.121029, ppl: 22.669703 +epoch: 0, batch: 20392, sum loss: 4265.274414, avg loss: 3.018595, ppl: 20.462517 +epoch: 0, batch: 20393, sum loss: 5938.621582, avg loss: 3.385759, ppl: 29.540409 +epoch: 0, batch: 20394, sum loss: 5451.044434, avg loss: 3.375260, ppl: 29.231873 +epoch: 0, batch: 20395, sum loss: 6321.481445, avg loss: 3.429995, ppl: 30.876497 +epoch: 0, batch: 20396, sum loss: 5807.755859, avg loss: 3.448786, ppl: 31.462183 +epoch: 0, batch: 20397, sum loss: 5738.383789, avg loss: 3.236539, ppl: 25.445505 +epoch: 0, batch: 20398, sum loss: 5660.356445, avg loss: 3.333543, ppl: 28.037512 +epoch: 0, batch: 20399, sum loss: 4203.581543, avg loss: 3.050495, ppl: 
21.125792 +epoch: 0, batch: 20400, sum loss: 7022.878418, avg loss: 3.399264, ppl: 29.942043 +epoch: 0, batch: 20401, sum loss: 5174.680664, avg loss: 3.060131, ppl: 21.330343 +epoch: 0, batch: 20402, sum loss: 5529.350098, avg loss: 3.190623, ppl: 24.303564 +epoch: 0, batch: 20403, sum loss: 5229.951660, avg loss: 3.314291, ppl: 27.502893 +epoch: 0, batch: 20404, sum loss: 4703.313477, avg loss: 3.125125, ppl: 22.762749 +epoch: 0, batch: 20405, sum loss: 5186.699219, avg loss: 3.178125, ppl: 24.001699 +epoch: 0, batch: 20406, sum loss: 6511.068359, avg loss: 3.056840, ppl: 21.260262 +epoch: 0, batch: 20407, sum loss: 6344.669922, avg loss: 3.282292, ppl: 26.636751 +epoch: 0, batch: 20408, sum loss: 4404.631348, avg loss: 3.161975, ppl: 23.617197 +epoch: 0, batch: 20409, sum loss: 5249.353516, avg loss: 3.214546, ppl: 24.891987 +epoch: 0, batch: 20410, sum loss: 5669.608887, avg loss: 3.294369, ppl: 26.960390 +epoch: 0, batch: 20411, sum loss: 4407.073242, avg loss: 2.903210, ppl: 18.232586 +epoch: 0, batch: 20412, sum loss: 5595.149902, avg loss: 3.223013, ppl: 25.103636 +epoch: 0, batch: 20413, sum loss: 5742.208008, avg loss: 3.242353, ppl: 25.593885 +epoch: 0, batch: 20414, sum loss: 5639.765137, avg loss: 3.200775, ppl: 24.551548 +epoch: 0, batch: 20415, sum loss: 5288.855957, avg loss: 3.240721, ppl: 25.552132 +epoch: 0, batch: 20416, sum loss: 5597.618164, avg loss: 3.329933, ppl: 27.936481 +epoch: 0, batch: 20417, sum loss: 4494.666016, avg loss: 2.990463, ppl: 19.894892 +epoch: 0, batch: 20418, sum loss: 4211.628418, avg loss: 2.963848, ppl: 19.372375 +epoch: 0, batch: 20419, sum loss: 4983.686035, avg loss: 3.283061, ppl: 26.657234 +epoch: 0, batch: 20420, sum loss: 5315.022461, avg loss: 3.113663, ppl: 22.503323 +epoch: 0, batch: 20421, sum loss: 6296.783203, avg loss: 3.529587, ppl: 34.109886 +epoch: 0, batch: 20422, sum loss: 6678.068848, avg loss: 3.479973, ppl: 32.458855 +epoch: 0, batch: 20423, sum loss: 4838.721191, avg loss: 3.060544, ppl: 
21.339172 +epoch: 0, batch: 20424, sum loss: 5525.510742, avg loss: 3.152031, ppl: 23.383518 +epoch: 0, batch: 20425, sum loss: 4404.031250, avg loss: 2.983761, ppl: 19.762003 +epoch: 0, batch: 20426, sum loss: 6121.736328, avg loss: 3.328840, ppl: 27.905952 +epoch: 0, batch: 20427, sum loss: 6124.828125, avg loss: 3.509930, ppl: 33.445930 +epoch: 0, batch: 20428, sum loss: 5769.407227, avg loss: 3.348466, ppl: 28.459049 +epoch: 0, batch: 20429, sum loss: 4880.750488, avg loss: 3.439571, ppl: 31.173571 +epoch: 0, batch: 20430, sum loss: 5221.280273, avg loss: 3.205206, ppl: 24.660572 +epoch: 0, batch: 20431, sum loss: 5722.327148, avg loss: 3.273643, ppl: 26.407360 +epoch: 0, batch: 20432, sum loss: 5353.510742, avg loss: 3.147273, ppl: 23.272505 +epoch: 0, batch: 20433, sum loss: 5135.577148, avg loss: 3.082579, ppl: 21.814592 +epoch: 0, batch: 20434, sum loss: 5527.548340, avg loss: 3.204376, ppl: 24.640120 +epoch: 0, batch: 20435, sum loss: 5203.376465, avg loss: 3.166997, ppl: 23.736103 +epoch: 0, batch: 20436, sum loss: 5108.587891, avg loss: 3.175008, ppl: 23.927013 +epoch: 0, batch: 20437, sum loss: 4569.660156, avg loss: 2.953885, ppl: 19.180326 +epoch: 0, batch: 20438, sum loss: 5415.769043, avg loss: 3.256626, ppl: 25.961798 +epoch: 0, batch: 20439, sum loss: 5472.485352, avg loss: 3.139693, ppl: 23.096781 +epoch: 0, batch: 20440, sum loss: 5602.599121, avg loss: 3.285982, ppl: 26.735229 +epoch: 0, batch: 20441, sum loss: 5921.160156, avg loss: 3.317177, ppl: 27.582371 +epoch: 0, batch: 20442, sum loss: 6192.225098, avg loss: 3.424903, ppl: 30.719669 +epoch: 0, batch: 20443, sum loss: 5824.999512, avg loss: 3.234314, ppl: 25.388948 +epoch: 0, batch: 20444, sum loss: 5490.250000, avg loss: 3.098335, ppl: 22.161028 +epoch: 0, batch: 20445, sum loss: 6440.990234, avg loss: 3.496737, ppl: 33.007587 +epoch: 0, batch: 20446, sum loss: 7285.019531, avg loss: 3.485655, ppl: 32.643810 +epoch: 0, batch: 20447, sum loss: 5207.087891, avg loss: 3.090260, ppl: 
21.982788 +epoch: 0, batch: 20448, sum loss: 6011.084961, avg loss: 3.371332, ppl: 29.117290 +epoch: 0, batch: 20449, sum loss: 5669.153320, avg loss: 3.177777, ppl: 23.993347 +epoch: 0, batch: 20450, sum loss: 5611.307617, avg loss: 3.368132, ppl: 29.024248 +epoch: 0, batch: 20451, sum loss: 4974.705566, avg loss: 3.251442, ppl: 25.827549 +epoch: 0, batch: 20452, sum loss: 5581.405273, avg loss: 3.155119, ppl: 23.455828 +epoch: 0, batch: 20453, sum loss: 4675.560547, avg loss: 3.169872, ppl: 23.804426 +epoch: 0, batch: 20454, sum loss: 4665.365234, avg loss: 3.184550, ppl: 24.156404 +epoch: 0, batch: 20455, sum loss: 5184.919434, avg loss: 3.308819, ppl: 27.352810 +epoch: 0, batch: 20456, sum loss: 5446.999023, avg loss: 3.125071, ppl: 22.761513 +epoch: 0, batch: 20457, sum loss: 5808.832031, avg loss: 3.172492, ppl: 23.866882 +epoch: 0, batch: 20458, sum loss: 4653.475098, avg loss: 3.087907, ppl: 21.931120 +epoch: 0, batch: 20459, sum loss: 6016.456543, avg loss: 3.361149, ppl: 28.822285 +epoch: 0, batch: 20460, sum loss: 5836.454102, avg loss: 3.578451, ppl: 35.818031 +epoch: 0, batch: 20461, sum loss: 4515.189941, avg loss: 3.124699, ppl: 22.753042 +epoch: 0, batch: 20462, sum loss: 4952.448730, avg loss: 3.215876, ppl: 24.925108 +epoch: 0, batch: 20463, sum loss: 6745.160645, avg loss: 3.704097, ppl: 40.613358 +epoch: 0, batch: 20464, sum loss: 4651.709473, avg loss: 3.074494, ppl: 21.638933 +epoch: 0, batch: 20465, sum loss: 6274.048828, avg loss: 3.307353, ppl: 27.312733 +epoch: 0, batch: 20466, sum loss: 4168.160156, avg loss: 2.962445, ppl: 19.345213 +epoch: 0, batch: 20467, sum loss: 5276.684082, avg loss: 3.190257, ppl: 24.294661 +epoch: 0, batch: 20468, sum loss: 5091.731934, avg loss: 3.241077, ppl: 25.561241 +epoch: 0, batch: 20469, sum loss: 5302.717285, avg loss: 3.023214, ppl: 20.557253 +epoch: 0, batch: 20470, sum loss: 5281.690430, avg loss: 3.177912, ppl: 23.996607 +epoch: 0, batch: 20471, sum loss: 5484.424805, avg loss: 3.211021, ppl: 
24.804409 +epoch: 0, batch: 20472, sum loss: 5496.996094, avg loss: 3.146535, ppl: 23.255337 +epoch: 0, batch: 20473, sum loss: 4946.290039, avg loss: 3.317431, ppl: 27.589396 +epoch: 0, batch: 20474, sum loss: 5353.581055, avg loss: 2.992499, ppl: 19.935442 +epoch: 0, batch: 20475, sum loss: 6126.256836, avg loss: 3.195752, ppl: 24.428547 +epoch: 0, batch: 20476, sum loss: 5148.305176, avg loss: 3.215681, ppl: 24.920259 +epoch: 0, batch: 20477, sum loss: 6006.109863, avg loss: 3.545519, ppl: 34.657681 +epoch: 0, batch: 20478, sum loss: 5869.086914, avg loss: 3.140228, ppl: 23.109142 +epoch: 0, batch: 20479, sum loss: 5173.788574, avg loss: 3.282861, ppl: 26.651909 +epoch: 0, batch: 20480, sum loss: 4258.001953, avg loss: 3.041430, ppl: 20.935160 +epoch: 0, batch: 20481, sum loss: 5592.847168, avg loss: 3.361086, ppl: 28.820477 +epoch: 0, batch: 20482, sum loss: 5027.078613, avg loss: 2.844980, ppl: 17.201206 +epoch: 0, batch: 20483, sum loss: 3997.185547, avg loss: 2.768134, ppl: 15.928881 +epoch: 0, batch: 20484, sum loss: 5788.060547, avg loss: 3.164604, ppl: 23.679369 +epoch: 0, batch: 20485, sum loss: 5789.288086, avg loss: 3.154926, ppl: 23.451292 +epoch: 0, batch: 20486, sum loss: 4902.092773, avg loss: 3.132328, ppl: 22.927282 +epoch: 0, batch: 20487, sum loss: 5329.348633, avg loss: 3.160942, ppl: 23.592817 +epoch: 0, batch: 20488, sum loss: 4657.306641, avg loss: 3.444753, ppl: 31.335556 +epoch: 0, batch: 20489, sum loss: 6740.928711, avg loss: 3.458660, ppl: 31.774382 +epoch: 0, batch: 20490, sum loss: 5856.875977, avg loss: 3.110396, ppl: 22.429928 +epoch: 0, batch: 20491, sum loss: 4927.955078, avg loss: 3.041948, ppl: 20.945997 +epoch: 0, batch: 20492, sum loss: 4466.322266, avg loss: 2.721708, ppl: 15.206266 +epoch: 0, batch: 20493, sum loss: 6385.994629, avg loss: 3.587637, ppl: 36.148571 +epoch: 0, batch: 20494, sum loss: 6622.154297, avg loss: 3.676932, ppl: 39.524940 +epoch: 0, batch: 20495, sum loss: 5678.458984, avg loss: 3.161726, ppl: 
23.611303 +epoch: 0, batch: 20496, sum loss: 5829.526855, avg loss: 3.178586, ppl: 24.012775 +epoch: 0, batch: 20497, sum loss: 6209.339844, avg loss: 3.239092, ppl: 25.510557 +epoch: 0, batch: 20498, sum loss: 5648.993164, avg loss: 3.366504, ppl: 28.977037 +epoch: 0, batch: 20499, sum loss: 5295.368652, avg loss: 3.052086, ppl: 21.159430 +epoch: 0, batch: 20500, sum loss: 5317.796875, avg loss: 3.141050, ppl: 23.128130 +epoch: 0, batch: 20501, sum loss: 4712.210449, avg loss: 3.192554, ppl: 24.350552 +epoch: 0, batch: 20502, sum loss: 5907.377930, avg loss: 3.020132, ppl: 20.493994 +epoch: 0, batch: 20503, sum loss: 4638.299805, avg loss: 2.926372, ppl: 18.659811 +epoch: 0, batch: 20504, sum loss: 5206.862793, avg loss: 2.950064, ppl: 19.107176 +epoch: 0, batch: 20505, sum loss: 6116.600586, avg loss: 3.180759, ppl: 24.065022 +epoch: 0, batch: 20506, sum loss: 6076.333008, avg loss: 3.316776, ppl: 27.571312 +epoch: 0, batch: 20507, sum loss: 4977.394043, avg loss: 3.276757, ppl: 26.489727 +epoch: 0, batch: 20508, sum loss: 6479.965820, avg loss: 3.421313, ppl: 30.609566 +epoch: 0, batch: 20509, sum loss: 5148.395508, avg loss: 3.211725, ppl: 24.821873 +epoch: 0, batch: 20510, sum loss: 5393.415527, avg loss: 3.441874, ppl: 31.245443 +epoch: 0, batch: 20511, sum loss: 5366.940918, avg loss: 3.429355, ppl: 30.856739 +epoch: 0, batch: 20512, sum loss: 4911.075195, avg loss: 3.039032, ppl: 20.885012 +epoch: 0, batch: 20513, sum loss: 4960.033203, avg loss: 2.945388, ppl: 19.018042 +epoch: 0, batch: 20514, sum loss: 5138.580078, avg loss: 3.595927, ppl: 36.449482 +epoch: 0, batch: 20515, sum loss: 5461.799805, avg loss: 3.158936, ppl: 23.545525 +epoch: 0, batch: 20516, sum loss: 4870.100586, avg loss: 3.424824, ppl: 30.717253 +epoch: 0, batch: 20517, sum loss: 4840.677734, avg loss: 3.143297, ppl: 23.180170 +epoch: 0, batch: 20518, sum loss: 6786.622070, avg loss: 3.543928, ppl: 34.602570 +epoch: 0, batch: 20519, sum loss: 6197.146973, avg loss: 3.277180, ppl: 
26.500927 +epoch: 0, batch: 20520, sum loss: 5068.585938, avg loss: 3.287021, ppl: 26.763010 +epoch: 0, batch: 20521, sum loss: 5598.217773, avg loss: 3.285339, ppl: 26.718042 +epoch: 0, batch: 20522, sum loss: 5980.780762, avg loss: 3.245133, ppl: 25.665134 +epoch: 0, batch: 20523, sum loss: 6009.612793, avg loss: 3.340530, ppl: 28.234077 +epoch: 0, batch: 20524, sum loss: 6378.677734, avg loss: 3.427554, ppl: 30.801207 +epoch: 0, batch: 20525, sum loss: 6986.119141, avg loss: 3.510612, ppl: 33.468761 +epoch: 0, batch: 20526, sum loss: 5026.724609, avg loss: 3.613749, ppl: 37.104900 +epoch: 0, batch: 20527, sum loss: 5906.926758, avg loss: 3.350497, ppl: 28.516911 +epoch: 0, batch: 20528, sum loss: 4623.855469, avg loss: 3.084627, ppl: 21.859310 +epoch: 0, batch: 20529, sum loss: 6159.555664, avg loss: 3.290361, ppl: 26.852560 +epoch: 0, batch: 20530, sum loss: 4990.897461, avg loss: 3.264158, ppl: 26.158077 +epoch: 0, batch: 20531, sum loss: 6637.027832, avg loss: 3.408849, ppl: 30.230421 +epoch: 0, batch: 20532, sum loss: 5274.892578, avg loss: 3.045550, ppl: 21.021587 +epoch: 0, batch: 20533, sum loss: 5614.818848, avg loss: 3.254968, ppl: 25.918777 +epoch: 0, batch: 20534, sum loss: 5409.967773, avg loss: 3.100268, ppl: 22.203899 +epoch: 0, batch: 20535, sum loss: 6007.016602, avg loss: 3.496517, ppl: 33.000317 +epoch: 0, batch: 20536, sum loss: 5576.933105, avg loss: 3.347499, ppl: 28.431528 +epoch: 0, batch: 20537, sum loss: 6820.450195, avg loss: 3.460401, ppl: 31.829733 +epoch: 0, batch: 20538, sum loss: 4781.384766, avg loss: 2.992106, ppl: 19.927601 +epoch: 0, batch: 20539, sum loss: 5470.332520, avg loss: 3.020615, ppl: 20.503889 +epoch: 0, batch: 20540, sum loss: 6081.217773, avg loss: 3.363505, ppl: 28.890285 +epoch: 0, batch: 20541, sum loss: 5506.097168, avg loss: 3.155356, ppl: 23.461397 +epoch: 0, batch: 20542, sum loss: 3994.450684, avg loss: 2.838984, ppl: 17.098389 +epoch: 0, batch: 20543, sum loss: 7035.557617, avg loss: 3.683538, ppl: 
39.786911 +epoch: 0, batch: 20544, sum loss: 7852.817871, avg loss: 3.470092, ppl: 32.139702 +epoch: 0, batch: 20545, sum loss: 4011.291992, avg loss: 2.732488, ppl: 15.371078 +epoch: 0, batch: 20546, sum loss: 5944.196289, avg loss: 3.242878, ppl: 25.607325 +epoch: 0, batch: 20547, sum loss: 5654.080078, avg loss: 3.435043, ppl: 31.032736 +epoch: 0, batch: 20548, sum loss: 5101.388184, avg loss: 3.139316, ppl: 23.088066 +epoch: 0, batch: 20549, sum loss: 5749.779297, avg loss: 3.217560, ppl: 24.967121 +epoch: 0, batch: 20550, sum loss: 6522.520508, avg loss: 3.452896, ppl: 31.591753 +epoch: 0, batch: 20551, sum loss: 6872.194336, avg loss: 3.363776, ppl: 28.898104 +epoch: 0, batch: 20552, sum loss: 6207.316895, avg loss: 3.485299, ppl: 32.632179 +epoch: 0, batch: 20553, sum loss: 4622.771973, avg loss: 3.285552, ppl: 26.723740 +epoch: 0, batch: 20554, sum loss: 4839.334961, avg loss: 3.328291, ppl: 27.890635 +epoch: 0, batch: 20555, sum loss: 5274.906250, avg loss: 3.191111, ppl: 24.315424 +epoch: 0, batch: 20556, sum loss: 6041.050781, avg loss: 3.288542, ppl: 26.803749 +epoch: 0, batch: 20557, sum loss: 6100.805664, avg loss: 3.444837, ppl: 31.338163 +epoch: 0, batch: 20558, sum loss: 4917.438965, avg loss: 3.214012, ppl: 24.878710 +epoch: 0, batch: 20559, sum loss: 5303.034180, avg loss: 3.196524, ppl: 24.447412 +epoch: 0, batch: 20560, sum loss: 5521.446289, avg loss: 3.354463, ppl: 28.630236 +epoch: 0, batch: 20561, sum loss: 6613.592285, avg loss: 3.519741, ppl: 33.775665 +epoch: 0, batch: 20562, sum loss: 6135.778809, avg loss: 3.378733, ppl: 29.333572 +epoch: 0, batch: 20563, sum loss: 4707.723633, avg loss: 3.037241, ppl: 20.847651 +epoch: 0, batch: 20564, sum loss: 5228.633789, avg loss: 3.219602, ppl: 25.018164 +epoch: 0, batch: 20565, sum loss: 5051.014648, avg loss: 3.121764, ppl: 22.686367 +epoch: 0, batch: 20566, sum loss: 5247.594727, avg loss: 3.119854, ppl: 22.643078 +epoch: 0, batch: 20567, sum loss: 5377.638672, avg loss: 3.265111, ppl: 
26.183029 +epoch: 0, batch: 20568, sum loss: 6903.260742, avg loss: 3.403975, ppl: 30.083437 +epoch: 0, batch: 20569, sum loss: 4981.730469, avg loss: 3.249661, ppl: 25.781605 +epoch: 0, batch: 20570, sum loss: 5637.372070, avg loss: 3.365596, ppl: 28.950735 +epoch: 0, batch: 20571, sum loss: 5960.122070, avg loss: 3.283814, ppl: 26.677324 +epoch: 0, batch: 20572, sum loss: 5638.570312, avg loss: 3.350309, ppl: 28.511547 +epoch: 0, batch: 20573, sum loss: 5274.256348, avg loss: 3.243700, ppl: 25.628372 +epoch: 0, batch: 20574, sum loss: 5314.956055, avg loss: 3.201781, ppl: 24.576256 +epoch: 0, batch: 20575, sum loss: 6601.223633, avg loss: 3.665310, ppl: 39.068253 +epoch: 0, batch: 20576, sum loss: 5235.615234, avg loss: 3.395341, ppl: 29.824812 +epoch: 0, batch: 20577, sum loss: 5997.206543, avg loss: 3.318875, ppl: 27.629238 +epoch: 0, batch: 20578, sum loss: 6051.729004, avg loss: 3.312386, ppl: 27.450539 +epoch: 0, batch: 20579, sum loss: 5809.780273, avg loss: 3.247502, ppl: 25.725986 +epoch: 0, batch: 20580, sum loss: 5527.619141, avg loss: 3.105404, ppl: 22.318237 +epoch: 0, batch: 20581, sum loss: 5045.445312, avg loss: 2.964422, ppl: 19.383488 +epoch: 0, batch: 20582, sum loss: 5206.783203, avg loss: 3.240064, ppl: 25.535366 +epoch: 0, batch: 20583, sum loss: 5934.098145, avg loss: 3.362095, ppl: 28.849577 +epoch: 0, batch: 20584, sum loss: 5663.309570, avg loss: 3.137568, ppl: 23.047735 +epoch: 0, batch: 20585, sum loss: 5512.799805, avg loss: 3.246643, ppl: 25.703909 +epoch: 0, batch: 20586, sum loss: 5532.919434, avg loss: 3.388193, ppl: 29.612398 +epoch: 0, batch: 20587, sum loss: 6146.760742, avg loss: 3.390381, ppl: 29.677259 +epoch: 0, batch: 20588, sum loss: 5160.267578, avg loss: 3.154198, ppl: 23.434235 +epoch: 0, batch: 20589, sum loss: 5598.623047, avg loss: 3.175623, ppl: 23.941723 +epoch: 0, batch: 20590, sum loss: 5277.036133, avg loss: 3.177024, ppl: 23.975288 +epoch: 0, batch: 20591, sum loss: 5691.443359, avg loss: 3.320562, ppl: 
27.675903 +epoch: 0, batch: 20592, sum loss: 4742.708984, avg loss: 3.266329, ppl: 26.214916 +epoch: 0, batch: 20593, sum loss: 6116.274414, avg loss: 3.428405, ppl: 30.827435 +epoch: 0, batch: 20594, sum loss: 6180.253418, avg loss: 3.377188, ppl: 29.288288 +epoch: 0, batch: 20595, sum loss: 5243.444824, avg loss: 3.236695, ppl: 25.449461 +epoch: 0, batch: 20596, sum loss: 6400.490234, avg loss: 3.323204, ppl: 27.749111 +epoch: 0, batch: 20597, sum loss: 5111.808105, avg loss: 3.258004, ppl: 25.997593 +epoch: 0, batch: 20598, sum loss: 4998.378906, avg loss: 2.904346, ppl: 18.253294 +epoch: 0, batch: 20599, sum loss: 5595.766602, avg loss: 3.131375, ppl: 22.905449 +epoch: 0, batch: 20600, sum loss: 5219.052734, avg loss: 3.006367, ppl: 20.213823 +epoch: 0, batch: 20601, sum loss: 4695.565430, avg loss: 2.955044, ppl: 19.202572 +epoch: 0, batch: 20602, sum loss: 5694.422363, avg loss: 3.280197, ppl: 26.581013 +epoch: 0, batch: 20603, sum loss: 4933.124023, avg loss: 3.002510, ppl: 20.136011 +epoch: 0, batch: 20604, sum loss: 4520.857422, avg loss: 3.107118, ppl: 22.356527 +epoch: 0, batch: 20605, sum loss: 6191.012207, avg loss: 3.273936, ppl: 26.415092 +epoch: 0, batch: 20606, sum loss: 6219.880371, avg loss: 3.280528, ppl: 26.589804 +epoch: 0, batch: 20607, sum loss: 4749.338867, avg loss: 2.937130, ppl: 18.861635 +epoch: 0, batch: 20608, sum loss: 6428.829102, avg loss: 3.293458, ppl: 26.935841 +epoch: 0, batch: 20609, sum loss: 6458.787109, avg loss: 3.498801, ppl: 33.075779 +epoch: 0, batch: 20610, sum loss: 5215.393066, avg loss: 3.080563, ppl: 21.770657 +epoch: 0, batch: 20611, sum loss: 4345.556152, avg loss: 2.862685, ppl: 17.508478 +epoch: 0, batch: 20612, sum loss: 5639.678711, avg loss: 3.228208, ppl: 25.234386 +epoch: 0, batch: 20613, sum loss: 5239.528809, avg loss: 3.067640, ppl: 21.491116 +epoch: 0, batch: 20614, sum loss: 5415.791016, avg loss: 3.139589, ppl: 23.094376 +epoch: 0, batch: 20615, sum loss: 6134.896484, avg loss: 3.363430, ppl: 
28.888107 +epoch: 0, batch: 20616, sum loss: 5844.229004, avg loss: 3.309303, ppl: 27.366039 +epoch: 0, batch: 20617, sum loss: 5491.576660, avg loss: 3.085155, ppl: 21.870867 +epoch: 0, batch: 20618, sum loss: 6634.715820, avg loss: 3.547976, ppl: 34.742943 +epoch: 0, batch: 20619, sum loss: 4976.509277, avg loss: 3.077619, ppl: 21.706654 +epoch: 0, batch: 20620, sum loss: 4932.842773, avg loss: 3.083027, ppl: 21.824358 +epoch: 0, batch: 20621, sum loss: 5447.515137, avg loss: 3.145216, ppl: 23.224703 +epoch: 0, batch: 20622, sum loss: 4545.031738, avg loss: 3.052405, ppl: 21.166195 +epoch: 0, batch: 20623, sum loss: 5267.974121, avg loss: 3.272033, ppl: 26.364897 +epoch: 0, batch: 20624, sum loss: 5192.260254, avg loss: 3.213032, ppl: 24.854336 +epoch: 0, batch: 20625, sum loss: 5900.194824, avg loss: 3.279708, ppl: 26.568010 +epoch: 0, batch: 20626, sum loss: 5313.092285, avg loss: 3.192964, ppl: 24.360533 +epoch: 0, batch: 20627, sum loss: 4470.451660, avg loss: 3.294364, ppl: 26.960260 +epoch: 0, batch: 20628, sum loss: 5336.986328, avg loss: 3.228667, ppl: 25.245975 +epoch: 0, batch: 20629, sum loss: 5050.991211, avg loss: 3.192788, ppl: 24.356241 +epoch: 0, batch: 20630, sum loss: 6253.179199, avg loss: 3.404017, ppl: 30.084715 +epoch: 0, batch: 20631, sum loss: 6497.836914, avg loss: 3.328810, ppl: 27.905115 +epoch: 0, batch: 20632, sum loss: 4810.378418, avg loss: 3.245870, ppl: 25.684036 +epoch: 0, batch: 20633, sum loss: 6061.146973, avg loss: 3.258681, ppl: 26.015207 +epoch: 0, batch: 20634, sum loss: 6028.887695, avg loss: 3.523605, ppl: 33.906437 +epoch: 0, batch: 20635, sum loss: 5386.297852, avg loss: 3.246714, ppl: 25.705723 +epoch: 0, batch: 20636, sum loss: 5993.672363, avg loss: 3.361566, ppl: 28.834318 +epoch: 0, batch: 20637, sum loss: 5820.432129, avg loss: 3.222831, ppl: 25.099070 +epoch: 0, batch: 20638, sum loss: 5185.130859, avg loss: 3.161665, ppl: 23.609879 +epoch: 0, batch: 20639, sum loss: 4708.300781, avg loss: 3.087410, ppl: 
21.920240 +epoch: 0, batch: 20640, sum loss: 6781.604004, avg loss: 3.501086, ppl: 33.151443 +epoch: 0, batch: 20641, sum loss: 5681.124512, avg loss: 3.281990, ppl: 26.628706 +epoch: 0, batch: 20642, sum loss: 6289.259766, avg loss: 3.354272, ppl: 28.624754 +epoch: 0, batch: 20643, sum loss: 5137.412598, avg loss: 3.397760, ppl: 29.897053 +epoch: 0, batch: 20644, sum loss: 5976.249512, avg loss: 3.305448, ppl: 27.260746 +epoch: 0, batch: 20645, sum loss: 5540.324707, avg loss: 3.411530, ppl: 30.311586 +epoch: 0, batch: 20646, sum loss: 5904.423340, avg loss: 3.318956, ppl: 27.631498 +epoch: 0, batch: 20647, sum loss: 4603.826172, avg loss: 2.904622, ppl: 18.258343 +epoch: 0, batch: 20648, sum loss: 4719.510742, avg loss: 2.990818, ppl: 19.901957 +epoch: 0, batch: 20649, sum loss: 6205.553223, avg loss: 3.470667, ppl: 32.158195 +epoch: 0, batch: 20650, sum loss: 4850.062012, avg loss: 3.139199, ppl: 23.085363 +epoch: 0, batch: 20651, sum loss: 5655.154297, avg loss: 3.289793, ppl: 26.837315 +epoch: 0, batch: 20652, sum loss: 5227.613770, avg loss: 3.089606, ppl: 21.968428 +epoch: 0, batch: 20653, sum loss: 5607.634277, avg loss: 3.450852, ppl: 31.527239 +epoch: 0, batch: 20654, sum loss: 5643.154785, avg loss: 3.383186, ppl: 29.464506 +epoch: 0, batch: 20655, sum loss: 5714.974609, avg loss: 3.387655, ppl: 29.596481 +epoch: 0, batch: 20656, sum loss: 5111.870117, avg loss: 3.053686, ppl: 21.193323 +epoch: 0, batch: 20657, sum loss: 5230.814453, avg loss: 3.177895, ppl: 23.996178 +epoch: 0, batch: 20658, sum loss: 4871.817871, avg loss: 3.099121, ppl: 22.178444 +epoch: 0, batch: 20659, sum loss: 6511.658203, avg loss: 3.398569, ppl: 29.921255 +epoch: 0, batch: 20660, sum loss: 4449.687500, avg loss: 3.360791, ppl: 28.811977 +epoch: 0, batch: 20661, sum loss: 5832.765137, avg loss: 3.473952, ppl: 32.263992 +epoch: 0, batch: 20662, sum loss: 4827.809570, avg loss: 3.030640, ppl: 20.710485 +epoch: 0, batch: 20663, sum loss: 5857.532715, avg loss: 3.248770, ppl: 
25.758644 +epoch: 0, batch: 20664, sum loss: 6199.273926, avg loss: 3.261060, ppl: 26.077152 +epoch: 0, batch: 20665, sum loss: 6882.774414, avg loss: 3.542344, ppl: 34.547806 +epoch: 0, batch: 20666, sum loss: 4959.123047, avg loss: 3.292911, ppl: 26.921106 +epoch: 0, batch: 20667, sum loss: 6378.470703, avg loss: 3.405484, ppl: 30.128866 +epoch: 0, batch: 20668, sum loss: 5764.786133, avg loss: 3.371220, ppl: 29.114021 +epoch: 0, batch: 20669, sum loss: 5716.774414, avg loss: 3.441767, ppl: 31.242105 +epoch: 0, batch: 20670, sum loss: 4878.321289, avg loss: 3.207312, ppl: 24.712566 +epoch: 0, batch: 20671, sum loss: 4807.519531, avg loss: 2.922504, ppl: 18.587776 +epoch: 0, batch: 20672, sum loss: 5213.794922, avg loss: 3.144629, ppl: 23.211063 +epoch: 0, batch: 20673, sum loss: 6474.734863, avg loss: 3.674651, ppl: 39.434898 +epoch: 0, batch: 20674, sum loss: 3759.981201, avg loss: 2.810150, ppl: 16.612417 +epoch: 0, batch: 20675, sum loss: 5726.349609, avg loss: 3.177775, ppl: 23.993301 +epoch: 0, batch: 20676, sum loss: 4925.390137, avg loss: 3.078369, ppl: 21.722937 +epoch: 0, batch: 20677, sum loss: 4174.566895, avg loss: 2.917238, ppl: 18.490139 +epoch: 0, batch: 20678, sum loss: 4586.882812, avg loss: 3.105540, ppl: 22.321276 +epoch: 0, batch: 20679, sum loss: 4995.487305, avg loss: 3.027568, ppl: 20.646959 +epoch: 0, batch: 20680, sum loss: 5965.553711, avg loss: 3.428479, ppl: 30.829721 +epoch: 0, batch: 20681, sum loss: 6216.026855, avg loss: 3.451431, ppl: 31.545496 +epoch: 0, batch: 20682, sum loss: 5451.241211, avg loss: 3.285860, ppl: 26.731953 +epoch: 0, batch: 20683, sum loss: 5175.006836, avg loss: 3.351689, ppl: 28.550905 +epoch: 0, batch: 20684, sum loss: 5027.336914, avg loss: 3.084256, ppl: 21.851196 +epoch: 0, batch: 20685, sum loss: 4935.754883, avg loss: 3.020658, ppl: 20.504774 +epoch: 0, batch: 20686, sum loss: 5623.460938, avg loss: 3.259978, ppl: 26.048954 +epoch: 0, batch: 20687, sum loss: 5533.132324, avg loss: 3.237643, ppl: 
25.473616 +epoch: 0, batch: 20688, sum loss: 5584.125000, avg loss: 3.278993, ppl: 26.549021 +epoch: 0, batch: 20689, sum loss: 6262.798828, avg loss: 3.441098, ppl: 31.221235 +epoch: 0, batch: 20690, sum loss: 5816.724609, avg loss: 3.280725, ppl: 26.595041 +epoch: 0, batch: 20691, sum loss: 5299.460938, avg loss: 3.277341, ppl: 26.505211 +epoch: 0, batch: 20692, sum loss: 6172.375977, avg loss: 3.260632, ppl: 26.065994 +epoch: 0, batch: 20693, sum loss: 5417.675293, avg loss: 3.381820, ppl: 29.424267 +epoch: 0, batch: 20694, sum loss: 6182.906738, avg loss: 3.196953, ppl: 24.457890 +epoch: 0, batch: 20695, sum loss: 4624.061523, avg loss: 3.124366, ppl: 22.745470 +epoch: 0, batch: 20696, sum loss: 4716.240234, avg loss: 3.178059, ppl: 24.000132 +epoch: 0, batch: 20697, sum loss: 5320.003906, avg loss: 3.255816, ppl: 25.940786 +epoch: 0, batch: 20698, sum loss: 5392.805176, avg loss: 3.157380, ppl: 23.508919 +epoch: 0, batch: 20699, sum loss: 6464.038086, avg loss: 3.429198, ppl: 30.851891 +epoch: 0, batch: 20700, sum loss: 4975.034180, avg loss: 3.258045, ppl: 25.998653 +epoch: 0, batch: 20701, sum loss: 4807.444824, avg loss: 3.277058, ppl: 26.497711 +epoch: 0, batch: 20702, sum loss: 5463.471680, avg loss: 2.935772, ppl: 18.836042 +epoch: 0, batch: 20703, sum loss: 4779.823242, avg loss: 3.124068, ppl: 22.738682 +epoch: 0, batch: 20704, sum loss: 5374.656250, avg loss: 3.301386, ppl: 27.150240 +epoch: 0, batch: 20705, sum loss: 4854.366211, avg loss: 3.199978, ppl: 24.531982 +epoch: 0, batch: 20706, sum loss: 5908.575684, avg loss: 3.498269, ppl: 33.058167 +epoch: 0, batch: 20707, sum loss: 6924.722168, avg loss: 3.443423, ppl: 31.293880 +epoch: 0, batch: 20708, sum loss: 6010.624512, avg loss: 3.517042, ppl: 33.684639 +epoch: 0, batch: 20709, sum loss: 5003.403320, avg loss: 2.846077, ppl: 17.220095 +epoch: 0, batch: 20710, sum loss: 6413.089355, avg loss: 3.446045, ppl: 31.376051 +epoch: 0, batch: 20711, sum loss: 5916.511230, avg loss: 3.290607, ppl: 
26.859156 +epoch: 0, batch: 20712, sum loss: 6480.891113, avg loss: 3.294810, ppl: 26.972284 +epoch: 0, batch: 20713, sum loss: 5792.593750, avg loss: 3.122692, ppl: 22.707428 +epoch: 0, batch: 20714, sum loss: 6322.491699, avg loss: 3.343465, ppl: 28.317064 +epoch: 0, batch: 20715, sum loss: 4634.611816, avg loss: 2.918521, ppl: 18.513893 +epoch: 0, batch: 20716, sum loss: 4440.219727, avg loss: 2.851779, ppl: 17.318563 +epoch: 0, batch: 20717, sum loss: 6748.906738, avg loss: 3.685913, ppl: 39.881531 +epoch: 0, batch: 20718, sum loss: 4642.313965, avg loss: 2.916026, ppl: 18.467756 +epoch: 0, batch: 20719, sum loss: 4577.323242, avg loss: 2.880631, ppl: 17.825525 +epoch: 0, batch: 20720, sum loss: 5645.632324, avg loss: 3.311221, ppl: 27.418592 +epoch: 0, batch: 20721, sum loss: 7023.568359, avg loss: 3.577977, ppl: 35.801041 +epoch: 0, batch: 20722, sum loss: 5496.062012, avg loss: 3.238693, ppl: 25.500370 +epoch: 0, batch: 20723, sum loss: 4685.500000, avg loss: 3.267434, ppl: 26.243902 +epoch: 0, batch: 20724, sum loss: 5245.523926, avg loss: 3.399562, ppl: 29.950983 +epoch: 0, batch: 20725, sum loss: 4971.438477, avg loss: 3.266386, ppl: 26.216410 +epoch: 0, batch: 20726, sum loss: 5020.058105, avg loss: 3.149346, ppl: 23.320816 +epoch: 0, batch: 20727, sum loss: 5393.027344, avg loss: 3.254694, ppl: 25.911671 +epoch: 0, batch: 20728, sum loss: 5606.324707, avg loss: 3.267089, ppl: 26.234861 +epoch: 0, batch: 20729, sum loss: 5734.439453, avg loss: 3.357400, ppl: 28.714434 +epoch: 0, batch: 20730, sum loss: 4134.534180, avg loss: 2.970211, ppl: 19.496038 +epoch: 0, batch: 20731, sum loss: 6425.218262, avg loss: 3.406797, ppl: 30.168451 +epoch: 0, batch: 20732, sum loss: 5749.056152, avg loss: 3.144998, ppl: 23.219625 +epoch: 0, batch: 20733, sum loss: 6181.346680, avg loss: 3.612710, ppl: 37.066376 +epoch: 0, batch: 20734, sum loss: 5173.802734, avg loss: 3.128055, ppl: 22.829529 +epoch: 0, batch: 20735, sum loss: 6308.741211, avg loss: 3.304736, ppl: 
27.241352 +epoch: 0, batch: 20736, sum loss: 5870.482910, avg loss: 3.078387, ppl: 21.723324 +epoch: 0, batch: 20737, sum loss: 4399.064941, avg loss: 2.886526, ppl: 17.930902 +epoch: 0, batch: 20738, sum loss: 4660.333984, avg loss: 3.136160, ppl: 23.015322 +epoch: 0, batch: 20739, sum loss: 4464.779297, avg loss: 2.994487, ppl: 19.975101 +epoch: 0, batch: 20740, sum loss: 5577.706543, avg loss: 3.244739, ppl: 25.655022 +epoch: 0, batch: 20741, sum loss: 4988.062012, avg loss: 3.100101, ppl: 22.200188 +epoch: 0, batch: 20742, sum loss: 5739.906250, avg loss: 3.312121, ppl: 27.443275 +epoch: 0, batch: 20743, sum loss: 4546.650391, avg loss: 2.985325, ppl: 19.792940 +epoch: 0, batch: 20744, sum loss: 4818.005859, avg loss: 3.290987, ppl: 26.869385 +epoch: 0, batch: 20745, sum loss: 5580.302246, avg loss: 3.278673, ppl: 26.540541 +epoch: 0, batch: 20746, sum loss: 5623.779297, avg loss: 3.312002, ppl: 27.440010 +epoch: 0, batch: 20747, sum loss: 4846.108398, avg loss: 3.069100, ppl: 21.522516 +epoch: 0, batch: 20748, sum loss: 4728.379883, avg loss: 3.013626, ppl: 20.361103 +epoch: 0, batch: 20749, sum loss: 5614.696777, avg loss: 3.228693, ppl: 25.246637 +epoch: 0, batch: 20750, sum loss: 4711.137207, avg loss: 3.029670, ppl: 20.690414 +epoch: 0, batch: 20751, sum loss: 5624.924316, avg loss: 3.285587, ppl: 26.724657 +epoch: 0, batch: 20752, sum loss: 6393.075684, avg loss: 3.329727, ppl: 27.930721 +epoch: 0, batch: 20753, sum loss: 4958.062988, avg loss: 3.141992, ppl: 23.149927 +epoch: 0, batch: 20754, sum loss: 5520.620117, avg loss: 3.333708, ppl: 28.042124 +epoch: 0, batch: 20755, sum loss: 5905.476074, avg loss: 3.421481, ppl: 30.614719 +epoch: 0, batch: 20756, sum loss: 5477.914062, avg loss: 3.559398, ppl: 35.142048 +epoch: 0, batch: 20757, sum loss: 4624.470703, avg loss: 3.064593, ppl: 21.425741 +epoch: 0, batch: 20758, sum loss: 5187.945312, avg loss: 3.134710, ppl: 22.981977 +epoch: 0, batch: 20759, sum loss: 5652.491211, avg loss: 3.301689, ppl: 
27.158463 +epoch: 0, batch: 20760, sum loss: 6753.224609, avg loss: 3.414168, ppl: 30.391649 +epoch: 0, batch: 20761, sum loss: 6782.489258, avg loss: 3.360996, ppl: 28.817873 +epoch: 0, batch: 20762, sum loss: 5917.997559, avg loss: 3.278669, ppl: 26.540421 +epoch: 0, batch: 20763, sum loss: 5559.812988, avg loss: 3.349285, ppl: 28.482359 +epoch: 0, batch: 20764, sum loss: 4982.186035, avg loss: 3.139374, ppl: 23.089403 +epoch: 0, batch: 20765, sum loss: 5375.863281, avg loss: 3.395997, ppl: 29.844387 +epoch: 0, batch: 20766, sum loss: 5240.154785, avg loss: 3.281249, ppl: 26.608994 +epoch: 0, batch: 20767, sum loss: 6064.985352, avg loss: 3.532315, ppl: 34.203064 +epoch: 0, batch: 20768, sum loss: 5196.235352, avg loss: 3.324527, ppl: 27.785847 +epoch: 0, batch: 20769, sum loss: 5159.334473, avg loss: 3.091273, ppl: 22.005074 +epoch: 0, batch: 20770, sum loss: 6005.226562, avg loss: 3.157322, ppl: 23.507563 +epoch: 0, batch: 20771, sum loss: 4840.927734, avg loss: 3.095222, ppl: 22.092148 +epoch: 0, batch: 20772, sum loss: 5045.482910, avg loss: 3.063438, ppl: 21.401011 +epoch: 0, batch: 20773, sum loss: 6204.423828, avg loss: 3.224752, ppl: 25.147348 +epoch: 0, batch: 20774, sum loss: 5921.680176, avg loss: 3.462971, ppl: 31.911636 +epoch: 0, batch: 20775, sum loss: 4789.805176, avg loss: 3.159502, ppl: 23.558867 +epoch: 0, batch: 20776, sum loss: 4614.555176, avg loss: 2.859080, ppl: 17.445471 +epoch: 0, batch: 20777, sum loss: 5948.628906, avg loss: 3.609605, ppl: 36.951447 +epoch: 0, batch: 20778, sum loss: 5342.752441, avg loss: 3.021919, ppl: 20.530653 +epoch: 0, batch: 20779, sum loss: 3951.864258, avg loss: 2.998379, ppl: 20.053015 +epoch: 0, batch: 20780, sum loss: 4987.662109, avg loss: 2.963554, ppl: 19.366686 +epoch: 0, batch: 20781, sum loss: 6501.860352, avg loss: 3.476931, ppl: 32.360245 +epoch: 0, batch: 20782, sum loss: 4095.292969, avg loss: 2.950499, ppl: 19.115492 +epoch: 0, batch: 20783, sum loss: 5752.079590, avg loss: 3.177945, ppl: 
23.997379 +epoch: 0, batch: 20784, sum loss: 5201.521973, avg loss: 3.292103, ppl: 26.899363 +epoch: 0, batch: 20785, sum loss: 4859.364746, avg loss: 3.037103, ppl: 20.844767 +epoch: 0, batch: 20786, sum loss: 5390.696777, avg loss: 3.231833, ppl: 25.326031 +epoch: 0, batch: 20787, sum loss: 4345.053711, avg loss: 3.470490, ppl: 32.152500 +epoch: 0, batch: 20788, sum loss: 5777.009766, avg loss: 3.254653, ppl: 25.910627 +epoch: 0, batch: 20789, sum loss: 6592.771484, avg loss: 3.208161, ppl: 24.733568 +epoch: 0, batch: 20790, sum loss: 6524.007812, avg loss: 3.437307, ppl: 31.103081 +epoch: 0, batch: 20791, sum loss: 4621.476074, avg loss: 3.038446, ppl: 20.872780 +epoch: 0, batch: 20792, sum loss: 6008.517578, avg loss: 3.240840, ppl: 25.555178 +epoch: 0, batch: 20793, sum loss: 6241.229492, avg loss: 3.616008, ppl: 37.188805 +epoch: 0, batch: 20794, sum loss: 5580.908203, avg loss: 3.275181, ppl: 26.448008 +epoch: 0, batch: 20795, sum loss: 5057.364258, avg loss: 2.999623, ppl: 20.077967 +epoch: 0, batch: 20796, sum loss: 5442.790527, avg loss: 3.190381, ppl: 24.297689 +epoch: 0, batch: 20797, sum loss: 4690.363281, avg loss: 3.031909, ppl: 20.736786 +epoch: 0, batch: 20798, sum loss: 6671.855957, avg loss: 3.545088, ppl: 34.642735 +epoch: 0, batch: 20799, sum loss: 5608.925293, avg loss: 3.210604, ppl: 24.794056 +epoch: 0, batch: 20800, sum loss: 5956.396484, avg loss: 3.365196, ppl: 28.939161 +epoch: 0, batch: 20801, sum loss: 6283.158203, avg loss: 3.422199, ppl: 30.636726 +epoch: 0, batch: 20802, sum loss: 5351.884766, avg loss: 3.265335, ppl: 26.188890 +epoch: 0, batch: 20803, sum loss: 4587.754883, avg loss: 2.948429, ppl: 19.075954 +epoch: 0, batch: 20804, sum loss: 5411.318848, avg loss: 3.181257, ppl: 24.077005 +epoch: 0, batch: 20805, sum loss: 5575.531250, avg loss: 3.038437, ppl: 20.872591 +epoch: 0, batch: 20806, sum loss: 5002.610840, avg loss: 3.377860, ppl: 29.307993 +epoch: 0, batch: 20807, sum loss: 5210.699219, avg loss: 3.323150, ppl: 
27.747616 +epoch: 0, batch: 20808, sum loss: 5724.677734, avg loss: 3.187460, ppl: 24.226812 +epoch: 0, batch: 20809, sum loss: 5101.981934, avg loss: 3.161079, ppl: 23.596052 +epoch: 0, batch: 20810, sum loss: 3937.195801, avg loss: 2.922937, ppl: 18.595821 +epoch: 0, batch: 20811, sum loss: 5234.982422, avg loss: 3.233467, ppl: 25.367445 +epoch: 0, batch: 20812, sum loss: 5460.051758, avg loss: 3.436156, ppl: 31.067307 +epoch: 0, batch: 20813, sum loss: 5927.938965, avg loss: 3.111779, ppl: 22.460966 +epoch: 0, batch: 20814, sum loss: 5173.635742, avg loss: 3.146980, ppl: 23.265703 +epoch: 0, batch: 20815, sum loss: 6456.686523, avg loss: 3.499559, ppl: 33.100849 +epoch: 0, batch: 20816, sum loss: 5530.128906, avg loss: 3.386484, ppl: 29.561834 +epoch: 0, batch: 20817, sum loss: 5209.234375, avg loss: 2.943070, ppl: 18.974014 +epoch: 0, batch: 20818, sum loss: 5280.169922, avg loss: 3.506089, ppl: 33.317707 +epoch: 0, batch: 20819, sum loss: 4518.132812, avg loss: 3.084050, ppl: 21.846697 +epoch: 0, batch: 20820, sum loss: 5358.092285, avg loss: 3.259180, ppl: 26.028193 +epoch: 0, batch: 20821, sum loss: 5742.525879, avg loss: 3.217102, ppl: 24.955683 +epoch: 0, batch: 20822, sum loss: 5227.873535, avg loss: 3.336231, ppl: 28.112968 +epoch: 0, batch: 20823, sum loss: 4752.949707, avg loss: 3.131060, ppl: 22.898247 +epoch: 0, batch: 20824, sum loss: 5427.356445, avg loss: 3.228647, ppl: 25.245489 +epoch: 0, batch: 20825, sum loss: 5539.374512, avg loss: 3.030293, ppl: 20.703287 +epoch: 0, batch: 20826, sum loss: 5391.728516, avg loss: 2.959236, ppl: 19.283241 +epoch: 0, batch: 20827, sum loss: 4405.359375, avg loss: 3.082827, ppl: 21.819998 +epoch: 0, batch: 20828, sum loss: 4466.646484, avg loss: 3.190462, ppl: 24.299643 +epoch: 0, batch: 20829, sum loss: 5666.400879, avg loss: 3.185161, ppl: 24.171171 +epoch: 0, batch: 20830, sum loss: 4678.352539, avg loss: 2.837085, ppl: 17.065937 +epoch: 0, batch: 20831, sum loss: 5476.530273, avg loss: 3.171124, ppl: 
23.834248 +epoch: 0, batch: 20832, sum loss: 4668.750000, avg loss: 3.230969, ppl: 25.304163 +epoch: 0, batch: 20833, sum loss: 4781.905762, avg loss: 3.065324, ppl: 21.441414 +epoch: 0, batch: 20834, sum loss: 5309.008301, avg loss: 3.194349, ppl: 24.394295 +epoch: 0, batch: 20835, sum loss: 5907.731445, avg loss: 3.219472, ppl: 25.014919 +epoch: 0, batch: 20836, sum loss: 5389.227539, avg loss: 3.109768, ppl: 22.415836 +epoch: 0, batch: 20837, sum loss: 5123.064941, avg loss: 3.154597, ppl: 23.443577 +epoch: 0, batch: 20838, sum loss: 6244.896484, avg loss: 3.384768, ppl: 29.511139 +epoch: 0, batch: 20839, sum loss: 5186.609863, avg loss: 3.100185, ppl: 22.202066 +epoch: 0, batch: 20840, sum loss: 6242.453613, avg loss: 3.505027, ppl: 33.282352 +epoch: 0, batch: 20841, sum loss: 5046.319336, avg loss: 3.274704, ppl: 26.435406 +epoch: 0, batch: 20842, sum loss: 6211.383789, avg loss: 3.284708, ppl: 26.701200 +epoch: 0, batch: 20843, sum loss: 5342.753906, avg loss: 3.297996, ppl: 27.058367 +epoch: 0, batch: 20844, sum loss: 5869.033691, avg loss: 3.306498, ppl: 27.289379 +epoch: 0, batch: 20845, sum loss: 5098.994629, avg loss: 3.137843, ppl: 23.054083 +epoch: 0, batch: 20846, sum loss: 5575.896484, avg loss: 3.463290, ppl: 31.921818 +epoch: 0, batch: 20847, sum loss: 5387.327637, avg loss: 3.348246, ppl: 28.452780 +epoch: 0, batch: 20848, sum loss: 5516.419922, avg loss: 3.163085, ppl: 23.643417 +epoch: 0, batch: 20849, sum loss: 5853.693848, avg loss: 3.455545, ppl: 31.675552 +epoch: 0, batch: 20850, sum loss: 5462.695801, avg loss: 3.082786, ppl: 21.819098 +epoch: 0, batch: 20851, sum loss: 7430.923828, avg loss: 3.616021, ppl: 37.189304 +epoch: 0, batch: 20852, sum loss: 4956.934082, avg loss: 3.271904, ppl: 26.361477 +epoch: 0, batch: 20853, sum loss: 6150.480469, avg loss: 3.512553, ppl: 33.533779 +epoch: 0, batch: 20854, sum loss: 5823.894531, avg loss: 3.185938, ppl: 24.189964 +epoch: 0, batch: 20855, sum loss: 5305.125000, avg loss: 3.228926, ppl: 
25.252514 +epoch: 0, batch: 20856, sum loss: 5303.926758, avg loss: 3.158980, ppl: 23.546558 +epoch: 0, batch: 20857, sum loss: 6013.357910, avg loss: 3.294991, ppl: 26.977160 +epoch: 0, batch: 20858, sum loss: 5165.428711, avg loss: 3.216331, ppl: 24.936472 +epoch: 0, batch: 20859, sum loss: 5274.088867, avg loss: 3.398253, ppl: 29.911812 +epoch: 0, batch: 20860, sum loss: 5803.598145, avg loss: 3.256789, ppl: 25.966032 +epoch: 0, batch: 20861, sum loss: 5302.850098, avg loss: 3.410193, ppl: 30.271091 +epoch: 0, batch: 20862, sum loss: 5948.064453, avg loss: 3.317381, ppl: 27.588013 +epoch: 0, batch: 20863, sum loss: 5765.314453, avg loss: 3.288827, ppl: 26.811407 +epoch: 0, batch: 20864, sum loss: 6034.823242, avg loss: 3.283364, ppl: 26.665325 +epoch: 0, batch: 20865, sum loss: 5680.409180, avg loss: 3.274011, ppl: 26.417089 +epoch: 0, batch: 20866, sum loss: 5372.163086, avg loss: 3.330541, ppl: 27.953470 +epoch: 0, batch: 20867, sum loss: 5088.549316, avg loss: 3.180343, ppl: 24.055006 +epoch: 0, batch: 20868, sum loss: 6126.430664, avg loss: 3.430253, ppl: 30.884441 +epoch: 0, batch: 20869, sum loss: 4835.845215, avg loss: 3.168968, ppl: 23.782932 +epoch: 0, batch: 20870, sum loss: 5241.002930, avg loss: 3.207468, ppl: 24.716433 +epoch: 0, batch: 20871, sum loss: 6441.632812, avg loss: 3.437371, ppl: 31.105068 +epoch: 0, batch: 20872, sum loss: 5488.603027, avg loss: 2.974853, ppl: 19.586735 +epoch: 0, batch: 20873, sum loss: 4995.742188, avg loss: 3.227224, ppl: 25.209568 +epoch: 0, batch: 20874, sum loss: 5207.622070, avg loss: 3.122076, ppl: 22.693432 +epoch: 0, batch: 20875, sum loss: 5172.858887, avg loss: 3.154182, ppl: 23.433865 +epoch: 0, batch: 20876, sum loss: 4408.242676, avg loss: 2.921301, ppl: 18.565424 +epoch: 0, batch: 20877, sum loss: 4790.046875, avg loss: 3.204045, ppl: 24.631954 +epoch: 0, batch: 20878, sum loss: 5582.518555, avg loss: 3.168285, ppl: 23.766699 +epoch: 0, batch: 20879, sum loss: 5988.053711, avg loss: 3.392665, ppl: 
29.745115 +epoch: 0, batch: 20880, sum loss: 5678.275391, avg loss: 3.208065, ppl: 24.731192 +epoch: 0, batch: 20881, sum loss: 5348.708984, avg loss: 3.516574, ppl: 33.668884 +epoch: 0, batch: 20882, sum loss: 6112.614258, avg loss: 3.261801, ppl: 26.096483 +epoch: 0, batch: 20883, sum loss: 6627.777344, avg loss: 3.651668, ppl: 38.538898 +epoch: 0, batch: 20884, sum loss: 5737.551270, avg loss: 3.369085, ppl: 29.051920 +epoch: 0, batch: 20885, sum loss: 5835.489258, avg loss: 3.378975, ppl: 29.340672 +epoch: 0, batch: 20886, sum loss: 5412.196289, avg loss: 3.242778, ppl: 25.604755 +epoch: 0, batch: 20887, sum loss: 5042.769531, avg loss: 3.332961, ppl: 28.021185 +epoch: 0, batch: 20888, sum loss: 5337.314453, avg loss: 3.190266, ppl: 24.294880 +epoch: 0, batch: 20889, sum loss: 4791.474609, avg loss: 3.318196, ppl: 27.610485 +epoch: 0, batch: 20890, sum loss: 4327.667969, avg loss: 3.168132, ppl: 23.763050 +epoch: 0, batch: 20891, sum loss: 5021.979492, avg loss: 3.080969, ppl: 21.779499 +epoch: 0, batch: 20892, sum loss: 5389.354492, avg loss: 3.268256, ppl: 26.265497 +epoch: 0, batch: 20893, sum loss: 5146.419434, avg loss: 3.145733, ppl: 23.236710 +epoch: 0, batch: 20894, sum loss: 6140.116699, avg loss: 3.320777, ppl: 27.681849 +epoch: 0, batch: 20895, sum loss: 5204.335938, avg loss: 3.423905, ppl: 30.689034 +epoch: 0, batch: 20896, sum loss: 5812.982910, avg loss: 3.336959, ppl: 28.133446 +epoch: 0, batch: 20897, sum loss: 5741.021973, avg loss: 3.318510, ppl: 27.619162 +epoch: 0, batch: 20898, sum loss: 6071.741211, avg loss: 3.360122, ppl: 28.792709 +epoch: 0, batch: 20899, sum loss: 5380.312500, avg loss: 3.012493, ppl: 20.338041 +epoch: 0, batch: 20900, sum loss: 4820.426758, avg loss: 3.144440, ppl: 23.206680 +epoch: 0, batch: 20901, sum loss: 5831.137695, avg loss: 3.288854, ppl: 26.812117 +epoch: 0, batch: 20902, sum loss: 4636.183594, avg loss: 2.938012, ppl: 18.878286 +epoch: 0, batch: 20903, sum loss: 5485.634766, avg loss: 3.132858, ppl: 
22.939453 +epoch: 0, batch: 20904, sum loss: 6089.860840, avg loss: 3.225562, ppl: 25.167711 +epoch: 0, batch: 20905, sum loss: 5392.644043, avg loss: 3.240772, ppl: 25.553436 +epoch: 0, batch: 20906, sum loss: 5838.445312, avg loss: 3.234596, ppl: 25.396105 +epoch: 0, batch: 20907, sum loss: 4917.158691, avg loss: 3.054136, ppl: 21.202854 +epoch: 0, batch: 20908, sum loss: 6218.468750, avg loss: 3.407380, ppl: 30.186056 +epoch: 0, batch: 20909, sum loss: 6331.078613, avg loss: 3.207233, ppl: 24.710623 +epoch: 0, batch: 20910, sum loss: 5728.601562, avg loss: 3.365806, ppl: 28.956823 +epoch: 0, batch: 20911, sum loss: 5314.821289, avg loss: 3.294991, ppl: 26.977184 +epoch: 0, batch: 20912, sum loss: 5528.658691, avg loss: 3.134160, ppl: 22.969339 +epoch: 0, batch: 20913, sum loss: 5076.034180, avg loss: 3.112222, ppl: 22.470924 +epoch: 0, batch: 20914, sum loss: 7324.809570, avg loss: 3.588834, ppl: 36.191837 +epoch: 0, batch: 20915, sum loss: 5375.637695, avg loss: 3.265880, ppl: 26.203150 +epoch: 0, batch: 20916, sum loss: 5353.959473, avg loss: 3.050689, ppl: 21.129898 +epoch: 0, batch: 20917, sum loss: 4911.680176, avg loss: 3.092998, ppl: 22.043060 +epoch: 0, batch: 20918, sum loss: 5004.691406, avg loss: 3.271040, ppl: 26.338722 +epoch: 0, batch: 20919, sum loss: 6452.379883, avg loss: 3.302139, ppl: 27.170696 +epoch: 0, batch: 20920, sum loss: 5360.146484, avg loss: 3.211592, ppl: 24.818560 +epoch: 0, batch: 20921, sum loss: 5189.586914, avg loss: 3.027764, ppl: 20.650997 +epoch: 0, batch: 20922, sum loss: 4774.051758, avg loss: 3.044676, ppl: 21.003222 +epoch: 0, batch: 20923, sum loss: 5209.767578, avg loss: 3.099207, ppl: 22.180365 +epoch: 0, batch: 20924, sum loss: 4883.496094, avg loss: 3.150643, ppl: 23.351072 +epoch: 0, batch: 20925, sum loss: 4116.726562, avg loss: 2.909347, ppl: 18.344820 +epoch: 0, batch: 20926, sum loss: 5197.184570, avg loss: 3.112087, ppl: 22.467876 +epoch: 0, batch: 20927, sum loss: 5698.829102, avg loss: 3.346347, ppl: 
28.398806 +epoch: 0, batch: 20928, sum loss: 5440.422852, avg loss: 3.211584, ppl: 24.818357 +epoch: 0, batch: 20929, sum loss: 5066.920898, avg loss: 3.386979, ppl: 29.576477 +epoch: 0, batch: 20930, sum loss: 5714.945801, avg loss: 3.351875, ppl: 28.556215 +epoch: 0, batch: 20931, sum loss: 5754.333008, avg loss: 3.355296, ppl: 28.654089 +epoch: 0, batch: 20932, sum loss: 5354.140137, avg loss: 3.213769, ppl: 24.872665 +epoch: 0, batch: 20933, sum loss: 5711.795410, avg loss: 3.381762, ppl: 29.422562 +epoch: 0, batch: 20934, sum loss: 5219.039062, avg loss: 3.159225, ppl: 23.552330 +epoch: 0, batch: 20935, sum loss: 4594.380859, avg loss: 2.977564, ppl: 19.639914 +epoch: 0, batch: 20936, sum loss: 6246.500977, avg loss: 3.294568, ppl: 26.965771 +epoch: 0, batch: 20937, sum loss: 5464.520996, avg loss: 3.210647, ppl: 24.795120 +epoch: 0, batch: 20938, sum loss: 4887.437500, avg loss: 3.215420, ppl: 24.913742 +epoch: 0, batch: 20939, sum loss: 5845.386230, avg loss: 3.206465, ppl: 24.691658 +epoch: 0, batch: 20940, sum loss: 6333.862305, avg loss: 3.241485, ppl: 25.571676 +epoch: 0, batch: 20941, sum loss: 4340.444824, avg loss: 3.205646, ppl: 24.671434 +epoch: 0, batch: 20942, sum loss: 6744.118164, avg loss: 3.627820, ppl: 37.630703 +epoch: 0, batch: 20943, sum loss: 4743.014648, avg loss: 3.089912, ppl: 21.975138 +epoch: 0, batch: 20944, sum loss: 5772.604980, avg loss: 3.281754, ppl: 26.622429 +epoch: 0, batch: 20945, sum loss: 5716.085938, avg loss: 3.249623, ppl: 25.780607 +epoch: 0, batch: 20946, sum loss: 6826.265625, avg loss: 3.442393, ppl: 31.261681 +epoch: 0, batch: 20947, sum loss: 5888.614746, avg loss: 3.336326, ppl: 28.115629 +epoch: 0, batch: 20948, sum loss: 4575.372070, avg loss: 3.112498, ppl: 22.477123 +epoch: 0, batch: 20949, sum loss: 6281.017090, avg loss: 3.589153, ppl: 36.203384 +epoch: 0, batch: 20950, sum loss: 6524.646484, avg loss: 3.296941, ppl: 27.029829 +epoch: 0, batch: 20951, sum loss: 6259.078125, avg loss: 3.354275, ppl: 
28.624849 +epoch: 0, batch: 20952, sum loss: 5142.149902, avg loss: 3.035507, ppl: 20.811537 +epoch: 0, batch: 20953, sum loss: 5696.410645, avg loss: 3.191267, ppl: 24.319210 +epoch: 0, batch: 20954, sum loss: 4904.504883, avg loss: 3.012595, ppl: 20.340118 +epoch: 0, batch: 20955, sum loss: 4274.792969, avg loss: 3.124849, ppl: 22.756449 +epoch: 0, batch: 20956, sum loss: 4841.220703, avg loss: 3.153890, ppl: 23.427011 +epoch: 0, batch: 20957, sum loss: 6829.223633, avg loss: 3.336211, ppl: 28.112406 +epoch: 0, batch: 20958, sum loss: 5556.591309, avg loss: 3.278225, ppl: 26.528641 +epoch: 0, batch: 20959, sum loss: 4845.592773, avg loss: 3.036086, ppl: 20.823572 +epoch: 0, batch: 20960, sum loss: 5507.409668, avg loss: 3.194553, ppl: 24.399275 +epoch: 0, batch: 20961, sum loss: 5104.835449, avg loss: 2.950772, ppl: 19.120705 +epoch: 0, batch: 20962, sum loss: 5622.981445, avg loss: 3.214969, ppl: 24.902531 +epoch: 0, batch: 20963, sum loss: 5551.304688, avg loss: 3.082346, ppl: 21.809502 +epoch: 0, batch: 20964, sum loss: 6000.293457, avg loss: 3.426781, ppl: 30.777416 +epoch: 0, batch: 20965, sum loss: 5929.385254, avg loss: 3.351829, ppl: 28.554914 +epoch: 0, batch: 20966, sum loss: 5358.759766, avg loss: 3.311965, ppl: 27.438997 +epoch: 0, batch: 20967, sum loss: 4933.342773, avg loss: 3.085268, ppl: 21.873318 +epoch: 0, batch: 20968, sum loss: 5703.692871, avg loss: 3.074767, ppl: 21.644840 +epoch: 0, batch: 20969, sum loss: 5094.356445, avg loss: 3.034161, ppl: 20.783535 +epoch: 0, batch: 20970, sum loss: 5180.958008, avg loss: 3.355543, ppl: 28.661160 +epoch: 0, batch: 20971, sum loss: 6166.922363, avg loss: 3.441363, ppl: 31.229498 +epoch: 0, batch: 20972, sum loss: 5238.488281, avg loss: 2.976414, ppl: 19.617336 +epoch: 0, batch: 20973, sum loss: 6180.121582, avg loss: 3.297824, ppl: 27.053698 +epoch: 0, batch: 20974, sum loss: 5444.607422, avg loss: 3.334113, ppl: 28.053499 +epoch: 0, batch: 20975, sum loss: 5126.893066, avg loss: 3.149197, ppl: 
23.317335 +epoch: 0, batch: 20976, sum loss: 5800.083008, avg loss: 3.304891, ppl: 27.245562 +epoch: 0, batch: 20977, sum loss: 5778.076172, avg loss: 3.351552, ppl: 28.547018 +epoch: 0, batch: 20978, sum loss: 5360.372070, avg loss: 3.256605, ppl: 25.961260 +epoch: 0, batch: 20979, sum loss: 5980.790527, avg loss: 3.344961, ppl: 28.359474 +epoch: 0, batch: 20980, sum loss: 5737.631348, avg loss: 3.349464, ppl: 28.487452 +epoch: 0, batch: 20981, sum loss: 4138.596680, avg loss: 2.896149, ppl: 18.104290 +epoch: 0, batch: 20982, sum loss: 5453.272461, avg loss: 3.329226, ppl: 27.916727 +epoch: 0, batch: 20983, sum loss: 5452.929199, avg loss: 2.968388, ppl: 19.460529 +epoch: 0, batch: 20984, sum loss: 5849.768066, avg loss: 3.282698, ppl: 26.647581 +epoch: 0, batch: 20985, sum loss: 6163.505371, avg loss: 3.211832, ppl: 24.824518 +epoch: 0, batch: 20986, sum loss: 5479.425293, avg loss: 3.069706, ppl: 21.535574 +epoch: 0, batch: 20987, sum loss: 5460.746094, avg loss: 3.265997, ppl: 26.206217 +epoch: 0, batch: 20988, sum loss: 5961.433594, avg loss: 3.268330, ppl: 26.267433 +epoch: 0, batch: 20989, sum loss: 4614.798828, avg loss: 2.898743, ppl: 18.151314 +epoch: 0, batch: 20990, sum loss: 5193.768066, avg loss: 3.390188, ppl: 29.671530 +epoch: 0, batch: 20991, sum loss: 5257.607910, avg loss: 3.454407, ppl: 31.639536 +epoch: 0, batch: 20992, sum loss: 4933.260254, avg loss: 3.022831, ppl: 20.549389 +epoch: 0, batch: 20993, sum loss: 5336.882812, avg loss: 3.362875, ppl: 28.872086 +epoch: 0, batch: 20994, sum loss: 4844.318359, avg loss: 2.988475, ppl: 19.855381 +epoch: 0, batch: 20995, sum loss: 4953.812500, avg loss: 3.302542, ppl: 27.181639 +epoch: 0, batch: 20996, sum loss: 6037.770996, avg loss: 3.023421, ppl: 20.561504 +epoch: 0, batch: 20997, sum loss: 4905.583496, avg loss: 2.894150, ppl: 18.068129 +epoch: 0, batch: 20998, sum loss: 6207.246582, avg loss: 3.303484, ppl: 27.207270 +epoch: 0, batch: 20999, sum loss: 5224.245605, avg loss: 3.164292, ppl: 
23.671974 +epoch: 0, batch: 21000, sum loss: 4566.134277, avg loss: 2.955427, ppl: 19.209923 +epoch: 0, batch: 21001, sum loss: 6075.855957, avg loss: 3.287801, ppl: 26.783895 +epoch: 0, batch: 21002, sum loss: 5149.577637, avg loss: 3.161189, ppl: 23.598646 +epoch: 0, batch: 21003, sum loss: 4283.426758, avg loss: 3.010138, ppl: 20.290205 +epoch: 0, batch: 21004, sum loss: 5392.234863, avg loss: 3.140498, ppl: 23.115374 +epoch: 0, batch: 21005, sum loss: 5708.520508, avg loss: 3.348106, ppl: 28.448797 +epoch: 0, batch: 21006, sum loss: 5432.272461, avg loss: 3.310343, ppl: 27.394508 +epoch: 0, batch: 21007, sum loss: 6306.167480, avg loss: 3.399551, ppl: 29.950653 +epoch: 0, batch: 21008, sum loss: 4766.183594, avg loss: 3.003266, ppl: 20.151251 +epoch: 0, batch: 21009, sum loss: 6477.579102, avg loss: 3.480698, ppl: 32.482391 +epoch: 0, batch: 21010, sum loss: 5099.322266, avg loss: 2.969902, ppl: 19.490015 +epoch: 0, batch: 21011, sum loss: 5357.603027, avg loss: 3.294959, ppl: 26.976303 +epoch: 0, batch: 21012, sum loss: 4960.188477, avg loss: 3.189832, ppl: 24.284348 +epoch: 0, batch: 21013, sum loss: 5753.634766, avg loss: 3.414620, ppl: 30.405392 +epoch: 0, batch: 21014, sum loss: 6115.138184, avg loss: 3.257932, ppl: 25.995722 +epoch: 0, batch: 21015, sum loss: 4882.344727, avg loss: 3.137754, ppl: 23.052027 +epoch: 0, batch: 21016, sum loss: 4510.139648, avg loss: 2.919184, ppl: 18.526173 +epoch: 0, batch: 21017, sum loss: 5546.253906, avg loss: 3.042377, ppl: 20.955004 +epoch: 0, batch: 21018, sum loss: 5461.696777, avg loss: 3.288198, ppl: 26.794537 +epoch: 0, batch: 21019, sum loss: 6130.076172, avg loss: 3.439998, ppl: 31.186893 +epoch: 0, batch: 21020, sum loss: 5548.879883, avg loss: 3.389664, ppl: 29.655991 +epoch: 0, batch: 21021, sum loss: 6762.340820, avg loss: 3.413599, ppl: 30.374372 +epoch: 0, batch: 21022, sum loss: 5865.129883, avg loss: 3.160092, ppl: 23.572756 +epoch: 0, batch: 21023, sum loss: 5298.113281, avg loss: 3.447048, ppl: 
31.407553 +epoch: 0, batch: 21024, sum loss: 6679.702148, avg loss: 3.473584, ppl: 32.252125 +epoch: 0, batch: 21025, sum loss: 5330.517090, avg loss: 2.961398, ppl: 19.324976 +epoch: 0, batch: 21026, sum loss: 5627.721680, avg loss: 3.461083, ppl: 31.851465 +epoch: 0, batch: 21027, sum loss: 5707.840820, avg loss: 3.276602, ppl: 26.485622 +epoch: 0, batch: 21028, sum loss: 6194.860352, avg loss: 3.501900, ppl: 33.178421 +epoch: 0, batch: 21029, sum loss: 5581.689453, avg loss: 3.454016, ppl: 31.627144 +epoch: 0, batch: 21030, sum loss: 4755.750488, avg loss: 3.178978, ppl: 24.022184 +epoch: 0, batch: 21031, sum loss: 4603.045898, avg loss: 3.036310, ppl: 20.828249 +epoch: 0, batch: 21032, sum loss: 6014.654785, avg loss: 3.334066, ppl: 28.052168 +epoch: 0, batch: 21033, sum loss: 4971.369629, avg loss: 3.099358, ppl: 22.183701 +epoch: 0, batch: 21034, sum loss: 4609.985840, avg loss: 3.030892, ppl: 20.715696 +epoch: 0, batch: 21035, sum loss: 5717.712402, avg loss: 3.131278, ppl: 22.903221 +epoch: 0, batch: 21036, sum loss: 6028.137695, avg loss: 3.186119, ppl: 24.194355 +epoch: 0, batch: 21037, sum loss: 4956.257324, avg loss: 3.330818, ppl: 27.961210 +epoch: 0, batch: 21038, sum loss: 6521.355469, avg loss: 3.328921, ppl: 27.908201 +epoch: 0, batch: 21039, sum loss: 5188.758789, avg loss: 3.269539, ppl: 26.299221 +epoch: 0, batch: 21040, sum loss: 5164.877930, avg loss: 3.052528, ppl: 21.168800 +epoch: 0, batch: 21041, sum loss: 5471.097656, avg loss: 3.208855, ppl: 24.750734 +epoch: 0, batch: 21042, sum loss: 5064.222656, avg loss: 3.478175, ppl: 32.400536 +epoch: 0, batch: 21043, sum loss: 6479.767578, avg loss: 3.443022, ppl: 31.281347 +epoch: 0, batch: 21044, sum loss: 5036.418457, avg loss: 3.086041, ppl: 21.890238 +epoch: 0, batch: 21045, sum loss: 4372.757812, avg loss: 3.175569, ppl: 23.940428 +epoch: 0, batch: 21046, sum loss: 5011.185547, avg loss: 3.187777, ppl: 24.234495 +epoch: 0, batch: 21047, sum loss: 6487.418945, avg loss: 3.332008, ppl: 
27.994495 +epoch: 0, batch: 21048, sum loss: 5290.075195, avg loss: 3.019449, ppl: 20.480013 +epoch: 0, batch: 21049, sum loss: 5478.989746, avg loss: 3.226732, ppl: 25.197166 +epoch: 0, batch: 21050, sum loss: 4654.615723, avg loss: 3.056215, ppl: 21.246990 +epoch: 0, batch: 21051, sum loss: 4696.345703, avg loss: 3.106049, ppl: 22.332630 +epoch: 0, batch: 21052, sum loss: 5242.886719, avg loss: 3.262531, ppl: 26.115547 +epoch: 0, batch: 21053, sum loss: 5328.338379, avg loss: 3.619795, ppl: 37.329918 +epoch: 0, batch: 21054, sum loss: 5142.472168, avg loss: 3.172407, ppl: 23.864862 +epoch: 0, batch: 21055, sum loss: 4895.968750, avg loss: 3.075357, ppl: 21.657616 +epoch: 0, batch: 21056, sum loss: 3814.729248, avg loss: 2.804948, ppl: 16.526218 +epoch: 0, batch: 21057, sum loss: 5669.055176, avg loss: 3.452531, ppl: 31.580210 +epoch: 0, batch: 21058, sum loss: 5787.304199, avg loss: 3.482133, ppl: 32.529022 +epoch: 0, batch: 21059, sum loss: 6418.041016, avg loss: 3.640409, ppl: 38.107418 +epoch: 0, batch: 21060, sum loss: 6153.085938, avg loss: 3.606733, ppl: 36.845467 +epoch: 0, batch: 21061, sum loss: 5476.526367, avg loss: 3.104607, ppl: 22.300449 +epoch: 0, batch: 21062, sum loss: 6622.207031, avg loss: 3.429419, ppl: 30.858696 +epoch: 0, batch: 21063, sum loss: 4745.370117, avg loss: 2.891755, ppl: 18.024918 +epoch: 0, batch: 21064, sum loss: 5604.749512, avg loss: 3.314459, ppl: 27.507504 +epoch: 0, batch: 21065, sum loss: 6268.728516, avg loss: 3.571925, ppl: 35.585026 +epoch: 0, batch: 21066, sum loss: 5151.634277, avg loss: 3.382557, ppl: 29.445972 +epoch: 0, batch: 21067, sum loss: 6396.651855, avg loss: 3.397053, ppl: 29.875940 +epoch: 0, batch: 21068, sum loss: 5203.056641, avg loss: 3.316161, ppl: 27.554363 +epoch: 0, batch: 21069, sum loss: 6369.151367, avg loss: 3.345143, ppl: 28.364620 +epoch: 0, batch: 21070, sum loss: 6609.571289, avg loss: 3.475064, ppl: 32.299896 +epoch: 0, batch: 21071, sum loss: 5390.320312, avg loss: 3.358455, ppl: 
28.744743 +epoch: 0, batch: 21072, sum loss: 5160.686523, avg loss: 3.280792, ppl: 26.596842 +epoch: 0, batch: 21073, sum loss: 5464.985840, avg loss: 2.942911, ppl: 18.970989 +epoch: 0, batch: 21074, sum loss: 6141.174805, avg loss: 3.487322, ppl: 32.698277 +epoch: 0, batch: 21075, sum loss: 5158.746582, avg loss: 3.081689, ppl: 21.795176 +epoch: 0, batch: 21076, sum loss: 5294.606934, avg loss: 3.357392, ppl: 28.714201 +epoch: 0, batch: 21077, sum loss: 6765.675293, avg loss: 3.319762, ppl: 27.653774 +epoch: 0, batch: 21078, sum loss: 4807.100098, avg loss: 2.960037, ppl: 19.298685 +epoch: 0, batch: 21079, sum loss: 5080.936035, avg loss: 3.307901, ppl: 27.327709 +epoch: 0, batch: 21080, sum loss: 6569.820801, avg loss: 3.555098, ppl: 34.991241 +epoch: 0, batch: 21081, sum loss: 5263.803711, avg loss: 3.275547, ppl: 26.457687 +epoch: 0, batch: 21082, sum loss: 5527.780273, avg loss: 3.213826, ppl: 24.874065 +epoch: 0, batch: 21083, sum loss: 5531.125977, avg loss: 3.184298, ppl: 24.150335 +epoch: 0, batch: 21084, sum loss: 5306.291504, avg loss: 2.926802, ppl: 18.667833 +epoch: 0, batch: 21085, sum loss: 5463.436523, avg loss: 3.285290, ppl: 26.716719 +epoch: 0, batch: 21086, sum loss: 6922.081055, avg loss: 3.467977, ppl: 32.071781 +epoch: 0, batch: 21087, sum loss: 5540.007324, avg loss: 3.124652, ppl: 22.751968 +epoch: 0, batch: 21088, sum loss: 4898.903809, avg loss: 3.039022, ppl: 20.884811 +epoch: 0, batch: 21089, sum loss: 5748.332520, avg loss: 3.409450, ppl: 30.248604 +epoch: 0, batch: 21090, sum loss: 6428.712891, avg loss: 3.271610, ppl: 26.353724 +epoch: 0, batch: 21091, sum loss: 4634.829590, avg loss: 3.150802, ppl: 23.354786 +epoch: 0, batch: 21092, sum loss: 5856.462402, avg loss: 3.359990, ppl: 28.788900 +epoch: 0, batch: 21093, sum loss: 5066.806152, avg loss: 3.081999, ppl: 21.801937 +epoch: 0, batch: 21094, sum loss: 4919.087891, avg loss: 3.192140, ppl: 24.340464 +epoch: 0, batch: 21095, sum loss: 3804.956543, avg loss: 2.781401, ppl: 
16.141619 +epoch: 0, batch: 21096, sum loss: 6528.257324, avg loss: 3.203267, ppl: 24.612801 +epoch: 0, batch: 21097, sum loss: 5418.884277, avg loss: 3.020560, ppl: 20.502766 +epoch: 0, batch: 21098, sum loss: 5116.827637, avg loss: 3.065804, ppl: 21.451712 +epoch: 0, batch: 21099, sum loss: 5366.750977, avg loss: 3.185015, ppl: 24.167662 +epoch: 0, batch: 21100, sum loss: 5426.107910, avg loss: 3.316692, ppl: 27.569004 +epoch: 0, batch: 21101, sum loss: 5588.080078, avg loss: 3.189544, ppl: 24.277348 +epoch: 0, batch: 21102, sum loss: 6068.227539, avg loss: 3.278351, ppl: 26.531986 +epoch: 0, batch: 21103, sum loss: 5707.950684, avg loss: 3.263551, ppl: 26.142204 +epoch: 0, batch: 21104, sum loss: 5325.366211, avg loss: 3.202265, ppl: 24.588154 +epoch: 0, batch: 21105, sum loss: 5040.774414, avg loss: 3.303260, ppl: 27.201166 +epoch: 0, batch: 21106, sum loss: 5618.066406, avg loss: 3.264420, ppl: 26.164919 +epoch: 0, batch: 21107, sum loss: 6773.251953, avg loss: 3.748341, ppl: 42.450581 +epoch: 0, batch: 21108, sum loss: 5342.764160, avg loss: 3.356008, ppl: 28.674480 +epoch: 0, batch: 21109, sum loss: 5196.323730, avg loss: 3.343838, ppl: 28.327633 +epoch: 0, batch: 21110, sum loss: 5093.127441, avg loss: 3.057099, ppl: 21.265778 +epoch: 0, batch: 21111, sum loss: 5396.990234, avg loss: 3.061254, ppl: 21.354309 +epoch: 0, batch: 21112, sum loss: 6697.159180, avg loss: 3.510041, ppl: 33.449654 +epoch: 0, batch: 21113, sum loss: 5305.025391, avg loss: 3.256615, ppl: 25.961500 +epoch: 0, batch: 21114, sum loss: 5442.973145, avg loss: 3.239865, ppl: 25.530277 +epoch: 0, batch: 21115, sum loss: 5544.335938, avg loss: 3.204818, ppl: 24.651026 +epoch: 0, batch: 21116, sum loss: 5201.411133, avg loss: 3.077758, ppl: 21.709677 +epoch: 0, batch: 21117, sum loss: 5201.974609, avg loss: 3.162294, ppl: 23.624739 +epoch: 0, batch: 21118, sum loss: 4577.183594, avg loss: 3.041318, ppl: 20.932817 +epoch: 0, batch: 21119, sum loss: 4500.974121, avg loss: 3.114861, ppl: 
22.530298 +epoch: 0, batch: 21120, sum loss: 5280.145996, avg loss: 3.095044, ppl: 22.088219 +epoch: 0, batch: 21121, sum loss: 5499.636719, avg loss: 3.281406, ppl: 26.613169 +epoch: 0, batch: 21122, sum loss: 5570.680176, avg loss: 3.296260, ppl: 27.011436 +epoch: 0, batch: 21123, sum loss: 5043.093262, avg loss: 3.195877, ppl: 24.431581 +epoch: 0, batch: 21124, sum loss: 4744.943848, avg loss: 3.053375, ppl: 21.186720 +epoch: 0, batch: 21125, sum loss: 4068.912354, avg loss: 2.788836, ppl: 16.262087 +epoch: 0, batch: 21126, sum loss: 6006.112793, avg loss: 3.439927, ppl: 31.184692 +epoch: 0, batch: 21127, sum loss: 4873.259277, avg loss: 3.117888, ppl: 22.598606 +epoch: 0, batch: 21128, sum loss: 5961.520508, avg loss: 3.310117, ppl: 27.388323 +epoch: 0, batch: 21129, sum loss: 5127.725586, avg loss: 3.157466, ppl: 23.510954 +epoch: 0, batch: 21130, sum loss: 5158.812500, avg loss: 3.076215, ppl: 21.676197 +epoch: 0, batch: 21131, sum loss: 4596.186035, avg loss: 2.982600, ppl: 19.739067 +epoch: 0, batch: 21132, sum loss: 5161.499023, avg loss: 2.941025, ppl: 18.935246 +epoch: 0, batch: 21133, sum loss: 5182.910156, avg loss: 3.039830, ppl: 20.901693 +epoch: 0, batch: 21134, sum loss: 5347.752441, avg loss: 3.007735, ppl: 20.241501 +epoch: 0, batch: 21135, sum loss: 5047.853027, avg loss: 3.192823, ppl: 24.357082 +epoch: 0, batch: 21136, sum loss: 5418.219727, avg loss: 3.273849, ppl: 26.412807 +epoch: 0, batch: 21137, sum loss: 4786.367676, avg loss: 3.370682, ppl: 29.098352 +epoch: 0, batch: 21138, sum loss: 6343.766602, avg loss: 3.489421, ppl: 32.766960 +epoch: 0, batch: 21139, sum loss: 6187.459473, avg loss: 3.237812, ppl: 25.477922 +epoch: 0, batch: 21140, sum loss: 5266.607910, avg loss: 3.028527, ppl: 20.666763 +epoch: 0, batch: 21141, sum loss: 4831.574219, avg loss: 3.271208, ppl: 26.343151 +epoch: 0, batch: 21142, sum loss: 6302.433594, avg loss: 3.213888, ppl: 24.875607 +epoch: 0, batch: 21143, sum loss: 4901.400879, avg loss: 3.291740, ppl: 
26.889608 +epoch: 0, batch: 21144, sum loss: 4785.987793, avg loss: 2.983783, ppl: 19.762436 +epoch: 0, batch: 21145, sum loss: 4596.346191, avg loss: 2.809503, ppl: 16.601658 +epoch: 0, batch: 21146, sum loss: 4870.855469, avg loss: 3.162893, ppl: 23.638885 +epoch: 0, batch: 21147, sum loss: 5469.987305, avg loss: 3.291208, ppl: 26.875303 +epoch: 0, batch: 21148, sum loss: 5786.155273, avg loss: 3.269014, ppl: 26.285419 +epoch: 0, batch: 21149, sum loss: 5483.311523, avg loss: 3.122615, ppl: 22.705675 +epoch: 0, batch: 21150, sum loss: 5359.081543, avg loss: 3.299927, ppl: 27.110661 +epoch: 0, batch: 21151, sum loss: 5413.884277, avg loss: 3.167867, ppl: 23.756750 +epoch: 0, batch: 21152, sum loss: 5484.558105, avg loss: 3.128670, ppl: 22.843576 +epoch: 0, batch: 21153, sum loss: 6621.328125, avg loss: 3.421875, ppl: 30.626780 +epoch: 0, batch: 21154, sum loss: 5418.841797, avg loss: 3.374123, ppl: 29.198668 +epoch: 0, batch: 21155, sum loss: 4470.318359, avg loss: 3.223012, ppl: 25.103630 +epoch: 0, batch: 21156, sum loss: 5067.561523, avg loss: 3.173176, ppl: 23.883213 +epoch: 0, batch: 21157, sum loss: 6090.085449, avg loss: 3.181863, ppl: 24.091591 +epoch: 0, batch: 21158, sum loss: 5222.722168, avg loss: 3.190423, ppl: 24.298704 +epoch: 0, batch: 21159, sum loss: 4818.736816, avg loss: 2.947240, ppl: 19.053288 +epoch: 0, batch: 21160, sum loss: 4470.427734, avg loss: 3.061937, ppl: 21.368906 +epoch: 0, batch: 21161, sum loss: 5550.745605, avg loss: 3.193755, ppl: 24.379793 +epoch: 0, batch: 21162, sum loss: 5400.356445, avg loss: 3.233747, ppl: 25.374546 +epoch: 0, batch: 21163, sum loss: 5400.474609, avg loss: 3.323369, ppl: 27.753696 +epoch: 0, batch: 21164, sum loss: 5156.610840, avg loss: 3.091493, ppl: 22.009922 +epoch: 0, batch: 21165, sum loss: 5751.441895, avg loss: 3.349704, ppl: 28.494299 +epoch: 0, batch: 21166, sum loss: 5574.238281, avg loss: 3.190749, ppl: 24.306620 +epoch: 0, batch: 21167, sum loss: 4461.388672, avg loss: 3.076820, ppl: 
21.689318 +epoch: 0, batch: 21168, sum loss: 5526.178711, avg loss: 3.432409, ppl: 30.951117 +epoch: 0, batch: 21169, sum loss: 5794.773926, avg loss: 3.270189, ppl: 26.316301 +epoch: 0, batch: 21170, sum loss: 4789.218750, avg loss: 2.794177, ppl: 16.349161 +epoch: 0, batch: 21171, sum loss: 4546.041992, avg loss: 2.912263, ppl: 18.398380 +epoch: 0, batch: 21172, sum loss: 4869.048828, avg loss: 3.145380, ppl: 23.228512 +epoch: 0, batch: 21173, sum loss: 4498.045898, avg loss: 2.900094, ppl: 18.175854 +epoch: 0, batch: 21174, sum loss: 5889.500488, avg loss: 3.474632, ppl: 32.285931 +epoch: 0, batch: 21175, sum loss: 5359.150391, avg loss: 3.121229, ppl: 22.674232 +epoch: 0, batch: 21176, sum loss: 5496.855957, avg loss: 3.114366, ppl: 22.519150 +epoch: 0, batch: 21177, sum loss: 5940.224609, avg loss: 3.224878, ppl: 25.150501 +epoch: 0, batch: 21178, sum loss: 5309.364258, avg loss: 3.293650, ppl: 26.941023 +epoch: 0, batch: 21179, sum loss: 5268.525391, avg loss: 3.088233, ppl: 21.938272 +epoch: 0, batch: 21180, sum loss: 6611.759766, avg loss: 3.383705, ppl: 29.479795 +epoch: 0, batch: 21181, sum loss: 5381.833496, avg loss: 3.320070, ppl: 27.662281 +epoch: 0, batch: 21182, sum loss: 6755.981445, avg loss: 3.493268, ppl: 32.893284 +epoch: 0, batch: 21183, sum loss: 6027.492188, avg loss: 3.450196, ppl: 31.506561 +epoch: 0, batch: 21184, sum loss: 6996.411133, avg loss: 3.399617, ppl: 29.952618 +epoch: 0, batch: 21185, sum loss: 5499.020020, avg loss: 3.144094, ppl: 23.198643 +epoch: 0, batch: 21186, sum loss: 4647.276855, avg loss: 3.116886, ppl: 22.575972 +epoch: 0, batch: 21187, sum loss: 5797.668457, avg loss: 3.738019, ppl: 42.014698 +epoch: 0, batch: 21188, sum loss: 5631.732422, avg loss: 3.052429, ppl: 21.166706 +epoch: 0, batch: 21189, sum loss: 6462.145996, avg loss: 3.320733, ppl: 27.680628 +epoch: 0, batch: 21190, sum loss: 5697.789551, avg loss: 3.409808, ppl: 30.259445 +epoch: 0, batch: 21191, sum loss: 5364.381836, avg loss: 3.239361, ppl: 
25.517412 +epoch: 0, batch: 21192, sum loss: 4555.908203, avg loss: 2.997308, ppl: 20.031544 +epoch: 0, batch: 21193, sum loss: 5314.505859, avg loss: 3.095228, ppl: 22.092268 +epoch: 0, batch: 21194, sum loss: 5749.077637, avg loss: 3.192158, ppl: 24.340910 +epoch: 0, batch: 21195, sum loss: 5401.272461, avg loss: 3.215043, ppl: 24.904371 +epoch: 0, batch: 21196, sum loss: 5943.744141, avg loss: 3.471813, ppl: 32.195057 +epoch: 0, batch: 21197, sum loss: 5054.083008, avg loss: 3.182672, ppl: 24.111094 +epoch: 0, batch: 21198, sum loss: 5413.019531, avg loss: 3.376806, ppl: 29.277098 +epoch: 0, batch: 21199, sum loss: 5196.264648, avg loss: 3.219495, ppl: 25.015480 +epoch: 0, batch: 21200, sum loss: 6465.369141, avg loss: 3.376172, ppl: 29.258549 +epoch: 0, batch: 21201, sum loss: 6086.018555, avg loss: 3.164856, ppl: 23.685337 +epoch: 0, batch: 21202, sum loss: 6138.299316, avg loss: 3.385714, ppl: 29.539078 +epoch: 0, batch: 21203, sum loss: 4605.096191, avg loss: 3.065976, ppl: 21.455395 +epoch: 0, batch: 21204, sum loss: 5472.409668, avg loss: 3.322653, ppl: 27.733833 +epoch: 0, batch: 21205, sum loss: 5195.867676, avg loss: 3.265787, ppl: 26.200733 +epoch: 0, batch: 21206, sum loss: 6305.446777, avg loss: 3.601055, ppl: 36.636864 +epoch: 0, batch: 21207, sum loss: 5069.155762, avg loss: 3.108005, ppl: 22.376354 +epoch: 0, batch: 21208, sum loss: 4620.604980, avg loss: 3.066095, ppl: 21.457943 +epoch: 0, batch: 21209, sum loss: 5103.551758, avg loss: 3.025223, ppl: 20.598608 +epoch: 0, batch: 21210, sum loss: 5164.561035, avg loss: 2.988751, ppl: 19.860855 +epoch: 0, batch: 21211, sum loss: 5713.195801, avg loss: 3.191729, ppl: 24.330465 +epoch: 0, batch: 21212, sum loss: 5163.369629, avg loss: 3.339825, ppl: 28.214193 +epoch: 0, batch: 21213, sum loss: 5151.757812, avg loss: 3.044774, ppl: 21.005285 +epoch: 0, batch: 21214, sum loss: 5855.253906, avg loss: 3.213641, ppl: 24.869469 +epoch: 0, batch: 21215, sum loss: 5807.812500, avg loss: 3.275698, ppl: 
26.461695 +epoch: 0, batch: 21216, sum loss: 4611.993652, avg loss: 3.167578, ppl: 23.749897 +epoch: 0, batch: 21217, sum loss: 5639.814453, avg loss: 3.315588, ppl: 27.538568 +epoch: 0, batch: 21218, sum loss: 5084.787598, avg loss: 3.024859, ppl: 20.591095 +epoch: 0, batch: 21219, sum loss: 6588.659180, avg loss: 3.444150, ppl: 31.316652 +epoch: 0, batch: 21220, sum loss: 4158.884277, avg loss: 2.912384, ppl: 18.400614 +epoch: 0, batch: 21221, sum loss: 5783.806641, avg loss: 3.370517, ppl: 29.093557 +epoch: 0, batch: 21222, sum loss: 5919.589844, avg loss: 3.125443, ppl: 22.769991 +epoch: 0, batch: 21223, sum loss: 4998.428223, avg loss: 3.187773, ppl: 24.234398 +epoch: 0, batch: 21224, sum loss: 6627.178711, avg loss: 3.292190, ppl: 26.901722 +epoch: 0, batch: 21225, sum loss: 5517.628418, avg loss: 3.378829, ppl: 29.336405 +epoch: 0, batch: 21226, sum loss: 5631.883789, avg loss: 3.130563, ppl: 22.886873 +epoch: 0, batch: 21227, sum loss: 5233.565918, avg loss: 3.350554, ppl: 28.518541 +epoch: 0, batch: 21228, sum loss: 5826.854492, avg loss: 3.175398, ppl: 23.936337 +epoch: 0, batch: 21229, sum loss: 5228.018066, avg loss: 3.084376, ppl: 21.853832 +epoch: 0, batch: 21230, sum loss: 5107.021973, avg loss: 3.373198, ppl: 29.171677 +epoch: 0, batch: 21231, sum loss: 4571.747559, avg loss: 3.133480, ppl: 22.953728 +epoch: 0, batch: 21232, sum loss: 6167.494629, avg loss: 3.341005, ppl: 28.247490 +epoch: 0, batch: 21233, sum loss: 5802.553711, avg loss: 3.474583, ppl: 32.284367 +epoch: 0, batch: 21234, sum loss: 5009.502441, avg loss: 3.090378, ppl: 21.985382 +epoch: 0, batch: 21235, sum loss: 6195.284668, avg loss: 3.438005, ppl: 31.124802 +epoch: 0, batch: 21236, sum loss: 4949.185547, avg loss: 3.186855, ppl: 24.212162 +epoch: 0, batch: 21237, sum loss: 4844.132812, avg loss: 2.814720, ppl: 16.688498 +epoch: 0, batch: 21238, sum loss: 4892.041016, avg loss: 3.121915, ppl: 22.689796 +epoch: 0, batch: 21239, sum loss: 5188.282227, avg loss: 3.127355, ppl: 
22.813560 +epoch: 0, batch: 21240, sum loss: 5736.431152, avg loss: 3.000225, ppl: 20.090063 +epoch: 0, batch: 21241, sum loss: 5423.949219, avg loss: 3.220873, ppl: 25.049971 +epoch: 0, batch: 21242, sum loss: 5495.819336, avg loss: 3.238550, ppl: 25.496729 +epoch: 0, batch: 21243, sum loss: 6272.503906, avg loss: 3.423856, ppl: 30.687511 +epoch: 0, batch: 21244, sum loss: 5146.329102, avg loss: 3.322356, ppl: 27.725588 +epoch: 0, batch: 21245, sum loss: 4917.042969, avg loss: 3.003692, ppl: 20.159822 +epoch: 0, batch: 21246, sum loss: 5203.528320, avg loss: 3.044780, ppl: 21.005400 +epoch: 0, batch: 21247, sum loss: 5739.164062, avg loss: 3.453167, ppl: 31.600327 +epoch: 0, batch: 21248, sum loss: 4593.843262, avg loss: 2.901985, ppl: 18.210264 +epoch: 0, batch: 21249, sum loss: 5385.714355, avg loss: 3.547901, ppl: 34.740334 +epoch: 0, batch: 21250, sum loss: 4694.836914, avg loss: 3.182940, ppl: 24.117561 +epoch: 0, batch: 21251, sum loss: 6078.463379, avg loss: 3.233225, ppl: 25.361319 +epoch: 0, batch: 21252, sum loss: 5311.901855, avg loss: 3.063380, ppl: 21.399776 +epoch: 0, batch: 21253, sum loss: 4746.604980, avg loss: 3.181371, ppl: 24.079739 +epoch: 0, batch: 21254, sum loss: 6362.805664, avg loss: 3.529010, ppl: 34.090210 +epoch: 0, batch: 21255, sum loss: 5316.254395, avg loss: 3.208361, ppl: 24.738510 +epoch: 0, batch: 21256, sum loss: 4533.873535, avg loss: 3.020569, ppl: 20.502947 +epoch: 0, batch: 21257, sum loss: 4165.953125, avg loss: 2.801582, ppl: 16.470688 +epoch: 0, batch: 21258, sum loss: 6176.337402, avg loss: 3.438941, ppl: 31.153942 +epoch: 0, batch: 21259, sum loss: 6917.568359, avg loss: 3.759548, ppl: 42.929016 +epoch: 0, batch: 21260, sum loss: 5796.923828, avg loss: 3.446447, ppl: 31.388666 +epoch: 0, batch: 21261, sum loss: 5109.744629, avg loss: 2.924868, ppl: 18.631769 +epoch: 0, batch: 21262, sum loss: 5036.761230, avg loss: 3.030542, ppl: 20.708462 +epoch: 0, batch: 21263, sum loss: 5099.339355, avg loss: 3.328551, ppl: 
27.897877 +epoch: 0, batch: 21264, sum loss: 6442.349609, avg loss: 3.387145, ppl: 29.581379 +epoch: 0, batch: 21265, sum loss: 5064.884277, avg loss: 3.103483, ppl: 22.275406 +epoch: 0, batch: 21266, sum loss: 3722.585449, avg loss: 2.585129, ppl: 13.264997 +epoch: 0, batch: 21267, sum loss: 5017.914062, avg loss: 3.202243, ppl: 24.587614 +epoch: 0, batch: 21268, sum loss: 4559.390625, avg loss: 2.798889, ppl: 16.426390 +epoch: 0, batch: 21269, sum loss: 5741.517578, avg loss: 3.159889, ppl: 23.567974 +epoch: 0, batch: 21270, sum loss: 4923.667969, avg loss: 3.132104, ppl: 22.922167 +epoch: 0, batch: 21271, sum loss: 5254.663086, avg loss: 3.311067, ppl: 27.414364 +epoch: 0, batch: 21272, sum loss: 6664.238281, avg loss: 3.478204, ppl: 32.401470 +epoch: 0, batch: 21273, sum loss: 5973.203125, avg loss: 3.547033, ppl: 34.710175 +epoch: 0, batch: 21274, sum loss: 5297.793457, avg loss: 3.183770, ppl: 24.137585 +epoch: 0, batch: 21275, sum loss: 4575.142578, avg loss: 2.886525, ppl: 17.930895 +epoch: 0, batch: 21276, sum loss: 4917.773926, avg loss: 3.327317, ppl: 27.863478 +epoch: 0, batch: 21277, sum loss: 4946.498047, avg loss: 3.239357, ppl: 25.517309 +epoch: 0, batch: 21278, sum loss: 4112.151367, avg loss: 2.841846, ppl: 17.147394 +epoch: 0, batch: 21279, sum loss: 5252.354004, avg loss: 3.075149, ppl: 21.653099 +epoch: 0, batch: 21280, sum loss: 5376.823242, avg loss: 3.055013, ppl: 21.221466 +epoch: 0, batch: 21281, sum loss: 4513.879883, avg loss: 3.068579, ppl: 21.511318 +epoch: 0, batch: 21282, sum loss: 5451.236328, avg loss: 3.160137, ppl: 23.573830 +epoch: 0, batch: 21283, sum loss: 5769.835449, avg loss: 3.278316, ppl: 26.531044 +epoch: 0, batch: 21284, sum loss: 3999.533203, avg loss: 3.036851, ppl: 20.839520 +epoch: 0, batch: 21285, sum loss: 4444.465820, avg loss: 3.112371, ppl: 22.474272 +epoch: 0, batch: 21286, sum loss: 4896.868164, avg loss: 3.091457, ppl: 22.009125 +epoch: 0, batch: 21287, sum loss: 5332.376953, avg loss: 3.045332, ppl: 
21.017017 +epoch: 0, batch: 21288, sum loss: 5831.041504, avg loss: 3.252115, ppl: 25.844938 +epoch: 0, batch: 21289, sum loss: 5582.118652, avg loss: 3.222932, ppl: 25.101620 +epoch: 0, batch: 21290, sum loss: 5102.940918, avg loss: 2.987670, ppl: 19.839407 +epoch: 0, batch: 21291, sum loss: 6084.339844, avg loss: 3.322960, ppl: 27.742350 +epoch: 0, batch: 21292, sum loss: 4479.791504, avg loss: 2.835311, ppl: 17.035700 +epoch: 0, batch: 21293, sum loss: 6266.981445, avg loss: 3.319376, ppl: 27.643089 +epoch: 0, batch: 21294, sum loss: 6230.948242, avg loss: 3.380873, ppl: 29.396416 +epoch: 0, batch: 21295, sum loss: 5546.688965, avg loss: 3.335351, ppl: 28.088247 +epoch: 0, batch: 21296, sum loss: 4529.517090, avg loss: 3.106665, ppl: 22.346388 +epoch: 0, batch: 21297, sum loss: 6317.933105, avg loss: 3.561405, ppl: 35.212643 +epoch: 0, batch: 21298, sum loss: 6008.502441, avg loss: 3.563762, ppl: 35.295727 +epoch: 0, batch: 21299, sum loss: 4996.060547, avg loss: 3.033431, ppl: 20.768362 +epoch: 0, batch: 21300, sum loss: 4210.407715, avg loss: 2.977658, ppl: 19.641752 +epoch: 0, batch: 21301, sum loss: 4826.669434, avg loss: 2.876442, ppl: 17.751001 +epoch: 0, batch: 21302, sum loss: 6014.843750, avg loss: 3.237268, ppl: 25.464052 +epoch: 0, batch: 21303, sum loss: 5719.917969, avg loss: 3.085177, ppl: 21.871332 +epoch: 0, batch: 21304, sum loss: 5365.562012, avg loss: 3.534626, ppl: 34.282185 +epoch: 0, batch: 21305, sum loss: 4214.009766, avg loss: 2.908219, ppl: 18.324141 +epoch: 0, batch: 21306, sum loss: 4942.784668, avg loss: 3.239046, ppl: 25.509382 +epoch: 0, batch: 21307, sum loss: 5815.473633, avg loss: 3.252502, ppl: 25.854948 +epoch: 0, batch: 21308, sum loss: 5520.533691, avg loss: 3.138450, ppl: 23.068087 +epoch: 0, batch: 21309, sum loss: 5204.771484, avg loss: 3.230771, ppl: 25.299150 +epoch: 0, batch: 21310, sum loss: 4571.809570, avg loss: 2.864542, ppl: 17.541021 +epoch: 0, batch: 21311, sum loss: 5885.600586, avg loss: 3.253511, ppl: 
25.881042 +epoch: 0, batch: 21312, sum loss: 5012.931152, avg loss: 3.094402, ppl: 22.074036 +epoch: 0, batch: 21313, sum loss: 6172.630859, avg loss: 3.247044, ppl: 25.714218 +epoch: 0, batch: 21314, sum loss: 5903.772461, avg loss: 3.313004, ppl: 27.467508 +epoch: 0, batch: 21315, sum loss: 4873.493652, avg loss: 2.907813, ppl: 18.316689 +epoch: 0, batch: 21316, sum loss: 6027.929688, avg loss: 3.376992, ppl: 29.282543 +epoch: 0, batch: 21317, sum loss: 4477.072266, avg loss: 3.166246, ppl: 23.718288 +epoch: 0, batch: 21318, sum loss: 6779.198242, avg loss: 3.382833, ppl: 29.454111 +epoch: 0, batch: 21319, sum loss: 5216.089844, avg loss: 3.231778, ppl: 25.324648 +epoch: 0, batch: 21320, sum loss: 5441.979492, avg loss: 2.960816, ppl: 19.313719 +epoch: 0, batch: 21321, sum loss: 5344.020020, avg loss: 3.250620, ppl: 25.806345 +epoch: 0, batch: 21322, sum loss: 5753.252930, avg loss: 3.360545, ppl: 28.804882 +epoch: 0, batch: 21323, sum loss: 6551.835938, avg loss: 3.421324, ppl: 30.609924 +epoch: 0, batch: 21324, sum loss: 6614.429199, avg loss: 3.634302, ppl: 37.875404 +epoch: 0, batch: 21325, sum loss: 4793.646484, avg loss: 3.022476, ppl: 20.542086 +epoch: 0, batch: 21326, sum loss: 7143.585449, avg loss: 3.447676, ppl: 31.427284 +epoch: 0, batch: 21327, sum loss: 5400.459473, avg loss: 2.952684, ppl: 19.157310 +epoch: 0, batch: 21328, sum loss: 5430.588379, avg loss: 3.232493, ppl: 25.342762 +epoch: 0, batch: 21329, sum loss: 4953.541504, avg loss: 3.048333, ppl: 21.080177 +epoch: 0, batch: 21330, sum loss: 4821.292480, avg loss: 3.163578, ppl: 23.655077 +epoch: 0, batch: 21331, sum loss: 5132.834961, avg loss: 3.208022, ppl: 24.730118 +epoch: 0, batch: 21332, sum loss: 5338.583496, avg loss: 3.301536, ppl: 27.154318 +epoch: 0, batch: 21333, sum loss: 5755.995605, avg loss: 3.215640, ppl: 24.919231 +epoch: 0, batch: 21334, sum loss: 5109.891113, avg loss: 3.207716, ppl: 24.722549 +epoch: 0, batch: 21335, sum loss: 6228.130859, avg loss: 3.253987, ppl: 
25.893368 +epoch: 0, batch: 21336, sum loss: 5403.846191, avg loss: 3.417992, ppl: 30.508104 +epoch: 0, batch: 21337, sum loss: 5699.321289, avg loss: 3.131495, ppl: 22.908213 +epoch: 0, batch: 21338, sum loss: 5403.676270, avg loss: 3.229932, ppl: 25.277939 +epoch: 0, batch: 21339, sum loss: 5641.658203, avg loss: 3.356132, ppl: 28.678051 +epoch: 0, batch: 21340, sum loss: 6677.195801, avg loss: 3.506931, ppl: 33.345768 +epoch: 0, batch: 21341, sum loss: 5309.698242, avg loss: 3.116020, ppl: 22.556431 +epoch: 0, batch: 21342, sum loss: 5319.447754, avg loss: 3.299906, ppl: 27.110079 +epoch: 0, batch: 21343, sum loss: 5345.086426, avg loss: 3.357466, ppl: 28.716337 +epoch: 0, batch: 21344, sum loss: 6064.890137, avg loss: 3.339697, ppl: 28.210581 +epoch: 0, batch: 21345, sum loss: 5537.628418, avg loss: 3.225177, ppl: 25.158022 +epoch: 0, batch: 21346, sum loss: 5613.601562, avg loss: 3.216964, ppl: 24.952238 +epoch: 0, batch: 21347, sum loss: 5861.334961, avg loss: 3.541592, ppl: 34.521847 +epoch: 0, batch: 21348, sum loss: 4664.792969, avg loss: 3.031054, ppl: 20.719069 +epoch: 0, batch: 21349, sum loss: 5805.008789, avg loss: 3.298300, ppl: 27.066593 +epoch: 0, batch: 21350, sum loss: 6168.944336, avg loss: 3.343601, ppl: 28.320934 +epoch: 0, batch: 21351, sum loss: 5038.179688, avg loss: 3.035048, ppl: 20.801977 +epoch: 0, batch: 21352, sum loss: 4982.136719, avg loss: 3.195726, ppl: 24.427900 +epoch: 0, batch: 21353, sum loss: 5440.193359, avg loss: 3.151908, ppl: 23.380630 +epoch: 0, batch: 21354, sum loss: 4825.832520, avg loss: 3.054324, ppl: 21.206854 +epoch: 0, batch: 21355, sum loss: 4823.316406, avg loss: 3.074134, ppl: 21.631144 +epoch: 0, batch: 21356, sum loss: 4976.935547, avg loss: 3.219234, ppl: 25.008955 +epoch: 0, batch: 21357, sum loss: 5554.162598, avg loss: 3.219805, ppl: 25.023228 +epoch: 0, batch: 21358, sum loss: 5885.550293, avg loss: 3.626340, ppl: 37.575047 +epoch: 0, batch: 21359, sum loss: 5253.283203, avg loss: 3.283302, ppl: 
26.663666 +epoch: 0, batch: 21360, sum loss: 5572.186523, avg loss: 3.344650, ppl: 28.350639 +epoch: 0, batch: 21361, sum loss: 4987.340820, avg loss: 3.144603, ppl: 23.210464 +epoch: 0, batch: 21362, sum loss: 5890.562500, avg loss: 3.158478, ppl: 23.534761 +epoch: 0, batch: 21363, sum loss: 6013.429199, avg loss: 3.391669, ppl: 29.715515 +epoch: 0, batch: 21364, sum loss: 5821.368164, avg loss: 3.223349, ppl: 25.112076 +epoch: 0, batch: 21365, sum loss: 5368.750488, avg loss: 3.097952, ppl: 22.152529 +epoch: 0, batch: 21366, sum loss: 5694.068848, avg loss: 3.349452, ppl: 28.487133 +epoch: 0, batch: 21367, sum loss: 5364.245117, avg loss: 3.221769, ppl: 25.072430 +epoch: 0, batch: 21368, sum loss: 5096.792480, avg loss: 3.229907, ppl: 25.277294 +epoch: 0, batch: 21369, sum loss: 5276.081543, avg loss: 3.328758, ppl: 27.903664 +epoch: 0, batch: 21370, sum loss: 5360.542969, avg loss: 3.240957, ppl: 25.558176 +epoch: 0, batch: 21371, sum loss: 5886.594238, avg loss: 3.132834, ppl: 22.938890 +epoch: 0, batch: 21372, sum loss: 4936.893066, avg loss: 3.218313, ppl: 24.985945 +epoch: 0, batch: 21373, sum loss: 5808.612793, avg loss: 3.156855, ppl: 23.496574 +epoch: 0, batch: 21374, sum loss: 5444.293457, avg loss: 3.421932, ppl: 30.628525 +epoch: 0, batch: 21375, sum loss: 6167.892578, avg loss: 3.258263, ppl: 26.004337 +epoch: 0, batch: 21376, sum loss: 5501.184082, avg loss: 3.255138, ppl: 25.923203 +epoch: 0, batch: 21377, sum loss: 4690.441895, avg loss: 3.089883, ppl: 21.974499 +epoch: 0, batch: 21378, sum loss: 5391.148926, avg loss: 3.247680, ppl: 25.730581 +epoch: 0, batch: 21379, sum loss: 5821.441406, avg loss: 3.324638, ppl: 27.788948 +epoch: 0, batch: 21380, sum loss: 5568.795898, avg loss: 3.151554, ppl: 23.372360 +epoch: 0, batch: 21381, sum loss: 6666.876953, avg loss: 3.722433, ppl: 41.364895 +epoch: 0, batch: 21382, sum loss: 6253.102539, avg loss: 3.520891, ppl: 33.814548 +epoch: 0, batch: 21383, sum loss: 5357.520508, avg loss: 3.305071, ppl: 
27.250479 +epoch: 0, batch: 21384, sum loss: 3600.905762, avg loss: 2.752986, ppl: 15.689414 +epoch: 0, batch: 21385, sum loss: 4500.777344, avg loss: 3.165103, ppl: 23.691195 +epoch: 0, batch: 21386, sum loss: 5781.200684, avg loss: 3.007909, ppl: 20.245020 +epoch: 0, batch: 21387, sum loss: 4684.337891, avg loss: 3.310486, ppl: 27.398439 +epoch: 0, batch: 21388, sum loss: 5894.816895, avg loss: 3.347426, ppl: 28.429453 +epoch: 0, batch: 21389, sum loss: 5520.220215, avg loss: 3.178020, ppl: 23.999182 +epoch: 0, batch: 21390, sum loss: 4831.446289, avg loss: 3.282233, ppl: 26.635170 +epoch: 0, batch: 21391, sum loss: 5364.779297, avg loss: 3.202853, ppl: 24.602633 +epoch: 0, batch: 21392, sum loss: 5410.223633, avg loss: 3.147309, ppl: 23.273348 +epoch: 0, batch: 21393, sum loss: 5746.046875, avg loss: 3.346562, ppl: 28.404907 +epoch: 0, batch: 21394, sum loss: 5427.950195, avg loss: 3.350587, ppl: 28.519461 +epoch: 0, batch: 21395, sum loss: 5868.989746, avg loss: 3.340347, ppl: 28.228922 +epoch: 0, batch: 21396, sum loss: 5178.832520, avg loss: 3.131096, ppl: 22.899061 +epoch: 0, batch: 21397, sum loss: 3734.581543, avg loss: 2.789083, ppl: 16.266088 +epoch: 0, batch: 21398, sum loss: 5496.110840, avg loss: 3.160501, ppl: 23.582401 +epoch: 0, batch: 21399, sum loss: 6286.354004, avg loss: 3.327874, ppl: 27.879005 +epoch: 0, batch: 21400, sum loss: 6045.565918, avg loss: 3.474463, ppl: 32.280495 +epoch: 0, batch: 21401, sum loss: 5373.615234, avg loss: 3.023982, ppl: 20.573042 +epoch: 0, batch: 21402, sum loss: 5330.018066, avg loss: 3.135305, ppl: 22.995647 +epoch: 0, batch: 21403, sum loss: 5402.558594, avg loss: 3.170516, ppl: 23.819761 +epoch: 0, batch: 21404, sum loss: 6243.352051, avg loss: 3.273913, ppl: 26.414495 +epoch: 0, batch: 21405, sum loss: 6169.820312, avg loss: 3.127126, ppl: 22.808344 +epoch: 0, batch: 21406, sum loss: 5043.646973, avg loss: 2.857590, ppl: 17.419502 +epoch: 0, batch: 21407, sum loss: 5172.947754, avg loss: 3.369998, ppl: 
29.078482 +epoch: 0, batch: 21408, sum loss: 5844.515625, avg loss: 3.318862, ppl: 27.628883 +epoch: 0, batch: 21409, sum loss: 4444.477539, avg loss: 2.800553, ppl: 16.453745 +epoch: 0, batch: 21410, sum loss: 5664.212402, avg loss: 3.183930, ppl: 24.141453 +epoch: 0, batch: 21411, sum loss: 4458.512695, avg loss: 3.000345, ppl: 20.092468 +epoch: 0, batch: 21412, sum loss: 5152.272461, avg loss: 3.153166, ppl: 23.410053 +epoch: 0, batch: 21413, sum loss: 6160.407227, avg loss: 3.388563, ppl: 29.623350 +epoch: 0, batch: 21414, sum loss: 4843.485352, avg loss: 3.391797, ppl: 29.719297 +epoch: 0, batch: 21415, sum loss: 5518.841797, avg loss: 3.126822, ppl: 22.801413 +epoch: 0, batch: 21416, sum loss: 5733.085449, avg loss: 3.368440, ppl: 29.033211 +epoch: 0, batch: 21417, sum loss: 5527.188965, avg loss: 3.136884, ppl: 23.031981 +epoch: 0, batch: 21418, sum loss: 5642.190430, avg loss: 3.342530, ppl: 28.290611 +epoch: 0, batch: 21419, sum loss: 4980.388672, avg loss: 3.124460, ppl: 22.747597 +epoch: 0, batch: 21420, sum loss: 5844.174316, avg loss: 3.366460, ppl: 28.975767 +epoch: 0, batch: 21421, sum loss: 5777.406738, avg loss: 3.179641, ppl: 24.038111 +epoch: 0, batch: 21422, sum loss: 6008.772461, avg loss: 3.221862, ppl: 25.074762 +epoch: 0, batch: 21423, sum loss: 5610.016602, avg loss: 3.311698, ppl: 27.431664 +epoch: 0, batch: 21424, sum loss: 5756.370117, avg loss: 2.897016, ppl: 18.119987 +epoch: 0, batch: 21425, sum loss: 5748.031738, avg loss: 3.220186, ppl: 25.032776 +epoch: 0, batch: 21426, sum loss: 5267.300781, avg loss: 3.205904, ppl: 24.677805 +epoch: 0, batch: 21427, sum loss: 4905.173828, avg loss: 3.239877, ppl: 25.530586 +epoch: 0, batch: 21428, sum loss: 5398.240234, avg loss: 3.226683, ppl: 25.195940 +epoch: 0, batch: 21429, sum loss: 5344.744141, avg loss: 3.338379, ppl: 28.173410 +epoch: 0, batch: 21430, sum loss: 5901.836914, avg loss: 3.276978, ppl: 26.495569 +epoch: 0, batch: 21431, sum loss: 4858.007812, avg loss: 3.110120, ppl: 
22.423742 +epoch: 0, batch: 21432, sum loss: 5043.587891, avg loss: 3.307271, ppl: 27.310488 +epoch: 0, batch: 21433, sum loss: 5804.779297, avg loss: 3.125891, ppl: 22.780176 +epoch: 0, batch: 21434, sum loss: 5158.838867, avg loss: 3.238442, ppl: 25.493982 +epoch: 0, batch: 21435, sum loss: 4789.316895, avg loss: 3.186505, ppl: 24.203684 +epoch: 0, batch: 21436, sum loss: 5015.243164, avg loss: 3.109264, ppl: 22.404552 +epoch: 0, batch: 21437, sum loss: 5052.925781, avg loss: 3.049442, ppl: 21.103567 +epoch: 0, batch: 21438, sum loss: 5519.988281, avg loss: 3.092431, ppl: 22.030560 +epoch: 0, batch: 21439, sum loss: 6269.992188, avg loss: 3.221990, ppl: 25.077972 +epoch: 0, batch: 21440, sum loss: 4603.359375, avg loss: 3.097819, ppl: 22.149591 +epoch: 0, batch: 21441, sum loss: 5281.450195, avg loss: 3.136253, ppl: 23.017462 +epoch: 0, batch: 21442, sum loss: 4410.439941, avg loss: 2.865783, ppl: 17.562799 +epoch: 0, batch: 21443, sum loss: 5182.205078, avg loss: 3.042986, ppl: 20.967758 +epoch: 0, batch: 21444, sum loss: 5605.370605, avg loss: 3.168666, ppl: 23.775755 +epoch: 0, batch: 21445, sum loss: 4660.174805, avg loss: 2.994971, ppl: 19.984781 +epoch: 0, batch: 21446, sum loss: 5614.648926, avg loss: 3.285342, ppl: 26.718113 +epoch: 0, batch: 21447, sum loss: 5803.437988, avg loss: 3.111763, ppl: 22.460608 +epoch: 0, batch: 21448, sum loss: 6140.138184, avg loss: 3.536946, ppl: 34.361820 +epoch: 0, batch: 21449, sum loss: 4048.830322, avg loss: 2.839292, ppl: 17.103649 +epoch: 0, batch: 21450, sum loss: 4363.586426, avg loss: 3.013527, ppl: 20.359074 +epoch: 0, batch: 21451, sum loss: 6664.190430, avg loss: 3.287711, ppl: 26.781494 +epoch: 0, batch: 21452, sum loss: 6421.397949, avg loss: 3.212305, ppl: 24.836275 +epoch: 0, batch: 21453, sum loss: 6712.370605, avg loss: 3.467134, ppl: 32.044754 +epoch: 0, batch: 21454, sum loss: 6548.751953, avg loss: 3.344613, ppl: 28.349604 +epoch: 0, batch: 21455, sum loss: 4415.181641, avg loss: 3.107095, ppl: 
22.356001 +epoch: 0, batch: 21456, sum loss: 4002.041016, avg loss: 2.824306, ppl: 16.849239 +epoch: 0, batch: 21457, sum loss: 5762.304688, avg loss: 3.106364, ppl: 22.339664 +epoch: 0, batch: 21458, sum loss: 5320.748047, avg loss: 3.088072, ppl: 21.934748 +epoch: 0, batch: 21459, sum loss: 5017.042969, avg loss: 2.988114, ppl: 19.848206 +epoch: 0, batch: 21460, sum loss: 5812.069336, avg loss: 3.267043, ppl: 26.233648 +epoch: 0, batch: 21461, sum loss: 5417.411621, avg loss: 3.232346, ppl: 25.339035 +epoch: 0, batch: 21462, sum loss: 4737.559082, avg loss: 3.156269, ppl: 23.482809 +epoch: 0, batch: 21463, sum loss: 4324.810059, avg loss: 2.956125, ppl: 19.223328 +epoch: 0, batch: 21464, sum loss: 5407.829102, avg loss: 3.205589, ppl: 24.670034 +epoch: 0, batch: 21465, sum loss: 5206.603516, avg loss: 2.785769, ppl: 16.212288 +epoch: 0, batch: 21466, sum loss: 5319.132812, avg loss: 3.370806, ppl: 29.101988 +epoch: 0, batch: 21467, sum loss: 5289.598145, avg loss: 3.199999, ppl: 24.532501 +epoch: 0, batch: 21468, sum loss: 4828.861328, avg loss: 2.988157, ppl: 19.849062 +epoch: 0, batch: 21469, sum loss: 5497.841797, avg loss: 3.360539, ppl: 28.804718 +epoch: 0, batch: 21470, sum loss: 5169.142090, avg loss: 2.977616, ppl: 19.640944 +epoch: 0, batch: 21471, sum loss: 4537.725098, avg loss: 3.105904, ppl: 22.329388 +epoch: 0, batch: 21472, sum loss: 6245.672852, avg loss: 3.394387, ppl: 29.796391 +epoch: 0, batch: 21473, sum loss: 5344.169434, avg loss: 3.116134, ppl: 22.558990 +epoch: 0, batch: 21474, sum loss: 4570.550781, avg loss: 2.965964, ppl: 19.413410 +epoch: 0, batch: 21475, sum loss: 6199.990723, avg loss: 3.336916, ppl: 28.132246 +epoch: 0, batch: 21476, sum loss: 5211.425293, avg loss: 3.166115, ppl: 23.715174 +epoch: 0, batch: 21477, sum loss: 5550.696289, avg loss: 3.152014, ppl: 23.383106 +epoch: 0, batch: 21478, sum loss: 6089.615723, avg loss: 3.444353, ppl: 31.323008 +epoch: 0, batch: 21479, sum loss: 4340.580078, avg loss: 2.922949, ppl: 
18.596052 +epoch: 0, batch: 21480, sum loss: 6114.144043, avg loss: 3.248748, ppl: 25.758072 +epoch: 0, batch: 21481, sum loss: 5129.202148, avg loss: 3.181887, ppl: 24.092176 +epoch: 0, batch: 21482, sum loss: 5497.621582, avg loss: 3.209353, ppl: 24.763058 +epoch: 0, batch: 21483, sum loss: 4554.616211, avg loss: 2.942259, ppl: 18.958618 +epoch: 0, batch: 21484, sum loss: 4408.656738, avg loss: 3.167138, ppl: 23.739452 +epoch: 0, batch: 21485, sum loss: 5583.099121, avg loss: 3.245988, ppl: 25.687073 +epoch: 0, batch: 21486, sum loss: 4801.021973, avg loss: 3.046334, ppl: 21.038073 +epoch: 0, batch: 21487, sum loss: 5716.188965, avg loss: 3.251529, ppl: 25.829815 +epoch: 0, batch: 21488, sum loss: 5276.710449, avg loss: 3.182576, ppl: 24.108770 +epoch: 0, batch: 21489, sum loss: 4374.235840, avg loss: 2.945613, ppl: 19.022322 +epoch: 0, batch: 21490, sum loss: 4732.053223, avg loss: 2.866174, ppl: 17.569672 +epoch: 0, batch: 21491, sum loss: 4981.720215, avg loss: 3.177117, ppl: 23.977535 +epoch: 0, batch: 21492, sum loss: 4495.592285, avg loss: 2.859792, ppl: 17.457888 +epoch: 0, batch: 21493, sum loss: 5571.912598, avg loss: 3.247036, ppl: 25.713997 +epoch: 0, batch: 21494, sum loss: 5987.013672, avg loss: 3.215367, ppl: 24.912441 +epoch: 0, batch: 21495, sum loss: 5189.519043, avg loss: 3.040140, ppl: 20.908173 +epoch: 0, batch: 21496, sum loss: 4769.988770, avg loss: 3.061610, ppl: 21.361927 +epoch: 0, batch: 21497, sum loss: 4311.553223, avg loss: 2.779854, ppl: 16.116661 +epoch: 0, batch: 21498, sum loss: 5111.025391, avg loss: 3.340539, ppl: 28.234354 +epoch: 0, batch: 21499, sum loss: 5555.216797, avg loss: 3.273551, ppl: 26.404949 +epoch: 0, batch: 21500, sum loss: 5691.411133, avg loss: 3.399887, ppl: 29.960724 +epoch: 0, batch: 21501, sum loss: 5715.501953, avg loss: 3.277237, ppl: 26.502449 +epoch: 0, batch: 21502, sum loss: 4471.246582, avg loss: 2.935815, ppl: 18.836855 +epoch: 0, batch: 21503, sum loss: 6088.108398, avg loss: 3.422208, ppl: 
30.636990 +epoch: 0, batch: 21504, sum loss: 4911.548828, avg loss: 3.102684, ppl: 22.257607 +epoch: 0, batch: 21505, sum loss: 4027.889160, avg loss: 2.858686, ppl: 17.438606 +epoch: 0, batch: 21506, sum loss: 5473.666016, avg loss: 3.242693, ppl: 25.602581 +epoch: 0, batch: 21507, sum loss: 5768.601562, avg loss: 3.119849, ppl: 22.642971 +epoch: 0, batch: 21508, sum loss: 3993.048828, avg loss: 2.858303, ppl: 17.431913 +epoch: 0, batch: 21509, sum loss: 5447.807129, avg loss: 3.394272, ppl: 29.792967 +epoch: 0, batch: 21510, sum loss: 5736.232422, avg loss: 3.035044, ppl: 20.801888 +epoch: 0, batch: 21511, sum loss: 5360.008301, avg loss: 3.438107, ppl: 31.127970 +epoch: 0, batch: 21512, sum loss: 5446.849121, avg loss: 3.100085, ppl: 22.199833 +epoch: 0, batch: 21513, sum loss: 6298.065430, avg loss: 3.309546, ppl: 27.372688 +epoch: 0, batch: 21514, sum loss: 5627.832031, avg loss: 3.412876, ppl: 30.352402 +epoch: 0, batch: 21515, sum loss: 5179.144531, avg loss: 2.978232, ppl: 19.653032 +epoch: 0, batch: 21516, sum loss: 6089.093750, avg loss: 3.196375, ppl: 24.443752 +epoch: 0, batch: 21517, sum loss: 6241.594727, avg loss: 3.544347, ppl: 34.617065 +epoch: 0, batch: 21518, sum loss: 5521.246094, avg loss: 3.096605, ppl: 22.122713 +epoch: 0, batch: 21519, sum loss: 5295.172363, avg loss: 3.017192, ppl: 20.433836 +epoch: 0, batch: 21520, sum loss: 4738.963379, avg loss: 2.987997, ppl: 19.845892 +epoch: 0, batch: 21521, sum loss: 5700.571289, avg loss: 3.190023, ppl: 24.288990 +epoch: 0, batch: 21522, sum loss: 5626.488770, avg loss: 3.311647, ppl: 27.430277 +epoch: 0, batch: 21523, sum loss: 4919.858398, avg loss: 3.129681, ppl: 22.866680 +epoch: 0, batch: 21524, sum loss: 5417.569336, avg loss: 3.228587, ppl: 25.243965 +epoch: 0, batch: 21525, sum loss: 4149.702148, avg loss: 3.037849, ppl: 20.860329 +epoch: 0, batch: 21526, sum loss: 5159.536621, avg loss: 3.253176, ppl: 25.872374 +epoch: 0, batch: 21527, sum loss: 5343.348633, avg loss: 3.083294, ppl: 
21.830191 +epoch: 0, batch: 21528, sum loss: 4793.447754, avg loss: 3.163992, ppl: 23.664877 +epoch: 0, batch: 21529, sum loss: 5065.849121, avg loss: 3.127068, ppl: 22.807001 +epoch: 0, batch: 21530, sum loss: 4928.521484, avg loss: 3.014386, ppl: 20.376579 +epoch: 0, batch: 21531, sum loss: 4427.062012, avg loss: 2.926016, ppl: 18.653170 +epoch: 0, batch: 21532, sum loss: 6254.591797, avg loss: 3.399235, ppl: 29.941172 +epoch: 0, batch: 21533, sum loss: 5431.878906, avg loss: 3.180257, ppl: 24.052931 +epoch: 0, batch: 21534, sum loss: 5540.613770, avg loss: 3.202667, ppl: 24.598040 +epoch: 0, batch: 21535, sum loss: 5347.305176, avg loss: 3.067874, ppl: 21.496162 +epoch: 0, batch: 21536, sum loss: 4402.167480, avg loss: 2.970423, ppl: 19.500175 +epoch: 0, batch: 21537, sum loss: 5230.943848, avg loss: 3.193494, ppl: 24.373447 +epoch: 0, batch: 21538, sum loss: 5882.676758, avg loss: 3.214578, ppl: 24.892776 +epoch: 0, batch: 21539, sum loss: 6820.160156, avg loss: 3.441050, ppl: 31.219707 +epoch: 0, batch: 21540, sum loss: 5341.327148, avg loss: 3.227388, ppl: 25.213717 +epoch: 0, batch: 21541, sum loss: 5160.872559, avg loss: 3.002253, ppl: 20.130836 +epoch: 0, batch: 21542, sum loss: 4249.450684, avg loss: 3.037492, ppl: 20.852869 +epoch: 0, batch: 21543, sum loss: 5757.087891, avg loss: 3.127153, ppl: 22.808943 +epoch: 0, batch: 21544, sum loss: 5560.577148, avg loss: 3.407217, ppl: 30.181120 +epoch: 0, batch: 21545, sum loss: 4911.885742, avg loss: 3.148645, ppl: 23.304459 +epoch: 0, batch: 21546, sum loss: 4147.779785, avg loss: 3.083851, ppl: 21.842358 +epoch: 0, batch: 21547, sum loss: 4958.576172, avg loss: 2.908256, ppl: 18.324808 +epoch: 0, batch: 21548, sum loss: 5260.339355, avg loss: 3.101615, ppl: 22.233833 +epoch: 0, batch: 21549, sum loss: 5668.835938, avg loss: 3.250479, ppl: 25.802702 +epoch: 0, batch: 21550, sum loss: 3988.659668, avg loss: 2.766061, ppl: 15.895894 +epoch: 0, batch: 21551, sum loss: 5113.755859, avg loss: 3.135350, ppl: 
22.996683 +epoch: 0, batch: 21552, sum loss: 5088.701172, avg loss: 3.093435, ppl: 22.052706 +epoch: 0, batch: 21553, sum loss: 5307.143555, avg loss: 3.300462, ppl: 27.125175 +epoch: 0, batch: 21554, sum loss: 5003.571289, avg loss: 3.041685, ppl: 20.940491 +epoch: 0, batch: 21555, sum loss: 5884.473633, avg loss: 3.041071, ppl: 20.927654 +epoch: 0, batch: 21556, sum loss: 5242.408203, avg loss: 3.035558, ppl: 20.812582 +epoch: 0, batch: 21557, sum loss: 5734.419922, avg loss: 3.116532, ppl: 22.567986 +epoch: 0, batch: 21558, sum loss: 5063.390137, avg loss: 3.100667, ppl: 22.212772 +epoch: 0, batch: 21559, sum loss: 5248.750977, avg loss: 3.173368, ppl: 23.887808 +epoch: 0, batch: 21560, sum loss: 5206.117676, avg loss: 3.350140, ppl: 28.506727 +epoch: 0, batch: 21561, sum loss: 4939.135742, avg loss: 3.104422, ppl: 22.296335 +epoch: 0, batch: 21562, sum loss: 5320.642578, avg loss: 3.165165, ppl: 23.692652 +epoch: 0, batch: 21563, sum loss: 4918.979004, avg loss: 3.133108, ppl: 22.945175 +epoch: 0, batch: 21564, sum loss: 4811.679199, avg loss: 3.197129, ppl: 24.462204 +epoch: 0, batch: 21565, sum loss: 5717.294434, avg loss: 3.204762, ppl: 24.649620 +epoch: 0, batch: 21566, sum loss: 5336.527344, avg loss: 3.063448, ppl: 21.401230 +epoch: 0, batch: 21567, sum loss: 6332.245605, avg loss: 3.341554, ppl: 28.263025 +epoch: 0, batch: 21568, sum loss: 5526.078613, avg loss: 3.285421, ppl: 26.720234 +epoch: 0, batch: 21569, sum loss: 4521.672852, avg loss: 3.103413, ppl: 22.273844 +epoch: 0, batch: 21570, sum loss: 5029.649414, avg loss: 3.270253, ppl: 26.318001 +epoch: 0, batch: 21571, sum loss: 4279.145508, avg loss: 3.114371, ppl: 22.519257 +epoch: 0, batch: 21572, sum loss: 5651.893066, avg loss: 3.259454, ppl: 26.035313 +epoch: 0, batch: 21573, sum loss: 4219.539551, avg loss: 2.818664, ppl: 16.754444 +epoch: 0, batch: 21574, sum loss: 5127.207031, avg loss: 3.068347, ppl: 21.506317 +epoch: 0, batch: 21575, sum loss: 5440.221191, avg loss: 3.249834, ppl: 
25.786049 +epoch: 0, batch: 21576, sum loss: 5591.828613, avg loss: 3.184413, ppl: 24.153099 +epoch: 0, batch: 21577, sum loss: 5788.309082, avg loss: 3.427063, ppl: 30.786091 +epoch: 0, batch: 21578, sum loss: 6661.448730, avg loss: 3.494989, ppl: 32.949917 +epoch: 0, batch: 21579, sum loss: 5229.417969, avg loss: 3.280689, ppl: 26.594084 +epoch: 0, batch: 21580, sum loss: 6624.389648, avg loss: 3.282651, ppl: 26.646317 +epoch: 0, batch: 21581, sum loss: 5146.854004, avg loss: 3.290827, ppl: 26.865080 +epoch: 0, batch: 21582, sum loss: 5333.444336, avg loss: 3.220679, ppl: 25.045115 +epoch: 0, batch: 21583, sum loss: 5750.936035, avg loss: 3.355272, ppl: 28.653391 +epoch: 0, batch: 21584, sum loss: 4389.962402, avg loss: 2.907260, ppl: 18.306568 +epoch: 0, batch: 21585, sum loss: 6084.585938, avg loss: 3.515070, ppl: 33.618271 +epoch: 0, batch: 21586, sum loss: 6647.216797, avg loss: 3.220551, ppl: 25.041908 +epoch: 0, batch: 21587, sum loss: 5808.677246, avg loss: 3.505538, ppl: 33.299370 +epoch: 0, batch: 21588, sum loss: 6234.542480, avg loss: 3.481040, ppl: 32.493488 +epoch: 0, batch: 21589, sum loss: 5580.625000, avg loss: 2.989087, ppl: 19.867527 +epoch: 0, batch: 21590, sum loss: 5655.347656, avg loss: 3.166488, ppl: 23.724024 +epoch: 0, batch: 21591, sum loss: 5093.827637, avg loss: 3.269466, ppl: 26.297291 +epoch: 0, batch: 21592, sum loss: 5600.370117, avg loss: 3.173014, ppl: 23.879353 +epoch: 0, batch: 21593, sum loss: 4510.957031, avg loss: 3.152311, ppl: 23.390059 +epoch: 0, batch: 21594, sum loss: 5507.209961, avg loss: 3.181519, ppl: 24.083315 +epoch: 0, batch: 21595, sum loss: 5515.251465, avg loss: 3.200958, ppl: 24.556055 +epoch: 0, batch: 21596, sum loss: 5495.478516, avg loss: 3.493629, ppl: 32.905144 +epoch: 0, batch: 21597, sum loss: 5917.496094, avg loss: 3.101413, ppl: 22.229340 +epoch: 0, batch: 21598, sum loss: 5931.073242, avg loss: 3.341449, ppl: 28.260059 +epoch: 0, batch: 21599, sum loss: 4775.531250, avg loss: 2.899533, ppl: 
18.165661 +epoch: 0, batch: 21600, sum loss: 4278.612305, avg loss: 2.760395, ppl: 15.806086 +epoch: 0, batch: 21601, sum loss: 5596.116211, avg loss: 3.185040, ppl: 24.168266 +epoch: 0, batch: 21602, sum loss: 5018.033203, avg loss: 2.920858, ppl: 18.557196 +epoch: 0, batch: 21603, sum loss: 4615.780273, avg loss: 3.289936, ppl: 26.841148 +epoch: 0, batch: 21604, sum loss: 5468.811035, avg loss: 3.142995, ppl: 23.173164 +epoch: 0, batch: 21605, sum loss: 4587.346680, avg loss: 3.116404, ppl: 22.565090 +epoch: 0, batch: 21606, sum loss: 6829.847656, avg loss: 3.224668, ppl: 25.145231 +epoch: 0, batch: 21607, sum loss: 4039.846191, avg loss: 2.933803, ppl: 18.798981 +epoch: 0, batch: 21608, sum loss: 4955.166504, avg loss: 3.162200, ppl: 23.622501 +epoch: 0, batch: 21609, sum loss: 5750.616211, avg loss: 3.347274, ppl: 28.425137 +epoch: 0, batch: 21610, sum loss: 5952.507324, avg loss: 3.452731, ppl: 31.586527 +epoch: 0, batch: 21611, sum loss: 5016.849609, avg loss: 3.213869, ppl: 24.875145 +epoch: 0, batch: 21612, sum loss: 5392.468262, avg loss: 3.179521, ppl: 24.035246 +epoch: 0, batch: 21613, sum loss: 6249.659668, avg loss: 3.223135, ppl: 25.106718 +epoch: 0, batch: 21614, sum loss: 3910.567383, avg loss: 2.931460, ppl: 18.754992 +epoch: 0, batch: 21615, sum loss: 5936.643066, avg loss: 3.457567, ppl: 31.739666 +epoch: 0, batch: 21616, sum loss: 5625.324219, avg loss: 3.223681, ppl: 25.120430 +epoch: 0, batch: 21617, sum loss: 5130.335938, avg loss: 3.397573, ppl: 29.891479 +epoch: 0, batch: 21618, sum loss: 5484.095703, avg loss: 3.101864, ppl: 22.239374 +epoch: 0, batch: 21619, sum loss: 5900.496094, avg loss: 3.303749, ppl: 27.214483 +epoch: 0, batch: 21620, sum loss: 6409.561523, avg loss: 3.431243, ppl: 30.915037 +epoch: 0, batch: 21621, sum loss: 4059.850830, avg loss: 2.998413, ppl: 20.053679 +epoch: 0, batch: 21622, sum loss: 6043.012695, avg loss: 3.331319, ppl: 27.975220 +epoch: 0, batch: 21623, sum loss: 4978.491699, avg loss: 2.914808, ppl: 
18.445263 +epoch: 0, batch: 21624, sum loss: 4451.434082, avg loss: 2.855314, ppl: 17.379890 +epoch: 0, batch: 21625, sum loss: 4870.343750, avg loss: 3.004530, ppl: 20.176735 +epoch: 0, batch: 21626, sum loss: 5642.346191, avg loss: 3.073173, ppl: 21.610371 +epoch: 0, batch: 21627, sum loss: 6003.726074, avg loss: 3.112351, ppl: 22.473827 +epoch: 0, batch: 21628, sum loss: 5242.212891, avg loss: 2.778067, ppl: 16.087894 +epoch: 0, batch: 21629, sum loss: 4963.457031, avg loss: 3.015466, ppl: 20.398600 +epoch: 0, batch: 21630, sum loss: 5501.453125, avg loss: 3.163573, ppl: 23.654959 +epoch: 0, batch: 21631, sum loss: 5596.294922, avg loss: 3.105602, ppl: 22.322659 +epoch: 0, batch: 21632, sum loss: 5964.585938, avg loss: 3.461745, ppl: 31.872532 +epoch: 0, batch: 21633, sum loss: 5223.353516, avg loss: 2.988189, ppl: 19.849697 +epoch: 0, batch: 21634, sum loss: 4896.019531, avg loss: 3.096786, ppl: 22.126732 +epoch: 0, batch: 21635, sum loss: 5577.885742, avg loss: 3.300524, ppl: 27.126850 +epoch: 0, batch: 21636, sum loss: 5289.940918, avg loss: 3.487107, ppl: 32.691219 +epoch: 0, batch: 21637, sum loss: 4923.357910, avg loss: 3.291015, ppl: 26.870113 +epoch: 0, batch: 21638, sum loss: 5257.358398, avg loss: 3.136849, ppl: 23.031174 +epoch: 0, batch: 21639, sum loss: 5674.315430, avg loss: 3.102414, ppl: 22.251610 +epoch: 0, batch: 21640, sum loss: 4851.891113, avg loss: 2.996845, ppl: 20.022257 +epoch: 0, batch: 21641, sum loss: 6011.580566, avg loss: 3.343482, ppl: 28.317551 +epoch: 0, batch: 21642, sum loss: 6087.224121, avg loss: 3.251722, ppl: 25.834797 +epoch: 0, batch: 21643, sum loss: 4670.206055, avg loss: 3.295841, ppl: 27.000097 +epoch: 0, batch: 21644, sum loss: 5479.034668, avg loss: 3.411603, ppl: 30.313797 +epoch: 0, batch: 21645, sum loss: 5573.207031, avg loss: 3.299708, ppl: 27.104727 +epoch: 0, batch: 21646, sum loss: 5265.230469, avg loss: 3.013870, ppl: 20.366060 +epoch: 0, batch: 21647, sum loss: 4797.321289, avg loss: 3.051731, ppl: 
21.151930 +epoch: 0, batch: 21648, sum loss: 5328.414062, avg loss: 3.128840, ppl: 22.847454 +epoch: 0, batch: 21649, sum loss: 5179.703125, avg loss: 3.341744, ppl: 28.268389 +epoch: 0, batch: 21650, sum loss: 5388.334961, avg loss: 3.059815, ppl: 21.323620 +epoch: 0, batch: 21651, sum loss: 5361.237793, avg loss: 3.149963, ppl: 23.335211 +epoch: 0, batch: 21652, sum loss: 4573.820801, avg loss: 3.035050, ppl: 20.802027 +epoch: 0, batch: 21653, sum loss: 5274.008301, avg loss: 3.271717, ppl: 26.356564 +epoch: 0, batch: 21654, sum loss: 5957.867676, avg loss: 3.369835, ppl: 29.073727 +epoch: 0, batch: 21655, sum loss: 5055.835938, avg loss: 3.128611, ppl: 22.842237 +epoch: 0, batch: 21656, sum loss: 4615.199707, avg loss: 2.952783, ppl: 19.159197 +epoch: 0, batch: 21657, sum loss: 4586.290039, avg loss: 3.238905, ppl: 25.505789 +epoch: 0, batch: 21658, sum loss: 5166.979980, avg loss: 3.342160, ppl: 28.280159 +epoch: 0, batch: 21659, sum loss: 4493.799316, avg loss: 2.707108, ppl: 14.985874 +epoch: 0, batch: 21660, sum loss: 4332.884277, avg loss: 3.013132, ppl: 20.351046 +epoch: 0, batch: 21661, sum loss: 5786.420898, avg loss: 3.092689, ppl: 22.036255 +epoch: 0, batch: 21662, sum loss: 4712.113281, avg loss: 3.049912, ppl: 21.113482 +epoch: 0, batch: 21663, sum loss: 4589.590820, avg loss: 3.072015, ppl: 21.585360 +epoch: 0, batch: 21664, sum loss: 5996.082520, avg loss: 3.211613, ppl: 24.819098 +epoch: 0, batch: 21665, sum loss: 5313.907227, avg loss: 3.172482, ppl: 23.866653 +epoch: 0, batch: 21666, sum loss: 5904.258789, avg loss: 3.364250, ppl: 28.911804 +epoch: 0, batch: 21667, sum loss: 5547.495605, avg loss: 3.120076, ppl: 22.648109 +epoch: 0, batch: 21668, sum loss: 5346.123047, avg loss: 3.240075, ppl: 25.535627 +epoch: 0, batch: 21669, sum loss: 6037.824219, avg loss: 3.343203, ppl: 28.309652 +epoch: 0, batch: 21670, sum loss: 3978.654053, avg loss: 2.841896, ppl: 17.148245 +epoch: 0, batch: 21671, sum loss: 5075.940918, avg loss: 3.188405, ppl: 
24.249720 +epoch: 0, batch: 21672, sum loss: 4523.910156, avg loss: 3.122091, ppl: 22.693789 +epoch: 0, batch: 21673, sum loss: 4892.465820, avg loss: 2.950824, ppl: 19.121700 +epoch: 0, batch: 21674, sum loss: 4761.411133, avg loss: 2.983340, ppl: 19.753689 +epoch: 0, batch: 21675, sum loss: 5291.694824, avg loss: 3.262450, ppl: 26.113449 +epoch: 0, batch: 21676, sum loss: 5124.722168, avg loss: 3.109662, ppl: 22.413458 +epoch: 0, batch: 21677, sum loss: 6026.725586, avg loss: 3.366886, ppl: 28.988108 +epoch: 0, batch: 21678, sum loss: 5548.730469, avg loss: 3.101582, ppl: 22.233097 +epoch: 0, batch: 21679, sum loss: 5004.225586, avg loss: 3.010966, ppl: 20.307009 +epoch: 0, batch: 21680, sum loss: 6113.735352, avg loss: 3.238207, ppl: 25.487989 +epoch: 0, batch: 21681, sum loss: 6684.601562, avg loss: 3.429760, ppl: 30.869225 +epoch: 0, batch: 21682, sum loss: 5327.623047, avg loss: 3.246571, ppl: 25.702051 +epoch: 0, batch: 21683, sum loss: 6842.309570, avg loss: 3.622186, ppl: 37.419285 +epoch: 0, batch: 21684, sum loss: 6496.212402, avg loss: 3.195382, ppl: 24.419508 +epoch: 0, batch: 21685, sum loss: 5592.818359, avg loss: 3.361069, ppl: 28.819983 +epoch: 0, batch: 21686, sum loss: 5416.466797, avg loss: 3.191790, ppl: 24.331934 +epoch: 0, batch: 21687, sum loss: 5375.714844, avg loss: 3.428390, ppl: 30.826958 +epoch: 0, batch: 21688, sum loss: 5364.687988, avg loss: 2.977074, ppl: 19.630302 +epoch: 0, batch: 21689, sum loss: 5874.436035, avg loss: 3.012531, ppl: 20.338818 +epoch: 0, batch: 21690, sum loss: 4974.463867, avg loss: 3.140444, ppl: 23.114134 +epoch: 0, batch: 21691, sum loss: 6873.327148, avg loss: 3.466126, ppl: 32.012478 +epoch: 0, batch: 21692, sum loss: 5992.037598, avg loss: 3.551890, ppl: 34.879166 +epoch: 0, batch: 21693, sum loss: 5834.386230, avg loss: 3.512574, ppl: 33.534489 +epoch: 0, batch: 21694, sum loss: 5740.364746, avg loss: 3.111309, ppl: 22.450409 +epoch: 0, batch: 21695, sum loss: 6858.790039, avg loss: 3.355572, ppl: 
28.662008 +epoch: 0, batch: 21696, sum loss: 6570.822266, avg loss: 3.100907, ppl: 22.218096 +epoch: 0, batch: 21697, sum loss: 4581.193359, avg loss: 3.080829, ppl: 21.776457 +epoch: 0, batch: 21698, sum loss: 4491.061523, avg loss: 2.968316, ppl: 19.459120 +epoch: 0, batch: 21699, sum loss: 6339.617676, avg loss: 3.100058, ppl: 22.199230 +epoch: 0, batch: 21700, sum loss: 4841.303223, avg loss: 2.841141, ppl: 17.135298 +epoch: 0, batch: 21701, sum loss: 4603.598145, avg loss: 2.960513, ppl: 19.307880 +epoch: 0, batch: 21702, sum loss: 5645.752930, avg loss: 3.157580, ppl: 23.513622 +epoch: 0, batch: 21703, sum loss: 4753.675293, avg loss: 3.029748, ppl: 20.692026 +epoch: 0, batch: 21704, sum loss: 5685.429688, avg loss: 3.416725, ppl: 30.469454 +epoch: 0, batch: 21705, sum loss: 5734.887695, avg loss: 3.007282, ppl: 20.232344 +epoch: 0, batch: 21706, sum loss: 5211.284180, avg loss: 3.056472, ppl: 21.252443 +epoch: 0, batch: 21707, sum loss: 4599.152832, avg loss: 3.137212, ppl: 23.039543 +epoch: 0, batch: 21708, sum loss: 5833.518555, avg loss: 3.231866, ppl: 25.326876 +epoch: 0, batch: 21709, sum loss: 6325.073242, avg loss: 3.380584, ppl: 29.387943 +epoch: 0, batch: 21710, sum loss: 5336.886719, avg loss: 3.300487, ppl: 27.125834 +epoch: 0, batch: 21711, sum loss: 5172.089355, avg loss: 3.208492, ppl: 24.741749 +epoch: 0, batch: 21712, sum loss: 5721.752441, avg loss: 3.081181, ppl: 21.784105 +epoch: 0, batch: 21713, sum loss: 5297.431641, avg loss: 3.247966, ppl: 25.737925 +epoch: 0, batch: 21714, sum loss: 4652.723633, avg loss: 3.215428, ppl: 24.913944 +epoch: 0, batch: 21715, sum loss: 5513.932129, avg loss: 3.254977, ppl: 25.919031 +epoch: 0, batch: 21716, sum loss: 5278.725586, avg loss: 3.185712, ppl: 24.184509 +epoch: 0, batch: 21717, sum loss: 5405.350586, avg loss: 3.252317, ppl: 25.850164 +epoch: 0, batch: 21718, sum loss: 4987.384277, avg loss: 3.142649, ppl: 23.165159 +epoch: 0, batch: 21719, sum loss: 4308.099609, avg loss: 2.946717, ppl: 
19.043327 +epoch: 0, batch: 21720, sum loss: 5705.741211, avg loss: 3.157577, ppl: 23.513544 +epoch: 0, batch: 21721, sum loss: 5885.110840, avg loss: 3.175991, ppl: 23.950539 +epoch: 0, batch: 21722, sum loss: 5768.391602, avg loss: 3.326639, ppl: 27.844589 +epoch: 0, batch: 21723, sum loss: 5063.667480, avg loss: 2.903479, ppl: 18.237480 +epoch: 0, batch: 21724, sum loss: 4816.603516, avg loss: 3.029310, ppl: 20.682961 +epoch: 0, batch: 21725, sum loss: 5280.685059, avg loss: 3.064820, ppl: 21.430605 +epoch: 0, batch: 21726, sum loss: 4917.409668, avg loss: 2.951626, ppl: 19.137051 +epoch: 0, batch: 21727, sum loss: 5825.061035, avg loss: 3.256043, ppl: 25.946661 +epoch: 0, batch: 21728, sum loss: 5684.625000, avg loss: 3.062837, ppl: 21.388142 +epoch: 0, batch: 21729, sum loss: 5228.641602, avg loss: 3.054113, ppl: 21.202370 +epoch: 0, batch: 21730, sum loss: 5008.537598, avg loss: 3.171968, ppl: 23.854383 +epoch: 0, batch: 21731, sum loss: 5251.623047, avg loss: 2.973739, ppl: 19.564934 +epoch: 0, batch: 21732, sum loss: 4680.041504, avg loss: 2.943422, ppl: 18.980694 +epoch: 0, batch: 21733, sum loss: 5300.603516, avg loss: 3.034117, ppl: 20.782629 +epoch: 0, batch: 21734, sum loss: 5188.398926, avg loss: 3.103110, ppl: 22.267086 +epoch: 0, batch: 21735, sum loss: 4234.154785, avg loss: 3.138736, ppl: 23.074677 +epoch: 0, batch: 21736, sum loss: 6925.471680, avg loss: 3.381578, ppl: 29.417154 +epoch: 0, batch: 21737, sum loss: 4896.664062, avg loss: 2.892300, ppl: 18.034744 +epoch: 0, batch: 21738, sum loss: 5093.556641, avg loss: 3.150004, ppl: 23.336161 +epoch: 0, batch: 21739, sum loss: 4395.944336, avg loss: 2.886372, ppl: 17.928146 +epoch: 0, batch: 21740, sum loss: 6429.844238, avg loss: 3.420130, ppl: 30.573383 +epoch: 0, batch: 21741, sum loss: 5138.299805, avg loss: 3.112235, ppl: 22.471212 +epoch: 0, batch: 21742, sum loss: 6535.499512, avg loss: 3.439737, ppl: 31.178753 +epoch: 0, batch: 21743, sum loss: 5767.663086, avg loss: 3.087614, ppl: 
21.924704 +epoch: 0, batch: 21744, sum loss: 4835.503418, avg loss: 2.992267, ppl: 19.930813 +epoch: 0, batch: 21745, sum loss: 4783.590820, avg loss: 2.967488, ppl: 19.443018 +epoch: 0, batch: 21746, sum loss: 6060.859863, avg loss: 3.372766, ppl: 29.159063 +epoch: 0, batch: 21747, sum loss: 5372.049316, avg loss: 3.226456, ppl: 25.190229 +epoch: 0, batch: 21748, sum loss: 5079.326172, avg loss: 3.114240, ppl: 22.516321 +epoch: 0, batch: 21749, sum loss: 4580.060547, avg loss: 2.858964, ppl: 17.443445 +epoch: 0, batch: 21750, sum loss: 5836.792969, avg loss: 3.139749, ppl: 23.098059 +epoch: 0, batch: 21751, sum loss: 5560.467773, avg loss: 3.307833, ppl: 27.325851 +epoch: 0, batch: 21752, sum loss: 4443.030762, avg loss: 3.053629, ppl: 21.192120 +epoch: 0, batch: 21753, sum loss: 4537.960449, avg loss: 2.895955, ppl: 18.100773 +epoch: 0, batch: 21754, sum loss: 5191.326660, avg loss: 2.990396, ppl: 19.893549 +epoch: 0, batch: 21755, sum loss: 5526.227051, avg loss: 3.185145, ppl: 24.170801 +epoch: 0, batch: 21756, sum loss: 5357.719727, avg loss: 3.307235, ppl: 27.309498 +epoch: 0, batch: 21757, sum loss: 5868.894531, avg loss: 3.125077, ppl: 22.761648 +epoch: 0, batch: 21758, sum loss: 7346.644043, avg loss: 3.379321, ppl: 29.350838 +epoch: 0, batch: 21759, sum loss: 5225.200684, avg loss: 3.201716, ppl: 24.574669 +epoch: 0, batch: 21760, sum loss: 5175.261719, avg loss: 3.192635, ppl: 24.352512 +epoch: 0, batch: 21761, sum loss: 4943.975586, avg loss: 3.208291, ppl: 24.736765 +epoch: 0, batch: 21762, sum loss: 5755.666504, avg loss: 3.336618, ppl: 28.123863 +epoch: 0, batch: 21763, sum loss: 5427.604492, avg loss: 3.192709, ppl: 24.354307 +epoch: 0, batch: 21764, sum loss: 4902.207031, avg loss: 3.031668, ppl: 20.731783 +epoch: 0, batch: 21765, sum loss: 4064.757324, avg loss: 2.936963, ppl: 18.858492 +epoch: 0, batch: 21766, sum loss: 5870.507812, avg loss: 3.171533, ppl: 23.844011 +epoch: 0, batch: 21767, sum loss: 4246.390625, avg loss: 2.838496, ppl: 
17.090050 +epoch: 0, batch: 21768, sum loss: 4039.604004, avg loss: 2.830837, ppl: 16.959646 +epoch: 0, batch: 21769, sum loss: 6086.365723, avg loss: 3.425079, ppl: 30.725075 +epoch: 0, batch: 21770, sum loss: 4269.757812, avg loss: 2.879136, ppl: 17.798882 +epoch: 0, batch: 21771, sum loss: 5166.975586, avg loss: 2.978084, ppl: 19.650127 +epoch: 0, batch: 21772, sum loss: 5358.362793, avg loss: 3.363693, ppl: 28.895706 +epoch: 0, batch: 21773, sum loss: 6370.894043, avg loss: 3.217623, ppl: 24.968699 +epoch: 0, batch: 21774, sum loss: 5368.848633, avg loss: 3.043565, ppl: 20.979904 +epoch: 0, batch: 21775, sum loss: 5552.970703, avg loss: 3.419317, ppl: 30.548536 +epoch: 0, batch: 21776, sum loss: 4503.994629, avg loss: 3.043240, ppl: 20.973078 +epoch: 0, batch: 21777, sum loss: 5625.718262, avg loss: 3.246231, ppl: 25.693316 +epoch: 0, batch: 21778, sum loss: 4702.503906, avg loss: 2.989513, ppl: 19.875999 +epoch: 0, batch: 21779, sum loss: 5902.963867, avg loss: 3.450008, ppl: 31.500656 +epoch: 0, batch: 21780, sum loss: 4854.357422, avg loss: 3.093918, ppl: 22.063354 +epoch: 0, batch: 21781, sum loss: 4172.682129, avg loss: 2.752429, ppl: 15.680675 +epoch: 0, batch: 21782, sum loss: 6322.762695, avg loss: 3.156646, ppl: 23.491684 +epoch: 0, batch: 21783, sum loss: 5939.187500, avg loss: 3.421191, ppl: 30.605844 +epoch: 0, batch: 21784, sum loss: 4672.449707, avg loss: 3.187210, ppl: 24.220753 +epoch: 0, batch: 21785, sum loss: 5010.698730, avg loss: 3.187467, ppl: 24.226992 +epoch: 0, batch: 21786, sum loss: 5664.153320, avg loss: 3.412141, ppl: 30.330099 +epoch: 0, batch: 21787, sum loss: 6355.287598, avg loss: 3.334358, ppl: 28.060375 +epoch: 0, batch: 21788, sum loss: 4906.205078, avg loss: 2.977066, ppl: 19.630142 +epoch: 0, batch: 21789, sum loss: 5199.411621, avg loss: 2.904699, ppl: 18.259748 +epoch: 0, batch: 21790, sum loss: 5797.058594, avg loss: 3.178212, ppl: 24.003794 +epoch: 0, batch: 21791, sum loss: 5170.975586, avg loss: 3.122570, ppl: 
22.704651 +epoch: 0, batch: 21792, sum loss: 6571.537109, avg loss: 3.491784, ppl: 32.844486 +epoch: 0, batch: 21793, sum loss: 5989.068359, avg loss: 3.230350, ppl: 25.288494 +epoch: 0, batch: 21794, sum loss: 4010.086914, avg loss: 3.003810, ppl: 20.162216 +epoch: 0, batch: 21795, sum loss: 5881.892578, avg loss: 3.258666, ppl: 26.014818 +epoch: 0, batch: 21796, sum loss: 5130.053711, avg loss: 3.202281, ppl: 24.588547 +epoch: 0, batch: 21797, sum loss: 5606.608887, avg loss: 3.097574, ppl: 22.144169 +epoch: 0, batch: 21798, sum loss: 4705.990723, avg loss: 3.207901, ppl: 24.727123 +epoch: 0, batch: 21799, sum loss: 4800.990234, avg loss: 3.008139, ppl: 20.249683 +epoch: 0, batch: 21800, sum loss: 5287.307617, avg loss: 3.273875, ppl: 26.413486 +epoch: 0, batch: 21801, sum loss: 5713.258789, avg loss: 3.238809, ppl: 25.503326 +epoch: 0, batch: 21802, sum loss: 4034.389648, avg loss: 2.770872, ppl: 15.972558 +epoch: 0, batch: 21803, sum loss: 4887.460938, avg loss: 3.054663, ppl: 21.214033 +epoch: 0, batch: 21804, sum loss: 4565.577637, avg loss: 3.120696, ppl: 22.662144 +epoch: 0, batch: 21805, sum loss: 5792.654297, avg loss: 3.250648, ppl: 25.807053 +epoch: 0, batch: 21806, sum loss: 4443.423340, avg loss: 2.980163, ppl: 19.691032 +epoch: 0, batch: 21807, sum loss: 4086.630859, avg loss: 2.925290, ppl: 18.639637 +epoch: 0, batch: 21808, sum loss: 4565.098633, avg loss: 2.928222, ppl: 18.694370 +epoch: 0, batch: 21809, sum loss: 5453.359863, avg loss: 3.444952, ppl: 31.341795 +epoch: 0, batch: 21810, sum loss: 5930.965332, avg loss: 3.379467, ppl: 29.355127 +epoch: 0, batch: 21811, sum loss: 5591.753418, avg loss: 3.111716, ppl: 22.459553 +epoch: 0, batch: 21812, sum loss: 5602.441895, avg loss: 3.247792, ppl: 25.733471 +epoch: 0, batch: 21813, sum loss: 5039.057617, avg loss: 3.137645, ppl: 23.049517 +epoch: 0, batch: 21814, sum loss: 5865.811035, avg loss: 3.310277, ppl: 27.392719 +epoch: 0, batch: 21815, sum loss: 5694.928711, avg loss: 3.155085, ppl: 
23.455032 +epoch: 0, batch: 21816, sum loss: 5277.429199, avg loss: 3.041746, ppl: 20.941774 +epoch: 0, batch: 21817, sum loss: 4966.098145, avg loss: 2.995234, ppl: 19.990042 +epoch: 0, batch: 21818, sum loss: 4971.523438, avg loss: 3.072635, ppl: 21.598740 +epoch: 0, batch: 21819, sum loss: 5402.501465, avg loss: 3.200534, ppl: 24.545637 +epoch: 0, batch: 21820, sum loss: 6062.271973, avg loss: 3.273365, ppl: 26.400026 +epoch: 0, batch: 21821, sum loss: 5280.722656, avg loss: 3.002116, ppl: 20.128092 +epoch: 0, batch: 21822, sum loss: 5819.891602, avg loss: 3.289933, ppl: 26.841070 +epoch: 0, batch: 21823, sum loss: 5711.604004, avg loss: 3.303415, ppl: 27.205381 +epoch: 0, batch: 21824, sum loss: 5402.758789, avg loss: 3.185589, ppl: 24.181528 +epoch: 0, batch: 21825, sum loss: 4795.997070, avg loss: 3.072388, ppl: 21.593399 +epoch: 0, batch: 21826, sum loss: 4937.724609, avg loss: 3.322829, ppl: 27.738726 +epoch: 0, batch: 21827, sum loss: 4700.777832, avg loss: 3.013319, ppl: 20.354851 +epoch: 0, batch: 21828, sum loss: 4597.621094, avg loss: 3.054898, ppl: 21.219017 +epoch: 0, batch: 21829, sum loss: 4861.464844, avg loss: 3.030838, ppl: 20.714594 +epoch: 0, batch: 21830, sum loss: 5561.668945, avg loss: 2.875734, ppl: 17.738432 +epoch: 0, batch: 21831, sum loss: 4648.025391, avg loss: 3.039912, ppl: 20.903408 +epoch: 0, batch: 21832, sum loss: 5453.813477, avg loss: 2.888672, ppl: 17.969437 +epoch: 0, batch: 21833, sum loss: 4916.355957, avg loss: 2.931637, ppl: 18.758318 +epoch: 0, batch: 21834, sum loss: 5900.159668, avg loss: 3.354270, ppl: 28.624687 +epoch: 0, batch: 21835, sum loss: 6438.762695, avg loss: 3.280063, ppl: 26.577438 +epoch: 0, batch: 21836, sum loss: 5067.201660, avg loss: 3.239899, ppl: 25.531136 +epoch: 0, batch: 21837, sum loss: 5244.662598, avg loss: 3.227485, ppl: 25.216150 +epoch: 0, batch: 21838, sum loss: 5902.038086, avg loss: 3.198936, ppl: 24.506453 +epoch: 0, batch: 21839, sum loss: 4737.780273, avg loss: 3.127248, ppl: 
22.811113 +epoch: 0, batch: 21840, sum loss: 5486.063477, avg loss: 3.343122, ppl: 28.307365 +epoch: 0, batch: 21841, sum loss: 4578.430176, avg loss: 2.817495, ppl: 16.734882 +epoch: 0, batch: 21842, sum loss: 5836.739258, avg loss: 3.038386, ppl: 20.871532 +epoch: 0, batch: 21843, sum loss: 5561.400879, avg loss: 3.346210, ppl: 28.394913 +epoch: 0, batch: 21844, sum loss: 6329.180176, avg loss: 3.282770, ppl: 26.649487 +epoch: 0, batch: 21845, sum loss: 5796.304688, avg loss: 3.170845, ppl: 23.827610 +epoch: 0, batch: 21846, sum loss: 5702.697266, avg loss: 3.136797, ppl: 23.029987 +epoch: 0, batch: 21847, sum loss: 4736.217773, avg loss: 2.877411, ppl: 17.768206 +epoch: 0, batch: 21848, sum loss: 5792.168945, avg loss: 3.317394, ppl: 27.588348 +epoch: 0, batch: 21849, sum loss: 5765.122070, avg loss: 3.377342, ppl: 29.292793 +epoch: 0, batch: 21850, sum loss: 5282.483887, avg loss: 3.384038, ppl: 29.489624 +epoch: 0, batch: 21851, sum loss: 6380.865234, avg loss: 3.232454, ppl: 25.341778 +epoch: 0, batch: 21852, sum loss: 5232.815918, avg loss: 3.109219, ppl: 22.403542 +epoch: 0, batch: 21853, sum loss: 5783.996582, avg loss: 3.400351, ppl: 29.974606 +epoch: 0, batch: 21854, sum loss: 4857.928223, avg loss: 3.036205, ppl: 20.826059 +epoch: 0, batch: 21855, sum loss: 5302.921875, avg loss: 3.273409, ppl: 26.401178 +epoch: 0, batch: 21856, sum loss: 5707.432617, avg loss: 3.331835, ppl: 27.989643 +epoch: 0, batch: 21857, sum loss: 5479.239258, avg loss: 3.090378, ppl: 21.985378 +epoch: 0, batch: 21858, sum loss: 6157.248535, avg loss: 3.474745, ppl: 32.289604 +epoch: 0, batch: 21859, sum loss: 5918.797852, avg loss: 3.007519, ppl: 20.237139 +epoch: 0, batch: 21860, sum loss: 4937.234863, avg loss: 2.878854, ppl: 17.793871 +epoch: 0, batch: 21861, sum loss: 5944.290527, avg loss: 3.379358, ppl: 29.351923 +epoch: 0, batch: 21862, sum loss: 4571.094238, avg loss: 3.107474, ppl: 22.364483 +epoch: 0, batch: 21863, sum loss: 4810.439941, avg loss: 2.889153, ppl: 
17.978081 +epoch: 0, batch: 21864, sum loss: 6611.678223, avg loss: 3.329143, ppl: 27.914410 +epoch: 0, batch: 21865, sum loss: 5768.700195, avg loss: 3.281399, ppl: 26.612986 +epoch: 0, batch: 21866, sum loss: 5708.972656, avg loss: 3.117954, ppl: 22.600082 +epoch: 0, batch: 21867, sum loss: 5037.119629, avg loss: 2.989388, ppl: 19.873526 +epoch: 0, batch: 21868, sum loss: 5864.642090, avg loss: 3.224102, ppl: 25.131002 +epoch: 0, batch: 21869, sum loss: 5570.811035, avg loss: 3.163436, ppl: 23.651722 +epoch: 0, batch: 21870, sum loss: 5334.002930, avg loss: 3.065519, ppl: 21.445587 +epoch: 0, batch: 21871, sum loss: 7127.622070, avg loss: 3.466742, ppl: 32.032219 +epoch: 0, batch: 21872, sum loss: 5245.549805, avg loss: 3.044428, ppl: 20.998024 +epoch: 0, batch: 21873, sum loss: 5056.745605, avg loss: 3.178344, ppl: 24.006971 +epoch: 0, batch: 21874, sum loss: 5846.154785, avg loss: 3.319793, ppl: 27.654612 +epoch: 0, batch: 21875, sum loss: 5924.439453, avg loss: 3.293185, ppl: 26.928488 +epoch: 0, batch: 21876, sum loss: 5140.667969, avg loss: 3.255648, ppl: 25.936413 +epoch: 0, batch: 21877, sum loss: 5116.202637, avg loss: 3.085768, ppl: 21.884256 +epoch: 0, batch: 21878, sum loss: 6284.838379, avg loss: 3.389880, ppl: 29.662405 +epoch: 0, batch: 21879, sum loss: 5964.034180, avg loss: 3.550020, ppl: 34.814030 +epoch: 0, batch: 21880, sum loss: 5609.346191, avg loss: 3.207173, ppl: 24.709143 +epoch: 0, batch: 21881, sum loss: 5216.481445, avg loss: 3.148148, ppl: 23.292887 +epoch: 0, batch: 21882, sum loss: 5611.676270, avg loss: 3.172231, ppl: 23.860657 +epoch: 0, batch: 21883, sum loss: 4720.539062, avg loss: 3.107662, ppl: 22.368690 +epoch: 0, batch: 21884, sum loss: 5000.859863, avg loss: 3.102270, ppl: 22.248405 +epoch: 0, batch: 21885, sum loss: 5723.382812, avg loss: 3.356823, ppl: 28.697872 +epoch: 0, batch: 21886, sum loss: 5854.323242, avg loss: 3.345328, ppl: 28.369869 +epoch: 0, batch: 21887, sum loss: 5694.667480, avg loss: 3.330215, ppl: 
27.944349 +epoch: 0, batch: 21888, sum loss: 4888.695801, avg loss: 3.001041, ppl: 20.106455 +epoch: 0, batch: 21889, sum loss: 4918.562500, avg loss: 3.111045, ppl: 22.444489 +epoch: 0, batch: 21890, sum loss: 5691.614258, avg loss: 3.224711, ppl: 25.146297 +epoch: 0, batch: 21891, sum loss: 5304.447266, avg loss: 3.005352, ppl: 20.193333 +epoch: 0, batch: 21892, sum loss: 4641.964844, avg loss: 2.786293, ppl: 16.220781 +epoch: 0, batch: 21893, sum loss: 5797.966797, avg loss: 3.149357, ppl: 23.321072 +epoch: 0, batch: 21894, sum loss: 5112.769043, avg loss: 3.019946, ppl: 20.490192 +epoch: 0, batch: 21895, sum loss: 5037.990723, avg loss: 3.068204, ppl: 21.503246 +epoch: 0, batch: 21896, sum loss: 5693.798340, avg loss: 3.033457, ppl: 20.768902 +epoch: 0, batch: 21897, sum loss: 5172.635742, avg loss: 3.365411, ppl: 28.945379 +epoch: 0, batch: 21898, sum loss: 6265.659180, avg loss: 3.431358, ppl: 30.918598 +epoch: 0, batch: 21899, sum loss: 4766.007324, avg loss: 3.084795, ppl: 21.862974 +epoch: 0, batch: 21900, sum loss: 4853.527344, avg loss: 3.182641, ppl: 24.110340 +epoch: 0, batch: 21901, sum loss: 5562.127930, avg loss: 3.344635, ppl: 28.350225 +epoch: 0, batch: 21902, sum loss: 4655.086426, avg loss: 2.991701, ppl: 19.919535 +epoch: 0, batch: 21903, sum loss: 5382.855957, avg loss: 3.411189, ppl: 30.301247 +epoch: 0, batch: 21904, sum loss: 5181.572266, avg loss: 3.147978, ppl: 23.288935 +epoch: 0, batch: 21905, sum loss: 5456.167969, avg loss: 2.997895, ppl: 20.043291 +epoch: 0, batch: 21906, sum loss: 5142.565918, avg loss: 3.164656, ppl: 23.680595 +epoch: 0, batch: 21907, sum loss: 5032.719238, avg loss: 2.936242, ppl: 18.844900 +epoch: 0, batch: 21908, sum loss: 6135.584961, avg loss: 3.162673, ppl: 23.633678 +epoch: 0, batch: 21909, sum loss: 6145.921875, avg loss: 3.613123, ppl: 37.081669 +epoch: 0, batch: 21910, sum loss: 5175.833008, avg loss: 3.189053, ppl: 24.265432 +epoch: 0, batch: 21911, sum loss: 4566.216797, avg loss: 3.217912, ppl: 
24.975914 +epoch: 0, batch: 21912, sum loss: 5766.345703, avg loss: 3.115260, ppl: 22.539282 +epoch: 0, batch: 21913, sum loss: 4644.045898, avg loss: 3.100164, ppl: 22.201591 +epoch: 0, batch: 21914, sum loss: 5946.569824, avg loss: 3.329546, ppl: 27.925673 +epoch: 0, batch: 21915, sum loss: 5317.336426, avg loss: 3.038478, ppl: 20.873447 +epoch: 0, batch: 21916, sum loss: 5537.275391, avg loss: 3.321701, ppl: 27.707436 +epoch: 0, batch: 21917, sum loss: 4861.735352, avg loss: 2.895614, ppl: 18.094603 +epoch: 0, batch: 21918, sum loss: 5282.263184, avg loss: 3.270751, ppl: 26.331112 +epoch: 0, batch: 21919, sum loss: 5247.992188, avg loss: 3.165255, ppl: 23.694777 +epoch: 0, batch: 21920, sum loss: 5023.244629, avg loss: 3.179269, ppl: 24.029179 +epoch: 0, batch: 21921, sum loss: 5580.033203, avg loss: 3.134850, ppl: 22.985193 +epoch: 0, batch: 21922, sum loss: 5701.666992, avg loss: 3.349981, ppl: 28.502180 +epoch: 0, batch: 21923, sum loss: 5721.163086, avg loss: 3.208729, ppl: 24.747606 +epoch: 0, batch: 21924, sum loss: 5797.035156, avg loss: 3.126772, ppl: 22.800261 +epoch: 0, batch: 21925, sum loss: 5129.375488, avg loss: 2.953008, ppl: 19.163519 +epoch: 0, batch: 21926, sum loss: 5450.777832, avg loss: 3.210117, ppl: 24.781977 +epoch: 0, batch: 21927, sum loss: 4601.261230, avg loss: 2.921436, ppl: 18.567924 +epoch: 0, batch: 21928, sum loss: 4556.251953, avg loss: 3.142243, ppl: 23.155745 +epoch: 0, batch: 21929, sum loss: 4824.917969, avg loss: 3.051814, ppl: 21.153679 +epoch: 0, batch: 21930, sum loss: 4945.490234, avg loss: 3.257899, ppl: 25.994860 +epoch: 0, batch: 21931, sum loss: 4359.749023, avg loss: 3.023404, ppl: 20.561171 +epoch: 0, batch: 21932, sum loss: 4800.270508, avg loss: 3.206593, ppl: 24.694815 +epoch: 0, batch: 21933, sum loss: 4576.726562, avg loss: 2.918831, ppl: 18.519619 +epoch: 0, batch: 21934, sum loss: 5061.316895, avg loss: 3.038005, ppl: 20.863586 +epoch: 0, batch: 21935, sum loss: 5868.388184, avg loss: 3.300556, ppl: 
27.127716 +epoch: 0, batch: 21936, sum loss: 5814.976562, avg loss: 3.109613, ppl: 22.412373 +epoch: 0, batch: 21937, sum loss: 4885.433594, avg loss: 3.131688, ppl: 22.912632 +epoch: 0, batch: 21938, sum loss: 5178.641602, avg loss: 3.429564, ppl: 30.863184 +epoch: 0, batch: 21939, sum loss: 5946.404297, avg loss: 3.272650, ppl: 26.381144 +epoch: 0, batch: 21940, sum loss: 5515.376953, avg loss: 3.166118, ppl: 23.715235 +epoch: 0, batch: 21941, sum loss: 4496.887695, avg loss: 2.933391, ppl: 18.791237 +epoch: 0, batch: 21942, sum loss: 5158.812988, avg loss: 3.260944, ppl: 26.074131 +epoch: 0, batch: 21943, sum loss: 5254.110352, avg loss: 3.261397, ppl: 26.085957 +epoch: 0, batch: 21944, sum loss: 4909.308594, avg loss: 3.030437, ppl: 20.706289 +epoch: 0, batch: 21945, sum loss: 4624.029297, avg loss: 2.935891, ppl: 18.838287 +epoch: 0, batch: 21946, sum loss: 5301.141602, avg loss: 3.238327, ppl: 25.491047 +epoch: 0, batch: 21947, sum loss: 4810.296875, avg loss: 3.148100, ppl: 23.291771 +epoch: 0, batch: 21948, sum loss: 5563.159180, avg loss: 3.236277, ppl: 25.438828 +epoch: 0, batch: 21949, sum loss: 5073.591797, avg loss: 3.074904, ppl: 21.647808 +epoch: 0, batch: 21950, sum loss: 5189.653809, avg loss: 3.162495, ppl: 23.629471 +epoch: 0, batch: 21951, sum loss: 5776.687500, avg loss: 3.064556, ppl: 21.424940 +epoch: 0, batch: 21952, sum loss: 5471.562500, avg loss: 2.985031, ppl: 19.787123 +epoch: 0, batch: 21953, sum loss: 5323.933105, avg loss: 3.207189, ppl: 24.709520 +epoch: 0, batch: 21954, sum loss: 4965.758301, avg loss: 3.129022, ppl: 22.851627 +epoch: 0, batch: 21955, sum loss: 6110.125000, avg loss: 3.234582, ppl: 25.395752 +epoch: 0, batch: 21956, sum loss: 5875.013184, avg loss: 3.116718, ppl: 22.572172 +epoch: 0, batch: 21957, sum loss: 6229.175781, avg loss: 3.426389, ppl: 30.765354 +epoch: 0, batch: 21958, sum loss: 5449.045898, avg loss: 3.111962, ppl: 22.465084 +epoch: 0, batch: 21959, sum loss: 6203.266113, avg loss: 3.229186, ppl: 
25.259089 +epoch: 0, batch: 21960, sum loss: 5933.751465, avg loss: 3.242487, ppl: 25.597307 +epoch: 0, batch: 21961, sum loss: 4316.293945, avg loss: 3.003684, ppl: 20.159670 +epoch: 0, batch: 21962, sum loss: 5067.785156, avg loss: 3.179288, ppl: 24.029636 +epoch: 0, batch: 21963, sum loss: 5170.226074, avg loss: 3.112719, ppl: 22.482086 +epoch: 0, batch: 21964, sum loss: 5354.025879, avg loss: 3.121881, ppl: 22.689013 +epoch: 0, batch: 21965, sum loss: 5601.432617, avg loss: 3.330221, ppl: 27.944529 +epoch: 0, batch: 21966, sum loss: 7002.704590, avg loss: 3.365067, ppl: 28.935436 +epoch: 0, batch: 21967, sum loss: 5747.186035, avg loss: 3.360928, ppl: 28.815908 +epoch: 0, batch: 21968, sum loss: 5021.683594, avg loss: 3.037921, ppl: 20.861826 +epoch: 0, batch: 21969, sum loss: 6107.018066, avg loss: 3.100009, ppl: 22.198156 +epoch: 0, batch: 21970, sum loss: 5917.355469, avg loss: 3.183085, ppl: 24.121058 +epoch: 0, batch: 21971, sum loss: 5385.820312, avg loss: 3.355651, ppl: 28.664263 +epoch: 0, batch: 21972, sum loss: 6132.399414, avg loss: 3.202297, ppl: 24.588951 +epoch: 0, batch: 21973, sum loss: 6248.197754, avg loss: 3.283341, ppl: 26.664709 +epoch: 0, batch: 21974, sum loss: 5276.791992, avg loss: 3.289771, ppl: 26.836706 +epoch: 0, batch: 21975, sum loss: 5691.926270, avg loss: 3.254389, ppl: 25.903778 +epoch: 0, batch: 21976, sum loss: 4909.809570, avg loss: 3.064800, ppl: 21.430176 +epoch: 0, batch: 21977, sum loss: 4209.957031, avg loss: 2.897424, ppl: 18.127388 +epoch: 0, batch: 21978, sum loss: 6811.578125, avg loss: 3.279527, ppl: 26.563210 +epoch: 0, batch: 21979, sum loss: 6136.770508, avg loss: 3.394232, ppl: 29.791752 +epoch: 0, batch: 21980, sum loss: 4954.399414, avg loss: 3.167774, ppl: 23.754559 +epoch: 0, batch: 21981, sum loss: 4736.837891, avg loss: 3.120447, ppl: 22.656498 +epoch: 0, batch: 21982, sum loss: 5649.977539, avg loss: 3.215696, ppl: 24.920622 +epoch: 0, batch: 21983, sum loss: 5040.585938, avg loss: 3.094282, ppl: 
22.071394 +epoch: 0, batch: 21984, sum loss: 5342.791992, avg loss: 3.124440, ppl: 22.747147 +epoch: 0, batch: 21985, sum loss: 4531.996094, avg loss: 2.956292, ppl: 19.226555 +epoch: 0, batch: 21986, sum loss: 5030.192383, avg loss: 3.157685, ppl: 23.516094 +epoch: 0, batch: 21987, sum loss: 5643.898438, avg loss: 3.179661, ppl: 24.038599 +epoch: 0, batch: 21988, sum loss: 5483.967773, avg loss: 3.376827, ppl: 29.277739 +epoch: 0, batch: 21989, sum loss: 5351.728516, avg loss: 3.271228, ppl: 26.343666 +epoch: 0, batch: 21990, sum loss: 6185.616211, avg loss: 3.394959, ppl: 29.813444 +epoch: 0, batch: 21991, sum loss: 5582.004883, avg loss: 2.993032, ppl: 19.946068 +epoch: 0, batch: 21992, sum loss: 5910.327148, avg loss: 3.385067, ppl: 29.519978 +epoch: 0, batch: 21993, sum loss: 4259.703125, avg loss: 2.847395, ppl: 17.242809 +epoch: 0, batch: 21994, sum loss: 4804.599121, avg loss: 3.203066, ppl: 24.607864 +epoch: 0, batch: 21995, sum loss: 4766.859863, avg loss: 2.885509, ppl: 17.912674 +epoch: 0, batch: 21996, sum loss: 5604.404785, avg loss: 3.413158, ppl: 30.360964 +epoch: 0, batch: 21997, sum loss: 5704.283691, avg loss: 3.100154, ppl: 22.201368 +epoch: 0, batch: 21998, sum loss: 5094.700684, avg loss: 3.426160, ppl: 30.758314 +epoch: 0, batch: 21999, sum loss: 5515.598145, avg loss: 3.367276, ppl: 28.999416 +epoch: 0, batch: 22000, sum loss: 5132.189453, avg loss: 2.944458, ppl: 19.000353 +epoch: 0, batch: 22001, sum loss: 6651.464844, avg loss: 3.541781, ppl: 34.528358 +epoch: 0, batch: 22002, sum loss: 6061.127441, avg loss: 3.205250, ppl: 24.661659 +epoch: 0, batch: 22003, sum loss: 6577.980469, avg loss: 3.371594, ppl: 29.124928 +epoch: 0, batch: 22004, sum loss: 6598.208496, avg loss: 3.359577, ppl: 28.777006 +epoch: 0, batch: 22005, sum loss: 6055.644531, avg loss: 3.462347, ppl: 31.891731 +epoch: 0, batch: 22006, sum loss: 4586.383301, avg loss: 3.009438, ppl: 20.275997 +epoch: 0, batch: 22007, sum loss: 6376.254395, avg loss: 3.556193, ppl: 
35.029598 +epoch: 0, batch: 22008, sum loss: 5674.287109, avg loss: 3.078832, ppl: 21.733002 +epoch: 0, batch: 22009, sum loss: 6253.931641, avg loss: 3.152183, ppl: 23.387070 +epoch: 0, batch: 22010, sum loss: 5629.060547, avg loss: 3.391001, ppl: 29.695648 +epoch: 0, batch: 22011, sum loss: 5015.326172, avg loss: 2.892345, ppl: 18.035553 +epoch: 0, batch: 22012, sum loss: 5961.855957, avg loss: 3.073122, ppl: 21.609257 +epoch: 0, batch: 22013, sum loss: 5303.620605, avg loss: 3.147549, ppl: 23.278948 +epoch: 0, batch: 22014, sum loss: 4598.497070, avg loss: 3.005554, ppl: 20.197397 +epoch: 0, batch: 22015, sum loss: 5510.829102, avg loss: 3.127599, ppl: 22.819124 +epoch: 0, batch: 22016, sum loss: 4056.077148, avg loss: 2.939186, ppl: 18.900461 +epoch: 0, batch: 22017, sum loss: 5550.927734, avg loss: 3.118499, ppl: 22.612408 +epoch: 0, batch: 22018, sum loss: 5383.924316, avg loss: 2.812917, ppl: 16.658447 +epoch: 0, batch: 22019, sum loss: 4903.368164, avg loss: 2.989859, ppl: 19.882872 +epoch: 0, batch: 22020, sum loss: 4930.473633, avg loss: 3.148451, ppl: 23.299936 +epoch: 0, batch: 22021, sum loss: 5499.739258, avg loss: 3.401199, ppl: 30.000059 +epoch: 0, batch: 22022, sum loss: 4484.214844, avg loss: 3.015612, ppl: 20.401566 +epoch: 0, batch: 22023, sum loss: 6341.093750, avg loss: 3.323424, ppl: 27.755232 +epoch: 0, batch: 22024, sum loss: 5779.334961, avg loss: 3.207178, ppl: 24.709261 +epoch: 0, batch: 22025, sum loss: 4717.598633, avg loss: 2.824909, ppl: 16.859419 +epoch: 0, batch: 22026, sum loss: 4140.946289, avg loss: 2.903889, ppl: 18.244970 +epoch: 0, batch: 22027, sum loss: 6113.320312, avg loss: 3.503335, ppl: 33.226086 +epoch: 0, batch: 22028, sum loss: 5325.634766, avg loss: 3.108952, ppl: 22.397560 +epoch: 0, batch: 22029, sum loss: 4623.229492, avg loss: 2.798565, ppl: 16.421068 +epoch: 0, batch: 22030, sum loss: 4911.477051, avg loss: 3.075440, ppl: 21.659403 +epoch: 0, batch: 22031, sum loss: 5798.132324, avg loss: 3.309437, ppl: 
27.369699 +epoch: 0, batch: 22032, sum loss: 5152.070312, avg loss: 3.052175, ppl: 21.161310 +epoch: 0, batch: 22033, sum loss: 5587.941406, avg loss: 3.146363, ppl: 23.251356 +epoch: 0, batch: 22034, sum loss: 4841.566406, avg loss: 3.150011, ppl: 23.336317 +epoch: 0, batch: 22035, sum loss: 5888.503906, avg loss: 3.291506, ppl: 26.883314 +epoch: 0, batch: 22036, sum loss: 4939.980469, avg loss: 2.943969, ppl: 18.991077 +epoch: 0, batch: 22037, sum loss: 4319.504395, avg loss: 2.879670, ppl: 17.808390 +epoch: 0, batch: 22038, sum loss: 7017.348145, avg loss: 3.341594, ppl: 28.264151 +epoch: 0, batch: 22039, sum loss: 5725.416504, avg loss: 3.292361, ppl: 26.906328 +epoch: 0, batch: 22040, sum loss: 5979.026367, avg loss: 3.364674, ppl: 28.924076 +epoch: 0, batch: 22041, sum loss: 4860.258789, avg loss: 3.210211, ppl: 24.784304 +epoch: 0, batch: 22042, sum loss: 4814.948730, avg loss: 3.082554, ppl: 21.814035 +epoch: 0, batch: 22043, sum loss: 5020.289551, avg loss: 2.990047, ppl: 19.886621 +epoch: 0, batch: 22044, sum loss: 5631.385742, avg loss: 3.048937, ppl: 21.092907 +epoch: 0, batch: 22045, sum loss: 5129.383789, avg loss: 3.104954, ppl: 22.308193 +epoch: 0, batch: 22046, sum loss: 4505.235352, avg loss: 3.092131, ppl: 22.023964 +epoch: 0, batch: 22047, sum loss: 6494.000977, avg loss: 3.214852, ppl: 24.899609 +epoch: 0, batch: 22048, sum loss: 5424.395508, avg loss: 2.916342, ppl: 18.473583 +epoch: 0, batch: 22049, sum loss: 7036.082031, avg loss: 3.254432, ppl: 25.904896 +epoch: 0, batch: 22050, sum loss: 5093.875977, avg loss: 3.316326, ppl: 27.558903 +epoch: 0, batch: 22051, sum loss: 5856.040039, avg loss: 3.386952, ppl: 29.575672 +epoch: 0, batch: 22052, sum loss: 5915.765625, avg loss: 3.275618, ppl: 26.459568 +epoch: 0, batch: 22053, sum loss: 6204.969727, avg loss: 3.281317, ppl: 26.610783 +epoch: 0, batch: 22054, sum loss: 5077.692383, avg loss: 2.868753, ppl: 17.615034 +epoch: 0, batch: 22055, sum loss: 4343.868652, avg loss: 2.800689, ppl: 
16.455980 +epoch: 0, batch: 22056, sum loss: 4770.044434, avg loss: 2.975698, ppl: 19.603306 +epoch: 0, batch: 22057, sum loss: 4976.572754, avg loss: 3.159729, ppl: 23.564198 +epoch: 0, batch: 22058, sum loss: 4945.910645, avg loss: 2.792722, ppl: 16.325397 +epoch: 0, batch: 22059, sum loss: 4369.836914, avg loss: 3.007458, ppl: 20.235905 +epoch: 0, batch: 22060, sum loss: 4693.257812, avg loss: 2.931454, ppl: 18.754883 +epoch: 0, batch: 22061, sum loss: 5696.984375, avg loss: 3.137106, ppl: 23.037094 +epoch: 0, batch: 22062, sum loss: 5544.812012, avg loss: 3.308361, ppl: 27.340267 +epoch: 0, batch: 22063, sum loss: 4898.207031, avg loss: 3.137865, ppl: 23.054588 +epoch: 0, batch: 22064, sum loss: 6486.029785, avg loss: 3.319360, ppl: 27.642654 +epoch: 0, batch: 22065, sum loss: 5709.535645, avg loss: 3.313718, ppl: 27.487129 +epoch: 0, batch: 22066, sum loss: 5205.732422, avg loss: 3.217387, ppl: 24.962812 +epoch: 0, batch: 22067, sum loss: 5733.779785, avg loss: 3.064554, ppl: 21.424898 +epoch: 0, batch: 22068, sum loss: 4817.416992, avg loss: 2.882955, ppl: 17.866985 +epoch: 0, batch: 22069, sum loss: 4760.151855, avg loss: 2.831738, ppl: 16.974937 +epoch: 0, batch: 22070, sum loss: 4666.638184, avg loss: 2.964827, ppl: 19.391354 +epoch: 0, batch: 22071, sum loss: 5545.498535, avg loss: 3.389669, ppl: 29.656139 +epoch: 0, batch: 22072, sum loss: 4896.867676, avg loss: 3.068213, ppl: 21.503437 +epoch: 0, batch: 22073, sum loss: 4482.155273, avg loss: 3.026438, ppl: 20.623650 +epoch: 0, batch: 22074, sum loss: 5507.195312, avg loss: 3.323594, ppl: 27.759937 +epoch: 0, batch: 22075, sum loss: 5789.442383, avg loss: 3.425705, ppl: 30.744326 +epoch: 0, batch: 22076, sum loss: 6497.617676, avg loss: 3.398335, ppl: 29.914265 +epoch: 0, batch: 22077, sum loss: 5298.679688, avg loss: 3.050478, ppl: 21.125435 +epoch: 0, batch: 22078, sum loss: 5435.630371, avg loss: 3.243216, ppl: 25.615978 +epoch: 0, batch: 22079, sum loss: 6161.960938, avg loss: 3.425215, ppl: 
30.729237 +epoch: 0, batch: 22080, sum loss: 5310.422363, avg loss: 3.267952, ppl: 26.257515 +epoch: 0, batch: 22081, sum loss: 5400.221191, avg loss: 3.094682, ppl: 22.080227 +epoch: 0, batch: 22082, sum loss: 4244.795898, avg loss: 3.116590, ppl: 22.569292 +epoch: 0, batch: 22083, sum loss: 5314.705078, avg loss: 3.213244, ppl: 24.859592 +epoch: 0, batch: 22084, sum loss: 4349.816895, avg loss: 2.953033, ppl: 19.163984 +epoch: 0, batch: 22085, sum loss: 6804.578613, avg loss: 3.268290, ppl: 26.266375 +epoch: 0, batch: 22086, sum loss: 4977.227051, avg loss: 2.983949, ppl: 19.765720 +epoch: 0, batch: 22087, sum loss: 5638.627930, avg loss: 3.094746, ppl: 22.081636 +epoch: 0, batch: 22088, sum loss: 5517.535645, avg loss: 3.147482, ppl: 23.277376 +epoch: 0, batch: 22089, sum loss: 5148.708984, avg loss: 3.193988, ppl: 24.385485 +epoch: 0, batch: 22090, sum loss: 6531.906250, avg loss: 3.297277, ppl: 27.038912 +epoch: 0, batch: 22091, sum loss: 5368.387695, avg loss: 3.057168, ppl: 21.267254 +epoch: 0, batch: 22092, sum loss: 6220.922363, avg loss: 3.248523, ppl: 25.752289 +epoch: 0, batch: 22093, sum loss: 5230.056641, avg loss: 3.339755, ppl: 28.212215 +epoch: 0, batch: 22094, sum loss: 4823.898926, avg loss: 2.854378, ppl: 17.363634 +epoch: 0, batch: 22095, sum loss: 5366.793457, avg loss: 2.993192, ppl: 19.949263 +epoch: 0, batch: 22096, sum loss: 5248.978516, avg loss: 3.426226, ppl: 30.760338 +epoch: 0, batch: 22097, sum loss: 4839.482910, avg loss: 2.889244, ppl: 17.979704 +epoch: 0, batch: 22098, sum loss: 6532.071777, avg loss: 3.279153, ppl: 26.553268 +epoch: 0, batch: 22099, sum loss: 5602.884766, avg loss: 3.303588, ppl: 27.210085 +epoch: 0, batch: 22100, sum loss: 4631.061523, avg loss: 2.970533, ppl: 19.502319 +epoch: 0, batch: 22101, sum loss: 5316.987305, avg loss: 2.963761, ppl: 19.370689 +epoch: 0, batch: 22102, sum loss: 5786.023438, avg loss: 3.379687, ppl: 29.361568 +epoch: 0, batch: 22103, sum loss: 5211.829102, avg loss: 3.040740, ppl: 
20.920729 +epoch: 0, batch: 22104, sum loss: 5652.974609, avg loss: 3.430203, ppl: 30.882910 +epoch: 0, batch: 22105, sum loss: 4591.265625, avg loss: 3.040573, ppl: 20.917233 +epoch: 0, batch: 22106, sum loss: 5416.102539, avg loss: 3.121673, ppl: 22.684296 +epoch: 0, batch: 22107, sum loss: 4494.183105, avg loss: 3.099437, ppl: 22.185452 +epoch: 0, batch: 22108, sum loss: 4941.796875, avg loss: 3.104144, ppl: 22.290121 +epoch: 0, batch: 22109, sum loss: 4420.179199, avg loss: 2.720110, ppl: 15.181994 +epoch: 0, batch: 22110, sum loss: 4426.677734, avg loss: 3.076218, ppl: 21.676266 +epoch: 0, batch: 22111, sum loss: 5726.918945, avg loss: 3.095632, ppl: 22.101198 +epoch: 0, batch: 22112, sum loss: 4540.207031, avg loss: 3.208627, ppl: 24.745081 +epoch: 0, batch: 22113, sum loss: 4524.287109, avg loss: 3.020218, ppl: 20.495762 +epoch: 0, batch: 22114, sum loss: 5518.931152, avg loss: 3.302772, ppl: 27.187887 +epoch: 0, batch: 22115, sum loss: 5711.410156, avg loss: 3.164216, ppl: 23.670179 +epoch: 0, batch: 22116, sum loss: 6534.880859, avg loss: 3.288818, ppl: 26.811144 +epoch: 0, batch: 22117, sum loss: 5061.612793, avg loss: 3.060225, ppl: 21.332367 +epoch: 0, batch: 22118, sum loss: 5323.020020, avg loss: 3.085809, ppl: 21.885164 +epoch: 0, batch: 22119, sum loss: 6149.274902, avg loss: 3.241579, ppl: 25.574066 +epoch: 0, batch: 22120, sum loss: 4643.125977, avg loss: 2.820854, ppl: 16.791187 +epoch: 0, batch: 22121, sum loss: 6544.587891, avg loss: 3.330579, ppl: 27.954523 +epoch: 0, batch: 22122, sum loss: 6064.017090, avg loss: 3.140351, ppl: 23.111967 +epoch: 0, batch: 22123, sum loss: 6701.553711, avg loss: 3.445529, ppl: 31.359867 +epoch: 0, batch: 22124, sum loss: 4526.041504, avg loss: 2.995395, ppl: 19.993250 +epoch: 0, batch: 22125, sum loss: 5295.793945, avg loss: 3.080741, ppl: 21.774534 +epoch: 0, batch: 22126, sum loss: 4894.875977, avg loss: 2.970192, ppl: 19.495657 +epoch: 0, batch: 22127, sum loss: 5411.708984, avg loss: 3.085353, ppl: 
21.875185 +epoch: 0, batch: 22128, sum loss: 6093.853516, avg loss: 3.368631, ppl: 29.038755 +epoch: 0, batch: 22129, sum loss: 6034.296875, avg loss: 3.169274, ppl: 23.790197 +epoch: 0, batch: 22130, sum loss: 5207.553223, avg loss: 3.423769, ppl: 30.684862 +epoch: 0, batch: 22131, sum loss: 5472.638184, avg loss: 3.074516, ppl: 21.639402 +epoch: 0, batch: 22132, sum loss: 5472.978516, avg loss: 3.244208, ppl: 25.641390 +epoch: 0, batch: 22133, sum loss: 4247.106934, avg loss: 2.906986, ppl: 18.301563 +epoch: 0, batch: 22134, sum loss: 5295.267578, avg loss: 3.066165, ppl: 21.459457 +epoch: 0, batch: 22135, sum loss: 5251.942383, avg loss: 3.139236, ppl: 23.086233 +epoch: 0, batch: 22136, sum loss: 4255.631348, avg loss: 3.122253, ppl: 22.697470 +epoch: 0, batch: 22137, sum loss: 5173.736816, avg loss: 3.086955, ppl: 21.910267 +epoch: 0, batch: 22138, sum loss: 5343.832031, avg loss: 3.329490, ppl: 27.924103 +epoch: 0, batch: 22139, sum loss: 5663.242676, avg loss: 3.135793, ppl: 23.006878 +epoch: 0, batch: 22140, sum loss: 4768.745605, avg loss: 3.164397, ppl: 23.674452 +epoch: 0, batch: 22141, sum loss: 4940.485352, avg loss: 3.061019, ppl: 21.349310 +epoch: 0, batch: 22142, sum loss: 5649.537109, avg loss: 3.209964, ppl: 24.778200 +epoch: 0, batch: 22143, sum loss: 4966.479492, avg loss: 3.017302, ppl: 20.436087 +epoch: 0, batch: 22144, sum loss: 5399.797363, avg loss: 3.174484, ppl: 23.914476 +epoch: 0, batch: 22145, sum loss: 5079.885742, avg loss: 3.122241, ppl: 22.697182 +epoch: 0, batch: 22146, sum loss: 5663.221191, avg loss: 3.079511, ppl: 21.747774 +epoch: 0, batch: 22147, sum loss: 5882.123047, avg loss: 3.167541, ppl: 23.749002 +epoch: 0, batch: 22148, sum loss: 5522.439941, avg loss: 3.394247, ppl: 29.792213 +epoch: 0, batch: 22149, sum loss: 5263.614258, avg loss: 3.039038, ppl: 20.885145 +epoch: 0, batch: 22150, sum loss: 5768.347656, avg loss: 3.445847, ppl: 31.369844 +epoch: 0, batch: 22151, sum loss: 4461.355957, avg loss: 3.148452, ppl: 
23.299963 +epoch: 0, batch: 22152, sum loss: 5899.357422, avg loss: 3.338629, ppl: 28.180466 +epoch: 0, batch: 22153, sum loss: 6082.062500, avg loss: 3.319903, ppl: 27.657671 +epoch: 0, batch: 22154, sum loss: 5970.246094, avg loss: 3.280355, ppl: 26.585215 +epoch: 0, batch: 22155, sum loss: 5968.453613, avg loss: 3.245489, ppl: 25.674259 +epoch: 0, batch: 22156, sum loss: 5931.059570, avg loss: 2.984932, ppl: 19.785151 +epoch: 0, batch: 22157, sum loss: 5455.554199, avg loss: 3.113901, ppl: 22.508677 +epoch: 0, batch: 22158, sum loss: 5133.985352, avg loss: 3.147753, ppl: 23.283693 +epoch: 0, batch: 22159, sum loss: 4546.346191, avg loss: 2.905014, ppl: 18.265491 +epoch: 0, batch: 22160, sum loss: 6182.327637, avg loss: 3.339993, ppl: 28.218943 +epoch: 0, batch: 22161, sum loss: 5192.064453, avg loss: 3.167825, ppl: 23.755747 +epoch: 0, batch: 22162, sum loss: 5286.265625, avg loss: 3.064502, ppl: 21.423790 +epoch: 0, batch: 22163, sum loss: 5013.589355, avg loss: 3.112098, ppl: 22.468128 +epoch: 0, batch: 22164, sum loss: 5164.439941, avg loss: 3.074071, ppl: 21.629787 +epoch: 0, batch: 22165, sum loss: 4690.466309, avg loss: 2.929710, ppl: 18.722208 +epoch: 0, batch: 22166, sum loss: 4773.993164, avg loss: 3.095975, ppl: 22.108782 +epoch: 0, batch: 22167, sum loss: 4345.083984, avg loss: 3.002822, ppl: 20.142307 +epoch: 0, batch: 22168, sum loss: 5148.797363, avg loss: 3.133778, ppl: 22.960558 +epoch: 0, batch: 22169, sum loss: 4468.349609, avg loss: 2.972954, ppl: 19.549580 +epoch: 0, batch: 22170, sum loss: 5856.215820, avg loss: 3.210645, ppl: 24.795067 +epoch: 0, batch: 22171, sum loss: 5358.292969, avg loss: 3.161235, ppl: 23.599720 +epoch: 0, batch: 22172, sum loss: 5841.267090, avg loss: 3.491493, ppl: 32.834927 +epoch: 0, batch: 22173, sum loss: 6019.501465, avg loss: 3.248517, ppl: 25.752110 +epoch: 0, batch: 22174, sum loss: 5465.534180, avg loss: 3.257172, ppl: 25.975964 +epoch: 0, batch: 22175, sum loss: 4906.232422, avg loss: 3.268643, ppl: 
26.275650 +epoch: 0, batch: 22176, sum loss: 6236.643555, avg loss: 3.443757, ppl: 31.304350 +epoch: 0, batch: 22177, sum loss: 5359.143066, avg loss: 3.097770, ppl: 22.148516 +epoch: 0, batch: 22178, sum loss: 4578.221680, avg loss: 3.010008, ppl: 20.287560 +epoch: 0, batch: 22179, sum loss: 4630.084961, avg loss: 3.117902, ppl: 22.598923 +epoch: 0, batch: 22180, sum loss: 3769.150391, avg loss: 2.721408, ppl: 15.201716 +epoch: 0, batch: 22181, sum loss: 4380.506348, avg loss: 3.004462, ppl: 20.175369 +epoch: 0, batch: 22182, sum loss: 6604.820801, avg loss: 3.232903, ppl: 25.353151 +epoch: 0, batch: 22183, sum loss: 4768.790039, avg loss: 3.114820, ppl: 22.529385 +epoch: 0, batch: 22184, sum loss: 4790.183105, avg loss: 2.949620, ppl: 19.098696 +epoch: 0, batch: 22185, sum loss: 4437.303223, avg loss: 3.272347, ppl: 26.373177 +epoch: 0, batch: 22186, sum loss: 5206.759766, avg loss: 3.287096, ppl: 26.765018 +epoch: 0, batch: 22187, sum loss: 4813.508301, avg loss: 3.071799, ppl: 21.580687 +epoch: 0, batch: 22188, sum loss: 4619.004883, avg loss: 2.767529, ppl: 15.919242 +epoch: 0, batch: 22189, sum loss: 5338.480469, avg loss: 3.227618, ppl: 25.219519 +epoch: 0, batch: 22190, sum loss: 6991.188477, avg loss: 3.568754, ppl: 35.472359 +epoch: 0, batch: 22191, sum loss: 5808.365723, avg loss: 3.294592, ppl: 26.966408 +epoch: 0, batch: 22192, sum loss: 4611.582031, avg loss: 2.969467, ppl: 19.481531 +epoch: 0, batch: 22193, sum loss: 5085.091797, avg loss: 3.089363, ppl: 21.963091 +epoch: 0, batch: 22194, sum loss: 4717.803223, avg loss: 3.112008, ppl: 22.466103 +epoch: 0, batch: 22195, sum loss: 5894.016113, avg loss: 3.328072, ppl: 27.884537 +epoch: 0, batch: 22196, sum loss: 4856.745117, avg loss: 3.155780, ppl: 23.471334 +epoch: 0, batch: 22197, sum loss: 4824.410156, avg loss: 2.996528, ppl: 20.015919 +epoch: 0, batch: 22198, sum loss: 5826.229492, avg loss: 3.229617, ppl: 25.269979 +epoch: 0, batch: 22199, sum loss: 5566.550781, avg loss: 3.305553, ppl: 
27.263613 +epoch: 0, batch: 22200, sum loss: 5074.813477, avg loss: 2.901551, ppl: 18.202364 +epoch: 0, batch: 22201, sum loss: 4783.648926, avg loss: 2.859324, ppl: 17.449726 +epoch: 0, batch: 22202, sum loss: 5582.215332, avg loss: 3.137839, ppl: 23.053989 +epoch: 0, batch: 22203, sum loss: 6137.752441, avg loss: 3.205093, ppl: 24.657785 +epoch: 0, batch: 22204, sum loss: 5236.356445, avg loss: 3.085655, ppl: 21.881794 +epoch: 0, batch: 22205, sum loss: 5709.044434, avg loss: 3.386147, ppl: 29.551884 +epoch: 0, batch: 22206, sum loss: 4890.563477, avg loss: 3.198537, ppl: 24.496674 +epoch: 0, batch: 22207, sum loss: 4889.418457, avg loss: 2.913837, ppl: 18.427368 +epoch: 0, batch: 22208, sum loss: 5009.120605, avg loss: 3.136582, ppl: 23.025024 +epoch: 0, batch: 22209, sum loss: 5126.445312, avg loss: 3.133524, ppl: 22.954733 +epoch: 0, batch: 22210, sum loss: 6503.832520, avg loss: 3.380370, ppl: 29.381651 +epoch: 0, batch: 22211, sum loss: 5233.922852, avg loss: 3.277347, ppl: 26.505356 +epoch: 0, batch: 22212, sum loss: 6140.904297, avg loss: 3.141128, ppl: 23.129932 +epoch: 0, batch: 22213, sum loss: 5635.163574, avg loss: 3.649718, ppl: 38.463802 +epoch: 0, batch: 22214, sum loss: 4813.326172, avg loss: 3.040636, ppl: 20.918535 +epoch: 0, batch: 22215, sum loss: 5050.749023, avg loss: 2.976281, ppl: 19.614737 +epoch: 0, batch: 22216, sum loss: 5388.168945, avg loss: 3.134479, ppl: 22.976658 +epoch: 0, batch: 22217, sum loss: 5493.519531, avg loss: 2.972684, ppl: 19.544300 +epoch: 0, batch: 22218, sum loss: 6325.284180, avg loss: 3.285862, ppl: 26.732010 +epoch: 0, batch: 22219, sum loss: 4835.172363, avg loss: 3.064114, ppl: 21.415487 +epoch: 0, batch: 22220, sum loss: 5003.160645, avg loss: 3.160556, ppl: 23.583706 +epoch: 0, batch: 22221, sum loss: 5886.270020, avg loss: 3.414310, ppl: 30.395967 +epoch: 0, batch: 22222, sum loss: 5985.094238, avg loss: 3.286707, ppl: 26.754625 +epoch: 0, batch: 22223, sum loss: 5863.001465, avg loss: 3.502391, ppl: 
33.194714 +epoch: 0, batch: 22224, sum loss: 6673.780762, avg loss: 3.751422, ppl: 42.581608 +epoch: 0, batch: 22225, sum loss: 4413.541016, avg loss: 2.976090, ppl: 19.610983 +epoch: 0, batch: 22226, sum loss: 5110.553711, avg loss: 3.013298, ppl: 20.354424 +epoch: 0, batch: 22227, sum loss: 3719.243652, avg loss: 2.699016, ppl: 14.865091 +epoch: 0, batch: 22228, sum loss: 5494.433594, avg loss: 3.194438, ppl: 24.396465 +epoch: 0, batch: 22229, sum loss: 5082.584961, avg loss: 3.116239, ppl: 22.561357 +epoch: 0, batch: 22230, sum loss: 5985.552734, avg loss: 3.199120, ppl: 24.510946 +epoch: 0, batch: 22231, sum loss: 5208.082031, avg loss: 2.947415, ppl: 19.056625 +epoch: 0, batch: 22232, sum loss: 4144.381836, avg loss: 3.154020, ppl: 23.430056 +epoch: 0, batch: 22233, sum loss: 4656.805176, avg loss: 3.023899, ppl: 20.571350 +epoch: 0, batch: 22234, sum loss: 5185.016113, avg loss: 3.279580, ppl: 26.564610 +epoch: 0, batch: 22235, sum loss: 5032.272461, avg loss: 3.123695, ppl: 22.730215 +epoch: 0, batch: 22236, sum loss: 5047.486328, avg loss: 2.889231, ppl: 17.979473 +epoch: 0, batch: 22237, sum loss: 5039.392578, avg loss: 3.035779, ppl: 20.817183 +epoch: 0, batch: 22238, sum loss: 5104.654785, avg loss: 3.243110, ppl: 25.613247 +epoch: 0, batch: 22239, sum loss: 5928.105957, avg loss: 3.366329, ppl: 28.971981 +epoch: 0, batch: 22240, sum loss: 4956.125977, avg loss: 3.078339, ppl: 21.722294 +epoch: 0, batch: 22241, sum loss: 4941.270508, avg loss: 3.011134, ppl: 20.310411 +epoch: 0, batch: 22242, sum loss: 4411.486328, avg loss: 2.942953, ppl: 18.971785 +epoch: 0, batch: 22243, sum loss: 6880.459961, avg loss: 3.366174, ppl: 28.967491 +epoch: 0, batch: 22244, sum loss: 5610.043945, avg loss: 3.194786, ppl: 24.404953 +epoch: 0, batch: 22245, sum loss: 5624.748535, avg loss: 3.386363, ppl: 29.558247 +epoch: 0, batch: 22246, sum loss: 5252.493652, avg loss: 3.091521, ppl: 22.010527 +epoch: 0, batch: 22247, sum loss: 6066.529297, avg loss: 3.127077, ppl: 
22.807219 +epoch: 0, batch: 22248, sum loss: 5351.784180, avg loss: 3.104283, ppl: 22.293236 +epoch: 0, batch: 22249, sum loss: 5746.485352, avg loss: 3.321668, ppl: 27.706518 +epoch: 0, batch: 22250, sum loss: 5045.453125, avg loss: 3.173241, ppl: 23.884768 +epoch: 0, batch: 22251, sum loss: 5598.612793, avg loss: 3.270218, ppl: 26.317066 +epoch: 0, batch: 22252, sum loss: 5709.103516, avg loss: 3.129991, ppl: 22.873775 +epoch: 0, batch: 22253, sum loss: 4831.916504, avg loss: 3.229891, ppl: 25.276897 +epoch: 0, batch: 22254, sum loss: 4491.724609, avg loss: 3.106310, ppl: 22.338472 +epoch: 0, batch: 22255, sum loss: 5434.728027, avg loss: 3.075681, ppl: 21.664629 +epoch: 0, batch: 22256, sum loss: 5026.667969, avg loss: 2.936138, ppl: 18.842928 +epoch: 0, batch: 22257, sum loss: 4559.147461, avg loss: 3.066004, ppl: 21.455984 +epoch: 0, batch: 22258, sum loss: 5032.975098, avg loss: 2.924448, ppl: 18.623943 +epoch: 0, batch: 22259, sum loss: 5993.188477, avg loss: 3.232572, ppl: 25.344755 +epoch: 0, batch: 22260, sum loss: 4373.236328, avg loss: 2.771379, ppl: 15.980659 +epoch: 0, batch: 22261, sum loss: 5538.761230, avg loss: 3.223959, ppl: 25.127401 +epoch: 0, batch: 22262, sum loss: 6278.833984, avg loss: 3.260039, ppl: 26.050543 +epoch: 0, batch: 22263, sum loss: 5672.297852, avg loss: 3.226563, ppl: 25.192919 +epoch: 0, batch: 22264, sum loss: 6320.093750, avg loss: 3.172738, ppl: 23.872761 +epoch: 0, batch: 22265, sum loss: 6363.201660, avg loss: 3.363214, ppl: 28.881876 +epoch: 0, batch: 22266, sum loss: 6121.274414, avg loss: 3.198158, ppl: 24.487383 +epoch: 0, batch: 22267, sum loss: 5303.226562, avg loss: 3.011486, ppl: 20.317566 +epoch: 0, batch: 22268, sum loss: 6358.851074, avg loss: 3.350291, ppl: 28.511017 +epoch: 0, batch: 22269, sum loss: 5678.153809, avg loss: 3.363835, ppl: 28.899818 +epoch: 0, batch: 22270, sum loss: 6036.672852, avg loss: 3.389485, ppl: 29.650682 +epoch: 0, batch: 22271, sum loss: 5170.870117, avg loss: 3.588390, ppl: 
36.175800 +epoch: 0, batch: 22272, sum loss: 5003.135254, avg loss: 3.194850, ppl: 24.406511 +epoch: 0, batch: 22273, sum loss: 5355.822266, avg loss: 3.002143, ppl: 20.128620 +epoch: 0, batch: 22274, sum loss: 5848.748047, avg loss: 3.338327, ppl: 28.171947 +epoch: 0, batch: 22275, sum loss: 4439.177734, avg loss: 2.888209, ppl: 17.961119 +epoch: 0, batch: 22276, sum loss: 5589.054688, avg loss: 3.234407, ppl: 25.391302 +epoch: 0, batch: 22277, sum loss: 5953.886719, avg loss: 3.274965, ppl: 26.442308 +epoch: 0, batch: 22278, sum loss: 6435.719727, avg loss: 3.303758, ppl: 27.214710 +epoch: 0, batch: 22279, sum loss: 4951.166504, avg loss: 3.086762, ppl: 21.906036 +epoch: 0, batch: 22280, sum loss: 4563.022461, avg loss: 2.917534, ppl: 18.495615 +epoch: 0, batch: 22281, sum loss: 4793.047363, avg loss: 3.227641, ppl: 25.220095 +epoch: 0, batch: 22282, sum loss: 5093.876953, avg loss: 2.924154, ppl: 18.618473 +epoch: 0, batch: 22283, sum loss: 5810.996094, avg loss: 3.422259, ppl: 30.638559 +epoch: 0, batch: 22284, sum loss: 4563.832031, avg loss: 3.096223, ppl: 22.114260 +epoch: 0, batch: 22285, sum loss: 5864.025391, avg loss: 3.283329, ppl: 26.664391 +epoch: 0, batch: 22286, sum loss: 5696.908691, avg loss: 3.397083, ppl: 29.876831 +epoch: 0, batch: 22287, sum loss: 5457.154785, avg loss: 3.447350, ppl: 31.417021 +epoch: 0, batch: 22288, sum loss: 4728.023438, avg loss: 2.971731, ppl: 19.525684 +epoch: 0, batch: 22289, sum loss: 6110.333496, avg loss: 3.117517, ppl: 22.590218 +epoch: 0, batch: 22290, sum loss: 4691.945312, avg loss: 3.136327, ppl: 23.019163 +epoch: 0, batch: 22291, sum loss: 5581.752441, avg loss: 3.226446, ppl: 25.189981 +epoch: 0, batch: 22292, sum loss: 5899.199219, avg loss: 3.319752, ppl: 27.653492 +epoch: 0, batch: 22293, sum loss: 5374.917969, avg loss: 3.230119, ppl: 25.282665 +epoch: 0, batch: 22294, sum loss: 5952.800293, avg loss: 3.285210, ppl: 26.714590 +epoch: 0, batch: 22295, sum loss: 6443.439941, avg loss: 3.237910, ppl: 
25.480400 +epoch: 0, batch: 22296, sum loss: 5251.504883, avg loss: 3.169285, ppl: 23.790464 +epoch: 0, batch: 22297, sum loss: 5324.658203, avg loss: 3.177004, ppl: 23.974813 +epoch: 0, batch: 22298, sum loss: 5137.905762, avg loss: 2.935946, ppl: 18.839321 +epoch: 0, batch: 22299, sum loss: 4766.712891, avg loss: 2.947875, ppl: 19.065388 +epoch: 0, batch: 22300, sum loss: 4917.961914, avg loss: 3.000587, ppl: 20.097326 +epoch: 0, batch: 22301, sum loss: 5610.851562, avg loss: 3.023088, ppl: 20.554670 +epoch: 0, batch: 22302, sum loss: 5692.781250, avg loss: 3.167936, ppl: 23.758404 +epoch: 0, batch: 22303, sum loss: 5900.491211, avg loss: 3.261742, ppl: 26.094957 +epoch: 0, batch: 22304, sum loss: 5467.447754, avg loss: 3.158549, ppl: 23.536409 +epoch: 0, batch: 22305, sum loss: 5230.018555, avg loss: 2.958155, ppl: 19.262407 +epoch: 0, batch: 22306, sum loss: 5287.541992, avg loss: 3.104840, ppl: 22.305639 +epoch: 0, batch: 22307, sum loss: 4918.424316, avg loss: 2.954009, ppl: 19.182695 +epoch: 0, batch: 22308, sum loss: 4727.472168, avg loss: 3.089851, ppl: 21.973806 +epoch: 0, batch: 22309, sum loss: 6198.458496, avg loss: 3.492089, ppl: 32.854511 +epoch: 0, batch: 22310, sum loss: 5642.832031, avg loss: 3.204334, ppl: 24.639080 +epoch: 0, batch: 22311, sum loss: 5071.240723, avg loss: 3.077209, ppl: 21.697765 +epoch: 0, batch: 22312, sum loss: 5504.935059, avg loss: 3.280653, ppl: 26.593121 +epoch: 0, batch: 22313, sum loss: 5369.617676, avg loss: 3.453130, ppl: 31.599157 +epoch: 0, batch: 22314, sum loss: 4908.454590, avg loss: 3.087078, ppl: 21.912962 +epoch: 0, batch: 22315, sum loss: 4903.841797, avg loss: 3.010339, ppl: 20.294273 +epoch: 0, batch: 22316, sum loss: 6763.517578, avg loss: 3.552268, ppl: 34.892357 +epoch: 0, batch: 22317, sum loss: 5452.077148, avg loss: 3.087246, ppl: 21.916645 +epoch: 0, batch: 22318, sum loss: 4787.558594, avg loss: 2.922807, ppl: 18.593410 +epoch: 0, batch: 22319, sum loss: 5998.958984, avg loss: 3.360761, ppl: 
28.811119 +epoch: 0, batch: 22320, sum loss: 4965.869141, avg loss: 3.109499, ppl: 22.409803 +epoch: 0, batch: 22321, sum loss: 5840.360840, avg loss: 3.453791, ppl: 31.620049 +epoch: 0, batch: 22322, sum loss: 4826.913574, avg loss: 3.058881, ppl: 21.303696 +epoch: 0, batch: 22323, sum loss: 5807.823242, avg loss: 3.226568, ppl: 25.193058 +epoch: 0, batch: 22324, sum loss: 4451.823730, avg loss: 2.777183, ppl: 16.073671 +epoch: 0, batch: 22325, sum loss: 5180.086426, avg loss: 3.085221, ppl: 21.872301 +epoch: 0, batch: 22326, sum loss: 5509.047363, avg loss: 3.240616, ppl: 25.549463 +epoch: 0, batch: 22327, sum loss: 5145.309570, avg loss: 2.830203, ppl: 16.948906 +epoch: 0, batch: 22328, sum loss: 5153.220215, avg loss: 2.999546, ppl: 20.076422 +epoch: 0, batch: 22329, sum loss: 5352.969727, avg loss: 3.416063, ppl: 30.449287 +epoch: 0, batch: 22330, sum loss: 4783.638184, avg loss: 3.199758, ppl: 24.526590 +epoch: 0, batch: 22331, sum loss: 5685.312012, avg loss: 3.332539, ppl: 28.009369 +epoch: 0, batch: 22332, sum loss: 5272.787598, avg loss: 2.862534, ppl: 17.505831 +epoch: 0, batch: 22333, sum loss: 5642.132812, avg loss: 3.148512, ppl: 23.301357 +epoch: 0, batch: 22334, sum loss: 5467.498047, avg loss: 2.941096, ppl: 18.936596 +epoch: 0, batch: 22335, sum loss: 4942.378418, avg loss: 3.152027, ppl: 23.383411 +epoch: 0, batch: 22336, sum loss: 5268.200684, avg loss: 3.104420, ppl: 22.296282 +epoch: 0, batch: 22337, sum loss: 5958.245117, avg loss: 3.308298, ppl: 27.338558 +epoch: 0, batch: 22338, sum loss: 4806.594727, avg loss: 3.083127, ppl: 21.826548 +epoch: 0, batch: 22339, sum loss: 5399.287598, avg loss: 3.090605, ppl: 21.990383 +epoch: 0, batch: 22340, sum loss: 4580.224609, avg loss: 3.076041, ppl: 21.672426 +epoch: 0, batch: 22341, sum loss: 4852.873047, avg loss: 2.868128, ppl: 17.604038 +epoch: 0, batch: 22342, sum loss: 5446.757324, avg loss: 2.984524, ppl: 19.777096 +epoch: 0, batch: 22343, sum loss: 5320.063965, avg loss: 2.953950, ppl: 
19.181570 +epoch: 0, batch: 22344, sum loss: 4651.355469, avg loss: 3.113357, ppl: 22.496439 +epoch: 0, batch: 22345, sum loss: 4183.344727, avg loss: 2.732426, ppl: 15.370136 +epoch: 0, batch: 22346, sum loss: 5408.248047, avg loss: 2.979751, ppl: 19.682917 +epoch: 0, batch: 22347, sum loss: 6171.801758, avg loss: 3.455656, ppl: 31.679073 +epoch: 0, batch: 22348, sum loss: 5235.937500, avg loss: 3.220134, ppl: 25.031469 +epoch: 0, batch: 22349, sum loss: 6064.626953, avg loss: 3.180192, ppl: 24.051382 +epoch: 0, batch: 22350, sum loss: 4772.947754, avg loss: 3.026600, ppl: 20.626980 +epoch: 0, batch: 22351, sum loss: 5979.616699, avg loss: 3.452435, ppl: 31.577175 +epoch: 0, batch: 22352, sum loss: 5467.299805, avg loss: 3.235089, ppl: 25.408623 +epoch: 0, batch: 22353, sum loss: 4433.534668, avg loss: 2.977525, ppl: 19.639149 +epoch: 0, batch: 22354, sum loss: 5247.195312, avg loss: 3.081148, ppl: 21.783398 +epoch: 0, batch: 22355, sum loss: 4715.521484, avg loss: 2.947201, ppl: 19.052547 +epoch: 0, batch: 22356, sum loss: 5164.056152, avg loss: 3.249878, ppl: 25.787191 +epoch: 0, batch: 22357, sum loss: 5744.290039, avg loss: 3.260097, ppl: 26.052052 +epoch: 0, batch: 22358, sum loss: 6194.123047, avg loss: 3.292995, ppl: 26.923372 +epoch: 0, batch: 22359, sum loss: 5490.281250, avg loss: 3.130149, ppl: 22.877386 +epoch: 0, batch: 22360, sum loss: 5213.450684, avg loss: 3.068541, ppl: 21.510492 +epoch: 0, batch: 22361, sum loss: 4573.793945, avg loss: 2.962302, ppl: 19.342445 +epoch: 0, batch: 22362, sum loss: 4162.355957, avg loss: 2.933302, ppl: 18.789579 +epoch: 0, batch: 22363, sum loss: 5543.371094, avg loss: 3.303558, ppl: 27.209288 +epoch: 0, batch: 22364, sum loss: 4728.499512, avg loss: 3.033034, ppl: 20.760120 +epoch: 0, batch: 22365, sum loss: 4541.374023, avg loss: 2.977950, ppl: 19.647499 +epoch: 0, batch: 22366, sum loss: 6291.364258, avg loss: 3.172650, ppl: 23.870649 +epoch: 0, batch: 22367, sum loss: 4409.489258, avg loss: 3.160924, ppl: 
23.592396 +epoch: 0, batch: 22368, sum loss: 5658.197266, avg loss: 3.274420, ppl: 26.427887 +epoch: 0, batch: 22369, sum loss: 5732.788574, avg loss: 3.115646, ppl: 22.547989 +epoch: 0, batch: 22370, sum loss: 5250.885742, avg loss: 3.061741, ppl: 21.364717 +epoch: 0, batch: 22371, sum loss: 5021.616211, avg loss: 3.052654, ppl: 21.171459 +epoch: 0, batch: 22372, sum loss: 6241.543945, avg loss: 3.442661, ppl: 31.270060 +epoch: 0, batch: 22373, sum loss: 4082.934326, avg loss: 2.863208, ppl: 17.517630 +epoch: 0, batch: 22374, sum loss: 4307.870117, avg loss: 2.558118, ppl: 12.911493 +epoch: 0, batch: 22375, sum loss: 5204.544922, avg loss: 3.029421, ppl: 20.685249 +epoch: 0, batch: 22376, sum loss: 5701.958008, avg loss: 3.336430, ppl: 28.118553 +epoch: 0, batch: 22377, sum loss: 5055.913574, avg loss: 3.031123, ppl: 20.720497 +epoch: 0, batch: 22378, sum loss: 4629.689453, avg loss: 3.138772, ppl: 23.075523 +epoch: 0, batch: 22379, sum loss: 6541.891113, avg loss: 3.163390, ppl: 23.650644 +epoch: 0, batch: 22380, sum loss: 4565.279297, avg loss: 2.993626, ppl: 19.957916 +epoch: 0, batch: 22381, sum loss: 5000.324219, avg loss: 3.082814, ppl: 21.819715 +epoch: 0, batch: 22382, sum loss: 5141.222656, avg loss: 3.095258, ppl: 22.092928 +epoch: 0, batch: 22383, sum loss: 5829.873047, avg loss: 3.109266, ppl: 22.404583 +epoch: 0, batch: 22384, sum loss: 4712.061523, avg loss: 2.941362, ppl: 18.941622 +epoch: 0, batch: 22385, sum loss: 5985.938477, avg loss: 3.175564, ppl: 23.940325 +epoch: 0, batch: 22386, sum loss: 5257.643555, avg loss: 3.094552, ppl: 22.077347 +epoch: 0, batch: 22387, sum loss: 5403.025879, avg loss: 3.260728, ppl: 26.068499 +epoch: 0, batch: 22388, sum loss: 5427.984375, avg loss: 3.301694, ppl: 27.158598 +epoch: 0, batch: 22389, sum loss: 5208.807129, avg loss: 3.074857, ppl: 21.646776 +epoch: 0, batch: 22390, sum loss: 6210.649902, avg loss: 3.234714, ppl: 25.399096 +epoch: 0, batch: 22391, sum loss: 5290.901367, avg loss: 3.179628, ppl: 
24.037819 +epoch: 0, batch: 22392, sum loss: 5379.852539, avg loss: 3.375064, ppl: 29.226166 +epoch: 0, batch: 22393, sum loss: 5233.922852, avg loss: 3.172075, ppl: 23.856926 +epoch: 0, batch: 22394, sum loss: 5740.176758, avg loss: 3.354867, ppl: 28.641787 +epoch: 0, batch: 22395, sum loss: 5705.795410, avg loss: 3.090897, ppl: 21.996798 +epoch: 0, batch: 22396, sum loss: 5226.371094, avg loss: 3.335272, ppl: 28.086023 +epoch: 0, batch: 22397, sum loss: 6228.712891, avg loss: 3.060792, ppl: 21.344465 +epoch: 0, batch: 22398, sum loss: 5762.633789, avg loss: 3.096525, ppl: 22.120956 +epoch: 0, batch: 22399, sum loss: 4878.743652, avg loss: 2.955024, ppl: 19.202175 +epoch: 0, batch: 22400, sum loss: 4999.584961, avg loss: 3.142417, ppl: 23.159769 +epoch: 0, batch: 22401, sum loss: 4997.766602, avg loss: 2.969558, ppl: 19.483311 +epoch: 0, batch: 22402, sum loss: 5095.944336, avg loss: 2.895423, ppl: 18.091150 +epoch: 0, batch: 22403, sum loss: 5263.210938, avg loss: 3.037052, ppl: 20.843699 +epoch: 0, batch: 22404, sum loss: 5026.198730, avg loss: 3.255310, ppl: 25.927658 +epoch: 0, batch: 22405, sum loss: 5004.702637, avg loss: 2.993243, ppl: 19.950285 +epoch: 0, batch: 22406, sum loss: 3894.297363, avg loss: 2.844629, ppl: 17.195183 +epoch: 0, batch: 22407, sum loss: 4634.644531, avg loss: 2.995892, ppl: 20.003191 +epoch: 0, batch: 22408, sum loss: 5891.849609, avg loss: 3.364849, ppl: 28.929117 +epoch: 0, batch: 22409, sum loss: 5852.392578, avg loss: 3.206790, ppl: 24.699684 +epoch: 0, batch: 22410, sum loss: 5517.698242, avg loss: 3.171091, ppl: 23.833469 +epoch: 0, batch: 22411, sum loss: 5319.741211, avg loss: 3.247705, ppl: 25.731218 +epoch: 0, batch: 22412, sum loss: 4112.904785, avg loss: 2.740110, ppl: 15.488688 +epoch: 0, batch: 22413, sum loss: 6164.396484, avg loss: 3.588124, ppl: 36.166157 +epoch: 0, batch: 22414, sum loss: 6272.291016, avg loss: 3.325711, ppl: 27.818777 +epoch: 0, batch: 22415, sum loss: 5347.031738, avg loss: 3.262374, ppl: 
26.111464 +epoch: 0, batch: 22416, sum loss: 5452.260254, avg loss: 3.120927, ppl: 22.667391 +epoch: 0, batch: 22417, sum loss: 5778.226562, avg loss: 3.292437, ppl: 26.908348 +epoch: 0, batch: 22418, sum loss: 5256.437500, avg loss: 3.176095, ppl: 23.953039 +epoch: 0, batch: 22419, sum loss: 5448.229492, avg loss: 3.218092, ppl: 24.980412 +epoch: 0, batch: 22420, sum loss: 5477.772949, avg loss: 2.946623, ppl: 19.041548 +epoch: 0, batch: 22421, sum loss: 5629.007812, avg loss: 3.122023, ppl: 22.692242 +epoch: 0, batch: 22422, sum loss: 4728.363770, avg loss: 2.927779, ppl: 18.686090 +epoch: 0, batch: 22423, sum loss: 4721.886230, avg loss: 3.135383, ppl: 22.997435 +epoch: 0, batch: 22424, sum loss: 5149.474121, avg loss: 3.072479, ppl: 21.595366 +epoch: 0, batch: 22425, sum loss: 4415.333496, avg loss: 2.895301, ppl: 18.088938 +epoch: 0, batch: 22426, sum loss: 6817.349121, avg loss: 3.499666, ppl: 33.104393 +epoch: 0, batch: 22427, sum loss: 5620.945801, avg loss: 3.277519, ppl: 26.509926 +epoch: 0, batch: 22428, sum loss: 5307.477539, avg loss: 3.050274, ppl: 21.121140 +epoch: 0, batch: 22429, sum loss: 5887.869141, avg loss: 3.229769, ppl: 25.273825 +epoch: 0, batch: 22430, sum loss: 5620.331055, avg loss: 3.309971, ppl: 27.384340 +epoch: 0, batch: 22431, sum loss: 5614.196777, avg loss: 3.086419, ppl: 21.898527 +epoch: 0, batch: 22432, sum loss: 4722.825684, avg loss: 3.043058, ppl: 20.969267 +epoch: 0, batch: 22433, sum loss: 5872.193359, avg loss: 3.060028, ppl: 21.328146 +epoch: 0, batch: 22434, sum loss: 6282.830566, avg loss: 3.682785, ppl: 39.756947 +epoch: 0, batch: 22435, sum loss: 5682.867188, avg loss: 3.174786, ppl: 23.921701 +epoch: 0, batch: 22436, sum loss: 4836.270996, avg loss: 3.181757, ppl: 24.089052 +epoch: 0, batch: 22437, sum loss: 4548.727051, avg loss: 2.949888, ppl: 19.103809 +epoch: 0, batch: 22438, sum loss: 4279.078125, avg loss: 3.017686, ppl: 20.443922 +epoch: 0, batch: 22439, sum loss: 5436.875977, avg loss: 3.185047, ppl: 
24.168434 +epoch: 0, batch: 22440, sum loss: 5510.678223, avg loss: 3.422781, ppl: 30.654562 +epoch: 0, batch: 22441, sum loss: 5207.314941, avg loss: 3.055936, ppl: 21.241060 +epoch: 0, batch: 22442, sum loss: 6258.498047, avg loss: 3.636547, ppl: 37.960545 +epoch: 0, batch: 22443, sum loss: 4761.239746, avg loss: 2.964657, ppl: 19.388058 +epoch: 0, batch: 22444, sum loss: 5057.353516, avg loss: 3.106483, ppl: 22.342318 +epoch: 0, batch: 22445, sum loss: 4309.040039, avg loss: 3.051728, ppl: 21.151863 +epoch: 0, batch: 22446, sum loss: 5557.300781, avg loss: 3.290291, ppl: 26.850666 +epoch: 0, batch: 22447, sum loss: 4921.571777, avg loss: 3.164998, ppl: 23.688698 +epoch: 0, batch: 22448, sum loss: 5305.040039, avg loss: 3.165298, ppl: 23.695816 +epoch: 0, batch: 22449, sum loss: 5214.281250, avg loss: 3.096367, ppl: 22.117445 +epoch: 0, batch: 22450, sum loss: 6248.695801, avg loss: 3.167104, ppl: 23.738638 +epoch: 0, batch: 22451, sum loss: 5317.854980, avg loss: 3.262488, ppl: 26.114420 +epoch: 0, batch: 22452, sum loss: 5933.404297, avg loss: 3.355998, ppl: 28.674208 +epoch: 0, batch: 22453, sum loss: 4987.156250, avg loss: 3.236312, ppl: 25.439718 +epoch: 0, batch: 22454, sum loss: 4517.455078, avg loss: 3.245298, ppl: 25.669363 +epoch: 0, batch: 22455, sum loss: 5349.209961, avg loss: 3.154015, ppl: 23.429955 +epoch: 0, batch: 22456, sum loss: 5638.923828, avg loss: 3.106845, ppl: 22.350420 +epoch: 0, batch: 22457, sum loss: 5234.541016, avg loss: 3.045108, ppl: 21.012306 +epoch: 0, batch: 22458, sum loss: 4858.167480, avg loss: 2.849365, ppl: 17.276812 +epoch: 0, batch: 22459, sum loss: 5103.943848, avg loss: 2.974326, ppl: 19.576431 +epoch: 0, batch: 22460, sum loss: 6855.814941, avg loss: 3.400702, ppl: 29.985142 +epoch: 0, batch: 22461, sum loss: 6072.998047, avg loss: 3.394633, ppl: 29.803707 +epoch: 0, batch: 22462, sum loss: 4364.331543, avg loss: 2.995423, ppl: 19.993816 +epoch: 0, batch: 22463, sum loss: 4693.999023, avg loss: 3.034259, ppl: 
20.785576 +epoch: 0, batch: 22464, sum loss: 4913.350586, avg loss: 3.001436, ppl: 20.114401 +epoch: 0, batch: 22465, sum loss: 4725.168457, avg loss: 3.106620, ppl: 22.345385 +epoch: 0, batch: 22466, sum loss: 5475.642578, avg loss: 3.189075, ppl: 24.265982 +epoch: 0, batch: 22467, sum loss: 6613.024414, avg loss: 3.298266, ppl: 27.065678 +epoch: 0, batch: 22468, sum loss: 5421.666992, avg loss: 3.071766, ppl: 21.579983 +epoch: 0, batch: 22469, sum loss: 5290.527344, avg loss: 3.106593, ppl: 22.344778 +epoch: 0, batch: 22470, sum loss: 5528.645996, avg loss: 2.983619, ppl: 19.759195 +epoch: 0, batch: 22471, sum loss: 5100.905762, avg loss: 3.276112, ppl: 26.472635 +epoch: 0, batch: 22472, sum loss: 5241.067383, avg loss: 3.327662, ppl: 27.873091 +epoch: 0, batch: 22473, sum loss: 5053.433594, avg loss: 3.081362, ppl: 21.788057 +epoch: 0, batch: 22474, sum loss: 4568.134277, avg loss: 2.842647, ppl: 17.161137 +epoch: 0, batch: 22475, sum loss: 4935.474609, avg loss: 3.343817, ppl: 28.327059 +epoch: 0, batch: 22476, sum loss: 5404.998535, avg loss: 3.151603, ppl: 23.373491 +epoch: 0, batch: 22477, sum loss: 4470.328125, avg loss: 3.104395, ppl: 22.295719 +epoch: 0, batch: 22478, sum loss: 5291.793945, avg loss: 2.846581, ppl: 17.228771 +epoch: 0, batch: 22479, sum loss: 5013.639160, avg loss: 3.112129, ppl: 22.468824 +epoch: 0, batch: 22480, sum loss: 4930.520020, avg loss: 3.106818, ppl: 22.349813 +epoch: 0, batch: 22481, sum loss: 4992.001953, avg loss: 2.943398, ppl: 18.980223 +epoch: 0, batch: 22482, sum loss: 5383.927246, avg loss: 3.257064, ppl: 25.973171 +epoch: 0, batch: 22483, sum loss: 4518.207520, avg loss: 2.939628, ppl: 18.908804 +epoch: 0, batch: 22484, sum loss: 5296.992188, avg loss: 3.085027, ppl: 21.868067 +epoch: 0, batch: 22485, sum loss: 5623.931152, avg loss: 3.145375, ppl: 23.228395 +epoch: 0, batch: 22486, sum loss: 5515.373047, avg loss: 3.033759, ppl: 20.775173 +epoch: 0, batch: 22487, sum loss: 5108.351562, avg loss: 3.132036, ppl: 
22.920609 +epoch: 0, batch: 22488, sum loss: 5096.867676, avg loss: 3.021261, ppl: 20.517157 +epoch: 0, batch: 22489, sum loss: 4802.128906, avg loss: 2.919227, ppl: 18.526962 +epoch: 0, batch: 22490, sum loss: 6191.563477, avg loss: 3.407575, ppl: 30.191936 +epoch: 0, batch: 22491, sum loss: 6410.333496, avg loss: 3.250676, ppl: 25.807785 +epoch: 0, batch: 22492, sum loss: 5718.160645, avg loss: 3.230599, ppl: 25.294815 +epoch: 0, batch: 22493, sum loss: 6593.042969, avg loss: 3.365515, ppl: 28.948395 +epoch: 0, batch: 22494, sum loss: 5476.652832, avg loss: 3.174871, ppl: 23.923744 +epoch: 0, batch: 22495, sum loss: 5736.697754, avg loss: 3.360690, ppl: 28.809059 +epoch: 0, batch: 22496, sum loss: 4349.257324, avg loss: 2.831548, ppl: 16.971708 +epoch: 0, batch: 22497, sum loss: 4738.078613, avg loss: 3.019808, ppl: 20.487354 +epoch: 0, batch: 22498, sum loss: 5373.190430, avg loss: 2.867231, ppl: 17.588243 +epoch: 0, batch: 22499, sum loss: 4531.069336, avg loss: 2.975095, ppl: 19.591480 +epoch: 0, batch: 22500, sum loss: 6423.983398, avg loss: 3.347568, ppl: 28.433500 +epoch: 0, batch: 22501, sum loss: 5621.346191, avg loss: 3.238103, ppl: 25.485321 +epoch: 0, batch: 22502, sum loss: 4585.502930, avg loss: 2.924428, ppl: 18.623566 +epoch: 0, batch: 22503, sum loss: 5571.542480, avg loss: 3.364458, ppl: 28.917822 +epoch: 0, batch: 22504, sum loss: 5854.524414, avg loss: 3.259758, ppl: 26.043221 +epoch: 0, batch: 22505, sum loss: 5355.613770, avg loss: 3.253715, ppl: 25.886318 +epoch: 0, batch: 22506, sum loss: 4616.159668, avg loss: 3.005312, ppl: 20.192524 +epoch: 0, batch: 22507, sum loss: 4516.694824, avg loss: 3.083068, ppl: 21.825262 +epoch: 0, batch: 22508, sum loss: 5753.728516, avg loss: 3.271023, ppl: 26.338259 +epoch: 0, batch: 22509, sum loss: 5857.912109, avg loss: 3.279906, ppl: 26.573277 +epoch: 0, batch: 22510, sum loss: 5012.639160, avg loss: 3.094222, ppl: 22.070057 +epoch: 0, batch: 22511, sum loss: 5509.643066, avg loss: 3.054126, ppl: 
21.202642 +epoch: 0, batch: 22512, sum loss: 5270.279297, avg loss: 3.213585, ppl: 24.868076 +epoch: 0, batch: 22513, sum loss: 6510.617676, avg loss: 3.359452, ppl: 28.773418 +epoch: 0, batch: 22514, sum loss: 5627.002930, avg loss: 3.226492, ppl: 25.191141 +epoch: 0, batch: 22515, sum loss: 5595.146973, avg loss: 3.256779, ppl: 25.965778 +epoch: 0, batch: 22516, sum loss: 5697.118652, avg loss: 3.215078, ppl: 24.905243 +epoch: 0, batch: 22517, sum loss: 5670.314453, avg loss: 3.146678, ppl: 23.258680 +epoch: 0, batch: 22518, sum loss: 4805.404785, avg loss: 2.907081, ppl: 18.303291 +epoch: 0, batch: 22519, sum loss: 5375.270020, avg loss: 3.236165, ppl: 25.435989 +epoch: 0, batch: 22520, sum loss: 4746.996582, avg loss: 3.240271, ppl: 25.540632 +epoch: 0, batch: 22521, sum loss: 4615.506836, avg loss: 2.727841, ppl: 15.299817 +epoch: 0, batch: 22522, sum loss: 6917.354492, avg loss: 3.644550, ppl: 38.265533 +epoch: 0, batch: 22523, sum loss: 5857.907715, avg loss: 3.154500, ppl: 23.441324 +epoch: 0, batch: 22524, sum loss: 4928.857910, avg loss: 3.186075, ppl: 24.193281 +epoch: 0, batch: 22525, sum loss: 4619.739258, avg loss: 3.096340, ppl: 22.116859 +epoch: 0, batch: 22526, sum loss: 5339.817871, avg loss: 2.984806, ppl: 19.782661 +epoch: 0, batch: 22527, sum loss: 5619.229492, avg loss: 3.155098, ppl: 23.455336 +epoch: 0, batch: 22528, sum loss: 5156.072754, avg loss: 3.128685, ppl: 22.843920 +epoch: 0, batch: 22529, sum loss: 4870.846680, avg loss: 3.010412, ppl: 20.295759 +epoch: 0, batch: 22530, sum loss: 5095.294434, avg loss: 3.178599, ppl: 24.013090 +epoch: 0, batch: 22531, sum loss: 5450.687012, avg loss: 3.402426, ppl: 30.036888 +epoch: 0, batch: 22532, sum loss: 7006.373535, avg loss: 3.422752, ppl: 30.653662 +epoch: 0, batch: 22533, sum loss: 4290.015137, avg loss: 2.916394, ppl: 18.474543 +epoch: 0, batch: 22534, sum loss: 5951.138672, avg loss: 3.271654, ppl: 26.354898 +epoch: 0, batch: 22535, sum loss: 4348.721680, avg loss: 2.853492, ppl: 
17.348257 +epoch: 0, batch: 22536, sum loss: 5453.377930, avg loss: 3.049988, ppl: 21.115082 +epoch: 0, batch: 22537, sum loss: 5253.605469, avg loss: 3.314578, ppl: 27.510771 +epoch: 0, batch: 22538, sum loss: 5737.686523, avg loss: 3.063367, ppl: 21.399490 +epoch: 0, batch: 22539, sum loss: 6023.049316, avg loss: 3.175039, ppl: 23.927759 +epoch: 0, batch: 22540, sum loss: 4238.145508, avg loss: 2.873319, ppl: 17.695652 +epoch: 0, batch: 22541, sum loss: 4675.811035, avg loss: 2.976328, ppl: 19.615654 +epoch: 0, batch: 22542, sum loss: 6186.496094, avg loss: 3.268091, ppl: 26.261152 +epoch: 0, batch: 22543, sum loss: 6737.944336, avg loss: 3.535123, ppl: 34.299221 +epoch: 0, batch: 22544, sum loss: 6184.218262, avg loss: 3.344629, ppl: 28.350044 +epoch: 0, batch: 22545, sum loss: 4719.738281, avg loss: 2.836381, ppl: 17.053940 +epoch: 0, batch: 22546, sum loss: 5672.178711, avg loss: 3.267384, ppl: 26.242599 +epoch: 0, batch: 22547, sum loss: 5359.644043, avg loss: 3.130633, ppl: 22.888466 +epoch: 0, batch: 22548, sum loss: 6334.025391, avg loss: 3.203857, ppl: 24.627333 +epoch: 0, batch: 22549, sum loss: 5263.417969, avg loss: 2.943746, ppl: 18.986839 +epoch: 0, batch: 22550, sum loss: 5720.150391, avg loss: 3.202772, ppl: 24.600626 +epoch: 0, batch: 22551, sum loss: 5213.655273, avg loss: 3.101520, ppl: 22.231720 +epoch: 0, batch: 22552, sum loss: 4949.446777, avg loss: 3.114819, ppl: 22.529343 +epoch: 0, batch: 22553, sum loss: 4805.724609, avg loss: 2.994221, ppl: 19.969791 +epoch: 0, batch: 22554, sum loss: 5174.644531, avg loss: 3.045700, ppl: 21.024750 +epoch: 0, batch: 22555, sum loss: 6044.436523, avg loss: 3.256701, ppl: 25.963736 +epoch: 0, batch: 22556, sum loss: 5305.673828, avg loss: 3.295449, ppl: 26.989544 +epoch: 0, batch: 22557, sum loss: 5030.955566, avg loss: 3.019781, ppl: 20.486807 +epoch: 0, batch: 22558, sum loss: 6874.047852, avg loss: 3.369631, ppl: 29.067801 +epoch: 0, batch: 22559, sum loss: 5767.766113, avg loss: 3.233053, ppl: 
25.356953 +epoch: 0, batch: 22560, sum loss: 6832.482422, avg loss: 3.169055, ppl: 23.784996 +epoch: 0, batch: 22561, sum loss: 5237.155762, avg loss: 3.304199, ppl: 27.226730 +epoch: 0, batch: 22562, sum loss: 5222.715820, avg loss: 3.210028, ppl: 24.779785 +epoch: 0, batch: 22563, sum loss: 4822.017578, avg loss: 2.897847, ppl: 18.135061 +epoch: 0, batch: 22564, sum loss: 5936.149414, avg loss: 3.238488, ppl: 25.495155 +epoch: 0, batch: 22565, sum loss: 5388.442383, avg loss: 2.993579, ppl: 19.956984 +epoch: 0, batch: 22566, sum loss: 5052.734863, avg loss: 3.047488, ppl: 21.062368 +epoch: 0, batch: 22567, sum loss: 5160.960938, avg loss: 3.012820, ppl: 20.344690 +epoch: 0, batch: 22568, sum loss: 5126.486328, avg loss: 2.992695, ppl: 19.939344 +epoch: 0, batch: 22569, sum loss: 4735.633789, avg loss: 3.049346, ppl: 21.101534 +epoch: 0, batch: 22570, sum loss: 5376.421387, avg loss: 3.134939, ppl: 22.987244 +epoch: 0, batch: 22571, sum loss: 5533.534668, avg loss: 3.355691, ppl: 28.665411 +epoch: 0, batch: 22572, sum loss: 5356.812012, avg loss: 3.001015, ppl: 20.105938 +epoch: 0, batch: 22573, sum loss: 4627.500488, avg loss: 3.213542, ppl: 24.867008 +epoch: 0, batch: 22574, sum loss: 5063.039551, avg loss: 3.174320, ppl: 23.910543 +epoch: 0, batch: 22575, sum loss: 4455.020508, avg loss: 2.997995, ppl: 20.045303 +epoch: 0, batch: 22576, sum loss: 5309.725098, avg loss: 3.060360, ppl: 21.335245 +epoch: 0, batch: 22577, sum loss: 5688.354492, avg loss: 3.192118, ppl: 24.339930 +epoch: 0, batch: 22578, sum loss: 5451.543945, avg loss: 3.043855, ppl: 20.985983 +epoch: 0, batch: 22579, sum loss: 5116.812988, avg loss: 2.937321, ppl: 18.865232 +epoch: 0, batch: 22580, sum loss: 5434.223633, avg loss: 3.152102, ppl: 23.385168 +epoch: 0, batch: 22581, sum loss: 5298.429688, avg loss: 3.222889, ppl: 25.100536 +epoch: 0, batch: 22582, sum loss: 6011.585938, avg loss: 3.240747, ppl: 25.552809 +epoch: 0, batch: 22583, sum loss: 6346.424316, avg loss: 3.494727, ppl: 
32.941292 +epoch: 0, batch: 22584, sum loss: 4795.083984, avg loss: 3.069836, ppl: 21.538374 +epoch: 0, batch: 22585, sum loss: 5215.108398, avg loss: 3.095020, ppl: 22.087677 +epoch: 0, batch: 22586, sum loss: 4215.886230, avg loss: 2.923638, ppl: 18.608870 +epoch: 0, batch: 22587, sum loss: 5438.815430, avg loss: 3.116800, ppl: 22.574018 +epoch: 0, batch: 22588, sum loss: 6237.913086, avg loss: 3.248913, ppl: 25.762329 +epoch: 0, batch: 22589, sum loss: 5122.132812, avg loss: 3.171599, ppl: 23.845592 +epoch: 0, batch: 22590, sum loss: 4439.900391, avg loss: 2.967848, ppl: 19.450014 +epoch: 0, batch: 22591, sum loss: 4289.083984, avg loss: 3.087893, ppl: 21.930826 +epoch: 0, batch: 22592, sum loss: 3931.804443, avg loss: 3.045549, ppl: 21.021578 +epoch: 0, batch: 22593, sum loss: 6922.912598, avg loss: 3.613211, ppl: 37.084946 +epoch: 0, batch: 22594, sum loss: 4684.965820, avg loss: 3.213282, ppl: 24.860554 +epoch: 0, batch: 22595, sum loss: 4686.514160, avg loss: 3.041216, ppl: 20.930683 +epoch: 0, batch: 22596, sum loss: 6142.890625, avg loss: 3.158299, ppl: 23.530531 +epoch: 0, batch: 22597, sum loss: 5620.504395, avg loss: 3.072993, ppl: 21.606480 +epoch: 0, batch: 22598, sum loss: 5313.282227, avg loss: 3.324958, ppl: 27.797821 +epoch: 0, batch: 22599, sum loss: 5891.286133, avg loss: 3.260258, ppl: 26.056252 +epoch: 0, batch: 22600, sum loss: 5469.803223, avg loss: 3.193113, ppl: 24.364162 +epoch: 0, batch: 22601, sum loss: 4856.202637, avg loss: 3.110956, ppl: 22.442493 +epoch: 0, batch: 22602, sum loss: 5342.456055, avg loss: 3.178142, ppl: 24.002106 +epoch: 0, batch: 22603, sum loss: 5523.152344, avg loss: 3.106385, ppl: 22.340139 +epoch: 0, batch: 22604, sum loss: 5229.772461, avg loss: 3.148569, ppl: 23.302687 +epoch: 0, batch: 22605, sum loss: 5630.767578, avg loss: 3.306381, ppl: 27.286211 +epoch: 0, batch: 22606, sum loss: 4900.561523, avg loss: 3.099659, ppl: 22.190388 +epoch: 0, batch: 22607, sum loss: 5582.037598, avg loss: 3.254832, ppl: 
25.915268 +epoch: 0, batch: 22608, sum loss: 5893.473145, avg loss: 3.404664, ppl: 30.104174 +epoch: 0, batch: 22609, sum loss: 5232.046387, avg loss: 3.105072, ppl: 22.310825 +epoch: 0, batch: 22610, sum loss: 6467.658203, avg loss: 3.200227, ppl: 24.538101 +epoch: 0, batch: 22611, sum loss: 5075.816406, avg loss: 3.104475, ppl: 22.297504 +epoch: 0, batch: 22612, sum loss: 4659.291016, avg loss: 3.091766, ppl: 22.015921 +epoch: 0, batch: 22613, sum loss: 4685.502930, avg loss: 2.948712, ppl: 19.081354 +epoch: 0, batch: 22614, sum loss: 4807.490723, avg loss: 2.885648, ppl: 17.915182 +epoch: 0, batch: 22615, sum loss: 6066.117188, avg loss: 3.344056, ppl: 28.333813 +epoch: 0, batch: 22616, sum loss: 5619.258301, avg loss: 3.231316, ppl: 25.312943 +epoch: 0, batch: 22617, sum loss: 6275.237793, avg loss: 3.147060, ppl: 23.267561 +epoch: 0, batch: 22618, sum loss: 4973.535156, avg loss: 3.175948, ppl: 23.949522 +epoch: 0, batch: 22619, sum loss: 4990.928223, avg loss: 3.119330, ppl: 22.631216 +epoch: 0, batch: 22620, sum loss: 6333.314941, avg loss: 3.462720, ppl: 31.903641 +epoch: 0, batch: 22621, sum loss: 6432.720215, avg loss: 3.326122, ppl: 27.830208 +epoch: 0, batch: 22622, sum loss: 4941.332031, avg loss: 3.121498, ppl: 22.680338 +epoch: 0, batch: 22623, sum loss: 5900.055176, avg loss: 3.222313, ppl: 25.086081 +epoch: 0, batch: 22624, sum loss: 5955.554688, avg loss: 3.103468, ppl: 22.275061 +epoch: 0, batch: 22625, sum loss: 5024.460449, avg loss: 3.021323, ppl: 20.518423 +epoch: 0, batch: 22626, sum loss: 6184.448730, avg loss: 3.256687, ppl: 25.963383 +epoch: 0, batch: 22627, sum loss: 5474.172852, avg loss: 3.158784, ppl: 23.541950 +epoch: 0, batch: 22628, sum loss: 5821.025391, avg loss: 3.230314, ppl: 25.287590 +epoch: 0, batch: 22629, sum loss: 5651.217773, avg loss: 3.253436, ppl: 25.879105 +epoch: 0, batch: 22630, sum loss: 4815.706055, avg loss: 3.123026, ppl: 22.715010 +epoch: 0, batch: 22631, sum loss: 6295.205078, avg loss: 3.295919, ppl: 
27.002216 +epoch: 0, batch: 22632, sum loss: 4781.519531, avg loss: 3.078892, ppl: 21.734312 +epoch: 0, batch: 22633, sum loss: 6084.525391, avg loss: 3.175640, ppl: 23.942129 +epoch: 0, batch: 22634, sum loss: 5814.271484, avg loss: 3.119244, ppl: 22.629272 +epoch: 0, batch: 22635, sum loss: 4953.307129, avg loss: 3.067063, ppl: 21.478735 +epoch: 0, batch: 22636, sum loss: 5368.802246, avg loss: 3.059146, ppl: 21.309361 +epoch: 0, batch: 22637, sum loss: 5547.992676, avg loss: 3.071978, ppl: 21.584562 +epoch: 0, batch: 22638, sum loss: 4513.755859, avg loss: 2.997182, ppl: 20.029013 +epoch: 0, batch: 22639, sum loss: 5272.858398, avg loss: 3.153624, ppl: 23.420778 +epoch: 0, batch: 22640, sum loss: 6022.787109, avg loss: 3.393119, ppl: 29.758635 +epoch: 0, batch: 22641, sum loss: 5542.654785, avg loss: 3.069023, ppl: 21.520859 +epoch: 0, batch: 22642, sum loss: 6260.970703, avg loss: 3.364305, ppl: 28.913382 +epoch: 0, batch: 22643, sum loss: 4952.922852, avg loss: 3.290979, ppl: 26.869154 +epoch: 0, batch: 22644, sum loss: 5516.216797, avg loss: 3.345189, ppl: 28.365938 +epoch: 0, batch: 22645, sum loss: 5264.035156, avg loss: 3.231452, ppl: 25.316383 +epoch: 0, batch: 22646, sum loss: 4709.013672, avg loss: 3.188229, ppl: 24.245447 +epoch: 0, batch: 22647, sum loss: 5289.365723, avg loss: 3.087779, ppl: 21.928322 +epoch: 0, batch: 22648, sum loss: 4912.022949, avg loss: 3.136668, ppl: 23.027023 +epoch: 0, batch: 22649, sum loss: 4776.080566, avg loss: 2.894594, ppl: 18.076164 +epoch: 0, batch: 22650, sum loss: 4753.627930, avg loss: 3.062905, ppl: 21.389601 +epoch: 0, batch: 22651, sum loss: 6174.764160, avg loss: 3.222737, ppl: 25.096718 +epoch: 0, batch: 22652, sum loss: 4723.361328, avg loss: 3.103391, ppl: 22.273361 +epoch: 0, batch: 22653, sum loss: 5465.443359, avg loss: 2.981693, ppl: 19.721176 +epoch: 0, batch: 22654, sum loss: 5261.449707, avg loss: 3.060762, ppl: 21.343813 +epoch: 0, batch: 22655, sum loss: 5738.030273, avg loss: 3.375312, ppl: 
29.233406 +epoch: 0, batch: 22656, sum loss: 5906.276855, avg loss: 3.163512, ppl: 23.653521 +epoch: 0, batch: 22657, sum loss: 4690.493652, avg loss: 3.114538, ppl: 22.523016 +epoch: 0, batch: 22658, sum loss: 5568.112305, avg loss: 3.107206, ppl: 22.358479 +epoch: 0, batch: 22659, sum loss: 5113.494629, avg loss: 2.960912, ppl: 19.315575 +epoch: 0, batch: 22660, sum loss: 5312.621582, avg loss: 3.032318, ppl: 20.745272 +epoch: 0, batch: 22661, sum loss: 5029.731445, avg loss: 3.099034, ppl: 22.176510 +epoch: 0, batch: 22662, sum loss: 4980.387207, avg loss: 3.229823, ppl: 25.275187 +epoch: 0, batch: 22663, sum loss: 6597.353516, avg loss: 3.384994, ppl: 29.517824 +epoch: 0, batch: 22664, sum loss: 5092.960449, avg loss: 2.964471, ppl: 19.384439 +epoch: 0, batch: 22665, sum loss: 5105.922852, avg loss: 3.050133, ppl: 21.118158 +epoch: 0, batch: 22666, sum loss: 5326.790527, avg loss: 3.199274, ppl: 24.514715 +epoch: 0, batch: 22667, sum loss: 5919.736816, avg loss: 3.236598, ppl: 25.446999 +epoch: 0, batch: 22668, sum loss: 5753.061523, avg loss: 3.248482, ppl: 25.751219 +epoch: 0, batch: 22669, sum loss: 4625.740234, avg loss: 2.965218, ppl: 19.398937 +epoch: 0, batch: 22670, sum loss: 4893.406738, avg loss: 2.989253, ppl: 19.870829 +epoch: 0, batch: 22671, sum loss: 4869.538086, avg loss: 3.103593, ppl: 22.277861 +epoch: 0, batch: 22672, sum loss: 5007.752441, avg loss: 3.120095, ppl: 22.648525 +epoch: 0, batch: 22673, sum loss: 5113.044434, avg loss: 3.089453, ppl: 21.965059 +epoch: 0, batch: 22674, sum loss: 4893.422363, avg loss: 3.046963, ppl: 21.051310 +epoch: 0, batch: 22675, sum loss: 4729.300293, avg loss: 3.037444, ppl: 20.851879 +epoch: 0, batch: 22676, sum loss: 5197.380371, avg loss: 3.066301, ppl: 21.462368 +epoch: 0, batch: 22677, sum loss: 6976.682617, avg loss: 3.350952, ppl: 28.529892 +epoch: 0, batch: 22678, sum loss: 4947.602539, avg loss: 3.046553, ppl: 21.042688 +epoch: 0, batch: 22679, sum loss: 5351.345215, avg loss: 3.327951, ppl: 
27.881153 +epoch: 0, batch: 22680, sum loss: 6368.158203, avg loss: 3.234210, ppl: 25.386297 +epoch: 0, batch: 22681, sum loss: 5207.895996, avg loss: 3.392766, ppl: 29.748121 +epoch: 0, batch: 22682, sum loss: 5404.725586, avg loss: 3.217099, ppl: 24.955612 +epoch: 0, batch: 22683, sum loss: 5647.208984, avg loss: 3.435042, ppl: 31.032713 +epoch: 0, batch: 22684, sum loss: 4918.312500, avg loss: 3.047282, ppl: 21.058025 +epoch: 0, batch: 22685, sum loss: 5126.660156, avg loss: 3.222288, ppl: 25.085447 +epoch: 0, batch: 22686, sum loss: 4732.744629, avg loss: 3.234959, ppl: 25.405327 +epoch: 0, batch: 22687, sum loss: 4614.088867, avg loss: 3.031596, ppl: 20.730291 +epoch: 0, batch: 22688, sum loss: 5564.967285, avg loss: 3.287045, ppl: 26.763666 +epoch: 0, batch: 22689, sum loss: 5005.129883, avg loss: 3.126252, ppl: 22.788418 +epoch: 0, batch: 22690, sum loss: 4370.581543, avg loss: 3.012117, ppl: 20.330393 +epoch: 0, batch: 22691, sum loss: 4680.737305, avg loss: 3.120492, ppl: 22.657513 +epoch: 0, batch: 22692, sum loss: 5332.442871, avg loss: 3.267428, ppl: 26.243765 +epoch: 0, batch: 22693, sum loss: 5324.515137, avg loss: 3.296913, ppl: 27.029081 +epoch: 0, batch: 22694, sum loss: 6281.596680, avg loss: 3.307845, ppl: 27.326164 +epoch: 0, batch: 22695, sum loss: 5403.027344, avg loss: 3.195167, ppl: 24.414251 +epoch: 0, batch: 22696, sum loss: 6106.719727, avg loss: 3.128443, ppl: 22.838387 +epoch: 0, batch: 22697, sum loss: 5693.403320, avg loss: 3.184230, ppl: 24.148689 +epoch: 0, batch: 22698, sum loss: 4362.251953, avg loss: 2.915944, ppl: 18.466234 +epoch: 0, batch: 22699, sum loss: 6174.836914, avg loss: 3.352246, ppl: 28.566824 +epoch: 0, batch: 22700, sum loss: 5021.165039, avg loss: 2.976387, ppl: 19.616817 +epoch: 0, batch: 22701, sum loss: 6020.464844, avg loss: 3.252547, ppl: 25.856119 +epoch: 0, batch: 22702, sum loss: 5108.402832, avg loss: 3.094127, ppl: 22.067959 +epoch: 0, batch: 22703, sum loss: 5737.449707, avg loss: 3.423300, ppl: 
30.670446 +epoch: 0, batch: 22704, sum loss: 5426.310059, avg loss: 3.235724, ppl: 25.424784 +epoch: 0, batch: 22705, sum loss: 4284.040527, avg loss: 2.842761, ppl: 17.163084 +epoch: 0, batch: 22706, sum loss: 6174.061523, avg loss: 3.409200, ppl: 30.241055 +epoch: 0, batch: 22707, sum loss: 5195.586914, avg loss: 3.092611, ppl: 22.034542 +epoch: 0, batch: 22708, sum loss: 5127.118164, avg loss: 3.395442, ppl: 29.827848 +epoch: 0, batch: 22709, sum loss: 6000.079590, avg loss: 3.324144, ppl: 27.775209 +epoch: 0, batch: 22710, sum loss: 5053.924805, avg loss: 3.250113, ppl: 25.793242 +epoch: 0, batch: 22711, sum loss: 6591.727539, avg loss: 3.495084, ppl: 32.953049 +epoch: 0, batch: 22712, sum loss: 6357.071289, avg loss: 3.253363, ppl: 25.877216 +epoch: 0, batch: 22713, sum loss: 4714.376953, avg loss: 3.083308, ppl: 21.830488 +epoch: 0, batch: 22714, sum loss: 6919.343750, avg loss: 3.329809, ppl: 27.933012 +epoch: 0, batch: 22715, sum loss: 5014.583008, avg loss: 2.937659, ppl: 18.871607 +epoch: 0, batch: 22716, sum loss: 6071.531250, avg loss: 3.289020, ppl: 26.816572 +epoch: 0, batch: 22717, sum loss: 6838.951660, avg loss: 3.720866, ppl: 41.300144 +epoch: 0, batch: 22718, sum loss: 5053.758789, avg loss: 3.053631, ppl: 21.192150 +epoch: 0, batch: 22719, sum loss: 5866.071289, avg loss: 3.202004, ppl: 24.581741 +epoch: 0, batch: 22720, sum loss: 5515.270508, avg loss: 3.178830, ppl: 24.018644 +epoch: 0, batch: 22721, sum loss: 5928.958496, avg loss: 3.459136, ppl: 31.789492 +epoch: 0, batch: 22722, sum loss: 5515.646484, avg loss: 3.025588, ppl: 20.606113 +epoch: 0, batch: 22723, sum loss: 6565.296387, avg loss: 3.468197, ppl: 32.078838 +epoch: 0, batch: 22724, sum loss: 6040.894531, avg loss: 3.335668, ppl: 28.097149 +epoch: 0, batch: 22725, sum loss: 4947.356934, avg loss: 3.038917, ppl: 20.882616 +epoch: 0, batch: 22726, sum loss: 6202.020020, avg loss: 3.252239, ppl: 25.848156 +epoch: 0, batch: 22727, sum loss: 5561.383301, avg loss: 3.203562, ppl: 
24.620066 +epoch: 0, batch: 22728, sum loss: 5149.157715, avg loss: 3.059511, ppl: 21.317139 +epoch: 0, batch: 22729, sum loss: 6142.393066, avg loss: 3.439190, ppl: 31.161705 +epoch: 0, batch: 22730, sum loss: 4200.628418, avg loss: 2.889015, ppl: 17.975603 +epoch: 0, batch: 22731, sum loss: 5519.378906, avg loss: 3.252433, ppl: 25.853167 +epoch: 0, batch: 22732, sum loss: 5608.459473, avg loss: 3.063058, ppl: 21.392885 +epoch: 0, batch: 22733, sum loss: 5330.284668, avg loss: 3.155882, ppl: 23.473734 +epoch: 0, batch: 22734, sum loss: 5840.163574, avg loss: 3.308875, ppl: 27.354324 +epoch: 0, batch: 22735, sum loss: 6266.029785, avg loss: 3.377914, ppl: 29.309559 +epoch: 0, batch: 22736, sum loss: 3537.622803, avg loss: 2.436379, ppl: 11.431577 +epoch: 0, batch: 22737, sum loss: 5787.741699, avg loss: 3.303506, ppl: 27.207853 +epoch: 0, batch: 22738, sum loss: 6229.451172, avg loss: 3.394796, ppl: 29.808582 +epoch: 0, batch: 22739, sum loss: 4559.105469, avg loss: 2.759749, ppl: 15.795880 +epoch: 0, batch: 22740, sum loss: 4037.449951, avg loss: 2.849294, ppl: 17.275589 +epoch: 0, batch: 22741, sum loss: 4767.721680, avg loss: 2.952150, ppl: 19.147068 +epoch: 0, batch: 22742, sum loss: 5635.856445, avg loss: 3.342738, ppl: 28.296501 +epoch: 0, batch: 22743, sum loss: 4831.891602, avg loss: 3.095382, ppl: 22.095682 +epoch: 0, batch: 22744, sum loss: 6600.122070, avg loss: 3.313314, ppl: 27.476042 +epoch: 0, batch: 22745, sum loss: 4798.849121, avg loss: 2.917233, ppl: 18.490063 +epoch: 0, batch: 22746, sum loss: 5626.191406, avg loss: 3.319287, ppl: 27.640638 +epoch: 0, batch: 22747, sum loss: 5444.549316, avg loss: 3.197034, ppl: 24.459877 +epoch: 0, batch: 22748, sum loss: 5028.615723, avg loss: 3.043956, ppl: 20.988113 +epoch: 0, batch: 22749, sum loss: 5393.688965, avg loss: 3.152361, ppl: 23.391224 +epoch: 0, batch: 22750, sum loss: 5461.252930, avg loss: 3.164109, ppl: 23.667658 +epoch: 0, batch: 22751, sum loss: 5842.027344, avg loss: 3.209905, ppl: 
24.776741 +epoch: 0, batch: 22752, sum loss: 6022.751953, avg loss: 3.300138, ppl: 27.116381 +epoch: 0, batch: 22753, sum loss: 5386.479980, avg loss: 3.430879, ppl: 30.903799 +epoch: 0, batch: 22754, sum loss: 4463.678223, avg loss: 3.009898, ppl: 20.285324 +epoch: 0, batch: 22755, sum loss: 4735.345215, avg loss: 2.903339, ppl: 18.234924 +epoch: 0, batch: 22756, sum loss: 5897.532715, avg loss: 3.265522, ppl: 26.193781 +epoch: 0, batch: 22757, sum loss: 5840.855469, avg loss: 3.177832, ppl: 23.994680 +epoch: 0, batch: 22758, sum loss: 5471.492188, avg loss: 3.278306, ppl: 26.530785 +epoch: 0, batch: 22759, sum loss: 5389.303223, avg loss: 3.093745, ppl: 22.059526 +epoch: 0, batch: 22760, sum loss: 6048.772461, avg loss: 3.255529, ppl: 25.933321 +epoch: 0, batch: 22761, sum loss: 4387.100098, avg loss: 2.960257, ppl: 19.302923 +epoch: 0, batch: 22762, sum loss: 4750.613281, avg loss: 3.018178, ppl: 20.454000 +epoch: 0, batch: 22763, sum loss: 4388.041992, avg loss: 2.881183, ppl: 17.835363 +epoch: 0, batch: 22764, sum loss: 5978.779297, avg loss: 3.205780, ppl: 24.674734 +epoch: 0, batch: 22765, sum loss: 5627.561523, avg loss: 3.075170, ppl: 21.653568 +epoch: 0, batch: 22766, sum loss: 4911.014648, avg loss: 2.928452, ppl: 18.698668 +epoch: 0, batch: 22767, sum loss: 4667.528320, avg loss: 3.076815, ppl: 21.689209 +epoch: 0, batch: 22768, sum loss: 5394.006836, avg loss: 3.233817, ppl: 25.376337 +epoch: 0, batch: 22769, sum loss: 5391.558105, avg loss: 3.075618, ppl: 21.663267 +epoch: 0, batch: 22770, sum loss: 4380.829102, avg loss: 3.021261, ppl: 20.517157 +epoch: 0, batch: 22771, sum loss: 5825.019043, avg loss: 3.186553, ppl: 24.204855 +epoch: 0, batch: 22772, sum loss: 5760.444336, avg loss: 3.276703, ppl: 26.488300 +epoch: 0, batch: 22773, sum loss: 5427.333008, avg loss: 3.275397, ppl: 26.453728 +epoch: 0, batch: 22774, sum loss: 5021.544434, avg loss: 2.976612, ppl: 19.621229 +epoch: 0, batch: 22775, sum loss: 5100.841309, avg loss: 3.014682, ppl: 
20.382599 +epoch: 0, batch: 22776, sum loss: 5841.136230, avg loss: 3.460389, ppl: 31.829353 +epoch: 0, batch: 22777, sum loss: 5734.512207, avg loss: 3.295697, ppl: 26.996216 +epoch: 0, batch: 22778, sum loss: 5285.235840, avg loss: 3.021862, ppl: 20.529472 +epoch: 0, batch: 22779, sum loss: 5161.626953, avg loss: 2.896536, ppl: 18.111301 +epoch: 0, batch: 22780, sum loss: 5732.906738, avg loss: 3.296669, ppl: 27.022470 +epoch: 0, batch: 22781, sum loss: 5851.574707, avg loss: 3.041359, ppl: 20.933672 +epoch: 0, batch: 22782, sum loss: 5705.910645, avg loss: 3.366319, ppl: 28.971684 +epoch: 0, batch: 22783, sum loss: 5081.346680, avg loss: 3.075876, ppl: 21.668850 +epoch: 0, batch: 22784, sum loss: 5182.237793, avg loss: 2.990328, ppl: 19.892199 +epoch: 0, batch: 22785, sum loss: 4691.144043, avg loss: 3.119112, ppl: 22.626268 +epoch: 0, batch: 22786, sum loss: 6558.133789, avg loss: 3.347695, ppl: 28.437099 +epoch: 0, batch: 22787, sum loss: 6824.740234, avg loss: 3.274827, ppl: 26.438663 +epoch: 0, batch: 22788, sum loss: 5230.368652, avg loss: 3.030341, ppl: 20.704290 +epoch: 0, batch: 22789, sum loss: 4653.980469, avg loss: 2.947423, ppl: 19.056776 +epoch: 0, batch: 22790, sum loss: 5418.528320, avg loss: 3.028803, ppl: 20.672470 +epoch: 0, batch: 22791, sum loss: 5668.884766, avg loss: 3.342503, ppl: 28.289843 +epoch: 0, batch: 22792, sum loss: 4852.328613, avg loss: 3.114460, ppl: 22.521265 +epoch: 0, batch: 22793, sum loss: 6253.578613, avg loss: 3.472281, ppl: 32.210136 +epoch: 0, batch: 22794, sum loss: 5725.755859, avg loss: 3.175683, ppl: 23.943163 +epoch: 0, batch: 22795, sum loss: 5824.832031, avg loss: 3.378673, ppl: 29.331823 +epoch: 0, batch: 22796, sum loss: 5526.511719, avg loss: 3.134720, ppl: 22.982203 +epoch: 0, batch: 22797, sum loss: 4789.321289, avg loss: 2.938234, ppl: 18.882467 +epoch: 0, batch: 22798, sum loss: 5151.765137, avg loss: 3.003945, ppl: 20.164923 +epoch: 0, batch: 22799, sum loss: 5054.501953, avg loss: 2.928448, ppl: 
18.698595 +epoch: 0, batch: 22800, sum loss: 5095.851074, avg loss: 2.959263, ppl: 19.283756 +epoch: 0, batch: 22801, sum loss: 6124.837891, avg loss: 3.145782, ppl: 23.237852 +epoch: 0, batch: 22802, sum loss: 4406.188965, avg loss: 3.020006, ppl: 20.491419 +epoch: 0, batch: 22803, sum loss: 5741.972656, avg loss: 3.115558, ppl: 22.546000 +epoch: 0, batch: 22804, sum loss: 4868.543945, avg loss: 2.943497, ppl: 18.982109 +epoch: 0, batch: 22805, sum loss: 5970.648438, avg loss: 3.142447, ppl: 23.160465 +epoch: 0, batch: 22806, sum loss: 5314.209473, avg loss: 3.029766, ppl: 20.692387 +epoch: 0, batch: 22807, sum loss: 5010.854492, avg loss: 3.118142, ppl: 22.604349 +epoch: 0, batch: 22808, sum loss: 5032.085449, avg loss: 3.121641, ppl: 22.683577 +epoch: 0, batch: 22809, sum loss: 6276.679199, avg loss: 3.454419, ppl: 31.639896 +epoch: 0, batch: 22810, sum loss: 5465.442383, avg loss: 3.146484, ppl: 23.254156 +epoch: 0, batch: 22811, sum loss: 5565.833984, avg loss: 3.123364, ppl: 22.722685 +epoch: 0, batch: 22812, sum loss: 5710.130859, avg loss: 3.312141, ppl: 27.443825 +epoch: 0, batch: 22813, sum loss: 5437.442383, avg loss: 3.295420, ppl: 26.988739 +epoch: 0, batch: 22814, sum loss: 5013.615723, avg loss: 3.123748, ppl: 22.731419 +epoch: 0, batch: 22815, sum loss: 6460.324707, avg loss: 3.340395, ppl: 28.230288 +epoch: 0, batch: 22816, sum loss: 5863.043945, avg loss: 3.448850, ppl: 31.464178 +epoch: 0, batch: 22817, sum loss: 4680.499023, avg loss: 2.952996, ppl: 19.163286 +epoch: 0, batch: 22818, sum loss: 5263.738770, avg loss: 3.063876, ppl: 21.410381 +epoch: 0, batch: 22819, sum loss: 5842.532715, avg loss: 3.260342, ppl: 26.058451 +epoch: 0, batch: 22820, sum loss: 5097.300293, avg loss: 3.000177, ppl: 20.089090 +epoch: 0, batch: 22821, sum loss: 6338.077637, avg loss: 3.371318, ppl: 29.116875 +epoch: 0, batch: 22822, sum loss: 4893.704590, avg loss: 3.120985, ppl: 22.668697 +epoch: 0, batch: 22823, sum loss: 4955.349609, avg loss: 3.041958, ppl: 
20.946213 +epoch: 0, batch: 22824, sum loss: 5566.696289, avg loss: 3.386068, ppl: 29.549545 +epoch: 0, batch: 22825, sum loss: 4472.695312, avg loss: 3.091013, ppl: 21.999346 +epoch: 0, batch: 22826, sum loss: 5588.041504, avg loss: 3.320286, ppl: 27.668264 +epoch: 0, batch: 22827, sum loss: 4559.121094, avg loss: 3.080487, ppl: 21.769007 +epoch: 0, batch: 22828, sum loss: 5377.736816, avg loss: 3.043428, ppl: 20.977024 +epoch: 0, batch: 22829, sum loss: 5295.693359, avg loss: 3.113282, ppl: 22.494755 +epoch: 0, batch: 22830, sum loss: 4575.948730, avg loss: 3.177742, ppl: 23.992523 +epoch: 0, batch: 22831, sum loss: 4557.456055, avg loss: 3.089801, ppl: 21.972702 +epoch: 0, batch: 22832, sum loss: 6424.991699, avg loss: 3.415732, ppl: 30.439220 +epoch: 0, batch: 22833, sum loss: 3771.085205, avg loss: 2.837536, ppl: 17.073647 +epoch: 0, batch: 22834, sum loss: 5466.157227, avg loss: 3.118173, ppl: 22.605045 +epoch: 0, batch: 22835, sum loss: 3994.821533, avg loss: 2.622995, ppl: 13.776926 +epoch: 0, batch: 22836, sum loss: 5589.068359, avg loss: 3.289623, ppl: 26.832733 +epoch: 0, batch: 22837, sum loss: 5199.047852, avg loss: 3.015689, ppl: 20.403147 +epoch: 0, batch: 22838, sum loss: 5508.819824, avg loss: 3.421627, ppl: 30.619194 +epoch: 0, batch: 22839, sum loss: 5071.332031, avg loss: 3.271827, ppl: 26.359461 +epoch: 0, batch: 22840, sum loss: 5709.023438, avg loss: 3.294301, ppl: 26.958557 +epoch: 0, batch: 22841, sum loss: 4794.255371, avg loss: 2.992669, ppl: 19.938822 +epoch: 0, batch: 22842, sum loss: 5453.817383, avg loss: 3.285432, ppl: 26.720533 +epoch: 0, batch: 22843, sum loss: 6044.522949, avg loss: 3.386287, ppl: 29.556021 +epoch: 0, batch: 22844, sum loss: 6997.346680, avg loss: 3.579206, ppl: 35.845051 +epoch: 0, batch: 22845, sum loss: 5992.285156, avg loss: 3.138966, ppl: 23.079981 +epoch: 0, batch: 22846, sum loss: 4615.223633, avg loss: 2.758651, ppl: 15.778551 +epoch: 0, batch: 22847, sum loss: 6163.117676, avg loss: 3.255741, ppl: 
25.938826 +epoch: 0, batch: 22848, sum loss: 4697.423828, avg loss: 3.086350, ppl: 21.897001 +epoch: 0, batch: 22849, sum loss: 5592.467285, avg loss: 3.154240, ppl: 23.435223 +epoch: 0, batch: 22850, sum loss: 4559.094727, avg loss: 3.072166, ppl: 21.588617 +epoch: 0, batch: 22851, sum loss: 4441.211426, avg loss: 2.841466, ppl: 17.140875 +epoch: 0, batch: 22852, sum loss: 5053.637695, avg loss: 3.129187, ppl: 22.855398 +epoch: 0, batch: 22853, sum loss: 4842.158691, avg loss: 2.901234, ppl: 18.196581 +epoch: 0, batch: 22854, sum loss: 5425.640137, avg loss: 3.171035, ppl: 23.832134 +epoch: 0, batch: 22855, sum loss: 4143.900879, avg loss: 2.989827, ppl: 19.882250 +epoch: 0, batch: 22856, sum loss: 5671.558105, avg loss: 3.052507, ppl: 21.168344 +epoch: 0, batch: 22857, sum loss: 5891.525879, avg loss: 3.186331, ppl: 24.199472 +epoch: 0, batch: 22858, sum loss: 4487.049805, avg loss: 3.037949, ppl: 20.862402 +epoch: 0, batch: 22859, sum loss: 4616.105469, avg loss: 2.823306, ppl: 16.832409 +epoch: 0, batch: 22860, sum loss: 5950.908691, avg loss: 3.243002, ppl: 25.610495 +epoch: 0, batch: 22861, sum loss: 5972.015137, avg loss: 3.410631, ppl: 30.284361 +epoch: 0, batch: 22862, sum loss: 5877.677246, avg loss: 3.080544, ppl: 21.770231 +epoch: 0, batch: 22863, sum loss: 4789.866699, avg loss: 2.949425, ppl: 19.094975 +epoch: 0, batch: 22864, sum loss: 4940.637207, avg loss: 3.105366, ppl: 22.317385 +epoch: 0, batch: 22865, sum loss: 4922.637695, avg loss: 2.850398, ppl: 17.294668 +epoch: 0, batch: 22866, sum loss: 6420.467773, avg loss: 3.361501, ppl: 28.832449 +epoch: 0, batch: 22867, sum loss: 6103.269043, avg loss: 3.564993, ppl: 35.339218 +epoch: 0, batch: 22868, sum loss: 4729.196289, avg loss: 3.031536, ppl: 20.729050 +epoch: 0, batch: 22869, sum loss: 5491.514648, avg loss: 3.239832, ppl: 25.529425 +epoch: 0, batch: 22870, sum loss: 5450.631348, avg loss: 3.285492, ppl: 26.722126 +epoch: 0, batch: 22871, sum loss: 4235.428223, avg loss: 3.114285, ppl: 
22.517336 +epoch: 0, batch: 22872, sum loss: 4640.286621, avg loss: 3.058857, ppl: 21.303198 +epoch: 0, batch: 22873, sum loss: 6052.324707, avg loss: 3.325453, ppl: 27.811602 +epoch: 0, batch: 22874, sum loss: 5586.659180, avg loss: 3.311594, ppl: 27.428820 +epoch: 0, batch: 22875, sum loss: 5373.802734, avg loss: 3.221704, ppl: 25.070810 +epoch: 0, batch: 22876, sum loss: 5279.882812, avg loss: 3.163501, ppl: 23.653261 +epoch: 0, batch: 22877, sum loss: 5893.953125, avg loss: 3.296394, ppl: 27.015057 +epoch: 0, batch: 22878, sum loss: 5497.844727, avg loss: 3.481852, ppl: 32.519901 +epoch: 0, batch: 22879, sum loss: 5966.692383, avg loss: 3.262270, ppl: 26.108749 +epoch: 0, batch: 22880, sum loss: 4914.408203, avg loss: 3.116302, ppl: 22.562788 +epoch: 0, batch: 22881, sum loss: 5264.806641, avg loss: 3.251888, ppl: 25.839079 +epoch: 0, batch: 22882, sum loss: 4821.764160, avg loss: 2.909936, ppl: 18.355623 +epoch: 0, batch: 22883, sum loss: 5033.036621, avg loss: 3.306857, ppl: 27.299198 +epoch: 0, batch: 22884, sum loss: 5154.806641, avg loss: 3.248146, ppl: 25.742558 +epoch: 0, batch: 22885, sum loss: 5163.736328, avg loss: 3.179641, ppl: 24.038111 +epoch: 0, batch: 22886, sum loss: 5414.604980, avg loss: 3.222979, ppl: 25.102798 +epoch: 0, batch: 22887, sum loss: 4486.109863, avg loss: 2.883104, ppl: 17.869656 +epoch: 0, batch: 22888, sum loss: 6062.215820, avg loss: 3.210919, ppl: 24.801878 +epoch: 0, batch: 22889, sum loss: 5936.063965, avg loss: 3.469354, ppl: 32.115986 +epoch: 0, batch: 22890, sum loss: 4566.222168, avg loss: 2.908422, ppl: 18.327850 +epoch: 0, batch: 22891, sum loss: 4397.643555, avg loss: 2.939601, ppl: 18.908308 +epoch: 0, batch: 22892, sum loss: 5287.631836, avg loss: 3.177664, ppl: 23.990635 +epoch: 0, batch: 22893, sum loss: 5487.679199, avg loss: 3.194225, ppl: 24.391272 +epoch: 0, batch: 22894, sum loss: 5455.246094, avg loss: 3.184615, ppl: 24.157995 +epoch: 0, batch: 22895, sum loss: 4392.108887, avg loss: 2.967641, ppl: 
19.445995 +epoch: 0, batch: 22896, sum loss: 5375.315430, avg loss: 3.062858, ppl: 21.388590 +epoch: 0, batch: 22897, sum loss: 4725.282227, avg loss: 3.131400, ppl: 22.906023 +epoch: 0, batch: 22898, sum loss: 5315.412598, avg loss: 3.326291, ppl: 27.834906 +epoch: 0, batch: 22899, sum loss: 5811.791016, avg loss: 3.139812, ppl: 23.099512 +epoch: 0, batch: 22900, sum loss: 6152.276367, avg loss: 3.305898, ppl: 27.273027 +epoch: 0, batch: 22901, sum loss: 5405.283691, avg loss: 3.097584, ppl: 22.144381 +epoch: 0, batch: 22902, sum loss: 5347.446777, avg loss: 3.050455, ppl: 21.124947 +epoch: 0, batch: 22903, sum loss: 5153.624512, avg loss: 3.031544, ppl: 20.729214 +epoch: 0, batch: 22904, sum loss: 4743.330078, avg loss: 3.032820, ppl: 20.755676 +epoch: 0, batch: 22905, sum loss: 5613.190918, avg loss: 3.144645, ppl: 23.211433 +epoch: 0, batch: 22906, sum loss: 5876.469238, avg loss: 3.204182, ppl: 24.635332 +epoch: 0, batch: 22907, sum loss: 5668.115234, avg loss: 3.052297, ppl: 21.163895 +epoch: 0, batch: 22908, sum loss: 5571.019043, avg loss: 3.280930, ppl: 26.600500 +epoch: 0, batch: 22909, sum loss: 5120.531250, avg loss: 2.992713, ppl: 19.939701 +epoch: 0, batch: 22910, sum loss: 5284.740723, avg loss: 3.268238, ppl: 26.265022 +epoch: 0, batch: 22911, sum loss: 5023.219238, avg loss: 3.285297, ppl: 26.716923 +epoch: 0, batch: 22912, sum loss: 5624.840820, avg loss: 3.230810, ppl: 25.300152 +epoch: 0, batch: 22913, sum loss: 5587.368164, avg loss: 3.329778, ppl: 27.932152 +epoch: 0, batch: 22914, sum loss: 5835.104004, avg loss: 3.317285, ppl: 27.585356 +epoch: 0, batch: 22915, sum loss: 6554.555664, avg loss: 3.219330, ppl: 25.011358 +epoch: 0, batch: 22916, sum loss: 5231.400391, avg loss: 3.140096, ppl: 23.106089 +epoch: 0, batch: 22917, sum loss: 4301.832520, avg loss: 3.068354, ppl: 21.506477 +epoch: 0, batch: 22918, sum loss: 5181.640625, avg loss: 3.234482, ppl: 25.393223 +epoch: 0, batch: 22919, sum loss: 5888.715820, avg loss: 3.319456, ppl: 
27.645317 +epoch: 0, batch: 22920, sum loss: 4847.478516, avg loss: 3.044899, ppl: 21.007898 +epoch: 0, batch: 22921, sum loss: 7160.482422, avg loss: 3.629236, ppl: 37.684013 +epoch: 0, batch: 22922, sum loss: 5480.923340, avg loss: 3.352247, ppl: 28.566837 +epoch: 0, batch: 22923, sum loss: 5878.419922, avg loss: 3.308059, ppl: 27.332008 +epoch: 0, batch: 22924, sum loss: 5351.859375, avg loss: 3.263329, ppl: 26.136395 +epoch: 0, batch: 22925, sum loss: 5076.716309, avg loss: 3.078664, ppl: 21.729343 +epoch: 0, batch: 22926, sum loss: 4972.139648, avg loss: 2.938617, ppl: 18.889700 +epoch: 0, batch: 22927, sum loss: 5281.858398, avg loss: 3.074423, ppl: 21.637390 +epoch: 0, batch: 22928, sum loss: 6199.521484, avg loss: 3.262906, ppl: 26.125355 +epoch: 0, batch: 22929, sum loss: 5368.317383, avg loss: 3.263415, ppl: 26.138638 +epoch: 0, batch: 22930, sum loss: 4550.466797, avg loss: 2.911367, ppl: 18.381912 +epoch: 0, batch: 22931, sum loss: 6080.782227, avg loss: 3.099277, ppl: 22.181913 +epoch: 0, batch: 22932, sum loss: 5973.854980, avg loss: 3.182661, ppl: 24.110834 +epoch: 0, batch: 22933, sum loss: 4860.620117, avg loss: 2.922802, ppl: 18.593317 +epoch: 0, batch: 22934, sum loss: 4746.642090, avg loss: 3.090262, ppl: 21.982836 +epoch: 0, batch: 22935, sum loss: 5683.031250, avg loss: 3.364731, ppl: 28.925724 +epoch: 0, batch: 22936, sum loss: 5043.190918, avg loss: 2.968329, ppl: 19.459379 +epoch: 0, batch: 22937, sum loss: 4962.979980, avg loss: 3.141127, ppl: 23.129911 +epoch: 0, batch: 22938, sum loss: 4613.935059, avg loss: 3.025531, ppl: 20.604950 +epoch: 0, batch: 22939, sum loss: 5458.299805, avg loss: 3.013970, ppl: 20.368109 +epoch: 0, batch: 22940, sum loss: 5024.149414, avg loss: 2.922716, ppl: 18.591721 +epoch: 0, batch: 22941, sum loss: 5953.376465, avg loss: 3.405822, ppl: 30.139061 +epoch: 0, batch: 22942, sum loss: 5454.292969, avg loss: 3.204637, ppl: 24.646559 +epoch: 0, batch: 22943, sum loss: 4844.033203, avg loss: 3.065844, ppl: 
21.452557 +epoch: 0, batch: 22944, sum loss: 5559.729004, avg loss: 3.251303, ppl: 25.823978 +epoch: 0, batch: 22945, sum loss: 5625.761719, avg loss: 3.183793, ppl: 24.138132 +epoch: 0, batch: 22946, sum loss: 4673.677734, avg loss: 3.017223, ppl: 20.434460 +epoch: 0, batch: 22947, sum loss: 5377.533691, avg loss: 3.307216, ppl: 27.308996 +epoch: 0, batch: 22948, sum loss: 5661.918945, avg loss: 3.348267, ppl: 28.453377 +epoch: 0, batch: 22949, sum loss: 4799.002441, avg loss: 2.841328, ppl: 17.138504 +epoch: 0, batch: 22950, sum loss: 5353.749512, avg loss: 3.101825, ppl: 22.238495 +epoch: 0, batch: 22951, sum loss: 4451.644531, avg loss: 3.011938, ppl: 20.326761 +epoch: 0, batch: 22952, sum loss: 6166.749512, avg loss: 3.055872, ppl: 21.239698 +epoch: 0, batch: 22953, sum loss: 4427.707031, avg loss: 3.118104, ppl: 22.603477 +epoch: 0, batch: 22954, sum loss: 4815.868164, avg loss: 3.153810, ppl: 23.425150 +epoch: 0, batch: 22955, sum loss: 5889.498047, avg loss: 3.211286, ppl: 24.810963 +epoch: 0, batch: 22956, sum loss: 5460.559570, avg loss: 3.055713, ppl: 21.236330 +epoch: 0, batch: 22957, sum loss: 4367.137695, avg loss: 3.049677, ppl: 21.108534 +epoch: 0, batch: 22958, sum loss: 6060.195312, avg loss: 3.414194, ppl: 30.392454 +epoch: 0, batch: 22959, sum loss: 6285.126465, avg loss: 3.327224, ppl: 27.860899 +epoch: 0, batch: 22960, sum loss: 5975.425781, avg loss: 3.210868, ppl: 24.800613 +epoch: 0, batch: 22961, sum loss: 5743.406250, avg loss: 3.185472, ppl: 24.178703 +epoch: 0, batch: 22962, sum loss: 5335.675293, avg loss: 3.305871, ppl: 27.272280 +epoch: 0, batch: 22963, sum loss: 4397.905273, avg loss: 3.001983, ppl: 20.125410 +epoch: 0, batch: 22964, sum loss: 4864.960449, avg loss: 3.136660, ppl: 23.026836 +epoch: 0, batch: 22965, sum loss: 4548.006836, avg loss: 2.898666, ppl: 18.149916 +epoch: 0, batch: 22966, sum loss: 5713.700195, avg loss: 3.093503, ppl: 22.054203 +epoch: 0, batch: 22967, sum loss: 5572.344238, avg loss: 3.283644, ppl: 
26.672796 +epoch: 0, batch: 22968, sum loss: 5574.058105, avg loss: 3.167078, ppl: 23.738026 +epoch: 0, batch: 22969, sum loss: 5161.630371, avg loss: 2.980156, ppl: 19.690891 +epoch: 0, batch: 22970, sum loss: 5861.804688, avg loss: 3.094934, ppl: 22.085781 +epoch: 0, batch: 22971, sum loss: 5513.786133, avg loss: 3.152536, ppl: 23.395329 +epoch: 0, batch: 22972, sum loss: 5597.115234, avg loss: 3.025468, ppl: 20.603642 +epoch: 0, batch: 22973, sum loss: 5179.816406, avg loss: 3.189542, ppl: 24.277308 +epoch: 0, batch: 22974, sum loss: 4030.481689, avg loss: 3.051084, ppl: 21.138241 +epoch: 0, batch: 22975, sum loss: 4757.583984, avg loss: 2.975349, ppl: 19.596470 +epoch: 0, batch: 22976, sum loss: 5223.536133, avg loss: 3.067255, ppl: 21.482862 +epoch: 0, batch: 22977, sum loss: 5493.513672, avg loss: 3.505752, ppl: 33.306492 +epoch: 0, batch: 22978, sum loss: 5889.346191, avg loss: 3.193789, ppl: 24.380625 +epoch: 0, batch: 22979, sum loss: 5094.315918, avg loss: 3.162207, ppl: 23.622683 +epoch: 0, batch: 22980, sum loss: 5656.735840, avg loss: 2.980367, ppl: 19.695038 +epoch: 0, batch: 22981, sum loss: 5768.889648, avg loss: 3.152399, ppl: 23.392111 +epoch: 0, batch: 22982, sum loss: 5649.285156, avg loss: 3.395003, ppl: 29.814753 +epoch: 0, batch: 22983, sum loss: 5892.899414, avg loss: 3.270200, ppl: 26.316589 +epoch: 0, batch: 22984, sum loss: 5722.909668, avg loss: 3.101848, ppl: 22.239008 +epoch: 0, batch: 22985, sum loss: 5452.731934, avg loss: 3.037734, ppl: 20.857922 +epoch: 0, batch: 22986, sum loss: 4721.094238, avg loss: 3.168520, ppl: 23.772268 +epoch: 0, batch: 22987, sum loss: 5587.588379, avg loss: 3.189263, ppl: 24.270531 +epoch: 0, batch: 22988, sum loss: 5801.503906, avg loss: 3.253788, ppl: 25.888218 +epoch: 0, batch: 22989, sum loss: 5494.645020, avg loss: 3.090352, ppl: 21.984806 +epoch: 0, batch: 22990, sum loss: 5068.050781, avg loss: 3.053043, ppl: 21.179689 +epoch: 0, batch: 22991, sum loss: 7107.664062, avg loss: 3.299751, ppl: 
27.105890 +epoch: 0, batch: 22992, sum loss: 4637.438477, avg loss: 3.176328, ppl: 23.958609 +epoch: 0, batch: 22993, sum loss: 5613.932617, avg loss: 3.580314, ppl: 35.884811 +epoch: 0, batch: 22994, sum loss: 5309.973633, avg loss: 3.088990, ppl: 21.954891 +epoch: 0, batch: 22995, sum loss: 5285.490234, avg loss: 3.089124, ppl: 21.957829 +epoch: 0, batch: 22996, sum loss: 5679.011719, avg loss: 2.940969, ppl: 18.934193 +epoch: 0, batch: 22997, sum loss: 5263.841797, avg loss: 3.287846, ppl: 26.785114 +epoch: 0, batch: 22998, sum loss: 5195.385254, avg loss: 3.112873, ppl: 22.485559 +epoch: 0, batch: 22999, sum loss: 4616.051270, avg loss: 3.406680, ppl: 30.164927 +epoch: 0, batch: 23000, sum loss: 6925.595703, avg loss: 3.355424, ppl: 28.657757 +epoch: 0, batch: 23001, sum loss: 5302.913574, avg loss: 3.183021, ppl: 24.119516 +epoch: 0, batch: 23002, sum loss: 5032.291992, avg loss: 2.984752, ppl: 19.781599 +epoch: 0, batch: 23003, sum loss: 4953.586914, avg loss: 3.193802, ppl: 24.380945 +epoch: 0, batch: 23004, sum loss: 5804.377441, avg loss: 3.180481, ppl: 24.058315 +epoch: 0, batch: 23005, sum loss: 6375.675781, avg loss: 3.538111, ppl: 34.401871 +epoch: 0, batch: 23006, sum loss: 4110.761719, avg loss: 2.882722, ppl: 17.862833 +epoch: 0, batch: 23007, sum loss: 6312.921875, avg loss: 3.354369, ppl: 28.627525 +epoch: 0, batch: 23008, sum loss: 4817.348633, avg loss: 3.163065, ppl: 23.642962 +epoch: 0, batch: 23009, sum loss: 6046.059570, avg loss: 3.355194, ppl: 28.651165 +epoch: 0, batch: 23010, sum loss: 5711.181641, avg loss: 3.237631, ppl: 25.473312 +epoch: 0, batch: 23011, sum loss: 6107.378906, avg loss: 3.313825, ppl: 27.490065 +epoch: 0, batch: 23012, sum loss: 5284.294434, avg loss: 3.192927, ppl: 24.359632 +epoch: 0, batch: 23013, sum loss: 5177.495117, avg loss: 3.321036, ppl: 27.689024 +epoch: 0, batch: 23014, sum loss: 5127.183594, avg loss: 3.151311, ppl: 23.366688 +epoch: 0, batch: 23015, sum loss: 4937.793457, avg loss: 3.204279, ppl: 
24.637722 +epoch: 0, batch: 23016, sum loss: 5189.127930, avg loss: 2.926750, ppl: 18.666868 +epoch: 0, batch: 23017, sum loss: 5083.704102, avg loss: 3.314018, ppl: 27.495388 +epoch: 0, batch: 23018, sum loss: 5375.907227, avg loss: 3.194241, ppl: 24.391649 +epoch: 0, batch: 23019, sum loss: 5249.133301, avg loss: 3.200691, ppl: 24.549488 +epoch: 0, batch: 23020, sum loss: 5483.515625, avg loss: 3.204860, ppl: 24.652054 +epoch: 0, batch: 23021, sum loss: 5512.616211, avg loss: 3.201287, ppl: 24.564125 +epoch: 0, batch: 23022, sum loss: 6340.064453, avg loss: 3.545897, ppl: 34.670780 +epoch: 0, batch: 23023, sum loss: 4843.289062, avg loss: 3.011996, ppl: 20.327925 +epoch: 0, batch: 23024, sum loss: 5078.144531, avg loss: 3.117338, ppl: 22.586185 +epoch: 0, batch: 23025, sum loss: 4850.936523, avg loss: 3.091738, ppl: 22.015303 +epoch: 0, batch: 23026, sum loss: 5654.190918, avg loss: 2.958760, ppl: 19.274061 +epoch: 0, batch: 23027, sum loss: 5391.958008, avg loss: 3.201875, ppl: 24.578583 +epoch: 0, batch: 23028, sum loss: 5043.600098, avg loss: 3.069750, ppl: 21.536524 +epoch: 0, batch: 23029, sum loss: 6551.066895, avg loss: 3.401385, ppl: 30.005625 +epoch: 0, batch: 23030, sum loss: 5566.800781, avg loss: 3.244056, ppl: 25.637508 +epoch: 0, batch: 23031, sum loss: 4850.707031, avg loss: 3.050759, ppl: 21.131378 +epoch: 0, batch: 23032, sum loss: 5741.111816, avg loss: 2.985497, ppl: 19.796349 +epoch: 0, batch: 23033, sum loss: 5475.130859, avg loss: 3.093294, ppl: 22.049599 +epoch: 0, batch: 23034, sum loss: 5488.709961, avg loss: 3.332550, ppl: 28.009676 +epoch: 0, batch: 23035, sum loss: 6175.089355, avg loss: 3.469151, ppl: 32.109478 +epoch: 0, batch: 23036, sum loss: 4766.956543, avg loss: 3.083413, ppl: 21.832794 +epoch: 0, batch: 23037, sum loss: 6056.372070, avg loss: 3.381559, ppl: 29.416594 +epoch: 0, batch: 23038, sum loss: 4466.360840, avg loss: 3.011707, ppl: 20.322052 +epoch: 0, batch: 23039, sum loss: 4742.965820, avg loss: 3.091894, ppl: 
22.018745 +epoch: 0, batch: 23040, sum loss: 5475.492188, avg loss: 3.409397, ppl: 30.247011 +epoch: 0, batch: 23041, sum loss: 4516.002930, avg loss: 2.978894, ppl: 19.666050 +epoch: 0, batch: 23042, sum loss: 4706.751953, avg loss: 2.903610, ppl: 18.239872 +epoch: 0, batch: 23043, sum loss: 5926.993164, avg loss: 3.269164, ppl: 26.289343 +epoch: 0, batch: 23044, sum loss: 4725.617676, avg loss: 3.060633, ppl: 21.341070 +epoch: 0, batch: 23045, sum loss: 5276.052246, avg loss: 2.955772, ppl: 19.216547 +epoch: 0, batch: 23046, sum loss: 5442.098145, avg loss: 3.254843, ppl: 25.915558 +epoch: 0, batch: 23047, sum loss: 4955.307129, avg loss: 3.032624, ppl: 20.751608 +epoch: 0, batch: 23048, sum loss: 5493.714844, avg loss: 2.993850, ppl: 19.962395 +epoch: 0, batch: 23049, sum loss: 5916.302246, avg loss: 3.384613, ppl: 29.506559 +epoch: 0, batch: 23050, sum loss: 5655.955566, avg loss: 3.172157, ppl: 23.858894 +epoch: 0, batch: 23051, sum loss: 5865.035645, avg loss: 3.280221, ppl: 26.581652 +epoch: 0, batch: 23052, sum loss: 5513.226074, avg loss: 3.243074, ppl: 25.612345 +epoch: 0, batch: 23053, sum loss: 5167.191895, avg loss: 3.057510, ppl: 21.274515 +epoch: 0, batch: 23054, sum loss: 5285.983398, avg loss: 3.082206, ppl: 21.806454 +epoch: 0, batch: 23055, sum loss: 4470.636719, avg loss: 2.954816, ppl: 19.198193 +epoch: 0, batch: 23056, sum loss: 5035.267090, avg loss: 3.236033, ppl: 25.432623 +epoch: 0, batch: 23057, sum loss: 5377.338867, avg loss: 3.110086, ppl: 22.422977 +epoch: 0, batch: 23058, sum loss: 6166.230469, avg loss: 3.208237, ppl: 24.735432 +epoch: 0, batch: 23059, sum loss: 5601.058594, avg loss: 3.116894, ppl: 22.576155 +epoch: 0, batch: 23060, sum loss: 5790.828613, avg loss: 3.180027, ppl: 24.047398 +epoch: 0, batch: 23061, sum loss: 5110.799805, avg loss: 3.065867, ppl: 21.453047 +epoch: 0, batch: 23062, sum loss: 5787.308594, avg loss: 3.275217, ppl: 26.448965 +epoch: 0, batch: 23063, sum loss: 4677.099121, avg loss: 2.998141, ppl: 
20.048223 +epoch: 0, batch: 23064, sum loss: 4672.019531, avg loss: 2.824679, ppl: 16.855539 +epoch: 0, batch: 23065, sum loss: 5381.971191, avg loss: 3.318108, ppl: 27.608063 +epoch: 0, batch: 23066, sum loss: 4402.039551, avg loss: 2.860325, ppl: 17.467196 +epoch: 0, batch: 23067, sum loss: 5813.544922, avg loss: 3.108848, ppl: 22.395222 +epoch: 0, batch: 23068, sum loss: 6432.448242, avg loss: 3.327702, ppl: 27.874222 +epoch: 0, batch: 23069, sum loss: 5869.858398, avg loss: 3.226970, ppl: 25.203175 +epoch: 0, batch: 23070, sum loss: 5356.400391, avg loss: 3.220926, ppl: 25.051313 +epoch: 0, batch: 23071, sum loss: 4572.212891, avg loss: 2.946014, ppl: 19.029943 +epoch: 0, batch: 23072, sum loss: 4814.916504, avg loss: 3.033974, ppl: 20.779646 +epoch: 0, batch: 23073, sum loss: 5356.256348, avg loss: 3.064220, ppl: 21.417749 +epoch: 0, batch: 23074, sum loss: 5369.107422, avg loss: 3.180751, ppl: 24.064816 +epoch: 0, batch: 23075, sum loss: 6039.896484, avg loss: 3.333276, ppl: 28.030025 +epoch: 0, batch: 23076, sum loss: 6429.010254, avg loss: 3.449040, ppl: 31.470165 +epoch: 0, batch: 23077, sum loss: 5311.555664, avg loss: 2.831320, ppl: 16.967840 +epoch: 0, batch: 23078, sum loss: 6644.233398, avg loss: 3.308881, ppl: 27.354506 +epoch: 0, batch: 23079, sum loss: 5216.082031, avg loss: 3.140326, ppl: 23.111406 +epoch: 0, batch: 23080, sum loss: 5058.336914, avg loss: 3.242524, ppl: 25.598242 +epoch: 0, batch: 23081, sum loss: 4587.019531, avg loss: 3.003942, ppl: 20.164871 +epoch: 0, batch: 23082, sum loss: 4742.076660, avg loss: 2.879221, ppl: 17.800396 +epoch: 0, batch: 23083, sum loss: 6146.128906, avg loss: 3.091614, ppl: 22.012577 +epoch: 0, batch: 23084, sum loss: 5221.653320, avg loss: 3.082440, ppl: 21.811556 +epoch: 0, batch: 23085, sum loss: 5508.546875, avg loss: 3.122759, ppl: 22.708950 +epoch: 0, batch: 23086, sum loss: 5399.833984, avg loss: 3.178243, ppl: 24.004534 +epoch: 0, batch: 23087, sum loss: 4912.079102, avg loss: 2.932585, ppl: 
18.776100 +epoch: 0, batch: 23088, sum loss: 4136.499512, avg loss: 3.010553, ppl: 20.298620 +epoch: 0, batch: 23089, sum loss: 5645.984375, avg loss: 3.117606, ppl: 22.592234 +epoch: 0, batch: 23090, sum loss: 5330.917969, avg loss: 3.242651, ppl: 25.601496 +epoch: 0, batch: 23091, sum loss: 4910.913574, avg loss: 2.951270, ppl: 19.130239 +epoch: 0, batch: 23092, sum loss: 6292.778809, avg loss: 3.325993, ppl: 27.826618 +epoch: 0, batch: 23093, sum loss: 4928.937500, avg loss: 3.040677, ppl: 20.919397 +epoch: 0, batch: 23094, sum loss: 4906.220703, avg loss: 3.169393, ppl: 23.793045 +epoch: 0, batch: 23095, sum loss: 4223.144043, avg loss: 2.892565, ppl: 18.039513 +epoch: 0, batch: 23096, sum loss: 4759.567383, avg loss: 3.014292, ppl: 20.374666 +epoch: 0, batch: 23097, sum loss: 5067.827148, avg loss: 3.043740, ppl: 20.983576 +epoch: 0, batch: 23098, sum loss: 5424.614258, avg loss: 3.064754, ppl: 21.429184 +epoch: 0, batch: 23099, sum loss: 5449.221191, avg loss: 3.224391, ppl: 25.138266 +epoch: 0, batch: 23100, sum loss: 4988.966797, avg loss: 3.183770, ppl: 24.137573 +epoch: 0, batch: 23101, sum loss: 4649.866211, avg loss: 2.929973, ppl: 18.727118 +epoch: 0, batch: 23102, sum loss: 5008.345703, avg loss: 2.960015, ppl: 19.298267 +epoch: 0, batch: 23103, sum loss: 6582.747559, avg loss: 3.198614, ppl: 24.498549 +epoch: 0, batch: 23104, sum loss: 4986.167480, avg loss: 3.198312, ppl: 24.491144 +epoch: 0, batch: 23105, sum loss: 5803.049805, avg loss: 3.135089, ppl: 22.990686 +epoch: 0, batch: 23106, sum loss: 5445.664551, avg loss: 3.133294, ppl: 22.949448 +epoch: 0, batch: 23107, sum loss: 5788.588379, avg loss: 3.319144, ppl: 27.636698 +epoch: 0, batch: 23108, sum loss: 5654.816406, avg loss: 3.110460, ppl: 22.431362 +epoch: 0, batch: 23109, sum loss: 5435.523438, avg loss: 3.302262, ppl: 27.174046 +epoch: 0, batch: 23110, sum loss: 5434.159668, avg loss: 3.261801, ppl: 26.096483 +epoch: 0, batch: 23111, sum loss: 4574.866699, avg loss: 3.051946, ppl: 
21.156473 +epoch: 0, batch: 23112, sum loss: 4141.158203, avg loss: 2.873809, ppl: 17.704323 +epoch: 0, batch: 23113, sum loss: 6343.545410, avg loss: 3.336952, ppl: 28.133238 +epoch: 0, batch: 23114, sum loss: 5581.721680, avg loss: 3.254648, ppl: 25.910492 +epoch: 0, batch: 23115, sum loss: 4279.864258, avg loss: 2.933423, ppl: 18.791845 +epoch: 0, batch: 23116, sum loss: 4827.357422, avg loss: 2.904547, ppl: 18.256975 +epoch: 0, batch: 23117, sum loss: 5186.466797, avg loss: 3.058058, ppl: 21.286184 +epoch: 0, batch: 23118, sum loss: 4946.253906, avg loss: 3.114770, ppl: 22.528242 +epoch: 0, batch: 23119, sum loss: 4553.968262, avg loss: 2.809357, ppl: 16.599245 +epoch: 0, batch: 23120, sum loss: 5652.011719, avg loss: 3.114056, ppl: 22.512171 +epoch: 0, batch: 23121, sum loss: 6274.204590, avg loss: 3.279772, ppl: 26.569721 +epoch: 0, batch: 23122, sum loss: 5016.554688, avg loss: 3.027492, ppl: 20.645395 +epoch: 0, batch: 23123, sum loss: 7060.503906, avg loss: 3.524965, ppl: 33.952572 +epoch: 0, batch: 23124, sum loss: 4893.150391, avg loss: 3.065884, ppl: 21.453411 +epoch: 0, batch: 23125, sum loss: 4934.559570, avg loss: 3.283140, ppl: 26.659349 +epoch: 0, batch: 23126, sum loss: 4947.669922, avg loss: 3.086506, ppl: 21.900433 +epoch: 0, batch: 23127, sum loss: 5634.771484, avg loss: 3.067377, ppl: 21.485474 +epoch: 0, batch: 23128, sum loss: 4961.912598, avg loss: 3.245201, ppl: 25.666878 +epoch: 0, batch: 23129, sum loss: 5534.025879, avg loss: 3.109003, ppl: 22.398708 +epoch: 0, batch: 23130, sum loss: 4160.920410, avg loss: 2.822877, ppl: 16.825186 +epoch: 0, batch: 23131, sum loss: 4926.146973, avg loss: 3.113873, ppl: 22.508043 +epoch: 0, batch: 23132, sum loss: 5112.840820, avg loss: 3.365925, ppl: 28.960281 +epoch: 0, batch: 23133, sum loss: 5350.631836, avg loss: 3.118084, ppl: 22.603029 +epoch: 0, batch: 23134, sum loss: 5628.328125, avg loss: 3.272284, ppl: 26.371498 +epoch: 0, batch: 23135, sum loss: 5458.896484, avg loss: 2.950755, ppl: 
19.120382 +epoch: 0, batch: 23136, sum loss: 5549.484375, avg loss: 3.233965, ppl: 25.380100 +epoch: 0, batch: 23137, sum loss: 5667.046875, avg loss: 3.227248, ppl: 25.210176 +epoch: 0, batch: 23138, sum loss: 4059.535645, avg loss: 3.018242, ppl: 20.455301 +epoch: 0, batch: 23139, sum loss: 4653.577148, avg loss: 3.065598, ppl: 21.447273 +epoch: 0, batch: 23140, sum loss: 4864.672852, avg loss: 3.106432, ppl: 22.341192 +epoch: 0, batch: 23141, sum loss: 4324.012207, avg loss: 2.875008, ppl: 17.725567 +epoch: 0, batch: 23142, sum loss: 6144.894531, avg loss: 3.284284, ppl: 26.689857 +epoch: 0, batch: 23143, sum loss: 5993.303711, avg loss: 3.053135, ppl: 21.181643 +epoch: 0, batch: 23144, sum loss: 5905.482422, avg loss: 3.239431, ppl: 25.519196 +epoch: 0, batch: 23145, sum loss: 6665.645020, avg loss: 3.599160, ppl: 36.567513 +epoch: 0, batch: 23146, sum loss: 5093.687500, avg loss: 3.044643, ppl: 21.002531 +epoch: 0, batch: 23147, sum loss: 5263.134766, avg loss: 3.125377, ppl: 22.768480 +epoch: 0, batch: 23148, sum loss: 5968.477051, avg loss: 3.134705, ppl: 22.981846 +epoch: 0, batch: 23149, sum loss: 4586.177734, avg loss: 3.063579, ppl: 21.404022 +epoch: 0, batch: 23150, sum loss: 5649.708008, avg loss: 3.119662, ppl: 22.638733 +epoch: 0, batch: 23151, sum loss: 5706.399902, avg loss: 3.211255, ppl: 24.810200 +epoch: 0, batch: 23152, sum loss: 4931.653320, avg loss: 2.937256, ppl: 18.864019 +epoch: 0, batch: 23153, sum loss: 5838.488770, avg loss: 3.222124, ppl: 25.081339 +epoch: 0, batch: 23154, sum loss: 4055.996826, avg loss: 2.723974, ppl: 15.240765 +epoch: 0, batch: 23155, sum loss: 5113.386719, avg loss: 3.114121, ppl: 22.513638 +epoch: 0, batch: 23156, sum loss: 5351.094238, avg loss: 3.325727, ppl: 27.819208 +epoch: 0, batch: 23157, sum loss: 5245.014648, avg loss: 3.079868, ppl: 21.755522 +epoch: 0, batch: 23158, sum loss: 4808.073242, avg loss: 3.023945, ppl: 20.572296 +epoch: 0, batch: 23159, sum loss: 5164.649414, avg loss: 3.090754, ppl: 
21.993650 +epoch: 0, batch: 23160, sum loss: 4448.067383, avg loss: 3.046622, ppl: 21.044128 +epoch: 0, batch: 23161, sum loss: 5255.447266, avg loss: 3.060831, ppl: 21.345295 +epoch: 0, batch: 23162, sum loss: 5301.479980, avg loss: 3.138828, ppl: 23.076801 +epoch: 0, batch: 23163, sum loss: 5903.560059, avg loss: 3.133525, ppl: 22.954744 +epoch: 0, batch: 23164, sum loss: 5420.833496, avg loss: 3.495057, ppl: 32.952164 +epoch: 0, batch: 23165, sum loss: 5676.396484, avg loss: 3.421577, ppl: 30.617668 +epoch: 0, batch: 23166, sum loss: 5328.433594, avg loss: 3.127015, ppl: 22.805805 +epoch: 0, batch: 23167, sum loss: 4988.170898, avg loss: 2.978013, ppl: 19.648727 +epoch: 0, batch: 23168, sum loss: 4939.118652, avg loss: 2.989781, ppl: 19.881336 +epoch: 0, batch: 23169, sum loss: 4322.005859, avg loss: 3.003479, ppl: 20.155527 +epoch: 0, batch: 23170, sum loss: 3895.815918, avg loss: 2.694202, ppl: 14.793708 +epoch: 0, batch: 23171, sum loss: 4821.504883, avg loss: 3.114667, ppl: 22.525938 +epoch: 0, batch: 23172, sum loss: 6014.202148, avg loss: 3.452470, ppl: 31.578281 +epoch: 0, batch: 23173, sum loss: 4885.037598, avg loss: 3.145549, ppl: 23.232428 +epoch: 0, batch: 23174, sum loss: 5197.299805, avg loss: 2.976690, ppl: 19.622749 +epoch: 0, batch: 23175, sum loss: 4832.301758, avg loss: 3.044929, ppl: 21.008535 +epoch: 0, batch: 23176, sum loss: 3966.524414, avg loss: 2.756445, ppl: 15.743773 +epoch: 0, batch: 23177, sum loss: 6012.167969, avg loss: 3.427690, ppl: 30.805393 +epoch: 0, batch: 23178, sum loss: 5360.189941, avg loss: 3.202025, ppl: 24.582264 +epoch: 0, batch: 23179, sum loss: 5333.332031, avg loss: 3.001312, ppl: 20.111912 +epoch: 0, batch: 23180, sum loss: 4857.214355, avg loss: 3.223102, ppl: 25.105875 +epoch: 0, batch: 23181, sum loss: 5561.889160, avg loss: 3.170974, ppl: 23.830690 +epoch: 0, batch: 23182, sum loss: 5019.011719, avg loss: 2.861466, ppl: 17.487146 +epoch: 0, batch: 23183, sum loss: 4446.193848, avg loss: 2.984023, ppl: 
19.767178 +epoch: 0, batch: 23184, sum loss: 6025.354492, avg loss: 3.113878, ppl: 22.508167 +epoch: 0, batch: 23185, sum loss: 7085.475098, avg loss: 3.480096, ppl: 32.462833 +epoch: 0, batch: 23186, sum loss: 5083.684570, avg loss: 2.965977, ppl: 19.413660 +epoch: 0, batch: 23187, sum loss: 5354.740234, avg loss: 3.129597, ppl: 22.864767 +epoch: 0, batch: 23188, sum loss: 5203.218750, avg loss: 2.997246, ppl: 20.030293 +epoch: 0, batch: 23189, sum loss: 4230.055664, avg loss: 2.797656, ppl: 16.406147 +epoch: 0, batch: 23190, sum loss: 5753.306641, avg loss: 3.267068, ppl: 26.234304 +epoch: 0, batch: 23191, sum loss: 6135.367188, avg loss: 3.406645, ppl: 30.163862 +epoch: 0, batch: 23192, sum loss: 6600.664062, avg loss: 3.390172, ppl: 29.671049 +epoch: 0, batch: 23193, sum loss: 5343.715820, avg loss: 3.443116, ppl: 31.284286 +epoch: 0, batch: 23194, sum loss: 5709.676270, avg loss: 3.247825, ppl: 25.734304 +epoch: 0, batch: 23195, sum loss: 5270.804688, avg loss: 3.085951, ppl: 21.888275 +epoch: 0, batch: 23196, sum loss: 5007.077637, avg loss: 2.966278, ppl: 19.419512 +epoch: 0, batch: 23197, sum loss: 4381.467773, avg loss: 2.922927, ppl: 18.595640 +epoch: 0, batch: 23198, sum loss: 5643.750000, avg loss: 3.104373, ppl: 22.295235 +epoch: 0, batch: 23199, sum loss: 5738.565430, avg loss: 3.413781, ppl: 30.379885 +epoch: 0, batch: 23200, sum loss: 4717.803711, avg loss: 2.965307, ppl: 19.400663 +epoch: 0, batch: 23201, sum loss: 5698.524902, avg loss: 3.232289, ppl: 25.337578 +epoch: 0, batch: 23202, sum loss: 5177.773438, avg loss: 3.170712, ppl: 23.824453 +epoch: 0, batch: 23203, sum loss: 5264.520996, avg loss: 3.243697, ppl: 25.628305 +epoch: 0, batch: 23204, sum loss: 4522.615723, avg loss: 3.023139, ppl: 20.555716 +epoch: 0, batch: 23205, sum loss: 4751.048828, avg loss: 2.886421, ppl: 17.929026 +epoch: 0, batch: 23206, sum loss: 5533.030762, avg loss: 3.099737, ppl: 22.192118 +epoch: 0, batch: 23207, sum loss: 5123.295898, avg loss: 3.060512, ppl: 
21.338470 +epoch: 0, batch: 23208, sum loss: 4907.951172, avg loss: 2.976320, ppl: 19.615490 +epoch: 0, batch: 23209, sum loss: 4928.242676, avg loss: 3.074387, ppl: 21.636616 +epoch: 0, batch: 23210, sum loss: 5396.059570, avg loss: 3.058991, ppl: 21.306042 +epoch: 0, batch: 23211, sum loss: 4302.749023, avg loss: 2.806751, ppl: 16.556036 +epoch: 0, batch: 23212, sum loss: 5515.761719, avg loss: 3.062611, ppl: 21.383308 +epoch: 0, batch: 23213, sum loss: 4726.754883, avg loss: 3.117912, ppl: 22.599150 +epoch: 0, batch: 23214, sum loss: 6192.547852, avg loss: 3.662063, ppl: 38.941593 +epoch: 0, batch: 23215, sum loss: 5273.981445, avg loss: 3.316969, ppl: 27.576651 +epoch: 0, batch: 23216, sum loss: 4628.214844, avg loss: 3.245592, ppl: 25.676916 +epoch: 0, batch: 23217, sum loss: 5785.953613, avg loss: 3.319537, ppl: 27.647558 +epoch: 0, batch: 23218, sum loss: 5396.857910, avg loss: 3.126801, ppl: 22.800913 +epoch: 0, batch: 23219, sum loss: 7126.161621, avg loss: 3.548885, ppl: 34.774536 +epoch: 0, batch: 23220, sum loss: 5570.089844, avg loss: 3.070612, ppl: 21.555090 +epoch: 0, batch: 23221, sum loss: 5381.578125, avg loss: 3.091084, ppl: 22.000925 +epoch: 0, batch: 23222, sum loss: 6287.222656, avg loss: 3.153070, ppl: 23.407827 +epoch: 0, batch: 23223, sum loss: 4870.080078, avg loss: 2.737538, ppl: 15.448901 +epoch: 0, batch: 23224, sum loss: 5659.907227, avg loss: 3.370999, ppl: 29.107588 +epoch: 0, batch: 23225, sum loss: 4868.208008, avg loss: 2.990300, ppl: 19.891644 +epoch: 0, batch: 23226, sum loss: 6103.255371, avg loss: 3.244687, ppl: 25.653669 +epoch: 0, batch: 23227, sum loss: 5143.604980, avg loss: 3.032786, ppl: 20.754972 +epoch: 0, batch: 23228, sum loss: 7200.234863, avg loss: 3.417292, ppl: 30.486755 +epoch: 0, batch: 23229, sum loss: 4557.628418, avg loss: 2.969139, ppl: 19.475145 +epoch: 0, batch: 23230, sum loss: 5618.222656, avg loss: 3.190359, ppl: 24.297152 +epoch: 0, batch: 23231, sum loss: 5264.508301, avg loss: 2.929610, ppl: 
18.720320 +epoch: 0, batch: 23232, sum loss: 5884.457031, avg loss: 3.196337, ppl: 24.442842 +epoch: 0, batch: 23233, sum loss: 5805.580078, avg loss: 3.254249, ppl: 25.900158 +epoch: 0, batch: 23234, sum loss: 4029.166992, avg loss: 2.624864, ppl: 13.802702 +epoch: 0, batch: 23235, sum loss: 5455.335938, avg loss: 3.160681, ppl: 23.586658 +epoch: 0, batch: 23236, sum loss: 5742.788086, avg loss: 3.126178, ppl: 22.786716 +epoch: 0, batch: 23237, sum loss: 5044.379883, avg loss: 3.038783, ppl: 20.879818 +epoch: 0, batch: 23238, sum loss: 6063.039062, avg loss: 3.351597, ppl: 28.548298 +epoch: 0, batch: 23239, sum loss: 4985.534668, avg loss: 3.169443, ppl: 23.794235 +epoch: 0, batch: 23240, sum loss: 5285.359375, avg loss: 2.915256, ppl: 18.453541 +epoch: 0, batch: 23241, sum loss: 5388.953613, avg loss: 3.012271, ppl: 20.333532 +epoch: 0, batch: 23242, sum loss: 5680.948730, avg loss: 3.184388, ppl: 24.152512 +epoch: 0, batch: 23243, sum loss: 5881.909180, avg loss: 3.200168, ppl: 24.536655 +epoch: 0, batch: 23244, sum loss: 4957.536133, avg loss: 3.208761, ppl: 24.748415 +epoch: 0, batch: 23245, sum loss: 6286.839844, avg loss: 3.615204, ppl: 37.158928 +epoch: 0, batch: 23246, sum loss: 5049.996582, avg loss: 3.326744, ppl: 27.847511 +epoch: 0, batch: 23247, sum loss: 5627.358398, avg loss: 3.195547, ppl: 24.423532 +epoch: 0, batch: 23248, sum loss: 5338.935547, avg loss: 2.977655, ppl: 19.641706 +epoch: 0, batch: 23249, sum loss: 4907.885254, avg loss: 3.048376, ppl: 21.081078 +epoch: 0, batch: 23250, sum loss: 4902.715820, avg loss: 3.112835, ppl: 22.484707 +epoch: 0, batch: 23251, sum loss: 5756.441406, avg loss: 3.133610, ppl: 22.956703 +epoch: 0, batch: 23252, sum loss: 5384.529297, avg loss: 3.018234, ppl: 20.455135 +epoch: 0, batch: 23253, sum loss: 5394.596680, avg loss: 2.866417, ppl: 17.573936 +epoch: 0, batch: 23254, sum loss: 5546.064941, avg loss: 3.100092, ppl: 22.199997 +epoch: 0, batch: 23255, sum loss: 4206.888672, avg loss: 2.909328, ppl: 
18.344471 +epoch: 0, batch: 23256, sum loss: 5515.628906, avg loss: 3.132100, ppl: 22.922073 +epoch: 0, batch: 23257, sum loss: 4865.901855, avg loss: 2.855576, ppl: 17.384453 +epoch: 0, batch: 23258, sum loss: 3722.252686, avg loss: 2.666370, ppl: 14.387650 +epoch: 0, batch: 23259, sum loss: 5651.818848, avg loss: 3.274518, ppl: 26.430490 +epoch: 0, batch: 23260, sum loss: 5343.402832, avg loss: 2.932713, ppl: 18.778503 +epoch: 0, batch: 23261, sum loss: 4861.172852, avg loss: 3.100238, ppl: 22.203226 +epoch: 0, batch: 23262, sum loss: 5019.756836, avg loss: 3.009447, ppl: 20.276176 +epoch: 0, batch: 23263, sum loss: 5114.921387, avg loss: 2.968614, ppl: 19.464914 +epoch: 0, batch: 23264, sum loss: 4832.458984, avg loss: 3.323562, ppl: 27.759050 +epoch: 0, batch: 23265, sum loss: 6027.627930, avg loss: 3.254659, ppl: 25.910770 +epoch: 0, batch: 23266, sum loss: 3807.092773, avg loss: 2.690525, ppl: 14.739409 +epoch: 0, batch: 23267, sum loss: 5044.812500, avg loss: 2.955368, ppl: 19.208782 +epoch: 0, batch: 23268, sum loss: 5556.562012, avg loss: 3.361501, ppl: 28.832449 +epoch: 0, batch: 23269, sum loss: 5893.674805, avg loss: 3.440557, ppl: 31.204342 +epoch: 0, batch: 23270, sum loss: 5263.203613, avg loss: 3.026569, ppl: 20.626345 +epoch: 0, batch: 23271, sum loss: 5135.244141, avg loss: 3.164044, ppl: 23.666117 +epoch: 0, batch: 23272, sum loss: 6404.796875, avg loss: 3.279466, ppl: 26.561583 +epoch: 0, batch: 23273, sum loss: 5409.451660, avg loss: 3.216083, ppl: 24.930279 +epoch: 0, batch: 23274, sum loss: 4347.742188, avg loss: 2.862240, ppl: 17.500677 +epoch: 0, batch: 23275, sum loss: 4848.048828, avg loss: 2.860206, ppl: 17.465122 +epoch: 0, batch: 23276, sum loss: 5198.285645, avg loss: 3.070458, ppl: 21.551775 +epoch: 0, batch: 23277, sum loss: 4235.868652, avg loss: 2.964219, ppl: 19.379559 +epoch: 0, batch: 23278, sum loss: 4865.622070, avg loss: 2.959624, ppl: 19.290718 +epoch: 0, batch: 23279, sum loss: 5539.108887, avg loss: 3.592159, ppl: 
36.312382 +epoch: 0, batch: 23280, sum loss: 4603.727539, avg loss: 2.760029, ppl: 15.800295 +epoch: 0, batch: 23281, sum loss: 6311.651367, avg loss: 3.302800, ppl: 27.188665 +epoch: 0, batch: 23282, sum loss: 4314.253418, avg loss: 2.857121, ppl: 17.411335 +epoch: 0, batch: 23283, sum loss: 4714.244141, avg loss: 3.000792, ppl: 20.101452 +epoch: 0, batch: 23284, sum loss: 5789.615234, avg loss: 3.269122, ppl: 26.288252 +epoch: 0, batch: 23285, sum loss: 4886.020020, avg loss: 3.036681, ppl: 20.835978 +epoch: 0, batch: 23286, sum loss: 5000.556641, avg loss: 3.038005, ppl: 20.863586 +epoch: 0, batch: 23287, sum loss: 5903.484375, avg loss: 3.480828, ppl: 32.486610 +epoch: 0, batch: 23288, sum loss: 5347.101074, avg loss: 2.960743, ppl: 19.312309 +epoch: 0, batch: 23289, sum loss: 5170.012695, avg loss: 3.057370, ppl: 21.271544 +epoch: 0, batch: 23290, sum loss: 4899.916992, avg loss: 3.147024, ppl: 23.266729 +epoch: 0, batch: 23291, sum loss: 4862.246094, avg loss: 3.247993, ppl: 25.738636 +epoch: 0, batch: 23292, sum loss: 4864.709961, avg loss: 3.025317, ppl: 20.600533 +epoch: 0, batch: 23293, sum loss: 5200.014160, avg loss: 3.215841, ppl: 24.924234 +epoch: 0, batch: 23294, sum loss: 5432.403320, avg loss: 3.227809, ppl: 25.224340 +epoch: 0, batch: 23295, sum loss: 5771.635254, avg loss: 3.194043, ppl: 24.386816 +epoch: 0, batch: 23296, sum loss: 5812.037598, avg loss: 3.267025, ppl: 26.233179 +epoch: 0, batch: 23297, sum loss: 4814.799805, avg loss: 3.104320, ppl: 22.294044 +epoch: 0, batch: 23298, sum loss: 4541.302246, avg loss: 3.200354, ppl: 24.541218 +epoch: 0, batch: 23299, sum loss: 5507.681641, avg loss: 2.945284, ppl: 19.016069 +epoch: 0, batch: 23300, sum loss: 5076.982910, avg loss: 3.103290, ppl: 22.271111 +epoch: 0, batch: 23301, sum loss: 4814.569824, avg loss: 2.990416, ppl: 19.893957 +epoch: 0, batch: 23302, sum loss: 5330.925293, avg loss: 3.102983, ppl: 22.264273 +epoch: 0, batch: 23303, sum loss: 4866.834961, avg loss: 3.127786, ppl: 
22.823391 +epoch: 0, batch: 23304, sum loss: 5085.939453, avg loss: 3.151140, ppl: 23.362677 +epoch: 0, batch: 23305, sum loss: 5585.188965, avg loss: 3.256670, ppl: 25.962931 +epoch: 0, batch: 23306, sum loss: 5270.943359, avg loss: 3.106036, ppl: 22.332348 +epoch: 0, batch: 23307, sum loss: 5521.710938, avg loss: 3.102085, ppl: 22.244280 +epoch: 0, batch: 23308, sum loss: 5169.517090, avg loss: 2.887998, ppl: 17.957329 +epoch: 0, batch: 23309, sum loss: 6120.438477, avg loss: 3.161383, ppl: 23.603226 +epoch: 0, batch: 23310, sum loss: 5523.894043, avg loss: 3.140361, ppl: 23.112200 +epoch: 0, batch: 23311, sum loss: 6089.073730, avg loss: 3.095614, ppl: 22.100813 +epoch: 0, batch: 23312, sum loss: 4665.013184, avg loss: 3.188663, ppl: 24.255964 +epoch: 0, batch: 23313, sum loss: 5639.372070, avg loss: 3.407476, ppl: 30.188942 +epoch: 0, batch: 23314, sum loss: 5949.788574, avg loss: 3.298109, ppl: 27.061419 +epoch: 0, batch: 23315, sum loss: 4458.920898, avg loss: 2.931572, ppl: 18.757093 +epoch: 0, batch: 23316, sum loss: 6097.047852, avg loss: 3.098094, ppl: 22.155676 +epoch: 0, batch: 23317, sum loss: 6481.908691, avg loss: 3.597064, ppl: 36.490932 +epoch: 0, batch: 23318, sum loss: 5828.261719, avg loss: 3.261478, ppl: 26.088078 +epoch: 0, batch: 23319, sum loss: 7045.109863, avg loss: 3.338915, ppl: 28.188515 +epoch: 0, batch: 23320, sum loss: 4238.852539, avg loss: 2.873798, ppl: 17.704138 +epoch: 0, batch: 23321, sum loss: 4687.753906, avg loss: 3.075954, ppl: 21.670546 +epoch: 0, batch: 23322, sum loss: 4975.683105, avg loss: 2.838382, ppl: 17.088091 +epoch: 0, batch: 23323, sum loss: 5192.698242, avg loss: 3.255610, ppl: 25.935436 +epoch: 0, batch: 23324, sum loss: 5775.806152, avg loss: 3.122057, ppl: 22.693022 +epoch: 0, batch: 23325, sum loss: 5336.357422, avg loss: 3.298120, ppl: 27.061703 +epoch: 0, batch: 23326, sum loss: 4612.482422, avg loss: 2.763620, ppl: 15.857148 +epoch: 0, batch: 23327, sum loss: 5028.502930, avg loss: 3.297379, ppl: 
27.041670 +epoch: 0, batch: 23328, sum loss: 6091.242676, avg loss: 3.344999, ppl: 28.360542 +epoch: 0, batch: 23329, sum loss: 5310.173828, avg loss: 3.214391, ppl: 24.888130 +epoch: 0, batch: 23330, sum loss: 3674.183350, avg loss: 2.677976, ppl: 14.555609 +epoch: 0, batch: 23331, sum loss: 5733.502930, avg loss: 3.181744, ppl: 24.088736 +epoch: 0, batch: 23332, sum loss: 4989.101562, avg loss: 3.116241, ppl: 22.561411 +epoch: 0, batch: 23333, sum loss: 5677.580566, avg loss: 3.306686, ppl: 27.294533 +epoch: 0, batch: 23334, sum loss: 5758.812988, avg loss: 3.385545, ppl: 29.534100 +epoch: 0, batch: 23335, sum loss: 5579.679199, avg loss: 3.055684, ppl: 21.235712 +epoch: 0, batch: 23336, sum loss: 5191.639160, avg loss: 3.055703, ppl: 21.236107 +epoch: 0, batch: 23337, sum loss: 7105.327148, avg loss: 3.704550, ppl: 40.631771 +epoch: 0, batch: 23338, sum loss: 6478.163086, avg loss: 3.374043, ppl: 29.196342 +epoch: 0, batch: 23339, sum loss: 4619.084961, avg loss: 3.125227, ppl: 22.765057 +epoch: 0, batch: 23340, sum loss: 4879.907715, avg loss: 3.067195, ppl: 21.481571 +epoch: 0, batch: 23341, sum loss: 4998.673340, avg loss: 3.208391, ppl: 24.739254 +epoch: 0, batch: 23342, sum loss: 5197.496582, avg loss: 3.048385, ppl: 21.081274 +epoch: 0, batch: 23343, sum loss: 5276.448242, avg loss: 3.150119, ppl: 23.338833 +epoch: 0, batch: 23344, sum loss: 5398.056641, avg loss: 3.074064, ppl: 21.629633 +epoch: 0, batch: 23345, sum loss: 5575.824219, avg loss: 3.013959, ppl: 20.367880 +epoch: 0, batch: 23346, sum loss: 4055.078369, avg loss: 2.723357, ppl: 15.231368 +epoch: 0, batch: 23347, sum loss: 4673.954590, avg loss: 3.021302, ppl: 20.517998 +epoch: 0, batch: 23348, sum loss: 5120.916016, avg loss: 3.017629, ppl: 20.442762 +epoch: 0, batch: 23349, sum loss: 5569.336914, avg loss: 3.095796, ppl: 22.104824 +epoch: 0, batch: 23350, sum loss: 4618.916992, avg loss: 2.970365, ppl: 19.499031 +epoch: 0, batch: 23351, sum loss: 4652.123535, avg loss: 2.774075, ppl: 
16.023794 +epoch: 0, batch: 23352, sum loss: 5761.396484, avg loss: 3.104201, ppl: 22.291391 +epoch: 0, batch: 23353, sum loss: 6253.686523, avg loss: 3.360390, ppl: 28.800432 +epoch: 0, batch: 23354, sum loss: 5000.333008, avg loss: 3.080920, ppl: 21.778423 +epoch: 0, batch: 23355, sum loss: 5337.284180, avg loss: 3.148840, ppl: 23.309015 +epoch: 0, batch: 23356, sum loss: 5414.164062, avg loss: 3.201753, ppl: 24.575571 +epoch: 0, batch: 23357, sum loss: 5018.579590, avg loss: 3.012353, ppl: 20.335186 +epoch: 0, batch: 23358, sum loss: 5093.656250, avg loss: 3.189516, ppl: 24.276665 +epoch: 0, batch: 23359, sum loss: 4463.906738, avg loss: 2.927152, ppl: 18.674368 +epoch: 0, batch: 23360, sum loss: 3782.763428, avg loss: 2.797902, ppl: 16.410179 +epoch: 0, batch: 23361, sum loss: 5417.129883, avg loss: 3.011189, ppl: 20.311544 +epoch: 0, batch: 23362, sum loss: 5702.413086, avg loss: 3.072421, ppl: 21.594114 +epoch: 0, batch: 23363, sum loss: 5605.854004, avg loss: 3.105736, ppl: 22.325651 +epoch: 0, batch: 23364, sum loss: 5045.183105, avg loss: 3.112389, ppl: 22.474680 +epoch: 0, batch: 23365, sum loss: 5102.590332, avg loss: 3.126587, ppl: 22.796053 +epoch: 0, batch: 23366, sum loss: 6944.267090, avg loss: 3.402385, ppl: 30.035635 +epoch: 0, batch: 23367, sum loss: 6038.416992, avg loss: 2.957109, ppl: 19.242266 +epoch: 0, batch: 23368, sum loss: 4326.637695, avg loss: 2.947301, ppl: 19.054455 +epoch: 0, batch: 23369, sum loss: 6004.167969, avg loss: 3.326409, ppl: 27.838192 +epoch: 0, batch: 23370, sum loss: 5622.469727, avg loss: 2.985911, ppl: 19.804529 +epoch: 0, batch: 23371, sum loss: 4764.558105, avg loss: 3.112056, ppl: 22.467201 +epoch: 0, batch: 23372, sum loss: 5280.401855, avg loss: 3.141227, ppl: 23.132221 +epoch: 0, batch: 23373, sum loss: 5004.743652, avg loss: 3.153588, ppl: 23.419941 +epoch: 0, batch: 23374, sum loss: 5134.627441, avg loss: 3.258012, ppl: 25.997810 +epoch: 0, batch: 23375, sum loss: 4626.053223, avg loss: 2.913132, ppl: 
18.414381 +epoch: 0, batch: 23376, sum loss: 5497.199219, avg loss: 3.027092, ppl: 20.637133 +epoch: 0, batch: 23377, sum loss: 5786.845703, avg loss: 3.089613, ppl: 21.968584 +epoch: 0, batch: 23378, sum loss: 4360.350586, avg loss: 2.861122, ppl: 17.481136 +epoch: 0, batch: 23379, sum loss: 5123.792969, avg loss: 2.904644, ppl: 18.258751 +epoch: 0, batch: 23380, sum loss: 4601.500977, avg loss: 2.993820, ppl: 19.961790 +epoch: 0, batch: 23381, sum loss: 4817.067871, avg loss: 2.986403, ppl: 19.814281 +epoch: 0, batch: 23382, sum loss: 5628.888672, avg loss: 3.233135, ppl: 25.359045 +epoch: 0, batch: 23383, sum loss: 4786.106934, avg loss: 3.015821, ppl: 20.405827 +epoch: 0, batch: 23384, sum loss: 5177.868164, avg loss: 3.078400, ppl: 21.723610 +epoch: 0, batch: 23385, sum loss: 5108.208984, avg loss: 2.983767, ppl: 19.762117 +epoch: 0, batch: 23386, sum loss: 5806.918945, avg loss: 3.159368, ppl: 23.555710 +epoch: 0, batch: 23387, sum loss: 5366.660645, avg loss: 3.003168, ppl: 20.149261 +epoch: 0, batch: 23388, sum loss: 5208.575195, avg loss: 3.279959, ppl: 26.574696 +epoch: 0, batch: 23389, sum loss: 4963.291016, avg loss: 3.031943, ppl: 20.737494 +epoch: 0, batch: 23390, sum loss: 5704.387695, avg loss: 3.276501, ppl: 26.482939 +epoch: 0, batch: 23391, sum loss: 5664.494141, avg loss: 3.285670, ppl: 26.726873 +epoch: 0, batch: 23392, sum loss: 4721.116699, avg loss: 2.921483, ppl: 18.568806 +epoch: 0, batch: 23393, sum loss: 5424.396973, avg loss: 3.311597, ppl: 27.428904 +epoch: 0, batch: 23394, sum loss: 4246.516602, avg loss: 2.834791, ppl: 17.026836 +epoch: 0, batch: 23395, sum loss: 5390.408691, avg loss: 3.251151, ppl: 25.820049 +epoch: 0, batch: 23396, sum loss: 4941.581055, avg loss: 2.964356, ppl: 19.382212 +epoch: 0, batch: 23397, sum loss: 4364.236328, avg loss: 3.018144, ppl: 20.453299 +epoch: 0, batch: 23398, sum loss: 5062.634766, avg loss: 3.009890, ppl: 20.285166 +epoch: 0, batch: 23399, sum loss: 4827.763672, avg loss: 3.071096, ppl: 
21.565535 +epoch: 0, batch: 23400, sum loss: 5411.219727, avg loss: 3.076305, ppl: 21.678146 +epoch: 0, batch: 23401, sum loss: 4139.297852, avg loss: 2.798714, ppl: 16.423512 +epoch: 0, batch: 23402, sum loss: 5139.569336, avg loss: 2.974288, ppl: 19.575676 +epoch: 0, batch: 23403, sum loss: 4396.918457, avg loss: 2.960888, ppl: 19.315109 +epoch: 0, batch: 23404, sum loss: 4522.402344, avg loss: 2.729271, ppl: 15.321716 +epoch: 0, batch: 23405, sum loss: 6277.189453, avg loss: 3.534454, ppl: 34.276283 +epoch: 0, batch: 23406, sum loss: 4538.836914, avg loss: 3.134556, ppl: 22.978432 +epoch: 0, batch: 23407, sum loss: 4660.024902, avg loss: 2.992951, ppl: 19.944454 +epoch: 0, batch: 23408, sum loss: 5291.875000, avg loss: 3.131287, ppl: 22.903435 +epoch: 0, batch: 23409, sum loss: 5234.217773, avg loss: 3.289892, ppl: 26.839958 +epoch: 0, batch: 23410, sum loss: 5933.287109, avg loss: 3.096705, ppl: 22.124933 +epoch: 0, batch: 23411, sum loss: 6778.454102, avg loss: 3.580800, ppl: 35.902245 +epoch: 0, batch: 23412, sum loss: 4719.332520, avg loss: 2.973745, ppl: 19.565046 +epoch: 0, batch: 23413, sum loss: 5657.851562, avg loss: 3.259131, ppl: 26.026915 +epoch: 0, batch: 23414, sum loss: 5388.740234, avg loss: 3.002084, ppl: 20.127434 +epoch: 0, batch: 23415, sum loss: 4877.188965, avg loss: 3.075151, ppl: 21.653145 +epoch: 0, batch: 23416, sum loss: 6074.931641, avg loss: 3.312394, ppl: 27.450775 +epoch: 0, batch: 23417, sum loss: 5022.312988, avg loss: 3.146813, ppl: 23.261803 +epoch: 0, batch: 23418, sum loss: 4791.969238, avg loss: 2.871162, ppl: 17.657524 +epoch: 0, batch: 23419, sum loss: 5871.013672, avg loss: 3.211714, ppl: 24.821600 +epoch: 0, batch: 23420, sum loss: 5049.407715, avg loss: 3.195828, ppl: 24.430387 +epoch: 0, batch: 23421, sum loss: 5708.444336, avg loss: 3.109174, ppl: 22.402544 +epoch: 0, batch: 23422, sum loss: 4856.026367, avg loss: 2.950198, ppl: 19.109745 +epoch: 0, batch: 23423, sum loss: 5114.397949, avg loss: 2.990876, ppl: 
19.903109 +epoch: 0, batch: 23424, sum loss: 5359.626465, avg loss: 3.090903, ppl: 21.996939 +epoch: 0, batch: 23425, sum loss: 4936.208008, avg loss: 3.339789, ppl: 28.213177 +epoch: 0, batch: 23426, sum loss: 4069.262939, avg loss: 2.867698, ppl: 17.596460 +epoch: 0, batch: 23427, sum loss: 4378.344238, avg loss: 2.920843, ppl: 18.556931 +epoch: 0, batch: 23428, sum loss: 5395.252441, avg loss: 3.164371, ppl: 23.673849 +epoch: 0, batch: 23429, sum loss: 5066.409668, avg loss: 2.930254, ppl: 18.732397 +epoch: 0, batch: 23430, sum loss: 5654.900879, avg loss: 2.992011, ppl: 19.925714 +epoch: 0, batch: 23431, sum loss: 4759.162598, avg loss: 2.873890, ppl: 17.705767 +epoch: 0, batch: 23432, sum loss: 4965.908691, avg loss: 3.033542, ppl: 20.770679 +epoch: 0, batch: 23433, sum loss: 5162.745117, avg loss: 3.093317, ppl: 22.050087 +epoch: 0, batch: 23434, sum loss: 4622.172363, avg loss: 2.707775, ppl: 14.995878 +epoch: 0, batch: 23435, sum loss: 5483.239258, avg loss: 3.162191, ppl: 23.622293 +epoch: 0, batch: 23436, sum loss: 6360.011230, avg loss: 3.441565, ppl: 31.235790 +epoch: 0, batch: 23437, sum loss: 5421.537109, avg loss: 3.066480, ppl: 21.466217 +epoch: 0, batch: 23438, sum loss: 5099.078613, avg loss: 3.188917, ppl: 24.262140 +epoch: 0, batch: 23439, sum loss: 4479.187988, avg loss: 3.206290, ppl: 24.687338 +epoch: 0, batch: 23440, sum loss: 5514.579102, avg loss: 3.274691, ppl: 26.435053 +epoch: 0, batch: 23441, sum loss: 5192.356445, avg loss: 3.259483, ppl: 26.036076 +epoch: 0, batch: 23442, sum loss: 5422.627930, avg loss: 3.419059, ppl: 30.540670 +epoch: 0, batch: 23443, sum loss: 3789.331055, avg loss: 2.735979, ppl: 15.424838 +epoch: 0, batch: 23444, sum loss: 6129.044434, avg loss: 3.563398, ppl: 35.282883 +epoch: 0, batch: 23445, sum loss: 4741.253418, avg loss: 3.265326, ppl: 26.188648 +epoch: 0, batch: 23446, sum loss: 5049.199219, avg loss: 3.010852, ppl: 20.304693 +epoch: 0, batch: 23447, sum loss: 4307.936523, avg loss: 2.775733, ppl: 
16.050381 +epoch: 0, batch: 23448, sum loss: 5173.984375, avg loss: 3.227688, ppl: 25.221285 +epoch: 0, batch: 23449, sum loss: 4944.960449, avg loss: 2.996946, ppl: 20.024286 +epoch: 0, batch: 23450, sum loss: 4431.818848, avg loss: 2.974375, ppl: 19.577389 +epoch: 0, batch: 23451, sum loss: 5431.248047, avg loss: 3.063310, ppl: 21.398266 +epoch: 0, batch: 23452, sum loss: 5161.628906, avg loss: 3.188159, ppl: 24.243748 +epoch: 0, batch: 23453, sum loss: 5406.725098, avg loss: 3.119864, ppl: 22.643305 +epoch: 0, batch: 23454, sum loss: 5948.154297, avg loss: 2.986022, ppl: 19.806725 +epoch: 0, batch: 23455, sum loss: 5348.864746, avg loss: 3.015143, ppl: 20.392015 +epoch: 0, batch: 23456, sum loss: 4637.125000, avg loss: 2.747112, ppl: 15.597522 +epoch: 0, batch: 23457, sum loss: 5080.083984, avg loss: 2.833287, ppl: 17.001257 +epoch: 0, batch: 23458, sum loss: 4956.173340, avg loss: 3.154789, ppl: 23.448088 +epoch: 0, batch: 23459, sum loss: 4741.087402, avg loss: 3.186215, ppl: 24.196661 +epoch: 0, batch: 23460, sum loss: 6651.794922, avg loss: 3.405937, ppl: 30.142525 +epoch: 0, batch: 23461, sum loss: 5563.919434, avg loss: 3.483982, ppl: 32.589237 +epoch: 0, batch: 23462, sum loss: 5250.635254, avg loss: 3.215331, ppl: 24.911531 +epoch: 0, batch: 23463, sum loss: 5373.048340, avg loss: 3.219322, ppl: 25.011156 +epoch: 0, batch: 23464, sum loss: 5284.598633, avg loss: 3.061760, ppl: 21.365135 +epoch: 0, batch: 23465, sum loss: 4830.870117, avg loss: 3.067219, ppl: 21.482079 +epoch: 0, batch: 23466, sum loss: 5290.921387, avg loss: 3.108649, ppl: 22.390785 +epoch: 0, batch: 23467, sum loss: 6051.356934, avg loss: 3.288781, ppl: 26.810154 +epoch: 0, batch: 23468, sum loss: 5748.174805, avg loss: 3.322644, ppl: 27.733587 +epoch: 0, batch: 23469, sum loss: 5604.050781, avg loss: 2.980878, ppl: 19.705109 +epoch: 0, batch: 23470, sum loss: 5598.498535, avg loss: 3.228661, ppl: 25.245832 +epoch: 0, batch: 23471, sum loss: 5873.781250, avg loss: 3.129345, ppl: 
22.859011 +epoch: 0, batch: 23472, sum loss: 5358.687500, avg loss: 3.180230, ppl: 24.052282 +epoch: 0, batch: 23473, sum loss: 5529.079102, avg loss: 3.300943, ppl: 27.138216 +epoch: 0, batch: 23474, sum loss: 4145.166016, avg loss: 2.944010, ppl: 18.991852 +epoch: 0, batch: 23475, sum loss: 4976.288574, avg loss: 3.133683, ppl: 22.958384 +epoch: 0, batch: 23476, sum loss: 5371.202148, avg loss: 3.350719, ppl: 28.523228 +epoch: 0, batch: 23477, sum loss: 5177.993164, avg loss: 3.062089, ppl: 21.372166 +epoch: 0, batch: 23478, sum loss: 5547.004395, avg loss: 3.146344, ppl: 23.250902 +epoch: 0, batch: 23479, sum loss: 5413.762207, avg loss: 3.123925, ppl: 22.735441 +epoch: 0, batch: 23480, sum loss: 6179.533691, avg loss: 3.306331, ppl: 27.284843 +epoch: 0, batch: 23481, sum loss: 5337.623535, avg loss: 3.103270, ppl: 22.270647 +epoch: 0, batch: 23482, sum loss: 5272.846191, avg loss: 3.070965, ppl: 21.562691 +epoch: 0, batch: 23483, sum loss: 5506.521973, avg loss: 3.349466, ppl: 28.487513 +epoch: 0, batch: 23484, sum loss: 4660.146973, avg loss: 3.080071, ppl: 21.759941 +epoch: 0, batch: 23485, sum loss: 5546.446289, avg loss: 3.220933, ppl: 25.051476 +epoch: 0, batch: 23486, sum loss: 5457.009277, avg loss: 3.118291, ppl: 22.607708 +epoch: 0, batch: 23487, sum loss: 5910.044434, avg loss: 3.316523, ppl: 27.564352 +epoch: 0, batch: 23488, sum loss: 5114.281250, avg loss: 3.035182, ppl: 20.804760 +epoch: 0, batch: 23489, sum loss: 6078.551758, avg loss: 3.199238, ppl: 24.513845 +epoch: 0, batch: 23490, sum loss: 5552.685059, avg loss: 3.285612, ppl: 26.725344 +epoch: 0, batch: 23491, sum loss: 4534.849609, avg loss: 2.962018, ppl: 19.336960 +epoch: 0, batch: 23492, sum loss: 4696.781250, avg loss: 2.955809, ppl: 19.217270 +epoch: 0, batch: 23493, sum loss: 5245.590820, avg loss: 3.196582, ppl: 24.448816 +epoch: 0, batch: 23494, sum loss: 6694.402344, avg loss: 3.357273, ppl: 28.710793 +epoch: 0, batch: 23495, sum loss: 6260.828125, avg loss: 3.391565, ppl: 
29.712404 +epoch: 0, batch: 23496, sum loss: 4334.517578, avg loss: 2.909072, ppl: 18.339779 +epoch: 0, batch: 23497, sum loss: 5090.262207, avg loss: 3.111407, ppl: 22.452620 +epoch: 0, batch: 23498, sum loss: 4190.454590, avg loss: 2.665684, ppl: 14.377773 +epoch: 0, batch: 23499, sum loss: 5747.908203, avg loss: 2.944625, ppl: 19.003542 +epoch: 0, batch: 23500, sum loss: 6156.988281, avg loss: 3.466773, ppl: 32.033199 +epoch: 0, batch: 23501, sum loss: 5603.103027, avg loss: 3.309571, ppl: 27.373375 +epoch: 0, batch: 23502, sum loss: 4907.044434, avg loss: 3.188463, ppl: 24.251123 +epoch: 0, batch: 23503, sum loss: 4342.110352, avg loss: 2.785190, ppl: 16.202890 +epoch: 0, batch: 23504, sum loss: 6184.961426, avg loss: 3.296888, ppl: 27.028393 +epoch: 0, batch: 23505, sum loss: 5719.311035, avg loss: 3.111703, ppl: 22.459269 +epoch: 0, batch: 23506, sum loss: 7382.245605, avg loss: 3.665465, ppl: 39.074287 +epoch: 0, batch: 23507, sum loss: 5825.127930, avg loss: 3.319161, ppl: 27.637157 +epoch: 0, batch: 23508, sum loss: 4941.917480, avg loss: 3.198652, ppl: 24.499489 +epoch: 0, batch: 23509, sum loss: 4490.294922, avg loss: 2.954142, ppl: 19.185247 +epoch: 0, batch: 23510, sum loss: 6349.830078, avg loss: 3.229822, ppl: 25.275162 +epoch: 0, batch: 23511, sum loss: 6726.188477, avg loss: 3.383394, ppl: 29.470638 +epoch: 0, batch: 23512, sum loss: 5451.329590, avg loss: 3.023477, ppl: 20.562670 +epoch: 0, batch: 23513, sum loss: 5032.860840, avg loss: 2.811654, ppl: 16.637415 +epoch: 0, batch: 23514, sum loss: 4715.225586, avg loss: 3.089925, ppl: 21.975431 +epoch: 0, batch: 23515, sum loss: 4591.449219, avg loss: 3.065053, ppl: 21.435593 +epoch: 0, batch: 23516, sum loss: 5332.326172, avg loss: 3.181579, ppl: 24.084751 +epoch: 0, batch: 23517, sum loss: 6044.772949, avg loss: 3.363814, ppl: 28.899193 +epoch: 0, batch: 23518, sum loss: 4425.016113, avg loss: 2.915030, ppl: 18.449375 +epoch: 0, batch: 23519, sum loss: 4523.672363, avg loss: 2.985922, ppl: 
19.804760 +epoch: 0, batch: 23520, sum loss: 4196.003906, avg loss: 2.731773, ppl: 15.360102 +epoch: 0, batch: 23521, sum loss: 5376.168945, avg loss: 3.181165, ppl: 24.074783 +epoch: 0, batch: 23522, sum loss: 4316.909180, avg loss: 3.137289, ppl: 23.041307 +epoch: 0, batch: 23523, sum loss: 6261.870117, avg loss: 3.168963, ppl: 23.782803 +epoch: 0, batch: 23524, sum loss: 6344.185547, avg loss: 3.487733, ppl: 32.711693 +epoch: 0, batch: 23525, sum loss: 4704.479004, avg loss: 2.830613, ppl: 16.955854 +epoch: 0, batch: 23526, sum loss: 5729.487793, avg loss: 3.236999, ppl: 25.457205 +epoch: 0, batch: 23527, sum loss: 4281.280273, avg loss: 2.940440, ppl: 18.924170 +epoch: 0, batch: 23528, sum loss: 4682.911621, avg loss: 2.869431, ppl: 17.626989 +epoch: 0, batch: 23529, sum loss: 6345.910156, avg loss: 3.432077, ppl: 30.940838 +epoch: 0, batch: 23530, sum loss: 5458.833008, avg loss: 3.316424, ppl: 27.561604 +epoch: 0, batch: 23531, sum loss: 4424.252930, avg loss: 3.120066, ppl: 22.647867 +epoch: 0, batch: 23532, sum loss: 5724.500977, avg loss: 3.288054, ppl: 26.790665 +epoch: 0, batch: 23533, sum loss: 5747.229980, avg loss: 3.293541, ppl: 26.938095 +epoch: 0, batch: 23534, sum loss: 4990.295410, avg loss: 3.037307, ppl: 20.849016 +epoch: 0, batch: 23535, sum loss: 4673.193848, avg loss: 2.933581, ppl: 18.794807 +epoch: 0, batch: 23536, sum loss: 5149.716797, avg loss: 3.246984, ppl: 25.712673 +epoch: 0, batch: 23537, sum loss: 4784.478516, avg loss: 3.074858, ppl: 21.646801 +epoch: 0, batch: 23538, sum loss: 5400.282227, avg loss: 2.906503, ppl: 18.292711 +epoch: 0, batch: 23539, sum loss: 4406.262695, avg loss: 3.005636, ppl: 20.199062 +epoch: 0, batch: 23540, sum loss: 5386.545898, avg loss: 3.113610, ppl: 22.502136 +epoch: 0, batch: 23541, sum loss: 5507.017578, avg loss: 3.386850, ppl: 29.572641 +epoch: 0, batch: 23542, sum loss: 4814.997559, avg loss: 3.037853, ppl: 20.860413 +epoch: 0, batch: 23543, sum loss: 4791.397461, avg loss: 2.961309, ppl: 
19.323244 +epoch: 0, batch: 23544, sum loss: 5090.188477, avg loss: 3.071930, ppl: 21.583523 +epoch: 0, batch: 23545, sum loss: 5613.111328, avg loss: 3.034114, ppl: 20.782564 +epoch: 0, batch: 23546, sum loss: 6022.884766, avg loss: 3.095008, ppl: 22.087408 +epoch: 0, batch: 23547, sum loss: 5457.158203, avg loss: 3.028390, ppl: 20.663931 +epoch: 0, batch: 23548, sum loss: 5876.115234, avg loss: 3.323595, ppl: 27.759964 +epoch: 0, batch: 23549, sum loss: 6400.138672, avg loss: 3.237298, ppl: 25.464828 +epoch: 0, batch: 23550, sum loss: 6455.416992, avg loss: 3.170637, ppl: 23.822659 +epoch: 0, batch: 23551, sum loss: 5038.778320, avg loss: 3.149236, ppl: 23.318253 +epoch: 0, batch: 23552, sum loss: 5341.563477, avg loss: 2.818767, ppl: 16.756178 +epoch: 0, batch: 23553, sum loss: 4312.212891, avg loss: 2.886354, ppl: 17.927824 +epoch: 0, batch: 23554, sum loss: 4531.828125, avg loss: 3.043538, ppl: 20.979340 +epoch: 0, batch: 23555, sum loss: 5662.661621, avg loss: 2.991369, ppl: 19.912926 +epoch: 0, batch: 23556, sum loss: 5636.361328, avg loss: 3.453653, ppl: 31.615669 +epoch: 0, batch: 23557, sum loss: 5636.871094, avg loss: 3.221069, ppl: 25.054892 +epoch: 0, batch: 23558, sum loss: 5082.340820, avg loss: 3.065344, ppl: 21.441843 +epoch: 0, batch: 23559, sum loss: 5052.876465, avg loss: 3.160023, ppl: 23.571133 +epoch: 0, batch: 23560, sum loss: 5530.795410, avg loss: 3.208118, ppl: 24.732500 +epoch: 0, batch: 23561, sum loss: 5481.562500, avg loss: 3.381593, ppl: 29.417595 +epoch: 0, batch: 23562, sum loss: 5503.424316, avg loss: 3.090075, ppl: 21.978737 +epoch: 0, batch: 23563, sum loss: 5178.944824, avg loss: 3.163681, ppl: 23.657509 +epoch: 0, batch: 23564, sum loss: 5817.231934, avg loss: 3.292152, ppl: 26.900682 +epoch: 0, batch: 23565, sum loss: 4036.199951, avg loss: 2.931155, ppl: 18.749268 +epoch: 0, batch: 23566, sum loss: 6257.013184, avg loss: 3.308838, ppl: 27.353333 +epoch: 0, batch: 23567, sum loss: 5898.562500, avg loss: 3.255277, ppl: 
25.926805 +epoch: 0, batch: 23568, sum loss: 5225.654785, avg loss: 3.017122, ppl: 20.432398 +epoch: 0, batch: 23569, sum loss: 5692.907715, avg loss: 3.117693, ppl: 22.594198 +epoch: 0, batch: 23570, sum loss: 5893.625977, avg loss: 3.256147, ppl: 25.949366 +epoch: 0, batch: 23571, sum loss: 6243.184082, avg loss: 3.389351, ppl: 29.646702 +epoch: 0, batch: 23572, sum loss: 6465.627930, avg loss: 3.348331, ppl: 28.455215 +epoch: 0, batch: 23573, sum loss: 4708.730957, avg loss: 2.983987, ppl: 19.766460 +epoch: 0, batch: 23574, sum loss: 5558.311523, avg loss: 3.296745, ppl: 27.024527 +epoch: 0, batch: 23575, sum loss: 4693.621094, avg loss: 2.959408, ppl: 19.286552 +epoch: 0, batch: 23576, sum loss: 5554.119141, avg loss: 3.040021, ppl: 20.905691 +epoch: 0, batch: 23577, sum loss: 5594.394043, avg loss: 3.292757, ppl: 26.916973 +epoch: 0, batch: 23578, sum loss: 5845.974609, avg loss: 3.513206, ppl: 33.555676 +epoch: 0, batch: 23579, sum loss: 5186.114258, avg loss: 2.994292, ppl: 19.971220 +epoch: 0, batch: 23580, sum loss: 5703.897949, avg loss: 3.297051, ppl: 27.032793 +epoch: 0, batch: 23581, sum loss: 5099.628906, avg loss: 3.053670, ppl: 21.192984 +epoch: 0, batch: 23582, sum loss: 4804.866699, avg loss: 3.161097, ppl: 23.596457 +epoch: 0, batch: 23583, sum loss: 5748.305664, avg loss: 3.182894, ppl: 24.116434 +epoch: 0, batch: 23584, sum loss: 6240.265625, avg loss: 3.413712, ppl: 30.377798 +epoch: 0, batch: 23585, sum loss: 5815.916016, avg loss: 3.291407, ppl: 26.880655 +epoch: 0, batch: 23586, sum loss: 5298.764648, avg loss: 3.180531, ppl: 24.059521 +epoch: 0, batch: 23587, sum loss: 5231.000488, avg loss: 3.151205, ppl: 23.364202 +epoch: 0, batch: 23588, sum loss: 4730.352539, avg loss: 3.069664, ppl: 21.534672 +epoch: 0, batch: 23589, sum loss: 4630.389160, avg loss: 2.904886, ppl: 18.263170 +epoch: 0, batch: 23590, sum loss: 4059.701172, avg loss: 2.862977, ppl: 17.513588 +epoch: 0, batch: 23591, sum loss: 4567.346191, avg loss: 2.763065, ppl: 
15.848342 +epoch: 0, batch: 23592, sum loss: 5392.956055, avg loss: 3.119119, ppl: 22.626431 +epoch: 0, batch: 23593, sum loss: 5607.304199, avg loss: 3.127331, ppl: 22.813011 +epoch: 0, batch: 23594, sum loss: 5235.579590, avg loss: 3.208076, ppl: 24.731451 +epoch: 0, batch: 23595, sum loss: 5758.421387, avg loss: 3.177937, ppl: 23.997190 +epoch: 0, batch: 23596, sum loss: 5480.386719, avg loss: 2.878355, ppl: 17.784985 +epoch: 0, batch: 23597, sum loss: 4635.218750, avg loss: 3.015757, ppl: 20.404537 +epoch: 0, batch: 23598, sum loss: 4444.629395, avg loss: 2.829172, ppl: 16.931438 +epoch: 0, batch: 23599, sum loss: 5681.726074, avg loss: 3.096309, ppl: 22.116163 +epoch: 0, batch: 23600, sum loss: 6037.785645, avg loss: 3.181131, ppl: 24.073957 +epoch: 0, batch: 23601, sum loss: 6130.406250, avg loss: 3.420986, ppl: 30.599562 +epoch: 0, batch: 23602, sum loss: 5920.642578, avg loss: 3.452270, ppl: 31.571966 +epoch: 0, batch: 23603, sum loss: 4978.653320, avg loss: 3.080850, ppl: 21.776903 +epoch: 0, batch: 23604, sum loss: 4760.202637, avg loss: 3.079044, ppl: 21.737619 +epoch: 0, batch: 23605, sum loss: 4931.687012, avg loss: 3.200316, ppl: 24.540283 +epoch: 0, batch: 23606, sum loss: 4514.286133, avg loss: 2.991575, ppl: 19.917023 +epoch: 0, batch: 23607, sum loss: 5460.373535, avg loss: 3.050488, ppl: 21.125650 +epoch: 0, batch: 23608, sum loss: 5525.341309, avg loss: 3.265568, ppl: 26.194992 +epoch: 0, batch: 23609, sum loss: 4040.519043, avg loss: 2.917342, ppl: 18.492079 +epoch: 0, batch: 23610, sum loss: 4498.160156, avg loss: 3.031105, ppl: 20.720116 +epoch: 0, batch: 23611, sum loss: 6952.151367, avg loss: 3.353667, ppl: 28.607439 +epoch: 0, batch: 23612, sum loss: 4436.936035, avg loss: 3.137861, ppl: 23.054512 +epoch: 0, batch: 23613, sum loss: 4539.981445, avg loss: 2.744850, ppl: 15.562278 +epoch: 0, batch: 23614, sum loss: 6219.713379, avg loss: 3.338547, ppl: 28.178154 +epoch: 0, batch: 23615, sum loss: 4916.111816, avg loss: 3.143294, ppl: 
23.180094 +epoch: 0, batch: 23616, sum loss: 4823.613770, avg loss: 3.076284, ppl: 21.677702 +epoch: 0, batch: 23617, sum loss: 5535.693848, avg loss: 3.212823, ppl: 24.849134 +epoch: 0, batch: 23618, sum loss: 5114.997070, avg loss: 3.208907, ppl: 24.752008 +epoch: 0, batch: 23619, sum loss: 5713.267578, avg loss: 3.246175, ppl: 25.691870 +epoch: 0, batch: 23620, sum loss: 6009.264160, avg loss: 3.312715, ppl: 27.459566 +epoch: 0, batch: 23621, sum loss: 3732.360840, avg loss: 2.624726, ppl: 13.800797 +epoch: 0, batch: 23622, sum loss: 4916.852539, avg loss: 3.016474, ppl: 20.419167 +epoch: 0, batch: 23623, sum loss: 5479.520508, avg loss: 3.160046, ppl: 23.571688 +epoch: 0, batch: 23624, sum loss: 4735.511719, avg loss: 2.952314, ppl: 19.150219 +epoch: 0, batch: 23625, sum loss: 4228.492188, avg loss: 2.824644, ppl: 16.854940 +epoch: 0, batch: 23626, sum loss: 5531.495605, avg loss: 3.259573, ppl: 26.038422 +epoch: 0, batch: 23627, sum loss: 5451.842773, avg loss: 3.049129, ppl: 21.096962 +epoch: 0, batch: 23628, sum loss: 5044.017578, avg loss: 3.053280, ppl: 21.184710 +epoch: 0, batch: 23629, sum loss: 4996.210938, avg loss: 3.276204, ppl: 26.475079 +epoch: 0, batch: 23630, sum loss: 4836.939941, avg loss: 2.799155, ppl: 16.430758 +epoch: 0, batch: 23631, sum loss: 5064.698730, avg loss: 3.173370, ppl: 23.887854 +epoch: 0, batch: 23632, sum loss: 6280.028320, avg loss: 3.301803, ppl: 27.161556 +epoch: 0, batch: 23633, sum loss: 5226.146484, avg loss: 3.373884, ppl: 29.191694 +epoch: 0, batch: 23634, sum loss: 4980.201172, avg loss: 3.059092, ppl: 21.308191 +epoch: 0, batch: 23635, sum loss: 4581.349609, avg loss: 3.006135, ppl: 20.209141 +epoch: 0, batch: 23636, sum loss: 4808.218262, avg loss: 3.039329, ppl: 20.891212 +epoch: 0, batch: 23637, sum loss: 5410.122070, avg loss: 3.265010, ppl: 26.180376 +epoch: 0, batch: 23638, sum loss: 6567.806641, avg loss: 3.517840, ppl: 33.711521 +epoch: 0, batch: 23639, sum loss: 5127.462402, avg loss: 3.336020, ppl: 
28.107038 +epoch: 0, batch: 23640, sum loss: 4962.350098, avg loss: 3.040656, ppl: 20.918959 +epoch: 0, batch: 23641, sum loss: 6698.794434, avg loss: 3.283723, ppl: 26.674889 +epoch: 0, batch: 23642, sum loss: 5699.916016, avg loss: 3.102840, ppl: 22.261076 +epoch: 0, batch: 23643, sum loss: 4713.133301, avg loss: 2.792141, ppl: 16.315910 +epoch: 0, batch: 23644, sum loss: 5658.646973, avg loss: 3.270894, ppl: 26.334873 +epoch: 0, batch: 23645, sum loss: 4932.323242, avg loss: 3.052180, ppl: 21.161432 +epoch: 0, batch: 23646, sum loss: 6660.167480, avg loss: 3.396312, ppl: 29.853796 +epoch: 0, batch: 23647, sum loss: 5212.094727, avg loss: 3.060537, ppl: 21.339016 +epoch: 0, batch: 23648, sum loss: 5946.877441, avg loss: 3.461512, ppl: 31.865116 +epoch: 0, batch: 23649, sum loss: 4876.249512, avg loss: 3.006319, ppl: 20.212860 +epoch: 0, batch: 23650, sum loss: 5411.670898, avg loss: 3.071323, ppl: 21.570419 +epoch: 0, batch: 23651, sum loss: 4250.993164, avg loss: 2.843474, ppl: 17.175323 +epoch: 0, batch: 23652, sum loss: 6233.989258, avg loss: 3.215054, ppl: 24.904633 +epoch: 0, batch: 23653, sum loss: 5518.648438, avg loss: 3.201072, ppl: 24.558855 +epoch: 0, batch: 23654, sum loss: 5090.413574, avg loss: 3.177536, ppl: 23.987587 +epoch: 0, batch: 23655, sum loss: 5783.425293, avg loss: 3.104361, ppl: 22.294975 +epoch: 0, batch: 23656, sum loss: 6195.156250, avg loss: 3.402063, ppl: 30.025976 +epoch: 0, batch: 23657, sum loss: 4495.830566, avg loss: 2.771782, ppl: 15.987097 +epoch: 0, batch: 23658, sum loss: 6601.214844, avg loss: 3.425643, ppl: 30.742420 +epoch: 0, batch: 23659, sum loss: 4095.520264, avg loss: 2.836233, ppl: 17.051411 +epoch: 0, batch: 23660, sum loss: 4490.427734, avg loss: 2.780451, ppl: 16.126286 +epoch: 0, batch: 23661, sum loss: 4667.633789, avg loss: 2.928252, ppl: 18.694923 +epoch: 0, batch: 23662, sum loss: 4592.193359, avg loss: 2.997515, ppl: 20.035690 +epoch: 0, batch: 23663, sum loss: 4383.000977, avg loss: 2.781092, ppl: 
16.136631 +epoch: 0, batch: 23664, sum loss: 4337.664062, avg loss: 2.868826, ppl: 17.616320 +epoch: 0, batch: 23665, sum loss: 3937.369629, avg loss: 2.704237, ppl: 14.942918 +epoch: 0, batch: 23666, sum loss: 5904.342773, avg loss: 3.370059, ppl: 29.080236 +epoch: 0, batch: 23667, sum loss: 4943.069336, avg loss: 3.043762, ppl: 20.984035 +epoch: 0, batch: 23668, sum loss: 5149.219238, avg loss: 3.275585, ppl: 26.458691 +epoch: 0, batch: 23669, sum loss: 5859.679199, avg loss: 3.174258, ppl: 23.909061 +epoch: 0, batch: 23670, sum loss: 4928.481934, avg loss: 3.089957, ppl: 21.976143 +epoch: 0, batch: 23671, sum loss: 4881.799805, avg loss: 3.111408, ppl: 22.452646 +epoch: 0, batch: 23672, sum loss: 4980.265625, avg loss: 3.138164, ppl: 23.061483 +epoch: 0, batch: 23673, sum loss: 5012.353516, avg loss: 3.192582, ppl: 24.351219 +epoch: 0, batch: 23674, sum loss: 4943.189941, avg loss: 3.282331, ppl: 26.637787 +epoch: 0, batch: 23675, sum loss: 4176.665527, avg loss: 2.758696, ppl: 15.779251 +epoch: 0, batch: 23676, sum loss: 5444.443359, avg loss: 3.256246, ppl: 25.951939 +epoch: 0, batch: 23677, sum loss: 5820.955566, avg loss: 3.121156, ppl: 22.672567 +epoch: 0, batch: 23678, sum loss: 5091.478027, avg loss: 2.853968, ppl: 17.356510 +epoch: 0, batch: 23679, sum loss: 4963.484375, avg loss: 2.984657, ppl: 19.779713 +epoch: 0, batch: 23680, sum loss: 5271.822754, avg loss: 3.123118, ppl: 22.717100 +epoch: 0, batch: 23681, sum loss: 5091.799805, avg loss: 2.962071, ppl: 19.337978 +epoch: 0, batch: 23682, sum loss: 4649.965332, avg loss: 3.041181, ppl: 20.929943 +epoch: 0, batch: 23683, sum loss: 5683.989258, avg loss: 3.136859, ppl: 23.031422 +epoch: 0, batch: 23684, sum loss: 4984.615234, avg loss: 3.123193, ppl: 22.718796 +epoch: 0, batch: 23685, sum loss: 4836.129883, avg loss: 3.092155, ppl: 22.024485 +epoch: 0, batch: 23686, sum loss: 4488.855469, avg loss: 2.909174, ppl: 18.341646 +epoch: 0, batch: 23687, sum loss: 4722.991699, avg loss: 3.123672, ppl: 
22.729691 +epoch: 0, batch: 23688, sum loss: 5548.229980, avg loss: 3.122245, ppl: 22.697285 +epoch: 0, batch: 23689, sum loss: 4774.457520, avg loss: 2.864102, ppl: 17.533295 +epoch: 0, batch: 23690, sum loss: 5666.843262, avg loss: 3.240048, ppl: 25.534939 +epoch: 0, batch: 23691, sum loss: 5344.280273, avg loss: 3.192521, ppl: 24.349745 +epoch: 0, batch: 23692, sum loss: 5237.672363, avg loss: 3.128837, ppl: 22.847383 +epoch: 0, batch: 23693, sum loss: 4412.850098, avg loss: 2.928235, ppl: 18.694607 +epoch: 0, batch: 23694, sum loss: 4012.754639, avg loss: 2.746581, ppl: 15.589238 +epoch: 0, batch: 23695, sum loss: 4712.054199, avg loss: 2.752368, ppl: 15.679717 +epoch: 0, batch: 23696, sum loss: 4704.059082, avg loss: 3.098853, ppl: 22.172508 +epoch: 0, batch: 23697, sum loss: 6199.723633, avg loss: 3.333185, ppl: 28.027460 +epoch: 0, batch: 23698, sum loss: 5550.258301, avg loss: 3.299797, ppl: 27.107130 +epoch: 0, batch: 23699, sum loss: 4663.107910, avg loss: 2.936466, ppl: 18.849115 +epoch: 0, batch: 23700, sum loss: 4957.843750, avg loss: 3.010227, ppl: 20.292000 +epoch: 0, batch: 23701, sum loss: 5698.538086, avg loss: 3.136235, ppl: 23.017033 +epoch: 0, batch: 23702, sum loss: 4360.676758, avg loss: 3.137177, ppl: 23.038748 +epoch: 0, batch: 23703, sum loss: 5322.655273, avg loss: 3.214164, ppl: 24.882477 +epoch: 0, batch: 23704, sum loss: 5807.654297, avg loss: 3.239071, ppl: 25.510015 +epoch: 0, batch: 23705, sum loss: 4660.354980, avg loss: 3.151017, ppl: 23.359819 +epoch: 0, batch: 23706, sum loss: 4686.234375, avg loss: 3.011719, ppl: 20.322298 +epoch: 0, batch: 23707, sum loss: 5741.133789, avg loss: 3.288164, ppl: 26.793623 +epoch: 0, batch: 23708, sum loss: 5363.432129, avg loss: 3.207794, ppl: 24.724495 +epoch: 0, batch: 23709, sum loss: 5652.342285, avg loss: 3.037261, ppl: 20.848057 +epoch: 0, batch: 23710, sum loss: 6089.960449, avg loss: 3.215396, ppl: 24.913166 +epoch: 0, batch: 23711, sum loss: 5687.911621, avg loss: 3.158196, ppl: 
23.528118 +epoch: 0, batch: 23712, sum loss: 4918.274414, avg loss: 3.168991, ppl: 23.783482 +epoch: 0, batch: 23713, sum loss: 5316.975098, avg loss: 3.140564, ppl: 23.116905 +epoch: 0, batch: 23714, sum loss: 5796.747070, avg loss: 3.301109, ppl: 27.142719 +epoch: 0, batch: 23715, sum loss: 4794.078613, avg loss: 2.828365, ppl: 16.917774 +epoch: 0, batch: 23716, sum loss: 4069.366211, avg loss: 2.744010, ppl: 15.549209 +epoch: 0, batch: 23717, sum loss: 5504.024414, avg loss: 3.284024, ppl: 26.682936 +epoch: 0, batch: 23718, sum loss: 4488.916016, avg loss: 3.165667, ppl: 23.704557 +epoch: 0, batch: 23719, sum loss: 6335.279785, avg loss: 3.398755, ppl: 29.926826 +epoch: 0, batch: 23720, sum loss: 5714.593262, avg loss: 3.169492, ppl: 23.795382 +epoch: 0, batch: 23721, sum loss: 4287.021484, avg loss: 2.952494, ppl: 19.153666 +epoch: 0, batch: 23722, sum loss: 5062.076172, avg loss: 2.977692, ppl: 19.642431 +epoch: 0, batch: 23723, sum loss: 5909.126465, avg loss: 3.259309, ppl: 26.031538 +epoch: 0, batch: 23724, sum loss: 5524.131836, avg loss: 3.145861, ppl: 23.239679 +epoch: 0, batch: 23725, sum loss: 4746.449707, avg loss: 3.068164, ppl: 21.502390 +epoch: 0, batch: 23726, sum loss: 6103.924805, avg loss: 3.243318, ppl: 25.618586 +epoch: 0, batch: 23727, sum loss: 5287.708008, avg loss: 3.038913, ppl: 20.882523 +epoch: 0, batch: 23728, sum loss: 5303.143066, avg loss: 3.171736, ppl: 23.848862 +epoch: 0, batch: 23729, sum loss: 6696.008789, avg loss: 3.363138, ppl: 28.879686 +epoch: 0, batch: 23730, sum loss: 5930.434570, avg loss: 3.433952, ppl: 30.998903 +epoch: 0, batch: 23731, sum loss: 5717.180664, avg loss: 3.211899, ppl: 24.826193 +epoch: 0, batch: 23732, sum loss: 5281.290527, avg loss: 3.278269, ppl: 26.529799 +epoch: 0, batch: 23733, sum loss: 5805.123047, avg loss: 3.221489, ppl: 25.065414 +epoch: 0, batch: 23734, sum loss: 5631.986816, avg loss: 3.233058, ppl: 25.357075 +epoch: 0, batch: 23735, sum loss: 5620.833496, avg loss: 3.260345, ppl: 
26.058514 +epoch: 0, batch: 23736, sum loss: 4881.674805, avg loss: 2.893702, ppl: 18.060041 +epoch: 0, batch: 23737, sum loss: 5648.065430, avg loss: 3.178427, ppl: 24.008963 +epoch: 0, batch: 23738, sum loss: 4917.516602, avg loss: 2.865686, ppl: 17.561090 +epoch: 0, batch: 23739, sum loss: 5439.185547, avg loss: 3.008399, ppl: 20.254946 +epoch: 0, batch: 23740, sum loss: 5130.221191, avg loss: 3.145445, ppl: 23.230019 +epoch: 0, batch: 23741, sum loss: 5410.580078, avg loss: 3.237930, ppl: 25.480911 +epoch: 0, batch: 23742, sum loss: 5045.266602, avg loss: 2.962576, ppl: 19.347746 +epoch: 0, batch: 23743, sum loss: 4718.162109, avg loss: 2.993758, ppl: 19.960548 +epoch: 0, batch: 23744, sum loss: 5556.330078, avg loss: 3.198808, ppl: 24.503315 +epoch: 0, batch: 23745, sum loss: 5054.277344, avg loss: 2.884862, ppl: 17.901093 +epoch: 0, batch: 23746, sum loss: 5358.299805, avg loss: 2.975180, ppl: 19.593153 +epoch: 0, batch: 23747, sum loss: 4944.330566, avg loss: 3.078661, ppl: 21.729298 +epoch: 0, batch: 23748, sum loss: 4615.609375, avg loss: 3.058721, ppl: 21.300293 +epoch: 0, batch: 23749, sum loss: 5175.321289, avg loss: 2.984614, ppl: 19.778870 +epoch: 0, batch: 23750, sum loss: 5654.360352, avg loss: 3.238465, ppl: 25.494566 +epoch: 0, batch: 23751, sum loss: 6391.750000, avg loss: 3.251144, ppl: 25.819872 +epoch: 0, batch: 23752, sum loss: 5400.569824, avg loss: 3.150858, ppl: 23.356089 +epoch: 0, batch: 23753, sum loss: 5743.954590, avg loss: 3.322125, ppl: 27.719204 +epoch: 0, batch: 23754, sum loss: 6186.750000, avg loss: 3.342383, ppl: 28.286444 +epoch: 0, batch: 23755, sum loss: 4211.468262, avg loss: 2.834097, ppl: 17.015032 +epoch: 0, batch: 23756, sum loss: 5068.563965, avg loss: 3.073720, ppl: 21.622183 +epoch: 0, batch: 23757, sum loss: 5080.109375, avg loss: 2.986543, ppl: 19.817051 +epoch: 0, batch: 23758, sum loss: 4738.301758, avg loss: 3.158868, ppl: 23.543924 +epoch: 0, batch: 23759, sum loss: 4645.015625, avg loss: 3.162025, ppl: 
23.618385 +epoch: 0, batch: 23760, sum loss: 4415.305176, avg loss: 3.201817, ppl: 24.577135 +epoch: 0, batch: 23761, sum loss: 4827.191406, avg loss: 3.035969, ppl: 20.821154 +epoch: 0, batch: 23762, sum loss: 4991.893066, avg loss: 3.058758, ppl: 21.301085 +epoch: 0, batch: 23763, sum loss: 6426.718262, avg loss: 3.187856, ppl: 24.236420 +epoch: 0, batch: 23764, sum loss: 4905.670898, avg loss: 3.009614, ppl: 20.279570 +epoch: 0, batch: 23765, sum loss: 6005.633301, avg loss: 3.398774, ppl: 29.927383 +epoch: 0, batch: 23766, sum loss: 6106.753906, avg loss: 3.265644, ppl: 26.196972 +epoch: 0, batch: 23767, sum loss: 5196.836914, avg loss: 3.046211, ppl: 21.035500 +epoch: 0, batch: 23768, sum loss: 5891.638672, avg loss: 3.287745, ppl: 26.782402 +epoch: 0, batch: 23769, sum loss: 6734.827637, avg loss: 3.097897, ppl: 22.151314 +epoch: 0, batch: 23770, sum loss: 5441.962402, avg loss: 3.212492, ppl: 24.840925 +epoch: 0, batch: 23771, sum loss: 5544.915039, avg loss: 3.294661, ppl: 26.968277 +epoch: 0, batch: 23772, sum loss: 5235.809570, avg loss: 3.131465, ppl: 22.907518 +epoch: 0, batch: 23773, sum loss: 5032.173828, avg loss: 3.201128, ppl: 24.560230 +epoch: 0, batch: 23774, sum loss: 5343.184082, avg loss: 3.124669, ppl: 22.752365 +epoch: 0, batch: 23775, sum loss: 4449.262207, avg loss: 3.355402, ppl: 28.657108 +epoch: 0, batch: 23776, sum loss: 5146.556641, avg loss: 3.016739, ppl: 20.424576 +epoch: 0, batch: 23777, sum loss: 5220.346191, avg loss: 2.929487, ppl: 18.718029 +epoch: 0, batch: 23778, sum loss: 4687.129395, avg loss: 3.093815, ppl: 22.061077 +epoch: 0, batch: 23779, sum loss: 5610.305664, avg loss: 3.277048, ppl: 26.497427 +epoch: 0, batch: 23780, sum loss: 5256.192871, avg loss: 3.070206, ppl: 21.546339 +epoch: 0, batch: 23781, sum loss: 5487.555664, avg loss: 3.133955, ppl: 22.964636 +epoch: 0, batch: 23782, sum loss: 5146.554199, avg loss: 3.442511, ppl: 31.265369 +epoch: 0, batch: 23783, sum loss: 4411.695312, avg loss: 2.952942, ppl: 
19.162245 +epoch: 0, batch: 23784, sum loss: 5047.455078, avg loss: 3.283966, ppl: 26.681370 +epoch: 0, batch: 23785, sum loss: 4761.945312, avg loss: 2.921439, ppl: 18.567986 +epoch: 0, batch: 23786, sum loss: 6120.813477, avg loss: 3.430949, ppl: 30.905973 +epoch: 0, batch: 23787, sum loss: 5174.756836, avg loss: 3.321410, ppl: 27.699385 +epoch: 0, batch: 23788, sum loss: 5900.050293, avg loss: 3.288768, ppl: 26.809822 +epoch: 0, batch: 23789, sum loss: 5223.026855, avg loss: 3.026087, ppl: 20.616409 +epoch: 0, batch: 23790, sum loss: 5823.037109, avg loss: 3.253093, ppl: 25.870239 +epoch: 0, batch: 23791, sum loss: 6259.025391, avg loss: 3.304660, ppl: 27.239275 +epoch: 0, batch: 23792, sum loss: 4851.953125, avg loss: 3.076698, ppl: 21.686682 +epoch: 0, batch: 23793, sum loss: 4896.763672, avg loss: 2.861931, ppl: 17.495279 +epoch: 0, batch: 23794, sum loss: 6371.353027, avg loss: 3.265686, ppl: 26.198078 +epoch: 0, batch: 23795, sum loss: 6015.295898, avg loss: 3.276305, ppl: 26.477755 +epoch: 0, batch: 23796, sum loss: 4446.395020, avg loss: 2.814174, ppl: 16.679396 +epoch: 0, batch: 23797, sum loss: 4950.580078, avg loss: 2.953807, ppl: 19.178827 +epoch: 0, batch: 23798, sum loss: 4691.909668, avg loss: 2.965809, ppl: 19.410393 +epoch: 0, batch: 23799, sum loss: 6065.599121, avg loss: 3.276931, ppl: 26.494337 +epoch: 0, batch: 23800, sum loss: 5233.457520, avg loss: 2.950089, ppl: 19.107649 +epoch: 0, batch: 23801, sum loss: 4300.048340, avg loss: 2.897607, ppl: 18.130699 +epoch: 0, batch: 23802, sum loss: 5628.877930, avg loss: 3.209166, ppl: 24.758436 +epoch: 0, batch: 23803, sum loss: 4256.128418, avg loss: 2.949500, ppl: 19.096399 +epoch: 0, batch: 23804, sum loss: 5521.333984, avg loss: 3.108859, ppl: 22.395483 +epoch: 0, batch: 23805, sum loss: 6039.984863, avg loss: 3.059769, ppl: 21.322634 +epoch: 0, batch: 23806, sum loss: 5541.528809, avg loss: 3.234985, ppl: 25.405981 +epoch: 0, batch: 23807, sum loss: 5181.501953, avg loss: 2.915870, ppl: 
18.464878 +epoch: 0, batch: 23808, sum loss: 5847.298340, avg loss: 3.080769, ppl: 21.775131 +epoch: 0, batch: 23809, sum loss: 4908.116699, avg loss: 3.122212, ppl: 22.696522 +epoch: 0, batch: 23810, sum loss: 4715.506836, avg loss: 3.092136, ppl: 22.024063 +epoch: 0, batch: 23811, sum loss: 5955.351562, avg loss: 3.338202, ppl: 28.168428 +epoch: 0, batch: 23812, sum loss: 6011.463379, avg loss: 3.303002, ppl: 27.194157 +epoch: 0, batch: 23813, sum loss: 4627.294922, avg loss: 3.032304, ppl: 20.744965 +epoch: 0, batch: 23814, sum loss: 5377.534180, avg loss: 3.243386, ppl: 25.620333 +epoch: 0, batch: 23815, sum loss: 4251.241211, avg loss: 2.937969, ppl: 18.877466 +epoch: 0, batch: 23816, sum loss: 4899.263184, avg loss: 2.863392, ppl: 17.520855 +epoch: 0, batch: 23817, sum loss: 6096.561035, avg loss: 3.004712, ppl: 20.180410 +epoch: 0, batch: 23818, sum loss: 5269.458984, avg loss: 3.299599, ppl: 27.101761 +epoch: 0, batch: 23819, sum loss: 4440.761230, avg loss: 2.866857, ppl: 17.581669 +epoch: 0, batch: 23820, sum loss: 5586.125000, avg loss: 3.323096, ppl: 27.746128 +epoch: 0, batch: 23821, sum loss: 5468.488770, avg loss: 3.318258, ppl: 27.612204 +epoch: 0, batch: 23822, sum loss: 4479.171387, avg loss: 3.172218, ppl: 23.860338 +epoch: 0, batch: 23823, sum loss: 5611.849609, avg loss: 3.138618, ppl: 23.071970 +epoch: 0, batch: 23824, sum loss: 5804.581055, avg loss: 3.164984, ppl: 23.688364 +epoch: 0, batch: 23825, sum loss: 5086.447266, avg loss: 3.229490, ppl: 25.266775 +epoch: 0, batch: 23826, sum loss: 5551.045410, avg loss: 3.184765, ppl: 24.161612 +epoch: 0, batch: 23827, sum loss: 5800.615723, avg loss: 3.231541, ppl: 25.318642 +epoch: 0, batch: 23828, sum loss: 4548.770508, avg loss: 2.807883, ppl: 16.574797 +epoch: 0, batch: 23829, sum loss: 5651.833984, avg loss: 3.268846, ppl: 26.280981 +epoch: 0, batch: 23830, sum loss: 5212.082031, avg loss: 3.030280, ppl: 20.703035 +epoch: 0, batch: 23831, sum loss: 4239.012695, avg loss: 2.909411, ppl: 
18.345997 +epoch: 0, batch: 23832, sum loss: 5295.187500, avg loss: 3.024094, ppl: 20.575348 +epoch: 0, batch: 23833, sum loss: 5411.474121, avg loss: 3.170166, ppl: 23.811449 +epoch: 0, batch: 23834, sum loss: 4859.073242, avg loss: 3.122798, ppl: 22.709827 +epoch: 0, batch: 23835, sum loss: 4940.747559, avg loss: 3.087967, ppl: 21.932447 +epoch: 0, batch: 23836, sum loss: 5492.671387, avg loss: 3.306846, ppl: 27.298893 +epoch: 0, batch: 23837, sum loss: 4964.620117, avg loss: 3.202981, ppl: 24.605764 +epoch: 0, batch: 23838, sum loss: 5305.713379, avg loss: 3.047509, ppl: 21.062815 +epoch: 0, batch: 23839, sum loss: 4153.163574, avg loss: 2.754087, ppl: 15.706697 +epoch: 0, batch: 23840, sum loss: 6156.653809, avg loss: 3.306474, ppl: 27.288729 +epoch: 0, batch: 23841, sum loss: 4839.713379, avg loss: 3.254683, ppl: 25.911394 +epoch: 0, batch: 23842, sum loss: 4746.375488, avg loss: 3.114420, ppl: 22.520357 +epoch: 0, batch: 23843, sum loss: 5172.210938, avg loss: 3.067741, ppl: 21.493292 +epoch: 0, batch: 23844, sum loss: 5522.200195, avg loss: 3.006097, ppl: 20.208374 +epoch: 0, batch: 23845, sum loss: 5064.913574, avg loss: 3.193514, ppl: 24.373936 +epoch: 0, batch: 23846, sum loss: 6534.357422, avg loss: 3.222070, ppl: 25.079975 +epoch: 0, batch: 23847, sum loss: 7024.552246, avg loss: 3.214898, ppl: 24.900755 +epoch: 0, batch: 23848, sum loss: 5278.808105, avg loss: 3.023372, ppl: 20.560513 +epoch: 0, batch: 23849, sum loss: 5876.886719, avg loss: 3.168133, ppl: 23.763077 +epoch: 0, batch: 23850, sum loss: 6355.087402, avg loss: 3.366042, ppl: 28.963665 +epoch: 0, batch: 23851, sum loss: 5510.769531, avg loss: 3.315746, ppl: 27.542929 +epoch: 0, batch: 23852, sum loss: 5965.530762, avg loss: 3.146377, ppl: 23.251673 +epoch: 0, batch: 23853, sum loss: 5611.834961, avg loss: 2.972370, ppl: 19.538174 +epoch: 0, batch: 23854, sum loss: 5162.521484, avg loss: 3.138311, ppl: 23.064875 +epoch: 0, batch: 23855, sum loss: 5515.777832, avg loss: 3.055833, ppl: 
21.238863 +epoch: 0, batch: 23856, sum loss: 4979.457031, avg loss: 3.145582, ppl: 23.233204 +epoch: 0, batch: 23857, sum loss: 5191.988770, avg loss: 3.081299, ppl: 21.786686 +epoch: 0, batch: 23858, sum loss: 5602.871094, avg loss: 3.121377, ppl: 22.677580 +epoch: 0, batch: 23859, sum loss: 4569.881836, avg loss: 2.874140, ppl: 17.710178 +epoch: 0, batch: 23860, sum loss: 5877.251953, avg loss: 3.098182, ppl: 22.157642 +epoch: 0, batch: 23861, sum loss: 6088.350586, avg loss: 3.537682, ppl: 34.387112 +epoch: 0, batch: 23862, sum loss: 7362.887207, avg loss: 3.641388, ppl: 38.144753 +epoch: 0, batch: 23863, sum loss: 5361.203613, avg loss: 3.223814, ppl: 25.123772 +epoch: 0, batch: 23864, sum loss: 4745.650391, avg loss: 3.081591, ppl: 21.793051 +epoch: 0, batch: 23865, sum loss: 5681.858887, avg loss: 2.959302, ppl: 19.284500 +epoch: 0, batch: 23866, sum loss: 6013.327148, avg loss: 3.122185, ppl: 22.695906 +epoch: 0, batch: 23867, sum loss: 6644.701660, avg loss: 3.615180, ppl: 37.158043 +epoch: 0, batch: 23868, sum loss: 5435.229492, avg loss: 3.033052, ppl: 20.760502 +epoch: 0, batch: 23869, sum loss: 5155.423828, avg loss: 3.070532, ppl: 21.553373 +epoch: 0, batch: 23870, sum loss: 5171.761230, avg loss: 2.998123, ppl: 20.047865 +epoch: 0, batch: 23871, sum loss: 5321.168945, avg loss: 3.232788, ppl: 25.350237 +epoch: 0, batch: 23872, sum loss: 5772.778320, avg loss: 3.276265, ppl: 26.476694 +epoch: 0, batch: 23873, sum loss: 6061.447266, avg loss: 3.363733, ppl: 28.896870 +epoch: 0, batch: 23874, sum loss: 5193.829102, avg loss: 3.104501, ppl: 22.298079 +epoch: 0, batch: 23875, sum loss: 5729.475586, avg loss: 3.321435, ppl: 27.700077 +epoch: 0, batch: 23876, sum loss: 5166.023926, avg loss: 3.206719, ppl: 24.697918 +epoch: 0, batch: 23877, sum loss: 4427.323242, avg loss: 2.983371, ppl: 19.754307 +epoch: 0, batch: 23878, sum loss: 3959.113037, avg loss: 2.831984, ppl: 16.979107 +epoch: 0, batch: 23879, sum loss: 4172.994629, avg loss: 2.819591, ppl: 
16.769991 +epoch: 0, batch: 23880, sum loss: 4530.648926, avg loss: 2.824594, ppl: 16.854105 +epoch: 0, batch: 23881, sum loss: 4596.482422, avg loss: 3.004237, ppl: 20.170818 +epoch: 0, batch: 23882, sum loss: 4518.526855, avg loss: 3.267192, ppl: 26.237570 +epoch: 0, batch: 23883, sum loss: 4845.820312, avg loss: 2.929759, ppl: 18.723108 +epoch: 0, batch: 23884, sum loss: 6147.947266, avg loss: 3.182167, ppl: 24.098927 +epoch: 0, batch: 23885, sum loss: 4838.541016, avg loss: 2.999716, ppl: 20.079824 +epoch: 0, batch: 23886, sum loss: 5500.458984, avg loss: 3.207265, ppl: 24.711399 +epoch: 0, batch: 23887, sum loss: 5258.917969, avg loss: 3.150940, ppl: 23.358004 +epoch: 0, batch: 23888, sum loss: 5852.336914, avg loss: 3.392659, ppl: 29.744946 +epoch: 0, batch: 23889, sum loss: 5521.339844, avg loss: 3.117640, ppl: 22.592987 +epoch: 0, batch: 23890, sum loss: 5253.722656, avg loss: 3.172538, ppl: 23.867979 +epoch: 0, batch: 23891, sum loss: 5488.999023, avg loss: 3.024242, ppl: 20.578400 +epoch: 0, batch: 23892, sum loss: 5129.319336, avg loss: 3.207829, ppl: 24.725361 +epoch: 0, batch: 23893, sum loss: 5152.316895, avg loss: 3.013051, ppl: 20.349386 +epoch: 0, batch: 23894, sum loss: 5718.201660, avg loss: 3.336174, ppl: 28.111361 +epoch: 0, batch: 23895, sum loss: 6283.597168, avg loss: 3.305417, ppl: 27.259901 +epoch: 0, batch: 23896, sum loss: 5141.611816, avg loss: 3.420899, ppl: 30.596922 +epoch: 0, batch: 23897, sum loss: 4392.318848, avg loss: 3.095362, ppl: 22.095240 +epoch: 0, batch: 23898, sum loss: 5292.403809, avg loss: 3.277030, ppl: 26.496965 +epoch: 0, batch: 23899, sum loss: 5574.372070, avg loss: 3.128155, ppl: 22.831816 +epoch: 0, batch: 23900, sum loss: 5969.680664, avg loss: 3.283653, ppl: 26.673025 +epoch: 0, batch: 23901, sum loss: 5636.825195, avg loss: 3.230272, ppl: 25.286541 +epoch: 0, batch: 23902, sum loss: 5125.693848, avg loss: 3.108365, ppl: 22.384417 +epoch: 0, batch: 23903, sum loss: 5595.396973, avg loss: 3.260721, ppl: 
26.068325 +epoch: 0, batch: 23904, sum loss: 5144.580078, avg loss: 2.989297, ppl: 19.871706 +epoch: 0, batch: 23905, sum loss: 4832.473145, avg loss: 2.888508, ppl: 17.966476 +epoch: 0, batch: 23906, sum loss: 6163.583496, avg loss: 3.464634, ppl: 31.964748 +epoch: 0, batch: 23907, sum loss: 5283.206055, avg loss: 3.124309, ppl: 22.744169 +epoch: 0, batch: 23908, sum loss: 4950.018066, avg loss: 2.896441, ppl: 18.109583 +epoch: 0, batch: 23909, sum loss: 5365.632324, avg loss: 2.977598, ppl: 19.640593 +epoch: 0, batch: 23910, sum loss: 5964.010254, avg loss: 3.322568, ppl: 27.731485 +epoch: 0, batch: 23911, sum loss: 5405.758789, avg loss: 3.166818, ppl: 23.731859 +epoch: 0, batch: 23912, sum loss: 5759.953613, avg loss: 3.268986, ppl: 26.284679 +epoch: 0, batch: 23913, sum loss: 5186.808594, avg loss: 3.130240, ppl: 22.879480 +epoch: 0, batch: 23914, sum loss: 4531.532715, avg loss: 2.880822, ppl: 17.828917 +epoch: 0, batch: 23915, sum loss: 4293.947266, avg loss: 2.879911, ppl: 17.812691 +epoch: 0, batch: 23916, sum loss: 5456.550293, avg loss: 3.283123, ppl: 26.658899 +epoch: 0, batch: 23917, sum loss: 5036.757812, avg loss: 3.088141, ppl: 21.936260 +epoch: 0, batch: 23918, sum loss: 4780.833984, avg loss: 3.022019, ppl: 20.532700 +epoch: 0, batch: 23919, sum loss: 5110.149414, avg loss: 3.138912, ppl: 23.078754 +epoch: 0, batch: 23920, sum loss: 5663.139160, avg loss: 3.281077, ppl: 26.604414 +epoch: 0, batch: 23921, sum loss: 5567.212402, avg loss: 3.037214, ppl: 20.847073 +epoch: 0, batch: 23922, sum loss: 4476.104004, avg loss: 2.976133, ppl: 19.611828 +epoch: 0, batch: 23923, sum loss: 5283.212891, avg loss: 3.261243, ppl: 26.081926 +epoch: 0, batch: 23924, sum loss: 6691.501953, avg loss: 3.479720, ppl: 32.450638 +epoch: 0, batch: 23925, sum loss: 4875.722656, avg loss: 2.797317, ppl: 16.400578 +epoch: 0, batch: 23926, sum loss: 4418.503418, avg loss: 2.843310, ppl: 17.172510 +epoch: 0, batch: 23927, sum loss: 5091.185547, avg loss: 2.989539, ppl: 
19.876526 +epoch: 0, batch: 23928, sum loss: 6080.163086, avg loss: 3.180001, ppl: 24.046789 +epoch: 0, batch: 23929, sum loss: 5753.278809, avg loss: 3.370404, ppl: 29.090263 +epoch: 0, batch: 23930, sum loss: 5645.863770, avg loss: 2.971508, ppl: 19.521326 +epoch: 0, batch: 23931, sum loss: 4730.116211, avg loss: 2.887739, ppl: 17.952667 +epoch: 0, batch: 23932, sum loss: 4678.265625, avg loss: 2.970327, ppl: 19.498302 +epoch: 0, batch: 23933, sum loss: 6013.636719, avg loss: 3.140280, ppl: 23.110342 +epoch: 0, batch: 23934, sum loss: 6279.288574, avg loss: 3.171358, ppl: 23.839828 +epoch: 0, batch: 23935, sum loss: 4892.741699, avg loss: 3.022076, ppl: 20.533884 +epoch: 0, batch: 23936, sum loss: 5889.815918, avg loss: 3.426304, ppl: 30.762730 +epoch: 0, batch: 23937, sum loss: 5743.487793, avg loss: 3.278247, ppl: 26.529217 +epoch: 0, batch: 23938, sum loss: 5101.490234, avg loss: 3.149068, ppl: 23.314329 +epoch: 0, batch: 23939, sum loss: 5795.583984, avg loss: 3.340394, ppl: 28.230255 +epoch: 0, batch: 23940, sum loss: 4570.206055, avg loss: 2.883411, ppl: 17.875135 +epoch: 0, batch: 23941, sum loss: 5625.830078, avg loss: 3.222125, ppl: 25.081364 +epoch: 0, batch: 23942, sum loss: 5343.118652, avg loss: 3.216808, ppl: 24.948366 +epoch: 0, batch: 23943, sum loss: 5069.984863, avg loss: 3.188670, ppl: 24.256138 +epoch: 0, batch: 23944, sum loss: 5607.092773, avg loss: 3.148283, ppl: 23.296041 +epoch: 0, batch: 23945, sum loss: 5470.574219, avg loss: 3.095967, ppl: 22.108614 +epoch: 0, batch: 23946, sum loss: 5532.712891, avg loss: 3.156140, ppl: 23.479786 +epoch: 0, batch: 23947, sum loss: 5797.989746, avg loss: 3.546171, ppl: 34.680271 +epoch: 0, batch: 23948, sum loss: 5586.456055, avg loss: 3.027889, ppl: 20.653597 +epoch: 0, batch: 23949, sum loss: 5058.224121, avg loss: 3.244531, ppl: 25.649687 +epoch: 0, batch: 23950, sum loss: 5617.812988, avg loss: 3.281433, ppl: 26.613873 +epoch: 0, batch: 23951, sum loss: 5191.400879, avg loss: 3.218476, ppl: 
24.989996 +epoch: 0, batch: 23952, sum loss: 4504.198242, avg loss: 2.961340, ppl: 19.323853 +epoch: 0, batch: 23953, sum loss: 5110.017578, avg loss: 3.138831, ppl: 23.076883 +epoch: 0, batch: 23954, sum loss: 4831.467285, avg loss: 2.951416, ppl: 19.133022 +epoch: 0, batch: 23955, sum loss: 4800.964844, avg loss: 3.107421, ppl: 22.363289 +epoch: 0, batch: 23956, sum loss: 4091.552734, avg loss: 2.691811, ppl: 14.758380 +epoch: 0, batch: 23957, sum loss: 5419.399414, avg loss: 3.324785, ppl: 27.793022 +epoch: 0, batch: 23958, sum loss: 5573.523438, avg loss: 3.286276, ppl: 26.743076 +epoch: 0, batch: 23959, sum loss: 6259.007324, avg loss: 3.304650, ppl: 27.239014 +epoch: 0, batch: 23960, sum loss: 5383.615234, avg loss: 3.094032, ppl: 22.065865 +epoch: 0, batch: 23961, sum loss: 4006.984131, avg loss: 2.682051, ppl: 14.615037 +epoch: 0, batch: 23962, sum loss: 6226.579102, avg loss: 3.319072, ppl: 27.634693 +epoch: 0, batch: 23963, sum loss: 6269.066895, avg loss: 3.249905, ppl: 25.787893 +epoch: 0, batch: 23964, sum loss: 4499.700684, avg loss: 2.999800, ppl: 20.081530 +epoch: 0, batch: 23965, sum loss: 5014.133301, avg loss: 3.253818, ppl: 25.888990 +epoch: 0, batch: 23966, sum loss: 4460.978516, avg loss: 2.721768, ppl: 15.207191 +epoch: 0, batch: 23967, sum loss: 5897.662598, avg loss: 3.364326, ppl: 28.913988 +epoch: 0, batch: 23968, sum loss: 4989.827637, avg loss: 2.971905, ppl: 19.529078 +epoch: 0, batch: 23969, sum loss: 4094.079590, avg loss: 3.021461, ppl: 20.521252 +epoch: 0, batch: 23970, sum loss: 4296.617188, avg loss: 3.010944, ppl: 20.306562 +epoch: 0, batch: 23971, sum loss: 5290.725098, avg loss: 3.166203, ppl: 23.717258 +epoch: 0, batch: 23972, sum loss: 6134.448242, avg loss: 3.344846, ppl: 28.356216 +epoch: 0, batch: 23973, sum loss: 5371.409180, avg loss: 3.249491, ppl: 25.777222 +epoch: 0, batch: 23974, sum loss: 4925.433105, avg loss: 3.231911, ppl: 25.328024 +epoch: 0, batch: 23975, sum loss: 4521.157227, avg loss: 3.046602, ppl: 
21.043711 +epoch: 0, batch: 23976, sum loss: 6523.257812, avg loss: 3.392230, ppl: 29.732168 +epoch: 0, batch: 23977, sum loss: 5678.125488, avg loss: 3.175685, ppl: 23.943226 +epoch: 0, batch: 23978, sum loss: 4834.088867, avg loss: 2.851970, ppl: 17.321867 +epoch: 0, batch: 23979, sum loss: 5307.016113, avg loss: 3.094470, ppl: 22.075531 +epoch: 0, batch: 23980, sum loss: 4579.189453, avg loss: 2.990979, ppl: 19.905169 +epoch: 0, batch: 23981, sum loss: 5909.165527, avg loss: 3.253946, ppl: 25.892300 +epoch: 0, batch: 23982, sum loss: 5506.835449, avg loss: 3.128884, ppl: 22.848457 +epoch: 0, batch: 23983, sum loss: 5586.724121, avg loss: 3.103736, ppl: 22.281031 +epoch: 0, batch: 23984, sum loss: 5737.640137, avg loss: 3.035789, ppl: 20.817387 +epoch: 0, batch: 23985, sum loss: 5710.786133, avg loss: 3.086911, ppl: 21.909306 +epoch: 0, batch: 23986, sum loss: 4409.316895, avg loss: 3.036720, ppl: 20.836777 +epoch: 0, batch: 23987, sum loss: 4935.399414, avg loss: 3.107934, ppl: 22.374775 +epoch: 0, batch: 23988, sum loss: 5063.214355, avg loss: 3.287802, ppl: 26.783915 +epoch: 0, batch: 23989, sum loss: 4235.791992, avg loss: 2.858159, ppl: 17.429415 +epoch: 0, batch: 23990, sum loss: 4905.980469, avg loss: 3.083583, ppl: 21.836500 +epoch: 0, batch: 23991, sum loss: 6151.985352, avg loss: 3.327196, ppl: 27.860109 +epoch: 0, batch: 23992, sum loss: 5947.370117, avg loss: 3.204402, ppl: 24.640760 +epoch: 0, batch: 23993, sum loss: 6125.348145, avg loss: 3.307423, ppl: 27.314655 +epoch: 0, batch: 23994, sum loss: 5538.237305, avg loss: 3.170141, ppl: 23.810852 +epoch: 0, batch: 23995, sum loss: 5177.830078, avg loss: 3.258546, ppl: 26.011698 +epoch: 0, batch: 23996, sum loss: 4819.310059, avg loss: 2.829894, ppl: 16.943670 +epoch: 0, batch: 23997, sum loss: 6049.892578, avg loss: 3.034049, ppl: 20.781197 +epoch: 0, batch: 23998, sum loss: 5304.136719, avg loss: 2.919173, ppl: 18.525955 +epoch: 0, batch: 23999, sum loss: 4789.735352, avg loss: 2.861252, ppl: 
17.483400 +epoch: 0, batch: 24000, sum loss: 5451.038574, avg loss: 3.319756, ppl: 27.653597 +epoch: 0, batch: 24001, sum loss: 5154.116211, avg loss: 2.960434, ppl: 19.306356 +epoch: 0, batch: 24002, sum loss: 5862.517090, avg loss: 3.098582, ppl: 22.166498 +epoch: 0, batch: 24003, sum loss: 5381.972656, avg loss: 3.125420, ppl: 22.769447 +epoch: 0, batch: 24004, sum loss: 5710.019531, avg loss: 3.179298, ppl: 24.029882 +epoch: 0, batch: 24005, sum loss: 5209.297363, avg loss: 3.032187, ppl: 20.742546 +epoch: 0, batch: 24006, sum loss: 6039.032227, avg loss: 3.074864, ppl: 21.646936 +epoch: 0, batch: 24007, sum loss: 5833.718750, avg loss: 3.437666, ppl: 31.114250 +epoch: 0, batch: 24008, sum loss: 6203.795410, avg loss: 3.448469, ppl: 31.452192 +epoch: 0, batch: 24009, sum loss: 5109.536621, avg loss: 3.065109, ppl: 21.436794 +epoch: 0, batch: 24010, sum loss: 5676.179688, avg loss: 3.243531, ppl: 25.624046 +epoch: 0, batch: 24011, sum loss: 5749.101562, avg loss: 3.174546, ppl: 23.915949 +epoch: 0, batch: 24012, sum loss: 5350.866699, avg loss: 2.987642, ppl: 19.838844 +epoch: 0, batch: 24013, sum loss: 6701.121094, avg loss: 3.299420, ppl: 27.096909 +epoch: 0, batch: 24014, sum loss: 5565.879883, avg loss: 3.151687, ppl: 23.375475 +epoch: 0, batch: 24015, sum loss: 4561.759277, avg loss: 3.172294, ppl: 23.862171 +epoch: 0, batch: 24016, sum loss: 6162.592285, avg loss: 3.181514, ppl: 24.083183 +epoch: 0, batch: 24017, sum loss: 6034.156738, avg loss: 2.924943, ppl: 18.633160 +epoch: 0, batch: 24018, sum loss: 4683.200195, avg loss: 2.994374, ppl: 19.972845 +epoch: 0, batch: 24019, sum loss: 5095.063965, avg loss: 3.052765, ppl: 21.173801 +epoch: 0, batch: 24020, sum loss: 4773.824219, avg loss: 2.804832, ppl: 16.524300 +epoch: 0, batch: 24021, sum loss: 4521.628418, avg loss: 3.016430, ppl: 20.418266 +epoch: 0, batch: 24022, sum loss: 5443.395508, avg loss: 3.052942, ppl: 21.177559 +epoch: 0, batch: 24023, sum loss: 5055.510742, avg loss: 3.263726, ppl: 
26.146767 +epoch: 0, batch: 24024, sum loss: 6222.851562, avg loss: 3.141268, ppl: 23.133171 +epoch: 0, batch: 24025, sum loss: 4842.802246, avg loss: 3.036240, ppl: 20.826778 +epoch: 0, batch: 24026, sum loss: 5111.869141, avg loss: 2.932799, ppl: 18.780128 +epoch: 0, batch: 24027, sum loss: 4750.508301, avg loss: 3.025801, ppl: 20.610516 +epoch: 0, batch: 24028, sum loss: 5161.959961, avg loss: 3.105872, ppl: 22.328690 +epoch: 0, batch: 24029, sum loss: 5130.036621, avg loss: 3.071878, ppl: 21.582401 +epoch: 0, batch: 24030, sum loss: 5010.699219, avg loss: 3.165318, ppl: 23.696289 +epoch: 0, batch: 24031, sum loss: 5080.458984, avg loss: 3.109216, ppl: 22.403473 +epoch: 0, batch: 24032, sum loss: 4783.967773, avg loss: 3.114563, ppl: 22.523575 +epoch: 0, batch: 24033, sum loss: 5168.594238, avg loss: 3.157358, ppl: 23.508398 +epoch: 0, batch: 24034, sum loss: 5401.561523, avg loss: 3.142270, ppl: 23.156368 +epoch: 0, batch: 24035, sum loss: 4795.423828, avg loss: 3.079913, ppl: 21.756502 +epoch: 0, batch: 24036, sum loss: 6266.960938, avg loss: 3.356701, ppl: 28.694376 +epoch: 0, batch: 24037, sum loss: 3818.077148, avg loss: 2.583273, ppl: 13.240402 +epoch: 0, batch: 24038, sum loss: 5973.981445, avg loss: 3.337420, ppl: 28.146408 +epoch: 0, batch: 24039, sum loss: 5476.739746, avg loss: 3.087226, ppl: 21.916206 +epoch: 0, batch: 24040, sum loss: 6216.582520, avg loss: 3.324376, ppl: 27.781647 +epoch: 0, batch: 24041, sum loss: 5022.620605, avg loss: 3.081362, ppl: 21.788063 +epoch: 0, batch: 24042, sum loss: 5515.287109, avg loss: 3.227201, ppl: 25.209009 +epoch: 0, batch: 24043, sum loss: 6264.924805, avg loss: 3.113780, ppl: 22.505957 +epoch: 0, batch: 24044, sum loss: 5537.673828, avg loss: 3.157169, ppl: 23.503954 +epoch: 0, batch: 24045, sum loss: 5965.440430, avg loss: 3.310455, ppl: 27.397598 +epoch: 0, batch: 24046, sum loss: 5511.510254, avg loss: 3.251628, ppl: 25.832371 +epoch: 0, batch: 24047, sum loss: 5237.551758, avg loss: 3.066482, ppl: 
21.466253 +epoch: 0, batch: 24048, sum loss: 4670.941406, avg loss: 2.725170, ppl: 15.259009 +epoch: 0, batch: 24049, sum loss: 4542.709473, avg loss: 3.010411, ppl: 20.295731 +epoch: 0, batch: 24050, sum loss: 4555.462891, avg loss: 2.952342, ppl: 19.150743 +epoch: 0, batch: 24051, sum loss: 5930.140137, avg loss: 2.981468, ppl: 19.716740 +epoch: 0, batch: 24052, sum loss: 5845.098633, avg loss: 3.112406, ppl: 22.475054 +epoch: 0, batch: 24053, sum loss: 4931.383789, avg loss: 3.027246, ppl: 20.640310 +epoch: 0, batch: 24054, sum loss: 4824.943848, avg loss: 2.920668, ppl: 18.553684 +epoch: 0, batch: 24055, sum loss: 4104.233398, avg loss: 2.929503, ppl: 18.718321 +epoch: 0, batch: 24056, sum loss: 4588.017090, avg loss: 3.177297, ppl: 23.981846 +epoch: 0, batch: 24057, sum loss: 6351.339844, avg loss: 3.285743, ppl: 26.728825 +epoch: 0, batch: 24058, sum loss: 5412.612305, avg loss: 3.134113, ppl: 22.968245 +epoch: 0, batch: 24059, sum loss: 3858.183594, avg loss: 2.684888, ppl: 14.656554 +epoch: 0, batch: 24060, sum loss: 4651.887695, avg loss: 2.991568, ppl: 19.916885 +epoch: 0, batch: 24061, sum loss: 5941.923340, avg loss: 3.196301, ppl: 24.441946 +epoch: 0, batch: 24062, sum loss: 5096.336914, avg loss: 2.989054, ppl: 19.866879 +epoch: 0, batch: 24063, sum loss: 4337.429688, avg loss: 2.709200, ppl: 15.017262 +epoch: 0, batch: 24064, sum loss: 5018.165039, avg loss: 3.111076, ppl: 22.445175 +epoch: 0, batch: 24065, sum loss: 3444.143311, avg loss: 2.837021, ppl: 17.064852 +epoch: 0, batch: 24066, sum loss: 4097.806152, avg loss: 2.833891, ppl: 17.011522 +epoch: 0, batch: 24067, sum loss: 5424.129395, avg loss: 3.146247, ppl: 23.248646 +epoch: 0, batch: 24068, sum loss: 4943.941406, avg loss: 3.127098, ppl: 22.807686 +epoch: 0, batch: 24069, sum loss: 4850.946289, avg loss: 2.956091, ppl: 19.222691 +epoch: 0, batch: 24070, sum loss: 5231.836914, avg loss: 2.987914, ppl: 19.844242 +epoch: 0, batch: 24071, sum loss: 5914.924805, avg loss: 3.251745, ppl: 
25.835396 +epoch: 0, batch: 24072, sum loss: 4845.035645, avg loss: 3.016834, ppl: 20.426519 +epoch: 0, batch: 24073, sum loss: 4513.217773, avg loss: 2.990867, ppl: 19.902924 +epoch: 0, batch: 24074, sum loss: 5313.396973, avg loss: 3.177869, ppl: 23.995567 +epoch: 0, batch: 24075, sum loss: 5625.763672, avg loss: 3.144641, ppl: 23.211349 +epoch: 0, batch: 24076, sum loss: 4841.940430, avg loss: 3.267166, ppl: 26.236889 +epoch: 0, batch: 24077, sum loss: 4878.700684, avg loss: 2.871513, ppl: 17.663725 +epoch: 0, batch: 24078, sum loss: 5949.903320, avg loss: 3.097295, ppl: 22.137987 +epoch: 0, batch: 24079, sum loss: 5563.792969, avg loss: 3.089280, ppl: 21.961252 +epoch: 0, batch: 24080, sum loss: 4232.239258, avg loss: 2.918786, ppl: 18.518789 +epoch: 0, batch: 24081, sum loss: 5755.799805, avg loss: 3.373857, ppl: 29.190893 +epoch: 0, batch: 24082, sum loss: 4419.701660, avg loss: 2.813305, ppl: 16.664904 +epoch: 0, batch: 24083, sum loss: 5297.796875, avg loss: 3.081906, ppl: 21.799921 +epoch: 0, batch: 24084, sum loss: 6016.921875, avg loss: 3.516612, ppl: 33.670147 +epoch: 0, batch: 24085, sum loss: 6281.217285, avg loss: 3.389756, ppl: 29.658707 +epoch: 0, batch: 24086, sum loss: 5986.378418, avg loss: 3.216754, ppl: 24.947004 +epoch: 0, batch: 24087, sum loss: 5662.088867, avg loss: 3.008549, ppl: 20.257978 +epoch: 0, batch: 24088, sum loss: 6301.978516, avg loss: 3.408317, ppl: 30.214354 +epoch: 0, batch: 24089, sum loss: 4938.785156, avg loss: 2.975172, ppl: 19.592989 +epoch: 0, batch: 24090, sum loss: 4623.193359, avg loss: 2.969296, ppl: 19.478197 +epoch: 0, batch: 24091, sum loss: 5563.374512, avg loss: 3.153841, ppl: 23.425859 +epoch: 0, batch: 24092, sum loss: 4569.066406, avg loss: 2.731062, ppl: 15.349178 +epoch: 0, batch: 24093, sum loss: 4818.783203, avg loss: 2.913412, ppl: 18.419537 +epoch: 0, batch: 24094, sum loss: 4785.879883, avg loss: 2.905817, ppl: 18.280165 +epoch: 0, batch: 24095, sum loss: 5663.533203, avg loss: 3.105007, ppl: 
22.309380 +epoch: 0, batch: 24096, sum loss: 5719.454590, avg loss: 3.315626, ppl: 27.539633 +epoch: 0, batch: 24097, sum loss: 5308.510742, avg loss: 3.093538, ppl: 22.054966 +epoch: 0, batch: 24098, sum loss: 4615.727051, avg loss: 3.165794, ppl: 23.707552 +epoch: 0, batch: 24099, sum loss: 4503.714844, avg loss: 3.022628, ppl: 20.545206 +epoch: 0, batch: 24100, sum loss: 5673.505371, avg loss: 3.169556, ppl: 23.796913 +epoch: 0, batch: 24101, sum loss: 4735.857422, avg loss: 2.800626, ppl: 16.454937 +epoch: 0, batch: 24102, sum loss: 4347.732422, avg loss: 2.959654, ppl: 19.291302 +epoch: 0, batch: 24103, sum loss: 5966.549805, avg loss: 3.225162, ppl: 25.157650 +epoch: 0, batch: 24104, sum loss: 5312.061035, avg loss: 3.135809, ppl: 23.007246 +epoch: 0, batch: 24105, sum loss: 4582.698730, avg loss: 2.947073, ppl: 19.050117 +epoch: 0, batch: 24106, sum loss: 5073.539062, avg loss: 3.003872, ppl: 20.163452 +epoch: 0, batch: 24107, sum loss: 5543.490723, avg loss: 3.266642, ppl: 26.223124 +epoch: 0, batch: 24108, sum loss: 5601.014648, avg loss: 3.245084, ppl: 25.663860 +epoch: 0, batch: 24109, sum loss: 5835.751953, avg loss: 3.115725, ppl: 22.549763 +epoch: 0, batch: 24110, sum loss: 5393.151367, avg loss: 2.986241, ppl: 19.811075 +epoch: 0, batch: 24111, sum loss: 5900.527344, avg loss: 3.186030, ppl: 24.192192 +epoch: 0, batch: 24112, sum loss: 5493.572266, avg loss: 3.279745, ppl: 26.568993 +epoch: 0, batch: 24113, sum loss: 5467.988770, avg loss: 3.119218, ppl: 22.628674 +epoch: 0, batch: 24114, sum loss: 5286.792969, avg loss: 3.137563, ppl: 23.047621 +epoch: 0, batch: 24115, sum loss: 5329.421387, avg loss: 2.928254, ppl: 18.694954 +epoch: 0, batch: 24116, sum loss: 5796.549805, avg loss: 3.186668, ppl: 24.207644 +epoch: 0, batch: 24117, sum loss: 5154.894531, avg loss: 3.124178, ppl: 22.741203 +epoch: 0, batch: 24118, sum loss: 4982.637695, avg loss: 3.096730, ppl: 22.125471 +epoch: 0, batch: 24119, sum loss: 5108.004883, avg loss: 2.933949, ppl: 
18.801727 +epoch: 0, batch: 24120, sum loss: 5536.848633, avg loss: 3.048925, ppl: 21.092667 +epoch: 0, batch: 24121, sum loss: 5489.818359, avg loss: 3.186198, ppl: 24.196247 +epoch: 0, batch: 24122, sum loss: 5268.442871, avg loss: 3.200755, ppl: 24.551062 +epoch: 0, batch: 24123, sum loss: 6081.350586, avg loss: 3.328599, ppl: 27.899233 +epoch: 0, batch: 24124, sum loss: 5551.419434, avg loss: 3.314281, ppl: 27.502600 +epoch: 0, batch: 24125, sum loss: 4134.903320, avg loss: 2.791967, ppl: 16.313078 +epoch: 0, batch: 24126, sum loss: 4938.951172, avg loss: 3.114093, ppl: 22.512999 +epoch: 0, batch: 24127, sum loss: 4997.138184, avg loss: 3.131039, ppl: 22.897757 +epoch: 0, batch: 24128, sum loss: 4586.160156, avg loss: 3.047283, ppl: 21.058046 +epoch: 0, batch: 24129, sum loss: 4941.913574, avg loss: 3.080994, ppl: 21.780033 +epoch: 0, batch: 24130, sum loss: 7081.824219, avg loss: 3.456234, ppl: 31.697393 +epoch: 0, batch: 24131, sum loss: 4456.777344, avg loss: 2.928238, ppl: 18.694656 +epoch: 0, batch: 24132, sum loss: 5143.045898, avg loss: 3.068643, ppl: 21.512697 +epoch: 0, batch: 24133, sum loss: 5679.864258, avg loss: 3.189143, ppl: 24.267632 +epoch: 0, batch: 24134, sum loss: 5966.924805, avg loss: 3.320493, ppl: 27.673977 +epoch: 0, batch: 24135, sum loss: 5499.098633, avg loss: 3.367482, ppl: 29.005405 +epoch: 0, batch: 24136, sum loss: 5636.264160, avg loss: 3.171786, ppl: 23.850050 +epoch: 0, batch: 24137, sum loss: 4203.328125, avg loss: 2.819134, ppl: 16.762323 +epoch: 0, batch: 24138, sum loss: 3938.832031, avg loss: 2.593043, ppl: 13.370394 +epoch: 0, batch: 24139, sum loss: 5394.751953, avg loss: 2.982174, ppl: 19.730658 +epoch: 0, batch: 24140, sum loss: 4951.833008, avg loss: 3.230159, ppl: 25.283667 +epoch: 0, batch: 24141, sum loss: 5876.171387, avg loss: 3.225121, ppl: 25.156630 +epoch: 0, batch: 24142, sum loss: 5954.652344, avg loss: 3.063093, ppl: 21.393623 +epoch: 0, batch: 24143, sum loss: 5227.498047, avg loss: 3.147199, ppl: 
23.270802 +epoch: 0, batch: 24144, sum loss: 4284.572754, avg loss: 2.938664, ppl: 18.890600 +epoch: 0, batch: 24145, sum loss: 6341.344727, avg loss: 3.258656, ppl: 26.014557 +epoch: 0, batch: 24146, sum loss: 5816.824219, avg loss: 3.201334, ppl: 24.565285 +epoch: 0, batch: 24147, sum loss: 4262.916992, avg loss: 3.210028, ppl: 24.779778 +epoch: 0, batch: 24148, sum loss: 5305.614746, avg loss: 3.186555, ppl: 24.204906 +epoch: 0, batch: 24149, sum loss: 6059.191895, avg loss: 3.174014, ppl: 23.903231 +epoch: 0, batch: 24150, sum loss: 5223.807617, avg loss: 2.991872, ppl: 19.922935 +epoch: 0, batch: 24151, sum loss: 4641.169922, avg loss: 2.782476, ppl: 16.158979 +epoch: 0, batch: 24152, sum loss: 6232.235352, avg loss: 3.304473, ppl: 27.234177 +epoch: 0, batch: 24153, sum loss: 5515.122559, avg loss: 3.138943, ppl: 23.079453 +epoch: 0, batch: 24154, sum loss: 4796.710449, avg loss: 3.026316, ppl: 20.621119 +epoch: 0, batch: 24155, sum loss: 5906.000488, avg loss: 3.308684, ppl: 27.349108 +epoch: 0, batch: 24156, sum loss: 5922.959961, avg loss: 3.158912, ppl: 23.544964 +epoch: 0, batch: 24157, sum loss: 4968.175293, avg loss: 3.080084, ppl: 21.760227 +epoch: 0, batch: 24158, sum loss: 5459.950684, avg loss: 2.952921, ppl: 19.161842 +epoch: 0, batch: 24159, sum loss: 5273.291016, avg loss: 2.937767, ppl: 18.873646 +epoch: 0, batch: 24160, sum loss: 3707.777344, avg loss: 2.565936, ppl: 13.012831 +epoch: 0, batch: 24161, sum loss: 4836.383301, avg loss: 2.979903, ppl: 19.685917 +epoch: 0, batch: 24162, sum loss: 5255.680664, avg loss: 3.073498, ppl: 21.617378 +epoch: 0, batch: 24163, sum loss: 4820.306152, avg loss: 3.113893, ppl: 22.508501 +epoch: 0, batch: 24164, sum loss: 4510.147949, avg loss: 2.984876, ppl: 19.784052 +epoch: 0, batch: 24165, sum loss: 5359.683594, avg loss: 3.331065, ppl: 27.968111 +epoch: 0, batch: 24166, sum loss: 4963.296875, avg loss: 3.107888, ppl: 22.373741 +epoch: 0, batch: 24167, sum loss: 5025.965332, avg loss: 2.925475, ppl: 
18.643072 +epoch: 0, batch: 24168, sum loss: 5792.479492, avg loss: 3.346320, ppl: 28.398027 +epoch: 0, batch: 24169, sum loss: 4673.852051, avg loss: 2.956263, ppl: 19.225990 +epoch: 0, batch: 24170, sum loss: 5277.905762, avg loss: 3.272105, ppl: 26.366789 +epoch: 0, batch: 24171, sum loss: 5397.101074, avg loss: 3.010096, ppl: 20.289339 +epoch: 0, batch: 24172, sum loss: 4726.952148, avg loss: 2.980424, ppl: 19.696159 +epoch: 0, batch: 24173, sum loss: 5416.379395, avg loss: 3.079238, ppl: 21.741827 +epoch: 0, batch: 24174, sum loss: 5236.167969, avg loss: 3.154318, ppl: 23.437050 +epoch: 0, batch: 24175, sum loss: 4626.977539, avg loss: 2.866777, ppl: 17.580261 +epoch: 0, batch: 24176, sum loss: 5310.907227, avg loss: 3.068115, ppl: 21.501335 +epoch: 0, batch: 24177, sum loss: 5995.367676, avg loss: 3.368184, ppl: 29.025770 +epoch: 0, batch: 24178, sum loss: 4642.638672, avg loss: 3.068499, ppl: 21.509590 +epoch: 0, batch: 24179, sum loss: 5184.167480, avg loss: 3.180471, ppl: 24.058081 +epoch: 0, batch: 24180, sum loss: 5215.151367, avg loss: 3.085888, ppl: 21.886902 +epoch: 0, batch: 24181, sum loss: 5923.531250, avg loss: 3.303699, ppl: 27.213102 +epoch: 0, batch: 24182, sum loss: 5564.920410, avg loss: 3.081351, ppl: 21.787825 +epoch: 0, batch: 24183, sum loss: 6461.741211, avg loss: 3.393772, ppl: 29.778061 +epoch: 0, batch: 24184, sum loss: 4717.678223, avg loss: 2.878388, ppl: 17.785583 +epoch: 0, batch: 24185, sum loss: 5395.772949, avg loss: 3.069268, ppl: 21.526134 +epoch: 0, batch: 24186, sum loss: 5163.231934, avg loss: 3.000135, ppl: 20.088243 +epoch: 0, batch: 24187, sum loss: 6055.722656, avg loss: 3.345703, ppl: 28.380531 +epoch: 0, batch: 24188, sum loss: 6407.672852, avg loss: 3.401100, ppl: 29.997091 +epoch: 0, batch: 24189, sum loss: 5119.763672, avg loss: 3.187898, ppl: 24.237419 +epoch: 0, batch: 24190, sum loss: 5031.721191, avg loss: 3.040315, ppl: 20.911833 +epoch: 0, batch: 24191, sum loss: 5156.173828, avg loss: 3.020606, ppl: 
20.503704 +epoch: 0, batch: 24192, sum loss: 6057.982422, avg loss: 3.532351, ppl: 34.204288 +epoch: 0, batch: 24193, sum loss: 5315.355957, avg loss: 3.200094, ppl: 24.534836 +epoch: 0, batch: 24194, sum loss: 4717.267090, avg loss: 3.224380, ppl: 25.137972 +epoch: 0, batch: 24195, sum loss: 6318.353516, avg loss: 3.275455, ppl: 26.455273 +epoch: 0, batch: 24196, sum loss: 6441.677734, avg loss: 3.439230, ppl: 31.162952 +epoch: 0, batch: 24197, sum loss: 5398.715820, avg loss: 3.236640, ppl: 25.448078 +epoch: 0, batch: 24198, sum loss: 4653.727539, avg loss: 3.033721, ppl: 20.774385 +epoch: 0, batch: 24199, sum loss: 6463.732422, avg loss: 3.345617, ppl: 28.378088 +epoch: 0, batch: 24200, sum loss: 5518.996094, avg loss: 3.421572, ppl: 30.617521 +epoch: 0, batch: 24201, sum loss: 4573.095215, avg loss: 2.903552, ppl: 18.238821 +epoch: 0, batch: 24202, sum loss: 5212.336426, avg loss: 3.124902, ppl: 22.757666 +epoch: 0, batch: 24203, sum loss: 5559.562500, avg loss: 3.545639, ppl: 34.661839 +epoch: 0, batch: 24204, sum loss: 6181.303711, avg loss: 3.418863, ppl: 30.534664 +epoch: 0, batch: 24205, sum loss: 5429.218750, avg loss: 3.093572, ppl: 22.055717 +epoch: 0, batch: 24206, sum loss: 4751.390137, avg loss: 3.018672, ppl: 20.464102 +epoch: 0, batch: 24207, sum loss: 5888.308105, avg loss: 3.302472, ppl: 27.179754 +epoch: 0, batch: 24208, sum loss: 4882.393555, avg loss: 2.825459, ppl: 16.868690 +epoch: 0, batch: 24209, sum loss: 4578.471680, avg loss: 2.861545, ppl: 17.488523 +epoch: 0, batch: 24210, sum loss: 5469.154785, avg loss: 3.334851, ppl: 28.074186 +epoch: 0, batch: 24211, sum loss: 6105.968262, avg loss: 3.180192, ppl: 24.051371 +epoch: 0, batch: 24212, sum loss: 5352.468750, avg loss: 3.193597, ppl: 24.375957 +epoch: 0, batch: 24213, sum loss: 5180.770508, avg loss: 2.984315, ppl: 19.772951 +epoch: 0, batch: 24214, sum loss: 5683.201172, avg loss: 3.254984, ppl: 25.919191 +epoch: 0, batch: 24215, sum loss: 4615.880859, avg loss: 2.999273, ppl: 
20.070936 +epoch: 0, batch: 24216, sum loss: 5647.236328, avg loss: 3.232534, ppl: 25.343788 +epoch: 0, batch: 24217, sum loss: 5478.259766, avg loss: 3.155680, ppl: 23.468983 +epoch: 0, batch: 24218, sum loss: 5065.954590, avg loss: 3.357160, ppl: 28.707556 +epoch: 0, batch: 24219, sum loss: 7186.583984, avg loss: 3.529757, ppl: 34.115662 +epoch: 0, batch: 24220, sum loss: 4519.555664, avg loss: 3.089238, ppl: 21.960346 +epoch: 0, batch: 24221, sum loss: 5786.467773, avg loss: 3.155108, ppl: 23.455564 +epoch: 0, batch: 24222, sum loss: 4910.071289, avg loss: 3.004940, ppl: 20.184996 +epoch: 0, batch: 24223, sum loss: 5467.904297, avg loss: 3.227807, ppl: 25.224268 +epoch: 0, batch: 24224, sum loss: 4467.721191, avg loss: 2.914365, ppl: 18.437098 +epoch: 0, batch: 24225, sum loss: 4877.424805, avg loss: 2.917120, ppl: 18.487970 +epoch: 0, batch: 24226, sum loss: 6340.471191, avg loss: 3.173409, ppl: 23.888783 +epoch: 0, batch: 24227, sum loss: 5750.805664, avg loss: 3.144235, ppl: 23.201921 +epoch: 0, batch: 24228, sum loss: 5401.023926, avg loss: 3.243858, ppl: 25.632429 +epoch: 0, batch: 24229, sum loss: 5231.589355, avg loss: 3.362204, ppl: 28.852707 +epoch: 0, batch: 24230, sum loss: 4391.723145, avg loss: 3.028775, ppl: 20.671888 +epoch: 0, batch: 24231, sum loss: 5158.237793, avg loss: 3.083226, ppl: 21.828718 +epoch: 0, batch: 24232, sum loss: 4979.106445, avg loss: 3.086861, ppl: 21.908197 +epoch: 0, batch: 24233, sum loss: 4706.689941, avg loss: 3.076268, ppl: 21.677351 +epoch: 0, batch: 24234, sum loss: 5243.464355, avg loss: 3.004851, ppl: 20.183201 +epoch: 0, batch: 24235, sum loss: 5835.935547, avg loss: 3.037968, ppl: 20.862801 +epoch: 0, batch: 24236, sum loss: 5433.763184, avg loss: 3.242102, ppl: 25.587454 +epoch: 0, batch: 24237, sum loss: 4981.604492, avg loss: 2.958198, ppl: 19.263220 +epoch: 0, batch: 24238, sum loss: 5097.487793, avg loss: 3.106330, ppl: 22.338913 +epoch: 0, batch: 24239, sum loss: 5299.286621, avg loss: 3.073832, ppl: 
21.624615 +epoch: 0, batch: 24240, sum loss: 5015.622559, avg loss: 2.939990, ppl: 18.915653 +epoch: 0, batch: 24241, sum loss: 4895.299805, avg loss: 3.120013, ppl: 22.646664 +epoch: 0, batch: 24242, sum loss: 4814.687012, avg loss: 2.932209, ppl: 18.769047 +epoch: 0, batch: 24243, sum loss: 4387.621582, avg loss: 3.167958, ppl: 23.758919 +epoch: 0, batch: 24244, sum loss: 6042.810059, avg loss: 3.300279, ppl: 27.120195 +epoch: 0, batch: 24245, sum loss: 5057.920898, avg loss: 3.108741, ppl: 22.392824 +epoch: 0, batch: 24246, sum loss: 5157.828125, avg loss: 3.118397, ppl: 22.610102 +epoch: 0, batch: 24247, sum loss: 4849.480469, avg loss: 3.008363, ppl: 20.254208 +epoch: 0, batch: 24248, sum loss: 6306.915039, avg loss: 3.448286, ppl: 31.446449 +epoch: 0, batch: 24249, sum loss: 6562.996094, avg loss: 3.176668, ppl: 23.966766 +epoch: 0, batch: 24250, sum loss: 5330.347656, avg loss: 3.013198, ppl: 20.352390 +epoch: 0, batch: 24251, sum loss: 5264.610352, avg loss: 2.998070, ppl: 20.046804 +epoch: 0, batch: 24252, sum loss: 5361.904785, avg loss: 3.079785, ppl: 21.753717 +epoch: 0, batch: 24253, sum loss: 5345.626953, avg loss: 3.107923, ppl: 22.374514 +epoch: 0, batch: 24254, sum loss: 5465.031250, avg loss: 3.166298, ppl: 23.719521 +epoch: 0, batch: 24255, sum loss: 5585.623047, avg loss: 3.132711, ppl: 22.936068 +epoch: 0, batch: 24256, sum loss: 5365.986816, avg loss: 3.061031, ppl: 21.349554 +epoch: 0, batch: 24257, sum loss: 5241.086426, avg loss: 3.205557, ppl: 24.669245 +epoch: 0, batch: 24258, sum loss: 4557.502441, avg loss: 3.054626, ppl: 21.213261 +epoch: 0, batch: 24259, sum loss: 4412.182129, avg loss: 2.983220, ppl: 19.751310 +epoch: 0, batch: 24260, sum loss: 5094.499023, avg loss: 3.137007, ppl: 23.034815 +epoch: 0, batch: 24261, sum loss: 5826.944824, avg loss: 3.141210, ppl: 23.131842 +epoch: 0, batch: 24262, sum loss: 4851.376953, avg loss: 2.804264, ppl: 16.514917 +epoch: 0, batch: 24263, sum loss: 4967.300781, avg loss: 3.143861, ppl: 
23.193251 +epoch: 0, batch: 24264, sum loss: 5031.150391, avg loss: 2.879880, ppl: 17.812134 +epoch: 0, batch: 24265, sum loss: 5472.490723, avg loss: 3.282838, ppl: 26.651299 +epoch: 0, batch: 24266, sum loss: 6152.980469, avg loss: 3.311615, ppl: 27.429382 +epoch: 0, batch: 24267, sum loss: 4032.835449, avg loss: 2.832047, ppl: 16.980192 +epoch: 0, batch: 24268, sum loss: 5207.184082, avg loss: 3.125561, ppl: 22.772655 +epoch: 0, batch: 24269, sum loss: 4413.333496, avg loss: 2.893989, ppl: 18.065229 +epoch: 0, batch: 24270, sum loss: 5312.000000, avg loss: 3.106433, ppl: 22.341204 +epoch: 0, batch: 24271, sum loss: 5748.219238, avg loss: 3.332301, ppl: 28.002707 +epoch: 0, batch: 24272, sum loss: 4998.072266, avg loss: 3.281729, ppl: 26.621748 +epoch: 0, batch: 24273, sum loss: 5174.087402, avg loss: 2.816597, ppl: 16.719849 +epoch: 0, batch: 24274, sum loss: 5248.588379, avg loss: 3.414827, ppl: 30.411676 +epoch: 0, batch: 24275, sum loss: 5323.170410, avg loss: 3.318685, ppl: 27.623997 +epoch: 0, batch: 24276, sum loss: 5252.662109, avg loss: 3.069937, ppl: 21.540550 +epoch: 0, batch: 24277, sum loss: 5332.909180, avg loss: 3.277756, ppl: 26.516203 +epoch: 0, batch: 24278, sum loss: 5438.716309, avg loss: 3.237331, ppl: 25.465668 +epoch: 0, batch: 24279, sum loss: 5504.178711, avg loss: 3.092235, ppl: 22.026258 +epoch: 0, batch: 24280, sum loss: 4086.843506, avg loss: 2.755795, ppl: 15.733541 +epoch: 0, batch: 24281, sum loss: 4048.409668, avg loss: 2.923039, ppl: 18.597729 +epoch: 0, batch: 24282, sum loss: 5790.909668, avg loss: 3.307202, ppl: 27.308599 +epoch: 0, batch: 24283, sum loss: 5418.812500, avg loss: 3.135887, ppl: 23.009033 +epoch: 0, batch: 24284, sum loss: 5311.602051, avg loss: 3.252665, ppl: 25.859163 +epoch: 0, batch: 24285, sum loss: 5019.062500, avg loss: 2.846887, ppl: 17.234055 +epoch: 0, batch: 24286, sum loss: 5527.958008, avg loss: 3.020742, ppl: 20.506506 +epoch: 0, batch: 24287, sum loss: 5860.802246, avg loss: 3.071699, ppl: 
21.578537 +epoch: 0, batch: 24288, sum loss: 5432.936523, avg loss: 3.019976, ppl: 20.490797 +epoch: 0, batch: 24289, sum loss: 5490.571289, avg loss: 3.241187, ppl: 25.564051 +epoch: 0, batch: 24290, sum loss: 5942.869141, avg loss: 3.336816, ppl: 28.129421 +epoch: 0, batch: 24291, sum loss: 5483.569824, avg loss: 2.941829, ppl: 18.950480 +epoch: 0, batch: 24292, sum loss: 5138.921387, avg loss: 3.108845, ppl: 22.395168 +epoch: 0, batch: 24293, sum loss: 5365.219727, avg loss: 3.180332, ppl: 24.054737 +epoch: 0, batch: 24294, sum loss: 4912.160645, avg loss: 3.276958, ppl: 26.495064 +epoch: 0, batch: 24295, sum loss: 5221.225586, avg loss: 3.126482, ppl: 22.793661 +epoch: 0, batch: 24296, sum loss: 5018.153809, avg loss: 2.971080, ppl: 19.512980 +epoch: 0, batch: 24297, sum loss: 4967.546387, avg loss: 3.270274, ppl: 26.318548 +epoch: 0, batch: 24298, sum loss: 4518.997070, avg loss: 3.030850, ppl: 20.714830 +epoch: 0, batch: 24299, sum loss: 5636.112305, avg loss: 3.084900, ppl: 21.865284 +epoch: 0, batch: 24300, sum loss: 5585.041016, avg loss: 3.217190, ppl: 24.957878 +epoch: 0, batch: 24301, sum loss: 5631.304688, avg loss: 2.943703, ppl: 18.986029 +epoch: 0, batch: 24302, sum loss: 5189.625488, avg loss: 2.957052, ppl: 19.241156 +epoch: 0, batch: 24303, sum loss: 4494.664551, avg loss: 2.833962, ppl: 17.012739 +epoch: 0, batch: 24304, sum loss: 5912.731934, avg loss: 3.125123, ppl: 22.762684 +epoch: 0, batch: 24305, sum loss: 4806.674805, avg loss: 2.893844, ppl: 18.062607 +epoch: 0, batch: 24306, sum loss: 5049.304199, avg loss: 3.041749, ppl: 20.941849 +epoch: 0, batch: 24307, sum loss: 5163.507324, avg loss: 3.095628, ppl: 22.101114 +epoch: 0, batch: 24308, sum loss: 4831.386719, avg loss: 3.061715, ppl: 21.364174 +epoch: 0, batch: 24309, sum loss: 5778.899414, avg loss: 2.960502, ppl: 19.307659 +epoch: 0, batch: 24310, sum loss: 5186.589844, avg loss: 2.977376, ppl: 19.636232 +epoch: 0, batch: 24311, sum loss: 5928.875977, avg loss: 3.481430, ppl: 
32.506184 +epoch: 0, batch: 24312, sum loss: 5124.768555, avg loss: 3.081641, ppl: 21.794130 +epoch: 0, batch: 24313, sum loss: 5305.825195, avg loss: 3.090172, ppl: 21.980860 +epoch: 0, batch: 24314, sum loss: 4585.019531, avg loss: 2.812895, ppl: 16.658079 +epoch: 0, batch: 24315, sum loss: 4194.211914, avg loss: 2.788705, ppl: 16.259943 +epoch: 0, batch: 24316, sum loss: 6138.426758, avg loss: 3.277323, ppl: 26.504738 +epoch: 0, batch: 24317, sum loss: 5939.374512, avg loss: 3.256236, ppl: 25.951674 +epoch: 0, batch: 24318, sum loss: 5077.295410, avg loss: 3.069707, ppl: 21.535595 +epoch: 0, batch: 24319, sum loss: 4668.875977, avg loss: 3.180433, ppl: 24.057175 +epoch: 0, batch: 24320, sum loss: 5630.033691, avg loss: 3.230083, ppl: 25.281742 +epoch: 0, batch: 24321, sum loss: 4726.370605, avg loss: 3.047305, ppl: 21.058523 +epoch: 0, batch: 24322, sum loss: 5138.588379, avg loss: 3.104887, ppl: 22.306704 +epoch: 0, batch: 24323, sum loss: 5696.958984, avg loss: 3.415443, ppl: 30.430433 +epoch: 0, batch: 24324, sum loss: 4361.018555, avg loss: 2.907346, ppl: 18.308140 +epoch: 0, batch: 24325, sum loss: 5185.758789, avg loss: 3.251260, ppl: 25.822845 +epoch: 0, batch: 24326, sum loss: 5626.344727, avg loss: 3.327229, ppl: 27.861046 +epoch: 0, batch: 24327, sum loss: 3982.280273, avg loss: 2.782865, ppl: 16.165274 +epoch: 0, batch: 24328, sum loss: 5613.171875, avg loss: 2.992096, ppl: 19.927406 +epoch: 0, batch: 24329, sum loss: 5258.629395, avg loss: 3.154547, ppl: 23.442410 +epoch: 0, batch: 24330, sum loss: 5670.887695, avg loss: 3.180532, ppl: 24.059544 +epoch: 0, batch: 24331, sum loss: 5401.833984, avg loss: 3.203935, ppl: 24.629253 +epoch: 0, batch: 24332, sum loss: 6131.071777, avg loss: 3.489511, ppl: 32.769936 +epoch: 0, batch: 24333, sum loss: 5391.522949, avg loss: 3.211151, ppl: 24.807627 +epoch: 0, batch: 24334, sum loss: 5793.488281, avg loss: 3.384047, ppl: 29.489870 +epoch: 0, batch: 24335, sum loss: 5822.767090, avg loss: 3.132204, ppl: 
22.924446 +epoch: 0, batch: 24336, sum loss: 5583.135742, avg loss: 3.280338, ppl: 26.584764 +epoch: 0, batch: 24337, sum loss: 5653.951660, avg loss: 3.462310, ppl: 31.890547 +epoch: 0, batch: 24338, sum loss: 5026.036133, avg loss: 2.927220, ppl: 18.675632 +epoch: 0, batch: 24339, sum loss: 5533.393555, avg loss: 3.172817, ppl: 23.874651 +epoch: 0, batch: 24340, sum loss: 4718.580078, avg loss: 2.950957, ppl: 19.124243 +epoch: 0, batch: 24341, sum loss: 5069.010742, avg loss: 2.936854, ppl: 18.856438 +epoch: 0, batch: 24342, sum loss: 5590.301758, avg loss: 3.091981, ppl: 22.020657 +epoch: 0, batch: 24343, sum loss: 4718.594238, avg loss: 3.056085, ppl: 21.244215 +epoch: 0, batch: 24344, sum loss: 5788.509277, avg loss: 3.294541, ppl: 26.965044 +epoch: 0, batch: 24345, sum loss: 4932.791992, avg loss: 3.178345, ppl: 24.007000 +epoch: 0, batch: 24346, sum loss: 4435.426758, avg loss: 2.906571, ppl: 18.293955 +epoch: 0, batch: 24347, sum loss: 5342.316406, avg loss: 3.163006, ppl: 23.641546 +epoch: 0, batch: 24348, sum loss: 4802.940918, avg loss: 3.049486, ppl: 21.104498 +epoch: 0, batch: 24349, sum loss: 6531.537598, avg loss: 3.472375, ppl: 32.213161 +epoch: 0, batch: 24350, sum loss: 5851.068848, avg loss: 3.300095, ppl: 27.115223 +epoch: 0, batch: 24351, sum loss: 4810.919922, avg loss: 3.029547, ppl: 20.687853 +epoch: 0, batch: 24352, sum loss: 4521.030762, avg loss: 3.073440, ppl: 21.616140 +epoch: 0, batch: 24353, sum loss: 5732.323242, avg loss: 3.115393, ppl: 22.542286 +epoch: 0, batch: 24354, sum loss: 5780.553223, avg loss: 3.119565, ppl: 22.636520 +epoch: 0, batch: 24355, sum loss: 5459.952148, avg loss: 3.038371, ppl: 20.871214 +epoch: 0, batch: 24356, sum loss: 5171.155273, avg loss: 3.100213, ppl: 22.202681 +epoch: 0, batch: 24357, sum loss: 4579.323730, avg loss: 2.907507, ppl: 18.311090 +epoch: 0, batch: 24358, sum loss: 4306.237793, avg loss: 2.783606, ppl: 16.177244 +epoch: 0, batch: 24359, sum loss: 5353.408691, avg loss: 2.922166, ppl: 
18.581497 +epoch: 0, batch: 24360, sum loss: 5310.458008, avg loss: 3.096477, ppl: 22.119881 +epoch: 0, batch: 24361, sum loss: 4517.454590, avg loss: 2.719720, ppl: 15.176070 +epoch: 0, batch: 24362, sum loss: 4480.051758, avg loss: 2.974802, ppl: 19.585745 +epoch: 0, batch: 24363, sum loss: 5047.248047, avg loss: 2.811837, ppl: 16.640467 +epoch: 0, batch: 24364, sum loss: 4753.348633, avg loss: 3.147913, ppl: 23.287413 +epoch: 0, batch: 24365, sum loss: 5449.840820, avg loss: 3.162995, ppl: 23.641304 +epoch: 0, batch: 24366, sum loss: 5061.446777, avg loss: 3.211578, ppl: 24.818216 +epoch: 0, batch: 24367, sum loss: 4481.957520, avg loss: 2.927471, ppl: 18.680323 +epoch: 0, batch: 24368, sum loss: 5619.095703, avg loss: 3.045580, ppl: 21.022224 +epoch: 0, batch: 24369, sum loss: 4596.700195, avg loss: 2.950385, ppl: 19.113317 +epoch: 0, batch: 24370, sum loss: 5161.369629, avg loss: 3.066767, ppl: 21.472380 +epoch: 0, batch: 24371, sum loss: 5200.673340, avg loss: 3.097483, ppl: 22.142141 +epoch: 0, batch: 24372, sum loss: 5044.559570, avg loss: 3.048072, ppl: 21.074680 +epoch: 0, batch: 24373, sum loss: 4581.922852, avg loss: 2.756873, ppl: 15.750516 +epoch: 0, batch: 24374, sum loss: 5645.811035, avg loss: 3.184327, ppl: 24.151020 +epoch: 0, batch: 24375, sum loss: 4911.198242, avg loss: 2.980096, ppl: 19.689709 +epoch: 0, batch: 24376, sum loss: 4965.842773, avg loss: 3.069124, ppl: 21.523041 +epoch: 0, batch: 24377, sum loss: 4131.213379, avg loss: 2.925788, ppl: 18.648924 +epoch: 0, batch: 24378, sum loss: 5310.405273, avg loss: 3.092839, ppl: 22.039570 +epoch: 0, batch: 24379, sum loss: 4866.538574, avg loss: 3.135657, ppl: 23.003735 +epoch: 0, batch: 24380, sum loss: 4548.156250, avg loss: 3.000103, ppl: 20.087610 +epoch: 0, batch: 24381, sum loss: 5633.476562, avg loss: 3.159549, ppl: 23.559978 +epoch: 0, batch: 24382, sum loss: 5513.991211, avg loss: 3.102978, ppl: 22.264145 +epoch: 0, batch: 24383, sum loss: 5387.684570, avg loss: 3.163643, ppl: 
23.656630 +epoch: 0, batch: 24384, sum loss: 5229.000000, avg loss: 3.033063, ppl: 20.760723 +epoch: 0, batch: 24385, sum loss: 4514.853516, avg loss: 2.844898, ppl: 17.199812 +epoch: 0, batch: 24386, sum loss: 5347.700195, avg loss: 3.048860, ppl: 21.091284 +epoch: 0, batch: 24387, sum loss: 4973.747559, avg loss: 3.147942, ppl: 23.288078 +epoch: 0, batch: 24388, sum loss: 6414.000977, avg loss: 3.279141, ppl: 26.552965 +epoch: 0, batch: 24389, sum loss: 6051.724121, avg loss: 2.987031, ppl: 19.826725 +epoch: 0, batch: 24390, sum loss: 5353.292480, avg loss: 3.083694, ppl: 21.838921 +epoch: 0, batch: 24391, sum loss: 5218.035156, avg loss: 3.117106, ppl: 22.580935 +epoch: 0, batch: 24392, sum loss: 4900.004883, avg loss: 2.768364, ppl: 15.932554 +epoch: 0, batch: 24393, sum loss: 6248.594727, avg loss: 3.276662, ppl: 26.487213 +epoch: 0, batch: 24394, sum loss: 5679.659180, avg loss: 3.372719, ppl: 29.157707 +epoch: 0, batch: 24395, sum loss: 4760.842285, avg loss: 3.053780, ppl: 21.195303 +epoch: 0, batch: 24396, sum loss: 6205.733887, avg loss: 3.252481, ppl: 25.854404 +epoch: 0, batch: 24397, sum loss: 4958.861816, avg loss: 3.068603, ppl: 21.511820 +epoch: 0, batch: 24398, sum loss: 6121.598633, avg loss: 3.336021, ppl: 28.107071 +epoch: 0, batch: 24399, sum loss: 4864.356445, avg loss: 3.000837, ppl: 20.102348 +epoch: 0, batch: 24400, sum loss: 4434.127930, avg loss: 2.926817, ppl: 18.668119 +epoch: 0, batch: 24401, sum loss: 5128.483398, avg loss: 2.954195, ppl: 19.186281 +epoch: 0, batch: 24402, sum loss: 5498.380859, avg loss: 3.085511, ppl: 21.878649 +epoch: 0, batch: 24403, sum loss: 3903.897217, avg loss: 2.913356, ppl: 18.418509 +epoch: 0, batch: 24404, sum loss: 5337.851074, avg loss: 3.071261, ppl: 21.569077 +epoch: 0, batch: 24405, sum loss: 4838.038086, avg loss: 3.160051, ppl: 23.571800 +epoch: 0, batch: 24406, sum loss: 5301.798340, avg loss: 3.244675, ppl: 25.653362 +epoch: 0, batch: 24407, sum loss: 6194.672852, avg loss: 3.372168, ppl: 
29.141647 +epoch: 0, batch: 24408, sum loss: 5258.181641, avg loss: 2.932617, ppl: 18.776699 +epoch: 0, batch: 24409, sum loss: 4652.547852, avg loss: 3.073017, ppl: 21.606995 +epoch: 0, batch: 24410, sum loss: 4879.004395, avg loss: 3.186809, ppl: 24.211048 +epoch: 0, batch: 24411, sum loss: 5489.153809, avg loss: 3.165602, ppl: 23.703009 +epoch: 0, batch: 24412, sum loss: 5130.994141, avg loss: 3.085384, ppl: 21.875874 +epoch: 0, batch: 24413, sum loss: 5738.152832, avg loss: 3.200308, ppl: 24.540094 +epoch: 0, batch: 24414, sum loss: 4867.958984, avg loss: 3.014216, ppl: 20.373116 +epoch: 0, batch: 24415, sum loss: 5062.144531, avg loss: 2.975981, ppl: 19.608860 +epoch: 0, batch: 24416, sum loss: 5961.518066, avg loss: 3.300951, ppl: 27.138441 +epoch: 0, batch: 24417, sum loss: 5224.984863, avg loss: 2.963690, ppl: 19.369305 +epoch: 0, batch: 24418, sum loss: 4695.176270, avg loss: 3.056756, ppl: 21.258472 +epoch: 0, batch: 24419, sum loss: 7198.158691, avg loss: 3.384184, ppl: 29.493906 +epoch: 0, batch: 24420, sum loss: 5144.052734, avg loss: 3.223091, ppl: 25.105593 +epoch: 0, batch: 24421, sum loss: 4307.152344, avg loss: 2.796852, ppl: 16.392962 +epoch: 0, batch: 24422, sum loss: 4468.984375, avg loss: 2.896296, ppl: 18.106953 +epoch: 0, batch: 24423, sum loss: 4714.284668, avg loss: 2.902884, ppl: 18.226645 +epoch: 0, batch: 24424, sum loss: 5145.995605, avg loss: 3.317857, ppl: 27.601126 +epoch: 0, batch: 24425, sum loss: 5901.591797, avg loss: 3.223152, ppl: 25.107143 +epoch: 0, batch: 24426, sum loss: 6214.379395, avg loss: 3.289772, ppl: 26.836744 +epoch: 0, batch: 24427, sum loss: 5582.711914, avg loss: 3.080967, ppl: 21.779451 +epoch: 0, batch: 24428, sum loss: 5484.711426, avg loss: 3.243472, ppl: 25.622538 +epoch: 0, batch: 24429, sum loss: 6068.926758, avg loss: 3.142893, ppl: 23.170811 +epoch: 0, batch: 24430, sum loss: 4758.194336, avg loss: 2.742475, ppl: 15.525363 +epoch: 0, batch: 24431, sum loss: 4381.615723, avg loss: 2.835997, ppl: 
17.047394 +epoch: 0, batch: 24432, sum loss: 5153.415527, avg loss: 3.003156, ppl: 20.149027 +epoch: 0, batch: 24433, sum loss: 5272.996094, avg loss: 3.055038, ppl: 21.221996 +epoch: 0, batch: 24434, sum loss: 5612.339844, avg loss: 3.099028, ppl: 22.176394 +epoch: 0, batch: 24435, sum loss: 6237.434082, avg loss: 3.231831, ppl: 25.325994 +epoch: 0, batch: 24436, sum loss: 5421.131836, avg loss: 3.115593, ppl: 22.546795 +epoch: 0, batch: 24437, sum loss: 5846.585938, avg loss: 3.133219, ppl: 22.947720 +epoch: 0, batch: 24438, sum loss: 5294.630859, avg loss: 3.072914, ppl: 21.604771 +epoch: 0, batch: 24439, sum loss: 4277.095703, avg loss: 2.646718, ppl: 14.107655 +epoch: 0, batch: 24440, sum loss: 5474.189453, avg loss: 3.092762, ppl: 22.037872 +epoch: 0, batch: 24441, sum loss: 6396.024414, avg loss: 3.314003, ppl: 27.494955 +epoch: 0, batch: 24442, sum loss: 5228.561035, avg loss: 3.091994, ppl: 22.020935 +epoch: 0, batch: 24443, sum loss: 6245.014648, avg loss: 3.264513, ppl: 26.167377 +epoch: 0, batch: 24444, sum loss: 4695.184570, avg loss: 3.117652, ppl: 22.593277 +epoch: 0, batch: 24445, sum loss: 4731.460449, avg loss: 3.029104, ppl: 20.678696 +epoch: 0, batch: 24446, sum loss: 5895.952148, avg loss: 3.369116, ppl: 29.052820 +epoch: 0, batch: 24447, sum loss: 5869.911133, avg loss: 3.179800, ppl: 24.041952 +epoch: 0, batch: 24448, sum loss: 4706.170898, avg loss: 3.009061, ppl: 20.268351 +epoch: 0, batch: 24449, sum loss: 5200.933105, avg loss: 3.206494, ppl: 24.692360 +epoch: 0, batch: 24450, sum loss: 6206.469238, avg loss: 3.516413, ppl: 33.663467 +epoch: 0, batch: 24451, sum loss: 5044.844238, avg loss: 3.280133, ppl: 26.579302 +epoch: 0, batch: 24452, sum loss: 4351.608398, avg loss: 3.040956, ppl: 20.925243 +epoch: 0, batch: 24453, sum loss: 5677.335938, avg loss: 3.231267, ppl: 25.311701 +epoch: 0, batch: 24454, sum loss: 5636.049805, avg loss: 3.189615, ppl: 24.279078 +epoch: 0, batch: 24455, sum loss: 4644.218262, avg loss: 3.017686, ppl: 
20.443922 +epoch: 0, batch: 24456, sum loss: 5933.738281, avg loss: 3.230124, ppl: 25.282804 +epoch: 0, batch: 24457, sum loss: 4900.675781, avg loss: 2.973711, ppl: 19.564388 +epoch: 0, batch: 24458, sum loss: 6016.533203, avg loss: 3.196883, ppl: 24.456175 +epoch: 0, batch: 24459, sum loss: 5516.474121, avg loss: 3.129027, ppl: 22.851727 +epoch: 0, batch: 24460, sum loss: 4749.461426, avg loss: 2.800390, ppl: 16.451061 +epoch: 0, batch: 24461, sum loss: 4661.714844, avg loss: 2.832148, ppl: 16.981892 +epoch: 0, batch: 24462, sum loss: 5063.231934, avg loss: 3.196485, ppl: 24.446445 +epoch: 0, batch: 24463, sum loss: 5365.683594, avg loss: 3.173083, ppl: 23.881004 +epoch: 0, batch: 24464, sum loss: 4935.887695, avg loss: 3.075319, ppl: 21.656796 +epoch: 0, batch: 24465, sum loss: 5592.241211, avg loss: 2.993705, ppl: 19.959496 +epoch: 0, batch: 24466, sum loss: 5114.993652, avg loss: 2.960066, ppl: 19.299242 +epoch: 0, batch: 24467, sum loss: 5979.386230, avg loss: 3.185608, ppl: 24.181990 +epoch: 0, batch: 24468, sum loss: 5573.504883, avg loss: 3.221679, ppl: 25.070177 +epoch: 0, batch: 24469, sum loss: 4426.844727, avg loss: 2.981040, ppl: 19.708303 +epoch: 0, batch: 24470, sum loss: 5670.060547, avg loss: 3.122280, ppl: 22.698071 +epoch: 0, batch: 24471, sum loss: 3776.849854, avg loss: 2.869947, ppl: 17.636078 +epoch: 0, batch: 24472, sum loss: 5453.995605, avg loss: 3.454082, ppl: 31.629240 +epoch: 0, batch: 24473, sum loss: 4518.010742, avg loss: 2.912966, ppl: 18.411331 +epoch: 0, batch: 24474, sum loss: 5230.491211, avg loss: 3.067737, ppl: 21.493206 +epoch: 0, batch: 24475, sum loss: 5061.681641, avg loss: 2.880866, ppl: 17.829708 +epoch: 0, batch: 24476, sum loss: 5560.989258, avg loss: 3.280820, ppl: 26.597565 +epoch: 0, batch: 24477, sum loss: 5094.312988, avg loss: 3.021538, ppl: 20.522833 +epoch: 0, batch: 24478, sum loss: 4546.065430, avg loss: 2.973228, ppl: 19.554941 +epoch: 0, batch: 24479, sum loss: 3940.702393, avg loss: 2.804770, ppl: 
16.523283 +epoch: 0, batch: 24480, sum loss: 4487.773438, avg loss: 2.811888, ppl: 16.641312 +epoch: 0, batch: 24481, sum loss: 5805.512695, avg loss: 3.165492, ppl: 23.700403 +epoch: 0, batch: 24482, sum loss: 5616.847656, avg loss: 3.130907, ppl: 22.894743 +epoch: 0, batch: 24483, sum loss: 4886.751953, avg loss: 2.834543, ppl: 17.022619 +epoch: 0, batch: 24484, sum loss: 5308.547852, avg loss: 3.319917, ppl: 27.658060 +epoch: 0, batch: 24485, sum loss: 5405.244141, avg loss: 3.120811, ppl: 22.664747 +epoch: 0, batch: 24486, sum loss: 5013.913574, avg loss: 3.029555, ppl: 20.688026 +epoch: 0, batch: 24487, sum loss: 6237.406250, avg loss: 3.558133, ppl: 35.097595 +epoch: 0, batch: 24488, sum loss: 7070.626953, avg loss: 3.381457, ppl: 29.413599 +epoch: 0, batch: 24489, sum loss: 5635.996582, avg loss: 3.001063, ppl: 20.106901 +epoch: 0, batch: 24490, sum loss: 5576.405273, avg loss: 3.113571, ppl: 22.501245 +epoch: 0, batch: 24491, sum loss: 5024.033203, avg loss: 3.183798, ppl: 24.138252 +epoch: 0, batch: 24492, sum loss: 5468.826660, avg loss: 3.183252, ppl: 24.125078 +epoch: 0, batch: 24493, sum loss: 5427.244629, avg loss: 3.101283, ppl: 22.226440 +epoch: 0, batch: 24494, sum loss: 6221.626953, avg loss: 3.297100, ppl: 27.034115 +epoch: 0, batch: 24495, sum loss: 3944.064453, avg loss: 2.866326, ppl: 17.572336 +epoch: 0, batch: 24496, sum loss: 4131.288086, avg loss: 2.921703, ppl: 18.572891 +epoch: 0, batch: 24497, sum loss: 4123.815918, avg loss: 2.763952, ppl: 15.862404 +epoch: 0, batch: 24498, sum loss: 4591.670898, avg loss: 3.378713, ppl: 29.332993 +epoch: 0, batch: 24499, sum loss: 4974.192383, avg loss: 3.036747, ppl: 20.837353 +epoch: 0, batch: 24500, sum loss: 5400.337891, avg loss: 2.978675, ppl: 19.661751 +epoch: 0, batch: 24501, sum loss: 5403.215820, avg loss: 3.210467, ppl: 24.790657 +epoch: 0, batch: 24502, sum loss: 4250.558594, avg loss: 2.635188, ppl: 13.945939 +epoch: 0, batch: 24503, sum loss: 5392.809570, avg loss: 3.058882, ppl: 
21.303732 +epoch: 0, batch: 24504, sum loss: 6076.993164, avg loss: 3.253208, ppl: 25.873219 +epoch: 0, batch: 24505, sum loss: 4476.464844, avg loss: 3.034891, ppl: 20.798717 +epoch: 0, batch: 24506, sum loss: 5767.387207, avg loss: 3.100746, ppl: 22.214516 +epoch: 0, batch: 24507, sum loss: 4017.513916, avg loss: 2.909134, ppl: 18.340906 +epoch: 0, batch: 24508, sum loss: 6167.473633, avg loss: 3.261488, ppl: 26.088327 +epoch: 0, batch: 24509, sum loss: 5918.882812, avg loss: 3.312190, ppl: 27.445152 +epoch: 0, batch: 24510, sum loss: 4691.888672, avg loss: 2.960182, ppl: 19.301489 +epoch: 0, batch: 24511, sum loss: 5882.345703, avg loss: 3.299129, ppl: 27.089022 +epoch: 0, batch: 24512, sum loss: 5250.210938, avg loss: 3.059564, ppl: 21.318253 +epoch: 0, batch: 24513, sum loss: 6483.164062, avg loss: 3.573961, ppl: 35.657536 +epoch: 0, batch: 24514, sum loss: 5603.620117, avg loss: 3.092506, ppl: 22.032215 +epoch: 0, batch: 24515, sum loss: 4723.914062, avg loss: 2.945084, ppl: 19.012253 +epoch: 0, batch: 24516, sum loss: 4924.675293, avg loss: 3.118857, ppl: 22.620508 +epoch: 0, batch: 24517, sum loss: 6153.959473, avg loss: 3.319288, ppl: 27.640650 +epoch: 0, batch: 24518, sum loss: 4312.891113, avg loss: 3.035110, ppl: 20.803261 +epoch: 0, batch: 24519, sum loss: 5712.644043, avg loss: 3.211154, ppl: 24.807709 +epoch: 0, batch: 24520, sum loss: 5649.945312, avg loss: 3.073964, ppl: 21.627457 +epoch: 0, batch: 24521, sum loss: 5332.840332, avg loss: 3.075456, ppl: 21.659750 +epoch: 0, batch: 24522, sum loss: 4844.258301, avg loss: 3.193315, ppl: 24.369066 +epoch: 0, batch: 24523, sum loss: 5553.100586, avg loss: 3.245529, ppl: 25.675299 +epoch: 0, batch: 24524, sum loss: 4755.455566, avg loss: 2.964748, ppl: 19.389814 +epoch: 0, batch: 24525, sum loss: 5389.079590, avg loss: 3.179398, ppl: 24.032284 +epoch: 0, batch: 24526, sum loss: 7471.598145, avg loss: 3.329589, ppl: 27.926872 +epoch: 0, batch: 24527, sum loss: 4780.768066, avg loss: 3.155622, ppl: 
23.467640 +epoch: 0, batch: 24528, sum loss: 5846.773926, avg loss: 3.075631, ppl: 21.663540 +epoch: 0, batch: 24529, sum loss: 4335.276855, avg loss: 2.754305, ppl: 15.710124 +epoch: 0, batch: 24530, sum loss: 5980.337891, avg loss: 3.320565, ppl: 27.675983 +epoch: 0, batch: 24531, sum loss: 4908.984863, avg loss: 3.130730, ppl: 22.890688 +epoch: 0, batch: 24532, sum loss: 5393.352539, avg loss: 3.099628, ppl: 22.189695 +epoch: 0, batch: 24533, sum loss: 4510.496094, avg loss: 2.819060, ppl: 16.761089 +epoch: 0, batch: 24534, sum loss: 4739.738281, avg loss: 2.997937, ppl: 20.044138 +epoch: 0, batch: 24535, sum loss: 5521.128906, avg loss: 3.198800, ppl: 24.503111 +epoch: 0, batch: 24536, sum loss: 4708.993164, avg loss: 3.274682, ppl: 26.434826 +epoch: 0, batch: 24537, sum loss: 5929.467773, avg loss: 3.243691, ppl: 25.628153 +epoch: 0, batch: 24538, sum loss: 4838.250488, avg loss: 3.044840, ppl: 21.006666 +epoch: 0, batch: 24539, sum loss: 5779.365723, avg loss: 3.191257, ppl: 24.318972 +epoch: 0, batch: 24540, sum loss: 5602.168457, avg loss: 2.936147, ppl: 18.843103 +epoch: 0, batch: 24541, sum loss: 5586.408691, avg loss: 3.253587, ppl: 25.883017 +epoch: 0, batch: 24542, sum loss: 5407.462891, avg loss: 2.826693, ppl: 16.889507 +epoch: 0, batch: 24543, sum loss: 4420.313477, avg loss: 2.797667, ppl: 16.406322 +epoch: 0, batch: 24544, sum loss: 5993.576172, avg loss: 3.372862, ppl: 29.161873 +epoch: 0, batch: 24545, sum loss: 5901.626953, avg loss: 3.334253, ppl: 28.057405 +epoch: 0, batch: 24546, sum loss: 4436.732910, avg loss: 2.772958, ppl: 16.005911 +epoch: 0, batch: 24547, sum loss: 5044.789551, avg loss: 3.091170, ppl: 22.002808 +epoch: 0, batch: 24548, sum loss: 5106.678223, avg loss: 3.050585, ppl: 21.127691 +epoch: 0, batch: 24549, sum loss: 6304.265137, avg loss: 3.351550, ppl: 28.546936 +epoch: 0, batch: 24550, sum loss: 5899.380859, avg loss: 3.061433, ppl: 21.358137 +epoch: 0, batch: 24551, sum loss: 4449.685547, avg loss: 3.079367, ppl: 
21.744637 +epoch: 0, batch: 24552, sum loss: 5429.519531, avg loss: 3.133018, ppl: 22.943108 +epoch: 0, batch: 24553, sum loss: 4433.025879, avg loss: 2.744908, ppl: 15.563176 +epoch: 0, batch: 24554, sum loss: 5406.106445, avg loss: 3.286387, ppl: 26.746048 +epoch: 0, batch: 24555, sum loss: 6431.511719, avg loss: 3.236795, ppl: 25.452015 +epoch: 0, batch: 24556, sum loss: 4623.351562, avg loss: 2.948566, ppl: 19.078575 +epoch: 0, batch: 24557, sum loss: 5605.301270, avg loss: 3.206694, ppl: 24.697311 +epoch: 0, batch: 24558, sum loss: 4255.800781, avg loss: 2.959528, ppl: 19.288860 +epoch: 0, batch: 24559, sum loss: 6011.849121, avg loss: 3.155826, ppl: 23.472425 +epoch: 0, batch: 24560, sum loss: 5163.075684, avg loss: 3.084275, ppl: 21.851614 +epoch: 0, batch: 24561, sum loss: 5286.572266, avg loss: 2.981710, ppl: 19.721519 +epoch: 0, batch: 24562, sum loss: 4651.270996, avg loss: 2.932706, ppl: 18.778370 +epoch: 0, batch: 24563, sum loss: 5645.487305, avg loss: 3.007718, ppl: 20.241163 +epoch: 0, batch: 24564, sum loss: 5210.431641, avg loss: 2.807345, ppl: 16.565872 +epoch: 0, batch: 24565, sum loss: 5109.471191, avg loss: 3.002039, ppl: 20.126541 +epoch: 0, batch: 24566, sum loss: 5360.672363, avg loss: 3.089725, ppl: 21.971031 +epoch: 0, batch: 24567, sum loss: 5898.662109, avg loss: 3.051558, ppl: 21.148277 +epoch: 0, batch: 24568, sum loss: 5375.920410, avg loss: 2.970122, ppl: 19.494295 +epoch: 0, batch: 24569, sum loss: 5278.071777, avg loss: 3.028154, ppl: 20.659054 +epoch: 0, batch: 24570, sum loss: 6587.809570, avg loss: 3.318796, ppl: 27.627058 +epoch: 0, batch: 24571, sum loss: 4870.735840, avg loss: 3.059507, ppl: 21.317053 +epoch: 0, batch: 24572, sum loss: 4711.979492, avg loss: 3.166653, ppl: 23.727932 +epoch: 0, batch: 24573, sum loss: 5113.075684, avg loss: 3.018344, ppl: 20.457394 +epoch: 0, batch: 24574, sum loss: 4557.011230, avg loss: 2.826930, ppl: 16.893518 +epoch: 0, batch: 24575, sum loss: 6120.589844, avg loss: 3.315596, ppl: 
27.538799 +epoch: 0, batch: 24576, sum loss: 6516.342773, avg loss: 3.254916, ppl: 25.917448 +epoch: 0, batch: 24577, sum loss: 4961.553711, avg loss: 2.827096, ppl: 16.896322 +epoch: 0, batch: 24578, sum loss: 5013.123047, avg loss: 3.168851, ppl: 23.780153 +epoch: 0, batch: 24579, sum loss: 4831.230957, avg loss: 2.922705, ppl: 18.591503 +epoch: 0, batch: 24580, sum loss: 5057.160156, avg loss: 3.028240, ppl: 20.660831 +epoch: 0, batch: 24581, sum loss: 5387.374512, avg loss: 3.097973, ppl: 22.152998 +epoch: 0, batch: 24582, sum loss: 5028.239258, avg loss: 3.144615, ppl: 23.210735 +epoch: 0, batch: 24583, sum loss: 5320.786621, avg loss: 3.054412, ppl: 21.208719 +epoch: 0, batch: 24584, sum loss: 5191.890625, avg loss: 3.165787, ppl: 23.707394 +epoch: 0, batch: 24585, sum loss: 5711.426758, avg loss: 3.146792, ppl: 23.261316 +epoch: 0, batch: 24586, sum loss: 6026.107422, avg loss: 3.334868, ppl: 28.074690 +epoch: 0, batch: 24587, sum loss: 5471.726562, avg loss: 3.197970, ppl: 24.482784 +epoch: 0, batch: 24588, sum loss: 5447.227051, avg loss: 3.198606, ppl: 24.498367 +epoch: 0, batch: 24589, sum loss: 5073.232422, avg loss: 3.100998, ppl: 22.220114 +epoch: 0, batch: 24590, sum loss: 5604.466797, avg loss: 3.245204, ppl: 25.666939 +epoch: 0, batch: 24591, sum loss: 5912.826172, avg loss: 3.006012, ppl: 20.206659 +epoch: 0, batch: 24592, sum loss: 6438.127930, avg loss: 3.227132, ppl: 25.207249 +epoch: 0, batch: 24593, sum loss: 4110.770508, avg loss: 2.708018, ppl: 14.999511 +epoch: 0, batch: 24594, sum loss: 5419.026855, avg loss: 3.246871, ppl: 25.709755 +epoch: 0, batch: 24595, sum loss: 4666.543945, avg loss: 2.940481, ppl: 18.924955 +epoch: 0, batch: 24596, sum loss: 4992.481445, avg loss: 2.978808, ppl: 19.664356 +epoch: 0, batch: 24597, sum loss: 5019.559082, avg loss: 3.027478, ppl: 20.645109 +epoch: 0, batch: 24598, sum loss: 5154.937012, avg loss: 3.018113, ppl: 20.452660 +epoch: 0, batch: 24599, sum loss: 5050.854980, avg loss: 3.085434, ppl: 
21.876953 +epoch: 0, batch: 24600, sum loss: 4177.414551, avg loss: 2.996711, ppl: 20.019575 +epoch: 0, batch: 24601, sum loss: 6396.599121, avg loss: 3.276947, ppl: 26.494749 +epoch: 0, batch: 24602, sum loss: 5652.579590, avg loss: 3.168487, ppl: 23.771481 +epoch: 0, batch: 24603, sum loss: 4440.953125, avg loss: 3.178921, ppl: 24.020826 +epoch: 0, batch: 24604, sum loss: 5000.808105, avg loss: 3.069864, ppl: 21.538969 +epoch: 0, batch: 24605, sum loss: 4415.076172, avg loss: 2.941423, ppl: 18.942785 +epoch: 0, batch: 24606, sum loss: 5503.915039, avg loss: 3.201812, ppl: 24.577030 +epoch: 0, batch: 24607, sum loss: 5272.647949, avg loss: 3.129168, ppl: 22.854952 +epoch: 0, batch: 24608, sum loss: 5881.678711, avg loss: 3.077801, ppl: 21.710619 +epoch: 0, batch: 24609, sum loss: 5479.153320, avg loss: 3.280930, ppl: 26.600508 +epoch: 0, batch: 24610, sum loss: 5375.511230, avg loss: 2.979773, ppl: 19.683353 +epoch: 0, batch: 24611, sum loss: 5935.982422, avg loss: 3.224325, ppl: 25.136606 +epoch: 0, batch: 24612, sum loss: 6057.876465, avg loss: 3.200146, ppl: 24.536112 +epoch: 0, batch: 24613, sum loss: 4909.545410, avg loss: 3.038085, ppl: 20.865248 +epoch: 0, batch: 24614, sum loss: 4684.113770, avg loss: 2.975930, ppl: 19.607845 +epoch: 0, batch: 24615, sum loss: 5145.408203, avg loss: 3.238142, ppl: 25.486330 +epoch: 0, batch: 24616, sum loss: 5468.769043, avg loss: 3.105491, ppl: 22.320168 +epoch: 0, batch: 24617, sum loss: 5469.296875, avg loss: 3.221023, ppl: 25.053740 +epoch: 0, batch: 24618, sum loss: 4671.005859, avg loss: 2.832629, ppl: 16.990076 +epoch: 0, batch: 24619, sum loss: 5059.257324, avg loss: 3.071801, ppl: 21.580744 +epoch: 0, batch: 24620, sum loss: 5386.658691, avg loss: 3.150093, ppl: 23.338232 +epoch: 0, batch: 24621, sum loss: 4847.651367, avg loss: 2.901048, ppl: 18.193205 +epoch: 0, batch: 24622, sum loss: 6375.944336, avg loss: 3.444595, ppl: 31.330580 +epoch: 0, batch: 24623, sum loss: 4859.480469, avg loss: 3.239654, ppl: 
25.524878 +epoch: 0, batch: 24624, sum loss: 5540.863281, avg loss: 2.964614, ppl: 19.387213 +epoch: 0, batch: 24625, sum loss: 6386.349121, avg loss: 3.334908, ppl: 28.075808 +epoch: 0, batch: 24626, sum loss: 5337.142090, avg loss: 3.154339, ppl: 23.437548 +epoch: 0, batch: 24627, sum loss: 5878.334961, avg loss: 3.182640, ppl: 24.110312 +epoch: 0, batch: 24628, sum loss: 4786.729980, avg loss: 2.958424, ppl: 19.267580 +epoch: 0, batch: 24629, sum loss: 5182.473633, avg loss: 3.088482, ppl: 21.943750 +epoch: 0, batch: 24630, sum loss: 5281.852051, avg loss: 3.049568, ppl: 21.106228 +epoch: 0, batch: 24631, sum loss: 4986.393555, avg loss: 3.259081, ppl: 26.025606 +epoch: 0, batch: 24632, sum loss: 5259.642578, avg loss: 3.022783, ppl: 20.548399 +epoch: 0, batch: 24633, sum loss: 5614.092773, avg loss: 3.329830, ppl: 27.933584 +epoch: 0, batch: 24634, sum loss: 5383.738281, avg loss: 2.976085, ppl: 19.610897 +epoch: 0, batch: 24635, sum loss: 5459.969727, avg loss: 3.172556, ppl: 23.868423 +epoch: 0, batch: 24636, sum loss: 4343.922852, avg loss: 2.979371, ppl: 19.675434 +epoch: 0, batch: 24637, sum loss: 5178.629883, avg loss: 3.048046, ppl: 21.074127 +epoch: 0, batch: 24638, sum loss: 4876.261230, avg loss: 3.090153, ppl: 21.980434 +epoch: 0, batch: 24639, sum loss: 5533.923828, avg loss: 3.032287, ppl: 20.744623 +epoch: 0, batch: 24640, sum loss: 4623.865234, avg loss: 2.917265, ppl: 18.490650 +epoch: 0, batch: 24641, sum loss: 5185.453613, avg loss: 3.200897, ppl: 24.554558 +epoch: 0, batch: 24642, sum loss: 5952.691406, avg loss: 3.376456, ppl: 29.266859 +epoch: 0, batch: 24643, sum loss: 4970.901855, avg loss: 2.798931, ppl: 16.427082 +epoch: 0, batch: 24644, sum loss: 4450.766113, avg loss: 2.895749, ppl: 18.097054 +epoch: 0, batch: 24645, sum loss: 5374.271973, avg loss: 3.119136, ppl: 22.626829 +epoch: 0, batch: 24646, sum loss: 4696.064453, avg loss: 3.031675, ppl: 20.731926 +epoch: 0, batch: 24647, sum loss: 5427.771973, avg loss: 3.047598, ppl: 
21.064690 +epoch: 0, batch: 24648, sum loss: 4990.662109, avg loss: 3.046802, ppl: 21.047926 +epoch: 0, batch: 24649, sum loss: 4047.629395, avg loss: 2.521887, ppl: 12.452075 +epoch: 0, batch: 24650, sum loss: 4286.768555, avg loss: 2.771020, ppl: 15.974927 +epoch: 0, batch: 24651, sum loss: 5928.095703, avg loss: 3.375909, ppl: 29.250856 +epoch: 0, batch: 24652, sum loss: 6176.335938, avg loss: 3.437026, ppl: 31.094355 +epoch: 0, batch: 24653, sum loss: 4218.548340, avg loss: 3.002526, ppl: 20.136328 +epoch: 0, batch: 24654, sum loss: 4323.545898, avg loss: 2.778628, ppl: 16.096926 +epoch: 0, batch: 24655, sum loss: 5470.105957, avg loss: 2.853472, ppl: 17.347914 +epoch: 0, batch: 24656, sum loss: 5359.152832, avg loss: 3.417827, ppl: 30.503056 +epoch: 0, batch: 24657, sum loss: 5537.520508, avg loss: 3.022664, ppl: 20.545954 +epoch: 0, batch: 24658, sum loss: 5391.244629, avg loss: 2.911039, ppl: 18.375883 +epoch: 0, batch: 24659, sum loss: 6358.391602, avg loss: 3.357123, ppl: 28.706493 +epoch: 0, batch: 24660, sum loss: 5526.420410, avg loss: 3.161568, ppl: 23.607582 +epoch: 0, batch: 24661, sum loss: 5195.668945, avg loss: 2.994622, ppl: 19.977806 +epoch: 0, batch: 24662, sum loss: 6126.459961, avg loss: 3.276182, ppl: 26.474499 +epoch: 0, batch: 24663, sum loss: 6120.817383, avg loss: 3.238528, ppl: 25.496159 +epoch: 0, batch: 24664, sum loss: 4515.705566, avg loss: 2.896540, ppl: 18.111370 +epoch: 0, batch: 24665, sum loss: 7066.943848, avg loss: 3.327186, ppl: 27.859844 +epoch: 0, batch: 24666, sum loss: 4745.666992, avg loss: 3.248232, ppl: 25.744785 +epoch: 0, batch: 24667, sum loss: 5155.352051, avg loss: 3.057741, ppl: 21.279442 +epoch: 0, batch: 24668, sum loss: 4994.632812, avg loss: 2.917426, ppl: 18.493616 +epoch: 0, batch: 24669, sum loss: 4938.397461, avg loss: 2.861180, ppl: 17.482149 +epoch: 0, batch: 24670, sum loss: 5158.067383, avg loss: 3.084969, ppl: 21.866785 +epoch: 0, batch: 24671, sum loss: 5095.682617, avg loss: 3.025940, ppl: 
20.613371 +epoch: 0, batch: 24672, sum loss: 3538.407715, avg loss: 2.779582, ppl: 16.112286 +epoch: 0, batch: 24673, sum loss: 5081.962402, avg loss: 3.098758, ppl: 22.170387 +epoch: 0, batch: 24674, sum loss: 6072.051270, avg loss: 3.259287, ppl: 26.030962 +epoch: 0, batch: 24675, sum loss: 4358.675293, avg loss: 2.677319, ppl: 14.546041 +epoch: 0, batch: 24676, sum loss: 6241.306641, avg loss: 3.453960, ppl: 31.625395 +epoch: 0, batch: 24677, sum loss: 4167.539551, avg loss: 2.872184, ppl: 17.675585 +epoch: 0, batch: 24678, sum loss: 4869.787598, avg loss: 3.117662, ppl: 22.593489 +epoch: 0, batch: 24679, sum loss: 4354.724121, avg loss: 2.859307, ppl: 17.449427 +epoch: 0, batch: 24680, sum loss: 5876.232910, avg loss: 3.099279, ppl: 22.181955 +epoch: 0, batch: 24681, sum loss: 4577.348633, avg loss: 2.832518, ppl: 16.988176 +epoch: 0, batch: 24682, sum loss: 4700.042969, avg loss: 2.865880, ppl: 17.564499 +epoch: 0, batch: 24683, sum loss: 5533.314453, avg loss: 3.113852, ppl: 22.507566 +epoch: 0, batch: 24684, sum loss: 5403.361816, avg loss: 3.039011, ppl: 20.884583 +epoch: 0, batch: 24685, sum loss: 5392.147461, avg loss: 3.198189, ppl: 24.488153 +epoch: 0, batch: 24686, sum loss: 5409.203613, avg loss: 3.146715, ppl: 23.259541 +epoch: 0, batch: 24687, sum loss: 4024.673340, avg loss: 2.885071, ppl: 17.904831 +epoch: 0, batch: 24688, sum loss: 5512.927734, avg loss: 3.059339, ppl: 21.313456 +epoch: 0, batch: 24689, sum loss: 5498.801270, avg loss: 2.931131, ppl: 18.748821 +epoch: 0, batch: 24690, sum loss: 5595.092285, avg loss: 3.124005, ppl: 22.737251 +epoch: 0, batch: 24691, sum loss: 4833.951172, avg loss: 2.816988, ppl: 16.726395 +epoch: 0, batch: 24692, sum loss: 3969.011475, avg loss: 2.981977, ppl: 19.726778 +epoch: 0, batch: 24693, sum loss: 5098.644531, avg loss: 3.251687, ppl: 25.833874 +epoch: 0, batch: 24694, sum loss: 4993.351562, avg loss: 3.186568, ppl: 24.205208 +epoch: 0, batch: 24695, sum loss: 5794.492676, avg loss: 3.035355, ppl: 
20.808365 +epoch: 0, batch: 24696, sum loss: 5091.692383, avg loss: 3.116091, ppl: 22.558023 +epoch: 0, batch: 24697, sum loss: 5651.204590, avg loss: 3.084719, ppl: 21.861317 +epoch: 0, batch: 24698, sum loss: 3967.612793, avg loss: 2.967549, ppl: 19.444201 +epoch: 0, batch: 24699, sum loss: 5120.843750, avg loss: 3.068211, ppl: 21.503395 +epoch: 0, batch: 24700, sum loss: 5853.193848, avg loss: 3.037464, ppl: 20.852303 +epoch: 0, batch: 24701, sum loss: 5253.208496, avg loss: 3.117631, ppl: 22.592798 +epoch: 0, batch: 24702, sum loss: 5120.104980, avg loss: 3.080689, ppl: 21.773397 +epoch: 0, batch: 24703, sum loss: 5078.943848, avg loss: 3.019586, ppl: 20.482811 +epoch: 0, batch: 24704, sum loss: 5832.816895, avg loss: 3.319759, ppl: 27.653688 +epoch: 0, batch: 24705, sum loss: 6153.368164, avg loss: 3.087490, ppl: 21.921991 +epoch: 0, batch: 24706, sum loss: 5476.399902, avg loss: 3.200701, ppl: 24.549746 +epoch: 0, batch: 24707, sum loss: 5352.561035, avg loss: 3.236131, ppl: 25.435133 +epoch: 0, batch: 24708, sum loss: 3999.739258, avg loss: 2.832676, ppl: 16.990875 +epoch: 0, batch: 24709, sum loss: 5360.078125, avg loss: 3.024875, ppl: 20.591429 +epoch: 0, batch: 24710, sum loss: 4291.818359, avg loss: 2.911681, ppl: 18.387690 +epoch: 0, batch: 24711, sum loss: 4737.333008, avg loss: 2.869372, ppl: 17.625948 +epoch: 0, batch: 24712, sum loss: 6206.580078, avg loss: 3.256338, ppl: 25.954309 +epoch: 0, batch: 24713, sum loss: 4698.568848, avg loss: 3.334683, ppl: 28.069496 +epoch: 0, batch: 24714, sum loss: 4609.565918, avg loss: 2.817583, ppl: 16.736355 +epoch: 0, batch: 24715, sum loss: 5618.172363, avg loss: 3.230691, ppl: 25.297142 +epoch: 0, batch: 24716, sum loss: 4499.958496, avg loss: 2.952729, ppl: 19.158161 +epoch: 0, batch: 24717, sum loss: 4692.818359, avg loss: 3.212059, ppl: 24.830166 +epoch: 0, batch: 24718, sum loss: 5027.437500, avg loss: 3.187976, ppl: 24.239309 +epoch: 0, batch: 24719, sum loss: 5380.116699, avg loss: 3.036183, ppl: 
20.825607 +epoch: 0, batch: 24720, sum loss: 4897.467285, avg loss: 3.093788, ppl: 22.060493 +epoch: 0, batch: 24721, sum loss: 4314.305176, avg loss: 2.751470, ppl: 15.665645 +epoch: 0, batch: 24722, sum loss: 5055.836426, avg loss: 2.974022, ppl: 19.570467 +epoch: 0, batch: 24723, sum loss: 4863.564941, avg loss: 2.915806, ppl: 18.463694 +epoch: 0, batch: 24724, sum loss: 4900.395020, avg loss: 3.194521, ppl: 24.398478 +epoch: 0, batch: 24725, sum loss: 5012.724121, avg loss: 2.990886, ppl: 19.903303 +epoch: 0, batch: 24726, sum loss: 4846.795898, avg loss: 3.042559, ppl: 20.958801 +epoch: 0, batch: 24727, sum loss: 6291.110840, avg loss: 3.164543, ppl: 23.677914 +epoch: 0, batch: 24728, sum loss: 6380.365234, avg loss: 3.233840, ppl: 25.376905 +epoch: 0, batch: 24729, sum loss: 5305.127930, avg loss: 3.203580, ppl: 24.620506 +epoch: 0, batch: 24730, sum loss: 5542.730469, avg loss: 3.291408, ppl: 26.880680 +epoch: 0, batch: 24731, sum loss: 5478.125488, avg loss: 3.155602, ppl: 23.467165 +epoch: 0, batch: 24732, sum loss: 4976.158203, avg loss: 3.167510, ppl: 23.748278 +epoch: 0, batch: 24733, sum loss: 5299.807129, avg loss: 3.121206, ppl: 22.673704 +epoch: 0, batch: 24734, sum loss: 6106.810547, avg loss: 3.419267, ppl: 30.547014 +epoch: 0, batch: 24735, sum loss: 5338.556641, avg loss: 3.172048, ppl: 23.856289 +epoch: 0, batch: 24736, sum loss: 5434.832031, avg loss: 3.178264, ppl: 24.005054 +epoch: 0, batch: 24737, sum loss: 5184.054199, avg loss: 3.008737, ppl: 20.261799 +epoch: 0, batch: 24738, sum loss: 4570.379883, avg loss: 3.032767, ppl: 20.754581 +epoch: 0, batch: 24739, sum loss: 5251.779785, avg loss: 3.247854, ppl: 25.735052 +epoch: 0, batch: 24740, sum loss: 4982.015625, avg loss: 3.063970, ppl: 21.412397 +epoch: 0, batch: 24741, sum loss: 4661.650391, avg loss: 3.052816, ppl: 21.174892 +epoch: 0, batch: 24742, sum loss: 4651.605957, avg loss: 3.320204, ppl: 27.665995 +epoch: 0, batch: 24743, sum loss: 5818.377441, avg loss: 3.153592, ppl: 
23.420040 +epoch: 0, batch: 24744, sum loss: 4549.304199, avg loss: 3.059384, ppl: 21.314426 +epoch: 0, batch: 24745, sum loss: 4546.967773, avg loss: 2.824203, ppl: 16.847521 +epoch: 0, batch: 24746, sum loss: 5021.388672, avg loss: 2.962471, ppl: 19.345720 +epoch: 0, batch: 24747, sum loss: 5223.240234, avg loss: 3.107222, ppl: 22.358841 +epoch: 0, batch: 24748, sum loss: 4496.028809, avg loss: 3.011406, ppl: 20.315939 +epoch: 0, batch: 24749, sum loss: 5429.056152, avg loss: 2.976456, ppl: 19.618170 +epoch: 0, batch: 24750, sum loss: 5394.335449, avg loss: 3.130781, ppl: 22.891850 +epoch: 0, batch: 24751, sum loss: 5956.306152, avg loss: 3.256592, ppl: 25.960907 +epoch: 0, batch: 24752, sum loss: 5175.170410, avg loss: 2.969117, ppl: 19.474710 +epoch: 0, batch: 24753, sum loss: 5211.680664, avg loss: 3.223056, ppl: 25.104713 +epoch: 0, batch: 24754, sum loss: 4545.850586, avg loss: 2.923377, ppl: 18.604004 +epoch: 0, batch: 24755, sum loss: 5096.287109, avg loss: 3.203197, ppl: 24.611097 +epoch: 0, batch: 24756, sum loss: 5607.135254, avg loss: 3.135982, ppl: 23.011217 +epoch: 0, batch: 24757, sum loss: 6422.249512, avg loss: 3.250126, ppl: 25.793598 +epoch: 0, batch: 24758, sum loss: 5393.734375, avg loss: 2.896743, ppl: 18.115042 +epoch: 0, batch: 24759, sum loss: 5200.284668, avg loss: 2.963125, ppl: 19.358372 +epoch: 0, batch: 24760, sum loss: 5294.779785, avg loss: 3.252322, ppl: 25.850286 +epoch: 0, batch: 24761, sum loss: 4706.241699, avg loss: 3.014889, ppl: 20.386827 +epoch: 0, batch: 24762, sum loss: 6184.818848, avg loss: 3.163590, ppl: 23.655371 +epoch: 0, batch: 24763, sum loss: 6103.561035, avg loss: 3.060963, ppl: 21.348114 +epoch: 0, batch: 24764, sum loss: 5421.690430, avg loss: 3.453306, ppl: 31.604704 +epoch: 0, batch: 24765, sum loss: 5400.079590, avg loss: 3.272776, ppl: 26.384472 +epoch: 0, batch: 24766, sum loss: 4555.198730, avg loss: 2.967556, ppl: 19.444344 +epoch: 0, batch: 24767, sum loss: 5749.226074, avg loss: 3.266606, ppl: 
26.222179 +epoch: 0, batch: 24768, sum loss: 5149.304199, avg loss: 3.218315, ppl: 24.985987 +epoch: 0, batch: 24769, sum loss: 5173.559082, avg loss: 3.172017, ppl: 23.855543 +epoch: 0, batch: 24770, sum loss: 4377.201660, avg loss: 2.673917, ppl: 14.496639 +epoch: 0, batch: 24771, sum loss: 4991.309082, avg loss: 3.088681, ppl: 21.948114 +epoch: 0, batch: 24772, sum loss: 5417.146973, avg loss: 3.329531, ppl: 27.925240 +epoch: 0, batch: 24773, sum loss: 4402.698730, avg loss: 2.840451, ppl: 17.123487 +epoch: 0, batch: 24774, sum loss: 4801.525879, avg loss: 2.846192, ppl: 17.222076 +epoch: 0, batch: 24775, sum loss: 6303.617188, avg loss: 3.337013, ppl: 28.134954 +epoch: 0, batch: 24776, sum loss: 5223.791992, avg loss: 3.005634, ppl: 20.199020 +epoch: 0, batch: 24777, sum loss: 4735.389648, avg loss: 2.845787, ppl: 17.215103 +epoch: 0, batch: 24778, sum loss: 4814.502930, avg loss: 3.084243, ppl: 21.850916 +epoch: 0, batch: 24779, sum loss: 5394.277344, avg loss: 3.064930, ppl: 21.432966 +epoch: 0, batch: 24780, sum loss: 4215.506348, avg loss: 2.915288, ppl: 18.454126 +epoch: 0, batch: 24781, sum loss: 6216.568359, avg loss: 3.345839, ppl: 28.384367 +epoch: 0, batch: 24782, sum loss: 5122.416016, avg loss: 3.006113, ppl: 20.208693 +epoch: 0, batch: 24783, sum loss: 4352.665039, avg loss: 2.751368, ppl: 15.664050 +epoch: 0, batch: 24784, sum loss: 4151.674316, avg loss: 3.128617, ppl: 22.842358 +epoch: 0, batch: 24785, sum loss: 4608.031250, avg loss: 2.914631, ppl: 18.441999 +epoch: 0, batch: 24786, sum loss: 4331.437988, avg loss: 2.814450, ppl: 16.683990 +epoch: 0, batch: 24787, sum loss: 5184.676270, avg loss: 3.402019, ppl: 30.024645 +epoch: 0, batch: 24788, sum loss: 4886.206543, avg loss: 3.019905, ppl: 20.489347 +epoch: 0, batch: 24789, sum loss: 6357.452637, avg loss: 3.120988, ppl: 22.668762 +epoch: 0, batch: 24790, sum loss: 4068.672119, avg loss: 2.614828, ppl: 13.664864 +epoch: 0, batch: 24791, sum loss: 5147.493652, avg loss: 3.022604, ppl: 
20.544710 +epoch: 0, batch: 24792, sum loss: 3710.945801, avg loss: 2.615184, ppl: 13.669729 +epoch: 0, batch: 24793, sum loss: 4773.258301, avg loss: 3.067647, ppl: 21.491268 +epoch: 0, batch: 24794, sum loss: 5087.512695, avg loss: 2.915480, ppl: 18.457668 +epoch: 0, batch: 24795, sum loss: 5153.759766, avg loss: 2.911729, ppl: 18.388563 +epoch: 0, batch: 24796, sum loss: 6217.035645, avg loss: 3.263536, ppl: 26.141804 +epoch: 0, batch: 24797, sum loss: 5053.295898, avg loss: 3.166225, ppl: 23.717791 +epoch: 0, batch: 24798, sum loss: 4876.018066, avg loss: 3.178630, ppl: 24.013823 +epoch: 0, batch: 24799, sum loss: 4630.586426, avg loss: 3.014705, ppl: 20.383072 +epoch: 0, batch: 24800, sum loss: 4374.363281, avg loss: 2.813096, ppl: 16.661415 +epoch: 0, batch: 24801, sum loss: 5083.299316, avg loss: 3.128184, ppl: 22.832479 +epoch: 0, batch: 24802, sum loss: 5143.868652, avg loss: 3.093126, ppl: 22.045887 +epoch: 0, batch: 24803, sum loss: 5247.708984, avg loss: 2.936603, ppl: 18.851694 +epoch: 0, batch: 24804, sum loss: 4873.842285, avg loss: 3.375237, ppl: 29.231211 +epoch: 0, batch: 24805, sum loss: 4950.478027, avg loss: 3.105695, ppl: 22.324730 +epoch: 0, batch: 24806, sum loss: 4552.479492, avg loss: 2.975477, ppl: 19.598969 +epoch: 0, batch: 24807, sum loss: 5287.583008, avg loss: 3.162430, ppl: 23.627949 +epoch: 0, batch: 24808, sum loss: 5225.512695, avg loss: 3.231610, ppl: 25.320385 +epoch: 0, batch: 24809, sum loss: 6020.560547, avg loss: 3.250843, ppl: 25.812078 +epoch: 0, batch: 24810, sum loss: 4654.695312, avg loss: 2.898316, ppl: 18.143564 +epoch: 0, batch: 24811, sum loss: 6395.786133, avg loss: 3.334612, ppl: 28.067495 +epoch: 0, batch: 24812, sum loss: 5514.937988, avg loss: 2.971411, ppl: 19.519432 +epoch: 0, batch: 24813, sum loss: 5473.394043, avg loss: 3.124084, ppl: 22.739046 +epoch: 0, batch: 24814, sum loss: 3997.372070, avg loss: 2.939244, ppl: 18.901556 +epoch: 0, batch: 24815, sum loss: 4431.299316, avg loss: 2.923021, ppl: 
18.597382 +epoch: 0, batch: 24816, sum loss: 5919.273438, avg loss: 3.162005, ppl: 23.617907 +epoch: 0, batch: 24817, sum loss: 5045.621582, avg loss: 2.975013, ppl: 19.589874 +epoch: 0, batch: 24818, sum loss: 4914.005859, avg loss: 3.123971, ppl: 22.736477 +epoch: 0, batch: 24819, sum loss: 5467.090820, avg loss: 3.025507, ppl: 20.604443 +epoch: 0, batch: 24820, sum loss: 4236.618164, avg loss: 2.805707, ppl: 16.538773 +epoch: 0, batch: 24821, sum loss: 5448.939453, avg loss: 3.233792, ppl: 25.375694 +epoch: 0, batch: 24822, sum loss: 4164.274414, avg loss: 2.697069, ppl: 14.836185 +epoch: 0, batch: 24823, sum loss: 4947.413086, avg loss: 3.345107, ppl: 28.363605 +epoch: 0, batch: 24824, sum loss: 3960.227051, avg loss: 2.645442, ppl: 14.089675 +epoch: 0, batch: 24825, sum loss: 5391.983398, avg loss: 3.114953, ppl: 22.532368 +epoch: 0, batch: 24826, sum loss: 5180.279297, avg loss: 2.824580, ppl: 16.853861 +epoch: 0, batch: 24827, sum loss: 5984.364258, avg loss: 3.236541, ppl: 25.445547 +epoch: 0, batch: 24828, sum loss: 4835.230957, avg loss: 2.964581, ppl: 19.386570 +epoch: 0, batch: 24829, sum loss: 4985.866211, avg loss: 3.021737, ppl: 20.526918 +epoch: 0, batch: 24830, sum loss: 6280.904785, avg loss: 3.035720, ppl: 20.815962 +epoch: 0, batch: 24831, sum loss: 4423.146484, avg loss: 3.054659, ppl: 21.213959 +epoch: 0, batch: 24832, sum loss: 4581.790039, avg loss: 3.162036, ppl: 23.618639 +epoch: 0, batch: 24833, sum loss: 4682.083496, avg loss: 2.854929, ppl: 17.373203 +epoch: 0, batch: 24834, sum loss: 6299.740234, avg loss: 3.539180, ppl: 34.438671 +epoch: 0, batch: 24835, sum loss: 5957.483398, avg loss: 3.031798, ppl: 20.734482 +epoch: 0, batch: 24836, sum loss: 5216.992188, avg loss: 2.893507, ppl: 18.056515 +epoch: 0, batch: 24837, sum loss: 4932.860840, avg loss: 3.037476, ppl: 20.852541 +epoch: 0, batch: 24838, sum loss: 5139.067871, avg loss: 2.996541, ppl: 20.016176 +epoch: 0, batch: 24839, sum loss: 4709.087402, avg loss: 3.283882, ppl: 
26.679150 +epoch: 0, batch: 24840, sum loss: 5283.975586, avg loss: 3.160273, ppl: 23.577023 +epoch: 0, batch: 24841, sum loss: 4994.419434, avg loss: 2.826497, ppl: 16.886198 +epoch: 0, batch: 24842, sum loss: 4919.419922, avg loss: 3.165650, ppl: 23.704144 +epoch: 0, batch: 24843, sum loss: 6219.659180, avg loss: 3.179785, ppl: 24.041578 +epoch: 0, batch: 24844, sum loss: 5802.590820, avg loss: 3.041190, ppl: 20.930138 +epoch: 0, batch: 24845, sum loss: 5172.404297, avg loss: 3.106549, ppl: 22.343809 +epoch: 0, batch: 24846, sum loss: 4741.980957, avg loss: 3.031957, ppl: 20.737780 +epoch: 0, batch: 24847, sum loss: 5369.159180, avg loss: 3.267900, ppl: 26.256130 +epoch: 0, batch: 24848, sum loss: 4760.622070, avg loss: 3.028386, ppl: 20.663847 +epoch: 0, batch: 24849, sum loss: 5332.692383, avg loss: 3.144276, ppl: 23.202873 +epoch: 0, batch: 24850, sum loss: 5540.357422, avg loss: 3.243769, ppl: 25.630138 +epoch: 0, batch: 24851, sum loss: 4028.678711, avg loss: 2.801585, ppl: 16.470728 +epoch: 0, batch: 24852, sum loss: 4433.674316, avg loss: 3.160139, ppl: 23.573864 +epoch: 0, batch: 24853, sum loss: 4831.086914, avg loss: 2.799007, ppl: 16.428331 +epoch: 0, batch: 24854, sum loss: 4808.657227, avg loss: 2.966476, ppl: 19.423346 +epoch: 0, batch: 24855, sum loss: 5003.462891, avg loss: 3.090465, ppl: 21.987301 +epoch: 0, batch: 24856, sum loss: 4924.904297, avg loss: 3.097424, ppl: 22.140844 +epoch: 0, batch: 24857, sum loss: 5406.751465, avg loss: 3.159995, ppl: 23.570486 +epoch: 0, batch: 24858, sum loss: 5596.118652, avg loss: 3.284107, ppl: 26.685150 +epoch: 0, batch: 24859, sum loss: 5270.718750, avg loss: 3.267650, ppl: 26.249571 +epoch: 0, batch: 24860, sum loss: 5456.169922, avg loss: 3.349398, ppl: 28.485590 +epoch: 0, batch: 24861, sum loss: 3679.209961, avg loss: 2.717289, ppl: 15.139231 +epoch: 0, batch: 24862, sum loss: 5798.283203, avg loss: 3.332347, ppl: 28.003981 +epoch: 0, batch: 24863, sum loss: 4940.284668, avg loss: 3.214239, ppl: 
24.884338 +epoch: 0, batch: 24864, sum loss: 5485.226074, avg loss: 3.150618, ppl: 23.350492 +epoch: 0, batch: 24865, sum loss: 5094.262695, avg loss: 3.085562, ppl: 21.879759 +epoch: 0, batch: 24866, sum loss: 4665.577148, avg loss: 2.903284, ppl: 18.233925 +epoch: 0, batch: 24867, sum loss: 5001.480957, avg loss: 3.038567, ppl: 20.875303 +epoch: 0, batch: 24868, sum loss: 4552.839844, avg loss: 2.706801, ppl: 14.981280 +epoch: 0, batch: 24869, sum loss: 4847.085938, avg loss: 2.904186, ppl: 18.250378 +epoch: 0, batch: 24870, sum loss: 5446.845703, avg loss: 3.116045, ppl: 22.556980 +epoch: 0, batch: 24871, sum loss: 5757.432617, avg loss: 3.163425, ppl: 23.651457 +epoch: 0, batch: 24872, sum loss: 5836.448730, avg loss: 3.002289, ppl: 20.131557 +epoch: 0, batch: 24873, sum loss: 5512.801758, avg loss: 3.095341, ppl: 22.094765 +epoch: 0, batch: 24874, sum loss: 6219.189941, avg loss: 3.091049, ppl: 22.000143 +epoch: 0, batch: 24875, sum loss: 5659.883789, avg loss: 3.424007, ppl: 30.692150 +epoch: 0, batch: 24876, sum loss: 5121.107910, avg loss: 3.157280, ppl: 23.506565 +epoch: 0, batch: 24877, sum loss: 5257.361328, avg loss: 2.980364, ppl: 19.694977 +epoch: 0, batch: 24878, sum loss: 4497.941406, avg loss: 3.138829, ppl: 23.076822 +epoch: 0, batch: 24879, sum loss: 3913.334961, avg loss: 2.771484, ppl: 15.982328 +epoch: 0, batch: 24880, sum loss: 5322.309570, avg loss: 3.076479, ppl: 21.681934 +epoch: 0, batch: 24881, sum loss: 6211.461914, avg loss: 3.314547, ppl: 27.509937 +epoch: 0, batch: 24882, sum loss: 5617.279785, avg loss: 3.067876, ppl: 21.496187 +epoch: 0, batch: 24883, sum loss: 5914.983887, avg loss: 3.168175, ppl: 23.764086 +epoch: 0, batch: 24884, sum loss: 4303.220703, avg loss: 2.923384, ppl: 18.604132 +epoch: 0, batch: 24885, sum loss: 4721.293457, avg loss: 2.905411, ppl: 18.272757 +epoch: 0, batch: 24886, sum loss: 4982.085938, avg loss: 3.100240, ppl: 22.203285 +epoch: 0, batch: 24887, sum loss: 5917.532227, avg loss: 3.341351, ppl: 
28.257271 +epoch: 0, batch: 24888, sum loss: 4398.726074, avg loss: 2.810688, ppl: 16.621342 +epoch: 0, batch: 24889, sum loss: 5178.828125, avg loss: 2.869157, ppl: 17.622154 +epoch: 0, batch: 24890, sum loss: 4574.059570, avg loss: 3.172025, ppl: 23.855738 +epoch: 0, batch: 24891, sum loss: 4866.973145, avg loss: 3.068709, ppl: 21.514118 +epoch: 0, batch: 24892, sum loss: 4680.058105, avg loss: 2.761096, ppl: 15.817173 +epoch: 0, batch: 24893, sum loss: 4800.808105, avg loss: 3.121462, ppl: 22.679504 +epoch: 0, batch: 24894, sum loss: 4715.741699, avg loss: 2.859758, ppl: 17.457308 +epoch: 0, batch: 24895, sum loss: 4827.611816, avg loss: 2.885602, ppl: 17.914349 +epoch: 0, batch: 24896, sum loss: 5604.106934, avg loss: 3.159023, ppl: 23.547575 +epoch: 0, batch: 24897, sum loss: 5111.458984, avg loss: 3.132022, ppl: 22.920269 +epoch: 0, batch: 24898, sum loss: 5920.602539, avg loss: 3.240614, ppl: 25.549416 +epoch: 0, batch: 24899, sum loss: 5932.538086, avg loss: 3.248926, ppl: 25.762648 +epoch: 0, batch: 24900, sum loss: 6136.136230, avg loss: 3.187603, ppl: 24.230278 +epoch: 0, batch: 24901, sum loss: 4942.994629, avg loss: 3.226498, ppl: 25.191280 +epoch: 0, batch: 24902, sum loss: 5480.436523, avg loss: 3.180753, ppl: 24.064856 +epoch: 0, batch: 24903, sum loss: 5515.525391, avg loss: 3.091662, ppl: 22.013643 +epoch: 0, batch: 24904, sum loss: 4652.753906, avg loss: 3.073153, ppl: 21.609938 +epoch: 0, batch: 24905, sum loss: 5233.402344, avg loss: 2.980298, ppl: 19.693676 +epoch: 0, batch: 24906, sum loss: 4732.942383, avg loss: 2.713843, ppl: 15.087146 +epoch: 0, batch: 24907, sum loss: 5457.321777, avg loss: 3.227275, ppl: 25.210867 +epoch: 0, batch: 24908, sum loss: 4175.433594, avg loss: 2.881597, ppl: 17.842741 +epoch: 0, batch: 24909, sum loss: 4190.235352, avg loss: 3.010226, ppl: 20.291996 +epoch: 0, batch: 24910, sum loss: 5584.054688, avg loss: 3.222189, ppl: 25.082977 +epoch: 0, batch: 24911, sum loss: 5350.237793, avg loss: 3.260352, ppl: 
26.058706 +epoch: 0, batch: 24912, sum loss: 4997.593750, avg loss: 3.069775, ppl: 21.537054 +epoch: 0, batch: 24913, sum loss: 5336.255371, avg loss: 3.091689, ppl: 22.014231 +epoch: 0, batch: 24914, sum loss: 5173.477539, avg loss: 2.853545, ppl: 17.349184 +epoch: 0, batch: 24915, sum loss: 6202.615234, avg loss: 3.169451, ppl: 23.794413 +epoch: 0, batch: 24916, sum loss: 5016.853516, avg loss: 3.081605, ppl: 21.793358 +epoch: 0, batch: 24917, sum loss: 4822.613281, avg loss: 3.097375, ppl: 22.139767 +epoch: 0, batch: 24918, sum loss: 5124.695801, avg loss: 3.149782, ppl: 23.330982 +epoch: 0, batch: 24919, sum loss: 4830.517578, avg loss: 3.297282, ppl: 27.039034 +epoch: 0, batch: 24920, sum loss: 4765.959473, avg loss: 3.060989, ppl: 21.348658 +epoch: 0, batch: 24921, sum loss: 5528.877930, avg loss: 2.975715, ppl: 19.603628 +epoch: 0, batch: 24922, sum loss: 5586.573242, avg loss: 3.166992, ppl: 23.735971 +epoch: 0, batch: 24923, sum loss: 4684.074707, avg loss: 3.014205, ppl: 20.372892 +epoch: 0, batch: 24924, sum loss: 5594.909180, avg loss: 2.949346, ppl: 19.093460 +epoch: 0, batch: 24925, sum loss: 4865.037109, avg loss: 3.075244, ppl: 21.655174 +epoch: 0, batch: 24926, sum loss: 5534.066406, avg loss: 2.924982, ppl: 18.633896 +epoch: 0, batch: 24927, sum loss: 4628.425781, avg loss: 2.957460, ppl: 19.249025 +epoch: 0, batch: 24928, sum loss: 5483.352051, avg loss: 3.071906, ppl: 21.583002 +epoch: 0, batch: 24929, sum loss: 5262.502441, avg loss: 3.102891, ppl: 22.262213 +epoch: 0, batch: 24930, sum loss: 5484.342773, avg loss: 3.069022, ppl: 21.520853 +epoch: 0, batch: 24931, sum loss: 6177.413574, avg loss: 3.280623, ppl: 26.592340 +epoch: 0, batch: 24932, sum loss: 4724.891602, avg loss: 2.758255, ppl: 15.772304 +epoch: 0, batch: 24933, sum loss: 3860.217773, avg loss: 2.942239, ppl: 18.958248 +epoch: 0, batch: 24934, sum loss: 5106.149902, avg loss: 2.993054, ppl: 19.946499 +epoch: 0, batch: 24935, sum loss: 5128.595703, avg loss: 3.004450, ppl: 
20.175114 +epoch: 0, batch: 24936, sum loss: 5426.038086, avg loss: 3.062098, ppl: 21.372355 +epoch: 0, batch: 24937, sum loss: 5140.013672, avg loss: 3.070498, ppl: 21.552639 +epoch: 0, batch: 24938, sum loss: 6183.521973, avg loss: 3.280383, ppl: 26.585957 +epoch: 0, batch: 24939, sum loss: 3887.726807, avg loss: 2.914338, ppl: 18.436605 +epoch: 0, batch: 24940, sum loss: 4280.479492, avg loss: 2.917845, ppl: 18.501383 +epoch: 0, batch: 24941, sum loss: 6283.077637, avg loss: 3.208926, ppl: 24.752499 +epoch: 0, batch: 24942, sum loss: 4386.779785, avg loss: 3.008765, ppl: 20.262365 +epoch: 0, batch: 24943, sum loss: 6617.329590, avg loss: 3.320286, ppl: 27.668257 +epoch: 0, batch: 24944, sum loss: 5005.809082, avg loss: 2.711706, ppl: 15.054936 +epoch: 0, batch: 24945, sum loss: 4479.984863, avg loss: 2.894047, ppl: 18.066280 +epoch: 0, batch: 24946, sum loss: 4184.009766, avg loss: 2.921795, ppl: 18.574593 +epoch: 0, batch: 24947, sum loss: 4409.811523, avg loss: 3.116474, ppl: 22.566677 +epoch: 0, batch: 24948, sum loss: 5679.982422, avg loss: 3.219945, ppl: 25.026737 +epoch: 0, batch: 24949, sum loss: 5504.167969, avg loss: 3.440105, ppl: 31.190231 +epoch: 0, batch: 24950, sum loss: 5256.001953, avg loss: 3.039909, ppl: 20.903334 +epoch: 0, batch: 24951, sum loss: 5375.417480, avg loss: 3.156440, ppl: 23.486834 +epoch: 0, batch: 24952, sum loss: 5616.661133, avg loss: 3.164316, ppl: 23.672544 +epoch: 0, batch: 24953, sum loss: 5616.915527, avg loss: 3.060990, ppl: 21.348673 +epoch: 0, batch: 24954, sum loss: 5202.767090, avg loss: 3.335107, ppl: 28.081396 +epoch: 0, batch: 24955, sum loss: 5299.824707, avg loss: 3.273518, ppl: 26.404055 +epoch: 0, batch: 24956, sum loss: 5447.865234, avg loss: 2.913297, ppl: 18.417419 +epoch: 0, batch: 24957, sum loss: 5728.558594, avg loss: 3.152756, ppl: 23.400478 +epoch: 0, batch: 24958, sum loss: 6353.442871, avg loss: 3.203955, ppl: 24.629753 +epoch: 0, batch: 24959, sum loss: 4768.481934, avg loss: 3.068521, ppl: 
21.510071 +epoch: 0, batch: 24960, sum loss: 4670.504395, avg loss: 3.036739, ppl: 20.837179 +epoch: 0, batch: 24961, sum loss: 5319.479492, avg loss: 3.032770, ppl: 20.754652 +epoch: 0, batch: 24962, sum loss: 5560.301758, avg loss: 3.202939, ppl: 24.604733 +epoch: 0, batch: 24963, sum loss: 5598.859375, avg loss: 2.998853, ppl: 20.062521 +epoch: 0, batch: 24964, sum loss: 5516.328613, avg loss: 3.209034, ppl: 24.755154 +epoch: 0, batch: 24965, sum loss: 4029.133301, avg loss: 2.938828, ppl: 18.893694 +epoch: 0, batch: 24966, sum loss: 5258.878418, avg loss: 3.061047, ppl: 21.349895 +epoch: 0, batch: 24967, sum loss: 4873.004395, avg loss: 3.099876, ppl: 22.195192 +epoch: 0, batch: 24968, sum loss: 5384.188477, avg loss: 3.373552, ppl: 29.181986 +epoch: 0, batch: 24969, sum loss: 4803.084961, avg loss: 3.020808, ppl: 20.507860 +epoch: 0, batch: 24970, sum loss: 4763.156250, avg loss: 2.954812, ppl: 19.198105 +epoch: 0, batch: 24971, sum loss: 4458.776367, avg loss: 3.064451, ppl: 21.422697 +epoch: 0, batch: 24972, sum loss: 4778.807617, avg loss: 2.885753, ppl: 17.917061 +epoch: 0, batch: 24973, sum loss: 5908.510742, avg loss: 3.287986, ppl: 26.788858 +epoch: 0, batch: 24974, sum loss: 4787.285645, avg loss: 3.094561, ppl: 22.077541 +epoch: 0, batch: 24975, sum loss: 4831.690918, avg loss: 2.912412, ppl: 18.401123 +epoch: 0, batch: 24976, sum loss: 5127.309082, avg loss: 3.083169, ppl: 21.827454 +epoch: 0, batch: 24977, sum loss: 4814.544434, avg loss: 2.971941, ppl: 19.529795 +epoch: 0, batch: 24978, sum loss: 3839.327881, avg loss: 2.884544, ppl: 17.895403 +epoch: 0, batch: 24979, sum loss: 4787.997559, avg loss: 2.875674, ppl: 17.737379 +epoch: 0, batch: 24980, sum loss: 4780.371582, avg loss: 2.958151, ppl: 19.262316 +epoch: 0, batch: 24981, sum loss: 5514.374023, avg loss: 3.021575, ppl: 20.523586 +epoch: 0, batch: 24982, sum loss: 4277.977051, avg loss: 2.938171, ppl: 18.881283 +epoch: 0, batch: 24983, sum loss: 5272.820801, avg loss: 3.244813, ppl: 
25.656904 +epoch: 0, batch: 24984, sum loss: 6100.344727, avg loss: 3.202281, ppl: 24.588547 +epoch: 0, batch: 24985, sum loss: 5951.366699, avg loss: 3.273579, ppl: 26.405680 +epoch: 0, batch: 24986, sum loss: 3932.489258, avg loss: 2.678807, ppl: 14.567707 +epoch: 0, batch: 24987, sum loss: 4695.714844, avg loss: 2.957000, ppl: 19.240156 +epoch: 0, batch: 24988, sum loss: 5704.532715, avg loss: 3.185110, ppl: 24.169954 +epoch: 0, batch: 24989, sum loss: 5410.383301, avg loss: 3.138273, ppl: 23.064013 +epoch: 0, batch: 24990, sum loss: 5552.492188, avg loss: 2.930075, ppl: 18.729034 +epoch: 0, batch: 24991, sum loss: 5129.145020, avg loss: 3.199716, ppl: 24.525566 +epoch: 0, batch: 24992, sum loss: 5077.569336, avg loss: 3.134302, ppl: 22.972599 +epoch: 0, batch: 24993, sum loss: 4744.488281, avg loss: 2.937764, ppl: 18.873592 +epoch: 0, batch: 24994, sum loss: 5282.077148, avg loss: 3.103453, ppl: 22.274731 +epoch: 0, batch: 24995, sum loss: 4529.214844, avg loss: 2.749979, ppl: 15.642296 +epoch: 0, batch: 24996, sum loss: 3790.548340, avg loss: 2.904635, ppl: 18.258574 +epoch: 0, batch: 24997, sum loss: 5848.270508, avg loss: 3.287392, ppl: 26.772945 +epoch: 0, batch: 24998, sum loss: 4656.887695, avg loss: 3.020031, ppl: 20.491926 +epoch: 0, batch: 24999, sum loss: 4938.016602, avg loss: 3.223248, ppl: 25.109556 +epoch: 0, batch: 25000, sum loss: 4924.334961, avg loss: 3.130537, ppl: 22.886267 +epoch: 0, batch: 25001, sum loss: 4650.979980, avg loss: 2.853362, ppl: 17.345999 +epoch: 0, batch: 25002, sum loss: 4772.218262, avg loss: 2.920574, ppl: 18.551937 +epoch: 0, batch: 25003, sum loss: 5369.817383, avg loss: 3.356136, ppl: 28.678160 +epoch: 0, batch: 25004, sum loss: 3614.448242, avg loss: 2.752817, ppl: 15.686754 +epoch: 0, batch: 25005, sum loss: 5794.019531, avg loss: 3.081925, ppl: 21.800331 +epoch: 0, batch: 25006, sum loss: 5885.596191, avg loss: 3.349798, ppl: 28.496988 +epoch: 0, batch: 25007, sum loss: 5547.270508, avg loss: 3.204662, ppl: 
24.647177 +epoch: 0, batch: 25008, sum loss: 5270.402344, avg loss: 3.192249, ppl: 24.343109 +epoch: 0, batch: 25009, sum loss: 5736.696777, avg loss: 3.495854, ppl: 32.978443 +epoch: 0, batch: 25010, sum loss: 5354.853027, avg loss: 3.208420, ppl: 24.739967 +epoch: 0, batch: 25011, sum loss: 5797.091797, avg loss: 3.308843, ppl: 27.353451 +epoch: 0, batch: 25012, sum loss: 5925.492188, avg loss: 3.148508, ppl: 23.301275 +epoch: 0, batch: 25013, sum loss: 6143.808105, avg loss: 3.409439, ppl: 30.248259 +epoch: 0, batch: 25014, sum loss: 4961.439453, avg loss: 2.892967, ppl: 18.046783 +epoch: 0, batch: 25015, sum loss: 5396.726074, avg loss: 2.766134, ppl: 15.897050 +epoch: 0, batch: 25016, sum loss: 5543.026367, avg loss: 3.131653, ppl: 22.911829 +epoch: 0, batch: 25017, sum loss: 4097.306641, avg loss: 2.791081, ppl: 16.298624 +epoch: 0, batch: 25018, sum loss: 4194.460449, avg loss: 2.902741, ppl: 18.224028 +epoch: 0, batch: 25019, sum loss: 4901.185059, avg loss: 2.943655, ppl: 18.985106 +epoch: 0, batch: 25020, sum loss: 4692.080078, avg loss: 2.947286, ppl: 19.054178 +epoch: 0, batch: 25021, sum loss: 5380.074219, avg loss: 3.155469, ppl: 23.464037 +epoch: 0, batch: 25022, sum loss: 5512.045898, avg loss: 3.053765, ppl: 21.194994 +epoch: 0, batch: 25023, sum loss: 4958.716309, avg loss: 2.906633, ppl: 18.295097 +epoch: 0, batch: 25024, sum loss: 5039.539062, avg loss: 2.952278, ppl: 19.149534 +epoch: 0, batch: 25025, sum loss: 4828.366699, avg loss: 3.027189, ppl: 20.639139 +epoch: 0, batch: 25026, sum loss: 4813.932129, avg loss: 2.887782, ppl: 17.953438 +epoch: 0, batch: 25027, sum loss: 5530.723145, avg loss: 3.010737, ppl: 20.302351 +epoch: 0, batch: 25028, sum loss: 5622.176758, avg loss: 3.174577, ppl: 23.916712 +epoch: 0, batch: 25029, sum loss: 4750.641602, avg loss: 2.959901, ppl: 19.296068 +epoch: 0, batch: 25030, sum loss: 5437.780762, avg loss: 3.019312, ppl: 20.477196 +epoch: 0, batch: 25031, sum loss: 4499.635254, avg loss: 2.970056, ppl: 
19.493017 +epoch: 0, batch: 25032, sum loss: 4674.617188, avg loss: 2.887349, ppl: 17.945665 +epoch: 0, batch: 25033, sum loss: 4626.638184, avg loss: 3.147373, ppl: 23.274839 +epoch: 0, batch: 25034, sum loss: 5024.933594, avg loss: 3.065853, ppl: 21.452757 +epoch: 0, batch: 25035, sum loss: 4745.789551, avg loss: 3.036334, ppl: 20.828741 +epoch: 0, batch: 25036, sum loss: 5334.677734, avg loss: 3.348825, ppl: 28.469255 +epoch: 0, batch: 25037, sum loss: 5132.266602, avg loss: 2.975227, ppl: 19.594072 +epoch: 0, batch: 25038, sum loss: 4573.036133, avg loss: 2.899833, ppl: 18.171106 +epoch: 0, batch: 25039, sum loss: 6057.749512, avg loss: 3.206855, ppl: 24.701286 +epoch: 0, batch: 25040, sum loss: 5642.874023, avg loss: 3.250504, ppl: 25.803329 +epoch: 0, batch: 25041, sum loss: 4091.884033, avg loss: 2.677935, ppl: 14.555001 +epoch: 0, batch: 25042, sum loss: 4510.212402, avg loss: 2.796164, ppl: 16.381687 +epoch: 0, batch: 25043, sum loss: 4075.592285, avg loss: 2.737134, ppl: 15.442659 +epoch: 0, batch: 25044, sum loss: 5163.037109, avg loss: 2.989599, ppl: 19.877705 +epoch: 0, batch: 25045, sum loss: 5404.062500, avg loss: 3.205257, ppl: 24.661829 +epoch: 0, batch: 25046, sum loss: 4762.112305, avg loss: 2.993157, ppl: 19.948555 +epoch: 0, batch: 25047, sum loss: 4931.402344, avg loss: 3.010624, ppl: 20.300062 +epoch: 0, batch: 25048, sum loss: 5700.270508, avg loss: 3.198805, ppl: 24.503233 +epoch: 0, batch: 25049, sum loss: 5021.518555, avg loss: 3.028660, ppl: 20.669523 +epoch: 0, batch: 25050, sum loss: 4984.241211, avg loss: 3.093880, ppl: 22.062523 +epoch: 0, batch: 25051, sum loss: 5298.178711, avg loss: 2.971497, ppl: 19.521118 +epoch: 0, batch: 25052, sum loss: 4925.275391, avg loss: 3.005049, ppl: 20.187204 +epoch: 0, batch: 25053, sum loss: 5501.910645, avg loss: 3.292586, ppl: 26.912365 +epoch: 0, batch: 25054, sum loss: 4604.560547, avg loss: 3.027325, ppl: 20.641935 +epoch: 0, batch: 25055, sum loss: 5688.646484, avg loss: 3.024267, ppl: 
20.578918 +epoch: 0, batch: 25056, sum loss: 5072.366699, avg loss: 2.852850, ppl: 17.337118 +epoch: 0, batch: 25057, sum loss: 5879.040527, avg loss: 3.076421, ppl: 21.680668 +epoch: 0, batch: 25058, sum loss: 4433.523926, avg loss: 3.267151, ppl: 26.236488 +epoch: 0, batch: 25059, sum loss: 5182.359863, avg loss: 3.118147, ppl: 22.604452 +epoch: 0, batch: 25060, sum loss: 5264.615723, avg loss: 3.302770, ppl: 27.187849 +epoch: 0, batch: 25061, sum loss: 4767.433594, avg loss: 2.882366, ppl: 17.856474 +epoch: 0, batch: 25062, sum loss: 4618.019043, avg loss: 2.941413, ppl: 18.942600 +epoch: 0, batch: 25063, sum loss: 5959.735352, avg loss: 3.136703, ppl: 23.027819 +epoch: 0, batch: 25064, sum loss: 5285.305176, avg loss: 2.812829, ppl: 16.656971 +epoch: 0, batch: 25065, sum loss: 5558.090820, avg loss: 3.352286, ppl: 28.567976 +epoch: 0, batch: 25066, sum loss: 5571.769043, avg loss: 3.088564, ppl: 21.945534 +epoch: 0, batch: 25067, sum loss: 4754.178711, avg loss: 2.940123, ppl: 18.918175 +epoch: 0, batch: 25068, sum loss: 6373.197266, avg loss: 3.309033, ppl: 27.358654 +epoch: 0, batch: 25069, sum loss: 4516.981934, avg loss: 2.844447, ppl: 17.192051 +epoch: 0, batch: 25070, sum loss: 5870.713867, avg loss: 3.075282, ppl: 21.655994 +epoch: 0, batch: 25071, sum loss: 4810.204590, avg loss: 3.056038, ppl: 21.243233 +epoch: 0, batch: 25072, sum loss: 4549.119141, avg loss: 3.109446, ppl: 22.408623 +epoch: 0, batch: 25073, sum loss: 5733.763184, avg loss: 3.246752, ppl: 25.706697 +epoch: 0, batch: 25074, sum loss: 5069.102539, avg loss: 3.109879, ppl: 22.418327 +epoch: 0, batch: 25075, sum loss: 6348.817871, avg loss: 3.265853, ppl: 26.202457 +epoch: 0, batch: 25076, sum loss: 5269.760254, avg loss: 3.209355, ppl: 24.763100 +epoch: 0, batch: 25077, sum loss: 4763.873047, avg loss: 2.807232, ppl: 16.564013 +epoch: 0, batch: 25078, sum loss: 5250.555176, avg loss: 3.367899, ppl: 29.017509 +epoch: 0, batch: 25079, sum loss: 5788.661133, avg loss: 3.070908, ppl: 
21.561468 +epoch: 0, batch: 25080, sum loss: 6182.619141, avg loss: 3.436698, ppl: 31.084141 +epoch: 0, batch: 25081, sum loss: 5376.295898, avg loss: 3.001840, ppl: 20.122530 +epoch: 0, batch: 25082, sum loss: 5133.318359, avg loss: 3.131982, ppl: 22.919357 +epoch: 0, batch: 25083, sum loss: 4356.478516, avg loss: 2.866104, ppl: 17.568445 +epoch: 0, batch: 25084, sum loss: 5507.573242, avg loss: 2.994874, ppl: 19.982841 +epoch: 0, batch: 25085, sum loss: 4224.229004, avg loss: 2.750149, ppl: 15.644967 +epoch: 0, batch: 25086, sum loss: 4943.097656, avg loss: 3.083654, ppl: 21.838057 +epoch: 0, batch: 25087, sum loss: 5019.799805, avg loss: 3.040460, ppl: 20.914869 +epoch: 0, batch: 25088, sum loss: 5413.032715, avg loss: 3.134356, ppl: 22.973837 +epoch: 0, batch: 25089, sum loss: 5110.780273, avg loss: 3.075078, ppl: 21.651581 +epoch: 0, batch: 25090, sum loss: 5190.270020, avg loss: 3.134221, ppl: 22.970736 +epoch: 0, batch: 25091, sum loss: 5592.711426, avg loss: 3.140209, ppl: 23.108690 +epoch: 0, batch: 25092, sum loss: 6798.578613, avg loss: 3.458077, ppl: 31.755842 +epoch: 0, batch: 25093, sum loss: 3694.782227, avg loss: 2.633487, ppl: 13.922232 +epoch: 0, batch: 25094, sum loss: 5365.275391, avg loss: 2.941489, ppl: 18.944029 +epoch: 0, batch: 25095, sum loss: 5479.449219, avg loss: 3.010687, ppl: 20.301334 +epoch: 0, batch: 25096, sum loss: 5544.939453, avg loss: 3.111639, ppl: 22.457813 +epoch: 0, batch: 25097, sum loss: 5581.506836, avg loss: 3.191256, ppl: 24.318954 +epoch: 0, batch: 25098, sum loss: 5401.602539, avg loss: 3.027804, ppl: 20.651838 +epoch: 0, batch: 25099, sum loss: 5705.195801, avg loss: 3.148563, ppl: 23.302553 +epoch: 0, batch: 25100, sum loss: 4683.404785, avg loss: 3.181661, ppl: 24.086725 +epoch: 0, batch: 25101, sum loss: 5593.401855, avg loss: 3.359401, ppl: 28.771936 +epoch: 0, batch: 25102, sum loss: 4443.844238, avg loss: 2.739731, ppl: 15.482824 +epoch: 0, batch: 25103, sum loss: 5398.964844, avg loss: 3.230978, ppl: 
25.304405 +epoch: 0, batch: 25104, sum loss: 5488.046387, avg loss: 3.278403, ppl: 26.533367 +epoch: 0, batch: 25105, sum loss: 4263.717773, avg loss: 2.750786, ppl: 15.654929 +epoch: 0, batch: 25106, sum loss: 4564.192871, avg loss: 2.849059, ppl: 17.271524 +epoch: 0, batch: 25107, sum loss: 4297.873047, avg loss: 2.865249, ppl: 17.553417 +epoch: 0, batch: 25108, sum loss: 5652.250977, avg loss: 3.161214, ppl: 23.599230 +epoch: 0, batch: 25109, sum loss: 4561.767090, avg loss: 3.031075, ppl: 20.719484 +epoch: 0, batch: 25110, sum loss: 4911.358887, avg loss: 3.311773, ppl: 27.433718 +epoch: 0, batch: 25111, sum loss: 5399.185547, avg loss: 3.231111, ppl: 25.307747 +epoch: 0, batch: 25112, sum loss: 4528.073730, avg loss: 3.090835, ppl: 21.995443 +epoch: 0, batch: 25113, sum loss: 5220.703613, avg loss: 3.379096, ppl: 29.344240 +epoch: 0, batch: 25114, sum loss: 5358.870117, avg loss: 3.088686, ppl: 21.948219 +epoch: 0, batch: 25115, sum loss: 5522.166016, avg loss: 3.250245, ppl: 25.796661 +epoch: 0, batch: 25116, sum loss: 5468.037109, avg loss: 3.153424, ppl: 23.416100 +epoch: 0, batch: 25117, sum loss: 4846.479492, avg loss: 3.102740, ppl: 22.258852 +epoch: 0, batch: 25118, sum loss: 4159.311035, avg loss: 2.504101, ppl: 12.232552 +epoch: 0, batch: 25119, sum loss: 5048.722656, avg loss: 3.223961, ppl: 25.127443 +epoch: 0, batch: 25120, sum loss: 5677.093750, avg loss: 3.257082, ppl: 25.973635 +epoch: 0, batch: 25121, sum loss: 5545.886719, avg loss: 3.135041, ppl: 22.989573 +epoch: 0, batch: 25122, sum loss: 5672.161621, avg loss: 3.360286, ppl: 28.797413 +epoch: 0, batch: 25123, sum loss: 5379.625977, avg loss: 3.166349, ppl: 23.720715 +epoch: 0, batch: 25124, sum loss: 5752.323242, avg loss: 3.367871, ppl: 29.016672 +epoch: 0, batch: 25125, sum loss: 4866.935547, avg loss: 3.105894, ppl: 22.329174 +epoch: 0, batch: 25126, sum loss: 5185.890137, avg loss: 3.165989, ppl: 23.712181 +epoch: 0, batch: 25127, sum loss: 5327.000977, avg loss: 3.199400, ppl: 
24.517815 +epoch: 0, batch: 25128, sum loss: 5855.722656, avg loss: 3.027778, ppl: 20.651293 +epoch: 0, batch: 25129, sum loss: 3980.558105, avg loss: 2.805186, ppl: 16.530144 +epoch: 0, batch: 25130, sum loss: 4555.731445, avg loss: 3.053439, ppl: 21.188093 +epoch: 0, batch: 25131, sum loss: 5842.937012, avg loss: 3.310446, ppl: 27.397343 +epoch: 0, batch: 25132, sum loss: 4874.212402, avg loss: 3.023705, ppl: 20.567352 +epoch: 0, batch: 25133, sum loss: 4845.483887, avg loss: 2.911950, ppl: 18.392622 +epoch: 0, batch: 25134, sum loss: 5627.061523, avg loss: 3.353434, ppl: 28.600769 +epoch: 0, batch: 25135, sum loss: 5016.161621, avg loss: 3.030914, ppl: 20.716150 +epoch: 0, batch: 25136, sum loss: 5550.393066, avg loss: 3.144699, ppl: 23.212679 +epoch: 0, batch: 25137, sum loss: 4490.914551, avg loss: 3.024185, ppl: 20.577221 +epoch: 0, batch: 25138, sum loss: 5229.275879, avg loss: 3.099749, ppl: 22.192371 +epoch: 0, batch: 25139, sum loss: 6346.874512, avg loss: 3.539807, ppl: 34.460278 +epoch: 0, batch: 25140, sum loss: 5545.962891, avg loss: 3.178202, ppl: 24.003559 +epoch: 0, batch: 25141, sum loss: 5962.939941, avg loss: 3.178540, ppl: 24.011660 +epoch: 0, batch: 25142, sum loss: 5959.873047, avg loss: 3.290930, ppl: 26.867826 +epoch: 0, batch: 25143, sum loss: 5706.809082, avg loss: 3.038769, ppl: 20.879536 +epoch: 0, batch: 25144, sum loss: 4446.067871, avg loss: 2.775323, ppl: 16.043810 +epoch: 0, batch: 25145, sum loss: 5005.984375, avg loss: 3.109307, ppl: 22.405514 +epoch: 0, batch: 25146, sum loss: 4631.924805, avg loss: 3.053345, ppl: 21.186098 +epoch: 0, batch: 25147, sum loss: 5706.638672, avg loss: 3.170355, ppl: 23.815933 +epoch: 0, batch: 25148, sum loss: 4818.504883, avg loss: 2.959769, ppl: 19.293524 +epoch: 0, batch: 25149, sum loss: 5436.442871, avg loss: 3.099454, ppl: 22.185839 +epoch: 0, batch: 25150, sum loss: 5044.504395, avg loss: 3.111970, ppl: 22.465267 +epoch: 0, batch: 25151, sum loss: 4332.076172, avg loss: 2.759284, ppl: 
15.788538 +epoch: 0, batch: 25152, sum loss: 4460.167480, avg loss: 2.926619, ppl: 18.664421 +epoch: 0, batch: 25153, sum loss: 5303.845215, avg loss: 2.969678, ppl: 19.485647 +epoch: 0, batch: 25154, sum loss: 5213.791016, avg loss: 3.137058, ppl: 23.036007 +epoch: 0, batch: 25155, sum loss: 5335.789062, avg loss: 3.176065, ppl: 23.952314 +epoch: 0, batch: 25156, sum loss: 5448.046387, avg loss: 3.104300, ppl: 22.293604 +epoch: 0, batch: 25157, sum loss: 5243.445312, avg loss: 3.331287, ppl: 27.974312 +epoch: 0, batch: 25158, sum loss: 6096.222656, avg loss: 3.507608, ppl: 33.368355 +epoch: 0, batch: 25159, sum loss: 5670.204102, avg loss: 3.150114, ppl: 23.338715 +epoch: 0, batch: 25160, sum loss: 6160.915527, avg loss: 3.275341, ppl: 26.452232 +epoch: 0, batch: 25161, sum loss: 5055.775391, avg loss: 3.274466, ppl: 26.429110 +epoch: 0, batch: 25162, sum loss: 4889.247559, avg loss: 2.901631, ppl: 18.203806 +epoch: 0, batch: 25163, sum loss: 7647.602539, avg loss: 3.610766, ppl: 36.994377 +epoch: 0, batch: 25164, sum loss: 5414.190918, avg loss: 3.109817, ppl: 22.416937 +epoch: 0, batch: 25165, sum loss: 4769.786621, avg loss: 3.222829, ppl: 25.099022 +epoch: 0, batch: 25166, sum loss: 5044.015625, avg loss: 2.977577, ppl: 19.640171 +epoch: 0, batch: 25167, sum loss: 5483.305176, avg loss: 3.042900, ppl: 20.965954 +epoch: 0, batch: 25168, sum loss: 4066.848633, avg loss: 2.906968, ppl: 18.301231 +epoch: 0, batch: 25169, sum loss: 4959.189941, avg loss: 2.929232, ppl: 18.713255 +epoch: 0, batch: 25170, sum loss: 4768.042969, avg loss: 3.114333, ppl: 22.518398 +epoch: 0, batch: 25171, sum loss: 4984.677246, avg loss: 2.859826, ppl: 17.458494 +epoch: 0, batch: 25172, sum loss: 4560.458984, avg loss: 2.969049, ppl: 19.473391 +epoch: 0, batch: 25173, sum loss: 5275.159668, avg loss: 2.973596, ppl: 19.562141 +epoch: 0, batch: 25174, sum loss: 5250.934570, avg loss: 2.990282, ppl: 19.891287 +epoch: 0, batch: 25175, sum loss: 5248.491211, avg loss: 3.237811, ppl: 
25.477880 +epoch: 0, batch: 25176, sum loss: 4265.749023, avg loss: 2.960270, ppl: 19.303186 +epoch: 0, batch: 25177, sum loss: 5624.922852, avg loss: 3.145930, ppl: 23.241282 +epoch: 0, batch: 25178, sum loss: 5429.377441, avg loss: 2.989745, ppl: 19.880615 +epoch: 0, batch: 25179, sum loss: 5067.155762, avg loss: 3.254435, ppl: 25.904982 +epoch: 0, batch: 25180, sum loss: 4760.213867, avg loss: 2.853845, ppl: 17.354387 +epoch: 0, batch: 25181, sum loss: 4421.167969, avg loss: 2.614529, ppl: 13.660779 +epoch: 0, batch: 25182, sum loss: 5279.752441, avg loss: 3.037832, ppl: 20.859976 +epoch: 0, batch: 25183, sum loss: 5852.726562, avg loss: 3.077143, ppl: 21.696337 +epoch: 0, batch: 25184, sum loss: 4323.315430, avg loss: 2.945038, ppl: 19.011377 +epoch: 0, batch: 25185, sum loss: 5044.315918, avg loss: 3.115699, ppl: 22.549177 +epoch: 0, batch: 25186, sum loss: 7998.367188, avg loss: 3.658905, ppl: 38.818825 +epoch: 0, batch: 25187, sum loss: 4280.828125, avg loss: 2.838745, ppl: 17.094305 +epoch: 0, batch: 25188, sum loss: 5645.492676, avg loss: 3.202208, ppl: 24.586760 +epoch: 0, batch: 25189, sum loss: 4527.551758, avg loss: 3.141951, ppl: 23.148994 +epoch: 0, batch: 25190, sum loss: 5113.956055, avg loss: 2.890874, ppl: 18.009045 +epoch: 0, batch: 25191, sum loss: 4438.944824, avg loss: 2.945551, ppl: 19.021133 +epoch: 0, batch: 25192, sum loss: 4896.442383, avg loss: 2.916285, ppl: 18.472530 +epoch: 0, batch: 25193, sum loss: 5526.307129, avg loss: 3.165124, ppl: 23.691692 +epoch: 0, batch: 25194, sum loss: 5018.080078, avg loss: 3.254267, ppl: 25.900627 +epoch: 0, batch: 25195, sum loss: 5734.803711, avg loss: 3.130351, ppl: 22.882017 +epoch: 0, batch: 25196, sum loss: 4629.583496, avg loss: 2.742644, ppl: 15.527991 +epoch: 0, batch: 25197, sum loss: 4967.551270, avg loss: 3.066390, ppl: 21.464272 +epoch: 0, batch: 25198, sum loss: 4839.101562, avg loss: 3.092078, ppl: 22.022787 +epoch: 0, batch: 25199, sum loss: 6164.648926, avg loss: 3.266905, ppl: 
26.230021 +epoch: 0, batch: 25200, sum loss: 5883.244141, avg loss: 3.034164, ppl: 20.783600 +epoch: 0, batch: 25201, sum loss: 4985.359863, avg loss: 3.052884, ppl: 21.176331 +epoch: 0, batch: 25202, sum loss: 5682.102539, avg loss: 3.299711, ppl: 27.104805 +epoch: 0, batch: 25203, sum loss: 4458.202637, avg loss: 2.760497, ppl: 15.807699 +epoch: 0, batch: 25204, sum loss: 4393.097168, avg loss: 2.964303, ppl: 19.381195 +epoch: 0, batch: 25205, sum loss: 4837.070801, avg loss: 2.919174, ppl: 18.525974 +epoch: 0, batch: 25206, sum loss: 5583.821777, avg loss: 3.220197, ppl: 25.033051 +epoch: 0, batch: 25207, sum loss: 5103.671875, avg loss: 3.041521, ppl: 20.937061 +epoch: 0, batch: 25208, sum loss: 5987.707520, avg loss: 3.264835, ppl: 26.175789 +epoch: 0, batch: 25209, sum loss: 4775.007324, avg loss: 2.967686, ppl: 19.446875 +epoch: 0, batch: 25210, sum loss: 5558.545898, avg loss: 2.964558, ppl: 19.386131 +epoch: 0, batch: 25211, sum loss: 5853.860840, avg loss: 3.283153, ppl: 26.659687 +epoch: 0, batch: 25212, sum loss: 4981.674316, avg loss: 3.090369, ppl: 21.985184 +epoch: 0, batch: 25213, sum loss: 4573.860840, avg loss: 2.775401, ppl: 16.045061 +epoch: 0, batch: 25214, sum loss: 4597.620117, avg loss: 3.022762, ppl: 20.547958 +epoch: 0, batch: 25215, sum loss: 5710.369629, avg loss: 2.961810, ppl: 19.332935 +epoch: 0, batch: 25216, sum loss: 5038.234375, avg loss: 3.143003, ppl: 23.173357 +epoch: 0, batch: 25217, sum loss: 5341.760254, avg loss: 3.365949, ppl: 28.960958 +epoch: 0, batch: 25218, sum loss: 4622.162598, avg loss: 2.701439, ppl: 14.901165 +epoch: 0, batch: 25219, sum loss: 4506.618164, avg loss: 2.970744, ppl: 19.506420 +epoch: 0, batch: 25220, sum loss: 5077.555176, avg loss: 3.027761, ppl: 20.650944 +epoch: 0, batch: 25221, sum loss: 4837.513672, avg loss: 2.953305, ppl: 19.169199 +epoch: 0, batch: 25222, sum loss: 5257.214844, avg loss: 3.239196, ppl: 25.513197 +epoch: 0, batch: 25223, sum loss: 5744.627441, avg loss: 3.151195, ppl: 
23.363958 +epoch: 0, batch: 25224, sum loss: 6638.845215, avg loss: 3.438035, ppl: 31.125729 +epoch: 0, batch: 25225, sum loss: 5214.418945, avg loss: 3.306544, ppl: 27.290634 +epoch: 0, batch: 25226, sum loss: 4972.923828, avg loss: 3.067812, ppl: 21.494825 +epoch: 0, batch: 25227, sum loss: 4321.763184, avg loss: 3.058573, ppl: 21.297134 +epoch: 0, batch: 25228, sum loss: 5991.692871, avg loss: 3.099686, ppl: 22.190985 +epoch: 0, batch: 25229, sum loss: 4844.546387, avg loss: 3.156056, ppl: 23.477821 +epoch: 0, batch: 25230, sum loss: 5492.982910, avg loss: 3.108649, ppl: 22.390779 +epoch: 0, batch: 25231, sum loss: 5598.956543, avg loss: 3.195752, ppl: 24.428530 +epoch: 0, batch: 25232, sum loss: 5682.217773, avg loss: 3.127253, ppl: 22.811222 +epoch: 0, batch: 25233, sum loss: 4398.187012, avg loss: 2.930171, ppl: 18.730839 +epoch: 0, batch: 25234, sum loss: 4470.908203, avg loss: 2.906962, ppl: 18.301123 +epoch: 0, batch: 25235, sum loss: 6628.208496, avg loss: 3.362866, ppl: 28.871811 +epoch: 0, batch: 25236, sum loss: 5701.113281, avg loss: 3.299255, ppl: 27.092457 +epoch: 0, batch: 25237, sum loss: 4487.762695, avg loss: 2.856628, ppl: 17.402748 +epoch: 0, batch: 25238, sum loss: 4748.446289, avg loss: 2.838282, ppl: 17.086391 +epoch: 0, batch: 25239, sum loss: 4531.593750, avg loss: 2.952178, ppl: 19.147617 +epoch: 0, batch: 25240, sum loss: 4443.064453, avg loss: 2.684631, ppl: 14.652795 +epoch: 0, batch: 25241, sum loss: 4965.794922, avg loss: 2.978881, ppl: 19.665800 +epoch: 0, batch: 25242, sum loss: 5386.957520, avg loss: 2.968021, ppl: 19.453377 +epoch: 0, batch: 25243, sum loss: 6343.161133, avg loss: 3.395696, ppl: 29.835424 +epoch: 0, batch: 25244, sum loss: 5968.033203, avg loss: 3.354712, ppl: 28.637363 +epoch: 0, batch: 25245, sum loss: 4994.419434, avg loss: 3.012316, ppl: 20.334435 +epoch: 0, batch: 25246, sum loss: 4490.927246, avg loss: 2.918082, ppl: 18.505751 +epoch: 0, batch: 25247, sum loss: 4261.896484, avg loss: 3.020479, ppl: 
20.501118 +epoch: 0, batch: 25248, sum loss: 5482.266113, avg loss: 3.007277, ppl: 20.232237 +epoch: 0, batch: 25249, sum loss: 5089.217773, avg loss: 3.045612, ppl: 21.022900 +epoch: 0, batch: 25250, sum loss: 5993.929199, avg loss: 3.392150, ppl: 29.729807 +epoch: 0, batch: 25251, sum loss: 5392.666016, avg loss: 3.057067, ppl: 21.265093 +epoch: 0, batch: 25252, sum loss: 4557.306152, avg loss: 3.058595, ppl: 21.297611 +epoch: 0, batch: 25253, sum loss: 5216.138672, avg loss: 3.034403, ppl: 20.788565 +epoch: 0, batch: 25254, sum loss: 5072.244141, avg loss: 3.210281, ppl: 24.786053 +epoch: 0, batch: 25255, sum loss: 5596.025391, avg loss: 3.305390, ppl: 27.259174 +epoch: 0, batch: 25256, sum loss: 6109.341309, avg loss: 3.047053, ppl: 21.053205 +epoch: 0, batch: 25257, sum loss: 5137.494629, avg loss: 3.268126, ppl: 26.262085 +epoch: 0, batch: 25258, sum loss: 4956.581543, avg loss: 3.220651, ppl: 25.044416 +epoch: 0, batch: 25259, sum loss: 5656.208496, avg loss: 3.188392, ppl: 24.249413 +epoch: 0, batch: 25260, sum loss: 5618.866211, avg loss: 3.185298, ppl: 24.174501 +epoch: 0, batch: 25261, sum loss: 4983.969238, avg loss: 2.885912, ppl: 17.919897 +epoch: 0, batch: 25262, sum loss: 5490.824219, avg loss: 3.057252, ppl: 21.269028 +epoch: 0, batch: 25263, sum loss: 4354.046875, avg loss: 2.829140, ppl: 16.930897 +epoch: 0, batch: 25264, sum loss: 4727.629395, avg loss: 3.081897, ppl: 21.799707 +epoch: 0, batch: 25265, sum loss: 4976.170898, avg loss: 3.362278, ppl: 28.854839 +epoch: 0, batch: 25266, sum loss: 5234.900879, avg loss: 2.979454, ppl: 19.677071 +epoch: 0, batch: 25267, sum loss: 6055.726074, avg loss: 3.434898, ppl: 31.028259 +epoch: 0, batch: 25268, sum loss: 6527.516602, avg loss: 3.349162, ppl: 28.478861 +epoch: 0, batch: 25269, sum loss: 5418.768555, avg loss: 3.057996, ppl: 21.284855 +epoch: 0, batch: 25270, sum loss: 5262.305176, avg loss: 3.123030, ppl: 22.715096 +epoch: 0, batch: 25271, sum loss: 5334.496094, avg loss: 3.096051, ppl: 
22.110470 +epoch: 0, batch: 25272, sum loss: 5278.771484, avg loss: 3.147747, ppl: 23.283543 +epoch: 0, batch: 25273, sum loss: 4248.500000, avg loss: 2.802441, ppl: 16.484831 +epoch: 0, batch: 25274, sum loss: 5657.438477, avg loss: 3.190885, ppl: 24.309929 +epoch: 0, batch: 25275, sum loss: 4707.078125, avg loss: 3.084586, ppl: 21.858419 +epoch: 0, batch: 25276, sum loss: 4878.249023, avg loss: 3.073881, ppl: 21.625673 +epoch: 0, batch: 25277, sum loss: 4029.689453, avg loss: 2.758172, ppl: 15.770991 +epoch: 0, batch: 25278, sum loss: 4515.409180, avg loss: 2.915048, ppl: 18.449696 +epoch: 0, batch: 25279, sum loss: 5887.786621, avg loss: 3.313329, ppl: 27.476456 +epoch: 0, batch: 25280, sum loss: 5044.239746, avg loss: 3.160551, ppl: 23.583593 +epoch: 0, batch: 25281, sum loss: 4250.447754, avg loss: 2.919264, ppl: 18.527639 +epoch: 0, batch: 25282, sum loss: 4102.207031, avg loss: 2.742117, ppl: 15.519809 +epoch: 0, batch: 25283, sum loss: 5068.923828, avg loss: 3.046228, ppl: 21.035856 +epoch: 0, batch: 25284, sum loss: 4600.409668, avg loss: 2.907971, ppl: 18.319584 +epoch: 0, batch: 25285, sum loss: 5619.693359, avg loss: 3.166024, ppl: 23.713018 +epoch: 0, batch: 25286, sum loss: 3996.889648, avg loss: 2.802868, ppl: 16.491877 +epoch: 0, batch: 25287, sum loss: 4796.927246, avg loss: 2.826710, ppl: 16.889805 +epoch: 0, batch: 25288, sum loss: 4814.187988, avg loss: 3.200923, ppl: 24.555178 +epoch: 0, batch: 25289, sum loss: 4724.348633, avg loss: 2.976905, ppl: 19.626984 +epoch: 0, batch: 25290, sum loss: 5452.502930, avg loss: 3.340995, ppl: 28.247208 +epoch: 0, batch: 25291, sum loss: 3607.104492, avg loss: 2.589450, ppl: 13.322447 +epoch: 0, batch: 25292, sum loss: 4539.866211, avg loss: 2.927058, ppl: 18.672606 +epoch: 0, batch: 25293, sum loss: 5172.181641, avg loss: 3.033538, ppl: 20.770582 +epoch: 0, batch: 25294, sum loss: 5926.429688, avg loss: 3.355849, ppl: 28.669935 +epoch: 0, batch: 25295, sum loss: 5029.967773, avg loss: 3.205843, ppl: 
24.676292 +epoch: 0, batch: 25296, sum loss: 5746.075684, avg loss: 3.190492, ppl: 24.300373 +epoch: 0, batch: 25297, sum loss: 5686.867676, avg loss: 3.399204, ppl: 29.940252 +epoch: 0, batch: 25298, sum loss: 4039.356445, avg loss: 2.844617, ppl: 17.194979 +epoch: 0, batch: 25299, sum loss: 4855.233398, avg loss: 3.006336, ppl: 20.213211 +epoch: 0, batch: 25300, sum loss: 4743.062988, avg loss: 3.112246, ppl: 22.471460 +epoch: 0, batch: 25301, sum loss: 5787.054688, avg loss: 3.058697, ppl: 21.299786 +epoch: 0, batch: 25302, sum loss: 4875.158691, avg loss: 2.974471, ppl: 19.579269 +epoch: 0, batch: 25303, sum loss: 4517.864746, avg loss: 2.795708, ppl: 16.374224 +epoch: 0, batch: 25304, sum loss: 4140.401855, avg loss: 2.693820, ppl: 14.788066 +epoch: 0, batch: 25305, sum loss: 6330.906250, avg loss: 3.248285, ppl: 25.746136 +epoch: 0, batch: 25306, sum loss: 5553.171387, avg loss: 3.226712, ppl: 25.196674 +epoch: 0, batch: 25307, sum loss: 4501.651855, avg loss: 3.009126, ppl: 20.269670 +epoch: 0, batch: 25308, sum loss: 4356.801758, avg loss: 2.935850, ppl: 18.837511 +epoch: 0, batch: 25309, sum loss: 4780.041016, avg loss: 3.015799, ppl: 20.405384 +epoch: 0, batch: 25310, sum loss: 5403.906250, avg loss: 3.125452, ppl: 22.770185 +epoch: 0, batch: 25311, sum loss: 4571.552734, avg loss: 2.811533, ppl: 16.635401 +epoch: 0, batch: 25312, sum loss: 6059.734863, avg loss: 3.273763, ppl: 26.410534 +epoch: 0, batch: 25313, sum loss: 5640.388184, avg loss: 3.219400, ppl: 25.013105 +epoch: 0, batch: 25314, sum loss: 4541.248047, avg loss: 3.033566, ppl: 20.771166 +epoch: 0, batch: 25315, sum loss: 5145.224121, avg loss: 2.970684, ppl: 19.505249 +epoch: 0, batch: 25316, sum loss: 5083.791016, avg loss: 3.017087, ppl: 20.431683 +epoch: 0, batch: 25317, sum loss: 5364.061035, avg loss: 3.309106, ppl: 27.360657 +epoch: 0, batch: 25318, sum loss: 4725.771484, avg loss: 2.846850, ppl: 17.233418 +epoch: 0, batch: 25319, sum loss: 5237.776367, avg loss: 3.184058, ppl: 
24.144545 +epoch: 0, batch: 25320, sum loss: 5821.371582, avg loss: 3.028810, ppl: 20.672607 +epoch: 0, batch: 25321, sum loss: 5002.158691, avg loss: 3.130262, ppl: 22.879976 +epoch: 0, batch: 25322, sum loss: 5730.104492, avg loss: 3.174573, ppl: 23.916609 +epoch: 0, batch: 25323, sum loss: 4915.685059, avg loss: 3.200316, ppl: 24.540283 +epoch: 0, batch: 25324, sum loss: 5713.411133, avg loss: 3.312123, ppl: 27.443314 +epoch: 0, batch: 25325, sum loss: 5268.847168, avg loss: 3.248364, ppl: 25.748194 +epoch: 0, batch: 25326, sum loss: 4686.294434, avg loss: 3.192299, ppl: 24.344322 +epoch: 0, batch: 25327, sum loss: 5364.608887, avg loss: 3.139034, ppl: 23.081560 +epoch: 0, batch: 25328, sum loss: 5864.616211, avg loss: 3.117818, ppl: 22.597027 +epoch: 0, batch: 25329, sum loss: 5428.422363, avg loss: 3.079082, ppl: 21.738447 +epoch: 0, batch: 25330, sum loss: 4338.421875, avg loss: 3.092247, ppl: 22.026506 +epoch: 0, batch: 25331, sum loss: 4325.133789, avg loss: 2.885346, ppl: 17.909767 +epoch: 0, batch: 25332, sum loss: 5658.207031, avg loss: 3.246246, ppl: 25.693707 +epoch: 0, batch: 25333, sum loss: 5822.630371, avg loss: 3.401069, ppl: 29.996147 +epoch: 0, batch: 25334, sum loss: 4834.106445, avg loss: 3.139030, ppl: 23.081472 +epoch: 0, batch: 25335, sum loss: 5251.605469, avg loss: 3.278156, ppl: 26.526800 +epoch: 0, batch: 25336, sum loss: 4715.866211, avg loss: 3.072226, ppl: 21.589899 +epoch: 0, batch: 25337, sum loss: 5167.772949, avg loss: 2.919646, ppl: 18.534721 +epoch: 0, batch: 25338, sum loss: 6726.222656, avg loss: 3.313410, ppl: 27.478676 +epoch: 0, batch: 25339, sum loss: 4841.048340, avg loss: 3.216644, ppl: 24.944256 +epoch: 0, batch: 25340, sum loss: 4641.534180, avg loss: 3.077940, ppl: 21.713636 +epoch: 0, batch: 25341, sum loss: 5053.115723, avg loss: 2.861334, ppl: 17.484833 +epoch: 0, batch: 25342, sum loss: 5742.789551, avg loss: 3.114311, ppl: 22.517910 +epoch: 0, batch: 25343, sum loss: 4757.575684, avg loss: 2.934963, ppl: 
18.820810 +epoch: 0, batch: 25344, sum loss: 5230.812012, avg loss: 3.209087, ppl: 24.756477 +epoch: 0, batch: 25345, sum loss: 5895.972168, avg loss: 2.940634, ppl: 18.927853 +epoch: 0, batch: 25346, sum loss: 6386.468750, avg loss: 3.440985, ppl: 31.217699 +epoch: 0, batch: 25347, sum loss: 5919.874512, avg loss: 3.267039, ppl: 26.233541 +epoch: 0, batch: 25348, sum loss: 5602.625000, avg loss: 3.255447, ppl: 25.931213 +epoch: 0, batch: 25349, sum loss: 5388.750000, avg loss: 3.326389, ppl: 27.837641 +epoch: 0, batch: 25350, sum loss: 3332.172363, avg loss: 2.653003, ppl: 14.196613 +epoch: 0, batch: 25351, sum loss: 4837.652344, avg loss: 3.014113, ppl: 20.371023 +epoch: 0, batch: 25352, sum loss: 5194.458984, avg loss: 2.971659, ppl: 19.524279 +epoch: 0, batch: 25353, sum loss: 5856.607422, avg loss: 3.329510, ppl: 27.924648 +epoch: 0, batch: 25354, sum loss: 5947.223145, avg loss: 3.206050, ppl: 24.681406 +epoch: 0, batch: 25355, sum loss: 4895.902832, avg loss: 2.810507, ppl: 16.618340 +epoch: 0, batch: 25356, sum loss: 5203.564453, avg loss: 3.140353, ppl: 23.112017 +epoch: 0, batch: 25357, sum loss: 5773.228027, avg loss: 3.325592, ppl: 27.815468 +epoch: 0, batch: 25358, sum loss: 4815.335938, avg loss: 2.983479, ppl: 19.756439 +epoch: 0, batch: 25359, sum loss: 4420.561035, avg loss: 3.005140, ppl: 20.189043 +epoch: 0, batch: 25360, sum loss: 4845.278809, avg loss: 3.109935, ppl: 22.419588 +epoch: 0, batch: 25361, sum loss: 5507.000977, avg loss: 3.127201, ppl: 22.810041 +epoch: 0, batch: 25362, sum loss: 5354.319336, avg loss: 3.311268, ppl: 27.419861 +epoch: 0, batch: 25363, sum loss: 4617.625488, avg loss: 2.920699, ppl: 18.554255 +epoch: 0, batch: 25364, sum loss: 6048.709961, avg loss: 3.024355, ppl: 20.580729 +epoch: 0, batch: 25365, sum loss: 4771.170898, avg loss: 3.046725, ppl: 21.046295 +epoch: 0, batch: 25366, sum loss: 5719.595215, avg loss: 3.118645, ppl: 22.615713 +epoch: 0, batch: 25367, sum loss: 5022.736816, avg loss: 2.959774, ppl: 
19.293615 +epoch: 0, batch: 25368, sum loss: 4853.460938, avg loss: 3.058262, ppl: 21.290514 +epoch: 0, batch: 25369, sum loss: 5335.056641, avg loss: 2.977152, ppl: 19.631832 +epoch: 0, batch: 25370, sum loss: 4519.086914, avg loss: 2.876567, ppl: 17.753223 +epoch: 0, batch: 25371, sum loss: 5871.545410, avg loss: 3.143226, ppl: 23.178513 +epoch: 0, batch: 25372, sum loss: 5288.935059, avg loss: 3.048378, ppl: 21.081112 +epoch: 0, batch: 25373, sum loss: 5164.782715, avg loss: 3.139685, ppl: 23.096600 +epoch: 0, batch: 25374, sum loss: 4363.110840, avg loss: 2.775516, ppl: 16.046902 +epoch: 0, batch: 25375, sum loss: 4809.566406, avg loss: 3.112988, ppl: 22.488144 +epoch: 0, batch: 25376, sum loss: 4350.458008, avg loss: 2.910005, ppl: 18.356897 +epoch: 0, batch: 25377, sum loss: 5305.907227, avg loss: 3.263165, ppl: 26.132128 +epoch: 0, batch: 25378, sum loss: 5426.771484, avg loss: 3.122423, ppl: 22.701323 +epoch: 0, batch: 25379, sum loss: 5415.015137, avg loss: 3.215567, ppl: 24.917425 +epoch: 0, batch: 25380, sum loss: 5736.306641, avg loss: 3.001730, ppl: 20.120319 +epoch: 0, batch: 25381, sum loss: 4840.066895, avg loss: 3.231019, ppl: 25.305437 +epoch: 0, batch: 25382, sum loss: 5190.956543, avg loss: 3.127082, ppl: 22.807339 +epoch: 0, batch: 25383, sum loss: 4401.345215, avg loss: 2.872941, ppl: 17.688961 +epoch: 0, batch: 25384, sum loss: 5244.562500, avg loss: 3.035048, ppl: 20.801971 +epoch: 0, batch: 25385, sum loss: 4623.715332, avg loss: 3.035926, ppl: 20.820251 +epoch: 0, batch: 25386, sum loss: 3844.673584, avg loss: 2.746195, ppl: 15.583230 +epoch: 0, batch: 25387, sum loss: 4758.834961, avg loss: 3.078160, ppl: 21.718399 +epoch: 0, batch: 25388, sum loss: 5331.647949, avg loss: 2.962027, ppl: 19.337120 +epoch: 0, batch: 25389, sum loss: 5162.958008, avg loss: 3.087894, ppl: 21.930838 +epoch: 0, batch: 25390, sum loss: 5476.993164, avg loss: 3.138678, ppl: 23.073339 +epoch: 0, batch: 25391, sum loss: 5916.108398, avg loss: 3.187558, ppl: 
24.229197 +epoch: 0, batch: 25392, sum loss: 4438.055176, avg loss: 2.778995, ppl: 16.102835 +epoch: 0, batch: 25393, sum loss: 4785.875977, avg loss: 2.904051, ppl: 18.247919 +epoch: 0, batch: 25394, sum loss: 5627.605469, avg loss: 3.343794, ppl: 28.326397 +epoch: 0, batch: 25395, sum loss: 5488.529297, avg loss: 2.991024, ppl: 19.906061 +epoch: 0, batch: 25396, sum loss: 5383.324707, avg loss: 3.176003, ppl: 23.950825 +epoch: 0, batch: 25397, sum loss: 4889.796387, avg loss: 3.012814, ppl: 20.344561 +epoch: 0, batch: 25398, sum loss: 4492.234375, avg loss: 2.874110, ppl: 17.709660 +epoch: 0, batch: 25399, sum loss: 4435.916992, avg loss: 3.009442, ppl: 20.276075 +epoch: 0, batch: 25400, sum loss: 4393.086426, avg loss: 2.984434, ppl: 19.775299 +epoch: 0, batch: 25401, sum loss: 5900.559082, avg loss: 3.194672, ppl: 24.402170 +epoch: 0, batch: 25402, sum loss: 5692.847656, avg loss: 3.260509, ppl: 26.062788 +epoch: 0, batch: 25403, sum loss: 4859.064453, avg loss: 2.977368, ppl: 19.636068 +epoch: 0, batch: 25404, sum loss: 4990.635254, avg loss: 3.052376, ppl: 21.165581 +epoch: 0, batch: 25405, sum loss: 4596.599121, avg loss: 2.795985, ppl: 16.378750 +epoch: 0, batch: 25406, sum loss: 5587.941895, avg loss: 3.432397, ppl: 30.950731 +epoch: 0, batch: 25407, sum loss: 5279.043457, avg loss: 3.026974, ppl: 20.634706 +epoch: 0, batch: 25408, sum loss: 4166.385254, avg loss: 2.775740, ppl: 16.050495 +epoch: 0, batch: 25409, sum loss: 5602.091797, avg loss: 3.210368, ppl: 24.788198 +epoch: 0, batch: 25410, sum loss: 4761.867676, avg loss: 2.926778, ppl: 18.667385 +epoch: 0, batch: 25411, sum loss: 5687.320312, avg loss: 3.202320, ppl: 24.589514 +epoch: 0, batch: 25412, sum loss: 4110.002441, avg loss: 2.870114, ppl: 17.639023 +epoch: 0, batch: 25413, sum loss: 4504.723145, avg loss: 2.861959, ppl: 17.495764 +epoch: 0, batch: 25414, sum loss: 4998.713867, avg loss: 3.187955, ppl: 24.238811 +epoch: 0, batch: 25415, sum loss: 5570.865234, avg loss: 3.152725, ppl: 
23.399748 +epoch: 0, batch: 25416, sum loss: 4864.778320, avg loss: 3.002950, ppl: 20.144871 +epoch: 0, batch: 25417, sum loss: 5655.002441, avg loss: 3.268787, ppl: 26.279453 +epoch: 0, batch: 25418, sum loss: 4499.207031, avg loss: 2.768743, ppl: 15.938583 +epoch: 0, batch: 25419, sum loss: 4633.810547, avg loss: 2.929084, ppl: 18.710476 +epoch: 0, batch: 25420, sum loss: 4840.666992, avg loss: 2.877923, ppl: 17.777315 +epoch: 0, batch: 25421, sum loss: 6051.645508, avg loss: 3.196854, ppl: 24.455481 +epoch: 0, batch: 25422, sum loss: 4146.114746, avg loss: 2.733101, ppl: 15.380514 +epoch: 0, batch: 25423, sum loss: 3873.766357, avg loss: 2.809113, ppl: 16.595184 +epoch: 0, batch: 25424, sum loss: 5096.459961, avg loss: 3.031802, ppl: 20.734571 +epoch: 0, batch: 25425, sum loss: 6101.419922, avg loss: 3.214658, ppl: 24.894772 +epoch: 0, batch: 25426, sum loss: 4693.861328, avg loss: 2.764347, ppl: 15.868675 +epoch: 0, batch: 25427, sum loss: 4276.241699, avg loss: 2.721987, ppl: 15.210519 +epoch: 0, batch: 25428, sum loss: 5093.037109, avg loss: 2.893771, ppl: 18.061289 +epoch: 0, batch: 25429, sum loss: 5626.024414, avg loss: 3.157141, ppl: 23.503298 +epoch: 0, batch: 25430, sum loss: 6029.645020, avg loss: 3.180193, ppl: 24.051388 +epoch: 0, batch: 25431, sum loss: 6106.098145, avg loss: 3.360538, ppl: 28.804697 +epoch: 0, batch: 25432, sum loss: 3929.372314, avg loss: 2.771067, ppl: 15.975666 +epoch: 0, batch: 25433, sum loss: 4652.163574, avg loss: 2.772445, ppl: 15.997707 +epoch: 0, batch: 25434, sum loss: 4784.085449, avg loss: 3.049130, ppl: 21.096987 +epoch: 0, batch: 25435, sum loss: 5348.234375, avg loss: 3.082556, ppl: 21.814083 +epoch: 0, batch: 25436, sum loss: 5601.975098, avg loss: 3.202959, ppl: 24.605225 +epoch: 0, batch: 25437, sum loss: 5175.295898, avg loss: 3.055074, ppl: 21.222761 +epoch: 0, batch: 25438, sum loss: 5268.861816, avg loss: 3.313750, ppl: 27.488001 +epoch: 0, batch: 25439, sum loss: 4936.694824, avg loss: 3.036098, ppl: 
20.823820 +epoch: 0, batch: 25440, sum loss: 5146.623535, avg loss: 3.036356, ppl: 20.829203 +epoch: 0, batch: 25441, sum loss: 5219.758301, avg loss: 2.975917, ppl: 19.607592 +epoch: 0, batch: 25442, sum loss: 5237.009766, avg loss: 3.260903, ppl: 26.073061 +epoch: 0, batch: 25443, sum loss: 5709.091797, avg loss: 3.200164, ppl: 24.536551 +epoch: 0, batch: 25444, sum loss: 5713.506348, avg loss: 3.244467, ppl: 25.648031 +epoch: 0, batch: 25445, sum loss: 4877.555176, avg loss: 3.196301, ppl: 24.441952 +epoch: 0, batch: 25446, sum loss: 4511.495605, avg loss: 2.826752, ppl: 16.890507 +epoch: 0, batch: 25447, sum loss: 4572.738770, avg loss: 2.969311, ppl: 19.478489 +epoch: 0, batch: 25448, sum loss: 4994.698242, avg loss: 3.088867, ppl: 21.952202 +epoch: 0, batch: 25449, sum loss: 4693.098633, avg loss: 2.832286, ppl: 16.984249 +epoch: 0, batch: 25450, sum loss: 5030.364258, avg loss: 3.245396, ppl: 25.671885 +epoch: 0, batch: 25451, sum loss: 5846.053711, avg loss: 3.212118, ppl: 24.831615 +epoch: 0, batch: 25452, sum loss: 4954.283203, avg loss: 3.008065, ppl: 20.248182 +epoch: 0, batch: 25453, sum loss: 5354.198730, avg loss: 3.223479, ppl: 25.115345 +epoch: 0, batch: 25454, sum loss: 5497.342285, avg loss: 3.090131, ppl: 21.979948 +epoch: 0, batch: 25455, sum loss: 4478.602051, avg loss: 2.883839, ppl: 17.882795 +epoch: 0, batch: 25456, sum loss: 4615.414551, avg loss: 3.054543, ppl: 21.211491 +epoch: 0, batch: 25457, sum loss: 4698.612305, avg loss: 3.017734, ppl: 20.444918 +epoch: 0, batch: 25458, sum loss: 5259.326660, avg loss: 3.136152, ppl: 23.015129 +epoch: 0, batch: 25459, sum loss: 5863.175781, avg loss: 3.056922, ppl: 21.262007 +epoch: 0, batch: 25460, sum loss: 5473.001953, avg loss: 3.085119, ppl: 21.870075 +epoch: 0, batch: 25461, sum loss: 5247.505859, avg loss: 3.074110, ppl: 21.630623 +epoch: 0, batch: 25462, sum loss: 6144.333496, avg loss: 3.025275, ppl: 20.599669 +epoch: 0, batch: 25463, sum loss: 5175.220703, avg loss: 2.945487, ppl: 
19.019924 +epoch: 0, batch: 25464, sum loss: 5872.093750, avg loss: 3.215824, ppl: 24.923813 +epoch: 0, batch: 25465, sum loss: 4809.390137, avg loss: 2.918319, ppl: 18.510149 +epoch: 0, batch: 25466, sum loss: 4441.693359, avg loss: 2.777794, ppl: 16.083508 +epoch: 0, batch: 25467, sum loss: 5951.743164, avg loss: 3.282815, ppl: 26.650688 +epoch: 0, batch: 25468, sum loss: 5211.331543, avg loss: 3.014073, ppl: 20.370192 +epoch: 0, batch: 25469, sum loss: 5296.204102, avg loss: 2.985459, ppl: 19.795584 +epoch: 0, batch: 25470, sum loss: 5703.062500, avg loss: 3.072771, ppl: 21.601675 +epoch: 0, batch: 25471, sum loss: 4740.807617, avg loss: 2.883703, ppl: 17.880362 +epoch: 0, batch: 25472, sum loss: 5177.833984, avg loss: 3.134282, ppl: 22.972139 +epoch: 0, batch: 25473, sum loss: 4468.858887, avg loss: 2.926561, ppl: 18.663340 +epoch: 0, batch: 25474, sum loss: 5737.916504, avg loss: 3.027924, ppl: 20.654316 +epoch: 0, batch: 25475, sum loss: 4814.444336, avg loss: 2.881176, ppl: 17.835230 +epoch: 0, batch: 25476, sum loss: 4509.368164, avg loss: 2.883228, ppl: 17.871862 +epoch: 0, batch: 25477, sum loss: 5055.133789, avg loss: 3.175335, ppl: 23.934841 +epoch: 0, batch: 25478, sum loss: 4472.850586, avg loss: 2.950429, ppl: 19.114157 +epoch: 0, batch: 25479, sum loss: 5353.114746, avg loss: 3.171277, ppl: 23.837896 +epoch: 0, batch: 25480, sum loss: 3860.022705, avg loss: 2.908834, ppl: 18.335407 +epoch: 0, batch: 25481, sum loss: 4345.880371, avg loss: 3.028488, ppl: 20.665960 +epoch: 0, batch: 25482, sum loss: 4842.702148, avg loss: 3.136465, ppl: 23.022346 +epoch: 0, batch: 25483, sum loss: 4933.378906, avg loss: 3.271471, ppl: 26.350079 +epoch: 0, batch: 25484, sum loss: 4135.103516, avg loss: 2.814910, ppl: 16.691681 +epoch: 0, batch: 25485, sum loss: 5233.961914, avg loss: 3.177876, ppl: 23.995733 +epoch: 0, batch: 25486, sum loss: 5375.286621, avg loss: 2.955078, ppl: 19.203218 +epoch: 0, batch: 25487, sum loss: 6078.086426, avg loss: 3.200677, ppl: 
24.549154 +epoch: 0, batch: 25488, sum loss: 4925.137207, avg loss: 3.083993, ppl: 21.845467 +epoch: 0, batch: 25489, sum loss: 5125.606445, avg loss: 3.123465, ppl: 22.724987 +epoch: 0, batch: 25490, sum loss: 5782.901367, avg loss: 3.304515, ppl: 27.235332 +epoch: 0, batch: 25491, sum loss: 6246.973633, avg loss: 3.279251, ppl: 26.555883 +epoch: 0, batch: 25492, sum loss: 5943.577637, avg loss: 3.254972, ppl: 25.918877 +epoch: 0, batch: 25493, sum loss: 3883.372070, avg loss: 2.549818, ppl: 12.804768 +epoch: 0, batch: 25494, sum loss: 5074.691406, avg loss: 3.177640, ppl: 23.990074 +epoch: 0, batch: 25495, sum loss: 5238.943848, avg loss: 2.944881, ppl: 19.008404 +epoch: 0, batch: 25496, sum loss: 5285.355469, avg loss: 2.989455, ppl: 19.874838 +epoch: 0, batch: 25497, sum loss: 4582.293945, avg loss: 2.874714, ppl: 17.720354 +epoch: 0, batch: 25498, sum loss: 6213.720703, avg loss: 3.032562, ppl: 20.750338 +epoch: 0, batch: 25499, sum loss: 5457.355957, avg loss: 3.291530, ppl: 26.883955 +epoch: 0, batch: 25500, sum loss: 5567.570801, avg loss: 3.323923, ppl: 27.769079 +epoch: 0, batch: 25501, sum loss: 4225.263672, avg loss: 2.932175, ppl: 18.768402 +epoch: 0, batch: 25502, sum loss: 5003.291504, avg loss: 2.953537, ppl: 19.173647 +epoch: 0, batch: 25503, sum loss: 4801.389648, avg loss: 2.940226, ppl: 18.920128 +epoch: 0, batch: 25504, sum loss: 5027.650391, avg loss: 3.120826, ppl: 22.665092 +epoch: 0, batch: 25505, sum loss: 3850.807861, avg loss: 2.858803, ppl: 17.440643 +epoch: 0, batch: 25506, sum loss: 4231.126465, avg loss: 3.046167, ppl: 21.034573 +epoch: 0, batch: 25507, sum loss: 4754.185059, avg loss: 2.940127, ppl: 18.918247 +epoch: 0, batch: 25508, sum loss: 5223.882324, avg loss: 3.069261, ppl: 21.525991 +epoch: 0, batch: 25509, sum loss: 6049.560059, avg loss: 2.951005, ppl: 19.125164 +epoch: 0, batch: 25510, sum loss: 5113.911621, avg loss: 2.913910, ppl: 18.428709 +epoch: 0, batch: 25511, sum loss: 4949.499023, avg loss: 3.176829, ppl: 
23.970613 +epoch: 0, batch: 25512, sum loss: 4618.759277, avg loss: 3.176588, ppl: 23.964853 +epoch: 0, batch: 25513, sum loss: 4640.809570, avg loss: 2.887872, ppl: 17.955055 +epoch: 0, batch: 25514, sum loss: 5346.059570, avg loss: 3.081302, ppl: 21.786758 +epoch: 0, batch: 25515, sum loss: 5280.424316, avg loss: 3.298204, ppl: 27.063988 +epoch: 0, batch: 25516, sum loss: 5902.102539, avg loss: 3.180012, ppl: 24.047047 +epoch: 0, batch: 25517, sum loss: 5826.018555, avg loss: 3.064713, ppl: 21.428301 +epoch: 0, batch: 25518, sum loss: 5034.372070, avg loss: 2.871861, ppl: 17.669870 +epoch: 0, batch: 25519, sum loss: 5808.542480, avg loss: 3.200299, ppl: 24.539867 +epoch: 0, batch: 25520, sum loss: 5592.361328, avg loss: 3.322853, ppl: 27.739374 +epoch: 0, batch: 25521, sum loss: 5593.952637, avg loss: 3.073601, ppl: 21.619604 +epoch: 0, batch: 25522, sum loss: 5341.776367, avg loss: 3.133007, ppl: 22.942862 +epoch: 0, batch: 25523, sum loss: 4315.658203, avg loss: 2.898360, ppl: 18.144365 +epoch: 0, batch: 25524, sum loss: 5342.317383, avg loss: 3.049268, ppl: 21.099894 +epoch: 0, batch: 25525, sum loss: 4458.508301, avg loss: 2.960497, ppl: 19.307564 +epoch: 0, batch: 25526, sum loss: 5155.232422, avg loss: 2.875200, ppl: 17.728962 +epoch: 0, batch: 25527, sum loss: 5651.212891, avg loss: 3.183782, ppl: 24.137861 +epoch: 0, batch: 25528, sum loss: 3986.021484, avg loss: 2.673388, ppl: 14.488975 +epoch: 0, batch: 25529, sum loss: 4263.020020, avg loss: 2.962488, ppl: 19.346039 +epoch: 0, batch: 25530, sum loss: 4516.834473, avg loss: 2.862379, ppl: 17.503122 +epoch: 0, batch: 25531, sum loss: 4398.367188, avg loss: 2.914756, ppl: 18.444317 +epoch: 0, batch: 25532, sum loss: 5395.734375, avg loss: 3.007656, ppl: 20.239899 +epoch: 0, batch: 25533, sum loss: 4699.753906, avg loss: 2.787517, ppl: 16.240648 +epoch: 0, batch: 25534, sum loss: 4000.629150, avg loss: 2.772439, ppl: 15.997601 +epoch: 0, batch: 25535, sum loss: 6284.634766, avg loss: 3.182094, ppl: 
24.097151 +epoch: 0, batch: 25536, sum loss: 4758.690430, avg loss: 2.926624, ppl: 18.664509 +epoch: 0, batch: 25537, sum loss: 5456.456055, avg loss: 3.209680, ppl: 24.771166 +epoch: 0, batch: 25538, sum loss: 5414.433594, avg loss: 3.311580, ppl: 27.428432 +epoch: 0, batch: 25539, sum loss: 4850.013672, avg loss: 2.909426, ppl: 18.346270 +epoch: 0, batch: 25540, sum loss: 6773.696289, avg loss: 3.216380, ppl: 24.937679 +epoch: 0, batch: 25541, sum loss: 6378.771484, avg loss: 3.425764, ppl: 30.746115 +epoch: 0, batch: 25542, sum loss: 4881.740234, avg loss: 3.129321, ppl: 22.858450 +epoch: 0, batch: 25543, sum loss: 5026.939453, avg loss: 2.981577, ppl: 19.718897 +epoch: 0, batch: 25544, sum loss: 4997.555176, avg loss: 2.948410, ppl: 19.075600 +epoch: 0, batch: 25545, sum loss: 5119.283203, avg loss: 2.981528, ppl: 19.717918 +epoch: 0, batch: 25546, sum loss: 4884.561035, avg loss: 2.956756, ppl: 19.235474 +epoch: 0, batch: 25547, sum loss: 5065.494141, avg loss: 3.317285, ppl: 27.585356 +epoch: 0, batch: 25548, sum loss: 4385.533203, avg loss: 2.857025, ppl: 17.409655 +epoch: 0, batch: 25549, sum loss: 5125.213379, avg loss: 3.070829, ppl: 21.559767 +epoch: 0, batch: 25550, sum loss: 5196.922852, avg loss: 2.997072, ppl: 20.026812 +epoch: 0, batch: 25551, sum loss: 4502.528320, avg loss: 3.038143, ppl: 20.866467 +epoch: 0, batch: 25552, sum loss: 6171.869141, avg loss: 3.391137, ppl: 29.699705 +epoch: 0, batch: 25553, sum loss: 4852.611328, avg loss: 2.939195, ppl: 18.900633 +epoch: 0, batch: 25554, sum loss: 5016.074707, avg loss: 2.978667, ppl: 19.661587 +epoch: 0, batch: 25555, sum loss: 6190.088867, avg loss: 3.158208, ppl: 23.528404 +epoch: 0, batch: 25556, sum loss: 4755.776367, avg loss: 2.884037, ppl: 17.886326 +epoch: 0, batch: 25557, sum loss: 6035.723145, avg loss: 3.188443, ppl: 24.250645 +epoch: 0, batch: 25558, sum loss: 5943.902344, avg loss: 3.289376, ppl: 26.826113 +epoch: 0, batch: 25559, sum loss: 5338.125488, avg loss: 3.250990, ppl: 
25.815889 +epoch: 0, batch: 25560, sum loss: 4765.614258, avg loss: 2.945374, ppl: 19.017765 +epoch: 0, batch: 25561, sum loss: 5362.100098, avg loss: 3.139403, ppl: 23.090069 +epoch: 0, batch: 25562, sum loss: 5099.354980, avg loss: 2.942501, ppl: 18.963221 +epoch: 0, batch: 25563, sum loss: 5072.645020, avg loss: 2.987424, ppl: 19.834517 +epoch: 0, batch: 25564, sum loss: 5569.169434, avg loss: 3.158916, ppl: 23.545065 +epoch: 0, batch: 25565, sum loss: 5910.201660, avg loss: 2.994023, ppl: 19.965841 +epoch: 0, batch: 25566, sum loss: 5051.486328, avg loss: 2.976716, ppl: 19.623259 +epoch: 0, batch: 25567, sum loss: 5032.277344, avg loss: 3.013340, ppl: 20.355268 +epoch: 0, batch: 25568, sum loss: 4621.189453, avg loss: 3.026319, ppl: 20.621191 +epoch: 0, batch: 25569, sum loss: 5079.812500, avg loss: 3.069373, ppl: 21.528402 +epoch: 0, batch: 25570, sum loss: 5264.695312, avg loss: 3.013563, ppl: 20.359821 +epoch: 0, batch: 25571, sum loss: 6004.733398, avg loss: 3.150437, ppl: 23.346268 +epoch: 0, batch: 25572, sum loss: 5995.732422, avg loss: 3.244444, ppl: 25.647444 +epoch: 0, batch: 25573, sum loss: 5620.450195, avg loss: 3.113823, ppl: 22.506918 +epoch: 0, batch: 25574, sum loss: 4822.737793, avg loss: 3.021765, ppl: 20.527500 +epoch: 0, batch: 25575, sum loss: 5197.928223, avg loss: 3.114397, ppl: 22.519842 +epoch: 0, batch: 25576, sum loss: 5246.358398, avg loss: 2.959029, ppl: 19.279247 +epoch: 0, batch: 25577, sum loss: 5471.246094, avg loss: 3.051448, ppl: 21.145945 +epoch: 0, batch: 25578, sum loss: 6241.112793, avg loss: 3.359049, ppl: 28.761822 +epoch: 0, batch: 25579, sum loss: 6038.069336, avg loss: 3.149750, ppl: 23.330219 +epoch: 0, batch: 25580, sum loss: 4525.831055, avg loss: 2.899315, ppl: 18.161707 +epoch: 0, batch: 25581, sum loss: 5250.912598, avg loss: 3.028208, ppl: 20.660172 +epoch: 0, batch: 25582, sum loss: 5649.757812, avg loss: 3.533307, ppl: 34.236996 +epoch: 0, batch: 25583, sum loss: 4469.922363, avg loss: 2.850716, ppl: 
17.300158 +epoch: 0, batch: 25584, sum loss: 5322.071777, avg loss: 3.235302, ppl: 25.414040 +epoch: 0, batch: 25585, sum loss: 4699.027832, avg loss: 3.145266, ppl: 23.225859 +epoch: 0, batch: 25586, sum loss: 5024.063477, avg loss: 3.095541, ppl: 22.099195 +epoch: 0, batch: 25587, sum loss: 5038.056641, avg loss: 3.031322, ppl: 20.724607 +epoch: 0, batch: 25588, sum loss: 6355.076172, avg loss: 3.369606, ppl: 29.067066 +epoch: 0, batch: 25589, sum loss: 5075.173828, avg loss: 2.891837, ppl: 18.026392 +epoch: 0, batch: 25590, sum loss: 4889.661133, avg loss: 3.052223, ppl: 21.162334 +epoch: 0, batch: 25591, sum loss: 4433.865234, avg loss: 2.683938, ppl: 14.642643 +epoch: 0, batch: 25592, sum loss: 5462.933105, avg loss: 3.333089, ppl: 28.024780 +epoch: 0, batch: 25593, sum loss: 4673.083008, avg loss: 2.985996, ppl: 19.806210 +epoch: 0, batch: 25594, sum loss: 4051.598145, avg loss: 2.857263, ppl: 17.413794 +epoch: 0, batch: 25595, sum loss: 6000.026367, avg loss: 3.205142, ppl: 24.659014 +epoch: 0, batch: 25596, sum loss: 5416.550781, avg loss: 3.079335, ppl: 21.743948 +epoch: 0, batch: 25597, sum loss: 4169.744141, avg loss: 3.021554, ppl: 20.523155 +epoch: 0, batch: 25598, sum loss: 5501.825195, avg loss: 3.018006, ppl: 20.450480 +epoch: 0, batch: 25599, sum loss: 4968.659180, avg loss: 3.174862, ppl: 23.923521 +epoch: 0, batch: 25600, sum loss: 5225.237793, avg loss: 3.095520, ppl: 22.098728 +epoch: 0, batch: 25601, sum loss: 5976.460938, avg loss: 3.280165, ppl: 26.580164 +epoch: 0, batch: 25602, sum loss: 6632.299316, avg loss: 3.344579, ppl: 28.348625 +epoch: 0, batch: 25603, sum loss: 5752.719727, avg loss: 3.319515, ppl: 27.646938 +epoch: 0, batch: 25604, sum loss: 4745.161133, avg loss: 3.155027, ppl: 23.453674 +epoch: 0, batch: 25605, sum loss: 5138.852051, avg loss: 2.934810, ppl: 18.817926 +epoch: 0, batch: 25606, sum loss: 5531.806152, avg loss: 2.832466, ppl: 16.987303 +epoch: 0, batch: 25607, sum loss: 5239.447754, avg loss: 3.037361, ppl: 
20.850149 +epoch: 0, batch: 25608, sum loss: 4235.991211, avg loss: 2.629417, ppl: 13.865689 +epoch: 0, batch: 25609, sum loss: 4798.873047, avg loss: 3.043039, ppl: 20.968878 +epoch: 0, batch: 25610, sum loss: 6397.940430, avg loss: 3.303015, ppl: 27.194513 +epoch: 0, batch: 25611, sum loss: 5076.385742, avg loss: 3.000228, ppl: 20.090115 +epoch: 0, batch: 25612, sum loss: 4565.550293, avg loss: 2.773725, ppl: 16.018183 +epoch: 0, batch: 25613, sum loss: 6686.889160, avg loss: 3.429174, ppl: 30.851141 +epoch: 0, batch: 25614, sum loss: 6393.299805, avg loss: 3.388076, ppl: 29.608932 +epoch: 0, batch: 25615, sum loss: 5575.157227, avg loss: 3.013599, ppl: 20.360538 +epoch: 0, batch: 25616, sum loss: 4795.594727, avg loss: 2.963903, ppl: 19.373434 +epoch: 0, batch: 25617, sum loss: 3915.423584, avg loss: 2.575937, ppl: 13.143624 +epoch: 0, batch: 25618, sum loss: 4570.282227, avg loss: 3.000842, ppl: 20.102453 +epoch: 0, batch: 25619, sum loss: 5442.916992, avg loss: 3.124522, ppl: 22.749012 +epoch: 0, batch: 25620, sum loss: 5668.597168, avg loss: 3.404563, ppl: 30.101137 +epoch: 0, batch: 25621, sum loss: 6549.605469, avg loss: 3.126303, ppl: 22.789581 +epoch: 0, batch: 25622, sum loss: 5382.566406, avg loss: 2.967236, ppl: 19.438124 +epoch: 0, batch: 25623, sum loss: 5209.441406, avg loss: 3.009498, ppl: 20.277220 +epoch: 0, batch: 25624, sum loss: 5398.697266, avg loss: 2.940467, ppl: 18.924681 +epoch: 0, batch: 25625, sum loss: 5006.632812, avg loss: 2.941617, ppl: 18.946463 +epoch: 0, batch: 25626, sum loss: 4081.284180, avg loss: 2.892476, ppl: 18.037922 +epoch: 0, batch: 25627, sum loss: 4935.026367, avg loss: 3.137334, ppl: 23.042351 +epoch: 0, batch: 25628, sum loss: 4502.104492, avg loss: 2.703967, ppl: 14.938871 +epoch: 0, batch: 25629, sum loss: 4745.217773, avg loss: 2.900500, ppl: 18.183235 +epoch: 0, batch: 25630, sum loss: 5048.237305, avg loss: 3.017476, ppl: 20.439638 +epoch: 0, batch: 25631, sum loss: 5327.060547, avg loss: 3.157712, ppl: 
23.516739 +epoch: 0, batch: 25632, sum loss: 4765.591797, avg loss: 3.080538, ppl: 21.770107 +epoch: 0, batch: 25633, sum loss: 5463.974121, avg loss: 3.151081, ppl: 23.361296 +epoch: 0, batch: 25634, sum loss: 4706.809570, avg loss: 2.927120, ppl: 18.673777 +epoch: 0, batch: 25635, sum loss: 5522.034180, avg loss: 3.223604, ppl: 25.118490 +epoch: 0, batch: 25636, sum loss: 5271.081543, avg loss: 2.967951, ppl: 19.452028 +epoch: 0, batch: 25637, sum loss: 5119.633789, avg loss: 3.221922, ppl: 25.076269 +epoch: 0, batch: 25638, sum loss: 5685.697266, avg loss: 3.112040, ppl: 22.466831 +epoch: 0, batch: 25639, sum loss: 5170.308105, avg loss: 3.004246, ppl: 20.171005 +epoch: 0, batch: 25640, sum loss: 5897.774902, avg loss: 3.099199, ppl: 22.180168 +epoch: 0, batch: 25641, sum loss: 5208.409668, avg loss: 3.273670, ppl: 26.408091 +epoch: 0, batch: 25642, sum loss: 4528.960449, avg loss: 3.167105, ppl: 23.738672 +epoch: 0, batch: 25643, sum loss: 5086.776367, avg loss: 3.042331, ppl: 20.954020 +epoch: 0, batch: 25644, sum loss: 4640.744141, avg loss: 3.100029, ppl: 22.198601 +epoch: 0, batch: 25645, sum loss: 5030.225586, avg loss: 3.224504, ppl: 25.141094 +epoch: 0, batch: 25646, sum loss: 5022.607910, avg loss: 3.158873, ppl: 23.544043 +epoch: 0, batch: 25647, sum loss: 5034.958984, avg loss: 2.846218, ppl: 17.222521 +epoch: 0, batch: 25648, sum loss: 5704.496094, avg loss: 3.434375, ppl: 31.012011 +epoch: 0, batch: 25649, sum loss: 5823.414062, avg loss: 3.092626, ppl: 22.034857 +epoch: 0, batch: 25650, sum loss: 5067.006836, avg loss: 2.989385, ppl: 19.873449 +epoch: 0, batch: 25651, sum loss: 5121.793457, avg loss: 3.183215, ppl: 24.124197 +epoch: 0, batch: 25652, sum loss: 4603.576660, avg loss: 3.052770, ppl: 21.173908 +epoch: 0, batch: 25653, sum loss: 6201.750977, avg loss: 3.262362, ppl: 26.111151 +epoch: 0, batch: 25654, sum loss: 6194.236816, avg loss: 3.222808, ppl: 25.098501 +epoch: 0, batch: 25655, sum loss: 4588.641602, avg loss: 2.964239, ppl: 
19.379948 +epoch: 0, batch: 25656, sum loss: 5448.077148, avg loss: 3.158306, ppl: 23.530699 +epoch: 0, batch: 25657, sum loss: 4970.202637, avg loss: 3.083252, ppl: 21.829281 +epoch: 0, batch: 25658, sum loss: 5281.587891, avg loss: 3.021503, ppl: 20.522123 +epoch: 0, batch: 25659, sum loss: 5800.908691, avg loss: 2.968735, ppl: 19.467281 +epoch: 0, batch: 25660, sum loss: 4134.011230, avg loss: 2.839293, ppl: 17.103678 +epoch: 0, batch: 25661, sum loss: 5427.081055, avg loss: 3.263428, ppl: 26.138994 +epoch: 0, batch: 25662, sum loss: 5169.731445, avg loss: 2.984833, ppl: 19.783207 +epoch: 0, batch: 25663, sum loss: 4625.953613, avg loss: 3.019552, ppl: 20.482113 +epoch: 0, batch: 25664, sum loss: 6042.383789, avg loss: 3.158591, ppl: 23.537397 +epoch: 0, batch: 25665, sum loss: 4004.371826, avg loss: 3.042836, ppl: 20.964609 +epoch: 0, batch: 25666, sum loss: 4420.181152, avg loss: 2.783489, ppl: 16.175365 +epoch: 0, batch: 25667, sum loss: 4897.245117, avg loss: 2.969827, ppl: 19.488552 +epoch: 0, batch: 25668, sum loss: 5163.524414, avg loss: 3.108684, ppl: 22.391560 +epoch: 0, batch: 25669, sum loss: 5465.953613, avg loss: 3.075945, ppl: 21.670343 +epoch: 0, batch: 25670, sum loss: 5692.076172, avg loss: 3.016469, ppl: 20.419054 +epoch: 0, batch: 25671, sum loss: 5598.466797, avg loss: 3.228643, ppl: 25.245369 +epoch: 0, batch: 25672, sum loss: 4876.844727, avg loss: 2.937858, ppl: 18.875378 +epoch: 0, batch: 25673, sum loss: 5875.723145, avg loss: 3.118749, ppl: 22.618071 +epoch: 0, batch: 25674, sum loss: 5251.404297, avg loss: 3.163497, ppl: 23.653160 +epoch: 0, batch: 25675, sum loss: 4814.989746, avg loss: 2.944948, ppl: 19.009668 +epoch: 0, batch: 25676, sum loss: 4378.692383, avg loss: 2.810457, ppl: 16.617514 +epoch: 0, batch: 25677, sum loss: 4145.557617, avg loss: 2.876862, ppl: 17.758457 +epoch: 0, batch: 25678, sum loss: 5196.872559, avg loss: 3.075073, ppl: 21.651453 +epoch: 0, batch: 25679, sum loss: 7141.441406, avg loss: 3.208195, ppl: 
24.734394 +epoch: 0, batch: 25680, sum loss: 5569.044922, avg loss: 3.116421, ppl: 22.565483 +epoch: 0, batch: 25681, sum loss: 4784.645996, avg loss: 2.781771, ppl: 16.147593 +epoch: 0, batch: 25682, sum loss: 5341.156250, avg loss: 3.223389, ppl: 25.113094 +epoch: 0, batch: 25683, sum loss: 5549.713867, avg loss: 3.243550, ppl: 25.624529 +epoch: 0, batch: 25684, sum loss: 5986.364258, avg loss: 3.206408, ppl: 24.690247 +epoch: 0, batch: 25685, sum loss: 5316.351562, avg loss: 3.140196, ppl: 23.108398 +epoch: 0, batch: 25686, sum loss: 5521.724609, avg loss: 3.059127, ppl: 21.308949 +epoch: 0, batch: 25687, sum loss: 6067.361816, avg loss: 3.227320, ppl: 25.211998 +epoch: 0, batch: 25688, sum loss: 5676.079590, avg loss: 3.041843, ppl: 20.943811 +epoch: 0, batch: 25689, sum loss: 5374.086914, avg loss: 3.102822, ppl: 22.260674 +epoch: 0, batch: 25690, sum loss: 5273.021484, avg loss: 3.129390, ppl: 22.860020 +epoch: 0, batch: 25691, sum loss: 4878.406738, avg loss: 2.985561, ppl: 19.797607 +epoch: 0, batch: 25692, sum loss: 4680.964844, avg loss: 3.021927, ppl: 20.530813 +epoch: 0, batch: 25693, sum loss: 5142.425781, avg loss: 3.152928, ppl: 23.404495 +epoch: 0, batch: 25694, sum loss: 6072.112305, avg loss: 3.396036, ppl: 29.845554 +epoch: 0, batch: 25695, sum loss: 4768.826172, avg loss: 2.907821, ppl: 18.316841 +epoch: 0, batch: 25696, sum loss: 5670.895020, avg loss: 3.026091, ppl: 20.616488 +epoch: 0, batch: 25697, sum loss: 4946.483398, avg loss: 2.889301, ppl: 17.980738 +epoch: 0, batch: 25698, sum loss: 5505.349121, avg loss: 3.106856, ppl: 22.350672 +epoch: 0, batch: 25699, sum loss: 4578.323242, avg loss: 3.048151, ppl: 21.076344 +epoch: 0, batch: 25700, sum loss: 5003.437988, avg loss: 3.023226, ppl: 20.557493 +epoch: 0, batch: 25701, sum loss: 5066.554688, avg loss: 3.158700, ppl: 23.539974 +epoch: 0, batch: 25702, sum loss: 5940.585938, avg loss: 2.954046, ppl: 19.183413 +epoch: 0, batch: 25703, sum loss: 5379.919434, avg loss: 3.225371, ppl: 
25.162916 +epoch: 0, batch: 25704, sum loss: 5222.632812, avg loss: 3.059539, ppl: 21.317724 +epoch: 0, batch: 25705, sum loss: 5893.849609, avg loss: 3.196231, ppl: 24.440239 +epoch: 0, batch: 25706, sum loss: 5245.035156, avg loss: 2.915528, ppl: 18.458557 +epoch: 0, batch: 25707, sum loss: 5986.358398, avg loss: 2.918751, ppl: 18.518145 +epoch: 0, batch: 25708, sum loss: 4199.375000, avg loss: 2.825959, ppl: 16.877121 +epoch: 0, batch: 25709, sum loss: 5183.975098, avg loss: 3.390435, ppl: 29.678865 +epoch: 0, batch: 25710, sum loss: 5578.413086, avg loss: 3.056665, ppl: 21.256542 +epoch: 0, batch: 25711, sum loss: 5759.940430, avg loss: 2.941747, ppl: 18.948919 +epoch: 0, batch: 25712, sum loss: 6371.694336, avg loss: 3.201856, ppl: 24.578114 +epoch: 0, batch: 25713, sum loss: 5271.795410, avg loss: 3.181530, ppl: 24.083572 +epoch: 0, batch: 25714, sum loss: 4677.588867, avg loss: 2.956756, ppl: 19.235477 +epoch: 0, batch: 25715, sum loss: 4154.877441, avg loss: 2.828371, ppl: 16.917879 +epoch: 0, batch: 25716, sum loss: 4816.872559, avg loss: 2.831789, ppl: 16.975796 +epoch: 0, batch: 25717, sum loss: 4607.929688, avg loss: 3.017636, ppl: 20.442900 +epoch: 0, batch: 25718, sum loss: 5574.234375, avg loss: 3.337865, ppl: 28.158939 +epoch: 0, batch: 25719, sum loss: 6347.483887, avg loss: 3.429219, ppl: 30.852530 +epoch: 0, batch: 25720, sum loss: 4641.811523, avg loss: 2.945312, ppl: 19.016590 +epoch: 0, batch: 25721, sum loss: 4834.811035, avg loss: 2.838996, ppl: 17.098597 +epoch: 0, batch: 25722, sum loss: 5821.919922, avg loss: 3.254287, ppl: 25.901146 +epoch: 0, batch: 25723, sum loss: 4700.170410, avg loss: 2.794394, ppl: 16.352713 +epoch: 0, batch: 25724, sum loss: 5231.030273, avg loss: 3.037764, ppl: 20.858559 +epoch: 0, batch: 25725, sum loss: 5863.144043, avg loss: 3.233946, ppl: 25.379610 +epoch: 0, batch: 25726, sum loss: 4558.869629, avg loss: 2.945006, ppl: 19.010784 +epoch: 0, batch: 25727, sum loss: 6421.988281, avg loss: 3.469470, ppl: 
32.119709 +epoch: 0, batch: 25728, sum loss: 5864.172852, avg loss: 3.119241, ppl: 22.629192 +epoch: 0, batch: 25729, sum loss: 6175.926270, avg loss: 3.190044, ppl: 24.289505 +epoch: 0, batch: 25730, sum loss: 5637.574707, avg loss: 3.034217, ppl: 20.784689 +epoch: 0, batch: 25731, sum loss: 5522.256836, avg loss: 3.244569, ppl: 25.650654 +epoch: 0, batch: 25732, sum loss: 5304.937500, avg loss: 3.169019, ppl: 23.784140 +epoch: 0, batch: 25733, sum loss: 4878.394531, avg loss: 2.912475, ppl: 18.402281 +epoch: 0, batch: 25734, sum loss: 5180.990234, avg loss: 3.216009, ppl: 24.928429 +epoch: 0, batch: 25735, sum loss: 5483.363770, avg loss: 3.031158, ppl: 20.721209 +epoch: 0, batch: 25736, sum loss: 4429.751953, avg loss: 3.027855, ppl: 20.652887 +epoch: 0, batch: 25737, sum loss: 5640.603516, avg loss: 2.942412, ppl: 18.961529 +epoch: 0, batch: 25738, sum loss: 4159.327637, avg loss: 3.231801, ppl: 25.325222 +epoch: 0, batch: 25739, sum loss: 5769.682129, avg loss: 3.308304, ppl: 27.338715 +epoch: 0, batch: 25740, sum loss: 5975.591797, avg loss: 3.288713, ppl: 26.808346 +epoch: 0, batch: 25741, sum loss: 5264.505371, avg loss: 3.105903, ppl: 22.329370 +epoch: 0, batch: 25742, sum loss: 5582.087402, avg loss: 3.075530, ppl: 21.661366 +epoch: 0, batch: 25743, sum loss: 4924.095703, avg loss: 2.968111, ppl: 19.455130 +epoch: 0, batch: 25744, sum loss: 6004.315430, avg loss: 3.429078, ppl: 30.848190 +epoch: 0, batch: 25745, sum loss: 5520.405273, avg loss: 3.187301, ppl: 24.222960 +epoch: 0, batch: 25746, sum loss: 4322.932129, avg loss: 2.774668, ppl: 16.033298 +epoch: 0, batch: 25747, sum loss: 5175.870117, avg loss: 2.969518, ppl: 19.482531 +epoch: 0, batch: 25748, sum loss: 4942.742188, avg loss: 3.032357, ppl: 20.746078 +epoch: 0, batch: 25749, sum loss: 5345.675781, avg loss: 3.081081, ppl: 21.781939 +epoch: 0, batch: 25750, sum loss: 5351.064941, avg loss: 2.954757, ppl: 19.197062 +epoch: 0, batch: 25751, sum loss: 5510.881836, avg loss: 3.101228, ppl: 
22.225222 +epoch: 0, batch: 25752, sum loss: 5047.398438, avg loss: 2.981334, ppl: 19.714102 +epoch: 0, batch: 25753, sum loss: 4613.341309, avg loss: 2.896008, ppl: 18.101748 +epoch: 0, batch: 25754, sum loss: 5209.642090, avg loss: 3.020082, ppl: 20.492981 +epoch: 0, batch: 25755, sum loss: 6103.772949, avg loss: 3.446512, ppl: 31.390709 +epoch: 0, batch: 25756, sum loss: 3612.544922, avg loss: 2.520967, ppl: 12.440615 +epoch: 0, batch: 25757, sum loss: 4600.132812, avg loss: 2.969743, ppl: 19.486921 +epoch: 0, batch: 25758, sum loss: 4540.836914, avg loss: 2.728868, ppl: 15.315547 +epoch: 0, batch: 25759, sum loss: 4840.376465, avg loss: 2.762772, ppl: 15.843702 +epoch: 0, batch: 25760, sum loss: 5522.011719, avg loss: 3.091832, ppl: 22.017376 +epoch: 0, batch: 25761, sum loss: 4845.427246, avg loss: 2.925983, ppl: 18.652548 +epoch: 0, batch: 25762, sum loss: 5141.529785, avg loss: 2.882024, ppl: 17.850359 +epoch: 0, batch: 25763, sum loss: 4734.229492, avg loss: 2.990669, ppl: 19.898996 +epoch: 0, batch: 25764, sum loss: 4611.018555, avg loss: 2.809883, ppl: 16.607981 +epoch: 0, batch: 25765, sum loss: 5306.826660, avg loss: 3.025557, ppl: 20.605474 +epoch: 0, batch: 25766, sum loss: 5180.853027, avg loss: 3.136110, ppl: 23.014164 +epoch: 0, batch: 25767, sum loss: 5526.125977, avg loss: 3.203552, ppl: 24.619814 +epoch: 0, batch: 25768, sum loss: 5013.040527, avg loss: 3.131193, ppl: 22.901293 +epoch: 0, batch: 25769, sum loss: 5453.159668, avg loss: 3.504601, ppl: 33.268177 +epoch: 0, batch: 25770, sum loss: 4609.467773, avg loss: 3.201019, ppl: 24.557549 +epoch: 0, batch: 25771, sum loss: 5166.252441, avg loss: 3.086173, ppl: 21.893124 +epoch: 0, batch: 25772, sum loss: 6462.674805, avg loss: 3.275558, ppl: 26.457972 +epoch: 0, batch: 25773, sum loss: 5306.825195, avg loss: 3.107040, ppl: 22.354786 +epoch: 0, batch: 25774, sum loss: 5057.255859, avg loss: 3.150938, ppl: 23.357965 +epoch: 0, batch: 25775, sum loss: 5892.520020, avg loss: 3.176561, ppl: 
23.964190 +epoch: 0, batch: 25776, sum loss: 4868.006348, avg loss: 2.882183, ppl: 17.853197 +epoch: 0, batch: 25777, sum loss: 4859.968750, avg loss: 2.785082, ppl: 16.201151 +epoch: 0, batch: 25778, sum loss: 5857.374023, avg loss: 3.328053, ppl: 27.884005 +epoch: 0, batch: 25779, sum loss: 4302.938965, avg loss: 2.941175, ppl: 18.938086 +epoch: 0, batch: 25780, sum loss: 5000.937988, avg loss: 3.106173, ppl: 22.335394 +epoch: 0, batch: 25781, sum loss: 6235.750977, avg loss: 3.292371, ppl: 26.906591 +epoch: 0, batch: 25782, sum loss: 6194.213867, avg loss: 3.123658, ppl: 22.729370 +epoch: 0, batch: 25783, sum loss: 5487.120605, avg loss: 2.860855, ppl: 17.476469 +epoch: 0, batch: 25784, sum loss: 6893.412598, avg loss: 3.367568, ppl: 29.007908 +epoch: 0, batch: 25785, sum loss: 4777.261230, avg loss: 3.124435, ppl: 22.747044 +epoch: 0, batch: 25786, sum loss: 5409.097168, avg loss: 3.266363, ppl: 26.215816 +epoch: 0, batch: 25787, sum loss: 4837.876465, avg loss: 3.135370, ppl: 22.997154 +epoch: 0, batch: 25788, sum loss: 5280.607910, avg loss: 3.031348, ppl: 20.725145 +epoch: 0, batch: 25789, sum loss: 4929.222656, avg loss: 3.113849, ppl: 22.507502 +epoch: 0, batch: 25790, sum loss: 4730.768555, avg loss: 3.139196, ppl: 23.085302 +epoch: 0, batch: 25791, sum loss: 4322.440430, avg loss: 2.761943, ppl: 15.830566 +epoch: 0, batch: 25792, sum loss: 4618.947266, avg loss: 3.030805, ppl: 20.713907 +epoch: 0, batch: 25793, sum loss: 4821.793457, avg loss: 2.851445, ppl: 17.312784 +epoch: 0, batch: 25794, sum loss: 5351.520996, avg loss: 3.111349, ppl: 22.451319 +epoch: 0, batch: 25795, sum loss: 5600.465820, avg loss: 3.065389, ppl: 21.442804 +epoch: 0, batch: 25796, sum loss: 5711.123047, avg loss: 3.480270, ppl: 32.468483 +epoch: 0, batch: 25797, sum loss: 5249.019531, avg loss: 3.074997, ppl: 21.649811 +epoch: 0, batch: 25798, sum loss: 6131.629883, avg loss: 3.491817, ppl: 32.845566 +epoch: 0, batch: 25799, sum loss: 5740.227539, avg loss: 3.199681, ppl: 
24.524706 +epoch: 0, batch: 25800, sum loss: 5436.927246, avg loss: 3.047605, ppl: 21.064835 +epoch: 0, batch: 25801, sum loss: 5400.570312, avg loss: 3.032325, ppl: 20.745405 +epoch: 0, batch: 25802, sum loss: 4604.963867, avg loss: 2.752519, ppl: 15.682084 +epoch: 0, batch: 25803, sum loss: 5593.941895, avg loss: 3.106020, ppl: 22.331980 +epoch: 0, batch: 25804, sum loss: 4446.992188, avg loss: 2.878312, ppl: 17.784235 +epoch: 0, batch: 25805, sum loss: 5026.424316, avg loss: 3.215883, ppl: 24.925280 +epoch: 0, batch: 25806, sum loss: 5674.365234, avg loss: 3.283776, ppl: 26.676319 +epoch: 0, batch: 25807, sum loss: 4101.231445, avg loss: 2.767363, ppl: 15.916604 +epoch: 0, batch: 25808, sum loss: 4772.260742, avg loss: 3.031932, ppl: 20.737251 +epoch: 0, batch: 25809, sum loss: 4382.542480, avg loss: 2.784334, ppl: 16.189039 +epoch: 0, batch: 25810, sum loss: 5149.317871, avg loss: 2.981655, ppl: 19.720425 +epoch: 0, batch: 25811, sum loss: 6574.171875, avg loss: 3.371370, ppl: 29.118395 +epoch: 0, batch: 25812, sum loss: 5239.510742, avg loss: 3.154432, ppl: 23.439711 +epoch: 0, batch: 25813, sum loss: 4432.559082, avg loss: 2.812537, ppl: 16.652119 +epoch: 0, batch: 25814, sum loss: 4658.231445, avg loss: 2.911395, ppl: 18.382418 +epoch: 0, batch: 25815, sum loss: 5315.018555, avg loss: 3.011342, ppl: 20.314646 +epoch: 0, batch: 25816, sum loss: 3993.529053, avg loss: 2.802476, ppl: 16.485420 +epoch: 0, batch: 25817, sum loss: 5294.366211, avg loss: 3.088895, ppl: 21.952808 +epoch: 0, batch: 25818, sum loss: 4824.318359, avg loss: 3.059175, ppl: 21.309965 +epoch: 0, batch: 25819, sum loss: 5262.284668, avg loss: 3.386284, ppl: 29.555908 +epoch: 0, batch: 25820, sum loss: 5220.890625, avg loss: 3.056727, ppl: 21.257875 +epoch: 0, batch: 25821, sum loss: 5782.413574, avg loss: 3.377578, ppl: 29.299723 +epoch: 0, batch: 25822, sum loss: 4408.418945, avg loss: 2.911770, ppl: 18.389313 +epoch: 0, batch: 25823, sum loss: 4584.916504, avg loss: 2.909211, ppl: 
18.342319 +epoch: 0, batch: 25824, sum loss: 7585.207520, avg loss: 3.615447, ppl: 37.167957 +epoch: 0, batch: 25825, sum loss: 5189.355957, avg loss: 3.443501, ppl: 31.296335 +epoch: 0, batch: 25826, sum loss: 4919.212402, avg loss: 3.236324, ppl: 25.440035 +epoch: 0, batch: 25827, sum loss: 6673.458496, avg loss: 3.363638, ppl: 28.894121 +epoch: 0, batch: 25828, sum loss: 5250.463379, avg loss: 3.219168, ppl: 25.007311 +epoch: 0, batch: 25829, sum loss: 5697.176758, avg loss: 3.216926, ppl: 24.951311 +epoch: 0, batch: 25830, sum loss: 4682.716309, avg loss: 2.890566, ppl: 18.003494 +epoch: 0, batch: 25831, sum loss: 5551.127930, avg loss: 3.058473, ppl: 21.295012 +epoch: 0, batch: 25832, sum loss: 6131.202637, avg loss: 3.381800, ppl: 29.423677 +epoch: 0, batch: 25833, sum loss: 5659.465820, avg loss: 3.021605, ppl: 20.524202 +epoch: 0, batch: 25834, sum loss: 5079.811035, avg loss: 2.996939, ppl: 20.024143 +epoch: 0, batch: 25835, sum loss: 5722.127441, avg loss: 3.253057, ppl: 25.869308 +epoch: 0, batch: 25836, sum loss: 5508.159180, avg loss: 3.209883, ppl: 24.776186 +epoch: 0, batch: 25837, sum loss: 4825.721680, avg loss: 3.058125, ppl: 21.287611 +epoch: 0, batch: 25838, sum loss: 5241.231934, avg loss: 3.129094, ppl: 22.853262 +epoch: 0, batch: 25839, sum loss: 4938.607910, avg loss: 3.180044, ppl: 24.047810 +epoch: 0, batch: 25840, sum loss: 5090.540039, avg loss: 3.064744, ppl: 21.428980 +epoch: 0, batch: 25841, sum loss: 5661.860840, avg loss: 3.263320, ppl: 26.136177 +epoch: 0, batch: 25842, sum loss: 4032.128906, avg loss: 2.693473, ppl: 14.782926 +epoch: 0, batch: 25843, sum loss: 5230.452637, avg loss: 3.058744, ppl: 21.300791 +epoch: 0, batch: 25844, sum loss: 5105.117188, avg loss: 2.959488, ppl: 19.288101 +epoch: 0, batch: 25845, sum loss: 4581.211914, avg loss: 2.857899, ppl: 17.424873 +epoch: 0, batch: 25846, sum loss: 4872.998047, avg loss: 3.168399, ppl: 23.769407 +epoch: 0, batch: 25847, sum loss: 4643.000000, avg loss: 3.099466, ppl: 
22.186098 +epoch: 0, batch: 25848, sum loss: 5970.397949, avg loss: 3.616231, ppl: 37.197124 +epoch: 0, batch: 25849, sum loss: 5328.635254, avg loss: 2.937506, ppl: 18.868723 +epoch: 0, batch: 25850, sum loss: 4585.325684, avg loss: 2.758920, ppl: 15.782795 +epoch: 0, batch: 25851, sum loss: 4773.689453, avg loss: 2.950364, ppl: 19.112917 +epoch: 0, batch: 25852, sum loss: 4380.489258, avg loss: 2.939926, ppl: 18.914444 +epoch: 0, batch: 25853, sum loss: 4189.745117, avg loss: 2.834740, ppl: 17.025969 +epoch: 0, batch: 25854, sum loss: 6831.239746, avg loss: 3.183243, ppl: 24.124870 +epoch: 0, batch: 25855, sum loss: 5593.904297, avg loss: 3.065153, ppl: 21.437744 +epoch: 0, batch: 25856, sum loss: 5324.634766, avg loss: 3.095718, ppl: 22.103102 +epoch: 0, batch: 25857, sum loss: 4679.856934, avg loss: 3.042820, ppl: 20.964273 +epoch: 0, batch: 25858, sum loss: 5679.866699, avg loss: 3.065228, ppl: 21.439339 +epoch: 0, batch: 25859, sum loss: 4397.306152, avg loss: 2.829669, ppl: 16.939861 +epoch: 0, batch: 25860, sum loss: 3620.034424, avg loss: 2.617523, ppl: 13.701745 +epoch: 0, batch: 25861, sum loss: 4205.135254, avg loss: 2.759275, ppl: 15.788395 +epoch: 0, batch: 25862, sum loss: 5215.541504, avg loss: 3.035822, ppl: 20.818077 +epoch: 0, batch: 25863, sum loss: 6169.494629, avg loss: 3.324081, ppl: 27.773468 +epoch: 0, batch: 25864, sum loss: 4350.259277, avg loss: 2.985765, ppl: 19.801640 +epoch: 0, batch: 25865, sum loss: 4830.789062, avg loss: 2.870344, ppl: 17.643085 +epoch: 0, batch: 25866, sum loss: 5580.020508, avg loss: 3.170466, ppl: 23.818586 +epoch: 0, batch: 25867, sum loss: 5741.618164, avg loss: 3.351791, ppl: 28.553831 +epoch: 0, batch: 25868, sum loss: 4289.875000, avg loss: 2.881044, ppl: 17.832888 +epoch: 0, batch: 25869, sum loss: 5879.257812, avg loss: 3.155801, ppl: 23.471838 +epoch: 0, batch: 25870, sum loss: 4362.123047, avg loss: 2.939436, ppl: 18.905180 +epoch: 0, batch: 25871, sum loss: 6283.476562, avg loss: 3.502495, ppl: 
33.198189 +epoch: 0, batch: 25872, sum loss: 4968.745605, avg loss: 2.995024, ppl: 19.985847 +epoch: 0, batch: 25873, sum loss: 5488.373535, avg loss: 3.129061, ppl: 22.852516 +epoch: 0, batch: 25874, sum loss: 4838.396973, avg loss: 2.866349, ppl: 17.572742 +epoch: 0, batch: 25875, sum loss: 5645.492188, avg loss: 2.969749, ppl: 19.487022 +epoch: 0, batch: 25876, sum loss: 5828.581055, avg loss: 3.452951, ppl: 31.593485 +epoch: 0, batch: 25877, sum loss: 5641.137695, avg loss: 3.091034, ppl: 21.999823 +epoch: 0, batch: 25878, sum loss: 4743.485352, avg loss: 2.983324, ppl: 19.753368 +epoch: 0, batch: 25879, sum loss: 4435.087402, avg loss: 2.844828, ppl: 17.198606 +epoch: 0, batch: 25880, sum loss: 5950.092773, avg loss: 3.178468, ppl: 24.009953 +epoch: 0, batch: 25881, sum loss: 4649.241699, avg loss: 2.940697, ppl: 18.929029 +epoch: 0, batch: 25882, sum loss: 5975.329102, avg loss: 3.216001, ppl: 24.928221 +epoch: 0, batch: 25883, sum loss: 4898.177246, avg loss: 3.076745, ppl: 21.687685 +epoch: 0, batch: 25884, sum loss: 5513.295898, avg loss: 3.172207, ppl: 23.860088 +epoch: 0, batch: 25885, sum loss: 4706.006348, avg loss: 2.799528, ppl: 16.436884 +epoch: 0, batch: 25886, sum loss: 4699.385254, avg loss: 2.961175, ppl: 19.320669 +epoch: 0, batch: 25887, sum loss: 5293.696777, avg loss: 2.929550, ppl: 18.719200 +epoch: 0, batch: 25888, sum loss: 5474.523438, avg loss: 3.032977, ppl: 20.758938 +epoch: 0, batch: 25889, sum loss: 4858.730469, avg loss: 3.001069, ppl: 20.107016 +epoch: 0, batch: 25890, sum loss: 5545.302734, avg loss: 2.962234, ppl: 19.341141 +epoch: 0, batch: 25891, sum loss: 5007.357910, avg loss: 3.159216, ppl: 23.552134 +epoch: 0, batch: 25892, sum loss: 5028.258789, avg loss: 2.921708, ppl: 18.572977 +epoch: 0, batch: 25893, sum loss: 4888.710938, avg loss: 3.028941, ppl: 20.675329 +epoch: 0, batch: 25894, sum loss: 4810.370117, avg loss: 2.987808, ppl: 19.842131 +epoch: 0, batch: 25895, sum loss: 5267.801758, avg loss: 2.959439, ppl: 
19.287153 +epoch: 0, batch: 25896, sum loss: 5322.504883, avg loss: 3.212133, ppl: 24.832001 +epoch: 0, batch: 25897, sum loss: 5009.339355, avg loss: 3.109460, ppl: 22.408932 +epoch: 0, batch: 25898, sum loss: 5565.058594, avg loss: 3.114191, ppl: 22.515198 +epoch: 0, batch: 25899, sum loss: 4588.800293, avg loss: 3.090101, ppl: 21.979298 +epoch: 0, batch: 25900, sum loss: 4857.073730, avg loss: 3.009339, ppl: 20.274002 +epoch: 0, batch: 25901, sum loss: 5577.497070, avg loss: 2.989012, ppl: 19.866051 +epoch: 0, batch: 25902, sum loss: 5367.851562, avg loss: 3.222000, ppl: 25.078218 +epoch: 0, batch: 25903, sum loss: 4370.340332, avg loss: 2.911619, ppl: 18.386545 +epoch: 0, batch: 25904, sum loss: 5593.619629, avg loss: 3.218423, ppl: 24.988691 +epoch: 0, batch: 25905, sum loss: 5512.311523, avg loss: 3.091594, ppl: 22.012133 +epoch: 0, batch: 25906, sum loss: 4424.589844, avg loss: 2.961573, ppl: 19.328350 +epoch: 0, batch: 25907, sum loss: 5160.071289, avg loss: 3.235154, ppl: 25.410295 +epoch: 0, batch: 25908, sum loss: 4696.942383, avg loss: 3.061892, ppl: 21.367949 +epoch: 0, batch: 25909, sum loss: 4321.754883, avg loss: 2.726659, ppl: 15.281750 +epoch: 0, batch: 25910, sum loss: 5120.400391, avg loss: 3.337940, ppl: 28.161060 +epoch: 0, batch: 25911, sum loss: 5886.713867, avg loss: 3.129566, ppl: 22.864059 +epoch: 0, batch: 25912, sum loss: 5493.796387, avg loss: 2.945735, ppl: 19.024645 +epoch: 0, batch: 25913, sum loss: 5294.513672, avg loss: 3.001425, ppl: 20.114180 +epoch: 0, batch: 25914, sum loss: 5097.180176, avg loss: 3.267423, ppl: 26.243633 +epoch: 0, batch: 25915, sum loss: 6121.764160, avg loss: 3.180137, ppl: 24.050053 +epoch: 0, batch: 25916, sum loss: 4961.260742, avg loss: 2.891178, ppl: 18.014511 +epoch: 0, batch: 25917, sum loss: 4256.789551, avg loss: 2.935717, ppl: 18.835005 +epoch: 0, batch: 25918, sum loss: 5990.012695, avg loss: 3.434640, ppl: 31.020248 +epoch: 0, batch: 25919, sum loss: 5248.373535, avg loss: 3.071020, ppl: 
21.563894 +epoch: 0, batch: 25920, sum loss: 4487.940918, avg loss: 2.897315, ppl: 18.125414 +epoch: 0, batch: 25921, sum loss: 4829.047363, avg loss: 2.951741, ppl: 19.139236 +epoch: 0, batch: 25922, sum loss: 4880.809082, avg loss: 2.898343, ppl: 18.144053 +epoch: 0, batch: 25923, sum loss: 5540.399414, avg loss: 3.083138, ppl: 21.826797 +epoch: 0, batch: 25924, sum loss: 6042.518066, avg loss: 3.276854, ppl: 26.492285 +epoch: 0, batch: 25925, sum loss: 5069.363281, avg loss: 2.968011, ppl: 19.453192 +epoch: 0, batch: 25926, sum loss: 4259.798828, avg loss: 2.773307, ppl: 16.011490 +epoch: 0, batch: 25927, sum loss: 4468.227051, avg loss: 2.875307, ppl: 17.730867 +epoch: 0, batch: 25928, sum loss: 5479.321289, avg loss: 3.194939, ppl: 24.408693 +epoch: 0, batch: 25929, sum loss: 4333.683594, avg loss: 2.790524, ppl: 16.289553 +epoch: 0, batch: 25930, sum loss: 5363.318359, avg loss: 3.085914, ppl: 21.887461 +epoch: 0, batch: 25931, sum loss: 5469.881348, avg loss: 3.086841, ppl: 21.907753 +epoch: 0, batch: 25932, sum loss: 6346.524414, avg loss: 3.464260, ppl: 31.952808 +epoch: 0, batch: 25933, sum loss: 4890.952637, avg loss: 3.123214, ppl: 22.719271 +epoch: 0, batch: 25934, sum loss: 4945.345703, avg loss: 3.194668, ppl: 24.402067 +epoch: 0, batch: 25935, sum loss: 4284.877930, avg loss: 3.034616, ppl: 20.792992 +epoch: 0, batch: 25936, sum loss: 5185.375977, avg loss: 3.037713, ppl: 20.857489 +epoch: 0, batch: 25937, sum loss: 4692.230469, avg loss: 2.930812, ppl: 18.742849 +epoch: 0, batch: 25938, sum loss: 5108.676270, avg loss: 2.927608, ppl: 18.682888 +epoch: 0, batch: 25939, sum loss: 5803.955078, avg loss: 3.156039, ppl: 23.477411 +epoch: 0, batch: 25940, sum loss: 4369.892578, avg loss: 3.196703, ppl: 24.451773 +epoch: 0, batch: 25941, sum loss: 4799.483398, avg loss: 2.982898, ppl: 19.744959 +epoch: 0, batch: 25942, sum loss: 5626.363770, avg loss: 3.213229, ppl: 24.859226 +epoch: 0, batch: 25943, sum loss: 4851.026367, avg loss: 2.856906, ppl: 
17.407587 +epoch: 0, batch: 25944, sum loss: 4728.606934, avg loss: 3.000385, ppl: 20.093273 +epoch: 0, batch: 25945, sum loss: 5790.335449, avg loss: 3.052365, ppl: 21.165333 +epoch: 0, batch: 25946, sum loss: 4949.754395, avg loss: 2.909909, ppl: 18.355120 +epoch: 0, batch: 25947, sum loss: 6072.873535, avg loss: 3.249264, ppl: 25.771353 +epoch: 0, batch: 25948, sum loss: 5293.645996, avg loss: 2.940915, ppl: 18.933155 +epoch: 0, batch: 25949, sum loss: 6000.937500, avg loss: 3.110906, ppl: 22.441364 +epoch: 0, batch: 25950, sum loss: 4670.696777, avg loss: 3.064762, ppl: 21.429354 +epoch: 0, batch: 25951, sum loss: 4347.650391, avg loss: 2.824984, ppl: 16.860676 +epoch: 0, batch: 25952, sum loss: 5039.018555, avg loss: 2.946795, ppl: 19.044807 +epoch: 0, batch: 25953, sum loss: 5511.917480, avg loss: 3.273110, ppl: 26.393305 +epoch: 0, batch: 25954, sum loss: 5377.855957, avg loss: 3.249460, ppl: 25.776417 +epoch: 0, batch: 25955, sum loss: 5973.402344, avg loss: 3.382447, ppl: 29.442743 +epoch: 0, batch: 25956, sum loss: 5922.182617, avg loss: 3.457199, ppl: 31.727993 +epoch: 0, batch: 25957, sum loss: 5967.607422, avg loss: 3.365825, ppl: 28.957375 +epoch: 0, batch: 25958, sum loss: 5301.608398, avg loss: 2.933928, ppl: 18.801338 +epoch: 0, batch: 25959, sum loss: 4217.926270, avg loss: 2.846104, ppl: 17.220562 +epoch: 0, batch: 25960, sum loss: 5357.580078, avg loss: 3.095078, ppl: 22.088961 +epoch: 0, batch: 25961, sum loss: 4986.781738, avg loss: 2.943791, ppl: 18.987686 +epoch: 0, batch: 25962, sum loss: 4984.639160, avg loss: 3.205556, ppl: 24.669210 +epoch: 0, batch: 25963, sum loss: 5829.705566, avg loss: 3.249557, ppl: 25.778923 +epoch: 0, batch: 25964, sum loss: 5520.656738, avg loss: 3.152859, ppl: 23.402889 +epoch: 0, batch: 25965, sum loss: 5226.768555, avg loss: 2.840635, ppl: 17.126638 +epoch: 0, batch: 25966, sum loss: 4350.703613, avg loss: 3.085606, ppl: 21.880714 +epoch: 0, batch: 25967, sum loss: 5982.101562, avg loss: 3.245850, ppl: 
25.683535 +epoch: 0, batch: 25968, sum loss: 6078.159668, avg loss: 3.303348, ppl: 27.203552 +epoch: 0, batch: 25969, sum loss: 5343.953125, avg loss: 3.031170, ppl: 20.721466 +epoch: 0, batch: 25970, sum loss: 4891.608398, avg loss: 3.139672, ppl: 23.096285 +epoch: 0, batch: 25971, sum loss: 5963.141113, avg loss: 3.138495, ppl: 23.069132 +epoch: 0, batch: 25972, sum loss: 5904.831543, avg loss: 3.091535, ppl: 22.010836 +epoch: 0, batch: 25973, sum loss: 6481.980469, avg loss: 3.348130, ppl: 28.449490 +epoch: 0, batch: 25974, sum loss: 5725.871094, avg loss: 3.065241, ppl: 21.439636 +epoch: 0, batch: 25975, sum loss: 4417.447754, avg loss: 3.082657, ppl: 21.816299 +epoch: 0, batch: 25976, sum loss: 6193.105469, avg loss: 3.180845, ppl: 24.067087 +epoch: 0, batch: 25977, sum loss: 4633.489746, avg loss: 3.070570, ppl: 21.554184 +epoch: 0, batch: 25978, sum loss: 5128.501465, avg loss: 3.049050, ppl: 21.095285 +epoch: 0, batch: 25979, sum loss: 4417.231445, avg loss: 2.851666, ppl: 17.316614 +epoch: 0, batch: 25980, sum loss: 5621.353516, avg loss: 2.935433, ppl: 18.829649 +epoch: 0, batch: 25981, sum loss: 4740.586914, avg loss: 3.122916, ppl: 22.712519 +epoch: 0, batch: 25982, sum loss: 5698.943359, avg loss: 3.234361, ppl: 25.390135 +epoch: 0, batch: 25983, sum loss: 5218.132812, avg loss: 2.968221, ppl: 19.457277 +epoch: 0, batch: 25984, sum loss: 5324.820801, avg loss: 2.928944, ppl: 18.707870 +epoch: 0, batch: 25985, sum loss: 5363.369141, avg loss: 3.094846, ppl: 22.083849 +epoch: 0, batch: 25986, sum loss: 5949.392090, avg loss: 3.119765, ppl: 22.641060 +epoch: 0, batch: 25987, sum loss: 5827.626953, avg loss: 3.128087, ppl: 22.830271 +epoch: 0, batch: 25988, sum loss: 5778.178711, avg loss: 2.963168, ppl: 19.359213 +epoch: 0, batch: 25989, sum loss: 4854.595703, avg loss: 2.951122, ppl: 19.127399 +epoch: 0, batch: 25990, sum loss: 5596.834961, avg loss: 3.325511, ppl: 27.813208 +epoch: 0, batch: 25991, sum loss: 5623.097168, avg loss: 3.284519, ppl: 
26.696146 +epoch: 0, batch: 25992, sum loss: 4496.995117, avg loss: 3.014072, ppl: 20.370178 +epoch: 0, batch: 25993, sum loss: 5435.087891, avg loss: 2.994539, ppl: 19.976145 +epoch: 0, batch: 25994, sum loss: 5645.187012, avg loss: 3.340347, ppl: 28.228930 +epoch: 0, batch: 25995, sum loss: 4941.142090, avg loss: 2.916849, ppl: 18.482954 +epoch: 0, batch: 25996, sum loss: 5337.229004, avg loss: 3.051589, ppl: 21.148918 +epoch: 0, batch: 25997, sum loss: 5357.386230, avg loss: 2.813753, ppl: 16.672379 +epoch: 0, batch: 25998, sum loss: 5970.085938, avg loss: 3.178959, ppl: 24.021742 +epoch: 0, batch: 25999, sum loss: 5282.360352, avg loss: 3.149887, ppl: 23.333424 +epoch: 0, batch: 26000, sum loss: 5195.504883, avg loss: 3.207102, ppl: 24.707376 +epoch: 0, batch: 26001, sum loss: 4842.629883, avg loss: 3.064956, ppl: 21.433514 +epoch: 0, batch: 26002, sum loss: 3866.873047, avg loss: 2.756146, ppl: 15.739071 +epoch: 0, batch: 26003, sum loss: 4566.746094, avg loss: 2.827707, ppl: 16.906643 +epoch: 0, batch: 26004, sum loss: 4580.088867, avg loss: 2.913543, ppl: 18.421944 +epoch: 0, batch: 26005, sum loss: 4055.212402, avg loss: 2.766175, ppl: 15.897709 +epoch: 0, batch: 26006, sum loss: 4674.312988, avg loss: 2.986781, ppl: 19.821781 +epoch: 0, batch: 26007, sum loss: 4070.757568, avg loss: 2.765460, ppl: 15.886350 +epoch: 0, batch: 26008, sum loss: 4632.347656, avg loss: 2.975175, ppl: 19.593054 +epoch: 0, batch: 26009, sum loss: 5474.788574, avg loss: 2.933970, ppl: 18.802132 +epoch: 0, batch: 26010, sum loss: 4458.919922, avg loss: 2.873016, ppl: 17.690285 +epoch: 0, batch: 26011, sum loss: 5513.615723, avg loss: 3.220570, ppl: 25.042387 +epoch: 0, batch: 26012, sum loss: 5116.217773, avg loss: 3.069117, ppl: 21.522886 +epoch: 0, batch: 26013, sum loss: 5747.211914, avg loss: 3.121788, ppl: 22.686914 +epoch: 0, batch: 26014, sum loss: 4810.032227, avg loss: 2.993175, ppl: 19.948921 +epoch: 0, batch: 26015, sum loss: 5012.326172, avg loss: 3.032260, ppl: 
20.744064 +epoch: 0, batch: 26016, sum loss: 4892.927246, avg loss: 2.965410, ppl: 19.402666 +epoch: 0, batch: 26017, sum loss: 6070.698730, avg loss: 3.339218, ppl: 28.197071 +epoch: 0, batch: 26018, sum loss: 5635.814453, avg loss: 3.211290, ppl: 24.811075 +epoch: 0, batch: 26019, sum loss: 5085.822266, avg loss: 2.924567, ppl: 18.626163 +epoch: 0, batch: 26020, sum loss: 5140.040039, avg loss: 3.273911, ppl: 26.414444 +epoch: 0, batch: 26021, sum loss: 6121.279785, avg loss: 3.343135, ppl: 28.307730 +epoch: 0, batch: 26022, sum loss: 5977.797852, avg loss: 3.413934, ppl: 30.384535 +epoch: 0, batch: 26023, sum loss: 4917.745605, avg loss: 3.043159, ppl: 20.971392 +epoch: 0, batch: 26024, sum loss: 4966.769531, avg loss: 3.073496, ppl: 21.617342 +epoch: 0, batch: 26025, sum loss: 6389.187988, avg loss: 3.101547, ppl: 22.232328 +epoch: 0, batch: 26026, sum loss: 4565.745117, avg loss: 2.860743, ppl: 17.474499 +epoch: 0, batch: 26027, sum loss: 4976.393066, avg loss: 3.165644, ppl: 23.704014 +epoch: 0, batch: 26028, sum loss: 5431.480957, avg loss: 3.202524, ppl: 24.594534 +epoch: 0, batch: 26029, sum loss: 5483.013184, avg loss: 2.958992, ppl: 19.278538 +epoch: 0, batch: 26030, sum loss: 6261.338867, avg loss: 3.269629, ppl: 26.301567 +epoch: 0, batch: 26031, sum loss: 4542.213379, avg loss: 2.821250, ppl: 16.797842 +epoch: 0, batch: 26032, sum loss: 4942.129883, avg loss: 2.973604, ppl: 19.562300 +epoch: 0, batch: 26033, sum loss: 5619.788574, avg loss: 3.070923, ppl: 21.561792 +epoch: 0, batch: 26034, sum loss: 4443.288086, avg loss: 2.830120, ppl: 16.947493 +epoch: 0, batch: 26035, sum loss: 5563.045410, avg loss: 3.164417, ppl: 23.674938 +epoch: 0, batch: 26036, sum loss: 5908.435547, avg loss: 3.168062, ppl: 23.761389 +epoch: 0, batch: 26037, sum loss: 7112.062500, avg loss: 3.435779, ppl: 31.055599 +epoch: 0, batch: 26038, sum loss: 4148.913574, avg loss: 2.607739, ppl: 13.568344 +epoch: 0, batch: 26039, sum loss: 5308.716797, avg loss: 3.180777, ppl: 
24.065447 +epoch: 0, batch: 26040, sum loss: 5921.225098, avg loss: 3.104995, ppl: 22.309103 +epoch: 0, batch: 26041, sum loss: 4975.733398, avg loss: 3.050726, ppl: 21.130672 +epoch: 0, batch: 26042, sum loss: 5047.107422, avg loss: 3.200449, ppl: 24.543541 +epoch: 0, batch: 26043, sum loss: 5524.324707, avg loss: 3.050428, ppl: 21.124382 +epoch: 0, batch: 26044, sum loss: 5745.626953, avg loss: 3.170876, ppl: 23.828344 +epoch: 0, batch: 26045, sum loss: 4933.730469, avg loss: 3.058729, ppl: 21.300476 +epoch: 0, batch: 26046, sum loss: 5329.289551, avg loss: 3.295788, ppl: 26.998688 +epoch: 0, batch: 26047, sum loss: 5884.682129, avg loss: 3.201677, ppl: 24.573702 +epoch: 0, batch: 26048, sum loss: 5653.390625, avg loss: 3.192202, ppl: 24.341978 +epoch: 0, batch: 26049, sum loss: 4708.030273, avg loss: 3.071122, ppl: 21.566090 +epoch: 0, batch: 26050, sum loss: 4553.681152, avg loss: 2.889391, ppl: 17.982363 +epoch: 0, batch: 26051, sum loss: 5328.337891, avg loss: 3.027465, ppl: 20.644823 +epoch: 0, batch: 26052, sum loss: 4189.577148, avg loss: 2.711701, ppl: 15.054857 +epoch: 0, batch: 26053, sum loss: 5256.954590, avg loss: 3.164933, ppl: 23.687168 +epoch: 0, batch: 26054, sum loss: 4670.406250, avg loss: 2.920829, ppl: 18.556669 +epoch: 0, batch: 26055, sum loss: 5406.138184, avg loss: 3.189462, ppl: 24.275362 +epoch: 0, batch: 26056, sum loss: 3962.254395, avg loss: 2.749656, ppl: 15.637255 +epoch: 0, batch: 26057, sum loss: 5453.552246, avg loss: 3.125245, ppl: 22.765469 +epoch: 0, batch: 26058, sum loss: 6427.937500, avg loss: 3.332264, ppl: 28.001665 +epoch: 0, batch: 26059, sum loss: 5023.972656, avg loss: 3.253869, ppl: 25.890305 +epoch: 0, batch: 26060, sum loss: 5689.114258, avg loss: 3.112207, ppl: 22.470581 +epoch: 0, batch: 26061, sum loss: 4280.636719, avg loss: 2.931943, ppl: 18.764053 +epoch: 0, batch: 26062, sum loss: 5351.981445, avg loss: 3.020306, ppl: 20.497555 +epoch: 0, batch: 26063, sum loss: 5449.436523, avg loss: 3.122886, ppl: 
22.711836 +epoch: 0, batch: 26064, sum loss: 5152.876953, avg loss: 3.027542, ppl: 20.646423 +epoch: 0, batch: 26065, sum loss: 4655.624023, avg loss: 3.003628, ppl: 20.158550 +epoch: 0, batch: 26066, sum loss: 5972.984863, avg loss: 3.086814, ppl: 21.907164 +epoch: 0, batch: 26067, sum loss: 4790.044922, avg loss: 3.060731, ppl: 21.343163 +epoch: 0, batch: 26068, sum loss: 3551.170898, avg loss: 2.547468, ppl: 12.774723 +epoch: 0, batch: 26069, sum loss: 3991.173828, avg loss: 2.852876, ppl: 17.337576 +epoch: 0, batch: 26070, sum loss: 5496.284180, avg loss: 3.051796, ppl: 21.153296 +epoch: 0, batch: 26071, sum loss: 5591.826172, avg loss: 3.024243, ppl: 20.578428 +epoch: 0, batch: 26072, sum loss: 4407.128906, avg loss: 3.039399, ppl: 20.892691 +epoch: 0, batch: 26073, sum loss: 5180.233887, avg loss: 3.043616, ppl: 20.980965 +epoch: 0, batch: 26074, sum loss: 5743.543945, avg loss: 3.223089, ppl: 25.105545 +epoch: 0, batch: 26075, sum loss: 4303.885742, avg loss: 2.898239, ppl: 18.142176 +epoch: 0, batch: 26076, sum loss: 5162.086914, avg loss: 3.067194, ppl: 21.481531 +epoch: 0, batch: 26077, sum loss: 4544.048340, avg loss: 2.850720, ppl: 17.300241 +epoch: 0, batch: 26078, sum loss: 6577.093750, avg loss: 3.438104, ppl: 31.127895 +epoch: 0, batch: 26079, sum loss: 4892.915039, avg loss: 2.994440, ppl: 19.974173 +epoch: 0, batch: 26080, sum loss: 4515.574707, avg loss: 2.859769, ppl: 17.457487 +epoch: 0, batch: 26081, sum loss: 5632.322754, avg loss: 3.174928, ppl: 23.925102 +epoch: 0, batch: 26082, sum loss: 4998.278809, avg loss: 2.975166, ppl: 19.592876 +epoch: 0, batch: 26083, sum loss: 4561.392090, avg loss: 2.834924, ppl: 17.029099 +epoch: 0, batch: 26084, sum loss: 4882.382324, avg loss: 3.011957, ppl: 20.327139 +epoch: 0, batch: 26085, sum loss: 5371.812012, avg loss: 3.043519, ppl: 20.978949 +epoch: 0, batch: 26086, sum loss: 5740.213379, avg loss: 3.189008, ppl: 24.264334 +epoch: 0, batch: 26087, sum loss: 5218.257324, avg loss: 3.130328, ppl: 
22.881487 +epoch: 0, batch: 26088, sum loss: 4529.695801, avg loss: 2.976147, ppl: 19.612108 +epoch: 0, batch: 26089, sum loss: 5099.498047, avg loss: 3.209250, ppl: 24.760508 +epoch: 0, batch: 26090, sum loss: 5681.383789, avg loss: 3.097810, ppl: 22.149391 +epoch: 0, batch: 26091, sum loss: 5054.885254, avg loss: 3.037792, ppl: 20.859129 +epoch: 0, batch: 26092, sum loss: 5856.774414, avg loss: 3.242954, ppl: 25.609255 +epoch: 0, batch: 26093, sum loss: 5176.937988, avg loss: 3.342116, ppl: 28.278906 +epoch: 0, batch: 26094, sum loss: 4997.874512, avg loss: 3.056804, ppl: 21.259502 +epoch: 0, batch: 26095, sum loss: 4611.614258, avg loss: 2.998449, ppl: 20.054405 +epoch: 0, batch: 26096, sum loss: 5631.372070, avg loss: 2.982718, ppl: 19.741405 +epoch: 0, batch: 26097, sum loss: 4652.645996, avg loss: 2.712913, ppl: 15.073120 +epoch: 0, batch: 26098, sum loss: 5324.824707, avg loss: 2.827841, ppl: 16.908916 +epoch: 0, batch: 26099, sum loss: 5318.514160, avg loss: 3.150779, ppl: 23.354240 +epoch: 0, batch: 26100, sum loss: 5600.005859, avg loss: 3.171011, ppl: 23.831572 +epoch: 0, batch: 26101, sum loss: 5458.634766, avg loss: 3.122789, ppl: 22.709627 +epoch: 0, batch: 26102, sum loss: 5203.040527, avg loss: 3.141933, ppl: 23.148563 +epoch: 0, batch: 26103, sum loss: 4440.326660, avg loss: 3.341104, ppl: 28.250292 +epoch: 0, batch: 26104, sum loss: 4487.053711, avg loss: 2.859818, ppl: 17.458340 +epoch: 0, batch: 26105, sum loss: 5096.730469, avg loss: 2.954627, ppl: 19.194553 +epoch: 0, batch: 26106, sum loss: 4249.816406, avg loss: 2.610452, ppl: 13.605202 +epoch: 0, batch: 26107, sum loss: 5094.477539, avg loss: 3.054243, ppl: 21.205130 +epoch: 0, batch: 26108, sum loss: 5090.203613, avg loss: 3.013738, ppl: 20.363379 +epoch: 0, batch: 26109, sum loss: 6151.433594, avg loss: 3.261630, ppl: 26.092028 +epoch: 0, batch: 26110, sum loss: 6529.163086, avg loss: 3.262950, ppl: 26.126501 +epoch: 0, batch: 26111, sum loss: 4370.418457, avg loss: 3.112834, ppl: 
22.484671 +epoch: 0, batch: 26112, sum loss: 4897.845215, avg loss: 3.131614, ppl: 22.910938 +epoch: 0, batch: 26113, sum loss: 5650.250000, avg loss: 3.349289, ppl: 28.482468 +epoch: 0, batch: 26114, sum loss: 6488.515625, avg loss: 3.254020, ppl: 25.894226 +epoch: 0, batch: 26115, sum loss: 5252.787109, avg loss: 2.875089, ppl: 17.726995 +epoch: 0, batch: 26116, sum loss: 6280.096191, avg loss: 3.296638, ppl: 27.021652 +epoch: 0, batch: 26117, sum loss: 5680.300293, avg loss: 3.114200, ppl: 22.515402 +epoch: 0, batch: 26118, sum loss: 5705.990234, avg loss: 3.420857, ppl: 30.595638 +epoch: 0, batch: 26119, sum loss: 4570.396973, avg loss: 2.754911, ppl: 15.719637 +epoch: 0, batch: 26120, sum loss: 4473.616211, avg loss: 2.664453, ppl: 14.360089 +epoch: 0, batch: 26121, sum loss: 6232.470703, avg loss: 3.409448, ppl: 30.248539 +epoch: 0, batch: 26122, sum loss: 5694.210449, avg loss: 3.379353, ppl: 29.351782 +epoch: 0, batch: 26123, sum loss: 5215.854492, avg loss: 3.180399, ppl: 24.056355 +epoch: 0, batch: 26124, sum loss: 5998.066406, avg loss: 3.390654, ppl: 29.685356 +epoch: 0, batch: 26125, sum loss: 4958.625488, avg loss: 2.848148, ppl: 17.255793 +epoch: 0, batch: 26126, sum loss: 5613.879883, avg loss: 3.228223, ppl: 25.234777 +epoch: 0, batch: 26127, sum loss: 5452.985352, avg loss: 3.245825, ppl: 25.682884 +epoch: 0, batch: 26128, sum loss: 4761.093262, avg loss: 3.124077, ppl: 22.738894 +epoch: 0, batch: 26129, sum loss: 5642.806641, avg loss: 3.362817, ppl: 28.870399 +epoch: 0, batch: 26130, sum loss: 4283.272461, avg loss: 2.927732, ppl: 18.685213 +epoch: 0, batch: 26131, sum loss: 4918.086914, avg loss: 2.964489, ppl: 19.384790 +epoch: 0, batch: 26132, sum loss: 5572.268555, avg loss: 3.080303, ppl: 21.765005 +epoch: 0, batch: 26133, sum loss: 4449.240234, avg loss: 2.792994, ppl: 16.329845 +epoch: 0, batch: 26134, sum loss: 5691.720215, avg loss: 3.105139, ppl: 22.312326 +epoch: 0, batch: 26135, sum loss: 5302.008301, avg loss: 3.139141, ppl: 
23.084021 +epoch: 0, batch: 26136, sum loss: 3991.556152, avg loss: 2.750900, ppl: 15.656717 +epoch: 0, batch: 26137, sum loss: 4619.036133, avg loss: 3.015037, ppl: 20.389837 +epoch: 0, batch: 26138, sum loss: 4110.841797, avg loss: 2.923785, ppl: 18.611599 +epoch: 0, batch: 26139, sum loss: 5277.188477, avg loss: 2.973064, ppl: 19.551729 +epoch: 0, batch: 26140, sum loss: 5425.595703, avg loss: 3.087988, ppl: 21.932913 +epoch: 0, batch: 26141, sum loss: 4247.152344, avg loss: 2.654470, ppl: 14.217452 +epoch: 0, batch: 26142, sum loss: 5407.199219, avg loss: 3.027547, ppl: 20.646532 +epoch: 0, batch: 26143, sum loss: 5479.536133, avg loss: 2.939665, ppl: 18.909517 +epoch: 0, batch: 26144, sum loss: 4415.268555, avg loss: 2.682423, ppl: 14.620481 +epoch: 0, batch: 26145, sum loss: 4951.567383, avg loss: 3.143852, ppl: 23.193041 +epoch: 0, batch: 26146, sum loss: 4932.856445, avg loss: 2.853011, ppl: 17.339920 +epoch: 0, batch: 26147, sum loss: 4961.397461, avg loss: 3.053168, ppl: 21.182335 +epoch: 0, batch: 26148, sum loss: 6252.851074, avg loss: 3.003291, ppl: 20.151741 +epoch: 0, batch: 26149, sum loss: 4495.952148, avg loss: 3.019444, ppl: 20.479902 +epoch: 0, batch: 26150, sum loss: 5176.646973, avg loss: 2.913138, ppl: 18.414499 +epoch: 0, batch: 26151, sum loss: 4312.870605, avg loss: 2.789696, ppl: 16.276079 +epoch: 0, batch: 26152, sum loss: 5633.932617, avg loss: 3.249096, ppl: 25.767035 +epoch: 0, batch: 26153, sum loss: 5092.785645, avg loss: 2.867560, ppl: 17.594030 +epoch: 0, batch: 26154, sum loss: 4834.139648, avg loss: 2.944056, ppl: 18.992725 +epoch: 0, batch: 26155, sum loss: 5268.421875, avg loss: 3.063036, ppl: 21.392405 +epoch: 0, batch: 26156, sum loss: 5624.872070, avg loss: 3.048711, ppl: 21.088146 +epoch: 0, batch: 26157, sum loss: 4588.797852, avg loss: 2.989445, ppl: 19.874643 +epoch: 0, batch: 26158, sum loss: 5623.424805, avg loss: 3.311793, ppl: 27.434280 +epoch: 0, batch: 26159, sum loss: 4975.952637, avg loss: 2.930479, ppl: 
18.736599 +epoch: 0, batch: 26160, sum loss: 6059.482422, avg loss: 3.284272, ppl: 26.689552 +epoch: 0, batch: 26161, sum loss: 4231.474609, avg loss: 2.924309, ppl: 18.621346 +epoch: 0, batch: 26162, sum loss: 4674.845703, avg loss: 2.761279, ppl: 15.820069 +epoch: 0, batch: 26163, sum loss: 6484.012695, avg loss: 3.144526, ppl: 23.208672 +epoch: 0, batch: 26164, sum loss: 5093.816895, avg loss: 3.017664, ppl: 20.443483 +epoch: 0, batch: 26165, sum loss: 4900.953125, avg loss: 3.070773, ppl: 21.558554 +epoch: 0, batch: 26166, sum loss: 5756.223145, avg loss: 3.300586, ppl: 27.128544 +epoch: 0, batch: 26167, sum loss: 6280.255371, avg loss: 3.184714, ppl: 24.160368 +epoch: 0, batch: 26168, sum loss: 4373.523926, avg loss: 2.902139, ppl: 18.213070 +epoch: 0, batch: 26169, sum loss: 5367.203125, avg loss: 3.175860, ppl: 23.947405 +epoch: 0, batch: 26170, sum loss: 4767.958984, avg loss: 2.898455, ppl: 18.146091 +epoch: 0, batch: 26171, sum loss: 5350.675781, avg loss: 2.928668, ppl: 18.702694 +epoch: 0, batch: 26172, sum loss: 4787.121094, avg loss: 3.269891, ppl: 26.308479 +epoch: 0, batch: 26173, sum loss: 6017.386719, avg loss: 3.177079, ppl: 23.976608 +epoch: 0, batch: 26174, sum loss: 5961.334473, avg loss: 3.227577, ppl: 25.218483 +epoch: 0, batch: 26175, sum loss: 4960.265137, avg loss: 2.921240, ppl: 18.564291 +epoch: 0, batch: 26176, sum loss: 5744.930664, avg loss: 3.022057, ppl: 20.533491 +epoch: 0, batch: 26177, sum loss: 5776.488770, avg loss: 3.137691, ppl: 23.050577 +epoch: 0, batch: 26178, sum loss: 4492.983887, avg loss: 3.164073, ppl: 23.666800 +epoch: 0, batch: 26179, sum loss: 5813.734863, avg loss: 3.163077, ppl: 23.643227 +epoch: 0, batch: 26180, sum loss: 4675.348633, avg loss: 2.911176, ppl: 18.378399 +epoch: 0, batch: 26181, sum loss: 4835.959961, avg loss: 3.204745, ppl: 24.649216 +epoch: 0, batch: 26182, sum loss: 5874.104004, avg loss: 3.173476, ppl: 23.890383 +epoch: 0, batch: 26183, sum loss: 5404.784180, avg loss: 3.024502, ppl: 
20.583742 +epoch: 0, batch: 26184, sum loss: 4344.818848, avg loss: 2.839751, ppl: 17.111504 +epoch: 0, batch: 26185, sum loss: 4997.535156, avg loss: 2.999721, ppl: 20.079929 +epoch: 0, batch: 26186, sum loss: 4504.969727, avg loss: 3.013358, ppl: 20.355633 +epoch: 0, batch: 26187, sum loss: 5392.473145, avg loss: 3.304212, ppl: 27.227068 +epoch: 0, batch: 26188, sum loss: 5176.239258, avg loss: 3.057436, ppl: 21.272947 +epoch: 0, batch: 26189, sum loss: 4352.261230, avg loss: 3.095492, ppl: 22.098101 +epoch: 0, batch: 26190, sum loss: 5034.875977, avg loss: 2.863979, ppl: 17.531151 +epoch: 0, batch: 26191, sum loss: 4045.692871, avg loss: 2.877449, ppl: 17.768879 +epoch: 0, batch: 26192, sum loss: 5114.862305, avg loss: 2.946349, ppl: 19.036327 +epoch: 0, batch: 26193, sum loss: 5186.985352, avg loss: 3.017444, ppl: 20.438976 +epoch: 0, batch: 26194, sum loss: 5973.921875, avg loss: 3.280572, ppl: 26.590982 +epoch: 0, batch: 26195, sum loss: 4507.248047, avg loss: 2.725059, ppl: 15.257318 +epoch: 0, batch: 26196, sum loss: 4809.521484, avg loss: 2.834132, ppl: 17.015619 +epoch: 0, batch: 26197, sum loss: 4955.242676, avg loss: 2.783844, ppl: 16.181105 +epoch: 0, batch: 26198, sum loss: 5275.880859, avg loss: 2.942488, ppl: 18.962967 +epoch: 0, batch: 26199, sum loss: 5894.953125, avg loss: 3.104241, ppl: 22.292286 +epoch: 0, batch: 26200, sum loss: 5739.142090, avg loss: 3.090545, ppl: 21.989058 +epoch: 0, batch: 26201, sum loss: 4998.527832, avg loss: 3.091236, ppl: 22.004250 +epoch: 0, batch: 26202, sum loss: 5028.452637, avg loss: 3.016468, ppl: 20.419050 +epoch: 0, batch: 26203, sum loss: 6249.130859, avg loss: 3.285558, ppl: 26.723892 +epoch: 0, batch: 26204, sum loss: 4427.058105, avg loss: 2.710997, ppl: 15.044265 +epoch: 0, batch: 26205, sum loss: 4707.208984, avg loss: 2.765693, ppl: 15.890043 +epoch: 0, batch: 26206, sum loss: 5384.139160, avg loss: 3.170871, ppl: 23.828236 +epoch: 0, batch: 26207, sum loss: 5931.829590, avg loss: 3.081470, ppl: 
21.790405 +epoch: 0, batch: 26208, sum loss: 4678.994141, avg loss: 2.790098, ppl: 16.282610 +epoch: 0, batch: 26209, sum loss: 3964.066406, avg loss: 2.893479, ppl: 18.056025 +epoch: 0, batch: 26210, sum loss: 5340.388184, avg loss: 3.135871, ppl: 23.008667 +epoch: 0, batch: 26211, sum loss: 4889.403320, avg loss: 3.110307, ppl: 22.427938 +epoch: 0, batch: 26212, sum loss: 5197.507812, avg loss: 3.159579, ppl: 23.560677 +epoch: 0, batch: 26213, sum loss: 5226.386719, avg loss: 2.905162, ppl: 18.268204 +epoch: 0, batch: 26214, sum loss: 4449.305664, avg loss: 2.800066, ppl: 16.445740 +epoch: 0, batch: 26215, sum loss: 4839.978027, avg loss: 2.920928, ppl: 18.558506 +epoch: 0, batch: 26216, sum loss: 4432.632812, avg loss: 2.887709, ppl: 17.952127 +epoch: 0, batch: 26217, sum loss: 5039.510742, avg loss: 2.952262, ppl: 19.149214 +epoch: 0, batch: 26218, sum loss: 5514.813477, avg loss: 3.077463, ppl: 21.703270 +epoch: 0, batch: 26219, sum loss: 5508.219727, avg loss: 3.119037, ppl: 22.624584 +epoch: 0, batch: 26220, sum loss: 4657.874512, avg loss: 2.805948, ppl: 16.542759 +epoch: 0, batch: 26221, sum loss: 4180.036133, avg loss: 2.820537, ppl: 16.785868 +epoch: 0, batch: 26222, sum loss: 4542.474609, avg loss: 3.167695, ppl: 23.752672 +epoch: 0, batch: 26223, sum loss: 5312.699707, avg loss: 3.151067, ppl: 23.360989 +epoch: 0, batch: 26224, sum loss: 6070.311523, avg loss: 3.445126, ppl: 31.347227 +epoch: 0, batch: 26225, sum loss: 6154.225098, avg loss: 3.277010, ppl: 26.496435 +epoch: 0, batch: 26226, sum loss: 5520.053223, avg loss: 3.384459, ppl: 29.502043 +epoch: 0, batch: 26227, sum loss: 5396.537598, avg loss: 3.064473, ppl: 21.423178 +epoch: 0, batch: 26228, sum loss: 5118.431152, avg loss: 3.302214, ppl: 27.172724 +epoch: 0, batch: 26229, sum loss: 6139.575195, avg loss: 3.392031, ppl: 29.726257 +epoch: 0, batch: 26230, sum loss: 6013.169434, avg loss: 3.168161, ppl: 23.763746 +epoch: 0, batch: 26231, sum loss: 3767.850098, avg loss: 2.714589, ppl: 
15.098409 +epoch: 0, batch: 26232, sum loss: 6214.333496, avg loss: 3.140138, ppl: 23.107059 +epoch: 0, batch: 26233, sum loss: 4537.543457, avg loss: 2.756709, ppl: 15.747936 +epoch: 0, batch: 26234, sum loss: 4437.768066, avg loss: 3.086070, ppl: 21.890869 +epoch: 0, batch: 26235, sum loss: 5453.587891, avg loss: 3.058659, ppl: 21.298967 +epoch: 0, batch: 26236, sum loss: 5402.031250, avg loss: 2.994474, ppl: 19.974854 +epoch: 0, batch: 26237, sum loss: 4649.779785, avg loss: 3.059066, ppl: 21.307642 +epoch: 0, batch: 26238, sum loss: 5012.243652, avg loss: 2.917488, ppl: 18.494762 +epoch: 0, batch: 26239, sum loss: 5771.443848, avg loss: 3.220672, ppl: 25.044943 +epoch: 0, batch: 26240, sum loss: 5490.252930, avg loss: 3.046755, ppl: 21.046944 +epoch: 0, batch: 26241, sum loss: 4543.935547, avg loss: 2.999297, ppl: 20.071430 +epoch: 0, batch: 26242, sum loss: 4790.386719, avg loss: 2.926321, ppl: 18.658850 +epoch: 0, batch: 26243, sum loss: 4977.338867, avg loss: 2.964466, ppl: 19.384352 +epoch: 0, batch: 26244, sum loss: 4581.812988, avg loss: 2.756807, ppl: 15.749473 +epoch: 0, batch: 26245, sum loss: 4638.881836, avg loss: 3.012261, ppl: 20.333321 +epoch: 0, batch: 26246, sum loss: 4869.268555, avg loss: 3.248345, ppl: 25.747683 +epoch: 0, batch: 26247, sum loss: 4430.484863, avg loss: 2.969494, ppl: 19.482056 +epoch: 0, batch: 26248, sum loss: 4443.441895, avg loss: 2.835636, ppl: 17.041241 +epoch: 0, batch: 26249, sum loss: 4324.768066, avg loss: 2.898638, ppl: 18.149414 +epoch: 0, batch: 26250, sum loss: 4572.088867, avg loss: 3.037933, ppl: 20.862074 +epoch: 0, batch: 26251, sum loss: 6223.610840, avg loss: 3.360481, ppl: 28.803043 +epoch: 0, batch: 26252, sum loss: 4888.410156, avg loss: 2.953722, ppl: 19.177202 +epoch: 0, batch: 26253, sum loss: 4970.242676, avg loss: 3.122012, ppl: 22.691982 +epoch: 0, batch: 26254, sum loss: 4869.482422, avg loss: 2.976456, ppl: 19.618174 +epoch: 0, batch: 26255, sum loss: 5347.895020, avg loss: 3.156963, ppl: 
23.499111 +epoch: 0, batch: 26256, sum loss: 6036.918457, avg loss: 3.110210, ppl: 22.425762 +epoch: 0, batch: 26257, sum loss: 4517.011719, avg loss: 2.840888, ppl: 17.130968 +epoch: 0, batch: 26258, sum loss: 6350.628906, avg loss: 3.178493, ppl: 24.010538 +epoch: 0, batch: 26259, sum loss: 4941.531250, avg loss: 3.121624, ppl: 22.683193 +epoch: 0, batch: 26260, sum loss: 4761.614746, avg loss: 2.750788, ppl: 15.654966 +epoch: 0, batch: 26261, sum loss: 5111.977539, avg loss: 3.083219, ppl: 21.828562 +epoch: 0, batch: 26262, sum loss: 4371.950684, avg loss: 2.848176, ppl: 17.256281 +epoch: 0, batch: 26263, sum loss: 5005.206543, avg loss: 3.013369, ppl: 20.355869 +epoch: 0, batch: 26264, sum loss: 5728.676758, avg loss: 3.309461, ppl: 27.370359 +epoch: 0, batch: 26265, sum loss: 5489.406738, avg loss: 3.122529, ppl: 22.703732 +epoch: 0, batch: 26266, sum loss: 6120.607910, avg loss: 3.312017, ppl: 27.440422 +epoch: 0, batch: 26267, sum loss: 5929.088379, avg loss: 3.148746, ppl: 23.306814 +epoch: 0, batch: 26268, sum loss: 4340.107910, avg loss: 2.918701, ppl: 18.517212 +epoch: 0, batch: 26269, sum loss: 4396.002930, avg loss: 2.986415, ppl: 19.814524 +epoch: 0, batch: 26270, sum loss: 5823.949707, avg loss: 3.031728, ppl: 20.733034 +epoch: 0, batch: 26271, sum loss: 4824.937012, avg loss: 3.057628, ppl: 21.277027 +epoch: 0, batch: 26272, sum loss: 5315.653809, avg loss: 3.027138, ppl: 20.638077 +epoch: 0, batch: 26273, sum loss: 4685.108887, avg loss: 3.092481, ppl: 22.031673 +epoch: 0, batch: 26274, sum loss: 4688.664062, avg loss: 2.881785, ppl: 17.846098 +epoch: 0, batch: 26275, sum loss: 5443.383301, avg loss: 2.959969, ppl: 19.297379 +epoch: 0, batch: 26276, sum loss: 4894.445312, avg loss: 2.923803, ppl: 18.611927 +epoch: 0, batch: 26277, sum loss: 5601.444336, avg loss: 3.065925, ppl: 21.454290 +epoch: 0, batch: 26278, sum loss: 5583.966797, avg loss: 3.280827, ppl: 26.597748 +epoch: 0, batch: 26279, sum loss: 4875.479004, avg loss: 2.976483, ppl: 
19.618689 +epoch: 0, batch: 26280, sum loss: 5510.630859, avg loss: 3.051291, ppl: 21.142616 +epoch: 0, batch: 26281, sum loss: 5407.115723, avg loss: 3.070480, ppl: 21.552248 +epoch: 0, batch: 26282, sum loss: 5572.538574, avg loss: 3.148327, ppl: 23.297052 +epoch: 0, batch: 26283, sum loss: 4835.558594, avg loss: 2.923554, ppl: 18.607304 +epoch: 0, batch: 26284, sum loss: 4597.844238, avg loss: 3.164380, ppl: 23.674063 +epoch: 0, batch: 26285, sum loss: 4467.880859, avg loss: 2.849414, ppl: 17.277653 +epoch: 0, batch: 26286, sum loss: 4967.606445, avg loss: 2.976397, ppl: 19.617010 +epoch: 0, batch: 26287, sum loss: 4837.295410, avg loss: 2.850498, ppl: 17.296396 +epoch: 0, batch: 26288, sum loss: 5648.674805, avg loss: 3.313006, ppl: 27.467567 +epoch: 0, batch: 26289, sum loss: 4907.229980, avg loss: 3.051760, ppl: 21.152540 +epoch: 0, batch: 26290, sum loss: 5717.474121, avg loss: 3.304898, ppl: 27.245770 +epoch: 0, batch: 26291, sum loss: 5096.386719, avg loss: 3.086848, ppl: 21.907927 +epoch: 0, batch: 26292, sum loss: 4074.143066, avg loss: 2.914265, ppl: 18.435265 +epoch: 0, batch: 26293, sum loss: 5812.230957, avg loss: 3.190028, ppl: 24.289106 +epoch: 0, batch: 26294, sum loss: 5090.688477, avg loss: 3.179693, ppl: 24.039377 +epoch: 0, batch: 26295, sum loss: 4968.295410, avg loss: 2.881842, ppl: 17.847120 +epoch: 0, batch: 26296, sum loss: 6186.298828, avg loss: 3.233820, ppl: 25.376421 +epoch: 0, batch: 26297, sum loss: 4214.182617, avg loss: 2.677371, ppl: 14.546803 +epoch: 0, batch: 26298, sum loss: 4975.921875, avg loss: 3.034099, ppl: 20.782236 +epoch: 0, batch: 26299, sum loss: 4912.772949, avg loss: 3.091739, ppl: 22.015324 +epoch: 0, batch: 26300, sum loss: 4327.631836, avg loss: 2.763494, ppl: 15.855145 +epoch: 0, batch: 26301, sum loss: 4906.070312, avg loss: 2.932499, ppl: 18.774483 +epoch: 0, batch: 26302, sum loss: 5156.065918, avg loss: 3.159354, ppl: 23.555380 +epoch: 0, batch: 26303, sum loss: 4759.127930, avg loss: 2.955980, ppl: 
19.220551 +epoch: 0, batch: 26304, sum loss: 4281.678223, avg loss: 2.684438, ppl: 14.649962 +epoch: 0, batch: 26305, sum loss: 5505.083008, avg loss: 3.232579, ppl: 25.344950 +epoch: 0, batch: 26306, sum loss: 5649.274414, avg loss: 3.188078, ppl: 24.241795 +epoch: 0, batch: 26307, sum loss: 3533.171387, avg loss: 2.917565, ppl: 18.496197 +epoch: 0, batch: 26308, sum loss: 4908.773438, avg loss: 2.835802, ppl: 17.044065 +epoch: 0, batch: 26309, sum loss: 5747.387207, avg loss: 3.085017, ppl: 21.867844 +epoch: 0, batch: 26310, sum loss: 4711.723145, avg loss: 2.850407, ppl: 17.294821 +epoch: 0, batch: 26311, sum loss: 5011.810059, avg loss: 3.082294, ppl: 21.808372 +epoch: 0, batch: 26312, sum loss: 4827.329590, avg loss: 2.945289, ppl: 19.016165 +epoch: 0, batch: 26313, sum loss: 4966.167969, avg loss: 3.282332, ppl: 26.637817 +epoch: 0, batch: 26314, sum loss: 4866.112793, avg loss: 2.992689, ppl: 19.939230 +epoch: 0, batch: 26315, sum loss: 4050.056396, avg loss: 2.509329, ppl: 12.296671 +epoch: 0, batch: 26316, sum loss: 5515.506836, avg loss: 3.284995, ppl: 26.708853 +epoch: 0, batch: 26317, sum loss: 4143.347168, avg loss: 2.841802, ppl: 17.146633 +epoch: 0, batch: 26318, sum loss: 4989.083008, avg loss: 3.151663, ppl: 23.374907 +epoch: 0, batch: 26319, sum loss: 5313.226562, avg loss: 3.114435, ppl: 22.520706 +epoch: 0, batch: 26320, sum loss: 5766.040039, avg loss: 3.150842, ppl: 23.355715 +epoch: 0, batch: 26321, sum loss: 5623.190430, avg loss: 3.323399, ppl: 27.754517 +epoch: 0, batch: 26322, sum loss: 5497.272461, avg loss: 3.152106, ppl: 23.385252 +epoch: 0, batch: 26323, sum loss: 5629.378906, avg loss: 3.282437, ppl: 26.640606 +epoch: 0, batch: 26324, sum loss: 3319.815430, avg loss: 2.696844, ppl: 14.832850 +epoch: 0, batch: 26325, sum loss: 4751.404785, avg loss: 2.848564, ppl: 17.262978 +epoch: 0, batch: 26326, sum loss: 4962.155762, avg loss: 3.012845, ppl: 20.345196 +epoch: 0, batch: 26327, sum loss: 4527.819824, avg loss: 3.094887, ppl: 
22.084749 +epoch: 0, batch: 26328, sum loss: 5042.571289, avg loss: 3.032214, ppl: 20.743101 +epoch: 0, batch: 26329, sum loss: 5487.181641, avg loss: 2.939037, ppl: 18.897633 +epoch: 0, batch: 26330, sum loss: 5591.063477, avg loss: 3.097542, ppl: 22.143457 +epoch: 0, batch: 26331, sum loss: 6184.291016, avg loss: 3.113943, ppl: 22.509628 +epoch: 0, batch: 26332, sum loss: 4714.585938, avg loss: 2.995290, ppl: 19.991148 +epoch: 0, batch: 26333, sum loss: 5125.329102, avg loss: 3.059898, ppl: 21.325384 +epoch: 0, batch: 26334, sum loss: 5027.308594, avg loss: 3.114813, ppl: 22.529224 +epoch: 0, batch: 26335, sum loss: 5378.576660, avg loss: 2.993087, ppl: 19.947161 +epoch: 0, batch: 26336, sum loss: 5672.312988, avg loss: 3.241322, ppl: 25.567488 +epoch: 0, batch: 26337, sum loss: 4888.457031, avg loss: 3.250304, ppl: 25.798174 +epoch: 0, batch: 26338, sum loss: 5100.155273, avg loss: 3.152135, ppl: 23.385948 +epoch: 0, batch: 26339, sum loss: 5766.850098, avg loss: 3.075653, ppl: 21.664030 +epoch: 0, batch: 26340, sum loss: 4463.116699, avg loss: 2.875720, ppl: 17.738188 +epoch: 0, batch: 26341, sum loss: 4895.365234, avg loss: 3.042489, ppl: 20.957348 +epoch: 0, batch: 26342, sum loss: 4757.849121, avg loss: 2.849011, ppl: 17.270700 +epoch: 0, batch: 26343, sum loss: 5136.842285, avg loss: 2.948819, ppl: 19.083397 +epoch: 0, batch: 26344, sum loss: 5088.899902, avg loss: 2.955226, ppl: 19.206070 +epoch: 0, batch: 26345, sum loss: 5281.073730, avg loss: 3.095588, ppl: 22.100235 +epoch: 0, batch: 26346, sum loss: 6304.553711, avg loss: 3.302543, ppl: 27.181665 +epoch: 0, batch: 26347, sum loss: 5857.350586, avg loss: 3.069890, ppl: 21.539534 +epoch: 0, batch: 26348, sum loss: 4617.305664, avg loss: 2.896678, ppl: 18.113878 +epoch: 0, batch: 26349, sum loss: 4454.988770, avg loss: 2.832161, ppl: 16.982115 +epoch: 0, batch: 26350, sum loss: 5626.370605, avg loss: 3.099929, ppl: 22.196373 +epoch: 0, batch: 26351, sum loss: 5804.585938, avg loss: 3.268348, ppl: 
26.267908 +epoch: 0, batch: 26352, sum loss: 4605.034180, avg loss: 2.840860, ppl: 17.130489 +epoch: 0, batch: 26353, sum loss: 4843.375488, avg loss: 2.928280, ppl: 18.695454 +epoch: 0, batch: 26354, sum loss: 5174.048340, avg loss: 3.221699, ppl: 25.070673 +epoch: 0, batch: 26355, sum loss: 5492.531250, avg loss: 3.253869, ppl: 25.890324 +epoch: 0, batch: 26356, sum loss: 5100.447266, avg loss: 2.961932, ppl: 19.335295 +epoch: 0, batch: 26357, sum loss: 4995.569336, avg loss: 2.926520, ppl: 18.662569 +epoch: 0, batch: 26358, sum loss: 5000.528320, avg loss: 3.071578, ppl: 21.575912 +epoch: 0, batch: 26359, sum loss: 5827.143066, avg loss: 3.139625, ppl: 23.095196 +epoch: 0, batch: 26360, sum loss: 4527.005371, avg loss: 2.939614, ppl: 18.908543 +epoch: 0, batch: 26361, sum loss: 3744.548828, avg loss: 2.815450, ppl: 16.700693 +epoch: 0, batch: 26362, sum loss: 5153.011230, avg loss: 3.020522, ppl: 20.501999 +epoch: 0, batch: 26363, sum loss: 6017.068848, avg loss: 3.250713, ppl: 25.808725 +epoch: 0, batch: 26364, sum loss: 5567.503418, avg loss: 3.229411, ppl: 25.264763 +epoch: 0, batch: 26365, sum loss: 5202.013672, avg loss: 2.914294, ppl: 18.435783 +epoch: 0, batch: 26366, sum loss: 6017.496094, avg loss: 3.422921, ppl: 30.658852 +epoch: 0, batch: 26367, sum loss: 4785.158691, avg loss: 2.948342, ppl: 19.074295 +epoch: 0, batch: 26368, sum loss: 4551.830566, avg loss: 2.837800, ppl: 17.078148 +epoch: 0, batch: 26369, sum loss: 5604.792969, avg loss: 3.266196, ppl: 26.211454 +epoch: 0, batch: 26370, sum loss: 5519.055176, avg loss: 3.235085, ppl: 25.408531 +epoch: 0, batch: 26371, sum loss: 4483.390625, avg loss: 3.075028, ppl: 21.650486 +epoch: 0, batch: 26372, sum loss: 4381.829590, avg loss: 2.944778, ppl: 19.006441 +epoch: 0, batch: 26373, sum loss: 6043.111328, avg loss: 3.441407, ppl: 31.230883 +epoch: 0, batch: 26374, sum loss: 5075.706055, avg loss: 3.186256, ppl: 24.197666 +epoch: 0, batch: 26375, sum loss: 4223.141602, avg loss: 2.813552, ppl: 
16.669024 +epoch: 0, batch: 26376, sum loss: 4040.109375, avg loss: 2.770994, ppl: 15.974507 +epoch: 0, batch: 26377, sum loss: 5657.465820, avg loss: 3.214469, ppl: 24.890076 +epoch: 0, batch: 26378, sum loss: 5283.870605, avg loss: 3.173496, ppl: 23.890862 +epoch: 0, batch: 26379, sum loss: 5026.449219, avg loss: 3.234523, ppl: 25.394264 +epoch: 0, batch: 26380, sum loss: 4970.393066, avg loss: 2.985221, ppl: 19.790878 +epoch: 0, batch: 26381, sum loss: 4457.020996, avg loss: 2.890416, ppl: 18.000790 +epoch: 0, batch: 26382, sum loss: 6379.187500, avg loss: 3.411330, ppl: 30.305531 +epoch: 0, batch: 26383, sum loss: 5220.395508, avg loss: 2.969508, ppl: 19.482336 +epoch: 0, batch: 26384, sum loss: 5417.052734, avg loss: 3.253485, ppl: 25.880381 +epoch: 0, batch: 26385, sum loss: 5525.345215, avg loss: 3.203099, ppl: 24.608669 +epoch: 0, batch: 26386, sum loss: 6253.351562, avg loss: 3.415266, ppl: 30.425043 +epoch: 0, batch: 26387, sum loss: 4786.243164, avg loss: 2.893739, ppl: 18.060703 +epoch: 0, batch: 26388, sum loss: 5172.923828, avg loss: 3.090158, ppl: 21.980545 +epoch: 0, batch: 26389, sum loss: 4863.443848, avg loss: 2.924500, ppl: 18.624916 +epoch: 0, batch: 26390, sum loss: 4878.108398, avg loss: 2.800292, ppl: 16.449446 +epoch: 0, batch: 26391, sum loss: 4928.733398, avg loss: 3.117478, ppl: 22.589346 +epoch: 0, batch: 26392, sum loss: 4964.761230, avg loss: 2.998044, ppl: 20.046293 +epoch: 0, batch: 26393, sum loss: 6206.819336, avg loss: 3.215969, ppl: 24.927425 +epoch: 0, batch: 26394, sum loss: 5943.759766, avg loss: 3.202457, ppl: 24.592875 +epoch: 0, batch: 26395, sum loss: 5486.334961, avg loss: 3.217792, ppl: 24.972914 +epoch: 0, batch: 26396, sum loss: 5305.517578, avg loss: 3.082811, ppl: 21.819649 +epoch: 0, batch: 26397, sum loss: 4704.839844, avg loss: 2.973982, ppl: 19.569693 +epoch: 0, batch: 26398, sum loss: 5067.216309, avg loss: 3.192953, ppl: 24.360260 +epoch: 0, batch: 26399, sum loss: 5101.616211, avg loss: 2.926917, ppl: 
18.669979 +epoch: 0, batch: 26400, sum loss: 4894.868164, avg loss: 2.968386, ppl: 19.460480 +epoch: 0, batch: 26401, sum loss: 3543.352539, avg loss: 2.725656, ppl: 15.266422 +epoch: 0, batch: 26402, sum loss: 5603.069336, avg loss: 3.225716, ppl: 25.171600 +epoch: 0, batch: 26403, sum loss: 6110.581055, avg loss: 3.359308, ppl: 28.769276 +epoch: 0, batch: 26404, sum loss: 5470.527832, avg loss: 3.017390, ppl: 20.437880 +epoch: 0, batch: 26405, sum loss: 4842.506836, avg loss: 3.005901, ppl: 20.204418 +epoch: 0, batch: 26406, sum loss: 4959.777832, avg loss: 2.905552, ppl: 18.275335 +epoch: 0, batch: 26407, sum loss: 4468.510742, avg loss: 2.924418, ppl: 18.623384 +epoch: 0, batch: 26408, sum loss: 4721.709473, avg loss: 2.764467, ppl: 15.870575 +epoch: 0, batch: 26409, sum loss: 4954.589844, avg loss: 3.035901, ppl: 20.819725 +epoch: 0, batch: 26410, sum loss: 6664.007324, avg loss: 3.268272, ppl: 26.265923 +epoch: 0, batch: 26411, sum loss: 4980.313965, avg loss: 2.893849, ppl: 18.062698 +epoch: 0, batch: 26412, sum loss: 4840.584961, avg loss: 3.098966, ppl: 22.175013 +epoch: 0, batch: 26413, sum loss: 6011.506348, avg loss: 3.211275, ppl: 24.810696 +epoch: 0, batch: 26414, sum loss: 4455.103516, avg loss: 2.841265, ppl: 17.137430 +epoch: 0, batch: 26415, sum loss: 4679.140137, avg loss: 3.090581, ppl: 21.989859 +epoch: 0, batch: 26416, sum loss: 5578.102539, avg loss: 3.273534, ppl: 26.404495 +epoch: 0, batch: 26417, sum loss: 5745.364258, avg loss: 3.035058, ppl: 20.802176 +epoch: 0, batch: 26418, sum loss: 4783.495117, avg loss: 2.945502, ppl: 19.020205 +epoch: 0, batch: 26419, sum loss: 4724.881348, avg loss: 2.800760, ppl: 16.457142 +epoch: 0, batch: 26420, sum loss: 5414.197266, avg loss: 3.164347, ppl: 23.673279 +epoch: 0, batch: 26421, sum loss: 5524.262695, avg loss: 3.053766, ppl: 21.195021 +epoch: 0, batch: 26422, sum loss: 5055.275391, avg loss: 2.932295, ppl: 18.770653 +epoch: 0, batch: 26423, sum loss: 5743.471680, avg loss: 3.259632, ppl: 
26.039955 +epoch: 0, batch: 26424, sum loss: 4761.412109, avg loss: 2.996483, ppl: 20.015026 +epoch: 0, batch: 26425, sum loss: 4917.995117, avg loss: 2.978798, ppl: 19.664164 +epoch: 0, batch: 26426, sum loss: 5789.149414, avg loss: 3.272555, ppl: 26.378647 +epoch: 0, batch: 26427, sum loss: 5021.419922, avg loss: 3.140350, ppl: 23.111958 +epoch: 0, batch: 26428, sum loss: 5110.363770, avg loss: 3.056438, ppl: 21.251719 +epoch: 0, batch: 26429, sum loss: 3846.691162, avg loss: 2.730086, ppl: 15.334207 +epoch: 0, batch: 26430, sum loss: 5335.254395, avg loss: 2.977263, ppl: 19.633999 +epoch: 0, batch: 26431, sum loss: 5641.814453, avg loss: 3.079593, ppl: 21.749552 +epoch: 0, batch: 26432, sum loss: 5057.400879, avg loss: 3.037478, ppl: 20.852587 +epoch: 0, batch: 26433, sum loss: 5283.018555, avg loss: 2.971327, ppl: 19.517794 +epoch: 0, batch: 26434, sum loss: 4788.638672, avg loss: 3.273164, ppl: 26.394722 +epoch: 0, batch: 26435, sum loss: 4617.302734, avg loss: 2.789911, ppl: 16.279572 +epoch: 0, batch: 26436, sum loss: 4122.154297, avg loss: 2.862607, ppl: 17.507113 +epoch: 0, batch: 26437, sum loss: 4640.692383, avg loss: 2.955855, ppl: 19.218149 +epoch: 0, batch: 26438, sum loss: 4938.574707, avg loss: 2.941379, ppl: 18.941942 +epoch: 0, batch: 26439, sum loss: 5243.936035, avg loss: 3.283617, ppl: 26.672066 +epoch: 0, batch: 26440, sum loss: 5383.398438, avg loss: 2.948192, ppl: 19.071444 +epoch: 0, batch: 26441, sum loss: 5115.293457, avg loss: 3.050264, ppl: 21.120928 +epoch: 0, batch: 26442, sum loss: 5468.877930, avg loss: 3.058657, ppl: 21.298927 +epoch: 0, batch: 26443, sum loss: 5824.668945, avg loss: 3.113132, ppl: 22.491367 +epoch: 0, batch: 26444, sum loss: 4604.495605, avg loss: 3.047317, ppl: 21.058764 +epoch: 0, batch: 26445, sum loss: 5724.893066, avg loss: 3.475952, ppl: 32.328587 +epoch: 0, batch: 26446, sum loss: 4945.363281, avg loss: 2.816266, ppl: 16.714327 +epoch: 0, batch: 26447, sum loss: 4814.351562, avg loss: 2.884573, ppl: 
17.895920 +epoch: 0, batch: 26448, sum loss: 4943.660156, avg loss: 2.764911, ppl: 15.877622 +epoch: 0, batch: 26449, sum loss: 6063.731934, avg loss: 3.328064, ppl: 27.884298 +epoch: 0, batch: 26450, sum loss: 5303.502441, avg loss: 2.972816, ppl: 19.546877 +epoch: 0, batch: 26451, sum loss: 5304.272461, avg loss: 3.118326, ppl: 22.608500 +epoch: 0, batch: 26452, sum loss: 4639.642578, avg loss: 2.925374, ppl: 18.641188 +epoch: 0, batch: 26453, sum loss: 6146.321289, avg loss: 3.264111, ppl: 26.156849 +epoch: 0, batch: 26454, sum loss: 4716.436523, avg loss: 3.021420, ppl: 20.520411 +epoch: 0, batch: 26455, sum loss: 5402.438477, avg loss: 3.028273, ppl: 20.661516 +epoch: 0, batch: 26456, sum loss: 5414.495117, avg loss: 3.166371, ppl: 23.721252 +epoch: 0, batch: 26457, sum loss: 5316.686035, avg loss: 3.078568, ppl: 21.727257 +epoch: 0, batch: 26458, sum loss: 4236.087891, avg loss: 2.887585, ppl: 17.949915 +epoch: 0, batch: 26459, sum loss: 5640.562012, avg loss: 2.843025, ppl: 17.167624 +epoch: 0, batch: 26460, sum loss: 5382.071289, avg loss: 3.056258, ppl: 21.247908 +epoch: 0, batch: 26461, sum loss: 6069.460449, avg loss: 3.587152, ppl: 36.131012 +epoch: 0, batch: 26462, sum loss: 5495.561523, avg loss: 3.138528, ppl: 23.069874 +epoch: 0, batch: 26463, sum loss: 5570.002441, avg loss: 3.175600, ppl: 23.941181 +epoch: 0, batch: 26464, sum loss: 5778.354492, avg loss: 3.103305, ppl: 22.271444 +epoch: 0, batch: 26465, sum loss: 6113.841309, avg loss: 3.333610, ppl: 28.039383 +epoch: 0, batch: 26466, sum loss: 4460.970703, avg loss: 2.948427, ppl: 19.075932 +epoch: 0, batch: 26467, sum loss: 4851.300293, avg loss: 2.845338, ppl: 17.207367 +epoch: 0, batch: 26468, sum loss: 5122.164062, avg loss: 2.974544, ppl: 19.580685 +epoch: 0, batch: 26469, sum loss: 5672.708984, avg loss: 3.201303, ppl: 24.564518 +epoch: 0, batch: 26470, sum loss: 4420.600098, avg loss: 2.863083, ppl: 17.515446 +epoch: 0, batch: 26471, sum loss: 5949.652344, avg loss: 3.128103, ppl: 
22.830641 +epoch: 0, batch: 26472, sum loss: 5059.598633, avg loss: 2.955373, ppl: 19.208887 +epoch: 0, batch: 26473, sum loss: 5493.715332, avg loss: 3.250719, ppl: 25.808891 +epoch: 0, batch: 26474, sum loss: 4718.222656, avg loss: 3.016766, ppl: 20.425137 +epoch: 0, batch: 26475, sum loss: 4776.381348, avg loss: 2.905341, ppl: 18.271481 +epoch: 0, batch: 26476, sum loss: 5863.682617, avg loss: 3.107410, ppl: 22.363049 +epoch: 0, batch: 26477, sum loss: 4319.552246, avg loss: 2.883546, ppl: 17.877556 +epoch: 0, batch: 26478, sum loss: 5102.000000, avg loss: 2.973194, ppl: 19.554270 +epoch: 0, batch: 26479, sum loss: 5811.605469, avg loss: 3.189685, ppl: 24.280769 +epoch: 0, batch: 26480, sum loss: 5343.781250, avg loss: 3.194131, ppl: 24.388973 +epoch: 0, batch: 26481, sum loss: 4877.046875, avg loss: 3.094573, ppl: 22.077805 +epoch: 0, batch: 26482, sum loss: 5788.151367, avg loss: 3.290592, ppl: 26.858765 +epoch: 0, batch: 26483, sum loss: 5421.130371, avg loss: 3.008397, ppl: 20.254898 +epoch: 0, batch: 26484, sum loss: 4042.859619, avg loss: 2.896031, ppl: 18.102161 +epoch: 0, batch: 26485, sum loss: 5233.349609, avg loss: 2.931849, ppl: 18.762285 +epoch: 0, batch: 26486, sum loss: 4256.562988, avg loss: 2.822654, ppl: 16.821444 +epoch: 0, batch: 26487, sum loss: 5902.431152, avg loss: 3.340369, ppl: 28.229528 +epoch: 0, batch: 26488, sum loss: 4881.105469, avg loss: 2.996382, ppl: 20.012989 +epoch: 0, batch: 26489, sum loss: 5727.333008, avg loss: 3.105929, ppl: 22.329952 +epoch: 0, batch: 26490, sum loss: 6176.573242, avg loss: 3.240594, ppl: 25.548903 +epoch: 0, batch: 26491, sum loss: 5210.286621, avg loss: 2.853388, ppl: 17.346453 +epoch: 0, batch: 26492, sum loss: 4615.341797, avg loss: 2.998923, ppl: 20.063908 +epoch: 0, batch: 26493, sum loss: 5062.088379, avg loss: 2.840678, ppl: 17.127378 +epoch: 0, batch: 26494, sum loss: 5190.381348, avg loss: 3.053166, ppl: 21.182295 +epoch: 0, batch: 26495, sum loss: 4357.939453, avg loss: 2.857665, ppl: 
17.420807 +epoch: 0, batch: 26496, sum loss: 5894.619141, avg loss: 3.155578, ppl: 23.466589 +epoch: 0, batch: 26497, sum loss: 4510.139160, avg loss: 2.667143, ppl: 14.398778 +epoch: 0, batch: 26498, sum loss: 5791.413086, avg loss: 3.388773, ppl: 29.629574 +epoch: 0, batch: 26499, sum loss: 4454.785156, avg loss: 2.799991, ppl: 16.444492 +epoch: 0, batch: 26500, sum loss: 5855.306641, avg loss: 3.177052, ppl: 23.975964 +epoch: 0, batch: 26501, sum loss: 4744.787109, avg loss: 2.921667, ppl: 18.572220 +epoch: 0, batch: 26502, sum loss: 4949.675293, avg loss: 2.994359, ppl: 19.972549 +epoch: 0, batch: 26503, sum loss: 5554.452148, avg loss: 3.170350, ppl: 23.815809 +epoch: 0, batch: 26504, sum loss: 5771.206055, avg loss: 3.010541, ppl: 20.298372 +epoch: 0, batch: 26505, sum loss: 4381.950195, avg loss: 2.843576, ppl: 17.177076 +epoch: 0, batch: 26506, sum loss: 5154.457031, avg loss: 3.166128, ppl: 23.715490 +epoch: 0, batch: 26507, sum loss: 5192.964844, avg loss: 3.434501, ppl: 31.015923 +epoch: 0, batch: 26508, sum loss: 5028.920898, avg loss: 2.923791, ppl: 18.611713 +epoch: 0, batch: 26509, sum loss: 5257.238770, avg loss: 3.127447, ppl: 22.815660 +epoch: 0, batch: 26510, sum loss: 5462.051758, avg loss: 3.029424, ppl: 20.685318 +epoch: 0, batch: 26511, sum loss: 4942.251953, avg loss: 2.898682, ppl: 18.150202 +epoch: 0, batch: 26512, sum loss: 4581.600586, avg loss: 2.843948, ppl: 17.183479 +epoch: 0, batch: 26513, sum loss: 4957.684570, avg loss: 2.919720, ppl: 18.536100 +epoch: 0, batch: 26514, sum loss: 4385.483398, avg loss: 2.842180, ppl: 17.153118 +epoch: 0, batch: 26515, sum loss: 5623.103027, avg loss: 3.047752, ppl: 21.067934 +epoch: 0, batch: 26516, sum loss: 3891.753418, avg loss: 2.912989, ppl: 18.411751 +epoch: 0, batch: 26517, sum loss: 4528.395996, avg loss: 3.008901, ppl: 20.265120 +epoch: 0, batch: 26518, sum loss: 5029.442383, avg loss: 3.205508, ppl: 24.668034 +epoch: 0, batch: 26519, sum loss: 5810.241699, avg loss: 3.211853, ppl: 
24.825045 +epoch: 0, batch: 26520, sum loss: 5457.973633, avg loss: 3.254606, ppl: 25.909391 +epoch: 0, batch: 26521, sum loss: 6030.256348, avg loss: 3.304250, ppl: 27.228113 +epoch: 0, batch: 26522, sum loss: 4825.739746, avg loss: 2.775008, ppl: 16.038763 +epoch: 0, batch: 26523, sum loss: 5204.039551, avg loss: 2.936817, ppl: 18.855732 +epoch: 0, batch: 26524, sum loss: 4623.751953, avg loss: 3.049968, ppl: 21.114679 +epoch: 0, batch: 26525, sum loss: 5882.338379, avg loss: 3.120604, ppl: 22.660063 +epoch: 0, batch: 26526, sum loss: 5424.811035, avg loss: 3.211848, ppl: 24.824921 +epoch: 0, batch: 26527, sum loss: 5306.109863, avg loss: 3.137853, ppl: 23.054325 +epoch: 0, batch: 26528, sum loss: 4325.598633, avg loss: 2.885656, ppl: 17.915323 +epoch: 0, batch: 26529, sum loss: 5449.868164, avg loss: 3.036138, ppl: 20.824669 +epoch: 0, batch: 26530, sum loss: 3879.861816, avg loss: 2.657440, ppl: 14.259733 +epoch: 0, batch: 26531, sum loss: 5921.161621, avg loss: 3.249814, ppl: 25.785551 +epoch: 0, batch: 26532, sum loss: 5108.005859, avg loss: 2.932265, ppl: 18.770098 +epoch: 0, batch: 26533, sum loss: 5960.993652, avg loss: 3.284294, ppl: 26.690138 +epoch: 0, batch: 26534, sum loss: 4080.055420, avg loss: 2.811892, ppl: 16.641375 +epoch: 0, batch: 26535, sum loss: 5731.186035, avg loss: 3.091254, ppl: 22.004665 +epoch: 0, batch: 26536, sum loss: 5712.762695, avg loss: 3.108141, ppl: 22.379396 +epoch: 0, batch: 26537, sum loss: 6084.836426, avg loss: 3.354375, ppl: 28.627710 +epoch: 0, batch: 26538, sum loss: 4638.218750, avg loss: 3.055480, ppl: 21.231375 +epoch: 0, batch: 26539, sum loss: 6309.042969, avg loss: 3.421390, ppl: 30.611938 +epoch: 0, batch: 26540, sum loss: 5000.091309, avg loss: 2.891898, ppl: 18.027493 +epoch: 0, batch: 26541, sum loss: 4023.691650, avg loss: 2.841590, ppl: 17.143003 +epoch: 0, batch: 26542, sum loss: 4844.238770, avg loss: 3.069860, ppl: 21.538881 +epoch: 0, batch: 26543, sum loss: 6863.487305, avg loss: 3.336649, ppl: 
28.124722 +epoch: 0, batch: 26544, sum loss: 6165.856445, avg loss: 3.224820, ppl: 25.149050 +epoch: 0, batch: 26545, sum loss: 4936.741211, avg loss: 2.703582, ppl: 14.933131 +epoch: 0, batch: 26546, sum loss: 5509.662598, avg loss: 3.382236, ppl: 29.436518 +epoch: 0, batch: 26547, sum loss: 6241.468262, avg loss: 3.281529, ppl: 26.616444 +epoch: 0, batch: 26548, sum loss: 4539.418457, avg loss: 3.145820, ppl: 23.238728 +epoch: 0, batch: 26549, sum loss: 4865.283203, avg loss: 3.012559, ppl: 20.339386 +epoch: 0, batch: 26550, sum loss: 5126.644531, avg loss: 3.152918, ppl: 23.404255 +epoch: 0, batch: 26551, sum loss: 6034.904297, avg loss: 3.199843, ppl: 24.528688 +epoch: 0, batch: 26552, sum loss: 5452.938477, avg loss: 3.168471, ppl: 23.771107 +epoch: 0, batch: 26553, sum loss: 5852.080566, avg loss: 3.102906, ppl: 22.262552 +epoch: 0, batch: 26554, sum loss: 4572.061035, avg loss: 2.945916, ppl: 19.028078 +epoch: 0, batch: 26555, sum loss: 6586.246094, avg loss: 3.368924, ppl: 29.047253 +epoch: 0, batch: 26556, sum loss: 5105.706055, avg loss: 3.141973, ppl: 23.149496 +epoch: 0, batch: 26557, sum loss: 6592.549805, avg loss: 3.394722, ppl: 29.806366 +epoch: 0, batch: 26558, sum loss: 4458.375488, avg loss: 2.700409, ppl: 14.885821 +epoch: 0, batch: 26559, sum loss: 4504.136719, avg loss: 2.764970, ppl: 15.878569 +epoch: 0, batch: 26560, sum loss: 5400.117188, avg loss: 3.071739, ppl: 21.579395 +epoch: 0, batch: 26561, sum loss: 5135.252930, avg loss: 3.256343, ppl: 25.954451 +epoch: 0, batch: 26562, sum loss: 5532.615234, avg loss: 3.073675, ppl: 21.621218 +epoch: 0, batch: 26563, sum loss: 5372.012695, avg loss: 3.023080, ppl: 20.554495 +epoch: 0, batch: 26564, sum loss: 5777.862305, avg loss: 3.227856, ppl: 25.225513 +epoch: 0, batch: 26565, sum loss: 6416.637695, avg loss: 3.287212, ppl: 26.768133 +epoch: 0, batch: 26566, sum loss: 4940.367188, avg loss: 3.003263, ppl: 20.151173 +epoch: 0, batch: 26567, sum loss: 6211.340820, avg loss: 3.390470, ppl: 
29.679899 +epoch: 0, batch: 26568, sum loss: 4872.735840, avg loss: 3.015307, ppl: 20.395346 +epoch: 0, batch: 26569, sum loss: 4125.233887, avg loss: 2.901008, ppl: 18.192480 +epoch: 0, batch: 26570, sum loss: 6170.353516, avg loss: 3.382869, ppl: 29.455164 +epoch: 0, batch: 26571, sum loss: 6465.522461, avg loss: 3.286997, ppl: 26.762365 +epoch: 0, batch: 26572, sum loss: 4512.086426, avg loss: 2.611161, ppl: 13.614852 +epoch: 0, batch: 26573, sum loss: 5798.074219, avg loss: 3.221152, ppl: 25.056976 +epoch: 0, batch: 26574, sum loss: 5330.797852, avg loss: 3.356926, ppl: 28.700821 +epoch: 0, batch: 26575, sum loss: 6243.166504, avg loss: 3.278974, ppl: 26.548521 +epoch: 0, batch: 26576, sum loss: 4890.834473, avg loss: 2.998672, ppl: 20.058886 +epoch: 0, batch: 26577, sum loss: 6012.848633, avg loss: 3.253706, ppl: 25.886095 +epoch: 0, batch: 26578, sum loss: 5108.691406, avg loss: 3.005113, ppl: 20.188496 +epoch: 0, batch: 26579, sum loss: 7029.801270, avg loss: 3.300376, ppl: 27.122841 +epoch: 0, batch: 26580, sum loss: 5104.118652, avg loss: 2.869094, ppl: 17.621048 +epoch: 0, batch: 26581, sum loss: 5577.471191, avg loss: 3.275086, ppl: 26.445498 +epoch: 0, batch: 26582, sum loss: 5302.140625, avg loss: 2.963745, ppl: 19.370384 +epoch: 0, batch: 26583, sum loss: 6596.371094, avg loss: 3.348412, ppl: 28.457502 +epoch: 0, batch: 26584, sum loss: 4900.479980, avg loss: 2.973592, ppl: 19.562065 +epoch: 0, batch: 26585, sum loss: 5266.563477, avg loss: 2.882629, ppl: 17.861172 +epoch: 0, batch: 26586, sum loss: 5172.251465, avg loss: 2.884691, ppl: 17.898045 +epoch: 0, batch: 26587, sum loss: 6150.750000, avg loss: 3.223663, ppl: 25.119974 +epoch: 0, batch: 26588, sum loss: 6405.834473, avg loss: 3.380388, ppl: 29.382156 +epoch: 0, batch: 26589, sum loss: 6442.148438, avg loss: 3.285135, ppl: 26.712578 +epoch: 0, batch: 26590, sum loss: 4271.042480, avg loss: 2.862629, ppl: 17.507496 +epoch: 0, batch: 26591, sum loss: 5520.685547, avg loss: 3.025033, ppl: 
20.594688 +epoch: 0, batch: 26592, sum loss: 5390.127930, avg loss: 3.112083, ppl: 22.467800 +epoch: 0, batch: 26593, sum loss: 4656.284180, avg loss: 3.027493, ppl: 20.645411 +epoch: 0, batch: 26594, sum loss: 5757.431152, avg loss: 3.146137, ppl: 23.246098 +epoch: 0, batch: 26595, sum loss: 5420.454590, avg loss: 3.067603, ppl: 21.490332 +epoch: 0, batch: 26596, sum loss: 4914.280273, avg loss: 3.102450, ppl: 22.252396 +epoch: 0, batch: 26597, sum loss: 4926.814453, avg loss: 2.995024, ppl: 19.985838 +epoch: 0, batch: 26598, sum loss: 4940.282715, avg loss: 2.949423, ppl: 19.094925 +epoch: 0, batch: 26599, sum loss: 5107.732422, avg loss: 3.202340, ppl: 24.590006 +epoch: 0, batch: 26600, sum loss: 5894.264160, avg loss: 3.368151, ppl: 29.024809 +epoch: 0, batch: 26601, sum loss: 3847.017090, avg loss: 2.697768, ppl: 14.846557 +epoch: 0, batch: 26602, sum loss: 6464.380859, avg loss: 3.184424, ppl: 24.153376 +epoch: 0, batch: 26603, sum loss: 5404.554688, avg loss: 3.218913, ppl: 25.000925 +epoch: 0, batch: 26604, sum loss: 5148.153809, avg loss: 3.071691, ppl: 21.578362 +epoch: 0, batch: 26605, sum loss: 5729.291016, avg loss: 3.093570, ppl: 22.055666 +epoch: 0, batch: 26606, sum loss: 5008.961914, avg loss: 3.194491, ppl: 24.397749 +epoch: 0, batch: 26607, sum loss: 4498.895508, avg loss: 2.872858, ppl: 17.687494 +epoch: 0, batch: 26608, sum loss: 4158.244141, avg loss: 2.865778, ppl: 17.562716 +epoch: 0, batch: 26609, sum loss: 4271.836914, avg loss: 2.812269, ppl: 16.647652 +epoch: 0, batch: 26610, sum loss: 5587.266113, avg loss: 3.058164, ppl: 21.288443 +epoch: 0, batch: 26611, sum loss: 5981.145020, avg loss: 3.457309, ppl: 31.731480 +epoch: 0, batch: 26612, sum loss: 5190.612793, avg loss: 3.082312, ppl: 21.808764 +epoch: 0, batch: 26613, sum loss: 4701.459473, avg loss: 3.037119, ppl: 20.845095 +epoch: 0, batch: 26614, sum loss: 5331.808594, avg loss: 3.166157, ppl: 23.716173 +epoch: 0, batch: 26615, sum loss: 5034.129883, avg loss: 2.918336, ppl: 
18.510468 +epoch: 0, batch: 26616, sum loss: 5593.120117, avg loss: 3.216285, ppl: 24.935326 +epoch: 0, batch: 26617, sum loss: 4531.302734, avg loss: 2.888020, ppl: 17.957710 +epoch: 0, batch: 26618, sum loss: 4533.297363, avg loss: 2.852925, ppl: 17.338419 +epoch: 0, batch: 26619, sum loss: 6000.718750, avg loss: 3.293479, ppl: 26.936411 +epoch: 0, batch: 26620, sum loss: 5696.711914, avg loss: 3.152580, ppl: 23.396345 +epoch: 0, batch: 26621, sum loss: 5517.263184, avg loss: 3.156329, ppl: 23.484230 +epoch: 0, batch: 26622, sum loss: 4193.239258, avg loss: 2.982389, ppl: 19.734911 +epoch: 0, batch: 26623, sum loss: 5466.761719, avg loss: 3.057473, ppl: 21.273729 +epoch: 0, batch: 26624, sum loss: 5170.104004, avg loss: 3.255733, ppl: 25.938627 +epoch: 0, batch: 26625, sum loss: 5975.419922, avg loss: 3.353210, ppl: 28.594374 +epoch: 0, batch: 26626, sum loss: 5210.127930, avg loss: 2.985747, ppl: 19.801281 +epoch: 0, batch: 26627, sum loss: 5602.881836, avg loss: 3.214505, ppl: 24.890961 +epoch: 0, batch: 26628, sum loss: 4578.083984, avg loss: 2.932789, ppl: 18.779942 +epoch: 0, batch: 26629, sum loss: 5013.210938, avg loss: 2.867970, ppl: 17.601248 +epoch: 0, batch: 26630, sum loss: 5357.899902, avg loss: 2.879044, ppl: 17.797243 +epoch: 0, batch: 26631, sum loss: 5330.576660, avg loss: 2.996389, ppl: 20.013147 +epoch: 0, batch: 26632, sum loss: 4739.399414, avg loss: 3.059651, ppl: 21.320112 +epoch: 0, batch: 26633, sum loss: 4870.436523, avg loss: 3.168794, ppl: 23.778793 +epoch: 0, batch: 26634, sum loss: 5201.251953, avg loss: 2.884776, ppl: 17.899565 +epoch: 0, batch: 26635, sum loss: 4674.474121, avg loss: 2.896205, ppl: 18.105299 +epoch: 0, batch: 26636, sum loss: 5295.189453, avg loss: 3.027553, ppl: 20.646641 +epoch: 0, batch: 26637, sum loss: 5007.992188, avg loss: 3.102845, ppl: 22.261198 +epoch: 0, batch: 26638, sum loss: 4641.588379, avg loss: 3.000381, ppl: 20.093182 +epoch: 0, batch: 26639, sum loss: 5406.104004, avg loss: 3.119506, ppl: 
22.635193 +epoch: 0, batch: 26640, sum loss: 5010.324219, avg loss: 3.042091, ppl: 20.949005 +epoch: 0, batch: 26641, sum loss: 6391.742188, avg loss: 3.189492, ppl: 24.276093 +epoch: 0, batch: 26642, sum loss: 4158.035645, avg loss: 2.899606, ppl: 18.166994 +epoch: 0, batch: 26643, sum loss: 4680.828125, avg loss: 2.784550, ppl: 16.192526 +epoch: 0, batch: 26644, sum loss: 5105.864258, avg loss: 3.100100, ppl: 22.200161 +epoch: 0, batch: 26645, sum loss: 4901.177734, avg loss: 3.017967, ppl: 20.449665 +epoch: 0, batch: 26646, sum loss: 4998.674805, avg loss: 2.938668, ppl: 18.890673 +epoch: 0, batch: 26647, sum loss: 5649.045898, avg loss: 2.984176, ppl: 19.770208 +epoch: 0, batch: 26648, sum loss: 4642.464355, avg loss: 2.823883, ppl: 16.842127 +epoch: 0, batch: 26649, sum loss: 5141.784180, avg loss: 2.768866, ppl: 15.940544 +epoch: 0, batch: 26650, sum loss: 4630.612305, avg loss: 2.995221, ppl: 19.989784 +epoch: 0, batch: 26651, sum loss: 5635.069336, avg loss: 3.015018, ppl: 20.389463 +epoch: 0, batch: 26652, sum loss: 5111.625488, avg loss: 3.126376, ppl: 22.791243 +epoch: 0, batch: 26653, sum loss: 5629.567383, avg loss: 3.162678, ppl: 23.633808 +epoch: 0, batch: 26654, sum loss: 5608.665527, avg loss: 3.100423, ppl: 22.207350 +epoch: 0, batch: 26655, sum loss: 4481.446289, avg loss: 2.948320, ppl: 19.073885 +epoch: 0, batch: 26656, sum loss: 5608.597656, avg loss: 3.199428, ppl: 24.518509 +epoch: 0, batch: 26657, sum loss: 5843.611816, avg loss: 3.217848, ppl: 24.974314 +epoch: 0, batch: 26658, sum loss: 4410.238770, avg loss: 2.834344, ppl: 17.019226 +epoch: 0, batch: 26659, sum loss: 5506.458984, avg loss: 3.015586, ppl: 20.401035 +epoch: 0, batch: 26660, sum loss: 5279.905273, avg loss: 2.933281, ppl: 18.789171 +epoch: 0, batch: 26661, sum loss: 3803.373779, avg loss: 2.676547, ppl: 14.534822 +epoch: 0, batch: 26662, sum loss: 5105.806152, avg loss: 3.119002, ppl: 22.623793 +epoch: 0, batch: 26663, sum loss: 5568.048828, avg loss: 3.170871, ppl: 
23.828224 +epoch: 0, batch: 26664, sum loss: 5647.135742, avg loss: 3.228780, ppl: 25.248829 +epoch: 0, batch: 26665, sum loss: 5055.522949, avg loss: 3.149858, ppl: 23.332762 +epoch: 0, batch: 26666, sum loss: 5042.062012, avg loss: 3.234164, ppl: 25.385147 +epoch: 0, batch: 26667, sum loss: 5026.323242, avg loss: 3.191316, ppl: 24.320415 +epoch: 0, batch: 26668, sum loss: 6726.317871, avg loss: 3.536445, ppl: 34.344604 +epoch: 0, batch: 26669, sum loss: 4470.085938, avg loss: 3.097773, ppl: 22.148563 +epoch: 0, batch: 26670, sum loss: 4912.416504, avg loss: 2.946861, ppl: 19.046066 +epoch: 0, batch: 26671, sum loss: 4705.973145, avg loss: 2.991718, ppl: 19.919882 +epoch: 0, batch: 26672, sum loss: 5197.297363, avg loss: 3.025202, ppl: 20.598171 +epoch: 0, batch: 26673, sum loss: 4437.499512, avg loss: 2.921329, ppl: 18.565954 +epoch: 0, batch: 26674, sum loss: 4657.206055, avg loss: 2.876594, ppl: 17.753706 +epoch: 0, batch: 26675, sum loss: 6289.172363, avg loss: 3.128942, ppl: 22.849781 +epoch: 0, batch: 26676, sum loss: 5015.387207, avg loss: 3.071272, ppl: 21.569319 +epoch: 0, batch: 26677, sum loss: 6062.907227, avg loss: 3.271941, ppl: 26.362465 +epoch: 0, batch: 26678, sum loss: 4955.668945, avg loss: 3.323722, ppl: 27.763491 +epoch: 0, batch: 26679, sum loss: 4900.652344, avg loss: 3.139431, ppl: 23.090736 +epoch: 0, batch: 26680, sum loss: 4736.834961, avg loss: 2.913182, ppl: 18.415312 +epoch: 0, batch: 26681, sum loss: 4827.276855, avg loss: 3.082552, ppl: 21.814005 +epoch: 0, batch: 26682, sum loss: 5480.125000, avg loss: 3.193546, ppl: 24.374708 +epoch: 0, batch: 26683, sum loss: 5809.727539, avg loss: 3.148904, ppl: 23.310499 +epoch: 0, batch: 26684, sum loss: 4592.145996, avg loss: 2.783119, ppl: 16.169371 +epoch: 0, batch: 26685, sum loss: 4614.605957, avg loss: 3.147753, ppl: 23.283688 +epoch: 0, batch: 26686, sum loss: 5402.702637, avg loss: 3.050651, ppl: 21.129091 +epoch: 0, batch: 26687, sum loss: 4755.325684, avg loss: 3.046333, ppl: 
21.038057 +epoch: 0, batch: 26688, sum loss: 5856.309570, avg loss: 3.172432, ppl: 23.865454 +epoch: 0, batch: 26689, sum loss: 5664.475586, avg loss: 3.193053, ppl: 24.362682 +epoch: 0, batch: 26690, sum loss: 5479.650879, avg loss: 3.149225, ppl: 23.317974 +epoch: 0, batch: 26691, sum loss: 4990.729004, avg loss: 3.180834, ppl: 24.066824 +epoch: 0, batch: 26692, sum loss: 5492.279785, avg loss: 3.002887, ppl: 20.143602 +epoch: 0, batch: 26693, sum loss: 5291.012207, avg loss: 3.260020, ppl: 26.050053 +epoch: 0, batch: 26694, sum loss: 3943.717773, avg loss: 2.727329, ppl: 15.291987 +epoch: 0, batch: 26695, sum loss: 5663.341797, avg loss: 2.916242, ppl: 18.471743 +epoch: 0, batch: 26696, sum loss: 4693.768066, avg loss: 2.913574, ppl: 18.422531 +epoch: 0, batch: 26697, sum loss: 5266.024414, avg loss: 2.945204, ppl: 19.014536 +epoch: 0, batch: 26698, sum loss: 5266.793945, avg loss: 3.032121, ppl: 20.741177 +epoch: 0, batch: 26699, sum loss: 3853.245117, avg loss: 3.024525, ppl: 20.584232 +epoch: 0, batch: 26700, sum loss: 4129.191406, avg loss: 2.826277, ppl: 16.882498 +epoch: 0, batch: 26701, sum loss: 5426.652832, avg loss: 3.081574, ppl: 21.792686 +epoch: 0, batch: 26702, sum loss: 4883.085938, avg loss: 3.084703, ppl: 21.860983 +epoch: 0, batch: 26703, sum loss: 4236.488770, avg loss: 2.837568, ppl: 17.074186 +epoch: 0, batch: 26704, sum loss: 4124.040039, avg loss: 2.786514, ppl: 16.224356 +epoch: 0, batch: 26705, sum loss: 5055.778320, avg loss: 3.159861, ppl: 23.567327 +epoch: 0, batch: 26706, sum loss: 6057.649414, avg loss: 3.297577, ppl: 27.047035 +epoch: 0, batch: 26707, sum loss: 7787.610352, avg loss: 3.356729, ppl: 28.695162 +epoch: 0, batch: 26708, sum loss: 4574.458008, avg loss: 3.174503, ppl: 23.914923 +epoch: 0, batch: 26709, sum loss: 5417.824707, avg loss: 3.045433, ppl: 21.019121 +epoch: 0, batch: 26710, sum loss: 5137.378418, avg loss: 3.091082, ppl: 22.000872 +epoch: 0, batch: 26711, sum loss: 5123.212402, avg loss: 3.172268, ppl: 
23.861534 +epoch: 0, batch: 26712, sum loss: 4620.108887, avg loss: 2.851919, ppl: 17.320992 +epoch: 0, batch: 26713, sum loss: 4781.640137, avg loss: 2.947990, ppl: 19.067593 +epoch: 0, batch: 26714, sum loss: 5252.515625, avg loss: 2.987779, ppl: 19.841564 +epoch: 0, batch: 26715, sum loss: 5197.500977, avg loss: 3.110414, ppl: 22.430325 +epoch: 0, batch: 26716, sum loss: 5677.770996, avg loss: 3.024918, ppl: 20.592318 +epoch: 0, batch: 26717, sum loss: 5327.487305, avg loss: 3.113669, ppl: 22.503456 +epoch: 0, batch: 26718, sum loss: 4977.788086, avg loss: 2.941955, ppl: 18.952866 +epoch: 0, batch: 26719, sum loss: 4543.220703, avg loss: 2.891929, ppl: 18.028059 +epoch: 0, batch: 26720, sum loss: 5702.311523, avg loss: 3.187429, ppl: 24.226072 +epoch: 0, batch: 26721, sum loss: 5503.763184, avg loss: 3.212938, ppl: 24.851995 +epoch: 0, batch: 26722, sum loss: 6168.221680, avg loss: 3.186065, ppl: 24.193033 +epoch: 0, batch: 26723, sum loss: 5993.747070, avg loss: 3.107178, ppl: 22.357866 +epoch: 0, batch: 26724, sum loss: 4124.056152, avg loss: 2.633497, ppl: 13.922368 +epoch: 0, batch: 26725, sum loss: 4660.895020, avg loss: 2.735267, ppl: 15.413861 +epoch: 0, batch: 26726, sum loss: 5532.865234, avg loss: 3.394396, ppl: 29.796646 +epoch: 0, batch: 26727, sum loss: 5865.359375, avg loss: 3.353550, ppl: 28.604103 +epoch: 0, batch: 26728, sum loss: 4899.937988, avg loss: 3.058638, ppl: 21.298525 +epoch: 0, batch: 26729, sum loss: 5383.470703, avg loss: 3.032941, ppl: 20.758194 +epoch: 0, batch: 26730, sum loss: 5539.841309, avg loss: 3.023931, ppl: 20.571997 +epoch: 0, batch: 26731, sum loss: 3684.795166, avg loss: 2.658582, ppl: 14.276038 +epoch: 0, batch: 26732, sum loss: 4857.513184, avg loss: 3.035946, ppl: 20.820658 +epoch: 0, batch: 26733, sum loss: 4562.008789, avg loss: 2.913160, ppl: 18.414900 +epoch: 0, batch: 26734, sum loss: 4724.051758, avg loss: 2.847530, ppl: 17.245127 +epoch: 0, batch: 26735, sum loss: 5565.707031, avg loss: 3.185865, ppl: 
24.188211 +epoch: 0, batch: 26736, sum loss: 4612.049316, avg loss: 3.044257, ppl: 20.994431 +epoch: 0, batch: 26737, sum loss: 4732.800293, avg loss: 2.761261, ppl: 15.819772 +epoch: 0, batch: 26738, sum loss: 5083.966309, avg loss: 2.906785, ppl: 18.297867 +epoch: 0, batch: 26739, sum loss: 5971.976562, avg loss: 3.070425, ppl: 21.551060 +epoch: 0, batch: 26740, sum loss: 5756.183594, avg loss: 3.043989, ppl: 20.988804 +epoch: 0, batch: 26741, sum loss: 4547.405762, avg loss: 2.782990, ppl: 16.167292 +epoch: 0, batch: 26742, sum loss: 4964.556152, avg loss: 3.211226, ppl: 24.809496 +epoch: 0, batch: 26743, sum loss: 5948.530762, avg loss: 3.184438, ppl: 24.153715 +epoch: 0, batch: 26744, sum loss: 5659.547852, avg loss: 3.165295, ppl: 23.695736 +epoch: 0, batch: 26745, sum loss: 5403.051758, avg loss: 3.057754, ppl: 21.279715 +epoch: 0, batch: 26746, sum loss: 5776.468750, avg loss: 3.202034, ppl: 24.582474 +epoch: 0, batch: 26747, sum loss: 4807.561035, avg loss: 2.987919, ppl: 19.844336 +epoch: 0, batch: 26748, sum loss: 5247.082031, avg loss: 2.864128, ppl: 17.533754 +epoch: 0, batch: 26749, sum loss: 6007.600586, avg loss: 3.124077, ppl: 22.738899 +epoch: 0, batch: 26750, sum loss: 5734.913574, avg loss: 3.056990, ppl: 21.263460 +epoch: 0, batch: 26751, sum loss: 5087.813477, avg loss: 3.042951, ppl: 20.967024 +epoch: 0, batch: 26752, sum loss: 5112.277344, avg loss: 2.958494, ppl: 19.268930 +epoch: 0, batch: 26753, sum loss: 5441.971680, avg loss: 3.100838, ppl: 22.216570 +epoch: 0, batch: 26754, sum loss: 4854.516602, avg loss: 3.047405, ppl: 21.060627 +epoch: 0, batch: 26755, sum loss: 5268.616211, avg loss: 3.101010, ppl: 22.220390 +epoch: 0, batch: 26756, sum loss: 5530.117676, avg loss: 3.305510, ppl: 27.262436 +epoch: 0, batch: 26757, sum loss: 6597.807617, avg loss: 3.470704, ppl: 32.159363 +epoch: 0, batch: 26758, sum loss: 5682.509277, avg loss: 3.247148, ppl: 25.716892 +epoch: 0, batch: 26759, sum loss: 5823.562500, avg loss: 3.100939, ppl: 
22.218796 +epoch: 0, batch: 26760, sum loss: 5725.661621, avg loss: 3.118552, ppl: 22.613611 +epoch: 0, batch: 26761, sum loss: 3930.298584, avg loss: 2.673672, ppl: 14.493096 +epoch: 0, batch: 26762, sum loss: 4768.766602, avg loss: 3.118880, ppl: 22.621019 +epoch: 0, batch: 26763, sum loss: 4230.125000, avg loss: 2.873726, ppl: 17.702864 +epoch: 0, batch: 26764, sum loss: 4751.202637, avg loss: 2.973218, ppl: 19.554750 +epoch: 0, batch: 26765, sum loss: 4880.724121, avg loss: 2.801793, ppl: 16.474165 +epoch: 0, batch: 26766, sum loss: 3952.236328, avg loss: 2.686769, ppl: 14.684148 +epoch: 0, batch: 26767, sum loss: 5956.370117, avg loss: 3.346275, ppl: 28.396769 +epoch: 0, batch: 26768, sum loss: 4527.965820, avg loss: 2.840631, ppl: 17.126570 +epoch: 0, batch: 26769, sum loss: 5082.963867, avg loss: 3.198844, ppl: 24.504198 +epoch: 0, batch: 26770, sum loss: 5709.911133, avg loss: 3.144224, ppl: 23.201668 +epoch: 0, batch: 26771, sum loss: 6596.411133, avg loss: 3.424928, ppl: 30.720432 +epoch: 0, batch: 26772, sum loss: 4213.179688, avg loss: 2.887717, ppl: 17.952286 +epoch: 0, batch: 26773, sum loss: 4760.824219, avg loss: 3.009370, ppl: 20.274630 +epoch: 0, batch: 26774, sum loss: 4908.352539, avg loss: 3.187242, ppl: 24.221527 +epoch: 0, batch: 26775, sum loss: 5179.893555, avg loss: 3.079604, ppl: 21.749781 +epoch: 0, batch: 26776, sum loss: 6016.076660, avg loss: 3.166356, ppl: 23.720896 +epoch: 0, batch: 26777, sum loss: 5699.991211, avg loss: 3.041617, ppl: 20.939083 +epoch: 0, batch: 26778, sum loss: 5229.178223, avg loss: 3.047307, ppl: 21.058552 +epoch: 0, batch: 26779, sum loss: 4749.303223, avg loss: 2.754817, ppl: 15.718157 +epoch: 0, batch: 26780, sum loss: 4789.190918, avg loss: 2.943572, ppl: 18.983526 +epoch: 0, batch: 26781, sum loss: 4860.548340, avg loss: 2.837448, ppl: 17.072140 +epoch: 0, batch: 26782, sum loss: 5082.431152, avg loss: 3.270548, ppl: 26.325758 +epoch: 0, batch: 26783, sum loss: 3828.449219, avg loss: 2.677237, ppl: 
14.544854 +epoch: 0, batch: 26784, sum loss: 5997.239258, avg loss: 3.231271, ppl: 25.311808 +epoch: 0, batch: 26785, sum loss: 5750.824219, avg loss: 3.135673, ppl: 23.004108 +epoch: 0, batch: 26786, sum loss: 6253.541016, avg loss: 3.185706, ppl: 24.184361 +epoch: 0, batch: 26787, sum loss: 5522.239746, avg loss: 3.281188, ppl: 26.607365 +epoch: 0, batch: 26788, sum loss: 4251.394531, avg loss: 3.015174, ppl: 20.392632 +epoch: 0, batch: 26789, sum loss: 4730.958496, avg loss: 2.920345, ppl: 18.547682 +epoch: 0, batch: 26790, sum loss: 4949.336426, avg loss: 2.728410, ppl: 15.308535 +epoch: 0, batch: 26791, sum loss: 4774.450684, avg loss: 2.987767, ppl: 19.841318 +epoch: 0, batch: 26792, sum loss: 4527.089844, avg loss: 2.994107, ppl: 19.967525 +epoch: 0, batch: 26793, sum loss: 5179.435059, avg loss: 2.841160, ppl: 17.135637 +epoch: 0, batch: 26794, sum loss: 4913.462891, avg loss: 3.207221, ppl: 24.710327 +epoch: 0, batch: 26795, sum loss: 4890.250977, avg loss: 3.095096, ppl: 22.089352 +epoch: 0, batch: 26796, sum loss: 5562.671875, avg loss: 3.249224, ppl: 25.770340 +epoch: 0, batch: 26797, sum loss: 6086.291504, avg loss: 3.232231, ppl: 25.336123 +epoch: 0, batch: 26798, sum loss: 4912.881836, avg loss: 3.074394, ppl: 21.636770 +epoch: 0, batch: 26799, sum loss: 5370.443848, avg loss: 3.039300, ppl: 20.890623 +epoch: 0, batch: 26800, sum loss: 6980.642578, avg loss: 3.258937, ppl: 26.021858 +epoch: 0, batch: 26801, sum loss: 5732.013672, avg loss: 3.139109, ppl: 23.083300 +epoch: 0, batch: 26802, sum loss: 5505.458984, avg loss: 3.106918, ppl: 22.352057 +epoch: 0, batch: 26803, sum loss: 6305.253906, avg loss: 3.101453, ppl: 22.230225 +epoch: 0, batch: 26804, sum loss: 5376.498047, avg loss: 3.109600, ppl: 22.412073 +epoch: 0, batch: 26805, sum loss: 5876.467285, avg loss: 3.071859, ppl: 21.581995 +epoch: 0, batch: 26806, sum loss: 5587.746094, avg loss: 3.337961, ppl: 28.161646 +epoch: 0, batch: 26807, sum loss: 6301.611328, avg loss: 3.273564, ppl: 
26.405283 +epoch: 0, batch: 26808, sum loss: 4590.613281, avg loss: 2.961686, ppl: 19.330538 +epoch: 0, batch: 26809, sum loss: 5113.095703, avg loss: 3.140722, ppl: 23.120554 +epoch: 0, batch: 26810, sum loss: 5781.485352, avg loss: 3.266376, ppl: 26.216160 +epoch: 0, batch: 26811, sum loss: 4194.785156, avg loss: 2.892955, ppl: 18.046564 +epoch: 0, batch: 26812, sum loss: 3868.756348, avg loss: 2.908839, ppl: 18.335506 +epoch: 0, batch: 26813, sum loss: 4471.927246, avg loss: 2.901964, ppl: 18.209883 +epoch: 0, batch: 26814, sum loss: 5100.150879, avg loss: 3.012493, ppl: 20.338047 +epoch: 0, batch: 26815, sum loss: 5162.758301, avg loss: 3.058506, ppl: 21.295723 +epoch: 0, batch: 26816, sum loss: 4215.183105, avg loss: 2.846174, ppl: 17.221762 +epoch: 0, batch: 26817, sum loss: 5823.551758, avg loss: 3.196241, ppl: 24.440495 +epoch: 0, batch: 26818, sum loss: 5264.161621, avg loss: 3.073066, ppl: 21.608046 +epoch: 0, batch: 26819, sum loss: 5144.507812, avg loss: 3.097235, ppl: 22.136663 +epoch: 0, batch: 26820, sum loss: 5285.894531, avg loss: 3.195825, ppl: 24.430323 +epoch: 0, batch: 26821, sum loss: 4984.157715, avg loss: 2.909607, ppl: 18.349594 +epoch: 0, batch: 26822, sum loss: 4785.833984, avg loss: 3.179956, ppl: 24.045700 +epoch: 0, batch: 26823, sum loss: 4344.439941, avg loss: 2.865726, ppl: 17.561794 +epoch: 0, batch: 26824, sum loss: 5312.141602, avg loss: 3.028587, ppl: 20.668005 +epoch: 0, batch: 26825, sum loss: 4292.332520, avg loss: 2.667702, ppl: 14.406824 +epoch: 0, batch: 26826, sum loss: 5050.133301, avg loss: 3.066262, ppl: 21.461525 +epoch: 0, batch: 26827, sum loss: 4839.808105, avg loss: 3.140693, ppl: 23.119888 +epoch: 0, batch: 26828, sum loss: 5272.852539, avg loss: 2.958952, ppl: 19.277761 +epoch: 0, batch: 26829, sum loss: 7005.718750, avg loss: 3.348814, ppl: 28.468950 +epoch: 0, batch: 26830, sum loss: 5624.580078, avg loss: 3.065166, ppl: 21.438030 +epoch: 0, batch: 26831, sum loss: 6195.109863, avg loss: 3.357783, ppl: 
28.725439 +epoch: 0, batch: 26832, sum loss: 5155.501953, avg loss: 2.804952, ppl: 16.526281 +epoch: 0, batch: 26833, sum loss: 5004.026367, avg loss: 2.852923, ppl: 17.338383 +epoch: 0, batch: 26834, sum loss: 5979.912109, avg loss: 3.047865, ppl: 21.070320 +epoch: 0, batch: 26835, sum loss: 5751.558594, avg loss: 3.174149, ppl: 23.906473 +epoch: 0, batch: 26836, sum loss: 4435.576660, avg loss: 2.920063, ppl: 18.542465 +epoch: 0, batch: 26837, sum loss: 6018.657715, avg loss: 3.091247, ppl: 22.004501 +epoch: 0, batch: 26838, sum loss: 4968.988770, avg loss: 2.904143, ppl: 18.249603 +epoch: 0, batch: 26839, sum loss: 5112.835938, avg loss: 3.322181, ppl: 27.720730 +epoch: 0, batch: 26840, sum loss: 5308.780273, avg loss: 2.920121, ppl: 18.543535 +epoch: 0, batch: 26841, sum loss: 5635.065918, avg loss: 3.334358, ppl: 28.060375 +epoch: 0, batch: 26842, sum loss: 5304.498047, avg loss: 3.122130, ppl: 22.694660 +epoch: 0, batch: 26843, sum loss: 4835.422363, avg loss: 2.970161, ppl: 19.495062 +epoch: 0, batch: 26844, sum loss: 4344.267578, avg loss: 2.873193, ppl: 17.693424 +epoch: 0, batch: 26845, sum loss: 4964.521973, avg loss: 3.102826, ppl: 22.260775 +epoch: 0, batch: 26846, sum loss: 4941.196777, avg loss: 2.910010, ppl: 18.356983 +epoch: 0, batch: 26847, sum loss: 5421.317871, avg loss: 3.071568, ppl: 21.575712 +epoch: 0, batch: 26848, sum loss: 5178.776367, avg loss: 2.962687, ppl: 19.349892 +epoch: 0, batch: 26849, sum loss: 4418.710938, avg loss: 2.916641, ppl: 18.479113 +epoch: 0, batch: 26850, sum loss: 3741.436523, avg loss: 2.627413, ppl: 13.837929 +epoch: 0, batch: 26851, sum loss: 6134.912598, avg loss: 3.376397, ppl: 29.265129 +epoch: 0, batch: 26852, sum loss: 5187.387695, avg loss: 3.082227, ppl: 21.806908 +epoch: 0, batch: 26853, sum loss: 4553.559570, avg loss: 2.882000, ppl: 17.849932 +epoch: 0, batch: 26854, sum loss: 5732.701172, avg loss: 3.047688, ppl: 21.066582 +epoch: 0, batch: 26855, sum loss: 4516.382812, avg loss: 2.831588, ppl: 
16.972393 +epoch: 0, batch: 26856, sum loss: 6659.002930, avg loss: 3.402658, ppl: 30.043865 +epoch: 0, batch: 26857, sum loss: 5033.977051, avg loss: 2.883148, ppl: 17.870449 +epoch: 0, batch: 26858, sum loss: 5603.322266, avg loss: 3.055246, ppl: 21.226414 +epoch: 0, batch: 26859, sum loss: 5019.565918, avg loss: 2.914963, ppl: 18.448126 +epoch: 0, batch: 26860, sum loss: 5848.656738, avg loss: 3.097806, ppl: 22.149292 +epoch: 0, batch: 26861, sum loss: 6386.602051, avg loss: 3.329824, ppl: 27.933418 +epoch: 0, batch: 26862, sum loss: 6085.211914, avg loss: 3.236815, ppl: 25.452520 +epoch: 0, batch: 26863, sum loss: 4581.019531, avg loss: 2.843588, ppl: 17.177280 +epoch: 0, batch: 26864, sum loss: 5035.682129, avg loss: 2.929425, ppl: 18.716875 +epoch: 0, batch: 26865, sum loss: 6005.594727, avg loss: 3.103666, ppl: 22.279486 +epoch: 0, batch: 26866, sum loss: 5066.073242, avg loss: 2.906525, ppl: 18.293121 +epoch: 0, batch: 26867, sum loss: 4531.374023, avg loss: 2.779984, ppl: 16.118763 +epoch: 0, batch: 26868, sum loss: 5306.247070, avg loss: 3.047816, ppl: 21.069275 +epoch: 0, batch: 26869, sum loss: 4677.223633, avg loss: 2.950930, ppl: 19.123724 +epoch: 0, batch: 26870, sum loss: 5570.376953, avg loss: 3.210592, ppl: 24.793760 +epoch: 0, batch: 26871, sum loss: 4497.895020, avg loss: 2.776479, ppl: 16.062359 +epoch: 0, batch: 26872, sum loss: 5753.259766, avg loss: 3.208734, ppl: 24.747736 +epoch: 0, batch: 26873, sum loss: 3975.649902, avg loss: 2.741827, ppl: 15.515313 +epoch: 0, batch: 26874, sum loss: 5129.383789, avg loss: 2.939475, ppl: 18.905914 +epoch: 0, batch: 26875, sum loss: 5757.154785, avg loss: 3.297340, ppl: 27.040606 +epoch: 0, batch: 26876, sum loss: 5364.630371, avg loss: 3.140884, ppl: 23.124304 +epoch: 0, batch: 26877, sum loss: 5417.115234, avg loss: 3.214905, ppl: 24.900932 +epoch: 0, batch: 26878, sum loss: 4653.367676, avg loss: 2.868907, ppl: 17.617756 +epoch: 0, batch: 26879, sum loss: 5884.852539, avg loss: 3.110387, ppl: 
22.429726 +epoch: 0, batch: 26880, sum loss: 5089.797852, avg loss: 3.020652, ppl: 20.504648 +epoch: 0, batch: 26881, sum loss: 4529.493164, avg loss: 2.907248, ppl: 18.306358 +epoch: 0, batch: 26882, sum loss: 4900.817383, avg loss: 3.147603, ppl: 23.280190 +epoch: 0, batch: 26883, sum loss: 3757.754883, avg loss: 2.695663, ppl: 14.815345 +epoch: 0, batch: 26884, sum loss: 4278.390625, avg loss: 2.781788, ppl: 16.147873 +epoch: 0, batch: 26885, sum loss: 4926.921387, avg loss: 2.886304, ppl: 17.926935 +epoch: 0, batch: 26886, sum loss: 5004.676270, avg loss: 3.141668, ppl: 23.142427 +epoch: 0, batch: 26887, sum loss: 5974.416016, avg loss: 3.215509, ppl: 24.915976 +epoch: 0, batch: 26888, sum loss: 5558.346680, avg loss: 3.084543, ppl: 21.857481 +epoch: 0, batch: 26889, sum loss: 4500.190430, avg loss: 3.034518, ppl: 20.790958 +epoch: 0, batch: 26890, sum loss: 4335.118652, avg loss: 2.943054, ppl: 18.973707 +epoch: 0, batch: 26891, sum loss: 5427.648438, avg loss: 3.113969, ppl: 22.510218 +epoch: 0, batch: 26892, sum loss: 5524.520996, avg loss: 3.302165, ppl: 27.171389 +epoch: 0, batch: 26893, sum loss: 4488.474609, avg loss: 2.907043, ppl: 18.302601 +epoch: 0, batch: 26894, sum loss: 4432.567871, avg loss: 2.951110, ppl: 19.127180 +epoch: 0, batch: 26895, sum loss: 6561.147949, avg loss: 3.495550, ppl: 32.968430 +epoch: 0, batch: 26896, sum loss: 4650.394531, avg loss: 2.939567, ppl: 18.907650 +epoch: 0, batch: 26897, sum loss: 4639.860352, avg loss: 2.865880, ppl: 17.564512 +epoch: 0, batch: 26898, sum loss: 5989.861328, avg loss: 3.382191, ppl: 29.435205 +epoch: 0, batch: 26899, sum loss: 5181.834961, avg loss: 2.935884, ppl: 18.838144 +epoch: 0, batch: 26900, sum loss: 5231.463867, avg loss: 3.164830, ppl: 23.684711 +epoch: 0, batch: 26901, sum loss: 4852.333008, avg loss: 3.078892, ppl: 21.734297 +epoch: 0, batch: 26902, sum loss: 5193.678711, avg loss: 3.003863, ppl: 20.163275 +epoch: 0, batch: 26903, sum loss: 5554.940918, avg loss: 2.888685, ppl: 
17.969660 +epoch: 0, batch: 26904, sum loss: 4502.046875, avg loss: 3.041924, ppl: 20.945494 +epoch: 0, batch: 26905, sum loss: 4521.159180, avg loss: 2.834583, ppl: 17.023293 +epoch: 0, batch: 26906, sum loss: 5806.762695, avg loss: 3.180045, ppl: 24.047844 +epoch: 0, batch: 26907, sum loss: 4176.669434, avg loss: 2.806901, ppl: 16.558531 +epoch: 0, batch: 26908, sum loss: 5836.418945, avg loss: 3.220982, ppl: 25.052706 +epoch: 0, batch: 26909, sum loss: 4344.226562, avg loss: 2.975498, ppl: 19.599377 +epoch: 0, batch: 26910, sum loss: 5468.478516, avg loss: 3.115942, ppl: 22.554667 +epoch: 0, batch: 26911, sum loss: 4108.754883, avg loss: 2.883337, ppl: 17.873810 +epoch: 0, batch: 26912, sum loss: 4314.694336, avg loss: 3.027856, ppl: 20.652897 +epoch: 0, batch: 26913, sum loss: 5858.003418, avg loss: 3.435779, ppl: 31.055605 +epoch: 0, batch: 26914, sum loss: 6117.207520, avg loss: 3.246926, ppl: 25.711172 +epoch: 0, batch: 26915, sum loss: 5470.296387, avg loss: 3.256129, ppl: 25.948895 +epoch: 0, batch: 26916, sum loss: 4519.770508, avg loss: 2.895433, ppl: 18.091328 +epoch: 0, batch: 26917, sum loss: 5598.865723, avg loss: 3.314900, ppl: 27.519640 +epoch: 0, batch: 26918, sum loss: 5214.996094, avg loss: 2.868535, ppl: 17.611195 +epoch: 0, batch: 26919, sum loss: 4815.863281, avg loss: 2.904622, ppl: 18.258343 +epoch: 0, batch: 26920, sum loss: 5309.942383, avg loss: 2.925588, ppl: 18.645189 +epoch: 0, batch: 26921, sum loss: 5888.722656, avg loss: 3.233785, ppl: 25.375526 +epoch: 0, batch: 26922, sum loss: 5094.324707, avg loss: 2.857165, ppl: 17.412090 +epoch: 0, batch: 26923, sum loss: 4362.091797, avg loss: 2.949352, ppl: 19.093578 +epoch: 0, batch: 26924, sum loss: 5491.031738, avg loss: 3.010434, ppl: 20.296209 +epoch: 0, batch: 26925, sum loss: 5985.154785, avg loss: 3.171783, ppl: 23.849970 +epoch: 0, batch: 26926, sum loss: 6342.436035, avg loss: 3.264249, ppl: 26.160465 +epoch: 0, batch: 26927, sum loss: 5896.130859, avg loss: 3.192275, ppl: 
24.343737 +epoch: 0, batch: 26928, sum loss: 5920.480957, avg loss: 3.099728, ppl: 22.191921 +epoch: 0, batch: 26929, sum loss: 5697.677246, avg loss: 3.079826, ppl: 21.754608 +epoch: 0, batch: 26930, sum loss: 5283.272949, avg loss: 3.096877, ppl: 22.128742 +epoch: 0, batch: 26931, sum loss: 5400.756348, avg loss: 2.998754, ppl: 20.060526 +epoch: 0, batch: 26932, sum loss: 5721.041016, avg loss: 3.214068, ppl: 24.880091 +epoch: 0, batch: 26933, sum loss: 4992.430664, avg loss: 2.968151, ppl: 19.455919 +epoch: 0, batch: 26934, sum loss: 5850.265625, avg loss: 3.108536, ppl: 22.388254 +epoch: 0, batch: 26935, sum loss: 5409.544434, avg loss: 2.983753, ppl: 19.761848 +epoch: 0, batch: 26936, sum loss: 5580.779297, avg loss: 2.955921, ppl: 19.219419 +epoch: 0, batch: 26937, sum loss: 5014.382812, avg loss: 3.015263, ppl: 20.394461 +epoch: 0, batch: 26938, sum loss: 4997.207520, avg loss: 3.039664, ppl: 20.898220 +epoch: 0, batch: 26939, sum loss: 4910.999512, avg loss: 2.987226, ppl: 19.830597 +epoch: 0, batch: 26940, sum loss: 5113.393555, avg loss: 3.087798, ppl: 21.928736 +epoch: 0, batch: 26941, sum loss: 6033.165527, avg loss: 3.134112, ppl: 22.968227 +epoch: 0, batch: 26942, sum loss: 5229.497070, avg loss: 3.125820, ppl: 22.778570 +epoch: 0, batch: 26943, sum loss: 4773.033203, avg loss: 2.940871, ppl: 18.932325 +epoch: 0, batch: 26944, sum loss: 4962.455566, avg loss: 3.050065, ppl: 21.116713 +epoch: 0, batch: 26945, sum loss: 6077.242676, avg loss: 3.178474, ppl: 24.010090 +epoch: 0, batch: 26946, sum loss: 5123.666992, avg loss: 3.101493, ppl: 22.231125 +epoch: 0, batch: 26947, sum loss: 4487.939453, avg loss: 2.987976, ppl: 19.845467 +epoch: 0, batch: 26948, sum loss: 6174.719238, avg loss: 3.241323, ppl: 25.567513 +epoch: 0, batch: 26949, sum loss: 5762.854004, avg loss: 3.178629, ppl: 24.013805 +epoch: 0, batch: 26950, sum loss: 4578.567383, avg loss: 3.018172, ppl: 20.453873 +epoch: 0, batch: 26951, sum loss: 4835.660156, avg loss: 3.056675, ppl: 
21.256760 +epoch: 0, batch: 26952, sum loss: 5671.947266, avg loss: 3.087614, ppl: 21.924709 +epoch: 0, batch: 26953, sum loss: 5555.985352, avg loss: 3.254824, ppl: 25.915064 +epoch: 0, batch: 26954, sum loss: 5354.629395, avg loss: 2.986408, ppl: 19.814381 +epoch: 0, batch: 26955, sum loss: 4580.320312, avg loss: 3.120109, ppl: 22.648849 +epoch: 0, batch: 26956, sum loss: 5042.987305, avg loss: 3.249348, ppl: 25.773521 +epoch: 0, batch: 26957, sum loss: 4840.902832, avg loss: 3.008641, ppl: 20.259838 +epoch: 0, batch: 26958, sum loss: 4977.326660, avg loss: 3.188550, ppl: 24.253241 +epoch: 0, batch: 26959, sum loss: 5584.410645, avg loss: 3.021867, ppl: 20.529591 +epoch: 0, batch: 26960, sum loss: 4246.994141, avg loss: 2.833218, ppl: 17.000084 +epoch: 0, batch: 26961, sum loss: 5618.538574, avg loss: 2.856400, ppl: 17.398775 +epoch: 0, batch: 26962, sum loss: 5551.074219, avg loss: 3.154019, ppl: 23.430050 +epoch: 0, batch: 26963, sum loss: 5462.425293, avg loss: 3.185087, ppl: 24.169401 +epoch: 0, batch: 26964, sum loss: 5273.686523, avg loss: 3.004950, ppl: 20.185198 +epoch: 0, batch: 26965, sum loss: 5188.270508, avg loss: 3.159726, ppl: 23.564148 +epoch: 0, batch: 26966, sum loss: 4883.117188, avg loss: 3.248914, ppl: 25.762335 +epoch: 0, batch: 26967, sum loss: 5748.840820, avg loss: 3.294465, ppl: 26.962975 +epoch: 0, batch: 26968, sum loss: 4972.348633, avg loss: 2.870871, ppl: 17.652388 +epoch: 0, batch: 26969, sum loss: 5828.798340, avg loss: 3.206160, ppl: 24.684107 +epoch: 0, batch: 26970, sum loss: 4533.803223, avg loss: 3.006501, ppl: 20.216534 +epoch: 0, batch: 26971, sum loss: 5943.577148, avg loss: 3.089177, ppl: 21.959007 +epoch: 0, batch: 26972, sum loss: 5492.928223, avg loss: 3.041489, ppl: 20.936386 +epoch: 0, batch: 26973, sum loss: 5448.782715, avg loss: 3.177133, ppl: 23.977901 +epoch: 0, batch: 26974, sum loss: 4184.556641, avg loss: 2.749380, ppl: 15.632942 +epoch: 0, batch: 26975, sum loss: 4930.826172, avg loss: 3.079842, ppl: 
21.754957 +epoch: 0, batch: 26976, sum loss: 6376.503418, avg loss: 3.225343, ppl: 25.162209 +epoch: 0, batch: 26977, sum loss: 4713.691406, avg loss: 3.362119, ppl: 28.850273 +epoch: 0, batch: 26978, sum loss: 4931.381348, avg loss: 2.933600, ppl: 18.795166 +epoch: 0, batch: 26979, sum loss: 5940.963867, avg loss: 3.057624, ppl: 21.276951 +epoch: 0, batch: 26980, sum loss: 4470.615723, avg loss: 2.858450, ppl: 17.434481 +epoch: 0, batch: 26981, sum loss: 4603.264160, avg loss: 2.877040, ppl: 17.761623 +epoch: 0, batch: 26982, sum loss: 5226.925293, avg loss: 3.146854, ppl: 23.262774 +epoch: 0, batch: 26983, sum loss: 3999.488037, avg loss: 2.810603, ppl: 16.619936 +epoch: 0, batch: 26984, sum loss: 6190.921875, avg loss: 3.373800, ppl: 29.189222 +epoch: 0, batch: 26985, sum loss: 4915.703125, avg loss: 3.127038, ppl: 22.806316 +epoch: 0, batch: 26986, sum loss: 5485.108398, avg loss: 3.342540, ppl: 28.290895 +epoch: 0, batch: 26987, sum loss: 4953.166992, avg loss: 3.146866, ppl: 23.263041 +epoch: 0, batch: 26988, sum loss: 4154.372559, avg loss: 2.923556, ppl: 18.607330 +epoch: 0, batch: 26989, sum loss: 4805.176270, avg loss: 2.926417, ppl: 18.660646 +epoch: 0, batch: 26990, sum loss: 5277.455078, avg loss: 3.100737, ppl: 22.214325 +epoch: 0, batch: 26991, sum loss: 5046.090820, avg loss: 3.036156, ppl: 20.825031 +epoch: 0, batch: 26992, sum loss: 4797.766602, avg loss: 2.807353, ppl: 16.566013 +epoch: 0, batch: 26993, sum loss: 6816.575684, avg loss: 3.526423, ppl: 34.002132 +epoch: 0, batch: 26994, sum loss: 6719.702637, avg loss: 3.227523, ppl: 25.217119 +epoch: 0, batch: 26995, sum loss: 4662.166016, avg loss: 2.822134, ppl: 16.812698 +epoch: 0, batch: 26996, sum loss: 4058.463379, avg loss: 2.860087, ppl: 17.463049 +epoch: 0, batch: 26997, sum loss: 5019.354980, avg loss: 3.068066, ppl: 21.500273 +epoch: 0, batch: 26998, sum loss: 5494.114746, avg loss: 3.069338, ppl: 21.527643 +epoch: 0, batch: 26999, sum loss: 5735.284180, avg loss: 3.012229, ppl: 
20.332676 +epoch: 0, batch: 27000, sum loss: 5047.076660, avg loss: 2.934347, ppl: 18.809216 +epoch: 0, batch: 27001, sum loss: 5570.833984, avg loss: 3.034223, ppl: 20.784828 +epoch: 0, batch: 27002, sum loss: 4770.688965, avg loss: 2.824564, ppl: 16.853600 +epoch: 0, batch: 27003, sum loss: 6358.149414, avg loss: 3.275708, ppl: 26.461952 +epoch: 0, batch: 27004, sum loss: 5601.615234, avg loss: 3.211935, ppl: 24.827087 +epoch: 0, batch: 27005, sum loss: 4648.994629, avg loss: 2.931270, ppl: 18.751436 +epoch: 0, batch: 27006, sum loss: 6428.441406, avg loss: 3.165161, ppl: 23.692551 +epoch: 0, batch: 27007, sum loss: 4821.343750, avg loss: 3.070920, ppl: 21.561720 +epoch: 0, batch: 27008, sum loss: 5125.915039, avg loss: 3.356853, ppl: 28.698740 +epoch: 0, batch: 27009, sum loss: 5536.302734, avg loss: 3.183613, ppl: 24.133787 +epoch: 0, batch: 27010, sum loss: 5337.436035, avg loss: 3.048222, ppl: 21.077831 +epoch: 0, batch: 27011, sum loss: 4936.939453, avg loss: 2.966911, ppl: 19.431799 +epoch: 0, batch: 27012, sum loss: 4383.809570, avg loss: 2.795797, ppl: 16.375673 +epoch: 0, batch: 27013, sum loss: 4614.127441, avg loss: 2.922183, ppl: 18.581812 +epoch: 0, batch: 27014, sum loss: 4878.202637, avg loss: 2.974514, ppl: 19.580101 +epoch: 0, batch: 27015, sum loss: 3716.948975, avg loss: 2.459927, ppl: 11.703953 +epoch: 0, batch: 27016, sum loss: 5507.683105, avg loss: 3.342041, ppl: 28.276775 +epoch: 0, batch: 27017, sum loss: 5503.335449, avg loss: 3.168299, ppl: 23.767027 +epoch: 0, batch: 27018, sum loss: 5671.915039, avg loss: 3.152815, ppl: 23.401855 +epoch: 0, batch: 27019, sum loss: 5191.639648, avg loss: 3.271355, ppl: 26.347008 +epoch: 0, batch: 27020, sum loss: 5597.339844, avg loss: 3.006090, ppl: 20.208235 +epoch: 0, batch: 27021, sum loss: 5969.524902, avg loss: 3.333068, ppl: 28.024185 +epoch: 0, batch: 27022, sum loss: 5327.787109, avg loss: 3.225053, ppl: 25.154903 +epoch: 0, batch: 27023, sum loss: 5535.902344, avg loss: 3.246864, ppl: 
25.709578 +epoch: 0, batch: 27024, sum loss: 5167.748047, avg loss: 3.124394, ppl: 22.746117 +epoch: 0, batch: 27025, sum loss: 5086.545898, avg loss: 3.018722, ppl: 20.465113 +epoch: 0, batch: 27026, sum loss: 6296.390137, avg loss: 3.281079, ppl: 26.604452 +epoch: 0, batch: 27027, sum loss: 6568.194336, avg loss: 3.399687, ppl: 29.954710 +epoch: 0, batch: 27028, sum loss: 4535.016602, avg loss: 3.118994, ppl: 22.623598 +epoch: 0, batch: 27029, sum loss: 6968.213867, avg loss: 3.343673, ppl: 28.322960 +epoch: 0, batch: 27030, sum loss: 3327.039551, avg loss: 2.577103, ppl: 13.158957 +epoch: 0, batch: 27031, sum loss: 5757.446777, avg loss: 3.134157, ppl: 22.969275 +epoch: 0, batch: 27032, sum loss: 5445.658203, avg loss: 2.935665, ppl: 18.834021 +epoch: 0, batch: 27033, sum loss: 5358.453125, avg loss: 3.187658, ppl: 24.231606 +epoch: 0, batch: 27034, sum loss: 4407.112793, avg loss: 2.712069, ppl: 15.060408 +epoch: 0, batch: 27035, sum loss: 5835.423828, avg loss: 3.181801, ppl: 24.090109 +epoch: 0, batch: 27036, sum loss: 4931.729980, avg loss: 3.051813, ppl: 21.153664 +epoch: 0, batch: 27037, sum loss: 5084.524414, avg loss: 3.111704, ppl: 22.459286 +epoch: 0, batch: 27038, sum loss: 5287.563965, avg loss: 3.068813, ppl: 21.516338 +epoch: 0, batch: 27039, sum loss: 4629.844727, avg loss: 2.859694, ppl: 17.456188 +epoch: 0, batch: 27040, sum loss: 5841.430176, avg loss: 3.171243, ppl: 23.837105 +epoch: 0, batch: 27041, sum loss: 5294.359375, avg loss: 2.923445, ppl: 18.605282 +epoch: 0, batch: 27042, sum loss: 6147.224609, avg loss: 3.190049, ppl: 24.289621 +epoch: 0, batch: 27043, sum loss: 4484.640137, avg loss: 3.067469, ppl: 21.487457 +epoch: 0, batch: 27044, sum loss: 4826.778320, avg loss: 3.041448, ppl: 20.935543 +epoch: 0, batch: 27045, sum loss: 4763.604492, avg loss: 2.979115, ppl: 19.670397 +epoch: 0, batch: 27046, sum loss: 5016.898926, avg loss: 3.143420, ppl: 23.183027 +epoch: 0, batch: 27047, sum loss: 5297.595215, avg loss: 2.763482, ppl: 
15.854959 +epoch: 0, batch: 27048, sum loss: 5267.431641, avg loss: 2.913403, ppl: 18.419365 +epoch: 0, batch: 27049, sum loss: 4403.937988, avg loss: 2.716803, ppl: 15.131869 +epoch: 0, batch: 27050, sum loss: 5139.879395, avg loss: 3.079616, ppl: 21.750055 +epoch: 0, batch: 27051, sum loss: 5609.659668, avg loss: 3.276670, ppl: 26.487429 +epoch: 0, batch: 27052, sum loss: 4886.347168, avg loss: 3.050154, ppl: 21.118601 +epoch: 0, batch: 27053, sum loss: 4306.909668, avg loss: 3.022393, ppl: 20.540377 +epoch: 0, batch: 27054, sum loss: 4922.958984, avg loss: 2.987232, ppl: 19.830725 +epoch: 0, batch: 27055, sum loss: 6111.911621, avg loss: 3.147225, ppl: 23.271406 +epoch: 0, batch: 27056, sum loss: 4410.941406, avg loss: 2.928912, ppl: 18.707270 +epoch: 0, batch: 27057, sum loss: 4879.395996, avg loss: 2.937625, ppl: 18.870983 +epoch: 0, batch: 27058, sum loss: 5253.430664, avg loss: 2.759155, ppl: 15.786494 +epoch: 0, batch: 27059, sum loss: 5075.355469, avg loss: 3.085322, ppl: 21.874517 +epoch: 0, batch: 27060, sum loss: 4126.465820, avg loss: 2.762025, ppl: 15.831876 +epoch: 0, batch: 27061, sum loss: 4389.555176, avg loss: 2.926370, ppl: 18.659775 +epoch: 0, batch: 27062, sum loss: 5258.648438, avg loss: 3.030921, ppl: 20.716309 +epoch: 0, batch: 27063, sum loss: 6079.629883, avg loss: 3.353354, ppl: 28.598484 +epoch: 0, batch: 27064, sum loss: 4939.746094, avg loss: 3.026805, ppl: 20.631218 +epoch: 0, batch: 27065, sum loss: 5506.564941, avg loss: 3.121635, ppl: 22.683447 +epoch: 0, batch: 27066, sum loss: 4597.161621, avg loss: 2.813441, ppl: 16.667164 +epoch: 0, batch: 27067, sum loss: 5638.867676, avg loss: 3.017051, ppl: 20.430943 +epoch: 0, batch: 27068, sum loss: 5355.308105, avg loss: 3.131759, ppl: 22.914253 +epoch: 0, batch: 27069, sum loss: 4561.693359, avg loss: 2.865385, ppl: 17.555817 +epoch: 0, batch: 27070, sum loss: 4782.149414, avg loss: 2.957421, ppl: 19.248262 +epoch: 0, batch: 27071, sum loss: 5241.641602, avg loss: 3.008979, ppl: 
20.266699 +epoch: 0, batch: 27072, sum loss: 5503.459473, avg loss: 3.030539, ppl: 20.708397 +epoch: 0, batch: 27073, sum loss: 5618.424805, avg loss: 3.001296, ppl: 20.111576 +epoch: 0, batch: 27074, sum loss: 5166.501953, avg loss: 3.163810, ppl: 23.660572 +epoch: 0, batch: 27075, sum loss: 4620.473145, avg loss: 2.911451, ppl: 18.383459 +epoch: 0, batch: 27076, sum loss: 4809.820801, avg loss: 2.970859, ppl: 19.508671 +epoch: 0, batch: 27077, sum loss: 5367.706543, avg loss: 3.044643, ppl: 21.002541 +epoch: 0, batch: 27078, sum loss: 4393.891602, avg loss: 2.758250, ppl: 15.772210 +epoch: 0, batch: 27079, sum loss: 4930.011719, avg loss: 2.924088, ppl: 18.617235 +epoch: 0, batch: 27080, sum loss: 5068.934082, avg loss: 3.184004, ppl: 24.143225 +epoch: 0, batch: 27081, sum loss: 5112.311035, avg loss: 2.970547, ppl: 19.502579 +epoch: 0, batch: 27082, sum loss: 5376.850586, avg loss: 2.920614, ppl: 18.552679 +epoch: 0, batch: 27083, sum loss: 5166.407227, avg loss: 3.078908, ppl: 21.734650 +epoch: 0, batch: 27084, sum loss: 5348.049805, avg loss: 3.116579, ppl: 22.569046 +epoch: 0, batch: 27085, sum loss: 5087.262695, avg loss: 3.153914, ppl: 23.427574 +epoch: 0, batch: 27086, sum loss: 4854.249023, avg loss: 3.007589, ppl: 20.238552 +epoch: 0, batch: 27087, sum loss: 5701.627930, avg loss: 3.349958, ppl: 28.501528 +epoch: 0, batch: 27088, sum loss: 5146.487793, avg loss: 3.039863, ppl: 20.902376 +epoch: 0, batch: 27089, sum loss: 5569.210938, avg loss: 3.171533, ppl: 23.844000 +epoch: 0, batch: 27090, sum loss: 4179.057617, avg loss: 2.912235, ppl: 18.397877 +epoch: 0, batch: 27091, sum loss: 6023.193359, avg loss: 3.327731, ppl: 27.875032 +epoch: 0, batch: 27092, sum loss: 4731.143555, avg loss: 2.933133, ppl: 18.786398 +epoch: 0, batch: 27093, sum loss: 4753.443848, avg loss: 2.786309, ppl: 16.221041 +epoch: 0, batch: 27094, sum loss: 4726.975098, avg loss: 3.005070, ppl: 20.187628 +epoch: 0, batch: 27095, sum loss: 4659.038574, avg loss: 3.197693, ppl: 
24.475996 +epoch: 0, batch: 27096, sum loss: 4784.079590, avg loss: 2.834171, ppl: 17.016281 +epoch: 0, batch: 27097, sum loss: 4324.549805, avg loss: 2.644984, ppl: 14.083226 +epoch: 0, batch: 27098, sum loss: 4726.886230, avg loss: 2.847522, ppl: 17.244991 +epoch: 0, batch: 27099, sum loss: 4852.830566, avg loss: 3.036815, ppl: 20.838770 +epoch: 0, batch: 27100, sum loss: 5197.156738, avg loss: 3.250254, ppl: 25.796896 +epoch: 0, batch: 27101, sum loss: 4993.751953, avg loss: 3.123047, ppl: 22.715485 +epoch: 0, batch: 27102, sum loss: 5356.330566, avg loss: 3.017651, ppl: 20.443211 +epoch: 0, batch: 27103, sum loss: 4633.645996, avg loss: 2.811678, ppl: 16.637819 +epoch: 0, batch: 27104, sum loss: 5802.393066, avg loss: 3.101226, ppl: 22.225174 +epoch: 0, batch: 27105, sum loss: 4472.700195, avg loss: 2.830823, ppl: 16.959412 +epoch: 0, batch: 27106, sum loss: 4429.371582, avg loss: 2.720744, ppl: 15.191623 +epoch: 0, batch: 27107, sum loss: 6743.540039, avg loss: 3.189943, ppl: 24.287050 +epoch: 0, batch: 27108, sum loss: 3864.046387, avg loss: 2.661189, ppl: 14.313298 +epoch: 0, batch: 27109, sum loss: 5146.117676, avg loss: 3.088906, ppl: 21.953049 +epoch: 0, batch: 27110, sum loss: 4456.685059, avg loss: 3.011274, ppl: 20.313259 +epoch: 0, batch: 27111, sum loss: 4935.110352, avg loss: 3.044485, ppl: 20.999216 +epoch: 0, batch: 27112, sum loss: 5502.566895, avg loss: 3.177002, ppl: 23.974762 +epoch: 0, batch: 27113, sum loss: 4101.088867, avg loss: 2.647572, ppl: 14.119712 +epoch: 0, batch: 27114, sum loss: 4466.117676, avg loss: 2.798319, ppl: 16.417032 +epoch: 0, batch: 27115, sum loss: 5361.789062, avg loss: 3.182071, ppl: 24.096600 +epoch: 0, batch: 27116, sum loss: 5254.515137, avg loss: 2.992321, ppl: 19.931887 +epoch: 0, batch: 27117, sum loss: 4694.781250, avg loss: 2.792850, ppl: 16.327492 +epoch: 0, batch: 27118, sum loss: 4173.738770, avg loss: 2.765897, ppl: 15.893294 +epoch: 0, batch: 27119, sum loss: 5034.236816, avg loss: 2.752453, ppl: 
15.681055 +epoch: 0, batch: 27120, sum loss: 5325.661621, avg loss: 3.200518, ppl: 24.545238 +epoch: 0, batch: 27121, sum loss: 4874.569336, avg loss: 3.046606, ppl: 21.043797 +epoch: 0, batch: 27122, sum loss: 6281.873047, avg loss: 3.159896, ppl: 23.568142 +epoch: 0, batch: 27123, sum loss: 4895.666992, avg loss: 2.972475, ppl: 19.540228 +epoch: 0, batch: 27124, sum loss: 5388.772461, avg loss: 3.256056, ppl: 25.947002 +epoch: 0, batch: 27125, sum loss: 5502.808594, avg loss: 3.033522, ppl: 20.770254 +epoch: 0, batch: 27126, sum loss: 4412.077148, avg loss: 2.787162, ppl: 16.234873 +epoch: 0, batch: 27127, sum loss: 4707.713867, avg loss: 3.080965, ppl: 21.779400 +epoch: 0, batch: 27128, sum loss: 4276.097656, avg loss: 2.826238, ppl: 16.881830 +epoch: 0, batch: 27129, sum loss: 4355.458984, avg loss: 2.670423, ppl: 14.446073 +epoch: 0, batch: 27130, sum loss: 5303.648926, avg loss: 3.233932, ppl: 25.379259 +epoch: 0, batch: 27131, sum loss: 4685.002441, avg loss: 2.948397, ppl: 19.075346 +epoch: 0, batch: 27132, sum loss: 4681.128906, avg loss: 3.012310, ppl: 20.334309 +epoch: 0, batch: 27133, sum loss: 4684.584473, avg loss: 2.935203, ppl: 18.825331 +epoch: 0, batch: 27134, sum loss: 4566.735840, avg loss: 2.998513, ppl: 20.055700 +epoch: 0, batch: 27135, sum loss: 5612.630859, avg loss: 3.028943, ppl: 20.675358 +epoch: 0, batch: 27136, sum loss: 4653.262695, avg loss: 2.971432, ppl: 19.519852 +epoch: 0, batch: 27137, sum loss: 6216.953613, avg loss: 3.280714, ppl: 26.594763 +epoch: 0, batch: 27138, sum loss: 5066.879883, avg loss: 3.030431, ppl: 20.706150 +epoch: 0, batch: 27139, sum loss: 5266.747070, avg loss: 3.026866, ppl: 20.632473 +epoch: 0, batch: 27140, sum loss: 5571.396484, avg loss: 3.312364, ppl: 27.449944 +epoch: 0, batch: 27141, sum loss: 5283.671875, avg loss: 3.141303, ppl: 23.133993 +epoch: 0, batch: 27142, sum loss: 5516.456543, avg loss: 2.946825, ppl: 19.045393 +epoch: 0, batch: 27143, sum loss: 5742.614746, avg loss: 2.995626, ppl: 
19.997873 +epoch: 0, batch: 27144, sum loss: 4812.983398, avg loss: 2.892418, ppl: 18.036869 +epoch: 0, batch: 27145, sum loss: 5116.501465, avg loss: 2.927061, ppl: 18.672663 +epoch: 0, batch: 27146, sum loss: 4794.760254, avg loss: 2.934370, ppl: 18.809645 +epoch: 0, batch: 27147, sum loss: 5556.177734, avg loss: 2.963295, ppl: 19.361658 +epoch: 0, batch: 27148, sum loss: 4480.639648, avg loss: 2.965347, ppl: 19.401440 +epoch: 0, batch: 27149, sum loss: 6192.160156, avg loss: 3.329118, ppl: 27.913725 +epoch: 0, batch: 27150, sum loss: 5406.085938, avg loss: 3.163304, ppl: 23.648592 +epoch: 0, batch: 27151, sum loss: 5886.385254, avg loss: 3.168130, ppl: 23.762999 +epoch: 0, batch: 27152, sum loss: 5117.174316, avg loss: 3.145159, ppl: 23.223373 +epoch: 0, batch: 27153, sum loss: 4877.895508, avg loss: 3.022240, ppl: 20.537247 +epoch: 0, batch: 27154, sum loss: 5389.953613, avg loss: 3.148337, ppl: 23.297297 +epoch: 0, batch: 27155, sum loss: 4598.250488, avg loss: 2.843693, ppl: 17.179083 +epoch: 0, batch: 27156, sum loss: 5076.491699, avg loss: 3.027127, ppl: 20.637850 +epoch: 0, batch: 27157, sum loss: 4736.162598, avg loss: 2.875630, ppl: 17.736593 +epoch: 0, batch: 27158, sum loss: 4783.293457, avg loss: 2.970990, ppl: 19.511221 +epoch: 0, batch: 27159, sum loss: 5377.616211, avg loss: 3.121077, ppl: 22.670790 +epoch: 0, batch: 27160, sum loss: 5948.739258, avg loss: 3.088650, ppl: 21.947424 +epoch: 0, batch: 27161, sum loss: 4535.857910, avg loss: 2.988049, ppl: 19.846920 +epoch: 0, batch: 27162, sum loss: 5691.684570, avg loss: 3.221101, ppl: 25.055681 +epoch: 0, batch: 27163, sum loss: 4553.862305, avg loss: 2.811026, ppl: 16.626974 +epoch: 0, batch: 27164, sum loss: 5552.398926, avg loss: 3.074418, ppl: 21.637287 +epoch: 0, batch: 27165, sum loss: 5288.604492, avg loss: 3.067636, ppl: 21.491043 +epoch: 0, batch: 27166, sum loss: 4839.663086, avg loss: 2.956422, ppl: 19.229053 +epoch: 0, batch: 27167, sum loss: 4723.768555, avg loss: 3.039748, ppl: 
20.899981 +epoch: 0, batch: 27168, sum loss: 5846.267090, avg loss: 3.379345, ppl: 29.351545 +epoch: 0, batch: 27169, sum loss: 4841.293457, avg loss: 3.029595, ppl: 20.688860 +epoch: 0, batch: 27170, sum loss: 5258.871094, avg loss: 3.128418, ppl: 22.837826 +epoch: 0, batch: 27171, sum loss: 5859.609375, avg loss: 3.170784, ppl: 23.826162 +epoch: 0, batch: 27172, sum loss: 5327.614746, avg loss: 3.244589, ppl: 25.651161 +epoch: 0, batch: 27173, sum loss: 5453.091797, avg loss: 3.100109, ppl: 22.200373 +epoch: 0, batch: 27174, sum loss: 4760.893555, avg loss: 2.958915, ppl: 19.277035 +epoch: 0, batch: 27175, sum loss: 4916.260742, avg loss: 3.027254, ppl: 20.640478 +epoch: 0, batch: 27176, sum loss: 5891.391602, avg loss: 3.349285, ppl: 28.482351 +epoch: 0, batch: 27177, sum loss: 5835.119629, avg loss: 3.112064, ppl: 22.467367 +epoch: 0, batch: 27178, sum loss: 5023.281738, avg loss: 3.163276, ppl: 23.647934 +epoch: 0, batch: 27179, sum loss: 4596.005371, avg loss: 3.023688, ppl: 20.566999 +epoch: 0, batch: 27180, sum loss: 5815.364258, avg loss: 3.157093, ppl: 23.502165 +epoch: 0, batch: 27181, sum loss: 4278.546875, avg loss: 2.985727, ppl: 19.800898 +epoch: 0, batch: 27182, sum loss: 5634.838379, avg loss: 3.121794, ppl: 22.687044 +epoch: 0, batch: 27183, sum loss: 5555.940430, avg loss: 3.264360, ppl: 26.163353 +epoch: 0, batch: 27184, sum loss: 5132.338867, avg loss: 3.236027, ppl: 25.432478 +epoch: 0, batch: 27185, sum loss: 6076.194336, avg loss: 3.136910, ppl: 23.032579 +epoch: 0, batch: 27186, sum loss: 5605.857422, avg loss: 3.064985, ppl: 21.434141 +epoch: 0, batch: 27187, sum loss: 5698.322266, avg loss: 3.018179, ppl: 20.454014 +epoch: 0, batch: 27188, sum loss: 3814.351562, avg loss: 2.625156, ppl: 13.806726 +epoch: 0, batch: 27189, sum loss: 4962.271973, avg loss: 2.806715, ppl: 16.555445 +epoch: 0, batch: 27190, sum loss: 5301.319336, avg loss: 3.138733, ppl: 23.074606 +epoch: 0, batch: 27191, sum loss: 4708.136230, avg loss: 3.067190, ppl: 
21.481449 +epoch: 0, batch: 27192, sum loss: 4184.764648, avg loss: 2.760399, ppl: 15.806147 +epoch: 0, batch: 27193, sum loss: 4984.966309, avg loss: 3.153046, ppl: 23.407257 +epoch: 0, batch: 27194, sum loss: 4813.319824, avg loss: 2.811518, ppl: 16.635159 +epoch: 0, batch: 27195, sum loss: 5931.271484, avg loss: 3.334048, ppl: 28.051666 +epoch: 0, batch: 27196, sum loss: 4889.329102, avg loss: 3.018105, ppl: 20.452488 +epoch: 0, batch: 27197, sum loss: 4789.717285, avg loss: 2.835830, ppl: 17.044544 +epoch: 0, batch: 27198, sum loss: 5611.597656, avg loss: 3.031657, ppl: 20.731565 +epoch: 0, batch: 27199, sum loss: 5222.789062, avg loss: 3.133047, ppl: 22.943775 +epoch: 0, batch: 27200, sum loss: 4797.530762, avg loss: 2.879670, ppl: 17.808397 +epoch: 0, batch: 27201, sum loss: 4942.250977, avg loss: 2.954125, ppl: 19.184927 +epoch: 0, batch: 27202, sum loss: 5096.027344, avg loss: 3.001194, ppl: 20.109533 +epoch: 0, batch: 27203, sum loss: 4868.991211, avg loss: 3.007407, ppl: 20.234858 +epoch: 0, batch: 27204, sum loss: 4424.169922, avg loss: 2.794801, ppl: 16.359369 +epoch: 0, batch: 27205, sum loss: 5971.331543, avg loss: 3.111689, ppl: 22.458948 +epoch: 0, batch: 27206, sum loss: 4091.677734, avg loss: 2.798685, ppl: 16.423042 +epoch: 0, batch: 27207, sum loss: 5496.209961, avg loss: 2.966114, ppl: 19.416327 +epoch: 0, batch: 27208, sum loss: 4762.678711, avg loss: 2.941741, ppl: 18.948807 +epoch: 0, batch: 27209, sum loss: 6420.289062, avg loss: 3.202139, ppl: 24.585064 +epoch: 0, batch: 27210, sum loss: 5254.612793, avg loss: 3.009515, ppl: 20.277559 +epoch: 0, batch: 27211, sum loss: 5047.791016, avg loss: 3.076046, ppl: 21.672529 +epoch: 0, batch: 27212, sum loss: 5502.458984, avg loss: 3.189831, ppl: 24.284334 +epoch: 0, batch: 27213, sum loss: 5572.832520, avg loss: 3.223154, ppl: 25.107174 +epoch: 0, batch: 27214, sum loss: 5235.199707, avg loss: 3.188307, ppl: 24.247337 +epoch: 0, batch: 27215, sum loss: 5280.924805, avg loss: 3.110085, ppl: 
22.422960 +epoch: 0, batch: 27216, sum loss: 4673.750977, avg loss: 3.060741, ppl: 21.343361 +epoch: 0, batch: 27217, sum loss: 4354.818359, avg loss: 2.914872, ppl: 18.446445 +epoch: 0, batch: 27218, sum loss: 5006.284668, avg loss: 2.960547, ppl: 19.308531 +epoch: 0, batch: 27219, sum loss: 5824.362305, avg loss: 3.250202, ppl: 25.795561 +epoch: 0, batch: 27220, sum loss: 5730.619629, avg loss: 2.952406, ppl: 19.151972 +epoch: 0, batch: 27221, sum loss: 4638.801758, avg loss: 2.943402, ppl: 18.980309 +epoch: 0, batch: 27222, sum loss: 6107.384277, avg loss: 3.179274, ppl: 24.029293 +epoch: 0, batch: 27223, sum loss: 5129.080078, avg loss: 3.269012, ppl: 26.285362 +epoch: 0, batch: 27224, sum loss: 4618.641113, avg loss: 2.866940, ppl: 17.583139 +epoch: 0, batch: 27225, sum loss: 5501.832520, avg loss: 3.531343, ppl: 34.169834 +epoch: 0, batch: 27226, sum loss: 6666.605957, avg loss: 3.527305, ppl: 34.032124 +epoch: 0, batch: 27227, sum loss: 4792.866211, avg loss: 3.110231, ppl: 22.426228 +epoch: 0, batch: 27228, sum loss: 4938.237793, avg loss: 2.989248, ppl: 19.870739 +epoch: 0, batch: 27229, sum loss: 5892.926758, avg loss: 3.017372, ppl: 20.437504 +epoch: 0, batch: 27230, sum loss: 5083.174316, avg loss: 3.209075, ppl: 24.756170 +epoch: 0, batch: 27231, sum loss: 4459.496582, avg loss: 2.957226, ppl: 19.244509 +epoch: 0, batch: 27232, sum loss: 6243.797363, avg loss: 3.201947, ppl: 24.580347 +epoch: 0, batch: 27233, sum loss: 4422.565918, avg loss: 2.919185, ppl: 18.526190 +epoch: 0, batch: 27234, sum loss: 5258.936523, avg loss: 2.918389, ppl: 18.511438 +epoch: 0, batch: 27235, sum loss: 5878.897461, avg loss: 3.355535, ppl: 28.660942 +epoch: 0, batch: 27236, sum loss: 4794.001953, avg loss: 2.998125, ppl: 20.047913 +epoch: 0, batch: 27237, sum loss: 4867.044434, avg loss: 3.202003, ppl: 24.581718 +epoch: 0, batch: 27238, sum loss: 4999.198242, avg loss: 2.949379, ppl: 19.094101 +epoch: 0, batch: 27239, sum loss: 5945.062500, avg loss: 3.347445, ppl: 
28.430010 +epoch: 0, batch: 27240, sum loss: 6080.551270, avg loss: 3.178542, ppl: 24.011723 +epoch: 0, batch: 27241, sum loss: 4350.755371, avg loss: 2.937715, ppl: 18.872665 +epoch: 0, batch: 27242, sum loss: 5218.707031, avg loss: 2.950089, ppl: 19.107655 +epoch: 0, batch: 27243, sum loss: 5082.601074, avg loss: 3.149071, ppl: 23.314405 +epoch: 0, batch: 27244, sum loss: 4675.571289, avg loss: 3.076034, ppl: 21.672276 +epoch: 0, batch: 27245, sum loss: 4842.120117, avg loss: 2.934618, ppl: 18.814320 +epoch: 0, batch: 27246, sum loss: 4729.006348, avg loss: 2.811538, ppl: 16.635475 +epoch: 0, batch: 27247, sum loss: 5942.362793, avg loss: 3.477099, ppl: 32.365707 +epoch: 0, batch: 27248, sum loss: 4768.302734, avg loss: 3.031343, ppl: 20.725048 +epoch: 0, batch: 27249, sum loss: 5110.532227, avg loss: 3.340217, ppl: 28.225262 +epoch: 0, batch: 27250, sum loss: 5030.069336, avg loss: 2.810095, ppl: 16.611490 +epoch: 0, batch: 27251, sum loss: 5214.610840, avg loss: 3.009008, ppl: 20.267282 +epoch: 0, batch: 27252, sum loss: 5094.447266, avg loss: 3.007348, ppl: 20.233665 +epoch: 0, batch: 27253, sum loss: 5317.037598, avg loss: 3.341947, ppl: 28.274124 +epoch: 0, batch: 27254, sum loss: 4443.024414, avg loss: 2.859089, ppl: 17.445629 +epoch: 0, batch: 27255, sum loss: 3760.407959, avg loss: 2.478845, ppl: 11.927479 +epoch: 0, batch: 27256, sum loss: 5691.285156, avg loss: 3.139154, ppl: 23.084322 +epoch: 0, batch: 27257, sum loss: 5504.348145, avg loss: 3.159786, ppl: 23.565563 +epoch: 0, batch: 27258, sum loss: 4207.666016, avg loss: 2.734026, ppl: 15.394741 +epoch: 0, batch: 27259, sum loss: 4874.699219, avg loss: 2.896434, ppl: 18.109457 +epoch: 0, batch: 27260, sum loss: 5814.252441, avg loss: 3.251819, ppl: 25.837299 +epoch: 0, batch: 27261, sum loss: 5264.018555, avg loss: 3.089213, ppl: 21.959787 +epoch: 0, batch: 27262, sum loss: 5190.276855, avg loss: 2.861233, ppl: 17.483074 +epoch: 0, batch: 27263, sum loss: 5513.694336, avg loss: 3.211237, ppl: 
24.809761 +epoch: 0, batch: 27264, sum loss: 5626.563965, avg loss: 3.286544, ppl: 26.750263 +epoch: 0, batch: 27265, sum loss: 4903.854492, avg loss: 2.938199, ppl: 18.881815 +epoch: 0, batch: 27266, sum loss: 5889.901367, avg loss: 3.091812, ppl: 22.016930 +epoch: 0, batch: 27267, sum loss: 4321.735352, avg loss: 2.828361, ppl: 16.917707 +epoch: 0, batch: 27268, sum loss: 5096.221680, avg loss: 2.968096, ppl: 19.454851 +epoch: 0, batch: 27269, sum loss: 4809.050781, avg loss: 3.170106, ppl: 23.810007 +epoch: 0, batch: 27270, sum loss: 4382.973145, avg loss: 2.931755, ppl: 18.760519 +epoch: 0, batch: 27271, sum loss: 5607.199707, avg loss: 3.060699, ppl: 21.342461 +epoch: 0, batch: 27272, sum loss: 5755.541016, avg loss: 3.126312, ppl: 22.789787 +epoch: 0, batch: 27273, sum loss: 4678.145508, avg loss: 3.154515, ppl: 23.441666 +epoch: 0, batch: 27274, sum loss: 4997.990234, avg loss: 3.007214, ppl: 20.230965 +epoch: 0, batch: 27275, sum loss: 5858.005859, avg loss: 3.254448, ppl: 25.905304 +epoch: 0, batch: 27276, sum loss: 5068.981445, avg loss: 3.107898, ppl: 22.373964 +epoch: 0, batch: 27277, sum loss: 4864.581055, avg loss: 3.000975, ppl: 20.105137 +epoch: 0, batch: 27278, sum loss: 6108.979980, avg loss: 3.222036, ppl: 25.079132 +epoch: 0, batch: 27279, sum loss: 4847.520508, avg loss: 3.010882, ppl: 20.305304 +epoch: 0, batch: 27280, sum loss: 5056.102051, avg loss: 2.910825, ppl: 18.371941 +epoch: 0, batch: 27281, sum loss: 5403.846191, avg loss: 3.018909, ppl: 20.468937 +epoch: 0, batch: 27282, sum loss: 5647.062500, avg loss: 3.232434, ppl: 25.341263 +epoch: 0, batch: 27283, sum loss: 5385.832520, avg loss: 3.015584, ppl: 20.400997 +epoch: 0, batch: 27284, sum loss: 4970.698242, avg loss: 3.012544, ppl: 20.339085 +epoch: 0, batch: 27285, sum loss: 3937.872070, avg loss: 2.616526, ppl: 13.688093 +epoch: 0, batch: 27286, sum loss: 5025.740234, avg loss: 3.014841, ppl: 20.385851 +epoch: 0, batch: 27287, sum loss: 4786.416992, avg loss: 3.066251, ppl: 
21.461285 +epoch: 0, batch: 27288, sum loss: 4831.348145, avg loss: 2.812193, ppl: 16.646391 +epoch: 0, batch: 27289, sum loss: 5121.498047, avg loss: 3.147817, ppl: 23.285175 +epoch: 0, batch: 27290, sum loss: 5712.762207, avg loss: 3.304085, ppl: 27.223608 +epoch: 0, batch: 27291, sum loss: 3779.957764, avg loss: 2.990473, ppl: 19.895092 +epoch: 0, batch: 27292, sum loss: 5082.845703, avg loss: 3.034535, ppl: 20.791306 +epoch: 0, batch: 27293, sum loss: 4696.438965, avg loss: 3.004759, ppl: 20.181358 +epoch: 0, batch: 27294, sum loss: 5334.904297, avg loss: 3.134491, ppl: 22.976942 +epoch: 0, batch: 27295, sum loss: 5046.025879, avg loss: 3.054495, ppl: 21.210478 +epoch: 0, batch: 27296, sum loss: 5832.532227, avg loss: 2.995651, ppl: 19.998375 +epoch: 0, batch: 27297, sum loss: 4016.113525, avg loss: 2.645661, ppl: 14.092759 +epoch: 0, batch: 27298, sum loss: 4733.098633, avg loss: 2.927086, ppl: 18.673143 +epoch: 0, batch: 27299, sum loss: 5455.266602, avg loss: 2.979392, ppl: 19.675852 +epoch: 0, batch: 27300, sum loss: 5539.581055, avg loss: 3.163667, ppl: 23.657192 +epoch: 0, batch: 27301, sum loss: 4640.579102, avg loss: 2.990064, ppl: 19.886957 +epoch: 0, batch: 27302, sum loss: 5969.833984, avg loss: 3.104438, ppl: 22.296680 +epoch: 0, batch: 27303, sum loss: 4786.388184, avg loss: 3.008415, ppl: 20.255270 +epoch: 0, batch: 27304, sum loss: 5815.249512, avg loss: 3.189934, ppl: 24.286825 +epoch: 0, batch: 27305, sum loss: 5297.364258, avg loss: 3.140109, ppl: 23.106392 +epoch: 0, batch: 27306, sum loss: 5116.183105, avg loss: 3.059918, ppl: 21.325817 +epoch: 0, batch: 27307, sum loss: 5221.684082, avg loss: 2.883316, ppl: 17.873436 +epoch: 0, batch: 27308, sum loss: 5705.233398, avg loss: 3.131303, ppl: 22.903795 +epoch: 0, batch: 27309, sum loss: 5625.979004, avg loss: 3.024720, ppl: 20.588238 +epoch: 0, batch: 27310, sum loss: 5250.495117, avg loss: 3.036724, ppl: 20.836863 +epoch: 0, batch: 27311, sum loss: 4976.942383, avg loss: 2.840721, ppl: 
17.128105 +epoch: 0, batch: 27312, sum loss: 4546.850098, avg loss: 2.892398, ppl: 18.036516 +epoch: 0, batch: 27313, sum loss: 5041.675781, avg loss: 3.048172, ppl: 21.076777 +epoch: 0, batch: 27314, sum loss: 4192.440430, avg loss: 2.842333, ppl: 17.155737 +epoch: 0, batch: 27315, sum loss: 5603.932129, avg loss: 3.315936, ppl: 27.548170 +epoch: 0, batch: 27316, sum loss: 4204.910156, avg loss: 2.944615, ppl: 19.003347 +epoch: 0, batch: 27317, sum loss: 5428.283203, avg loss: 3.022429, ppl: 20.541136 +epoch: 0, batch: 27318, sum loss: 5057.495117, avg loss: 3.137404, ppl: 23.043966 +epoch: 0, batch: 27319, sum loss: 4909.225586, avg loss: 3.081749, ppl: 21.796486 +epoch: 0, batch: 27320, sum loss: 3986.594238, avg loss: 2.797610, ppl: 16.405388 +epoch: 0, batch: 27321, sum loss: 5512.362793, avg loss: 3.206727, ppl: 24.698105 +epoch: 0, batch: 27322, sum loss: 6327.827148, avg loss: 3.302624, ppl: 27.183868 +epoch: 0, batch: 27323, sum loss: 4989.994141, avg loss: 3.061346, ppl: 21.356285 +epoch: 0, batch: 27324, sum loss: 6683.203613, avg loss: 3.249005, ppl: 25.764694 +epoch: 0, batch: 27325, sum loss: 6606.764648, avg loss: 3.469940, ppl: 32.134804 +epoch: 0, batch: 27326, sum loss: 4778.124023, avg loss: 3.001334, ppl: 20.112352 +epoch: 0, batch: 27327, sum loss: 6446.152344, avg loss: 3.277149, ppl: 26.500113 +epoch: 0, batch: 27328, sum loss: 5779.327637, avg loss: 3.022661, ppl: 20.545887 +epoch: 0, batch: 27329, sum loss: 4867.883301, avg loss: 2.885527, ppl: 17.912998 +epoch: 0, batch: 27330, sum loss: 4053.473145, avg loss: 2.625307, ppl: 13.808807 +epoch: 0, batch: 27331, sum loss: 5561.829590, avg loss: 2.782306, ppl: 16.156233 +epoch: 0, batch: 27332, sum loss: 5086.569824, avg loss: 2.923316, ppl: 18.602873 +epoch: 0, batch: 27333, sum loss: 4606.388184, avg loss: 3.034511, ppl: 20.790815 +epoch: 0, batch: 27334, sum loss: 4775.636230, avg loss: 3.032150, ppl: 20.741776 +epoch: 0, batch: 27335, sum loss: 5661.805664, avg loss: 3.095575, ppl: 
22.099934 +epoch: 0, batch: 27336, sum loss: 4859.365234, avg loss: 2.939725, ppl: 18.910639 +epoch: 0, batch: 27337, sum loss: 4622.519531, avg loss: 2.858701, ppl: 17.438864 +epoch: 0, batch: 27338, sum loss: 5375.952148, avg loss: 3.112885, ppl: 22.485823 +epoch: 0, batch: 27339, sum loss: 4851.230469, avg loss: 3.032019, ppl: 20.739061 +epoch: 0, batch: 27340, sum loss: 5997.855469, avg loss: 3.170114, ppl: 23.810194 +epoch: 0, batch: 27341, sum loss: 5498.579590, avg loss: 3.012920, ppl: 20.346733 +epoch: 0, batch: 27342, sum loss: 4756.688965, avg loss: 2.971074, ppl: 19.512863 +epoch: 0, batch: 27343, sum loss: 5683.857910, avg loss: 3.329735, ppl: 27.930946 +epoch: 0, batch: 27344, sum loss: 5559.621582, avg loss: 3.264605, ppl: 26.169760 +epoch: 0, batch: 27345, sum loss: 5033.872070, avg loss: 2.873215, ppl: 17.693808 +epoch: 0, batch: 27346, sum loss: 5126.900391, avg loss: 2.926313, ppl: 18.658712 +epoch: 0, batch: 27347, sum loss: 5269.334473, avg loss: 3.216932, ppl: 24.951447 +epoch: 0, batch: 27348, sum loss: 5721.413086, avg loss: 3.173274, ppl: 23.885559 +epoch: 0, batch: 27349, sum loss: 5056.321777, avg loss: 3.077493, ppl: 21.703932 +epoch: 0, batch: 27350, sum loss: 4691.492676, avg loss: 2.912162, ppl: 18.396526 +epoch: 0, batch: 27351, sum loss: 4635.134277, avg loss: 2.812582, ppl: 16.652853 +epoch: 0, batch: 27352, sum loss: 4638.964355, avg loss: 2.533569, ppl: 12.598388 +epoch: 0, batch: 27353, sum loss: 5730.845215, avg loss: 3.176744, ppl: 23.968573 +epoch: 0, batch: 27354, sum loss: 5835.248047, avg loss: 3.123794, ppl: 22.732470 +epoch: 0, batch: 27355, sum loss: 5899.605469, avg loss: 3.373131, ppl: 29.169703 +epoch: 0, batch: 27356, sum loss: 5139.143066, avg loss: 3.097735, ppl: 22.147739 +epoch: 0, batch: 27357, sum loss: 4891.307617, avg loss: 3.080169, ppl: 21.762074 +epoch: 0, batch: 27358, sum loss: 4376.180176, avg loss: 2.939006, ppl: 18.897055 +epoch: 0, batch: 27359, sum loss: 4941.781738, avg loss: 2.751549, ppl: 
15.666882 +epoch: 0, batch: 27360, sum loss: 5422.490234, avg loss: 3.223835, ppl: 25.124281 +epoch: 0, batch: 27361, sum loss: 5447.255859, avg loss: 3.174392, ppl: 23.912266 +epoch: 0, batch: 27362, sum loss: 6092.110352, avg loss: 3.208062, ppl: 24.731121 +epoch: 0, batch: 27363, sum loss: 4510.429199, avg loss: 2.913714, ppl: 18.425100 +epoch: 0, batch: 27364, sum loss: 4611.969238, avg loss: 2.988963, ppl: 19.865065 +epoch: 0, batch: 27365, sum loss: 6339.518066, avg loss: 3.254373, ppl: 25.903364 +epoch: 0, batch: 27366, sum loss: 5829.608887, avg loss: 3.271386, ppl: 26.347824 +epoch: 0, batch: 27367, sum loss: 4346.173828, avg loss: 2.911034, ppl: 18.375792 +epoch: 0, batch: 27368, sum loss: 5384.787109, avg loss: 3.136160, ppl: 23.015322 +epoch: 0, batch: 27369, sum loss: 5687.584961, avg loss: 3.052917, ppl: 21.177032 +epoch: 0, batch: 27370, sum loss: 4745.661133, avg loss: 2.865737, ppl: 17.561998 +epoch: 0, batch: 27371, sum loss: 5260.003906, avg loss: 3.056365, ppl: 21.250168 +epoch: 0, batch: 27372, sum loss: 5677.996094, avg loss: 3.244569, ppl: 25.650654 +epoch: 0, batch: 27373, sum loss: 5894.369141, avg loss: 3.212190, ppl: 24.833422 +epoch: 0, batch: 27374, sum loss: 5613.565430, avg loss: 3.185906, ppl: 24.189186 +epoch: 0, batch: 27375, sum loss: 4885.352539, avg loss: 2.775769, ppl: 16.050959 +epoch: 0, batch: 27376, sum loss: 6313.551270, avg loss: 3.072288, ppl: 21.591238 +epoch: 0, batch: 27377, sum loss: 6151.380859, avg loss: 3.815993, ppl: 45.421841 +epoch: 0, batch: 27378, sum loss: 4522.786621, avg loss: 2.979438, ppl: 19.676752 +epoch: 0, batch: 27379, sum loss: 5190.196289, avg loss: 3.045890, ppl: 21.028736 +epoch: 0, batch: 27380, sum loss: 5725.716309, avg loss: 3.292534, ppl: 26.910967 +epoch: 0, batch: 27381, sum loss: 5720.468750, avg loss: 2.889125, ppl: 17.977579 +epoch: 0, batch: 27382, sum loss: 3539.176758, avg loss: 2.497655, ppl: 12.153955 +epoch: 0, batch: 27383, sum loss: 4801.809570, avg loss: 3.033360, ppl: 
20.766897 +epoch: 0, batch: 27384, sum loss: 4927.609375, avg loss: 2.945373, ppl: 19.017761 +epoch: 0, batch: 27385, sum loss: 5725.593262, avg loss: 2.969706, ppl: 19.486197 +epoch: 0, batch: 27386, sum loss: 5608.815918, avg loss: 3.274265, ppl: 26.423792 +epoch: 0, batch: 27387, sum loss: 4583.904297, avg loss: 2.914116, ppl: 18.432508 +epoch: 0, batch: 27388, sum loss: 5447.980469, avg loss: 3.233223, ppl: 25.361258 +epoch: 0, batch: 27389, sum loss: 4675.135254, avg loss: 3.161011, ppl: 23.594437 +epoch: 0, batch: 27390, sum loss: 4682.744629, avg loss: 2.789008, ppl: 16.264879 +epoch: 0, batch: 27391, sum loss: 4456.848145, avg loss: 2.717590, ppl: 15.143786 +epoch: 0, batch: 27392, sum loss: 5726.495117, avg loss: 3.418803, ppl: 30.532852 +epoch: 0, batch: 27393, sum loss: 5716.306152, avg loss: 2.910543, ppl: 18.366768 +epoch: 0, batch: 27394, sum loss: 5011.361328, avg loss: 2.997226, ppl: 20.029892 +epoch: 0, batch: 27395, sum loss: 4611.761719, avg loss: 2.971496, ppl: 19.521103 +epoch: 0, batch: 27396, sum loss: 5748.622559, avg loss: 3.056152, ppl: 21.245653 +epoch: 0, batch: 27397, sum loss: 5680.348145, avg loss: 3.255214, ppl: 25.925156 +epoch: 0, batch: 27398, sum loss: 5377.122559, avg loss: 3.214060, ppl: 24.879902 +epoch: 0, batch: 27399, sum loss: 5693.835938, avg loss: 3.213226, ppl: 24.859148 +epoch: 0, batch: 27400, sum loss: 4519.111816, avg loss: 2.869277, ppl: 17.624275 +epoch: 0, batch: 27401, sum loss: 4722.334961, avg loss: 2.898916, ppl: 18.154465 +epoch: 0, batch: 27402, sum loss: 5222.161621, avg loss: 2.967137, ppl: 19.436195 +epoch: 0, batch: 27403, sum loss: 4567.042480, avg loss: 3.077522, ppl: 21.704548 +epoch: 0, batch: 27404, sum loss: 4677.871094, avg loss: 2.921843, ppl: 18.575497 +epoch: 0, batch: 27405, sum loss: 4432.028809, avg loss: 2.846518, ppl: 17.227695 +epoch: 0, batch: 27406, sum loss: 5451.539062, avg loss: 3.115165, ppl: 22.537153 +epoch: 0, batch: 27407, sum loss: 5677.107910, avg loss: 3.220141, ppl: 
25.031641 +epoch: 0, batch: 27408, sum loss: 4362.463379, avg loss: 2.810866, ppl: 16.624302 +epoch: 0, batch: 27409, sum loss: 4929.312012, avg loss: 2.791230, ppl: 16.301054 +epoch: 0, batch: 27410, sum loss: 4555.371582, avg loss: 2.848888, ppl: 17.268562 +epoch: 0, batch: 27411, sum loss: 5519.979980, avg loss: 3.196283, ppl: 24.441504 +epoch: 0, batch: 27412, sum loss: 6088.118164, avg loss: 3.159376, ppl: 23.555901 +epoch: 0, batch: 27413, sum loss: 5147.671875, avg loss: 3.114139, ppl: 22.514040 +epoch: 0, batch: 27414, sum loss: 5428.800781, avg loss: 3.237210, ppl: 25.462576 +epoch: 0, batch: 27415, sum loss: 5058.582031, avg loss: 3.282662, ppl: 26.646610 +epoch: 0, batch: 27416, sum loss: 5112.539551, avg loss: 2.933184, ppl: 18.787354 +epoch: 0, batch: 27417, sum loss: 5164.640137, avg loss: 3.041602, ppl: 20.938763 +epoch: 0, batch: 27418, sum loss: 6403.448242, avg loss: 3.305859, ppl: 27.271948 +epoch: 0, batch: 27419, sum loss: 5543.680664, avg loss: 3.280284, ppl: 26.583332 +epoch: 0, batch: 27420, sum loss: 4283.510254, avg loss: 2.836762, ppl: 17.060429 +epoch: 0, batch: 27421, sum loss: 6077.576172, avg loss: 3.253520, ppl: 25.881294 +epoch: 0, batch: 27422, sum loss: 5093.480957, avg loss: 3.288238, ppl: 26.795616 +epoch: 0, batch: 27423, sum loss: 4000.373535, avg loss: 2.913600, ppl: 18.423010 +epoch: 0, batch: 27424, sum loss: 5560.329590, avg loss: 3.020277, ppl: 20.496964 +epoch: 0, batch: 27425, sum loss: 4972.035645, avg loss: 3.071054, ppl: 21.564610 +epoch: 0, batch: 27426, sum loss: 4746.248535, avg loss: 2.983186, ppl: 19.750637 +epoch: 0, batch: 27427, sum loss: 5498.979492, avg loss: 3.051598, ppl: 21.149115 +epoch: 0, batch: 27428, sum loss: 6316.460449, avg loss: 3.214484, ppl: 24.890438 +epoch: 0, batch: 27429, sum loss: 5118.295898, avg loss: 3.046605, ppl: 21.043777 +epoch: 0, batch: 27430, sum loss: 5077.881348, avg loss: 3.057123, ppl: 21.266285 +epoch: 0, batch: 27431, sum loss: 5297.368164, avg loss: 2.981074, ppl: 
19.708969 +epoch: 0, batch: 27432, sum loss: 3783.476318, avg loss: 2.866270, ppl: 17.571356 +epoch: 0, batch: 27433, sum loss: 5147.292480, avg loss: 3.052961, ppl: 21.177961 +epoch: 0, batch: 27434, sum loss: 4478.537598, avg loss: 2.754328, ppl: 15.710480 +epoch: 0, batch: 27435, sum loss: 4690.020020, avg loss: 3.085540, ppl: 21.879269 +epoch: 0, batch: 27436, sum loss: 4614.314941, avg loss: 2.746616, ppl: 15.589788 +epoch: 0, batch: 27437, sum loss: 5445.390625, avg loss: 3.133136, ppl: 22.945831 +epoch: 0, batch: 27438, sum loss: 5024.161133, avg loss: 3.193999, ppl: 24.385759 +epoch: 0, batch: 27439, sum loss: 4319.436035, avg loss: 2.881545, ppl: 17.841818 +epoch: 0, batch: 27440, sum loss: 4609.167969, avg loss: 2.941396, ppl: 18.942280 +epoch: 0, batch: 27441, sum loss: 5502.365234, avg loss: 3.133466, ppl: 22.953388 +epoch: 0, batch: 27442, sum loss: 6468.535645, avg loss: 3.242374, ppl: 25.594402 +epoch: 0, batch: 27443, sum loss: 5077.932617, avg loss: 2.974770, ppl: 19.585119 +epoch: 0, batch: 27444, sum loss: 4243.176758, avg loss: 2.787895, ppl: 16.246792 +epoch: 0, batch: 27445, sum loss: 5380.043945, avg loss: 3.093757, ppl: 22.059811 +epoch: 0, batch: 27446, sum loss: 5103.192871, avg loss: 2.973889, ppl: 19.567863 +epoch: 0, batch: 27447, sum loss: 5468.910156, avg loss: 3.251433, ppl: 25.827322 +epoch: 0, batch: 27448, sum loss: 5621.419922, avg loss: 3.354069, ppl: 28.618954 +epoch: 0, batch: 27449, sum loss: 5368.778809, avg loss: 2.969457, ppl: 19.481346 +epoch: 0, batch: 27450, sum loss: 4091.819336, avg loss: 2.676141, ppl: 14.528918 +epoch: 0, batch: 27451, sum loss: 5054.509766, avg loss: 3.048558, ppl: 21.084929 +epoch: 0, batch: 27452, sum loss: 5167.067383, avg loss: 2.981574, ppl: 19.718821 +epoch: 0, batch: 27453, sum loss: 5498.459961, avg loss: 3.115275, ppl: 22.539625 +epoch: 0, batch: 27454, sum loss: 4081.111328, avg loss: 2.865949, ppl: 17.565714 +epoch: 0, batch: 27455, sum loss: 4886.892578, avg loss: 3.415019, ppl: 
30.417536 +epoch: 0, batch: 27456, sum loss: 5672.408691, avg loss: 3.170715, ppl: 23.824503 +epoch: 0, batch: 27457, sum loss: 5533.613281, avg loss: 3.129872, ppl: 22.871048 +epoch: 0, batch: 27458, sum loss: 5231.324219, avg loss: 3.370699, ppl: 29.098850 +epoch: 0, batch: 27459, sum loss: 5246.869141, avg loss: 3.162670, ppl: 23.633606 +epoch: 0, batch: 27460, sum loss: 4917.724121, avg loss: 2.887683, ppl: 17.951666 +epoch: 0, batch: 27461, sum loss: 5556.421387, avg loss: 3.064766, ppl: 21.429455 +epoch: 0, batch: 27462, sum loss: 4994.337891, avg loss: 3.025038, ppl: 20.594791 +epoch: 0, batch: 27463, sum loss: 5690.675781, avg loss: 3.224179, ppl: 25.132933 +epoch: 0, batch: 27464, sum loss: 5727.942383, avg loss: 3.162862, ppl: 23.638147 +epoch: 0, batch: 27465, sum loss: 4662.162598, avg loss: 3.099842, ppl: 22.194447 +epoch: 0, batch: 27466, sum loss: 4125.499023, avg loss: 2.761378, ppl: 15.821634 +epoch: 0, batch: 27467, sum loss: 4976.145508, avg loss: 2.932319, ppl: 18.771112 +epoch: 0, batch: 27468, sum loss: 5044.988281, avg loss: 2.962412, ppl: 19.344582 +epoch: 0, batch: 27469, sum loss: 4034.619385, avg loss: 3.019925, ppl: 20.489752 +epoch: 0, batch: 27470, sum loss: 4149.083496, avg loss: 2.786490, ppl: 16.223972 +epoch: 0, batch: 27471, sum loss: 5532.082031, avg loss: 3.221946, ppl: 25.076885 +epoch: 0, batch: 27472, sum loss: 4443.253906, avg loss: 2.942552, ppl: 18.964188 +epoch: 0, batch: 27473, sum loss: 4771.938965, avg loss: 3.108755, ppl: 22.393145 +epoch: 0, batch: 27474, sum loss: 5403.272461, avg loss: 3.284664, ppl: 26.700010 +epoch: 0, batch: 27475, sum loss: 4897.610840, avg loss: 2.896281, ppl: 18.106682 +epoch: 0, batch: 27476, sum loss: 4594.804688, avg loss: 2.831056, ppl: 16.963371 +epoch: 0, batch: 27477, sum loss: 5195.265625, avg loss: 3.141031, ppl: 23.127705 +epoch: 0, batch: 27478, sum loss: 5865.433594, avg loss: 3.424071, ppl: 30.694111 +epoch: 0, batch: 27479, sum loss: 5776.770020, avg loss: 3.102454, ppl: 
22.252501 +epoch: 0, batch: 27480, sum loss: 5084.678223, avg loss: 3.228367, ppl: 25.238411 +epoch: 0, batch: 27481, sum loss: 5675.986816, avg loss: 3.032044, ppl: 20.739590 +epoch: 0, batch: 27482, sum loss: 4354.258789, avg loss: 2.906715, ppl: 18.296589 +epoch: 0, batch: 27483, sum loss: 5834.227539, avg loss: 2.967562, ppl: 19.444464 +epoch: 0, batch: 27484, sum loss: 4269.077637, avg loss: 2.995844, ppl: 20.002232 +epoch: 0, batch: 27485, sum loss: 4297.875977, avg loss: 2.957933, ppl: 19.258118 +epoch: 0, batch: 27486, sum loss: 4171.554199, avg loss: 2.994655, ppl: 19.978464 +epoch: 0, batch: 27487, sum loss: 4404.342285, avg loss: 2.728837, ppl: 15.315062 +epoch: 0, batch: 27488, sum loss: 5697.274414, avg loss: 3.091305, ppl: 22.005766 +epoch: 0, batch: 27489, sum loss: 4853.361328, avg loss: 3.007039, ppl: 20.227425 +epoch: 0, batch: 27490, sum loss: 5107.090820, avg loss: 3.189938, ppl: 24.286922 +epoch: 0, batch: 27491, sum loss: 4786.830078, avg loss: 2.986170, ppl: 19.809658 +epoch: 0, batch: 27492, sum loss: 4657.418457, avg loss: 2.972188, ppl: 19.534615 +epoch: 0, batch: 27493, sum loss: 5429.232422, avg loss: 3.290444, ppl: 26.854782 +epoch: 0, batch: 27494, sum loss: 5250.021484, avg loss: 3.106522, ppl: 22.343191 +epoch: 0, batch: 27495, sum loss: 4932.512695, avg loss: 2.918647, ppl: 18.516211 +epoch: 0, batch: 27496, sum loss: 4770.697266, avg loss: 2.921431, ppl: 18.567844 +epoch: 0, batch: 27497, sum loss: 5575.244141, avg loss: 2.897736, ppl: 18.133047 +epoch: 0, batch: 27498, sum loss: 5016.042969, avg loss: 3.344029, ppl: 28.333042 +epoch: 0, batch: 27499, sum loss: 4844.964844, avg loss: 2.909889, ppl: 18.354761 +epoch: 0, batch: 27500, sum loss: 5672.939941, avg loss: 3.333102, ppl: 28.025146 +epoch: 0, batch: 27501, sum loss: 4615.304199, avg loss: 2.891795, ppl: 18.025631 +epoch: 0, batch: 27502, sum loss: 5604.296875, avg loss: 2.960537, ppl: 19.308336 +epoch: 0, batch: 27503, sum loss: 4376.904785, avg loss: 3.029000, ppl: 
20.676542 +epoch: 0, batch: 27504, sum loss: 4824.813965, avg loss: 2.800240, ppl: 16.448599 +epoch: 0, batch: 27505, sum loss: 5761.294922, avg loss: 3.330228, ppl: 27.944715 +epoch: 0, batch: 27506, sum loss: 5565.061035, avg loss: 3.254422, ppl: 25.904631 +epoch: 0, batch: 27507, sum loss: 4896.775391, avg loss: 3.120953, ppl: 22.667974 +epoch: 0, batch: 27508, sum loss: 5854.199707, avg loss: 3.142351, ppl: 23.158245 +epoch: 0, batch: 27509, sum loss: 4708.940918, avg loss: 2.982230, ppl: 19.731764 +epoch: 0, batch: 27510, sum loss: 5568.975586, avg loss: 3.153440, ppl: 23.416485 +epoch: 0, batch: 27511, sum loss: 4972.355957, avg loss: 3.006261, ppl: 20.211695 +epoch: 0, batch: 27512, sum loss: 5162.262695, avg loss: 2.985693, ppl: 19.800215 +epoch: 0, batch: 27513, sum loss: 5281.994141, avg loss: 3.078085, ppl: 21.716780 +epoch: 0, batch: 27514, sum loss: 5431.109863, avg loss: 2.972693, ppl: 19.544481 +epoch: 0, batch: 27515, sum loss: 5737.849609, avg loss: 3.118396, ppl: 22.610090 +epoch: 0, batch: 27516, sum loss: 5667.553711, avg loss: 3.073511, ppl: 21.617662 +epoch: 0, batch: 27517, sum loss: 5414.314453, avg loss: 2.821425, ppl: 16.800774 +epoch: 0, batch: 27518, sum loss: 4861.303223, avg loss: 2.801904, ppl: 16.475986 +epoch: 0, batch: 27519, sum loss: 5445.175293, avg loss: 2.991855, ppl: 19.922598 +epoch: 0, batch: 27520, sum loss: 5403.940430, avg loss: 3.134537, ppl: 22.977983 +epoch: 0, batch: 27521, sum loss: 6244.953125, avg loss: 3.034477, ppl: 20.790096 +epoch: 0, batch: 27522, sum loss: 6396.830566, avg loss: 3.428098, ppl: 30.817963 +epoch: 0, batch: 27523, sum loss: 5823.848633, avg loss: 3.087937, ppl: 21.931784 +epoch: 0, batch: 27524, sum loss: 4521.607910, avg loss: 3.090641, ppl: 21.991175 +epoch: 0, batch: 27525, sum loss: 5972.641602, avg loss: 2.962620, ppl: 19.348598 +epoch: 0, batch: 27526, sum loss: 5505.693848, avg loss: 3.050246, ppl: 21.120535 +epoch: 0, batch: 27527, sum loss: 6172.618164, avg loss: 3.149295, ppl: 
23.319614 +epoch: 0, batch: 27528, sum loss: 5489.058594, avg loss: 3.156446, ppl: 23.486963 +epoch: 0, batch: 27529, sum loss: 5212.062012, avg loss: 3.136018, ppl: 23.012051 +epoch: 0, batch: 27530, sum loss: 4487.593262, avg loss: 2.746385, ppl: 15.586187 +epoch: 0, batch: 27531, sum loss: 5871.004883, avg loss: 3.053045, ppl: 21.179729 +epoch: 0, batch: 27532, sum loss: 5696.287109, avg loss: 2.991748, ppl: 19.920465 +epoch: 0, batch: 27533, sum loss: 5540.791992, avg loss: 3.313871, ppl: 27.491337 +epoch: 0, batch: 27534, sum loss: 5234.484863, avg loss: 3.010055, ppl: 20.288507 +epoch: 0, batch: 27535, sum loss: 4941.183594, avg loss: 2.872781, ppl: 17.686136 +epoch: 0, batch: 27536, sum loss: 6108.475586, avg loss: 3.460893, ppl: 31.845392 +epoch: 0, batch: 27537, sum loss: 4803.674805, avg loss: 2.892038, ppl: 18.030014 +epoch: 0, batch: 27538, sum loss: 4940.967773, avg loss: 2.916746, ppl: 18.481050 +epoch: 0, batch: 27539, sum loss: 4726.193848, avg loss: 2.903067, ppl: 18.229977 +epoch: 0, batch: 27540, sum loss: 4338.935547, avg loss: 2.869667, ppl: 17.631138 +epoch: 0, batch: 27541, sum loss: 5157.595703, avg loss: 2.943833, ppl: 18.988497 +epoch: 0, batch: 27542, sum loss: 4910.753906, avg loss: 2.998018, ppl: 20.045767 +epoch: 0, batch: 27543, sum loss: 5850.958008, avg loss: 3.415621, ppl: 30.435831 +epoch: 0, batch: 27544, sum loss: 4002.820801, avg loss: 2.684655, ppl: 14.653148 +epoch: 0, batch: 27545, sum loss: 5420.819336, avg loss: 2.952516, ppl: 19.154081 +epoch: 0, batch: 27546, sum loss: 5956.909180, avg loss: 3.014630, ppl: 20.381556 +epoch: 0, batch: 27547, sum loss: 5792.062500, avg loss: 3.255797, ppl: 25.940271 +epoch: 0, batch: 27548, sum loss: 5102.204102, avg loss: 3.062547, ppl: 21.381958 +epoch: 0, batch: 27549, sum loss: 4118.482422, avg loss: 2.586986, ppl: 13.289660 +epoch: 0, batch: 27550, sum loss: 4903.894531, avg loss: 3.023363, ppl: 20.560318 +epoch: 0, batch: 27551, sum loss: 5980.086914, avg loss: 3.152392, ppl: 
23.391943 +epoch: 0, batch: 27552, sum loss: 5881.696289, avg loss: 3.265795, ppl: 26.200920 +epoch: 0, batch: 27553, sum loss: 4406.181152, avg loss: 2.708163, ppl: 15.001693 +epoch: 0, batch: 27554, sum loss: 4826.175781, avg loss: 3.079883, ppl: 21.755848 +epoch: 0, batch: 27555, sum loss: 4924.805664, avg loss: 3.078003, ppl: 21.715004 +epoch: 0, batch: 27556, sum loss: 5472.832520, avg loss: 3.111332, ppl: 22.450928 +epoch: 0, batch: 27557, sum loss: 5974.751465, avg loss: 3.231342, ppl: 25.313606 +epoch: 0, batch: 27558, sum loss: 5391.598633, avg loss: 3.079154, ppl: 21.740004 +epoch: 0, batch: 27559, sum loss: 5878.189453, avg loss: 3.173968, ppl: 23.902147 +epoch: 0, batch: 27560, sum loss: 5009.729492, avg loss: 2.962584, ppl: 19.347902 +epoch: 0, batch: 27561, sum loss: 5226.644043, avg loss: 3.005546, ppl: 20.197237 +epoch: 0, batch: 27562, sum loss: 5818.745605, avg loss: 3.223682, ppl: 25.120436 +epoch: 0, batch: 27563, sum loss: 4874.189453, avg loss: 3.059755, ppl: 21.322329 +epoch: 0, batch: 27564, sum loss: 4198.268066, avg loss: 2.804454, ppl: 16.518057 +epoch: 0, batch: 27565, sum loss: 4568.380859, avg loss: 2.652951, ppl: 14.195862 +epoch: 0, batch: 27566, sum loss: 5317.389648, avg loss: 2.945922, ppl: 19.028200 +epoch: 0, batch: 27567, sum loss: 5085.271973, avg loss: 2.934375, ppl: 18.809744 +epoch: 0, batch: 27568, sum loss: 5125.439453, avg loss: 3.054493, ppl: 21.210434 +epoch: 0, batch: 27569, sum loss: 4535.751465, avg loss: 3.174074, ppl: 23.904673 +epoch: 0, batch: 27570, sum loss: 4618.953125, avg loss: 3.044794, ppl: 21.005711 +epoch: 0, batch: 27571, sum loss: 4641.269531, avg loss: 2.913540, ppl: 18.421900 +epoch: 0, batch: 27572, sum loss: 4350.436035, avg loss: 2.943462, ppl: 18.981440 +epoch: 0, batch: 27573, sum loss: 4160.952637, avg loss: 2.976361, ppl: 19.616304 +epoch: 0, batch: 27574, sum loss: 5470.260742, avg loss: 2.966519, ppl: 19.424183 +epoch: 0, batch: 27575, sum loss: 5523.502930, avg loss: 3.291718, ppl: 
26.889019 +epoch: 0, batch: 27576, sum loss: 5482.961426, avg loss: 3.044398, ppl: 20.997393 +epoch: 0, batch: 27577, sum loss: 6493.742188, avg loss: 3.435843, ppl: 31.057568 +epoch: 0, batch: 27578, sum loss: 4802.925781, avg loss: 2.962940, ppl: 19.354792 +epoch: 0, batch: 27579, sum loss: 5601.152344, avg loss: 2.963573, ppl: 19.367043 +epoch: 0, batch: 27580, sum loss: 6446.505371, avg loss: 3.302513, ppl: 27.180862 +epoch: 0, batch: 27581, sum loss: 5307.351562, avg loss: 2.960040, ppl: 19.298746 +epoch: 0, batch: 27582, sum loss: 4733.386719, avg loss: 3.085650, ppl: 21.881680 +epoch: 0, batch: 27583, sum loss: 4276.679688, avg loss: 2.854926, ppl: 17.373158 +epoch: 0, batch: 27584, sum loss: 5437.927734, avg loss: 3.053300, ppl: 21.185148 +epoch: 0, batch: 27585, sum loss: 5638.983887, avg loss: 3.157326, ppl: 23.507652 +epoch: 0, batch: 27586, sum loss: 6632.982422, avg loss: 3.286909, ppl: 26.760023 +epoch: 0, batch: 27587, sum loss: 5283.430664, avg loss: 3.031228, ppl: 20.722666 +epoch: 0, batch: 27588, sum loss: 5429.648438, avg loss: 3.190158, ppl: 24.292257 +epoch: 0, batch: 27589, sum loss: 5741.774414, avg loss: 3.151358, ppl: 23.367779 +epoch: 0, batch: 27590, sum loss: 6131.641602, avg loss: 3.044509, ppl: 20.999722 +epoch: 0, batch: 27591, sum loss: 5207.340820, avg loss: 2.992725, ppl: 19.939938 +epoch: 0, batch: 27592, sum loss: 5153.466797, avg loss: 3.243214, ppl: 25.615917 +epoch: 0, batch: 27593, sum loss: 5127.514160, avg loss: 3.021517, ppl: 20.522402 +epoch: 0, batch: 27594, sum loss: 5596.489746, avg loss: 3.188883, ppl: 24.261314 +epoch: 0, batch: 27595, sum loss: 5536.502930, avg loss: 3.181898, ppl: 24.092440 +epoch: 0, batch: 27596, sum loss: 6156.008301, avg loss: 3.192950, ppl: 24.360195 +epoch: 0, batch: 27597, sum loss: 4485.351562, avg loss: 2.860556, ppl: 17.471237 +epoch: 0, batch: 27598, sum loss: 5814.107422, avg loss: 3.277400, ppl: 26.506760 +epoch: 0, batch: 27599, sum loss: 4313.679688, avg loss: 3.016559, ppl: 
20.420910 +epoch: 0, batch: 27600, sum loss: 5395.294922, avg loss: 3.048189, ppl: 21.077148 +epoch: 0, batch: 27601, sum loss: 5401.963867, avg loss: 3.055410, ppl: 21.229881 +epoch: 0, batch: 27602, sum loss: 4702.689941, avg loss: 2.784304, ppl: 16.188553 +epoch: 0, batch: 27603, sum loss: 6302.328125, avg loss: 3.194287, ppl: 24.392776 +epoch: 0, batch: 27604, sum loss: 6505.209473, avg loss: 3.278835, ppl: 26.544838 +epoch: 0, batch: 27605, sum loss: 4883.135254, avg loss: 3.036776, ppl: 20.837944 +epoch: 0, batch: 27606, sum loss: 5464.039551, avg loss: 3.189749, ppl: 24.282326 +epoch: 0, batch: 27607, sum loss: 6831.873047, avg loss: 3.350600, ppl: 28.519840 +epoch: 0, batch: 27608, sum loss: 4359.727539, avg loss: 3.147818, ppl: 23.285198 +epoch: 0, batch: 27609, sum loss: 5714.933594, avg loss: 3.052849, ppl: 21.175594 +epoch: 0, batch: 27610, sum loss: 4809.002441, avg loss: 2.925184, ppl: 18.637655 +epoch: 0, batch: 27611, sum loss: 4658.354004, avg loss: 3.095252, ppl: 22.092806 +epoch: 0, batch: 27612, sum loss: 5643.012207, avg loss: 3.150760, ppl: 23.353817 +epoch: 0, batch: 27613, sum loss: 4630.456055, avg loss: 2.872491, ppl: 17.681013 +epoch: 0, batch: 27614, sum loss: 5167.912109, avg loss: 3.090857, ppl: 21.995916 +epoch: 0, batch: 27615, sum loss: 5302.228516, avg loss: 2.924561, ppl: 18.626043 +epoch: 0, batch: 27616, sum loss: 4782.378906, avg loss: 3.075485, ppl: 21.660385 +epoch: 0, batch: 27617, sum loss: 4815.101074, avg loss: 2.937829, ppl: 18.874815 +epoch: 0, batch: 27618, sum loss: 5994.673340, avg loss: 3.249145, ppl: 25.768305 +epoch: 0, batch: 27619, sum loss: 4984.438965, avg loss: 3.152713, ppl: 23.399452 +epoch: 0, batch: 27620, sum loss: 4340.430664, avg loss: 2.920882, ppl: 18.557648 +epoch: 0, batch: 27621, sum loss: 5511.610840, avg loss: 3.128043, ppl: 22.829252 +epoch: 0, batch: 27622, sum loss: 4895.891113, avg loss: 2.931671, ppl: 18.758957 +epoch: 0, batch: 27623, sum loss: 4721.288086, avg loss: 2.851019, ppl: 
17.305414 +epoch: 0, batch: 27624, sum loss: 5789.195801, avg loss: 2.990287, ppl: 19.891392 +epoch: 0, batch: 27625, sum loss: 4706.569824, avg loss: 2.905290, ppl: 18.270544 +epoch: 0, batch: 27626, sum loss: 4990.704590, avg loss: 3.024669, ppl: 20.587198 +epoch: 0, batch: 27627, sum loss: 4550.862305, avg loss: 2.842513, ppl: 17.158825 +epoch: 0, batch: 27628, sum loss: 4776.772461, avg loss: 2.723359, ppl: 15.231404 +epoch: 0, batch: 27629, sum loss: 4705.899414, avg loss: 2.915675, ppl: 18.461267 +epoch: 0, batch: 27630, sum loss: 5068.325195, avg loss: 3.094215, ppl: 22.069916 +epoch: 0, batch: 27631, sum loss: 5565.820801, avg loss: 3.039771, ppl: 20.900463 +epoch: 0, batch: 27632, sum loss: 5299.205566, avg loss: 3.150538, ppl: 23.348633 +epoch: 0, batch: 27633, sum loss: 4983.912598, avg loss: 3.164389, ppl: 23.674273 +epoch: 0, batch: 27634, sum loss: 5938.501953, avg loss: 3.165513, ppl: 23.700895 +epoch: 0, batch: 27635, sum loss: 5269.077148, avg loss: 3.121491, ppl: 22.680180 +epoch: 0, batch: 27636, sum loss: 4624.552246, avg loss: 2.921385, ppl: 18.566982 +epoch: 0, batch: 27637, sum loss: 4996.542969, avg loss: 3.042962, ppl: 20.967253 +epoch: 0, batch: 27638, sum loss: 5052.797852, avg loss: 2.961781, ppl: 19.332363 +epoch: 0, batch: 27639, sum loss: 5620.148926, avg loss: 3.184220, ppl: 24.148453 +epoch: 0, batch: 27640, sum loss: 5280.253906, avg loss: 2.925349, ppl: 18.640722 +epoch: 0, batch: 27641, sum loss: 5703.678223, avg loss: 3.076417, ppl: 21.680592 +epoch: 0, batch: 27642, sum loss: 6355.900391, avg loss: 3.501874, ppl: 33.177559 +epoch: 0, batch: 27643, sum loss: 3673.341309, avg loss: 2.558037, ppl: 12.910450 +epoch: 0, batch: 27644, sum loss: 5022.260254, avg loss: 3.040109, ppl: 20.907526 +epoch: 0, batch: 27645, sum loss: 5415.657227, avg loss: 3.172617, ppl: 23.869869 +epoch: 0, batch: 27646, sum loss: 4676.663574, avg loss: 2.670853, ppl: 14.452293 +epoch: 0, batch: 27647, sum loss: 4748.391113, avg loss: 2.860477, ppl: 
17.469852 +epoch: 0, batch: 27648, sum loss: 4588.617676, avg loss: 3.012881, ppl: 20.345934 +epoch: 0, batch: 27649, sum loss: 5153.261719, avg loss: 3.132682, ppl: 22.935406 +epoch: 0, batch: 27650, sum loss: 5854.171875, avg loss: 3.209524, ppl: 24.767298 +epoch: 0, batch: 27651, sum loss: 4648.191895, avg loss: 3.125885, ppl: 22.780058 +epoch: 0, batch: 27652, sum loss: 4466.234863, avg loss: 2.841116, ppl: 17.134884 +epoch: 0, batch: 27653, sum loss: 4328.809570, avg loss: 3.006118, ppl: 20.208794 +epoch: 0, batch: 27654, sum loss: 5144.327148, avg loss: 3.146377, ppl: 23.251678 +epoch: 0, batch: 27655, sum loss: 5008.763184, avg loss: 2.972560, ppl: 19.541876 +epoch: 0, batch: 27656, sum loss: 5409.998047, avg loss: 3.051324, ppl: 21.143328 +epoch: 0, batch: 27657, sum loss: 4976.851562, avg loss: 2.974807, ppl: 19.585838 +epoch: 0, batch: 27658, sum loss: 5372.075684, avg loss: 3.132406, ppl: 22.929070 +epoch: 0, batch: 27659, sum loss: 4085.834717, avg loss: 3.028788, ppl: 20.672155 +epoch: 0, batch: 27660, sum loss: 5120.534180, avg loss: 2.902797, ppl: 18.225054 +epoch: 0, batch: 27661, sum loss: 5761.175781, avg loss: 3.168964, ppl: 23.782825 +epoch: 0, batch: 27662, sum loss: 4409.645020, avg loss: 2.928051, ppl: 18.691170 +epoch: 0, batch: 27663, sum loss: 5403.909668, avg loss: 3.051332, ppl: 21.143494 +epoch: 0, batch: 27664, sum loss: 6326.261230, avg loss: 3.354327, ppl: 28.626345 +epoch: 0, batch: 27665, sum loss: 4843.136719, avg loss: 2.845556, ppl: 17.211130 +epoch: 0, batch: 27666, sum loss: 5434.280273, avg loss: 2.924801, ppl: 18.630516 +epoch: 0, batch: 27667, sum loss: 5000.733887, avg loss: 2.912483, ppl: 18.402443 +epoch: 0, batch: 27668, sum loss: 4898.972656, avg loss: 3.050419, ppl: 21.124191 +epoch: 0, batch: 27669, sum loss: 3697.054688, avg loss: 2.605395, ppl: 13.536566 +epoch: 0, batch: 27670, sum loss: 5808.654785, avg loss: 3.428958, ppl: 30.844484 +epoch: 0, batch: 27671, sum loss: 4319.414062, avg loss: 2.779546, ppl: 
16.111702 +epoch: 0, batch: 27672, sum loss: 5961.733398, avg loss: 3.146034, ppl: 23.243687 +epoch: 0, batch: 27673, sum loss: 4329.339844, avg loss: 3.031751, ppl: 20.733498 +epoch: 0, batch: 27674, sum loss: 5562.144531, avg loss: 3.156722, ppl: 23.493465 +epoch: 0, batch: 27675, sum loss: 5273.546875, avg loss: 3.107570, ppl: 22.366636 +epoch: 0, batch: 27676, sum loss: 6810.051270, avg loss: 3.333358, ppl: 28.032324 +epoch: 0, batch: 27677, sum loss: 5367.454102, avg loss: 3.200629, ppl: 24.547955 +epoch: 0, batch: 27678, sum loss: 4470.252441, avg loss: 2.783470, ppl: 16.175045 +epoch: 0, batch: 27679, sum loss: 3962.090576, avg loss: 2.879426, ppl: 17.804054 +epoch: 0, batch: 27680, sum loss: 4918.350586, avg loss: 2.995342, ppl: 19.992188 +epoch: 0, batch: 27681, sum loss: 5160.904297, avg loss: 2.732083, ppl: 15.364857 +epoch: 0, batch: 27682, sum loss: 4915.808594, avg loss: 3.032578, ppl: 20.750654 +epoch: 0, batch: 27683, sum loss: 5665.292480, avg loss: 3.229927, ppl: 25.277819 +epoch: 0, batch: 27684, sum loss: 4704.564941, avg loss: 3.013815, ppl: 20.364943 +epoch: 0, batch: 27685, sum loss: 4900.875977, avg loss: 3.101820, ppl: 22.238394 +epoch: 0, batch: 27686, sum loss: 4618.707520, avg loss: 2.823171, ppl: 16.830133 +epoch: 0, batch: 27687, sum loss: 5266.092773, avg loss: 2.946890, ppl: 19.046633 +epoch: 0, batch: 27688, sum loss: 4984.057129, avg loss: 2.963173, ppl: 19.359304 +epoch: 0, batch: 27689, sum loss: 4847.336426, avg loss: 2.916568, ppl: 18.477768 +epoch: 0, batch: 27690, sum loss: 4887.714355, avg loss: 2.940863, ppl: 18.932180 +epoch: 0, batch: 27691, sum loss: 4907.170898, avg loss: 2.888270, ppl: 17.962210 +epoch: 0, batch: 27692, sum loss: 3919.388184, avg loss: 2.527007, ppl: 12.515991 +epoch: 0, batch: 27693, sum loss: 5239.666992, avg loss: 2.983865, ppl: 19.764063 +epoch: 0, batch: 27694, sum loss: 4877.266113, avg loss: 3.104562, ppl: 22.299440 +epoch: 0, batch: 27695, sum loss: 4282.514648, avg loss: 2.953459, ppl: 
19.172148 +epoch: 0, batch: 27696, sum loss: 4187.315918, avg loss: 2.926147, ppl: 18.655607 +epoch: 0, batch: 27697, sum loss: 4757.562012, avg loss: 3.011115, ppl: 20.310040 +epoch: 0, batch: 27698, sum loss: 7123.894531, avg loss: 3.630935, ppl: 37.748093 +epoch: 0, batch: 27699, sum loss: 5430.900391, avg loss: 3.157500, ppl: 23.511749 +epoch: 0, batch: 27700, sum loss: 4701.840332, avg loss: 2.992897, ppl: 19.943365 +epoch: 0, batch: 27701, sum loss: 5637.171875, avg loss: 3.161622, ppl: 23.608860 +epoch: 0, batch: 27702, sum loss: 4878.045898, avg loss: 2.933281, ppl: 18.789171 +epoch: 0, batch: 27703, sum loss: 5674.057129, avg loss: 3.147009, ppl: 23.266369 +epoch: 0, batch: 27704, sum loss: 4761.336914, avg loss: 2.921066, ppl: 18.561054 +epoch: 0, batch: 27705, sum loss: 5627.093262, avg loss: 3.091810, ppl: 22.016882 +epoch: 0, batch: 27706, sum loss: 5336.168945, avg loss: 3.161238, ppl: 23.599787 +epoch: 0, batch: 27707, sum loss: 4252.024902, avg loss: 2.855624, ppl: 17.385290 +epoch: 0, batch: 27708, sum loss: 4775.603516, avg loss: 2.986619, ppl: 19.818558 +epoch: 0, batch: 27709, sum loss: 5074.734375, avg loss: 3.286745, ppl: 26.755640 +epoch: 0, batch: 27710, sum loss: 4906.375977, avg loss: 2.892910, ppl: 18.045755 +epoch: 0, batch: 27711, sum loss: 4733.169922, avg loss: 2.797382, ppl: 16.401646 +epoch: 0, batch: 27712, sum loss: 4073.243164, avg loss: 2.814957, ppl: 16.692465 +epoch: 0, batch: 27713, sum loss: 4961.233398, avg loss: 3.056829, ppl: 21.260035 +epoch: 0, batch: 27714, sum loss: 5605.626465, avg loss: 3.270494, ppl: 26.324341 +epoch: 0, batch: 27715, sum loss: 5235.801758, avg loss: 2.875234, ppl: 17.729578 +epoch: 0, batch: 27716, sum loss: 5767.661621, avg loss: 3.236623, ppl: 25.447636 +epoch: 0, batch: 27717, sum loss: 4984.013672, avg loss: 2.785922, ppl: 16.214754 +epoch: 0, batch: 27718, sum loss: 4642.460938, avg loss: 2.815319, ppl: 16.698500 +epoch: 0, batch: 27719, sum loss: 5241.336914, avg loss: 2.819439, ppl: 
16.767437 +epoch: 0, batch: 27720, sum loss: 5318.994141, avg loss: 3.123308, ppl: 22.721428 +epoch: 0, batch: 27721, sum loss: 5214.643555, avg loss: 3.118806, ppl: 22.619358 +epoch: 0, batch: 27722, sum loss: 4675.625977, avg loss: 3.038094, ppl: 20.865427 +epoch: 0, batch: 27723, sum loss: 5108.847656, avg loss: 3.053705, ppl: 21.193712 +epoch: 0, batch: 27724, sum loss: 5522.334473, avg loss: 3.273464, ppl: 26.402651 +epoch: 0, batch: 27725, sum loss: 4108.539551, avg loss: 2.671352, ppl: 14.459507 +epoch: 0, batch: 27726, sum loss: 5595.716797, avg loss: 3.257111, ppl: 25.974390 +epoch: 0, batch: 27727, sum loss: 5198.561523, avg loss: 2.999747, ppl: 20.080452 +epoch: 0, batch: 27728, sum loss: 5173.438965, avg loss: 3.004320, ppl: 20.172493 +epoch: 0, batch: 27729, sum loss: 5507.627930, avg loss: 3.165303, ppl: 23.695934 +epoch: 0, batch: 27730, sum loss: 4453.244629, avg loss: 2.802545, ppl: 16.486557 +epoch: 0, batch: 27731, sum loss: 6217.714355, avg loss: 3.270760, ppl: 26.331339 +epoch: 0, batch: 27732, sum loss: 5311.227539, avg loss: 3.017743, ppl: 20.445093 +epoch: 0, batch: 27733, sum loss: 5399.411133, avg loss: 3.024880, ppl: 20.591541 +epoch: 0, batch: 27734, sum loss: 6226.145508, avg loss: 3.290775, ppl: 26.863665 +epoch: 0, batch: 27735, sum loss: 4912.984375, avg loss: 2.936631, ppl: 18.852234 +epoch: 0, batch: 27736, sum loss: 5945.996582, avg loss: 3.325501, ppl: 27.812943 +epoch: 0, batch: 27737, sum loss: 3966.161865, avg loss: 2.739062, ppl: 15.472466 +epoch: 0, batch: 27738, sum loss: 4514.541992, avg loss: 2.925821, ppl: 18.649536 +epoch: 0, batch: 27739, sum loss: 4800.674316, avg loss: 2.874655, ppl: 17.719315 +epoch: 0, batch: 27740, sum loss: 4545.736328, avg loss: 2.855362, ppl: 17.380728 +epoch: 0, batch: 27741, sum loss: 5405.057617, avg loss: 2.926399, ppl: 18.660305 +epoch: 0, batch: 27742, sum loss: 4991.136719, avg loss: 3.079048, ppl: 21.737696 +epoch: 0, batch: 27743, sum loss: 5044.156738, avg loss: 3.109838, ppl: 
22.417402 +epoch: 0, batch: 27744, sum loss: 6235.007812, avg loss: 3.414572, ppl: 30.403927 +epoch: 0, batch: 27745, sum loss: 5441.412598, avg loss: 3.014633, ppl: 20.381609 +epoch: 0, batch: 27746, sum loss: 5146.496582, avg loss: 3.074371, ppl: 21.636265 +epoch: 0, batch: 27747, sum loss: 4808.085938, avg loss: 2.734975, ppl: 15.409356 +epoch: 0, batch: 27748, sum loss: 5256.909180, avg loss: 3.052793, ppl: 21.174398 +epoch: 0, batch: 27749, sum loss: 5770.519531, avg loss: 3.232784, ppl: 25.350140 +epoch: 0, batch: 27750, sum loss: 4387.005859, avg loss: 3.109147, ppl: 22.401918 +epoch: 0, batch: 27751, sum loss: 4863.449219, avg loss: 2.905287, ppl: 18.270479 +epoch: 0, batch: 27752, sum loss: 4463.252930, avg loss: 2.977487, ppl: 19.638405 +epoch: 0, batch: 27753, sum loss: 6031.543457, avg loss: 3.121917, ppl: 22.689835 +epoch: 0, batch: 27754, sum loss: 5337.271484, avg loss: 2.971755, ppl: 19.526154 +epoch: 0, batch: 27755, sum loss: 5402.584473, avg loss: 3.246746, ppl: 25.706543 +epoch: 0, batch: 27756, sum loss: 5323.546387, avg loss: 2.937939, ppl: 18.876909 +epoch: 0, batch: 27757, sum loss: 4820.417480, avg loss: 3.113965, ppl: 22.510117 +epoch: 0, batch: 27758, sum loss: 5077.485840, avg loss: 3.064264, ppl: 21.418694 +epoch: 0, batch: 27759, sum loss: 4619.086426, avg loss: 2.987766, ppl: 19.841312 +epoch: 0, batch: 27760, sum loss: 4390.300781, avg loss: 2.607067, ppl: 13.559225 +epoch: 0, batch: 27761, sum loss: 6407.083008, avg loss: 3.160870, ppl: 23.591103 +epoch: 0, batch: 27762, sum loss: 5457.005859, avg loss: 3.057146, ppl: 21.266781 +epoch: 0, batch: 27763, sum loss: 5239.552246, avg loss: 3.006054, ppl: 20.207506 +epoch: 0, batch: 27764, sum loss: 4852.398926, avg loss: 3.057592, ppl: 21.276270 +epoch: 0, batch: 27765, sum loss: 5690.297363, avg loss: 2.998049, ppl: 20.046398 +epoch: 0, batch: 27766, sum loss: 4751.467773, avg loss: 3.079370, ppl: 21.744699 +epoch: 0, batch: 27767, sum loss: 4962.039551, avg loss: 3.130624, ppl: 
22.888264 +epoch: 0, batch: 27768, sum loss: 5233.877441, avg loss: 2.861606, ppl: 17.489595 +epoch: 0, batch: 27769, sum loss: 5126.304199, avg loss: 3.160483, ppl: 23.581991 +epoch: 0, batch: 27770, sum loss: 6846.736328, avg loss: 3.322046, ppl: 27.716995 +epoch: 0, batch: 27771, sum loss: 4407.507324, avg loss: 2.810910, ppl: 16.625044 +epoch: 0, batch: 27772, sum loss: 6055.155273, avg loss: 3.056615, ppl: 21.255493 +epoch: 0, batch: 27773, sum loss: 5425.528809, avg loss: 2.956692, ppl: 19.234230 +epoch: 0, batch: 27774, sum loss: 3929.493896, avg loss: 2.566619, ppl: 13.021726 +epoch: 0, batch: 27775, sum loss: 4887.896484, avg loss: 2.985887, ppl: 19.804056 +epoch: 0, batch: 27776, sum loss: 4973.025391, avg loss: 3.039747, ppl: 20.899950 +epoch: 0, batch: 27777, sum loss: 5300.006348, avg loss: 3.121323, ppl: 22.676363 +epoch: 0, batch: 27778, sum loss: 4662.323242, avg loss: 2.981025, ppl: 19.708006 +epoch: 0, batch: 27779, sum loss: 5424.987793, avg loss: 3.078881, ppl: 21.734064 +epoch: 0, batch: 27780, sum loss: 5353.953613, avg loss: 3.154952, ppl: 23.451912 +epoch: 0, batch: 27781, sum loss: 3880.700684, avg loss: 2.868219, ppl: 17.605637 +epoch: 0, batch: 27782, sum loss: 5639.496094, avg loss: 3.012552, ppl: 20.339230 +epoch: 0, batch: 27783, sum loss: 4676.800781, avg loss: 2.908458, ppl: 18.328518 +epoch: 0, batch: 27784, sum loss: 5028.576660, avg loss: 3.156671, ppl: 23.492256 +epoch: 0, batch: 27785, sum loss: 5534.047363, avg loss: 3.040685, ppl: 20.919577 +epoch: 0, batch: 27786, sum loss: 3729.097900, avg loss: 2.633544, ppl: 13.923022 +epoch: 0, batch: 27787, sum loss: 5339.749512, avg loss: 3.039129, ppl: 20.887037 +epoch: 0, batch: 27788, sum loss: 5282.420898, avg loss: 2.996268, ppl: 20.010723 +epoch: 0, batch: 27789, sum loss: 5140.930176, avg loss: 3.163649, ppl: 23.656769 +epoch: 0, batch: 27790, sum loss: 4361.038086, avg loss: 2.852216, ppl: 17.326134 +epoch: 0, batch: 27791, sum loss: 4101.190918, avg loss: 2.859966, ppl: 
17.460930 +epoch: 0, batch: 27792, sum loss: 5042.071289, avg loss: 3.131721, ppl: 22.913385 +epoch: 0, batch: 27793, sum loss: 4556.171387, avg loss: 3.047606, ppl: 21.064859 +epoch: 0, batch: 27794, sum loss: 4883.951172, avg loss: 2.847785, ppl: 17.249527 +epoch: 0, batch: 27795, sum loss: 5544.265137, avg loss: 3.151942, ppl: 23.381416 +epoch: 0, batch: 27796, sum loss: 4486.122070, avg loss: 3.165930, ppl: 23.710773 +epoch: 0, batch: 27797, sum loss: 4736.056641, avg loss: 2.974910, ppl: 19.587860 +epoch: 0, batch: 27798, sum loss: 6214.860352, avg loss: 3.350329, ppl: 28.512110 +epoch: 0, batch: 27799, sum loss: 5137.675293, avg loss: 3.015068, ppl: 20.390474 +epoch: 0, batch: 27800, sum loss: 4743.626953, avg loss: 2.987171, ppl: 19.829500 +epoch: 0, batch: 27801, sum loss: 4858.922852, avg loss: 3.046347, ppl: 21.038343 +epoch: 0, batch: 27802, sum loss: 5955.750000, avg loss: 3.236820, ppl: 25.452665 +epoch: 0, batch: 27803, sum loss: 4508.525391, avg loss: 2.985779, ppl: 19.801914 +epoch: 0, batch: 27804, sum loss: 5724.036133, avg loss: 3.139899, ppl: 23.101540 +epoch: 0, batch: 27805, sum loss: 4817.482422, avg loss: 2.914387, ppl: 18.437515 +epoch: 0, batch: 27806, sum loss: 4895.148926, avg loss: 2.920733, ppl: 18.554892 +epoch: 0, batch: 27807, sum loss: 5354.297852, avg loss: 3.025027, ppl: 20.594561 +epoch: 0, batch: 27808, sum loss: 5493.979004, avg loss: 3.264396, ppl: 26.164309 +epoch: 0, batch: 27809, sum loss: 4472.939941, avg loss: 2.946601, ppl: 19.041117 +epoch: 0, batch: 27810, sum loss: 4738.275879, avg loss: 2.919455, ppl: 18.531191 +epoch: 0, batch: 27811, sum loss: 6261.559570, avg loss: 3.207766, ppl: 24.723799 +epoch: 0, batch: 27812, sum loss: 5391.458496, avg loss: 3.111055, ppl: 22.444708 +epoch: 0, batch: 27813, sum loss: 4780.677246, avg loss: 2.757023, ppl: 15.752871 +epoch: 0, batch: 27814, sum loss: 4605.889160, avg loss: 2.696656, ppl: 14.830060 +epoch: 0, batch: 27815, sum loss: 5231.090332, avg loss: 2.947093, ppl: 
19.050493 +epoch: 0, batch: 27816, sum loss: 5620.553711, avg loss: 2.944240, ppl: 18.996218 +epoch: 0, batch: 27817, sum loss: 5301.324219, avg loss: 3.052000, ppl: 21.157623 +epoch: 0, batch: 27818, sum loss: 4246.496094, avg loss: 2.924584, ppl: 18.626478 +epoch: 0, batch: 27819, sum loss: 5223.675781, avg loss: 3.028218, ppl: 20.660383 +epoch: 0, batch: 27820, sum loss: 4189.862793, avg loss: 2.942319, ppl: 18.959770 +epoch: 0, batch: 27821, sum loss: 4424.302246, avg loss: 2.955446, ppl: 19.210279 +epoch: 0, batch: 27822, sum loss: 6638.414062, avg loss: 3.273380, ppl: 26.400417 +epoch: 0, batch: 27823, sum loss: 6384.860352, avg loss: 3.239401, ppl: 25.518440 +epoch: 0, batch: 27824, sum loss: 5225.440430, avg loss: 3.010046, ppl: 20.288338 +epoch: 0, batch: 27825, sum loss: 6177.901855, avg loss: 3.368540, ppl: 29.036091 +epoch: 0, batch: 27826, sum loss: 4651.306641, avg loss: 2.968288, ppl: 19.458572 +epoch: 0, batch: 27827, sum loss: 5441.309570, avg loss: 3.231182, ppl: 25.309546 +epoch: 0, batch: 27828, sum loss: 5740.479004, avg loss: 3.335548, ppl: 28.093786 +epoch: 0, batch: 27829, sum loss: 5197.466797, avg loss: 3.035903, ppl: 20.819778 +epoch: 0, batch: 27830, sum loss: 4390.539551, avg loss: 2.996955, ppl: 20.024477 +epoch: 0, batch: 27831, sum loss: 3898.734375, avg loss: 2.641419, ppl: 14.033101 +epoch: 0, batch: 27832, sum loss: 5691.837891, avg loss: 3.009962, ppl: 20.286625 +epoch: 0, batch: 27833, sum loss: 5753.881348, avg loss: 3.189513, ppl: 24.276602 +epoch: 0, batch: 27834, sum loss: 5841.177246, avg loss: 3.024949, ppl: 20.592960 +epoch: 0, batch: 27835, sum loss: 5156.196777, avg loss: 2.971871, ppl: 19.528431 +epoch: 0, batch: 27836, sum loss: 4056.739258, avg loss: 2.877120, ppl: 17.763046 +epoch: 0, batch: 27837, sum loss: 6692.098633, avg loss: 3.553956, ppl: 34.951298 +epoch: 0, batch: 27838, sum loss: 4711.152832, avg loss: 2.913515, ppl: 18.421429 +epoch: 0, batch: 27839, sum loss: 5772.477051, avg loss: 3.226650, ppl: 
25.195112 +epoch: 0, batch: 27840, sum loss: 5165.453125, avg loss: 3.085695, ppl: 21.882671 +epoch: 0, batch: 27841, sum loss: 4274.645996, avg loss: 2.814118, ppl: 16.678465 +epoch: 0, batch: 27842, sum loss: 6365.075684, avg loss: 3.292848, ppl: 26.919430 +epoch: 0, batch: 27843, sum loss: 5021.100586, avg loss: 3.043091, ppl: 20.969967 +epoch: 0, batch: 27844, sum loss: 5356.249023, avg loss: 2.847554, ppl: 17.245546 +epoch: 0, batch: 27845, sum loss: 5346.115234, avg loss: 3.146625, ppl: 23.257433 +epoch: 0, batch: 27846, sum loss: 5509.643066, avg loss: 2.892201, ppl: 18.032959 +epoch: 0, batch: 27847, sum loss: 4922.934082, avg loss: 3.010969, ppl: 20.307062 +epoch: 0, batch: 27848, sum loss: 5392.151855, avg loss: 3.234644, ppl: 25.397333 +epoch: 0, batch: 27849, sum loss: 4128.917480, avg loss: 2.911790, ppl: 18.389681 +epoch: 0, batch: 27850, sum loss: 5466.718750, avg loss: 3.000395, ppl: 20.093464 +epoch: 0, batch: 27851, sum loss: 4791.116211, avg loss: 3.028518, ppl: 20.666586 +epoch: 0, batch: 27852, sum loss: 5251.650391, avg loss: 3.142819, ppl: 23.169092 +epoch: 0, batch: 27853, sum loss: 5351.481934, avg loss: 2.927506, ppl: 18.680990 +epoch: 0, batch: 27854, sum loss: 4594.772461, avg loss: 3.096208, ppl: 22.113934 +epoch: 0, batch: 27855, sum loss: 5331.552734, avg loss: 2.998624, ppl: 20.057911 +epoch: 0, batch: 27856, sum loss: 4553.331543, avg loss: 3.076576, ppl: 21.684019 +epoch: 0, batch: 27857, sum loss: 4425.683105, avg loss: 2.713478, ppl: 15.081643 +epoch: 0, batch: 27858, sum loss: 4275.788086, avg loss: 2.649187, ppl: 14.142541 +epoch: 0, batch: 27859, sum loss: 4936.782227, avg loss: 3.114689, ppl: 22.526426 +epoch: 0, batch: 27860, sum loss: 5276.742676, avg loss: 3.044860, ppl: 21.007093 +epoch: 0, batch: 27861, sum loss: 5464.092773, avg loss: 3.149333, ppl: 23.320505 +epoch: 0, batch: 27862, sum loss: 4751.505371, avg loss: 3.016829, ppl: 20.426413 +epoch: 0, batch: 27863, sum loss: 4822.173340, avg loss: 3.004469, ppl: 
20.175508 +epoch: 0, batch: 27864, sum loss: 4886.089355, avg loss: 2.943427, ppl: 18.980789 +epoch: 0, batch: 27865, sum loss: 5031.722656, avg loss: 3.047682, ppl: 21.066456 +epoch: 0, batch: 27866, sum loss: 5404.633789, avg loss: 3.198008, ppl: 24.483711 +epoch: 0, batch: 27867, sum loss: 6309.129395, avg loss: 3.280878, ppl: 26.599125 +epoch: 0, batch: 27868, sum loss: 4821.628418, avg loss: 2.923971, ppl: 18.615065 +epoch: 0, batch: 27869, sum loss: 6029.909668, avg loss: 3.397132, ppl: 29.878284 +epoch: 0, batch: 27870, sum loss: 5618.268066, avg loss: 2.938425, ppl: 18.886070 +epoch: 0, batch: 27871, sum loss: 5377.408203, avg loss: 3.027820, ppl: 20.652164 +epoch: 0, batch: 27872, sum loss: 5655.273926, avg loss: 3.040470, ppl: 20.915068 +epoch: 0, batch: 27873, sum loss: 5486.777832, avg loss: 2.983566, ppl: 19.758150 +epoch: 0, batch: 27874, sum loss: 4808.998535, avg loss: 3.020728, ppl: 20.506207 +epoch: 0, batch: 27875, sum loss: 4729.139160, avg loss: 2.818319, ppl: 16.748671 +epoch: 0, batch: 27876, sum loss: 4621.696289, avg loss: 2.881357, ppl: 17.838463 +epoch: 0, batch: 27877, sum loss: 6025.711426, avg loss: 2.923683, ppl: 18.609709 +epoch: 0, batch: 27878, sum loss: 4723.482910, avg loss: 3.047408, ppl: 21.060692 +epoch: 0, batch: 27879, sum loss: 4588.085938, avg loss: 2.811327, ppl: 16.631977 +epoch: 0, batch: 27880, sum loss: 5689.019531, avg loss: 3.100283, ppl: 22.204243 +epoch: 0, batch: 27881, sum loss: 5338.204102, avg loss: 3.351038, ppl: 28.532349 +epoch: 0, batch: 27882, sum loss: 4349.227051, avg loss: 2.874572, ppl: 17.717836 +epoch: 0, batch: 27883, sum loss: 4814.991211, avg loss: 3.059079, ppl: 21.307932 +epoch: 0, batch: 27884, sum loss: 5013.943848, avg loss: 2.904950, ppl: 18.264328 +epoch: 0, batch: 27885, sum loss: 5717.203613, avg loss: 3.129285, ppl: 22.857628 +epoch: 0, batch: 27886, sum loss: 4846.643555, avg loss: 2.867836, ppl: 17.598902 +epoch: 0, batch: 27887, sum loss: 4237.545410, avg loss: 2.798907, ppl: 
16.426687 +epoch: 0, batch: 27888, sum loss: 4919.295898, avg loss: 3.086133, ppl: 21.892258 +epoch: 0, batch: 27889, sum loss: 5008.450684, avg loss: 2.844095, ppl: 17.185989 +epoch: 0, batch: 27890, sum loss: 5085.707031, avg loss: 2.995116, ppl: 19.987682 +epoch: 0, batch: 27891, sum loss: 5948.885742, avg loss: 3.306774, ppl: 27.296915 +epoch: 0, batch: 27892, sum loss: 4912.077637, avg loss: 3.058579, ppl: 21.297266 +epoch: 0, batch: 27893, sum loss: 5982.896973, avg loss: 3.289113, ppl: 26.819073 +epoch: 0, batch: 27894, sum loss: 4460.116211, avg loss: 2.778888, ppl: 16.101114 +epoch: 0, batch: 27895, sum loss: 5695.491211, avg loss: 3.228737, ppl: 25.247740 +epoch: 0, batch: 27896, sum loss: 6089.886230, avg loss: 3.275893, ppl: 26.466862 +epoch: 0, batch: 27897, sum loss: 4347.716797, avg loss: 2.836084, ppl: 17.048874 +epoch: 0, batch: 27898, sum loss: 5334.728027, avg loss: 3.319681, ppl: 27.651539 +epoch: 0, batch: 27899, sum loss: 3550.886230, avg loss: 2.561967, ppl: 12.961286 +epoch: 0, batch: 27900, sum loss: 4882.001953, avg loss: 3.019173, ppl: 20.474346 +epoch: 0, batch: 27901, sum loss: 4906.648438, avg loss: 3.087884, ppl: 21.930634 +epoch: 0, batch: 27902, sum loss: 4517.147461, avg loss: 2.754358, ppl: 15.710956 +epoch: 0, batch: 27903, sum loss: 5876.220703, avg loss: 2.998072, ppl: 20.046843 +epoch: 0, batch: 27904, sum loss: 5422.563477, avg loss: 3.229639, ppl: 25.270521 +epoch: 0, batch: 27905, sum loss: 4553.089844, avg loss: 2.928032, ppl: 18.690813 +epoch: 0, batch: 27906, sum loss: 4471.055176, avg loss: 3.100593, ppl: 22.211121 +epoch: 0, batch: 27907, sum loss: 5421.330078, avg loss: 3.211689, ppl: 24.820961 +epoch: 0, batch: 27908, sum loss: 5333.644531, avg loss: 3.074147, ppl: 21.631416 +epoch: 0, batch: 27909, sum loss: 4882.090332, avg loss: 2.993311, ppl: 19.951637 +epoch: 0, batch: 27910, sum loss: 4943.194824, avg loss: 3.051355, ppl: 21.143972 +epoch: 0, batch: 27911, sum loss: 6517.760742, avg loss: 3.236227, ppl: 
25.437559 +epoch: 0, batch: 27912, sum loss: 5724.865234, avg loss: 3.145530, ppl: 23.231997 +epoch: 0, batch: 27913, sum loss: 4115.469727, avg loss: 2.828501, ppl: 16.920086 +epoch: 0, batch: 27914, sum loss: 4584.587891, avg loss: 2.953987, ppl: 19.182283 +epoch: 0, batch: 27915, sum loss: 5595.439941, avg loss: 3.228759, ppl: 25.248312 +epoch: 0, batch: 27916, sum loss: 4396.437500, avg loss: 2.763317, ppl: 15.852340 +epoch: 0, batch: 27917, sum loss: 5338.751953, avg loss: 3.164643, ppl: 23.680279 +epoch: 0, batch: 27918, sum loss: 4732.283691, avg loss: 2.899684, ppl: 18.168398 +epoch: 0, batch: 27919, sum loss: 5372.038574, avg loss: 3.160023, ppl: 23.571138 +epoch: 0, batch: 27920, sum loss: 5080.152344, avg loss: 3.016718, ppl: 20.424143 +epoch: 0, batch: 27921, sum loss: 5234.218750, avg loss: 3.077142, ppl: 21.696306 +epoch: 0, batch: 27922, sum loss: 4496.391113, avg loss: 2.910286, ppl: 18.362043 +epoch: 0, batch: 27923, sum loss: 6566.219238, avg loss: 3.377685, ppl: 29.302858 +epoch: 0, batch: 27924, sum loss: 5756.608398, avg loss: 3.111680, ppl: 22.458750 +epoch: 0, batch: 27925, sum loss: 4994.576172, avg loss: 2.994350, ppl: 19.972378 +epoch: 0, batch: 27926, sum loss: 5305.503906, avg loss: 3.079225, ppl: 21.741537 +epoch: 0, batch: 27927, sum loss: 5299.530273, avg loss: 3.088305, ppl: 21.939848 +epoch: 0, batch: 27928, sum loss: 5767.988281, avg loss: 3.042188, ppl: 20.951038 +epoch: 0, batch: 27929, sum loss: 5415.750488, avg loss: 3.258574, ppl: 26.012430 +epoch: 0, batch: 27930, sum loss: 6145.895996, avg loss: 3.472258, ppl: 32.209377 +epoch: 0, batch: 27931, sum loss: 4870.951660, avg loss: 3.114419, ppl: 22.520348 +epoch: 0, batch: 27932, sum loss: 5325.094238, avg loss: 3.134252, ppl: 22.971455 +epoch: 0, batch: 27933, sum loss: 5869.720703, avg loss: 3.020958, ppl: 20.510925 +epoch: 0, batch: 27934, sum loss: 5142.690430, avg loss: 3.037620, ppl: 20.855549 +epoch: 0, batch: 27935, sum loss: 4346.007812, avg loss: 2.891555, ppl: 
18.021317 +epoch: 0, batch: 27936, sum loss: 4666.351074, avg loss: 2.868071, ppl: 17.603022 +epoch: 0, batch: 27937, sum loss: 6149.595703, avg loss: 3.336731, ppl: 28.127035 +epoch: 0, batch: 27938, sum loss: 7246.945801, avg loss: 3.485785, ppl: 32.648029 +epoch: 0, batch: 27939, sum loss: 5086.713867, avg loss: 3.197180, ppl: 24.463453 +epoch: 0, batch: 27940, sum loss: 5618.120117, avg loss: 3.236244, ppl: 25.438002 +epoch: 0, batch: 27941, sum loss: 4934.951172, avg loss: 2.994509, ppl: 19.975554 +epoch: 0, batch: 27942, sum loss: 5085.601074, avg loss: 2.914385, ppl: 18.437462 +epoch: 0, batch: 27943, sum loss: 5645.939941, avg loss: 2.998375, ppl: 20.052923 +epoch: 0, batch: 27944, sum loss: 5527.104492, avg loss: 3.119133, ppl: 22.626764 +epoch: 0, batch: 27945, sum loss: 5511.970703, avg loss: 3.043606, ppl: 20.980770 +epoch: 0, batch: 27946, sum loss: 5141.998047, avg loss: 2.918274, ppl: 18.509308 +epoch: 0, batch: 27947, sum loss: 5184.763672, avg loss: 2.920994, ppl: 18.559719 +epoch: 0, batch: 27948, sum loss: 5256.956055, avg loss: 3.161128, ppl: 23.597200 +epoch: 0, batch: 27949, sum loss: 4416.957520, avg loss: 2.718128, ppl: 15.151927 +epoch: 0, batch: 27950, sum loss: 4473.899902, avg loss: 2.941420, ppl: 18.942728 +epoch: 0, batch: 27951, sum loss: 4394.433105, avg loss: 2.870303, ppl: 17.642357 +epoch: 0, batch: 27952, sum loss: 5496.360352, avg loss: 2.874665, ppl: 17.719496 +epoch: 0, batch: 27953, sum loss: 5212.867188, avg loss: 2.968603, ppl: 19.464716 +epoch: 0, batch: 27954, sum loss: 4329.373535, avg loss: 2.827808, ppl: 16.908356 +epoch: 0, batch: 27955, sum loss: 5211.657715, avg loss: 3.189509, ppl: 24.276508 +epoch: 0, batch: 27956, sum loss: 6639.834473, avg loss: 3.434989, ppl: 31.031086 +epoch: 0, batch: 27957, sum loss: 4606.680664, avg loss: 3.081392, ppl: 21.788702 +epoch: 0, batch: 27958, sum loss: 5361.824707, avg loss: 3.203002, ppl: 24.606281 +epoch: 0, batch: 27959, sum loss: 5723.625000, avg loss: 3.327689, ppl: 
27.873848 +epoch: 0, batch: 27960, sum loss: 5730.346680, avg loss: 3.306605, ppl: 27.292313 +epoch: 0, batch: 27961, sum loss: 5119.846680, avg loss: 2.997568, ppl: 20.036751 +epoch: 0, batch: 27962, sum loss: 4805.640625, avg loss: 3.028129, ppl: 20.658546 +epoch: 0, batch: 27963, sum loss: 5711.612305, avg loss: 3.126225, ppl: 22.787783 +epoch: 0, batch: 27964, sum loss: 4871.080078, avg loss: 3.065500, ppl: 21.445187 +epoch: 0, batch: 27965, sum loss: 5543.692871, avg loss: 3.022733, ppl: 20.547375 +epoch: 0, batch: 27966, sum loss: 5341.909180, avg loss: 3.275236, ppl: 26.449457 +epoch: 0, batch: 27967, sum loss: 4564.928223, avg loss: 2.804010, ppl: 16.510721 +epoch: 0, batch: 27968, sum loss: 5501.946777, avg loss: 3.023048, ppl: 20.553843 +epoch: 0, batch: 27969, sum loss: 5304.728027, avg loss: 3.038218, ppl: 20.868019 +epoch: 0, batch: 27970, sum loss: 5157.783203, avg loss: 3.086645, ppl: 21.903465 +epoch: 0, batch: 27971, sum loss: 5491.913574, avg loss: 2.872340, ppl: 17.678331 +epoch: 0, batch: 27972, sum loss: 5795.888184, avg loss: 3.245178, ppl: 25.666285 +epoch: 0, batch: 27973, sum loss: 5132.848145, avg loss: 3.068050, ppl: 21.499945 +epoch: 0, batch: 27974, sum loss: 6030.098145, avg loss: 3.495709, ppl: 32.973663 +epoch: 0, batch: 27975, sum loss: 6623.334961, avg loss: 3.345119, ppl: 28.363937 +epoch: 0, batch: 27976, sum loss: 3862.795166, avg loss: 2.987467, ppl: 19.835377 +epoch: 0, batch: 27977, sum loss: 5080.415039, avg loss: 3.181224, ppl: 24.076214 +epoch: 0, batch: 27978, sum loss: 5366.062500, avg loss: 2.872624, ppl: 17.683365 +epoch: 0, batch: 27979, sum loss: 5543.566895, avg loss: 3.040904, ppl: 20.924141 +epoch: 0, batch: 27980, sum loss: 5554.224121, avg loss: 3.012052, ppl: 20.329073 +epoch: 0, batch: 27981, sum loss: 4870.485352, avg loss: 2.957186, ppl: 19.243744 +epoch: 0, batch: 27982, sum loss: 3929.875000, avg loss: 2.748164, ppl: 15.613945 +epoch: 0, batch: 27983, sum loss: 4522.911133, avg loss: 2.963900, ppl: 
19.373379 +epoch: 0, batch: 27984, sum loss: 4710.958496, avg loss: 2.966599, ppl: 19.425735 +epoch: 0, batch: 27985, sum loss: 6386.704590, avg loss: 3.094334, ppl: 22.072525 +epoch: 0, batch: 27986, sum loss: 5330.274414, avg loss: 3.066901, ppl: 21.475252 +epoch: 0, batch: 27987, sum loss: 5188.534668, avg loss: 2.997420, ppl: 20.033789 +epoch: 0, batch: 27988, sum loss: 5696.831055, avg loss: 3.057880, ppl: 21.282394 +epoch: 0, batch: 27989, sum loss: 5464.938477, avg loss: 3.224153, ppl: 25.132267 +epoch: 0, batch: 27990, sum loss: 4755.125488, avg loss: 2.937076, ppl: 18.860609 +epoch: 0, batch: 27991, sum loss: 5722.154297, avg loss: 3.218310, ppl: 24.985849 +epoch: 0, batch: 27992, sum loss: 4957.134766, avg loss: 3.006146, ppl: 20.209362 +epoch: 0, batch: 27993, sum loss: 5460.837891, avg loss: 3.018706, ppl: 20.464785 +epoch: 0, batch: 27994, sum loss: 4953.504883, avg loss: 3.025965, ppl: 20.613892 +epoch: 0, batch: 27995, sum loss: 4990.427734, avg loss: 2.940735, ppl: 18.929762 +epoch: 0, batch: 27996, sum loss: 4341.430176, avg loss: 2.953354, ppl: 19.170141 +epoch: 0, batch: 27997, sum loss: 4663.313477, avg loss: 2.833119, ppl: 16.998394 +epoch: 0, batch: 27998, sum loss: 5981.050293, avg loss: 3.054673, ppl: 21.214256 +epoch: 0, batch: 27999, sum loss: 5551.701660, avg loss: 3.045366, ppl: 21.017717 +epoch: 0, batch: 28000, sum loss: 5357.151367, avg loss: 3.238907, ppl: 25.505819 +epoch: 0, batch: 28001, sum loss: 6299.541016, avg loss: 3.098643, ppl: 22.167845 +epoch: 0, batch: 28002, sum loss: 5055.875000, avg loss: 3.097963, ppl: 22.152771 +epoch: 0, batch: 28003, sum loss: 4107.540039, avg loss: 2.852458, ppl: 17.330336 +epoch: 0, batch: 28004, sum loss: 5683.444824, avg loss: 3.104012, ppl: 22.287178 +epoch: 0, batch: 28005, sum loss: 4215.338379, avg loss: 2.751526, ppl: 15.666527 +epoch: 0, batch: 28006, sum loss: 4626.036133, avg loss: 2.926019, ppl: 18.653223 +epoch: 0, batch: 28007, sum loss: 6270.187988, avg loss: 3.255549, ppl: 
25.933859 +epoch: 0, batch: 28008, sum loss: 4980.733887, avg loss: 3.080231, ppl: 21.763439 +epoch: 0, batch: 28009, sum loss: 4065.954346, avg loss: 2.725171, ppl: 15.259017 +epoch: 0, batch: 28010, sum loss: 4387.175781, avg loss: 2.954327, ppl: 19.188807 +epoch: 0, batch: 28011, sum loss: 5151.190918, avg loss: 3.017687, ppl: 20.443941 +epoch: 0, batch: 28012, sum loss: 5031.206543, avg loss: 2.903177, ppl: 18.231981 +epoch: 0, batch: 28013, sum loss: 5405.042969, avg loss: 3.301798, ppl: 27.161428 +epoch: 0, batch: 28014, sum loss: 5258.218262, avg loss: 2.982540, ppl: 19.737890 +epoch: 0, batch: 28015, sum loss: 5651.558105, avg loss: 3.310813, ppl: 27.407404 +epoch: 0, batch: 28016, sum loss: 6601.418945, avg loss: 3.263183, ppl: 26.132576 +epoch: 0, batch: 28017, sum loss: 5295.582031, avg loss: 3.207500, ppl: 24.717215 +epoch: 0, batch: 28018, sum loss: 4942.932129, avg loss: 2.844035, ppl: 17.184961 +epoch: 0, batch: 28019, sum loss: 3972.295898, avg loss: 2.730100, ppl: 15.334423 +epoch: 0, batch: 28020, sum loss: 5785.289062, avg loss: 3.296461, ppl: 27.016853 +epoch: 0, batch: 28021, sum loss: 5129.044922, avg loss: 2.912575, ppl: 18.404129 +epoch: 0, batch: 28022, sum loss: 5014.650879, avg loss: 2.951531, ppl: 19.135225 +epoch: 0, batch: 28023, sum loss: 5438.494141, avg loss: 2.882085, ppl: 17.851452 +epoch: 0, batch: 28024, sum loss: 5450.606934, avg loss: 3.026433, ppl: 20.623547 +epoch: 0, batch: 28025, sum loss: 4528.732910, avg loss: 2.999161, ppl: 20.068693 +epoch: 0, batch: 28026, sum loss: 6241.167480, avg loss: 3.092749, ppl: 22.037579 +epoch: 0, batch: 28027, sum loss: 4291.764160, avg loss: 2.968025, ppl: 19.453461 +epoch: 0, batch: 28028, sum loss: 4934.385742, avg loss: 3.140920, ppl: 23.125141 +epoch: 0, batch: 28029, sum loss: 5527.209961, avg loss: 3.151203, ppl: 23.364153 +epoch: 0, batch: 28030, sum loss: 5166.880371, avg loss: 2.912559, ppl: 18.403839 +epoch: 0, batch: 28031, sum loss: 3854.719727, avg loss: 2.878805, ppl: 
17.792997 +epoch: 0, batch: 28032, sum loss: 4500.593750, avg loss: 2.974616, ppl: 19.582104 +epoch: 0, batch: 28033, sum loss: 6421.689453, avg loss: 3.266373, ppl: 26.216085 +epoch: 0, batch: 28034, sum loss: 5911.249023, avg loss: 3.144281, ppl: 23.202995 +epoch: 0, batch: 28035, sum loss: 4616.224609, avg loss: 3.007312, ppl: 20.232946 +epoch: 0, batch: 28036, sum loss: 4850.508789, avg loss: 3.208009, ppl: 24.729788 +epoch: 0, batch: 28037, sum loss: 5193.265137, avg loss: 3.029910, ppl: 20.695362 +epoch: 0, batch: 28038, sum loss: 5356.608887, avg loss: 3.069690, ppl: 21.535221 +epoch: 0, batch: 28039, sum loss: 4757.470215, avg loss: 2.883315, ppl: 17.873432 +epoch: 0, batch: 28040, sum loss: 4541.581543, avg loss: 2.939535, ppl: 18.907055 +epoch: 0, batch: 28041, sum loss: 4442.403809, avg loss: 3.017937, ppl: 20.449070 +epoch: 0, batch: 28042, sum loss: 5052.806641, avg loss: 2.951406, ppl: 19.132832 +epoch: 0, batch: 28043, sum loss: 5314.087402, avg loss: 3.180184, ppl: 24.051182 +epoch: 0, batch: 28044, sum loss: 4136.386230, avg loss: 2.937774, ppl: 18.873795 +epoch: 0, batch: 28045, sum loss: 5693.457520, avg loss: 3.209390, ppl: 24.763968 +epoch: 0, batch: 28046, sum loss: 4533.043457, avg loss: 3.022029, ppl: 20.532909 +epoch: 0, batch: 28047, sum loss: 4988.673828, avg loss: 2.857202, ppl: 17.412731 +epoch: 0, batch: 28048, sum loss: 5563.195801, avg loss: 3.033367, ppl: 20.767040 +epoch: 0, batch: 28049, sum loss: 5589.374512, avg loss: 2.952654, ppl: 19.156731 +epoch: 0, batch: 28050, sum loss: 5628.088379, avg loss: 3.306750, ppl: 27.296263 +epoch: 0, batch: 28051, sum loss: 6253.282227, avg loss: 3.057840, ppl: 21.281532 +epoch: 0, batch: 28052, sum loss: 4776.561035, avg loss: 3.028891, ppl: 20.674294 +epoch: 0, batch: 28053, sum loss: 4258.431152, avg loss: 2.615744, ppl: 13.677387 +epoch: 0, batch: 28054, sum loss: 5087.155762, avg loss: 3.109509, ppl: 22.410027 +epoch: 0, batch: 28055, sum loss: 5929.532715, avg loss: 3.240182, ppl: 
25.538368 +epoch: 0, batch: 28056, sum loss: 5629.274414, avg loss: 3.319148, ppl: 27.636782 +epoch: 0, batch: 28057, sum loss: 5815.950195, avg loss: 3.342500, ppl: 28.289768 +epoch: 0, batch: 28058, sum loss: 4753.773926, avg loss: 2.648342, ppl: 14.130592 +epoch: 0, batch: 28059, sum loss: 6130.104004, avg loss: 3.420817, ppl: 30.594404 +epoch: 0, batch: 28060, sum loss: 5251.954102, avg loss: 3.096671, ppl: 22.124180 +epoch: 0, batch: 28061, sum loss: 5698.755859, avg loss: 3.252715, ppl: 25.860447 +epoch: 0, batch: 28062, sum loss: 4492.158691, avg loss: 2.866726, ppl: 17.579363 +epoch: 0, batch: 28063, sum loss: 4889.069336, avg loss: 2.999429, ppl: 20.074072 +epoch: 0, batch: 28064, sum loss: 5368.391113, avg loss: 3.053692, ppl: 21.193453 +epoch: 0, batch: 28065, sum loss: 4934.541992, avg loss: 3.091818, ppl: 22.017076 +epoch: 0, batch: 28066, sum loss: 4869.260254, avg loss: 3.180444, ppl: 24.057444 +epoch: 0, batch: 28067, sum loss: 5719.451660, avg loss: 3.198798, ppl: 24.503071 +epoch: 0, batch: 28068, sum loss: 6366.037109, avg loss: 3.164035, ppl: 23.665886 +epoch: 0, batch: 28069, sum loss: 4822.788086, avg loss: 2.781308, ppl: 16.140118 +epoch: 0, batch: 28070, sum loss: 6062.600098, avg loss: 2.875996, ppl: 17.743090 +epoch: 0, batch: 28071, sum loss: 5008.625977, avg loss: 3.196316, ppl: 24.442307 +epoch: 0, batch: 28072, sum loss: 5481.085938, avg loss: 3.006630, ppl: 20.219145 +epoch: 0, batch: 28073, sum loss: 4226.982910, avg loss: 2.834999, ppl: 17.030380 +epoch: 0, batch: 28074, sum loss: 5191.105469, avg loss: 3.044637, ppl: 21.002399 +epoch: 0, batch: 28075, sum loss: 4941.437500, avg loss: 2.923928, ppl: 18.614252 +epoch: 0, batch: 28076, sum loss: 5498.817383, avg loss: 3.249892, ppl: 25.787554 +epoch: 0, batch: 28077, sum loss: 4720.275879, avg loss: 2.935495, ppl: 18.830820 +epoch: 0, batch: 28078, sum loss: 4695.898438, avg loss: 2.639628, ppl: 14.007994 +epoch: 0, batch: 28079, sum loss: 5965.890625, avg loss: 3.183506, ppl: 
24.131210 +epoch: 0, batch: 28080, sum loss: 4821.299316, avg loss: 2.974275, ppl: 19.575418 +epoch: 0, batch: 28081, sum loss: 6335.127930, avg loss: 3.309889, ppl: 27.382088 +epoch: 0, batch: 28082, sum loss: 5637.043945, avg loss: 3.159778, ppl: 23.565367 +epoch: 0, batch: 28083, sum loss: 5050.177246, avg loss: 2.833994, ppl: 17.013279 +epoch: 0, batch: 28084, sum loss: 3848.453857, avg loss: 2.829746, ppl: 16.941149 +epoch: 0, batch: 28085, sum loss: 4944.155762, avg loss: 3.005566, ppl: 20.197638 +epoch: 0, batch: 28086, sum loss: 5055.725098, avg loss: 3.116970, ppl: 22.577862 +epoch: 0, batch: 28087, sum loss: 5615.292969, avg loss: 3.297295, ppl: 27.039394 +epoch: 0, batch: 28088, sum loss: 5020.330078, avg loss: 2.997212, ppl: 20.029619 +epoch: 0, batch: 28089, sum loss: 5902.636719, avg loss: 3.310509, ppl: 27.399061 +epoch: 0, batch: 28090, sum loss: 5177.070312, avg loss: 3.203633, ppl: 24.621809 +epoch: 0, batch: 28091, sum loss: 5426.663574, avg loss: 3.045266, ppl: 21.015619 +epoch: 0, batch: 28092, sum loss: 5306.706543, avg loss: 3.149381, ppl: 23.321617 +epoch: 0, batch: 28093, sum loss: 6169.675781, avg loss: 3.117572, ppl: 22.591469 +epoch: 0, batch: 28094, sum loss: 5224.833984, avg loss: 3.048328, ppl: 21.080067 +epoch: 0, batch: 28095, sum loss: 4752.232422, avg loss: 2.951697, ppl: 19.138407 +epoch: 0, batch: 28096, sum loss: 4934.389648, avg loss: 3.085922, ppl: 21.887644 +epoch: 0, batch: 28097, sum loss: 4894.524414, avg loss: 3.101727, ppl: 22.236309 +epoch: 0, batch: 28098, sum loss: 4459.895996, avg loss: 2.949667, ppl: 19.099588 +epoch: 0, batch: 28099, sum loss: 4874.609375, avg loss: 3.018334, ppl: 20.457180 +epoch: 0, batch: 28100, sum loss: 5652.407227, avg loss: 3.269177, ppl: 26.289700 +epoch: 0, batch: 28101, sum loss: 5761.451172, avg loss: 2.901033, ppl: 18.192928 +epoch: 0, batch: 28102, sum loss: 5399.062988, avg loss: 3.106480, ppl: 22.342270 +epoch: 0, batch: 28103, sum loss: 4214.318359, avg loss: 2.894449, ppl: 
18.073549 +epoch: 0, batch: 28104, sum loss: 4917.947266, avg loss: 3.280819, ppl: 26.597540 +epoch: 0, batch: 28105, sum loss: 5151.785645, avg loss: 3.019804, ppl: 20.487276 +epoch: 0, batch: 28106, sum loss: 6325.414551, avg loss: 3.117504, ppl: 22.589916 +epoch: 0, batch: 28107, sum loss: 5143.092285, avg loss: 3.065013, ppl: 21.434744 +epoch: 0, batch: 28108, sum loss: 4405.823242, avg loss: 2.743352, ppl: 15.538983 +epoch: 0, batch: 28109, sum loss: 5453.760254, avg loss: 3.023149, ppl: 20.555916 +epoch: 0, batch: 28110, sum loss: 5297.792969, avg loss: 2.981313, ppl: 19.713678 +epoch: 0, batch: 28111, sum loss: 4598.264648, avg loss: 2.814116, ppl: 16.678417 +epoch: 0, batch: 28112, sum loss: 5546.532715, avg loss: 3.084835, ppl: 21.863850 +epoch: 0, batch: 28113, sum loss: 4967.511719, avg loss: 3.143995, ppl: 23.196346 +epoch: 0, batch: 28114, sum loss: 5499.245605, avg loss: 3.094680, ppl: 22.080162 +epoch: 0, batch: 28115, sum loss: 5396.299805, avg loss: 3.043598, ppl: 20.980604 +epoch: 0, batch: 28116, sum loss: 5321.261719, avg loss: 3.014879, ppl: 20.386629 +epoch: 0, batch: 28117, sum loss: 6291.931641, avg loss: 3.313287, ppl: 27.475283 +epoch: 0, batch: 28118, sum loss: 5356.047852, avg loss: 2.885802, ppl: 17.917925 +epoch: 0, batch: 28119, sum loss: 5243.910156, avg loss: 3.050559, ppl: 21.127148 +epoch: 0, batch: 28120, sum loss: 5158.648438, avg loss: 2.889999, ppl: 17.993294 +epoch: 0, batch: 28121, sum loss: 5025.936523, avg loss: 3.187024, ppl: 24.216249 +epoch: 0, batch: 28122, sum loss: 5281.595703, avg loss: 3.031915, ppl: 20.736900 +epoch: 0, batch: 28123, sum loss: 4919.758789, avg loss: 2.875371, ppl: 17.731997 +epoch: 0, batch: 28124, sum loss: 4708.453613, avg loss: 2.799318, ppl: 16.433441 +epoch: 0, batch: 28125, sum loss: 5344.724609, avg loss: 3.078758, ppl: 21.731405 +epoch: 0, batch: 28126, sum loss: 5912.667480, avg loss: 3.136694, ppl: 23.027605 +epoch: 0, batch: 28127, sum loss: 5631.114258, avg loss: 3.285364, ppl: 
26.718712 +epoch: 0, batch: 28128, sum loss: 4610.890137, avg loss: 2.871040, ppl: 17.655369 +epoch: 0, batch: 28129, sum loss: 4655.368164, avg loss: 2.926064, ppl: 18.654068 +epoch: 0, batch: 28130, sum loss: 5293.220703, avg loss: 2.945588, ppl: 19.021837 +epoch: 0, batch: 28131, sum loss: 4908.418945, avg loss: 2.858718, ppl: 17.439157 +epoch: 0, batch: 28132, sum loss: 5463.480957, avg loss: 2.966059, ppl: 19.415257 +epoch: 0, batch: 28133, sum loss: 5626.240234, avg loss: 3.295981, ppl: 27.003902 +epoch: 0, batch: 28134, sum loss: 5251.895996, avg loss: 3.056983, ppl: 21.263298 +epoch: 0, batch: 28135, sum loss: 4375.590332, avg loss: 2.850547, ppl: 17.297247 +epoch: 0, batch: 28136, sum loss: 4938.164551, avg loss: 2.985589, ppl: 19.798164 +epoch: 0, batch: 28137, sum loss: 5043.867188, avg loss: 3.127010, ppl: 22.805691 +epoch: 0, batch: 28138, sum loss: 5454.055664, avg loss: 3.281622, ppl: 26.618912 +epoch: 0, batch: 28139, sum loss: 5028.557129, avg loss: 3.005713, ppl: 20.200609 +epoch: 0, batch: 28140, sum loss: 5202.104492, avg loss: 2.810429, ppl: 16.617050 +epoch: 0, batch: 28141, sum loss: 6048.948242, avg loss: 3.098847, ppl: 22.172365 +epoch: 0, batch: 28142, sum loss: 4371.633789, avg loss: 2.900885, ppl: 18.190239 +epoch: 0, batch: 28143, sum loss: 5012.222168, avg loss: 2.836572, ppl: 17.057188 +epoch: 0, batch: 28144, sum loss: 5767.306152, avg loss: 3.085771, ppl: 21.884335 +epoch: 0, batch: 28145, sum loss: 4165.415039, avg loss: 2.916957, ppl: 18.484959 +epoch: 0, batch: 28146, sum loss: 6005.825684, avg loss: 3.187806, ppl: 24.235189 +epoch: 0, batch: 28147, sum loss: 5350.354980, avg loss: 3.050373, ppl: 21.123228 +epoch: 0, batch: 28148, sum loss: 5769.128906, avg loss: 3.164635, ppl: 23.680092 +epoch: 0, batch: 28149, sum loss: 6079.489746, avg loss: 3.092314, ppl: 22.027998 +epoch: 0, batch: 28150, sum loss: 5473.199707, avg loss: 3.343433, ppl: 28.316174 +epoch: 0, batch: 28151, sum loss: 5942.772949, avg loss: 3.198478, ppl: 
24.495232 +epoch: 0, batch: 28152, sum loss: 5407.924805, avg loss: 3.002734, ppl: 20.140535 +epoch: 0, batch: 28153, sum loss: 5482.767090, avg loss: 3.156458, ppl: 23.487255 +epoch: 0, batch: 28154, sum loss: 5471.643555, avg loss: 3.212944, ppl: 24.852144 +epoch: 0, batch: 28155, sum loss: 5149.625000, avg loss: 3.136191, ppl: 23.016024 +epoch: 0, batch: 28156, sum loss: 4896.416016, avg loss: 2.875171, ppl: 17.728455 +epoch: 0, batch: 28157, sum loss: 5274.719238, avg loss: 3.169904, ppl: 23.805187 +epoch: 0, batch: 28158, sum loss: 5188.804688, avg loss: 3.118272, ppl: 22.607283 +epoch: 0, batch: 28159, sum loss: 5316.629395, avg loss: 3.121920, ppl: 22.689899 +epoch: 0, batch: 28160, sum loss: 5069.101074, avg loss: 2.872012, ppl: 17.672539 +epoch: 0, batch: 28161, sum loss: 4970.246582, avg loss: 2.906577, ppl: 18.294067 +epoch: 0, batch: 28162, sum loss: 4614.275391, avg loss: 2.896595, ppl: 18.112364 +epoch: 0, batch: 28163, sum loss: 4767.439941, avg loss: 3.056051, ppl: 21.243505 +epoch: 0, batch: 28164, sum loss: 4835.558594, avg loss: 2.946714, ppl: 19.043282 +epoch: 0, batch: 28165, sum loss: 5513.975098, avg loss: 2.978917, ppl: 19.666504 +epoch: 0, batch: 28166, sum loss: 4862.734863, avg loss: 2.753531, ppl: 15.697956 +epoch: 0, batch: 28167, sum loss: 5568.523926, avg loss: 3.250744, ppl: 25.809525 +epoch: 0, batch: 28168, sum loss: 5563.221191, avg loss: 3.137745, ppl: 23.051819 +epoch: 0, batch: 28169, sum loss: 5473.116211, avg loss: 3.104434, ppl: 22.296585 +epoch: 0, batch: 28170, sum loss: 5836.773926, avg loss: 2.958324, ppl: 19.265663 +epoch: 0, batch: 28171, sum loss: 4333.650391, avg loss: 2.826908, ppl: 16.893152 +epoch: 0, batch: 28172, sum loss: 5903.990234, avg loss: 3.267288, ppl: 26.240086 +epoch: 0, batch: 28173, sum loss: 5256.611328, avg loss: 3.343900, ppl: 28.329409 +epoch: 0, batch: 28174, sum loss: 5622.251953, avg loss: 3.140923, ppl: 23.125196 +epoch: 0, batch: 28175, sum loss: 5256.106934, avg loss: 3.034704, ppl: 
20.794817 +epoch: 0, batch: 28176, sum loss: 5661.556641, avg loss: 3.291603, ppl: 26.885918 +epoch: 0, batch: 28177, sum loss: 6215.400879, avg loss: 3.139091, ppl: 23.082876 +epoch: 0, batch: 28178, sum loss: 4611.620605, avg loss: 2.878664, ppl: 17.790499 +epoch: 0, batch: 28179, sum loss: 4252.265625, avg loss: 2.836735, ppl: 17.059975 +epoch: 0, batch: 28180, sum loss: 4553.586914, avg loss: 2.920838, ppl: 18.556839 +epoch: 0, batch: 28181, sum loss: 5968.774414, avg loss: 3.176570, ppl: 23.964407 +epoch: 0, batch: 28182, sum loss: 5385.816406, avg loss: 2.908108, ppl: 18.322105 +epoch: 0, batch: 28183, sum loss: 4363.955566, avg loss: 2.938690, ppl: 18.891090 +epoch: 0, batch: 28184, sum loss: 4814.598145, avg loss: 2.977488, ppl: 19.638424 +epoch: 0, batch: 28185, sum loss: 4237.704102, avg loss: 2.898566, ppl: 18.148111 +epoch: 0, batch: 28186, sum loss: 5078.655762, avg loss: 3.050244, ppl: 21.120495 +epoch: 0, batch: 28187, sum loss: 4663.196777, avg loss: 3.006574, ppl: 20.218018 +epoch: 0, batch: 28188, sum loss: 4735.357910, avg loss: 2.974471, ppl: 19.579266 +epoch: 0, batch: 28189, sum loss: 4753.998047, avg loss: 2.988057, ppl: 19.847075 +epoch: 0, batch: 28190, sum loss: 4732.016602, avg loss: 2.910219, ppl: 18.360823 +epoch: 0, batch: 28191, sum loss: 5717.036133, avg loss: 3.053972, ppl: 21.199392 +epoch: 0, batch: 28192, sum loss: 5283.214844, avg loss: 3.043327, ppl: 20.974903 +epoch: 0, batch: 28193, sum loss: 4419.412598, avg loss: 2.974033, ppl: 19.570683 +epoch: 0, batch: 28194, sum loss: 5372.937012, avg loss: 2.975048, ppl: 19.590570 +epoch: 0, batch: 28195, sum loss: 5151.619629, avg loss: 2.981261, ppl: 19.712667 +epoch: 0, batch: 28196, sum loss: 6389.963867, avg loss: 3.321187, ppl: 27.693203 +epoch: 0, batch: 28197, sum loss: 4015.304199, avg loss: 2.767267, ppl: 15.915074 +epoch: 0, batch: 28198, sum loss: 4833.833496, avg loss: 2.778065, ppl: 16.087864 +epoch: 0, batch: 28199, sum loss: 5513.259766, avg loss: 2.981752, ppl: 
19.722343 +epoch: 0, batch: 28200, sum loss: 4485.786621, avg loss: 2.569179, ppl: 13.055102 +epoch: 0, batch: 28201, sum loss: 5370.082031, avg loss: 3.162593, ppl: 23.631786 +epoch: 0, batch: 28202, sum loss: 5182.554688, avg loss: 3.195163, ppl: 24.414154 +epoch: 0, batch: 28203, sum loss: 5437.771973, avg loss: 3.114417, ppl: 22.520298 +epoch: 0, batch: 28204, sum loss: 5516.674316, avg loss: 3.083664, ppl: 21.838264 +epoch: 0, batch: 28205, sum loss: 4793.630371, avg loss: 2.906992, ppl: 18.301672 +epoch: 0, batch: 28206, sum loss: 4735.734863, avg loss: 2.825618, ppl: 16.871361 +epoch: 0, batch: 28207, sum loss: 4539.911133, avg loss: 3.061302, ppl: 21.355349 +epoch: 0, batch: 28208, sum loss: 4534.641602, avg loss: 2.818298, ppl: 16.748322 +epoch: 0, batch: 28209, sum loss: 4782.950195, avg loss: 2.867476, ppl: 17.592562 +epoch: 0, batch: 28210, sum loss: 4723.296875, avg loss: 2.860870, ppl: 17.476732 +epoch: 0, batch: 28211, sum loss: 4305.041016, avg loss: 2.942612, ppl: 18.965319 +epoch: 0, batch: 28212, sum loss: 4576.294922, avg loss: 2.768478, ppl: 15.934370 +epoch: 0, batch: 28213, sum loss: 5516.313965, avg loss: 3.128936, ppl: 22.849649 +epoch: 0, batch: 28214, sum loss: 4674.544922, avg loss: 2.839942, ppl: 17.114780 +epoch: 0, batch: 28215, sum loss: 4943.435059, avg loss: 2.796061, ppl: 16.379992 +epoch: 0, batch: 28216, sum loss: 5797.719727, avg loss: 3.040231, ppl: 20.910063 +epoch: 0, batch: 28217, sum loss: 5737.018555, avg loss: 3.126441, ppl: 22.792711 +epoch: 0, batch: 28218, sum loss: 4383.343262, avg loss: 2.760292, ppl: 15.804454 +epoch: 0, batch: 28219, sum loss: 5150.002441, avg loss: 2.922817, ppl: 18.593582 +epoch: 0, batch: 28220, sum loss: 3972.104736, avg loss: 2.843310, ppl: 17.172518 +epoch: 0, batch: 28221, sum loss: 4778.072266, avg loss: 3.049185, ppl: 21.098139 +epoch: 0, batch: 28222, sum loss: 4390.426758, avg loss: 3.133781, ppl: 22.960623 +epoch: 0, batch: 28223, sum loss: 5247.029297, avg loss: 3.070234, ppl: 
21.546936 +epoch: 0, batch: 28224, sum loss: 5229.019531, avg loss: 3.118080, ppl: 22.602932 +epoch: 0, batch: 28225, sum loss: 5020.588379, avg loss: 3.151656, ppl: 23.374750 +epoch: 0, batch: 28226, sum loss: 5837.519531, avg loss: 3.330017, ppl: 27.938812 +epoch: 0, batch: 28227, sum loss: 5043.803711, avg loss: 2.885471, ppl: 17.912008 +epoch: 0, batch: 28228, sum loss: 4596.169922, avg loss: 2.770446, ppl: 15.965755 +epoch: 0, batch: 28229, sum loss: 5468.940430, avg loss: 3.183318, ppl: 24.126677 +epoch: 0, batch: 28230, sum loss: 4134.786621, avg loss: 2.615298, ppl: 13.671293 +epoch: 0, batch: 28231, sum loss: 4983.209473, avg loss: 3.149943, ppl: 23.334726 +epoch: 0, batch: 28232, sum loss: 5991.998047, avg loss: 3.071245, ppl: 21.568733 +epoch: 0, batch: 28233, sum loss: 4886.771973, avg loss: 3.098778, ppl: 22.170832 +epoch: 0, batch: 28234, sum loss: 5682.857422, avg loss: 3.185458, ppl: 24.178370 +epoch: 0, batch: 28235, sum loss: 5015.563965, avg loss: 3.190562, ppl: 24.302088 +epoch: 0, batch: 28236, sum loss: 5193.732910, avg loss: 2.962769, ppl: 19.351473 +epoch: 0, batch: 28237, sum loss: 4801.778320, avg loss: 2.917241, ppl: 18.490200 +epoch: 0, batch: 28238, sum loss: 6357.374023, avg loss: 3.374403, ppl: 29.206829 +epoch: 0, batch: 28239, sum loss: 4334.695312, avg loss: 2.958836, ppl: 19.275528 +epoch: 0, batch: 28240, sum loss: 5799.837891, avg loss: 3.273046, ppl: 26.391613 +epoch: 0, batch: 28241, sum loss: 5580.760254, avg loss: 3.061306, ppl: 21.355425 +epoch: 0, batch: 28242, sum loss: 6038.218750, avg loss: 3.248100, ppl: 25.741392 +epoch: 0, batch: 28243, sum loss: 4535.892578, avg loss: 2.972407, ppl: 19.538891 +epoch: 0, batch: 28244, sum loss: 5722.463379, avg loss: 3.302056, ppl: 27.168442 +epoch: 0, batch: 28245, sum loss: 5276.693359, avg loss: 3.267303, ppl: 26.240461 +epoch: 0, batch: 28246, sum loss: 5364.535645, avg loss: 3.086614, ppl: 21.902798 +epoch: 0, batch: 28247, sum loss: 4255.325684, avg loss: 2.825582, ppl: 
16.870754 +epoch: 0, batch: 28248, sum loss: 4383.155273, avg loss: 3.008343, ppl: 20.253801 +epoch: 0, batch: 28249, sum loss: 5737.112305, avg loss: 3.302886, ppl: 27.190992 +epoch: 0, batch: 28250, sum loss: 5832.326172, avg loss: 2.998626, ppl: 20.057949 +epoch: 0, batch: 28251, sum loss: 5990.084961, avg loss: 3.229156, ppl: 25.258337 +epoch: 0, batch: 28252, sum loss: 5014.694336, avg loss: 2.847640, ppl: 17.247032 +epoch: 0, batch: 28253, sum loss: 6223.888184, avg loss: 3.471215, ppl: 32.175812 +epoch: 0, batch: 28254, sum loss: 5739.918945, avg loss: 3.117827, ppl: 22.597216 +epoch: 0, batch: 28255, sum loss: 4826.176270, avg loss: 2.810819, ppl: 16.623529 +epoch: 0, batch: 28256, sum loss: 4891.407227, avg loss: 2.984385, ppl: 19.774338 +epoch: 0, batch: 28257, sum loss: 4910.352051, avg loss: 2.815569, ppl: 16.702671 +epoch: 0, batch: 28258, sum loss: 5214.812500, avg loss: 3.124513, ppl: 22.748823 +epoch: 0, batch: 28259, sum loss: 5964.923828, avg loss: 3.254187, ppl: 25.898540 +epoch: 0, batch: 28260, sum loss: 4661.142578, avg loss: 2.937078, ppl: 18.860655 +epoch: 0, batch: 28261, sum loss: 4696.681152, avg loss: 2.895611, ppl: 18.094555 +epoch: 0, batch: 28262, sum loss: 5214.941895, avg loss: 2.900413, ppl: 18.181644 +epoch: 0, batch: 28263, sum loss: 5329.399414, avg loss: 2.816807, ppl: 16.723372 +epoch: 0, batch: 28264, sum loss: 4442.542969, avg loss: 3.065937, ppl: 21.454561 +epoch: 0, batch: 28265, sum loss: 5451.197754, avg loss: 3.136477, ppl: 23.022627 +epoch: 0, batch: 28266, sum loss: 5605.043945, avg loss: 3.254961, ppl: 25.918617 +epoch: 0, batch: 28267, sum loss: 4743.795898, avg loss: 3.000503, ppl: 20.095648 +epoch: 0, batch: 28268, sum loss: 4708.994629, avg loss: 3.145621, ppl: 23.234095 +epoch: 0, batch: 28269, sum loss: 4823.514648, avg loss: 3.001565, ppl: 20.116995 +epoch: 0, batch: 28270, sum loss: 5395.968262, avg loss: 3.094018, ppl: 22.065569 +epoch: 0, batch: 28271, sum loss: 5128.841309, avg loss: 3.219612, ppl: 
25.018402 +epoch: 0, batch: 28272, sum loss: 5151.712891, avg loss: 3.041153, ppl: 20.929356 +epoch: 0, batch: 28273, sum loss: 5648.668945, avg loss: 3.009413, ppl: 20.275505 +epoch: 0, batch: 28274, sum loss: 5752.568359, avg loss: 2.975980, ppl: 19.608822 +epoch: 0, batch: 28275, sum loss: 4400.426270, avg loss: 2.862997, ppl: 17.513939 +epoch: 0, batch: 28276, sum loss: 4546.827637, avg loss: 2.734112, ppl: 15.396059 +epoch: 0, batch: 28277, sum loss: 4729.346191, avg loss: 2.897884, ppl: 18.135727 +epoch: 0, batch: 28278, sum loss: 5172.199707, avg loss: 3.300702, ppl: 27.131681 +epoch: 0, batch: 28279, sum loss: 5090.355469, avg loss: 2.920456, ppl: 18.549751 +epoch: 0, batch: 28280, sum loss: 6070.458984, avg loss: 3.515031, ppl: 33.616982 +epoch: 0, batch: 28281, sum loss: 3807.594238, avg loss: 2.967727, ppl: 19.447674 +epoch: 0, batch: 28282, sum loss: 5664.324219, avg loss: 2.987513, ppl: 19.836290 +epoch: 0, batch: 28283, sum loss: 5443.177734, avg loss: 3.151811, ppl: 23.378366 +epoch: 0, batch: 28284, sum loss: 4234.785156, avg loss: 3.113813, ppl: 22.506693 +epoch: 0, batch: 28285, sum loss: 4815.232910, avg loss: 2.930756, ppl: 18.741800 +epoch: 0, batch: 28286, sum loss: 5198.922363, avg loss: 3.022629, ppl: 20.545240 +epoch: 0, batch: 28287, sum loss: 5785.089355, avg loss: 2.957612, ppl: 19.251944 +epoch: 0, batch: 28288, sum loss: 4589.434082, avg loss: 3.187107, ppl: 24.218266 +epoch: 0, batch: 28289, sum loss: 5149.200195, avg loss: 3.302887, ppl: 27.191019 +epoch: 0, batch: 28290, sum loss: 4206.606445, avg loss: 2.844223, ppl: 17.188202 +epoch: 0, batch: 28291, sum loss: 5197.999512, avg loss: 2.921866, ppl: 18.575916 +epoch: 0, batch: 28292, sum loss: 5231.578125, avg loss: 3.130807, ppl: 22.892445 +epoch: 0, batch: 28293, sum loss: 6939.811035, avg loss: 3.289010, ppl: 26.816303 +epoch: 0, batch: 28294, sum loss: 6130.180176, avg loss: 3.048325, ppl: 21.079998 +epoch: 0, batch: 28295, sum loss: 4527.187500, avg loss: 2.903905, ppl: 
18.245253 +epoch: 0, batch: 28296, sum loss: 4550.912109, avg loss: 2.871238, ppl: 17.658867 +epoch: 0, batch: 28297, sum loss: 5490.583496, avg loss: 3.081136, ppl: 21.783129 +epoch: 0, batch: 28298, sum loss: 5437.669434, avg loss: 3.125097, ppl: 22.762110 +epoch: 0, batch: 28299, sum loss: 4889.646484, avg loss: 2.958044, ppl: 19.260258 +epoch: 0, batch: 28300, sum loss: 5137.081543, avg loss: 3.032516, ppl: 20.749363 +epoch: 0, batch: 28301, sum loss: 4852.557617, avg loss: 2.811447, ppl: 16.633968 +epoch: 0, batch: 28302, sum loss: 5110.996094, avg loss: 2.797480, ppl: 16.403259 +epoch: 0, batch: 28303, sum loss: 4595.833008, avg loss: 2.773587, ppl: 16.015972 +epoch: 0, batch: 28304, sum loss: 5625.620605, avg loss: 3.037592, ppl: 20.854963 +epoch: 0, batch: 28305, sum loss: 5569.024414, avg loss: 3.011911, ppl: 20.326214 +epoch: 0, batch: 28306, sum loss: 4742.346680, avg loss: 2.982608, ppl: 19.739227 +epoch: 0, batch: 28307, sum loss: 5408.343262, avg loss: 2.984737, ppl: 19.781298 +epoch: 0, batch: 28308, sum loss: 4678.181152, avg loss: 2.866533, ppl: 17.575970 +epoch: 0, batch: 28309, sum loss: 4816.318848, avg loss: 2.885751, ppl: 17.917027 +epoch: 0, batch: 28310, sum loss: 6709.249512, avg loss: 3.196403, ppl: 24.444441 +epoch: 0, batch: 28311, sum loss: 5748.227051, avg loss: 3.144544, ppl: 23.209099 +epoch: 0, batch: 28312, sum loss: 4771.192383, avg loss: 2.879416, ppl: 17.803873 +epoch: 0, batch: 28313, sum loss: 4328.316895, avg loss: 2.781695, ppl: 16.146360 +epoch: 0, batch: 28314, sum loss: 4670.601562, avg loss: 2.956077, ppl: 19.222412 +epoch: 0, batch: 28315, sum loss: 4995.001953, avg loss: 3.000001, ppl: 20.085562 +epoch: 0, batch: 28316, sum loss: 4349.718750, avg loss: 2.772287, ppl: 15.995179 +epoch: 0, batch: 28317, sum loss: 3953.835449, avg loss: 2.802151, ppl: 16.480061 +epoch: 0, batch: 28318, sum loss: 5046.169922, avg loss: 3.142073, ppl: 23.151819 +epoch: 0, batch: 28319, sum loss: 4650.796387, avg loss: 3.037751, ppl: 
20.858280 +epoch: 0, batch: 28320, sum loss: 5058.802246, avg loss: 3.045636, ppl: 21.023407 +epoch: 0, batch: 28321, sum loss: 5248.273438, avg loss: 3.069166, ppl: 21.523937 +epoch: 0, batch: 28322, sum loss: 4403.376465, avg loss: 2.963241, ppl: 19.360620 +epoch: 0, batch: 28323, sum loss: 5227.373535, avg loss: 3.062316, ppl: 21.377012 +epoch: 0, batch: 28324, sum loss: 5207.479980, avg loss: 3.063224, ppl: 21.396420 +epoch: 0, batch: 28325, sum loss: 5394.208984, avg loss: 3.174932, ppl: 23.925188 +epoch: 0, batch: 28326, sum loss: 5415.865723, avg loss: 3.246922, ppl: 25.711081 +epoch: 0, batch: 28327, sum loss: 4268.257812, avg loss: 3.020706, ppl: 20.505768 +epoch: 0, batch: 28328, sum loss: 5102.505859, avg loss: 2.881144, ppl: 17.834661 +epoch: 0, batch: 28329, sum loss: 4935.948730, avg loss: 3.037507, ppl: 20.853188 +epoch: 0, batch: 28330, sum loss: 5349.665039, avg loss: 2.940992, ppl: 18.934628 +epoch: 0, batch: 28331, sum loss: 5838.625977, avg loss: 3.132310, ppl: 22.926884 +epoch: 0, batch: 28332, sum loss: 4262.278809, avg loss: 2.957862, ppl: 19.256754 +epoch: 0, batch: 28333, sum loss: 4993.682617, avg loss: 3.146618, ppl: 23.257278 +epoch: 0, batch: 28334, sum loss: 4525.596680, avg loss: 2.844498, ppl: 17.192928 +epoch: 0, batch: 28335, sum loss: 5429.780762, avg loss: 3.038490, ppl: 20.873692 +epoch: 0, batch: 28336, sum loss: 5206.234863, avg loss: 3.182295, ppl: 24.102013 +epoch: 0, batch: 28337, sum loss: 5666.566406, avg loss: 3.094793, ppl: 22.082674 +epoch: 0, batch: 28338, sum loss: 5285.550293, avg loss: 2.972751, ppl: 19.545609 +epoch: 0, batch: 28339, sum loss: 6171.139648, avg loss: 3.287768, ppl: 26.783009 +epoch: 0, batch: 28340, sum loss: 4626.214844, avg loss: 3.073897, ppl: 21.626019 +epoch: 0, batch: 28341, sum loss: 3853.744141, avg loss: 2.770485, ppl: 15.966371 +epoch: 0, batch: 28342, sum loss: 5220.806641, avg loss: 2.981614, ppl: 19.719625 +epoch: 0, batch: 28343, sum loss: 4474.980469, avg loss: 2.883364, ppl: 
17.874300 +epoch: 0, batch: 28344, sum loss: 4123.886230, avg loss: 2.840142, ppl: 17.118196 +epoch: 0, batch: 28345, sum loss: 5309.167969, avg loss: 2.951177, ppl: 19.128462 +epoch: 0, batch: 28346, sum loss: 5151.672852, avg loss: 3.037543, ppl: 20.853943 +epoch: 0, batch: 28347, sum loss: 5086.996582, avg loss: 3.233946, ppl: 25.379597 +epoch: 0, batch: 28348, sum loss: 4403.277832, avg loss: 2.898800, ppl: 18.152357 +epoch: 0, batch: 28349, sum loss: 4617.211914, avg loss: 2.801706, ppl: 16.472731 +epoch: 0, batch: 28350, sum loss: 4744.030273, avg loss: 3.044949, ppl: 21.008955 +epoch: 0, batch: 28351, sum loss: 4473.062988, avg loss: 2.678481, ppl: 14.562953 +epoch: 0, batch: 28352, sum loss: 4965.127441, avg loss: 3.093537, ppl: 22.054956 +epoch: 0, batch: 28353, sum loss: 5455.199219, avg loss: 3.363255, ppl: 28.883047 +epoch: 0, batch: 28354, sum loss: 5862.588379, avg loss: 3.215902, ppl: 24.925755 +epoch: 0, batch: 28355, sum loss: 4677.092773, avg loss: 3.000060, ppl: 20.086735 +epoch: 0, batch: 28356, sum loss: 5895.809570, avg loss: 3.250171, ppl: 25.794743 +epoch: 0, batch: 28357, sum loss: 4627.338379, avg loss: 3.105596, ppl: 22.322527 +epoch: 0, batch: 28358, sum loss: 5670.790039, avg loss: 3.047174, ppl: 21.055752 +epoch: 0, batch: 28359, sum loss: 5829.235352, avg loss: 3.229493, ppl: 25.266848 +epoch: 0, batch: 28360, sum loss: 5195.485352, avg loss: 3.217019, ppl: 24.953619 +epoch: 0, batch: 28361, sum loss: 5183.294434, avg loss: 2.965271, ppl: 19.399969 +epoch: 0, batch: 28362, sum loss: 4415.685059, avg loss: 2.924295, ppl: 18.621088 +epoch: 0, batch: 28363, sum loss: 4724.911133, avg loss: 2.934727, ppl: 18.816374 +epoch: 0, batch: 28364, sum loss: 5442.508789, avg loss: 3.104683, ppl: 22.302147 +epoch: 0, batch: 28365, sum loss: 4802.633301, avg loss: 2.955467, ppl: 19.210682 +epoch: 0, batch: 28366, sum loss: 4804.237793, avg loss: 2.987710, ppl: 19.840197 +epoch: 0, batch: 28367, sum loss: 5344.580566, avg loss: 2.909407, ppl: 
18.345919 +epoch: 0, batch: 28368, sum loss: 5514.333984, avg loss: 3.122499, ppl: 22.703049 +epoch: 0, batch: 28369, sum loss: 5092.329102, avg loss: 2.935060, ppl: 18.822634 +epoch: 0, batch: 28370, sum loss: 4644.039062, avg loss: 2.800989, ppl: 16.460913 +epoch: 0, batch: 28371, sum loss: 4741.264648, avg loss: 2.950382, ppl: 19.113262 +epoch: 0, batch: 28372, sum loss: 5946.512207, avg loss: 3.359612, ppl: 28.778009 +epoch: 0, batch: 28373, sum loss: 5277.939941, avg loss: 3.251965, ppl: 25.841076 +epoch: 0, batch: 28374, sum loss: 4693.100098, avg loss: 3.141299, ppl: 23.133888 +epoch: 0, batch: 28375, sum loss: 5371.907715, avg loss: 2.974478, ppl: 19.579405 +epoch: 0, batch: 28376, sum loss: 5985.819336, avg loss: 3.456016, ppl: 31.690464 +epoch: 0, batch: 28377, sum loss: 6058.543945, avg loss: 3.310680, ppl: 27.403751 +epoch: 0, batch: 28378, sum loss: 5517.245117, avg loss: 2.977466, ppl: 19.637999 +epoch: 0, batch: 28379, sum loss: 5959.652832, avg loss: 3.363235, ppl: 28.882481 +epoch: 0, batch: 28380, sum loss: 4673.362793, avg loss: 2.972877, ppl: 19.548079 +epoch: 0, batch: 28381, sum loss: 4777.135254, avg loss: 3.025418, ppl: 20.602615 +epoch: 0, batch: 28382, sum loss: 6531.492188, avg loss: 3.247883, ppl: 25.735802 +epoch: 0, batch: 28383, sum loss: 3604.412598, avg loss: 2.652254, ppl: 14.185972 +epoch: 0, batch: 28384, sum loss: 5595.029297, avg loss: 3.200818, ppl: 24.552601 +epoch: 0, batch: 28385, sum loss: 4358.109375, avg loss: 3.028568, ppl: 20.667610 +epoch: 0, batch: 28386, sum loss: 5398.070801, avg loss: 3.114871, ppl: 22.530514 +epoch: 0, batch: 28387, sum loss: 4862.552734, avg loss: 3.215974, ppl: 24.927568 +epoch: 0, batch: 28388, sum loss: 4685.391113, avg loss: 3.060347, ppl: 21.334961 +epoch: 0, batch: 28389, sum loss: 5394.855469, avg loss: 3.061780, ppl: 21.365549 +epoch: 0, batch: 28390, sum loss: 4327.240723, avg loss: 2.996704, ppl: 20.019445 +epoch: 0, batch: 28391, sum loss: 6805.222656, avg loss: 3.177041, ppl: 
23.975712 +epoch: 0, batch: 28392, sum loss: 4228.542969, avg loss: 3.003227, ppl: 20.150448 +epoch: 0, batch: 28393, sum loss: 4978.705078, avg loss: 3.032098, ppl: 20.740707 +epoch: 0, batch: 28394, sum loss: 6175.416992, avg loss: 3.178290, ppl: 24.005667 +epoch: 0, batch: 28395, sum loss: 4557.488281, avg loss: 3.014212, ppl: 20.373028 +epoch: 0, batch: 28396, sum loss: 4189.620117, avg loss: 2.819394, ppl: 16.766693 +epoch: 0, batch: 28397, sum loss: 4712.139648, avg loss: 2.945087, ppl: 19.012320 +epoch: 0, batch: 28398, sum loss: 4292.808105, avg loss: 2.948357, ppl: 19.074594 +epoch: 0, batch: 28399, sum loss: 4616.861328, avg loss: 3.145001, ppl: 23.219698 +epoch: 0, batch: 28400, sum loss: 5769.943848, avg loss: 3.103789, ppl: 22.282215 +epoch: 0, batch: 28401, sum loss: 5796.574707, avg loss: 3.318016, ppl: 27.605536 +epoch: 0, batch: 28402, sum loss: 5194.503906, avg loss: 3.117949, ppl: 22.599991 +epoch: 0, batch: 28403, sum loss: 5122.705078, avg loss: 3.275387, ppl: 26.453457 +epoch: 0, batch: 28404, sum loss: 5305.885254, avg loss: 3.184805, ppl: 24.162575 +epoch: 0, batch: 28405, sum loss: 5949.020020, avg loss: 3.383970, ppl: 29.487612 +epoch: 0, batch: 28406, sum loss: 5564.674805, avg loss: 3.314279, ppl: 27.502560 +epoch: 0, batch: 28407, sum loss: 4483.250977, avg loss: 2.826766, ppl: 16.890747 +epoch: 0, batch: 28408, sum loss: 6683.504395, avg loss: 3.385767, ppl: 29.540642 +epoch: 0, batch: 28409, sum loss: 5697.616211, avg loss: 3.322225, ppl: 27.721966 +epoch: 0, batch: 28410, sum loss: 4731.099121, avg loss: 2.979282, ppl: 19.673679 +epoch: 0, batch: 28411, sum loss: 4453.212402, avg loss: 2.966830, ppl: 19.430237 +epoch: 0, batch: 28412, sum loss: 5859.831543, avg loss: 3.170904, ppl: 23.829025 +epoch: 0, batch: 28413, sum loss: 5442.939941, avg loss: 3.003830, ppl: 20.162611 +epoch: 0, batch: 28414, sum loss: 5513.361328, avg loss: 3.022676, ppl: 20.546204 +epoch: 0, batch: 28415, sum loss: 6360.742676, avg loss: 3.307718, ppl: 
27.322712 +epoch: 0, batch: 28416, sum loss: 6227.923828, avg loss: 3.218565, ppl: 24.992237 +epoch: 0, batch: 28417, sum loss: 5411.764648, avg loss: 3.140897, ppl: 23.124590 +epoch: 0, batch: 28418, sum loss: 4986.054688, avg loss: 2.898869, ppl: 18.153603 +epoch: 0, batch: 28419, sum loss: 4799.070312, avg loss: 2.926262, ppl: 18.657764 +epoch: 0, batch: 28420, sum loss: 4754.080566, avg loss: 2.941881, ppl: 18.951468 +epoch: 0, batch: 28421, sum loss: 4701.925781, avg loss: 3.029592, ppl: 20.688780 +epoch: 0, batch: 28422, sum loss: 5191.978027, avg loss: 3.002879, ppl: 20.143450 +epoch: 0, batch: 28423, sum loss: 5494.638672, avg loss: 2.991094, ppl: 19.907442 +epoch: 0, batch: 28424, sum loss: 5179.993164, avg loss: 3.205441, ppl: 24.666382 +epoch: 0, batch: 28425, sum loss: 4785.152832, avg loss: 3.026662, ppl: 20.628263 +epoch: 0, batch: 28426, sum loss: 4229.130371, avg loss: 2.904623, ppl: 18.258350 +epoch: 0, batch: 28427, sum loss: 5289.344727, avg loss: 2.993404, ppl: 19.953487 +epoch: 0, batch: 28428, sum loss: 5305.830566, avg loss: 3.160113, ppl: 23.573267 +epoch: 0, batch: 28429, sum loss: 6034.381348, avg loss: 3.213196, ppl: 24.858397 +epoch: 0, batch: 28430, sum loss: 5505.744629, avg loss: 3.133605, ppl: 22.956594 +epoch: 0, batch: 28431, sum loss: 4322.371094, avg loss: 2.668130, ppl: 14.412997 +epoch: 0, batch: 28432, sum loss: 5039.882812, avg loss: 2.852226, ppl: 17.326303 +epoch: 0, batch: 28433, sum loss: 4510.438965, avg loss: 2.831412, ppl: 16.969402 +epoch: 0, batch: 28434, sum loss: 5242.642090, avg loss: 3.018217, ppl: 20.454781 +epoch: 0, batch: 28435, sum loss: 5560.223633, avg loss: 3.125477, ppl: 22.770756 +epoch: 0, batch: 28436, sum loss: 5529.293457, avg loss: 2.993662, ppl: 19.958639 +epoch: 0, batch: 28437, sum loss: 5844.242188, avg loss: 3.120258, ppl: 22.652214 +epoch: 0, batch: 28438, sum loss: 6171.412109, avg loss: 3.337703, ppl: 28.154367 +epoch: 0, batch: 28439, sum loss: 5380.269531, avg loss: 3.217865, ppl: 
24.974735 +epoch: 0, batch: 28440, sum loss: 4977.932617, avg loss: 2.875755, ppl: 17.738817 +epoch: 0, batch: 28441, sum loss: 4739.531250, avg loss: 2.791244, ppl: 16.301279 +epoch: 0, batch: 28442, sum loss: 4538.499023, avg loss: 2.928064, ppl: 18.691410 +epoch: 0, batch: 28443, sum loss: 4768.702148, avg loss: 2.965611, ppl: 19.406551 +epoch: 0, batch: 28444, sum loss: 5212.735840, avg loss: 3.032423, ppl: 20.747454 +epoch: 0, batch: 28445, sum loss: 4846.965820, avg loss: 2.868027, ppl: 17.602259 +epoch: 0, batch: 28446, sum loss: 4716.915039, avg loss: 2.935230, ppl: 18.825838 +epoch: 0, batch: 28447, sum loss: 4753.020508, avg loss: 2.841017, ppl: 17.133173 +epoch: 0, batch: 28448, sum loss: 4918.358398, avg loss: 3.083610, ppl: 21.837099 +epoch: 0, batch: 28449, sum loss: 4256.715820, avg loss: 2.951953, ppl: 19.143303 +epoch: 0, batch: 28450, sum loss: 5101.455078, avg loss: 2.815372, ppl: 16.699394 +epoch: 0, batch: 28451, sum loss: 5108.547852, avg loss: 3.062679, ppl: 21.384762 +epoch: 0, batch: 28452, sum loss: 6607.456055, avg loss: 3.335414, ppl: 28.090015 +epoch: 0, batch: 28453, sum loss: 5165.706543, avg loss: 2.948463, ppl: 19.076605 +epoch: 0, batch: 28454, sum loss: 4074.394775, avg loss: 2.906131, ppl: 18.285906 +epoch: 0, batch: 28455, sum loss: 4732.951660, avg loss: 3.014619, ppl: 20.381321 +epoch: 0, batch: 28456, sum loss: 5446.156738, avg loss: 2.947055, ppl: 19.049763 +epoch: 0, batch: 28457, sum loss: 5707.852539, avg loss: 3.013650, ppl: 20.361578 +epoch: 0, batch: 28458, sum loss: 4832.591797, avg loss: 3.144172, ppl: 23.200451 +epoch: 0, batch: 28459, sum loss: 5380.614258, avg loss: 3.148399, ppl: 23.298742 +epoch: 0, batch: 28460, sum loss: 5480.020508, avg loss: 3.014313, ppl: 20.375084 +epoch: 0, batch: 28461, sum loss: 5432.651367, avg loss: 3.216490, ppl: 24.940426 +epoch: 0, batch: 28462, sum loss: 5246.475586, avg loss: 3.122902, ppl: 22.712198 +epoch: 0, batch: 28463, sum loss: 5183.334473, avg loss: 3.103793, ppl: 
22.282310 +epoch: 0, batch: 28464, sum loss: 5088.159668, avg loss: 2.996561, ppl: 20.016573 +epoch: 0, batch: 28465, sum loss: 5611.959961, avg loss: 3.203174, ppl: 24.610512 +epoch: 0, batch: 28466, sum loss: 5381.130859, avg loss: 3.115884, ppl: 22.553349 +epoch: 0, batch: 28467, sum loss: 4786.498047, avg loss: 3.199531, ppl: 24.521023 +epoch: 0, batch: 28468, sum loss: 4885.790039, avg loss: 3.164372, ppl: 23.673872 +epoch: 0, batch: 28469, sum loss: 5876.378418, avg loss: 3.197159, ppl: 24.462933 +epoch: 0, batch: 28470, sum loss: 4445.390625, avg loss: 2.882873, ppl: 17.865532 +epoch: 0, batch: 28471, sum loss: 5671.904785, avg loss: 3.382173, ppl: 29.434671 +epoch: 0, batch: 28472, sum loss: 4894.339844, avg loss: 2.995312, ppl: 19.991596 +epoch: 0, batch: 28473, sum loss: 4548.601074, avg loss: 2.945985, ppl: 19.029404 +epoch: 0, batch: 28474, sum loss: 5411.414551, avg loss: 3.045253, ppl: 21.015348 +epoch: 0, batch: 28475, sum loss: 5095.409668, avg loss: 2.900062, ppl: 18.175278 +epoch: 0, batch: 28476, sum loss: 4828.605957, avg loss: 3.042600, ppl: 20.959665 +epoch: 0, batch: 28477, sum loss: 5239.685547, avg loss: 3.131910, ppl: 22.917713 +epoch: 0, batch: 28478, sum loss: 4302.382324, avg loss: 2.940795, ppl: 18.930885 +epoch: 0, batch: 28479, sum loss: 4244.145996, avg loss: 2.594221, ppl: 13.386160 +epoch: 0, batch: 28480, sum loss: 4744.584473, avg loss: 2.948778, ppl: 19.082628 +epoch: 0, batch: 28481, sum loss: 4969.966797, avg loss: 3.030468, ppl: 20.706911 +epoch: 0, batch: 28482, sum loss: 4114.267578, avg loss: 2.766824, ppl: 15.908030 +epoch: 0, batch: 28483, sum loss: 4889.379395, avg loss: 3.144296, ppl: 23.203327 +epoch: 0, batch: 28484, sum loss: 4545.074219, avg loss: 2.830059, ppl: 16.946453 +epoch: 0, batch: 28485, sum loss: 5432.680176, avg loss: 3.088505, ppl: 21.944248 +epoch: 0, batch: 28486, sum loss: 4612.870117, avg loss: 2.999265, ppl: 20.070787 +epoch: 0, batch: 28487, sum loss: 4761.188477, avg loss: 2.887319, ppl: 
17.945127 +epoch: 0, batch: 28488, sum loss: 5309.535645, avg loss: 3.291715, ppl: 26.888929 +epoch: 0, batch: 28489, sum loss: 4403.392578, avg loss: 2.848249, ppl: 17.257532 +epoch: 0, batch: 28490, sum loss: 5589.782227, avg loss: 2.959122, ppl: 19.281044 +epoch: 0, batch: 28491, sum loss: 5266.887207, avg loss: 3.018273, ppl: 20.455940 +epoch: 0, batch: 28492, sum loss: 5370.130859, avg loss: 2.923316, ppl: 18.602867 +epoch: 0, batch: 28493, sum loss: 5469.671875, avg loss: 3.246096, ppl: 25.689854 +epoch: 0, batch: 28494, sum loss: 4924.472168, avg loss: 3.091320, ppl: 22.006098 +epoch: 0, batch: 28495, sum loss: 4199.770020, avg loss: 2.736006, ppl: 15.425261 +epoch: 0, batch: 28496, sum loss: 5078.305664, avg loss: 3.102203, ppl: 22.246899 +epoch: 0, batch: 28497, sum loss: 5019.904297, avg loss: 3.074038, ppl: 21.629065 +epoch: 0, batch: 28498, sum loss: 4698.633789, avg loss: 2.832208, ppl: 16.982924 +epoch: 0, batch: 28499, sum loss: 5233.894043, avg loss: 3.062548, ppl: 21.381962 +epoch: 0, batch: 28500, sum loss: 3850.315186, avg loss: 2.685018, ppl: 14.658459 +epoch: 0, batch: 28501, sum loss: 5000.187988, avg loss: 3.069483, ppl: 21.530769 +epoch: 0, batch: 28502, sum loss: 5091.807617, avg loss: 3.000476, ppl: 20.095098 +epoch: 0, batch: 28503, sum loss: 5397.893066, avg loss: 3.129214, ppl: 22.855997 +epoch: 0, batch: 28504, sum loss: 5896.743164, avg loss: 3.346619, ppl: 28.406540 +epoch: 0, batch: 28505, sum loss: 4584.881836, avg loss: 2.842456, ppl: 17.157860 +epoch: 0, batch: 28506, sum loss: 5772.217773, avg loss: 3.128573, ppl: 22.841366 +epoch: 0, batch: 28507, sum loss: 4654.018555, avg loss: 3.100612, ppl: 22.211538 +epoch: 0, batch: 28508, sum loss: 5113.480469, avg loss: 3.131341, ppl: 22.904678 +epoch: 0, batch: 28509, sum loss: 5696.656250, avg loss: 3.266431, ppl: 26.217604 +epoch: 0, batch: 28510, sum loss: 5350.210938, avg loss: 3.121477, ppl: 22.679846 +epoch: 0, batch: 28511, sum loss: 5460.829590, avg loss: 3.151084, ppl: 
23.361385 +epoch: 0, batch: 28512, sum loss: 5139.486816, avg loss: 2.970802, ppl: 19.507551 +epoch: 0, batch: 28513, sum loss: 5246.520508, avg loss: 3.139749, ppl: 23.098070 +epoch: 0, batch: 28514, sum loss: 4609.757324, avg loss: 2.912039, ppl: 18.394258 +epoch: 0, batch: 28515, sum loss: 5020.519043, avg loss: 2.875441, ppl: 17.733236 +epoch: 0, batch: 28516, sum loss: 4847.227051, avg loss: 2.920016, ppl: 18.541590 +epoch: 0, batch: 28517, sum loss: 4580.442383, avg loss: 2.732961, ppl: 15.378354 +epoch: 0, batch: 28518, sum loss: 6180.723633, avg loss: 3.355442, ppl: 28.658262 +epoch: 0, batch: 28519, sum loss: 5735.703613, avg loss: 3.336652, ppl: 28.124794 +epoch: 0, batch: 28520, sum loss: 5932.363281, avg loss: 3.419230, ppl: 30.545870 +epoch: 0, batch: 28521, sum loss: 3824.021484, avg loss: 2.683524, ppl: 14.636577 +epoch: 0, batch: 28522, sum loss: 4818.712402, avg loss: 2.803207, ppl: 16.497469 +epoch: 0, batch: 28523, sum loss: 5422.353027, avg loss: 3.260585, ppl: 26.064783 +epoch: 0, batch: 28524, sum loss: 5136.722656, avg loss: 3.214470, ppl: 24.890095 +epoch: 0, batch: 28525, sum loss: 5095.912598, avg loss: 2.775552, ppl: 16.047476 +epoch: 0, batch: 28526, sum loss: 4891.369629, avg loss: 3.089936, ppl: 21.975683 +epoch: 0, batch: 28527, sum loss: 4676.914062, avg loss: 2.831062, ppl: 16.963463 +epoch: 0, batch: 28528, sum loss: 5724.341797, avg loss: 3.383181, ppl: 29.464336 +epoch: 0, batch: 28529, sum loss: 4482.992188, avg loss: 2.663691, ppl: 14.349154 +epoch: 0, batch: 28530, sum loss: 4074.425781, avg loss: 2.762323, ppl: 15.836583 +epoch: 0, batch: 28531, sum loss: 5430.219727, avg loss: 3.126206, ppl: 22.787363 +epoch: 0, batch: 28532, sum loss: 6128.324219, avg loss: 3.309030, ppl: 27.358583 +epoch: 0, batch: 28533, sum loss: 4781.533691, avg loss: 3.035894, ppl: 20.819590 +epoch: 0, batch: 28534, sum loss: 4572.535645, avg loss: 2.940537, ppl: 18.926016 +epoch: 0, batch: 28535, sum loss: 6132.517578, avg loss: 3.073944, ppl: 
21.627024 +epoch: 0, batch: 28536, sum loss: 4275.527832, avg loss: 2.803625, ppl: 16.504364 +epoch: 0, batch: 28537, sum loss: 4168.415527, avg loss: 3.027172, ppl: 20.638781 +epoch: 0, batch: 28538, sum loss: 5542.741211, avg loss: 2.952979, ppl: 19.162952 +epoch: 0, batch: 28539, sum loss: 4622.876953, avg loss: 2.896539, ppl: 18.111362 +epoch: 0, batch: 28540, sum loss: 4322.754395, avg loss: 2.774554, ppl: 16.031467 +epoch: 0, batch: 28541, sum loss: 4544.798340, avg loss: 2.849403, ppl: 17.277472 +epoch: 0, batch: 28542, sum loss: 5213.358398, avg loss: 3.186650, ppl: 24.207188 +epoch: 0, batch: 28543, sum loss: 4962.915039, avg loss: 2.998740, ppl: 20.060253 +epoch: 0, batch: 28544, sum loss: 5679.086426, avg loss: 3.201289, ppl: 24.564165 +epoch: 0, batch: 28545, sum loss: 6160.119141, avg loss: 3.160656, ppl: 23.586075 +epoch: 0, batch: 28546, sum loss: 4682.987305, avg loss: 2.910496, ppl: 18.365896 +epoch: 0, batch: 28547, sum loss: 5462.589355, avg loss: 3.181473, ppl: 24.082207 +epoch: 0, batch: 28548, sum loss: 4546.519043, avg loss: 3.012935, ppl: 20.347034 +epoch: 0, batch: 28549, sum loss: 4877.723633, avg loss: 2.947265, ppl: 19.053774 +epoch: 0, batch: 28550, sum loss: 5296.891602, avg loss: 3.200539, ppl: 24.545753 +epoch: 0, batch: 28551, sum loss: 5125.375000, avg loss: 3.050819, ppl: 21.132639 +epoch: 0, batch: 28552, sum loss: 5142.645508, avg loss: 2.931953, ppl: 18.764240 +epoch: 0, batch: 28553, sum loss: 4818.009277, avg loss: 3.045518, ppl: 21.020910 +epoch: 0, batch: 28554, sum loss: 5525.228516, avg loss: 3.104061, ppl: 22.288279 +epoch: 0, batch: 28555, sum loss: 6173.653809, avg loss: 3.164354, ppl: 23.673437 +epoch: 0, batch: 28556, sum loss: 5146.077637, avg loss: 2.858932, ppl: 17.442888 +epoch: 0, batch: 28557, sum loss: 4523.155273, avg loss: 2.933304, ppl: 18.789616 +epoch: 0, batch: 28558, sum loss: 4948.608887, avg loss: 3.026672, ppl: 20.628469 +epoch: 0, batch: 28559, sum loss: 4241.061035, avg loss: 2.727371, ppl: 
15.292623 +epoch: 0, batch: 28560, sum loss: 4741.624023, avg loss: 2.925123, ppl: 18.636513 +epoch: 0, batch: 28561, sum loss: 4537.976562, avg loss: 2.773824, ppl: 16.019783 +epoch: 0, batch: 28562, sum loss: 5184.483887, avg loss: 2.986454, ppl: 19.815289 +epoch: 0, batch: 28563, sum loss: 5207.821289, avg loss: 3.010301, ppl: 20.293514 +epoch: 0, batch: 28564, sum loss: 3824.937988, avg loss: 2.789889, ppl: 16.279215 +epoch: 0, batch: 28565, sum loss: 5067.797363, avg loss: 3.016546, ppl: 20.420643 +epoch: 0, batch: 28566, sum loss: 5481.277832, avg loss: 3.278276, ppl: 26.530006 +epoch: 0, batch: 28567, sum loss: 3778.408936, avg loss: 2.782334, ppl: 16.156681 +epoch: 0, batch: 28568, sum loss: 4524.464355, avg loss: 2.943698, ppl: 18.985935 +epoch: 0, batch: 28569, sum loss: 4885.834961, avg loss: 2.918659, ppl: 18.516439 +epoch: 0, batch: 28570, sum loss: 5983.067383, avg loss: 3.134137, ppl: 22.968809 +epoch: 0, batch: 28571, sum loss: 4922.492676, avg loss: 3.257771, ppl: 25.991550 +epoch: 0, batch: 28572, sum loss: 6129.256836, avg loss: 3.161040, ppl: 23.595129 +epoch: 0, batch: 28573, sum loss: 5900.767578, avg loss: 2.983199, ppl: 19.750893 +epoch: 0, batch: 28574, sum loss: 5187.440430, avg loss: 3.067676, ppl: 21.491903 +epoch: 0, batch: 28575, sum loss: 4500.141113, avg loss: 2.982201, ppl: 19.731199 +epoch: 0, batch: 28576, sum loss: 5846.880859, avg loss: 3.232107, ppl: 25.332975 +epoch: 0, batch: 28577, sum loss: 5485.738770, avg loss: 3.074966, ppl: 21.649139 +epoch: 0, batch: 28578, sum loss: 4474.938965, avg loss: 2.883337, ppl: 17.873819 +epoch: 0, batch: 28579, sum loss: 5200.345215, avg loss: 3.068050, ppl: 21.499941 +epoch: 0, batch: 28580, sum loss: 5258.952637, avg loss: 3.126607, ppl: 22.796494 +epoch: 0, batch: 28581, sum loss: 5072.456055, avg loss: 3.044691, ppl: 21.003548 +epoch: 0, batch: 28582, sum loss: 4301.135742, avg loss: 2.852212, ppl: 17.326063 +epoch: 0, batch: 28583, sum loss: 6249.897461, avg loss: 3.312081, ppl: 
27.442183 +epoch: 0, batch: 28584, sum loss: 5269.424316, avg loss: 3.106972, ppl: 22.353256 +epoch: 0, batch: 28585, sum loss: 4914.764648, avg loss: 2.975039, ppl: 19.590391 +epoch: 0, batch: 28586, sum loss: 4943.285645, avg loss: 2.899288, ppl: 18.161209 +epoch: 0, batch: 28587, sum loss: 4481.805664, avg loss: 2.802880, ppl: 16.492081 +epoch: 0, batch: 28588, sum loss: 5074.895020, avg loss: 2.777720, ppl: 16.082319 +epoch: 0, batch: 28589, sum loss: 4545.520508, avg loss: 2.953555, ppl: 19.173988 +epoch: 0, batch: 28590, sum loss: 4407.631836, avg loss: 2.946278, ppl: 19.034975 +epoch: 0, batch: 28591, sum loss: 5727.546387, avg loss: 3.359265, ppl: 28.768034 +epoch: 0, batch: 28592, sum loss: 4812.158203, avg loss: 3.011363, ppl: 20.315071 +epoch: 0, batch: 28593, sum loss: 4348.585938, avg loss: 2.791134, ppl: 16.299488 +epoch: 0, batch: 28594, sum loss: 4482.197266, avg loss: 3.156477, ppl: 23.487707 +epoch: 0, batch: 28595, sum loss: 4749.550293, avg loss: 2.935445, ppl: 18.829882 +epoch: 0, batch: 28596, sum loss: 5629.806641, avg loss: 3.231806, ppl: 25.325361 +epoch: 0, batch: 28597, sum loss: 5425.176758, avg loss: 3.193159, ppl: 24.365273 +epoch: 0, batch: 28598, sum loss: 5779.942383, avg loss: 3.142981, ppl: 23.172850 +epoch: 0, batch: 28599, sum loss: 6347.208984, avg loss: 3.235071, ppl: 25.408169 +epoch: 0, batch: 28600, sum loss: 4510.755859, avg loss: 2.714053, ppl: 15.090311 +epoch: 0, batch: 28601, sum loss: 4231.663574, avg loss: 2.676574, ppl: 14.535211 +epoch: 0, batch: 28602, sum loss: 5083.179199, avg loss: 3.062156, ppl: 21.373592 +epoch: 0, batch: 28603, sum loss: 4860.729492, avg loss: 3.101933, ppl: 22.240911 +epoch: 0, batch: 28604, sum loss: 4830.037598, avg loss: 3.100152, ppl: 22.201336 +epoch: 0, batch: 28605, sum loss: 4295.609863, avg loss: 2.879095, ppl: 17.798161 +epoch: 0, batch: 28606, sum loss: 5297.157227, avg loss: 3.069036, ppl: 21.521156 +epoch: 0, batch: 28607, sum loss: 6433.632324, avg loss: 3.280792, ppl: 
26.596823 +epoch: 0, batch: 28608, sum loss: 4633.238281, avg loss: 2.814847, ppl: 16.690626 +epoch: 0, batch: 28609, sum loss: 4826.807129, avg loss: 2.986886, ppl: 19.823845 +epoch: 0, batch: 28610, sum loss: 5694.482422, avg loss: 3.091467, ppl: 22.009346 +epoch: 0, batch: 28611, sum loss: 5400.244141, avg loss: 3.040678, ppl: 20.919422 +epoch: 0, batch: 28612, sum loss: 6123.180176, avg loss: 3.055479, ppl: 21.231354 +epoch: 0, batch: 28613, sum loss: 4841.714355, avg loss: 2.932595, ppl: 18.776297 +epoch: 0, batch: 28614, sum loss: 5195.576660, avg loss: 2.888036, ppl: 17.958006 +epoch: 0, batch: 28615, sum loss: 5029.172852, avg loss: 3.009679, ppl: 20.280880 +epoch: 0, batch: 28616, sum loss: 4928.709473, avg loss: 3.111559, ppl: 22.456024 +epoch: 0, batch: 28617, sum loss: 6107.656250, avg loss: 3.391258, ppl: 29.703302 +epoch: 0, batch: 28618, sum loss: 4526.319824, avg loss: 3.021575, ppl: 20.523596 +epoch: 0, batch: 28619, sum loss: 4192.878418, avg loss: 2.776741, ppl: 16.066570 +epoch: 0, batch: 28620, sum loss: 5062.655273, avg loss: 2.948547, ppl: 19.078215 +epoch: 0, batch: 28621, sum loss: 5377.584961, avg loss: 3.074663, ppl: 21.642580 +epoch: 0, batch: 28622, sum loss: 5615.260742, avg loss: 3.240196, ppl: 25.538738 +epoch: 0, batch: 28623, sum loss: 5182.491211, avg loss: 2.936256, ppl: 18.845152 +epoch: 0, batch: 28624, sum loss: 4934.830078, avg loss: 3.086198, ppl: 21.893671 +epoch: 0, batch: 28625, sum loss: 5550.762207, avg loss: 3.251765, ppl: 25.835888 +epoch: 0, batch: 28626, sum loss: 4052.306641, avg loss: 2.810199, ppl: 16.613220 +epoch: 0, batch: 28627, sum loss: 4624.642578, avg loss: 2.910411, ppl: 18.364338 +epoch: 0, batch: 28628, sum loss: 4676.778320, avg loss: 3.113700, ppl: 22.504166 +epoch: 0, batch: 28629, sum loss: 5474.473145, avg loss: 2.818987, ppl: 16.759871 +epoch: 0, batch: 28630, sum loss: 4023.986572, avg loss: 3.009713, ppl: 20.281586 +epoch: 0, batch: 28631, sum loss: 4443.878418, avg loss: 2.812581, ppl: 
16.652849 +epoch: 0, batch: 28632, sum loss: 4496.744141, avg loss: 3.101203, ppl: 22.224670 +epoch: 0, batch: 28633, sum loss: 3644.267334, avg loss: 2.733884, ppl: 15.392557 +epoch: 0, batch: 28634, sum loss: 4825.371582, avg loss: 3.097158, ppl: 22.134949 +epoch: 0, batch: 28635, sum loss: 6241.931152, avg loss: 3.125654, ppl: 22.774790 +epoch: 0, batch: 28636, sum loss: 5407.259277, avg loss: 3.063603, ppl: 21.404537 +epoch: 0, batch: 28637, sum loss: 4848.865723, avg loss: 2.887949, ppl: 17.956434 +epoch: 0, batch: 28638, sum loss: 4475.964355, avg loss: 2.802733, ppl: 16.489651 +epoch: 0, batch: 28639, sum loss: 4812.757812, avg loss: 2.806273, ppl: 16.548124 +epoch: 0, batch: 28640, sum loss: 4741.698242, avg loss: 3.129834, ppl: 22.870182 +epoch: 0, batch: 28641, sum loss: 4785.058594, avg loss: 2.986928, ppl: 19.824688 +epoch: 0, batch: 28642, sum loss: 4943.429688, avg loss: 3.012450, ppl: 20.337154 +epoch: 0, batch: 28643, sum loss: 4488.777832, avg loss: 2.788061, ppl: 16.249475 +epoch: 0, batch: 28644, sum loss: 4947.781738, avg loss: 2.934628, ppl: 18.814493 +epoch: 0, batch: 28645, sum loss: 4940.260742, avg loss: 3.051427, ppl: 21.145506 +epoch: 0, batch: 28646, sum loss: 5132.010254, avg loss: 3.248108, ppl: 25.741583 +epoch: 0, batch: 28647, sum loss: 5148.272949, avg loss: 3.053543, ppl: 21.190285 +epoch: 0, batch: 28648, sum loss: 6015.080078, avg loss: 3.119855, ppl: 22.643095 +epoch: 0, batch: 28649, sum loss: 5596.698242, avg loss: 2.956523, ppl: 19.230988 +epoch: 0, batch: 28650, sum loss: 4696.517578, avg loss: 3.256947, ppl: 25.970131 +epoch: 0, batch: 28651, sum loss: 5300.757812, avg loss: 2.909307, ppl: 18.344086 +epoch: 0, batch: 28652, sum loss: 5016.065918, avg loss: 2.877835, ppl: 17.775743 +epoch: 0, batch: 28653, sum loss: 5964.563965, avg loss: 3.090448, ppl: 21.986919 +epoch: 0, batch: 28654, sum loss: 4173.950684, avg loss: 2.799431, ppl: 16.435286 +epoch: 0, batch: 28655, sum loss: 5677.431641, avg loss: 2.868839, ppl: 
17.616545 +epoch: 0, batch: 28656, sum loss: 4744.947266, avg loss: 3.071163, ppl: 21.566980 +epoch: 0, batch: 28657, sum loss: 4769.278320, avg loss: 2.992019, ppl: 19.925871 +epoch: 0, batch: 28658, sum loss: 5408.852539, avg loss: 3.217640, ppl: 24.969116 +epoch: 0, batch: 28659, sum loss: 4946.457031, avg loss: 3.030917, ppl: 20.716228 +epoch: 0, batch: 28660, sum loss: 4952.040039, avg loss: 2.890858, ppl: 18.008759 +epoch: 0, batch: 28661, sum loss: 6539.960938, avg loss: 3.274893, ppl: 26.440392 +epoch: 0, batch: 28662, sum loss: 4179.736328, avg loss: 2.831800, ppl: 16.975986 +epoch: 0, batch: 28663, sum loss: 4869.404297, avg loss: 3.005805, ppl: 20.202478 +epoch: 0, batch: 28664, sum loss: 5859.037109, avg loss: 3.101661, ppl: 22.234846 +epoch: 0, batch: 28665, sum loss: 4780.717285, avg loss: 3.080359, ppl: 21.766220 +epoch: 0, batch: 28666, sum loss: 5238.113281, avg loss: 3.024315, ppl: 20.579901 +epoch: 0, batch: 28667, sum loss: 5774.547363, avg loss: 3.086343, ppl: 21.896856 +epoch: 0, batch: 28668, sum loss: 4687.091309, avg loss: 2.849296, ppl: 17.275608 +epoch: 0, batch: 28669, sum loss: 4103.211426, avg loss: 2.910079, ppl: 18.358253 +epoch: 0, batch: 28670, sum loss: 4841.594727, avg loss: 2.906119, ppl: 18.285696 +epoch: 0, batch: 28671, sum loss: 4991.984375, avg loss: 3.005409, ppl: 20.194473 +epoch: 0, batch: 28672, sum loss: 5299.333984, avg loss: 3.057896, ppl: 21.282728 +epoch: 0, batch: 28673, sum loss: 5681.378906, avg loss: 3.229892, ppl: 25.276915 +epoch: 0, batch: 28674, sum loss: 4666.555664, avg loss: 2.861162, ppl: 17.481833 +epoch: 0, batch: 28675, sum loss: 5926.860352, avg loss: 3.147563, ppl: 23.279253 +epoch: 0, batch: 28676, sum loss: 5227.238281, avg loss: 3.025022, ppl: 20.594463 +epoch: 0, batch: 28677, sum loss: 4960.945312, avg loss: 3.012110, ppl: 20.330252 +epoch: 0, batch: 28678, sum loss: 5117.716797, avg loss: 3.066337, ppl: 21.463146 +epoch: 0, batch: 28679, sum loss: 5307.137695, avg loss: 3.114517, ppl: 
22.522560 +epoch: 0, batch: 28680, sum loss: 5326.750000, avg loss: 2.803553, ppl: 16.503176 +epoch: 0, batch: 28681, sum loss: 4529.438477, avg loss: 2.892362, ppl: 18.035854 +epoch: 0, batch: 28682, sum loss: 5803.199219, avg loss: 3.130096, ppl: 22.876186 +epoch: 0, batch: 28683, sum loss: 5001.212891, avg loss: 2.825544, ppl: 16.870121 +epoch: 0, batch: 28684, sum loss: 4970.777344, avg loss: 3.051429, ppl: 21.145531 +epoch: 0, batch: 28685, sum loss: 5271.039551, avg loss: 3.010303, ppl: 20.293539 +epoch: 0, batch: 28686, sum loss: 4780.265137, avg loss: 3.029319, ppl: 20.683138 +epoch: 0, batch: 28687, sum loss: 4888.013672, avg loss: 3.245693, ppl: 25.679504 +epoch: 0, batch: 28688, sum loss: 4353.645020, avg loss: 2.670948, ppl: 14.453662 +epoch: 0, batch: 28689, sum loss: 4384.028320, avg loss: 2.924635, ppl: 18.627434 +epoch: 0, batch: 28690, sum loss: 6375.911133, avg loss: 3.298454, ppl: 27.070749 +epoch: 0, batch: 28691, sum loss: 5307.291016, avg loss: 2.969945, ppl: 19.490843 +epoch: 0, batch: 28692, sum loss: 4988.011230, avg loss: 2.896638, ppl: 18.113153 +epoch: 0, batch: 28693, sum loss: 5468.806641, avg loss: 3.211278, ppl: 24.810780 +epoch: 0, batch: 28694, sum loss: 4978.068848, avg loss: 2.926554, ppl: 18.663210 +epoch: 0, batch: 28695, sum loss: 5349.407715, avg loss: 3.124654, ppl: 22.752024 +epoch: 0, batch: 28696, sum loss: 5811.006836, avg loss: 3.142783, ppl: 23.168264 +epoch: 0, batch: 28697, sum loss: 5488.073730, avg loss: 3.043857, ppl: 20.986027 +epoch: 0, batch: 28698, sum loss: 5898.505371, avg loss: 3.249865, ppl: 25.786865 +epoch: 0, batch: 28699, sum loss: 5024.670410, avg loss: 2.936687, ppl: 18.853273 +epoch: 0, batch: 28700, sum loss: 4979.715332, avg loss: 2.978299, ppl: 19.654354 +epoch: 0, batch: 28701, sum loss: 4819.369141, avg loss: 3.061861, ppl: 21.367281 +epoch: 0, batch: 28702, sum loss: 6644.525391, avg loss: 3.233346, ppl: 25.364372 +epoch: 0, batch: 28703, sum loss: 4536.378906, avg loss: 2.951450, ppl: 
19.133684 +epoch: 0, batch: 28704, sum loss: 5446.966797, avg loss: 3.082607, ppl: 21.815207 +epoch: 0, batch: 28705, sum loss: 6375.677734, avg loss: 3.355620, ppl: 28.663374 +epoch: 0, batch: 28706, sum loss: 5373.479980, avg loss: 2.962227, ppl: 19.340998 +epoch: 0, batch: 28707, sum loss: 4797.475586, avg loss: 2.932442, ppl: 18.773428 +epoch: 0, batch: 28708, sum loss: 5563.252930, avg loss: 3.073621, ppl: 21.620037 +epoch: 0, batch: 28709, sum loss: 4515.709961, avg loss: 2.699169, ppl: 14.867374 +epoch: 0, batch: 28710, sum loss: 4806.214355, avg loss: 3.045763, ppl: 21.026073 +epoch: 0, batch: 28711, sum loss: 4413.142578, avg loss: 2.805558, ppl: 16.536297 +epoch: 0, batch: 28712, sum loss: 5544.549805, avg loss: 3.056533, ppl: 21.253735 +epoch: 0, batch: 28713, sum loss: 5105.178711, avg loss: 2.945862, ppl: 19.027054 +epoch: 0, batch: 28714, sum loss: 4875.288086, avg loss: 3.000177, ppl: 20.089096 +epoch: 0, batch: 28715, sum loss: 4617.699707, avg loss: 2.961963, ppl: 19.335886 +epoch: 0, batch: 28716, sum loss: 5704.759766, avg loss: 3.256142, ppl: 25.949223 +epoch: 0, batch: 28717, sum loss: 5258.681641, avg loss: 3.301118, ppl: 27.142979 +epoch: 0, batch: 28718, sum loss: 3862.640625, avg loss: 2.897705, ppl: 18.132481 +epoch: 0, batch: 28719, sum loss: 5505.134277, avg loss: 3.089301, ppl: 21.961723 +epoch: 0, batch: 28720, sum loss: 5907.774414, avg loss: 3.318974, ppl: 27.632000 +epoch: 0, batch: 28721, sum loss: 4526.384766, avg loss: 2.966176, ppl: 19.417530 +epoch: 0, batch: 28722, sum loss: 5178.340332, avg loss: 2.974348, ppl: 19.576860 +epoch: 0, batch: 28723, sum loss: 4440.425781, avg loss: 2.940679, ppl: 18.928699 +epoch: 0, batch: 28724, sum loss: 4853.869141, avg loss: 3.115449, ppl: 22.543549 +epoch: 0, batch: 28725, sum loss: 3972.455566, avg loss: 2.666078, ppl: 14.383441 +epoch: 0, batch: 28726, sum loss: 4789.121582, avg loss: 3.025345, ppl: 20.601112 +epoch: 0, batch: 28727, sum loss: 5777.534668, avg loss: 3.084642, ppl: 
21.859644 +epoch: 0, batch: 28728, sum loss: 4410.449219, avg loss: 2.890203, ppl: 17.996958 +epoch: 0, batch: 28729, sum loss: 5338.901367, avg loss: 2.987634, ppl: 19.838682 +epoch: 0, batch: 28730, sum loss: 5739.690430, avg loss: 3.215513, ppl: 24.916071 +epoch: 0, batch: 28731, sum loss: 4978.479004, avg loss: 2.938890, ppl: 18.894852 +epoch: 0, batch: 28732, sum loss: 5370.911133, avg loss: 3.007229, ppl: 20.231264 +epoch: 0, batch: 28733, sum loss: 4989.514648, avg loss: 2.773493, ppl: 16.014479 +epoch: 0, batch: 28734, sum loss: 5846.166016, avg loss: 3.080172, ppl: 21.762146 +epoch: 0, batch: 28735, sum loss: 4803.064453, avg loss: 2.837014, ppl: 17.064734 +epoch: 0, batch: 28736, sum loss: 5517.111816, avg loss: 3.039731, ppl: 20.899622 +epoch: 0, batch: 28737, sum loss: 6392.031738, avg loss: 3.212076, ppl: 24.830585 +epoch: 0, batch: 28738, sum loss: 4897.181641, avg loss: 3.232463, ppl: 25.342001 +epoch: 0, batch: 28739, sum loss: 5648.314941, avg loss: 2.960333, ppl: 19.304392 +epoch: 0, batch: 28740, sum loss: 4831.678223, avg loss: 2.933624, ppl: 18.795614 +epoch: 0, batch: 28741, sum loss: 5681.547363, avg loss: 3.233664, ppl: 25.372446 +epoch: 0, batch: 28742, sum loss: 4808.803223, avg loss: 2.864088, ppl: 17.533049 +epoch: 0, batch: 28743, sum loss: 4922.741211, avg loss: 3.057603, ppl: 21.276499 +epoch: 0, batch: 28744, sum loss: 4796.335938, avg loss: 3.016563, ppl: 20.420992 +epoch: 0, batch: 28745, sum loss: 5234.431641, avg loss: 2.969048, ppl: 19.473373 +epoch: 0, batch: 28746, sum loss: 5233.662109, avg loss: 3.025238, ppl: 20.598907 +epoch: 0, batch: 28747, sum loss: 5149.808594, avg loss: 3.022188, ppl: 20.536180 +epoch: 0, batch: 28748, sum loss: 5214.074219, avg loss: 3.045604, ppl: 21.022724 +epoch: 0, batch: 28749, sum loss: 6162.388672, avg loss: 3.263977, ppl: 26.153343 +epoch: 0, batch: 28750, sum loss: 5669.481934, avg loss: 3.172626, ppl: 23.870073 +epoch: 0, batch: 28751, sum loss: 5448.822266, avg loss: 3.235643, ppl: 
25.422710 +epoch: 0, batch: 28752, sum loss: 4923.714844, avg loss: 2.852674, ppl: 17.334063 +epoch: 0, batch: 28753, sum loss: 4770.136230, avg loss: 2.861509, ppl: 17.487902 +epoch: 0, batch: 28754, sum loss: 4070.491699, avg loss: 2.717284, ppl: 15.139148 +epoch: 0, batch: 28755, sum loss: 5844.088379, avg loss: 3.079077, ppl: 21.738335 +epoch: 0, batch: 28756, sum loss: 5566.034668, avg loss: 2.911106, ppl: 18.377110 +epoch: 0, batch: 28757, sum loss: 4999.411133, avg loss: 2.965250, ppl: 19.399548 +epoch: 0, batch: 28758, sum loss: 4848.855469, avg loss: 3.032430, ppl: 20.747587 +epoch: 0, batch: 28759, sum loss: 5258.303223, avg loss: 2.691046, ppl: 14.747087 +epoch: 0, batch: 28760, sum loss: 6927.300293, avg loss: 3.240084, ppl: 25.535877 +epoch: 0, batch: 28761, sum loss: 4512.536133, avg loss: 2.728256, ppl: 15.306177 +epoch: 0, batch: 28762, sum loss: 4444.451172, avg loss: 2.886007, ppl: 17.921612 +epoch: 0, batch: 28763, sum loss: 4938.586914, avg loss: 2.991270, ppl: 19.910959 +epoch: 0, batch: 28764, sum loss: 6426.899414, avg loss: 3.145815, ppl: 23.238600 +epoch: 0, batch: 28765, sum loss: 5997.802734, avg loss: 3.085290, ppl: 21.873802 +epoch: 0, batch: 28766, sum loss: 5041.669922, avg loss: 3.024397, ppl: 20.581589 +epoch: 0, batch: 28767, sum loss: 5077.780273, avg loss: 3.029702, ppl: 20.691065 +epoch: 0, batch: 28768, sum loss: 5835.843750, avg loss: 3.102522, ppl: 22.254004 +epoch: 0, batch: 28769, sum loss: 5666.677246, avg loss: 3.335302, ppl: 28.086855 +epoch: 0, batch: 28770, sum loss: 4642.617188, avg loss: 2.919885, ppl: 18.539154 +epoch: 0, batch: 28771, sum loss: 5305.860352, avg loss: 2.752002, ppl: 15.673987 +epoch: 0, batch: 28772, sum loss: 5839.988281, avg loss: 3.111342, ppl: 22.451147 +epoch: 0, batch: 28773, sum loss: 6011.920898, avg loss: 3.235695, ppl: 25.424026 +epoch: 0, batch: 28774, sum loss: 5575.439941, avg loss: 3.224662, ppl: 25.145063 +epoch: 0, batch: 28775, sum loss: 4963.427734, avg loss: 2.930005, ppl: 
18.727716 +epoch: 0, batch: 28776, sum loss: 5775.062988, avg loss: 3.031529, ppl: 20.728907 +epoch: 0, batch: 28777, sum loss: 4691.229492, avg loss: 2.988044, ppl: 19.846828 +epoch: 0, batch: 28778, sum loss: 4404.354980, avg loss: 2.819690, ppl: 16.771643 +epoch: 0, batch: 28779, sum loss: 4368.195801, avg loss: 2.778750, ppl: 16.098892 +epoch: 0, batch: 28780, sum loss: 4215.559570, avg loss: 2.753468, ppl: 15.696979 +epoch: 0, batch: 28781, sum loss: 4308.511719, avg loss: 2.827107, ppl: 16.896515 +epoch: 0, batch: 28782, sum loss: 6132.921387, avg loss: 3.258725, ppl: 26.016356 +epoch: 0, batch: 28783, sum loss: 4793.308594, avg loss: 2.804745, ppl: 16.522858 +epoch: 0, batch: 28784, sum loss: 6453.523926, avg loss: 3.306109, ppl: 27.278769 +epoch: 0, batch: 28785, sum loss: 5872.222168, avg loss: 3.198378, ppl: 24.492773 +epoch: 0, batch: 28786, sum loss: 4759.384277, avg loss: 2.893243, ppl: 18.051750 +epoch: 0, batch: 28787, sum loss: 5571.684082, avg loss: 3.142518, ppl: 23.162117 +epoch: 0, batch: 28788, sum loss: 4598.787109, avg loss: 2.830023, ppl: 16.945847 +epoch: 0, batch: 28789, sum loss: 6091.803223, avg loss: 3.360068, ppl: 28.791151 +epoch: 0, batch: 28790, sum loss: 4742.809082, avg loss: 3.120269, ppl: 22.652479 +epoch: 0, batch: 28791, sum loss: 5229.264160, avg loss: 3.092409, ppl: 22.030092 +epoch: 0, batch: 28792, sum loss: 4649.874023, avg loss: 2.748152, ppl: 15.613759 +epoch: 0, batch: 28793, sum loss: 5222.835938, avg loss: 3.010280, ppl: 20.293083 +epoch: 0, batch: 28794, sum loss: 5422.347168, avg loss: 3.137932, ppl: 23.056145 +epoch: 0, batch: 28795, sum loss: 5655.067383, avg loss: 3.093582, ppl: 22.055933 +epoch: 0, batch: 28796, sum loss: 5692.752441, avg loss: 3.167920, ppl: 23.758024 +epoch: 0, batch: 28797, sum loss: 4730.356934, avg loss: 2.839350, ppl: 17.104639 +epoch: 0, batch: 28798, sum loss: 5728.531250, avg loss: 3.068308, ppl: 21.505487 +epoch: 0, batch: 28799, sum loss: 5396.953613, avg loss: 3.057764, ppl: 
21.279922 +epoch: 0, batch: 28800, sum loss: 4621.879883, avg loss: 2.993446, ppl: 19.954319 +epoch: 0, batch: 28801, sum loss: 5619.100098, avg loss: 3.118258, ppl: 22.606958 +epoch: 0, batch: 28802, sum loss: 6129.904785, avg loss: 3.209374, ppl: 24.763584 +epoch: 0, batch: 28803, sum loss: 3896.708008, avg loss: 2.852641, ppl: 17.333506 +epoch: 0, batch: 28804, sum loss: 5969.684570, avg loss: 3.351872, ppl: 28.556154 +epoch: 0, batch: 28805, sum loss: 5213.084961, avg loss: 2.965350, ppl: 19.401485 +epoch: 0, batch: 28806, sum loss: 4539.427246, avg loss: 2.963073, ppl: 19.357361 +epoch: 0, batch: 28807, sum loss: 5236.496094, avg loss: 2.935256, ppl: 18.826317 +epoch: 0, batch: 28808, sum loss: 5053.417480, avg loss: 2.747916, ppl: 15.610066 +epoch: 0, batch: 28809, sum loss: 5781.733398, avg loss: 3.161145, ppl: 23.597593 +epoch: 0, batch: 28810, sum loss: 4357.860840, avg loss: 2.893666, ppl: 18.059395 +epoch: 0, batch: 28811, sum loss: 4186.010254, avg loss: 2.693700, ppl: 14.786288 +epoch: 0, batch: 28812, sum loss: 5278.967773, avg loss: 3.044387, ppl: 20.997164 +epoch: 0, batch: 28813, sum loss: 4121.708008, avg loss: 2.860311, ppl: 17.466959 +epoch: 0, batch: 28814, sum loss: 5232.678223, avg loss: 3.024669, ppl: 20.587198 +epoch: 0, batch: 28815, sum loss: 5045.144531, avg loss: 2.864932, ppl: 17.547853 +epoch: 0, batch: 28816, sum loss: 5559.779785, avg loss: 3.206332, ppl: 24.688362 +epoch: 0, batch: 28817, sum loss: 3964.457764, avg loss: 2.653586, ppl: 14.204888 +epoch: 0, batch: 28818, sum loss: 5548.156738, avg loss: 3.082309, ppl: 21.808712 +epoch: 0, batch: 28819, sum loss: 3858.638428, avg loss: 2.732747, ppl: 15.375062 +epoch: 0, batch: 28820, sum loss: 4441.669922, avg loss: 2.880461, ppl: 17.822479 +epoch: 0, batch: 28821, sum loss: 4900.716309, avg loss: 3.176096, ppl: 23.953062 +epoch: 0, batch: 28822, sum loss: 6142.604492, avg loss: 3.314951, ppl: 27.521049 +epoch: 0, batch: 28823, sum loss: 5907.039062, avg loss: 3.208604, ppl: 
24.744509 +epoch: 0, batch: 28824, sum loss: 4231.604004, avg loss: 2.969547, ppl: 19.483084 +epoch: 0, batch: 28825, sum loss: 6549.641113, avg loss: 3.286323, ppl: 26.744333 +epoch: 0, batch: 28826, sum loss: 4985.509277, avg loss: 2.965800, ppl: 19.410217 +epoch: 0, batch: 28827, sum loss: 5744.304688, avg loss: 3.245370, ppl: 25.671204 +epoch: 0, batch: 28828, sum loss: 5564.659668, avg loss: 3.224020, ppl: 25.128948 +epoch: 0, batch: 28829, sum loss: 4201.119629, avg loss: 2.857904, ppl: 17.424973 +epoch: 0, batch: 28830, sum loss: 5982.426758, avg loss: 3.360914, ppl: 28.815516 +epoch: 0, batch: 28831, sum loss: 4087.461914, avg loss: 2.868394, ppl: 17.608719 +epoch: 0, batch: 28832, sum loss: 5229.600098, avg loss: 3.146570, ppl: 23.256170 +epoch: 0, batch: 28833, sum loss: 5450.103516, avg loss: 3.174201, ppl: 23.907717 +epoch: 0, batch: 28834, sum loss: 5241.457031, avg loss: 3.136719, ppl: 23.028187 +epoch: 0, batch: 28835, sum loss: 3815.763672, avg loss: 2.892922, ppl: 18.045958 +epoch: 0, batch: 28836, sum loss: 4652.753906, avg loss: 2.971107, ppl: 19.513514 +epoch: 0, batch: 28837, sum loss: 4398.331055, avg loss: 2.715019, ppl: 15.104900 +epoch: 0, batch: 28838, sum loss: 6020.149414, avg loss: 3.040479, ppl: 20.915262 +epoch: 0, batch: 28839, sum loss: 5250.014648, avg loss: 3.095528, ppl: 22.098896 +epoch: 0, batch: 28840, sum loss: 6653.307129, avg loss: 3.288832, ppl: 26.811522 +epoch: 0, batch: 28841, sum loss: 4972.172852, avg loss: 2.872428, ppl: 17.679892 +epoch: 0, batch: 28842, sum loss: 5338.215820, avg loss: 3.121764, ppl: 22.686356 +epoch: 0, batch: 28843, sum loss: 4810.556641, avg loss: 2.846483, ppl: 17.227091 +epoch: 0, batch: 28844, sum loss: 4586.451172, avg loss: 2.715483, ppl: 15.111910 +epoch: 0, batch: 28845, sum loss: 4545.197266, avg loss: 2.982413, ppl: 19.735376 +epoch: 0, batch: 28846, sum loss: 5370.449219, avg loss: 3.060085, ppl: 21.329370 +epoch: 0, batch: 28847, sum loss: 5340.869141, avg loss: 3.046702, ppl: 
21.045830 +epoch: 0, batch: 28848, sum loss: 4458.293945, avg loss: 2.859714, ppl: 17.456533 +epoch: 0, batch: 28849, sum loss: 5349.562500, avg loss: 3.224571, ppl: 25.142773 +epoch: 0, batch: 28850, sum loss: 3768.495117, avg loss: 2.742718, ppl: 15.529143 +epoch: 0, batch: 28851, sum loss: 4825.044922, avg loss: 2.917198, ppl: 18.489403 +epoch: 0, batch: 28852, sum loss: 5402.516113, avg loss: 2.855453, ppl: 17.382301 +epoch: 0, batch: 28853, sum loss: 5321.650879, avg loss: 3.099389, ppl: 22.184395 +epoch: 0, batch: 28854, sum loss: 5901.588379, avg loss: 3.284134, ppl: 26.685862 +epoch: 0, batch: 28855, sum loss: 5131.075195, avg loss: 3.004142, ppl: 20.168909 +epoch: 0, batch: 28856, sum loss: 5888.345215, avg loss: 3.062062, ppl: 21.371576 +epoch: 0, batch: 28857, sum loss: 5470.199707, avg loss: 3.010567, ppl: 20.298901 +epoch: 0, batch: 28858, sum loss: 5044.294922, avg loss: 2.979501, ppl: 19.677996 +epoch: 0, batch: 28859, sum loss: 4617.279297, avg loss: 2.900301, ppl: 18.179617 +epoch: 0, batch: 28860, sum loss: 5161.878418, avg loss: 3.128411, ppl: 22.837667 +epoch: 0, batch: 28861, sum loss: 4615.714844, avg loss: 2.847449, ppl: 17.243734 +epoch: 0, batch: 28862, sum loss: 5612.122070, avg loss: 3.270468, ppl: 26.323643 +epoch: 0, batch: 28863, sum loss: 3791.765625, avg loss: 2.553377, ppl: 12.850431 +epoch: 0, batch: 28864, sum loss: 3891.702393, avg loss: 2.577286, ppl: 13.161376 +epoch: 0, batch: 28865, sum loss: 4144.466309, avg loss: 2.926883, ppl: 18.669348 +epoch: 0, batch: 28866, sum loss: 4278.635254, avg loss: 3.021635, ppl: 20.524824 +epoch: 0, batch: 28867, sum loss: 5223.043945, avg loss: 3.050843, ppl: 21.133158 +epoch: 0, batch: 28868, sum loss: 4496.466309, avg loss: 3.164297, ppl: 23.672100 +epoch: 0, batch: 28869, sum loss: 4901.726562, avg loss: 3.170586, ppl: 23.821447 +epoch: 0, batch: 28870, sum loss: 4884.826660, avg loss: 2.923295, ppl: 18.602491 +epoch: 0, batch: 28871, sum loss: 6307.765625, avg loss: 3.362349, ppl: 
28.856884 +epoch: 0, batch: 28872, sum loss: 4936.484863, avg loss: 3.211766, ppl: 24.822897 +epoch: 0, batch: 28873, sum loss: 5286.841309, avg loss: 2.976825, ppl: 19.625412 +epoch: 0, batch: 28874, sum loss: 4693.172852, avg loss: 2.851259, ppl: 17.309568 +epoch: 0, batch: 28875, sum loss: 5230.005859, avg loss: 3.007479, ppl: 20.236319 +epoch: 0, batch: 28876, sum loss: 5241.920898, avg loss: 3.058297, ppl: 21.291265 +epoch: 0, batch: 28877, sum loss: 5142.530762, avg loss: 2.976002, ppl: 19.609257 +epoch: 0, batch: 28878, sum loss: 5380.009277, avg loss: 3.019085, ppl: 20.472549 +epoch: 0, batch: 28879, sum loss: 5692.693359, avg loss: 3.093855, ppl: 22.061960 +epoch: 0, batch: 28880, sum loss: 5317.185547, avg loss: 3.361053, ppl: 28.819508 +epoch: 0, batch: 28881, sum loss: 5144.338867, avg loss: 3.106485, ppl: 22.342371 +epoch: 0, batch: 28882, sum loss: 4835.131836, avg loss: 2.837519, ppl: 17.073349 +epoch: 0, batch: 28883, sum loss: 4829.201172, avg loss: 2.992070, ppl: 19.926893 +epoch: 0, batch: 28884, sum loss: 4623.315430, avg loss: 2.735690, ppl: 15.420374 +epoch: 0, batch: 28885, sum loss: 4809.225098, avg loss: 2.909392, ppl: 18.345638 +epoch: 0, batch: 28886, sum loss: 4677.496094, avg loss: 2.772671, ppl: 16.001320 +epoch: 0, batch: 28887, sum loss: 6006.721191, avg loss: 3.033697, ppl: 20.773899 +epoch: 0, batch: 28888, sum loss: 4429.756348, avg loss: 2.845059, ppl: 17.202568 +epoch: 0, batch: 28889, sum loss: 5624.024414, avg loss: 3.152480, ppl: 23.394012 +epoch: 0, batch: 28890, sum loss: 5268.396484, avg loss: 3.301000, ppl: 27.139774 +epoch: 0, batch: 28891, sum loss: 4338.697266, avg loss: 2.867612, ppl: 17.594957 +epoch: 0, batch: 28892, sum loss: 5440.868164, avg loss: 3.067006, ppl: 21.477495 +epoch: 0, batch: 28893, sum loss: 5033.997070, avg loss: 2.868374, ppl: 17.608370 +epoch: 0, batch: 28894, sum loss: 4981.122559, avg loss: 2.947410, ppl: 19.056530 +epoch: 0, batch: 28895, sum loss: 5316.802734, avg loss: 3.098370, ppl: 
22.161800 +epoch: 0, batch: 28896, sum loss: 5392.985840, avg loss: 3.007800, ppl: 20.242823 +epoch: 0, batch: 28897, sum loss: 5026.702637, avg loss: 2.774118, ppl: 16.024494 +epoch: 0, batch: 28898, sum loss: 6053.551758, avg loss: 3.247614, ppl: 25.728868 +epoch: 0, batch: 28899, sum loss: 5132.839844, avg loss: 2.823344, ppl: 16.833050 +epoch: 0, batch: 28900, sum loss: 4758.965332, avg loss: 2.829349, ppl: 16.934433 +epoch: 0, batch: 28901, sum loss: 4677.635254, avg loss: 3.037426, ppl: 20.851492 +epoch: 0, batch: 28902, sum loss: 4888.185059, avg loss: 3.043702, ppl: 20.982771 +epoch: 0, batch: 28903, sum loss: 5416.974121, avg loss: 2.948816, ppl: 19.083338 +epoch: 0, batch: 28904, sum loss: 4695.335938, avg loss: 2.929093, ppl: 18.710649 +epoch: 0, batch: 28905, sum loss: 5250.733887, avg loss: 3.054528, ppl: 21.211176 +epoch: 0, batch: 28906, sum loss: 3914.713135, avg loss: 2.743317, ppl: 15.538439 +epoch: 0, batch: 28907, sum loss: 4584.947754, avg loss: 2.935306, ppl: 18.827261 +epoch: 0, batch: 28908, sum loss: 4851.538574, avg loss: 2.797888, ppl: 16.409956 +epoch: 0, batch: 28909, sum loss: 5460.620117, avg loss: 3.258127, ppl: 26.000786 +epoch: 0, batch: 28910, sum loss: 4367.337402, avg loss: 2.845171, ppl: 17.204500 +epoch: 0, batch: 28911, sum loss: 5024.047363, avg loss: 3.001223, ppl: 20.110123 +epoch: 0, batch: 28912, sum loss: 5380.369629, avg loss: 3.142739, ppl: 23.167236 +epoch: 0, batch: 28913, sum loss: 3925.172119, avg loss: 2.707015, ppl: 14.984484 +epoch: 0, batch: 28914, sum loss: 5142.839355, avg loss: 3.352568, ppl: 28.576019 +epoch: 0, batch: 28915, sum loss: 5299.265625, avg loss: 3.004119, ppl: 20.168438 +epoch: 0, batch: 28916, sum loss: 5956.299316, avg loss: 3.260153, ppl: 26.053518 +epoch: 0, batch: 28917, sum loss: 4061.154297, avg loss: 2.900825, ppl: 18.189137 +epoch: 0, batch: 28918, sum loss: 5622.871094, avg loss: 3.039390, ppl: 20.892492 +epoch: 0, batch: 28919, sum loss: 4403.078125, avg loss: 2.939304, ppl: 
18.902693 +epoch: 0, batch: 28920, sum loss: 4306.004883, avg loss: 2.836630, ppl: 17.058187 +epoch: 0, batch: 28921, sum loss: 5150.821289, avg loss: 3.024557, ppl: 20.584890 +epoch: 0, batch: 28922, sum loss: 6101.040039, avg loss: 3.273090, ppl: 26.392771 +epoch: 0, batch: 28923, sum loss: 4811.779785, avg loss: 3.126563, ppl: 22.795486 +epoch: 0, batch: 28924, sum loss: 5506.839844, avg loss: 3.125335, ppl: 22.767521 +epoch: 0, batch: 28925, sum loss: 5377.125000, avg loss: 3.000628, ppl: 20.098154 +epoch: 0, batch: 28926, sum loss: 4334.604980, avg loss: 2.820173, ppl: 16.779745 +epoch: 0, batch: 28927, sum loss: 4652.747070, avg loss: 2.859709, ppl: 17.456451 +epoch: 0, batch: 28928, sum loss: 4901.677246, avg loss: 2.856455, ppl: 17.399740 +epoch: 0, batch: 28929, sum loss: 4040.004883, avg loss: 2.679048, ppl: 14.571219 +epoch: 0, batch: 28930, sum loss: 4840.171387, avg loss: 3.290395, ppl: 26.853476 +epoch: 0, batch: 28931, sum loss: 4382.062500, avg loss: 2.989129, ppl: 19.868361 +epoch: 0, batch: 28932, sum loss: 4669.538574, avg loss: 3.072065, ppl: 21.586430 +epoch: 0, batch: 28933, sum loss: 5042.937500, avg loss: 3.111004, ppl: 22.443563 +epoch: 0, batch: 28934, sum loss: 4716.371094, avg loss: 2.802360, ppl: 16.483496 +epoch: 0, batch: 28935, sum loss: 5306.900391, avg loss: 2.876369, ppl: 17.749702 +epoch: 0, batch: 28936, sum loss: 5526.194336, avg loss: 3.269938, ppl: 26.309696 +epoch: 0, batch: 28937, sum loss: 5559.047852, avg loss: 3.031106, ppl: 20.720127 +epoch: 0, batch: 28938, sum loss: 5275.846680, avg loss: 3.230769, ppl: 25.299114 +epoch: 0, batch: 28939, sum loss: 5230.414551, avg loss: 3.014648, ppl: 20.381920 +epoch: 0, batch: 28940, sum loss: 4609.024414, avg loss: 2.695336, ppl: 14.810493 +epoch: 0, batch: 28941, sum loss: 4800.493164, avg loss: 2.902354, ppl: 18.216974 +epoch: 0, batch: 28942, sum loss: 5153.074219, avg loss: 3.121184, ppl: 22.673206 +epoch: 0, batch: 28943, sum loss: 4871.523438, avg loss: 2.981349, ppl: 
19.714384 +epoch: 0, batch: 28944, sum loss: 4928.481445, avg loss: 2.967177, ppl: 19.436974 +epoch: 0, batch: 28945, sum loss: 5085.978516, avg loss: 2.936477, ppl: 18.849327 +epoch: 0, batch: 28946, sum loss: 5180.624023, avg loss: 2.982512, ppl: 19.737345 +epoch: 0, batch: 28947, sum loss: 5221.002930, avg loss: 3.056793, ppl: 21.259274 +epoch: 0, batch: 28948, sum loss: 6064.989258, avg loss: 3.197148, ppl: 24.462660 +epoch: 0, batch: 28949, sum loss: 4564.306641, avg loss: 3.004810, ppl: 20.182383 +epoch: 0, batch: 28950, sum loss: 4823.306152, avg loss: 3.121881, ppl: 22.689024 +epoch: 0, batch: 28951, sum loss: 4854.062500, avg loss: 3.125604, ppl: 22.773643 +epoch: 0, batch: 28952, sum loss: 6093.941406, avg loss: 3.319140, ppl: 27.636578 +epoch: 0, batch: 28953, sum loss: 4874.249512, avg loss: 3.232261, ppl: 25.336878 +epoch: 0, batch: 28954, sum loss: 4249.321289, avg loss: 2.900561, ppl: 18.184338 +epoch: 0, batch: 28955, sum loss: 5796.165527, avg loss: 3.184706, ppl: 24.160194 +epoch: 0, batch: 28956, sum loss: 4628.064453, avg loss: 2.910732, ppl: 18.370245 +epoch: 0, batch: 28957, sum loss: 3962.881836, avg loss: 2.832653, ppl: 16.990482 +epoch: 0, batch: 28958, sum loss: 5794.462891, avg loss: 3.189028, ppl: 24.264820 +epoch: 0, batch: 28959, sum loss: 5364.133789, avg loss: 2.944091, ppl: 18.993391 +epoch: 0, batch: 28960, sum loss: 4800.434570, avg loss: 3.103060, ppl: 22.265991 +epoch: 0, batch: 28961, sum loss: 4647.895508, avg loss: 2.926887, ppl: 18.669415 +epoch: 0, batch: 28962, sum loss: 7144.745117, avg loss: 3.234380, ppl: 25.390619 +epoch: 0, batch: 28963, sum loss: 5445.267578, avg loss: 3.129464, ppl: 22.861725 +epoch: 0, batch: 28964, sum loss: 5362.970215, avg loss: 2.914658, ppl: 18.442497 +epoch: 0, batch: 28965, sum loss: 5408.465332, avg loss: 3.128089, ppl: 22.830303 +epoch: 0, batch: 28966, sum loss: 6244.828125, avg loss: 3.278125, ppl: 26.525991 +epoch: 0, batch: 28967, sum loss: 4980.440918, avg loss: 2.947006, ppl: 
19.048845 +epoch: 0, batch: 28968, sum loss: 5514.154297, avg loss: 3.156356, ppl: 23.484863 +epoch: 0, batch: 28969, sum loss: 4946.650391, avg loss: 2.974534, ppl: 19.580502 +epoch: 0, batch: 28970, sum loss: 4297.463867, avg loss: 2.818009, ppl: 16.743483 +epoch: 0, batch: 28971, sum loss: 5963.276367, avg loss: 3.118868, ppl: 22.620766 +epoch: 0, batch: 28972, sum loss: 4940.948730, avg loss: 3.014612, ppl: 20.381176 +epoch: 0, batch: 28973, sum loss: 5132.682617, avg loss: 3.001569, ppl: 20.117071 +epoch: 0, batch: 28974, sum loss: 4534.389648, avg loss: 2.850025, ppl: 17.288214 +epoch: 0, batch: 28975, sum loss: 5469.526855, avg loss: 3.267340, ppl: 26.241444 +epoch: 0, batch: 28976, sum loss: 4622.769531, avg loss: 2.811903, ppl: 16.641565 +epoch: 0, batch: 28977, sum loss: 4805.986816, avg loss: 2.838740, ppl: 17.094215 +epoch: 0, batch: 28978, sum loss: 5606.612793, avg loss: 3.128690, ppl: 22.844040 +epoch: 0, batch: 28979, sum loss: 4448.707520, avg loss: 2.792660, ppl: 16.324389 +epoch: 0, batch: 28980, sum loss: 5519.162109, avg loss: 3.119934, ppl: 22.644876 +epoch: 0, batch: 28981, sum loss: 5428.099609, avg loss: 3.132198, ppl: 22.924314 +epoch: 0, batch: 28982, sum loss: 4881.220703, avg loss: 3.179948, ppl: 24.045511 +epoch: 0, batch: 28983, sum loss: 6082.295410, avg loss: 3.325476, ppl: 27.812239 +epoch: 0, batch: 28984, sum loss: 4697.844238, avg loss: 3.182821, ppl: 24.114693 +epoch: 0, batch: 28985, sum loss: 4940.662109, avg loss: 2.743288, ppl: 15.537991 +epoch: 0, batch: 28986, sum loss: 5421.801270, avg loss: 2.979012, ppl: 19.668369 +epoch: 0, batch: 28987, sum loss: 5770.708984, avg loss: 3.186477, ppl: 24.203003 +epoch: 0, batch: 28988, sum loss: 4129.794922, avg loss: 2.792289, ppl: 16.318323 +epoch: 0, batch: 28989, sum loss: 5656.803711, avg loss: 3.096225, ppl: 22.114317 +epoch: 0, batch: 28990, sum loss: 4986.627930, avg loss: 3.072476, ppl: 21.595299 +epoch: 0, batch: 28991, sum loss: 5494.536133, avg loss: 3.099005, ppl: 
22.175880 +epoch: 0, batch: 28992, sum loss: 4845.269531, avg loss: 2.924122, ppl: 18.617865 +epoch: 0, batch: 28993, sum loss: 5234.404297, avg loss: 2.898341, ppl: 18.144022 +epoch: 0, batch: 28994, sum loss: 5191.804688, avg loss: 2.885939, ppl: 17.920389 +epoch: 0, batch: 28995, sum loss: 4649.897461, avg loss: 2.725614, ppl: 15.265781 +epoch: 0, batch: 28996, sum loss: 4988.203125, avg loss: 2.985161, ppl: 19.789680 +epoch: 0, batch: 28997, sum loss: 5428.636719, avg loss: 3.145212, ppl: 23.224607 +epoch: 0, batch: 28998, sum loss: 4330.727539, avg loss: 2.739233, ppl: 15.475111 +epoch: 0, batch: 28999, sum loss: 5855.376465, avg loss: 3.278486, ppl: 26.535580 +epoch: 0, batch: 29000, sum loss: 5289.864746, avg loss: 2.940447, ppl: 18.924311 +epoch: 0, batch: 29001, sum loss: 5570.518066, avg loss: 2.939587, ppl: 18.908043 +epoch: 0, batch: 29002, sum loss: 5000.959473, avg loss: 2.806375, ppl: 16.549812 +epoch: 0, batch: 29003, sum loss: 4313.078125, avg loss: 3.071993, ppl: 21.584875 +epoch: 0, batch: 29004, sum loss: 5433.799316, avg loss: 3.186979, ppl: 24.215170 +epoch: 0, batch: 29005, sum loss: 5518.408691, avg loss: 3.084633, ppl: 21.859446 +epoch: 0, batch: 29006, sum loss: 4388.002441, avg loss: 2.678878, ppl: 14.568739 +epoch: 0, batch: 29007, sum loss: 4646.693359, avg loss: 2.550326, ppl: 12.811275 +epoch: 0, batch: 29008, sum loss: 6187.536621, avg loss: 3.160131, ppl: 23.573689 +epoch: 0, batch: 29009, sum loss: 5088.814941, avg loss: 2.926288, ppl: 18.658245 +epoch: 0, batch: 29010, sum loss: 4212.684570, avg loss: 2.623091, ppl: 13.778250 +epoch: 0, batch: 29011, sum loss: 3932.826904, avg loss: 2.831409, ppl: 16.969349 +epoch: 0, batch: 29012, sum loss: 4832.746094, avg loss: 2.986864, ppl: 19.823420 +epoch: 0, batch: 29013, sum loss: 5675.018066, avg loss: 2.969659, ppl: 19.485271 +epoch: 0, batch: 29014, sum loss: 5948.779785, avg loss: 3.194834, ppl: 24.406111 +epoch: 0, batch: 29015, sum loss: 4907.868164, avg loss: 2.969067, ppl: 
19.473743 +epoch: 0, batch: 29016, sum loss: 4576.108887, avg loss: 2.876247, ppl: 17.747540 +epoch: 0, batch: 29017, sum loss: 5845.984375, avg loss: 3.315930, ppl: 27.547998 +epoch: 0, batch: 29018, sum loss: 4660.556152, avg loss: 3.036193, ppl: 20.825806 +epoch: 0, batch: 29019, sum loss: 4470.299316, avg loss: 2.729120, ppl: 15.319404 +epoch: 0, batch: 29020, sum loss: 5231.094238, avg loss: 2.786944, ppl: 16.231344 +epoch: 0, batch: 29021, sum loss: 4401.885742, avg loss: 2.974247, ppl: 19.574883 +epoch: 0, batch: 29022, sum loss: 4697.386230, avg loss: 2.673527, ppl: 14.490982 +epoch: 0, batch: 29023, sum loss: 4936.656250, avg loss: 2.890314, ppl: 17.998959 +epoch: 0, batch: 29024, sum loss: 5046.387695, avg loss: 3.010971, ppl: 20.307116 +epoch: 0, batch: 29025, sum loss: 5264.497070, avg loss: 3.091308, ppl: 22.005835 +epoch: 0, batch: 29026, sum loss: 4958.030273, avg loss: 2.990368, ppl: 19.893003 +epoch: 0, batch: 29027, sum loss: 3681.364258, avg loss: 2.696970, ppl: 14.834718 +epoch: 0, batch: 29028, sum loss: 5222.501953, avg loss: 3.093899, ppl: 22.062944 +epoch: 0, batch: 29029, sum loss: 4189.165039, avg loss: 2.787202, ppl: 16.235531 +epoch: 0, batch: 29030, sum loss: 4967.757812, avg loss: 2.937763, ppl: 18.873587 +epoch: 0, batch: 29031, sum loss: 4871.518066, avg loss: 2.842193, ppl: 17.153336 +epoch: 0, batch: 29032, sum loss: 4991.597168, avg loss: 2.880322, ppl: 17.820002 +epoch: 0, batch: 29033, sum loss: 4308.265137, avg loss: 2.819545, ppl: 16.769224 +epoch: 0, batch: 29034, sum loss: 5394.459473, avg loss: 3.239916, ppl: 25.531567 +epoch: 0, batch: 29035, sum loss: 4260.499023, avg loss: 3.054121, ppl: 21.202545 +epoch: 0, batch: 29036, sum loss: 5298.367188, avg loss: 3.238611, ppl: 25.498280 +epoch: 0, batch: 29037, sum loss: 5240.458984, avg loss: 2.860512, ppl: 17.470478 +epoch: 0, batch: 29038, sum loss: 5612.162109, avg loss: 3.012433, ppl: 20.336811 +epoch: 0, batch: 29039, sum loss: 4895.866211, avg loss: 3.077226, ppl: 
21.698126 +epoch: 0, batch: 29040, sum loss: 4956.673340, avg loss: 3.009516, ppl: 20.277588 +epoch: 0, batch: 29041, sum loss: 4962.217285, avg loss: 3.082123, ppl: 21.804636 +epoch: 0, batch: 29042, sum loss: 5239.308105, avg loss: 3.122353, ppl: 22.699726 +epoch: 0, batch: 29043, sum loss: 6027.689453, avg loss: 2.986962, ppl: 19.825363 +epoch: 0, batch: 29044, sum loss: 5346.423828, avg loss: 3.201452, ppl: 24.568167 +epoch: 0, batch: 29045, sum loss: 4689.626465, avg loss: 2.912812, ppl: 18.408482 +epoch: 0, batch: 29046, sum loss: 6129.221191, avg loss: 3.152892, ppl: 23.403646 +epoch: 0, batch: 29047, sum loss: 4968.393555, avg loss: 3.224136, ppl: 25.131853 +epoch: 0, batch: 29048, sum loss: 5103.763672, avg loss: 2.901515, ppl: 18.201691 +epoch: 0, batch: 29049, sum loss: 5834.779297, avg loss: 3.128568, ppl: 22.841246 +epoch: 0, batch: 29050, sum loss: 5391.385742, avg loss: 2.861670, ppl: 17.490707 +epoch: 0, batch: 29051, sum loss: 4743.685547, avg loss: 3.052565, ppl: 21.169567 +epoch: 0, batch: 29052, sum loss: 5229.370605, avg loss: 2.952778, ppl: 19.159111 +epoch: 0, batch: 29053, sum loss: 5322.485352, avg loss: 3.143819, ppl: 23.192272 +epoch: 0, batch: 29054, sum loss: 4309.950195, avg loss: 2.775242, ppl: 16.042503 +epoch: 0, batch: 29055, sum loss: 4196.675781, avg loss: 2.568345, ppl: 13.044219 +epoch: 0, batch: 29056, sum loss: 5299.335449, avg loss: 3.100840, ppl: 22.216608 +epoch: 0, batch: 29057, sum loss: 4587.252930, avg loss: 2.934903, ppl: 18.819672 +epoch: 0, batch: 29058, sum loss: 4587.553223, avg loss: 2.874407, ppl: 17.714912 +epoch: 0, batch: 29059, sum loss: 5181.825195, avg loss: 2.880392, ppl: 17.821255 +epoch: 0, batch: 29060, sum loss: 4977.057617, avg loss: 3.010924, ppl: 20.306156 +epoch: 0, batch: 29061, sum loss: 5455.027344, avg loss: 3.049205, ppl: 21.098560 +epoch: 0, batch: 29062, sum loss: 5055.229004, avg loss: 2.890354, ppl: 17.999680 +epoch: 0, batch: 29063, sum loss: 5374.309082, avg loss: 2.975808, ppl: 
19.605455 +epoch: 0, batch: 29064, sum loss: 4309.212402, avg loss: 2.990432, ppl: 19.894276 +epoch: 0, batch: 29065, sum loss: 4974.594238, avg loss: 2.948781, ppl: 19.082687 +epoch: 0, batch: 29066, sum loss: 4367.956055, avg loss: 2.967361, ppl: 19.440557 +epoch: 0, batch: 29067, sum loss: 5047.334473, avg loss: 3.009740, ppl: 20.282133 +epoch: 0, batch: 29068, sum loss: 4577.297852, avg loss: 2.871580, ppl: 17.664900 +epoch: 0, batch: 29069, sum loss: 5352.816406, avg loss: 3.112103, ppl: 22.468235 +epoch: 0, batch: 29070, sum loss: 5347.985840, avg loss: 3.045550, ppl: 21.021593 +epoch: 0, batch: 29071, sum loss: 6220.184570, avg loss: 3.291103, ppl: 26.872484 +epoch: 0, batch: 29072, sum loss: 4936.423828, avg loss: 3.096878, ppl: 22.128759 +epoch: 0, batch: 29073, sum loss: 5806.921387, avg loss: 3.074072, ppl: 21.629793 +epoch: 0, batch: 29074, sum loss: 5664.265625, avg loss: 3.200150, ppl: 24.536217 +epoch: 0, batch: 29075, sum loss: 4973.245117, avg loss: 3.038024, ppl: 20.863974 +epoch: 0, batch: 29076, sum loss: 5203.220215, avg loss: 3.151557, ppl: 23.372427 +epoch: 0, batch: 29077, sum loss: 5745.044922, avg loss: 3.269804, ppl: 26.306170 +epoch: 0, batch: 29078, sum loss: 6311.236328, avg loss: 3.035708, ppl: 20.815704 +epoch: 0, batch: 29079, sum loss: 5905.982422, avg loss: 3.063269, ppl: 21.397394 +epoch: 0, batch: 29080, sum loss: 4969.030762, avg loss: 2.880598, ppl: 17.824923 +epoch: 0, batch: 29081, sum loss: 6644.834961, avg loss: 3.121106, ppl: 22.671444 +epoch: 0, batch: 29082, sum loss: 4837.789062, avg loss: 2.687661, ppl: 14.697254 +epoch: 0, batch: 29083, sum loss: 5784.635254, avg loss: 3.001887, ppl: 20.123466 +epoch: 0, batch: 29084, sum loss: 4865.055664, avg loss: 2.892423, ppl: 18.036963 +epoch: 0, batch: 29085, sum loss: 4976.135254, avg loss: 2.995867, ppl: 20.002695 +epoch: 0, batch: 29086, sum loss: 4456.572266, avg loss: 2.750971, ppl: 15.657822 +epoch: 0, batch: 29087, sum loss: 4980.738281, avg loss: 2.952424, ppl: 
19.152315 +epoch: 0, batch: 29088, sum loss: 4878.630859, avg loss: 2.972962, ppl: 19.549744 +epoch: 0, batch: 29089, sum loss: 4894.596191, avg loss: 2.896211, ppl: 18.105408 +epoch: 0, batch: 29090, sum loss: 5572.692383, avg loss: 3.101109, ppl: 22.222578 +epoch: 0, batch: 29091, sum loss: 4839.750488, avg loss: 2.980142, ppl: 19.690611 +epoch: 0, batch: 29092, sum loss: 4344.460449, avg loss: 2.710206, ppl: 15.032372 +epoch: 0, batch: 29093, sum loss: 4736.816406, avg loss: 3.085874, ppl: 21.886583 +epoch: 0, batch: 29094, sum loss: 4504.530762, avg loss: 2.989072, ppl: 19.867229 +epoch: 0, batch: 29095, sum loss: 5146.815430, avg loss: 2.917696, ppl: 18.498613 +epoch: 0, batch: 29096, sum loss: 4897.104980, avg loss: 2.953622, ppl: 19.175278 +epoch: 0, batch: 29097, sum loss: 5477.783203, avg loss: 2.926166, ppl: 18.655972 +epoch: 0, batch: 29098, sum loss: 5416.827148, avg loss: 3.098871, ppl: 22.172915 +epoch: 0, batch: 29099, sum loss: 5945.093262, avg loss: 3.143888, ppl: 23.193880 +epoch: 0, batch: 29100, sum loss: 5624.217285, avg loss: 3.095331, ppl: 22.094561 +epoch: 0, batch: 29101, sum loss: 5118.189941, avg loss: 2.812192, ppl: 16.646374 +epoch: 0, batch: 29102, sum loss: 5233.360352, avg loss: 2.953364, ppl: 19.170328 +epoch: 0, batch: 29103, sum loss: 5186.211914, avg loss: 2.827814, ppl: 16.908464 +epoch: 0, batch: 29104, sum loss: 4712.751953, avg loss: 3.013268, ppl: 20.353817 +epoch: 0, batch: 29105, sum loss: 5162.047363, avg loss: 3.351979, ppl: 28.559191 +epoch: 0, batch: 29106, sum loss: 3828.377441, avg loss: 2.911314, ppl: 18.380926 +epoch: 0, batch: 29107, sum loss: 5683.163086, avg loss: 3.223575, ppl: 25.117758 +epoch: 0, batch: 29108, sum loss: 4781.625488, avg loss: 2.770351, ppl: 15.964231 +epoch: 0, batch: 29109, sum loss: 5650.583984, avg loss: 3.067635, ppl: 21.491022 +epoch: 0, batch: 29110, sum loss: 4868.736328, avg loss: 2.855564, ppl: 17.384237 +epoch: 0, batch: 29111, sum loss: 6050.849121, avg loss: 3.096648, ppl: 
22.123657 +epoch: 0, batch: 29112, sum loss: 5288.370117, avg loss: 3.074634, ppl: 21.641956 +epoch: 0, batch: 29113, sum loss: 5524.109375, avg loss: 3.182091, ppl: 24.097082 +epoch: 0, batch: 29114, sum loss: 4486.919922, avg loss: 2.816648, ppl: 16.720705 +epoch: 0, batch: 29115, sum loss: 4953.708008, avg loss: 2.953911, ppl: 19.180815 +epoch: 0, batch: 29116, sum loss: 5719.714355, avg loss: 3.061945, ppl: 21.369089 +epoch: 0, batch: 29117, sum loss: 5608.418457, avg loss: 3.120990, ppl: 22.668806 +epoch: 0, batch: 29118, sum loss: 6250.163086, avg loss: 3.299981, ppl: 27.112114 +epoch: 0, batch: 29119, sum loss: 4761.195801, avg loss: 2.871650, ppl: 17.666143 +epoch: 0, batch: 29120, sum loss: 4430.660156, avg loss: 2.899647, ppl: 18.167727 +epoch: 0, batch: 29121, sum loss: 5038.221680, avg loss: 2.956703, ppl: 19.234449 +epoch: 0, batch: 29122, sum loss: 5134.944336, avg loss: 2.983698, ppl: 19.760756 +epoch: 0, batch: 29123, sum loss: 4833.571777, avg loss: 3.020982, ppl: 20.511429 +epoch: 0, batch: 29124, sum loss: 4636.373047, avg loss: 2.983509, ppl: 19.757023 +epoch: 0, batch: 29125, sum loss: 4297.475098, avg loss: 2.823571, ppl: 16.836872 +epoch: 0, batch: 29126, sum loss: 4764.264648, avg loss: 2.857987, ppl: 17.426414 +epoch: 0, batch: 29127, sum loss: 6518.669922, avg loss: 3.206429, ppl: 24.690752 +epoch: 0, batch: 29128, sum loss: 6179.525879, avg loss: 3.213482, ppl: 24.865515 +epoch: 0, batch: 29129, sum loss: 4300.303223, avg loss: 2.921402, ppl: 18.567295 +epoch: 0, batch: 29130, sum loss: 5641.575195, avg loss: 3.061083, ppl: 21.350658 +epoch: 0, batch: 29131, sum loss: 4823.666504, avg loss: 3.049094, ppl: 21.096216 +epoch: 0, batch: 29132, sum loss: 4075.360840, avg loss: 2.772354, ppl: 15.996251 +epoch: 0, batch: 29133, sum loss: 4466.616211, avg loss: 2.803902, ppl: 16.508942 +epoch: 0, batch: 29134, sum loss: 4405.344727, avg loss: 2.892544, ppl: 18.039148 +epoch: 0, batch: 29135, sum loss: 5191.781250, avg loss: 2.956595, ppl: 
19.232382 +epoch: 0, batch: 29136, sum loss: 5006.703125, avg loss: 2.964300, ppl: 19.381140 +epoch: 0, batch: 29137, sum loss: 4267.098633, avg loss: 2.723101, ppl: 15.227465 +epoch: 0, batch: 29138, sum loss: 4718.968262, avg loss: 2.866931, ppl: 17.582972 +epoch: 0, batch: 29139, sum loss: 4709.443359, avg loss: 3.020810, ppl: 20.507904 +epoch: 0, batch: 29140, sum loss: 4760.572266, avg loss: 2.936812, ppl: 18.855637 +epoch: 0, batch: 29141, sum loss: 4613.325684, avg loss: 2.886937, ppl: 17.938288 +epoch: 0, batch: 29142, sum loss: 4958.599609, avg loss: 2.861281, ppl: 17.483904 +epoch: 0, batch: 29143, sum loss: 5050.870117, avg loss: 2.874712, ppl: 17.720327 +epoch: 0, batch: 29144, sum loss: 4947.142090, avg loss: 3.349453, ppl: 28.487152 +epoch: 0, batch: 29145, sum loss: 5304.550293, avg loss: 3.159351, ppl: 23.555300 +epoch: 0, batch: 29146, sum loss: 5766.977051, avg loss: 3.085595, ppl: 21.880480 +epoch: 0, batch: 29147, sum loss: 4663.538574, avg loss: 2.878728, ppl: 17.791622 +epoch: 0, batch: 29148, sum loss: 5667.127930, avg loss: 3.189155, ppl: 24.267902 +epoch: 0, batch: 29149, sum loss: 6546.768555, avg loss: 3.371148, ppl: 29.111917 +epoch: 0, batch: 29150, sum loss: 5397.744629, avg loss: 2.967424, ppl: 19.441776 +epoch: 0, batch: 29151, sum loss: 4922.376465, avg loss: 2.845304, ppl: 17.206793 +epoch: 0, batch: 29152, sum loss: 4774.866211, avg loss: 2.906188, ppl: 18.286947 +epoch: 0, batch: 29153, sum loss: 5200.137207, avg loss: 2.897012, ppl: 18.119930 +epoch: 0, batch: 29154, sum loss: 4673.247559, avg loss: 2.858255, ppl: 17.431089 +epoch: 0, batch: 29155, sum loss: 5002.559082, avg loss: 3.180266, ppl: 24.053160 +epoch: 0, batch: 29156, sum loss: 5089.085938, avg loss: 3.022023, ppl: 20.532782 +epoch: 0, batch: 29157, sum loss: 4571.018066, avg loss: 2.918913, ppl: 18.521147 +epoch: 0, batch: 29158, sum loss: 4565.993164, avg loss: 2.767269, ppl: 15.915105 +epoch: 0, batch: 29159, sum loss: 5101.161621, avg loss: 2.983135, ppl: 
19.749643 +epoch: 0, batch: 29160, sum loss: 5654.576660, avg loss: 3.233034, ppl: 25.356476 +epoch: 0, batch: 29161, sum loss: 5492.165039, avg loss: 3.261381, ppl: 26.085533 +epoch: 0, batch: 29162, sum loss: 4630.861816, avg loss: 2.885272, ppl: 17.908438 +epoch: 0, batch: 29163, sum loss: 5348.672852, avg loss: 3.106082, ppl: 22.333364 +epoch: 0, batch: 29164, sum loss: 5783.361328, avg loss: 3.116035, ppl: 22.556770 +epoch: 0, batch: 29165, sum loss: 6923.383789, avg loss: 3.217186, ppl: 24.957783 +epoch: 0, batch: 29166, sum loss: 4608.472656, avg loss: 2.671578, ppl: 14.462779 +epoch: 0, batch: 29167, sum loss: 5899.665527, avg loss: 3.069545, ppl: 21.532108 +epoch: 0, batch: 29168, sum loss: 4385.584961, avg loss: 2.844089, ppl: 17.185892 +epoch: 0, batch: 29169, sum loss: 4521.966309, avg loss: 2.812168, ppl: 16.645969 +epoch: 0, batch: 29170, sum loss: 5534.694824, avg loss: 3.248060, ppl: 25.740368 +epoch: 0, batch: 29171, sum loss: 4793.064453, avg loss: 3.104316, ppl: 22.293976 +epoch: 0, batch: 29172, sum loss: 5144.530762, avg loss: 3.237590, ppl: 25.472261 +epoch: 0, batch: 29173, sum loss: 4604.037109, avg loss: 2.814204, ppl: 16.679890 +epoch: 0, batch: 29174, sum loss: 5391.962891, avg loss: 3.334547, ppl: 28.065674 +epoch: 0, batch: 29175, sum loss: 5588.239746, avg loss: 3.065409, ppl: 21.443224 +epoch: 0, batch: 29176, sum loss: 4737.675781, avg loss: 2.855742, ppl: 17.387333 +epoch: 0, batch: 29177, sum loss: 5184.741699, avg loss: 2.894886, ppl: 18.081444 +epoch: 0, batch: 29178, sum loss: 3846.792480, avg loss: 2.793604, ppl: 16.339800 +epoch: 0, batch: 29179, sum loss: 4315.614258, avg loss: 2.741813, ppl: 15.515091 +epoch: 0, batch: 29180, sum loss: 5023.901367, avg loss: 2.695226, ppl: 14.808865 +epoch: 0, batch: 29181, sum loss: 4828.674805, avg loss: 2.891422, ppl: 18.018915 +epoch: 0, batch: 29182, sum loss: 4840.558105, avg loss: 3.073370, ppl: 21.614620 +epoch: 0, batch: 29183, sum loss: 5573.381836, avg loss: 2.983609, ppl: 
19.759003 +epoch: 0, batch: 29184, sum loss: 5210.441895, avg loss: 3.135043, ppl: 22.989634 +epoch: 0, batch: 29185, sum loss: 6018.140625, avg loss: 3.214819, ppl: 24.898785 +epoch: 0, batch: 29186, sum loss: 4315.711914, avg loss: 2.762940, ppl: 15.846358 +epoch: 0, batch: 29187, sum loss: 5552.772949, avg loss: 3.233997, ppl: 25.380905 +epoch: 0, batch: 29188, sum loss: 4845.086426, avg loss: 2.932861, ppl: 18.781288 +epoch: 0, batch: 29189, sum loss: 4648.382812, avg loss: 2.916175, ppl: 18.470501 +epoch: 0, batch: 29190, sum loss: 5456.571289, avg loss: 2.916393, ppl: 18.474531 +epoch: 0, batch: 29191, sum loss: 4866.680664, avg loss: 2.857710, ppl: 17.421593 +epoch: 0, batch: 29192, sum loss: 4577.829590, avg loss: 2.861143, ppl: 17.481503 +epoch: 0, batch: 29193, sum loss: 5766.061523, avg loss: 3.073594, ppl: 21.619455 +epoch: 0, batch: 29194, sum loss: 4965.211914, avg loss: 3.020202, ppl: 20.495430 +epoch: 0, batch: 29195, sum loss: 4516.584473, avg loss: 3.047628, ppl: 21.065317 +epoch: 0, batch: 29196, sum loss: 4492.046387, avg loss: 2.883214, ppl: 17.871613 +epoch: 0, batch: 29197, sum loss: 5945.234863, avg loss: 2.987555, ppl: 19.837126 +epoch: 0, batch: 29198, sum loss: 4326.189453, avg loss: 3.010570, ppl: 20.298973 +epoch: 0, batch: 29199, sum loss: 5120.130859, avg loss: 3.120128, ppl: 22.649288 +epoch: 0, batch: 29200, sum loss: 4792.134766, avg loss: 2.757270, ppl: 15.756763 +epoch: 0, batch: 29201, sum loss: 4572.405273, avg loss: 2.971024, ppl: 19.511881 +epoch: 0, batch: 29202, sum loss: 4429.671875, avg loss: 2.770276, ppl: 15.963044 +epoch: 0, batch: 29203, sum loss: 6195.434570, avg loss: 3.228470, ppl: 25.241016 +epoch: 0, batch: 29204, sum loss: 4736.500000, avg loss: 2.861934, ppl: 17.495325 +epoch: 0, batch: 29205, sum loss: 5436.572754, avg loss: 3.138899, ppl: 23.078440 +epoch: 0, batch: 29206, sum loss: 4627.683594, avg loss: 2.872554, ppl: 17.682114 +epoch: 0, batch: 29207, sum loss: 4151.779785, avg loss: 2.751345, ppl: 
15.663688 +epoch: 0, batch: 29208, sum loss: 5252.094727, avg loss: 3.226102, ppl: 25.181318 +epoch: 0, batch: 29209, sum loss: 5968.832031, avg loss: 3.263440, ppl: 26.139311 +epoch: 0, batch: 29210, sum loss: 4227.111816, avg loss: 2.931423, ppl: 18.754297 +epoch: 0, batch: 29211, sum loss: 4675.511719, avg loss: 2.968579, ppl: 19.464237 +epoch: 0, batch: 29212, sum loss: 4538.818359, avg loss: 2.810414, ppl: 16.616793 +epoch: 0, batch: 29213, sum loss: 4753.785645, avg loss: 3.066959, ppl: 21.476486 +epoch: 0, batch: 29214, sum loss: 4339.405273, avg loss: 2.746459, ppl: 15.587339 +epoch: 0, batch: 29215, sum loss: 5201.791992, avg loss: 3.247061, ppl: 25.714653 +epoch: 0, batch: 29216, sum loss: 4492.708008, avg loss: 2.993143, ppl: 19.948288 +epoch: 0, batch: 29217, sum loss: 4963.295898, avg loss: 2.929927, ppl: 18.726257 +epoch: 0, batch: 29218, sum loss: 5964.077148, avg loss: 3.137337, ppl: 23.042418 +epoch: 0, batch: 29219, sum loss: 4855.431152, avg loss: 2.995331, ppl: 19.991968 +epoch: 0, batch: 29220, sum loss: 4278.660645, avg loss: 2.846747, ppl: 17.231634 +epoch: 0, batch: 29221, sum loss: 4287.204590, avg loss: 2.694660, ppl: 14.800492 +epoch: 0, batch: 29222, sum loss: 4135.916016, avg loss: 2.844509, ppl: 17.193113 +epoch: 0, batch: 29223, sum loss: 4887.578613, avg loss: 3.223997, ppl: 25.128349 +epoch: 0, batch: 29224, sum loss: 5330.916504, avg loss: 3.117495, ppl: 22.589724 +epoch: 0, batch: 29225, sum loss: 5551.518066, avg loss: 3.063752, ppl: 21.407722 +epoch: 0, batch: 29226, sum loss: 4686.409668, avg loss: 2.919881, ppl: 18.539087 +epoch: 0, batch: 29227, sum loss: 4725.404785, avg loss: 2.812741, ppl: 16.655510 +epoch: 0, batch: 29228, sum loss: 5133.381836, avg loss: 3.151247, ppl: 23.365189 +epoch: 0, batch: 29229, sum loss: 4832.779297, avg loss: 2.914825, ppl: 18.445580 +epoch: 0, batch: 29230, sum loss: 4452.179199, avg loss: 2.942617, ppl: 18.965412 +epoch: 0, batch: 29231, sum loss: 5170.168945, avg loss: 2.990266, ppl: 
19.890965 +epoch: 0, batch: 29232, sum loss: 5644.187500, avg loss: 3.049264, ppl: 21.099808 +epoch: 0, batch: 29233, sum loss: 5595.392090, avg loss: 3.181008, ppl: 24.070995 +epoch: 0, batch: 29234, sum loss: 5407.094727, avg loss: 2.894590, ppl: 18.076096 +epoch: 0, batch: 29235, sum loss: 4433.131836, avg loss: 2.812901, ppl: 16.658171 +epoch: 0, batch: 29236, sum loss: 5821.451172, avg loss: 3.257667, ppl: 25.988842 +epoch: 0, batch: 29237, sum loss: 5052.218750, avg loss: 2.862447, ppl: 17.504303 +epoch: 0, batch: 29238, sum loss: 5288.696289, avg loss: 2.986277, ppl: 19.811783 +epoch: 0, batch: 29239, sum loss: 4418.219727, avg loss: 2.914393, ppl: 18.437620 +epoch: 0, batch: 29240, sum loss: 4280.390625, avg loss: 2.745600, ppl: 15.573959 +epoch: 0, batch: 29241, sum loss: 5137.967285, avg loss: 2.997647, ppl: 20.038338 +epoch: 0, batch: 29242, sum loss: 4535.723633, avg loss: 2.711132, ppl: 15.046299 +epoch: 0, batch: 29243, sum loss: 5070.470703, avg loss: 3.114540, ppl: 22.523058 +epoch: 0, batch: 29244, sum loss: 4781.269531, avg loss: 2.888985, ppl: 17.975050 +epoch: 0, batch: 29245, sum loss: 5819.705078, avg loss: 3.287969, ppl: 26.788404 +epoch: 0, batch: 29246, sum loss: 5222.696289, avg loss: 3.075793, ppl: 21.667057 +epoch: 0, batch: 29247, sum loss: 5313.246094, avg loss: 2.984970, ppl: 19.785906 +epoch: 0, batch: 29248, sum loss: 4695.670410, avg loss: 3.170608, ppl: 23.821964 +epoch: 0, batch: 29249, sum loss: 5149.743164, avg loss: 3.058042, ppl: 21.285845 +epoch: 0, batch: 29250, sum loss: 6012.785156, avg loss: 3.113819, ppl: 22.506842 +epoch: 0, batch: 29251, sum loss: 4890.442383, avg loss: 3.134899, ppl: 22.986317 +epoch: 0, batch: 29252, sum loss: 5951.094238, avg loss: 3.241337, ppl: 25.567873 +epoch: 0, batch: 29253, sum loss: 4496.469727, avg loss: 2.964054, ppl: 19.376362 +epoch: 0, batch: 29254, sum loss: 4955.094238, avg loss: 2.815394, ppl: 16.699760 +epoch: 0, batch: 29255, sum loss: 5644.323242, avg loss: 3.245729, ppl: 
25.680435 +epoch: 0, batch: 29256, sum loss: 4740.337891, avg loss: 3.017402, ppl: 20.438118 +epoch: 0, batch: 29257, sum loss: 4688.798340, avg loss: 2.841696, ppl: 17.144819 +epoch: 0, batch: 29258, sum loss: 3984.812500, avg loss: 2.908622, ppl: 18.331526 +epoch: 0, batch: 29259, sum loss: 4560.583008, avg loss: 2.925326, ppl: 18.640299 +epoch: 0, batch: 29260, sum loss: 5722.819336, avg loss: 2.787540, ppl: 16.241016 +epoch: 0, batch: 29261, sum loss: 5091.279297, avg loss: 2.991351, ppl: 19.912563 +epoch: 0, batch: 29262, sum loss: 4476.338867, avg loss: 2.815307, ppl: 16.698307 +epoch: 0, batch: 29263, sum loss: 5242.059570, avg loss: 3.092660, ppl: 22.035624 +epoch: 0, batch: 29264, sum loss: 5003.453613, avg loss: 2.967648, ppl: 19.446119 +epoch: 0, batch: 29265, sum loss: 4716.648926, avg loss: 3.138156, ppl: 23.061306 +epoch: 0, batch: 29266, sum loss: 4943.672852, avg loss: 3.010763, ppl: 20.302889 +epoch: 0, batch: 29267, sum loss: 5432.146973, avg loss: 3.007833, ppl: 20.243490 +epoch: 0, batch: 29268, sum loss: 5683.016602, avg loss: 3.167791, ppl: 23.754950 +epoch: 0, batch: 29269, sum loss: 4984.514648, avg loss: 3.262117, ppl: 26.104740 +epoch: 0, batch: 29270, sum loss: 5399.024414, avg loss: 2.896472, ppl: 18.110144 +epoch: 0, batch: 29271, sum loss: 5623.195801, avg loss: 3.278831, ppl: 26.544731 +epoch: 0, batch: 29272, sum loss: 5701.637207, avg loss: 2.934451, ppl: 18.811167 +epoch: 0, batch: 29273, sum loss: 4978.028809, avg loss: 3.078558, ppl: 21.727058 +epoch: 0, batch: 29274, sum loss: 5398.774414, avg loss: 3.021138, ppl: 20.514633 +epoch: 0, batch: 29275, sum loss: 4981.245605, avg loss: 3.117175, ppl: 22.582497 +epoch: 0, batch: 29276, sum loss: 4770.434570, avg loss: 2.983386, ppl: 19.754593 +epoch: 0, batch: 29277, sum loss: 4847.711914, avg loss: 3.062357, ppl: 21.377895 +epoch: 0, batch: 29278, sum loss: 5311.141113, avg loss: 3.224736, ppl: 25.146946 +epoch: 0, batch: 29279, sum loss: 4818.312500, avg loss: 2.921960, ppl: 
18.577671 +epoch: 0, batch: 29280, sum loss: 4249.158691, avg loss: 3.106110, ppl: 22.333998 +epoch: 0, batch: 29281, sum loss: 5760.263672, avg loss: 3.003266, ppl: 20.151241 +epoch: 0, batch: 29282, sum loss: 4924.164551, avg loss: 2.997057, ppl: 20.026506 +epoch: 0, batch: 29283, sum loss: 4852.659180, avg loss: 3.081053, ppl: 21.781332 +epoch: 0, batch: 29284, sum loss: 6504.986816, avg loss: 3.077099, ppl: 21.695370 +epoch: 0, batch: 29285, sum loss: 4928.812012, avg loss: 2.865588, ppl: 17.559383 +epoch: 0, batch: 29286, sum loss: 6411.145996, avg loss: 3.314967, ppl: 27.521484 +epoch: 0, batch: 29287, sum loss: 5222.284180, avg loss: 3.032685, ppl: 20.752886 +epoch: 0, batch: 29288, sum loss: 5008.532227, avg loss: 3.084071, ppl: 21.847170 +epoch: 0, batch: 29289, sum loss: 4596.918945, avg loss: 2.842869, ppl: 17.164942 +epoch: 0, batch: 29290, sum loss: 4397.230469, avg loss: 2.860918, ppl: 17.477560 +epoch: 0, batch: 29291, sum loss: 5054.719238, avg loss: 2.855774, ppl: 17.387884 +epoch: 0, batch: 29292, sum loss: 5537.248535, avg loss: 2.994726, ppl: 19.979883 +epoch: 0, batch: 29293, sum loss: 4405.135742, avg loss: 2.915378, ppl: 18.455780 +epoch: 0, batch: 29294, sum loss: 4486.784668, avg loss: 2.890970, ppl: 18.010767 +epoch: 0, batch: 29295, sum loss: 5427.917480, avg loss: 3.142975, ppl: 23.172699 +epoch: 0, batch: 29296, sum loss: 4751.993652, avg loss: 2.953383, ppl: 19.170702 +epoch: 0, batch: 29297, sum loss: 4109.222168, avg loss: 2.780259, ppl: 16.123190 +epoch: 0, batch: 29298, sum loss: 5223.269531, avg loss: 3.114651, ppl: 22.525562 +epoch: 0, batch: 29299, sum loss: 5630.800293, avg loss: 3.375780, ppl: 29.247076 +epoch: 0, batch: 29300, sum loss: 5004.404785, avg loss: 2.996650, ppl: 20.018358 +epoch: 0, batch: 29301, sum loss: 4535.405762, avg loss: 2.758763, ppl: 15.780304 +epoch: 0, batch: 29302, sum loss: 5081.406250, avg loss: 2.940629, ppl: 18.927740 +epoch: 0, batch: 29303, sum loss: 3824.491699, avg loss: 2.511157, ppl: 
12.319173 +epoch: 0, batch: 29304, sum loss: 4695.930664, avg loss: 2.896934, ppl: 18.118513 +epoch: 0, batch: 29305, sum loss: 5653.159180, avg loss: 3.286721, ppl: 26.754976 +epoch: 0, batch: 29306, sum loss: 4299.107422, avg loss: 3.042539, ppl: 20.958387 +epoch: 0, batch: 29307, sum loss: 5275.574219, avg loss: 3.179972, ppl: 24.046089 +epoch: 0, batch: 29308, sum loss: 5522.388184, avg loss: 3.281276, ppl: 26.609711 +epoch: 0, batch: 29309, sum loss: 5743.066406, avg loss: 3.122929, ppl: 22.712811 +epoch: 0, batch: 29310, sum loss: 5214.463379, avg loss: 3.036962, ppl: 20.841825 +epoch: 0, batch: 29311, sum loss: 4849.483887, avg loss: 2.912603, ppl: 18.404642 +epoch: 0, batch: 29312, sum loss: 4927.031738, avg loss: 2.881305, ppl: 17.837536 +epoch: 0, batch: 29313, sum loss: 4127.695801, avg loss: 2.984596, ppl: 19.778505 +epoch: 0, batch: 29314, sum loss: 5501.013672, avg loss: 3.268576, ppl: 26.273903 +epoch: 0, batch: 29315, sum loss: 4779.593750, avg loss: 2.778834, ppl: 16.100231 +epoch: 0, batch: 29316, sum loss: 5039.503418, avg loss: 2.855243, ppl: 17.378654 +epoch: 0, batch: 29317, sum loss: 4729.100586, avg loss: 2.899510, ppl: 18.165241 +epoch: 0, batch: 29318, sum loss: 5787.998047, avg loss: 3.288635, ppl: 26.806255 +epoch: 0, batch: 29319, sum loss: 5535.300293, avg loss: 3.328503, ppl: 27.896545 +epoch: 0, batch: 29320, sum loss: 5722.832031, avg loss: 3.227768, ppl: 25.223288 +epoch: 0, batch: 29321, sum loss: 5093.192383, avg loss: 3.171353, ppl: 23.839708 +epoch: 0, batch: 29322, sum loss: 4008.398926, avg loss: 2.736109, ppl: 15.426835 +epoch: 0, batch: 29323, sum loss: 5057.627930, avg loss: 3.082040, ppl: 21.802835 +epoch: 0, batch: 29324, sum loss: 5145.501953, avg loss: 2.977721, ppl: 19.643000 +epoch: 0, batch: 29325, sum loss: 4570.673828, avg loss: 2.914970, ppl: 18.448267 +epoch: 0, batch: 29326, sum loss: 4821.462891, avg loss: 2.832822, ppl: 16.993345 +epoch: 0, batch: 29327, sum loss: 4792.629395, avg loss: 3.035231, ppl: 
20.805777 +epoch: 0, batch: 29328, sum loss: 5397.755859, avg loss: 2.933563, ppl: 18.794472 +epoch: 0, batch: 29329, sum loss: 5042.322754, avg loss: 2.987158, ppl: 19.829248 +epoch: 0, batch: 29330, sum loss: 4741.445312, avg loss: 2.954171, ppl: 19.185820 +epoch: 0, batch: 29331, sum loss: 5164.208984, avg loss: 3.127928, ppl: 22.826633 +epoch: 0, batch: 29332, sum loss: 5474.625000, avg loss: 3.149957, ppl: 23.335060 +epoch: 0, batch: 29333, sum loss: 4447.173828, avg loss: 2.869144, ppl: 17.621935 +epoch: 0, batch: 29334, sum loss: 4950.497559, avg loss: 2.912058, ppl: 18.394609 +epoch: 0, batch: 29335, sum loss: 4220.158203, avg loss: 2.890519, ppl: 18.002657 +epoch: 0, batch: 29336, sum loss: 5051.894043, avg loss: 3.087955, ppl: 21.932180 +epoch: 0, batch: 29337, sum loss: 4499.458496, avg loss: 2.893543, ppl: 18.057165 +epoch: 0, batch: 29338, sum loss: 4835.939453, avg loss: 2.808327, ppl: 16.582157 +epoch: 0, batch: 29339, sum loss: 5378.699219, avg loss: 3.138098, ppl: 23.059954 +epoch: 0, batch: 29340, sum loss: 5318.254883, avg loss: 2.986106, ppl: 19.808401 +epoch: 0, batch: 29341, sum loss: 5775.954102, avg loss: 2.983447, ppl: 19.755804 +epoch: 0, batch: 29342, sum loss: 5081.339355, avg loss: 3.087084, ppl: 21.913076 +epoch: 0, batch: 29343, sum loss: 5096.277344, avg loss: 3.022703, ppl: 20.546759 +epoch: 0, batch: 29344, sum loss: 5736.677246, avg loss: 2.963160, ppl: 19.359041 +epoch: 0, batch: 29345, sum loss: 5939.293945, avg loss: 3.266938, ppl: 26.230909 +epoch: 0, batch: 29346, sum loss: 4306.621094, avg loss: 2.705164, ppl: 14.956769 +epoch: 0, batch: 29347, sum loss: 4745.215332, avg loss: 2.941857, ppl: 18.951008 +epoch: 0, batch: 29348, sum loss: 5866.749023, avg loss: 3.172931, ppl: 23.877359 +epoch: 0, batch: 29349, sum loss: 5989.823242, avg loss: 3.031287, ppl: 20.723890 +epoch: 0, batch: 29350, sum loss: 5797.572266, avg loss: 3.264399, ppl: 26.164377 +epoch: 0, batch: 29351, sum loss: 5913.413086, avg loss: 3.158875, ppl: 
23.544088 +epoch: 0, batch: 29352, sum loss: 4218.505371, avg loss: 3.017529, ppl: 20.440720 +epoch: 0, batch: 29353, sum loss: 4917.884766, avg loss: 2.882699, ppl: 17.862419 +epoch: 0, batch: 29354, sum loss: 5144.478516, avg loss: 3.051292, ppl: 21.142647 +epoch: 0, batch: 29355, sum loss: 6117.876465, avg loss: 3.092961, ppl: 22.042240 +epoch: 0, batch: 29356, sum loss: 5214.858398, avg loss: 3.156694, ppl: 23.492804 +epoch: 0, batch: 29357, sum loss: 5531.678223, avg loss: 3.170016, ppl: 23.807867 +epoch: 0, batch: 29358, sum loss: 5374.462891, avg loss: 3.144800, ppl: 23.215031 +epoch: 0, batch: 29359, sum loss: 5650.512695, avg loss: 3.069263, ppl: 21.526026 +epoch: 0, batch: 29360, sum loss: 4535.249512, avg loss: 2.925967, ppl: 18.652262 +epoch: 0, batch: 29361, sum loss: 5365.614258, avg loss: 2.962791, ppl: 19.351908 +epoch: 0, batch: 29362, sum loss: 5647.930664, avg loss: 2.997840, ppl: 20.042202 +epoch: 0, batch: 29363, sum loss: 4335.015625, avg loss: 3.050679, ppl: 21.129696 +epoch: 0, batch: 29364, sum loss: 5950.240234, avg loss: 3.121847, ppl: 22.688238 +epoch: 0, batch: 29365, sum loss: 4498.098633, avg loss: 2.881550, ppl: 17.841900 +epoch: 0, batch: 29366, sum loss: 5059.017578, avg loss: 2.877712, ppl: 17.773556 +epoch: 0, batch: 29367, sum loss: 5511.767578, avg loss: 2.977724, ppl: 19.643064 +epoch: 0, batch: 29368, sum loss: 5958.882812, avg loss: 3.118201, ppl: 22.605680 +epoch: 0, batch: 29369, sum loss: 4887.774414, avg loss: 2.823671, ppl: 16.838549 +epoch: 0, batch: 29370, sum loss: 5325.518066, avg loss: 2.932554, ppl: 18.775518 +epoch: 0, batch: 29371, sum loss: 5664.884277, avg loss: 3.050557, ppl: 21.127108 +epoch: 0, batch: 29372, sum loss: 5339.820312, avg loss: 2.989821, ppl: 19.882128 +epoch: 0, batch: 29373, sum loss: 3510.188965, avg loss: 2.805907, ppl: 16.542065 +epoch: 0, batch: 29374, sum loss: 5078.426758, avg loss: 2.893690, ppl: 18.059834 +epoch: 0, batch: 29375, sum loss: 4644.506348, avg loss: 2.720859, ppl: 
15.193369 +epoch: 0, batch: 29376, sum loss: 6357.875977, avg loss: 3.280638, ppl: 26.592733 +epoch: 0, batch: 29377, sum loss: 4783.601074, avg loss: 2.971181, ppl: 19.514946 +epoch: 0, batch: 29378, sum loss: 5293.860840, avg loss: 2.974079, ppl: 19.571592 +epoch: 0, batch: 29379, sum loss: 5565.236328, avg loss: 3.091798, ppl: 22.016630 +epoch: 0, batch: 29380, sum loss: 5235.376465, avg loss: 3.182600, ppl: 24.109346 +epoch: 0, batch: 29381, sum loss: 5936.728516, avg loss: 3.276340, ppl: 26.478689 +epoch: 0, batch: 29382, sum loss: 4544.299805, avg loss: 3.015461, ppl: 20.398491 +epoch: 0, batch: 29383, sum loss: 5254.907227, avg loss: 2.869966, ppl: 17.636415 +epoch: 0, batch: 29384, sum loss: 5777.697266, avg loss: 2.850368, ppl: 17.294153 +epoch: 0, batch: 29385, sum loss: 5083.929199, avg loss: 2.908427, ppl: 18.327938 +epoch: 0, batch: 29386, sum loss: 5317.138184, avg loss: 2.955608, ppl: 19.213394 +epoch: 0, batch: 29387, sum loss: 4696.664062, avg loss: 2.839579, ppl: 17.108566 +epoch: 0, batch: 29388, sum loss: 4569.964355, avg loss: 2.702522, ppl: 14.917306 +epoch: 0, batch: 29389, sum loss: 4987.881836, avg loss: 3.096140, ppl: 22.112440 +epoch: 0, batch: 29390, sum loss: 5106.835938, avg loss: 2.926553, ppl: 18.663197 +epoch: 0, batch: 29391, sum loss: 4318.331055, avg loss: 3.021925, ppl: 20.530779 +epoch: 0, batch: 29392, sum loss: 4341.644531, avg loss: 2.896361, ppl: 18.108124 +epoch: 0, batch: 29393, sum loss: 4574.292480, avg loss: 2.896955, ppl: 18.118895 +epoch: 0, batch: 29394, sum loss: 6249.053711, avg loss: 3.229485, ppl: 25.266642 +epoch: 0, batch: 29395, sum loss: 5675.954102, avg loss: 3.194122, ppl: 24.388741 +epoch: 0, batch: 29396, sum loss: 5099.909668, avg loss: 3.012351, ppl: 20.335148 +epoch: 0, batch: 29397, sum loss: 4324.604980, avg loss: 2.931936, ppl: 18.763914 +epoch: 0, batch: 29398, sum loss: 4431.203125, avg loss: 2.811677, ppl: 16.637796 +epoch: 0, batch: 29399, sum loss: 5240.619629, avg loss: 3.036280, ppl: 
20.827623 +epoch: 0, batch: 29400, sum loss: 5480.194336, avg loss: 3.142313, ppl: 23.157373 +epoch: 0, batch: 29401, sum loss: 4912.880371, avg loss: 2.924334, ppl: 18.621811 +epoch: 0, batch: 29402, sum loss: 4303.133301, avg loss: 2.967678, ppl: 19.446718 +epoch: 0, batch: 29403, sum loss: 5104.424316, avg loss: 2.850041, ppl: 17.288498 +epoch: 0, batch: 29404, sum loss: 5671.394043, avg loss: 3.150774, ppl: 23.354145 +epoch: 0, batch: 29405, sum loss: 4735.987793, avg loss: 3.180650, ppl: 24.062389 +epoch: 0, batch: 29406, sum loss: 5824.395020, avg loss: 3.017821, ppl: 20.446695 +epoch: 0, batch: 29407, sum loss: 5355.461914, avg loss: 2.993551, ppl: 19.956413 +epoch: 0, batch: 29408, sum loss: 4844.391113, avg loss: 2.900833, ppl: 18.189293 +epoch: 0, batch: 29409, sum loss: 5558.104004, avg loss: 3.057263, ppl: 21.269260 +epoch: 0, batch: 29410, sum loss: 4975.070312, avg loss: 3.132916, ppl: 22.940777 +epoch: 0, batch: 29411, sum loss: 5213.349609, avg loss: 3.027497, ppl: 20.645489 +epoch: 0, batch: 29412, sum loss: 4776.051758, avg loss: 3.113463, ppl: 22.498816 +epoch: 0, batch: 29413, sum loss: 4956.267090, avg loss: 3.203793, ppl: 24.625748 +epoch: 0, batch: 29414, sum loss: 4987.633301, avg loss: 2.956511, ppl: 19.230755 +epoch: 0, batch: 29415, sum loss: 4182.537109, avg loss: 3.039634, ppl: 20.897604 +epoch: 0, batch: 29416, sum loss: 4577.104492, avg loss: 3.073945, ppl: 21.627060 +epoch: 0, batch: 29417, sum loss: 5644.585938, avg loss: 3.013660, ppl: 20.361797 +epoch: 0, batch: 29418, sum loss: 4976.995117, avg loss: 2.719670, ppl: 15.175307 +epoch: 0, batch: 29419, sum loss: 5404.735352, avg loss: 3.259792, ppl: 26.044121 +epoch: 0, batch: 29420, sum loss: 5198.228027, avg loss: 2.899179, ppl: 18.159235 +epoch: 0, batch: 29421, sum loss: 4480.820312, avg loss: 2.969397, ppl: 19.480175 +epoch: 0, batch: 29422, sum loss: 5691.502441, avg loss: 3.219176, ppl: 25.007502 +epoch: 0, batch: 29423, sum loss: 4891.320312, avg loss: 2.880636, ppl: 
17.825615 +epoch: 0, batch: 29424, sum loss: 5039.005859, avg loss: 3.139567, ppl: 23.093874 +epoch: 0, batch: 29425, sum loss: 4998.568848, avg loss: 2.938606, ppl: 18.889496 +epoch: 0, batch: 29426, sum loss: 4473.788086, avg loss: 2.824361, ppl: 16.850176 +epoch: 0, batch: 29427, sum loss: 4101.018066, avg loss: 2.728555, ppl: 15.310746 +epoch: 0, batch: 29428, sum loss: 6517.871094, avg loss: 3.461429, ppl: 31.862480 +epoch: 0, batch: 29429, sum loss: 4813.041504, avg loss: 2.909940, ppl: 18.355705 +epoch: 0, batch: 29430, sum loss: 5553.778809, avg loss: 3.290154, ppl: 26.846985 +epoch: 0, batch: 29431, sum loss: 5579.934570, avg loss: 3.115541, ppl: 22.545630 +epoch: 0, batch: 29432, sum loss: 4200.022949, avg loss: 2.845544, ppl: 17.210920 +epoch: 0, batch: 29433, sum loss: 5145.787598, avg loss: 3.026934, ppl: 20.633875 +epoch: 0, batch: 29434, sum loss: 4485.909180, avg loss: 2.853632, ppl: 17.350681 +epoch: 0, batch: 29435, sum loss: 5407.003906, avg loss: 3.027438, ppl: 20.644272 +epoch: 0, batch: 29436, sum loss: 5098.743164, avg loss: 2.974763, ppl: 19.584974 +epoch: 0, batch: 29437, sum loss: 3979.957764, avg loss: 2.763860, ppl: 15.860940 +epoch: 0, batch: 29438, sum loss: 4496.274414, avg loss: 2.871184, ppl: 17.657915 +epoch: 0, batch: 29439, sum loss: 5042.241211, avg loss: 2.843904, ppl: 17.182713 +epoch: 0, batch: 29440, sum loss: 5276.311523, avg loss: 3.011594, ppl: 20.319756 +epoch: 0, batch: 29441, sum loss: 4752.728027, avg loss: 3.224375, ppl: 25.137846 +epoch: 0, batch: 29442, sum loss: 4659.768066, avg loss: 2.928830, ppl: 18.705727 +epoch: 0, batch: 29443, sum loss: 4307.362793, avg loss: 2.750551, ppl: 15.651253 +epoch: 0, batch: 29444, sum loss: 3993.008789, avg loss: 2.592863, ppl: 13.367988 +epoch: 0, batch: 29445, sum loss: 5975.851562, avg loss: 3.031888, ppl: 20.736345 +epoch: 0, batch: 29446, sum loss: 6522.093262, avg loss: 3.418288, ppl: 30.517117 +epoch: 0, batch: 29447, sum loss: 5778.772949, avg loss: 3.154352, ppl: 
23.437849 +epoch: 0, batch: 29448, sum loss: 4807.452148, avg loss: 2.969396, ppl: 19.480152 +epoch: 0, batch: 29449, sum loss: 4998.289062, avg loss: 3.040322, ppl: 20.911972 +epoch: 0, batch: 29450, sum loss: 5445.320801, avg loss: 2.985373, ppl: 19.793890 +epoch: 0, batch: 29451, sum loss: 4988.378906, avg loss: 2.971042, ppl: 19.512234 +epoch: 0, batch: 29452, sum loss: 5201.795410, avg loss: 2.927291, ppl: 18.676960 +epoch: 0, batch: 29453, sum loss: 4417.818359, avg loss: 2.785510, ppl: 16.208075 +epoch: 0, batch: 29454, sum loss: 5015.027344, avg loss: 2.674681, ppl: 14.507724 +epoch: 0, batch: 29455, sum loss: 5835.254395, avg loss: 3.150785, ppl: 23.354395 +epoch: 0, batch: 29456, sum loss: 6054.507812, avg loss: 3.294074, ppl: 26.952440 +epoch: 0, batch: 29457, sum loss: 4469.240234, avg loss: 2.802032, ppl: 16.478088 +epoch: 0, batch: 29458, sum loss: 5805.680176, avg loss: 3.182939, ppl: 24.117527 +epoch: 0, batch: 29459, sum loss: 5122.725098, avg loss: 2.849124, ppl: 17.272644 +epoch: 0, batch: 29460, sum loss: 4741.485840, avg loss: 2.851164, ppl: 17.307919 +epoch: 0, batch: 29461, sum loss: 3508.520264, avg loss: 2.696787, ppl: 14.832006 +epoch: 0, batch: 29462, sum loss: 5154.732910, avg loss: 2.894291, ppl: 18.070692 +epoch: 0, batch: 29463, sum loss: 4857.860840, avg loss: 2.960305, ppl: 19.303864 +epoch: 0, batch: 29464, sum loss: 4923.806641, avg loss: 2.987747, ppl: 19.840925 +epoch: 0, batch: 29465, sum loss: 5580.338867, avg loss: 3.170647, ppl: 23.822891 +epoch: 0, batch: 29466, sum loss: 3983.859863, avg loss: 2.654137, ppl: 14.212717 +epoch: 0, batch: 29467, sum loss: 5434.532227, avg loss: 3.094836, ppl: 22.083622 +epoch: 0, batch: 29468, sum loss: 6193.932617, avg loss: 3.067822, ppl: 21.495035 +epoch: 0, batch: 29469, sum loss: 5834.093262, avg loss: 3.281267, ppl: 26.609476 +epoch: 0, batch: 29470, sum loss: 4868.121094, avg loss: 3.364285, ppl: 28.912830 +epoch: 0, batch: 29471, sum loss: 4723.803223, avg loss: 2.937689, ppl: 
18.872175 +epoch: 0, batch: 29472, sum loss: 5656.189941, avg loss: 3.284663, ppl: 26.699984 +epoch: 0, batch: 29473, sum loss: 5129.765625, avg loss: 2.949836, ppl: 19.102831 +epoch: 0, batch: 29474, sum loss: 4218.877441, avg loss: 2.956466, ppl: 19.229902 +epoch: 0, batch: 29475, sum loss: 5275.018555, avg loss: 2.876237, ppl: 17.747362 +epoch: 0, batch: 29476, sum loss: 4333.788086, avg loss: 2.727368, ppl: 15.292585 +epoch: 0, batch: 29477, sum loss: 4579.791504, avg loss: 2.860582, ppl: 17.471691 +epoch: 0, batch: 29478, sum loss: 4676.629395, avg loss: 3.013292, ppl: 20.354303 +epoch: 0, batch: 29479, sum loss: 5692.432617, avg loss: 3.076991, ppl: 21.693022 +epoch: 0, batch: 29480, sum loss: 5155.204590, avg loss: 3.099943, ppl: 22.196678 +epoch: 0, batch: 29481, sum loss: 4523.803711, avg loss: 3.023933, ppl: 20.572041 +epoch: 0, batch: 29482, sum loss: 4331.479492, avg loss: 2.872334, ppl: 17.678226 +epoch: 0, batch: 29483, sum loss: 5120.192383, avg loss: 2.825713, ppl: 16.872974 +epoch: 0, batch: 29484, sum loss: 4224.020020, avg loss: 2.850216, ppl: 17.291519 +epoch: 0, batch: 29485, sum loss: 4787.079590, avg loss: 2.951344, ppl: 19.131645 +epoch: 0, batch: 29486, sum loss: 4730.188477, avg loss: 3.093649, ppl: 22.057411 +epoch: 0, batch: 29487, sum loss: 5092.461914, avg loss: 3.133823, ppl: 22.961586 +epoch: 0, batch: 29488, sum loss: 3247.173828, avg loss: 2.536855, ppl: 12.639850 +epoch: 0, batch: 29489, sum loss: 5186.551270, avg loss: 2.968833, ppl: 19.469185 +epoch: 0, batch: 29490, sum loss: 3983.110107, avg loss: 2.841020, ppl: 17.133234 +epoch: 0, batch: 29491, sum loss: 4742.967773, avg loss: 2.936822, ppl: 18.855831 +epoch: 0, batch: 29492, sum loss: 4236.567871, avg loss: 2.803817, ppl: 16.507540 +epoch: 0, batch: 29493, sum loss: 5364.869141, avg loss: 3.187682, ppl: 24.232195 +epoch: 0, batch: 29494, sum loss: 5706.882324, avg loss: 3.261075, ppl: 26.077568 +epoch: 0, batch: 29495, sum loss: 5213.828125, avg loss: 2.994732, ppl: 
19.980003 +epoch: 0, batch: 29496, sum loss: 5447.946777, avg loss: 3.009916, ppl: 20.285688 +epoch: 0, batch: 29497, sum loss: 4699.208984, avg loss: 2.916952, ppl: 18.484854 +epoch: 0, batch: 29498, sum loss: 4982.742676, avg loss: 3.045686, ppl: 21.024460 +epoch: 0, batch: 29499, sum loss: 5006.583008, avg loss: 3.088577, ppl: 21.945822 +epoch: 0, batch: 29500, sum loss: 6275.495117, avg loss: 3.412450, ppl: 30.339479 +epoch: 0, batch: 29501, sum loss: 4970.004883, avg loss: 2.976051, ppl: 19.610220 +epoch: 0, batch: 29502, sum loss: 5124.199219, avg loss: 3.159186, ppl: 23.551409 +epoch: 0, batch: 29503, sum loss: 5212.890137, avg loss: 3.021966, ppl: 20.531607 +epoch: 0, batch: 29504, sum loss: 4781.613770, avg loss: 3.063174, ppl: 21.395348 +epoch: 0, batch: 29505, sum loss: 5171.413574, avg loss: 2.830549, ppl: 16.954771 +epoch: 0, batch: 29506, sum loss: 5227.182129, avg loss: 3.252758, ppl: 25.861568 +epoch: 0, batch: 29507, sum loss: 5033.171875, avg loss: 2.995936, ppl: 20.004068 +epoch: 0, batch: 29508, sum loss: 5116.290527, avg loss: 3.045411, ppl: 21.018671 +epoch: 0, batch: 29509, sum loss: 4955.910156, avg loss: 3.166716, ppl: 23.729420 +epoch: 0, batch: 29510, sum loss: 4899.854004, avg loss: 2.958849, ppl: 19.275772 +epoch: 0, batch: 29511, sum loss: 5209.127930, avg loss: 3.023290, ppl: 20.558813 +epoch: 0, batch: 29512, sum loss: 4992.011719, avg loss: 3.155507, ppl: 23.464922 +epoch: 0, batch: 29513, sum loss: 4908.665039, avg loss: 3.013299, ppl: 20.354448 +epoch: 0, batch: 29514, sum loss: 6077.945801, avg loss: 3.323098, ppl: 27.746174 +epoch: 0, batch: 29515, sum loss: 4457.664062, avg loss: 2.770456, ppl: 15.965918 +epoch: 0, batch: 29516, sum loss: 4485.114258, avg loss: 2.948793, ppl: 19.082911 +epoch: 0, batch: 29517, sum loss: 5507.520508, avg loss: 3.190916, ppl: 24.310677 +epoch: 0, batch: 29518, sum loss: 5424.330566, avg loss: 3.099617, ppl: 22.189463 +epoch: 0, batch: 29519, sum loss: 4434.518555, avg loss: 2.771574, ppl: 
15.983773 +epoch: 0, batch: 29520, sum loss: 3996.507324, avg loss: 2.731721, ppl: 15.359293 +epoch: 0, batch: 29521, sum loss: 5092.428711, avg loss: 2.896717, ppl: 18.114578 +epoch: 0, batch: 29522, sum loss: 4014.152344, avg loss: 2.923636, ppl: 18.608826 +epoch: 0, batch: 29523, sum loss: 5583.659180, avg loss: 3.351536, ppl: 28.546549 +epoch: 0, batch: 29524, sum loss: 5052.084961, avg loss: 2.863994, ppl: 17.531406 +epoch: 0, batch: 29525, sum loss: 4976.164551, avg loss: 2.901554, ppl: 18.202404 +epoch: 0, batch: 29526, sum loss: 5641.210449, avg loss: 3.044366, ppl: 20.996717 +epoch: 0, batch: 29527, sum loss: 4552.304688, avg loss: 2.748976, ppl: 15.626626 +epoch: 0, batch: 29528, sum loss: 4352.263672, avg loss: 2.903445, ppl: 18.236858 +epoch: 0, batch: 29529, sum loss: 4837.088867, avg loss: 3.053718, ppl: 21.193989 +epoch: 0, batch: 29530, sum loss: 4827.462891, avg loss: 2.976241, ppl: 19.613947 +epoch: 0, batch: 29531, sum loss: 5043.129883, avg loss: 3.043530, ppl: 20.979174 +epoch: 0, batch: 29532, sum loss: 6030.068359, avg loss: 3.197279, ppl: 24.465874 +epoch: 0, batch: 29533, sum loss: 3796.838135, avg loss: 2.715907, ppl: 15.118318 +epoch: 0, batch: 29534, sum loss: 4976.474609, avg loss: 2.888262, ppl: 17.962057 +epoch: 0, batch: 29535, sum loss: 3771.514648, avg loss: 2.655996, ppl: 14.239166 +epoch: 0, batch: 29536, sum loss: 5416.197754, avg loss: 3.123528, ppl: 22.726418 +epoch: 0, batch: 29537, sum loss: 5216.315430, avg loss: 3.018701, ppl: 20.464693 +epoch: 0, batch: 29538, sum loss: 4355.854492, avg loss: 3.016520, ppl: 20.420097 +epoch: 0, batch: 29539, sum loss: 5891.833496, avg loss: 3.217823, ppl: 24.973694 +epoch: 0, batch: 29540, sum loss: 6001.605957, avg loss: 3.202564, ppl: 24.595524 +epoch: 0, batch: 29541, sum loss: 4973.641113, avg loss: 3.272132, ppl: 26.367506 +epoch: 0, batch: 29542, sum loss: 5552.587891, avg loss: 3.093364, ppl: 22.051128 +epoch: 0, batch: 29543, sum loss: 6577.818359, avg loss: 3.028461, ppl: 
20.665413 +epoch: 0, batch: 29544, sum loss: 5967.569336, avg loss: 3.243244, ppl: 25.616692 +epoch: 0, batch: 29545, sum loss: 5632.801270, avg loss: 3.311465, ppl: 27.425262 +epoch: 0, batch: 29546, sum loss: 5408.751953, avg loss: 3.155631, ppl: 23.467848 +epoch: 0, batch: 29547, sum loss: 4630.687012, avg loss: 2.631072, ppl: 13.888651 +epoch: 0, batch: 29548, sum loss: 4608.683105, avg loss: 2.815323, ppl: 16.698563 +epoch: 0, batch: 29549, sum loss: 4463.932617, avg loss: 2.950385, ppl: 19.113314 +epoch: 0, batch: 29550, sum loss: 5386.493652, avg loss: 3.097466, ppl: 22.141777 +epoch: 0, batch: 29551, sum loss: 4923.841309, avg loss: 2.991398, ppl: 19.913509 +epoch: 0, batch: 29552, sum loss: 4435.753906, avg loss: 2.703080, ppl: 14.925628 +epoch: 0, batch: 29553, sum loss: 5201.817383, avg loss: 2.894723, ppl: 18.078497 +epoch: 0, batch: 29554, sum loss: 5135.796387, avg loss: 2.936419, ppl: 18.848225 +epoch: 0, batch: 29555, sum loss: 4519.461914, avg loss: 2.831743, ppl: 16.975023 +epoch: 0, batch: 29556, sum loss: 6277.677734, avg loss: 3.044461, ppl: 20.998699 +epoch: 0, batch: 29557, sum loss: 4572.455078, avg loss: 3.036159, ppl: 20.825096 +epoch: 0, batch: 29558, sum loss: 5350.289062, avg loss: 2.979003, ppl: 19.668198 +epoch: 0, batch: 29559, sum loss: 5857.402344, avg loss: 3.227219, ppl: 25.209455 +epoch: 0, batch: 29560, sum loss: 5147.120117, avg loss: 2.973495, ppl: 19.560163 +epoch: 0, batch: 29561, sum loss: 4422.758789, avg loss: 2.896371, ppl: 18.108313 +epoch: 0, batch: 29562, sum loss: 4631.108887, avg loss: 2.694072, ppl: 14.791779 +epoch: 0, batch: 29563, sum loss: 5933.270020, avg loss: 2.911320, ppl: 18.381044 +epoch: 0, batch: 29564, sum loss: 6676.092285, avg loss: 3.198894, ppl: 24.505419 +epoch: 0, batch: 29565, sum loss: 5360.470703, avg loss: 3.049187, ppl: 21.098183 +epoch: 0, batch: 29566, sum loss: 5272.256348, avg loss: 3.103153, ppl: 22.268045 +epoch: 0, batch: 29567, sum loss: 4981.217285, avg loss: 2.756623, ppl: 
15.746570 +epoch: 0, batch: 29568, sum loss: 4005.150391, avg loss: 2.720890, ppl: 15.193843 +epoch: 0, batch: 29569, sum loss: 5706.680664, avg loss: 3.270304, ppl: 26.319338 +epoch: 0, batch: 29570, sum loss: 6265.548828, avg loss: 3.089521, ppl: 21.966557 +epoch: 0, batch: 29571, sum loss: 3880.421875, avg loss: 2.698485, ppl: 14.857211 +epoch: 0, batch: 29572, sum loss: 5011.588379, avg loss: 3.078371, ppl: 21.722994 +epoch: 0, batch: 29573, sum loss: 4284.475098, avg loss: 2.657863, ppl: 14.265769 +epoch: 0, batch: 29574, sum loss: 4776.061035, avg loss: 3.009490, ppl: 20.277065 +epoch: 0, batch: 29575, sum loss: 4841.125488, avg loss: 3.135444, ppl: 22.998848 +epoch: 0, batch: 29576, sum loss: 4988.213867, avg loss: 2.910276, ppl: 18.361874 +epoch: 0, batch: 29577, sum loss: 5114.329590, avg loss: 3.006660, ppl: 20.219763 +epoch: 0, batch: 29578, sum loss: 5042.344727, avg loss: 2.935009, ppl: 18.821663 +epoch: 0, batch: 29579, sum loss: 5572.685059, avg loss: 3.143083, ppl: 23.175198 +epoch: 0, batch: 29580, sum loss: 4960.674805, avg loss: 2.885791, ppl: 17.917736 +epoch: 0, batch: 29581, sum loss: 5858.394531, avg loss: 2.961776, ppl: 19.332285 +epoch: 0, batch: 29582, sum loss: 4800.556152, avg loss: 2.961478, ppl: 19.326515 +epoch: 0, batch: 29583, sum loss: 5067.128418, avg loss: 3.014354, ppl: 20.375914 +epoch: 0, batch: 29584, sum loss: 4530.143066, avg loss: 2.934031, ppl: 18.803270 +epoch: 0, batch: 29585, sum loss: 4304.000488, avg loss: 2.829718, ppl: 16.940678 +epoch: 0, batch: 29586, sum loss: 5102.891602, avg loss: 3.187315, ppl: 24.223307 +epoch: 0, batch: 29587, sum loss: 5515.046875, avg loss: 3.025259, ppl: 20.599340 +epoch: 0, batch: 29588, sum loss: 4180.125488, avg loss: 2.746469, ppl: 15.587496 +epoch: 0, batch: 29589, sum loss: 4453.318848, avg loss: 2.820341, ppl: 16.782574 +epoch: 0, batch: 29590, sum loss: 5797.807617, avg loss: 3.159568, ppl: 23.560423 +epoch: 0, batch: 29591, sum loss: 5084.569336, avg loss: 2.976914, ppl: 
19.627151 +epoch: 0, batch: 29592, sum loss: 4802.816895, avg loss: 2.888044, ppl: 17.958147 +epoch: 0, batch: 29593, sum loss: 5591.266113, avg loss: 3.144694, ppl: 23.212580 +epoch: 0, batch: 29594, sum loss: 5394.896484, avg loss: 3.230477, ppl: 25.291721 +epoch: 0, batch: 29595, sum loss: 5408.270508, avg loss: 3.162731, ppl: 23.635059 +epoch: 0, batch: 29596, sum loss: 4990.624023, avg loss: 2.954780, ppl: 19.197506 +epoch: 0, batch: 29597, sum loss: 3969.729248, avg loss: 2.803481, ppl: 16.501991 +epoch: 0, batch: 29598, sum loss: 4878.413574, avg loss: 2.921206, ppl: 18.563658 +epoch: 0, batch: 29599, sum loss: 5039.042969, avg loss: 2.931381, ppl: 18.753502 +epoch: 0, batch: 29600, sum loss: 5962.461426, avg loss: 3.297822, ppl: 27.053638 +epoch: 0, batch: 29601, sum loss: 4701.029785, avg loss: 2.835362, ppl: 17.036562 +epoch: 0, batch: 29602, sum loss: 5806.659180, avg loss: 3.013316, ppl: 20.354778 +epoch: 0, batch: 29603, sum loss: 4611.515137, avg loss: 2.850133, ppl: 17.290081 +epoch: 0, batch: 29604, sum loss: 5577.280762, avg loss: 2.992103, ppl: 19.927553 +epoch: 0, batch: 29605, sum loss: 4849.011719, avg loss: 2.708945, ppl: 15.013429 +epoch: 0, batch: 29606, sum loss: 3575.103027, avg loss: 2.784348, ppl: 16.189262 +epoch: 0, batch: 29607, sum loss: 4939.880371, avg loss: 2.914384, ppl: 18.437445 +epoch: 0, batch: 29608, sum loss: 4303.708496, avg loss: 2.713562, ppl: 15.082898 +epoch: 0, batch: 29609, sum loss: 5363.556641, avg loss: 3.325206, ppl: 27.804714 +epoch: 0, batch: 29610, sum loss: 5013.982910, avg loss: 2.944206, ppl: 18.995573 +epoch: 0, batch: 29611, sum loss: 5825.090332, avg loss: 3.108372, ppl: 22.384583 +epoch: 0, batch: 29612, sum loss: 5150.445801, avg loss: 3.187157, ppl: 24.219471 +epoch: 0, batch: 29613, sum loss: 4960.758789, avg loss: 2.979435, ppl: 19.676691 +epoch: 0, batch: 29614, sum loss: 4729.960449, avg loss: 2.690535, ppl: 14.739557 +epoch: 0, batch: 29615, sum loss: 4971.596680, avg loss: 3.000360, ppl: 
20.092770 +epoch: 0, batch: 29616, sum loss: 4443.513184, avg loss: 3.051863, ppl: 21.154728 +epoch: 0, batch: 29617, sum loss: 5993.946289, avg loss: 3.024191, ppl: 20.577349 +epoch: 0, batch: 29618, sum loss: 4460.928711, avg loss: 2.786339, ppl: 16.221525 +epoch: 0, batch: 29619, sum loss: 5705.933594, avg loss: 3.080958, ppl: 21.779249 +epoch: 0, batch: 29620, sum loss: 5419.543945, avg loss: 3.114681, ppl: 22.526232 +epoch: 0, batch: 29621, sum loss: 4325.628906, avg loss: 2.912881, ppl: 18.409767 +epoch: 0, batch: 29622, sum loss: 4198.948242, avg loss: 2.717766, ppl: 15.146448 +epoch: 0, batch: 29623, sum loss: 4068.010742, avg loss: 2.947834, ppl: 19.064615 +epoch: 0, batch: 29624, sum loss: 5663.596680, avg loss: 3.033528, ppl: 20.770378 +epoch: 0, batch: 29625, sum loss: 6151.062012, avg loss: 3.208692, ppl: 24.746693 +epoch: 0, batch: 29626, sum loss: 5404.053711, avg loss: 2.943384, ppl: 18.979973 +epoch: 0, batch: 29627, sum loss: 4635.910156, avg loss: 2.865210, ppl: 17.552744 +epoch: 0, batch: 29628, sum loss: 4741.535156, avg loss: 3.010498, ppl: 20.297512 +epoch: 0, batch: 29629, sum loss: 4442.191406, avg loss: 2.936016, ppl: 18.840628 +epoch: 0, batch: 29630, sum loss: 6093.085449, avg loss: 3.127867, ppl: 22.825253 +epoch: 0, batch: 29631, sum loss: 5383.788574, avg loss: 3.110219, ppl: 22.425945 +epoch: 0, batch: 29632, sum loss: 4672.474121, avg loss: 3.008676, ppl: 20.260563 +epoch: 0, batch: 29633, sum loss: 4792.066895, avg loss: 3.019576, ppl: 20.482607 +epoch: 0, batch: 29634, sum loss: 4967.695801, avg loss: 3.014379, ppl: 20.376425 +epoch: 0, batch: 29635, sum loss: 4353.931641, avg loss: 2.682644, ppl: 14.623709 +epoch: 0, batch: 29636, sum loss: 4824.252441, avg loss: 2.970599, ppl: 19.503593 +epoch: 0, batch: 29637, sum loss: 4403.693848, avg loss: 2.688458, ppl: 14.708973 +epoch: 0, batch: 29638, sum loss: 5097.395508, avg loss: 3.070720, ppl: 21.557423 +epoch: 0, batch: 29639, sum loss: 5258.227539, avg loss: 2.950745, ppl: 
19.120195 +epoch: 0, batch: 29640, sum loss: 4173.053711, avg loss: 2.875985, ppl: 17.742886 +epoch: 0, batch: 29641, sum loss: 5312.897461, avg loss: 3.202470, ppl: 24.593197 +epoch: 0, batch: 29642, sum loss: 5364.006348, avg loss: 2.975045, ppl: 19.590509 +epoch: 0, batch: 29643, sum loss: 5154.763184, avg loss: 2.984808, ppl: 19.782703 +epoch: 0, batch: 29644, sum loss: 4592.605469, avg loss: 3.250252, ppl: 25.796827 +epoch: 0, batch: 29645, sum loss: 4634.208984, avg loss: 2.714827, ppl: 15.101991 +epoch: 0, batch: 29646, sum loss: 5102.773438, avg loss: 3.144038, ppl: 23.197342 +epoch: 0, batch: 29647, sum loss: 4353.868652, avg loss: 2.896785, ppl: 18.115818 +epoch: 0, batch: 29648, sum loss: 5555.146973, avg loss: 2.875335, ppl: 17.731363 +epoch: 0, batch: 29649, sum loss: 5329.180664, avg loss: 3.183501, ppl: 24.131094 +epoch: 0, batch: 29650, sum loss: 4567.288574, avg loss: 2.807184, ppl: 16.563210 +epoch: 0, batch: 29651, sum loss: 4280.959473, avg loss: 2.873127, ppl: 17.692259 +epoch: 0, batch: 29652, sum loss: 4608.104980, avg loss: 2.865737, ppl: 17.561991 +epoch: 0, batch: 29653, sum loss: 5000.777832, avg loss: 2.746171, ppl: 15.582854 +epoch: 0, batch: 29654, sum loss: 4340.225098, avg loss: 3.001539, ppl: 20.116467 +epoch: 0, batch: 29655, sum loss: 4589.113281, avg loss: 2.949301, ppl: 19.092613 +epoch: 0, batch: 29656, sum loss: 3719.518311, avg loss: 2.574061, ppl: 13.118995 +epoch: 0, batch: 29657, sum loss: 5516.732422, avg loss: 3.287683, ppl: 26.780746 +epoch: 0, batch: 29658, sum loss: 4147.486816, avg loss: 2.817586, ppl: 16.736403 +epoch: 0, batch: 29659, sum loss: 3858.985352, avg loss: 2.623375, ppl: 13.782166 +epoch: 0, batch: 29660, sum loss: 5385.129883, avg loss: 2.970287, ppl: 19.497511 +epoch: 0, batch: 29661, sum loss: 5906.527832, avg loss: 3.229376, ppl: 25.263878 +epoch: 0, batch: 29662, sum loss: 5338.912598, avg loss: 3.140537, ppl: 23.116278 +epoch: 0, batch: 29663, sum loss: 5000.261230, avg loss: 3.092308, ppl: 
22.027849 +epoch: 0, batch: 29664, sum loss: 4989.921387, avg loss: 2.828754, ppl: 16.924355 +epoch: 0, batch: 29665, sum loss: 4717.653809, avg loss: 2.963350, ppl: 19.362734 +epoch: 0, batch: 29666, sum loss: 5050.544922, avg loss: 2.812108, ppl: 16.644962 +epoch: 0, batch: 29667, sum loss: 4855.131348, avg loss: 2.942504, ppl: 18.963270 +epoch: 0, batch: 29668, sum loss: 4289.726074, avg loss: 2.735795, ppl: 15.421992 +epoch: 0, batch: 29669, sum loss: 5223.465820, avg loss: 3.081691, ppl: 21.795223 +epoch: 0, batch: 29670, sum loss: 5543.526367, avg loss: 3.251335, ppl: 25.824797 +epoch: 0, batch: 29671, sum loss: 5264.797363, avg loss: 3.064492, ppl: 21.423580 +epoch: 0, batch: 29672, sum loss: 4917.110352, avg loss: 3.188787, ppl: 24.258995 +epoch: 0, batch: 29673, sum loss: 5513.114746, avg loss: 3.205299, ppl: 24.662882 +epoch: 0, batch: 29674, sum loss: 4737.651367, avg loss: 3.046721, ppl: 21.046225 +epoch: 0, batch: 29675, sum loss: 4328.260254, avg loss: 2.942393, ppl: 18.961168 +epoch: 0, batch: 29676, sum loss: 4939.923828, avg loss: 2.949208, ppl: 19.090837 +epoch: 0, batch: 29677, sum loss: 4570.224609, avg loss: 3.171565, ppl: 23.844769 +epoch: 0, batch: 29678, sum loss: 5936.965820, avg loss: 3.203975, ppl: 24.630239 +epoch: 0, batch: 29679, sum loss: 5656.150391, avg loss: 3.232086, ppl: 25.332443 +epoch: 0, batch: 29680, sum loss: 4698.926758, avg loss: 2.865199, ppl: 17.552551 +epoch: 0, batch: 29681, sum loss: 5214.348633, avg loss: 3.171745, ppl: 23.849060 +epoch: 0, batch: 29682, sum loss: 5359.642578, avg loss: 3.017817, ppl: 20.446604 +epoch: 0, batch: 29683, sum loss: 4247.036621, avg loss: 2.848449, ppl: 17.260981 +epoch: 0, batch: 29684, sum loss: 5283.244629, avg loss: 2.973126, ppl: 19.552940 +epoch: 0, batch: 29685, sum loss: 4754.022461, avg loss: 2.791558, ppl: 16.306398 +epoch: 0, batch: 29686, sum loss: 5236.903320, avg loss: 3.006259, ppl: 20.211645 +epoch: 0, batch: 29687, sum loss: 4873.728027, avg loss: 3.132216, ppl: 
22.924725 +epoch: 0, batch: 29688, sum loss: 5244.893555, avg loss: 2.928472, ppl: 18.699038 +epoch: 0, batch: 29689, sum loss: 5200.014648, avg loss: 3.082404, ppl: 21.810764 +epoch: 0, batch: 29690, sum loss: 5241.209473, avg loss: 2.864049, ppl: 17.532372 +epoch: 0, batch: 29691, sum loss: 5399.227539, avg loss: 3.308350, ppl: 27.339987 +epoch: 0, batch: 29692, sum loss: 5813.767578, avg loss: 3.087503, ppl: 21.922264 +epoch: 0, batch: 29693, sum loss: 4630.665527, avg loss: 2.921556, ppl: 18.570150 +epoch: 0, batch: 29694, sum loss: 6475.125488, avg loss: 3.035690, ppl: 20.815327 +epoch: 0, batch: 29695, sum loss: 5944.590820, avg loss: 3.213292, ppl: 24.860802 +epoch: 0, batch: 29696, sum loss: 5505.337891, avg loss: 2.864380, ppl: 17.538174 +epoch: 0, batch: 29697, sum loss: 4059.573975, avg loss: 2.757863, ppl: 15.766111 +epoch: 0, batch: 29698, sum loss: 4722.483398, avg loss: 3.086591, ppl: 21.902275 +epoch: 0, batch: 29699, sum loss: 5953.301758, avg loss: 3.263872, ppl: 26.150587 +epoch: 0, batch: 29700, sum loss: 5453.591797, avg loss: 3.018036, ppl: 20.451088 +epoch: 0, batch: 29701, sum loss: 4864.861816, avg loss: 2.876914, ppl: 17.759388 +epoch: 0, batch: 29702, sum loss: 4454.769531, avg loss: 2.732987, ppl: 15.378761 +epoch: 0, batch: 29703, sum loss: 4818.873047, avg loss: 2.970945, ppl: 19.510351 +epoch: 0, batch: 29704, sum loss: 4778.842773, avg loss: 3.119349, ppl: 22.631641 +epoch: 0, batch: 29705, sum loss: 4979.024414, avg loss: 3.047139, ppl: 21.055014 +epoch: 0, batch: 29706, sum loss: 5829.001953, avg loss: 3.250977, ppl: 25.815557 +epoch: 0, batch: 29707, sum loss: 5714.375488, avg loss: 3.176418, ppl: 23.960762 +epoch: 0, batch: 29708, sum loss: 4656.335938, avg loss: 2.726192, ppl: 15.274611 +epoch: 0, batch: 29709, sum loss: 6231.841309, avg loss: 3.336103, ppl: 28.109383 +epoch: 0, batch: 29710, sum loss: 4765.630859, avg loss: 2.938120, ppl: 18.880320 +epoch: 0, batch: 29711, sum loss: 4495.177734, avg loss: 2.936106, ppl: 
18.842325 +epoch: 0, batch: 29712, sum loss: 4464.873535, avg loss: 2.804569, ppl: 16.519951 +epoch: 0, batch: 29713, sum loss: 4917.056641, avg loss: 2.983651, ppl: 19.759827 +epoch: 0, batch: 29714, sum loss: 5097.511230, avg loss: 3.183955, ppl: 24.142040 +epoch: 0, batch: 29715, sum loss: 5201.583496, avg loss: 3.185293, ppl: 24.174370 +epoch: 0, batch: 29716, sum loss: 5531.703125, avg loss: 2.909891, ppl: 18.354799 +epoch: 0, batch: 29717, sum loss: 5269.119141, avg loss: 3.090393, ppl: 21.985708 +epoch: 0, batch: 29718, sum loss: 5430.312988, avg loss: 3.016841, ppl: 20.426655 +epoch: 0, batch: 29719, sum loss: 4695.316895, avg loss: 2.998286, ppl: 20.051149 +epoch: 0, batch: 29720, sum loss: 5040.514160, avg loss: 2.711411, ppl: 15.050504 +epoch: 0, batch: 29721, sum loss: 5273.148438, avg loss: 3.281362, ppl: 26.611988 +epoch: 0, batch: 29722, sum loss: 4686.905762, avg loss: 2.922011, ppl: 18.578613 +epoch: 0, batch: 29723, sum loss: 4439.622559, avg loss: 2.899819, ppl: 18.170855 +epoch: 0, batch: 29724, sum loss: 6015.142578, avg loss: 3.036417, ppl: 20.830475 +epoch: 0, batch: 29725, sum loss: 5482.583496, avg loss: 2.947626, ppl: 19.060642 +epoch: 0, batch: 29726, sum loss: 5079.731934, avg loss: 3.315752, ppl: 27.543100 +epoch: 0, batch: 29727, sum loss: 5054.290039, avg loss: 2.985405, ppl: 19.794512 +epoch: 0, batch: 29728, sum loss: 5094.789062, avg loss: 3.218439, ppl: 24.989079 +epoch: 0, batch: 29729, sum loss: 4495.879883, avg loss: 2.983331, ppl: 19.753506 +epoch: 0, batch: 29730, sum loss: 5247.907227, avg loss: 2.878721, ppl: 17.791494 +epoch: 0, batch: 29731, sum loss: 4406.637207, avg loss: 2.846665, ppl: 17.230221 +epoch: 0, batch: 29732, sum loss: 5864.223633, avg loss: 3.187078, ppl: 24.217560 +epoch: 0, batch: 29733, sum loss: 4503.381836, avg loss: 2.843044, ppl: 17.167946 +epoch: 0, batch: 29734, sum loss: 4784.612305, avg loss: 2.770476, ppl: 15.966234 +epoch: 0, batch: 29735, sum loss: 3974.048340, avg loss: 2.736948, ppl: 
15.439791 +epoch: 0, batch: 29736, sum loss: 4242.666016, avg loss: 2.783902, ppl: 16.182035 +epoch: 0, batch: 29737, sum loss: 5062.549805, avg loss: 3.096361, ppl: 22.117313 +epoch: 0, batch: 29738, sum loss: 4799.075195, avg loss: 2.967888, ppl: 19.450804 +epoch: 0, batch: 29739, sum loss: 5568.007812, avg loss: 3.248546, ppl: 25.752871 +epoch: 0, batch: 29740, sum loss: 5242.498535, avg loss: 2.943570, ppl: 18.983503 +epoch: 0, batch: 29741, sum loss: 4925.936523, avg loss: 3.151591, ppl: 23.373219 +epoch: 0, batch: 29742, sum loss: 4660.641602, avg loss: 2.909264, ppl: 18.343300 +epoch: 0, batch: 29743, sum loss: 5271.020508, avg loss: 2.959585, ppl: 19.289965 +epoch: 0, batch: 29744, sum loss: 4117.354004, avg loss: 2.820106, ppl: 16.778622 +epoch: 0, batch: 29745, sum loss: 5866.602539, avg loss: 3.414786, ppl: 30.410444 +epoch: 0, batch: 29746, sum loss: 4499.977539, avg loss: 2.851697, ppl: 17.317139 +epoch: 0, batch: 29747, sum loss: 5394.703613, avg loss: 3.112927, ppl: 22.486776 +epoch: 0, batch: 29748, sum loss: 5896.387207, avg loss: 3.146418, ppl: 23.252621 +epoch: 0, batch: 29749, sum loss: 4395.403809, avg loss: 2.891713, ppl: 18.024160 +epoch: 0, batch: 29750, sum loss: 4219.354980, avg loss: 2.687487, ppl: 14.694707 +epoch: 0, batch: 29751, sum loss: 5726.514648, avg loss: 2.954858, ppl: 19.198997 +epoch: 0, batch: 29752, sum loss: 6246.415039, avg loss: 3.334979, ppl: 28.077789 +epoch: 0, batch: 29753, sum loss: 4955.770508, avg loss: 2.872910, ppl: 17.688425 +epoch: 0, batch: 29754, sum loss: 5129.556641, avg loss: 2.929501, ppl: 18.718294 +epoch: 0, batch: 29755, sum loss: 6027.323730, avg loss: 3.194130, ppl: 24.388950 +epoch: 0, batch: 29756, sum loss: 4692.354492, avg loss: 2.951166, ppl: 19.128252 +epoch: 0, batch: 29757, sum loss: 5303.135742, avg loss: 3.063625, ppl: 21.405018 +epoch: 0, batch: 29758, sum loss: 4883.168945, avg loss: 3.177078, ppl: 23.976597 +epoch: 0, batch: 29759, sum loss: 5500.990723, avg loss: 2.954345, ppl: 
19.189154 +epoch: 0, batch: 29760, sum loss: 4880.865234, avg loss: 2.855977, ppl: 17.391426 +epoch: 0, batch: 29761, sum loss: 4678.661133, avg loss: 3.096400, ppl: 22.118193 +epoch: 0, batch: 29762, sum loss: 3944.357178, avg loss: 2.725886, ppl: 15.269938 +epoch: 0, batch: 29763, sum loss: 4505.323242, avg loss: 2.917956, ppl: 18.503422 +epoch: 0, batch: 29764, sum loss: 5282.231934, avg loss: 2.987688, ppl: 19.839756 +epoch: 0, batch: 29765, sum loss: 5162.792969, avg loss: 3.087795, ppl: 21.928673 +epoch: 0, batch: 29766, sum loss: 5161.938477, avg loss: 3.076245, ppl: 21.676844 +epoch: 0, batch: 29767, sum loss: 5681.832031, avg loss: 3.046559, ppl: 21.042803 +epoch: 0, batch: 29768, sum loss: 5828.164551, avg loss: 3.230690, ppl: 25.297100 +epoch: 0, batch: 29769, sum loss: 4352.323730, avg loss: 2.861488, ppl: 17.487535 +epoch: 0, batch: 29770, sum loss: 5214.260742, avg loss: 3.081714, ppl: 21.795736 +epoch: 0, batch: 29771, sum loss: 5544.186035, avg loss: 3.195496, ppl: 24.422293 +epoch: 0, batch: 29772, sum loss: 4765.415039, avg loss: 2.748221, ppl: 15.614823 +epoch: 0, batch: 29773, sum loss: 4908.272461, avg loss: 3.224884, ppl: 25.150646 +epoch: 0, batch: 29774, sum loss: 5172.193359, avg loss: 2.947119, ppl: 19.050983 +epoch: 0, batch: 29775, sum loss: 4514.785156, avg loss: 2.982025, ppl: 19.727718 +epoch: 0, batch: 29776, sum loss: 5334.895508, avg loss: 3.154876, ppl: 23.450134 +epoch: 0, batch: 29777, sum loss: 5571.543457, avg loss: 2.864547, ppl: 17.541101 +epoch: 0, batch: 29778, sum loss: 5925.228516, avg loss: 3.194193, ppl: 24.390491 +epoch: 0, batch: 29779, sum loss: 4885.241699, avg loss: 3.131565, ppl: 22.909813 +epoch: 0, batch: 29780, sum loss: 5515.919922, avg loss: 3.139397, ppl: 23.089931 +epoch: 0, batch: 29781, sum loss: 5813.093750, avg loss: 3.145613, ppl: 23.233923 +epoch: 0, batch: 29782, sum loss: 4572.355957, avg loss: 2.813757, ppl: 16.672445 +epoch: 0, batch: 29783, sum loss: 5428.896484, avg loss: 3.081099, ppl: 
21.782333 +epoch: 0, batch: 29784, sum loss: 4495.336426, avg loss: 2.855995, ppl: 17.391737 +epoch: 0, batch: 29785, sum loss: 4842.639648, avg loss: 2.956434, ppl: 19.229282 +epoch: 0, batch: 29786, sum loss: 4121.736816, avg loss: 2.892447, ppl: 18.037388 +epoch: 0, batch: 29787, sum loss: 6163.954102, avg loss: 3.319307, ppl: 27.641197 +epoch: 0, batch: 29788, sum loss: 4387.213379, avg loss: 2.801541, ppl: 16.470009 +epoch: 0, batch: 29789, sum loss: 3904.257812, avg loss: 2.928926, ppl: 18.707520 +epoch: 0, batch: 29790, sum loss: 4888.983887, avg loss: 3.044199, ppl: 20.993208 +epoch: 0, batch: 29791, sum loss: 5233.867188, avg loss: 3.016638, ppl: 20.422522 +epoch: 0, batch: 29792, sum loss: 4154.565918, avg loss: 2.843646, ppl: 17.178276 +epoch: 0, batch: 29793, sum loss: 5086.168457, avg loss: 2.977850, ppl: 19.645536 +epoch: 0, batch: 29794, sum loss: 5006.098145, avg loss: 2.872116, ppl: 17.674376 +epoch: 0, batch: 29795, sum loss: 5188.844238, avg loss: 2.982094, ppl: 19.729092 +epoch: 0, batch: 29796, sum loss: 3690.807617, avg loss: 2.626909, ppl: 13.830956 +epoch: 0, batch: 29797, sum loss: 4882.559570, avg loss: 2.887380, ppl: 17.946232 +epoch: 0, batch: 29798, sum loss: 4803.842773, avg loss: 3.117354, ppl: 22.586540 +epoch: 0, batch: 29799, sum loss: 5219.107422, avg loss: 2.997764, ppl: 20.040678 +epoch: 0, batch: 29800, sum loss: 5200.294922, avg loss: 3.064405, ppl: 21.421711 +epoch: 0, batch: 29801, sum loss: 4935.057129, avg loss: 3.215021, ppl: 24.903812 +epoch: 0, batch: 29802, sum loss: 5341.141113, avg loss: 2.902794, ppl: 18.224993 +epoch: 0, batch: 29803, sum loss: 5299.877930, avg loss: 3.081324, ppl: 21.787237 +epoch: 0, batch: 29804, sum loss: 5194.165527, avg loss: 2.880846, ppl: 17.829351 +epoch: 0, batch: 29805, sum loss: 4945.164551, avg loss: 3.198684, ppl: 24.500254 +epoch: 0, batch: 29806, sum loss: 5105.958984, avg loss: 3.167468, ppl: 23.747288 +epoch: 0, batch: 29807, sum loss: 4551.202148, avg loss: 2.972699, ppl: 
19.544603 +epoch: 0, batch: 29808, sum loss: 4992.240234, avg loss: 3.012818, ppl: 20.344658 +epoch: 0, batch: 29809, sum loss: 5213.499023, avg loss: 2.907696, ppl: 18.314558 +epoch: 0, batch: 29810, sum loss: 5781.057129, avg loss: 3.197487, ppl: 24.470966 +epoch: 0, batch: 29811, sum loss: 4591.591797, avg loss: 3.003003, ppl: 20.145952 +epoch: 0, batch: 29812, sum loss: 4658.183105, avg loss: 2.754692, ppl: 15.716193 +epoch: 0, batch: 29813, sum loss: 4256.230957, avg loss: 2.767380, ppl: 15.916881 +epoch: 0, batch: 29814, sum loss: 4653.941406, avg loss: 2.964294, ppl: 19.381014 +epoch: 0, batch: 29815, sum loss: 4935.918457, avg loss: 3.297207, ppl: 27.037010 +epoch: 0, batch: 29816, sum loss: 5816.222168, avg loss: 3.125321, ppl: 22.767200 +epoch: 0, batch: 29817, sum loss: 4825.720703, avg loss: 2.872453, ppl: 17.680334 +epoch: 0, batch: 29818, sum loss: 4594.879883, avg loss: 2.909994, ppl: 18.356682 +epoch: 0, batch: 29819, sum loss: 4763.236816, avg loss: 2.931223, ppl: 18.750542 +epoch: 0, batch: 29820, sum loss: 5415.555664, avg loss: 2.760222, ppl: 15.803350 +epoch: 0, batch: 29821, sum loss: 5302.020996, avg loss: 3.131731, ppl: 22.913614 +epoch: 0, batch: 29822, sum loss: 4495.582520, avg loss: 2.863428, ppl: 17.521494 +epoch: 0, batch: 29823, sum loss: 4538.475586, avg loss: 2.789475, ppl: 16.272470 +epoch: 0, batch: 29824, sum loss: 5037.006836, avg loss: 3.019788, ppl: 20.486954 +epoch: 0, batch: 29825, sum loss: 4595.125000, avg loss: 2.913840, ppl: 18.427416 +epoch: 0, batch: 29826, sum loss: 5672.961914, avg loss: 3.155151, ppl: 23.456581 +epoch: 0, batch: 29827, sum loss: 4106.598633, avg loss: 2.958644, ppl: 19.271830 +epoch: 0, batch: 29828, sum loss: 4521.320312, avg loss: 2.958979, ppl: 19.278286 +epoch: 0, batch: 29829, sum loss: 5397.861328, avg loss: 3.109367, ppl: 22.406858 +epoch: 0, batch: 29830, sum loss: 4095.596924, avg loss: 2.938018, ppl: 18.878389 +epoch: 0, batch: 29831, sum loss: 5175.162109, avg loss: 3.035286, ppl: 
20.806921 +epoch: 0, batch: 29832, sum loss: 4539.769531, avg loss: 2.858797, ppl: 17.440535 +epoch: 0, batch: 29833, sum loss: 4554.967773, avg loss: 2.845077, ppl: 17.202879 +epoch: 0, batch: 29834, sum loss: 5613.861328, avg loss: 3.228212, ppl: 25.234507 +epoch: 0, batch: 29835, sum loss: 5325.151855, avg loss: 3.034275, ppl: 20.785894 +epoch: 0, batch: 29836, sum loss: 4304.121094, avg loss: 2.793070, ppl: 16.331081 +epoch: 0, batch: 29837, sum loss: 5289.891113, avg loss: 3.233430, ppl: 25.366512 +epoch: 0, batch: 29838, sum loss: 5405.754883, avg loss: 3.161260, ppl: 23.600317 +epoch: 0, batch: 29839, sum loss: 4837.898926, avg loss: 2.953540, ppl: 19.173710 +epoch: 0, batch: 29840, sum loss: 4041.818359, avg loss: 3.029849, ppl: 20.694103 +epoch: 0, batch: 29841, sum loss: 4862.956543, avg loss: 2.874088, ppl: 17.709263 +epoch: 0, batch: 29842, sum loss: 4994.726562, avg loss: 3.133455, ppl: 22.953136 +epoch: 0, batch: 29843, sum loss: 4961.355469, avg loss: 2.967318, ppl: 19.439713 +epoch: 0, batch: 29844, sum loss: 5019.729980, avg loss: 2.844040, ppl: 17.185047 +epoch: 0, batch: 29845, sum loss: 4806.757812, avg loss: 2.983711, ppl: 19.761009 +epoch: 0, batch: 29846, sum loss: 5163.213867, avg loss: 2.970779, ppl: 19.507109 +epoch: 0, batch: 29847, sum loss: 5059.937500, avg loss: 3.125348, ppl: 22.767809 +epoch: 0, batch: 29848, sum loss: 4528.741699, avg loss: 2.850058, ppl: 17.288778 +epoch: 0, batch: 29849, sum loss: 3753.953369, avg loss: 2.576495, ppl: 13.150962 +epoch: 0, batch: 29850, sum loss: 4985.077148, avg loss: 3.017601, ppl: 20.442198 +epoch: 0, batch: 29851, sum loss: 4786.553223, avg loss: 2.738303, ppl: 15.460725 +epoch: 0, batch: 29852, sum loss: 5116.559082, avg loss: 3.016839, ppl: 20.426622 +epoch: 0, batch: 29853, sum loss: 4380.455078, avg loss: 2.827924, ppl: 16.910326 +epoch: 0, batch: 29854, sum loss: 5891.579590, avg loss: 3.104099, ppl: 22.289127 +epoch: 0, batch: 29855, sum loss: 4886.294922, avg loss: 2.874291, ppl: 
17.712864 +epoch: 0, batch: 29856, sum loss: 5794.480469, avg loss: 2.989928, ppl: 19.884256 +epoch: 0, batch: 29857, sum loss: 4874.625000, avg loss: 3.052364, ppl: 21.165318 +epoch: 0, batch: 29858, sum loss: 5359.494141, avg loss: 3.090827, ppl: 21.995260 +epoch: 0, batch: 29859, sum loss: 6199.416504, avg loss: 3.259420, ppl: 26.034431 +epoch: 0, batch: 29860, sum loss: 5524.784180, avg loss: 3.103811, ppl: 22.282715 +epoch: 0, batch: 29861, sum loss: 5107.341797, avg loss: 3.036469, ppl: 20.831566 +epoch: 0, batch: 29862, sum loss: 4422.942383, avg loss: 3.256953, ppl: 25.970291 +epoch: 0, batch: 29863, sum loss: 5349.834961, avg loss: 2.988734, ppl: 19.860533 +epoch: 0, batch: 29864, sum loss: 4388.010742, avg loss: 2.604161, ppl: 13.519872 +epoch: 0, batch: 29865, sum loss: 4441.583008, avg loss: 2.714904, ppl: 15.103161 +epoch: 0, batch: 29866, sum loss: 4197.112305, avg loss: 3.026036, ppl: 20.615356 +epoch: 0, batch: 29867, sum loss: 5308.833008, avg loss: 2.880539, ppl: 17.823872 +epoch: 0, batch: 29868, sum loss: 5406.281250, avg loss: 3.008504, ppl: 20.257071 +epoch: 0, batch: 29869, sum loss: 5525.924316, avg loss: 3.085385, ppl: 21.875885 +epoch: 0, batch: 29870, sum loss: 5082.048828, avg loss: 2.975438, ppl: 19.598213 +epoch: 0, batch: 29871, sum loss: 4694.372070, avg loss: 3.044340, ppl: 20.996166 +epoch: 0, batch: 29872, sum loss: 4214.260742, avg loss: 2.834069, ppl: 17.014553 +epoch: 0, batch: 29873, sum loss: 5498.514160, avg loss: 3.066656, ppl: 21.469990 +epoch: 0, batch: 29874, sum loss: 4838.484375, avg loss: 2.999680, ppl: 20.079117 +epoch: 0, batch: 29875, sum loss: 5301.525391, avg loss: 2.950209, ppl: 19.109945 +epoch: 0, batch: 29876, sum loss: 5906.730957, avg loss: 3.240116, ppl: 25.536680 +epoch: 0, batch: 29877, sum loss: 6135.905762, avg loss: 3.380664, ppl: 29.390289 +epoch: 0, batch: 29878, sum loss: 5013.047363, avg loss: 2.867876, ppl: 17.599602 +epoch: 0, batch: 29879, sum loss: 5443.875488, avg loss: 3.002689, ppl: 
20.139627 +epoch: 0, batch: 29880, sum loss: 5280.394531, avg loss: 3.137489, ppl: 23.045933 +epoch: 0, batch: 29881, sum loss: 5906.722168, avg loss: 3.218922, ppl: 25.001163 +epoch: 0, batch: 29882, sum loss: 5729.272461, avg loss: 3.127332, ppl: 22.813038 +epoch: 0, batch: 29883, sum loss: 5465.852539, avg loss: 3.014811, ppl: 20.385244 +epoch: 0, batch: 29884, sum loss: 5430.450684, avg loss: 2.973960, ppl: 19.569260 +epoch: 0, batch: 29885, sum loss: 5884.773438, avg loss: 2.969109, ppl: 19.474552 +epoch: 0, batch: 29886, sum loss: 5136.055664, avg loss: 2.926527, ppl: 18.662706 +epoch: 0, batch: 29887, sum loss: 3906.655762, avg loss: 2.881015, ppl: 17.832357 +epoch: 0, batch: 29888, sum loss: 5622.777344, avg loss: 3.026253, ppl: 20.619816 +epoch: 0, batch: 29889, sum loss: 5806.095703, avg loss: 3.098237, ppl: 22.158846 +epoch: 0, batch: 29890, sum loss: 5210.505371, avg loss: 3.121933, ppl: 22.690191 +epoch: 0, batch: 29891, sum loss: 4099.208008, avg loss: 2.819263, ppl: 16.764486 +epoch: 0, batch: 29892, sum loss: 5464.475586, avg loss: 3.266273, ppl: 26.213472 +epoch: 0, batch: 29893, sum loss: 4782.085938, avg loss: 3.085217, ppl: 21.872208 +epoch: 0, batch: 29894, sum loss: 5983.531738, avg loss: 3.084295, ppl: 21.852051 +epoch: 0, batch: 29895, sum loss: 3742.612061, avg loss: 2.803455, ppl: 16.501554 +epoch: 0, batch: 29896, sum loss: 4805.610840, avg loss: 2.889724, ppl: 17.988344 +epoch: 0, batch: 29897, sum loss: 5649.434570, avg loss: 3.191771, ppl: 24.331482 +epoch: 0, batch: 29898, sum loss: 4850.341797, avg loss: 3.027679, ppl: 20.649250 +epoch: 0, batch: 29899, sum loss: 5637.389648, avg loss: 3.167073, ppl: 23.737896 +epoch: 0, batch: 29900, sum loss: 4484.825684, avg loss: 2.734650, ppl: 15.404349 +epoch: 0, batch: 29901, sum loss: 4832.604492, avg loss: 2.981249, ppl: 19.712420 +epoch: 0, batch: 29902, sum loss: 3624.980957, avg loss: 2.481164, ppl: 11.955174 +epoch: 0, batch: 29903, sum loss: 5478.971680, avg loss: 3.165206, ppl: 
23.693623 +epoch: 0, batch: 29904, sum loss: 4568.566895, avg loss: 2.932328, ppl: 18.771278 +epoch: 0, batch: 29905, sum loss: 4535.386230, avg loss: 2.789290, ppl: 16.269472 +epoch: 0, batch: 29906, sum loss: 4676.503906, avg loss: 2.946758, ppl: 19.044104 +epoch: 0, batch: 29907, sum loss: 5372.576660, avg loss: 3.123591, ppl: 22.727854 +epoch: 0, batch: 29908, sum loss: 4810.352051, avg loss: 2.922450, ppl: 18.586765 +epoch: 0, batch: 29909, sum loss: 4107.733398, avg loss: 2.593266, ppl: 13.373378 +epoch: 0, batch: 29910, sum loss: 4795.803223, avg loss: 2.870020, ppl: 17.637373 +epoch: 0, batch: 29911, sum loss: 5170.898438, avg loss: 3.087104, ppl: 21.913521 +epoch: 0, batch: 29912, sum loss: 4775.087891, avg loss: 2.922330, ppl: 18.584545 +epoch: 0, batch: 29913, sum loss: 4569.303223, avg loss: 2.882841, ppl: 17.864958 +epoch: 0, batch: 29914, sum loss: 5013.315430, avg loss: 2.892853, ppl: 18.044727 +epoch: 0, batch: 29915, sum loss: 5140.205078, avg loss: 3.145780, ppl: 23.237801 +epoch: 0, batch: 29916, sum loss: 4397.480469, avg loss: 2.829782, ppl: 16.941759 +epoch: 0, batch: 29917, sum loss: 3569.149170, avg loss: 2.403467, ppl: 11.061464 +epoch: 0, batch: 29918, sum loss: 4880.222656, avg loss: 3.079005, ppl: 21.736759 +epoch: 0, batch: 29919, sum loss: 4623.664062, avg loss: 2.907965, ppl: 18.319475 +epoch: 0, batch: 29920, sum loss: 4929.180176, avg loss: 2.829610, ppl: 16.938847 +epoch: 0, batch: 29921, sum loss: 5079.308594, avg loss: 2.994875, ppl: 19.982870 +epoch: 0, batch: 29922, sum loss: 6530.713867, avg loss: 3.168711, ppl: 23.776821 +epoch: 0, batch: 29923, sum loss: 5835.655762, avg loss: 2.878962, ppl: 17.795788 +epoch: 0, batch: 29924, sum loss: 5028.534668, avg loss: 2.893288, ppl: 18.052572 +epoch: 0, batch: 29925, sum loss: 5512.775391, avg loss: 3.033999, ppl: 20.780161 +epoch: 0, batch: 29926, sum loss: 5402.636719, avg loss: 3.038604, ppl: 20.876070 +epoch: 0, batch: 29927, sum loss: 5679.115723, avg loss: 3.000061, ppl: 
20.086763 +epoch: 0, batch: 29928, sum loss: 4462.648438, avg loss: 3.069222, ppl: 21.525145 +epoch: 0, batch: 29929, sum loss: 5100.597656, avg loss: 2.904669, ppl: 18.259192 +epoch: 0, batch: 29930, sum loss: 5008.010254, avg loss: 2.845460, ppl: 17.209476 +epoch: 0, batch: 29931, sum loss: 4408.505371, avg loss: 2.662141, ppl: 14.326928 +epoch: 0, batch: 29932, sum loss: 5554.790039, avg loss: 2.981637, ppl: 19.720072 +epoch: 0, batch: 29933, sum loss: 4913.487793, avg loss: 2.856679, ppl: 17.403633 +epoch: 0, batch: 29934, sum loss: 5039.323730, avg loss: 3.006757, ppl: 20.221704 +epoch: 0, batch: 29935, sum loss: 4341.144043, avg loss: 3.129880, ppl: 22.871239 +epoch: 0, batch: 29936, sum loss: 4294.693848, avg loss: 2.697672, ppl: 14.845130 +epoch: 0, batch: 29937, sum loss: 5349.396484, avg loss: 3.006968, ppl: 20.225986 +epoch: 0, batch: 29938, sum loss: 5628.586426, avg loss: 3.214498, ppl: 24.890800 +epoch: 0, batch: 29939, sum loss: 5829.702637, avg loss: 3.125846, ppl: 22.779156 +epoch: 0, batch: 29940, sum loss: 5334.024414, avg loss: 3.005084, ppl: 20.187912 +epoch: 0, batch: 29941, sum loss: 4785.914551, avg loss: 2.923589, ppl: 18.607946 +epoch: 0, batch: 29942, sum loss: 4976.022949, avg loss: 3.106132, ppl: 22.334476 +epoch: 0, batch: 29943, sum loss: 5120.940918, avg loss: 2.943069, ppl: 18.973997 +epoch: 0, batch: 29944, sum loss: 5509.544434, avg loss: 3.074523, ppl: 21.639551 +epoch: 0, batch: 29945, sum loss: 4835.466309, avg loss: 2.957472, ppl: 19.249241 +epoch: 0, batch: 29946, sum loss: 4441.361328, avg loss: 2.872808, ppl: 17.686617 +epoch: 0, batch: 29947, sum loss: 5297.068848, avg loss: 3.147397, ppl: 23.275394 +epoch: 0, batch: 29948, sum loss: 5097.261719, avg loss: 3.134847, ppl: 22.985123 +epoch: 0, batch: 29949, sum loss: 4269.584473, avg loss: 2.835050, ppl: 17.031246 +epoch: 0, batch: 29950, sum loss: 5422.619141, avg loss: 3.053277, ppl: 21.184643 +epoch: 0, batch: 29951, sum loss: 4937.273438, avg loss: 2.762884, ppl: 
15.845474 +epoch: 0, batch: 29952, sum loss: 4958.387695, avg loss: 2.988781, ppl: 19.861456 +epoch: 0, batch: 29953, sum loss: 5089.185059, avg loss: 2.779457, ppl: 16.110268 +epoch: 0, batch: 29954, sum loss: 5193.363770, avg loss: 2.979555, ppl: 19.679050 +epoch: 0, batch: 29955, sum loss: 4517.435547, avg loss: 2.884697, ppl: 17.898144 +epoch: 0, batch: 29956, sum loss: 4655.864258, avg loss: 2.792960, ppl: 16.329283 +epoch: 0, batch: 29957, sum loss: 5959.767578, avg loss: 3.207625, ppl: 24.720310 +epoch: 0, batch: 29958, sum loss: 4789.046875, avg loss: 2.724145, ppl: 15.243374 +epoch: 0, batch: 29959, sum loss: 4523.773438, avg loss: 3.023913, ppl: 20.571625 +epoch: 0, batch: 29960, sum loss: 5838.421387, avg loss: 3.178237, ppl: 24.004402 +epoch: 0, batch: 29961, sum loss: 4314.980957, avg loss: 2.990285, ppl: 19.891344 +epoch: 0, batch: 29962, sum loss: 4664.611816, avg loss: 2.990136, ppl: 19.888386 +epoch: 0, batch: 29963, sum loss: 5041.190918, avg loss: 3.000709, ppl: 20.099783 +epoch: 0, batch: 29964, sum loss: 4726.573730, avg loss: 3.029855, ppl: 20.694231 +epoch: 0, batch: 29965, sum loss: 4316.616211, avg loss: 2.706342, ppl: 14.974405 +epoch: 0, batch: 29966, sum loss: 5308.767090, avg loss: 3.301472, ppl: 27.152576 +epoch: 0, batch: 29967, sum loss: 4733.521973, avg loss: 2.965866, ppl: 19.411503 +epoch: 0, batch: 29968, sum loss: 5481.801758, avg loss: 2.961535, ppl: 19.327621 +epoch: 0, batch: 29969, sum loss: 4953.971680, avg loss: 2.873534, ppl: 17.699453 +epoch: 0, batch: 29970, sum loss: 6089.630371, avg loss: 3.205069, ppl: 24.657198 +epoch: 0, batch: 29971, sum loss: 4818.800781, avg loss: 3.104897, ppl: 22.306932 +epoch: 0, batch: 29972, sum loss: 5230.817383, avg loss: 2.963636, ppl: 19.368265 +epoch: 0, batch: 29973, sum loss: 4678.167969, avg loss: 2.885977, ppl: 17.921061 +epoch: 0, batch: 29974, sum loss: 5998.843750, avg loss: 3.100178, ppl: 22.201893 +epoch: 0, batch: 29975, sum loss: 4974.616699, avg loss: 3.027764, ppl: 
20.651007 +epoch: 0, batch: 29976, sum loss: 4802.232422, avg loss: 2.798504, ppl: 16.420061 +epoch: 0, batch: 29977, sum loss: 5135.021973, avg loss: 3.098987, ppl: 22.175484 +epoch: 0, batch: 29978, sum loss: 4433.569336, avg loss: 2.772714, ppl: 16.001999 +epoch: 0, batch: 29979, sum loss: 4823.393555, avg loss: 3.033581, ppl: 20.771477 +epoch: 0, batch: 29980, sum loss: 5225.329590, avg loss: 3.057536, ppl: 21.275078 +epoch: 0, batch: 29981, sum loss: 4958.144531, avg loss: 3.091113, ppl: 22.001543 +epoch: 0, batch: 29982, sum loss: 4853.674805, avg loss: 2.748400, ppl: 15.617627 +epoch: 0, batch: 29983, sum loss: 5742.514648, avg loss: 3.136272, ppl: 23.017906 +epoch: 0, batch: 29984, sum loss: 6146.022461, avg loss: 3.263952, ppl: 26.152695 +epoch: 0, batch: 29985, sum loss: 5543.659180, avg loss: 3.139105, ppl: 23.083195 +epoch: 0, batch: 29986, sum loss: 4427.955566, avg loss: 2.879035, ppl: 17.797087 +epoch: 0, batch: 29987, sum loss: 5167.139160, avg loss: 2.979896, ppl: 19.685760 +epoch: 0, batch: 29988, sum loss: 5814.665039, avg loss: 3.207206, ppl: 24.709963 +epoch: 0, batch: 29989, sum loss: 4826.833496, avg loss: 3.084239, ppl: 21.850822 +epoch: 0, batch: 29990, sum loss: 5159.968750, avg loss: 2.877841, ppl: 17.775854 +epoch: 0, batch: 29991, sum loss: 5427.602539, avg loss: 3.040674, ppl: 20.919333 +epoch: 0, batch: 29992, sum loss: 5076.534668, avg loss: 3.164922, ppl: 23.686897 +epoch: 0, batch: 29993, sum loss: 4362.599609, avg loss: 2.814580, ppl: 16.686174 +epoch: 0, batch: 29994, sum loss: 4347.205078, avg loss: 2.860003, ppl: 17.461588 +epoch: 0, batch: 29995, sum loss: 5095.026367, avg loss: 3.043624, ppl: 20.981144 +epoch: 0, batch: 29996, sum loss: 4400.605469, avg loss: 2.792262, ppl: 16.317894 +epoch: 0, batch: 29997, sum loss: 5441.613770, avg loss: 3.197188, ppl: 24.463638 +epoch: 0, batch: 29998, sum loss: 4856.272949, avg loss: 2.756114, ppl: 15.738567 +epoch: 0, batch: 29999, sum loss: 4034.641113, avg loss: 2.869588, ppl: 
17.629755 +epoch: 0, batch: 30000, sum loss: 5559.104980, avg loss: 3.169387, ppl: 23.792898 +epoch: 0, batch: 30001, sum loss: 4502.676270, avg loss: 2.916241, ppl: 18.471729 +epoch: 0, batch: 30002, sum loss: 6124.177734, avg loss: 3.523693, ppl: 33.909412 +epoch: 0, batch: 30003, sum loss: 4748.600098, avg loss: 2.980917, ppl: 19.705870 +epoch: 0, batch: 30004, sum loss: 5385.447754, avg loss: 3.220962, ppl: 25.052204 +epoch: 0, batch: 30005, sum loss: 5077.259766, avg loss: 2.933137, ppl: 18.786467 +epoch: 0, batch: 30006, sum loss: 3890.634521, avg loss: 2.846112, ppl: 17.220690 +epoch: 0, batch: 30007, sum loss: 4755.934082, avg loss: 2.924929, ppl: 18.632896 +epoch: 0, batch: 30008, sum loss: 4421.453125, avg loss: 2.756517, ppl: 15.744906 +epoch: 0, batch: 30009, sum loss: 5142.713867, avg loss: 2.907131, ppl: 18.304203 +epoch: 0, batch: 30010, sum loss: 5320.316895, avg loss: 3.080670, ppl: 21.772984 +epoch: 0, batch: 30011, sum loss: 4957.894043, avg loss: 3.133940, ppl: 22.964291 +epoch: 0, batch: 30012, sum loss: 4334.784668, avg loss: 2.748754, ppl: 15.623150 +epoch: 0, batch: 30013, sum loss: 4071.971436, avg loss: 2.696670, ppl: 14.830262 +epoch: 0, batch: 30014, sum loss: 5821.044434, avg loss: 2.829871, ppl: 16.943270 +epoch: 0, batch: 30015, sum loss: 5745.561035, avg loss: 3.100681, ppl: 22.213064 +epoch: 0, batch: 30016, sum loss: 5226.182617, avg loss: 2.979579, ppl: 19.679539 +epoch: 0, batch: 30017, sum loss: 4648.008301, avg loss: 2.778248, ppl: 16.090803 +epoch: 0, batch: 30018, sum loss: 5830.409668, avg loss: 2.842716, ppl: 17.162306 +epoch: 0, batch: 30019, sum loss: 4614.834473, avg loss: 3.044086, ppl: 20.990841 +epoch: 0, batch: 30020, sum loss: 4882.192871, avg loss: 2.975133, ppl: 19.592222 +epoch: 0, batch: 30021, sum loss: 4707.980957, avg loss: 2.918773, ppl: 18.518555 +epoch: 0, batch: 30022, sum loss: 5527.802246, avg loss: 3.318008, ppl: 27.605318 +epoch: 0, batch: 30023, sum loss: 5094.130859, avg loss: 2.869933, ppl: 
17.635830 +epoch: 0, batch: 30024, sum loss: 5412.962891, avg loss: 3.143416, ppl: 23.182928 +epoch: 0, batch: 30025, sum loss: 4596.530273, avg loss: 2.595443, ppl: 13.402527 +epoch: 0, batch: 30026, sum loss: 5005.540039, avg loss: 2.947904, ppl: 19.065943 +epoch: 0, batch: 30027, sum loss: 6016.712891, avg loss: 3.271731, ppl: 26.356920 +epoch: 0, batch: 30028, sum loss: 4623.434570, avg loss: 3.092598, ppl: 22.034252 +epoch: 0, batch: 30029, sum loss: 4575.338867, avg loss: 3.008112, ppl: 20.249142 +epoch: 0, batch: 30030, sum loss: 4233.954590, avg loss: 2.878283, ppl: 17.783716 +epoch: 0, batch: 30031, sum loss: 4114.937500, avg loss: 2.700090, ppl: 14.881074 +epoch: 0, batch: 30032, sum loss: 4949.324219, avg loss: 2.892650, ppl: 18.041058 +epoch: 0, batch: 30033, sum loss: 4179.250977, avg loss: 2.573430, ppl: 13.110721 +epoch: 0, batch: 30034, sum loss: 4638.608887, avg loss: 2.977284, ppl: 19.634426 +epoch: 0, batch: 30035, sum loss: 4806.514648, avg loss: 3.071255, ppl: 21.568958 +epoch: 0, batch: 30036, sum loss: 5745.088379, avg loss: 3.093747, ppl: 22.059578 +epoch: 0, batch: 30037, sum loss: 5983.635254, avg loss: 3.081172, ppl: 21.783913 +epoch: 0, batch: 30038, sum loss: 4072.193848, avg loss: 2.527743, ppl: 12.525207 +epoch: 0, batch: 30039, sum loss: 4631.320801, avg loss: 2.844792, ppl: 17.197975 +epoch: 0, batch: 30040, sum loss: 5684.847656, avg loss: 2.931845, ppl: 18.762218 +epoch: 0, batch: 30041, sum loss: 4175.395508, avg loss: 2.863783, ppl: 17.527706 +epoch: 0, batch: 30042, sum loss: 5300.048828, avg loss: 3.158551, ppl: 23.536472 +epoch: 0, batch: 30043, sum loss: 4980.620605, avg loss: 2.938419, ppl: 18.885971 +epoch: 0, batch: 30044, sum loss: 6368.446289, avg loss: 3.187411, ppl: 24.225616 +epoch: 0, batch: 30045, sum loss: 4463.097656, avg loss: 2.888736, ppl: 17.970589 +epoch: 0, batch: 30046, sum loss: 5449.610352, avg loss: 3.135564, ppl: 23.001608 +epoch: 0, batch: 30047, sum loss: 5131.623047, avg loss: 2.910733, ppl: 
18.370268 +epoch: 0, batch: 30048, sum loss: 4914.982422, avg loss: 3.048996, ppl: 21.094164 +epoch: 0, batch: 30049, sum loss: 5389.238770, avg loss: 2.980774, ppl: 19.703054 +epoch: 0, batch: 30050, sum loss: 4950.837402, avg loss: 2.975263, ppl: 19.594774 +epoch: 0, batch: 30051, sum loss: 5519.765137, avg loss: 3.154151, ppl: 23.433144 +epoch: 0, batch: 30052, sum loss: 5508.922363, avg loss: 3.075892, ppl: 21.669197 +epoch: 0, batch: 30053, sum loss: 4487.401367, avg loss: 2.824041, ppl: 16.844786 +epoch: 0, batch: 30054, sum loss: 4552.516113, avg loss: 2.883164, ppl: 17.870729 +epoch: 0, batch: 30055, sum loss: 4571.587402, avg loss: 2.947510, ppl: 19.058430 +epoch: 0, batch: 30056, sum loss: 4070.370361, avg loss: 2.822726, ppl: 16.822643 +epoch: 0, batch: 30057, sum loss: 4895.872070, avg loss: 3.073366, ppl: 21.614538 +epoch: 0, batch: 30058, sum loss: 5240.143555, avg loss: 3.191318, ppl: 24.320456 +epoch: 0, batch: 30059, sum loss: 5122.604004, avg loss: 2.967905, ppl: 19.451122 +epoch: 0, batch: 30060, sum loss: 5716.969238, avg loss: 3.062115, ppl: 21.372717 +epoch: 0, batch: 30061, sum loss: 4894.246094, avg loss: 2.860460, ppl: 17.469553 +epoch: 0, batch: 30062, sum loss: 4808.781250, avg loss: 2.972053, ppl: 19.531969 +epoch: 0, batch: 30063, sum loss: 5512.139160, avg loss: 3.149794, ppl: 23.331255 +epoch: 0, batch: 30064, sum loss: 6085.638184, avg loss: 3.108089, ppl: 22.378239 +epoch: 0, batch: 30065, sum loss: 5348.567383, avg loss: 3.222028, ppl: 25.078941 +epoch: 0, batch: 30066, sum loss: 5274.550781, avg loss: 2.882268, ppl: 17.854725 +epoch: 0, batch: 30067, sum loss: 4657.922363, avg loss: 3.076567, ppl: 21.683832 +epoch: 0, batch: 30068, sum loss: 4860.238281, avg loss: 3.068332, ppl: 21.506004 +epoch: 0, batch: 30069, sum loss: 5435.721680, avg loss: 3.083223, ppl: 21.828634 +epoch: 0, batch: 30070, sum loss: 4625.822266, avg loss: 2.929590, ppl: 18.719950 +epoch: 0, batch: 30071, sum loss: 5270.080078, avg loss: 2.932710, ppl: 
18.778454 +epoch: 0, batch: 30072, sum loss: 4719.851562, avg loss: 3.033324, ppl: 20.766134 +epoch: 0, batch: 30073, sum loss: 6118.127930, avg loss: 3.179900, ppl: 24.044353 +epoch: 0, batch: 30074, sum loss: 5770.247070, avg loss: 3.261870, ppl: 26.098288 +epoch: 0, batch: 30075, sum loss: 4416.457520, avg loss: 2.974045, ppl: 19.570929 +epoch: 0, batch: 30076, sum loss: 5036.976562, avg loss: 2.980459, ppl: 19.696865 +epoch: 0, batch: 30077, sum loss: 6586.464844, avg loss: 3.236592, ppl: 25.446852 +epoch: 0, batch: 30078, sum loss: 5215.464844, avg loss: 2.857789, ppl: 17.422962 +epoch: 0, batch: 30079, sum loss: 5327.047852, avg loss: 3.004539, ppl: 20.176918 +epoch: 0, batch: 30080, sum loss: 4759.715820, avg loss: 3.183756, ppl: 24.137251 +epoch: 0, batch: 30081, sum loss: 5400.558594, avg loss: 2.939880, ppl: 18.913570 +epoch: 0, batch: 30082, sum loss: 5038.278809, avg loss: 3.087181, ppl: 21.915209 +epoch: 0, batch: 30083, sum loss: 5456.025391, avg loss: 3.002766, ppl: 20.141169 +epoch: 0, batch: 30084, sum loss: 5508.532715, avg loss: 3.120982, ppl: 22.668623 +epoch: 0, batch: 30085, sum loss: 4520.184082, avg loss: 2.802346, ppl: 16.483271 +epoch: 0, batch: 30086, sum loss: 5154.987793, avg loss: 3.073934, ppl: 21.626822 +epoch: 0, batch: 30087, sum loss: 4671.052734, avg loss: 2.867436, ppl: 17.591850 +epoch: 0, batch: 30088, sum loss: 4572.588867, avg loss: 2.674029, ppl: 14.498260 +epoch: 0, batch: 30089, sum loss: 4840.519531, avg loss: 2.891589, ppl: 18.021923 +epoch: 0, batch: 30090, sum loss: 5606.118652, avg loss: 3.321161, ppl: 27.692471 +epoch: 0, batch: 30091, sum loss: 5114.921875, avg loss: 2.912826, ppl: 18.408745 +epoch: 0, batch: 30092, sum loss: 4723.645508, avg loss: 2.720994, ppl: 15.195419 +epoch: 0, batch: 30093, sum loss: 6013.149414, avg loss: 3.210438, ppl: 24.789930 +epoch: 0, batch: 30094, sum loss: 6205.749512, avg loss: 3.203794, ppl: 24.625790 +epoch: 0, batch: 30095, sum loss: 4723.652832, avg loss: 2.801692, ppl: 
16.472498 +epoch: 0, batch: 30096, sum loss: 4947.832520, avg loss: 2.729086, ppl: 15.318878 +epoch: 0, batch: 30097, sum loss: 4581.040039, avg loss: 2.803574, ppl: 16.503527 +epoch: 0, batch: 30098, sum loss: 4906.067871, avg loss: 2.927248, ppl: 18.676167 +epoch: 0, batch: 30099, sum loss: 5426.839355, avg loss: 3.190382, ppl: 24.297701 +epoch: 0, batch: 30100, sum loss: 4305.272461, avg loss: 3.025490, ppl: 20.604099 +epoch: 0, batch: 30101, sum loss: 6242.923828, avg loss: 3.137148, ppl: 23.038061 +epoch: 0, batch: 30102, sum loss: 4857.063477, avg loss: 3.099594, ppl: 22.188944 +epoch: 0, batch: 30103, sum loss: 5503.636230, avg loss: 2.983001, ppl: 19.746979 +epoch: 0, batch: 30104, sum loss: 5418.640625, avg loss: 3.007015, ppl: 20.226933 +epoch: 0, batch: 30105, sum loss: 5096.045898, avg loss: 2.810836, ppl: 16.623816 +epoch: 0, batch: 30106, sum loss: 5302.565430, avg loss: 3.065067, ppl: 21.435888 +epoch: 0, batch: 30107, sum loss: 6247.905273, avg loss: 3.250731, ppl: 25.809206 +epoch: 0, batch: 30108, sum loss: 4360.916992, avg loss: 2.820774, ppl: 16.789846 +epoch: 0, batch: 30109, sum loss: 5056.091797, avg loss: 2.982945, ppl: 19.745882 +epoch: 0, batch: 30110, sum loss: 4463.559082, avg loss: 2.767241, ppl: 15.914658 +epoch: 0, batch: 30111, sum loss: 4449.986328, avg loss: 2.854385, ppl: 17.363758 +epoch: 0, batch: 30112, sum loss: 5024.656738, avg loss: 2.800812, ppl: 16.458006 +epoch: 0, batch: 30113, sum loss: 4830.035156, avg loss: 3.198699, ppl: 24.500628 +epoch: 0, batch: 30114, sum loss: 5538.340820, avg loss: 3.087147, ppl: 21.914455 +epoch: 0, batch: 30115, sum loss: 5145.708008, avg loss: 3.210049, ppl: 24.780293 +epoch: 0, batch: 30116, sum loss: 4596.973633, avg loss: 3.089364, ppl: 21.963106 +epoch: 0, batch: 30117, sum loss: 4965.411133, avg loss: 2.966196, ppl: 19.417906 +epoch: 0, batch: 30118, sum loss: 5696.228516, avg loss: 3.055917, ppl: 21.240644 +epoch: 0, batch: 30119, sum loss: 4813.708984, avg loss: 3.077819, ppl: 
21.711002 +epoch: 0, batch: 30120, sum loss: 4960.302246, avg loss: 3.125584, ppl: 22.773199 +epoch: 0, batch: 30121, sum loss: 6599.650391, avg loss: 3.231954, ppl: 25.329103 +epoch: 0, batch: 30122, sum loss: 6279.870117, avg loss: 3.335035, ppl: 28.079355 +epoch: 0, batch: 30123, sum loss: 4339.876953, avg loss: 2.895181, ppl: 18.086782 +epoch: 0, batch: 30124, sum loss: 5539.654297, avg loss: 3.178230, ppl: 24.004219 +epoch: 0, batch: 30125, sum loss: 5501.562500, avg loss: 3.111744, ppl: 22.460175 +epoch: 0, batch: 30126, sum loss: 5504.064453, avg loss: 3.247236, ppl: 25.719141 +epoch: 0, batch: 30127, sum loss: 4111.951660, avg loss: 2.824143, ppl: 16.846497 +epoch: 0, batch: 30128, sum loss: 5393.880859, avg loss: 2.949088, ppl: 19.088530 +epoch: 0, batch: 30129, sum loss: 4878.071777, avg loss: 2.891566, ppl: 18.021509 +epoch: 0, batch: 30130, sum loss: 5827.520020, avg loss: 3.071966, ppl: 21.584305 +epoch: 0, batch: 30131, sum loss: 4356.555664, avg loss: 2.778415, ppl: 16.093500 +epoch: 0, batch: 30132, sum loss: 6084.362305, avg loss: 3.173898, ppl: 23.900473 +epoch: 0, batch: 30133, sum loss: 4936.399414, avg loss: 2.895249, ppl: 18.088007 +epoch: 0, batch: 30134, sum loss: 4719.896484, avg loss: 3.039212, ppl: 20.888781 +epoch: 0, batch: 30135, sum loss: 6024.493164, avg loss: 3.169118, ppl: 23.786493 +epoch: 0, batch: 30136, sum loss: 5730.852539, avg loss: 3.232291, ppl: 25.337650 +epoch: 0, batch: 30137, sum loss: 4754.131836, avg loss: 2.860489, ppl: 17.470062 +epoch: 0, batch: 30138, sum loss: 4298.479492, avg loss: 2.706851, ppl: 14.982026 +epoch: 0, batch: 30139, sum loss: 4438.064941, avg loss: 2.857737, ppl: 17.422052 +epoch: 0, batch: 30140, sum loss: 4278.581055, avg loss: 2.839138, ppl: 17.101019 +epoch: 0, batch: 30141, sum loss: 4779.978027, avg loss: 2.912844, ppl: 18.409088 +epoch: 0, batch: 30142, sum loss: 4619.452148, avg loss: 2.808177, ppl: 16.579674 +epoch: 0, batch: 30143, sum loss: 6151.337891, avg loss: 3.193841, ppl: 
24.381905 +epoch: 0, batch: 30144, sum loss: 4032.078125, avg loss: 2.982306, ppl: 19.733274 +epoch: 0, batch: 30145, sum loss: 5195.141113, avg loss: 3.366910, ppl: 28.988798 +epoch: 0, batch: 30146, sum loss: 4956.655273, avg loss: 2.966281, ppl: 19.419563 +epoch: 0, batch: 30147, sum loss: 4637.420898, avg loss: 2.535496, ppl: 12.622684 +epoch: 0, batch: 30148, sum loss: 5136.018555, avg loss: 3.073620, ppl: 21.620022 +epoch: 0, batch: 30149, sum loss: 5300.133789, avg loss: 3.067207, ppl: 21.481823 +epoch: 0, batch: 30150, sum loss: 4879.854004, avg loss: 2.961076, ppl: 19.318758 +epoch: 0, batch: 30151, sum loss: 5177.968262, avg loss: 3.102438, ppl: 22.252131 +epoch: 0, batch: 30152, sum loss: 4336.430176, avg loss: 2.939953, ppl: 18.914949 +epoch: 0, batch: 30153, sum loss: 4648.331543, avg loss: 3.203537, ppl: 24.619444 +epoch: 0, batch: 30154, sum loss: 4484.944336, avg loss: 2.999963, ppl: 20.084789 +epoch: 0, batch: 30155, sum loss: 5707.261719, avg loss: 3.047123, ppl: 21.054688 +epoch: 0, batch: 30156, sum loss: 4385.209473, avg loss: 2.869901, ppl: 17.635279 +epoch: 0, batch: 30157, sum loss: 5181.768555, avg loss: 2.853397, ppl: 17.346601 +epoch: 0, batch: 30158, sum loss: 4135.633301, avg loss: 2.731594, ppl: 15.357348 +epoch: 0, batch: 30159, sum loss: 5219.204102, avg loss: 2.958733, ppl: 19.273529 +epoch: 0, batch: 30160, sum loss: 5715.218750, avg loss: 3.180422, ppl: 24.056911 +epoch: 0, batch: 30161, sum loss: 5754.548828, avg loss: 3.286436, ppl: 26.747362 +epoch: 0, batch: 30162, sum loss: 4947.394531, avg loss: 2.939628, ppl: 18.908817 +epoch: 0, batch: 30163, sum loss: 4569.162598, avg loss: 2.908442, ppl: 18.328226 +epoch: 0, batch: 30164, sum loss: 5126.254883, avg loss: 3.392624, ppl: 29.743895 +epoch: 0, batch: 30165, sum loss: 5483.287598, avg loss: 3.119049, ppl: 22.624844 +epoch: 0, batch: 30166, sum loss: 5130.009277, avg loss: 2.883648, ppl: 17.879374 +epoch: 0, batch: 30167, sum loss: 5035.516602, avg loss: 3.145232, ppl: 
23.225067 +epoch: 0, batch: 30168, sum loss: 4876.527832, avg loss: 3.092282, ppl: 22.027277 +epoch: 0, batch: 30169, sum loss: 4415.538574, avg loss: 2.865372, ppl: 17.555586 +epoch: 0, batch: 30170, sum loss: 5248.711426, avg loss: 2.832548, ppl: 16.988691 +epoch: 0, batch: 30171, sum loss: 5837.526367, avg loss: 3.023059, ppl: 20.554064 +epoch: 0, batch: 30172, sum loss: 5185.562500, avg loss: 3.068380, ppl: 21.507036 +epoch: 0, batch: 30173, sum loss: 4382.352051, avg loss: 2.703487, ppl: 14.931704 +epoch: 0, batch: 30174, sum loss: 5233.278809, avg loss: 2.983625, ppl: 19.759317 +epoch: 0, batch: 30175, sum loss: 4598.195801, avg loss: 3.005357, ppl: 20.193420 +epoch: 0, batch: 30176, sum loss: 5550.259277, avg loss: 2.971231, ppl: 19.515924 +epoch: 0, batch: 30177, sum loss: 4943.216309, avg loss: 2.942391, ppl: 18.961123 +epoch: 0, batch: 30178, sum loss: 5754.890137, avg loss: 3.255028, ppl: 25.920353 +epoch: 0, batch: 30179, sum loss: 3612.076172, avg loss: 2.719937, ppl: 15.179367 +epoch: 0, batch: 30180, sum loss: 4188.563477, avg loss: 2.777562, ppl: 16.079769 +epoch: 0, batch: 30181, sum loss: 4565.604980, avg loss: 3.017584, ppl: 20.441851 +epoch: 0, batch: 30182, sum loss: 3733.679199, avg loss: 2.600055, ppl: 13.464481 +epoch: 0, batch: 30183, sum loss: 5115.165039, avg loss: 3.107634, ppl: 22.368055 +epoch: 0, batch: 30184, sum loss: 6007.819336, avg loss: 3.180423, ppl: 24.056934 +epoch: 0, batch: 30185, sum loss: 4843.813965, avg loss: 3.056034, ppl: 21.243141 +epoch: 0, batch: 30186, sum loss: 4404.782715, avg loss: 2.871436, ppl: 17.662361 +epoch: 0, batch: 30187, sum loss: 4749.800781, avg loss: 2.979800, ppl: 19.683874 +epoch: 0, batch: 30188, sum loss: 3983.297119, avg loss: 2.698711, ppl: 14.860563 +epoch: 0, batch: 30189, sum loss: 4532.745605, avg loss: 2.978151, ppl: 19.651449 +epoch: 0, batch: 30190, sum loss: 4808.594727, avg loss: 2.838604, ppl: 17.091885 +epoch: 0, batch: 30191, sum loss: 5080.104004, avg loss: 3.018481, ppl: 
20.460194 +epoch: 0, batch: 30192, sum loss: 5755.750977, avg loss: 3.111217, ppl: 22.448343 +epoch: 0, batch: 30193, sum loss: 3856.922852, avg loss: 2.638114, ppl: 13.986803 +epoch: 0, batch: 30194, sum loss: 6009.953613, avg loss: 3.101111, ppl: 22.222635 +epoch: 0, batch: 30195, sum loss: 4517.161621, avg loss: 2.868039, ppl: 17.602465 +epoch: 0, batch: 30196, sum loss: 6179.270996, avg loss: 3.149475, ppl: 23.323824 +epoch: 0, batch: 30197, sum loss: 5550.760742, avg loss: 3.261317, ppl: 26.083855 +epoch: 0, batch: 30198, sum loss: 5113.694336, avg loss: 3.034833, ppl: 20.797514 +epoch: 0, batch: 30199, sum loss: 4774.847656, avg loss: 2.907946, ppl: 18.319139 +epoch: 0, batch: 30200, sum loss: 4904.780762, avg loss: 2.999866, ppl: 20.082842 +epoch: 0, batch: 30201, sum loss: 4528.076172, avg loss: 3.105676, ppl: 22.324314 +epoch: 0, batch: 30202, sum loss: 5182.856445, avg loss: 3.092396, ppl: 22.029810 +epoch: 0, batch: 30203, sum loss: 6299.223145, avg loss: 3.242009, ppl: 25.585068 +epoch: 0, batch: 30204, sum loss: 5382.416016, avg loss: 3.015359, ppl: 20.396416 +epoch: 0, batch: 30205, sum loss: 4913.798340, avg loss: 2.945922, ppl: 19.028206 +epoch: 0, batch: 30206, sum loss: 6460.257324, avg loss: 3.321469, ppl: 27.701021 +epoch: 0, batch: 30207, sum loss: 4252.003906, avg loss: 2.779088, ppl: 16.104321 +epoch: 0, batch: 30208, sum loss: 4444.986328, avg loss: 2.949560, ppl: 19.097544 +epoch: 0, batch: 30209, sum loss: 4920.074219, avg loss: 3.119895, ppl: 22.644001 +epoch: 0, batch: 30210, sum loss: 4466.802246, avg loss: 2.811078, ppl: 16.627827 +epoch: 0, batch: 30211, sum loss: 5120.833496, avg loss: 2.854422, ppl: 17.364403 +epoch: 0, batch: 30212, sum loss: 4594.155273, avg loss: 2.981282, ppl: 19.713068 +epoch: 0, batch: 30213, sum loss: 3494.581055, avg loss: 2.784527, ppl: 16.192154 +epoch: 0, batch: 30214, sum loss: 4774.638184, avg loss: 2.993504, ppl: 19.955475 +epoch: 0, batch: 30215, sum loss: 5119.247070, avg loss: 3.074623, ppl: 
21.641718 +epoch: 0, batch: 30216, sum loss: 4926.576660, avg loss: 3.152000, ppl: 23.382793 +epoch: 0, batch: 30217, sum loss: 5818.389648, avg loss: 3.055877, ppl: 21.239803 +epoch: 0, batch: 30218, sum loss: 5933.988281, avg loss: 3.171560, ppl: 23.844648 +epoch: 0, batch: 30219, sum loss: 5503.091309, avg loss: 2.860234, ppl: 17.465622 +epoch: 0, batch: 30220, sum loss: 6139.796387, avg loss: 3.342295, ppl: 28.283976 +epoch: 0, batch: 30221, sum loss: 6385.142578, avg loss: 3.099584, ppl: 22.188711 +epoch: 0, batch: 30222, sum loss: 4763.922852, avg loss: 3.047935, ppl: 21.071791 +epoch: 0, batch: 30223, sum loss: 4757.046387, avg loss: 2.941896, ppl: 18.951754 +epoch: 0, batch: 30224, sum loss: 4258.062988, avg loss: 2.850109, ppl: 17.289667 +epoch: 0, batch: 30225, sum loss: 4757.101074, avg loss: 2.960237, ppl: 19.302551 +epoch: 0, batch: 30226, sum loss: 5271.200195, avg loss: 3.167789, ppl: 23.754898 +epoch: 0, batch: 30227, sum loss: 5269.094727, avg loss: 3.021270, ppl: 20.517328 +epoch: 0, batch: 30228, sum loss: 4995.213867, avg loss: 3.062670, ppl: 21.384567 +epoch: 0, batch: 30229, sum loss: 4992.482910, avg loss: 2.838251, ppl: 17.085854 +epoch: 0, batch: 30230, sum loss: 5265.582520, avg loss: 2.909162, ppl: 18.341419 +epoch: 0, batch: 30231, sum loss: 5091.096191, avg loss: 3.094891, ppl: 22.084833 +epoch: 0, batch: 30232, sum loss: 5520.160645, avg loss: 3.018131, ppl: 20.453020 +epoch: 0, batch: 30233, sum loss: 4913.155762, avg loss: 2.934980, ppl: 18.821121 +epoch: 0, batch: 30234, sum loss: 4698.967773, avg loss: 2.803680, ppl: 16.505278 +epoch: 0, batch: 30235, sum loss: 5803.330566, avg loss: 3.072171, ppl: 21.588715 +epoch: 0, batch: 30236, sum loss: 4647.637207, avg loss: 3.218585, ppl: 24.992737 +epoch: 0, batch: 30237, sum loss: 4885.888184, avg loss: 2.909999, ppl: 18.356779 +epoch: 0, batch: 30238, sum loss: 5018.788086, avg loss: 3.119197, ppl: 22.628204 +epoch: 0, batch: 30239, sum loss: 4751.386230, avg loss: 2.995830, ppl: 
20.001951 +epoch: 0, batch: 30240, sum loss: 5058.522949, avg loss: 3.101485, ppl: 22.230951 +epoch: 0, batch: 30241, sum loss: 5499.564453, avg loss: 3.082716, ppl: 21.817574 +epoch: 0, batch: 30242, sum loss: 4221.991211, avg loss: 2.872103, ppl: 17.674147 +epoch: 0, batch: 30243, sum loss: 4970.833984, avg loss: 3.114558, ppl: 22.523462 +epoch: 0, batch: 30244, sum loss: 4284.666992, avg loss: 2.689684, ppl: 14.727024 +epoch: 0, batch: 30245, sum loss: 5234.325195, avg loss: 3.207307, ppl: 24.712448 +epoch: 0, batch: 30246, sum loss: 5471.576172, avg loss: 3.249155, ppl: 25.768545 +epoch: 0, batch: 30247, sum loss: 3688.715088, avg loss: 2.850630, ppl: 17.298674 +epoch: 0, batch: 30248, sum loss: 4480.389648, avg loss: 2.886849, ppl: 17.936705 +epoch: 0, batch: 30249, sum loss: 4977.767090, avg loss: 2.962957, ppl: 19.355114 +epoch: 0, batch: 30250, sum loss: 6271.623535, avg loss: 3.251230, ppl: 25.822094 +epoch: 0, batch: 30251, sum loss: 4972.304199, avg loss: 2.972089, ppl: 19.532673 +epoch: 0, batch: 30252, sum loss: 5393.767578, avg loss: 2.884368, ppl: 17.892250 +epoch: 0, batch: 30253, sum loss: 4282.421875, avg loss: 2.715550, ppl: 15.112915 +epoch: 0, batch: 30254, sum loss: 6172.458984, avg loss: 3.154041, ppl: 23.430563 +epoch: 0, batch: 30255, sum loss: 5500.779785, avg loss: 2.989554, ppl: 19.876820 +epoch: 0, batch: 30256, sum loss: 5103.498047, avg loss: 3.026986, ppl: 20.634943 +epoch: 0, batch: 30257, sum loss: 5430.086426, avg loss: 2.964021, ppl: 19.375725 +epoch: 0, batch: 30258, sum loss: 5379.316895, avg loss: 3.192473, ppl: 24.348566 +epoch: 0, batch: 30259, sum loss: 4955.263672, avg loss: 3.066376, ppl: 21.463976 +epoch: 0, batch: 30260, sum loss: 5547.297852, avg loss: 2.964884, ppl: 19.392454 +epoch: 0, batch: 30261, sum loss: 5235.496582, avg loss: 2.845378, ppl: 17.208069 +epoch: 0, batch: 30262, sum loss: 4487.289062, avg loss: 2.902516, ppl: 18.219923 +epoch: 0, batch: 30263, sum loss: 4544.972656, avg loss: 2.882037, ppl: 
17.850601 +epoch: 0, batch: 30264, sum loss: 4873.595215, avg loss: 3.242578, ppl: 25.599640 +epoch: 0, batch: 30265, sum loss: 5631.480957, avg loss: 3.065586, ppl: 21.447023 +epoch: 0, batch: 30266, sum loss: 5165.466797, avg loss: 3.003178, ppl: 20.149477 +epoch: 0, batch: 30267, sum loss: 4648.874023, avg loss: 2.947923, ppl: 19.066307 +epoch: 0, batch: 30268, sum loss: 4522.651367, avg loss: 2.814344, ppl: 16.682236 +epoch: 0, batch: 30269, sum loss: 4597.626465, avg loss: 3.024754, ppl: 20.588945 +epoch: 0, batch: 30270, sum loss: 5136.243164, avg loss: 3.010694, ppl: 20.301476 +epoch: 0, batch: 30271, sum loss: 4562.005371, avg loss: 2.646175, ppl: 14.100001 +epoch: 0, batch: 30272, sum loss: 4225.868652, avg loss: 2.774700, ppl: 16.033823 +epoch: 0, batch: 30273, sum loss: 5138.811523, avg loss: 2.916465, ppl: 18.475864 +epoch: 0, batch: 30274, sum loss: 4727.793945, avg loss: 3.024820, ppl: 20.590300 +epoch: 0, batch: 30275, sum loss: 5393.516602, avg loss: 3.161499, ppl: 23.605944 +epoch: 0, batch: 30276, sum loss: 4952.521484, avg loss: 2.849552, ppl: 17.280041 +epoch: 0, batch: 30277, sum loss: 4773.948730, avg loss: 2.954176, ppl: 19.185911 +epoch: 0, batch: 30278, sum loss: 4846.701660, avg loss: 3.021635, ppl: 20.524815 +epoch: 0, batch: 30279, sum loss: 4916.621094, avg loss: 2.981577, ppl: 19.718897 +epoch: 0, batch: 30280, sum loss: 4453.692871, avg loss: 2.718982, ppl: 15.164876 +epoch: 0, batch: 30281, sum loss: 4525.285645, avg loss: 2.913899, ppl: 18.428518 +epoch: 0, batch: 30282, sum loss: 5256.606445, avg loss: 2.925212, ppl: 18.638184 +epoch: 0, batch: 30283, sum loss: 5176.054688, avg loss: 3.060943, ppl: 21.347681 +epoch: 0, batch: 30284, sum loss: 5000.805176, avg loss: 3.056727, ppl: 21.257864 +epoch: 0, batch: 30285, sum loss: 4511.348633, avg loss: 3.128536, ppl: 22.840517 +epoch: 0, batch: 30286, sum loss: 3705.273682, avg loss: 2.706555, ppl: 14.977591 +epoch: 0, batch: 30287, sum loss: 5052.454102, avg loss: 2.963316, ppl: 
19.362074 +epoch: 0, batch: 30288, sum loss: 5891.868652, avg loss: 3.334391, ppl: 28.061285 +epoch: 0, batch: 30289, sum loss: 4620.270508, avg loss: 3.065873, ppl: 21.453180 +epoch: 0, batch: 30290, sum loss: 5232.487305, avg loss: 3.003724, ppl: 20.160477 +epoch: 0, batch: 30291, sum loss: 4333.734863, avg loss: 2.760341, ppl: 15.805226 +epoch: 0, batch: 30292, sum loss: 6010.270020, avg loss: 2.790283, ppl: 16.285631 +epoch: 0, batch: 30293, sum loss: 4737.457031, avg loss: 3.139468, ppl: 23.091578 +epoch: 0, batch: 30294, sum loss: 5320.281738, avg loss: 2.931285, ppl: 18.751709 +epoch: 0, batch: 30295, sum loss: 5757.631348, avg loss: 3.205808, ppl: 24.675434 +epoch: 0, batch: 30296, sum loss: 5366.825684, avg loss: 3.175637, ppl: 23.942055 +epoch: 0, batch: 30297, sum loss: 4819.731445, avg loss: 2.860375, ppl: 17.468071 +epoch: 0, batch: 30298, sum loss: 4943.118164, avg loss: 2.963500, ppl: 19.365639 +epoch: 0, batch: 30299, sum loss: 4615.672852, avg loss: 2.797378, ppl: 16.401579 +epoch: 0, batch: 30300, sum loss: 4911.086426, avg loss: 2.998221, ppl: 20.049839 +epoch: 0, batch: 30301, sum loss: 5026.566895, avg loss: 3.009920, ppl: 20.285784 +epoch: 0, batch: 30302, sum loss: 4928.996582, avg loss: 2.718697, ppl: 15.160549 +epoch: 0, batch: 30303, sum loss: 5271.070801, avg loss: 3.100630, ppl: 22.211941 +epoch: 0, batch: 30304, sum loss: 4917.632812, avg loss: 2.944690, ppl: 19.004778 +epoch: 0, batch: 30305, sum loss: 5111.046875, avg loss: 3.033262, ppl: 20.764862 +epoch: 0, batch: 30306, sum loss: 4639.879883, avg loss: 2.648333, ppl: 14.130468 +epoch: 0, batch: 30307, sum loss: 4555.660645, avg loss: 2.922168, ppl: 18.581537 +epoch: 0, batch: 30308, sum loss: 4480.691406, avg loss: 2.795191, ppl: 16.365755 +epoch: 0, batch: 30309, sum loss: 6472.645996, avg loss: 3.030265, ppl: 20.702721 +epoch: 0, batch: 30310, sum loss: 5537.153320, avg loss: 2.889955, ppl: 17.992496 +epoch: 0, batch: 30311, sum loss: 5049.971680, avg loss: 3.053187, ppl: 
21.182755 +epoch: 0, batch: 30312, sum loss: 5269.856445, avg loss: 3.060312, ppl: 21.334204 +epoch: 0, batch: 30313, sum loss: 5520.592773, avg loss: 3.163663, ppl: 23.657103 +epoch: 0, batch: 30314, sum loss: 3877.796631, avg loss: 2.698536, ppl: 14.857969 +epoch: 0, batch: 30315, sum loss: 3914.608643, avg loss: 2.620220, ppl: 13.738745 +epoch: 0, batch: 30316, sum loss: 5305.369141, avg loss: 2.838614, ppl: 17.092054 +epoch: 0, batch: 30317, sum loss: 5084.294922, avg loss: 3.049967, ppl: 21.114643 +epoch: 0, batch: 30318, sum loss: 5167.964844, avg loss: 3.170531, ppl: 23.820120 +epoch: 0, batch: 30319, sum loss: 4490.824219, avg loss: 2.970122, ppl: 19.494299 +epoch: 0, batch: 30320, sum loss: 4423.747070, avg loss: 2.702350, ppl: 14.914742 +epoch: 0, batch: 30321, sum loss: 5050.890625, avg loss: 3.089230, ppl: 21.960154 +epoch: 0, batch: 30322, sum loss: 6103.974609, avg loss: 3.003925, ppl: 20.164520 +epoch: 0, batch: 30323, sum loss: 5249.955566, avg loss: 3.138049, ppl: 23.058832 +epoch: 0, batch: 30324, sum loss: 5615.970215, avg loss: 3.207293, ppl: 24.712107 +epoch: 0, batch: 30325, sum loss: 5606.456055, avg loss: 2.946115, ppl: 19.031862 +epoch: 0, batch: 30326, sum loss: 5565.903320, avg loss: 3.064925, ppl: 21.432848 +epoch: 0, batch: 30327, sum loss: 4926.559570, avg loss: 2.951803, ppl: 19.140442 +epoch: 0, batch: 30328, sum loss: 4827.812012, avg loss: 3.084864, ppl: 21.864491 +epoch: 0, batch: 30329, sum loss: 5181.356445, avg loss: 3.406546, ppl: 30.160892 +epoch: 0, batch: 30330, sum loss: 5331.270996, avg loss: 2.914856, ppl: 18.446150 +epoch: 0, batch: 30331, sum loss: 3902.633789, avg loss: 2.781635, ppl: 16.145397 +epoch: 0, batch: 30332, sum loss: 4315.929688, avg loss: 2.717840, ppl: 15.147568 +epoch: 0, batch: 30333, sum loss: 5291.317871, avg loss: 3.069210, ppl: 21.524893 +epoch: 0, batch: 30334, sum loss: 4432.047363, avg loss: 2.676357, ppl: 14.532057 +epoch: 0, batch: 30335, sum loss: 5786.248047, avg loss: 3.056655, ppl: 
21.256334 +epoch: 0, batch: 30336, sum loss: 6210.001953, avg loss: 3.150686, ppl: 23.352074 +epoch: 0, batch: 30337, sum loss: 5613.286133, avg loss: 3.064021, ppl: 21.413485 +epoch: 0, batch: 30338, sum loss: 4177.834473, avg loss: 2.639188, ppl: 14.001827 +epoch: 0, batch: 30339, sum loss: 5101.996094, avg loss: 2.851870, ppl: 17.320145 +epoch: 0, batch: 30340, sum loss: 4894.464844, avg loss: 2.934331, ppl: 18.808924 +epoch: 0, batch: 30341, sum loss: 4122.857910, avg loss: 2.857143, ppl: 17.411718 +epoch: 0, batch: 30342, sum loss: 4852.779785, avg loss: 3.040589, ppl: 20.917557 +epoch: 0, batch: 30343, sum loss: 4431.997070, avg loss: 2.846498, ppl: 17.227343 +epoch: 0, batch: 30344, sum loss: 4287.717285, avg loss: 2.864207, ppl: 17.535135 +epoch: 0, batch: 30345, sum loss: 4480.311523, avg loss: 2.707137, ppl: 14.986306 +epoch: 0, batch: 30346, sum loss: 5221.893555, avg loss: 3.157130, ppl: 23.503057 +epoch: 0, batch: 30347, sum loss: 4548.270996, avg loss: 2.968845, ppl: 19.469427 +epoch: 0, batch: 30348, sum loss: 4867.700684, avg loss: 3.004754, ppl: 20.181242 +epoch: 0, batch: 30349, sum loss: 5143.117676, avg loss: 2.997155, ppl: 20.028473 +epoch: 0, batch: 30350, sum loss: 5117.457520, avg loss: 3.026291, ppl: 20.620607 +epoch: 0, batch: 30351, sum loss: 4208.500000, avg loss: 2.809412, ppl: 16.600163 +epoch: 0, batch: 30352, sum loss: 5131.921387, avg loss: 2.975027, ppl: 19.590155 +epoch: 0, batch: 30353, sum loss: 4710.018555, avg loss: 3.017308, ppl: 20.436213 +epoch: 0, batch: 30354, sum loss: 4781.108398, avg loss: 2.917089, ppl: 18.487383 +epoch: 0, batch: 30355, sum loss: 5088.638184, avg loss: 3.100937, ppl: 22.218763 +epoch: 0, batch: 30356, sum loss: 4427.581055, avg loss: 2.888181, ppl: 17.960606 +epoch: 0, batch: 30357, sum loss: 5362.155273, avg loss: 3.366074, ppl: 28.964577 +epoch: 0, batch: 30358, sum loss: 4718.212402, avg loss: 2.999499, ppl: 20.075483 +epoch: 0, batch: 30359, sum loss: 5513.048828, avg loss: 3.210861, ppl: 
24.800436 +epoch: 0, batch: 30360, sum loss: 5446.382812, avg loss: 3.146379, ppl: 23.251722 +epoch: 0, batch: 30361, sum loss: 4283.847168, avg loss: 2.861621, ppl: 17.489857 +epoch: 0, batch: 30362, sum loss: 4890.861328, avg loss: 2.969558, ppl: 19.483297 +epoch: 0, batch: 30363, sum loss: 4298.147461, avg loss: 2.694763, ppl: 14.802016 +epoch: 0, batch: 30364, sum loss: 4114.597656, avg loss: 2.924376, ppl: 18.622610 +epoch: 0, batch: 30365, sum loss: 5574.281250, avg loss: 3.088244, ppl: 21.938530 +epoch: 0, batch: 30366, sum loss: 6807.607910, avg loss: 3.453885, ppl: 31.623013 +epoch: 0, batch: 30367, sum loss: 3951.295654, avg loss: 2.697130, ppl: 14.837091 +epoch: 0, batch: 30368, sum loss: 6078.592285, avg loss: 3.045387, ppl: 21.018164 +epoch: 0, batch: 30369, sum loss: 4564.189941, avg loss: 2.961836, ppl: 19.333443 +epoch: 0, batch: 30370, sum loss: 6252.631348, avg loss: 3.099966, ppl: 22.197197 +epoch: 0, batch: 30371, sum loss: 3731.081299, avg loss: 2.944815, ppl: 19.007153 +epoch: 0, batch: 30372, sum loss: 5815.637695, avg loss: 3.086857, ppl: 21.908104 +epoch: 0, batch: 30373, sum loss: 5062.594727, avg loss: 2.958852, ppl: 19.275822 +epoch: 0, batch: 30374, sum loss: 4846.950195, avg loss: 2.829510, ppl: 16.937155 +epoch: 0, batch: 30375, sum loss: 5082.510742, avg loss: 2.842567, ppl: 17.159765 +epoch: 0, batch: 30376, sum loss: 4715.481934, avg loss: 2.732029, ppl: 15.364028 +epoch: 0, batch: 30377, sum loss: 5333.277832, avg loss: 3.035445, ppl: 20.810246 +epoch: 0, batch: 30378, sum loss: 4868.086914, avg loss: 2.642827, ppl: 14.052872 +epoch: 0, batch: 30379, sum loss: 5056.469727, avg loss: 3.027826, ppl: 20.652292 +epoch: 0, batch: 30380, sum loss: 5569.878418, avg loss: 3.102996, ppl: 22.264565 +epoch: 0, batch: 30381, sum loss: 5847.697266, avg loss: 3.452005, ppl: 31.563625 +epoch: 0, batch: 30382, sum loss: 4616.922363, avg loss: 2.927662, ppl: 18.683889 +epoch: 0, batch: 30383, sum loss: 5116.009766, avg loss: 2.947010, ppl: 
19.048918 +epoch: 0, batch: 30384, sum loss: 5164.998047, avg loss: 2.836353, ppl: 17.053452 +epoch: 0, batch: 30385, sum loss: 4552.052734, avg loss: 2.984953, ppl: 19.785566 +epoch: 0, batch: 30386, sum loss: 5124.593262, avg loss: 3.043108, ppl: 20.970312 +epoch: 0, batch: 30387, sum loss: 5097.958496, avg loss: 3.012978, ppl: 20.347902 +epoch: 0, batch: 30388, sum loss: 4106.114746, avg loss: 2.816265, ppl: 16.714312 +epoch: 0, batch: 30389, sum loss: 3814.058594, avg loss: 2.753833, ppl: 15.702706 +epoch: 0, batch: 30390, sum loss: 4152.320312, avg loss: 2.993742, ppl: 19.960234 +epoch: 0, batch: 30391, sum loss: 4483.135254, avg loss: 2.805466, ppl: 16.534786 +epoch: 0, batch: 30392, sum loss: 4801.818359, avg loss: 3.207627, ppl: 24.720362 +epoch: 0, batch: 30393, sum loss: 6193.962891, avg loss: 3.277229, ppl: 26.502235 +epoch: 0, batch: 30394, sum loss: 4875.140625, avg loss: 3.035579, ppl: 20.813034 +epoch: 0, batch: 30395, sum loss: 4705.610840, avg loss: 2.812678, ppl: 16.654465 +epoch: 0, batch: 30396, sum loss: 5003.209961, avg loss: 2.908843, ppl: 18.335573 +epoch: 0, batch: 30397, sum loss: 4692.931641, avg loss: 2.998678, ppl: 20.059006 +epoch: 0, batch: 30398, sum loss: 5014.253906, avg loss: 2.781061, ppl: 16.136139 +epoch: 0, batch: 30399, sum loss: 5970.608887, avg loss: 3.155713, ppl: 23.469761 +epoch: 0, batch: 30400, sum loss: 4666.490234, avg loss: 3.062001, ppl: 21.370287 +epoch: 0, batch: 30401, sum loss: 5668.634766, avg loss: 3.193597, ppl: 24.375946 +epoch: 0, batch: 30402, sum loss: 5545.610352, avg loss: 3.529988, ppl: 34.123543 +epoch: 0, batch: 30403, sum loss: 4053.429932, avg loss: 2.751820, ppl: 15.671122 +epoch: 0, batch: 30404, sum loss: 5744.268555, avg loss: 3.123583, ppl: 22.727663 +epoch: 0, batch: 30405, sum loss: 5146.799316, avg loss: 2.961334, ppl: 19.323742 +epoch: 0, batch: 30406, sum loss: 5215.899902, avg loss: 2.920437, ppl: 18.549389 +epoch: 0, batch: 30407, sum loss: 4806.348633, avg loss: 2.768634, ppl: 
15.936851 +epoch: 0, batch: 30408, sum loss: 4768.766602, avg loss: 3.237452, ppl: 25.468746 +epoch: 0, batch: 30409, sum loss: 4724.555664, avg loss: 2.907419, ppl: 18.309475 +epoch: 0, batch: 30410, sum loss: 4508.966309, avg loss: 2.778168, ppl: 16.089514 +epoch: 0, batch: 30411, sum loss: 6562.395020, avg loss: 3.317692, ppl: 27.596579 +epoch: 0, batch: 30412, sum loss: 4338.265625, avg loss: 2.929281, ppl: 18.714178 +epoch: 0, batch: 30413, sum loss: 4814.849121, avg loss: 3.001776, ppl: 20.121250 +epoch: 0, batch: 30414, sum loss: 4711.697754, avg loss: 2.965197, ppl: 19.398521 +epoch: 0, batch: 30415, sum loss: 4899.970215, avg loss: 2.749703, ppl: 15.637985 +epoch: 0, batch: 30416, sum loss: 5205.395020, avg loss: 2.977915, ppl: 19.646811 +epoch: 0, batch: 30417, sum loss: 5330.821289, avg loss: 2.938711, ppl: 18.891478 +epoch: 0, batch: 30418, sum loss: 4835.116211, avg loss: 2.890087, ppl: 17.994883 +epoch: 0, batch: 30419, sum loss: 4796.609375, avg loss: 2.785487, ppl: 16.207716 +epoch: 0, batch: 30420, sum loss: 5992.245605, avg loss: 3.177225, ppl: 23.980114 +epoch: 0, batch: 30421, sum loss: 4386.529297, avg loss: 2.967882, ppl: 19.450678 +epoch: 0, batch: 30422, sum loss: 4820.458008, avg loss: 2.917953, ppl: 18.503372 +epoch: 0, batch: 30423, sum loss: 5375.377930, avg loss: 3.114356, ppl: 22.518913 +epoch: 0, batch: 30424, sum loss: 5000.398438, avg loss: 2.897102, ppl: 18.121555 +epoch: 0, batch: 30425, sum loss: 5486.645020, avg loss: 3.080654, ppl: 21.772640 +epoch: 0, batch: 30426, sum loss: 6049.198242, avg loss: 3.000595, ppl: 20.097485 +epoch: 0, batch: 30427, sum loss: 5309.111816, avg loss: 3.239238, ppl: 25.514278 +epoch: 0, batch: 30428, sum loss: 5696.522949, avg loss: 3.049531, ppl: 21.105433 +epoch: 0, batch: 30429, sum loss: 5130.637695, avg loss: 3.120826, ppl: 22.665089 +epoch: 0, batch: 30430, sum loss: 5956.756348, avg loss: 3.232098, ppl: 25.332747 +epoch: 0, batch: 30431, sum loss: 4803.788574, avg loss: 2.857697, ppl: 
17.421356 +epoch: 0, batch: 30432, sum loss: 3987.285156, avg loss: 2.677828, ppl: 14.553443 +epoch: 0, batch: 30433, sum loss: 4172.579590, avg loss: 2.737913, ppl: 15.454700 +epoch: 0, batch: 30434, sum loss: 4661.104004, avg loss: 3.109476, ppl: 22.409290 +epoch: 0, batch: 30435, sum loss: 5530.491211, avg loss: 2.918465, ppl: 18.512846 +epoch: 0, batch: 30436, sum loss: 4865.247070, avg loss: 2.999536, ppl: 20.076216 +epoch: 0, batch: 30437, sum loss: 4503.673828, avg loss: 2.941655, ppl: 18.947182 +epoch: 0, batch: 30438, sum loss: 4820.239746, avg loss: 3.052717, ppl: 21.172787 +epoch: 0, batch: 30439, sum loss: 4179.668945, avg loss: 2.564214, ppl: 12.990444 +epoch: 0, batch: 30440, sum loss: 5333.968262, avg loss: 3.152463, ppl: 23.393621 +epoch: 0, batch: 30441, sum loss: 5784.461914, avg loss: 3.123359, ppl: 22.722588 +epoch: 0, batch: 30442, sum loss: 5973.837891, avg loss: 2.976501, ppl: 19.619053 +epoch: 0, batch: 30443, sum loss: 5503.973145, avg loss: 2.930763, ppl: 18.741926 +epoch: 0, batch: 30444, sum loss: 3842.943604, avg loss: 2.579157, ppl: 13.186016 +epoch: 0, batch: 30445, sum loss: 5505.821777, avg loss: 2.958529, ppl: 19.269600 +epoch: 0, batch: 30446, sum loss: 5975.562988, avg loss: 3.086551, ppl: 21.901409 +epoch: 0, batch: 30447, sum loss: 5394.378906, avg loss: 3.340173, ppl: 28.224003 +epoch: 0, batch: 30448, sum loss: 4669.266602, avg loss: 2.925606, ppl: 18.645514 +epoch: 0, batch: 30449, sum loss: 5303.419922, avg loss: 2.971104, ppl: 19.513445 +epoch: 0, batch: 30450, sum loss: 5605.741699, avg loss: 3.293620, ppl: 26.940214 +epoch: 0, batch: 30451, sum loss: 5431.151855, avg loss: 2.860006, ppl: 17.461637 +epoch: 0, batch: 30452, sum loss: 5327.311035, avg loss: 3.070496, ppl: 21.552597 +epoch: 0, batch: 30453, sum loss: 4951.379883, avg loss: 2.892161, ppl: 18.032238 +epoch: 0, batch: 30454, sum loss: 5666.113770, avg loss: 3.203004, ppl: 24.606335 +epoch: 0, batch: 30455, sum loss: 3864.571533, avg loss: 2.727291, ppl: 
15.291409 +epoch: 0, batch: 30456, sum loss: 4845.076172, avg loss: 2.952514, ppl: 19.154055 +epoch: 0, batch: 30457, sum loss: 5759.254395, avg loss: 3.228282, ppl: 25.236258 +epoch: 0, batch: 30458, sum loss: 5265.925293, avg loss: 3.040373, ppl: 20.913034 +epoch: 0, batch: 30459, sum loss: 4128.959961, avg loss: 2.863357, ppl: 17.520237 +epoch: 0, batch: 30460, sum loss: 5052.570312, avg loss: 3.107362, ppl: 22.361971 +epoch: 0, batch: 30461, sum loss: 4112.226074, avg loss: 2.824331, ppl: 16.849674 +epoch: 0, batch: 30462, sum loss: 4923.498047, avg loss: 2.998477, ppl: 20.054960 +epoch: 0, batch: 30463, sum loss: 4129.285156, avg loss: 2.962184, ppl: 19.340174 +epoch: 0, batch: 30464, sum loss: 4942.771484, avg loss: 3.045454, ppl: 21.019567 +epoch: 0, batch: 30465, sum loss: 6720.572754, avg loss: 3.183597, ppl: 24.133396 +epoch: 0, batch: 30466, sum loss: 6005.949219, avg loss: 3.167695, ppl: 23.752672 +epoch: 0, batch: 30467, sum loss: 4882.192383, avg loss: 2.695855, ppl: 14.818178 +epoch: 0, batch: 30468, sum loss: 5782.629395, avg loss: 3.228715, ppl: 25.247204 +epoch: 0, batch: 30469, sum loss: 5039.437500, avg loss: 2.914654, ppl: 18.442440 +epoch: 0, batch: 30470, sum loss: 4516.495117, avg loss: 2.854927, ppl: 17.373175 +epoch: 0, batch: 30471, sum loss: 4796.077637, avg loss: 2.958715, ppl: 19.273193 +epoch: 0, batch: 30472, sum loss: 4525.640625, avg loss: 2.809212, ppl: 16.596838 +epoch: 0, batch: 30473, sum loss: 4413.347168, avg loss: 2.948128, ppl: 19.070211 +epoch: 0, batch: 30474, sum loss: 4713.188477, avg loss: 2.856478, ppl: 17.400135 +epoch: 0, batch: 30475, sum loss: 6098.032227, avg loss: 3.194360, ppl: 24.394545 +epoch: 0, batch: 30476, sum loss: 4453.874023, avg loss: 2.811789, ppl: 16.639660 +epoch: 0, batch: 30477, sum loss: 5914.563965, avg loss: 3.080502, ppl: 21.769333 +epoch: 0, batch: 30478, sum loss: 5492.651855, avg loss: 3.103193, ppl: 22.268950 +epoch: 0, batch: 30479, sum loss: 5241.381836, avg loss: 3.155558, ppl: 
23.466129 +epoch: 0, batch: 30480, sum loss: 5506.455078, avg loss: 3.297279, ppl: 27.038956 +epoch: 0, batch: 30481, sum loss: 5601.429688, avg loss: 3.294959, ppl: 26.976303 +epoch: 0, batch: 30482, sum loss: 6215.583008, avg loss: 3.123409, ppl: 22.723703 +epoch: 0, batch: 30483, sum loss: 5486.947266, avg loss: 3.053393, ppl: 21.187113 +epoch: 0, batch: 30484, sum loss: 5275.773438, avg loss: 3.039040, ppl: 20.885185 +epoch: 0, batch: 30485, sum loss: 4970.575684, avg loss: 2.906769, ppl: 18.297588 +epoch: 0, batch: 30486, sum loss: 4325.674805, avg loss: 2.640827, ppl: 14.024796 +epoch: 0, batch: 30487, sum loss: 4348.202148, avg loss: 2.682420, ppl: 14.620425 +epoch: 0, batch: 30488, sum loss: 5463.244629, avg loss: 3.030086, ppl: 20.699009 +epoch: 0, batch: 30489, sum loss: 5877.265625, avg loss: 3.136214, ppl: 23.016567 +epoch: 0, batch: 30490, sum loss: 4756.883301, avg loss: 2.891722, ppl: 18.024324 +epoch: 0, batch: 30491, sum loss: 4495.016113, avg loss: 2.814663, ppl: 16.687546 +epoch: 0, batch: 30492, sum loss: 4522.461426, avg loss: 2.796822, ppl: 16.392473 +epoch: 0, batch: 30493, sum loss: 4440.640625, avg loss: 2.732702, ppl: 15.374369 +epoch: 0, batch: 30494, sum loss: 5378.257812, avg loss: 3.016409, ppl: 20.417847 +epoch: 0, batch: 30495, sum loss: 5482.515625, avg loss: 3.152683, ppl: 23.398760 +epoch: 0, batch: 30496, sum loss: 4535.049316, avg loss: 2.673968, ppl: 14.497378 +epoch: 0, batch: 30497, sum loss: 5821.949219, avg loss: 2.912431, ppl: 18.401478 +epoch: 0, batch: 30498, sum loss: 5159.878906, avg loss: 2.805807, ppl: 16.540421 +epoch: 0, batch: 30499, sum loss: 5051.485840, avg loss: 2.938619, ppl: 18.889740 +epoch: 0, batch: 30500, sum loss: 5884.641602, avg loss: 3.381978, ppl: 29.428925 +epoch: 0, batch: 30501, sum loss: 5883.204102, avg loss: 3.070566, ppl: 21.554098 +epoch: 0, batch: 30502, sum loss: 4179.374512, avg loss: 2.868479, ppl: 17.610218 +epoch: 0, batch: 30503, sum loss: 5167.996094, avg loss: 3.057986, ppl: 
21.284641 +epoch: 0, batch: 30504, sum loss: 4013.179199, avg loss: 2.846226, ppl: 17.222668 +epoch: 0, batch: 30505, sum loss: 5629.509277, avg loss: 2.950476, ppl: 19.115059 +epoch: 0, batch: 30506, sum loss: 4632.998047, avg loss: 2.639885, ppl: 14.011592 +epoch: 0, batch: 30507, sum loss: 5629.862305, avg loss: 3.321453, ppl: 27.700560 +epoch: 0, batch: 30508, sum loss: 5224.557617, avg loss: 2.928564, ppl: 18.700754 +epoch: 0, batch: 30509, sum loss: 6255.502441, avg loss: 3.092191, ppl: 22.025276 +epoch: 0, batch: 30510, sum loss: 5044.538574, avg loss: 3.196792, ppl: 24.453964 +epoch: 0, batch: 30511, sum loss: 4460.563965, avg loss: 2.779167, ppl: 16.105606 +epoch: 0, batch: 30512, sum loss: 4637.892578, avg loss: 2.809142, ppl: 16.595667 +epoch: 0, batch: 30513, sum loss: 4590.973633, avg loss: 2.969582, ppl: 19.483770 +epoch: 0, batch: 30514, sum loss: 4813.163574, avg loss: 2.849712, ppl: 17.282806 +epoch: 0, batch: 30515, sum loss: 4251.023438, avg loss: 2.617625, ppl: 13.703143 +epoch: 0, batch: 30516, sum loss: 5439.618652, avg loss: 2.904228, ppl: 18.251148 +epoch: 0, batch: 30517, sum loss: 5313.261719, avg loss: 3.020615, ppl: 20.503901 +epoch: 0, batch: 30518, sum loss: 4486.753906, avg loss: 2.766186, ppl: 15.897883 +epoch: 0, batch: 30519, sum loss: 4753.147461, avg loss: 2.939485, ppl: 18.906113 +epoch: 0, batch: 30520, sum loss: 4307.247070, avg loss: 2.813355, ppl: 16.665745 +epoch: 0, batch: 30521, sum loss: 6762.599609, avg loss: 3.268535, ppl: 26.272831 +epoch: 0, batch: 30522, sum loss: 5239.147461, avg loss: 3.216174, ppl: 24.932543 +epoch: 0, batch: 30523, sum loss: 4804.758789, avg loss: 2.868513, ppl: 17.610809 +epoch: 0, batch: 30524, sum loss: 5955.280273, avg loss: 3.154280, ppl: 23.436150 +epoch: 0, batch: 30525, sum loss: 5191.879883, avg loss: 3.154241, ppl: 23.435234 +epoch: 0, batch: 30526, sum loss: 4966.051758, avg loss: 2.986201, ppl: 19.810276 +epoch: 0, batch: 30527, sum loss: 4661.525879, avg loss: 2.724446, ppl: 
15.247958 +epoch: 0, batch: 30528, sum loss: 4804.631348, avg loss: 3.136182, ppl: 23.015831 +epoch: 0, batch: 30529, sum loss: 5283.153320, avg loss: 2.954784, ppl: 19.197569 +epoch: 0, batch: 30530, sum loss: 5293.641113, avg loss: 3.009461, ppl: 20.276463 +epoch: 0, batch: 30531, sum loss: 5302.526367, avg loss: 3.203943, ppl: 24.629465 +epoch: 0, batch: 30532, sum loss: 4834.504395, avg loss: 2.759420, ppl: 15.790688 +epoch: 0, batch: 30533, sum loss: 4671.671875, avg loss: 3.067414, ppl: 21.486273 +epoch: 0, batch: 30534, sum loss: 5286.498047, avg loss: 2.807487, ppl: 16.568230 +epoch: 0, batch: 30535, sum loss: 5069.943848, avg loss: 3.017824, ppl: 20.446751 +epoch: 0, batch: 30536, sum loss: 4275.437012, avg loss: 2.770860, ppl: 15.972367 +epoch: 0, batch: 30537, sum loss: 4897.998047, avg loss: 2.977506, ppl: 19.638779 +epoch: 0, batch: 30538, sum loss: 5285.709961, avg loss: 2.966167, ppl: 19.417358 +epoch: 0, batch: 30539, sum loss: 4443.311035, avg loss: 2.870356, ppl: 17.643299 +epoch: 0, batch: 30540, sum loss: 5543.606934, avg loss: 3.042594, ppl: 20.959551 +epoch: 0, batch: 30541, sum loss: 4955.445801, avg loss: 2.782395, ppl: 16.157679 +epoch: 0, batch: 30542, sum loss: 5556.230469, avg loss: 3.155156, ppl: 23.456705 +epoch: 0, batch: 30543, sum loss: 5385.558594, avg loss: 3.025595, ppl: 20.606256 +epoch: 0, batch: 30544, sum loss: 5093.216797, avg loss: 3.205297, ppl: 24.662823 +epoch: 0, batch: 30545, sum loss: 4547.761230, avg loss: 2.822943, ppl: 16.826302 +epoch: 0, batch: 30546, sum loss: 4580.402344, avg loss: 2.794632, ppl: 16.356611 +epoch: 0, batch: 30547, sum loss: 5106.216797, avg loss: 3.089060, ppl: 21.956436 +epoch: 0, batch: 30548, sum loss: 5620.187500, avg loss: 3.229993, ppl: 25.279476 +epoch: 0, batch: 30549, sum loss: 4800.123535, avg loss: 2.953922, ppl: 19.181036 +epoch: 0, batch: 30550, sum loss: 5135.858398, avg loss: 3.069850, ppl: 21.538666 +epoch: 0, batch: 30551, sum loss: 5878.541016, avg loss: 3.235301, ppl: 
25.414009 +epoch: 0, batch: 30552, sum loss: 5355.417969, avg loss: 2.887018, ppl: 17.939728 +epoch: 0, batch: 30553, sum loss: 5240.667969, avg loss: 2.905027, ppl: 18.265732 +epoch: 0, batch: 30554, sum loss: 5111.340820, avg loss: 3.068032, ppl: 21.499540 +epoch: 0, batch: 30555, sum loss: 6168.755371, avg loss: 3.226336, ppl: 25.187214 +epoch: 0, batch: 30556, sum loss: 4339.861816, avg loss: 2.751973, ppl: 15.673532 +epoch: 0, batch: 30557, sum loss: 5687.533203, avg loss: 3.051252, ppl: 21.141790 +epoch: 0, batch: 30558, sum loss: 4607.009766, avg loss: 2.928805, ppl: 18.705257 +epoch: 0, batch: 30559, sum loss: 4617.871094, avg loss: 2.834789, ppl: 17.026804 +epoch: 0, batch: 30560, sum loss: 4477.839355, avg loss: 3.187074, ppl: 24.217468 +epoch: 0, batch: 30561, sum loss: 5321.100586, avg loss: 3.188197, ppl: 24.244673 +epoch: 0, batch: 30562, sum loss: 5434.130859, avg loss: 3.317540, ppl: 27.592388 +epoch: 0, batch: 30563, sum loss: 5015.293945, avg loss: 3.225270, ppl: 25.160355 +epoch: 0, batch: 30564, sum loss: 5669.501953, avg loss: 3.313561, ppl: 27.482811 +epoch: 0, batch: 30565, sum loss: 5398.428711, avg loss: 3.222943, ppl: 25.101883 +epoch: 0, batch: 30566, sum loss: 5157.652832, avg loss: 3.093973, ppl: 22.064566 +epoch: 0, batch: 30567, sum loss: 6221.070312, avg loss: 3.552867, ppl: 34.913277 +epoch: 0, batch: 30568, sum loss: 5010.353516, avg loss: 2.971740, ppl: 19.525866 +epoch: 0, batch: 30569, sum loss: 4721.736328, avg loss: 2.795581, ppl: 16.372139 +epoch: 0, batch: 30570, sum loss: 5651.199219, avg loss: 2.986892, ppl: 19.823969 +epoch: 0, batch: 30571, sum loss: 4136.678711, avg loss: 3.021679, ppl: 20.525728 +epoch: 0, batch: 30572, sum loss: 4795.872559, avg loss: 3.029610, ppl: 20.689161 +epoch: 0, batch: 30573, sum loss: 5547.684082, avg loss: 3.071807, ppl: 21.580872 +epoch: 0, batch: 30574, sum loss: 5912.201172, avg loss: 3.027241, ppl: 20.640203 +epoch: 0, batch: 30575, sum loss: 4984.733887, avg loss: 2.801986, ppl: 
16.477346 +epoch: 0, batch: 30576, sum loss: 6232.354004, avg loss: 3.325696, ppl: 27.818346 +epoch: 0, batch: 30577, sum loss: 5775.817383, avg loss: 3.248491, ppl: 25.751459 +epoch: 0, batch: 30578, sum loss: 5880.838379, avg loss: 3.064533, ppl: 21.424450 +epoch: 0, batch: 30579, sum loss: 6891.726562, avg loss: 3.266221, ppl: 26.212097 +epoch: 0, batch: 30580, sum loss: 5255.475586, avg loss: 2.942596, ppl: 18.965010 +epoch: 0, batch: 30581, sum loss: 4728.835938, avg loss: 2.953676, ppl: 19.176325 +epoch: 0, batch: 30582, sum loss: 5668.176758, avg loss: 3.097364, ppl: 22.139524 +epoch: 0, batch: 30583, sum loss: 5867.894531, avg loss: 2.947210, ppl: 19.052715 +epoch: 0, batch: 30584, sum loss: 4933.429199, avg loss: 3.130348, ppl: 22.881950 +epoch: 0, batch: 30585, sum loss: 4637.380859, avg loss: 2.991859, ppl: 19.922678 +epoch: 0, batch: 30586, sum loss: 4684.647461, avg loss: 3.100362, ppl: 22.205996 +epoch: 0, batch: 30587, sum loss: 4655.500000, avg loss: 2.796096, ppl: 16.380573 +epoch: 0, batch: 30588, sum loss: 5261.020996, avg loss: 3.023575, ppl: 20.564686 +epoch: 0, batch: 30589, sum loss: 5550.218750, avg loss: 3.019706, ppl: 20.485258 +epoch: 0, batch: 30590, sum loss: 5190.273438, avg loss: 3.109810, ppl: 22.416792 +epoch: 0, batch: 30591, sum loss: 5217.286621, avg loss: 2.868217, ppl: 17.605598 +epoch: 0, batch: 30592, sum loss: 4212.757812, avg loss: 2.859985, ppl: 17.461267 +epoch: 0, batch: 30593, sum loss: 5229.347168, avg loss: 3.019253, ppl: 20.476000 +epoch: 0, batch: 30594, sum loss: 4108.664551, avg loss: 2.816083, ppl: 16.711260 +epoch: 0, batch: 30595, sum loss: 4889.554199, avg loss: 3.134330, ppl: 22.973234 +epoch: 0, batch: 30596, sum loss: 4256.673340, avg loss: 2.751567, ppl: 15.667157 +epoch: 0, batch: 30597, sum loss: 4369.141602, avg loss: 3.029918, ppl: 20.695534 +epoch: 0, batch: 30598, sum loss: 4781.712402, avg loss: 2.979260, ppl: 19.673252 +epoch: 0, batch: 30599, sum loss: 5522.070801, avg loss: 3.139324, ppl: 
23.088253 +epoch: 0, batch: 30600, sum loss: 5316.397461, avg loss: 2.839956, ppl: 17.115013 +epoch: 0, batch: 30601, sum loss: 5464.126953, avg loss: 3.052585, ppl: 21.169996 +epoch: 0, batch: 30602, sum loss: 5143.283203, avg loss: 3.105847, ppl: 22.328131 +epoch: 0, batch: 30603, sum loss: 4248.172852, avg loss: 2.884028, ppl: 17.886173 +epoch: 0, batch: 30604, sum loss: 6119.361328, avg loss: 3.052050, ppl: 21.158682 +epoch: 0, batch: 30605, sum loss: 5372.776855, avg loss: 2.974960, ppl: 19.588833 +epoch: 0, batch: 30606, sum loss: 5421.385742, avg loss: 3.047434, ppl: 21.061239 +epoch: 0, batch: 30607, sum loss: 5722.708008, avg loss: 3.238658, ppl: 25.499472 +epoch: 0, batch: 30608, sum loss: 4463.502930, avg loss: 2.864893, ppl: 17.547174 +epoch: 0, batch: 30609, sum loss: 5464.192871, avg loss: 3.303623, ppl: 27.211058 +epoch: 0, batch: 30610, sum loss: 5556.769531, avg loss: 3.197221, ppl: 24.464443 +epoch: 0, batch: 30611, sum loss: 5162.567383, avg loss: 2.938285, ppl: 18.883440 +epoch: 0, batch: 30612, sum loss: 5860.731934, avg loss: 3.122393, ppl: 22.700640 +epoch: 0, batch: 30613, sum loss: 4020.430176, avg loss: 2.793905, ppl: 16.344730 +epoch: 0, batch: 30614, sum loss: 3867.087402, avg loss: 2.820633, ppl: 16.787468 +epoch: 0, batch: 30615, sum loss: 3791.305908, avg loss: 2.717782, ppl: 15.146693 +epoch: 0, batch: 30616, sum loss: 4847.844727, avg loss: 2.839979, ppl: 17.115414 +epoch: 0, batch: 30617, sum loss: 4405.719727, avg loss: 2.913836, ppl: 18.427351 +epoch: 0, batch: 30618, sum loss: 5250.810547, avg loss: 2.976650, ppl: 19.621973 +epoch: 0, batch: 30619, sum loss: 4895.936035, avg loss: 2.952917, ppl: 19.161764 +epoch: 0, batch: 30620, sum loss: 5376.645508, avg loss: 3.095363, ppl: 22.095261 +epoch: 0, batch: 30621, sum loss: 4659.851562, avg loss: 2.990919, ppl: 19.903963 +epoch: 0, batch: 30622, sum loss: 5280.981445, avg loss: 3.196720, ppl: 24.452198 +epoch: 0, batch: 30623, sum loss: 3817.942871, avg loss: 2.640348, ppl: 
14.018077 +epoch: 0, batch: 30624, sum loss: 4770.312500, avg loss: 3.121932, ppl: 22.690182 +epoch: 0, batch: 30625, sum loss: 4746.163574, avg loss: 2.946098, ppl: 19.031546 +epoch: 0, batch: 30626, sum loss: 5304.352051, avg loss: 3.208924, ppl: 24.752445 +epoch: 0, batch: 30627, sum loss: 5346.162109, avg loss: 3.020431, ppl: 20.500116 +epoch: 0, batch: 30628, sum loss: 4653.982910, avg loss: 3.113032, ppl: 22.489124 +epoch: 0, batch: 30629, sum loss: 5498.632324, avg loss: 3.097821, ppl: 22.149630 +epoch: 0, batch: 30630, sum loss: 4814.906738, avg loss: 2.981366, ppl: 19.714737 +epoch: 0, batch: 30631, sum loss: 5467.508301, avg loss: 2.960211, ppl: 19.302036 +epoch: 0, batch: 30632, sum loss: 4231.637695, avg loss: 2.938637, ppl: 18.890087 +epoch: 0, batch: 30633, sum loss: 5454.275391, avg loss: 3.100782, ppl: 22.215315 +epoch: 0, batch: 30634, sum loss: 4240.794922, avg loss: 2.533330, ppl: 12.595385 +epoch: 0, batch: 30635, sum loss: 5564.734863, avg loss: 3.292742, ppl: 26.916580 +epoch: 0, batch: 30636, sum loss: 4428.072266, avg loss: 2.880984, ppl: 17.831812 +epoch: 0, batch: 30637, sum loss: 5662.882812, avg loss: 3.133859, ppl: 22.962414 +epoch: 0, batch: 30638, sum loss: 4353.462891, avg loss: 2.973677, ppl: 19.563717 +epoch: 0, batch: 30639, sum loss: 5938.360352, avg loss: 3.086466, ppl: 21.899544 +epoch: 0, batch: 30640, sum loss: 4735.757812, avg loss: 2.737432, ppl: 15.447269 +epoch: 0, batch: 30641, sum loss: 4770.562988, avg loss: 2.861765, ppl: 17.492380 +epoch: 0, batch: 30642, sum loss: 5461.027832, avg loss: 3.028856, ppl: 20.673574 +epoch: 0, batch: 30643, sum loss: 4184.838867, avg loss: 2.797352, ppl: 16.401165 +epoch: 0, batch: 30644, sum loss: 4948.161621, avg loss: 2.908972, ppl: 18.337938 +epoch: 0, batch: 30645, sum loss: 4358.137207, avg loss: 2.815334, ppl: 16.698753 +epoch: 0, batch: 30646, sum loss: 4347.664062, avg loss: 2.884979, ppl: 17.903200 +epoch: 0, batch: 30647, sum loss: 4795.881836, avg loss: 2.910122, ppl: 
18.359045 +epoch: 0, batch: 30648, sum loss: 4461.931152, avg loss: 2.785225, ppl: 16.203470 +epoch: 0, batch: 30649, sum loss: 5640.681152, avg loss: 3.133712, ppl: 22.959042 +epoch: 0, batch: 30650, sum loss: 6186.929199, avg loss: 3.355168, ppl: 28.650406 +epoch: 0, batch: 30651, sum loss: 4809.993164, avg loss: 3.053964, ppl: 21.199209 +epoch: 0, batch: 30652, sum loss: 5248.960938, avg loss: 3.065982, ppl: 21.455519 +epoch: 0, batch: 30653, sum loss: 4563.113770, avg loss: 2.828961, ppl: 16.927862 +epoch: 0, batch: 30654, sum loss: 4940.057617, avg loss: 2.878822, ppl: 17.793293 +epoch: 0, batch: 30655, sum loss: 5655.074707, avg loss: 2.995273, ppl: 19.990808 +epoch: 0, batch: 30656, sum loss: 3970.704590, avg loss: 2.586778, ppl: 13.286895 +epoch: 0, batch: 30657, sum loss: 4304.730469, avg loss: 2.761213, ppl: 15.819017 +epoch: 0, batch: 30658, sum loss: 5753.382324, avg loss: 3.285770, ppl: 26.729549 +epoch: 0, batch: 30659, sum loss: 4984.131836, avg loss: 3.089976, ppl: 21.976557 +epoch: 0, batch: 30660, sum loss: 5025.585449, avg loss: 3.299793, ppl: 27.107040 +epoch: 0, batch: 30661, sum loss: 4053.922363, avg loss: 2.728077, ppl: 15.303430 +epoch: 0, batch: 30662, sum loss: 4934.507812, avg loss: 2.827798, ppl: 16.908190 +epoch: 0, batch: 30663, sum loss: 5963.724121, avg loss: 3.102874, ppl: 22.261847 +epoch: 0, batch: 30664, sum loss: 5652.415527, avg loss: 2.882415, ppl: 17.857347 +epoch: 0, batch: 30665, sum loss: 5013.039062, avg loss: 3.195054, ppl: 24.411482 +epoch: 0, batch: 30666, sum loss: 4842.785156, avg loss: 2.908580, ppl: 18.330748 +epoch: 0, batch: 30667, sum loss: 4395.086426, avg loss: 2.696372, ppl: 14.825847 +epoch: 0, batch: 30668, sum loss: 5551.863281, avg loss: 3.203614, ppl: 24.621351 +epoch: 0, batch: 30669, sum loss: 5484.072266, avg loss: 3.063727, ppl: 21.407202 +epoch: 0, batch: 30670, sum loss: 5811.535156, avg loss: 2.927726, ppl: 18.685083 +epoch: 0, batch: 30671, sum loss: 5462.665039, avg loss: 2.824542, ppl: 
16.853230 +epoch: 0, batch: 30672, sum loss: 4811.489258, avg loss: 2.781208, ppl: 16.138498 +epoch: 0, batch: 30673, sum loss: 4723.478516, avg loss: 2.873162, ppl: 17.692875 +epoch: 0, batch: 30674, sum loss: 4631.089844, avg loss: 2.925515, ppl: 18.643820 +epoch: 0, batch: 30675, sum loss: 4462.266113, avg loss: 2.801172, ppl: 16.463924 +epoch: 0, batch: 30676, sum loss: 5270.894531, avg loss: 3.037980, ppl: 20.863049 +epoch: 0, batch: 30677, sum loss: 4799.817871, avg loss: 2.672504, ppl: 14.476178 +epoch: 0, batch: 30678, sum loss: 4922.919922, avg loss: 2.994477, ppl: 19.974907 +epoch: 0, batch: 30679, sum loss: 5267.032715, avg loss: 2.982465, ppl: 19.736399 +epoch: 0, batch: 30680, sum loss: 4760.993164, avg loss: 2.854312, ppl: 17.362495 +epoch: 0, batch: 30681, sum loss: 5331.111328, avg loss: 3.141492, ppl: 23.138355 +epoch: 0, batch: 30682, sum loss: 4366.891113, avg loss: 2.656260, ppl: 14.242918 +epoch: 0, batch: 30683, sum loss: 5263.868652, avg loss: 3.217524, ppl: 24.966223 +epoch: 0, batch: 30684, sum loss: 4962.868652, avg loss: 2.924495, ppl: 18.624826 +epoch: 0, batch: 30685, sum loss: 5519.886719, avg loss: 3.009753, ppl: 20.282385 +epoch: 0, batch: 30686, sum loss: 5111.750000, avg loss: 3.077513, ppl: 21.704367 +epoch: 0, batch: 30687, sum loss: 5832.961914, avg loss: 3.240535, ppl: 25.547375 +epoch: 0, batch: 30688, sum loss: 5054.405762, avg loss: 2.996091, ppl: 20.007183 +epoch: 0, batch: 30689, sum loss: 4895.767578, avg loss: 2.900336, ppl: 18.180258 +epoch: 0, batch: 30690, sum loss: 5677.411133, avg loss: 3.083874, ppl: 21.842848 +epoch: 0, batch: 30691, sum loss: 6071.788086, avg loss: 3.054219, ppl: 21.204624 +epoch: 0, batch: 30692, sum loss: 5578.750977, avg loss: 2.976922, ppl: 19.627302 +epoch: 0, batch: 30693, sum loss: 5198.873535, avg loss: 2.999927, ppl: 20.084066 +epoch: 0, batch: 30694, sum loss: 4333.284180, avg loss: 2.639028, ppl: 13.999594 +epoch: 0, batch: 30695, sum loss: 4908.900879, avg loss: 2.734764, ppl: 
15.406105 +epoch: 0, batch: 30696, sum loss: 5217.665527, avg loss: 2.991781, ppl: 19.921122 +epoch: 0, batch: 30697, sum loss: 4977.452637, avg loss: 2.996660, ppl: 20.018568 +epoch: 0, batch: 30698, sum loss: 5364.979492, avg loss: 3.130093, ppl: 22.876110 +epoch: 0, batch: 30699, sum loss: 4958.008789, avg loss: 2.664164, ppl: 14.355941 +epoch: 0, batch: 30700, sum loss: 5265.293945, avg loss: 3.160440, ppl: 23.580980 +epoch: 0, batch: 30701, sum loss: 4255.512695, avg loss: 2.712245, ppl: 15.063058 +epoch: 0, batch: 30702, sum loss: 5260.109375, avg loss: 2.922283, ppl: 18.583670 +epoch: 0, batch: 30703, sum loss: 5660.605957, avg loss: 3.120511, ppl: 22.657946 +epoch: 0, batch: 30704, sum loss: 5737.936523, avg loss: 3.229002, ppl: 25.254440 +epoch: 0, batch: 30705, sum loss: 4731.545410, avg loss: 2.955369, ppl: 19.208809 +epoch: 0, batch: 30706, sum loss: 4263.506836, avg loss: 2.966950, ppl: 19.432558 +epoch: 0, batch: 30707, sum loss: 5552.167969, avg loss: 2.962736, ppl: 19.350851 +epoch: 0, batch: 30708, sum loss: 5100.202148, avg loss: 3.072411, ppl: 21.593904 +epoch: 0, batch: 30709, sum loss: 5189.821777, avg loss: 3.193736, ppl: 24.379347 +epoch: 0, batch: 30710, sum loss: 5504.931641, avg loss: 3.044763, ppl: 21.005054 +epoch: 0, batch: 30711, sum loss: 4402.258789, avg loss: 2.517015, ppl: 12.391549 +epoch: 0, batch: 30712, sum loss: 6567.608398, avg loss: 3.527180, ppl: 34.027863 +epoch: 0, batch: 30713, sum loss: 6414.029785, avg loss: 3.292623, ppl: 26.913372 +epoch: 0, batch: 30714, sum loss: 4107.208984, avg loss: 2.693252, ppl: 14.779659 +epoch: 0, batch: 30715, sum loss: 5643.612793, avg loss: 3.095783, ppl: 22.104546 +epoch: 0, batch: 30716, sum loss: 5023.562988, avg loss: 3.195651, ppl: 24.426065 +epoch: 0, batch: 30717, sum loss: 5237.010742, avg loss: 2.962110, ppl: 19.338739 +epoch: 0, batch: 30718, sum loss: 6777.435547, avg loss: 3.282051, ppl: 26.630344 +epoch: 0, batch: 30719, sum loss: 4544.307129, avg loss: 2.849095, ppl: 
17.272150 +epoch: 0, batch: 30720, sum loss: 5180.686523, avg loss: 3.111524, ppl: 22.455233 +epoch: 0, batch: 30721, sum loss: 5331.695801, avg loss: 2.998704, ppl: 20.059526 +epoch: 0, batch: 30722, sum loss: 5496.732422, avg loss: 3.188360, ppl: 24.248627 +epoch: 0, batch: 30723, sum loss: 5172.774414, avg loss: 2.820487, ppl: 16.785032 +epoch: 0, batch: 30724, sum loss: 5691.252441, avg loss: 3.280261, ppl: 26.582705 +epoch: 0, batch: 30725, sum loss: 4727.777344, avg loss: 2.929230, ppl: 18.713221 +epoch: 0, batch: 30726, sum loss: 4736.575684, avg loss: 2.839674, ppl: 17.110182 +epoch: 0, batch: 30727, sum loss: 4962.356934, avg loss: 2.964371, ppl: 19.382511 +epoch: 0, batch: 30728, sum loss: 4251.932617, avg loss: 2.799165, ppl: 16.430929 +epoch: 0, batch: 30729, sum loss: 5205.744141, avg loss: 2.947760, ppl: 19.063202 +epoch: 0, batch: 30730, sum loss: 5164.658691, avg loss: 3.160746, ppl: 23.588182 +epoch: 0, batch: 30731, sum loss: 5618.077148, avg loss: 3.136838, ppl: 23.030928 +epoch: 0, batch: 30732, sum loss: 5249.062988, avg loss: 3.262314, ppl: 26.109882 +epoch: 0, batch: 30733, sum loss: 4099.017578, avg loss: 2.832770, ppl: 16.992458 +epoch: 0, batch: 30734, sum loss: 5363.461426, avg loss: 2.991334, ppl: 19.912228 +epoch: 0, batch: 30735, sum loss: 5834.279297, avg loss: 3.163926, ppl: 23.663313 +epoch: 0, batch: 30736, sum loss: 4784.719727, avg loss: 2.979277, ppl: 19.673595 +epoch: 0, batch: 30737, sum loss: 5170.017578, avg loss: 2.814381, ppl: 16.682844 +epoch: 0, batch: 30738, sum loss: 6019.038574, avg loss: 3.217017, ppl: 24.953571 +epoch: 0, batch: 30739, sum loss: 5229.321289, avg loss: 2.889128, ppl: 17.977625 +epoch: 0, batch: 30740, sum loss: 5564.937988, avg loss: 3.079656, ppl: 21.750912 +epoch: 0, batch: 30741, sum loss: 5897.680176, avg loss: 3.063730, ppl: 21.407253 +epoch: 0, batch: 30742, sum loss: 4432.766113, avg loss: 2.870963, ppl: 17.654005 +epoch: 0, batch: 30743, sum loss: 4181.833008, avg loss: 2.825563, ppl: 
16.870440 +epoch: 0, batch: 30744, sum loss: 5500.313477, avg loss: 3.098768, ppl: 22.170620 +epoch: 0, batch: 30745, sum loss: 4694.426758, avg loss: 2.993895, ppl: 19.963280 +epoch: 0, batch: 30746, sum loss: 5857.500000, avg loss: 3.164506, ppl: 23.677038 +epoch: 0, batch: 30747, sum loss: 5185.505371, avg loss: 2.953021, ppl: 19.163765 +epoch: 0, batch: 30748, sum loss: 4859.462891, avg loss: 2.863561, ppl: 17.523817 +epoch: 0, batch: 30749, sum loss: 4933.208984, avg loss: 2.961110, ppl: 19.319397 +epoch: 0, batch: 30750, sum loss: 5650.435547, avg loss: 3.241787, ppl: 25.579403 +epoch: 0, batch: 30751, sum loss: 6039.727051, avg loss: 3.408424, ppl: 30.217581 +epoch: 0, batch: 30752, sum loss: 5512.338379, avg loss: 3.000729, ppl: 20.100182 +epoch: 0, batch: 30753, sum loss: 4456.967773, avg loss: 2.899784, ppl: 18.170218 +epoch: 0, batch: 30754, sum loss: 5032.406250, avg loss: 3.029745, ppl: 20.691952 +epoch: 0, batch: 30755, sum loss: 5637.516113, avg loss: 3.011494, ppl: 20.317726 +epoch: 0, batch: 30756, sum loss: 5110.805176, avg loss: 2.812771, ppl: 16.656010 +epoch: 0, batch: 30757, sum loss: 4594.891113, avg loss: 2.861078, ppl: 17.480356 +epoch: 0, batch: 30758, sum loss: 6280.848633, avg loss: 3.382255, ppl: 29.437086 +epoch: 0, batch: 30759, sum loss: 3983.442627, avg loss: 2.963871, ppl: 19.372820 +epoch: 0, batch: 30760, sum loss: 5556.025879, avg loss: 3.110877, ppl: 22.440712 +epoch: 0, batch: 30761, sum loss: 4041.985352, avg loss: 2.958994, ppl: 19.278561 +epoch: 0, batch: 30762, sum loss: 5305.790039, avg loss: 3.054571, ppl: 21.212088 +epoch: 0, batch: 30763, sum loss: 5243.556641, avg loss: 3.156867, ppl: 23.496872 +epoch: 0, batch: 30764, sum loss: 6073.808105, avg loss: 3.014297, ppl: 20.374763 +epoch: 0, batch: 30765, sum loss: 4514.186523, avg loss: 2.995479, ppl: 19.994932 +epoch: 0, batch: 30766, sum loss: 4908.651855, avg loss: 2.894252, ppl: 18.069986 +epoch: 0, batch: 30767, sum loss: 6129.228516, avg loss: 3.360323, ppl: 
28.798483 +epoch: 0, batch: 30768, sum loss: 6698.705078, avg loss: 3.301481, ppl: 27.152830 +epoch: 0, batch: 30769, sum loss: 4773.532227, avg loss: 2.941178, ppl: 18.938145 +epoch: 0, batch: 30770, sum loss: 4786.923828, avg loss: 2.839219, ppl: 17.102409 +epoch: 0, batch: 30771, sum loss: 4714.144043, avg loss: 3.025766, ppl: 20.609793 +epoch: 0, batch: 30772, sum loss: 5746.149414, avg loss: 3.176423, ppl: 23.960899 +epoch: 0, batch: 30773, sum loss: 6301.488281, avg loss: 3.341192, ppl: 28.252790 +epoch: 0, batch: 30774, sum loss: 5505.516113, avg loss: 3.082596, ppl: 21.814957 +epoch: 0, batch: 30775, sum loss: 4697.573730, avg loss: 2.952592, ppl: 19.155539 +epoch: 0, batch: 30776, sum loss: 5133.119629, avg loss: 2.987846, ppl: 19.842897 +epoch: 0, batch: 30777, sum loss: 4630.191406, avg loss: 2.833654, ppl: 17.007500 +epoch: 0, batch: 30778, sum loss: 4440.144043, avg loss: 2.810218, ppl: 16.613537 +epoch: 0, batch: 30779, sum loss: 4148.302246, avg loss: 2.835477, ppl: 17.038519 +epoch: 0, batch: 30780, sum loss: 4692.713867, avg loss: 2.796611, ppl: 16.389015 +epoch: 0, batch: 30781, sum loss: 5516.467773, avg loss: 2.846475, ppl: 17.226944 +epoch: 0, batch: 30782, sum loss: 4887.995605, avg loss: 2.966017, ppl: 19.414433 +epoch: 0, batch: 30783, sum loss: 4652.857422, avg loss: 3.023299, ppl: 20.559008 +epoch: 0, batch: 30784, sum loss: 4329.323242, avg loss: 2.707519, ppl: 14.992035 +epoch: 0, batch: 30785, sum loss: 4257.206543, avg loss: 2.741279, ppl: 15.506811 +epoch: 0, batch: 30786, sum loss: 5544.539551, avg loss: 3.080300, ppl: 21.764927 +epoch: 0, batch: 30787, sum loss: 4725.503906, avg loss: 3.098691, ppl: 22.168913 +epoch: 0, batch: 30788, sum loss: 4121.541504, avg loss: 2.605273, ppl: 13.534913 +epoch: 0, batch: 30789, sum loss: 4180.009766, avg loss: 2.900770, ppl: 18.188148 +epoch: 0, batch: 30790, sum loss: 5241.141602, avg loss: 3.176450, ppl: 23.961529 +epoch: 0, batch: 30791, sum loss: 4704.446777, avg loss: 2.800266, ppl: 
16.449022 +epoch: 0, batch: 30792, sum loss: 4321.810547, avg loss: 2.659576, ppl: 14.290224 +epoch: 0, batch: 30793, sum loss: 5042.809570, avg loss: 2.896502, ppl: 18.110680 +epoch: 0, batch: 30794, sum loss: 4484.533203, avg loss: 2.799334, ppl: 16.433699 +epoch: 0, batch: 30795, sum loss: 4244.724609, avg loss: 2.625062, ppl: 13.805427 +epoch: 0, batch: 30796, sum loss: 4338.242188, avg loss: 2.699591, ppl: 14.873642 +epoch: 0, batch: 30797, sum loss: 5250.480469, avg loss: 3.019253, ppl: 20.475986 +epoch: 0, batch: 30798, sum loss: 5746.472656, avg loss: 3.188942, ppl: 24.262737 +epoch: 0, batch: 30799, sum loss: 5691.877930, avg loss: 3.311157, ppl: 27.416815 +epoch: 0, batch: 30800, sum loss: 5338.416016, avg loss: 2.931585, ppl: 18.757334 +epoch: 0, batch: 30801, sum loss: 5531.567383, avg loss: 3.214159, ppl: 24.882351 +epoch: 0, batch: 30802, sum loss: 5545.235352, avg loss: 3.111804, ppl: 22.461540 +epoch: 0, batch: 30803, sum loss: 5735.539551, avg loss: 3.275579, ppl: 26.458553 +epoch: 0, batch: 30804, sum loss: 5257.566406, avg loss: 3.096329, ppl: 22.116611 +epoch: 0, batch: 30805, sum loss: 4689.635254, avg loss: 2.866525, ppl: 17.575844 +epoch: 0, batch: 30806, sum loss: 5160.541992, avg loss: 3.095706, ppl: 22.102837 +epoch: 0, batch: 30807, sum loss: 5129.759766, avg loss: 2.987629, ppl: 19.838598 +epoch: 0, batch: 30808, sum loss: 4912.826660, avg loss: 2.777177, ppl: 16.073587 +epoch: 0, batch: 30809, sum loss: 4153.682617, avg loss: 2.800865, ppl: 16.458876 +epoch: 0, batch: 30810, sum loss: 4991.050293, avg loss: 3.088521, ppl: 21.944603 +epoch: 0, batch: 30811, sum loss: 5482.161133, avg loss: 3.336677, ppl: 28.125519 +epoch: 0, batch: 30812, sum loss: 4410.010254, avg loss: 2.810714, ppl: 16.621782 +epoch: 0, batch: 30813, sum loss: 5548.182617, avg loss: 3.168580, ppl: 23.773691 +epoch: 0, batch: 30814, sum loss: 4386.606445, avg loss: 2.794017, ppl: 16.346550 +epoch: 0, batch: 30815, sum loss: 4998.812500, avg loss: 3.046199, ppl: 
21.035234 +epoch: 0, batch: 30816, sum loss: 4262.439453, avg loss: 2.802393, ppl: 16.484045 +epoch: 0, batch: 30817, sum loss: 5256.520508, avg loss: 2.842899, ppl: 17.165457 +epoch: 0, batch: 30818, sum loss: 5549.094727, avg loss: 3.215003, ppl: 24.903355 +epoch: 0, batch: 30819, sum loss: 4953.192871, avg loss: 3.144884, ppl: 23.216990 +epoch: 0, batch: 30820, sum loss: 5519.664062, avg loss: 3.003082, ppl: 20.147528 +epoch: 0, batch: 30821, sum loss: 6134.181641, avg loss: 3.128089, ppl: 22.830303 +epoch: 0, batch: 30822, sum loss: 5104.203125, avg loss: 2.896824, ppl: 18.116508 +epoch: 0, batch: 30823, sum loss: 5313.984375, avg loss: 2.903817, ppl: 18.243643 +epoch: 0, batch: 30824, sum loss: 5396.381836, avg loss: 3.052252, ppl: 21.162956 +epoch: 0, batch: 30825, sum loss: 4480.808594, avg loss: 2.885260, ppl: 17.908224 +epoch: 0, batch: 30826, sum loss: 4678.375000, avg loss: 2.715250, ppl: 15.108379 +epoch: 0, batch: 30827, sum loss: 4476.951172, avg loss: 2.988619, ppl: 19.858236 +epoch: 0, batch: 30828, sum loss: 4501.328125, avg loss: 2.834590, ppl: 17.023415 +epoch: 0, batch: 30829, sum loss: 4631.616699, avg loss: 2.778414, ppl: 16.093481 +epoch: 0, batch: 30830, sum loss: 5337.439941, avg loss: 3.186532, ppl: 24.204330 +epoch: 0, batch: 30831, sum loss: 4951.030762, avg loss: 3.233854, ppl: 25.377279 +epoch: 0, batch: 30832, sum loss: 5257.635742, avg loss: 3.190313, ppl: 24.296028 +epoch: 0, batch: 30833, sum loss: 4424.476074, avg loss: 2.689651, ppl: 14.726536 +epoch: 0, batch: 30834, sum loss: 5780.714844, avg loss: 3.139987, ppl: 23.103556 +epoch: 0, batch: 30835, sum loss: 4392.371094, avg loss: 2.865213, ppl: 17.552786 +epoch: 0, batch: 30836, sum loss: 5161.795898, avg loss: 2.949598, ppl: 19.098267 +epoch: 0, batch: 30837, sum loss: 5997.119141, avg loss: 3.250471, ppl: 25.802492 +epoch: 0, batch: 30838, sum loss: 4439.818359, avg loss: 2.703909, ppl: 14.938009 +epoch: 0, batch: 30839, sum loss: 4417.561523, avg loss: 2.693635, ppl: 
14.785323 +epoch: 0, batch: 30840, sum loss: 5516.001953, avg loss: 3.373701, ppl: 29.186356 +epoch: 0, batch: 30841, sum loss: 5240.894043, avg loss: 2.939369, ppl: 18.903910 +epoch: 0, batch: 30842, sum loss: 4652.392578, avg loss: 2.782531, ppl: 16.159878 +epoch: 0, batch: 30843, sum loss: 5155.869629, avg loss: 3.083654, ppl: 21.838057 +epoch: 0, batch: 30844, sum loss: 4487.208008, avg loss: 2.917561, ppl: 18.496113 +epoch: 0, batch: 30845, sum loss: 5131.929688, avg loss: 2.935887, ppl: 18.838198 +epoch: 0, batch: 30846, sum loss: 4857.487793, avg loss: 2.867466, ppl: 17.592386 +epoch: 0, batch: 30847, sum loss: 6486.284668, avg loss: 3.182672, ppl: 24.111082 +epoch: 0, batch: 30848, sum loss: 5586.860840, avg loss: 3.307792, ppl: 27.324732 +epoch: 0, batch: 30849, sum loss: 4774.616211, avg loss: 2.790541, ppl: 16.289837 +epoch: 0, batch: 30850, sum loss: 4857.945312, avg loss: 2.793528, ppl: 16.338562 +epoch: 0, batch: 30851, sum loss: 5227.644531, avg loss: 2.838027, ppl: 17.082022 +epoch: 0, batch: 30852, sum loss: 4941.780762, avg loss: 3.046721, ppl: 21.046209 +epoch: 0, batch: 30853, sum loss: 3850.663086, avg loss: 2.586073, ppl: 13.277531 +epoch: 0, batch: 30854, sum loss: 4871.436035, avg loss: 3.018238, ppl: 20.455219 +epoch: 0, batch: 30855, sum loss: 5160.918457, avg loss: 2.922377, ppl: 18.585419 +epoch: 0, batch: 30856, sum loss: 5436.999023, avg loss: 3.155542, ppl: 23.465755 +epoch: 0, batch: 30857, sum loss: 4439.265625, avg loss: 2.827558, ppl: 16.904127 +epoch: 0, batch: 30858, sum loss: 4719.326660, avg loss: 2.962540, ppl: 19.347059 +epoch: 0, batch: 30859, sum loss: 6558.683594, avg loss: 3.207180, ppl: 24.709314 +epoch: 0, batch: 30860, sum loss: 5249.113281, avg loss: 2.979066, ppl: 19.669430 +epoch: 0, batch: 30861, sum loss: 4831.366699, avg loss: 2.978648, ppl: 19.661211 +epoch: 0, batch: 30862, sum loss: 5769.946777, avg loss: 3.218041, ppl: 24.979130 +epoch: 0, batch: 30863, sum loss: 4817.021484, avg loss: 3.089815, ppl: 
21.973015 +epoch: 0, batch: 30864, sum loss: 6170.117676, avg loss: 3.169039, ppl: 23.784605 +epoch: 0, batch: 30865, sum loss: 4376.708496, avg loss: 2.887011, ppl: 17.939610 +epoch: 0, batch: 30866, sum loss: 5907.603516, avg loss: 2.956758, ppl: 19.235519 +epoch: 0, batch: 30867, sum loss: 5793.174805, avg loss: 2.973909, ppl: 19.568266 +epoch: 0, batch: 30868, sum loss: 5192.369141, avg loss: 2.967068, ppl: 19.434853 +epoch: 0, batch: 30869, sum loss: 4932.782715, avg loss: 3.031827, ppl: 20.735081 +epoch: 0, batch: 30870, sum loss: 4563.202148, avg loss: 2.767254, ppl: 15.914873 +epoch: 0, batch: 30871, sum loss: 4668.159668, avg loss: 2.915777, ppl: 18.463161 +epoch: 0, batch: 30872, sum loss: 5331.853027, avg loss: 2.745548, ppl: 15.573138 +epoch: 0, batch: 30873, sum loss: 4820.044922, avg loss: 2.900147, ppl: 18.176825 +epoch: 0, batch: 30874, sum loss: 5685.850586, avg loss: 3.095183, ppl: 22.091278 +epoch: 0, batch: 30875, sum loss: 4747.923828, avg loss: 3.041591, ppl: 20.938534 +epoch: 0, batch: 30876, sum loss: 5304.430176, avg loss: 2.986729, ppl: 19.820736 +epoch: 0, batch: 30877, sum loss: 4500.215820, avg loss: 2.905239, ppl: 18.269615 +epoch: 0, batch: 30878, sum loss: 4753.271484, avg loss: 3.050880, ppl: 21.133944 +epoch: 0, batch: 30879, sum loss: 4549.964844, avg loss: 2.608925, ppl: 13.584435 +epoch: 0, batch: 30880, sum loss: 5396.477539, avg loss: 3.078424, ppl: 21.724142 +epoch: 0, batch: 30881, sum loss: 5200.698730, avg loss: 2.929971, ppl: 18.727087 +epoch: 0, batch: 30882, sum loss: 5716.342773, avg loss: 3.140848, ppl: 23.123465 +epoch: 0, batch: 30883, sum loss: 4962.842285, avg loss: 3.088265, ppl: 21.938990 +epoch: 0, batch: 30884, sum loss: 4592.658203, avg loss: 2.904907, ppl: 18.263544 +epoch: 0, batch: 30885, sum loss: 5240.991211, avg loss: 3.082936, ppl: 21.822386 +epoch: 0, batch: 30886, sum loss: 5064.843262, avg loss: 3.054791, ppl: 21.216751 +epoch: 0, batch: 30887, sum loss: 5046.541016, avg loss: 3.210268, ppl: 
24.785723 +epoch: 0, batch: 30888, sum loss: 4273.870117, avg loss: 2.689660, ppl: 14.726672 +epoch: 0, batch: 30889, sum loss: 4594.604492, avg loss: 2.817048, ppl: 16.727392 +epoch: 0, batch: 30890, sum loss: 4522.067871, avg loss: 2.750649, ppl: 15.652794 +epoch: 0, batch: 30891, sum loss: 5517.842773, avg loss: 3.294235, ppl: 26.956778 +epoch: 0, batch: 30892, sum loss: 5049.321289, avg loss: 3.021736, ppl: 20.526903 +epoch: 0, batch: 30893, sum loss: 4800.486816, avg loss: 2.998430, ppl: 20.054033 +epoch: 0, batch: 30894, sum loss: 4725.284668, avg loss: 2.975620, ppl: 19.601778 +epoch: 0, batch: 30895, sum loss: 5895.266602, avg loss: 3.152549, ppl: 23.395624 +epoch: 0, batch: 30896, sum loss: 4575.298828, avg loss: 2.931005, ppl: 18.746462 +epoch: 0, batch: 30897, sum loss: 5159.583496, avg loss: 3.031483, ppl: 20.727943 +epoch: 0, batch: 30898, sum loss: 4316.957520, avg loss: 2.932716, ppl: 18.778557 +epoch: 0, batch: 30899, sum loss: 4732.124023, avg loss: 2.933741, ppl: 18.797819 +epoch: 0, batch: 30900, sum loss: 4602.581543, avg loss: 2.973244, ppl: 19.555254 +epoch: 0, batch: 30901, sum loss: 5114.290527, avg loss: 2.973425, ppl: 19.558788 +epoch: 0, batch: 30902, sum loss: 5427.248535, avg loss: 3.026910, ppl: 20.633368 +epoch: 0, batch: 30903, sum loss: 5868.926758, avg loss: 2.816184, ppl: 16.712950 +epoch: 0, batch: 30904, sum loss: 5450.857422, avg loss: 3.111220, ppl: 22.448418 +epoch: 0, batch: 30905, sum loss: 4885.975586, avg loss: 2.874104, ppl: 17.709541 +epoch: 0, batch: 30906, sum loss: 4857.830566, avg loss: 2.942357, ppl: 18.960476 +epoch: 0, batch: 30907, sum loss: 5569.911133, avg loss: 3.148622, ppl: 23.303919 +epoch: 0, batch: 30908, sum loss: 4613.489746, avg loss: 2.725038, ppl: 15.256998 +epoch: 0, batch: 30909, sum loss: 5411.061035, avg loss: 3.196138, ppl: 24.437960 +epoch: 0, batch: 30910, sum loss: 5913.265625, avg loss: 3.180885, ppl: 24.068047 +epoch: 0, batch: 30911, sum loss: 5627.213867, avg loss: 3.347539, ppl: 
28.432686 +epoch: 0, batch: 30912, sum loss: 6684.396484, avg loss: 3.227618, ppl: 25.219511 +epoch: 0, batch: 30913, sum loss: 4397.598633, avg loss: 2.965340, ppl: 19.401293 +epoch: 0, batch: 30914, sum loss: 4609.687988, avg loss: 2.838478, ppl: 17.089733 +epoch: 0, batch: 30915, sum loss: 4540.457520, avg loss: 2.929327, ppl: 18.715040 +epoch: 0, batch: 30916, sum loss: 4700.094238, avg loss: 2.704312, ppl: 14.944033 +epoch: 0, batch: 30917, sum loss: 5266.450684, avg loss: 3.265004, ppl: 26.180208 +epoch: 0, batch: 30918, sum loss: 4857.580078, avg loss: 2.949350, ppl: 19.093546 +epoch: 0, batch: 30919, sum loss: 4756.143066, avg loss: 2.989405, ppl: 19.873852 +epoch: 0, batch: 30920, sum loss: 4972.568848, avg loss: 3.094318, ppl: 22.072178 +epoch: 0, batch: 30921, sum loss: 5124.725098, avg loss: 2.797339, ppl: 16.400946 +epoch: 0, batch: 30922, sum loss: 4316.195801, avg loss: 2.800906, ppl: 16.459547 +epoch: 0, batch: 30923, sum loss: 4430.760254, avg loss: 2.862248, ppl: 17.500828 +epoch: 0, batch: 30924, sum loss: 4521.377930, avg loss: 2.966784, ppl: 19.429325 +epoch: 0, batch: 30925, sum loss: 5216.233398, avg loss: 3.030931, ppl: 20.716520 +epoch: 0, batch: 30926, sum loss: 4611.487793, avg loss: 2.867841, ppl: 17.598972 +epoch: 0, batch: 30927, sum loss: 5282.216797, avg loss: 2.862990, ppl: 17.513817 +epoch: 0, batch: 30928, sum loss: 5565.915039, avg loss: 3.046478, ppl: 21.041103 +epoch: 0, batch: 30929, sum loss: 4509.085449, avg loss: 2.746094, ppl: 15.581643 +epoch: 0, batch: 30930, sum loss: 5788.338379, avg loss: 3.052921, ppl: 21.177118 +epoch: 0, batch: 30931, sum loss: 4362.093750, avg loss: 2.787280, ppl: 16.236801 +epoch: 0, batch: 30932, sum loss: 4397.501953, avg loss: 2.916115, ppl: 18.469400 +epoch: 0, batch: 30933, sum loss: 5363.624512, avg loss: 3.096781, ppl: 22.126606 +epoch: 0, batch: 30934, sum loss: 5051.750000, avg loss: 3.045057, ppl: 21.011234 +epoch: 0, batch: 30935, sum loss: 4759.895020, avg loss: 2.976795, ppl: 
19.624813 +epoch: 0, batch: 30936, sum loss: 5047.275879, avg loss: 3.053403, ppl: 21.187330 +epoch: 0, batch: 30937, sum loss: 5334.942383, avg loss: 2.932899, ppl: 18.781992 +epoch: 0, batch: 30938, sum loss: 4492.033691, avg loss: 2.965039, ppl: 19.395454 +epoch: 0, batch: 30939, sum loss: 4084.153320, avg loss: 2.880221, ppl: 17.818213 +epoch: 0, batch: 30940, sum loss: 4369.196777, avg loss: 2.777620, ppl: 16.080708 +epoch: 0, batch: 30941, sum loss: 5381.773926, avg loss: 3.109055, ppl: 22.399862 +epoch: 0, batch: 30942, sum loss: 4790.068848, avg loss: 2.962318, ppl: 19.342764 +epoch: 0, batch: 30943, sum loss: 5775.811035, avg loss: 3.093632, ppl: 22.057043 +epoch: 0, batch: 30944, sum loss: 5389.724609, avg loss: 3.126291, ppl: 22.789288 +epoch: 0, batch: 30945, sum loss: 4214.773926, avg loss: 2.865244, ppl: 17.553333 +epoch: 0, batch: 30946, sum loss: 4295.971680, avg loss: 2.968882, ppl: 19.470133 +epoch: 0, batch: 30947, sum loss: 4352.672852, avg loss: 2.833771, ppl: 17.009491 +epoch: 0, batch: 30948, sum loss: 4733.328125, avg loss: 3.124309, ppl: 22.744175 +epoch: 0, batch: 30949, sum loss: 5620.436523, avg loss: 3.171804, ppl: 23.850471 +epoch: 0, batch: 30950, sum loss: 4734.117676, avg loss: 2.722322, ppl: 15.215611 +epoch: 0, batch: 30951, sum loss: 5196.982422, avg loss: 3.383452, ppl: 29.472340 +epoch: 0, batch: 30952, sum loss: 5255.540039, avg loss: 3.078817, ppl: 21.732670 +epoch: 0, batch: 30953, sum loss: 5557.554688, avg loss: 3.051925, ppl: 21.156025 +epoch: 0, batch: 30954, sum loss: 5222.454590, avg loss: 3.063023, ppl: 21.392136 +epoch: 0, batch: 30955, sum loss: 4056.035889, avg loss: 2.772410, ppl: 15.997143 +epoch: 0, batch: 30956, sum loss: 5020.482910, avg loss: 2.730007, ppl: 15.332997 +epoch: 0, batch: 30957, sum loss: 5156.116211, avg loss: 2.964989, ppl: 19.394493 +epoch: 0, batch: 30958, sum loss: 5482.318848, avg loss: 3.123828, ppl: 22.733246 +epoch: 0, batch: 30959, sum loss: 4854.631348, avg loss: 2.843955, ppl: 
17.183594 +epoch: 0, batch: 30960, sum loss: 5124.070312, avg loss: 3.331645, ppl: 27.984344 +epoch: 0, batch: 30961, sum loss: 5007.626953, avg loss: 2.982506, ppl: 19.737207 +epoch: 0, batch: 30962, sum loss: 5444.832031, avg loss: 3.125621, ppl: 22.774035 +epoch: 0, batch: 30963, sum loss: 5743.365234, avg loss: 3.325631, ppl: 27.816555 +epoch: 0, batch: 30964, sum loss: 5358.270508, avg loss: 3.198968, ppl: 24.507217 +epoch: 0, batch: 30965, sum loss: 3908.216309, avg loss: 2.615941, ppl: 13.680087 +epoch: 0, batch: 30966, sum loss: 5555.473145, avg loss: 3.229926, ppl: 25.277796 +epoch: 0, batch: 30967, sum loss: 5954.439453, avg loss: 3.215140, ppl: 24.906782 +epoch: 0, batch: 30968, sum loss: 4873.019043, avg loss: 2.909265, ppl: 18.343317 +epoch: 0, batch: 30969, sum loss: 5237.991211, avg loss: 3.013804, ppl: 20.364719 +epoch: 0, batch: 30970, sum loss: 5802.448242, avg loss: 3.096290, ppl: 22.115757 +epoch: 0, batch: 30971, sum loss: 5281.762207, avg loss: 3.105092, ppl: 22.311277 +epoch: 0, batch: 30972, sum loss: 5560.128906, avg loss: 2.987710, ppl: 19.840206 +epoch: 0, batch: 30973, sum loss: 5057.328125, avg loss: 2.964436, ppl: 19.383768 +epoch: 0, batch: 30974, sum loss: 4809.176758, avg loss: 3.001983, ppl: 20.125404 +epoch: 0, batch: 30975, sum loss: 4728.038574, avg loss: 2.897083, ppl: 18.121201 +epoch: 0, batch: 30976, sum loss: 4311.463867, avg loss: 2.796021, ppl: 16.379339 +epoch: 0, batch: 30977, sum loss: 4501.488281, avg loss: 3.007006, ppl: 20.226753 +epoch: 0, batch: 30978, sum loss: 4986.887207, avg loss: 3.038932, ppl: 20.882925 +epoch: 0, batch: 30979, sum loss: 4802.982910, avg loss: 2.696790, ppl: 14.832044 +epoch: 0, batch: 30980, sum loss: 4182.059082, avg loss: 2.762258, ppl: 15.835564 +epoch: 0, batch: 30981, sum loss: 5882.796875, avg loss: 3.380918, ppl: 29.397739 +epoch: 0, batch: 30982, sum loss: 5794.225586, avg loss: 2.850086, ppl: 17.289272 +epoch: 0, batch: 30983, sum loss: 5023.658691, avg loss: 3.108700, ppl: 
22.391907 +epoch: 0, batch: 30984, sum loss: 5774.906738, avg loss: 3.152242, ppl: 23.388435 +epoch: 0, batch: 30985, sum loss: 6151.538086, avg loss: 3.045316, ppl: 21.016670 +epoch: 0, batch: 30986, sum loss: 4181.629395, avg loss: 2.972018, ppl: 19.531294 +epoch: 0, batch: 30987, sum loss: 5244.592285, avg loss: 3.165113, ppl: 23.691420 +epoch: 0, batch: 30988, sum loss: 5516.397461, avg loss: 2.957854, ppl: 19.256599 +epoch: 0, batch: 30989, sum loss: 5590.632324, avg loss: 3.070089, ppl: 21.543821 +epoch: 0, batch: 30990, sum loss: 3640.268066, avg loss: 2.565376, ppl: 13.005545 +epoch: 0, batch: 30991, sum loss: 3881.997070, avg loss: 2.848127, ppl: 17.255434 +epoch: 0, batch: 30992, sum loss: 4322.820801, avg loss: 2.948718, ppl: 19.081476 +epoch: 0, batch: 30993, sum loss: 4927.991211, avg loss: 3.199994, ppl: 24.532391 +epoch: 0, batch: 30994, sum loss: 4644.066895, avg loss: 3.296002, ppl: 27.004463 +epoch: 0, batch: 30995, sum loss: 5558.348145, avg loss: 3.134996, ppl: 22.988548 +epoch: 0, batch: 30996, sum loss: 5565.064941, avg loss: 3.009770, ppl: 20.282738 +epoch: 0, batch: 30997, sum loss: 5429.624512, avg loss: 2.895800, ppl: 18.097971 +epoch: 0, batch: 30998, sum loss: 4606.095703, avg loss: 2.896915, ppl: 18.118172 +epoch: 0, batch: 30999, sum loss: 5476.645508, avg loss: 3.341455, ppl: 28.260221 +epoch: 0, batch: 31000, sum loss: 5187.971680, avg loss: 2.908056, ppl: 18.321148 +epoch: 0, batch: 31001, sum loss: 5337.580078, avg loss: 3.199988, ppl: 24.532238 +epoch: 0, batch: 31002, sum loss: 4910.897949, avg loss: 2.830489, ppl: 16.953743 +epoch: 0, batch: 31003, sum loss: 5867.896484, avg loss: 3.141272, ppl: 23.133276 +epoch: 0, batch: 31004, sum loss: 3816.644287, avg loss: 2.615932, ppl: 13.679956 +epoch: 0, batch: 31005, sum loss: 4455.128418, avg loss: 2.817918, ppl: 16.741955 +epoch: 0, batch: 31006, sum loss: 4490.601074, avg loss: 2.942727, ppl: 18.967497 +epoch: 0, batch: 31007, sum loss: 4853.380371, avg loss: 2.930785, ppl: 
18.742340 +epoch: 0, batch: 31008, sum loss: 5393.423340, avg loss: 3.165155, ppl: 23.692410 +epoch: 0, batch: 31009, sum loss: 5022.773438, avg loss: 2.940734, ppl: 18.929729 +epoch: 0, batch: 31010, sum loss: 5353.350586, avg loss: 3.105192, ppl: 22.313501 +epoch: 0, batch: 31011, sum loss: 5413.702637, avg loss: 2.942230, ppl: 18.958067 +epoch: 0, batch: 31012, sum loss: 5232.765625, avg loss: 2.921700, ppl: 18.572844 +epoch: 0, batch: 31013, sum loss: 4153.158691, avg loss: 2.924760, ppl: 18.629751 +epoch: 0, batch: 31014, sum loss: 4261.848633, avg loss: 2.850735, ppl: 17.300488 +epoch: 0, batch: 31015, sum loss: 5317.772461, avg loss: 3.167226, ppl: 23.741529 +epoch: 0, batch: 31016, sum loss: 5396.582031, avg loss: 3.061022, ppl: 21.349365 +epoch: 0, batch: 31017, sum loss: 5211.144531, avg loss: 3.197021, ppl: 24.459562 +epoch: 0, batch: 31018, sum loss: 5163.579590, avg loss: 2.948932, ppl: 19.085558 +epoch: 0, batch: 31019, sum loss: 5347.213379, avg loss: 2.912426, ppl: 18.401377 +epoch: 0, batch: 31020, sum loss: 4662.111328, avg loss: 2.943252, ppl: 18.977463 +epoch: 0, batch: 31021, sum loss: 5308.979492, avg loss: 3.092009, ppl: 22.021276 +epoch: 0, batch: 31022, sum loss: 5179.308105, avg loss: 3.148515, ppl: 23.301447 +epoch: 0, batch: 31023, sum loss: 5146.492676, avg loss: 3.259337, ppl: 26.032265 +epoch: 0, batch: 31024, sum loss: 4254.271484, avg loss: 2.894062, ppl: 18.066551 +epoch: 0, batch: 31025, sum loss: 4407.554688, avg loss: 2.690815, ppl: 14.743683 +epoch: 0, batch: 31026, sum loss: 4348.722656, avg loss: 2.711174, ppl: 15.046926 +epoch: 0, batch: 31027, sum loss: 4404.452148, avg loss: 2.928492, ppl: 18.699411 +epoch: 0, batch: 31028, sum loss: 4510.717773, avg loss: 2.696185, ppl: 14.823079 +epoch: 0, batch: 31029, sum loss: 4615.637695, avg loss: 2.763855, ppl: 15.860868 +epoch: 0, batch: 31030, sum loss: 5512.306641, avg loss: 3.195540, ppl: 24.423363 +epoch: 0, batch: 31031, sum loss: 5028.567871, avg loss: 3.038410, ppl: 
20.872025 +epoch: 0, batch: 31032, sum loss: 5545.624023, avg loss: 3.113770, ppl: 22.505732 +epoch: 0, batch: 31033, sum loss: 3907.893311, avg loss: 2.922882, ppl: 18.594803 +epoch: 0, batch: 31034, sum loss: 5088.954590, avg loss: 3.149106, ppl: 23.315201 +epoch: 0, batch: 31035, sum loss: 4262.713867, avg loss: 2.876325, ppl: 17.748932 +epoch: 0, batch: 31036, sum loss: 5750.819824, avg loss: 3.072019, ppl: 21.585447 +epoch: 0, batch: 31037, sum loss: 5496.320312, avg loss: 3.055209, ppl: 21.225615 +epoch: 0, batch: 31038, sum loss: 4944.335938, avg loss: 2.985710, ppl: 19.800558 +epoch: 0, batch: 31039, sum loss: 4235.916016, avg loss: 2.921321, ppl: 18.565804 +epoch: 0, batch: 31040, sum loss: 5961.644043, avg loss: 3.212093, ppl: 24.830994 +epoch: 0, batch: 31041, sum loss: 5863.019043, avg loss: 3.103769, ppl: 22.281769 +epoch: 0, batch: 31042, sum loss: 4270.948730, avg loss: 2.849199, ppl: 17.273933 +epoch: 0, batch: 31043, sum loss: 4804.124023, avg loss: 3.154382, ppl: 23.438553 +epoch: 0, batch: 31044, sum loss: 4981.925293, avg loss: 3.139210, ppl: 23.085611 +epoch: 0, batch: 31045, sum loss: 4214.061035, avg loss: 2.599667, ppl: 13.459260 +epoch: 0, batch: 31046, sum loss: 4700.617188, avg loss: 2.848859, ppl: 17.268064 +epoch: 0, batch: 31047, sum loss: 5107.301758, avg loss: 2.869271, ppl: 17.624157 +epoch: 0, batch: 31048, sum loss: 5377.958496, avg loss: 3.371761, ppl: 29.129774 +epoch: 0, batch: 31049, sum loss: 5348.638184, avg loss: 3.137031, ppl: 23.035381 +epoch: 0, batch: 31050, sum loss: 5770.305176, avg loss: 3.278582, ppl: 26.538124 +epoch: 0, batch: 31051, sum loss: 4783.501465, avg loss: 2.895582, ppl: 18.094032 +epoch: 0, batch: 31052, sum loss: 4307.900879, avg loss: 2.868110, ppl: 17.603710 +epoch: 0, batch: 31053, sum loss: 4897.074707, avg loss: 2.958958, ppl: 19.277868 +epoch: 0, batch: 31054, sum loss: 4837.237305, avg loss: 3.040375, ppl: 20.913094 +epoch: 0, batch: 31055, sum loss: 3984.267578, avg loss: 2.977778, ppl: 
19.644127 +epoch: 0, batch: 31056, sum loss: 5160.907227, avg loss: 3.105239, ppl: 22.314554 +epoch: 0, batch: 31057, sum loss: 5279.965332, avg loss: 3.337525, ppl: 28.149382 +epoch: 0, batch: 31058, sum loss: 5872.638672, avg loss: 3.165843, ppl: 23.708723 +epoch: 0, batch: 31059, sum loss: 4973.919434, avg loss: 2.912131, ppl: 18.395950 +epoch: 0, batch: 31060, sum loss: 4514.857422, avg loss: 3.007900, ppl: 20.244837 +epoch: 0, batch: 31061, sum loss: 3682.514648, avg loss: 2.406872, ppl: 11.099192 +epoch: 0, batch: 31062, sum loss: 5548.321777, avg loss: 3.300608, ppl: 27.129120 +epoch: 0, batch: 31063, sum loss: 4656.446777, avg loss: 2.696263, ppl: 14.824235 +epoch: 0, batch: 31064, sum loss: 4252.860840, avg loss: 2.980281, ppl: 19.693352 +epoch: 0, batch: 31065, sum loss: 5979.205566, avg loss: 3.249568, ppl: 25.779200 +epoch: 0, batch: 31066, sum loss: 4508.605469, avg loss: 3.109383, ppl: 22.407217 +epoch: 0, batch: 31067, sum loss: 4584.094727, avg loss: 2.718917, ppl: 15.163897 +epoch: 0, batch: 31068, sum loss: 6006.319336, avg loss: 3.274983, ppl: 26.442780 +epoch: 0, batch: 31069, sum loss: 4516.136719, avg loss: 2.757104, ppl: 15.754155 +epoch: 0, batch: 31070, sum loss: 4564.021973, avg loss: 3.022531, ppl: 20.543226 +epoch: 0, batch: 31071, sum loss: 4710.840332, avg loss: 2.886544, ppl: 17.931240 +epoch: 0, batch: 31072, sum loss: 4920.020508, avg loss: 2.867145, ppl: 17.586733 +epoch: 0, batch: 31073, sum loss: 4082.689209, avg loss: 2.453539, ppl: 11.629435 +epoch: 0, batch: 31074, sum loss: 4834.527344, avg loss: 3.034857, ppl: 20.798004 +epoch: 0, batch: 31075, sum loss: 5703.748047, avg loss: 3.175806, ppl: 23.946119 +epoch: 0, batch: 31076, sum loss: 5059.243164, avg loss: 2.944845, ppl: 19.007711 +epoch: 0, batch: 31077, sum loss: 5240.251465, avg loss: 3.018578, ppl: 20.462175 +epoch: 0, batch: 31078, sum loss: 5755.882812, avg loss: 3.304181, ppl: 27.226223 +epoch: 0, batch: 31079, sum loss: 5156.538086, avg loss: 2.918245, ppl: 
18.508768 +epoch: 0, batch: 31080, sum loss: 4329.295898, avg loss: 2.985721, ppl: 19.800781 +epoch: 0, batch: 31081, sum loss: 4616.907227, avg loss: 2.869426, ppl: 17.626905 +epoch: 0, batch: 31082, sum loss: 5463.597656, avg loss: 3.161804, ppl: 23.613161 +epoch: 0, batch: 31083, sum loss: 5358.223145, avg loss: 3.170546, ppl: 23.820494 +epoch: 0, batch: 31084, sum loss: 5579.184570, avg loss: 3.197240, ppl: 24.464922 +epoch: 0, batch: 31085, sum loss: 5004.063477, avg loss: 3.083218, ppl: 21.828541 +epoch: 0, batch: 31086, sum loss: 5272.296875, avg loss: 3.085019, ppl: 21.867874 +epoch: 0, batch: 31087, sum loss: 3935.145508, avg loss: 2.822917, ppl: 16.825851 +epoch: 0, batch: 31088, sum loss: 5829.344238, avg loss: 3.240325, ppl: 25.542015 +epoch: 0, batch: 31089, sum loss: 5523.278809, avg loss: 3.043129, ppl: 20.970758 +epoch: 0, batch: 31090, sum loss: 4796.564453, avg loss: 3.003484, ppl: 20.155647 +epoch: 0, batch: 31091, sum loss: 5463.496094, avg loss: 3.086721, ppl: 21.905132 +epoch: 0, batch: 31092, sum loss: 5840.232910, avg loss: 3.005781, ppl: 20.201996 +epoch: 0, batch: 31093, sum loss: 5401.932129, avg loss: 3.046775, ppl: 21.047359 +epoch: 0, batch: 31094, sum loss: 4438.089844, avg loss: 2.863284, ppl: 17.518963 +epoch: 0, batch: 31095, sum loss: 5200.225586, avg loss: 2.990354, ppl: 19.892725 +epoch: 0, batch: 31096, sum loss: 5366.687988, avg loss: 3.089631, ppl: 21.968962 +epoch: 0, batch: 31097, sum loss: 5690.981445, avg loss: 3.239033, ppl: 25.509048 +epoch: 0, batch: 31098, sum loss: 5149.184570, avg loss: 2.900949, ppl: 18.191401 +epoch: 0, batch: 31099, sum loss: 4533.049805, avg loss: 2.784429, ppl: 16.190563 +epoch: 0, batch: 31100, sum loss: 4948.184570, avg loss: 3.133746, ppl: 22.959818 +epoch: 0, batch: 31101, sum loss: 4502.493652, avg loss: 3.007678, ppl: 20.240337 +epoch: 0, batch: 31102, sum loss: 4862.628906, avg loss: 3.233131, ppl: 25.358931 +epoch: 0, batch: 31103, sum loss: 5373.310059, avg loss: 3.027217, ppl: 
20.639706 +epoch: 0, batch: 31104, sum loss: 5108.237305, avg loss: 2.956156, ppl: 19.223928 +epoch: 0, batch: 31105, sum loss: 6320.597168, avg loss: 3.176179, ppl: 23.955055 +epoch: 0, batch: 31106, sum loss: 5152.528320, avg loss: 2.907748, ppl: 18.315496 +epoch: 0, batch: 31107, sum loss: 4429.387207, avg loss: 2.870633, ppl: 17.648191 +epoch: 0, batch: 31108, sum loss: 4385.133301, avg loss: 2.877384, ppl: 17.767731 +epoch: 0, batch: 31109, sum loss: 4924.598633, avg loss: 3.095285, ppl: 22.093538 +epoch: 0, batch: 31110, sum loss: 5627.491211, avg loss: 3.237912, ppl: 25.480467 +epoch: 0, batch: 31111, sum loss: 4520.842285, avg loss: 2.854067, ppl: 17.358236 +epoch: 0, batch: 31112, sum loss: 5171.237305, avg loss: 3.011786, ppl: 20.323675 +epoch: 0, batch: 31113, sum loss: 5041.437988, avg loss: 2.833861, ppl: 17.011007 +epoch: 0, batch: 31114, sum loss: 4674.393555, avg loss: 2.848503, ppl: 17.261919 +epoch: 0, batch: 31115, sum loss: 4890.912109, avg loss: 2.888903, ppl: 17.973576 +epoch: 0, batch: 31116, sum loss: 5391.900391, avg loss: 3.169841, ppl: 23.803705 +epoch: 0, batch: 31117, sum loss: 4515.720703, avg loss: 2.894693, ppl: 18.077950 +epoch: 0, batch: 31118, sum loss: 5092.551758, avg loss: 3.219059, ppl: 25.004580 +epoch: 0, batch: 31119, sum loss: 4605.331055, avg loss: 2.837542, ppl: 17.073751 +epoch: 0, batch: 31120, sum loss: 5099.767090, avg loss: 2.812889, ppl: 16.657970 +epoch: 0, batch: 31121, sum loss: 3685.727539, avg loss: 2.657338, ppl: 14.258278 +epoch: 0, batch: 31122, sum loss: 6114.835449, avg loss: 3.102402, ppl: 22.251345 +epoch: 0, batch: 31123, sum loss: 6230.954590, avg loss: 3.166136, ppl: 23.715664 +epoch: 0, batch: 31124, sum loss: 5615.449707, avg loss: 3.102458, ppl: 22.252592 +epoch: 0, batch: 31125, sum loss: 4554.787598, avg loss: 2.870062, ppl: 17.638105 +epoch: 0, batch: 31126, sum loss: 5601.350098, avg loss: 3.098092, ppl: 22.155634 +epoch: 0, batch: 31127, sum loss: 5308.191406, avg loss: 3.050685, ppl: 
21.129807 +epoch: 0, batch: 31128, sum loss: 5002.876953, avg loss: 3.052396, ppl: 21.165993 +epoch: 0, batch: 31129, sum loss: 6049.192871, avg loss: 3.103742, ppl: 22.281170 +epoch: 0, batch: 31130, sum loss: 5534.114746, avg loss: 3.020805, ppl: 20.507795 +epoch: 0, batch: 31131, sum loss: 5388.938965, avg loss: 3.098872, ppl: 22.172935 +epoch: 0, batch: 31132, sum loss: 4174.378418, avg loss: 2.739094, ppl: 15.472953 +epoch: 0, batch: 31133, sum loss: 5166.579102, avg loss: 2.996856, ppl: 20.022482 +epoch: 0, batch: 31134, sum loss: 3950.013672, avg loss: 2.679792, ppl: 14.582063 +epoch: 0, batch: 31135, sum loss: 5271.037109, avg loss: 2.954617, ppl: 19.194374 +epoch: 0, batch: 31136, sum loss: 4932.378906, avg loss: 3.090463, ppl: 21.987253 +epoch: 0, batch: 31137, sum loss: 4384.473633, avg loss: 2.573048, ppl: 13.105708 +epoch: 0, batch: 31138, sum loss: 4975.427734, avg loss: 3.137092, ppl: 23.036776 +epoch: 0, batch: 31139, sum loss: 5338.973633, avg loss: 2.903194, ppl: 18.232285 +epoch: 0, batch: 31140, sum loss: 5068.476562, avg loss: 2.953658, ppl: 19.175964 +epoch: 0, batch: 31141, sum loss: 4598.803711, avg loss: 3.124187, ppl: 22.741409 +epoch: 0, batch: 31142, sum loss: 4129.442383, avg loss: 2.623534, ppl: 13.784348 +epoch: 0, batch: 31143, sum loss: 5297.005371, avg loss: 2.934629, ppl: 18.814522 +epoch: 0, batch: 31144, sum loss: 5146.785156, avg loss: 3.186864, ppl: 24.212376 +epoch: 0, batch: 31145, sum loss: 4356.771973, avg loss: 2.775014, ppl: 16.038851 +epoch: 0, batch: 31146, sum loss: 4581.676270, avg loss: 2.766713, ppl: 15.906259 +epoch: 0, batch: 31147, sum loss: 4810.990234, avg loss: 3.103865, ppl: 22.283905 +epoch: 0, batch: 31148, sum loss: 4391.097168, avg loss: 2.933264, ppl: 18.788866 +epoch: 0, batch: 31149, sum loss: 5201.760742, avg loss: 3.205028, ppl: 24.656191 +epoch: 0, batch: 31150, sum loss: 4452.617188, avg loss: 2.889434, ppl: 17.983122 +epoch: 0, batch: 31151, sum loss: 5633.332031, avg loss: 3.064925, ppl: 
21.432854 +epoch: 0, batch: 31152, sum loss: 5804.750977, avg loss: 3.264765, ppl: 26.173948 +epoch: 0, batch: 31153, sum loss: 4644.844727, avg loss: 2.921286, ppl: 18.565144 +epoch: 0, batch: 31154, sum loss: 5233.326172, avg loss: 2.915502, ppl: 18.458076 +epoch: 0, batch: 31155, sum loss: 5544.649414, avg loss: 3.099301, ppl: 22.182438 +epoch: 0, batch: 31156, sum loss: 4396.413086, avg loss: 2.974569, ppl: 19.581188 +epoch: 0, batch: 31157, sum loss: 4784.230957, avg loss: 3.193745, ppl: 24.379568 +epoch: 0, batch: 31158, sum loss: 5464.687500, avg loss: 3.032568, ppl: 20.750450 +epoch: 0, batch: 31159, sum loss: 5665.092773, avg loss: 2.933761, ppl: 18.798201 +epoch: 0, batch: 31160, sum loss: 5356.822754, avg loss: 3.054061, ppl: 21.201262 +epoch: 0, batch: 31161, sum loss: 7279.139160, avg loss: 3.453102, ppl: 31.598261 +epoch: 0, batch: 31162, sum loss: 5338.573730, avg loss: 3.048872, ppl: 21.091530 +epoch: 0, batch: 31163, sum loss: 5019.371094, avg loss: 2.742826, ppl: 15.530809 +epoch: 0, batch: 31164, sum loss: 4913.557617, avg loss: 3.131649, ppl: 22.911736 +epoch: 0, batch: 31165, sum loss: 4034.712891, avg loss: 3.004254, ppl: 20.171160 +epoch: 0, batch: 31166, sum loss: 4258.666016, avg loss: 3.024621, ppl: 20.586197 +epoch: 0, batch: 31167, sum loss: 5014.603027, avg loss: 3.173800, ppl: 23.898113 +epoch: 0, batch: 31168, sum loss: 5415.064453, avg loss: 3.151958, ppl: 23.381813 +epoch: 0, batch: 31169, sum loss: 5255.507324, avg loss: 3.120848, ppl: 22.665585 +epoch: 0, batch: 31170, sum loss: 5483.299316, avg loss: 3.099661, ppl: 22.190420 +epoch: 0, batch: 31171, sum loss: 4840.597656, avg loss: 2.837396, ppl: 17.071253 +epoch: 0, batch: 31172, sum loss: 5401.603027, avg loss: 2.910347, ppl: 18.363161 +epoch: 0, batch: 31173, sum loss: 5054.629883, avg loss: 2.980324, ppl: 19.694202 +epoch: 0, batch: 31174, sum loss: 6222.569336, avg loss: 2.950483, ppl: 19.115191 +epoch: 0, batch: 31175, sum loss: 5931.649414, avg loss: 3.118638, ppl: 
22.615557 +epoch: 0, batch: 31176, sum loss: 5676.011719, avg loss: 3.001593, ppl: 20.117552 +epoch: 0, batch: 31177, sum loss: 5932.671875, avg loss: 3.177649, ppl: 23.990297 +epoch: 0, batch: 31178, sum loss: 6488.444336, avg loss: 3.020691, ppl: 20.505459 +epoch: 0, batch: 31179, sum loss: 5182.883301, avg loss: 2.934815, ppl: 18.818020 +epoch: 0, batch: 31180, sum loss: 5168.352051, avg loss: 3.058197, ppl: 21.289129 +epoch: 0, batch: 31181, sum loss: 3905.521973, avg loss: 2.364118, ppl: 10.634651 +epoch: 0, batch: 31182, sum loss: 4697.905273, avg loss: 2.887465, ppl: 17.947750 +epoch: 0, batch: 31183, sum loss: 4141.856934, avg loss: 2.694767, ppl: 14.802073 +epoch: 0, batch: 31184, sum loss: 5302.088379, avg loss: 3.278966, ppl: 26.548319 +epoch: 0, batch: 31185, sum loss: 4571.312988, avg loss: 2.891406, ppl: 18.018627 +epoch: 0, batch: 31186, sum loss: 4770.035156, avg loss: 2.797675, ppl: 16.406452 +epoch: 0, batch: 31187, sum loss: 5436.488281, avg loss: 2.969136, ppl: 19.475090 +epoch: 0, batch: 31188, sum loss: 5293.995117, avg loss: 3.095904, ppl: 22.107206 +epoch: 0, batch: 31189, sum loss: 5611.805664, avg loss: 3.026864, ppl: 20.632423 +epoch: 0, batch: 31190, sum loss: 3649.123779, avg loss: 2.739583, ppl: 15.480521 +epoch: 0, batch: 31191, sum loss: 4993.387207, avg loss: 2.843615, ppl: 17.177748 +epoch: 0, batch: 31192, sum loss: 5049.704102, avg loss: 2.958233, ppl: 19.263908 +epoch: 0, batch: 31193, sum loss: 5683.597656, avg loss: 2.946396, ppl: 19.037218 +epoch: 0, batch: 31194, sum loss: 4850.023438, avg loss: 2.880062, ppl: 17.815371 +epoch: 0, batch: 31195, sum loss: 4750.213867, avg loss: 2.919615, ppl: 18.534151 +epoch: 0, batch: 31196, sum loss: 3802.656982, avg loss: 2.720069, ppl: 15.181376 +epoch: 0, batch: 31197, sum loss: 4555.924316, avg loss: 2.918593, ppl: 18.515226 +epoch: 0, batch: 31198, sum loss: 5878.755371, avg loss: 3.133665, ppl: 22.957968 +epoch: 0, batch: 31199, sum loss: 4818.407227, avg loss: 2.740846, ppl: 
15.500092 +epoch: 0, batch: 31200, sum loss: 4572.217773, avg loss: 2.759335, ppl: 15.789336 +epoch: 0, batch: 31201, sum loss: 5524.258789, avg loss: 3.094823, ppl: 22.083326 +epoch: 0, batch: 31202, sum loss: 4632.573242, avg loss: 2.975320, ppl: 19.595896 +epoch: 0, batch: 31203, sum loss: 4810.706055, avg loss: 2.828163, ppl: 16.914368 +epoch: 0, batch: 31204, sum loss: 5408.255859, avg loss: 3.265855, ppl: 26.202499 +epoch: 0, batch: 31205, sum loss: 4494.081055, avg loss: 2.966390, ppl: 19.421682 +epoch: 0, batch: 31206, sum loss: 4778.630859, avg loss: 3.028283, ppl: 20.661728 +epoch: 0, batch: 31207, sum loss: 5618.793945, avg loss: 3.421921, ppl: 30.628195 +epoch: 0, batch: 31208, sum loss: 6268.455566, avg loss: 3.127972, ppl: 22.827635 +epoch: 0, batch: 31209, sum loss: 4079.299316, avg loss: 2.533726, ppl: 12.600370 +epoch: 0, batch: 31210, sum loss: 4812.828125, avg loss: 2.609994, ppl: 13.598965 +epoch: 0, batch: 31211, sum loss: 4562.124023, avg loss: 2.885594, ppl: 17.914204 +epoch: 0, batch: 31212, sum loss: 5937.607910, avg loss: 3.098960, ppl: 22.174887 +epoch: 0, batch: 31213, sum loss: 5545.391113, avg loss: 3.310682, ppl: 27.403797 +epoch: 0, batch: 31214, sum loss: 5145.271973, avg loss: 3.176094, ppl: 23.953011 +epoch: 0, batch: 31215, sum loss: 5569.896484, avg loss: 3.038678, ppl: 20.877623 +epoch: 0, batch: 31216, sum loss: 5251.618652, avg loss: 3.055043, ppl: 21.222097 +epoch: 0, batch: 31217, sum loss: 5508.157227, avg loss: 3.115474, ppl: 22.544107 +epoch: 0, batch: 31218, sum loss: 5082.434082, avg loss: 2.860121, ppl: 17.463633 +epoch: 0, batch: 31219, sum loss: 4239.341309, avg loss: 2.868296, ppl: 17.606989 +epoch: 0, batch: 31220, sum loss: 4905.922363, avg loss: 3.144822, ppl: 23.215546 +epoch: 0, batch: 31221, sum loss: 4453.383301, avg loss: 2.867600, ppl: 17.594748 +epoch: 0, batch: 31222, sum loss: 5860.917969, avg loss: 3.050972, ppl: 21.135889 +epoch: 0, batch: 31223, sum loss: 4869.013184, avg loss: 2.905139, ppl: 
18.267786 +epoch: 0, batch: 31224, sum loss: 5356.958008, avg loss: 3.121770, ppl: 22.686508 +epoch: 0, batch: 31225, sum loss: 4216.219727, avg loss: 2.750306, ppl: 15.647425 +epoch: 0, batch: 31226, sum loss: 6002.732422, avg loss: 3.211735, ppl: 24.822109 +epoch: 0, batch: 31227, sum loss: 4646.206543, avg loss: 2.838245, ppl: 17.085749 +epoch: 0, batch: 31228, sum loss: 5285.083008, avg loss: 2.931272, ppl: 18.751463 +epoch: 0, batch: 31229, sum loss: 4890.953125, avg loss: 2.909550, ppl: 18.348536 +epoch: 0, batch: 31230, sum loss: 5888.758301, avg loss: 3.154129, ppl: 23.432608 +epoch: 0, batch: 31231, sum loss: 6016.190918, avg loss: 3.136700, ppl: 23.027754 +epoch: 0, batch: 31232, sum loss: 4222.764160, avg loss: 2.876542, ppl: 17.752783 +epoch: 0, batch: 31233, sum loss: 5032.989746, avg loss: 2.759315, ppl: 15.789017 +epoch: 0, batch: 31234, sum loss: 5463.090820, avg loss: 3.165174, ppl: 23.692873 +epoch: 0, batch: 31235, sum loss: 6245.574707, avg loss: 3.129045, ppl: 22.852156 +epoch: 0, batch: 31236, sum loss: 5495.063965, avg loss: 3.158083, ppl: 23.525448 +epoch: 0, batch: 31237, sum loss: 4990.535156, avg loss: 3.009973, ppl: 20.286854 +epoch: 0, batch: 31238, sum loss: 6385.570312, avg loss: 3.394774, ppl: 29.807922 +epoch: 0, batch: 31239, sum loss: 5593.247070, avg loss: 3.033214, ppl: 20.763866 +epoch: 0, batch: 31240, sum loss: 5236.041992, avg loss: 3.023119, ppl: 20.555304 +epoch: 0, batch: 31241, sum loss: 5583.427734, avg loss: 3.197840, ppl: 24.479591 +epoch: 0, batch: 31242, sum loss: 4868.628418, avg loss: 2.842165, ppl: 17.152857 +epoch: 0, batch: 31243, sum loss: 5188.723145, avg loss: 3.014946, ppl: 20.387999 +epoch: 0, batch: 31244, sum loss: 5294.390137, avg loss: 3.278260, ppl: 26.529577 +epoch: 0, batch: 31245, sum loss: 5079.793945, avg loss: 2.804966, ppl: 16.526522 +epoch: 0, batch: 31246, sum loss: 4367.734375, avg loss: 3.008082, ppl: 20.248520 +epoch: 0, batch: 31247, sum loss: 4141.652832, avg loss: 2.890198, ppl: 
17.996864 +epoch: 0, batch: 31248, sum loss: 5665.191406, avg loss: 2.992705, ppl: 19.939552 +epoch: 0, batch: 31249, sum loss: 5509.671875, avg loss: 3.146586, ppl: 23.256529 +epoch: 0, batch: 31250, sum loss: 5699.572754, avg loss: 3.007690, ppl: 20.240595 +epoch: 0, batch: 31251, sum loss: 3926.430420, avg loss: 2.806598, ppl: 16.553507 +epoch: 0, batch: 31252, sum loss: 5423.556152, avg loss: 3.026538, ppl: 20.625706 +epoch: 0, batch: 31253, sum loss: 4951.256836, avg loss: 2.947177, ppl: 19.052088 +epoch: 0, batch: 31254, sum loss: 4398.954102, avg loss: 2.809038, ppl: 16.593954 +epoch: 0, batch: 31255, sum loss: 4895.375488, avg loss: 3.021837, ppl: 20.528963 +epoch: 0, batch: 31256, sum loss: 4723.579102, avg loss: 3.033770, ppl: 20.775400 +epoch: 0, batch: 31257, sum loss: 5551.248535, avg loss: 3.159504, ppl: 23.558907 +epoch: 0, batch: 31258, sum loss: 4742.063477, avg loss: 3.001306, ppl: 20.111788 +epoch: 0, batch: 31259, sum loss: 4148.757812, avg loss: 2.606004, ppl: 13.544811 +epoch: 0, batch: 31260, sum loss: 4368.468750, avg loss: 2.713335, ppl: 15.079475 +epoch: 0, batch: 31261, sum loss: 4943.527832, avg loss: 3.012509, ppl: 20.338371 +epoch: 0, batch: 31262, sum loss: 5822.585938, avg loss: 3.195711, ppl: 24.427546 +epoch: 0, batch: 31263, sum loss: 4331.616699, avg loss: 2.688775, ppl: 14.713645 +epoch: 0, batch: 31264, sum loss: 5987.717773, avg loss: 3.064339, ppl: 21.420292 +epoch: 0, batch: 31265, sum loss: 4549.456055, avg loss: 2.706398, ppl: 14.975245 +epoch: 0, batch: 31266, sum loss: 4661.280273, avg loss: 2.959543, ppl: 19.289154 +epoch: 0, batch: 31267, sum loss: 5259.232422, avg loss: 3.090031, ppl: 21.977753 +epoch: 0, batch: 31268, sum loss: 5473.268555, avg loss: 2.955328, ppl: 19.208031 +epoch: 0, batch: 31269, sum loss: 4237.402344, avg loss: 2.749774, ppl: 15.639104 +epoch: 0, batch: 31270, sum loss: 5195.158203, avg loss: 3.013433, ppl: 20.357166 +epoch: 0, batch: 31271, sum loss: 5405.087891, avg loss: 3.151655, ppl: 
23.374712 +epoch: 0, batch: 31272, sum loss: 6191.246094, avg loss: 3.133222, ppl: 22.947796 +epoch: 0, batch: 31273, sum loss: 5693.693359, avg loss: 3.076010, ppl: 21.671749 +epoch: 0, batch: 31274, sum loss: 6216.862305, avg loss: 3.048976, ppl: 21.093737 +epoch: 0, batch: 31275, sum loss: 5251.251953, avg loss: 2.983666, ppl: 19.760118 +epoch: 0, batch: 31276, sum loss: 5163.417969, avg loss: 3.223107, ppl: 25.106012 +epoch: 0, batch: 31277, sum loss: 5701.442871, avg loss: 3.085196, ppl: 21.871759 +epoch: 0, batch: 31278, sum loss: 5917.265137, avg loss: 3.341200, ppl: 28.253006 +epoch: 0, batch: 31279, sum loss: 4074.261963, avg loss: 2.649065, ppl: 14.140811 +epoch: 0, batch: 31280, sum loss: 5507.357422, avg loss: 2.910865, ppl: 18.372690 +epoch: 0, batch: 31281, sum loss: 5876.415527, avg loss: 3.188505, ppl: 24.252153 +epoch: 0, batch: 31282, sum loss: 5156.028809, avg loss: 3.001181, ppl: 20.109270 +epoch: 0, batch: 31283, sum loss: 5183.526855, avg loss: 2.917010, ppl: 18.485933 +epoch: 0, batch: 31284, sum loss: 5288.200684, avg loss: 3.159021, ppl: 23.547529 +epoch: 0, batch: 31285, sum loss: 5323.165039, avg loss: 2.886749, ppl: 17.934908 +epoch: 0, batch: 31286, sum loss: 4766.087402, avg loss: 2.913256, ppl: 18.416674 +epoch: 0, batch: 31287, sum loss: 5315.574219, avg loss: 3.182979, ppl: 24.118486 +epoch: 0, batch: 31288, sum loss: 5682.511719, avg loss: 3.130861, ppl: 22.893673 +epoch: 0, batch: 31289, sum loss: 5386.610840, avg loss: 3.012646, ppl: 20.341150 +epoch: 0, batch: 31290, sum loss: 4744.428223, avg loss: 2.840975, ppl: 17.132462 +epoch: 0, batch: 31291, sum loss: 5151.323730, avg loss: 3.060798, ppl: 21.344591 +epoch: 0, batch: 31292, sum loss: 6649.187500, avg loss: 3.312998, ppl: 27.467358 +epoch: 0, batch: 31293, sum loss: 4499.768066, avg loss: 2.849758, ppl: 17.283602 +epoch: 0, batch: 31294, sum loss: 5415.833496, avg loss: 3.266486, ppl: 26.219042 +epoch: 0, batch: 31295, sum loss: 5814.209961, avg loss: 3.161615, ppl: 
23.608696 +epoch: 0, batch: 31296, sum loss: 5220.418945, avg loss: 2.929528, ppl: 18.718798 +epoch: 0, batch: 31297, sum loss: 4499.980957, avg loss: 3.118490, ppl: 22.612204 +epoch: 0, batch: 31298, sum loss: 4097.980957, avg loss: 2.757726, ppl: 15.763954 +epoch: 0, batch: 31299, sum loss: 4957.340332, avg loss: 2.826306, ppl: 16.882973 +epoch: 0, batch: 31300, sum loss: 5784.876953, avg loss: 3.070529, ppl: 21.553307 +epoch: 0, batch: 31301, sum loss: 6376.449707, avg loss: 3.062656, ppl: 21.384281 +epoch: 0, batch: 31302, sum loss: 4404.668945, avg loss: 2.565328, ppl: 13.004928 +epoch: 0, batch: 31303, sum loss: 5604.043457, avg loss: 3.032491, ppl: 20.748854 +epoch: 0, batch: 31304, sum loss: 4806.589844, avg loss: 2.830736, ppl: 16.957939 +epoch: 0, batch: 31305, sum loss: 5744.601074, avg loss: 3.156374, ppl: 23.485294 +epoch: 0, batch: 31306, sum loss: 5006.220703, avg loss: 3.142637, ppl: 23.164873 +epoch: 0, batch: 31307, sum loss: 5429.353516, avg loss: 3.009619, ppl: 20.279682 +epoch: 0, batch: 31308, sum loss: 4370.532715, avg loss: 2.852828, ppl: 17.336746 +epoch: 0, batch: 31309, sum loss: 6254.516602, avg loss: 3.233980, ppl: 25.380463 +epoch: 0, batch: 31310, sum loss: 3917.466797, avg loss: 2.694269, ppl: 14.794696 +epoch: 0, batch: 31311, sum loss: 5450.288086, avg loss: 3.094996, ppl: 22.087151 +epoch: 0, batch: 31312, sum loss: 4753.495605, avg loss: 2.804422, ppl: 16.517529 +epoch: 0, batch: 31313, sum loss: 5000.208008, avg loss: 2.883626, ppl: 17.878988 +epoch: 0, batch: 31314, sum loss: 4168.957520, avg loss: 2.897121, ppl: 18.121901 +epoch: 0, batch: 31315, sum loss: 4724.770508, avg loss: 2.934640, ppl: 18.814728 +epoch: 0, batch: 31316, sum loss: 3940.155518, avg loss: 2.698737, ppl: 14.860946 +epoch: 0, batch: 31317, sum loss: 6125.768555, avg loss: 3.376940, ppl: 29.281027 +epoch: 0, batch: 31318, sum loss: 4696.962891, avg loss: 2.970881, ppl: 19.509100 +epoch: 0, batch: 31319, sum loss: 6136.039062, avg loss: 3.025660, ppl: 
20.607607 +epoch: 0, batch: 31320, sum loss: 6588.789551, avg loss: 3.546173, ppl: 34.680344 +epoch: 0, batch: 31321, sum loss: 5501.551758, avg loss: 3.042894, ppl: 20.965824 +epoch: 0, batch: 31322, sum loss: 4222.300781, avg loss: 2.741754, ppl: 15.514171 +epoch: 0, batch: 31323, sum loss: 5274.547852, avg loss: 2.963229, ppl: 19.360390 +epoch: 0, batch: 31324, sum loss: 4741.551270, avg loss: 2.742366, ppl: 15.523676 +epoch: 0, batch: 31325, sum loss: 5287.518555, avg loss: 2.929373, ppl: 18.715893 +epoch: 0, batch: 31326, sum loss: 4596.333984, avg loss: 3.109834, ppl: 22.417316 +epoch: 0, batch: 31327, sum loss: 6020.083496, avg loss: 3.202172, ppl: 24.585875 +epoch: 0, batch: 31328, sum loss: 4861.757812, avg loss: 2.930535, ppl: 18.737654 +epoch: 0, batch: 31329, sum loss: 4315.193359, avg loss: 2.831492, ppl: 16.970757 +epoch: 0, batch: 31330, sum loss: 4802.480469, avg loss: 3.049194, ppl: 21.098330 +epoch: 0, batch: 31331, sum loss: 3900.061523, avg loss: 2.584534, ppl: 13.257109 +epoch: 0, batch: 31332, sum loss: 4999.452148, avg loss: 3.057769, ppl: 21.280025 +epoch: 0, batch: 31333, sum loss: 4871.997559, avg loss: 3.003698, ppl: 20.159943 +epoch: 0, batch: 31334, sum loss: 4298.190918, avg loss: 3.024765, ppl: 20.589161 +epoch: 0, batch: 31335, sum loss: 4852.393066, avg loss: 2.712349, ppl: 15.064627 +epoch: 0, batch: 31336, sum loss: 4946.165039, avg loss: 2.770961, ppl: 15.973978 +epoch: 0, batch: 31337, sum loss: 6550.498047, avg loss: 3.209455, ppl: 24.765585 +epoch: 0, batch: 31338, sum loss: 4777.307129, avg loss: 3.015977, ppl: 20.409014 +epoch: 0, batch: 31339, sum loss: 4170.894531, avg loss: 2.924891, ppl: 18.632195 +epoch: 0, batch: 31340, sum loss: 5911.635742, avg loss: 3.282418, ppl: 26.640123 +epoch: 0, batch: 31341, sum loss: 4436.966309, avg loss: 2.795820, ppl: 16.376055 +epoch: 0, batch: 31342, sum loss: 5547.063965, avg loss: 3.184308, ppl: 24.150560 +epoch: 0, batch: 31343, sum loss: 5240.519531, avg loss: 2.820516, ppl: 
16.785515 +epoch: 0, batch: 31344, sum loss: 5930.156738, avg loss: 3.055207, ppl: 21.225574 +epoch: 0, batch: 31345, sum loss: 5705.750488, avg loss: 3.194709, ppl: 24.403078 +epoch: 0, batch: 31346, sum loss: 5777.455078, avg loss: 3.012229, ppl: 20.332670 +epoch: 0, batch: 31347, sum loss: 4298.084473, avg loss: 2.890440, ppl: 18.001228 +epoch: 0, batch: 31348, sum loss: 5509.612305, avg loss: 3.043985, ppl: 20.988714 +epoch: 0, batch: 31349, sum loss: 5878.734375, avg loss: 3.328842, ppl: 27.905998 +epoch: 0, batch: 31350, sum loss: 4981.380859, avg loss: 3.084446, ppl: 21.855366 +epoch: 0, batch: 31351, sum loss: 5529.420898, avg loss: 3.082175, ppl: 21.805769 +epoch: 0, batch: 31352, sum loss: 5583.181641, avg loss: 3.037640, ppl: 20.855957 +epoch: 0, batch: 31353, sum loss: 5307.882812, avg loss: 2.852167, ppl: 17.325287 +epoch: 0, batch: 31354, sum loss: 5046.469727, avg loss: 3.064037, ppl: 21.413837 +epoch: 0, batch: 31355, sum loss: 5346.594727, avg loss: 3.178713, ppl: 24.015816 +epoch: 0, batch: 31356, sum loss: 5068.915039, avg loss: 2.894869, ppl: 18.081129 +epoch: 0, batch: 31357, sum loss: 5694.477539, avg loss: 3.006588, ppl: 20.218296 +epoch: 0, batch: 31358, sum loss: 4123.731445, avg loss: 2.618242, ppl: 13.711597 +epoch: 0, batch: 31359, sum loss: 5110.664062, avg loss: 2.957560, ppl: 19.250948 +epoch: 0, batch: 31360, sum loss: 4026.625977, avg loss: 2.591136, ppl: 13.344929 +epoch: 0, batch: 31361, sum loss: 5039.321289, avg loss: 2.945249, ppl: 19.015402 +epoch: 0, batch: 31362, sum loss: 4210.008789, avg loss: 2.979482, ppl: 19.677629 +epoch: 0, batch: 31363, sum loss: 5533.020020, avg loss: 3.073900, ppl: 21.626080 +epoch: 0, batch: 31364, sum loss: 4790.927734, avg loss: 2.814881, ppl: 16.691191 +epoch: 0, batch: 31365, sum loss: 5238.402344, avg loss: 3.129273, ppl: 22.857349 +epoch: 0, batch: 31366, sum loss: 5071.537598, avg loss: 2.806606, ppl: 16.553640 +epoch: 0, batch: 31367, sum loss: 5856.670410, avg loss: 2.994208, ppl: 
19.969530 +epoch: 0, batch: 31368, sum loss: 6239.570801, avg loss: 2.925256, ppl: 18.638998 +epoch: 0, batch: 31369, sum loss: 4739.031250, avg loss: 3.180558, ppl: 24.060175 +epoch: 0, batch: 31370, sum loss: 5326.799805, avg loss: 3.093379, ppl: 22.051476 +epoch: 0, batch: 31371, sum loss: 5521.412598, avg loss: 3.156897, ppl: 23.497566 +epoch: 0, batch: 31372, sum loss: 6116.955566, avg loss: 3.192566, ppl: 24.350824 +epoch: 0, batch: 31373, sum loss: 4136.669434, avg loss: 2.748618, ppl: 15.621023 +epoch: 0, batch: 31374, sum loss: 5731.924805, avg loss: 2.997869, ppl: 20.042770 +epoch: 0, batch: 31375, sum loss: 6202.393066, avg loss: 3.232096, ppl: 25.332710 +epoch: 0, batch: 31376, sum loss: 4320.700684, avg loss: 2.807473, ppl: 16.567997 +epoch: 0, batch: 31377, sum loss: 5343.991699, avg loss: 3.296725, ppl: 27.023998 +epoch: 0, batch: 31378, sum loss: 4875.921387, avg loss: 3.170300, ppl: 23.814629 +epoch: 0, batch: 31379, sum loss: 5467.004395, avg loss: 2.980918, ppl: 19.705906 +epoch: 0, batch: 31380, sum loss: 5569.698242, avg loss: 3.058593, ppl: 21.297577 +epoch: 0, batch: 31381, sum loss: 5632.679199, avg loss: 3.062903, ppl: 21.389570 +epoch: 0, batch: 31382, sum loss: 5858.656250, avg loss: 3.136326, ppl: 23.019129 +epoch: 0, batch: 31383, sum loss: 4818.592773, avg loss: 2.742512, ppl: 15.525929 +epoch: 0, batch: 31384, sum loss: 4995.207031, avg loss: 2.941818, ppl: 18.950272 +epoch: 0, batch: 31385, sum loss: 4626.860840, avg loss: 2.996672, ppl: 20.018797 +epoch: 0, batch: 31386, sum loss: 5409.974609, avg loss: 3.091414, ppl: 22.008175 +epoch: 0, batch: 31387, sum loss: 6406.678711, avg loss: 3.065397, ppl: 21.442963 +epoch: 0, batch: 31388, sum loss: 4386.533203, avg loss: 2.722864, ppl: 15.223856 +epoch: 0, batch: 31389, sum loss: 4778.961914, avg loss: 2.727718, ppl: 15.297939 +epoch: 0, batch: 31390, sum loss: 5869.167480, avg loss: 3.224818, ppl: 25.148985 +epoch: 0, batch: 31391, sum loss: 4975.701172, avg loss: 3.103993, ppl: 
22.286768 +epoch: 0, batch: 31392, sum loss: 4751.628906, avg loss: 3.113780, ppl: 22.505968 +epoch: 0, batch: 31393, sum loss: 5185.533203, avg loss: 2.900186, ppl: 18.177532 +epoch: 0, batch: 31394, sum loss: 4223.984375, avg loss: 2.881299, ppl: 17.837429 +epoch: 0, batch: 31395, sum loss: 5958.740234, avg loss: 3.037074, ppl: 20.844172 +epoch: 0, batch: 31396, sum loss: 4490.882324, avg loss: 2.858614, ppl: 17.437342 +epoch: 0, batch: 31397, sum loss: 4906.125488, avg loss: 3.008048, ppl: 20.247829 +epoch: 0, batch: 31398, sum loss: 5095.490234, avg loss: 2.959054, ppl: 19.279715 +epoch: 0, batch: 31399, sum loss: 5136.858398, avg loss: 2.858575, ppl: 17.436655 +epoch: 0, batch: 31400, sum loss: 5697.357910, avg loss: 3.020869, ppl: 20.509096 +epoch: 0, batch: 31401, sum loss: 5207.029785, avg loss: 3.214216, ppl: 24.883776 +epoch: 0, batch: 31402, sum loss: 4894.655762, avg loss: 2.882601, ppl: 17.860664 +epoch: 0, batch: 31403, sum loss: 3938.145020, avg loss: 2.965471, ppl: 19.403835 +epoch: 0, batch: 31404, sum loss: 5259.641113, avg loss: 2.788781, ppl: 16.261189 +epoch: 0, batch: 31405, sum loss: 5049.064453, avg loss: 2.964806, ppl: 19.390938 +epoch: 0, batch: 31406, sum loss: 5367.683594, avg loss: 3.093766, ppl: 22.059994 +epoch: 0, batch: 31407, sum loss: 3915.113770, avg loss: 2.579126, ppl: 13.185614 +epoch: 0, batch: 31408, sum loss: 5033.958984, avg loss: 3.198195, ppl: 24.488289 +epoch: 0, batch: 31409, sum loss: 4924.330566, avg loss: 3.028494, ppl: 20.666079 +epoch: 0, batch: 31410, sum loss: 6049.040527, avg loss: 3.153827, ppl: 23.425547 +epoch: 0, batch: 31411, sum loss: 4588.341797, avg loss: 2.842839, ppl: 17.164423 +epoch: 0, batch: 31412, sum loss: 4421.644531, avg loss: 2.706025, ppl: 14.969651 +epoch: 0, batch: 31413, sum loss: 5394.109863, avg loss: 3.127020, ppl: 22.805925 +epoch: 0, batch: 31414, sum loss: 5916.466797, avg loss: 3.052873, ppl: 21.176088 +epoch: 0, batch: 31415, sum loss: 4797.925293, avg loss: 3.040510, ppl: 
20.915916 +epoch: 0, batch: 31416, sum loss: 4883.789062, avg loss: 3.124625, ppl: 22.751366 +epoch: 0, batch: 31417, sum loss: 4218.691895, avg loss: 2.687065, ppl: 14.688500 +epoch: 0, batch: 31418, sum loss: 4233.091309, avg loss: 2.766726, ppl: 15.906479 +epoch: 0, batch: 31419, sum loss: 3793.311768, avg loss: 2.630591, ppl: 13.881968 +epoch: 0, batch: 31420, sum loss: 5550.047852, avg loss: 2.953724, ppl: 19.177244 +epoch: 0, batch: 31421, sum loss: 4639.995605, avg loss: 2.903627, ppl: 18.240181 +epoch: 0, batch: 31422, sum loss: 5200.443848, avg loss: 2.975082, ppl: 19.591238 +epoch: 0, batch: 31423, sum loss: 5153.414062, avg loss: 2.999659, ppl: 20.078690 +epoch: 0, batch: 31424, sum loss: 5055.894043, avg loss: 3.022053, ppl: 20.533400 +epoch: 0, batch: 31425, sum loss: 4695.386719, avg loss: 2.973646, ppl: 19.563110 +epoch: 0, batch: 31426, sum loss: 4578.408203, avg loss: 2.834928, ppl: 17.029171 +epoch: 0, batch: 31427, sum loss: 6037.350586, avg loss: 3.370938, ppl: 29.105824 +epoch: 0, batch: 31428, sum loss: 5089.522461, avg loss: 2.870571, ppl: 17.647095 +epoch: 0, batch: 31429, sum loss: 5612.552246, avg loss: 3.072005, ppl: 21.585129 +epoch: 0, batch: 31430, sum loss: 5008.541992, avg loss: 3.002723, ppl: 20.140305 +epoch: 0, batch: 31431, sum loss: 4078.408447, avg loss: 2.963960, ppl: 19.374537 +epoch: 0, batch: 31432, sum loss: 5746.642578, avg loss: 3.121479, ppl: 22.679899 +epoch: 0, batch: 31433, sum loss: 4094.665283, avg loss: 2.937350, ppl: 18.865778 +epoch: 0, batch: 31434, sum loss: 4598.681152, avg loss: 2.964978, ppl: 19.394276 +epoch: 0, batch: 31435, sum loss: 5287.789062, avg loss: 3.108635, ppl: 22.390471 +epoch: 0, batch: 31436, sum loss: 4196.089844, avg loss: 2.737175, ppl: 15.443303 +epoch: 0, batch: 31437, sum loss: 5102.958008, avg loss: 2.959953, ppl: 19.297056 +epoch: 0, batch: 31438, sum loss: 4399.376465, avg loss: 2.936833, ppl: 18.856041 +epoch: 0, batch: 31439, sum loss: 5409.568848, avg loss: 3.152430, ppl: 
23.392830 +epoch: 0, batch: 31440, sum loss: 5562.043457, avg loss: 2.968006, ppl: 19.453094 +epoch: 0, batch: 31441, sum loss: 4460.472656, avg loss: 2.721460, ppl: 15.202496 +epoch: 0, batch: 31442, sum loss: 5266.642578, avg loss: 2.833051, ppl: 16.997244 +epoch: 0, batch: 31443, sum loss: 5872.439941, avg loss: 3.349937, ppl: 28.500950 +epoch: 0, batch: 31444, sum loss: 4795.229004, avg loss: 2.952727, ppl: 19.158129 +epoch: 0, batch: 31445, sum loss: 5395.047852, avg loss: 2.990603, ppl: 19.897676 +epoch: 0, batch: 31446, sum loss: 4591.517090, avg loss: 2.979570, ppl: 19.679352 +epoch: 0, batch: 31447, sum loss: 4752.912598, avg loss: 2.910540, ppl: 18.366720 +epoch: 0, batch: 31448, sum loss: 4828.582520, avg loss: 2.584894, ppl: 13.261886 +epoch: 0, batch: 31449, sum loss: 5322.189453, avg loss: 3.013697, ppl: 20.362545 +epoch: 0, batch: 31450, sum loss: 4684.423340, avg loss: 2.851140, ppl: 17.307501 +epoch: 0, batch: 31451, sum loss: 5276.353516, avg loss: 2.924808, ppl: 18.630650 +epoch: 0, batch: 31452, sum loss: 4982.293945, avg loss: 3.015917, ppl: 20.407787 +epoch: 0, batch: 31453, sum loss: 4475.687500, avg loss: 2.863524, ppl: 17.523165 +epoch: 0, batch: 31454, sum loss: 4834.353027, avg loss: 2.796040, ppl: 16.379656 +epoch: 0, batch: 31455, sum loss: 4619.230469, avg loss: 2.932845, ppl: 18.780979 +epoch: 0, batch: 31456, sum loss: 5910.558594, avg loss: 3.256506, ppl: 25.958685 +epoch: 0, batch: 31457, sum loss: 5152.920898, avg loss: 2.878727, ppl: 17.791605 +epoch: 0, batch: 31458, sum loss: 5138.957031, avg loss: 2.791394, ppl: 16.303740 +epoch: 0, batch: 31459, sum loss: 4518.370605, avg loss: 2.799486, ppl: 16.436199 +epoch: 0, batch: 31460, sum loss: 5411.191406, avg loss: 3.085058, ppl: 21.868729 +epoch: 0, batch: 31461, sum loss: 5575.906250, avg loss: 2.999412, ppl: 20.073721 +epoch: 0, batch: 31462, sum loss: 5025.057129, avg loss: 3.030794, ppl: 20.713680 +epoch: 0, batch: 31463, sum loss: 5717.755859, avg loss: 3.183606, ppl: 
24.133621 +epoch: 0, batch: 31464, sum loss: 5694.475586, avg loss: 3.091464, ppl: 22.009266 +epoch: 0, batch: 31465, sum loss: 5211.571777, avg loss: 2.983155, ppl: 19.750031 +epoch: 0, batch: 31466, sum loss: 4405.011230, avg loss: 2.712445, ppl: 15.066071 +epoch: 0, batch: 31467, sum loss: 4655.921875, avg loss: 3.083392, ppl: 21.832331 +epoch: 0, batch: 31468, sum loss: 4278.840820, avg loss: 2.877499, ppl: 17.769773 +epoch: 0, batch: 31469, sum loss: 5364.622070, avg loss: 2.926690, ppl: 18.665737 +epoch: 0, batch: 31470, sum loss: 5187.510254, avg loss: 2.875560, ppl: 17.735353 +epoch: 0, batch: 31471, sum loss: 4738.116699, avg loss: 2.827039, ppl: 16.895355 +epoch: 0, batch: 31472, sum loss: 4307.575195, avg loss: 2.757731, ppl: 15.764030 +epoch: 0, batch: 31473, sum loss: 5240.843262, avg loss: 3.121408, ppl: 22.678278 +epoch: 0, batch: 31474, sum loss: 5140.540527, avg loss: 2.964556, ppl: 19.386099 +epoch: 0, batch: 31475, sum loss: 4955.764160, avg loss: 2.983603, ppl: 19.758875 +epoch: 0, batch: 31476, sum loss: 6039.212891, avg loss: 3.267972, ppl: 26.258039 +epoch: 0, batch: 31477, sum loss: 6356.377441, avg loss: 3.094634, ppl: 22.079147 +epoch: 0, batch: 31478, sum loss: 5469.891602, avg loss: 3.145424, ppl: 23.229515 +epoch: 0, batch: 31479, sum loss: 5282.717285, avg loss: 2.908985, ppl: 18.338179 +epoch: 0, batch: 31480, sum loss: 6016.484375, avg loss: 3.359288, ppl: 28.768686 +epoch: 0, batch: 31481, sum loss: 4481.772949, avg loss: 3.048825, ppl: 21.090553 +epoch: 0, batch: 31482, sum loss: 3661.064453, avg loss: 2.572779, ppl: 13.102184 +epoch: 0, batch: 31483, sum loss: 6227.502441, avg loss: 3.245181, ppl: 25.666351 +epoch: 0, batch: 31484, sum loss: 5096.123047, avg loss: 3.042462, ppl: 20.956768 +epoch: 0, batch: 31485, sum loss: 5752.481445, avg loss: 2.866209, ppl: 17.570284 +epoch: 0, batch: 31486, sum loss: 5856.111328, avg loss: 3.264276, ppl: 26.161171 +epoch: 0, batch: 31487, sum loss: 5844.341797, avg loss: 3.191885, ppl: 
24.334267 +epoch: 0, batch: 31488, sum loss: 5471.715820, avg loss: 2.957684, ppl: 19.253334 +epoch: 0, batch: 31489, sum loss: 5231.957520, avg loss: 3.040068, ppl: 20.906668 +epoch: 0, batch: 31490, sum loss: 4689.547852, avg loss: 2.910955, ppl: 18.374332 +epoch: 0, batch: 31491, sum loss: 4824.083496, avg loss: 2.946905, ppl: 19.046915 +epoch: 0, batch: 31492, sum loss: 5671.864258, avg loss: 3.206255, ppl: 24.686455 +epoch: 0, batch: 31493, sum loss: 5145.308105, avg loss: 2.970732, ppl: 19.506197 +epoch: 0, batch: 31494, sum loss: 4763.974121, avg loss: 2.957154, ppl: 19.243120 +epoch: 0, batch: 31495, sum loss: 5657.440430, avg loss: 3.171211, ppl: 23.836338 +epoch: 0, batch: 31496, sum loss: 4949.644531, avg loss: 2.864378, ppl: 17.538136 +epoch: 0, batch: 31497, sum loss: 4846.683105, avg loss: 2.832661, ppl: 16.990620 +epoch: 0, batch: 31498, sum loss: 5112.871582, avg loss: 2.995238, ppl: 19.990124 +epoch: 0, batch: 31499, sum loss: 4942.873047, avg loss: 3.164451, ppl: 23.675751 +epoch: 0, batch: 31500, sum loss: 5368.993164, avg loss: 2.997763, ppl: 20.040649 +epoch: 0, batch: 31501, sum loss: 4948.712891, avg loss: 2.743189, ppl: 15.536449 +epoch: 0, batch: 31502, sum loss: 4667.626953, avg loss: 2.736006, ppl: 15.425258 +epoch: 0, batch: 31503, sum loss: 4005.288086, avg loss: 2.622978, ppl: 13.776695 +epoch: 0, batch: 31504, sum loss: 5355.312012, avg loss: 2.928000, ppl: 18.690216 +epoch: 0, batch: 31505, sum loss: 5974.736328, avg loss: 3.196756, ppl: 24.453068 +epoch: 0, batch: 31506, sum loss: 5544.060059, avg loss: 3.132237, ppl: 22.925217 +epoch: 0, batch: 31507, sum loss: 4885.483398, avg loss: 2.988063, ppl: 19.847202 +epoch: 0, batch: 31508, sum loss: 5037.697754, avg loss: 3.058711, ppl: 21.300089 +epoch: 0, batch: 31509, sum loss: 4429.198242, avg loss: 2.826547, ppl: 16.887043 +epoch: 0, batch: 31510, sum loss: 4263.682129, avg loss: 2.833012, ppl: 16.996571 +epoch: 0, batch: 31511, sum loss: 6468.792969, avg loss: 3.252284, ppl: 
25.849308 +epoch: 0, batch: 31512, sum loss: 5026.908691, avg loss: 2.870879, ppl: 17.652527 +epoch: 0, batch: 31513, sum loss: 4646.210449, avg loss: 2.817593, ppl: 16.736511 +epoch: 0, batch: 31514, sum loss: 5535.408203, avg loss: 3.019863, ppl: 20.488478 +epoch: 0, batch: 31515, sum loss: 5319.726074, avg loss: 3.038108, ppl: 20.865721 +epoch: 0, batch: 31516, sum loss: 4688.081055, avg loss: 3.068116, ppl: 21.501354 +epoch: 0, batch: 31517, sum loss: 5419.130859, avg loss: 3.250828, ppl: 25.811710 +epoch: 0, batch: 31518, sum loss: 5546.106445, avg loss: 2.981778, ppl: 19.722847 +epoch: 0, batch: 31519, sum loss: 4665.777832, avg loss: 2.966165, ppl: 19.417313 +epoch: 0, batch: 31520, sum loss: 5048.321289, avg loss: 3.019331, ppl: 20.477587 +epoch: 0, batch: 31521, sum loss: 4544.618164, avg loss: 2.983991, ppl: 19.766546 +epoch: 0, batch: 31522, sum loss: 6562.000977, avg loss: 3.148753, ppl: 23.306980 +epoch: 0, batch: 31523, sum loss: 4490.407715, avg loss: 2.863780, ppl: 17.527660 +epoch: 0, batch: 31524, sum loss: 5405.431152, avg loss: 3.187165, ppl: 24.219656 +epoch: 0, batch: 31525, sum loss: 4647.579102, avg loss: 2.897493, ppl: 18.128647 +epoch: 0, batch: 31526, sum loss: 5006.668457, avg loss: 2.743380, ppl: 15.539421 +epoch: 0, batch: 31527, sum loss: 4039.607422, avg loss: 2.532669, ppl: 12.587060 +epoch: 0, batch: 31528, sum loss: 5305.164062, avg loss: 3.221107, ppl: 25.055853 +epoch: 0, batch: 31529, sum loss: 5545.407227, avg loss: 3.099725, ppl: 22.191837 +epoch: 0, batch: 31530, sum loss: 5834.059570, avg loss: 3.139967, ppl: 23.103115 +epoch: 0, batch: 31531, sum loss: 4923.001953, avg loss: 2.925135, ppl: 18.636740 +epoch: 0, batch: 31532, sum loss: 5012.727539, avg loss: 3.090461, ppl: 21.987207 +epoch: 0, batch: 31533, sum loss: 4900.489258, avg loss: 2.784369, ppl: 16.189594 +epoch: 0, batch: 31534, sum loss: 3856.768311, avg loss: 2.656177, ppl: 14.241733 +epoch: 0, batch: 31535, sum loss: 5602.497559, avg loss: 3.430801, ppl: 
30.901375 +epoch: 0, batch: 31536, sum loss: 4988.052246, avg loss: 2.850316, ppl: 17.293238 +epoch: 0, batch: 31537, sum loss: 5558.463379, avg loss: 3.025838, ppl: 20.611263 +epoch: 0, batch: 31538, sum loss: 5016.995117, avg loss: 3.033250, ppl: 20.764605 +epoch: 0, batch: 31539, sum loss: 4267.930664, avg loss: 2.789497, ppl: 16.272835 +epoch: 0, batch: 31540, sum loss: 4921.487793, avg loss: 2.915573, ppl: 18.459393 +epoch: 0, batch: 31541, sum loss: 5111.452637, avg loss: 3.026319, ppl: 20.621187 +epoch: 0, batch: 31542, sum loss: 5139.416992, avg loss: 3.088592, ppl: 21.946157 +epoch: 0, batch: 31543, sum loss: 5797.350586, avg loss: 3.233325, ppl: 25.363840 +epoch: 0, batch: 31544, sum loss: 4974.736816, avg loss: 2.959391, ppl: 19.286230 +epoch: 0, batch: 31545, sum loss: 5038.467773, avg loss: 2.902343, ppl: 18.216782 +epoch: 0, batch: 31546, sum loss: 5163.145020, avg loss: 3.075131, ppl: 21.652718 +epoch: 0, batch: 31547, sum loss: 4077.036133, avg loss: 2.829310, ppl: 16.933779 +epoch: 0, batch: 31548, sum loss: 4758.258301, avg loss: 2.926358, ppl: 18.659548 +epoch: 0, batch: 31549, sum loss: 5982.520508, avg loss: 3.430344, ppl: 30.887270 +epoch: 0, batch: 31550, sum loss: 5600.528320, avg loss: 3.162354, ppl: 23.626135 +epoch: 0, batch: 31551, sum loss: 4915.424805, avg loss: 2.906816, ppl: 18.298435 +epoch: 0, batch: 31552, sum loss: 4721.675293, avg loss: 2.837545, ppl: 17.073805 +epoch: 0, batch: 31553, sum loss: 4204.795410, avg loss: 2.662948, ppl: 14.338502 +epoch: 0, batch: 31554, sum loss: 4919.329102, avg loss: 3.034750, ppl: 20.795769 +epoch: 0, batch: 31555, sum loss: 6202.454102, avg loss: 3.445808, ppl: 31.368616 +epoch: 0, batch: 31556, sum loss: 5358.654297, avg loss: 3.049889, ppl: 21.112993 +epoch: 0, batch: 31557, sum loss: 4812.267090, avg loss: 2.837422, ppl: 17.071692 +epoch: 0, batch: 31558, sum loss: 5004.302246, avg loss: 2.969912, ppl: 19.490210 +epoch: 0, batch: 31559, sum loss: 4573.106445, avg loss: 2.726957, ppl: 
15.286294 +epoch: 0, batch: 31560, sum loss: 4682.743164, avg loss: 2.975059, ppl: 19.590780 +epoch: 0, batch: 31561, sum loss: 5199.733887, avg loss: 3.065881, ppl: 21.453350 +epoch: 0, batch: 31562, sum loss: 5801.313965, avg loss: 2.998095, ppl: 20.047312 +epoch: 0, batch: 31563, sum loss: 4670.199219, avg loss: 2.895350, ppl: 18.089827 +epoch: 0, batch: 31564, sum loss: 5228.644531, avg loss: 2.914518, ppl: 18.439915 +epoch: 0, batch: 31565, sum loss: 4748.988281, avg loss: 2.975557, ppl: 19.600531 +epoch: 0, batch: 31566, sum loss: 4378.786133, avg loss: 2.948677, ppl: 19.080700 +epoch: 0, batch: 31567, sum loss: 6349.478516, avg loss: 2.986584, ppl: 19.817877 +epoch: 0, batch: 31568, sum loss: 4734.187012, avg loss: 2.879676, ppl: 17.808500 +epoch: 0, batch: 31569, sum loss: 5020.201660, avg loss: 2.928939, ppl: 18.707773 +epoch: 0, batch: 31570, sum loss: 5408.240234, avg loss: 3.293691, ppl: 26.942122 +epoch: 0, batch: 31571, sum loss: 4588.111816, avg loss: 2.816520, ppl: 16.718576 +epoch: 0, batch: 31572, sum loss: 4878.500000, avg loss: 3.105347, ppl: 22.316965 +epoch: 0, batch: 31573, sum loss: 5817.678223, avg loss: 3.166945, ppl: 23.734869 +epoch: 0, batch: 31574, sum loss: 4063.561523, avg loss: 2.825843, ppl: 16.875158 +epoch: 0, batch: 31575, sum loss: 4407.588867, avg loss: 2.940353, ppl: 18.922523 +epoch: 0, batch: 31576, sum loss: 5447.315918, avg loss: 3.039797, ppl: 20.901001 +epoch: 0, batch: 31577, sum loss: 5201.253418, avg loss: 3.013472, ppl: 20.357967 +epoch: 0, batch: 31578, sum loss: 4781.494141, avg loss: 2.931633, ppl: 18.758247 +epoch: 0, batch: 31579, sum loss: 4751.586426, avg loss: 2.808266, ppl: 16.581144 +epoch: 0, batch: 31580, sum loss: 4418.421387, avg loss: 2.758066, ppl: 15.769311 +epoch: 0, batch: 31581, sum loss: 3926.720459, avg loss: 2.715574, ppl: 15.113287 +epoch: 0, batch: 31582, sum loss: 5263.666016, avg loss: 3.006092, ppl: 20.208263 +epoch: 0, batch: 31583, sum loss: 4573.767578, avg loss: 2.823313, ppl: 
16.832533 +epoch: 0, batch: 31584, sum loss: 3874.699951, avg loss: 2.911119, ppl: 18.377359 +epoch: 0, batch: 31585, sum loss: 4412.602539, avg loss: 2.823162, ppl: 16.829988 +epoch: 0, batch: 31586, sum loss: 5273.936523, avg loss: 2.964551, ppl: 19.386002 +epoch: 0, batch: 31587, sum loss: 5157.913086, avg loss: 3.235830, ppl: 25.427469 +epoch: 0, batch: 31588, sum loss: 5514.696289, avg loss: 3.178499, ppl: 24.010693 +epoch: 0, batch: 31589, sum loss: 4486.797852, avg loss: 2.807758, ppl: 16.572725 +epoch: 0, batch: 31590, sum loss: 5452.250488, avg loss: 3.233838, ppl: 25.376863 +epoch: 0, batch: 31591, sum loss: 4401.588379, avg loss: 2.630955, ppl: 13.887033 +epoch: 0, batch: 31592, sum loss: 4531.856445, avg loss: 2.825347, ppl: 16.866796 +epoch: 0, batch: 31593, sum loss: 5276.773926, avg loss: 3.196108, ppl: 24.437231 +epoch: 0, batch: 31594, sum loss: 5359.500488, avg loss: 2.883002, ppl: 17.867828 +epoch: 0, batch: 31595, sum loss: 4954.793457, avg loss: 3.131980, ppl: 22.919325 +epoch: 0, batch: 31596, sum loss: 5179.869629, avg loss: 3.077760, ppl: 21.709713 +epoch: 0, batch: 31597, sum loss: 4830.135742, avg loss: 2.844603, ppl: 17.194733 +epoch: 0, batch: 31598, sum loss: 6109.460938, avg loss: 3.284657, ppl: 26.699812 +epoch: 0, batch: 31599, sum loss: 5143.363770, avg loss: 3.143866, ppl: 23.193350 +epoch: 0, batch: 31600, sum loss: 4410.975586, avg loss: 2.795295, ppl: 16.367456 +epoch: 0, batch: 31601, sum loss: 3932.232910, avg loss: 2.647968, ppl: 14.125311 +epoch: 0, batch: 31602, sum loss: 5352.620117, avg loss: 3.253872, ppl: 25.890398 +epoch: 0, batch: 31603, sum loss: 4927.347168, avg loss: 3.104819, ppl: 22.305178 +epoch: 0, batch: 31604, sum loss: 4597.918945, avg loss: 2.699894, ppl: 14.878150 +epoch: 0, batch: 31605, sum loss: 5485.993164, avg loss: 3.124142, ppl: 22.740374 +epoch: 0, batch: 31606, sum loss: 5012.409668, avg loss: 2.808073, ppl: 16.577938 +epoch: 0, batch: 31607, sum loss: 5335.714844, avg loss: 3.009428, ppl: 
20.275789 +epoch: 0, batch: 31608, sum loss: 5290.628906, avg loss: 2.900564, ppl: 18.184402 +epoch: 0, batch: 31609, sum loss: 5129.629395, avg loss: 3.307304, ppl: 27.311405 +epoch: 0, batch: 31610, sum loss: 4529.899414, avg loss: 2.772276, ppl: 15.995004 +epoch: 0, batch: 31611, sum loss: 5178.460449, avg loss: 3.180872, ppl: 24.067741 +epoch: 0, batch: 31612, sum loss: 4100.472656, avg loss: 2.857472, ppl: 17.417442 +epoch: 0, batch: 31613, sum loss: 4525.183105, avg loss: 2.961507, ppl: 19.327082 +epoch: 0, batch: 31614, sum loss: 4715.992188, avg loss: 2.891473, ppl: 18.019831 +epoch: 0, batch: 31615, sum loss: 4342.339844, avg loss: 3.000926, ppl: 20.104145 +epoch: 0, batch: 31616, sum loss: 5177.941406, avg loss: 3.024498, ppl: 20.583679 +epoch: 0, batch: 31617, sum loss: 5129.376953, avg loss: 2.941156, ppl: 18.937733 +epoch: 0, batch: 31618, sum loss: 6038.290039, avg loss: 2.992215, ppl: 19.929781 +epoch: 0, batch: 31619, sum loss: 5614.801270, avg loss: 3.078290, ppl: 21.721231 +epoch: 0, batch: 31620, sum loss: 5715.237305, avg loss: 3.072708, ppl: 21.600325 +epoch: 0, batch: 31621, sum loss: 4174.533203, avg loss: 2.495238, ppl: 12.124619 +epoch: 0, batch: 31622, sum loss: 4988.834473, avg loss: 2.922575, ppl: 18.589085 +epoch: 0, batch: 31623, sum loss: 5288.700195, avg loss: 2.998129, ppl: 20.048000 +epoch: 0, batch: 31624, sum loss: 5815.255859, avg loss: 3.239697, ppl: 25.525986 +epoch: 0, batch: 31625, sum loss: 4380.098633, avg loss: 2.906502, ppl: 18.292698 +epoch: 0, batch: 31626, sum loss: 4742.708984, avg loss: 3.015072, ppl: 20.390566 +epoch: 0, batch: 31627, sum loss: 5476.123535, avg loss: 3.085140, ppl: 21.870523 +epoch: 0, batch: 31628, sum loss: 5204.839844, avg loss: 3.096276, ppl: 22.115435 +epoch: 0, batch: 31629, sum loss: 5266.135254, avg loss: 3.149603, ppl: 23.326794 +epoch: 0, batch: 31630, sum loss: 5172.405273, avg loss: 2.730943, ppl: 15.347348 +epoch: 0, batch: 31631, sum loss: 5194.937988, avg loss: 3.129481, ppl: 
22.862108 +epoch: 0, batch: 31632, sum loss: 4991.540039, avg loss: 3.021513, ppl: 20.522327 +epoch: 0, batch: 31633, sum loss: 5064.366211, avg loss: 2.761377, ppl: 15.821620 +epoch: 0, batch: 31634, sum loss: 5602.311523, avg loss: 3.054695, ppl: 21.214722 +epoch: 0, batch: 31635, sum loss: 5344.160156, avg loss: 3.053806, ppl: 21.195858 +epoch: 0, batch: 31636, sum loss: 3855.020752, avg loss: 2.737941, ppl: 15.455131 +epoch: 0, batch: 31637, sum loss: 5702.420898, avg loss: 3.218071, ppl: 24.979876 +epoch: 0, batch: 31638, sum loss: 4999.541016, avg loss: 2.977690, ppl: 19.642385 +epoch: 0, batch: 31639, sum loss: 5607.914062, avg loss: 3.141689, ppl: 23.142912 +epoch: 0, batch: 31640, sum loss: 4485.422852, avg loss: 2.884516, ppl: 17.894913 +epoch: 0, batch: 31641, sum loss: 6803.322266, avg loss: 3.333328, ppl: 28.031469 +epoch: 0, batch: 31642, sum loss: 5059.656250, avg loss: 2.936539, ppl: 18.850487 +epoch: 0, batch: 31643, sum loss: 4286.824707, avg loss: 2.900423, ppl: 18.181831 +epoch: 0, batch: 31644, sum loss: 3742.901123, avg loss: 2.789047, ppl: 16.265511 +epoch: 0, batch: 31645, sum loss: 5558.393066, avg loss: 3.059105, ppl: 21.308472 +epoch: 0, batch: 31646, sum loss: 6156.724121, avg loss: 3.257526, ppl: 25.985168 +epoch: 0, batch: 31647, sum loss: 5762.672363, avg loss: 3.063622, ppl: 21.404936 +epoch: 0, batch: 31648, sum loss: 4511.222168, avg loss: 2.719242, ppl: 15.168818 +epoch: 0, batch: 31649, sum loss: 4489.146484, avg loss: 2.996760, ppl: 20.020563 +epoch: 0, batch: 31650, sum loss: 5016.571777, avg loss: 2.891396, ppl: 18.018442 +epoch: 0, batch: 31651, sum loss: 4509.230957, avg loss: 3.075874, ppl: 21.668810 +epoch: 0, batch: 31652, sum loss: 5376.965820, avg loss: 3.003891, ppl: 20.163851 +epoch: 0, batch: 31653, sum loss: 4147.006348, avg loss: 2.897978, ppl: 18.137432 +epoch: 0, batch: 31654, sum loss: 4804.408691, avg loss: 2.821144, ppl: 16.796061 +epoch: 0, batch: 31655, sum loss: 4980.874023, avg loss: 3.011411, ppl: 
20.316050 +epoch: 0, batch: 31656, sum loss: 3773.619385, avg loss: 2.657479, ppl: 14.260287 +epoch: 0, batch: 31657, sum loss: 4996.930176, avg loss: 3.209332, ppl: 24.762545 +epoch: 0, batch: 31658, sum loss: 5377.996582, avg loss: 3.134031, ppl: 22.966366 +epoch: 0, batch: 31659, sum loss: 4898.573730, avg loss: 2.950948, ppl: 19.124075 +epoch: 0, batch: 31660, sum loss: 4771.434570, avg loss: 2.850320, ppl: 17.293308 +epoch: 0, batch: 31661, sum loss: 5925.889648, avg loss: 3.133733, ppl: 22.959534 +epoch: 0, batch: 31662, sum loss: 5662.860840, avg loss: 3.056050, ppl: 21.243481 +epoch: 0, batch: 31663, sum loss: 5303.353516, avg loss: 3.138079, ppl: 23.059525 +epoch: 0, batch: 31664, sum loss: 5480.858887, avg loss: 3.110590, ppl: 22.434271 +epoch: 0, batch: 31665, sum loss: 5749.745117, avg loss: 3.024590, ppl: 20.585558 +epoch: 0, batch: 31666, sum loss: 5233.816406, avg loss: 2.989044, ppl: 19.866690 +epoch: 0, batch: 31667, sum loss: 6084.936523, avg loss: 3.267957, ppl: 26.257652 +epoch: 0, batch: 31668, sum loss: 4660.374512, avg loss: 2.760885, ppl: 15.813840 +epoch: 0, batch: 31669, sum loss: 4626.904297, avg loss: 2.755750, ppl: 15.732835 +epoch: 0, batch: 31670, sum loss: 6502.482910, avg loss: 3.073007, ppl: 21.606779 +epoch: 0, batch: 31671, sum loss: 4561.582520, avg loss: 2.635230, ppl: 13.946514 +epoch: 0, batch: 31672, sum loss: 5081.761719, avg loss: 3.039331, ppl: 20.891272 +epoch: 0, batch: 31673, sum loss: 4641.157715, avg loss: 2.857856, ppl: 17.424126 +epoch: 0, batch: 31674, sum loss: 5125.961426, avg loss: 2.962983, ppl: 19.355631 +epoch: 0, batch: 31675, sum loss: 4707.309570, avg loss: 2.854645, ppl: 17.368271 +epoch: 0, batch: 31676, sum loss: 4672.062500, avg loss: 3.008411, ppl: 20.255192 +epoch: 0, batch: 31677, sum loss: 4733.422852, avg loss: 2.753591, ppl: 15.698907 +epoch: 0, batch: 31678, sum loss: 5349.709473, avg loss: 2.973713, ppl: 19.564426 +epoch: 0, batch: 31679, sum loss: 4561.596680, avg loss: 2.710396, ppl: 
15.035229 +epoch: 0, batch: 31680, sum loss: 4747.589355, avg loss: 2.705179, ppl: 14.956993 +epoch: 0, batch: 31681, sum loss: 4646.309570, avg loss: 3.026912, ppl: 20.633413 +epoch: 0, batch: 31682, sum loss: 4434.870605, avg loss: 2.712459, ppl: 15.066279 +epoch: 0, batch: 31683, sum loss: 4699.941406, avg loss: 2.872825, ppl: 17.686911 +epoch: 0, batch: 31684, sum loss: 4838.767578, avg loss: 2.772933, ppl: 16.005503 +epoch: 0, batch: 31685, sum loss: 4817.961914, avg loss: 2.806035, ppl: 16.544188 +epoch: 0, batch: 31686, sum loss: 5680.637695, avg loss: 3.302696, ppl: 27.185839 +epoch: 0, batch: 31687, sum loss: 4876.838867, avg loss: 2.667855, ppl: 14.409029 +epoch: 0, batch: 31688, sum loss: 5420.807617, avg loss: 3.110044, ppl: 22.422041 +epoch: 0, batch: 31689, sum loss: 5461.207520, avg loss: 3.075004, ppl: 21.649977 +epoch: 0, batch: 31690, sum loss: 5004.248047, avg loss: 2.864481, ppl: 17.539944 +epoch: 0, batch: 31691, sum loss: 4668.497559, avg loss: 2.865867, ppl: 17.564278 +epoch: 0, batch: 31692, sum loss: 4751.032715, avg loss: 2.988071, ppl: 19.847355 +epoch: 0, batch: 31693, sum loss: 5552.669434, avg loss: 3.213350, ppl: 24.862244 +epoch: 0, batch: 31694, sum loss: 5122.569336, avg loss: 3.117815, ppl: 22.596941 +epoch: 0, batch: 31695, sum loss: 5569.335938, avg loss: 3.065127, ppl: 21.437187 +epoch: 0, batch: 31696, sum loss: 4222.261719, avg loss: 2.680801, ppl: 14.596780 +epoch: 0, batch: 31697, sum loss: 5285.108887, avg loss: 3.026981, ppl: 20.634844 +epoch: 0, batch: 31698, sum loss: 4489.365234, avg loss: 2.911391, ppl: 18.382355 +epoch: 0, batch: 31699, sum loss: 5333.505371, avg loss: 3.008181, ppl: 20.250538 +epoch: 0, batch: 31700, sum loss: 4769.407227, avg loss: 2.670441, ppl: 14.446338 +epoch: 0, batch: 31701, sum loss: 4081.291992, avg loss: 2.968212, ppl: 19.457106 +epoch: 0, batch: 31702, sum loss: 5684.958984, avg loss: 2.990510, ppl: 19.895823 +epoch: 0, batch: 31703, sum loss: 5316.685059, avg loss: 3.210559, ppl: 
24.792933 +epoch: 0, batch: 31704, sum loss: 4007.682129, avg loss: 2.470827, ppl: 11.832232 +epoch: 0, batch: 31705, sum loss: 5096.214844, avg loss: 2.905482, ppl: 18.274042 +epoch: 0, batch: 31706, sum loss: 5487.750000, avg loss: 3.015247, ppl: 20.394135 +epoch: 0, batch: 31707, sum loss: 4385.078125, avg loss: 2.716901, ppl: 15.133352 +epoch: 0, batch: 31708, sum loss: 5449.958008, avg loss: 3.124976, ppl: 22.759342 +epoch: 0, batch: 31709, sum loss: 5809.724609, avg loss: 3.333175, ppl: 28.027199 +epoch: 0, batch: 31710, sum loss: 6103.332520, avg loss: 2.925854, ppl: 18.650154 +epoch: 0, batch: 31711, sum loss: 4943.375000, avg loss: 2.994170, ppl: 19.968788 +epoch: 0, batch: 31712, sum loss: 5088.799805, avg loss: 3.060012, ppl: 21.327810 +epoch: 0, batch: 31713, sum loss: 4401.933594, avg loss: 2.680837, ppl: 14.597301 +epoch: 0, batch: 31714, sum loss: 5452.776367, avg loss: 2.882017, ppl: 17.850243 +epoch: 0, batch: 31715, sum loss: 4716.882812, avg loss: 2.848359, ppl: 17.259438 +epoch: 0, batch: 31716, sum loss: 5208.907715, avg loss: 2.997070, ppl: 20.026773 +epoch: 0, batch: 31717, sum loss: 4599.708984, avg loss: 2.856962, ppl: 17.408558 +epoch: 0, batch: 31718, sum loss: 6453.922852, avg loss: 3.183978, ppl: 24.142593 +epoch: 0, batch: 31719, sum loss: 5061.218750, avg loss: 2.814916, ppl: 16.691769 +epoch: 0, batch: 31720, sum loss: 5022.405762, avg loss: 3.111776, ppl: 22.460892 +epoch: 0, batch: 31721, sum loss: 5327.886230, avg loss: 3.016923, ppl: 20.428335 +epoch: 0, batch: 31722, sum loss: 4633.294922, avg loss: 3.000839, ppl: 20.102390 +epoch: 0, batch: 31723, sum loss: 5193.113281, avg loss: 3.166533, ppl: 23.725075 +epoch: 0, batch: 31724, sum loss: 5829.787109, avg loss: 3.057046, ppl: 21.264652 +epoch: 0, batch: 31725, sum loss: 5259.537109, avg loss: 3.176049, ppl: 23.951933 +epoch: 0, batch: 31726, sum loss: 5882.489258, avg loss: 3.099310, ppl: 22.182632 +epoch: 0, batch: 31727, sum loss: 5383.723145, avg loss: 3.016092, ppl: 
20.411360 +epoch: 0, batch: 31728, sum loss: 4594.942383, avg loss: 2.736714, ppl: 15.436172 +epoch: 0, batch: 31729, sum loss: 4894.878418, avg loss: 3.049768, ppl: 21.110456 +epoch: 0, batch: 31730, sum loss: 6061.536621, avg loss: 3.066028, ppl: 21.456501 +epoch: 0, batch: 31731, sum loss: 5617.598145, avg loss: 3.175579, ppl: 23.940668 +epoch: 0, batch: 31732, sum loss: 6106.522461, avg loss: 3.438357, ppl: 31.135771 +epoch: 0, batch: 31733, sum loss: 5591.184570, avg loss: 3.041994, ppl: 20.946962 +epoch: 0, batch: 31734, sum loss: 4755.511230, avg loss: 3.102095, ppl: 22.244503 +epoch: 0, batch: 31735, sum loss: 5213.026855, avg loss: 2.963631, ppl: 19.368174 +epoch: 0, batch: 31736, sum loss: 4601.039551, avg loss: 2.620182, ppl: 13.738224 +epoch: 0, batch: 31737, sum loss: 3696.107910, avg loss: 2.651440, ppl: 14.174440 +epoch: 0, batch: 31738, sum loss: 4431.330078, avg loss: 2.855239, ppl: 17.378584 +epoch: 0, batch: 31739, sum loss: 4660.125000, avg loss: 2.996865, ppl: 20.022669 +epoch: 0, batch: 31740, sum loss: 5023.499512, avg loss: 3.097102, ppl: 22.133713 +epoch: 0, batch: 31741, sum loss: 4676.375977, avg loss: 2.915447, ppl: 18.457052 +epoch: 0, batch: 31742, sum loss: 4541.873535, avg loss: 2.810565, ppl: 16.619308 +epoch: 0, batch: 31743, sum loss: 6048.880371, avg loss: 3.111564, ppl: 22.456142 +epoch: 0, batch: 31744, sum loss: 5249.719727, avg loss: 2.974345, ppl: 19.576805 +epoch: 0, batch: 31745, sum loss: 5724.894043, avg loss: 2.931333, ppl: 18.752617 +epoch: 0, batch: 31746, sum loss: 4806.308594, avg loss: 3.047754, ppl: 21.067978 +epoch: 0, batch: 31747, sum loss: 5228.011230, avg loss: 3.028975, ppl: 20.676033 +epoch: 0, batch: 31748, sum loss: 4270.472656, avg loss: 2.746285, ppl: 15.584623 +epoch: 0, batch: 31749, sum loss: 6357.755371, avg loss: 3.174117, ppl: 23.905693 +epoch: 0, batch: 31750, sum loss: 4690.932617, avg loss: 2.788902, ppl: 16.263145 +epoch: 0, batch: 31751, sum loss: 5052.716797, avg loss: 2.980954, ppl: 
19.706606 +epoch: 0, batch: 31752, sum loss: 4695.537598, avg loss: 2.771864, ppl: 15.988408 +epoch: 0, batch: 31753, sum loss: 4532.781250, avg loss: 3.119602, ppl: 22.637362 +epoch: 0, batch: 31754, sum loss: 5188.878418, avg loss: 3.122069, ppl: 22.693281 +epoch: 0, batch: 31755, sum loss: 5655.605957, avg loss: 2.967264, ppl: 19.438665 +epoch: 0, batch: 31756, sum loss: 4273.306641, avg loss: 2.959353, ppl: 19.285500 +epoch: 0, batch: 31757, sum loss: 4597.112305, avg loss: 2.928097, ppl: 18.692026 +epoch: 0, batch: 31758, sum loss: 6603.609375, avg loss: 3.325080, ppl: 27.801228 +epoch: 0, batch: 31759, sum loss: 4763.044922, avg loss: 2.806744, ppl: 16.555929 +epoch: 0, batch: 31760, sum loss: 5110.300293, avg loss: 3.084068, ppl: 21.847086 +epoch: 0, batch: 31761, sum loss: 5027.491211, avg loss: 2.936619, ppl: 18.851992 +epoch: 0, batch: 31762, sum loss: 4635.530273, avg loss: 2.760888, ppl: 15.813873 +epoch: 0, batch: 31763, sum loss: 5317.219727, avg loss: 2.990562, ppl: 19.896870 +epoch: 0, batch: 31764, sum loss: 3535.919922, avg loss: 2.560405, ppl: 12.941064 +epoch: 0, batch: 31765, sum loss: 6368.875488, avg loss: 3.231291, ppl: 25.312321 +epoch: 0, batch: 31766, sum loss: 5208.354492, avg loss: 3.074589, ppl: 21.640991 +epoch: 0, batch: 31767, sum loss: 4552.185059, avg loss: 2.753893, ppl: 15.703646 +epoch: 0, batch: 31768, sum loss: 4104.431152, avg loss: 2.738113, ppl: 15.457788 +epoch: 0, batch: 31769, sum loss: 5225.216797, avg loss: 2.882083, ppl: 17.851427 +epoch: 0, batch: 31770, sum loss: 5546.844727, avg loss: 3.173252, ppl: 23.885040 +epoch: 0, batch: 31771, sum loss: 3973.292969, avg loss: 2.640062, ppl: 14.014071 +epoch: 0, batch: 31772, sum loss: 4515.135742, avg loss: 3.077802, ppl: 21.710629 +epoch: 0, batch: 31773, sum loss: 4956.666504, avg loss: 2.888500, ppl: 17.966349 +epoch: 0, batch: 31774, sum loss: 4351.234375, avg loss: 2.687606, ppl: 14.696455 +epoch: 0, batch: 31775, sum loss: 5207.677734, avg loss: 2.948855, ppl: 
19.084089 +epoch: 0, batch: 31776, sum loss: 6163.946289, avg loss: 3.283935, ppl: 26.680563 +epoch: 0, batch: 31777, sum loss: 6027.070312, avg loss: 3.257876, ppl: 25.994265 +epoch: 0, batch: 31778, sum loss: 5311.762207, avg loss: 2.969124, ppl: 19.474844 +epoch: 0, batch: 31779, sum loss: 4894.404297, avg loss: 2.927275, ppl: 18.676676 +epoch: 0, batch: 31780, sum loss: 5713.026855, avg loss: 3.130426, ppl: 22.883718 +epoch: 0, batch: 31781, sum loss: 5298.244629, avg loss: 2.961568, ppl: 19.328247 +epoch: 0, batch: 31782, sum loss: 4982.682617, avg loss: 2.985430, ppl: 19.795012 +epoch: 0, batch: 31783, sum loss: 4915.486328, avg loss: 2.841321, ppl: 17.138399 +epoch: 0, batch: 31784, sum loss: 5125.714844, avg loss: 3.343585, ppl: 28.320461 +epoch: 0, batch: 31785, sum loss: 4870.836914, avg loss: 2.950234, ppl: 19.110434 +epoch: 0, batch: 31786, sum loss: 4484.259277, avg loss: 2.666028, ppl: 14.382728 +epoch: 0, batch: 31787, sum loss: 4390.440918, avg loss: 2.827071, ppl: 16.895899 +epoch: 0, batch: 31788, sum loss: 5887.858887, avg loss: 3.148588, ppl: 23.303131 +epoch: 0, batch: 31789, sum loss: 4836.908691, avg loss: 2.835234, ppl: 17.034380 +epoch: 0, batch: 31790, sum loss: 5249.033691, avg loss: 3.118855, ppl: 22.620470 +epoch: 0, batch: 31791, sum loss: 5248.075195, avg loss: 3.009217, ppl: 20.271526 +epoch: 0, batch: 31792, sum loss: 4158.869141, avg loss: 2.618935, ppl: 13.721107 +epoch: 0, batch: 31793, sum loss: 5549.143066, avg loss: 3.020764, ppl: 20.506950 +epoch: 0, batch: 31794, sum loss: 4979.033691, avg loss: 2.942691, ppl: 18.966824 +epoch: 0, batch: 31795, sum loss: 5338.550293, avg loss: 2.934882, ppl: 18.819281 +epoch: 0, batch: 31796, sum loss: 4718.110352, avg loss: 3.162272, ppl: 23.624220 +epoch: 0, batch: 31797, sum loss: 4933.799316, avg loss: 2.850260, ppl: 17.292269 +epoch: 0, batch: 31798, sum loss: 4814.401367, avg loss: 2.852134, ppl: 17.324705 +epoch: 0, batch: 31799, sum loss: 4290.777832, avg loss: 2.809940, ppl: 
16.608915 +epoch: 0, batch: 31800, sum loss: 4639.564453, avg loss: 2.719557, ppl: 15.173599 +epoch: 0, batch: 31801, sum loss: 3763.273438, avg loss: 2.495539, ppl: 12.128274 +epoch: 0, batch: 31802, sum loss: 6000.306641, avg loss: 3.213876, ppl: 24.875317 +epoch: 0, batch: 31803, sum loss: 4988.658203, avg loss: 2.815270, ppl: 16.697683 +epoch: 0, batch: 31804, sum loss: 6234.500977, avg loss: 3.090977, ppl: 21.998569 +epoch: 0, batch: 31805, sum loss: 4532.082031, avg loss: 2.653444, ppl: 14.202867 +epoch: 0, batch: 31806, sum loss: 5080.020508, avg loss: 3.002376, ppl: 20.133318 +epoch: 0, batch: 31807, sum loss: 5927.755371, avg loss: 3.110050, ppl: 22.422165 +epoch: 0, batch: 31808, sum loss: 4428.144043, avg loss: 2.809736, ppl: 16.605534 +epoch: 0, batch: 31809, sum loss: 4727.596680, avg loss: 3.079868, ppl: 21.755522 +epoch: 0, batch: 31810, sum loss: 4275.279297, avg loss: 2.788832, ppl: 16.262014 +epoch: 0, batch: 31811, sum loss: 4291.260742, avg loss: 2.843778, ppl: 17.180550 +epoch: 0, batch: 31812, sum loss: 4874.338379, avg loss: 2.983071, ppl: 19.748377 +epoch: 0, batch: 31813, sum loss: 5610.366211, avg loss: 3.059087, ppl: 21.308100 +epoch: 0, batch: 31814, sum loss: 5196.259766, avg loss: 3.164592, ppl: 23.679081 +epoch: 0, batch: 31815, sum loss: 5438.979980, avg loss: 2.919474, ppl: 18.531536 +epoch: 0, batch: 31816, sum loss: 5659.205566, avg loss: 3.080678, ppl: 21.773165 +epoch: 0, batch: 31817, sum loss: 5288.138184, avg loss: 2.960884, ppl: 19.315037 +epoch: 0, batch: 31818, sum loss: 4621.830566, avg loss: 2.872486, ppl: 17.680925 +epoch: 0, batch: 31819, sum loss: 4727.444824, avg loss: 3.034304, ppl: 20.786499 +epoch: 0, batch: 31820, sum loss: 4657.639648, avg loss: 2.887563, ppl: 17.949522 +epoch: 0, batch: 31821, sum loss: 4823.177246, avg loss: 3.076006, ppl: 21.671671 +epoch: 0, batch: 31822, sum loss: 5150.319336, avg loss: 2.862879, ppl: 17.511871 +epoch: 0, batch: 31823, sum loss: 4433.562500, avg loss: 2.934191, ppl: 
18.806282 +epoch: 0, batch: 31824, sum loss: 6294.995117, avg loss: 3.332448, ppl: 28.006832 +epoch: 0, batch: 31825, sum loss: 4513.369629, avg loss: 2.833251, ppl: 17.000648 +epoch: 0, batch: 31826, sum loss: 5363.329102, avg loss: 3.040436, ppl: 20.914360 +epoch: 0, batch: 31827, sum loss: 4218.175781, avg loss: 2.746208, ppl: 15.583430 +epoch: 0, batch: 31828, sum loss: 5175.435059, avg loss: 2.733986, ppl: 15.394120 +epoch: 0, batch: 31829, sum loss: 4639.166504, avg loss: 2.784614, ppl: 16.193560 +epoch: 0, batch: 31830, sum loss: 5323.155273, avg loss: 3.005734, ppl: 20.201042 +epoch: 0, batch: 31831, sum loss: 4526.528809, avg loss: 2.813256, ppl: 16.664089 +epoch: 0, batch: 31832, sum loss: 4395.206055, avg loss: 2.870807, ppl: 17.651264 +epoch: 0, batch: 31833, sum loss: 5400.982422, avg loss: 3.096894, ppl: 22.129101 +epoch: 0, batch: 31834, sum loss: 4565.717773, avg loss: 2.663779, ppl: 14.350421 +epoch: 0, batch: 31835, sum loss: 4861.223633, avg loss: 2.903957, ppl: 18.246201 +epoch: 0, batch: 31836, sum loss: 4591.944336, avg loss: 2.937904, ppl: 18.876247 +epoch: 0, batch: 31837, sum loss: 4634.582031, avg loss: 2.918503, ppl: 18.513548 +epoch: 0, batch: 31838, sum loss: 5042.899414, avg loss: 3.126410, ppl: 22.792009 +epoch: 0, batch: 31839, sum loss: 4804.781738, avg loss: 3.037156, ppl: 20.845881 +epoch: 0, batch: 31840, sum loss: 4459.042480, avg loss: 2.806194, ppl: 16.546822 +epoch: 0, batch: 31841, sum loss: 4957.266113, avg loss: 2.829490, ppl: 16.936819 +epoch: 0, batch: 31842, sum loss: 5759.280762, avg loss: 3.222877, ppl: 25.100224 +epoch: 0, batch: 31843, sum loss: 5773.013672, avg loss: 3.312113, ppl: 27.443060 +epoch: 0, batch: 31844, sum loss: 4258.337891, avg loss: 2.982029, ppl: 19.727812 +epoch: 0, batch: 31845, sum loss: 4416.498047, avg loss: 2.748288, ppl: 15.615870 +epoch: 0, batch: 31846, sum loss: 5022.362793, avg loss: 2.919978, ppl: 18.540886 +epoch: 0, batch: 31847, sum loss: 5212.211914, avg loss: 3.319880, ppl: 
27.657038 +epoch: 0, batch: 31848, sum loss: 5986.447754, avg loss: 3.257044, ppl: 25.972656 +epoch: 0, batch: 31849, sum loss: 4433.181152, avg loss: 2.850920, ppl: 17.303701 +epoch: 0, batch: 31850, sum loss: 5518.495605, avg loss: 2.965339, ppl: 19.401278 +epoch: 0, batch: 31851, sum loss: 4694.371094, avg loss: 3.088402, ppl: 21.941992 +epoch: 0, batch: 31852, sum loss: 5732.780273, avg loss: 3.313746, ppl: 27.487896 +epoch: 0, batch: 31853, sum loss: 4147.667480, avg loss: 2.665596, ppl: 14.376515 +epoch: 0, batch: 31854, sum loss: 5927.077148, avg loss: 3.318632, ppl: 27.622547 +epoch: 0, batch: 31855, sum loss: 4984.270508, avg loss: 3.020770, ppl: 20.507078 +epoch: 0, batch: 31856, sum loss: 4942.894531, avg loss: 2.840744, ppl: 17.128504 +epoch: 0, batch: 31857, sum loss: 5188.281738, avg loss: 2.929577, ppl: 18.719717 +epoch: 0, batch: 31858, sum loss: 4897.078613, avg loss: 3.047342, ppl: 21.059296 +epoch: 0, batch: 31859, sum loss: 4936.847656, avg loss: 2.825900, ppl: 16.876127 +epoch: 0, batch: 31860, sum loss: 4958.848633, avg loss: 2.883052, ppl: 17.868719 +epoch: 0, batch: 31861, sum loss: 5439.002930, avg loss: 3.142116, ppl: 23.152807 +epoch: 0, batch: 31862, sum loss: 4488.667480, avg loss: 2.662318, ppl: 14.329463 +epoch: 0, batch: 31863, sum loss: 4104.429199, avg loss: 2.754651, ppl: 15.715549 +epoch: 0, batch: 31864, sum loss: 3954.457031, avg loss: 2.653998, ppl: 14.210742 +epoch: 0, batch: 31865, sum loss: 5568.127441, avg loss: 3.258120, ppl: 26.000618 +epoch: 0, batch: 31866, sum loss: 5324.801270, avg loss: 3.018595, ppl: 20.462521 +epoch: 0, batch: 31867, sum loss: 4034.001465, avg loss: 2.599228, ppl: 13.453347 +epoch: 0, batch: 31868, sum loss: 6058.560547, avg loss: 3.262553, ppl: 26.116119 +epoch: 0, batch: 31869, sum loss: 4529.847168, avg loss: 3.013870, ppl: 20.366070 +epoch: 0, batch: 31870, sum loss: 4916.294922, avg loss: 3.025412, ppl: 20.602497 +epoch: 0, batch: 31871, sum loss: 4282.870605, avg loss: 2.899709, ppl: 
18.168865 +epoch: 0, batch: 31872, sum loss: 4864.176270, avg loss: 2.677037, ppl: 14.541942 +epoch: 0, batch: 31873, sum loss: 5469.898926, avg loss: 3.167284, ppl: 23.742905 +epoch: 0, batch: 31874, sum loss: 5633.714844, avg loss: 3.009463, ppl: 20.276514 +epoch: 0, batch: 31875, sum loss: 5996.372070, avg loss: 3.385868, ppl: 29.543621 +epoch: 0, batch: 31876, sum loss: 5095.203125, avg loss: 2.936717, ppl: 18.853840 +epoch: 0, batch: 31877, sum loss: 5047.357910, avg loss: 2.979550, ppl: 19.678961 +epoch: 0, batch: 31878, sum loss: 5052.624512, avg loss: 2.897147, ppl: 18.122368 +epoch: 0, batch: 31879, sum loss: 5085.300293, avg loss: 3.085741, ppl: 21.883667 +epoch: 0, batch: 31880, sum loss: 5058.208984, avg loss: 3.163358, ppl: 23.649872 +epoch: 0, batch: 31881, sum loss: 4706.137207, avg loss: 2.912214, ppl: 18.397478 +epoch: 0, batch: 31882, sum loss: 4949.834961, avg loss: 3.063017, ppl: 21.391993 +epoch: 0, batch: 31883, sum loss: 6385.813965, avg loss: 3.345110, ppl: 28.363688 +epoch: 0, batch: 31884, sum loss: 5672.905273, avg loss: 3.127291, ppl: 22.812092 +epoch: 0, batch: 31885, sum loss: 4565.301758, avg loss: 2.797366, ppl: 16.401394 +epoch: 0, batch: 31886, sum loss: 5389.809082, avg loss: 3.021194, ppl: 20.515762 +epoch: 0, batch: 31887, sum loss: 4495.469727, avg loss: 2.768146, ppl: 15.929078 +epoch: 0, batch: 31888, sum loss: 3900.162354, avg loss: 2.675009, ppl: 14.512474 +epoch: 0, batch: 31889, sum loss: 5394.655273, avg loss: 3.228400, ppl: 25.239229 +epoch: 0, batch: 31890, sum loss: 4951.728516, avg loss: 2.977588, ppl: 19.640387 +epoch: 0, batch: 31891, sum loss: 5209.994629, avg loss: 2.902504, ppl: 18.219709 +epoch: 0, batch: 31892, sum loss: 5025.654297, avg loss: 2.779676, ppl: 16.113798 +epoch: 0, batch: 31893, sum loss: 3982.793701, avg loss: 2.674811, ppl: 14.509609 +epoch: 0, batch: 31894, sum loss: 4661.119629, avg loss: 2.891513, ppl: 18.020561 +epoch: 0, batch: 31895, sum loss: 5675.226074, avg loss: 2.962018, ppl: 
19.336950 +epoch: 0, batch: 31896, sum loss: 4781.810547, avg loss: 2.968225, ppl: 19.457357 +epoch: 0, batch: 31897, sum loss: 5075.680664, avg loss: 2.838748, ppl: 17.094341 +epoch: 0, batch: 31898, sum loss: 5176.232910, avg loss: 3.014696, ppl: 20.382891 +epoch: 0, batch: 31899, sum loss: 5477.555664, avg loss: 3.168048, ppl: 23.761066 +epoch: 0, batch: 31900, sum loss: 4804.072266, avg loss: 3.023330, ppl: 20.559650 +epoch: 0, batch: 31901, sum loss: 5281.120605, avg loss: 2.874862, ppl: 17.722973 +epoch: 0, batch: 31902, sum loss: 4197.999512, avg loss: 2.865529, ppl: 17.558332 +epoch: 0, batch: 31903, sum loss: 5097.500488, avg loss: 3.021636, ppl: 20.524853 +epoch: 0, batch: 31904, sum loss: 6190.898926, avg loss: 3.063285, ppl: 21.397736 +epoch: 0, batch: 31905, sum loss: 4652.560547, avg loss: 2.948391, ppl: 19.075232 +epoch: 0, batch: 31906, sum loss: 4807.337402, avg loss: 2.982219, ppl: 19.731556 +epoch: 0, batch: 31907, sum loss: 4272.879883, avg loss: 2.798219, ppl: 16.415380 +epoch: 0, batch: 31908, sum loss: 4741.148926, avg loss: 2.724798, ppl: 15.253336 +epoch: 0, batch: 31909, sum loss: 4822.056641, avg loss: 2.934910, ppl: 18.819801 +epoch: 0, batch: 31910, sum loss: 4483.712402, avg loss: 3.058467, ppl: 21.294886 +epoch: 0, batch: 31911, sum loss: 6186.690918, avg loss: 3.088712, ppl: 21.948799 +epoch: 0, batch: 31912, sum loss: 5038.199219, avg loss: 3.015080, ppl: 20.390732 +epoch: 0, batch: 31913, sum loss: 3876.765381, avg loss: 2.669949, ppl: 14.439227 +epoch: 0, batch: 31914, sum loss: 5091.281738, avg loss: 3.057827, ppl: 21.281263 +epoch: 0, batch: 31915, sum loss: 5160.769043, avg loss: 3.282932, ppl: 26.653809 +epoch: 0, batch: 31916, sum loss: 4650.939941, avg loss: 2.676030, ppl: 14.527304 +epoch: 0, batch: 31917, sum loss: 5093.325195, avg loss: 2.959515, ppl: 19.288612 +epoch: 0, batch: 31918, sum loss: 4461.386719, avg loss: 2.898887, ppl: 18.153923 +epoch: 0, batch: 31919, sum loss: 5402.514648, avg loss: 3.288201, ppl: 
26.794619 +epoch: 0, batch: 31920, sum loss: 4884.741211, avg loss: 3.076034, ppl: 21.672270 +epoch: 0, batch: 31921, sum loss: 5843.015625, avg loss: 3.226403, ppl: 25.188889 +epoch: 0, batch: 31922, sum loss: 4432.392578, avg loss: 2.798228, ppl: 16.415525 +epoch: 0, batch: 31923, sum loss: 4922.683105, avg loss: 2.921474, ppl: 18.568632 +epoch: 0, batch: 31924, sum loss: 4485.213379, avg loss: 2.792785, ppl: 16.326433 +epoch: 0, batch: 31925, sum loss: 6377.502930, avg loss: 3.257152, ppl: 25.975449 +epoch: 0, batch: 31926, sum loss: 5638.138184, avg loss: 3.139275, ppl: 23.087130 +epoch: 0, batch: 31927, sum loss: 4608.711914, avg loss: 2.749828, ppl: 15.639943 +epoch: 0, batch: 31928, sum loss: 4397.979004, avg loss: 2.740174, ppl: 15.489677 +epoch: 0, batch: 31929, sum loss: 4695.254395, avg loss: 2.830171, ppl: 16.948364 +epoch: 0, batch: 31930, sum loss: 5398.922852, avg loss: 2.798820, ppl: 16.425245 +epoch: 0, batch: 31931, sum loss: 4496.914062, avg loss: 2.882637, ppl: 17.861317 +epoch: 0, batch: 31932, sum loss: 5016.528320, avg loss: 2.785413, ppl: 16.206503 +epoch: 0, batch: 31933, sum loss: 5181.049805, avg loss: 2.943778, ppl: 18.987452 +epoch: 0, batch: 31934, sum loss: 5034.923340, avg loss: 2.918796, ppl: 18.518982 +epoch: 0, batch: 31935, sum loss: 5389.863770, avg loss: 2.946891, ppl: 19.046652 +epoch: 0, batch: 31936, sum loss: 5151.037598, avg loss: 2.696878, ppl: 14.833356 +epoch: 0, batch: 31937, sum loss: 4916.296875, avg loss: 2.966987, ppl: 19.433271 +epoch: 0, batch: 31938, sum loss: 4462.916504, avg loss: 2.780633, ppl: 16.129230 +epoch: 0, batch: 31939, sum loss: 4866.099121, avg loss: 2.899940, ppl: 18.173050 +epoch: 0, batch: 31940, sum loss: 5172.890137, avg loss: 2.930816, ppl: 18.742918 +epoch: 0, batch: 31941, sum loss: 4703.337402, avg loss: 2.910481, ppl: 18.365629 +epoch: 0, batch: 31942, sum loss: 6537.087891, avg loss: 3.402961, ppl: 30.052956 +epoch: 0, batch: 31943, sum loss: 5209.657715, avg loss: 2.938329, ppl: 
18.884274 +epoch: 0, batch: 31944, sum loss: 5064.383789, avg loss: 3.118463, ppl: 22.611595 +epoch: 0, batch: 31945, sum loss: 4789.572754, avg loss: 2.897503, ppl: 18.128824 +epoch: 0, batch: 31946, sum loss: 5860.171387, avg loss: 3.150630, ppl: 23.350765 +epoch: 0, batch: 31947, sum loss: 4997.796875, avg loss: 2.890571, ppl: 18.003584 +epoch: 0, batch: 31948, sum loss: 4378.915527, avg loss: 2.825107, ppl: 16.862747 +epoch: 0, batch: 31949, sum loss: 4428.790039, avg loss: 2.817297, ppl: 16.731556 +epoch: 0, batch: 31950, sum loss: 5174.370117, avg loss: 2.801500, ppl: 16.469334 +epoch: 0, batch: 31951, sum loss: 5024.424805, avg loss: 3.014052, ppl: 20.369770 +epoch: 0, batch: 31952, sum loss: 5427.299316, avg loss: 3.146261, ppl: 23.248968 +epoch: 0, batch: 31953, sum loss: 5940.746094, avg loss: 3.237464, ppl: 25.469049 +epoch: 0, batch: 31954, sum loss: 4451.672852, avg loss: 2.913399, ppl: 18.419291 +epoch: 0, batch: 31955, sum loss: 4524.676270, avg loss: 2.760632, ppl: 15.809833 +epoch: 0, batch: 31956, sum loss: 4686.447266, avg loss: 2.718357, ppl: 15.155403 +epoch: 0, batch: 31957, sum loss: 5152.146484, avg loss: 2.892839, ppl: 18.044468 +epoch: 0, batch: 31958, sum loss: 4988.875000, avg loss: 2.957247, ppl: 19.244913 +epoch: 0, batch: 31959, sum loss: 5208.001953, avg loss: 3.034966, ppl: 20.800276 +epoch: 0, batch: 31960, sum loss: 4611.427734, avg loss: 2.781320, ppl: 16.140306 +epoch: 0, batch: 31961, sum loss: 4710.848633, avg loss: 2.888319, ppl: 17.963093 +epoch: 0, batch: 31962, sum loss: 4183.183105, avg loss: 3.062360, ppl: 21.377941 +epoch: 0, batch: 31963, sum loss: 4695.391113, avg loss: 2.811611, ppl: 16.636705 +epoch: 0, batch: 31964, sum loss: 5228.838379, avg loss: 3.025948, ppl: 20.613539 +epoch: 0, batch: 31965, sum loss: 6112.674805, avg loss: 3.077883, ppl: 21.712378 +epoch: 0, batch: 31966, sum loss: 4399.694336, avg loss: 2.671338, ppl: 14.459308 +epoch: 0, batch: 31967, sum loss: 4892.845215, avg loss: 2.823338, ppl: 
16.832947 +epoch: 0, batch: 31968, sum loss: 4315.492188, avg loss: 2.693815, ppl: 14.787988 +epoch: 0, batch: 31969, sum loss: 4740.688965, avg loss: 2.785364, ppl: 16.205709 +epoch: 0, batch: 31970, sum loss: 5364.952148, avg loss: 2.939700, ppl: 18.910170 +epoch: 0, batch: 31971, sum loss: 5615.166016, avg loss: 3.217860, ppl: 24.974617 +epoch: 0, batch: 31972, sum loss: 5423.216797, avg loss: 3.053613, ppl: 21.191776 +epoch: 0, batch: 31973, sum loss: 5199.856934, avg loss: 2.833710, ppl: 17.008440 +epoch: 0, batch: 31974, sum loss: 5138.395020, avg loss: 3.008428, ppl: 20.255529 +epoch: 0, batch: 31975, sum loss: 5147.348145, avg loss: 3.020744, ppl: 20.506550 +epoch: 0, batch: 31976, sum loss: 5057.279297, avg loss: 3.061307, ppl: 21.355455 +epoch: 0, batch: 31977, sum loss: 4293.665039, avg loss: 2.983784, ppl: 19.762451 +epoch: 0, batch: 31978, sum loss: 4951.500977, avg loss: 3.143810, ppl: 23.192060 +epoch: 0, batch: 31979, sum loss: 5338.273438, avg loss: 3.118150, ppl: 22.604528 +epoch: 0, batch: 31980, sum loss: 4511.716309, avg loss: 2.914546, ppl: 18.440430 +epoch: 0, batch: 31981, sum loss: 5088.164551, avg loss: 3.050458, ppl: 21.125027 +epoch: 0, batch: 31982, sum loss: 5300.362305, avg loss: 3.166286, ppl: 23.719227 +epoch: 0, batch: 31983, sum loss: 5180.867676, avg loss: 3.035072, ppl: 20.802473 +epoch: 0, batch: 31984, sum loss: 4765.907715, avg loss: 2.940103, ppl: 18.917801 +epoch: 0, batch: 31985, sum loss: 5107.430664, avg loss: 3.168381, ppl: 23.768976 +epoch: 0, batch: 31986, sum loss: 4604.639648, avg loss: 2.961183, ppl: 19.320820 +epoch: 0, batch: 31987, sum loss: 5706.114258, avg loss: 3.107905, ppl: 22.374130 +epoch: 0, batch: 31988, sum loss: 5284.865723, avg loss: 2.853599, ppl: 17.350115 +epoch: 0, batch: 31989, sum loss: 4307.637207, avg loss: 2.795352, ppl: 16.368389 +epoch: 0, batch: 31990, sum loss: 5184.893555, avg loss: 2.855118, ppl: 17.376480 +epoch: 0, batch: 31991, sum loss: 5419.214844, avg loss: 3.054800, ppl: 
21.216932 +epoch: 0, batch: 31992, sum loss: 4773.781738, avg loss: 2.821384, ppl: 16.800085 +epoch: 0, batch: 31993, sum loss: 4410.087402, avg loss: 2.803616, ppl: 16.504211 +epoch: 0, batch: 31994, sum loss: 4244.405762, avg loss: 2.835274, ppl: 17.035070 +epoch: 0, batch: 31995, sum loss: 4665.594727, avg loss: 2.962282, ppl: 19.342064 +epoch: 0, batch: 31996, sum loss: 4131.821777, avg loss: 2.723679, ppl: 15.236279 +epoch: 0, batch: 31997, sum loss: 5227.915039, avg loss: 2.943646, ppl: 18.984938 +epoch: 0, batch: 31998, sum loss: 4322.854492, avg loss: 2.676690, ppl: 14.536898 +epoch: 0, batch: 31999, sum loss: 5265.789062, avg loss: 3.014189, ppl: 20.372572 +epoch: 0, batch: 32000, sum loss: 4744.902832, avg loss: 3.039656, ppl: 20.898052 +epoch: 0, batch: 32001, sum loss: 4275.686035, avg loss: 2.608716, ppl: 13.581604 +epoch: 0, batch: 32002, sum loss: 5211.225586, avg loss: 2.812318, ppl: 16.648466 +epoch: 0, batch: 32003, sum loss: 4567.427734, avg loss: 2.705822, ppl: 14.966618 +epoch: 0, batch: 32004, sum loss: 5681.672363, avg loss: 3.144257, ppl: 23.202425 +epoch: 0, batch: 32005, sum loss: 4748.149902, avg loss: 2.884660, ppl: 17.897478 +epoch: 0, batch: 32006, sum loss: 4612.390137, avg loss: 2.958557, ppl: 19.270144 +epoch: 0, batch: 32007, sum loss: 4917.242188, avg loss: 3.079050, ppl: 21.737732 +epoch: 0, batch: 32008, sum loss: 4545.907715, avg loss: 2.707509, ppl: 14.991885 +epoch: 0, batch: 32009, sum loss: 4721.851074, avg loss: 3.044391, ppl: 20.997248 +epoch: 0, batch: 32010, sum loss: 5038.633789, avg loss: 3.051868, ppl: 21.154823 +epoch: 0, batch: 32011, sum loss: 4500.697266, avg loss: 2.686984, ppl: 14.687306 +epoch: 0, batch: 32012, sum loss: 4424.524414, avg loss: 2.847184, ppl: 17.239176 +epoch: 0, batch: 32013, sum loss: 5433.496094, avg loss: 3.257492, ppl: 25.984282 +epoch: 0, batch: 32014, sum loss: 5570.774414, avg loss: 2.966333, ppl: 19.420582 +epoch: 0, batch: 32015, sum loss: 4650.029297, avg loss: 3.135556, ppl: 
23.001421 +epoch: 0, batch: 32016, sum loss: 3935.021729, avg loss: 2.800727, ppl: 16.456608 +epoch: 0, batch: 32017, sum loss: 5251.721191, avg loss: 2.723922, ppl: 15.239977 +epoch: 0, batch: 32018, sum loss: 4903.818848, avg loss: 2.893108, ppl: 18.049326 +epoch: 0, batch: 32019, sum loss: 5777.357422, avg loss: 3.096118, ppl: 22.111956 +epoch: 0, batch: 32020, sum loss: 5237.873047, avg loss: 3.059505, ppl: 21.317007 +epoch: 0, batch: 32021, sum loss: 4754.965820, avg loss: 2.881798, ppl: 17.846325 +epoch: 0, batch: 32022, sum loss: 5192.018066, avg loss: 3.050539, ppl: 21.126734 +epoch: 0, batch: 32023, sum loss: 4812.008789, avg loss: 3.009386, ppl: 20.274954 +epoch: 0, batch: 32024, sum loss: 5574.589844, avg loss: 3.061280, ppl: 21.354864 +epoch: 0, batch: 32025, sum loss: 4783.990723, avg loss: 3.033602, ppl: 20.771923 +epoch: 0, batch: 32026, sum loss: 4748.506348, avg loss: 2.874399, ppl: 17.714769 +epoch: 0, batch: 32027, sum loss: 5559.553223, avg loss: 3.217334, ppl: 24.961485 +epoch: 0, batch: 32028, sum loss: 4754.737305, avg loss: 2.836956, ppl: 17.063736 +epoch: 0, batch: 32029, sum loss: 5387.740234, avg loss: 3.110704, ppl: 22.436844 +epoch: 0, batch: 32030, sum loss: 5470.639648, avg loss: 2.922350, ppl: 18.584919 +epoch: 0, batch: 32031, sum loss: 4649.481934, avg loss: 3.066941, ppl: 21.476103 +epoch: 0, batch: 32032, sum loss: 5089.550293, avg loss: 2.908314, ppl: 18.325884 +epoch: 0, batch: 32033, sum loss: 5724.084961, avg loss: 3.043107, ppl: 20.970303 +epoch: 0, batch: 32034, sum loss: 5485.548828, avg loss: 2.987772, ppl: 19.841417 +epoch: 0, batch: 32035, sum loss: 5512.195801, avg loss: 2.963546, ppl: 19.366529 +epoch: 0, batch: 32036, sum loss: 4468.167480, avg loss: 2.966911, ppl: 19.431799 +epoch: 0, batch: 32037, sum loss: 4931.064453, avg loss: 3.087705, ppl: 21.926697 +epoch: 0, batch: 32038, sum loss: 4265.378906, avg loss: 3.095340, ppl: 22.094755 +epoch: 0, batch: 32039, sum loss: 4803.540527, avg loss: 2.612040, ppl: 
13.626815 +epoch: 0, batch: 32040, sum loss: 4821.959473, avg loss: 3.011842, ppl: 20.324814 +epoch: 0, batch: 32041, sum loss: 6275.043457, avg loss: 3.170815, ppl: 23.826900 +epoch: 0, batch: 32042, sum loss: 4683.820312, avg loss: 2.814796, ppl: 16.689770 +epoch: 0, batch: 32043, sum loss: 5366.358398, avg loss: 2.964839, ppl: 19.391581 +epoch: 0, batch: 32044, sum loss: 4320.985840, avg loss: 2.683842, ppl: 14.641240 +epoch: 0, batch: 32045, sum loss: 5090.081055, avg loss: 2.971442, ppl: 19.520056 +epoch: 0, batch: 32046, sum loss: 5223.216797, avg loss: 2.865177, ppl: 17.552155 +epoch: 0, batch: 32047, sum loss: 4064.445801, avg loss: 2.816664, ppl: 16.720972 +epoch: 0, batch: 32048, sum loss: 5761.874023, avg loss: 3.058320, ppl: 21.291754 +epoch: 0, batch: 32049, sum loss: 4883.721191, avg loss: 3.031484, ppl: 20.727978 +epoch: 0, batch: 32050, sum loss: 5837.393555, avg loss: 3.111617, ppl: 22.457331 +epoch: 0, batch: 32051, sum loss: 5159.069824, avg loss: 3.263169, ppl: 26.132208 +epoch: 0, batch: 32052, sum loss: 4576.774902, avg loss: 2.860484, ppl: 17.469986 +epoch: 0, batch: 32053, sum loss: 5210.200684, avg loss: 2.861176, ppl: 17.482065 +epoch: 0, batch: 32054, sum loss: 4185.665039, avg loss: 2.760993, ppl: 15.815536 +epoch: 0, batch: 32055, sum loss: 4506.570312, avg loss: 2.836105, ppl: 17.049223 +epoch: 0, batch: 32056, sum loss: 4798.985352, avg loss: 3.046975, ppl: 21.051559 +epoch: 0, batch: 32057, sum loss: 5083.119629, avg loss: 2.960466, ppl: 19.306961 +epoch: 0, batch: 32058, sum loss: 4562.417969, avg loss: 3.041612, ppl: 20.938969 +epoch: 0, batch: 32059, sum loss: 4727.325195, avg loss: 2.914504, ppl: 18.439661 +epoch: 0, batch: 32060, sum loss: 4979.407227, avg loss: 2.967466, ppl: 19.442583 +epoch: 0, batch: 32061, sum loss: 5504.127441, avg loss: 3.176069, ppl: 23.952406 +epoch: 0, batch: 32062, sum loss: 4410.513672, avg loss: 2.784415, ppl: 16.190348 +epoch: 0, batch: 32063, sum loss: 4705.023438, avg loss: 2.936968, ppl: 
18.858587 +epoch: 0, batch: 32064, sum loss: 4082.715332, avg loss: 2.627230, ppl: 13.835392 +epoch: 0, batch: 32065, sum loss: 4537.374512, avg loss: 3.018878, ppl: 20.468323 +epoch: 0, batch: 32066, sum loss: 4401.178711, avg loss: 2.832161, ppl: 16.982126 +epoch: 0, batch: 32067, sum loss: 3856.962891, avg loss: 2.656311, ppl: 14.243641 +epoch: 0, batch: 32068, sum loss: 4979.030273, avg loss: 3.079178, ppl: 21.740520 +epoch: 0, batch: 32069, sum loss: 5955.935547, avg loss: 3.057462, ppl: 21.273497 +epoch: 0, batch: 32070, sum loss: 4321.565918, avg loss: 2.939841, ppl: 18.912834 +epoch: 0, batch: 32071, sum loss: 4608.764160, avg loss: 2.909573, ppl: 18.348969 +epoch: 0, batch: 32072, sum loss: 5143.053223, avg loss: 2.984941, ppl: 19.785336 +epoch: 0, batch: 32073, sum loss: 4242.959473, avg loss: 2.704244, ppl: 14.943021 +epoch: 0, batch: 32074, sum loss: 5085.780273, avg loss: 2.934668, ppl: 18.815262 +epoch: 0, batch: 32075, sum loss: 4537.469727, avg loss: 2.916112, ppl: 18.469334 +epoch: 0, batch: 32076, sum loss: 6386.452637, avg loss: 3.161610, ppl: 23.608583 +epoch: 0, batch: 32077, sum loss: 5791.399414, avg loss: 3.336060, ppl: 28.108150 +epoch: 0, batch: 32078, sum loss: 3762.465332, avg loss: 2.463959, ppl: 11.751243 +epoch: 0, batch: 32079, sum loss: 4436.647461, avg loss: 2.842183, ppl: 17.153168 +epoch: 0, batch: 32080, sum loss: 4724.697266, avg loss: 2.934595, ppl: 18.813875 +epoch: 0, batch: 32081, sum loss: 4993.713379, avg loss: 3.059874, ppl: 21.324862 +epoch: 0, batch: 32082, sum loss: 5683.577148, avg loss: 3.045861, ppl: 21.028130 +epoch: 0, batch: 32083, sum loss: 4140.389648, avg loss: 2.769491, ppl: 15.950521 +epoch: 0, batch: 32084, sum loss: 6239.903809, avg loss: 3.118393, ppl: 22.610010 +epoch: 0, batch: 32085, sum loss: 4724.358398, avg loss: 2.849432, ppl: 17.277969 +epoch: 0, batch: 32086, sum loss: 4233.383301, avg loss: 2.833590, ppl: 17.006401 +epoch: 0, batch: 32087, sum loss: 5191.699219, avg loss: 2.890701, ppl: 
18.005932 +epoch: 0, batch: 32088, sum loss: 4427.000000, avg loss: 2.973136, ppl: 19.553150 +epoch: 0, batch: 32089, sum loss: 4602.221680, avg loss: 2.828655, ppl: 16.922684 +epoch: 0, batch: 32090, sum loss: 4870.364258, avg loss: 2.958909, ppl: 19.276930 +epoch: 0, batch: 32091, sum loss: 4765.235840, avg loss: 2.768876, ppl: 15.940708 +epoch: 0, batch: 32092, sum loss: 4682.062988, avg loss: 2.900907, ppl: 18.190628 +epoch: 0, batch: 32093, sum loss: 5248.156738, avg loss: 3.024874, ppl: 20.591413 +epoch: 0, batch: 32094, sum loss: 4716.284180, avg loss: 2.986880, ppl: 19.823742 +epoch: 0, batch: 32095, sum loss: 4662.259766, avg loss: 2.811978, ppl: 16.642807 +epoch: 0, batch: 32096, sum loss: 4829.970703, avg loss: 2.981463, ppl: 19.716650 +epoch: 0, batch: 32097, sum loss: 4851.862793, avg loss: 3.072744, ppl: 21.601093 +epoch: 0, batch: 32098, sum loss: 5042.817871, avg loss: 2.985683, ppl: 19.800016 +epoch: 0, batch: 32099, sum loss: 4233.350586, avg loss: 2.703289, ppl: 14.928752 +epoch: 0, batch: 32100, sum loss: 5587.869141, avg loss: 3.169523, ppl: 23.796131 +epoch: 0, batch: 32101, sum loss: 5082.545898, avg loss: 2.905972, ppl: 18.283014 +epoch: 0, batch: 32102, sum loss: 4682.615723, avg loss: 2.869250, ppl: 17.623793 +epoch: 0, batch: 32103, sum loss: 5455.238281, avg loss: 2.948777, ppl: 19.082609 +epoch: 0, batch: 32104, sum loss: 4578.880371, avg loss: 2.788600, ppl: 16.258238 +epoch: 0, batch: 32105, sum loss: 4329.367676, avg loss: 2.750551, ppl: 15.651257 +epoch: 0, batch: 32106, sum loss: 5643.802734, avg loss: 2.978260, ppl: 19.653591 +epoch: 0, batch: 32107, sum loss: 4868.312988, avg loss: 3.018173, ppl: 20.453888 +epoch: 0, batch: 32108, sum loss: 5120.101562, avg loss: 3.123918, ppl: 22.735277 +epoch: 0, batch: 32109, sum loss: 5425.693359, avg loss: 2.878352, ppl: 17.784939 +epoch: 0, batch: 32110, sum loss: 5496.194336, avg loss: 3.053441, ppl: 21.188133 +epoch: 0, batch: 32111, sum loss: 6026.716797, avg loss: 3.001353, ppl: 
20.112732 +epoch: 0, batch: 32112, sum loss: 3741.670654, avg loss: 2.798557, ppl: 16.420935 +epoch: 0, batch: 32113, sum loss: 5882.861328, avg loss: 3.044959, ppl: 21.009176 +epoch: 0, batch: 32114, sum loss: 4994.288086, avg loss: 3.109768, ppl: 22.415852 +epoch: 0, batch: 32115, sum loss: 4736.399414, avg loss: 2.841271, ppl: 17.137541 +epoch: 0, batch: 32116, sum loss: 4227.209473, avg loss: 3.112820, ppl: 22.484360 +epoch: 0, batch: 32117, sum loss: 5564.097168, avg loss: 3.074087, ppl: 21.630123 +epoch: 0, batch: 32118, sum loss: 5305.961914, avg loss: 3.135911, ppl: 23.009594 +epoch: 0, batch: 32119, sum loss: 5723.024902, avg loss: 3.018473, ppl: 20.460028 +epoch: 0, batch: 32120, sum loss: 4649.392090, avg loss: 2.847147, ppl: 17.238539 +epoch: 0, batch: 32121, sum loss: 5560.179199, avg loss: 2.921797, ppl: 18.574633 +epoch: 0, batch: 32122, sum loss: 4955.656738, avg loss: 2.806148, ppl: 16.546053 +epoch: 0, batch: 32123, sum loss: 4684.181152, avg loss: 2.758646, ppl: 15.778468 +epoch: 0, batch: 32124, sum loss: 4648.032227, avg loss: 2.951131, ppl: 19.127581 +epoch: 0, batch: 32125, sum loss: 4968.242188, avg loss: 2.891876, ppl: 18.027088 +epoch: 0, batch: 32126, sum loss: 5408.266113, avg loss: 2.953723, ppl: 19.177212 +epoch: 0, batch: 32127, sum loss: 3703.970459, avg loss: 2.565076, ppl: 13.001651 +epoch: 0, batch: 32128, sum loss: 5056.471680, avg loss: 3.060818, ppl: 21.345020 +epoch: 0, batch: 32129, sum loss: 5511.858887, avg loss: 3.195281, ppl: 24.417028 +epoch: 0, batch: 32130, sum loss: 5615.515137, avg loss: 2.999741, ppl: 20.080338 +epoch: 0, batch: 32131, sum loss: 4726.978516, avg loss: 2.864836, ppl: 17.546167 +epoch: 0, batch: 32132, sum loss: 5147.567383, avg loss: 2.870924, ppl: 17.653330 +epoch: 0, batch: 32133, sum loss: 5300.717285, avg loss: 3.164608, ppl: 23.679449 +epoch: 0, batch: 32134, sum loss: 4587.516602, avg loss: 2.888865, ppl: 17.972891 +epoch: 0, batch: 32135, sum loss: 4579.123535, avg loss: 2.981200, ppl: 
19.711460 +epoch: 0, batch: 32136, sum loss: 5369.961426, avg loss: 3.006697, ppl: 20.220510 +epoch: 0, batch: 32137, sum loss: 5729.726074, avg loss: 3.088801, ppl: 21.950747 +epoch: 0, batch: 32138, sum loss: 5501.668457, avg loss: 3.202368, ppl: 24.590698 +epoch: 0, batch: 32139, sum loss: 4264.631836, avg loss: 3.043991, ppl: 20.988850 +epoch: 0, batch: 32140, sum loss: 4735.654785, avg loss: 3.061186, ppl: 21.352869 +epoch: 0, batch: 32141, sum loss: 4740.254883, avg loss: 2.920675, ppl: 18.553799 +epoch: 0, batch: 32142, sum loss: 4561.311035, avg loss: 2.827843, ppl: 16.908953 +epoch: 0, batch: 32143, sum loss: 4593.225098, avg loss: 2.951944, ppl: 19.143133 +epoch: 0, batch: 32144, sum loss: 5843.428711, avg loss: 3.207151, ppl: 24.708584 +epoch: 0, batch: 32145, sum loss: 5248.349121, avg loss: 2.864820, ppl: 17.545887 +epoch: 0, batch: 32146, sum loss: 4229.542480, avg loss: 2.799168, ppl: 16.430965 +epoch: 0, batch: 32147, sum loss: 4983.048828, avg loss: 2.892077, ppl: 18.030724 +epoch: 0, batch: 32148, sum loss: 5128.155273, avg loss: 3.004192, ppl: 20.169905 +epoch: 0, batch: 32149, sum loss: 5548.772949, avg loss: 2.820932, ppl: 16.792488 +epoch: 0, batch: 32150, sum loss: 4975.974121, avg loss: 2.986779, ppl: 19.821733 +epoch: 0, batch: 32151, sum loss: 5276.044434, avg loss: 2.977452, ppl: 19.637712 +epoch: 0, batch: 32152, sum loss: 5517.845703, avg loss: 3.065470, ppl: 21.444538 +epoch: 0, batch: 32153, sum loss: 4001.988525, avg loss: 2.588609, ppl: 13.311236 +epoch: 0, batch: 32154, sum loss: 4518.912109, avg loss: 2.812018, ppl: 16.643465 +epoch: 0, batch: 32155, sum loss: 6572.588867, avg loss: 3.338034, ppl: 28.163706 +epoch: 0, batch: 32156, sum loss: 4225.383301, avg loss: 2.896082, ppl: 18.103077 +epoch: 0, batch: 32157, sum loss: 4512.826172, avg loss: 2.885439, ppl: 17.911423 +epoch: 0, batch: 32158, sum loss: 5721.539551, avg loss: 3.005010, ppl: 20.186426 +epoch: 0, batch: 32159, sum loss: 4895.564941, avg loss: 3.044506, ppl: 
20.999647 +epoch: 0, batch: 32160, sum loss: 4721.071777, avg loss: 2.677863, ppl: 14.553953 +epoch: 0, batch: 32161, sum loss: 6081.614258, avg loss: 3.077740, ppl: 21.709288 +epoch: 0, batch: 32162, sum loss: 4808.257324, avg loss: 2.862058, ppl: 17.497498 +epoch: 0, batch: 32163, sum loss: 4073.136719, avg loss: 2.850341, ppl: 17.293671 +epoch: 0, batch: 32164, sum loss: 5504.675781, avg loss: 3.136567, ppl: 23.024696 +epoch: 0, batch: 32165, sum loss: 4821.776367, avg loss: 2.721093, ppl: 15.196922 +epoch: 0, batch: 32166, sum loss: 5911.530273, avg loss: 3.136090, ppl: 23.013714 +epoch: 0, batch: 32167, sum loss: 4884.625977, avg loss: 3.049080, ppl: 21.095924 +epoch: 0, batch: 32168, sum loss: 5780.984375, avg loss: 3.238647, ppl: 25.499191 +epoch: 0, batch: 32169, sum loss: 4810.304688, avg loss: 2.967492, ppl: 19.443098 +epoch: 0, batch: 32170, sum loss: 5022.590820, avg loss: 2.957945, ppl: 19.258362 +epoch: 0, batch: 32171, sum loss: 4794.796875, avg loss: 2.907700, ppl: 18.314623 +epoch: 0, batch: 32172, sum loss: 4823.480469, avg loss: 2.946537, ppl: 19.039900 +epoch: 0, batch: 32173, sum loss: 5747.657227, avg loss: 3.184298, ppl: 24.150318 +epoch: 0, batch: 32174, sum loss: 5430.865234, avg loss: 3.000478, ppl: 20.095146 +epoch: 0, batch: 32175, sum loss: 4279.293457, avg loss: 2.493761, ppl: 12.106722 +epoch: 0, batch: 32176, sum loss: 4665.972168, avg loss: 2.958765, ppl: 19.274155 +epoch: 0, batch: 32177, sum loss: 4517.782715, avg loss: 2.614458, ppl: 13.659805 +epoch: 0, batch: 32178, sum loss: 5218.286133, avg loss: 2.911990, ppl: 18.393368 +epoch: 0, batch: 32179, sum loss: 5900.637207, avg loss: 3.210357, ppl: 24.787945 +epoch: 0, batch: 32180, sum loss: 4912.518555, avg loss: 2.975481, ppl: 19.599045 +epoch: 0, batch: 32181, sum loss: 5231.822754, avg loss: 3.195982, ppl: 24.434162 +epoch: 0, batch: 32182, sum loss: 6482.666016, avg loss: 3.179336, ppl: 24.030794 +epoch: 0, batch: 32183, sum loss: 4589.378906, avg loss: 3.320824, ppl: 
27.683149 +epoch: 0, batch: 32184, sum loss: 4262.153809, avg loss: 2.995189, ppl: 19.989136 +epoch: 0, batch: 32185, sum loss: 5273.894043, avg loss: 2.797822, ppl: 16.408865 +epoch: 0, batch: 32186, sum loss: 4657.212891, avg loss: 2.981570, ppl: 19.718760 +epoch: 0, batch: 32187, sum loss: 4115.373047, avg loss: 2.591545, ppl: 13.350380 +epoch: 0, batch: 32188, sum loss: 4419.413086, avg loss: 2.670340, ppl: 14.444884 +epoch: 0, batch: 32189, sum loss: 5091.059570, avg loss: 2.904198, ppl: 18.250608 +epoch: 0, batch: 32190, sum loss: 5725.217285, avg loss: 3.424173, ppl: 30.697250 +epoch: 0, batch: 32191, sum loss: 4557.653320, avg loss: 2.932853, ppl: 18.781136 +epoch: 0, batch: 32192, sum loss: 5338.560547, avg loss: 3.054097, ppl: 21.202024 +epoch: 0, batch: 32193, sum loss: 5536.864746, avg loss: 3.172988, ppl: 23.878736 +epoch: 0, batch: 32194, sum loss: 4834.928223, avg loss: 2.957143, ppl: 19.242908 +epoch: 0, batch: 32195, sum loss: 5259.078613, avg loss: 3.036419, ppl: 20.830524 +epoch: 0, batch: 32196, sum loss: 4083.353027, avg loss: 2.819995, ppl: 16.776773 +epoch: 0, batch: 32197, sum loss: 4974.137207, avg loss: 2.896993, ppl: 18.119581 +epoch: 0, batch: 32198, sum loss: 5157.456055, avg loss: 3.156338, ppl: 23.484438 +epoch: 0, batch: 32199, sum loss: 6106.201172, avg loss: 3.066902, ppl: 21.475262 +epoch: 0, batch: 32200, sum loss: 4936.202637, avg loss: 2.950510, ppl: 19.115696 +epoch: 0, batch: 32201, sum loss: 4153.844727, avg loss: 2.958579, ppl: 19.270565 +epoch: 0, batch: 32202, sum loss: 5154.846680, avg loss: 3.187908, ppl: 24.237661 +epoch: 0, batch: 32203, sum loss: 5009.442871, avg loss: 2.892288, ppl: 18.034525 +epoch: 0, batch: 32204, sum loss: 5350.496582, avg loss: 3.034882, ppl: 20.798519 +epoch: 0, batch: 32205, sum loss: 5293.069336, avg loss: 3.091746, ppl: 22.015486 +epoch: 0, batch: 32206, sum loss: 4729.071289, avg loss: 3.010230, ppl: 20.292068 +epoch: 0, batch: 32207, sum loss: 4627.050293, avg loss: 2.779009, ppl: 
16.103060 +epoch: 0, batch: 32208, sum loss: 5097.733887, avg loss: 3.002199, ppl: 20.129757 +epoch: 0, batch: 32209, sum loss: 4376.624023, avg loss: 2.787659, ppl: 16.242945 +epoch: 0, batch: 32210, sum loss: 5666.765625, avg loss: 3.111898, ppl: 22.463633 +epoch: 0, batch: 32211, sum loss: 4065.543945, avg loss: 2.801891, ppl: 16.475775 +epoch: 0, batch: 32212, sum loss: 5784.082031, avg loss: 3.186822, ppl: 24.211365 +epoch: 0, batch: 32213, sum loss: 4237.173340, avg loss: 2.671610, ppl: 14.463235 +epoch: 0, batch: 32214, sum loss: 5789.380859, avg loss: 3.387584, ppl: 29.594364 +epoch: 0, batch: 32215, sum loss: 5501.708008, avg loss: 3.280684, ppl: 26.593969 +epoch: 0, batch: 32216, sum loss: 6159.018555, avg loss: 3.300653, ppl: 27.130342 +epoch: 0, batch: 32217, sum loss: 4606.068359, avg loss: 2.730331, ppl: 15.337966 +epoch: 0, batch: 32218, sum loss: 5174.912109, avg loss: 3.186522, ppl: 24.204105 +epoch: 0, batch: 32219, sum loss: 3952.651367, avg loss: 2.785519, ppl: 16.208229 +epoch: 0, batch: 32220, sum loss: 4280.320312, avg loss: 2.880431, ppl: 17.821951 +epoch: 0, batch: 32221, sum loss: 4465.346680, avg loss: 2.895815, ppl: 18.098244 +epoch: 0, batch: 32222, sum loss: 4520.158203, avg loss: 2.897537, ppl: 18.129446 +epoch: 0, batch: 32223, sum loss: 4282.682129, avg loss: 2.909431, ppl: 18.346352 +epoch: 0, batch: 32224, sum loss: 6318.740234, avg loss: 3.391702, ppl: 29.716478 +epoch: 0, batch: 32225, sum loss: 4674.176758, avg loss: 2.967731, ppl: 19.447746 +epoch: 0, batch: 32226, sum loss: 5267.221680, avg loss: 2.861066, ppl: 17.480145 +epoch: 0, batch: 32227, sum loss: 5183.372070, avg loss: 3.003112, ppl: 20.148148 +epoch: 0, batch: 32228, sum loss: 5003.636719, avg loss: 3.025173, ppl: 20.597576 +epoch: 0, batch: 32229, sum loss: 4588.118164, avg loss: 2.887425, ppl: 17.947035 +epoch: 0, batch: 32230, sum loss: 4918.197754, avg loss: 2.978921, ppl: 19.666578 +epoch: 0, batch: 32231, sum loss: 4897.088379, avg loss: 3.099423, ppl: 
22.185144 +epoch: 0, batch: 32232, sum loss: 5158.465332, avg loss: 3.048738, ppl: 21.088724 +epoch: 0, batch: 32233, sum loss: 5237.933594, avg loss: 3.073905, ppl: 21.626183 +epoch: 0, batch: 32234, sum loss: 5508.235352, avg loss: 2.909791, ppl: 18.352970 +epoch: 0, batch: 32235, sum loss: 5702.247070, avg loss: 2.896012, ppl: 18.101809 +epoch: 0, batch: 32236, sum loss: 6192.010742, avg loss: 3.350655, ppl: 28.521412 +epoch: 0, batch: 32237, sum loss: 4934.059082, avg loss: 3.064633, ppl: 21.426594 +epoch: 0, batch: 32238, sum loss: 5238.228027, avg loss: 3.001850, ppl: 20.122726 +epoch: 0, batch: 32239, sum loss: 5082.355469, avg loss: 3.016235, ppl: 20.414280 +epoch: 0, batch: 32240, sum loss: 5060.301270, avg loss: 3.039220, ppl: 20.888945 +epoch: 0, batch: 32241, sum loss: 5096.012695, avg loss: 3.213123, ppl: 24.856588 +epoch: 0, batch: 32242, sum loss: 5542.589844, avg loss: 3.015555, ppl: 20.400404 +epoch: 0, batch: 32243, sum loss: 4784.714844, avg loss: 2.865099, ppl: 17.550785 +epoch: 0, batch: 32244, sum loss: 4706.880859, avg loss: 2.982814, ppl: 19.743301 +epoch: 0, batch: 32245, sum loss: 5165.671875, avg loss: 3.169124, ppl: 23.786636 +epoch: 0, batch: 32246, sum loss: 6045.786621, avg loss: 3.090893, ppl: 21.996708 +epoch: 0, batch: 32247, sum loss: 4188.520996, avg loss: 2.843531, ppl: 17.176310 +epoch: 0, batch: 32248, sum loss: 5217.546875, avg loss: 3.312728, ppl: 27.459932 +epoch: 0, batch: 32249, sum loss: 4569.704102, avg loss: 2.959653, ppl: 19.291279 +epoch: 0, batch: 32250, sum loss: 6148.891602, avg loss: 3.249943, ppl: 25.788864 +epoch: 0, batch: 32251, sum loss: 5187.508789, avg loss: 2.995097, ppl: 19.987307 +epoch: 0, batch: 32252, sum loss: 4666.567871, avg loss: 2.989473, ppl: 19.875212 +epoch: 0, batch: 32253, sum loss: 4923.713379, avg loss: 3.073479, ppl: 21.616976 +epoch: 0, batch: 32254, sum loss: 4571.044434, avg loss: 3.037239, ppl: 20.847605 +epoch: 0, batch: 32255, sum loss: 5727.081055, avg loss: 2.996903, ppl: 
20.023422 +epoch: 0, batch: 32256, sum loss: 5105.021973, avg loss: 2.942376, ppl: 18.960842 +epoch: 0, batch: 32257, sum loss: 5131.302734, avg loss: 3.213089, ppl: 24.855742 +epoch: 0, batch: 32258, sum loss: 5800.809082, avg loss: 3.080621, ppl: 21.771914 +epoch: 0, batch: 32259, sum loss: 4826.000977, avg loss: 3.133767, ppl: 22.960306 +epoch: 0, batch: 32260, sum loss: 4913.627930, avg loss: 2.729793, ppl: 15.329719 +epoch: 0, batch: 32261, sum loss: 4936.964355, avg loss: 2.883741, ppl: 17.881035 +epoch: 0, batch: 32262, sum loss: 5438.413574, avg loss: 3.067351, ppl: 21.484921 +epoch: 0, batch: 32263, sum loss: 4490.991211, avg loss: 2.918123, ppl: 18.506517 +epoch: 0, batch: 32264, sum loss: 5233.507324, avg loss: 3.297736, ppl: 27.051336 +epoch: 0, batch: 32265, sum loss: 5767.498535, avg loss: 3.328043, ppl: 27.883718 +epoch: 0, batch: 32266, sum loss: 4790.576172, avg loss: 2.767519, ppl: 15.919097 +epoch: 0, batch: 32267, sum loss: 6786.353027, avg loss: 3.228522, ppl: 25.242317 +epoch: 0, batch: 32268, sum loss: 4556.714355, avg loss: 3.101916, ppl: 22.240515 +epoch: 0, batch: 32269, sum loss: 4637.827148, avg loss: 2.896832, ppl: 18.116652 +epoch: 0, batch: 32270, sum loss: 4558.269531, avg loss: 2.920096, ppl: 18.543066 +epoch: 0, batch: 32271, sum loss: 5169.246094, avg loss: 2.991462, ppl: 19.914776 +epoch: 0, batch: 32272, sum loss: 5368.373535, avg loss: 3.101313, ppl: 22.227123 +epoch: 0, batch: 32273, sum loss: 5133.541016, avg loss: 3.035802, ppl: 20.817669 +epoch: 0, batch: 32274, sum loss: 4662.338867, avg loss: 3.110300, ppl: 22.427763 +epoch: 0, batch: 32275, sum loss: 6254.907227, avg loss: 3.122769, ppl: 22.709183 +epoch: 0, batch: 32276, sum loss: 3094.542969, avg loss: 2.450153, ppl: 11.590118 +epoch: 0, batch: 32277, sum loss: 4188.006348, avg loss: 2.890274, ppl: 17.998232 +epoch: 0, batch: 32278, sum loss: 6059.120605, avg loss: 3.204189, ppl: 24.635502 +epoch: 0, batch: 32279, sum loss: 5571.576660, avg loss: 2.981047, ppl: 
19.708435 +epoch: 0, batch: 32280, sum loss: 4863.958496, avg loss: 2.679867, ppl: 14.583154 +epoch: 0, batch: 32281, sum loss: 4956.328125, avg loss: 3.044428, ppl: 20.998009 +epoch: 0, batch: 32282, sum loss: 4869.067383, avg loss: 2.882811, ppl: 17.864416 +epoch: 0, batch: 32283, sum loss: 4538.812988, avg loss: 2.820891, ppl: 16.791800 +epoch: 0, batch: 32284, sum loss: 6290.576172, avg loss: 3.024315, ppl: 20.579910 +epoch: 0, batch: 32285, sum loss: 4626.878906, avg loss: 3.041998, ppl: 20.947056 +epoch: 0, batch: 32286, sum loss: 5559.492188, avg loss: 3.078346, ppl: 21.722433 +epoch: 0, batch: 32287, sum loss: 5138.818848, avg loss: 3.008676, ppl: 20.260559 +epoch: 0, batch: 32288, sum loss: 3895.197021, avg loss: 2.778315, ppl: 16.091877 +epoch: 0, batch: 32289, sum loss: 5803.555664, avg loss: 3.130289, ppl: 22.880587 +epoch: 0, batch: 32290, sum loss: 4660.601562, avg loss: 2.754493, ppl: 15.713069 +epoch: 0, batch: 32291, sum loss: 5070.198730, avg loss: 3.000118, ppl: 20.087898 +epoch: 0, batch: 32292, sum loss: 5269.863770, avg loss: 2.892351, ppl: 18.035664 +epoch: 0, batch: 32293, sum loss: 4590.459961, avg loss: 2.909037, ppl: 18.339123 +epoch: 0, batch: 32294, sum loss: 5460.094238, avg loss: 2.995115, ppl: 19.987654 +epoch: 0, batch: 32295, sum loss: 5294.194824, avg loss: 2.975939, ppl: 19.608017 +epoch: 0, batch: 32296, sum loss: 5560.773926, avg loss: 3.163125, ppl: 23.644371 +epoch: 0, batch: 32297, sum loss: 4677.177246, avg loss: 2.794013, ppl: 16.346483 +epoch: 0, batch: 32298, sum loss: 5924.166016, avg loss: 3.041153, ppl: 20.929365 +epoch: 0, batch: 32299, sum loss: 4700.289062, avg loss: 2.932183, ppl: 18.768553 +epoch: 0, batch: 32300, sum loss: 4355.733398, avg loss: 2.741179, ppl: 15.505255 +epoch: 0, batch: 32301, sum loss: 6098.794922, avg loss: 3.176456, ppl: 23.961676 +epoch: 0, batch: 32302, sum loss: 5005.693359, avg loss: 2.770168, ppl: 15.961312 +epoch: 0, batch: 32303, sum loss: 5593.634277, avg loss: 3.231447, ppl: 
25.316250 +epoch: 0, batch: 32304, sum loss: 4254.791992, avg loss: 2.790028, ppl: 16.281469 +epoch: 0, batch: 32305, sum loss: 5261.736328, avg loss: 3.095139, ppl: 22.090315 +epoch: 0, batch: 32306, sum loss: 4108.986328, avg loss: 2.581022, ppl: 13.210627 +epoch: 0, batch: 32307, sum loss: 5589.003906, avg loss: 2.952458, ppl: 19.152981 +epoch: 0, batch: 32308, sum loss: 5655.527832, avg loss: 3.065327, ppl: 21.441465 +epoch: 0, batch: 32309, sum loss: 6150.488281, avg loss: 3.280261, ppl: 26.582699 +epoch: 0, batch: 32310, sum loss: 5700.585938, avg loss: 3.235293, ppl: 25.413815 +epoch: 0, batch: 32311, sum loss: 4821.213867, avg loss: 2.904346, ppl: 18.253298 +epoch: 0, batch: 32312, sum loss: 4475.947754, avg loss: 2.702867, ppl: 14.922454 +epoch: 0, batch: 32313, sum loss: 5201.415039, avg loss: 3.206791, ppl: 24.699696 +epoch: 0, batch: 32314, sum loss: 5624.369629, avg loss: 3.088616, ppl: 21.946680 +epoch: 0, batch: 32315, sum loss: 4931.282715, avg loss: 3.059108, ppl: 21.308546 +epoch: 0, batch: 32316, sum loss: 5348.853027, avg loss: 3.187636, ppl: 24.231075 +epoch: 0, batch: 32317, sum loss: 4882.316406, avg loss: 2.863529, ppl: 17.523252 +epoch: 0, batch: 32318, sum loss: 4831.945801, avg loss: 3.044704, ppl: 21.003822 +epoch: 0, batch: 32319, sum loss: 5266.604980, avg loss: 3.002625, ppl: 20.138336 +epoch: 0, batch: 32320, sum loss: 5896.214355, avg loss: 3.438026, ppl: 31.125446 +epoch: 0, batch: 32321, sum loss: 4785.969727, avg loss: 2.927199, ppl: 18.675241 +epoch: 0, batch: 32322, sum loss: 6104.186035, avg loss: 3.174304, ppl: 23.910162 +epoch: 0, batch: 32323, sum loss: 5050.465820, avg loss: 3.121425, ppl: 22.678677 +epoch: 0, batch: 32324, sum loss: 5092.926758, avg loss: 3.029701, ppl: 20.691034 +epoch: 0, batch: 32325, sum loss: 5990.734863, avg loss: 3.159671, ppl: 23.562834 +epoch: 0, batch: 32326, sum loss: 5757.974121, avg loss: 3.251256, ppl: 25.822746 +epoch: 0, batch: 32327, sum loss: 5566.916992, avg loss: 3.122220, ppl: 
22.696701 +epoch: 0, batch: 32328, sum loss: 5105.318848, avg loss: 2.874617, ppl: 17.718630 +epoch: 0, batch: 32329, sum loss: 4708.383301, avg loss: 2.816019, ppl: 16.710192 +epoch: 0, batch: 32330, sum loss: 4707.964844, avg loss: 3.023741, ppl: 20.568094 +epoch: 0, batch: 32331, sum loss: 5757.277832, avg loss: 3.154673, ppl: 23.445366 +epoch: 0, batch: 32332, sum loss: 5748.892578, avg loss: 3.324981, ppl: 27.798477 +epoch: 0, batch: 32333, sum loss: 5433.457520, avg loss: 2.967481, ppl: 19.442884 +epoch: 0, batch: 32334, sum loss: 5072.433594, avg loss: 2.921909, ppl: 18.576714 +epoch: 0, batch: 32335, sum loss: 4932.407227, avg loss: 3.096301, ppl: 22.115990 +epoch: 0, batch: 32336, sum loss: 4693.345215, avg loss: 2.888212, ppl: 17.961174 +epoch: 0, batch: 32337, sum loss: 4963.461426, avg loss: 3.032048, ppl: 20.739655 +epoch: 0, batch: 32338, sum loss: 4646.903320, avg loss: 2.733473, ppl: 15.386225 +epoch: 0, batch: 32339, sum loss: 5461.088379, avg loss: 3.083618, ppl: 21.837276 +epoch: 0, batch: 32340, sum loss: 4463.345703, avg loss: 3.183556, ppl: 24.132423 +epoch: 0, batch: 32341, sum loss: 4951.751953, avg loss: 3.041617, ppl: 20.939068 +epoch: 0, batch: 32342, sum loss: 4601.021484, avg loss: 2.720888, ppl: 15.193810 +epoch: 0, batch: 32343, sum loss: 4935.969727, avg loss: 3.321649, ppl: 27.705990 +epoch: 0, batch: 32344, sum loss: 5172.404297, avg loss: 3.008961, ppl: 20.266340 +epoch: 0, batch: 32345, sum loss: 5236.136719, avg loss: 2.774847, ppl: 16.036177 +epoch: 0, batch: 32346, sum loss: 6424.854980, avg loss: 3.077038, ppl: 21.694050 +epoch: 0, batch: 32347, sum loss: 4923.238281, avg loss: 2.989216, ppl: 19.870090 +epoch: 0, batch: 32348, sum loss: 5313.781738, avg loss: 2.880098, ppl: 17.816025 +epoch: 0, batch: 32349, sum loss: 4586.870117, avg loss: 2.903082, ppl: 18.230251 +epoch: 0, batch: 32350, sum loss: 4688.355957, avg loss: 2.869251, ppl: 17.623814 +epoch: 0, batch: 32351, sum loss: 4331.204590, avg loss: 3.039442, ppl: 
20.893572 +epoch: 0, batch: 32352, sum loss: 5487.875977, avg loss: 3.199928, ppl: 24.530754 +epoch: 0, batch: 32353, sum loss: 4786.474121, avg loss: 2.952791, ppl: 19.159348 +epoch: 0, batch: 32354, sum loss: 4776.081055, avg loss: 2.738579, ppl: 15.464987 +epoch: 0, batch: 32355, sum loss: 4596.142090, avg loss: 3.092962, ppl: 22.042276 +epoch: 0, batch: 32356, sum loss: 4587.579590, avg loss: 2.780351, ppl: 16.124685 +epoch: 0, batch: 32357, sum loss: 5719.657715, avg loss: 3.154803, ppl: 23.448418 +epoch: 0, batch: 32358, sum loss: 6270.020508, avg loss: 3.270746, ppl: 26.330988 +epoch: 0, batch: 32359, sum loss: 4772.289551, avg loss: 2.776201, ppl: 16.057905 +epoch: 0, batch: 32360, sum loss: 6137.575195, avg loss: 3.059609, ppl: 21.319218 +epoch: 0, batch: 32361, sum loss: 4550.526855, avg loss: 2.773021, ppl: 16.006910 +epoch: 0, batch: 32362, sum loss: 5012.362305, avg loss: 2.892303, ppl: 18.034805 +epoch: 0, batch: 32363, sum loss: 4661.580566, avg loss: 2.928128, ppl: 18.692614 +epoch: 0, batch: 32364, sum loss: 5516.262207, avg loss: 3.190435, ppl: 24.299000 +epoch: 0, batch: 32365, sum loss: 5271.245117, avg loss: 3.106214, ppl: 22.336319 +epoch: 0, batch: 32366, sum loss: 5699.375000, avg loss: 3.221806, ppl: 25.073368 +epoch: 0, batch: 32367, sum loss: 5339.290527, avg loss: 3.025094, ppl: 20.595936 +epoch: 0, batch: 32368, sum loss: 4972.143555, avg loss: 3.140962, ppl: 23.126112 +epoch: 0, batch: 32369, sum loss: 4984.784668, avg loss: 2.993865, ppl: 19.962685 +epoch: 0, batch: 32370, sum loss: 6088.235840, avg loss: 3.371116, ppl: 29.111002 +epoch: 0, batch: 32371, sum loss: 4041.078857, avg loss: 2.474635, ppl: 11.877370 +epoch: 0, batch: 32372, sum loss: 5540.433105, avg loss: 3.206269, ppl: 24.686813 +epoch: 0, batch: 32373, sum loss: 4804.236328, avg loss: 2.918734, ppl: 18.517836 +epoch: 0, batch: 32374, sum loss: 4501.572266, avg loss: 2.932620, ppl: 18.776766 +epoch: 0, batch: 32375, sum loss: 5773.590820, avg loss: 2.948719, ppl: 
19.081486 +epoch: 0, batch: 32376, sum loss: 4430.971680, avg loss: 2.669260, ppl: 14.429288 +epoch: 0, batch: 32377, sum loss: 5438.842773, avg loss: 3.095528, ppl: 22.098902 +epoch: 0, batch: 32378, sum loss: 5131.188477, avg loss: 3.079945, ppl: 21.757202 +epoch: 0, batch: 32379, sum loss: 5633.789062, avg loss: 3.009503, ppl: 20.277317 +epoch: 0, batch: 32380, sum loss: 4994.743652, avg loss: 2.973062, ppl: 19.551691 +epoch: 0, batch: 32381, sum loss: 4647.338867, avg loss: 3.155016, ppl: 23.453423 +epoch: 0, batch: 32382, sum loss: 5143.808105, avg loss: 2.768465, ppl: 15.934157 +epoch: 0, batch: 32383, sum loss: 5406.073242, avg loss: 2.895594, ppl: 18.094240 +epoch: 0, batch: 32384, sum loss: 5230.827148, avg loss: 2.937017, ppl: 18.859503 +epoch: 0, batch: 32385, sum loss: 4563.125000, avg loss: 2.871696, ppl: 17.666956 +epoch: 0, batch: 32386, sum loss: 4387.025879, avg loss: 2.812196, ppl: 16.646439 +epoch: 0, batch: 32387, sum loss: 4102.835449, avg loss: 2.746208, ppl: 15.583434 +epoch: 0, batch: 32388, sum loss: 5187.712891, avg loss: 2.878864, ppl: 17.794048 +epoch: 0, batch: 32389, sum loss: 5419.172363, avg loss: 2.946804, ppl: 19.044989 +epoch: 0, batch: 32390, sum loss: 4615.396484, avg loss: 3.048479, ppl: 21.083244 +epoch: 0, batch: 32391, sum loss: 5473.535645, avg loss: 3.007437, ppl: 20.235474 +epoch: 0, batch: 32392, sum loss: 5881.190918, avg loss: 2.946488, ppl: 19.038979 +epoch: 0, batch: 32393, sum loss: 6010.084473, avg loss: 3.207089, ppl: 24.707052 +epoch: 0, batch: 32394, sum loss: 4687.050293, avg loss: 2.870208, ppl: 17.640692 +epoch: 0, batch: 32395, sum loss: 4952.901367, avg loss: 3.109166, ppl: 22.402351 +epoch: 0, batch: 32396, sum loss: 4434.943848, avg loss: 3.000639, ppl: 20.098370 +epoch: 0, batch: 32397, sum loss: 4758.153809, avg loss: 2.807170, ppl: 16.562984 +epoch: 0, batch: 32398, sum loss: 5151.974609, avg loss: 2.949041, ppl: 19.087637 +epoch: 0, batch: 32399, sum loss: 5649.427734, avg loss: 3.075356, ppl: 
21.657581 +epoch: 0, batch: 32400, sum loss: 5715.898926, avg loss: 3.135436, ppl: 22.998652 +epoch: 0, batch: 32401, sum loss: 4950.241699, avg loss: 2.869705, ppl: 17.631824 +epoch: 0, batch: 32402, sum loss: 5548.447266, avg loss: 3.296760, ppl: 27.024939 +epoch: 0, batch: 32403, sum loss: 4899.561035, avg loss: 2.978456, ppl: 19.657452 +epoch: 0, batch: 32404, sum loss: 4898.878906, avg loss: 3.016551, ppl: 20.420740 +epoch: 0, batch: 32405, sum loss: 4502.282227, avg loss: 2.775760, ppl: 16.050816 +epoch: 0, batch: 32406, sum loss: 5698.371582, avg loss: 3.105380, ppl: 22.317694 +epoch: 0, batch: 32407, sum loss: 4644.686523, avg loss: 2.798004, ppl: 16.411854 +epoch: 0, batch: 32408, sum loss: 5608.396973, avg loss: 3.295180, ppl: 26.982279 +epoch: 0, batch: 32409, sum loss: 4507.968750, avg loss: 2.884177, ppl: 17.888838 +epoch: 0, batch: 32410, sum loss: 4431.446289, avg loss: 2.826177, ppl: 16.880808 +epoch: 0, batch: 32411, sum loss: 5609.044922, avg loss: 3.172537, ppl: 23.867956 +epoch: 0, batch: 32412, sum loss: 3916.815430, avg loss: 2.533516, ppl: 12.597721 +epoch: 0, batch: 32413, sum loss: 4130.728516, avg loss: 2.798597, ppl: 16.421585 +epoch: 0, batch: 32414, sum loss: 5264.729492, avg loss: 3.032678, ppl: 20.752741 +epoch: 0, batch: 32415, sum loss: 5118.118164, avg loss: 3.118902, ppl: 22.621527 +epoch: 0, batch: 32416, sum loss: 4566.562988, avg loss: 2.942373, ppl: 18.960793 +epoch: 0, batch: 32417, sum loss: 4912.975586, avg loss: 3.121331, ppl: 22.676546 +epoch: 0, batch: 32418, sum loss: 4661.991211, avg loss: 2.844412, ppl: 17.191444 +epoch: 0, batch: 32419, sum loss: 5088.136230, avg loss: 3.030456, ppl: 20.706678 +epoch: 0, batch: 32420, sum loss: 5447.500000, avg loss: 3.050112, ppl: 21.117710 +epoch: 0, batch: 32421, sum loss: 6119.019043, avg loss: 3.246164, ppl: 25.691593 +epoch: 0, batch: 32422, sum loss: 4822.744141, avg loss: 3.058177, ppl: 21.288702 +epoch: 0, batch: 32423, sum loss: 4523.120605, avg loss: 2.987530, ppl: 
19.836630 +epoch: 0, batch: 32424, sum loss: 5891.723633, avg loss: 3.326778, ppl: 27.848467 +epoch: 0, batch: 32425, sum loss: 4909.017578, avg loss: 3.054771, ppl: 21.216335 +epoch: 0, batch: 32426, sum loss: 4483.127441, avg loss: 2.970926, ppl: 19.509983 +epoch: 0, batch: 32427, sum loss: 3917.265381, avg loss: 2.816151, ppl: 16.712395 +epoch: 0, batch: 32428, sum loss: 3985.916016, avg loss: 2.678707, ppl: 14.566245 +epoch: 0, batch: 32429, sum loss: 6036.529297, avg loss: 3.021286, ppl: 20.517656 +epoch: 0, batch: 32430, sum loss: 5817.001953, avg loss: 3.090862, ppl: 21.996025 +epoch: 0, batch: 32431, sum loss: 3811.502930, avg loss: 2.601708, ppl: 13.486759 +epoch: 0, batch: 32432, sum loss: 5203.651367, avg loss: 3.041293, ppl: 20.932283 +epoch: 0, batch: 32433, sum loss: 4627.724609, avg loss: 2.901395, ppl: 18.199512 +epoch: 0, batch: 32434, sum loss: 4021.935059, avg loss: 2.785274, ppl: 16.204250 +epoch: 0, batch: 32435, sum loss: 4838.806641, avg loss: 2.946898, ppl: 19.046783 +epoch: 0, batch: 32436, sum loss: 5103.245117, avg loss: 2.984354, ppl: 19.773720 +epoch: 0, batch: 32437, sum loss: 5583.351562, avg loss: 2.973031, ppl: 19.551081 +epoch: 0, batch: 32438, sum loss: 4047.793213, avg loss: 2.714818, ppl: 15.101858 +epoch: 0, batch: 32439, sum loss: 5203.849121, avg loss: 2.852988, ppl: 17.339506 +epoch: 0, batch: 32440, sum loss: 5686.609375, avg loss: 3.054033, ppl: 21.200676 +epoch: 0, batch: 32441, sum loss: 5165.377441, avg loss: 2.956713, ppl: 19.234638 +epoch: 0, batch: 32442, sum loss: 4147.275391, avg loss: 2.696538, ppl: 14.828307 +epoch: 0, batch: 32443, sum loss: 5433.918945, avg loss: 3.257745, ppl: 25.990868 +epoch: 0, batch: 32444, sum loss: 5251.166016, avg loss: 2.928704, ppl: 18.703375 +epoch: 0, batch: 32445, sum loss: 5515.637207, avg loss: 2.970187, ppl: 19.495564 +epoch: 0, batch: 32446, sum loss: 5335.376465, avg loss: 3.118280, ppl: 22.607460 +epoch: 0, batch: 32447, sum loss: 4507.641113, avg loss: 2.854744, ppl: 
17.369993 +epoch: 0, batch: 32448, sum loss: 4502.953613, avg loss: 2.848168, ppl: 17.256138 +epoch: 0, batch: 32449, sum loss: 5775.727539, avg loss: 2.883538, ppl: 17.877420 +epoch: 0, batch: 32450, sum loss: 4094.601807, avg loss: 2.781659, ppl: 16.145782 +epoch: 0, batch: 32451, sum loss: 5364.863281, avg loss: 3.110066, ppl: 22.422523 +epoch: 0, batch: 32452, sum loss: 4924.832520, avg loss: 2.963197, ppl: 19.359756 +epoch: 0, batch: 32453, sum loss: 4856.709961, avg loss: 2.897798, ppl: 18.134176 +epoch: 0, batch: 32454, sum loss: 4790.273438, avg loss: 2.929831, ppl: 18.724463 +epoch: 0, batch: 32455, sum loss: 5307.853516, avg loss: 3.174554, ppl: 23.916142 +epoch: 0, batch: 32456, sum loss: 4410.543945, avg loss: 2.717525, ppl: 15.142805 +epoch: 0, batch: 32457, sum loss: 4597.571289, avg loss: 2.882490, ppl: 17.858685 +epoch: 0, batch: 32458, sum loss: 4797.537109, avg loss: 2.930689, ppl: 18.740532 +epoch: 0, batch: 32459, sum loss: 5678.942383, avg loss: 3.101552, ppl: 22.232441 +epoch: 0, batch: 32460, sum loss: 4979.851074, avg loss: 3.271913, ppl: 26.361715 +epoch: 0, batch: 32461, sum loss: 5821.886719, avg loss: 3.248821, ppl: 25.759945 +epoch: 0, batch: 32462, sum loss: 4802.868164, avg loss: 2.674203, ppl: 14.500787 +epoch: 0, batch: 32463, sum loss: 5938.987793, avg loss: 3.401482, ppl: 30.008551 +epoch: 0, batch: 32464, sum loss: 4407.243164, avg loss: 2.794701, ppl: 16.357735 +epoch: 0, batch: 32465, sum loss: 5347.405762, avg loss: 3.078530, ppl: 21.726433 +epoch: 0, batch: 32466, sum loss: 5096.237793, avg loss: 3.008405, ppl: 20.255062 +epoch: 0, batch: 32467, sum loss: 4337.808594, avg loss: 2.917154, ppl: 18.488600 +epoch: 0, batch: 32468, sum loss: 5194.640137, avg loss: 3.169396, ppl: 23.793108 +epoch: 0, batch: 32469, sum loss: 4731.777344, avg loss: 2.938992, ppl: 18.896790 +epoch: 0, batch: 32470, sum loss: 6175.101562, avg loss: 3.184684, ppl: 24.159647 +epoch: 0, batch: 32471, sum loss: 4985.893066, avg loss: 2.950233, ppl: 
19.110397 +epoch: 0, batch: 32472, sum loss: 5125.699219, avg loss: 2.805528, ppl: 16.535799 +epoch: 0, batch: 32473, sum loss: 5154.783691, avg loss: 3.129802, ppl: 22.869446 +epoch: 0, batch: 32474, sum loss: 4214.085938, avg loss: 2.870631, ppl: 17.648146 +epoch: 0, batch: 32475, sum loss: 5352.954102, avg loss: 2.937955, ppl: 18.877205 +epoch: 0, batch: 32476, sum loss: 5350.869141, avg loss: 3.105554, ppl: 22.321579 +epoch: 0, batch: 32477, sum loss: 4817.766113, avg loss: 3.098242, ppl: 22.158962 +epoch: 0, batch: 32478, sum loss: 4769.747559, avg loss: 3.003619, ppl: 20.158367 +epoch: 0, batch: 32479, sum loss: 4411.855469, avg loss: 2.933414, ppl: 18.791685 +epoch: 0, batch: 32480, sum loss: 4979.089355, avg loss: 2.956704, ppl: 19.234472 +epoch: 0, batch: 32481, sum loss: 4624.164062, avg loss: 3.074577, ppl: 21.640728 +epoch: 0, batch: 32482, sum loss: 4808.354004, avg loss: 2.894855, ppl: 18.080879 +epoch: 0, batch: 32483, sum loss: 4102.775879, avg loss: 3.003496, ppl: 20.155882 +epoch: 0, batch: 32484, sum loss: 4623.885254, avg loss: 2.807459, ppl: 16.567768 +epoch: 0, batch: 32485, sum loss: 5019.419922, avg loss: 2.940492, ppl: 18.925158 +epoch: 0, batch: 32486, sum loss: 5340.642090, avg loss: 3.019018, ppl: 20.471172 +epoch: 0, batch: 32487, sum loss: 6237.055176, avg loss: 3.224951, ppl: 25.152342 +epoch: 0, batch: 32488, sum loss: 4762.876465, avg loss: 2.890095, ppl: 17.995020 +epoch: 0, batch: 32489, sum loss: 5664.362793, avg loss: 2.993849, ppl: 19.962376 +epoch: 0, batch: 32490, sum loss: 5151.732422, avg loss: 2.979602, ppl: 19.679989 +epoch: 0, batch: 32491, sum loss: 5239.099121, avg loss: 2.978453, ppl: 19.657387 +epoch: 0, batch: 32492, sum loss: 5353.811035, avg loss: 3.103659, ppl: 22.279316 +epoch: 0, batch: 32493, sum loss: 6183.664551, avg loss: 3.145303, ppl: 23.226702 +epoch: 0, batch: 32494, sum loss: 5022.107422, avg loss: 2.837349, ppl: 17.070450 +epoch: 0, batch: 32495, sum loss: 5737.684570, avg loss: 3.293734, ppl: 
26.943277 +epoch: 0, batch: 32496, sum loss: 5206.697266, avg loss: 2.956671, ppl: 19.233831 +epoch: 0, batch: 32497, sum loss: 4598.861328, avg loss: 2.903322, ppl: 18.234612 +epoch: 0, batch: 32498, sum loss: 5031.005859, avg loss: 2.962901, ppl: 19.354038 +epoch: 0, batch: 32499, sum loss: 4749.366211, avg loss: 3.002128, ppl: 20.128317 +epoch: 0, batch: 32500, sum loss: 5102.985352, avg loss: 3.068542, ppl: 21.510517 +epoch: 0, batch: 32501, sum loss: 4586.128418, avg loss: 2.761065, ppl: 15.816675 +epoch: 0, batch: 32502, sum loss: 4934.729004, avg loss: 2.902782, ppl: 18.224775 +epoch: 0, batch: 32503, sum loss: 4538.647461, avg loss: 2.903805, ppl: 18.243435 +epoch: 0, batch: 32504, sum loss: 5116.477539, avg loss: 3.100895, ppl: 22.217836 +epoch: 0, batch: 32505, sum loss: 4268.320312, avg loss: 2.764456, ppl: 15.870408 +epoch: 0, batch: 32506, sum loss: 4606.011719, avg loss: 2.983168, ppl: 19.750294 +epoch: 0, batch: 32507, sum loss: 5126.477539, avg loss: 2.958152, ppl: 19.262342 +epoch: 0, batch: 32508, sum loss: 4022.694824, avg loss: 2.602002, ppl: 13.490718 +epoch: 0, batch: 32509, sum loss: 4460.500977, avg loss: 2.940343, ppl: 18.922342 +epoch: 0, batch: 32510, sum loss: 4121.273438, avg loss: 2.718518, ppl: 15.157845 +epoch: 0, batch: 32511, sum loss: 5303.781250, avg loss: 2.951464, ppl: 19.133953 +epoch: 0, batch: 32512, sum loss: 4901.498047, avg loss: 2.853026, ppl: 17.340168 +epoch: 0, batch: 32513, sum loss: 4814.977539, avg loss: 2.717256, ppl: 15.138725 +epoch: 0, batch: 32514, sum loss: 5095.401855, avg loss: 2.999059, ppl: 20.066654 +epoch: 0, batch: 32515, sum loss: 5158.340820, avg loss: 2.896317, ppl: 18.107338 +epoch: 0, batch: 32516, sum loss: 5471.476562, avg loss: 3.135517, ppl: 23.000515 +epoch: 0, batch: 32517, sum loss: 4385.502441, avg loss: 2.894721, ppl: 18.078463 +epoch: 0, batch: 32518, sum loss: 4001.445068, avg loss: 2.937918, ppl: 18.876513 +epoch: 0, batch: 32519, sum loss: 5471.342773, avg loss: 2.935270, ppl: 
18.826582 +epoch: 0, batch: 32520, sum loss: 4440.854004, avg loss: 2.812447, ppl: 16.650614 +epoch: 0, batch: 32521, sum loss: 5519.101562, avg loss: 3.114617, ppl: 22.524805 +epoch: 0, batch: 32522, sum loss: 5761.446289, avg loss: 2.889391, ppl: 17.982363 +epoch: 0, batch: 32523, sum loss: 4579.791992, avg loss: 2.862370, ppl: 17.502960 +epoch: 0, batch: 32524, sum loss: 4507.401367, avg loss: 2.745068, ppl: 15.565670 +epoch: 0, batch: 32525, sum loss: 5676.466309, avg loss: 3.262337, ppl: 26.110485 +epoch: 0, batch: 32526, sum loss: 4492.083496, avg loss: 2.855743, ppl: 17.387346 +epoch: 0, batch: 32527, sum loss: 5696.150391, avg loss: 3.218164, ppl: 24.982216 +epoch: 0, batch: 32528, sum loss: 5864.304199, avg loss: 3.139349, ppl: 23.088837 +epoch: 0, batch: 32529, sum loss: 5867.603027, avg loss: 3.001332, ppl: 20.112301 +epoch: 0, batch: 32530, sum loss: 5437.849609, avg loss: 2.790072, ppl: 16.282188 +epoch: 0, batch: 32531, sum loss: 5102.080078, avg loss: 3.225082, ppl: 25.155642 +epoch: 0, batch: 32532, sum loss: 5194.045410, avg loss: 2.993686, ppl: 19.959120 +epoch: 0, batch: 32533, sum loss: 4822.263672, avg loss: 3.046281, ppl: 21.036970 +epoch: 0, batch: 32534, sum loss: 5529.881836, avg loss: 3.211313, ppl: 24.811655 +epoch: 0, batch: 32535, sum loss: 4857.370117, avg loss: 2.827340, ppl: 16.900448 +epoch: 0, batch: 32536, sum loss: 4863.020996, avg loss: 3.105377, ppl: 22.317640 +epoch: 0, batch: 32537, sum loss: 5162.953613, avg loss: 3.082361, ppl: 21.809824 +epoch: 0, batch: 32538, sum loss: 4281.440430, avg loss: 2.647768, ppl: 14.122479 +epoch: 0, batch: 32539, sum loss: 5487.369141, avg loss: 2.995289, ppl: 19.991133 +epoch: 0, batch: 32540, sum loss: 4654.686523, avg loss: 2.798970, ppl: 16.427713 +epoch: 0, batch: 32541, sum loss: 5069.462891, avg loss: 3.053893, ppl: 21.197714 +epoch: 0, batch: 32542, sum loss: 4488.600586, avg loss: 2.671786, ppl: 14.465783 +epoch: 0, batch: 32543, sum loss: 4952.562500, avg loss: 3.062809, ppl: 
21.387556 +epoch: 0, batch: 32544, sum loss: 5467.481934, avg loss: 3.188036, ppl: 24.240770 +epoch: 0, batch: 32545, sum loss: 4275.901367, avg loss: 2.767574, ppl: 15.919963 +epoch: 0, batch: 32546, sum loss: 4425.275391, avg loss: 2.917123, ppl: 18.488018 +epoch: 0, batch: 32547, sum loss: 6056.690918, avg loss: 3.228513, ppl: 25.242100 +epoch: 0, batch: 32548, sum loss: 4969.013672, avg loss: 2.950721, ppl: 19.119730 +epoch: 0, batch: 32549, sum loss: 4809.650391, avg loss: 3.258571, ppl: 26.012337 +epoch: 0, batch: 32550, sum loss: 4384.947754, avg loss: 2.827174, ppl: 16.897648 +epoch: 0, batch: 32551, sum loss: 4976.575684, avg loss: 3.006994, ppl: 20.226517 +epoch: 0, batch: 32552, sum loss: 4864.902344, avg loss: 3.122530, ppl: 22.703758 +epoch: 0, batch: 32553, sum loss: 5588.227051, avg loss: 3.125407, ppl: 22.769154 +epoch: 0, batch: 32554, sum loss: 4945.766602, avg loss: 2.931693, ppl: 18.759369 +epoch: 0, batch: 32555, sum loss: 6166.092285, avg loss: 3.218211, ppl: 24.983389 +epoch: 0, batch: 32556, sum loss: 5426.931152, avg loss: 3.003282, ppl: 20.151567 +epoch: 0, batch: 32557, sum loss: 5011.482422, avg loss: 2.949666, ppl: 19.099573 +epoch: 0, batch: 32558, sum loss: 4439.864258, avg loss: 2.771451, ppl: 15.981803 +epoch: 0, batch: 32559, sum loss: 6641.872070, avg loss: 3.135917, ppl: 23.009726 +epoch: 0, batch: 32560, sum loss: 5677.295410, avg loss: 3.145316, ppl: 23.227011 +epoch: 0, batch: 32561, sum loss: 6299.267090, avg loss: 3.212273, ppl: 24.835476 +epoch: 0, batch: 32562, sum loss: 4038.104004, avg loss: 2.693865, ppl: 14.788729 +epoch: 0, batch: 32563, sum loss: 5193.846680, avg loss: 3.075102, ppl: 21.652082 +epoch: 0, batch: 32564, sum loss: 5200.165039, avg loss: 3.287083, ppl: 26.764668 +epoch: 0, batch: 32565, sum loss: 5731.387695, avg loss: 2.940681, ppl: 18.928736 +epoch: 0, batch: 32566, sum loss: 4492.262695, avg loss: 2.887058, ppl: 17.940456 +epoch: 0, batch: 32567, sum loss: 5439.957031, avg loss: 3.064764, ppl: 
21.429409 +epoch: 0, batch: 32568, sum loss: 4628.957031, avg loss: 3.067566, ppl: 21.489536 +epoch: 0, batch: 32569, sum loss: 5737.491699, avg loss: 3.136956, ppl: 23.033640 +epoch: 0, batch: 32570, sum loss: 4196.269531, avg loss: 2.710768, ppl: 15.040830 +epoch: 0, batch: 32571, sum loss: 6803.485352, avg loss: 3.189632, ppl: 24.279495 +epoch: 0, batch: 32572, sum loss: 4550.602051, avg loss: 2.776450, ppl: 16.061903 +epoch: 0, batch: 32573, sum loss: 4630.836914, avg loss: 2.827129, ppl: 16.896873 +epoch: 0, batch: 32574, sum loss: 5615.075195, avg loss: 2.999506, ppl: 20.075623 +epoch: 0, batch: 32575, sum loss: 5162.221680, avg loss: 2.971918, ppl: 19.529343 +epoch: 0, batch: 32576, sum loss: 4738.349609, avg loss: 2.923103, ppl: 18.598904 +epoch: 0, batch: 32577, sum loss: 4925.755859, avg loss: 2.801909, ppl: 16.476065 +epoch: 0, batch: 32578, sum loss: 4834.915039, avg loss: 2.847418, ppl: 17.243195 +epoch: 0, batch: 32579, sum loss: 5270.245117, avg loss: 3.073029, ppl: 21.607252 +epoch: 0, batch: 32580, sum loss: 4771.704102, avg loss: 2.857308, ppl: 17.414581 +epoch: 0, batch: 32581, sum loss: 4296.060547, avg loss: 2.777027, ppl: 16.071169 +epoch: 0, batch: 32582, sum loss: 5383.660156, avg loss: 3.140992, ppl: 23.126795 +epoch: 0, batch: 32583, sum loss: 5302.755859, avg loss: 3.012929, ppl: 20.346918 +epoch: 0, batch: 32584, sum loss: 3107.552734, avg loss: 2.559763, ppl: 12.932757 +epoch: 0, batch: 32585, sum loss: 5012.403320, avg loss: 3.103655, ppl: 22.279242 +epoch: 0, batch: 32586, sum loss: 5045.492676, avg loss: 2.860257, ppl: 17.466009 +epoch: 0, batch: 32587, sum loss: 6095.083008, avg loss: 3.348947, ppl: 28.472731 +epoch: 0, batch: 32588, sum loss: 5619.853516, avg loss: 2.953155, ppl: 19.166325 +epoch: 0, batch: 32589, sum loss: 4543.299805, avg loss: 2.989013, ppl: 19.866070 +epoch: 0, batch: 32590, sum loss: 4677.986816, avg loss: 2.821464, ppl: 16.801428 +epoch: 0, batch: 32591, sum loss: 6377.297852, avg loss: 3.263714, ppl: 
26.146473 +epoch: 0, batch: 32592, sum loss: 5541.654785, avg loss: 2.933645, ppl: 18.796013 +epoch: 0, batch: 32593, sum loss: 4137.313477, avg loss: 2.620211, ppl: 13.738624 +epoch: 0, batch: 32594, sum loss: 5017.093750, avg loss: 3.009654, ppl: 20.280388 +epoch: 0, batch: 32595, sum loss: 4443.367676, avg loss: 3.045489, ppl: 21.020300 +epoch: 0, batch: 32596, sum loss: 4595.443848, avg loss: 2.796983, ppl: 16.395111 +epoch: 0, batch: 32597, sum loss: 5600.401855, avg loss: 2.949132, ppl: 19.089380 +epoch: 0, batch: 32598, sum loss: 4834.928711, avg loss: 3.089411, ppl: 21.964144 +epoch: 0, batch: 32599, sum loss: 4610.452637, avg loss: 2.840698, ppl: 17.127712 +epoch: 0, batch: 32600, sum loss: 4402.232422, avg loss: 2.761752, ppl: 15.827547 +epoch: 0, batch: 32601, sum loss: 5750.708984, avg loss: 2.956663, ppl: 19.233679 +epoch: 0, batch: 32602, sum loss: 4795.211914, avg loss: 2.874827, ppl: 17.722364 +epoch: 0, batch: 32603, sum loss: 4804.610840, avg loss: 2.927855, ppl: 18.687511 +epoch: 0, batch: 32604, sum loss: 5149.594238, avg loss: 2.949367, ppl: 19.093861 +epoch: 0, batch: 32605, sum loss: 5463.499512, avg loss: 3.174608, ppl: 23.917431 +epoch: 0, batch: 32606, sum loss: 5261.693848, avg loss: 3.059124, ppl: 21.308887 +epoch: 0, batch: 32607, sum loss: 4950.072754, avg loss: 2.835093, ppl: 17.031988 +epoch: 0, batch: 32608, sum loss: 4204.743164, avg loss: 2.744611, ppl: 15.558554 +epoch: 0, batch: 32609, sum loss: 4701.951660, avg loss: 3.105649, ppl: 22.323692 +epoch: 0, batch: 32610, sum loss: 5874.472656, avg loss: 3.048507, ppl: 21.083841 +epoch: 0, batch: 32611, sum loss: 5047.620117, avg loss: 2.889307, ppl: 17.980848 +epoch: 0, batch: 32612, sum loss: 5231.505859, avg loss: 2.958997, ppl: 19.278622 +epoch: 0, batch: 32613, sum loss: 4225.527832, avg loss: 2.862824, ppl: 17.510902 +epoch: 0, batch: 32614, sum loss: 6397.877930, avg loss: 3.313246, ppl: 27.474163 +epoch: 0, batch: 32615, sum loss: 4610.153320, avg loss: 2.847532, ppl: 
17.245161 +epoch: 0, batch: 32616, sum loss: 5135.671387, avg loss: 3.035267, ppl: 20.806530 +epoch: 0, batch: 32617, sum loss: 4817.101074, avg loss: 3.064314, ppl: 21.419756 +epoch: 0, batch: 32618, sum loss: 4934.801758, avg loss: 3.115405, ppl: 22.542561 +epoch: 0, batch: 32619, sum loss: 6071.595215, avg loss: 3.102501, ppl: 22.253546 +epoch: 0, batch: 32620, sum loss: 6018.287109, avg loss: 3.073691, ppl: 21.621563 +epoch: 0, batch: 32621, sum loss: 4889.485352, avg loss: 2.931346, ppl: 18.752859 +epoch: 0, batch: 32622, sum loss: 4847.302734, avg loss: 2.726267, ppl: 15.275758 +epoch: 0, batch: 32623, sum loss: 4899.479004, avg loss: 3.039379, ppl: 20.892267 +epoch: 0, batch: 32624, sum loss: 4868.133789, avg loss: 2.835256, ppl: 17.034754 +epoch: 0, batch: 32625, sum loss: 5605.340332, avg loss: 3.136732, ppl: 23.028488 +epoch: 0, batch: 32626, sum loss: 4763.804688, avg loss: 2.901221, ppl: 18.196350 +epoch: 0, batch: 32627, sum loss: 5453.682617, avg loss: 2.986683, ppl: 19.819828 +epoch: 0, batch: 32628, sum loss: 5299.548828, avg loss: 3.316364, ppl: 27.559948 +epoch: 0, batch: 32629, sum loss: 5468.049805, avg loss: 3.021022, ppl: 20.512247 +epoch: 0, batch: 32630, sum loss: 5436.066406, avg loss: 3.188309, ppl: 24.247389 +epoch: 0, batch: 32631, sum loss: 4060.530273, avg loss: 2.747314, ppl: 15.600676 +epoch: 0, batch: 32632, sum loss: 4144.559082, avg loss: 2.807967, ppl: 16.576183 +epoch: 0, batch: 32633, sum loss: 5488.970703, avg loss: 2.955827, ppl: 19.217604 +epoch: 0, batch: 32634, sum loss: 4966.045898, avg loss: 3.046654, ppl: 21.044809 +epoch: 0, batch: 32635, sum loss: 4613.474609, avg loss: 2.823424, ppl: 16.834391 +epoch: 0, batch: 32636, sum loss: 4257.599609, avg loss: 2.920164, ppl: 18.544334 +epoch: 0, batch: 32637, sum loss: 5337.509766, avg loss: 2.934310, ppl: 18.808519 +epoch: 0, batch: 32638, sum loss: 4649.839844, avg loss: 3.031186, ppl: 20.721802 +epoch: 0, batch: 32639, sum loss: 4691.835938, avg loss: 2.855652, ppl: 
17.385763 +epoch: 0, batch: 32640, sum loss: 5111.887207, avg loss: 3.120810, ppl: 22.664732 +epoch: 0, batch: 32641, sum loss: 5859.597168, avg loss: 2.890773, ppl: 18.007225 +epoch: 0, batch: 32642, sum loss: 5831.868164, avg loss: 2.972410, ppl: 19.538946 +epoch: 0, batch: 32643, sum loss: 6690.761719, avg loss: 3.368964, ppl: 29.048409 +epoch: 0, batch: 32644, sum loss: 5145.884766, avg loss: 3.000516, ppl: 20.095898 +epoch: 0, batch: 32645, sum loss: 5177.254395, avg loss: 3.094593, ppl: 22.078257 +epoch: 0, batch: 32646, sum loss: 4585.589355, avg loss: 3.069337, ppl: 21.527622 +epoch: 0, batch: 32647, sum loss: 4074.469482, avg loss: 2.692974, ppl: 14.775555 +epoch: 0, batch: 32648, sum loss: 4791.339355, avg loss: 2.987119, ppl: 19.828484 +epoch: 0, batch: 32649, sum loss: 4477.647461, avg loss: 2.823233, ppl: 16.831177 +epoch: 0, batch: 32650, sum loss: 6392.803223, avg loss: 3.402237, ppl: 30.031210 +epoch: 0, batch: 32651, sum loss: 4602.366211, avg loss: 2.901870, ppl: 18.208168 +epoch: 0, batch: 32652, sum loss: 4801.555664, avg loss: 3.075949, ppl: 21.670431 +epoch: 0, batch: 32653, sum loss: 5454.695312, avg loss: 2.993795, ppl: 19.961300 +epoch: 0, batch: 32654, sum loss: 5444.899414, avg loss: 3.053786, ppl: 21.195429 +epoch: 0, batch: 32655, sum loss: 5262.918945, avg loss: 2.922220, ppl: 18.582499 +epoch: 0, batch: 32656, sum loss: 5462.409180, avg loss: 2.928906, ppl: 18.707148 +epoch: 0, batch: 32657, sum loss: 5455.855469, avg loss: 2.914453, ppl: 18.438721 +epoch: 0, batch: 32658, sum loss: 4344.643555, avg loss: 2.884890, ppl: 17.901590 +epoch: 0, batch: 32659, sum loss: 4339.000488, avg loss: 2.957737, ppl: 19.254349 +epoch: 0, batch: 32660, sum loss: 4997.153809, avg loss: 2.936048, ppl: 18.841240 +epoch: 0, batch: 32661, sum loss: 5726.977051, avg loss: 3.158840, ppl: 23.543274 +epoch: 0, batch: 32662, sum loss: 5305.839844, avg loss: 3.184778, ppl: 24.161917 +epoch: 0, batch: 32663, sum loss: 4643.223633, avg loss: 3.024901, ppl: 
20.591974 +epoch: 0, batch: 32664, sum loss: 5276.372559, avg loss: 2.991141, ppl: 19.908382 +epoch: 0, batch: 32665, sum loss: 4830.569336, avg loss: 2.735317, ppl: 15.414625 +epoch: 0, batch: 32666, sum loss: 4845.106445, avg loss: 2.850063, ppl: 17.288868 +epoch: 0, batch: 32667, sum loss: 4435.533203, avg loss: 2.786139, ppl: 16.218281 +epoch: 0, batch: 32668, sum loss: 4698.843262, avg loss: 3.002456, ppl: 20.134922 +epoch: 0, batch: 32669, sum loss: 5100.857910, avg loss: 3.106491, ppl: 22.342503 +epoch: 0, batch: 32670, sum loss: 5524.388672, avg loss: 3.005652, ppl: 20.199385 +epoch: 0, batch: 32671, sum loss: 4959.918457, avg loss: 2.795895, ppl: 16.377285 +epoch: 0, batch: 32672, sum loss: 5379.059570, avg loss: 3.167880, ppl: 23.757061 +epoch: 0, batch: 32673, sum loss: 4843.811035, avg loss: 3.182530, ppl: 24.107679 +epoch: 0, batch: 32674, sum loss: 5463.550781, avg loss: 3.111362, ppl: 22.451591 +epoch: 0, batch: 32675, sum loss: 4601.914062, avg loss: 2.707008, ppl: 14.984381 +epoch: 0, batch: 32676, sum loss: 4788.890625, avg loss: 2.727159, ppl: 15.289385 +epoch: 0, batch: 32677, sum loss: 6930.564453, avg loss: 3.377468, ppl: 29.296501 +epoch: 0, batch: 32678, sum loss: 4781.384277, avg loss: 2.591536, ppl: 13.350263 +epoch: 0, batch: 32679, sum loss: 4995.286133, avg loss: 3.198007, ppl: 24.483677 +epoch: 0, batch: 32680, sum loss: 5366.640625, avg loss: 2.929389, ppl: 18.716192 +epoch: 0, batch: 32681, sum loss: 4002.720947, avg loss: 2.808927, ppl: 16.592102 +epoch: 0, batch: 32682, sum loss: 5127.350586, avg loss: 2.887022, ppl: 17.939802 +epoch: 0, batch: 32683, sum loss: 4417.511230, avg loss: 2.781808, ppl: 16.148193 +epoch: 0, batch: 32684, sum loss: 4312.379883, avg loss: 2.919689, ppl: 18.535517 +epoch: 0, batch: 32685, sum loss: 4797.865234, avg loss: 2.830599, ppl: 16.955612 +epoch: 0, batch: 32686, sum loss: 5485.490234, avg loss: 3.066233, ppl: 21.460899 +epoch: 0, batch: 32687, sum loss: 4469.039551, avg loss: 2.934366, ppl: 
18.809578 +epoch: 0, batch: 32688, sum loss: 4906.826172, avg loss: 2.847839, ppl: 17.250462 +epoch: 0, batch: 32689, sum loss: 4430.632812, avg loss: 2.947859, ppl: 19.065098 +epoch: 0, batch: 32690, sum loss: 5181.726074, avg loss: 3.100973, ppl: 22.219568 +epoch: 0, batch: 32691, sum loss: 5039.927246, avg loss: 3.307039, ppl: 27.304152 +epoch: 0, batch: 32692, sum loss: 4426.500977, avg loss: 2.935345, ppl: 18.828005 +epoch: 0, batch: 32693, sum loss: 4811.730957, avg loss: 2.791027, ppl: 16.297754 +epoch: 0, batch: 32694, sum loss: 4750.428711, avg loss: 2.957926, ppl: 19.257980 +epoch: 0, batch: 32695, sum loss: 4489.915527, avg loss: 2.706399, ppl: 14.975248 +epoch: 0, batch: 32696, sum loss: 6051.361328, avg loss: 3.348844, ppl: 28.469799 +epoch: 0, batch: 32697, sum loss: 4952.093750, avg loss: 3.026952, ppl: 20.634249 +epoch: 0, batch: 32698, sum loss: 4648.026367, avg loss: 2.849802, ppl: 17.284351 +epoch: 0, batch: 32699, sum loss: 5066.522949, avg loss: 2.844763, ppl: 17.197483 +epoch: 0, batch: 32700, sum loss: 4838.495117, avg loss: 2.859631, ppl: 17.455078 +epoch: 0, batch: 32701, sum loss: 5688.819336, avg loss: 3.066749, ppl: 21.471981 +epoch: 0, batch: 32702, sum loss: 4825.850586, avg loss: 3.052404, ppl: 21.166161 +epoch: 0, batch: 32703, sum loss: 5518.232422, avg loss: 3.147879, ppl: 23.286629 +epoch: 0, batch: 32704, sum loss: 4991.937012, avg loss: 2.854166, ppl: 17.359957 +epoch: 0, batch: 32705, sum loss: 5416.694824, avg loss: 3.184418, ppl: 24.153221 +epoch: 0, batch: 32706, sum loss: 4634.214355, avg loss: 2.966847, ppl: 19.430553 +epoch: 0, batch: 32707, sum loss: 5101.897949, avg loss: 2.932125, ppl: 18.767475 +epoch: 0, batch: 32708, sum loss: 5759.067871, avg loss: 3.178293, ppl: 24.005753 +epoch: 0, batch: 32709, sum loss: 5017.876465, avg loss: 2.956910, ppl: 19.238436 +epoch: 0, batch: 32710, sum loss: 5313.912109, avg loss: 3.052218, ppl: 21.162239 +epoch: 0, batch: 32711, sum loss: 5654.777344, avg loss: 3.093423, ppl: 
22.052437 +epoch: 0, batch: 32712, sum loss: 6430.623047, avg loss: 3.244512, ppl: 25.649193 +epoch: 0, batch: 32713, sum loss: 6659.636230, avg loss: 3.192539, ppl: 24.350174 +epoch: 0, batch: 32714, sum loss: 4810.402832, avg loss: 2.742533, ppl: 15.526263 +epoch: 0, batch: 32715, sum loss: 5502.043945, avg loss: 3.096254, ppl: 22.114962 +epoch: 0, batch: 32716, sum loss: 4296.416016, avg loss: 2.627777, ppl: 13.842967 +epoch: 0, batch: 32717, sum loss: 4807.428223, avg loss: 2.793392, ppl: 16.336342 +epoch: 0, batch: 32718, sum loss: 6365.335938, avg loss: 3.435152, ppl: 31.036116 +epoch: 0, batch: 32719, sum loss: 5479.902832, avg loss: 2.930429, ppl: 18.735676 +epoch: 0, batch: 32720, sum loss: 4109.907227, avg loss: 2.621114, ppl: 13.751038 +epoch: 0, batch: 32721, sum loss: 4905.365234, avg loss: 2.788724, ppl: 16.260258 +epoch: 0, batch: 32722, sum loss: 5653.700195, avg loss: 2.877201, ppl: 17.764482 +epoch: 0, batch: 32723, sum loss: 4619.846680, avg loss: 2.853519, ppl: 17.348721 +epoch: 0, batch: 32724, sum loss: 4129.149902, avg loss: 2.559919, ppl: 12.934774 +epoch: 0, batch: 32725, sum loss: 5331.803711, avg loss: 3.062495, ppl: 21.380836 +epoch: 0, batch: 32726, sum loss: 4213.872070, avg loss: 2.942648, ppl: 18.966005 +epoch: 0, batch: 32727, sum loss: 4817.941895, avg loss: 2.952170, ppl: 19.147467 +epoch: 0, batch: 32728, sum loss: 5507.865234, avg loss: 3.071871, ppl: 21.582251 +epoch: 0, batch: 32729, sum loss: 4569.230469, avg loss: 2.815299, ppl: 16.698168 +epoch: 0, batch: 32730, sum loss: 4311.883301, avg loss: 2.903625, ppl: 18.240147 +epoch: 0, batch: 32731, sum loss: 5874.607422, avg loss: 3.231357, ppl: 25.313988 +epoch: 0, batch: 32732, sum loss: 4476.380859, avg loss: 2.858481, ppl: 17.435019 +epoch: 0, batch: 32733, sum loss: 5347.850586, avg loss: 2.923921, ppl: 18.614124 +epoch: 0, batch: 32734, sum loss: 4087.206543, avg loss: 2.673124, ppl: 14.485151 +epoch: 0, batch: 32735, sum loss: 4966.449707, avg loss: 2.935254, ppl: 
18.826283 +epoch: 0, batch: 32736, sum loss: 5133.386230, avg loss: 3.253096, ppl: 25.870319 +epoch: 0, batch: 32737, sum loss: 5087.449219, avg loss: 3.085172, ppl: 21.871237 +epoch: 0, batch: 32738, sum loss: 4664.801758, avg loss: 2.731148, ppl: 15.350507 +epoch: 0, batch: 32739, sum loss: 4670.984863, avg loss: 2.829185, ppl: 16.931664 +epoch: 0, batch: 32740, sum loss: 4063.133789, avg loss: 2.749076, ppl: 15.628180 +epoch: 0, batch: 32741, sum loss: 5340.794922, avg loss: 2.929674, ppl: 18.721519 +epoch: 0, batch: 32742, sum loss: 5811.428711, avg loss: 3.235762, ppl: 25.425741 +epoch: 0, batch: 32743, sum loss: 5670.496094, avg loss: 3.071775, ppl: 21.580168 +epoch: 0, batch: 32744, sum loss: 4932.144531, avg loss: 3.012917, ppl: 20.346661 +epoch: 0, batch: 32745, sum loss: 5514.514160, avg loss: 3.041652, ppl: 20.939802 +epoch: 0, batch: 32746, sum loss: 4062.554199, avg loss: 2.836979, ppl: 17.064144 +epoch: 0, batch: 32747, sum loss: 4729.303223, avg loss: 2.896083, ppl: 18.103090 +epoch: 0, batch: 32748, sum loss: 5458.233398, avg loss: 3.367201, ppl: 28.997259 +epoch: 0, batch: 32749, sum loss: 5735.961914, avg loss: 2.999980, ppl: 20.085135 +epoch: 0, batch: 32750, sum loss: 5004.909668, avg loss: 2.956237, ppl: 19.225496 +epoch: 0, batch: 32751, sum loss: 4942.817383, avg loss: 3.136305, ppl: 23.018663 +epoch: 0, batch: 32752, sum loss: 5099.655273, avg loss: 2.841034, ppl: 17.133467 +epoch: 0, batch: 32753, sum loss: 4567.484863, avg loss: 2.778275, ppl: 16.091248 +epoch: 0, batch: 32754, sum loss: 4348.073730, avg loss: 2.854940, ppl: 17.373398 +epoch: 0, batch: 32755, sum loss: 5461.073242, avg loss: 2.923487, ppl: 18.606049 +epoch: 0, batch: 32756, sum loss: 5403.431152, avg loss: 3.100075, ppl: 22.199621 +epoch: 0, batch: 32757, sum loss: 5772.490234, avg loss: 3.196285, ppl: 24.441555 +epoch: 0, batch: 32758, sum loss: 5636.706543, avg loss: 3.041935, ppl: 20.945744 +epoch: 0, batch: 32759, sum loss: 5993.556641, avg loss: 3.128161, ppl: 
22.831957 +epoch: 0, batch: 32760, sum loss: 4735.343262, avg loss: 2.859507, ppl: 17.452917 +epoch: 0, batch: 32761, sum loss: 5124.656250, avg loss: 3.100215, ppl: 22.202734 +epoch: 0, batch: 32762, sum loss: 4984.578125, avg loss: 3.013651, ppl: 20.361597 +epoch: 0, batch: 32763, sum loss: 6112.892578, avg loss: 3.210553, ppl: 24.792791 +epoch: 0, batch: 32764, sum loss: 5608.951660, avg loss: 2.861710, ppl: 17.491409 +epoch: 0, batch: 32765, sum loss: 5356.288574, avg loss: 2.967473, ppl: 19.442722 +epoch: 0, batch: 32766, sum loss: 5516.247070, avg loss: 2.938864, ppl: 18.894365 +epoch: 0, batch: 32767, sum loss: 5468.354980, avg loss: 3.036288, ppl: 20.827787 +epoch: 0, batch: 32768, sum loss: 5658.108887, avg loss: 3.058437, ppl: 21.294256 +epoch: 0, batch: 32769, sum loss: 4479.978516, avg loss: 2.791264, ppl: 16.301609 +epoch: 0, batch: 32770, sum loss: 4714.389648, avg loss: 2.843420, ppl: 17.174395 +epoch: 0, batch: 32771, sum loss: 4949.589355, avg loss: 3.032837, ppl: 20.756027 +epoch: 0, batch: 32772, sum loss: 4227.801270, avg loss: 2.807305, ppl: 16.565216 +epoch: 0, batch: 32773, sum loss: 5005.666992, avg loss: 2.751878, ppl: 15.672044 +epoch: 0, batch: 32774, sum loss: 4585.051758, avg loss: 2.994809, ppl: 19.981537 +epoch: 0, batch: 32775, sum loss: 4530.117676, avg loss: 2.917011, ppl: 18.485947 +epoch: 0, batch: 32776, sum loss: 5349.700684, avg loss: 2.912194, ppl: 18.397121 +epoch: 0, batch: 32777, sum loss: 4488.086914, avg loss: 3.059364, ppl: 21.313993 +epoch: 0, batch: 32778, sum loss: 5428.221680, avg loss: 3.022395, ppl: 20.540430 +epoch: 0, batch: 32779, sum loss: 5059.035645, avg loss: 2.934476, ppl: 18.811636 +epoch: 0, batch: 32780, sum loss: 5100.213867, avg loss: 2.896203, ppl: 18.105270 +epoch: 0, batch: 32781, sum loss: 4712.176758, avg loss: 2.842085, ppl: 17.151487 +epoch: 0, batch: 32782, sum loss: 6798.184570, avg loss: 3.457876, ppl: 31.749475 +epoch: 0, batch: 32783, sum loss: 5105.524902, avg loss: 2.996200, ppl: 
20.009359 +epoch: 0, batch: 32784, sum loss: 4471.396484, avg loss: 2.691991, ppl: 14.761030 +epoch: 0, batch: 32785, sum loss: 4952.426758, avg loss: 3.001471, ppl: 20.115101 +epoch: 0, batch: 32786, sum loss: 4539.828613, avg loss: 2.739788, ppl: 15.483700 +epoch: 0, batch: 32787, sum loss: 4951.826172, avg loss: 2.958080, ppl: 19.260962 +epoch: 0, batch: 32788, sum loss: 5924.521973, avg loss: 2.969685, ppl: 19.485783 +epoch: 0, batch: 32789, sum loss: 4820.954590, avg loss: 3.024438, ppl: 20.582438 +epoch: 0, batch: 32790, sum loss: 4059.544922, avg loss: 2.622445, ppl: 13.769350 +epoch: 0, batch: 32791, sum loss: 4628.794922, avg loss: 2.950156, ppl: 19.108934 +epoch: 0, batch: 32792, sum loss: 5563.711914, avg loss: 3.005787, ppl: 20.202112 +epoch: 0, batch: 32793, sum loss: 4630.647461, avg loss: 3.167338, ppl: 23.744184 +epoch: 0, batch: 32794, sum loss: 4997.477539, avg loss: 2.950105, ppl: 19.107954 +epoch: 0, batch: 32795, sum loss: 4798.048828, avg loss: 2.885177, ppl: 17.906731 +epoch: 0, batch: 32796, sum loss: 5083.199219, avg loss: 2.955348, ppl: 19.208410 +epoch: 0, batch: 32797, sum loss: 5395.205566, avg loss: 3.244261, ppl: 25.642748 +epoch: 0, batch: 32798, sum loss: 4679.006348, avg loss: 2.886494, ppl: 17.930330 +epoch: 0, batch: 32799, sum loss: 5239.524414, avg loss: 2.999155, ppl: 20.068577 +epoch: 0, batch: 32800, sum loss: 6108.262695, avg loss: 3.181387, ppl: 24.080128 +epoch: 0, batch: 32801, sum loss: 5584.700195, avg loss: 2.806382, ppl: 16.549931 +epoch: 0, batch: 32802, sum loss: 4916.603027, avg loss: 3.061397, ppl: 21.357365 +epoch: 0, batch: 32803, sum loss: 4441.355469, avg loss: 2.895277, ppl: 18.088516 +epoch: 0, batch: 32804, sum loss: 5556.375488, avg loss: 3.069821, ppl: 21.538044 +epoch: 0, batch: 32805, sum loss: 5436.864746, avg loss: 3.020480, ppl: 20.501139 +epoch: 0, batch: 32806, sum loss: 5503.816895, avg loss: 3.095510, ppl: 22.098511 +epoch: 0, batch: 32807, sum loss: 5496.511719, avg loss: 3.105374, ppl: 
22.317560 +epoch: 0, batch: 32808, sum loss: 6038.938477, avg loss: 3.253738, ppl: 25.886934 +epoch: 0, batch: 32809, sum loss: 4977.902344, avg loss: 2.889090, ppl: 17.976948 +epoch: 0, batch: 32810, sum loss: 5740.311035, avg loss: 3.147100, ppl: 23.268499 +epoch: 0, batch: 32811, sum loss: 4553.731934, avg loss: 2.749838, ppl: 15.640100 +epoch: 0, batch: 32812, sum loss: 4755.963379, avg loss: 2.784522, ppl: 16.192074 +epoch: 0, batch: 32813, sum loss: 5538.991211, avg loss: 3.165138, ppl: 23.692007 +epoch: 0, batch: 32814, sum loss: 5345.307129, avg loss: 3.243512, ppl: 25.623545 +epoch: 0, batch: 32815, sum loss: 5716.615234, avg loss: 3.137550, ppl: 23.047323 +epoch: 0, batch: 32816, sum loss: 6994.184082, avg loss: 3.190777, ppl: 24.307314 +epoch: 0, batch: 32817, sum loss: 4541.315430, avg loss: 2.870616, ppl: 17.647894 +epoch: 0, batch: 32818, sum loss: 4349.108887, avg loss: 2.865026, ppl: 17.549505 +epoch: 0, batch: 32819, sum loss: 4795.648438, avg loss: 2.804473, ppl: 16.518364 +epoch: 0, batch: 32820, sum loss: 5406.553711, avg loss: 3.000307, ppl: 20.091711 +epoch: 0, batch: 32821, sum loss: 5499.455078, avg loss: 3.084383, ppl: 21.853985 +epoch: 0, batch: 32822, sum loss: 4321.409180, avg loss: 2.697509, ppl: 14.842710 +epoch: 0, batch: 32823, sum loss: 4671.879395, avg loss: 2.979515, ppl: 19.678267 +epoch: 0, batch: 32824, sum loss: 5301.134277, avg loss: 3.153560, ppl: 23.419287 +epoch: 0, batch: 32825, sum loss: 6155.811523, avg loss: 3.307798, ppl: 27.324888 +epoch: 0, batch: 32826, sum loss: 4975.663574, avg loss: 3.100102, ppl: 22.200209 +epoch: 0, batch: 32827, sum loss: 4178.993164, avg loss: 2.812243, ppl: 16.647215 +epoch: 0, batch: 32828, sum loss: 4592.252930, avg loss: 2.669914, ppl: 14.438734 +epoch: 0, batch: 32829, sum loss: 4338.894043, avg loss: 2.732301, ppl: 15.368213 +epoch: 0, batch: 32830, sum loss: 5547.354980, avg loss: 3.139420, ppl: 23.090471 +epoch: 0, batch: 32831, sum loss: 4864.081055, avg loss: 2.995124, ppl: 
19.987835 +epoch: 0, batch: 32832, sum loss: 5563.656250, avg loss: 2.914435, ppl: 18.438395 +epoch: 0, batch: 32833, sum loss: 4654.612305, avg loss: 2.883899, ppl: 17.883862 +epoch: 0, batch: 32834, sum loss: 5066.199219, avg loss: 2.983628, ppl: 19.759365 +epoch: 0, batch: 32835, sum loss: 5855.904785, avg loss: 2.838538, ppl: 17.090767 +epoch: 0, batch: 32836, sum loss: 5167.187500, avg loss: 3.046691, ppl: 21.045584 +epoch: 0, batch: 32837, sum loss: 5171.331543, avg loss: 2.844517, ppl: 17.193249 +epoch: 0, batch: 32838, sum loss: 5452.478027, avg loss: 2.992579, ppl: 19.937023 +epoch: 0, batch: 32839, sum loss: 5818.135742, avg loss: 3.272292, ppl: 26.371723 +epoch: 0, batch: 32840, sum loss: 4304.622559, avg loss: 2.768246, ppl: 15.930670 +epoch: 0, batch: 32841, sum loss: 4319.806641, avg loss: 2.940644, ppl: 18.928038 +epoch: 0, batch: 32842, sum loss: 5246.847656, avg loss: 3.269064, ppl: 26.286722 +epoch: 0, batch: 32843, sum loss: 6313.677734, avg loss: 3.317750, ppl: 27.598177 +epoch: 0, batch: 32844, sum loss: 5248.270996, avg loss: 3.046008, ppl: 21.031212 +epoch: 0, batch: 32845, sum loss: 5692.555664, avg loss: 3.139854, ppl: 23.100504 +epoch: 0, batch: 32846, sum loss: 6742.185059, avg loss: 3.439890, ppl: 31.183533 +epoch: 0, batch: 32847, sum loss: 5117.157715, avg loss: 3.118317, ppl: 22.608290 +epoch: 0, batch: 32848, sum loss: 4582.353516, avg loss: 2.907585, ppl: 18.312513 +epoch: 0, batch: 32849, sum loss: 5830.474121, avg loss: 2.988454, ppl: 19.854969 +epoch: 0, batch: 32850, sum loss: 5599.432129, avg loss: 3.179689, ppl: 24.039268 +epoch: 0, batch: 32851, sum loss: 6054.744141, avg loss: 3.078162, ppl: 21.718441 +epoch: 0, batch: 32852, sum loss: 5734.852539, avg loss: 3.145833, ppl: 23.239016 +epoch: 0, batch: 32853, sum loss: 5429.958008, avg loss: 3.050538, ppl: 21.126715 +epoch: 0, batch: 32854, sum loss: 5789.854004, avg loss: 3.044087, ppl: 20.990866 +epoch: 0, batch: 32855, sum loss: 5730.069336, avg loss: 2.877985, ppl: 
17.778421 +epoch: 0, batch: 32856, sum loss: 5798.713867, avg loss: 3.449562, ppl: 31.486601 +epoch: 0, batch: 32857, sum loss: 6244.207031, avg loss: 3.355297, ppl: 28.654108 +epoch: 0, batch: 32858, sum loss: 5028.470703, avg loss: 3.045712, ppl: 21.025000 +epoch: 0, batch: 32859, sum loss: 5146.109863, avg loss: 3.041436, ppl: 20.935289 +epoch: 0, batch: 32860, sum loss: 4029.423584, avg loss: 2.540620, ppl: 12.687536 +epoch: 0, batch: 32861, sum loss: 5618.287598, avg loss: 2.980524, ppl: 19.698132 +epoch: 0, batch: 32862, sum loss: 5041.035156, avg loss: 3.031290, ppl: 20.723949 +epoch: 0, batch: 32863, sum loss: 5439.437988, avg loss: 3.372249, ppl: 29.144009 +epoch: 0, batch: 32864, sum loss: 5597.351074, avg loss: 3.141050, ppl: 23.128141 +epoch: 0, batch: 32865, sum loss: 4834.699707, avg loss: 2.867556, ppl: 17.593971 +epoch: 0, batch: 32866, sum loss: 4459.490723, avg loss: 2.781965, ppl: 16.150734 +epoch: 0, batch: 32867, sum loss: 5220.869629, avg loss: 2.867034, ppl: 17.584791 +epoch: 0, batch: 32868, sum loss: 5290.377930, avg loss: 2.902018, ppl: 18.210850 +epoch: 0, batch: 32869, sum loss: 5028.261719, avg loss: 2.784198, ppl: 16.186832 +epoch: 0, batch: 32870, sum loss: 5061.514648, avg loss: 2.961682, ppl: 19.330460 +epoch: 0, batch: 32871, sum loss: 4766.455078, avg loss: 3.099126, ppl: 22.178551 +epoch: 0, batch: 32872, sum loss: 5064.613281, avg loss: 3.065747, ppl: 21.450476 +epoch: 0, batch: 32873, sum loss: 4232.759766, avg loss: 2.568422, ppl: 13.045227 +epoch: 0, batch: 32874, sum loss: 5152.528320, avg loss: 3.290248, ppl: 26.849520 +epoch: 0, batch: 32875, sum loss: 4916.312500, avg loss: 2.999580, ppl: 20.077110 +epoch: 0, batch: 32876, sum loss: 5627.639648, avg loss: 3.090412, ppl: 21.986128 +epoch: 0, batch: 32877, sum loss: 5162.726074, avg loss: 3.123246, ppl: 22.720013 +epoch: 0, batch: 32878, sum loss: 5406.885742, avg loss: 2.885211, ppl: 17.907345 +epoch: 0, batch: 32879, sum loss: 5655.163086, avg loss: 3.214988, ppl: 
24.902987 +epoch: 0, batch: 32880, sum loss: 5398.653320, avg loss: 3.120609, ppl: 22.660172 +epoch: 0, batch: 32881, sum loss: 4288.968750, avg loss: 2.770652, ppl: 15.969040 +epoch: 0, batch: 32882, sum loss: 4837.052246, avg loss: 2.874065, ppl: 17.708866 +epoch: 0, batch: 32883, sum loss: 5837.894043, avg loss: 3.103612, ppl: 22.278275 +epoch: 0, batch: 32884, sum loss: 6531.017578, avg loss: 3.330453, ppl: 27.950993 +epoch: 0, batch: 32885, sum loss: 5838.021484, avg loss: 3.198916, ppl: 24.505951 +epoch: 0, batch: 32886, sum loss: 4650.687500, avg loss: 2.806691, ppl: 16.555050 +epoch: 0, batch: 32887, sum loss: 4546.685547, avg loss: 2.730742, ppl: 15.344272 +epoch: 0, batch: 32888, sum loss: 4705.667969, avg loss: 2.944723, ppl: 19.005404 +epoch: 0, batch: 32889, sum loss: 5136.536133, avg loss: 3.009101, ppl: 20.269178 +epoch: 0, batch: 32890, sum loss: 5027.915039, avg loss: 2.904630, ppl: 18.258490 +epoch: 0, batch: 32891, sum loss: 4851.308594, avg loss: 3.002047, ppl: 20.126699 +epoch: 0, batch: 32892, sum loss: 5199.026367, avg loss: 3.042145, ppl: 20.950144 +epoch: 0, batch: 32893, sum loss: 5625.165039, avg loss: 3.183455, ppl: 24.129978 +epoch: 0, batch: 32894, sum loss: 5224.030273, avg loss: 3.007502, ppl: 20.236776 +epoch: 0, batch: 32895, sum loss: 5038.485352, avg loss: 2.934470, ppl: 18.811533 +epoch: 0, batch: 32896, sum loss: 5455.691406, avg loss: 3.025897, ppl: 20.612476 +epoch: 0, batch: 32897, sum loss: 4584.230469, avg loss: 2.741765, ppl: 15.514340 +epoch: 0, batch: 32898, sum loss: 4964.416504, avg loss: 2.925408, ppl: 18.641825 +epoch: 0, batch: 32899, sum loss: 5326.945801, avg loss: 2.885669, ppl: 17.915558 +epoch: 0, batch: 32900, sum loss: 5237.623535, avg loss: 3.209328, ppl: 24.762445 +epoch: 0, batch: 32901, sum loss: 4670.980957, avg loss: 2.977043, ppl: 19.629694 +epoch: 0, batch: 32902, sum loss: 4071.136719, avg loss: 2.844959, ppl: 17.200857 +epoch: 0, batch: 32903, sum loss: 5121.573242, avg loss: 3.074173, ppl: 
21.631994 +epoch: 0, batch: 32904, sum loss: 4086.801025, avg loss: 2.991802, ppl: 19.921543 +epoch: 0, batch: 32905, sum loss: 5001.525391, avg loss: 2.867847, ppl: 17.599087 +epoch: 0, batch: 32906, sum loss: 5144.203125, avg loss: 2.937866, ppl: 18.875523 +epoch: 0, batch: 32907, sum loss: 4805.402344, avg loss: 2.935493, ppl: 18.830790 +epoch: 0, batch: 32908, sum loss: 5310.142578, avg loss: 3.393062, ppl: 29.756939 +epoch: 0, batch: 32909, sum loss: 5135.846680, avg loss: 3.256720, ppl: 25.964224 +epoch: 0, batch: 32910, sum loss: 5259.805664, avg loss: 3.035087, ppl: 20.802786 +epoch: 0, batch: 32911, sum loss: 4673.976562, avg loss: 2.686193, ppl: 14.675706 +epoch: 0, batch: 32912, sum loss: 4593.101074, avg loss: 2.901517, ppl: 18.201731 +epoch: 0, batch: 32913, sum loss: 4770.389160, avg loss: 2.961136, ppl: 19.319899 +epoch: 0, batch: 32914, sum loss: 5769.436523, avg loss: 3.214171, ppl: 24.882654 +epoch: 0, batch: 32915, sum loss: 5368.163574, avg loss: 2.972405, ppl: 19.538858 +epoch: 0, batch: 32916, sum loss: 4959.617676, avg loss: 3.107530, ppl: 22.365730 +epoch: 0, batch: 32917, sum loss: 4504.479004, avg loss: 3.004989, ppl: 20.186003 +epoch: 0, batch: 32918, sum loss: 5997.187988, avg loss: 3.268223, ppl: 26.264614 +epoch: 0, batch: 32919, sum loss: 4787.621582, avg loss: 3.080837, ppl: 21.776632 +epoch: 0, batch: 32920, sum loss: 4710.107422, avg loss: 2.962332, ppl: 19.343023 +epoch: 0, batch: 32921, sum loss: 4951.076660, avg loss: 2.701078, ppl: 14.895787 +epoch: 0, batch: 32922, sum loss: 4320.222168, avg loss: 2.938927, ppl: 18.895555 +epoch: 0, batch: 32923, sum loss: 4593.616699, avg loss: 2.878206, ppl: 17.782343 +epoch: 0, batch: 32924, sum loss: 5693.218262, avg loss: 2.779892, ppl: 16.117275 +epoch: 0, batch: 32925, sum loss: 5367.363770, avg loss: 2.945864, ppl: 19.027090 +epoch: 0, batch: 32926, sum loss: 4947.103516, avg loss: 2.964113, ppl: 19.377499 +epoch: 0, batch: 32927, sum loss: 5757.210938, avg loss: 3.052604, ppl: 
21.170399 +epoch: 0, batch: 32928, sum loss: 5051.853027, avg loss: 2.982203, ppl: 19.731247 +epoch: 0, batch: 32929, sum loss: 4272.114746, avg loss: 2.676764, ppl: 14.537966 +epoch: 0, batch: 32930, sum loss: 5109.293945, avg loss: 2.818144, ppl: 16.745735 +epoch: 0, batch: 32931, sum loss: 4642.700195, avg loss: 2.953372, ppl: 19.170479 +epoch: 0, batch: 32932, sum loss: 5032.639160, avg loss: 3.102737, ppl: 22.258783 +epoch: 0, batch: 32933, sum loss: 4163.304688, avg loss: 2.538600, ppl: 12.661938 +epoch: 0, batch: 32934, sum loss: 4857.958496, avg loss: 2.877938, ppl: 17.777571 +epoch: 0, batch: 32935, sum loss: 5751.433594, avg loss: 3.213091, ppl: 24.855806 +epoch: 0, batch: 32936, sum loss: 5186.350586, avg loss: 2.997890, ppl: 20.043211 +epoch: 0, batch: 32937, sum loss: 4470.361328, avg loss: 3.130505, ppl: 22.885542 +epoch: 0, batch: 32938, sum loss: 4842.956543, avg loss: 3.173628, ppl: 23.894022 +epoch: 0, batch: 32939, sum loss: 4817.274902, avg loss: 2.874269, ppl: 17.712477 +epoch: 0, batch: 32940, sum loss: 5152.923828, avg loss: 3.198587, ppl: 24.497894 +epoch: 0, batch: 32941, sum loss: 5385.312012, avg loss: 3.173431, ppl: 23.889307 +epoch: 0, batch: 32942, sum loss: 4801.395020, avg loss: 2.915237, ppl: 18.453180 +epoch: 0, batch: 32943, sum loss: 4885.280273, avg loss: 3.072503, ppl: 21.595896 +epoch: 0, batch: 32944, sum loss: 5048.656250, avg loss: 3.046865, ppl: 21.049261 +epoch: 0, batch: 32945, sum loss: 4212.003418, avg loss: 2.562046, ppl: 12.962309 +epoch: 0, batch: 32946, sum loss: 5509.796387, avg loss: 2.986339, ppl: 19.813021 +epoch: 0, batch: 32947, sum loss: 5196.426758, avg loss: 3.040624, ppl: 20.918295 +epoch: 0, batch: 32948, sum loss: 5662.524414, avg loss: 3.301763, ppl: 27.160488 +epoch: 0, batch: 32949, sum loss: 5366.633789, avg loss: 2.872930, ppl: 17.688776 +epoch: 0, batch: 32950, sum loss: 5198.157227, avg loss: 3.081303, ppl: 21.786764 +epoch: 0, batch: 32951, sum loss: 5479.785645, avg loss: 3.035892, ppl: 
20.819546 +epoch: 0, batch: 32952, sum loss: 4277.676758, avg loss: 2.605163, ppl: 13.533426 +epoch: 0, batch: 32953, sum loss: 4208.144531, avg loss: 2.772164, ppl: 15.993204 +epoch: 0, batch: 32954, sum loss: 5863.658203, avg loss: 3.118967, ppl: 22.622999 +epoch: 0, batch: 32955, sum loss: 5116.653809, avg loss: 2.758304, ppl: 15.773067 +epoch: 0, batch: 32956, sum loss: 5322.355957, avg loss: 3.145601, ppl: 23.233624 +epoch: 0, batch: 32957, sum loss: 5746.056641, avg loss: 3.217277, ppl: 24.960062 +epoch: 0, batch: 32958, sum loss: 5261.371094, avg loss: 3.055384, ppl: 21.229334 +epoch: 0, batch: 32959, sum loss: 4905.142090, avg loss: 2.704047, ppl: 14.940079 +epoch: 0, batch: 32960, sum loss: 5445.499023, avg loss: 3.129597, ppl: 22.864767 +epoch: 0, batch: 32961, sum loss: 5168.315430, avg loss: 3.070894, ppl: 21.561180 +epoch: 0, batch: 32962, sum loss: 4526.547363, avg loss: 2.743362, ppl: 15.539139 +epoch: 0, batch: 32963, sum loss: 5093.727051, avg loss: 2.866476, ppl: 17.574968 +epoch: 0, batch: 32964, sum loss: 5113.666504, avg loss: 2.937201, ppl: 18.862970 +epoch: 0, batch: 32965, sum loss: 6255.584961, avg loss: 3.084608, ppl: 21.858898 +epoch: 0, batch: 32966, sum loss: 5155.856934, avg loss: 2.956340, ppl: 19.227467 +epoch: 0, batch: 32967, sum loss: 4388.460938, avg loss: 2.605974, ppl: 13.544417 +epoch: 0, batch: 32968, sum loss: 5541.859375, avg loss: 2.946230, ppl: 19.034067 +epoch: 0, batch: 32969, sum loss: 5245.323242, avg loss: 3.074632, ppl: 21.641926 +epoch: 0, batch: 32970, sum loss: 4862.926758, avg loss: 2.796393, ppl: 16.385433 +epoch: 0, batch: 32971, sum loss: 5452.683594, avg loss: 3.172009, ppl: 23.855368 +epoch: 0, batch: 32972, sum loss: 4455.252930, avg loss: 2.731608, ppl: 15.357565 +epoch: 0, batch: 32973, sum loss: 6673.590820, avg loss: 3.068318, ppl: 21.505692 +epoch: 0, batch: 32974, sum loss: 3842.256348, avg loss: 2.546227, ppl: 12.758873 +epoch: 0, batch: 32975, sum loss: 5398.568848, avg loss: 3.010914, ppl: 
20.305952 +epoch: 0, batch: 32976, sum loss: 4475.433105, avg loss: 2.868867, ppl: 17.617054 +epoch: 0, batch: 32977, sum loss: 4198.333496, avg loss: 2.956573, ppl: 19.231951 +epoch: 0, batch: 32978, sum loss: 5352.755371, avg loss: 2.880923, ppl: 17.830729 +epoch: 0, batch: 32979, sum loss: 4560.634277, avg loss: 2.787674, ppl: 16.243193 +epoch: 0, batch: 32980, sum loss: 4778.630371, avg loss: 3.035979, ppl: 20.821342 +epoch: 0, batch: 32981, sum loss: 5325.093750, avg loss: 2.963324, ppl: 19.362230 +epoch: 0, batch: 32982, sum loss: 4903.040039, avg loss: 3.047259, ppl: 21.057549 +epoch: 0, batch: 32983, sum loss: 5358.112793, avg loss: 3.090030, ppl: 21.977747 +epoch: 0, batch: 32984, sum loss: 5427.124512, avg loss: 3.142516, ppl: 23.162062 +epoch: 0, batch: 32985, sum loss: 5281.761230, avg loss: 3.065445, ppl: 21.443996 +epoch: 0, batch: 32986, sum loss: 5597.734863, avg loss: 3.017647, ppl: 20.443129 +epoch: 0, batch: 32987, sum loss: 4843.589355, avg loss: 2.964253, ppl: 19.380220 +epoch: 0, batch: 32988, sum loss: 4012.731445, avg loss: 2.854005, ppl: 17.357164 +epoch: 0, batch: 32989, sum loss: 6268.782715, avg loss: 3.201626, ppl: 24.572443 +epoch: 0, batch: 32990, sum loss: 5005.033691, avg loss: 2.918387, ppl: 18.511404 +epoch: 0, batch: 32991, sum loss: 4841.612305, avg loss: 2.979454, ppl: 19.677061 +epoch: 0, batch: 32992, sum loss: 5086.333984, avg loss: 2.969255, ppl: 19.477404 +epoch: 0, batch: 32993, sum loss: 5711.969238, avg loss: 3.100961, ppl: 22.219288 +epoch: 0, batch: 32994, sum loss: 4399.440430, avg loss: 2.594010, ppl: 13.383327 +epoch: 0, batch: 32995, sum loss: 5416.552734, avg loss: 3.029392, ppl: 20.684652 +epoch: 0, batch: 32996, sum loss: 5264.319336, avg loss: 2.882979, ppl: 17.867420 +epoch: 0, batch: 32997, sum loss: 4218.190430, avg loss: 2.931334, ppl: 18.752640 +epoch: 0, batch: 32998, sum loss: 6513.425781, avg loss: 3.279670, ppl: 26.567017 +epoch: 0, batch: 32999, sum loss: 4297.867188, avg loss: 2.869070, ppl: 
17.620625 +epoch: 0, batch: 33000, sum loss: 5112.001953, avg loss: 3.136198, ppl: 23.016182 +epoch: 0, batch: 33001, sum loss: 4332.538086, avg loss: 2.804232, ppl: 16.514385 +epoch: 0, batch: 33002, sum loss: 4396.018555, avg loss: 2.919003, ppl: 18.522812 +epoch: 0, batch: 33003, sum loss: 5331.959961, avg loss: 3.048576, ppl: 21.085304 +epoch: 0, batch: 33004, sum loss: 4605.745605, avg loss: 2.706079, ppl: 14.970454 +epoch: 0, batch: 33005, sum loss: 4593.422363, avg loss: 3.046036, ppl: 21.031809 +epoch: 0, batch: 33006, sum loss: 6336.217285, avg loss: 3.269462, ppl: 26.297190 +epoch: 0, batch: 33007, sum loss: 5243.821289, avg loss: 2.969321, ppl: 19.478695 +epoch: 0, batch: 33008, sum loss: 5560.416016, avg loss: 2.968722, ppl: 19.467022 +epoch: 0, batch: 33009, sum loss: 5849.960938, avg loss: 3.118316, ppl: 22.608278 +epoch: 0, batch: 33010, sum loss: 5834.763672, avg loss: 2.972371, ppl: 19.538187 +epoch: 0, batch: 33011, sum loss: 5274.180176, avg loss: 3.062822, ppl: 21.387836 +epoch: 0, batch: 33012, sum loss: 4379.426270, avg loss: 2.672011, ppl: 14.469039 +epoch: 0, batch: 33013, sum loss: 4225.380859, avg loss: 2.882252, ppl: 17.854431 +epoch: 0, batch: 33014, sum loss: 5096.767578, avg loss: 3.193463, ppl: 24.372692 +epoch: 0, batch: 33015, sum loss: 5038.191406, avg loss: 2.794338, ppl: 16.351801 +epoch: 0, batch: 33016, sum loss: 5327.375977, avg loss: 2.999649, ppl: 20.078484 +epoch: 0, batch: 33017, sum loss: 5095.385742, avg loss: 3.190598, ppl: 24.302969 +epoch: 0, batch: 33018, sum loss: 4909.061523, avg loss: 3.024683, ppl: 20.587488 +epoch: 0, batch: 33019, sum loss: 4855.508301, avg loss: 2.876486, ppl: 17.751785 +epoch: 0, batch: 33020, sum loss: 4165.300293, avg loss: 3.035933, ppl: 20.820400 +epoch: 0, batch: 33021, sum loss: 4143.283203, avg loss: 2.671362, ppl: 14.459656 +epoch: 0, batch: 33022, sum loss: 3738.539551, avg loss: 2.834374, ppl: 17.019747 +epoch: 0, batch: 33023, sum loss: 4568.624023, avg loss: 3.041694, ppl: 
20.940680 +epoch: 0, batch: 33024, sum loss: 5887.099609, avg loss: 3.103374, ppl: 22.272963 +epoch: 0, batch: 33025, sum loss: 5605.447754, avg loss: 3.163345, ppl: 23.649580 +epoch: 0, batch: 33026, sum loss: 4993.506836, avg loss: 3.072927, ppl: 21.605053 +epoch: 0, batch: 33027, sum loss: 5209.192871, avg loss: 2.865343, ppl: 17.555067 +epoch: 0, batch: 33028, sum loss: 5045.283691, avg loss: 3.022938, ppl: 20.551588 +epoch: 0, batch: 33029, sum loss: 4858.382812, avg loss: 2.734036, ppl: 15.394902 +epoch: 0, batch: 33030, sum loss: 5730.466797, avg loss: 3.276425, ppl: 26.480925 +epoch: 0, batch: 33031, sum loss: 5253.675781, avg loss: 2.976587, ppl: 19.620733 +epoch: 0, batch: 33032, sum loss: 4704.704590, avg loss: 2.994720, ppl: 19.979759 +epoch: 0, batch: 33033, sum loss: 5621.544922, avg loss: 3.065182, ppl: 21.438374 +epoch: 0, batch: 33034, sum loss: 5236.130859, avg loss: 3.021426, ppl: 20.520527 +epoch: 0, batch: 33035, sum loss: 3581.206055, avg loss: 2.846746, ppl: 17.231615 +epoch: 0, batch: 33036, sum loss: 5535.537109, avg loss: 3.001918, ppl: 20.124104 +epoch: 0, batch: 33037, sum loss: 4942.489258, avg loss: 2.817839, ppl: 16.740629 +epoch: 0, batch: 33038, sum loss: 4473.776855, avg loss: 2.860471, ppl: 17.469753 +epoch: 0, batch: 33039, sum loss: 6272.310547, avg loss: 3.166234, ppl: 23.718000 +epoch: 0, batch: 33040, sum loss: 5537.452637, avg loss: 3.149859, ppl: 23.332779 +epoch: 0, batch: 33041, sum loss: 5022.176270, avg loss: 3.030885, ppl: 20.715553 +epoch: 0, batch: 33042, sum loss: 4811.310547, avg loss: 2.838531, ppl: 17.090645 +epoch: 0, batch: 33043, sum loss: 5156.878906, avg loss: 3.119709, ppl: 22.639786 +epoch: 0, batch: 33044, sum loss: 4760.863770, avg loss: 3.020853, ppl: 20.508770 +epoch: 0, batch: 33045, sum loss: 5643.720703, avg loss: 3.317884, ppl: 27.601883 +epoch: 0, batch: 33046, sum loss: 4166.008789, avg loss: 2.779192, ppl: 16.106001 +epoch: 0, batch: 33047, sum loss: 4702.687500, avg loss: 2.942858, ppl: 
18.969990 +epoch: 0, batch: 33048, sum loss: 4265.431152, avg loss: 2.679291, ppl: 14.574757 +epoch: 0, batch: 33049, sum loss: 4740.535156, avg loss: 3.013690, ppl: 20.362408 +epoch: 0, batch: 33050, sum loss: 4128.790527, avg loss: 2.776591, ppl: 16.064161 +epoch: 0, batch: 33051, sum loss: 5444.010254, avg loss: 2.934776, ppl: 18.817293 +epoch: 0, batch: 33052, sum loss: 5839.219238, avg loss: 3.381135, ppl: 29.404112 +epoch: 0, batch: 33053, sum loss: 6614.351074, avg loss: 3.076442, ppl: 21.681133 +epoch: 0, batch: 33054, sum loss: 5292.624512, avg loss: 3.038246, ppl: 20.868607 +epoch: 0, batch: 33055, sum loss: 4966.076660, avg loss: 2.751289, ppl: 15.662807 +epoch: 0, batch: 33056, sum loss: 5234.569336, avg loss: 2.847970, ppl: 17.252728 +epoch: 0, batch: 33057, sum loss: 5475.111816, avg loss: 3.033303, ppl: 20.765709 +epoch: 0, batch: 33058, sum loss: 5215.237305, avg loss: 2.963203, ppl: 19.359880 +epoch: 0, batch: 33059, sum loss: 5334.508789, avg loss: 3.169643, ppl: 23.798979 +epoch: 0, batch: 33060, sum loss: 4673.710938, avg loss: 2.890359, ppl: 17.999777 +epoch: 0, batch: 33061, sum loss: 5503.972656, avg loss: 3.034164, ppl: 20.783590 +epoch: 0, batch: 33062, sum loss: 5095.247070, avg loss: 3.007820, ppl: 20.243219 +epoch: 0, batch: 33063, sum loss: 4883.393555, avg loss: 3.036936, ppl: 20.841293 +epoch: 0, batch: 33064, sum loss: 5216.152832, avg loss: 3.101161, ppl: 22.223738 +epoch: 0, batch: 33065, sum loss: 5353.556641, avg loss: 3.022900, ppl: 20.550810 +epoch: 0, batch: 33066, sum loss: 4872.327148, avg loss: 3.167963, ppl: 23.759039 +epoch: 0, batch: 33067, sum loss: 4508.242676, avg loss: 2.740573, ppl: 15.495861 +epoch: 0, batch: 33068, sum loss: 5113.988281, avg loss: 3.058606, ppl: 21.297840 +epoch: 0, batch: 33069, sum loss: 5724.066895, avg loss: 2.984394, ppl: 19.774508 +epoch: 0, batch: 33070, sum loss: 3856.597656, avg loss: 2.668926, ppl: 14.424465 +epoch: 0, batch: 33071, sum loss: 4498.957031, avg loss: 2.838459, ppl: 
17.089403 +epoch: 0, batch: 33072, sum loss: 6584.016602, avg loss: 3.135246, ppl: 22.994293 +epoch: 0, batch: 33073, sum loss: 5714.235352, avg loss: 3.102191, ppl: 22.246639 +epoch: 0, batch: 33074, sum loss: 5938.383789, avg loss: 3.372166, ppl: 29.141569 +epoch: 0, batch: 33075, sum loss: 5043.129883, avg loss: 2.881788, ppl: 17.846163 +epoch: 0, batch: 33076, sum loss: 5588.645508, avg loss: 3.094488, ppl: 22.075935 +epoch: 0, batch: 33077, sum loss: 4173.709473, avg loss: 2.720801, ppl: 15.192492 +epoch: 0, batch: 33078, sum loss: 4863.935547, avg loss: 2.953209, ppl: 19.167366 +epoch: 0, batch: 33079, sum loss: 4801.430664, avg loss: 2.788287, ppl: 16.253157 +epoch: 0, batch: 33080, sum loss: 4941.631836, avg loss: 2.930980, ppl: 18.745987 +epoch: 0, batch: 33081, sum loss: 5606.395996, avg loss: 3.253857, ppl: 25.890009 +epoch: 0, batch: 33082, sum loss: 5956.625977, avg loss: 2.948825, ppl: 19.083515 +epoch: 0, batch: 33083, sum loss: 4207.390137, avg loss: 2.840912, ppl: 17.131376 +epoch: 0, batch: 33084, sum loss: 5656.193359, avg loss: 3.288485, ppl: 26.802216 +epoch: 0, batch: 33085, sum loss: 5733.797852, avg loss: 3.094332, ppl: 22.072495 +epoch: 0, batch: 33086, sum loss: 5270.626953, avg loss: 2.847448, ppl: 17.243725 +epoch: 0, batch: 33087, sum loss: 4649.726074, avg loss: 2.842131, ppl: 17.152277 +epoch: 0, batch: 33088, sum loss: 5035.377930, avg loss: 3.072226, ppl: 21.589899 +epoch: 0, batch: 33089, sum loss: 4856.436035, avg loss: 2.992259, ppl: 19.930651 +epoch: 0, batch: 33090, sum loss: 5144.990723, avg loss: 3.035393, ppl: 20.809149 +epoch: 0, batch: 33091, sum loss: 6574.936035, avg loss: 3.334146, ppl: 28.054415 +epoch: 0, batch: 33092, sum loss: 4410.274414, avg loss: 2.741003, ppl: 15.502531 +epoch: 0, batch: 33093, sum loss: 5271.291992, avg loss: 2.968070, ppl: 19.454332 +epoch: 0, batch: 33094, sum loss: 4688.480469, avg loss: 2.754689, ppl: 15.716148 +epoch: 0, batch: 33095, sum loss: 4631.865723, avg loss: 2.654364, ppl: 
14.215947 +epoch: 0, batch: 33096, sum loss: 6016.382324, avg loss: 3.231140, ppl: 25.308489 +epoch: 0, batch: 33097, sum loss: 5646.343750, avg loss: 3.110933, ppl: 22.441980 +epoch: 0, batch: 33098, sum loss: 5480.317383, avg loss: 3.119133, ppl: 22.626759 +epoch: 0, batch: 33099, sum loss: 4937.059570, avg loss: 2.974132, ppl: 19.572632 +epoch: 0, batch: 33100, sum loss: 5145.116211, avg loss: 3.143015, ppl: 23.173639 +epoch: 0, batch: 33101, sum loss: 4947.014648, avg loss: 2.876171, ppl: 17.746199 +epoch: 0, batch: 33102, sum loss: 6409.454102, avg loss: 3.431185, ppl: 30.913261 +epoch: 0, batch: 33103, sum loss: 5246.385742, avg loss: 3.224576, ppl: 25.142918 +epoch: 0, batch: 33104, sum loss: 5040.872559, avg loss: 2.953059, ppl: 19.164497 +epoch: 0, batch: 33105, sum loss: 4630.973633, avg loss: 2.888942, ppl: 17.974279 +epoch: 0, batch: 33106, sum loss: 5841.940918, avg loss: 3.125704, ppl: 22.775925 +epoch: 0, batch: 33107, sum loss: 4458.512695, avg loss: 3.018628, ppl: 20.463190 +epoch: 0, batch: 33108, sum loss: 5425.723633, avg loss: 2.976261, ppl: 19.614344 +epoch: 0, batch: 33109, sum loss: 4340.408691, avg loss: 2.857412, ppl: 17.416393 +epoch: 0, batch: 33110, sum loss: 4430.192871, avg loss: 2.796839, ppl: 16.392748 +epoch: 0, batch: 33111, sum loss: 4687.654297, avg loss: 2.927954, ppl: 18.689356 +epoch: 0, batch: 33112, sum loss: 4945.448242, avg loss: 2.870254, ppl: 17.641504 +epoch: 0, batch: 33113, sum loss: 5917.106934, avg loss: 3.147397, ppl: 23.275406 +epoch: 0, batch: 33114, sum loss: 5043.876953, avg loss: 2.960022, ppl: 19.298391 +epoch: 0, batch: 33115, sum loss: 4836.177246, avg loss: 2.834805, ppl: 17.027077 +epoch: 0, batch: 33116, sum loss: 4477.411621, avg loss: 3.015092, ppl: 20.390965 +epoch: 0, batch: 33117, sum loss: 4731.342773, avg loss: 2.855367, ppl: 17.380810 +epoch: 0, batch: 33118, sum loss: 4713.868164, avg loss: 2.884864, ppl: 17.901138 +epoch: 0, batch: 33119, sum loss: 4487.742188, avg loss: 2.944713, ppl: 
19.005199 +epoch: 0, batch: 33120, sum loss: 5921.005859, avg loss: 3.237292, ppl: 25.464659 +epoch: 0, batch: 33121, sum loss: 5763.352539, avg loss: 3.161466, ppl: 23.605179 +epoch: 0, batch: 33122, sum loss: 6147.208496, avg loss: 3.134732, ppl: 22.982470 +epoch: 0, batch: 33123, sum loss: 4919.772461, avg loss: 2.979874, ppl: 19.685343 +epoch: 0, batch: 33124, sum loss: 4447.835938, avg loss: 2.860345, ppl: 17.467550 +epoch: 0, batch: 33125, sum loss: 4766.231934, avg loss: 3.020426, ppl: 20.500019 +epoch: 0, batch: 33126, sum loss: 5737.278320, avg loss: 3.429336, ppl: 30.856134 +epoch: 0, batch: 33127, sum loss: 5231.339844, avg loss: 3.061053, ppl: 21.350033 +epoch: 0, batch: 33128, sum loss: 6311.065430, avg loss: 3.113500, ppl: 22.499664 +epoch: 0, batch: 33129, sum loss: 5783.752441, avg loss: 3.073195, ppl: 21.610834 +epoch: 0, batch: 33130, sum loss: 4672.547852, avg loss: 2.734083, ppl: 15.395618 +epoch: 0, batch: 33131, sum loss: 5504.620605, avg loss: 3.115235, ppl: 22.538734 +epoch: 0, batch: 33132, sum loss: 4272.807617, avg loss: 2.946764, ppl: 19.044226 +epoch: 0, batch: 33133, sum loss: 5097.286621, avg loss: 3.010801, ppl: 20.303663 +epoch: 0, batch: 33134, sum loss: 4734.256836, avg loss: 3.038676, ppl: 20.877579 +epoch: 0, batch: 33135, sum loss: 5404.432617, avg loss: 3.082962, ppl: 21.822952 +epoch: 0, batch: 33136, sum loss: 4122.426270, avg loss: 2.683871, ppl: 14.641665 +epoch: 0, batch: 33137, sum loss: 4855.103027, avg loss: 2.982250, ppl: 19.732164 +epoch: 0, batch: 33138, sum loss: 4200.270508, avg loss: 2.777957, ppl: 16.086123 +epoch: 0, batch: 33139, sum loss: 4679.496582, avg loss: 2.812198, ppl: 16.646461 +epoch: 0, batch: 33140, sum loss: 5580.061523, avg loss: 2.957107, ppl: 19.242229 +epoch: 0, batch: 33141, sum loss: 4431.063965, avg loss: 2.797389, ppl: 16.401762 +epoch: 0, batch: 33142, sum loss: 5594.355469, avg loss: 3.187667, ppl: 24.231827 +epoch: 0, batch: 33143, sum loss: 5242.736328, avg loss: 2.852414, ppl: 
17.329559 +epoch: 0, batch: 33144, sum loss: 5679.609375, avg loss: 2.806131, ppl: 16.545780 +epoch: 0, batch: 33145, sum loss: 4996.286133, avg loss: 2.973980, ppl: 19.569651 +epoch: 0, batch: 33146, sum loss: 4716.450195, avg loss: 2.960735, ppl: 19.312153 +epoch: 0, batch: 33147, sum loss: 5511.054688, avg loss: 3.078801, ppl: 21.732338 +epoch: 0, batch: 33148, sum loss: 5908.495117, avg loss: 3.069348, ppl: 21.527864 +epoch: 0, batch: 33149, sum loss: 4296.159180, avg loss: 2.964913, ppl: 19.393023 +epoch: 0, batch: 33150, sum loss: 5228.148926, avg loss: 2.989222, ppl: 19.870214 +epoch: 0, batch: 33151, sum loss: 4325.321777, avg loss: 2.819636, ppl: 16.770746 +epoch: 0, batch: 33152, sum loss: 4610.458984, avg loss: 2.881537, ppl: 17.841669 +epoch: 0, batch: 33153, sum loss: 4854.222656, avg loss: 2.880844, ppl: 17.829321 +epoch: 0, batch: 33154, sum loss: 5811.977051, avg loss: 2.950242, ppl: 19.110582 +epoch: 0, batch: 33155, sum loss: 5575.749512, avg loss: 3.027009, ppl: 20.635410 +epoch: 0, batch: 33156, sum loss: 4407.492188, avg loss: 2.841710, ppl: 17.145056 +epoch: 0, batch: 33157, sum loss: 6788.591309, avg loss: 3.173722, ppl: 23.896261 +epoch: 0, batch: 33158, sum loss: 5580.791504, avg loss: 2.940354, ppl: 18.922546 +epoch: 0, batch: 33159, sum loss: 5501.956543, avg loss: 3.219401, ppl: 25.013136 +epoch: 0, batch: 33160, sum loss: 5436.243652, avg loss: 3.131477, ppl: 22.907782 +epoch: 0, batch: 33161, sum loss: 5189.705078, avg loss: 2.960471, ppl: 19.307056 +epoch: 0, batch: 33162, sum loss: 6366.960938, avg loss: 3.063985, ppl: 21.412720 +epoch: 0, batch: 33163, sum loss: 4495.005859, avg loss: 2.939834, ppl: 18.912704 +epoch: 0, batch: 33164, sum loss: 4046.032715, avg loss: 2.851327, ppl: 17.310738 +epoch: 0, batch: 33165, sum loss: 5124.338867, avg loss: 2.968910, ppl: 19.470684 +epoch: 0, batch: 33166, sum loss: 4824.486816, avg loss: 2.811473, ppl: 16.634398 +epoch: 0, batch: 33167, sum loss: 5842.274902, avg loss: 2.968636, ppl: 
19.465347 +epoch: 0, batch: 33168, sum loss: 5093.523438, avg loss: 2.853515, ppl: 17.348650 +epoch: 0, batch: 33169, sum loss: 4610.572266, avg loss: 2.846032, ppl: 17.219326 +epoch: 0, batch: 33170, sum loss: 4339.559570, avg loss: 2.695379, ppl: 14.811124 +epoch: 0, batch: 33171, sum loss: 6175.361816, avg loss: 3.329036, ppl: 27.911423 +epoch: 0, batch: 33172, sum loss: 6060.170898, avg loss: 3.171204, ppl: 23.836168 +epoch: 0, batch: 33173, sum loss: 5496.306152, avg loss: 3.171556, ppl: 23.844551 +epoch: 0, batch: 33174, sum loss: 4973.860840, avg loss: 2.915510, ppl: 18.458231 +epoch: 0, batch: 33175, sum loss: 4226.043945, avg loss: 2.523011, ppl: 12.466081 +epoch: 0, batch: 33176, sum loss: 5450.672852, avg loss: 3.028152, ppl: 20.659014 +epoch: 0, batch: 33177, sum loss: 4890.346191, avg loss: 2.945992, ppl: 19.029526 +epoch: 0, batch: 33178, sum loss: 5043.802734, avg loss: 3.290152, ppl: 26.846939 +epoch: 0, batch: 33179, sum loss: 5184.495605, avg loss: 2.998552, ppl: 20.056467 +epoch: 0, batch: 33180, sum loss: 6431.791016, avg loss: 3.329084, ppl: 27.912773 +epoch: 0, batch: 33181, sum loss: 4959.230469, avg loss: 2.782958, ppl: 16.166769 +epoch: 0, batch: 33182, sum loss: 5757.801270, avg loss: 3.205903, ppl: 24.677769 +epoch: 0, batch: 33183, sum loss: 4929.889648, avg loss: 3.000541, ppl: 20.096415 +epoch: 0, batch: 33184, sum loss: 4969.681152, avg loss: 2.779464, ppl: 16.110380 +epoch: 0, batch: 33185, sum loss: 5406.183594, avg loss: 2.773824, ppl: 16.019783 +epoch: 0, batch: 33186, sum loss: 4610.939941, avg loss: 2.944406, ppl: 18.999374 +epoch: 0, batch: 33187, sum loss: 5207.367188, avg loss: 2.994461, ppl: 19.974586 +epoch: 0, batch: 33188, sum loss: 5847.373535, avg loss: 3.023461, ppl: 20.562332 +epoch: 0, batch: 33189, sum loss: 4754.330078, avg loss: 2.829958, ppl: 16.944757 +epoch: 0, batch: 33190, sum loss: 5488.594238, avg loss: 3.037407, ppl: 20.851099 +epoch: 0, batch: 33191, sum loss: 5675.896484, avg loss: 3.336800, ppl: 
28.128960 +epoch: 0, batch: 33192, sum loss: 5397.694824, avg loss: 2.970663, ppl: 19.504848 +epoch: 0, batch: 33193, sum loss: 3967.006348, avg loss: 2.616759, ppl: 13.691276 +epoch: 0, batch: 33194, sum loss: 3294.929443, avg loss: 2.540424, ppl: 12.685043 +epoch: 0, batch: 33195, sum loss: 5042.053711, avg loss: 3.065078, ppl: 21.436134 +epoch: 0, batch: 33196, sum loss: 5100.804688, avg loss: 2.974230, ppl: 19.574547 +epoch: 0, batch: 33197, sum loss: 4040.191650, avg loss: 2.951199, ppl: 19.128881 +epoch: 0, batch: 33198, sum loss: 6343.194336, avg loss: 3.231378, ppl: 25.314512 +epoch: 0, batch: 33199, sum loss: 5018.694336, avg loss: 3.117201, ppl: 22.583088 +epoch: 0, batch: 33200, sum loss: 4483.148438, avg loss: 2.638698, ppl: 13.994975 +epoch: 0, batch: 33201, sum loss: 5548.743652, avg loss: 2.849894, ppl: 17.285954 +epoch: 0, batch: 33202, sum loss: 4184.559570, avg loss: 2.833148, ppl: 16.998894 +epoch: 0, batch: 33203, sum loss: 5150.981934, avg loss: 3.095542, ppl: 22.099216 +epoch: 0, batch: 33204, sum loss: 4589.370117, avg loss: 2.725279, ppl: 15.260676 +epoch: 0, batch: 33205, sum loss: 5147.291504, avg loss: 2.873976, ppl: 17.707287 +epoch: 0, batch: 33206, sum loss: 4966.051758, avg loss: 2.929824, ppl: 18.724337 +epoch: 0, batch: 33207, sum loss: 5755.514160, avg loss: 2.934989, ppl: 18.821304 +epoch: 0, batch: 33208, sum loss: 5162.013672, avg loss: 2.941318, ppl: 18.940800 +epoch: 0, batch: 33209, sum loss: 5611.674316, avg loss: 2.807241, ppl: 16.564154 +epoch: 0, batch: 33210, sum loss: 4982.971680, avg loss: 2.951998, ppl: 19.144156 +epoch: 0, batch: 33211, sum loss: 5891.408691, avg loss: 3.285783, ppl: 26.729908 +epoch: 0, batch: 33212, sum loss: 5035.595215, avg loss: 3.000951, ppl: 20.104639 +epoch: 0, batch: 33213, sum loss: 4828.597656, avg loss: 2.949663, ppl: 19.099510 +epoch: 0, batch: 33214, sum loss: 5251.995605, avg loss: 2.927534, ppl: 18.681503 +epoch: 0, batch: 33215, sum loss: 5178.175293, avg loss: 2.950527, ppl: 
19.116030 +epoch: 0, batch: 33216, sum loss: 4769.507324, avg loss: 2.960588, ppl: 19.309326 +epoch: 0, batch: 33217, sum loss: 5921.762695, avg loss: 3.324965, ppl: 27.798033 +epoch: 0, batch: 33218, sum loss: 5507.692383, avg loss: 2.879086, ppl: 17.798002 +epoch: 0, batch: 33219, sum loss: 4605.453613, avg loss: 2.973179, ppl: 19.553976 +epoch: 0, batch: 33220, sum loss: 5838.603516, avg loss: 3.063276, ppl: 21.397532 +epoch: 0, batch: 33221, sum loss: 5540.555664, avg loss: 3.140905, ppl: 23.124777 +epoch: 0, batch: 33222, sum loss: 4380.928711, avg loss: 2.855886, ppl: 17.389833 +epoch: 0, batch: 33223, sum loss: 4373.182129, avg loss: 2.780154, ppl: 16.121500 +epoch: 0, batch: 33224, sum loss: 4217.104492, avg loss: 2.679228, ppl: 14.573832 +epoch: 0, batch: 33225, sum loss: 5005.736328, avg loss: 2.917096, ppl: 18.487520 +epoch: 0, batch: 33226, sum loss: 5031.826660, avg loss: 3.023934, ppl: 20.572071 +epoch: 0, batch: 33227, sum loss: 5678.414062, avg loss: 3.072735, ppl: 21.600897 +epoch: 0, batch: 33228, sum loss: 5895.863770, avg loss: 2.992824, ppl: 19.941925 +epoch: 0, batch: 33229, sum loss: 4516.937012, avg loss: 2.956111, ppl: 19.223063 +epoch: 0, batch: 33230, sum loss: 5881.625977, avg loss: 3.028644, ppl: 20.669184 +epoch: 0, batch: 33231, sum loss: 6061.781250, avg loss: 3.083307, ppl: 21.830471 +epoch: 0, batch: 33232, sum loss: 6673.678711, avg loss: 3.391097, ppl: 29.698515 +epoch: 0, batch: 33233, sum loss: 5953.912598, avg loss: 3.225305, ppl: 25.161243 +epoch: 0, batch: 33234, sum loss: 5693.041016, avg loss: 2.903132, ppl: 18.231152 +epoch: 0, batch: 33235, sum loss: 5097.711426, avg loss: 3.083915, ppl: 21.843748 +epoch: 0, batch: 33236, sum loss: 5520.903809, avg loss: 2.869493, ppl: 17.628071 +epoch: 0, batch: 33237, sum loss: 5452.672363, avg loss: 2.956981, ppl: 19.239794 +epoch: 0, batch: 33238, sum loss: 5754.948242, avg loss: 3.005195, ppl: 20.190151 +epoch: 0, batch: 33239, sum loss: 5457.755859, avg loss: 3.113380, ppl: 
22.496965 +epoch: 0, batch: 33240, sum loss: 6034.905273, avg loss: 3.235874, ppl: 25.428591 +epoch: 0, batch: 33241, sum loss: 4907.786621, avg loss: 2.720503, ppl: 15.187954 +epoch: 0, batch: 33242, sum loss: 5559.468262, avg loss: 3.295476, ppl: 26.990263 +epoch: 0, batch: 33243, sum loss: 5254.458008, avg loss: 3.239493, ppl: 25.520782 +epoch: 0, batch: 33244, sum loss: 5423.958008, avg loss: 2.874382, ppl: 17.714464 +epoch: 0, batch: 33245, sum loss: 5332.101074, avg loss: 3.055645, ppl: 21.234882 +epoch: 0, batch: 33246, sum loss: 4908.635254, avg loss: 2.927033, ppl: 18.672155 +epoch: 0, batch: 33247, sum loss: 5953.943848, avg loss: 3.199325, ppl: 24.515978 +epoch: 0, batch: 33248, sum loss: 5351.851562, avg loss: 2.874249, ppl: 17.712116 +epoch: 0, batch: 33249, sum loss: 5215.169434, avg loss: 2.983507, ppl: 19.756977 +epoch: 0, batch: 33250, sum loss: 5565.626953, avg loss: 2.940109, ppl: 18.917913 +epoch: 0, batch: 33251, sum loss: 5019.239258, avg loss: 2.940386, ppl: 18.923155 +epoch: 0, batch: 33252, sum loss: 4799.007812, avg loss: 2.863370, ppl: 17.520470 +epoch: 0, batch: 33253, sum loss: 5755.913086, avg loss: 3.180063, ppl: 24.048264 +epoch: 0, batch: 33254, sum loss: 6269.778320, avg loss: 3.024495, ppl: 20.583611 +epoch: 0, batch: 33255, sum loss: 6150.967285, avg loss: 3.235648, ppl: 25.422844 +epoch: 0, batch: 33256, sum loss: 5137.008789, avg loss: 2.842838, ppl: 17.164410 +epoch: 0, batch: 33257, sum loss: 4737.502441, avg loss: 2.953555, ppl: 19.174004 +epoch: 0, batch: 33258, sum loss: 4296.145996, avg loss: 2.713927, ppl: 15.088405 +epoch: 0, batch: 33259, sum loss: 6346.355957, avg loss: 3.242900, ppl: 25.607887 +epoch: 0, batch: 33260, sum loss: 4907.903320, avg loss: 3.005452, ppl: 20.195345 +epoch: 0, batch: 33261, sum loss: 5305.741211, avg loss: 3.161943, ppl: 23.616449 +epoch: 0, batch: 33262, sum loss: 4977.376465, avg loss: 2.857277, ppl: 17.414042 +epoch: 0, batch: 33263, sum loss: 4665.249023, avg loss: 2.755611, ppl: 
15.730648 +epoch: 0, batch: 33264, sum loss: 5454.983398, avg loss: 2.850044, ppl: 17.288534 +epoch: 0, batch: 33265, sum loss: 5098.878906, avg loss: 2.892160, ppl: 18.032225 +epoch: 0, batch: 33266, sum loss: 3854.961182, avg loss: 2.667793, ppl: 14.408139 +epoch: 0, batch: 33267, sum loss: 5122.428711, avg loss: 3.138743, ppl: 23.074848 +epoch: 0, batch: 33268, sum loss: 4624.974609, avg loss: 2.813245, ppl: 16.663902 +epoch: 0, batch: 33269, sum loss: 4467.919922, avg loss: 2.853078, ppl: 17.341074 +epoch: 0, batch: 33270, sum loss: 6695.167969, avg loss: 3.235944, ppl: 25.430361 +epoch: 0, batch: 33271, sum loss: 6129.468750, avg loss: 3.174246, ppl: 23.908781 +epoch: 0, batch: 33272, sum loss: 6495.243164, avg loss: 3.059464, ppl: 21.316139 +epoch: 0, batch: 33273, sum loss: 4158.969727, avg loss: 2.679749, ppl: 14.581429 +epoch: 0, batch: 33274, sum loss: 5447.684570, avg loss: 3.102326, ppl: 22.249647 +epoch: 0, batch: 33275, sum loss: 3967.673828, avg loss: 2.717585, ppl: 15.143703 +epoch: 0, batch: 33276, sum loss: 5186.327148, avg loss: 2.849630, ppl: 17.281393 +epoch: 0, batch: 33277, sum loss: 5557.335938, avg loss: 3.246107, ppl: 25.690142 +epoch: 0, batch: 33278, sum loss: 4609.845215, avg loss: 3.155267, ppl: 23.459305 +epoch: 0, batch: 33279, sum loss: 5329.426758, avg loss: 2.904320, ppl: 18.252827 +epoch: 0, batch: 33280, sum loss: 5751.201660, avg loss: 3.252942, ppl: 25.866335 +epoch: 0, batch: 33281, sum loss: 4421.598633, avg loss: 2.963538, ppl: 19.366373 +epoch: 0, batch: 33282, sum loss: 6211.957520, avg loss: 3.116888, ppl: 22.576010 +epoch: 0, batch: 33283, sum loss: 4802.564941, avg loss: 3.225363, ppl: 25.162695 +epoch: 0, batch: 33284, sum loss: 5170.420898, avg loss: 2.924446, ppl: 18.623913 +epoch: 0, batch: 33285, sum loss: 4636.993164, avg loss: 3.089269, ppl: 21.961027 +epoch: 0, batch: 33286, sum loss: 5189.416992, avg loss: 2.950209, ppl: 19.109941 +epoch: 0, batch: 33287, sum loss: 5904.901367, avg loss: 3.065889, ppl: 
21.453518 +epoch: 0, batch: 33288, sum loss: 4915.372070, avg loss: 2.925817, ppl: 18.649452 +epoch: 0, batch: 33289, sum loss: 5258.417969, avg loss: 3.044828, ppl: 21.006422 +epoch: 0, batch: 33290, sum loss: 5889.407715, avg loss: 3.157859, ppl: 23.520193 +epoch: 0, batch: 33291, sum loss: 4808.702148, avg loss: 2.794132, ppl: 16.348440 +epoch: 0, batch: 33292, sum loss: 4639.929688, avg loss: 2.901770, ppl: 18.206335 +epoch: 0, batch: 33293, sum loss: 4358.107910, avg loss: 2.722116, ppl: 15.212481 +epoch: 0, batch: 33294, sum loss: 4973.900391, avg loss: 3.172130, ppl: 23.858257 +epoch: 0, batch: 33295, sum loss: 4960.128906, avg loss: 2.968360, ppl: 19.459974 +epoch: 0, batch: 33296, sum loss: 5209.131836, avg loss: 3.108074, ppl: 22.377901 +epoch: 0, batch: 33297, sum loss: 5701.529297, avg loss: 2.994501, ppl: 19.975388 +epoch: 0, batch: 33298, sum loss: 5671.682617, avg loss: 3.037859, ppl: 20.860533 +epoch: 0, batch: 33299, sum loss: 4756.710938, avg loss: 3.090780, ppl: 21.994232 +epoch: 0, batch: 33300, sum loss: 6795.216309, avg loss: 3.399308, ppl: 29.943371 +epoch: 0, batch: 33301, sum loss: 4881.294922, avg loss: 2.620126, ppl: 13.737454 +epoch: 0, batch: 33302, sum loss: 5626.876465, avg loss: 3.120841, ppl: 22.665434 +epoch: 0, batch: 33303, sum loss: 3847.413330, avg loss: 2.655220, ppl: 14.228113 +epoch: 0, batch: 33304, sum loss: 5838.617676, avg loss: 3.118920, ppl: 22.621931 +epoch: 0, batch: 33305, sum loss: 5140.416504, avg loss: 3.020221, ppl: 20.495825 +epoch: 0, batch: 33306, sum loss: 5483.867188, avg loss: 2.988484, ppl: 19.855553 +epoch: 0, batch: 33307, sum loss: 5529.909180, avg loss: 3.030087, ppl: 20.699038 +epoch: 0, batch: 33308, sum loss: 5302.568848, avg loss: 2.903926, ppl: 18.245640 +epoch: 0, batch: 33309, sum loss: 5429.829590, avg loss: 2.967120, ppl: 19.435867 +epoch: 0, batch: 33310, sum loss: 6092.926758, avg loss: 3.205117, ppl: 24.658379 +epoch: 0, batch: 33311, sum loss: 5003.207520, avg loss: 2.967502, ppl: 
19.443283 +epoch: 0, batch: 33312, sum loss: 4839.046875, avg loss: 2.916846, ppl: 18.482893 +epoch: 0, batch: 33313, sum loss: 5376.354492, avg loss: 3.134900, ppl: 22.986345 +epoch: 0, batch: 33314, sum loss: 5788.891602, avg loss: 3.196517, ppl: 24.447226 +epoch: 0, batch: 33315, sum loss: 4986.650879, avg loss: 2.968245, ppl: 19.457737 +epoch: 0, batch: 33316, sum loss: 4866.788086, avg loss: 3.193430, ppl: 24.371891 +epoch: 0, batch: 33317, sum loss: 4897.788574, avg loss: 3.190742, ppl: 24.306450 +epoch: 0, batch: 33318, sum loss: 4446.048340, avg loss: 2.954185, ppl: 19.186079 +epoch: 0, batch: 33319, sum loss: 4792.252930, avg loss: 2.945453, ppl: 19.019283 +epoch: 0, batch: 33320, sum loss: 4898.646484, avg loss: 2.996114, ppl: 20.007637 +epoch: 0, batch: 33321, sum loss: 5573.430176, avg loss: 3.145277, ppl: 23.226103 +epoch: 0, batch: 33322, sum loss: 5316.149902, avg loss: 3.071144, ppl: 21.566553 +epoch: 0, batch: 33323, sum loss: 5455.222656, avg loss: 3.164283, ppl: 23.671755 +epoch: 0, batch: 33324, sum loss: 5170.398926, avg loss: 2.980057, ppl: 19.688938 +epoch: 0, batch: 33325, sum loss: 4433.118164, avg loss: 2.688368, ppl: 14.707647 +epoch: 0, batch: 33326, sum loss: 5924.171875, avg loss: 3.111435, ppl: 22.453239 +epoch: 0, batch: 33327, sum loss: 5498.696289, avg loss: 3.054831, ppl: 21.217606 +epoch: 0, batch: 33328, sum loss: 4718.740234, avg loss: 2.893158, ppl: 18.050217 +epoch: 0, batch: 33329, sum loss: 5483.171875, avg loss: 3.156691, ppl: 23.492725 +epoch: 0, batch: 33330, sum loss: 4066.185059, avg loss: 2.877696, ppl: 17.773281 +epoch: 0, batch: 33331, sum loss: 5169.731934, avg loss: 3.110549, ppl: 22.433352 +epoch: 0, batch: 33332, sum loss: 5206.995605, avg loss: 2.958520, ppl: 19.269436 +epoch: 0, batch: 33333, sum loss: 5463.771973, avg loss: 2.990570, ppl: 19.897026 +epoch: 0, batch: 33334, sum loss: 4256.679199, avg loss: 2.835896, ppl: 17.045658 +epoch: 0, batch: 33335, sum loss: 4501.504883, avg loss: 2.816962, ppl: 
16.725956 +epoch: 0, batch: 33336, sum loss: 5382.284180, avg loss: 3.230663, ppl: 25.296412 +epoch: 0, batch: 33337, sum loss: 5587.051270, avg loss: 2.862219, ppl: 17.500319 +epoch: 0, batch: 33338, sum loss: 5203.877930, avg loss: 3.103088, ppl: 22.266598 +epoch: 0, batch: 33339, sum loss: 5561.296875, avg loss: 3.093046, ppl: 22.044121 +epoch: 0, batch: 33340, sum loss: 5387.963867, avg loss: 3.184376, ppl: 24.152206 +epoch: 0, batch: 33341, sum loss: 5160.750977, avg loss: 3.092122, ppl: 22.023760 +epoch: 0, batch: 33342, sum loss: 3647.449219, avg loss: 2.513749, ppl: 12.351144 +epoch: 0, batch: 33343, sum loss: 4336.678223, avg loss: 2.753446, ppl: 15.696634 +epoch: 0, batch: 33344, sum loss: 6499.402344, avg loss: 3.193810, ppl: 24.381130 +epoch: 0, batch: 33345, sum loss: 4783.389648, avg loss: 2.930999, ppl: 18.746340 +epoch: 0, batch: 33346, sum loss: 4626.603516, avg loss: 2.753931, ppl: 15.704241 +epoch: 0, batch: 33347, sum loss: 4431.611328, avg loss: 2.831701, ppl: 16.974302 +epoch: 0, batch: 33348, sum loss: 4508.653320, avg loss: 2.685321, ppl: 14.662902 +epoch: 0, batch: 33349, sum loss: 4527.065918, avg loss: 2.794485, ppl: 16.354210 +epoch: 0, batch: 33350, sum loss: 3897.997070, avg loss: 2.516460, ppl: 12.384683 +epoch: 0, batch: 33351, sum loss: 4708.304199, avg loss: 2.935352, ppl: 18.828127 +epoch: 0, batch: 33352, sum loss: 5016.552246, avg loss: 3.029319, ppl: 20.683144 +epoch: 0, batch: 33353, sum loss: 6139.945312, avg loss: 3.236661, ppl: 25.448599 +epoch: 0, batch: 33354, sum loss: 5434.864746, avg loss: 3.100322, ppl: 22.205105 +epoch: 0, batch: 33355, sum loss: 5780.731934, avg loss: 3.211518, ppl: 24.816725 +epoch: 0, batch: 33356, sum loss: 5075.761230, avg loss: 2.942471, ppl: 18.962637 +epoch: 0, batch: 33357, sum loss: 5066.891602, avg loss: 2.998161, ppl: 20.048626 +epoch: 0, batch: 33358, sum loss: 5349.069336, avg loss: 2.929392, ppl: 18.716246 +epoch: 0, batch: 33359, sum loss: 4999.576172, avg loss: 2.858534, ppl: 
17.435949 +epoch: 0, batch: 33360, sum loss: 5183.132324, avg loss: 2.807764, ppl: 16.572821 +epoch: 0, batch: 33361, sum loss: 5073.032227, avg loss: 3.162738, ppl: 23.635229 +epoch: 0, batch: 33362, sum loss: 4560.900391, avg loss: 2.822339, ppl: 16.816143 +epoch: 0, batch: 33363, sum loss: 4580.694824, avg loss: 2.868312, ppl: 17.607279 +epoch: 0, batch: 33364, sum loss: 4971.414551, avg loss: 3.042481, ppl: 20.957182 +epoch: 0, batch: 33365, sum loss: 4929.778320, avg loss: 2.884598, ppl: 17.896376 +epoch: 0, batch: 33366, sum loss: 4783.431152, avg loss: 2.980331, ppl: 19.694334 +epoch: 0, batch: 33367, sum loss: 4926.333496, avg loss: 3.014892, ppl: 20.386890 +epoch: 0, batch: 33368, sum loss: 5101.365723, avg loss: 3.018560, ppl: 20.461800 +epoch: 0, batch: 33369, sum loss: 5032.344727, avg loss: 2.981247, ppl: 19.712381 +epoch: 0, batch: 33370, sum loss: 4851.529785, avg loss: 3.041712, ppl: 20.941055 +epoch: 0, batch: 33371, sum loss: 5523.085938, avg loss: 3.268098, ppl: 26.261345 +epoch: 0, batch: 33372, sum loss: 4788.861816, avg loss: 2.813667, ppl: 16.670944 +epoch: 0, batch: 33373, sum loss: 5008.339844, avg loss: 2.932283, ppl: 18.770437 +epoch: 0, batch: 33374, sum loss: 4996.631836, avg loss: 3.039314, ppl: 20.890903 +epoch: 0, batch: 33375, sum loss: 5039.745605, avg loss: 3.214123, ppl: 24.881474 +epoch: 0, batch: 33376, sum loss: 5632.495117, avg loss: 3.157228, ppl: 23.505360 +epoch: 0, batch: 33377, sum loss: 4413.368164, avg loss: 2.775703, ppl: 16.049910 +epoch: 0, batch: 33378, sum loss: 4788.401367, avg loss: 2.753537, ppl: 15.698065 +epoch: 0, batch: 33379, sum loss: 4963.009277, avg loss: 2.938431, ppl: 18.886183 +epoch: 0, batch: 33380, sum loss: 4394.351074, avg loss: 2.815087, ppl: 16.694633 +epoch: 0, batch: 33381, sum loss: 5375.634766, avg loss: 3.279826, ppl: 26.571146 +epoch: 0, batch: 33382, sum loss: 6300.172852, avg loss: 3.234175, ppl: 25.385426 +epoch: 0, batch: 33383, sum loss: 3968.284912, avg loss: 2.907168, ppl: 
18.304893 +epoch: 0, batch: 33384, sum loss: 5018.379395, avg loss: 2.844886, ppl: 17.199602 +epoch: 0, batch: 33385, sum loss: 5020.011719, avg loss: 3.059117, ppl: 21.308741 +epoch: 0, batch: 33386, sum loss: 4564.485352, avg loss: 2.981375, ppl: 19.714911 +epoch: 0, batch: 33387, sum loss: 4590.997070, avg loss: 2.967678, ppl: 19.446705 +epoch: 0, batch: 33388, sum loss: 4214.311523, avg loss: 2.774399, ppl: 16.028984 +epoch: 0, batch: 33389, sum loss: 4986.863770, avg loss: 3.020511, ppl: 20.501768 +epoch: 0, batch: 33390, sum loss: 5048.448730, avg loss: 2.751198, ppl: 15.661387 +epoch: 0, batch: 33391, sum loss: 5453.860840, avg loss: 3.081277, ppl: 21.786213 +epoch: 0, batch: 33392, sum loss: 4996.332031, avg loss: 2.909920, ppl: 18.355326 +epoch: 0, batch: 33393, sum loss: 4447.088379, avg loss: 2.916124, ppl: 18.469553 +epoch: 0, batch: 33394, sum loss: 4481.261230, avg loss: 2.830866, ppl: 16.960144 +epoch: 0, batch: 33395, sum loss: 5019.680664, avg loss: 3.164994, ppl: 23.688602 +epoch: 0, batch: 33396, sum loss: 5534.055664, avg loss: 3.040690, ppl: 20.919676 +epoch: 0, batch: 33397, sum loss: 5550.782715, avg loss: 3.204840, ppl: 24.651554 +epoch: 0, batch: 33398, sum loss: 3943.080322, avg loss: 2.776817, ppl: 16.067801 +epoch: 0, batch: 33399, sum loss: 4472.585938, avg loss: 2.743918, ppl: 15.547777 +epoch: 0, batch: 33400, sum loss: 5318.987305, avg loss: 3.065699, ppl: 21.449448 +epoch: 0, batch: 33401, sum loss: 5041.333984, avg loss: 2.866023, ppl: 17.567013 +epoch: 0, batch: 33402, sum loss: 5156.662109, avg loss: 3.035116, ppl: 20.803394 +epoch: 0, batch: 33403, sum loss: 5341.617188, avg loss: 3.016159, ppl: 20.412731 +epoch: 0, batch: 33404, sum loss: 5700.902344, avg loss: 3.116951, ppl: 22.577425 +epoch: 0, batch: 33405, sum loss: 5252.477539, avg loss: 2.984362, ppl: 19.773886 +epoch: 0, batch: 33406, sum loss: 5368.877441, avg loss: 3.161883, ppl: 23.615025 +epoch: 0, batch: 33407, sum loss: 5318.273926, avg loss: 3.039014, ppl: 
20.884632 +epoch: 0, batch: 33408, sum loss: 6358.626953, avg loss: 3.265859, ppl: 26.202606 +epoch: 0, batch: 33409, sum loss: 4318.102539, avg loss: 2.784076, ppl: 16.184864 +epoch: 0, batch: 33410, sum loss: 4801.479492, avg loss: 2.982285, ppl: 19.732861 +epoch: 0, batch: 33411, sum loss: 4828.316895, avg loss: 2.905125, ppl: 18.267521 +epoch: 0, batch: 33412, sum loss: 4804.785645, avg loss: 2.947721, ppl: 19.062466 +epoch: 0, batch: 33413, sum loss: 4948.792969, avg loss: 3.030492, ppl: 20.707409 +epoch: 0, batch: 33414, sum loss: 5964.657715, avg loss: 3.137642, ppl: 23.049456 +epoch: 0, batch: 33415, sum loss: 4460.773926, avg loss: 2.628623, ppl: 13.854686 +epoch: 0, batch: 33416, sum loss: 4932.161133, avg loss: 2.937559, ppl: 18.869722 +epoch: 0, batch: 33417, sum loss: 5759.558594, avg loss: 3.183836, ppl: 24.139168 +epoch: 0, batch: 33418, sum loss: 4308.353516, avg loss: 2.714779, ppl: 15.101267 +epoch: 0, batch: 33419, sum loss: 4458.466309, avg loss: 2.711962, ppl: 15.058799 +epoch: 0, batch: 33420, sum loss: 4456.214844, avg loss: 2.792114, ppl: 16.315483 +epoch: 0, batch: 33421, sum loss: 4956.046875, avg loss: 2.864767, ppl: 17.544962 +epoch: 0, batch: 33422, sum loss: 5055.358398, avg loss: 2.864226, ppl: 17.535469 +epoch: 0, batch: 33423, sum loss: 5185.942871, avg loss: 2.985574, ppl: 19.797873 +epoch: 0, batch: 33424, sum loss: 5282.854492, avg loss: 3.135225, ppl: 22.993816 +epoch: 0, batch: 33425, sum loss: 4651.595703, avg loss: 3.018557, ppl: 20.461737 +epoch: 0, batch: 33426, sum loss: 5459.016602, avg loss: 3.019368, ppl: 20.478338 +epoch: 0, batch: 33427, sum loss: 4184.526367, avg loss: 2.997512, ppl: 20.035624 +epoch: 0, batch: 33428, sum loss: 5028.808105, avg loss: 3.125425, ppl: 22.769562 +epoch: 0, batch: 33429, sum loss: 4924.460449, avg loss: 2.669084, ppl: 14.426753 +epoch: 0, batch: 33430, sum loss: 5496.323242, avg loss: 2.983889, ppl: 19.764528 +epoch: 0, batch: 33431, sum loss: 5348.766602, avg loss: 2.999869, ppl: 
20.082912 +epoch: 0, batch: 33432, sum loss: 5635.488281, avg loss: 3.012020, ppl: 20.328415 +epoch: 0, batch: 33433, sum loss: 5639.687012, avg loss: 2.955811, ppl: 19.217297 +epoch: 0, batch: 33434, sum loss: 6597.596680, avg loss: 3.226209, ppl: 25.183996 +epoch: 0, batch: 33435, sum loss: 4078.563232, avg loss: 2.911180, ppl: 18.378473 +epoch: 0, batch: 33436, sum loss: 4253.193848, avg loss: 2.630299, ppl: 13.877923 +epoch: 0, batch: 33437, sum loss: 5264.696777, avg loss: 2.994708, ppl: 19.979527 +epoch: 0, batch: 33438, sum loss: 5160.883789, avg loss: 3.221525, ppl: 25.066328 +epoch: 0, batch: 33439, sum loss: 5804.174805, avg loss: 3.098866, ppl: 22.172783 +epoch: 0, batch: 33440, sum loss: 4505.158203, avg loss: 2.940704, ppl: 18.929165 +epoch: 0, batch: 33441, sum loss: 5321.820312, avg loss: 3.213660, ppl: 24.869932 +epoch: 0, batch: 33442, sum loss: 6104.786133, avg loss: 3.245500, ppl: 25.674553 +epoch: 0, batch: 33443, sum loss: 4073.187500, avg loss: 2.856373, ppl: 17.398302 +epoch: 0, batch: 33444, sum loss: 5759.649414, avg loss: 2.887042, ppl: 17.940168 +epoch: 0, batch: 33445, sum loss: 4641.295410, avg loss: 2.772578, ppl: 15.999828 +epoch: 0, batch: 33446, sum loss: 5225.763672, avg loss: 2.977643, ppl: 19.641468 +epoch: 0, batch: 33447, sum loss: 4287.795410, avg loss: 2.916868, ppl: 18.483301 +epoch: 0, batch: 33448, sum loss: 5270.150391, avg loss: 2.800292, ppl: 16.449457 +epoch: 0, batch: 33449, sum loss: 4764.339844, avg loss: 3.103804, ppl: 22.282560 +epoch: 0, batch: 33450, sum loss: 5725.128418, avg loss: 3.123365, ppl: 22.722717 +epoch: 0, batch: 33451, sum loss: 4704.385254, avg loss: 2.909329, ppl: 18.344488 +epoch: 0, batch: 33452, sum loss: 5071.280273, avg loss: 3.205613, ppl: 24.670622 +epoch: 0, batch: 33453, sum loss: 5592.120605, avg loss: 3.062498, ppl: 21.380892 +epoch: 0, batch: 33454, sum loss: 4850.458984, avg loss: 2.934337, ppl: 18.809027 +epoch: 0, batch: 33455, sum loss: 4971.087402, avg loss: 2.936260, ppl: 
18.845228 +epoch: 0, batch: 33456, sum loss: 4209.293945, avg loss: 2.815581, ppl: 16.702879 +epoch: 0, batch: 33457, sum loss: 4298.426758, avg loss: 2.896514, ppl: 18.110899 +epoch: 0, batch: 33458, sum loss: 4591.717285, avg loss: 2.864452, ppl: 17.539446 +epoch: 0, batch: 33459, sum loss: 5106.977539, avg loss: 2.908302, ppl: 18.325647 +epoch: 0, batch: 33460, sum loss: 4236.915039, avg loss: 2.763806, ppl: 15.860097 +epoch: 0, batch: 33461, sum loss: 5098.110352, avg loss: 3.133442, ppl: 22.952850 +epoch: 0, batch: 33462, sum loss: 6084.156738, avg loss: 3.222541, ppl: 25.091787 +epoch: 0, batch: 33463, sum loss: 4312.768066, avg loss: 3.047893, ppl: 21.070892 +epoch: 0, batch: 33464, sum loss: 5054.442383, avg loss: 2.971453, ppl: 19.520269 +epoch: 0, batch: 33465, sum loss: 5082.101562, avg loss: 3.068902, ppl: 21.518257 +epoch: 0, batch: 33466, sum loss: 6148.672363, avg loss: 3.102257, ppl: 22.248098 +epoch: 0, batch: 33467, sum loss: 5563.138672, avg loss: 3.020162, ppl: 20.494619 +epoch: 0, batch: 33468, sum loss: 5994.303711, avg loss: 3.159886, ppl: 23.567911 +epoch: 0, batch: 33469, sum loss: 4218.121582, avg loss: 2.834759, ppl: 17.026300 +epoch: 0, batch: 33470, sum loss: 5195.352539, avg loss: 2.963692, ppl: 19.369360 +epoch: 0, batch: 33471, sum loss: 4670.999512, avg loss: 2.917551, ppl: 18.495941 +epoch: 0, batch: 33472, sum loss: 4985.312988, avg loss: 2.890037, ppl: 17.993967 +epoch: 0, batch: 33473, sum loss: 5048.011719, avg loss: 2.812263, ppl: 16.647545 +epoch: 0, batch: 33474, sum loss: 4676.359863, avg loss: 2.820482, ppl: 16.784946 +epoch: 0, batch: 33475, sum loss: 5564.306641, avg loss: 3.093000, ppl: 22.043102 +epoch: 0, batch: 33476, sum loss: 5476.604492, avg loss: 2.978034, ppl: 19.649149 +epoch: 0, batch: 33477, sum loss: 5164.055176, avg loss: 3.050240, ppl: 21.120403 +epoch: 0, batch: 33478, sum loss: 5371.890625, avg loss: 3.212853, ppl: 24.849894 +epoch: 0, batch: 33479, sum loss: 4567.009766, avg loss: 3.073358, ppl: 
21.614359 +epoch: 0, batch: 33480, sum loss: 4028.610107, avg loss: 2.811312, ppl: 16.631727 +epoch: 0, batch: 33481, sum loss: 5154.197266, avg loss: 3.073463, ppl: 21.616625 +epoch: 0, batch: 33482, sum loss: 5538.754883, avg loss: 2.985852, ppl: 19.803364 +epoch: 0, batch: 33483, sum loss: 5944.226562, avg loss: 3.204435, ppl: 24.641571 +epoch: 0, batch: 33484, sum loss: 4614.298828, avg loss: 2.748242, ppl: 15.615158 +epoch: 0, batch: 33485, sum loss: 5509.467285, avg loss: 3.071052, ppl: 21.564579 +epoch: 0, batch: 33486, sum loss: 4819.076660, avg loss: 2.906560, ppl: 18.293762 +epoch: 0, batch: 33487, sum loss: 5061.356445, avg loss: 3.167307, ppl: 23.743454 +epoch: 0, batch: 33488, sum loss: 5007.066406, avg loss: 2.926398, ppl: 18.660292 +epoch: 0, batch: 33489, sum loss: 5195.774414, avg loss: 2.918974, ppl: 18.522282 +epoch: 0, batch: 33490, sum loss: 4259.967285, avg loss: 2.978998, ppl: 19.668102 +epoch: 0, batch: 33491, sum loss: 4561.259766, avg loss: 2.836604, ppl: 17.057745 +epoch: 0, batch: 33492, sum loss: 5512.233887, avg loss: 3.069173, ppl: 21.524086 +epoch: 0, batch: 33493, sum loss: 4811.124023, avg loss: 2.828409, ppl: 16.918526 +epoch: 0, batch: 33494, sum loss: 5259.255859, avg loss: 3.071995, ppl: 21.584927 +epoch: 0, batch: 33495, sum loss: 4639.530273, avg loss: 2.970250, ppl: 19.496792 +epoch: 0, batch: 33496, sum loss: 4794.529297, avg loss: 3.024940, ppl: 20.592760 +epoch: 0, batch: 33497, sum loss: 5542.562012, avg loss: 3.010626, ppl: 20.300100 +epoch: 0, batch: 33498, sum loss: 5739.963867, avg loss: 3.069499, ppl: 21.531124 +epoch: 0, batch: 33499, sum loss: 5091.216797, avg loss: 2.936111, ppl: 18.842424 +epoch: 0, batch: 33500, sum loss: 4921.459473, avg loss: 2.859651, ppl: 17.455435 +epoch: 0, batch: 33501, sum loss: 5286.384766, avg loss: 3.192261, ppl: 24.343410 +epoch: 0, batch: 33502, sum loss: 5472.700684, avg loss: 3.003678, ppl: 20.159540 +epoch: 0, batch: 33503, sum loss: 3894.812012, avg loss: 2.624536, ppl: 
13.798174 +epoch: 0, batch: 33504, sum loss: 5119.758789, avg loss: 3.084192, ppl: 21.849806 +epoch: 0, batch: 33505, sum loss: 4505.072754, avg loss: 2.799921, ppl: 16.443344 +epoch: 0, batch: 33506, sum loss: 4802.035156, avg loss: 2.788638, ppl: 16.258865 +epoch: 0, batch: 33507, sum loss: 4603.690918, avg loss: 2.819162, ppl: 16.762793 +epoch: 0, batch: 33508, sum loss: 4978.806641, avg loss: 3.149150, ppl: 23.316240 +epoch: 0, batch: 33509, sum loss: 5019.367676, avg loss: 3.069950, ppl: 21.540817 +epoch: 0, batch: 33510, sum loss: 4863.034180, avg loss: 2.972515, ppl: 19.541000 +epoch: 0, batch: 33511, sum loss: 5308.937012, avg loss: 3.093786, ppl: 22.060446 +epoch: 0, batch: 33512, sum loss: 5309.155273, avg loss: 3.202144, ppl: 24.585194 +epoch: 0, batch: 33513, sum loss: 4453.180664, avg loss: 2.687496, ppl: 14.694830 +epoch: 0, batch: 33514, sum loss: 6374.213867, avg loss: 3.193494, ppl: 24.373442 +epoch: 0, batch: 33515, sum loss: 5754.983398, avg loss: 2.930236, ppl: 18.732052 +epoch: 0, batch: 33516, sum loss: 4555.791016, avg loss: 2.966010, ppl: 19.414299 +epoch: 0, batch: 33517, sum loss: 5333.699707, avg loss: 2.945168, ppl: 19.013866 +epoch: 0, batch: 33518, sum loss: 4633.427246, avg loss: 2.890472, ppl: 18.001812 +epoch: 0, batch: 33519, sum loss: 4872.950195, avg loss: 3.017307, ppl: 20.436174 +epoch: 0, batch: 33520, sum loss: 6399.961426, avg loss: 3.307473, ppl: 27.316023 +epoch: 0, batch: 33521, sum loss: 5125.000977, avg loss: 3.185209, ppl: 24.172335 +epoch: 0, batch: 33522, sum loss: 5437.643555, avg loss: 3.198614, ppl: 24.498554 +epoch: 0, batch: 33523, sum loss: 4446.652832, avg loss: 2.748240, ppl: 15.615129 +epoch: 0, batch: 33524, sum loss: 5602.562988, avg loss: 3.370977, ppl: 29.106941 +epoch: 0, batch: 33525, sum loss: 4719.561523, avg loss: 2.740744, ppl: 15.498513 +epoch: 0, batch: 33526, sum loss: 5431.170898, avg loss: 3.007293, ppl: 20.232555 +epoch: 0, batch: 33527, sum loss: 6273.326660, avg loss: 3.104071, ppl: 
22.288496 +epoch: 0, batch: 33528, sum loss: 6241.781250, avg loss: 3.200913, ppl: 24.554949 +epoch: 0, batch: 33529, sum loss: 4349.727539, avg loss: 2.923204, ppl: 18.600788 +epoch: 0, batch: 33530, sum loss: 5800.618164, avg loss: 3.016442, ppl: 20.418514 +epoch: 0, batch: 33531, sum loss: 5717.594727, avg loss: 3.185290, ppl: 24.174290 +epoch: 0, batch: 33532, sum loss: 4473.407227, avg loss: 2.863897, ppl: 17.529709 +epoch: 0, batch: 33533, sum loss: 3773.538818, avg loss: 2.659295, ppl: 14.286208 +epoch: 0, batch: 33534, sum loss: 5593.262695, avg loss: 3.013611, ppl: 20.360796 +epoch: 0, batch: 33535, sum loss: 5582.397461, avg loss: 2.998065, ppl: 20.046713 +epoch: 0, batch: 33536, sum loss: 5726.748535, avg loss: 3.059161, ppl: 21.309664 +epoch: 0, batch: 33537, sum loss: 5332.587891, avg loss: 3.134972, ppl: 22.987995 +epoch: 0, batch: 33538, sum loss: 5445.827148, avg loss: 3.255127, ppl: 25.922905 +epoch: 0, batch: 33539, sum loss: 5472.070801, avg loss: 3.190712, ppl: 24.305721 +epoch: 0, batch: 33540, sum loss: 6487.297363, avg loss: 3.256676, ppl: 25.963085 +epoch: 0, batch: 33541, sum loss: 5282.639648, avg loss: 2.857025, ppl: 17.409658 +epoch: 0, batch: 33542, sum loss: 5004.961914, avg loss: 2.930305, ppl: 18.733353 +epoch: 0, batch: 33543, sum loss: 5286.187500, avg loss: 3.201810, ppl: 24.576965 +epoch: 0, batch: 33544, sum loss: 5567.225098, avg loss: 3.216190, ppl: 24.932947 +epoch: 0, batch: 33545, sum loss: 6053.052734, avg loss: 3.209466, ppl: 24.765858 +epoch: 0, batch: 33546, sum loss: 5156.375977, avg loss: 2.861474, ppl: 17.487286 +epoch: 0, batch: 33547, sum loss: 5301.453613, avg loss: 2.960052, ppl: 19.298981 +epoch: 0, batch: 33548, sum loss: 4513.358398, avg loss: 2.904349, ppl: 18.253359 +epoch: 0, batch: 33549, sum loss: 4978.390137, avg loss: 2.918165, ppl: 18.507299 +epoch: 0, batch: 33550, sum loss: 4933.284180, avg loss: 3.020994, ppl: 20.511679 +epoch: 0, batch: 33551, sum loss: 6087.423340, avg loss: 3.110589, ppl: 
22.434259 +epoch: 0, batch: 33552, sum loss: 4813.985352, avg loss: 3.048756, ppl: 21.089087 +epoch: 0, batch: 33553, sum loss: 4180.760742, avg loss: 2.770551, ppl: 15.967425 +epoch: 0, batch: 33554, sum loss: 5518.805176, avg loss: 2.907695, ppl: 18.314541 +epoch: 0, batch: 33555, sum loss: 4738.114258, avg loss: 2.729328, ppl: 15.322593 +epoch: 0, batch: 33556, sum loss: 4553.402344, avg loss: 2.831718, ppl: 16.974594 +epoch: 0, batch: 33557, sum loss: 4180.956055, avg loss: 2.952653, ppl: 19.156704 +epoch: 0, batch: 33558, sum loss: 4421.545898, avg loss: 2.619399, ppl: 13.727475 +epoch: 0, batch: 33559, sum loss: 4495.100098, avg loss: 2.992743, ppl: 19.940304 +epoch: 0, batch: 33560, sum loss: 5463.499512, avg loss: 2.735854, ppl: 15.422904 +epoch: 0, batch: 33561, sum loss: 4819.844238, avg loss: 2.850292, ppl: 17.292839 +epoch: 0, batch: 33562, sum loss: 5175.564941, avg loss: 3.169360, ppl: 23.792250 +epoch: 0, batch: 33563, sum loss: 5168.940918, avg loss: 3.084094, ppl: 21.847660 +epoch: 0, batch: 33564, sum loss: 4786.405762, avg loss: 2.653218, ppl: 14.199663 +epoch: 0, batch: 33565, sum loss: 6209.926270, avg loss: 3.222588, ppl: 25.092966 +epoch: 0, batch: 33566, sum loss: 5234.023926, avg loss: 3.158735, ppl: 23.540798 +epoch: 0, batch: 33567, sum loss: 6816.300781, avg loss: 3.264512, ppl: 26.167339 +epoch: 0, batch: 33568, sum loss: 6521.337891, avg loss: 3.124743, ppl: 22.754042 +epoch: 0, batch: 33569, sum loss: 6587.482422, avg loss: 3.293741, ppl: 26.943483 +epoch: 0, batch: 33570, sum loss: 3973.171875, avg loss: 2.590073, ppl: 13.330743 +epoch: 0, batch: 33571, sum loss: 4980.485352, avg loss: 2.660516, ppl: 14.303665 +epoch: 0, batch: 33572, sum loss: 4909.047852, avg loss: 3.026540, ppl: 20.625740 +epoch: 0, batch: 33573, sum loss: 4658.647949, avg loss: 2.889980, ppl: 17.992950 +epoch: 0, batch: 33574, sum loss: 5063.604980, avg loss: 2.920187, ppl: 18.544760 +epoch: 0, batch: 33575, sum loss: 4801.364258, avg loss: 2.942013, ppl: 
18.953953 +epoch: 0, batch: 33576, sum loss: 4966.840820, avg loss: 3.039682, ppl: 20.898605 +epoch: 0, batch: 33577, sum loss: 4544.070801, avg loss: 2.744004, ppl: 15.549119 +epoch: 0, batch: 33578, sum loss: 6244.286621, avg loss: 3.332063, ppl: 27.996044 +epoch: 0, batch: 33579, sum loss: 6384.472656, avg loss: 3.262378, ppl: 26.111544 +epoch: 0, batch: 33580, sum loss: 5193.249512, avg loss: 2.909384, ppl: 18.345486 +epoch: 0, batch: 33581, sum loss: 5314.055664, avg loss: 3.321285, ppl: 27.695911 +epoch: 0, batch: 33582, sum loss: 5835.836426, avg loss: 3.240331, ppl: 25.542179 +epoch: 0, batch: 33583, sum loss: 3991.515869, avg loss: 2.911390, ppl: 18.382334 +epoch: 0, batch: 33584, sum loss: 5576.431641, avg loss: 3.166628, ppl: 23.727333 +epoch: 0, batch: 33585, sum loss: 5081.211914, avg loss: 3.126899, ppl: 22.803167 +epoch: 0, batch: 33586, sum loss: 4666.458496, avg loss: 2.861103, ppl: 17.480795 +epoch: 0, batch: 33587, sum loss: 5265.945801, avg loss: 3.155150, ppl: 23.456566 +epoch: 0, batch: 33588, sum loss: 6279.264160, avg loss: 3.111628, ppl: 22.457565 +epoch: 0, batch: 33589, sum loss: 4475.299805, avg loss: 2.654389, ppl: 14.216299 +epoch: 0, batch: 33590, sum loss: 5134.361328, avg loss: 2.772333, ppl: 15.995912 +epoch: 0, batch: 33591, sum loss: 5539.856934, avg loss: 2.984837, ppl: 19.783274 +epoch: 0, batch: 33592, sum loss: 4858.594727, avg loss: 2.599569, ppl: 13.457937 +epoch: 0, batch: 33593, sum loss: 4354.443359, avg loss: 2.645470, ppl: 14.090064 +epoch: 0, batch: 33594, sum loss: 4619.542969, avg loss: 2.953672, ppl: 19.176239 +epoch: 0, batch: 33595, sum loss: 5221.099609, avg loss: 2.911935, ppl: 18.392355 +epoch: 0, batch: 33596, sum loss: 4576.936035, avg loss: 2.849898, ppl: 17.286016 +epoch: 0, batch: 33597, sum loss: 5914.205566, avg loss: 3.130866, ppl: 22.893793 +epoch: 0, batch: 33598, sum loss: 5200.447754, avg loss: 2.939767, ppl: 18.911442 +epoch: 0, batch: 33599, sum loss: 5081.332520, avg loss: 3.046362, ppl: 
21.038675 +epoch: 0, batch: 33600, sum loss: 4943.176758, avg loss: 3.081781, ppl: 21.797192 +epoch: 0, batch: 33601, sum loss: 5564.586426, avg loss: 3.281006, ppl: 26.602524 +epoch: 0, batch: 33602, sum loss: 5551.902832, avg loss: 3.014062, ppl: 20.369984 +epoch: 0, batch: 33603, sum loss: 4680.033203, avg loss: 2.992349, ppl: 19.932442 +epoch: 0, batch: 33604, sum loss: 6321.705566, avg loss: 3.033448, ppl: 20.768719 +epoch: 0, batch: 33605, sum loss: 5909.440430, avg loss: 3.061886, ppl: 21.367826 +epoch: 0, batch: 33606, sum loss: 5209.474609, avg loss: 2.985372, ppl: 19.793871 +epoch: 0, batch: 33607, sum loss: 4229.272461, avg loss: 2.778760, ppl: 16.099045 +epoch: 0, batch: 33608, sum loss: 4416.365234, avg loss: 2.944243, ppl: 18.996284 +epoch: 0, batch: 33609, sum loss: 3718.435547, avg loss: 2.744233, ppl: 15.552679 +epoch: 0, batch: 33610, sum loss: 5238.916504, avg loss: 2.809071, ppl: 16.594488 +epoch: 0, batch: 33611, sum loss: 5462.780762, avg loss: 2.860095, ppl: 17.463182 +epoch: 0, batch: 33612, sum loss: 5775.477539, avg loss: 3.046138, ppl: 21.033955 +epoch: 0, batch: 33613, sum loss: 5767.758301, avg loss: 3.112660, ppl: 22.480757 +epoch: 0, batch: 33614, sum loss: 5276.107910, avg loss: 2.829012, ppl: 16.928730 +epoch: 0, batch: 33615, sum loss: 5695.610352, avg loss: 3.301803, ppl: 27.161577 +epoch: 0, batch: 33616, sum loss: 5128.363770, avg loss: 2.995539, ppl: 19.996143 +epoch: 0, batch: 33617, sum loss: 5370.055664, avg loss: 2.991674, ppl: 19.919008 +epoch: 0, batch: 33618, sum loss: 6286.421875, avg loss: 3.322633, ppl: 27.733276 +epoch: 0, batch: 33619, sum loss: 4629.968750, avg loss: 2.541147, ppl: 12.694217 +epoch: 0, batch: 33620, sum loss: 4508.835938, avg loss: 3.067235, ppl: 21.482431 +epoch: 0, batch: 33621, sum loss: 5378.852051, avg loss: 3.215094, ppl: 24.905636 +epoch: 0, batch: 33622, sum loss: 5813.851562, avg loss: 3.176968, ppl: 23.973957 +epoch: 0, batch: 33623, sum loss: 4761.923340, avg loss: 2.775014, ppl: 
16.038847 +epoch: 0, batch: 33624, sum loss: 3979.664062, avg loss: 2.602789, ppl: 13.501340 +epoch: 0, batch: 33625, sum loss: 4475.765625, avg loss: 2.752623, ppl: 15.683722 +epoch: 0, batch: 33626, sum loss: 5090.589355, avg loss: 2.959645, ppl: 19.291122 +epoch: 0, batch: 33627, sum loss: 5087.256836, avg loss: 2.952558, ppl: 19.154881 +epoch: 0, batch: 33628, sum loss: 5065.437012, avg loss: 2.754452, ppl: 15.712428 +epoch: 0, batch: 33629, sum loss: 5735.591797, avg loss: 3.098645, ppl: 22.167892 +epoch: 0, batch: 33630, sum loss: 5093.620117, avg loss: 3.037340, ppl: 20.849718 +epoch: 0, batch: 33631, sum loss: 5361.658203, avg loss: 2.902901, ppl: 18.226948 +epoch: 0, batch: 33632, sum loss: 4769.608398, avg loss: 2.874990, ppl: 17.725246 +epoch: 0, batch: 33633, sum loss: 5013.516602, avg loss: 3.224127, ppl: 25.131615 +epoch: 0, batch: 33634, sum loss: 5377.746582, avg loss: 3.110322, ppl: 22.428265 +epoch: 0, batch: 33635, sum loss: 4258.698242, avg loss: 2.731686, ppl: 15.358758 +epoch: 0, batch: 33636, sum loss: 4941.485352, avg loss: 2.830175, ppl: 16.948425 +epoch: 0, batch: 33637, sum loss: 6045.460449, avg loss: 3.081274, ppl: 21.786146 +epoch: 0, batch: 33638, sum loss: 4870.635742, avg loss: 2.641343, ppl: 14.032031 +epoch: 0, batch: 33639, sum loss: 4398.912598, avg loss: 2.875106, ppl: 17.727308 +epoch: 0, batch: 33640, sum loss: 4810.400879, avg loss: 3.031129, ppl: 20.720606 +epoch: 0, batch: 33641, sum loss: 4413.363770, avg loss: 2.732733, ppl: 15.374849 +epoch: 0, batch: 33642, sum loss: 5055.196289, avg loss: 2.985940, ppl: 19.805109 +epoch: 0, batch: 33643, sum loss: 4527.034668, avg loss: 2.572179, ppl: 13.094320 +epoch: 0, batch: 33644, sum loss: 5322.727051, avg loss: 2.971930, ppl: 19.529575 +epoch: 0, batch: 33645, sum loss: 5280.677734, avg loss: 3.133933, ppl: 22.964127 +epoch: 0, batch: 33646, sum loss: 5061.418945, avg loss: 2.808779, ppl: 16.589642 +epoch: 0, batch: 33647, sum loss: 5604.633789, avg loss: 2.871227, ppl: 
17.658665 +epoch: 0, batch: 33648, sum loss: 4706.778809, avg loss: 2.956519, ppl: 19.230919 +epoch: 0, batch: 33649, sum loss: 5014.009766, avg loss: 2.797997, ppl: 16.411737 +epoch: 0, batch: 33650, sum loss: 4587.820312, avg loss: 2.837242, ppl: 17.068628 +epoch: 0, batch: 33651, sum loss: 5706.636719, avg loss: 3.224089, ppl: 25.130661 +epoch: 0, batch: 33652, sum loss: 3618.182617, avg loss: 2.519626, ppl: 12.423945 +epoch: 0, batch: 33653, sum loss: 4274.119629, avg loss: 2.725842, ppl: 15.269258 +epoch: 0, batch: 33654, sum loss: 5055.592773, avg loss: 2.972130, ppl: 19.533474 +epoch: 0, batch: 33655, sum loss: 4918.836914, avg loss: 2.830171, ppl: 16.948357 +epoch: 0, batch: 33656, sum loss: 5075.702637, avg loss: 2.897091, ppl: 18.121347 +epoch: 0, batch: 33657, sum loss: 5259.761719, avg loss: 2.762480, ppl: 15.839075 +epoch: 0, batch: 33658, sum loss: 4868.073242, avg loss: 3.003129, ppl: 20.148493 +epoch: 0, batch: 33659, sum loss: 5757.902344, avg loss: 3.072520, ppl: 21.596252 +epoch: 0, batch: 33660, sum loss: 4748.539062, avg loss: 2.862290, ppl: 17.501558 +epoch: 0, batch: 33661, sum loss: 5023.465820, avg loss: 3.037162, ppl: 20.845999 +epoch: 0, batch: 33662, sum loss: 4003.872314, avg loss: 2.878413, ppl: 17.786024 +epoch: 0, batch: 33663, sum loss: 5568.067871, avg loss: 3.081388, ppl: 21.788618 +epoch: 0, batch: 33664, sum loss: 4527.630371, avg loss: 2.530816, ppl: 12.563758 +epoch: 0, batch: 33665, sum loss: 5011.825195, avg loss: 2.863900, ppl: 17.529762 +epoch: 0, batch: 33666, sum loss: 5768.751465, avg loss: 3.154047, ppl: 23.430696 +epoch: 0, batch: 33667, sum loss: 4242.830566, avg loss: 2.826669, ppl: 16.889116 +epoch: 0, batch: 33668, sum loss: 4874.351562, avg loss: 3.098761, ppl: 22.170473 +epoch: 0, batch: 33669, sum loss: 5056.844238, avg loss: 3.081563, ppl: 21.792427 +epoch: 0, batch: 33670, sum loss: 4914.572266, avg loss: 2.911477, ppl: 18.383924 +epoch: 0, batch: 33671, sum loss: 5017.459473, avg loss: 2.954923, ppl: 
19.200247 +epoch: 0, batch: 33672, sum loss: 5671.155273, avg loss: 3.057227, ppl: 21.268490 +epoch: 0, batch: 33673, sum loss: 4690.373047, avg loss: 2.978014, ppl: 19.648766 +epoch: 0, batch: 33674, sum loss: 4537.718262, avg loss: 2.999153, ppl: 20.068529 +epoch: 0, batch: 33675, sum loss: 5226.702148, avg loss: 2.804025, ppl: 16.510965 +epoch: 0, batch: 33676, sum loss: 4750.049805, avg loss: 2.817349, ppl: 16.732430 +epoch: 0, batch: 33677, sum loss: 5818.015625, avg loss: 3.086480, ppl: 21.899862 +epoch: 0, batch: 33678, sum loss: 5372.976074, avg loss: 2.934449, ppl: 18.811138 +epoch: 0, batch: 33679, sum loss: 4715.316406, avg loss: 2.842264, ppl: 17.154562 +epoch: 0, batch: 33680, sum loss: 4786.356445, avg loss: 2.761890, ppl: 15.829740 +epoch: 0, batch: 33681, sum loss: 5838.943359, avg loss: 2.960925, ppl: 19.315823 +epoch: 0, batch: 33682, sum loss: 5573.487793, avg loss: 3.065725, ppl: 21.450005 +epoch: 0, batch: 33683, sum loss: 4859.297852, avg loss: 2.964794, ppl: 19.390711 +epoch: 0, batch: 33684, sum loss: 5026.880859, avg loss: 2.730516, ppl: 15.340808 +epoch: 0, batch: 33685, sum loss: 5344.042969, avg loss: 2.921839, ppl: 18.575413 +epoch: 0, batch: 33686, sum loss: 5739.327637, avg loss: 3.380052, ppl: 29.372293 +epoch: 0, batch: 33687, sum loss: 4698.555664, avg loss: 2.681824, ppl: 14.611724 +epoch: 0, batch: 33688, sum loss: 4180.318359, avg loss: 2.744792, ppl: 15.561380 +epoch: 0, batch: 33689, sum loss: 4505.238770, avg loss: 2.745423, ppl: 15.571193 +epoch: 0, batch: 33690, sum loss: 5688.939453, avg loss: 3.083436, ppl: 21.833294 +epoch: 0, batch: 33691, sum loss: 5283.442871, avg loss: 2.990064, ppl: 19.886953 +epoch: 0, batch: 33692, sum loss: 4469.685547, avg loss: 3.013949, ppl: 20.367666 +epoch: 0, batch: 33693, sum loss: 5938.090332, avg loss: 3.236017, ppl: 25.432217 +epoch: 0, batch: 33694, sum loss: 4861.188965, avg loss: 3.264734, ppl: 26.173149 +epoch: 0, batch: 33695, sum loss: 4404.759277, avg loss: 3.132830, ppl: 
22.938808 +epoch: 0, batch: 33696, sum loss: 5537.449707, avg loss: 3.006216, ppl: 20.210779 +epoch: 0, batch: 33697, sum loss: 5972.875977, avg loss: 3.323804, ppl: 27.765776 +epoch: 0, batch: 33698, sum loss: 4862.061035, avg loss: 2.990197, ppl: 19.889608 +epoch: 0, batch: 33699, sum loss: 5214.385254, avg loss: 3.179503, ppl: 24.034811 +epoch: 0, batch: 33700, sum loss: 4499.324707, avg loss: 2.784235, ppl: 16.187437 +epoch: 0, batch: 33701, sum loss: 5348.731445, avg loss: 3.187563, ppl: 24.229319 +epoch: 0, batch: 33702, sum loss: 5305.814941, avg loss: 3.088367, ppl: 21.941223 +epoch: 0, batch: 33703, sum loss: 4424.614258, avg loss: 2.963573, ppl: 19.367043 +epoch: 0, batch: 33704, sum loss: 5020.442383, avg loss: 3.076252, ppl: 21.676994 +epoch: 0, batch: 33705, sum loss: 4838.160156, avg loss: 2.925127, ppl: 18.636599 +epoch: 0, batch: 33706, sum loss: 6113.520020, avg loss: 3.161075, ppl: 23.595957 +epoch: 0, batch: 33707, sum loss: 5255.217773, avg loss: 3.102254, ppl: 22.248035 +epoch: 0, batch: 33708, sum loss: 5072.132324, avg loss: 3.300021, ppl: 27.113213 +epoch: 0, batch: 33709, sum loss: 5320.751465, avg loss: 3.049141, ppl: 21.097218 +epoch: 0, batch: 33710, sum loss: 5745.745117, avg loss: 3.248019, ppl: 25.739300 +epoch: 0, batch: 33711, sum loss: 4491.606934, avg loss: 2.740456, ppl: 15.494043 +epoch: 0, batch: 33712, sum loss: 5599.078125, avg loss: 3.100265, ppl: 22.203831 +epoch: 0, batch: 33713, sum loss: 4467.751465, avg loss: 2.897374, ppl: 18.126490 +epoch: 0, batch: 33714, sum loss: 4897.221191, avg loss: 2.921970, ppl: 18.577848 +epoch: 0, batch: 33715, sum loss: 4683.376953, avg loss: 3.002165, ppl: 20.129066 +epoch: 0, batch: 33716, sum loss: 5318.068848, avg loss: 2.986002, ppl: 19.806334 +epoch: 0, batch: 33717, sum loss: 5745.352539, avg loss: 3.117392, ppl: 22.587385 +epoch: 0, batch: 33718, sum loss: 5962.041016, avg loss: 3.268663, ppl: 26.276182 +epoch: 0, batch: 33719, sum loss: 5227.259766, avg loss: 3.069442, ppl: 
21.529886 +epoch: 0, batch: 33720, sum loss: 5239.567871, avg loss: 3.115082, ppl: 22.535278 +epoch: 0, batch: 33721, sum loss: 5950.346191, avg loss: 3.115365, ppl: 22.541647 +epoch: 0, batch: 33722, sum loss: 4826.305664, avg loss: 2.764207, ppl: 15.866459 +epoch: 0, batch: 33723, sum loss: 5465.785156, avg loss: 3.263155, ppl: 26.131865 +epoch: 0, batch: 33724, sum loss: 5761.489746, avg loss: 2.938037, ppl: 18.878744 +epoch: 0, batch: 33725, sum loss: 5645.845215, avg loss: 3.170042, ppl: 23.808491 +epoch: 0, batch: 33726, sum loss: 5423.778320, avg loss: 3.069484, ppl: 21.530794 +epoch: 0, batch: 33727, sum loss: 5152.072266, avg loss: 2.894423, ppl: 18.073067 +epoch: 0, batch: 33728, sum loss: 5561.975586, avg loss: 3.129981, ppl: 22.873535 +epoch: 0, batch: 33729, sum loss: 6658.414551, avg loss: 3.455327, ppl: 31.668636 +epoch: 0, batch: 33730, sum loss: 4281.679199, avg loss: 2.862085, ppl: 17.497974 +epoch: 0, batch: 33731, sum loss: 4016.056396, avg loss: 2.663167, ppl: 14.341640 +epoch: 0, batch: 33732, sum loss: 5313.397949, avg loss: 2.942081, ppl: 18.955246 +epoch: 0, batch: 33733, sum loss: 4623.298340, avg loss: 2.726002, ppl: 15.271701 +epoch: 0, batch: 33734, sum loss: 5213.626465, avg loss: 2.996337, ppl: 20.012096 +epoch: 0, batch: 33735, sum loss: 5838.392578, avg loss: 3.066383, ppl: 21.464125 +epoch: 0, batch: 33736, sum loss: 5786.046875, avg loss: 3.051713, ppl: 21.151541 +epoch: 0, batch: 33737, sum loss: 4834.506836, avg loss: 2.955078, ppl: 19.203215 +epoch: 0, batch: 33738, sum loss: 5016.151367, avg loss: 2.800754, ppl: 16.457056 +epoch: 0, batch: 33739, sum loss: 4510.751953, avg loss: 2.829832, ppl: 16.942612 +epoch: 0, batch: 33740, sum loss: 6095.080078, avg loss: 3.042976, ppl: 20.967543 +epoch: 0, batch: 33741, sum loss: 4688.946289, avg loss: 2.866104, ppl: 17.568439 +epoch: 0, batch: 33742, sum loss: 4762.600098, avg loss: 2.912905, ppl: 18.410206 +epoch: 0, batch: 33743, sum loss: 5423.717285, avg loss: 3.273215, ppl: 
26.396069 +epoch: 0, batch: 33744, sum loss: 4460.212402, avg loss: 3.011622, ppl: 20.320337 +epoch: 0, batch: 33745, sum loss: 4631.751465, avg loss: 2.866183, ppl: 17.569822 +epoch: 0, batch: 33746, sum loss: 5776.742188, avg loss: 3.013428, ppl: 20.357073 +epoch: 0, batch: 33747, sum loss: 5027.508789, avg loss: 2.969586, ppl: 19.483850 +epoch: 0, batch: 33748, sum loss: 4722.192871, avg loss: 2.812503, ppl: 16.651546 +epoch: 0, batch: 33749, sum loss: 6034.791016, avg loss: 3.057138, ppl: 21.266609 +epoch: 0, batch: 33750, sum loss: 4627.251953, avg loss: 2.859859, ppl: 17.459064 +epoch: 0, batch: 33751, sum loss: 4785.354492, avg loss: 2.891453, ppl: 18.019474 +epoch: 0, batch: 33752, sum loss: 6435.741211, avg loss: 3.298689, ppl: 27.077108 +epoch: 0, batch: 33753, sum loss: 5449.340332, avg loss: 2.999087, ppl: 20.067200 +epoch: 0, batch: 33754, sum loss: 5971.431152, avg loss: 3.210447, ppl: 24.790161 +epoch: 0, batch: 33755, sum loss: 5661.077148, avg loss: 3.027314, ppl: 20.641714 +epoch: 0, batch: 33756, sum loss: 5025.769043, avg loss: 2.966806, ppl: 19.429756 +epoch: 0, batch: 33757, sum loss: 5207.324707, avg loss: 3.018739, ppl: 20.465473 +epoch: 0, batch: 33758, sum loss: 4685.116211, avg loss: 3.074223, ppl: 21.633072 +epoch: 0, batch: 33759, sum loss: 5051.515137, avg loss: 2.985529, ppl: 19.796970 +epoch: 0, batch: 33760, sum loss: 5228.107422, avg loss: 2.914218, ppl: 18.434395 +epoch: 0, batch: 33761, sum loss: 5196.288574, avg loss: 2.979523, ppl: 19.678431 +epoch: 0, batch: 33762, sum loss: 4810.606445, avg loss: 2.790375, ppl: 16.287130 +epoch: 0, batch: 33763, sum loss: 4898.848145, avg loss: 2.979835, ppl: 19.684559 +epoch: 0, batch: 33764, sum loss: 4152.985352, avg loss: 2.864128, ppl: 17.533754 +epoch: 0, batch: 33765, sum loss: 5312.910645, avg loss: 3.138164, ppl: 23.061478 +epoch: 0, batch: 33766, sum loss: 5260.163574, avg loss: 2.844869, ppl: 17.199312 +epoch: 0, batch: 33767, sum loss: 3613.383301, avg loss: 2.355530, ppl: 
10.543715 +epoch: 0, batch: 33768, sum loss: 4533.616211, avg loss: 3.088294, ppl: 21.939623 +epoch: 0, batch: 33769, sum loss: 5908.850586, avg loss: 3.075924, ppl: 21.669905 +epoch: 0, batch: 33770, sum loss: 6999.922852, avg loss: 3.311222, ppl: 27.418606 +epoch: 0, batch: 33771, sum loss: 5185.054199, avg loss: 3.053624, ppl: 21.192013 +epoch: 0, batch: 33772, sum loss: 5077.337402, avg loss: 3.049452, ppl: 21.103777 +epoch: 0, batch: 33773, sum loss: 4362.437500, avg loss: 2.889031, ppl: 17.975889 +epoch: 0, batch: 33774, sum loss: 4949.671387, avg loss: 3.156678, ppl: 23.492430 +epoch: 0, batch: 33775, sum loss: 4776.338867, avg loss: 2.772106, ppl: 15.992281 +epoch: 0, batch: 33776, sum loss: 5144.561523, avg loss: 3.049533, ppl: 21.105478 +epoch: 0, batch: 33777, sum loss: 5004.321289, avg loss: 2.923085, ppl: 18.598570 +epoch: 0, batch: 33778, sum loss: 4898.924805, avg loss: 3.094709, ppl: 22.080816 +epoch: 0, batch: 33779, sum loss: 5215.342773, avg loss: 3.141773, ppl: 23.144861 +epoch: 0, batch: 33780, sum loss: 4672.249023, avg loss: 2.995032, ppl: 19.985991 +epoch: 0, batch: 33781, sum loss: 4880.605957, avg loss: 2.729645, ppl: 15.327453 +epoch: 0, batch: 33782, sum loss: 5024.471680, avg loss: 3.097701, ppl: 22.146984 +epoch: 0, batch: 33783, sum loss: 5295.797363, avg loss: 2.925855, ppl: 18.650167 +epoch: 0, batch: 33784, sum loss: 5016.299316, avg loss: 2.971741, ppl: 19.525892 +epoch: 0, batch: 33785, sum loss: 5280.014160, avg loss: 3.094967, ppl: 22.086512 +epoch: 0, batch: 33786, sum loss: 4989.737793, avg loss: 3.035120, ppl: 20.803480 +epoch: 0, batch: 33787, sum loss: 4437.681641, avg loss: 2.831960, ppl: 16.978714 +epoch: 0, batch: 33788, sum loss: 4718.138184, avg loss: 2.738328, ppl: 15.461104 +epoch: 0, batch: 33789, sum loss: 4846.171387, avg loss: 2.785156, ppl: 16.202345 +epoch: 0, batch: 33790, sum loss: 4516.979004, avg loss: 2.828415, ppl: 16.918627 +epoch: 0, batch: 33791, sum loss: 4672.003418, avg loss: 2.882174, ppl: 
17.853035 +epoch: 0, batch: 33792, sum loss: 4553.491211, avg loss: 2.949152, ppl: 19.089767 +epoch: 0, batch: 33793, sum loss: 5552.064941, avg loss: 3.064054, ppl: 21.414185 +epoch: 0, batch: 33794, sum loss: 4145.068848, avg loss: 2.872535, ppl: 17.681793 +epoch: 0, batch: 33795, sum loss: 4285.595703, avg loss: 2.947452, ppl: 19.057339 +epoch: 0, batch: 33796, sum loss: 4770.538574, avg loss: 2.930306, ppl: 18.733366 +epoch: 0, batch: 33797, sum loss: 5506.201172, avg loss: 2.976325, ppl: 19.615597 +epoch: 0, batch: 33798, sum loss: 6247.686035, avg loss: 3.240501, ppl: 25.546522 +epoch: 0, batch: 33799, sum loss: 5248.702637, avg loss: 2.898235, ppl: 18.142090 +epoch: 0, batch: 33800, sum loss: 5060.971191, avg loss: 2.865782, ppl: 17.562782 +epoch: 0, batch: 33801, sum loss: 4938.031250, avg loss: 3.105680, ppl: 22.324394 +epoch: 0, batch: 33802, sum loss: 4420.815430, avg loss: 2.841141, ppl: 17.135305 +epoch: 0, batch: 33803, sum loss: 5716.870605, avg loss: 3.298829, ppl: 27.080904 +epoch: 0, batch: 33804, sum loss: 4304.466797, avg loss: 2.678573, ppl: 14.564297 +epoch: 0, batch: 33805, sum loss: 5788.720703, avg loss: 2.850183, ppl: 17.290937 +epoch: 0, batch: 33806, sum loss: 5497.925781, avg loss: 3.052707, ppl: 21.172585 +epoch: 0, batch: 33807, sum loss: 4408.635742, avg loss: 2.840616, ppl: 17.126312 +epoch: 0, batch: 33808, sum loss: 3850.270020, avg loss: 2.601534, ppl: 13.484406 +epoch: 0, batch: 33809, sum loss: 4278.721680, avg loss: 2.670862, ppl: 14.452428 +epoch: 0, batch: 33810, sum loss: 4628.983398, avg loss: 2.868020, ppl: 17.602123 +epoch: 0, batch: 33811, sum loss: 5959.973145, avg loss: 3.289168, ppl: 26.820549 +epoch: 0, batch: 33812, sum loss: 5452.791992, avg loss: 3.103467, ppl: 22.275051 +epoch: 0, batch: 33813, sum loss: 5553.446777, avg loss: 3.099022, ppl: 22.176245 +epoch: 0, batch: 33814, sum loss: 5572.423828, avg loss: 3.153607, ppl: 23.420393 +epoch: 0, batch: 33815, sum loss: 3954.865234, avg loss: 2.853438, ppl: 
17.347322 +epoch: 0, batch: 33816, sum loss: 5266.093750, avg loss: 3.127134, ppl: 22.808519 +epoch: 0, batch: 33817, sum loss: 4466.742188, avg loss: 2.728615, ppl: 15.311663 +epoch: 0, batch: 33818, sum loss: 5972.729980, avg loss: 3.287138, ppl: 26.766155 +epoch: 0, batch: 33819, sum loss: 4562.237305, avg loss: 2.880200, ppl: 17.817842 +epoch: 0, batch: 33820, sum loss: 4985.194824, avg loss: 2.830889, ppl: 16.960524 +epoch: 0, batch: 33821, sum loss: 5085.347656, avg loss: 2.806483, ppl: 16.551609 +epoch: 0, batch: 33822, sum loss: 5101.733887, avg loss: 3.040366, ppl: 20.912895 +epoch: 0, batch: 33823, sum loss: 4836.025391, avg loss: 2.882018, ppl: 17.850252 +epoch: 0, batch: 33824, sum loss: 3951.634277, avg loss: 2.706599, ppl: 14.978248 +epoch: 0, batch: 33825, sum loss: 4512.538086, avg loss: 2.834509, ppl: 17.022039 +epoch: 0, batch: 33826, sum loss: 4951.878418, avg loss: 2.751044, ppl: 15.658964 +epoch: 0, batch: 33827, sum loss: 4204.711426, avg loss: 2.651142, ppl: 14.170214 +epoch: 0, batch: 33828, sum loss: 4573.182617, avg loss: 2.844019, ppl: 17.184690 +epoch: 0, batch: 33829, sum loss: 5424.893555, avg loss: 3.266041, ppl: 26.207367 +epoch: 0, batch: 33830, sum loss: 5430.252930, avg loss: 3.047280, ppl: 21.057991 +epoch: 0, batch: 33831, sum loss: 5115.412598, avg loss: 2.748744, ppl: 15.622997 +epoch: 0, batch: 33832, sum loss: 5127.890137, avg loss: 2.837792, ppl: 17.078014 +epoch: 0, batch: 33833, sum loss: 5022.508789, avg loss: 3.062505, ppl: 21.381056 +epoch: 0, batch: 33834, sum loss: 4360.620605, avg loss: 2.907080, ppl: 18.303282 +epoch: 0, batch: 33835, sum loss: 5290.167480, avg loss: 3.289905, ppl: 26.840315 +epoch: 0, batch: 33836, sum loss: 4870.301758, avg loss: 2.923350, ppl: 18.603512 +epoch: 0, batch: 33837, sum loss: 4942.726562, avg loss: 3.148233, ppl: 23.294876 +epoch: 0, batch: 33838, sum loss: 5705.467773, avg loss: 3.013982, ppl: 20.368341 +epoch: 0, batch: 33839, sum loss: 4124.502930, avg loss: 2.688724, ppl: 
14.712891 +epoch: 0, batch: 33840, sum loss: 5472.317871, avg loss: 3.217118, ppl: 24.956093 +epoch: 0, batch: 33841, sum loss: 5734.291016, avg loss: 3.199939, ppl: 24.531040 +epoch: 0, batch: 33842, sum loss: 4618.965820, avg loss: 2.757592, ppl: 15.761838 +epoch: 0, batch: 33843, sum loss: 6204.633301, avg loss: 3.128912, ppl: 22.849117 +epoch: 0, batch: 33844, sum loss: 4406.060059, avg loss: 2.921790, ppl: 18.574512 +epoch: 0, batch: 33845, sum loss: 4882.766602, avg loss: 3.001086, ppl: 20.107357 +epoch: 0, batch: 33846, sum loss: 5018.453125, avg loss: 3.043331, ppl: 20.974998 +epoch: 0, batch: 33847, sum loss: 6026.750488, avg loss: 3.063930, ppl: 21.411541 +epoch: 0, batch: 33848, sum loss: 4588.974121, avg loss: 2.823984, ppl: 16.843821 +epoch: 0, batch: 33849, sum loss: 5790.471680, avg loss: 2.990946, ppl: 19.904505 +epoch: 0, batch: 33850, sum loss: 4767.848145, avg loss: 2.870468, ppl: 17.645281 +epoch: 0, batch: 33851, sum loss: 5115.693359, avg loss: 3.115526, ppl: 22.545284 +epoch: 0, batch: 33852, sum loss: 5446.627441, avg loss: 3.168486, ppl: 23.771469 +epoch: 0, batch: 33853, sum loss: 5492.588867, avg loss: 3.104912, ppl: 22.307251 +epoch: 0, batch: 33854, sum loss: 5326.017578, avg loss: 3.066216, ppl: 21.460546 +epoch: 0, batch: 33855, sum loss: 5087.617188, avg loss: 2.900580, ppl: 18.184689 +epoch: 0, batch: 33856, sum loss: 6169.274902, avg loss: 3.107947, ppl: 22.375063 +epoch: 0, batch: 33857, sum loss: 5013.269531, avg loss: 3.088891, ppl: 21.952709 +epoch: 0, batch: 33858, sum loss: 5242.244629, avg loss: 3.142833, ppl: 23.169401 +epoch: 0, batch: 33859, sum loss: 4797.127441, avg loss: 2.835182, ppl: 17.033495 +epoch: 0, batch: 33860, sum loss: 5782.106934, avg loss: 3.317330, ppl: 27.586605 +epoch: 0, batch: 33861, sum loss: 4454.542969, avg loss: 3.038570, ppl: 20.875364 +epoch: 0, batch: 33862, sum loss: 4804.750488, avg loss: 2.767713, ppl: 15.922183 +epoch: 0, batch: 33863, sum loss: 4286.719238, avg loss: 2.827651, ppl: 
16.905706 +epoch: 0, batch: 33864, sum loss: 5588.372070, avg loss: 3.208021, ppl: 24.730089 +epoch: 0, batch: 33865, sum loss: 5827.885742, avg loss: 3.151912, ppl: 23.380732 +epoch: 0, batch: 33866, sum loss: 5366.575684, avg loss: 2.999763, ppl: 20.080772 +epoch: 0, batch: 33867, sum loss: 4861.845703, avg loss: 3.112577, ppl: 22.478909 +epoch: 0, batch: 33868, sum loss: 4725.976562, avg loss: 2.933567, ppl: 18.794556 +epoch: 0, batch: 33869, sum loss: 4956.394043, avg loss: 2.886659, ppl: 17.933302 +epoch: 0, batch: 33870, sum loss: 5167.316895, avg loss: 3.118477, ppl: 22.611923 +epoch: 0, batch: 33871, sum loss: 5288.877441, avg loss: 2.949737, ppl: 19.100922 +epoch: 0, batch: 33872, sum loss: 5180.445312, avg loss: 3.067167, ppl: 21.480967 +epoch: 0, batch: 33873, sum loss: 5655.991699, avg loss: 3.199090, ppl: 24.510227 +epoch: 0, batch: 33874, sum loss: 5790.568359, avg loss: 3.236763, ppl: 25.451191 +epoch: 0, batch: 33875, sum loss: 5195.486328, avg loss: 2.943618, ppl: 18.984413 +epoch: 0, batch: 33876, sum loss: 4872.541016, avg loss: 2.859473, ppl: 17.452318 +epoch: 0, batch: 33877, sum loss: 4774.250977, avg loss: 2.826673, ppl: 16.889181 +epoch: 0, batch: 33878, sum loss: 5267.577637, avg loss: 3.027344, ppl: 20.642324 +epoch: 0, batch: 33879, sum loss: 4958.589355, avg loss: 2.856330, ppl: 17.397564 +epoch: 0, batch: 33880, sum loss: 5142.879883, avg loss: 2.938788, ppl: 18.892942 +epoch: 0, batch: 33881, sum loss: 4693.054688, avg loss: 2.895160, ppl: 18.086399 +epoch: 0, batch: 33882, sum loss: 5588.744629, avg loss: 2.991833, ppl: 19.922171 +epoch: 0, batch: 33883, sum loss: 4795.492188, avg loss: 3.068133, ppl: 21.501724 +epoch: 0, batch: 33884, sum loss: 4394.523926, avg loss: 2.826061, ppl: 16.878836 +epoch: 0, batch: 33885, sum loss: 6566.936035, avg loss: 3.230170, ppl: 25.283955 +epoch: 0, batch: 33886, sum loss: 4354.557129, avg loss: 2.833154, ppl: 16.998987 +epoch: 0, batch: 33887, sum loss: 5492.426758, avg loss: 3.063261, ppl: 
21.397221 +epoch: 0, batch: 33888, sum loss: 5364.038574, avg loss: 3.157174, ppl: 23.504082 +epoch: 0, batch: 33889, sum loss: 5058.816895, avg loss: 3.052997, ppl: 21.178730 +epoch: 0, batch: 33890, sum loss: 5359.070801, avg loss: 3.176687, ppl: 23.967218 +epoch: 0, batch: 33891, sum loss: 5158.726562, avg loss: 3.043496, ppl: 20.978464 +epoch: 0, batch: 33892, sum loss: 5509.362305, avg loss: 2.864983, ppl: 17.548752 +epoch: 0, batch: 33893, sum loss: 6018.536621, avg loss: 3.121648, ppl: 22.683727 +epoch: 0, batch: 33894, sum loss: 4776.342285, avg loss: 3.113652, ppl: 22.503075 +epoch: 0, batch: 33895, sum loss: 4742.754395, avg loss: 2.651064, ppl: 14.169112 +epoch: 0, batch: 33896, sum loss: 4750.721680, avg loss: 2.822770, ppl: 16.823385 +epoch: 0, batch: 33897, sum loss: 4817.382812, avg loss: 2.855592, ppl: 17.384718 +epoch: 0, batch: 33898, sum loss: 5526.766602, avg loss: 3.048410, ppl: 21.081791 +epoch: 0, batch: 33899, sum loss: 5560.322754, avg loss: 3.215918, ppl: 24.926170 +epoch: 0, batch: 33900, sum loss: 5372.591309, avg loss: 3.145545, ppl: 23.232340 +epoch: 0, batch: 33901, sum loss: 5489.144043, avg loss: 3.088995, ppl: 21.955002 +epoch: 0, batch: 33902, sum loss: 6249.708008, avg loss: 3.172441, ppl: 23.865664 +epoch: 0, batch: 33903, sum loss: 4581.516113, avg loss: 2.774995, ppl: 16.038544 +epoch: 0, batch: 33904, sum loss: 4559.204102, avg loss: 2.700950, ppl: 14.893880 +epoch: 0, batch: 33905, sum loss: 3527.215088, avg loss: 2.427540, ppl: 11.330969 +epoch: 0, batch: 33906, sum loss: 5216.490723, avg loss: 3.140572, ppl: 23.117094 +epoch: 0, batch: 33907, sum loss: 5245.733887, avg loss: 3.137401, ppl: 23.043896 +epoch: 0, batch: 33908, sum loss: 4521.336914, avg loss: 2.715518, ppl: 15.112432 +epoch: 0, batch: 33909, sum loss: 5186.670898, avg loss: 2.933638, ppl: 18.795879 +epoch: 0, batch: 33910, sum loss: 3866.889404, avg loss: 2.689075, ppl: 14.718052 +epoch: 0, batch: 33911, sum loss: 5549.647461, avg loss: 3.319167, ppl: 
27.637329 +epoch: 0, batch: 33912, sum loss: 5034.807129, avg loss: 3.190625, ppl: 24.303623 +epoch: 0, batch: 33913, sum loss: 4710.453613, avg loss: 3.086798, ppl: 21.906818 +epoch: 0, batch: 33914, sum loss: 5493.584961, avg loss: 3.114277, ppl: 22.517147 +epoch: 0, batch: 33915, sum loss: 4726.657227, avg loss: 2.963422, ppl: 19.364115 +epoch: 0, batch: 33916, sum loss: 4634.051758, avg loss: 2.677095, ppl: 14.542788 +epoch: 0, batch: 33917, sum loss: 5237.985840, avg loss: 3.178390, ppl: 24.008059 +epoch: 0, batch: 33918, sum loss: 5295.797852, avg loss: 2.993668, ppl: 19.958750 +epoch: 0, batch: 33919, sum loss: 4689.015625, avg loss: 2.840106, ppl: 17.117588 +epoch: 0, batch: 33920, sum loss: 5301.469727, avg loss: 3.039834, ppl: 20.901764 +epoch: 0, batch: 33921, sum loss: 5216.820801, avg loss: 3.101558, ppl: 22.232567 +epoch: 0, batch: 33922, sum loss: 5935.575195, avg loss: 3.119062, ppl: 22.625151 +epoch: 0, batch: 33923, sum loss: 6073.335449, avg loss: 3.148437, ppl: 23.299625 +epoch: 0, batch: 33924, sum loss: 4797.405762, avg loss: 2.859002, ppl: 17.444111 +epoch: 0, batch: 33925, sum loss: 4850.641602, avg loss: 2.901102, ppl: 18.194178 +epoch: 0, batch: 33926, sum loss: 5075.825684, avg loss: 2.805874, ppl: 16.541525 +epoch: 0, batch: 33927, sum loss: 5929.331543, avg loss: 3.081773, ppl: 21.797020 +epoch: 0, batch: 33928, sum loss: 5024.859863, avg loss: 2.945404, ppl: 19.018351 +epoch: 0, batch: 33929, sum loss: 4876.239746, avg loss: 2.978766, ppl: 19.663536 +epoch: 0, batch: 33930, sum loss: 4762.665527, avg loss: 2.858743, ppl: 17.439587 +epoch: 0, batch: 33931, sum loss: 4807.412109, avg loss: 2.693228, ppl: 14.779310 +epoch: 0, batch: 33932, sum loss: 5564.748535, avg loss: 3.117506, ppl: 22.589975 +epoch: 0, batch: 33933, sum loss: 4878.265137, avg loss: 2.871257, ppl: 17.659199 +epoch: 0, batch: 33934, sum loss: 4646.283691, avg loss: 2.598593, ppl: 13.444804 +epoch: 0, batch: 33935, sum loss: 4969.585449, avg loss: 3.058206, ppl: 
21.289337 +epoch: 0, batch: 33936, sum loss: 4200.954102, avg loss: 2.667272, ppl: 14.400636 +epoch: 0, batch: 33937, sum loss: 4686.286621, avg loss: 2.742122, ppl: 15.519886 +epoch: 0, batch: 33938, sum loss: 5215.739258, avg loss: 2.994110, ppl: 19.967573 +epoch: 0, batch: 33939, sum loss: 5118.613281, avg loss: 2.948510, ppl: 19.077505 +epoch: 0, batch: 33940, sum loss: 3609.704590, avg loss: 2.382643, ppl: 10.833503 +epoch: 0, batch: 33941, sum loss: 4673.371582, avg loss: 3.018974, ppl: 20.470280 +epoch: 0, batch: 33942, sum loss: 4563.325684, avg loss: 2.836125, ppl: 17.049576 +epoch: 0, batch: 33943, sum loss: 4940.809570, avg loss: 2.951499, ppl: 19.134619 +epoch: 0, batch: 33944, sum loss: 6897.486816, avg loss: 3.392763, ppl: 29.748022 +epoch: 0, batch: 33945, sum loss: 4981.007324, avg loss: 2.875870, ppl: 17.740856 +epoch: 0, batch: 33946, sum loss: 5119.045410, avg loss: 2.921830, ppl: 18.575243 +epoch: 0, batch: 33947, sum loss: 5313.965820, avg loss: 3.113044, ppl: 22.489393 +epoch: 0, batch: 33948, sum loss: 4410.064453, avg loss: 3.062545, ppl: 21.381901 +epoch: 0, batch: 33949, sum loss: 4866.830078, avg loss: 2.777871, ppl: 16.084742 +epoch: 0, batch: 33950, sum loss: 5004.164551, avg loss: 3.058780, ppl: 21.301563 +epoch: 0, batch: 33951, sum loss: 4340.583496, avg loss: 2.844419, ppl: 17.191572 +epoch: 0, batch: 33952, sum loss: 4422.228027, avg loss: 2.719697, ppl: 15.175731 +epoch: 0, batch: 33953, sum loss: 5156.302246, avg loss: 2.982245, ppl: 19.732075 +epoch: 0, batch: 33954, sum loss: 5564.887207, avg loss: 2.936616, ppl: 18.851942 +epoch: 0, batch: 33955, sum loss: 5504.360352, avg loss: 2.920085, ppl: 18.542868 +epoch: 0, batch: 33956, sum loss: 5194.585938, avg loss: 2.964947, ppl: 19.393665 +epoch: 0, batch: 33957, sum loss: 4700.513672, avg loss: 2.752057, ppl: 15.674843 +epoch: 0, batch: 33958, sum loss: 5066.889160, avg loss: 2.927145, ppl: 18.674248 +epoch: 0, batch: 33959, sum loss: 4970.015625, avg loss: 2.914965, ppl: 
18.448174 +epoch: 0, batch: 33960, sum loss: 3455.076172, avg loss: 2.440026, ppl: 11.473334 +epoch: 0, batch: 33961, sum loss: 4590.918457, avg loss: 2.907485, ppl: 18.310684 +epoch: 0, batch: 33962, sum loss: 5682.993164, avg loss: 3.183750, ppl: 24.137096 +epoch: 0, batch: 33963, sum loss: 4869.869629, avg loss: 2.831320, ppl: 16.967836 +epoch: 0, batch: 33964, sum loss: 6591.286133, avg loss: 3.298942, ppl: 27.083965 +epoch: 0, batch: 33965, sum loss: 4833.604492, avg loss: 2.927683, ppl: 18.684292 +epoch: 0, batch: 33966, sum loss: 5112.062012, avg loss: 3.132391, ppl: 22.928736 +epoch: 0, batch: 33967, sum loss: 4407.699219, avg loss: 2.818222, ppl: 16.747049 +epoch: 0, batch: 33968, sum loss: 5738.800293, avg loss: 2.916057, ppl: 18.468330 +epoch: 0, batch: 33969, sum loss: 4968.309082, avg loss: 2.959088, ppl: 19.280382 +epoch: 0, batch: 33970, sum loss: 5075.208984, avg loss: 2.903438, ppl: 18.236732 +epoch: 0, batch: 33971, sum loss: 4800.683594, avg loss: 2.776567, ppl: 16.063776 +epoch: 0, batch: 33972, sum loss: 6104.857422, avg loss: 3.189581, ppl: 24.278244 +epoch: 0, batch: 33973, sum loss: 5645.672363, avg loss: 2.963608, ppl: 19.367716 +epoch: 0, batch: 33974, sum loss: 4655.187500, avg loss: 2.875348, ppl: 17.731586 +epoch: 0, batch: 33975, sum loss: 4840.334961, avg loss: 2.783401, ppl: 16.173943 +epoch: 0, batch: 33976, sum loss: 5340.541504, avg loss: 2.990225, ppl: 19.890154 +epoch: 0, batch: 33977, sum loss: 5344.173340, avg loss: 2.884065, ppl: 17.886843 +epoch: 0, batch: 33978, sum loss: 5229.067871, avg loss: 2.905038, ppl: 18.265936 +epoch: 0, batch: 33979, sum loss: 4663.109863, avg loss: 2.938318, ppl: 18.884048 +epoch: 0, batch: 33980, sum loss: 5754.916016, avg loss: 2.837730, ppl: 17.076956 +epoch: 0, batch: 33981, sum loss: 5150.315430, avg loss: 2.845478, ppl: 17.209787 +epoch: 0, batch: 33982, sum loss: 4178.480469, avg loss: 2.787512, ppl: 16.240564 +epoch: 0, batch: 33983, sum loss: 5184.206543, avg loss: 2.902691, ppl: 
18.223124 +epoch: 0, batch: 33984, sum loss: 5544.114258, avg loss: 3.197298, ppl: 24.466322 +epoch: 0, batch: 33985, sum loss: 5794.646973, avg loss: 2.898773, ppl: 18.151859 +epoch: 0, batch: 33986, sum loss: 5740.190918, avg loss: 2.808313, ppl: 16.581915 +epoch: 0, batch: 33987, sum loss: 4787.532227, avg loss: 2.984746, ppl: 19.781477 +epoch: 0, batch: 33988, sum loss: 4891.613281, avg loss: 3.162000, ppl: 23.617777 +epoch: 0, batch: 33989, sum loss: 4325.057129, avg loss: 2.815793, ppl: 16.706411 +epoch: 0, batch: 33990, sum loss: 4102.357910, avg loss: 2.695373, ppl: 14.811047 +epoch: 0, batch: 33991, sum loss: 5063.661133, avg loss: 2.847954, ppl: 17.252439 +epoch: 0, batch: 33992, sum loss: 6899.657227, avg loss: 3.420752, ppl: 30.592421 +epoch: 0, batch: 33993, sum loss: 4494.736328, avg loss: 2.909214, ppl: 18.342384 +epoch: 0, batch: 33994, sum loss: 4220.385742, avg loss: 2.666068, ppl: 14.383301 +epoch: 0, batch: 33995, sum loss: 5391.636719, avg loss: 2.936621, ppl: 18.852041 +epoch: 0, batch: 33996, sum loss: 5388.477539, avg loss: 2.947745, ppl: 19.062916 +epoch: 0, batch: 33997, sum loss: 5024.854980, avg loss: 2.985653, ppl: 19.799435 +epoch: 0, batch: 33998, sum loss: 4367.529297, avg loss: 2.830544, ppl: 16.954681 +epoch: 0, batch: 33999, sum loss: 5742.661621, avg loss: 3.040054, ppl: 20.906368 +epoch: 0, batch: 34000, sum loss: 4982.583984, avg loss: 3.010625, ppl: 20.300081 +epoch: 0, batch: 34001, sum loss: 5439.522461, avg loss: 3.030375, ppl: 20.704990 +epoch: 0, batch: 34002, sum loss: 5532.609375, avg loss: 3.092571, ppl: 22.033649 +epoch: 0, batch: 34003, sum loss: 5720.524414, avg loss: 3.013975, ppl: 20.368206 +epoch: 0, batch: 34004, sum loss: 4082.913086, avg loss: 2.825545, ppl: 16.870142 +epoch: 0, batch: 34005, sum loss: 4951.718750, avg loss: 2.974005, ppl: 19.570147 +epoch: 0, batch: 34006, sum loss: 5421.826660, avg loss: 2.933889, ppl: 18.800602 +epoch: 0, batch: 34007, sum loss: 5005.084961, avg loss: 3.015111, ppl: 
20.391363 +epoch: 0, batch: 34008, sum loss: 5084.902344, avg loss: 2.907320, ppl: 18.307665 +epoch: 0, batch: 34009, sum loss: 4553.812988, avg loss: 2.743261, ppl: 15.537568 +epoch: 0, batch: 34010, sum loss: 5456.016602, avg loss: 3.211311, ppl: 24.811584 +epoch: 0, batch: 34011, sum loss: 6740.373535, avg loss: 3.164495, ppl: 23.676779 +epoch: 0, batch: 34012, sum loss: 5741.861328, avg loss: 3.214928, ppl: 24.901503 +epoch: 0, batch: 34013, sum loss: 4899.378906, avg loss: 2.912829, ppl: 18.408806 +epoch: 0, batch: 34014, sum loss: 5982.594727, avg loss: 3.305301, ppl: 27.256750 +epoch: 0, batch: 34015, sum loss: 5162.380859, avg loss: 2.856879, ppl: 17.407114 +epoch: 0, batch: 34016, sum loss: 6000.581055, avg loss: 3.133463, ppl: 22.953321 +epoch: 0, batch: 34017, sum loss: 5227.976074, avg loss: 2.875674, ppl: 17.737383 +epoch: 0, batch: 34018, sum loss: 5188.268555, avg loss: 3.037628, ppl: 20.855713 +epoch: 0, batch: 34019, sum loss: 4305.188477, avg loss: 2.856794, ppl: 17.405632 +epoch: 0, batch: 34020, sum loss: 5246.069336, avg loss: 2.957198, ppl: 19.243973 +epoch: 0, batch: 34021, sum loss: 5083.507812, avg loss: 3.044017, ppl: 20.989380 +epoch: 0, batch: 34022, sum loss: 5084.083984, avg loss: 3.134454, ppl: 22.976082 +epoch: 0, batch: 34023, sum loss: 5139.407715, avg loss: 3.001990, ppl: 20.125553 +epoch: 0, batch: 34024, sum loss: 4695.021484, avg loss: 2.943587, ppl: 18.983820 +epoch: 0, batch: 34025, sum loss: 4598.486816, avg loss: 2.976367, ppl: 19.616421 +epoch: 0, batch: 34026, sum loss: 4585.635254, avg loss: 2.823667, ppl: 16.838486 +epoch: 0, batch: 34027, sum loss: 4986.216797, avg loss: 2.880541, ppl: 17.823915 +epoch: 0, batch: 34028, sum loss: 4710.996582, avg loss: 3.025688, ppl: 20.608181 +epoch: 0, batch: 34029, sum loss: 4930.241211, avg loss: 2.954009, ppl: 19.182709 +epoch: 0, batch: 34030, sum loss: 5027.397949, avg loss: 2.788352, ppl: 16.254204 +epoch: 0, batch: 34031, sum loss: 5325.973633, avg loss: 2.781187, ppl: 
16.138170 +epoch: 0, batch: 34032, sum loss: 5441.464844, avg loss: 2.957318, ppl: 19.246281 +epoch: 0, batch: 34033, sum loss: 5403.064453, avg loss: 3.146805, ppl: 23.261631 +epoch: 0, batch: 34034, sum loss: 4990.941406, avg loss: 2.848711, ppl: 17.265512 +epoch: 0, batch: 34035, sum loss: 5228.928711, avg loss: 2.977750, ppl: 19.643570 +epoch: 0, batch: 34036, sum loss: 4934.227539, avg loss: 3.187485, ppl: 24.227430 +epoch: 0, batch: 34037, sum loss: 4725.889160, avg loss: 2.755620, ppl: 15.730799 +epoch: 0, batch: 34038, sum loss: 4579.436035, avg loss: 2.711330, ppl: 15.049276 +epoch: 0, batch: 34039, sum loss: 3866.686523, avg loss: 2.719189, ppl: 15.168012 +epoch: 0, batch: 34040, sum loss: 4104.097656, avg loss: 2.734242, ppl: 15.398074 +epoch: 0, batch: 34041, sum loss: 4043.584473, avg loss: 2.582110, ppl: 13.225016 +epoch: 0, batch: 34042, sum loss: 4616.795410, avg loss: 2.851634, ppl: 17.316053 +epoch: 0, batch: 34043, sum loss: 4002.882812, avg loss: 2.570894, ppl: 13.077517 +epoch: 0, batch: 34044, sum loss: 3839.606445, avg loss: 2.610202, ppl: 13.601792 +epoch: 0, batch: 34045, sum loss: 5037.319336, avg loss: 3.003768, ppl: 20.161366 +epoch: 0, batch: 34046, sum loss: 5086.349121, avg loss: 2.802396, ppl: 16.484100 +epoch: 0, batch: 34047, sum loss: 5591.340820, avg loss: 3.157166, ppl: 23.503885 +epoch: 0, batch: 34048, sum loss: 4387.273438, avg loss: 2.984540, ppl: 19.777397 +epoch: 0, batch: 34049, sum loss: 4497.628418, avg loss: 2.926239, ppl: 18.657320 +epoch: 0, batch: 34050, sum loss: 5090.356934, avg loss: 2.989053, ppl: 19.866856 +epoch: 0, batch: 34051, sum loss: 4440.834473, avg loss: 2.839408, ppl: 17.105639 +epoch: 0, batch: 34052, sum loss: 4770.149902, avg loss: 2.859802, ppl: 17.458075 +epoch: 0, batch: 34053, sum loss: 4090.965576, avg loss: 2.917950, ppl: 18.503315 +epoch: 0, batch: 34054, sum loss: 4640.784668, avg loss: 2.793970, ppl: 16.345789 +epoch: 0, batch: 34055, sum loss: 6079.115723, avg loss: 3.119095, ppl: 
22.625891 +epoch: 0, batch: 34056, sum loss: 4671.683594, avg loss: 2.834759, ppl: 17.026304 +epoch: 0, batch: 34057, sum loss: 5204.005859, avg loss: 3.059380, ppl: 21.314344 +epoch: 0, batch: 34058, sum loss: 4904.762695, avg loss: 2.888553, ppl: 17.967291 +epoch: 0, batch: 34059, sum loss: 5167.341797, avg loss: 3.243780, ppl: 25.630424 +epoch: 0, batch: 34060, sum loss: 5264.087891, avg loss: 3.056962, ppl: 21.262852 +epoch: 0, batch: 34061, sum loss: 5585.577637, avg loss: 3.249318, ppl: 25.772760 +epoch: 0, batch: 34062, sum loss: 6535.214844, avg loss: 3.246505, ppl: 25.700367 +epoch: 0, batch: 34063, sum loss: 5085.026367, avg loss: 2.887579, ppl: 17.949795 +epoch: 0, batch: 34064, sum loss: 5162.368164, avg loss: 2.845848, ppl: 17.216154 +epoch: 0, batch: 34065, sum loss: 4937.625000, avg loss: 2.932081, ppl: 18.766651 +epoch: 0, batch: 34066, sum loss: 5008.139648, avg loss: 3.112579, ppl: 22.478945 +epoch: 0, batch: 34067, sum loss: 5000.041016, avg loss: 2.942932, ppl: 18.971386 +epoch: 0, batch: 34068, sum loss: 4925.537598, avg loss: 2.928381, ppl: 18.697344 +epoch: 0, batch: 34069, sum loss: 4937.460938, avg loss: 2.889094, ppl: 17.977009 +epoch: 0, batch: 34070, sum loss: 5158.546387, avg loss: 3.059636, ppl: 21.319798 +epoch: 0, batch: 34071, sum loss: 4991.951172, avg loss: 2.844417, ppl: 17.191526 +epoch: 0, batch: 34072, sum loss: 4666.831055, avg loss: 2.830098, ppl: 16.947117 +epoch: 0, batch: 34073, sum loss: 5618.826660, avg loss: 3.205263, ppl: 24.661995 +epoch: 0, batch: 34074, sum loss: 4728.817871, avg loss: 2.949980, ppl: 19.105568 +epoch: 0, batch: 34075, sum loss: 5791.302734, avg loss: 2.982133, ppl: 19.729864 +epoch: 0, batch: 34076, sum loss: 4567.796387, avg loss: 2.960335, ppl: 19.304434 +epoch: 0, batch: 34077, sum loss: 4467.128906, avg loss: 2.919692, ppl: 18.535583 +epoch: 0, batch: 34078, sum loss: 5223.524414, avg loss: 3.092673, ppl: 22.035898 +epoch: 0, batch: 34079, sum loss: 3373.026611, avg loss: 2.480167, ppl: 
11.943254 +epoch: 0, batch: 34080, sum loss: 5010.224121, avg loss: 2.909538, ppl: 18.348316 +epoch: 0, batch: 34081, sum loss: 5097.711914, avg loss: 2.906335, ppl: 18.289646 +epoch: 0, batch: 34082, sum loss: 5915.659180, avg loss: 2.899833, ppl: 18.171106 +epoch: 0, batch: 34083, sum loss: 4712.446777, avg loss: 2.864709, ppl: 17.543949 +epoch: 0, batch: 34084, sum loss: 4901.519531, avg loss: 2.829977, ppl: 16.945065 +epoch: 0, batch: 34085, sum loss: 6209.132812, avg loss: 3.376364, ppl: 29.264166 +epoch: 0, batch: 34086, sum loss: 6280.067383, avg loss: 3.084512, ppl: 21.856808 +epoch: 0, batch: 34087, sum loss: 4215.228516, avg loss: 2.804543, ppl: 16.519529 +epoch: 0, batch: 34088, sum loss: 5244.946289, avg loss: 3.090717, ppl: 21.992838 +epoch: 0, batch: 34089, sum loss: 5416.221680, avg loss: 3.084409, ppl: 21.854542 +epoch: 0, batch: 34090, sum loss: 4749.224121, avg loss: 2.962710, ppl: 19.350338 +epoch: 0, batch: 34091, sum loss: 5457.909180, avg loss: 3.037234, ppl: 20.847496 +epoch: 0, batch: 34092, sum loss: 5489.186035, avg loss: 2.943263, ppl: 18.977674 +epoch: 0, batch: 34093, sum loss: 4153.144531, avg loss: 2.507937, ppl: 12.279576 +epoch: 0, batch: 34094, sum loss: 5109.444336, avg loss: 2.961997, ppl: 19.336544 +epoch: 0, batch: 34095, sum loss: 5526.359375, avg loss: 3.010000, ppl: 20.287390 +epoch: 0, batch: 34096, sum loss: 3948.193359, avg loss: 2.913796, ppl: 18.426607 +epoch: 0, batch: 34097, sum loss: 5426.096191, avg loss: 3.026267, ppl: 20.620110 +epoch: 0, batch: 34098, sum loss: 4919.995117, avg loss: 2.754757, ppl: 15.717216 +epoch: 0, batch: 34099, sum loss: 4678.154297, avg loss: 2.797939, ppl: 16.410793 +epoch: 0, batch: 34100, sum loss: 4278.807129, avg loss: 2.800267, ppl: 16.449030 +epoch: 0, batch: 34101, sum loss: 4825.589355, avg loss: 2.868959, ppl: 17.618671 +epoch: 0, batch: 34102, sum loss: 4682.510742, avg loss: 2.954265, ppl: 19.187622 +epoch: 0, batch: 34103, sum loss: 5095.989746, avg loss: 3.020741, ppl: 
20.506481 +epoch: 0, batch: 34104, sum loss: 4828.965820, avg loss: 2.666464, ppl: 14.388997 +epoch: 0, batch: 34105, sum loss: 5687.158203, avg loss: 2.926999, ppl: 18.671507 +epoch: 0, batch: 34106, sum loss: 5073.458008, avg loss: 3.054460, ppl: 21.209730 +epoch: 0, batch: 34107, sum loss: 5012.958984, avg loss: 2.952273, ppl: 19.149429 +epoch: 0, batch: 34108, sum loss: 5086.681641, avg loss: 3.079105, ppl: 21.738941 +epoch: 0, batch: 34109, sum loss: 4833.838379, avg loss: 2.931376, ppl: 18.753408 +epoch: 0, batch: 34110, sum loss: 4763.484863, avg loss: 2.653752, ppl: 14.207246 +epoch: 0, batch: 34111, sum loss: 4828.973145, avg loss: 2.867561, ppl: 17.594059 +epoch: 0, batch: 34112, sum loss: 4684.622070, avg loss: 2.851261, ppl: 17.309597 +epoch: 0, batch: 34113, sum loss: 5031.956543, avg loss: 2.838103, ppl: 17.083328 +epoch: 0, batch: 34114, sum loss: 4663.528320, avg loss: 2.981796, ppl: 19.723198 +epoch: 0, batch: 34115, sum loss: 4563.954102, avg loss: 2.927488, ppl: 18.680653 +epoch: 0, batch: 34116, sum loss: 4876.644043, avg loss: 2.646036, ppl: 14.098042 +epoch: 0, batch: 34117, sum loss: 5244.731934, avg loss: 3.028136, ppl: 20.658693 +epoch: 0, batch: 34118, sum loss: 5701.658203, avg loss: 3.212202, ppl: 24.833700 +epoch: 0, batch: 34119, sum loss: 4492.193848, avg loss: 2.807621, ppl: 16.570450 +epoch: 0, batch: 34120, sum loss: 5611.778320, avg loss: 3.058190, ppl: 21.288986 +epoch: 0, batch: 34121, sum loss: 5862.191895, avg loss: 3.168752, ppl: 23.777802 +epoch: 0, batch: 34122, sum loss: 5650.744141, avg loss: 3.081104, ppl: 21.782427 +epoch: 0, batch: 34123, sum loss: 4267.696289, avg loss: 2.891393, ppl: 18.018394 +epoch: 0, batch: 34124, sum loss: 4620.137695, avg loss: 2.678341, ppl: 14.560915 +epoch: 0, batch: 34125, sum loss: 4966.089844, avg loss: 2.928119, ppl: 18.692436 +epoch: 0, batch: 34126, sum loss: 4435.161621, avg loss: 2.817765, ppl: 16.739388 +epoch: 0, batch: 34127, sum loss: 4714.100098, avg loss: 2.766491, ppl: 
15.902728 +epoch: 0, batch: 34128, sum loss: 4678.261230, avg loss: 2.791325, ppl: 16.302612 +epoch: 0, batch: 34129, sum loss: 6300.495605, avg loss: 3.198221, ppl: 24.488930 +epoch: 0, batch: 34130, sum loss: 5631.026367, avg loss: 2.965259, ppl: 19.399723 +epoch: 0, batch: 34131, sum loss: 5619.122070, avg loss: 2.861060, ppl: 17.480049 +epoch: 0, batch: 34132, sum loss: 4583.030273, avg loss: 2.829031, ppl: 16.929052 +epoch: 0, batch: 34133, sum loss: 3894.267334, avg loss: 2.584119, ppl: 13.251611 +epoch: 0, batch: 34134, sum loss: 4850.283691, avg loss: 3.168050, ppl: 23.761095 +epoch: 0, batch: 34135, sum loss: 5107.215820, avg loss: 3.023810, ppl: 20.569521 +epoch: 0, batch: 34136, sum loss: 5119.446289, avg loss: 3.025677, ppl: 20.607960 +epoch: 0, batch: 34137, sum loss: 4595.905273, avg loss: 2.942321, ppl: 18.959803 +epoch: 0, batch: 34138, sum loss: 5328.520996, avg loss: 3.048353, ppl: 21.080601 +epoch: 0, batch: 34139, sum loss: 4859.215332, avg loss: 2.751538, ppl: 15.666702 +epoch: 0, batch: 34140, sum loss: 5236.424805, avg loss: 2.983718, ppl: 19.761147 +epoch: 0, batch: 34141, sum loss: 4331.344238, avg loss: 2.637847, ppl: 13.983062 +epoch: 0, batch: 34142, sum loss: 3947.041016, avg loss: 2.578080, ppl: 13.171829 +epoch: 0, batch: 34143, sum loss: 6130.465820, avg loss: 3.138999, ppl: 23.080763 +epoch: 0, batch: 34144, sum loss: 4642.301758, avg loss: 2.743677, ppl: 15.544038 +epoch: 0, batch: 34145, sum loss: 5246.310547, avg loss: 2.741019, ppl: 15.502774 +epoch: 0, batch: 34146, sum loss: 5128.854492, avg loss: 2.978429, ppl: 19.656908 +epoch: 0, batch: 34147, sum loss: 4215.909180, avg loss: 2.925683, ppl: 18.646957 +epoch: 0, batch: 34148, sum loss: 5480.983887, avg loss: 2.962694, ppl: 19.350035 +epoch: 0, batch: 34149, sum loss: 4842.002930, avg loss: 2.954242, ppl: 19.187174 +epoch: 0, batch: 34150, sum loss: 5698.875488, avg loss: 3.072170, ppl: 21.588699 +epoch: 0, batch: 34151, sum loss: 6034.838867, avg loss: 3.191348, ppl: 
24.321186 +epoch: 0, batch: 34152, sum loss: 4165.184082, avg loss: 2.563190, ppl: 12.977151 +epoch: 0, batch: 34153, sum loss: 4707.204102, avg loss: 2.947529, ppl: 19.058807 +epoch: 0, batch: 34154, sum loss: 5394.609375, avg loss: 3.075604, ppl: 21.662962 +epoch: 0, batch: 34155, sum loss: 5145.592285, avg loss: 3.012642, ppl: 20.341063 +epoch: 0, batch: 34156, sum loss: 5633.574707, avg loss: 2.966601, ppl: 19.425772 +epoch: 0, batch: 34157, sum loss: 5113.277832, avg loss: 2.915210, ppl: 18.452679 +epoch: 0, batch: 34158, sum loss: 4224.963867, avg loss: 2.596782, ppl: 13.420478 +epoch: 0, batch: 34159, sum loss: 4273.042969, avg loss: 2.816772, ppl: 16.722778 +epoch: 0, batch: 34160, sum loss: 5352.964844, avg loss: 3.072885, ppl: 21.604132 +epoch: 0, batch: 34161, sum loss: 4761.074219, avg loss: 2.808893, ppl: 16.591549 +epoch: 0, batch: 34162, sum loss: 5875.466797, avg loss: 3.150384, ppl: 23.345032 +epoch: 0, batch: 34163, sum loss: 5631.596680, avg loss: 3.119998, ppl: 22.646339 +epoch: 0, batch: 34164, sum loss: 5310.965332, avg loss: 3.111286, ppl: 22.449900 +epoch: 0, batch: 34165, sum loss: 6214.204102, avg loss: 3.090107, ppl: 21.979424 +epoch: 0, batch: 34166, sum loss: 5084.309570, avg loss: 2.994293, ppl: 19.971239 +epoch: 0, batch: 34167, sum loss: 4614.407227, avg loss: 2.774749, ppl: 16.034599 +epoch: 0, batch: 34168, sum loss: 5169.769531, avg loss: 2.947417, ppl: 19.056667 +epoch: 0, batch: 34169, sum loss: 4740.381836, avg loss: 2.906427, ppl: 18.291321 +epoch: 0, batch: 34170, sum loss: 4087.455078, avg loss: 2.724970, ppl: 15.255958 +epoch: 0, batch: 34171, sum loss: 4724.791016, avg loss: 3.106372, ppl: 22.339840 +epoch: 0, batch: 34172, sum loss: 5296.333984, avg loss: 3.097271, ppl: 22.137461 +epoch: 0, batch: 34173, sum loss: 6423.938965, avg loss: 3.365081, ppl: 28.935837 +epoch: 0, batch: 34174, sum loss: 4640.461426, avg loss: 2.909380, ppl: 18.345425 +epoch: 0, batch: 34175, sum loss: 5393.102539, avg loss: 3.212092, ppl: 
24.830976 +epoch: 0, batch: 34176, sum loss: 5112.012207, avg loss: 3.151672, ppl: 23.375118 +epoch: 0, batch: 34177, sum loss: 5081.273926, avg loss: 2.949085, ppl: 19.088484 +epoch: 0, batch: 34178, sum loss: 5310.122070, avg loss: 2.867236, ppl: 17.588343 +epoch: 0, batch: 34179, sum loss: 4957.188477, avg loss: 2.912567, ppl: 18.403975 +epoch: 0, batch: 34180, sum loss: 4864.219727, avg loss: 2.759058, ppl: 15.784970 +epoch: 0, batch: 34181, sum loss: 4723.730957, avg loss: 2.831973, ppl: 16.978928 +epoch: 0, batch: 34182, sum loss: 5379.842773, avg loss: 3.037743, ppl: 20.858110 +epoch: 0, batch: 34183, sum loss: 3946.340820, avg loss: 2.662848, ppl: 14.337067 +epoch: 0, batch: 34184, sum loss: 5464.984863, avg loss: 3.051359, ppl: 21.144068 +epoch: 0, batch: 34185, sum loss: 5829.030273, avg loss: 3.066297, ppl: 21.462276 +epoch: 0, batch: 34186, sum loss: 4977.302734, avg loss: 2.987576, ppl: 19.837547 +epoch: 0, batch: 34187, sum loss: 4746.470215, avg loss: 2.787123, ppl: 16.234241 +epoch: 0, batch: 34188, sum loss: 5115.108887, avg loss: 2.973901, ppl: 19.568098 +epoch: 0, batch: 34189, sum loss: 4323.878418, avg loss: 2.888362, ppl: 17.963863 +epoch: 0, batch: 34190, sum loss: 5051.046387, avg loss: 2.914626, ppl: 18.441908 +epoch: 0, batch: 34191, sum loss: 4828.096680, avg loss: 2.667457, ppl: 14.403293 +epoch: 0, batch: 34192, sum loss: 4823.906250, avg loss: 2.941406, ppl: 18.942465 +epoch: 0, batch: 34193, sum loss: 4635.891602, avg loss: 2.915655, ppl: 18.460903 +epoch: 0, batch: 34194, sum loss: 5640.950684, avg loss: 3.118270, ppl: 22.607244 +epoch: 0, batch: 34195, sum loss: 4984.336914, avg loss: 2.835231, ppl: 17.034340 +epoch: 0, batch: 34196, sum loss: 5364.713379, avg loss: 2.881156, ppl: 17.834887 +epoch: 0, batch: 34197, sum loss: 4738.135254, avg loss: 2.838907, ppl: 17.097065 +epoch: 0, batch: 34198, sum loss: 4675.920898, avg loss: 2.859890, ppl: 17.459614 +epoch: 0, batch: 34199, sum loss: 5024.950684, avg loss: 2.823006, ppl: 
16.827356 +epoch: 0, batch: 34200, sum loss: 4807.853516, avg loss: 2.908562, ppl: 18.330423 +epoch: 0, batch: 34201, sum loss: 4316.307617, avg loss: 2.894908, ppl: 18.081837 +epoch: 0, batch: 34202, sum loss: 5029.451660, avg loss: 3.066739, ppl: 21.471766 +epoch: 0, batch: 34203, sum loss: 6268.991211, avg loss: 3.167757, ppl: 23.754145 +epoch: 0, batch: 34204, sum loss: 5793.302734, avg loss: 3.028386, ppl: 20.663857 +epoch: 0, batch: 34205, sum loss: 4423.411133, avg loss: 2.794322, ppl: 16.351532 +epoch: 0, batch: 34206, sum loss: 4548.231445, avg loss: 2.828502, ppl: 16.920099 +epoch: 0, batch: 34207, sum loss: 5303.996582, avg loss: 3.027395, ppl: 20.643396 +epoch: 0, batch: 34208, sum loss: 4202.699219, avg loss: 2.777726, ppl: 16.082407 +epoch: 0, batch: 34209, sum loss: 5431.538086, avg loss: 2.986002, ppl: 19.806343 +epoch: 0, batch: 34210, sum loss: 5149.936035, avg loss: 3.153666, ppl: 23.421761 +epoch: 0, batch: 34211, sum loss: 4705.820312, avg loss: 2.829718, ppl: 16.940678 +epoch: 0, batch: 34212, sum loss: 4741.476562, avg loss: 2.772793, ppl: 16.003273 +epoch: 0, batch: 34213, sum loss: 5514.171875, avg loss: 3.228438, ppl: 25.240192 +epoch: 0, batch: 34214, sum loss: 5425.883301, avg loss: 3.055115, ppl: 21.223616 +epoch: 0, batch: 34215, sum loss: 4914.484863, avg loss: 2.899401, ppl: 18.163265 +epoch: 0, batch: 34216, sum loss: 4735.202148, avg loss: 2.594631, ppl: 13.391651 +epoch: 0, batch: 34217, sum loss: 3955.282227, avg loss: 2.612472, ppl: 13.632707 +epoch: 0, batch: 34218, sum loss: 4535.047852, avg loss: 2.907082, ppl: 18.303312 +epoch: 0, batch: 34219, sum loss: 4379.708984, avg loss: 2.751073, ppl: 15.659431 +epoch: 0, batch: 34220, sum loss: 5168.453125, avg loss: 3.070976, ppl: 21.562943 +epoch: 0, batch: 34221, sum loss: 5711.237305, avg loss: 3.176439, ppl: 23.961271 +epoch: 0, batch: 34222, sum loss: 5887.993652, avg loss: 3.081106, ppl: 21.782480 +epoch: 0, batch: 34223, sum loss: 5467.859863, avg loss: 3.029285, ppl: 
20.682444 +epoch: 0, batch: 34224, sum loss: 5639.554688, avg loss: 3.051707, ppl: 21.151421 +epoch: 0, batch: 34225, sum loss: 5036.797852, avg loss: 2.820156, ppl: 16.779461 +epoch: 0, batch: 34226, sum loss: 4709.107422, avg loss: 2.847103, ppl: 17.237766 +epoch: 0, batch: 34227, sum loss: 5400.463379, avg loss: 2.906600, ppl: 18.294497 +epoch: 0, batch: 34228, sum loss: 4540.170410, avg loss: 2.659737, ppl: 14.292524 +epoch: 0, batch: 34229, sum loss: 5063.723633, avg loss: 3.065208, ppl: 21.438925 +epoch: 0, batch: 34230, sum loss: 5246.748047, avg loss: 2.739816, ppl: 15.484138 +epoch: 0, batch: 34231, sum loss: 5552.547852, avg loss: 3.086463, ppl: 21.899492 +epoch: 0, batch: 34232, sum loss: 4568.790039, avg loss: 2.735802, ppl: 15.422113 +epoch: 0, batch: 34233, sum loss: 6183.323242, avg loss: 3.175821, ppl: 23.946474 +epoch: 0, batch: 34234, sum loss: 4325.112305, avg loss: 2.656703, ppl: 14.249228 +epoch: 0, batch: 34235, sum loss: 5246.700195, avg loss: 2.903542, ppl: 18.238628 +epoch: 0, batch: 34236, sum loss: 4895.014648, avg loss: 2.847594, ppl: 17.246246 +epoch: 0, batch: 34237, sum loss: 4529.291992, avg loss: 2.715403, ppl: 15.110696 +epoch: 0, batch: 34238, sum loss: 5218.299805, avg loss: 2.834492, ppl: 17.021751 +epoch: 0, batch: 34239, sum loss: 4866.211914, avg loss: 3.005690, ppl: 20.200146 +epoch: 0, batch: 34240, sum loss: 5877.734863, avg loss: 3.274504, ppl: 26.430119 +epoch: 0, batch: 34241, sum loss: 6088.406250, avg loss: 3.291030, ppl: 26.870537 +epoch: 0, batch: 34242, sum loss: 4922.894531, avg loss: 2.890719, ppl: 18.006250 +epoch: 0, batch: 34243, sum loss: 4680.099121, avg loss: 3.007776, ppl: 20.242327 +epoch: 0, batch: 34244, sum loss: 5450.227539, avg loss: 2.913002, ppl: 18.411997 +epoch: 0, batch: 34245, sum loss: 5177.574219, avg loss: 3.105923, ppl: 22.329824 +epoch: 0, batch: 34246, sum loss: 5053.741699, avg loss: 2.810757, ppl: 16.622499 +epoch: 0, batch: 34247, sum loss: 4193.961426, avg loss: 2.822316, ppl: 
16.815750 +epoch: 0, batch: 34248, sum loss: 4703.200195, avg loss: 2.956128, ppl: 19.223402 +epoch: 0, batch: 34249, sum loss: 4083.438721, avg loss: 2.757217, ppl: 15.755939 +epoch: 0, batch: 34250, sum loss: 5333.732422, avg loss: 3.020233, ppl: 20.496075 +epoch: 0, batch: 34251, sum loss: 5688.747559, avg loss: 3.208544, ppl: 24.743023 +epoch: 0, batch: 34252, sum loss: 4959.801758, avg loss: 3.185486, ppl: 24.179039 +epoch: 0, batch: 34253, sum loss: 4261.921875, avg loss: 2.682141, ppl: 14.616351 +epoch: 0, batch: 34254, sum loss: 4491.760742, avg loss: 2.750619, ppl: 15.652317 +epoch: 0, batch: 34255, sum loss: 4809.945312, avg loss: 2.985689, ppl: 19.800148 +epoch: 0, batch: 34256, sum loss: 5120.847656, avg loss: 2.686698, ppl: 14.683119 +epoch: 0, batch: 34257, sum loss: 4902.038574, avg loss: 3.018497, ppl: 20.460506 +epoch: 0, batch: 34258, sum loss: 5231.976562, avg loss: 3.003431, ppl: 20.154566 +epoch: 0, batch: 34259, sum loss: 4503.889648, avg loss: 2.885259, ppl: 17.908213 +epoch: 0, batch: 34260, sum loss: 4963.662109, avg loss: 2.855962, ppl: 17.391161 +epoch: 0, batch: 34261, sum loss: 4967.372070, avg loss: 3.114340, ppl: 22.518559 +epoch: 0, batch: 34262, sum loss: 5133.886719, avg loss: 2.918639, ppl: 18.516077 +epoch: 0, batch: 34263, sum loss: 5033.842285, avg loss: 2.839167, ppl: 17.101507 +epoch: 0, batch: 34264, sum loss: 5242.958008, avg loss: 2.854087, ppl: 17.358587 +epoch: 0, batch: 34265, sum loss: 5833.172852, avg loss: 3.005241, ppl: 20.191080 +epoch: 0, batch: 34266, sum loss: 5277.019531, avg loss: 3.043264, ppl: 20.973583 +epoch: 0, batch: 34267, sum loss: 4583.460938, avg loss: 2.955165, ppl: 19.204889 +epoch: 0, batch: 34268, sum loss: 4276.600098, avg loss: 2.881806, ppl: 17.846474 +epoch: 0, batch: 34269, sum loss: 4541.370605, avg loss: 2.890752, ppl: 18.006838 +epoch: 0, batch: 34270, sum loss: 6018.592285, avg loss: 3.072278, ppl: 21.591032 +epoch: 0, batch: 34271, sum loss: 5867.866211, avg loss: 3.213508, ppl: 
24.866173 +epoch: 0, batch: 34272, sum loss: 4642.193359, avg loss: 2.847971, ppl: 17.252748 +epoch: 0, batch: 34273, sum loss: 6077.413574, avg loss: 3.286865, ppl: 26.758842 +epoch: 0, batch: 34274, sum loss: 5559.474609, avg loss: 3.144499, ppl: 23.208052 +epoch: 0, batch: 34275, sum loss: 4551.777832, avg loss: 2.915937, ppl: 18.466110 +epoch: 0, batch: 34276, sum loss: 4640.708984, avg loss: 3.141983, ppl: 23.149734 +epoch: 0, batch: 34277, sum loss: 5983.570801, avg loss: 3.100296, ppl: 22.204517 +epoch: 0, batch: 34278, sum loss: 4851.377930, avg loss: 2.853752, ppl: 17.352766 +epoch: 0, batch: 34279, sum loss: 5269.518555, avg loss: 3.161079, ppl: 23.596041 +epoch: 0, batch: 34280, sum loss: 4354.202148, avg loss: 2.711209, ppl: 15.047462 +epoch: 0, batch: 34281, sum loss: 5592.630371, avg loss: 3.203110, ppl: 24.608950 +epoch: 0, batch: 34282, sum loss: 5227.238281, avg loss: 3.009349, ppl: 20.274189 +epoch: 0, batch: 34283, sum loss: 4929.074219, avg loss: 2.948011, ppl: 19.067993 +epoch: 0, batch: 34284, sum loss: 5188.969727, avg loss: 2.999404, ppl: 20.073578 +epoch: 0, batch: 34285, sum loss: 4334.419922, avg loss: 2.714102, ppl: 15.091045 +epoch: 0, batch: 34286, sum loss: 4910.664062, avg loss: 3.031274, ppl: 20.723625 +epoch: 0, batch: 34287, sum loss: 3996.880371, avg loss: 2.737589, ppl: 15.449697 +epoch: 0, batch: 34288, sum loss: 4536.906738, avg loss: 2.802289, ppl: 16.482340 +epoch: 0, batch: 34289, sum loss: 4639.184570, avg loss: 2.870782, ppl: 17.650822 +epoch: 0, batch: 34290, sum loss: 5556.545410, avg loss: 2.995442, ppl: 19.994198 +epoch: 0, batch: 34291, sum loss: 4070.328613, avg loss: 2.674329, ppl: 14.502616 +epoch: 0, batch: 34292, sum loss: 4192.771973, avg loss: 2.650298, ppl: 14.158258 +epoch: 0, batch: 34293, sum loss: 4872.280762, avg loss: 2.665361, ppl: 14.373143 +epoch: 0, batch: 34294, sum loss: 4400.454102, avg loss: 2.933636, ppl: 18.795847 +epoch: 0, batch: 34295, sum loss: 5396.415527, avg loss: 2.991361, ppl: 
19.912767 +epoch: 0, batch: 34296, sum loss: 5444.878418, avg loss: 2.988408, ppl: 19.854042 +epoch: 0, batch: 34297, sum loss: 4714.838867, avg loss: 2.970913, ppl: 19.509727 +epoch: 0, batch: 34298, sum loss: 4201.699707, avg loss: 2.604897, ppl: 13.529839 +epoch: 0, batch: 34299, sum loss: 6349.088867, avg loss: 3.272726, ppl: 26.383169 +epoch: 0, batch: 34300, sum loss: 5169.281738, avg loss: 2.925457, ppl: 18.642735 +epoch: 0, batch: 34301, sum loss: 4671.165527, avg loss: 3.021452, ppl: 20.521076 +epoch: 0, batch: 34302, sum loss: 4918.855469, avg loss: 3.045731, ppl: 21.025396 +epoch: 0, batch: 34303, sum loss: 5294.490723, avg loss: 3.008233, ppl: 20.251591 +epoch: 0, batch: 34304, sum loss: 6129.756348, avg loss: 3.279698, ppl: 26.567759 +epoch: 0, batch: 34305, sum loss: 6684.264160, avg loss: 3.357240, ppl: 28.709835 +epoch: 0, batch: 34306, sum loss: 5609.715332, avg loss: 3.185528, ppl: 24.180059 +epoch: 0, batch: 34307, sum loss: 5988.224121, avg loss: 3.030478, ppl: 20.707123 +epoch: 0, batch: 34308, sum loss: 4377.668945, avg loss: 2.732627, ppl: 15.373221 +epoch: 0, batch: 34309, sum loss: 4353.061035, avg loss: 2.606623, ppl: 13.553210 +epoch: 0, batch: 34310, sum loss: 5381.906250, avg loss: 3.247982, ppl: 25.738348 +epoch: 0, batch: 34311, sum loss: 3823.871094, avg loss: 2.651783, ppl: 14.179297 +epoch: 0, batch: 34312, sum loss: 5086.743164, avg loss: 3.035050, ppl: 20.802011 +epoch: 0, batch: 34313, sum loss: 4772.006836, avg loss: 2.822003, ppl: 16.810486 +epoch: 0, batch: 34314, sum loss: 5240.485840, avg loss: 3.006590, ppl: 20.218330 +epoch: 0, batch: 34315, sum loss: 4809.916016, avg loss: 2.915101, ppl: 18.450668 +epoch: 0, batch: 34316, sum loss: 4985.057617, avg loss: 3.094387, ppl: 22.073709 +epoch: 0, batch: 34317, sum loss: 5811.749512, avg loss: 3.210912, ppl: 24.801683 +epoch: 0, batch: 34318, sum loss: 5976.248047, avg loss: 3.162036, ppl: 23.618633 +epoch: 0, batch: 34319, sum loss: 5560.904297, avg loss: 3.118847, ppl: 
22.620287 +epoch: 0, batch: 34320, sum loss: 5699.987305, avg loss: 3.054655, ppl: 21.213873 +epoch: 0, batch: 34321, sum loss: 5297.832520, avg loss: 2.834581, ppl: 17.023268 +epoch: 0, batch: 34322, sum loss: 4796.091797, avg loss: 2.973398, ppl: 19.558275 +epoch: 0, batch: 34323, sum loss: 4672.991211, avg loss: 2.951984, ppl: 19.143900 +epoch: 0, batch: 34324, sum loss: 4314.505371, avg loss: 2.725524, ppl: 15.264417 +epoch: 0, batch: 34325, sum loss: 5095.377930, avg loss: 2.984990, ppl: 19.786306 +epoch: 0, batch: 34326, sum loss: 4314.757324, avg loss: 2.872674, ppl: 17.684252 +epoch: 0, batch: 34327, sum loss: 5081.176758, avg loss: 3.094505, ppl: 22.076303 +epoch: 0, batch: 34328, sum loss: 5596.048340, avg loss: 3.018365, ppl: 20.457809 +epoch: 0, batch: 34329, sum loss: 5952.337891, avg loss: 3.142734, ppl: 23.167120 +epoch: 0, batch: 34330, sum loss: 3969.480957, avg loss: 2.570908, ppl: 13.077688 +epoch: 0, batch: 34331, sum loss: 5032.536133, avg loss: 3.091238, ppl: 22.004309 +epoch: 0, batch: 34332, sum loss: 4591.530273, avg loss: 2.911560, ppl: 18.385458 +epoch: 0, batch: 34333, sum loss: 4559.639648, avg loss: 2.853342, ppl: 17.345648 +epoch: 0, batch: 34334, sum loss: 5529.138672, avg loss: 2.991958, ppl: 19.924660 +epoch: 0, batch: 34335, sum loss: 5226.866211, avg loss: 3.058435, ppl: 21.294214 +epoch: 0, batch: 34336, sum loss: 4983.704590, avg loss: 2.768725, ppl: 15.938298 +epoch: 0, batch: 34337, sum loss: 5065.336914, avg loss: 2.891174, ppl: 18.014452 +epoch: 0, batch: 34338, sum loss: 4942.566406, avg loss: 2.883644, ppl: 17.879313 +epoch: 0, batch: 34339, sum loss: 4472.528809, avg loss: 2.956067, ppl: 19.222219 +epoch: 0, batch: 34340, sum loss: 5625.077148, avg loss: 3.045521, ppl: 21.020987 +epoch: 0, batch: 34341, sum loss: 4966.098145, avg loss: 2.834531, ppl: 17.022415 +epoch: 0, batch: 34342, sum loss: 5626.865723, avg loss: 3.069758, ppl: 21.536684 +epoch: 0, batch: 34343, sum loss: 4632.108398, avg loss: 2.873516, ppl: 
17.699144 +epoch: 0, batch: 34344, sum loss: 4929.756836, avg loss: 2.927409, ppl: 18.679173 +epoch: 0, batch: 34345, sum loss: 4654.708984, avg loss: 2.815916, ppl: 16.708471 +epoch: 0, batch: 34346, sum loss: 5805.060059, avg loss: 3.061741, ppl: 21.364714 +epoch: 0, batch: 34347, sum loss: 5476.261230, avg loss: 2.916007, ppl: 18.467400 +epoch: 0, batch: 34348, sum loss: 4054.733887, avg loss: 2.745250, ppl: 15.568505 +epoch: 0, batch: 34349, sum loss: 4783.285156, avg loss: 2.920198, ppl: 18.544968 +epoch: 0, batch: 34350, sum loss: 4822.637695, avg loss: 2.857013, ppl: 17.409447 +epoch: 0, batch: 34351, sum loss: 6451.490234, avg loss: 3.273206, ppl: 26.395842 +epoch: 0, batch: 34352, sum loss: 5421.451172, avg loss: 3.110414, ppl: 22.430325 +epoch: 0, batch: 34353, sum loss: 6371.779297, avg loss: 3.091596, ppl: 22.012184 +epoch: 0, batch: 34354, sum loss: 4924.003906, avg loss: 2.851189, ppl: 17.308355 +epoch: 0, batch: 34355, sum loss: 4883.320312, avg loss: 3.023728, ppl: 20.567823 +epoch: 0, batch: 34356, sum loss: 5672.725586, avg loss: 2.773949, ppl: 16.021778 +epoch: 0, batch: 34357, sum loss: 6165.002930, avg loss: 3.076349, ppl: 21.679102 +epoch: 0, batch: 34358, sum loss: 5497.027344, avg loss: 3.218400, ppl: 24.988108 +epoch: 0, batch: 34359, sum loss: 4480.745605, avg loss: 2.816308, ppl: 16.715021 +epoch: 0, batch: 34360, sum loss: 5455.095703, avg loss: 2.980927, ppl: 19.706070 +epoch: 0, batch: 34361, sum loss: 3051.303223, avg loss: 2.444955, ppl: 11.530026 +epoch: 0, batch: 34362, sum loss: 5438.157227, avg loss: 2.926888, ppl: 18.669432 +epoch: 0, batch: 34363, sum loss: 5026.296875, avg loss: 3.033372, ppl: 20.767136 +epoch: 0, batch: 34364, sum loss: 5511.560547, avg loss: 3.058580, ppl: 21.297287 +epoch: 0, batch: 34365, sum loss: 5229.960449, avg loss: 3.042444, ppl: 20.956392 +epoch: 0, batch: 34366, sum loss: 5227.568848, avg loss: 2.955098, ppl: 19.203611 +epoch: 0, batch: 34367, sum loss: 4572.085938, avg loss: 2.891895, ppl: 
18.027435 +epoch: 0, batch: 34368, sum loss: 3943.608154, avg loss: 2.777189, ppl: 16.073774 +epoch: 0, batch: 34369, sum loss: 4236.133789, avg loss: 2.747169, ppl: 15.598403 +epoch: 0, batch: 34370, sum loss: 4577.596191, avg loss: 2.888073, ppl: 17.958673 +epoch: 0, batch: 34371, sum loss: 4710.132324, avg loss: 2.777201, ppl: 16.073961 +epoch: 0, batch: 34372, sum loss: 5949.498535, avg loss: 3.321886, ppl: 27.712576 +epoch: 0, batch: 34373, sum loss: 4469.462402, avg loss: 2.791669, ppl: 16.308222 +epoch: 0, batch: 34374, sum loss: 4627.640137, avg loss: 2.823453, ppl: 16.834885 +epoch: 0, batch: 34375, sum loss: 5431.911621, avg loss: 3.036284, ppl: 20.827698 +epoch: 0, batch: 34376, sum loss: 5220.713379, avg loss: 3.236648, ppl: 25.448278 +epoch: 0, batch: 34377, sum loss: 3950.604736, avg loss: 2.660340, ppl: 14.301148 +epoch: 0, batch: 34378, sum loss: 4206.018555, avg loss: 2.732955, ppl: 15.378270 +epoch: 0, batch: 34379, sum loss: 4679.619141, avg loss: 3.070616, ppl: 21.555182 +epoch: 0, batch: 34380, sum loss: 4986.161621, avg loss: 2.917590, ppl: 18.496660 +epoch: 0, batch: 34381, sum loss: 4271.963867, avg loss: 2.671647, ppl: 14.463773 +epoch: 0, batch: 34382, sum loss: 4770.062500, avg loss: 2.796051, ppl: 16.379827 +epoch: 0, batch: 34383, sum loss: 5165.053223, avg loss: 3.180451, ppl: 24.057611 +epoch: 0, batch: 34384, sum loss: 4932.850098, avg loss: 2.879656, ppl: 17.808140 +epoch: 0, batch: 34385, sum loss: 6067.587891, avg loss: 3.030763, ppl: 20.713034 +epoch: 0, batch: 34386, sum loss: 4256.302734, avg loss: 2.692158, ppl: 14.763508 +epoch: 0, batch: 34387, sum loss: 4550.947266, avg loss: 3.123505, ppl: 22.725903 +epoch: 0, batch: 34388, sum loss: 4917.496094, avg loss: 2.806790, ppl: 16.556688 +epoch: 0, batch: 34389, sum loss: 4743.892578, avg loss: 2.974227, ppl: 19.574495 +epoch: 0, batch: 34390, sum loss: 5111.336914, avg loss: 2.858690, ppl: 17.438660 +epoch: 0, batch: 34391, sum loss: 5384.755371, avg loss: 2.924908, ppl: 
18.632511 +epoch: 0, batch: 34392, sum loss: 4910.827148, avg loss: 3.038878, ppl: 20.881805 +epoch: 0, batch: 34393, sum loss: 5364.755859, avg loss: 3.024101, ppl: 20.575504 +epoch: 0, batch: 34394, sum loss: 5300.938477, avg loss: 3.206859, ppl: 24.701385 +epoch: 0, batch: 34395, sum loss: 5372.044434, avg loss: 3.116035, ppl: 22.556770 +epoch: 0, batch: 34396, sum loss: 5254.131348, avg loss: 2.927093, ppl: 18.673264 +epoch: 0, batch: 34397, sum loss: 6084.402344, avg loss: 3.178894, ppl: 24.020163 +epoch: 0, batch: 34398, sum loss: 4648.382812, avg loss: 2.824048, ppl: 16.844902 +epoch: 0, batch: 34399, sum loss: 5261.416992, avg loss: 3.076852, ppl: 21.690016 +epoch: 0, batch: 34400, sum loss: 5593.469238, avg loss: 2.984775, ppl: 19.782057 +epoch: 0, batch: 34401, sum loss: 4666.106445, avg loss: 2.789066, ppl: 16.265814 +epoch: 0, batch: 34402, sum loss: 4647.061523, avg loss: 2.816401, ppl: 16.716579 +epoch: 0, batch: 34403, sum loss: 4697.673340, avg loss: 2.826518, ppl: 16.886564 +epoch: 0, batch: 34404, sum loss: 3875.260742, avg loss: 2.760157, ppl: 15.802325 +epoch: 0, batch: 34405, sum loss: 5676.929688, avg loss: 3.053754, ppl: 21.194767 +epoch: 0, batch: 34406, sum loss: 5424.703125, avg loss: 3.012050, ppl: 20.329041 +epoch: 0, batch: 34407, sum loss: 5878.872070, avg loss: 3.052374, ppl: 21.165529 +epoch: 0, batch: 34408, sum loss: 4759.864746, avg loss: 2.846809, ppl: 17.232706 +epoch: 0, batch: 34409, sum loss: 4620.955566, avg loss: 2.886293, ppl: 17.926739 +epoch: 0, batch: 34410, sum loss: 4528.098633, avg loss: 2.885978, ppl: 17.921078 +epoch: 0, batch: 34411, sum loss: 5314.577637, avg loss: 3.050848, ppl: 21.133263 +epoch: 0, batch: 34412, sum loss: 4173.375977, avg loss: 2.848721, ppl: 17.265680 +epoch: 0, batch: 34413, sum loss: 5022.272461, avg loss: 3.032773, ppl: 20.754711 +epoch: 0, batch: 34414, sum loss: 5308.553223, avg loss: 3.097172, ppl: 22.135269 +epoch: 0, batch: 34415, sum loss: 6244.319824, avg loss: 3.065449, ppl: 
21.444088 +epoch: 0, batch: 34416, sum loss: 4065.482422, avg loss: 2.737699, ppl: 15.451384 +epoch: 0, batch: 34417, sum loss: 4211.743652, avg loss: 2.669039, ppl: 14.426099 +epoch: 0, batch: 34418, sum loss: 5942.506836, avg loss: 3.007342, ppl: 20.233540 +epoch: 0, batch: 34419, sum loss: 4254.144531, avg loss: 2.760639, ppl: 15.809941 +epoch: 0, batch: 34420, sum loss: 6959.600098, avg loss: 3.188090, ppl: 24.242077 +epoch: 0, batch: 34421, sum loss: 4718.933594, avg loss: 2.839310, ppl: 17.103966 +epoch: 0, batch: 34422, sum loss: 5146.734863, avg loss: 3.015076, ppl: 20.390644 +epoch: 0, batch: 34423, sum loss: 4572.264648, avg loss: 2.915985, ppl: 18.466995 +epoch: 0, batch: 34424, sum loss: 5440.578125, avg loss: 3.019189, ppl: 20.474678 +epoch: 0, batch: 34425, sum loss: 5211.340332, avg loss: 2.944260, ppl: 18.996601 +epoch: 0, batch: 34426, sum loss: 4757.017578, avg loss: 2.680010, ppl: 14.585237 +epoch: 0, batch: 34427, sum loss: 4894.725098, avg loss: 2.782675, ppl: 16.162197 +epoch: 0, batch: 34428, sum loss: 4848.323242, avg loss: 2.956295, ppl: 19.226601 +epoch: 0, batch: 34429, sum loss: 3702.701660, avg loss: 2.536097, ppl: 12.630280 +epoch: 0, batch: 34430, sum loss: 4697.328613, avg loss: 2.958016, ppl: 19.259716 +epoch: 0, batch: 34431, sum loss: 5363.831055, avg loss: 2.923069, ppl: 18.598274 +epoch: 0, batch: 34432, sum loss: 5132.279297, avg loss: 3.146707, ppl: 23.259346 +epoch: 0, batch: 34433, sum loss: 4852.414062, avg loss: 2.895235, ppl: 18.087757 +epoch: 0, batch: 34434, sum loss: 5474.197754, avg loss: 2.994638, ppl: 19.978121 +epoch: 0, batch: 34435, sum loss: 5870.179199, avg loss: 3.192050, ppl: 24.338263 +epoch: 0, batch: 34436, sum loss: 5710.496582, avg loss: 2.922465, ppl: 18.587046 +epoch: 0, batch: 34437, sum loss: 4552.558594, avg loss: 3.041121, ppl: 20.928696 +epoch: 0, batch: 34438, sum loss: 4567.867188, avg loss: 2.781893, ppl: 16.149555 +epoch: 0, batch: 34439, sum loss: 5153.110840, avg loss: 2.966673, ppl: 
19.427176 +epoch: 0, batch: 34440, sum loss: 5819.424805, avg loss: 3.159297, ppl: 23.554031 +epoch: 0, batch: 34441, sum loss: 5316.216797, avg loss: 3.015438, ppl: 20.398020 +epoch: 0, batch: 34442, sum loss: 5656.273438, avg loss: 3.109551, ppl: 22.410984 +epoch: 0, batch: 34443, sum loss: 5520.406250, avg loss: 3.051634, ppl: 21.149881 +epoch: 0, batch: 34444, sum loss: 3888.808594, avg loss: 2.615204, ppl: 13.670006 +epoch: 0, batch: 34445, sum loss: 5672.968750, avg loss: 3.043438, ppl: 20.977243 +epoch: 0, batch: 34446, sum loss: 4882.701660, avg loss: 3.047879, ppl: 21.070601 +epoch: 0, batch: 34447, sum loss: 4209.508301, avg loss: 2.694948, ppl: 14.804745 +epoch: 0, batch: 34448, sum loss: 5875.777832, avg loss: 3.244494, ppl: 25.648722 +epoch: 0, batch: 34449, sum loss: 5182.944336, avg loss: 2.923263, ppl: 18.601879 +epoch: 0, batch: 34450, sum loss: 5017.911621, avg loss: 3.059702, ppl: 21.321205 +epoch: 0, batch: 34451, sum loss: 4517.305664, avg loss: 2.814521, ppl: 16.685175 +epoch: 0, batch: 34452, sum loss: 4482.428711, avg loss: 2.701886, ppl: 14.907821 +epoch: 0, batch: 34453, sum loss: 4824.345215, avg loss: 2.655116, ppl: 14.226634 +epoch: 0, batch: 34454, sum loss: 4929.881836, avg loss: 3.004194, ppl: 20.169943 +epoch: 0, batch: 34455, sum loss: 5662.654297, avg loss: 3.094347, ppl: 22.072815 +epoch: 0, batch: 34456, sum loss: 4472.250977, avg loss: 2.772629, ppl: 16.000648 +epoch: 0, batch: 34457, sum loss: 4189.712891, avg loss: 2.685714, ppl: 14.668664 +epoch: 0, batch: 34458, sum loss: 4675.370605, avg loss: 2.957224, ppl: 19.244469 +epoch: 0, batch: 34459, sum loss: 4983.816895, avg loss: 2.841401, ppl: 17.139755 +epoch: 0, batch: 34460, sum loss: 4580.492188, avg loss: 2.728107, ppl: 15.303892 +epoch: 0, batch: 34461, sum loss: 5338.658203, avg loss: 3.177773, ppl: 23.993256 +epoch: 0, batch: 34462, sum loss: 5067.251465, avg loss: 2.915565, ppl: 18.459234 +epoch: 0, batch: 34463, sum loss: 4916.817871, avg loss: 2.907639, ppl: 
18.313515 +epoch: 0, batch: 34464, sum loss: 6001.346191, avg loss: 3.207561, ppl: 24.718723 +epoch: 0, batch: 34465, sum loss: 5872.543945, avg loss: 3.203788, ppl: 24.625643 +epoch: 0, batch: 34466, sum loss: 3371.402588, avg loss: 2.686377, ppl: 14.678393 +epoch: 0, batch: 34467, sum loss: 4553.572266, avg loss: 2.871105, ppl: 17.656513 +epoch: 0, batch: 34468, sum loss: 5068.276367, avg loss: 3.181592, ppl: 24.085072 +epoch: 0, batch: 34469, sum loss: 4160.972168, avg loss: 2.726718, ppl: 15.282654 +epoch: 0, batch: 34470, sum loss: 4638.333496, avg loss: 2.908046, ppl: 18.320965 +epoch: 0, batch: 34471, sum loss: 5049.292480, avg loss: 3.058324, ppl: 21.291840 +epoch: 0, batch: 34472, sum loss: 5081.023438, avg loss: 3.240448, ppl: 25.545176 +epoch: 0, batch: 34473, sum loss: 5027.213867, avg loss: 2.938173, ppl: 18.881319 +epoch: 0, batch: 34474, sum loss: 5226.803711, avg loss: 3.044149, ppl: 20.992157 +epoch: 0, batch: 34475, sum loss: 5199.074707, avg loss: 2.943983, ppl: 18.991344 +epoch: 0, batch: 34476, sum loss: 5260.943848, avg loss: 2.885872, ppl: 17.919180 +epoch: 0, batch: 34477, sum loss: 4608.703125, avg loss: 3.016167, ppl: 20.412897 +epoch: 0, batch: 34478, sum loss: 4637.911621, avg loss: 2.798981, ppl: 16.427896 +epoch: 0, batch: 34479, sum loss: 6437.776855, avg loss: 3.095085, ppl: 22.089119 +epoch: 0, batch: 34480, sum loss: 4805.895996, avg loss: 3.098579, ppl: 22.166430 +epoch: 0, batch: 34481, sum loss: 4786.222168, avg loss: 2.679856, ppl: 14.582991 +epoch: 0, batch: 34482, sum loss: 4562.538086, avg loss: 2.775266, ppl: 16.042900 +epoch: 0, batch: 34483, sum loss: 4269.763184, avg loss: 2.567507, ppl: 13.033286 +epoch: 0, batch: 34484, sum loss: 5218.538086, avg loss: 3.143698, ppl: 23.189457 +epoch: 0, batch: 34485, sum loss: 5111.751953, avg loss: 3.022917, ppl: 20.551147 +epoch: 0, batch: 34486, sum loss: 5631.359375, avg loss: 3.037410, ppl: 20.851179 +epoch: 0, batch: 34487, sum loss: 5102.541504, avg loss: 2.905776, ppl: 
18.279415 +epoch: 0, batch: 34488, sum loss: 5045.509277, avg loss: 3.072783, ppl: 21.601936 +epoch: 0, batch: 34489, sum loss: 4044.086670, avg loss: 2.620925, ppl: 13.748431 +epoch: 0, batch: 34490, sum loss: 4445.265625, avg loss: 2.920674, ppl: 18.553789 +epoch: 0, batch: 34491, sum loss: 5690.121094, avg loss: 3.182394, ppl: 24.104397 +epoch: 0, batch: 34492, sum loss: 5193.834961, avg loss: 3.084225, ppl: 21.850531 +epoch: 0, batch: 34493, sum loss: 5469.434570, avg loss: 2.996950, ppl: 20.024378 +epoch: 0, batch: 34494, sum loss: 4674.576660, avg loss: 2.794128, ppl: 16.348373 +epoch: 0, batch: 34495, sum loss: 4037.005371, avg loss: 2.606201, ppl: 13.547485 +epoch: 0, batch: 34496, sum loss: 3703.661133, avg loss: 2.791003, ppl: 16.297358 +epoch: 0, batch: 34497, sum loss: 5088.620117, avg loss: 2.891261, ppl: 18.016020 +epoch: 0, batch: 34498, sum loss: 5121.035156, avg loss: 2.949905, ppl: 19.104141 +epoch: 0, batch: 34499, sum loss: 6430.358887, avg loss: 3.205563, ppl: 24.669380 +epoch: 0, batch: 34500, sum loss: 5044.677734, avg loss: 3.242081, ppl: 25.586905 +epoch: 0, batch: 34501, sum loss: 5963.549805, avg loss: 3.127189, ppl: 22.809774 +epoch: 0, batch: 34502, sum loss: 5942.522461, avg loss: 3.286793, ppl: 26.756929 +epoch: 0, batch: 34503, sum loss: 4648.423828, avg loss: 2.837865, ppl: 17.079268 +epoch: 0, batch: 34504, sum loss: 4772.798828, avg loss: 2.846034, ppl: 17.219351 +epoch: 0, batch: 34505, sum loss: 5744.705078, avg loss: 3.086892, ppl: 21.908871 +epoch: 0, batch: 34506, sum loss: 5692.604492, avg loss: 3.083751, ppl: 21.840170 +epoch: 0, batch: 34507, sum loss: 5035.038086, avg loss: 3.002408, ppl: 20.133957 +epoch: 0, batch: 34508, sum loss: 5121.145996, avg loss: 3.124555, ppl: 22.749771 +epoch: 0, batch: 34509, sum loss: 5007.658203, avg loss: 3.137630, ppl: 23.049187 +epoch: 0, batch: 34510, sum loss: 5097.741699, avg loss: 3.080207, ppl: 21.762899 +epoch: 0, batch: 34511, sum loss: 4705.125488, avg loss: 2.853320, ppl: 
17.345280 +epoch: 0, batch: 34512, sum loss: 4167.069336, avg loss: 2.748727, ppl: 15.622725 +epoch: 0, batch: 34513, sum loss: 5508.448242, avg loss: 3.015024, ppl: 20.389574 +epoch: 0, batch: 34514, sum loss: 5899.178711, avg loss: 3.230657, ppl: 25.296261 +epoch: 0, batch: 34515, sum loss: 4452.527344, avg loss: 2.716612, ppl: 15.128980 +epoch: 0, batch: 34516, sum loss: 3452.356445, avg loss: 2.572546, ppl: 13.099130 +epoch: 0, batch: 34517, sum loss: 6665.040527, avg loss: 3.098578, ppl: 22.166403 +epoch: 0, batch: 34518, sum loss: 5715.755371, avg loss: 3.045155, ppl: 21.013285 +epoch: 0, batch: 34519, sum loss: 5617.506836, avg loss: 2.933424, ppl: 18.791864 +epoch: 0, batch: 34520, sum loss: 3673.805664, avg loss: 2.576301, ppl: 13.148417 +epoch: 0, batch: 34521, sum loss: 5137.363770, avg loss: 2.947426, ppl: 19.056839 +epoch: 0, batch: 34522, sum loss: 5266.362793, avg loss: 2.863710, ppl: 17.526432 +epoch: 0, batch: 34523, sum loss: 5801.509277, avg loss: 2.915331, ppl: 18.454927 +epoch: 0, batch: 34524, sum loss: 5526.091797, avg loss: 3.080319, ppl: 21.765343 +epoch: 0, batch: 34525, sum loss: 6229.360352, avg loss: 3.178245, ppl: 24.004585 +epoch: 0, batch: 34526, sum loss: 5136.838867, avg loss: 2.948817, ppl: 19.083361 +epoch: 0, batch: 34527, sum loss: 4712.337891, avg loss: 2.754143, ppl: 15.707570 +epoch: 0, batch: 34528, sum loss: 5736.453613, avg loss: 2.967643, ppl: 19.446032 +epoch: 0, batch: 34529, sum loss: 5087.215332, avg loss: 2.781419, ppl: 16.141911 +epoch: 0, batch: 34530, sum loss: 4958.758301, avg loss: 3.053422, ppl: 21.187735 +epoch: 0, batch: 34531, sum loss: 4720.012207, avg loss: 2.804523, ppl: 16.519194 +epoch: 0, batch: 34532, sum loss: 4728.087891, avg loss: 2.826114, ppl: 16.879734 +epoch: 0, batch: 34533, sum loss: 4625.569336, avg loss: 2.692415, ppl: 14.767300 +epoch: 0, batch: 34534, sum loss: 4928.890625, avg loss: 2.800506, ppl: 16.452969 +epoch: 0, batch: 34535, sum loss: 4058.739746, avg loss: 2.753555, ppl: 
15.698338 +epoch: 0, batch: 34536, sum loss: 5995.528320, avg loss: 3.253135, ppl: 25.871325 +epoch: 0, batch: 34537, sum loss: 5236.735352, avg loss: 2.927186, ppl: 18.675001 +epoch: 0, batch: 34538, sum loss: 4875.540039, avg loss: 3.009593, ppl: 20.279140 +epoch: 0, batch: 34539, sum loss: 4696.527832, avg loss: 3.033933, ppl: 20.778788 +epoch: 0, batch: 34540, sum loss: 5675.532227, avg loss: 3.097998, ppl: 22.153553 +epoch: 0, batch: 34541, sum loss: 4487.182617, avg loss: 2.946279, ppl: 19.034988 +epoch: 0, batch: 34542, sum loss: 5133.375000, avg loss: 3.206356, ppl: 24.688946 +epoch: 0, batch: 34543, sum loss: 5243.248535, avg loss: 2.887251, ppl: 17.943920 +epoch: 0, batch: 34544, sum loss: 4392.242188, avg loss: 2.779900, ppl: 16.117411 +epoch: 0, batch: 34545, sum loss: 3969.008057, avg loss: 2.468288, ppl: 11.802229 +epoch: 0, batch: 34546, sum loss: 5422.501953, avg loss: 2.893544, ppl: 18.057194 +epoch: 0, batch: 34547, sum loss: 5142.551758, avg loss: 3.066519, ppl: 21.467035 +epoch: 0, batch: 34548, sum loss: 5516.009766, avg loss: 2.994576, ppl: 19.976896 +epoch: 0, batch: 34549, sum loss: 4368.265625, avg loss: 2.604810, ppl: 13.528648 +epoch: 0, batch: 34550, sum loss: 5512.675293, avg loss: 3.107483, ppl: 22.364685 +epoch: 0, batch: 34551, sum loss: 4662.039551, avg loss: 2.693264, ppl: 14.779836 +epoch: 0, batch: 34552, sum loss: 4763.155762, avg loss: 3.007043, ppl: 20.227497 +epoch: 0, batch: 34553, sum loss: 4476.812988, avg loss: 2.746511, ppl: 15.588150 +epoch: 0, batch: 34554, sum loss: 4158.184082, avg loss: 2.566780, ppl: 13.023825 +epoch: 0, batch: 34555, sum loss: 5607.105469, avg loss: 3.271357, ppl: 26.347063 +epoch: 0, batch: 34556, sum loss: 4949.907715, avg loss: 2.856265, ppl: 17.396431 +epoch: 0, batch: 34557, sum loss: 4251.538086, avg loss: 2.914008, ppl: 18.430527 +epoch: 0, batch: 34558, sum loss: 4917.742188, avg loss: 3.033771, ppl: 20.775419 +epoch: 0, batch: 34559, sum loss: 5075.198730, avg loss: 3.261696, ppl: 
26.093752 +epoch: 0, batch: 34560, sum loss: 4916.869141, avg loss: 2.972714, ppl: 19.544897 +epoch: 0, batch: 34561, sum loss: 4720.804199, avg loss: 2.754261, ppl: 15.709435 +epoch: 0, batch: 34562, sum loss: 4944.625000, avg loss: 2.856514, ppl: 17.400753 +epoch: 0, batch: 34563, sum loss: 5414.129395, avg loss: 3.004511, ppl: 20.176355 +epoch: 0, batch: 34564, sum loss: 4248.232422, avg loss: 2.751446, ppl: 15.665268 +epoch: 0, batch: 34565, sum loss: 4025.114258, avg loss: 2.832593, ppl: 16.989452 +epoch: 0, batch: 34566, sum loss: 4545.329102, avg loss: 2.833746, ppl: 17.009066 +epoch: 0, batch: 34567, sum loss: 5420.968750, avg loss: 2.938194, ppl: 18.881720 +epoch: 0, batch: 34568, sum loss: 4802.613770, avg loss: 2.587615, ppl: 13.298021 +epoch: 0, batch: 34569, sum loss: 4557.698730, avg loss: 2.823853, ppl: 16.841618 +epoch: 0, batch: 34570, sum loss: 4348.763184, avg loss: 2.827544, ppl: 16.903898 +epoch: 0, batch: 34571, sum loss: 5089.426758, avg loss: 2.978014, ppl: 19.648766 +epoch: 0, batch: 34572, sum loss: 4867.411621, avg loss: 2.621115, ppl: 13.751054 +epoch: 0, batch: 34573, sum loss: 5218.251953, avg loss: 2.796491, ppl: 16.387037 +epoch: 0, batch: 34574, sum loss: 4734.348145, avg loss: 3.098395, ppl: 22.162359 +epoch: 0, batch: 34575, sum loss: 5214.159180, avg loss: 3.196909, ppl: 24.456827 +epoch: 0, batch: 34576, sum loss: 5139.854980, avg loss: 3.176672, ppl: 23.966852 +epoch: 0, batch: 34577, sum loss: 5347.708984, avg loss: 3.007710, ppl: 20.241005 +epoch: 0, batch: 34578, sum loss: 4858.371094, avg loss: 2.749503, ppl: 15.634854 +epoch: 0, batch: 34579, sum loss: 4828.613281, avg loss: 2.931763, ppl: 18.760672 +epoch: 0, batch: 34580, sum loss: 5945.453125, avg loss: 3.011881, ppl: 20.325594 +epoch: 0, batch: 34581, sum loss: 5076.779297, avg loss: 2.979331, ppl: 19.674641 +epoch: 0, batch: 34582, sum loss: 5632.016602, avg loss: 3.183729, ppl: 24.136595 +epoch: 0, batch: 34583, sum loss: 4802.363770, avg loss: 3.039471, ppl: 
20.894180 +epoch: 0, batch: 34584, sum loss: 4781.911133, avg loss: 2.858285, ppl: 17.431610 +epoch: 0, batch: 34585, sum loss: 4679.559570, avg loss: 2.926554, ppl: 18.663200 +epoch: 0, batch: 34586, sum loss: 4667.829590, avg loss: 2.992198, ppl: 19.929449 +epoch: 0, batch: 34587, sum loss: 5477.454102, avg loss: 2.959187, ppl: 19.282280 +epoch: 0, batch: 34588, sum loss: 5564.368164, avg loss: 3.192409, ppl: 24.346998 +epoch: 0, batch: 34589, sum loss: 5136.202148, avg loss: 2.882268, ppl: 17.854729 +epoch: 0, batch: 34590, sum loss: 4990.809082, avg loss: 3.052483, ppl: 21.167831 +epoch: 0, batch: 34591, sum loss: 5977.443848, avg loss: 3.152660, ppl: 23.398230 +epoch: 0, batch: 34592, sum loss: 5198.883789, avg loss: 3.047411, ppl: 21.060751 +epoch: 0, batch: 34593, sum loss: 4680.921875, avg loss: 2.960735, ppl: 19.312159 +epoch: 0, batch: 34594, sum loss: 3923.403564, avg loss: 2.983577, ppl: 19.758362 +epoch: 0, batch: 34595, sum loss: 5131.894531, avg loss: 2.988873, ppl: 19.863279 +epoch: 0, batch: 34596, sum loss: 4474.059082, avg loss: 2.630252, ppl: 13.877268 +epoch: 0, batch: 34597, sum loss: 4468.849121, avg loss: 2.824810, ppl: 16.857735 +epoch: 0, batch: 34598, sum loss: 4753.257324, avg loss: 3.116890, ppl: 22.576057 +epoch: 0, batch: 34599, sum loss: 5524.634277, avg loss: 3.091569, ppl: 22.011597 +epoch: 0, batch: 34600, sum loss: 4833.337891, avg loss: 2.922212, ppl: 18.582340 +epoch: 0, batch: 34601, sum loss: 5375.936523, avg loss: 2.873296, ppl: 17.695246 +epoch: 0, batch: 34602, sum loss: 5885.021973, avg loss: 3.076331, ppl: 21.678726 +epoch: 0, batch: 34603, sum loss: 5987.834961, avg loss: 3.295451, ppl: 26.989595 +epoch: 0, batch: 34604, sum loss: 4648.738281, avg loss: 3.103297, ppl: 22.271248 +epoch: 0, batch: 34605, sum loss: 4740.008789, avg loss: 2.808062, ppl: 16.577761 +epoch: 0, batch: 34606, sum loss: 5747.383301, avg loss: 3.018584, ppl: 20.462292 +epoch: 0, batch: 34607, sum loss: 5598.286133, avg loss: 3.285379, ppl: 
26.719114 +epoch: 0, batch: 34608, sum loss: 5219.182617, avg loss: 3.027368, ppl: 20.642836 +epoch: 0, batch: 34609, sum loss: 5342.819336, avg loss: 3.214693, ppl: 24.895643 +epoch: 0, batch: 34610, sum loss: 5444.137207, avg loss: 3.006150, ppl: 20.209444 +epoch: 0, batch: 34611, sum loss: 4348.738770, avg loss: 2.736777, ppl: 15.437152 +epoch: 0, batch: 34612, sum loss: 3622.051758, avg loss: 2.655463, ppl: 14.231577 +epoch: 0, batch: 34613, sum loss: 5153.023438, avg loss: 2.911313, ppl: 18.380913 +epoch: 0, batch: 34614, sum loss: 5308.855469, avg loss: 2.975816, ppl: 19.605616 +epoch: 0, batch: 34615, sum loss: 4653.667969, avg loss: 3.012083, ppl: 20.329704 +epoch: 0, batch: 34616, sum loss: 4332.684570, avg loss: 2.861747, ppl: 17.492056 +epoch: 0, batch: 34617, sum loss: 4416.495605, avg loss: 2.960118, ppl: 19.300245 +epoch: 0, batch: 34618, sum loss: 4880.571777, avg loss: 3.102715, ppl: 22.258312 +epoch: 0, batch: 34619, sum loss: 4673.188965, avg loss: 2.982252, ppl: 19.732210 +epoch: 0, batch: 34620, sum loss: 4241.850586, avg loss: 2.856465, ppl: 17.399906 +epoch: 0, batch: 34621, sum loss: 4088.005127, avg loss: 2.701920, ppl: 14.908332 +epoch: 0, batch: 34622, sum loss: 4460.270996, avg loss: 2.739724, ppl: 15.482714 +epoch: 0, batch: 34623, sum loss: 6170.421875, avg loss: 3.089846, ppl: 21.973686 +epoch: 0, batch: 34624, sum loss: 4888.265137, avg loss: 2.906222, ppl: 18.287575 +epoch: 0, batch: 34625, sum loss: 5134.678223, avg loss: 2.887896, ppl: 17.955484 +epoch: 0, batch: 34626, sum loss: 5464.201172, avg loss: 2.948840, ppl: 19.083811 +epoch: 0, batch: 34627, sum loss: 4985.895508, avg loss: 2.862167, ppl: 17.499413 +epoch: 0, batch: 34628, sum loss: 3935.971436, avg loss: 2.845966, ppl: 17.218189 +epoch: 0, batch: 34629, sum loss: 4894.338867, avg loss: 3.026802, ppl: 20.631149 +epoch: 0, batch: 34630, sum loss: 4593.810547, avg loss: 2.858625, ppl: 17.437536 +epoch: 0, batch: 34631, sum loss: 6662.214355, avg loss: 3.327779, ppl: 
27.876368 +epoch: 0, batch: 34632, sum loss: 5079.765625, avg loss: 2.894453, ppl: 18.073618 +epoch: 0, batch: 34633, sum loss: 5312.582031, avg loss: 3.016798, ppl: 20.425789 +epoch: 0, batch: 34634, sum loss: 4818.597656, avg loss: 3.017281, ppl: 20.435652 +epoch: 0, batch: 34635, sum loss: 5835.643066, avg loss: 3.022084, ppl: 20.534031 +epoch: 0, batch: 34636, sum loss: 4403.189453, avg loss: 2.678339, ppl: 14.560887 +epoch: 0, batch: 34637, sum loss: 4699.423828, avg loss: 2.955612, ppl: 19.213486 +epoch: 0, batch: 34638, sum loss: 4540.455566, avg loss: 3.063735, ppl: 21.407370 +epoch: 0, batch: 34639, sum loss: 5026.294922, avg loss: 2.883703, ppl: 17.880365 +epoch: 0, batch: 34640, sum loss: 5338.334473, avg loss: 2.964095, ppl: 19.377151 +epoch: 0, batch: 34641, sum loss: 4494.057617, avg loss: 2.745301, ppl: 15.569300 +epoch: 0, batch: 34642, sum loss: 5430.264648, avg loss: 2.873156, ppl: 17.692766 +epoch: 0, batch: 34643, sum loss: 4738.659668, avg loss: 3.020178, ppl: 20.494946 +epoch: 0, batch: 34644, sum loss: 4466.526855, avg loss: 2.814447, ppl: 16.683947 +epoch: 0, batch: 34645, sum loss: 5382.739746, avg loss: 2.985435, ppl: 19.795116 +epoch: 0, batch: 34646, sum loss: 5110.168945, avg loss: 3.000686, ppl: 20.099329 +epoch: 0, batch: 34647, sum loss: 4566.702637, avg loss: 2.950066, ppl: 19.107222 +epoch: 0, batch: 34648, sum loss: 4545.066895, avg loss: 2.842443, ppl: 17.157635 +epoch: 0, batch: 34649, sum loss: 5502.318359, avg loss: 3.092928, ppl: 22.041515 +epoch: 0, batch: 34650, sum loss: 4287.825684, avg loss: 2.688292, ppl: 14.706536 +epoch: 0, batch: 34651, sum loss: 5186.494141, avg loss: 3.052675, ppl: 21.171898 +epoch: 0, batch: 34652, sum loss: 5552.109863, avg loss: 2.983402, ppl: 19.754900 +epoch: 0, batch: 34653, sum loss: 4613.729492, avg loss: 2.827040, ppl: 16.895380 +epoch: 0, batch: 34654, sum loss: 3732.724365, avg loss: 2.710766, ppl: 15.040787 +epoch: 0, batch: 34655, sum loss: 5319.189941, avg loss: 3.055250, ppl: 
21.226490 +epoch: 0, batch: 34656, sum loss: 4568.672363, avg loss: 2.739012, ppl: 15.471695 +epoch: 0, batch: 34657, sum loss: 4065.847168, avg loss: 2.769651, ppl: 15.953060 +epoch: 0, batch: 34658, sum loss: 5328.310547, avg loss: 3.114150, ppl: 22.514286 +epoch: 0, batch: 34659, sum loss: 4332.286133, avg loss: 2.666022, ppl: 14.382646 +epoch: 0, batch: 34660, sum loss: 5444.661133, avg loss: 3.150846, ppl: 23.355804 +epoch: 0, batch: 34661, sum loss: 5394.622070, avg loss: 3.123696, ppl: 22.730227 +epoch: 0, batch: 34662, sum loss: 5052.547852, avg loss: 3.025478, ppl: 20.603849 +epoch: 0, batch: 34663, sum loss: 3853.673828, avg loss: 2.692994, ppl: 14.775844 +epoch: 0, batch: 34664, sum loss: 4713.717773, avg loss: 2.909703, ppl: 18.351339 +epoch: 0, batch: 34665, sum loss: 5410.870117, avg loss: 3.162402, ppl: 23.627291 +epoch: 0, batch: 34666, sum loss: 4780.876953, avg loss: 3.082448, ppl: 21.811733 +epoch: 0, batch: 34667, sum loss: 5204.606934, avg loss: 3.193010, ppl: 24.361654 +epoch: 0, batch: 34668, sum loss: 5431.096680, avg loss: 2.870559, ppl: 17.646872 +epoch: 0, batch: 34669, sum loss: 5065.849609, avg loss: 2.918116, ppl: 18.506390 +epoch: 0, batch: 34670, sum loss: 4102.256836, avg loss: 2.714928, ppl: 15.103528 +epoch: 0, batch: 34671, sum loss: 4227.051270, avg loss: 2.771837, ppl: 15.987977 +epoch: 0, batch: 34672, sum loss: 5154.889160, avg loss: 2.832357, ppl: 16.985447 +epoch: 0, batch: 34673, sum loss: 5324.092285, avg loss: 3.167217, ppl: 23.741325 +epoch: 0, batch: 34674, sum loss: 6314.520508, avg loss: 3.008347, ppl: 20.253893 +epoch: 0, batch: 34675, sum loss: 5735.863770, avg loss: 3.093777, ppl: 22.060251 +epoch: 0, batch: 34676, sum loss: 4965.011719, avg loss: 2.943101, ppl: 18.974604 +epoch: 0, batch: 34677, sum loss: 4683.993652, avg loss: 2.826792, ppl: 16.891178 +epoch: 0, batch: 34678, sum loss: 4564.637207, avg loss: 3.030968, ppl: 20.717272 +epoch: 0, batch: 34679, sum loss: 4929.812500, avg loss: 3.090791, ppl: 
21.994478 +epoch: 0, batch: 34680, sum loss: 5118.730469, avg loss: 2.986424, ppl: 19.814697 +epoch: 0, batch: 34681, sum loss: 4705.597656, avg loss: 2.771259, ppl: 15.978740 +epoch: 0, batch: 34682, sum loss: 4586.463867, avg loss: 2.921315, ppl: 18.565680 +epoch: 0, batch: 34683, sum loss: 5465.245117, avg loss: 3.177468, ppl: 23.985945 +epoch: 0, batch: 34684, sum loss: 6158.762207, avg loss: 3.110486, ppl: 22.431938 +epoch: 0, batch: 34685, sum loss: 5722.455078, avg loss: 3.156346, ppl: 23.484629 +epoch: 0, batch: 34686, sum loss: 4909.995117, avg loss: 3.047793, ppl: 21.068802 +epoch: 0, batch: 34687, sum loss: 4053.250000, avg loss: 2.633691, ppl: 13.925071 +epoch: 0, batch: 34688, sum loss: 4645.249023, avg loss: 3.040084, ppl: 20.907007 +epoch: 0, batch: 34689, sum loss: 5390.191895, avg loss: 2.912043, ppl: 18.394346 +epoch: 0, batch: 34690, sum loss: 4193.024902, avg loss: 2.924006, ppl: 18.615717 +epoch: 0, batch: 34691, sum loss: 4759.119141, avg loss: 2.865213, ppl: 17.552794 +epoch: 0, batch: 34692, sum loss: 5231.044434, avg loss: 2.973874, ppl: 19.567579 +epoch: 0, batch: 34693, sum loss: 4200.585938, avg loss: 2.685796, ppl: 14.669881 +epoch: 0, batch: 34694, sum loss: 5717.454590, avg loss: 3.246709, ppl: 25.705601 +epoch: 0, batch: 34695, sum loss: 5459.811523, avg loss: 2.677691, ppl: 14.551455 +epoch: 0, batch: 34696, sum loss: 4877.427246, avg loss: 2.963200, ppl: 19.359825 +epoch: 0, batch: 34697, sum loss: 4801.818359, avg loss: 3.114020, ppl: 22.511351 +epoch: 0, batch: 34698, sum loss: 6225.406250, avg loss: 3.109593, ppl: 22.411930 +epoch: 0, batch: 34699, sum loss: 4544.334473, avg loss: 3.013484, ppl: 20.358210 +epoch: 0, batch: 34700, sum loss: 5201.440918, avg loss: 3.018828, ppl: 20.467293 +epoch: 0, batch: 34701, sum loss: 5783.126465, avg loss: 3.191571, ppl: 24.326614 +epoch: 0, batch: 34702, sum loss: 4461.836914, avg loss: 2.950951, ppl: 19.124125 +epoch: 0, batch: 34703, sum loss: 5176.250000, avg loss: 3.163967, ppl: 
23.664289 +epoch: 0, batch: 34704, sum loss: 5056.642578, avg loss: 2.976247, ppl: 19.614059 +epoch: 0, batch: 34705, sum loss: 5167.215820, avg loss: 2.924288, ppl: 18.620955 +epoch: 0, batch: 34706, sum loss: 4437.410645, avg loss: 2.803165, ppl: 16.496780 +epoch: 0, batch: 34707, sum loss: 4340.349609, avg loss: 2.930689, ppl: 18.740532 +epoch: 0, batch: 34708, sum loss: 4474.679199, avg loss: 2.848300, ppl: 17.258417 +epoch: 0, batch: 34709, sum loss: 4545.828613, avg loss: 2.859012, ppl: 17.444277 +epoch: 0, batch: 34710, sum loss: 4840.652832, avg loss: 2.791610, ppl: 16.307261 +epoch: 0, batch: 34711, sum loss: 5027.353027, avg loss: 3.008590, ppl: 20.258810 +epoch: 0, batch: 34712, sum loss: 4107.296875, avg loss: 2.790283, ppl: 16.285631 +epoch: 0, batch: 34713, sum loss: 5786.266602, avg loss: 2.953684, ppl: 19.176468 +epoch: 0, batch: 34714, sum loss: 6227.894531, avg loss: 3.121752, ppl: 22.686081 +epoch: 0, batch: 34715, sum loss: 4598.968262, avg loss: 2.702096, ppl: 14.910959 +epoch: 0, batch: 34716, sum loss: 5013.774414, avg loss: 2.959725, ppl: 19.292667 +epoch: 0, batch: 34717, sum loss: 4901.301758, avg loss: 2.994076, ppl: 19.966898 +epoch: 0, batch: 34718, sum loss: 3675.002686, avg loss: 2.649605, ppl: 14.148453 +epoch: 0, batch: 34719, sum loss: 4565.257812, avg loss: 2.586548, ppl: 13.283841 +epoch: 0, batch: 34720, sum loss: 4463.514160, avg loss: 2.760368, ppl: 15.805653 +epoch: 0, batch: 34721, sum loss: 5622.649414, avg loss: 3.008373, ppl: 20.254419 +epoch: 0, batch: 34722, sum loss: 5807.483887, avg loss: 3.331890, ppl: 27.991192 +epoch: 0, batch: 34723, sum loss: 5105.563965, avg loss: 2.897596, ppl: 18.130510 +epoch: 0, batch: 34724, sum loss: 5220.402832, avg loss: 3.012350, ppl: 20.335133 +epoch: 0, batch: 34725, sum loss: 5019.994141, avg loss: 2.979225, ppl: 19.672564 +epoch: 0, batch: 34726, sum loss: 5022.194336, avg loss: 2.971713, ppl: 19.525330 +epoch: 0, batch: 34727, sum loss: 6411.751953, avg loss: 3.318712, ppl: 
27.624754 +epoch: 0, batch: 34728, sum loss: 5148.026855, avg loss: 2.960338, ppl: 19.304489 +epoch: 0, batch: 34729, sum loss: 3946.129395, avg loss: 2.744179, ppl: 15.551841 +epoch: 0, batch: 34730, sum loss: 6100.089844, avg loss: 3.037894, ppl: 20.861254 +epoch: 0, batch: 34731, sum loss: 5271.568848, avg loss: 2.981657, ppl: 19.720463 +epoch: 0, batch: 34732, sum loss: 5998.908691, avg loss: 2.999454, ppl: 20.074583 +epoch: 0, batch: 34733, sum loss: 4555.617188, avg loss: 2.812109, ppl: 16.644993 +epoch: 0, batch: 34734, sum loss: 4772.745605, avg loss: 2.835856, ppl: 17.044985 +epoch: 0, batch: 34735, sum loss: 4788.879883, avg loss: 2.983726, ppl: 19.761307 +epoch: 0, batch: 34736, sum loss: 5705.261719, avg loss: 3.047683, ppl: 21.066471 +epoch: 0, batch: 34737, sum loss: 5682.900879, avg loss: 3.254812, ppl: 25.914736 +epoch: 0, batch: 34738, sum loss: 5076.382812, avg loss: 2.627527, ppl: 13.839509 +epoch: 0, batch: 34739, sum loss: 4632.500977, avg loss: 2.904389, ppl: 18.254093 +epoch: 0, batch: 34740, sum loss: 4917.723633, avg loss: 2.867477, ppl: 17.592579 +epoch: 0, batch: 34741, sum loss: 4978.634766, avg loss: 3.035753, ppl: 20.816647 +epoch: 0, batch: 34742, sum loss: 5048.572266, avg loss: 2.906490, ppl: 18.292473 +epoch: 0, batch: 34743, sum loss: 4345.651367, avg loss: 2.836587, ppl: 17.057453 +epoch: 0, batch: 34744, sum loss: 5086.829590, avg loss: 3.035101, ppl: 20.803087 +epoch: 0, batch: 34745, sum loss: 4802.328613, avg loss: 3.197289, ppl: 24.466118 +epoch: 0, batch: 34746, sum loss: 4480.281738, avg loss: 3.130875, ppl: 22.893995 +epoch: 0, batch: 34747, sum loss: 5036.917969, avg loss: 2.933557, ppl: 18.794369 +epoch: 0, batch: 34748, sum loss: 4274.766113, avg loss: 2.670060, ppl: 14.440838 +epoch: 0, batch: 34749, sum loss: 5148.682129, avg loss: 2.928715, ppl: 18.703590 +epoch: 0, batch: 34750, sum loss: 5054.187500, avg loss: 3.017426, ppl: 20.438606 +epoch: 0, batch: 34751, sum loss: 5245.206055, avg loss: 2.960049, ppl: 
19.298912 +epoch: 0, batch: 34752, sum loss: 5720.928711, avg loss: 3.157245, ppl: 23.505758 +epoch: 0, batch: 34753, sum loss: 4433.017578, avg loss: 2.886079, ppl: 17.922901 +epoch: 0, batch: 34754, sum loss: 5594.864746, avg loss: 2.958680, ppl: 19.272522 +epoch: 0, batch: 34755, sum loss: 5014.030273, avg loss: 2.842421, ppl: 17.157249 +epoch: 0, batch: 34756, sum loss: 4876.437988, avg loss: 2.894028, ppl: 18.065941 +epoch: 0, batch: 34757, sum loss: 5259.788574, avg loss: 2.716833, ppl: 15.132320 +epoch: 0, batch: 34758, sum loss: 4176.461914, avg loss: 2.780600, ppl: 16.128700 +epoch: 0, batch: 34759, sum loss: 6027.220703, avg loss: 3.126152, ppl: 22.786131 +epoch: 0, batch: 34760, sum loss: 4127.791504, avg loss: 2.668256, ppl: 14.414805 +epoch: 0, batch: 34761, sum loss: 4737.287109, avg loss: 2.714778, ppl: 15.101253 +epoch: 0, batch: 34762, sum loss: 5041.617188, avg loss: 2.965657, ppl: 19.407454 +epoch: 0, batch: 34763, sum loss: 4714.110352, avg loss: 2.792720, ppl: 16.325357 +epoch: 0, batch: 34764, sum loss: 4997.545898, avg loss: 2.988963, ppl: 19.865074 +epoch: 0, batch: 34765, sum loss: 4985.087891, avg loss: 2.840506, ppl: 17.124426 +epoch: 0, batch: 34766, sum loss: 5503.128906, avg loss: 3.206952, ppl: 24.703665 +epoch: 0, batch: 34767, sum loss: 4883.879395, avg loss: 2.781253, ppl: 16.139225 +epoch: 0, batch: 34768, sum loss: 5300.110840, avg loss: 3.042543, ppl: 20.958481 +epoch: 0, batch: 34769, sum loss: 4866.305176, avg loss: 2.830893, ppl: 16.960604 +epoch: 0, batch: 34770, sum loss: 4931.690430, avg loss: 2.816499, ppl: 16.718225 +epoch: 0, batch: 34771, sum loss: 5554.424805, avg loss: 3.030237, ppl: 20.702143 +epoch: 0, batch: 34772, sum loss: 5504.177246, avg loss: 2.988153, ppl: 19.848982 +epoch: 0, batch: 34773, sum loss: 5432.913086, avg loss: 2.968805, ppl: 19.468641 +epoch: 0, batch: 34774, sum loss: 4969.674316, avg loss: 2.784131, ppl: 16.185751 +epoch: 0, batch: 34775, sum loss: 4148.046387, avg loss: 2.677887, ppl: 
14.554303 +epoch: 0, batch: 34776, sum loss: 4870.606445, avg loss: 2.792779, ppl: 16.326323 +epoch: 0, batch: 34777, sum loss: 6125.039062, avg loss: 2.936260, ppl: 18.845242 +epoch: 0, batch: 34778, sum loss: 4177.705078, avg loss: 2.599692, ppl: 13.459593 +epoch: 0, batch: 34779, sum loss: 4672.451660, avg loss: 2.845586, ppl: 17.211639 +epoch: 0, batch: 34780, sum loss: 6005.429688, avg loss: 3.119704, ppl: 22.639668 +epoch: 0, batch: 34781, sum loss: 5053.085449, avg loss: 2.905742, ppl: 18.278805 +epoch: 0, batch: 34782, sum loss: 4807.019043, avg loss: 2.881906, ppl: 17.848261 +epoch: 0, batch: 34783, sum loss: 4990.069336, avg loss: 2.843344, ppl: 17.173100 +epoch: 0, batch: 34784, sum loss: 4227.679688, avg loss: 2.608069, ppl: 13.572812 +epoch: 0, batch: 34785, sum loss: 4576.504395, avg loss: 2.538272, ppl: 12.657778 +epoch: 0, batch: 34786, sum loss: 4629.403320, avg loss: 2.958085, ppl: 19.261053 +epoch: 0, batch: 34787, sum loss: 4986.168457, avg loss: 2.788685, ppl: 16.259623 +epoch: 0, batch: 34788, sum loss: 4814.301270, avg loss: 2.896692, ppl: 18.114117 +epoch: 0, batch: 34789, sum loss: 4636.088867, avg loss: 3.026168, ppl: 20.618065 +epoch: 0, batch: 34790, sum loss: 4088.176758, avg loss: 2.652938, ppl: 14.195680 +epoch: 0, batch: 34791, sum loss: 5617.613281, avg loss: 2.928891, ppl: 18.706877 +epoch: 0, batch: 34792, sum loss: 5372.291992, avg loss: 2.943722, ppl: 18.986378 +epoch: 0, batch: 34793, sum loss: 5023.566895, avg loss: 2.976047, ppl: 19.610140 +epoch: 0, batch: 34794, sum loss: 4858.305664, avg loss: 2.883267, ppl: 17.872574 +epoch: 0, batch: 34795, sum loss: 5300.625000, avg loss: 3.001486, ppl: 20.115412 +epoch: 0, batch: 34796, sum loss: 5497.990723, avg loss: 3.022535, ppl: 20.543301 +epoch: 0, batch: 34797, sum loss: 3574.924316, avg loss: 2.562670, ppl: 12.970402 +epoch: 0, batch: 34798, sum loss: 5108.094727, avg loss: 3.268135, ppl: 26.262310 +epoch: 0, batch: 34799, sum loss: 5171.052246, avg loss: 2.980434, ppl: 
19.696358 +epoch: 0, batch: 34800, sum loss: 4511.818848, avg loss: 2.798895, ppl: 16.426487 +epoch: 0, batch: 34801, sum loss: 5484.458496, avg loss: 3.214806, ppl: 24.898451 +epoch: 0, batch: 34802, sum loss: 5502.561523, avg loss: 2.979189, ppl: 19.671854 +epoch: 0, batch: 34803, sum loss: 5330.029785, avg loss: 2.839654, ppl: 17.109840 +epoch: 0, batch: 34804, sum loss: 5906.781738, avg loss: 3.310977, ppl: 27.411880 +epoch: 0, batch: 34805, sum loss: 5013.095703, avg loss: 2.859724, ppl: 17.456705 +epoch: 0, batch: 34806, sum loss: 4620.702637, avg loss: 2.893364, ppl: 18.053944 +epoch: 0, batch: 34807, sum loss: 5090.929199, avg loss: 2.837753, ppl: 17.077354 +epoch: 0, batch: 34808, sum loss: 5161.392090, avg loss: 2.956124, ppl: 19.223314 +epoch: 0, batch: 34809, sum loss: 5038.000977, avg loss: 3.087011, ppl: 21.911478 +epoch: 0, batch: 34810, sum loss: 4433.253418, avg loss: 2.858319, ppl: 17.432203 +epoch: 0, batch: 34811, sum loss: 4863.329102, avg loss: 3.016953, ppl: 20.428959 +epoch: 0, batch: 34812, sum loss: 4514.807617, avg loss: 2.721403, ppl: 15.201637 +epoch: 0, batch: 34813, sum loss: 4953.478516, avg loss: 3.163141, ppl: 23.644743 +epoch: 0, batch: 34814, sum loss: 4968.085938, avg loss: 2.870067, ppl: 17.638197 +epoch: 0, batch: 34815, sum loss: 4537.972656, avg loss: 2.908957, ppl: 18.337662 +epoch: 0, batch: 34816, sum loss: 5702.045898, avg loss: 2.982241, ppl: 19.731995 +epoch: 0, batch: 34817, sum loss: 5183.577148, avg loss: 2.992827, ppl: 19.941988 +epoch: 0, batch: 34818, sum loss: 5073.593750, avg loss: 3.108820, ppl: 22.394596 +epoch: 0, batch: 34819, sum loss: 5881.711426, avg loss: 2.933522, ppl: 18.793701 +epoch: 0, batch: 34820, sum loss: 3950.977783, avg loss: 2.818101, ppl: 16.745024 +epoch: 0, batch: 34821, sum loss: 4703.812988, avg loss: 2.936213, ppl: 18.844343 +epoch: 0, batch: 34822, sum loss: 4053.126221, avg loss: 2.652570, ppl: 14.190454 +epoch: 0, batch: 34823, sum loss: 4058.466797, avg loss: 2.703842, ppl: 
14.937009 +epoch: 0, batch: 34824, sum loss: 5015.549805, avg loss: 2.825662, ppl: 16.872105 +epoch: 0, batch: 34825, sum loss: 6068.793945, avg loss: 3.271587, ppl: 26.353132 +epoch: 0, batch: 34826, sum loss: 5351.870605, avg loss: 2.886662, ppl: 17.933340 +epoch: 0, batch: 34827, sum loss: 5318.967773, avg loss: 3.074548, ppl: 21.640093 +epoch: 0, batch: 34828, sum loss: 3784.888428, avg loss: 2.545318, ppl: 12.747286 +epoch: 0, batch: 34829, sum loss: 4650.392090, avg loss: 3.015818, ppl: 20.405783 +epoch: 0, batch: 34830, sum loss: 5495.880859, avg loss: 2.972353, ppl: 19.537838 +epoch: 0, batch: 34831, sum loss: 5535.555176, avg loss: 2.993810, ppl: 19.961596 +epoch: 0, batch: 34832, sum loss: 4470.123047, avg loss: 2.869142, ppl: 17.621893 +epoch: 0, batch: 34833, sum loss: 4759.905273, avg loss: 2.888292, ppl: 17.962605 +epoch: 0, batch: 34834, sum loss: 5576.457031, avg loss: 2.958333, ppl: 19.265820 +epoch: 0, batch: 34835, sum loss: 4325.886230, avg loss: 2.673601, ppl: 14.492060 +epoch: 0, batch: 34836, sum loss: 5104.353516, avg loss: 2.585792, ppl: 13.273795 +epoch: 0, batch: 34837, sum loss: 5104.596680, avg loss: 2.935363, ppl: 18.828342 +epoch: 0, batch: 34838, sum loss: 4721.348145, avg loss: 3.022630, ppl: 20.545259 +epoch: 0, batch: 34839, sum loss: 4799.833496, avg loss: 2.738068, ppl: 15.457098 +epoch: 0, batch: 34840, sum loss: 5259.483887, avg loss: 3.033151, ppl: 20.762550 +epoch: 0, batch: 34841, sum loss: 5096.790039, avg loss: 2.984069, ppl: 19.768087 +epoch: 0, batch: 34842, sum loss: 5741.728516, avg loss: 3.090274, ppl: 21.983091 +epoch: 0, batch: 34843, sum loss: 5824.010254, avg loss: 3.228387, ppl: 25.238916 +epoch: 0, batch: 34844, sum loss: 3578.871826, avg loss: 2.652981, ppl: 14.196299 +epoch: 0, batch: 34845, sum loss: 5673.791504, avg loss: 3.030872, ppl: 20.715281 +epoch: 0, batch: 34846, sum loss: 5398.989258, avg loss: 3.022950, ppl: 20.551838 +epoch: 0, batch: 34847, sum loss: 4516.657227, avg loss: 2.952064, ppl: 
19.145420 +epoch: 0, batch: 34848, sum loss: 5257.301758, avg loss: 2.987103, ppl: 19.828161 +epoch: 0, batch: 34849, sum loss: 4524.422852, avg loss: 2.733790, ppl: 15.391115 +epoch: 0, batch: 34850, sum loss: 4955.233398, avg loss: 2.951300, ppl: 19.130816 +epoch: 0, batch: 34851, sum loss: 4432.162598, avg loss: 2.819442, ppl: 16.767488 +epoch: 0, batch: 34852, sum loss: 5436.934570, avg loss: 3.080416, ppl: 21.767460 +epoch: 0, batch: 34853, sum loss: 5307.289551, avg loss: 2.963311, ppl: 19.361967 +epoch: 0, batch: 34854, sum loss: 5501.992188, avg loss: 3.124357, ppl: 22.745260 +epoch: 0, batch: 34855, sum loss: 4394.250977, avg loss: 2.879588, ppl: 17.806932 +epoch: 0, batch: 34856, sum loss: 4180.284668, avg loss: 2.925322, ppl: 18.640224 +epoch: 0, batch: 34857, sum loss: 4220.880371, avg loss: 2.773246, ppl: 16.010521 +epoch: 0, batch: 34858, sum loss: 5102.943359, avg loss: 2.917635, ppl: 18.497484 +epoch: 0, batch: 34859, sum loss: 4647.185059, avg loss: 2.799509, ppl: 16.436575 +epoch: 0, batch: 34860, sum loss: 4226.241699, avg loss: 2.533718, ppl: 12.600268 +epoch: 0, batch: 34861, sum loss: 5416.694336, avg loss: 3.039671, ppl: 20.898375 +epoch: 0, batch: 34862, sum loss: 5101.533691, avg loss: 3.031214, ppl: 20.722380 +epoch: 0, batch: 34863, sum loss: 5125.602051, avg loss: 2.990433, ppl: 19.894295 +epoch: 0, batch: 34864, sum loss: 5334.202148, avg loss: 3.141462, ppl: 23.137667 +epoch: 0, batch: 34865, sum loss: 5667.208984, avg loss: 2.931821, ppl: 18.761757 +epoch: 0, batch: 34866, sum loss: 5673.211426, avg loss: 2.906358, ppl: 18.290073 +epoch: 0, batch: 34867, sum loss: 5280.738281, avg loss: 3.185005, ppl: 24.167408 +epoch: 0, batch: 34868, sum loss: 4889.959473, avg loss: 3.022225, ppl: 20.536928 +epoch: 0, batch: 34869, sum loss: 5083.246094, avg loss: 3.022144, ppl: 20.535269 +epoch: 0, batch: 34870, sum loss: 4884.512207, avg loss: 2.683798, ppl: 14.640594 +epoch: 0, batch: 34871, sum loss: 5797.051758, avg loss: 3.299403, ppl: 
27.096462 +epoch: 0, batch: 34872, sum loss: 5202.587891, avg loss: 2.883918, ppl: 17.884203 +epoch: 0, batch: 34873, sum loss: 5096.655762, avg loss: 3.008651, ppl: 20.260061 +epoch: 0, batch: 34874, sum loss: 5322.099121, avg loss: 2.862883, ppl: 17.511934 +epoch: 0, batch: 34875, sum loss: 5125.084961, avg loss: 3.020086, ppl: 20.493046 +epoch: 0, batch: 34876, sum loss: 4555.764160, avg loss: 2.842024, ppl: 17.150440 +epoch: 0, batch: 34877, sum loss: 4918.717773, avg loss: 3.038121, ppl: 20.865999 +epoch: 0, batch: 34878, sum loss: 3814.597412, avg loss: 2.634391, ppl: 13.934818 +epoch: 0, batch: 34879, sum loss: 4579.302734, avg loss: 2.826730, ppl: 16.890144 +epoch: 0, batch: 34880, sum loss: 4242.903809, avg loss: 2.830490, ppl: 16.953760 +epoch: 0, batch: 34881, sum loss: 5322.121094, avg loss: 2.995003, ppl: 19.985430 +epoch: 0, batch: 34882, sum loss: 5297.451172, avg loss: 3.037529, ppl: 20.853655 +epoch: 0, batch: 34883, sum loss: 5874.168945, avg loss: 3.218723, ppl: 24.996176 +epoch: 0, batch: 34884, sum loss: 4895.009766, avg loss: 2.845936, ppl: 17.217665 +epoch: 0, batch: 34885, sum loss: 5133.499023, avg loss: 3.017930, ppl: 20.448919 +epoch: 0, batch: 34886, sum loss: 5517.556641, avg loss: 3.180148, ppl: 24.050310 +epoch: 0, batch: 34887, sum loss: 4450.174316, avg loss: 2.912418, ppl: 18.401237 +epoch: 0, batch: 34888, sum loss: 5857.824707, avg loss: 2.928912, ppl: 18.707273 +epoch: 0, batch: 34889, sum loss: 4113.999023, avg loss: 2.835285, ppl: 17.035261 +epoch: 0, batch: 34890, sum loss: 4438.527832, avg loss: 2.856195, ppl: 17.395220 +epoch: 0, batch: 34891, sum loss: 4854.772461, avg loss: 2.809475, ppl: 16.601196 +epoch: 0, batch: 34892, sum loss: 5675.994141, avg loss: 3.012736, ppl: 20.342979 +epoch: 0, batch: 34893, sum loss: 5516.944824, avg loss: 3.139980, ppl: 23.103401 +epoch: 0, batch: 34894, sum loss: 4258.880371, avg loss: 2.666800, ppl: 14.393843 +epoch: 0, batch: 34895, sum loss: 5346.079590, avg loss: 2.913395, ppl: 
18.419224 +epoch: 0, batch: 34896, sum loss: 5346.471680, avg loss: 2.981858, ppl: 19.724436 +epoch: 0, batch: 34897, sum loss: 4196.762207, avg loss: 2.908359, ppl: 18.326700 +epoch: 0, batch: 34898, sum loss: 5985.542480, avg loss: 3.127243, ppl: 22.810993 +epoch: 0, batch: 34899, sum loss: 4500.133301, avg loss: 2.833837, ppl: 17.010611 +epoch: 0, batch: 34900, sum loss: 4991.743164, avg loss: 2.860598, ppl: 17.471970 +epoch: 0, batch: 34901, sum loss: 4437.261230, avg loss: 2.794245, ppl: 16.350283 +epoch: 0, batch: 34902, sum loss: 4288.593750, avg loss: 2.923377, ppl: 18.603998 +epoch: 0, batch: 34903, sum loss: 4431.629883, avg loss: 2.783687, ppl: 16.178564 +epoch: 0, batch: 34904, sum loss: 4519.866699, avg loss: 2.747639, ppl: 15.605746 +epoch: 0, batch: 34905, sum loss: 4696.687500, avg loss: 2.710149, ppl: 15.031508 +epoch: 0, batch: 34906, sum loss: 4811.769531, avg loss: 2.879575, ppl: 17.806704 +epoch: 0, batch: 34907, sum loss: 3126.965820, avg loss: 2.452522, ppl: 11.617613 +epoch: 0, batch: 34908, sum loss: 4153.942871, avg loss: 2.795385, ppl: 16.368935 +epoch: 0, batch: 34909, sum loss: 4072.763428, avg loss: 2.898764, ppl: 18.151699 +epoch: 0, batch: 34910, sum loss: 4108.516113, avg loss: 2.555047, ppl: 12.871908 +epoch: 0, batch: 34911, sum loss: 5200.070312, avg loss: 3.044538, ppl: 21.000317 +epoch: 0, batch: 34912, sum loss: 4542.399414, avg loss: 2.982534, ppl: 19.737772 +epoch: 0, batch: 34913, sum loss: 5305.449219, avg loss: 3.080981, ppl: 21.779753 +epoch: 0, batch: 34914, sum loss: 4254.404297, avg loss: 2.838162, ppl: 17.084332 +epoch: 0, batch: 34915, sum loss: 4125.285156, avg loss: 2.823604, ppl: 16.837421 +epoch: 0, batch: 34916, sum loss: 5185.692383, avg loss: 3.037898, ppl: 20.861353 +epoch: 0, batch: 34917, sum loss: 4909.855469, avg loss: 2.636872, ppl: 13.969440 +epoch: 0, batch: 34918, sum loss: 4248.892578, avg loss: 2.692581, ppl: 14.769747 +epoch: 0, batch: 34919, sum loss: 4523.989746, avg loss: 2.831033, ppl: 
16.962967 +epoch: 0, batch: 34920, sum loss: 4344.707520, avg loss: 2.808473, ppl: 16.584572 +epoch: 0, batch: 34921, sum loss: 3804.121094, avg loss: 2.531018, ppl: 12.566298 +epoch: 0, batch: 34922, sum loss: 5277.377441, avg loss: 3.046985, ppl: 21.051771 +epoch: 0, batch: 34923, sum loss: 4701.251953, avg loss: 2.840636, ppl: 17.126650 +epoch: 0, batch: 34924, sum loss: 5073.138184, avg loss: 2.993002, ppl: 19.945467 +epoch: 0, batch: 34925, sum loss: 4604.875977, avg loss: 3.158351, ppl: 23.531765 +epoch: 0, batch: 34926, sum loss: 5412.835449, avg loss: 2.944959, ppl: 19.009892 +epoch: 0, batch: 34927, sum loss: 5136.018555, avg loss: 2.848596, ppl: 17.263529 +epoch: 0, batch: 34928, sum loss: 5927.414551, avg loss: 2.912734, ppl: 18.407061 +epoch: 0, batch: 34929, sum loss: 4399.715332, avg loss: 2.744676, ppl: 15.559566 +epoch: 0, batch: 34930, sum loss: 5130.889648, avg loss: 3.206806, ppl: 24.700068 +epoch: 0, batch: 34931, sum loss: 4688.063965, avg loss: 2.808906, ppl: 16.591759 +epoch: 0, batch: 34932, sum loss: 4351.923828, avg loss: 2.699705, ppl: 14.875338 +epoch: 0, batch: 34933, sum loss: 5931.084961, avg loss: 2.986448, ppl: 19.815165 +epoch: 0, batch: 34934, sum loss: 4882.163574, avg loss: 2.858409, ppl: 17.433775 +epoch: 0, batch: 34935, sum loss: 4593.892090, avg loss: 2.658502, ppl: 14.274894 +epoch: 0, batch: 34936, sum loss: 5035.323242, avg loss: 3.205171, ppl: 24.659714 +epoch: 0, batch: 34937, sum loss: 4707.935547, avg loss: 2.810708, ppl: 16.621683 +epoch: 0, batch: 34938, sum loss: 4454.677734, avg loss: 2.835568, ppl: 17.040083 +epoch: 0, batch: 34939, sum loss: 4260.057129, avg loss: 2.830603, ppl: 16.955681 +epoch: 0, batch: 34940, sum loss: 4844.423340, avg loss: 2.824737, ppl: 16.856504 +epoch: 0, batch: 34941, sum loss: 4719.859375, avg loss: 3.019744, ppl: 20.486040 +epoch: 0, batch: 34942, sum loss: 5412.125488, avg loss: 3.050803, ppl: 21.132317 +epoch: 0, batch: 34943, sum loss: 5438.649902, avg loss: 2.913042, ppl: 
18.412731 +epoch: 0, batch: 34944, sum loss: 5736.021973, avg loss: 3.015785, ppl: 20.405092 +epoch: 0, batch: 34945, sum loss: 4974.584473, avg loss: 2.968129, ppl: 19.455488 +epoch: 0, batch: 34946, sum loss: 4503.816895, avg loss: 2.848714, ppl: 17.265562 +epoch: 0, batch: 34947, sum loss: 4918.898438, avg loss: 2.814015, ppl: 16.676748 +epoch: 0, batch: 34948, sum loss: 4912.305664, avg loss: 2.968161, ppl: 19.456100 +epoch: 0, batch: 34949, sum loss: 4331.448242, avg loss: 2.891487, ppl: 18.020092 +epoch: 0, batch: 34950, sum loss: 5070.834961, avg loss: 2.934511, ppl: 18.812300 +epoch: 0, batch: 34951, sum loss: 5496.958984, avg loss: 3.005445, ppl: 20.195206 +epoch: 0, batch: 34952, sum loss: 5748.955078, avg loss: 3.158767, ppl: 23.541546 +epoch: 0, batch: 34953, sum loss: 4848.823730, avg loss: 2.920978, ppl: 18.559435 +epoch: 0, batch: 34954, sum loss: 4880.245117, avg loss: 2.842309, ppl: 17.155336 +epoch: 0, batch: 34955, sum loss: 4766.307129, avg loss: 2.726721, ppl: 15.282686 +epoch: 0, batch: 34956, sum loss: 5484.855469, avg loss: 2.959987, ppl: 19.297714 +epoch: 0, batch: 34957, sum loss: 5104.399414, avg loss: 2.867640, ppl: 17.595448 +epoch: 0, batch: 34958, sum loss: 5399.696289, avg loss: 3.268581, ppl: 26.274040 +epoch: 0, batch: 34959, sum loss: 4588.462891, avg loss: 2.857075, ppl: 17.410530 +epoch: 0, batch: 34960, sum loss: 4853.442383, avg loss: 2.948628, ppl: 19.079767 +epoch: 0, batch: 34961, sum loss: 5873.661133, avg loss: 3.059199, ppl: 21.310472 +epoch: 0, batch: 34962, sum loss: 4811.844727, avg loss: 2.909217, ppl: 18.342428 +epoch: 0, batch: 34963, sum loss: 4923.353027, avg loss: 2.810133, ppl: 16.612131 +epoch: 0, batch: 34964, sum loss: 5707.890137, avg loss: 3.092031, ppl: 22.021769 +epoch: 0, batch: 34965, sum loss: 5154.390137, avg loss: 3.073578, ppl: 21.619116 +epoch: 0, batch: 34966, sum loss: 6293.590820, avg loss: 3.191476, ppl: 24.324306 +epoch: 0, batch: 34967, sum loss: 4235.921875, avg loss: 2.674193, ppl: 
14.500646 +epoch: 0, batch: 34968, sum loss: 4777.233398, avg loss: 2.879586, ppl: 17.806904 +epoch: 0, batch: 34969, sum loss: 5127.812012, avg loss: 2.847203, ppl: 17.239487 +epoch: 0, batch: 34970, sum loss: 4931.184082, avg loss: 2.750242, ppl: 15.646421 +epoch: 0, batch: 34971, sum loss: 5254.607910, avg loss: 2.963682, ppl: 19.369156 +epoch: 0, batch: 34972, sum loss: 4692.392090, avg loss: 2.843874, ppl: 17.182199 +epoch: 0, batch: 34973, sum loss: 4119.543945, avg loss: 2.856827, ppl: 17.406206 +epoch: 0, batch: 34974, sum loss: 4369.499023, avg loss: 2.808161, ppl: 16.579405 +epoch: 0, batch: 34975, sum loss: 5800.979980, avg loss: 3.173403, ppl: 23.888628 +epoch: 0, batch: 34976, sum loss: 6110.536133, avg loss: 3.049170, ppl: 21.097822 +epoch: 0, batch: 34977, sum loss: 4658.732910, avg loss: 2.919006, ppl: 18.522860 +epoch: 0, batch: 34978, sum loss: 4840.227051, avg loss: 2.949559, ppl: 19.097538 +epoch: 0, batch: 34979, sum loss: 5027.387207, avg loss: 2.931421, ppl: 18.754263 +epoch: 0, batch: 34980, sum loss: 5972.213867, avg loss: 2.980147, ppl: 19.690704 +epoch: 0, batch: 34981, sum loss: 5890.781250, avg loss: 3.133394, ppl: 22.951752 +epoch: 0, batch: 34982, sum loss: 5129.504395, avg loss: 3.093790, ppl: 22.060535 +epoch: 0, batch: 34983, sum loss: 6304.009766, avg loss: 2.947176, ppl: 19.052078 +epoch: 0, batch: 34984, sum loss: 4558.103027, avg loss: 3.079799, ppl: 21.754038 +epoch: 0, batch: 34985, sum loss: 4997.346191, avg loss: 3.008637, ppl: 20.259771 +epoch: 0, batch: 34986, sum loss: 4039.312500, avg loss: 2.751575, ppl: 15.667292 +epoch: 0, batch: 34987, sum loss: 5569.659668, avg loss: 3.012255, ppl: 20.333199 +epoch: 0, batch: 34988, sum loss: 5056.442871, avg loss: 2.899336, ppl: 18.162088 +epoch: 0, batch: 34989, sum loss: 4477.256836, avg loss: 2.644570, ppl: 14.077392 +epoch: 0, batch: 34990, sum loss: 5188.618164, avg loss: 2.949755, ppl: 19.101269 +epoch: 0, batch: 34991, sum loss: 6225.335449, avg loss: 3.220556, ppl: 
25.042051 +epoch: 0, batch: 34992, sum loss: 4484.666016, avg loss: 2.867434, ppl: 17.591812 +epoch: 0, batch: 34993, sum loss: 5464.416504, avg loss: 2.775224, ppl: 16.042223 +epoch: 0, batch: 34994, sum loss: 5872.857422, avg loss: 3.190037, ppl: 24.289320 +epoch: 0, batch: 34995, sum loss: 4254.865723, avg loss: 2.732733, ppl: 15.374853 +epoch: 0, batch: 34996, sum loss: 4477.605469, avg loss: 2.707138, ppl: 14.986317 +epoch: 0, batch: 34997, sum loss: 4735.697754, avg loss: 2.907120, ppl: 18.303999 +epoch: 0, batch: 34998, sum loss: 5223.345703, avg loss: 2.962760, ppl: 19.351307 +epoch: 0, batch: 34999, sum loss: 5356.648926, avg loss: 3.055704, ppl: 21.236134 +epoch: 0, batch: 35000, sum loss: 4337.379883, avg loss: 2.750400, ppl: 15.648884 +epoch: 0, batch: 35001, sum loss: 4195.013672, avg loss: 2.779996, ppl: 16.118956 +epoch: 0, batch: 35002, sum loss: 4402.908203, avg loss: 2.696208, ppl: 14.823419 +epoch: 0, batch: 35003, sum loss: 4555.471680, avg loss: 3.084274, ppl: 21.851587 +epoch: 0, batch: 35004, sum loss: 5344.148438, avg loss: 3.066063, ppl: 21.457268 +epoch: 0, batch: 35005, sum loss: 4663.485352, avg loss: 2.811022, ppl: 16.626900 +epoch: 0, batch: 35006, sum loss: 3492.770996, avg loss: 2.596856, ppl: 13.421473 +epoch: 0, batch: 35007, sum loss: 4826.294922, avg loss: 2.634440, ppl: 13.935513 +epoch: 0, batch: 35008, sum loss: 4788.570312, avg loss: 2.800333, ppl: 16.450132 +epoch: 0, batch: 35009, sum loss: 5124.196289, avg loss: 2.756426, ppl: 15.743477 +epoch: 0, batch: 35010, sum loss: 4503.277344, avg loss: 2.819836, ppl: 16.774094 +epoch: 0, batch: 35011, sum loss: 4392.203125, avg loss: 2.706225, ppl: 14.972646 +epoch: 0, batch: 35012, sum loss: 5870.662109, avg loss: 3.084951, ppl: 21.866400 +epoch: 0, batch: 35013, sum loss: 5802.370605, avg loss: 3.007968, ppl: 20.246222 +epoch: 0, batch: 35014, sum loss: 4912.959961, avg loss: 2.802601, ppl: 16.487480 +epoch: 0, batch: 35015, sum loss: 4712.187988, avg loss: 2.961777, ppl: 
19.332304 +epoch: 0, batch: 35016, sum loss: 6134.419922, avg loss: 3.035339, ppl: 20.808033 +epoch: 0, batch: 35017, sum loss: 4478.556641, avg loss: 2.771384, ppl: 15.980736 +epoch: 0, batch: 35018, sum loss: 4835.483398, avg loss: 2.873133, ppl: 17.692366 +epoch: 0, batch: 35019, sum loss: 3605.504883, avg loss: 2.680673, ppl: 14.594911 +epoch: 0, batch: 35020, sum loss: 4842.839844, avg loss: 2.969246, ppl: 19.477226 +epoch: 0, batch: 35021, sum loss: 4489.692871, avg loss: 2.680414, ppl: 14.591128 +epoch: 0, batch: 35022, sum loss: 5707.335449, avg loss: 3.039050, ppl: 20.885386 +epoch: 0, batch: 35023, sum loss: 4552.251465, avg loss: 2.873896, ppl: 17.705868 +epoch: 0, batch: 35024, sum loss: 4549.520996, avg loss: 2.848792, ppl: 17.266912 +epoch: 0, batch: 35025, sum loss: 3899.929688, avg loss: 2.530779, ppl: 12.563285 +epoch: 0, batch: 35026, sum loss: 4919.520508, avg loss: 2.917865, ppl: 18.501745 +epoch: 0, batch: 35027, sum loss: 4917.973633, avg loss: 2.773815, ppl: 16.019630 +epoch: 0, batch: 35028, sum loss: 5368.182129, avg loss: 3.144805, ppl: 23.215147 +epoch: 0, batch: 35029, sum loss: 5103.592285, avg loss: 2.979330, ppl: 19.674627 +epoch: 0, batch: 35030, sum loss: 4377.513184, avg loss: 2.669215, ppl: 14.428644 +epoch: 0, batch: 35031, sum loss: 5457.527344, avg loss: 3.074663, ppl: 21.642591 +epoch: 0, batch: 35032, sum loss: 4604.410156, avg loss: 2.861660, ppl: 17.490528 +epoch: 0, batch: 35033, sum loss: 5731.964355, avg loss: 3.073439, ppl: 21.616121 +epoch: 0, batch: 35034, sum loss: 5082.092773, avg loss: 2.866381, ppl: 17.573299 +epoch: 0, batch: 35035, sum loss: 4596.243652, avg loss: 2.942538, ppl: 18.963907 +epoch: 0, batch: 35036, sum loss: 4903.889648, avg loss: 2.828079, ppl: 16.912943 +epoch: 0, batch: 35037, sum loss: 5521.739258, avg loss: 2.895511, ppl: 18.092743 +epoch: 0, batch: 35038, sum loss: 4064.015137, avg loss: 2.645843, ppl: 14.095326 +epoch: 0, batch: 35039, sum loss: 4949.203125, avg loss: 2.867441, ppl: 
17.591942 +epoch: 0, batch: 35040, sum loss: 5011.439453, avg loss: 3.061356, ppl: 21.356493 +epoch: 0, batch: 35041, sum loss: 5622.517090, avg loss: 2.936040, ppl: 18.841091 +epoch: 0, batch: 35042, sum loss: 4813.492188, avg loss: 2.677137, ppl: 14.543394 +epoch: 0, batch: 35043, sum loss: 3604.679443, avg loss: 2.716413, ppl: 15.125961 +epoch: 0, batch: 35044, sum loss: 5322.227539, avg loss: 3.018847, ppl: 20.467684 +epoch: 0, batch: 35045, sum loss: 4603.916504, avg loss: 2.812411, ppl: 16.650011 +epoch: 0, batch: 35046, sum loss: 4624.197266, avg loss: 2.921160, ppl: 18.562817 +epoch: 0, batch: 35047, sum loss: 5263.209473, avg loss: 3.149737, ppl: 23.329920 +epoch: 0, batch: 35048, sum loss: 5093.134766, avg loss: 2.890542, ppl: 18.003065 +epoch: 0, batch: 35049, sum loss: 4365.571777, avg loss: 2.891107, ppl: 18.013241 +epoch: 0, batch: 35050, sum loss: 5388.761719, avg loss: 3.088115, ppl: 21.935699 +epoch: 0, batch: 35051, sum loss: 4915.705078, avg loss: 2.999210, ppl: 20.069674 +epoch: 0, batch: 35052, sum loss: 4990.389160, avg loss: 3.046635, ppl: 21.044418 +epoch: 0, batch: 35053, sum loss: 6009.514648, avg loss: 3.134854, ppl: 22.985275 +epoch: 0, batch: 35054, sum loss: 4286.359863, avg loss: 2.623231, ppl: 13.780181 +epoch: 0, batch: 35055, sum loss: 6566.942383, avg loss: 3.316637, ppl: 27.567493 +epoch: 0, batch: 35056, sum loss: 5331.312500, avg loss: 3.160233, ppl: 23.576084 +epoch: 0, batch: 35057, sum loss: 4904.092285, avg loss: 2.917366, ppl: 18.492514 +epoch: 0, batch: 35058, sum loss: 4257.274414, avg loss: 2.923952, ppl: 18.614714 +epoch: 0, batch: 35059, sum loss: 5068.104492, avg loss: 3.016729, ppl: 20.424372 +epoch: 0, batch: 35060, sum loss: 6027.033203, avg loss: 2.948646, ppl: 19.080107 +epoch: 0, batch: 35061, sum loss: 4640.818848, avg loss: 2.882496, ppl: 17.858795 +epoch: 0, batch: 35062, sum loss: 4313.045410, avg loss: 2.623507, ppl: 13.783979 +epoch: 0, batch: 35063, sum loss: 4565.013672, avg loss: 2.928168, ppl: 
18.693354 +epoch: 0, batch: 35064, sum loss: 6437.029785, avg loss: 3.259256, ppl: 26.030155 +epoch: 0, batch: 35065, sum loss: 4865.895996, avg loss: 2.725992, ppl: 15.271559 +epoch: 0, batch: 35066, sum loss: 5800.721680, avg loss: 3.092069, ppl: 22.022598 +epoch: 0, batch: 35067, sum loss: 5572.267578, avg loss: 3.234050, ppl: 25.382242 +epoch: 0, batch: 35068, sum loss: 5163.384277, avg loss: 2.943777, ppl: 18.987419 +epoch: 0, batch: 35069, sum loss: 5303.692871, avg loss: 3.055123, ppl: 21.223787 +epoch: 0, batch: 35070, sum loss: 4826.311523, avg loss: 2.809262, ppl: 16.597662 +epoch: 0, batch: 35071, sum loss: 4645.119141, avg loss: 2.724410, ppl: 15.247416 +epoch: 0, batch: 35072, sum loss: 4956.169922, avg loss: 3.187248, ppl: 24.221672 +epoch: 0, batch: 35073, sum loss: 4244.572266, avg loss: 2.691549, ppl: 14.754510 +epoch: 0, batch: 35074, sum loss: 4306.526855, avg loss: 2.635573, ppl: 13.951309 +epoch: 0, batch: 35075, sum loss: 4859.423340, avg loss: 2.937983, ppl: 18.877728 +epoch: 0, batch: 35076, sum loss: 4396.776367, avg loss: 2.793377, ppl: 16.336100 +epoch: 0, batch: 35077, sum loss: 3972.027100, avg loss: 2.616619, ppl: 13.689357 +epoch: 0, batch: 35078, sum loss: 5312.702148, avg loss: 2.794688, ppl: 16.357529 +epoch: 0, batch: 35079, sum loss: 5284.568359, avg loss: 3.004303, ppl: 20.172146 +epoch: 0, batch: 35080, sum loss: 5331.210938, avg loss: 3.023943, ppl: 20.572243 +epoch: 0, batch: 35081, sum loss: 4751.250488, avg loss: 2.936496, ppl: 18.849682 +epoch: 0, batch: 35082, sum loss: 4254.714844, avg loss: 2.741440, ppl: 15.509303 +epoch: 0, batch: 35083, sum loss: 4082.484375, avg loss: 2.637264, ppl: 13.974913 +epoch: 0, batch: 35084, sum loss: 3934.880859, avg loss: 2.616277, ppl: 13.684680 +epoch: 0, batch: 35085, sum loss: 4219.034180, avg loss: 2.543119, ppl: 12.719280 +epoch: 0, batch: 35086, sum loss: 5083.694824, avg loss: 3.122663, ppl: 22.706757 +epoch: 0, batch: 35087, sum loss: 4658.507812, avg loss: 2.924361, ppl: 
18.622332 +epoch: 0, batch: 35088, sum loss: 4814.918945, avg loss: 3.005567, ppl: 20.197672 +epoch: 0, batch: 35089, sum loss: 4726.287109, avg loss: 3.063051, ppl: 21.392721 +epoch: 0, batch: 35090, sum loss: 3799.439209, avg loss: 2.850292, ppl: 17.292830 +epoch: 0, batch: 35091, sum loss: 5389.768555, avg loss: 3.034780, ppl: 20.796392 +epoch: 0, batch: 35092, sum loss: 4983.746582, avg loss: 2.872476, ppl: 17.680748 +epoch: 0, batch: 35093, sum loss: 4592.493164, avg loss: 2.957175, ppl: 19.243542 +epoch: 0, batch: 35094, sum loss: 5683.094727, avg loss: 3.126015, ppl: 22.783001 +epoch: 0, batch: 35095, sum loss: 7112.095703, avg loss: 3.292637, ppl: 26.913738 +epoch: 0, batch: 35096, sum loss: 5088.222168, avg loss: 2.986046, ppl: 19.807207 +epoch: 0, batch: 35097, sum loss: 4721.187988, avg loss: 3.032234, ppl: 20.743517 +epoch: 0, batch: 35098, sum loss: 5497.835449, avg loss: 3.005924, ppl: 20.204885 +epoch: 0, batch: 35099, sum loss: 5354.469238, avg loss: 3.319572, ppl: 27.648506 +epoch: 0, batch: 35100, sum loss: 5063.387207, avg loss: 2.959315, ppl: 19.284754 +epoch: 0, batch: 35101, sum loss: 5572.259766, avg loss: 3.023473, ppl: 20.562572 +epoch: 0, batch: 35102, sum loss: 5108.068848, avg loss: 3.006515, ppl: 20.216822 +epoch: 0, batch: 35103, sum loss: 5195.335938, avg loss: 2.900802, ppl: 18.188721 +epoch: 0, batch: 35104, sum loss: 4623.042969, avg loss: 2.781614, ppl: 16.145063 +epoch: 0, batch: 35105, sum loss: 4146.710449, avg loss: 2.740721, ppl: 15.498152 +epoch: 0, batch: 35106, sum loss: 4843.158691, avg loss: 2.910552, ppl: 18.366940 +epoch: 0, batch: 35107, sum loss: 5361.514648, avg loss: 3.135389, ppl: 22.997572 +epoch: 0, batch: 35108, sum loss: 4566.560547, avg loss: 2.917930, ppl: 18.502949 +epoch: 0, batch: 35109, sum loss: 4710.294922, avg loss: 2.825612, ppl: 16.871264 +epoch: 0, batch: 35110, sum loss: 4235.289062, avg loss: 2.748403, ppl: 15.617672 +epoch: 0, batch: 35111, sum loss: 4639.187012, avg loss: 3.026215, ppl: 
20.619034 +epoch: 0, batch: 35112, sum loss: 5849.068359, avg loss: 3.165080, ppl: 23.690647 +epoch: 0, batch: 35113, sum loss: 4328.251953, avg loss: 2.816039, ppl: 16.710531 +epoch: 0, batch: 35114, sum loss: 5653.380859, avg loss: 3.351145, ppl: 28.535381 +epoch: 0, batch: 35115, sum loss: 4668.125000, avg loss: 3.164830, ppl: 23.684729 +epoch: 0, batch: 35116, sum loss: 4592.244141, avg loss: 2.702910, ppl: 14.923098 +epoch: 0, batch: 35117, sum loss: 4735.857422, avg loss: 2.852926, ppl: 17.338444 +epoch: 0, batch: 35118, sum loss: 4548.590332, avg loss: 2.787127, ppl: 16.234304 +epoch: 0, batch: 35119, sum loss: 5528.152832, avg loss: 2.976927, ppl: 19.627399 +epoch: 0, batch: 35120, sum loss: 4685.919434, avg loss: 3.060692, ppl: 21.342323 +epoch: 0, batch: 35121, sum loss: 4907.507812, avg loss: 2.912467, ppl: 18.402149 +epoch: 0, batch: 35122, sum loss: 4400.497070, avg loss: 2.755477, ppl: 15.728544 +epoch: 0, batch: 35123, sum loss: 4897.015625, avg loss: 3.117133, ppl: 22.581543 +epoch: 0, batch: 35124, sum loss: 5629.563477, avg loss: 3.218733, ppl: 24.996420 +epoch: 0, batch: 35125, sum loss: 5228.302246, avg loss: 3.115794, ppl: 22.551329 +epoch: 0, batch: 35126, sum loss: 4520.886230, avg loss: 3.017948, ppl: 20.449286 +epoch: 0, batch: 35127, sum loss: 5272.795898, avg loss: 3.108960, ppl: 22.397738 +epoch: 0, batch: 35128, sum loss: 5534.266113, avg loss: 3.531759, ppl: 34.184044 +epoch: 0, batch: 35129, sum loss: 4976.996582, avg loss: 2.888565, ppl: 17.967501 +epoch: 0, batch: 35130, sum loss: 5108.544434, avg loss: 3.107387, ppl: 22.362535 +epoch: 0, batch: 35131, sum loss: 6652.257812, avg loss: 3.151235, ppl: 23.364910 +epoch: 0, batch: 35132, sum loss: 4351.893066, avg loss: 2.851830, ppl: 17.319456 +epoch: 0, batch: 35133, sum loss: 6064.128418, avg loss: 3.101856, ppl: 22.239189 +epoch: 0, batch: 35134, sum loss: 4104.897949, avg loss: 2.894850, ppl: 18.080797 +epoch: 0, batch: 35135, sum loss: 5730.227539, avg loss: 3.129562, ppl: 
22.863960 +epoch: 0, batch: 35136, sum loss: 5148.552734, avg loss: 3.023225, ppl: 20.557489 +epoch: 0, batch: 35137, sum loss: 5127.063477, avg loss: 2.756486, ppl: 15.744415 +epoch: 0, batch: 35138, sum loss: 5387.792480, avg loss: 2.968481, ppl: 19.462330 +epoch: 0, batch: 35139, sum loss: 4618.350586, avg loss: 2.770456, ppl: 15.965914 +epoch: 0, batch: 35140, sum loss: 5123.302734, avg loss: 2.655937, ppl: 14.238324 +epoch: 0, batch: 35141, sum loss: 4462.668945, avg loss: 2.650041, ppl: 14.154620 +epoch: 0, batch: 35142, sum loss: 4746.671387, avg loss: 3.050560, ppl: 21.127172 +epoch: 0, batch: 35143, sum loss: 6415.166504, avg loss: 3.222083, ppl: 25.080299 +epoch: 0, batch: 35144, sum loss: 4747.139160, avg loss: 2.800672, ppl: 16.455702 +epoch: 0, batch: 35145, sum loss: 5683.038574, avg loss: 3.014875, ppl: 20.386536 +epoch: 0, batch: 35146, sum loss: 5072.676270, avg loss: 2.813464, ppl: 16.667561 +epoch: 0, batch: 35147, sum loss: 4257.424805, avg loss: 2.701412, ppl: 14.900753 +epoch: 0, batch: 35148, sum loss: 6507.348145, avg loss: 3.263464, ppl: 26.139940 +epoch: 0, batch: 35149, sum loss: 5921.777344, avg loss: 3.357017, ppl: 28.703428 +epoch: 0, batch: 35150, sum loss: 4109.164551, avg loss: 2.696302, ppl: 14.824811 +epoch: 0, batch: 35151, sum loss: 5083.185547, avg loss: 2.886533, ppl: 17.931044 +epoch: 0, batch: 35152, sum loss: 4326.659668, avg loss: 2.595477, ppl: 13.402974 +epoch: 0, batch: 35153, sum loss: 5615.587891, avg loss: 2.966502, ppl: 19.423851 +epoch: 0, batch: 35154, sum loss: 5406.812012, avg loss: 3.132568, ppl: 22.932787 +epoch: 0, batch: 35155, sum loss: 5400.052734, avg loss: 2.983455, ppl: 19.755949 +epoch: 0, batch: 35156, sum loss: 5103.716797, avg loss: 2.975928, ppl: 19.607813 +epoch: 0, batch: 35157, sum loss: 5196.274414, avg loss: 3.109680, ppl: 22.413864 +epoch: 0, batch: 35158, sum loss: 5444.005859, avg loss: 3.004418, ppl: 20.174475 +epoch: 0, batch: 35159, sum loss: 6314.462402, avg loss: 3.048992, ppl: 
21.094074 +epoch: 0, batch: 35160, sum loss: 5293.736328, avg loss: 2.970671, ppl: 19.505011 +epoch: 0, batch: 35161, sum loss: 5070.916504, avg loss: 3.211473, ppl: 24.815624 +epoch: 0, batch: 35162, sum loss: 5053.785156, avg loss: 2.843998, ppl: 17.184340 +epoch: 0, batch: 35163, sum loss: 5474.893555, avg loss: 2.918387, ppl: 18.511404 +epoch: 0, batch: 35164, sum loss: 6078.239258, avg loss: 3.298014, ppl: 27.058838 +epoch: 0, batch: 35165, sum loss: 4658.490723, avg loss: 2.837083, ppl: 17.065918 +epoch: 0, batch: 35166, sum loss: 5064.506348, avg loss: 2.840441, ppl: 17.123320 +epoch: 0, batch: 35167, sum loss: 5191.367676, avg loss: 3.103029, ppl: 22.265297 +epoch: 0, batch: 35168, sum loss: 5241.715820, avg loss: 2.979941, ppl: 19.686653 +epoch: 0, batch: 35169, sum loss: 4893.465332, avg loss: 2.815573, ppl: 16.702740 +epoch: 0, batch: 35170, sum loss: 4736.884766, avg loss: 2.860438, ppl: 17.469170 +epoch: 0, batch: 35171, sum loss: 6266.835938, avg loss: 3.166668, ppl: 23.728289 +epoch: 0, batch: 35172, sum loss: 4457.100098, avg loss: 2.873694, ppl: 17.702297 +epoch: 0, batch: 35173, sum loss: 4415.948242, avg loss: 2.818091, ppl: 16.744852 +epoch: 0, batch: 35174, sum loss: 5883.214844, avg loss: 3.088302, ppl: 21.939785 +epoch: 0, batch: 35175, sum loss: 5419.806641, avg loss: 3.098803, ppl: 22.171396 +epoch: 0, batch: 35176, sum loss: 5526.885742, avg loss: 3.070492, ppl: 21.552509 +epoch: 0, batch: 35177, sum loss: 4740.009766, avg loss: 3.151602, ppl: 23.373480 +epoch: 0, batch: 35178, sum loss: 5225.857910, avg loss: 3.025975, ppl: 20.614084 +epoch: 0, batch: 35179, sum loss: 5548.857422, avg loss: 3.241155, ppl: 25.563229 +epoch: 0, batch: 35180, sum loss: 4763.743652, avg loss: 3.007414, ppl: 20.235003 +epoch: 0, batch: 35181, sum loss: 5306.439941, avg loss: 3.032251, ppl: 20.743881 +epoch: 0, batch: 35182, sum loss: 5787.629883, avg loss: 2.877986, ppl: 17.778435 +epoch: 0, batch: 35183, sum loss: 4415.413574, avg loss: 2.787508, ppl: 
16.240505 +epoch: 0, batch: 35184, sum loss: 5944.143066, avg loss: 2.963182, ppl: 19.359476 +epoch: 0, batch: 35185, sum loss: 5010.233398, avg loss: 3.032829, ppl: 20.755869 +epoch: 0, batch: 35186, sum loss: 5294.244629, avg loss: 3.030478, ppl: 20.707123 +epoch: 0, batch: 35187, sum loss: 4929.601562, avg loss: 2.975016, ppl: 19.589933 +epoch: 0, batch: 35188, sum loss: 4871.919922, avg loss: 2.839114, ppl: 17.100611 +epoch: 0, batch: 35189, sum loss: 4664.351562, avg loss: 2.756709, ppl: 15.747929 +epoch: 0, batch: 35190, sum loss: 4792.661621, avg loss: 2.880205, ppl: 17.817932 +epoch: 0, batch: 35191, sum loss: 4925.092773, avg loss: 2.888618, ppl: 17.968451 +epoch: 0, batch: 35192, sum loss: 4357.444336, avg loss: 2.811255, ppl: 16.630768 +epoch: 0, batch: 35193, sum loss: 4812.631348, avg loss: 2.885271, ppl: 17.908413 +epoch: 0, batch: 35194, sum loss: 4462.232422, avg loss: 2.809970, ppl: 16.609423 +epoch: 0, batch: 35195, sum loss: 4683.161133, avg loss: 2.923321, ppl: 18.602974 +epoch: 0, batch: 35196, sum loss: 5025.866211, avg loss: 2.817190, ppl: 16.729769 +epoch: 0, batch: 35197, sum loss: 5468.226562, avg loss: 3.063432, ppl: 21.400888 +epoch: 0, batch: 35198, sum loss: 5345.683594, avg loss: 3.117017, ppl: 22.578917 +epoch: 0, batch: 35199, sum loss: 5195.446289, avg loss: 2.967131, ppl: 19.436081 +epoch: 0, batch: 35200, sum loss: 6473.414062, avg loss: 3.267751, ppl: 26.252218 +epoch: 0, batch: 35201, sum loss: 4609.989746, avg loss: 2.912185, ppl: 18.396959 +epoch: 0, batch: 35202, sum loss: 5212.373047, avg loss: 3.395683, ppl: 29.835018 +epoch: 0, batch: 35203, sum loss: 4777.121582, avg loss: 3.106061, ppl: 22.332895 +epoch: 0, batch: 35204, sum loss: 5746.047852, avg loss: 3.029018, ppl: 20.676926 +epoch: 0, batch: 35205, sum loss: 4984.625000, avg loss: 2.920108, ppl: 18.543297 +epoch: 0, batch: 35206, sum loss: 5101.887695, avg loss: 3.006416, ppl: 20.214817 +epoch: 0, batch: 35207, sum loss: 4625.659668, avg loss: 2.891037, ppl: 
18.011982 +epoch: 0, batch: 35208, sum loss: 4949.471680, avg loss: 2.771261, ppl: 15.978766 +epoch: 0, batch: 35209, sum loss: 4568.918457, avg loss: 2.823806, ppl: 16.840830 +epoch: 0, batch: 35210, sum loss: 4782.617188, avg loss: 3.004157, ppl: 20.169197 +epoch: 0, batch: 35211, sum loss: 4454.811523, avg loss: 2.896497, ppl: 18.110584 +epoch: 0, batch: 35212, sum loss: 4583.968750, avg loss: 2.908610, ppl: 18.331293 +epoch: 0, batch: 35213, sum loss: 5207.876953, avg loss: 2.947299, ppl: 19.054413 +epoch: 0, batch: 35214, sum loss: 4379.543945, avg loss: 2.685189, ppl: 14.660979 +epoch: 0, batch: 35215, sum loss: 5592.151367, avg loss: 2.987261, ppl: 19.831282 +epoch: 0, batch: 35216, sum loss: 4384.872070, avg loss: 2.733711, ppl: 15.389890 +epoch: 0, batch: 35217, sum loss: 5428.827148, avg loss: 3.082809, ppl: 21.819613 +epoch: 0, batch: 35218, sum loss: 5093.109375, avg loss: 3.097998, ppl: 22.153564 +epoch: 0, batch: 35219, sum loss: 4685.107422, avg loss: 2.954040, ppl: 19.183298 +epoch: 0, batch: 35220, sum loss: 4395.287109, avg loss: 2.735089, ppl: 15.411109 +epoch: 0, batch: 35221, sum loss: 4021.293945, avg loss: 2.724454, ppl: 15.248085 +epoch: 0, batch: 35222, sum loss: 5403.076172, avg loss: 3.050862, ppl: 21.133545 +epoch: 0, batch: 35223, sum loss: 5148.907715, avg loss: 2.756375, ppl: 15.742666 +epoch: 0, batch: 35224, sum loss: 4043.150391, avg loss: 2.757947, ppl: 15.767439 +epoch: 0, batch: 35225, sum loss: 4548.284180, avg loss: 2.900691, ppl: 18.186712 +epoch: 0, batch: 35226, sum loss: 5749.363281, avg loss: 3.061429, ppl: 21.358051 +epoch: 0, batch: 35227, sum loss: 5587.694824, avg loss: 3.018744, ppl: 20.465570 +epoch: 0, batch: 35228, sum loss: 5149.007812, avg loss: 2.938931, ppl: 18.895645 +epoch: 0, batch: 35229, sum loss: 4707.644531, avg loss: 2.807182, ppl: 16.563179 +epoch: 0, batch: 35230, sum loss: 4553.705078, avg loss: 2.746505, ppl: 15.588053 +epoch: 0, batch: 35231, sum loss: 5069.804199, avg loss: 3.015945, ppl: 
20.408371 +epoch: 0, batch: 35232, sum loss: 4605.667969, avg loss: 2.920525, ppl: 18.551025 +epoch: 0, batch: 35233, sum loss: 4540.225098, avg loss: 2.860886, ppl: 17.476997 +epoch: 0, batch: 35234, sum loss: 5250.754883, avg loss: 2.981689, ppl: 19.721102 +epoch: 0, batch: 35235, sum loss: 5236.320801, avg loss: 2.812202, ppl: 16.646542 +epoch: 0, batch: 35236, sum loss: 4851.822754, avg loss: 2.702965, ppl: 14.923923 +epoch: 0, batch: 35237, sum loss: 4694.290527, avg loss: 2.821088, ppl: 16.795115 +epoch: 0, batch: 35238, sum loss: 4668.394043, avg loss: 2.825905, ppl: 16.876204 +epoch: 0, batch: 35239, sum loss: 6768.082520, avg loss: 3.227507, ppl: 25.216722 +epoch: 0, batch: 35240, sum loss: 6358.570801, avg loss: 2.837381, ppl: 17.070995 +epoch: 0, batch: 35241, sum loss: 5285.017578, avg loss: 2.997741, ppl: 20.040215 +epoch: 0, batch: 35242, sum loss: 4915.423340, avg loss: 2.841285, ppl: 17.137774 +epoch: 0, batch: 35243, sum loss: 5385.896973, avg loss: 2.993828, ppl: 19.961952 +epoch: 0, batch: 35244, sum loss: 6137.930664, avg loss: 3.112541, ppl: 22.478083 +epoch: 0, batch: 35245, sum loss: 5836.259766, avg loss: 3.084704, ppl: 21.860992 +epoch: 0, batch: 35246, sum loss: 4845.747559, avg loss: 2.827157, ppl: 16.897358 +epoch: 0, batch: 35247, sum loss: 4592.599609, avg loss: 2.886612, ppl: 17.932451 +epoch: 0, batch: 35248, sum loss: 5028.317383, avg loss: 2.824898, ppl: 16.859217 +epoch: 0, batch: 35249, sum loss: 3940.571045, avg loss: 2.802682, ppl: 16.488813 +epoch: 0, batch: 35250, sum loss: 4255.573730, avg loss: 2.856090, ppl: 17.393383 +epoch: 0, batch: 35251, sum loss: 4833.750488, avg loss: 3.192702, ppl: 24.354139 +epoch: 0, batch: 35252, sum loss: 4933.377930, avg loss: 3.043416, ppl: 20.976784 +epoch: 0, batch: 35253, sum loss: 4178.831055, avg loss: 2.603633, ppl: 13.512740 +epoch: 0, batch: 35254, sum loss: 5135.877930, avg loss: 3.252614, ppl: 25.857851 +epoch: 0, batch: 35255, sum loss: 4989.746094, avg loss: 3.024089, ppl: 
20.575245 +epoch: 0, batch: 35256, sum loss: 5802.035156, avg loss: 3.139629, ppl: 23.095304 +epoch: 0, batch: 35257, sum loss: 4859.767578, avg loss: 2.884135, ppl: 17.888088 +epoch: 0, batch: 35258, sum loss: 6322.324707, avg loss: 3.380922, ppl: 29.397873 +epoch: 0, batch: 35259, sum loss: 5021.630859, avg loss: 2.902677, ppl: 18.222860 +epoch: 0, batch: 35260, sum loss: 4280.287598, avg loss: 2.745534, ppl: 15.572930 +epoch: 0, batch: 35261, sum loss: 5027.267090, avg loss: 2.934773, ppl: 18.817240 +epoch: 0, batch: 35262, sum loss: 5096.288086, avg loss: 2.956084, ppl: 19.222544 +epoch: 0, batch: 35263, sum loss: 4332.115723, avg loss: 2.776997, ppl: 16.070694 +epoch: 0, batch: 35264, sum loss: 5848.645020, avg loss: 3.227729, ppl: 25.222315 +epoch: 0, batch: 35265, sum loss: 4865.201172, avg loss: 2.800922, ppl: 16.459814 +epoch: 0, batch: 35266, sum loss: 5135.447266, avg loss: 2.924515, ppl: 18.625181 +epoch: 0, batch: 35267, sum loss: 5684.580078, avg loss: 2.928686, ppl: 18.703041 +epoch: 0, batch: 35268, sum loss: 4851.908691, avg loss: 2.658580, ppl: 14.276004 +epoch: 0, batch: 35269, sum loss: 5075.468750, avg loss: 2.740534, ppl: 15.495255 +epoch: 0, batch: 35270, sum loss: 4521.713379, avg loss: 3.014476, ppl: 20.378401 +epoch: 0, batch: 35271, sum loss: 5941.002441, avg loss: 3.029578, ppl: 20.688505 +epoch: 0, batch: 35272, sum loss: 5499.181641, avg loss: 2.926653, ppl: 18.665062 +epoch: 0, batch: 35273, sum loss: 5429.034180, avg loss: 2.858891, ppl: 17.442173 +epoch: 0, batch: 35274, sum loss: 5096.416992, avg loss: 3.183271, ppl: 24.125544 +epoch: 0, batch: 35275, sum loss: 4775.317383, avg loss: 2.880167, ppl: 17.817253 +epoch: 0, batch: 35276, sum loss: 4743.543945, avg loss: 2.894170, ppl: 18.068491 +epoch: 0, batch: 35277, sum loss: 5202.141602, avg loss: 3.054693, ppl: 21.214666 +epoch: 0, batch: 35278, sum loss: 6203.697266, avg loss: 3.211024, ppl: 24.804462 +epoch: 0, batch: 35279, sum loss: 5285.652344, avg loss: 3.057058, ppl: 
21.264896 +epoch: 0, batch: 35280, sum loss: 3984.640137, avg loss: 2.661750, ppl: 14.321331 +epoch: 0, batch: 35281, sum loss: 5351.887207, avg loss: 2.942214, ppl: 18.957773 +epoch: 0, batch: 35282, sum loss: 5173.709961, avg loss: 3.034434, ppl: 20.789209 +epoch: 0, batch: 35283, sum loss: 5558.824707, avg loss: 2.892209, ppl: 18.033092 +epoch: 0, batch: 35284, sum loss: 4967.613770, avg loss: 2.679403, ppl: 14.576393 +epoch: 0, batch: 35285, sum loss: 4717.380371, avg loss: 2.665187, ppl: 14.370631 +epoch: 0, batch: 35286, sum loss: 3868.070801, avg loss: 2.903957, ppl: 18.246204 +epoch: 0, batch: 35287, sum loss: 4530.198730, avg loss: 2.813788, ppl: 16.672955 +epoch: 0, batch: 35288, sum loss: 6493.839844, avg loss: 3.298040, ppl: 27.059542 +epoch: 0, batch: 35289, sum loss: 4789.601074, avg loss: 2.892271, ppl: 18.034220 +epoch: 0, batch: 35290, sum loss: 4866.296875, avg loss: 3.024423, ppl: 20.582129 +epoch: 0, batch: 35291, sum loss: 3948.487793, avg loss: 2.628820, ppl: 13.857408 +epoch: 0, batch: 35292, sum loss: 4585.588867, avg loss: 2.644515, ppl: 14.076613 +epoch: 0, batch: 35293, sum loss: 5079.559570, avg loss: 2.973981, ppl: 19.569670 +epoch: 0, batch: 35294, sum loss: 4654.997559, avg loss: 2.887716, ppl: 17.952251 +epoch: 0, batch: 35295, sum loss: 4823.358887, avg loss: 3.003337, ppl: 20.152668 +epoch: 0, batch: 35296, sum loss: 4109.597656, avg loss: 2.539924, ppl: 12.678712 +epoch: 0, batch: 35297, sum loss: 5594.717773, avg loss: 3.136053, ppl: 23.012846 +epoch: 0, batch: 35298, sum loss: 4227.115234, avg loss: 3.006483, ppl: 20.216177 +epoch: 0, batch: 35299, sum loss: 5589.388184, avg loss: 2.965193, ppl: 19.398443 +epoch: 0, batch: 35300, sum loss: 4562.206055, avg loss: 2.807511, ppl: 16.568632 +epoch: 0, batch: 35301, sum loss: 3648.467041, avg loss: 2.551376, ppl: 12.824734 +epoch: 0, batch: 35302, sum loss: 6025.630859, avg loss: 2.971218, ppl: 19.515682 +epoch: 0, batch: 35303, sum loss: 4442.064941, avg loss: 2.799033, ppl: 
16.428751 +epoch: 0, batch: 35304, sum loss: 4616.215820, avg loss: 2.739594, ppl: 15.480699 +epoch: 0, batch: 35305, sum loss: 4583.875488, avg loss: 2.953528, ppl: 19.173481 +epoch: 0, batch: 35306, sum loss: 4684.225586, avg loss: 2.964700, ppl: 19.388882 +epoch: 0, batch: 35307, sum loss: 5611.442383, avg loss: 3.088301, ppl: 21.939764 +epoch: 0, batch: 35308, sum loss: 5411.578125, avg loss: 3.021540, ppl: 20.522871 +epoch: 0, batch: 35309, sum loss: 4404.256348, avg loss: 2.747509, ppl: 15.603707 +epoch: 0, batch: 35310, sum loss: 4153.803223, avg loss: 2.908826, ppl: 18.335258 +epoch: 0, batch: 35311, sum loss: 4611.900391, avg loss: 2.884240, ppl: 17.889973 +epoch: 0, batch: 35312, sum loss: 4813.047363, avg loss: 2.822902, ppl: 16.825603 +epoch: 0, batch: 35313, sum loss: 5643.242676, avg loss: 3.126450, ppl: 22.792927 +epoch: 0, batch: 35314, sum loss: 4948.841797, avg loss: 2.963378, ppl: 19.363279 +epoch: 0, batch: 35315, sum loss: 4225.974609, avg loss: 2.932668, ppl: 18.777666 +epoch: 0, batch: 35316, sum loss: 4365.694824, avg loss: 2.786021, ppl: 16.216370 +epoch: 0, batch: 35317, sum loss: 4981.503418, avg loss: 2.817593, ppl: 16.736511 +epoch: 0, batch: 35318, sum loss: 5338.342285, avg loss: 2.972351, ppl: 19.537800 +epoch: 0, batch: 35319, sum loss: 5335.271973, avg loss: 2.946037, ppl: 19.030378 +epoch: 0, batch: 35320, sum loss: 4397.032227, avg loss: 2.775904, ppl: 16.053135 +epoch: 0, batch: 35321, sum loss: 5152.458008, avg loss: 2.761231, ppl: 15.819311 +epoch: 0, batch: 35322, sum loss: 5163.196777, avg loss: 2.860497, ppl: 17.470203 +epoch: 0, batch: 35323, sum loss: 4756.637695, avg loss: 2.952600, ppl: 19.155685 +epoch: 0, batch: 35324, sum loss: 4879.648438, avg loss: 2.962749, ppl: 19.351105 +epoch: 0, batch: 35325, sum loss: 5370.440430, avg loss: 3.079381, ppl: 21.744932 +epoch: 0, batch: 35326, sum loss: 5715.258789, avg loss: 3.141979, ppl: 23.149624 +epoch: 0, batch: 35327, sum loss: 4853.014160, avg loss: 2.873306, ppl: 
17.695427 +epoch: 0, batch: 35328, sum loss: 5022.274414, avg loss: 2.935286, ppl: 18.826893 +epoch: 0, batch: 35329, sum loss: 5717.720703, avg loss: 3.082329, ppl: 21.809143 +epoch: 0, batch: 35330, sum loss: 5815.323730, avg loss: 3.113128, ppl: 22.491291 +epoch: 0, batch: 35331, sum loss: 4416.652344, avg loss: 2.877298, ppl: 17.766201 +epoch: 0, batch: 35332, sum loss: 6000.426270, avg loss: 3.257560, ppl: 25.986067 +epoch: 0, batch: 35333, sum loss: 3255.227783, avg loss: 2.423848, ppl: 11.289216 +epoch: 0, batch: 35334, sum loss: 3994.066895, avg loss: 2.706007, ppl: 14.969391 +epoch: 0, batch: 35335, sum loss: 4785.143555, avg loss: 2.831446, ppl: 16.969980 +epoch: 0, batch: 35336, sum loss: 3661.659668, avg loss: 2.587745, ppl: 13.299749 +epoch: 0, batch: 35337, sum loss: 4649.752930, avg loss: 2.784283, ppl: 16.188210 +epoch: 0, batch: 35338, sum loss: 5004.486816, avg loss: 2.872840, ppl: 17.687174 +epoch: 0, batch: 35339, sum loss: 5231.604492, avg loss: 2.808161, ppl: 16.579405 +epoch: 0, batch: 35340, sum loss: 4293.471191, avg loss: 2.500566, ppl: 12.189388 +epoch: 0, batch: 35341, sum loss: 4320.941406, avg loss: 2.590492, ppl: 13.336338 +epoch: 0, batch: 35342, sum loss: 5244.027344, avg loss: 3.043545, ppl: 20.979473 +epoch: 0, batch: 35343, sum loss: 5708.400879, avg loss: 3.099023, ppl: 22.176283 +epoch: 0, batch: 35344, sum loss: 4424.857422, avg loss: 2.895849, ppl: 18.098866 +epoch: 0, batch: 35345, sum loss: 5301.053223, avg loss: 2.968115, ppl: 19.455214 +epoch: 0, batch: 35346, sum loss: 4127.939453, avg loss: 2.854730, ppl: 17.369745 +epoch: 0, batch: 35347, sum loss: 5959.331055, avg loss: 3.074990, ppl: 21.649672 +epoch: 0, batch: 35348, sum loss: 4985.938965, avg loss: 2.893754, ppl: 18.060991 +epoch: 0, batch: 35349, sum loss: 5110.547852, avg loss: 3.146889, ppl: 23.263578 +epoch: 0, batch: 35350, sum loss: 4985.274414, avg loss: 2.819726, ppl: 16.772245 +epoch: 0, batch: 35351, sum loss: 5791.355957, avg loss: 3.021052, ppl: 
20.512857 +epoch: 0, batch: 35352, sum loss: 4520.841309, avg loss: 2.914791, ppl: 18.444960 +epoch: 0, batch: 35353, sum loss: 4996.187988, avg loss: 3.009752, ppl: 20.282366 +epoch: 0, batch: 35354, sum loss: 5080.458008, avg loss: 2.962366, ppl: 19.343687 +epoch: 0, batch: 35355, sum loss: 5991.064453, avg loss: 3.021213, ppl: 20.516153 +epoch: 0, batch: 35356, sum loss: 5703.638184, avg loss: 3.186390, ppl: 24.200903 +epoch: 0, batch: 35357, sum loss: 4639.206055, avg loss: 2.890471, ppl: 18.001785 +epoch: 0, batch: 35358, sum loss: 4509.078613, avg loss: 2.935598, ppl: 18.832764 +epoch: 0, batch: 35359, sum loss: 5623.943359, avg loss: 3.073193, ppl: 21.610802 +epoch: 0, batch: 35360, sum loss: 4271.222656, avg loss: 2.710167, ppl: 15.031781 +epoch: 0, batch: 35361, sum loss: 5473.970215, avg loss: 2.910138, ppl: 18.359339 +epoch: 0, batch: 35362, sum loss: 5876.402832, avg loss: 3.237688, ppl: 25.474745 +epoch: 0, batch: 35363, sum loss: 3834.194336, avg loss: 2.602983, ppl: 13.503966 +epoch: 0, batch: 35364, sum loss: 5279.282227, avg loss: 2.991095, ppl: 19.907465 +epoch: 0, batch: 35365, sum loss: 4370.982910, avg loss: 2.929613, ppl: 18.720390 +epoch: 0, batch: 35366, sum loss: 5460.687988, avg loss: 3.233089, ppl: 25.357878 +epoch: 0, batch: 35367, sum loss: 4431.785156, avg loss: 2.808482, ppl: 16.584730 +epoch: 0, batch: 35368, sum loss: 5558.780273, avg loss: 3.149451, ppl: 23.323246 +epoch: 0, batch: 35369, sum loss: 4618.271484, avg loss: 2.979530, ppl: 19.678568 +epoch: 0, batch: 35370, sum loss: 5484.871582, avg loss: 3.093554, ppl: 22.055328 +epoch: 0, batch: 35371, sum loss: 4605.035645, avg loss: 2.745996, ppl: 15.580128 +epoch: 0, batch: 35372, sum loss: 4757.962891, avg loss: 2.960773, ppl: 19.312904 +epoch: 0, batch: 35373, sum loss: 4454.960449, avg loss: 2.591600, ppl: 13.351119 +epoch: 0, batch: 35374, sum loss: 4639.408691, avg loss: 2.917867, ppl: 18.501780 +epoch: 0, batch: 35375, sum loss: 5015.417969, avg loss: 2.895738, ppl: 
18.096849 +epoch: 0, batch: 35376, sum loss: 5228.060547, avg loss: 3.036040, ppl: 20.822618 +epoch: 0, batch: 35377, sum loss: 4825.483398, avg loss: 2.912181, ppl: 18.396872 +epoch: 0, batch: 35378, sum loss: 4841.582031, avg loss: 3.103578, ppl: 22.277525 +epoch: 0, batch: 35379, sum loss: 5234.493652, avg loss: 3.027469, ppl: 20.644913 +epoch: 0, batch: 35380, sum loss: 5199.898438, avg loss: 3.009201, ppl: 20.271189 +epoch: 0, batch: 35381, sum loss: 5914.294922, avg loss: 3.307771, ppl: 27.324158 +epoch: 0, batch: 35382, sum loss: 5955.448242, avg loss: 3.046265, ppl: 21.036629 +epoch: 0, batch: 35383, sum loss: 5278.239258, avg loss: 3.021316, ppl: 20.518276 +epoch: 0, batch: 35384, sum loss: 4460.002930, avg loss: 2.835348, ppl: 17.036335 +epoch: 0, batch: 35385, sum loss: 6128.577637, avg loss: 3.021981, ppl: 20.531931 +epoch: 0, batch: 35386, sum loss: 5664.688477, avg loss: 3.178838, ppl: 24.018822 +epoch: 0, batch: 35387, sum loss: 3873.625488, avg loss: 2.591054, ppl: 13.343825 +epoch: 0, batch: 35388, sum loss: 4696.963867, avg loss: 2.838045, ppl: 17.082331 +epoch: 0, batch: 35389, sum loss: 5335.212891, avg loss: 3.175722, ppl: 23.944105 +epoch: 0, batch: 35390, sum loss: 4606.202148, avg loss: 2.803531, ppl: 16.502823 +epoch: 0, batch: 35391, sum loss: 4416.217285, avg loss: 2.763590, ppl: 15.856671 +epoch: 0, batch: 35392, sum loss: 5202.335938, avg loss: 2.858427, ppl: 17.434074 +epoch: 0, batch: 35393, sum loss: 4009.531738, avg loss: 2.813706, ppl: 16.671595 +epoch: 0, batch: 35394, sum loss: 4643.656250, avg loss: 2.959628, ppl: 19.290792 +epoch: 0, batch: 35395, sum loss: 4053.072021, avg loss: 2.650799, ppl: 14.165356 +epoch: 0, batch: 35396, sum loss: 4708.949707, avg loss: 2.855640, ppl: 17.385555 +epoch: 0, batch: 35397, sum loss: 5974.081543, avg loss: 3.122886, ppl: 22.711836 +epoch: 0, batch: 35398, sum loss: 6295.819336, avg loss: 3.151061, ppl: 23.360828 +epoch: 0, batch: 35399, sum loss: 5471.097656, avg loss: 2.849530, ppl: 
17.279663 +epoch: 0, batch: 35400, sum loss: 4561.526855, avg loss: 2.905431, ppl: 18.273123 +epoch: 0, batch: 35401, sum loss: 4035.225098, avg loss: 2.827768, ppl: 16.907686 +epoch: 0, batch: 35402, sum loss: 4243.077637, avg loss: 2.764220, ppl: 15.866659 +epoch: 0, batch: 35403, sum loss: 5636.082031, avg loss: 3.124214, ppl: 22.742012 +epoch: 0, batch: 35404, sum loss: 4211.646973, avg loss: 2.455771, ppl: 11.655414 +epoch: 0, batch: 35405, sum loss: 4302.957031, avg loss: 2.779688, ppl: 16.113995 +epoch: 0, batch: 35406, sum loss: 4828.918457, avg loss: 2.815696, ppl: 16.704794 +epoch: 0, batch: 35407, sum loss: 4334.484375, avg loss: 2.944623, ppl: 19.003487 +epoch: 0, batch: 35408, sum loss: 4904.577637, avg loss: 3.108097, ppl: 22.378426 +epoch: 0, batch: 35409, sum loss: 4503.256348, avg loss: 2.916617, ppl: 18.478666 +epoch: 0, batch: 35410, sum loss: 5339.371094, avg loss: 3.042377, ppl: 20.954985 +epoch: 0, batch: 35411, sum loss: 4426.207031, avg loss: 2.933206, ppl: 18.787760 +epoch: 0, batch: 35412, sum loss: 4449.009766, avg loss: 2.782370, ppl: 16.157270 +epoch: 0, batch: 35413, sum loss: 4932.492676, avg loss: 2.913463, ppl: 18.420481 +epoch: 0, batch: 35414, sum loss: 4827.525391, avg loss: 2.863301, ppl: 17.519264 +epoch: 0, batch: 35415, sum loss: 4800.742188, avg loss: 2.886796, ppl: 17.935755 +epoch: 0, batch: 35416, sum loss: 6110.325684, avg loss: 3.117513, ppl: 22.590126 +epoch: 0, batch: 35417, sum loss: 4695.724609, avg loss: 2.703353, ppl: 14.929713 +epoch: 0, batch: 35418, sum loss: 5247.859863, avg loss: 2.909013, ppl: 18.338694 +epoch: 0, batch: 35419, sum loss: 5423.701172, avg loss: 2.955696, ppl: 19.215084 +epoch: 0, batch: 35420, sum loss: 4833.616699, avg loss: 2.906564, ppl: 18.293842 +epoch: 0, batch: 35421, sum loss: 4096.490234, avg loss: 2.712908, ppl: 15.073037 +epoch: 0, batch: 35422, sum loss: 4740.332031, avg loss: 2.867714, ppl: 17.596754 +epoch: 0, batch: 35423, sum loss: 4815.297852, avg loss: 3.002056, ppl: 
20.126877 +epoch: 0, batch: 35424, sum loss: 4701.809082, avg loss: 2.863465, ppl: 17.522133 +epoch: 0, batch: 35425, sum loss: 5111.406738, avg loss: 2.947755, ppl: 19.063101 +epoch: 0, batch: 35426, sum loss: 6259.614258, avg loss: 3.275570, ppl: 26.458300 +epoch: 0, batch: 35427, sum loss: 5708.739258, avg loss: 2.933576, ppl: 18.794727 +epoch: 0, batch: 35428, sum loss: 5265.937500, avg loss: 2.914188, ppl: 18.433832 +epoch: 0, batch: 35429, sum loss: 4125.333984, avg loss: 2.843097, ppl: 17.168859 +epoch: 0, batch: 35430, sum loss: 4856.748047, avg loss: 2.899551, ppl: 18.165989 +epoch: 0, batch: 35431, sum loss: 4906.173828, avg loss: 2.930809, ppl: 18.742788 +epoch: 0, batch: 35432, sum loss: 5144.192383, avg loss: 3.056561, ppl: 21.254337 +epoch: 0, batch: 35433, sum loss: 5304.172363, avg loss: 2.761152, ppl: 15.818051 +epoch: 0, batch: 35434, sum loss: 5117.745605, avg loss: 2.857479, ppl: 17.417568 +epoch: 0, batch: 35435, sum loss: 4072.752441, avg loss: 2.731558, ppl: 15.356791 +epoch: 0, batch: 35436, sum loss: 3536.387695, avg loss: 2.654946, ppl: 14.224216 +epoch: 0, batch: 35437, sum loss: 4129.983398, avg loss: 2.546229, ppl: 12.758898 +epoch: 0, batch: 35438, sum loss: 4742.285645, avg loss: 2.671710, ppl: 14.464683 +epoch: 0, batch: 35439, sum loss: 4139.633301, avg loss: 2.736043, ppl: 15.425827 +epoch: 0, batch: 35440, sum loss: 5343.308105, avg loss: 2.868120, ppl: 17.603895 +epoch: 0, batch: 35441, sum loss: 4568.066895, avg loss: 2.930126, ppl: 18.729998 +epoch: 0, batch: 35442, sum loss: 4463.001953, avg loss: 2.753240, ppl: 15.693394 +epoch: 0, batch: 35443, sum loss: 5287.654785, avg loss: 3.097630, ppl: 22.145411 +epoch: 0, batch: 35444, sum loss: 6285.197754, avg loss: 3.205098, ppl: 24.657927 +epoch: 0, batch: 35445, sum loss: 4662.353027, avg loss: 2.744175, ppl: 15.551778 +epoch: 0, batch: 35446, sum loss: 4693.151367, avg loss: 2.879234, ppl: 17.800631 +epoch: 0, batch: 35447, sum loss: 4671.745117, avg loss: 3.133297, ppl: 
22.949512 +epoch: 0, batch: 35448, sum loss: 5212.271484, avg loss: 2.903773, ppl: 18.242838 +epoch: 0, batch: 35449, sum loss: 4914.552734, avg loss: 2.920115, ppl: 18.543411 +epoch: 0, batch: 35450, sum loss: 4947.000000, avg loss: 2.876163, ppl: 17.746046 +epoch: 0, batch: 35451, sum loss: 5245.042480, avg loss: 2.800343, ppl: 16.450289 +epoch: 0, batch: 35452, sum loss: 5164.545898, avg loss: 3.039757, ppl: 20.900154 +epoch: 0, batch: 35453, sum loss: 5390.446289, avg loss: 3.241399, ppl: 25.569464 +epoch: 0, batch: 35454, sum loss: 5464.216309, avg loss: 3.010588, ppl: 20.299326 +epoch: 0, batch: 35455, sum loss: 5941.760742, avg loss: 3.252195, ppl: 25.847021 +epoch: 0, batch: 35456, sum loss: 4757.784180, avg loss: 2.878272, ppl: 17.783522 +epoch: 0, batch: 35457, sum loss: 4629.472168, avg loss: 2.836686, ppl: 17.059145 +epoch: 0, batch: 35458, sum loss: 5068.588379, avg loss: 3.134563, ppl: 22.978598 +epoch: 0, batch: 35459, sum loss: 4599.214355, avg loss: 3.002098, ppl: 20.127722 +epoch: 0, batch: 35460, sum loss: 4773.632812, avg loss: 2.990998, ppl: 19.905539 +epoch: 0, batch: 35461, sum loss: 4803.286133, avg loss: 2.817177, ppl: 16.729553 +epoch: 0, batch: 35462, sum loss: 5624.489746, avg loss: 2.986983, ppl: 19.825785 +epoch: 0, batch: 35463, sum loss: 5198.750977, avg loss: 2.991226, ppl: 19.910082 +epoch: 0, batch: 35464, sum loss: 5090.255371, avg loss: 3.044411, ppl: 20.997663 +epoch: 0, batch: 35465, sum loss: 4858.321289, avg loss: 2.776184, ppl: 16.057621 +epoch: 0, batch: 35466, sum loss: 4799.695801, avg loss: 2.810126, ppl: 16.612017 +epoch: 0, batch: 35467, sum loss: 4369.449707, avg loss: 2.793766, ppl: 16.342445 +epoch: 0, batch: 35468, sum loss: 6003.830078, avg loss: 3.096354, ppl: 22.117165 +epoch: 0, batch: 35469, sum loss: 5714.574219, avg loss: 3.162465, ppl: 23.628765 +epoch: 0, batch: 35470, sum loss: 4525.251953, avg loss: 2.973228, ppl: 19.554932 +epoch: 0, batch: 35471, sum loss: 5376.170898, avg loss: 2.950698, ppl: 
19.119287 +epoch: 0, batch: 35472, sum loss: 5283.279297, avg loss: 2.845061, ppl: 17.202614 +epoch: 0, batch: 35473, sum loss: 4875.783203, avg loss: 2.956812, ppl: 19.236551 +epoch: 0, batch: 35474, sum loss: 5661.693359, avg loss: 3.060375, ppl: 21.335556 +epoch: 0, batch: 35475, sum loss: 4555.370605, avg loss: 2.991051, ppl: 19.906593 +epoch: 0, batch: 35476, sum loss: 5028.252441, avg loss: 2.876575, ppl: 17.753359 +epoch: 0, batch: 35477, sum loss: 4777.040039, avg loss: 2.800141, ppl: 16.446959 +epoch: 0, batch: 35478, sum loss: 4752.240234, avg loss: 3.052177, ppl: 21.161371 +epoch: 0, batch: 35479, sum loss: 4460.353516, avg loss: 2.755005, ppl: 15.721126 +epoch: 0, batch: 35480, sum loss: 4530.559082, avg loss: 2.909800, ppl: 18.353132 +epoch: 0, batch: 35481, sum loss: 6266.732422, avg loss: 3.071928, ppl: 21.583466 +epoch: 0, batch: 35482, sum loss: 4975.470215, avg loss: 2.970430, ppl: 19.500305 +epoch: 0, batch: 35483, sum loss: 6744.630859, avg loss: 3.298108, ppl: 27.061394 +epoch: 0, batch: 35484, sum loss: 5048.722656, avg loss: 2.914967, ppl: 18.448200 +epoch: 0, batch: 35485, sum loss: 5376.008789, avg loss: 3.052816, ppl: 21.174887 +epoch: 0, batch: 35486, sum loss: 4822.781250, avg loss: 2.905290, ppl: 18.270540 +epoch: 0, batch: 35487, sum loss: 5807.659668, avg loss: 3.045443, ppl: 21.019342 +epoch: 0, batch: 35488, sum loss: 4472.261719, avg loss: 2.900299, ppl: 18.179586 +epoch: 0, batch: 35489, sum loss: 4719.265625, avg loss: 2.979334, ppl: 19.674715 +epoch: 0, batch: 35490, sum loss: 5721.488281, avg loss: 3.154073, ppl: 23.431307 +epoch: 0, batch: 35491, sum loss: 5248.953125, avg loss: 2.932376, ppl: 18.772179 +epoch: 0, batch: 35492, sum loss: 5456.706543, avg loss: 3.154166, ppl: 23.433474 +epoch: 0, batch: 35493, sum loss: 4229.925293, avg loss: 2.797570, ppl: 16.404730 +epoch: 0, batch: 35494, sum loss: 4711.297852, avg loss: 2.924456, ppl: 18.624084 +epoch: 0, batch: 35495, sum loss: 5156.839844, avg loss: 2.943402, ppl: 
18.980305 +epoch: 0, batch: 35496, sum loss: 3938.005127, avg loss: 2.555487, ppl: 12.877568 +epoch: 0, batch: 35497, sum loss: 5233.570312, avg loss: 2.973619, ppl: 19.562597 +epoch: 0, batch: 35498, sum loss: 4088.077881, avg loss: 2.642584, ppl: 14.049465 +epoch: 0, batch: 35499, sum loss: 5312.984863, avg loss: 2.919223, ppl: 18.526878 +epoch: 0, batch: 35500, sum loss: 4836.279297, avg loss: 2.730818, ppl: 15.345439 +epoch: 0, batch: 35501, sum loss: 5294.375000, avg loss: 3.055034, ppl: 21.221916 +epoch: 0, batch: 35502, sum loss: 5584.133301, avg loss: 3.172803, ppl: 23.874308 +epoch: 0, batch: 35503, sum loss: 4051.668701, avg loss: 2.702914, ppl: 14.923162 +epoch: 0, batch: 35504, sum loss: 5634.601562, avg loss: 3.133816, ppl: 22.961433 +epoch: 0, batch: 35505, sum loss: 5650.811035, avg loss: 2.996188, ppl: 20.009125 +epoch: 0, batch: 35506, sum loss: 4973.075195, avg loss: 2.920185, ppl: 18.544720 +epoch: 0, batch: 35507, sum loss: 4630.093262, avg loss: 2.823228, ppl: 16.831087 +epoch: 0, batch: 35508, sum loss: 5268.988281, avg loss: 2.973470, ppl: 19.559669 +epoch: 0, batch: 35509, sum loss: 5297.472656, avg loss: 3.015067, ppl: 20.390453 +epoch: 0, batch: 35510, sum loss: 4373.989258, avg loss: 2.660577, ppl: 14.304544 +epoch: 0, batch: 35511, sum loss: 5273.710938, avg loss: 3.093086, ppl: 22.044998 +epoch: 0, batch: 35512, sum loss: 4783.635254, avg loss: 2.670930, ppl: 14.453400 +epoch: 0, batch: 35513, sum loss: 5192.746094, avg loss: 2.958830, ppl: 19.275400 +epoch: 0, batch: 35514, sum loss: 5401.088867, avg loss: 3.165937, ppl: 23.710955 +epoch: 0, batch: 35515, sum loss: 4629.464844, avg loss: 2.792198, ppl: 16.316853 +epoch: 0, batch: 35516, sum loss: 5776.500000, avg loss: 3.062832, ppl: 21.388035 +epoch: 0, batch: 35517, sum loss: 4271.854492, avg loss: 2.766745, ppl: 15.906778 +epoch: 0, batch: 35518, sum loss: 6115.774902, avg loss: 3.139515, ppl: 23.092663 +epoch: 0, batch: 35519, sum loss: 6144.264648, avg loss: 3.289221, ppl: 
26.821957 +epoch: 0, batch: 35520, sum loss: 4478.172363, avg loss: 2.892876, ppl: 18.045139 +epoch: 0, batch: 35521, sum loss: 5400.542480, avg loss: 3.134383, ppl: 22.974466 +epoch: 0, batch: 35522, sum loss: 4731.681641, avg loss: 2.814802, ppl: 16.689865 +epoch: 0, batch: 35523, sum loss: 5363.219238, avg loss: 2.956571, ppl: 19.231909 +epoch: 0, batch: 35524, sum loss: 5138.158691, avg loss: 2.880134, ppl: 17.816658 +epoch: 0, batch: 35525, sum loss: 4314.052246, avg loss: 2.740821, ppl: 15.499703 +epoch: 0, batch: 35526, sum loss: 4162.068848, avg loss: 2.800854, ppl: 16.458693 +epoch: 0, batch: 35527, sum loss: 4327.588867, avg loss: 2.761703, ppl: 15.826777 +epoch: 0, batch: 35528, sum loss: 3998.720703, avg loss: 2.885080, ppl: 17.904997 +epoch: 0, batch: 35529, sum loss: 4386.500977, avg loss: 2.701047, ppl: 14.895326 +epoch: 0, batch: 35530, sum loss: 4488.625000, avg loss: 2.819488, ppl: 16.768265 +epoch: 0, batch: 35531, sum loss: 4397.953613, avg loss: 2.778240, ppl: 16.090672 +epoch: 0, batch: 35532, sum loss: 4399.960449, avg loss: 2.929401, ppl: 18.716415 +epoch: 0, batch: 35533, sum loss: 4961.452148, avg loss: 3.093175, ppl: 22.046959 +epoch: 0, batch: 35534, sum loss: 3424.058105, avg loss: 2.440526, ppl: 11.479080 +epoch: 0, batch: 35535, sum loss: 4695.603516, avg loss: 2.851004, ppl: 17.305145 +epoch: 0, batch: 35536, sum loss: 3655.434082, avg loss: 2.769268, ppl: 15.946961 +epoch: 0, batch: 35537, sum loss: 5423.347168, avg loss: 3.104377, ppl: 22.295330 +epoch: 0, batch: 35538, sum loss: 5239.337891, avg loss: 2.961751, ppl: 19.331797 +epoch: 0, batch: 35539, sum loss: 5791.662109, avg loss: 3.083952, ppl: 21.844566 +epoch: 0, batch: 35540, sum loss: 5193.019531, avg loss: 3.074612, ppl: 21.641481 +epoch: 0, batch: 35541, sum loss: 5813.892578, avg loss: 3.149454, ppl: 23.323336 +epoch: 0, batch: 35542, sum loss: 4667.664551, avg loss: 2.650576, ppl: 14.162195 +epoch: 0, batch: 35543, sum loss: 5121.188477, avg loss: 2.948295, ppl: 
19.073408 +epoch: 0, batch: 35544, sum loss: 4851.717773, avg loss: 3.022877, ppl: 20.550329 +epoch: 0, batch: 35545, sum loss: 6198.040039, avg loss: 3.413018, ppl: 30.356709 +epoch: 0, batch: 35546, sum loss: 5832.375000, avg loss: 3.087546, ppl: 21.923220 +epoch: 0, batch: 35547, sum loss: 5100.752930, avg loss: 2.860770, ppl: 17.474981 +epoch: 0, batch: 35548, sum loss: 4615.992676, avg loss: 2.851138, ppl: 17.307468 +epoch: 0, batch: 35549, sum loss: 4490.354980, avg loss: 2.792509, ppl: 16.321926 +epoch: 0, batch: 35550, sum loss: 4477.741699, avg loss: 2.708858, ppl: 15.012115 +epoch: 0, batch: 35551, sum loss: 6091.852051, avg loss: 2.986202, ppl: 19.810295 +epoch: 0, batch: 35552, sum loss: 5640.078613, avg loss: 3.042114, ppl: 20.949474 +epoch: 0, batch: 35553, sum loss: 4928.206543, avg loss: 2.881992, ppl: 17.849796 +epoch: 0, batch: 35554, sum loss: 4520.988770, avg loss: 2.804584, ppl: 16.520195 +epoch: 0, batch: 35555, sum loss: 4936.064453, avg loss: 3.192797, ppl: 24.356462 +epoch: 0, batch: 35556, sum loss: 5104.464844, avg loss: 3.069432, ppl: 21.529665 +epoch: 0, batch: 35557, sum loss: 5193.703125, avg loss: 3.017840, ppl: 20.447081 +epoch: 0, batch: 35558, sum loss: 5926.944336, avg loss: 3.139271, ppl: 23.087036 +epoch: 0, batch: 35559, sum loss: 5383.531250, avg loss: 3.076303, ppl: 21.678120 +epoch: 0, batch: 35560, sum loss: 4969.678711, avg loss: 2.773258, ppl: 16.010719 +epoch: 0, batch: 35561, sum loss: 4760.033203, avg loss: 2.777149, ppl: 16.073130 +epoch: 0, batch: 35562, sum loss: 5432.367676, avg loss: 2.957196, ppl: 19.243927 +epoch: 0, batch: 35563, sum loss: 3842.482422, avg loss: 2.651817, ppl: 14.179778 +epoch: 0, batch: 35564, sum loss: 5003.706055, avg loss: 2.875693, ppl: 17.737713 +epoch: 0, batch: 35565, sum loss: 5318.578125, avg loss: 3.037452, ppl: 20.852045 +epoch: 0, batch: 35566, sum loss: 5144.704102, avg loss: 2.938152, ppl: 18.880924 +epoch: 0, batch: 35567, sum loss: 4856.925781, avg loss: 2.733217, ppl: 
15.382285 +epoch: 0, batch: 35568, sum loss: 5963.810547, avg loss: 3.125687, ppl: 22.775528 +epoch: 0, batch: 35569, sum loss: 5625.205566, avg loss: 3.094172, ppl: 22.068968 +epoch: 0, batch: 35570, sum loss: 4822.639648, avg loss: 2.935264, ppl: 18.826479 +epoch: 0, batch: 35571, sum loss: 4939.636230, avg loss: 2.842138, ppl: 17.152403 +epoch: 0, batch: 35572, sum loss: 4805.769531, avg loss: 2.994249, ppl: 19.970354 +epoch: 0, batch: 35573, sum loss: 4722.121582, avg loss: 2.726398, ppl: 15.277761 +epoch: 0, batch: 35574, sum loss: 5247.529785, avg loss: 2.929944, ppl: 18.726578 +epoch: 0, batch: 35575, sum loss: 5423.704102, avg loss: 3.052169, ppl: 21.161190 +epoch: 0, batch: 35576, sum loss: 4216.372070, avg loss: 2.677062, ppl: 14.542298 +epoch: 0, batch: 35577, sum loss: 5603.651855, avg loss: 3.068813, ppl: 21.516344 +epoch: 0, batch: 35578, sum loss: 5471.773438, avg loss: 2.959315, ppl: 19.284758 +epoch: 0, batch: 35579, sum loss: 4358.658691, avg loss: 2.544459, ppl: 12.736338 +epoch: 0, batch: 35580, sum loss: 5112.472656, avg loss: 2.760514, ppl: 15.807970 +epoch: 0, batch: 35581, sum loss: 5581.394531, avg loss: 3.048277, ppl: 21.078993 +epoch: 0, batch: 35582, sum loss: 5644.819336, avg loss: 3.180180, ppl: 24.051079 +epoch: 0, batch: 35583, sum loss: 5574.347168, avg loss: 3.019690, ppl: 20.484936 +epoch: 0, batch: 35584, sum loss: 4771.186523, avg loss: 3.106241, ppl: 22.336927 +epoch: 0, batch: 35585, sum loss: 3329.518066, avg loss: 2.551355, ppl: 12.824468 +epoch: 0, batch: 35586, sum loss: 4314.484375, avg loss: 2.727234, ppl: 15.290533 +epoch: 0, batch: 35587, sum loss: 5618.019531, avg loss: 3.103878, ppl: 22.284214 +epoch: 0, batch: 35588, sum loss: 4841.003906, avg loss: 2.988274, ppl: 19.851391 +epoch: 0, batch: 35589, sum loss: 5053.612305, avg loss: 2.977968, ppl: 19.647861 +epoch: 0, batch: 35590, sum loss: 4911.694824, avg loss: 2.885837, ppl: 17.918560 +epoch: 0, batch: 35591, sum loss: 4599.831055, avg loss: 2.712165, ppl: 
15.061844 +epoch: 0, batch: 35592, sum loss: 5639.323242, avg loss: 3.088348, ppl: 21.940800 +epoch: 0, batch: 35593, sum loss: 5410.621094, avg loss: 3.171525, ppl: 23.843807 +epoch: 0, batch: 35594, sum loss: 3939.476074, avg loss: 2.786051, ppl: 16.216854 +epoch: 0, batch: 35595, sum loss: 6402.219727, avg loss: 3.150699, ppl: 23.352375 +epoch: 0, batch: 35596, sum loss: 4524.903320, avg loss: 2.745694, ppl: 15.575415 +epoch: 0, batch: 35597, sum loss: 4360.174805, avg loss: 2.904847, ppl: 18.262444 +epoch: 0, batch: 35598, sum loss: 4187.983398, avg loss: 2.733671, ppl: 15.389273 +epoch: 0, batch: 35599, sum loss: 4884.806152, avg loss: 2.841656, ppl: 17.144129 +epoch: 0, batch: 35600, sum loss: 5438.474609, avg loss: 3.065656, ppl: 21.448536 +epoch: 0, batch: 35601, sum loss: 3991.056396, avg loss: 2.713159, ppl: 15.076822 +epoch: 0, batch: 35602, sum loss: 4562.312012, avg loss: 2.608526, ppl: 13.579021 +epoch: 0, batch: 35603, sum loss: 5243.258301, avg loss: 3.187391, ppl: 24.225138 +epoch: 0, batch: 35604, sum loss: 4796.211426, avg loss: 3.068593, ppl: 21.511620 +epoch: 0, batch: 35605, sum loss: 4166.504395, avg loss: 2.735722, ppl: 15.420871 +epoch: 0, batch: 35606, sum loss: 5303.644531, avg loss: 3.210439, ppl: 24.789959 +epoch: 0, batch: 35607, sum loss: 5883.516602, avg loss: 3.199302, ppl: 24.515417 +epoch: 0, batch: 35608, sum loss: 4906.599609, avg loss: 3.026897, ppl: 20.633102 +epoch: 0, batch: 35609, sum loss: 5093.027344, avg loss: 2.918640, ppl: 18.516090 +epoch: 0, batch: 35610, sum loss: 5124.744141, avg loss: 2.910133, ppl: 18.359238 +epoch: 0, batch: 35611, sum loss: 5198.345215, avg loss: 3.105344, ppl: 22.316891 +epoch: 0, batch: 35612, sum loss: 3908.275391, avg loss: 2.809688, ppl: 16.604731 +epoch: 0, batch: 35613, sum loss: 4763.526367, avg loss: 2.823667, ppl: 16.838490 +epoch: 0, batch: 35614, sum loss: 3857.459229, avg loss: 2.733848, ppl: 15.391999 +epoch: 0, batch: 35615, sum loss: 4205.651855, avg loss: 2.555074, ppl: 
12.872255 +epoch: 0, batch: 35616, sum loss: 5664.585449, avg loss: 2.956464, ppl: 19.229860 +epoch: 0, batch: 35617, sum loss: 5335.111328, avg loss: 2.724776, ppl: 15.252997 +epoch: 0, batch: 35618, sum loss: 5357.912598, avg loss: 2.822926, ppl: 16.826008 +epoch: 0, batch: 35619, sum loss: 4690.322266, avg loss: 2.709603, ppl: 15.023304 +epoch: 0, batch: 35620, sum loss: 5043.389648, avg loss: 2.937327, ppl: 18.865345 +epoch: 0, batch: 35621, sum loss: 5210.335938, avg loss: 2.909177, ppl: 18.341694 +epoch: 0, batch: 35622, sum loss: 5028.429199, avg loss: 2.765913, ppl: 15.893537 +epoch: 0, batch: 35623, sum loss: 5094.960938, avg loss: 2.974291, ppl: 19.575745 +epoch: 0, batch: 35624, sum loss: 5070.907227, avg loss: 2.970654, ppl: 19.504681 +epoch: 0, batch: 35625, sum loss: 5149.147949, avg loss: 2.905840, ppl: 18.280586 +epoch: 0, batch: 35626, sum loss: 5091.997070, avg loss: 3.065621, ppl: 21.447784 +epoch: 0, batch: 35627, sum loss: 5598.610840, avg loss: 2.993910, ppl: 19.963579 +epoch: 0, batch: 35628, sum loss: 5027.874512, avg loss: 2.971557, ppl: 19.522295 +epoch: 0, batch: 35629, sum loss: 4286.315430, avg loss: 2.700892, ppl: 14.893010 +epoch: 0, batch: 35630, sum loss: 4606.828613, avg loss: 2.701952, ppl: 14.908813 +epoch: 0, batch: 35631, sum loss: 6807.608887, avg loss: 3.102830, ppl: 22.260859 +epoch: 0, batch: 35632, sum loss: 5629.446289, avg loss: 2.999172, ppl: 20.068916 +epoch: 0, batch: 35633, sum loss: 4774.363281, avg loss: 2.695857, ppl: 14.818216 +epoch: 0, batch: 35634, sum loss: 4995.388672, avg loss: 2.935011, ppl: 18.821709 +epoch: 0, batch: 35635, sum loss: 5616.637207, avg loss: 3.062506, ppl: 21.381081 +epoch: 0, batch: 35636, sum loss: 4335.786133, avg loss: 2.630938, ppl: 13.886794 +epoch: 0, batch: 35637, sum loss: 4937.307617, avg loss: 2.858893, ppl: 17.442202 +epoch: 0, batch: 35638, sum loss: 4992.545898, avg loss: 2.889205, ppl: 17.979006 +epoch: 0, batch: 35639, sum loss: 6582.466309, avg loss: 3.192273, ppl: 
24.343689 +epoch: 0, batch: 35640, sum loss: 4695.591309, avg loss: 2.706393, ppl: 14.975159 +epoch: 0, batch: 35641, sum loss: 4288.999023, avg loss: 2.687343, ppl: 14.692580 +epoch: 0, batch: 35642, sum loss: 4041.100830, avg loss: 2.536787, ppl: 12.638991 +epoch: 0, batch: 35643, sum loss: 5586.451172, avg loss: 3.177731, ppl: 23.992254 +epoch: 0, batch: 35644, sum loss: 4878.879395, avg loss: 2.974926, ppl: 19.588182 +epoch: 0, batch: 35645, sum loss: 4334.847168, avg loss: 2.802099, ppl: 16.479200 +epoch: 0, batch: 35646, sum loss: 4882.618164, avg loss: 2.767924, ppl: 15.925539 +epoch: 0, batch: 35647, sum loss: 5013.708496, avg loss: 2.864976, ppl: 17.548635 +epoch: 0, batch: 35648, sum loss: 4744.078125, avg loss: 2.875199, ppl: 17.728949 +epoch: 0, batch: 35649, sum loss: 5423.327148, avg loss: 3.012960, ppl: 20.347534 +epoch: 0, batch: 35650, sum loss: 4352.194824, avg loss: 2.872736, ppl: 17.685339 +epoch: 0, batch: 35651, sum loss: 6599.039551, avg loss: 3.145396, ppl: 23.228861 +epoch: 0, batch: 35652, sum loss: 4789.261719, avg loss: 2.857555, ppl: 17.418884 +epoch: 0, batch: 35653, sum loss: 3661.341553, avg loss: 2.706091, ppl: 14.970647 +epoch: 0, batch: 35654, sum loss: 4583.125488, avg loss: 2.834339, ppl: 17.019142 +epoch: 0, batch: 35655, sum loss: 5039.744141, avg loss: 3.078646, ppl: 21.728971 +epoch: 0, batch: 35656, sum loss: 4219.412598, avg loss: 2.614258, ppl: 13.657080 +epoch: 0, batch: 35657, sum loss: 5360.298828, avg loss: 2.685520, ppl: 14.665832 +epoch: 0, batch: 35658, sum loss: 5821.124512, avg loss: 3.246584, ppl: 25.702383 +epoch: 0, batch: 35659, sum loss: 3939.911865, avg loss: 2.484181, ppl: 11.991301 +epoch: 0, batch: 35660, sum loss: 4882.965820, avg loss: 2.804690, ppl: 16.521959 +epoch: 0, batch: 35661, sum loss: 6137.109375, avg loss: 3.490961, ppl: 32.817463 +epoch: 0, batch: 35662, sum loss: 4899.696289, avg loss: 2.801427, ppl: 16.468136 +epoch: 0, batch: 35663, sum loss: 4423.298828, avg loss: 2.659831, ppl: 
14.293874 +epoch: 0, batch: 35664, sum loss: 5164.583984, avg loss: 3.009664, ppl: 20.280590 +epoch: 0, batch: 35665, sum loss: 5083.727539, avg loss: 2.773447, ppl: 16.013731 +epoch: 0, batch: 35666, sum loss: 5073.458984, avg loss: 3.039820, ppl: 20.901474 +epoch: 0, batch: 35667, sum loss: 4621.556641, avg loss: 2.874102, ppl: 17.709520 +epoch: 0, batch: 35668, sum loss: 5002.158203, avg loss: 3.053820, ppl: 21.196167 +epoch: 0, batch: 35669, sum loss: 5284.117188, avg loss: 3.104652, ppl: 22.301451 +epoch: 0, batch: 35670, sum loss: 4705.435059, avg loss: 2.909978, ppl: 18.356401 +epoch: 0, batch: 35671, sum loss: 4620.453125, avg loss: 2.812205, ppl: 16.646585 +epoch: 0, batch: 35672, sum loss: 5519.708496, avg loss: 3.154119, ppl: 23.432384 +epoch: 0, batch: 35673, sum loss: 4177.290039, avg loss: 2.902912, ppl: 18.227139 +epoch: 0, batch: 35674, sum loss: 4971.605957, avg loss: 2.872100, ppl: 17.674101 +epoch: 0, batch: 35675, sum loss: 4457.382812, avg loss: 2.842719, ppl: 17.162359 +epoch: 0, batch: 35676, sum loss: 4578.771973, avg loss: 2.824659, ppl: 16.855190 +epoch: 0, batch: 35677, sum loss: 4863.513184, avg loss: 2.956543, ppl: 19.231369 +epoch: 0, batch: 35678, sum loss: 3933.497559, avg loss: 2.647037, ppl: 14.112166 +epoch: 0, batch: 35679, sum loss: 4785.245117, avg loss: 3.017179, ppl: 20.433558 +epoch: 0, batch: 35680, sum loss: 4812.381836, avg loss: 2.876498, ppl: 17.752005 +epoch: 0, batch: 35681, sum loss: 5075.144531, avg loss: 2.940408, ppl: 18.923565 +epoch: 0, batch: 35682, sum loss: 5889.802246, avg loss: 3.144582, ppl: 23.209978 +epoch: 0, batch: 35683, sum loss: 6090.984375, avg loss: 3.028834, ppl: 20.673111 +epoch: 0, batch: 35684, sum loss: 5163.207031, avg loss: 2.905575, ppl: 18.275749 +epoch: 0, batch: 35685, sum loss: 4445.609375, avg loss: 2.622778, ppl: 13.773940 +epoch: 0, batch: 35686, sum loss: 5205.147949, avg loss: 2.954114, ppl: 19.184711 +epoch: 0, batch: 35687, sum loss: 4201.540527, avg loss: 2.767813, ppl: 
15.923774 +epoch: 0, batch: 35688, sum loss: 5492.305664, avg loss: 3.080374, ppl: 21.766531 +epoch: 0, batch: 35689, sum loss: 6270.505859, avg loss: 3.213996, ppl: 24.878300 +epoch: 0, batch: 35690, sum loss: 4639.923340, avg loss: 2.827497, ppl: 16.903107 +epoch: 0, batch: 35691, sum loss: 5004.610352, avg loss: 2.808424, ppl: 16.583754 +epoch: 0, batch: 35692, sum loss: 4476.612305, avg loss: 2.984408, ppl: 19.774796 +epoch: 0, batch: 35693, sum loss: 4726.518066, avg loss: 2.627303, ppl: 13.836401 +epoch: 0, batch: 35694, sum loss: 4575.157227, avg loss: 2.978618, ppl: 19.660629 +epoch: 0, batch: 35695, sum loss: 6531.463867, avg loss: 3.323900, ppl: 27.768442 +epoch: 0, batch: 35696, sum loss: 5861.630859, avg loss: 3.211853, ppl: 24.825033 +epoch: 0, batch: 35697, sum loss: 5753.645996, avg loss: 3.118507, ppl: 22.612597 +epoch: 0, batch: 35698, sum loss: 5368.528320, avg loss: 3.039937, ppl: 20.903921 +epoch: 0, batch: 35699, sum loss: 4060.396729, avg loss: 2.683673, ppl: 14.638758 +epoch: 0, batch: 35700, sum loss: 5046.898438, avg loss: 2.929134, ppl: 18.711426 +epoch: 0, batch: 35701, sum loss: 4464.671875, avg loss: 2.570335, ppl: 13.070204 +epoch: 0, batch: 35702, sum loss: 4340.467773, avg loss: 2.743658, ppl: 15.543745 +epoch: 0, batch: 35703, sum loss: 3572.440430, avg loss: 2.364289, ppl: 10.636472 +epoch: 0, batch: 35704, sum loss: 5684.199219, avg loss: 2.816749, ppl: 16.722395 +epoch: 0, batch: 35705, sum loss: 5348.701660, avg loss: 3.011657, ppl: 20.321033 +epoch: 0, batch: 35706, sum loss: 6271.438477, avg loss: 3.156235, ppl: 23.482008 +epoch: 0, batch: 35707, sum loss: 4965.701660, avg loss: 3.013169, ppl: 20.351784 +epoch: 0, batch: 35708, sum loss: 5374.839844, avg loss: 3.096106, ppl: 22.111677 +epoch: 0, batch: 35709, sum loss: 5559.691406, avg loss: 2.985871, ppl: 19.803741 +epoch: 0, batch: 35710, sum loss: 5558.061035, avg loss: 2.965881, ppl: 19.411795 +epoch: 0, batch: 35711, sum loss: 5765.329102, avg loss: 3.140157, ppl: 
23.107506 +epoch: 0, batch: 35712, sum loss: 5856.700195, avg loss: 3.014256, ppl: 20.373938 +epoch: 0, batch: 35713, sum loss: 5769.743652, avg loss: 3.216134, ppl: 24.931538 +epoch: 0, batch: 35714, sum loss: 4314.024414, avg loss: 2.868367, ppl: 17.608244 +epoch: 0, batch: 35715, sum loss: 4620.519531, avg loss: 2.878828, ppl: 17.793411 +epoch: 0, batch: 35716, sum loss: 4762.436523, avg loss: 2.923534, ppl: 18.606922 +epoch: 0, batch: 35717, sum loss: 4572.559082, avg loss: 2.974990, ppl: 19.589420 +epoch: 0, batch: 35718, sum loss: 5878.828613, avg loss: 3.025645, ppl: 20.607298 +epoch: 0, batch: 35719, sum loss: 5407.704102, avg loss: 3.007622, ppl: 20.239208 +epoch: 0, batch: 35720, sum loss: 4331.911621, avg loss: 2.866917, ppl: 17.582726 +epoch: 0, batch: 35721, sum loss: 4914.930664, avg loss: 2.772099, ppl: 15.992159 +epoch: 0, batch: 35722, sum loss: 4969.654297, avg loss: 2.991965, ppl: 19.924803 +epoch: 0, batch: 35723, sum loss: 4687.140137, avg loss: 2.887948, ppl: 17.956430 +epoch: 0, batch: 35724, sum loss: 5468.410645, avg loss: 2.808634, ppl: 16.587250 +epoch: 0, batch: 35725, sum loss: 5965.739258, avg loss: 3.078297, ppl: 21.721376 +epoch: 0, batch: 35726, sum loss: 4608.437012, avg loss: 2.827262, ppl: 16.899126 +epoch: 0, batch: 35727, sum loss: 4228.340332, avg loss: 2.898108, ppl: 18.139801 +epoch: 0, batch: 35728, sum loss: 4911.530273, avg loss: 3.071626, ppl: 21.576962 +epoch: 0, batch: 35729, sum loss: 5183.209961, avg loss: 2.800221, ppl: 16.448288 +epoch: 0, batch: 35730, sum loss: 5433.661621, avg loss: 2.970838, ppl: 19.508253 +epoch: 0, batch: 35731, sum loss: 5667.566406, avg loss: 3.068526, ppl: 21.510164 +epoch: 0, batch: 35732, sum loss: 5025.680176, avg loss: 2.958023, ppl: 19.259850 +epoch: 0, batch: 35733, sum loss: 4437.252930, avg loss: 2.681120, ppl: 14.601433 +epoch: 0, batch: 35734, sum loss: 5055.963379, avg loss: 2.899061, ppl: 18.157095 +epoch: 0, batch: 35735, sum loss: 4471.045898, avg loss: 2.835159, ppl: 
17.033110 +epoch: 0, batch: 35736, sum loss: 5602.445312, avg loss: 3.177791, ppl: 23.993690 +epoch: 0, batch: 35737, sum loss: 4849.442871, avg loss: 2.891737, ppl: 18.024591 +epoch: 0, batch: 35738, sum loss: 5205.762695, avg loss: 3.007373, ppl: 20.234173 +epoch: 0, batch: 35739, sum loss: 5252.758789, avg loss: 3.088042, ppl: 21.934078 +epoch: 0, batch: 35740, sum loss: 4894.374023, avg loss: 2.853862, ppl: 17.354681 +epoch: 0, batch: 35741, sum loss: 4871.236328, avg loss: 2.825543, ppl: 16.870110 +epoch: 0, batch: 35742, sum loss: 5627.982422, avg loss: 2.910022, ppl: 18.357199 +epoch: 0, batch: 35743, sum loss: 5533.371094, avg loss: 2.766686, ppl: 15.905830 +epoch: 0, batch: 35744, sum loss: 3964.959961, avg loss: 2.581354, ppl: 13.215021 +epoch: 0, batch: 35745, sum loss: 4298.548828, avg loss: 2.766119, ppl: 15.896818 +epoch: 0, batch: 35746, sum loss: 5422.494141, avg loss: 2.958262, ppl: 19.264460 +epoch: 0, batch: 35747, sum loss: 5134.715332, avg loss: 2.841569, ppl: 17.142641 +epoch: 0, batch: 35748, sum loss: 5295.309082, avg loss: 3.120394, ppl: 22.655315 +epoch: 0, batch: 35749, sum loss: 5661.927734, avg loss: 2.905043, ppl: 18.266022 +epoch: 0, batch: 35750, sum loss: 5523.978516, avg loss: 3.062072, ppl: 21.371799 +epoch: 0, batch: 35751, sum loss: 4002.629150, avg loss: 2.921627, ppl: 18.571484 +epoch: 0, batch: 35752, sum loss: 4991.664551, avg loss: 2.992605, ppl: 19.937548 +epoch: 0, batch: 35753, sum loss: 4752.212891, avg loss: 2.880129, ppl: 17.816572 +epoch: 0, batch: 35754, sum loss: 5557.470215, avg loss: 2.932702, ppl: 18.778301 +epoch: 0, batch: 35755, sum loss: 4597.186523, avg loss: 2.876838, ppl: 17.758028 +epoch: 0, batch: 35756, sum loss: 4892.530762, avg loss: 2.907029, ppl: 18.302349 +epoch: 0, batch: 35757, sum loss: 4167.449707, avg loss: 2.878073, ppl: 17.779982 +epoch: 0, batch: 35758, sum loss: 4713.215332, avg loss: 2.929282, ppl: 18.714197 +epoch: 0, batch: 35759, sum loss: 5989.895020, avg loss: 3.282134, ppl: 
26.632553 +epoch: 0, batch: 35760, sum loss: 3946.770508, avg loss: 2.668540, ppl: 14.418899 +epoch: 0, batch: 35761, sum loss: 5935.656738, avg loss: 2.972287, ppl: 19.536547 +epoch: 0, batch: 35762, sum loss: 5197.286621, avg loss: 2.800262, ppl: 16.448959 +epoch: 0, batch: 35763, sum loss: 5103.452637, avg loss: 3.134799, ppl: 22.984011 +epoch: 0, batch: 35764, sum loss: 4422.169434, avg loss: 2.840186, ppl: 17.118948 +epoch: 0, batch: 35765, sum loss: 5779.361328, avg loss: 3.230498, ppl: 25.292252 +epoch: 0, batch: 35766, sum loss: 4918.112305, avg loss: 2.968082, ppl: 19.454573 +epoch: 0, batch: 35767, sum loss: 4099.886719, avg loss: 2.707983, ppl: 14.999000 +epoch: 0, batch: 35768, sum loss: 5396.333496, avg loss: 3.052225, ppl: 21.162376 +epoch: 0, batch: 35769, sum loss: 5612.220703, avg loss: 2.803307, ppl: 16.499121 +epoch: 0, batch: 35770, sum loss: 5395.312012, avg loss: 3.004071, ppl: 20.167480 +epoch: 0, batch: 35771, sum loss: 5557.499512, avg loss: 3.073838, ppl: 21.624744 +epoch: 0, batch: 35772, sum loss: 5662.304688, avg loss: 3.149224, ppl: 23.317959 +epoch: 0, batch: 35773, sum loss: 4306.254883, avg loss: 3.032574, ppl: 20.750574 +epoch: 0, batch: 35774, sum loss: 5609.224609, avg loss: 3.109326, ppl: 22.405947 +epoch: 0, batch: 35775, sum loss: 5638.188477, avg loss: 2.891379, ppl: 18.018133 +epoch: 0, batch: 35776, sum loss: 5223.239258, avg loss: 3.090674, ppl: 21.991899 +epoch: 0, batch: 35777, sum loss: 4709.531250, avg loss: 2.794974, ppl: 16.362206 +epoch: 0, batch: 35778, sum loss: 4885.965820, avg loss: 3.120029, ppl: 22.647041 +epoch: 0, batch: 35779, sum loss: 5035.357422, avg loss: 3.160928, ppl: 23.592468 +epoch: 0, batch: 35780, sum loss: 5214.511719, avg loss: 3.065557, ppl: 21.446394 +epoch: 0, batch: 35781, sum loss: 4603.367676, avg loss: 2.810359, ppl: 16.615875 +epoch: 0, batch: 35782, sum loss: 4818.242188, avg loss: 2.961427, ppl: 19.325533 +epoch: 0, batch: 35783, sum loss: 4644.495605, avg loss: 2.973429, ppl: 
19.558867 +epoch: 0, batch: 35784, sum loss: 4673.679688, avg loss: 2.706242, ppl: 14.972899 +epoch: 0, batch: 35785, sum loss: 4875.222656, avg loss: 2.816420, ppl: 16.716890 +epoch: 0, batch: 35786, sum loss: 4847.313965, avg loss: 3.054388, ppl: 21.208208 +epoch: 0, batch: 35787, sum loss: 4502.652344, avg loss: 2.788020, ppl: 16.248817 +epoch: 0, batch: 35788, sum loss: 4059.141602, avg loss: 2.668732, ppl: 14.421674 +epoch: 0, batch: 35789, sum loss: 4818.251953, avg loss: 2.851037, ppl: 17.305710 +epoch: 0, batch: 35790, sum loss: 5387.493164, avg loss: 3.016514, ppl: 20.419975 +epoch: 0, batch: 35791, sum loss: 5542.585449, avg loss: 3.119069, ppl: 22.625303 +epoch: 0, batch: 35792, sum loss: 5247.708984, avg loss: 3.108832, ppl: 22.394880 +epoch: 0, batch: 35793, sum loss: 5255.318848, avg loss: 2.996191, ppl: 20.009172 +epoch: 0, batch: 35794, sum loss: 5635.506836, avg loss: 2.847654, ppl: 17.247271 +epoch: 0, batch: 35795, sum loss: 4911.526367, avg loss: 3.007671, ppl: 20.240198 +epoch: 0, batch: 35796, sum loss: 3949.115234, avg loss: 2.708584, ppl: 15.008007 +epoch: 0, batch: 35797, sum loss: 5628.243652, avg loss: 3.009756, ppl: 20.282452 +epoch: 0, batch: 35798, sum loss: 5776.739258, avg loss: 3.084218, ppl: 21.850363 +epoch: 0, batch: 35799, sum loss: 5301.020020, avg loss: 3.149745, ppl: 23.330103 +epoch: 0, batch: 35800, sum loss: 4734.525391, avg loss: 2.796530, ppl: 16.387688 +epoch: 0, batch: 35801, sum loss: 6920.290527, avg loss: 3.453239, ppl: 31.602579 +epoch: 0, batch: 35802, sum loss: 5061.402344, avg loss: 2.805655, ppl: 16.537912 +epoch: 0, batch: 35803, sum loss: 4403.177246, avg loss: 2.683228, ppl: 14.632250 +epoch: 0, batch: 35804, sum loss: 4737.873047, avg loss: 3.096649, ppl: 22.123693 +epoch: 0, batch: 35805, sum loss: 4391.881836, avg loss: 2.786727, ppl: 16.227818 +epoch: 0, batch: 35806, sum loss: 4562.736328, avg loss: 2.970531, ppl: 19.502281 +epoch: 0, batch: 35807, sum loss: 3661.954102, avg loss: 2.682750, ppl: 
14.625260 +epoch: 0, batch: 35808, sum loss: 4796.775879, avg loss: 3.249848, ppl: 25.786423 +epoch: 0, batch: 35809, sum loss: 3915.151611, avg loss: 2.670635, ppl: 14.449148 +epoch: 0, batch: 35810, sum loss: 6558.715820, avg loss: 3.085003, ppl: 21.867525 +epoch: 0, batch: 35811, sum loss: 4482.856934, avg loss: 2.828301, ppl: 16.916695 +epoch: 0, batch: 35812, sum loss: 4140.423340, avg loss: 2.751112, ppl: 15.660036 +epoch: 0, batch: 35813, sum loss: 4309.639648, avg loss: 2.820445, ppl: 16.784315 +epoch: 0, batch: 35814, sum loss: 4361.081055, avg loss: 2.767183, ppl: 15.913747 +epoch: 0, batch: 35815, sum loss: 4496.324219, avg loss: 2.702118, ppl: 14.911279 +epoch: 0, batch: 35816, sum loss: 5743.953125, avg loss: 3.099813, ppl: 22.193794 +epoch: 0, batch: 35817, sum loss: 5750.358398, avg loss: 3.390542, ppl: 29.682022 +epoch: 0, batch: 35818, sum loss: 5807.228516, avg loss: 3.176821, ppl: 23.970430 +epoch: 0, batch: 35819, sum loss: 5175.073242, avg loss: 2.974180, ppl: 19.573566 +epoch: 0, batch: 35820, sum loss: 5670.519531, avg loss: 3.119098, ppl: 22.625956 +epoch: 0, batch: 35821, sum loss: 4189.881836, avg loss: 2.761952, ppl: 15.830721 +epoch: 0, batch: 35822, sum loss: 4912.724121, avg loss: 2.898362, ppl: 18.144407 +epoch: 0, batch: 35823, sum loss: 4831.705078, avg loss: 2.867481, ppl: 17.592642 +epoch: 0, batch: 35824, sum loss: 5352.140625, avg loss: 3.197217, ppl: 24.464344 +epoch: 0, batch: 35825, sum loss: 5535.216797, avg loss: 3.250274, ppl: 25.797413 +epoch: 0, batch: 35826, sum loss: 5569.623535, avg loss: 3.028615, ppl: 20.668596 +epoch: 0, batch: 35827, sum loss: 5467.869141, avg loss: 3.099699, ppl: 22.191271 +epoch: 0, batch: 35828, sum loss: 4163.540039, avg loss: 2.708874, ppl: 15.012369 +epoch: 0, batch: 35829, sum loss: 4954.098145, avg loss: 2.919327, ppl: 18.528818 +epoch: 0, batch: 35830, sum loss: 4216.834473, avg loss: 2.750707, ppl: 15.653701 +epoch: 0, batch: 35831, sum loss: 5271.907715, avg loss: 3.054408, ppl: 
21.208618 +epoch: 0, batch: 35832, sum loss: 4286.936523, avg loss: 2.696187, ppl: 14.823096 +epoch: 0, batch: 35833, sum loss: 4459.952148, avg loss: 2.771878, ppl: 15.988636 +epoch: 0, batch: 35834, sum loss: 4526.562012, avg loss: 2.933611, ppl: 18.795385 +epoch: 0, batch: 35835, sum loss: 4474.360840, avg loss: 2.864508, ppl: 17.540415 +epoch: 0, batch: 35836, sum loss: 4910.207520, avg loss: 2.974081, ppl: 19.571629 +epoch: 0, batch: 35837, sum loss: 4793.979980, avg loss: 2.808424, ppl: 16.583765 +epoch: 0, batch: 35838, sum loss: 6095.636230, avg loss: 3.098951, ppl: 22.174669 +epoch: 0, batch: 35839, sum loss: 5593.529297, avg loss: 2.965816, ppl: 19.410542 +epoch: 0, batch: 35840, sum loss: 5640.342285, avg loss: 2.954606, ppl: 19.194155 +epoch: 0, batch: 35841, sum loss: 5343.562012, avg loss: 2.869797, ppl: 17.633438 +epoch: 0, batch: 35842, sum loss: 4509.484375, avg loss: 2.903725, ppl: 18.241964 +epoch: 0, batch: 35843, sum loss: 5550.072266, avg loss: 2.972722, ppl: 19.545050 +epoch: 0, batch: 35844, sum loss: 4177.876465, avg loss: 2.739591, ppl: 15.480654 +epoch: 0, batch: 35845, sum loss: 5152.498535, avg loss: 2.837279, ppl: 17.069254 +epoch: 0, batch: 35846, sum loss: 3952.531738, avg loss: 3.001163, ppl: 20.108910 +epoch: 0, batch: 35847, sum loss: 5572.654297, avg loss: 3.109740, ppl: 22.415222 +epoch: 0, batch: 35848, sum loss: 4808.110352, avg loss: 2.997575, ppl: 20.036890 +epoch: 0, batch: 35849, sum loss: 5038.840820, avg loss: 2.866235, ppl: 17.570736 +epoch: 0, batch: 35850, sum loss: 4886.366211, avg loss: 2.811488, ppl: 16.634655 +epoch: 0, batch: 35851, sum loss: 5237.033203, avg loss: 2.882242, ppl: 17.854252 +epoch: 0, batch: 35852, sum loss: 4007.363037, avg loss: 2.627779, ppl: 13.842991 +epoch: 0, batch: 35853, sum loss: 5028.031738, avg loss: 2.950723, ppl: 19.119772 +epoch: 0, batch: 35854, sum loss: 5471.385254, avg loss: 3.280207, ppl: 26.581278 +epoch: 0, batch: 35855, sum loss: 4078.494629, avg loss: 2.718996, ppl: 
15.165093 +epoch: 0, batch: 35856, sum loss: 5796.821289, avg loss: 2.968163, ppl: 19.456137 +epoch: 0, batch: 35857, sum loss: 4502.145996, avg loss: 2.694283, ppl: 14.794900 +epoch: 0, batch: 35858, sum loss: 5056.625977, avg loss: 2.717155, ppl: 15.137202 +epoch: 0, batch: 35859, sum loss: 4443.438477, avg loss: 2.683236, ppl: 14.632362 +epoch: 0, batch: 35860, sum loss: 5183.318848, avg loss: 2.855823, ppl: 17.388742 +epoch: 0, batch: 35861, sum loss: 5095.008789, avg loss: 3.084146, ppl: 21.848795 +epoch: 0, batch: 35862, sum loss: 4769.307617, avg loss: 2.938575, ppl: 18.888916 +epoch: 0, batch: 35863, sum loss: 5998.226562, avg loss: 3.284900, ppl: 26.706306 +epoch: 0, batch: 35864, sum loss: 4857.516602, avg loss: 2.938607, ppl: 18.889505 +epoch: 0, batch: 35865, sum loss: 4617.351074, avg loss: 2.829259, ppl: 16.932915 +epoch: 0, batch: 35866, sum loss: 5953.504395, avg loss: 3.180291, ppl: 24.053751 +epoch: 0, batch: 35867, sum loss: 3688.318115, avg loss: 2.715993, ppl: 15.119612 +epoch: 0, batch: 35868, sum loss: 4888.512695, avg loss: 2.984440, ppl: 19.775421 +epoch: 0, batch: 35869, sum loss: 4866.266602, avg loss: 2.920928, ppl: 18.558506 +epoch: 0, batch: 35870, sum loss: 4407.150879, avg loss: 2.778784, ppl: 16.099424 +epoch: 0, batch: 35871, sum loss: 4669.832031, avg loss: 2.920470, ppl: 18.550009 +epoch: 0, batch: 35872, sum loss: 5600.142578, avg loss: 3.050187, ppl: 21.119286 +epoch: 0, batch: 35873, sum loss: 5441.738281, avg loss: 2.879227, ppl: 17.800503 +epoch: 0, batch: 35874, sum loss: 4160.847656, avg loss: 2.638458, ppl: 13.991609 +epoch: 0, batch: 35875, sum loss: 4998.070801, avg loss: 3.163336, ppl: 23.649359 +epoch: 0, batch: 35876, sum loss: 5089.069824, avg loss: 2.847829, ppl: 17.250284 +epoch: 0, batch: 35877, sum loss: 5331.655273, avg loss: 3.211841, ppl: 24.824738 +epoch: 0, batch: 35878, sum loss: 4901.870117, avg loss: 2.817167, ppl: 16.729385 +epoch: 0, batch: 35879, sum loss: 3967.777588, avg loss: 2.670106, ppl: 
14.441499 +epoch: 0, batch: 35880, sum loss: 4652.833008, avg loss: 2.994101, ppl: 19.967402 +epoch: 0, batch: 35881, sum loss: 4819.557617, avg loss: 2.625031, ppl: 13.805005 +epoch: 0, batch: 35882, sum loss: 3099.096436, avg loss: 2.326649, ppl: 10.243557 +epoch: 0, batch: 35883, sum loss: 4889.085938, avg loss: 2.963082, ppl: 19.357546 +epoch: 0, batch: 35884, sum loss: 5825.724609, avg loss: 3.016947, ppl: 20.428827 +epoch: 0, batch: 35885, sum loss: 5025.807617, avg loss: 2.982675, ppl: 19.740553 +epoch: 0, batch: 35886, sum loss: 5134.024414, avg loss: 2.800886, ppl: 16.459227 +epoch: 0, batch: 35887, sum loss: 5094.853516, avg loss: 2.919687, ppl: 18.535480 +epoch: 0, batch: 35888, sum loss: 5460.114258, avg loss: 2.991843, ppl: 19.922375 +epoch: 0, batch: 35889, sum loss: 4317.919434, avg loss: 2.816647, ppl: 16.720690 +epoch: 0, batch: 35890, sum loss: 4863.637695, avg loss: 2.654824, ppl: 14.222483 +epoch: 0, batch: 35891, sum loss: 5253.203613, avg loss: 3.108404, ppl: 22.385298 +epoch: 0, batch: 35892, sum loss: 4046.329346, avg loss: 2.930000, ppl: 18.727623 +epoch: 0, batch: 35893, sum loss: 5207.265625, avg loss: 2.920508, ppl: 18.550711 +epoch: 0, batch: 35894, sum loss: 5038.914551, avg loss: 2.811894, ppl: 16.641415 +epoch: 0, batch: 35895, sum loss: 5577.061523, avg loss: 3.110464, ppl: 22.431446 +epoch: 0, batch: 35896, sum loss: 5241.515625, avg loss: 3.012365, ppl: 20.335443 +epoch: 0, batch: 35897, sum loss: 5074.240723, avg loss: 2.889659, ppl: 17.987173 +epoch: 0, batch: 35898, sum loss: 5447.399902, avg loss: 3.068958, ppl: 21.519459 +epoch: 0, batch: 35899, sum loss: 6100.750977, avg loss: 3.207545, ppl: 24.718334 +epoch: 0, batch: 35900, sum loss: 4800.714355, avg loss: 2.874679, ppl: 17.719742 +epoch: 0, batch: 35901, sum loss: 4968.998047, avg loss: 2.887274, ppl: 17.944319 +epoch: 0, batch: 35902, sum loss: 5127.736328, avg loss: 3.052224, ppl: 21.162361 +epoch: 0, batch: 35903, sum loss: 4425.145996, avg loss: 2.763989, ppl: 
15.862994 +epoch: 0, batch: 35904, sum loss: 5693.458008, avg loss: 3.036511, ppl: 20.832430 +epoch: 0, batch: 35905, sum loss: 4921.177734, avg loss: 3.028417, ppl: 20.664497 +epoch: 0, batch: 35906, sum loss: 4559.097656, avg loss: 2.696096, ppl: 14.821750 +epoch: 0, batch: 35907, sum loss: 5678.613770, avg loss: 3.250494, ppl: 25.803089 +epoch: 0, batch: 35908, sum loss: 5376.921387, avg loss: 3.135231, ppl: 22.993948 +epoch: 0, batch: 35909, sum loss: 3986.173828, avg loss: 2.494477, ppl: 12.115393 +epoch: 0, batch: 35910, sum loss: 5409.606934, avg loss: 2.900594, ppl: 18.184935 +epoch: 0, batch: 35911, sum loss: 5124.710938, avg loss: 2.913423, ppl: 18.419743 +epoch: 0, batch: 35912, sum loss: 5144.223633, avg loss: 2.838975, ppl: 17.098238 +epoch: 0, batch: 35913, sum loss: 4144.028809, avg loss: 2.692676, ppl: 14.771155 +epoch: 0, batch: 35914, sum loss: 5581.995117, avg loss: 3.121921, ppl: 22.689932 +epoch: 0, batch: 35915, sum loss: 5306.964355, avg loss: 2.775609, ppl: 16.048397 +epoch: 0, batch: 35916, sum loss: 5495.660156, avg loss: 3.180359, ppl: 24.055386 +epoch: 0, batch: 35917, sum loss: 3892.011963, avg loss: 2.649429, ppl: 14.145964 +epoch: 0, batch: 35918, sum loss: 5206.065918, avg loss: 3.035607, ppl: 20.813604 +epoch: 0, batch: 35919, sum loss: 4147.228516, avg loss: 2.665314, ppl: 14.372461 +epoch: 0, batch: 35920, sum loss: 4670.307617, avg loss: 3.072571, ppl: 21.597359 +epoch: 0, batch: 35921, sum loss: 5580.631836, avg loss: 3.084927, ppl: 21.865862 +epoch: 0, batch: 35922, sum loss: 4270.053223, avg loss: 2.519205, ppl: 12.418724 +epoch: 0, batch: 35923, sum loss: 5342.823242, avg loss: 2.908451, ppl: 18.328379 +epoch: 0, batch: 35924, sum loss: 4360.073730, avg loss: 2.859065, ppl: 17.445206 +epoch: 0, batch: 35925, sum loss: 5158.333008, avg loss: 2.969679, ppl: 19.485670 +epoch: 0, batch: 35926, sum loss: 5195.105957, avg loss: 3.018655, ppl: 20.463755 +epoch: 0, batch: 35927, sum loss: 5443.888672, avg loss: 2.979687, ppl: 
19.681660 +epoch: 0, batch: 35928, sum loss: 5179.363281, avg loss: 3.163936, ppl: 23.663555 +epoch: 0, batch: 35929, sum loss: 5188.121582, avg loss: 2.896774, ppl: 18.115602 +epoch: 0, batch: 35930, sum loss: 5560.469727, avg loss: 3.357772, ppl: 28.725109 +epoch: 0, batch: 35931, sum loss: 4100.265137, avg loss: 2.859320, ppl: 17.449659 +epoch: 0, batch: 35932, sum loss: 4988.251953, avg loss: 2.778970, ppl: 16.102434 +epoch: 0, batch: 35933, sum loss: 5258.630371, avg loss: 3.158337, ppl: 23.531422 +epoch: 0, batch: 35934, sum loss: 4890.372070, avg loss: 2.722924, ppl: 15.224782 +epoch: 0, batch: 35935, sum loss: 5063.934082, avg loss: 2.982293, ppl: 19.733021 +epoch: 0, batch: 35936, sum loss: 5347.578125, avg loss: 3.096455, ppl: 22.119406 +epoch: 0, batch: 35937, sum loss: 5819.144043, avg loss: 3.137005, ppl: 23.034771 +epoch: 0, batch: 35938, sum loss: 5212.357422, avg loss: 2.948166, ppl: 19.070948 +epoch: 0, batch: 35939, sum loss: 4821.224609, avg loss: 2.783617, ppl: 16.177429 +epoch: 0, batch: 35940, sum loss: 4600.882812, avg loss: 2.906432, ppl: 18.291426 +epoch: 0, batch: 35941, sum loss: 4263.750000, avg loss: 2.710585, ppl: 15.038069 +epoch: 0, batch: 35942, sum loss: 5054.307129, avg loss: 3.001370, ppl: 20.113077 +epoch: 0, batch: 35943, sum loss: 6496.446289, avg loss: 2.962356, ppl: 19.343489 +epoch: 0, batch: 35944, sum loss: 4547.869629, avg loss: 2.851329, ppl: 17.310770 +epoch: 0, batch: 35945, sum loss: 4479.099609, avg loss: 2.808213, ppl: 16.580263 +epoch: 0, batch: 35946, sum loss: 4958.069824, avg loss: 2.833183, ppl: 16.999481 +epoch: 0, batch: 35947, sum loss: 4418.467285, avg loss: 2.957475, ppl: 19.249300 +epoch: 0, batch: 35948, sum loss: 5618.063477, avg loss: 3.061615, ppl: 21.362034 +epoch: 0, batch: 35949, sum loss: 4409.883301, avg loss: 3.350975, ppl: 28.530544 +epoch: 0, batch: 35950, sum loss: 4331.796387, avg loss: 2.678910, ppl: 14.569198 +epoch: 0, batch: 35951, sum loss: 5367.407715, avg loss: 3.215942, ppl: 
24.926771 +epoch: 0, batch: 35952, sum loss: 4600.813477, avg loss: 2.855874, ppl: 17.389635 +epoch: 0, batch: 35953, sum loss: 4204.259277, avg loss: 2.671067, ppl: 14.455381 +epoch: 0, batch: 35954, sum loss: 4802.687012, avg loss: 2.858742, ppl: 17.439583 +epoch: 0, batch: 35955, sum loss: 5804.242676, avg loss: 2.973485, ppl: 19.559973 +epoch: 0, batch: 35956, sum loss: 4360.698730, avg loss: 2.733980, ppl: 15.394040 +epoch: 0, batch: 35957, sum loss: 4787.390625, avg loss: 3.153749, ppl: 23.423710 +epoch: 0, batch: 35958, sum loss: 4682.366699, avg loss: 2.886786, ppl: 17.935568 +epoch: 0, batch: 35959, sum loss: 6250.360352, avg loss: 3.054917, ppl: 21.219427 +epoch: 0, batch: 35960, sum loss: 5034.494629, avg loss: 2.890066, ppl: 17.994492 +epoch: 0, batch: 35961, sum loss: 4545.487305, avg loss: 2.625931, ppl: 13.817436 +epoch: 0, batch: 35962, sum loss: 4657.969238, avg loss: 2.847170, ppl: 17.238920 +epoch: 0, batch: 35963, sum loss: 4654.612793, avg loss: 2.887477, ppl: 17.947968 +epoch: 0, batch: 35964, sum loss: 5479.515625, avg loss: 2.999187, ppl: 20.069223 +epoch: 0, batch: 35965, sum loss: 4770.705078, avg loss: 2.851587, ppl: 17.315241 +epoch: 0, batch: 35966, sum loss: 4632.498535, avg loss: 2.707480, ppl: 14.991456 +epoch: 0, batch: 35967, sum loss: 4907.121094, avg loss: 2.871341, ppl: 17.660681 +epoch: 0, batch: 35968, sum loss: 4690.624512, avg loss: 3.002961, ppl: 20.145092 +epoch: 0, batch: 35969, sum loss: 6308.821777, avg loss: 3.235293, ppl: 25.413820 +epoch: 0, batch: 35970, sum loss: 4683.990723, avg loss: 2.771592, ppl: 15.984062 +epoch: 0, batch: 35971, sum loss: 4629.792969, avg loss: 2.744394, ppl: 15.555189 +epoch: 0, batch: 35972, sum loss: 4176.520020, avg loss: 2.754961, ppl: 15.720421 +epoch: 0, batch: 35973, sum loss: 4427.203125, avg loss: 2.841594, ppl: 17.143070 +epoch: 0, batch: 35974, sum loss: 5025.574707, avg loss: 2.975474, ppl: 19.598904 +epoch: 0, batch: 35975, sum loss: 7506.892578, avg loss: 3.258200, ppl: 
26.002687 +epoch: 0, batch: 35976, sum loss: 4974.630371, avg loss: 3.000380, ppl: 20.093176 +epoch: 0, batch: 35977, sum loss: 4603.694824, avg loss: 3.114814, ppl: 22.529240 +epoch: 0, batch: 35978, sum loss: 5008.664551, avg loss: 2.986681, ppl: 19.819796 +epoch: 0, batch: 35979, sum loss: 4949.841309, avg loss: 3.005368, ppl: 20.193645 +epoch: 0, batch: 35980, sum loss: 4764.506348, avg loss: 2.964845, ppl: 19.391705 +epoch: 0, batch: 35981, sum loss: 3947.726074, avg loss: 2.347043, ppl: 10.454608 +epoch: 0, batch: 35982, sum loss: 5826.967285, avg loss: 3.124379, ppl: 22.745770 +epoch: 0, batch: 35983, sum loss: 4698.979980, avg loss: 3.181436, ppl: 24.081299 +epoch: 0, batch: 35984, sum loss: 4851.831055, avg loss: 2.704477, ppl: 14.946491 +epoch: 0, batch: 35985, sum loss: 4916.153809, avg loss: 2.983103, ppl: 19.749004 +epoch: 0, batch: 35986, sum loss: 6281.558594, avg loss: 3.115853, ppl: 22.552650 +epoch: 0, batch: 35987, sum loss: 6230.282715, avg loss: 3.069105, ppl: 21.522629 +epoch: 0, batch: 35988, sum loss: 4629.742676, avg loss: 2.749254, ppl: 15.630960 +epoch: 0, batch: 35989, sum loss: 4771.741699, avg loss: 2.785605, ppl: 16.209625 +epoch: 0, batch: 35990, sum loss: 3938.139648, avg loss: 2.785106, ppl: 16.201534 +epoch: 0, batch: 35991, sum loss: 4218.157715, avg loss: 2.773279, ppl: 16.011055 +epoch: 0, batch: 35992, sum loss: 4914.303711, avg loss: 2.956861, ppl: 19.237499 +epoch: 0, batch: 35993, sum loss: 4680.036133, avg loss: 2.797392, ppl: 16.401810 +epoch: 0, batch: 35994, sum loss: 5752.015625, avg loss: 3.005233, ppl: 20.190916 +epoch: 0, batch: 35995, sum loss: 4473.681641, avg loss: 2.899340, ppl: 18.162157 +epoch: 0, batch: 35996, sum loss: 4337.930664, avg loss: 2.820501, ppl: 16.785259 +epoch: 0, batch: 35997, sum loss: 5160.921387, avg loss: 2.991839, ppl: 19.922279 +epoch: 0, batch: 35998, sum loss: 4536.741211, avg loss: 2.739578, ppl: 15.480451 +epoch: 0, batch: 35999, sum loss: 4351.273926, avg loss: 2.607115, ppl: 
13.559868 +epoch: 0, batch: 36000, sum loss: 4768.770508, avg loss: 2.818422, ppl: 16.750402 +epoch: 0, batch: 36001, sum loss: 5345.248047, avg loss: 3.174138, ppl: 23.906200 +epoch: 0, batch: 36002, sum loss: 5246.649414, avg loss: 2.913187, ppl: 18.415392 +epoch: 0, batch: 36003, sum loss: 5062.832520, avg loss: 3.053578, ppl: 21.191034 +epoch: 0, batch: 36004, sum loss: 5149.906738, avg loss: 2.765793, ppl: 15.891639 +epoch: 0, batch: 36005, sum loss: 5106.995117, avg loss: 2.851477, ppl: 17.313330 +epoch: 0, batch: 36006, sum loss: 6030.804688, avg loss: 3.019932, ppl: 20.489904 +epoch: 0, batch: 36007, sum loss: 4175.996582, avg loss: 2.760077, ppl: 15.801064 +epoch: 0, batch: 36008, sum loss: 5421.813477, avg loss: 3.077079, ppl: 21.694944 +epoch: 0, batch: 36009, sum loss: 5558.821777, avg loss: 2.872776, ppl: 17.686047 +epoch: 0, batch: 36010, sum loss: 4442.820801, avg loss: 2.855283, ppl: 17.379360 +epoch: 0, batch: 36011, sum loss: 4361.859863, avg loss: 2.755439, ppl: 15.727941 +epoch: 0, batch: 36012, sum loss: 4610.837402, avg loss: 2.742913, ppl: 15.532168 +epoch: 0, batch: 36013, sum loss: 5119.818848, avg loss: 3.084228, ppl: 21.850597 +epoch: 0, batch: 36014, sum loss: 4695.451660, avg loss: 2.850912, ppl: 17.303549 +epoch: 0, batch: 36015, sum loss: 4179.800293, avg loss: 2.904656, ppl: 18.258965 +epoch: 0, batch: 36016, sum loss: 4484.564941, avg loss: 2.698294, ppl: 14.854371 +epoch: 0, batch: 36017, sum loss: 4391.799805, avg loss: 2.947517, ppl: 19.058567 +epoch: 0, batch: 36018, sum loss: 5498.625488, avg loss: 3.209939, ppl: 24.777575 +epoch: 0, batch: 36019, sum loss: 5411.654785, avg loss: 3.119109, ppl: 22.626219 +epoch: 0, batch: 36020, sum loss: 4642.520020, avg loss: 2.718103, ppl: 15.151551 +epoch: 0, batch: 36021, sum loss: 6548.347168, avg loss: 3.096145, ppl: 22.112547 +epoch: 0, batch: 36022, sum loss: 5518.238281, avg loss: 3.003940, ppl: 20.164837 +epoch: 0, batch: 36023, sum loss: 5139.045898, avg loss: 2.994782, ppl: 
19.981003 +epoch: 0, batch: 36024, sum loss: 5853.342773, avg loss: 3.075850, ppl: 21.668293 +epoch: 0, batch: 36025, sum loss: 4362.104980, avg loss: 2.755594, ppl: 15.730378 +epoch: 0, batch: 36026, sum loss: 4873.489258, avg loss: 2.973453, ppl: 19.559338 +epoch: 0, batch: 36027, sum loss: 4163.455078, avg loss: 2.816952, ppl: 16.725796 +epoch: 0, batch: 36028, sum loss: 5768.734375, avg loss: 2.877174, ppl: 17.764004 +epoch: 0, batch: 36029, sum loss: 5457.394043, avg loss: 3.086761, ppl: 21.906019 +epoch: 0, batch: 36030, sum loss: 6148.229492, avg loss: 3.092671, ppl: 22.035851 +epoch: 0, batch: 36031, sum loss: 5419.321289, avg loss: 3.056583, ppl: 21.254805 +epoch: 0, batch: 36032, sum loss: 4683.489258, avg loss: 2.914430, ppl: 18.438303 +epoch: 0, batch: 36033, sum loss: 5150.345215, avg loss: 2.921353, ppl: 18.566389 +epoch: 0, batch: 36034, sum loss: 5865.816406, avg loss: 3.200118, ppl: 24.535427 +epoch: 0, batch: 36035, sum loss: 4638.047852, avg loss: 3.185472, ppl: 24.178709 +epoch: 0, batch: 36036, sum loss: 4308.567871, avg loss: 2.790524, ppl: 16.289545 +epoch: 0, batch: 36037, sum loss: 4626.099609, avg loss: 2.807099, ppl: 16.561808 +epoch: 0, batch: 36038, sum loss: 5309.703613, avg loss: 3.108726, ppl: 22.392488 +epoch: 0, batch: 36039, sum loss: 5080.179199, avg loss: 2.901302, ppl: 18.197821 +epoch: 0, batch: 36040, sum loss: 4975.546875, avg loss: 2.689485, ppl: 14.724089 +epoch: 0, batch: 36041, sum loss: 5784.415527, avg loss: 3.036439, ppl: 20.830925 +epoch: 0, batch: 36042, sum loss: 5326.328125, avg loss: 3.103921, ppl: 22.285158 +epoch: 0, batch: 36043, sum loss: 3527.056885, avg loss: 2.723596, ppl: 15.235010 +epoch: 0, batch: 36044, sum loss: 4500.677734, avg loss: 2.734312, ppl: 15.399150 +epoch: 0, batch: 36045, sum loss: 3700.932861, avg loss: 2.548852, ppl: 12.792409 +epoch: 0, batch: 36046, sum loss: 4322.264648, avg loss: 2.828707, ppl: 16.923569 +epoch: 0, batch: 36047, sum loss: 4102.920898, avg loss: 2.847274, ppl: 
17.240713 +epoch: 0, batch: 36048, sum loss: 5145.206543, avg loss: 2.831704, ppl: 16.974363 +epoch: 0, batch: 36049, sum loss: 4210.522461, avg loss: 2.825854, ppl: 16.875351 +epoch: 0, batch: 36050, sum loss: 4769.645996, avg loss: 2.769829, ppl: 15.955910 +epoch: 0, batch: 36051, sum loss: 4916.463867, avg loss: 3.096010, ppl: 22.109562 +epoch: 0, batch: 36052, sum loss: 5127.898438, avg loss: 3.061432, ppl: 21.358122 +epoch: 0, batch: 36053, sum loss: 5136.933105, avg loss: 2.807067, ppl: 16.561279 +epoch: 0, batch: 36054, sum loss: 3678.588623, avg loss: 2.607079, ppl: 13.559386 +epoch: 0, batch: 36055, sum loss: 4439.611816, avg loss: 2.542733, ppl: 12.714375 +epoch: 0, batch: 36056, sum loss: 5761.776855, avg loss: 3.124608, ppl: 22.750971 +epoch: 0, batch: 36057, sum loss: 4182.726074, avg loss: 2.781068, ppl: 16.136242 +epoch: 0, batch: 36058, sum loss: 5202.624023, avg loss: 3.147383, ppl: 23.275074 +epoch: 0, batch: 36059, sum loss: 5172.367188, avg loss: 3.000213, ppl: 20.089819 +epoch: 0, batch: 36060, sum loss: 4660.760742, avg loss: 2.772612, ppl: 16.000370 +epoch: 0, batch: 36061, sum loss: 4966.291992, avg loss: 2.870689, ppl: 17.649174 +epoch: 0, batch: 36062, sum loss: 5667.708496, avg loss: 3.098802, ppl: 22.171375 +epoch: 0, batch: 36063, sum loss: 4585.391113, avg loss: 2.918772, ppl: 18.518538 +epoch: 0, batch: 36064, sum loss: 4460.973145, avg loss: 2.668046, ppl: 14.411784 +epoch: 0, batch: 36065, sum loss: 5408.291016, avg loss: 3.111790, ppl: 22.461212 +epoch: 0, batch: 36066, sum loss: 4613.024414, avg loss: 3.038883, ppl: 20.881905 +epoch: 0, batch: 36067, sum loss: 4827.157715, avg loss: 2.708843, ppl: 15.011893 +epoch: 0, batch: 36068, sum loss: 4304.925781, avg loss: 2.942533, ppl: 18.963825 +epoch: 0, batch: 36069, sum loss: 5633.491211, avg loss: 3.019020, ppl: 20.471216 +epoch: 0, batch: 36070, sum loss: 4389.473633, avg loss: 2.945956, ppl: 19.028835 +epoch: 0, batch: 36071, sum loss: 5693.760254, avg loss: 2.957798, ppl: 
19.255516 +epoch: 0, batch: 36072, sum loss: 5185.398438, avg loss: 2.911510, ppl: 18.384529 +epoch: 0, batch: 36073, sum loss: 4994.806641, avg loss: 2.770275, ppl: 15.963029 +epoch: 0, batch: 36074, sum loss: 5635.383789, avg loss: 3.007142, ppl: 20.229498 +epoch: 0, batch: 36075, sum loss: 4982.603027, avg loss: 3.021591, ppl: 20.523912 +epoch: 0, batch: 36076, sum loss: 4673.756836, avg loss: 2.825730, ppl: 16.873255 +epoch: 0, batch: 36077, sum loss: 5807.851562, avg loss: 2.902475, ppl: 18.219175 +epoch: 0, batch: 36078, sum loss: 5267.193848, avg loss: 3.041105, ppl: 20.928356 +epoch: 0, batch: 36079, sum loss: 5106.710449, avg loss: 3.203708, ppl: 24.623665 +epoch: 0, batch: 36080, sum loss: 5287.527344, avg loss: 3.382935, ppl: 29.457102 +epoch: 0, batch: 36081, sum loss: 5481.807617, avg loss: 2.898894, ppl: 18.154049 +epoch: 0, batch: 36082, sum loss: 5357.880371, avg loss: 3.021929, ppl: 20.530863 +epoch: 0, batch: 36083, sum loss: 5429.117188, avg loss: 3.138218, ppl: 23.062731 +epoch: 0, batch: 36084, sum loss: 5124.523438, avg loss: 2.929973, ppl: 18.727131 +epoch: 0, batch: 36085, sum loss: 4536.917969, avg loss: 2.976980, ppl: 19.628452 +epoch: 0, batch: 36086, sum loss: 5511.628906, avg loss: 2.963241, ppl: 19.360624 +epoch: 0, batch: 36087, sum loss: 4123.756836, avg loss: 2.875702, ppl: 17.737875 +epoch: 0, batch: 36088, sum loss: 5014.447266, avg loss: 2.857235, ppl: 17.413311 +epoch: 0, batch: 36089, sum loss: 4752.345703, avg loss: 2.750200, ppl: 15.645761 +epoch: 0, batch: 36090, sum loss: 5680.003418, avg loss: 2.994203, ppl: 19.969440 +epoch: 0, batch: 36091, sum loss: 4999.654785, avg loss: 2.870066, ppl: 17.638182 +epoch: 0, batch: 36092, sum loss: 4734.210938, avg loss: 2.998234, ppl: 20.050089 +epoch: 0, batch: 36093, sum loss: 5322.361816, avg loss: 2.966757, ppl: 19.428810 +epoch: 0, batch: 36094, sum loss: 4697.968262, avg loss: 2.925260, ppl: 18.639078 +epoch: 0, batch: 36095, sum loss: 4857.755371, avg loss: 2.936974, ppl: 
18.858700 +epoch: 0, batch: 36096, sum loss: 5033.152344, avg loss: 2.859746, ppl: 17.457083 +epoch: 0, batch: 36097, sum loss: 5175.131348, avg loss: 3.035268, ppl: 20.806549 +epoch: 0, batch: 36098, sum loss: 5162.249023, avg loss: 2.943129, ppl: 18.975132 +epoch: 0, batch: 36099, sum loss: 5104.526855, avg loss: 2.952300, ppl: 19.149950 +epoch: 0, batch: 36100, sum loss: 4703.298340, avg loss: 2.887230, ppl: 17.943544 +epoch: 0, batch: 36101, sum loss: 5230.829102, avg loss: 2.838214, ppl: 17.085232 +epoch: 0, batch: 36102, sum loss: 5062.247070, avg loss: 2.914362, ppl: 18.437050 +epoch: 0, batch: 36103, sum loss: 5921.925781, avg loss: 3.089163, ppl: 21.958698 +epoch: 0, batch: 36104, sum loss: 4503.918457, avg loss: 2.773349, ppl: 16.012165 +epoch: 0, batch: 36105, sum loss: 4955.901855, avg loss: 2.854782, ppl: 17.370651 +epoch: 0, batch: 36106, sum loss: 4239.436035, avg loss: 2.779958, ppl: 16.118343 +epoch: 0, batch: 36107, sum loss: 5240.812988, avg loss: 3.043445, ppl: 20.977394 +epoch: 0, batch: 36108, sum loss: 5027.251953, avg loss: 2.861270, ppl: 17.483721 +epoch: 0, batch: 36109, sum loss: 5168.099121, avg loss: 3.054432, ppl: 21.209139 +epoch: 0, batch: 36110, sum loss: 4556.472656, avg loss: 2.983938, ppl: 19.765490 +epoch: 0, batch: 36111, sum loss: 4402.301270, avg loss: 2.901978, ppl: 18.210135 +epoch: 0, batch: 36112, sum loss: 5371.085449, avg loss: 2.850895, ppl: 17.303255 +epoch: 0, batch: 36113, sum loss: 4442.480957, avg loss: 2.817046, ppl: 16.727360 +epoch: 0, batch: 36114, sum loss: 4326.655762, avg loss: 2.853995, ppl: 17.356977 +epoch: 0, batch: 36115, sum loss: 5046.085938, avg loss: 3.045314, ppl: 21.016636 +epoch: 0, batch: 36116, sum loss: 6052.329102, avg loss: 3.157188, ppl: 23.504408 +epoch: 0, batch: 36117, sum loss: 4307.507324, avg loss: 2.798900, ppl: 16.426569 +epoch: 0, batch: 36118, sum loss: 5050.162598, avg loss: 3.115461, ppl: 22.543823 +epoch: 0, batch: 36119, sum loss: 6798.457520, avg loss: 3.182798, ppl: 
24.114140 +epoch: 0, batch: 36120, sum loss: 3978.717529, avg loss: 2.817789, ppl: 16.739792 +epoch: 0, batch: 36121, sum loss: 4599.506836, avg loss: 2.847992, ppl: 17.253101 +epoch: 0, batch: 36122, sum loss: 6253.917969, avg loss: 3.331869, ppl: 27.990604 +epoch: 0, batch: 36123, sum loss: 5166.649414, avg loss: 3.090101, ppl: 21.979309 +epoch: 0, batch: 36124, sum loss: 4807.116211, avg loss: 2.789969, ppl: 16.280510 +epoch: 0, batch: 36125, sum loss: 4878.900391, avg loss: 2.907569, ppl: 18.312222 +epoch: 0, batch: 36126, sum loss: 6354.760254, avg loss: 3.118135, ppl: 22.604195 +epoch: 0, batch: 36127, sum loss: 4920.967285, avg loss: 2.986024, ppl: 19.806772 +epoch: 0, batch: 36128, sum loss: 4006.544922, avg loss: 2.710788, ppl: 15.041127 +epoch: 0, batch: 36129, sum loss: 4437.330078, avg loss: 2.902113, ppl: 18.212584 +epoch: 0, batch: 36130, sum loss: 5131.234375, avg loss: 2.974629, ppl: 19.582350 +epoch: 0, batch: 36131, sum loss: 4713.339844, avg loss: 2.893394, ppl: 18.054491 +epoch: 0, batch: 36132, sum loss: 5787.408691, avg loss: 2.931818, ppl: 18.761703 +epoch: 0, batch: 36133, sum loss: 4979.876953, avg loss: 2.875218, ppl: 17.729282 +epoch: 0, batch: 36134, sum loss: 3421.639648, avg loss: 2.592151, ppl: 13.358480 +epoch: 0, batch: 36135, sum loss: 4974.394531, avg loss: 2.946916, ppl: 19.047129 +epoch: 0, batch: 36136, sum loss: 5506.207520, avg loss: 2.881323, ppl: 17.837851 +epoch: 0, batch: 36137, sum loss: 5280.449219, avg loss: 2.974901, ppl: 19.587679 +epoch: 0, batch: 36138, sum loss: 4784.750488, avg loss: 2.994212, ppl: 19.969616 +epoch: 0, batch: 36139, sum loss: 4817.919922, avg loss: 2.990639, ppl: 19.898403 +epoch: 0, batch: 36140, sum loss: 4925.243652, avg loss: 2.943959, ppl: 18.990887 +epoch: 0, batch: 36141, sum loss: 4900.107910, avg loss: 3.013597, ppl: 20.360497 +epoch: 0, batch: 36142, sum loss: 5098.963867, avg loss: 2.943974, ppl: 18.991177 +epoch: 0, batch: 36143, sum loss: 4789.138672, avg loss: 2.687508, ppl: 
14.695008 +epoch: 0, batch: 36144, sum loss: 5201.496582, avg loss: 2.962128, ppl: 19.339081 +epoch: 0, batch: 36145, sum loss: 5611.708984, avg loss: 2.832766, ppl: 16.992390 +epoch: 0, batch: 36146, sum loss: 4849.061523, avg loss: 2.755148, ppl: 15.723374 +epoch: 0, batch: 36147, sum loss: 5745.203125, avg loss: 3.167146, ppl: 23.739639 +epoch: 0, batch: 36148, sum loss: 4168.980469, avg loss: 2.799853, ppl: 16.442223 +epoch: 0, batch: 36149, sum loss: 4274.094238, avg loss: 2.797182, ppl: 16.398373 +epoch: 0, batch: 36150, sum loss: 5478.231445, avg loss: 2.990301, ppl: 19.891672 +epoch: 0, batch: 36151, sum loss: 4716.503906, avg loss: 2.860221, ppl: 17.465380 +epoch: 0, batch: 36152, sum loss: 5287.360840, avg loss: 2.965430, ppl: 19.403040 +epoch: 0, batch: 36153, sum loss: 5572.229492, avg loss: 3.144599, ppl: 23.210371 +epoch: 0, batch: 36154, sum loss: 4432.312500, avg loss: 2.891267, ppl: 18.016127 +epoch: 0, batch: 36155, sum loss: 4680.488770, avg loss: 3.089432, ppl: 21.964588 +epoch: 0, batch: 36156, sum loss: 5580.788574, avg loss: 3.005271, ppl: 20.191687 +epoch: 0, batch: 36157, sum loss: 4049.135986, avg loss: 2.890176, ppl: 17.996469 +epoch: 0, batch: 36158, sum loss: 5997.301758, avg loss: 3.222623, ppl: 25.093864 +epoch: 0, batch: 36159, sum loss: 5114.643555, avg loss: 2.896174, ppl: 18.104748 +epoch: 0, batch: 36160, sum loss: 4008.677002, avg loss: 2.694003, ppl: 14.790771 +epoch: 0, batch: 36161, sum loss: 4745.156250, avg loss: 2.819463, ppl: 16.767843 +epoch: 0, batch: 36162, sum loss: 5228.557617, avg loss: 3.088339, ppl: 21.940601 +epoch: 0, batch: 36163, sum loss: 5530.422852, avg loss: 2.918429, ppl: 18.512180 +epoch: 0, batch: 36164, sum loss: 5186.619629, avg loss: 2.897553, ppl: 18.129723 +epoch: 0, batch: 36165, sum loss: 5415.553223, avg loss: 3.049298, ppl: 21.100527 +epoch: 0, batch: 36166, sum loss: 5048.985840, avg loss: 3.067428, ppl: 21.486561 +epoch: 0, batch: 36167, sum loss: 4991.848145, avg loss: 3.123810, ppl: 
22.732822 +epoch: 0, batch: 36168, sum loss: 5376.740234, avg loss: 3.013868, ppl: 20.366026 +epoch: 0, batch: 36169, sum loss: 4382.666992, avg loss: 2.943363, ppl: 18.979563 +epoch: 0, batch: 36170, sum loss: 5550.693359, avg loss: 2.901565, ppl: 18.202602 +epoch: 0, batch: 36171, sum loss: 4705.497070, avg loss: 2.877980, ppl: 17.778320 +epoch: 0, batch: 36172, sum loss: 5184.201172, avg loss: 2.848462, ppl: 17.261219 +epoch: 0, batch: 36173, sum loss: 5761.121582, avg loss: 2.985037, ppl: 19.787241 +epoch: 0, batch: 36174, sum loss: 5688.213379, avg loss: 3.079704, ppl: 21.751968 +epoch: 0, batch: 36175, sum loss: 5152.408203, avg loss: 2.929169, ppl: 18.712078 +epoch: 0, batch: 36176, sum loss: 5002.682617, avg loss: 2.810496, ppl: 16.618156 +epoch: 0, batch: 36177, sum loss: 5160.520020, avg loss: 2.960711, ppl: 19.311707 +epoch: 0, batch: 36178, sum loss: 5383.898926, avg loss: 2.910216, ppl: 18.360758 +epoch: 0, batch: 36179, sum loss: 4950.966797, avg loss: 3.106002, ppl: 22.331575 +epoch: 0, batch: 36180, sum loss: 6077.784180, avg loss: 3.197151, ppl: 24.462742 +epoch: 0, batch: 36181, sum loss: 5320.070801, avg loss: 2.962178, ppl: 19.340038 +epoch: 0, batch: 36182, sum loss: 4733.280762, avg loss: 2.737583, ppl: 15.449597 +epoch: 0, batch: 36183, sum loss: 5367.045898, avg loss: 3.213800, ppl: 24.873425 +epoch: 0, batch: 36184, sum loss: 5285.587891, avg loss: 2.834095, ppl: 17.014999 +epoch: 0, batch: 36185, sum loss: 4626.032227, avg loss: 2.735679, ppl: 15.420205 +epoch: 0, batch: 36186, sum loss: 5666.419434, avg loss: 3.104887, ppl: 22.306709 +epoch: 0, batch: 36187, sum loss: 5321.185547, avg loss: 3.190159, ppl: 24.292297 +epoch: 0, batch: 36188, sum loss: 5318.571289, avg loss: 2.923898, ppl: 18.613710 +epoch: 0, batch: 36189, sum loss: 4449.279785, avg loss: 2.915649, ppl: 18.460785 +epoch: 0, batch: 36190, sum loss: 4206.541992, avg loss: 2.738634, ppl: 15.465846 +epoch: 0, batch: 36191, sum loss: 4822.489258, avg loss: 2.865412, ppl: 
17.556293 +epoch: 0, batch: 36192, sum loss: 4489.061523, avg loss: 2.752337, ppl: 15.679231 +epoch: 0, batch: 36193, sum loss: 4695.993652, avg loss: 2.916766, ppl: 18.481424 +epoch: 0, batch: 36194, sum loss: 5322.676270, avg loss: 3.121804, ppl: 22.687281 +epoch: 0, batch: 36195, sum loss: 3658.537354, avg loss: 2.589198, ppl: 13.319090 +epoch: 0, batch: 36196, sum loss: 5035.083496, avg loss: 3.060841, ppl: 21.345497 +epoch: 0, batch: 36197, sum loss: 5219.681152, avg loss: 2.935704, ppl: 18.834753 +epoch: 0, batch: 36198, sum loss: 3887.705322, avg loss: 2.646498, ppl: 14.104554 +epoch: 0, batch: 36199, sum loss: 4347.455566, avg loss: 2.772612, ppl: 16.000370 +epoch: 0, batch: 36200, sum loss: 4777.280762, avg loss: 3.042854, ppl: 20.964994 +epoch: 0, batch: 36201, sum loss: 4560.443359, avg loss: 2.701684, ppl: 14.904818 +epoch: 0, batch: 36202, sum loss: 4515.335938, avg loss: 2.857808, ppl: 17.423286 +epoch: 0, batch: 36203, sum loss: 4522.221680, avg loss: 2.900720, ppl: 18.187229 +epoch: 0, batch: 36204, sum loss: 5111.454590, avg loss: 2.813129, ppl: 16.661966 +epoch: 0, batch: 36205, sum loss: 4456.769531, avg loss: 2.684801, ppl: 14.655283 +epoch: 0, batch: 36206, sum loss: 5208.072754, avg loss: 3.076239, ppl: 21.676725 +epoch: 0, batch: 36207, sum loss: 5261.840332, avg loss: 3.045047, ppl: 21.011009 +epoch: 0, batch: 36208, sum loss: 6111.112793, avg loss: 2.989781, ppl: 19.881330 +epoch: 0, batch: 36209, sum loss: 5323.987305, avg loss: 2.906107, ppl: 18.285469 +epoch: 0, batch: 36210, sum loss: 5489.925781, avg loss: 3.058455, ppl: 21.294621 +epoch: 0, batch: 36211, sum loss: 4492.241211, avg loss: 2.839596, ppl: 17.108852 +epoch: 0, batch: 36212, sum loss: 5891.651855, avg loss: 2.999823, ppl: 20.081980 +epoch: 0, batch: 36213, sum loss: 5158.985352, avg loss: 2.976910, ppl: 19.627077 +epoch: 0, batch: 36214, sum loss: 5523.428223, avg loss: 3.108288, ppl: 22.382698 +epoch: 0, batch: 36215, sum loss: 5899.310059, avg loss: 3.166565, ppl: 
23.725840 +epoch: 0, batch: 36216, sum loss: 5596.797852, avg loss: 3.007414, ppl: 20.235012 +epoch: 0, batch: 36217, sum loss: 5218.705078, avg loss: 2.892852, ppl: 18.044701 +epoch: 0, batch: 36218, sum loss: 4836.772461, avg loss: 2.714238, ppl: 15.093111 +epoch: 0, batch: 36219, sum loss: 4563.402344, avg loss: 2.850345, ppl: 17.293749 +epoch: 0, batch: 36220, sum loss: 5668.900391, avg loss: 3.404745, ppl: 30.106621 +epoch: 0, batch: 36221, sum loss: 5183.081055, avg loss: 3.079668, ppl: 21.751175 +epoch: 0, batch: 36222, sum loss: 4639.923828, avg loss: 2.892721, ppl: 18.042330 +epoch: 0, batch: 36223, sum loss: 4692.420410, avg loss: 2.894769, ppl: 18.079319 +epoch: 0, batch: 36224, sum loss: 5110.564453, avg loss: 3.116198, ppl: 22.560438 +epoch: 0, batch: 36225, sum loss: 5759.374512, avg loss: 3.026471, ppl: 20.624323 +epoch: 0, batch: 36226, sum loss: 4188.883789, avg loss: 2.894875, ppl: 18.081238 +epoch: 0, batch: 36227, sum loss: 4306.122559, avg loss: 2.897794, ppl: 18.134106 +epoch: 0, batch: 36228, sum loss: 5876.319336, avg loss: 3.010410, ppl: 20.295712 +epoch: 0, batch: 36229, sum loss: 4229.037598, avg loss: 2.920606, ppl: 18.552530 +epoch: 0, batch: 36230, sum loss: 4760.902344, avg loss: 2.883648, ppl: 17.879377 +epoch: 0, batch: 36231, sum loss: 4045.085449, avg loss: 2.566679, ppl: 13.022499 +epoch: 0, batch: 36232, sum loss: 4512.210449, avg loss: 2.701922, ppl: 14.908364 +epoch: 0, batch: 36233, sum loss: 5326.377441, avg loss: 2.663189, ppl: 14.341951 +epoch: 0, batch: 36234, sum loss: 5129.067383, avg loss: 2.762018, ppl: 15.831755 +epoch: 0, batch: 36235, sum loss: 4792.324707, avg loss: 2.943688, ppl: 18.985744 +epoch: 0, batch: 36236, sum loss: 3951.691895, avg loss: 2.800632, ppl: 16.455044 +epoch: 0, batch: 36237, sum loss: 4987.313965, avg loss: 2.909751, ppl: 18.352236 +epoch: 0, batch: 36238, sum loss: 3981.210205, avg loss: 2.617495, ppl: 13.701363 +epoch: 0, batch: 36239, sum loss: 5283.371582, avg loss: 2.923836, ppl: 
18.612543 +epoch: 0, batch: 36240, sum loss: 4330.260254, avg loss: 2.992578, ppl: 19.937014 +epoch: 0, batch: 36241, sum loss: 5247.933594, avg loss: 3.049351, ppl: 21.101650 +epoch: 0, batch: 36242, sum loss: 4303.289062, avg loss: 2.725326, ppl: 15.261382 +epoch: 0, batch: 36243, sum loss: 3920.618896, avg loss: 2.737863, ppl: 15.453918 +epoch: 0, batch: 36244, sum loss: 4045.310059, avg loss: 2.581564, ppl: 13.217791 +epoch: 0, batch: 36245, sum loss: 5282.515625, avg loss: 3.004844, ppl: 20.183065 +epoch: 0, batch: 36246, sum loss: 4696.428223, avg loss: 2.867172, ppl: 17.587215 +epoch: 0, batch: 36247, sum loss: 5025.846680, avg loss: 2.726992, ppl: 15.286837 +epoch: 0, batch: 36248, sum loss: 4757.086914, avg loss: 2.823197, ppl: 16.830570 +epoch: 0, batch: 36249, sum loss: 4942.483887, avg loss: 3.077512, ppl: 21.704325 +epoch: 0, batch: 36250, sum loss: 4525.535645, avg loss: 2.805664, ppl: 16.538050 +epoch: 0, batch: 36251, sum loss: 5619.373535, avg loss: 3.102912, ppl: 22.262690 +epoch: 0, batch: 36252, sum loss: 4767.551758, avg loss: 2.873750, ppl: 17.703285 +epoch: 0, batch: 36253, sum loss: 5423.833496, avg loss: 3.243920, ppl: 25.634007 +epoch: 0, batch: 36254, sum loss: 6124.636719, avg loss: 3.171744, ppl: 23.849031 +epoch: 0, batch: 36255, sum loss: 5528.843262, avg loss: 2.763040, ppl: 15.847949 +epoch: 0, batch: 36256, sum loss: 5015.781250, avg loss: 2.957418, ppl: 19.248213 +epoch: 0, batch: 36257, sum loss: 4291.354492, avg loss: 2.973912, ppl: 19.568312 +epoch: 0, batch: 36258, sum loss: 3885.350098, avg loss: 2.632351, ppl: 13.906427 +epoch: 0, batch: 36259, sum loss: 4707.573730, avg loss: 2.920331, ppl: 18.547426 +epoch: 0, batch: 36260, sum loss: 5048.971191, avg loss: 2.783336, ppl: 16.172882 +epoch: 0, batch: 36261, sum loss: 5237.903320, avg loss: 2.855999, ppl: 17.391811 +epoch: 0, batch: 36262, sum loss: 5252.411133, avg loss: 2.959105, ppl: 19.280703 +epoch: 0, batch: 36263, sum loss: 5292.456055, avg loss: 2.911142, ppl: 
18.377771 +epoch: 0, batch: 36264, sum loss: 4760.501953, avg loss: 3.125740, ppl: 22.776745 +epoch: 0, batch: 36265, sum loss: 5434.845703, avg loss: 3.208291, ppl: 24.736782 +epoch: 0, batch: 36266, sum loss: 5002.827148, avg loss: 2.965517, ppl: 19.404734 +epoch: 0, batch: 36267, sum loss: 4248.859375, avg loss: 2.930248, ppl: 18.732275 +epoch: 0, batch: 36268, sum loss: 4457.531738, avg loss: 2.872121, ppl: 17.674467 +epoch: 0, batch: 36269, sum loss: 5422.587402, avg loss: 2.990947, ppl: 19.904533 +epoch: 0, batch: 36270, sum loss: 4827.310547, avg loss: 3.003927, ppl: 20.164568 +epoch: 0, batch: 36271, sum loss: 5525.573242, avg loss: 2.958016, ppl: 19.259716 +epoch: 0, batch: 36272, sum loss: 4278.334961, avg loss: 2.668955, ppl: 14.424889 +epoch: 0, batch: 36273, sum loss: 6008.066406, avg loss: 3.111376, ppl: 22.451912 +epoch: 0, batch: 36274, sum loss: 4666.479492, avg loss: 2.936740, ppl: 18.854275 +epoch: 0, batch: 36275, sum loss: 4935.739258, avg loss: 2.888086, ppl: 17.958910 +epoch: 0, batch: 36276, sum loss: 4624.064453, avg loss: 2.890040, ppl: 17.994032 +epoch: 0, batch: 36277, sum loss: 4828.113770, avg loss: 2.867051, ppl: 17.585085 +epoch: 0, batch: 36278, sum loss: 4542.381348, avg loss: 2.556208, ppl: 12.886856 +epoch: 0, batch: 36279, sum loss: 4452.968750, avg loss: 2.690616, ppl: 14.740748 +epoch: 0, batch: 36280, sum loss: 5726.917969, avg loss: 3.098982, ppl: 22.175356 +epoch: 0, batch: 36281, sum loss: 5902.577637, avg loss: 2.929319, ppl: 18.714884 +epoch: 0, batch: 36282, sum loss: 6238.409180, avg loss: 3.207408, ppl: 24.714952 +epoch: 0, batch: 36283, sum loss: 3834.671387, avg loss: 2.687226, ppl: 14.690867 +epoch: 0, batch: 36284, sum loss: 5073.906250, avg loss: 3.062104, ppl: 21.372477 +epoch: 0, batch: 36285, sum loss: 4688.622559, avg loss: 2.948819, ppl: 19.083406 +epoch: 0, batch: 36286, sum loss: 5182.495605, avg loss: 2.868011, ppl: 17.601969 +epoch: 0, batch: 36287, sum loss: 4477.681152, avg loss: 2.817924, ppl: 
16.742058 +epoch: 0, batch: 36288, sum loss: 5344.500488, avg loss: 2.864148, ppl: 17.534105 +epoch: 0, batch: 36289, sum loss: 4753.113770, avg loss: 3.008300, ppl: 20.252937 +epoch: 0, batch: 36290, sum loss: 4496.168457, avg loss: 2.792651, ppl: 16.324242 +epoch: 0, batch: 36291, sum loss: 5136.539062, avg loss: 3.032195, ppl: 20.742720 +epoch: 0, batch: 36292, sum loss: 5444.334961, avg loss: 2.997982, ppl: 20.045040 +epoch: 0, batch: 36293, sum loss: 4982.176270, avg loss: 2.916965, ppl: 18.485100 +epoch: 0, batch: 36294, sum loss: 5142.900391, avg loss: 2.892520, ppl: 18.038710 +epoch: 0, batch: 36295, sum loss: 4916.058594, avg loss: 2.949045, ppl: 19.087719 +epoch: 0, batch: 36296, sum loss: 5834.073242, avg loss: 3.134913, ppl: 22.986641 +epoch: 0, batch: 36297, sum loss: 4989.178711, avg loss: 2.989322, ppl: 19.872208 +epoch: 0, batch: 36298, sum loss: 3881.031738, avg loss: 2.615250, ppl: 13.670638 +epoch: 0, batch: 36299, sum loss: 4662.049316, avg loss: 2.811851, ppl: 16.640696 +epoch: 0, batch: 36300, sum loss: 5208.497559, avg loss: 2.777865, ppl: 16.084650 +epoch: 0, batch: 36301, sum loss: 4918.879883, avg loss: 3.135041, ppl: 22.989584 +epoch: 0, batch: 36302, sum loss: 5631.670898, avg loss: 3.024528, ppl: 20.584288 +epoch: 0, batch: 36303, sum loss: 5589.151367, avg loss: 2.985658, ppl: 19.799524 +epoch: 0, batch: 36304, sum loss: 4930.035156, avg loss: 2.774359, ppl: 16.028341 +epoch: 0, batch: 36305, sum loss: 5445.571777, avg loss: 3.071389, ppl: 21.571838 +epoch: 0, batch: 36306, sum loss: 5751.466309, avg loss: 2.912135, ppl: 18.396030 +epoch: 0, batch: 36307, sum loss: 4799.078613, avg loss: 3.048970, ppl: 21.093601 +epoch: 0, batch: 36308, sum loss: 3791.878906, avg loss: 2.759737, ppl: 15.795692 +epoch: 0, batch: 36309, sum loss: 4931.979004, avg loss: 2.946224, ppl: 19.033949 +epoch: 0, batch: 36310, sum loss: 4928.297852, avg loss: 3.059155, ppl: 21.309532 +epoch: 0, batch: 36311, sum loss: 4180.206543, avg loss: 2.896886, ppl: 
18.117638 +epoch: 0, batch: 36312, sum loss: 4725.703613, avg loss: 2.922513, ppl: 18.587944 +epoch: 0, batch: 36313, sum loss: 5172.391113, avg loss: 3.031882, ppl: 20.736223 +epoch: 0, batch: 36314, sum loss: 5525.066406, avg loss: 3.052523, ppl: 21.168688 +epoch: 0, batch: 36315, sum loss: 4851.074707, avg loss: 2.661039, ppl: 14.311155 +epoch: 0, batch: 36316, sum loss: 4866.814453, avg loss: 2.732630, ppl: 15.373269 +epoch: 0, batch: 36317, sum loss: 6149.791016, avg loss: 3.238437, ppl: 25.493835 +epoch: 0, batch: 36318, sum loss: 3549.481689, avg loss: 2.672803, ppl: 14.480496 +epoch: 0, batch: 36319, sum loss: 4262.605469, avg loss: 2.970457, ppl: 19.500822 +epoch: 0, batch: 36320, sum loss: 4263.515137, avg loss: 2.772116, ppl: 15.992445 +epoch: 0, batch: 36321, sum loss: 3840.567383, avg loss: 2.420017, ppl: 11.246056 +epoch: 0, batch: 36322, sum loss: 4793.765137, avg loss: 2.823183, ppl: 16.830341 +epoch: 0, batch: 36323, sum loss: 5922.444824, avg loss: 3.079794, ppl: 21.753929 +epoch: 0, batch: 36324, sum loss: 4030.534180, avg loss: 2.681659, ppl: 14.609317 +epoch: 0, batch: 36325, sum loss: 5446.183105, avg loss: 3.035777, ppl: 20.817139 +epoch: 0, batch: 36326, sum loss: 4293.012207, avg loss: 2.837417, ppl: 17.071619 +epoch: 0, batch: 36327, sum loss: 3887.849854, avg loss: 2.646596, ppl: 14.105943 +epoch: 0, batch: 36328, sum loss: 5316.122559, avg loss: 3.053488, ppl: 21.189123 +epoch: 0, batch: 36329, sum loss: 5814.661621, avg loss: 2.867190, ppl: 17.587530 +epoch: 0, batch: 36330, sum loss: 5027.516602, avg loss: 2.829216, ppl: 16.932177 +epoch: 0, batch: 36331, sum loss: 5519.734375, avg loss: 2.857005, ppl: 17.409313 +epoch: 0, batch: 36332, sum loss: 5148.778320, avg loss: 2.881241, ppl: 17.836401 +epoch: 0, batch: 36333, sum loss: 4950.059570, avg loss: 2.893080, ppl: 18.048815 +epoch: 0, batch: 36334, sum loss: 3609.461670, avg loss: 2.657925, ppl: 14.266650 +epoch: 0, batch: 36335, sum loss: 4690.482910, avg loss: 3.020272, ppl: 
20.496876 +epoch: 0, batch: 36336, sum loss: 5249.978516, avg loss: 3.104659, ppl: 22.301620 +epoch: 0, batch: 36337, sum loss: 4827.495605, avg loss: 2.943595, ppl: 18.983971 +epoch: 0, batch: 36338, sum loss: 5313.788574, avg loss: 2.850745, ppl: 17.300665 +epoch: 0, batch: 36339, sum loss: 5476.305176, avg loss: 3.127530, ppl: 22.817558 +epoch: 0, batch: 36340, sum loss: 4607.739746, avg loss: 2.840777, ppl: 17.129065 +epoch: 0, batch: 36341, sum loss: 4604.125488, avg loss: 2.773570, ppl: 16.015701 +epoch: 0, batch: 36342, sum loss: 5491.003418, avg loss: 3.114579, ppl: 22.523956 +epoch: 0, batch: 36343, sum loss: 5057.255859, avg loss: 3.030112, ppl: 20.699541 +epoch: 0, batch: 36344, sum loss: 5087.405273, avg loss: 2.907089, ppl: 18.303434 +epoch: 0, batch: 36345, sum loss: 6067.955566, avg loss: 3.215663, ppl: 24.919802 +epoch: 0, batch: 36346, sum loss: 5736.254883, avg loss: 2.970614, ppl: 19.503885 +epoch: 0, batch: 36347, sum loss: 5075.818848, avg loss: 3.059565, ppl: 21.318289 +epoch: 0, batch: 36348, sum loss: 5899.553223, avg loss: 2.996218, ppl: 20.009716 +epoch: 0, batch: 36349, sum loss: 4846.121582, avg loss: 2.971258, ppl: 19.516455 +epoch: 0, batch: 36350, sum loss: 4905.223633, avg loss: 2.949623, ppl: 19.098759 +epoch: 0, batch: 36351, sum loss: 5165.704102, avg loss: 2.902081, ppl: 18.212006 +epoch: 0, batch: 36352, sum loss: 4273.415527, avg loss: 2.708121, ppl: 15.001066 +epoch: 0, batch: 36353, sum loss: 4418.429688, avg loss: 2.740961, ppl: 15.501880 +epoch: 0, batch: 36354, sum loss: 4071.010254, avg loss: 2.941481, ppl: 18.943888 +epoch: 0, batch: 36355, sum loss: 4637.347656, avg loss: 2.922084, ppl: 18.579973 +epoch: 0, batch: 36356, sum loss: 4699.936523, avg loss: 2.857104, ppl: 17.411032 +epoch: 0, batch: 36357, sum loss: 3874.412842, avg loss: 2.401992, ppl: 11.045155 +epoch: 0, batch: 36358, sum loss: 5221.620117, avg loss: 2.955076, ppl: 19.203192 +epoch: 0, batch: 36359, sum loss: 5210.838867, avg loss: 3.041937, ppl: 
20.945784 +epoch: 0, batch: 36360, sum loss: 4830.205566, avg loss: 2.827989, ppl: 16.911419 +epoch: 0, batch: 36361, sum loss: 4264.676270, avg loss: 2.877650, ppl: 17.772451 +epoch: 0, batch: 36362, sum loss: 4228.090820, avg loss: 2.837645, ppl: 17.075506 +epoch: 0, batch: 36363, sum loss: 3548.245117, avg loss: 2.614772, ppl: 13.664095 +epoch: 0, batch: 36364, sum loss: 3882.065186, avg loss: 2.515921, ppl: 12.377999 +epoch: 0, batch: 36365, sum loss: 4744.981934, avg loss: 3.109425, ppl: 22.408146 +epoch: 0, batch: 36366, sum loss: 4983.733398, avg loss: 2.917876, ppl: 18.501957 +epoch: 0, batch: 36367, sum loss: 5152.919922, avg loss: 2.999371, ppl: 20.072912 +epoch: 0, batch: 36368, sum loss: 4678.313477, avg loss: 2.854371, ppl: 17.363504 +epoch: 0, batch: 36369, sum loss: 5191.833008, avg loss: 2.926625, ppl: 18.664532 +epoch: 0, batch: 36370, sum loss: 4152.800781, avg loss: 2.679226, ppl: 14.573814 +epoch: 0, batch: 36371, sum loss: 5046.105469, avg loss: 2.920200, ppl: 18.544994 +epoch: 0, batch: 36372, sum loss: 5210.216309, avg loss: 3.103166, ppl: 22.268349 +epoch: 0, batch: 36373, sum loss: 4895.019531, avg loss: 2.934664, ppl: 18.815180 +epoch: 0, batch: 36374, sum loss: 4986.179688, avg loss: 2.922731, ppl: 18.591995 +epoch: 0, batch: 36375, sum loss: 5352.886719, avg loss: 3.232419, ppl: 25.340895 +epoch: 0, batch: 36376, sum loss: 4937.305176, avg loss: 2.885626, ppl: 17.914772 +epoch: 0, batch: 36377, sum loss: 4944.442871, avg loss: 3.155356, ppl: 23.461391 +epoch: 0, batch: 36378, sum loss: 3954.678467, avg loss: 2.659501, ppl: 14.289162 +epoch: 0, batch: 36379, sum loss: 5990.335938, avg loss: 3.094182, ppl: 22.069168 +epoch: 0, batch: 36380, sum loss: 5243.800781, avg loss: 2.960927, ppl: 19.315861 +epoch: 0, batch: 36381, sum loss: 5751.299805, avg loss: 2.972248, ppl: 19.535784 +epoch: 0, batch: 36382, sum loss: 5281.225586, avg loss: 3.191073, ppl: 24.314501 +epoch: 0, batch: 36383, sum loss: 4403.442871, avg loss: 2.743578, ppl: 
15.542496 +epoch: 0, batch: 36384, sum loss: 5015.311035, avg loss: 3.048821, ppl: 21.090469 +epoch: 0, batch: 36385, sum loss: 5561.224121, avg loss: 2.724755, ppl: 15.252670 +epoch: 0, batch: 36386, sum loss: 4758.431152, avg loss: 2.880406, ppl: 17.821514 +epoch: 0, batch: 36387, sum loss: 5128.097656, avg loss: 3.068880, ppl: 21.517780 +epoch: 0, batch: 36388, sum loss: 4985.694824, avg loss: 3.019803, ppl: 20.487261 +epoch: 0, batch: 36389, sum loss: 4370.415039, avg loss: 2.839776, ppl: 17.111929 +epoch: 0, batch: 36390, sum loss: 4417.290039, avg loss: 2.725040, ppl: 15.257024 +epoch: 0, batch: 36391, sum loss: 4899.474121, avg loss: 3.005812, ppl: 20.202621 +epoch: 0, batch: 36392, sum loss: 5506.356445, avg loss: 3.025471, ppl: 20.603701 +epoch: 0, batch: 36393, sum loss: 4619.361328, avg loss: 3.021165, ppl: 20.515181 +epoch: 0, batch: 36394, sum loss: 4837.517578, avg loss: 2.971448, ppl: 19.520168 +epoch: 0, batch: 36395, sum loss: 4296.380859, avg loss: 2.780829, ppl: 16.132393 +epoch: 0, batch: 36396, sum loss: 4248.607910, avg loss: 2.714765, ppl: 15.101066 +epoch: 0, batch: 36397, sum loss: 5028.302734, avg loss: 2.964801, ppl: 19.390846 +epoch: 0, batch: 36398, sum loss: 5276.115234, avg loss: 2.824473, ppl: 16.852060 +epoch: 0, batch: 36399, sum loss: 5401.630859, avg loss: 2.871680, ppl: 17.666677 +epoch: 0, batch: 36400, sum loss: 5579.030273, avg loss: 3.162716, ppl: 23.634693 +epoch: 0, batch: 36401, sum loss: 4480.215820, avg loss: 2.616948, ppl: 13.693871 +epoch: 0, batch: 36402, sum loss: 5922.835938, avg loss: 3.021855, ppl: 20.529335 +epoch: 0, batch: 36403, sum loss: 5110.677246, avg loss: 2.973053, ppl: 19.551514 +epoch: 0, batch: 36404, sum loss: 4852.144531, avg loss: 2.930039, ppl: 18.728359 +epoch: 0, batch: 36405, sum loss: 5051.033691, avg loss: 2.728813, ppl: 15.314704 +epoch: 0, batch: 36406, sum loss: 4817.845703, avg loss: 2.714279, ppl: 15.093726 +epoch: 0, batch: 36407, sum loss: 5458.423340, avg loss: 3.128036, ppl: 
22.829105 +epoch: 0, batch: 36408, sum loss: 4654.778320, avg loss: 2.966717, ppl: 19.428024 +epoch: 0, batch: 36409, sum loss: 4550.191895, avg loss: 2.781291, ppl: 16.139845 +epoch: 0, batch: 36410, sum loss: 4842.990234, avg loss: 2.646443, ppl: 14.103781 +epoch: 0, batch: 36411, sum loss: 4398.168457, avg loss: 2.855953, ppl: 17.391010 +epoch: 0, batch: 36412, sum loss: 4230.250000, avg loss: 2.844822, ppl: 17.198492 +epoch: 0, batch: 36413, sum loss: 4208.482422, avg loss: 2.736335, ppl: 15.430323 +epoch: 0, batch: 36414, sum loss: 5340.894043, avg loss: 3.024289, ppl: 20.579361 +epoch: 0, batch: 36415, sum loss: 5675.483887, avg loss: 3.106450, ppl: 22.341587 +epoch: 0, batch: 36416, sum loss: 6034.063477, avg loss: 3.008008, ppl: 20.247023 +epoch: 0, batch: 36417, sum loss: 5388.944336, avg loss: 2.812602, ppl: 16.653187 +epoch: 0, batch: 36418, sum loss: 4680.958496, avg loss: 2.766524, ppl: 15.903255 +epoch: 0, batch: 36419, sum loss: 5303.298828, avg loss: 3.026997, ppl: 20.635180 +epoch: 0, batch: 36420, sum loss: 5431.537109, avg loss: 2.939143, ppl: 18.899651 +epoch: 0, batch: 36421, sum loss: 5047.003906, avg loss: 3.103938, ppl: 22.285547 +epoch: 0, batch: 36422, sum loss: 5017.308105, avg loss: 2.990052, ppl: 19.886726 +epoch: 0, batch: 36423, sum loss: 4762.516602, avg loss: 2.934391, ppl: 18.810045 +epoch: 0, batch: 36424, sum loss: 4573.935059, avg loss: 2.732339, ppl: 15.368792 +epoch: 0, batch: 36425, sum loss: 5372.955566, avg loss: 2.996629, ppl: 20.017942 +epoch: 0, batch: 36426, sum loss: 6639.464355, avg loss: 3.265846, ppl: 26.202257 +epoch: 0, batch: 36427, sum loss: 4535.631348, avg loss: 2.791158, ppl: 16.299879 +epoch: 0, batch: 36428, sum loss: 4788.109375, avg loss: 3.018985, ppl: 20.470495 +epoch: 0, batch: 36429, sum loss: 4416.524902, avg loss: 2.649385, ppl: 14.145336 +epoch: 0, batch: 36430, sum loss: 4610.256836, avg loss: 3.075555, ppl: 21.661903 +epoch: 0, batch: 36431, sum loss: 6333.041016, avg loss: 3.236097, ppl: 
25.434248 +epoch: 0, batch: 36432, sum loss: 4495.238281, avg loss: 2.939986, ppl: 18.915581 +epoch: 0, batch: 36433, sum loss: 5646.877930, avg loss: 2.751890, ppl: 15.672223 +epoch: 0, batch: 36434, sum loss: 4664.070312, avg loss: 2.895140, ppl: 18.086033 +epoch: 0, batch: 36435, sum loss: 4208.908203, avg loss: 2.668934, ppl: 14.424579 +epoch: 0, batch: 36436, sum loss: 4373.229492, avg loss: 2.682963, ppl: 14.628371 +epoch: 0, batch: 36437, sum loss: 3950.500732, avg loss: 2.525896, ppl: 12.502087 +epoch: 0, batch: 36438, sum loss: 4962.567871, avg loss: 2.982312, ppl: 19.733397 +epoch: 0, batch: 36439, sum loss: 4368.761230, avg loss: 2.442013, ppl: 11.496160 +epoch: 0, batch: 36440, sum loss: 4645.280273, avg loss: 2.788283, ppl: 16.253096 +epoch: 0, batch: 36441, sum loss: 3810.937500, avg loss: 2.476243, ppl: 11.896482 +epoch: 0, batch: 36442, sum loss: 4408.786621, avg loss: 2.868436, ppl: 17.609461 +epoch: 0, batch: 36443, sum loss: 4733.027344, avg loss: 2.894818, ppl: 18.080208 +epoch: 0, batch: 36444, sum loss: 4569.858398, avg loss: 2.961671, ppl: 19.330248 +epoch: 0, batch: 36445, sum loss: 5634.813965, avg loss: 3.026216, ppl: 20.619062 +epoch: 0, batch: 36446, sum loss: 4709.061035, avg loss: 2.954242, ppl: 19.187164 +epoch: 0, batch: 36447, sum loss: 5178.562988, avg loss: 2.834462, ppl: 17.021248 +epoch: 0, batch: 36448, sum loss: 5278.560059, avg loss: 3.018045, ppl: 20.451265 +epoch: 0, batch: 36449, sum loss: 4706.210449, avg loss: 3.011011, ppl: 20.307924 +epoch: 0, batch: 36450, sum loss: 4151.855957, avg loss: 2.676890, ppl: 14.539799 +epoch: 0, batch: 36451, sum loss: 5784.631836, avg loss: 3.059033, ppl: 21.306946 +epoch: 0, batch: 36452, sum loss: 5394.065918, avg loss: 2.963773, ppl: 19.370911 +epoch: 0, batch: 36453, sum loss: 4481.113770, avg loss: 2.665743, ppl: 14.378627 +epoch: 0, batch: 36454, sum loss: 4786.360352, avg loss: 3.089968, ppl: 21.976374 +epoch: 0, batch: 36455, sum loss: 5072.782227, avg loss: 3.061425, ppl: 
21.357981 +epoch: 0, batch: 36456, sum loss: 4776.626953, avg loss: 2.894925, ppl: 18.082151 +epoch: 0, batch: 36457, sum loss: 5034.044434, avg loss: 2.845701, ppl: 17.213617 +epoch: 0, batch: 36458, sum loss: 6162.042480, avg loss: 3.307591, ppl: 27.319239 +epoch: 0, batch: 36459, sum loss: 5235.314941, avg loss: 2.673808, ppl: 14.495056 +epoch: 0, batch: 36460, sum loss: 4630.476074, avg loss: 3.260899, ppl: 26.072962 +epoch: 0, batch: 36461, sum loss: 4729.004395, avg loss: 2.878274, ppl: 17.783552 +epoch: 0, batch: 36462, sum loss: 4537.247559, avg loss: 2.975244, ppl: 19.594410 +epoch: 0, batch: 36463, sum loss: 4916.182617, avg loss: 2.822148, ppl: 16.812931 +epoch: 0, batch: 36464, sum loss: 3967.193115, avg loss: 2.622071, ppl: 13.764196 +epoch: 0, batch: 36465, sum loss: 4172.136230, avg loss: 2.669313, ppl: 14.430052 +epoch: 0, batch: 36466, sum loss: 4636.208008, avg loss: 2.904892, ppl: 18.263275 +epoch: 0, batch: 36467, sum loss: 5136.890137, avg loss: 2.960744, ppl: 19.312328 +epoch: 0, batch: 36468, sum loss: 5039.024902, avg loss: 2.761110, ppl: 15.817384 +epoch: 0, batch: 36469, sum loss: 5723.991211, avg loss: 3.188853, ppl: 24.260586 +epoch: 0, batch: 36470, sum loss: 4489.400879, avg loss: 3.039540, ppl: 20.895636 +epoch: 0, batch: 36471, sum loss: 5658.922852, avg loss: 3.032649, ppl: 20.752129 +epoch: 0, batch: 36472, sum loss: 5136.354492, avg loss: 3.014292, ppl: 20.374670 +epoch: 0, batch: 36473, sum loss: 5118.753418, avg loss: 2.806334, ppl: 16.549137 +epoch: 0, batch: 36474, sum loss: 4984.396484, avg loss: 2.984668, ppl: 19.779943 +epoch: 0, batch: 36475, sum loss: 4547.824707, avg loss: 2.731426, ppl: 15.354771 +epoch: 0, batch: 36476, sum loss: 5141.554199, avg loss: 3.091734, ppl: 22.015228 +epoch: 0, batch: 36477, sum loss: 5297.693359, avg loss: 2.834507, ppl: 17.022001 +epoch: 0, batch: 36478, sum loss: 4369.779297, avg loss: 2.857933, ppl: 17.425472 +epoch: 0, batch: 36479, sum loss: 4482.137695, avg loss: 2.638103, ppl: 
13.986652 +epoch: 0, batch: 36480, sum loss: 4967.901367, avg loss: 2.850202, ppl: 17.291267 +epoch: 0, batch: 36481, sum loss: 4728.392578, avg loss: 2.865692, ppl: 17.561209 +epoch: 0, batch: 36482, sum loss: 5087.311523, avg loss: 3.064646, ppl: 21.426865 +epoch: 0, batch: 36483, sum loss: 4656.997070, avg loss: 2.674898, ppl: 14.510868 +epoch: 0, batch: 36484, sum loss: 5880.172852, avg loss: 3.013928, ppl: 20.367239 +epoch: 0, batch: 36485, sum loss: 6815.827637, avg loss: 3.401112, ppl: 29.997427 +epoch: 0, batch: 36486, sum loss: 4515.482422, avg loss: 2.816895, ppl: 16.724836 +epoch: 0, batch: 36487, sum loss: 4731.016602, avg loss: 2.888288, ppl: 17.962536 +epoch: 0, batch: 36488, sum loss: 5141.981934, avg loss: 3.141101, ppl: 23.129316 +epoch: 0, batch: 36489, sum loss: 4522.275391, avg loss: 2.767611, ppl: 15.920547 +epoch: 0, batch: 36490, sum loss: 4564.394043, avg loss: 2.910965, ppl: 18.374529 +epoch: 0, batch: 36491, sum loss: 4996.599121, avg loss: 2.946108, ppl: 19.031740 +epoch: 0, batch: 36492, sum loss: 4045.036133, avg loss: 2.782006, ppl: 16.151381 +epoch: 0, batch: 36493, sum loss: 5351.645508, avg loss: 3.023529, ppl: 20.563725 +epoch: 0, batch: 36494, sum loss: 6157.329590, avg loss: 3.276918, ppl: 26.494003 +epoch: 0, batch: 36495, sum loss: 5107.935059, avg loss: 2.930542, ppl: 18.737787 +epoch: 0, batch: 36496, sum loss: 4914.725586, avg loss: 2.978621, ppl: 19.660696 +epoch: 0, batch: 36497, sum loss: 5447.180176, avg loss: 3.137777, ppl: 23.052555 +epoch: 0, batch: 36498, sum loss: 4478.291016, avg loss: 2.879930, ppl: 17.813026 +epoch: 0, batch: 36499, sum loss: 5878.498047, avg loss: 3.087447, ppl: 21.921036 +epoch: 0, batch: 36500, sum loss: 5828.563965, avg loss: 2.993613, ppl: 19.957659 +epoch: 0, batch: 36501, sum loss: 4668.942383, avg loss: 2.914446, ppl: 18.438593 +epoch: 0, batch: 36502, sum loss: 4755.747070, avg loss: 3.000471, ppl: 20.095007 +epoch: 0, batch: 36503, sum loss: 4915.634766, avg loss: 2.856266, ppl: 
17.396456 +epoch: 0, batch: 36504, sum loss: 5007.821289, avg loss: 3.145616, ppl: 23.233990 +epoch: 0, batch: 36505, sum loss: 4029.365234, avg loss: 2.702458, ppl: 14.916357 +epoch: 0, batch: 36506, sum loss: 4507.846680, avg loss: 2.737005, ppl: 15.440663 +epoch: 0, batch: 36507, sum loss: 3956.266602, avg loss: 2.745501, ppl: 15.572411 +epoch: 0, batch: 36508, sum loss: 4468.328125, avg loss: 2.698266, ppl: 14.853950 +epoch: 0, batch: 36509, sum loss: 4607.950195, avg loss: 2.959506, ppl: 19.288433 +epoch: 0, batch: 36510, sum loss: 5021.393066, avg loss: 3.086289, ppl: 21.895681 +epoch: 0, batch: 36511, sum loss: 4928.031738, avg loss: 2.975865, ppl: 19.606569 +epoch: 0, batch: 36512, sum loss: 4886.347656, avg loss: 2.894756, ppl: 18.079086 +epoch: 0, batch: 36513, sum loss: 4934.287109, avg loss: 2.647150, ppl: 14.113754 +epoch: 0, batch: 36514, sum loss: 4068.185303, avg loss: 2.703113, ppl: 14.926129 +epoch: 0, batch: 36515, sum loss: 6467.160645, avg loss: 3.231964, ppl: 25.329365 +epoch: 0, batch: 36516, sum loss: 4132.056152, avg loss: 2.638605, ppl: 13.993674 +epoch: 0, batch: 36517, sum loss: 5902.300781, avg loss: 3.127875, ppl: 22.825432 +epoch: 0, batch: 36518, sum loss: 6165.361328, avg loss: 3.053671, ppl: 21.192993 +epoch: 0, batch: 36519, sum loss: 4909.003906, avg loss: 2.967959, ppl: 19.452175 +epoch: 0, batch: 36520, sum loss: 4733.122070, avg loss: 3.043808, ppl: 20.985012 +epoch: 0, batch: 36521, sum loss: 4018.365723, avg loss: 2.589153, ppl: 13.318490 +epoch: 0, batch: 36522, sum loss: 5091.098633, avg loss: 3.125291, ppl: 22.766510 +epoch: 0, batch: 36523, sum loss: 4556.384277, avg loss: 2.759773, ppl: 15.796249 +epoch: 0, batch: 36524, sum loss: 4066.032715, avg loss: 2.845369, ppl: 17.207912 +epoch: 0, batch: 36525, sum loss: 4879.656738, avg loss: 2.880553, ppl: 17.824127 +epoch: 0, batch: 36526, sum loss: 5461.051270, avg loss: 2.836910, ppl: 17.062952 +epoch: 0, batch: 36527, sum loss: 3842.069824, avg loss: 2.798303, ppl: 
16.416758 +epoch: 0, batch: 36528, sum loss: 4960.234375, avg loss: 2.929849, ppl: 18.724806 +epoch: 0, batch: 36529, sum loss: 4989.149414, avg loss: 3.068357, ppl: 21.506548 +epoch: 0, batch: 36530, sum loss: 4423.831055, avg loss: 2.656956, ppl: 14.252833 +epoch: 0, batch: 36531, sum loss: 4952.240723, avg loss: 2.694364, ppl: 14.796103 +epoch: 0, batch: 36532, sum loss: 4203.873535, avg loss: 2.574325, ppl: 13.122460 +epoch: 0, batch: 36533, sum loss: 5202.049316, avg loss: 2.977704, ppl: 19.642666 +epoch: 0, batch: 36534, sum loss: 5221.660645, avg loss: 2.966852, ppl: 19.430664 +epoch: 0, batch: 36535, sum loss: 5032.913086, avg loss: 3.050250, ppl: 21.120630 +epoch: 0, batch: 36536, sum loss: 5858.670898, avg loss: 3.099826, ppl: 22.194086 +epoch: 0, batch: 36537, sum loss: 5024.617676, avg loss: 2.978434, ppl: 19.657007 +epoch: 0, batch: 36538, sum loss: 5270.500000, avg loss: 2.894289, ppl: 18.070644 +epoch: 0, batch: 36539, sum loss: 5690.779297, avg loss: 3.235236, ppl: 25.412361 +epoch: 0, batch: 36540, sum loss: 4450.481445, avg loss: 2.790271, ppl: 16.285425 +epoch: 0, batch: 36541, sum loss: 5032.038086, avg loss: 3.092832, ppl: 22.039408 +epoch: 0, batch: 36542, sum loss: 4697.463867, avg loss: 2.826392, ppl: 16.884434 +epoch: 0, batch: 36543, sum loss: 4641.994629, avg loss: 2.789660, ppl: 16.275492 +epoch: 0, batch: 36544, sum loss: 3866.922363, avg loss: 2.674220, ppl: 14.501040 +epoch: 0, batch: 36545, sum loss: 4720.778320, avg loss: 3.079438, ppl: 21.746178 +epoch: 0, batch: 36546, sum loss: 5316.684082, avg loss: 3.080350, ppl: 21.766018 +epoch: 0, batch: 36547, sum loss: 4275.626465, avg loss: 2.749599, ppl: 15.636364 +epoch: 0, batch: 36548, sum loss: 4811.937500, avg loss: 2.800895, ppl: 16.459372 +epoch: 0, batch: 36549, sum loss: 5316.640137, avg loss: 3.045040, ppl: 21.010880 +epoch: 0, batch: 36550, sum loss: 4676.655762, avg loss: 2.834337, ppl: 17.019108 +epoch: 0, batch: 36551, sum loss: 6611.598633, avg loss: 3.201743, ppl: 
24.575325 +epoch: 0, batch: 36552, sum loss: 5173.394531, avg loss: 3.109011, ppl: 22.398886 +epoch: 0, batch: 36553, sum loss: 4166.273438, avg loss: 2.717726, ppl: 15.145838 +epoch: 0, batch: 36554, sum loss: 4486.633789, avg loss: 2.926702, ppl: 18.665964 +epoch: 0, batch: 36555, sum loss: 4761.236328, avg loss: 2.753752, ppl: 15.701426 +epoch: 0, batch: 36556, sum loss: 6154.547852, avg loss: 3.161042, ppl: 23.595163 +epoch: 0, batch: 36557, sum loss: 4173.635742, avg loss: 2.780570, ppl: 16.128216 +epoch: 0, batch: 36558, sum loss: 5571.432129, avg loss: 3.013214, ppl: 20.352701 +epoch: 0, batch: 36559, sum loss: 4542.666992, avg loss: 3.044683, ppl: 21.003372 +epoch: 0, batch: 36560, sum loss: 4107.823730, avg loss: 2.477578, ppl: 11.912374 +epoch: 0, batch: 36561, sum loss: 5092.974121, avg loss: 2.732282, ppl: 15.367919 +epoch: 0, batch: 36562, sum loss: 4334.036133, avg loss: 2.904850, ppl: 18.262505 +epoch: 0, batch: 36563, sum loss: 5405.427734, avg loss: 3.244554, ppl: 25.650274 +epoch: 0, batch: 36564, sum loss: 5254.029785, avg loss: 2.855451, ppl: 17.382273 +epoch: 0, batch: 36565, sum loss: 4136.059082, avg loss: 2.854423, ppl: 17.364420 +epoch: 0, batch: 36566, sum loss: 5228.800293, avg loss: 3.239653, ppl: 25.524872 +epoch: 0, batch: 36567, sum loss: 4882.186523, avg loss: 2.897440, ppl: 18.127678 +epoch: 0, batch: 36568, sum loss: 5151.824707, avg loss: 3.016291, ppl: 20.415424 +epoch: 0, batch: 36569, sum loss: 4626.901855, avg loss: 3.149695, ppl: 23.328941 +epoch: 0, batch: 36570, sum loss: 4319.258301, avg loss: 2.646605, ppl: 14.106061 +epoch: 0, batch: 36571, sum loss: 6013.888672, avg loss: 3.057391, ppl: 21.271990 +epoch: 0, batch: 36572, sum loss: 4415.274902, avg loss: 2.618787, ppl: 13.719073 +epoch: 0, batch: 36573, sum loss: 4616.983887, avg loss: 2.771299, ppl: 15.979372 +epoch: 0, batch: 36574, sum loss: 5701.870605, avg loss: 2.933061, ppl: 18.785049 +epoch: 0, batch: 36575, sum loss: 5531.130859, avg loss: 2.903481, ppl: 
18.237516 +epoch: 0, batch: 36576, sum loss: 4668.286621, avg loss: 2.606525, ppl: 13.551879 +epoch: 0, batch: 36577, sum loss: 4833.000977, avg loss: 3.181699, ppl: 24.087645 +epoch: 0, batch: 36578, sum loss: 4300.979492, avg loss: 2.744722, ppl: 15.560293 +epoch: 0, batch: 36579, sum loss: 5683.272461, avg loss: 3.171469, ppl: 23.842489 +epoch: 0, batch: 36580, sum loss: 4559.670898, avg loss: 2.997811, ppl: 20.041624 +epoch: 0, batch: 36581, sum loss: 4377.328125, avg loss: 2.979801, ppl: 19.683903 +epoch: 0, batch: 36582, sum loss: 5449.270020, avg loss: 3.005665, ppl: 20.199640 +epoch: 0, batch: 36583, sum loss: 4769.103027, avg loss: 2.758301, ppl: 15.773029 +epoch: 0, batch: 36584, sum loss: 4791.703125, avg loss: 2.823632, ppl: 16.837891 +epoch: 0, batch: 36585, sum loss: 5708.651367, avg loss: 3.014071, ppl: 20.370169 +epoch: 0, batch: 36586, sum loss: 4015.216064, avg loss: 2.573857, ppl: 13.116311 +epoch: 0, batch: 36587, sum loss: 4330.194824, avg loss: 2.884873, ppl: 17.901300 +epoch: 0, batch: 36588, sum loss: 5170.481445, avg loss: 3.020141, ppl: 20.494179 +epoch: 0, batch: 36589, sum loss: 4517.971680, avg loss: 2.861287, ppl: 17.484009 +epoch: 0, batch: 36590, sum loss: 5057.535156, avg loss: 2.926814, ppl: 18.668066 +epoch: 0, batch: 36591, sum loss: 4346.092773, avg loss: 2.689414, ppl: 14.723042 +epoch: 0, batch: 36592, sum loss: 5080.492676, avg loss: 2.924866, ppl: 18.631737 +epoch: 0, batch: 36593, sum loss: 5036.744629, avg loss: 2.996279, ppl: 20.010933 +epoch: 0, batch: 36594, sum loss: 5050.806152, avg loss: 2.981586, ppl: 19.719057 +epoch: 0, batch: 36595, sum loss: 4107.128906, avg loss: 2.723560, ppl: 15.234466 +epoch: 0, batch: 36596, sum loss: 5504.766602, avg loss: 3.127708, ppl: 22.821617 +epoch: 0, batch: 36597, sum loss: 5270.284180, avg loss: 2.844190, ppl: 17.187634 +epoch: 0, batch: 36598, sum loss: 4189.514160, avg loss: 2.857786, ppl: 17.422909 +epoch: 0, batch: 36599, sum loss: 4365.687500, avg loss: 2.920192, ppl: 
18.544851 +epoch: 0, batch: 36600, sum loss: 4542.860352, avg loss: 2.891700, ppl: 18.023924 +epoch: 0, batch: 36601, sum loss: 5776.874023, avg loss: 3.122635, ppl: 22.706123 +epoch: 0, batch: 36602, sum loss: 4849.493164, avg loss: 2.916111, ppl: 18.469330 +epoch: 0, batch: 36603, sum loss: 4349.752441, avg loss: 2.615606, ppl: 13.675499 +epoch: 0, batch: 36604, sum loss: 4605.155273, avg loss: 2.736278, ppl: 15.429443 +epoch: 0, batch: 36605, sum loss: 4433.780273, avg loss: 2.851306, ppl: 17.310369 +epoch: 0, batch: 36606, sum loss: 6640.851074, avg loss: 3.323749, ppl: 27.764252 +epoch: 0, batch: 36607, sum loss: 5459.042969, avg loss: 2.952430, ppl: 19.152433 +epoch: 0, batch: 36608, sum loss: 4956.213379, avg loss: 3.029470, ppl: 20.686275 +epoch: 0, batch: 36609, sum loss: 4351.801758, avg loss: 2.764804, ppl: 15.875930 +epoch: 0, batch: 36610, sum loss: 5061.186523, avg loss: 2.989478, ppl: 19.875311 +epoch: 0, batch: 36611, sum loss: 4634.181152, avg loss: 2.571688, ppl: 13.087894 +epoch: 0, batch: 36612, sum loss: 4260.883301, avg loss: 2.902509, ppl: 18.219801 +epoch: 0, batch: 36613, sum loss: 4592.117676, avg loss: 3.015179, ppl: 20.392744 +epoch: 0, batch: 36614, sum loss: 4821.265625, avg loss: 2.660743, ppl: 14.306909 +epoch: 0, batch: 36615, sum loss: 5311.334961, avg loss: 2.927969, ppl: 18.689629 +epoch: 0, batch: 36616, sum loss: 5199.701172, avg loss: 2.972957, ppl: 19.549635 +epoch: 0, batch: 36617, sum loss: 4985.638672, avg loss: 3.038171, ppl: 20.867039 +epoch: 0, batch: 36618, sum loss: 4398.846680, avg loss: 2.756170, ppl: 15.739438 +epoch: 0, batch: 36619, sum loss: 5515.770508, avg loss: 3.123313, ppl: 22.721525 +epoch: 0, batch: 36620, sum loss: 4922.981445, avg loss: 2.930346, ppl: 18.734116 +epoch: 0, batch: 36621, sum loss: 4700.658691, avg loss: 3.080379, ppl: 21.766655 +epoch: 0, batch: 36622, sum loss: 5508.332520, avg loss: 3.124409, ppl: 22.746441 +epoch: 0, batch: 36623, sum loss: 4816.203613, avg loss: 2.894353, ppl: 
18.071808 +epoch: 0, batch: 36624, sum loss: 5105.949219, avg loss: 2.989432, ppl: 19.874384 +epoch: 0, batch: 36625, sum loss: 4790.779297, avg loss: 2.839822, ppl: 17.112717 +epoch: 0, batch: 36626, sum loss: 5594.697754, avg loss: 2.964864, ppl: 19.392057 +epoch: 0, batch: 36627, sum loss: 5324.234375, avg loss: 2.842624, ppl: 17.160736 +epoch: 0, batch: 36628, sum loss: 4080.494629, avg loss: 2.456649, ppl: 11.665658 +epoch: 0, batch: 36629, sum loss: 4487.465332, avg loss: 3.067304, ppl: 21.483902 +epoch: 0, batch: 36630, sum loss: 4726.992188, avg loss: 2.795383, ppl: 16.368896 +epoch: 0, batch: 36631, sum loss: 5038.375000, avg loss: 3.092925, ppl: 22.041456 +epoch: 0, batch: 36632, sum loss: 5369.041992, avg loss: 2.997790, ppl: 20.041195 +epoch: 0, batch: 36633, sum loss: 5602.123535, avg loss: 2.936123, ppl: 18.842659 +epoch: 0, batch: 36634, sum loss: 4536.538086, avg loss: 2.863976, ppl: 17.531092 +epoch: 0, batch: 36635, sum loss: 4704.199707, avg loss: 3.042820, ppl: 20.964279 +epoch: 0, batch: 36636, sum loss: 4301.107422, avg loss: 2.625829, ppl: 13.816016 +epoch: 0, batch: 36637, sum loss: 4450.754395, avg loss: 2.827671, ppl: 16.906042 +epoch: 0, batch: 36638, sum loss: 5382.335449, avg loss: 3.044307, ppl: 20.995485 +epoch: 0, batch: 36639, sum loss: 4253.077637, avg loss: 2.719359, ppl: 15.170597 +epoch: 0, batch: 36640, sum loss: 4884.745605, avg loss: 3.159603, ppl: 23.561232 +epoch: 0, batch: 36641, sum loss: 4752.995117, avg loss: 2.898168, ppl: 18.140879 +epoch: 0, batch: 36642, sum loss: 5450.590820, avg loss: 2.963889, ppl: 19.373161 +epoch: 0, batch: 36643, sum loss: 5189.986328, avg loss: 2.897815, ppl: 18.134474 +epoch: 0, batch: 36644, sum loss: 4170.963379, avg loss: 2.808729, ppl: 16.588827 +epoch: 0, batch: 36645, sum loss: 5069.895508, avg loss: 2.878987, ppl: 17.796228 +epoch: 0, batch: 36646, sum loss: 4665.806641, avg loss: 2.860703, ppl: 17.473806 +epoch: 0, batch: 36647, sum loss: 4164.729980, avg loss: 2.558188, ppl: 
12.912398 +epoch: 0, batch: 36648, sum loss: 6422.845703, avg loss: 3.238954, ppl: 25.507029 +epoch: 0, batch: 36649, sum loss: 4860.169434, avg loss: 3.081908, ppl: 21.799967 +epoch: 0, batch: 36650, sum loss: 5621.385742, avg loss: 3.075156, ppl: 21.653269 +epoch: 0, batch: 36651, sum loss: 5958.269531, avg loss: 3.368157, ppl: 29.024988 +epoch: 0, batch: 36652, sum loss: 5072.032715, avg loss: 3.010108, ppl: 20.289600 +epoch: 0, batch: 36653, sum loss: 5060.509766, avg loss: 2.976771, ppl: 19.624340 +epoch: 0, batch: 36654, sum loss: 5349.020508, avg loss: 3.172610, ppl: 23.869703 +epoch: 0, batch: 36655, sum loss: 5003.360840, avg loss: 2.903866, ppl: 18.244539 +epoch: 0, batch: 36656, sum loss: 4929.973145, avg loss: 3.002420, ppl: 20.134201 +epoch: 0, batch: 36657, sum loss: 4076.235840, avg loss: 2.735729, ppl: 15.420978 +epoch: 0, batch: 36658, sum loss: 5679.748047, avg loss: 2.902273, ppl: 18.215502 +epoch: 0, batch: 36659, sum loss: 4083.216553, avg loss: 2.889750, ppl: 17.988808 +epoch: 0, batch: 36660, sum loss: 4546.488770, avg loss: 2.895853, ppl: 18.098930 +epoch: 0, batch: 36661, sum loss: 4671.415527, avg loss: 2.899699, ppl: 18.168684 +epoch: 0, batch: 36662, sum loss: 5012.161133, avg loss: 2.903917, ppl: 18.245474 +epoch: 0, batch: 36663, sum loss: 4835.379395, avg loss: 2.759920, ppl: 15.798577 +epoch: 0, batch: 36664, sum loss: 4870.358887, avg loss: 3.074722, ppl: 21.643854 +epoch: 0, batch: 36665, sum loss: 3749.592285, avg loss: 2.474978, ppl: 11.881451 +epoch: 0, batch: 36666, sum loss: 4762.583496, avg loss: 2.698348, ppl: 14.855165 +epoch: 0, batch: 36667, sum loss: 5010.581055, avg loss: 2.964841, ppl: 19.391617 +epoch: 0, batch: 36668, sum loss: 5288.073242, avg loss: 3.001177, ppl: 20.109188 +epoch: 0, batch: 36669, sum loss: 5369.772461, avg loss: 3.068441, ppl: 21.508354 +epoch: 0, batch: 36670, sum loss: 5357.163086, avg loss: 3.145721, ppl: 23.236427 +epoch: 0, batch: 36671, sum loss: 5772.295410, avg loss: 3.126921, ppl: 
22.803652 +epoch: 0, batch: 36672, sum loss: 5117.134277, avg loss: 2.822468, ppl: 16.818308 +epoch: 0, batch: 36673, sum loss: 5265.780762, avg loss: 2.950017, ppl: 19.106287 +epoch: 0, batch: 36674, sum loss: 4895.831055, avg loss: 2.815314, ppl: 16.698420 +epoch: 0, batch: 36675, sum loss: 5099.784180, avg loss: 2.970171, ppl: 19.495262 +epoch: 0, batch: 36676, sum loss: 5510.727539, avg loss: 2.874662, ppl: 17.719440 +epoch: 0, batch: 36677, sum loss: 5882.378906, avg loss: 2.981439, ppl: 19.716169 +epoch: 0, batch: 36678, sum loss: 5188.334473, avg loss: 2.978378, ppl: 19.655905 +epoch: 0, batch: 36679, sum loss: 4574.833984, avg loss: 2.899134, ppl: 18.158421 +epoch: 0, batch: 36680, sum loss: 4667.262695, avg loss: 2.891737, ppl: 18.024582 +epoch: 0, batch: 36681, sum loss: 4596.185547, avg loss: 2.818017, ppl: 16.743616 +epoch: 0, batch: 36682, sum loss: 4392.964355, avg loss: 2.926692, ppl: 18.665777 +epoch: 0, batch: 36683, sum loss: 4813.741211, avg loss: 2.973281, ppl: 19.555971 +epoch: 0, batch: 36684, sum loss: 6107.598145, avg loss: 3.186019, ppl: 24.191927 +epoch: 0, batch: 36685, sum loss: 7101.082031, avg loss: 3.224833, ppl: 25.149368 +epoch: 0, batch: 36686, sum loss: 4717.871582, avg loss: 2.848956, ppl: 17.269749 +epoch: 0, batch: 36687, sum loss: 5579.918945, avg loss: 2.841100, ppl: 17.134594 +epoch: 0, batch: 36688, sum loss: 5896.042969, avg loss: 3.252092, ppl: 25.844358 +epoch: 0, batch: 36689, sum loss: 4104.699219, avg loss: 2.942437, ppl: 18.962000 +epoch: 0, batch: 36690, sum loss: 5394.889160, avg loss: 3.096951, ppl: 22.130379 +epoch: 0, batch: 36691, sum loss: 5324.384766, avg loss: 3.104597, ppl: 22.300238 +epoch: 0, batch: 36692, sum loss: 4953.997559, avg loss: 2.805208, ppl: 16.530514 +epoch: 0, batch: 36693, sum loss: 5172.456055, avg loss: 2.780890, ppl: 16.133381 +epoch: 0, batch: 36694, sum loss: 4973.469727, avg loss: 3.056834, ppl: 21.260151 +epoch: 0, batch: 36695, sum loss: 5068.464355, avg loss: 2.906229, ppl: 
18.287714 +epoch: 0, batch: 36696, sum loss: 5200.968750, avg loss: 2.980498, ppl: 19.697620 +epoch: 0, batch: 36697, sum loss: 5691.182129, avg loss: 3.252104, ppl: 25.844662 +epoch: 0, batch: 36698, sum loss: 5140.465820, avg loss: 2.914096, ppl: 18.432148 +epoch: 0, batch: 36699, sum loss: 4929.572754, avg loss: 2.877742, ppl: 17.774099 +epoch: 0, batch: 36700, sum loss: 7177.576172, avg loss: 3.253661, ppl: 25.884930 +epoch: 0, batch: 36701, sum loss: 4804.067383, avg loss: 2.859564, ppl: 17.453917 +epoch: 0, batch: 36702, sum loss: 4502.388672, avg loss: 2.891708, ppl: 18.024063 +epoch: 0, batch: 36703, sum loss: 4976.739746, avg loss: 2.799066, ppl: 16.429300 +epoch: 0, batch: 36704, sum loss: 4210.384766, avg loss: 2.868109, ppl: 17.603706 +epoch: 0, batch: 36705, sum loss: 5580.047363, avg loss: 3.349368, ppl: 28.484728 +epoch: 0, batch: 36706, sum loss: 4202.598145, avg loss: 2.755802, ppl: 15.733657 +epoch: 0, batch: 36707, sum loss: 4567.305664, avg loss: 2.795169, ppl: 16.365389 +epoch: 0, batch: 36708, sum loss: 4576.270020, avg loss: 2.609048, ppl: 13.586109 +epoch: 0, batch: 36709, sum loss: 4992.763672, avg loss: 3.014954, ppl: 20.388149 +epoch: 0, batch: 36710, sum loss: 4949.950195, avg loss: 3.005434, ppl: 20.194979 +epoch: 0, batch: 36711, sum loss: 3577.757568, avg loss: 2.469122, ppl: 11.812071 +epoch: 0, batch: 36712, sum loss: 4923.092773, avg loss: 2.907911, ppl: 18.318487 +epoch: 0, batch: 36713, sum loss: 4616.436523, avg loss: 2.779312, ppl: 16.107929 +epoch: 0, batch: 36714, sum loss: 5886.020996, avg loss: 2.969738, ppl: 19.486813 +epoch: 0, batch: 36715, sum loss: 4873.719727, avg loss: 2.800988, ppl: 16.460909 +epoch: 0, batch: 36716, sum loss: 4622.318359, avg loss: 2.706275, ppl: 14.973399 +epoch: 0, batch: 36717, sum loss: 5526.882324, avg loss: 3.152814, ppl: 23.401817 +epoch: 0, batch: 36718, sum loss: 5344.363281, avg loss: 2.674856, ppl: 14.510266 +epoch: 0, batch: 36719, sum loss: 4985.798340, avg loss: 2.844152, ppl: 
17.186977 +epoch: 0, batch: 36720, sum loss: 6419.553711, avg loss: 3.362784, ppl: 28.869442 +epoch: 0, batch: 36721, sum loss: 5881.997559, avg loss: 3.110522, ppl: 22.432756 +epoch: 0, batch: 36722, sum loss: 4013.844482, avg loss: 2.692049, ppl: 14.761889 +epoch: 0, batch: 36723, sum loss: 5115.203125, avg loss: 3.007174, ppl: 20.230148 +epoch: 0, batch: 36724, sum loss: 6224.410156, avg loss: 3.289857, ppl: 26.839035 +epoch: 0, batch: 36725, sum loss: 5350.693848, avg loss: 3.134560, ppl: 22.978519 +epoch: 0, batch: 36726, sum loss: 5784.888184, avg loss: 3.101817, ppl: 22.238314 +epoch: 0, batch: 36727, sum loss: 5567.823730, avg loss: 2.939717, ppl: 18.910496 +epoch: 0, batch: 36728, sum loss: 5873.531738, avg loss: 3.005902, ppl: 20.204424 +epoch: 0, batch: 36729, sum loss: 5449.512695, avg loss: 3.073611, ppl: 21.619837 +epoch: 0, batch: 36730, sum loss: 4097.131348, avg loss: 2.772078, ppl: 15.991835 +epoch: 0, batch: 36731, sum loss: 4035.704590, avg loss: 2.769873, ppl: 15.956602 +epoch: 0, batch: 36732, sum loss: 4687.609375, avg loss: 2.812003, ppl: 16.643223 +epoch: 0, batch: 36733, sum loss: 5044.746582, avg loss: 3.004614, ppl: 20.178419 +epoch: 0, batch: 36734, sum loss: 5899.699707, avg loss: 3.202877, ppl: 24.603218 +epoch: 0, batch: 36735, sum loss: 4756.152344, avg loss: 2.950467, ppl: 19.114872 +epoch: 0, batch: 36736, sum loss: 6071.997559, avg loss: 3.112249, ppl: 22.471529 +epoch: 0, batch: 36737, sum loss: 4607.761230, avg loss: 2.840790, ppl: 17.129290 +epoch: 0, batch: 36738, sum loss: 4430.347656, avg loss: 2.604555, ppl: 13.525201 +epoch: 0, batch: 36739, sum loss: 4615.895020, avg loss: 3.022852, ppl: 20.549814 +epoch: 0, batch: 36740, sum loss: 3470.512451, avg loss: 2.496772, ppl: 12.143227 +epoch: 0, batch: 36741, sum loss: 5389.553711, avg loss: 2.933889, ppl: 18.800602 +epoch: 0, batch: 36742, sum loss: 4892.134766, avg loss: 2.797104, ppl: 16.397091 +epoch: 0, batch: 36743, sum loss: 5185.109863, avg loss: 2.891863, ppl: 
18.026859 +epoch: 0, batch: 36744, sum loss: 4477.751953, avg loss: 2.700695, ppl: 14.890074 +epoch: 0, batch: 36745, sum loss: 5639.772461, avg loss: 2.828371, ppl: 16.917885 +epoch: 0, batch: 36746, sum loss: 5280.142090, avg loss: 3.071636, ppl: 21.577173 +epoch: 0, batch: 36747, sum loss: 4765.858887, avg loss: 2.826725, ppl: 16.890064 +epoch: 0, batch: 36748, sum loss: 5685.165039, avg loss: 2.936552, ppl: 18.850739 +epoch: 0, batch: 36749, sum loss: 4651.954102, avg loss: 2.944275, ppl: 18.996878 +epoch: 0, batch: 36750, sum loss: 3937.297852, avg loss: 2.694934, ppl: 14.804537 +epoch: 0, batch: 36751, sum loss: 4716.401367, avg loss: 2.719954, ppl: 15.179631 +epoch: 0, batch: 36752, sum loss: 4563.813965, avg loss: 3.069142, ppl: 21.523424 +epoch: 0, batch: 36753, sum loss: 5871.528320, avg loss: 3.032814, ppl: 20.755558 +epoch: 0, batch: 36754, sum loss: 5173.562012, avg loss: 3.052249, ppl: 21.162886 +epoch: 0, batch: 36755, sum loss: 4498.859863, avg loss: 2.867342, ppl: 17.590206 +epoch: 0, batch: 36756, sum loss: 4049.206055, avg loss: 2.833594, ppl: 17.006477 +epoch: 0, batch: 36757, sum loss: 4608.172852, avg loss: 2.912878, ppl: 18.409702 +epoch: 0, batch: 36758, sum loss: 5388.858398, avg loss: 3.002150, ppl: 20.128759 +epoch: 0, batch: 36759, sum loss: 5163.461914, avg loss: 2.855897, ppl: 17.390028 +epoch: 0, batch: 36760, sum loss: 4283.409180, avg loss: 2.913884, ppl: 18.428234 +epoch: 0, batch: 36761, sum loss: 5165.470215, avg loss: 3.083863, ppl: 21.842619 +epoch: 0, batch: 36762, sum loss: 4919.595215, avg loss: 3.067079, ppl: 21.479076 +epoch: 0, batch: 36763, sum loss: 5157.828125, avg loss: 3.097795, ppl: 22.149054 +epoch: 0, batch: 36764, sum loss: 3679.975586, avg loss: 2.591532, ppl: 13.350211 +epoch: 0, batch: 36765, sum loss: 5054.976074, avg loss: 2.835096, ppl: 17.032038 +epoch: 0, batch: 36766, sum loss: 4386.068848, avg loss: 2.813386, ppl: 16.666258 +epoch: 0, batch: 36767, sum loss: 5101.434570, avg loss: 3.073153, ppl: 
21.609941 +epoch: 0, batch: 36768, sum loss: 4529.501953, avg loss: 2.792541, ppl: 16.322447 +epoch: 0, batch: 36769, sum loss: 4270.865234, avg loss: 2.667624, ppl: 14.405694 +epoch: 0, batch: 36770, sum loss: 5888.865723, avg loss: 2.931242, ppl: 18.750910 +epoch: 0, batch: 36771, sum loss: 4661.544922, avg loss: 2.900775, ppl: 18.188231 +epoch: 0, batch: 36772, sum loss: 4302.063965, avg loss: 2.946619, ppl: 19.041471 +epoch: 0, batch: 36773, sum loss: 3952.376465, avg loss: 2.761968, ppl: 15.830970 +epoch: 0, batch: 36774, sum loss: 4438.823242, avg loss: 2.908797, ppl: 18.334721 +epoch: 0, batch: 36775, sum loss: 4205.155273, avg loss: 2.814696, ppl: 16.688095 +epoch: 0, batch: 36776, sum loss: 5199.303711, avg loss: 2.924243, ppl: 18.620125 +epoch: 0, batch: 36777, sum loss: 3765.106934, avg loss: 2.760342, ppl: 15.805253 +epoch: 0, batch: 36778, sum loss: 4228.236816, avg loss: 2.792759, ppl: 16.326000 +epoch: 0, batch: 36779, sum loss: 4746.655273, avg loss: 2.930034, ppl: 18.728270 +epoch: 0, batch: 36780, sum loss: 4380.408691, avg loss: 2.749786, ppl: 15.639279 +epoch: 0, batch: 36781, sum loss: 5112.259277, avg loss: 2.961911, ppl: 19.334894 +epoch: 0, batch: 36782, sum loss: 5353.930664, avg loss: 2.924047, ppl: 18.616486 +epoch: 0, batch: 36783, sum loss: 4862.479492, avg loss: 2.828668, ppl: 16.922899 +epoch: 0, batch: 36784, sum loss: 5481.376953, avg loss: 2.951738, ppl: 19.139181 +epoch: 0, batch: 36785, sum loss: 5599.142578, avg loss: 3.046323, ppl: 21.037851 +epoch: 0, batch: 36786, sum loss: 5336.082520, avg loss: 2.989402, ppl: 19.873796 +epoch: 0, batch: 36787, sum loss: 5041.244141, avg loss: 2.827394, ppl: 16.901367 +epoch: 0, batch: 36788, sum loss: 4235.972656, avg loss: 2.856354, ppl: 17.397978 +epoch: 0, batch: 36789, sum loss: 4454.560059, avg loss: 2.837299, ppl: 17.069605 +epoch: 0, batch: 36790, sum loss: 4501.803223, avg loss: 2.989245, ppl: 19.870678 +epoch: 0, batch: 36791, sum loss: 4574.697266, avg loss: 2.830877, ppl: 
16.960329 +epoch: 0, batch: 36792, sum loss: 5458.154297, avg loss: 3.162314, ppl: 23.625206 +epoch: 0, batch: 36793, sum loss: 6184.475586, avg loss: 3.184591, ppl: 24.157408 +epoch: 0, batch: 36794, sum loss: 4760.072266, avg loss: 3.005096, ppl: 20.188158 +epoch: 0, batch: 36795, sum loss: 5066.748535, avg loss: 3.083839, ppl: 21.842102 +epoch: 0, batch: 36796, sum loss: 5221.072266, avg loss: 2.879797, ppl: 17.810661 +epoch: 0, batch: 36797, sum loss: 5425.394531, avg loss: 3.121631, ppl: 22.683344 +epoch: 0, batch: 36798, sum loss: 5757.671387, avg loss: 2.927133, ppl: 18.674021 +epoch: 0, batch: 36799, sum loss: 5507.946777, avg loss: 3.206023, ppl: 24.680723 +epoch: 0, batch: 36800, sum loss: 4504.333984, avg loss: 2.785612, ppl: 16.209728 +epoch: 0, batch: 36801, sum loss: 4572.270996, avg loss: 3.085203, ppl: 21.871910 +epoch: 0, batch: 36802, sum loss: 5765.328125, avg loss: 2.908844, ppl: 18.335581 +epoch: 0, batch: 36803, sum loss: 5513.171875, avg loss: 3.029216, ppl: 20.681004 +epoch: 0, batch: 36804, sum loss: 5634.527344, avg loss: 2.839983, ppl: 17.115482 +epoch: 0, batch: 36805, sum loss: 5326.354980, avg loss: 3.055855, ppl: 21.239332 +epoch: 0, batch: 36806, sum loss: 4269.915039, avg loss: 2.584694, ppl: 13.259236 +epoch: 0, batch: 36807, sum loss: 5310.916992, avg loss: 3.091337, ppl: 22.006481 +epoch: 0, batch: 36808, sum loss: 5459.435059, avg loss: 2.991471, ppl: 19.914961 +epoch: 0, batch: 36809, sum loss: 4761.996582, avg loss: 2.861777, ppl: 17.492580 +epoch: 0, batch: 36810, sum loss: 5786.467773, avg loss: 2.976578, ppl: 19.620565 +epoch: 0, batch: 36811, sum loss: 5309.142578, avg loss: 2.984341, ppl: 19.773466 +epoch: 0, batch: 36812, sum loss: 4594.607422, avg loss: 2.811877, ppl: 16.641129 +epoch: 0, batch: 36813, sum loss: 4400.890625, avg loss: 2.889620, ppl: 17.986465 +epoch: 0, batch: 36814, sum loss: 4740.245605, avg loss: 2.803221, ppl: 16.497692 +epoch: 0, batch: 36815, sum loss: 4088.636230, avg loss: 2.827549, ppl: 
16.903982 +epoch: 0, batch: 36816, sum loss: 5905.545898, avg loss: 2.970596, ppl: 19.503546 +epoch: 0, batch: 36817, sum loss: 6401.000000, avg loss: 3.182994, ppl: 24.118849 +epoch: 0, batch: 36818, sum loss: 5797.923828, avg loss: 2.988621, ppl: 19.858274 +epoch: 0, batch: 36819, sum loss: 4905.072754, avg loss: 2.895556, ppl: 18.093567 +epoch: 0, batch: 36820, sum loss: 4056.668701, avg loss: 2.895552, ppl: 18.093489 +epoch: 0, batch: 36821, sum loss: 6801.288574, avg loss: 3.271423, ppl: 26.348810 +epoch: 0, batch: 36822, sum loss: 4309.653320, avg loss: 2.557658, ppl: 12.905554 +epoch: 0, batch: 36823, sum loss: 4337.847656, avg loss: 2.780672, ppl: 16.129850 +epoch: 0, batch: 36824, sum loss: 5393.576172, avg loss: 2.838724, ppl: 17.093946 +epoch: 0, batch: 36825, sum loss: 5522.761719, avg loss: 3.150463, ppl: 23.346874 +epoch: 0, batch: 36826, sum loss: 4671.895020, avg loss: 3.049540, ppl: 21.105629 +epoch: 0, batch: 36827, sum loss: 3874.373047, avg loss: 2.696154, ppl: 14.822613 +epoch: 0, batch: 36828, sum loss: 5983.453613, avg loss: 3.316770, ppl: 27.571161 +epoch: 0, batch: 36829, sum loss: 5098.565918, avg loss: 2.820003, ppl: 16.776905 +epoch: 0, batch: 36830, sum loss: 3883.306152, avg loss: 2.738580, ppl: 15.465009 +epoch: 0, batch: 36831, sum loss: 5876.850586, avg loss: 2.866756, ppl: 17.579905 +epoch: 0, batch: 36832, sum loss: 5471.024414, avg loss: 3.053027, ppl: 21.179361 +epoch: 0, batch: 36833, sum loss: 4413.020020, avg loss: 2.777231, ppl: 16.074450 +epoch: 0, batch: 36834, sum loss: 5335.932129, avg loss: 3.100483, ppl: 22.208685 +epoch: 0, batch: 36835, sum loss: 5224.925781, avg loss: 2.978863, ppl: 19.665443 +epoch: 0, batch: 36836, sum loss: 4925.320801, avg loss: 2.968849, ppl: 19.469501 +epoch: 0, batch: 36837, sum loss: 5012.427734, avg loss: 2.988925, ppl: 19.864321 +epoch: 0, batch: 36838, sum loss: 5035.866699, avg loss: 2.948400, ppl: 19.075399 +epoch: 0, batch: 36839, sum loss: 3241.488037, avg loss: 2.254164, ppl: 
9.527328 +epoch: 0, batch: 36840, sum loss: 4289.385254, avg loss: 2.482283, ppl: 11.968559 +epoch: 0, batch: 36841, sum loss: 4001.145752, avg loss: 2.725576, ppl: 15.265206 +epoch: 0, batch: 36842, sum loss: 4479.466797, avg loss: 2.897456, ppl: 18.127968 +epoch: 0, batch: 36843, sum loss: 5272.941895, avg loss: 3.254903, ppl: 25.917089 +epoch: 0, batch: 36844, sum loss: 5070.726562, avg loss: 2.961873, ppl: 19.334146 +epoch: 0, batch: 36845, sum loss: 5281.847168, avg loss: 2.898928, ppl: 18.154676 +epoch: 0, batch: 36846, sum loss: 5428.424316, avg loss: 2.890535, ppl: 18.002932 +epoch: 0, batch: 36847, sum loss: 4550.004395, avg loss: 2.782877, ppl: 16.165466 +epoch: 0, batch: 36848, sum loss: 5128.392090, avg loss: 2.869833, ppl: 17.634077 +epoch: 0, batch: 36849, sum loss: 4996.945312, avg loss: 2.818356, ppl: 16.749296 +epoch: 0, batch: 36850, sum loss: 5237.400879, avg loss: 2.934118, ppl: 18.804914 +epoch: 0, batch: 36851, sum loss: 5729.090332, avg loss: 3.002668, ppl: 20.139194 +epoch: 0, batch: 36852, sum loss: 5580.188477, avg loss: 2.960312, ppl: 19.303997 +epoch: 0, batch: 36853, sum loss: 4657.379883, avg loss: 2.744478, ppl: 15.556499 +epoch: 0, batch: 36854, sum loss: 5770.555664, avg loss: 2.982199, ppl: 19.731161 +epoch: 0, batch: 36855, sum loss: 4189.323242, avg loss: 2.956474, ppl: 19.230043 +epoch: 0, batch: 36856, sum loss: 3875.791260, avg loss: 2.617010, ppl: 13.694710 +epoch: 0, batch: 36857, sum loss: 4833.144531, avg loss: 2.966939, ppl: 19.432354 +epoch: 0, batch: 36858, sum loss: 4406.347168, avg loss: 2.738563, ppl: 15.464740 +epoch: 0, batch: 36859, sum loss: 5414.488770, avg loss: 3.157136, ppl: 23.503191 +epoch: 0, batch: 36860, sum loss: 5700.435059, avg loss: 2.923300, ppl: 18.602575 +epoch: 0, batch: 36861, sum loss: 5300.330078, avg loss: 3.006427, ppl: 20.215034 +epoch: 0, batch: 36862, sum loss: 5057.126953, avg loss: 3.081735, ppl: 21.796185 +epoch: 0, batch: 36863, sum loss: 5533.705078, avg loss: 3.028848, ppl: 
20.673401 +epoch: 0, batch: 36864, sum loss: 4511.904785, avg loss: 2.883006, ppl: 17.867905 +epoch: 0, batch: 36865, sum loss: 4495.521484, avg loss: 2.830933, ppl: 16.961281 +epoch: 0, batch: 36866, sum loss: 4472.838379, avg loss: 2.668758, ppl: 14.422049 +epoch: 0, batch: 36867, sum loss: 5593.679688, avg loss: 3.167429, ppl: 23.746353 +epoch: 0, batch: 36868, sum loss: 4895.472168, avg loss: 2.996005, ppl: 20.005457 +epoch: 0, batch: 36869, sum loss: 4752.100098, avg loss: 2.805254, ppl: 16.531271 +epoch: 0, batch: 36870, sum loss: 4753.053711, avg loss: 2.830884, ppl: 16.960443 +epoch: 0, batch: 36871, sum loss: 5741.011719, avg loss: 2.985445, ppl: 19.795319 +epoch: 0, batch: 36872, sum loss: 4622.055664, avg loss: 2.782695, ppl: 16.162514 +epoch: 0, batch: 36873, sum loss: 5390.884766, avg loss: 2.971822, ppl: 19.527466 +epoch: 0, batch: 36874, sum loss: 4979.972168, avg loss: 3.025500, ppl: 20.604296 +epoch: 0, batch: 36875, sum loss: 5281.010742, avg loss: 3.072141, ppl: 21.588081 +epoch: 0, batch: 36876, sum loss: 5378.075195, avg loss: 3.004511, ppl: 20.176350 +epoch: 0, batch: 36877, sum loss: 5449.727539, avg loss: 2.812037, ppl: 16.643787 +epoch: 0, batch: 36878, sum loss: 4445.010254, avg loss: 2.901443, ppl: 18.200386 +epoch: 0, batch: 36879, sum loss: 4848.238281, avg loss: 3.070449, ppl: 21.551569 +epoch: 0, batch: 36880, sum loss: 5237.635254, avg loss: 2.829625, ppl: 16.939102 +epoch: 0, batch: 36881, sum loss: 3889.437988, avg loss: 2.780156, ppl: 16.121534 +epoch: 0, batch: 36882, sum loss: 4262.383789, avg loss: 2.849187, ppl: 17.273735 +epoch: 0, batch: 36883, sum loss: 3591.195312, avg loss: 2.623225, ppl: 13.780096 +epoch: 0, batch: 36884, sum loss: 5126.931152, avg loss: 2.911375, ppl: 18.382053 +epoch: 0, batch: 36885, sum loss: 4722.164551, avg loss: 2.804136, ppl: 16.512800 +epoch: 0, batch: 36886, sum loss: 5678.487793, avg loss: 2.883945, ppl: 17.884693 +epoch: 0, batch: 36887, sum loss: 5406.469727, avg loss: 3.071858, ppl: 
21.581959 +epoch: 0, batch: 36888, sum loss: 4679.358887, avg loss: 2.853268, ppl: 17.344364 +epoch: 0, batch: 36889, sum loss: 5526.139648, avg loss: 2.905436, ppl: 18.273214 +epoch: 0, batch: 36890, sum loss: 4960.567871, avg loss: 2.823317, ppl: 16.832588 +epoch: 0, batch: 36891, sum loss: 4849.332031, avg loss: 2.917769, ppl: 18.499968 +epoch: 0, batch: 36892, sum loss: 5683.834473, avg loss: 2.874979, ppl: 17.725061 +epoch: 0, batch: 36893, sum loss: 4842.908691, avg loss: 2.893016, ppl: 18.047661 +epoch: 0, batch: 36894, sum loss: 4730.087891, avg loss: 2.812181, ppl: 16.646175 +epoch: 0, batch: 36895, sum loss: 4376.728027, avg loss: 2.708371, ppl: 15.004815 +epoch: 0, batch: 36896, sum loss: 5144.949219, avg loss: 3.069779, ppl: 21.537140 +epoch: 0, batch: 36897, sum loss: 5120.202637, avg loss: 3.033296, ppl: 20.765556 +epoch: 0, batch: 36898, sum loss: 5003.144531, avg loss: 2.872069, ppl: 17.673550 +epoch: 0, batch: 36899, sum loss: 5540.681152, avg loss: 2.888781, ppl: 17.971382 +epoch: 0, batch: 36900, sum loss: 4613.714844, avg loss: 2.701238, ppl: 14.898167 +epoch: 0, batch: 36901, sum loss: 5468.993164, avg loss: 3.224642, ppl: 25.144577 +epoch: 0, batch: 36902, sum loss: 5348.118164, avg loss: 3.006250, ppl: 20.211458 +epoch: 0, batch: 36903, sum loss: 4796.024414, avg loss: 2.814568, ppl: 16.685970 +epoch: 0, batch: 36904, sum loss: 4960.475098, avg loss: 2.972124, ppl: 19.533367 +epoch: 0, batch: 36905, sum loss: 5890.680664, avg loss: 3.194512, ppl: 24.398273 +epoch: 0, batch: 36906, sum loss: 5550.620605, avg loss: 3.177230, ppl: 23.980234 +epoch: 0, batch: 36907, sum loss: 5708.229492, avg loss: 3.176533, ppl: 23.963528 +epoch: 0, batch: 36908, sum loss: 4586.883789, avg loss: 3.021663, ppl: 20.525391 +epoch: 0, batch: 36909, sum loss: 5230.391602, avg loss: 2.960041, ppl: 19.298754 +epoch: 0, batch: 36910, sum loss: 5157.517578, avg loss: 3.108811, ppl: 22.394405 +epoch: 0, batch: 36911, sum loss: 5084.162598, avg loss: 3.138372, ppl: 
23.066288 +epoch: 0, batch: 36912, sum loss: 4968.394531, avg loss: 2.842331, ppl: 17.155712 +epoch: 0, batch: 36913, sum loss: 5122.007324, avg loss: 2.759702, ppl: 15.795138 +epoch: 0, batch: 36914, sum loss: 4469.648926, avg loss: 2.870680, ppl: 17.649017 +epoch: 0, batch: 36915, sum loss: 4088.189697, avg loss: 2.815558, ppl: 16.702488 +epoch: 0, batch: 36916, sum loss: 5706.255371, avg loss: 3.121584, ppl: 22.682278 +epoch: 0, batch: 36917, sum loss: 5949.170898, avg loss: 3.093693, ppl: 22.058380 +epoch: 0, batch: 36918, sum loss: 4177.255859, avg loss: 2.847482, ppl: 17.244301 +epoch: 0, batch: 36919, sum loss: 4791.624023, avg loss: 3.021200, ppl: 20.515905 +epoch: 0, batch: 36920, sum loss: 6516.503418, avg loss: 3.200640, ppl: 24.548241 +epoch: 0, batch: 36921, sum loss: 5136.008789, avg loss: 3.021182, ppl: 20.515524 +epoch: 0, batch: 36922, sum loss: 4928.545898, avg loss: 2.887256, ppl: 17.944002 +epoch: 0, batch: 36923, sum loss: 4997.510742, avg loss: 2.946645, ppl: 19.041969 +epoch: 0, batch: 36924, sum loss: 5031.115234, avg loss: 3.092265, ppl: 22.026911 +epoch: 0, batch: 36925, sum loss: 4395.876953, avg loss: 2.654515, ppl: 14.218089 +epoch: 0, batch: 36926, sum loss: 4624.865234, avg loss: 2.728534, ppl: 15.310425 +epoch: 0, batch: 36927, sum loss: 4730.581055, avg loss: 2.739190, ppl: 15.474447 +epoch: 0, batch: 36928, sum loss: 4085.616455, avg loss: 2.495795, ppl: 12.131374 +epoch: 0, batch: 36929, sum loss: 5426.771484, avg loss: 2.950936, ppl: 19.123852 +epoch: 0, batch: 36930, sum loss: 3580.924561, avg loss: 2.579917, ppl: 13.196039 +epoch: 0, batch: 36931, sum loss: 4982.767578, avg loss: 3.068207, ppl: 21.503304 +epoch: 0, batch: 36932, sum loss: 3991.170898, avg loss: 2.522864, ppl: 12.464241 +epoch: 0, batch: 36933, sum loss: 4428.073730, avg loss: 2.899852, ppl: 18.171452 +epoch: 0, batch: 36934, sum loss: 5349.790527, avg loss: 3.022480, ppl: 20.542183 +epoch: 0, batch: 36935, sum loss: 6050.255859, avg loss: 3.192747, ppl: 
24.355242 +epoch: 0, batch: 36936, sum loss: 4959.265625, avg loss: 2.969620, ppl: 19.484518 +epoch: 0, batch: 36937, sum loss: 5045.483398, avg loss: 3.006844, ppl: 20.223465 +epoch: 0, batch: 36938, sum loss: 5564.661133, avg loss: 2.999817, ppl: 20.081869 +epoch: 0, batch: 36939, sum loss: 5407.867676, avg loss: 2.884196, ppl: 17.889179 +epoch: 0, batch: 36940, sum loss: 4433.677246, avg loss: 2.731779, ppl: 15.360187 +epoch: 0, batch: 36941, sum loss: 4394.903809, avg loss: 2.696260, ppl: 14.824185 +epoch: 0, batch: 36942, sum loss: 4498.461914, avg loss: 2.942094, ppl: 18.955503 +epoch: 0, batch: 36943, sum loss: 5330.887207, avg loss: 2.844657, ppl: 17.195658 +epoch: 0, batch: 36944, sum loss: 4465.589355, avg loss: 2.828112, ppl: 16.913504 +epoch: 0, batch: 36945, sum loss: 4918.509277, avg loss: 3.049293, ppl: 21.100416 +epoch: 0, batch: 36946, sum loss: 4949.636719, avg loss: 3.051564, ppl: 21.148394 +epoch: 0, batch: 36947, sum loss: 4377.533691, avg loss: 2.763594, ppl: 15.856736 +epoch: 0, batch: 36948, sum loss: 5197.148438, avg loss: 3.112065, ppl: 22.467394 +epoch: 0, batch: 36949, sum loss: 5423.486328, avg loss: 2.926868, ppl: 18.669067 +epoch: 0, batch: 36950, sum loss: 4490.686035, avg loss: 2.744918, ppl: 15.563343 +epoch: 0, batch: 36951, sum loss: 5113.701172, avg loss: 3.052956, ppl: 21.177856 +epoch: 0, batch: 36952, sum loss: 5846.582031, avg loss: 3.064246, ppl: 21.418310 +epoch: 0, batch: 36953, sum loss: 5563.650391, avg loss: 3.018801, ppl: 20.466738 +epoch: 0, batch: 36954, sum loss: 4734.364746, avg loss: 2.992645, ppl: 19.938351 +epoch: 0, batch: 36955, sum loss: 4867.120605, avg loss: 2.942636, ppl: 18.965784 +epoch: 0, batch: 36956, sum loss: 6121.869629, avg loss: 3.300199, ppl: 27.118042 +epoch: 0, batch: 36957, sum loss: 4303.863281, avg loss: 2.771322, ppl: 15.979750 +epoch: 0, batch: 36958, sum loss: 4691.576660, avg loss: 2.967474, ppl: 19.442745 +epoch: 0, batch: 36959, sum loss: 4759.044922, avg loss: 2.957766, ppl: 
19.254900 +epoch: 0, batch: 36960, sum loss: 4145.690430, avg loss: 2.573365, ppl: 13.109861 +epoch: 0, batch: 36961, sum loss: 4827.884766, avg loss: 3.008028, ppl: 20.247429 +epoch: 0, batch: 36962, sum loss: 6390.926270, avg loss: 3.381443, ppl: 29.413170 +epoch: 0, batch: 36963, sum loss: 4752.519043, avg loss: 2.887314, ppl: 17.945049 +epoch: 0, batch: 36964, sum loss: 4877.482422, avg loss: 2.940013, ppl: 18.916100 +epoch: 0, batch: 36965, sum loss: 4636.189941, avg loss: 2.870706, ppl: 17.649471 +epoch: 0, batch: 36966, sum loss: 5839.780273, avg loss: 2.962851, ppl: 19.353075 +epoch: 0, batch: 36967, sum loss: 5047.249023, avg loss: 2.907402, ppl: 18.309162 +epoch: 0, batch: 36968, sum loss: 5341.404785, avg loss: 2.857894, ppl: 17.424799 +epoch: 0, batch: 36969, sum loss: 4761.384766, avg loss: 2.752245, ppl: 15.677795 +epoch: 0, batch: 36970, sum loss: 4354.034180, avg loss: 2.741835, ppl: 15.515435 +epoch: 0, batch: 36971, sum loss: 5416.699219, avg loss: 2.995962, ppl: 20.004593 +epoch: 0, batch: 36972, sum loss: 3689.918457, avg loss: 2.693371, ppl: 14.781425 +epoch: 0, batch: 36973, sum loss: 4384.621582, avg loss: 2.576158, ppl: 13.146536 +epoch: 0, batch: 36974, sum loss: 5236.305176, avg loss: 3.152502, ppl: 23.394514 +epoch: 0, batch: 36975, sum loss: 4747.722656, avg loss: 2.768351, ppl: 15.932345 +epoch: 0, batch: 36976, sum loss: 4888.501953, avg loss: 2.833914, ppl: 17.011921 +epoch: 0, batch: 36977, sum loss: 4550.501953, avg loss: 2.767945, ppl: 15.925878 +epoch: 0, batch: 36978, sum loss: 4841.315918, avg loss: 2.885170, ppl: 17.906620 +epoch: 0, batch: 36979, sum loss: 4913.381836, avg loss: 2.735736, ppl: 15.421091 +epoch: 0, batch: 36980, sum loss: 5944.879395, avg loss: 3.220411, ppl: 25.038416 +epoch: 0, batch: 36981, sum loss: 5054.786133, avg loss: 2.968166, ppl: 19.456198 +epoch: 0, batch: 36982, sum loss: 4156.158203, avg loss: 2.549790, ppl: 12.804420 +epoch: 0, batch: 36983, sum loss: 4648.106445, avg loss: 2.930710, ppl: 
18.740938 +epoch: 0, batch: 36984, sum loss: 3690.521484, avg loss: 2.520848, ppl: 12.439141 +epoch: 0, batch: 36985, sum loss: 6289.342773, avg loss: 3.235259, ppl: 25.412949 +epoch: 0, batch: 36986, sum loss: 4867.424316, avg loss: 2.567207, ppl: 13.029384 +epoch: 0, batch: 36987, sum loss: 5288.167969, avg loss: 3.054979, ppl: 21.220732 +epoch: 0, batch: 36988, sum loss: 4810.431641, avg loss: 3.006520, ppl: 20.216919 +epoch: 0, batch: 36989, sum loss: 5333.404785, avg loss: 3.133610, ppl: 22.956709 +epoch: 0, batch: 36990, sum loss: 4701.432617, avg loss: 2.727049, ppl: 15.287712 +epoch: 0, batch: 36991, sum loss: 4216.960449, avg loss: 2.740065, ppl: 15.487997 +epoch: 0, batch: 36992, sum loss: 3960.702637, avg loss: 2.767787, ppl: 15.923348 +epoch: 0, batch: 36993, sum loss: 5498.645508, avg loss: 2.942025, ppl: 18.954197 +epoch: 0, batch: 36994, sum loss: 4597.787109, avg loss: 2.889873, ppl: 17.991016 +epoch: 0, batch: 36995, sum loss: 4655.135742, avg loss: 2.743156, ppl: 15.535942 +epoch: 0, batch: 36996, sum loss: 5174.192871, avg loss: 3.151153, ppl: 23.362984 +epoch: 0, batch: 36997, sum loss: 5083.065430, avg loss: 2.922982, ppl: 18.596661 +epoch: 0, batch: 36998, sum loss: 4171.214355, avg loss: 2.767893, ppl: 15.925042 +epoch: 0, batch: 36999, sum loss: 3160.015625, avg loss: 2.451525, ppl: 11.606032 +epoch: 0, batch: 37000, sum loss: 5626.851562, avg loss: 3.182609, ppl: 24.109564 +epoch: 0, batch: 37001, sum loss: 4012.095947, avg loss: 2.768872, ppl: 15.940651 +epoch: 0, batch: 37002, sum loss: 5110.429688, avg loss: 2.981581, ppl: 19.718967 +epoch: 0, batch: 37003, sum loss: 5966.520508, avg loss: 2.998251, ppl: 20.050447 +epoch: 0, batch: 37004, sum loss: 4605.127930, avg loss: 2.871027, ppl: 17.655149 +epoch: 0, batch: 37005, sum loss: 5038.696289, avg loss: 2.785349, ppl: 16.205475 +epoch: 0, batch: 37006, sum loss: 4457.662109, avg loss: 2.815958, ppl: 16.709179 +epoch: 0, batch: 37007, sum loss: 4785.250977, avg loss: 2.856866, ppl: 
17.406895 +epoch: 0, batch: 37008, sum loss: 4215.884766, avg loss: 2.744717, ppl: 15.560205 +epoch: 0, batch: 37009, sum loss: 4060.166504, avg loss: 2.871405, ppl: 17.661814 +epoch: 0, batch: 37010, sum loss: 4980.283691, avg loss: 2.877114, ppl: 17.762928 +epoch: 0, batch: 37011, sum loss: 4009.235352, avg loss: 2.688957, ppl: 14.716326 +epoch: 0, batch: 37012, sum loss: 5576.713379, avg loss: 3.113743, ppl: 22.505114 +epoch: 0, batch: 37013, sum loss: 3525.966797, avg loss: 2.594530, ppl: 13.390297 +epoch: 0, batch: 37014, sum loss: 4451.530273, avg loss: 2.829962, ppl: 16.944817 +epoch: 0, batch: 37015, sum loss: 4342.330078, avg loss: 2.646149, ppl: 14.099631 +epoch: 0, batch: 37016, sum loss: 3442.342041, avg loss: 2.584341, ppl: 13.254552 +epoch: 0, batch: 37017, sum loss: 4528.530273, avg loss: 2.714946, ppl: 15.103799 +epoch: 0, batch: 37018, sum loss: 4100.843262, avg loss: 2.623700, ppl: 13.786642 +epoch: 0, batch: 37019, sum loss: 5677.647949, avg loss: 3.013614, ppl: 20.360846 +epoch: 0, batch: 37020, sum loss: 5513.460938, avg loss: 2.942081, ppl: 18.955259 +epoch: 0, batch: 37021, sum loss: 4833.906250, avg loss: 2.721794, ppl: 15.207582 +epoch: 0, batch: 37022, sum loss: 4961.711426, avg loss: 2.911803, ppl: 18.389917 +epoch: 0, batch: 37023, sum loss: 4662.138672, avg loss: 2.853206, ppl: 17.343298 +epoch: 0, batch: 37024, sum loss: 5432.427734, avg loss: 3.065704, ppl: 21.449560 +epoch: 0, batch: 37025, sum loss: 5241.257324, avg loss: 2.989879, ppl: 19.883274 +epoch: 0, batch: 37026, sum loss: 5299.257812, avg loss: 3.010942, ppl: 20.306515 +epoch: 0, batch: 37027, sum loss: 5579.881348, avg loss: 2.991894, ppl: 19.923372 +epoch: 0, batch: 37028, sum loss: 5500.787598, avg loss: 2.907393, ppl: 18.309004 +epoch: 0, batch: 37029, sum loss: 4751.291016, avg loss: 2.954783, ppl: 19.197556 +epoch: 0, batch: 37030, sum loss: 4542.087402, avg loss: 2.907866, ppl: 18.317677 +epoch: 0, batch: 37031, sum loss: 4324.834473, avg loss: 2.779457, ppl: 
16.110264 +epoch: 0, batch: 37032, sum loss: 4425.925781, avg loss: 2.663012, ppl: 14.339415 +epoch: 0, batch: 37033, sum loss: 3767.447754, avg loss: 2.762058, ppl: 15.832400 +epoch: 0, batch: 37034, sum loss: 3977.244629, avg loss: 2.577605, ppl: 13.165572 +epoch: 0, batch: 37035, sum loss: 4946.158691, avg loss: 3.030735, ppl: 20.712440 +epoch: 0, batch: 37036, sum loss: 4290.065430, avg loss: 2.785757, ppl: 16.212084 +epoch: 0, batch: 37037, sum loss: 4381.576172, avg loss: 2.863775, ppl: 17.527573 +epoch: 0, batch: 37038, sum loss: 4920.760742, avg loss: 3.002294, ppl: 20.131672 +epoch: 0, batch: 37039, sum loss: 4280.069336, avg loss: 2.826994, ppl: 16.894606 +epoch: 0, batch: 37040, sum loss: 5036.426270, avg loss: 2.887859, ppl: 17.954824 +epoch: 0, batch: 37041, sum loss: 4409.117676, avg loss: 2.965109, ppl: 19.396824 +epoch: 0, batch: 37042, sum loss: 4316.971680, avg loss: 2.691379, ppl: 14.752003 +epoch: 0, batch: 37043, sum loss: 4594.128418, avg loss: 2.871330, ppl: 17.660496 +epoch: 0, batch: 37044, sum loss: 4544.585449, avg loss: 2.781264, ppl: 16.139410 +epoch: 0, batch: 37045, sum loss: 5187.962402, avg loss: 3.142315, ppl: 23.157423 +epoch: 0, batch: 37046, sum loss: 4546.441895, avg loss: 2.711056, ppl: 15.045162 +epoch: 0, batch: 37047, sum loss: 4615.376465, avg loss: 2.714927, ppl: 15.103514 +epoch: 0, batch: 37048, sum loss: 5119.413086, avg loss: 2.945577, ppl: 19.021637 +epoch: 0, batch: 37049, sum loss: 4681.716309, avg loss: 2.793387, ppl: 16.336256 +epoch: 0, batch: 37050, sum loss: 5078.052734, avg loss: 3.072022, ppl: 21.585508 +epoch: 0, batch: 37051, sum loss: 4697.107422, avg loss: 2.865837, ppl: 17.563753 +epoch: 0, batch: 37052, sum loss: 4559.745117, avg loss: 2.771881, ppl: 15.988686 +epoch: 0, batch: 37053, sum loss: 4767.123047, avg loss: 2.908556, ppl: 18.330305 +epoch: 0, batch: 37054, sum loss: 4801.300781, avg loss: 2.876753, ppl: 17.756525 +epoch: 0, batch: 37055, sum loss: 4167.288086, avg loss: 2.607815, ppl: 
13.569366 +epoch: 0, batch: 37056, sum loss: 4771.290039, avg loss: 2.956190, ppl: 19.224579 +epoch: 0, batch: 37057, sum loss: 3954.940186, avg loss: 2.699618, ppl: 14.874046 +epoch: 0, batch: 37058, sum loss: 5321.188965, avg loss: 3.097316, ppl: 22.138453 +epoch: 0, batch: 37059, sum loss: 4246.596680, avg loss: 2.672496, ppl: 14.476061 +epoch: 0, batch: 37060, sum loss: 4341.084961, avg loss: 2.829912, ppl: 16.943970 +epoch: 0, batch: 37061, sum loss: 4917.158203, avg loss: 3.057934, ppl: 21.283541 +epoch: 0, batch: 37062, sum loss: 5382.125488, avg loss: 2.960465, ppl: 19.306946 +epoch: 0, batch: 37063, sum loss: 5879.499023, avg loss: 3.033797, ppl: 20.775974 +epoch: 0, batch: 37064, sum loss: 5196.644531, avg loss: 2.906401, ppl: 18.290846 +epoch: 0, batch: 37065, sum loss: 4806.771973, avg loss: 2.925607, ppl: 18.645535 +epoch: 0, batch: 37066, sum loss: 6091.032227, avg loss: 3.182357, ppl: 24.103506 +epoch: 0, batch: 37067, sum loss: 5003.751465, avg loss: 2.905779, ppl: 18.279476 +epoch: 0, batch: 37068, sum loss: 4421.478516, avg loss: 2.935909, ppl: 18.838615 +epoch: 0, batch: 37069, sum loss: 5641.121094, avg loss: 3.024730, ppl: 20.588440 +epoch: 0, batch: 37070, sum loss: 5142.489258, avg loss: 3.079335, ppl: 21.743937 +epoch: 0, batch: 37071, sum loss: 4375.802734, avg loss: 2.846977, ppl: 17.235592 +epoch: 0, batch: 37072, sum loss: 4821.465820, avg loss: 2.766188, ppl: 15.897918 +epoch: 0, batch: 37073, sum loss: 3777.414551, avg loss: 2.692384, ppl: 14.766838 +epoch: 0, batch: 37074, sum loss: 5453.302734, avg loss: 3.089690, ppl: 21.970266 +epoch: 0, batch: 37075, sum loss: 5167.471680, avg loss: 3.179982, ppl: 24.046331 +epoch: 0, batch: 37076, sum loss: 6374.124023, avg loss: 3.204688, ppl: 24.647799 +epoch: 0, batch: 37077, sum loss: 4694.540527, avg loss: 2.616801, ppl: 13.691851 +epoch: 0, batch: 37078, sum loss: 3360.220703, avg loss: 2.723031, ppl: 15.226408 +epoch: 0, batch: 37079, sum loss: 5447.218750, avg loss: 3.041440, ppl: 
20.935364 +epoch: 0, batch: 37080, sum loss: 5258.650391, avg loss: 2.898925, ppl: 18.154625 +epoch: 0, batch: 37081, sum loss: 4219.399902, avg loss: 2.748795, ppl: 15.623791 +epoch: 0, batch: 37082, sum loss: 5097.663574, avg loss: 3.206078, ppl: 24.682083 +epoch: 0, batch: 37083, sum loss: 4581.197754, avg loss: 2.688496, ppl: 14.709541 +epoch: 0, batch: 37084, sum loss: 6549.001953, avg loss: 3.057424, ppl: 21.272684 +epoch: 0, batch: 37085, sum loss: 5319.721191, avg loss: 2.971911, ppl: 19.529207 +epoch: 0, batch: 37086, sum loss: 4074.360352, avg loss: 2.725325, ppl: 15.261368 +epoch: 0, batch: 37087, sum loss: 5000.122559, avg loss: 2.552385, ppl: 12.837689 +epoch: 0, batch: 37088, sum loss: 5067.582520, avg loss: 2.851763, ppl: 17.318283 +epoch: 0, batch: 37089, sum loss: 5866.241211, avg loss: 2.938999, ppl: 18.896915 +epoch: 0, batch: 37090, sum loss: 3631.704102, avg loss: 2.629764, ppl: 13.870497 +epoch: 0, batch: 37091, sum loss: 4188.925293, avg loss: 2.634544, ppl: 13.936957 +epoch: 0, batch: 37092, sum loss: 4331.596191, avg loss: 2.790977, ppl: 16.296934 +epoch: 0, batch: 37093, sum loss: 4928.382812, avg loss: 2.895642, ppl: 18.095116 +epoch: 0, batch: 37094, sum loss: 4766.232910, avg loss: 2.627471, ppl: 13.838734 +epoch: 0, batch: 37095, sum loss: 3895.099121, avg loss: 2.733403, ppl: 15.385150 +epoch: 0, batch: 37096, sum loss: 4781.533203, avg loss: 2.979148, ppl: 19.671057 +epoch: 0, batch: 37097, sum loss: 5064.950195, avg loss: 2.861554, ppl: 17.488682 +epoch: 0, batch: 37098, sum loss: 4561.912598, avg loss: 2.639996, ppl: 14.013144 +epoch: 0, batch: 37099, sum loss: 5195.358887, avg loss: 2.871951, ppl: 17.671459 +epoch: 0, batch: 37100, sum loss: 5599.447266, avg loss: 3.126436, ppl: 22.792603 +epoch: 0, batch: 37101, sum loss: 5147.718750, avg loss: 3.148452, ppl: 23.299963 +epoch: 0, batch: 37102, sum loss: 4746.058105, avg loss: 2.929666, ppl: 18.721369 +epoch: 0, batch: 37103, sum loss: 5718.785645, avg loss: 3.006722, ppl: 
20.221016 +epoch: 0, batch: 37104, sum loss: 4572.435059, avg loss: 2.843554, ppl: 17.176704 +epoch: 0, batch: 37105, sum loss: 5178.299316, avg loss: 3.087835, ppl: 21.929550 +epoch: 0, batch: 37106, sum loss: 6027.764648, avg loss: 3.084833, ppl: 21.863823 +epoch: 0, batch: 37107, sum loss: 5372.696289, avg loss: 2.943943, ppl: 18.990583 +epoch: 0, batch: 37108, sum loss: 5071.805664, avg loss: 3.055305, ppl: 21.227654 +epoch: 0, batch: 37109, sum loss: 5191.301758, avg loss: 2.893702, ppl: 18.060049 +epoch: 0, batch: 37110, sum loss: 3920.490967, avg loss: 2.745442, ppl: 15.571494 +epoch: 0, batch: 37111, sum loss: 4433.167969, avg loss: 2.918478, ppl: 18.513084 +epoch: 0, batch: 37112, sum loss: 5716.882324, avg loss: 2.927231, ppl: 18.675846 +epoch: 0, batch: 37113, sum loss: 4339.910156, avg loss: 2.750260, ppl: 15.646698 +epoch: 0, batch: 37114, sum loss: 5199.470703, avg loss: 2.877405, ppl: 17.768099 +epoch: 0, batch: 37115, sum loss: 5734.368164, avg loss: 3.286171, ppl: 26.740271 +epoch: 0, batch: 37116, sum loss: 5137.615723, avg loss: 2.992205, ppl: 19.929577 +epoch: 0, batch: 37117, sum loss: 5704.479004, avg loss: 3.224691, ppl: 25.145813 +epoch: 0, batch: 37118, sum loss: 4340.203613, avg loss: 2.846035, ppl: 17.219376 +epoch: 0, batch: 37119, sum loss: 4958.751953, avg loss: 2.869648, ppl: 17.630816 +epoch: 0, batch: 37120, sum loss: 4696.541992, avg loss: 2.642961, ppl: 14.054759 +epoch: 0, batch: 37121, sum loss: 4229.431152, avg loss: 2.991111, ppl: 19.907793 +epoch: 0, batch: 37122, sum loss: 5104.674316, avg loss: 2.732695, ppl: 15.374267 +epoch: 0, batch: 37123, sum loss: 4670.078613, avg loss: 2.820096, ppl: 16.778458 +epoch: 0, batch: 37124, sum loss: 4936.389160, avg loss: 3.013668, ppl: 20.361958 +epoch: 0, batch: 37125, sum loss: 5627.179688, avg loss: 3.018873, ppl: 20.468216 +epoch: 0, batch: 37126, sum loss: 4925.962402, avg loss: 2.803621, ppl: 16.504301 +epoch: 0, batch: 37127, sum loss: 4799.588867, avg loss: 2.933734, ppl: 
18.797693 +epoch: 0, batch: 37128, sum loss: 4143.148438, avg loss: 2.595958, ppl: 13.409424 +epoch: 0, batch: 37129, sum loss: 5030.330078, avg loss: 3.030319, ppl: 20.703846 +epoch: 0, batch: 37130, sum loss: 5633.959473, avg loss: 3.395997, ppl: 29.844402 +epoch: 0, batch: 37131, sum loss: 3993.148438, avg loss: 2.727560, ppl: 15.295525 +epoch: 0, batch: 37132, sum loss: 5354.430176, avg loss: 3.040562, ppl: 20.916998 +epoch: 0, batch: 37133, sum loss: 5374.430664, avg loss: 2.911392, ppl: 18.382378 +epoch: 0, batch: 37134, sum loss: 4485.727539, avg loss: 2.757054, ppl: 15.753370 +epoch: 0, batch: 37135, sum loss: 5291.030762, avg loss: 2.866214, ppl: 17.570368 +epoch: 0, batch: 37136, sum loss: 5445.685547, avg loss: 3.071453, ppl: 21.573223 +epoch: 0, batch: 37137, sum loss: 4411.916504, avg loss: 2.799439, ppl: 16.435431 +epoch: 0, batch: 37138, sum loss: 5216.560547, avg loss: 2.870975, ppl: 17.654215 +epoch: 0, batch: 37139, sum loss: 4177.156250, avg loss: 2.735531, ppl: 15.417933 +epoch: 0, batch: 37140, sum loss: 5324.386230, avg loss: 2.954710, ppl: 19.196146 +epoch: 0, batch: 37141, sum loss: 5440.512207, avg loss: 2.974583, ppl: 19.581459 +epoch: 0, batch: 37142, sum loss: 4890.082520, avg loss: 2.657653, ppl: 14.262780 +epoch: 0, batch: 37143, sum loss: 5185.220215, avg loss: 2.931159, ppl: 18.749350 +epoch: 0, batch: 37144, sum loss: 5186.093750, avg loss: 2.877965, ppl: 17.778067 +epoch: 0, batch: 37145, sum loss: 5383.420410, avg loss: 3.168582, ppl: 23.773748 +epoch: 0, batch: 37146, sum loss: 4782.382812, avg loss: 3.011576, ppl: 20.319401 +epoch: 0, batch: 37147, sum loss: 4441.093750, avg loss: 2.821533, ppl: 16.802597 +epoch: 0, batch: 37148, sum loss: 6348.129883, avg loss: 3.318416, ppl: 27.616568 +epoch: 0, batch: 37149, sum loss: 4741.570801, avg loss: 2.882414, ppl: 17.857323 +epoch: 0, batch: 37150, sum loss: 5263.075684, avg loss: 2.990384, ppl: 19.893318 +epoch: 0, batch: 37151, sum loss: 4917.854980, avg loss: 2.906534, ppl: 
18.293280 +epoch: 0, batch: 37152, sum loss: 4954.187500, avg loss: 2.758456, ppl: 15.775475 +epoch: 0, batch: 37153, sum loss: 4418.648926, avg loss: 3.026472, ppl: 20.624338 +epoch: 0, batch: 37154, sum loss: 5338.846191, avg loss: 2.930212, ppl: 18.731602 +epoch: 0, batch: 37155, sum loss: 4660.953125, avg loss: 2.831685, ppl: 16.974035 +epoch: 0, batch: 37156, sum loss: 4623.250488, avg loss: 2.778396, ppl: 16.093185 +epoch: 0, batch: 37157, sum loss: 4611.801270, avg loss: 2.740227, ppl: 15.490494 +epoch: 0, batch: 37158, sum loss: 4821.302734, avg loss: 2.869823, ppl: 17.633900 +epoch: 0, batch: 37159, sum loss: 4278.407227, avg loss: 2.730318, ppl: 15.337757 +epoch: 0, batch: 37160, sum loss: 3823.072754, avg loss: 2.452260, ppl: 11.614563 +epoch: 0, batch: 37161, sum loss: 4703.631836, avg loss: 2.803118, ppl: 16.496000 +epoch: 0, batch: 37162, sum loss: 4213.789062, avg loss: 2.636914, ppl: 13.970029 +epoch: 0, batch: 37163, sum loss: 5011.785645, avg loss: 3.103273, ppl: 22.270721 +epoch: 0, batch: 37164, sum loss: 4502.048828, avg loss: 2.921511, ppl: 18.569328 +epoch: 0, batch: 37165, sum loss: 5924.532227, avg loss: 3.146326, ppl: 23.250486 +epoch: 0, batch: 37166, sum loss: 4053.879639, avg loss: 2.568999, ppl: 13.052747 +epoch: 0, batch: 37167, sum loss: 5004.238281, avg loss: 3.096682, ppl: 22.124422 +epoch: 0, batch: 37168, sum loss: 5018.833008, avg loss: 3.138732, ppl: 23.074594 +epoch: 0, batch: 37169, sum loss: 6036.681152, avg loss: 3.044216, ppl: 20.993574 +epoch: 0, batch: 37170, sum loss: 6293.639648, avg loss: 3.055165, ppl: 21.224684 +epoch: 0, batch: 37171, sum loss: 4801.679688, avg loss: 2.876980, ppl: 17.760557 +epoch: 0, batch: 37172, sum loss: 5006.897461, avg loss: 2.769302, ppl: 15.947494 +epoch: 0, batch: 37173, sum loss: 4369.685059, avg loss: 2.621287, ppl: 13.753407 +epoch: 0, batch: 37174, sum loss: 4633.760742, avg loss: 2.823742, ppl: 16.839746 +epoch: 0, batch: 37175, sum loss: 5433.402344, avg loss: 3.142512, ppl: 
23.161968 +epoch: 0, batch: 37176, sum loss: 4285.648438, avg loss: 2.686927, ppl: 14.686473 +epoch: 0, batch: 37177, sum loss: 4625.121582, avg loss: 2.821917, ppl: 16.809036 +epoch: 0, batch: 37178, sum loss: 4750.176758, avg loss: 3.054776, ppl: 21.216436 +epoch: 0, batch: 37179, sum loss: 4183.609863, avg loss: 2.720162, ppl: 15.182788 +epoch: 0, batch: 37180, sum loss: 5438.732422, avg loss: 3.320349, ppl: 27.670012 +epoch: 0, batch: 37181, sum loss: 4368.520508, avg loss: 2.937808, ppl: 18.874424 +epoch: 0, batch: 37182, sum loss: 4397.741699, avg loss: 2.916274, ppl: 18.472338 +epoch: 0, batch: 37183, sum loss: 5926.164551, avg loss: 3.172465, ppl: 23.866238 +epoch: 0, batch: 37184, sum loss: 4890.555664, avg loss: 2.767717, ppl: 15.922240 +epoch: 0, batch: 37185, sum loss: 4056.884766, avg loss: 2.686679, ppl: 14.682828 +epoch: 0, batch: 37186, sum loss: 4220.293945, avg loss: 2.771040, ppl: 15.975239 +epoch: 0, batch: 37187, sum loss: 4426.012695, avg loss: 2.853651, ppl: 17.351011 +epoch: 0, batch: 37188, sum loss: 5071.369629, avg loss: 2.946757, ppl: 19.044100 +epoch: 0, batch: 37189, sum loss: 4603.058594, avg loss: 2.979326, ppl: 19.674551 +epoch: 0, batch: 37190, sum loss: 4346.637695, avg loss: 2.786306, ppl: 16.220995 +epoch: 0, batch: 37191, sum loss: 6004.132812, avg loss: 3.256037, ppl: 25.946520 +epoch: 0, batch: 37192, sum loss: 4779.416504, avg loss: 2.735785, ppl: 15.421844 +epoch: 0, batch: 37193, sum loss: 5621.745117, avg loss: 3.126666, ppl: 22.797836 +epoch: 0, batch: 37194, sum loss: 5666.535645, avg loss: 3.054736, ppl: 21.215588 +epoch: 0, batch: 37195, sum loss: 4539.793945, avg loss: 2.942187, ppl: 18.957258 +epoch: 0, batch: 37196, sum loss: 5768.493164, avg loss: 2.913380, ppl: 18.418953 +epoch: 0, batch: 37197, sum loss: 5300.787109, avg loss: 3.246042, ppl: 25.688471 +epoch: 0, batch: 37198, sum loss: 4747.013184, avg loss: 2.861370, ppl: 17.485468 +epoch: 0, batch: 37199, sum loss: 5478.219238, avg loss: 3.038391, ppl: 
20.871635 +epoch: 0, batch: 37200, sum loss: 5492.358398, avg loss: 2.989852, ppl: 19.882748 +epoch: 0, batch: 37201, sum loss: 5087.089355, avg loss: 2.976647, ppl: 19.621912 +epoch: 0, batch: 37202, sum loss: 4238.429688, avg loss: 2.622791, ppl: 13.774108 +epoch: 0, batch: 37203, sum loss: 3984.696777, avg loss: 2.614630, ppl: 13.662167 +epoch: 0, batch: 37204, sum loss: 4855.794922, avg loss: 2.841308, ppl: 17.138166 +epoch: 0, batch: 37205, sum loss: 4492.425293, avg loss: 2.651963, ppl: 14.181850 +epoch: 0, batch: 37206, sum loss: 5339.340820, avg loss: 2.852212, ppl: 17.326069 +epoch: 0, batch: 37207, sum loss: 4184.239746, avg loss: 2.710000, ppl: 15.029276 +epoch: 0, batch: 37208, sum loss: 4537.413574, avg loss: 2.741640, ppl: 15.512399 +epoch: 0, batch: 37209, sum loss: 4615.568848, avg loss: 3.014741, ppl: 20.383820 +epoch: 0, batch: 37210, sum loss: 4548.268555, avg loss: 3.087759, ppl: 21.927877 +epoch: 0, batch: 37211, sum loss: 4921.133789, avg loss: 3.106776, ppl: 22.348886 +epoch: 0, batch: 37212, sum loss: 5205.001953, avg loss: 2.986232, ppl: 19.810890 +epoch: 0, batch: 37213, sum loss: 4826.552734, avg loss: 2.897090, ppl: 18.121344 +epoch: 0, batch: 37214, sum loss: 5724.065430, avg loss: 2.973540, ppl: 19.561049 +epoch: 0, batch: 37215, sum loss: 5068.346680, avg loss: 3.027686, ppl: 20.649403 +epoch: 0, batch: 37216, sum loss: 5910.846191, avg loss: 2.900317, ppl: 18.179911 +epoch: 0, batch: 37217, sum loss: 4877.819336, avg loss: 2.910393, ppl: 18.364023 +epoch: 0, batch: 37218, sum loss: 5755.058105, avg loss: 3.080866, ppl: 21.777256 +epoch: 0, batch: 37219, sum loss: 5605.989258, avg loss: 3.033544, ppl: 20.770716 +epoch: 0, batch: 37220, sum loss: 3882.153076, avg loss: 2.501387, ppl: 12.199408 +epoch: 0, batch: 37221, sum loss: 3953.895508, avg loss: 2.698905, ppl: 14.863443 +epoch: 0, batch: 37222, sum loss: 5034.166992, avg loss: 2.950860, ppl: 19.122387 +epoch: 0, batch: 37223, sum loss: 4897.636230, avg loss: 2.899725, ppl: 
18.169157 +epoch: 0, batch: 37224, sum loss: 4403.995117, avg loss: 2.626115, ppl: 13.819976 +epoch: 0, batch: 37225, sum loss: 5013.184082, avg loss: 3.107988, ppl: 22.375971 +epoch: 0, batch: 37226, sum loss: 5950.740234, avg loss: 2.979840, ppl: 19.684668 +epoch: 0, batch: 37227, sum loss: 5400.021484, avg loss: 3.003349, ppl: 20.152912 +epoch: 0, batch: 37228, sum loss: 5583.645996, avg loss: 3.133359, ppl: 22.950947 +epoch: 0, batch: 37229, sum loss: 4779.156250, avg loss: 2.834612, ppl: 17.023800 +epoch: 0, batch: 37230, sum loss: 4512.308594, avg loss: 2.949221, ppl: 19.091084 +epoch: 0, batch: 37231, sum loss: 4852.958496, avg loss: 2.926996, ppl: 18.671448 +epoch: 0, batch: 37232, sum loss: 4416.496094, avg loss: 2.786433, ppl: 16.223045 +epoch: 0, batch: 37233, sum loss: 4713.208984, avg loss: 2.875661, ppl: 17.737150 +epoch: 0, batch: 37234, sum loss: 5165.394043, avg loss: 2.926569, ppl: 18.663486 +epoch: 0, batch: 37235, sum loss: 4381.477539, avg loss: 2.689673, ppl: 14.726862 +epoch: 0, batch: 37236, sum loss: 6325.637695, avg loss: 3.173928, ppl: 23.901173 +epoch: 0, batch: 37237, sum loss: 4600.772461, avg loss: 2.827764, ppl: 16.907618 +epoch: 0, batch: 37238, sum loss: 4566.604004, avg loss: 2.734493, ppl: 15.401940 +epoch: 0, batch: 37239, sum loss: 5062.099121, avg loss: 2.823257, ppl: 16.831577 +epoch: 0, batch: 37240, sum loss: 4587.827148, avg loss: 3.016323, ppl: 20.416086 +epoch: 0, batch: 37241, sum loss: 4939.978516, avg loss: 3.025094, ppl: 20.595942 +epoch: 0, batch: 37242, sum loss: 4276.390137, avg loss: 2.897283, ppl: 18.124840 +epoch: 0, batch: 37243, sum loss: 5268.762207, avg loss: 3.075751, ppl: 21.666153 +epoch: 0, batch: 37244, sum loss: 4627.124512, avg loss: 2.969913, ppl: 19.490225 +epoch: 0, batch: 37245, sum loss: 5408.250000, avg loss: 3.149825, ppl: 23.331989 +epoch: 0, batch: 37246, sum loss: 4213.348145, avg loss: 2.901755, ppl: 18.206066 +epoch: 0, batch: 37247, sum loss: 4516.882324, avg loss: 2.779620, ppl: 
16.112896 +epoch: 0, batch: 37248, sum loss: 5982.180176, avg loss: 3.104401, ppl: 22.295856 +epoch: 0, batch: 37249, sum loss: 4555.935059, avg loss: 2.856386, ppl: 17.398529 +epoch: 0, batch: 37250, sum loss: 4541.284180, avg loss: 2.789487, ppl: 16.272663 +epoch: 0, batch: 37251, sum loss: 4892.796875, avg loss: 2.833119, ppl: 16.998404 +epoch: 0, batch: 37252, sum loss: 4874.041504, avg loss: 2.868771, ppl: 17.615353 +epoch: 0, batch: 37253, sum loss: 4891.501465, avg loss: 2.892668, ppl: 18.041380 +epoch: 0, batch: 37254, sum loss: 5040.042969, avg loss: 2.793815, ppl: 16.343256 +epoch: 0, batch: 37255, sum loss: 3967.918945, avg loss: 2.840314, ppl: 17.121143 +epoch: 0, batch: 37256, sum loss: 5313.716309, avg loss: 3.087575, ppl: 21.923841 +epoch: 0, batch: 37257, sum loss: 4923.679688, avg loss: 2.872625, ppl: 17.683382 +epoch: 0, batch: 37258, sum loss: 4730.013672, avg loss: 2.827265, ppl: 16.899174 +epoch: 0, batch: 37259, sum loss: 4854.812500, avg loss: 2.834100, ppl: 17.015072 +epoch: 0, batch: 37260, sum loss: 4816.552734, avg loss: 2.987936, ppl: 19.844681 +epoch: 0, batch: 37261, sum loss: 5101.782715, avg loss: 3.084512, ppl: 21.856798 +epoch: 0, batch: 37262, sum loss: 5302.767578, avg loss: 3.006104, ppl: 20.208513 +epoch: 0, batch: 37263, sum loss: 4607.964844, avg loss: 2.686860, ppl: 14.685489 +epoch: 0, batch: 37264, sum loss: 4108.045898, avg loss: 2.729599, ppl: 15.326736 +epoch: 0, batch: 37265, sum loss: 4518.701172, avg loss: 2.891044, ppl: 18.012098 +epoch: 0, batch: 37266, sum loss: 5532.679688, avg loss: 3.129344, ppl: 22.858973 +epoch: 0, batch: 37267, sum loss: 4715.231934, avg loss: 2.685212, ppl: 14.661307 +epoch: 0, batch: 37268, sum loss: 5093.630859, avg loss: 2.876132, ppl: 17.745510 +epoch: 0, batch: 37269, sum loss: 5214.964844, avg loss: 2.919913, ppl: 18.539680 +epoch: 0, batch: 37270, sum loss: 4076.874756, avg loss: 2.763983, ppl: 15.862900 +epoch: 0, batch: 37271, sum loss: 5483.274414, avg loss: 3.026090, ppl: 
20.616457 +epoch: 0, batch: 37272, sum loss: 4960.001953, avg loss: 3.031786, ppl: 20.734236 +epoch: 0, batch: 37273, sum loss: 4542.815918, avg loss: 2.744904, ppl: 15.563121 +epoch: 0, batch: 37274, sum loss: 4282.211914, avg loss: 2.793354, ppl: 16.335722 +epoch: 0, batch: 37275, sum loss: 4947.586914, avg loss: 2.868166, ppl: 17.604710 +epoch: 0, batch: 37276, sum loss: 5715.838379, avg loss: 2.978550, ppl: 19.659298 +epoch: 0, batch: 37277, sum loss: 5108.509766, avg loss: 2.876413, ppl: 17.750490 +epoch: 0, batch: 37278, sum loss: 4803.876465, avg loss: 3.034666, ppl: 20.794033 +epoch: 0, batch: 37279, sum loss: 5779.489258, avg loss: 3.216188, ppl: 24.932888 +epoch: 0, batch: 37280, sum loss: 4770.341309, avg loss: 2.985195, ppl: 19.790359 +epoch: 0, batch: 37281, sum loss: 4640.060059, avg loss: 2.864235, ppl: 17.535627 +epoch: 0, batch: 37282, sum loss: 3947.169922, avg loss: 2.528616, ppl: 12.536146 +epoch: 0, batch: 37283, sum loss: 4460.901367, avg loss: 3.018201, ppl: 20.454468 +epoch: 0, batch: 37284, sum loss: 5162.211914, avg loss: 2.869490, ppl: 17.628019 +epoch: 0, batch: 37285, sum loss: 5959.099609, avg loss: 3.078047, ppl: 21.715956 +epoch: 0, batch: 37286, sum loss: 3798.640381, avg loss: 2.430352, ppl: 11.362884 +epoch: 0, batch: 37287, sum loss: 4691.125000, avg loss: 2.765994, ppl: 15.894825 +epoch: 0, batch: 37288, sum loss: 4537.478516, avg loss: 2.971499, ppl: 19.521154 +epoch: 0, batch: 37289, sum loss: 5778.229492, avg loss: 3.133530, ppl: 22.954870 +epoch: 0, batch: 37290, sum loss: 4420.344727, avg loss: 2.803009, ppl: 16.494200 +epoch: 0, batch: 37291, sum loss: 4793.303711, avg loss: 2.901516, ppl: 18.201714 +epoch: 0, batch: 37292, sum loss: 4286.958984, avg loss: 2.718427, ppl: 15.156458 +epoch: 0, batch: 37293, sum loss: 5546.528809, avg loss: 3.173072, ppl: 23.880724 +epoch: 0, batch: 37294, sum loss: 3546.860107, avg loss: 2.581412, ppl: 13.215787 +epoch: 0, batch: 37295, sum loss: 4687.878906, avg loss: 2.939109, ppl: 
18.899002 +epoch: 0, batch: 37296, sum loss: 5652.766113, avg loss: 2.928895, ppl: 18.706940 +epoch: 0, batch: 37297, sum loss: 4605.571289, avg loss: 2.823771, ppl: 16.840244 +epoch: 0, batch: 37298, sum loss: 5143.285156, avg loss: 2.914043, ppl: 18.431160 +epoch: 0, batch: 37299, sum loss: 4378.920410, avg loss: 3.003375, ppl: 20.153437 +epoch: 0, batch: 37300, sum loss: 3959.293701, avg loss: 2.564310, ppl: 12.991686 +epoch: 0, batch: 37301, sum loss: 4530.959961, avg loss: 2.651235, ppl: 14.171524 +epoch: 0, batch: 37302, sum loss: 5729.454590, avg loss: 3.153250, ppl: 23.412024 +epoch: 0, batch: 37303, sum loss: 5644.744141, avg loss: 2.974049, ppl: 19.570999 +epoch: 0, batch: 37304, sum loss: 4447.519531, avg loss: 3.042079, ppl: 20.948755 +epoch: 0, batch: 37305, sum loss: 5417.008789, avg loss: 3.145766, ppl: 23.237463 +epoch: 0, batch: 37306, sum loss: 5537.086914, avg loss: 2.993020, ppl: 19.945829 +epoch: 0, batch: 37307, sum loss: 4636.138672, avg loss: 2.956721, ppl: 19.234798 +epoch: 0, batch: 37308, sum loss: 4473.878906, avg loss: 3.137362, ppl: 23.043011 +epoch: 0, batch: 37309, sum loss: 5356.768066, avg loss: 2.892423, ppl: 18.036968 +epoch: 0, batch: 37310, sum loss: 5341.723145, avg loss: 2.799645, ppl: 16.438812 +epoch: 0, batch: 37311, sum loss: 3981.420410, avg loss: 2.732615, ppl: 15.373038 +epoch: 0, batch: 37312, sum loss: 4174.358887, avg loss: 2.686203, ppl: 14.675842 +epoch: 0, batch: 37313, sum loss: 5218.149414, avg loss: 3.135907, ppl: 23.009501 +epoch: 0, batch: 37314, sum loss: 5157.877441, avg loss: 2.870271, ppl: 17.641806 +epoch: 0, batch: 37315, sum loss: 3796.326172, avg loss: 2.645523, ppl: 14.090817 +epoch: 0, batch: 37316, sum loss: 5731.854980, avg loss: 2.846005, ppl: 17.218863 +epoch: 0, batch: 37317, sum loss: 5457.032715, avg loss: 3.069197, ppl: 21.524620 +epoch: 0, batch: 37318, sum loss: 4611.712402, avg loss: 2.837977, ppl: 17.081171 +epoch: 0, batch: 37319, sum loss: 3867.361572, avg loss: 2.542644, ppl: 
12.713241 +epoch: 0, batch: 37320, sum loss: 5914.060547, avg loss: 3.244137, ppl: 25.639563 +epoch: 0, batch: 37321, sum loss: 5410.655762, avg loss: 2.919944, ppl: 18.540245 +epoch: 0, batch: 37322, sum loss: 5488.568359, avg loss: 2.979679, ppl: 19.681496 +epoch: 0, batch: 37323, sum loss: 4150.498535, avg loss: 2.672568, ppl: 14.477103 +epoch: 0, batch: 37324, sum loss: 5293.402344, avg loss: 3.213966, ppl: 24.877558 +epoch: 0, batch: 37325, sum loss: 4397.769531, avg loss: 2.955490, ppl: 19.211140 +epoch: 0, batch: 37326, sum loss: 5732.364258, avg loss: 3.189964, ppl: 24.287542 +epoch: 0, batch: 37327, sum loss: 4823.061035, avg loss: 2.912477, ppl: 18.402317 +epoch: 0, batch: 37328, sum loss: 5040.439453, avg loss: 2.875322, ppl: 17.731134 +epoch: 0, batch: 37329, sum loss: 4550.943848, avg loss: 2.883995, ppl: 17.885580 +epoch: 0, batch: 37330, sum loss: 5564.077637, avg loss: 2.925383, ppl: 18.641357 +epoch: 0, batch: 37331, sum loss: 5525.724609, avg loss: 2.993350, ppl: 19.952417 +epoch: 0, batch: 37332, sum loss: 4452.352051, avg loss: 2.826890, ppl: 16.892841 +epoch: 0, batch: 37333, sum loss: 5282.988281, avg loss: 3.137167, ppl: 23.038494 +epoch: 0, batch: 37334, sum loss: 4441.738281, avg loss: 2.825533, ppl: 16.869936 +epoch: 0, batch: 37335, sum loss: 3985.729248, avg loss: 2.655383, ppl: 14.230430 +epoch: 0, batch: 37336, sum loss: 4219.590332, avg loss: 2.805578, ppl: 16.536638 +epoch: 0, batch: 37337, sum loss: 4863.328125, avg loss: 2.889678, ppl: 17.987516 +epoch: 0, batch: 37338, sum loss: 4676.366211, avg loss: 2.844505, ppl: 17.193047 +epoch: 0, batch: 37339, sum loss: 5209.849609, avg loss: 3.043136, ppl: 20.970913 +epoch: 0, batch: 37340, sum loss: 5082.958984, avg loss: 2.960372, ppl: 19.305157 +epoch: 0, batch: 37341, sum loss: 5239.184570, avg loss: 3.105622, ppl: 22.323107 +epoch: 0, batch: 37342, sum loss: 4987.132812, avg loss: 2.819182, ppl: 16.763136 +epoch: 0, batch: 37343, sum loss: 5160.624512, avg loss: 2.915607, ppl: 
18.460018 +epoch: 0, batch: 37344, sum loss: 6274.775391, avg loss: 3.206324, ppl: 24.688156 +epoch: 0, batch: 37345, sum loss: 3828.442139, avg loss: 2.788377, ppl: 16.254622 +epoch: 0, batch: 37346, sum loss: 4896.659180, avg loss: 2.830439, ppl: 16.952900 +epoch: 0, batch: 37347, sum loss: 4920.011719, avg loss: 2.965649, ppl: 19.407291 +epoch: 0, batch: 37348, sum loss: 5279.598633, avg loss: 2.910473, ppl: 18.365490 +epoch: 0, batch: 37349, sum loss: 6044.791992, avg loss: 3.026937, ppl: 20.633924 +epoch: 0, batch: 37350, sum loss: 6892.375977, avg loss: 3.251121, ppl: 25.819262 +epoch: 0, batch: 37351, sum loss: 5455.157227, avg loss: 3.155094, ppl: 23.455240 +epoch: 0, batch: 37352, sum loss: 3831.711426, avg loss: 2.449943, ppl: 11.587689 +epoch: 0, batch: 37353, sum loss: 4824.324219, avg loss: 2.844531, ppl: 17.193489 +epoch: 0, batch: 37354, sum loss: 4994.848633, avg loss: 2.912448, ppl: 18.401794 +epoch: 0, batch: 37355, sum loss: 4335.741699, avg loss: 2.678037, ppl: 14.556490 +epoch: 0, batch: 37356, sum loss: 5015.915039, avg loss: 2.887689, ppl: 17.951769 +epoch: 0, batch: 37357, sum loss: 4857.288086, avg loss: 2.815819, ppl: 16.706858 +epoch: 0, batch: 37358, sum loss: 4891.657227, avg loss: 2.977272, ppl: 19.634172 +epoch: 0, batch: 37359, sum loss: 5217.452148, avg loss: 2.782641, ppl: 16.161650 +epoch: 0, batch: 37360, sum loss: 4755.800781, avg loss: 2.923049, ppl: 18.597906 +epoch: 0, batch: 37361, sum loss: 4371.250000, avg loss: 2.814713, ppl: 16.688393 +epoch: 0, batch: 37362, sum loss: 5471.621094, avg loss: 2.885876, ppl: 17.919262 +epoch: 0, batch: 37363, sum loss: 5315.362305, avg loss: 3.106582, ppl: 22.344549 +epoch: 0, batch: 37364, sum loss: 5046.556641, avg loss: 3.021890, ppl: 20.530060 +epoch: 0, batch: 37365, sum loss: 4823.729980, avg loss: 2.855968, ppl: 17.391264 +epoch: 0, batch: 37366, sum loss: 5995.366211, avg loss: 3.024907, ppl: 20.592096 +epoch: 0, batch: 37367, sum loss: 4001.850098, avg loss: 2.796541, ppl: 
16.387863 +epoch: 0, batch: 37368, sum loss: 4994.428223, avg loss: 3.043527, ppl: 20.979109 +epoch: 0, batch: 37369, sum loss: 5644.274414, avg loss: 3.167382, ppl: 23.745237 +epoch: 0, batch: 37370, sum loss: 6549.599121, avg loss: 3.061991, ppl: 21.370068 +epoch: 0, batch: 37371, sum loss: 4050.049316, avg loss: 2.616311, ppl: 13.685147 +epoch: 0, batch: 37372, sum loss: 5839.165527, avg loss: 3.061964, ppl: 21.369486 +epoch: 0, batch: 37373, sum loss: 5154.525391, avg loss: 2.918757, ppl: 18.518259 +epoch: 0, batch: 37374, sum loss: 5261.176270, avg loss: 2.897123, ppl: 18.121939 +epoch: 0, batch: 37375, sum loss: 4549.519531, avg loss: 2.834592, ppl: 17.023447 +epoch: 0, batch: 37376, sum loss: 5691.304688, avg loss: 3.020863, ppl: 20.508974 +epoch: 0, batch: 37377, sum loss: 4729.746582, avg loss: 2.925014, ppl: 18.634478 +epoch: 0, batch: 37378, sum loss: 5120.438477, avg loss: 2.985678, ppl: 19.799927 +epoch: 0, batch: 37379, sum loss: 5135.084961, avg loss: 2.995965, ppl: 20.004665 +epoch: 0, batch: 37380, sum loss: 4810.142578, avg loss: 3.059887, ppl: 21.325151 +epoch: 0, batch: 37381, sum loss: 5655.449707, avg loss: 2.922713, ppl: 18.591654 +epoch: 0, batch: 37382, sum loss: 4941.448242, avg loss: 2.943090, ppl: 18.974386 +epoch: 0, batch: 37383, sum loss: 4355.622559, avg loss: 2.858020, ppl: 17.426989 +epoch: 0, batch: 37384, sum loss: 5304.121094, avg loss: 3.098202, ppl: 22.158064 +epoch: 0, batch: 37385, sum loss: 4790.625000, avg loss: 2.532043, ppl: 12.579176 +epoch: 0, batch: 37386, sum loss: 4368.050293, avg loss: 2.830882, ppl: 16.960411 +epoch: 0, batch: 37387, sum loss: 4639.088867, avg loss: 2.956717, ppl: 19.234720 +epoch: 0, batch: 37388, sum loss: 5636.824219, avg loss: 3.121165, ppl: 22.672785 +epoch: 0, batch: 37389, sum loss: 4389.836426, avg loss: 2.731697, ppl: 15.358923 +epoch: 0, batch: 37390, sum loss: 4022.157227, avg loss: 2.601654, ppl: 13.486026 +epoch: 0, batch: 37391, sum loss: 4351.142578, avg loss: 3.007010, ppl: 
20.226822 +epoch: 0, batch: 37392, sum loss: 5070.144043, avg loss: 2.907192, ppl: 18.305334 +epoch: 0, batch: 37393, sum loss: 4433.728516, avg loss: 2.799071, ppl: 16.429379 +epoch: 0, batch: 37394, sum loss: 5511.216309, avg loss: 2.980647, ppl: 19.700556 +epoch: 0, batch: 37395, sum loss: 4423.778320, avg loss: 2.702369, ppl: 14.915027 +epoch: 0, batch: 37396, sum loss: 4623.386719, avg loss: 2.812279, ppl: 16.647816 +epoch: 0, batch: 37397, sum loss: 6433.445801, avg loss: 3.078204, ppl: 21.719353 +epoch: 0, batch: 37398, sum loss: 5494.044922, avg loss: 2.997297, ppl: 20.031315 +epoch: 0, batch: 37399, sum loss: 5831.034180, avg loss: 3.106571, ppl: 22.344299 +epoch: 0, batch: 37400, sum loss: 4439.487793, avg loss: 2.981523, ppl: 19.717825 +epoch: 0, batch: 37401, sum loss: 4745.882812, avg loss: 2.878037, ppl: 17.779333 +epoch: 0, batch: 37402, sum loss: 5081.862793, avg loss: 2.837444, ppl: 17.072079 +epoch: 0, batch: 37403, sum loss: 4795.091309, avg loss: 2.892094, ppl: 18.031021 +epoch: 0, batch: 37404, sum loss: 4903.141113, avg loss: 2.874057, ppl: 17.708714 +epoch: 0, batch: 37405, sum loss: 4446.259766, avg loss: 2.925171, ppl: 18.637415 +epoch: 0, batch: 37406, sum loss: 4564.999512, avg loss: 2.865662, ppl: 17.560677 +epoch: 0, batch: 37407, sum loss: 4734.547363, avg loss: 2.785028, ppl: 16.200272 +epoch: 0, batch: 37408, sum loss: 4050.825195, avg loss: 2.891381, ppl: 18.018181 +epoch: 0, batch: 37409, sum loss: 5263.525879, avg loss: 3.056635, ppl: 21.255913 +epoch: 0, batch: 37410, sum loss: 5065.832520, avg loss: 3.055387, ppl: 21.229410 +epoch: 0, batch: 37411, sum loss: 4695.454102, avg loss: 2.816709, ppl: 16.721729 +epoch: 0, batch: 37412, sum loss: 4625.948242, avg loss: 2.673959, ppl: 14.497244 +epoch: 0, batch: 37413, sum loss: 5644.723633, avg loss: 3.270408, ppl: 26.322067 +epoch: 0, batch: 37414, sum loss: 5897.747559, avg loss: 3.043214, ppl: 20.972532 +epoch: 0, batch: 37415, sum loss: 4287.724609, avg loss: 3.015277, ppl: 
20.394747 +epoch: 0, batch: 37416, sum loss: 4816.489258, avg loss: 3.107413, ppl: 22.363106 +epoch: 0, batch: 37417, sum loss: 5107.230469, avg loss: 2.901835, ppl: 18.207533 +epoch: 0, batch: 37418, sum loss: 4849.148926, avg loss: 2.802976, ppl: 16.493662 +epoch: 0, batch: 37419, sum loss: 5178.661621, avg loss: 2.840736, ppl: 17.128370 +epoch: 0, batch: 37420, sum loss: 4782.887695, avg loss: 2.870881, ppl: 17.652561 +epoch: 0, batch: 37421, sum loss: 5418.625000, avg loss: 3.061370, ppl: 21.356798 +epoch: 0, batch: 37422, sum loss: 4009.057617, avg loss: 2.569909, ppl: 13.064633 +epoch: 0, batch: 37423, sum loss: 5389.162109, avg loss: 2.961078, ppl: 19.318789 +epoch: 0, batch: 37424, sum loss: 4919.930664, avg loss: 2.784341, ppl: 16.189148 +epoch: 0, batch: 37425, sum loss: 5400.955078, avg loss: 2.944905, ppl: 19.008848 +epoch: 0, batch: 37426, sum loss: 4407.940918, avg loss: 2.809395, ppl: 16.599873 +epoch: 0, batch: 37427, sum loss: 5184.246582, avg loss: 2.910863, ppl: 18.372646 +epoch: 0, batch: 37428, sum loss: 4711.579102, avg loss: 2.944737, ppl: 19.005663 +epoch: 0, batch: 37429, sum loss: 4353.529297, avg loss: 2.851034, ppl: 17.305670 +epoch: 0, batch: 37430, sum loss: 6402.503906, avg loss: 3.161730, ppl: 23.611416 +epoch: 0, batch: 37431, sum loss: 5686.206055, avg loss: 3.068649, ppl: 21.512810 +epoch: 0, batch: 37432, sum loss: 4838.115723, avg loss: 2.840937, ppl: 17.131809 +epoch: 0, batch: 37433, sum loss: 4839.558594, avg loss: 2.991075, ppl: 19.907063 +epoch: 0, batch: 37434, sum loss: 4567.789062, avg loss: 2.664988, ppl: 14.367774 +epoch: 0, batch: 37435, sum loss: 3842.287354, avg loss: 2.638934, ppl: 13.998269 +epoch: 0, batch: 37436, sum loss: 4869.573730, avg loss: 2.949469, ppl: 19.095818 +epoch: 0, batch: 37437, sum loss: 4342.015137, avg loss: 2.834213, ppl: 17.017012 +epoch: 0, batch: 37438, sum loss: 4181.747070, avg loss: 2.684048, ppl: 14.644256 +epoch: 0, batch: 37439, sum loss: 4611.366211, avg loss: 2.924139, ppl: 
18.618181 +epoch: 0, batch: 37440, sum loss: 5073.078613, avg loss: 2.862911, ppl: 17.512436 +epoch: 0, batch: 37441, sum loss: 4361.058105, avg loss: 2.802737, ppl: 16.489710 +epoch: 0, batch: 37442, sum loss: 5656.144531, avg loss: 3.163392, ppl: 23.650679 +epoch: 0, batch: 37443, sum loss: 4868.556641, avg loss: 2.870611, ppl: 17.647800 +epoch: 0, batch: 37444, sum loss: 4524.848145, avg loss: 2.898686, ppl: 18.150274 +epoch: 0, batch: 37445, sum loss: 5042.666016, avg loss: 2.933488, ppl: 18.793070 +epoch: 0, batch: 37446, sum loss: 4416.521484, avg loss: 2.519408, ppl: 12.421238 +epoch: 0, batch: 37447, sum loss: 3716.160400, avg loss: 2.596898, ppl: 13.422033 +epoch: 0, batch: 37448, sum loss: 4488.728516, avg loss: 3.016619, ppl: 20.422123 +epoch: 0, batch: 37449, sum loss: 4533.027832, avg loss: 2.764041, ppl: 15.863826 +epoch: 0, batch: 37450, sum loss: 5184.731445, avg loss: 2.731682, ppl: 15.358692 +epoch: 0, batch: 37451, sum loss: 5519.444336, avg loss: 2.981872, ppl: 19.724699 +epoch: 0, batch: 37452, sum loss: 6236.484863, avg loss: 3.108916, ppl: 22.396749 +epoch: 0, batch: 37453, sum loss: 4817.490723, avg loss: 2.891651, ppl: 18.023039 +epoch: 0, batch: 37454, sum loss: 4986.580566, avg loss: 2.854368, ppl: 17.363455 +epoch: 0, batch: 37455, sum loss: 5441.112305, avg loss: 2.950712, ppl: 19.119556 +epoch: 0, batch: 37456, sum loss: 5139.315430, avg loss: 2.772015, ppl: 15.990817 +epoch: 0, batch: 37457, sum loss: 4307.930176, avg loss: 2.742158, ppl: 15.520445 +epoch: 0, batch: 37458, sum loss: 6400.091309, avg loss: 3.190474, ppl: 24.299950 +epoch: 0, batch: 37459, sum loss: 5313.285156, avg loss: 3.132833, ppl: 22.938879 +epoch: 0, batch: 37460, sum loss: 5192.703613, avg loss: 2.803836, ppl: 16.507843 +epoch: 0, batch: 37461, sum loss: 4970.988770, avg loss: 3.166235, ppl: 23.718018 +epoch: 0, batch: 37462, sum loss: 4826.421387, avg loss: 2.815882, ppl: 16.707901 +epoch: 0, batch: 37463, sum loss: 4897.557617, avg loss: 3.010177, ppl: 
20.290985 +epoch: 0, batch: 37464, sum loss: 5424.269531, avg loss: 2.859394, ppl: 17.450941 +epoch: 0, batch: 37465, sum loss: 5152.630859, avg loss: 2.886628, ppl: 17.932741 +epoch: 0, batch: 37466, sum loss: 5536.061035, avg loss: 3.357223, ppl: 28.709362 +epoch: 0, batch: 37467, sum loss: 5303.049316, avg loss: 2.992692, ppl: 19.939278 +epoch: 0, batch: 37468, sum loss: 4117.065430, avg loss: 2.944968, ppl: 19.010057 +epoch: 0, batch: 37469, sum loss: 5008.357422, avg loss: 2.937453, ppl: 18.867729 +epoch: 0, batch: 37470, sum loss: 4443.416016, avg loss: 2.879725, ppl: 17.809383 +epoch: 0, batch: 37471, sum loss: 4394.204102, avg loss: 2.945177, ppl: 19.014029 +epoch: 0, batch: 37472, sum loss: 4744.781250, avg loss: 2.894924, ppl: 18.082134 +epoch: 0, batch: 37473, sum loss: 5094.836914, avg loss: 2.862268, ppl: 17.501175 +epoch: 0, batch: 37474, sum loss: 4346.612305, avg loss: 2.899675, ppl: 18.168234 +epoch: 0, batch: 37475, sum loss: 4489.742188, avg loss: 2.850630, ppl: 17.298674 +epoch: 0, batch: 37476, sum loss: 5440.656738, avg loss: 3.189131, ppl: 24.267319 +epoch: 0, batch: 37477, sum loss: 5668.860352, avg loss: 2.975780, ppl: 19.604904 +epoch: 0, batch: 37478, sum loss: 5867.172852, avg loss: 3.097768, ppl: 22.148468 +epoch: 0, batch: 37479, sum loss: 6389.346191, avg loss: 3.204286, ppl: 24.637899 +epoch: 0, batch: 37480, sum loss: 5262.473145, avg loss: 2.701475, ppl: 14.901694 +epoch: 0, batch: 37481, sum loss: 4289.956055, avg loss: 2.744694, ppl: 15.559844 +epoch: 0, batch: 37482, sum loss: 5287.137695, avg loss: 3.031615, ppl: 20.730696 +epoch: 0, batch: 37483, sum loss: 3116.475098, avg loss: 2.229238, ppl: 9.292785 +epoch: 0, batch: 37484, sum loss: 4190.148438, avg loss: 2.907806, ppl: 18.316570 +epoch: 0, batch: 37485, sum loss: 5017.519531, avg loss: 3.057599, ppl: 21.276402 +epoch: 0, batch: 37486, sum loss: 5021.230469, avg loss: 2.895750, ppl: 18.097065 +epoch: 0, batch: 37487, sum loss: 5507.229004, avg loss: 3.222486, ppl: 
25.090424 +epoch: 0, batch: 37488, sum loss: 5224.734375, avg loss: 3.113668, ppl: 22.503429 +epoch: 0, batch: 37489, sum loss: 4904.176758, avg loss: 2.789634, ppl: 16.275063 +epoch: 0, batch: 37490, sum loss: 5198.422363, avg loss: 2.862567, ppl: 17.506411 +epoch: 0, batch: 37491, sum loss: 4784.029785, avg loss: 2.736859, ppl: 15.438422 +epoch: 0, batch: 37492, sum loss: 5415.914551, avg loss: 2.848982, ppl: 17.270189 +epoch: 0, batch: 37493, sum loss: 5117.887695, avg loss: 3.084923, ppl: 21.865789 +epoch: 0, batch: 37494, sum loss: 4745.262207, avg loss: 2.932795, ppl: 18.780045 +epoch: 0, batch: 37495, sum loss: 5270.353027, avg loss: 2.832001, ppl: 16.979399 +epoch: 0, batch: 37496, sum loss: 5055.198730, avg loss: 2.776057, ppl: 16.055582 +epoch: 0, batch: 37497, sum loss: 5590.092773, avg loss: 3.170784, ppl: 23.826162 +epoch: 0, batch: 37498, sum loss: 4806.130371, avg loss: 2.736976, ppl: 15.440229 +epoch: 0, batch: 37499, sum loss: 5337.760254, avg loss: 3.046667, ppl: 21.045086 +epoch: 0, batch: 37500, sum loss: 5688.066895, avg loss: 3.036875, ppl: 20.840017 +epoch: 0, batch: 37501, sum loss: 5102.299805, avg loss: 2.976838, ppl: 19.625654 +epoch: 0, batch: 37502, sum loss: 6125.453125, avg loss: 3.289717, ppl: 26.835274 +epoch: 0, batch: 37503, sum loss: 5298.005859, avg loss: 2.786957, ppl: 16.231556 +epoch: 0, batch: 37504, sum loss: 5465.031250, avg loss: 3.103368, ppl: 22.272840 +epoch: 0, batch: 37505, sum loss: 5623.699707, avg loss: 3.088248, ppl: 21.938608 +epoch: 0, batch: 37506, sum loss: 4140.496094, avg loss: 3.042245, ppl: 20.952236 +epoch: 0, batch: 37507, sum loss: 5340.485352, avg loss: 2.899287, ppl: 18.161182 +epoch: 0, batch: 37508, sum loss: 4736.674316, avg loss: 2.905935, ppl: 18.282331 +epoch: 0, batch: 37509, sum loss: 4670.614746, avg loss: 2.839279, ppl: 17.103437 +epoch: 0, batch: 37510, sum loss: 6328.384277, avg loss: 2.987906, ppl: 19.844080 +epoch: 0, batch: 37511, sum loss: 4667.078613, avg loss: 2.880913, ppl: 
17.830542 +epoch: 0, batch: 37512, sum loss: 4928.317383, avg loss: 3.084054, ppl: 21.846779 +epoch: 0, batch: 37513, sum loss: 5668.005371, avg loss: 3.067102, ppl: 21.479574 +epoch: 0, batch: 37514, sum loss: 4542.514160, avg loss: 2.768138, ppl: 15.928942 +epoch: 0, batch: 37515, sum loss: 5286.493652, avg loss: 3.001984, ppl: 20.125423 +epoch: 0, batch: 37516, sum loss: 4333.823730, avg loss: 3.091172, ppl: 22.002861 +epoch: 0, batch: 37517, sum loss: 5397.411133, avg loss: 3.022067, ppl: 20.533688 +epoch: 0, batch: 37518, sum loss: 4501.177246, avg loss: 2.718102, ppl: 15.151541 +epoch: 0, batch: 37519, sum loss: 5530.845215, avg loss: 3.128306, ppl: 22.835268 +epoch: 0, batch: 37520, sum loss: 4645.864746, avg loss: 2.929297, ppl: 18.714464 +epoch: 0, batch: 37521, sum loss: 4780.152832, avg loss: 2.847024, ppl: 17.236404 +epoch: 0, batch: 37522, sum loss: 4984.283203, avg loss: 2.792316, ppl: 16.318766 +epoch: 0, batch: 37523, sum loss: 4773.637207, avg loss: 3.058064, ppl: 21.286301 +epoch: 0, batch: 37524, sum loss: 4198.166504, avg loss: 2.749290, ppl: 15.631536 +epoch: 0, batch: 37525, sum loss: 5495.059570, avg loss: 3.044354, ppl: 20.996468 +epoch: 0, batch: 37526, sum loss: 4886.046387, avg loss: 2.847347, ppl: 17.241970 +epoch: 0, batch: 37527, sum loss: 5418.557617, avg loss: 2.914770, ppl: 18.444567 +epoch: 0, batch: 37528, sum loss: 5044.840332, avg loss: 3.119877, ppl: 22.643585 +epoch: 0, batch: 37529, sum loss: 4818.113770, avg loss: 2.862813, ppl: 17.510708 +epoch: 0, batch: 37530, sum loss: 4594.519531, avg loss: 3.020723, ppl: 20.506115 +epoch: 0, batch: 37531, sum loss: 4283.316895, avg loss: 2.677073, ppl: 14.542465 +epoch: 0, batch: 37532, sum loss: 4252.128906, avg loss: 2.848043, ppl: 17.253990 +epoch: 0, batch: 37533, sum loss: 4700.123047, avg loss: 2.976645, ppl: 19.621880 +epoch: 0, batch: 37534, sum loss: 5429.224609, avg loss: 3.193662, ppl: 24.377527 +epoch: 0, batch: 37535, sum loss: 5857.203125, avg loss: 2.886744, ppl: 
17.934814 +epoch: 0, batch: 37536, sum loss: 4785.709473, avg loss: 2.733130, ppl: 15.380946 +epoch: 0, batch: 37537, sum loss: 4171.467285, avg loss: 2.810962, ppl: 16.625900 +epoch: 0, batch: 37538, sum loss: 4639.301758, avg loss: 2.890531, ppl: 18.002859 +epoch: 0, batch: 37539, sum loss: 4763.173340, avg loss: 2.913256, ppl: 18.416660 +epoch: 0, batch: 37540, sum loss: 5721.171875, avg loss: 3.261786, ppl: 26.096090 +epoch: 0, batch: 37541, sum loss: 5081.229004, avg loss: 2.794955, ppl: 16.361900 +epoch: 0, batch: 37542, sum loss: 4892.126953, avg loss: 2.801906, ppl: 16.476013 +epoch: 0, batch: 37543, sum loss: 4323.731445, avg loss: 2.727906, ppl: 15.300817 +epoch: 0, batch: 37544, sum loss: 4762.505859, avg loss: 3.066649, ppl: 21.469841 +epoch: 0, batch: 37545, sum loss: 4694.987793, avg loss: 2.739199, ppl: 15.474591 +epoch: 0, batch: 37546, sum loss: 4833.846191, avg loss: 2.752760, ppl: 15.685864 +epoch: 0, batch: 37547, sum loss: 4258.429199, avg loss: 2.938875, ppl: 18.894573 +epoch: 0, batch: 37548, sum loss: 5846.318359, avg loss: 2.985862, ppl: 19.803576 +epoch: 0, batch: 37549, sum loss: 5964.445312, avg loss: 3.189543, ppl: 24.277330 +epoch: 0, batch: 37550, sum loss: 6037.131348, avg loss: 3.059874, ppl: 21.324871 +epoch: 0, batch: 37551, sum loss: 5645.570312, avg loss: 2.858517, ppl: 17.435646 +epoch: 0, batch: 37552, sum loss: 4551.146484, avg loss: 2.541120, ppl: 12.693884 +epoch: 0, batch: 37553, sum loss: 4920.442871, avg loss: 3.016826, ppl: 20.426353 +epoch: 0, batch: 37554, sum loss: 4251.951172, avg loss: 2.912295, ppl: 18.398983 +epoch: 0, batch: 37555, sum loss: 5227.661133, avg loss: 3.134089, ppl: 22.967714 +epoch: 0, batch: 37556, sum loss: 4555.291992, avg loss: 2.990999, ppl: 19.905567 +epoch: 0, batch: 37557, sum loss: 5820.973145, avg loss: 3.081511, ppl: 21.791294 +epoch: 0, batch: 37558, sum loss: 4245.341797, avg loss: 2.758507, ppl: 15.776268 +epoch: 0, batch: 37559, sum loss: 4735.250000, avg loss: 2.899724, ppl: 
18.169134 +epoch: 0, batch: 37560, sum loss: 5581.932617, avg loss: 3.097632, ppl: 22.145447 +epoch: 0, batch: 37561, sum loss: 5001.450195, avg loss: 3.029346, ppl: 20.683706 +epoch: 0, batch: 37562, sum loss: 5184.908203, avg loss: 2.911234, ppl: 18.379473 +epoch: 0, batch: 37563, sum loss: 5531.061523, avg loss: 3.169663, ppl: 23.799456 +epoch: 0, batch: 37564, sum loss: 4967.472656, avg loss: 2.925485, ppl: 18.643259 +epoch: 0, batch: 37565, sum loss: 5147.594238, avg loss: 2.805229, ppl: 16.530857 +epoch: 0, batch: 37566, sum loss: 4356.568359, avg loss: 2.707625, ppl: 14.993622 +epoch: 0, batch: 37567, sum loss: 6323.161133, avg loss: 3.229398, ppl: 25.264444 +epoch: 0, batch: 37568, sum loss: 4576.236816, avg loss: 2.920381, ppl: 18.548355 +epoch: 0, batch: 37569, sum loss: 4690.381348, avg loss: 2.854766, ppl: 17.370378 +epoch: 0, batch: 37570, sum loss: 5464.083984, avg loss: 3.127695, ppl: 22.821323 +epoch: 0, batch: 37571, sum loss: 5624.195801, avg loss: 3.173925, ppl: 23.901121 +epoch: 0, batch: 37572, sum loss: 5567.786621, avg loss: 2.913546, ppl: 18.422009 +epoch: 0, batch: 37573, sum loss: 5337.059082, avg loss: 2.899000, ppl: 18.155983 +epoch: 0, batch: 37574, sum loss: 3936.722656, avg loss: 2.903188, ppl: 18.232172 +epoch: 0, batch: 37575, sum loss: 4253.317383, avg loss: 2.751176, ppl: 15.661033 +epoch: 0, batch: 37576, sum loss: 3749.730957, avg loss: 2.600368, ppl: 13.468698 +epoch: 0, batch: 37577, sum loss: 4560.325684, avg loss: 2.809813, ppl: 16.606806 +epoch: 0, batch: 37578, sum loss: 5170.486816, avg loss: 2.951191, ppl: 19.128727 +epoch: 0, batch: 37579, sum loss: 5129.857422, avg loss: 2.975556, ppl: 19.600512 +epoch: 0, batch: 37580, sum loss: 5180.811523, avg loss: 2.907302, ppl: 18.307333 +epoch: 0, batch: 37581, sum loss: 5281.654785, avg loss: 3.114183, ppl: 22.515032 +epoch: 0, batch: 37582, sum loss: 5387.065918, avg loss: 3.258963, ppl: 26.022535 +epoch: 0, batch: 37583, sum loss: 5116.698242, avg loss: 2.825344, ppl: 
16.866751 +epoch: 0, batch: 37584, sum loss: 4344.171875, avg loss: 2.875031, ppl: 17.725973 +epoch: 0, batch: 37585, sum loss: 5920.126465, avg loss: 3.032852, ppl: 20.756344 +epoch: 0, batch: 37586, sum loss: 4861.321289, avg loss: 2.919713, ppl: 18.535959 +epoch: 0, batch: 37587, sum loss: 5705.938965, avg loss: 3.128256, ppl: 22.834124 +epoch: 0, batch: 37588, sum loss: 4483.675293, avg loss: 2.934342, ppl: 18.809130 +epoch: 0, batch: 37589, sum loss: 4937.506836, avg loss: 2.818212, ppl: 16.746878 +epoch: 0, batch: 37590, sum loss: 5831.584961, avg loss: 3.212995, ppl: 24.853401 +epoch: 0, batch: 37591, sum loss: 5285.357910, avg loss: 3.101736, ppl: 22.236517 +epoch: 0, batch: 37592, sum loss: 5818.571777, avg loss: 3.013243, ppl: 20.353292 +epoch: 0, batch: 37593, sum loss: 4909.971191, avg loss: 2.945394, ppl: 19.018147 +epoch: 0, batch: 37594, sum loss: 5593.188477, avg loss: 3.029896, ppl: 20.695086 +epoch: 0, batch: 37595, sum loss: 4757.568359, avg loss: 2.933149, ppl: 18.786703 +epoch: 0, batch: 37596, sum loss: 5099.702148, avg loss: 2.992783, ppl: 19.941103 +epoch: 0, batch: 37597, sum loss: 4325.205078, avg loss: 2.823241, ppl: 16.831312 +epoch: 0, batch: 37598, sum loss: 6011.411133, avg loss: 3.328578, ppl: 27.898628 +epoch: 0, batch: 37599, sum loss: 5501.098145, avg loss: 3.402040, ppl: 30.025282 +epoch: 0, batch: 37600, sum loss: 5681.278320, avg loss: 2.980733, ppl: 19.702261 +epoch: 0, batch: 37601, sum loss: 5060.653320, avg loss: 2.880281, ppl: 17.819275 +epoch: 0, batch: 37602, sum loss: 4869.970703, avg loss: 2.762320, ppl: 15.836546 +epoch: 0, batch: 37603, sum loss: 4457.582520, avg loss: 2.751594, ppl: 15.667591 +epoch: 0, batch: 37604, sum loss: 4810.011719, avg loss: 2.847846, ppl: 17.250584 +epoch: 0, batch: 37605, sum loss: 5338.001465, avg loss: 3.002251, ppl: 20.130795 +epoch: 0, batch: 37606, sum loss: 5181.981445, avg loss: 2.935967, ppl: 18.839706 +epoch: 0, batch: 37607, sum loss: 4477.685547, avg loss: 3.088059, ppl: 
21.934465 +epoch: 0, batch: 37608, sum loss: 4927.186523, avg loss: 3.004382, ppl: 20.173748 +epoch: 0, batch: 37609, sum loss: 4668.486328, avg loss: 2.829386, ppl: 16.935055 +epoch: 0, batch: 37610, sum loss: 5531.654785, avg loss: 2.836746, ppl: 17.060162 +epoch: 0, batch: 37611, sum loss: 4613.490723, avg loss: 2.853117, ppl: 17.341761 +epoch: 0, batch: 37612, sum loss: 4879.074707, avg loss: 2.861628, ppl: 17.489969 +epoch: 0, batch: 37613, sum loss: 3979.399414, avg loss: 2.828287, ppl: 16.916456 +epoch: 0, batch: 37614, sum loss: 4374.726562, avg loss: 2.730791, ppl: 15.345015 +epoch: 0, batch: 37615, sum loss: 3743.346191, avg loss: 2.583400, ppl: 13.242082 +epoch: 0, batch: 37616, sum loss: 5000.391602, avg loss: 3.100057, ppl: 22.199215 +epoch: 0, batch: 37617, sum loss: 5114.986328, avg loss: 2.735287, ppl: 15.414165 +epoch: 0, batch: 37618, sum loss: 5239.350098, avg loss: 3.139215, ppl: 23.085743 +epoch: 0, batch: 37619, sum loss: 5264.565918, avg loss: 3.188714, ppl: 24.257202 +epoch: 0, batch: 37620, sum loss: 5212.272461, avg loss: 2.964888, ppl: 19.392525 +epoch: 0, batch: 37621, sum loss: 3913.438965, avg loss: 2.459735, ppl: 11.701715 +epoch: 0, batch: 37622, sum loss: 4795.123047, avg loss: 2.774956, ppl: 16.037914 +epoch: 0, batch: 37623, sum loss: 5590.979980, avg loss: 2.981856, ppl: 19.724394 +epoch: 0, batch: 37624, sum loss: 6039.753906, avg loss: 3.087809, ppl: 21.928970 +epoch: 0, batch: 37625, sum loss: 4545.334473, avg loss: 2.928695, ppl: 18.703207 +epoch: 0, batch: 37626, sum loss: 4950.808105, avg loss: 3.073128, ppl: 21.609381 +epoch: 0, batch: 37627, sum loss: 4338.196777, avg loss: 2.691189, ppl: 14.749203 +epoch: 0, batch: 37628, sum loss: 4826.981934, avg loss: 2.996264, ppl: 20.010647 +epoch: 0, batch: 37629, sum loss: 4879.241699, avg loss: 2.780195, ppl: 16.122156 +epoch: 0, batch: 37630, sum loss: 5433.416504, avg loss: 2.825490, ppl: 16.869202 +epoch: 0, batch: 37631, sum loss: 4387.260254, avg loss: 2.597549, ppl: 
13.430779 +epoch: 0, batch: 37632, sum loss: 5058.584961, avg loss: 2.951333, ppl: 19.131439 +epoch: 0, batch: 37633, sum loss: 5560.951660, avg loss: 2.986548, ppl: 19.817150 +epoch: 0, batch: 37634, sum loss: 5113.537598, avg loss: 2.981654, ppl: 19.720415 +epoch: 0, batch: 37635, sum loss: 5202.152832, avg loss: 2.886877, ppl: 17.937210 +epoch: 0, batch: 37636, sum loss: 4457.850098, avg loss: 2.821424, ppl: 16.800762 +epoch: 0, batch: 37637, sum loss: 5520.186035, avg loss: 3.131132, ppl: 22.899891 +epoch: 0, batch: 37638, sum loss: 5001.614258, avg loss: 2.587488, ppl: 13.296331 +epoch: 0, batch: 37639, sum loss: 4872.350586, avg loss: 2.824551, ppl: 16.853378 +epoch: 0, batch: 37640, sum loss: 5456.928711, avg loss: 2.852550, ppl: 17.331926 +epoch: 0, batch: 37641, sum loss: 4163.329590, avg loss: 2.794181, ppl: 16.349236 +epoch: 0, batch: 37642, sum loss: 4423.635254, avg loss: 2.757877, ppl: 15.766341 +epoch: 0, batch: 37643, sum loss: 4776.734863, avg loss: 2.775558, ppl: 16.047575 +epoch: 0, batch: 37644, sum loss: 5694.828613, avg loss: 2.889309, ppl: 17.980883 +epoch: 0, batch: 37645, sum loss: 4794.911621, avg loss: 2.959822, ppl: 19.294540 +epoch: 0, batch: 37646, sum loss: 4989.043457, avg loss: 2.943388, ppl: 18.980051 +epoch: 0, batch: 37647, sum loss: 5654.193848, avg loss: 2.919047, ppl: 18.523624 +epoch: 0, batch: 37648, sum loss: 4781.803711, avg loss: 2.999877, ppl: 20.083061 +epoch: 0, batch: 37649, sum loss: 5109.633301, avg loss: 2.857737, ppl: 17.422052 +epoch: 0, batch: 37650, sum loss: 5758.008789, avg loss: 3.190032, ppl: 24.289217 +epoch: 0, batch: 37651, sum loss: 4899.637695, avg loss: 2.820747, ppl: 16.789391 +epoch: 0, batch: 37652, sum loss: 4605.205078, avg loss: 2.821817, ppl: 16.807360 +epoch: 0, batch: 37653, sum loss: 4407.978516, avg loss: 2.768831, ppl: 15.939985 +epoch: 0, batch: 37654, sum loss: 4357.493164, avg loss: 2.728549, ppl: 15.310658 +epoch: 0, batch: 37655, sum loss: 5023.465820, avg loss: 2.956719, ppl: 
19.234766 +epoch: 0, batch: 37656, sum loss: 4653.191406, avg loss: 2.851220, ppl: 17.308891 +epoch: 0, batch: 37657, sum loss: 5461.510254, avg loss: 2.915916, ppl: 18.465719 +epoch: 0, batch: 37658, sum loss: 5070.341797, avg loss: 3.014472, ppl: 20.378328 +epoch: 0, batch: 37659, sum loss: 4765.026855, avg loss: 2.844792, ppl: 17.197987 +epoch: 0, batch: 37660, sum loss: 5579.390625, avg loss: 3.077436, ppl: 21.702679 +epoch: 0, batch: 37661, sum loss: 5739.571777, avg loss: 3.246364, ppl: 25.696745 +epoch: 0, batch: 37662, sum loss: 5621.526367, avg loss: 2.882834, ppl: 17.864830 +epoch: 0, batch: 37663, sum loss: 5674.652832, avg loss: 3.020039, ppl: 20.492088 +epoch: 0, batch: 37664, sum loss: 5386.742676, avg loss: 2.885240, ppl: 17.907862 +epoch: 0, batch: 37665, sum loss: 4698.294922, avg loss: 3.058786, ppl: 21.301680 +epoch: 0, batch: 37666, sum loss: 5637.624023, avg loss: 3.232582, ppl: 25.345022 +epoch: 0, batch: 37667, sum loss: 4947.648438, avg loss: 2.932809, ppl: 18.780308 +epoch: 0, batch: 37668, sum loss: 4490.182129, avg loss: 2.618182, ppl: 13.710774 +epoch: 0, batch: 37669, sum loss: 4632.674805, avg loss: 2.857912, ppl: 17.425098 +epoch: 0, batch: 37670, sum loss: 3030.867188, avg loss: 2.096035, ppl: 8.133859 +epoch: 0, batch: 37671, sum loss: 4998.410645, avg loss: 2.973475, ppl: 19.559763 +epoch: 0, batch: 37672, sum loss: 4517.174316, avg loss: 2.923738, ppl: 18.610716 +epoch: 0, batch: 37673, sum loss: 4417.313477, avg loss: 2.889021, ppl: 17.975710 +epoch: 0, batch: 37674, sum loss: 4829.635254, avg loss: 2.946696, ppl: 19.042936 +epoch: 0, batch: 37675, sum loss: 4783.683105, avg loss: 3.004826, ppl: 20.182705 +epoch: 0, batch: 37676, sum loss: 4867.182617, avg loss: 3.040089, ppl: 20.907106 +epoch: 0, batch: 37677, sum loss: 5622.217285, avg loss: 2.899545, ppl: 18.165873 +epoch: 0, batch: 37678, sum loss: 4751.926270, avg loss: 2.759539, ppl: 15.792559 +epoch: 0, batch: 37679, sum loss: 5037.580078, avg loss: 2.956326, ppl: 
19.227211 +epoch: 0, batch: 37680, sum loss: 4492.400879, avg loss: 2.523821, ppl: 12.476172 +epoch: 0, batch: 37681, sum loss: 4826.772461, avg loss: 3.078299, ppl: 21.721418 +epoch: 0, batch: 37682, sum loss: 4295.277832, avg loss: 2.875019, ppl: 17.725754 +epoch: 0, batch: 37683, sum loss: 4907.518555, avg loss: 2.723373, ppl: 15.231618 +epoch: 0, batch: 37684, sum loss: 4883.305664, avg loss: 3.023719, ppl: 20.567633 +epoch: 0, batch: 37685, sum loss: 4346.276367, avg loss: 2.938659, ppl: 18.890491 +epoch: 0, batch: 37686, sum loss: 4612.286621, avg loss: 2.862996, ppl: 17.513922 +epoch: 0, batch: 37687, sum loss: 4861.056641, avg loss: 2.718712, ppl: 15.160780 +epoch: 0, batch: 37688, sum loss: 5114.675781, avg loss: 2.936094, ppl: 18.842106 +epoch: 0, batch: 37689, sum loss: 5219.692871, avg loss: 2.977577, ppl: 19.640179 +epoch: 0, batch: 37690, sum loss: 5322.587891, avg loss: 2.905343, ppl: 18.271507 +epoch: 0, batch: 37691, sum loss: 5164.883301, avg loss: 2.901620, ppl: 18.203609 +epoch: 0, batch: 37692, sum loss: 5505.530762, avg loss: 3.080879, ppl: 21.777536 +epoch: 0, batch: 37693, sum loss: 4836.066406, avg loss: 2.883760, ppl: 17.881390 +epoch: 0, batch: 37694, sum loss: 5045.876953, avg loss: 2.909964, ppl: 18.356131 +epoch: 0, batch: 37695, sum loss: 5022.973633, avg loss: 3.016801, ppl: 20.425848 +epoch: 0, batch: 37696, sum loss: 4330.002930, avg loss: 2.439438, ppl: 11.466596 +epoch: 0, batch: 37697, sum loss: 4316.910645, avg loss: 2.801370, ppl: 16.467186 +epoch: 0, batch: 37698, sum loss: 4847.254395, avg loss: 2.957446, ppl: 19.248749 +epoch: 0, batch: 37699, sum loss: 4654.871094, avg loss: 2.927592, ppl: 18.682585 +epoch: 0, batch: 37700, sum loss: 4808.100098, avg loss: 2.860262, ppl: 17.466097 +epoch: 0, batch: 37701, sum loss: 4899.829102, avg loss: 2.911366, ppl: 18.381891 +epoch: 0, batch: 37702, sum loss: 4472.492676, avg loss: 2.663784, ppl: 14.350482 +epoch: 0, batch: 37703, sum loss: 5308.976562, avg loss: 2.985926, ppl: 
19.804836 +epoch: 0, batch: 37704, sum loss: 5368.149902, avg loss: 3.032853, ppl: 20.756369 +epoch: 0, batch: 37705, sum loss: 5207.191895, avg loss: 2.876902, ppl: 17.759167 +epoch: 0, batch: 37706, sum loss: 4516.715820, avg loss: 2.794997, ppl: 16.362587 +epoch: 0, batch: 37707, sum loss: 4808.945312, avg loss: 2.848901, ppl: 17.268797 +epoch: 0, batch: 37708, sum loss: 4365.436523, avg loss: 2.782305, ppl: 16.156218 +epoch: 0, batch: 37709, sum loss: 5381.878906, avg loss: 3.184544, ppl: 24.156261 +epoch: 0, batch: 37710, sum loss: 4883.029785, avg loss: 2.623874, ppl: 13.789042 +epoch: 0, batch: 37711, sum loss: 4135.978027, avg loss: 2.705022, ppl: 14.954640 +epoch: 0, batch: 37712, sum loss: 5231.283203, avg loss: 3.009944, ppl: 20.286272 +epoch: 0, batch: 37713, sum loss: 4907.700195, avg loss: 2.845044, ppl: 17.202311 +epoch: 0, batch: 37714, sum loss: 5120.843262, avg loss: 2.838605, ppl: 17.091904 +epoch: 0, batch: 37715, sum loss: 5380.403809, avg loss: 2.932100, ppl: 18.767006 +epoch: 0, batch: 37716, sum loss: 4383.313477, avg loss: 2.762012, ppl: 15.831672 +epoch: 0, batch: 37717, sum loss: 5884.115234, avg loss: 3.042459, ppl: 20.956707 +epoch: 0, batch: 37718, sum loss: 6003.651855, avg loss: 3.013882, ppl: 20.366302 +epoch: 0, batch: 37719, sum loss: 5140.850586, avg loss: 3.031162, ppl: 20.721296 +epoch: 0, batch: 37720, sum loss: 4765.740723, avg loss: 2.909487, ppl: 18.347393 +epoch: 0, batch: 37721, sum loss: 5585.349609, avg loss: 2.921208, ppl: 18.563698 +epoch: 0, batch: 37722, sum loss: 4166.729980, avg loss: 2.805879, ppl: 16.541603 +epoch: 0, batch: 37723, sum loss: 4939.196289, avg loss: 3.028324, ppl: 20.662571 +epoch: 0, batch: 37724, sum loss: 4376.231934, avg loss: 2.536946, ppl: 12.641010 +epoch: 0, batch: 37725, sum loss: 5236.545898, avg loss: 2.966882, ppl: 19.431229 +epoch: 0, batch: 37726, sum loss: 5144.892090, avg loss: 2.970492, ppl: 19.501511 +epoch: 0, batch: 37727, sum loss: 4709.631348, avg loss: 2.896452, ppl: 
18.109781 +epoch: 0, batch: 37728, sum loss: 4590.361328, avg loss: 2.933138, ppl: 18.786493 +epoch: 0, batch: 37729, sum loss: 5252.499023, avg loss: 2.921301, ppl: 18.565424 +epoch: 0, batch: 37730, sum loss: 4428.262207, avg loss: 2.783320, ppl: 16.172628 +epoch: 0, batch: 37731, sum loss: 4824.838867, avg loss: 2.592606, ppl: 13.364549 +epoch: 0, batch: 37732, sum loss: 5629.456055, avg loss: 3.036384, ppl: 20.829784 +epoch: 0, batch: 37733, sum loss: 4935.981934, avg loss: 2.931106, ppl: 18.748352 +epoch: 0, batch: 37734, sum loss: 4550.501953, avg loss: 2.721592, ppl: 15.204512 +epoch: 0, batch: 37735, sum loss: 5757.452148, avg loss: 2.975427, ppl: 19.597998 +epoch: 0, batch: 37736, sum loss: 5890.192383, avg loss: 3.000607, ppl: 20.097742 +epoch: 0, batch: 37737, sum loss: 3860.636719, avg loss: 2.601507, ppl: 13.484046 +epoch: 0, batch: 37738, sum loss: 4487.937500, avg loss: 2.721611, ppl: 15.204805 +epoch: 0, batch: 37739, sum loss: 3358.391846, avg loss: 2.473042, ppl: 11.858471 +epoch: 0, batch: 37740, sum loss: 5980.315430, avg loss: 3.132695, ppl: 22.935719 +epoch: 0, batch: 37741, sum loss: 5859.567871, avg loss: 3.158797, ppl: 23.542253 +epoch: 0, batch: 37742, sum loss: 5284.626953, avg loss: 3.004336, ppl: 20.172819 +epoch: 0, batch: 37743, sum loss: 4445.583496, avg loss: 2.752683, ppl: 15.684664 +epoch: 0, batch: 37744, sum loss: 5315.641113, avg loss: 3.184926, ppl: 24.165495 +epoch: 0, batch: 37745, sum loss: 4929.832520, avg loss: 3.002334, ppl: 20.132473 +epoch: 0, batch: 37746, sum loss: 4532.818848, avg loss: 2.667933, ppl: 14.410159 +epoch: 0, batch: 37747, sum loss: 4523.639648, avg loss: 3.031930, ppl: 20.737221 +epoch: 0, batch: 37748, sum loss: 4267.850586, avg loss: 2.830140, ppl: 16.947826 +epoch: 0, batch: 37749, sum loss: 3774.171387, avg loss: 2.613692, ppl: 13.649352 +epoch: 0, batch: 37750, sum loss: 6252.012695, avg loss: 3.070733, ppl: 21.557705 +epoch: 0, batch: 37751, sum loss: 5076.016602, avg loss: 2.872675, ppl: 
17.684259 +epoch: 0, batch: 37752, sum loss: 5829.754395, avg loss: 3.026872, ppl: 20.632586 +epoch: 0, batch: 37753, sum loss: 4440.125977, avg loss: 2.751008, ppl: 15.658401 +epoch: 0, batch: 37754, sum loss: 5698.071289, avg loss: 3.024454, ppl: 20.582766 +epoch: 0, batch: 37755, sum loss: 4691.185547, avg loss: 2.926504, ppl: 18.662266 +epoch: 0, batch: 37756, sum loss: 5677.248047, avg loss: 3.077099, ppl: 21.695375 +epoch: 0, batch: 37757, sum loss: 5539.371582, avg loss: 3.122532, ppl: 22.703785 +epoch: 0, batch: 37758, sum loss: 4969.411133, avg loss: 3.037538, ppl: 20.853828 +epoch: 0, batch: 37759, sum loss: 4552.240723, avg loss: 2.820471, ppl: 16.784760 +epoch: 0, batch: 37760, sum loss: 4051.521484, avg loss: 2.817470, ppl: 16.734457 +epoch: 0, batch: 37761, sum loss: 5214.135254, avg loss: 2.994908, ppl: 19.983528 +epoch: 0, batch: 37762, sum loss: 5271.905273, avg loss: 2.927210, ppl: 18.675451 +epoch: 0, batch: 37763, sum loss: 4814.915527, avg loss: 2.817388, ppl: 16.733084 +epoch: 0, batch: 37764, sum loss: 5015.285156, avg loss: 2.809684, ppl: 16.604668 +epoch: 0, batch: 37765, sum loss: 5610.937500, avg loss: 3.021506, ppl: 20.522181 +epoch: 0, batch: 37766, sum loss: 4858.444336, avg loss: 3.019543, ppl: 20.481922 +epoch: 0, batch: 37767, sum loss: 5355.476562, avg loss: 3.135525, ppl: 23.000702 +epoch: 0, batch: 37768, sum loss: 5791.952637, avg loss: 3.152941, ppl: 23.404802 +epoch: 0, batch: 37769, sum loss: 5115.145020, avg loss: 3.140052, ppl: 23.105070 +epoch: 0, batch: 37770, sum loss: 4883.973145, avg loss: 2.983490, ppl: 19.756647 +epoch: 0, batch: 37771, sum loss: 5340.364746, avg loss: 3.193998, ppl: 24.385735 +epoch: 0, batch: 37772, sum loss: 5225.738281, avg loss: 2.989553, ppl: 19.876795 +epoch: 0, batch: 37773, sum loss: 4902.965820, avg loss: 3.017210, ppl: 20.434196 +epoch: 0, batch: 37774, sum loss: 4834.219238, avg loss: 2.711284, ppl: 15.048588 +epoch: 0, batch: 37775, sum loss: 5212.882812, avg loss: 2.926942, ppl: 
18.670443 +epoch: 0, batch: 37776, sum loss: 4514.128906, avg loss: 2.814295, ppl: 16.681408 +epoch: 0, batch: 37777, sum loss: 4942.320801, avg loss: 2.914104, ppl: 18.432293 +epoch: 0, batch: 37778, sum loss: 5985.100586, avg loss: 3.224731, ppl: 25.146809 +epoch: 0, batch: 37779, sum loss: 5134.935059, avg loss: 3.058329, ppl: 21.291956 +epoch: 0, batch: 37780, sum loss: 5133.004883, avg loss: 2.658211, ppl: 14.270732 +epoch: 0, batch: 37781, sum loss: 5155.362305, avg loss: 2.899529, ppl: 18.165586 +epoch: 0, batch: 37782, sum loss: 5165.099609, avg loss: 2.759135, ppl: 15.786175 +epoch: 0, batch: 37783, sum loss: 5176.684570, avg loss: 3.013204, ppl: 20.352507 +epoch: 0, batch: 37784, sum loss: 5836.422852, avg loss: 2.982332, ppl: 19.733772 +epoch: 0, batch: 37785, sum loss: 4585.676270, avg loss: 2.703819, ppl: 14.936660 +epoch: 0, batch: 37786, sum loss: 5864.211914, avg loss: 3.071877, ppl: 21.582365 +epoch: 0, batch: 37787, sum loss: 5735.680176, avg loss: 3.102044, ppl: 22.243378 +epoch: 0, batch: 37788, sum loss: 6110.135742, avg loss: 3.165874, ppl: 23.709446 +epoch: 0, batch: 37789, sum loss: 4328.553711, avg loss: 3.001771, ppl: 20.121140 +epoch: 0, batch: 37790, sum loss: 5423.969727, avg loss: 3.040342, ppl: 20.912392 +epoch: 0, batch: 37791, sum loss: 5131.513672, avg loss: 2.781308, ppl: 16.140121 +epoch: 0, batch: 37792, sum loss: 5729.155273, avg loss: 3.158300, ppl: 23.530552 +epoch: 0, batch: 37793, sum loss: 4367.199219, avg loss: 2.997391, ppl: 20.033207 +epoch: 0, batch: 37794, sum loss: 5243.492676, avg loss: 3.111865, ppl: 22.462900 +epoch: 0, batch: 37795, sum loss: 4738.881836, avg loss: 2.837654, ppl: 17.075657 +epoch: 0, batch: 37796, sum loss: 4706.439941, avg loss: 2.770124, ppl: 15.960608 +epoch: 0, batch: 37797, sum loss: 5700.008789, avg loss: 3.011098, ppl: 20.309687 +epoch: 0, batch: 37798, sum loss: 5084.287598, avg loss: 2.990758, ppl: 19.900755 +epoch: 0, batch: 37799, sum loss: 5306.175781, avg loss: 2.816442, ppl: 
16.717257 +epoch: 0, batch: 37800, sum loss: 4779.049316, avg loss: 2.880681, ppl: 17.826405 +epoch: 0, batch: 37801, sum loss: 5146.313477, avg loss: 2.964466, ppl: 19.384356 +epoch: 0, batch: 37802, sum loss: 4803.481934, avg loss: 2.822257, ppl: 16.814764 +epoch: 0, batch: 37803, sum loss: 5769.424805, avg loss: 3.049379, ppl: 21.102234 +epoch: 0, batch: 37804, sum loss: 4580.896484, avg loss: 2.749638, ppl: 15.636964 +epoch: 0, batch: 37805, sum loss: 5198.558594, avg loss: 2.979117, ppl: 19.670433 +epoch: 0, batch: 37806, sum loss: 5737.751953, avg loss: 3.004059, ppl: 20.167221 +epoch: 0, batch: 37807, sum loss: 5148.315430, avg loss: 2.810216, ppl: 16.613506 +epoch: 0, batch: 37808, sum loss: 4918.587891, avg loss: 2.945262, ppl: 19.015648 +epoch: 0, batch: 37809, sum loss: 4133.996094, avg loss: 2.679194, ppl: 14.573342 +epoch: 0, batch: 37810, sum loss: 4634.158203, avg loss: 2.896349, ppl: 18.107908 +epoch: 0, batch: 37811, sum loss: 4390.016602, avg loss: 2.680108, ppl: 14.586662 +epoch: 0, batch: 37812, sum loss: 4717.154297, avg loss: 2.794523, ppl: 16.354822 +epoch: 0, batch: 37813, sum loss: 5175.970703, avg loss: 2.972987, ppl: 19.550232 +epoch: 0, batch: 37814, sum loss: 5378.853516, avg loss: 3.082438, ppl: 21.811504 +epoch: 0, batch: 37815, sum loss: 3794.336914, avg loss: 2.751513, ppl: 15.666321 +epoch: 0, batch: 37816, sum loss: 4120.425293, avg loss: 2.551347, ppl: 12.824367 +epoch: 0, batch: 37817, sum loss: 5587.031250, avg loss: 2.897838, ppl: 18.134893 +epoch: 0, batch: 37818, sum loss: 5345.061035, avg loss: 2.933623, ppl: 18.795605 +epoch: 0, batch: 37819, sum loss: 5071.098633, avg loss: 2.990035, ppl: 19.886370 +epoch: 0, batch: 37820, sum loss: 5395.150391, avg loss: 2.793967, ppl: 16.345736 +epoch: 0, batch: 37821, sum loss: 4945.711914, avg loss: 2.883797, ppl: 17.882042 +epoch: 0, batch: 37822, sum loss: 5936.619629, avg loss: 3.129478, ppl: 22.862041 +epoch: 0, batch: 37823, sum loss: 5973.956543, avg loss: 3.101743, ppl: 
22.236675 +epoch: 0, batch: 37824, sum loss: 4053.678223, avg loss: 2.661640, ppl: 14.319760 +epoch: 0, batch: 37825, sum loss: 4016.389160, avg loss: 2.701001, ppl: 14.894640 +epoch: 0, batch: 37826, sum loss: 4998.911133, avg loss: 2.716799, ppl: 15.131815 +epoch: 0, batch: 37827, sum loss: 5067.637207, avg loss: 2.982718, ppl: 19.741396 +epoch: 0, batch: 37828, sum loss: 5594.756348, avg loss: 3.125562, ppl: 22.772694 +epoch: 0, batch: 37829, sum loss: 4047.829834, avg loss: 2.801266, ppl: 16.465487 +epoch: 0, batch: 37830, sum loss: 4831.972168, avg loss: 2.817476, ppl: 16.734564 +epoch: 0, batch: 37831, sum loss: 4898.775391, avg loss: 3.079054, ppl: 21.737837 +epoch: 0, batch: 37832, sum loss: 4765.990723, avg loss: 2.952906, ppl: 19.161564 +epoch: 0, batch: 37833, sum loss: 5380.816406, avg loss: 2.896026, ppl: 18.102062 +epoch: 0, batch: 37834, sum loss: 6665.194824, avg loss: 3.280116, ppl: 26.578846 +epoch: 0, batch: 37835, sum loss: 4546.540527, avg loss: 2.809976, ppl: 16.609514 +epoch: 0, batch: 37836, sum loss: 5181.876465, avg loss: 2.904639, ppl: 18.258659 +epoch: 0, batch: 37837, sum loss: 4004.981445, avg loss: 2.695142, ppl: 14.807625 +epoch: 0, batch: 37838, sum loss: 4223.629395, avg loss: 2.797106, ppl: 16.397118 +epoch: 0, batch: 37839, sum loss: 4320.760742, avg loss: 2.850106, ppl: 17.289619 +epoch: 0, batch: 37840, sum loss: 5031.071289, avg loss: 3.115214, ppl: 22.538261 +epoch: 0, batch: 37841, sum loss: 5248.553711, avg loss: 2.930516, ppl: 18.737288 +epoch: 0, batch: 37842, sum loss: 4518.513672, avg loss: 2.758555, ppl: 15.777031 +epoch: 0, batch: 37843, sum loss: 5069.044434, avg loss: 2.945406, ppl: 19.018391 +epoch: 0, batch: 37844, sum loss: 5256.015625, avg loss: 2.934682, ppl: 18.815517 +epoch: 0, batch: 37845, sum loss: 4753.089844, avg loss: 2.979994, ppl: 19.687691 +epoch: 0, batch: 37846, sum loss: 5154.253906, avg loss: 2.748935, ppl: 15.625989 +epoch: 0, batch: 37847, sum loss: 5197.479004, avg loss: 3.062746, ppl: 
21.386194 +epoch: 0, batch: 37848, sum loss: 5671.435547, avg loss: 3.163099, ppl: 23.643744 +epoch: 0, batch: 37849, sum loss: 3953.792725, avg loss: 2.704373, ppl: 14.944938 +epoch: 0, batch: 37850, sum loss: 5166.759766, avg loss: 2.950748, ppl: 19.120255 +epoch: 0, batch: 37851, sum loss: 4092.835693, avg loss: 2.685588, ppl: 14.666818 +epoch: 0, batch: 37852, sum loss: 6105.434570, avg loss: 3.277206, ppl: 26.501616 +epoch: 0, batch: 37853, sum loss: 5599.067871, avg loss: 3.047941, ppl: 21.071917 +epoch: 0, batch: 37854, sum loss: 3540.276367, avg loss: 2.595510, ppl: 13.403428 +epoch: 0, batch: 37855, sum loss: 6101.238770, avg loss: 3.023409, ppl: 20.561264 +epoch: 0, batch: 37856, sum loss: 4089.060059, avg loss: 2.679594, ppl: 14.579170 +epoch: 0, batch: 37857, sum loss: 5151.898438, avg loss: 3.004022, ppl: 20.166491 +epoch: 0, batch: 37858, sum loss: 4675.665039, avg loss: 2.929615, ppl: 18.720417 +epoch: 0, batch: 37859, sum loss: 5152.894531, avg loss: 2.936122, ppl: 18.842636 +epoch: 0, batch: 37860, sum loss: 4717.126953, avg loss: 3.033522, ppl: 20.770264 +epoch: 0, batch: 37861, sum loss: 4960.453613, avg loss: 2.982834, ppl: 19.743698 +epoch: 0, batch: 37862, sum loss: 4958.163086, avg loss: 2.899511, ppl: 18.165253 +epoch: 0, batch: 37863, sum loss: 5324.636719, avg loss: 2.976320, ppl: 19.615499 +epoch: 0, batch: 37864, sum loss: 4392.081055, avg loss: 2.622138, ppl: 13.765122 +epoch: 0, batch: 37865, sum loss: 5149.277344, avg loss: 2.973024, ppl: 19.550951 +epoch: 0, batch: 37866, sum loss: 5031.875000, avg loss: 2.949516, ppl: 19.096714 +epoch: 0, batch: 37867, sum loss: 4567.843750, avg loss: 2.760026, ppl: 15.800261 +epoch: 0, batch: 37868, sum loss: 5056.560547, avg loss: 3.180227, ppl: 24.052208 +epoch: 0, batch: 37869, sum loss: 5489.462891, avg loss: 3.044627, ppl: 21.002199 +epoch: 0, batch: 37870, sum loss: 4562.149414, avg loss: 2.548687, ppl: 12.790296 +epoch: 0, batch: 37871, sum loss: 5991.088379, avg loss: 3.058238, ppl: 
21.290016 +epoch: 0, batch: 37872, sum loss: 4968.033203, avg loss: 2.978437, ppl: 19.657072 +epoch: 0, batch: 37873, sum loss: 4036.088379, avg loss: 2.723407, ppl: 15.232123 +epoch: 0, batch: 37874, sum loss: 6471.163574, avg loss: 3.243691, ppl: 25.628139 +epoch: 0, batch: 37875, sum loss: 4858.470703, avg loss: 2.795438, ppl: 16.369795 +epoch: 0, batch: 37876, sum loss: 6303.017578, avg loss: 3.197878, ppl: 24.480524 +epoch: 0, batch: 37877, sum loss: 4917.651855, avg loss: 3.102619, ppl: 22.256172 +epoch: 0, batch: 37878, sum loss: 5730.435547, avg loss: 3.035188, ppl: 20.804897 +epoch: 0, batch: 37879, sum loss: 6660.248535, avg loss: 3.258439, ppl: 26.008894 +epoch: 0, batch: 37880, sum loss: 4115.326172, avg loss: 2.653337, ppl: 14.201353 +epoch: 0, batch: 37881, sum loss: 4677.104492, avg loss: 2.779028, ppl: 16.103363 +epoch: 0, batch: 37882, sum loss: 5740.464844, avg loss: 3.091257, ppl: 22.004723 +epoch: 0, batch: 37883, sum loss: 4687.693359, avg loss: 2.927979, ppl: 18.689812 +epoch: 0, batch: 37884, sum loss: 4919.029785, avg loss: 2.943764, ppl: 18.987185 +epoch: 0, batch: 37885, sum loss: 4684.675293, avg loss: 2.877565, ppl: 17.770943 +epoch: 0, batch: 37886, sum loss: 6046.649902, avg loss: 3.070924, ppl: 21.561823 +epoch: 0, batch: 37887, sum loss: 4982.570312, avg loss: 2.932649, ppl: 18.777304 +epoch: 0, batch: 37888, sum loss: 5381.363770, avg loss: 3.083876, ppl: 21.842899 +epoch: 0, batch: 37889, sum loss: 5694.648438, avg loss: 2.838808, ppl: 17.095373 +epoch: 0, batch: 37890, sum loss: 5383.922363, avg loss: 2.908656, ppl: 18.332146 +epoch: 0, batch: 37891, sum loss: 5462.871094, avg loss: 2.986808, ppl: 19.822306 +epoch: 0, batch: 37892, sum loss: 4999.961914, avg loss: 2.993989, ppl: 19.965170 +epoch: 0, batch: 37893, sum loss: 4701.312500, avg loss: 2.907429, ppl: 18.309664 +epoch: 0, batch: 37894, sum loss: 4588.537598, avg loss: 2.737791, ppl: 15.452813 +epoch: 0, batch: 37895, sum loss: 5794.305176, avg loss: 3.176702, ppl: 
23.967590 +epoch: 0, batch: 37896, sum loss: 4378.908203, avg loss: 2.896103, ppl: 18.103466 +epoch: 0, batch: 37897, sum loss: 4455.111328, avg loss: 2.877979, ppl: 17.778307 +epoch: 0, batch: 37898, sum loss: 4428.445312, avg loss: 2.767778, ppl: 15.923216 +epoch: 0, batch: 37899, sum loss: 3762.822266, avg loss: 2.687730, ppl: 14.698274 +epoch: 0, batch: 37900, sum loss: 4455.572266, avg loss: 2.725121, ppl: 15.258253 +epoch: 0, batch: 37901, sum loss: 5586.461914, avg loss: 3.184984, ppl: 24.166901 +epoch: 0, batch: 37902, sum loss: 4896.024414, avg loss: 2.912566, ppl: 18.403971 +epoch: 0, batch: 37903, sum loss: 4661.589844, avg loss: 2.710227, ppl: 15.032681 +epoch: 0, batch: 37904, sum loss: 5580.635742, avg loss: 3.237028, ppl: 25.457945 +epoch: 0, batch: 37905, sum loss: 4732.818359, avg loss: 2.669384, ppl: 14.431084 +epoch: 0, batch: 37906, sum loss: 4856.830078, avg loss: 2.922281, ppl: 18.583620 +epoch: 0, batch: 37907, sum loss: 4219.747070, avg loss: 2.691165, ppl: 14.748852 +epoch: 0, batch: 37908, sum loss: 3580.358643, avg loss: 2.417528, ppl: 11.218093 +epoch: 0, batch: 37909, sum loss: 4630.581055, avg loss: 3.162965, ppl: 23.640589 +epoch: 0, batch: 37910, sum loss: 4879.464844, avg loss: 2.971660, ppl: 19.524296 +epoch: 0, batch: 37911, sum loss: 5057.833496, avg loss: 2.808347, ppl: 16.582489 +epoch: 0, batch: 37912, sum loss: 6127.759277, avg loss: 3.018601, ppl: 20.462639 +epoch: 0, batch: 37913, sum loss: 5329.253906, avg loss: 3.080493, ppl: 21.769142 +epoch: 0, batch: 37914, sum loss: 6458.883789, avg loss: 3.258771, ppl: 26.017534 +epoch: 0, batch: 37915, sum loss: 6678.558105, avg loss: 3.469381, ppl: 32.116844 +epoch: 0, batch: 37916, sum loss: 4829.132324, avg loss: 2.825706, ppl: 16.872862 +epoch: 0, batch: 37917, sum loss: 4864.724121, avg loss: 2.875133, ppl: 17.727774 +epoch: 0, batch: 37918, sum loss: 6142.066406, avg loss: 3.025649, ppl: 20.607367 +epoch: 0, batch: 37919, sum loss: 5356.714844, avg loss: 3.004327, ppl: 
20.172636 +epoch: 0, batch: 37920, sum loss: 6148.088867, avg loss: 3.149636, ppl: 23.327566 +epoch: 0, batch: 37921, sum loss: 4784.938965, avg loss: 2.848178, ppl: 17.256315 +epoch: 0, batch: 37922, sum loss: 4899.645508, avg loss: 2.599282, ppl: 13.454068 +epoch: 0, batch: 37923, sum loss: 5375.574707, avg loss: 3.077032, ppl: 21.693916 +epoch: 0, batch: 37924, sum loss: 5656.182129, avg loss: 3.118072, ppl: 22.602755 +epoch: 0, batch: 37925, sum loss: 5019.213867, avg loss: 2.859951, ppl: 17.460667 +epoch: 0, batch: 37926, sum loss: 4900.340332, avg loss: 2.785867, ppl: 16.213873 +epoch: 0, batch: 37927, sum loss: 4188.105957, avg loss: 2.570967, ppl: 13.078471 +epoch: 0, batch: 37928, sum loss: 4573.960449, avg loss: 2.777147, ppl: 16.073092 +epoch: 0, batch: 37929, sum loss: 4972.660156, avg loss: 2.916516, ppl: 18.476809 +epoch: 0, batch: 37930, sum loss: 4389.979004, avg loss: 2.754065, ppl: 15.706342 +epoch: 0, batch: 37931, sum loss: 5156.115234, avg loss: 2.862918, ppl: 17.512552 +epoch: 0, batch: 37932, sum loss: 5071.244141, avg loss: 2.886308, ppl: 17.927008 +epoch: 0, batch: 37933, sum loss: 5431.252930, avg loss: 3.077197, ppl: 21.697500 +epoch: 0, batch: 37934, sum loss: 5020.370117, avg loss: 3.017050, ppl: 20.430922 +epoch: 0, batch: 37935, sum loss: 4806.567871, avg loss: 2.956069, ppl: 19.222256 +epoch: 0, batch: 37936, sum loss: 4306.642090, avg loss: 2.725723, ppl: 15.267446 +epoch: 0, batch: 37937, sum loss: 5894.932617, avg loss: 3.415372, ppl: 30.428270 +epoch: 0, batch: 37938, sum loss: 4014.031738, avg loss: 2.663591, ppl: 14.347721 +epoch: 0, batch: 37939, sum loss: 5949.617188, avg loss: 3.207341, ppl: 24.713280 +epoch: 0, batch: 37940, sum loss: 4684.027344, avg loss: 2.820004, ppl: 16.776926 +epoch: 0, batch: 37941, sum loss: 4632.905762, avg loss: 3.088604, ppl: 21.946413 +epoch: 0, batch: 37942, sum loss: 4094.866943, avg loss: 2.755630, ppl: 15.730956 +epoch: 0, batch: 37943, sum loss: 3974.630127, avg loss: 2.752514, ppl: 
15.682005 +epoch: 0, batch: 37944, sum loss: 5755.353516, avg loss: 3.037126, ppl: 20.845243 +epoch: 0, batch: 37945, sum loss: 4283.997559, avg loss: 2.733885, ppl: 15.392571 +epoch: 0, batch: 37946, sum loss: 5467.131348, avg loss: 2.860875, ppl: 17.476807 +epoch: 0, batch: 37947, sum loss: 5358.661133, avg loss: 2.973730, ppl: 19.564756 +epoch: 0, batch: 37948, sum loss: 4796.699707, avg loss: 2.790401, ppl: 16.287558 +epoch: 0, batch: 37949, sum loss: 4789.509766, avg loss: 2.871409, ppl: 17.661880 +epoch: 0, batch: 37950, sum loss: 4764.229492, avg loss: 3.024908, ppl: 20.592102 +epoch: 0, batch: 37951, sum loss: 4093.612305, avg loss: 2.854681, ppl: 17.368893 +epoch: 0, batch: 37952, sum loss: 5023.020020, avg loss: 3.057225, ppl: 21.268444 +epoch: 0, batch: 37953, sum loss: 4290.878906, avg loss: 2.938958, ppl: 18.896149 +epoch: 0, batch: 37954, sum loss: 4561.662109, avg loss: 2.833330, ppl: 17.001989 +epoch: 0, batch: 37955, sum loss: 4647.780762, avg loss: 2.675752, ppl: 14.523266 +epoch: 0, batch: 37956, sum loss: 5745.729004, avg loss: 3.040068, ppl: 20.906673 +epoch: 0, batch: 37957, sum loss: 3953.165039, avg loss: 2.758664, ppl: 15.778747 +epoch: 0, batch: 37958, sum loss: 4999.275391, avg loss: 3.033541, ppl: 20.770651 +epoch: 0, batch: 37959, sum loss: 4375.623047, avg loss: 2.813906, ppl: 16.674919 +epoch: 0, batch: 37960, sum loss: 5644.712891, avg loss: 2.903659, ppl: 18.240768 +epoch: 0, batch: 37961, sum loss: 5746.953613, avg loss: 3.111507, ppl: 22.454868 +epoch: 0, batch: 37962, sum loss: 4966.947266, avg loss: 2.897869, ppl: 18.135456 +epoch: 0, batch: 37963, sum loss: 4332.344727, avg loss: 2.826057, ppl: 16.878771 +epoch: 0, batch: 37964, sum loss: 5101.253906, avg loss: 2.953824, ppl: 19.179155 +epoch: 0, batch: 37965, sum loss: 5387.575195, avg loss: 2.969997, ppl: 19.491869 +epoch: 0, batch: 37966, sum loss: 5280.734863, avg loss: 2.960053, ppl: 19.298998 +epoch: 0, batch: 37967, sum loss: 6177.149902, avg loss: 2.978375, ppl: 
19.655849 +epoch: 0, batch: 37968, sum loss: 3964.745117, avg loss: 2.659118, ppl: 14.283688 +epoch: 0, batch: 37969, sum loss: 4597.425293, avg loss: 2.813602, ppl: 16.669851 +epoch: 0, batch: 37970, sum loss: 4705.160156, avg loss: 2.711908, ppl: 15.057977 +epoch: 0, batch: 37971, sum loss: 4914.049316, avg loss: 2.819305, ppl: 16.765202 +epoch: 0, batch: 37972, sum loss: 4206.975098, avg loss: 2.595296, ppl: 13.400556 +epoch: 0, batch: 37973, sum loss: 4642.583496, avg loss: 2.707046, ppl: 14.984938 +epoch: 0, batch: 37974, sum loss: 4988.425781, avg loss: 2.835944, ppl: 17.046488 +epoch: 0, batch: 37975, sum loss: 4740.712402, avg loss: 2.764264, ppl: 15.867352 +epoch: 0, batch: 37976, sum loss: 4057.302734, avg loss: 2.662272, ppl: 14.328811 +epoch: 0, batch: 37977, sum loss: 4108.027832, avg loss: 2.872747, ppl: 17.685534 +epoch: 0, batch: 37978, sum loss: 4380.157715, avg loss: 2.616582, ppl: 13.688854 +epoch: 0, batch: 37979, sum loss: 4162.853516, avg loss: 2.600158, ppl: 13.465872 +epoch: 0, batch: 37980, sum loss: 4118.338379, avg loss: 2.599961, ppl: 13.463214 +epoch: 0, batch: 37981, sum loss: 4489.184082, avg loss: 2.750726, ppl: 15.653989 +epoch: 0, batch: 37982, sum loss: 5477.966309, avg loss: 3.011526, ppl: 20.318390 +epoch: 0, batch: 37983, sum loss: 4929.101562, avg loss: 2.969338, ppl: 19.479029 +epoch: 0, batch: 37984, sum loss: 4675.397461, avg loss: 2.776364, ppl: 16.060524 +epoch: 0, batch: 37985, sum loss: 4627.647949, avg loss: 2.774369, ppl: 16.028513 +epoch: 0, batch: 37986, sum loss: 5195.058594, avg loss: 2.870198, ppl: 17.640514 +epoch: 0, batch: 37987, sum loss: 4910.861816, avg loss: 3.007264, ppl: 20.231962 +epoch: 0, batch: 37988, sum loss: 6083.747070, avg loss: 3.253341, ppl: 25.876642 +epoch: 0, batch: 37989, sum loss: 4860.370117, avg loss: 2.894800, ppl: 18.079893 +epoch: 0, batch: 37990, sum loss: 3878.638184, avg loss: 2.686038, ppl: 14.673418 +epoch: 0, batch: 37991, sum loss: 4508.991211, avg loss: 2.746036, ppl: 
15.580751 +epoch: 0, batch: 37992, sum loss: 4480.688477, avg loss: 2.725480, ppl: 15.263733 +epoch: 0, batch: 37993, sum loss: 4754.855469, avg loss: 2.775748, ppl: 16.050621 +epoch: 0, batch: 37994, sum loss: 4218.653809, avg loss: 2.784590, ppl: 16.193178 +epoch: 0, batch: 37995, sum loss: 5556.636719, avg loss: 3.059822, ppl: 21.323757 +epoch: 0, batch: 37996, sum loss: 4061.825928, avg loss: 2.814848, ppl: 16.690641 +epoch: 0, batch: 37997, sum loss: 4323.364746, avg loss: 2.915283, ppl: 18.454039 +epoch: 0, batch: 37998, sum loss: 5345.810547, avg loss: 3.088279, ppl: 21.939283 +epoch: 0, batch: 37999, sum loss: 4151.519531, avg loss: 2.891030, ppl: 18.011858 +epoch: 0, batch: 38000, sum loss: 5559.661133, avg loss: 2.895657, ppl: 18.095387 +epoch: 0, batch: 38001, sum loss: 5213.859375, avg loss: 2.909520, ppl: 18.347988 +epoch: 0, batch: 38002, sum loss: 6383.561523, avg loss: 3.391903, ppl: 29.722458 +epoch: 0, batch: 38003, sum loss: 4517.630859, avg loss: 2.802500, ppl: 16.485817 +epoch: 0, batch: 38004, sum loss: 4489.778320, avg loss: 2.790415, ppl: 16.287783 +epoch: 0, batch: 38005, sum loss: 6022.897461, avg loss: 3.038798, ppl: 20.880127 +epoch: 0, batch: 38006, sum loss: 4805.890625, avg loss: 2.903862, ppl: 18.244461 +epoch: 0, batch: 38007, sum loss: 4190.218262, avg loss: 2.596170, ppl: 13.412270 +epoch: 0, batch: 38008, sum loss: 5382.378418, avg loss: 2.988550, ppl: 19.856869 +epoch: 0, batch: 38009, sum loss: 4515.023438, avg loss: 2.918567, ppl: 18.514740 +epoch: 0, batch: 38010, sum loss: 4337.043457, avg loss: 2.920568, ppl: 18.551817 +epoch: 0, batch: 38011, sum loss: 4996.765137, avg loss: 2.799308, ppl: 16.433275 +epoch: 0, batch: 38012, sum loss: 4446.655273, avg loss: 2.614142, ppl: 13.655491 +epoch: 0, batch: 38013, sum loss: 5799.231445, avg loss: 3.265333, ppl: 26.188835 +epoch: 0, batch: 38014, sum loss: 5029.684570, avg loss: 3.131809, ppl: 22.915384 +epoch: 0, batch: 38015, sum loss: 4447.639160, avg loss: 2.816744, ppl: 
16.722315 +epoch: 0, batch: 38016, sum loss: 4607.154297, avg loss: 2.957095, ppl: 19.242001 +epoch: 0, batch: 38017, sum loss: 5426.073730, avg loss: 3.140089, ppl: 23.105925 +epoch: 0, batch: 38018, sum loss: 4916.840332, avg loss: 2.992599, ppl: 19.937433 +epoch: 0, batch: 38019, sum loss: 4989.966797, avg loss: 2.838434, ppl: 17.088980 +epoch: 0, batch: 38020, sum loss: 5384.320312, avg loss: 2.847340, ppl: 17.241859 +epoch: 0, batch: 38021, sum loss: 4854.547852, avg loss: 2.969142, ppl: 19.475212 +epoch: 0, batch: 38022, sum loss: 3712.510010, avg loss: 2.450502, ppl: 11.594162 +epoch: 0, batch: 38023, sum loss: 4313.029297, avg loss: 2.745404, ppl: 15.570904 +epoch: 0, batch: 38024, sum loss: 4703.100586, avg loss: 2.883569, ppl: 17.877962 +epoch: 0, batch: 38025, sum loss: 4891.669922, avg loss: 3.021415, ppl: 20.520298 +epoch: 0, batch: 38026, sum loss: 5239.196777, avg loss: 3.122287, ppl: 22.698221 +epoch: 0, batch: 38027, sum loss: 5354.975586, avg loss: 2.971685, ppl: 19.524786 +epoch: 0, batch: 38028, sum loss: 3754.141113, avg loss: 2.638188, ppl: 13.987830 +epoch: 0, batch: 38029, sum loss: 4681.391602, avg loss: 2.596446, ppl: 13.415970 +epoch: 0, batch: 38030, sum loss: 4396.886719, avg loss: 2.642360, ppl: 14.046313 +epoch: 0, batch: 38031, sum loss: 6214.609863, avg loss: 3.056866, ppl: 21.260830 +epoch: 0, batch: 38032, sum loss: 3990.730957, avg loss: 2.665819, ppl: 14.379721 +epoch: 0, batch: 38033, sum loss: 4253.513672, avg loss: 2.927401, ppl: 18.679022 +epoch: 0, batch: 38034, sum loss: 5920.962891, avg loss: 2.996439, ppl: 20.014133 +epoch: 0, batch: 38035, sum loss: 5126.140625, avg loss: 2.899401, ppl: 18.163261 +epoch: 0, batch: 38036, sum loss: 4968.840332, avg loss: 2.769699, ppl: 15.953836 +epoch: 0, batch: 38037, sum loss: 5529.185059, avg loss: 3.019763, ppl: 20.486425 +epoch: 0, batch: 38038, sum loss: 4392.627441, avg loss: 2.903257, ppl: 18.233433 +epoch: 0, batch: 38039, sum loss: 5967.602051, avg loss: 3.032318, ppl: 
20.745272 +epoch: 0, batch: 38040, sum loss: 5427.929199, avg loss: 2.941967, ppl: 18.953091 +epoch: 0, batch: 38041, sum loss: 4671.094238, avg loss: 2.755808, ppl: 15.733747 +epoch: 0, batch: 38042, sum loss: 5374.150391, avg loss: 3.083276, ppl: 21.829805 +epoch: 0, batch: 38043, sum loss: 4976.104492, avg loss: 2.879690, ppl: 17.808754 +epoch: 0, batch: 38044, sum loss: 4972.489258, avg loss: 3.024628, ppl: 20.586353 +epoch: 0, batch: 38045, sum loss: 4789.818359, avg loss: 3.072366, ppl: 21.592932 +epoch: 0, batch: 38046, sum loss: 5157.559570, avg loss: 2.879709, ppl: 17.809093 +epoch: 0, batch: 38047, sum loss: 5988.489258, avg loss: 3.140267, ppl: 23.110039 +epoch: 0, batch: 38048, sum loss: 4531.059082, avg loss: 2.708344, ppl: 15.004404 +epoch: 0, batch: 38049, sum loss: 4791.398926, avg loss: 2.853722, ppl: 17.352245 +epoch: 0, batch: 38050, sum loss: 4162.190430, avg loss: 2.844970, ppl: 17.201038 +epoch: 0, batch: 38051, sum loss: 4915.235840, avg loss: 2.906704, ppl: 18.296389 +epoch: 0, batch: 38052, sum loss: 4624.532715, avg loss: 2.723518, ppl: 15.233815 +epoch: 0, batch: 38053, sum loss: 4333.384766, avg loss: 2.929942, ppl: 18.726551 +epoch: 0, batch: 38054, sum loss: 5126.149902, avg loss: 2.892861, ppl: 18.044865 +epoch: 0, batch: 38055, sum loss: 4517.034180, avg loss: 2.828450, ppl: 16.919210 +epoch: 0, batch: 38056, sum loss: 4567.893066, avg loss: 2.977766, ppl: 19.643885 +epoch: 0, batch: 38057, sum loss: 5351.648438, avg loss: 2.848137, ppl: 17.255598 +epoch: 0, batch: 38058, sum loss: 4824.926758, avg loss: 2.917126, ppl: 18.488085 +epoch: 0, batch: 38059, sum loss: 5048.983887, avg loss: 2.938873, ppl: 18.894541 +epoch: 0, batch: 38060, sum loss: 4265.575684, avg loss: 2.626586, ppl: 13.826485 +epoch: 0, batch: 38061, sum loss: 4270.761719, avg loss: 2.748238, ppl: 15.615091 +epoch: 0, batch: 38062, sum loss: 4945.265625, avg loss: 2.905561, ppl: 18.275501 +epoch: 0, batch: 38063, sum loss: 4719.547363, avg loss: 2.692269, ppl: 
14.765141 +epoch: 0, batch: 38064, sum loss: 5994.476074, avg loss: 3.085166, ppl: 21.871086 +epoch: 0, batch: 38065, sum loss: 5459.461426, avg loss: 2.963877, ppl: 19.372940 +epoch: 0, batch: 38066, sum loss: 4453.218750, avg loss: 2.589081, ppl: 13.317521 +epoch: 0, batch: 38067, sum loss: 6031.949219, avg loss: 3.269349, ppl: 26.294218 +epoch: 0, batch: 38068, sum loss: 4594.335938, avg loss: 2.864299, ppl: 17.536760 +epoch: 0, batch: 38069, sum loss: 4713.828613, avg loss: 2.814226, ppl: 16.680264 +epoch: 0, batch: 38070, sum loss: 4558.520020, avg loss: 2.808700, ppl: 16.588337 +epoch: 0, batch: 38071, sum loss: 5041.107910, avg loss: 2.920688, ppl: 18.554050 +epoch: 0, batch: 38072, sum loss: 4540.891602, avg loss: 2.804751, ppl: 16.522961 +epoch: 0, batch: 38073, sum loss: 5707.419922, avg loss: 3.158506, ppl: 23.535400 +epoch: 0, batch: 38074, sum loss: 5553.375977, avg loss: 3.039615, ppl: 20.897190 +epoch: 0, batch: 38075, sum loss: 4296.276367, avg loss: 2.627692, ppl: 13.841786 +epoch: 0, batch: 38076, sum loss: 6209.222656, avg loss: 3.140730, ppl: 23.120731 +epoch: 0, batch: 38077, sum loss: 5055.013672, avg loss: 2.886930, ppl: 17.938150 +epoch: 0, batch: 38078, sum loss: 5358.775391, avg loss: 2.780890, ppl: 16.133377 +epoch: 0, batch: 38079, sum loss: 4865.021484, avg loss: 2.988342, ppl: 19.852745 +epoch: 0, batch: 38080, sum loss: 4614.907227, avg loss: 2.956379, ppl: 19.228218 +epoch: 0, batch: 38081, sum loss: 5336.273926, avg loss: 3.174464, ppl: 23.913998 +epoch: 0, batch: 38082, sum loss: 4174.818848, avg loss: 2.612527, ppl: 13.633464 +epoch: 0, batch: 38083, sum loss: 4529.571777, avg loss: 2.740213, ppl: 15.490279 +epoch: 0, batch: 38084, sum loss: 4863.413086, avg loss: 2.859149, ppl: 17.446678 +epoch: 0, batch: 38085, sum loss: 5052.396484, avg loss: 2.971998, ppl: 19.530907 +epoch: 0, batch: 38086, sum loss: 5615.658203, avg loss: 2.860753, ppl: 17.474682 +epoch: 0, batch: 38087, sum loss: 4723.756836, avg loss: 2.752772, ppl: 
15.686055 +epoch: 0, batch: 38088, sum loss: 4876.297852, avg loss: 2.820300, ppl: 16.781878 +epoch: 0, batch: 38089, sum loss: 5686.720703, avg loss: 3.169856, ppl: 23.804047 +epoch: 0, batch: 38090, sum loss: 4147.090820, avg loss: 2.807780, ppl: 16.573086 +epoch: 0, batch: 38091, sum loss: 4609.457031, avg loss: 2.870147, ppl: 17.639620 +epoch: 0, batch: 38092, sum loss: 5283.302734, avg loss: 2.946627, ppl: 19.041626 +epoch: 0, batch: 38093, sum loss: 4295.466309, avg loss: 2.946136, ppl: 19.032270 +epoch: 0, batch: 38094, sum loss: 5724.630859, avg loss: 3.076105, ppl: 21.673815 +epoch: 0, batch: 38095, sum loss: 5374.948242, avg loss: 3.148769, ppl: 23.307348 +epoch: 0, batch: 38096, sum loss: 3770.396973, avg loss: 2.750107, ppl: 15.644310 +epoch: 0, batch: 38097, sum loss: 5464.169922, avg loss: 2.979373, ppl: 19.675470 +epoch: 0, batch: 38098, sum loss: 4794.344727, avg loss: 2.810284, ppl: 16.614639 +epoch: 0, batch: 38099, sum loss: 3997.525391, avg loss: 2.734286, ppl: 15.398739 +epoch: 0, batch: 38100, sum loss: 4897.324219, avg loss: 2.816173, ppl: 16.712765 +epoch: 0, batch: 38101, sum loss: 5484.098633, avg loss: 3.067169, ppl: 21.481009 +epoch: 0, batch: 38102, sum loss: 5845.813477, avg loss: 3.051051, ppl: 21.137547 +epoch: 0, batch: 38103, sum loss: 5902.515137, avg loss: 2.954212, ppl: 19.186592 +epoch: 0, batch: 38104, sum loss: 4607.641602, avg loss: 2.965020, ppl: 19.395100 +epoch: 0, batch: 38105, sum loss: 5014.328613, avg loss: 3.163614, ppl: 23.655941 +epoch: 0, batch: 38106, sum loss: 4850.388672, avg loss: 3.016411, ppl: 20.417877 +epoch: 0, batch: 38107, sum loss: 4100.645020, avg loss: 2.553328, ppl: 12.849797 +epoch: 0, batch: 38108, sum loss: 5043.851074, avg loss: 2.927366, ppl: 18.678362 +epoch: 0, batch: 38109, sum loss: 4137.407715, avg loss: 2.630265, ppl: 13.877454 +epoch: 0, batch: 38110, sum loss: 5098.105469, avg loss: 3.009507, ppl: 20.277405 +epoch: 0, batch: 38111, sum loss: 4578.736328, avg loss: 2.500675, ppl: 
12.190725 +epoch: 0, batch: 38112, sum loss: 4014.033691, avg loss: 2.681385, ppl: 14.605309 +epoch: 0, batch: 38113, sum loss: 5765.021973, avg loss: 3.171079, ppl: 23.833191 +epoch: 0, batch: 38114, sum loss: 3804.411865, avg loss: 2.618315, ppl: 13.712602 +epoch: 0, batch: 38115, sum loss: 4710.272949, avg loss: 2.956857, ppl: 19.237413 +epoch: 0, batch: 38116, sum loss: 4170.193848, avg loss: 2.644384, ppl: 14.074775 +epoch: 0, batch: 38117, sum loss: 4557.600098, avg loss: 2.727469, ppl: 15.294125 +epoch: 0, batch: 38118, sum loss: 5690.023438, avg loss: 3.104214, ppl: 22.291685 +epoch: 0, batch: 38119, sum loss: 4969.914551, avg loss: 3.028589, ppl: 20.668049 +epoch: 0, batch: 38120, sum loss: 5178.303711, avg loss: 2.962416, ppl: 19.344660 +epoch: 0, batch: 38121, sum loss: 4572.419922, avg loss: 2.967177, ppl: 19.436974 +epoch: 0, batch: 38122, sum loss: 4938.800781, avg loss: 3.026226, ppl: 20.619268 +epoch: 0, batch: 38123, sum loss: 4610.154785, avg loss: 2.762226, ppl: 15.835051 +epoch: 0, batch: 38124, sum loss: 4357.194336, avg loss: 2.777052, ppl: 16.071571 +epoch: 0, batch: 38125, sum loss: 5067.129883, avg loss: 2.725729, ppl: 15.267536 +epoch: 0, batch: 38126, sum loss: 5494.450195, avg loss: 3.120074, ppl: 22.648050 +epoch: 0, batch: 38127, sum loss: 4843.512695, avg loss: 3.130907, ppl: 22.894732 +epoch: 0, batch: 38128, sum loss: 4641.534180, avg loss: 2.661430, ppl: 14.316750 +epoch: 0, batch: 38129, sum loss: 4823.328613, avg loss: 3.107815, ppl: 22.372108 +epoch: 0, batch: 38130, sum loss: 4287.117188, avg loss: 2.865720, ppl: 17.561693 +epoch: 0, batch: 38131, sum loss: 4293.890625, avg loss: 2.670330, ppl: 14.444736 +epoch: 0, batch: 38132, sum loss: 4724.373047, avg loss: 2.812127, ppl: 16.645283 +epoch: 0, batch: 38133, sum loss: 5710.792480, avg loss: 3.160372, ppl: 23.579367 +epoch: 0, batch: 38134, sum loss: 4065.958984, avg loss: 2.654020, ppl: 14.211057 +epoch: 0, batch: 38135, sum loss: 5039.888672, avg loss: 2.954214, ppl: 
19.186628 +epoch: 0, batch: 38136, sum loss: 5000.904785, avg loss: 2.957366, ppl: 19.247198 +epoch: 0, batch: 38137, sum loss: 4773.237305, avg loss: 2.951909, ppl: 19.142467 +epoch: 0, batch: 38138, sum loss: 4218.979004, avg loss: 2.677017, ppl: 14.541651 +epoch: 0, batch: 38139, sum loss: 4675.071777, avg loss: 2.799444, ppl: 16.435509 +epoch: 0, batch: 38140, sum loss: 5234.484863, avg loss: 3.046848, ppl: 21.048895 +epoch: 0, batch: 38141, sum loss: 4141.382324, avg loss: 2.488811, ppl: 12.046949 +epoch: 0, batch: 38142, sum loss: 5081.950195, avg loss: 2.963236, ppl: 19.360523 +epoch: 0, batch: 38143, sum loss: 4642.091309, avg loss: 2.786369, ppl: 16.222015 +epoch: 0, batch: 38144, sum loss: 6081.172363, avg loss: 2.992703, ppl: 19.939505 +epoch: 0, batch: 38145, sum loss: 4772.335938, avg loss: 3.067054, ppl: 21.478535 +epoch: 0, batch: 38146, sum loss: 4787.012207, avg loss: 3.096386, ppl: 22.117867 +epoch: 0, batch: 38147, sum loss: 5529.458008, avg loss: 2.968040, ppl: 19.453749 +epoch: 0, batch: 38148, sum loss: 5419.664062, avg loss: 3.009253, ppl: 20.272242 +epoch: 0, batch: 38149, sum loss: 4254.850586, avg loss: 2.720493, ppl: 15.187802 +epoch: 0, batch: 38150, sum loss: 4301.887207, avg loss: 2.817215, ppl: 16.730192 +epoch: 0, batch: 38151, sum loss: 5336.677734, avg loss: 3.242210, ppl: 25.590218 +epoch: 0, batch: 38152, sum loss: 5250.768555, avg loss: 2.877133, ppl: 17.763279 +epoch: 0, batch: 38153, sum loss: 4546.430664, avg loss: 2.620421, ppl: 13.741509 +epoch: 0, batch: 38154, sum loss: 4887.832520, avg loss: 3.060634, ppl: 21.341085 +epoch: 0, batch: 38155, sum loss: 4578.013672, avg loss: 2.940279, ppl: 18.921116 +epoch: 0, batch: 38156, sum loss: 5242.085449, avg loss: 2.992058, ppl: 19.926651 +epoch: 0, batch: 38157, sum loss: 4224.304199, avg loss: 2.604380, ppl: 13.522837 +epoch: 0, batch: 38158, sum loss: 5475.772949, avg loss: 3.010320, ppl: 20.293901 +epoch: 0, batch: 38159, sum loss: 4541.472168, avg loss: 2.770880, ppl: 
15.972680 +epoch: 0, batch: 38160, sum loss: 4163.718750, avg loss: 2.688004, ppl: 14.702305 +epoch: 0, batch: 38161, sum loss: 3993.947754, avg loss: 2.669751, ppl: 14.436377 +epoch: 0, batch: 38162, sum loss: 4915.117676, avg loss: 3.015410, ppl: 20.397446 +epoch: 0, batch: 38163, sum loss: 5324.603027, avg loss: 3.067168, ppl: 21.480972 +epoch: 0, batch: 38164, sum loss: 5941.053711, avg loss: 3.171946, ppl: 23.853849 +epoch: 0, batch: 38165, sum loss: 4143.874512, avg loss: 2.786735, ppl: 16.227942 +epoch: 0, batch: 38166, sum loss: 4673.497070, avg loss: 2.725071, ppl: 15.257497 +epoch: 0, batch: 38167, sum loss: 5920.570801, avg loss: 3.354431, ppl: 28.629314 +epoch: 0, batch: 38168, sum loss: 4652.237793, avg loss: 2.747926, ppl: 15.610215 +epoch: 0, batch: 38169, sum loss: 5060.101074, avg loss: 3.013759, ppl: 20.363796 +epoch: 0, batch: 38170, sum loss: 4904.123535, avg loss: 3.001300, ppl: 20.111658 +epoch: 0, batch: 38171, sum loss: 4999.222168, avg loss: 2.848560, ppl: 17.262899 +epoch: 0, batch: 38172, sum loss: 4162.475586, avg loss: 2.771289, ppl: 15.979212 +epoch: 0, batch: 38173, sum loss: 5045.278809, avg loss: 3.041157, ppl: 20.929434 +epoch: 0, batch: 38174, sum loss: 4676.786621, avg loss: 2.813951, ppl: 16.675674 +epoch: 0, batch: 38175, sum loss: 5100.709473, avg loss: 2.931442, ppl: 18.754660 +epoch: 0, batch: 38176, sum loss: 5495.198730, avg loss: 3.087190, ppl: 21.915417 +epoch: 0, batch: 38177, sum loss: 4723.048828, avg loss: 2.722218, ppl: 15.214034 +epoch: 0, batch: 38178, sum loss: 4199.002930, avg loss: 2.681355, ppl: 14.604877 +epoch: 0, batch: 38179, sum loss: 5064.294434, avg loss: 2.693774, ppl: 14.787371 +epoch: 0, batch: 38180, sum loss: 4566.465332, avg loss: 2.919735, ppl: 18.536373 +epoch: 0, batch: 38181, sum loss: 3861.806152, avg loss: 2.715757, ppl: 15.116043 +epoch: 0, batch: 38182, sum loss: 4565.368164, avg loss: 2.947300, ppl: 19.054441 +epoch: 0, batch: 38183, sum loss: 4774.378906, avg loss: 2.993341, ppl: 
19.952230 +epoch: 0, batch: 38184, sum loss: 5533.624023, avg loss: 3.015599, ppl: 20.401308 +epoch: 0, batch: 38185, sum loss: 4992.501953, avg loss: 2.664088, ppl: 14.354859 +epoch: 0, batch: 38186, sum loss: 4037.061768, avg loss: 2.673551, ppl: 14.491334 +epoch: 0, batch: 38187, sum loss: 4428.412598, avg loss: 2.695321, ppl: 14.810273 +epoch: 0, batch: 38188, sum loss: 4171.658203, avg loss: 2.636952, ppl: 13.970555 +epoch: 0, batch: 38189, sum loss: 3870.261719, avg loss: 2.762499, ppl: 15.839381 +epoch: 0, batch: 38190, sum loss: 4573.588379, avg loss: 3.016879, ppl: 20.427441 +epoch: 0, batch: 38191, sum loss: 5071.495117, avg loss: 3.105631, ppl: 22.323292 +epoch: 0, batch: 38192, sum loss: 3585.703125, avg loss: 2.652147, ppl: 14.184464 +epoch: 0, batch: 38193, sum loss: 5046.467285, avg loss: 2.977267, ppl: 19.634079 +epoch: 0, batch: 38194, sum loss: 5940.455566, avg loss: 3.269376, ppl: 26.294922 +epoch: 0, batch: 38195, sum loss: 5270.806152, avg loss: 3.073356, ppl: 21.614323 +epoch: 0, batch: 38196, sum loss: 5261.446289, avg loss: 3.122520, ppl: 22.703520 +epoch: 0, batch: 38197, sum loss: 4627.220703, avg loss: 2.821476, ppl: 16.801632 +epoch: 0, batch: 38198, sum loss: 5267.372559, avg loss: 2.969207, ppl: 19.476460 +epoch: 0, batch: 38199, sum loss: 5303.250977, avg loss: 3.175599, ppl: 23.941164 +epoch: 0, batch: 38200, sum loss: 5640.879883, avg loss: 2.928806, ppl: 18.705280 +epoch: 0, batch: 38201, sum loss: 4399.030273, avg loss: 2.759743, ppl: 15.795782 +epoch: 0, batch: 38202, sum loss: 5615.342773, avg loss: 2.875240, ppl: 17.729671 +epoch: 0, batch: 38203, sum loss: 5108.149902, avg loss: 3.090230, ppl: 21.982128 +epoch: 0, batch: 38204, sum loss: 5447.717285, avg loss: 3.123691, ppl: 22.730124 +epoch: 0, batch: 38205, sum loss: 4786.767578, avg loss: 2.796009, ppl: 16.379148 +epoch: 0, batch: 38206, sum loss: 5228.710449, avg loss: 2.901615, ppl: 18.203524 +epoch: 0, batch: 38207, sum loss: 6363.791016, avg loss: 3.072811, ppl: 
21.602539 +epoch: 0, batch: 38208, sum loss: 4545.241699, avg loss: 2.898751, ppl: 18.151461 +epoch: 0, batch: 38209, sum loss: 4799.661621, avg loss: 2.914184, ppl: 18.433771 +epoch: 0, batch: 38210, sum loss: 4338.367676, avg loss: 2.808005, ppl: 16.576817 +epoch: 0, batch: 38211, sum loss: 5377.074219, avg loss: 2.903388, ppl: 18.235821 +epoch: 0, batch: 38212, sum loss: 3605.528809, avg loss: 2.792819, ppl: 16.326973 +epoch: 0, batch: 38213, sum loss: 4463.921875, avg loss: 2.625837, ppl: 13.816128 +epoch: 0, batch: 38214, sum loss: 5332.707520, avg loss: 3.176121, ppl: 23.953663 +epoch: 0, batch: 38215, sum loss: 5457.019531, avg loss: 3.114737, ppl: 22.527512 +epoch: 0, batch: 38216, sum loss: 3726.128418, avg loss: 2.684530, ppl: 14.651320 +epoch: 0, batch: 38217, sum loss: 3925.318848, avg loss: 2.854777, ppl: 17.370569 +epoch: 0, batch: 38218, sum loss: 5205.774902, avg loss: 3.078519, ppl: 21.726194 +epoch: 0, batch: 38219, sum loss: 4991.717285, avg loss: 3.025283, ppl: 20.599840 +epoch: 0, batch: 38220, sum loss: 3890.382812, avg loss: 2.551071, ppl: 12.820824 +epoch: 0, batch: 38221, sum loss: 3867.538818, avg loss: 2.839603, ppl: 17.108967 +epoch: 0, batch: 38222, sum loss: 4359.252930, avg loss: 2.792603, ppl: 16.323450 +epoch: 0, batch: 38223, sum loss: 4828.377441, avg loss: 2.792584, ppl: 16.323143 +epoch: 0, batch: 38224, sum loss: 4128.901367, avg loss: 2.754437, ppl: 15.712195 +epoch: 0, batch: 38225, sum loss: 5689.610840, avg loss: 3.060576, ppl: 21.339844 +epoch: 0, batch: 38226, sum loss: 4429.964355, avg loss: 2.709458, ppl: 15.021137 +epoch: 0, batch: 38227, sum loss: 4257.366211, avg loss: 2.694535, ppl: 14.798643 +epoch: 0, batch: 38228, sum loss: 5165.719727, avg loss: 2.890722, ppl: 18.006302 +epoch: 0, batch: 38229, sum loss: 4741.332520, avg loss: 2.823903, ppl: 16.842453 +epoch: 0, batch: 38230, sum loss: 5223.096191, avg loss: 2.991464, ppl: 19.914820 +epoch: 0, batch: 38231, sum loss: 5610.960938, avg loss: 2.838119, ppl: 
17.083597 +epoch: 0, batch: 38232, sum loss: 5382.008301, avg loss: 2.955523, ppl: 19.211777 +epoch: 0, batch: 38233, sum loss: 4544.375977, avg loss: 2.716304, ppl: 15.124316 +epoch: 0, batch: 38234, sum loss: 4406.236816, avg loss: 2.701555, ppl: 14.902895 +epoch: 0, batch: 38235, sum loss: 4504.268555, avg loss: 2.753221, ppl: 15.693091 +epoch: 0, batch: 38236, sum loss: 4682.673828, avg loss: 2.908493, ppl: 18.329157 +epoch: 0, batch: 38237, sum loss: 4574.386719, avg loss: 2.941728, ppl: 18.948559 +epoch: 0, batch: 38238, sum loss: 4447.483887, avg loss: 3.023443, ppl: 20.561954 +epoch: 0, batch: 38239, sum loss: 4877.402344, avg loss: 2.898041, ppl: 18.138569 +epoch: 0, batch: 38240, sum loss: 5146.966309, avg loss: 3.093129, ppl: 22.045950 +epoch: 0, batch: 38241, sum loss: 4223.990723, avg loss: 3.004261, ppl: 20.171299 +epoch: 0, batch: 38242, sum loss: 4846.586914, avg loss: 2.683603, ppl: 14.637739 +epoch: 0, batch: 38243, sum loss: 5677.167969, avg loss: 3.067082, ppl: 21.479128 +epoch: 0, batch: 38244, sum loss: 5204.162598, avg loss: 3.258712, ppl: 26.016003 +epoch: 0, batch: 38245, sum loss: 5066.046875, avg loss: 3.059207, ppl: 21.310650 +epoch: 0, batch: 38246, sum loss: 4703.678711, avg loss: 2.914299, ppl: 18.435888 +epoch: 0, batch: 38247, sum loss: 4216.510742, avg loss: 2.646899, ppl: 14.110222 +epoch: 0, batch: 38248, sum loss: 4616.872559, avg loss: 2.761288, ppl: 15.820201 +epoch: 0, batch: 38249, sum loss: 4792.373535, avg loss: 2.915069, ppl: 18.450087 +epoch: 0, batch: 38250, sum loss: 4261.014648, avg loss: 2.774098, ppl: 16.024170 +epoch: 0, batch: 38251, sum loss: 4255.678223, avg loss: 2.906884, ppl: 18.299686 +epoch: 0, batch: 38252, sum loss: 4646.081055, avg loss: 2.644326, ppl: 14.073956 +epoch: 0, batch: 38253, sum loss: 5134.451172, avg loss: 2.882904, ppl: 17.866074 +epoch: 0, batch: 38254, sum loss: 5505.388672, avg loss: 3.075636, ppl: 21.663658 +epoch: 0, batch: 38255, sum loss: 5144.255371, avg loss: 2.718951, ppl: 
15.164406 +epoch: 0, batch: 38256, sum loss: 6040.789062, avg loss: 3.361597, ppl: 28.835199 +epoch: 0, batch: 38257, sum loss: 5556.411621, avg loss: 3.026368, ppl: 20.622194 +epoch: 0, batch: 38258, sum loss: 5223.157715, avg loss: 3.019166, ppl: 20.474215 +epoch: 0, batch: 38259, sum loss: 5864.185059, avg loss: 3.127565, ppl: 22.818357 +epoch: 0, batch: 38260, sum loss: 4940.390625, avg loss: 2.661848, ppl: 14.322738 +epoch: 0, batch: 38261, sum loss: 4362.366211, avg loss: 2.877550, ppl: 17.770689 +epoch: 0, batch: 38262, sum loss: 5471.331543, avg loss: 2.840775, ppl: 17.129028 +epoch: 0, batch: 38263, sum loss: 5909.563965, avg loss: 3.182317, ppl: 24.102547 +epoch: 0, batch: 38264, sum loss: 4698.860352, avg loss: 2.849521, ppl: 17.279507 +epoch: 0, batch: 38265, sum loss: 5429.192871, avg loss: 2.952253, ppl: 19.149046 +epoch: 0, batch: 38266, sum loss: 4920.153320, avg loss: 2.953273, ppl: 19.168596 +epoch: 0, batch: 38267, sum loss: 5245.667480, avg loss: 3.100276, ppl: 22.204084 +epoch: 0, batch: 38268, sum loss: 5662.287598, avg loss: 3.013458, ppl: 20.357676 +epoch: 0, batch: 38269, sum loss: 5278.358887, avg loss: 2.911395, ppl: 18.382425 +epoch: 0, batch: 38270, sum loss: 5015.548340, avg loss: 2.879189, ppl: 17.799841 +epoch: 0, batch: 38271, sum loss: 4637.247070, avg loss: 2.948027, ppl: 19.068302 +epoch: 0, batch: 38272, sum loss: 4612.482422, avg loss: 2.775260, ppl: 16.042801 +epoch: 0, batch: 38273, sum loss: 5580.110840, avg loss: 2.939995, ppl: 18.915758 +epoch: 0, batch: 38274, sum loss: 5488.538086, avg loss: 3.263102, ppl: 26.130476 +epoch: 0, batch: 38275, sum loss: 4641.229004, avg loss: 2.635564, ppl: 13.951183 +epoch: 0, batch: 38276, sum loss: 5891.622559, avg loss: 2.938465, ppl: 18.886835 +epoch: 0, batch: 38277, sum loss: 4839.773438, avg loss: 2.749871, ppl: 15.640618 +epoch: 0, batch: 38278, sum loss: 4369.465332, avg loss: 2.730916, ppl: 15.346935 +epoch: 0, batch: 38279, sum loss: 4525.729980, avg loss: 2.880796, ppl: 
17.828453 +epoch: 0, batch: 38280, sum loss: 5620.543945, avg loss: 2.965986, ppl: 19.413841 +epoch: 0, batch: 38281, sum loss: 6450.014160, avg loss: 3.199412, ppl: 24.518106 +epoch: 0, batch: 38282, sum loss: 5247.149414, avg loss: 3.059562, ppl: 21.318222 +epoch: 0, batch: 38283, sum loss: 5945.635254, avg loss: 3.083836, ppl: 21.842024 +epoch: 0, batch: 38284, sum loss: 4675.209473, avg loss: 2.962744, ppl: 19.350988 +epoch: 0, batch: 38285, sum loss: 5039.712402, avg loss: 2.931770, ppl: 18.760809 +epoch: 0, batch: 38286, sum loss: 3611.161133, avg loss: 2.453234, ppl: 11.625889 +epoch: 0, batch: 38287, sum loss: 5418.649414, avg loss: 3.115957, ppl: 22.555012 +epoch: 0, batch: 38288, sum loss: 3869.770264, avg loss: 2.623573, ppl: 13.784890 +epoch: 0, batch: 38289, sum loss: 5399.372559, avg loss: 3.148322, ppl: 23.296942 +epoch: 0, batch: 38290, sum loss: 4820.290527, avg loss: 2.926709, ppl: 18.666107 +epoch: 0, batch: 38291, sum loss: 3538.410400, avg loss: 2.646530, ppl: 14.105008 +epoch: 0, batch: 38292, sum loss: 4276.684570, avg loss: 2.921232, ppl: 18.564154 +epoch: 0, batch: 38293, sum loss: 4314.629395, avg loss: 2.656791, ppl: 14.250492 +epoch: 0, batch: 38294, sum loss: 4858.125488, avg loss: 2.974970, ppl: 19.589027 +epoch: 0, batch: 38295, sum loss: 6261.133789, avg loss: 2.897332, ppl: 18.125715 +epoch: 0, batch: 38296, sum loss: 4633.734863, avg loss: 2.638801, ppl: 13.996416 +epoch: 0, batch: 38297, sum loss: 4172.660645, avg loss: 2.704252, ppl: 14.943135 +epoch: 0, batch: 38298, sum loss: 5257.818359, avg loss: 3.207943, ppl: 24.728161 +epoch: 0, batch: 38299, sum loss: 4310.481934, avg loss: 2.654238, ppl: 14.214144 +epoch: 0, batch: 38300, sum loss: 4608.860840, avg loss: 2.850254, ppl: 17.292179 +epoch: 0, batch: 38301, sum loss: 4881.962402, avg loss: 2.815434, ppl: 16.700418 +epoch: 0, batch: 38302, sum loss: 4565.489258, avg loss: 2.855215, ppl: 17.378178 +epoch: 0, batch: 38303, sum loss: 5970.345215, avg loss: 3.291260, ppl: 
26.876701 +epoch: 0, batch: 38304, sum loss: 5258.489746, avg loss: 3.087780, ppl: 21.928349 +epoch: 0, batch: 38305, sum loss: 5348.001953, avg loss: 2.906523, ppl: 18.293077 +epoch: 0, batch: 38306, sum loss: 5100.789062, avg loss: 2.751235, ppl: 15.661955 +epoch: 0, batch: 38307, sum loss: 5011.861816, avg loss: 2.850888, ppl: 17.303144 +epoch: 0, batch: 38308, sum loss: 6162.338867, avg loss: 3.003089, ppl: 20.147680 +epoch: 0, batch: 38309, sum loss: 5230.540039, avg loss: 2.990589, ppl: 19.897396 +epoch: 0, batch: 38310, sum loss: 4102.688477, avg loss: 2.887184, ppl: 17.942711 +epoch: 0, batch: 38311, sum loss: 4717.061035, avg loss: 2.802769, ppl: 16.490252 +epoch: 0, batch: 38312, sum loss: 4312.944336, avg loss: 2.729712, ppl: 15.328465 +epoch: 0, batch: 38313, sum loss: 3624.189941, avg loss: 2.672706, ppl: 14.479102 +epoch: 0, batch: 38314, sum loss: 5225.688477, avg loss: 3.057746, ppl: 21.279549 +epoch: 0, batch: 38315, sum loss: 4939.721680, avg loss: 2.711154, ppl: 15.046622 +epoch: 0, batch: 38316, sum loss: 4948.224609, avg loss: 3.002563, ppl: 20.137093 +epoch: 0, batch: 38317, sum loss: 4218.533691, avg loss: 2.644849, ppl: 14.081316 +epoch: 0, batch: 38318, sum loss: 4469.389160, avg loss: 2.819804, ppl: 16.773561 +epoch: 0, batch: 38319, sum loss: 4458.339355, avg loss: 2.944742, ppl: 19.005758 +epoch: 0, batch: 38320, sum loss: 5461.799805, avg loss: 2.960325, ppl: 19.304245 +epoch: 0, batch: 38321, sum loss: 5232.960938, avg loss: 2.814933, ppl: 16.692059 +epoch: 0, batch: 38322, sum loss: 4886.854492, avg loss: 2.723999, ppl: 15.241154 +epoch: 0, batch: 38323, sum loss: 5603.904785, avg loss: 2.987156, ppl: 19.829210 +epoch: 0, batch: 38324, sum loss: 5625.101074, avg loss: 3.016140, ppl: 20.412348 +epoch: 0, batch: 38325, sum loss: 4735.358398, avg loss: 2.889175, ppl: 17.978474 +epoch: 0, batch: 38326, sum loss: 4695.016113, avg loss: 2.901741, ppl: 18.205807 +epoch: 0, batch: 38327, sum loss: 4444.348633, avg loss: 2.850769, ppl: 
17.301083 +epoch: 0, batch: 38328, sum loss: 4087.096191, avg loss: 2.699535, ppl: 14.872816 +epoch: 0, batch: 38329, sum loss: 5402.396484, avg loss: 2.976527, ppl: 19.619558 +epoch: 0, batch: 38330, sum loss: 5081.346680, avg loss: 3.142453, ppl: 23.160614 +epoch: 0, batch: 38331, sum loss: 5959.409668, avg loss: 3.125018, ppl: 22.760307 +epoch: 0, batch: 38332, sum loss: 3501.989014, avg loss: 2.423522, ppl: 11.285534 +epoch: 0, batch: 38333, sum loss: 5224.536133, avg loss: 2.907366, ppl: 18.308506 +epoch: 0, batch: 38334, sum loss: 4721.885254, avg loss: 2.750079, ppl: 15.643863 +epoch: 0, batch: 38335, sum loss: 6336.890137, avg loss: 3.160544, ppl: 23.583414 +epoch: 0, batch: 38336, sum loss: 4922.672852, avg loss: 2.969043, ppl: 19.473270 +epoch: 0, batch: 38337, sum loss: 4289.399414, avg loss: 2.559308, ppl: 12.926864 +epoch: 0, batch: 38338, sum loss: 5275.454590, avg loss: 3.088674, ppl: 21.947947 +epoch: 0, batch: 38339, sum loss: 4683.346191, avg loss: 2.761407, ppl: 15.822087 +epoch: 0, batch: 38340, sum loss: 4849.799805, avg loss: 2.975337, ppl: 19.596231 +epoch: 0, batch: 38341, sum loss: 4046.028809, avg loss: 2.859384, ppl: 17.450779 +epoch: 0, batch: 38342, sum loss: 4214.621582, avg loss: 2.859309, ppl: 17.449469 +epoch: 0, batch: 38343, sum loss: 4247.561035, avg loss: 2.783461, ppl: 16.174904 +epoch: 0, batch: 38344, sum loss: 4555.754883, avg loss: 2.834944, ppl: 17.029448 +epoch: 0, batch: 38345, sum loss: 4116.764160, avg loss: 2.858864, ppl: 17.441704 +epoch: 0, batch: 38346, sum loss: 5505.125000, avg loss: 2.877744, ppl: 17.774134 +epoch: 0, batch: 38347, sum loss: 5503.759277, avg loss: 3.005876, ppl: 20.203913 +epoch: 0, batch: 38348, sum loss: 4240.950195, avg loss: 2.759239, ppl: 15.787823 +epoch: 0, batch: 38349, sum loss: 5217.168945, avg loss: 3.068923, ppl: 21.518715 +epoch: 0, batch: 38350, sum loss: 6007.417969, avg loss: 3.066574, ppl: 21.468224 +epoch: 0, batch: 38351, sum loss: 4392.914062, avg loss: 2.771554, ppl: 
15.983460 +epoch: 0, batch: 38352, sum loss: 4362.254883, avg loss: 2.757430, ppl: 15.759295 +epoch: 0, batch: 38353, sum loss: 4764.063477, avg loss: 3.032504, ppl: 20.749121 +epoch: 0, batch: 38354, sum loss: 5070.041992, avg loss: 2.963204, ppl: 19.359905 +epoch: 0, batch: 38355, sum loss: 5664.908203, avg loss: 2.958177, ppl: 19.262815 +epoch: 0, batch: 38356, sum loss: 5396.967285, avg loss: 3.117832, ppl: 22.597334 +epoch: 0, batch: 38357, sum loss: 5433.872559, avg loss: 3.008789, ppl: 20.262844 +epoch: 0, batch: 38358, sum loss: 5096.351562, avg loss: 2.889088, ppl: 17.976910 +epoch: 0, batch: 38359, sum loss: 4543.734863, avg loss: 2.832752, ppl: 16.992167 +epoch: 0, batch: 38360, sum loss: 4503.460449, avg loss: 2.764555, ppl: 15.871979 +epoch: 0, batch: 38361, sum loss: 5439.034668, avg loss: 2.980293, ppl: 19.693586 +epoch: 0, batch: 38362, sum loss: 5088.741699, avg loss: 2.794477, ppl: 16.354065 +epoch: 0, batch: 38363, sum loss: 4273.535645, avg loss: 2.780440, ppl: 16.126108 +epoch: 0, batch: 38364, sum loss: 4303.532227, avg loss: 2.821988, ppl: 16.810242 +epoch: 0, batch: 38365, sum loss: 4099.541016, avg loss: 2.581575, ppl: 13.217942 +epoch: 0, batch: 38366, sum loss: 4310.100098, avg loss: 2.672102, ppl: 14.470350 +epoch: 0, batch: 38367, sum loss: 5185.193848, avg loss: 2.988584, ppl: 19.857550 +epoch: 0, batch: 38368, sum loss: 4909.819336, avg loss: 3.032625, ppl: 20.751629 +epoch: 0, batch: 38369, sum loss: 4419.584961, avg loss: 2.610505, ppl: 13.605922 +epoch: 0, batch: 38370, sum loss: 5175.782715, avg loss: 2.719802, ppl: 15.177312 +epoch: 0, batch: 38371, sum loss: 5535.348145, avg loss: 3.166675, ppl: 23.728464 +epoch: 0, batch: 38372, sum loss: 5125.886719, avg loss: 3.150514, ppl: 23.348064 +epoch: 0, batch: 38373, sum loss: 4970.186523, avg loss: 2.953171, ppl: 19.166630 +epoch: 0, batch: 38374, sum loss: 5468.349121, avg loss: 2.888721, ppl: 17.970310 +epoch: 0, batch: 38375, sum loss: 4928.127441, avg loss: 3.121043, ppl: 
22.670017 +epoch: 0, batch: 38376, sum loss: 5356.511230, avg loss: 2.962672, ppl: 19.349609 +epoch: 0, batch: 38377, sum loss: 4758.578125, avg loss: 2.832487, ppl: 16.987658 +epoch: 0, batch: 38378, sum loss: 4895.887207, avg loss: 2.672428, ppl: 14.475067 +epoch: 0, batch: 38379, sum loss: 4288.449219, avg loss: 2.795599, ppl: 16.372437 +epoch: 0, batch: 38380, sum loss: 4945.350586, avg loss: 2.931447, ppl: 18.754745 +epoch: 0, batch: 38381, sum loss: 5240.559570, avg loss: 2.903357, ppl: 18.235258 +epoch: 0, batch: 38382, sum loss: 5074.705566, avg loss: 2.945273, ppl: 19.015856 +epoch: 0, batch: 38383, sum loss: 4186.239746, avg loss: 2.861408, ppl: 17.486130 +epoch: 0, batch: 38384, sum loss: 5158.905762, avg loss: 2.677170, ppl: 14.543869 +epoch: 0, batch: 38385, sum loss: 4831.526367, avg loss: 2.982424, ppl: 19.735594 +epoch: 0, batch: 38386, sum loss: 5329.223633, avg loss: 2.851377, ppl: 17.311604 +epoch: 0, batch: 38387, sum loss: 5006.406250, avg loss: 2.865716, ppl: 17.561626 +epoch: 0, batch: 38388, sum loss: 3885.313721, avg loss: 2.590209, ppl: 13.332558 +epoch: 0, batch: 38389, sum loss: 5373.933105, avg loss: 2.944621, ppl: 19.003456 +epoch: 0, batch: 38390, sum loss: 5447.143555, avg loss: 2.966854, ppl: 19.430687 +epoch: 0, batch: 38391, sum loss: 5251.339844, avg loss: 2.846255, ppl: 17.223152 +epoch: 0, batch: 38392, sum loss: 5402.848633, avg loss: 2.765020, ppl: 15.879353 +epoch: 0, batch: 38393, sum loss: 5984.679199, avg loss: 3.014952, ppl: 20.388105 +epoch: 0, batch: 38394, sum loss: 4313.525391, avg loss: 2.864227, ppl: 17.535490 +epoch: 0, batch: 38395, sum loss: 5158.106445, avg loss: 3.112919, ppl: 22.486578 +epoch: 0, batch: 38396, sum loss: 6011.237793, avg loss: 2.945241, ppl: 19.015249 +epoch: 0, batch: 38397, sum loss: 4693.136719, avg loss: 2.754188, ppl: 15.708285 +epoch: 0, batch: 38398, sum loss: 4194.569824, avg loss: 2.683666, ppl: 14.638660 +epoch: 0, batch: 38399, sum loss: 5440.297363, avg loss: 3.089323, ppl: 
21.962194 +epoch: 0, batch: 38400, sum loss: 4417.026367, avg loss: 3.012979, ppl: 20.347918 +epoch: 0, batch: 38401, sum loss: 4251.782227, avg loss: 2.618093, ppl: 13.709548 +epoch: 0, batch: 38402, sum loss: 5654.443848, avg loss: 3.056456, ppl: 21.252108 +epoch: 0, batch: 38403, sum loss: 5783.386230, avg loss: 3.058375, ppl: 21.292921 +epoch: 0, batch: 38404, sum loss: 4608.181641, avg loss: 2.860448, ppl: 17.469353 +epoch: 0, batch: 38405, sum loss: 4567.239746, avg loss: 2.712138, ppl: 15.061438 +epoch: 0, batch: 38406, sum loss: 3759.727783, avg loss: 2.385614, ppl: 10.865731 +epoch: 0, batch: 38407, sum loss: 4312.640137, avg loss: 2.597976, ppl: 13.436515 +epoch: 0, batch: 38408, sum loss: 5728.034180, avg loss: 3.087889, ppl: 21.930733 +epoch: 0, batch: 38409, sum loss: 3744.410156, avg loss: 2.695760, ppl: 14.816769 +epoch: 0, batch: 38410, sum loss: 5565.833496, avg loss: 3.155234, ppl: 23.458534 +epoch: 0, batch: 38411, sum loss: 4408.090332, avg loss: 2.858684, ppl: 17.438560 +epoch: 0, batch: 38412, sum loss: 4325.367676, avg loss: 2.616677, ppl: 13.690160 +epoch: 0, batch: 38413, sum loss: 4207.894531, avg loss: 2.782999, ppl: 16.167439 +epoch: 0, batch: 38414, sum loss: 6280.343750, avg loss: 3.115250, ppl: 22.539066 +epoch: 0, batch: 38415, sum loss: 5283.476074, avg loss: 3.180901, ppl: 24.068424 +epoch: 0, batch: 38416, sum loss: 4820.318359, avg loss: 2.752895, ppl: 15.687977 +epoch: 0, batch: 38417, sum loss: 5762.375977, avg loss: 2.961139, ppl: 19.319963 +epoch: 0, batch: 38418, sum loss: 4860.387695, avg loss: 3.020751, ppl: 20.506676 +epoch: 0, batch: 38419, sum loss: 4292.787109, avg loss: 2.814942, ppl: 16.692213 +epoch: 0, batch: 38420, sum loss: 5048.537598, avg loss: 2.978488, ppl: 19.658075 +epoch: 0, batch: 38421, sum loss: 5982.739258, avg loss: 3.109532, ppl: 22.410551 +epoch: 0, batch: 38422, sum loss: 4477.553223, avg loss: 2.788016, ppl: 16.248743 +epoch: 0, batch: 38423, sum loss: 5346.560547, avg loss: 3.072736, ppl: 
21.600922 +epoch: 0, batch: 38424, sum loss: 3786.821533, avg loss: 2.604416, ppl: 13.523331 +epoch: 0, batch: 38425, sum loss: 5410.564453, avg loss: 3.000868, ppl: 20.102970 +epoch: 0, batch: 38426, sum loss: 4886.943359, avg loss: 2.826457, ppl: 16.885521 +epoch: 0, batch: 38427, sum loss: 4450.810059, avg loss: 2.669952, ppl: 14.439275 +epoch: 0, batch: 38428, sum loss: 5300.020996, avg loss: 2.999446, ppl: 20.074411 +epoch: 0, batch: 38429, sum loss: 4174.442383, avg loss: 2.872982, ppl: 17.689686 +epoch: 0, batch: 38430, sum loss: 4438.539062, avg loss: 2.690024, ppl: 14.732024 +epoch: 0, batch: 38431, sum loss: 4681.821289, avg loss: 2.872283, ppl: 17.677330 +epoch: 0, batch: 38432, sum loss: 4366.609863, avg loss: 2.770691, ppl: 15.969671 +epoch: 0, batch: 38433, sum loss: 4090.593018, avg loss: 2.823046, ppl: 16.828026 +epoch: 0, batch: 38434, sum loss: 3989.362793, avg loss: 2.600628, ppl: 13.472192 +epoch: 0, batch: 38435, sum loss: 5662.448242, avg loss: 3.182939, ppl: 24.117527 +epoch: 0, batch: 38436, sum loss: 4518.463379, avg loss: 2.990379, ppl: 19.893227 +epoch: 0, batch: 38437, sum loss: 5826.468262, avg loss: 3.042542, ppl: 20.958456 +epoch: 0, batch: 38438, sum loss: 4655.683594, avg loss: 2.781173, ppl: 16.137943 +epoch: 0, batch: 38439, sum loss: 4775.338379, avg loss: 2.673762, ppl: 14.494392 +epoch: 0, batch: 38440, sum loss: 5660.213867, avg loss: 3.134116, ppl: 22.968327 +epoch: 0, batch: 38441, sum loss: 5271.285156, avg loss: 2.939925, ppl: 18.914427 +epoch: 0, batch: 38442, sum loss: 5483.855469, avg loss: 3.227696, ppl: 25.221485 +epoch: 0, batch: 38443, sum loss: 5008.122559, avg loss: 2.968656, ppl: 19.465742 +epoch: 0, batch: 38444, sum loss: 4906.331543, avg loss: 3.097431, ppl: 22.141006 +epoch: 0, batch: 38445, sum loss: 5581.827148, avg loss: 2.868359, ppl: 17.608105 +epoch: 0, batch: 38446, sum loss: 7090.350098, avg loss: 3.221422, ppl: 25.063740 +epoch: 0, batch: 38447, sum loss: 4071.562012, avg loss: 2.691052, ppl: 
14.747189 +epoch: 0, batch: 38448, sum loss: 5339.008789, avg loss: 2.885951, ppl: 17.920599 +epoch: 0, batch: 38449, sum loss: 5307.482910, avg loss: 2.922623, ppl: 18.589979 +epoch: 0, batch: 38450, sum loss: 4794.821289, avg loss: 3.038543, ppl: 20.874811 +epoch: 0, batch: 38451, sum loss: 4843.464355, avg loss: 2.785201, ppl: 16.203075 +epoch: 0, batch: 38452, sum loss: 5045.840332, avg loss: 2.708449, ppl: 15.005981 +epoch: 0, batch: 38453, sum loss: 5669.968750, avg loss: 3.203372, ppl: 24.615400 +epoch: 0, batch: 38454, sum loss: 5233.132812, avg loss: 3.031942, ppl: 20.737473 +epoch: 0, batch: 38455, sum loss: 4838.364258, avg loss: 2.859553, ppl: 17.453730 +epoch: 0, batch: 38456, sum loss: 5017.798340, avg loss: 2.951646, ppl: 19.137434 +epoch: 0, batch: 38457, sum loss: 4626.216797, avg loss: 2.841656, ppl: 17.144140 +epoch: 0, batch: 38458, sum loss: 4299.225586, avg loss: 2.741853, ppl: 15.515709 +epoch: 0, batch: 38459, sum loss: 4772.933594, avg loss: 2.994312, ppl: 19.971615 +epoch: 0, batch: 38460, sum loss: 4739.844238, avg loss: 2.888388, ppl: 17.964321 +epoch: 0, batch: 38461, sum loss: 4683.558105, avg loss: 2.838520, ppl: 17.090454 +epoch: 0, batch: 38462, sum loss: 4215.306641, avg loss: 2.931368, ppl: 18.753260 +epoch: 0, batch: 38463, sum loss: 5344.568848, avg loss: 3.022946, ppl: 20.551756 +epoch: 0, batch: 38464, sum loss: 5653.009766, avg loss: 3.095843, ppl: 22.105873 +epoch: 0, batch: 38465, sum loss: 4826.072266, avg loss: 2.832202, ppl: 16.982815 +epoch: 0, batch: 38466, sum loss: 4606.416992, avg loss: 2.846982, ppl: 17.235685 +epoch: 0, batch: 38467, sum loss: 5079.340332, avg loss: 2.939433, ppl: 18.905127 +epoch: 0, batch: 38468, sum loss: 5177.728027, avg loss: 2.870137, ppl: 17.639442 +epoch: 0, batch: 38469, sum loss: 5221.031250, avg loss: 2.926587, ppl: 18.663824 +epoch: 0, batch: 38470, sum loss: 4418.615723, avg loss: 2.899354, ppl: 18.162413 +epoch: 0, batch: 38471, sum loss: 4466.735352, avg loss: 2.765780, ppl: 
15.891438 +epoch: 0, batch: 38472, sum loss: 5442.316406, avg loss: 3.086963, ppl: 21.910444 +epoch: 0, batch: 38473, sum loss: 4819.457520, avg loss: 2.908544, ppl: 18.330091 +epoch: 0, batch: 38474, sum loss: 4611.566406, avg loss: 2.735211, ppl: 15.413001 +epoch: 0, batch: 38475, sum loss: 5106.515625, avg loss: 2.851209, ppl: 17.308695 +epoch: 0, batch: 38476, sum loss: 5083.431641, avg loss: 2.993776, ppl: 19.960915 +epoch: 0, batch: 38477, sum loss: 5725.829590, avg loss: 3.053776, ppl: 21.195223 +epoch: 0, batch: 38478, sum loss: 4368.583984, avg loss: 2.831228, ppl: 16.966274 +epoch: 0, batch: 38479, sum loss: 5497.496094, avg loss: 2.908728, ppl: 18.333466 +epoch: 0, batch: 38480, sum loss: 5276.958008, avg loss: 2.826437, ppl: 16.885191 +epoch: 0, batch: 38481, sum loss: 4222.033691, avg loss: 2.720383, ppl: 15.186132 +epoch: 0, batch: 38482, sum loss: 4768.078613, avg loss: 2.749757, ppl: 15.638828 +epoch: 0, batch: 38483, sum loss: 5974.762695, avg loss: 3.060842, ppl: 21.345518 +epoch: 0, batch: 38484, sum loss: 5149.705078, avg loss: 3.074451, ppl: 21.637999 +epoch: 0, batch: 38485, sum loss: 4986.401367, avg loss: 2.865748, ppl: 17.562183 +epoch: 0, batch: 38486, sum loss: 3848.585938, avg loss: 2.731431, ppl: 15.354840 +epoch: 0, batch: 38487, sum loss: 4185.124023, avg loss: 2.573877, ppl: 13.116577 +epoch: 0, batch: 38488, sum loss: 4743.824219, avg loss: 2.857725, ppl: 17.421854 +epoch: 0, batch: 38489, sum loss: 5160.414551, avg loss: 2.886138, ppl: 17.923952 +epoch: 0, batch: 38490, sum loss: 4576.882324, avg loss: 2.785686, ppl: 16.210936 +epoch: 0, batch: 38491, sum loss: 4929.154297, avg loss: 2.802248, ppl: 16.481659 +epoch: 0, batch: 38492, sum loss: 5381.830078, avg loss: 2.920146, ppl: 18.544003 +epoch: 0, batch: 38493, sum loss: 5033.824707, avg loss: 2.773457, ppl: 16.013903 +epoch: 0, batch: 38494, sum loss: 4541.349609, avg loss: 2.774190, ppl: 16.025648 +epoch: 0, batch: 38495, sum loss: 4099.410156, avg loss: 2.644781, ppl: 
14.080359 +epoch: 0, batch: 38496, sum loss: 4661.084961, avg loss: 2.784400, ppl: 16.190096 +epoch: 0, batch: 38497, sum loss: 4010.644287, avg loss: 2.754564, ppl: 15.714181 +epoch: 0, batch: 38498, sum loss: 4622.834961, avg loss: 2.907443, ppl: 18.309925 +epoch: 0, batch: 38499, sum loss: 6546.772949, avg loss: 3.400921, ppl: 29.991705 +epoch: 0, batch: 38500, sum loss: 4858.198242, avg loss: 2.788862, ppl: 16.262506 +epoch: 0, batch: 38501, sum loss: 5547.557129, avg loss: 3.080265, ppl: 21.764166 +epoch: 0, batch: 38502, sum loss: 6160.182129, avg loss: 3.049595, ppl: 21.106796 +epoch: 0, batch: 38503, sum loss: 6283.779785, avg loss: 3.146610, ppl: 23.257084 +epoch: 0, batch: 38504, sum loss: 4839.997070, avg loss: 2.931555, ppl: 18.756775 +epoch: 0, batch: 38505, sum loss: 4529.299805, avg loss: 2.852204, ppl: 17.325928 +epoch: 0, batch: 38506, sum loss: 5953.257812, avg loss: 2.978118, ppl: 19.650803 +epoch: 0, batch: 38507, sum loss: 4327.077148, avg loss: 2.888569, ppl: 17.967587 +epoch: 0, batch: 38508, sum loss: 5371.753906, avg loss: 2.933782, ppl: 18.798586 +epoch: 0, batch: 38509, sum loss: 4473.856445, avg loss: 2.766763, ppl: 15.907066 +epoch: 0, batch: 38510, sum loss: 4619.305664, avg loss: 2.767709, ppl: 15.922112 +epoch: 0, batch: 38511, sum loss: 4921.745605, avg loss: 2.878214, ppl: 17.782484 +epoch: 0, batch: 38512, sum loss: 4957.686523, avg loss: 3.028520, ppl: 20.666616 +epoch: 0, batch: 38513, sum loss: 5029.856445, avg loss: 2.962224, ppl: 19.340942 +epoch: 0, batch: 38514, sum loss: 4352.366699, avg loss: 2.842826, ppl: 17.164206 +epoch: 0, batch: 38515, sum loss: 5545.301758, avg loss: 2.891190, ppl: 18.014727 +epoch: 0, batch: 38516, sum loss: 4617.638672, avg loss: 2.701954, ppl: 14.908830 +epoch: 0, batch: 38517, sum loss: 5204.822754, avg loss: 3.183378, ppl: 24.128115 +epoch: 0, batch: 38518, sum loss: 5194.381836, avg loss: 3.025266, ppl: 20.599487 +epoch: 0, batch: 38519, sum loss: 5669.078613, avg loss: 3.101247, ppl: 
22.225641 +epoch: 0, batch: 38520, sum loss: 5042.482910, avg loss: 2.809183, ppl: 16.596348 +epoch: 0, batch: 38521, sum loss: 4716.531250, avg loss: 2.822580, ppl: 16.820192 +epoch: 0, batch: 38522, sum loss: 5218.680176, avg loss: 2.836239, ppl: 17.051516 +epoch: 0, batch: 38523, sum loss: 4686.188477, avg loss: 2.873200, ppl: 17.693542 +epoch: 0, batch: 38524, sum loss: 4725.186523, avg loss: 2.709396, ppl: 15.020199 +epoch: 0, batch: 38525, sum loss: 5366.415039, avg loss: 3.203830, ppl: 24.626671 +epoch: 0, batch: 38526, sum loss: 4154.401367, avg loss: 2.826123, ppl: 16.879898 +epoch: 0, batch: 38527, sum loss: 4139.877441, avg loss: 2.759918, ppl: 15.798551 +epoch: 0, batch: 38528, sum loss: 6588.277344, avg loss: 3.390776, ppl: 29.688980 +epoch: 0, batch: 38529, sum loss: 4889.862793, avg loss: 3.007296, ppl: 20.232609 +epoch: 0, batch: 38530, sum loss: 5773.489746, avg loss: 2.920329, ppl: 18.547382 +epoch: 0, batch: 38531, sum loss: 4989.462891, avg loss: 3.049794, ppl: 21.111000 +epoch: 0, batch: 38532, sum loss: 4676.119141, avg loss: 2.818637, ppl: 16.754002 +epoch: 0, batch: 38533, sum loss: 4813.304199, avg loss: 2.853174, ppl: 17.342739 +epoch: 0, batch: 38534, sum loss: 5618.218750, avg loss: 2.977328, ppl: 19.635286 +epoch: 0, batch: 38535, sum loss: 5469.431152, avg loss: 3.221102, ppl: 25.055716 +epoch: 0, batch: 38536, sum loss: 4185.713867, avg loss: 2.841625, ppl: 17.143604 +epoch: 0, batch: 38537, sum loss: 5723.097656, avg loss: 3.015331, ppl: 20.395836 +epoch: 0, batch: 38538, sum loss: 5120.555664, avg loss: 2.851089, ppl: 17.306618 +epoch: 0, batch: 38539, sum loss: 5177.732422, avg loss: 2.715119, ppl: 15.106413 +epoch: 0, batch: 38540, sum loss: 4933.368652, avg loss: 3.172585, ppl: 23.869095 +epoch: 0, batch: 38541, sum loss: 5484.291504, avg loss: 3.313771, ppl: 27.488604 +epoch: 0, batch: 38542, sum loss: 4420.134766, avg loss: 2.804654, ppl: 16.521357 +epoch: 0, batch: 38543, sum loss: 5736.109375, avg loss: 2.932571, ppl: 
18.775845 +epoch: 0, batch: 38544, sum loss: 4941.387695, avg loss: 2.671021, ppl: 14.454713 +epoch: 0, batch: 38545, sum loss: 4714.169922, avg loss: 2.816111, ppl: 16.711739 +epoch: 0, batch: 38546, sum loss: 4975.182129, avg loss: 2.991691, ppl: 19.919340 +epoch: 0, batch: 38547, sum loss: 5364.912598, avg loss: 3.119135, ppl: 22.626802 +epoch: 0, batch: 38548, sum loss: 5112.961914, avg loss: 2.970925, ppl: 19.509954 +epoch: 0, batch: 38549, sum loss: 5442.191895, avg loss: 2.856794, ppl: 17.405628 +epoch: 0, batch: 38550, sum loss: 5141.713379, avg loss: 2.831340, ppl: 16.968180 +epoch: 0, batch: 38551, sum loss: 5773.082520, avg loss: 3.205487, ppl: 24.667517 +epoch: 0, batch: 38552, sum loss: 5456.522461, avg loss: 3.141349, ppl: 23.135046 +epoch: 0, batch: 38553, sum loss: 5024.226562, avg loss: 2.825774, ppl: 16.874008 +epoch: 0, batch: 38554, sum loss: 3747.194336, avg loss: 2.530179, ppl: 12.555748 +epoch: 0, batch: 38555, sum loss: 4913.440918, avg loss: 2.912532, ppl: 18.403334 +epoch: 0, batch: 38556, sum loss: 5367.145020, avg loss: 2.899592, ppl: 18.166735 +epoch: 0, batch: 38557, sum loss: 5429.698242, avg loss: 3.065894, ppl: 21.453630 +epoch: 0, batch: 38558, sum loss: 5199.937988, avg loss: 2.947811, ppl: 19.064169 +epoch: 0, batch: 38559, sum loss: 5314.087891, avg loss: 2.791013, ppl: 16.297518 +epoch: 0, batch: 38560, sum loss: 5992.393066, avg loss: 3.034123, ppl: 20.782747 +epoch: 0, batch: 38561, sum loss: 4389.720703, avg loss: 2.867225, ppl: 17.588139 +epoch: 0, batch: 38562, sum loss: 5678.655273, avg loss: 2.984054, ppl: 19.767799 +epoch: 0, batch: 38563, sum loss: 4121.025391, avg loss: 2.650177, ppl: 14.156548 +epoch: 0, batch: 38564, sum loss: 5616.850098, avg loss: 3.091277, ppl: 22.005157 +epoch: 0, batch: 38565, sum loss: 5887.269531, avg loss: 2.881679, ppl: 17.844200 +epoch: 0, batch: 38566, sum loss: 5276.381836, avg loss: 2.864485, ppl: 17.540022 +epoch: 0, batch: 38567, sum loss: 4628.499023, avg loss: 2.986129, ppl: 
19.808846 +epoch: 0, batch: 38568, sum loss: 5345.990234, avg loss: 2.924502, ppl: 18.624956 +epoch: 0, batch: 38569, sum loss: 4809.848145, avg loss: 2.993061, ppl: 19.946638 +epoch: 0, batch: 38570, sum loss: 5699.680664, avg loss: 3.203868, ppl: 24.627598 +epoch: 0, batch: 38571, sum loss: 5001.766602, avg loss: 2.881202, ppl: 17.835699 +epoch: 0, batch: 38572, sum loss: 5248.519531, avg loss: 2.874326, ppl: 17.713486 +epoch: 0, batch: 38573, sum loss: 5252.311523, avg loss: 3.060788, ppl: 21.344362 +epoch: 0, batch: 38574, sum loss: 4268.366699, avg loss: 2.752009, ppl: 15.674096 +epoch: 0, batch: 38575, sum loss: 5348.912109, avg loss: 2.793166, ppl: 16.332642 +epoch: 0, batch: 38576, sum loss: 4016.017578, avg loss: 2.652588, ppl: 14.190712 +epoch: 0, batch: 38577, sum loss: 4653.649414, avg loss: 2.774985, ppl: 16.038380 +epoch: 0, batch: 38578, sum loss: 4163.309082, avg loss: 2.782961, ppl: 16.166815 +epoch: 0, batch: 38579, sum loss: 4190.306641, avg loss: 2.984549, ppl: 19.777576 +epoch: 0, batch: 38580, sum loss: 4915.687988, avg loss: 2.808964, ppl: 16.592728 +epoch: 0, batch: 38581, sum loss: 4871.475586, avg loss: 2.730648, ppl: 15.342823 +epoch: 0, batch: 38582, sum loss: 4468.336914, avg loss: 2.721277, ppl: 15.199723 +epoch: 0, batch: 38583, sum loss: 5392.878418, avg loss: 3.108287, ppl: 22.382677 +epoch: 0, batch: 38584, sum loss: 5143.044922, avg loss: 2.964291, ppl: 19.380959 +epoch: 0, batch: 38585, sum loss: 5209.433594, avg loss: 2.923363, ppl: 18.603756 +epoch: 0, batch: 38586, sum loss: 5064.287109, avg loss: 2.934118, ppl: 18.804901 +epoch: 0, batch: 38587, sum loss: 4799.860352, avg loss: 2.816819, ppl: 16.723576 +epoch: 0, batch: 38588, sum loss: 5348.519531, avg loss: 2.841934, ppl: 17.148895 +epoch: 0, batch: 38589, sum loss: 6341.363281, avg loss: 3.217333, ppl: 24.961454 +epoch: 0, batch: 38590, sum loss: 4880.178223, avg loss: 2.829089, ppl: 16.930029 +epoch: 0, batch: 38591, sum loss: 4700.014160, avg loss: 2.839888, ppl: 
17.113846 +epoch: 0, batch: 38592, sum loss: 5218.152832, avg loss: 3.135909, ppl: 23.009550 +epoch: 0, batch: 38593, sum loss: 5322.339355, avg loss: 2.914753, ppl: 18.444263 +epoch: 0, batch: 38594, sum loss: 5293.084961, avg loss: 3.093562, ppl: 22.055508 +epoch: 0, batch: 38595, sum loss: 5800.199707, avg loss: 2.875657, ppl: 17.737070 +epoch: 0, batch: 38596, sum loss: 5641.813477, avg loss: 3.123928, ppl: 22.735506 +epoch: 0, batch: 38597, sum loss: 4616.642090, avg loss: 2.712481, ppl: 15.066603 +epoch: 0, batch: 38598, sum loss: 5492.030273, avg loss: 3.080219, ppl: 21.763168 +epoch: 0, batch: 38599, sum loss: 5106.133789, avg loss: 2.770555, ppl: 15.967502 +epoch: 0, batch: 38600, sum loss: 5507.871094, avg loss: 2.991782, ppl: 19.921154 +epoch: 0, batch: 38601, sum loss: 4957.869141, avg loss: 2.887518, ppl: 17.948713 +epoch: 0, batch: 38602, sum loss: 4647.028320, avg loss: 2.950494, ppl: 19.115395 +epoch: 0, batch: 38603, sum loss: 4784.091309, avg loss: 2.897693, ppl: 18.132269 +epoch: 0, batch: 38604, sum loss: 5111.285156, avg loss: 2.737700, ppl: 15.451398 +epoch: 0, batch: 38605, sum loss: 4674.893066, avg loss: 2.737057, ppl: 15.441470 +epoch: 0, batch: 38606, sum loss: 4418.125977, avg loss: 2.830318, ppl: 16.950846 +epoch: 0, batch: 38607, sum loss: 4674.740234, avg loss: 2.657613, ppl: 14.262198 +epoch: 0, batch: 38608, sum loss: 4856.018066, avg loss: 2.878493, ppl: 17.787453 +epoch: 0, batch: 38609, sum loss: 4643.551758, avg loss: 2.855813, ppl: 17.388565 +epoch: 0, batch: 38610, sum loss: 4564.928223, avg loss: 2.913164, ppl: 18.414978 +epoch: 0, batch: 38611, sum loss: 5017.439453, avg loss: 2.930747, ppl: 18.741631 +epoch: 0, batch: 38612, sum loss: 4713.750000, avg loss: 2.907927, ppl: 18.318785 +epoch: 0, batch: 38613, sum loss: 4602.351074, avg loss: 2.759203, ppl: 15.787262 +epoch: 0, batch: 38614, sum loss: 4534.626953, avg loss: 2.567739, ppl: 13.036313 +epoch: 0, batch: 38615, sum loss: 4742.221680, avg loss: 2.794474, ppl: 
16.354019 +epoch: 0, batch: 38616, sum loss: 5418.751465, avg loss: 2.879251, ppl: 17.800943 +epoch: 0, batch: 38617, sum loss: 4619.879883, avg loss: 2.891039, ppl: 18.012012 +epoch: 0, batch: 38618, sum loss: 5573.449707, avg loss: 3.062335, ppl: 21.377420 +epoch: 0, batch: 38619, sum loss: 6547.790527, avg loss: 3.222338, ppl: 25.086704 +epoch: 0, batch: 38620, sum loss: 5136.174805, avg loss: 2.980949, ppl: 19.706509 +epoch: 0, batch: 38621, sum loss: 5334.219727, avg loss: 2.707726, ppl: 14.995134 +epoch: 0, batch: 38622, sum loss: 4756.716309, avg loss: 2.929013, ppl: 18.709146 +epoch: 0, batch: 38623, sum loss: 4177.974609, avg loss: 2.752289, ppl: 15.678480 +epoch: 0, batch: 38624, sum loss: 4920.525879, avg loss: 3.077252, ppl: 21.698690 +epoch: 0, batch: 38625, sum loss: 4270.049805, avg loss: 2.491278, ppl: 12.076696 +epoch: 0, batch: 38626, sum loss: 4699.006836, avg loss: 2.760874, ppl: 15.813651 +epoch: 0, batch: 38627, sum loss: 4572.829590, avg loss: 2.819254, ppl: 16.764334 +epoch: 0, batch: 38628, sum loss: 5273.904785, avg loss: 2.838485, ppl: 17.089851 +epoch: 0, batch: 38629, sum loss: 5262.632812, avg loss: 3.081167, ppl: 21.783798 +epoch: 0, batch: 38630, sum loss: 5219.486328, avg loss: 2.798652, ppl: 16.422497 +epoch: 0, batch: 38631, sum loss: 5408.725586, avg loss: 2.914184, ppl: 18.433765 +epoch: 0, batch: 38632, sum loss: 4634.736328, avg loss: 3.021340, ppl: 20.518776 +epoch: 0, batch: 38633, sum loss: 4805.684570, avg loss: 2.718148, ppl: 15.152227 +epoch: 0, batch: 38634, sum loss: 4317.549805, avg loss: 2.744787, ppl: 15.561295 +epoch: 0, batch: 38635, sum loss: 5190.726074, avg loss: 3.285270, ppl: 26.716190 +epoch: 0, batch: 38636, sum loss: 5009.523438, avg loss: 2.910821, ppl: 18.371880 +epoch: 0, batch: 38637, sum loss: 3921.942139, avg loss: 2.630411, ppl: 13.879469 +epoch: 0, batch: 38638, sum loss: 5781.794922, avg loss: 3.067265, ppl: 21.483072 +epoch: 0, batch: 38639, sum loss: 5721.558105, avg loss: 3.480267, ppl: 
32.468376 +epoch: 0, batch: 38640, sum loss: 4617.847168, avg loss: 2.829563, ppl: 16.938065 +epoch: 0, batch: 38641, sum loss: 5224.746094, avg loss: 2.878648, ppl: 17.790205 +epoch: 0, batch: 38642, sum loss: 4584.804199, avg loss: 2.740469, ppl: 15.494246 +epoch: 0, batch: 38643, sum loss: 5978.149414, avg loss: 2.910491, ppl: 18.365822 +epoch: 0, batch: 38644, sum loss: 5002.320801, avg loss: 2.598608, ppl: 13.445010 +epoch: 0, batch: 38645, sum loss: 5287.687988, avg loss: 2.647816, ppl: 14.123156 +epoch: 0, batch: 38646, sum loss: 5088.737305, avg loss: 3.045325, ppl: 21.016857 +epoch: 0, batch: 38647, sum loss: 4591.970703, avg loss: 2.717142, ppl: 15.137004 +epoch: 0, batch: 38648, sum loss: 5798.086426, avg loss: 3.092313, ppl: 22.027966 +epoch: 0, batch: 38649, sum loss: 3955.957520, avg loss: 2.621576, ppl: 13.757382 +epoch: 0, batch: 38650, sum loss: 6255.293945, avg loss: 3.057328, ppl: 21.270657 +epoch: 0, batch: 38651, sum loss: 4933.363281, avg loss: 2.929551, ppl: 18.719221 +epoch: 0, batch: 38652, sum loss: 5460.080078, avg loss: 3.008309, ppl: 20.253117 +epoch: 0, batch: 38653, sum loss: 4504.366211, avg loss: 2.718386, ppl: 15.155848 +epoch: 0, batch: 38654, sum loss: 5324.132812, avg loss: 3.006286, ppl: 20.212194 +epoch: 0, batch: 38655, sum loss: 4735.787598, avg loss: 2.952486, ppl: 19.153511 +epoch: 0, batch: 38656, sum loss: 4997.682129, avg loss: 2.902254, ppl: 18.215162 +epoch: 0, batch: 38657, sum loss: 4080.309082, avg loss: 2.745834, ppl: 15.577599 +epoch: 0, batch: 38658, sum loss: 4747.250488, avg loss: 2.952270, ppl: 19.149374 +epoch: 0, batch: 38659, sum loss: 4796.575195, avg loss: 2.821515, ppl: 16.802288 +epoch: 0, batch: 38660, sum loss: 5014.102539, avg loss: 2.876708, ppl: 17.755730 +epoch: 0, batch: 38661, sum loss: 5200.748047, avg loss: 3.025450, ppl: 20.603270 +epoch: 0, batch: 38662, sum loss: 4456.445312, avg loss: 2.972946, ppl: 19.549421 +epoch: 0, batch: 38663, sum loss: 4507.633301, avg loss: 2.930841, ppl: 
18.743387 +epoch: 0, batch: 38664, sum loss: 4859.409180, avg loss: 2.623871, ppl: 13.788999 +epoch: 0, batch: 38665, sum loss: 4257.801758, avg loss: 2.810430, ppl: 16.617067 +epoch: 0, batch: 38666, sum loss: 4603.374023, avg loss: 2.861016, ppl: 17.479269 +epoch: 0, batch: 38667, sum loss: 4180.969727, avg loss: 2.800382, ppl: 16.450924 +epoch: 0, batch: 38668, sum loss: 5496.874512, avg loss: 3.091606, ppl: 22.012394 +epoch: 0, batch: 38669, sum loss: 5049.842285, avg loss: 2.887274, ppl: 17.944326 +epoch: 0, batch: 38670, sum loss: 6125.708008, avg loss: 2.988150, ppl: 19.848934 +epoch: 0, batch: 38671, sum loss: 5317.083008, avg loss: 3.098533, ppl: 22.165419 +epoch: 0, batch: 38672, sum loss: 4991.300781, avg loss: 2.842427, ppl: 17.157349 +epoch: 0, batch: 38673, sum loss: 4370.761230, avg loss: 2.592385, ppl: 13.361602 +epoch: 0, batch: 38674, sum loss: 4473.346191, avg loss: 2.954654, ppl: 19.195084 +epoch: 0, batch: 38675, sum loss: 4505.190918, avg loss: 2.722170, ppl: 15.213298 +epoch: 0, batch: 38676, sum loss: 4998.185059, avg loss: 2.859374, ppl: 17.450596 +epoch: 0, batch: 38677, sum loss: 4194.072754, avg loss: 2.651120, ppl: 14.169906 +epoch: 0, batch: 38678, sum loss: 4699.720703, avg loss: 2.839710, ppl: 17.110811 +epoch: 0, batch: 38679, sum loss: 4889.323730, avg loss: 2.839329, ppl: 17.104277 +epoch: 0, batch: 38680, sum loss: 4249.335938, avg loss: 2.910504, ppl: 18.366055 +epoch: 0, batch: 38681, sum loss: 4320.039062, avg loss: 2.779948, ppl: 16.118183 +epoch: 0, batch: 38682, sum loss: 5469.419922, avg loss: 3.136135, ppl: 23.014746 +epoch: 0, batch: 38683, sum loss: 4771.442383, avg loss: 2.900573, ppl: 18.184559 +epoch: 0, batch: 38684, sum loss: 3888.624268, avg loss: 2.777589, ppl: 16.080202 +epoch: 0, batch: 38685, sum loss: 4992.354492, avg loss: 2.980510, ppl: 19.697865 +epoch: 0, batch: 38686, sum loss: 3972.864014, avg loss: 2.799763, ppl: 16.440758 +epoch: 0, batch: 38687, sum loss: 5161.826172, avg loss: 3.011567, ppl: 
20.319218 +epoch: 0, batch: 38688, sum loss: 4384.765625, avg loss: 2.672008, ppl: 14.468998 +epoch: 0, batch: 38689, sum loss: 4144.787598, avg loss: 2.606785, ppl: 13.555394 +epoch: 0, batch: 38690, sum loss: 4948.267578, avg loss: 2.934916, ppl: 18.819914 +epoch: 0, batch: 38691, sum loss: 4696.582031, avg loss: 2.980065, ppl: 19.689089 +epoch: 0, batch: 38692, sum loss: 5207.168457, avg loss: 2.927020, ppl: 18.671902 +epoch: 0, batch: 38693, sum loss: 4921.089355, avg loss: 2.927477, ppl: 18.680443 +epoch: 0, batch: 38694, sum loss: 4816.609375, avg loss: 2.848379, ppl: 17.259787 +epoch: 0, batch: 38695, sum loss: 4337.939453, avg loss: 3.027174, ppl: 20.638815 +epoch: 0, batch: 38696, sum loss: 3952.290527, avg loss: 2.718219, ppl: 15.153311 +epoch: 0, batch: 38697, sum loss: 4638.362793, avg loss: 2.751105, ppl: 15.659924 +epoch: 0, batch: 38698, sum loss: 4707.657715, avg loss: 2.849672, ppl: 17.282110 +epoch: 0, batch: 38699, sum loss: 5747.570801, avg loss: 3.318459, ppl: 27.617754 +epoch: 0, batch: 38700, sum loss: 4069.040527, avg loss: 2.698303, ppl: 14.854499 +epoch: 0, batch: 38701, sum loss: 5517.038086, avg loss: 3.028012, ppl: 20.656134 +epoch: 0, batch: 38702, sum loss: 4853.140137, avg loss: 2.762174, ppl: 15.834231 +epoch: 0, batch: 38703, sum loss: 6162.053711, avg loss: 3.219464, ppl: 25.014704 +epoch: 0, batch: 38704, sum loss: 5366.427734, avg loss: 3.045646, ppl: 21.023607 +epoch: 0, batch: 38705, sum loss: 5432.993652, avg loss: 3.004974, ppl: 20.185699 +epoch: 0, batch: 38706, sum loss: 5150.156250, avg loss: 3.052849, ppl: 21.175589 +epoch: 0, batch: 38707, sum loss: 4858.880859, avg loss: 2.735856, ppl: 15.422944 +epoch: 0, batch: 38708, sum loss: 4783.499512, avg loss: 2.888587, ppl: 17.967894 +epoch: 0, batch: 38709, sum loss: 5813.089355, avg loss: 3.258458, ppl: 26.009409 +epoch: 0, batch: 38710, sum loss: 5118.865723, avg loss: 2.981285, ppl: 19.713123 +epoch: 0, batch: 38711, sum loss: 4813.269043, avg loss: 2.934920, ppl: 
18.820000 +epoch: 0, batch: 38712, sum loss: 5041.441895, avg loss: 2.851494, ppl: 17.313635 +epoch: 0, batch: 38713, sum loss: 4549.297363, avg loss: 2.829165, ppl: 16.931316 +epoch: 0, batch: 38714, sum loss: 5302.248535, avg loss: 3.028126, ppl: 20.658482 +epoch: 0, batch: 38715, sum loss: 5064.254883, avg loss: 2.843490, ppl: 17.175598 +epoch: 0, batch: 38716, sum loss: 5403.629883, avg loss: 3.156326, ppl: 23.484152 +epoch: 0, batch: 38717, sum loss: 5356.306152, avg loss: 3.024453, ppl: 20.582741 +epoch: 0, batch: 38718, sum loss: 4343.020508, avg loss: 2.636928, ppl: 13.970219 +epoch: 0, batch: 38719, sum loss: 5067.498047, avg loss: 2.930884, ppl: 18.744190 +epoch: 0, batch: 38720, sum loss: 4924.951660, avg loss: 3.010362, ppl: 20.294739 +epoch: 0, batch: 38721, sum loss: 5026.275391, avg loss: 2.923953, ppl: 18.614731 +epoch: 0, batch: 38722, sum loss: 4861.105469, avg loss: 3.011837, ppl: 20.324713 +epoch: 0, batch: 38723, sum loss: 4392.875000, avg loss: 2.745547, ppl: 15.573127 +epoch: 0, batch: 38724, sum loss: 4981.728027, avg loss: 2.688466, ppl: 14.709100 +epoch: 0, batch: 38725, sum loss: 5215.201660, avg loss: 3.080450, ppl: 21.768198 +epoch: 0, batch: 38726, sum loss: 4520.945312, avg loss: 2.870441, ppl: 17.644806 +epoch: 0, batch: 38727, sum loss: 4267.739746, avg loss: 2.720038, ppl: 15.180902 +epoch: 0, batch: 38728, sum loss: 4282.954102, avg loss: 2.815881, ppl: 16.707882 +epoch: 0, batch: 38729, sum loss: 4349.019531, avg loss: 2.738678, ppl: 15.466517 +epoch: 0, batch: 38730, sum loss: 5766.599121, avg loss: 3.230588, ppl: 25.294525 +epoch: 0, batch: 38731, sum loss: 4296.325195, avg loss: 2.768251, ppl: 15.930746 +epoch: 0, batch: 38732, sum loss: 4725.520020, avg loss: 2.822891, ppl: 16.825430 +epoch: 0, batch: 38733, sum loss: 4872.102539, avg loss: 2.865943, ppl: 17.565609 +epoch: 0, batch: 38734, sum loss: 4385.604004, avg loss: 2.621401, ppl: 13.754982 +epoch: 0, batch: 38735, sum loss: 4820.768066, avg loss: 2.780143, ppl: 
16.121326 +epoch: 0, batch: 38736, sum loss: 5301.527344, avg loss: 2.991833, ppl: 19.922161 +epoch: 0, batch: 38737, sum loss: 4721.517578, avg loss: 2.849437, ppl: 17.278055 +epoch: 0, batch: 38738, sum loss: 5056.021973, avg loss: 2.894117, ppl: 18.067533 +epoch: 0, batch: 38739, sum loss: 5315.241211, avg loss: 2.807840, ppl: 16.574078 +epoch: 0, batch: 38740, sum loss: 4909.359863, avg loss: 2.945027, ppl: 19.011173 +epoch: 0, batch: 38741, sum loss: 5319.768555, avg loss: 2.976927, ppl: 19.627409 +epoch: 0, batch: 38742, sum loss: 4792.477051, avg loss: 2.765422, ppl: 15.885748 +epoch: 0, batch: 38743, sum loss: 4984.645996, avg loss: 2.949495, ppl: 19.096300 +epoch: 0, batch: 38744, sum loss: 4347.769043, avg loss: 2.743072, ppl: 15.534631 +epoch: 0, batch: 38745, sum loss: 4662.016602, avg loss: 3.051058, ppl: 21.137697 +epoch: 0, batch: 38746, sum loss: 4347.854492, avg loss: 2.812325, ppl: 16.648581 +epoch: 0, batch: 38747, sum loss: 5579.765625, avg loss: 3.150630, ppl: 23.350771 +epoch: 0, batch: 38748, sum loss: 5570.301758, avg loss: 2.974000, ppl: 19.570044 +epoch: 0, batch: 38749, sum loss: 4288.223633, avg loss: 2.634044, ppl: 13.929988 +epoch: 0, batch: 38750, sum loss: 4439.701172, avg loss: 2.820649, ppl: 16.787737 +epoch: 0, batch: 38751, sum loss: 4070.290039, avg loss: 2.484914, ppl: 12.000092 +epoch: 0, batch: 38752, sum loss: 3432.071045, avg loss: 2.487008, ppl: 12.025244 +epoch: 0, batch: 38753, sum loss: 5802.719238, avg loss: 3.075103, ppl: 21.652107 +epoch: 0, batch: 38754, sum loss: 5605.610352, avg loss: 2.978539, ppl: 19.659073 +epoch: 0, batch: 38755, sum loss: 4922.709473, avg loss: 2.987081, ppl: 19.827723 +epoch: 0, batch: 38756, sum loss: 5575.419922, avg loss: 3.121736, ppl: 22.685724 +epoch: 0, batch: 38757, sum loss: 4025.980713, avg loss: 2.766997, ppl: 15.910784 +epoch: 0, batch: 38758, sum loss: 4431.880371, avg loss: 2.936965, ppl: 18.858528 +epoch: 0, batch: 38759, sum loss: 5649.785645, avg loss: 2.994057, ppl: 
19.966522 +epoch: 0, batch: 38760, sum loss: 5011.796387, avg loss: 2.908762, ppl: 18.334082 +epoch: 0, batch: 38761, sum loss: 5890.849609, avg loss: 2.963204, ppl: 19.359900 +epoch: 0, batch: 38762, sum loss: 5618.003906, avg loss: 3.080046, ppl: 21.759401 +epoch: 0, batch: 38763, sum loss: 5706.955078, avg loss: 3.177592, ppl: 23.988920 +epoch: 0, batch: 38764, sum loss: 4865.185059, avg loss: 2.995804, ppl: 20.001427 +epoch: 0, batch: 38765, sum loss: 4434.765625, avg loss: 2.803265, ppl: 16.498428 +epoch: 0, batch: 38766, sum loss: 4642.736816, avg loss: 2.964710, ppl: 19.389090 +epoch: 0, batch: 38767, sum loss: 5503.284180, avg loss: 2.864802, ppl: 17.545576 +epoch: 0, batch: 38768, sum loss: 5576.255859, avg loss: 2.916452, ppl: 18.475615 +epoch: 0, batch: 38769, sum loss: 4687.812988, avg loss: 2.695695, ppl: 14.815807 +epoch: 0, batch: 38770, sum loss: 3866.224609, avg loss: 2.615849, ppl: 13.678825 +epoch: 0, batch: 38771, sum loss: 4193.499512, avg loss: 2.751640, ppl: 15.668308 +epoch: 0, batch: 38772, sum loss: 4363.637207, avg loss: 2.556319, ppl: 12.888294 +epoch: 0, batch: 38773, sum loss: 5010.099121, avg loss: 2.872763, ppl: 17.685820 +epoch: 0, batch: 38774, sum loss: 4748.443848, avg loss: 2.911370, ppl: 18.381956 +epoch: 0, batch: 38775, sum loss: 3889.687744, avg loss: 2.851677, ppl: 17.316801 +epoch: 0, batch: 38776, sum loss: 6269.341309, avg loss: 2.957237, ppl: 19.244717 +epoch: 0, batch: 38777, sum loss: 5774.365723, avg loss: 2.968826, ppl: 19.469042 +epoch: 0, batch: 38778, sum loss: 5226.020020, avg loss: 2.981187, ppl: 19.711193 +epoch: 0, batch: 38779, sum loss: 4789.173340, avg loss: 2.930951, ppl: 18.745441 +epoch: 0, batch: 38780, sum loss: 4614.593262, avg loss: 3.014104, ppl: 20.370832 +epoch: 0, batch: 38781, sum loss: 5321.885742, avg loss: 3.020367, ppl: 20.498806 +epoch: 0, batch: 38782, sum loss: 4970.671875, avg loss: 2.884894, ppl: 17.901663 +epoch: 0, batch: 38783, sum loss: 5474.814453, avg loss: 2.902871, ppl: 
18.226400 +epoch: 0, batch: 38784, sum loss: 5092.990723, avg loss: 3.086661, ppl: 21.903822 +epoch: 0, batch: 38785, sum loss: 4733.709961, avg loss: 2.799356, ppl: 16.434052 +epoch: 0, batch: 38786, sum loss: 4751.490723, avg loss: 2.956746, ppl: 19.235281 +epoch: 0, batch: 38787, sum loss: 5236.916992, avg loss: 2.787077, ppl: 16.233496 +epoch: 0, batch: 38788, sum loss: 5010.150391, avg loss: 2.901072, ppl: 18.193630 +epoch: 0, batch: 38789, sum loss: 5260.361816, avg loss: 2.883970, ppl: 17.885141 +epoch: 0, batch: 38790, sum loss: 3770.797119, avg loss: 2.752407, ppl: 15.680326 +epoch: 0, batch: 38791, sum loss: 4931.328125, avg loss: 2.855430, ppl: 17.381916 +epoch: 0, batch: 38792, sum loss: 4371.004883, avg loss: 2.855000, ppl: 17.374437 +epoch: 0, batch: 38793, sum loss: 5451.260254, avg loss: 2.980460, ppl: 19.696869 +epoch: 0, batch: 38794, sum loss: 3936.034180, avg loss: 2.697762, ppl: 14.846461 +epoch: 0, batch: 38795, sum loss: 5335.957520, avg loss: 3.212497, ppl: 24.841038 +epoch: 0, batch: 38796, sum loss: 5325.347656, avg loss: 3.092536, ppl: 22.032887 +epoch: 0, batch: 38797, sum loss: 5451.491699, avg loss: 3.038736, ppl: 20.878828 +epoch: 0, batch: 38798, sum loss: 5482.662598, avg loss: 3.020751, ppl: 20.506681 +epoch: 0, batch: 38799, sum loss: 4540.811523, avg loss: 2.704474, ppl: 14.946449 +epoch: 0, batch: 38800, sum loss: 6392.489258, avg loss: 3.163033, ppl: 23.642195 +epoch: 0, batch: 38801, sum loss: 5281.258301, avg loss: 2.825713, ppl: 16.872974 +epoch: 0, batch: 38802, sum loss: 4653.549805, avg loss: 2.713440, ppl: 15.081064 +epoch: 0, batch: 38803, sum loss: 4739.772461, avg loss: 2.845001, ppl: 17.201584 +epoch: 0, batch: 38804, sum loss: 5181.500000, avg loss: 2.954105, ppl: 19.184544 +epoch: 0, batch: 38805, sum loss: 4538.287109, avg loss: 2.668011, ppl: 14.411280 +epoch: 0, batch: 38806, sum loss: 5769.120605, avg loss: 2.982999, ppl: 19.746956 +epoch: 0, batch: 38807, sum loss: 4965.855469, avg loss: 2.746602, ppl: 
15.589561 +epoch: 0, batch: 38808, sum loss: 3975.858154, avg loss: 2.736310, ppl: 15.429940 +epoch: 0, batch: 38809, sum loss: 4525.337891, avg loss: 3.024959, ppl: 20.593153 +epoch: 0, batch: 38810, sum loss: 4531.217773, avg loss: 2.752866, ppl: 15.687532 +epoch: 0, batch: 38811, sum loss: 4695.642090, avg loss: 2.832112, ppl: 16.981293 +epoch: 0, batch: 38812, sum loss: 5425.930664, avg loss: 2.901567, ppl: 18.202650 +epoch: 0, batch: 38813, sum loss: 5228.611328, avg loss: 3.057667, ppl: 21.277868 +epoch: 0, batch: 38814, sum loss: 5740.139648, avg loss: 3.129847, ppl: 22.870481 +epoch: 0, batch: 38815, sum loss: 5573.601074, avg loss: 2.913539, ppl: 18.421886 +epoch: 0, batch: 38816, sum loss: 5130.014160, avg loss: 2.821790, ppl: 16.806908 +epoch: 0, batch: 38817, sum loss: 5032.549316, avg loss: 2.903952, ppl: 18.246115 +epoch: 0, batch: 38818, sum loss: 5443.150391, avg loss: 2.985821, ppl: 19.802744 +epoch: 0, batch: 38819, sum loss: 5188.923340, avg loss: 3.032685, ppl: 20.752874 +epoch: 0, batch: 38820, sum loss: 4647.896484, avg loss: 2.841013, ppl: 17.133108 +epoch: 0, batch: 38821, sum loss: 5392.719727, avg loss: 3.050181, ppl: 21.119165 +epoch: 0, batch: 38822, sum loss: 5674.923340, avg loss: 3.055963, ppl: 21.241627 +epoch: 0, batch: 38823, sum loss: 4702.417480, avg loss: 2.877856, ppl: 17.776129 +epoch: 0, batch: 38824, sum loss: 4173.250977, avg loss: 2.535389, ppl: 12.621345 +epoch: 0, batch: 38825, sum loss: 4487.615234, avg loss: 2.942698, ppl: 18.966959 +epoch: 0, batch: 38826, sum loss: 5050.827637, avg loss: 2.882893, ppl: 17.865877 +epoch: 0, batch: 38827, sum loss: 4468.851074, avg loss: 2.758550, ppl: 15.776953 +epoch: 0, batch: 38828, sum loss: 5627.003906, avg loss: 3.100278, ppl: 22.204115 +epoch: 0, batch: 38829, sum loss: 5088.792969, avg loss: 3.020055, ppl: 20.492420 +epoch: 0, batch: 38830, sum loss: 5257.404785, avg loss: 3.178600, ppl: 24.013119 +epoch: 0, batch: 38831, sum loss: 4410.308594, avg loss: 2.677783, ppl: 
14.552794 +epoch: 0, batch: 38832, sum loss: 5559.743164, avg loss: 2.904777, ppl: 18.261168 +epoch: 0, batch: 38833, sum loss: 5607.512695, avg loss: 2.973231, ppl: 19.554993 +epoch: 0, batch: 38834, sum loss: 4798.214355, avg loss: 2.940082, ppl: 18.917404 +epoch: 0, batch: 38835, sum loss: 4414.521973, avg loss: 2.701666, ppl: 14.904540 +epoch: 0, batch: 38836, sum loss: 4590.749023, avg loss: 2.700441, ppl: 14.886290 +epoch: 0, batch: 38837, sum loss: 4841.372070, avg loss: 2.814751, ppl: 16.689022 +epoch: 0, batch: 38838, sum loss: 4806.575684, avg loss: 3.000359, ppl: 20.092754 +epoch: 0, batch: 38839, sum loss: 4077.209473, avg loss: 2.753011, ppl: 15.689806 +epoch: 0, batch: 38840, sum loss: 4655.113770, avg loss: 2.894971, ppl: 18.082979 +epoch: 0, batch: 38841, sum loss: 4898.103516, avg loss: 3.008663, ppl: 20.260292 +epoch: 0, batch: 38842, sum loss: 4402.926758, avg loss: 2.733040, ppl: 15.379564 +epoch: 0, batch: 38843, sum loss: 4372.180176, avg loss: 2.654633, ppl: 14.219760 +epoch: 0, batch: 38844, sum loss: 5058.947266, avg loss: 2.956720, ppl: 19.234776 +epoch: 0, batch: 38845, sum loss: 5440.564941, avg loss: 2.914068, ppl: 18.431625 +epoch: 0, batch: 38846, sum loss: 5186.389648, avg loss: 3.036528, ppl: 20.832783 +epoch: 0, batch: 38847, sum loss: 4155.667969, avg loss: 2.648609, ppl: 14.134366 +epoch: 0, batch: 38848, sum loss: 4399.220703, avg loss: 2.784317, ppl: 16.188757 +epoch: 0, batch: 38849, sum loss: 5354.588867, avg loss: 2.761521, ppl: 15.823891 +epoch: 0, batch: 38850, sum loss: 5576.197266, avg loss: 2.913374, ppl: 18.418833 +epoch: 0, batch: 38851, sum loss: 4720.149414, avg loss: 2.593489, ppl: 13.376356 +epoch: 0, batch: 38852, sum loss: 4022.153564, avg loss: 2.620296, ppl: 13.739783 +epoch: 0, batch: 38853, sum loss: 5030.897949, avg loss: 2.777967, ppl: 16.086285 +epoch: 0, batch: 38854, sum loss: 5129.083496, avg loss: 2.753131, ppl: 15.691689 +epoch: 0, batch: 38855, sum loss: 5593.187500, avg loss: 2.875675, ppl: 
17.737392 +epoch: 0, batch: 38856, sum loss: 5571.376953, avg loss: 2.979346, ppl: 19.674946 +epoch: 0, batch: 38857, sum loss: 4896.431641, avg loss: 2.687394, ppl: 14.693334 +epoch: 0, batch: 38858, sum loss: 4220.480957, avg loss: 2.798727, ppl: 16.423731 +epoch: 0, batch: 38859, sum loss: 5026.704102, avg loss: 3.173424, ppl: 23.889147 +epoch: 0, batch: 38860, sum loss: 4303.238770, avg loss: 2.754954, ppl: 15.720323 +epoch: 0, batch: 38861, sum loss: 4359.597656, avg loss: 2.789250, ppl: 16.268812 +epoch: 0, batch: 38862, sum loss: 5189.446289, avg loss: 2.921986, ppl: 18.578140 +epoch: 0, batch: 38863, sum loss: 4974.500977, avg loss: 2.975180, ppl: 19.593153 +epoch: 0, batch: 38864, sum loss: 5858.340820, avg loss: 3.112827, ppl: 22.484520 +epoch: 0, batch: 38865, sum loss: 5531.002441, avg loss: 3.234504, ppl: 25.393780 +epoch: 0, batch: 38866, sum loss: 5354.197266, avg loss: 3.028392, ppl: 20.663984 +epoch: 0, batch: 38867, sum loss: 4690.766113, avg loss: 2.677378, ppl: 14.546904 +epoch: 0, batch: 38868, sum loss: 4270.763184, avg loss: 2.703015, ppl: 14.924656 +epoch: 0, batch: 38869, sum loss: 4802.979492, avg loss: 2.975824, ppl: 19.605770 +epoch: 0, batch: 38870, sum loss: 4005.391846, avg loss: 2.600904, ppl: 13.475911 +epoch: 0, batch: 38871, sum loss: 5753.330078, avg loss: 2.971761, ppl: 19.526279 +epoch: 0, batch: 38872, sum loss: 5144.651855, avg loss: 3.006810, ppl: 20.222790 +epoch: 0, batch: 38873, sum loss: 5708.655762, avg loss: 3.326723, ppl: 27.846928 +epoch: 0, batch: 38874, sum loss: 5280.068359, avg loss: 3.239306, ppl: 25.516001 +epoch: 0, batch: 38875, sum loss: 5349.601074, avg loss: 3.074483, ppl: 21.638700 +epoch: 0, batch: 38876, sum loss: 4049.539551, avg loss: 2.540489, ppl: 12.685872 +epoch: 0, batch: 38877, sum loss: 5004.303223, avg loss: 2.894334, ppl: 18.071459 +epoch: 0, batch: 38878, sum loss: 4491.625000, avg loss: 2.844601, ppl: 17.194695 +epoch: 0, batch: 38879, sum loss: 5994.200195, avg loss: 3.115489, ppl: 
22.544447 +epoch: 0, batch: 38880, sum loss: 5041.001953, avg loss: 2.836805, ppl: 17.061161 +epoch: 0, batch: 38881, sum loss: 5606.284668, avg loss: 3.075307, ppl: 21.656532 +epoch: 0, batch: 38882, sum loss: 5194.069336, avg loss: 3.004089, ppl: 20.167833 +epoch: 0, batch: 38883, sum loss: 5509.422363, avg loss: 2.763000, ppl: 15.847318 +epoch: 0, batch: 38884, sum loss: 4773.455566, avg loss: 3.028842, ppl: 20.673283 +epoch: 0, batch: 38885, sum loss: 4992.113281, avg loss: 2.897338, ppl: 18.125828 +epoch: 0, batch: 38886, sum loss: 5066.421387, avg loss: 2.938760, ppl: 18.892401 +epoch: 0, batch: 38887, sum loss: 5712.768066, avg loss: 3.061505, ppl: 21.359671 +epoch: 0, batch: 38888, sum loss: 4819.338867, avg loss: 3.004575, ppl: 20.177649 +epoch: 0, batch: 38889, sum loss: 4730.171387, avg loss: 2.847785, ppl: 17.249537 +epoch: 0, batch: 38890, sum loss: 4298.429688, avg loss: 2.656631, ppl: 14.248213 +epoch: 0, batch: 38891, sum loss: 4695.467285, avg loss: 3.097274, ppl: 22.137524 +epoch: 0, batch: 38892, sum loss: 4469.369141, avg loss: 2.731888, ppl: 15.361867 +epoch: 0, batch: 38893, sum loss: 5032.204102, avg loss: 3.000718, ppl: 20.099960 +epoch: 0, batch: 38894, sum loss: 4591.729004, avg loss: 2.756140, ppl: 15.738969 +epoch: 0, batch: 38895, sum loss: 4402.516602, avg loss: 3.078683, ppl: 21.729769 +epoch: 0, batch: 38896, sum loss: 5147.577148, avg loss: 2.943154, ppl: 18.975607 +epoch: 0, batch: 38897, sum loss: 3896.683350, avg loss: 2.665310, ppl: 14.372406 +epoch: 0, batch: 38898, sum loss: 4663.833496, avg loss: 3.005048, ppl: 20.187176 +epoch: 0, batch: 38899, sum loss: 4433.410156, avg loss: 2.822031, ppl: 16.810955 +epoch: 0, batch: 38900, sum loss: 5455.420898, avg loss: 3.103197, ppl: 22.269039 +epoch: 0, batch: 38901, sum loss: 5508.864258, avg loss: 3.164196, ppl: 23.669695 +epoch: 0, batch: 38902, sum loss: 5649.673828, avg loss: 2.915209, ppl: 18.452662 +epoch: 0, batch: 38903, sum loss: 5121.133301, avg loss: 3.015980, ppl: 
20.409077 +epoch: 0, batch: 38904, sum loss: 5027.125488, avg loss: 3.118564, ppl: 22.613886 +epoch: 0, batch: 38905, sum loss: 6238.630371, avg loss: 3.290417, ppl: 26.854059 +epoch: 0, batch: 38906, sum loss: 4978.000977, avg loss: 2.931685, ppl: 18.759212 +epoch: 0, batch: 38907, sum loss: 4671.715820, avg loss: 2.709812, ppl: 15.026453 +epoch: 0, batch: 38908, sum loss: 3948.621582, avg loss: 2.677032, ppl: 14.541862 +epoch: 0, batch: 38909, sum loss: 4728.317383, avg loss: 2.824563, ppl: 16.853571 +epoch: 0, batch: 38910, sum loss: 5557.852539, avg loss: 2.976889, ppl: 19.626671 +epoch: 0, batch: 38911, sum loss: 5162.427734, avg loss: 3.056500, ppl: 21.253035 +epoch: 0, batch: 38912, sum loss: 5758.786133, avg loss: 2.920277, ppl: 18.546421 +epoch: 0, batch: 38913, sum loss: 4173.568359, avg loss: 2.782379, ppl: 16.157412 +epoch: 0, batch: 38914, sum loss: 4697.598633, avg loss: 2.878431, ppl: 17.786337 +epoch: 0, batch: 38915, sum loss: 4923.735352, avg loss: 2.844445, ppl: 17.192022 +epoch: 0, batch: 38916, sum loss: 4741.157227, avg loss: 2.758090, ppl: 15.769702 +epoch: 0, batch: 38917, sum loss: 5186.650879, avg loss: 2.982548, ppl: 19.738045 +epoch: 0, batch: 38918, sum loss: 3884.912598, avg loss: 2.650009, ppl: 14.154161 +epoch: 0, batch: 38919, sum loss: 4609.617676, avg loss: 2.834943, ppl: 17.029430 +epoch: 0, batch: 38920, sum loss: 4304.740723, avg loss: 2.629652, ppl: 13.868946 +epoch: 0, batch: 38921, sum loss: 4931.999512, avg loss: 2.925267, ppl: 18.639196 +epoch: 0, batch: 38922, sum loss: 5214.901367, avg loss: 2.954618, ppl: 19.194397 +epoch: 0, batch: 38923, sum loss: 4678.171875, avg loss: 2.938550, ppl: 18.888443 +epoch: 0, batch: 38924, sum loss: 5754.511230, avg loss: 3.090500, ppl: 21.988071 +epoch: 0, batch: 38925, sum loss: 5497.050781, avg loss: 2.995668, ppl: 19.998718 +epoch: 0, batch: 38926, sum loss: 4536.343750, avg loss: 2.856640, ppl: 17.402952 +epoch: 0, batch: 38927, sum loss: 4780.473633, avg loss: 2.920265, ppl: 
18.546202 +epoch: 0, batch: 38928, sum loss: 4617.321777, avg loss: 2.820600, ppl: 16.786917 +epoch: 0, batch: 38929, sum loss: 3985.073730, avg loss: 2.533422, ppl: 12.596543 +epoch: 0, batch: 38930, sum loss: 4639.668457, avg loss: 2.791618, ppl: 16.307377 +epoch: 0, batch: 38931, sum loss: 4327.281738, avg loss: 2.747480, ppl: 15.603269 +epoch: 0, batch: 38932, sum loss: 5132.690430, avg loss: 2.869028, ppl: 17.619877 +epoch: 0, batch: 38933, sum loss: 3594.430908, avg loss: 2.635213, ppl: 13.946288 +epoch: 0, batch: 38934, sum loss: 5136.166016, avg loss: 2.967167, ppl: 19.436775 +epoch: 0, batch: 38935, sum loss: 5191.224609, avg loss: 2.866496, ppl: 17.575333 +epoch: 0, batch: 38936, sum loss: 4941.871582, avg loss: 2.782585, ppl: 16.160749 +epoch: 0, batch: 38937, sum loss: 5178.875977, avg loss: 2.885168, ppl: 17.906572 +epoch: 0, batch: 38938, sum loss: 4820.200195, avg loss: 3.043056, ppl: 20.969223 +epoch: 0, batch: 38939, sum loss: 4580.528809, avg loss: 2.870006, ppl: 17.637117 +epoch: 0, batch: 38940, sum loss: 6026.320312, avg loss: 3.185159, ppl: 24.171125 +epoch: 0, batch: 38941, sum loss: 4947.215820, avg loss: 2.888042, ppl: 17.958113 +epoch: 0, batch: 38942, sum loss: 5264.390625, avg loss: 2.954204, ppl: 19.186438 +epoch: 0, batch: 38943, sum loss: 4521.321777, avg loss: 2.976512, ppl: 19.619265 +epoch: 0, batch: 38944, sum loss: 5097.103516, avg loss: 2.725724, ppl: 15.267460 +epoch: 0, batch: 38945, sum loss: 4706.409668, avg loss: 2.712628, ppl: 15.068827 +epoch: 0, batch: 38946, sum loss: 4926.743652, avg loss: 2.781899, ppl: 16.149664 +epoch: 0, batch: 38947, sum loss: 4805.315918, avg loss: 2.915847, ppl: 18.464447 +epoch: 0, batch: 38948, sum loss: 4787.559570, avg loss: 2.506576, ppl: 12.262868 +epoch: 0, batch: 38949, sum loss: 5348.971191, avg loss: 2.956867, ppl: 19.237597 +epoch: 0, batch: 38950, sum loss: 4344.685059, avg loss: 2.710346, ppl: 15.034480 +epoch: 0, batch: 38951, sum loss: 4400.770996, avg loss: 2.582612, ppl: 
13.231654 +epoch: 0, batch: 38952, sum loss: 5137.566406, avg loss: 2.981756, ppl: 19.722427 +epoch: 0, batch: 38953, sum loss: 4921.529785, avg loss: 2.802694, ppl: 16.489002 +epoch: 0, batch: 38954, sum loss: 5162.502441, avg loss: 3.180839, ppl: 24.066944 +epoch: 0, batch: 38955, sum loss: 4316.281738, avg loss: 2.788296, ppl: 16.253296 +epoch: 0, batch: 38956, sum loss: 5917.148438, avg loss: 2.968966, ppl: 19.471766 +epoch: 0, batch: 38957, sum loss: 4839.636719, avg loss: 2.927790, ppl: 18.686287 +epoch: 0, batch: 38958, sum loss: 4123.344727, avg loss: 2.572267, ppl: 13.095482 +epoch: 0, batch: 38959, sum loss: 4806.436035, avg loss: 3.055585, ppl: 21.233612 +epoch: 0, batch: 38960, sum loss: 5763.784180, avg loss: 2.951246, ppl: 19.129780 +epoch: 0, batch: 38961, sum loss: 4454.242676, avg loss: 2.824504, ppl: 16.852583 +epoch: 0, batch: 38962, sum loss: 4872.385254, avg loss: 2.832782, ppl: 16.992670 +epoch: 0, batch: 38963, sum loss: 4899.754395, avg loss: 2.870389, ppl: 17.643885 +epoch: 0, batch: 38964, sum loss: 5667.564453, avg loss: 2.874018, ppl: 17.708035 +epoch: 0, batch: 38965, sum loss: 4699.128906, avg loss: 2.746423, ppl: 15.586775 +epoch: 0, batch: 38966, sum loss: 4383.632812, avg loss: 2.681121, ppl: 14.601451 +epoch: 0, batch: 38967, sum loss: 5079.827637, avg loss: 2.958549, ppl: 19.269981 +epoch: 0, batch: 38968, sum loss: 5413.009766, avg loss: 2.850453, ppl: 17.295612 +epoch: 0, batch: 38969, sum loss: 4800.612793, avg loss: 3.085227, ppl: 21.872427 +epoch: 0, batch: 38970, sum loss: 5750.936035, avg loss: 3.105257, ppl: 22.314955 +epoch: 0, batch: 38971, sum loss: 5264.528320, avg loss: 2.818270, ppl: 16.747850 +epoch: 0, batch: 38972, sum loss: 3918.961426, avg loss: 2.715843, ppl: 15.117348 +epoch: 0, batch: 38973, sum loss: 4607.260254, avg loss: 3.015223, ppl: 20.393635 +epoch: 0, batch: 38974, sum loss: 5148.370605, avg loss: 2.935217, ppl: 18.825586 +epoch: 0, batch: 38975, sum loss: 5521.346680, avg loss: 3.088001, ppl: 
21.933201 +epoch: 0, batch: 38976, sum loss: 5517.555664, avg loss: 3.120790, ppl: 22.664267 +epoch: 0, batch: 38977, sum loss: 5562.191406, avg loss: 2.980810, ppl: 19.703770 +epoch: 0, batch: 38978, sum loss: 4481.511230, avg loss: 2.667566, ppl: 14.404869 +epoch: 0, batch: 38979, sum loss: 4884.000000, avg loss: 2.883117, ppl: 17.869883 +epoch: 0, batch: 38980, sum loss: 5910.473633, avg loss: 2.885973, ppl: 17.921005 +epoch: 0, batch: 38981, sum loss: 4602.674805, avg loss: 2.827196, ppl: 16.898010 +epoch: 0, batch: 38982, sum loss: 4186.063965, avg loss: 2.661198, ppl: 14.313421 +epoch: 0, batch: 38983, sum loss: 4145.042969, avg loss: 2.611873, ppl: 13.624551 +epoch: 0, batch: 38984, sum loss: 5408.492676, avg loss: 3.223178, ppl: 25.107784 +epoch: 0, batch: 38985, sum loss: 4531.752930, avg loss: 2.756541, ppl: 15.745282 +epoch: 0, batch: 38986, sum loss: 4906.667969, avg loss: 2.971937, ppl: 19.529716 +epoch: 0, batch: 38987, sum loss: 4431.353516, avg loss: 2.675938, ppl: 14.525970 +epoch: 0, batch: 38988, sum loss: 4673.037109, avg loss: 3.125777, ppl: 22.777592 +epoch: 0, batch: 38989, sum loss: 5109.779297, avg loss: 2.861019, ppl: 17.479322 +epoch: 0, batch: 38990, sum loss: 5614.631836, avg loss: 3.016997, ppl: 20.429855 +epoch: 0, batch: 38991, sum loss: 5871.173340, avg loss: 3.383962, ppl: 29.487360 +epoch: 0, batch: 38992, sum loss: 5236.866211, avg loss: 2.824631, ppl: 16.854725 +epoch: 0, batch: 38993, sum loss: 4319.563477, avg loss: 2.737366, ppl: 15.446245 +epoch: 0, batch: 38994, sum loss: 4529.934082, avg loss: 2.725592, ppl: 15.265450 +epoch: 0, batch: 38995, sum loss: 3937.591797, avg loss: 3.152596, ppl: 23.396719 +epoch: 0, batch: 38996, sum loss: 4171.355469, avg loss: 2.665403, ppl: 14.373739 +epoch: 0, batch: 38997, sum loss: 4880.456055, avg loss: 3.100671, ppl: 22.212847 +epoch: 0, batch: 38998, sum loss: 5509.792480, avg loss: 3.069522, ppl: 21.531616 +epoch: 0, batch: 38999, sum loss: 4761.991211, avg loss: 2.749418, ppl: 
15.633523 +epoch: 0, batch: 39000, sum loss: 5535.307617, avg loss: 3.087177, ppl: 21.915119 +epoch: 0, batch: 39001, sum loss: 5291.675781, avg loss: 3.195456, ppl: 24.421314 +epoch: 0, batch: 39002, sum loss: 4630.140137, avg loss: 2.899274, ppl: 18.160954 +epoch: 0, batch: 39003, sum loss: 3816.246338, avg loss: 2.452600, ppl: 11.618521 +epoch: 0, batch: 39004, sum loss: 4276.043945, avg loss: 2.583712, ppl: 13.246222 +epoch: 0, batch: 39005, sum loss: 4441.110840, avg loss: 2.680212, ppl: 14.588182 +epoch: 0, batch: 39006, sum loss: 4332.958984, avg loss: 2.961694, ppl: 19.330700 +epoch: 0, batch: 39007, sum loss: 4872.458008, avg loss: 2.765300, ppl: 15.883801 +epoch: 0, batch: 39008, sum loss: 4210.820312, avg loss: 2.744994, ppl: 15.564516 +epoch: 0, batch: 39009, sum loss: 5539.438965, avg loss: 3.053715, ppl: 21.193933 +epoch: 0, batch: 39010, sum loss: 5418.215332, avg loss: 2.908328, ppl: 18.326132 +epoch: 0, batch: 39011, sum loss: 5397.438965, avg loss: 3.134401, ppl: 22.974873 +epoch: 0, batch: 39012, sum loss: 5229.814941, avg loss: 2.959714, ppl: 19.292458 +epoch: 0, batch: 39013, sum loss: 5721.606934, avg loss: 2.970720, ppl: 19.505964 +epoch: 0, batch: 39014, sum loss: 4760.334473, avg loss: 2.960407, ppl: 19.305828 +epoch: 0, batch: 39015, sum loss: 5497.702148, avg loss: 2.870863, ppl: 17.652241 +epoch: 0, batch: 39016, sum loss: 6875.779297, avg loss: 3.168562, ppl: 23.773272 +epoch: 0, batch: 39017, sum loss: 5502.318848, avg loss: 2.804444, ppl: 16.517887 +epoch: 0, batch: 39018, sum loss: 5384.027832, avg loss: 3.101398, ppl: 22.229015 +epoch: 0, batch: 39019, sum loss: 4565.288086, avg loss: 2.766841, ppl: 15.908303 +epoch: 0, batch: 39020, sum loss: 5062.765137, avg loss: 2.876571, ppl: 17.753292 +epoch: 0, batch: 39021, sum loss: 5071.640625, avg loss: 2.933280, ppl: 18.789154 +epoch: 0, batch: 39022, sum loss: 4550.696777, avg loss: 3.003760, ppl: 20.161207 +epoch: 0, batch: 39023, sum loss: 5362.071289, avg loss: 2.933300, ppl: 
18.789526 +epoch: 0, batch: 39024, sum loss: 5330.164551, avg loss: 3.019923, ppl: 20.489717 +epoch: 0, batch: 39025, sum loss: 5253.284180, avg loss: 2.825866, ppl: 16.875546 +epoch: 0, batch: 39026, sum loss: 5160.262695, avg loss: 2.778817, ppl: 16.099958 +epoch: 0, batch: 39027, sum loss: 5649.452148, avg loss: 3.000240, ppl: 20.090361 +epoch: 0, batch: 39028, sum loss: 4378.249023, avg loss: 2.656704, ppl: 14.249252 +epoch: 0, batch: 39029, sum loss: 3980.664307, avg loss: 2.695101, ppl: 14.807018 +epoch: 0, batch: 39030, sum loss: 4848.391113, avg loss: 2.653744, ppl: 14.207137 +epoch: 0, batch: 39031, sum loss: 4298.705078, avg loss: 2.811449, ppl: 16.634001 +epoch: 0, batch: 39032, sum loss: 3851.043701, avg loss: 2.593295, ppl: 13.373770 +epoch: 0, batch: 39033, sum loss: 4801.180664, avg loss: 2.718675, ppl: 15.160228 +epoch: 0, batch: 39034, sum loss: 4575.947266, avg loss: 2.937065, ppl: 18.860413 +epoch: 0, batch: 39035, sum loss: 4651.493164, avg loss: 2.824222, ppl: 16.847826 +epoch: 0, batch: 39036, sum loss: 4299.910156, avg loss: 2.612339, ppl: 13.630899 +epoch: 0, batch: 39037, sum loss: 3913.427246, avg loss: 2.469039, ppl: 11.811093 +epoch: 0, batch: 39038, sum loss: 5414.292480, avg loss: 2.965111, ppl: 19.396856 +epoch: 0, batch: 39039, sum loss: 5551.165039, avg loss: 3.035082, ppl: 20.802687 +epoch: 0, batch: 39040, sum loss: 4810.308105, avg loss: 2.858175, ppl: 17.429686 +epoch: 0, batch: 39041, sum loss: 5085.360840, avg loss: 2.904261, ppl: 18.251753 +epoch: 0, batch: 39042, sum loss: 3793.045410, avg loss: 2.669279, ppl: 14.429560 +epoch: 0, batch: 39043, sum loss: 6371.295410, avg loss: 3.060180, ppl: 21.331406 +epoch: 0, batch: 39044, sum loss: 5101.873047, avg loss: 2.997575, ppl: 20.036894 +epoch: 0, batch: 39045, sum loss: 5542.875000, avg loss: 3.087953, ppl: 21.932129 +epoch: 0, batch: 39046, sum loss: 5668.972168, avg loss: 2.866012, ppl: 17.566820 +epoch: 0, batch: 39047, sum loss: 5912.480469, avg loss: 2.902543, ppl: 
18.220423 +epoch: 0, batch: 39048, sum loss: 4298.366211, avg loss: 2.640274, ppl: 14.017044 +epoch: 0, batch: 39049, sum loss: 5844.027832, avg loss: 3.017051, ppl: 20.430952 +epoch: 0, batch: 39050, sum loss: 4666.960938, avg loss: 2.763150, ppl: 15.849698 +epoch: 0, batch: 39051, sum loss: 5014.520020, avg loss: 2.891880, ppl: 18.027170 +epoch: 0, batch: 39052, sum loss: 4952.479492, avg loss: 3.156456, ppl: 23.487209 +epoch: 0, batch: 39053, sum loss: 4274.035645, avg loss: 2.705086, ppl: 14.955599 +epoch: 0, batch: 39054, sum loss: 4707.035156, avg loss: 2.837273, ppl: 17.069149 +epoch: 0, batch: 39055, sum loss: 4436.755859, avg loss: 2.740430, ppl: 15.493644 +epoch: 0, batch: 39056, sum loss: 4752.361328, avg loss: 2.970226, ppl: 19.496321 +epoch: 0, batch: 39057, sum loss: 5266.062500, avg loss: 2.983605, ppl: 19.758917 +epoch: 0, batch: 39058, sum loss: 5478.940430, avg loss: 3.012062, ppl: 20.329273 +epoch: 0, batch: 39059, sum loss: 4692.425781, avg loss: 2.873500, ppl: 17.698858 +epoch: 0, batch: 39060, sum loss: 5099.831055, avg loss: 2.847477, ppl: 17.244215 +epoch: 0, batch: 39061, sum loss: 5270.818359, avg loss: 2.912055, ppl: 18.394552 +epoch: 0, batch: 39062, sum loss: 4999.338379, avg loss: 2.908283, ppl: 18.325308 +epoch: 0, batch: 39063, sum loss: 5198.708008, avg loss: 3.105561, ppl: 22.321728 +epoch: 0, batch: 39064, sum loss: 4623.738770, avg loss: 2.628618, ppl: 13.854609 +epoch: 0, batch: 39065, sum loss: 5308.294922, avg loss: 2.940884, ppl: 18.932568 +epoch: 0, batch: 39066, sum loss: 6492.950195, avg loss: 3.277612, ppl: 26.512390 +epoch: 0, batch: 39067, sum loss: 5768.442871, avg loss: 3.005963, ppl: 20.205662 +epoch: 0, batch: 39068, sum loss: 4584.703613, avg loss: 2.795551, ppl: 16.371649 +epoch: 0, batch: 39069, sum loss: 4981.667969, avg loss: 2.887924, ppl: 17.955990 +epoch: 0, batch: 39070, sum loss: 4850.220215, avg loss: 2.902586, ppl: 18.221195 +epoch: 0, batch: 39071, sum loss: 5939.208984, avg loss: 3.149104, ppl: 
23.315157 +epoch: 0, batch: 39072, sum loss: 4838.402344, avg loss: 2.883434, ppl: 17.875553 +epoch: 0, batch: 39073, sum loss: 4786.144531, avg loss: 2.840442, ppl: 17.123331 +epoch: 0, batch: 39074, sum loss: 5006.729004, avg loss: 2.764621, ppl: 15.873031 +epoch: 0, batch: 39075, sum loss: 5067.874512, avg loss: 2.988134, ppl: 19.848604 +epoch: 0, batch: 39076, sum loss: 5050.284668, avg loss: 3.000763, ppl: 20.100872 +epoch: 0, batch: 39077, sum loss: 5623.755859, avg loss: 3.043158, ppl: 20.971363 +epoch: 0, batch: 39078, sum loss: 4424.301758, avg loss: 2.791358, ppl: 16.303137 +epoch: 0, batch: 39079, sum loss: 4562.098145, avg loss: 2.918809, ppl: 18.519213 +epoch: 0, batch: 39080, sum loss: 5775.467285, avg loss: 3.215739, ppl: 24.921703 +epoch: 0, batch: 39081, sum loss: 4719.082031, avg loss: 2.837692, ppl: 17.076313 +epoch: 0, batch: 39082, sum loss: 4708.527832, avg loss: 2.980081, ppl: 19.689409 +epoch: 0, batch: 39083, sum loss: 4862.157715, avg loss: 2.943195, ppl: 18.976377 +epoch: 0, batch: 39084, sum loss: 4273.830078, avg loss: 2.834105, ppl: 17.015162 +epoch: 0, batch: 39085, sum loss: 4516.047363, avg loss: 2.803257, ppl: 16.498299 +epoch: 0, batch: 39086, sum loss: 4614.945801, avg loss: 2.655320, ppl: 14.229538 +epoch: 0, batch: 39087, sum loss: 6266.759277, avg loss: 3.096225, ppl: 22.114313 +epoch: 0, batch: 39088, sum loss: 5065.533203, avg loss: 2.941657, ppl: 18.947212 +epoch: 0, batch: 39089, sum loss: 5527.749023, avg loss: 2.938729, ppl: 18.891815 +epoch: 0, batch: 39090, sum loss: 4676.535156, avg loss: 2.773746, ppl: 16.018524 +epoch: 0, batch: 39091, sum loss: 5772.120117, avg loss: 3.009447, ppl: 20.276192 +epoch: 0, batch: 39092, sum loss: 4573.708496, avg loss: 2.753587, ppl: 15.698847 +epoch: 0, batch: 39093, sum loss: 3988.263428, avg loss: 2.718652, ppl: 15.159881 +epoch: 0, batch: 39094, sum loss: 4950.410156, avg loss: 3.090144, ppl: 21.980236 +epoch: 0, batch: 39095, sum loss: 5236.268555, avg loss: 3.187017, ppl: 
24.216076 +epoch: 0, batch: 39096, sum loss: 5468.963867, avg loss: 2.901307, ppl: 18.197916 +epoch: 0, batch: 39097, sum loss: 4590.774414, avg loss: 2.872825, ppl: 17.686916 +epoch: 0, batch: 39098, sum loss: 4471.491699, avg loss: 2.674337, ppl: 14.502737 +epoch: 0, batch: 39099, sum loss: 5518.114258, avg loss: 3.026942, ppl: 20.634027 +epoch: 0, batch: 39100, sum loss: 5346.812012, avg loss: 2.833499, ppl: 17.004852 +epoch: 0, batch: 39101, sum loss: 5220.355469, avg loss: 2.901809, ppl: 18.207060 +epoch: 0, batch: 39102, sum loss: 5602.876953, avg loss: 2.883622, ppl: 17.878912 +epoch: 0, batch: 39103, sum loss: 4296.459961, avg loss: 2.594481, ppl: 13.389629 +epoch: 0, batch: 39104, sum loss: 4503.073242, avg loss: 2.894006, ppl: 18.065536 +epoch: 0, batch: 39105, sum loss: 4649.327148, avg loss: 2.905829, ppl: 18.280399 +epoch: 0, batch: 39106, sum loss: 5580.906738, avg loss: 2.995656, ppl: 19.998470 +epoch: 0, batch: 39107, sum loss: 5638.189453, avg loss: 3.123651, ppl: 22.729202 +epoch: 0, batch: 39108, sum loss: 5276.696289, avg loss: 3.105766, ppl: 22.326315 +epoch: 0, batch: 39109, sum loss: 5010.521484, avg loss: 3.102490, ppl: 22.253298 +epoch: 0, batch: 39110, sum loss: 3746.140137, avg loss: 2.774919, ppl: 16.037321 +epoch: 0, batch: 39111, sum loss: 5808.347656, avg loss: 3.003282, ppl: 20.151567 +epoch: 0, batch: 39112, sum loss: 4340.024414, avg loss: 2.669142, ppl: 14.427582 +epoch: 0, batch: 39113, sum loss: 5559.034180, avg loss: 3.133615, ppl: 22.956829 +epoch: 0, batch: 39114, sum loss: 4291.859375, avg loss: 2.927598, ppl: 18.682709 +epoch: 0, batch: 39115, sum loss: 5328.319336, avg loss: 2.788236, ppl: 16.252327 +epoch: 0, batch: 39116, sum loss: 4594.957031, avg loss: 2.818992, ppl: 16.759951 +epoch: 0, batch: 39117, sum loss: 5457.758301, avg loss: 3.295748, ppl: 26.997595 +epoch: 0, batch: 39118, sum loss: 5266.407227, avg loss: 3.103363, ppl: 22.272736 +epoch: 0, batch: 39119, sum loss: 5346.022949, avg loss: 2.951973, ppl: 
19.143690 +epoch: 0, batch: 39120, sum loss: 5128.711426, avg loss: 2.811794, ppl: 16.639736 +epoch: 0, batch: 39121, sum loss: 5908.519531, avg loss: 2.996207, ppl: 20.009487 +epoch: 0, batch: 39122, sum loss: 5149.022461, avg loss: 2.928909, ppl: 18.707211 +epoch: 0, batch: 39123, sum loss: 5520.445801, avg loss: 2.893315, ppl: 18.053062 +epoch: 0, batch: 39124, sum loss: 4376.323242, avg loss: 2.762830, ppl: 15.844624 +epoch: 0, batch: 39125, sum loss: 5162.597656, avg loss: 2.921674, ppl: 18.572348 +epoch: 0, batch: 39126, sum loss: 5179.466309, avg loss: 2.990454, ppl: 19.894712 +epoch: 0, batch: 39127, sum loss: 4046.651855, avg loss: 2.843747, ppl: 17.180017 +epoch: 0, batch: 39128, sum loss: 3885.337402, avg loss: 2.551108, ppl: 12.821301 +epoch: 0, batch: 39129, sum loss: 4694.748535, avg loss: 2.910569, ppl: 18.367254 +epoch: 0, batch: 39130, sum loss: 5519.845703, avg loss: 3.031217, ppl: 20.722433 +epoch: 0, batch: 39131, sum loss: 5511.413086, avg loss: 3.106772, ppl: 22.348780 +epoch: 0, batch: 39132, sum loss: 5081.539551, avg loss: 3.206019, ppl: 24.680630 +epoch: 0, batch: 39133, sum loss: 5869.518555, avg loss: 2.921612, ppl: 18.571201 +epoch: 0, batch: 39134, sum loss: 4366.882812, avg loss: 2.767353, ppl: 15.916445 +epoch: 0, batch: 39135, sum loss: 4874.833496, avg loss: 2.887935, ppl: 17.956186 +epoch: 0, batch: 39136, sum loss: 5240.428711, avg loss: 2.944061, ppl: 18.992821 +epoch: 0, batch: 39137, sum loss: 5396.665527, avg loss: 3.335393, ppl: 28.089413 +epoch: 0, batch: 39138, sum loss: 5482.611328, avg loss: 3.029067, ppl: 20.677937 +epoch: 0, batch: 39139, sum loss: 5245.668457, avg loss: 3.126143, ppl: 22.785929 +epoch: 0, batch: 39140, sum loss: 4924.092773, avg loss: 3.033945, ppl: 20.779047 +epoch: 0, batch: 39141, sum loss: 5325.401855, avg loss: 2.922833, ppl: 18.593893 +epoch: 0, batch: 39142, sum loss: 6459.172852, avg loss: 3.094956, ppl: 22.086266 +epoch: 0, batch: 39143, sum loss: 4098.921875, avg loss: 2.957375, ppl: 
19.247381 +epoch: 0, batch: 39144, sum loss: 5005.812012, avg loss: 2.922249, ppl: 18.583027 +epoch: 0, batch: 39145, sum loss: 5899.065430, avg loss: 3.124505, ppl: 22.748632 +epoch: 0, batch: 39146, sum loss: 4736.599609, avg loss: 3.055871, ppl: 21.239672 +epoch: 0, batch: 39147, sum loss: 5252.775879, avg loss: 2.642241, ppl: 14.044645 +epoch: 0, batch: 39148, sum loss: 4754.182129, avg loss: 2.860519, ppl: 17.470587 +epoch: 0, batch: 39149, sum loss: 4384.286133, avg loss: 2.783674, ppl: 16.178343 +epoch: 0, batch: 39150, sum loss: 4185.546875, avg loss: 2.739232, ppl: 15.475101 +epoch: 0, batch: 39151, sum loss: 4775.372070, avg loss: 3.076915, ppl: 21.691387 +epoch: 0, batch: 39152, sum loss: 5230.300293, avg loss: 2.912194, ppl: 18.397118 +epoch: 0, batch: 39153, sum loss: 4288.230957, avg loss: 2.741836, ppl: 15.515439 +epoch: 0, batch: 39154, sum loss: 4649.551758, avg loss: 2.765944, ppl: 15.894033 +epoch: 0, batch: 39155, sum loss: 4173.793945, avg loss: 2.806855, ppl: 16.557766 +epoch: 0, batch: 39156, sum loss: 4620.075684, avg loss: 2.743513, ppl: 15.541489 +epoch: 0, batch: 39157, sum loss: 5500.481445, avg loss: 3.067753, ppl: 21.493559 +epoch: 0, batch: 39158, sum loss: 5265.333008, avg loss: 2.902609, ppl: 18.221630 +epoch: 0, batch: 39159, sum loss: 4621.895508, avg loss: 2.809663, ppl: 16.604319 +epoch: 0, batch: 39160, sum loss: 4347.506836, avg loss: 2.774414, ppl: 16.029236 +epoch: 0, batch: 39161, sum loss: 4281.995605, avg loss: 2.732607, ppl: 15.372917 +epoch: 0, batch: 39162, sum loss: 4130.724121, avg loss: 2.866568, ppl: 17.576590 +epoch: 0, batch: 39163, sum loss: 5474.235840, avg loss: 2.854138, ppl: 17.359461 +epoch: 0, batch: 39164, sum loss: 4681.712402, avg loss: 3.049975, ppl: 21.114824 +epoch: 0, batch: 39165, sum loss: 5073.942383, avg loss: 2.696037, ppl: 14.820885 +epoch: 0, batch: 39166, sum loss: 5600.016602, avg loss: 3.465357, ppl: 31.987871 +epoch: 0, batch: 39167, sum loss: 4857.266602, avg loss: 2.965364, ppl: 
19.401764 +epoch: 0, batch: 39168, sum loss: 4844.738770, avg loss: 2.870106, ppl: 17.638887 +epoch: 0, batch: 39169, sum loss: 4995.983887, avg loss: 2.828983, ppl: 16.928234 +epoch: 0, batch: 39170, sum loss: 3797.870605, avg loss: 2.716646, ppl: 15.129488 +epoch: 0, batch: 39171, sum loss: 5220.327637, avg loss: 3.029790, ppl: 20.692881 +epoch: 0, batch: 39172, sum loss: 4217.829102, avg loss: 2.616519, ppl: 13.687996 +epoch: 0, batch: 39173, sum loss: 4420.124512, avg loss: 2.802869, ppl: 16.491896 +epoch: 0, batch: 39174, sum loss: 5614.646484, avg loss: 2.892657, ppl: 18.041174 +epoch: 0, batch: 39175, sum loss: 4333.041992, avg loss: 2.777591, ppl: 16.080236 +epoch: 0, batch: 39176, sum loss: 4769.130371, avg loss: 2.934850, ppl: 18.818670 +epoch: 0, batch: 39177, sum loss: 5669.360352, avg loss: 2.929902, ppl: 18.725792 +epoch: 0, batch: 39178, sum loss: 4285.029785, avg loss: 2.817245, ppl: 16.730698 +epoch: 0, batch: 39179, sum loss: 5881.343262, avg loss: 3.210340, ppl: 24.787519 +epoch: 0, batch: 39180, sum loss: 4224.102539, avg loss: 2.574103, ppl: 13.119539 +epoch: 0, batch: 39181, sum loss: 6134.770508, avg loss: 3.011669, ppl: 20.321291 +epoch: 0, batch: 39182, sum loss: 5997.529297, avg loss: 3.141713, ppl: 23.143471 +epoch: 0, batch: 39183, sum loss: 5202.071289, avg loss: 2.855144, ppl: 17.376932 +epoch: 0, batch: 39184, sum loss: 4347.114258, avg loss: 2.925380, ppl: 18.641304 +epoch: 0, batch: 39185, sum loss: 4716.849609, avg loss: 2.846620, ppl: 17.229448 +epoch: 0, batch: 39186, sum loss: 5201.498047, avg loss: 3.072356, ppl: 21.592710 +epoch: 0, batch: 39187, sum loss: 4229.649902, avg loss: 2.695761, ppl: 14.816796 +epoch: 0, batch: 39188, sum loss: 4835.173340, avg loss: 2.888395, ppl: 17.964458 +epoch: 0, batch: 39189, sum loss: 5581.933105, avg loss: 3.247198, ppl: 25.718180 +epoch: 0, batch: 39190, sum loss: 4620.143555, avg loss: 2.988450, ppl: 19.854885 +epoch: 0, batch: 39191, sum loss: 5582.888672, avg loss: 2.938363, ppl: 
18.884899 +epoch: 0, batch: 39192, sum loss: 4366.516113, avg loss: 2.882189, ppl: 17.853312 +epoch: 0, batch: 39193, sum loss: 4774.052734, avg loss: 2.862142, ppl: 17.498972 +epoch: 0, batch: 39194, sum loss: 5358.112305, avg loss: 2.834980, ppl: 17.030060 +epoch: 0, batch: 39195, sum loss: 4307.367188, avg loss: 2.793364, ppl: 16.335882 +epoch: 0, batch: 39196, sum loss: 4707.127930, avg loss: 2.911026, ppl: 18.375633 +epoch: 0, batch: 39197, sum loss: 4307.635254, avg loss: 2.634639, ppl: 13.938284 +epoch: 0, batch: 39198, sum loss: 5865.380859, avg loss: 2.987968, ppl: 19.845316 +epoch: 0, batch: 39199, sum loss: 3426.916748, avg loss: 2.517940, ppl: 12.403024 +epoch: 0, batch: 39200, sum loss: 5332.736816, avg loss: 2.768815, ppl: 15.939727 +epoch: 0, batch: 39201, sum loss: 4378.044434, avg loss: 2.852146, ppl: 17.324924 +epoch: 0, batch: 39202, sum loss: 5019.493652, avg loss: 2.871564, ppl: 17.664627 +epoch: 0, batch: 39203, sum loss: 4592.299316, avg loss: 2.628677, ppl: 13.855433 +epoch: 0, batch: 39204, sum loss: 4228.334473, avg loss: 2.705268, ppl: 14.958331 +epoch: 0, batch: 39205, sum loss: 5233.229980, avg loss: 3.053226, ppl: 21.183582 +epoch: 0, batch: 39206, sum loss: 4974.427734, avg loss: 2.875392, ppl: 17.732368 +epoch: 0, batch: 39207, sum loss: 4951.982910, avg loss: 2.832942, ppl: 16.995392 +epoch: 0, batch: 39208, sum loss: 4512.870117, avg loss: 2.959259, ppl: 19.283678 +epoch: 0, batch: 39209, sum loss: 4477.990234, avg loss: 2.786553, ppl: 16.224995 +epoch: 0, batch: 39210, sum loss: 5476.043945, avg loss: 2.870044, ppl: 17.637794 +epoch: 0, batch: 39211, sum loss: 5098.918457, avg loss: 2.908681, ppl: 18.332609 +epoch: 0, batch: 39212, sum loss: 4880.376465, avg loss: 2.765086, ppl: 15.880404 +epoch: 0, batch: 39213, sum loss: 5236.601562, avg loss: 2.789878, ppl: 16.279039 +epoch: 0, batch: 39214, sum loss: 3987.983643, avg loss: 2.623674, ppl: 13.786277 +epoch: 0, batch: 39215, sum loss: 4283.405762, avg loss: 2.958153, ppl: 
19.262365 +epoch: 0, batch: 39216, sum loss: 4021.108398, avg loss: 2.670059, ppl: 14.440817 +epoch: 0, batch: 39217, sum loss: 4326.628418, avg loss: 2.722862, ppl: 15.223838 +epoch: 0, batch: 39218, sum loss: 4832.002930, avg loss: 2.889954, ppl: 17.992483 +epoch: 0, batch: 39219, sum loss: 5514.307617, avg loss: 3.110157, ppl: 22.424561 +epoch: 0, batch: 39220, sum loss: 4696.850586, avg loss: 2.685449, ppl: 14.664786 +epoch: 0, batch: 39221, sum loss: 5436.985352, avg loss: 3.140950, ppl: 23.125835 +epoch: 0, batch: 39222, sum loss: 4761.610352, avg loss: 2.921233, ppl: 18.564171 +epoch: 0, batch: 39223, sum loss: 5262.028809, avg loss: 2.977945, ppl: 19.647402 +epoch: 0, batch: 39224, sum loss: 5049.206055, avg loss: 2.803557, ppl: 16.503242 +epoch: 0, batch: 39225, sum loss: 4736.479492, avg loss: 2.722115, ppl: 15.212456 +epoch: 0, batch: 39226, sum loss: 4876.041992, avg loss: 2.603333, ppl: 13.508684 +epoch: 0, batch: 39227, sum loss: 4233.315430, avg loss: 2.689527, ppl: 14.724707 +epoch: 0, batch: 39228, sum loss: 4949.235352, avg loss: 2.867460, ppl: 17.592268 +epoch: 0, batch: 39229, sum loss: 5337.475586, avg loss: 3.221168, ppl: 25.057371 +epoch: 0, batch: 39230, sum loss: 5967.388184, avg loss: 3.229106, ppl: 25.257072 +epoch: 0, batch: 39231, sum loss: 4676.826172, avg loss: 2.867459, ppl: 17.592264 +epoch: 0, batch: 39232, sum loss: 4877.231934, avg loss: 3.040668, ppl: 20.919222 +epoch: 0, batch: 39233, sum loss: 4826.383789, avg loss: 2.971911, ppl: 19.529207 +epoch: 0, batch: 39234, sum loss: 4857.502441, avg loss: 2.915668, ppl: 18.461132 +epoch: 0, batch: 39235, sum loss: 4890.607910, avg loss: 2.813929, ppl: 16.675301 +epoch: 0, batch: 39236, sum loss: 4379.770508, avg loss: 2.619480, ppl: 13.728584 +epoch: 0, batch: 39237, sum loss: 4761.816895, avg loss: 3.200146, ppl: 24.536106 +epoch: 0, batch: 39238, sum loss: 5634.965820, avg loss: 3.029552, ppl: 20.687952 +epoch: 0, batch: 39239, sum loss: 5036.523926, avg loss: 3.122458, ppl: 
22.702101 +epoch: 0, batch: 39240, sum loss: 5098.958008, avg loss: 2.927071, ppl: 18.672859 +epoch: 0, batch: 39241, sum loss: 5569.232422, avg loss: 3.121767, ppl: 22.686438 +epoch: 0, batch: 39242, sum loss: 6285.011230, avg loss: 3.177457, ppl: 23.985689 +epoch: 0, batch: 39243, sum loss: 3713.554932, avg loss: 2.648755, ppl: 14.136433 +epoch: 0, batch: 39244, sum loss: 5080.970703, avg loss: 3.006491, ppl: 20.216345 +epoch: 0, batch: 39245, sum loss: 5133.420410, avg loss: 2.785361, ppl: 16.205667 +epoch: 0, batch: 39246, sum loss: 5078.100586, avg loss: 2.746404, ppl: 15.586477 +epoch: 0, batch: 39247, sum loss: 3852.259521, avg loss: 2.688248, ppl: 14.705891 +epoch: 0, batch: 39248, sum loss: 4523.186523, avg loss: 2.834077, ppl: 17.014683 +epoch: 0, batch: 39249, sum loss: 4538.696777, avg loss: 2.885376, ppl: 17.910305 +epoch: 0, batch: 39250, sum loss: 5978.598633, avg loss: 3.252774, ppl: 25.861982 +epoch: 0, batch: 39251, sum loss: 4673.605957, avg loss: 2.793548, ppl: 16.338890 +epoch: 0, batch: 39252, sum loss: 4708.136719, avg loss: 2.953662, ppl: 19.176043 +epoch: 0, batch: 39253, sum loss: 5074.051270, avg loss: 2.878078, ppl: 17.780062 +epoch: 0, batch: 39254, sum loss: 4804.447754, avg loss: 2.851304, ppl: 17.310345 +epoch: 0, batch: 39255, sum loss: 4238.167480, avg loss: 2.836792, ppl: 17.060949 +epoch: 0, batch: 39256, sum loss: 5262.474121, avg loss: 3.001982, ppl: 20.125385 +epoch: 0, batch: 39257, sum loss: 5448.700684, avg loss: 2.881386, ppl: 17.838978 +epoch: 0, batch: 39258, sum loss: 5371.581543, avg loss: 3.176571, ppl: 23.964441 +epoch: 0, batch: 39259, sum loss: 4394.606445, avg loss: 2.753513, ppl: 15.697679 +epoch: 0, batch: 39260, sum loss: 5008.735352, avg loss: 2.734026, ppl: 15.394741 +epoch: 0, batch: 39261, sum loss: 5391.950195, avg loss: 3.000529, ppl: 20.096161 +epoch: 0, batch: 39262, sum loss: 4865.881348, avg loss: 2.874118, ppl: 17.709795 +epoch: 0, batch: 39263, sum loss: 4716.750977, avg loss: 2.781103, ppl: 
16.136812 +epoch: 0, batch: 39264, sum loss: 5217.668945, avg loss: 2.846519, ppl: 17.227703 +epoch: 0, batch: 39265, sum loss: 5361.826172, avg loss: 2.836945, ppl: 17.063559 +epoch: 0, batch: 39266, sum loss: 5010.059082, avg loss: 2.788013, ppl: 16.248701 +epoch: 0, batch: 39267, sum loss: 4847.647949, avg loss: 3.137636, ppl: 23.049324 +epoch: 0, batch: 39268, sum loss: 4396.711914, avg loss: 2.735975, ppl: 15.424776 +epoch: 0, batch: 39269, sum loss: 3274.903320, avg loss: 2.660360, ppl: 14.301438 +epoch: 0, batch: 39270, sum loss: 5143.857910, avg loss: 2.876878, ppl: 17.758743 +epoch: 0, batch: 39271, sum loss: 4703.223145, avg loss: 2.773127, ppl: 16.008612 +epoch: 0, batch: 39272, sum loss: 4999.989746, avg loss: 2.842518, ppl: 17.158922 +epoch: 0, batch: 39273, sum loss: 5146.220215, avg loss: 2.878199, ppl: 17.782225 +epoch: 0, batch: 39274, sum loss: 4828.643555, avg loss: 2.660410, ppl: 14.302147 +epoch: 0, batch: 39275, sum loss: 3777.643799, avg loss: 2.692547, ppl: 14.769250 +epoch: 0, batch: 39276, sum loss: 5313.047363, avg loss: 2.830606, ppl: 16.955732 +epoch: 0, batch: 39277, sum loss: 4225.925781, avg loss: 2.482918, ppl: 11.976154 +epoch: 0, batch: 39278, sum loss: 4987.044922, avg loss: 2.814359, ppl: 16.682486 +epoch: 0, batch: 39279, sum loss: 4659.412598, avg loss: 2.721619, ppl: 15.204925 +epoch: 0, batch: 39280, sum loss: 5234.348633, avg loss: 2.895104, ppl: 18.085390 +epoch: 0, batch: 39281, sum loss: 4663.105469, avg loss: 3.049775, ppl: 21.110592 +epoch: 0, batch: 39282, sum loss: 5386.500977, avg loss: 2.969405, ppl: 19.480330 +epoch: 0, batch: 39283, sum loss: 4105.811035, avg loss: 2.583896, ppl: 13.248656 +epoch: 0, batch: 39284, sum loss: 4386.225586, avg loss: 2.813487, ppl: 16.667931 +epoch: 0, batch: 39285, sum loss: 5124.152344, avg loss: 2.904848, ppl: 18.262474 +epoch: 0, batch: 39286, sum loss: 4353.956055, avg loss: 2.976047, ppl: 19.610136 +epoch: 0, batch: 39287, sum loss: 5289.407227, avg loss: 3.062772, ppl: 
21.386761 +epoch: 0, batch: 39288, sum loss: 4832.155762, avg loss: 2.999476, ppl: 20.075018 +epoch: 0, batch: 39289, sum loss: 5072.654785, avg loss: 2.928785, ppl: 18.704884 +epoch: 0, batch: 39290, sum loss: 5836.677734, avg loss: 3.161797, ppl: 23.612997 +epoch: 0, batch: 39291, sum loss: 5203.739258, avg loss: 2.920168, ppl: 18.544405 +epoch: 0, batch: 39292, sum loss: 5077.712402, avg loss: 2.918226, ppl: 18.508415 +epoch: 0, batch: 39293, sum loss: 5470.465820, avg loss: 3.145754, ppl: 23.237186 +epoch: 0, batch: 39294, sum loss: 5421.663086, avg loss: 3.030555, ppl: 20.708723 +epoch: 0, batch: 39295, sum loss: 4394.896484, avg loss: 2.692951, ppl: 14.775220 +epoch: 0, batch: 39296, sum loss: 5399.309082, avg loss: 2.961771, ppl: 19.332188 +epoch: 0, batch: 39297, sum loss: 4747.782227, avg loss: 2.706831, ppl: 14.981727 +epoch: 0, batch: 39298, sum loss: 4543.317383, avg loss: 2.832492, ppl: 16.987743 +epoch: 0, batch: 39299, sum loss: 4531.016602, avg loss: 2.849696, ppl: 17.282526 +epoch: 0, batch: 39300, sum loss: 4243.339844, avg loss: 2.707939, ppl: 14.998331 +epoch: 0, batch: 39301, sum loss: 4387.567871, avg loss: 2.668837, ppl: 14.423182 +epoch: 0, batch: 39302, sum loss: 4918.486328, avg loss: 2.746223, ppl: 15.583668 +epoch: 0, batch: 39303, sum loss: 4521.067383, avg loss: 2.777068, ppl: 16.071835 +epoch: 0, batch: 39304, sum loss: 4717.442383, avg loss: 2.912002, ppl: 18.393578 +epoch: 0, batch: 39305, sum loss: 4832.360352, avg loss: 2.992174, ppl: 19.928955 +epoch: 0, batch: 39306, sum loss: 4168.224609, avg loss: 2.614946, ppl: 13.666483 +epoch: 0, batch: 39307, sum loss: 4208.446289, avg loss: 2.604237, ppl: 13.520900 +epoch: 0, batch: 39308, sum loss: 4906.711426, avg loss: 2.917189, ppl: 18.489235 +epoch: 0, batch: 39309, sum loss: 5553.307617, avg loss: 3.019743, ppl: 20.486031 +epoch: 0, batch: 39310, sum loss: 4577.093750, avg loss: 2.913491, ppl: 18.420990 +epoch: 0, batch: 39311, sum loss: 4438.737305, avg loss: 2.893571, ppl: 
18.057673 +epoch: 0, batch: 39312, sum loss: 4438.444824, avg loss: 2.755087, ppl: 15.722407 +epoch: 0, batch: 39313, sum loss: 5250.253906, avg loss: 2.782328, ppl: 16.156595 +epoch: 0, batch: 39314, sum loss: 5300.262695, avg loss: 2.933183, ppl: 18.787344 +epoch: 0, batch: 39315, sum loss: 3377.644775, avg loss: 2.494568, ppl: 12.116493 +epoch: 0, batch: 39316, sum loss: 5634.830078, avg loss: 2.870520, ppl: 17.646185 +epoch: 0, batch: 39317, sum loss: 4682.846680, avg loss: 2.702162, ppl: 14.911933 +epoch: 0, batch: 39318, sum loss: 5346.514160, avg loss: 2.712590, ppl: 15.068244 +epoch: 0, batch: 39319, sum loss: 5849.724609, avg loss: 3.029376, ppl: 20.684317 +epoch: 0, batch: 39320, sum loss: 4438.207031, avg loss: 2.859670, ppl: 17.455761 +epoch: 0, batch: 39321, sum loss: 4455.624512, avg loss: 2.939066, ppl: 18.898195 +epoch: 0, batch: 39322, sum loss: 5019.437988, avg loss: 2.815165, ppl: 16.695923 +epoch: 0, batch: 39323, sum loss: 5999.342773, avg loss: 3.189443, ppl: 24.274906 +epoch: 0, batch: 39324, sum loss: 4005.900879, avg loss: 2.723250, ppl: 15.229741 +epoch: 0, batch: 39325, sum loss: 5718.717285, avg loss: 3.282846, ppl: 26.651508 +epoch: 0, batch: 39326, sum loss: 5330.583984, avg loss: 3.061794, ppl: 21.365860 +epoch: 0, batch: 39327, sum loss: 4891.831543, avg loss: 2.784195, ppl: 16.186789 +epoch: 0, batch: 39328, sum loss: 5422.194824, avg loss: 2.969439, ppl: 19.480984 +epoch: 0, batch: 39329, sum loss: 4753.758301, avg loss: 2.877578, ppl: 17.771175 +epoch: 0, batch: 39330, sum loss: 3738.011963, avg loss: 2.353912, ppl: 10.526669 +epoch: 0, batch: 39331, sum loss: 3570.598633, avg loss: 2.445616, ppl: 11.537649 +epoch: 0, batch: 39332, sum loss: 5062.984375, avg loss: 3.109941, ppl: 22.419727 +epoch: 0, batch: 39333, sum loss: 5191.884766, avg loss: 2.687311, ppl: 14.692115 +epoch: 0, batch: 39334, sum loss: 4152.538086, avg loss: 2.803874, ppl: 16.508486 +epoch: 0, batch: 39335, sum loss: 5277.053711, avg loss: 2.964637, ppl: 
19.387665 +epoch: 0, batch: 39336, sum loss: 4409.908203, avg loss: 2.666208, ppl: 14.385321 +epoch: 0, batch: 39337, sum loss: 4413.815430, avg loss: 2.784742, ppl: 16.195633 +epoch: 0, batch: 39338, sum loss: 5640.806641, avg loss: 3.275730, ppl: 26.462526 +epoch: 0, batch: 39339, sum loss: 4905.618164, avg loss: 2.769971, ppl: 15.958166 +epoch: 0, batch: 39340, sum loss: 5508.311523, avg loss: 3.014949, ppl: 20.388048 +epoch: 0, batch: 39341, sum loss: 4654.801270, avg loss: 2.957307, ppl: 19.246069 +epoch: 0, batch: 39342, sum loss: 4085.643066, avg loss: 2.582581, ppl: 13.231241 +epoch: 0, batch: 39343, sum loss: 5050.331543, avg loss: 2.955138, ppl: 19.204382 +epoch: 0, batch: 39344, sum loss: 5163.289062, avg loss: 2.928695, ppl: 18.703207 +epoch: 0, batch: 39345, sum loss: 4812.520020, avg loss: 2.871432, ppl: 17.662294 +epoch: 0, batch: 39346, sum loss: 5603.017578, avg loss: 2.999474, ppl: 20.074976 +epoch: 0, batch: 39347, sum loss: 3878.291992, avg loss: 2.518371, ppl: 12.408371 +epoch: 0, batch: 39348, sum loss: 4109.591797, avg loss: 2.859841, ppl: 17.458757 +epoch: 0, batch: 39349, sum loss: 5478.471191, avg loss: 3.062309, ppl: 21.376865 +epoch: 0, batch: 39350, sum loss: 4678.905273, avg loss: 2.556779, ppl: 12.894217 +epoch: 0, batch: 39351, sum loss: 4923.921875, avg loss: 2.667347, ppl: 14.401707 +epoch: 0, batch: 39352, sum loss: 5709.020508, avg loss: 3.041567, ppl: 20.938019 +epoch: 0, batch: 39353, sum loss: 4966.663574, avg loss: 2.909586, ppl: 18.349203 +epoch: 0, batch: 39354, sum loss: 5176.134766, avg loss: 2.911212, ppl: 18.379061 +epoch: 0, batch: 39355, sum loss: 4780.980469, avg loss: 3.094486, ppl: 22.075888 +epoch: 0, batch: 39356, sum loss: 5633.643066, avg loss: 2.858266, ppl: 17.431280 +epoch: 0, batch: 39357, sum loss: 5103.777832, avg loss: 3.059819, ppl: 21.323696 +epoch: 0, batch: 39358, sum loss: 6455.642090, avg loss: 3.085871, ppl: 21.886520 +epoch: 0, batch: 39359, sum loss: 4812.610352, avg loss: 2.892194, ppl: 
18.032827 +epoch: 0, batch: 39360, sum loss: 5361.315918, avg loss: 2.909016, ppl: 18.338737 +epoch: 0, batch: 39361, sum loss: 5837.251953, avg loss: 2.833617, ppl: 17.006870 +epoch: 0, batch: 39362, sum loss: 4991.313477, avg loss: 2.855443, ppl: 17.382132 +epoch: 0, batch: 39363, sum loss: 5041.933594, avg loss: 3.020931, ppl: 20.510378 +epoch: 0, batch: 39364, sum loss: 4581.603516, avg loss: 2.881512, ppl: 17.841223 +epoch: 0, batch: 39365, sum loss: 4354.496094, avg loss: 2.855407, ppl: 17.381514 +epoch: 0, batch: 39366, sum loss: 4702.184082, avg loss: 2.935196, ppl: 18.825191 +epoch: 0, batch: 39367, sum loss: 5661.098633, avg loss: 3.103673, ppl: 22.279623 +epoch: 0, batch: 39368, sum loss: 5093.101074, avg loss: 2.902052, ppl: 18.211475 +epoch: 0, batch: 39369, sum loss: 4997.607422, avg loss: 2.862318, ppl: 17.502054 +epoch: 0, batch: 39370, sum loss: 6268.872070, avg loss: 2.940372, ppl: 18.922874 +epoch: 0, batch: 39371, sum loss: 5471.021484, avg loss: 2.734144, ppl: 15.396554 +epoch: 0, batch: 39372, sum loss: 4340.117188, avg loss: 2.609812, ppl: 13.596495 +epoch: 0, batch: 39373, sum loss: 4731.836426, avg loss: 2.841944, ppl: 17.149071 +epoch: 0, batch: 39374, sum loss: 3886.915039, avg loss: 2.469450, ppl: 11.815952 +epoch: 0, batch: 39375, sum loss: 5517.161621, avg loss: 2.830765, ppl: 16.958437 +epoch: 0, batch: 39376, sum loss: 5087.889160, avg loss: 2.694857, ppl: 14.803396 +epoch: 0, batch: 39377, sum loss: 4147.740723, avg loss: 2.761478, ppl: 15.823219 +epoch: 0, batch: 39378, sum loss: 4365.131836, avg loss: 2.951407, ppl: 19.132862 +epoch: 0, batch: 39379, sum loss: 4888.301758, avg loss: 2.851985, ppl: 17.322128 +epoch: 0, batch: 39380, sum loss: 4816.314453, avg loss: 2.758485, ppl: 15.775922 +epoch: 0, batch: 39381, sum loss: 5399.088867, avg loss: 2.986222, ppl: 19.810692 +epoch: 0, batch: 39382, sum loss: 4204.934082, avg loss: 2.831605, ppl: 16.972687 +epoch: 0, batch: 39383, sum loss: 5172.580566, avg loss: 3.024901, ppl: 
20.591965 +epoch: 0, batch: 39384, sum loss: 4821.902344, avg loss: 2.848141, ppl: 17.255672 +epoch: 0, batch: 39385, sum loss: 5287.039062, avg loss: 3.047285, ppl: 21.058090 +epoch: 0, batch: 39386, sum loss: 4251.105957, avg loss: 2.776686, ppl: 16.065691 +epoch: 0, batch: 39387, sum loss: 4212.101562, avg loss: 2.712235, ppl: 15.062910 +epoch: 0, batch: 39388, sum loss: 5442.065918, avg loss: 2.995083, ppl: 19.987020 +epoch: 0, batch: 39389, sum loss: 5153.180664, avg loss: 2.875659, ppl: 17.737108 +epoch: 0, batch: 39390, sum loss: 3933.375977, avg loss: 2.627506, ppl: 13.839206 +epoch: 0, batch: 39391, sum loss: 4782.041504, avg loss: 2.940985, ppl: 18.934486 +epoch: 0, batch: 39392, sum loss: 4170.531250, avg loss: 2.678569, ppl: 14.564231 +epoch: 0, batch: 39393, sum loss: 4198.178711, avg loss: 2.679119, ppl: 14.572244 +epoch: 0, batch: 39394, sum loss: 4846.382812, avg loss: 2.872782, ppl: 17.686148 +epoch: 0, batch: 39395, sum loss: 4654.140625, avg loss: 2.600079, ppl: 13.464796 +epoch: 0, batch: 39396, sum loss: 5082.102051, avg loss: 2.863156, ppl: 17.516720 +epoch: 0, batch: 39397, sum loss: 5253.053711, avg loss: 3.005180, ppl: 20.189842 +epoch: 0, batch: 39398, sum loss: 4898.253906, avg loss: 2.908702, ppl: 18.332985 +epoch: 0, batch: 39399, sum loss: 5151.478027, avg loss: 3.088416, ppl: 21.942295 +epoch: 0, batch: 39400, sum loss: 5408.139648, avg loss: 2.824094, ppl: 16.845673 +epoch: 0, batch: 39401, sum loss: 5108.254395, avg loss: 2.890919, ppl: 18.009861 +epoch: 0, batch: 39402, sum loss: 4271.819824, avg loss: 2.773909, ppl: 16.021135 +epoch: 0, batch: 39403, sum loss: 4948.423340, avg loss: 2.857057, ppl: 17.410219 +epoch: 0, batch: 39404, sum loss: 4559.115723, avg loss: 2.960465, ppl: 19.306942 +epoch: 0, batch: 39405, sum loss: 5148.193848, avg loss: 2.906942, ppl: 18.300743 +epoch: 0, batch: 39406, sum loss: 5286.754395, avg loss: 3.002132, ppl: 20.128405 +epoch: 0, batch: 39407, sum loss: 5172.340332, avg loss: 3.095356, ppl: 
22.095114 +epoch: 0, batch: 39408, sum loss: 5908.159668, avg loss: 3.088426, ppl: 21.942520 +epoch: 0, batch: 39409, sum loss: 4707.428223, avg loss: 2.808728, ppl: 16.588808 +epoch: 0, batch: 39410, sum loss: 4919.108887, avg loss: 2.815746, ppl: 16.705639 +epoch: 0, batch: 39411, sum loss: 4221.504395, avg loss: 2.623682, ppl: 13.786392 +epoch: 0, batch: 39412, sum loss: 5122.053223, avg loss: 2.979671, ppl: 19.681332 +epoch: 0, batch: 39413, sum loss: 4661.523438, avg loss: 2.922585, ppl: 18.589283 +epoch: 0, batch: 39414, sum loss: 4363.786133, avg loss: 2.804490, ppl: 16.518646 +epoch: 0, batch: 39415, sum loss: 4696.312988, avg loss: 2.730415, ppl: 15.339246 +epoch: 0, batch: 39416, sum loss: 4652.845703, avg loss: 2.917145, ppl: 18.488424 +epoch: 0, batch: 39417, sum loss: 4794.601074, avg loss: 3.091296, ppl: 22.005589 +epoch: 0, batch: 39418, sum loss: 4431.634277, avg loss: 2.844438, ppl: 17.191895 +epoch: 0, batch: 39419, sum loss: 4317.195312, avg loss: 2.655102, ppl: 14.226431 +epoch: 0, batch: 39420, sum loss: 4299.875977, avg loss: 2.684067, ppl: 14.644535 +epoch: 0, batch: 39421, sum loss: 5284.263672, avg loss: 2.828835, ppl: 16.925732 +epoch: 0, batch: 39422, sum loss: 4335.929199, avg loss: 2.651944, ppl: 14.181586 +epoch: 0, batch: 39423, sum loss: 4621.926758, avg loss: 2.749510, ppl: 15.634974 +epoch: 0, batch: 39424, sum loss: 5081.499023, avg loss: 2.792032, ppl: 16.314144 +epoch: 0, batch: 39425, sum loss: 5553.765625, avg loss: 3.162737, ppl: 23.635195 +epoch: 0, batch: 39426, sum loss: 5197.456055, avg loss: 3.060928, ppl: 21.347366 +epoch: 0, batch: 39427, sum loss: 5493.156250, avg loss: 2.935947, ppl: 18.839334 +epoch: 0, batch: 39428, sum loss: 5555.692383, avg loss: 2.978924, ppl: 19.666636 +epoch: 0, batch: 39429, sum loss: 4676.441406, avg loss: 2.695355, ppl: 14.810781 +epoch: 0, batch: 39430, sum loss: 4738.703125, avg loss: 3.027925, ppl: 20.654335 +epoch: 0, batch: 39431, sum loss: 5717.854492, avg loss: 2.908370, ppl: 
18.326893 +epoch: 0, batch: 39432, sum loss: 5999.915039, avg loss: 3.031791, ppl: 20.734344 +epoch: 0, batch: 39433, sum loss: 4789.267090, avg loss: 2.825526, ppl: 16.869820 +epoch: 0, batch: 39434, sum loss: 5014.584961, avg loss: 2.903639, ppl: 18.240408 +epoch: 0, batch: 39435, sum loss: 5298.921387, avg loss: 2.943845, ppl: 18.988724 +epoch: 0, batch: 39436, sum loss: 4851.559570, avg loss: 2.926152, ppl: 18.655701 +epoch: 0, batch: 39437, sum loss: 5553.935547, avg loss: 2.982780, ppl: 19.742620 +epoch: 0, batch: 39438, sum loss: 4958.867188, avg loss: 2.901619, ppl: 18.203596 +epoch: 0, batch: 39439, sum loss: 5504.154297, avg loss: 2.798248, ppl: 16.415861 +epoch: 0, batch: 39440, sum loss: 4915.245117, avg loss: 2.987991, ppl: 19.845770 +epoch: 0, batch: 39441, sum loss: 5243.172852, avg loss: 2.953900, ppl: 19.180614 +epoch: 0, batch: 39442, sum loss: 4244.310547, avg loss: 2.657677, ppl: 14.263124 +epoch: 0, batch: 39443, sum loss: 4445.437988, avg loss: 2.773199, ppl: 16.009766 +epoch: 0, batch: 39444, sum loss: 4170.992676, avg loss: 2.767746, ppl: 15.922700 +epoch: 0, batch: 39445, sum loss: 4805.825684, avg loss: 3.108555, ppl: 22.388666 +epoch: 0, batch: 39446, sum loss: 4691.273438, avg loss: 2.673090, ppl: 14.484661 +epoch: 0, batch: 39447, sum loss: 5597.010254, avg loss: 2.945795, ppl: 19.025784 +epoch: 0, batch: 39448, sum loss: 5305.018066, avg loss: 3.019361, ppl: 20.478207 +epoch: 0, batch: 39449, sum loss: 5831.498047, avg loss: 3.051543, ppl: 21.147940 +epoch: 0, batch: 39450, sum loss: 4834.723633, avg loss: 2.666698, ppl: 14.392370 +epoch: 0, batch: 39451, sum loss: 5066.445312, avg loss: 2.896767, ppl: 18.115482 +epoch: 0, batch: 39452, sum loss: 6637.344238, avg loss: 3.312048, ppl: 27.441267 +epoch: 0, batch: 39453, sum loss: 4433.895508, avg loss: 2.967802, ppl: 19.449116 +epoch: 0, batch: 39454, sum loss: 4501.109863, avg loss: 2.703369, ppl: 14.929952 +epoch: 0, batch: 39455, sum loss: 4444.791016, avg loss: 2.906992, ppl: 
18.301668 +epoch: 0, batch: 39456, sum loss: 4414.221191, avg loss: 2.772752, ppl: 16.002613 +epoch: 0, batch: 39457, sum loss: 4397.333984, avg loss: 2.822423, ppl: 16.817541 +epoch: 0, batch: 39458, sum loss: 5151.319824, avg loss: 2.860255, ppl: 17.465984 +epoch: 0, batch: 39459, sum loss: 5214.471191, avg loss: 2.901765, ppl: 18.206249 +epoch: 0, batch: 39460, sum loss: 5148.426270, avg loss: 3.032053, ppl: 20.739773 +epoch: 0, batch: 39461, sum loss: 5058.814453, avg loss: 2.897374, ppl: 18.126482 +epoch: 0, batch: 39462, sum loss: 4796.100586, avg loss: 2.772312, ppl: 15.995580 +epoch: 0, batch: 39463, sum loss: 4493.383789, avg loss: 2.833155, ppl: 16.999006 +epoch: 0, batch: 39464, sum loss: 4255.069824, avg loss: 2.748753, ppl: 15.623138 +epoch: 0, batch: 39465, sum loss: 5477.900391, avg loss: 3.093112, ppl: 22.045567 +epoch: 0, batch: 39466, sum loss: 4697.592773, avg loss: 2.848752, ppl: 17.266230 +epoch: 0, batch: 39467, sum loss: 4495.977051, avg loss: 2.783887, ppl: 16.181791 +epoch: 0, batch: 39468, sum loss: 4823.329102, avg loss: 2.988432, ppl: 19.854525 +epoch: 0, batch: 39469, sum loss: 4409.555664, avg loss: 2.540067, ppl: 12.680517 +epoch: 0, batch: 39470, sum loss: 4961.364746, avg loss: 2.862876, ppl: 17.511822 +epoch: 0, batch: 39471, sum loss: 5440.819824, avg loss: 3.020999, ppl: 20.511776 +epoch: 0, batch: 39472, sum loss: 4195.081543, avg loss: 2.697802, ppl: 14.847059 +epoch: 0, batch: 39473, sum loss: 5041.703613, avg loss: 2.834010, ppl: 17.013546 +epoch: 0, batch: 39474, sum loss: 5152.188965, avg loss: 2.849662, ppl: 17.281940 +epoch: 0, batch: 39475, sum loss: 5020.053223, avg loss: 3.055419, ppl: 21.230074 +epoch: 0, batch: 39476, sum loss: 4913.378906, avg loss: 2.935113, ppl: 18.823629 +epoch: 0, batch: 39477, sum loss: 4821.874023, avg loss: 2.885622, ppl: 17.914707 +epoch: 0, batch: 39478, sum loss: 4246.792969, avg loss: 2.938957, ppl: 18.896132 +epoch: 0, batch: 39479, sum loss: 4948.658691, avg loss: 2.986517, ppl: 
19.816540 +epoch: 0, batch: 39480, sum loss: 6015.542480, avg loss: 2.927271, ppl: 18.676596 +epoch: 0, batch: 39481, sum loss: 4992.070312, avg loss: 3.189821, ppl: 24.284086 +epoch: 0, batch: 39482, sum loss: 4909.059570, avg loss: 3.020960, ppl: 20.510965 +epoch: 0, batch: 39483, sum loss: 4258.307129, avg loss: 2.571441, ppl: 13.084671 +epoch: 0, batch: 39484, sum loss: 3672.402832, avg loss: 2.476334, ppl: 11.897566 +epoch: 0, batch: 39485, sum loss: 5465.638672, avg loss: 3.130377, ppl: 22.882612 +epoch: 0, batch: 39486, sum loss: 4491.086914, avg loss: 2.760348, ppl: 15.805347 +epoch: 0, batch: 39487, sum loss: 5393.214355, avg loss: 2.929503, ppl: 18.718321 +epoch: 0, batch: 39488, sum loss: 4523.541992, avg loss: 2.738222, ppl: 15.459468 +epoch: 0, batch: 39489, sum loss: 4600.771973, avg loss: 2.778244, ppl: 16.090740 +epoch: 0, batch: 39490, sum loss: 4760.438477, avg loss: 2.729609, ppl: 15.326893 +epoch: 0, batch: 39491, sum loss: 3481.338867, avg loss: 2.366648, ppl: 10.661592 +epoch: 0, batch: 39492, sum loss: 4761.358398, avg loss: 2.859675, ppl: 17.455847 +epoch: 0, batch: 39493, sum loss: 5116.321777, avg loss: 3.085840, ppl: 21.885832 +epoch: 0, batch: 39494, sum loss: 5292.257812, avg loss: 2.974850, ppl: 19.586679 +epoch: 0, batch: 39495, sum loss: 4863.928223, avg loss: 2.933612, ppl: 18.795389 +epoch: 0, batch: 39496, sum loss: 4792.963867, avg loss: 2.864892, ppl: 17.547153 +epoch: 0, batch: 39497, sum loss: 4704.417969, avg loss: 3.015653, ppl: 20.402403 +epoch: 0, batch: 39498, sum loss: 5195.872070, avg loss: 2.867479, ppl: 17.592613 +epoch: 0, batch: 39499, sum loss: 4575.772461, avg loss: 2.702760, ppl: 14.920856 +epoch: 0, batch: 39500, sum loss: 4632.029297, avg loss: 2.752245, ppl: 15.677795 +epoch: 0, batch: 39501, sum loss: 5230.778809, avg loss: 2.877216, ppl: 17.764750 +epoch: 0, batch: 39502, sum loss: 4866.170410, avg loss: 2.959958, ppl: 19.297153 +epoch: 0, batch: 39503, sum loss: 5693.983398, avg loss: 3.044911, ppl: 
21.008160 +epoch: 0, batch: 39504, sum loss: 5020.373047, avg loss: 2.713715, ppl: 15.085217 +epoch: 0, batch: 39505, sum loss: 4758.449219, avg loss: 3.000283, ppl: 20.091227 +epoch: 0, batch: 39506, sum loss: 5020.744629, avg loss: 2.993885, ppl: 19.963079 +epoch: 0, batch: 39507, sum loss: 4276.946777, avg loss: 2.725906, ppl: 15.270245 +epoch: 0, batch: 39508, sum loss: 4952.606934, avg loss: 2.812383, ppl: 16.649549 +epoch: 0, batch: 39509, sum loss: 5253.072754, avg loss: 2.988096, ppl: 19.847857 +epoch: 0, batch: 39510, sum loss: 4215.229492, avg loss: 2.647757, ppl: 14.122331 +epoch: 0, batch: 39511, sum loss: 5217.399414, avg loss: 2.815650, ppl: 16.704029 +epoch: 0, batch: 39512, sum loss: 5224.708008, avg loss: 3.075167, ppl: 21.653492 +epoch: 0, batch: 39513, sum loss: 5222.167480, avg loss: 3.002972, ppl: 20.145313 +epoch: 0, batch: 39514, sum loss: 5431.638672, avg loss: 3.143309, ppl: 23.180452 +epoch: 0, batch: 39515, sum loss: 5901.771484, avg loss: 3.267869, ppl: 26.255329 +epoch: 0, batch: 39516, sum loss: 5627.305664, avg loss: 3.221125, ppl: 25.056295 +epoch: 0, batch: 39517, sum loss: 4751.985352, avg loss: 2.801878, ppl: 16.475563 +epoch: 0, batch: 39518, sum loss: 5587.392090, avg loss: 2.852165, ppl: 17.325258 +epoch: 0, batch: 39519, sum loss: 5926.472656, avg loss: 3.274294, ppl: 26.424574 +epoch: 0, batch: 39520, sum loss: 5730.010254, avg loss: 3.319820, ppl: 27.655384 +epoch: 0, batch: 39521, sum loss: 4477.700195, avg loss: 2.890704, ppl: 18.005980 +epoch: 0, batch: 39522, sum loss: 5446.402832, avg loss: 2.985966, ppl: 19.805634 +epoch: 0, batch: 39523, sum loss: 5100.305176, avg loss: 3.044959, ppl: 21.009161 +epoch: 0, batch: 39524, sum loss: 5281.250488, avg loss: 3.061595, ppl: 21.361595 +epoch: 0, batch: 39525, sum loss: 3866.431885, avg loss: 2.607169, ppl: 13.560612 +epoch: 0, batch: 39526, sum loss: 4266.753906, avg loss: 2.820062, ppl: 16.777893 +epoch: 0, batch: 39527, sum loss: 5755.920898, avg loss: 3.040634, ppl: 
20.918505 +epoch: 0, batch: 39528, sum loss: 4379.396973, avg loss: 2.771770, ppl: 15.986910 +epoch: 0, batch: 39529, sum loss: 3456.345459, avg loss: 2.343285, ppl: 10.415396 +epoch: 0, batch: 39530, sum loss: 4562.957031, avg loss: 2.842964, ppl: 17.166567 +epoch: 0, batch: 39531, sum loss: 3881.446289, avg loss: 2.896602, ppl: 18.112488 +epoch: 0, batch: 39532, sum loss: 4400.392090, avg loss: 2.804584, ppl: 16.520199 +epoch: 0, batch: 39533, sum loss: 4093.191895, avg loss: 2.698215, ppl: 14.853191 +epoch: 0, batch: 39534, sum loss: 5408.746094, avg loss: 2.912626, ppl: 18.405058 +epoch: 0, batch: 39535, sum loss: 4259.385254, avg loss: 2.648871, ppl: 14.138074 +epoch: 0, batch: 39536, sum loss: 5307.958496, avg loss: 2.957080, ppl: 19.241707 +epoch: 0, batch: 39537, sum loss: 5495.196289, avg loss: 3.019339, ppl: 20.477749 +epoch: 0, batch: 39538, sum loss: 4786.155762, avg loss: 2.918388, ppl: 18.511417 +epoch: 0, batch: 39539, sum loss: 4151.457520, avg loss: 2.716923, ppl: 15.133677 +epoch: 0, batch: 39540, sum loss: 5106.062500, avg loss: 2.897879, ppl: 18.135637 +epoch: 0, batch: 39541, sum loss: 4349.230957, avg loss: 2.789757, ppl: 16.277063 +epoch: 0, batch: 39542, sum loss: 4990.605957, avg loss: 2.769482, ppl: 15.950364 +epoch: 0, batch: 39543, sum loss: 4173.474609, avg loss: 2.868367, ppl: 17.608248 +epoch: 0, batch: 39544, sum loss: 4764.570801, avg loss: 3.042510, ppl: 20.957783 +epoch: 0, batch: 39545, sum loss: 4790.948242, avg loss: 2.924877, ppl: 18.631927 +epoch: 0, batch: 39546, sum loss: 5218.628906, avg loss: 2.768503, ppl: 15.934769 +epoch: 0, batch: 39547, sum loss: 4422.398438, avg loss: 2.718131, ppl: 15.151971 +epoch: 0, batch: 39548, sum loss: 4788.591797, avg loss: 2.841894, ppl: 17.148216 +epoch: 0, batch: 39549, sum loss: 5595.714844, avg loss: 2.982790, ppl: 19.742832 +epoch: 0, batch: 39550, sum loss: 5395.941406, avg loss: 2.968065, ppl: 19.454231 +epoch: 0, batch: 39551, sum loss: 4548.230957, avg loss: 2.860523, ppl: 
17.470654 +epoch: 0, batch: 39552, sum loss: 5664.717773, avg loss: 3.085358, ppl: 21.875299 +epoch: 0, batch: 39553, sum loss: 4575.661621, avg loss: 3.022234, ppl: 20.537115 +epoch: 0, batch: 39554, sum loss: 4360.476562, avg loss: 2.645920, ppl: 14.096412 +epoch: 0, batch: 39555, sum loss: 5459.976562, avg loss: 3.152411, ppl: 23.392406 +epoch: 0, batch: 39556, sum loss: 4432.785156, avg loss: 2.787915, ppl: 16.247112 +epoch: 0, batch: 39557, sum loss: 5123.589355, avg loss: 2.807446, ppl: 16.567554 +epoch: 0, batch: 39558, sum loss: 5141.850586, avg loss: 2.822092, ppl: 16.811977 +epoch: 0, batch: 39559, sum loss: 4131.174805, avg loss: 2.654997, ppl: 14.224938 +epoch: 0, batch: 39560, sum loss: 3917.195312, avg loss: 2.620198, ppl: 13.738437 +epoch: 0, batch: 39561, sum loss: 5269.289551, avg loss: 2.899994, ppl: 18.174044 +epoch: 0, batch: 39562, sum loss: 5489.790039, avg loss: 2.994976, ppl: 19.984871 +epoch: 0, batch: 39563, sum loss: 4979.277344, avg loss: 2.789511, ppl: 16.273064 +epoch: 0, batch: 39564, sum loss: 4470.808594, avg loss: 2.766589, ppl: 15.904298 +epoch: 0, batch: 39565, sum loss: 3874.636719, avg loss: 2.547427, ppl: 12.774196 +epoch: 0, batch: 39566, sum loss: 4598.626953, avg loss: 2.828184, ppl: 16.914715 +epoch: 0, batch: 39567, sum loss: 4386.944824, avg loss: 2.808543, ppl: 16.585743 +epoch: 0, batch: 39568, sum loss: 4324.928223, avg loss: 2.781304, ppl: 16.140060 +epoch: 0, batch: 39569, sum loss: 5731.714355, avg loss: 2.843112, ppl: 17.169121 +epoch: 0, batch: 39570, sum loss: 3979.661133, avg loss: 2.842615, ppl: 17.160583 +epoch: 0, batch: 39571, sum loss: 5089.535645, avg loss: 2.840143, ppl: 17.118208 +epoch: 0, batch: 39572, sum loss: 5534.671875, avg loss: 3.022759, ppl: 20.547909 +epoch: 0, batch: 39573, sum loss: 4706.431641, avg loss: 3.054141, ppl: 21.202971 +epoch: 0, batch: 39574, sum loss: 3530.175537, avg loss: 2.609147, ppl: 13.587454 +epoch: 0, batch: 39575, sum loss: 4061.765625, avg loss: 2.452757, ppl: 
11.620338 +epoch: 0, batch: 39576, sum loss: 5276.718262, avg loss: 3.098484, ppl: 22.164326 +epoch: 0, batch: 39577, sum loss: 4554.601562, avg loss: 2.743736, ppl: 15.544953 +epoch: 0, batch: 39578, sum loss: 3736.650146, avg loss: 2.566381, ppl: 13.018621 +epoch: 0, batch: 39579, sum loss: 4636.053711, avg loss: 2.921269, ppl: 18.564831 +epoch: 0, batch: 39580, sum loss: 4455.544434, avg loss: 2.952647, ppl: 19.156599 +epoch: 0, batch: 39581, sum loss: 4498.873047, avg loss: 2.684292, ppl: 14.647824 +epoch: 0, batch: 39582, sum loss: 4676.214844, avg loss: 2.881217, ppl: 17.835962 +epoch: 0, batch: 39583, sum loss: 4620.408203, avg loss: 2.885952, ppl: 17.920610 +epoch: 0, batch: 39584, sum loss: 4754.664062, avg loss: 2.915183, ppl: 18.452194 +epoch: 0, batch: 39585, sum loss: 3740.483887, avg loss: 2.498653, ppl: 12.166098 +epoch: 0, batch: 39586, sum loss: 4198.475098, avg loss: 2.583677, ppl: 13.245754 +epoch: 0, batch: 39587, sum loss: 3977.014648, avg loss: 2.750356, ppl: 15.648201 +epoch: 0, batch: 39588, sum loss: 5225.009766, avg loss: 2.990847, ppl: 19.902531 +epoch: 0, batch: 39589, sum loss: 4414.745117, avg loss: 2.850061, ppl: 17.288843 +epoch: 0, batch: 39590, sum loss: 3697.469971, avg loss: 2.486530, ppl: 12.019494 +epoch: 0, batch: 39591, sum loss: 4108.617188, avg loss: 2.502203, ppl: 12.209361 +epoch: 0, batch: 39592, sum loss: 4909.487793, avg loss: 2.846080, ppl: 17.220148 +epoch: 0, batch: 39593, sum loss: 4318.754395, avg loss: 2.908252, ppl: 18.324739 +epoch: 0, batch: 39594, sum loss: 4048.689697, avg loss: 2.656621, ppl: 14.248056 +epoch: 0, batch: 39595, sum loss: 4215.951172, avg loss: 2.764558, ppl: 15.872025 +epoch: 0, batch: 39596, sum loss: 4115.192383, avg loss: 2.575214, ppl: 13.134132 +epoch: 0, batch: 39597, sum loss: 4362.063477, avg loss: 2.767807, ppl: 15.923672 +epoch: 0, batch: 39598, sum loss: 4754.958984, avg loss: 2.918943, ppl: 18.521708 +epoch: 0, batch: 39599, sum loss: 4740.661133, avg loss: 2.876615, ppl: 
17.754070 +epoch: 0, batch: 39600, sum loss: 5465.167969, avg loss: 3.112283, ppl: 22.472279 +epoch: 0, batch: 39601, sum loss: 4185.843750, avg loss: 2.773919, ppl: 16.021299 +epoch: 0, batch: 39602, sum loss: 4644.763672, avg loss: 2.768036, ppl: 15.927317 +epoch: 0, batch: 39603, sum loss: 4607.434570, avg loss: 2.760596, ppl: 15.809263 +epoch: 0, batch: 39604, sum loss: 4636.011719, avg loss: 2.858207, ppl: 17.430246 +epoch: 0, batch: 39605, sum loss: 4973.087402, avg loss: 2.967236, ppl: 19.438120 +epoch: 0, batch: 39606, sum loss: 5247.966797, avg loss: 2.990294, ppl: 19.891539 +epoch: 0, batch: 39607, sum loss: 5666.281250, avg loss: 3.116766, ppl: 22.573269 +epoch: 0, batch: 39608, sum loss: 5047.974609, avg loss: 2.818523, ppl: 16.752089 +epoch: 0, batch: 39609, sum loss: 4813.606934, avg loss: 3.129783, ppl: 22.869026 +epoch: 0, batch: 39610, sum loss: 3264.937012, avg loss: 2.333765, ppl: 10.316709 +epoch: 0, batch: 39611, sum loss: 4921.499023, avg loss: 2.775803, ppl: 16.051516 +epoch: 0, batch: 39612, sum loss: 5547.886230, avg loss: 3.003729, ppl: 20.160568 +epoch: 0, batch: 39613, sum loss: 4324.647461, avg loss: 2.676143, ppl: 14.528950 +epoch: 0, batch: 39614, sum loss: 4652.296875, avg loss: 2.765932, ppl: 15.893840 +epoch: 0, batch: 39615, sum loss: 4539.204102, avg loss: 2.909746, ppl: 18.352144 +epoch: 0, batch: 39616, sum loss: 3748.982910, avg loss: 2.456738, ppl: 11.666698 +epoch: 0, batch: 39617, sum loss: 5156.592773, avg loss: 2.778337, ppl: 16.092230 +epoch: 0, batch: 39618, sum loss: 4637.538086, avg loss: 2.933294, ppl: 18.789423 +epoch: 0, batch: 39619, sum loss: 5185.689941, avg loss: 2.928114, ppl: 18.692343 +epoch: 0, batch: 39620, sum loss: 6138.528320, avg loss: 3.100267, ppl: 22.203873 +epoch: 0, batch: 39621, sum loss: 4728.386719, avg loss: 2.789609, ppl: 16.274651 +epoch: 0, batch: 39622, sum loss: 4452.453125, avg loss: 2.664544, ppl: 14.361401 +epoch: 0, batch: 39623, sum loss: 4749.195801, avg loss: 2.904707, ppl: 
18.259892 +epoch: 0, batch: 39624, sum loss: 4845.981445, avg loss: 2.881083, ppl: 17.833572 +epoch: 0, batch: 39625, sum loss: 5885.412109, avg loss: 2.922250, ppl: 18.583057 +epoch: 0, batch: 39626, sum loss: 5183.941406, avg loss: 3.094891, ppl: 22.084822 +epoch: 0, batch: 39627, sum loss: 4752.945801, avg loss: 2.779500, ppl: 16.110971 +epoch: 0, batch: 39628, sum loss: 4560.473145, avg loss: 2.604496, ppl: 13.524414 +epoch: 0, batch: 39629, sum loss: 4724.655273, avg loss: 2.962166, ppl: 19.339823 +epoch: 0, batch: 39630, sum loss: 5642.185547, avg loss: 3.020442, ppl: 20.500351 +epoch: 0, batch: 39631, sum loss: 5171.504883, avg loss: 3.033141, ppl: 20.762342 +epoch: 0, batch: 39632, sum loss: 5347.293457, avg loss: 2.907718, ppl: 18.314959 +epoch: 0, batch: 39633, sum loss: 4930.461914, avg loss: 2.954141, ppl: 19.185244 +epoch: 0, batch: 39634, sum loss: 5512.082520, avg loss: 3.107149, ppl: 22.357210 +epoch: 0, batch: 39635, sum loss: 4652.747559, avg loss: 2.872066, ppl: 17.673502 +epoch: 0, batch: 39636, sum loss: 4483.586914, avg loss: 2.949728, ppl: 19.100763 +epoch: 0, batch: 39637, sum loss: 5422.315430, avg loss: 2.956551, ppl: 19.231533 +epoch: 0, batch: 39638, sum loss: 4953.510254, avg loss: 2.850121, ppl: 17.289875 +epoch: 0, batch: 39639, sum loss: 5855.201172, avg loss: 3.046411, ppl: 21.039694 +epoch: 0, batch: 39640, sum loss: 5322.066406, avg loss: 3.227451, ppl: 25.215298 +epoch: 0, batch: 39641, sum loss: 5325.355469, avg loss: 2.973398, ppl: 19.558260 +epoch: 0, batch: 39642, sum loss: 5255.626953, avg loss: 2.879796, ppl: 17.810631 +epoch: 0, batch: 39643, sum loss: 4469.066406, avg loss: 2.703609, ppl: 14.933537 +epoch: 0, batch: 39644, sum loss: 4514.705078, avg loss: 2.774865, ppl: 16.036457 +epoch: 0, batch: 39645, sum loss: 5306.916992, avg loss: 3.136476, ppl: 23.022587 +epoch: 0, batch: 39646, sum loss: 4242.659180, avg loss: 2.709233, ppl: 15.017754 +epoch: 0, batch: 39647, sum loss: 5614.712891, avg loss: 3.054795, ppl: 
21.216831 +epoch: 0, batch: 39648, sum loss: 5422.297363, avg loss: 2.977648, ppl: 19.641571 +epoch: 0, batch: 39649, sum loss: 5189.930664, avg loss: 2.977585, ppl: 19.640329 +epoch: 0, batch: 39650, sum loss: 4091.459473, avg loss: 2.859161, ppl: 17.446886 +epoch: 0, batch: 39651, sum loss: 4712.500977, avg loss: 2.783521, ppl: 16.175879 +epoch: 0, batch: 39652, sum loss: 4943.159668, avg loss: 2.868926, ppl: 17.618088 +epoch: 0, batch: 39653, sum loss: 3897.832764, avg loss: 2.682610, ppl: 14.623214 +epoch: 0, batch: 39654, sum loss: 4408.688965, avg loss: 2.776253, ppl: 16.058731 +epoch: 0, batch: 39655, sum loss: 4774.042969, avg loss: 2.860421, ppl: 17.468887 +epoch: 0, batch: 39656, sum loss: 5639.282227, avg loss: 2.988491, ppl: 19.855694 +epoch: 0, batch: 39657, sum loss: 4044.575928, avg loss: 2.574523, ppl: 13.125058 +epoch: 0, batch: 39658, sum loss: 5745.617676, avg loss: 3.148284, ppl: 23.296047 +epoch: 0, batch: 39659, sum loss: 4052.227783, avg loss: 2.622801, ppl: 13.774256 +epoch: 0, batch: 39660, sum loss: 4207.599609, avg loss: 2.554705, ppl: 12.867505 +epoch: 0, batch: 39661, sum loss: 4400.859863, avg loss: 2.740261, ppl: 15.491033 +epoch: 0, batch: 39662, sum loss: 4439.580566, avg loss: 2.975590, ppl: 19.601189 +epoch: 0, batch: 39663, sum loss: 4699.387695, avg loss: 2.906239, ppl: 18.287880 +epoch: 0, batch: 39664, sum loss: 4490.990234, avg loss: 2.828080, ppl: 16.912952 +epoch: 0, batch: 39665, sum loss: 5220.314941, avg loss: 2.858880, ppl: 17.441982 +epoch: 0, batch: 39666, sum loss: 4498.041016, avg loss: 2.706403, ppl: 14.975306 +epoch: 0, batch: 39667, sum loss: 4470.463379, avg loss: 2.899133, ppl: 18.158400 +epoch: 0, batch: 39668, sum loss: 5343.662109, avg loss: 2.825839, ppl: 16.875101 +epoch: 0, batch: 39669, sum loss: 4929.259766, avg loss: 2.875881, ppl: 17.741047 +epoch: 0, batch: 39670, sum loss: 4794.979004, avg loss: 2.689276, ppl: 14.721014 +epoch: 0, batch: 39671, sum loss: 5243.023438, avg loss: 3.238433, ppl: 
25.493752 +epoch: 0, batch: 39672, sum loss: 5433.183594, avg loss: 2.871661, ppl: 17.666346 +epoch: 0, batch: 39673, sum loss: 4521.429688, avg loss: 2.951325, ppl: 19.131285 +epoch: 0, batch: 39674, sum loss: 4499.326172, avg loss: 2.643552, ppl: 14.063071 +epoch: 0, batch: 39675, sum loss: 4490.832031, avg loss: 2.972093, ppl: 19.532751 +epoch: 0, batch: 39676, sum loss: 4661.384277, avg loss: 3.040694, ppl: 20.919762 +epoch: 0, batch: 39677, sum loss: 5049.712891, avg loss: 2.856173, ppl: 17.394821 +epoch: 0, batch: 39678, sum loss: 5116.718262, avg loss: 2.945722, ppl: 19.024385 +epoch: 0, batch: 39679, sum loss: 5571.362793, avg loss: 2.968227, ppl: 19.457399 +epoch: 0, batch: 39680, sum loss: 4484.665039, avg loss: 2.559741, ppl: 12.932462 +epoch: 0, batch: 39681, sum loss: 5338.929688, avg loss: 2.980977, ppl: 19.707058 +epoch: 0, batch: 39682, sum loss: 4862.663086, avg loss: 2.838682, ppl: 17.093229 +epoch: 0, batch: 39683, sum loss: 5446.349609, avg loss: 2.966421, ppl: 19.422289 +epoch: 0, batch: 39684, sum loss: 4608.646484, avg loss: 2.808438, ppl: 16.583990 +epoch: 0, batch: 39685, sum loss: 5910.062500, avg loss: 3.015338, ppl: 20.395983 +epoch: 0, batch: 39686, sum loss: 4468.461914, avg loss: 2.851603, ppl: 17.315521 +epoch: 0, batch: 39687, sum loss: 5329.539062, avg loss: 3.071780, ppl: 21.580292 +epoch: 0, batch: 39688, sum loss: 4277.750488, avg loss: 2.790444, ppl: 16.288248 +epoch: 0, batch: 39689, sum loss: 5739.692383, avg loss: 3.008224, ppl: 20.251411 +epoch: 0, batch: 39690, sum loss: 5058.537109, avg loss: 2.975610, ppl: 19.601582 +epoch: 0, batch: 39691, sum loss: 4986.272461, avg loss: 2.690919, ppl: 14.745216 +epoch: 0, batch: 39692, sum loss: 4296.996094, avg loss: 2.731720, ppl: 15.359285 +epoch: 0, batch: 39693, sum loss: 4692.449707, avg loss: 2.752170, ppl: 15.676615 +epoch: 0, batch: 39694, sum loss: 4446.226562, avg loss: 2.929003, ppl: 18.708969 +epoch: 0, batch: 39695, sum loss: 4322.695312, avg loss: 2.785242, ppl: 
16.203739 +epoch: 0, batch: 39696, sum loss: 4303.377930, avg loss: 2.992613, ppl: 19.937717 +epoch: 0, batch: 39697, sum loss: 4380.899902, avg loss: 2.813680, ppl: 16.671158 +epoch: 0, batch: 39698, sum loss: 4644.576660, avg loss: 2.621093, ppl: 13.750746 +epoch: 0, batch: 39699, sum loss: 4894.061523, avg loss: 2.838783, ppl: 17.094948 +epoch: 0, batch: 39700, sum loss: 4436.612305, avg loss: 2.634568, ppl: 13.937290 +epoch: 0, batch: 39701, sum loss: 3529.017822, avg loss: 2.425442, ppl: 11.307223 +epoch: 0, batch: 39702, sum loss: 5101.626953, avg loss: 2.942115, ppl: 18.955889 +epoch: 0, batch: 39703, sum loss: 4333.184570, avg loss: 2.833999, ppl: 17.013365 +epoch: 0, batch: 39704, sum loss: 5158.195801, avg loss: 3.025335, ppl: 20.600901 +epoch: 0, batch: 39705, sum loss: 4061.616211, avg loss: 2.630581, ppl: 13.881828 +epoch: 0, batch: 39706, sum loss: 5336.663574, avg loss: 3.090135, ppl: 21.980053 +epoch: 0, batch: 39707, sum loss: 4724.308594, avg loss: 2.901909, ppl: 18.208879 +epoch: 0, batch: 39708, sum loss: 4725.785645, avg loss: 2.736413, ppl: 15.431537 +epoch: 0, batch: 39709, sum loss: 4740.284668, avg loss: 2.759188, ppl: 15.787018 +epoch: 0, batch: 39710, sum loss: 4795.792480, avg loss: 2.873453, ppl: 17.698019 +epoch: 0, batch: 39711, sum loss: 5018.959473, avg loss: 3.140776, ppl: 23.121796 +epoch: 0, batch: 39712, sum loss: 4401.626465, avg loss: 2.845266, ppl: 17.206137 +epoch: 0, batch: 39713, sum loss: 5769.291016, avg loss: 2.987722, ppl: 19.840433 +epoch: 0, batch: 39714, sum loss: 3669.467529, avg loss: 2.754856, ppl: 15.718772 +epoch: 0, batch: 39715, sum loss: 4626.181641, avg loss: 2.852146, ppl: 17.324928 +epoch: 0, batch: 39716, sum loss: 4176.796875, avg loss: 2.674006, ppl: 14.497928 +epoch: 0, batch: 39717, sum loss: 4245.834473, avg loss: 2.773243, ppl: 16.010468 +epoch: 0, batch: 39718, sum loss: 4606.868652, avg loss: 2.872113, ppl: 17.674320 +epoch: 0, batch: 39719, sum loss: 5882.155273, avg loss: 3.152280, ppl: 
23.389339 +epoch: 0, batch: 39720, sum loss: 3909.359863, avg loss: 2.637895, ppl: 13.983735 +epoch: 0, batch: 39721, sum loss: 5256.361328, avg loss: 3.110273, ppl: 22.427164 +epoch: 0, batch: 39722, sum loss: 5139.452148, avg loss: 2.977666, ppl: 19.641926 +epoch: 0, batch: 39723, sum loss: 4722.265625, avg loss: 3.017422, ppl: 20.438532 +epoch: 0, batch: 39724, sum loss: 4890.409668, avg loss: 2.747421, ppl: 15.602345 +epoch: 0, batch: 39725, sum loss: 4520.881836, avg loss: 2.778661, ppl: 16.097456 +epoch: 0, batch: 39726, sum loss: 5417.500488, avg loss: 3.177420, ppl: 23.984785 +epoch: 0, batch: 39727, sum loss: 3933.427490, avg loss: 2.686767, ppl: 14.684131 +epoch: 0, batch: 39728, sum loss: 5976.338379, avg loss: 3.177213, ppl: 23.979839 +epoch: 0, batch: 39729, sum loss: 4915.886719, avg loss: 2.752456, ppl: 15.681100 +epoch: 0, batch: 39730, sum loss: 5812.458496, avg loss: 3.000753, ppl: 20.100666 +epoch: 0, batch: 39731, sum loss: 4195.064453, avg loss: 2.736506, ppl: 15.432975 +epoch: 0, batch: 39732, sum loss: 5625.589355, avg loss: 2.990744, ppl: 19.900480 +epoch: 0, batch: 39733, sum loss: 3835.219727, avg loss: 2.570523, ppl: 13.072657 +epoch: 0, batch: 39734, sum loss: 4563.566406, avg loss: 2.888333, ppl: 17.963341 +epoch: 0, batch: 39735, sum loss: 4757.050781, avg loss: 2.865693, ppl: 17.561224 +epoch: 0, batch: 39736, sum loss: 5095.096191, avg loss: 3.007731, ppl: 20.241419 +epoch: 0, batch: 39737, sum loss: 5132.967773, avg loss: 3.103366, ppl: 22.272804 +epoch: 0, batch: 39738, sum loss: 4184.109863, avg loss: 2.732926, ppl: 15.377819 +epoch: 0, batch: 39739, sum loss: 4007.379150, avg loss: 2.715027, ppl: 15.105012 +epoch: 0, batch: 39740, sum loss: 4279.822754, avg loss: 2.759396, ppl: 15.790297 +epoch: 0, batch: 39741, sum loss: 4840.976562, avg loss: 2.918009, ppl: 18.504404 +epoch: 0, batch: 39742, sum loss: 4818.390625, avg loss: 2.839358, ppl: 17.104782 +epoch: 0, batch: 39743, sum loss: 4487.381836, avg loss: 2.822253, ppl: 
16.814688 +epoch: 0, batch: 39744, sum loss: 5752.197754, avg loss: 2.977328, ppl: 19.635283 +epoch: 0, batch: 39745, sum loss: 4522.921387, avg loss: 2.848187, ppl: 17.256475 +epoch: 0, batch: 39746, sum loss: 4791.065430, avg loss: 2.544379, ppl: 12.735314 +epoch: 0, batch: 39747, sum loss: 4932.047363, avg loss: 2.963971, ppl: 19.374754 +epoch: 0, batch: 39748, sum loss: 4453.903809, avg loss: 2.792416, ppl: 16.320408 +epoch: 0, batch: 39749, sum loss: 5422.265625, avg loss: 3.082584, ppl: 21.814707 +epoch: 0, batch: 39750, sum loss: 3964.508301, avg loss: 2.823724, ppl: 16.839441 +epoch: 0, batch: 39751, sum loss: 5383.980957, avg loss: 2.959858, ppl: 19.295225 +epoch: 0, batch: 39752, sum loss: 4902.700684, avg loss: 2.877172, ppl: 17.763962 +epoch: 0, batch: 39753, sum loss: 5464.555664, avg loss: 3.252712, ppl: 25.860373 +epoch: 0, batch: 39754, sum loss: 5162.610840, avg loss: 3.130753, ppl: 22.891201 +epoch: 0, batch: 39755, sum loss: 5013.650879, avg loss: 2.899740, ppl: 18.169424 +epoch: 0, batch: 39756, sum loss: 4517.954590, avg loss: 2.935643, ppl: 18.833609 +epoch: 0, batch: 39757, sum loss: 5368.332031, avg loss: 2.832893, ppl: 16.994553 +epoch: 0, batch: 39758, sum loss: 5547.674805, avg loss: 3.208603, ppl: 24.744497 +epoch: 0, batch: 39759, sum loss: 5050.303711, avg loss: 2.915880, ppl: 18.465050 +epoch: 0, batch: 39760, sum loss: 3973.000000, avg loss: 2.721233, ppl: 15.199049 +epoch: 0, batch: 39761, sum loss: 6291.893555, avg loss: 3.187382, ppl: 24.224936 +epoch: 0, batch: 39762, sum loss: 4320.330078, avg loss: 2.863042, ppl: 17.514723 +epoch: 0, batch: 39763, sum loss: 4624.445801, avg loss: 2.790854, ppl: 16.294937 +epoch: 0, batch: 39764, sum loss: 4884.467773, avg loss: 2.808780, ppl: 16.589663 +epoch: 0, batch: 39765, sum loss: 3986.899902, avg loss: 2.585538, ppl: 13.270429 +epoch: 0, batch: 39766, sum loss: 4552.165039, avg loss: 3.150287, ppl: 23.342766 +epoch: 0, batch: 39767, sum loss: 5038.571289, avg loss: 2.768446, ppl: 
15.933853 +epoch: 0, batch: 39768, sum loss: 4017.702393, avg loss: 2.552543, ppl: 12.839709 +epoch: 0, batch: 39769, sum loss: 5367.347168, avg loss: 3.060061, ppl: 21.328857 +epoch: 0, batch: 39770, sum loss: 4786.221680, avg loss: 2.808815, ppl: 16.590256 +epoch: 0, batch: 39771, sum loss: 5198.894531, avg loss: 2.853400, ppl: 17.346661 +epoch: 0, batch: 39772, sum loss: 4942.739258, avg loss: 2.829273, ppl: 16.933138 +epoch: 0, batch: 39773, sum loss: 4440.428711, avg loss: 2.747790, ppl: 15.608102 +epoch: 0, batch: 39774, sum loss: 4619.936523, avg loss: 2.907449, ppl: 18.310030 +epoch: 0, batch: 39775, sum loss: 4851.624023, avg loss: 2.761311, ppl: 15.820575 +epoch: 0, batch: 39776, sum loss: 5039.564453, avg loss: 2.856896, ppl: 17.407410 +epoch: 0, batch: 39777, sum loss: 3584.695068, avg loss: 2.675146, ppl: 14.514463 +epoch: 0, batch: 39778, sum loss: 4854.882812, avg loss: 3.232279, ppl: 25.337330 +epoch: 0, batch: 39779, sum loss: 5546.320312, avg loss: 2.822555, ppl: 16.819767 +epoch: 0, batch: 39780, sum loss: 4438.717773, avg loss: 2.772466, ppl: 15.998035 +epoch: 0, batch: 39781, sum loss: 4598.881836, avg loss: 2.905169, ppl: 18.268322 +epoch: 0, batch: 39782, sum loss: 4987.385742, avg loss: 3.188865, ppl: 24.260885 +epoch: 0, batch: 39783, sum loss: 5101.333496, avg loss: 2.941945, ppl: 18.952679 +epoch: 0, batch: 39784, sum loss: 5370.750977, avg loss: 2.990396, ppl: 19.893559 +epoch: 0, batch: 39785, sum loss: 4222.595215, avg loss: 2.670838, ppl: 14.452077 +epoch: 0, batch: 39786, sum loss: 4485.722656, avg loss: 2.962829, ppl: 19.352640 +epoch: 0, batch: 39787, sum loss: 5002.586914, avg loss: 2.845613, ppl: 17.212099 +epoch: 0, batch: 39788, sum loss: 4884.433594, avg loss: 2.834842, ppl: 17.027718 +epoch: 0, batch: 39789, sum loss: 4293.571289, avg loss: 2.779011, ppl: 16.103083 +epoch: 0, batch: 39790, sum loss: 5184.134766, avg loss: 2.767824, ppl: 15.923948 +epoch: 0, batch: 39791, sum loss: 5499.062012, avg loss: 3.029786, ppl: 
20.692812 +epoch: 0, batch: 39792, sum loss: 5492.508789, avg loss: 2.913798, ppl: 18.426647 +epoch: 0, batch: 39793, sum loss: 5740.396973, avg loss: 3.154064, ppl: 23.431105 +epoch: 0, batch: 39794, sum loss: 5989.529297, avg loss: 3.199535, ppl: 24.521128 +epoch: 0, batch: 39795, sum loss: 5905.894531, avg loss: 3.008607, ppl: 20.259153 +epoch: 0, batch: 39796, sum loss: 4820.373047, avg loss: 2.813995, ppl: 16.676401 +epoch: 0, batch: 39797, sum loss: 4403.242188, avg loss: 2.701375, ppl: 14.900212 +epoch: 0, batch: 39798, sum loss: 4602.852051, avg loss: 2.916890, ppl: 18.483707 +epoch: 0, batch: 39799, sum loss: 3544.193115, avg loss: 2.676883, ppl: 14.539702 +epoch: 0, batch: 39800, sum loss: 5072.438477, avg loss: 3.037388, ppl: 20.850718 +epoch: 0, batch: 39801, sum loss: 4339.421875, avg loss: 2.708753, ppl: 15.010540 +epoch: 0, batch: 39802, sum loss: 4911.687012, avg loss: 2.944657, ppl: 19.004135 +epoch: 0, batch: 39803, sum loss: 4849.313477, avg loss: 2.916003, ppl: 18.467331 +epoch: 0, batch: 39804, sum loss: 4286.685059, avg loss: 2.639584, ppl: 14.007380 +epoch: 0, batch: 39805, sum loss: 5460.685547, avg loss: 2.954916, ppl: 19.200119 +epoch: 0, batch: 39806, sum loss: 4199.875000, avg loss: 2.805528, ppl: 16.535799 +epoch: 0, batch: 39807, sum loss: 4982.591797, avg loss: 2.906996, ppl: 18.301746 +epoch: 0, batch: 39808, sum loss: 5185.844238, avg loss: 2.903608, ppl: 18.239841 +epoch: 0, batch: 39809, sum loss: 4909.342285, avg loss: 3.083758, ppl: 21.840317 +epoch: 0, batch: 39810, sum loss: 4894.406250, avg loss: 2.880757, ppl: 17.827765 +epoch: 0, batch: 39811, sum loss: 5700.921875, avg loss: 2.972326, ppl: 19.537315 +epoch: 0, batch: 39812, sum loss: 4504.662109, avg loss: 2.766991, ppl: 15.910692 +epoch: 0, batch: 39813, sum loss: 4496.734863, avg loss: 2.784356, ppl: 16.189386 +epoch: 0, batch: 39814, sum loss: 3876.856689, avg loss: 2.651749, ppl: 14.178814 +epoch: 0, batch: 39815, sum loss: 5489.526855, avg loss: 3.153088, ppl: 
23.408245 +epoch: 0, batch: 39816, sum loss: 4741.528809, avg loss: 2.910699, ppl: 18.369637 +epoch: 0, batch: 39817, sum loss: 3942.562744, avg loss: 2.796144, ppl: 16.381359 +epoch: 0, batch: 39818, sum loss: 5164.419434, avg loss: 2.964649, ppl: 19.387905 +epoch: 0, batch: 39819, sum loss: 4730.367676, avg loss: 2.769536, ppl: 15.951231 +epoch: 0, batch: 39820, sum loss: 6320.636719, avg loss: 3.281743, ppl: 26.622135 +epoch: 0, batch: 39821, sum loss: 4176.843262, avg loss: 2.912722, ppl: 18.406832 +epoch: 0, batch: 39822, sum loss: 4970.325195, avg loss: 2.832094, ppl: 16.980982 +epoch: 0, batch: 39823, sum loss: 4254.328613, avg loss: 2.802588, ppl: 16.487261 +epoch: 0, batch: 39824, sum loss: 4148.218262, avg loss: 2.754461, ppl: 15.712570 +epoch: 0, batch: 39825, sum loss: 5443.940430, avg loss: 3.084386, ppl: 21.854036 +epoch: 0, batch: 39826, sum loss: 4221.922852, avg loss: 2.614194, ppl: 13.656200 +epoch: 0, batch: 39827, sum loss: 5283.971680, avg loss: 3.188879, ppl: 24.261204 +epoch: 0, batch: 39828, sum loss: 4702.265137, avg loss: 2.789007, ppl: 16.264856 +epoch: 0, batch: 39829, sum loss: 4122.175293, avg loss: 2.909086, ppl: 18.340036 +epoch: 0, batch: 39830, sum loss: 4838.818359, avg loss: 2.939744, ppl: 18.911005 +epoch: 0, batch: 39831, sum loss: 4593.434082, avg loss: 2.885323, ppl: 17.909348 +epoch: 0, batch: 39832, sum loss: 5673.727539, avg loss: 3.024375, ppl: 20.581142 +epoch: 0, batch: 39833, sum loss: 5128.760254, avg loss: 3.211497, ppl: 24.816204 +epoch: 0, batch: 39834, sum loss: 4619.738281, avg loss: 2.749844, ppl: 15.640197 +epoch: 0, batch: 39835, sum loss: 4585.007812, avg loss: 2.799150, ppl: 16.430676 +epoch: 0, batch: 39836, sum loss: 4187.536133, avg loss: 2.604189, ppl: 13.520258 +epoch: 0, batch: 39837, sum loss: 5135.876953, avg loss: 2.948265, ppl: 19.072826 +epoch: 0, batch: 39838, sum loss: 4578.285156, avg loss: 2.899484, ppl: 18.164768 +epoch: 0, batch: 39839, sum loss: 4843.392578, avg loss: 2.766073, ppl: 
15.896080 +epoch: 0, batch: 39840, sum loss: 5418.560547, avg loss: 3.018697, ppl: 20.464605 +epoch: 0, batch: 39841, sum loss: 5154.261719, avg loss: 2.903809, ppl: 18.243507 +epoch: 0, batch: 39842, sum loss: 4741.153809, avg loss: 2.575315, ppl: 13.135448 +epoch: 0, batch: 39843, sum loss: 4214.768066, avg loss: 2.821130, ppl: 16.795816 +epoch: 0, batch: 39844, sum loss: 4943.903320, avg loss: 2.817039, ppl: 16.727243 +epoch: 0, batch: 39845, sum loss: 4562.649902, avg loss: 2.787202, ppl: 16.235531 +epoch: 0, batch: 39846, sum loss: 5485.011230, avg loss: 3.062541, ppl: 21.381819 +epoch: 0, batch: 39847, sum loss: 4233.368164, avg loss: 2.852674, ppl: 17.334072 +epoch: 0, batch: 39848, sum loss: 4909.816895, avg loss: 3.034497, ppl: 20.790527 +epoch: 0, batch: 39849, sum loss: 5150.536133, avg loss: 3.098999, ppl: 22.175743 +epoch: 0, batch: 39850, sum loss: 4979.346680, avg loss: 2.760170, ppl: 15.802529 +epoch: 0, batch: 39851, sum loss: 4897.997559, avg loss: 2.844366, ppl: 17.190649 +epoch: 0, batch: 39852, sum loss: 5126.327637, avg loss: 2.844799, ppl: 17.198103 +epoch: 0, batch: 39853, sum loss: 5395.235840, avg loss: 2.953057, ppl: 19.164455 +epoch: 0, batch: 39854, sum loss: 4750.441406, avg loss: 3.076711, ppl: 21.686954 +epoch: 0, batch: 39855, sum loss: 5243.987793, avg loss: 3.018991, ppl: 20.470631 +epoch: 0, batch: 39856, sum loss: 4425.181641, avg loss: 2.734970, ppl: 15.409283 +epoch: 0, batch: 39857, sum loss: 5190.453125, avg loss: 2.886792, ppl: 17.935688 +epoch: 0, batch: 39858, sum loss: 4314.035645, avg loss: 2.515473, ppl: 12.372456 +epoch: 0, batch: 39859, sum loss: 6627.690918, avg loss: 3.325485, ppl: 27.812477 +epoch: 0, batch: 39860, sum loss: 5710.864258, avg loss: 3.044171, ppl: 20.992619 +epoch: 0, batch: 39861, sum loss: 4108.270996, avg loss: 2.796644, ppl: 16.389559 +epoch: 0, batch: 39862, sum loss: 3721.362549, avg loss: 2.654324, ppl: 14.215378 +epoch: 0, batch: 39863, sum loss: 4153.976074, avg loss: 2.793528, ppl: 
16.338558 +epoch: 0, batch: 39864, sum loss: 5729.686035, avg loss: 2.877793, ppl: 17.775002 +epoch: 0, batch: 39865, sum loss: 4939.443359, avg loss: 3.000877, ppl: 20.103157 +epoch: 0, batch: 39866, sum loss: 4871.620117, avg loss: 2.840595, ppl: 17.125948 +epoch: 0, batch: 39867, sum loss: 4804.399414, avg loss: 2.754816, ppl: 15.718149 +epoch: 0, batch: 39868, sum loss: 5309.063965, avg loss: 3.093860, ppl: 22.062077 +epoch: 0, batch: 39869, sum loss: 5279.783203, avg loss: 3.027399, ppl: 20.643461 +epoch: 0, batch: 39870, sum loss: 4347.193359, avg loss: 2.751388, ppl: 15.664364 +epoch: 0, batch: 39871, sum loss: 5184.284668, avg loss: 3.021145, ppl: 20.514765 +epoch: 0, batch: 39872, sum loss: 4545.325684, avg loss: 2.728287, ppl: 15.306641 +epoch: 0, batch: 39873, sum loss: 4753.145996, avg loss: 2.782872, ppl: 16.165384 +epoch: 0, batch: 39874, sum loss: 5254.889160, avg loss: 2.982344, ppl: 19.734013 +epoch: 0, batch: 39875, sum loss: 5034.907715, avg loss: 3.044080, ppl: 20.990705 +epoch: 0, batch: 39876, sum loss: 5427.925293, avg loss: 2.967701, ppl: 19.447163 +epoch: 0, batch: 39877, sum loss: 4570.350098, avg loss: 2.691608, ppl: 14.755383 +epoch: 0, batch: 39878, sum loss: 4408.974609, avg loss: 2.769456, ppl: 15.949961 +epoch: 0, batch: 39879, sum loss: 3462.615967, avg loss: 2.615269, ppl: 13.670893 +epoch: 0, batch: 39880, sum loss: 4453.798340, avg loss: 2.840433, ppl: 17.123173 +epoch: 0, batch: 39881, sum loss: 5231.881836, avg loss: 2.927746, ppl: 18.685463 +epoch: 0, batch: 39882, sum loss: 5528.248535, avg loss: 2.903492, ppl: 18.237720 +epoch: 0, batch: 39883, sum loss: 4241.150391, avg loss: 2.829320, ppl: 16.933941 +epoch: 0, batch: 39884, sum loss: 5069.727051, avg loss: 3.001615, ppl: 20.117992 +epoch: 0, batch: 39885, sum loss: 3964.749268, avg loss: 2.691615, ppl: 14.755492 +epoch: 0, batch: 39886, sum loss: 4555.147949, avg loss: 2.579359, ppl: 13.188679 +epoch: 0, batch: 39887, sum loss: 3835.108887, avg loss: 2.822008, ppl: 
16.810570 +epoch: 0, batch: 39888, sum loss: 4773.547363, avg loss: 2.853286, ppl: 17.344683 +epoch: 0, batch: 39889, sum loss: 4353.787598, avg loss: 2.908342, ppl: 18.326382 +epoch: 0, batch: 39890, sum loss: 3930.088135, avg loss: 2.773527, ppl: 16.015026 +epoch: 0, batch: 39891, sum loss: 4902.488770, avg loss: 3.035597, ppl: 20.813396 +epoch: 0, batch: 39892, sum loss: 4511.567383, avg loss: 2.687056, ppl: 14.688371 +epoch: 0, batch: 39893, sum loss: 4224.251953, avg loss: 2.700928, ppl: 14.893553 +epoch: 0, batch: 39894, sum loss: 5521.129883, avg loss: 3.128119, ppl: 22.830994 +epoch: 0, batch: 39895, sum loss: 3968.701904, avg loss: 2.477342, ppl: 11.909565 +epoch: 0, batch: 39896, sum loss: 4661.000000, avg loss: 2.663429, ppl: 14.345388 +epoch: 0, batch: 39897, sum loss: 4027.154785, avg loss: 2.830045, ppl: 16.946232 +epoch: 0, batch: 39898, sum loss: 4399.878418, avg loss: 2.656931, ppl: 14.252487 +epoch: 0, batch: 39899, sum loss: 5816.912109, avg loss: 3.348827, ppl: 28.469316 +epoch: 0, batch: 39900, sum loss: 5054.899902, avg loss: 3.080378, ppl: 21.766619 +epoch: 0, batch: 39901, sum loss: 5437.241699, avg loss: 3.077103, ppl: 21.695467 +epoch: 0, batch: 39902, sum loss: 5573.371094, avg loss: 2.955128, ppl: 19.204180 +epoch: 0, batch: 39903, sum loss: 5416.440918, avg loss: 2.774816, ppl: 16.035681 +epoch: 0, batch: 39904, sum loss: 4975.597656, avg loss: 2.833484, ppl: 17.004601 +epoch: 0, batch: 39905, sum loss: 4557.724609, avg loss: 2.818630, ppl: 16.753881 +epoch: 0, batch: 39906, sum loss: 4223.297852, avg loss: 3.023119, ppl: 20.555313 +epoch: 0, batch: 39907, sum loss: 4710.077148, avg loss: 2.900294, ppl: 18.179485 +epoch: 0, batch: 39908, sum loss: 5126.705078, avg loss: 2.862482, ppl: 17.504917 +epoch: 0, batch: 39909, sum loss: 4567.306152, avg loss: 2.720254, ppl: 15.184175 +epoch: 0, batch: 39910, sum loss: 4949.247070, avg loss: 3.034486, ppl: 20.790295 +epoch: 0, batch: 39911, sum loss: 5353.999023, avg loss: 3.229191, ppl: 
25.259222 +epoch: 0, batch: 39912, sum loss: 5065.276367, avg loss: 3.088583, ppl: 21.945963 +epoch: 0, batch: 39913, sum loss: 4681.883789, avg loss: 2.898999, ppl: 18.155966 +epoch: 0, batch: 39914, sum loss: 6297.427246, avg loss: 3.380261, ppl: 29.378450 +epoch: 0, batch: 39915, sum loss: 4584.080078, avg loss: 2.921657, ppl: 18.572037 +epoch: 0, batch: 39916, sum loss: 5131.456543, avg loss: 3.119426, ppl: 22.633390 +epoch: 0, batch: 39917, sum loss: 4984.491699, avg loss: 2.769162, ppl: 15.945270 +epoch: 0, batch: 39918, sum loss: 5075.054688, avg loss: 3.008331, ppl: 20.253574 +epoch: 0, batch: 39919, sum loss: 4094.481445, avg loss: 2.713374, ppl: 15.080072 +epoch: 0, batch: 39920, sum loss: 4788.150879, avg loss: 2.944742, ppl: 19.005762 +epoch: 0, batch: 39921, sum loss: 3905.715332, avg loss: 2.815945, ppl: 16.708952 +epoch: 0, batch: 39922, sum loss: 4875.201660, avg loss: 3.029958, ppl: 20.696354 +epoch: 0, batch: 39923, sum loss: 4670.183594, avg loss: 2.786506, ppl: 16.224232 +epoch: 0, batch: 39924, sum loss: 5174.730469, avg loss: 3.038597, ppl: 20.875931 +epoch: 0, batch: 39925, sum loss: 5640.465820, avg loss: 2.953124, ppl: 19.165726 +epoch: 0, batch: 39926, sum loss: 4826.494141, avg loss: 2.812642, ppl: 16.653866 +epoch: 0, batch: 39927, sum loss: 5143.777344, avg loss: 2.930927, ppl: 18.745005 +epoch: 0, batch: 39928, sum loss: 4832.407227, avg loss: 2.873013, ppl: 17.690231 +epoch: 0, batch: 39929, sum loss: 5967.504883, avg loss: 3.127623, ppl: 22.819675 +epoch: 0, batch: 39930, sum loss: 5564.689941, avg loss: 3.185283, ppl: 24.174133 +epoch: 0, batch: 39931, sum loss: 4035.931152, avg loss: 2.612253, ppl: 13.629726 +epoch: 0, batch: 39932, sum loss: 4368.661621, avg loss: 2.766727, ppl: 15.906483 +epoch: 0, batch: 39933, sum loss: 4399.427734, avg loss: 2.637547, ppl: 13.978868 +epoch: 0, batch: 39934, sum loss: 4414.155762, avg loss: 2.670391, ppl: 14.445611 +epoch: 0, batch: 39935, sum loss: 4208.539062, avg loss: 2.566182, ppl: 
13.016039 +epoch: 0, batch: 39936, sum loss: 5335.791504, avg loss: 3.026541, ppl: 20.625759 +epoch: 0, batch: 39937, sum loss: 5819.236328, avg loss: 2.852567, ppl: 17.332212 +epoch: 0, batch: 39938, sum loss: 4023.647949, avg loss: 2.579262, ppl: 13.187396 +epoch: 0, batch: 39939, sum loss: 4861.707031, avg loss: 3.036669, ppl: 20.835724 +epoch: 0, batch: 39940, sum loss: 4944.823242, avg loss: 2.984202, ppl: 19.770721 +epoch: 0, batch: 39941, sum loss: 4599.245605, avg loss: 2.744180, ppl: 15.551856 +epoch: 0, batch: 39942, sum loss: 4514.914551, avg loss: 2.802554, ppl: 16.486702 +epoch: 0, batch: 39943, sum loss: 5702.831543, avg loss: 3.196655, ppl: 24.450596 +epoch: 0, batch: 39944, sum loss: 5623.821289, avg loss: 3.009000, ppl: 20.267124 +epoch: 0, batch: 39945, sum loss: 5979.015625, avg loss: 3.072464, ppl: 21.595057 +epoch: 0, batch: 39946, sum loss: 5728.197266, avg loss: 2.936032, ppl: 18.840929 +epoch: 0, batch: 39947, sum loss: 5115.532227, avg loss: 2.829387, ppl: 16.935080 +epoch: 0, batch: 39948, sum loss: 5286.754883, avg loss: 2.782503, ppl: 16.159412 +epoch: 0, batch: 39949, sum loss: 4881.148926, avg loss: 2.954691, ppl: 19.195784 +epoch: 0, batch: 39950, sum loss: 5022.160645, avg loss: 2.837379, ppl: 17.070963 +epoch: 0, batch: 39951, sum loss: 4409.372559, avg loss: 2.728572, ppl: 15.311009 +epoch: 0, batch: 39952, sum loss: 4073.763672, avg loss: 2.797915, ppl: 16.410391 +epoch: 0, batch: 39953, sum loss: 5410.129395, avg loss: 3.080940, ppl: 21.778854 +epoch: 0, batch: 39954, sum loss: 4781.370605, avg loss: 2.768599, ppl: 15.936292 +epoch: 0, batch: 39955, sum loss: 5315.981934, avg loss: 2.781780, ppl: 16.147743 +epoch: 0, batch: 39956, sum loss: 5403.059570, avg loss: 3.043977, ppl: 20.988550 +epoch: 0, batch: 39957, sum loss: 4663.813477, avg loss: 2.692733, ppl: 14.771994 +epoch: 0, batch: 39958, sum loss: 4618.256348, avg loss: 2.800641, ppl: 16.455185 +epoch: 0, batch: 39959, sum loss: 4573.849121, avg loss: 2.989444, ppl: 
19.874630 +epoch: 0, batch: 39960, sum loss: 4331.867188, avg loss: 2.690601, ppl: 14.740527 +epoch: 0, batch: 39961, sum loss: 5147.867676, avg loss: 2.896943, ppl: 18.118670 +epoch: 0, batch: 39962, sum loss: 3987.358887, avg loss: 2.528446, ppl: 12.534010 +epoch: 0, batch: 39963, sum loss: 4478.984863, avg loss: 2.759695, ppl: 15.795022 +epoch: 0, batch: 39964, sum loss: 5178.156738, avg loss: 2.996619, ppl: 20.017733 +epoch: 0, batch: 39965, sum loss: 3984.589111, avg loss: 2.570703, ppl: 13.075010 +epoch: 0, batch: 39966, sum loss: 4129.573730, avg loss: 2.498230, ppl: 12.160947 +epoch: 0, batch: 39967, sum loss: 5401.327148, avg loss: 3.098868, ppl: 22.172840 +epoch: 0, batch: 39968, sum loss: 4956.441895, avg loss: 2.846894, ppl: 17.234165 +epoch: 0, batch: 39969, sum loss: 5171.077148, avg loss: 2.961671, ppl: 19.330244 +epoch: 0, batch: 39970, sum loss: 4756.730469, avg loss: 2.684385, ppl: 14.649194 +epoch: 0, batch: 39971, sum loss: 4881.233887, avg loss: 2.818264, ppl: 16.747755 +epoch: 0, batch: 39972, sum loss: 4528.119629, avg loss: 2.862275, ppl: 17.501299 +epoch: 0, batch: 39973, sum loss: 4131.838379, avg loss: 2.610132, ppl: 13.600840 +epoch: 0, batch: 39974, sum loss: 5658.052734, avg loss: 2.936198, ppl: 18.844061 +epoch: 0, batch: 39975, sum loss: 4479.804688, avg loss: 2.785948, ppl: 16.215183 +epoch: 0, batch: 39976, sum loss: 4614.069336, avg loss: 2.731835, ppl: 15.361051 +epoch: 0, batch: 39977, sum loss: 5383.188477, avg loss: 3.055158, ppl: 21.224543 +epoch: 0, batch: 39978, sum loss: 4776.172852, avg loss: 2.926577, ppl: 18.663628 +epoch: 0, batch: 39979, sum loss: 5207.615234, avg loss: 2.836392, ppl: 17.054117 +epoch: 0, batch: 39980, sum loss: 5690.079102, avg loss: 3.196674, ppl: 24.451061 +epoch: 0, batch: 39981, sum loss: 4711.322754, avg loss: 2.991316, ppl: 19.911865 +epoch: 0, batch: 39982, sum loss: 4638.343750, avg loss: 2.889934, ppl: 17.992119 +epoch: 0, batch: 39983, sum loss: 3360.518799, avg loss: 2.478259, ppl: 
11.920488 +epoch: 0, batch: 39984, sum loss: 5176.125488, avg loss: 2.914485, ppl: 18.439318 +epoch: 0, batch: 39985, sum loss: 4979.933594, avg loss: 2.863677, ppl: 17.525846 +epoch: 0, batch: 39986, sum loss: 4400.767090, avg loss: 2.842873, ppl: 17.165007 +epoch: 0, batch: 39987, sum loss: 5680.762207, avg loss: 3.032975, ppl: 20.758904 +epoch: 0, batch: 39988, sum loss: 4793.309570, avg loss: 2.905036, ppl: 18.265905 +epoch: 0, batch: 39989, sum loss: 4764.915527, avg loss: 2.826166, ppl: 16.880615 +epoch: 0, batch: 39990, sum loss: 4820.976074, avg loss: 2.819284, ppl: 16.764847 +epoch: 0, batch: 39991, sum loss: 4300.635254, avg loss: 2.749767, ppl: 15.638985 +epoch: 0, batch: 39992, sum loss: 3431.026855, avg loss: 2.524670, ppl: 12.486775 +epoch: 0, batch: 39993, sum loss: 3597.023926, avg loss: 2.482418, ppl: 11.970177 +epoch: 0, batch: 39994, sum loss: 4293.693848, avg loss: 2.589683, ppl: 13.325541 +epoch: 0, batch: 39995, sum loss: 5539.055176, avg loss: 3.163367, ppl: 23.650087 +epoch: 0, batch: 39996, sum loss: 4635.507812, avg loss: 2.909923, ppl: 18.355391 +epoch: 0, batch: 39997, sum loss: 4730.285645, avg loss: 3.114079, ppl: 22.512682 +epoch: 0, batch: 39998, sum loss: 5238.400391, avg loss: 2.804283, ppl: 16.515228 +epoch: 0, batch: 39999, sum loss: 4296.149414, avg loss: 2.606887, ppl: 13.556781 +epoch: 0, batch: 40000, sum loss: 3895.226074, avg loss: 2.569411, ppl: 13.058125 +epoch: 0, batch: 40001, sum loss: 4917.624512, avg loss: 2.826221, ppl: 16.881544 +epoch: 0, batch: 40002, sum loss: 4690.304688, avg loss: 2.611528, ppl: 13.619851 +epoch: 0, batch: 40003, sum loss: 5570.153320, avg loss: 3.244120, ppl: 25.639128 +epoch: 0, batch: 40004, sum loss: 4225.856934, avg loss: 2.647780, ppl: 14.122651 +epoch: 0, batch: 40005, sum loss: 4068.653320, avg loss: 2.721507, ppl: 15.203218 +epoch: 0, batch: 40006, sum loss: 4716.752441, avg loss: 2.664832, ppl: 14.365534 +epoch: 0, batch: 40007, sum loss: 4477.095215, avg loss: 2.951282, ppl: 
19.130468 +epoch: 0, batch: 40008, sum loss: 5679.987793, avg loss: 2.984755, ppl: 19.781647 +epoch: 0, batch: 40009, sum loss: 4748.799805, avg loss: 2.847002, ppl: 17.236034 +epoch: 0, batch: 40010, sum loss: 6902.909180, avg loss: 3.207671, ppl: 24.721453 +epoch: 0, batch: 40011, sum loss: 4737.422852, avg loss: 2.981386, ppl: 19.715132 +epoch: 0, batch: 40012, sum loss: 4272.620117, avg loss: 2.801718, ppl: 16.472923 +epoch: 0, batch: 40013, sum loss: 4812.700195, avg loss: 2.849438, ppl: 17.278065 +epoch: 0, batch: 40014, sum loss: 4653.304688, avg loss: 2.945130, ppl: 19.013126 +epoch: 0, batch: 40015, sum loss: 4262.714355, avg loss: 2.748365, ppl: 15.617080 +epoch: 0, batch: 40016, sum loss: 4632.398438, avg loss: 2.826356, ppl: 16.883831 +epoch: 0, batch: 40017, sum loss: 4856.905273, avg loss: 2.872209, ppl: 17.676023 +epoch: 0, batch: 40018, sum loss: 5929.449219, avg loss: 3.349971, ppl: 28.501915 +epoch: 0, batch: 40019, sum loss: 5354.555664, avg loss: 3.068513, ppl: 21.509897 +epoch: 0, batch: 40020, sum loss: 4693.495117, avg loss: 2.611851, ppl: 13.624239 +epoch: 0, batch: 40021, sum loss: 4051.345215, avg loss: 2.590374, ppl: 13.334761 +epoch: 0, batch: 40022, sum loss: 5589.510742, avg loss: 2.888636, ppl: 17.968781 +epoch: 0, batch: 40023, sum loss: 4598.711426, avg loss: 2.709907, ppl: 15.027871 +epoch: 0, batch: 40024, sum loss: 5186.300781, avg loss: 2.806440, ppl: 16.550890 +epoch: 0, batch: 40025, sum loss: 5871.578613, avg loss: 3.090305, ppl: 21.983778 +epoch: 0, batch: 40026, sum loss: 3962.585693, avg loss: 2.612120, ppl: 13.627907 +epoch: 0, batch: 40027, sum loss: 4134.021484, avg loss: 2.578928, ppl: 13.182999 +epoch: 0, batch: 40028, sum loss: 4600.071289, avg loss: 2.597443, ppl: 13.429354 +epoch: 0, batch: 40029, sum loss: 4242.242188, avg loss: 2.607401, ppl: 13.563758 +epoch: 0, batch: 40030, sum loss: 5305.402344, avg loss: 2.913455, ppl: 18.420340 +epoch: 0, batch: 40031, sum loss: 4790.000488, avg loss: 2.935050, ppl: 
18.822435 +epoch: 0, batch: 40032, sum loss: 4930.213379, avg loss: 2.957536, ppl: 19.250488 +epoch: 0, batch: 40033, sum loss: 5424.738281, avg loss: 2.956261, ppl: 19.225945 +epoch: 0, batch: 40034, sum loss: 4873.100586, avg loss: 2.874986, ppl: 17.725170 +epoch: 0, batch: 40035, sum loss: 4725.438965, avg loss: 2.737798, ppl: 15.452916 +epoch: 0, batch: 40036, sum loss: 5414.273926, avg loss: 2.727594, ppl: 15.296039 +epoch: 0, batch: 40037, sum loss: 4289.881836, avg loss: 2.904456, ppl: 18.255318 +epoch: 0, batch: 40038, sum loss: 5740.623535, avg loss: 3.175124, ppl: 23.929779 +epoch: 0, batch: 40039, sum loss: 4542.254883, avg loss: 2.805593, ppl: 16.536880 +epoch: 0, batch: 40040, sum loss: 5144.211914, avg loss: 2.780655, ppl: 16.129585 +epoch: 0, batch: 40041, sum loss: 5239.690918, avg loss: 2.970346, ppl: 19.498674 +epoch: 0, batch: 40042, sum loss: 5017.784180, avg loss: 2.809510, ppl: 16.601778 +epoch: 0, batch: 40043, sum loss: 4634.560547, avg loss: 2.948194, ppl: 19.071476 +epoch: 0, batch: 40044, sum loss: 3613.494141, avg loss: 2.488632, ppl: 12.044793 +epoch: 0, batch: 40045, sum loss: 4764.085938, avg loss: 2.857880, ppl: 17.424545 +epoch: 0, batch: 40046, sum loss: 3763.812988, avg loss: 2.428267, ppl: 11.339209 +epoch: 0, batch: 40047, sum loss: 3851.738037, avg loss: 2.547446, ppl: 12.774436 +epoch: 0, batch: 40048, sum loss: 4288.211914, avg loss: 2.611579, ppl: 13.620537 +epoch: 0, batch: 40049, sum loss: 4928.844727, avg loss: 3.042497, ppl: 20.957508 +epoch: 0, batch: 40050, sum loss: 5389.093262, avg loss: 3.036109, ppl: 20.824053 +epoch: 0, batch: 40051, sum loss: 4604.119141, avg loss: 2.850848, ppl: 17.302443 +epoch: 0, batch: 40052, sum loss: 5035.482422, avg loss: 2.997311, ppl: 20.031601 +epoch: 0, batch: 40053, sum loss: 3771.480713, avg loss: 2.521043, ppl: 12.441570 +epoch: 0, batch: 40054, sum loss: 6320.849609, avg loss: 3.266589, ppl: 26.221743 +epoch: 0, batch: 40055, sum loss: 5753.551270, avg loss: 3.066925, ppl: 
21.475763 +epoch: 0, batch: 40056, sum loss: 3944.610107, avg loss: 2.724178, ppl: 15.243883 +epoch: 0, batch: 40057, sum loss: 3899.978516, avg loss: 2.680398, ppl: 14.590892 +epoch: 0, batch: 40058, sum loss: 4094.674561, avg loss: 2.737082, ppl: 15.441860 +epoch: 0, batch: 40059, sum loss: 3784.444092, avg loss: 2.447894, ppl: 11.563969 +epoch: 0, batch: 40060, sum loss: 4032.329102, avg loss: 2.581517, ppl: 13.217170 +epoch: 0, batch: 40061, sum loss: 4734.181152, avg loss: 2.911551, ppl: 18.385283 +epoch: 0, batch: 40062, sum loss: 4348.407715, avg loss: 2.827313, ppl: 16.899992 +epoch: 0, batch: 40063, sum loss: 4509.409180, avg loss: 2.850448, ppl: 17.295530 +epoch: 0, batch: 40064, sum loss: 4632.090332, avg loss: 2.959802, ppl: 19.294153 +epoch: 0, batch: 40065, sum loss: 4836.927734, avg loss: 2.841908, ppl: 17.148457 +epoch: 0, batch: 40066, sum loss: 4586.691406, avg loss: 2.817378, ppl: 16.732920 +epoch: 0, batch: 40067, sum loss: 5448.820312, avg loss: 2.980755, ppl: 19.702688 +epoch: 0, batch: 40068, sum loss: 4855.564453, avg loss: 2.967949, ppl: 19.451986 +epoch: 0, batch: 40069, sum loss: 4151.772949, avg loss: 2.790170, ppl: 16.283787 +epoch: 0, batch: 40070, sum loss: 4036.099854, avg loss: 2.655329, ppl: 14.229667 +epoch: 0, batch: 40071, sum loss: 5083.660645, avg loss: 2.883528, ppl: 17.877241 +epoch: 0, batch: 40072, sum loss: 5069.966797, avg loss: 2.752425, ppl: 15.680611 +epoch: 0, batch: 40073, sum loss: 4318.269531, avg loss: 2.682155, ppl: 14.616556 +epoch: 0, batch: 40074, sum loss: 4843.213867, avg loss: 2.903605, ppl: 18.239790 +epoch: 0, batch: 40075, sum loss: 4240.101074, avg loss: 2.767690, ppl: 15.921811 +epoch: 0, batch: 40076, sum loss: 4827.370605, avg loss: 3.059170, ppl: 21.309868 +epoch: 0, batch: 40077, sum loss: 5901.914551, avg loss: 3.042224, ppl: 20.951792 +epoch: 0, batch: 40078, sum loss: 5661.762207, avg loss: 2.979875, ppl: 19.685358 +epoch: 0, batch: 40079, sum loss: 5342.048828, avg loss: 3.136846, ppl: 
23.031113 +epoch: 0, batch: 40080, sum loss: 4286.919434, avg loss: 2.831519, ppl: 16.971218 +epoch: 0, batch: 40081, sum loss: 5539.282227, avg loss: 3.106720, ppl: 22.347633 +epoch: 0, batch: 40082, sum loss: 4582.290039, avg loss: 2.581572, ppl: 13.217898 +epoch: 0, batch: 40083, sum loss: 4277.384277, avg loss: 2.876519, ppl: 17.752373 +epoch: 0, batch: 40084, sum loss: 4882.853516, avg loss: 2.832282, ppl: 16.984171 +epoch: 0, batch: 40085, sum loss: 5491.184082, avg loss: 3.022116, ppl: 20.534691 +epoch: 0, batch: 40086, sum loss: 4758.959961, avg loss: 2.903575, ppl: 18.239237 +epoch: 0, batch: 40087, sum loss: 4043.679688, avg loss: 2.847662, ppl: 17.247410 +epoch: 0, batch: 40088, sum loss: 4605.396973, avg loss: 2.720258, ppl: 15.184243 +epoch: 0, batch: 40089, sum loss: 4352.484375, avg loss: 2.923092, ppl: 18.598709 +epoch: 0, batch: 40090, sum loss: 4872.008789, avg loss: 2.882845, ppl: 17.865034 +epoch: 0, batch: 40091, sum loss: 5413.523438, avg loss: 3.127396, ppl: 22.814501 +epoch: 0, batch: 40092, sum loss: 4880.402832, avg loss: 2.771381, ppl: 15.980694 +epoch: 0, batch: 40093, sum loss: 5097.688477, avg loss: 2.998641, ppl: 20.058250 +epoch: 0, batch: 40094, sum loss: 5056.891113, avg loss: 2.678438, ppl: 14.562332 +epoch: 0, batch: 40095, sum loss: 5297.232422, avg loss: 2.788017, ppl: 16.248772 +epoch: 0, batch: 40096, sum loss: 5088.994629, avg loss: 2.725760, ppl: 15.268017 +epoch: 0, batch: 40097, sum loss: 5071.346680, avg loss: 2.879811, ppl: 17.810900 +epoch: 0, batch: 40098, sum loss: 4689.707520, avg loss: 2.747339, ppl: 15.601063 +epoch: 0, batch: 40099, sum loss: 4622.775391, avg loss: 2.823931, ppl: 16.842934 +epoch: 0, batch: 40100, sum loss: 4309.679199, avg loss: 2.657016, ppl: 14.253686 +epoch: 0, batch: 40101, sum loss: 5046.541504, avg loss: 3.163976, ppl: 23.664497 +epoch: 0, batch: 40102, sum loss: 5118.958008, avg loss: 2.896977, ppl: 18.119287 +epoch: 0, batch: 40103, sum loss: 5340.361328, avg loss: 3.055127, ppl: 
21.223879 +epoch: 0, batch: 40104, sum loss: 4342.891602, avg loss: 2.602092, ppl: 13.491934 +epoch: 0, batch: 40105, sum loss: 4792.083008, avg loss: 2.668198, ppl: 14.413966 +epoch: 0, batch: 40106, sum loss: 4959.552246, avg loss: 2.698342, ppl: 14.855080 +epoch: 0, batch: 40107, sum loss: 5882.916016, avg loss: 2.972671, ppl: 19.544052 +epoch: 0, batch: 40108, sum loss: 5100.441406, avg loss: 3.136803, ppl: 23.030115 +epoch: 0, batch: 40109, sum loss: 4440.434082, avg loss: 2.560804, ppl: 12.946220 +epoch: 0, batch: 40110, sum loss: 5204.632812, avg loss: 2.853417, ppl: 17.346958 +epoch: 0, batch: 40111, sum loss: 5568.351074, avg loss: 3.039493, ppl: 20.894650 +epoch: 0, batch: 40112, sum loss: 5010.285645, avg loss: 2.906198, ppl: 18.287144 +epoch: 0, batch: 40113, sum loss: 4040.404053, avg loss: 2.778820, ppl: 16.100008 +epoch: 0, batch: 40114, sum loss: 5006.121094, avg loss: 2.972756, ppl: 19.545717 +epoch: 0, batch: 40115, sum loss: 4638.646484, avg loss: 2.828443, ppl: 16.919098 +epoch: 0, batch: 40116, sum loss: 5695.086914, avg loss: 3.125734, ppl: 22.776604 +epoch: 0, batch: 40117, sum loss: 3885.503418, avg loss: 2.470123, ppl: 11.823899 +epoch: 0, batch: 40118, sum loss: 5116.268555, avg loss: 3.056314, ppl: 21.249079 +epoch: 0, batch: 40119, sum loss: 5377.266602, avg loss: 2.831631, ppl: 16.973116 +epoch: 0, batch: 40120, sum loss: 5412.312988, avg loss: 2.935094, ppl: 18.823271 +epoch: 0, batch: 40121, sum loss: 5118.000000, avg loss: 2.954965, ppl: 19.201057 +epoch: 0, batch: 40122, sum loss: 4462.969238, avg loss: 2.981275, ppl: 19.712940 +epoch: 0, batch: 40123, sum loss: 5819.862793, avg loss: 3.108901, ppl: 22.396423 +epoch: 0, batch: 40124, sum loss: 4554.738281, avg loss: 2.782369, ppl: 16.157255 +epoch: 0, batch: 40125, sum loss: 4887.305664, avg loss: 2.781620, ppl: 16.145147 +epoch: 0, batch: 40126, sum loss: 4764.139160, avg loss: 2.781167, ppl: 16.137844 +epoch: 0, batch: 40127, sum loss: 4013.652344, avg loss: 2.734096, ppl: 
15.395813 +epoch: 0, batch: 40128, sum loss: 5686.104004, avg loss: 3.073570, ppl: 21.618940 +epoch: 0, batch: 40129, sum loss: 4753.830078, avg loss: 2.941726, ppl: 18.948532 +epoch: 0, batch: 40130, sum loss: 3752.108887, avg loss: 2.460399, ppl: 11.709487 +epoch: 0, batch: 40131, sum loss: 3598.168945, avg loss: 2.566454, ppl: 13.019581 +epoch: 0, batch: 40132, sum loss: 5632.695801, avg loss: 3.175138, ppl: 23.930134 +epoch: 0, batch: 40133, sum loss: 4285.327148, avg loss: 2.860699, ppl: 17.473736 +epoch: 0, batch: 40134, sum loss: 4548.633789, avg loss: 2.923286, ppl: 18.602322 +epoch: 0, batch: 40135, sum loss: 4765.710938, avg loss: 2.835045, ppl: 17.031168 +epoch: 0, batch: 40136, sum loss: 3883.184814, avg loss: 2.543016, ppl: 12.717967 +epoch: 0, batch: 40137, sum loss: 4770.820801, avg loss: 2.819634, ppl: 16.770710 +epoch: 0, batch: 40138, sum loss: 4070.776855, avg loss: 2.528433, ppl: 12.533848 +epoch: 0, batch: 40139, sum loss: 3823.568115, avg loss: 2.510550, ppl: 12.311703 +epoch: 0, batch: 40140, sum loss: 4427.591309, avg loss: 2.838200, ppl: 17.084978 +epoch: 0, batch: 40141, sum loss: 5710.462891, avg loss: 3.073446, ppl: 21.616264 +epoch: 0, batch: 40142, sum loss: 5861.424805, avg loss: 3.222334, ppl: 25.086596 +epoch: 0, batch: 40143, sum loss: 4354.628418, avg loss: 2.972443, ppl: 19.539589 +epoch: 0, batch: 40144, sum loss: 4916.453125, avg loss: 2.795027, ppl: 16.363079 +epoch: 0, batch: 40145, sum loss: 4933.964355, avg loss: 2.782834, ppl: 16.164768 +epoch: 0, batch: 40146, sum loss: 3889.668701, avg loss: 2.537292, ppl: 12.645381 +epoch: 0, batch: 40147, sum loss: 5553.222168, avg loss: 2.963299, ppl: 19.361736 +epoch: 0, batch: 40148, sum loss: 4513.076660, avg loss: 2.648519, ppl: 14.133096 +epoch: 0, batch: 40149, sum loss: 3714.173584, avg loss: 2.796818, ppl: 16.392395 +epoch: 0, batch: 40150, sum loss: 4406.782715, avg loss: 2.715208, ppl: 15.107753 +epoch: 0, batch: 40151, sum loss: 5979.512207, avg loss: 3.128996, ppl: 
22.851034 +epoch: 0, batch: 40152, sum loss: 4048.095703, avg loss: 2.914396, ppl: 18.437664 +epoch: 0, batch: 40153, sum loss: 4546.385742, avg loss: 2.773878, ppl: 16.020636 +epoch: 0, batch: 40154, sum loss: 4732.548828, avg loss: 2.798669, ppl: 16.422771 +epoch: 0, batch: 40155, sum loss: 4939.976074, avg loss: 3.001201, ppl: 20.109667 +epoch: 0, batch: 40156, sum loss: 4413.561035, avg loss: 2.600802, ppl: 13.474543 +epoch: 0, batch: 40157, sum loss: 5347.125000, avg loss: 2.770531, ppl: 15.967113 +epoch: 0, batch: 40158, sum loss: 4408.154785, avg loss: 2.786444, ppl: 16.223230 +epoch: 0, batch: 40159, sum loss: 5641.455566, avg loss: 3.036305, ppl: 20.828150 +epoch: 0, batch: 40160, sum loss: 5468.677734, avg loss: 2.929126, ppl: 18.711266 +epoch: 0, batch: 40161, sum loss: 4187.546875, avg loss: 2.699901, ppl: 14.878261 +epoch: 0, batch: 40162, sum loss: 5794.778320, avg loss: 3.102130, ppl: 22.245277 +epoch: 0, batch: 40163, sum loss: 5122.427734, avg loss: 3.059993, ppl: 21.327404 +epoch: 0, batch: 40164, sum loss: 4819.329102, avg loss: 2.938615, ppl: 18.889671 +epoch: 0, batch: 40165, sum loss: 4470.127930, avg loss: 2.978100, ppl: 19.650446 +epoch: 0, batch: 40166, sum loss: 5050.393555, avg loss: 2.854943, ppl: 17.373444 +epoch: 0, batch: 40167, sum loss: 4513.407715, avg loss: 2.829723, ppl: 16.940762 +epoch: 0, batch: 40168, sum loss: 4541.241211, avg loss: 2.831198, ppl: 16.965773 +epoch: 0, batch: 40169, sum loss: 5319.260742, avg loss: 2.875276, ppl: 17.730318 +epoch: 0, batch: 40170, sum loss: 5269.114258, avg loss: 2.915946, ppl: 18.466269 +epoch: 0, batch: 40171, sum loss: 5034.233887, avg loss: 3.218820, ppl: 24.998594 +epoch: 0, batch: 40172, sum loss: 5293.465820, avg loss: 3.024837, ppl: 20.590658 +epoch: 0, batch: 40173, sum loss: 4297.906250, avg loss: 2.735778, ppl: 15.421731 +epoch: 0, batch: 40174, sum loss: 3947.426025, avg loss: 2.493636, ppl: 12.105212 +epoch: 0, batch: 40175, sum loss: 5353.404785, avg loss: 3.147210, ppl: 
23.271055 +epoch: 0, batch: 40176, sum loss: 6105.770508, avg loss: 3.148928, ppl: 23.311054 +epoch: 0, batch: 40177, sum loss: 6318.138672, avg loss: 2.815570, ppl: 16.702692 +epoch: 0, batch: 40178, sum loss: 4335.485352, avg loss: 2.800701, ppl: 16.456181 +epoch: 0, batch: 40179, sum loss: 4137.990723, avg loss: 2.742207, ppl: 15.521207 +epoch: 0, batch: 40180, sum loss: 4496.115234, avg loss: 2.827745, ppl: 16.907299 +epoch: 0, batch: 40181, sum loss: 6166.083008, avg loss: 3.342050, ppl: 28.277044 +epoch: 0, batch: 40182, sum loss: 5578.241699, avg loss: 3.012009, ppl: 20.328205 +epoch: 0, batch: 40183, sum loss: 4015.886719, avg loss: 2.761958, ppl: 15.830808 +epoch: 0, batch: 40184, sum loss: 4817.054688, avg loss: 2.907094, ppl: 18.303532 +epoch: 0, batch: 40185, sum loss: 4355.732910, avg loss: 2.858093, ppl: 17.428251 +epoch: 0, batch: 40186, sum loss: 4986.634766, avg loss: 2.924713, ppl: 18.628872 +epoch: 0, batch: 40187, sum loss: 4911.803711, avg loss: 2.946493, ppl: 19.039064 +epoch: 0, batch: 40188, sum loss: 5264.721680, avg loss: 2.919979, ppl: 18.540895 +epoch: 0, batch: 40189, sum loss: 5353.746094, avg loss: 3.009413, ppl: 20.275499 +epoch: 0, batch: 40190, sum loss: 4976.348145, avg loss: 3.141634, ppl: 23.141649 +epoch: 0, batch: 40191, sum loss: 4382.256836, avg loss: 2.766576, ppl: 15.904090 +epoch: 0, batch: 40192, sum loss: 4098.323730, avg loss: 2.714122, ppl: 15.091352 +epoch: 0, batch: 40193, sum loss: 5223.081543, avg loss: 2.727458, ppl: 15.293957 +epoch: 0, batch: 40194, sum loss: 3730.281738, avg loss: 2.632521, ppl: 13.908785 +epoch: 0, batch: 40195, sum loss: 5513.840820, avg loss: 2.897447, ppl: 18.127800 +epoch: 0, batch: 40196, sum loss: 4510.530273, avg loss: 2.862011, ppl: 17.496685 +epoch: 0, batch: 40197, sum loss: 4825.999023, avg loss: 2.666298, ppl: 14.386610 +epoch: 0, batch: 40198, sum loss: 4867.863770, avg loss: 2.702867, ppl: 14.922454 +epoch: 0, batch: 40199, sum loss: 4510.417480, avg loss: 2.977173, ppl: 
19.632244 +epoch: 0, batch: 40200, sum loss: 5732.280273, avg loss: 3.010652, ppl: 20.300623 +epoch: 0, batch: 40201, sum loss: 5220.850098, avg loss: 2.938014, ppl: 18.878307 +epoch: 0, batch: 40202, sum loss: 4846.197266, avg loss: 3.084785, ppl: 21.862770 +epoch: 0, batch: 40203, sum loss: 4685.948242, avg loss: 2.766203, ppl: 15.898156 +epoch: 0, batch: 40204, sum loss: 5026.760254, avg loss: 2.849637, ppl: 17.281511 +epoch: 0, batch: 40205, sum loss: 5657.877930, avg loss: 3.189333, ppl: 24.272243 +epoch: 0, batch: 40206, sum loss: 4556.859375, avg loss: 2.771812, ppl: 15.987580 +epoch: 0, batch: 40207, sum loss: 4061.186768, avg loss: 2.864025, ppl: 17.531944 +epoch: 0, batch: 40208, sum loss: 4807.252930, avg loss: 3.097457, ppl: 22.141573 +epoch: 0, batch: 40209, sum loss: 3940.113525, avg loss: 2.667646, ppl: 14.406023 +epoch: 0, batch: 40210, sum loss: 4762.181641, avg loss: 2.730609, ppl: 15.342223 +epoch: 0, batch: 40211, sum loss: 3912.176758, avg loss: 2.584001, ppl: 13.250040 +epoch: 0, batch: 40212, sum loss: 5379.966309, avg loss: 3.262563, ppl: 26.116388 +epoch: 0, batch: 40213, sum loss: 4736.419434, avg loss: 3.079597, ppl: 21.749624 +epoch: 0, batch: 40214, sum loss: 5130.345703, avg loss: 2.905066, ppl: 18.266441 +epoch: 0, batch: 40215, sum loss: 4289.290039, avg loss: 2.876788, ppl: 17.757139 +epoch: 0, batch: 40216, sum loss: 4391.928711, avg loss: 2.815339, ppl: 16.698837 +epoch: 0, batch: 40217, sum loss: 5160.729492, avg loss: 2.914020, ppl: 18.430742 +epoch: 0, batch: 40218, sum loss: 5087.686035, avg loss: 2.832788, ppl: 16.992762 +epoch: 0, batch: 40219, sum loss: 5064.953613, avg loss: 2.832748, ppl: 16.992094 +epoch: 0, batch: 40220, sum loss: 4540.240723, avg loss: 2.971362, ppl: 19.518484 +epoch: 0, batch: 40221, sum loss: 4593.800781, avg loss: 2.772360, ppl: 15.996343 +epoch: 0, batch: 40222, sum loss: 3893.043945, avg loss: 2.600564, ppl: 13.471328 +epoch: 0, batch: 40223, sum loss: 5164.263672, avg loss: 3.215606, ppl: 
24.918394 +epoch: 0, batch: 40224, sum loss: 4639.791992, avg loss: 2.718097, ppl: 15.151464 +epoch: 0, batch: 40225, sum loss: 4101.917969, avg loss: 2.830861, ppl: 16.960062 +epoch: 0, batch: 40226, sum loss: 4364.328613, avg loss: 2.830304, ppl: 16.950611 +epoch: 0, batch: 40227, sum loss: 5785.890625, avg loss: 3.198392, ppl: 24.493111 +epoch: 0, batch: 40228, sum loss: 4527.706055, avg loss: 2.844036, ppl: 17.184990 +epoch: 0, batch: 40229, sum loss: 4015.018799, avg loss: 2.731305, ppl: 15.352915 +epoch: 0, batch: 40230, sum loss: 4419.599609, avg loss: 2.866148, ppl: 17.569202 +epoch: 0, batch: 40231, sum loss: 4825.929688, avg loss: 2.997472, ppl: 20.034821 +epoch: 0, batch: 40232, sum loss: 4226.009277, avg loss: 2.851558, ppl: 17.314745 +epoch: 0, batch: 40233, sum loss: 5565.865234, avg loss: 2.836832, ppl: 17.061634 +epoch: 0, batch: 40234, sum loss: 4427.952148, avg loss: 2.698325, ppl: 14.854835 +epoch: 0, batch: 40235, sum loss: 5738.797852, avg loss: 2.904250, ppl: 18.251549 +epoch: 0, batch: 40236, sum loss: 4904.832520, avg loss: 3.164408, ppl: 23.674728 +epoch: 0, batch: 40237, sum loss: 5695.989746, avg loss: 3.049245, ppl: 21.099411 +epoch: 0, batch: 40238, sum loss: 4342.572754, avg loss: 2.600343, ppl: 13.468357 +epoch: 0, batch: 40239, sum loss: 3860.457764, avg loss: 2.714808, ppl: 15.101717 +epoch: 0, batch: 40240, sum loss: 5243.621094, avg loss: 2.917986, ppl: 18.503986 +epoch: 0, batch: 40241, sum loss: 4922.120605, avg loss: 2.870041, ppl: 17.637743 +epoch: 0, batch: 40242, sum loss: 4205.040527, avg loss: 2.567180, ppl: 13.029027 +epoch: 0, batch: 40243, sum loss: 5396.866699, avg loss: 2.994932, ppl: 19.983995 +epoch: 0, batch: 40244, sum loss: 4802.054199, avg loss: 2.808219, ppl: 16.580359 +epoch: 0, batch: 40245, sum loss: 4252.398926, avg loss: 2.661076, ppl: 14.311678 +epoch: 0, batch: 40246, sum loss: 4692.207520, avg loss: 2.947366, ppl: 19.055704 +epoch: 0, batch: 40247, sum loss: 4823.448242, avg loss: 2.992213, ppl: 
19.929749 +epoch: 0, batch: 40248, sum loss: 4291.362305, avg loss: 2.795676, ppl: 16.373690 +epoch: 0, batch: 40249, sum loss: 4015.911621, avg loss: 2.562803, ppl: 12.972124 +epoch: 0, batch: 40250, sum loss: 5379.490234, avg loss: 2.949282, ppl: 19.092239 +epoch: 0, batch: 40251, sum loss: 4613.151855, avg loss: 2.663483, ppl: 14.346165 +epoch: 0, batch: 40252, sum loss: 4568.300781, avg loss: 3.027370, ppl: 20.642864 +epoch: 0, batch: 40253, sum loss: 4146.011719, avg loss: 2.549823, ppl: 12.804832 +epoch: 0, batch: 40254, sum loss: 4856.581543, avg loss: 2.934490, ppl: 18.811914 +epoch: 0, batch: 40255, sum loss: 5254.660645, avg loss: 3.215827, ppl: 24.923889 +epoch: 0, batch: 40256, sum loss: 3866.618652, avg loss: 2.670317, ppl: 14.444547 +epoch: 0, batch: 40257, sum loss: 5049.830078, avg loss: 2.819559, ppl: 16.769451 +epoch: 0, batch: 40258, sum loss: 5264.130859, avg loss: 2.871866, ppl: 17.669964 +epoch: 0, batch: 40259, sum loss: 4384.005859, avg loss: 2.611081, ppl: 13.613765 +epoch: 0, batch: 40260, sum loss: 5167.402344, avg loss: 2.978330, ppl: 19.654963 +epoch: 0, batch: 40261, sum loss: 4630.061523, avg loss: 3.134774, ppl: 22.983452 +epoch: 0, batch: 40262, sum loss: 4506.066406, avg loss: 2.672637, ppl: 14.478101 +epoch: 0, batch: 40263, sum loss: 5291.705566, avg loss: 2.969532, ppl: 19.482796 +epoch: 0, batch: 40264, sum loss: 5642.792480, avg loss: 3.053459, ppl: 21.188513 +epoch: 0, batch: 40265, sum loss: 4911.015625, avg loss: 2.911094, ppl: 18.376896 +epoch: 0, batch: 40266, sum loss: 4797.620117, avg loss: 2.884919, ppl: 17.902117 +epoch: 0, batch: 40267, sum loss: 4905.652344, avg loss: 2.939276, ppl: 18.902164 +epoch: 0, batch: 40268, sum loss: 4283.952148, avg loss: 2.940255, ppl: 18.920677 +epoch: 0, batch: 40269, sum loss: 4745.095703, avg loss: 2.822781, ppl: 16.823578 +epoch: 0, batch: 40270, sum loss: 4929.389160, avg loss: 2.823247, ppl: 16.831417 +epoch: 0, batch: 40271, sum loss: 4574.700195, avg loss: 2.974447, ppl: 
19.578802 +epoch: 0, batch: 40272, sum loss: 3893.140381, avg loss: 2.637629, ppl: 13.980018 +epoch: 0, batch: 40273, sum loss: 4597.935547, avg loss: 2.921179, ppl: 18.563158 +epoch: 0, batch: 40274, sum loss: 3345.972412, avg loss: 2.476663, ppl: 11.901487 +epoch: 0, batch: 40275, sum loss: 5240.512207, avg loss: 3.045039, ppl: 21.010849 +epoch: 0, batch: 40276, sum loss: 4817.774902, avg loss: 2.900527, ppl: 18.183722 +epoch: 0, batch: 40277, sum loss: 4787.208496, avg loss: 2.933339, ppl: 18.790260 +epoch: 0, batch: 40278, sum loss: 4317.640625, avg loss: 2.739620, ppl: 15.481097 +epoch: 0, batch: 40279, sum loss: 5095.610352, avg loss: 2.893589, ppl: 18.058004 +epoch: 0, batch: 40280, sum loss: 4022.008789, avg loss: 2.535945, ppl: 12.628358 +epoch: 0, batch: 40281, sum loss: 4321.316406, avg loss: 2.510933, ppl: 12.316421 +epoch: 0, batch: 40282, sum loss: 4299.895020, avg loss: 2.792140, ppl: 16.315891 +epoch: 0, batch: 40283, sum loss: 6059.664062, avg loss: 3.025294, ppl: 20.600061 +epoch: 0, batch: 40284, sum loss: 4193.729004, avg loss: 2.833601, ppl: 17.006588 +epoch: 0, batch: 40285, sum loss: 5270.812500, avg loss: 2.815605, ppl: 16.703281 +epoch: 0, batch: 40286, sum loss: 3669.769775, avg loss: 2.600829, ppl: 13.474906 +epoch: 0, batch: 40287, sum loss: 4915.448730, avg loss: 2.910272, ppl: 18.361786 +epoch: 0, batch: 40288, sum loss: 4886.265625, avg loss: 2.837553, ppl: 17.073927 +epoch: 0, batch: 40289, sum loss: 4418.129883, avg loss: 2.810515, ppl: 16.618477 +epoch: 0, batch: 40290, sum loss: 4671.304688, avg loss: 2.767361, ppl: 15.916570 +epoch: 0, batch: 40291, sum loss: 4228.580566, avg loss: 2.465645, ppl: 11.771068 +epoch: 0, batch: 40292, sum loss: 4680.989258, avg loss: 2.828393, ppl: 16.918243 +epoch: 0, batch: 40293, sum loss: 4495.240234, avg loss: 2.976980, ppl: 19.628458 +epoch: 0, batch: 40294, sum loss: 5533.641602, avg loss: 3.108788, ppl: 22.393877 +epoch: 0, batch: 40295, sum loss: 4435.861816, avg loss: 2.739878, ppl: 
15.485091 +epoch: 0, batch: 40296, sum loss: 4465.007812, avg loss: 2.597445, ppl: 13.429382 +epoch: 0, batch: 40297, sum loss: 3523.843994, avg loss: 2.783447, ppl: 16.174679 +epoch: 0, batch: 40298, sum loss: 5730.996094, avg loss: 3.196317, ppl: 24.442343 +epoch: 0, batch: 40299, sum loss: 4247.911621, avg loss: 2.745903, ppl: 15.578671 +epoch: 0, batch: 40300, sum loss: 5554.241211, avg loss: 2.929452, ppl: 18.717375 +epoch: 0, batch: 40301, sum loss: 4859.066406, avg loss: 2.858274, ppl: 17.431423 +epoch: 0, batch: 40302, sum loss: 4729.818359, avg loss: 2.930495, ppl: 18.736898 +epoch: 0, batch: 40303, sum loss: 4864.523926, avg loss: 2.875014, ppl: 17.725674 +epoch: 0, batch: 40304, sum loss: 4716.314453, avg loss: 2.904135, ppl: 18.249443 +epoch: 0, batch: 40305, sum loss: 4265.601562, avg loss: 2.903745, ppl: 18.242334 +epoch: 0, batch: 40306, sum loss: 4970.923828, avg loss: 2.829211, ppl: 16.932096 +epoch: 0, batch: 40307, sum loss: 4883.708496, avg loss: 2.959823, ppl: 19.294563 +epoch: 0, batch: 40308, sum loss: 5908.543945, avg loss: 3.005363, ppl: 20.193550 +epoch: 0, batch: 40309, sum loss: 4159.886230, avg loss: 2.740373, ppl: 15.492764 +epoch: 0, batch: 40310, sum loss: 5327.874512, avg loss: 2.940328, ppl: 18.922054 +epoch: 0, batch: 40311, sum loss: 5601.125977, avg loss: 2.848996, ppl: 17.270432 +epoch: 0, batch: 40312, sum loss: 5318.342285, avg loss: 2.918958, ppl: 18.521986 +epoch: 0, batch: 40313, sum loss: 5138.620117, avg loss: 2.873949, ppl: 17.706797 +epoch: 0, batch: 40314, sum loss: 4377.331543, avg loss: 2.815004, ppl: 16.693249 +epoch: 0, batch: 40315, sum loss: 5037.953613, avg loss: 2.836686, ppl: 17.059132 +epoch: 0, batch: 40316, sum loss: 3988.006592, avg loss: 2.685526, ppl: 14.665915 +epoch: 0, batch: 40317, sum loss: 4602.435547, avg loss: 2.802945, ppl: 16.493151 +epoch: 0, batch: 40318, sum loss: 4248.397461, avg loss: 2.700825, ppl: 14.892009 +epoch: 0, batch: 40319, sum loss: 4066.474854, avg loss: 2.521063, ppl: 
12.441816 +epoch: 0, batch: 40320, sum loss: 4949.628906, avg loss: 2.748267, ppl: 15.615545 +epoch: 0, batch: 40321, sum loss: 5417.496094, avg loss: 3.120677, ppl: 22.661728 +epoch: 0, batch: 40322, sum loss: 4044.405273, avg loss: 2.479709, ppl: 11.937789 +epoch: 0, batch: 40323, sum loss: 5974.536621, avg loss: 2.979819, ppl: 19.684246 +epoch: 0, batch: 40324, sum loss: 4940.722656, avg loss: 2.812022, ppl: 16.643538 +epoch: 0, batch: 40325, sum loss: 4509.993164, avg loss: 2.773674, ppl: 16.017366 +epoch: 0, batch: 40326, sum loss: 4617.339844, avg loss: 2.846695, ppl: 17.230743 +epoch: 0, batch: 40327, sum loss: 5008.053711, avg loss: 3.029675, ppl: 20.690517 +epoch: 0, batch: 40328, sum loss: 4707.747070, avg loss: 2.958986, ppl: 19.278419 +epoch: 0, batch: 40329, sum loss: 5158.642578, avg loss: 2.672872, ppl: 14.481498 +epoch: 0, batch: 40330, sum loss: 5056.146484, avg loss: 2.948190, ppl: 19.071407 +epoch: 0, batch: 40331, sum loss: 4688.845215, avg loss: 2.990335, ppl: 19.892345 +epoch: 0, batch: 40332, sum loss: 5397.261230, avg loss: 2.936486, ppl: 18.849493 +epoch: 0, batch: 40333, sum loss: 5210.967285, avg loss: 2.761509, ppl: 15.823702 +epoch: 0, batch: 40334, sum loss: 4480.701660, avg loss: 2.774428, ppl: 16.029461 +epoch: 0, batch: 40335, sum loss: 4872.329590, avg loss: 2.681524, ppl: 14.607342 +epoch: 0, batch: 40336, sum loss: 5239.010254, avg loss: 2.936665, ppl: 18.852869 +epoch: 0, batch: 40337, sum loss: 5893.946289, avg loss: 3.156907, ppl: 23.497812 +epoch: 0, batch: 40338, sum loss: 4459.874023, avg loss: 2.858894, ppl: 17.442219 +epoch: 0, batch: 40339, sum loss: 4781.795898, avg loss: 2.859926, ppl: 17.460234 +epoch: 0, batch: 40340, sum loss: 4983.720703, avg loss: 2.952441, ppl: 19.152653 +epoch: 0, batch: 40341, sum loss: 6117.495605, avg loss: 3.368665, ppl: 29.039726 +epoch: 0, batch: 40342, sum loss: 4714.787109, avg loss: 2.937562, ppl: 18.869785 +epoch: 0, batch: 40343, sum loss: 4872.211426, avg loss: 2.697792, ppl: 
14.846908 +epoch: 0, batch: 40344, sum loss: 4662.422852, avg loss: 2.791870, ppl: 16.311495 +epoch: 0, batch: 40345, sum loss: 5014.786621, avg loss: 2.809404, ppl: 16.600027 +epoch: 0, batch: 40346, sum loss: 4236.941895, avg loss: 2.695256, ppl: 14.809306 +epoch: 0, batch: 40347, sum loss: 5238.108887, avg loss: 2.976198, ppl: 19.613110 +epoch: 0, batch: 40348, sum loss: 4946.511719, avg loss: 2.937359, ppl: 18.865948 +epoch: 0, batch: 40349, sum loss: 4305.139648, avg loss: 2.705933, ppl: 14.968277 +epoch: 0, batch: 40350, sum loss: 4579.187988, avg loss: 2.887256, ppl: 17.944002 +epoch: 0, batch: 40351, sum loss: 5482.474609, avg loss: 3.044128, ppl: 20.991716 +epoch: 0, batch: 40352, sum loss: 5511.625000, avg loss: 3.000340, ppl: 20.092377 +epoch: 0, batch: 40353, sum loss: 5182.187012, avg loss: 2.766784, ppl: 15.907400 +epoch: 0, batch: 40354, sum loss: 4535.356934, avg loss: 2.914754, ppl: 18.444273 +epoch: 0, batch: 40355, sum loss: 4161.066406, avg loss: 2.940683, ppl: 18.928768 +epoch: 0, batch: 40356, sum loss: 4797.792480, avg loss: 2.757352, ppl: 15.758059 +epoch: 0, batch: 40357, sum loss: 4624.081543, avg loss: 2.838601, ppl: 17.091843 +epoch: 0, batch: 40358, sum loss: 5087.943359, avg loss: 2.860002, ppl: 17.461559 +epoch: 0, batch: 40359, sum loss: 3612.308594, avg loss: 2.350233, ppl: 10.488016 +epoch: 0, batch: 40360, sum loss: 4205.932617, avg loss: 2.704780, ppl: 14.951025 +epoch: 0, batch: 40361, sum loss: 4841.062012, avg loss: 2.793458, ppl: 16.337408 +epoch: 0, batch: 40362, sum loss: 4480.137207, avg loss: 2.707032, ppl: 14.984731 +epoch: 0, batch: 40363, sum loss: 4383.423340, avg loss: 2.934018, ppl: 18.803036 +epoch: 0, batch: 40364, sum loss: 5152.842285, avg loss: 3.008081, ppl: 20.248501 +epoch: 0, batch: 40365, sum loss: 4606.255859, avg loss: 2.947061, ppl: 19.049875 +epoch: 0, batch: 40366, sum loss: 4170.947266, avg loss: 2.749471, ppl: 15.634355 +epoch: 0, batch: 40367, sum loss: 5209.489746, avg loss: 3.009526, ppl: 
20.277786 +epoch: 0, batch: 40368, sum loss: 5248.759277, avg loss: 2.843315, ppl: 17.172596 +epoch: 0, batch: 40369, sum loss: 6344.646973, avg loss: 3.260353, ppl: 26.058737 +epoch: 0, batch: 40370, sum loss: 4244.561523, avg loss: 2.578713, ppl: 13.180163 +epoch: 0, batch: 40371, sum loss: 5012.604980, avg loss: 2.843225, ppl: 17.171045 +epoch: 0, batch: 40372, sum loss: 5464.500000, avg loss: 3.223894, ppl: 25.125767 +epoch: 0, batch: 40373, sum loss: 4392.164062, avg loss: 2.712887, ppl: 15.072728 +epoch: 0, batch: 40374, sum loss: 5080.256836, avg loss: 3.004292, ppl: 20.171921 +epoch: 0, batch: 40375, sum loss: 5069.636230, avg loss: 2.734432, ppl: 15.400986 +epoch: 0, batch: 40376, sum loss: 4930.687500, avg loss: 2.966719, ppl: 19.428080 +epoch: 0, batch: 40377, sum loss: 4237.026367, avg loss: 2.800414, ppl: 16.451458 +epoch: 0, batch: 40378, sum loss: 6502.639648, avg loss: 3.557243, ppl: 35.066372 +epoch: 0, batch: 40379, sum loss: 4122.846680, avg loss: 2.622676, ppl: 13.772528 +epoch: 0, batch: 40380, sum loss: 4713.407227, avg loss: 2.757991, ppl: 15.768138 +epoch: 0, batch: 40381, sum loss: 5522.160645, avg loss: 3.009352, ppl: 20.274258 +epoch: 0, batch: 40382, sum loss: 4777.431641, avg loss: 2.982167, ppl: 19.730526 +epoch: 0, batch: 40383, sum loss: 5221.069336, avg loss: 3.124518, ppl: 22.748926 +epoch: 0, batch: 40384, sum loss: 4898.513184, avg loss: 2.795955, ppl: 16.378265 +epoch: 0, batch: 40385, sum loss: 4008.479004, avg loss: 2.706603, ppl: 14.978309 +epoch: 0, batch: 40386, sum loss: 4936.865723, avg loss: 3.045568, ppl: 21.021969 +epoch: 0, batch: 40387, sum loss: 5136.741211, avg loss: 2.890682, ppl: 18.005577 +epoch: 0, batch: 40388, sum loss: 4850.214844, avg loss: 2.696062, ppl: 14.821245 +epoch: 0, batch: 40389, sum loss: 4956.928711, avg loss: 2.910704, ppl: 18.369724 +epoch: 0, batch: 40390, sum loss: 4608.736328, avg loss: 2.801663, ppl: 16.472023 +epoch: 0, batch: 40391, sum loss: 4616.582031, avg loss: 2.693455, ppl: 
14.782665 +epoch: 0, batch: 40392, sum loss: 4247.460938, avg loss: 2.945535, ppl: 19.020830 +epoch: 0, batch: 40393, sum loss: 5469.574707, avg loss: 2.931176, ppl: 18.749666 +epoch: 0, batch: 40394, sum loss: 4991.726074, avg loss: 2.832989, ppl: 16.996181 +epoch: 0, batch: 40395, sum loss: 4973.556641, avg loss: 2.951666, ppl: 19.137804 +epoch: 0, batch: 40396, sum loss: 5005.620117, avg loss: 3.063415, ppl: 21.400517 +epoch: 0, batch: 40397, sum loss: 5155.694824, avg loss: 3.009746, ppl: 20.282244 +epoch: 0, batch: 40398, sum loss: 4858.585449, avg loss: 2.978900, ppl: 19.666166 +epoch: 0, batch: 40399, sum loss: 5117.096680, avg loss: 3.139323, ppl: 23.088230 +epoch: 0, batch: 40400, sum loss: 4114.677734, avg loss: 2.507421, ppl: 12.273233 +epoch: 0, batch: 40401, sum loss: 4162.260742, avg loss: 2.531789, ppl: 12.575982 +epoch: 0, batch: 40402, sum loss: 3971.938477, avg loss: 2.607970, ppl: 13.571476 +epoch: 0, batch: 40403, sum loss: 5019.678711, avg loss: 2.866750, ppl: 17.579786 +epoch: 0, batch: 40404, sum loss: 4387.627930, avg loss: 3.063986, ppl: 21.412741 +epoch: 0, batch: 40405, sum loss: 4810.824219, avg loss: 2.884187, ppl: 17.889021 +epoch: 0, batch: 40406, sum loss: 4074.725342, avg loss: 2.723747, ppl: 15.237309 +epoch: 0, batch: 40407, sum loss: 4791.031738, avg loss: 2.985066, ppl: 19.787817 +epoch: 0, batch: 40408, sum loss: 4588.764648, avg loss: 3.038917, ppl: 20.882616 +epoch: 0, batch: 40409, sum loss: 5613.536621, avg loss: 2.880214, ppl: 17.818085 +epoch: 0, batch: 40410, sum loss: 5130.590332, avg loss: 2.746569, ppl: 15.589049 +epoch: 0, batch: 40411, sum loss: 5350.890625, avg loss: 2.911257, ppl: 18.379889 +epoch: 0, batch: 40412, sum loss: 4679.728516, avg loss: 2.919356, ppl: 18.529358 +epoch: 0, batch: 40413, sum loss: 5152.080078, avg loss: 2.826155, ppl: 16.880430 +epoch: 0, batch: 40414, sum loss: 5115.861328, avg loss: 2.747509, ppl: 15.603711 +epoch: 0, batch: 40415, sum loss: 4024.864258, avg loss: 2.787302, ppl: 
16.237154 +epoch: 0, batch: 40416, sum loss: 4586.517090, avg loss: 2.791550, ppl: 16.306278 +epoch: 0, batch: 40417, sum loss: 4514.734863, avg loss: 2.688943, ppl: 14.716108 +epoch: 0, batch: 40418, sum loss: 4396.096680, avg loss: 2.728800, ppl: 15.314500 +epoch: 0, batch: 40419, sum loss: 5488.446289, avg loss: 2.950778, ppl: 19.120815 +epoch: 0, batch: 40420, sum loss: 5907.410645, avg loss: 3.083200, ppl: 21.828135 +epoch: 0, batch: 40421, sum loss: 5307.694824, avg loss: 2.973499, ppl: 19.560234 +epoch: 0, batch: 40422, sum loss: 4924.426758, avg loss: 2.856396, ppl: 17.398712 +epoch: 0, batch: 40423, sum loss: 4173.884766, avg loss: 2.638359, ppl: 13.990232 +epoch: 0, batch: 40424, sum loss: 4716.509277, avg loss: 2.861959, ppl: 17.495775 +epoch: 0, batch: 40425, sum loss: 3626.004883, avg loss: 2.593709, ppl: 13.379300 +epoch: 0, batch: 40426, sum loss: 5398.678711, avg loss: 2.882370, ppl: 17.856543 +epoch: 0, batch: 40427, sum loss: 3688.679932, avg loss: 2.788118, ppl: 16.250406 +epoch: 0, batch: 40428, sum loss: 4044.288574, avg loss: 2.787242, ppl: 16.236181 +epoch: 0, batch: 40429, sum loss: 4147.911133, avg loss: 2.655513, ppl: 14.232283 +epoch: 0, batch: 40430, sum loss: 4667.637207, avg loss: 2.825446, ppl: 16.868473 +epoch: 0, batch: 40431, sum loss: 5609.172852, avg loss: 3.276386, ppl: 26.479902 +epoch: 0, batch: 40432, sum loss: 4087.606934, avg loss: 2.675135, ppl: 14.514315 +epoch: 0, batch: 40433, sum loss: 5261.892578, avg loss: 2.872212, ppl: 17.676077 +epoch: 0, batch: 40434, sum loss: 4748.128906, avg loss: 2.812873, ppl: 16.657701 +epoch: 0, batch: 40435, sum loss: 5133.996094, avg loss: 2.794772, ppl: 16.358900 +epoch: 0, batch: 40436, sum loss: 4877.174316, avg loss: 2.877389, ppl: 17.767820 +epoch: 0, batch: 40437, sum loss: 4874.333496, avg loss: 2.832268, ppl: 16.983936 +epoch: 0, batch: 40438, sum loss: 4538.652344, avg loss: 2.628056, ppl: 13.846823 +epoch: 0, batch: 40439, sum loss: 3723.349854, avg loss: 2.659536, ppl: 
14.289652 +epoch: 0, batch: 40440, sum loss: 4404.419922, avg loss: 2.834247, ppl: 17.017584 +epoch: 0, batch: 40441, sum loss: 4934.550781, avg loss: 2.992450, ppl: 19.934471 +epoch: 0, batch: 40442, sum loss: 4735.090332, avg loss: 2.828609, ppl: 16.921898 +epoch: 0, batch: 40443, sum loss: 4568.886719, avg loss: 2.476361, ppl: 11.897892 +epoch: 0, batch: 40444, sum loss: 5378.314453, avg loss: 2.934160, ppl: 18.805691 +epoch: 0, batch: 40445, sum loss: 4039.767578, avg loss: 2.664754, ppl: 14.364421 +epoch: 0, batch: 40446, sum loss: 4600.073242, avg loss: 2.841305, ppl: 17.138121 +epoch: 0, batch: 40447, sum loss: 3742.995605, avg loss: 2.662159, ppl: 14.327188 +epoch: 0, batch: 40448, sum loss: 6916.350098, avg loss: 3.329971, ppl: 27.937534 +epoch: 0, batch: 40449, sum loss: 5023.933594, avg loss: 2.937973, ppl: 18.877539 +epoch: 0, batch: 40450, sum loss: 4629.080566, avg loss: 3.009805, ppl: 20.283449 +epoch: 0, batch: 40451, sum loss: 5066.018555, avg loss: 2.717821, ppl: 15.147283 +epoch: 0, batch: 40452, sum loss: 5630.902344, avg loss: 3.110996, ppl: 22.443388 +epoch: 0, batch: 40453, sum loss: 5598.103027, avg loss: 2.995239, ppl: 19.990147 +epoch: 0, batch: 40454, sum loss: 4607.249023, avg loss: 2.917827, ppl: 18.501043 +epoch: 0, batch: 40455, sum loss: 4476.278809, avg loss: 2.873093, ppl: 17.691656 +epoch: 0, batch: 40456, sum loss: 4922.500488, avg loss: 2.656503, ppl: 14.246386 +epoch: 0, batch: 40457, sum loss: 5513.940430, avg loss: 3.303739, ppl: 27.214205 +epoch: 0, batch: 40458, sum loss: 4932.882324, avg loss: 2.619693, ppl: 13.731508 +epoch: 0, batch: 40459, sum loss: 5161.358398, avg loss: 2.992092, ppl: 19.927324 +epoch: 0, batch: 40460, sum loss: 3752.619141, avg loss: 2.286788, ppl: 9.843270 +epoch: 0, batch: 40461, sum loss: 4628.262695, avg loss: 2.799917, ppl: 16.443277 +epoch: 0, batch: 40462, sum loss: 4453.558105, avg loss: 2.827656, ppl: 16.905785 +epoch: 0, batch: 40463, sum loss: 4145.603516, avg loss: 2.669416, ppl: 
14.431545 +epoch: 0, batch: 40464, sum loss: 4191.441406, avg loss: 2.813048, ppl: 16.660625 +epoch: 0, batch: 40465, sum loss: 4547.930664, avg loss: 3.081254, ppl: 21.785706 +epoch: 0, batch: 40466, sum loss: 4681.567383, avg loss: 2.709240, ppl: 15.017864 +epoch: 0, batch: 40467, sum loss: 4644.344238, avg loss: 2.704918, ppl: 14.953093 +epoch: 0, batch: 40468, sum loss: 4823.021973, avg loss: 2.738797, ppl: 15.468369 +epoch: 0, batch: 40469, sum loss: 5665.150879, avg loss: 2.962945, ppl: 19.354887 +epoch: 0, batch: 40470, sum loss: 4269.911133, avg loss: 2.662040, ppl: 14.325477 +epoch: 0, batch: 40471, sum loss: 3761.802246, avg loss: 2.481400, ppl: 11.957994 +epoch: 0, batch: 40472, sum loss: 4891.092773, avg loss: 3.008052, ppl: 20.247921 +epoch: 0, batch: 40473, sum loss: 3940.374756, avg loss: 2.708161, ppl: 15.001667 +epoch: 0, batch: 40474, sum loss: 4020.901367, avg loss: 2.680601, ppl: 14.593860 +epoch: 0, batch: 40475, sum loss: 5100.005859, avg loss: 2.991206, ppl: 19.909678 +epoch: 0, batch: 40476, sum loss: 4427.760742, avg loss: 2.897749, ppl: 18.133286 +epoch: 0, batch: 40477, sum loss: 4298.777344, avg loss: 2.518323, ppl: 12.407771 +epoch: 0, batch: 40478, sum loss: 3968.072998, avg loss: 2.588436, ppl: 13.308945 +epoch: 0, batch: 40479, sum loss: 5395.736816, avg loss: 2.853378, ppl: 17.346272 +epoch: 0, batch: 40480, sum loss: 4749.061523, avg loss: 2.975602, ppl: 19.601427 +epoch: 0, batch: 40481, sum loss: 4096.793457, avg loss: 2.695259, ppl: 14.809352 +epoch: 0, batch: 40482, sum loss: 4687.578125, avg loss: 2.931568, ppl: 18.757025 +epoch: 0, batch: 40483, sum loss: 4936.472656, avg loss: 2.860065, ppl: 17.462666 +epoch: 0, batch: 40484, sum loss: 5019.101074, avg loss: 2.837253, ppl: 17.068819 +epoch: 0, batch: 40485, sum loss: 4495.407227, avg loss: 2.806122, ppl: 16.545628 +epoch: 0, batch: 40486, sum loss: 5187.390625, avg loss: 3.078570, ppl: 21.727312 +epoch: 0, batch: 40487, sum loss: 4379.703125, avg loss: 2.723696, ppl: 
15.236532 +epoch: 0, batch: 40488, sum loss: 4831.258789, avg loss: 2.846941, ppl: 17.234980 +epoch: 0, batch: 40489, sum loss: 4244.109375, avg loss: 2.621439, ppl: 13.755500 +epoch: 0, batch: 40490, sum loss: 4428.348145, avg loss: 2.896238, ppl: 18.105909 +epoch: 0, batch: 40491, sum loss: 4722.252930, avg loss: 2.940382, ppl: 18.923065 +epoch: 0, batch: 40492, sum loss: 5562.711914, avg loss: 2.943234, ppl: 18.977119 +epoch: 0, batch: 40493, sum loss: 4954.709473, avg loss: 3.017485, ppl: 20.439814 +epoch: 0, batch: 40494, sum loss: 4655.943848, avg loss: 2.735572, ppl: 15.418562 +epoch: 0, batch: 40495, sum loss: 4126.961426, avg loss: 2.743990, ppl: 15.548904 +epoch: 0, batch: 40496, sum loss: 4066.915527, avg loss: 2.951318, ppl: 19.131144 +epoch: 0, batch: 40497, sum loss: 5015.876465, avg loss: 2.751441, ppl: 15.665186 +epoch: 0, batch: 40498, sum loss: 5066.436035, avg loss: 3.030165, ppl: 20.700651 +epoch: 0, batch: 40499, sum loss: 5045.758301, avg loss: 2.868538, ppl: 17.611254 +epoch: 0, batch: 40500, sum loss: 5005.894043, avg loss: 2.920592, ppl: 18.552259 +epoch: 0, batch: 40501, sum loss: 4463.688477, avg loss: 2.861339, ppl: 17.484921 +epoch: 0, batch: 40502, sum loss: 4917.088379, avg loss: 3.001885, ppl: 20.123438 +epoch: 0, batch: 40503, sum loss: 4609.836914, avg loss: 2.865032, ppl: 17.549618 +epoch: 0, batch: 40504, sum loss: 6144.616699, avg loss: 3.125441, ppl: 22.769930 +epoch: 0, batch: 40505, sum loss: 4145.017578, avg loss: 2.778162, ppl: 16.089422 +epoch: 0, batch: 40506, sum loss: 4616.550781, avg loss: 2.825306, ppl: 16.866112 +epoch: 0, batch: 40507, sum loss: 5541.785156, avg loss: 2.927515, ppl: 18.681141 +epoch: 0, batch: 40508, sum loss: 4805.332031, avg loss: 2.928295, ppl: 18.695724 +epoch: 0, batch: 40509, sum loss: 5341.207031, avg loss: 2.925086, ppl: 18.635830 +epoch: 0, batch: 40510, sum loss: 4302.758301, avg loss: 2.772396, ppl: 15.996918 +epoch: 0, batch: 40511, sum loss: 4704.049316, avg loss: 2.775250, ppl: 
16.042645 +epoch: 0, batch: 40512, sum loss: 5141.338867, avg loss: 3.102799, ppl: 22.260180 +epoch: 0, batch: 40513, sum loss: 4190.089844, avg loss: 2.474950, ppl: 11.881111 +epoch: 0, batch: 40514, sum loss: 4804.247070, avg loss: 2.799678, ppl: 16.439350 +epoch: 0, batch: 40515, sum loss: 4001.579834, avg loss: 2.818014, ppl: 16.743567 +epoch: 0, batch: 40516, sum loss: 5088.871582, avg loss: 2.928004, ppl: 18.690292 +epoch: 0, batch: 40517, sum loss: 4796.269531, avg loss: 2.967988, ppl: 19.452751 +epoch: 0, batch: 40518, sum loss: 5318.583496, avg loss: 3.056657, ppl: 21.256380 +epoch: 0, batch: 40519, sum loss: 4636.708984, avg loss: 2.769838, ppl: 15.956055 +epoch: 0, batch: 40520, sum loss: 5332.253906, avg loss: 3.033136, ppl: 20.762249 +epoch: 0, batch: 40521, sum loss: 5210.406250, avg loss: 3.165496, ppl: 23.700499 +epoch: 0, batch: 40522, sum loss: 4536.522949, avg loss: 2.842433, ppl: 17.157459 +epoch: 0, batch: 40523, sum loss: 4480.065918, avg loss: 2.658793, ppl: 14.279040 +epoch: 0, batch: 40524, sum loss: 4223.940430, avg loss: 2.656566, ppl: 14.247285 +epoch: 0, batch: 40525, sum loss: 4317.993164, avg loss: 2.748564, ppl: 15.620178 +epoch: 0, batch: 40526, sum loss: 5170.250488, avg loss: 2.839237, ppl: 17.102711 +epoch: 0, batch: 40527, sum loss: 4900.172852, avg loss: 2.705783, ppl: 14.966032 +epoch: 0, batch: 40528, sum loss: 4597.106934, avg loss: 2.941207, ppl: 18.938694 +epoch: 0, batch: 40529, sum loss: 5205.577637, avg loss: 2.768924, ppl: 15.941476 +epoch: 0, batch: 40530, sum loss: 5428.021973, avg loss: 3.066679, ppl: 21.470486 +epoch: 0, batch: 40531, sum loss: 4287.374023, avg loss: 2.856345, ppl: 17.397825 +epoch: 0, batch: 40532, sum loss: 4952.701172, avg loss: 2.958603, ppl: 19.271034 +epoch: 0, batch: 40533, sum loss: 5671.804199, avg loss: 2.955604, ppl: 19.213326 +epoch: 0, batch: 40534, sum loss: 4614.547852, avg loss: 2.996459, ppl: 20.014549 +epoch: 0, batch: 40535, sum loss: 4561.572266, avg loss: 2.931602, ppl: 
18.757652 +epoch: 0, batch: 40536, sum loss: 4872.911621, avg loss: 2.731453, ppl: 15.355181 +epoch: 0, batch: 40537, sum loss: 4871.484375, avg loss: 2.796489, ppl: 16.387014 +epoch: 0, batch: 40538, sum loss: 4438.402832, avg loss: 2.854279, ppl: 17.361908 +epoch: 0, batch: 40539, sum loss: 4374.417480, avg loss: 2.672216, ppl: 14.472003 +epoch: 0, batch: 40540, sum loss: 4786.693848, avg loss: 2.859435, ppl: 17.451666 +epoch: 0, batch: 40541, sum loss: 4581.746094, avg loss: 3.034269, ppl: 20.785780 +epoch: 0, batch: 40542, sum loss: 4978.408203, avg loss: 3.123217, ppl: 22.719353 +epoch: 0, batch: 40543, sum loss: 4371.914551, avg loss: 2.851869, ppl: 17.320116 +epoch: 0, batch: 40544, sum loss: 5467.193848, avg loss: 3.143872, ppl: 23.193504 +epoch: 0, batch: 40545, sum loss: 4048.556885, avg loss: 2.737361, ppl: 15.446168 +epoch: 0, batch: 40546, sum loss: 5223.804688, avg loss: 2.805480, ppl: 16.535019 +epoch: 0, batch: 40547, sum loss: 4436.905273, avg loss: 2.603818, ppl: 13.515237 +epoch: 0, batch: 40548, sum loss: 4733.415039, avg loss: 2.769699, ppl: 15.953825 +epoch: 0, batch: 40549, sum loss: 5927.618652, avg loss: 3.151312, ppl: 23.366711 +epoch: 0, batch: 40550, sum loss: 4162.832520, avg loss: 2.626393, ppl: 13.823812 +epoch: 0, batch: 40551, sum loss: 3950.197021, avg loss: 2.603953, ppl: 13.517067 +epoch: 0, batch: 40552, sum loss: 5147.216797, avg loss: 2.888449, ppl: 17.965431 +epoch: 0, batch: 40553, sum loss: 4870.764160, avg loss: 2.877002, ppl: 17.760946 +epoch: 0, batch: 40554, sum loss: 5479.333008, avg loss: 3.022247, ppl: 20.537384 +epoch: 0, batch: 40555, sum loss: 5235.370605, avg loss: 2.926423, ppl: 18.660759 +epoch: 0, batch: 40556, sum loss: 4935.976074, avg loss: 3.151964, ppl: 23.381947 +epoch: 0, batch: 40557, sum loss: 5844.787109, avg loss: 2.942995, ppl: 18.972576 +epoch: 0, batch: 40558, sum loss: 4506.354980, avg loss: 2.781701, ppl: 16.146460 +epoch: 0, batch: 40559, sum loss: 3577.129150, avg loss: 2.586500, ppl: 
13.283195 +epoch: 0, batch: 40560, sum loss: 4067.493164, avg loss: 2.653290, ppl: 14.200679 +epoch: 0, batch: 40561, sum loss: 4611.432129, avg loss: 2.748172, ppl: 15.614060 +epoch: 0, batch: 40562, sum loss: 4529.863281, avg loss: 2.653698, ppl: 14.206484 +epoch: 0, batch: 40563, sum loss: 4302.167969, avg loss: 2.858583, ppl: 17.436810 +epoch: 0, batch: 40564, sum loss: 5140.183594, avg loss: 2.847747, ppl: 17.248878 +epoch: 0, batch: 40565, sum loss: 4334.488281, avg loss: 2.746824, ppl: 15.593030 +epoch: 0, batch: 40566, sum loss: 5909.179688, avg loss: 3.044399, ppl: 20.997419 +epoch: 0, batch: 40567, sum loss: 4684.932129, avg loss: 2.815464, ppl: 16.700924 +epoch: 0, batch: 40568, sum loss: 5138.500977, avg loss: 2.926254, ppl: 18.657600 +epoch: 0, batch: 40569, sum loss: 3868.365234, avg loss: 2.688231, ppl: 14.705646 +epoch: 0, batch: 40570, sum loss: 5516.696289, avg loss: 3.022847, ppl: 20.549723 +epoch: 0, batch: 40571, sum loss: 4640.710449, avg loss: 2.728225, ppl: 15.305692 +epoch: 0, batch: 40572, sum loss: 4483.926758, avg loss: 2.696288, ppl: 14.824598 +epoch: 0, batch: 40573, sum loss: 5747.241211, avg loss: 3.182304, ppl: 24.102224 +epoch: 0, batch: 40574, sum loss: 5302.762695, avg loss: 3.000998, ppl: 20.105583 +epoch: 0, batch: 40575, sum loss: 5515.740723, avg loss: 2.940160, ppl: 18.918879 +epoch: 0, batch: 40576, sum loss: 4167.342285, avg loss: 2.415851, ppl: 11.199293 +epoch: 0, batch: 40577, sum loss: 3806.054199, avg loss: 2.861695, ppl: 17.491146 +epoch: 0, batch: 40578, sum loss: 5210.332031, avg loss: 3.027503, ppl: 20.645607 +epoch: 0, batch: 40579, sum loss: 5638.817871, avg loss: 3.043075, ppl: 20.969622 +epoch: 0, batch: 40580, sum loss: 4410.542969, avg loss: 2.827271, ppl: 16.899284 +epoch: 0, batch: 40581, sum loss: 4235.448730, avg loss: 2.716773, ppl: 15.131412 +epoch: 0, batch: 40582, sum loss: 4348.973145, avg loss: 2.681241, ppl: 14.603205 +epoch: 0, batch: 40583, sum loss: 4555.894531, avg loss: 2.973822, ppl: 
19.566553 +epoch: 0, batch: 40584, sum loss: 4634.791016, avg loss: 3.077551, ppl: 21.705173 +epoch: 0, batch: 40585, sum loss: 5248.519531, avg loss: 2.860229, ppl: 17.465523 +epoch: 0, batch: 40586, sum loss: 4336.217285, avg loss: 2.737511, ppl: 15.448484 +epoch: 0, batch: 40587, sum loss: 4153.453613, avg loss: 2.632100, ppl: 13.902933 +epoch: 0, batch: 40588, sum loss: 4422.303711, avg loss: 2.802474, ppl: 16.485378 +epoch: 0, batch: 40589, sum loss: 4604.613770, avg loss: 2.821455, ppl: 16.801271 +epoch: 0, batch: 40590, sum loss: 4427.788574, avg loss: 2.675401, ppl: 14.518170 +epoch: 0, batch: 40591, sum loss: 4431.691406, avg loss: 2.877722, ppl: 17.773731 +epoch: 0, batch: 40592, sum loss: 4700.858887, avg loss: 3.040659, ppl: 20.919029 +epoch: 0, batch: 40593, sum loss: 5349.066895, avg loss: 3.146510, ppl: 23.254766 +epoch: 0, batch: 40594, sum loss: 3938.838135, avg loss: 2.681306, ppl: 14.604149 +epoch: 0, batch: 40595, sum loss: 5605.575684, avg loss: 3.170575, ppl: 23.821169 +epoch: 0, batch: 40596, sum loss: 4451.748047, avg loss: 2.763345, ppl: 15.852778 +epoch: 0, batch: 40597, sum loss: 5932.127930, avg loss: 2.832917, ppl: 16.994963 +epoch: 0, batch: 40598, sum loss: 3790.423340, avg loss: 2.693975, ppl: 14.790358 +epoch: 0, batch: 40599, sum loss: 5375.858398, avg loss: 3.077194, ppl: 21.697439 +epoch: 0, batch: 40600, sum loss: 5552.620605, avg loss: 2.902572, ppl: 18.220953 +epoch: 0, batch: 40601, sum loss: 4637.718750, avg loss: 2.822714, ppl: 16.822439 +epoch: 0, batch: 40602, sum loss: 4650.314453, avg loss: 2.990556, ppl: 19.896742 +epoch: 0, batch: 40603, sum loss: 4820.476562, avg loss: 2.847299, ppl: 17.241144 +epoch: 0, batch: 40604, sum loss: 5489.804199, avg loss: 2.970673, ppl: 19.505049 +epoch: 0, batch: 40605, sum loss: 5116.622559, avg loss: 2.762755, ppl: 15.843434 +epoch: 0, batch: 40606, sum loss: 6028.981445, avg loss: 3.079153, ppl: 21.739983 +epoch: 0, batch: 40607, sum loss: 3923.969971, avg loss: 2.566364, ppl: 
13.018401 +epoch: 0, batch: 40608, sum loss: 5169.193359, avg loss: 2.984523, ppl: 19.777063 +epoch: 0, batch: 40609, sum loss: 4925.805664, avg loss: 2.809929, ppl: 16.608742 +epoch: 0, batch: 40610, sum loss: 5045.966797, avg loss: 2.721665, ppl: 15.205617 +epoch: 0, batch: 40611, sum loss: 4760.642578, avg loss: 2.909928, ppl: 18.355482 +epoch: 0, batch: 40612, sum loss: 4444.974121, avg loss: 2.882603, ppl: 17.860706 +epoch: 0, batch: 40613, sum loss: 4840.432617, avg loss: 2.772298, ppl: 15.995355 +epoch: 0, batch: 40614, sum loss: 5319.473633, avg loss: 2.961845, ppl: 19.333612 +epoch: 0, batch: 40615, sum loss: 4643.755859, avg loss: 2.810990, ppl: 16.626375 +epoch: 0, batch: 40616, sum loss: 4632.220703, avg loss: 2.950459, ppl: 19.114725 +epoch: 0, batch: 40617, sum loss: 4844.473633, avg loss: 2.810020, ppl: 16.610247 +epoch: 0, batch: 40618, sum loss: 4235.871582, avg loss: 2.686032, ppl: 14.673330 +epoch: 0, batch: 40619, sum loss: 4740.023438, avg loss: 3.019123, ppl: 20.473335 +epoch: 0, batch: 40620, sum loss: 5908.742676, avg loss: 3.275356, ppl: 26.452648 +epoch: 0, batch: 40621, sum loss: 3325.971680, avg loss: 2.469170, ppl: 11.812636 +epoch: 0, batch: 40622, sum loss: 5172.167969, avg loss: 2.993153, ppl: 19.948479 +epoch: 0, batch: 40623, sum loss: 3994.064697, avg loss: 2.720752, ppl: 15.191750 +epoch: 0, batch: 40624, sum loss: 5579.191406, avg loss: 3.223103, ppl: 25.105904 +epoch: 0, batch: 40625, sum loss: 4048.902588, avg loss: 2.759988, ppl: 15.799654 +epoch: 0, batch: 40626, sum loss: 5530.107422, avg loss: 3.043537, ppl: 20.979324 +epoch: 0, batch: 40627, sum loss: 4372.204590, avg loss: 2.777766, ppl: 16.083059 +epoch: 0, batch: 40628, sum loss: 5321.978516, avg loss: 2.953373, ppl: 19.170511 +epoch: 0, batch: 40629, sum loss: 5428.060059, avg loss: 3.194856, ppl: 24.406656 +epoch: 0, batch: 40630, sum loss: 4431.308105, avg loss: 2.792255, ppl: 16.317770 +epoch: 0, batch: 40631, sum loss: 4528.345215, avg loss: 2.855199, ppl: 
17.377893 +epoch: 0, batch: 40632, sum loss: 3839.487061, avg loss: 2.431594, ppl: 11.377004 +epoch: 0, batch: 40633, sum loss: 4660.963867, avg loss: 3.096986, ppl: 22.131149 +epoch: 0, batch: 40634, sum loss: 5448.794434, avg loss: 2.864771, ppl: 17.545036 +epoch: 0, batch: 40635, sum loss: 5085.679199, avg loss: 2.949930, ppl: 19.104620 +epoch: 0, batch: 40636, sum loss: 4622.626465, avg loss: 2.701710, ppl: 14.905205 +epoch: 0, batch: 40637, sum loss: 4276.531250, avg loss: 2.610825, ppl: 13.610272 +epoch: 0, batch: 40638, sum loss: 4788.507812, avg loss: 2.801936, ppl: 16.476509 +epoch: 0, batch: 40639, sum loss: 4126.665527, avg loss: 2.709564, ppl: 15.022720 +epoch: 0, batch: 40640, sum loss: 5498.829102, avg loss: 2.972340, ppl: 19.537586 +epoch: 0, batch: 40641, sum loss: 4889.382324, avg loss: 2.934803, ppl: 18.817801 +epoch: 0, batch: 40642, sum loss: 4181.548828, avg loss: 2.752830, ppl: 15.686964 +epoch: 0, batch: 40643, sum loss: 3735.192139, avg loss: 2.740420, ppl: 15.493489 +epoch: 0, batch: 40644, sum loss: 3906.998047, avg loss: 2.601197, ppl: 13.479864 +epoch: 0, batch: 40645, sum loss: 3688.285400, avg loss: 2.557757, ppl: 12.906834 +epoch: 0, batch: 40646, sum loss: 5293.824707, avg loss: 2.989172, ppl: 19.869232 +epoch: 0, batch: 40647, sum loss: 4514.977051, avg loss: 2.836041, ppl: 17.048134 +epoch: 0, batch: 40648, sum loss: 4841.629395, avg loss: 3.005357, ppl: 20.193420 +epoch: 0, batch: 40649, sum loss: 5035.987793, avg loss: 2.826031, ppl: 16.878345 +epoch: 0, batch: 40650, sum loss: 4711.530762, avg loss: 2.853744, ppl: 17.352625 +epoch: 0, batch: 40651, sum loss: 5154.065430, avg loss: 2.963810, ppl: 19.371637 +epoch: 0, batch: 40652, sum loss: 4447.375977, avg loss: 2.802379, ppl: 16.483822 +epoch: 0, batch: 40653, sum loss: 4737.258789, avg loss: 2.767090, ppl: 15.912267 +epoch: 0, batch: 40654, sum loss: 5938.196777, avg loss: 3.060926, ppl: 21.347321 +epoch: 0, batch: 40655, sum loss: 4773.549805, avg loss: 2.796456, ppl: 
16.386463 +epoch: 0, batch: 40656, sum loss: 4537.125000, avg loss: 2.888049, ppl: 17.958241 +epoch: 0, batch: 40657, sum loss: 4998.628418, avg loss: 2.977146, ppl: 19.631710 +epoch: 0, batch: 40658, sum loss: 5219.619629, avg loss: 3.110620, ppl: 22.434940 +epoch: 0, batch: 40659, sum loss: 4145.360352, avg loss: 2.587616, ppl: 13.298028 +epoch: 0, batch: 40660, sum loss: 5007.424805, avg loss: 2.959471, ppl: 19.287766 +epoch: 0, batch: 40661, sum loss: 4940.917969, avg loss: 2.685281, ppl: 14.662325 +epoch: 0, batch: 40662, sum loss: 4559.261230, avg loss: 2.781734, ppl: 16.146988 +epoch: 0, batch: 40663, sum loss: 4773.554688, avg loss: 2.803027, ppl: 16.494499 +epoch: 0, batch: 40664, sum loss: 4694.679688, avg loss: 2.804468, ppl: 16.518291 +epoch: 0, batch: 40665, sum loss: 4026.647949, avg loss: 2.949925, ppl: 19.104525 +epoch: 0, batch: 40666, sum loss: 5214.703613, avg loss: 2.957858, ppl: 19.256678 +epoch: 0, batch: 40667, sum loss: 5067.287109, avg loss: 3.000170, ppl: 20.088951 +epoch: 0, batch: 40668, sum loss: 5262.645508, avg loss: 2.781525, ppl: 16.143623 +epoch: 0, batch: 40669, sum loss: 5607.401855, avg loss: 2.952818, ppl: 19.159878 +epoch: 0, batch: 40670, sum loss: 4709.030273, avg loss: 2.739401, ppl: 15.477713 +epoch: 0, batch: 40671, sum loss: 5039.227539, avg loss: 3.175317, ppl: 23.934401 +epoch: 0, batch: 40672, sum loss: 4747.224121, avg loss: 2.963311, ppl: 19.361973 +epoch: 0, batch: 40673, sum loss: 4893.657715, avg loss: 2.764778, ppl: 15.875522 +epoch: 0, batch: 40674, sum loss: 3912.803711, avg loss: 2.572521, ppl: 13.098802 +epoch: 0, batch: 40675, sum loss: 5113.898926, avg loss: 2.803673, ppl: 16.505156 +epoch: 0, batch: 40676, sum loss: 4831.242188, avg loss: 2.817051, ppl: 16.727444 +epoch: 0, batch: 40677, sum loss: 4329.559570, avg loss: 2.755926, ppl: 15.735607 +epoch: 0, batch: 40678, sum loss: 4285.830566, avg loss: 2.731568, ppl: 15.356953 +epoch: 0, batch: 40679, sum loss: 4259.474609, avg loss: 2.735693, ppl: 
15.420433 +epoch: 0, batch: 40680, sum loss: 4200.648926, avg loss: 2.585015, ppl: 13.263482 +epoch: 0, batch: 40681, sum loss: 4844.622559, avg loss: 2.997910, ppl: 20.043602 +epoch: 0, batch: 40682, sum loss: 4191.512695, avg loss: 2.779518, ppl: 16.111248 +epoch: 0, batch: 40683, sum loss: 4118.649902, avg loss: 2.799898, ppl: 16.442968 +epoch: 0, batch: 40684, sum loss: 4574.551758, avg loss: 2.734341, ppl: 15.399587 +epoch: 0, batch: 40685, sum loss: 4024.851562, avg loss: 2.937848, ppl: 18.875185 +epoch: 0, batch: 40686, sum loss: 5552.382324, avg loss: 3.091527, ppl: 22.010662 +epoch: 0, batch: 40687, sum loss: 5085.278320, avg loss: 2.975587, ppl: 19.601128 +epoch: 0, batch: 40688, sum loss: 4786.229004, avg loss: 3.105924, ppl: 22.329845 +epoch: 0, batch: 40689, sum loss: 5281.611328, avg loss: 2.977233, ppl: 19.633415 +epoch: 0, batch: 40690, sum loss: 4152.040039, avg loss: 2.820679, ppl: 16.788254 +epoch: 0, batch: 40691, sum loss: 4489.766602, avg loss: 2.648830, ppl: 14.137487 +epoch: 0, batch: 40692, sum loss: 4762.018066, avg loss: 2.856639, ppl: 17.402945 +epoch: 0, batch: 40693, sum loss: 5449.163086, avg loss: 3.075149, ppl: 21.653099 +epoch: 0, batch: 40694, sum loss: 5060.644531, avg loss: 2.862356, ppl: 17.502710 +epoch: 0, batch: 40695, sum loss: 4241.170898, avg loss: 2.961712, ppl: 19.331032 +epoch: 0, batch: 40696, sum loss: 4097.829102, avg loss: 2.724620, ppl: 15.250623 +epoch: 0, batch: 40697, sum loss: 4123.467773, avg loss: 2.624741, ppl: 13.800997 +epoch: 0, batch: 40698, sum loss: 4490.954102, avg loss: 2.791146, ppl: 16.299688 +epoch: 0, batch: 40699, sum loss: 4641.961914, avg loss: 2.714597, ppl: 15.098531 +epoch: 0, batch: 40700, sum loss: 4980.455566, avg loss: 2.993062, ppl: 19.946671 +epoch: 0, batch: 40701, sum loss: 4887.536621, avg loss: 2.869957, ppl: 17.636259 +epoch: 0, batch: 40702, sum loss: 5761.716797, avg loss: 2.918803, ppl: 18.519102 +epoch: 0, batch: 40703, sum loss: 4047.646973, avg loss: 2.507836, ppl: 
12.278329 +epoch: 0, batch: 40704, sum loss: 4543.395020, avg loss: 2.712475, ppl: 15.066516 +epoch: 0, batch: 40705, sum loss: 4411.352051, avg loss: 2.888901, ppl: 17.973551 +epoch: 0, batch: 40706, sum loss: 5274.995605, avg loss: 2.997156, ppl: 20.028502 +epoch: 0, batch: 40707, sum loss: 5656.415039, avg loss: 3.102806, ppl: 22.260328 +epoch: 0, batch: 40708, sum loss: 5246.269043, avg loss: 2.868381, ppl: 17.608492 +epoch: 0, batch: 40709, sum loss: 5014.337891, avg loss: 2.737084, ppl: 15.441894 +epoch: 0, batch: 40710, sum loss: 4319.477539, avg loss: 2.843632, ppl: 17.178047 +epoch: 0, batch: 40711, sum loss: 4699.966797, avg loss: 2.883415, ppl: 17.875217 +epoch: 0, batch: 40712, sum loss: 4654.666992, avg loss: 2.831306, ppl: 16.967606 +epoch: 0, batch: 40713, sum loss: 5296.194824, avg loss: 2.985454, ppl: 19.795479 +epoch: 0, batch: 40714, sum loss: 3885.357666, avg loss: 2.777239, ppl: 16.074583 +epoch: 0, batch: 40715, sum loss: 4533.310547, avg loss: 3.071349, ppl: 21.570974 +epoch: 0, batch: 40716, sum loss: 4989.526855, avg loss: 2.955881, ppl: 19.218645 +epoch: 0, batch: 40717, sum loss: 5139.673828, avg loss: 2.908701, ppl: 18.332964 +epoch: 0, batch: 40718, sum loss: 4106.095215, avg loss: 2.712084, ppl: 15.060630 +epoch: 0, batch: 40719, sum loss: 5292.990723, avg loss: 3.019390, ppl: 20.478798 +epoch: 0, batch: 40720, sum loss: 5786.961914, avg loss: 2.939036, ppl: 18.897623 +epoch: 0, batch: 40721, sum loss: 5046.929688, avg loss: 2.895542, ppl: 18.093304 +epoch: 0, batch: 40722, sum loss: 4449.840332, avg loss: 2.931384, ppl: 18.753561 +epoch: 0, batch: 40723, sum loss: 4808.125000, avg loss: 2.953394, ppl: 19.170904 +epoch: 0, batch: 40724, sum loss: 5056.284668, avg loss: 2.937992, ppl: 18.877903 +epoch: 0, batch: 40725, sum loss: 4840.438477, avg loss: 2.982402, ppl: 19.735161 +epoch: 0, batch: 40726, sum loss: 4445.790039, avg loss: 2.782096, ppl: 16.152847 +epoch: 0, batch: 40727, sum loss: 5668.343262, avg loss: 2.822880, ppl: 
16.825243 +epoch: 0, batch: 40728, sum loss: 5634.064941, avg loss: 3.123096, ppl: 22.716597 +epoch: 0, batch: 40729, sum loss: 5220.217285, avg loss: 3.105424, ppl: 22.318674 +epoch: 0, batch: 40730, sum loss: 4543.952148, avg loss: 2.806641, ppl: 16.554224 +epoch: 0, batch: 40731, sum loss: 4821.569824, avg loss: 2.829560, ppl: 16.938004 +epoch: 0, batch: 40732, sum loss: 4691.921875, avg loss: 2.878480, ppl: 17.787210 +epoch: 0, batch: 40733, sum loss: 4272.396484, avg loss: 2.835034, ppl: 17.030981 +epoch: 0, batch: 40734, sum loss: 4652.109863, avg loss: 2.893103, ppl: 18.049232 +epoch: 0, batch: 40735, sum loss: 5142.895020, avg loss: 2.986583, ppl: 19.817839 +epoch: 0, batch: 40736, sum loss: 4787.591797, avg loss: 2.964453, ppl: 19.384102 +epoch: 0, batch: 40737, sum loss: 5208.300293, avg loss: 2.777760, ppl: 16.082958 +epoch: 0, batch: 40738, sum loss: 5137.433105, avg loss: 3.140240, ppl: 23.109423 +epoch: 0, batch: 40739, sum loss: 4794.309082, avg loss: 2.705592, ppl: 14.963175 +epoch: 0, batch: 40740, sum loss: 5335.458008, avg loss: 3.007586, ppl: 20.238485 +epoch: 0, batch: 40741, sum loss: 5080.485840, avg loss: 2.909786, ppl: 18.352865 +epoch: 0, batch: 40742, sum loss: 4660.933594, avg loss: 2.913083, ppl: 18.413486 +epoch: 0, batch: 40743, sum loss: 5024.540527, avg loss: 2.881044, ppl: 17.832874 +epoch: 0, batch: 40744, sum loss: 5298.204590, avg loss: 3.025817, ppl: 20.610830 +epoch: 0, batch: 40745, sum loss: 6493.335938, avg loss: 3.057126, ppl: 21.266350 +epoch: 0, batch: 40746, sum loss: 5407.350586, avg loss: 2.956452, ppl: 19.229626 +epoch: 0, batch: 40747, sum loss: 3967.828613, avg loss: 2.706568, ppl: 14.977783 +epoch: 0, batch: 40748, sum loss: 4599.521973, avg loss: 2.664845, ppl: 14.365719 +epoch: 0, batch: 40749, sum loss: 5906.359375, avg loss: 3.252400, ppl: 25.852322 +epoch: 0, batch: 40750, sum loss: 4308.776367, avg loss: 2.709922, ppl: 15.028108 +epoch: 0, batch: 40751, sum loss: 4775.110352, avg loss: 2.980718, ppl: 
19.701956 +epoch: 0, batch: 40752, sum loss: 4634.269531, avg loss: 3.050869, ppl: 21.133692 +epoch: 0, batch: 40753, sum loss: 4717.416992, avg loss: 2.698751, ppl: 14.861161 +epoch: 0, batch: 40754, sum loss: 4677.027344, avg loss: 2.809026, ppl: 16.593740 +epoch: 0, batch: 40755, sum loss: 4815.937500, avg loss: 2.851354, ppl: 17.311213 +epoch: 0, batch: 40756, sum loss: 4440.877441, avg loss: 2.960585, ppl: 19.309261 +epoch: 0, batch: 40757, sum loss: 3328.768799, avg loss: 2.413900, ppl: 11.177472 +epoch: 0, batch: 40758, sum loss: 4980.389160, avg loss: 2.852457, ppl: 17.330307 +epoch: 0, batch: 40759, sum loss: 4042.808350, avg loss: 2.729783, ppl: 15.329557 +epoch: 0, batch: 40760, sum loss: 4024.809570, avg loss: 2.658395, ppl: 14.273359 +epoch: 0, batch: 40761, sum loss: 5023.616699, avg loss: 2.890458, ppl: 18.001558 +epoch: 0, batch: 40762, sum loss: 4647.803223, avg loss: 2.816850, ppl: 16.724094 +epoch: 0, batch: 40763, sum loss: 4967.340332, avg loss: 2.693785, ppl: 14.787547 +epoch: 0, batch: 40764, sum loss: 4784.075684, avg loss: 2.696773, ppl: 14.831793 +epoch: 0, batch: 40765, sum loss: 4734.886719, avg loss: 2.591618, ppl: 13.351360 +epoch: 0, batch: 40766, sum loss: 4516.347168, avg loss: 2.798233, ppl: 16.415607 +epoch: 0, batch: 40767, sum loss: 4251.267090, avg loss: 2.860880, ppl: 17.476894 +epoch: 0, batch: 40768, sum loss: 4847.173340, avg loss: 2.824693, ppl: 16.855772 +epoch: 0, batch: 40769, sum loss: 4295.895020, avg loss: 2.720643, ppl: 15.190084 +epoch: 0, batch: 40770, sum loss: 4229.006348, avg loss: 2.702240, ppl: 14.913107 +epoch: 0, batch: 40771, sum loss: 4395.519531, avg loss: 2.688391, ppl: 14.707992 +epoch: 0, batch: 40772, sum loss: 4956.012695, avg loss: 2.950008, ppl: 19.106100 +epoch: 0, batch: 40773, sum loss: 4224.408691, avg loss: 2.671985, ppl: 14.468663 +epoch: 0, batch: 40774, sum loss: 3857.525635, avg loss: 2.400452, ppl: 11.028156 +epoch: 0, batch: 40775, sum loss: 4202.781250, avg loss: 2.945187, ppl: 
19.014210 +epoch: 0, batch: 40776, sum loss: 4210.822266, avg loss: 2.820377, ppl: 16.783171 +epoch: 0, batch: 40777, sum loss: 5467.174805, avg loss: 3.151109, ppl: 23.361969 +epoch: 0, batch: 40778, sum loss: 5594.292480, avg loss: 2.949021, ppl: 19.087255 +epoch: 0, batch: 40779, sum loss: 3677.218262, avg loss: 2.436858, ppl: 11.437046 +epoch: 0, batch: 40780, sum loss: 5067.163086, avg loss: 2.902155, ppl: 18.213360 +epoch: 0, batch: 40781, sum loss: 4763.767090, avg loss: 2.947876, ppl: 19.065411 +epoch: 0, batch: 40782, sum loss: 4090.264404, avg loss: 2.776826, ppl: 16.067938 +epoch: 0, batch: 40783, sum loss: 4174.421875, avg loss: 2.533023, ppl: 12.591515 +epoch: 0, batch: 40784, sum loss: 4192.753906, avg loss: 2.680789, ppl: 14.596605 +epoch: 0, batch: 40785, sum loss: 4220.599121, avg loss: 2.708985, ppl: 15.014033 +epoch: 0, batch: 40786, sum loss: 4466.176270, avg loss: 2.758602, ppl: 15.777769 +epoch: 0, batch: 40787, sum loss: 4461.235352, avg loss: 3.012313, ppl: 20.334377 +epoch: 0, batch: 40788, sum loss: 4848.760254, avg loss: 2.838852, ppl: 17.096134 +epoch: 0, batch: 40789, sum loss: 4796.058105, avg loss: 2.854797, ppl: 17.370905 +epoch: 0, batch: 40790, sum loss: 5791.602051, avg loss: 3.040211, ppl: 20.909655 +epoch: 0, batch: 40791, sum loss: 4357.514648, avg loss: 2.960268, ppl: 19.303150 +epoch: 0, batch: 40792, sum loss: 5190.062012, avg loss: 2.656122, ppl: 14.240952 +epoch: 0, batch: 40793, sum loss: 4857.770508, avg loss: 2.777456, ppl: 16.078068 +epoch: 0, batch: 40794, sum loss: 4672.248047, avg loss: 2.931147, ppl: 18.749121 +epoch: 0, batch: 40795, sum loss: 5317.439453, avg loss: 2.845072, ppl: 17.202793 +epoch: 0, batch: 40796, sum loss: 5305.752441, avg loss: 2.989156, ppl: 19.868912 +epoch: 0, batch: 40797, sum loss: 5269.011230, avg loss: 2.880816, ppl: 17.828806 +epoch: 0, batch: 40798, sum loss: 4575.751953, avg loss: 2.852713, ppl: 17.334753 +epoch: 0, batch: 40799, sum loss: 4754.994141, avg loss: 3.005685, ppl: 
20.200050 +epoch: 0, batch: 40800, sum loss: 4588.686035, avg loss: 2.789475, ppl: 16.272470 +epoch: 0, batch: 40801, sum loss: 4869.528320, avg loss: 2.965608, ppl: 19.406502 +epoch: 0, batch: 40802, sum loss: 5206.526367, avg loss: 3.209942, ppl: 24.777651 +epoch: 0, batch: 40803, sum loss: 5020.018555, avg loss: 2.802914, ppl: 16.492630 +epoch: 0, batch: 40804, sum loss: 4531.083008, avg loss: 3.069839, ppl: 21.538445 +epoch: 0, batch: 40805, sum loss: 3276.853027, avg loss: 2.438135, ppl: 11.451660 +epoch: 0, batch: 40806, sum loss: 5623.626953, avg loss: 2.988112, ppl: 19.848173 +epoch: 0, batch: 40807, sum loss: 5855.570801, avg loss: 2.910324, ppl: 18.362740 +epoch: 0, batch: 40808, sum loss: 5591.945312, avg loss: 2.977607, ppl: 19.640751 +epoch: 0, batch: 40809, sum loss: 5840.410645, avg loss: 2.967689, ppl: 19.446917 +epoch: 0, batch: 40810, sum loss: 5004.899902, avg loss: 2.837245, ppl: 17.068676 +epoch: 0, batch: 40811, sum loss: 4899.601562, avg loss: 2.868619, ppl: 17.612679 +epoch: 0, batch: 40812, sum loss: 4482.237305, avg loss: 2.675963, ppl: 14.526327 +epoch: 0, batch: 40813, sum loss: 5882.805664, avg loss: 3.299387, ppl: 27.096010 +epoch: 0, batch: 40814, sum loss: 4955.744629, avg loss: 2.896403, ppl: 18.108883 +epoch: 0, batch: 40815, sum loss: 4752.404785, avg loss: 2.780810, ppl: 16.132088 +epoch: 0, batch: 40816, sum loss: 6335.110352, avg loss: 3.311610, ppl: 27.429256 +epoch: 0, batch: 40817, sum loss: 4376.584961, avg loss: 2.711639, ppl: 15.053927 +epoch: 0, batch: 40818, sum loss: 4802.735352, avg loss: 3.013008, ppl: 20.348524 +epoch: 0, batch: 40819, sum loss: 4407.718262, avg loss: 2.759999, ppl: 15.799828 +epoch: 0, batch: 40820, sum loss: 4067.464844, avg loss: 2.688344, ppl: 14.707304 +epoch: 0, batch: 40821, sum loss: 5201.418457, avg loss: 3.088728, ppl: 21.949150 +epoch: 0, batch: 40822, sum loss: 3977.686035, avg loss: 2.634229, ppl: 13.932569 +epoch: 0, batch: 40823, sum loss: 4244.107422, avg loss: 2.738134, ppl: 
15.458112 +epoch: 0, batch: 40824, sum loss: 5057.913574, avg loss: 2.987545, ppl: 19.836924 +epoch: 0, batch: 40825, sum loss: 3636.404541, avg loss: 2.313234, ppl: 10.107061 +epoch: 0, batch: 40826, sum loss: 5433.870117, avg loss: 3.265547, ppl: 26.194437 +epoch: 0, batch: 40827, sum loss: 4303.700684, avg loss: 2.764098, ppl: 15.864726 +epoch: 0, batch: 40828, sum loss: 4267.800293, avg loss: 2.599148, ppl: 13.452270 +epoch: 0, batch: 40829, sum loss: 5051.680176, avg loss: 3.034042, ppl: 20.781063 +epoch: 0, batch: 40830, sum loss: 4731.269043, avg loss: 2.977514, ppl: 19.638926 +epoch: 0, batch: 40831, sum loss: 3760.030273, avg loss: 2.762697, ppl: 15.842508 +epoch: 0, batch: 40832, sum loss: 4959.457031, avg loss: 2.901964, ppl: 18.209883 +epoch: 0, batch: 40833, sum loss: 4690.758301, avg loss: 2.677374, ppl: 14.546838 +epoch: 0, batch: 40834, sum loss: 4626.657227, avg loss: 2.795563, ppl: 16.371851 +epoch: 0, batch: 40835, sum loss: 4911.010254, avg loss: 2.962009, ppl: 19.336775 +epoch: 0, batch: 40836, sum loss: 4391.252930, avg loss: 2.754864, ppl: 15.718899 +epoch: 0, batch: 40837, sum loss: 4138.344727, avg loss: 2.597831, ppl: 13.434567 +epoch: 0, batch: 40838, sum loss: 5425.296875, avg loss: 2.865978, ppl: 17.566229 +epoch: 0, batch: 40839, sum loss: 4828.939941, avg loss: 2.905499, ppl: 18.274368 +epoch: 0, batch: 40840, sum loss: 4232.081055, avg loss: 2.732137, ppl: 15.365695 +epoch: 0, batch: 40841, sum loss: 3817.446045, avg loss: 2.675155, ppl: 14.514598 +epoch: 0, batch: 40842, sum loss: 4770.281738, avg loss: 2.837764, ppl: 17.077541 +epoch: 0, batch: 40843, sum loss: 4648.989746, avg loss: 2.852141, ppl: 17.324833 +epoch: 0, batch: 40844, sum loss: 4574.187012, avg loss: 2.876847, ppl: 17.758194 +epoch: 0, batch: 40845, sum loss: 5655.540527, avg loss: 2.788728, ppl: 16.260323 +epoch: 0, batch: 40846, sum loss: 5065.057129, avg loss: 2.910952, ppl: 18.374289 +epoch: 0, batch: 40847, sum loss: 4664.598633, avg loss: 2.786499, ppl: 
16.224115 +epoch: 0, batch: 40848, sum loss: 4054.817383, avg loss: 2.669399, ppl: 14.431293 +epoch: 0, batch: 40849, sum loss: 5543.170898, avg loss: 2.973804, ppl: 19.566214 +epoch: 0, batch: 40850, sum loss: 5473.004883, avg loss: 2.885084, ppl: 17.905079 +epoch: 0, batch: 40851, sum loss: 5015.523438, avg loss: 2.740723, ppl: 15.498192 +epoch: 0, batch: 40852, sum loss: 4427.426758, avg loss: 2.791568, ppl: 16.306570 +epoch: 0, batch: 40853, sum loss: 5107.104492, avg loss: 2.891905, ppl: 18.027620 +epoch: 0, batch: 40854, sum loss: 5182.461914, avg loss: 3.123847, ppl: 22.733664 +epoch: 0, batch: 40855, sum loss: 5237.390625, avg loss: 2.840234, ppl: 17.119764 +epoch: 0, batch: 40856, sum loss: 4835.885742, avg loss: 2.858088, ppl: 17.428181 +epoch: 0, batch: 40857, sum loss: 5553.174805, avg loss: 2.908945, ppl: 18.337435 +epoch: 0, batch: 40858, sum loss: 4005.478760, avg loss: 2.775800, ppl: 16.051455 +epoch: 0, batch: 40859, sum loss: 3350.687256, avg loss: 2.241262, ppl: 9.405197 +epoch: 0, batch: 40860, sum loss: 3998.462402, avg loss: 2.679935, ppl: 14.584142 +epoch: 0, batch: 40861, sum loss: 5751.847656, avg loss: 3.107427, ppl: 22.363432 +epoch: 0, batch: 40862, sum loss: 4450.469238, avg loss: 2.617923, ppl: 13.707228 +epoch: 0, batch: 40863, sum loss: 4813.088867, avg loss: 2.913492, ppl: 18.421017 +epoch: 0, batch: 40864, sum loss: 4286.874512, avg loss: 2.755061, ppl: 15.721994 +epoch: 0, batch: 40865, sum loss: 4573.341797, avg loss: 2.860126, ppl: 17.463728 +epoch: 0, batch: 40866, sum loss: 4723.409668, avg loss: 2.747766, ppl: 15.607725 +epoch: 0, batch: 40867, sum loss: 5223.931641, avg loss: 2.933145, ppl: 18.786627 +epoch: 0, batch: 40868, sum loss: 5117.989258, avg loss: 2.770974, ppl: 15.974192 +epoch: 0, batch: 40869, sum loss: 4428.137695, avg loss: 2.664343, ppl: 14.358511 +epoch: 0, batch: 40870, sum loss: 4398.569336, avg loss: 2.785668, ppl: 16.210638 +epoch: 0, batch: 40871, sum loss: 4808.124512, avg loss: 2.843362, ppl: 
17.173403 +epoch: 0, batch: 40872, sum loss: 5068.304199, avg loss: 2.848962, ppl: 17.269852 +epoch: 0, batch: 40873, sum loss: 4478.904785, avg loss: 2.711202, ppl: 15.047346 +epoch: 0, batch: 40874, sum loss: 4721.703613, avg loss: 2.967758, ppl: 19.448277 +epoch: 0, batch: 40875, sum loss: 4587.400879, avg loss: 2.851088, ppl: 17.306602 +epoch: 0, batch: 40876, sum loss: 4520.193359, avg loss: 2.825121, ppl: 16.862980 +epoch: 0, batch: 40877, sum loss: 5055.213867, avg loss: 2.848008, ppl: 17.253374 +epoch: 0, batch: 40878, sum loss: 5353.281250, avg loss: 2.906233, ppl: 18.287781 +epoch: 0, batch: 40879, sum loss: 3375.161377, avg loss: 2.746266, ppl: 15.584337 +epoch: 0, batch: 40880, sum loss: 3987.768311, avg loss: 2.788649, ppl: 16.259045 +epoch: 0, batch: 40881, sum loss: 4813.168945, avg loss: 2.892529, ppl: 18.038881 +epoch: 0, batch: 40882, sum loss: 4676.562012, avg loss: 2.870818, ppl: 17.651445 +epoch: 0, batch: 40883, sum loss: 4904.979492, avg loss: 2.870088, ppl: 17.638563 +epoch: 0, batch: 40884, sum loss: 6419.794922, avg loss: 3.192340, ppl: 24.345325 +epoch: 0, batch: 40885, sum loss: 4643.864258, avg loss: 2.809355, ppl: 16.599213 +epoch: 0, batch: 40886, sum loss: 4700.395508, avg loss: 2.764939, ppl: 15.878065 +epoch: 0, batch: 40887, sum loss: 4265.607910, avg loss: 2.696339, ppl: 14.825352 +epoch: 0, batch: 40888, sum loss: 4095.599854, avg loss: 2.748725, ppl: 15.622699 +epoch: 0, batch: 40889, sum loss: 3876.630859, avg loss: 2.603513, ppl: 13.511119 +epoch: 0, batch: 40890, sum loss: 4862.041992, avg loss: 3.046392, ppl: 21.039301 +epoch: 0, batch: 40891, sum loss: 5256.838867, avg loss: 3.026390, ppl: 20.622646 +epoch: 0, batch: 40892, sum loss: 4453.088379, avg loss: 2.711991, ppl: 15.059222 +epoch: 0, batch: 40893, sum loss: 5241.896973, avg loss: 3.099880, ppl: 22.195288 +epoch: 0, batch: 40894, sum loss: 4291.036133, avg loss: 2.817489, ppl: 16.734783 +epoch: 0, batch: 40895, sum loss: 5575.574219, avg loss: 3.028558, ppl: 
20.667398 +epoch: 0, batch: 40896, sum loss: 4165.419434, avg loss: 2.491280, ppl: 12.076719 +epoch: 0, batch: 40897, sum loss: 4663.725586, avg loss: 2.769433, ppl: 15.949593 +epoch: 0, batch: 40898, sum loss: 4597.361816, avg loss: 2.979496, ppl: 19.677893 +epoch: 0, batch: 40899, sum loss: 4951.853027, avg loss: 2.867315, ppl: 17.589727 +epoch: 0, batch: 40900, sum loss: 4289.504395, avg loss: 2.545700, ppl: 12.752150 +epoch: 0, batch: 40901, sum loss: 6005.572266, avg loss: 3.349455, ppl: 28.487200 +epoch: 0, batch: 40902, sum loss: 4026.570312, avg loss: 2.444791, ppl: 11.528135 +epoch: 0, batch: 40903, sum loss: 4327.366211, avg loss: 2.512989, ppl: 12.341759 +epoch: 0, batch: 40904, sum loss: 4658.784180, avg loss: 2.884696, ppl: 17.898127 +epoch: 0, batch: 40905, sum loss: 5512.909180, avg loss: 2.994519, ppl: 19.975750 +epoch: 0, batch: 40906, sum loss: 5375.337402, avg loss: 2.927744, ppl: 18.685423 +epoch: 0, batch: 40907, sum loss: 4892.294922, avg loss: 2.722479, ppl: 15.218002 +epoch: 0, batch: 40908, sum loss: 4695.839844, avg loss: 2.791819, ppl: 16.310663 +epoch: 0, batch: 40909, sum loss: 5818.801758, avg loss: 3.148702, ppl: 23.305798 +epoch: 0, batch: 40910, sum loss: 5983.779785, avg loss: 3.095592, ppl: 22.100325 +epoch: 0, batch: 40911, sum loss: 4426.807129, avg loss: 2.835879, ppl: 17.045378 +epoch: 0, batch: 40912, sum loss: 5805.325684, avg loss: 2.889659, ppl: 17.987183 +epoch: 0, batch: 40913, sum loss: 4640.968750, avg loss: 2.861263, ppl: 17.483595 +epoch: 0, batch: 40914, sum loss: 4498.313477, avg loss: 2.769897, ppl: 15.956998 +epoch: 0, batch: 40915, sum loss: 4172.163086, avg loss: 2.665919, ppl: 14.381157 +epoch: 0, batch: 40916, sum loss: 4048.180908, avg loss: 2.729724, ppl: 15.328658 +epoch: 0, batch: 40917, sum loss: 5033.763184, avg loss: 2.909690, ppl: 18.351103 +epoch: 0, batch: 40918, sum loss: 4235.741211, avg loss: 2.757644, ppl: 15.762665 +epoch: 0, batch: 40919, sum loss: 4483.758789, avg loss: 2.828870, ppl: 
16.926321 +epoch: 0, batch: 40920, sum loss: 4645.774414, avg loss: 3.107541, ppl: 22.365986 +epoch: 0, batch: 40921, sum loss: 5365.067383, avg loss: 3.024277, ppl: 20.579115 +epoch: 0, batch: 40922, sum loss: 5784.787109, avg loss: 3.060734, ppl: 21.343218 +epoch: 0, batch: 40923, sum loss: 4330.943848, avg loss: 2.795961, ppl: 16.378363 +epoch: 0, batch: 40924, sum loss: 4282.656250, avg loss: 2.802786, ppl: 16.490519 +epoch: 0, batch: 40925, sum loss: 4433.179688, avg loss: 2.704808, ppl: 14.951439 +epoch: 0, batch: 40926, sum loss: 5044.359375, avg loss: 3.060898, ppl: 21.346708 +epoch: 0, batch: 40927, sum loss: 5831.778320, avg loss: 3.097068, ppl: 22.132954 +epoch: 0, batch: 40928, sum loss: 4937.180176, avg loss: 2.938798, ppl: 18.893122 +epoch: 0, batch: 40929, sum loss: 4156.224609, avg loss: 2.638873, ppl: 13.997415 +epoch: 0, batch: 40930, sum loss: 4384.056641, avg loss: 2.812095, ppl: 16.644760 +epoch: 0, batch: 40931, sum loss: 4339.482422, avg loss: 2.889136, ppl: 17.977772 +epoch: 0, batch: 40932, sum loss: 4969.970703, avg loss: 3.102354, ppl: 22.250257 +epoch: 0, batch: 40933, sum loss: 5016.473633, avg loss: 2.913167, ppl: 18.415028 +epoch: 0, batch: 40934, sum loss: 5083.122070, avg loss: 2.883223, ppl: 17.871778 +epoch: 0, batch: 40935, sum loss: 3546.655518, avg loss: 2.473260, ppl: 11.861055 +epoch: 0, batch: 40936, sum loss: 5368.440430, avg loss: 2.787352, ppl: 16.237970 +epoch: 0, batch: 40937, sum loss: 3932.629883, avg loss: 2.558640, ppl: 12.918240 +epoch: 0, batch: 40938, sum loss: 4932.536133, avg loss: 2.782028, ppl: 16.151751 +epoch: 0, batch: 40939, sum loss: 5228.216309, avg loss: 3.044972, ppl: 21.009438 +epoch: 0, batch: 40940, sum loss: 5584.916992, avg loss: 3.167849, ppl: 23.756319 +epoch: 0, batch: 40941, sum loss: 5440.809570, avg loss: 3.093127, ppl: 22.045898 +epoch: 0, batch: 40942, sum loss: 4361.081543, avg loss: 2.531098, ppl: 12.567296 +epoch: 0, batch: 40943, sum loss: 6028.699707, avg loss: 3.244725, ppl: 
25.654659 +epoch: 0, batch: 40944, sum loss: 4798.937012, avg loss: 2.911976, ppl: 18.393114 +epoch: 0, batch: 40945, sum loss: 4371.432617, avg loss: 2.756263, ppl: 15.740906 +epoch: 0, batch: 40946, sum loss: 4173.202637, avg loss: 2.963923, ppl: 19.373817 +epoch: 0, batch: 40947, sum loss: 4091.410889, avg loss: 2.831426, ppl: 16.969645 +epoch: 0, batch: 40948, sum loss: 4835.905762, avg loss: 3.020553, ppl: 20.502634 +epoch: 0, batch: 40949, sum loss: 4716.705566, avg loss: 2.962755, ppl: 19.351206 +epoch: 0, batch: 40950, sum loss: 6223.505371, avg loss: 3.062749, ppl: 21.386261 +epoch: 0, batch: 40951, sum loss: 4112.955078, avg loss: 2.446731, ppl: 11.550528 +epoch: 0, batch: 40952, sum loss: 4037.665527, avg loss: 2.515679, ppl: 12.375013 +epoch: 0, batch: 40953, sum loss: 4571.730957, avg loss: 2.809915, ppl: 16.608500 +epoch: 0, batch: 40954, sum loss: 4684.452148, avg loss: 2.721936, ppl: 15.209740 +epoch: 0, batch: 40955, sum loss: 4753.587402, avg loss: 2.741400, ppl: 15.508678 +epoch: 0, batch: 40956, sum loss: 4537.102539, avg loss: 2.499781, ppl: 12.179828 +epoch: 0, batch: 40957, sum loss: 5121.062012, avg loss: 3.000036, ppl: 20.086266 +epoch: 0, batch: 40958, sum loss: 5293.265625, avg loss: 2.883042, ppl: 17.868549 +epoch: 0, batch: 40959, sum loss: 4753.388184, avg loss: 2.948752, ppl: 19.082123 +epoch: 0, batch: 40960, sum loss: 4543.544434, avg loss: 3.047314, ppl: 21.058699 +epoch: 0, batch: 40961, sum loss: 7167.660645, avg loss: 3.286410, ppl: 26.746674 +epoch: 0, batch: 40962, sum loss: 6531.059082, avg loss: 3.441022, ppl: 31.218845 +epoch: 0, batch: 40963, sum loss: 4323.398926, avg loss: 2.668765, ppl: 14.422144 +epoch: 0, batch: 40964, sum loss: 4378.388672, avg loss: 2.748518, ppl: 15.619463 +epoch: 0, batch: 40965, sum loss: 4918.102539, avg loss: 3.028388, ppl: 20.663900 +epoch: 0, batch: 40966, sum loss: 4650.224609, avg loss: 2.774597, ppl: 16.032167 +epoch: 0, batch: 40967, sum loss: 4460.379883, avg loss: 2.868412, ppl: 
17.609024 +epoch: 0, batch: 40968, sum loss: 4230.689941, avg loss: 2.801781, ppl: 16.473969 +epoch: 0, batch: 40969, sum loss: 5831.377441, avg loss: 3.322722, ppl: 27.735750 +epoch: 0, batch: 40970, sum loss: 4384.893066, avg loss: 2.852891, ppl: 17.337828 +epoch: 0, batch: 40971, sum loss: 4491.319824, avg loss: 2.676591, ppl: 14.535460 +epoch: 0, batch: 40972, sum loss: 5729.855957, avg loss: 3.064094, ppl: 21.415052 +epoch: 0, batch: 40973, sum loss: 4820.151367, avg loss: 3.018254, ppl: 20.455542 +epoch: 0, batch: 40974, sum loss: 5159.668457, avg loss: 2.996323, ppl: 20.011820 +epoch: 0, batch: 40975, sum loss: 4598.851562, avg loss: 2.739042, ppl: 15.472157 +epoch: 0, batch: 40976, sum loss: 4694.806641, avg loss: 3.058506, ppl: 21.295717 +epoch: 0, batch: 40977, sum loss: 4516.244629, avg loss: 2.689842, ppl: 14.729348 +epoch: 0, batch: 40978, sum loss: 4760.170410, avg loss: 2.795168, ppl: 16.365374 +epoch: 0, batch: 40979, sum loss: 5109.533203, avg loss: 2.838630, ppl: 17.092329 +epoch: 0, batch: 40980, sum loss: 4156.274902, avg loss: 2.439128, ppl: 11.463045 +epoch: 0, batch: 40981, sum loss: 4631.012695, avg loss: 2.835893, ppl: 17.045610 +epoch: 0, batch: 40982, sum loss: 4520.562500, avg loss: 2.713423, ppl: 15.080806 +epoch: 0, batch: 40983, sum loss: 5491.599609, avg loss: 2.997598, ppl: 20.037354 +epoch: 0, batch: 40984, sum loss: 4219.296875, avg loss: 2.656988, ppl: 14.253296 +epoch: 0, batch: 40985, sum loss: 4184.336426, avg loss: 2.738440, ppl: 15.462845 +epoch: 0, batch: 40986, sum loss: 5044.138672, avg loss: 2.766944, ppl: 15.909938 +epoch: 0, batch: 40987, sum loss: 6403.780273, avg loss: 3.140648, ppl: 23.118835 +epoch: 0, batch: 40988, sum loss: 5419.229492, avg loss: 3.037685, ppl: 20.856901 +epoch: 0, batch: 40989, sum loss: 5473.071289, avg loss: 3.002234, ppl: 20.130447 +epoch: 0, batch: 40990, sum loss: 4794.323730, avg loss: 2.935899, ppl: 18.838436 +epoch: 0, batch: 40991, sum loss: 4643.213379, avg loss: 2.951820, ppl: 
19.140760 +epoch: 0, batch: 40992, sum loss: 4258.864258, avg loss: 2.770894, ppl: 15.972912 +epoch: 0, batch: 40993, sum loss: 4812.723145, avg loss: 2.939965, ppl: 18.915190 +epoch: 0, batch: 40994, sum loss: 4914.415527, avg loss: 2.994769, ppl: 19.980736 +epoch: 0, batch: 40995, sum loss: 3437.231445, avg loss: 2.546097, ppl: 12.757218 +epoch: 0, batch: 40996, sum loss: 4160.596680, avg loss: 2.687724, ppl: 14.698187 +epoch: 0, batch: 40997, sum loss: 5085.881348, avg loss: 3.018327, ppl: 20.457043 +epoch: 0, batch: 40998, sum loss: 5394.774902, avg loss: 3.086256, ppl: 21.894945 +epoch: 0, batch: 40999, sum loss: 3893.820557, avg loss: 2.402110, ppl: 11.046461 +epoch: 0, batch: 41000, sum loss: 4658.043457, avg loss: 2.864725, ppl: 17.544233 +epoch: 0, batch: 41001, sum loss: 4694.091797, avg loss: 2.693111, ppl: 14.777574 +epoch: 0, batch: 41002, sum loss: 4895.033203, avg loss: 2.768684, ppl: 15.937649 +epoch: 0, batch: 41003, sum loss: 4927.346680, avg loss: 2.943457, ppl: 18.981350 +epoch: 0, batch: 41004, sum loss: 4087.409668, avg loss: 2.771125, ppl: 15.976603 +epoch: 0, batch: 41005, sum loss: 4496.204102, avg loss: 2.766895, ppl: 15.909157 +epoch: 0, batch: 41006, sum loss: 4687.053711, avg loss: 2.859703, ppl: 17.456347 +epoch: 0, batch: 41007, sum loss: 6022.404297, avg loss: 3.104332, ppl: 22.294327 +epoch: 0, batch: 41008, sum loss: 4655.442383, avg loss: 2.884413, ppl: 17.893061 +epoch: 0, batch: 41009, sum loss: 5126.773926, avg loss: 2.810731, ppl: 16.622072 +epoch: 0, batch: 41010, sum loss: 4945.436035, avg loss: 3.110337, ppl: 22.428602 +epoch: 0, batch: 41011, sum loss: 4476.521484, avg loss: 2.648829, ppl: 14.137477 +epoch: 0, batch: 41012, sum loss: 4868.333008, avg loss: 2.936268, ppl: 18.845390 +epoch: 0, batch: 41013, sum loss: 3858.440674, avg loss: 2.516922, ppl: 12.390394 +epoch: 0, batch: 41014, sum loss: 4121.459473, avg loss: 2.564692, ppl: 12.996652 +epoch: 0, batch: 41015, sum loss: 4729.665527, avg loss: 2.598717, ppl: 
13.446481 +epoch: 0, batch: 41016, sum loss: 5263.262695, avg loss: 2.906275, ppl: 18.288538 +epoch: 0, batch: 41017, sum loss: 5376.984375, avg loss: 3.090221, ppl: 21.981934 +epoch: 0, batch: 41018, sum loss: 4479.428223, avg loss: 2.903064, ppl: 18.229925 +epoch: 0, batch: 41019, sum loss: 4693.573242, avg loss: 2.997173, ppl: 20.028837 +epoch: 0, batch: 41020, sum loss: 5152.264160, avg loss: 2.859192, ppl: 17.447426 +epoch: 0, batch: 41021, sum loss: 4660.908691, avg loss: 2.873556, ppl: 17.699854 +epoch: 0, batch: 41022, sum loss: 3773.343506, avg loss: 2.860761, ppl: 17.474819 +epoch: 0, batch: 41023, sum loss: 4090.117432, avg loss: 2.593606, ppl: 13.377932 +epoch: 0, batch: 41024, sum loss: 4787.171875, avg loss: 2.897804, ppl: 18.134279 +epoch: 0, batch: 41025, sum loss: 3824.080566, avg loss: 2.639117, ppl: 14.000839 +epoch: 0, batch: 41026, sum loss: 4115.161133, avg loss: 2.549666, ppl: 12.802829 +epoch: 0, batch: 41027, sum loss: 4749.375488, avg loss: 2.942612, ppl: 18.965319 +epoch: 0, batch: 41028, sum loss: 4935.452148, avg loss: 3.040944, ppl: 20.924984 +epoch: 0, batch: 41029, sum loss: 4203.198730, avg loss: 2.743602, ppl: 15.542874 +epoch: 0, batch: 41030, sum loss: 4726.236816, avg loss: 2.749411, ppl: 15.633415 +epoch: 0, batch: 41031, sum loss: 4899.749512, avg loss: 2.882206, ppl: 17.853609 +epoch: 0, batch: 41032, sum loss: 5766.018555, avg loss: 3.049190, ppl: 21.098249 +epoch: 0, batch: 41033, sum loss: 4420.522461, avg loss: 2.592682, ppl: 13.365568 +epoch: 0, batch: 41034, sum loss: 4957.150391, avg loss: 2.811770, ppl: 16.639343 +epoch: 0, batch: 41035, sum loss: 5409.266113, avg loss: 2.930263, ppl: 18.732561 +epoch: 0, batch: 41036, sum loss: 5079.498047, avg loss: 2.852048, ppl: 17.323231 +epoch: 0, batch: 41037, sum loss: 5056.401367, avg loss: 3.152370, ppl: 23.391436 +epoch: 0, batch: 41038, sum loss: 4783.846191, avg loss: 2.942095, ppl: 18.955513 +epoch: 0, batch: 41039, sum loss: 4898.362305, avg loss: 2.883086, ppl: 
17.869328 +epoch: 0, batch: 41040, sum loss: 4960.487793, avg loss: 3.011832, ppl: 20.324606 +epoch: 0, batch: 41041, sum loss: 5438.666504, avg loss: 3.072693, ppl: 21.599995 +epoch: 0, batch: 41042, sum loss: 4955.280273, avg loss: 3.021512, ppl: 20.522303 +epoch: 0, batch: 41043, sum loss: 5869.499512, avg loss: 3.008457, ppl: 20.256124 +epoch: 0, batch: 41044, sum loss: 5354.582520, avg loss: 3.070288, ppl: 21.548107 +epoch: 0, batch: 41045, sum loss: 4546.294922, avg loss: 2.844991, ppl: 17.201399 +epoch: 0, batch: 41046, sum loss: 6536.909668, avg loss: 3.096594, ppl: 22.122471 +epoch: 0, batch: 41047, sum loss: 4805.928223, avg loss: 2.898630, ppl: 18.149261 +epoch: 0, batch: 41048, sum loss: 4640.412598, avg loss: 2.674589, ppl: 14.506392 +epoch: 0, batch: 41049, sum loss: 5234.574707, avg loss: 2.772550, ppl: 15.999382 +epoch: 0, batch: 41050, sum loss: 5186.701172, avg loss: 2.972322, ppl: 19.537222 +epoch: 0, batch: 41051, sum loss: 5236.478516, avg loss: 2.993984, ppl: 19.965069 +epoch: 0, batch: 41052, sum loss: 4814.107910, avg loss: 2.949821, ppl: 19.102539 +epoch: 0, batch: 41053, sum loss: 4217.682129, avg loss: 2.584364, ppl: 13.254859 +epoch: 0, batch: 41054, sum loss: 4253.739258, avg loss: 2.648654, ppl: 14.135007 +epoch: 0, batch: 41055, sum loss: 5112.929199, avg loss: 2.903424, ppl: 18.236471 +epoch: 0, batch: 41056, sum loss: 4373.888672, avg loss: 2.634873, ppl: 13.941537 +epoch: 0, batch: 41057, sum loss: 4162.759766, avg loss: 2.773324, ppl: 16.011772 +epoch: 0, batch: 41058, sum loss: 4440.948242, avg loss: 2.749813, ppl: 15.639709 +epoch: 0, batch: 41059, sum loss: 5219.101074, avg loss: 2.825718, ppl: 16.873058 +epoch: 0, batch: 41060, sum loss: 4872.490723, avg loss: 3.115403, ppl: 22.542517 +epoch: 0, batch: 41061, sum loss: 5548.114258, avg loss: 3.010371, ppl: 20.294931 +epoch: 0, batch: 41062, sum loss: 4026.808838, avg loss: 2.718980, ppl: 15.164840 +epoch: 0, batch: 41063, sum loss: 5360.596680, avg loss: 3.173829, ppl: 
23.898813 +epoch: 0, batch: 41064, sum loss: 5131.990723, avg loss: 3.038479, ppl: 20.873466 +epoch: 0, batch: 41065, sum loss: 5055.253418, avg loss: 3.000150, ppl: 20.088558 +epoch: 0, batch: 41066, sum loss: 4667.381348, avg loss: 2.813370, ppl: 16.665993 +epoch: 0, batch: 41067, sum loss: 4899.507324, avg loss: 2.830449, ppl: 16.953070 +epoch: 0, batch: 41068, sum loss: 4479.611328, avg loss: 2.824471, ppl: 16.852032 +epoch: 0, batch: 41069, sum loss: 4544.015625, avg loss: 2.704771, ppl: 14.950896 +epoch: 0, batch: 41070, sum loss: 4366.627930, avg loss: 2.641638, ppl: 14.036177 +epoch: 0, batch: 41071, sum loss: 5148.303223, avg loss: 2.887439, ppl: 17.947283 +epoch: 0, batch: 41072, sum loss: 4735.049805, avg loss: 2.861057, ppl: 17.479998 +epoch: 0, batch: 41073, sum loss: 4481.558105, avg loss: 2.832843, ppl: 16.993706 +epoch: 0, batch: 41074, sum loss: 5522.251953, avg loss: 3.170064, ppl: 23.809013 +epoch: 0, batch: 41075, sum loss: 3837.002197, avg loss: 2.725144, ppl: 15.258606 +epoch: 0, batch: 41076, sum loss: 5800.445801, avg loss: 2.832249, ppl: 16.983612 +epoch: 0, batch: 41077, sum loss: 5150.710938, avg loss: 2.982461, ppl: 19.736338 +epoch: 0, batch: 41078, sum loss: 4517.088379, avg loss: 3.119536, ppl: 22.635883 +epoch: 0, batch: 41079, sum loss: 5810.701660, avg loss: 3.189189, ppl: 24.268730 +epoch: 0, batch: 41080, sum loss: 6025.369629, avg loss: 3.156296, ppl: 23.483463 +epoch: 0, batch: 41081, sum loss: 4288.266113, avg loss: 2.695328, ppl: 14.810369 +epoch: 0, batch: 41082, sum loss: 5608.986328, avg loss: 2.989865, ppl: 19.882994 +epoch: 0, batch: 41083, sum loss: 4890.668457, avg loss: 2.885350, ppl: 17.909840 +epoch: 0, batch: 41084, sum loss: 5456.792969, avg loss: 3.021480, ppl: 20.521643 +epoch: 0, batch: 41085, sum loss: 4955.834961, avg loss: 2.859685, ppl: 17.456036 +epoch: 0, batch: 41086, sum loss: 5255.419922, avg loss: 2.931077, ppl: 18.747803 +epoch: 0, batch: 41087, sum loss: 4926.532227, avg loss: 2.918562, ppl: 
18.514643 +epoch: 0, batch: 41088, sum loss: 3811.061768, avg loss: 2.550911, ppl: 12.818782 +epoch: 0, batch: 41089, sum loss: 3841.500977, avg loss: 2.481590, ppl: 11.960263 +epoch: 0, batch: 41090, sum loss: 5171.496582, avg loss: 3.087461, ppl: 21.921343 +epoch: 0, batch: 41091, sum loss: 5314.048340, avg loss: 2.960473, ppl: 19.307095 +epoch: 0, batch: 41092, sum loss: 4845.854492, avg loss: 2.956592, ppl: 19.232317 +epoch: 0, batch: 41093, sum loss: 4605.668945, avg loss: 2.929815, ppl: 18.724163 +epoch: 0, batch: 41094, sum loss: 4762.650391, avg loss: 2.816470, ppl: 16.717731 +epoch: 0, batch: 41095, sum loss: 5379.946289, avg loss: 3.223455, ppl: 25.114740 +epoch: 0, batch: 41096, sum loss: 6436.836914, avg loss: 3.403933, ppl: 30.082174 +epoch: 0, batch: 41097, sum loss: 4941.854980, avg loss: 2.975229, ppl: 19.594105 +epoch: 0, batch: 41098, sum loss: 4024.791992, avg loss: 2.838358, ppl: 17.087692 +epoch: 0, batch: 41099, sum loss: 4204.489258, avg loss: 2.777074, ppl: 16.071920 +epoch: 0, batch: 41100, sum loss: 4501.262207, avg loss: 3.102179, ppl: 22.246380 +epoch: 0, batch: 41101, sum loss: 5278.149414, avg loss: 2.920946, ppl: 18.558834 +epoch: 0, batch: 41102, sum loss: 3942.982178, avg loss: 2.836678, ppl: 17.058998 +epoch: 0, batch: 41103, sum loss: 4979.729004, avg loss: 3.072010, ppl: 21.585251 +epoch: 0, batch: 41104, sum loss: 5126.064453, avg loss: 2.930855, ppl: 18.743641 +epoch: 0, batch: 41105, sum loss: 5621.538574, avg loss: 2.930938, ppl: 18.745201 +epoch: 0, batch: 41106, sum loss: 4576.437500, avg loss: 2.637716, ppl: 13.981235 +epoch: 0, batch: 41107, sum loss: 4904.526367, avg loss: 2.935085, ppl: 18.823099 +epoch: 0, batch: 41108, sum loss: 5198.762695, avg loss: 2.992955, ppl: 19.944530 +epoch: 0, batch: 41109, sum loss: 4540.923340, avg loss: 2.779023, ppl: 16.103279 +epoch: 0, batch: 41110, sum loss: 4512.264648, avg loss: 2.825463, ppl: 16.868759 +epoch: 0, batch: 41111, sum loss: 4844.220703, avg loss: 3.035226, ppl: 
20.805677 +epoch: 0, batch: 41112, sum loss: 5646.208984, avg loss: 3.070261, ppl: 21.547516 +epoch: 0, batch: 41113, sum loss: 3836.592773, avg loss: 2.530734, ppl: 12.562724 +epoch: 0, batch: 41114, sum loss: 4416.842773, avg loss: 2.769181, ppl: 15.945561 +epoch: 0, batch: 41115, sum loss: 4591.020996, avg loss: 3.022397, ppl: 20.540464 +epoch: 0, batch: 41116, sum loss: 4329.150391, avg loss: 2.715904, ppl: 15.118263 +epoch: 0, batch: 41117, sum loss: 3708.549316, avg loss: 2.606148, ppl: 13.546775 +epoch: 0, batch: 41118, sum loss: 4933.088867, avg loss: 2.980718, ppl: 19.701965 +epoch: 0, batch: 41119, sum loss: 4750.097168, avg loss: 2.844370, ppl: 17.190720 +epoch: 0, batch: 41120, sum loss: 4143.994141, avg loss: 2.875777, ppl: 17.739199 +epoch: 0, batch: 41121, sum loss: 5011.992188, avg loss: 2.828438, ppl: 16.919014 +epoch: 0, batch: 41122, sum loss: 4289.015137, avg loss: 2.628073, ppl: 13.847060 +epoch: 0, batch: 41123, sum loss: 4419.739746, avg loss: 2.921176, ppl: 18.563112 +epoch: 0, batch: 41124, sum loss: 4714.102051, avg loss: 2.794370, ppl: 16.352322 +epoch: 0, batch: 41125, sum loss: 3840.855225, avg loss: 2.697230, ppl: 14.838570 +epoch: 0, batch: 41126, sum loss: 5377.138184, avg loss: 3.140852, ppl: 23.123554 +epoch: 0, batch: 41127, sum loss: 5425.811035, avg loss: 3.016015, ppl: 20.409798 +epoch: 0, batch: 41128, sum loss: 5598.353027, avg loss: 3.079402, ppl: 21.745399 +epoch: 0, batch: 41129, sum loss: 5229.136230, avg loss: 3.063349, ppl: 21.399097 +epoch: 0, batch: 41130, sum loss: 5593.875977, avg loss: 3.271272, ppl: 26.344839 +epoch: 0, batch: 41131, sum loss: 5292.582520, avg loss: 3.036479, ppl: 20.831760 +epoch: 0, batch: 41132, sum loss: 5479.828125, avg loss: 2.632002, ppl: 13.901575 +epoch: 0, batch: 41133, sum loss: 4644.468262, avg loss: 2.823385, ppl: 16.833733 +epoch: 0, batch: 41134, sum loss: 5330.790039, avg loss: 3.083164, ppl: 21.827349 +epoch: 0, batch: 41135, sum loss: 5546.513184, avg loss: 2.994877, ppl: 
19.982908 +epoch: 0, batch: 41136, sum loss: 4821.131836, avg loss: 2.767584, ppl: 15.920130 +epoch: 0, batch: 41137, sum loss: 4183.708984, avg loss: 2.560410, ppl: 12.941116 +epoch: 0, batch: 41138, sum loss: 5258.276367, avg loss: 3.066050, ppl: 21.456985 +epoch: 0, batch: 41139, sum loss: 3593.489014, avg loss: 2.559465, ppl: 12.928901 +epoch: 0, batch: 41140, sum loss: 3602.685059, avg loss: 2.531753, ppl: 12.575536 +epoch: 0, batch: 41141, sum loss: 5601.397461, avg loss: 3.065899, ppl: 21.453737 +epoch: 0, batch: 41142, sum loss: 4737.830078, avg loss: 2.808435, ppl: 16.583948 +epoch: 0, batch: 41143, sum loss: 4162.805664, avg loss: 2.784485, ppl: 16.191483 +epoch: 0, batch: 41144, sum loss: 6029.560059, avg loss: 3.402686, ppl: 30.044703 +epoch: 0, batch: 41145, sum loss: 5596.516602, avg loss: 3.028418, ppl: 20.664516 +epoch: 0, batch: 41146, sum loss: 5480.496094, avg loss: 3.011262, ppl: 20.313013 +epoch: 0, batch: 41147, sum loss: 5194.326172, avg loss: 3.176958, ppl: 23.973711 +epoch: 0, batch: 41148, sum loss: 3830.835938, avg loss: 2.763951, ppl: 15.862389 +epoch: 0, batch: 41149, sum loss: 5496.150879, avg loss: 3.122813, ppl: 22.710173 +epoch: 0, batch: 41150, sum loss: 5195.865234, avg loss: 2.905965, ppl: 18.282875 +epoch: 0, batch: 41151, sum loss: 4621.925781, avg loss: 2.821688, ppl: 16.805201 +epoch: 0, batch: 41152, sum loss: 4481.791016, avg loss: 2.712949, ppl: 15.073656 +epoch: 0, batch: 41153, sum loss: 3680.312988, avg loss: 2.708104, ppl: 15.000802 +epoch: 0, batch: 41154, sum loss: 4219.254395, avg loss: 2.604478, ppl: 13.524165 +epoch: 0, batch: 41155, sum loss: 5386.922852, avg loss: 2.934054, ppl: 18.803699 +epoch: 0, batch: 41156, sum loss: 5605.638672, avg loss: 3.122919, ppl: 22.712572 +epoch: 0, batch: 41157, sum loss: 5073.216797, avg loss: 3.076541, ppl: 21.683279 +epoch: 0, batch: 41158, sum loss: 4725.313965, avg loss: 2.860360, ppl: 17.467808 +epoch: 0, batch: 41159, sum loss: 5095.788574, avg loss: 2.835720, ppl: 
17.042667 +epoch: 0, batch: 41160, sum loss: 5381.393555, avg loss: 2.905720, ppl: 18.278399 +epoch: 0, batch: 41161, sum loss: 6256.134277, avg loss: 3.188651, ppl: 24.255692 +epoch: 0, batch: 41162, sum loss: 5593.081543, avg loss: 3.163508, ppl: 23.653419 +epoch: 0, batch: 41163, sum loss: 4833.759766, avg loss: 3.043930, ppl: 20.987555 +epoch: 0, batch: 41164, sum loss: 5185.577148, avg loss: 2.993982, ppl: 19.965027 +epoch: 0, batch: 41165, sum loss: 5126.864258, avg loss: 3.037242, ppl: 20.847664 +epoch: 0, batch: 41166, sum loss: 5396.944336, avg loss: 2.947539, ppl: 19.058998 +epoch: 0, batch: 41167, sum loss: 4935.343750, avg loss: 3.013030, ppl: 20.348970 +epoch: 0, batch: 41168, sum loss: 5185.473145, avg loss: 2.905027, ppl: 18.265739 +epoch: 0, batch: 41169, sum loss: 4317.955566, avg loss: 2.782188, ppl: 16.154327 +epoch: 0, batch: 41170, sum loss: 3307.536621, avg loss: 2.435594, ppl: 11.422603 +epoch: 0, batch: 41171, sum loss: 4197.159668, avg loss: 2.845532, ppl: 17.210711 +epoch: 0, batch: 41172, sum loss: 4568.392578, avg loss: 2.884086, ppl: 17.887213 +epoch: 0, batch: 41173, sum loss: 4779.503418, avg loss: 2.571008, ppl: 13.078998 +epoch: 0, batch: 41174, sum loss: 5021.581055, avg loss: 2.919524, ppl: 18.532463 +epoch: 0, batch: 41175, sum loss: 5911.282227, avg loss: 3.051772, ppl: 21.152792 +epoch: 0, batch: 41176, sum loss: 4420.293457, avg loss: 2.572930, ppl: 13.104161 +epoch: 0, batch: 41177, sum loss: 4483.747559, avg loss: 2.885294, ppl: 17.908840 +epoch: 0, batch: 41178, sum loss: 5895.491211, avg loss: 3.183311, ppl: 24.126492 +epoch: 0, batch: 41179, sum loss: 4814.376465, avg loss: 3.049003, ppl: 21.094311 +epoch: 0, batch: 41180, sum loss: 5501.714355, avg loss: 3.071867, ppl: 21.582165 +epoch: 0, batch: 41181, sum loss: 4937.209961, avg loss: 2.992249, ppl: 19.930447 +epoch: 0, batch: 41182, sum loss: 4282.567383, avg loss: 3.098819, ppl: 22.171741 +epoch: 0, batch: 41183, sum loss: 4585.257324, avg loss: 2.789086, ppl: 
16.266144 +epoch: 0, batch: 41184, sum loss: 3939.213623, avg loss: 2.541428, ppl: 12.697795 +epoch: 0, batch: 41185, sum loss: 4451.134766, avg loss: 2.756121, ppl: 15.738669 +epoch: 0, batch: 41186, sum loss: 5099.448730, avg loss: 3.022791, ppl: 20.548571 +epoch: 0, batch: 41187, sum loss: 4558.108887, avg loss: 2.836409, ppl: 17.054411 +epoch: 0, batch: 41188, sum loss: 4559.767578, avg loss: 2.728766, ppl: 15.313977 +epoch: 0, batch: 41189, sum loss: 5334.759766, avg loss: 2.658077, ppl: 14.268821 +epoch: 0, batch: 41190, sum loss: 4987.257324, avg loss: 2.797116, ppl: 16.397285 +epoch: 0, batch: 41191, sum loss: 4657.854980, avg loss: 3.022619, ppl: 20.545019 +epoch: 0, batch: 41192, sum loss: 4606.693848, avg loss: 2.897292, ppl: 18.124990 +epoch: 0, batch: 41193, sum loss: 5280.421387, avg loss: 3.027764, ppl: 20.651012 +epoch: 0, batch: 41194, sum loss: 5060.062500, avg loss: 2.825272, ppl: 16.865534 +epoch: 0, batch: 41195, sum loss: 4607.495605, avg loss: 2.755679, ppl: 15.731725 +epoch: 0, batch: 41196, sum loss: 5053.678711, avg loss: 3.051738, ppl: 21.152081 +epoch: 0, batch: 41197, sum loss: 4481.662598, avg loss: 2.674023, ppl: 14.498180 +epoch: 0, batch: 41198, sum loss: 4874.662598, avg loss: 2.655045, ppl: 14.225627 +epoch: 0, batch: 41199, sum loss: 4989.211426, avg loss: 2.809241, ppl: 16.597313 +epoch: 0, batch: 41200, sum loss: 5203.065430, avg loss: 3.227708, ppl: 25.221785 +epoch: 0, batch: 41201, sum loss: 3870.111084, avg loss: 2.561291, ppl: 12.952531 +epoch: 0, batch: 41202, sum loss: 4070.293457, avg loss: 2.637909, ppl: 13.983932 +epoch: 0, batch: 41203, sum loss: 4668.205078, avg loss: 2.750858, ppl: 15.656053 +epoch: 0, batch: 41204, sum loss: 4942.291016, avg loss: 3.026510, ppl: 20.625126 +epoch: 0, batch: 41205, sum loss: 5668.782715, avg loss: 2.922053, ppl: 18.579393 +epoch: 0, batch: 41206, sum loss: 4736.898926, avg loss: 2.686840, ppl: 14.685198 +epoch: 0, batch: 41207, sum loss: 5203.816406, avg loss: 2.910412, ppl: 
18.364361 +epoch: 0, batch: 41208, sum loss: 4751.139648, avg loss: 2.826377, ppl: 16.884176 +epoch: 0, batch: 41209, sum loss: 4905.502930, avg loss: 2.980257, ppl: 19.692877 +epoch: 0, batch: 41210, sum loss: 5454.018555, avg loss: 2.654024, ppl: 14.211104 +epoch: 0, batch: 41211, sum loss: 6128.947754, avg loss: 3.360169, ppl: 28.794048 +epoch: 0, batch: 41212, sum loss: 4817.781250, avg loss: 2.708140, ppl: 15.001349 +epoch: 0, batch: 41213, sum loss: 4738.451660, avg loss: 2.845917, ppl: 17.217339 +epoch: 0, batch: 41214, sum loss: 6717.452637, avg loss: 3.125851, ppl: 22.779264 +epoch: 0, batch: 41215, sum loss: 5040.026855, avg loss: 3.169828, ppl: 23.803394 +epoch: 0, batch: 41216, sum loss: 4837.521484, avg loss: 2.847276, ppl: 17.240753 +epoch: 0, batch: 41217, sum loss: 5034.801758, avg loss: 3.004058, ppl: 20.167217 +epoch: 0, batch: 41218, sum loss: 5178.016602, avg loss: 2.794396, ppl: 16.352755 +epoch: 0, batch: 41219, sum loss: 5554.541016, avg loss: 3.259707, ppl: 26.041912 +epoch: 0, batch: 41220, sum loss: 4267.750000, avg loss: 2.475493, ppl: 11.887568 +epoch: 0, batch: 41221, sum loss: 5558.955078, avg loss: 3.081461, ppl: 21.790209 +epoch: 0, batch: 41222, sum loss: 4616.900879, avg loss: 2.744888, ppl: 15.562864 +epoch: 0, batch: 41223, sum loss: 4385.608398, avg loss: 2.640342, ppl: 14.018001 +epoch: 0, batch: 41224, sum loss: 5167.268066, avg loss: 2.961185, ppl: 19.320852 +epoch: 0, batch: 41225, sum loss: 4408.578125, avg loss: 2.986842, ppl: 19.822977 +epoch: 0, batch: 41226, sum loss: 5882.484863, avg loss: 2.942714, ppl: 18.967253 +epoch: 0, batch: 41227, sum loss: 4778.311523, avg loss: 2.805820, ppl: 16.540634 +epoch: 0, batch: 41228, sum loss: 5571.878906, avg loss: 2.960616, ppl: 19.309856 +epoch: 0, batch: 41229, sum loss: 4250.518555, avg loss: 2.754711, ppl: 15.716496 +epoch: 0, batch: 41230, sum loss: 4951.391113, avg loss: 2.910871, ppl: 18.372786 +epoch: 0, batch: 41231, sum loss: 5339.348633, avg loss: 2.872162, ppl: 
17.675184 +epoch: 0, batch: 41232, sum loss: 4824.767578, avg loss: 2.983777, ppl: 19.762323 +epoch: 0, batch: 41233, sum loss: 4652.883301, avg loss: 2.926342, ppl: 18.659241 +epoch: 0, batch: 41234, sum loss: 4775.367676, avg loss: 2.902959, ppl: 18.228001 +epoch: 0, batch: 41235, sum loss: 5243.117188, avg loss: 2.994356, ppl: 19.972496 +epoch: 0, batch: 41236, sum loss: 3681.862061, avg loss: 2.533973, ppl: 12.603475 +epoch: 0, batch: 41237, sum loss: 4105.965332, avg loss: 2.610277, ppl: 13.602815 +epoch: 0, batch: 41238, sum loss: 4853.163574, avg loss: 2.893955, ppl: 18.064623 +epoch: 0, batch: 41239, sum loss: 3984.679688, avg loss: 2.683286, ppl: 14.633098 +epoch: 0, batch: 41240, sum loss: 3838.564453, avg loss: 2.616608, ppl: 13.689213 +epoch: 0, batch: 41241, sum loss: 4673.373535, avg loss: 2.872387, ppl: 17.679167 +epoch: 0, batch: 41242, sum loss: 4121.316406, avg loss: 2.564603, ppl: 12.995493 +epoch: 0, batch: 41243, sum loss: 5106.236816, avg loss: 3.068652, ppl: 21.512888 +epoch: 0, batch: 41244, sum loss: 5406.051758, avg loss: 2.954127, ppl: 19.184963 +epoch: 0, batch: 41245, sum loss: 5487.460449, avg loss: 3.181137, ppl: 24.074100 +epoch: 0, batch: 41246, sum loss: 5094.472168, avg loss: 3.002046, ppl: 20.126677 +epoch: 0, batch: 41247, sum loss: 4581.030273, avg loss: 2.903061, ppl: 18.229860 +epoch: 0, batch: 41248, sum loss: 5055.949707, avg loss: 3.084777, ppl: 21.862593 +epoch: 0, batch: 41249, sum loss: 5457.173828, avg loss: 3.113049, ppl: 22.489500 +epoch: 0, batch: 41250, sum loss: 4783.617188, avg loss: 2.757128, ppl: 15.754531 +epoch: 0, batch: 41251, sum loss: 4064.814209, avg loss: 2.836577, ppl: 17.057274 +epoch: 0, batch: 41252, sum loss: 4299.631348, avg loss: 2.704171, ppl: 14.941920 +epoch: 0, batch: 41253, sum loss: 4867.811035, avg loss: 2.909630, ppl: 18.350008 +epoch: 0, batch: 41254, sum loss: 4938.320801, avg loss: 2.980278, ppl: 19.693291 +epoch: 0, batch: 41255, sum loss: 4509.575195, avg loss: 2.731421, ppl: 
15.354683 +epoch: 0, batch: 41256, sum loss: 4649.774414, avg loss: 2.583208, ppl: 13.239544 +epoch: 0, batch: 41257, sum loss: 4863.379883, avg loss: 2.864182, ppl: 17.534695 +epoch: 0, batch: 41258, sum loss: 5224.867188, avg loss: 3.197593, ppl: 24.473557 +epoch: 0, batch: 41259, sum loss: 4447.293457, avg loss: 2.914347, ppl: 18.436773 +epoch: 0, batch: 41260, sum loss: 4878.591309, avg loss: 2.818366, ppl: 16.749456 +epoch: 0, batch: 41261, sum loss: 4981.523926, avg loss: 2.897920, ppl: 18.136381 +epoch: 0, batch: 41262, sum loss: 4617.354492, avg loss: 2.740270, ppl: 15.491162 +epoch: 0, batch: 41263, sum loss: 4658.936523, avg loss: 2.816769, ppl: 16.722738 +epoch: 0, batch: 41264, sum loss: 5182.541992, avg loss: 3.081178, ppl: 21.784058 +epoch: 0, batch: 41265, sum loss: 4313.231445, avg loss: 2.798982, ppl: 16.427917 +epoch: 0, batch: 41266, sum loss: 5698.556152, avg loss: 3.090323, ppl: 21.984188 +epoch: 0, batch: 41267, sum loss: 5907.453125, avg loss: 3.186328, ppl: 24.199413 +epoch: 0, batch: 41268, sum loss: 4306.711426, avg loss: 2.931730, ppl: 18.760054 +epoch: 0, batch: 41269, sum loss: 4841.069824, avg loss: 2.923351, ppl: 18.603529 +epoch: 0, batch: 41270, sum loss: 5159.673340, avg loss: 2.805695, ppl: 16.538570 +epoch: 0, batch: 41271, sum loss: 5224.867676, avg loss: 3.239224, ppl: 25.513908 +epoch: 0, batch: 41272, sum loss: 4575.707520, avg loss: 2.814088, ppl: 16.677959 +epoch: 0, batch: 41273, sum loss: 5688.367676, avg loss: 3.105004, ppl: 22.309315 +epoch: 0, batch: 41274, sum loss: 4406.837891, avg loss: 2.725317, ppl: 15.261254 +epoch: 0, batch: 41275, sum loss: 5502.000977, avg loss: 3.163888, ppl: 23.662416 +epoch: 0, batch: 41276, sum loss: 4926.216797, avg loss: 3.076962, ppl: 21.692411 +epoch: 0, batch: 41277, sum loss: 4644.545898, avg loss: 2.823432, ppl: 16.834528 +epoch: 0, batch: 41278, sum loss: 5545.050293, avg loss: 3.094336, ppl: 22.072584 +epoch: 0, batch: 41279, sum loss: 5221.009766, avg loss: 2.814560, ppl: 
16.685839 +epoch: 0, batch: 41280, sum loss: 4285.397949, avg loss: 2.611455, ppl: 13.618855 +epoch: 0, batch: 41281, sum loss: 6226.237305, avg loss: 3.000596, ppl: 20.097517 +epoch: 0, batch: 41282, sum loss: 4791.408691, avg loss: 2.756852, ppl: 15.750182 +epoch: 0, batch: 41283, sum loss: 6947.964355, avg loss: 3.114283, ppl: 22.517271 +epoch: 0, batch: 41284, sum loss: 4345.915039, avg loss: 2.658052, ppl: 14.268467 +epoch: 0, batch: 41285, sum loss: 3983.460449, avg loss: 2.754814, ppl: 15.718112 +epoch: 0, batch: 41286, sum loss: 4321.837402, avg loss: 2.848937, ppl: 17.269411 +epoch: 0, batch: 41287, sum loss: 5196.387207, avg loss: 2.780303, ppl: 16.123913 +epoch: 0, batch: 41288, sum loss: 4734.349609, avg loss: 2.888560, ppl: 17.967415 +epoch: 0, batch: 41289, sum loss: 4352.732910, avg loss: 2.761886, ppl: 15.829676 +epoch: 0, batch: 41290, sum loss: 5415.516602, avg loss: 2.820582, ppl: 16.786612 +epoch: 0, batch: 41291, sum loss: 4072.736084, avg loss: 2.720598, ppl: 15.189410 +epoch: 0, batch: 41292, sum loss: 5803.774414, avg loss: 3.016515, ppl: 20.420000 +epoch: 0, batch: 41293, sum loss: 5664.097656, avg loss: 3.028929, ppl: 20.675083 +epoch: 0, batch: 41294, sum loss: 5321.338867, avg loss: 2.969497, ppl: 19.482122 +epoch: 0, batch: 41295, sum loss: 4459.254883, avg loss: 2.670213, ppl: 14.443038 +epoch: 0, batch: 41296, sum loss: 5043.699707, avg loss: 2.970377, ppl: 19.499269 +epoch: 0, batch: 41297, sum loss: 4944.049316, avg loss: 2.736054, ppl: 15.425989 +epoch: 0, batch: 41298, sum loss: 5164.724609, avg loss: 2.856596, ppl: 17.402180 +epoch: 0, batch: 41299, sum loss: 4633.823730, avg loss: 2.681611, ppl: 14.608610 +epoch: 0, batch: 41300, sum loss: 4853.867676, avg loss: 2.713174, ppl: 15.077048 +epoch: 0, batch: 41301, sum loss: 4771.925293, avg loss: 3.004991, ppl: 20.186031 +epoch: 0, batch: 41302, sum loss: 4648.351074, avg loss: 2.896169, ppl: 18.104649 +epoch: 0, batch: 41303, sum loss: 4355.425781, avg loss: 2.772391, ppl: 
15.996834 +epoch: 0, batch: 41304, sum loss: 5688.030762, avg loss: 2.992126, ppl: 19.927994 +epoch: 0, batch: 41305, sum loss: 5595.896973, avg loss: 3.021543, ppl: 20.522924 +epoch: 0, batch: 41306, sum loss: 4397.338867, avg loss: 2.783126, ppl: 16.169485 +epoch: 0, batch: 41307, sum loss: 4773.029785, avg loss: 2.885750, ppl: 17.916992 +epoch: 0, batch: 41308, sum loss: 3857.455811, avg loss: 2.724192, ppl: 15.244090 +epoch: 0, batch: 41309, sum loss: 4693.731445, avg loss: 2.616350, ppl: 13.685676 +epoch: 0, batch: 41310, sum loss: 5926.037598, avg loss: 3.248924, ppl: 25.762611 +epoch: 0, batch: 41311, sum loss: 6912.872559, avg loss: 3.137936, ppl: 23.056221 +epoch: 0, batch: 41312, sum loss: 3913.438477, avg loss: 2.710137, ppl: 15.031341 +epoch: 0, batch: 41313, sum loss: 4783.015137, avg loss: 2.788930, ppl: 16.263607 +epoch: 0, batch: 41314, sum loss: 4946.952148, avg loss: 2.745257, ppl: 15.568609 +epoch: 0, batch: 41315, sum loss: 5046.047852, avg loss: 3.056359, ppl: 21.250036 +epoch: 0, batch: 41316, sum loss: 4221.228516, avg loss: 2.644880, ppl: 14.081756 +epoch: 0, batch: 41317, sum loss: 4756.144043, avg loss: 2.873803, ppl: 17.704222 +epoch: 0, batch: 41318, sum loss: 5850.898438, avg loss: 3.132173, ppl: 22.923729 +epoch: 0, batch: 41319, sum loss: 4234.092773, avg loss: 2.751197, ppl: 15.661372 +epoch: 0, batch: 41320, sum loss: 4786.234375, avg loss: 2.773021, ppl: 16.006918 +epoch: 0, batch: 41321, sum loss: 4690.704102, avg loss: 2.719249, ppl: 15.168926 +epoch: 0, batch: 41322, sum loss: 5534.801758, avg loss: 3.041100, ppl: 20.928251 +epoch: 0, batch: 41323, sum loss: 4402.490234, avg loss: 2.929135, ppl: 18.711439 +epoch: 0, batch: 41324, sum loss: 5326.369141, avg loss: 2.992342, ppl: 19.932314 +epoch: 0, batch: 41325, sum loss: 5136.414062, avg loss: 2.864704, ppl: 17.543858 +epoch: 0, batch: 41326, sum loss: 5316.498535, avg loss: 2.963489, ppl: 19.365412 +epoch: 0, batch: 41327, sum loss: 4907.793457, avg loss: 3.154109, ppl: 
23.432150 +epoch: 0, batch: 41328, sum loss: 4808.158203, avg loss: 2.840023, ppl: 17.116152 +epoch: 0, batch: 41329, sum loss: 5541.881836, avg loss: 3.147008, ppl: 23.266357 +epoch: 0, batch: 41330, sum loss: 4452.228516, avg loss: 2.583998, ppl: 13.250006 +epoch: 0, batch: 41331, sum loss: 3656.418945, avg loss: 2.636207, ppl: 13.960151 +epoch: 0, batch: 41332, sum loss: 4989.476074, avg loss: 2.928096, ppl: 18.692013 +epoch: 0, batch: 41333, sum loss: 5701.368652, avg loss: 3.170950, ppl: 23.830116 +epoch: 0, batch: 41334, sum loss: 4848.835938, avg loss: 3.059202, ppl: 21.310555 +epoch: 0, batch: 41335, sum loss: 4195.547852, avg loss: 2.701576, ppl: 14.903204 +epoch: 0, batch: 41336, sum loss: 5023.877441, avg loss: 2.856099, ppl: 17.393536 +epoch: 0, batch: 41337, sum loss: 4106.689941, avg loss: 2.497987, ppl: 12.157990 +epoch: 0, batch: 41338, sum loss: 3939.969727, avg loss: 2.717221, ppl: 15.138187 +epoch: 0, batch: 41339, sum loss: 6218.867676, avg loss: 3.329158, ppl: 27.914837 +epoch: 0, batch: 41340, sum loss: 5903.964844, avg loss: 3.084621, ppl: 21.859180 +epoch: 0, batch: 41341, sum loss: 5190.442871, avg loss: 3.113643, ppl: 22.502872 +epoch: 0, batch: 41342, sum loss: 5392.321289, avg loss: 3.266094, ppl: 26.208773 +epoch: 0, batch: 41343, sum loss: 5467.348145, avg loss: 3.133151, ppl: 22.946165 +epoch: 0, batch: 41344, sum loss: 4609.930176, avg loss: 2.881206, ppl: 17.835775 +epoch: 0, batch: 41345, sum loss: 4685.669922, avg loss: 2.839800, ppl: 17.112341 +epoch: 0, batch: 41346, sum loss: 4074.346924, avg loss: 2.578701, ppl: 13.180000 +epoch: 0, batch: 41347, sum loss: 5425.113281, avg loss: 2.867396, ppl: 17.591148 +epoch: 0, batch: 41348, sum loss: 4557.001953, avg loss: 3.044089, ppl: 20.990906 +epoch: 0, batch: 41349, sum loss: 6251.522949, avg loss: 3.261097, ppl: 26.078135 +epoch: 0, batch: 41350, sum loss: 4331.753906, avg loss: 2.600092, ppl: 13.464982 +epoch: 0, batch: 41351, sum loss: 5217.352539, avg loss: 2.967777, ppl: 
19.448643 +epoch: 0, batch: 41352, sum loss: 4762.030273, avg loss: 3.027355, ppl: 20.642570 +epoch: 0, batch: 41353, sum loss: 4810.252441, avg loss: 2.904742, ppl: 18.260527 +epoch: 0, batch: 41354, sum loss: 5444.718262, avg loss: 2.928842, ppl: 18.705961 +epoch: 0, batch: 41355, sum loss: 3829.741211, avg loss: 2.670670, ppl: 14.449651 +epoch: 0, batch: 41356, sum loss: 5121.314941, avg loss: 2.909838, ppl: 18.353825 +epoch: 0, batch: 41357, sum loss: 4886.699219, avg loss: 2.616006, ppl: 13.680971 +epoch: 0, batch: 41358, sum loss: 3951.283691, avg loss: 2.913926, ppl: 18.429010 +epoch: 0, batch: 41359, sum loss: 5052.385254, avg loss: 2.932319, ppl: 18.771105 +epoch: 0, batch: 41360, sum loss: 4565.908691, avg loss: 2.851911, ppl: 17.320843 +epoch: 0, batch: 41361, sum loss: 3804.919922, avg loss: 2.565691, ppl: 13.009648 +epoch: 0, batch: 41362, sum loss: 5184.211914, avg loss: 2.943902, ppl: 18.989805 +epoch: 0, batch: 41363, sum loss: 4904.549316, avg loss: 2.775636, ppl: 16.048838 +epoch: 0, batch: 41364, sum loss: 4667.360840, avg loss: 2.656437, ppl: 14.245448 +epoch: 0, batch: 41365, sum loss: 4817.958984, avg loss: 2.728176, ppl: 15.304947 +epoch: 0, batch: 41366, sum loss: 4403.535156, avg loss: 2.601025, ppl: 13.477547 +epoch: 0, batch: 41367, sum loss: 5096.133301, avg loss: 2.650095, ppl: 14.155387 +epoch: 0, batch: 41368, sum loss: 4119.020996, avg loss: 2.783122, ppl: 16.169428 +epoch: 0, batch: 41369, sum loss: 5241.049316, avg loss: 2.947722, ppl: 19.062479 +epoch: 0, batch: 41370, sum loss: 4509.891602, avg loss: 2.695691, ppl: 14.815758 +epoch: 0, batch: 41371, sum loss: 5186.823242, avg loss: 2.972391, ppl: 19.538588 +epoch: 0, batch: 41372, sum loss: 5353.385742, avg loss: 3.004145, ppl: 20.168957 +epoch: 0, batch: 41373, sum loss: 4437.056641, avg loss: 2.780111, ppl: 16.120804 +epoch: 0, batch: 41374, sum loss: 5175.854492, avg loss: 3.236932, ppl: 25.455505 +epoch: 0, batch: 41375, sum loss: 4370.649902, avg loss: 2.807097, ppl: 
16.561769 +epoch: 0, batch: 41376, sum loss: 5620.636719, avg loss: 2.983353, ppl: 19.753939 +epoch: 0, batch: 41377, sum loss: 5453.413086, avg loss: 2.991450, ppl: 19.914539 +epoch: 0, batch: 41378, sum loss: 4600.076660, avg loss: 2.598913, ppl: 13.449117 +epoch: 0, batch: 41379, sum loss: 4754.259766, avg loss: 2.707437, ppl: 14.990808 +epoch: 0, batch: 41380, sum loss: 4513.308105, avg loss: 2.683299, ppl: 14.633283 +epoch: 0, batch: 41381, sum loss: 5129.012695, avg loss: 2.959615, ppl: 19.290539 +epoch: 0, batch: 41382, sum loss: 5021.939453, avg loss: 2.630665, ppl: 13.883000 +epoch: 0, batch: 41383, sum loss: 5144.587891, avg loss: 3.285177, ppl: 26.713724 +epoch: 0, batch: 41384, sum loss: 5752.242188, avg loss: 3.043514, ppl: 20.978844 +epoch: 0, batch: 41385, sum loss: 4391.626953, avg loss: 2.851706, ppl: 17.317297 +epoch: 0, batch: 41386, sum loss: 4662.549805, avg loss: 2.849969, ppl: 17.287252 +epoch: 0, batch: 41387, sum loss: 4913.409180, avg loss: 2.956323, ppl: 19.227150 +epoch: 0, batch: 41388, sum loss: 4530.477051, avg loss: 2.784559, ppl: 16.192671 +epoch: 0, batch: 41389, sum loss: 5287.729492, avg loss: 2.927868, ppl: 18.687744 +epoch: 0, batch: 41390, sum loss: 4628.625977, avg loss: 2.857177, ppl: 17.412294 +epoch: 0, batch: 41391, sum loss: 4380.579102, avg loss: 2.684178, ppl: 14.646162 +epoch: 0, batch: 41392, sum loss: 4942.363281, avg loss: 2.736635, ppl: 15.434962 +epoch: 0, batch: 41393, sum loss: 5038.195312, avg loss: 2.927481, ppl: 18.680519 +epoch: 0, batch: 41394, sum loss: 5182.556641, avg loss: 3.011364, ppl: 20.315081 +epoch: 0, batch: 41395, sum loss: 4713.740723, avg loss: 2.877741, ppl: 17.774082 +epoch: 0, batch: 41396, sum loss: 4114.238281, avg loss: 2.652636, ppl: 14.191395 +epoch: 0, batch: 41397, sum loss: 4734.194336, avg loss: 2.862270, ppl: 17.501207 +epoch: 0, batch: 41398, sum loss: 5180.017090, avg loss: 2.903597, ppl: 18.239637 +epoch: 0, batch: 41399, sum loss: 4901.315918, avg loss: 2.788007, ppl: 
16.248600 +epoch: 0, batch: 41400, sum loss: 4328.617676, avg loss: 2.663765, ppl: 14.350212 +epoch: 0, batch: 41401, sum loss: 3913.247314, avg loss: 2.777323, ppl: 16.075920 +epoch: 0, batch: 41402, sum loss: 4576.752930, avg loss: 2.673337, ppl: 14.488235 +epoch: 0, batch: 41403, sum loss: 5352.097656, avg loss: 2.902439, ppl: 18.218529 +epoch: 0, batch: 41404, sum loss: 4539.795898, avg loss: 2.967187, ppl: 19.437164 +epoch: 0, batch: 41405, sum loss: 4597.297852, avg loss: 2.895024, ppl: 18.083937 +epoch: 0, batch: 41406, sum loss: 5171.633789, avg loss: 2.933428, ppl: 18.791941 +epoch: 0, batch: 41407, sum loss: 4566.287598, avg loss: 2.897391, ppl: 18.126783 +epoch: 0, batch: 41408, sum loss: 5873.005859, avg loss: 2.870482, ppl: 17.645517 +epoch: 0, batch: 41409, sum loss: 4701.216797, avg loss: 2.773579, ppl: 16.015854 +epoch: 0, batch: 41410, sum loss: 4141.931641, avg loss: 2.643224, ppl: 14.058455 +epoch: 0, batch: 41411, sum loss: 5129.523926, avg loss: 3.101284, ppl: 22.226479 +epoch: 0, batch: 41412, sum loss: 5499.334473, avg loss: 3.169645, ppl: 23.799042 +epoch: 0, batch: 41413, sum loss: 5304.802734, avg loss: 3.024403, ppl: 20.581711 +epoch: 0, batch: 41414, sum loss: 3521.572266, avg loss: 2.631967, ppl: 13.901091 +epoch: 0, batch: 41415, sum loss: 5221.125977, avg loss: 2.842203, ppl: 17.153507 +epoch: 0, batch: 41416, sum loss: 3846.524902, avg loss: 2.680505, ppl: 14.592462 +epoch: 0, batch: 41417, sum loss: 5067.954102, avg loss: 2.763334, ppl: 15.852600 +epoch: 0, batch: 41418, sum loss: 4389.542480, avg loss: 2.891662, ppl: 18.023233 +epoch: 0, batch: 41419, sum loss: 4900.787109, avg loss: 2.899874, ppl: 18.171856 +epoch: 0, batch: 41420, sum loss: 4862.749512, avg loss: 2.833770, ppl: 17.009470 +epoch: 0, batch: 41421, sum loss: 4480.052734, avg loss: 2.774026, ppl: 16.023020 +epoch: 0, batch: 41422, sum loss: 5460.306641, avg loss: 3.064145, ppl: 21.416151 +epoch: 0, batch: 41423, sum loss: 4461.056152, avg loss: 2.613390, ppl: 
13.645226 +epoch: 0, batch: 41424, sum loss: 5140.124023, avg loss: 3.052330, ppl: 21.164606 +epoch: 0, batch: 41425, sum loss: 4429.648926, avg loss: 2.773731, ppl: 16.018295 +epoch: 0, batch: 41426, sum loss: 3889.869629, avg loss: 2.614160, ppl: 13.655735 +epoch: 0, batch: 41427, sum loss: 4040.857910, avg loss: 2.752628, ppl: 15.683792 +epoch: 0, batch: 41428, sum loss: 5546.805664, avg loss: 3.093590, ppl: 22.056107 +epoch: 0, batch: 41429, sum loss: 4847.538574, avg loss: 2.793970, ppl: 16.345789 +epoch: 0, batch: 41430, sum loss: 4008.222900, avg loss: 2.713760, ppl: 15.085887 +epoch: 0, batch: 41431, sum loss: 4676.524414, avg loss: 2.839420, ppl: 17.105835 +epoch: 0, batch: 41432, sum loss: 4624.395020, avg loss: 2.737949, ppl: 15.455249 +epoch: 0, batch: 41433, sum loss: 4871.946777, avg loss: 2.605319, ppl: 13.535546 +epoch: 0, batch: 41434, sum loss: 3859.580322, avg loss: 2.691479, ppl: 14.753476 +epoch: 0, batch: 41435, sum loss: 5447.812500, avg loss: 2.968835, ppl: 19.469233 +epoch: 0, batch: 41436, sum loss: 3733.527344, avg loss: 2.499014, ppl: 12.170492 +epoch: 0, batch: 41437, sum loss: 5877.161133, avg loss: 3.116204, ppl: 22.560583 +epoch: 0, batch: 41438, sum loss: 4979.754883, avg loss: 2.758867, ppl: 15.781952 +epoch: 0, batch: 41439, sum loss: 3947.435547, avg loss: 2.558286, ppl: 12.913667 +epoch: 0, batch: 41440, sum loss: 4760.052734, avg loss: 2.867502, ppl: 17.593012 +epoch: 0, batch: 41441, sum loss: 4201.989746, avg loss: 2.801327, ppl: 16.466475 +epoch: 0, batch: 41442, sum loss: 4588.337402, avg loss: 2.883933, ppl: 17.884476 +epoch: 0, batch: 41443, sum loss: 5181.160156, avg loss: 3.029918, ppl: 20.695539 +epoch: 0, batch: 41444, sum loss: 4665.604980, avg loss: 2.752569, ppl: 15.682873 +epoch: 0, batch: 41445, sum loss: 4805.645508, avg loss: 2.680226, ppl: 14.588395 +epoch: 0, batch: 41446, sum loss: 4631.352051, avg loss: 2.829170, ppl: 16.931410 +epoch: 0, batch: 41447, sum loss: 5232.458008, avg loss: 2.846822, ppl: 
17.232922 +epoch: 0, batch: 41448, sum loss: 5789.424316, avg loss: 2.938794, ppl: 18.893049 +epoch: 0, batch: 41449, sum loss: 4895.423340, avg loss: 2.744071, ppl: 15.550169 +epoch: 0, batch: 41450, sum loss: 5381.093262, avg loss: 3.134009, ppl: 22.965862 +epoch: 0, batch: 41451, sum loss: 4572.519531, avg loss: 2.826032, ppl: 16.878353 +epoch: 0, batch: 41452, sum loss: 4797.012695, avg loss: 2.813497, ppl: 16.668110 +epoch: 0, batch: 41453, sum loss: 5550.020996, avg loss: 3.284036, ppl: 26.683254 +epoch: 0, batch: 41454, sum loss: 5829.706543, avg loss: 2.917771, ppl: 18.500008 +epoch: 0, batch: 41455, sum loss: 5160.287598, avg loss: 3.023015, ppl: 20.553177 +epoch: 0, batch: 41456, sum loss: 5083.225098, avg loss: 2.909688, ppl: 18.351072 +epoch: 0, batch: 41457, sum loss: 4577.298828, avg loss: 2.806437, ppl: 16.550842 +epoch: 0, batch: 41458, sum loss: 5114.768066, avg loss: 2.843117, ppl: 17.169203 +epoch: 0, batch: 41459, sum loss: 4322.621094, avg loss: 2.720340, ppl: 15.185492 +epoch: 0, batch: 41460, sum loss: 5245.079102, avg loss: 2.738945, ppl: 15.470652 +epoch: 0, batch: 41461, sum loss: 4670.348633, avg loss: 2.886495, ppl: 17.930351 +epoch: 0, batch: 41462, sum loss: 4552.398438, avg loss: 2.596919, ppl: 13.422318 +epoch: 0, batch: 41463, sum loss: 4657.276367, avg loss: 2.843270, ppl: 17.171824 +epoch: 0, batch: 41464, sum loss: 5308.130371, avg loss: 2.992182, ppl: 19.929111 +epoch: 0, batch: 41465, sum loss: 4825.387207, avg loss: 3.046330, ppl: 21.037998 +epoch: 0, batch: 41466, sum loss: 4784.870117, avg loss: 2.686620, ppl: 14.681967 +epoch: 0, batch: 41467, sum loss: 4773.040039, avg loss: 2.946321, ppl: 19.035797 +epoch: 0, batch: 41468, sum loss: 4352.925293, avg loss: 2.822909, ppl: 16.825720 +epoch: 0, batch: 41469, sum loss: 4193.083984, avg loss: 2.705216, ppl: 14.957542 +epoch: 0, batch: 41470, sum loss: 4985.109375, avg loss: 2.701956, ppl: 14.908869 +epoch: 0, batch: 41471, sum loss: 4745.039062, avg loss: 2.605733, ppl: 
13.541143 +epoch: 0, batch: 41472, sum loss: 5333.024414, avg loss: 2.966087, ppl: 19.415794 +epoch: 0, batch: 41473, sum loss: 4344.466309, avg loss: 2.700103, ppl: 14.881269 +epoch: 0, batch: 41474, sum loss: 5263.711914, avg loss: 2.808811, ppl: 16.590179 +epoch: 0, batch: 41475, sum loss: 4884.204590, avg loss: 2.815104, ppl: 16.694904 +epoch: 0, batch: 41476, sum loss: 5168.867676, avg loss: 3.228525, ppl: 25.242390 +epoch: 0, batch: 41477, sum loss: 4131.187012, avg loss: 2.789458, ppl: 16.272194 +epoch: 0, batch: 41478, sum loss: 5149.692871, avg loss: 2.920983, ppl: 18.559519 +epoch: 0, batch: 41479, sum loss: 4844.925781, avg loss: 2.731074, ppl: 15.349365 +epoch: 0, batch: 41480, sum loss: 4805.789062, avg loss: 2.795689, ppl: 16.373905 +epoch: 0, batch: 41481, sum loss: 4760.184082, avg loss: 2.840205, ppl: 17.119282 +epoch: 0, batch: 41482, sum loss: 5189.217773, avg loss: 2.982309, ppl: 19.733330 +epoch: 0, batch: 41483, sum loss: 5293.729492, avg loss: 2.786174, ppl: 16.218842 +epoch: 0, batch: 41484, sum loss: 5036.132324, avg loss: 2.911059, ppl: 18.376251 +epoch: 0, batch: 41485, sum loss: 4972.560059, avg loss: 2.944085, ppl: 18.993282 +epoch: 0, batch: 41486, sum loss: 5115.081543, avg loss: 3.033856, ppl: 20.777199 +epoch: 0, batch: 41487, sum loss: 5815.336914, avg loss: 3.104825, ppl: 22.305315 +epoch: 0, batch: 41488, sum loss: 4030.625244, avg loss: 2.885201, ppl: 17.907158 +epoch: 0, batch: 41489, sum loss: 4040.174805, avg loss: 2.784407, ppl: 16.190216 +epoch: 0, batch: 41490, sum loss: 5115.329590, avg loss: 2.924717, ppl: 18.628948 +epoch: 0, batch: 41491, sum loss: 5068.845703, avg loss: 2.833340, ppl: 17.002155 +epoch: 0, batch: 41492, sum loss: 5769.359863, avg loss: 2.975431, ppl: 19.598063 +epoch: 0, batch: 41493, sum loss: 5373.284180, avg loss: 3.022095, ppl: 20.534260 +epoch: 0, batch: 41494, sum loss: 5287.625488, avg loss: 2.852009, ppl: 17.322554 +epoch: 0, batch: 41495, sum loss: 5545.781738, avg loss: 2.942059, ppl: 
18.954840 +epoch: 0, batch: 41496, sum loss: 4676.379883, avg loss: 2.692216, ppl: 14.764363 +epoch: 0, batch: 41497, sum loss: 4716.498535, avg loss: 2.727877, ppl: 15.300364 +epoch: 0, batch: 41498, sum loss: 4140.041016, avg loss: 2.456998, ppl: 11.669723 +epoch: 0, batch: 41499, sum loss: 3889.510498, avg loss: 2.557206, ppl: 12.899727 +epoch: 0, batch: 41500, sum loss: 6770.805664, avg loss: 3.291592, ppl: 26.885641 +epoch: 0, batch: 41501, sum loss: 5700.748047, avg loss: 3.121987, ppl: 22.691425 +epoch: 0, batch: 41502, sum loss: 5795.932617, avg loss: 3.004631, ppl: 20.178761 +epoch: 0, batch: 41503, sum loss: 4157.849609, avg loss: 2.764528, ppl: 15.871540 +epoch: 0, batch: 41504, sum loss: 4116.366211, avg loss: 2.671231, ppl: 14.457749 +epoch: 0, batch: 41505, sum loss: 5860.870117, avg loss: 2.991766, ppl: 19.920841 +epoch: 0, batch: 41506, sum loss: 4548.198730, avg loss: 2.882255, ppl: 17.854490 +epoch: 0, batch: 41507, sum loss: 5337.126465, avg loss: 2.889619, ppl: 17.986462 +epoch: 0, batch: 41508, sum loss: 6545.013184, avg loss: 3.093107, ppl: 22.045456 +epoch: 0, batch: 41509, sum loss: 5172.553223, avg loss: 2.912474, ppl: 18.402264 +epoch: 0, batch: 41510, sum loss: 5214.252930, avg loss: 3.060008, ppl: 21.327719 +epoch: 0, batch: 41511, sum loss: 4755.447754, avg loss: 2.782591, ppl: 16.160837 +epoch: 0, batch: 41512, sum loss: 4854.370117, avg loss: 2.805994, ppl: 16.543516 +epoch: 0, batch: 41513, sum loss: 4145.075195, avg loss: 2.776340, ppl: 16.060129 +epoch: 0, batch: 41514, sum loss: 4907.370605, avg loss: 3.006967, ppl: 20.225967 +epoch: 0, batch: 41515, sum loss: 5173.605957, avg loss: 2.976758, ppl: 19.624096 +epoch: 0, batch: 41516, sum loss: 4674.479980, avg loss: 2.596933, ppl: 13.422513 +epoch: 0, batch: 41517, sum loss: 5798.182129, avg loss: 2.996476, ppl: 20.014889 +epoch: 0, batch: 41518, sum loss: 5066.506836, avg loss: 2.883612, ppl: 17.878738 +epoch: 0, batch: 41519, sum loss: 3925.983398, avg loss: 2.586287, ppl: 
13.280367 +epoch: 0, batch: 41520, sum loss: 4150.666504, avg loss: 2.652183, ppl: 14.184972 +epoch: 0, batch: 41521, sum loss: 5206.307617, avg loss: 2.918334, ppl: 18.510424 +epoch: 0, batch: 41522, sum loss: 4660.871582, avg loss: 3.149238, ppl: 23.318281 +epoch: 0, batch: 41523, sum loss: 4447.628906, avg loss: 2.608580, ppl: 13.579756 +epoch: 0, batch: 41524, sum loss: 4514.920410, avg loss: 2.844941, ppl: 17.200539 +epoch: 0, batch: 41525, sum loss: 5050.840332, avg loss: 3.129393, ppl: 22.860102 +epoch: 0, batch: 41526, sum loss: 4730.072266, avg loss: 2.670848, ppl: 14.452221 +epoch: 0, batch: 41527, sum loss: 4634.192383, avg loss: 2.781628, ppl: 16.145283 +epoch: 0, batch: 41528, sum loss: 3873.046875, avg loss: 2.587206, ppl: 13.292576 +epoch: 0, batch: 41529, sum loss: 4850.021484, avg loss: 2.937627, ppl: 18.871010 +epoch: 0, batch: 41530, sum loss: 4321.440430, avg loss: 2.906147, ppl: 18.286201 +epoch: 0, batch: 41531, sum loss: 4501.931152, avg loss: 3.025491, ppl: 20.604128 +epoch: 0, batch: 41532, sum loss: 6013.170410, avg loss: 3.041563, ppl: 20.937950 +epoch: 0, batch: 41533, sum loss: 4980.136230, avg loss: 2.749937, ppl: 15.641651 +epoch: 0, batch: 41534, sum loss: 4657.506348, avg loss: 2.788926, ppl: 16.263546 +epoch: 0, batch: 41535, sum loss: 5147.867188, avg loss: 2.885576, ppl: 17.913883 +epoch: 0, batch: 41536, sum loss: 6416.477051, avg loss: 2.974723, ppl: 19.584190 +epoch: 0, batch: 41537, sum loss: 5108.608398, avg loss: 3.062715, ppl: 21.385542 +epoch: 0, batch: 41538, sum loss: 6157.452637, avg loss: 3.403788, ppl: 30.077829 +epoch: 0, batch: 41539, sum loss: 5671.234863, avg loss: 3.002242, ppl: 20.130617 +epoch: 0, batch: 41540, sum loss: 4317.367188, avg loss: 2.805307, ppl: 16.532148 +epoch: 0, batch: 41541, sum loss: 4533.526855, avg loss: 2.801933, ppl: 16.476458 +epoch: 0, batch: 41542, sum loss: 4505.926758, avg loss: 3.144401, ppl: 23.205778 +epoch: 0, batch: 41543, sum loss: 5247.593750, avg loss: 3.040321, ppl: 
20.911947 +epoch: 0, batch: 41544, sum loss: 5207.565430, avg loss: 3.018879, ppl: 20.468328 +epoch: 0, batch: 41545, sum loss: 5698.463867, avg loss: 3.100361, ppl: 22.205969 +epoch: 0, batch: 41546, sum loss: 4995.633789, avg loss: 2.945539, ppl: 19.020906 +epoch: 0, batch: 41547, sum loss: 4690.788086, avg loss: 2.798800, ppl: 16.424921 +epoch: 0, batch: 41548, sum loss: 5283.733398, avg loss: 2.968390, ppl: 19.460554 +epoch: 0, batch: 41549, sum loss: 5922.593262, avg loss: 2.962778, ppl: 19.351658 +epoch: 0, batch: 41550, sum loss: 3521.873535, avg loss: 2.425533, ppl: 11.308253 +epoch: 0, batch: 41551, sum loss: 4390.126465, avg loss: 2.980398, ppl: 19.695658 +epoch: 0, batch: 41552, sum loss: 4601.069824, avg loss: 2.868498, ppl: 17.610540 +epoch: 0, batch: 41553, sum loss: 4567.000488, avg loss: 2.638360, ppl: 13.990234 +epoch: 0, batch: 41554, sum loss: 5351.396973, avg loss: 3.008093, ppl: 20.248741 +epoch: 0, batch: 41555, sum loss: 5042.514648, avg loss: 2.933400, ppl: 18.791416 +epoch: 0, batch: 41556, sum loss: 4771.653320, avg loss: 2.753406, ppl: 15.696006 +epoch: 0, batch: 41557, sum loss: 5069.346680, avg loss: 2.931953, ppl: 18.764240 +epoch: 0, batch: 41558, sum loss: 4993.527344, avg loss: 2.899842, ppl: 18.171267 +epoch: 0, batch: 41559, sum loss: 3712.341797, avg loss: 2.499893, ppl: 12.181195 +epoch: 0, batch: 41560, sum loss: 5410.593262, avg loss: 2.801965, ppl: 16.476984 +epoch: 0, batch: 41561, sum loss: 5017.641602, avg loss: 2.798462, ppl: 16.419369 +epoch: 0, batch: 41562, sum loss: 6031.778809, avg loss: 3.099578, ppl: 22.188589 +epoch: 0, batch: 41563, sum loss: 5593.241211, avg loss: 3.018479, ppl: 20.460146 +epoch: 0, batch: 41564, sum loss: 4662.475586, avg loss: 2.930532, ppl: 18.737587 +epoch: 0, batch: 41565, sum loss: 4052.067383, avg loss: 2.669346, ppl: 14.430530 +epoch: 0, batch: 41566, sum loss: 3575.390381, avg loss: 2.452257, ppl: 11.614528 +epoch: 0, batch: 41567, sum loss: 4500.746094, avg loss: 2.675830, ppl: 
14.524399 +epoch: 0, batch: 41568, sum loss: 5652.547852, avg loss: 3.095590, ppl: 22.100281 +epoch: 0, batch: 41569, sum loss: 5168.471191, avg loss: 2.858668, ppl: 17.438276 +epoch: 0, batch: 41570, sum loss: 4695.432129, avg loss: 2.971792, ppl: 19.526890 +epoch: 0, batch: 41571, sum loss: 4703.569336, avg loss: 2.923287, ppl: 18.602341 +epoch: 0, batch: 41572, sum loss: 4793.900879, avg loss: 2.714553, ppl: 15.097861 +epoch: 0, batch: 41573, sum loss: 4665.117188, avg loss: 2.941436, ppl: 18.943026 +epoch: 0, batch: 41574, sum loss: 4840.129883, avg loss: 3.025081, ppl: 20.595676 +epoch: 0, batch: 41575, sum loss: 4248.302246, avg loss: 2.802310, ppl: 16.482681 +epoch: 0, batch: 41576, sum loss: 4891.462891, avg loss: 3.015698, ppl: 20.403336 +epoch: 0, batch: 41577, sum loss: 5955.578125, avg loss: 2.939575, ppl: 18.907803 +epoch: 0, batch: 41578, sum loss: 5079.662598, avg loss: 2.886172, ppl: 17.924561 +epoch: 0, batch: 41579, sum loss: 4643.850586, avg loss: 2.760910, ppl: 15.814228 +epoch: 0, batch: 41580, sum loss: 5765.912109, avg loss: 3.182071, ppl: 24.096600 +epoch: 0, batch: 41581, sum loss: 4611.625488, avg loss: 2.907708, ppl: 18.314781 +epoch: 0, batch: 41582, sum loss: 4627.175293, avg loss: 3.016412, ppl: 20.417891 +epoch: 0, batch: 41583, sum loss: 4845.226562, avg loss: 2.691792, ppl: 14.758106 +epoch: 0, batch: 41584, sum loss: 4097.971680, avg loss: 2.642148, ppl: 14.043337 +epoch: 0, batch: 41585, sum loss: 5039.800781, avg loss: 3.036025, ppl: 20.822300 +epoch: 0, batch: 41586, sum loss: 3826.925781, avg loss: 2.733518, ppl: 15.386929 +epoch: 0, batch: 41587, sum loss: 5450.903320, avg loss: 2.960838, ppl: 19.314156 +epoch: 0, batch: 41588, sum loss: 5315.395020, avg loss: 2.791699, ppl: 16.308708 +epoch: 0, batch: 41589, sum loss: 5040.858887, avg loss: 2.927328, ppl: 18.677654 +epoch: 0, batch: 41590, sum loss: 4812.732910, avg loss: 2.976335, ppl: 19.615784 +epoch: 0, batch: 41591, sum loss: 4548.575195, avg loss: 2.663100, ppl: 
14.340676 +epoch: 0, batch: 41592, sum loss: 4819.013184, avg loss: 3.044228, ppl: 20.993820 +epoch: 0, batch: 41593, sum loss: 5378.960938, avg loss: 2.973445, ppl: 19.559179 +epoch: 0, batch: 41594, sum loss: 4509.891602, avg loss: 2.845357, ppl: 17.207708 +epoch: 0, batch: 41595, sum loss: 5226.867676, avg loss: 2.961398, ppl: 19.324972 +epoch: 0, batch: 41596, sum loss: 5462.180664, avg loss: 3.119464, ppl: 22.634237 +epoch: 0, batch: 41597, sum loss: 4013.811035, avg loss: 2.581229, ppl: 13.213370 +epoch: 0, batch: 41598, sum loss: 5712.257324, avg loss: 2.901096, ppl: 18.194069 +epoch: 0, batch: 41599, sum loss: 4902.987305, avg loss: 2.800107, ppl: 16.446407 +epoch: 0, batch: 41600, sum loss: 3997.555420, avg loss: 2.633436, ppl: 13.921518 +epoch: 0, batch: 41601, sum loss: 5809.725586, avg loss: 2.857710, ppl: 17.421593 +epoch: 0, batch: 41602, sum loss: 4434.628906, avg loss: 2.778589, ppl: 16.096300 +epoch: 0, batch: 41603, sum loss: 5766.414551, avg loss: 3.105231, ppl: 22.314379 +epoch: 0, batch: 41604, sum loss: 5243.918457, avg loss: 3.082844, ppl: 21.820383 +epoch: 0, batch: 41605, sum loss: 5208.787109, avg loss: 2.974750, ppl: 19.584726 +epoch: 0, batch: 41606, sum loss: 5196.693359, avg loss: 3.132425, ppl: 22.929518 +epoch: 0, batch: 41607, sum loss: 4679.253906, avg loss: 2.791918, ppl: 16.312273 +epoch: 0, batch: 41608, sum loss: 4471.414551, avg loss: 2.746569, ppl: 15.589056 +epoch: 0, batch: 41609, sum loss: 4591.062988, avg loss: 2.790920, ppl: 16.295998 +epoch: 0, batch: 41610, sum loss: 4477.280762, avg loss: 2.800050, ppl: 16.445477 +epoch: 0, batch: 41611, sum loss: 4514.487305, avg loss: 2.546242, ppl: 12.759068 +epoch: 0, batch: 41612, sum loss: 4273.749023, avg loss: 2.787834, ppl: 16.245789 +epoch: 0, batch: 41613, sum loss: 4111.822754, avg loss: 2.590941, ppl: 13.342317 +epoch: 0, batch: 41614, sum loss: 4774.691895, avg loss: 2.965647, ppl: 19.407255 +epoch: 0, batch: 41615, sum loss: 5265.699219, avg loss: 2.964921, ppl: 
19.393166 +epoch: 0, batch: 41616, sum loss: 4988.173340, avg loss: 2.881671, ppl: 17.844074 +epoch: 0, batch: 41617, sum loss: 4898.554688, avg loss: 2.976036, ppl: 19.609926 +epoch: 0, batch: 41618, sum loss: 4425.994629, avg loss: 2.842643, ppl: 17.161058 +epoch: 0, batch: 41619, sum loss: 5210.205078, avg loss: 2.945283, ppl: 19.016043 +epoch: 0, batch: 41620, sum loss: 5260.568359, avg loss: 3.069176, ppl: 21.524168 +epoch: 0, batch: 41621, sum loss: 5377.349609, avg loss: 3.043209, ppl: 20.972427 +epoch: 0, batch: 41622, sum loss: 5263.656738, avg loss: 3.070978, ppl: 21.562990 +epoch: 0, batch: 41623, sum loss: 5729.178223, avg loss: 3.076895, ppl: 21.690947 +epoch: 0, batch: 41624, sum loss: 5466.978516, avg loss: 3.221555, ppl: 25.067068 +epoch: 0, batch: 41625, sum loss: 4379.101074, avg loss: 2.858421, ppl: 17.433979 +epoch: 0, batch: 41626, sum loss: 4786.852051, avg loss: 2.876714, ppl: 17.755831 +epoch: 0, batch: 41627, sum loss: 4610.387695, avg loss: 2.812927, ppl: 16.658607 +epoch: 0, batch: 41628, sum loss: 5532.152344, avg loss: 2.966301, ppl: 19.419960 +epoch: 0, batch: 41629, sum loss: 5545.870117, avg loss: 2.986467, ppl: 19.815557 +epoch: 0, batch: 41630, sum loss: 5048.862793, avg loss: 2.772577, ppl: 15.999813 +epoch: 0, batch: 41631, sum loss: 4788.178711, avg loss: 2.961150, ppl: 19.320171 +epoch: 0, batch: 41632, sum loss: 5095.208496, avg loss: 2.976173, ppl: 19.612619 +epoch: 0, batch: 41633, sum loss: 4677.046875, avg loss: 2.869354, ppl: 17.625629 +epoch: 0, batch: 41634, sum loss: 3912.795654, avg loss: 2.610271, ppl: 13.602734 +epoch: 0, batch: 41635, sum loss: 5454.308105, avg loss: 2.922995, ppl: 18.596895 +epoch: 0, batch: 41636, sum loss: 4683.681641, avg loss: 2.813022, ppl: 16.660192 +epoch: 0, batch: 41637, sum loss: 4334.480957, avg loss: 2.675606, ppl: 14.521140 +epoch: 0, batch: 41638, sum loss: 5581.839844, avg loss: 2.980160, ppl: 19.690971 +epoch: 0, batch: 41639, sum loss: 5033.307129, avg loss: 2.976527, ppl: 
19.619564 +epoch: 0, batch: 41640, sum loss: 5567.998047, avg loss: 3.170842, ppl: 23.827538 +epoch: 0, batch: 41641, sum loss: 5140.063965, avg loss: 2.799599, ppl: 16.438053 +epoch: 0, batch: 41642, sum loss: 4753.519531, avg loss: 2.916270, ppl: 18.472248 +epoch: 0, batch: 41643, sum loss: 4425.000977, avg loss: 2.818472, ppl: 16.751234 +epoch: 0, batch: 41644, sum loss: 5591.850586, avg loss: 2.924608, ppl: 18.626923 +epoch: 0, batch: 41645, sum loss: 4505.260254, avg loss: 2.678514, ppl: 14.563433 +epoch: 0, batch: 41646, sum loss: 4130.032227, avg loss: 2.864100, ppl: 17.533270 +epoch: 0, batch: 41647, sum loss: 5884.732422, avg loss: 3.168946, ppl: 23.782400 +epoch: 0, batch: 41648, sum loss: 5581.861816, avg loss: 2.991351, ppl: 19.912573 +epoch: 0, batch: 41649, sum loss: 4966.885742, avg loss: 2.975965, ppl: 19.608541 +epoch: 0, batch: 41650, sum loss: 4772.979492, avg loss: 2.999987, ppl: 20.085278 +epoch: 0, batch: 41651, sum loss: 4563.093750, avg loss: 3.089434, ppl: 21.964640 +epoch: 0, batch: 41652, sum loss: 4181.629883, avg loss: 2.699567, ppl: 14.873295 +epoch: 0, batch: 41653, sum loss: 5839.184570, avg loss: 3.096068, ppl: 22.110849 +epoch: 0, batch: 41654, sum loss: 5890.621582, avg loss: 3.155127, ppl: 23.456005 +epoch: 0, batch: 41655, sum loss: 5782.327148, avg loss: 3.010061, ppl: 20.288637 +epoch: 0, batch: 41656, sum loss: 5311.054199, avg loss: 2.668871, ppl: 14.423681 +epoch: 0, batch: 41657, sum loss: 4701.722168, avg loss: 2.818778, ppl: 16.756367 +epoch: 0, batch: 41658, sum loss: 4624.244629, avg loss: 2.782337, ppl: 16.156742 +epoch: 0, batch: 41659, sum loss: 4712.726074, avg loss: 2.741551, ppl: 15.511026 +epoch: 0, batch: 41660, sum loss: 4169.812012, avg loss: 2.794780, ppl: 16.359034 +epoch: 0, batch: 41661, sum loss: 5045.250488, avg loss: 2.866620, ppl: 17.577499 +epoch: 0, batch: 41662, sum loss: 5036.550293, avg loss: 2.943630, ppl: 18.984631 +epoch: 0, batch: 41663, sum loss: 5289.531250, avg loss: 2.945174, ppl: 
19.013962 +epoch: 0, batch: 41664, sum loss: 4641.697266, avg loss: 2.740081, ppl: 15.488240 +epoch: 0, batch: 41665, sum loss: 4186.631348, avg loss: 2.651445, ppl: 14.174501 +epoch: 0, batch: 41666, sum loss: 4878.486328, avg loss: 2.921249, ppl: 18.564466 +epoch: 0, batch: 41667, sum loss: 4715.857422, avg loss: 2.815438, ppl: 16.700481 +epoch: 0, batch: 41668, sum loss: 5174.715820, avg loss: 2.948556, ppl: 19.078384 +epoch: 0, batch: 41669, sum loss: 4741.901855, avg loss: 2.822561, ppl: 16.819868 +epoch: 0, batch: 41670, sum loss: 5285.761719, avg loss: 2.840281, ppl: 17.120567 +epoch: 0, batch: 41671, sum loss: 5617.354980, avg loss: 2.997521, ppl: 20.035810 +epoch: 0, batch: 41672, sum loss: 4570.651367, avg loss: 2.952617, ppl: 19.156023 +epoch: 0, batch: 41673, sum loss: 4906.088867, avg loss: 3.085590, ppl: 21.880381 +epoch: 0, batch: 41674, sum loss: 4542.412598, avg loss: 2.891415, ppl: 18.018785 +epoch: 0, batch: 41675, sum loss: 5263.635742, avg loss: 2.846747, ppl: 17.231638 +epoch: 0, batch: 41676, sum loss: 4225.834961, avg loss: 2.554919, ppl: 12.870251 +epoch: 0, batch: 41677, sum loss: 4502.250977, avg loss: 2.853137, ppl: 17.342108 +epoch: 0, batch: 41678, sum loss: 4258.122070, avg loss: 3.043690, ppl: 20.982525 +epoch: 0, batch: 41679, sum loss: 5740.141113, avg loss: 2.924168, ppl: 18.618727 +epoch: 0, batch: 41680, sum loss: 5556.402832, avg loss: 3.064756, ppl: 21.429241 +epoch: 0, batch: 41681, sum loss: 4785.816406, avg loss: 2.912852, ppl: 18.409233 +epoch: 0, batch: 41682, sum loss: 6013.954590, avg loss: 3.098379, ppl: 22.162006 +epoch: 0, batch: 41683, sum loss: 4617.588379, avg loss: 2.884190, ppl: 17.889078 +epoch: 0, batch: 41684, sum loss: 4641.068359, avg loss: 2.795824, ppl: 16.376122 +epoch: 0, batch: 41685, sum loss: 6035.153320, avg loss: 2.941108, ppl: 18.936817 +epoch: 0, batch: 41686, sum loss: 5539.939453, avg loss: 3.140555, ppl: 23.116701 +epoch: 0, batch: 41687, sum loss: 4645.273438, avg loss: 2.796673, ppl: 
16.390024 +epoch: 0, batch: 41688, sum loss: 4992.772461, avg loss: 2.924881, ppl: 18.632017 +epoch: 0, batch: 41689, sum loss: 4957.480469, avg loss: 3.058285, ppl: 21.291018 +epoch: 0, batch: 41690, sum loss: 5624.335938, avg loss: 3.014113, ppl: 20.371023 +epoch: 0, batch: 41691, sum loss: 4496.315918, avg loss: 2.893382, ppl: 18.054272 +epoch: 0, batch: 41692, sum loss: 4377.242188, avg loss: 2.879765, ppl: 17.810083 +epoch: 0, batch: 41693, sum loss: 4597.330078, avg loss: 2.662033, ppl: 14.325377 +epoch: 0, batch: 41694, sum loss: 4763.912109, avg loss: 2.837351, ppl: 17.070484 +epoch: 0, batch: 41695, sum loss: 4333.021484, avg loss: 2.726886, ppl: 15.285212 +epoch: 0, batch: 41696, sum loss: 5335.493164, avg loss: 2.835012, ppl: 17.030607 +epoch: 0, batch: 41697, sum loss: 4841.271484, avg loss: 2.881709, ppl: 17.844749 +epoch: 0, batch: 41698, sum loss: 4147.844727, avg loss: 2.770771, ppl: 15.970947 +epoch: 0, batch: 41699, sum loss: 4993.758789, avg loss: 3.011917, ppl: 20.326336 +epoch: 0, batch: 41700, sum loss: 5205.501953, avg loss: 3.065667, ppl: 21.448757 +epoch: 0, batch: 41701, sum loss: 5187.873535, avg loss: 3.060692, ppl: 21.342327 +epoch: 0, batch: 41702, sum loss: 4119.250000, avg loss: 2.699378, ppl: 14.870473 +epoch: 0, batch: 41703, sum loss: 5101.286133, avg loss: 2.790638, ppl: 16.291410 +epoch: 0, batch: 41704, sum loss: 4588.887207, avg loss: 2.677297, ppl: 14.545725 +epoch: 0, batch: 41705, sum loss: 4630.361816, avg loss: 2.695205, ppl: 14.808550 +epoch: 0, batch: 41706, sum loss: 5119.652344, avg loss: 2.762899, ppl: 15.845716 +epoch: 0, batch: 41707, sum loss: 4601.417969, avg loss: 2.785362, ppl: 16.205683 +epoch: 0, batch: 41708, sum loss: 6004.487793, avg loss: 3.219564, ppl: 25.017221 +epoch: 0, batch: 41709, sum loss: 5171.557617, avg loss: 2.973869, ppl: 19.567476 +epoch: 0, batch: 41710, sum loss: 5212.513184, avg loss: 3.064382, ppl: 21.421211 +epoch: 0, batch: 41711, sum loss: 5175.220703, avg loss: 2.779388, ppl: 
16.109161 +epoch: 0, batch: 41712, sum loss: 4900.666504, avg loss: 2.938050, ppl: 18.878988 +epoch: 0, batch: 41713, sum loss: 4807.905273, avg loss: 2.899822, ppl: 18.170916 +epoch: 0, batch: 41714, sum loss: 4031.819336, avg loss: 2.894343, ppl: 18.071619 +epoch: 0, batch: 41715, sum loss: 4948.998047, avg loss: 2.852448, ppl: 17.330162 +epoch: 0, batch: 41716, sum loss: 4940.987305, avg loss: 2.766510, ppl: 15.903039 +epoch: 0, batch: 41717, sum loss: 5194.222656, avg loss: 3.030468, ppl: 20.706926 +epoch: 0, batch: 41718, sum loss: 5371.164062, avg loss: 2.965856, ppl: 19.411304 +epoch: 0, batch: 41719, sum loss: 4496.170410, avg loss: 2.827780, ppl: 16.907885 +epoch: 0, batch: 41720, sum loss: 5594.197266, avg loss: 3.051935, ppl: 21.156246 +epoch: 0, batch: 41721, sum loss: 5066.465820, avg loss: 2.901756, ppl: 18.206089 +epoch: 0, batch: 41722, sum loss: 4239.851562, avg loss: 2.607535, ppl: 13.565566 +epoch: 0, batch: 41723, sum loss: 5847.283691, avg loss: 3.066221, ppl: 21.460655 +epoch: 0, batch: 41724, sum loss: 3993.204346, avg loss: 2.651530, ppl: 14.175714 +epoch: 0, batch: 41725, sum loss: 4318.807617, avg loss: 2.784531, ppl: 16.192221 +epoch: 0, batch: 41726, sum loss: 5285.126953, avg loss: 3.008040, ppl: 20.247679 +epoch: 0, batch: 41727, sum loss: 4453.104492, avg loss: 3.004794, ppl: 20.182060 +epoch: 0, batch: 41728, sum loss: 5276.117188, avg loss: 3.089061, ppl: 21.956463 +epoch: 0, batch: 41729, sum loss: 5396.413086, avg loss: 2.614541, ppl: 13.660949 +epoch: 0, batch: 41730, sum loss: 4999.878418, avg loss: 2.963769, ppl: 19.370848 +epoch: 0, batch: 41731, sum loss: 4841.179688, avg loss: 2.677644, ppl: 14.550764 +epoch: 0, batch: 41732, sum loss: 4311.815430, avg loss: 2.889957, ppl: 17.992531 +epoch: 0, batch: 41733, sum loss: 4159.527344, avg loss: 2.906728, ppl: 18.296829 +epoch: 0, batch: 41734, sum loss: 4022.344727, avg loss: 2.676211, ppl: 14.529930 +epoch: 0, batch: 41735, sum loss: 4006.212891, avg loss: 2.686930, ppl: 
14.686525 +epoch: 0, batch: 41736, sum loss: 4886.829590, avg loss: 2.842833, ppl: 17.164324 +epoch: 0, batch: 41737, sum loss: 4697.209961, avg loss: 2.969159, ppl: 19.475536 +epoch: 0, batch: 41738, sum loss: 4506.024414, avg loss: 2.737561, ppl: 15.449251 +epoch: 0, batch: 41739, sum loss: 3896.938477, avg loss: 2.685692, ppl: 14.668343 +epoch: 0, batch: 41740, sum loss: 3746.500977, avg loss: 2.601737, ppl: 13.487142 +epoch: 0, batch: 41741, sum loss: 4509.348633, avg loss: 2.587119, ppl: 13.291425 +epoch: 0, batch: 41742, sum loss: 4789.542969, avg loss: 2.784618, ppl: 16.193628 +epoch: 0, batch: 41743, sum loss: 4961.437500, avg loss: 2.777961, ppl: 16.086180 +epoch: 0, batch: 41744, sum loss: 5840.160156, avg loss: 3.175726, ppl: 23.944201 +epoch: 0, batch: 41745, sum loss: 4335.077148, avg loss: 2.895843, ppl: 18.098753 +epoch: 0, batch: 41746, sum loss: 5033.535645, avg loss: 2.987262, ppl: 19.831301 +epoch: 0, batch: 41747, sum loss: 4616.513184, avg loss: 2.726824, ppl: 15.284271 +epoch: 0, batch: 41748, sum loss: 4706.526367, avg loss: 2.965675, ppl: 19.407806 +epoch: 0, batch: 41749, sum loss: 4429.448242, avg loss: 3.015281, ppl: 20.394825 +epoch: 0, batch: 41750, sum loss: 4858.712891, avg loss: 2.841352, ppl: 17.138929 +epoch: 0, batch: 41751, sum loss: 4083.156738, avg loss: 2.656576, ppl: 14.247417 +epoch: 0, batch: 41752, sum loss: 4588.829590, avg loss: 2.825634, ppl: 16.871637 +epoch: 0, batch: 41753, sum loss: 4866.247559, avg loss: 2.889696, ppl: 17.987839 +epoch: 0, batch: 41754, sum loss: 3830.422607, avg loss: 2.921756, ppl: 18.573879 +epoch: 0, batch: 41755, sum loss: 4705.496094, avg loss: 2.997131, ppl: 20.028002 +epoch: 0, batch: 41756, sum loss: 4961.767578, avg loss: 2.659039, ppl: 14.282560 +epoch: 0, batch: 41757, sum loss: 4503.593262, avg loss: 3.118832, ppl: 22.619942 +epoch: 0, batch: 41758, sum loss: 4905.931152, avg loss: 2.825997, ppl: 16.877766 +epoch: 0, batch: 41759, sum loss: 5414.150391, avg loss: 2.979720, ppl: 
19.682299 +epoch: 0, batch: 41760, sum loss: 3774.204102, avg loss: 2.565740, ppl: 13.010287 +epoch: 0, batch: 41761, sum loss: 4975.914062, avg loss: 2.944328, ppl: 18.997889 +epoch: 0, batch: 41762, sum loss: 4682.841309, avg loss: 2.815900, ppl: 16.708204 +epoch: 0, batch: 41763, sum loss: 3983.485352, avg loss: 2.571650, ppl: 13.087399 +epoch: 0, batch: 41764, sum loss: 4683.912598, avg loss: 2.849095, ppl: 17.272150 +epoch: 0, batch: 41765, sum loss: 5287.186523, avg loss: 2.810838, ppl: 16.623842 +epoch: 0, batch: 41766, sum loss: 3946.072998, avg loss: 2.606389, ppl: 13.550034 +epoch: 0, batch: 41767, sum loss: 4999.294922, avg loss: 2.842124, ppl: 17.152149 +epoch: 0, batch: 41768, sum loss: 4107.397461, avg loss: 2.805599, ppl: 16.536982 +epoch: 0, batch: 41769, sum loss: 5085.993652, avg loss: 2.737349, ppl: 15.445976 +epoch: 0, batch: 41770, sum loss: 4328.668945, avg loss: 2.796298, ppl: 16.383879 +epoch: 0, batch: 41771, sum loss: 5466.883789, avg loss: 2.900204, ppl: 18.177849 +epoch: 0, batch: 41772, sum loss: 5398.162109, avg loss: 2.882094, ppl: 17.851618 +epoch: 0, batch: 41773, sum loss: 5863.471680, avg loss: 3.143953, ppl: 23.195368 +epoch: 0, batch: 41774, sum loss: 5137.876953, avg loss: 2.968155, ppl: 19.455997 +epoch: 0, batch: 41775, sum loss: 4419.692871, avg loss: 2.602882, ppl: 13.502592 +epoch: 0, batch: 41776, sum loss: 4224.427734, avg loss: 2.675382, ppl: 14.517890 +epoch: 0, batch: 41777, sum loss: 4963.281250, avg loss: 2.751265, ppl: 15.662426 +epoch: 0, batch: 41778, sum loss: 4405.119629, avg loss: 2.884820, ppl: 17.900341 +epoch: 0, batch: 41779, sum loss: 4704.937500, avg loss: 2.586552, ppl: 13.283885 +epoch: 0, batch: 41780, sum loss: 4908.407227, avg loss: 2.868736, ppl: 17.614744 +epoch: 0, batch: 41781, sum loss: 3893.117188, avg loss: 2.600613, ppl: 13.471989 +epoch: 0, batch: 41782, sum loss: 5832.563965, avg loss: 3.115686, ppl: 22.548893 +epoch: 0, batch: 41783, sum loss: 3876.107666, avg loss: 2.665824, ppl: 
14.379789 +epoch: 0, batch: 41784, sum loss: 6009.577637, avg loss: 3.174632, ppl: 23.918007 +epoch: 0, batch: 41785, sum loss: 4926.251465, avg loss: 2.813393, ppl: 16.666378 +epoch: 0, batch: 41786, sum loss: 5889.752930, avg loss: 3.054851, ppl: 21.218031 +epoch: 0, batch: 41787, sum loss: 3902.489258, avg loss: 2.871589, ppl: 17.665064 +epoch: 0, batch: 41788, sum loss: 5537.571777, avg loss: 3.255480, ppl: 25.932060 +epoch: 0, batch: 41789, sum loss: 3662.147461, avg loss: 2.617689, ppl: 13.704022 +epoch: 0, batch: 41790, sum loss: 4137.852051, avg loss: 2.777082, ppl: 16.072054 +epoch: 0, batch: 41791, sum loss: 4272.563477, avg loss: 2.948629, ppl: 19.079781 +epoch: 0, batch: 41792, sum loss: 5339.813477, avg loss: 2.903651, ppl: 18.240616 +epoch: 0, batch: 41793, sum loss: 5135.845215, avg loss: 2.955032, ppl: 19.202335 +epoch: 0, batch: 41794, sum loss: 4581.381348, avg loss: 2.906968, ppl: 18.301224 +epoch: 0, batch: 41795, sum loss: 4769.797363, avg loss: 2.931652, ppl: 18.758587 +epoch: 0, batch: 41796, sum loss: 5187.573242, avg loss: 2.974526, ppl: 19.580339 +epoch: 0, batch: 41797, sum loss: 4653.330078, avg loss: 2.904700, ppl: 18.259769 +epoch: 0, batch: 41798, sum loss: 4437.023438, avg loss: 2.664879, ppl: 14.366212 +epoch: 0, batch: 41799, sum loss: 5125.710449, avg loss: 3.067451, ppl: 21.487062 +epoch: 0, batch: 41800, sum loss: 4825.588379, avg loss: 2.807207, ppl: 16.563589 +epoch: 0, batch: 41801, sum loss: 4177.031250, avg loss: 2.757116, ppl: 15.754347 +epoch: 0, batch: 41802, sum loss: 5435.156250, avg loss: 3.086403, ppl: 21.898170 +epoch: 0, batch: 41803, sum loss: 4649.819336, avg loss: 2.873807, ppl: 17.704285 +epoch: 0, batch: 41804, sum loss: 4278.480469, avg loss: 2.634532, ppl: 13.936791 +epoch: 0, batch: 41805, sum loss: 4556.847168, avg loss: 2.848029, ppl: 17.253748 +epoch: 0, batch: 41806, sum loss: 6421.729492, avg loss: 3.220526, ppl: 25.041300 +epoch: 0, batch: 41807, sum loss: 4747.027344, avg loss: 2.751900, ppl: 
15.672380 +epoch: 0, batch: 41808, sum loss: 4474.555176, avg loss: 2.934134, ppl: 18.805220 +epoch: 0, batch: 41809, sum loss: 6199.723633, avg loss: 3.134339, ppl: 22.973454 +epoch: 0, batch: 41810, sum loss: 4115.029297, avg loss: 2.651437, ppl: 14.174386 +epoch: 0, batch: 41811, sum loss: 4694.132812, avg loss: 2.647565, ppl: 14.119617 +epoch: 0, batch: 41812, sum loss: 4138.610840, avg loss: 2.642791, ppl: 14.052369 +epoch: 0, batch: 41813, sum loss: 4861.813965, avg loss: 2.820078, ppl: 16.778158 +epoch: 0, batch: 41814, sum loss: 4687.373047, avg loss: 2.827125, ppl: 16.896811 +epoch: 0, batch: 41815, sum loss: 3988.774170, avg loss: 2.750879, ppl: 15.656384 +epoch: 0, batch: 41816, sum loss: 5202.170898, avg loss: 2.850505, ppl: 17.296509 +epoch: 0, batch: 41817, sum loss: 4198.861328, avg loss: 2.566541, ppl: 13.020710 +epoch: 0, batch: 41818, sum loss: 4040.168213, avg loss: 2.616689, ppl: 13.690326 +epoch: 0, batch: 41819, sum loss: 3775.938477, avg loss: 2.677971, ppl: 14.555525 +epoch: 0, batch: 41820, sum loss: 5600.443848, avg loss: 2.941410, ppl: 18.942533 +epoch: 0, batch: 41821, sum loss: 4679.687988, avg loss: 2.883357, ppl: 17.874168 +epoch: 0, batch: 41822, sum loss: 4814.681152, avg loss: 2.809032, ppl: 16.593851 +epoch: 0, batch: 41823, sum loss: 5769.622559, avg loss: 3.108633, ppl: 22.390411 +epoch: 0, batch: 41824, sum loss: 4975.293945, avg loss: 2.760984, ppl: 15.815404 +epoch: 0, batch: 41825, sum loss: 6747.137207, avg loss: 3.209865, ppl: 24.775749 +epoch: 0, batch: 41826, sum loss: 5039.863281, avg loss: 2.962883, ppl: 19.353680 +epoch: 0, batch: 41827, sum loss: 4903.467285, avg loss: 2.915260, ppl: 18.453608 +epoch: 0, batch: 41828, sum loss: 4299.505371, avg loss: 2.554667, ppl: 12.867021 +epoch: 0, batch: 41829, sum loss: 4523.107422, avg loss: 2.785165, ppl: 16.202484 +epoch: 0, batch: 41830, sum loss: 4881.792969, avg loss: 3.049215, ppl: 21.098778 +epoch: 0, batch: 41831, sum loss: 4079.076660, avg loss: 3.021538, ppl: 
20.522837 +epoch: 0, batch: 41832, sum loss: 4835.137695, avg loss: 2.802979, ppl: 16.493700 +epoch: 0, batch: 41833, sum loss: 4061.995605, avg loss: 2.731672, ppl: 15.358539 +epoch: 0, batch: 41834, sum loss: 4729.781738, avg loss: 2.863064, ppl: 17.515112 +epoch: 0, batch: 41835, sum loss: 4118.261230, avg loss: 2.662095, ppl: 14.326276 +epoch: 0, batch: 41836, sum loss: 5498.461426, avg loss: 2.940354, ppl: 18.922541 +epoch: 0, batch: 41837, sum loss: 5682.419922, avg loss: 2.942734, ppl: 18.967638 +epoch: 0, batch: 41838, sum loss: 5856.096191, avg loss: 3.131602, ppl: 22.910660 +epoch: 0, batch: 41839, sum loss: 4505.007812, avg loss: 2.820919, ppl: 16.792280 +epoch: 0, batch: 41840, sum loss: 4656.178223, avg loss: 2.789801, ppl: 16.277786 +epoch: 0, batch: 41841, sum loss: 5736.905273, avg loss: 3.079391, ppl: 21.745150 +epoch: 0, batch: 41842, sum loss: 5519.108398, avg loss: 2.937258, ppl: 18.864059 +epoch: 0, batch: 41843, sum loss: 5320.836914, avg loss: 3.079188, ppl: 21.740744 +epoch: 0, batch: 41844, sum loss: 5924.908691, avg loss: 2.928773, ppl: 18.704674 +epoch: 0, batch: 41845, sum loss: 4340.087402, avg loss: 2.669180, ppl: 14.428139 +epoch: 0, batch: 41846, sum loss: 5501.850098, avg loss: 2.912573, ppl: 18.404089 +epoch: 0, batch: 41847, sum loss: 4407.589355, avg loss: 2.694126, ppl: 14.792580 +epoch: 0, batch: 41848, sum loss: 5268.885742, avg loss: 3.166398, ppl: 23.721880 +epoch: 0, batch: 41849, sum loss: 4620.345703, avg loss: 2.944771, ppl: 19.006310 +epoch: 0, batch: 41850, sum loss: 4410.751465, avg loss: 2.831034, ppl: 16.962999 +epoch: 0, batch: 41851, sum loss: 4512.419434, avg loss: 2.839786, ppl: 17.112097 +epoch: 0, batch: 41852, sum loss: 4885.089844, avg loss: 2.904334, ppl: 18.253084 +epoch: 0, batch: 41853, sum loss: 4472.380859, avg loss: 2.738751, ppl: 15.467656 +epoch: 0, batch: 41854, sum loss: 4650.391602, avg loss: 2.672639, ppl: 14.478125 +epoch: 0, batch: 41855, sum loss: 4780.022949, avg loss: 2.835126, ppl: 
17.032553 +epoch: 0, batch: 41856, sum loss: 4940.257324, avg loss: 3.045781, ppl: 21.026455 +epoch: 0, batch: 41857, sum loss: 4656.801270, avg loss: 2.740907, ppl: 15.501041 +epoch: 0, batch: 41858, sum loss: 4571.421875, avg loss: 2.809725, ppl: 16.605345 +epoch: 0, batch: 41859, sum loss: 4492.574219, avg loss: 2.848811, ppl: 17.267233 +epoch: 0, batch: 41860, sum loss: 4401.836426, avg loss: 2.867646, ppl: 17.595545 +epoch: 0, batch: 41861, sum loss: 5880.583008, avg loss: 3.343140, ppl: 28.307871 +epoch: 0, batch: 41862, sum loss: 4726.165527, avg loss: 2.771945, ppl: 15.989700 +epoch: 0, batch: 41863, sum loss: 5410.308105, avg loss: 3.230035, ppl: 25.280537 +epoch: 0, batch: 41864, sum loss: 4981.505859, avg loss: 2.817594, ppl: 16.736534 +epoch: 0, batch: 41865, sum loss: 5772.335938, avg loss: 3.003297, ppl: 20.151861 +epoch: 0, batch: 41866, sum loss: 4345.677246, avg loss: 2.630555, ppl: 13.881477 +epoch: 0, batch: 41867, sum loss: 4608.169434, avg loss: 2.998158, ppl: 20.048582 +epoch: 0, batch: 41868, sum loss: 4858.285156, avg loss: 2.832819, ppl: 16.993301 +epoch: 0, batch: 41869, sum loss: 4503.158203, avg loss: 2.717657, ppl: 15.144801 +epoch: 0, batch: 41870, sum loss: 6514.219727, avg loss: 3.237684, ppl: 25.474655 +epoch: 0, batch: 41871, sum loss: 4155.956055, avg loss: 2.815688, ppl: 16.704670 +epoch: 0, batch: 41872, sum loss: 4370.413574, avg loss: 2.596799, ppl: 13.420714 +epoch: 0, batch: 41873, sum loss: 4328.522949, avg loss: 2.748268, ppl: 15.615568 +epoch: 0, batch: 41874, sum loss: 4605.928711, avg loss: 2.873318, ppl: 17.695635 +epoch: 0, batch: 41875, sum loss: 4397.827637, avg loss: 2.957517, ppl: 19.250113 +epoch: 0, batch: 41876, sum loss: 5062.732910, avg loss: 2.841040, ppl: 17.133574 +epoch: 0, batch: 41877, sum loss: 4294.633301, avg loss: 2.913591, ppl: 18.422838 +epoch: 0, batch: 41878, sum loss: 5859.868652, avg loss: 2.774559, ppl: 16.031555 +epoch: 0, batch: 41879, sum loss: 5907.809082, avg loss: 3.076984, ppl: 
21.692881 +epoch: 0, batch: 41880, sum loss: 4456.475098, avg loss: 2.941568, ppl: 18.945528 +epoch: 0, batch: 41881, sum loss: 4599.768066, avg loss: 2.889302, ppl: 17.980747 +epoch: 0, batch: 41882, sum loss: 4952.751465, avg loss: 2.857906, ppl: 17.425003 +epoch: 0, batch: 41883, sum loss: 5036.173828, avg loss: 3.065230, ppl: 21.439400 +epoch: 0, batch: 41884, sum loss: 4180.482422, avg loss: 2.609540, ppl: 13.592790 +epoch: 0, batch: 41885, sum loss: 4217.337891, avg loss: 2.496944, ppl: 12.145318 +epoch: 0, batch: 41886, sum loss: 5016.617188, avg loss: 2.839059, ppl: 17.099669 +epoch: 0, batch: 41887, sum loss: 4201.466309, avg loss: 2.815996, ppl: 16.709814 +epoch: 0, batch: 41888, sum loss: 5300.756348, avg loss: 3.010083, ppl: 20.289083 +epoch: 0, batch: 41889, sum loss: 5113.630859, avg loss: 2.986934, ppl: 19.824800 +epoch: 0, batch: 41890, sum loss: 4485.866211, avg loss: 2.807175, ppl: 16.563068 +epoch: 0, batch: 41891, sum loss: 4874.264160, avg loss: 2.807756, ppl: 16.572687 +epoch: 0, batch: 41892, sum loss: 5340.580078, avg loss: 2.891489, ppl: 18.020126 +epoch: 0, batch: 41893, sum loss: 6344.022461, avg loss: 3.276871, ppl: 26.492746 +epoch: 0, batch: 41894, sum loss: 5893.324219, avg loss: 3.111576, ppl: 22.456404 +epoch: 0, batch: 41895, sum loss: 4631.398926, avg loss: 2.769976, ppl: 15.958245 +epoch: 0, batch: 41896, sum loss: 4431.048828, avg loss: 2.687113, ppl: 14.689200 +epoch: 0, batch: 41897, sum loss: 3770.518799, avg loss: 2.431024, ppl: 11.370523 +epoch: 0, batch: 41898, sum loss: 4726.156738, avg loss: 2.752567, ppl: 15.682832 +epoch: 0, batch: 41899, sum loss: 4776.894531, avg loss: 2.907422, ppl: 18.309536 +epoch: 0, batch: 41900, sum loss: 4858.828125, avg loss: 2.934075, ppl: 18.804098 +epoch: 0, batch: 41901, sum loss: 4810.628418, avg loss: 2.727114, ppl: 15.288696 +epoch: 0, batch: 41902, sum loss: 4876.078613, avg loss: 2.998818, ppl: 20.061813 +epoch: 0, batch: 41903, sum loss: 5416.831055, avg loss: 2.916979, ppl: 
18.485365 +epoch: 0, batch: 41904, sum loss: 4984.915527, avg loss: 3.171066, ppl: 23.832872 +epoch: 0, batch: 41905, sum loss: 4144.255371, avg loss: 2.910292, ppl: 18.362154 +epoch: 0, batch: 41906, sum loss: 4541.265625, avg loss: 2.514544, ppl: 12.360965 +epoch: 0, batch: 41907, sum loss: 4111.734863, avg loss: 2.714016, ppl: 15.089761 +epoch: 0, batch: 41908, sum loss: 4725.664062, avg loss: 2.862304, ppl: 17.501808 +epoch: 0, batch: 41909, sum loss: 4734.786621, avg loss: 2.744804, ppl: 15.561563 +epoch: 0, batch: 41910, sum loss: 5387.538574, avg loss: 2.901205, ppl: 18.196064 +epoch: 0, batch: 41911, sum loss: 5363.027344, avg loss: 2.819678, ppl: 16.771450 +epoch: 0, batch: 41912, sum loss: 4488.552734, avg loss: 2.621818, ppl: 13.760718 +epoch: 0, batch: 41913, sum loss: 5023.849121, avg loss: 2.974452, ppl: 19.578886 +epoch: 0, batch: 41914, sum loss: 4983.036133, avg loss: 2.872067, ppl: 17.673512 +epoch: 0, batch: 41915, sum loss: 4789.472656, avg loss: 2.963783, ppl: 19.371105 +epoch: 0, batch: 41916, sum loss: 4885.440918, avg loss: 3.036321, ppl: 20.828478 +epoch: 0, batch: 41917, sum loss: 5277.141602, avg loss: 2.891584, ppl: 18.021841 +epoch: 0, batch: 41918, sum loss: 4170.767090, avg loss: 2.651473, ppl: 14.174904 +epoch: 0, batch: 41919, sum loss: 5616.972656, avg loss: 2.908841, ppl: 18.335543 +epoch: 0, batch: 41920, sum loss: 4706.354492, avg loss: 2.819865, ppl: 16.774586 +epoch: 0, batch: 41921, sum loss: 4322.871094, avg loss: 3.035724, ppl: 20.816046 +epoch: 0, batch: 41922, sum loss: 4981.271484, avg loss: 2.950990, ppl: 19.124886 +epoch: 0, batch: 41923, sum loss: 5861.836914, avg loss: 3.077080, ppl: 21.694956 +epoch: 0, batch: 41924, sum loss: 5067.081055, avg loss: 3.014325, ppl: 20.375341 +epoch: 0, batch: 41925, sum loss: 5146.577148, avg loss: 2.939222, ppl: 18.901133 +epoch: 0, batch: 41926, sum loss: 4641.895508, avg loss: 2.872460, ppl: 17.680460 +epoch: 0, batch: 41927, sum loss: 4433.244629, avg loss: 2.606258, ppl: 
13.548254 +epoch: 0, batch: 41928, sum loss: 4891.706055, avg loss: 2.930920, ppl: 18.744875 +epoch: 0, batch: 41929, sum loss: 4270.737305, avg loss: 2.734147, ppl: 15.396602 +epoch: 0, batch: 41930, sum loss: 4717.428711, avg loss: 2.828195, ppl: 16.914896 +epoch: 0, batch: 41931, sum loss: 5308.530273, avg loss: 2.844871, ppl: 17.199345 +epoch: 0, batch: 41932, sum loss: 4550.759277, avg loss: 2.642717, ppl: 14.051334 +epoch: 0, batch: 41933, sum loss: 4919.747559, avg loss: 2.994369, ppl: 19.972744 +epoch: 0, batch: 41934, sum loss: 5072.851562, avg loss: 2.920467, ppl: 18.549955 +epoch: 0, batch: 41935, sum loss: 4679.251465, avg loss: 2.806989, ppl: 16.559988 +epoch: 0, batch: 41936, sum loss: 4572.382812, avg loss: 2.812043, ppl: 16.643894 +epoch: 0, batch: 41937, sum loss: 4807.547852, avg loss: 3.119759, ppl: 22.640913 +epoch: 0, batch: 41938, sum loss: 4010.467529, avg loss: 2.575766, ppl: 13.141377 +epoch: 0, batch: 41939, sum loss: 4402.668457, avg loss: 2.857020, ppl: 17.409576 +epoch: 0, batch: 41940, sum loss: 4723.770996, avg loss: 3.022246, ppl: 20.537374 +epoch: 0, batch: 41941, sum loss: 4315.375000, avg loss: 2.829754, ppl: 16.941296 +epoch: 0, batch: 41942, sum loss: 4933.359863, avg loss: 2.675358, ppl: 14.517544 +epoch: 0, batch: 41943, sum loss: 4023.905029, avg loss: 2.696987, ppl: 14.834972 +epoch: 0, batch: 41944, sum loss: 4077.403564, avg loss: 2.666713, ppl: 14.392579 +epoch: 0, batch: 41945, sum loss: 5286.009766, avg loss: 3.127816, ppl: 22.824087 +epoch: 0, batch: 41946, sum loss: 4146.726074, avg loss: 2.566043, ppl: 13.014230 +epoch: 0, batch: 41947, sum loss: 4534.775391, avg loss: 2.857452, ppl: 17.417086 +epoch: 0, batch: 41948, sum loss: 5145.083496, avg loss: 3.084583, ppl: 21.858341 +epoch: 0, batch: 41949, sum loss: 4609.097168, avg loss: 2.868138, ppl: 17.604206 +epoch: 0, batch: 41950, sum loss: 4815.427734, avg loss: 2.676725, ppl: 14.537400 +epoch: 0, batch: 41951, sum loss: 4891.290039, avg loss: 2.953677, ppl: 
19.176344 +epoch: 0, batch: 41952, sum loss: 4513.656738, avg loss: 2.738869, ppl: 15.469486 +epoch: 0, batch: 41953, sum loss: 4031.017090, avg loss: 2.653731, ppl: 14.206941 +epoch: 0, batch: 41954, sum loss: 5899.105957, avg loss: 3.050210, ppl: 21.119774 +epoch: 0, batch: 41955, sum loss: 3980.882568, avg loss: 2.596792, ppl: 13.420619 +epoch: 0, batch: 41956, sum loss: 4877.541016, avg loss: 2.780810, ppl: 16.132084 +epoch: 0, batch: 41957, sum loss: 4639.147461, avg loss: 2.741813, ppl: 15.515088 +epoch: 0, batch: 41958, sum loss: 4168.117676, avg loss: 2.646424, ppl: 14.103512 +epoch: 0, batch: 41959, sum loss: 5088.737305, avg loss: 2.932990, ppl: 18.783707 +epoch: 0, batch: 41960, sum loss: 4127.163574, avg loss: 2.801876, ppl: 16.475531 +epoch: 0, batch: 41961, sum loss: 3941.312012, avg loss: 2.706945, ppl: 14.983434 +epoch: 0, batch: 41962, sum loss: 4523.658691, avg loss: 2.802763, ppl: 16.490137 +epoch: 0, batch: 41963, sum loss: 4357.070801, avg loss: 2.731706, ppl: 15.359066 +epoch: 0, batch: 41964, sum loss: 5010.825195, avg loss: 3.089288, ppl: 21.961435 +epoch: 0, batch: 41965, sum loss: 4193.605957, avg loss: 2.665992, ppl: 14.382214 +epoch: 0, batch: 41966, sum loss: 5206.214355, avg loss: 2.900398, ppl: 18.181381 +epoch: 0, batch: 41967, sum loss: 4988.971680, avg loss: 2.938146, ppl: 18.880806 +epoch: 0, batch: 41968, sum loss: 5086.605469, avg loss: 2.964222, ppl: 19.379629 +epoch: 0, batch: 41969, sum loss: 5548.996582, avg loss: 3.101731, ppl: 22.236404 +epoch: 0, batch: 41970, sum loss: 5337.100098, avg loss: 2.981620, ppl: 19.719738 +epoch: 0, batch: 41971, sum loss: 3910.751465, avg loss: 2.608907, ppl: 13.584195 +epoch: 0, batch: 41972, sum loss: 5444.430664, avg loss: 3.148890, ppl: 23.310177 +epoch: 0, batch: 41973, sum loss: 4990.080078, avg loss: 2.789312, ppl: 16.269829 +epoch: 0, batch: 41974, sum loss: 5814.494141, avg loss: 3.096110, ppl: 22.111761 +epoch: 0, batch: 41975, sum loss: 4337.925293, avg loss: 2.870897, ppl: 
17.652842 +epoch: 0, batch: 41976, sum loss: 5844.083496, avg loss: 3.290588, ppl: 26.858643 +epoch: 0, batch: 41977, sum loss: 5727.702148, avg loss: 2.889860, ppl: 17.990786 +epoch: 0, batch: 41978, sum loss: 4638.368652, avg loss: 2.931965, ppl: 18.764463 +epoch: 0, batch: 41979, sum loss: 5149.157715, avg loss: 2.819911, ppl: 16.775362 +epoch: 0, batch: 41980, sum loss: 6044.628906, avg loss: 3.154817, ppl: 23.448742 +epoch: 0, batch: 41981, sum loss: 4612.890137, avg loss: 2.956981, ppl: 19.239798 +epoch: 0, batch: 41982, sum loss: 5637.600586, avg loss: 2.885159, ppl: 17.906414 +epoch: 0, batch: 41983, sum loss: 4581.965820, avg loss: 2.911033, ppl: 18.375769 +epoch: 0, batch: 41984, sum loss: 5774.979492, avg loss: 2.960010, ppl: 19.298166 +epoch: 0, batch: 41985, sum loss: 4263.739258, avg loss: 2.734920, ppl: 15.408504 +epoch: 0, batch: 41986, sum loss: 4782.087402, avg loss: 2.930201, ppl: 18.731392 +epoch: 0, batch: 41987, sum loss: 5900.781250, avg loss: 3.049499, ppl: 21.104774 +epoch: 0, batch: 41988, sum loss: 4654.267090, avg loss: 3.008576, ppl: 20.258533 +epoch: 0, batch: 41989, sum loss: 4552.120117, avg loss: 2.679294, ppl: 14.574798 +epoch: 0, batch: 41990, sum loss: 5655.082520, avg loss: 3.209468, ppl: 24.765911 +epoch: 0, batch: 41991, sum loss: 4665.586914, avg loss: 2.747696, ppl: 15.606627 +epoch: 0, batch: 41992, sum loss: 4654.263672, avg loss: 2.462573, ppl: 11.734970 +epoch: 0, batch: 41993, sum loss: 4601.935059, avg loss: 2.734364, ppl: 15.399950 +epoch: 0, batch: 41994, sum loss: 5133.323730, avg loss: 2.888758, ppl: 17.970984 +epoch: 0, batch: 41995, sum loss: 5630.472656, avg loss: 3.020640, ppl: 20.504408 +epoch: 0, batch: 41996, sum loss: 5129.825195, avg loss: 2.921313, ppl: 18.565645 +epoch: 0, batch: 41997, sum loss: 6063.464355, avg loss: 3.081029, ppl: 21.780802 +epoch: 0, batch: 41998, sum loss: 5206.064941, avg loss: 2.988556, ppl: 19.856991 +epoch: 0, batch: 41999, sum loss: 5485.368652, avg loss: 3.017255, ppl: 
20.435112 +epoch: 0, batch: 42000, sum loss: 4912.693848, avg loss: 2.977390, ppl: 19.636503 +epoch: 0, batch: 42001, sum loss: 4357.849609, avg loss: 2.804279, ppl: 16.515165 +epoch: 0, batch: 42002, sum loss: 4878.963867, avg loss: 2.740991, ppl: 15.502338 +epoch: 0, batch: 42003, sum loss: 5306.442383, avg loss: 2.746606, ppl: 15.589629 +epoch: 0, batch: 42004, sum loss: 4213.202637, avg loss: 2.551910, ppl: 12.831585 +epoch: 0, batch: 42005, sum loss: 4751.267090, avg loss: 2.833194, ppl: 16.999680 +epoch: 0, batch: 42006, sum loss: 4111.532227, avg loss: 2.678523, ppl: 14.563561 +epoch: 0, batch: 42007, sum loss: 4801.610352, avg loss: 2.680966, ppl: 14.599188 +epoch: 0, batch: 42008, sum loss: 4649.937012, avg loss: 2.816437, ppl: 16.717176 +epoch: 0, batch: 42009, sum loss: 5223.284668, avg loss: 3.250333, ppl: 25.798925 +epoch: 0, batch: 42010, sum loss: 4413.151855, avg loss: 2.843526, ppl: 17.176220 +epoch: 0, batch: 42011, sum loss: 4201.174805, avg loss: 2.728035, ppl: 15.302794 +epoch: 0, batch: 42012, sum loss: 4751.539551, avg loss: 2.927628, ppl: 18.683252 +epoch: 0, batch: 42013, sum loss: 4942.828125, avg loss: 2.895623, ppl: 18.094767 +epoch: 0, batch: 42014, sum loss: 3703.077148, avg loss: 2.396814, ppl: 10.988111 +epoch: 0, batch: 42015, sum loss: 4859.684082, avg loss: 2.742486, ppl: 15.525526 +epoch: 0, batch: 42016, sum loss: 4316.296875, avg loss: 2.666027, ppl: 14.382708 +epoch: 0, batch: 42017, sum loss: 4096.934082, avg loss: 2.591356, ppl: 13.347859 +epoch: 0, batch: 42018, sum loss: 4902.846680, avg loss: 2.904530, ppl: 18.256662 +epoch: 0, batch: 42019, sum loss: 4555.693848, avg loss: 2.868825, ppl: 17.616306 +epoch: 0, batch: 42020, sum loss: 4630.404785, avg loss: 2.852991, ppl: 17.339569 +epoch: 0, batch: 42021, sum loss: 4581.189453, avg loss: 2.709160, ppl: 15.016658 +epoch: 0, batch: 42022, sum loss: 4785.653320, avg loss: 2.928797, ppl: 18.705107 +epoch: 0, batch: 42023, sum loss: 4076.466553, avg loss: 2.708616, ppl: 
15.008486 +epoch: 0, batch: 42024, sum loss: 4754.180176, avg loss: 3.003272, ppl: 20.151371 +epoch: 0, batch: 42025, sum loss: 4975.415039, avg loss: 2.849608, ppl: 17.281006 +epoch: 0, batch: 42026, sum loss: 4849.006836, avg loss: 3.042037, ppl: 20.947866 +epoch: 0, batch: 42027, sum loss: 4378.833008, avg loss: 2.878917, ppl: 17.794994 +epoch: 0, batch: 42028, sum loss: 4598.405273, avg loss: 2.791989, ppl: 16.313429 +epoch: 0, batch: 42029, sum loss: 5250.665039, avg loss: 2.779600, ppl: 16.112581 +epoch: 0, batch: 42030, sum loss: 4318.327637, avg loss: 2.822436, ppl: 16.817774 +epoch: 0, batch: 42031, sum loss: 4956.812500, avg loss: 3.000492, ppl: 20.095419 +epoch: 0, batch: 42032, sum loss: 4164.035156, avg loss: 2.623841, ppl: 13.788581 +epoch: 0, batch: 42033, sum loss: 4442.140137, avg loss: 2.882635, ppl: 17.861273 +epoch: 0, batch: 42034, sum loss: 4462.653320, avg loss: 2.773557, ppl: 16.015499 +epoch: 0, batch: 42035, sum loss: 5322.440430, avg loss: 2.960200, ppl: 19.301838 +epoch: 0, batch: 42036, sum loss: 4377.520508, avg loss: 2.802510, ppl: 16.485975 +epoch: 0, batch: 42037, sum loss: 5296.773926, avg loss: 2.850793, ppl: 17.301498 +epoch: 0, batch: 42038, sum loss: 5816.566406, avg loss: 3.220690, ppl: 25.045403 +epoch: 0, batch: 42039, sum loss: 5923.493164, avg loss: 3.080339, ppl: 21.765789 +epoch: 0, batch: 42040, sum loss: 6286.786133, avg loss: 3.178355, ppl: 24.007223 +epoch: 0, batch: 42041, sum loss: 4578.586426, avg loss: 2.638955, ppl: 13.998563 +epoch: 0, batch: 42042, sum loss: 5154.830078, avg loss: 2.663995, ppl: 14.353514 +epoch: 0, batch: 42043, sum loss: 4338.936523, avg loss: 2.722043, ppl: 15.211368 +epoch: 0, batch: 42044, sum loss: 5672.513672, avg loss: 3.104824, ppl: 22.305294 +epoch: 0, batch: 42045, sum loss: 4807.999512, avg loss: 2.969734, ppl: 19.486740 +epoch: 0, batch: 42046, sum loss: 4023.952393, avg loss: 2.746725, ppl: 15.591487 +epoch: 0, batch: 42047, sum loss: 4464.807617, avg loss: 2.710873, ppl: 
15.042397 +epoch: 0, batch: 42048, sum loss: 4443.063477, avg loss: 2.833586, ppl: 17.006344 +epoch: 0, batch: 42049, sum loss: 4331.913574, avg loss: 2.784006, ppl: 16.183725 +epoch: 0, batch: 42050, sum loss: 4700.756836, avg loss: 2.838621, ppl: 17.092182 +epoch: 0, batch: 42051, sum loss: 4417.167480, avg loss: 2.752129, ppl: 15.675972 +epoch: 0, batch: 42052, sum loss: 4607.884277, avg loss: 2.946218, ppl: 19.033823 +epoch: 0, batch: 42053, sum loss: 3602.484863, avg loss: 2.477638, ppl: 11.913087 +epoch: 0, batch: 42054, sum loss: 5226.362793, avg loss: 2.900312, ppl: 18.179825 +epoch: 0, batch: 42055, sum loss: 5333.303711, avg loss: 3.093564, ppl: 22.055540 +epoch: 0, batch: 42056, sum loss: 5070.659668, avg loss: 2.942925, ppl: 18.971256 +epoch: 0, batch: 42057, sum loss: 4680.625977, avg loss: 2.691562, ppl: 14.754704 +epoch: 0, batch: 42058, sum loss: 4531.556641, avg loss: 2.798985, ppl: 16.427963 +epoch: 0, batch: 42059, sum loss: 4495.267578, avg loss: 2.726057, ppl: 15.272546 +epoch: 0, batch: 42060, sum loss: 5597.209961, avg loss: 2.972496, ppl: 19.540632 +epoch: 0, batch: 42061, sum loss: 5139.748047, avg loss: 3.074012, ppl: 21.628508 +epoch: 0, batch: 42062, sum loss: 4491.824707, avg loss: 2.810904, ppl: 16.624941 +epoch: 0, batch: 42063, sum loss: 4468.787109, avg loss: 2.881229, ppl: 17.836187 +epoch: 0, batch: 42064, sum loss: 5456.528320, avg loss: 3.161372, ppl: 23.602955 +epoch: 0, batch: 42065, sum loss: 4516.972656, avg loss: 2.745880, ppl: 15.578315 +epoch: 0, batch: 42066, sum loss: 4971.461426, avg loss: 3.018495, ppl: 20.460478 +epoch: 0, batch: 42067, sum loss: 4518.291016, avg loss: 2.753376, ppl: 15.695539 +epoch: 0, batch: 42068, sum loss: 3556.200195, avg loss: 2.457637, ppl: 11.677181 +epoch: 0, batch: 42069, sum loss: 4792.234375, avg loss: 2.849128, ppl: 17.272722 +epoch: 0, batch: 42070, sum loss: 4773.169922, avg loss: 2.859898, ppl: 17.459747 +epoch: 0, batch: 42071, sum loss: 4390.490723, avg loss: 2.733805, ppl: 
15.391339 +epoch: 0, batch: 42072, sum loss: 4401.512207, avg loss: 2.610624, ppl: 13.607540 +epoch: 0, batch: 42073, sum loss: 3772.477051, avg loss: 2.598125, ppl: 13.438514 +epoch: 0, batch: 42074, sum loss: 5679.089355, avg loss: 2.918340, ppl: 18.510534 +epoch: 0, batch: 42075, sum loss: 3480.282471, avg loss: 2.437173, ppl: 11.440648 +epoch: 0, batch: 42076, sum loss: 4775.677246, avg loss: 2.868275, ppl: 17.606615 +epoch: 0, batch: 42077, sum loss: 5138.918457, avg loss: 2.929828, ppl: 18.724409 +epoch: 0, batch: 42078, sum loss: 4791.641113, avg loss: 2.756986, ppl: 15.752289 +epoch: 0, batch: 42079, sum loss: 4574.553223, avg loss: 2.999707, ppl: 20.079653 +epoch: 0, batch: 42080, sum loss: 5212.618652, avg loss: 2.999205, ppl: 20.069578 +epoch: 0, batch: 42081, sum loss: 4892.152832, avg loss: 2.905079, ppl: 18.266685 +epoch: 0, batch: 42082, sum loss: 4468.096191, avg loss: 2.773493, ppl: 16.014467 +epoch: 0, batch: 42083, sum loss: 5106.314941, avg loss: 2.843160, ppl: 17.169935 +epoch: 0, batch: 42084, sum loss: 4798.910156, avg loss: 2.894397, ppl: 18.072601 +epoch: 0, batch: 42085, sum loss: 3801.051758, avg loss: 2.726723, ppl: 15.282722 +epoch: 0, batch: 42086, sum loss: 3907.420898, avg loss: 2.532353, ppl: 12.583081 +epoch: 0, batch: 42087, sum loss: 4468.484375, avg loss: 2.721367, ppl: 15.201086 +epoch: 0, batch: 42088, sum loss: 5326.802734, avg loss: 2.840961, ppl: 17.132229 +epoch: 0, batch: 42089, sum loss: 5231.418945, avg loss: 2.750483, ppl: 15.650193 +epoch: 0, batch: 42090, sum loss: 5679.974609, avg loss: 3.157295, ppl: 23.506935 +epoch: 0, batch: 42091, sum loss: 5279.885742, avg loss: 2.971236, ppl: 19.516018 +epoch: 0, batch: 42092, sum loss: 4070.080811, avg loss: 2.656711, ppl: 14.249344 +epoch: 0, batch: 42093, sum loss: 4841.318848, avg loss: 2.992162, ppl: 19.928732 +epoch: 0, batch: 42094, sum loss: 4404.493164, avg loss: 2.856351, ppl: 17.397924 +epoch: 0, batch: 42095, sum loss: 4655.899414, avg loss: 2.845904, ppl: 
17.217121 +epoch: 0, batch: 42096, sum loss: 6439.395020, avg loss: 3.057642, ppl: 21.277336 +epoch: 0, batch: 42097, sum loss: 3832.825439, avg loss: 2.710626, ppl: 15.038692 +epoch: 0, batch: 42098, sum loss: 5196.463867, avg loss: 2.998536, ppl: 20.056160 +epoch: 0, batch: 42099, sum loss: 4956.184570, avg loss: 2.827259, ppl: 16.899075 +epoch: 0, batch: 42100, sum loss: 3844.994629, avg loss: 2.516358, ppl: 12.383410 +epoch: 0, batch: 42101, sum loss: 4108.261719, avg loss: 2.934473, ppl: 18.811579 +epoch: 0, batch: 42102, sum loss: 5156.385742, avg loss: 3.015430, ppl: 20.397865 +epoch: 0, batch: 42103, sum loss: 4583.675293, avg loss: 2.798337, ppl: 16.417315 +epoch: 0, batch: 42104, sum loss: 4795.893555, avg loss: 2.836129, ppl: 17.049633 +epoch: 0, batch: 42105, sum loss: 3387.785645, avg loss: 2.339631, ppl: 10.377407 +epoch: 0, batch: 42106, sum loss: 5114.639160, avg loss: 2.838313, ppl: 17.086908 +epoch: 0, batch: 42107, sum loss: 4609.071289, avg loss: 2.733732, ppl: 15.390208 +epoch: 0, batch: 42108, sum loss: 4730.449219, avg loss: 2.893241, ppl: 18.051720 +epoch: 0, batch: 42109, sum loss: 5723.854004, avg loss: 3.006226, ppl: 20.210981 +epoch: 0, batch: 42110, sum loss: 4793.062988, avg loss: 2.993793, ppl: 19.961262 +epoch: 0, batch: 42111, sum loss: 4344.406738, avg loss: 3.012765, ppl: 20.343571 +epoch: 0, batch: 42112, sum loss: 5145.083496, avg loss: 2.788663, ppl: 16.259270 +epoch: 0, batch: 42113, sum loss: 4570.510742, avg loss: 2.754979, ppl: 15.720716 +epoch: 0, batch: 42114, sum loss: 4795.815430, avg loss: 2.931428, ppl: 18.754387 +epoch: 0, batch: 42115, sum loss: 6163.632812, avg loss: 3.310222, ppl: 27.391197 +epoch: 0, batch: 42116, sum loss: 5812.453125, avg loss: 3.077000, ppl: 21.693228 +epoch: 0, batch: 42117, sum loss: 4954.485840, avg loss: 2.923001, ppl: 18.597006 +epoch: 0, batch: 42118, sum loss: 4087.097412, avg loss: 2.743018, ppl: 15.533801 +epoch: 0, batch: 42119, sum loss: 4779.827148, avg loss: 2.896865, ppl: 
18.117256 +epoch: 0, batch: 42120, sum loss: 3829.533203, avg loss: 2.456404, ppl: 11.662793 +epoch: 0, batch: 42121, sum loss: 4266.326172, avg loss: 2.705343, ppl: 14.959451 +epoch: 0, batch: 42122, sum loss: 5444.374023, avg loss: 2.939727, ppl: 18.910679 +epoch: 0, batch: 42123, sum loss: 5164.804688, avg loss: 2.969985, ppl: 19.491636 +epoch: 0, batch: 42124, sum loss: 3889.166504, avg loss: 2.744648, ppl: 15.559139 +epoch: 0, batch: 42125, sum loss: 4535.690430, avg loss: 2.769042, ppl: 15.943346 +epoch: 0, batch: 42126, sum loss: 5277.976074, avg loss: 2.885717, ppl: 17.916407 +epoch: 0, batch: 42127, sum loss: 4233.936035, avg loss: 2.620010, ppl: 13.735860 +epoch: 0, batch: 42128, sum loss: 5323.990234, avg loss: 2.928488, ppl: 18.699327 +epoch: 0, batch: 42129, sum loss: 4139.322266, avg loss: 2.746730, ppl: 15.591565 +epoch: 0, batch: 42130, sum loss: 4373.989746, avg loss: 2.855085, ppl: 17.375908 +epoch: 0, batch: 42131, sum loss: 4973.454102, avg loss: 2.787811, ppl: 16.245417 +epoch: 0, batch: 42132, sum loss: 4706.513184, avg loss: 2.689436, ppl: 14.723369 +epoch: 0, batch: 42133, sum loss: 4690.343750, avg loss: 2.957342, ppl: 19.246740 +epoch: 0, batch: 42134, sum loss: 5475.147949, avg loss: 2.899972, ppl: 18.173645 +epoch: 0, batch: 42135, sum loss: 3798.825439, avg loss: 2.541020, ppl: 12.692616 +epoch: 0, batch: 42136, sum loss: 4158.416016, avg loss: 2.650361, ppl: 14.159146 +epoch: 0, batch: 42137, sum loss: 4394.865234, avg loss: 2.615991, ppl: 13.680772 +epoch: 0, batch: 42138, sum loss: 4417.150879, avg loss: 2.706588, ppl: 14.978076 +epoch: 0, batch: 42139, sum loss: 5001.006836, avg loss: 2.875795, ppl: 17.739515 +epoch: 0, batch: 42140, sum loss: 4800.450684, avg loss: 2.897073, ppl: 18.121033 +epoch: 0, batch: 42141, sum loss: 4346.108398, avg loss: 2.901274, ppl: 18.197309 +epoch: 0, batch: 42142, sum loss: 4639.363770, avg loss: 2.815148, ppl: 16.695644 +epoch: 0, batch: 42143, sum loss: 4145.923340, avg loss: 2.837730, ppl: 
17.076956 +epoch: 0, batch: 42144, sum loss: 4559.776855, avg loss: 2.895096, ppl: 18.085243 +epoch: 0, batch: 42145, sum loss: 4828.116699, avg loss: 2.850128, ppl: 17.289993 +epoch: 0, batch: 42146, sum loss: 5519.573242, avg loss: 3.141476, ppl: 23.137991 +epoch: 0, batch: 42147, sum loss: 4957.540039, avg loss: 3.075397, ppl: 21.658480 +epoch: 0, batch: 42148, sum loss: 4662.654785, avg loss: 3.039540, ppl: 20.895636 +epoch: 0, batch: 42149, sum loss: 4059.840820, avg loss: 2.796034, ppl: 16.379551 +epoch: 0, batch: 42150, sum loss: 4671.094727, avg loss: 2.975220, ppl: 19.593927 +epoch: 0, batch: 42151, sum loss: 4655.188477, avg loss: 2.749668, ppl: 15.637445 +epoch: 0, batch: 42152, sum loss: 4488.070801, avg loss: 2.682648, ppl: 14.623772 +epoch: 0, batch: 42153, sum loss: 3667.040283, avg loss: 2.457802, ppl: 11.679111 +epoch: 0, batch: 42154, sum loss: 4252.095215, avg loss: 2.842310, ppl: 17.155344 +epoch: 0, batch: 42155, sum loss: 3868.854736, avg loss: 2.675557, ppl: 14.520431 +epoch: 0, batch: 42156, sum loss: 5345.997070, avg loss: 3.212739, ppl: 24.847042 +epoch: 0, batch: 42157, sum loss: 4165.185059, avg loss: 2.896513, ppl: 18.110874 +epoch: 0, batch: 42158, sum loss: 4647.497559, avg loss: 2.677130, ppl: 14.543294 +epoch: 0, batch: 42159, sum loss: 5224.332031, avg loss: 2.963319, ppl: 19.362133 +epoch: 0, batch: 42160, sum loss: 5248.213379, avg loss: 2.909209, ppl: 18.342289 +epoch: 0, batch: 42161, sum loss: 4249.908691, avg loss: 2.472315, ppl: 11.849843 +epoch: 0, batch: 42162, sum loss: 5289.092285, avg loss: 2.746154, ppl: 15.582587 +epoch: 0, batch: 42163, sum loss: 5221.952637, avg loss: 3.136308, ppl: 23.018724 +epoch: 0, batch: 42164, sum loss: 4997.641602, avg loss: 3.019723, ppl: 20.485620 +epoch: 0, batch: 42165, sum loss: 4101.417969, avg loss: 2.692986, ppl: 14.775734 +epoch: 0, batch: 42166, sum loss: 4203.882324, avg loss: 2.572755, ppl: 13.101875 +epoch: 0, batch: 42167, sum loss: 5154.836914, avg loss: 2.859033, ppl: 
17.444656 +epoch: 0, batch: 42168, sum loss: 4456.030762, avg loss: 2.679513, ppl: 14.577999 +epoch: 0, batch: 42169, sum loss: 5043.086426, avg loss: 2.908354, ppl: 18.326612 +epoch: 0, batch: 42170, sum loss: 5035.510742, avg loss: 2.856217, ppl: 17.395597 +epoch: 0, batch: 42171, sum loss: 6039.299316, avg loss: 3.025701, ppl: 20.608448 +epoch: 0, batch: 42172, sum loss: 5366.702148, avg loss: 3.049263, ppl: 21.099777 +epoch: 0, batch: 42173, sum loss: 5666.239258, avg loss: 2.941973, ppl: 18.953199 +epoch: 0, batch: 42174, sum loss: 4133.236816, avg loss: 2.658030, ppl: 14.268157 +epoch: 0, batch: 42175, sum loss: 4309.701172, avg loss: 2.550119, ppl: 12.808627 +epoch: 0, batch: 42176, sum loss: 5255.951660, avg loss: 2.875247, ppl: 17.729803 +epoch: 0, batch: 42177, sum loss: 5138.814453, avg loss: 2.939825, ppl: 18.912542 +epoch: 0, batch: 42178, sum loss: 4191.951660, avg loss: 2.702741, ppl: 14.920575 +epoch: 0, batch: 42179, sum loss: 5044.586914, avg loss: 2.706323, ppl: 14.974120 +epoch: 0, batch: 42180, sum loss: 4553.104492, avg loss: 2.698936, ppl: 14.863904 +epoch: 0, batch: 42181, sum loss: 5611.268555, avg loss: 3.042987, ppl: 20.967789 +epoch: 0, batch: 42182, sum loss: 4891.102051, avg loss: 2.865320, ppl: 17.554678 +epoch: 0, batch: 42183, sum loss: 5798.426758, avg loss: 3.196487, ppl: 24.446491 +epoch: 0, batch: 42184, sum loss: 5509.934082, avg loss: 2.896916, ppl: 18.118185 +epoch: 0, batch: 42185, sum loss: 4583.278320, avg loss: 2.776062, ppl: 16.055670 +epoch: 0, batch: 42186, sum loss: 5024.747070, avg loss: 2.757819, ppl: 15.765428 +epoch: 0, batch: 42187, sum loss: 4523.825195, avg loss: 2.672076, ppl: 14.469984 +epoch: 0, batch: 42188, sum loss: 4855.961914, avg loss: 2.787579, ppl: 16.241644 +epoch: 0, batch: 42189, sum loss: 5069.397461, avg loss: 2.617139, ppl: 13.696477 +epoch: 0, batch: 42190, sum loss: 5583.718262, avg loss: 2.943447, ppl: 18.981155 +epoch: 0, batch: 42191, sum loss: 5514.246094, avg loss: 2.979063, ppl: 
19.669384 +epoch: 0, batch: 42192, sum loss: 4412.636719, avg loss: 2.780490, ppl: 16.126917 +epoch: 0, batch: 42193, sum loss: 4847.990723, avg loss: 2.762388, ppl: 15.837614 +epoch: 0, batch: 42194, sum loss: 5444.665527, avg loss: 3.125525, ppl: 22.771858 +epoch: 0, batch: 42195, sum loss: 4532.515625, avg loss: 2.843485, ppl: 17.175524 +epoch: 0, batch: 42196, sum loss: 5254.349121, avg loss: 3.219577, ppl: 25.017532 +epoch: 0, batch: 42197, sum loss: 4515.028320, avg loss: 2.894249, ppl: 18.069925 +epoch: 0, batch: 42198, sum loss: 4527.712891, avg loss: 3.067556, ppl: 21.489321 +epoch: 0, batch: 42199, sum loss: 5682.391602, avg loss: 3.045226, ppl: 21.014776 +epoch: 0, batch: 42200, sum loss: 5955.745117, avg loss: 3.082684, ppl: 21.816875 +epoch: 0, batch: 42201, sum loss: 5116.445801, avg loss: 2.753738, ppl: 15.701216 +epoch: 0, batch: 42202, sum loss: 4409.000488, avg loss: 2.716574, ppl: 15.128410 +epoch: 0, batch: 42203, sum loss: 5415.095215, avg loss: 2.833645, ppl: 17.007338 +epoch: 0, batch: 42204, sum loss: 4561.294922, avg loss: 2.678388, ppl: 14.561603 +epoch: 0, batch: 42205, sum loss: 4254.298828, avg loss: 2.549011, ppl: 12.794440 +epoch: 0, batch: 42206, sum loss: 4543.351074, avg loss: 2.830748, ppl: 16.958147 +epoch: 0, batch: 42207, sum loss: 4489.719727, avg loss: 2.614863, ppl: 13.665343 +epoch: 0, batch: 42208, sum loss: 3914.426514, avg loss: 2.865612, ppl: 17.559801 +epoch: 0, batch: 42209, sum loss: 4592.519043, avg loss: 2.685684, ppl: 14.668227 +epoch: 0, batch: 42210, sum loss: 5943.729492, avg loss: 2.883906, ppl: 17.883986 +epoch: 0, batch: 42211, sum loss: 5580.840332, avg loss: 2.926502, ppl: 18.662245 +epoch: 0, batch: 42212, sum loss: 3802.861328, avg loss: 2.613650, ppl: 13.648783 +epoch: 0, batch: 42213, sum loss: 5233.967285, avg loss: 2.945395, ppl: 19.018177 +epoch: 0, batch: 42214, sum loss: 4875.033691, avg loss: 2.760495, ppl: 15.807661 +epoch: 0, batch: 42215, sum loss: 6561.606445, avg loss: 2.970397, ppl: 
19.499655 +epoch: 0, batch: 42216, sum loss: 4886.607422, avg loss: 2.745285, ppl: 15.569051 +epoch: 0, batch: 42217, sum loss: 5264.001953, avg loss: 3.035757, ppl: 20.816721 +epoch: 0, batch: 42218, sum loss: 4963.437988, avg loss: 3.098276, ppl: 22.159712 +epoch: 0, batch: 42219, sum loss: 4187.721680, avg loss: 2.650457, ppl: 14.160503 +epoch: 0, batch: 42220, sum loss: 4357.466797, avg loss: 2.698122, ppl: 14.851810 +epoch: 0, batch: 42221, sum loss: 5439.195312, avg loss: 2.907106, ppl: 18.303753 +epoch: 0, batch: 42222, sum loss: 5491.770508, avg loss: 3.010839, ppl: 20.304432 +epoch: 0, batch: 42223, sum loss: 5495.843750, avg loss: 2.956344, ppl: 19.227551 +epoch: 0, batch: 42224, sum loss: 3904.722900, avg loss: 2.787097, ppl: 16.233824 +epoch: 0, batch: 42225, sum loss: 4763.852051, avg loss: 2.927998, ppl: 18.690168 +epoch: 0, batch: 42226, sum loss: 5086.462891, avg loss: 2.888395, ppl: 17.964447 +epoch: 0, batch: 42227, sum loss: 5564.586914, avg loss: 2.952035, ppl: 19.144882 +epoch: 0, batch: 42228, sum loss: 4378.713867, avg loss: 2.823155, ppl: 16.829868 +epoch: 0, batch: 42229, sum loss: 5259.416992, avg loss: 3.010542, ppl: 20.298397 +epoch: 0, batch: 42230, sum loss: 6243.237305, avg loss: 3.455029, ppl: 31.659199 +epoch: 0, batch: 42231, sum loss: 5814.971680, avg loss: 3.042895, ppl: 20.965843 +epoch: 0, batch: 42232, sum loss: 4565.662109, avg loss: 2.917356, ppl: 18.492329 +epoch: 0, batch: 42233, sum loss: 5600.026367, avg loss: 2.941190, ppl: 18.938379 +epoch: 0, batch: 42234, sum loss: 5036.330078, avg loss: 2.907812, ppl: 18.316675 +epoch: 0, batch: 42235, sum loss: 4611.367188, avg loss: 2.676359, ppl: 14.532091 +epoch: 0, batch: 42236, sum loss: 4145.018555, avg loss: 2.613505, ppl: 13.646794 +epoch: 0, batch: 42237, sum loss: 5309.109375, avg loss: 2.891672, ppl: 18.023413 +epoch: 0, batch: 42238, sum loss: 4879.617676, avg loss: 2.681109, ppl: 14.601273 +epoch: 0, batch: 42239, sum loss: 4353.385742, avg loss: 2.799605, ppl: 
16.438154 +epoch: 0, batch: 42240, sum loss: 5478.909668, avg loss: 2.980908, ppl: 19.705709 +epoch: 0, batch: 42241, sum loss: 5058.869141, avg loss: 2.975806, ppl: 19.605410 +epoch: 0, batch: 42242, sum loss: 4767.002930, avg loss: 2.971947, ppl: 19.529907 +epoch: 0, batch: 42243, sum loss: 3785.793457, avg loss: 2.763353, ppl: 15.852910 +epoch: 0, batch: 42244, sum loss: 3767.675781, avg loss: 2.611002, ppl: 13.612683 +epoch: 0, batch: 42245, sum loss: 4863.212891, avg loss: 2.794950, ppl: 16.361811 +epoch: 0, batch: 42246, sum loss: 3996.427246, avg loss: 2.634428, ppl: 13.935340 +epoch: 0, batch: 42247, sum loss: 4308.133301, avg loss: 2.529732, ppl: 12.550139 +epoch: 0, batch: 42248, sum loss: 4466.393555, avg loss: 2.814363, ppl: 16.682541 +epoch: 0, batch: 42249, sum loss: 5270.080078, avg loss: 3.131361, ppl: 22.905121 +epoch: 0, batch: 42250, sum loss: 5333.540039, avg loss: 3.131850, ppl: 22.916330 +epoch: 0, batch: 42251, sum loss: 4476.012695, avg loss: 2.653238, ppl: 14.199948 +epoch: 0, batch: 42252, sum loss: 5013.687500, avg loss: 2.807216, ppl: 16.563740 +epoch: 0, batch: 42253, sum loss: 5141.259277, avg loss: 2.964971, ppl: 19.394133 +epoch: 0, batch: 42254, sum loss: 5300.691895, avg loss: 3.195113, ppl: 24.412926 +epoch: 0, batch: 42255, sum loss: 4826.842773, avg loss: 2.748772, ppl: 15.623429 +epoch: 0, batch: 42256, sum loss: 4392.163574, avg loss: 2.704534, ppl: 14.947354 +epoch: 0, batch: 42257, sum loss: 4897.446289, avg loss: 2.880851, ppl: 17.829441 +epoch: 0, batch: 42258, sum loss: 4531.439453, avg loss: 2.821569, ppl: 16.803190 +epoch: 0, batch: 42259, sum loss: 5270.797852, avg loss: 3.279899, ppl: 26.573092 +epoch: 0, batch: 42260, sum loss: 4647.656250, avg loss: 2.849575, ppl: 17.280428 +epoch: 0, batch: 42261, sum loss: 4311.668457, avg loss: 2.725454, ppl: 15.263343 +epoch: 0, batch: 42262, sum loss: 4854.197266, avg loss: 2.843701, ppl: 17.179226 +epoch: 0, batch: 42263, sum loss: 5523.513184, avg loss: 2.930246, ppl: 
18.732231 +epoch: 0, batch: 42264, sum loss: 5473.474609, avg loss: 3.019015, ppl: 20.471125 +epoch: 0, batch: 42265, sum loss: 5358.487793, avg loss: 3.029106, ppl: 20.678730 +epoch: 0, batch: 42266, sum loss: 5303.928711, avg loss: 2.778381, ppl: 16.092943 +epoch: 0, batch: 42267, sum loss: 4829.084473, avg loss: 2.881315, ppl: 17.837723 +epoch: 0, batch: 42268, sum loss: 4655.777832, avg loss: 2.821684, ppl: 16.805120 +epoch: 0, batch: 42269, sum loss: 4474.806641, avg loss: 2.684347, ppl: 14.648631 +epoch: 0, batch: 42270, sum loss: 4993.445801, avg loss: 2.878067, ppl: 17.779867 +epoch: 0, batch: 42271, sum loss: 5594.080078, avg loss: 2.907526, ppl: 18.311440 +epoch: 0, batch: 42272, sum loss: 4819.223145, avg loss: 2.687799, ppl: 14.699284 +epoch: 0, batch: 42273, sum loss: 4714.904785, avg loss: 2.887265, ppl: 17.944174 +epoch: 0, batch: 42274, sum loss: 5021.720703, avg loss: 2.931536, ppl: 18.756413 +epoch: 0, batch: 42275, sum loss: 5336.384277, avg loss: 2.930469, ppl: 18.736422 +epoch: 0, batch: 42276, sum loss: 5563.974609, avg loss: 2.951711, ppl: 19.138666 +epoch: 0, batch: 42277, sum loss: 5928.745117, avg loss: 2.949625, ppl: 19.098782 +epoch: 0, batch: 42278, sum loss: 5025.025391, avg loss: 3.036269, ppl: 20.827394 +epoch: 0, batch: 42279, sum loss: 4657.847656, avg loss: 2.800870, ppl: 16.458967 +epoch: 0, batch: 42280, sum loss: 4186.124023, avg loss: 2.685134, ppl: 14.660169 +epoch: 0, batch: 42281, sum loss: 5087.101562, avg loss: 2.928671, ppl: 18.702761 +epoch: 0, batch: 42282, sum loss: 4485.176270, avg loss: 2.763510, ppl: 15.855393 +epoch: 0, batch: 42283, sum loss: 3781.257324, avg loss: 2.691286, ppl: 14.750638 +epoch: 0, batch: 42284, sum loss: 4595.603027, avg loss: 2.583251, ppl: 13.240108 +epoch: 0, batch: 42285, sum loss: 5558.443359, avg loss: 3.055769, ppl: 21.237505 +epoch: 0, batch: 42286, sum loss: 4915.000977, avg loss: 2.841041, ppl: 17.133591 +epoch: 0, batch: 42287, sum loss: 4902.116699, avg loss: 3.000072, ppl: 
20.086973 +epoch: 0, batch: 42288, sum loss: 5732.072266, avg loss: 2.908205, ppl: 18.323877 +epoch: 0, batch: 42289, sum loss: 4183.387695, avg loss: 2.606472, ppl: 13.551158 +epoch: 0, batch: 42290, sum loss: 5187.671387, avg loss: 2.950894, ppl: 19.123035 +epoch: 0, batch: 42291, sum loss: 4001.698975, avg loss: 2.540761, ppl: 12.689327 +epoch: 0, batch: 42292, sum loss: 3987.593262, avg loss: 2.559431, ppl: 12.928457 +epoch: 0, batch: 42293, sum loss: 5773.622070, avg loss: 3.274885, ppl: 26.440184 +epoch: 0, batch: 42294, sum loss: 4680.706055, avg loss: 2.776220, ppl: 16.058199 +epoch: 0, batch: 42295, sum loss: 4856.825684, avg loss: 2.868769, ppl: 17.615318 +epoch: 0, batch: 42296, sum loss: 5603.000977, avg loss: 3.009130, ppl: 20.269768 +epoch: 0, batch: 42297, sum loss: 5024.385254, avg loss: 2.808488, ppl: 16.584826 +epoch: 0, batch: 42298, sum loss: 5138.720703, avg loss: 2.934735, ppl: 18.816517 +epoch: 0, batch: 42299, sum loss: 5453.111328, avg loss: 2.976589, ppl: 19.620779 +epoch: 0, batch: 42300, sum loss: 4173.330078, avg loss: 2.606702, ppl: 13.554279 +epoch: 0, batch: 42301, sum loss: 4572.422363, avg loss: 2.726549, ppl: 15.280060 +epoch: 0, batch: 42302, sum loss: 4068.101074, avg loss: 2.560164, ppl: 12.937942 +epoch: 0, batch: 42303, sum loss: 4684.979980, avg loss: 2.868941, ppl: 17.618343 +epoch: 0, batch: 42304, sum loss: 4586.891113, avg loss: 2.710928, ppl: 15.043236 +epoch: 0, batch: 42305, sum loss: 4879.442871, avg loss: 2.830303, ppl: 16.950603 +epoch: 0, batch: 42306, sum loss: 4334.728516, avg loss: 2.791197, ppl: 16.300516 +epoch: 0, batch: 42307, sum loss: 4760.536133, avg loss: 2.838722, ppl: 17.093897 +epoch: 0, batch: 42308, sum loss: 5939.399414, avg loss: 3.233206, ppl: 25.360840 +epoch: 0, batch: 42309, sum loss: 5410.331543, avg loss: 2.819349, ppl: 16.765938 +epoch: 0, batch: 42310, sum loss: 5458.360352, avg loss: 3.017336, ppl: 20.436773 +epoch: 0, batch: 42311, sum loss: 5878.998535, avg loss: 2.997960, ppl: 
20.044596 +epoch: 0, batch: 42312, sum loss: 5067.818359, avg loss: 2.919250, ppl: 18.527391 +epoch: 0, batch: 42313, sum loss: 5732.147461, avg loss: 3.085117, ppl: 21.870022 +epoch: 0, batch: 42314, sum loss: 5016.758789, avg loss: 2.951035, ppl: 19.125734 +epoch: 0, batch: 42315, sum loss: 4590.767578, avg loss: 2.933398, ppl: 18.791370 +epoch: 0, batch: 42316, sum loss: 5170.945312, avg loss: 3.032813, ppl: 20.755527 +epoch: 0, batch: 42317, sum loss: 4587.795898, avg loss: 2.760407, ppl: 15.806271 +epoch: 0, batch: 42318, sum loss: 5440.043945, avg loss: 3.017218, ppl: 20.434362 +epoch: 0, batch: 42319, sum loss: 4654.179688, avg loss: 2.878281, ppl: 17.783670 +epoch: 0, batch: 42320, sum loss: 5654.872070, avg loss: 2.951395, ppl: 19.132616 +epoch: 0, batch: 42321, sum loss: 5730.057129, avg loss: 2.936985, ppl: 18.858896 +epoch: 0, batch: 42322, sum loss: 4713.699707, avg loss: 2.985244, ppl: 19.791323 +epoch: 0, batch: 42323, sum loss: 4801.740234, avg loss: 2.894358, ppl: 18.071898 +epoch: 0, batch: 42324, sum loss: 4358.536133, avg loss: 2.808335, ppl: 16.582291 +epoch: 0, batch: 42325, sum loss: 4418.108398, avg loss: 2.537684, ppl: 12.650345 +epoch: 0, batch: 42326, sum loss: 5721.894043, avg loss: 2.888386, ppl: 17.964300 +epoch: 0, batch: 42327, sum loss: 4732.149902, avg loss: 2.744867, ppl: 15.562538 +epoch: 0, batch: 42328, sum loss: 3996.327393, avg loss: 2.698398, ppl: 14.855915 +epoch: 0, batch: 42329, sum loss: 5828.975098, avg loss: 2.889923, ppl: 17.991930 +epoch: 0, batch: 42330, sum loss: 5593.883301, avg loss: 2.988186, ppl: 19.849636 +epoch: 0, batch: 42331, sum loss: 5756.518555, avg loss: 2.984198, ppl: 19.770647 +epoch: 0, batch: 42332, sum loss: 4763.704102, avg loss: 2.822100, ppl: 16.812115 +epoch: 0, batch: 42333, sum loss: 5704.498047, avg loss: 2.957231, ppl: 19.244602 +epoch: 0, batch: 42334, sum loss: 4514.778320, avg loss: 2.790345, ppl: 16.286636 +epoch: 0, batch: 42335, sum loss: 4656.998047, avg loss: 2.860564, ppl: 
17.471375 +epoch: 0, batch: 42336, sum loss: 5035.221680, avg loss: 2.867438, ppl: 17.591896 +epoch: 0, batch: 42337, sum loss: 4908.690430, avg loss: 2.913169, ppl: 18.415071 +epoch: 0, batch: 42338, sum loss: 4115.563965, avg loss: 2.826624, ppl: 16.888344 +epoch: 0, batch: 42339, sum loss: 5003.429688, avg loss: 3.021395, ppl: 20.519892 +epoch: 0, batch: 42340, sum loss: 4355.095703, avg loss: 2.675120, ppl: 14.514093 +epoch: 0, batch: 42341, sum loss: 4475.609375, avg loss: 2.683219, ppl: 14.632121 +epoch: 0, batch: 42342, sum loss: 4513.662109, avg loss: 3.100043, ppl: 22.198902 +epoch: 0, batch: 42343, sum loss: 5360.101562, avg loss: 3.130900, ppl: 22.894585 +epoch: 0, batch: 42344, sum loss: 5571.131836, avg loss: 2.862863, ppl: 17.511597 +epoch: 0, batch: 42345, sum loss: 3619.501465, avg loss: 2.503113, ppl: 12.220477 +epoch: 0, batch: 42346, sum loss: 5334.005859, avg loss: 3.304837, ppl: 27.244087 +epoch: 0, batch: 42347, sum loss: 4576.349609, avg loss: 2.924185, ppl: 18.619045 +epoch: 0, batch: 42348, sum loss: 5269.275879, avg loss: 2.995609, ppl: 19.997530 +epoch: 0, batch: 42349, sum loss: 4604.592773, avg loss: 3.029337, ppl: 20.683523 +epoch: 0, batch: 42350, sum loss: 6505.220703, avg loss: 3.020065, ppl: 20.492630 +epoch: 0, batch: 42351, sum loss: 4888.124512, avg loss: 2.895808, ppl: 18.098127 +epoch: 0, batch: 42352, sum loss: 5611.230957, avg loss: 3.018414, ppl: 20.458809 +epoch: 0, batch: 42353, sum loss: 5802.776855, avg loss: 3.005063, ppl: 20.187489 +epoch: 0, batch: 42354, sum loss: 5282.403320, avg loss: 2.806803, ppl: 16.556900 +epoch: 0, batch: 42355, sum loss: 4769.682129, avg loss: 2.702369, ppl: 14.915030 +epoch: 0, batch: 42356, sum loss: 4408.808594, avg loss: 2.764143, ppl: 15.865445 +epoch: 0, batch: 42357, sum loss: 5513.554199, avg loss: 3.347635, ppl: 28.435392 +epoch: 0, batch: 42358, sum loss: 5037.349121, avg loss: 2.952725, ppl: 19.158092 +epoch: 0, batch: 42359, sum loss: 4402.587891, avg loss: 2.712623, ppl: 
15.068754 +epoch: 0, batch: 42360, sum loss: 5126.813965, avg loss: 2.975516, ppl: 19.599735 +epoch: 0, batch: 42361, sum loss: 4744.316406, avg loss: 2.961496, ppl: 19.326860 +epoch: 0, batch: 42362, sum loss: 5133.794922, avg loss: 2.905374, ppl: 18.272068 +epoch: 0, batch: 42363, sum loss: 4361.704590, avg loss: 2.710817, ppl: 15.041561 +epoch: 0, batch: 42364, sum loss: 3633.174561, avg loss: 2.667529, ppl: 14.404334 +epoch: 0, batch: 42365, sum loss: 5282.689453, avg loss: 3.036028, ppl: 20.822380 +epoch: 0, batch: 42366, sum loss: 4963.670410, avg loss: 3.067781, ppl: 21.494164 +epoch: 0, batch: 42367, sum loss: 4802.109375, avg loss: 2.777391, ppl: 16.077024 +epoch: 0, batch: 42368, sum loss: 4498.113770, avg loss: 2.917065, ppl: 18.486942 +epoch: 0, batch: 42369, sum loss: 5123.252441, avg loss: 2.887966, ppl: 17.956755 +epoch: 0, batch: 42370, sum loss: 5168.041016, avg loss: 2.985581, ppl: 19.798004 +epoch: 0, batch: 42371, sum loss: 5429.377930, avg loss: 3.098960, ppl: 22.174881 +epoch: 0, batch: 42372, sum loss: 5380.840332, avg loss: 3.048635, ppl: 21.086536 +epoch: 0, batch: 42373, sum loss: 4216.041992, avg loss: 2.692236, ppl: 14.764655 +epoch: 0, batch: 42374, sum loss: 4140.209473, avg loss: 2.921813, ppl: 18.574942 +epoch: 0, batch: 42375, sum loss: 4943.271484, avg loss: 2.860690, ppl: 17.473574 +epoch: 0, batch: 42376, sum loss: 4717.492188, avg loss: 2.937417, ppl: 18.867054 +epoch: 0, batch: 42377, sum loss: 4515.542969, avg loss: 2.872483, ppl: 17.680861 +epoch: 0, batch: 42378, sum loss: 4411.228027, avg loss: 3.078317, ppl: 21.721811 +epoch: 0, batch: 42379, sum loss: 4088.697998, avg loss: 2.530135, ppl: 12.555200 +epoch: 0, batch: 42380, sum loss: 5272.549316, avg loss: 3.170505, ppl: 23.819506 +epoch: 0, batch: 42381, sum loss: 4936.310547, avg loss: 2.988081, ppl: 19.847567 +epoch: 0, batch: 42382, sum loss: 5015.034180, avg loss: 2.948286, ppl: 19.073231 +epoch: 0, batch: 42383, sum loss: 5813.783691, avg loss: 3.189130, ppl: 
24.267307 +epoch: 0, batch: 42384, sum loss: 4620.400391, avg loss: 2.913241, ppl: 18.416391 +epoch: 0, batch: 42385, sum loss: 4487.990723, avg loss: 3.046837, ppl: 21.048664 +epoch: 0, batch: 42386, sum loss: 5070.860352, avg loss: 2.821848, ppl: 16.807882 +epoch: 0, batch: 42387, sum loss: 4358.168457, avg loss: 2.715370, ppl: 15.110195 +epoch: 0, batch: 42388, sum loss: 4573.473145, avg loss: 2.901950, ppl: 18.209618 +epoch: 0, batch: 42389, sum loss: 5642.389160, avg loss: 2.988554, ppl: 19.856939 +epoch: 0, batch: 42390, sum loss: 3657.698486, avg loss: 2.629546, ppl: 13.867475 +epoch: 0, batch: 42391, sum loss: 4781.510254, avg loss: 2.852930, ppl: 17.338507 +epoch: 0, batch: 42392, sum loss: 4561.769531, avg loss: 2.749710, ppl: 15.638102 +epoch: 0, batch: 42393, sum loss: 5058.615723, avg loss: 2.895601, ppl: 18.094378 +epoch: 0, batch: 42394, sum loss: 4549.071289, avg loss: 2.934885, ppl: 18.819334 +epoch: 0, batch: 42395, sum loss: 5035.015625, avg loss: 2.898685, ppl: 18.150261 +epoch: 0, batch: 42396, sum loss: 6255.971191, avg loss: 3.048719, ppl: 21.088312 +epoch: 0, batch: 42397, sum loss: 4583.687012, avg loss: 2.824206, ppl: 16.847569 +epoch: 0, batch: 42398, sum loss: 4557.808105, avg loss: 2.729226, ppl: 15.321030 +epoch: 0, batch: 42399, sum loss: 4681.876953, avg loss: 2.790153, ppl: 16.283512 +epoch: 0, batch: 42400, sum loss: 4923.804688, avg loss: 2.703902, ppl: 14.937899 +epoch: 0, batch: 42401, sum loss: 4785.077637, avg loss: 2.968410, ppl: 19.460962 +epoch: 0, batch: 42402, sum loss: 4913.284180, avg loss: 2.947381, ppl: 19.055981 +epoch: 0, batch: 42403, sum loss: 5158.292969, avg loss: 2.999007, ppl: 20.065611 +epoch: 0, batch: 42404, sum loss: 3747.750732, avg loss: 2.496836, ppl: 12.144009 +epoch: 0, batch: 42405, sum loss: 5637.293457, avg loss: 3.009767, ppl: 20.282675 +epoch: 0, batch: 42406, sum loss: 5847.224121, avg loss: 3.329854, ppl: 27.934277 +epoch: 0, batch: 42407, sum loss: 4148.923340, avg loss: 2.758593, ppl: 
15.777622 +epoch: 0, batch: 42408, sum loss: 5648.968262, avg loss: 3.075105, ppl: 21.652164 +epoch: 0, batch: 42409, sum loss: 4664.292480, avg loss: 2.891688, ppl: 18.023706 +epoch: 0, batch: 42410, sum loss: 5028.812500, avg loss: 2.865420, ppl: 17.556427 +epoch: 0, batch: 42411, sum loss: 4753.530762, avg loss: 2.791269, ppl: 16.301691 +epoch: 0, batch: 42412, sum loss: 4126.071777, avg loss: 2.608136, ppl: 13.573729 +epoch: 0, batch: 42413, sum loss: 4896.637695, avg loss: 2.926861, ppl: 18.668928 +epoch: 0, batch: 42414, sum loss: 4629.985352, avg loss: 2.897363, ppl: 18.126278 +epoch: 0, batch: 42415, sum loss: 5736.417969, avg loss: 2.998650, ppl: 20.058441 +epoch: 0, batch: 42416, sum loss: 5089.667480, avg loss: 2.813526, ppl: 16.668583 +epoch: 0, batch: 42417, sum loss: 4525.915039, avg loss: 3.041610, ppl: 20.938917 +epoch: 0, batch: 42418, sum loss: 5240.461914, avg loss: 3.115613, ppl: 22.547258 +epoch: 0, batch: 42419, sum loss: 4147.043457, avg loss: 2.863980, ppl: 17.531168 +epoch: 0, batch: 42420, sum loss: 4539.821777, avg loss: 2.921378, ppl: 18.566862 +epoch: 0, batch: 42421, sum loss: 5005.802734, avg loss: 2.787195, ppl: 16.235422 +epoch: 0, batch: 42422, sum loss: 4623.901367, avg loss: 2.854260, ppl: 17.361588 +epoch: 0, batch: 42423, sum loss: 4949.995117, avg loss: 2.857965, ppl: 17.426025 +epoch: 0, batch: 42424, sum loss: 5375.645508, avg loss: 2.838250, ppl: 17.085842 +epoch: 0, batch: 42425, sum loss: 3480.747314, avg loss: 2.585994, ppl: 13.276473 +epoch: 0, batch: 42426, sum loss: 5082.644531, avg loss: 2.977530, ppl: 19.639257 +epoch: 0, batch: 42427, sum loss: 3891.883057, avg loss: 2.432427, ppl: 11.386483 +epoch: 0, batch: 42428, sum loss: 4464.869141, avg loss: 2.874996, ppl: 17.725361 +epoch: 0, batch: 42429, sum loss: 4299.585449, avg loss: 2.740335, ppl: 15.492174 +epoch: 0, batch: 42430, sum loss: 6166.617188, avg loss: 3.040738, ppl: 20.920685 +epoch: 0, batch: 42431, sum loss: 5768.328125, avg loss: 3.032770, ppl: 
20.754642 +epoch: 0, batch: 42432, sum loss: 5751.449707, avg loss: 3.044706, ppl: 21.003857 +epoch: 0, batch: 42433, sum loss: 4539.399902, avg loss: 2.878504, ppl: 17.787636 +epoch: 0, batch: 42434, sum loss: 5159.510742, avg loss: 2.827129, ppl: 16.896883 +epoch: 0, batch: 42435, sum loss: 5736.311035, avg loss: 3.112486, ppl: 22.476845 +epoch: 0, batch: 42436, sum loss: 3940.643311, avg loss: 2.712074, ppl: 15.060475 +epoch: 0, batch: 42437, sum loss: 5096.520996, avg loss: 2.990916, ppl: 19.903906 +epoch: 0, batch: 42438, sum loss: 4775.088867, avg loss: 2.911639, ppl: 18.386919 +epoch: 0, batch: 42439, sum loss: 3783.636230, avg loss: 2.512375, ppl: 12.334185 +epoch: 0, batch: 42440, sum loss: 4846.710449, avg loss: 3.015999, ppl: 20.409472 +epoch: 0, batch: 42441, sum loss: 5540.419922, avg loss: 3.047536, ppl: 21.063374 +epoch: 0, batch: 42442, sum loss: 4286.261719, avg loss: 2.803311, ppl: 16.499180 +epoch: 0, batch: 42443, sum loss: 5370.596191, avg loss: 3.120625, ppl: 22.660543 +epoch: 0, batch: 42444, sum loss: 4258.321289, avg loss: 2.881138, ppl: 17.834551 +epoch: 0, batch: 42445, sum loss: 5399.591309, avg loss: 2.843387, ppl: 17.173828 +epoch: 0, batch: 42446, sum loss: 3977.783447, avg loss: 2.773908, ppl: 16.021116 +epoch: 0, batch: 42447, sum loss: 5058.368164, avg loss: 2.927296, ppl: 18.677067 +epoch: 0, batch: 42448, sum loss: 4725.546875, avg loss: 2.957163, ppl: 19.243307 +epoch: 0, batch: 42449, sum loss: 5159.735352, avg loss: 3.113902, ppl: 22.508699 +epoch: 0, batch: 42450, sum loss: 4406.494629, avg loss: 2.837408, ppl: 17.071461 +epoch: 0, batch: 42451, sum loss: 4419.851074, avg loss: 2.735056, ppl: 15.410612 +epoch: 0, batch: 42452, sum loss: 5322.220215, avg loss: 2.969989, ppl: 19.491707 +epoch: 0, batch: 42453, sum loss: 6269.943359, avg loss: 3.113179, ppl: 22.492445 +epoch: 0, batch: 42454, sum loss: 4491.953125, avg loss: 2.727355, ppl: 15.292377 +epoch: 0, batch: 42455, sum loss: 5294.964844, avg loss: 2.779509, ppl: 
16.111109 +epoch: 0, batch: 42456, sum loss: 4262.434082, avg loss: 2.742879, ppl: 15.531639 +epoch: 0, batch: 42457, sum loss: 4448.094727, avg loss: 2.674741, ppl: 14.508595 +epoch: 0, batch: 42458, sum loss: 3510.812988, avg loss: 2.451685, ppl: 11.607892 +epoch: 0, batch: 42459, sum loss: 4438.372559, avg loss: 2.798470, ppl: 16.419498 +epoch: 0, batch: 42460, sum loss: 5221.005371, avg loss: 2.981728, ppl: 19.721863 +epoch: 0, batch: 42461, sum loss: 5586.656738, avg loss: 3.047822, ppl: 21.069395 +epoch: 0, batch: 42462, sum loss: 5919.733398, avg loss: 2.980732, ppl: 19.702229 +epoch: 0, batch: 42463, sum loss: 5709.955078, avg loss: 3.335254, ppl: 28.085514 +epoch: 0, batch: 42464, sum loss: 5375.408691, avg loss: 2.934175, ppl: 18.805983 +epoch: 0, batch: 42465, sum loss: 4513.892578, avg loss: 2.653670, ppl: 14.206077 +epoch: 0, batch: 42466, sum loss: 4152.549316, avg loss: 2.826786, ppl: 16.891090 +epoch: 0, batch: 42467, sum loss: 4525.385742, avg loss: 2.823073, ppl: 16.828480 +epoch: 0, batch: 42468, sum loss: 4881.478516, avg loss: 2.863037, ppl: 17.514645 +epoch: 0, batch: 42469, sum loss: 4499.084961, avg loss: 2.662180, ppl: 14.327495 +epoch: 0, batch: 42470, sum loss: 4992.698242, avg loss: 3.126298, ppl: 22.789461 +epoch: 0, batch: 42471, sum loss: 5087.306641, avg loss: 2.920383, ppl: 18.548386 +epoch: 0, batch: 42472, sum loss: 4952.605957, avg loss: 2.887817, ppl: 17.954067 +epoch: 0, batch: 42473, sum loss: 5577.045898, avg loss: 3.115668, ppl: 22.548489 +epoch: 0, batch: 42474, sum loss: 4917.138672, avg loss: 2.663672, ppl: 14.348881 +epoch: 0, batch: 42475, sum loss: 5063.632812, avg loss: 2.933738, ppl: 18.797773 +epoch: 0, batch: 42476, sum loss: 3829.007080, avg loss: 2.534088, ppl: 12.604932 +epoch: 0, batch: 42477, sum loss: 5497.457520, avg loss: 3.060945, ppl: 21.347727 +epoch: 0, batch: 42478, sum loss: 4952.416992, avg loss: 2.782257, ppl: 16.155439 +epoch: 0, batch: 42479, sum loss: 5793.543457, avg loss: 2.952876, ppl: 
19.160988 +epoch: 0, batch: 42480, sum loss: 4298.291992, avg loss: 2.865528, ppl: 17.558319 +epoch: 0, batch: 42481, sum loss: 4787.325195, avg loss: 2.821052, ppl: 16.794510 +epoch: 0, batch: 42482, sum loss: 4625.317383, avg loss: 2.944187, ppl: 18.995211 +epoch: 0, batch: 42483, sum loss: 4560.723633, avg loss: 2.803149, ppl: 16.496511 +epoch: 0, batch: 42484, sum loss: 4463.038574, avg loss: 2.643980, ppl: 14.069091 +epoch: 0, batch: 42485, sum loss: 4645.994141, avg loss: 2.898312, ppl: 18.143490 +epoch: 0, batch: 42486, sum loss: 5107.925293, avg loss: 3.015304, ppl: 20.395292 +epoch: 0, batch: 42487, sum loss: 3507.083984, avg loss: 2.415347, ppl: 11.193655 +epoch: 0, batch: 42488, sum loss: 4302.939453, avg loss: 2.589013, ppl: 13.316620 +epoch: 0, batch: 42489, sum loss: 5024.414062, avg loss: 2.759151, ppl: 15.786434 +epoch: 0, batch: 42490, sum loss: 5059.125000, avg loss: 2.809064, ppl: 16.594385 +epoch: 0, batch: 42491, sum loss: 5531.119629, avg loss: 2.986566, ppl: 19.817505 +epoch: 0, batch: 42492, sum loss: 4079.988525, avg loss: 2.642480, ppl: 14.047998 +epoch: 0, batch: 42493, sum loss: 4577.731445, avg loss: 2.829253, ppl: 16.932806 +epoch: 0, batch: 42494, sum loss: 4565.833008, avg loss: 2.698483, ppl: 14.857173 +epoch: 0, batch: 42495, sum loss: 5597.952637, avg loss: 2.991958, ppl: 19.924650 +epoch: 0, batch: 42496, sum loss: 5282.105469, avg loss: 2.772759, ppl: 16.002720 +epoch: 0, batch: 42497, sum loss: 4735.386230, avg loss: 2.767613, ppl: 15.920592 +epoch: 0, batch: 42498, sum loss: 5515.223633, avg loss: 3.284826, ppl: 26.704346 +epoch: 0, batch: 42499, sum loss: 4732.354492, avg loss: 2.871574, ppl: 17.664808 +epoch: 0, batch: 42500, sum loss: 5388.933594, avg loss: 2.906652, ppl: 18.295446 +epoch: 0, batch: 42501, sum loss: 4567.923828, avg loss: 2.701315, ppl: 14.899314 +epoch: 0, batch: 42502, sum loss: 5081.044434, avg loss: 2.865789, ppl: 17.562912 +epoch: 0, batch: 42503, sum loss: 5017.809570, avg loss: 2.924132, ppl: 
18.618052 +epoch: 0, batch: 42504, sum loss: 5668.979980, avg loss: 2.768057, ppl: 15.927650 +epoch: 0, batch: 42505, sum loss: 5771.171387, avg loss: 3.238592, ppl: 25.497805 +epoch: 0, batch: 42506, sum loss: 5021.326172, avg loss: 2.870970, ppl: 17.654131 +epoch: 0, batch: 42507, sum loss: 5157.579102, avg loss: 3.005582, ppl: 20.197975 +epoch: 0, batch: 42508, sum loss: 4756.440430, avg loss: 2.693341, ppl: 14.780977 +epoch: 0, batch: 42509, sum loss: 4120.006836, avg loss: 2.787555, ppl: 16.241268 +epoch: 0, batch: 42510, sum loss: 5096.190430, avg loss: 2.928845, ppl: 18.706017 +epoch: 0, batch: 42511, sum loss: 4059.044434, avg loss: 2.789721, ppl: 16.276482 +epoch: 0, batch: 42512, sum loss: 4969.839355, avg loss: 2.815773, ppl: 16.706085 +epoch: 0, batch: 42513, sum loss: 4692.489258, avg loss: 2.693737, ppl: 14.786824 +epoch: 0, batch: 42514, sum loss: 4306.993652, avg loss: 2.877083, ppl: 17.762386 +epoch: 0, batch: 42515, sum loss: 4083.773926, avg loss: 2.702696, ppl: 14.919903 +epoch: 0, batch: 42516, sum loss: 4724.924316, avg loss: 2.931094, ppl: 18.748137 +epoch: 0, batch: 42517, sum loss: 4553.797852, avg loss: 2.814461, ppl: 16.684181 +epoch: 0, batch: 42518, sum loss: 5202.365234, avg loss: 2.838170, ppl: 17.084469 +epoch: 0, batch: 42519, sum loss: 4793.888672, avg loss: 2.915991, ppl: 18.467096 +epoch: 0, batch: 42520, sum loss: 5181.313965, avg loss: 2.886526, ppl: 17.930906 +epoch: 0, batch: 42521, sum loss: 3960.564453, avg loss: 2.765758, ppl: 15.891074 +epoch: 0, batch: 42522, sum loss: 3460.955811, avg loss: 2.656144, ppl: 14.241271 +epoch: 0, batch: 42523, sum loss: 4513.364258, avg loss: 2.805074, ppl: 16.528299 +epoch: 0, batch: 42524, sum loss: 4913.754883, avg loss: 2.740521, ppl: 15.495063 +epoch: 0, batch: 42525, sum loss: 5820.662598, avg loss: 3.194656, ppl: 24.401770 +epoch: 0, batch: 42526, sum loss: 4822.307129, avg loss: 2.922610, ppl: 18.589748 +epoch: 0, batch: 42527, sum loss: 5331.894043, avg loss: 3.136408, ppl: 
23.021034 +epoch: 0, batch: 42528, sum loss: 4406.006348, avg loss: 2.818942, ppl: 16.759111 +epoch: 0, batch: 42529, sum loss: 4341.619629, avg loss: 2.758335, ppl: 15.773560 +epoch: 0, batch: 42530, sum loss: 3920.422363, avg loss: 2.470335, ppl: 11.826414 +epoch: 0, batch: 42531, sum loss: 4784.406250, avg loss: 2.674347, ppl: 14.502872 +epoch: 0, batch: 42532, sum loss: 3687.028076, avg loss: 2.814525, ppl: 16.685255 +epoch: 0, batch: 42533, sum loss: 5678.020508, avg loss: 3.130111, ppl: 22.876513 +epoch: 0, batch: 42534, sum loss: 4787.763184, avg loss: 2.863495, ppl: 17.522659 +epoch: 0, batch: 42535, sum loss: 4337.689453, avg loss: 2.989448, ppl: 19.874714 +epoch: 0, batch: 42536, sum loss: 4673.554199, avg loss: 2.931966, ppl: 18.764490 +epoch: 0, batch: 42537, sum loss: 4905.770508, avg loss: 2.860507, ppl: 17.470390 +epoch: 0, batch: 42538, sum loss: 4281.397461, avg loss: 2.839123, ppl: 17.100761 +epoch: 0, batch: 42539, sum loss: 4225.218262, avg loss: 2.625990, ppl: 13.818250 +epoch: 0, batch: 42540, sum loss: 5980.494141, avg loss: 3.203264, ppl: 24.612736 +epoch: 0, batch: 42541, sum loss: 5345.309082, avg loss: 2.935370, ppl: 18.828472 +epoch: 0, batch: 42542, sum loss: 4695.285645, avg loss: 2.891186, ppl: 18.014658 +epoch: 0, batch: 42543, sum loss: 5026.132324, avg loss: 2.787650, ppl: 16.242798 +epoch: 0, batch: 42544, sum loss: 6427.354980, avg loss: 3.213678, ppl: 24.870382 +epoch: 0, batch: 42545, sum loss: 4895.102539, avg loss: 2.905105, ppl: 18.267168 +epoch: 0, batch: 42546, sum loss: 4164.864258, avg loss: 2.793336, ppl: 16.335430 +epoch: 0, batch: 42547, sum loss: 4617.988770, avg loss: 2.740646, ppl: 15.496995 +epoch: 0, batch: 42548, sum loss: 4430.748047, avg loss: 2.701676, ppl: 14.904686 +epoch: 0, batch: 42549, sum loss: 4415.286133, avg loss: 2.806921, ppl: 16.558847 +epoch: 0, batch: 42550, sum loss: 3757.831787, avg loss: 2.589822, ppl: 13.327400 +epoch: 0, batch: 42551, sum loss: 4915.748047, avg loss: 2.871348, ppl: 
17.660812 +epoch: 0, batch: 42552, sum loss: 5154.920898, avg loss: 2.793995, ppl: 16.346191 +epoch: 0, batch: 42553, sum loss: 4547.714355, avg loss: 2.844099, ppl: 17.186068 +epoch: 0, batch: 42554, sum loss: 4939.220703, avg loss: 2.866640, ppl: 17.577858 +epoch: 0, batch: 42555, sum loss: 5884.649414, avg loss: 2.993209, ppl: 19.949606 +epoch: 0, batch: 42556, sum loss: 4586.854004, avg loss: 2.766498, ppl: 15.902850 +epoch: 0, batch: 42557, sum loss: 6175.281738, avg loss: 3.176585, ppl: 23.964785 +epoch: 0, batch: 42558, sum loss: 5850.978027, avg loss: 2.935764, ppl: 18.835894 +epoch: 0, batch: 42559, sum loss: 3926.246582, avg loss: 2.770816, ppl: 15.971666 +epoch: 0, batch: 42560, sum loss: 4239.848633, avg loss: 2.785709, ppl: 16.211306 +epoch: 0, batch: 42561, sum loss: 4023.447510, avg loss: 2.928273, ppl: 18.695324 +epoch: 0, batch: 42562, sum loss: 4728.038574, avg loss: 2.672718, ppl: 14.479275 +epoch: 0, batch: 42563, sum loss: 5755.076660, avg loss: 3.311321, ppl: 27.421339 +epoch: 0, batch: 42564, sum loss: 4917.524414, avg loss: 2.719870, ppl: 15.178343 +epoch: 0, batch: 42565, sum loss: 4955.833496, avg loss: 2.699256, ppl: 14.868661 +epoch: 0, batch: 42566, sum loss: 4886.448242, avg loss: 2.945418, ppl: 19.018608 +epoch: 0, batch: 42567, sum loss: 4579.776367, avg loss: 2.782367, ppl: 16.157223 +epoch: 0, batch: 42568, sum loss: 5250.145996, avg loss: 2.915128, ppl: 18.451178 +epoch: 0, batch: 42569, sum loss: 5553.002930, avg loss: 3.057821, ppl: 21.281130 +epoch: 0, batch: 42570, sum loss: 4928.654297, avg loss: 2.845643, ppl: 17.212627 +epoch: 0, batch: 42571, sum loss: 4576.840820, avg loss: 2.889420, ppl: 17.982872 +epoch: 0, batch: 42572, sum loss: 3755.230713, avg loss: 2.624200, ppl: 13.793540 +epoch: 0, batch: 42573, sum loss: 5507.899902, avg loss: 2.808720, ppl: 16.588673 +epoch: 0, batch: 42574, sum loss: 4833.519531, avg loss: 3.324291, ppl: 27.779303 +epoch: 0, batch: 42575, sum loss: 5337.157715, avg loss: 2.968386, ppl: 
19.460480 +epoch: 0, batch: 42576, sum loss: 4038.107666, avg loss: 2.618747, ppl: 13.718520 +epoch: 0, batch: 42577, sum loss: 4478.029785, avg loss: 2.712314, ppl: 15.064088 +epoch: 0, batch: 42578, sum loss: 4025.500977, avg loss: 2.435270, ppl: 11.418900 +epoch: 0, batch: 42579, sum loss: 4205.674805, avg loss: 2.689050, ppl: 14.717694 +epoch: 0, batch: 42580, sum loss: 4470.769531, avg loss: 2.761439, ppl: 15.822593 +epoch: 0, batch: 42581, sum loss: 5724.261719, avg loss: 3.006440, ppl: 20.215309 +epoch: 0, batch: 42582, sum loss: 4529.347656, avg loss: 2.760114, ppl: 15.801647 +epoch: 0, batch: 42583, sum loss: 5057.260254, avg loss: 2.943691, ppl: 18.985785 +epoch: 0, batch: 42584, sum loss: 4443.182617, avg loss: 2.782206, ppl: 16.154615 +epoch: 0, batch: 42585, sum loss: 4964.252930, avg loss: 2.911585, ppl: 18.385923 +epoch: 0, batch: 42586, sum loss: 5696.201172, avg loss: 2.979185, ppl: 19.671770 +epoch: 0, batch: 42587, sum loss: 5491.218262, avg loss: 3.091902, ppl: 22.018925 +epoch: 0, batch: 42588, sum loss: 4976.118164, avg loss: 3.015829, ppl: 20.406008 +epoch: 0, batch: 42589, sum loss: 4568.288574, avg loss: 3.019358, ppl: 20.478144 +epoch: 0, batch: 42590, sum loss: 4397.390137, avg loss: 2.894924, ppl: 18.082134 +epoch: 0, batch: 42591, sum loss: 4910.362793, avg loss: 3.016193, ppl: 20.413437 +epoch: 0, batch: 42592, sum loss: 4645.019531, avg loss: 2.773146, ppl: 16.008921 +epoch: 0, batch: 42593, sum loss: 4934.136230, avg loss: 2.863689, ppl: 17.526064 +epoch: 0, batch: 42594, sum loss: 5409.577148, avg loss: 2.975565, ppl: 19.600693 +epoch: 0, batch: 42595, sum loss: 5296.345703, avg loss: 3.146967, ppl: 23.265398 +epoch: 0, batch: 42596, sum loss: 4701.187500, avg loss: 2.580235, ppl: 13.200236 +epoch: 0, batch: 42597, sum loss: 4578.796875, avg loss: 2.831662, ppl: 16.973642 +epoch: 0, batch: 42598, sum loss: 4727.785156, avg loss: 2.945660, ppl: 19.023220 +epoch: 0, batch: 42599, sum loss: 4509.426758, avg loss: 2.846860, ppl: 
17.233591 +epoch: 0, batch: 42600, sum loss: 5552.107910, avg loss: 3.072555, ppl: 21.597019 +epoch: 0, batch: 42601, sum loss: 3964.447510, avg loss: 2.835799, ppl: 17.044022 +epoch: 0, batch: 42602, sum loss: 4411.470215, avg loss: 2.483936, ppl: 11.988358 +epoch: 0, batch: 42603, sum loss: 4614.808594, avg loss: 2.918917, ppl: 18.521225 +epoch: 0, batch: 42604, sum loss: 6007.360840, avg loss: 2.927564, ppl: 18.682060 +epoch: 0, batch: 42605, sum loss: 6229.516602, avg loss: 3.158984, ppl: 23.546659 +epoch: 0, batch: 42606, sum loss: 5017.759277, avg loss: 2.811070, ppl: 16.627695 +epoch: 0, batch: 42607, sum loss: 4790.477051, avg loss: 2.742116, ppl: 15.519794 +epoch: 0, batch: 42608, sum loss: 4383.229492, avg loss: 2.947700, ppl: 19.062052 +epoch: 0, batch: 42609, sum loss: 4313.859863, avg loss: 2.671121, ppl: 14.456161 +epoch: 0, batch: 42610, sum loss: 5494.106445, avg loss: 3.119879, ppl: 22.643629 +epoch: 0, batch: 42611, sum loss: 5043.517578, avg loss: 3.040095, ppl: 20.907227 +epoch: 0, batch: 42612, sum loss: 3623.153564, avg loss: 2.600972, ppl: 13.476827 +epoch: 0, batch: 42613, sum loss: 5746.644043, avg loss: 3.308373, ppl: 27.340612 +epoch: 0, batch: 42614, sum loss: 4801.107910, avg loss: 2.982054, ppl: 19.728306 +epoch: 0, batch: 42615, sum loss: 4825.696289, avg loss: 2.988047, ppl: 19.846891 +epoch: 0, batch: 42616, sum loss: 4998.398926, avg loss: 2.833560, ppl: 17.005886 +epoch: 0, batch: 42617, sum loss: 4530.633789, avg loss: 3.028499, ppl: 20.666182 +epoch: 0, batch: 42618, sum loss: 4844.897949, avg loss: 2.798901, ppl: 16.426586 +epoch: 0, batch: 42619, sum loss: 4413.755859, avg loss: 2.611690, ppl: 13.622050 +epoch: 0, batch: 42620, sum loss: 4701.032227, avg loss: 2.760442, ppl: 15.806825 +epoch: 0, batch: 42621, sum loss: 4396.110840, avg loss: 2.653054, ppl: 14.197334 +epoch: 0, batch: 42622, sum loss: 5780.980469, avg loss: 3.231403, ppl: 25.315159 +epoch: 0, batch: 42623, sum loss: 4545.619141, avg loss: 2.895299, ppl: 
18.088909 +epoch: 0, batch: 42624, sum loss: 4651.411133, avg loss: 2.864169, ppl: 17.534483 +epoch: 0, batch: 42625, sum loss: 4235.167969, avg loss: 2.916782, ppl: 18.481724 +epoch: 0, batch: 42626, sum loss: 4969.036133, avg loss: 2.888974, ppl: 17.974865 +epoch: 0, batch: 42627, sum loss: 5957.828125, avg loss: 2.939234, ppl: 18.901367 +epoch: 0, batch: 42628, sum loss: 4989.677734, avg loss: 3.020386, ppl: 20.499207 +epoch: 0, batch: 42629, sum loss: 4756.424805, avg loss: 3.037308, ppl: 20.849047 +epoch: 0, batch: 42630, sum loss: 4960.331055, avg loss: 2.800864, ppl: 16.458868 +epoch: 0, batch: 42631, sum loss: 5417.971680, avg loss: 3.140853, ppl: 23.123592 +epoch: 0, batch: 42632, sum loss: 5257.314453, avg loss: 3.009338, ppl: 20.273981 +epoch: 0, batch: 42633, sum loss: 4482.979492, avg loss: 2.899728, ppl: 18.169205 +epoch: 0, batch: 42634, sum loss: 4737.972168, avg loss: 3.013977, ppl: 20.368250 +epoch: 0, batch: 42635, sum loss: 4761.389648, avg loss: 2.887441, ppl: 17.947317 +epoch: 0, batch: 42636, sum loss: 5001.234375, avg loss: 2.849706, ppl: 17.282703 +epoch: 0, batch: 42637, sum loss: 3903.854248, avg loss: 2.575102, ppl: 13.132655 +epoch: 0, batch: 42638, sum loss: 3520.475098, avg loss: 2.718513, ppl: 15.157773 +epoch: 0, batch: 42639, sum loss: 5861.231445, avg loss: 3.107758, ppl: 22.370834 +epoch: 0, batch: 42640, sum loss: 6057.998047, avg loss: 3.070450, ppl: 21.551605 +epoch: 0, batch: 42641, sum loss: 5097.576172, avg loss: 3.080106, ppl: 21.760719 +epoch: 0, batch: 42642, sum loss: 4801.579102, avg loss: 2.927792, ppl: 18.686327 +epoch: 0, batch: 42643, sum loss: 5818.872070, avg loss: 3.184933, ppl: 24.165663 +epoch: 0, batch: 42644, sum loss: 5213.146484, avg loss: 3.052193, ppl: 21.161709 +epoch: 0, batch: 42645, sum loss: 4525.417969, avg loss: 2.772928, ppl: 16.005426 +epoch: 0, batch: 42646, sum loss: 5426.373535, avg loss: 2.923693, ppl: 18.609882 +epoch: 0, batch: 42647, sum loss: 4846.743652, avg loss: 2.939202, ppl: 
18.900749 +epoch: 0, batch: 42648, sum loss: 4898.399902, avg loss: 2.727394, ppl: 15.292987 +epoch: 0, batch: 42649, sum loss: 5639.809570, avg loss: 2.957425, ppl: 19.248346 +epoch: 0, batch: 42650, sum loss: 4263.627930, avg loss: 2.683215, ppl: 14.632055 +epoch: 0, batch: 42651, sum loss: 5804.895020, avg loss: 3.031277, ppl: 20.723673 +epoch: 0, batch: 42652, sum loss: 4760.932129, avg loss: 2.815454, ppl: 16.700752 +epoch: 0, batch: 42653, sum loss: 3955.156006, avg loss: 2.605505, ppl: 13.538057 +epoch: 0, batch: 42654, sum loss: 5071.253418, avg loss: 2.943269, ppl: 18.977793 +epoch: 0, batch: 42655, sum loss: 3083.269043, avg loss: 2.439295, ppl: 11.464958 +epoch: 0, batch: 42656, sum loss: 3906.070801, avg loss: 2.578265, ppl: 13.174257 +epoch: 0, batch: 42657, sum loss: 4841.395996, avg loss: 2.811496, ppl: 16.634781 +epoch: 0, batch: 42658, sum loss: 5538.414551, avg loss: 2.918027, ppl: 18.504740 +epoch: 0, batch: 42659, sum loss: 4665.739746, avg loss: 2.800564, ppl: 16.453924 +epoch: 0, batch: 42660, sum loss: 5024.400391, avg loss: 2.933100, ppl: 18.785776 +epoch: 0, batch: 42661, sum loss: 4870.280273, avg loss: 3.021266, ppl: 20.517244 +epoch: 0, batch: 42662, sum loss: 4244.085938, avg loss: 2.642644, ppl: 14.050300 +epoch: 0, batch: 42663, sum loss: 5878.129883, avg loss: 2.956806, ppl: 19.236427 +epoch: 0, batch: 42664, sum loss: 4918.342285, avg loss: 3.077811, ppl: 21.710831 +epoch: 0, batch: 42665, sum loss: 4690.216797, avg loss: 2.711108, ppl: 15.045937 +epoch: 0, batch: 42666, sum loss: 5115.865723, avg loss: 2.784903, ppl: 16.198240 +epoch: 0, batch: 42667, sum loss: 5679.881348, avg loss: 3.032505, ppl: 20.749140 +epoch: 0, batch: 42668, sum loss: 5028.562500, avg loss: 2.880047, ppl: 17.815117 +epoch: 0, batch: 42669, sum loss: 5343.311035, avg loss: 2.846730, ppl: 17.231335 +epoch: 0, batch: 42670, sum loss: 4947.717773, avg loss: 2.787446, ppl: 16.239500 +epoch: 0, batch: 42671, sum loss: 5657.872070, avg loss: 2.940682, ppl: 
18.928751 +epoch: 0, batch: 42672, sum loss: 4376.757812, avg loss: 2.845746, ppl: 17.214401 +epoch: 0, batch: 42673, sum loss: 4545.932617, avg loss: 2.990745, ppl: 19.900509 +epoch: 0, batch: 42674, sum loss: 4788.159180, avg loss: 2.770926, ppl: 15.973411 +epoch: 0, batch: 42675, sum loss: 3758.332031, avg loss: 2.493917, ppl: 12.108607 +epoch: 0, batch: 42676, sum loss: 4979.350586, avg loss: 2.894971, ppl: 18.082983 +epoch: 0, batch: 42677, sum loss: 5078.531738, avg loss: 2.838754, ppl: 17.094460 +epoch: 0, batch: 42678, sum loss: 4743.756836, avg loss: 2.669531, ppl: 14.433200 +epoch: 0, batch: 42679, sum loss: 4667.260254, avg loss: 2.616177, ppl: 13.683316 +epoch: 0, batch: 42680, sum loss: 4589.069824, avg loss: 2.859233, ppl: 17.448145 +epoch: 0, batch: 42681, sum loss: 5725.555176, avg loss: 2.809399, ppl: 16.599941 +epoch: 0, batch: 42682, sum loss: 4494.789551, avg loss: 2.595144, ppl: 13.398517 +epoch: 0, batch: 42683, sum loss: 4100.523926, avg loss: 2.655780, ppl: 14.236084 +epoch: 0, batch: 42684, sum loss: 5259.974609, avg loss: 2.909278, ppl: 18.343548 +epoch: 0, batch: 42685, sum loss: 4429.134766, avg loss: 2.663340, ppl: 14.344123 +epoch: 0, batch: 42686, sum loss: 4874.095703, avg loss: 2.995756, ppl: 20.000483 +epoch: 0, batch: 42687, sum loss: 5048.180664, avg loss: 3.050260, ppl: 21.120842 +epoch: 0, batch: 42688, sum loss: 4010.062500, avg loss: 2.704021, ppl: 14.939680 +epoch: 0, batch: 42689, sum loss: 5417.089844, avg loss: 3.074399, ppl: 21.636864 +epoch: 0, batch: 42690, sum loss: 5315.723145, avg loss: 3.037556, ppl: 20.854212 +epoch: 0, batch: 42691, sum loss: 5030.946777, avg loss: 2.914801, ppl: 18.445143 +epoch: 0, batch: 42692, sum loss: 5206.497070, avg loss: 3.062645, ppl: 21.384052 +epoch: 0, batch: 42693, sum loss: 4762.486816, avg loss: 3.029572, ppl: 20.688372 +epoch: 0, batch: 42694, sum loss: 4696.776367, avg loss: 2.855183, ppl: 17.377619 +epoch: 0, batch: 42695, sum loss: 6174.763184, avg loss: 3.234554, ppl: 
25.395039 +epoch: 0, batch: 42696, sum loss: 5210.813965, avg loss: 3.094308, ppl: 22.071953 +epoch: 0, batch: 42697, sum loss: 4717.219727, avg loss: 2.769947, ppl: 15.957789 +epoch: 0, batch: 42698, sum loss: 4583.091797, avg loss: 2.777632, ppl: 16.080889 +epoch: 0, batch: 42699, sum loss: 4479.087402, avg loss: 3.018253, ppl: 20.455521 +epoch: 0, batch: 42700, sum loss: 5464.321777, avg loss: 3.229505, ppl: 25.267136 +epoch: 0, batch: 42701, sum loss: 5307.323730, avg loss: 2.805139, ppl: 16.529379 +epoch: 0, batch: 42702, sum loss: 5305.501953, avg loss: 2.932837, ppl: 18.780836 +epoch: 0, batch: 42703, sum loss: 5222.497559, avg loss: 3.022279, ppl: 20.538040 +epoch: 0, batch: 42704, sum loss: 4720.824707, avg loss: 2.655132, ppl: 14.226865 +epoch: 0, batch: 42705, sum loss: 3979.337891, avg loss: 2.856668, ppl: 17.403433 +epoch: 0, batch: 42706, sum loss: 4661.067871, avg loss: 2.601043, ppl: 13.477781 +epoch: 0, batch: 42707, sum loss: 4668.842773, avg loss: 2.733514, ppl: 15.386867 +epoch: 0, batch: 42708, sum loss: 3859.692383, avg loss: 2.557782, ppl: 12.907153 +epoch: 0, batch: 42709, sum loss: 5246.497559, avg loss: 2.863809, ppl: 17.528162 +epoch: 0, batch: 42710, sum loss: 4391.715332, avg loss: 2.686064, ppl: 14.673810 +epoch: 0, batch: 42711, sum loss: 4994.110840, avg loss: 2.898497, ppl: 18.146856 +epoch: 0, batch: 42712, sum loss: 3787.376465, avg loss: 2.659674, ppl: 14.291636 +epoch: 0, batch: 42713, sum loss: 4625.473633, avg loss: 2.687666, ppl: 14.697335 +epoch: 0, batch: 42714, sum loss: 5639.581055, avg loss: 3.071667, ppl: 21.577847 +epoch: 0, batch: 42715, sum loss: 4494.413086, avg loss: 2.842766, ppl: 17.163170 +epoch: 0, batch: 42716, sum loss: 4308.224609, avg loss: 2.774131, ppl: 16.024689 +epoch: 0, batch: 42717, sum loss: 4110.500000, avg loss: 2.872467, ppl: 17.680578 +epoch: 0, batch: 42718, sum loss: 4935.532227, avg loss: 2.859520, ppl: 17.453156 +epoch: 0, batch: 42719, sum loss: 5162.178711, avg loss: 3.140011, ppl: 
23.104128 +epoch: 0, batch: 42720, sum loss: 4498.929199, avg loss: 2.649546, ppl: 14.147616 +epoch: 0, batch: 42721, sum loss: 5533.935547, avg loss: 2.870299, ppl: 17.642286 +epoch: 0, batch: 42722, sum loss: 3970.587158, avg loss: 2.710298, ppl: 15.033759 +epoch: 0, batch: 42723, sum loss: 5400.349609, avg loss: 2.877118, ppl: 17.763000 +epoch: 0, batch: 42724, sum loss: 4478.324219, avg loss: 2.836177, ppl: 17.050463 +epoch: 0, batch: 42725, sum loss: 6009.977539, avg loss: 3.011011, ppl: 20.307919 +epoch: 0, batch: 42726, sum loss: 4895.643555, avg loss: 3.067446, ppl: 21.486950 +epoch: 0, batch: 42727, sum loss: 5074.384766, avg loss: 2.879901, ppl: 17.812504 +epoch: 0, batch: 42728, sum loss: 5380.664551, avg loss: 2.812684, ppl: 16.654560 +epoch: 0, batch: 42729, sum loss: 4092.387207, avg loss: 2.674763, ppl: 14.508910 +epoch: 0, batch: 42730, sum loss: 5219.346680, avg loss: 2.801582, ppl: 16.470676 +epoch: 0, batch: 42731, sum loss: 5290.117676, avg loss: 3.283748, ppl: 26.675564 +epoch: 0, batch: 42732, sum loss: 5205.044434, avg loss: 3.013923, ppl: 20.367138 +epoch: 0, batch: 42733, sum loss: 5996.705078, avg loss: 3.215391, ppl: 24.913040 +epoch: 0, batch: 42734, sum loss: 5455.590820, avg loss: 2.964995, ppl: 19.394604 +epoch: 0, batch: 42735, sum loss: 5141.681152, avg loss: 2.923071, ppl: 18.598309 +epoch: 0, batch: 42736, sum loss: 5372.888184, avg loss: 2.947278, ppl: 19.054024 +epoch: 0, batch: 42737, sum loss: 4972.492676, avg loss: 2.661934, ppl: 14.323963 +epoch: 0, batch: 42738, sum loss: 5244.088867, avg loss: 2.807328, ppl: 16.565596 +epoch: 0, batch: 42739, sum loss: 4395.757324, avg loss: 2.740497, ppl: 15.494686 +epoch: 0, batch: 42740, sum loss: 5304.042969, avg loss: 2.887340, ppl: 17.945507 +epoch: 0, batch: 42741, sum loss: 5448.927246, avg loss: 2.813076, ppl: 16.661081 +epoch: 0, batch: 42742, sum loss: 5356.550781, avg loss: 2.810362, ppl: 16.615938 +epoch: 0, batch: 42743, sum loss: 4284.202637, avg loss: 2.708092, ppl: 
15.000634 +epoch: 0, batch: 42744, sum loss: 4716.581543, avg loss: 2.814190, ppl: 16.679655 +epoch: 0, batch: 42745, sum loss: 4569.986328, avg loss: 2.872399, ppl: 17.679373 +epoch: 0, batch: 42746, sum loss: 4998.791504, avg loss: 2.901214, ppl: 18.196220 +epoch: 0, batch: 42747, sum loss: 4118.443359, avg loss: 2.640028, ppl: 14.013593 +epoch: 0, batch: 42748, sum loss: 4199.449219, avg loss: 2.659563, ppl: 14.290037 +epoch: 0, batch: 42749, sum loss: 4037.106445, avg loss: 2.643816, ppl: 14.066773 +epoch: 0, batch: 42750, sum loss: 4038.537109, avg loss: 2.582185, ppl: 13.226003 +epoch: 0, batch: 42751, sum loss: 5389.519531, avg loss: 2.888274, ppl: 17.962278 +epoch: 0, batch: 42752, sum loss: 5269.895020, avg loss: 3.033906, ppl: 20.778238 +epoch: 0, batch: 42753, sum loss: 3445.091309, avg loss: 2.654153, ppl: 14.212948 +epoch: 0, batch: 42754, sum loss: 4109.505371, avg loss: 2.847890, ppl: 17.251341 +epoch: 0, batch: 42755, sum loss: 4640.168945, avg loss: 2.659122, ppl: 14.283750 +epoch: 0, batch: 42756, sum loss: 4534.737793, avg loss: 2.651893, ppl: 14.180863 +epoch: 0, batch: 42757, sum loss: 5468.183594, avg loss: 3.009457, ppl: 20.276388 +epoch: 0, batch: 42758, sum loss: 4045.800781, avg loss: 2.542929, ppl: 12.716869 +epoch: 0, batch: 42759, sum loss: 4527.412598, avg loss: 2.842067, ppl: 17.151180 +epoch: 0, batch: 42760, sum loss: 5262.963379, avg loss: 2.936922, ppl: 18.857706 +epoch: 0, batch: 42761, sum loss: 5031.642578, avg loss: 3.053181, ppl: 21.182623 +epoch: 0, batch: 42762, sum loss: 5161.902832, avg loss: 3.038201, ppl: 20.867666 +epoch: 0, batch: 42763, sum loss: 5623.131348, avg loss: 3.018321, ppl: 20.456907 +epoch: 0, batch: 42764, sum loss: 4348.484375, avg loss: 2.930245, ppl: 18.732227 +epoch: 0, batch: 42765, sum loss: 4322.379883, avg loss: 2.826933, ppl: 16.893562 +epoch: 0, batch: 42766, sum loss: 4471.204590, avg loss: 2.695120, ppl: 14.807301 +epoch: 0, batch: 42767, sum loss: 4703.377930, avg loss: 2.952528, ppl: 
19.154324 +epoch: 0, batch: 42768, sum loss: 4830.177246, avg loss: 3.032126, ppl: 20.741291 +epoch: 0, batch: 42769, sum loss: 4956.748047, avg loss: 2.860212, ppl: 17.465235 +epoch: 0, batch: 42770, sum loss: 5128.024902, avg loss: 2.845741, ppl: 17.214306 +epoch: 0, batch: 42771, sum loss: 4962.485840, avg loss: 2.749300, ppl: 15.631679 +epoch: 0, batch: 42772, sum loss: 5354.698242, avg loss: 3.051110, ppl: 21.138796 +epoch: 0, batch: 42773, sum loss: 4115.118164, avg loss: 2.727050, ppl: 15.287719 +epoch: 0, batch: 42774, sum loss: 5261.328613, avg loss: 3.032466, ppl: 20.748339 +epoch: 0, batch: 42775, sum loss: 4327.149902, avg loss: 2.921776, ppl: 18.574244 +epoch: 0, batch: 42776, sum loss: 5240.227051, avg loss: 2.942295, ppl: 18.959305 +epoch: 0, batch: 42777, sum loss: 5783.368164, avg loss: 3.249083, ppl: 25.766708 +epoch: 0, batch: 42778, sum loss: 4127.861816, avg loss: 2.541787, ppl: 12.702346 +epoch: 0, batch: 42779, sum loss: 4742.899414, avg loss: 2.614608, ppl: 13.661864 +epoch: 0, batch: 42780, sum loss: 4615.600586, avg loss: 2.956823, ppl: 19.236761 +epoch: 0, batch: 42781, sum loss: 5118.522949, avg loss: 3.061318, ppl: 21.355679 +epoch: 0, batch: 42782, sum loss: 5453.352539, avg loss: 3.060243, ppl: 21.332739 +epoch: 0, batch: 42783, sum loss: 4791.643066, avg loss: 2.641479, ppl: 14.033948 +epoch: 0, batch: 42784, sum loss: 5150.808105, avg loss: 2.918305, ppl: 18.509886 +epoch: 0, batch: 42785, sum loss: 5382.157227, avg loss: 3.013526, ppl: 20.359058 +epoch: 0, batch: 42786, sum loss: 5043.228516, avg loss: 2.868731, ppl: 17.614643 +epoch: 0, batch: 42787, sum loss: 4815.242188, avg loss: 2.921870, ppl: 18.575996 +epoch: 0, batch: 42788, sum loss: 5725.468262, avg loss: 3.035773, ppl: 20.817068 +epoch: 0, batch: 42789, sum loss: 5140.423828, avg loss: 3.105996, ppl: 22.331459 +epoch: 0, batch: 42790, sum loss: 4344.455078, avg loss: 2.676805, ppl: 14.538572 +epoch: 0, batch: 42791, sum loss: 5001.240723, avg loss: 2.840000, ppl: 
17.115772 +epoch: 0, batch: 42792, sum loss: 5496.328613, avg loss: 2.876153, ppl: 17.745872 +epoch: 0, batch: 42793, sum loss: 4430.402832, avg loss: 2.928224, ppl: 18.694401 +epoch: 0, batch: 42794, sum loss: 5852.663086, avg loss: 2.989103, ppl: 19.867849 +epoch: 0, batch: 42795, sum loss: 4122.286621, avg loss: 2.654402, ppl: 14.216486 +epoch: 0, batch: 42796, sum loss: 5740.408691, avg loss: 3.152339, ppl: 23.390705 +epoch: 0, batch: 42797, sum loss: 5556.333008, avg loss: 2.892417, ppl: 18.036856 +epoch: 0, batch: 42798, sum loss: 6053.986328, avg loss: 3.077776, ppl: 21.710075 +epoch: 0, batch: 42799, sum loss: 5976.746582, avg loss: 3.108032, ppl: 22.376974 +epoch: 0, batch: 42800, sum loss: 5554.549805, avg loss: 3.053628, ppl: 21.192095 +epoch: 0, batch: 42801, sum loss: 5122.986328, avg loss: 2.865205, ppl: 17.552649 +epoch: 0, batch: 42802, sum loss: 4593.416016, avg loss: 2.986616, ppl: 19.818510 +epoch: 0, batch: 42803, sum loss: 4151.801758, avg loss: 2.982616, ppl: 19.739391 +epoch: 0, batch: 42804, sum loss: 5303.949219, avg loss: 2.940105, ppl: 18.917831 +epoch: 0, batch: 42805, sum loss: 5605.756836, avg loss: 3.035061, ppl: 20.802244 +epoch: 0, batch: 42806, sum loss: 4957.425293, avg loss: 3.028360, ppl: 20.663321 +epoch: 0, batch: 42807, sum loss: 5263.210938, avg loss: 2.706021, ppl: 14.969594 +epoch: 0, batch: 42808, sum loss: 4388.880859, avg loss: 2.866676, ppl: 17.578491 +epoch: 0, batch: 42809, sum loss: 4478.208008, avg loss: 2.688000, ppl: 14.702242 +epoch: 0, batch: 42810, sum loss: 5552.738281, avg loss: 2.899602, ppl: 18.166916 +epoch: 0, batch: 42811, sum loss: 4268.709961, avg loss: 2.656322, ppl: 14.243811 +epoch: 0, batch: 42812, sum loss: 4667.762207, avg loss: 3.001776, ppl: 20.121250 +epoch: 0, batch: 42813, sum loss: 5655.616699, avg loss: 3.030877, ppl: 20.715389 +epoch: 0, batch: 42814, sum loss: 3974.824219, avg loss: 2.741258, ppl: 15.506482 +epoch: 0, batch: 42815, sum loss: 4277.857422, avg loss: 2.768840, ppl: 
15.940130 +epoch: 0, batch: 42816, sum loss: 4321.689941, avg loss: 2.641620, ppl: 14.035922 +epoch: 0, batch: 42817, sum loss: 4529.039551, avg loss: 2.807836, ppl: 16.574015 +epoch: 0, batch: 42818, sum loss: 4301.241211, avg loss: 2.730947, ppl: 15.347411 +epoch: 0, batch: 42819, sum loss: 4489.061523, avg loss: 2.742249, ppl: 15.521854 +epoch: 0, batch: 42820, sum loss: 4561.314941, avg loss: 2.715069, ppl: 15.105646 +epoch: 0, batch: 42821, sum loss: 5730.858887, avg loss: 3.201597, ppl: 24.571739 +epoch: 0, batch: 42822, sum loss: 5731.965332, avg loss: 2.807035, ppl: 16.560741 +epoch: 0, batch: 42823, sum loss: 4558.407227, avg loss: 2.840129, ppl: 17.117975 +epoch: 0, batch: 42824, sum loss: 5895.310059, avg loss: 3.062499, ppl: 21.380913 +epoch: 0, batch: 42825, sum loss: 4857.207520, avg loss: 2.972587, ppl: 19.542418 +epoch: 0, batch: 42826, sum loss: 5085.779297, avg loss: 2.724038, ppl: 15.241746 +epoch: 0, batch: 42827, sum loss: 6350.056152, avg loss: 3.231581, ppl: 25.319649 +epoch: 0, batch: 42828, sum loss: 4662.008301, avg loss: 2.748826, ppl: 15.624275 +epoch: 0, batch: 42829, sum loss: 5325.843750, avg loss: 2.935967, ppl: 18.839712 +epoch: 0, batch: 42830, sum loss: 5072.572754, avg loss: 2.952604, ppl: 19.155762 +epoch: 0, batch: 42831, sum loss: 5045.546875, avg loss: 2.959265, ppl: 19.283798 +epoch: 0, batch: 42832, sum loss: 4763.290039, avg loss: 2.874647, ppl: 17.719166 +epoch: 0, batch: 42833, sum loss: 4401.079102, avg loss: 2.630651, ppl: 13.882805 +epoch: 0, batch: 42834, sum loss: 4810.016113, avg loss: 2.754878, ppl: 15.719116 +epoch: 0, batch: 42835, sum loss: 4994.597656, avg loss: 2.815444, ppl: 16.700584 +epoch: 0, batch: 42836, sum loss: 5695.733887, avg loss: 3.313400, ppl: 27.478388 +epoch: 0, batch: 42837, sum loss: 4082.664307, avg loss: 2.853015, ppl: 17.339983 +epoch: 0, batch: 42838, sum loss: 4556.402832, avg loss: 2.774910, ppl: 16.037191 +epoch: 0, batch: 42839, sum loss: 4910.288574, avg loss: 2.895217, ppl: 
18.087433 +epoch: 0, batch: 42840, sum loss: 5352.224121, avg loss: 3.063666, ppl: 21.405880 +epoch: 0, batch: 42841, sum loss: 3983.518311, avg loss: 2.791534, ppl: 16.306009 +epoch: 0, batch: 42842, sum loss: 4879.080078, avg loss: 3.036142, ppl: 20.824749 +epoch: 0, batch: 42843, sum loss: 4576.011719, avg loss: 2.758295, ppl: 15.772928 +epoch: 0, batch: 42844, sum loss: 5179.122559, avg loss: 3.011118, ppl: 20.310087 +epoch: 0, batch: 42845, sum loss: 4386.413086, avg loss: 2.908762, ppl: 18.334087 +epoch: 0, batch: 42846, sum loss: 4368.591309, avg loss: 2.827567, ppl: 16.904289 +epoch: 0, batch: 42847, sum loss: 5609.472656, avg loss: 3.038718, ppl: 20.878450 +epoch: 0, batch: 42848, sum loss: 5606.559082, avg loss: 2.936909, ppl: 18.857468 +epoch: 0, batch: 42849, sum loss: 5820.032715, avg loss: 3.017124, ppl: 20.432447 +epoch: 0, batch: 42850, sum loss: 5117.302246, avg loss: 2.957978, ppl: 19.258991 +epoch: 0, batch: 42851, sum loss: 4653.559082, avg loss: 2.705558, ppl: 14.962657 +epoch: 0, batch: 42852, sum loss: 4819.725098, avg loss: 2.874016, ppl: 17.707993 +epoch: 0, batch: 42853, sum loss: 4572.580078, avg loss: 2.777995, ppl: 16.086740 +epoch: 0, batch: 42854, sum loss: 4774.036133, avg loss: 2.834939, ppl: 17.029354 +epoch: 0, batch: 42855, sum loss: 4627.879395, avg loss: 2.808179, ppl: 16.579706 +epoch: 0, batch: 42856, sum loss: 4700.294922, avg loss: 2.873041, ppl: 17.690733 +epoch: 0, batch: 42857, sum loss: 4964.810547, avg loss: 2.817713, ppl: 16.738531 +epoch: 0, batch: 42858, sum loss: 4799.000488, avg loss: 2.870216, ppl: 17.640827 +epoch: 0, batch: 42859, sum loss: 5664.865723, avg loss: 3.045627, ppl: 21.023207 +epoch: 0, batch: 42860, sum loss: 6421.030273, avg loss: 3.142942, ppl: 23.171938 +epoch: 0, batch: 42861, sum loss: 4684.871582, avg loss: 2.725347, ppl: 15.261713 +epoch: 0, batch: 42862, sum loss: 4185.600098, avg loss: 2.504848, ppl: 12.241693 +epoch: 0, batch: 42863, sum loss: 5347.622559, avg loss: 3.011049, ppl: 
20.308689 +epoch: 0, batch: 42864, sum loss: 4773.294434, avg loss: 2.788139, ppl: 16.250751 +epoch: 0, batch: 42865, sum loss: 3704.170410, avg loss: 2.717660, ppl: 15.144841 +epoch: 0, batch: 42866, sum loss: 4077.410645, avg loss: 2.627198, ppl: 13.834947 +epoch: 0, batch: 42867, sum loss: 4936.360352, avg loss: 3.049018, ppl: 21.094622 +epoch: 0, batch: 42868, sum loss: 5042.962891, avg loss: 3.112940, ppl: 22.487066 +epoch: 0, batch: 42869, sum loss: 4678.257324, avg loss: 2.995043, ppl: 19.986219 +epoch: 0, batch: 42870, sum loss: 5467.748535, avg loss: 3.073495, ppl: 21.617332 +epoch: 0, batch: 42871, sum loss: 4861.958984, avg loss: 2.795836, ppl: 16.376318 +epoch: 0, batch: 42872, sum loss: 3970.687744, avg loss: 2.575024, ppl: 13.131638 +epoch: 0, batch: 42873, sum loss: 5802.134766, avg loss: 2.998519, ppl: 20.055817 +epoch: 0, batch: 42874, sum loss: 4423.841309, avg loss: 2.720690, ppl: 15.190793 +epoch: 0, batch: 42875, sum loss: 5162.736816, avg loss: 2.930044, ppl: 18.728449 +epoch: 0, batch: 42876, sum loss: 4909.435547, avg loss: 2.768999, ppl: 15.942673 +epoch: 0, batch: 42877, sum loss: 4599.094238, avg loss: 2.963334, ppl: 19.362421 +epoch: 0, batch: 42878, sum loss: 5460.759766, avg loss: 3.346054, ppl: 28.390478 +epoch: 0, batch: 42879, sum loss: 4833.348633, avg loss: 2.920452, ppl: 18.549677 +epoch: 0, batch: 42880, sum loss: 4903.014648, avg loss: 3.041572, ppl: 20.938139 +epoch: 0, batch: 42881, sum loss: 4563.695801, avg loss: 2.553831, ppl: 12.856260 +epoch: 0, batch: 42882, sum loss: 4130.186523, avg loss: 2.544785, ppl: 12.740492 +epoch: 0, batch: 42883, sum loss: 4793.145996, avg loss: 2.877038, ppl: 17.761589 +epoch: 0, batch: 42884, sum loss: 5504.438965, avg loss: 2.837340, ppl: 17.070295 +epoch: 0, batch: 42885, sum loss: 4534.864746, avg loss: 2.832520, ppl: 16.988222 +epoch: 0, batch: 42886, sum loss: 4669.884766, avg loss: 2.808109, ppl: 16.578535 +epoch: 0, batch: 42887, sum loss: 5470.973145, avg loss: 3.068409, ppl: 
21.507662 +epoch: 0, batch: 42888, sum loss: 4087.251953, avg loss: 2.946829, ppl: 19.045465 +epoch: 0, batch: 42889, sum loss: 4379.662109, avg loss: 2.811080, ppl: 16.627863 +epoch: 0, batch: 42890, sum loss: 4345.245117, avg loss: 2.577251, ppl: 13.160909 +epoch: 0, batch: 42891, sum loss: 6056.566895, avg loss: 3.112316, ppl: 22.473034 +epoch: 0, batch: 42892, sum loss: 5050.325684, avg loss: 2.924335, ppl: 18.621830 +epoch: 0, batch: 42893, sum loss: 4104.128906, avg loss: 2.620772, ppl: 13.746330 +epoch: 0, batch: 42894, sum loss: 5391.692383, avg loss: 3.030743, ppl: 20.712624 +epoch: 0, batch: 42895, sum loss: 3446.717285, avg loss: 2.472538, ppl: 11.852485 +epoch: 0, batch: 42896, sum loss: 3867.704102, avg loss: 2.656390, ppl: 14.244775 +epoch: 0, batch: 42897, sum loss: 5780.918945, avg loss: 3.015607, ppl: 20.401478 +epoch: 0, batch: 42898, sum loss: 4742.256348, avg loss: 2.768393, ppl: 15.933002 +epoch: 0, batch: 42899, sum loss: 4996.086426, avg loss: 2.961521, ppl: 19.327354 +epoch: 0, batch: 42900, sum loss: 5490.069336, avg loss: 2.792507, ppl: 16.321894 +epoch: 0, batch: 42901, sum loss: 4232.602539, avg loss: 2.939307, ppl: 18.902750 +epoch: 0, batch: 42902, sum loss: 5280.483887, avg loss: 3.071835, ppl: 21.581465 +epoch: 0, batch: 42903, sum loss: 4415.158203, avg loss: 2.637490, ppl: 13.978075 +epoch: 0, batch: 42904, sum loss: 4259.148926, avg loss: 2.670313, ppl: 14.444488 +epoch: 0, batch: 42905, sum loss: 4150.488281, avg loss: 2.576343, ppl: 13.148962 +epoch: 0, batch: 42906, sum loss: 5610.896973, avg loss: 3.049400, ppl: 21.102686 +epoch: 0, batch: 42907, sum loss: 4953.232422, avg loss: 3.169055, ppl: 23.784990 +epoch: 0, batch: 42908, sum loss: 4614.254395, avg loss: 3.233535, ppl: 25.369181 +epoch: 0, batch: 42909, sum loss: 4071.290527, avg loss: 2.706975, ppl: 14.983880 +epoch: 0, batch: 42910, sum loss: 4822.946777, avg loss: 2.749685, ppl: 15.637698 +epoch: 0, batch: 42911, sum loss: 5418.570312, avg loss: 3.013665, ppl: 
20.361895 +epoch: 0, batch: 42912, sum loss: 4793.069824, avg loss: 3.052911, ppl: 21.176897 +epoch: 0, batch: 42913, sum loss: 5448.256836, avg loss: 2.811278, ppl: 16.631161 +epoch: 0, batch: 42914, sum loss: 5631.220215, avg loss: 3.133679, ppl: 22.958281 +epoch: 0, batch: 42915, sum loss: 5165.106445, avg loss: 2.867910, ppl: 17.600199 +epoch: 0, batch: 42916, sum loss: 5252.808594, avg loss: 2.848595, ppl: 17.263504 +epoch: 0, batch: 42917, sum loss: 4652.126953, avg loss: 2.850568, ppl: 17.297606 +epoch: 0, batch: 42918, sum loss: 5059.054199, avg loss: 2.936189, ppl: 18.843903 +epoch: 0, batch: 42919, sum loss: 5562.236328, avg loss: 3.036155, ppl: 20.825022 +epoch: 0, batch: 42920, sum loss: 3794.223633, avg loss: 2.568872, ppl: 13.051094 +epoch: 0, batch: 42921, sum loss: 4241.722656, avg loss: 2.801666, ppl: 16.472071 +epoch: 0, batch: 42922, sum loss: 4320.818359, avg loss: 2.766209, ppl: 15.898248 +epoch: 0, batch: 42923, sum loss: 4686.870117, avg loss: 2.914720, ppl: 18.443649 +epoch: 0, batch: 42924, sum loss: 4850.511719, avg loss: 2.759108, ppl: 15.785753 +epoch: 0, batch: 42925, sum loss: 4527.213867, avg loss: 2.817183, ppl: 16.729666 +epoch: 0, batch: 42926, sum loss: 5257.727539, avg loss: 3.103735, ppl: 22.281021 +epoch: 0, batch: 42927, sum loss: 4220.826172, avg loss: 2.700465, ppl: 14.886648 +epoch: 0, batch: 42928, sum loss: 4044.549316, avg loss: 2.523113, ppl: 12.467341 +epoch: 0, batch: 42929, sum loss: 4463.069336, avg loss: 2.817594, ppl: 16.736540 +epoch: 0, batch: 42930, sum loss: 4518.457520, avg loss: 2.843585, ppl: 17.177244 +epoch: 0, batch: 42931, sum loss: 5219.257324, avg loss: 2.807562, ppl: 16.569466 +epoch: 0, batch: 42932, sum loss: 5848.433105, avg loss: 3.062007, ppl: 21.370403 +epoch: 0, batch: 42933, sum loss: 5444.881836, avg loss: 3.116704, ppl: 22.571859 +epoch: 0, batch: 42934, sum loss: 4535.233887, avg loss: 2.864961, ppl: 17.548376 +epoch: 0, batch: 42935, sum loss: 4930.781250, avg loss: 2.721182, ppl: 
15.198271 +epoch: 0, batch: 42936, sum loss: 4918.672852, avg loss: 2.900161, ppl: 18.177073 +epoch: 0, batch: 42937, sum loss: 4877.166504, avg loss: 2.899623, ppl: 18.167303 +epoch: 0, batch: 42938, sum loss: 6481.807129, avg loss: 2.982884, ppl: 19.744682 +epoch: 0, batch: 42939, sum loss: 5277.423340, avg loss: 2.920544, ppl: 18.551374 +epoch: 0, batch: 42940, sum loss: 5152.626465, avg loss: 2.995713, ppl: 19.999615 +epoch: 0, batch: 42941, sum loss: 4809.605469, avg loss: 2.822538, ppl: 16.819490 +epoch: 0, batch: 42942, sum loss: 4435.990234, avg loss: 2.762136, ppl: 15.833624 +epoch: 0, batch: 42943, sum loss: 5371.623535, avg loss: 2.770306, ppl: 15.963520 +epoch: 0, batch: 42944, sum loss: 4850.153320, avg loss: 2.714132, ppl: 15.091502 +epoch: 0, batch: 42945, sum loss: 5628.757812, avg loss: 3.167562, ppl: 23.749512 +epoch: 0, batch: 42946, sum loss: 4223.341797, avg loss: 2.605393, ppl: 13.536540 +epoch: 0, batch: 42947, sum loss: 4731.109863, avg loss: 2.776473, ppl: 16.062269 +epoch: 0, batch: 42948, sum loss: 5494.870117, avg loss: 3.366955, ppl: 28.990112 +epoch: 0, batch: 42949, sum loss: 4889.996094, avg loss: 2.979888, ppl: 19.685606 +epoch: 0, batch: 42950, sum loss: 4873.357422, avg loss: 2.864995, ppl: 17.548973 +epoch: 0, batch: 42951, sum loss: 5561.244141, avg loss: 2.811549, ppl: 16.635666 +epoch: 0, batch: 42952, sum loss: 4926.191895, avg loss: 2.852456, ppl: 17.330299 +epoch: 0, batch: 42953, sum loss: 4678.540527, avg loss: 2.813314, ppl: 16.665051 +epoch: 0, batch: 42954, sum loss: 4548.026855, avg loss: 2.976457, ppl: 19.618193 +epoch: 0, batch: 42955, sum loss: 3795.048340, avg loss: 2.521627, ppl: 12.448833 +epoch: 0, batch: 42956, sum loss: 5154.151367, avg loss: 3.024737, ppl: 20.588581 +epoch: 0, batch: 42957, sum loss: 5526.846680, avg loss: 2.821259, ppl: 16.797991 +epoch: 0, batch: 42958, sum loss: 4717.481934, avg loss: 2.921041, ppl: 18.560608 +epoch: 0, batch: 42959, sum loss: 4360.396973, avg loss: 2.754515, ppl: 
15.713413 +epoch: 0, batch: 42960, sum loss: 4817.596680, avg loss: 2.916221, ppl: 18.471350 +epoch: 0, batch: 42961, sum loss: 4880.848145, avg loss: 3.060093, ppl: 21.329538 +epoch: 0, batch: 42962, sum loss: 3368.086182, avg loss: 2.350374, ppl: 10.489494 +epoch: 0, batch: 42963, sum loss: 5477.781250, avg loss: 3.038148, ppl: 20.866566 +epoch: 0, batch: 42964, sum loss: 4107.638672, avg loss: 2.573709, ppl: 13.114369 +epoch: 0, batch: 42965, sum loss: 4576.736816, avg loss: 2.842694, ppl: 17.161930 +epoch: 0, batch: 42966, sum loss: 5557.264160, avg loss: 3.066923, ppl: 21.475718 +epoch: 0, batch: 42967, sum loss: 4130.906738, avg loss: 2.641245, ppl: 14.030656 +epoch: 0, batch: 42968, sum loss: 4650.452148, avg loss: 2.793064, ppl: 16.330988 +epoch: 0, batch: 42969, sum loss: 5609.570312, avg loss: 2.960195, ppl: 19.301741 +epoch: 0, batch: 42970, sum loss: 4640.097656, avg loss: 2.873126, ppl: 17.692230 +epoch: 0, batch: 42971, sum loss: 4603.181641, avg loss: 2.860896, ppl: 17.477177 +epoch: 0, batch: 42972, sum loss: 4750.373535, avg loss: 2.912553, ppl: 18.403721 +epoch: 0, batch: 42973, sum loss: 4269.956055, avg loss: 2.848536, ppl: 17.262499 +epoch: 0, batch: 42974, sum loss: 4949.838379, avg loss: 2.757570, ppl: 15.761500 +epoch: 0, batch: 42975, sum loss: 5139.089355, avg loss: 3.039083, ppl: 20.886087 +epoch: 0, batch: 42976, sum loss: 3143.818848, avg loss: 2.308237, ppl: 10.056680 +epoch: 0, batch: 42977, sum loss: 4235.120117, avg loss: 2.670315, ppl: 14.444522 +epoch: 0, batch: 42978, sum loss: 4862.078613, avg loss: 2.835031, ppl: 17.030930 +epoch: 0, batch: 42979, sum loss: 3484.456787, avg loss: 2.469495, ppl: 11.816473 +epoch: 0, batch: 42980, sum loss: 4435.741211, avg loss: 2.604663, ppl: 13.526668 +epoch: 0, batch: 42981, sum loss: 4782.080078, avg loss: 2.826288, ppl: 16.882683 +epoch: 0, batch: 42982, sum loss: 5313.855957, avg loss: 3.284213, ppl: 26.687962 +epoch: 0, batch: 42983, sum loss: 4712.816895, avg loss: 2.903769, ppl: 
18.242769 +epoch: 0, batch: 42984, sum loss: 4735.700195, avg loss: 2.907121, ppl: 18.304024 +epoch: 0, batch: 42985, sum loss: 5402.965820, avg loss: 2.828778, ppl: 16.924767 +epoch: 0, batch: 42986, sum loss: 5249.386719, avg loss: 2.889041, ppl: 17.976057 +epoch: 0, batch: 42987, sum loss: 4707.754395, avg loss: 2.745046, ppl: 15.565332 +epoch: 0, batch: 42988, sum loss: 3894.779297, avg loss: 2.605203, ppl: 13.533978 +epoch: 0, batch: 42989, sum loss: 4043.375244, avg loss: 2.610313, ppl: 13.603311 +epoch: 0, batch: 42990, sum loss: 4243.246094, avg loss: 2.800822, ppl: 16.458178 +epoch: 0, batch: 42991, sum loss: 5616.217285, avg loss: 3.128812, ppl: 22.846817 +epoch: 0, batch: 42992, sum loss: 4095.019043, avg loss: 2.623331, ppl: 13.781548 +epoch: 0, batch: 42993, sum loss: 4581.431641, avg loss: 2.951954, ppl: 19.143316 +epoch: 0, batch: 42994, sum loss: 4724.037109, avg loss: 2.760980, ppl: 15.815340 +epoch: 0, batch: 42995, sum loss: 4604.358398, avg loss: 2.997629, ppl: 20.037973 +epoch: 0, batch: 42996, sum loss: 5234.951660, avg loss: 2.805440, ppl: 16.534353 +epoch: 0, batch: 42997, sum loss: 4437.950195, avg loss: 2.872460, ppl: 17.680456 +epoch: 0, batch: 42998, sum loss: 5715.363770, avg loss: 2.997045, ppl: 20.026262 +epoch: 0, batch: 42999, sum loss: 5259.131348, avg loss: 2.934783, ppl: 18.817423 +epoch: 0, batch: 43000, sum loss: 5211.284180, avg loss: 2.857064, ppl: 17.410332 +epoch: 0, batch: 43001, sum loss: 5075.241211, avg loss: 2.940464, ppl: 18.924625 +epoch: 0, batch: 43002, sum loss: 5189.001465, avg loss: 2.999423, ppl: 20.073946 +epoch: 0, batch: 43003, sum loss: 4453.227539, avg loss: 2.822071, ppl: 16.811625 +epoch: 0, batch: 43004, sum loss: 3895.493652, avg loss: 2.720317, ppl: 15.185133 +epoch: 0, batch: 43005, sum loss: 4791.219238, avg loss: 2.909058, ppl: 18.339521 +epoch: 0, batch: 43006, sum loss: 4277.189941, avg loss: 2.684991, ppl: 14.658064 +epoch: 0, batch: 43007, sum loss: 5348.309570, avg loss: 2.999613, ppl: 
20.077761 +epoch: 0, batch: 43008, sum loss: 4961.512207, avg loss: 2.814244, ppl: 16.680561 +epoch: 0, batch: 43009, sum loss: 4262.173340, avg loss: 2.694168, ppl: 14.793200 +epoch: 0, batch: 43010, sum loss: 3591.286377, avg loss: 2.589247, ppl: 13.319744 +epoch: 0, batch: 43011, sum loss: 5256.517578, avg loss: 2.995167, ppl: 19.988689 +epoch: 0, batch: 43012, sum loss: 4261.353516, avg loss: 2.758158, ppl: 15.770762 +epoch: 0, batch: 43013, sum loss: 4481.699707, avg loss: 2.794077, ppl: 16.347536 +epoch: 0, batch: 43014, sum loss: 5173.268066, avg loss: 2.995523, ppl: 19.995815 +epoch: 0, batch: 43015, sum loss: 3788.464355, avg loss: 2.561504, ppl: 12.955286 +epoch: 0, batch: 43016, sum loss: 5427.488770, avg loss: 3.000270, ppl: 20.090967 +epoch: 0, batch: 43017, sum loss: 4925.056152, avg loss: 2.840286, ppl: 17.120661 +epoch: 0, batch: 43018, sum loss: 4718.598145, avg loss: 2.909123, ppl: 18.340714 +epoch: 0, batch: 43019, sum loss: 4172.458984, avg loss: 2.373412, ppl: 10.733958 +epoch: 0, batch: 43020, sum loss: 3972.287598, avg loss: 2.702236, ppl: 14.913046 +epoch: 0, batch: 43021, sum loss: 4579.154297, avg loss: 2.766861, ppl: 15.908614 +epoch: 0, batch: 43022, sum loss: 5534.087402, avg loss: 2.894397, ppl: 18.072601 +epoch: 0, batch: 43023, sum loss: 4355.755859, avg loss: 2.715559, ppl: 15.113049 +epoch: 0, batch: 43024, sum loss: 4814.842773, avg loss: 3.086438, ppl: 21.898928 +epoch: 0, batch: 43025, sum loss: 3907.381348, avg loss: 2.700333, ppl: 14.884682 +epoch: 0, batch: 43026, sum loss: 4567.763184, avg loss: 2.928053, ppl: 18.691210 +epoch: 0, batch: 43027, sum loss: 5579.996094, avg loss: 2.943036, ppl: 18.973364 +epoch: 0, batch: 43028, sum loss: 4163.424805, avg loss: 2.794245, ppl: 16.350279 +epoch: 0, batch: 43029, sum loss: 4747.731445, avg loss: 2.961779, ppl: 19.332327 +epoch: 0, batch: 43030, sum loss: 5187.143066, avg loss: 2.996616, ppl: 20.017689 +epoch: 0, batch: 43031, sum loss: 6233.361816, avg loss: 3.107359, ppl: 
22.361908 +epoch: 0, batch: 43032, sum loss: 5575.399414, avg loss: 3.055013, ppl: 21.221470 +epoch: 0, batch: 43033, sum loss: 3838.868164, avg loss: 2.749906, ppl: 15.641155 +epoch: 0, batch: 43034, sum loss: 4816.768066, avg loss: 2.792330, ppl: 16.318991 +epoch: 0, batch: 43035, sum loss: 5119.684082, avg loss: 2.954232, ppl: 19.186981 +epoch: 0, batch: 43036, sum loss: 4795.428223, avg loss: 2.980378, ppl: 19.695259 +epoch: 0, batch: 43037, sum loss: 4514.747559, avg loss: 2.597668, ppl: 13.432383 +epoch: 0, batch: 43038, sum loss: 4526.253906, avg loss: 2.783674, ppl: 16.178350 +epoch: 0, batch: 43039, sum loss: 5473.545898, avg loss: 3.109969, ppl: 22.420353 +epoch: 0, batch: 43040, sum loss: 4857.203125, avg loss: 2.874085, ppl: 17.709208 +epoch: 0, batch: 43041, sum loss: 4309.505371, avg loss: 2.720647, ppl: 15.190152 +epoch: 0, batch: 43042, sum loss: 4701.781738, avg loss: 2.692888, ppl: 14.774280 +epoch: 0, batch: 43043, sum loss: 4053.286377, avg loss: 2.778126, ppl: 16.088850 +epoch: 0, batch: 43044, sum loss: 4289.394043, avg loss: 2.721697, ppl: 15.206100 +epoch: 0, batch: 43045, sum loss: 5199.463867, avg loss: 3.053120, ppl: 21.181335 +epoch: 0, batch: 43046, sum loss: 4151.949219, avg loss: 2.666634, ppl: 14.391447 +epoch: 0, batch: 43047, sum loss: 5562.627930, avg loss: 3.005202, ppl: 20.190285 +epoch: 0, batch: 43048, sum loss: 4473.714844, avg loss: 2.790839, ppl: 16.294685 +epoch: 0, batch: 43049, sum loss: 4819.607422, avg loss: 2.953191, ppl: 19.167019 +epoch: 0, batch: 43050, sum loss: 3799.753418, avg loss: 2.488378, ppl: 12.041729 +epoch: 0, batch: 43051, sum loss: 3741.481934, avg loss: 2.625601, ppl: 13.812878 +epoch: 0, batch: 43052, sum loss: 4217.904785, avg loss: 2.582918, ppl: 13.235700 +epoch: 0, batch: 43053, sum loss: 5423.089355, avg loss: 2.809891, ppl: 16.608109 +epoch: 0, batch: 43054, sum loss: 4383.968262, avg loss: 2.733147, ppl: 15.381222 +epoch: 0, batch: 43055, sum loss: 4853.747070, avg loss: 3.018499, ppl: 
20.460566 +epoch: 0, batch: 43056, sum loss: 4640.398438, avg loss: 2.873312, ppl: 17.695524 +epoch: 0, batch: 43057, sum loss: 5146.445312, avg loss: 2.844912, ppl: 17.200043 +epoch: 0, batch: 43058, sum loss: 5782.994141, avg loss: 2.967160, ppl: 19.436636 +epoch: 0, batch: 43059, sum loss: 4290.040039, avg loss: 2.863845, ppl: 17.528797 +epoch: 0, batch: 43060, sum loss: 4744.724121, avg loss: 2.894890, ppl: 18.081505 +epoch: 0, batch: 43061, sum loss: 4444.708984, avg loss: 2.862015, ppl: 17.496748 +epoch: 0, batch: 43062, sum loss: 4637.535645, avg loss: 2.903905, ppl: 18.245249 +epoch: 0, batch: 43063, sum loss: 4167.699707, avg loss: 2.353303, ppl: 10.520261 +epoch: 0, batch: 43064, sum loss: 4634.576172, avg loss: 3.059126, ppl: 21.308933 +epoch: 0, batch: 43065, sum loss: 6289.013184, avg loss: 3.226790, ppl: 25.198639 +epoch: 0, batch: 43066, sum loss: 4842.920898, avg loss: 2.972941, ppl: 19.549328 +epoch: 0, batch: 43067, sum loss: 5137.560059, avg loss: 2.978296, ppl: 19.654293 +epoch: 0, batch: 43068, sum loss: 5066.934570, avg loss: 2.980550, ppl: 19.698645 +epoch: 0, batch: 43069, sum loss: 4333.023926, avg loss: 2.666476, ppl: 14.389175 +epoch: 0, batch: 43070, sum loss: 5168.044922, avg loss: 3.160884, ppl: 23.591434 +epoch: 0, batch: 43071, sum loss: 5676.991699, avg loss: 2.918762, ppl: 18.518347 +epoch: 0, batch: 43072, sum loss: 4013.620117, avg loss: 2.730354, ppl: 15.338313 +epoch: 0, batch: 43073, sum loss: 4945.130859, avg loss: 2.977201, ppl: 19.632788 +epoch: 0, batch: 43074, sum loss: 4146.439941, avg loss: 2.624329, ppl: 13.795316 +epoch: 0, batch: 43075, sum loss: 5444.867188, avg loss: 2.877837, ppl: 17.775778 +epoch: 0, batch: 43076, sum loss: 4391.085449, avg loss: 2.793311, ppl: 16.335022 +epoch: 0, batch: 43077, sum loss: 4886.905273, avg loss: 2.884832, ppl: 17.900558 +epoch: 0, batch: 43078, sum loss: 4471.091309, avg loss: 2.858754, ppl: 17.439781 +epoch: 0, batch: 43079, sum loss: 4623.484863, avg loss: 2.631465, ppl: 
13.894115 +epoch: 0, batch: 43080, sum loss: 4531.161133, avg loss: 2.816135, ppl: 16.712132 +epoch: 0, batch: 43081, sum loss: 5031.654785, avg loss: 3.002181, ppl: 20.129387 +epoch: 0, batch: 43082, sum loss: 4534.782227, avg loss: 2.910643, ppl: 18.368612 +epoch: 0, batch: 43083, sum loss: 5322.256348, avg loss: 2.927534, ppl: 18.681498 +epoch: 0, batch: 43084, sum loss: 4594.765625, avg loss: 2.698042, ppl: 14.850628 +epoch: 0, batch: 43085, sum loss: 4990.536621, avg loss: 2.780243, ppl: 16.122944 +epoch: 0, batch: 43086, sum loss: 4755.335938, avg loss: 3.015432, ppl: 20.397898 +epoch: 0, batch: 43087, sum loss: 5153.170898, avg loss: 2.917990, ppl: 18.504061 +epoch: 0, batch: 43088, sum loss: 4089.332764, avg loss: 2.665797, ppl: 14.379405 +epoch: 0, batch: 43089, sum loss: 4252.167969, avg loss: 2.957001, ppl: 19.240192 +epoch: 0, batch: 43090, sum loss: 4912.376465, avg loss: 2.882850, ppl: 17.865120 +epoch: 0, batch: 43091, sum loss: 4543.918945, avg loss: 3.061940, ppl: 21.368971 +epoch: 0, batch: 43092, sum loss: 4271.830078, avg loss: 2.981040, ppl: 19.708303 +epoch: 0, batch: 43093, sum loss: 5649.109375, avg loss: 2.945312, ppl: 19.016603 +epoch: 0, batch: 43094, sum loss: 4670.238770, avg loss: 2.989910, ppl: 19.883886 +epoch: 0, batch: 43095, sum loss: 5415.846680, avg loss: 2.916449, ppl: 18.475571 +epoch: 0, batch: 43096, sum loss: 4661.815918, avg loss: 2.773240, ppl: 16.010418 +epoch: 0, batch: 43097, sum loss: 3661.637207, avg loss: 2.469074, ppl: 11.811510 +epoch: 0, batch: 43098, sum loss: 4973.995605, avg loss: 2.797523, ppl: 16.403965 +epoch: 0, batch: 43099, sum loss: 5063.979004, avg loss: 3.112464, ppl: 22.476357 +epoch: 0, batch: 43100, sum loss: 4681.614258, avg loss: 2.913264, ppl: 18.416805 +epoch: 0, batch: 43101, sum loss: 4102.689941, avg loss: 2.760895, ppl: 15.813991 +epoch: 0, batch: 43102, sum loss: 4806.228027, avg loss: 2.770160, ppl: 15.961191 +epoch: 0, batch: 43103, sum loss: 4955.342285, avg loss: 2.836487, ppl: 
17.055737 +epoch: 0, batch: 43104, sum loss: 5435.608887, avg loss: 2.860847, ppl: 17.476322 +epoch: 0, batch: 43105, sum loss: 3860.279297, avg loss: 2.779179, ppl: 16.105787 +epoch: 0, batch: 43106, sum loss: 5481.628418, avg loss: 2.911114, ppl: 18.377264 +epoch: 0, batch: 43107, sum loss: 4556.200684, avg loss: 2.669127, ppl: 14.427375 +epoch: 0, batch: 43108, sum loss: 5992.060059, avg loss: 3.095072, ppl: 22.088835 +epoch: 0, batch: 43109, sum loss: 4686.248047, avg loss: 2.828152, ppl: 16.914173 +epoch: 0, batch: 43110, sum loss: 5516.785156, avg loss: 2.953311, ppl: 19.169317 +epoch: 0, batch: 43111, sum loss: 4197.760742, avg loss: 2.804115, ppl: 16.512461 +epoch: 0, batch: 43112, sum loss: 4935.509766, avg loss: 3.056043, ppl: 21.243334 +epoch: 0, batch: 43113, sum loss: 4069.019531, avg loss: 2.714489, ppl: 15.096901 +epoch: 0, batch: 43114, sum loss: 7017.823242, avg loss: 3.372332, ppl: 29.146420 +epoch: 0, batch: 43115, sum loss: 4592.413086, avg loss: 2.895595, ppl: 18.094257 +epoch: 0, batch: 43116, sum loss: 4609.198730, avg loss: 2.960308, ppl: 19.303909 +epoch: 0, batch: 43117, sum loss: 4169.818359, avg loss: 2.598018, ppl: 13.437076 +epoch: 0, batch: 43118, sum loss: 5203.209961, avg loss: 3.091628, ppl: 22.012882 +epoch: 0, batch: 43119, sum loss: 5127.508789, avg loss: 3.039424, ppl: 20.893213 +epoch: 0, batch: 43120, sum loss: 5322.711914, avg loss: 3.027709, ppl: 20.649860 +epoch: 0, batch: 43121, sum loss: 5080.762207, avg loss: 2.719894, ppl: 15.178716 +epoch: 0, batch: 43122, sum loss: 4905.391113, avg loss: 2.815953, ppl: 16.709101 +epoch: 0, batch: 43123, sum loss: 4575.594727, avg loss: 2.592405, ppl: 13.361870 +epoch: 0, batch: 43124, sum loss: 5862.486816, avg loss: 3.140057, ppl: 23.105186 +epoch: 0, batch: 43125, sum loss: 5588.440430, avg loss: 2.956847, ppl: 19.237215 +epoch: 0, batch: 43126, sum loss: 5221.794922, avg loss: 3.021872, ppl: 20.529688 +epoch: 0, batch: 43127, sum loss: 4458.765625, avg loss: 2.893424, ppl: 
18.055016 +epoch: 0, batch: 43128, sum loss: 4989.411133, avg loss: 2.738425, ppl: 15.462620 +epoch: 0, batch: 43129, sum loss: 4340.165039, avg loss: 2.814634, ppl: 16.687061 +epoch: 0, batch: 43130, sum loss: 5660.342773, avg loss: 2.892357, ppl: 18.035772 +epoch: 0, batch: 43131, sum loss: 5324.829590, avg loss: 2.932175, ppl: 18.768402 +epoch: 0, batch: 43132, sum loss: 5028.591309, avg loss: 2.925301, ppl: 18.639828 +epoch: 0, batch: 43133, sum loss: 5000.741211, avg loss: 2.799967, ppl: 16.444109 +epoch: 0, batch: 43134, sum loss: 4472.562500, avg loss: 2.805874, ppl: 16.541521 +epoch: 0, batch: 43135, sum loss: 4292.257812, avg loss: 2.958138, ppl: 19.262068 +epoch: 0, batch: 43136, sum loss: 4649.187500, avg loss: 2.847022, ppl: 17.236380 +epoch: 0, batch: 43137, sum loss: 5013.614258, avg loss: 3.112113, ppl: 22.468475 +epoch: 0, batch: 43138, sum loss: 4587.767090, avg loss: 2.729189, ppl: 15.320459 +epoch: 0, batch: 43139, sum loss: 5199.976562, avg loss: 3.032056, ppl: 20.739832 +epoch: 0, batch: 43140, sum loss: 3770.563477, avg loss: 2.618447, ppl: 13.714406 +epoch: 0, batch: 43141, sum loss: 4999.174316, avg loss: 2.975699, ppl: 19.603325 +epoch: 0, batch: 43142, sum loss: 5443.068848, avg loss: 2.990697, ppl: 19.899555 +epoch: 0, batch: 43143, sum loss: 5346.181641, avg loss: 3.150372, ppl: 23.344753 +epoch: 0, batch: 43144, sum loss: 4826.720703, avg loss: 2.905913, ppl: 18.281921 +epoch: 0, batch: 43145, sum loss: 4941.129395, avg loss: 2.849555, ppl: 17.280100 +epoch: 0, batch: 43146, sum loss: 4675.877441, avg loss: 2.763521, ppl: 15.855572 +epoch: 0, batch: 43147, sum loss: 4552.738281, avg loss: 2.805138, ppl: 16.529350 +epoch: 0, batch: 43148, sum loss: 5146.047363, avg loss: 2.879713, ppl: 17.809166 +epoch: 0, batch: 43149, sum loss: 4238.801758, avg loss: 2.725918, ppl: 15.270419 +epoch: 0, batch: 43150, sum loss: 5211.719727, avg loss: 3.133926, ppl: 22.963968 +epoch: 0, batch: 43151, sum loss: 3796.254395, avg loss: 2.696203, ppl: 
14.823347 +epoch: 0, batch: 43152, sum loss: 4592.597168, avg loss: 2.865001, ppl: 17.549074 +epoch: 0, batch: 43153, sum loss: 4638.413086, avg loss: 2.743000, ppl: 15.533520 +epoch: 0, batch: 43154, sum loss: 5543.076172, avg loss: 3.140553, ppl: 23.116648 +epoch: 0, batch: 43155, sum loss: 5244.392578, avg loss: 3.017487, ppl: 20.439863 +epoch: 0, batch: 43156, sum loss: 4691.799316, avg loss: 2.756639, ppl: 15.746825 +epoch: 0, batch: 43157, sum loss: 5368.770508, avg loss: 3.232252, ppl: 25.336647 +epoch: 0, batch: 43158, sum loss: 6359.251465, avg loss: 3.134180, ppl: 22.969799 +epoch: 0, batch: 43159, sum loss: 4791.047363, avg loss: 2.863746, ppl: 17.527063 +epoch: 0, batch: 43160, sum loss: 5518.177734, avg loss: 3.108832, ppl: 22.394880 +epoch: 0, batch: 43161, sum loss: 5596.798340, avg loss: 3.075164, ppl: 21.653435 +epoch: 0, batch: 43162, sum loss: 5101.908691, avg loss: 2.917043, ppl: 18.486546 +epoch: 0, batch: 43163, sum loss: 4693.211426, avg loss: 2.695699, ppl: 14.815867 +epoch: 0, batch: 43164, sum loss: 4798.933594, avg loss: 2.913742, ppl: 18.425625 +epoch: 0, batch: 43165, sum loss: 4452.068359, avg loss: 2.998026, ppl: 20.045919 +epoch: 0, batch: 43166, sum loss: 3881.303467, avg loss: 2.987916, ppl: 19.844294 +epoch: 0, batch: 43167, sum loss: 4855.777344, avg loss: 2.918136, ppl: 18.506752 +epoch: 0, batch: 43168, sum loss: 5249.364746, avg loss: 2.672793, ppl: 14.480351 +epoch: 0, batch: 43169, sum loss: 4151.212891, avg loss: 2.787920, ppl: 16.247190 +epoch: 0, batch: 43170, sum loss: 4563.057129, avg loss: 2.579456, ppl: 13.189959 +epoch: 0, batch: 43171, sum loss: 4796.684570, avg loss: 3.149498, ppl: 23.324341 +epoch: 0, batch: 43172, sum loss: 4937.738770, avg loss: 2.887567, ppl: 17.949577 +epoch: 0, batch: 43173, sum loss: 5105.094238, avg loss: 3.056943, ppl: 21.262451 +epoch: 0, batch: 43174, sum loss: 4212.784180, avg loss: 2.503140, ppl: 12.220804 +epoch: 0, batch: 43175, sum loss: 4932.285156, avg loss: 2.982035, ppl: 
19.727917 +epoch: 0, batch: 43176, sum loss: 4498.082031, avg loss: 2.930346, ppl: 18.734121 +epoch: 0, batch: 43177, sum loss: 4489.982422, avg loss: 2.518218, ppl: 12.406469 +epoch: 0, batch: 43178, sum loss: 3947.694580, avg loss: 2.817769, ppl: 16.739464 +epoch: 0, batch: 43179, sum loss: 3748.819824, avg loss: 2.400013, ppl: 11.023316 +epoch: 0, batch: 43180, sum loss: 4760.779297, avg loss: 2.828746, ppl: 16.924221 +epoch: 0, batch: 43181, sum loss: 4540.503418, avg loss: 2.639827, ppl: 14.010786 +epoch: 0, batch: 43182, sum loss: 5779.657715, avg loss: 2.979205, ppl: 19.672174 +epoch: 0, batch: 43183, sum loss: 4576.353516, avg loss: 2.845991, ppl: 17.218613 +epoch: 0, batch: 43184, sum loss: 4583.169434, avg loss: 2.908102, ppl: 18.321995 +epoch: 0, batch: 43185, sum loss: 4053.853027, avg loss: 2.706177, ppl: 14.971925 +epoch: 0, batch: 43186, sum loss: 4055.783447, avg loss: 2.589900, ppl: 13.328439 +epoch: 0, batch: 43187, sum loss: 4058.076660, avg loss: 2.641977, ppl: 14.040936 +epoch: 0, batch: 43188, sum loss: 4353.921387, avg loss: 2.704299, ppl: 14.943837 +epoch: 0, batch: 43189, sum loss: 5007.632324, avg loss: 2.825978, ppl: 16.877436 +epoch: 0, batch: 43190, sum loss: 5285.261719, avg loss: 3.078196, ppl: 21.719177 +epoch: 0, batch: 43191, sum loss: 4976.906738, avg loss: 3.021801, ppl: 20.528234 +epoch: 0, batch: 43192, sum loss: 5131.403809, avg loss: 2.913915, ppl: 18.428801 +epoch: 0, batch: 43193, sum loss: 5326.577637, avg loss: 2.846915, ppl: 17.234531 +epoch: 0, batch: 43194, sum loss: 3861.895020, avg loss: 2.477162, ppl: 11.907419 +epoch: 0, batch: 43195, sum loss: 4317.926758, avg loss: 2.857662, ppl: 17.420744 +epoch: 0, batch: 43196, sum loss: 4490.158203, avg loss: 2.688718, ppl: 14.712796 +epoch: 0, batch: 43197, sum loss: 4504.034180, avg loss: 2.848851, ppl: 17.267937 +epoch: 0, batch: 43198, sum loss: 5411.322266, avg loss: 3.155290, ppl: 23.459837 +epoch: 0, batch: 43199, sum loss: 4657.452637, avg loss: 2.936603, ppl: 
18.851700 +epoch: 0, batch: 43200, sum loss: 3713.030762, avg loss: 2.582080, ppl: 13.224615 +epoch: 0, batch: 43201, sum loss: 6000.212891, avg loss: 3.163001, ppl: 23.641439 +epoch: 0, batch: 43202, sum loss: 5013.389160, avg loss: 3.021934, ppl: 20.530966 +epoch: 0, batch: 43203, sum loss: 4437.116699, avg loss: 2.965987, ppl: 19.413858 +epoch: 0, batch: 43204, sum loss: 4512.452637, avg loss: 3.051016, ppl: 21.136805 +epoch: 0, batch: 43205, sum loss: 5008.722168, avg loss: 3.020942, ppl: 20.510609 +epoch: 0, batch: 43206, sum loss: 4992.464844, avg loss: 2.675490, ppl: 14.519464 +epoch: 0, batch: 43207, sum loss: 5440.767090, avg loss: 2.986151, ppl: 19.809290 +epoch: 0, batch: 43208, sum loss: 5695.878906, avg loss: 3.055729, ppl: 21.236660 +epoch: 0, batch: 43209, sum loss: 3905.871094, avg loss: 2.781960, ppl: 16.150639 +epoch: 0, batch: 43210, sum loss: 4440.557617, avg loss: 2.794561, ppl: 16.355450 +epoch: 0, batch: 43211, sum loss: 4709.946289, avg loss: 2.682202, ppl: 14.617243 +epoch: 0, batch: 43212, sum loss: 5033.374512, avg loss: 3.097461, ppl: 22.141666 +epoch: 0, batch: 43213, sum loss: 4769.359375, avg loss: 2.920612, ppl: 18.552635 +epoch: 0, batch: 43214, sum loss: 4422.629883, avg loss: 2.755533, ppl: 15.729415 +epoch: 0, batch: 43215, sum loss: 4606.690430, avg loss: 2.798718, ppl: 16.423586 +epoch: 0, batch: 43216, sum loss: 4242.338867, avg loss: 2.620345, ppl: 13.740468 +epoch: 0, batch: 43217, sum loss: 4493.458496, avg loss: 2.635460, ppl: 13.949723 +epoch: 0, batch: 43218, sum loss: 5071.944336, avg loss: 2.760993, ppl: 15.815544 +epoch: 0, batch: 43219, sum loss: 5040.580078, avg loss: 2.778710, ppl: 16.098244 +epoch: 0, batch: 43220, sum loss: 4724.389160, avg loss: 2.898398, ppl: 18.145056 +epoch: 0, batch: 43221, sum loss: 5647.837402, avg loss: 3.122077, ppl: 22.693470 +epoch: 0, batch: 43222, sum loss: 4792.721191, avg loss: 2.974998, ppl: 19.589579 +epoch: 0, batch: 43223, sum loss: 4932.078125, avg loss: 2.615100, ppl: 
13.668582 +epoch: 0, batch: 43224, sum loss: 5138.711914, avg loss: 2.806506, ppl: 16.551983 +epoch: 0, batch: 43225, sum loss: 4095.854736, avg loss: 2.260406, ppl: 9.586976 +epoch: 0, batch: 43226, sum loss: 4499.117188, avg loss: 2.590165, ppl: 13.331976 +epoch: 0, batch: 43227, sum loss: 5097.404297, avg loss: 2.965331, ppl: 19.401131 +epoch: 0, batch: 43228, sum loss: 4723.629395, avg loss: 2.838720, ppl: 17.093864 +epoch: 0, batch: 43229, sum loss: 5057.263184, avg loss: 2.845956, ppl: 17.218004 +epoch: 0, batch: 43230, sum loss: 5192.533691, avg loss: 2.925371, ppl: 18.641140 +epoch: 0, batch: 43231, sum loss: 3659.454834, avg loss: 2.879194, ppl: 17.799913 +epoch: 0, batch: 43232, sum loss: 4725.038574, avg loss: 2.842983, ppl: 17.166906 +epoch: 0, batch: 43233, sum loss: 4744.094727, avg loss: 2.767850, ppl: 15.924358 +epoch: 0, batch: 43234, sum loss: 5195.433594, avg loss: 3.150657, ppl: 23.351406 +epoch: 0, batch: 43235, sum loss: 3956.912598, avg loss: 2.615276, ppl: 13.670990 +epoch: 0, batch: 43236, sum loss: 5446.752441, avg loss: 2.872760, ppl: 17.685760 +epoch: 0, batch: 43237, sum loss: 5002.002441, avg loss: 2.706711, ppl: 14.979930 +epoch: 0, batch: 43238, sum loss: 4409.850586, avg loss: 2.805248, ppl: 16.531183 +epoch: 0, batch: 43239, sum loss: 4531.584961, avg loss: 2.698978, ppl: 14.864539 +epoch: 0, batch: 43240, sum loss: 3898.021484, avg loss: 2.644519, ppl: 14.076677 +epoch: 0, batch: 43241, sum loss: 5630.460938, avg loss: 3.020633, ppl: 20.504276 +epoch: 0, batch: 43242, sum loss: 5420.425293, avg loss: 2.915775, ppl: 18.463108 +epoch: 0, batch: 43243, sum loss: 4847.492676, avg loss: 2.678173, ppl: 14.558468 +epoch: 0, batch: 43244, sum loss: 4611.856934, avg loss: 2.895077, ppl: 18.084885 +epoch: 0, batch: 43245, sum loss: 3395.564941, avg loss: 2.439343, ppl: 11.465503 +epoch: 0, batch: 43246, sum loss: 4557.878906, avg loss: 2.700165, ppl: 14.882191 +epoch: 0, batch: 43247, sum loss: 4901.619141, avg loss: 2.994270, ppl: 
19.970768 +epoch: 0, batch: 43248, sum loss: 5909.275879, avg loss: 2.839633, ppl: 17.109482 +epoch: 0, batch: 43249, sum loss: 3885.968506, avg loss: 2.599310, ppl: 13.454451 +epoch: 0, batch: 43250, sum loss: 4185.692383, avg loss: 2.900688, ppl: 18.186647 +epoch: 0, batch: 43251, sum loss: 4588.656250, avg loss: 2.809955, ppl: 16.609165 +epoch: 0, batch: 43252, sum loss: 4044.668457, avg loss: 2.584453, ppl: 13.256031 +epoch: 0, batch: 43253, sum loss: 4076.457275, avg loss: 2.546195, ppl: 12.758460 +epoch: 0, batch: 43254, sum loss: 4930.583008, avg loss: 2.771548, ppl: 15.983350 +epoch: 0, batch: 43255, sum loss: 4549.244629, avg loss: 2.994894, ppl: 19.983248 +epoch: 0, batch: 43256, sum loss: 5333.541504, avg loss: 3.052972, ppl: 21.178185 +epoch: 0, batch: 43257, sum loss: 5372.000488, avg loss: 3.158143, ppl: 23.526855 +epoch: 0, batch: 43258, sum loss: 4839.688477, avg loss: 2.843530, ppl: 17.176294 +epoch: 0, batch: 43259, sum loss: 6171.340332, avg loss: 2.975574, ppl: 19.600872 +epoch: 0, batch: 43260, sum loss: 4786.367188, avg loss: 2.989611, ppl: 19.877951 +epoch: 0, batch: 43261, sum loss: 4156.780273, avg loss: 2.594744, ppl: 13.393161 +epoch: 0, batch: 43262, sum loss: 5372.076660, avg loss: 2.892879, ppl: 18.045187 +epoch: 0, batch: 43263, sum loss: 4002.947266, avg loss: 2.552900, ppl: 12.844299 +epoch: 0, batch: 43264, sum loss: 5329.686523, avg loss: 2.884029, ppl: 17.886198 +epoch: 0, batch: 43265, sum loss: 4595.002441, avg loss: 2.621222, ppl: 13.752522 +epoch: 0, batch: 43266, sum loss: 5060.662598, avg loss: 2.862366, ppl: 17.502890 +epoch: 0, batch: 43267, sum loss: 4853.245605, avg loss: 2.911365, ppl: 18.381874 +epoch: 0, batch: 43268, sum loss: 5940.823730, avg loss: 3.043455, ppl: 20.977592 +epoch: 0, batch: 43269, sum loss: 5283.562500, avg loss: 2.938577, ppl: 18.888956 +epoch: 0, batch: 43270, sum loss: 4282.965820, avg loss: 2.666853, ppl: 14.394597 +epoch: 0, batch: 43271, sum loss: 5237.348633, avg loss: 2.817293, ppl: 
16.731504 +epoch: 0, batch: 43272, sum loss: 4097.364746, avg loss: 2.541790, ppl: 12.702382 +epoch: 0, batch: 43273, sum loss: 5286.213867, avg loss: 2.822325, ppl: 16.815893 +epoch: 0, batch: 43274, sum loss: 5228.024414, avg loss: 2.824432, ppl: 16.851377 +epoch: 0, batch: 43275, sum loss: 4327.334473, avg loss: 2.630598, ppl: 13.882073 +epoch: 0, batch: 43276, sum loss: 5177.620117, avg loss: 3.010244, ppl: 20.292353 +epoch: 0, batch: 43277, sum loss: 4117.752441, avg loss: 2.851629, ppl: 17.315968 +epoch: 0, batch: 43278, sum loss: 5161.114746, avg loss: 3.048503, ppl: 21.083752 +epoch: 0, batch: 43279, sum loss: 5108.974121, avg loss: 2.966884, ppl: 19.431274 +epoch: 0, batch: 43280, sum loss: 4588.812012, avg loss: 2.787857, ppl: 16.246164 +epoch: 0, batch: 43281, sum loss: 5500.941895, avg loss: 2.979925, ppl: 19.686342 +epoch: 0, batch: 43282, sum loss: 5883.212402, avg loss: 3.243226, ppl: 25.616234 +epoch: 0, batch: 43283, sum loss: 3908.976318, avg loss: 2.739297, ppl: 15.476096 +epoch: 0, batch: 43284, sum loss: 5387.698730, avg loss: 3.096379, ppl: 22.117708 +epoch: 0, batch: 43285, sum loss: 4372.462402, avg loss: 2.777930, ppl: 16.085693 +epoch: 0, batch: 43286, sum loss: 4589.215332, avg loss: 2.843380, ppl: 17.173714 +epoch: 0, batch: 43287, sum loss: 3962.932129, avg loss: 2.571663, ppl: 13.087566 +epoch: 0, batch: 43288, sum loss: 4856.265625, avg loss: 2.820131, ppl: 16.779049 +epoch: 0, batch: 43289, sum loss: 4491.190430, avg loss: 3.010181, ppl: 20.291080 +epoch: 0, batch: 43290, sum loss: 4592.146484, avg loss: 2.861150, ppl: 17.481625 +epoch: 0, batch: 43291, sum loss: 4553.199707, avg loss: 2.822814, ppl: 16.824135 +epoch: 0, batch: 43292, sum loss: 5329.886230, avg loss: 3.150051, ppl: 23.337252 +epoch: 0, batch: 43293, sum loss: 5220.330078, avg loss: 3.098119, ppl: 22.156231 +epoch: 0, batch: 43294, sum loss: 4637.181641, avg loss: 3.141722, ppl: 23.143684 +epoch: 0, batch: 43295, sum loss: 4656.789062, avg loss: 2.600106, ppl: 
13.465158 +epoch: 0, batch: 43296, sum loss: 4228.618652, avg loss: 2.745856, ppl: 15.577948 +epoch: 0, batch: 43297, sum loss: 3921.023438, avg loss: 2.788779, ppl: 16.261152 +epoch: 0, batch: 43298, sum loss: 5797.997070, avg loss: 3.108845, ppl: 22.395174 +epoch: 0, batch: 43299, sum loss: 4091.532227, avg loss: 2.531889, ppl: 12.577239 +epoch: 0, batch: 43300, sum loss: 4880.745117, avg loss: 2.803415, ppl: 16.500898 +epoch: 0, batch: 43301, sum loss: 4880.512207, avg loss: 2.997858, ppl: 20.042551 +epoch: 0, batch: 43302, sum loss: 4620.911133, avg loss: 2.695981, ppl: 14.820047 +epoch: 0, batch: 43303, sum loss: 4354.433594, avg loss: 2.938215, ppl: 18.882103 +epoch: 0, batch: 43304, sum loss: 5946.723145, avg loss: 3.038693, ppl: 20.877947 +epoch: 0, batch: 43305, sum loss: 4776.576172, avg loss: 2.814718, ppl: 16.688469 +epoch: 0, batch: 43306, sum loss: 4130.521973, avg loss: 2.559183, ppl: 12.925258 +epoch: 0, batch: 43307, sum loss: 4770.562012, avg loss: 2.748020, ppl: 15.611686 +epoch: 0, batch: 43308, sum loss: 4669.227051, avg loss: 2.727352, ppl: 15.292341 +epoch: 0, batch: 43309, sum loss: 5127.274902, avg loss: 2.818733, ppl: 16.755604 +epoch: 0, batch: 43310, sum loss: 4913.767090, avg loss: 3.063446, ppl: 21.401175 +epoch: 0, batch: 43311, sum loss: 5099.189941, avg loss: 2.908836, ppl: 18.335451 +epoch: 0, batch: 43312, sum loss: 4004.302734, avg loss: 2.535974, ppl: 12.628722 +epoch: 0, batch: 43313, sum loss: 4097.626953, avg loss: 2.679939, ppl: 14.584208 +epoch: 0, batch: 43314, sum loss: 4557.522461, avg loss: 2.930883, ppl: 18.744169 +epoch: 0, batch: 43315, sum loss: 3496.580078, avg loss: 2.455464, ppl: 11.651834 +epoch: 0, batch: 43316, sum loss: 4241.991211, avg loss: 2.758122, ppl: 15.770194 +epoch: 0, batch: 43317, sum loss: 4426.708008, avg loss: 2.717439, ppl: 15.141494 +epoch: 0, batch: 43318, sum loss: 5823.262695, avg loss: 2.963492, ppl: 19.365486 +epoch: 0, batch: 43319, sum loss: 4108.846191, avg loss: 2.656009, ppl: 
14.239349 +epoch: 0, batch: 43320, sum loss: 5466.887207, avg loss: 3.045620, ppl: 21.023056 +epoch: 0, batch: 43321, sum loss: 5028.844238, avg loss: 2.958144, ppl: 19.262186 +epoch: 0, batch: 43322, sum loss: 5679.623047, avg loss: 3.024293, ppl: 20.579458 +epoch: 0, batch: 43323, sum loss: 4242.159180, avg loss: 2.826222, ppl: 16.881561 +epoch: 0, batch: 43324, sum loss: 4216.134277, avg loss: 2.744879, ppl: 15.562735 +epoch: 0, batch: 43325, sum loss: 4958.333008, avg loss: 2.782454, ppl: 16.158627 +epoch: 0, batch: 43326, sum loss: 4787.953125, avg loss: 2.979436, ppl: 19.676710 +epoch: 0, batch: 43327, sum loss: 4176.346191, avg loss: 2.704888, ppl: 14.952637 +epoch: 0, batch: 43328, sum loss: 4794.033203, avg loss: 2.973966, ppl: 19.569376 +epoch: 0, batch: 43329, sum loss: 4745.843750, avg loss: 2.776971, ppl: 16.070276 +epoch: 0, batch: 43330, sum loss: 5443.904297, avg loss: 3.019359, ppl: 20.478163 +epoch: 0, batch: 43331, sum loss: 5054.665039, avg loss: 2.846095, ppl: 17.220409 +epoch: 0, batch: 43332, sum loss: 5100.937012, avg loss: 2.951931, ppl: 19.142887 +epoch: 0, batch: 43333, sum loss: 4950.394043, avg loss: 2.825567, ppl: 16.870516 +epoch: 0, batch: 43334, sum loss: 5279.805664, avg loss: 3.209608, ppl: 24.769377 +epoch: 0, batch: 43335, sum loss: 5007.146484, avg loss: 2.926445, ppl: 18.661167 +epoch: 0, batch: 43336, sum loss: 4376.344238, avg loss: 2.995445, ppl: 19.994246 +epoch: 0, batch: 43337, sum loss: 5318.624512, avg loss: 3.135982, ppl: 23.011211 +epoch: 0, batch: 43338, sum loss: 5613.046875, avg loss: 3.235186, ppl: 25.411089 +epoch: 0, batch: 43339, sum loss: 4421.161133, avg loss: 2.874617, ppl: 17.718637 +epoch: 0, batch: 43340, sum loss: 4207.267578, avg loss: 2.629542, ppl: 13.867418 +epoch: 0, batch: 43341, sum loss: 4560.078613, avg loss: 2.595378, ppl: 13.401648 +epoch: 0, batch: 43342, sum loss: 5283.048340, avg loss: 2.909168, ppl: 18.341524 +epoch: 0, batch: 43343, sum loss: 5121.081543, avg loss: 2.818427, ppl: 
16.750479 +epoch: 0, batch: 43344, sum loss: 4643.452637, avg loss: 2.755758, ppl: 15.732966 +epoch: 0, batch: 43345, sum loss: 6257.415527, avg loss: 3.068865, ppl: 21.517467 +epoch: 0, batch: 43346, sum loss: 4708.638672, avg loss: 2.833116, ppl: 16.998346 +epoch: 0, batch: 43347, sum loss: 4878.830566, avg loss: 2.772063, ppl: 15.991587 +epoch: 0, batch: 43348, sum loss: 4639.358887, avg loss: 2.708324, ppl: 15.004107 +epoch: 0, batch: 43349, sum loss: 4506.129395, avg loss: 2.862852, ppl: 17.511400 +epoch: 0, batch: 43350, sum loss: 5004.058594, avg loss: 2.933211, ppl: 18.787868 +epoch: 0, batch: 43351, sum loss: 4380.562500, avg loss: 2.623091, ppl: 13.778253 +epoch: 0, batch: 43352, sum loss: 5250.590332, avg loss: 2.855134, ppl: 17.376757 +epoch: 0, batch: 43353, sum loss: 5040.631836, avg loss: 2.783342, ppl: 16.172979 +epoch: 0, batch: 43354, sum loss: 4811.756836, avg loss: 2.700200, ppl: 14.882712 +epoch: 0, batch: 43355, sum loss: 5345.256836, avg loss: 3.045730, ppl: 21.025381 +epoch: 0, batch: 43356, sum loss: 4643.590332, avg loss: 2.645920, ppl: 14.096412 +epoch: 0, batch: 43357, sum loss: 4476.765625, avg loss: 2.855080, ppl: 17.375830 +epoch: 0, batch: 43358, sum loss: 4367.473145, avg loss: 2.569102, ppl: 13.054097 +epoch: 0, batch: 43359, sum loss: 4905.739746, avg loss: 2.788937, ppl: 16.263720 +epoch: 0, batch: 43360, sum loss: 4318.011719, avg loss: 3.040854, ppl: 20.923094 +epoch: 0, batch: 43361, sum loss: 4732.362793, avg loss: 3.152807, ppl: 23.401655 +epoch: 0, batch: 43362, sum loss: 4669.712402, avg loss: 2.922223, ppl: 18.582552 +epoch: 0, batch: 43363, sum loss: 4800.323730, avg loss: 3.002079, ppl: 20.127329 +epoch: 0, batch: 43364, sum loss: 4926.325195, avg loss: 2.864143, ppl: 17.534014 +epoch: 0, batch: 43365, sum loss: 5608.919922, avg loss: 2.930470, ppl: 18.736435 +epoch: 0, batch: 43366, sum loss: 4807.697266, avg loss: 2.728546, ppl: 15.310604 +epoch: 0, batch: 43367, sum loss: 3935.854004, avg loss: 2.688425, ppl: 
14.708489 +epoch: 0, batch: 43368, sum loss: 3466.818848, avg loss: 2.573733, ppl: 13.114695 +epoch: 0, batch: 43369, sum loss: 5260.531250, avg loss: 2.924142, ppl: 18.618242 +epoch: 0, batch: 43370, sum loss: 4541.562988, avg loss: 2.868959, ppl: 17.618675 +epoch: 0, batch: 43371, sum loss: 4868.566895, avg loss: 2.745949, ppl: 15.579389 +epoch: 0, batch: 43372, sum loss: 3928.088379, avg loss: 2.783904, ppl: 16.182070 +epoch: 0, batch: 43373, sum loss: 4760.052246, avg loss: 2.850331, ppl: 17.293503 +epoch: 0, batch: 43374, sum loss: 5231.980469, avg loss: 3.040082, ppl: 20.906946 +epoch: 0, batch: 43375, sum loss: 4348.834961, avg loss: 2.728253, ppl: 15.306118 +epoch: 0, batch: 43376, sum loss: 4327.487793, avg loss: 2.557617, ppl: 12.905027 +epoch: 0, batch: 43377, sum loss: 4798.433594, avg loss: 2.829265, ppl: 16.933012 +epoch: 0, batch: 43378, sum loss: 4959.778809, avg loss: 2.813261, ppl: 16.664167 +epoch: 0, batch: 43379, sum loss: 4490.761230, avg loss: 2.716734, ppl: 15.130823 +epoch: 0, batch: 43380, sum loss: 5092.571289, avg loss: 2.818246, ppl: 16.747456 +epoch: 0, batch: 43381, sum loss: 5416.818359, avg loss: 2.964870, ppl: 19.392191 +epoch: 0, batch: 43382, sum loss: 4412.361816, avg loss: 2.867032, ppl: 17.584745 +epoch: 0, batch: 43383, sum loss: 5121.133789, avg loss: 2.661712, ppl: 14.320785 +epoch: 0, batch: 43384, sum loss: 4305.695312, avg loss: 2.659478, ppl: 14.288835 +epoch: 0, batch: 43385, sum loss: 4251.756348, avg loss: 2.541397, ppl: 12.697392 +epoch: 0, batch: 43386, sum loss: 4684.431152, avg loss: 2.844220, ppl: 17.188154 +epoch: 0, batch: 43387, sum loss: 4148.901367, avg loss: 2.734938, ppl: 15.408791 +epoch: 0, batch: 43388, sum loss: 5583.440918, avg loss: 3.014817, ppl: 20.385355 +epoch: 0, batch: 43389, sum loss: 4422.456543, avg loss: 2.982102, ppl: 19.729239 +epoch: 0, batch: 43390, sum loss: 3787.042480, avg loss: 2.572719, ppl: 13.101400 +epoch: 0, batch: 43391, sum loss: 5209.458008, avg loss: 2.721765, ppl: 
15.207136 +epoch: 0, batch: 43392, sum loss: 4605.450684, avg loss: 2.777714, ppl: 16.082222 +epoch: 0, batch: 43393, sum loss: 5993.847656, avg loss: 3.112071, ppl: 22.467516 +epoch: 0, batch: 43394, sum loss: 4921.850586, avg loss: 2.948982, ppl: 19.086514 +epoch: 0, batch: 43395, sum loss: 5574.331055, avg loss: 3.158261, ppl: 23.529650 +epoch: 0, batch: 43396, sum loss: 4526.750000, avg loss: 2.768654, ppl: 15.937174 +epoch: 0, batch: 43397, sum loss: 5250.169922, avg loss: 3.048879, ppl: 21.091690 +epoch: 0, batch: 43398, sum loss: 4279.949707, avg loss: 2.508763, ppl: 12.289717 +epoch: 0, batch: 43399, sum loss: 4772.211914, avg loss: 2.934940, ppl: 18.820366 +epoch: 0, batch: 43400, sum loss: 4517.547852, avg loss: 2.804189, ppl: 16.513674 +epoch: 0, batch: 43401, sum loss: 5363.939941, avg loss: 3.170177, ppl: 23.811705 +epoch: 0, batch: 43402, sum loss: 5224.623047, avg loss: 3.098828, ppl: 22.171942 +epoch: 0, batch: 43403, sum loss: 5291.705566, avg loss: 3.134897, ppl: 22.986263 +epoch: 0, batch: 43404, sum loss: 4196.734375, avg loss: 2.714576, ppl: 15.098207 +epoch: 0, batch: 43405, sum loss: 5313.181152, avg loss: 2.812695, ppl: 16.654747 +epoch: 0, batch: 43406, sum loss: 5406.166016, avg loss: 3.210313, ppl: 24.786833 +epoch: 0, batch: 43407, sum loss: 3944.454590, avg loss: 2.663373, ppl: 14.344585 +epoch: 0, batch: 43408, sum loss: 4916.725098, avg loss: 2.965456, ppl: 19.403540 +epoch: 0, batch: 43409, sum loss: 4348.853516, avg loss: 2.761177, ppl: 15.818447 +epoch: 0, batch: 43410, sum loss: 5668.370117, avg loss: 3.229841, ppl: 25.275625 +epoch: 0, batch: 43411, sum loss: 4712.053223, avg loss: 2.976660, ppl: 19.622173 +epoch: 0, batch: 43412, sum loss: 4916.906250, avg loss: 2.830689, ppl: 16.957136 +epoch: 0, batch: 43413, sum loss: 4183.526367, avg loss: 2.646127, ppl: 14.099322 +epoch: 0, batch: 43414, sum loss: 4874.043457, avg loss: 2.710814, ppl: 15.041511 +epoch: 0, batch: 43415, sum loss: 5905.362793, avg loss: 3.000693, ppl: 
20.099453 +epoch: 0, batch: 43416, sum loss: 5322.000977, avg loss: 3.030752, ppl: 20.712812 +epoch: 0, batch: 43417, sum loss: 5132.469727, avg loss: 3.045976, ppl: 21.030550 +epoch: 0, batch: 43418, sum loss: 5224.827637, avg loss: 3.044771, ppl: 21.005230 +epoch: 0, batch: 43419, sum loss: 5611.320312, avg loss: 3.126084, ppl: 22.784576 +epoch: 0, batch: 43420, sum loss: 4575.669922, avg loss: 2.852662, ppl: 17.333864 +epoch: 0, batch: 43421, sum loss: 4539.713867, avg loss: 2.833779, ppl: 17.009617 +epoch: 0, batch: 43422, sum loss: 4699.574707, avg loss: 2.987651, ppl: 19.839018 +epoch: 0, batch: 43423, sum loss: 4771.957031, avg loss: 2.906186, ppl: 18.286917 +epoch: 0, batch: 43424, sum loss: 4713.490234, avg loss: 2.909562, ppl: 18.348763 +epoch: 0, batch: 43425, sum loss: 5173.805664, avg loss: 2.883950, ppl: 17.884775 +epoch: 0, batch: 43426, sum loss: 4200.459473, avg loss: 2.820994, ppl: 16.793530 +epoch: 0, batch: 43427, sum loss: 4488.432129, avg loss: 2.703875, ppl: 14.937500 +epoch: 0, batch: 43428, sum loss: 3651.702881, avg loss: 2.841792, ppl: 17.146467 +epoch: 0, batch: 43429, sum loss: 5552.901367, avg loss: 3.088377, ppl: 21.941427 +epoch: 0, batch: 43430, sum loss: 4281.618652, avg loss: 2.576185, ppl: 13.146880 +epoch: 0, batch: 43431, sum loss: 4322.854492, avg loss: 2.767513, ppl: 15.918987 +epoch: 0, batch: 43432, sum loss: 4495.435059, avg loss: 2.565888, ppl: 13.012204 +epoch: 0, batch: 43433, sum loss: 4971.228027, avg loss: 3.101203, ppl: 22.224665 +epoch: 0, batch: 43434, sum loss: 4442.136719, avg loss: 2.815042, ppl: 16.693882 +epoch: 0, batch: 43435, sum loss: 5348.277832, avg loss: 2.996234, ppl: 20.010040 +epoch: 0, batch: 43436, sum loss: 4750.618652, avg loss: 2.760383, ppl: 15.805890 +epoch: 0, batch: 43437, sum loss: 4846.283203, avg loss: 3.094689, ppl: 22.080374 +epoch: 0, batch: 43438, sum loss: 4767.330078, avg loss: 2.820905, ppl: 16.792048 +epoch: 0, batch: 43439, sum loss: 5347.497070, avg loss: 3.091039, ppl: 
21.999918 +epoch: 0, batch: 43440, sum loss: 5696.508301, avg loss: 2.991864, ppl: 19.922779 +epoch: 0, batch: 43441, sum loss: 5344.185547, avg loss: 2.777643, ppl: 16.081076 +epoch: 0, batch: 43442, sum loss: 4309.001465, avg loss: 2.787194, ppl: 16.235395 +epoch: 0, batch: 43443, sum loss: 5348.458496, avg loss: 2.963135, ppl: 19.358561 +epoch: 0, batch: 43444, sum loss: 4463.929688, avg loss: 2.758918, ppl: 15.782762 +epoch: 0, batch: 43445, sum loss: 5535.996094, avg loss: 3.187102, ppl: 24.218143 +epoch: 0, batch: 43446, sum loss: 4621.482422, avg loss: 3.097508, ppl: 22.142712 +epoch: 0, batch: 43447, sum loss: 5151.382812, avg loss: 2.975958, ppl: 19.608393 +epoch: 0, batch: 43448, sum loss: 3685.490234, avg loss: 2.686218, ppl: 14.676060 +epoch: 0, batch: 43449, sum loss: 4967.660645, avg loss: 2.932503, ppl: 18.774569 +epoch: 0, batch: 43450, sum loss: 4940.022461, avg loss: 2.912749, ppl: 18.407331 +epoch: 0, batch: 43451, sum loss: 5287.141113, avg loss: 2.913026, ppl: 18.412424 +epoch: 0, batch: 43452, sum loss: 3973.174805, avg loss: 2.833934, ppl: 17.012249 +epoch: 0, batch: 43453, sum loss: 5125.941895, avg loss: 3.018812, ppl: 20.466957 +epoch: 0, batch: 43454, sum loss: 4688.795410, avg loss: 2.873037, ppl: 17.690657 +epoch: 0, batch: 43455, sum loss: 5416.997559, avg loss: 2.907674, ppl: 18.314156 +epoch: 0, batch: 43456, sum loss: 5478.989746, avg loss: 2.882162, ppl: 17.852827 +epoch: 0, batch: 43457, sum loss: 5635.777832, avg loss: 2.938362, ppl: 18.884886 +epoch: 0, batch: 43458, sum loss: 4596.491699, avg loss: 2.914706, ppl: 18.443394 +epoch: 0, batch: 43459, sum loss: 4791.720215, avg loss: 2.959679, ppl: 19.291780 +epoch: 0, batch: 43460, sum loss: 5457.080078, avg loss: 3.086584, ppl: 21.902128 +epoch: 0, batch: 43461, sum loss: 5737.986816, avg loss: 3.282601, ppl: 26.644997 +epoch: 0, batch: 43462, sum loss: 4675.709961, avg loss: 2.779851, ppl: 16.116623 +epoch: 0, batch: 43463, sum loss: 4355.530762, avg loss: 2.876837, ppl: 
17.758011 +epoch: 0, batch: 43464, sum loss: 4870.330078, avg loss: 2.967904, ppl: 19.451099 +epoch: 0, batch: 43465, sum loss: 5706.802734, avg loss: 2.801572, ppl: 16.470520 +epoch: 0, batch: 43466, sum loss: 4740.928711, avg loss: 2.873290, ppl: 17.695141 +epoch: 0, batch: 43467, sum loss: 4726.228516, avg loss: 2.953893, ppl: 19.180473 +epoch: 0, batch: 43468, sum loss: 4543.417969, avg loss: 2.815005, ppl: 16.693260 +epoch: 0, batch: 43469, sum loss: 5186.973145, avg loss: 3.061968, ppl: 21.369562 +epoch: 0, batch: 43470, sum loss: 5224.474121, avg loss: 2.912193, ppl: 18.397100 +epoch: 0, batch: 43471, sum loss: 5848.045410, avg loss: 2.977620, ppl: 19.641018 +epoch: 0, batch: 43472, sum loss: 5785.386719, avg loss: 2.920437, ppl: 18.549398 +epoch: 0, batch: 43473, sum loss: 5142.143555, avg loss: 2.924996, ppl: 18.634153 +epoch: 0, batch: 43474, sum loss: 5429.635742, avg loss: 2.647311, ppl: 14.116035 +epoch: 0, batch: 43475, sum loss: 4368.926270, avg loss: 2.732286, ppl: 15.367985 +epoch: 0, batch: 43476, sum loss: 5047.518555, avg loss: 2.718104, ppl: 15.151563 +epoch: 0, batch: 43477, sum loss: 4676.313477, avg loss: 2.563768, ppl: 12.984656 +epoch: 0, batch: 43478, sum loss: 4911.915527, avg loss: 2.880889, ppl: 17.830116 +epoch: 0, batch: 43479, sum loss: 5170.549316, avg loss: 2.757626, ppl: 15.762383 +epoch: 0, batch: 43480, sum loss: 4310.024414, avg loss: 2.740003, ppl: 15.487026 +epoch: 0, batch: 43481, sum loss: 4293.665527, avg loss: 2.788095, ppl: 16.250027 +epoch: 0, batch: 43482, sum loss: 4749.304199, avg loss: 3.066045, ppl: 21.456879 +epoch: 0, batch: 43483, sum loss: 4892.836914, avg loss: 2.744160, ppl: 15.551544 +epoch: 0, batch: 43484, sum loss: 4170.536133, avg loss: 2.669997, ppl: 14.439933 +epoch: 0, batch: 43485, sum loss: 4556.260742, avg loss: 2.829976, ppl: 16.945047 +epoch: 0, batch: 43486, sum loss: 5011.918945, avg loss: 2.859053, ppl: 17.444992 +epoch: 0, batch: 43487, sum loss: 5219.759766, avg loss: 2.930803, ppl: 
18.742672 +epoch: 0, batch: 43488, sum loss: 5072.537109, avg loss: 3.096787, ppl: 22.126738 +epoch: 0, batch: 43489, sum loss: 4302.249512, avg loss: 2.881614, ppl: 17.843044 +epoch: 0, batch: 43490, sum loss: 4893.555176, avg loss: 2.881953, ppl: 17.849091 +epoch: 0, batch: 43491, sum loss: 5520.732422, avg loss: 2.879881, ppl: 17.812160 +epoch: 0, batch: 43492, sum loss: 5302.941406, avg loss: 2.985890, ppl: 19.804129 +epoch: 0, batch: 43493, sum loss: 4670.783203, avg loss: 2.832494, ppl: 16.987780 +epoch: 0, batch: 43494, sum loss: 3982.995605, avg loss: 2.724347, ppl: 15.246460 +epoch: 0, batch: 43495, sum loss: 4926.082031, avg loss: 3.005541, ppl: 20.197142 +epoch: 0, batch: 43496, sum loss: 5608.549316, avg loss: 2.924165, ppl: 18.618683 +epoch: 0, batch: 43497, sum loss: 4309.020996, avg loss: 2.728956, ppl: 15.316884 +epoch: 0, batch: 43498, sum loss: 4975.018555, avg loss: 2.727532, ppl: 15.295094 +epoch: 0, batch: 43499, sum loss: 5427.651855, avg loss: 2.924381, ppl: 18.622704 +epoch: 0, batch: 43500, sum loss: 4534.109375, avg loss: 2.737989, ppl: 15.455868 +epoch: 0, batch: 43501, sum loss: 4962.661133, avg loss: 3.024169, ppl: 20.576893 +epoch: 0, batch: 43502, sum loss: 5097.957031, avg loss: 2.805700, ppl: 16.538651 +epoch: 0, batch: 43503, sum loss: 4698.503906, avg loss: 2.750880, ppl: 15.656411 +epoch: 0, batch: 43504, sum loss: 4646.705566, avg loss: 2.638674, ppl: 13.994638 +epoch: 0, batch: 43505, sum loss: 5220.213867, avg loss: 2.754730, ppl: 15.716801 +epoch: 0, batch: 43506, sum loss: 6017.788574, avg loss: 3.259907, ppl: 26.047121 +epoch: 0, batch: 43507, sum loss: 4805.893555, avg loss: 2.999933, ppl: 20.084202 +epoch: 0, batch: 43508, sum loss: 5637.161133, avg loss: 3.226766, ppl: 25.198044 +epoch: 0, batch: 43509, sum loss: 4123.399414, avg loss: 2.608096, ppl: 13.573178 +epoch: 0, batch: 43510, sum loss: 3738.952148, avg loss: 2.484354, ppl: 11.993367 +epoch: 0, batch: 43511, sum loss: 4478.023926, avg loss: 2.652858, ppl: 
14.194546 +epoch: 0, batch: 43512, sum loss: 4464.676758, avg loss: 2.884158, ppl: 17.888502 +epoch: 0, batch: 43513, sum loss: 4360.333984, avg loss: 2.825881, ppl: 16.875803 +epoch: 0, batch: 43514, sum loss: 4906.954590, avg loss: 2.975715, ppl: 19.603643 +epoch: 0, batch: 43515, sum loss: 5317.877930, avg loss: 3.122653, ppl: 22.706541 +epoch: 0, batch: 43516, sum loss: 4025.501465, avg loss: 2.644876, ppl: 14.081702 +epoch: 0, batch: 43517, sum loss: 4782.455078, avg loss: 2.808253, ppl: 16.580927 +epoch: 0, batch: 43518, sum loss: 4570.560547, avg loss: 2.718953, ppl: 15.164442 +epoch: 0, batch: 43519, sum loss: 3838.833008, avg loss: 2.590306, ppl: 13.333849 +epoch: 0, batch: 43520, sum loss: 4485.842285, avg loss: 3.008613, ppl: 20.259289 +epoch: 0, batch: 43521, sum loss: 4725.638672, avg loss: 2.706552, ppl: 14.977537 +epoch: 0, batch: 43522, sum loss: 4828.180664, avg loss: 2.805451, ppl: 16.534527 +epoch: 0, batch: 43523, sum loss: 4832.739258, avg loss: 2.953997, ppl: 19.182476 +epoch: 0, batch: 43524, sum loss: 4674.515625, avg loss: 2.898026, ppl: 18.138300 +epoch: 0, batch: 43525, sum loss: 5267.586426, avg loss: 3.122458, ppl: 22.702112 +epoch: 0, batch: 43526, sum loss: 6004.270508, avg loss: 2.921786, ppl: 18.574432 +epoch: 0, batch: 43527, sum loss: 5678.339844, avg loss: 3.013981, ppl: 20.368322 +epoch: 0, batch: 43528, sum loss: 5191.400391, avg loss: 3.073653, ppl: 21.620749 +epoch: 0, batch: 43529, sum loss: 4747.495605, avg loss: 2.737887, ppl: 15.454290 +epoch: 0, batch: 43530, sum loss: 5413.596680, avg loss: 2.927851, ppl: 18.687428 +epoch: 0, batch: 43531, sum loss: 4233.401855, avg loss: 2.846941, ppl: 17.234983 +epoch: 0, batch: 43532, sum loss: 4410.839844, avg loss: 2.814831, ppl: 16.690355 +epoch: 0, batch: 43533, sum loss: 5400.782715, avg loss: 2.964206, ppl: 19.379305 +epoch: 0, batch: 43534, sum loss: 4744.650879, avg loss: 3.023997, ppl: 20.573357 +epoch: 0, batch: 43535, sum loss: 4206.347656, avg loss: 2.632258, ppl: 
13.905128 +epoch: 0, batch: 43536, sum loss: 4862.015625, avg loss: 2.861693, ppl: 17.491108 +epoch: 0, batch: 43537, sum loss: 5529.258789, avg loss: 2.944227, ppl: 18.995977 +epoch: 0, batch: 43538, sum loss: 5355.553711, avg loss: 3.135570, ppl: 23.001743 +epoch: 0, batch: 43539, sum loss: 4202.976074, avg loss: 2.701141, ppl: 14.896725 +epoch: 0, batch: 43540, sum loss: 4144.019531, avg loss: 2.550166, ppl: 12.809229 +epoch: 0, batch: 43541, sum loss: 4518.458984, avg loss: 2.808240, ppl: 16.580717 +epoch: 0, batch: 43542, sum loss: 5756.525391, avg loss: 2.974948, ppl: 19.588614 +epoch: 0, batch: 43543, sum loss: 5096.965332, avg loss: 2.966802, ppl: 19.429678 +epoch: 0, batch: 43544, sum loss: 5215.818359, avg loss: 2.825470, ppl: 16.868879 +epoch: 0, batch: 43545, sum loss: 4308.030273, avg loss: 2.766879, ppl: 15.908902 +epoch: 0, batch: 43546, sum loss: 3378.422363, avg loss: 2.448132, ppl: 11.566723 +epoch: 0, batch: 43547, sum loss: 5343.659180, avg loss: 2.886904, ppl: 17.937689 +epoch: 0, batch: 43548, sum loss: 5902.094238, avg loss: 3.259025, ppl: 26.024155 +epoch: 0, batch: 43549, sum loss: 4583.594727, avg loss: 2.762866, ppl: 15.845191 +epoch: 0, batch: 43550, sum loss: 5119.437988, avg loss: 2.713004, ppl: 15.074486 +epoch: 0, batch: 43551, sum loss: 4184.424805, avg loss: 2.814004, ppl: 16.676565 +epoch: 0, batch: 43552, sum loss: 4261.757324, avg loss: 2.740680, ppl: 15.497523 +epoch: 0, batch: 43553, sum loss: 5038.437500, avg loss: 2.939579, ppl: 18.907875 +epoch: 0, batch: 43554, sum loss: 5638.743164, avg loss: 3.132635, ppl: 22.934334 +epoch: 0, batch: 43555, sum loss: 5710.144531, avg loss: 2.863663, ppl: 17.525610 +epoch: 0, batch: 43556, sum loss: 5596.447754, avg loss: 3.068228, ppl: 21.503765 +epoch: 0, batch: 43557, sum loss: 5741.000000, avg loss: 2.895108, ppl: 18.085463 +epoch: 0, batch: 43558, sum loss: 4782.733398, avg loss: 3.087626, ppl: 21.924976 +epoch: 0, batch: 43559, sum loss: 5095.079102, avg loss: 2.969160, ppl: 
19.475559 +epoch: 0, batch: 43560, sum loss: 4765.459961, avg loss: 2.794991, ppl: 16.362486 +epoch: 0, batch: 43561, sum loss: 6340.960938, avg loss: 3.282071, ppl: 26.630865 +epoch: 0, batch: 43562, sum loss: 5048.692383, avg loss: 2.781649, ppl: 16.145620 +epoch: 0, batch: 43563, sum loss: 4957.125977, avg loss: 2.814949, ppl: 16.692329 +epoch: 0, batch: 43564, sum loss: 4997.172852, avg loss: 2.828055, ppl: 16.912533 +epoch: 0, batch: 43565, sum loss: 4914.916992, avg loss: 3.000560, ppl: 20.096779 +epoch: 0, batch: 43566, sum loss: 4593.441406, avg loss: 3.076652, ppl: 21.685673 +epoch: 0, batch: 43567, sum loss: 4364.047852, avg loss: 2.932828, ppl: 18.780666 +epoch: 0, batch: 43568, sum loss: 4663.142090, avg loss: 3.043827, ppl: 20.985392 +epoch: 0, batch: 43569, sum loss: 4689.621094, avg loss: 2.905589, ppl: 18.276011 +epoch: 0, batch: 43570, sum loss: 5512.930664, avg loss: 2.766147, ppl: 15.897262 +epoch: 0, batch: 43571, sum loss: 4101.674316, avg loss: 2.760211, ppl: 15.803185 +epoch: 0, batch: 43572, sum loss: 4384.526855, avg loss: 2.550627, ppl: 12.815130 +epoch: 0, batch: 43573, sum loss: 4752.224121, avg loss: 2.944377, ppl: 18.998821 +epoch: 0, batch: 43574, sum loss: 4999.162598, avg loss: 2.925198, ppl: 18.637909 +epoch: 0, batch: 43575, sum loss: 5167.890137, avg loss: 2.834827, ppl: 17.027462 +epoch: 0, batch: 43576, sum loss: 4879.068359, avg loss: 2.744133, ppl: 15.551126 +epoch: 0, batch: 43577, sum loss: 4834.506836, avg loss: 2.838818, ppl: 17.095543 +epoch: 0, batch: 43578, sum loss: 5761.733398, avg loss: 3.186800, ppl: 24.210817 +epoch: 0, batch: 43579, sum loss: 4718.388672, avg loss: 2.682427, ppl: 14.620533 +epoch: 0, batch: 43580, sum loss: 4792.033691, avg loss: 3.069849, ppl: 21.538645 +epoch: 0, batch: 43581, sum loss: 4843.271484, avg loss: 2.756557, ppl: 15.745545 +epoch: 0, batch: 43582, sum loss: 4975.040039, avg loss: 2.680517, ppl: 14.592639 +epoch: 0, batch: 43583, sum loss: 5200.021484, avg loss: 2.836891, ppl: 
17.062637 +epoch: 0, batch: 43584, sum loss: 4702.207520, avg loss: 2.634290, ppl: 13.933416 +epoch: 0, batch: 43585, sum loss: 5094.972656, avg loss: 3.091610, ppl: 22.012484 +epoch: 0, batch: 43586, sum loss: 5164.925781, avg loss: 3.170611, ppl: 23.822039 +epoch: 0, batch: 43587, sum loss: 4362.974609, avg loss: 2.744009, ppl: 15.549201 +epoch: 0, batch: 43588, sum loss: 4756.984863, avg loss: 2.945502, ppl: 19.020201 +epoch: 0, batch: 43589, sum loss: 4453.037598, avg loss: 2.791873, ppl: 16.311546 +epoch: 0, batch: 43590, sum loss: 4676.488281, avg loss: 2.649569, ppl: 14.147933 +epoch: 0, batch: 43591, sum loss: 4535.581543, avg loss: 2.777453, ppl: 16.078024 +epoch: 0, batch: 43592, sum loss: 4643.153809, avg loss: 2.713708, ppl: 15.085106 +epoch: 0, batch: 43593, sum loss: 4467.722168, avg loss: 2.852952, ppl: 17.338882 +epoch: 0, batch: 43594, sum loss: 3787.220947, avg loss: 2.648406, ppl: 14.131499 +epoch: 0, batch: 43595, sum loss: 4537.568359, avg loss: 2.931246, ppl: 18.750977 +epoch: 0, batch: 43596, sum loss: 4865.452148, avg loss: 2.870473, ppl: 17.645365 +epoch: 0, batch: 43597, sum loss: 4930.590332, avg loss: 2.812659, ppl: 16.654135 +epoch: 0, batch: 43598, sum loss: 4626.147949, avg loss: 2.819103, ppl: 16.761805 +epoch: 0, batch: 43599, sum loss: 4359.062500, avg loss: 2.646668, ppl: 14.106958 +epoch: 0, batch: 43600, sum loss: 4638.968262, avg loss: 2.803002, ppl: 16.494089 +epoch: 0, batch: 43601, sum loss: 3761.143555, avg loss: 2.536172, ppl: 12.631231 +epoch: 0, batch: 43602, sum loss: 5283.625977, avg loss: 2.819438, ppl: 16.767416 +epoch: 0, batch: 43603, sum loss: 5359.604004, avg loss: 2.923952, ppl: 18.614710 +epoch: 0, batch: 43604, sum loss: 5022.824219, avg loss: 3.087169, ppl: 21.914953 +epoch: 0, batch: 43605, sum loss: 4843.002930, avg loss: 3.036366, ppl: 20.829401 +epoch: 0, batch: 43606, sum loss: 4623.563965, avg loss: 2.965724, ppl: 19.408754 +epoch: 0, batch: 43607, sum loss: 5000.312500, avg loss: 2.976377, ppl: 
19.616608 +epoch: 0, batch: 43608, sum loss: 4651.901367, avg loss: 2.929409, ppl: 18.716566 +epoch: 0, batch: 43609, sum loss: 5376.649414, avg loss: 2.957453, ppl: 19.248882 +epoch: 0, batch: 43610, sum loss: 5973.324219, avg loss: 3.280244, ppl: 26.582254 +epoch: 0, batch: 43611, sum loss: 3169.103516, avg loss: 2.468149, ppl: 11.800586 +epoch: 0, batch: 43612, sum loss: 4830.952637, avg loss: 2.885874, ppl: 17.919222 +epoch: 0, batch: 43613, sum loss: 4360.387695, avg loss: 2.665274, ppl: 14.371882 +epoch: 0, batch: 43614, sum loss: 4905.965820, avg loss: 2.939464, ppl: 18.905716 +epoch: 0, batch: 43615, sum loss: 4502.142578, avg loss: 2.973674, ppl: 19.563671 +epoch: 0, batch: 43616, sum loss: 4465.729492, avg loss: 2.677296, ppl: 14.545708 +epoch: 0, batch: 43617, sum loss: 4821.940430, avg loss: 2.772824, ppl: 16.003761 +epoch: 0, batch: 43618, sum loss: 5548.165039, avg loss: 2.882164, ppl: 17.852856 +epoch: 0, batch: 43619, sum loss: 4861.307129, avg loss: 2.957000, ppl: 19.240156 +epoch: 0, batch: 43620, sum loss: 4794.726562, avg loss: 2.754007, ppl: 15.705443 +epoch: 0, batch: 43621, sum loss: 5507.739258, avg loss: 2.797227, ppl: 16.399103 +epoch: 0, batch: 43622, sum loss: 5159.329590, avg loss: 2.747247, ppl: 15.599623 +epoch: 0, batch: 43623, sum loss: 4430.953613, avg loss: 2.907450, ppl: 18.310047 +epoch: 0, batch: 43624, sum loss: 5382.797852, avg loss: 2.949478, ppl: 19.095991 +epoch: 0, batch: 43625, sum loss: 5797.442383, avg loss: 3.101895, ppl: 22.240065 +epoch: 0, batch: 43626, sum loss: 4360.957031, avg loss: 2.715415, ppl: 15.110884 +epoch: 0, batch: 43627, sum loss: 4518.395996, avg loss: 2.765236, ppl: 15.882793 +epoch: 0, batch: 43628, sum loss: 4873.152344, avg loss: 3.032453, ppl: 20.748072 +epoch: 0, batch: 43629, sum loss: 6434.071289, avg loss: 3.102252, ppl: 22.248003 +epoch: 0, batch: 43630, sum loss: 4677.887695, avg loss: 2.746851, ppl: 15.593454 +epoch: 0, batch: 43631, sum loss: 4197.462402, avg loss: 2.504453, ppl: 
12.236860 +epoch: 0, batch: 43632, sum loss: 5098.683105, avg loss: 3.000991, ppl: 20.105450 +epoch: 0, batch: 43633, sum loss: 5348.775391, avg loss: 2.938888, ppl: 18.894815 +epoch: 0, batch: 43634, sum loss: 5343.202637, avg loss: 3.122854, ppl: 22.711105 +epoch: 0, batch: 43635, sum loss: 4423.140625, avg loss: 2.862874, ppl: 17.511787 +epoch: 0, batch: 43636, sum loss: 4264.923828, avg loss: 2.776643, ppl: 16.065004 +epoch: 0, batch: 43637, sum loss: 5788.234375, avg loss: 3.017849, ppl: 20.447262 +epoch: 0, batch: 43638, sum loss: 4741.230957, avg loss: 2.950362, ppl: 19.112865 +epoch: 0, batch: 43639, sum loss: 5553.602051, avg loss: 2.935308, ppl: 18.827291 +epoch: 0, batch: 43640, sum loss: 4846.978027, avg loss: 2.771285, ppl: 15.979158 +epoch: 0, batch: 43641, sum loss: 4758.623047, avg loss: 2.819090, ppl: 16.761585 +epoch: 0, batch: 43642, sum loss: 5004.148438, avg loss: 2.884235, ppl: 17.889883 +epoch: 0, batch: 43643, sum loss: 4708.843750, avg loss: 2.948556, ppl: 19.078384 +epoch: 0, batch: 43644, sum loss: 4208.181152, avg loss: 2.886270, ppl: 17.926311 +epoch: 0, batch: 43645, sum loss: 4982.288086, avg loss: 2.813262, ppl: 16.664196 +epoch: 0, batch: 43646, sum loss: 4989.410156, avg loss: 2.771895, ppl: 15.988899 +epoch: 0, batch: 43647, sum loss: 4559.020508, avg loss: 2.705650, ppl: 14.964042 +epoch: 0, batch: 43648, sum loss: 4674.953613, avg loss: 2.859299, ppl: 17.449286 +epoch: 0, batch: 43649, sum loss: 3812.798340, avg loss: 2.713736, ppl: 15.085524 +epoch: 0, batch: 43650, sum loss: 5300.904297, avg loss: 2.917394, ppl: 18.493027 +epoch: 0, batch: 43651, sum loss: 3902.169922, avg loss: 2.597983, ppl: 13.436604 +epoch: 0, batch: 43652, sum loss: 4687.519531, avg loss: 2.686258, ppl: 14.676647 +epoch: 0, batch: 43653, sum loss: 5265.621094, avg loss: 3.070333, ppl: 21.549072 +epoch: 0, batch: 43654, sum loss: 4678.877441, avg loss: 2.707684, ppl: 14.994505 +epoch: 0, batch: 43655, sum loss: 5282.420898, avg loss: 2.829363, ppl: 
16.934671 +epoch: 0, batch: 43656, sum loss: 5581.561523, avg loss: 2.881550, ppl: 17.841900 +epoch: 0, batch: 43657, sum loss: 4104.368652, avg loss: 2.632693, ppl: 13.911186 +epoch: 0, batch: 43658, sum loss: 5047.627930, avg loss: 2.899269, ppl: 18.160872 +epoch: 0, batch: 43659, sum loss: 5986.204102, avg loss: 2.838409, ppl: 17.088551 +epoch: 0, batch: 43660, sum loss: 4538.774902, avg loss: 2.819115, ppl: 16.762005 +epoch: 0, batch: 43661, sum loss: 4295.049316, avg loss: 2.762090, ppl: 15.832895 +epoch: 0, batch: 43662, sum loss: 5336.771973, avg loss: 2.929074, ppl: 18.710289 +epoch: 0, batch: 43663, sum loss: 5059.630371, avg loss: 2.928027, ppl: 18.690716 +epoch: 0, batch: 43664, sum loss: 4701.106445, avg loss: 2.891209, ppl: 18.015083 +epoch: 0, batch: 43665, sum loss: 4378.632324, avg loss: 2.696202, ppl: 14.823326 +epoch: 0, batch: 43666, sum loss: 5200.325684, avg loss: 2.892283, ppl: 18.034443 +epoch: 0, batch: 43667, sum loss: 5298.007812, avg loss: 2.973068, ppl: 19.551823 +epoch: 0, batch: 43668, sum loss: 4968.178711, avg loss: 2.842208, ppl: 17.153593 +epoch: 0, batch: 43669, sum loss: 3909.989746, avg loss: 2.676242, ppl: 14.530387 +epoch: 0, batch: 43670, sum loss: 4651.677246, avg loss: 2.792123, ppl: 16.315622 +epoch: 0, batch: 43671, sum loss: 4468.901367, avg loss: 2.660060, ppl: 14.297153 +epoch: 0, batch: 43672, sum loss: 4223.365234, avg loss: 2.758567, ppl: 15.777212 +epoch: 0, batch: 43673, sum loss: 5411.386719, avg loss: 2.921915, ppl: 18.576830 +epoch: 0, batch: 43674, sum loss: 4270.112793, avg loss: 2.455499, ppl: 11.652248 +epoch: 0, batch: 43675, sum loss: 4906.935547, avg loss: 2.759806, ppl: 15.796784 +epoch: 0, batch: 43676, sum loss: 4346.003906, avg loss: 2.825750, ppl: 16.873602 +epoch: 0, batch: 43677, sum loss: 4542.340820, avg loss: 2.702166, ppl: 14.911994 +epoch: 0, batch: 43678, sum loss: 4898.087402, avg loss: 2.676551, ppl: 14.534870 +epoch: 0, batch: 43679, sum loss: 3944.714355, avg loss: 2.654586, ppl: 
14.219092 +epoch: 0, batch: 43680, sum loss: 5016.233887, avg loss: 2.913028, ppl: 18.412462 +epoch: 0, batch: 43681, sum loss: 4677.966797, avg loss: 2.894781, ppl: 18.079548 +epoch: 0, batch: 43682, sum loss: 5639.860840, avg loss: 3.110789, ppl: 22.438747 +epoch: 0, batch: 43683, sum loss: 5008.681152, avg loss: 2.749002, ppl: 15.627025 +epoch: 0, batch: 43684, sum loss: 5628.225098, avg loss: 2.854069, ppl: 17.358276 +epoch: 0, batch: 43685, sum loss: 3703.346436, avg loss: 2.465610, ppl: 11.770660 +epoch: 0, batch: 43686, sum loss: 4592.032227, avg loss: 2.956879, ppl: 19.237831 +epoch: 0, batch: 43687, sum loss: 4469.595703, avg loss: 3.015922, ppl: 20.407890 +epoch: 0, batch: 43688, sum loss: 4442.050781, avg loss: 2.693784, ppl: 14.787533 +epoch: 0, batch: 43689, sum loss: 5040.629395, avg loss: 2.923799, ppl: 18.611860 +epoch: 0, batch: 43690, sum loss: 4924.051758, avg loss: 2.852869, ppl: 17.337448 +epoch: 0, batch: 43691, sum loss: 5836.943848, avg loss: 3.179163, ppl: 24.026640 +epoch: 0, batch: 43692, sum loss: 4901.202637, avg loss: 2.907001, ppl: 18.301821 +epoch: 0, batch: 43693, sum loss: 4426.266602, avg loss: 2.588460, ppl: 13.309259 +epoch: 0, batch: 43694, sum loss: 4575.121582, avg loss: 2.929015, ppl: 18.709196 +epoch: 0, batch: 43695, sum loss: 5262.657715, avg loss: 2.935113, ppl: 18.823633 +epoch: 0, batch: 43696, sum loss: 4960.864258, avg loss: 2.707895, ppl: 14.997676 +epoch: 0, batch: 43697, sum loss: 5413.451172, avg loss: 2.984262, ppl: 19.771906 +epoch: 0, batch: 43698, sum loss: 5521.448730, avg loss: 2.947917, ppl: 19.066202 +epoch: 0, batch: 43699, sum loss: 5081.333496, avg loss: 2.916954, ppl: 18.484894 +epoch: 0, batch: 43700, sum loss: 5362.194336, avg loss: 2.823694, ppl: 16.838934 +epoch: 0, batch: 43701, sum loss: 5130.534180, avg loss: 3.012645, ppl: 20.341125 +epoch: 0, batch: 43702, sum loss: 6093.716797, avg loss: 3.162282, ppl: 23.624439 +epoch: 0, batch: 43703, sum loss: 5177.164551, avg loss: 2.865061, ppl: 
17.550116 +epoch: 0, batch: 43704, sum loss: 5450.923340, avg loss: 3.185812, ppl: 24.186914 +epoch: 0, batch: 43705, sum loss: 4941.176758, avg loss: 2.612997, ppl: 13.639866 +epoch: 0, batch: 43706, sum loss: 4924.912109, avg loss: 2.929751, ppl: 18.722975 +epoch: 0, batch: 43707, sum loss: 5134.856445, avg loss: 3.041977, ppl: 20.946608 +epoch: 0, batch: 43708, sum loss: 5096.864258, avg loss: 2.991117, ppl: 19.907917 +epoch: 0, batch: 43709, sum loss: 5736.037109, avg loss: 3.051084, ppl: 21.138237 +epoch: 0, batch: 43710, sum loss: 4928.973145, avg loss: 2.821393, ppl: 16.800234 +epoch: 0, batch: 43711, sum loss: 4445.537598, avg loss: 2.370953, ppl: 10.707595 +epoch: 0, batch: 43712, sum loss: 4336.543945, avg loss: 2.871883, ppl: 17.670267 +epoch: 0, batch: 43713, sum loss: 5304.284668, avg loss: 2.904866, ppl: 18.262791 +epoch: 0, batch: 43714, sum loss: 4983.550781, avg loss: 2.890691, ppl: 18.005743 +epoch: 0, batch: 43715, sum loss: 5459.661133, avg loss: 3.084554, ppl: 21.857725 +epoch: 0, batch: 43716, sum loss: 5813.603027, avg loss: 3.308824, ppl: 27.352928 +epoch: 0, batch: 43717, sum loss: 3893.117432, avg loss: 2.716760, ppl: 15.131224 +epoch: 0, batch: 43718, sum loss: 5936.997559, avg loss: 3.139607, ppl: 23.094799 +epoch: 0, batch: 43719, sum loss: 5649.049805, avg loss: 2.976317, ppl: 19.615442 +epoch: 0, batch: 43720, sum loss: 5129.725586, avg loss: 2.849848, ppl: 17.285147 +epoch: 0, batch: 43721, sum loss: 5208.059570, avg loss: 3.089003, ppl: 21.955185 +epoch: 0, batch: 43722, sum loss: 4934.184082, avg loss: 2.928299, ppl: 18.695805 +epoch: 0, batch: 43723, sum loss: 4077.443115, avg loss: 2.639122, ppl: 14.000906 +epoch: 0, batch: 43724, sum loss: 5319.696289, avg loss: 3.071418, ppl: 21.572472 +epoch: 0, batch: 43725, sum loss: 4402.482422, avg loss: 2.705889, ppl: 14.967624 +epoch: 0, batch: 43726, sum loss: 4386.443359, avg loss: 2.769219, ppl: 15.946181 +epoch: 0, batch: 43727, sum loss: 4693.525879, avg loss: 2.685084, ppl: 
14.659428 +epoch: 0, batch: 43728, sum loss: 5213.479980, avg loss: 2.899599, ppl: 18.166864 +epoch: 0, batch: 43729, sum loss: 4371.033691, avg loss: 2.868132, ppl: 17.604109 +epoch: 0, batch: 43730, sum loss: 4509.635254, avg loss: 2.978623, ppl: 19.660728 +epoch: 0, batch: 43731, sum loss: 5168.079102, avg loss: 2.958259, ppl: 19.264410 +epoch: 0, batch: 43732, sum loss: 4281.284180, avg loss: 2.641138, ppl: 14.029154 +epoch: 0, batch: 43733, sum loss: 4606.090332, avg loss: 2.744988, ppl: 15.564430 +epoch: 0, batch: 43734, sum loss: 3802.124512, avg loss: 2.642199, ppl: 14.044053 +epoch: 0, batch: 43735, sum loss: 4979.496582, avg loss: 2.781842, ppl: 16.148731 +epoch: 0, batch: 43736, sum loss: 3980.147217, avg loss: 2.667659, ppl: 14.406205 +epoch: 0, batch: 43737, sum loss: 4533.854980, avg loss: 3.008530, ppl: 20.257603 +epoch: 0, batch: 43738, sum loss: 5286.658691, avg loss: 2.976722, ppl: 19.623390 +epoch: 0, batch: 43739, sum loss: 4986.195312, avg loss: 2.810708, ppl: 16.621675 +epoch: 0, batch: 43740, sum loss: 4278.282227, avg loss: 2.702642, ppl: 14.919092 +epoch: 0, batch: 43741, sum loss: 4636.166016, avg loss: 2.919500, ppl: 18.532021 +epoch: 0, batch: 43742, sum loss: 5418.266602, avg loss: 2.865292, ppl: 17.554171 +epoch: 0, batch: 43743, sum loss: 6039.494141, avg loss: 3.178681, ppl: 24.015066 +epoch: 0, batch: 43744, sum loss: 4798.586426, avg loss: 2.783403, ppl: 16.173965 +epoch: 0, batch: 43745, sum loss: 3885.253906, avg loss: 2.611058, ppl: 13.613443 +epoch: 0, batch: 43746, sum loss: 4631.349609, avg loss: 2.999579, ppl: 20.077082 +epoch: 0, batch: 43747, sum loss: 5587.076660, avg loss: 2.828900, ppl: 16.926825 +epoch: 0, batch: 43748, sum loss: 4729.457520, avg loss: 2.656999, ppl: 14.253445 +epoch: 0, batch: 43749, sum loss: 4136.572266, avg loss: 2.735828, ppl: 15.422514 +epoch: 0, batch: 43750, sum loss: 5357.802246, avg loss: 2.993186, ppl: 19.949129 +epoch: 0, batch: 43751, sum loss: 5244.743164, avg loss: 3.167115, ppl: 
23.738909 +epoch: 0, batch: 43752, sum loss: 5082.566406, avg loss: 2.845782, ppl: 17.215015 +epoch: 0, batch: 43753, sum loss: 5986.981934, avg loss: 2.986026, ppl: 19.806810 +epoch: 0, batch: 43754, sum loss: 4966.272949, avg loss: 2.867363, ppl: 17.590574 +epoch: 0, batch: 43755, sum loss: 5364.657227, avg loss: 3.088461, ppl: 21.943291 +epoch: 0, batch: 43756, sum loss: 5561.005859, avg loss: 2.847417, ppl: 17.243187 +epoch: 0, batch: 43757, sum loss: 4072.730225, avg loss: 2.649792, ppl: 14.151094 +epoch: 0, batch: 43758, sum loss: 5114.899414, avg loss: 3.021205, ppl: 20.515993 +epoch: 0, batch: 43759, sum loss: 3600.253906, avg loss: 2.535390, ppl: 12.621354 +epoch: 0, batch: 43760, sum loss: 6130.031250, avg loss: 3.105386, ppl: 22.317822 +epoch: 0, batch: 43761, sum loss: 3658.011475, avg loss: 2.302084, ppl: 9.994990 +epoch: 0, batch: 43762, sum loss: 5042.571289, avg loss: 3.291496, ppl: 26.883039 +epoch: 0, batch: 43763, sum loss: 3960.130859, avg loss: 2.479731, ppl: 11.938057 +epoch: 0, batch: 43764, sum loss: 4230.770508, avg loss: 2.850924, ppl: 17.303755 +epoch: 0, batch: 43765, sum loss: 4580.851562, avg loss: 2.617630, ppl: 13.703202 +epoch: 0, batch: 43766, sum loss: 4120.370605, avg loss: 2.797265, ppl: 16.399725 +epoch: 0, batch: 43767, sum loss: 4452.793945, avg loss: 2.597896, ppl: 13.435441 +epoch: 0, batch: 43768, sum loss: 4028.676758, avg loss: 2.652190, ppl: 14.185069 +epoch: 0, batch: 43769, sum loss: 4161.728027, avg loss: 2.678075, ppl: 14.557038 +epoch: 0, batch: 43770, sum loss: 5526.417969, avg loss: 2.856030, ppl: 17.392338 +epoch: 0, batch: 43771, sum loss: 4346.316406, avg loss: 2.666452, ppl: 14.388823 +epoch: 0, batch: 43772, sum loss: 5232.032715, avg loss: 2.834254, ppl: 17.017696 +epoch: 0, batch: 43773, sum loss: 5428.378418, avg loss: 3.058241, ppl: 21.290077 +epoch: 0, batch: 43774, sum loss: 6130.110840, avg loss: 3.137211, ppl: 23.039528 +epoch: 0, batch: 43775, sum loss: 5782.306641, avg loss: 3.152839, ppl: 
23.402403 +epoch: 0, batch: 43776, sum loss: 5885.833496, avg loss: 3.063943, ppl: 21.411810 +epoch: 0, batch: 43777, sum loss: 5613.575195, avg loss: 2.864069, ppl: 17.532719 +epoch: 0, batch: 43778, sum loss: 5141.276855, avg loss: 3.011879, ppl: 20.325550 +epoch: 0, batch: 43779, sum loss: 4377.960938, avg loss: 2.817221, ppl: 16.730288 +epoch: 0, batch: 43780, sum loss: 4350.805176, avg loss: 3.083491, ppl: 21.834486 +epoch: 0, batch: 43781, sum loss: 5739.663086, avg loss: 3.176349, ppl: 23.959124 +epoch: 0, batch: 43782, sum loss: 4976.894531, avg loss: 2.780388, ppl: 16.125275 +epoch: 0, batch: 43783, sum loss: 6632.929688, avg loss: 3.140592, ppl: 23.117540 +epoch: 0, batch: 43784, sum loss: 4458.402832, avg loss: 2.537509, ppl: 12.648122 +epoch: 0, batch: 43785, sum loss: 5470.586914, avg loss: 3.010780, ppl: 20.303226 +epoch: 0, batch: 43786, sum loss: 4086.018799, avg loss: 2.658438, ppl: 14.273972 +epoch: 0, batch: 43787, sum loss: 4614.665527, avg loss: 2.732188, ppl: 15.366472 +epoch: 0, batch: 43788, sum loss: 6083.934082, avg loss: 3.011849, ppl: 20.324940 +epoch: 0, batch: 43789, sum loss: 5454.799805, avg loss: 2.971024, ppl: 19.511885 +epoch: 0, batch: 43790, sum loss: 3822.570068, avg loss: 2.693848, ppl: 14.788471 +epoch: 0, batch: 43791, sum loss: 4829.636719, avg loss: 2.957524, ppl: 19.250246 +epoch: 0, batch: 43792, sum loss: 5238.203125, avg loss: 2.913350, ppl: 18.418394 +epoch: 0, batch: 43793, sum loss: 4384.775879, avg loss: 2.577764, ppl: 13.167657 +epoch: 0, batch: 43794, sum loss: 5992.765625, avg loss: 3.049753, ppl: 21.110138 +epoch: 0, batch: 43795, sum loss: 4584.558594, avg loss: 2.836979, ppl: 17.064144 +epoch: 0, batch: 43796, sum loss: 3927.457520, avg loss: 2.533844, ppl: 12.601851 +epoch: 0, batch: 43797, sum loss: 4685.309570, avg loss: 2.800544, ppl: 16.453592 +epoch: 0, batch: 43798, sum loss: 6316.348145, avg loss: 3.041092, ppl: 20.928083 +epoch: 0, batch: 43799, sum loss: 5715.668945, avg loss: 3.003505, ppl: 
20.156050 +epoch: 0, batch: 43800, sum loss: 4333.375000, avg loss: 2.841557, ppl: 17.142441 +epoch: 0, batch: 43801, sum loss: 4370.839355, avg loss: 3.073727, ppl: 21.622332 +epoch: 0, batch: 43802, sum loss: 6153.549805, avg loss: 3.144379, ppl: 23.205257 +epoch: 0, batch: 43803, sum loss: 4362.117676, avg loss: 2.898417, ppl: 18.145403 +epoch: 0, batch: 43804, sum loss: 3869.006836, avg loss: 2.543726, ppl: 12.727000 +epoch: 0, batch: 43805, sum loss: 3978.452881, avg loss: 2.632993, ppl: 13.915359 +epoch: 0, batch: 43806, sum loss: 4745.027832, avg loss: 2.941741, ppl: 18.948803 +epoch: 0, batch: 43807, sum loss: 5025.154297, avg loss: 3.001884, ppl: 20.123423 +epoch: 0, batch: 43808, sum loss: 5977.703125, avg loss: 2.944681, ppl: 19.004606 +epoch: 0, batch: 43809, sum loss: 5429.145020, avg loss: 2.973245, ppl: 19.555271 +epoch: 0, batch: 43810, sum loss: 4754.929688, avg loss: 3.059800, ppl: 21.323299 +epoch: 0, batch: 43811, sum loss: 5342.877441, avg loss: 3.109940, ppl: 22.419706 +epoch: 0, batch: 43812, sum loss: 5097.002930, avg loss: 2.681222, ppl: 14.602926 +epoch: 0, batch: 43813, sum loss: 4602.273926, avg loss: 2.980747, ppl: 19.702538 +epoch: 0, batch: 43814, sum loss: 4678.375488, avg loss: 2.806464, ppl: 16.551289 +epoch: 0, batch: 43815, sum loss: 4476.794922, avg loss: 2.672714, ppl: 14.479205 +epoch: 0, batch: 43816, sum loss: 4539.417969, avg loss: 2.812527, ppl: 16.651939 +epoch: 0, batch: 43817, sum loss: 4815.650391, avg loss: 2.883623, ppl: 17.878933 +epoch: 0, batch: 43818, sum loss: 4815.563965, avg loss: 2.842718, ppl: 17.162344 +epoch: 0, batch: 43819, sum loss: 4854.071777, avg loss: 2.828713, ppl: 16.923670 +epoch: 0, batch: 43820, sum loss: 6374.248535, avg loss: 3.029586, ppl: 20.688662 +epoch: 0, batch: 43821, sum loss: 5038.658203, avg loss: 2.897446, ppl: 18.127787 +epoch: 0, batch: 43822, sum loss: 4183.572266, avg loss: 2.700821, ppl: 14.891959 +epoch: 0, batch: 43823, sum loss: 4637.039551, avg loss: 3.038689, ppl: 
20.877857 +epoch: 0, batch: 43824, sum loss: 5000.684082, avg loss: 2.789004, ppl: 16.264812 +epoch: 0, batch: 43825, sum loss: 4585.252930, avg loss: 2.700385, ppl: 14.885456 +epoch: 0, batch: 43826, sum loss: 4920.304199, avg loss: 2.868982, ppl: 17.619070 +epoch: 0, batch: 43827, sum loss: 5010.699707, avg loss: 2.993250, ppl: 19.950409 +epoch: 0, batch: 43828, sum loss: 4178.336426, avg loss: 2.699184, ppl: 14.867590 +epoch: 0, batch: 43829, sum loss: 4233.865723, avg loss: 2.720993, ppl: 15.195408 +epoch: 0, batch: 43830, sum loss: 5001.303223, avg loss: 2.701947, ppl: 14.908727 +epoch: 0, batch: 43831, sum loss: 4914.222168, avg loss: 3.037220, ppl: 20.847208 +epoch: 0, batch: 43832, sum loss: 4969.998535, avg loss: 2.856321, ppl: 17.397406 +epoch: 0, batch: 43833, sum loss: 5508.897949, avg loss: 3.041910, ppl: 20.945204 +epoch: 0, batch: 43834, sum loss: 6018.658691, avg loss: 2.941671, ppl: 18.947479 +epoch: 0, batch: 43835, sum loss: 4240.093262, avg loss: 2.821087, ppl: 16.795092 +epoch: 0, batch: 43836, sum loss: 5207.256836, avg loss: 2.923783, ppl: 18.611559 +epoch: 0, batch: 43837, sum loss: 5663.385742, avg loss: 2.871899, ppl: 17.670549 +epoch: 0, batch: 43838, sum loss: 4473.893555, avg loss: 2.703259, ppl: 14.928304 +epoch: 0, batch: 43839, sum loss: 5798.746582, avg loss: 3.207271, ppl: 24.711565 +epoch: 0, batch: 43840, sum loss: 4391.280273, avg loss: 2.791659, ppl: 16.308058 +epoch: 0, batch: 43841, sum loss: 5248.779785, avg loss: 3.033977, ppl: 20.779705 +epoch: 0, batch: 43842, sum loss: 4623.534668, avg loss: 2.850514, ppl: 17.296677 +epoch: 0, batch: 43843, sum loss: 4282.779297, avg loss: 2.795548, ppl: 16.371597 +epoch: 0, batch: 43844, sum loss: 5454.463379, avg loss: 3.026894, ppl: 20.633053 +epoch: 0, batch: 43845, sum loss: 4331.808594, avg loss: 2.971062, ppl: 19.512634 +epoch: 0, batch: 43846, sum loss: 5745.148438, avg loss: 2.862555, ppl: 17.506203 +epoch: 0, batch: 43847, sum loss: 4470.389648, avg loss: 2.897207, ppl: 
18.123447 +epoch: 0, batch: 43848, sum loss: 5654.820801, avg loss: 3.020738, ppl: 20.506413 +epoch: 0, batch: 43849, sum loss: 4875.963379, avg loss: 2.839816, ppl: 17.112610 +epoch: 0, batch: 43850, sum loss: 4809.350586, avg loss: 3.069145, ppl: 21.523502 +epoch: 0, batch: 43851, sum loss: 3558.950439, avg loss: 2.793525, ppl: 16.338507 +epoch: 0, batch: 43852, sum loss: 4389.730957, avg loss: 2.729932, ppl: 15.331845 +epoch: 0, batch: 43853, sum loss: 4606.727051, avg loss: 2.773466, ppl: 16.014044 +epoch: 0, batch: 43854, sum loss: 4248.385254, avg loss: 2.760484, ppl: 15.807495 +epoch: 0, batch: 43855, sum loss: 5067.128906, avg loss: 2.833965, ppl: 17.012779 +epoch: 0, batch: 43856, sum loss: 4389.416504, avg loss: 2.880195, ppl: 17.817741 +epoch: 0, batch: 43857, sum loss: 4047.524170, avg loss: 2.498472, ppl: 12.163890 +epoch: 0, batch: 43858, sum loss: 5304.061523, avg loss: 3.098167, ppl: 22.157293 +epoch: 0, batch: 43859, sum loss: 4623.037109, avg loss: 2.697221, ppl: 14.838443 +epoch: 0, batch: 43860, sum loss: 6141.582031, avg loss: 3.095555, ppl: 22.099506 +epoch: 0, batch: 43861, sum loss: 5602.536621, avg loss: 2.846817, ppl: 17.232851 +epoch: 0, batch: 43862, sum loss: 4050.909180, avg loss: 2.659822, ppl: 14.293748 +epoch: 0, batch: 43863, sum loss: 4737.927734, avg loss: 2.843894, ppl: 17.182545 +epoch: 0, batch: 43864, sum loss: 5297.335449, avg loss: 3.049704, ppl: 21.109091 +epoch: 0, batch: 43865, sum loss: 4651.557617, avg loss: 2.997138, ppl: 20.028130 +epoch: 0, batch: 43866, sum loss: 5613.904297, avg loss: 3.349585, ppl: 28.490910 +epoch: 0, batch: 43867, sum loss: 4655.527344, avg loss: 2.761286, ppl: 15.820168 +epoch: 0, batch: 43868, sum loss: 4253.655762, avg loss: 2.688783, ppl: 14.713764 +epoch: 0, batch: 43869, sum loss: 5471.651367, avg loss: 2.938588, ppl: 18.889164 +epoch: 0, batch: 43870, sum loss: 4474.667969, avg loss: 2.922709, ppl: 18.591593 +epoch: 0, batch: 43871, sum loss: 5222.777344, avg loss: 2.879150, ppl: 
17.799131 +epoch: 0, batch: 43872, sum loss: 4009.145020, avg loss: 2.613523, ppl: 13.647052 +epoch: 0, batch: 43873, sum loss: 5339.609375, avg loss: 2.917819, ppl: 18.500902 +epoch: 0, batch: 43874, sum loss: 4971.769531, avg loss: 3.031567, ppl: 20.729687 +epoch: 0, batch: 43875, sum loss: 5468.336914, avg loss: 2.908690, ppl: 18.332762 +epoch: 0, batch: 43876, sum loss: 5254.143066, avg loss: 2.796244, ppl: 16.383003 +epoch: 0, batch: 43877, sum loss: 4859.787109, avg loss: 3.079713, ppl: 21.752155 +epoch: 0, batch: 43878, sum loss: 4123.051758, avg loss: 2.759740, ppl: 15.795737 +epoch: 0, batch: 43879, sum loss: 5126.877441, avg loss: 2.763815, ppl: 15.860241 +epoch: 0, batch: 43880, sum loss: 3799.671875, avg loss: 2.470528, ppl: 11.828690 +epoch: 0, batch: 43881, sum loss: 5213.591309, avg loss: 3.202452, ppl: 24.592751 +epoch: 0, batch: 43882, sum loss: 5203.968750, avg loss: 2.840594, ppl: 17.125940 +epoch: 0, batch: 43883, sum loss: 4732.712891, avg loss: 2.877029, ppl: 17.761425 +epoch: 0, batch: 43884, sum loss: 4688.803223, avg loss: 2.792617, ppl: 16.323677 +epoch: 0, batch: 43885, sum loss: 4796.486816, avg loss: 2.939024, ppl: 18.897392 +epoch: 0, batch: 43886, sum loss: 4646.178711, avg loss: 2.739492, ppl: 15.479122 +epoch: 0, batch: 43887, sum loss: 6063.982422, avg loss: 3.227239, ppl: 25.209965 +epoch: 0, batch: 43888, sum loss: 5770.823730, avg loss: 3.146578, ppl: 23.256336 +epoch: 0, batch: 43889, sum loss: 4776.678223, avg loss: 2.998543, ppl: 20.056284 +epoch: 0, batch: 43890, sum loss: 4394.941406, avg loss: 2.568639, ppl: 13.048058 +epoch: 0, batch: 43891, sum loss: 4021.439209, avg loss: 2.784930, ppl: 16.198689 +epoch: 0, batch: 43892, sum loss: 4782.293457, avg loss: 2.856806, ppl: 17.405849 +epoch: 0, batch: 43893, sum loss: 3511.275391, avg loss: 2.492034, ppl: 12.085831 +epoch: 0, batch: 43894, sum loss: 5505.631836, avg loss: 2.963203, ppl: 19.359886 +epoch: 0, batch: 43895, sum loss: 4519.968262, avg loss: 2.901135, ppl: 
18.194784 +epoch: 0, batch: 43896, sum loss: 4562.113281, avg loss: 2.849540, ppl: 17.279831 +epoch: 0, batch: 43897, sum loss: 5022.913574, avg loss: 2.762879, ppl: 15.845390 +epoch: 0, batch: 43898, sum loss: 4841.720215, avg loss: 2.829761, ppl: 16.941408 +epoch: 0, batch: 43899, sum loss: 4705.069824, avg loss: 2.615381, ppl: 13.672418 +epoch: 0, batch: 43900, sum loss: 4297.956055, avg loss: 2.859585, ppl: 17.454279 +epoch: 0, batch: 43901, sum loss: 4395.123535, avg loss: 2.760756, ppl: 15.811792 +epoch: 0, batch: 43902, sum loss: 4671.549316, avg loss: 2.814186, ppl: 16.679598 +epoch: 0, batch: 43903, sum loss: 3920.971680, avg loss: 2.685597, ppl: 14.666958 +epoch: 0, batch: 43904, sum loss: 4310.400879, avg loss: 2.532550, ppl: 12.585563 +epoch: 0, batch: 43905, sum loss: 4440.142578, avg loss: 2.719009, ppl: 15.165292 +epoch: 0, batch: 43906, sum loss: 6154.688477, avg loss: 3.018484, ppl: 20.460247 +epoch: 0, batch: 43907, sum loss: 4709.227539, avg loss: 2.718954, ppl: 15.164446 +epoch: 0, batch: 43908, sum loss: 5790.100098, avg loss: 3.119666, ppl: 22.638819 +epoch: 0, batch: 43909, sum loss: 5462.883789, avg loss: 3.031567, ppl: 20.729692 +epoch: 0, batch: 43910, sum loss: 4967.264648, avg loss: 2.956705, ppl: 19.234495 +epoch: 0, batch: 43911, sum loss: 4793.634277, avg loss: 2.948114, ppl: 19.069962 +epoch: 0, batch: 43912, sum loss: 4961.394043, avg loss: 3.047539, ppl: 21.063448 +epoch: 0, batch: 43913, sum loss: 4552.049805, avg loss: 2.622149, ppl: 13.765266 +epoch: 0, batch: 43914, sum loss: 5781.012207, avg loss: 3.094760, ppl: 22.081942 +epoch: 0, batch: 43915, sum loss: 4587.851074, avg loss: 2.959904, ppl: 19.296118 +epoch: 0, batch: 43916, sum loss: 5295.253906, avg loss: 2.882555, ppl: 17.859852 +epoch: 0, batch: 43917, sum loss: 6365.067383, avg loss: 3.171434, ppl: 23.841640 +epoch: 0, batch: 43918, sum loss: 4810.600586, avg loss: 3.012274, ppl: 20.333576 +epoch: 0, batch: 43919, sum loss: 4168.345703, avg loss: 2.517117, ppl: 
12.392817 +epoch: 0, batch: 43920, sum loss: 4870.321289, avg loss: 2.825012, ppl: 16.861155 +epoch: 0, batch: 43921, sum loss: 3939.702148, avg loss: 2.525450, ppl: 12.496520 +epoch: 0, batch: 43922, sum loss: 5122.952148, avg loss: 2.914080, ppl: 18.431841 +epoch: 0, batch: 43923, sum loss: 4503.140625, avg loss: 2.771163, ppl: 15.977212 +epoch: 0, batch: 43924, sum loss: 4651.797852, avg loss: 2.944176, ppl: 18.995003 +epoch: 0, batch: 43925, sum loss: 5077.425293, avg loss: 2.754978, ppl: 15.720701 +epoch: 0, batch: 43926, sum loss: 5700.068359, avg loss: 2.968786, ppl: 19.468266 +epoch: 0, batch: 43927, sum loss: 4212.017090, avg loss: 2.660781, ppl: 14.307465 +epoch: 0, batch: 43928, sum loss: 4991.769043, avg loss: 2.771665, ppl: 15.985229 +epoch: 0, batch: 43929, sum loss: 4857.378906, avg loss: 2.912098, ppl: 18.395346 +epoch: 0, batch: 43930, sum loss: 4228.511719, avg loss: 2.637874, ppl: 13.983438 +epoch: 0, batch: 43931, sum loss: 4039.806152, avg loss: 2.563329, ppl: 12.978950 +epoch: 0, batch: 43932, sum loss: 4629.949219, avg loss: 2.659362, ppl: 14.287172 +epoch: 0, batch: 43933, sum loss: 4617.914062, avg loss: 2.686396, ppl: 14.678674 +epoch: 0, batch: 43934, sum loss: 4712.241211, avg loss: 2.855904, ppl: 17.390148 +epoch: 0, batch: 43935, sum loss: 4730.279785, avg loss: 2.925343, ppl: 18.640623 +epoch: 0, batch: 43936, sum loss: 4828.063965, avg loss: 2.848415, ppl: 17.260408 +epoch: 0, batch: 43937, sum loss: 4016.882324, avg loss: 2.501172, ppl: 12.196781 +epoch: 0, batch: 43938, sum loss: 5573.682617, avg loss: 2.924282, ppl: 18.620857 +epoch: 0, batch: 43939, sum loss: 4440.298828, avg loss: 2.725782, ppl: 15.268348 +epoch: 0, batch: 43940, sum loss: 5268.391602, avg loss: 3.122935, ppl: 22.712952 +epoch: 0, batch: 43941, sum loss: 4206.887695, avg loss: 2.611352, ppl: 13.617449 +epoch: 0, batch: 43942, sum loss: 5235.292480, avg loss: 2.941176, ppl: 18.938099 +epoch: 0, batch: 43943, sum loss: 5221.698242, avg loss: 2.851829, ppl: 
17.319427 +epoch: 0, batch: 43944, sum loss: 5067.122559, avg loss: 2.971920, ppl: 19.529371 +epoch: 0, batch: 43945, sum loss: 5482.764648, avg loss: 2.949308, ppl: 19.092745 +epoch: 0, batch: 43946, sum loss: 4711.815430, avg loss: 2.995432, ppl: 19.994003 +epoch: 0, batch: 43947, sum loss: 4203.584473, avg loss: 2.617425, ppl: 13.700399 +epoch: 0, batch: 43948, sum loss: 3046.250488, avg loss: 2.249816, ppl: 9.485988 +epoch: 0, batch: 43949, sum loss: 4705.992188, avg loss: 2.873011, ppl: 17.690201 +epoch: 0, batch: 43950, sum loss: 4979.086426, avg loss: 2.809868, ppl: 16.607731 +epoch: 0, batch: 43951, sum loss: 4282.036621, avg loss: 2.606230, ppl: 13.547882 +epoch: 0, batch: 43952, sum loss: 5997.260254, avg loss: 3.128461, ppl: 22.838812 +epoch: 0, batch: 43953, sum loss: 4414.317383, avg loss: 2.688379, ppl: 14.707808 +epoch: 0, batch: 43954, sum loss: 4291.665039, avg loss: 2.492256, ppl: 12.088519 +epoch: 0, batch: 43955, sum loss: 4066.795898, avg loss: 2.622048, ppl: 13.763879 +epoch: 0, batch: 43956, sum loss: 5420.184570, avg loss: 2.918785, ppl: 18.518780 +epoch: 0, batch: 43957, sum loss: 5264.979492, avg loss: 2.769584, ppl: 15.952000 +epoch: 0, batch: 43958, sum loss: 4517.147461, avg loss: 2.762781, ppl: 15.843845 +epoch: 0, batch: 43959, sum loss: 5159.225098, avg loss: 2.882249, ppl: 17.854376 +epoch: 0, batch: 43960, sum loss: 4891.414062, avg loss: 2.995354, ppl: 19.992445 +epoch: 0, batch: 43961, sum loss: 4593.187500, avg loss: 2.802433, ppl: 16.484701 +epoch: 0, batch: 43962, sum loss: 5634.821777, avg loss: 3.016500, ppl: 20.419693 +epoch: 0, batch: 43963, sum loss: 4870.658203, avg loss: 2.812158, ppl: 16.645807 +epoch: 0, batch: 43964, sum loss: 4805.701172, avg loss: 3.060956, ppl: 21.347961 +epoch: 0, batch: 43965, sum loss: 4750.702148, avg loss: 2.982236, ppl: 19.731892 +epoch: 0, batch: 43966, sum loss: 4320.093262, avg loss: 2.648739, ppl: 14.136203 +epoch: 0, batch: 43967, sum loss: 4785.364258, avg loss: 3.034473, ppl: 
20.790022 +epoch: 0, batch: 43968, sum loss: 4371.442383, avg loss: 2.634986, ppl: 13.943123 +epoch: 0, batch: 43969, sum loss: 4409.295410, avg loss: 2.985305, ppl: 19.792540 +epoch: 0, batch: 43970, sum loss: 4156.614746, avg loss: 2.652594, ppl: 14.190804 +epoch: 0, batch: 43971, sum loss: 4440.351562, avg loss: 2.630540, ppl: 13.881266 +epoch: 0, batch: 43972, sum loss: 5827.589355, avg loss: 2.947693, ppl: 19.061930 +epoch: 0, batch: 43973, sum loss: 4710.235352, avg loss: 2.741697, ppl: 15.513290 +epoch: 0, batch: 43974, sum loss: 4670.351074, avg loss: 2.890069, ppl: 17.994547 +epoch: 0, batch: 43975, sum loss: 5058.470215, avg loss: 2.835465, ppl: 17.038328 +epoch: 0, batch: 43976, sum loss: 5130.232422, avg loss: 2.929887, ppl: 18.725519 +epoch: 0, batch: 43977, sum loss: 4249.829102, avg loss: 2.626594, ppl: 13.826597 +epoch: 0, batch: 43978, sum loss: 4735.237793, avg loss: 2.801916, ppl: 16.476183 +epoch: 0, batch: 43979, sum loss: 4665.429688, avg loss: 3.031468, ppl: 20.727646 +epoch: 0, batch: 43980, sum loss: 5040.978027, avg loss: 2.925698, ppl: 18.647242 +epoch: 0, batch: 43981, sum loss: 3719.394287, avg loss: 2.508021, ppl: 12.280599 +epoch: 0, batch: 43982, sum loss: 4445.524902, avg loss: 2.932405, ppl: 18.772717 +epoch: 0, batch: 43983, sum loss: 4655.284180, avg loss: 2.902297, ppl: 18.215940 +epoch: 0, batch: 43984, sum loss: 5593.662109, avg loss: 3.015451, ppl: 20.398293 +epoch: 0, batch: 43985, sum loss: 4689.158203, avg loss: 2.667325, ppl: 14.401402 +epoch: 0, batch: 43986, sum loss: 5322.373047, avg loss: 3.130808, ppl: 22.892466 +epoch: 0, batch: 43987, sum loss: 5055.435059, avg loss: 2.885522, ppl: 17.912922 +epoch: 0, batch: 43988, sum loss: 4552.735840, avg loss: 2.708350, ppl: 15.004493 +epoch: 0, batch: 43989, sum loss: 4593.152832, avg loss: 2.698680, ppl: 14.860099 +epoch: 0, batch: 43990, sum loss: 4966.821289, avg loss: 2.956441, ppl: 19.229420 +epoch: 0, batch: 43991, sum loss: 5148.915039, avg loss: 2.743162, ppl: 
15.536031 +epoch: 0, batch: 43992, sum loss: 3786.615234, avg loss: 2.633251, ppl: 13.918949 +epoch: 0, batch: 43993, sum loss: 5801.748047, avg loss: 3.015462, ppl: 20.398506 +epoch: 0, batch: 43994, sum loss: 4489.710938, avg loss: 2.797328, ppl: 16.400757 +epoch: 0, batch: 43995, sum loss: 4281.777344, avg loss: 2.671103, ppl: 14.455898 +epoch: 0, batch: 43996, sum loss: 4130.991211, avg loss: 2.724928, ppl: 15.255322 +epoch: 0, batch: 43997, sum loss: 3817.869629, avg loss: 2.622163, ppl: 13.765471 +epoch: 0, batch: 43998, sum loss: 4929.984375, avg loss: 2.971660, ppl: 19.524305 +epoch: 0, batch: 43999, sum loss: 3815.600586, avg loss: 2.492228, ppl: 12.088176 +epoch: 0, batch: 44000, sum loss: 5345.153320, avg loss: 3.196862, ppl: 24.455673 +epoch: 0, batch: 44001, sum loss: 4896.576172, avg loss: 2.926824, ppl: 18.668243 +epoch: 0, batch: 44002, sum loss: 4287.043945, avg loss: 2.681078, ppl: 14.600823 +epoch: 0, batch: 44003, sum loss: 5050.229492, avg loss: 2.965490, ppl: 19.404211 +epoch: 0, batch: 44004, sum loss: 4246.752930, avg loss: 2.598992, ppl: 13.450175 +epoch: 0, batch: 44005, sum loss: 4226.054199, avg loss: 2.594263, ppl: 13.386716 +epoch: 0, batch: 44006, sum loss: 4053.728027, avg loss: 2.742712, ppl: 15.529043 +epoch: 0, batch: 44007, sum loss: 4548.833496, avg loss: 2.714101, ppl: 15.091042 +epoch: 0, batch: 44008, sum loss: 4330.222656, avg loss: 2.725124, ppl: 15.258311 +epoch: 0, batch: 44009, sum loss: 5189.584961, avg loss: 2.972271, ppl: 19.536236 +epoch: 0, batch: 44010, sum loss: 4993.674316, avg loss: 2.791322, ppl: 16.302549 +epoch: 0, batch: 44011, sum loss: 6320.035156, avg loss: 3.104143, ppl: 22.290112 +epoch: 0, batch: 44012, sum loss: 4725.467285, avg loss: 2.789532, ppl: 16.273401 +epoch: 0, batch: 44013, sum loss: 4646.785156, avg loss: 2.845551, ppl: 17.211044 +epoch: 0, batch: 44014, sum loss: 4786.477539, avg loss: 2.693572, ppl: 14.784392 +epoch: 0, batch: 44015, sum loss: 4117.844238, avg loss: 2.818511, ppl: 
16.751884 +epoch: 0, batch: 44016, sum loss: 5745.750000, avg loss: 2.893127, ppl: 18.049662 +epoch: 0, batch: 44017, sum loss: 4752.628906, avg loss: 2.712688, ppl: 15.069728 +epoch: 0, batch: 44018, sum loss: 4862.808594, avg loss: 2.863845, ppl: 17.528793 +epoch: 0, batch: 44019, sum loss: 4940.528809, avg loss: 2.997894, ppl: 20.043278 +epoch: 0, batch: 44020, sum loss: 4948.811523, avg loss: 3.026796, ppl: 20.631021 +epoch: 0, batch: 44021, sum loss: 4108.579102, avg loss: 2.605313, ppl: 13.535465 +epoch: 0, batch: 44022, sum loss: 5307.946777, avg loss: 3.230643, ppl: 25.295918 +epoch: 0, batch: 44023, sum loss: 5881.532227, avg loss: 3.085799, ppl: 21.884935 +epoch: 0, batch: 44024, sum loss: 4362.730469, avg loss: 2.811038, ppl: 16.627165 +epoch: 0, batch: 44025, sum loss: 4102.600586, avg loss: 2.577010, ppl: 13.157743 +epoch: 0, batch: 44026, sum loss: 4041.408936, avg loss: 2.513314, ppl: 12.345777 +epoch: 0, batch: 44027, sum loss: 5164.658203, avg loss: 3.186094, ppl: 24.193737 +epoch: 0, batch: 44028, sum loss: 4060.770264, avg loss: 2.583187, ppl: 13.239265 +epoch: 0, batch: 44029, sum loss: 4634.794434, avg loss: 3.019410, ppl: 20.479204 +epoch: 0, batch: 44030, sum loss: 4572.498047, avg loss: 2.673975, ppl: 14.497489 +epoch: 0, batch: 44031, sum loss: 3246.211182, avg loss: 2.516443, ppl: 12.384464 +epoch: 0, batch: 44032, sum loss: 4475.691406, avg loss: 2.962072, ppl: 19.338005 +epoch: 0, batch: 44033, sum loss: 5865.472656, avg loss: 3.059715, ppl: 21.321470 +epoch: 0, batch: 44034, sum loss: 4783.185547, avg loss: 2.900658, ppl: 18.186115 +epoch: 0, batch: 44035, sum loss: 4513.599121, avg loss: 2.799999, ppl: 16.444637 +epoch: 0, batch: 44036, sum loss: 4398.807617, avg loss: 2.821557, ppl: 16.802998 +epoch: 0, batch: 44037, sum loss: 4944.901367, avg loss: 3.002369, ppl: 20.133169 +epoch: 0, batch: 44038, sum loss: 4830.898438, avg loss: 2.825087, ppl: 16.862410 +epoch: 0, batch: 44039, sum loss: 4670.982910, avg loss: 2.770452, ppl: 
15.965857 +epoch: 0, batch: 44040, sum loss: 4954.668945, avg loss: 2.990144, ppl: 19.888552 +epoch: 0, batch: 44041, sum loss: 5080.593262, avg loss: 3.134234, ppl: 22.971033 +epoch: 0, batch: 44042, sum loss: 5327.724121, avg loss: 3.230882, ppl: 25.301956 +epoch: 0, batch: 44043, sum loss: 4916.901367, avg loss: 3.108028, ppl: 22.376883 +epoch: 0, batch: 44044, sum loss: 5842.244141, avg loss: 3.044421, ppl: 20.997869 +epoch: 0, batch: 44045, sum loss: 4498.995117, avg loss: 2.760120, ppl: 15.801734 +epoch: 0, batch: 44046, sum loss: 4224.612305, avg loss: 2.627246, ppl: 13.835619 +epoch: 0, batch: 44047, sum loss: 4087.119141, avg loss: 2.680078, ppl: 14.586231 +epoch: 0, batch: 44048, sum loss: 4128.157227, avg loss: 2.661610, ppl: 14.319323 +epoch: 0, batch: 44049, sum loss: 4511.020996, avg loss: 2.722403, ppl: 15.216838 +epoch: 0, batch: 44050, sum loss: 5032.052246, avg loss: 2.795585, ppl: 16.372198 +epoch: 0, batch: 44051, sum loss: 4629.686035, avg loss: 2.948845, ppl: 19.083893 +epoch: 0, batch: 44052, sum loss: 4447.956543, avg loss: 2.808053, ppl: 16.577614 +epoch: 0, batch: 44053, sum loss: 4658.405273, avg loss: 2.786128, ppl: 16.218098 +epoch: 0, batch: 44054, sum loss: 4666.916992, avg loss: 3.022615, ppl: 20.544941 +epoch: 0, batch: 44055, sum loss: 4835.374512, avg loss: 2.778951, ppl: 16.102119 +epoch: 0, batch: 44056, sum loss: 5690.964355, avg loss: 2.999981, ppl: 20.085159 +epoch: 0, batch: 44057, sum loss: 5031.262207, avg loss: 2.793594, ppl: 16.339634 +epoch: 0, batch: 44058, sum loss: 5833.811523, avg loss: 2.916906, ppl: 18.484007 +epoch: 0, batch: 44059, sum loss: 5251.192383, avg loss: 2.960086, ppl: 19.299629 +epoch: 0, batch: 44060, sum loss: 4452.331055, avg loss: 2.968221, ppl: 19.457268 +epoch: 0, batch: 44061, sum loss: 5533.687988, avg loss: 3.057286, ppl: 21.269758 +epoch: 0, batch: 44062, sum loss: 5409.712891, avg loss: 3.172852, ppl: 23.875486 +epoch: 0, batch: 44063, sum loss: 5083.717285, avg loss: 2.903322, ppl: 
18.234629 +epoch: 0, batch: 44064, sum loss: 5057.507812, avg loss: 2.920039, ppl: 18.542013 +epoch: 0, batch: 44065, sum loss: 4800.258789, avg loss: 2.888242, ppl: 17.961714 +epoch: 0, batch: 44066, sum loss: 5111.892090, avg loss: 3.044605, ppl: 21.001734 +epoch: 0, batch: 44067, sum loss: 5171.222656, avg loss: 2.821180, ppl: 16.796658 +epoch: 0, batch: 44068, sum loss: 4518.780273, avg loss: 2.699391, ppl: 14.870674 +epoch: 0, batch: 44069, sum loss: 4019.122070, avg loss: 2.681202, ppl: 14.602637 +epoch: 0, batch: 44070, sum loss: 4541.844727, avg loss: 2.821022, ppl: 16.793999 +epoch: 0, batch: 44071, sum loss: 5772.428223, avg loss: 3.166445, ppl: 23.722988 +epoch: 0, batch: 44072, sum loss: 5881.846680, avg loss: 3.042859, ppl: 20.965103 +epoch: 0, batch: 44073, sum loss: 5392.870605, avg loss: 3.198618, ppl: 24.498659 +epoch: 0, batch: 44074, sum loss: 5534.087402, avg loss: 2.972120, ppl: 19.533287 +epoch: 0, batch: 44075, sum loss: 4846.649414, avg loss: 2.700083, ppl: 14.880971 +epoch: 0, batch: 44076, sum loss: 5107.298340, avg loss: 2.895294, ppl: 18.088818 +epoch: 0, batch: 44077, sum loss: 5129.986328, avg loss: 3.053563, ppl: 21.190720 +epoch: 0, batch: 44078, sum loss: 5636.175781, avg loss: 2.918786, ppl: 18.518793 +epoch: 0, batch: 44079, sum loss: 5008.245117, avg loss: 3.042676, ppl: 20.961269 +epoch: 0, batch: 44080, sum loss: 6012.206055, avg loss: 3.034935, ppl: 20.799622 +epoch: 0, batch: 44081, sum loss: 4148.907715, avg loss: 2.695847, ppl: 14.818058 +epoch: 0, batch: 44082, sum loss: 5268.734863, avg loss: 2.985119, ppl: 19.788855 +epoch: 0, batch: 44083, sum loss: 5030.969727, avg loss: 2.829567, ppl: 16.938124 +epoch: 0, batch: 44084, sum loss: 5605.355469, avg loss: 2.973663, ppl: 19.563456 +epoch: 0, batch: 44085, sum loss: 5252.873047, avg loss: 3.111892, ppl: 22.463499 +epoch: 0, batch: 44086, sum loss: 4954.904297, avg loss: 2.972348, ppl: 19.537741 +epoch: 0, batch: 44087, sum loss: 4212.934570, avg loss: 2.716270, ppl: 
15.123801 +epoch: 0, batch: 44088, sum loss: 4140.479492, avg loss: 2.745676, ppl: 15.575140 +epoch: 0, batch: 44089, sum loss: 4850.466309, avg loss: 2.983067, ppl: 19.748283 +epoch: 0, batch: 44090, sum loss: 4972.250977, avg loss: 3.146994, ppl: 23.266029 +epoch: 0, batch: 44091, sum loss: 5612.521973, avg loss: 3.097418, ppl: 22.140717 +epoch: 0, batch: 44092, sum loss: 4741.044922, avg loss: 2.712268, ppl: 15.063406 +epoch: 0, batch: 44093, sum loss: 4500.775879, avg loss: 2.741033, ppl: 15.502989 +epoch: 0, batch: 44094, sum loss: 5951.357422, avg loss: 3.056681, ppl: 21.256887 +epoch: 0, batch: 44095, sum loss: 4523.823242, avg loss: 2.678403, ppl: 14.561825 +epoch: 0, batch: 44096, sum loss: 6370.523438, avg loss: 3.150605, ppl: 23.350185 +epoch: 0, batch: 44097, sum loss: 5043.214355, avg loss: 2.898399, ppl: 18.145075 +epoch: 0, batch: 44098, sum loss: 5276.423828, avg loss: 2.814093, ppl: 16.678036 +epoch: 0, batch: 44099, sum loss: 5434.852539, avg loss: 2.992760, ppl: 19.940647 +epoch: 0, batch: 44100, sum loss: 4047.520264, avg loss: 2.628260, ppl: 13.849649 +epoch: 0, batch: 44101, sum loss: 4279.358887, avg loss: 2.679624, ppl: 14.579604 +epoch: 0, batch: 44102, sum loss: 4505.194824, avg loss: 2.906577, ppl: 18.294077 +epoch: 0, batch: 44103, sum loss: 5179.166504, avg loss: 2.584414, ppl: 13.255526 +epoch: 0, batch: 44104, sum loss: 5123.257324, avg loss: 2.865356, ppl: 17.555309 +epoch: 0, batch: 44105, sum loss: 5344.331055, avg loss: 3.052160, ppl: 21.160994 +epoch: 0, batch: 44106, sum loss: 4476.182129, avg loss: 2.818755, ppl: 16.755972 +epoch: 0, batch: 44107, sum loss: 4012.183838, avg loss: 2.644814, ppl: 14.080832 +epoch: 0, batch: 44108, sum loss: 5583.263672, avg loss: 3.138428, ppl: 23.067575 +epoch: 0, batch: 44109, sum loss: 4543.543945, avg loss: 2.837941, ppl: 17.080568 +epoch: 0, batch: 44110, sum loss: 5595.027832, avg loss: 2.965039, ppl: 19.395451 +epoch: 0, batch: 44111, sum loss: 4668.490723, avg loss: 2.864105, ppl: 
17.533350 +epoch: 0, batch: 44112, sum loss: 4242.302246, avg loss: 2.813198, ppl: 16.663115 +epoch: 0, batch: 44113, sum loss: 4823.515625, avg loss: 2.746877, ppl: 15.593856 +epoch: 0, batch: 44114, sum loss: 4376.847168, avg loss: 2.842108, ppl: 17.151892 +epoch: 0, batch: 44115, sum loss: 4462.688477, avg loss: 2.924436, ppl: 18.623713 +epoch: 0, batch: 44116, sum loss: 4883.357422, avg loss: 2.966803, ppl: 19.429701 +epoch: 0, batch: 44117, sum loss: 4934.290039, avg loss: 2.546073, ppl: 12.756914 +epoch: 0, batch: 44118, sum loss: 3914.387207, avg loss: 2.695859, ppl: 14.818241 +epoch: 0, batch: 44119, sum loss: 4692.771484, avg loss: 2.868442, ppl: 17.609566 +epoch: 0, batch: 44120, sum loss: 4441.227051, avg loss: 2.741498, ppl: 15.510205 +epoch: 0, batch: 44121, sum loss: 4728.347656, avg loss: 2.685035, ppl: 14.658721 +epoch: 0, batch: 44122, sum loss: 5103.066406, avg loss: 2.901118, ppl: 18.194477 +epoch: 0, batch: 44123, sum loss: 4670.246582, avg loss: 2.753683, ppl: 15.700352 +epoch: 0, batch: 44124, sum loss: 4180.341797, avg loss: 2.894974, ppl: 18.083023 +epoch: 0, batch: 44125, sum loss: 4646.160156, avg loss: 2.785468, ppl: 16.207399 +epoch: 0, batch: 44126, sum loss: 5127.102539, avg loss: 2.882014, ppl: 17.850183 +epoch: 0, batch: 44127, sum loss: 4423.707031, avg loss: 2.650514, ppl: 14.161311 +epoch: 0, batch: 44128, sum loss: 4837.558594, avg loss: 2.854017, ppl: 17.357363 +epoch: 0, batch: 44129, sum loss: 5254.525879, avg loss: 2.927313, ppl: 18.677370 +epoch: 0, batch: 44130, sum loss: 4625.027344, avg loss: 2.917998, ppl: 18.504211 +epoch: 0, batch: 44131, sum loss: 4154.883301, avg loss: 2.626348, ppl: 13.823199 +epoch: 0, batch: 44132, sum loss: 4481.666016, avg loss: 2.858205, ppl: 17.430218 +epoch: 0, batch: 44133, sum loss: 4765.515625, avg loss: 3.096501, ppl: 22.120424 +epoch: 0, batch: 44134, sum loss: 5248.176270, avg loss: 3.017928, ppl: 20.448875 +epoch: 0, batch: 44135, sum loss: 5028.796875, avg loss: 2.847563, ppl: 
17.245707 +epoch: 0, batch: 44136, sum loss: 4738.094238, avg loss: 2.721479, ppl: 15.202786 +epoch: 0, batch: 44137, sum loss: 6386.331055, avg loss: 3.178861, ppl: 24.019371 +epoch: 0, batch: 44138, sum loss: 4389.883789, avg loss: 3.065562, ppl: 21.446501 +epoch: 0, batch: 44139, sum loss: 4204.331055, avg loss: 2.897540, ppl: 18.129498 +epoch: 0, batch: 44140, sum loss: 6158.247559, avg loss: 3.108656, ppl: 22.390924 +epoch: 0, batch: 44141, sum loss: 5529.348633, avg loss: 2.992072, ppl: 19.926926 +epoch: 0, batch: 44142, sum loss: 4920.944824, avg loss: 2.993275, ppl: 19.950922 +epoch: 0, batch: 44143, sum loss: 5163.181152, avg loss: 2.876424, ppl: 17.750685 +epoch: 0, batch: 44144, sum loss: 4883.164551, avg loss: 2.906646, ppl: 18.295328 +epoch: 0, batch: 44145, sum loss: 5935.304688, avg loss: 3.034409, ppl: 20.788694 +epoch: 0, batch: 44146, sum loss: 4437.505371, avg loss: 2.582948, ppl: 13.236107 +epoch: 0, batch: 44147, sum loss: 3740.711914, avg loss: 2.720518, ppl: 15.188182 +epoch: 0, batch: 44148, sum loss: 4364.295898, avg loss: 2.724280, ppl: 15.245428 +epoch: 0, batch: 44149, sum loss: 4475.173340, avg loss: 2.691024, ppl: 14.746774 +epoch: 0, batch: 44150, sum loss: 4936.810059, avg loss: 2.945591, ppl: 19.021896 +epoch: 0, batch: 44151, sum loss: 5664.207031, avg loss: 2.995350, ppl: 19.992359 +epoch: 0, batch: 44152, sum loss: 4040.489746, avg loss: 2.792322, ppl: 16.318867 +epoch: 0, batch: 44153, sum loss: 4887.432129, avg loss: 2.566929, ppl: 13.025759 +epoch: 0, batch: 44154, sum loss: 4237.844727, avg loss: 2.510572, ppl: 12.311967 +epoch: 0, batch: 44155, sum loss: 3821.759033, avg loss: 2.587515, ppl: 13.296683 +epoch: 0, batch: 44156, sum loss: 4756.510742, avg loss: 2.809516, ppl: 16.601885 +epoch: 0, batch: 44157, sum loss: 5697.275391, avg loss: 3.200716, ppl: 24.550114 +epoch: 0, batch: 44158, sum loss: 4730.648438, avg loss: 2.635459, ppl: 13.949710 +epoch: 0, batch: 44159, sum loss: 4981.296875, avg loss: 2.790643, ppl: 
16.291487 +epoch: 0, batch: 44160, sum loss: 4471.233398, avg loss: 2.667800, ppl: 14.408242 +epoch: 0, batch: 44161, sum loss: 4699.395508, avg loss: 2.753014, ppl: 15.689855 +epoch: 0, batch: 44162, sum loss: 4826.534180, avg loss: 2.829152, ppl: 16.931103 +epoch: 0, batch: 44163, sum loss: 4513.920410, avg loss: 2.871451, ppl: 17.662622 +epoch: 0, batch: 44164, sum loss: 4919.781250, avg loss: 2.835609, ppl: 17.040770 +epoch: 0, batch: 44165, sum loss: 5273.808105, avg loss: 2.910490, ppl: 18.365797 +epoch: 0, batch: 44166, sum loss: 4474.547852, avg loss: 2.705289, ppl: 14.958637 +epoch: 0, batch: 44167, sum loss: 5054.916504, avg loss: 2.828717, ppl: 16.923725 +epoch: 0, batch: 44168, sum loss: 4694.740234, avg loss: 2.841853, ppl: 17.147505 +epoch: 0, batch: 44169, sum loss: 5073.177734, avg loss: 2.866202, ppl: 17.570162 +epoch: 0, batch: 44170, sum loss: 5212.177734, avg loss: 2.866984, ppl: 17.583912 +epoch: 0, batch: 44171, sum loss: 5088.220703, avg loss: 2.934383, ppl: 18.809896 +epoch: 0, batch: 44172, sum loss: 6057.355469, avg loss: 3.127184, ppl: 22.809654 +epoch: 0, batch: 44173, sum loss: 4247.950684, avg loss: 2.818813, ppl: 16.756943 +epoch: 0, batch: 44174, sum loss: 5184.691406, avg loss: 2.837817, ppl: 17.078442 +epoch: 0, batch: 44175, sum loss: 4049.008789, avg loss: 2.552969, ppl: 12.845184 +epoch: 0, batch: 44176, sum loss: 4509.047852, avg loss: 2.866528, ppl: 17.575880 +epoch: 0, batch: 44177, sum loss: 5302.348633, avg loss: 3.241045, ppl: 25.560406 +epoch: 0, batch: 44178, sum loss: 4583.570312, avg loss: 2.864731, ppl: 17.544338 +epoch: 0, batch: 44179, sum loss: 4213.227539, avg loss: 2.779174, ppl: 16.105709 +epoch: 0, batch: 44180, sum loss: 4885.668457, avg loss: 2.799810, ppl: 16.441521 +epoch: 0, batch: 44181, sum loss: 4472.810059, avg loss: 2.544261, ppl: 12.733808 +epoch: 0, batch: 44182, sum loss: 4508.751953, avg loss: 2.786620, ppl: 16.226089 +epoch: 0, batch: 44183, sum loss: 4208.942383, avg loss: 2.968225, ppl: 
19.457342 +epoch: 0, batch: 44184, sum loss: 3549.250488, avg loss: 2.640811, ppl: 14.024579 +epoch: 0, batch: 44185, sum loss: 5305.468750, avg loss: 3.002529, ppl: 20.136400 +epoch: 0, batch: 44186, sum loss: 5432.041992, avg loss: 2.787092, ppl: 16.233744 +epoch: 0, batch: 44187, sum loss: 4374.112793, avg loss: 2.744111, ppl: 15.550780 +epoch: 0, batch: 44188, sum loss: 4262.776855, avg loss: 2.725561, ppl: 15.264970 +epoch: 0, batch: 44189, sum loss: 5260.330566, avg loss: 3.024917, ppl: 20.592299 +epoch: 0, batch: 44190, sum loss: 4810.557129, avg loss: 2.870261, ppl: 17.641621 +epoch: 0, batch: 44191, sum loss: 3979.221436, avg loss: 2.552419, ppl: 12.838124 +epoch: 0, batch: 44192, sum loss: 4524.975586, avg loss: 2.759131, ppl: 15.786126 +epoch: 0, batch: 44193, sum loss: 4762.316406, avg loss: 2.882758, ppl: 17.863476 +epoch: 0, batch: 44194, sum loss: 5105.478516, avg loss: 2.808294, ppl: 16.581606 +epoch: 0, batch: 44195, sum loss: 5529.973633, avg loss: 2.827185, ppl: 16.897825 +epoch: 0, batch: 44196, sum loss: 5421.389160, avg loss: 2.825111, ppl: 16.862823 +epoch: 0, batch: 44197, sum loss: 4280.712402, avg loss: 2.709312, ppl: 15.018935 +epoch: 0, batch: 44198, sum loss: 4804.248535, avg loss: 2.657217, ppl: 14.256558 +epoch: 0, batch: 44199, sum loss: 5312.527344, avg loss: 2.977874, ppl: 19.646009 +epoch: 0, batch: 44200, sum loss: 4907.173828, avg loss: 3.008690, ppl: 20.260849 +epoch: 0, batch: 44201, sum loss: 5009.479492, avg loss: 2.790797, ppl: 16.293993 +epoch: 0, batch: 44202, sum loss: 5130.483887, avg loss: 2.900217, ppl: 18.178091 +epoch: 0, batch: 44203, sum loss: 4677.670898, avg loss: 2.776066, ppl: 16.055731 +epoch: 0, batch: 44204, sum loss: 5348.456055, avg loss: 2.886377, ppl: 17.928230 +epoch: 0, batch: 44205, sum loss: 5228.380859, avg loss: 3.045068, ppl: 21.011450 +epoch: 0, batch: 44206, sum loss: 5008.071777, avg loss: 2.898190, ppl: 18.141275 +epoch: 0, batch: 44207, sum loss: 4381.159180, avg loss: 2.644031, ppl: 
14.069802 +epoch: 0, batch: 44208, sum loss: 6035.260254, avg loss: 3.071379, ppl: 21.571634 +epoch: 0, batch: 44209, sum loss: 6032.401367, avg loss: 3.217281, ppl: 24.960157 +epoch: 0, batch: 44210, sum loss: 4486.715332, avg loss: 2.747529, ppl: 15.604031 +epoch: 0, batch: 44211, sum loss: 4556.138184, avg loss: 2.948957, ppl: 19.086037 +epoch: 0, batch: 44212, sum loss: 4755.336426, avg loss: 2.708050, ppl: 15.000005 +epoch: 0, batch: 44213, sum loss: 4234.792969, avg loss: 2.819436, ppl: 16.767389 +epoch: 0, batch: 44214, sum loss: 5141.182617, avg loss: 2.891554, ppl: 18.021290 +epoch: 0, batch: 44215, sum loss: 3846.648926, avg loss: 2.684333, ppl: 14.648429 +epoch: 0, batch: 44216, sum loss: 5501.164062, avg loss: 2.975210, ppl: 19.593746 +epoch: 0, batch: 44217, sum loss: 5011.137207, avg loss: 2.981045, ppl: 19.708406 +epoch: 0, batch: 44218, sum loss: 5084.530273, avg loss: 2.792164, ppl: 16.316288 +epoch: 0, batch: 44219, sum loss: 4371.920898, avg loss: 2.789994, ppl: 16.280930 +epoch: 0, batch: 44220, sum loss: 4669.750000, avg loss: 2.666905, ppl: 14.395342 +epoch: 0, batch: 44221, sum loss: 4266.905762, avg loss: 2.692054, ppl: 14.761967 +epoch: 0, batch: 44222, sum loss: 5457.173340, avg loss: 2.978806, ppl: 19.664333 +epoch: 0, batch: 44223, sum loss: 5183.910645, avg loss: 2.982687, ppl: 19.740799 +epoch: 0, batch: 44224, sum loss: 4108.132812, avg loss: 2.582107, ppl: 13.224977 +epoch: 0, batch: 44225, sum loss: 4784.087891, avg loss: 2.866440, ppl: 17.574339 +epoch: 0, batch: 44226, sum loss: 4943.472656, avg loss: 2.797664, ppl: 16.406281 +epoch: 0, batch: 44227, sum loss: 4768.768555, avg loss: 2.872752, ppl: 17.685625 +epoch: 0, batch: 44228, sum loss: 4644.950684, avg loss: 2.681842, ppl: 14.611985 +epoch: 0, batch: 44229, sum loss: 5134.301758, avg loss: 3.048873, ppl: 21.091560 +epoch: 0, batch: 44230, sum loss: 5147.094727, avg loss: 2.873866, ppl: 17.705341 +epoch: 0, batch: 44231, sum loss: 6009.679688, avg loss: 3.029072, ppl: 
20.678040 +epoch: 0, batch: 44232, sum loss: 4673.260254, avg loss: 2.740915, ppl: 15.501166 +epoch: 0, batch: 44233, sum loss: 3881.071777, avg loss: 2.810334, ppl: 16.615475 +epoch: 0, batch: 44234, sum loss: 5479.805664, avg loss: 3.009229, ppl: 20.271763 +epoch: 0, batch: 44235, sum loss: 4797.860352, avg loss: 2.782982, ppl: 16.167158 +epoch: 0, batch: 44236, sum loss: 4263.959961, avg loss: 2.894746, ppl: 18.078901 +epoch: 0, batch: 44237, sum loss: 4396.177246, avg loss: 2.618331, ppl: 13.712814 +epoch: 0, batch: 44238, sum loss: 5099.987793, avg loss: 3.063056, ppl: 21.392828 +epoch: 0, batch: 44239, sum loss: 5832.317871, avg loss: 2.989399, ppl: 19.873743 +epoch: 0, batch: 44240, sum loss: 4721.872070, avg loss: 2.984748, ppl: 19.781525 +epoch: 0, batch: 44241, sum loss: 5314.746582, avg loss: 3.272627, ppl: 26.380552 +epoch: 0, batch: 44242, sum loss: 4941.745605, avg loss: 2.962677, ppl: 19.349707 +epoch: 0, batch: 44243, sum loss: 6334.526367, avg loss: 3.186381, ppl: 24.200695 +epoch: 0, batch: 44244, sum loss: 5153.403809, avg loss: 2.961726, ppl: 19.331318 +epoch: 0, batch: 44245, sum loss: 5107.665527, avg loss: 2.877558, ppl: 17.770824 +epoch: 0, batch: 44246, sum loss: 4061.754395, avg loss: 2.608706, ppl: 13.581458 +epoch: 0, batch: 44247, sum loss: 4188.792480, avg loss: 2.678256, ppl: 14.559680 +epoch: 0, batch: 44248, sum loss: 5193.053711, avg loss: 3.003501, ppl: 20.155989 +epoch: 0, batch: 44249, sum loss: 4864.506348, avg loss: 2.980703, ppl: 19.701654 +epoch: 0, batch: 44250, sum loss: 5191.907227, avg loss: 3.054063, ppl: 21.201317 +epoch: 0, batch: 44251, sum loss: 4795.303711, avg loss: 2.784729, ppl: 16.195433 +epoch: 0, batch: 44252, sum loss: 4995.645020, avg loss: 2.836823, ppl: 17.061470 +epoch: 0, batch: 44253, sum loss: 5072.147949, avg loss: 2.830440, ppl: 16.952915 +epoch: 0, batch: 44254, sum loss: 4575.628906, avg loss: 2.815772, ppl: 16.706060 +epoch: 0, batch: 44255, sum loss: 3467.788574, avg loss: 2.559253, ppl: 
12.926164 +epoch: 0, batch: 44256, sum loss: 4907.299316, avg loss: 2.786655, ppl: 16.226650 +epoch: 0, batch: 44257, sum loss: 6034.477539, avg loss: 3.152810, ppl: 23.401722 +epoch: 0, batch: 44258, sum loss: 5103.982422, avg loss: 2.972616, ppl: 19.542986 +epoch: 0, batch: 44259, sum loss: 4597.884277, avg loss: 2.803588, ppl: 16.503756 +epoch: 0, batch: 44260, sum loss: 4580.066406, avg loss: 2.792723, ppl: 16.325420 +epoch: 0, batch: 44261, sum loss: 4847.402344, avg loss: 2.918364, ppl: 18.510975 +epoch: 0, batch: 44262, sum loss: 4605.666504, avg loss: 2.715605, ppl: 15.113755 +epoch: 0, batch: 44263, sum loss: 5102.241211, avg loss: 3.147588, ppl: 23.279858 +epoch: 0, batch: 44264, sum loss: 5252.055664, avg loss: 2.643209, ppl: 14.058237 +epoch: 0, batch: 44265, sum loss: 5239.366211, avg loss: 2.883526, ppl: 17.877195 +epoch: 0, batch: 44266, sum loss: 4822.341797, avg loss: 2.958492, ppl: 19.268888 +epoch: 0, batch: 44267, sum loss: 5304.698242, avg loss: 2.913069, ppl: 18.413218 +epoch: 0, batch: 44268, sum loss: 3801.614746, avg loss: 2.527669, ppl: 12.524281 +epoch: 0, batch: 44269, sum loss: 4349.273438, avg loss: 2.740563, ppl: 15.495709 +epoch: 0, batch: 44270, sum loss: 5354.687500, avg loss: 3.063323, ppl: 21.398537 +epoch: 0, batch: 44271, sum loss: 4763.532227, avg loss: 2.942268, ppl: 18.958803 +epoch: 0, batch: 44272, sum loss: 4597.136230, avg loss: 2.900401, ppl: 18.181440 +epoch: 0, batch: 44273, sum loss: 4695.334473, avg loss: 2.672359, ppl: 14.474070 +epoch: 0, batch: 44274, sum loss: 4745.063965, avg loss: 3.081210, ppl: 21.784754 +epoch: 0, batch: 44275, sum loss: 4485.242188, avg loss: 2.668199, ppl: 14.413983 +epoch: 0, batch: 44276, sum loss: 4632.090332, avg loss: 2.834817, ppl: 17.027279 +epoch: 0, batch: 44277, sum loss: 5905.610352, avg loss: 3.008462, ppl: 20.256216 +epoch: 0, batch: 44278, sum loss: 4122.709961, avg loss: 2.737523, ppl: 15.448676 +epoch: 0, batch: 44279, sum loss: 4762.630371, avg loss: 2.855294, ppl: 
17.379545 +epoch: 0, batch: 44280, sum loss: 4838.181641, avg loss: 2.918083, ppl: 18.505777 +epoch: 0, batch: 44281, sum loss: 5136.612305, avg loss: 3.062977, ppl: 21.391140 +epoch: 0, batch: 44282, sum loss: 3814.122559, avg loss: 2.734138, ppl: 15.396470 +epoch: 0, batch: 44283, sum loss: 4399.873047, avg loss: 2.911895, ppl: 18.391615 +epoch: 0, batch: 44284, sum loss: 4746.156738, avg loss: 2.816710, ppl: 16.721750 +epoch: 0, batch: 44285, sum loss: 4460.812012, avg loss: 2.896631, ppl: 18.113024 +epoch: 0, batch: 44286, sum loss: 4978.720215, avg loss: 2.911532, ppl: 18.384945 +epoch: 0, batch: 44287, sum loss: 5141.634766, avg loss: 2.890182, ppl: 17.996593 +epoch: 0, batch: 44288, sum loss: 4972.103027, avg loss: 2.885724, ppl: 17.916540 +epoch: 0, batch: 44289, sum loss: 4646.024902, avg loss: 2.976313, ppl: 19.615368 +epoch: 0, batch: 44290, sum loss: 5981.669922, avg loss: 2.930754, ppl: 18.741764 +epoch: 0, batch: 44291, sum loss: 5828.507324, avg loss: 2.901198, ppl: 18.195934 +epoch: 0, batch: 44292, sum loss: 5252.685547, avg loss: 2.967619, ppl: 19.445564 +epoch: 0, batch: 44293, sum loss: 6394.122559, avg loss: 3.065255, ppl: 21.439938 +epoch: 0, batch: 44294, sum loss: 4983.523926, avg loss: 2.841234, ppl: 17.136894 +epoch: 0, batch: 44295, sum loss: 4908.009277, avg loss: 3.005517, ppl: 20.196655 +epoch: 0, batch: 44296, sum loss: 4861.590332, avg loss: 2.726635, ppl: 15.281382 +epoch: 0, batch: 44297, sum loss: 4487.747070, avg loss: 2.647638, ppl: 14.120651 +epoch: 0, batch: 44298, sum loss: 5599.084961, avg loss: 3.024897, ppl: 20.591890 +epoch: 0, batch: 44299, sum loss: 4427.805664, avg loss: 2.662541, ppl: 14.332661 +epoch: 0, batch: 44300, sum loss: 4916.227051, avg loss: 2.917642, ppl: 18.497622 +epoch: 0, batch: 44301, sum loss: 4157.886719, avg loss: 2.726483, ppl: 15.279058 +epoch: 0, batch: 44302, sum loss: 4468.495117, avg loss: 2.775463, ppl: 16.046053 +epoch: 0, batch: 44303, sum loss: 4435.787109, avg loss: 2.558124, ppl: 
12.911573 +epoch: 0, batch: 44304, sum loss: 5017.443848, avg loss: 2.755323, ppl: 15.726126 +epoch: 0, batch: 44305, sum loss: 5309.668457, avg loss: 3.138102, ppl: 23.060053 +epoch: 0, batch: 44306, sum loss: 4840.769531, avg loss: 2.896930, ppl: 18.118437 +epoch: 0, batch: 44307, sum loss: 4926.939941, avg loss: 2.636137, ppl: 13.959172 +epoch: 0, batch: 44308, sum loss: 5623.053711, avg loss: 3.072707, ppl: 21.600300 +epoch: 0, batch: 44309, sum loss: 5557.149902, avg loss: 3.139633, ppl: 23.095383 +epoch: 0, batch: 44310, sum loss: 5088.862305, avg loss: 2.765686, ppl: 15.889934 +epoch: 0, batch: 44311, sum loss: 4534.657227, avg loss: 2.637962, ppl: 13.984678 +epoch: 0, batch: 44312, sum loss: 4635.029297, avg loss: 2.852326, ppl: 17.328035 +epoch: 0, batch: 44313, sum loss: 5305.257324, avg loss: 3.118905, ppl: 22.621592 +epoch: 0, batch: 44314, sum loss: 5324.947266, avg loss: 2.932240, ppl: 18.769619 +epoch: 0, batch: 44315, sum loss: 4827.490234, avg loss: 2.715124, ppl: 15.106482 +epoch: 0, batch: 44316, sum loss: 5225.857422, avg loss: 2.981094, ppl: 19.709364 +epoch: 0, batch: 44317, sum loss: 4850.978027, avg loss: 2.786317, ppl: 16.221169 +epoch: 0, batch: 44318, sum loss: 4943.208008, avg loss: 2.857346, ppl: 17.415237 +epoch: 0, batch: 44319, sum loss: 4841.682617, avg loss: 2.844702, ppl: 17.196430 +epoch: 0, batch: 44320, sum loss: 3411.256348, avg loss: 2.495433, ppl: 12.126979 +epoch: 0, batch: 44321, sum loss: 5162.853516, avg loss: 2.940122, ppl: 18.918148 +epoch: 0, batch: 44322, sum loss: 4720.138184, avg loss: 2.833216, ppl: 17.000048 +epoch: 0, batch: 44323, sum loss: 5444.415527, avg loss: 3.024675, ppl: 20.587320 +epoch: 0, batch: 44324, sum loss: 5106.504395, avg loss: 2.863996, ppl: 17.531439 +epoch: 0, batch: 44325, sum loss: 5084.562012, avg loss: 3.017544, ppl: 20.441032 +epoch: 0, batch: 44326, sum loss: 4736.551758, avg loss: 2.855064, ppl: 17.375557 +epoch: 0, batch: 44327, sum loss: 4377.044922, avg loss: 2.638363, ppl: 
13.990288 +epoch: 0, batch: 44328, sum loss: 5146.149902, avg loss: 3.170764, ppl: 23.825680 +epoch: 0, batch: 44329, sum loss: 4705.604492, avg loss: 2.910083, ppl: 18.358328 +epoch: 0, batch: 44330, sum loss: 4963.861816, avg loss: 2.802858, ppl: 16.491714 +epoch: 0, batch: 44331, sum loss: 4122.144531, avg loss: 2.846785, ppl: 17.232292 +epoch: 0, batch: 44332, sum loss: 5334.395996, avg loss: 2.963553, ppl: 19.366667 +epoch: 0, batch: 44333, sum loss: 4988.049805, avg loss: 3.019401, ppl: 20.479017 +epoch: 0, batch: 44334, sum loss: 4813.408691, avg loss: 2.759982, ppl: 15.799557 +epoch: 0, batch: 44335, sum loss: 4281.299805, avg loss: 2.713118, ppl: 15.076207 +epoch: 0, batch: 44336, sum loss: 4481.524414, avg loss: 2.795711, ppl: 16.374264 +epoch: 0, batch: 44337, sum loss: 6040.970215, avg loss: 3.085276, ppl: 21.873507 +epoch: 0, batch: 44338, sum loss: 4650.324707, avg loss: 2.792988, ppl: 16.329737 +epoch: 0, batch: 44339, sum loss: 5405.913086, avg loss: 2.901725, ppl: 18.205515 +epoch: 0, batch: 44340, sum loss: 4284.400879, avg loss: 2.672739, ppl: 14.479575 +epoch: 0, batch: 44341, sum loss: 5658.757812, avg loss: 2.818107, ppl: 16.745117 +epoch: 0, batch: 44342, sum loss: 4392.950684, avg loss: 2.745594, ppl: 15.573862 +epoch: 0, batch: 44343, sum loss: 4214.071289, avg loss: 2.798188, ppl: 16.414879 +epoch: 0, batch: 44344, sum loss: 4085.935303, avg loss: 2.806274, ppl: 16.548151 +epoch: 0, batch: 44345, sum loss: 4952.179199, avg loss: 2.700207, ppl: 14.882809 +epoch: 0, batch: 44346, sum loss: 4793.193848, avg loss: 2.899694, ppl: 18.168579 +epoch: 0, batch: 44347, sum loss: 3604.916260, avg loss: 2.640964, ppl: 14.026723 +epoch: 0, batch: 44348, sum loss: 5220.561523, avg loss: 2.824979, ppl: 16.860592 +epoch: 0, batch: 44349, sum loss: 4505.279785, avg loss: 2.847838, ppl: 17.250444 +epoch: 0, batch: 44350, sum loss: 5300.966797, avg loss: 3.051795, ppl: 21.153271 +epoch: 0, batch: 44351, sum loss: 3842.037598, avg loss: 2.647855, ppl: 
14.123712 +epoch: 0, batch: 44352, sum loss: 4958.006348, avg loss: 2.805889, ppl: 16.541780 +epoch: 0, batch: 44353, sum loss: 4902.164551, avg loss: 2.938948, ppl: 18.895950 +epoch: 0, batch: 44354, sum loss: 4914.572754, avg loss: 2.744038, ppl: 15.549653 +epoch: 0, batch: 44355, sum loss: 5714.159180, avg loss: 3.093752, ppl: 22.059689 +epoch: 0, batch: 44356, sum loss: 6006.593750, avg loss: 3.418664, ppl: 30.528616 +epoch: 0, batch: 44357, sum loss: 4419.025391, avg loss: 2.657261, ppl: 14.257187 +epoch: 0, batch: 44358, sum loss: 5094.804688, avg loss: 2.977677, ppl: 19.642132 +epoch: 0, batch: 44359, sum loss: 4758.388184, avg loss: 2.723748, ppl: 15.237328 +epoch: 0, batch: 44360, sum loss: 5542.912598, avg loss: 3.161958, ppl: 23.616798 +epoch: 0, batch: 44361, sum loss: 4116.046387, avg loss: 2.811507, ppl: 16.634968 +epoch: 0, batch: 44362, sum loss: 4986.005859, avg loss: 2.832958, ppl: 16.995655 +epoch: 0, batch: 44363, sum loss: 5345.560547, avg loss: 2.829836, ppl: 16.942684 +epoch: 0, batch: 44364, sum loss: 4459.355957, avg loss: 2.757796, ppl: 15.765059 +epoch: 0, batch: 44365, sum loss: 4018.822266, avg loss: 2.695387, ppl: 14.811255 +epoch: 0, batch: 44366, sum loss: 4805.171875, avg loss: 2.764771, ppl: 15.875403 +epoch: 0, batch: 44367, sum loss: 5420.866699, avg loss: 3.138892, ppl: 23.078285 +epoch: 0, batch: 44368, sum loss: 4899.518555, avg loss: 3.030005, ppl: 20.697346 +epoch: 0, batch: 44369, sum loss: 4992.801758, avg loss: 2.975448, ppl: 19.598400 +epoch: 0, batch: 44370, sum loss: 4038.108643, avg loss: 2.639287, ppl: 14.003212 +epoch: 0, batch: 44371, sum loss: 5644.699707, avg loss: 2.826590, ppl: 16.887772 +epoch: 0, batch: 44372, sum loss: 5549.624512, avg loss: 2.856215, ppl: 17.395552 +epoch: 0, batch: 44373, sum loss: 4750.077637, avg loss: 2.824065, ppl: 16.845190 +epoch: 0, batch: 44374, sum loss: 4874.158203, avg loss: 2.842075, ppl: 17.151312 +epoch: 0, batch: 44375, sum loss: 4983.582520, avg loss: 3.083900, ppl: 
21.843426 +epoch: 0, batch: 44376, sum loss: 4771.522949, avg loss: 2.823386, ppl: 16.833761 +epoch: 0, batch: 44377, sum loss: 3426.774902, avg loss: 2.226624, ppl: 9.268527 +epoch: 0, batch: 44378, sum loss: 5318.101562, avg loss: 2.906066, ppl: 18.284733 +epoch: 0, batch: 44379, sum loss: 4577.520508, avg loss: 2.762535, ppl: 15.839944 +epoch: 0, batch: 44380, sum loss: 5042.525879, avg loss: 2.909709, ppl: 18.351458 +epoch: 0, batch: 44381, sum loss: 5839.105469, avg loss: 3.099313, ppl: 22.182707 +epoch: 0, batch: 44382, sum loss: 4404.506348, avg loss: 2.725561, ppl: 15.264974 +epoch: 0, batch: 44383, sum loss: 4433.219238, avg loss: 2.821909, ppl: 16.808912 +epoch: 0, batch: 44384, sum loss: 5909.581543, avg loss: 3.033666, ppl: 20.773251 +epoch: 0, batch: 44385, sum loss: 5065.160645, avg loss: 2.924458, ppl: 18.624121 +epoch: 0, batch: 44386, sum loss: 4942.009766, avg loss: 2.845141, ppl: 17.203983 +epoch: 0, batch: 44387, sum loss: 4760.353027, avg loss: 2.823460, ppl: 16.834997 +epoch: 0, batch: 44388, sum loss: 3870.068604, avg loss: 2.463443, ppl: 11.745178 +epoch: 0, batch: 44389, sum loss: 5219.835449, avg loss: 3.188659, ppl: 24.255888 +epoch: 0, batch: 44390, sum loss: 4708.684570, avg loss: 2.970779, ppl: 19.507109 +epoch: 0, batch: 44391, sum loss: 4708.394531, avg loss: 2.895692, ppl: 18.096012 +epoch: 0, batch: 44392, sum loss: 4971.030273, avg loss: 3.093361, ppl: 22.051060 +epoch: 0, batch: 44393, sum loss: 4418.743652, avg loss: 2.709224, ppl: 15.017610 +epoch: 0, batch: 44394, sum loss: 3755.585205, avg loss: 2.715535, ppl: 15.112696 +epoch: 0, batch: 44395, sum loss: 5567.551270, avg loss: 3.108627, ppl: 22.390284 +epoch: 0, batch: 44396, sum loss: 4677.065430, avg loss: 2.730336, ppl: 15.338039 +epoch: 0, batch: 44397, sum loss: 4946.510742, avg loss: 2.703012, ppl: 14.924610 +epoch: 0, batch: 44398, sum loss: 5605.349121, avg loss: 2.851144, ppl: 17.307571 +epoch: 0, batch: 44399, sum loss: 4770.304199, avg loss: 2.757401, ppl: 
15.758836 +epoch: 0, batch: 44400, sum loss: 5270.382324, avg loss: 2.832016, ppl: 16.979666 +epoch: 0, batch: 44401, sum loss: 4244.874023, avg loss: 2.580470, ppl: 13.203346 +epoch: 0, batch: 44402, sum loss: 5556.069336, avg loss: 3.036103, ppl: 20.823944 +epoch: 0, batch: 44403, sum loss: 4982.205566, avg loss: 2.842103, ppl: 17.151791 +epoch: 0, batch: 44404, sum loss: 4068.947021, avg loss: 2.476535, ppl: 11.899958 +epoch: 0, batch: 44405, sum loss: 5816.004883, avg loss: 2.757707, ppl: 15.763661 +epoch: 0, batch: 44406, sum loss: 5216.309082, avg loss: 3.064811, ppl: 21.430422 +epoch: 0, batch: 44407, sum loss: 5188.397461, avg loss: 2.786465, ppl: 16.223566 +epoch: 0, batch: 44408, sum loss: 4778.737305, avg loss: 2.992321, ppl: 19.931900 +epoch: 0, batch: 44409, sum loss: 5610.647461, avg loss: 3.117026, ppl: 22.579136 +epoch: 0, batch: 44410, sum loss: 5942.551270, avg loss: 2.953555, ppl: 19.173988 +epoch: 0, batch: 44411, sum loss: 4149.890625, avg loss: 2.656780, ppl: 14.250333 +epoch: 0, batch: 44412, sum loss: 4246.341797, avg loss: 2.680771, ppl: 14.596348 +epoch: 0, batch: 44413, sum loss: 5556.057129, avg loss: 2.945948, ppl: 19.028687 +epoch: 0, batch: 44414, sum loss: 4696.972656, avg loss: 2.732387, ppl: 15.369528 +epoch: 0, batch: 44415, sum loss: 3878.379883, avg loss: 2.909512, ppl: 18.347847 +epoch: 0, batch: 44416, sum loss: 6007.584473, avg loss: 2.955034, ppl: 19.202375 +epoch: 0, batch: 44417, sum loss: 4795.630859, avg loss: 2.686628, ppl: 14.682086 +epoch: 0, batch: 44418, sum loss: 6007.244629, avg loss: 3.120646, ppl: 22.661024 +epoch: 0, batch: 44419, sum loss: 4699.758789, avg loss: 2.848339, ppl: 17.259085 +epoch: 0, batch: 44420, sum loss: 5631.648926, avg loss: 3.154985, ppl: 23.452696 +epoch: 0, batch: 44421, sum loss: 5155.090820, avg loss: 2.841836, ppl: 17.147223 +epoch: 0, batch: 44422, sum loss: 5475.130859, avg loss: 2.884685, ppl: 17.897921 +epoch: 0, batch: 44423, sum loss: 5200.344727, avg loss: 2.833976, ppl: 
17.012962 +epoch: 0, batch: 44424, sum loss: 4388.826660, avg loss: 2.692532, ppl: 14.769017 +epoch: 0, batch: 44425, sum loss: 5324.425781, avg loss: 3.102812, ppl: 22.260466 +epoch: 0, batch: 44426, sum loss: 4933.226074, avg loss: 2.975408, ppl: 19.597614 +epoch: 0, batch: 44427, sum loss: 4671.160645, avg loss: 2.683033, ppl: 14.629400 +epoch: 0, batch: 44428, sum loss: 4869.460449, avg loss: 2.893322, ppl: 18.053179 +epoch: 0, batch: 44429, sum loss: 4760.566895, avg loss: 2.790485, ppl: 16.288912 +epoch: 0, batch: 44430, sum loss: 5137.033203, avg loss: 3.021784, ppl: 20.527887 +epoch: 0, batch: 44431, sum loss: 4799.468262, avg loss: 2.548841, ppl: 12.792272 +epoch: 0, batch: 44432, sum loss: 4553.036621, avg loss: 2.644040, ppl: 14.069930 +epoch: 0, batch: 44433, sum loss: 4719.759766, avg loss: 2.938829, ppl: 18.893713 +epoch: 0, batch: 44434, sum loss: 4683.107910, avg loss: 2.925114, ppl: 18.636358 +epoch: 0, batch: 44435, sum loss: 4688.315918, avg loss: 2.910190, ppl: 18.360289 +epoch: 0, batch: 44436, sum loss: 4293.059570, avg loss: 2.676471, ppl: 14.533716 +epoch: 0, batch: 44437, sum loss: 6483.093750, avg loss: 3.231851, ppl: 25.326502 +epoch: 0, batch: 44438, sum loss: 4542.243164, avg loss: 2.936162, ppl: 18.843395 +epoch: 0, batch: 44439, sum loss: 3911.605713, avg loss: 2.556605, ppl: 12.891976 +epoch: 0, batch: 44440, sum loss: 4585.914551, avg loss: 2.841335, ppl: 17.138632 +epoch: 0, batch: 44441, sum loss: 4896.253906, avg loss: 2.777229, ppl: 16.074411 +epoch: 0, batch: 44442, sum loss: 4813.129883, avg loss: 2.496437, ppl: 12.139163 +epoch: 0, batch: 44443, sum loss: 4792.493164, avg loss: 2.909832, ppl: 18.353710 +epoch: 0, batch: 44444, sum loss: 5308.339355, avg loss: 2.825088, ppl: 16.862421 +epoch: 0, batch: 44445, sum loss: 4882.586914, avg loss: 2.856985, ppl: 17.408957 +epoch: 0, batch: 44446, sum loss: 5518.604004, avg loss: 3.007414, ppl: 20.235003 +epoch: 0, batch: 44447, sum loss: 4701.080078, avg loss: 2.835392, ppl: 
17.037077 +epoch: 0, batch: 44448, sum loss: 4855.976562, avg loss: 2.863194, ppl: 17.517384 +epoch: 0, batch: 44449, sum loss: 4534.437012, avg loss: 2.676763, ppl: 14.537962 +epoch: 0, batch: 44450, sum loss: 5676.030762, avg loss: 3.059855, ppl: 21.324459 +epoch: 0, batch: 44451, sum loss: 5722.576660, avg loss: 3.066761, ppl: 21.472246 +epoch: 0, batch: 44452, sum loss: 4461.330078, avg loss: 2.644535, ppl: 14.076896 +epoch: 0, batch: 44453, sum loss: 5070.987793, avg loss: 3.002361, ppl: 20.133017 +epoch: 0, batch: 44454, sum loss: 3948.359131, avg loss: 2.818244, ppl: 16.747408 +epoch: 0, batch: 44455, sum loss: 5859.593750, avg loss: 3.126784, ppl: 22.800543 +epoch: 0, batch: 44456, sum loss: 4976.642578, avg loss: 2.775595, ppl: 16.048180 +epoch: 0, batch: 44457, sum loss: 4631.257324, avg loss: 2.649461, ppl: 14.146409 +epoch: 0, batch: 44458, sum loss: 6663.437988, avg loss: 3.397980, ppl: 29.903625 +epoch: 0, batch: 44459, sum loss: 4426.784180, avg loss: 2.730897, ppl: 15.346646 +epoch: 0, batch: 44460, sum loss: 5148.948730, avg loss: 2.892668, ppl: 18.041376 +epoch: 0, batch: 44461, sum loss: 4222.290039, avg loss: 2.637283, ppl: 13.975183 +epoch: 0, batch: 44462, sum loss: 4836.809082, avg loss: 2.826890, ppl: 16.892851 +epoch: 0, batch: 44463, sum loss: 4733.104004, avg loss: 3.198043, ppl: 24.484575 +epoch: 0, batch: 44464, sum loss: 3876.557861, avg loss: 2.598229, ppl: 13.439918 +epoch: 0, batch: 44465, sum loss: 5604.012695, avg loss: 3.150091, ppl: 23.338198 +epoch: 0, batch: 44466, sum loss: 4371.387207, avg loss: 2.752763, ppl: 15.685909 +epoch: 0, batch: 44467, sum loss: 5446.790527, avg loss: 2.781813, ppl: 16.148277 +epoch: 0, batch: 44468, sum loss: 5834.004395, avg loss: 2.984145, ppl: 19.769600 +epoch: 0, batch: 44469, sum loss: 5692.098633, avg loss: 3.088496, ppl: 21.944054 +epoch: 0, batch: 44470, sum loss: 4663.507812, avg loss: 2.995188, ppl: 19.989117 +epoch: 0, batch: 44471, sum loss: 4535.570312, avg loss: 2.894429, ppl: 
18.073183 +epoch: 0, batch: 44472, sum loss: 5254.977539, avg loss: 2.953894, ppl: 19.180496 +epoch: 0, batch: 44473, sum loss: 4785.487793, avg loss: 2.798531, ppl: 16.420504 +epoch: 0, batch: 44474, sum loss: 4929.604492, avg loss: 2.753969, ppl: 15.704840 +epoch: 0, batch: 44475, sum loss: 4656.582031, avg loss: 2.945340, ppl: 19.017120 +epoch: 0, batch: 44476, sum loss: 4738.417969, avg loss: 2.917745, ppl: 18.499527 +epoch: 0, batch: 44477, sum loss: 5096.067871, avg loss: 2.974937, ppl: 19.588398 +epoch: 0, batch: 44478, sum loss: 4877.358398, avg loss: 2.750907, ppl: 15.656829 +epoch: 0, batch: 44479, sum loss: 4270.629395, avg loss: 2.569572, ppl: 13.060240 +epoch: 0, batch: 44480, sum loss: 4170.699707, avg loss: 2.724167, ppl: 15.243712 +epoch: 0, batch: 44481, sum loss: 4849.800781, avg loss: 3.023567, ppl: 20.564508 +epoch: 0, batch: 44482, sum loss: 4829.302246, avg loss: 2.839096, ppl: 17.100298 +epoch: 0, batch: 44483, sum loss: 4779.572754, avg loss: 2.724956, ppl: 15.255739 +epoch: 0, batch: 44484, sum loss: 6196.296387, avg loss: 3.015229, ppl: 20.393764 +epoch: 0, batch: 44485, sum loss: 5008.881836, avg loss: 2.708968, ppl: 15.013772 +epoch: 0, batch: 44486, sum loss: 4264.473633, avg loss: 2.575165, ppl: 13.133488 +epoch: 0, batch: 44487, sum loss: 5545.587891, avg loss: 2.860025, ppl: 17.461958 +epoch: 0, batch: 44488, sum loss: 5344.727051, avg loss: 2.920616, ppl: 18.552711 +epoch: 0, batch: 44489, sum loss: 5036.208984, avg loss: 2.787055, ppl: 16.233147 +epoch: 0, batch: 44490, sum loss: 4531.296387, avg loss: 2.791926, ppl: 16.312410 +epoch: 0, batch: 44491, sum loss: 4684.444336, avg loss: 2.879191, ppl: 17.799875 +epoch: 0, batch: 44492, sum loss: 4259.891113, avg loss: 2.797040, ppl: 16.396034 +epoch: 0, batch: 44493, sum loss: 5024.536133, avg loss: 2.872805, ppl: 17.686562 +epoch: 0, batch: 44494, sum loss: 5964.964355, avg loss: 2.980992, ppl: 19.707354 +epoch: 0, batch: 44495, sum loss: 4046.631104, avg loss: 2.602335, ppl: 
13.495215 +epoch: 0, batch: 44496, sum loss: 5801.068848, avg loss: 3.130636, ppl: 22.888536 +epoch: 0, batch: 44497, sum loss: 4101.694824, avg loss: 2.758369, ppl: 15.774094 +epoch: 0, batch: 44498, sum loss: 4636.020996, avg loss: 2.754617, ppl: 15.715024 +epoch: 0, batch: 44499, sum loss: 4032.403320, avg loss: 2.413168, ppl: 11.169289 +epoch: 0, batch: 44500, sum loss: 4104.237793, avg loss: 2.700156, ppl: 14.882060 +epoch: 0, batch: 44501, sum loss: 6113.129883, avg loss: 3.195572, ppl: 24.424143 +epoch: 0, batch: 44502, sum loss: 4591.975098, avg loss: 2.827571, ppl: 16.904348 +epoch: 0, batch: 44503, sum loss: 3977.238770, avg loss: 2.597805, ppl: 13.434215 +epoch: 0, batch: 44504, sum loss: 5325.775391, avg loss: 2.944044, ppl: 18.992498 +epoch: 0, batch: 44505, sum loss: 4030.118652, avg loss: 2.487728, ppl: 12.033899 +epoch: 0, batch: 44506, sum loss: 4043.883789, avg loss: 2.676296, ppl: 14.531174 +epoch: 0, batch: 44507, sum loss: 4026.844727, avg loss: 2.625062, ppl: 13.805427 +epoch: 0, batch: 44508, sum loss: 4908.702148, avg loss: 2.932319, ppl: 18.771112 +epoch: 0, batch: 44509, sum loss: 4052.360596, avg loss: 2.692599, ppl: 14.770007 +epoch: 0, batch: 44510, sum loss: 4973.106445, avg loss: 2.916778, ppl: 18.481646 +epoch: 0, batch: 44511, sum loss: 4128.309082, avg loss: 2.636213, ppl: 13.960230 +epoch: 0, batch: 44512, sum loss: 4057.707031, avg loss: 2.725122, ppl: 15.258279 +epoch: 0, batch: 44513, sum loss: 4936.897949, avg loss: 2.961546, ppl: 19.327837 +epoch: 0, batch: 44514, sum loss: 5888.915527, avg loss: 3.070342, ppl: 21.549267 +epoch: 0, batch: 44515, sum loss: 4466.144043, avg loss: 2.983396, ppl: 19.754791 +epoch: 0, batch: 44516, sum loss: 4245.606934, avg loss: 2.522642, ppl: 12.461478 +epoch: 0, batch: 44517, sum loss: 3339.833740, avg loss: 2.571080, ppl: 13.079949 +epoch: 0, batch: 44518, sum loss: 4268.977539, avg loss: 2.731272, ppl: 15.352399 +epoch: 0, batch: 44519, sum loss: 5476.931641, avg loss: 2.999415, ppl: 
20.073790 +epoch: 0, batch: 44520, sum loss: 4908.080078, avg loss: 2.895622, ppl: 18.094761 +epoch: 0, batch: 44521, sum loss: 4351.942871, avg loss: 2.805895, ppl: 16.541872 +epoch: 0, batch: 44522, sum loss: 4694.730469, avg loss: 2.928715, ppl: 18.703585 +epoch: 0, batch: 44523, sum loss: 4338.285156, avg loss: 2.682922, ppl: 14.627779 +epoch: 0, batch: 44524, sum loss: 4861.735840, avg loss: 2.740550, ppl: 15.495506 +epoch: 0, batch: 44525, sum loss: 5003.663574, avg loss: 2.714956, ppl: 15.103943 +epoch: 0, batch: 44526, sum loss: 5520.052246, avg loss: 2.872036, ppl: 17.672958 +epoch: 0, batch: 44527, sum loss: 5840.093750, avg loss: 3.044887, ppl: 21.007664 +epoch: 0, batch: 44528, sum loss: 4377.745117, avg loss: 2.868772, ppl: 17.615366 +epoch: 0, batch: 44529, sum loss: 4822.008301, avg loss: 2.713567, ppl: 15.082977 +epoch: 0, batch: 44530, sum loss: 4230.866211, avg loss: 2.872279, ppl: 17.677254 +epoch: 0, batch: 44531, sum loss: 4293.623047, avg loss: 2.700392, ppl: 14.885562 +epoch: 0, batch: 44532, sum loss: 4947.467773, avg loss: 3.038985, ppl: 20.884035 +epoch: 0, batch: 44533, sum loss: 4761.768066, avg loss: 2.819283, ppl: 16.764818 +epoch: 0, batch: 44534, sum loss: 4104.832031, avg loss: 2.559122, ppl: 12.924469 +epoch: 0, batch: 44535, sum loss: 4094.232178, avg loss: 2.510259, ppl: 12.308116 +epoch: 0, batch: 44536, sum loss: 4974.705078, avg loss: 2.991404, ppl: 19.913622 +epoch: 0, batch: 44537, sum loss: 3851.190918, avg loss: 2.505655, ppl: 12.251575 +epoch: 0, batch: 44538, sum loss: 3278.618652, avg loss: 2.382717, ppl: 10.834301 +epoch: 0, batch: 44539, sum loss: 4796.379883, avg loss: 2.728316, ppl: 15.307089 +epoch: 0, batch: 44540, sum loss: 5680.326172, avg loss: 3.266433, ppl: 26.217642 +epoch: 0, batch: 44541, sum loss: 4340.667480, avg loss: 2.800431, ppl: 16.451733 +epoch: 0, batch: 44542, sum loss: 5292.443848, avg loss: 2.919164, ppl: 18.525793 +epoch: 0, batch: 44543, sum loss: 4440.299805, avg loss: 2.678106, ppl: 
14.557496 +epoch: 0, batch: 44544, sum loss: 4746.529297, avg loss: 3.011757, ppl: 20.323078 +epoch: 0, batch: 44545, sum loss: 4547.537598, avg loss: 2.789900, ppl: 16.279396 +epoch: 0, batch: 44546, sum loss: 4052.752441, avg loss: 2.523507, ppl: 12.472261 +epoch: 0, batch: 44547, sum loss: 5236.621094, avg loss: 2.882015, ppl: 17.850204 +epoch: 0, batch: 44548, sum loss: 5514.951172, avg loss: 3.185991, ppl: 24.191257 +epoch: 0, batch: 44549, sum loss: 5094.391602, avg loss: 2.941335, ppl: 18.941111 +epoch: 0, batch: 44550, sum loss: 5551.961426, avg loss: 2.831189, ppl: 16.965620 +epoch: 0, batch: 44551, sum loss: 4892.593750, avg loss: 2.540288, ppl: 12.683320 +epoch: 0, batch: 44552, sum loss: 4595.887695, avg loss: 3.027594, ppl: 20.647497 +epoch: 0, batch: 44553, sum loss: 5229.069824, avg loss: 2.960968, ppl: 19.316662 +epoch: 0, batch: 44554, sum loss: 4051.537598, avg loss: 2.610527, ppl: 13.606220 +epoch: 0, batch: 44555, sum loss: 5508.884766, avg loss: 3.176981, ppl: 23.974260 +epoch: 0, batch: 44556, sum loss: 5089.737305, avg loss: 2.882071, ppl: 17.851200 +epoch: 0, batch: 44557, sum loss: 5747.291504, avg loss: 2.939791, ppl: 18.911898 +epoch: 0, batch: 44558, sum loss: 4360.647461, avg loss: 2.646024, ppl: 14.097874 +epoch: 0, batch: 44559, sum loss: 4670.850098, avg loss: 2.986477, ppl: 19.815752 +epoch: 0, batch: 44560, sum loss: 5002.474609, avg loss: 2.790003, ppl: 16.281061 +epoch: 0, batch: 44561, sum loss: 4698.931641, avg loss: 2.773867, ppl: 16.020472 +epoch: 0, batch: 44562, sum loss: 6078.416016, avg loss: 3.079238, ppl: 21.741827 +epoch: 0, batch: 44563, sum loss: 3973.456543, avg loss: 2.633172, ppl: 13.917848 +epoch: 0, batch: 44564, sum loss: 4857.496094, avg loss: 2.865779, ppl: 17.562737 +epoch: 0, batch: 44565, sum loss: 4565.147949, avg loss: 2.683802, ppl: 14.640656 +epoch: 0, batch: 44566, sum loss: 4914.031250, avg loss: 2.918071, ppl: 18.505556 +epoch: 0, batch: 44567, sum loss: 4330.107422, avg loss: 2.788221, ppl: 
16.252087 +epoch: 0, batch: 44568, sum loss: 3772.207275, avg loss: 2.399623, ppl: 11.019020 +epoch: 0, batch: 44569, sum loss: 4904.964844, avg loss: 3.096569, ppl: 22.121916 +epoch: 0, batch: 44570, sum loss: 4604.037598, avg loss: 2.479288, ppl: 11.932764 +epoch: 0, batch: 44571, sum loss: 4121.677734, avg loss: 2.553704, ppl: 12.854627 +epoch: 0, batch: 44572, sum loss: 4381.361816, avg loss: 2.455920, ppl: 11.657159 +epoch: 0, batch: 44573, sum loss: 5239.067871, avg loss: 2.983524, ppl: 19.757320 +epoch: 0, batch: 44574, sum loss: 5175.206055, avg loss: 2.862393, ppl: 17.503357 +epoch: 0, batch: 44575, sum loss: 4747.918457, avg loss: 2.771698, ppl: 15.985751 +epoch: 0, batch: 44576, sum loss: 4906.913086, avg loss: 2.884722, ppl: 17.898600 +epoch: 0, batch: 44577, sum loss: 3896.986084, avg loss: 2.558757, ppl: 12.919743 +epoch: 0, batch: 44578, sum loss: 5130.155273, avg loss: 2.934872, ppl: 18.819088 +epoch: 0, batch: 44579, sum loss: 3358.322754, avg loss: 2.238882, ppl: 9.382833 +epoch: 0, batch: 44580, sum loss: 4470.081543, avg loss: 2.663934, ppl: 14.352645 +epoch: 0, batch: 44581, sum loss: 4458.563477, avg loss: 2.795338, ppl: 16.368155 +epoch: 0, batch: 44582, sum loss: 4351.167480, avg loss: 2.883478, ppl: 17.876333 +epoch: 0, batch: 44583, sum loss: 5439.141113, avg loss: 2.816748, ppl: 16.722387 +epoch: 0, batch: 44584, sum loss: 4741.000488, avg loss: 2.810315, ppl: 16.615145 +epoch: 0, batch: 44585, sum loss: 5135.858887, avg loss: 2.991182, ppl: 19.909193 +epoch: 0, batch: 44586, sum loss: 4641.160645, avg loss: 2.804327, ppl: 16.515953 +epoch: 0, batch: 44587, sum loss: 5079.066895, avg loss: 2.900666, ppl: 18.186262 +epoch: 0, batch: 44588, sum loss: 4260.343750, avg loss: 2.918044, ppl: 18.505053 +epoch: 0, batch: 44589, sum loss: 5294.060547, avg loss: 2.960884, ppl: 19.315041 +epoch: 0, batch: 44590, sum loss: 4285.041016, avg loss: 2.528048, ppl: 12.529023 +epoch: 0, batch: 44591, sum loss: 5318.075195, avg loss: 3.154256, ppl: 
23.435591 +epoch: 0, batch: 44592, sum loss: 6449.055176, avg loss: 3.116991, ppl: 22.578346 +epoch: 0, batch: 44593, sum loss: 4731.307129, avg loss: 2.714462, ppl: 15.096483 +epoch: 0, batch: 44594, sum loss: 5802.520996, avg loss: 2.904165, ppl: 18.249990 +epoch: 0, batch: 44595, sum loss: 4725.205566, avg loss: 2.872465, ppl: 17.680553 +epoch: 0, batch: 44596, sum loss: 4460.755371, avg loss: 2.736660, ppl: 15.435341 +epoch: 0, batch: 44597, sum loss: 6678.539062, avg loss: 3.232594, ppl: 25.345318 +epoch: 0, batch: 44598, sum loss: 4005.064209, avg loss: 2.478381, ppl: 11.921952 +epoch: 0, batch: 44599, sum loss: 4158.537598, avg loss: 2.895917, ppl: 18.100100 +epoch: 0, batch: 44600, sum loss: 4609.146484, avg loss: 3.010546, ppl: 20.298489 +epoch: 0, batch: 44601, sum loss: 4330.975586, avg loss: 2.599625, ppl: 13.458692 +epoch: 0, batch: 44602, sum loss: 5254.752930, avg loss: 2.995868, ppl: 20.002718 +epoch: 0, batch: 44603, sum loss: 4725.508789, avg loss: 2.872650, ppl: 17.683813 +epoch: 0, batch: 44604, sum loss: 4630.221680, avg loss: 2.707732, ppl: 14.995227 +epoch: 0, batch: 44605, sum loss: 3711.020264, avg loss: 2.620777, ppl: 13.746402 +epoch: 0, batch: 44606, sum loss: 5317.537109, avg loss: 3.201407, ppl: 24.567070 +epoch: 0, batch: 44607, sum loss: 4560.777344, avg loss: 2.646998, ppl: 14.111611 +epoch: 0, batch: 44608, sum loss: 5053.539551, avg loss: 2.812209, ppl: 16.646652 +epoch: 0, batch: 44609, sum loss: 5592.541992, avg loss: 2.985874, ppl: 19.803806 +epoch: 0, batch: 44610, sum loss: 5133.267578, avg loss: 3.090468, ppl: 21.987370 +epoch: 0, batch: 44611, sum loss: 4481.612793, avg loss: 2.706288, ppl: 14.973588 +epoch: 0, batch: 44612, sum loss: 4872.017578, avg loss: 2.942039, ppl: 18.954464 +epoch: 0, batch: 44613, sum loss: 4390.163086, avg loss: 2.698318, ppl: 14.854722 +epoch: 0, batch: 44614, sum loss: 5815.104004, avg loss: 2.870239, ppl: 17.641233 +epoch: 0, batch: 44615, sum loss: 4577.856934, avg loss: 2.982317, ppl: 
19.733490 +epoch: 0, batch: 44616, sum loss: 4107.344727, avg loss: 2.751068, ppl: 15.659348 +epoch: 0, batch: 44617, sum loss: 5224.340820, avg loss: 2.948274, ppl: 19.072998 +epoch: 0, batch: 44618, sum loss: 3986.034912, avg loss: 2.641508, ppl: 14.034347 +epoch: 0, batch: 44619, sum loss: 5384.195312, avg loss: 3.174644, ppl: 23.918291 +epoch: 0, batch: 44620, sum loss: 5234.202148, avg loss: 2.811064, ppl: 16.627609 +epoch: 0, batch: 44621, sum loss: 5477.208008, avg loss: 2.979983, ppl: 19.687475 +epoch: 0, batch: 44622, sum loss: 4042.236084, avg loss: 2.668143, ppl: 14.413177 +epoch: 0, batch: 44623, sum loss: 4164.895020, avg loss: 2.740063, ppl: 15.487956 +epoch: 0, batch: 44624, sum loss: 4906.651367, avg loss: 2.799003, ppl: 16.428253 +epoch: 0, batch: 44625, sum loss: 5271.037109, avg loss: 3.008583, ppl: 20.258675 +epoch: 0, batch: 44626, sum loss: 5178.979492, avg loss: 3.146403, ppl: 23.252283 +epoch: 0, batch: 44627, sum loss: 4700.873535, avg loss: 2.878673, ppl: 17.790655 +epoch: 0, batch: 44628, sum loss: 6061.425293, avg loss: 3.148792, ppl: 23.307898 +epoch: 0, batch: 44629, sum loss: 5879.819824, avg loss: 3.162894, ppl: 23.638903 +epoch: 0, batch: 44630, sum loss: 5051.906250, avg loss: 2.870401, ppl: 17.644094 +epoch: 0, batch: 44631, sum loss: 6532.505859, avg loss: 3.137611, ppl: 23.048735 +epoch: 0, batch: 44632, sum loss: 4597.351562, avg loss: 2.603257, ppl: 13.507657 +epoch: 0, batch: 44633, sum loss: 5233.651855, avg loss: 3.173834, ppl: 23.898928 +epoch: 0, batch: 44634, sum loss: 5483.020508, avg loss: 3.071720, ppl: 21.578995 +epoch: 0, batch: 44635, sum loss: 4858.124512, avg loss: 3.040128, ppl: 20.907919 +epoch: 0, batch: 44636, sum loss: 4719.311523, avg loss: 2.820868, ppl: 16.791416 +epoch: 0, batch: 44637, sum loss: 4330.947754, avg loss: 2.864384, ppl: 17.538240 +epoch: 0, batch: 44638, sum loss: 4065.920166, avg loss: 2.586463, ppl: 13.282710 +epoch: 0, batch: 44639, sum loss: 4737.334473, avg loss: 2.838427, ppl: 
17.088865 +epoch: 0, batch: 44640, sum loss: 4590.418945, avg loss: 2.876202, ppl: 17.746750 +epoch: 0, batch: 44641, sum loss: 4021.579834, avg loss: 2.540480, ppl: 12.685758 +epoch: 0, batch: 44642, sum loss: 4537.064453, avg loss: 2.624098, ppl: 13.792122 +epoch: 0, batch: 44643, sum loss: 5259.216309, avg loss: 3.038253, ppl: 20.868755 +epoch: 0, batch: 44644, sum loss: 4060.406006, avg loss: 2.676602, ppl: 14.535623 +epoch: 0, batch: 44645, sum loss: 5037.772461, avg loss: 2.928937, ppl: 18.707743 +epoch: 0, batch: 44646, sum loss: 4960.425293, avg loss: 2.749681, ppl: 15.637650 +epoch: 0, batch: 44647, sum loss: 5024.348633, avg loss: 2.778954, ppl: 16.102165 +epoch: 0, batch: 44648, sum loss: 4824.928711, avg loss: 2.841537, ppl: 17.142084 +epoch: 0, batch: 44649, sum loss: 5631.191895, avg loss: 2.787719, ppl: 16.243921 +epoch: 0, batch: 44650, sum loss: 4300.401855, avg loss: 2.818088, ppl: 16.744801 +epoch: 0, batch: 44651, sum loss: 4418.098145, avg loss: 2.787444, ppl: 16.239452 +epoch: 0, batch: 44652, sum loss: 4865.586914, avg loss: 3.003449, ppl: 20.154930 +epoch: 0, batch: 44653, sum loss: 4328.644043, avg loss: 2.950677, ppl: 19.118900 +epoch: 0, batch: 44654, sum loss: 5095.497070, avg loss: 2.853022, ppl: 17.340101 +epoch: 0, batch: 44655, sum loss: 4445.306152, avg loss: 2.617966, ppl: 13.707812 +epoch: 0, batch: 44656, sum loss: 4985.692383, avg loss: 2.913906, ppl: 18.428638 +epoch: 0, batch: 44657, sum loss: 4153.629883, avg loss: 2.795175, ppl: 16.365490 +epoch: 0, batch: 44658, sum loss: 5265.689453, avg loss: 2.869586, ppl: 17.629713 +epoch: 0, batch: 44659, sum loss: 4490.677734, avg loss: 2.810186, ppl: 16.613014 +epoch: 0, batch: 44660, sum loss: 5622.408203, avg loss: 3.123560, ppl: 22.727148 +epoch: 0, batch: 44661, sum loss: 5367.809570, avg loss: 2.801571, ppl: 16.470499 +epoch: 0, batch: 44662, sum loss: 4203.453613, avg loss: 2.485780, ppl: 12.010485 +epoch: 0, batch: 44663, sum loss: 5113.097656, avg loss: 3.063570, ppl: 
21.403828 +epoch: 0, batch: 44664, sum loss: 5229.942383, avg loss: 2.853215, ppl: 17.343447 +epoch: 0, batch: 44665, sum loss: 4936.439941, avg loss: 2.870023, ppl: 17.637428 +epoch: 0, batch: 44666, sum loss: 4745.648926, avg loss: 2.600356, ppl: 13.468527 +epoch: 0, batch: 44667, sum loss: 4908.327148, avg loss: 2.517091, ppl: 12.392492 +epoch: 0, batch: 44668, sum loss: 4397.889160, avg loss: 2.729913, ppl: 15.331550 +epoch: 0, batch: 44669, sum loss: 5734.662109, avg loss: 2.934832, ppl: 18.818344 +epoch: 0, batch: 44670, sum loss: 6052.476562, avg loss: 3.168836, ppl: 23.779785 +epoch: 0, batch: 44671, sum loss: 4785.769531, avg loss: 2.788910, ppl: 16.263285 +epoch: 0, batch: 44672, sum loss: 4145.668457, avg loss: 2.695493, ppl: 14.812823 +epoch: 0, batch: 44673, sum loss: 5127.513672, avg loss: 2.945154, ppl: 19.013597 +epoch: 0, batch: 44674, sum loss: 4885.394531, avg loss: 2.778950, ppl: 16.102108 +epoch: 0, batch: 44675, sum loss: 5099.876953, avg loss: 2.740396, ppl: 15.493119 +epoch: 0, batch: 44676, sum loss: 4439.165039, avg loss: 2.688774, ppl: 14.713620 +epoch: 0, batch: 44677, sum loss: 5950.514160, avg loss: 3.115453, ppl: 22.543631 +epoch: 0, batch: 44678, sum loss: 5066.754395, avg loss: 3.033985, ppl: 20.779869 +epoch: 0, batch: 44679, sum loss: 5234.159668, avg loss: 2.863326, ppl: 17.519697 +epoch: 0, batch: 44680, sum loss: 6325.145020, avg loss: 2.925599, ppl: 18.645393 +epoch: 0, batch: 44681, sum loss: 4793.659180, avg loss: 2.759735, ppl: 15.795654 +epoch: 0, batch: 44682, sum loss: 5812.232422, avg loss: 2.962402, ppl: 19.344374 +epoch: 0, batch: 44683, sum loss: 3945.093994, avg loss: 2.540305, ppl: 12.683543 +epoch: 0, batch: 44684, sum loss: 5613.798828, avg loss: 2.933019, ppl: 18.784258 +epoch: 0, batch: 44685, sum loss: 4481.649902, avg loss: 2.931099, ppl: 18.748222 +epoch: 0, batch: 44686, sum loss: 4395.938477, avg loss: 2.507666, ppl: 12.276245 +epoch: 0, batch: 44687, sum loss: 6411.593262, avg loss: 3.150660, ppl: 
23.351473 +epoch: 0, batch: 44688, sum loss: 4517.841797, avg loss: 2.761517, ppl: 15.823831 +epoch: 0, batch: 44689, sum loss: 4810.992676, avg loss: 2.846741, ppl: 17.231537 +epoch: 0, batch: 44690, sum loss: 6402.258301, avg loss: 3.258147, ppl: 26.001307 +epoch: 0, batch: 44691, sum loss: 5202.562012, avg loss: 2.947627, ppl: 19.060675 +epoch: 0, batch: 44692, sum loss: 5287.658691, avg loss: 2.736883, ppl: 15.438793 +epoch: 0, batch: 44693, sum loss: 5781.789062, avg loss: 2.908345, ppl: 18.326435 +epoch: 0, batch: 44694, sum loss: 4808.644531, avg loss: 3.053108, ppl: 21.181063 +epoch: 0, batch: 44695, sum loss: 5035.432129, avg loss: 3.123717, ppl: 22.730715 +epoch: 0, batch: 44696, sum loss: 4657.050781, avg loss: 2.668797, ppl: 14.422608 +epoch: 0, batch: 44697, sum loss: 4904.494629, avg loss: 2.744541, ppl: 15.557470 +epoch: 0, batch: 44698, sum loss: 4213.295898, avg loss: 2.576939, ppl: 13.156808 +epoch: 0, batch: 44699, sum loss: 4107.352539, avg loss: 2.532276, ppl: 12.582115 +epoch: 0, batch: 44700, sum loss: 4685.206543, avg loss: 2.834366, ppl: 17.019600 +epoch: 0, batch: 44701, sum loss: 4166.250000, avg loss: 2.750000, ppl: 15.642632 +epoch: 0, batch: 44702, sum loss: 3980.260742, avg loss: 2.834944, ppl: 17.029438 +epoch: 0, batch: 44703, sum loss: 6415.649414, avg loss: 3.056527, ppl: 21.253609 +epoch: 0, batch: 44704, sum loss: 4106.532227, avg loss: 2.587607, ppl: 13.297914 +epoch: 0, batch: 44705, sum loss: 5209.712891, avg loss: 2.834446, ppl: 17.020975 +epoch: 0, batch: 44706, sum loss: 4033.074707, avg loss: 2.637721, ppl: 13.981298 +epoch: 0, batch: 44707, sum loss: 4684.406250, avg loss: 2.785022, ppl: 16.200167 +epoch: 0, batch: 44708, sum loss: 5228.936523, avg loss: 2.947540, ppl: 19.059017 +epoch: 0, batch: 44709, sum loss: 4595.706543, avg loss: 2.556010, ppl: 12.884310 +epoch: 0, batch: 44710, sum loss: 4793.560547, avg loss: 3.026238, ppl: 20.619511 +epoch: 0, batch: 44711, sum loss: 4107.228027, avg loss: 2.686219, ppl: 
14.676073 +epoch: 0, batch: 44712, sum loss: 4406.981445, avg loss: 2.718681, ppl: 15.160307 +epoch: 0, batch: 44713, sum loss: 4528.372070, avg loss: 2.764574, ppl: 15.872274 +epoch: 0, batch: 44714, sum loss: 4267.561523, avg loss: 2.805760, ppl: 16.539648 +epoch: 0, batch: 44715, sum loss: 4915.232910, avg loss: 2.876087, ppl: 17.744705 +epoch: 0, batch: 44716, sum loss: 4581.185059, avg loss: 2.866824, ppl: 17.581095 +epoch: 0, batch: 44717, sum loss: 5304.116699, avg loss: 3.090977, ppl: 21.998564 +epoch: 0, batch: 44718, sum loss: 5314.302246, avg loss: 2.963917, ppl: 19.373701 +epoch: 0, batch: 44719, sum loss: 4830.400391, avg loss: 2.948962, ppl: 19.086136 +epoch: 0, batch: 44720, sum loss: 5988.357422, avg loss: 3.074106, ppl: 21.630526 +epoch: 0, batch: 44721, sum loss: 4553.926758, avg loss: 2.907999, ppl: 18.320105 +epoch: 0, batch: 44722, sum loss: 4479.932617, avg loss: 2.677784, ppl: 14.552808 +epoch: 0, batch: 44723, sum loss: 4557.409180, avg loss: 2.827177, ppl: 16.897688 +epoch: 0, batch: 44724, sum loss: 4387.608398, avg loss: 2.742255, ppl: 15.521951 +epoch: 0, batch: 44725, sum loss: 4817.111816, avg loss: 3.046876, ppl: 21.049492 +epoch: 0, batch: 44726, sum loss: 6146.666992, avg loss: 3.018992, ppl: 20.470640 +epoch: 0, batch: 44727, sum loss: 5175.499023, avg loss: 3.064239, ppl: 21.418152 +epoch: 0, batch: 44728, sum loss: 5159.444824, avg loss: 2.890445, ppl: 18.001328 +epoch: 0, batch: 44729, sum loss: 5023.757812, avg loss: 3.157610, ppl: 23.514334 +epoch: 0, batch: 44730, sum loss: 4850.498047, avg loss: 2.821698, ppl: 16.805357 +epoch: 0, batch: 44731, sum loss: 5069.028809, avg loss: 2.760909, ppl: 15.814209 +epoch: 0, batch: 44732, sum loss: 5186.149414, avg loss: 2.925070, ppl: 18.635536 +epoch: 0, batch: 44733, sum loss: 3876.413574, avg loss: 2.579117, ppl: 13.185494 +epoch: 0, batch: 44734, sum loss: 4973.405273, avg loss: 2.905026, ppl: 18.265726 +epoch: 0, batch: 44735, sum loss: 4934.877930, avg loss: 2.811896, ppl: 
16.641441 +epoch: 0, batch: 44736, sum loss: 5284.993164, avg loss: 2.916663, ppl: 18.479517 +epoch: 0, batch: 44737, sum loss: 5727.533203, avg loss: 2.879604, ppl: 17.807226 +epoch: 0, batch: 44738, sum loss: 4924.774902, avg loss: 2.817377, ppl: 16.732904 +epoch: 0, batch: 44739, sum loss: 4434.867676, avg loss: 2.763157, ppl: 15.849808 +epoch: 0, batch: 44740, sum loss: 4103.817871, avg loss: 2.485656, ppl: 12.008996 +epoch: 0, batch: 44741, sum loss: 4212.190430, avg loss: 2.721053, ppl: 15.196321 +epoch: 0, batch: 44742, sum loss: 5406.569336, avg loss: 2.854577, ppl: 17.367094 +epoch: 0, batch: 44743, sum loss: 6126.501953, avg loss: 3.084845, ppl: 21.864075 +epoch: 0, batch: 44744, sum loss: 3405.759033, avg loss: 2.530282, ppl: 12.557041 +epoch: 0, batch: 44745, sum loss: 4613.042969, avg loss: 2.737711, ppl: 15.451575 +epoch: 0, batch: 44746, sum loss: 4548.374512, avg loss: 2.897054, ppl: 18.120682 +epoch: 0, batch: 44747, sum loss: 4196.646973, avg loss: 2.755513, ppl: 15.729115 +epoch: 0, batch: 44748, sum loss: 4665.311523, avg loss: 2.672000, ppl: 14.468877 +epoch: 0, batch: 44749, sum loss: 4304.504883, avg loss: 2.589955, ppl: 13.329170 +epoch: 0, batch: 44750, sum loss: 4613.156738, avg loss: 2.581509, ppl: 13.217070 +epoch: 0, batch: 44751, sum loss: 5681.118164, avg loss: 2.877973, ppl: 17.778193 +epoch: 0, batch: 44752, sum loss: 4228.563477, avg loss: 2.696788, ppl: 14.832012 +epoch: 0, batch: 44753, sum loss: 4942.155273, avg loss: 2.931290, ppl: 18.751812 +epoch: 0, batch: 44754, sum loss: 5080.600098, avg loss: 2.883428, ppl: 17.875446 +epoch: 0, batch: 44755, sum loss: 4245.002930, avg loss: 2.870184, ppl: 17.640270 +epoch: 0, batch: 44756, sum loss: 4827.041504, avg loss: 2.730227, ppl: 15.336371 +epoch: 0, batch: 44757, sum loss: 5294.571289, avg loss: 2.817760, ppl: 16.739317 +epoch: 0, batch: 44758, sum loss: 5849.249023, avg loss: 3.081796, ppl: 21.797525 +epoch: 0, batch: 44759, sum loss: 4704.847168, avg loss: 2.641689, ppl: 
14.036886 +epoch: 0, batch: 44760, sum loss: 4313.319336, avg loss: 2.885163, ppl: 17.906492 +epoch: 0, batch: 44761, sum loss: 5044.174805, avg loss: 2.944644, ppl: 19.003891 +epoch: 0, batch: 44762, sum loss: 4243.864746, avg loss: 2.558086, ppl: 12.911080 +epoch: 0, batch: 44763, sum loss: 4804.033203, avg loss: 2.740464, ppl: 15.494172 +epoch: 0, batch: 44764, sum loss: 4993.278320, avg loss: 3.031741, ppl: 20.733305 +epoch: 0, batch: 44765, sum loss: 5060.143066, avg loss: 2.940234, ppl: 18.920277 +epoch: 0, batch: 44766, sum loss: 3469.650635, avg loss: 2.441696, ppl: 11.492520 +epoch: 0, batch: 44767, sum loss: 4280.391602, avg loss: 2.817901, ppl: 16.741671 +epoch: 0, batch: 44768, sum loss: 5333.589355, avg loss: 3.046025, ppl: 21.031574 +epoch: 0, batch: 44769, sum loss: 4208.624023, avg loss: 2.482964, ppl: 11.976711 +epoch: 0, batch: 44770, sum loss: 4256.989746, avg loss: 2.486559, ppl: 12.019849 +epoch: 0, batch: 44771, sum loss: 5030.323730, avg loss: 2.988903, ppl: 19.863876 +epoch: 0, batch: 44772, sum loss: 4484.556641, avg loss: 2.831160, ppl: 16.965122 +epoch: 0, batch: 44773, sum loss: 4943.997070, avg loss: 2.697216, ppl: 14.838365 +epoch: 0, batch: 44774, sum loss: 5622.651367, avg loss: 2.928464, ppl: 18.698895 +epoch: 0, batch: 44775, sum loss: 4935.073730, avg loss: 2.990954, ppl: 19.904655 +epoch: 0, batch: 44776, sum loss: 4732.374023, avg loss: 2.515882, ppl: 12.377522 +epoch: 0, batch: 44777, sum loss: 5434.167480, avg loss: 3.030768, ppl: 20.713142 +epoch: 0, batch: 44778, sum loss: 4761.045410, avg loss: 2.717492, ppl: 15.142296 +epoch: 0, batch: 44779, sum loss: 5698.496094, avg loss: 3.160563, ppl: 23.583881 +epoch: 0, batch: 44780, sum loss: 4452.360352, avg loss: 2.883653, ppl: 17.879471 +epoch: 0, batch: 44781, sum loss: 5683.493164, avg loss: 3.077149, ppl: 21.696451 +epoch: 0, batch: 44782, sum loss: 4842.164062, avg loss: 2.913456, ppl: 18.420353 +epoch: 0, batch: 44783, sum loss: 4914.632324, avg loss: 2.761029, ppl: 
15.816117 +epoch: 0, batch: 44784, sum loss: 4383.867188, avg loss: 2.629794, ppl: 13.870917 +epoch: 0, batch: 44785, sum loss: 5001.158203, avg loss: 3.077636, ppl: 21.707022 +epoch: 0, batch: 44786, sum loss: 5014.951172, avg loss: 2.890462, ppl: 18.001619 +epoch: 0, batch: 44787, sum loss: 4714.981445, avg loss: 2.954249, ppl: 19.187305 +epoch: 0, batch: 44788, sum loss: 4957.453125, avg loss: 2.594167, ppl: 13.385432 +epoch: 0, batch: 44789, sum loss: 4861.491699, avg loss: 3.055620, ppl: 21.234350 +epoch: 0, batch: 44790, sum loss: 4574.772949, avg loss: 3.055960, ppl: 21.241577 +epoch: 0, batch: 44791, sum loss: 4975.329590, avg loss: 2.916371, ppl: 18.474129 +epoch: 0, batch: 44792, sum loss: 4470.002930, avg loss: 2.678252, ppl: 14.559624 +epoch: 0, batch: 44793, sum loss: 3270.313232, avg loss: 2.535126, ppl: 12.618027 +epoch: 0, batch: 44794, sum loss: 4607.481934, avg loss: 2.381128, ppl: 10.817093 +epoch: 0, batch: 44795, sum loss: 5237.954102, avg loss: 2.882749, ppl: 17.863304 +epoch: 0, batch: 44796, sum loss: 4800.169434, avg loss: 2.707371, ppl: 14.989822 +epoch: 0, batch: 44797, sum loss: 4568.429688, avg loss: 2.747102, ppl: 15.597358 +epoch: 0, batch: 44798, sum loss: 4252.509766, avg loss: 2.605705, ppl: 13.540762 +epoch: 0, batch: 44799, sum loss: 4181.114258, avg loss: 2.688820, ppl: 14.714297 +epoch: 0, batch: 44800, sum loss: 3802.691895, avg loss: 2.565919, ppl: 13.012613 +epoch: 0, batch: 44801, sum loss: 5536.622559, avg loss: 2.852459, ppl: 17.330339 +epoch: 0, batch: 44802, sum loss: 5325.645996, avg loss: 3.103523, ppl: 22.276304 +epoch: 0, batch: 44803, sum loss: 3980.929688, avg loss: 2.458882, ppl: 11.691731 +epoch: 0, batch: 44804, sum loss: 3885.889648, avg loss: 2.704168, ppl: 14.941885 +epoch: 0, batch: 44805, sum loss: 4848.379395, avg loss: 2.780034, ppl: 16.119566 +epoch: 0, batch: 44806, sum loss: 5578.680664, avg loss: 3.060165, ppl: 21.331079 +epoch: 0, batch: 44807, sum loss: 5293.785645, avg loss: 2.980735, ppl: 
19.702299 +epoch: 0, batch: 44808, sum loss: 3878.998291, avg loss: 2.735542, ppl: 15.418095 +epoch: 0, batch: 44809, sum loss: 4398.904297, avg loss: 2.722094, ppl: 15.212148 +epoch: 0, batch: 44810, sum loss: 5754.286133, avg loss: 2.940361, ppl: 18.922672 +epoch: 0, batch: 44811, sum loss: 4754.199707, avg loss: 2.838328, ppl: 17.087177 +epoch: 0, batch: 44812, sum loss: 4268.414551, avg loss: 2.629953, ppl: 13.873123 +epoch: 0, batch: 44813, sum loss: 4056.507080, avg loss: 2.494777, ppl: 12.119027 +epoch: 0, batch: 44814, sum loss: 4169.778809, avg loss: 2.746890, ppl: 15.594056 +epoch: 0, batch: 44815, sum loss: 5701.244141, avg loss: 3.071791, ppl: 21.580517 +epoch: 0, batch: 44816, sum loss: 6112.524902, avg loss: 3.212047, ppl: 24.829851 +epoch: 0, batch: 44817, sum loss: 5148.535156, avg loss: 2.889189, ppl: 17.978727 +epoch: 0, batch: 44818, sum loss: 3988.662842, avg loss: 2.605267, ppl: 13.534833 +epoch: 0, batch: 44819, sum loss: 4235.882812, avg loss: 2.772175, ppl: 15.993376 +epoch: 0, batch: 44820, sum loss: 5282.951660, avg loss: 2.910717, ppl: 18.369970 +epoch: 0, batch: 44821, sum loss: 6271.319824, avg loss: 3.386242, ppl: 29.554667 +epoch: 0, batch: 44822, sum loss: 5164.140625, avg loss: 2.862606, ppl: 17.507084 +epoch: 0, batch: 44823, sum loss: 4376.380371, avg loss: 2.631618, ppl: 13.896233 +epoch: 0, batch: 44824, sum loss: 4486.859863, avg loss: 2.766251, ppl: 15.898922 +epoch: 0, batch: 44825, sum loss: 4464.272461, avg loss: 2.874612, ppl: 17.718550 +epoch: 0, batch: 44826, sum loss: 4684.129883, avg loss: 2.690482, ppl: 14.738784 +epoch: 0, batch: 44827, sum loss: 4492.881836, avg loss: 2.911783, ppl: 18.389566 +epoch: 0, batch: 44828, sum loss: 5472.045898, avg loss: 2.916869, ppl: 18.483324 +epoch: 0, batch: 44829, sum loss: 4729.344238, avg loss: 2.791821, ppl: 16.310686 +epoch: 0, batch: 44830, sum loss: 4343.871094, avg loss: 2.410583, ppl: 11.140457 +epoch: 0, batch: 44831, sum loss: 5051.513672, avg loss: 2.871810, ppl: 
17.668970 +epoch: 0, batch: 44832, sum loss: 4577.752930, avg loss: 2.644571, ppl: 14.077409 +epoch: 0, batch: 44833, sum loss: 4125.606445, avg loss: 2.581731, ppl: 13.220006 +epoch: 0, batch: 44834, sum loss: 6029.407227, avg loss: 3.090419, ppl: 21.986290 +epoch: 0, batch: 44835, sum loss: 4684.250000, avg loss: 2.875537, ppl: 17.734947 +epoch: 0, batch: 44836, sum loss: 4170.266113, avg loss: 2.634407, ppl: 13.935044 +epoch: 0, batch: 44837, sum loss: 5233.799805, avg loss: 2.956949, ppl: 19.239183 +epoch: 0, batch: 44838, sum loss: 4350.823730, avg loss: 2.709105, ppl: 15.015838 +epoch: 0, batch: 44839, sum loss: 5772.774902, avg loss: 3.080456, ppl: 21.768328 +epoch: 0, batch: 44840, sum loss: 4213.555176, avg loss: 2.636768, ppl: 13.967987 +epoch: 0, batch: 44841, sum loss: 4618.319336, avg loss: 2.703934, ppl: 14.938383 +epoch: 0, batch: 44842, sum loss: 4284.776367, avg loss: 2.732638, ppl: 15.373386 +epoch: 0, batch: 44843, sum loss: 5309.322266, avg loss: 2.977747, ppl: 19.643505 +epoch: 0, batch: 44844, sum loss: 4545.940430, avg loss: 2.638387, ppl: 13.990615 +epoch: 0, batch: 44845, sum loss: 4146.628906, avg loss: 2.670077, ppl: 14.441075 +epoch: 0, batch: 44846, sum loss: 4317.955078, avg loss: 2.739819, ppl: 15.484183 +epoch: 0, batch: 44847, sum loss: 4947.424805, avg loss: 2.734895, ppl: 15.408125 +epoch: 0, batch: 44848, sum loss: 5831.195312, avg loss: 2.981184, ppl: 19.711132 +epoch: 0, batch: 44849, sum loss: 5432.954590, avg loss: 3.030092, ppl: 20.699137 +epoch: 0, batch: 44850, sum loss: 4583.059082, avg loss: 2.789445, ppl: 16.271994 +epoch: 0, batch: 44851, sum loss: 5761.870605, avg loss: 2.970037, ppl: 19.492632 +epoch: 0, batch: 44852, sum loss: 4368.044922, avg loss: 2.796444, ppl: 16.386269 +epoch: 0, batch: 44853, sum loss: 4884.940918, avg loss: 2.996896, ppl: 20.023293 +epoch: 0, batch: 44854, sum loss: 5130.349609, avg loss: 3.028542, ppl: 20.667068 +epoch: 0, batch: 44855, sum loss: 5022.702148, avg loss: 2.625563, ppl: 
13.812347 +epoch: 0, batch: 44856, sum loss: 4137.856445, avg loss: 2.649076, ppl: 14.140966 +epoch: 0, batch: 44857, sum loss: 5019.848633, avg loss: 3.044178, ppl: 20.992758 +epoch: 0, batch: 44858, sum loss: 4461.370605, avg loss: 2.968310, ppl: 19.459013 +epoch: 0, batch: 44859, sum loss: 5118.163086, avg loss: 2.926337, ppl: 18.659153 +epoch: 0, batch: 44860, sum loss: 5734.756836, avg loss: 2.994651, ppl: 19.978388 +epoch: 0, batch: 44861, sum loss: 4457.218750, avg loss: 2.569002, ppl: 13.052794 +epoch: 0, batch: 44862, sum loss: 5325.036133, avg loss: 2.761948, ppl: 15.830657 +epoch: 0, batch: 44863, sum loss: 4134.224609, avg loss: 2.865021, ppl: 17.549414 +epoch: 0, batch: 44864, sum loss: 4924.670410, avg loss: 2.901986, ppl: 18.210278 +epoch: 0, batch: 44865, sum loss: 4706.378906, avg loss: 3.026610, ppl: 20.627195 +epoch: 0, batch: 44866, sum loss: 5485.516113, avg loss: 2.947618, ppl: 19.060488 +epoch: 0, batch: 44867, sum loss: 5851.295898, avg loss: 2.959684, ppl: 19.291882 +epoch: 0, batch: 44868, sum loss: 5049.101562, avg loss: 3.211897, ppl: 24.826128 +epoch: 0, batch: 44869, sum loss: 4322.650391, avg loss: 2.776269, ppl: 16.058989 +epoch: 0, batch: 44870, sum loss: 5904.107422, avg loss: 3.157277, ppl: 23.506498 +epoch: 0, batch: 44871, sum loss: 5315.866699, avg loss: 2.853390, ppl: 17.346495 +epoch: 0, batch: 44872, sum loss: 4741.059082, avg loss: 2.983675, ppl: 19.760298 +epoch: 0, batch: 44873, sum loss: 4325.502441, avg loss: 2.689989, ppl: 14.731515 +epoch: 0, batch: 44874, sum loss: 3962.340576, avg loss: 2.641560, ppl: 14.035086 +epoch: 0, batch: 44875, sum loss: 3922.378906, avg loss: 2.754480, ppl: 15.712863 +epoch: 0, batch: 44876, sum loss: 4167.799805, avg loss: 2.729404, ppl: 15.323750 +epoch: 0, batch: 44877, sum loss: 4623.881836, avg loss: 2.793886, ppl: 16.344418 +epoch: 0, batch: 44878, sum loss: 4214.426270, avg loss: 2.624176, ppl: 13.793200 +epoch: 0, batch: 44879, sum loss: 5551.240723, avg loss: 3.157702, ppl: 
23.516499 +epoch: 0, batch: 44880, sum loss: 4304.412598, avg loss: 2.879206, ppl: 17.800129 +epoch: 0, batch: 44881, sum loss: 5641.862793, avg loss: 2.833683, ppl: 17.007986 +epoch: 0, batch: 44882, sum loss: 4277.050781, avg loss: 2.640155, ppl: 14.015373 +epoch: 0, batch: 44883, sum loss: 5882.359375, avg loss: 3.052600, ppl: 21.170309 +epoch: 0, batch: 44884, sum loss: 4652.823242, avg loss: 2.573464, ppl: 13.111165 +epoch: 0, batch: 44885, sum loss: 5245.991211, avg loss: 3.032365, ppl: 20.746237 +epoch: 0, batch: 44886, sum loss: 4846.665039, avg loss: 2.912659, ppl: 18.405682 +epoch: 0, batch: 44887, sum loss: 4278.494629, avg loss: 2.800062, ppl: 16.445665 +epoch: 0, batch: 44888, sum loss: 4323.914062, avg loss: 2.846553, ppl: 17.228291 +epoch: 0, batch: 44889, sum loss: 4758.230469, avg loss: 2.731476, ppl: 15.355529 +epoch: 0, batch: 44890, sum loss: 5543.322266, avg loss: 3.047456, ppl: 21.061695 +epoch: 0, batch: 44891, sum loss: 5558.975098, avg loss: 2.889280, ppl: 17.980366 +epoch: 0, batch: 44892, sum loss: 5711.625000, avg loss: 3.127944, ppl: 22.826994 +epoch: 0, batch: 44893, sum loss: 4758.666016, avg loss: 3.025217, ppl: 20.598471 +epoch: 0, batch: 44894, sum loss: 5653.213379, avg loss: 3.024726, ppl: 20.588366 +epoch: 0, batch: 44895, sum loss: 4947.404297, avg loss: 2.985760, ppl: 19.801546 +epoch: 0, batch: 44896, sum loss: 6496.676758, avg loss: 3.073168, ppl: 21.610250 +epoch: 0, batch: 44897, sum loss: 4572.577148, avg loss: 2.938674, ppl: 18.890785 +epoch: 0, batch: 44898, sum loss: 4381.320801, avg loss: 2.704519, ppl: 14.947125 +epoch: 0, batch: 44899, sum loss: 5055.516113, avg loss: 2.927340, ppl: 18.677881 +epoch: 0, batch: 44900, sum loss: 4251.395508, avg loss: 2.732259, ppl: 15.367567 +epoch: 0, batch: 44901, sum loss: 4064.311035, avg loss: 2.538608, ppl: 12.662031 +epoch: 0, batch: 44902, sum loss: 4888.693359, avg loss: 2.925610, ppl: 18.645588 +epoch: 0, batch: 44903, sum loss: 4747.827148, avg loss: 3.047386, ppl: 
21.060219 +epoch: 0, batch: 44904, sum loss: 6112.464355, avg loss: 3.118604, ppl: 22.614792 +epoch: 0, batch: 44905, sum loss: 4737.568848, avg loss: 2.836868, ppl: 17.062235 +epoch: 0, batch: 44906, sum loss: 4540.503906, avg loss: 2.870103, ppl: 17.638842 +epoch: 0, batch: 44907, sum loss: 5029.807617, avg loss: 2.683995, ppl: 14.643484 +epoch: 0, batch: 44908, sum loss: 4488.212402, avg loss: 2.829894, ppl: 16.943670 +epoch: 0, batch: 44909, sum loss: 4477.266602, avg loss: 2.803548, ppl: 16.503101 +epoch: 0, batch: 44910, sum loss: 4848.543945, avg loss: 3.004055, ppl: 20.167139 +epoch: 0, batch: 44911, sum loss: 5204.738770, avg loss: 2.847231, ppl: 17.239985 +epoch: 0, batch: 44912, sum loss: 4788.551270, avg loss: 2.835140, ppl: 17.032785 +epoch: 0, batch: 44913, sum loss: 6337.769043, avg loss: 3.218776, ppl: 24.997492 +epoch: 0, batch: 44914, sum loss: 4971.099121, avg loss: 2.757127, ppl: 15.754508 +epoch: 0, batch: 44915, sum loss: 4548.073730, avg loss: 2.790229, ppl: 16.284754 +epoch: 0, batch: 44916, sum loss: 4527.554199, avg loss: 2.747302, ppl: 15.600490 +epoch: 0, batch: 44917, sum loss: 4168.691406, avg loss: 2.535700, ppl: 12.625270 +epoch: 0, batch: 44918, sum loss: 4738.758789, avg loss: 3.041565, ppl: 20.937994 +epoch: 0, batch: 44919, sum loss: 4896.935059, avg loss: 2.941102, ppl: 18.936708 +epoch: 0, batch: 44920, sum loss: 4248.007812, avg loss: 2.721338, ppl: 15.200641 +epoch: 0, batch: 44921, sum loss: 5077.566406, avg loss: 2.867062, ppl: 17.585270 +epoch: 0, batch: 44922, sum loss: 4830.114746, avg loss: 2.890553, ppl: 18.003271 +epoch: 0, batch: 44923, sum loss: 4906.898438, avg loss: 2.846229, ppl: 17.222713 +epoch: 0, batch: 44924, sum loss: 4411.380859, avg loss: 2.706369, ppl: 14.974798 +epoch: 0, batch: 44925, sum loss: 5886.586426, avg loss: 2.977535, ppl: 19.639341 +epoch: 0, batch: 44926, sum loss: 5779.416016, avg loss: 2.982155, ppl: 19.730288 +epoch: 0, batch: 44927, sum loss: 5256.208984, avg loss: 2.883274, ppl: 
17.872698 +epoch: 0, batch: 44928, sum loss: 5080.130859, avg loss: 2.976058, ppl: 19.610361 +epoch: 0, batch: 44929, sum loss: 5097.268066, avg loss: 3.061422, ppl: 21.357914 +epoch: 0, batch: 44930, sum loss: 5895.284180, avg loss: 3.139129, ppl: 23.083750 +epoch: 0, batch: 44931, sum loss: 4339.422363, avg loss: 2.741265, ppl: 15.506586 +epoch: 0, batch: 44932, sum loss: 4511.215332, avg loss: 2.704566, ppl: 14.947821 +epoch: 0, batch: 44933, sum loss: 5050.282715, avg loss: 2.723993, ppl: 15.241055 +epoch: 0, batch: 44934, sum loss: 3960.104492, avg loss: 2.877983, ppl: 17.778376 +epoch: 0, batch: 44935, sum loss: 4895.973633, avg loss: 2.881680, ppl: 17.844223 +epoch: 0, batch: 44936, sum loss: 4804.617676, avg loss: 2.690156, ppl: 14.733967 +epoch: 0, batch: 44937, sum loss: 5425.241211, avg loss: 2.953316, ppl: 19.169415 +epoch: 0, batch: 44938, sum loss: 4768.302734, avg loss: 2.851856, ppl: 17.319897 +epoch: 0, batch: 44939, sum loss: 5222.746582, avg loss: 3.045333, ppl: 21.017033 +epoch: 0, batch: 44940, sum loss: 6221.762695, avg loss: 3.030571, ppl: 20.709059 +epoch: 0, batch: 44941, sum loss: 6002.660156, avg loss: 3.047036, ppl: 21.052845 +epoch: 0, batch: 44942, sum loss: 4354.726074, avg loss: 2.920675, ppl: 18.553802 +epoch: 0, batch: 44943, sum loss: 4819.004395, avg loss: 2.904765, ppl: 18.260946 +epoch: 0, batch: 44944, sum loss: 6313.019043, avg loss: 3.252457, ppl: 25.853783 +epoch: 0, batch: 44945, sum loss: 4407.960938, avg loss: 2.731079, ppl: 15.349434 +epoch: 0, batch: 44946, sum loss: 4787.034668, avg loss: 2.593193, ppl: 13.372405 +epoch: 0, batch: 44947, sum loss: 5435.675293, avg loss: 3.071003, ppl: 21.563519 +epoch: 0, batch: 44948, sum loss: 4472.046387, avg loss: 2.800280, ppl: 16.449245 +epoch: 0, batch: 44949, sum loss: 4472.105469, avg loss: 2.963622, ppl: 19.367998 +epoch: 0, batch: 44950, sum loss: 4880.540039, avg loss: 2.743418, ppl: 15.540003 +epoch: 0, batch: 44951, sum loss: 4691.051758, avg loss: 2.832761, ppl: 
16.992304 +epoch: 0, batch: 44952, sum loss: 5434.164551, avg loss: 3.017304, ppl: 20.436121 +epoch: 0, batch: 44953, sum loss: 4888.993652, avg loss: 2.689215, ppl: 14.720122 +epoch: 0, batch: 44954, sum loss: 4762.236816, avg loss: 2.831294, ppl: 16.967403 +epoch: 0, batch: 44955, sum loss: 3990.777100, avg loss: 2.461923, ppl: 11.727340 +epoch: 0, batch: 44956, sum loss: 4970.804688, avg loss: 2.865017, ppl: 17.549355 +epoch: 0, batch: 44957, sum loss: 5137.699707, avg loss: 2.973206, ppl: 19.554506 +epoch: 0, batch: 44958, sum loss: 5127.262695, avg loss: 2.936577, ppl: 18.851200 +epoch: 0, batch: 44959, sum loss: 6125.403809, avg loss: 2.929414, ppl: 18.716652 +epoch: 0, batch: 44960, sum loss: 3951.236328, avg loss: 2.738209, ppl: 15.459280 +epoch: 0, batch: 44961, sum loss: 4051.781738, avg loss: 2.629320, ppl: 13.864334 +epoch: 0, batch: 44962, sum loss: 5056.829590, avg loss: 2.866683, ppl: 17.578621 +epoch: 0, batch: 44963, sum loss: 4440.690918, avg loss: 2.784132, ppl: 16.185766 +epoch: 0, batch: 44964, sum loss: 5581.172852, avg loss: 2.976625, ppl: 19.621490 +epoch: 0, batch: 44965, sum loss: 5453.205566, avg loss: 3.117899, ppl: 22.598854 +epoch: 0, batch: 44966, sum loss: 4821.004395, avg loss: 2.684301, ppl: 14.647958 +epoch: 0, batch: 44967, sum loss: 4650.433105, avg loss: 2.928484, ppl: 18.699268 +epoch: 0, batch: 44968, sum loss: 4941.104004, avg loss: 2.908243, ppl: 18.324568 +epoch: 0, batch: 44969, sum loss: 4386.181152, avg loss: 2.639098, ppl: 14.000572 +epoch: 0, batch: 44970, sum loss: 5034.401855, avg loss: 2.938939, ppl: 18.895781 +epoch: 0, batch: 44971, sum loss: 4036.677246, avg loss: 2.660960, ppl: 14.310026 +epoch: 0, batch: 44972, sum loss: 4859.866699, avg loss: 2.764429, ppl: 15.869977 +epoch: 0, batch: 44973, sum loss: 4793.222168, avg loss: 2.894458, ppl: 18.073700 +epoch: 0, batch: 44974, sum loss: 5092.651367, avg loss: 2.807415, ppl: 16.567045 +epoch: 0, batch: 44975, sum loss: 4807.562012, avg loss: 2.920755, ppl: 
18.555285 +epoch: 0, batch: 44976, sum loss: 5198.974609, avg loss: 3.089111, ppl: 21.957556 +epoch: 0, batch: 44977, sum loss: 5416.846191, avg loss: 2.956794, ppl: 19.236197 +epoch: 0, batch: 44978, sum loss: 4323.982910, avg loss: 2.869265, ppl: 17.624065 +epoch: 0, batch: 44979, sum loss: 4736.254883, avg loss: 2.916413, ppl: 18.474901 +epoch: 0, batch: 44980, sum loss: 3971.381104, avg loss: 2.437926, ppl: 11.449268 +epoch: 0, batch: 44981, sum loss: 5134.979492, avg loss: 2.878352, ppl: 17.784939 +epoch: 0, batch: 44982, sum loss: 4579.056641, avg loss: 2.798934, ppl: 16.427134 +epoch: 0, batch: 44983, sum loss: 5010.565430, avg loss: 2.993170, ppl: 19.948811 +epoch: 0, batch: 44984, sum loss: 5124.276855, avg loss: 2.948376, ppl: 19.074945 +epoch: 0, batch: 44985, sum loss: 4706.119141, avg loss: 2.773199, ppl: 16.009773 +epoch: 0, batch: 44986, sum loss: 4891.415527, avg loss: 2.906367, ppl: 18.290226 +epoch: 0, batch: 44987, sum loss: 3646.556885, avg loss: 2.416539, ppl: 11.207002 +epoch: 0, batch: 44988, sum loss: 4692.098633, avg loss: 2.814696, ppl: 16.688103 +epoch: 0, batch: 44989, sum loss: 4858.402344, avg loss: 2.944486, ppl: 19.000900 +epoch: 0, batch: 44990, sum loss: 3850.800781, avg loss: 2.617812, ppl: 13.705698 +epoch: 0, batch: 44991, sum loss: 4837.470703, avg loss: 2.801083, ppl: 16.462471 +epoch: 0, batch: 44992, sum loss: 4060.467773, avg loss: 2.779239, ppl: 16.106754 +epoch: 0, batch: 44993, sum loss: 4410.652832, avg loss: 2.720946, ppl: 15.194683 +epoch: 0, batch: 44994, sum loss: 3788.706787, avg loss: 2.566875, ppl: 13.025051 +epoch: 0, batch: 44995, sum loss: 4661.097656, avg loss: 2.781085, ppl: 16.136513 +epoch: 0, batch: 44996, sum loss: 5257.271973, avg loss: 2.912616, ppl: 18.404882 +epoch: 0, batch: 44997, sum loss: 4438.662598, avg loss: 2.738225, ppl: 15.459517 +epoch: 0, batch: 44998, sum loss: 5024.830566, avg loss: 2.816609, ppl: 16.720058 +epoch: 0, batch: 44999, sum loss: 4509.220703, avg loss: 2.732861, ppl: 
15.376818 +epoch: 0, batch: 45000, sum loss: 3672.949463, avg loss: 2.403763, ppl: 11.064733 +epoch: 0, batch: 45001, sum loss: 4574.694336, avg loss: 2.690997, ppl: 14.746369 +epoch: 0, batch: 45002, sum loss: 4789.791992, avg loss: 2.840921, ppl: 17.131531 +epoch: 0, batch: 45003, sum loss: 4353.868652, avg loss: 2.755613, ppl: 15.730682 +epoch: 0, batch: 45004, sum loss: 5225.997070, avg loss: 2.937604, ppl: 18.870573 +epoch: 0, batch: 45005, sum loss: 5410.213379, avg loss: 2.951562, ppl: 19.135824 +epoch: 0, batch: 45006, sum loss: 4071.515625, avg loss: 2.683926, ppl: 14.642465 +epoch: 0, batch: 45007, sum loss: 5415.453125, avg loss: 2.913100, ppl: 18.413794 +epoch: 0, batch: 45008, sum loss: 5074.068848, avg loss: 2.911112, ppl: 18.377228 +epoch: 0, batch: 45009, sum loss: 5723.320801, avg loss: 2.987119, ppl: 19.828484 +epoch: 0, batch: 45010, sum loss: 4877.702148, avg loss: 2.787258, ppl: 16.236444 +epoch: 0, batch: 45011, sum loss: 5110.643066, avg loss: 2.840824, ppl: 17.129881 +epoch: 0, batch: 45012, sum loss: 4839.786133, avg loss: 2.687277, ppl: 14.691617 +epoch: 0, batch: 45013, sum loss: 5054.074707, avg loss: 3.083633, ppl: 21.837593 +epoch: 0, batch: 45014, sum loss: 5651.152832, avg loss: 2.880302, ppl: 17.819653 +epoch: 0, batch: 45015, sum loss: 4709.378418, avg loss: 2.914219, ppl: 18.434416 +epoch: 0, batch: 45016, sum loss: 4606.961914, avg loss: 2.742239, ppl: 15.521703 +epoch: 0, batch: 45017, sum loss: 5653.028320, avg loss: 3.168738, ppl: 23.777456 +epoch: 0, batch: 45018, sum loss: 4887.418457, avg loss: 2.683920, ppl: 14.642382 +epoch: 0, batch: 45019, sum loss: 3471.579590, avg loss: 2.499337, ppl: 12.174422 +epoch: 0, batch: 45020, sum loss: 5087.252930, avg loss: 2.938910, ppl: 18.895235 +epoch: 0, batch: 45021, sum loss: 5553.409180, avg loss: 2.919774, ppl: 18.537090 +epoch: 0, batch: 45022, sum loss: 5369.043457, avg loss: 3.123353, ppl: 22.722441 +epoch: 0, batch: 45023, sum loss: 4201.807617, avg loss: 2.601738, ppl: 
13.487164 +epoch: 0, batch: 45024, sum loss: 4853.902832, avg loss: 2.719273, ppl: 15.169295 +epoch: 0, batch: 45025, sum loss: 5158.050781, avg loss: 2.932377, ppl: 18.772196 +epoch: 0, batch: 45026, sum loss: 3854.901367, avg loss: 2.805605, ppl: 16.537081 +epoch: 0, batch: 45027, sum loss: 4780.701660, avg loss: 2.869569, ppl: 17.629416 +epoch: 0, batch: 45028, sum loss: 5505.309570, avg loss: 2.892963, ppl: 18.046711 +epoch: 0, batch: 45029, sum loss: 4008.479492, avg loss: 2.670539, ppl: 14.447760 +epoch: 0, batch: 45030, sum loss: 5047.788574, avg loss: 2.812139, ppl: 16.645477 +epoch: 0, batch: 45031, sum loss: 5119.766602, avg loss: 2.983547, ppl: 19.757772 +epoch: 0, batch: 45032, sum loss: 4192.021973, avg loss: 2.847841, ppl: 17.250498 +epoch: 0, batch: 45033, sum loss: 3860.281006, avg loss: 2.498564, ppl: 12.165012 +epoch: 0, batch: 45034, sum loss: 4596.245117, avg loss: 2.938776, ppl: 18.892698 +epoch: 0, batch: 45035, sum loss: 4988.066406, avg loss: 2.874966, ppl: 17.724825 +epoch: 0, batch: 45036, sum loss: 5816.793945, avg loss: 3.122272, ppl: 22.697903 +epoch: 0, batch: 45037, sum loss: 5126.690430, avg loss: 2.830862, ppl: 16.960072 +epoch: 0, batch: 45038, sum loss: 4108.814941, avg loss: 2.755744, ppl: 15.732749 +epoch: 0, batch: 45039, sum loss: 4054.562988, avg loss: 2.504363, ppl: 12.235757 +epoch: 0, batch: 45040, sum loss: 6059.392578, avg loss: 3.023649, ppl: 20.566200 +epoch: 0, batch: 45041, sum loss: 5574.692383, avg loss: 2.875035, ppl: 17.726042 +epoch: 0, batch: 45042, sum loss: 3386.235352, avg loss: 2.525157, ppl: 12.492853 +epoch: 0, batch: 45043, sum loss: 5181.371094, avg loss: 3.084150, ppl: 21.848879 +epoch: 0, batch: 45044, sum loss: 4166.011719, avg loss: 2.749843, ppl: 15.640174 +epoch: 0, batch: 45045, sum loss: 4528.859863, avg loss: 2.816455, ppl: 16.717484 +epoch: 0, batch: 45046, sum loss: 5640.366211, avg loss: 3.024325, ppl: 20.580107 +epoch: 0, batch: 45047, sum loss: 3884.667236, avg loss: 2.534029, ppl: 
12.604193 +epoch: 0, batch: 45048, sum loss: 5415.420898, avg loss: 2.936779, ppl: 18.855021 +epoch: 0, batch: 45049, sum loss: 3971.136230, avg loss: 2.480410, ppl: 11.946162 +epoch: 0, batch: 45050, sum loss: 4651.154785, avg loss: 2.830891, ppl: 16.960573 +epoch: 0, batch: 45051, sum loss: 5818.714844, avg loss: 3.005534, ppl: 20.197006 +epoch: 0, batch: 45052, sum loss: 4253.416504, avg loss: 2.585663, ppl: 13.272090 +epoch: 0, batch: 45053, sum loss: 4411.249023, avg loss: 3.023474, ppl: 20.562611 +epoch: 0, batch: 45054, sum loss: 5168.824219, avg loss: 3.020938, ppl: 20.510519 +epoch: 0, batch: 45055, sum loss: 4390.521484, avg loss: 2.742362, ppl: 15.523609 +epoch: 0, batch: 45056, sum loss: 4712.756836, avg loss: 2.674664, ppl: 14.507468 +epoch: 0, batch: 45057, sum loss: 4942.399414, avg loss: 2.843728, ppl: 17.179693 +epoch: 0, batch: 45058, sum loss: 4798.130859, avg loss: 2.814153, ppl: 16.679047 +epoch: 0, batch: 45059, sum loss: 4769.480957, avg loss: 2.833916, ppl: 17.011953 +epoch: 0, batch: 45060, sum loss: 4000.245117, avg loss: 2.575818, ppl: 13.142064 +epoch: 0, batch: 45061, sum loss: 5348.404785, avg loss: 3.164737, ppl: 23.682503 +epoch: 0, batch: 45062, sum loss: 5019.589844, avg loss: 2.815250, ppl: 16.697344 +epoch: 0, batch: 45063, sum loss: 5263.218262, avg loss: 2.699086, ppl: 14.866140 +epoch: 0, batch: 45064, sum loss: 4169.276367, avg loss: 3.010308, ppl: 20.293650 +epoch: 0, batch: 45065, sum loss: 5427.299805, avg loss: 3.008481, ppl: 20.256607 +epoch: 0, batch: 45066, sum loss: 3880.709229, avg loss: 2.685612, ppl: 14.667174 +epoch: 0, batch: 45067, sum loss: 5626.988281, avg loss: 3.136560, ppl: 23.024525 +epoch: 0, batch: 45068, sum loss: 5037.500000, avg loss: 2.891791, ppl: 18.025562 +epoch: 0, batch: 45069, sum loss: 4083.292480, avg loss: 2.709550, ppl: 15.022519 +epoch: 0, batch: 45070, sum loss: 3844.210938, avg loss: 2.473752, ppl: 11.866891 +epoch: 0, batch: 45071, sum loss: 4154.882812, avg loss: 2.659976, ppl: 
14.295950 +epoch: 0, batch: 45072, sum loss: 4385.525391, avg loss: 2.822088, ppl: 16.811926 +epoch: 0, batch: 45073, sum loss: 5287.750000, avg loss: 2.875340, ppl: 17.731451 +epoch: 0, batch: 45074, sum loss: 5605.623535, avg loss: 3.061509, ppl: 21.359772 +epoch: 0, batch: 45075, sum loss: 4396.791992, avg loss: 2.767018, ppl: 15.911121 +epoch: 0, batch: 45076, sum loss: 5095.164062, avg loss: 2.953718, ppl: 19.177130 +epoch: 0, batch: 45077, sum loss: 5395.316406, avg loss: 2.882114, ppl: 17.851967 +epoch: 0, batch: 45078, sum loss: 4763.733398, avg loss: 2.720579, ppl: 15.189113 +epoch: 0, batch: 45079, sum loss: 4758.615234, avg loss: 2.685449, ppl: 14.664783 +epoch: 0, batch: 45080, sum loss: 5516.550293, avg loss: 3.007934, ppl: 20.245522 +epoch: 0, batch: 45081, sum loss: 4915.619141, avg loss: 2.995502, ppl: 19.995396 +epoch: 0, batch: 45082, sum loss: 4534.466797, avg loss: 2.636318, ppl: 13.961701 +epoch: 0, batch: 45083, sum loss: 4425.107422, avg loss: 2.701531, ppl: 14.902526 +epoch: 0, batch: 45084, sum loss: 3713.946045, avg loss: 2.604450, ppl: 13.523788 +epoch: 0, batch: 45085, sum loss: 4205.170410, avg loss: 2.639781, ppl: 14.010128 +epoch: 0, batch: 45086, sum loss: 5313.119141, avg loss: 3.020534, ppl: 20.502237 +epoch: 0, batch: 45087, sum loss: 4662.238770, avg loss: 2.834188, ppl: 17.016569 +epoch: 0, batch: 45088, sum loss: 4765.110840, avg loss: 2.898486, ppl: 18.146648 +epoch: 0, batch: 45089, sum loss: 4019.735352, avg loss: 2.667376, ppl: 14.402125 +epoch: 0, batch: 45090, sum loss: 6085.047363, avg loss: 3.036451, ppl: 20.831179 +epoch: 0, batch: 45091, sum loss: 4749.032715, avg loss: 2.968145, ppl: 19.455803 +epoch: 0, batch: 45092, sum loss: 3960.665527, avg loss: 2.612576, ppl: 13.634130 +epoch: 0, batch: 45093, sum loss: 4209.932617, avg loss: 2.786190, ppl: 16.219099 +epoch: 0, batch: 45094, sum loss: 4828.746582, avg loss: 2.708215, ppl: 15.002468 +epoch: 0, batch: 45095, sum loss: 4734.908203, avg loss: 3.037145, ppl: 
20.845636 +epoch: 0, batch: 45096, sum loss: 3619.843018, avg loss: 2.498167, ppl: 12.160178 +epoch: 0, batch: 45097, sum loss: 4853.965820, avg loss: 2.836918, ppl: 17.063089 +epoch: 0, batch: 45098, sum loss: 5157.953613, avg loss: 2.995327, ppl: 19.991901 +epoch: 0, batch: 45099, sum loss: 3726.628174, avg loss: 2.702413, ppl: 14.915685 +epoch: 0, batch: 45100, sum loss: 4270.070312, avg loss: 2.730224, ppl: 15.336320 +epoch: 0, batch: 45101, sum loss: 4051.217773, avg loss: 2.782430, ppl: 16.158237 +epoch: 0, batch: 45102, sum loss: 4437.113770, avg loss: 2.747439, ppl: 15.602621 +epoch: 0, batch: 45103, sum loss: 4357.458008, avg loss: 2.972345, ppl: 19.537683 +epoch: 0, batch: 45104, sum loss: 4337.875000, avg loss: 2.816802, ppl: 16.723280 +epoch: 0, batch: 45105, sum loss: 5444.939453, avg loss: 2.843310, ppl: 17.172518 +epoch: 0, batch: 45106, sum loss: 4650.689453, avg loss: 2.823734, ppl: 16.839605 +epoch: 0, batch: 45107, sum loss: 4711.102539, avg loss: 2.915286, ppl: 18.454090 +epoch: 0, batch: 45108, sum loss: 3850.124756, avg loss: 2.582243, ppl: 13.226778 +epoch: 0, batch: 45109, sum loss: 4953.716797, avg loss: 2.711394, ppl: 15.050242 +epoch: 0, batch: 45110, sum loss: 5027.583984, avg loss: 2.879487, ppl: 17.805138 +epoch: 0, batch: 45111, sum loss: 5399.984375, avg loss: 2.883067, ppl: 17.868992 +epoch: 0, batch: 45112, sum loss: 5048.686035, avg loss: 3.046884, ppl: 21.049643 +epoch: 0, batch: 45113, sum loss: 4215.995117, avg loss: 2.651569, ppl: 14.176266 +epoch: 0, batch: 45114, sum loss: 4351.378906, avg loss: 2.769815, ppl: 15.955678 +epoch: 0, batch: 45115, sum loss: 4835.578613, avg loss: 2.804860, ppl: 16.524765 +epoch: 0, batch: 45116, sum loss: 5455.711914, avg loss: 2.950628, ppl: 19.117962 +epoch: 0, batch: 45117, sum loss: 4914.841797, avg loss: 2.802076, ppl: 16.478823 +epoch: 0, batch: 45118, sum loss: 4486.233887, avg loss: 2.924533, ppl: 18.625528 +epoch: 0, batch: 45119, sum loss: 5278.830566, avg loss: 3.160976, ppl: 
23.593622 +epoch: 0, batch: 45120, sum loss: 3663.162598, avg loss: 2.669944, ppl: 14.439155 +epoch: 0, batch: 45121, sum loss: 4802.148438, avg loss: 2.899848, ppl: 18.171383 +epoch: 0, batch: 45122, sum loss: 5005.537109, avg loss: 2.852158, ppl: 17.325127 +epoch: 0, batch: 45123, sum loss: 5837.981934, avg loss: 3.155666, ppl: 23.468658 +epoch: 0, batch: 45124, sum loss: 5002.810059, avg loss: 2.707148, ppl: 14.986478 +epoch: 0, batch: 45125, sum loss: 5612.074219, avg loss: 2.980390, ppl: 19.695494 +epoch: 0, batch: 45126, sum loss: 4846.641602, avg loss: 3.112808, ppl: 22.484085 +epoch: 0, batch: 45127, sum loss: 4466.364746, avg loss: 2.909684, ppl: 18.350998 +epoch: 0, batch: 45128, sum loss: 4618.229492, avg loss: 2.928491, ppl: 18.699385 +epoch: 0, batch: 45129, sum loss: 4531.172852, avg loss: 2.824921, ppl: 16.859612 +epoch: 0, batch: 45130, sum loss: 5899.509277, avg loss: 3.017652, ppl: 20.443230 +epoch: 0, batch: 45131, sum loss: 5359.499023, avg loss: 2.850797, ppl: 17.301569 +epoch: 0, batch: 45132, sum loss: 4986.046387, avg loss: 2.762352, ppl: 15.837055 +epoch: 0, batch: 45133, sum loss: 4608.935547, avg loss: 2.543563, ppl: 12.724925 +epoch: 0, batch: 45134, sum loss: 4797.034668, avg loss: 2.916130, ppl: 18.469677 +epoch: 0, batch: 45135, sum loss: 5592.833984, avg loss: 2.986030, ppl: 19.806890 +epoch: 0, batch: 45136, sum loss: 5109.084961, avg loss: 2.936256, ppl: 18.845152 +epoch: 0, batch: 45137, sum loss: 4631.999023, avg loss: 2.747330, ppl: 15.600928 +epoch: 0, batch: 45138, sum loss: 4885.935059, avg loss: 2.672831, ppl: 14.480907 +epoch: 0, batch: 45139, sum loss: 4758.956055, avg loss: 2.939442, ppl: 18.905289 +epoch: 0, batch: 45140, sum loss: 5060.462891, avg loss: 2.870370, ppl: 17.643553 +epoch: 0, batch: 45141, sum loss: 4712.343750, avg loss: 2.736553, ppl: 15.433689 +epoch: 0, batch: 45142, sum loss: 4846.868164, avg loss: 2.869668, ppl: 17.631155 +epoch: 0, batch: 45143, sum loss: 4416.737793, avg loss: 2.704677, ppl: 
14.949489 +epoch: 0, batch: 45144, sum loss: 4971.555664, avg loss: 2.862151, ppl: 17.499121 +epoch: 0, batch: 45145, sum loss: 5085.141602, avg loss: 2.930917, ppl: 18.744816 +epoch: 0, batch: 45146, sum loss: 4404.883789, avg loss: 2.555037, ppl: 12.871779 +epoch: 0, batch: 45147, sum loss: 5204.937500, avg loss: 2.987909, ppl: 19.844141 +epoch: 0, batch: 45148, sum loss: 6162.022949, avg loss: 3.338041, ppl: 28.163889 +epoch: 0, batch: 45149, sum loss: 4755.176270, avg loss: 2.683508, ppl: 14.636350 +epoch: 0, batch: 45150, sum loss: 4748.008789, avg loss: 2.846528, ppl: 17.227863 +epoch: 0, batch: 45151, sum loss: 4799.157715, avg loss: 2.938859, ppl: 18.894285 +epoch: 0, batch: 45152, sum loss: 5271.578125, avg loss: 2.846424, ppl: 17.226076 +epoch: 0, batch: 45153, sum loss: 5357.479492, avg loss: 2.787451, ppl: 16.239565 +epoch: 0, batch: 45154, sum loss: 4687.006836, avg loss: 2.896790, ppl: 18.115904 +epoch: 0, batch: 45155, sum loss: 5793.284668, avg loss: 2.945239, ppl: 19.015198 +epoch: 0, batch: 45156, sum loss: 5842.807129, avg loss: 2.976468, ppl: 19.618408 +epoch: 0, batch: 45157, sum loss: 5913.056152, avg loss: 3.194520, ppl: 24.398455 +epoch: 0, batch: 45158, sum loss: 5518.571289, avg loss: 2.959019, ppl: 19.279058 +epoch: 0, batch: 45159, sum loss: 5470.589844, avg loss: 2.947516, ppl: 19.058554 +epoch: 0, batch: 45160, sum loss: 5913.285156, avg loss: 3.152071, ppl: 23.384443 +epoch: 0, batch: 45161, sum loss: 4168.107910, avg loss: 2.701301, ppl: 14.899108 +epoch: 0, batch: 45162, sum loss: 4545.315430, avg loss: 2.758080, ppl: 15.769533 +epoch: 0, batch: 45163, sum loss: 5060.734375, avg loss: 2.841513, ppl: 17.141680 +epoch: 0, batch: 45164, sum loss: 5024.695801, avg loss: 2.733784, ppl: 15.391024 +epoch: 0, batch: 45165, sum loss: 4690.538574, avg loss: 2.815449, ppl: 16.700676 +epoch: 0, batch: 45166, sum loss: 4879.052734, avg loss: 2.962388, ppl: 19.344107 +epoch: 0, batch: 45167, sum loss: 5004.023926, avg loss: 2.885827, ppl: 
17.918377 +epoch: 0, batch: 45168, sum loss: 4468.289062, avg loss: 2.808478, ppl: 16.584663 +epoch: 0, batch: 45169, sum loss: 5249.663086, avg loss: 3.022259, ppl: 20.537624 +epoch: 0, batch: 45170, sum loss: 4207.376953, avg loss: 2.903642, ppl: 18.240456 +epoch: 0, batch: 45171, sum loss: 4884.012207, avg loss: 3.058242, ppl: 21.290092 +epoch: 0, batch: 45172, sum loss: 4929.875000, avg loss: 2.964447, ppl: 19.383976 +epoch: 0, batch: 45173, sum loss: 4460.936035, avg loss: 2.814471, ppl: 16.684340 +epoch: 0, batch: 45174, sum loss: 4345.536133, avg loss: 2.801764, ppl: 16.473680 +epoch: 0, batch: 45175, sum loss: 5358.819336, avg loss: 3.031007, ppl: 20.718077 +epoch: 0, batch: 45176, sum loss: 4999.242188, avg loss: 2.853449, ppl: 17.347507 +epoch: 0, batch: 45177, sum loss: 3794.259277, avg loss: 2.633074, ppl: 13.916481 +epoch: 0, batch: 45178, sum loss: 5372.107910, avg loss: 2.792156, ppl: 16.316158 +epoch: 0, batch: 45179, sum loss: 4275.912598, avg loss: 2.624870, ppl: 13.802774 +epoch: 0, batch: 45180, sum loss: 5415.239746, avg loss: 3.187310, ppl: 24.223185 +epoch: 0, batch: 45181, sum loss: 4379.503906, avg loss: 2.866168, ppl: 17.569555 +epoch: 0, batch: 45182, sum loss: 4871.142578, avg loss: 2.840316, ppl: 17.121180 +epoch: 0, batch: 45183, sum loss: 3797.491211, avg loss: 2.567607, ppl: 13.034597 +epoch: 0, batch: 45184, sum loss: 5169.072754, avg loss: 2.832369, ppl: 16.985645 +epoch: 0, batch: 45185, sum loss: 4968.096191, avg loss: 2.893475, ppl: 18.055943 +epoch: 0, batch: 45186, sum loss: 4584.647949, avg loss: 2.776892, ppl: 16.068996 +epoch: 0, batch: 45187, sum loss: 5343.176758, avg loss: 3.042811, ppl: 20.964098 +epoch: 0, batch: 45188, sum loss: 5505.879395, avg loss: 3.033542, ppl: 20.770679 +epoch: 0, batch: 45189, sum loss: 3430.644531, avg loss: 2.505949, ppl: 12.255187 +epoch: 0, batch: 45190, sum loss: 3797.822021, avg loss: 2.554016, ppl: 12.858642 +epoch: 0, batch: 45191, sum loss: 4185.838867, avg loss: 2.697061, ppl: 
14.836069 +epoch: 0, batch: 45192, sum loss: 5544.866699, avg loss: 3.055023, ppl: 21.221678 +epoch: 0, batch: 45193, sum loss: 4607.804199, avg loss: 2.878079, ppl: 17.780085 +epoch: 0, batch: 45194, sum loss: 4773.449219, avg loss: 2.822856, ppl: 16.824833 +epoch: 0, batch: 45195, sum loss: 4915.780273, avg loss: 2.695055, ppl: 14.806334 +epoch: 0, batch: 45196, sum loss: 4114.315918, avg loss: 2.650977, ppl: 14.167872 +epoch: 0, batch: 45197, sum loss: 4236.957520, avg loss: 2.791145, ppl: 16.299665 +epoch: 0, batch: 45198, sum loss: 5427.063477, avg loss: 2.899072, ppl: 18.157295 +epoch: 0, batch: 45199, sum loss: 4614.194824, avg loss: 3.041658, ppl: 20.939926 +epoch: 0, batch: 45200, sum loss: 4639.931152, avg loss: 2.761864, ppl: 15.829321 +epoch: 0, batch: 45201, sum loss: 5078.479004, avg loss: 2.869197, ppl: 17.622864 +epoch: 0, batch: 45202, sum loss: 4641.459961, avg loss: 2.650748, ppl: 14.164634 +epoch: 0, batch: 45203, sum loss: 5913.409180, avg loss: 2.876172, ppl: 17.746208 +epoch: 0, batch: 45204, sum loss: 4407.278809, avg loss: 2.819756, ppl: 16.772758 +epoch: 0, batch: 45205, sum loss: 5348.623047, avg loss: 2.897412, ppl: 18.127176 +epoch: 0, batch: 45206, sum loss: 4544.872070, avg loss: 2.885633, ppl: 17.914904 +epoch: 0, batch: 45207, sum loss: 4016.540039, avg loss: 2.611534, ppl: 13.619933 +epoch: 0, batch: 45208, sum loss: 4698.502930, avg loss: 2.907489, ppl: 18.310768 +epoch: 0, batch: 45209, sum loss: 4823.699707, avg loss: 3.078303, ppl: 21.721502 +epoch: 0, batch: 45210, sum loss: 5438.633301, avg loss: 3.111346, ppl: 22.451254 +epoch: 0, batch: 45211, sum loss: 4592.242676, avg loss: 3.033185, ppl: 20.763268 +epoch: 0, batch: 45212, sum loss: 5398.227539, avg loss: 3.081180, ppl: 21.784100 +epoch: 0, batch: 45213, sum loss: 5464.963867, avg loss: 3.068481, ppl: 21.509199 +epoch: 0, batch: 45214, sum loss: 4269.814941, avg loss: 2.670303, ppl: 14.444346 +epoch: 0, batch: 45215, sum loss: 4630.157227, avg loss: 2.718824, ppl: 
15.162479 +epoch: 0, batch: 45216, sum loss: 3993.119629, avg loss: 2.688969, ppl: 14.716501 +epoch: 0, batch: 45217, sum loss: 4076.100586, avg loss: 2.669352, ppl: 14.430616 +epoch: 0, batch: 45218, sum loss: 4842.949219, avg loss: 2.783304, ppl: 16.172369 +epoch: 0, batch: 45219, sum loss: 5038.478027, avg loss: 2.774492, ppl: 16.030485 +epoch: 0, batch: 45220, sum loss: 5042.034668, avg loss: 3.010170, ppl: 20.290854 +epoch: 0, batch: 45221, sum loss: 4005.277832, avg loss: 2.709931, ppl: 15.028237 +epoch: 0, batch: 45222, sum loss: 4857.237305, avg loss: 2.901576, ppl: 18.202803 +epoch: 0, batch: 45223, sum loss: 5358.048340, avg loss: 3.109720, ppl: 22.414778 +epoch: 0, batch: 45224, sum loss: 4864.965332, avg loss: 2.936008, ppl: 18.840485 +epoch: 0, batch: 45225, sum loss: 4278.954590, avg loss: 2.879512, ppl: 17.805578 +epoch: 0, batch: 45226, sum loss: 4493.205078, avg loss: 2.751503, ppl: 15.666164 +epoch: 0, batch: 45227, sum loss: 4999.455078, avg loss: 2.772854, ppl: 16.004238 +epoch: 0, batch: 45228, sum loss: 5265.520996, avg loss: 3.191225, ppl: 24.318195 +epoch: 0, batch: 45229, sum loss: 5477.972168, avg loss: 2.986899, ppl: 19.824106 +epoch: 0, batch: 45230, sum loss: 4996.816406, avg loss: 3.054289, ppl: 21.206100 +epoch: 0, batch: 45231, sum loss: 4415.370117, avg loss: 2.591180, ppl: 13.345505 +epoch: 0, batch: 45232, sum loss: 4718.038086, avg loss: 2.801685, ppl: 16.472385 +epoch: 0, batch: 45233, sum loss: 5309.910645, avg loss: 2.948312, ppl: 19.073736 +epoch: 0, batch: 45234, sum loss: 4825.604980, avg loss: 2.741821, ppl: 15.515210 +epoch: 0, batch: 45235, sum loss: 5114.846680, avg loss: 2.827445, ppl: 16.902212 +epoch: 0, batch: 45236, sum loss: 4271.693848, avg loss: 2.962340, ppl: 19.343180 +epoch: 0, batch: 45237, sum loss: 4239.127441, avg loss: 2.567612, ppl: 13.034662 +epoch: 0, batch: 45238, sum loss: 4763.293945, avg loss: 2.742253, ppl: 15.521921 +epoch: 0, batch: 45239, sum loss: 4923.627930, avg loss: 2.932476, ppl: 
18.774063 +epoch: 0, batch: 45240, sum loss: 5082.032227, avg loss: 2.815530, ppl: 16.702030 +epoch: 0, batch: 45241, sum loss: 4421.926758, avg loss: 2.922622, ppl: 18.589966 +epoch: 0, batch: 45242, sum loss: 4755.342773, avg loss: 2.792333, ppl: 16.319046 +epoch: 0, batch: 45243, sum loss: 5201.414062, avg loss: 2.805509, ppl: 16.535492 +epoch: 0, batch: 45244, sum loss: 5944.384766, avg loss: 2.960351, ppl: 19.304747 +epoch: 0, batch: 45245, sum loss: 4409.470215, avg loss: 2.669171, ppl: 14.428001 +epoch: 0, batch: 45246, sum loss: 6139.050781, avg loss: 3.075677, ppl: 21.664537 +epoch: 0, batch: 45247, sum loss: 5644.807617, avg loss: 2.935417, ppl: 18.829357 +epoch: 0, batch: 45248, sum loss: 5091.525391, avg loss: 2.755154, ppl: 15.723468 +epoch: 0, batch: 45249, sum loss: 4566.540039, avg loss: 2.702095, ppl: 14.910934 +epoch: 0, batch: 45250, sum loss: 5456.242188, avg loss: 2.816852, ppl: 16.724117 +epoch: 0, batch: 45251, sum loss: 4213.319336, avg loss: 2.620224, ppl: 13.738794 +epoch: 0, batch: 45252, sum loss: 4953.229980, avg loss: 2.838527, ppl: 17.090576 +epoch: 0, batch: 45253, sum loss: 5083.659668, avg loss: 2.631294, ppl: 13.891731 +epoch: 0, batch: 45254, sum loss: 4833.497559, avg loss: 3.090472, ppl: 21.987442 +epoch: 0, batch: 45255, sum loss: 5287.843262, avg loss: 2.847519, ppl: 17.244947 +epoch: 0, batch: 45256, sum loss: 4781.897461, avg loss: 2.901637, ppl: 18.203918 +epoch: 0, batch: 45257, sum loss: 4014.738037, avg loss: 2.445029, ppl: 11.530887 +epoch: 0, batch: 45258, sum loss: 4574.193848, avg loss: 2.820095, ppl: 16.778442 +epoch: 0, batch: 45259, sum loss: 4282.029785, avg loss: 2.762600, ppl: 15.840975 +epoch: 0, batch: 45260, sum loss: 4557.430664, avg loss: 2.859116, ppl: 17.446095 +epoch: 0, batch: 45261, sum loss: 5108.633301, avg loss: 2.805400, ppl: 16.533686 +epoch: 0, batch: 45262, sum loss: 5243.544922, avg loss: 2.738143, ppl: 15.458260 +epoch: 0, batch: 45263, sum loss: 6209.001953, avg loss: 3.254194, ppl: 
25.898726 +epoch: 0, batch: 45264, sum loss: 5000.951172, avg loss: 2.904153, ppl: 18.249777 +epoch: 0, batch: 45265, sum loss: 4260.400879, avg loss: 2.593062, ppl: 13.370649 +epoch: 0, batch: 45266, sum loss: 5113.984375, avg loss: 3.033206, ppl: 20.763683 +epoch: 0, batch: 45267, sum loss: 5567.689453, avg loss: 2.916548, ppl: 18.477388 +epoch: 0, batch: 45268, sum loss: 4564.606445, avg loss: 2.835159, ppl: 17.033113 +epoch: 0, batch: 45269, sum loss: 4304.401855, avg loss: 2.626236, ppl: 13.821653 +epoch: 0, batch: 45270, sum loss: 5043.964844, avg loss: 3.014922, ppl: 20.387508 +epoch: 0, batch: 45271, sum loss: 6049.895508, avg loss: 3.077261, ppl: 21.698893 +epoch: 0, batch: 45272, sum loss: 5118.611328, avg loss: 2.703968, ppl: 14.938889 +epoch: 0, batch: 45273, sum loss: 4358.807617, avg loss: 2.719156, ppl: 15.167520 +epoch: 0, batch: 45274, sum loss: 5192.974609, avg loss: 2.880186, ppl: 17.817579 +epoch: 0, batch: 45275, sum loss: 4160.388184, avg loss: 2.507769, ppl: 12.277504 +epoch: 0, batch: 45276, sum loss: 5644.360352, avg loss: 3.118431, ppl: 22.610884 +epoch: 0, batch: 45277, sum loss: 4286.816895, avg loss: 2.651093, ppl: 14.169514 +epoch: 0, batch: 45278, sum loss: 4494.431641, avg loss: 2.918462, ppl: 18.512794 +epoch: 0, batch: 45279, sum loss: 4941.205078, avg loss: 2.833260, ppl: 17.000786 +epoch: 0, batch: 45280, sum loss: 5482.377930, avg loss: 2.958650, ppl: 19.271931 +epoch: 0, batch: 45281, sum loss: 4586.830566, avg loss: 2.754853, ppl: 15.718734 +epoch: 0, batch: 45282, sum loss: 4528.865234, avg loss: 2.667176, ppl: 14.399252 +epoch: 0, batch: 45283, sum loss: 5469.624023, avg loss: 2.789201, ppl: 16.268024 +epoch: 0, batch: 45284, sum loss: 5295.692383, avg loss: 2.942051, ppl: 18.954691 +epoch: 0, batch: 45285, sum loss: 3834.114990, avg loss: 2.480023, ppl: 11.941535 +epoch: 0, batch: 45286, sum loss: 5641.628906, avg loss: 2.997677, ppl: 20.038939 +epoch: 0, batch: 45287, sum loss: 5069.645996, avg loss: 2.810225, ppl: 
16.613655 +epoch: 0, batch: 45288, sum loss: 4883.146973, avg loss: 2.703847, ppl: 14.937079 +epoch: 0, batch: 45289, sum loss: 4045.485840, avg loss: 2.584975, ppl: 13.262958 +epoch: 0, batch: 45290, sum loss: 4873.500000, avg loss: 2.973459, ppl: 19.559464 +epoch: 0, batch: 45291, sum loss: 5398.208008, avg loss: 2.992355, ppl: 19.932562 +epoch: 0, batch: 45292, sum loss: 4019.695068, avg loss: 2.346582, ppl: 10.449791 +epoch: 0, batch: 45293, sum loss: 4391.587891, avg loss: 2.842452, ppl: 17.157782 +epoch: 0, batch: 45294, sum loss: 4617.066406, avg loss: 2.798222, ppl: 16.415436 +epoch: 0, batch: 45295, sum loss: 4593.867676, avg loss: 2.903835, ppl: 18.243982 +epoch: 0, batch: 45296, sum loss: 4994.709961, avg loss: 2.720430, ppl: 15.186853 +epoch: 0, batch: 45297, sum loss: 4570.084961, avg loss: 2.842093, ppl: 17.151619 +epoch: 0, batch: 45298, sum loss: 4615.919922, avg loss: 2.671250, ppl: 14.458029 +epoch: 0, batch: 45299, sum loss: 4642.640625, avg loss: 2.609691, ppl: 13.594851 +epoch: 0, batch: 45300, sum loss: 4432.845215, avg loss: 2.577236, ppl: 13.160707 +epoch: 0, batch: 45301, sum loss: 4945.354980, avg loss: 3.037687, ppl: 20.856951 +epoch: 0, batch: 45302, sum loss: 6272.826172, avg loss: 3.195530, ppl: 24.423124 +epoch: 0, batch: 45303, sum loss: 5964.145996, avg loss: 2.903674, ppl: 18.241037 +epoch: 0, batch: 45304, sum loss: 4680.081543, avg loss: 2.857193, ppl: 17.412573 +epoch: 0, batch: 45305, sum loss: 4265.940430, avg loss: 2.727583, ppl: 15.295878 +epoch: 0, batch: 45306, sum loss: 4281.684570, avg loss: 2.708213, ppl: 15.002439 +epoch: 0, batch: 45307, sum loss: 4183.862793, avg loss: 2.661490, ppl: 14.317613 +epoch: 0, batch: 45308, sum loss: 4289.577637, avg loss: 2.647887, ppl: 14.124169 +epoch: 0, batch: 45309, sum loss: 5470.561523, avg loss: 2.897543, ppl: 18.129549 +epoch: 0, batch: 45310, sum loss: 5620.722656, avg loss: 3.208175, ppl: 24.733910 +epoch: 0, batch: 45311, sum loss: 4726.636230, avg loss: 2.910490, ppl: 
18.365801 +epoch: 0, batch: 45312, sum loss: 3973.843506, avg loss: 2.690483, ppl: 14.738798 +epoch: 0, batch: 45313, sum loss: 4366.552734, avg loss: 2.800868, ppl: 16.458923 +epoch: 0, batch: 45314, sum loss: 4794.135742, avg loss: 2.747356, ppl: 15.601323 +epoch: 0, batch: 45315, sum loss: 5536.527832, avg loss: 2.903266, ppl: 18.233595 +epoch: 0, batch: 45316, sum loss: 4327.179199, avg loss: 2.754411, ppl: 15.711780 +epoch: 0, batch: 45317, sum loss: 4762.651367, avg loss: 2.936283, ppl: 18.845669 +epoch: 0, batch: 45318, sum loss: 4930.869629, avg loss: 2.933295, ppl: 18.789444 +epoch: 0, batch: 45319, sum loss: 4349.395996, avg loss: 2.855808, ppl: 17.388487 +epoch: 0, batch: 45320, sum loss: 4979.565918, avg loss: 2.843841, ppl: 17.181639 +epoch: 0, batch: 45321, sum loss: 4369.097168, avg loss: 2.659219, ppl: 14.285129 +epoch: 0, batch: 45322, sum loss: 4844.760254, avg loss: 2.800439, ppl: 16.451874 +epoch: 0, batch: 45323, sum loss: 4097.272461, avg loss: 2.518299, ppl: 12.407472 +epoch: 0, batch: 45324, sum loss: 4769.346191, avg loss: 2.918817, ppl: 18.519358 +epoch: 0, batch: 45325, sum loss: 4062.226318, avg loss: 2.580830, ppl: 13.208095 +epoch: 0, batch: 45326, sum loss: 4828.574219, avg loss: 2.729550, ppl: 15.325994 +epoch: 0, batch: 45327, sum loss: 4295.204590, avg loss: 2.790906, ppl: 16.295780 +epoch: 0, batch: 45328, sum loss: 4918.019043, avg loss: 2.901486, ppl: 18.201176 +epoch: 0, batch: 45329, sum loss: 4134.903809, avg loss: 2.679782, ppl: 14.581916 +epoch: 0, batch: 45330, sum loss: 5034.532715, avg loss: 3.045694, ppl: 21.024624 +epoch: 0, batch: 45331, sum loss: 4998.109863, avg loss: 2.597770, ppl: 13.433750 +epoch: 0, batch: 45332, sum loss: 4451.672363, avg loss: 2.466300, ppl: 11.778790 +epoch: 0, batch: 45333, sum loss: 5376.545410, avg loss: 2.876696, ppl: 17.755514 +epoch: 0, batch: 45334, sum loss: 5514.515625, avg loss: 3.000280, ppl: 20.091169 +epoch: 0, batch: 45335, sum loss: 4534.509277, avg loss: 2.844736, ppl: 
17.197020 +epoch: 0, batch: 45336, sum loss: 4824.596191, avg loss: 2.816460, ppl: 16.717567 +epoch: 0, batch: 45337, sum loss: 4433.250488, avg loss: 2.709811, ppl: 15.026435 +epoch: 0, batch: 45338, sum loss: 4445.533691, avg loss: 2.820770, ppl: 16.789774 +epoch: 0, batch: 45339, sum loss: 4837.685547, avg loss: 2.742452, ppl: 15.525008 +epoch: 0, batch: 45340, sum loss: 4594.442383, avg loss: 2.791277, ppl: 16.301830 +epoch: 0, batch: 45341, sum loss: 4342.461914, avg loss: 2.552888, ppl: 12.844140 +epoch: 0, batch: 45342, sum loss: 4679.331543, avg loss: 2.876049, ppl: 17.744024 +epoch: 0, batch: 45343, sum loss: 5139.367676, avg loss: 2.925081, ppl: 18.635736 +epoch: 0, batch: 45344, sum loss: 4148.477051, avg loss: 2.814435, ppl: 16.683746 +epoch: 0, batch: 45345, sum loss: 4255.052734, avg loss: 2.623337, ppl: 13.781636 +epoch: 0, batch: 45346, sum loss: 4995.190430, avg loss: 2.776648, ppl: 16.065084 +epoch: 0, batch: 45347, sum loss: 5856.123535, avg loss: 2.956145, ppl: 19.223717 +epoch: 0, batch: 45348, sum loss: 4704.841797, avg loss: 3.004369, ppl: 20.173479 +epoch: 0, batch: 45349, sum loss: 5625.710449, avg loss: 3.205533, ppl: 24.668646 +epoch: 0, batch: 45350, sum loss: 5034.717773, avg loss: 3.009395, ppl: 20.275133 +epoch: 0, batch: 45351, sum loss: 4575.473145, avg loss: 2.702583, ppl: 14.918217 +epoch: 0, batch: 45352, sum loss: 4984.474609, avg loss: 2.701612, ppl: 14.903741 +epoch: 0, batch: 45353, sum loss: 6108.988770, avg loss: 3.074478, ppl: 21.638592 +epoch: 0, batch: 45354, sum loss: 4995.237305, avg loss: 2.749167, ppl: 15.629614 +epoch: 0, batch: 45355, sum loss: 5471.425781, avg loss: 3.258741, ppl: 26.016752 +epoch: 0, batch: 45356, sum loss: 5157.645508, avg loss: 2.697513, ppl: 14.842777 +epoch: 0, batch: 45357, sum loss: 4905.597168, avg loss: 2.835605, ppl: 17.040709 +epoch: 0, batch: 45358, sum loss: 4155.497559, avg loss: 2.691385, ppl: 14.752087 +epoch: 0, batch: 45359, sum loss: 4195.760254, avg loss: 2.670758, ppl: 
14.450915 +epoch: 0, batch: 45360, sum loss: 4242.535156, avg loss: 2.633479, ppl: 13.922126 +epoch: 0, batch: 45361, sum loss: 5029.408691, avg loss: 2.805024, ppl: 16.527479 +epoch: 0, batch: 45362, sum loss: 4647.419922, avg loss: 2.820036, ppl: 16.777462 +epoch: 0, batch: 45363, sum loss: 4457.032715, avg loss: 2.576319, ppl: 13.148655 +epoch: 0, batch: 45364, sum loss: 4183.997559, avg loss: 2.573184, ppl: 13.107495 +epoch: 0, batch: 45365, sum loss: 4563.112305, avg loss: 2.765523, ppl: 15.887342 +epoch: 0, batch: 45366, sum loss: 4501.710449, avg loss: 2.775407, ppl: 16.045156 +epoch: 0, batch: 45367, sum loss: 4704.928711, avg loss: 2.686996, ppl: 14.687481 +epoch: 0, batch: 45368, sum loss: 5614.951172, avg loss: 3.053263, ppl: 21.184366 +epoch: 0, batch: 45369, sum loss: 4970.036133, avg loss: 2.747394, ppl: 15.601925 +epoch: 0, batch: 45370, sum loss: 5135.457031, avg loss: 2.932871, ppl: 18.781477 +epoch: 0, batch: 45371, sum loss: 4888.349121, avg loss: 2.971641, ppl: 19.523924 +epoch: 0, batch: 45372, sum loss: 5291.359375, avg loss: 3.004747, ppl: 20.181108 +epoch: 0, batch: 45373, sum loss: 5487.333984, avg loss: 2.878979, ppl: 17.796089 +epoch: 0, batch: 45374, sum loss: 4948.319336, avg loss: 2.963066, ppl: 19.357224 +epoch: 0, batch: 45375, sum loss: 4685.481445, avg loss: 2.913857, ppl: 18.427729 +epoch: 0, batch: 45376, sum loss: 4000.669678, avg loss: 2.823338, ppl: 16.832941 +epoch: 0, batch: 45377, sum loss: 3781.015137, avg loss: 2.559929, ppl: 12.934900 +epoch: 0, batch: 45378, sum loss: 5392.633789, avg loss: 3.135252, ppl: 22.994434 +epoch: 0, batch: 45379, sum loss: 4628.507324, avg loss: 2.830891, ppl: 16.960573 +epoch: 0, batch: 45380, sum loss: 4436.943359, avg loss: 2.818897, ppl: 16.758348 +epoch: 0, batch: 45381, sum loss: 6106.835938, avg loss: 3.078042, ppl: 21.715847 +epoch: 0, batch: 45382, sum loss: 3892.874023, avg loss: 2.554379, ppl: 12.863312 +epoch: 0, batch: 45383, sum loss: 4651.504395, avg loss: 3.016540, ppl: 
20.420511 +epoch: 0, batch: 45384, sum loss: 5299.336914, avg loss: 2.760072, ppl: 15.800973 +epoch: 0, batch: 45385, sum loss: 5284.544922, avg loss: 3.141822, ppl: 23.146004 +epoch: 0, batch: 45386, sum loss: 5235.950195, avg loss: 3.010897, ppl: 20.305609 +epoch: 0, batch: 45387, sum loss: 4360.061523, avg loss: 2.883639, ppl: 17.879215 +epoch: 0, batch: 45388, sum loss: 4269.895508, avg loss: 2.540092, ppl: 12.680843 +epoch: 0, batch: 45389, sum loss: 4522.909668, avg loss: 2.732876, ppl: 15.377048 +epoch: 0, batch: 45390, sum loss: 5456.681641, avg loss: 2.996530, ppl: 20.015966 +epoch: 0, batch: 45391, sum loss: 3787.047852, avg loss: 2.404475, ppl: 11.072613 +epoch: 0, batch: 45392, sum loss: 4182.295410, avg loss: 2.674102, ppl: 14.499321 +epoch: 0, batch: 45393, sum loss: 4592.590820, avg loss: 2.803779, ppl: 16.506914 +epoch: 0, batch: 45394, sum loss: 4786.746094, avg loss: 2.815733, ppl: 16.705420 +epoch: 0, batch: 45395, sum loss: 4716.735352, avg loss: 2.673886, ppl: 14.496197 +epoch: 0, batch: 45396, sum loss: 4504.162598, avg loss: 2.708456, ppl: 15.006092 +epoch: 0, batch: 45397, sum loss: 4546.113770, avg loss: 2.790739, ppl: 16.293053 +epoch: 0, batch: 45398, sum loss: 5472.584961, avg loss: 2.948591, ppl: 19.079052 +epoch: 0, batch: 45399, sum loss: 4670.174805, avg loss: 2.858124, ppl: 17.428804 +epoch: 0, batch: 45400, sum loss: 5180.667480, avg loss: 2.815580, ppl: 16.702864 +epoch: 0, batch: 45401, sum loss: 5058.463379, avg loss: 2.966841, ppl: 19.430437 +epoch: 0, batch: 45402, sum loss: 4380.492676, avg loss: 2.616782, ppl: 13.691592 +epoch: 0, batch: 45403, sum loss: 5807.265625, avg loss: 3.203125, ppl: 24.609320 +epoch: 0, batch: 45404, sum loss: 4832.813477, avg loss: 2.999884, ppl: 20.083214 +epoch: 0, batch: 45405, sum loss: 4752.098633, avg loss: 2.754840, ppl: 15.718524 +epoch: 0, batch: 45406, sum loss: 4434.024414, avg loss: 2.648760, ppl: 14.136503 +epoch: 0, batch: 45407, sum loss: 6097.111328, avg loss: 2.927082, ppl: 
18.673059 +epoch: 0, batch: 45408, sum loss: 5236.995605, avg loss: 2.809547, ppl: 16.602394 +epoch: 0, batch: 45409, sum loss: 4786.810059, avg loss: 3.058665, ppl: 21.299095 +epoch: 0, batch: 45410, sum loss: 5047.612793, avg loss: 2.781054, ppl: 16.136019 +epoch: 0, batch: 45411, sum loss: 3920.158691, avg loss: 2.477976, ppl: 11.917124 +epoch: 0, batch: 45412, sum loss: 5496.583984, avg loss: 3.006884, ppl: 20.224285 +epoch: 0, batch: 45413, sum loss: 6421.244629, avg loss: 3.078257, ppl: 21.720518 +epoch: 0, batch: 45414, sum loss: 4853.452148, avg loss: 2.983068, ppl: 19.748316 +epoch: 0, batch: 45415, sum loss: 4202.115234, avg loss: 2.597105, ppl: 13.424810 +epoch: 0, batch: 45416, sum loss: 4614.120117, avg loss: 2.521377, ppl: 12.445724 +epoch: 0, batch: 45417, sum loss: 4103.926758, avg loss: 2.654545, ppl: 14.218516 +epoch: 0, batch: 45418, sum loss: 4945.688477, avg loss: 2.945616, ppl: 19.022367 +epoch: 0, batch: 45419, sum loss: 5056.402832, avg loss: 2.785897, ppl: 16.214355 +epoch: 0, batch: 45420, sum loss: 4813.570801, avg loss: 2.806747, ppl: 16.555969 +epoch: 0, batch: 45421, sum loss: 5626.721191, avg loss: 2.938236, ppl: 18.882500 +epoch: 0, batch: 45422, sum loss: 4139.000977, avg loss: 2.639669, ppl: 14.008565 +epoch: 0, batch: 45423, sum loss: 4182.926758, avg loss: 2.611065, ppl: 13.613546 +epoch: 0, batch: 45424, sum loss: 4589.155762, avg loss: 2.702683, ppl: 14.919711 +epoch: 0, batch: 45425, sum loss: 4398.864746, avg loss: 2.886394, ppl: 17.928547 +epoch: 0, batch: 45426, sum loss: 3767.023682, avg loss: 2.552184, ppl: 12.835107 +epoch: 0, batch: 45427, sum loss: 4511.581055, avg loss: 2.814461, ppl: 16.684181 +epoch: 0, batch: 45428, sum loss: 5301.141602, avg loss: 2.904735, ppl: 18.260406 +epoch: 0, batch: 45429, sum loss: 4490.132812, avg loss: 2.845458, ppl: 17.209440 +epoch: 0, batch: 45430, sum loss: 5006.016602, avg loss: 2.948184, ppl: 19.071293 +epoch: 0, batch: 45431, sum loss: 4956.359375, avg loss: 2.816113, ppl: 
16.711769 +epoch: 0, batch: 45432, sum loss: 4439.154297, avg loss: 3.076337, ppl: 21.678844 +epoch: 0, batch: 45433, sum loss: 5604.735840, avg loss: 2.997185, ppl: 20.029076 +epoch: 0, batch: 45434, sum loss: 4796.301758, avg loss: 2.860049, ppl: 17.462379 +epoch: 0, batch: 45435, sum loss: 5027.713867, avg loss: 2.864794, ppl: 17.545443 +epoch: 0, batch: 45436, sum loss: 4017.551758, avg loss: 2.552447, ppl: 12.838482 +epoch: 0, batch: 45437, sum loss: 4094.215576, avg loss: 2.655133, ppl: 14.226882 +epoch: 0, batch: 45438, sum loss: 5808.509277, avg loss: 3.184490, ppl: 24.154959 +epoch: 0, batch: 45439, sum loss: 5949.116211, avg loss: 3.243793, ppl: 25.630749 +epoch: 0, batch: 45440, sum loss: 4809.039062, avg loss: 3.068947, ppl: 21.519222 +epoch: 0, batch: 45441, sum loss: 5054.719727, avg loss: 3.006972, ppl: 20.226059 +epoch: 0, batch: 45442, sum loss: 3729.312988, avg loss: 2.665699, ppl: 14.377999 +epoch: 0, batch: 45443, sum loss: 4755.608887, avg loss: 2.897994, ppl: 18.137730 +epoch: 0, batch: 45444, sum loss: 4693.368652, avg loss: 2.813770, ppl: 16.672661 +epoch: 0, batch: 45445, sum loss: 4602.468750, avg loss: 2.749384, ppl: 15.633001 +epoch: 0, batch: 45446, sum loss: 4577.859863, avg loss: 2.641581, ppl: 14.035374 +epoch: 0, batch: 45447, sum loss: 4553.890625, avg loss: 2.816259, ppl: 16.714205 +epoch: 0, batch: 45448, sum loss: 5015.102051, avg loss: 2.800169, ppl: 16.447418 +epoch: 0, batch: 45449, sum loss: 5241.209473, avg loss: 2.787877, ppl: 16.246496 +epoch: 0, batch: 45450, sum loss: 4462.672363, avg loss: 2.605179, ppl: 13.533651 +epoch: 0, batch: 45451, sum loss: 4772.059570, avg loss: 2.780921, ppl: 16.133865 +epoch: 0, batch: 45452, sum loss: 3994.604736, avg loss: 2.730420, ppl: 15.339334 +epoch: 0, batch: 45453, sum loss: 5480.297363, avg loss: 3.024447, ppl: 20.582613 +epoch: 0, batch: 45454, sum loss: 5863.582031, avg loss: 3.110654, ppl: 22.435705 +epoch: 0, batch: 45455, sum loss: 4429.878418, avg loss: 2.499931, ppl: 
12.181658 +epoch: 0, batch: 45456, sum loss: 6322.324707, avg loss: 3.082557, ppl: 21.814114 +epoch: 0, batch: 45457, sum loss: 4300.198242, avg loss: 2.821652, ppl: 16.804596 +epoch: 0, batch: 45458, sum loss: 4186.709961, avg loss: 2.676924, ppl: 14.540305 +epoch: 0, batch: 45459, sum loss: 4972.117188, avg loss: 2.745509, ppl: 15.572544 +epoch: 0, batch: 45460, sum loss: 5183.228516, avg loss: 3.024054, ppl: 20.574533 +epoch: 0, batch: 45461, sum loss: 4869.983398, avg loss: 2.794024, ppl: 16.346663 +epoch: 0, batch: 45462, sum loss: 4615.646973, avg loss: 2.840398, ppl: 17.122580 +epoch: 0, batch: 45463, sum loss: 5677.410156, avg loss: 3.000745, ppl: 20.100512 +epoch: 0, batch: 45464, sum loss: 4968.750000, avg loss: 2.696012, ppl: 14.820510 +epoch: 0, batch: 45465, sum loss: 4703.186035, avg loss: 2.912189, ppl: 18.397034 +epoch: 0, batch: 45466, sum loss: 4841.734375, avg loss: 2.851434, ppl: 17.312586 +epoch: 0, batch: 45467, sum loss: 5376.813965, avg loss: 2.822474, ppl: 16.818417 +epoch: 0, batch: 45468, sum loss: 3808.525879, avg loss: 2.621147, ppl: 13.751483 +epoch: 0, batch: 45469, sum loss: 4548.067383, avg loss: 2.716886, ppl: 15.133128 +epoch: 0, batch: 45470, sum loss: 5439.881348, avg loss: 2.884349, ppl: 17.891909 +epoch: 0, batch: 45471, sum loss: 4708.224609, avg loss: 2.701219, ppl: 14.897879 +epoch: 0, batch: 45472, sum loss: 4980.674316, avg loss: 3.105159, ppl: 22.312757 +epoch: 0, batch: 45473, sum loss: 4731.105469, avg loss: 2.955094, ppl: 19.203529 +epoch: 0, batch: 45474, sum loss: 4072.178223, avg loss: 2.564344, ppl: 12.992135 +epoch: 0, batch: 45475, sum loss: 5296.718750, avg loss: 3.097496, ppl: 22.142443 +epoch: 0, batch: 45476, sum loss: 5112.919922, avg loss: 2.826379, ppl: 16.884218 +epoch: 0, batch: 45477, sum loss: 4563.124512, avg loss: 2.687353, ppl: 14.692728 +epoch: 0, batch: 45478, sum loss: 4142.382812, avg loss: 2.576109, ppl: 13.145884 +epoch: 0, batch: 45479, sum loss: 4322.560547, avg loss: 2.735798, ppl: 
15.422044 +epoch: 0, batch: 45480, sum loss: 5025.912109, avg loss: 2.823546, ppl: 16.836451 +epoch: 0, batch: 45481, sum loss: 4459.344238, avg loss: 2.851243, ppl: 17.309284 +epoch: 0, batch: 45482, sum loss: 4966.233398, avg loss: 2.804197, ppl: 16.513815 +epoch: 0, batch: 45483, sum loss: 5109.507324, avg loss: 2.766382, ppl: 15.901000 +epoch: 0, batch: 45484, sum loss: 4279.374512, avg loss: 2.644855, ppl: 14.081397 +epoch: 0, batch: 45485, sum loss: 5234.994629, avg loss: 3.013814, ppl: 20.364923 +epoch: 0, batch: 45486, sum loss: 4574.806152, avg loss: 2.855684, ppl: 17.386330 +epoch: 0, batch: 45487, sum loss: 4700.696777, avg loss: 2.999807, ppl: 20.081654 +epoch: 0, batch: 45488, sum loss: 4548.285156, avg loss: 2.856963, ppl: 17.408575 +epoch: 0, batch: 45489, sum loss: 5658.501465, avg loss: 2.951748, ppl: 19.139387 +epoch: 0, batch: 45490, sum loss: 4945.329590, avg loss: 2.726202, ppl: 15.274756 +epoch: 0, batch: 45491, sum loss: 5203.153320, avg loss: 2.973230, ppl: 19.554987 +epoch: 0, batch: 45492, sum loss: 5357.270508, avg loss: 3.031845, ppl: 20.735460 +epoch: 0, batch: 45493, sum loss: 4485.937500, avg loss: 2.824898, ppl: 16.859221 +epoch: 0, batch: 45494, sum loss: 5323.688477, avg loss: 2.671193, ppl: 14.457211 +epoch: 0, batch: 45495, sum loss: 4905.239258, avg loss: 2.930251, ppl: 18.732325 +epoch: 0, batch: 45496, sum loss: 4762.767578, avg loss: 2.818206, ppl: 16.746773 +epoch: 0, batch: 45497, sum loss: 5097.683594, avg loss: 2.828903, ppl: 16.926886 +epoch: 0, batch: 45498, sum loss: 5016.595215, avg loss: 3.055174, ppl: 21.224876 +epoch: 0, batch: 45499, sum loss: 5515.472168, avg loss: 3.169811, ppl: 23.802998 +epoch: 0, batch: 45500, sum loss: 5453.176758, avg loss: 2.770923, ppl: 15.973376 +epoch: 0, batch: 45501, sum loss: 4125.797852, avg loss: 2.529613, ppl: 12.548643 +epoch: 0, batch: 45502, sum loss: 5225.101562, avg loss: 2.967122, ppl: 19.435900 +epoch: 0, batch: 45503, sum loss: 4303.090820, avg loss: 3.021833, ppl: 
20.528894 +epoch: 0, batch: 45504, sum loss: 4861.890625, avg loss: 3.010459, ppl: 20.296707 +epoch: 0, batch: 45505, sum loss: 4240.712891, avg loss: 2.570129, ppl: 13.067513 +epoch: 0, batch: 45506, sum loss: 4792.010742, avg loss: 2.600114, ppl: 13.465278 +epoch: 0, batch: 45507, sum loss: 6638.971680, avg loss: 3.263998, ppl: 26.153879 +epoch: 0, batch: 45508, sum loss: 5220.952637, avg loss: 2.974902, ppl: 19.587698 +epoch: 0, batch: 45509, sum loss: 5982.979492, avg loss: 2.917104, ppl: 18.487661 +epoch: 0, batch: 45510, sum loss: 4967.278320, avg loss: 2.913360, ppl: 18.418579 +epoch: 0, batch: 45511, sum loss: 4888.504883, avg loss: 2.959143, ppl: 19.281448 +epoch: 0, batch: 45512, sum loss: 4695.551758, avg loss: 2.909264, ppl: 18.343290 +epoch: 0, batch: 45513, sum loss: 5285.725586, avg loss: 2.957877, ppl: 19.257040 +epoch: 0, batch: 45514, sum loss: 5497.543457, avg loss: 2.960443, ppl: 19.306528 +epoch: 0, batch: 45515, sum loss: 4219.919922, avg loss: 2.698159, ppl: 14.852356 +epoch: 0, batch: 45516, sum loss: 5192.468262, avg loss: 2.856143, ppl: 17.394312 +epoch: 0, batch: 45517, sum loss: 4138.928711, avg loss: 2.622895, ppl: 13.775550 +epoch: 0, batch: 45518, sum loss: 4087.078125, avg loss: 2.486057, ppl: 12.013816 +epoch: 0, batch: 45519, sum loss: 6186.181152, avg loss: 2.842914, ppl: 17.165716 +epoch: 0, batch: 45520, sum loss: 4657.556152, avg loss: 2.891096, ppl: 18.013048 +epoch: 0, batch: 45521, sum loss: 5230.354492, avg loss: 3.133826, ppl: 22.961653 +epoch: 0, batch: 45522, sum loss: 4927.195312, avg loss: 2.657603, ppl: 14.262055 +epoch: 0, batch: 45523, sum loss: 5925.971191, avg loss: 3.210169, ppl: 24.783264 +epoch: 0, batch: 45524, sum loss: 4480.018555, avg loss: 2.612256, ppl: 13.629759 +epoch: 0, batch: 45525, sum loss: 5271.336914, avg loss: 2.949825, ppl: 19.102608 +epoch: 0, batch: 45526, sum loss: 5356.597168, avg loss: 3.014405, ppl: 20.376959 +epoch: 0, batch: 45527, sum loss: 4689.417480, avg loss: 2.848978, ppl: 
17.270124 +epoch: 0, batch: 45528, sum loss: 4741.894043, avg loss: 3.012639, ppl: 20.341009 +epoch: 0, batch: 45529, sum loss: 4662.585938, avg loss: 2.890630, ppl: 18.004644 +epoch: 0, batch: 45530, sum loss: 4242.811523, avg loss: 2.938235, ppl: 18.882490 +epoch: 0, batch: 45531, sum loss: 4616.976074, avg loss: 2.815229, ppl: 16.697006 +epoch: 0, batch: 45532, sum loss: 4059.634033, avg loss: 2.735602, ppl: 15.419028 +epoch: 0, batch: 45533, sum loss: 4628.066895, avg loss: 2.523482, ppl: 12.471952 +epoch: 0, batch: 45534, sum loss: 3882.052490, avg loss: 2.498103, ppl: 12.159410 +epoch: 0, batch: 45535, sum loss: 4212.298340, avg loss: 2.804460, ppl: 16.518147 +epoch: 0, batch: 45536, sum loss: 5027.022949, avg loss: 2.692567, ppl: 14.769543 +epoch: 0, batch: 45537, sum loss: 5490.873047, avg loss: 2.934726, ppl: 18.816357 +epoch: 0, batch: 45538, sum loss: 4940.000000, avg loss: 2.797282, ppl: 16.400011 +epoch: 0, batch: 45539, sum loss: 4908.243164, avg loss: 2.748177, ppl: 15.614135 +epoch: 0, batch: 45540, sum loss: 4392.543945, avg loss: 2.792463, ppl: 16.321163 +epoch: 0, batch: 45541, sum loss: 4410.141113, avg loss: 2.803650, ppl: 16.504774 +epoch: 0, batch: 45542, sum loss: 5158.561035, avg loss: 2.791429, ppl: 16.304302 +epoch: 0, batch: 45543, sum loss: 4365.194336, avg loss: 2.883220, ppl: 17.871723 +epoch: 0, batch: 45544, sum loss: 4248.319824, avg loss: 2.515287, ppl: 12.370160 +epoch: 0, batch: 45545, sum loss: 4893.693848, avg loss: 2.812468, ppl: 16.650959 +epoch: 0, batch: 45546, sum loss: 5091.902832, avg loss: 2.796212, ppl: 16.382479 +epoch: 0, batch: 45547, sum loss: 4875.424316, avg loss: 2.806808, ppl: 16.556976 +epoch: 0, batch: 45548, sum loss: 4111.330566, avg loss: 2.697723, ppl: 14.845895 +epoch: 0, batch: 45549, sum loss: 3928.893555, avg loss: 2.681839, ppl: 14.611933 +epoch: 0, batch: 45550, sum loss: 5248.227539, avg loss: 2.803540, ppl: 16.502972 +epoch: 0, batch: 45551, sum loss: 4816.733398, avg loss: 2.929887, ppl: 
18.725506 +epoch: 0, batch: 45552, sum loss: 4797.117188, avg loss: 2.644497, ppl: 14.076362 +epoch: 0, batch: 45553, sum loss: 4889.905762, avg loss: 3.085114, ppl: 21.869961 +epoch: 0, batch: 45554, sum loss: 5548.169922, avg loss: 3.050121, ppl: 21.117895 +epoch: 0, batch: 45555, sum loss: 4692.759766, avg loss: 2.813405, ppl: 16.666576 +epoch: 0, batch: 45556, sum loss: 4597.941406, avg loss: 2.687283, ppl: 14.691709 +epoch: 0, batch: 45557, sum loss: 5046.025879, avg loss: 2.817435, ppl: 16.733870 +epoch: 0, batch: 45558, sum loss: 4538.140625, avg loss: 2.599164, ppl: 13.452491 +epoch: 0, batch: 45559, sum loss: 4314.878418, avg loss: 2.619841, ppl: 13.733541 +epoch: 0, batch: 45560, sum loss: 3890.366211, avg loss: 2.616251, ppl: 13.684331 +epoch: 0, batch: 45561, sum loss: 5028.710938, avg loss: 2.730028, ppl: 15.333311 +epoch: 0, batch: 45562, sum loss: 4789.372559, avg loss: 2.730543, ppl: 15.341213 +epoch: 0, batch: 45563, sum loss: 4876.507812, avg loss: 2.721266, ppl: 15.199546 +epoch: 0, batch: 45564, sum loss: 4438.022949, avg loss: 2.699527, ppl: 14.872699 +epoch: 0, batch: 45565, sum loss: 4124.377441, avg loss: 2.468209, ppl: 11.801295 +epoch: 0, batch: 45566, sum loss: 4820.786133, avg loss: 2.822474, ppl: 16.818413 +epoch: 0, batch: 45567, sum loss: 5408.160645, avg loss: 2.940816, ppl: 18.931292 +epoch: 0, batch: 45568, sum loss: 4125.300293, avg loss: 2.551206, ppl: 12.822560 +epoch: 0, batch: 45569, sum loss: 5098.973633, avg loss: 2.949088, ppl: 19.088543 +epoch: 0, batch: 45570, sum loss: 4740.876953, avg loss: 2.647056, ppl: 14.112425 +epoch: 0, batch: 45571, sum loss: 6197.130371, avg loss: 3.100115, ppl: 22.200510 +epoch: 0, batch: 45572, sum loss: 5211.767578, avg loss: 2.885807, ppl: 17.918022 +epoch: 0, batch: 45573, sum loss: 4116.444824, avg loss: 2.694008, ppl: 14.790844 +epoch: 0, batch: 45574, sum loss: 5256.719238, avg loss: 2.812584, ppl: 16.652889 +epoch: 0, batch: 45575, sum loss: 4857.510742, avg loss: 2.854002, ppl: 
17.357098 +epoch: 0, batch: 45576, sum loss: 4967.410645, avg loss: 2.930626, ppl: 18.739351 +epoch: 0, batch: 45577, sum loss: 5658.598633, avg loss: 3.011495, ppl: 20.317749 +epoch: 0, batch: 45578, sum loss: 4777.669922, avg loss: 2.940104, ppl: 18.917822 +epoch: 0, batch: 45579, sum loss: 4884.010742, avg loss: 2.903692, ppl: 18.241377 +epoch: 0, batch: 45580, sum loss: 4776.505859, avg loss: 2.760986, ppl: 15.815431 +epoch: 0, batch: 45581, sum loss: 5006.050293, avg loss: 2.934379, ppl: 18.809816 +epoch: 0, batch: 45582, sum loss: 4136.784180, avg loss: 2.791353, ppl: 16.303055 +epoch: 0, batch: 45583, sum loss: 5250.865234, avg loss: 2.940014, ppl: 18.916113 +epoch: 0, batch: 45584, sum loss: 5520.077148, avg loss: 2.839546, ppl: 17.108000 +epoch: 0, batch: 45585, sum loss: 5218.337891, avg loss: 2.958241, ppl: 19.264065 +epoch: 0, batch: 45586, sum loss: 4782.670410, avg loss: 2.821635, ppl: 16.804295 +epoch: 0, batch: 45587, sum loss: 4817.838867, avg loss: 3.011149, ppl: 20.310726 +epoch: 0, batch: 45588, sum loss: 5618.210938, avg loss: 3.260715, ppl: 26.068157 +epoch: 0, batch: 45589, sum loss: 5090.030762, avg loss: 3.044277, ppl: 20.994846 +epoch: 0, batch: 45590, sum loss: 5053.202637, avg loss: 2.784134, ppl: 16.185789 +epoch: 0, batch: 45591, sum loss: 4368.392578, avg loss: 2.768310, ppl: 15.931681 +epoch: 0, batch: 45592, sum loss: 3761.390137, avg loss: 2.447228, ppl: 11.556273 +epoch: 0, batch: 45593, sum loss: 4721.150391, avg loss: 2.749651, ppl: 15.637169 +epoch: 0, batch: 45594, sum loss: 3700.637939, avg loss: 2.555689, ppl: 12.880176 +epoch: 0, batch: 45595, sum loss: 4547.540039, avg loss: 2.810593, ppl: 16.619778 +epoch: 0, batch: 45596, sum loss: 4089.028564, avg loss: 2.612798, ppl: 13.637151 +epoch: 0, batch: 45597, sum loss: 5230.081055, avg loss: 2.708483, ppl: 15.006496 +epoch: 0, batch: 45598, sum loss: 5290.212402, avg loss: 2.851866, ppl: 17.320080 +epoch: 0, batch: 45599, sum loss: 4764.103516, avg loss: 2.636471, ppl: 
13.963839 +epoch: 0, batch: 45600, sum loss: 4414.171387, avg loss: 2.649563, ppl: 14.147849 +epoch: 0, batch: 45601, sum loss: 4967.957520, avg loss: 2.964175, ppl: 19.378714 +epoch: 0, batch: 45602, sum loss: 4251.959473, avg loss: 2.817733, ppl: 16.738865 +epoch: 0, batch: 45603, sum loss: 5203.225586, avg loss: 2.995524, ppl: 19.995844 +epoch: 0, batch: 45604, sum loss: 3784.216064, avg loss: 2.633414, ppl: 13.921220 +epoch: 0, batch: 45605, sum loss: 4939.740234, avg loss: 2.753479, ppl: 15.697140 +epoch: 0, batch: 45606, sum loss: 4374.187988, avg loss: 2.906437, ppl: 18.291517 +epoch: 0, batch: 45607, sum loss: 4940.832031, avg loss: 2.720722, ppl: 15.191293 +epoch: 0, batch: 45608, sum loss: 4557.003906, avg loss: 2.715735, ppl: 15.115723 +epoch: 0, batch: 45609, sum loss: 5010.110352, avg loss: 2.916246, ppl: 18.471813 +epoch: 0, batch: 45610, sum loss: 5040.242676, avg loss: 2.932078, ppl: 18.766594 +epoch: 0, batch: 45611, sum loss: 4063.778809, avg loss: 2.572012, ppl: 13.092138 +epoch: 0, batch: 45612, sum loss: 5551.286133, avg loss: 3.008827, ppl: 20.263620 +epoch: 0, batch: 45613, sum loss: 4641.048828, avg loss: 2.784072, ppl: 16.184797 +epoch: 0, batch: 45614, sum loss: 5743.918945, avg loss: 2.890749, ppl: 18.006796 +epoch: 0, batch: 45615, sum loss: 4842.996582, avg loss: 2.906961, ppl: 18.301092 +epoch: 0, batch: 45616, sum loss: 4798.535645, avg loss: 2.764133, ppl: 15.865286 +epoch: 0, batch: 45617, sum loss: 5389.024902, avg loss: 2.861936, ppl: 17.495359 +epoch: 0, batch: 45618, sum loss: 3905.727539, avg loss: 2.505278, ppl: 12.246959 +epoch: 0, batch: 45619, sum loss: 4604.611328, avg loss: 2.901457, ppl: 18.200638 +epoch: 0, batch: 45620, sum loss: 4774.525879, avg loss: 2.695949, ppl: 14.819577 +epoch: 0, batch: 45621, sum loss: 5208.539062, avg loss: 3.031746, ppl: 20.733395 +epoch: 0, batch: 45622, sum loss: 4731.852051, avg loss: 3.048874, ppl: 21.091579 +epoch: 0, batch: 45623, sum loss: 4256.794434, avg loss: 2.806061, ppl: 
16.544617 +epoch: 0, batch: 45624, sum loss: 4150.878906, avg loss: 2.541873, ppl: 12.703445 +epoch: 0, batch: 45625, sum loss: 4279.621582, avg loss: 2.545878, ppl: 12.754427 +epoch: 0, batch: 45626, sum loss: 4959.600586, avg loss: 2.764549, ppl: 15.871877 +epoch: 0, batch: 45627, sum loss: 5265.416016, avg loss: 3.093664, ppl: 22.057749 +epoch: 0, batch: 45628, sum loss: 4622.873535, avg loss: 2.764877, ppl: 15.877081 +epoch: 0, batch: 45629, sum loss: 3553.928711, avg loss: 2.545794, ppl: 12.753353 +epoch: 0, batch: 45630, sum loss: 5645.888672, avg loss: 2.988824, ppl: 19.862312 +epoch: 0, batch: 45631, sum loss: 5210.616699, avg loss: 3.006703, ppl: 20.220625 +epoch: 0, batch: 45632, sum loss: 5718.664551, avg loss: 2.845107, ppl: 17.203396 +epoch: 0, batch: 45633, sum loss: 4766.329102, avg loss: 2.830362, ppl: 16.951593 +epoch: 0, batch: 45634, sum loss: 4742.545410, avg loss: 2.766946, ppl: 15.909972 +epoch: 0, batch: 45635, sum loss: 5396.513672, avg loss: 2.843263, ppl: 17.171713 +epoch: 0, batch: 45636, sum loss: 5461.569336, avg loss: 2.779425, ppl: 16.109751 +epoch: 0, batch: 45637, sum loss: 5087.475586, avg loss: 2.845344, ppl: 17.207481 +epoch: 0, batch: 45638, sum loss: 5336.447754, avg loss: 3.100783, ppl: 22.215342 +epoch: 0, batch: 45639, sum loss: 3854.889160, avg loss: 2.688207, ppl: 14.705288 +epoch: 0, batch: 45640, sum loss: 4246.913086, avg loss: 2.781214, ppl: 16.138594 +epoch: 0, batch: 45641, sum loss: 5157.013184, avg loss: 2.885850, ppl: 17.918787 +epoch: 0, batch: 45642, sum loss: 5080.508301, avg loss: 2.612087, ppl: 13.627456 +epoch: 0, batch: 45643, sum loss: 4222.554199, avg loss: 2.675890, ppl: 14.525268 +epoch: 0, batch: 45644, sum loss: 4738.209473, avg loss: 2.810326, ppl: 16.615332 +epoch: 0, batch: 45645, sum loss: 5740.219238, avg loss: 3.091125, ppl: 22.001816 +epoch: 0, batch: 45646, sum loss: 5235.824219, avg loss: 2.903951, ppl: 18.246101 +epoch: 0, batch: 45647, sum loss: 5183.355957, avg loss: 2.751251, ppl: 
15.662210 +epoch: 0, batch: 45648, sum loss: 4364.017090, avg loss: 2.521096, ppl: 12.442223 +epoch: 0, batch: 45649, sum loss: 5658.609863, avg loss: 2.882634, ppl: 17.861252 +epoch: 0, batch: 45650, sum loss: 4823.894531, avg loss: 2.832586, ppl: 16.989344 +epoch: 0, batch: 45651, sum loss: 4498.328125, avg loss: 2.885393, ppl: 17.910612 +epoch: 0, batch: 45652, sum loss: 4145.719238, avg loss: 2.690279, ppl: 14.735780 +epoch: 0, batch: 45653, sum loss: 4831.563477, avg loss: 2.779956, ppl: 16.118313 +epoch: 0, batch: 45654, sum loss: 4315.874023, avg loss: 2.815313, ppl: 16.698395 +epoch: 0, batch: 45655, sum loss: 4172.267090, avg loss: 2.774114, ppl: 16.024418 +epoch: 0, batch: 45656, sum loss: 5315.404785, avg loss: 3.051323, ppl: 21.143297 +epoch: 0, batch: 45657, sum loss: 4521.836426, avg loss: 2.787815, ppl: 16.245489 +epoch: 0, batch: 45658, sum loss: 4562.219238, avg loss: 2.740072, ppl: 15.488096 +epoch: 0, batch: 45659, sum loss: 4373.245117, avg loss: 2.764377, ppl: 15.869156 +epoch: 0, batch: 45660, sum loss: 4241.717285, avg loss: 2.827811, ppl: 16.908417 +epoch: 0, batch: 45661, sum loss: 5167.055176, avg loss: 2.751360, ppl: 15.663927 +epoch: 0, batch: 45662, sum loss: 5587.231445, avg loss: 2.862311, ppl: 17.501934 +epoch: 0, batch: 45663, sum loss: 4306.005859, avg loss: 2.805216, ppl: 16.530640 +epoch: 0, batch: 45664, sum loss: 4515.102539, avg loss: 2.748084, ppl: 15.612694 +epoch: 0, batch: 45665, sum loss: 4398.780762, avg loss: 2.805345, ppl: 16.532776 +epoch: 0, batch: 45666, sum loss: 5231.753418, avg loss: 3.103057, ppl: 22.265907 +epoch: 0, batch: 45667, sum loss: 4444.287598, avg loss: 2.988761, ppl: 19.861059 +epoch: 0, batch: 45668, sum loss: 4427.747070, avg loss: 2.871431, ppl: 17.662281 +epoch: 0, batch: 45669, sum loss: 3888.115723, avg loss: 2.815435, ppl: 16.700438 +epoch: 0, batch: 45670, sum loss: 4735.545898, avg loss: 2.756429, ppl: 15.743529 +epoch: 0, batch: 45671, sum loss: 5060.948242, avg loss: 2.740091, ppl: 
15.488400 +epoch: 0, batch: 45672, sum loss: 4788.913086, avg loss: 2.852241, ppl: 17.326572 +epoch: 0, batch: 45673, sum loss: 3940.631836, avg loss: 2.486203, ppl: 12.015566 +epoch: 0, batch: 45674, sum loss: 5136.339355, avg loss: 2.963843, ppl: 19.372269 +epoch: 0, batch: 45675, sum loss: 5379.937500, avg loss: 2.797679, ppl: 16.406527 +epoch: 0, batch: 45676, sum loss: 4284.471680, avg loss: 2.885166, ppl: 17.906538 +epoch: 0, batch: 45677, sum loss: 4366.256836, avg loss: 2.793510, ppl: 16.338274 +epoch: 0, batch: 45678, sum loss: 4466.392578, avg loss: 2.653828, ppl: 14.208323 +epoch: 0, batch: 45679, sum loss: 4564.032227, avg loss: 2.971375, ppl: 19.518744 +epoch: 0, batch: 45680, sum loss: 5336.967285, avg loss: 3.228655, ppl: 25.245687 +epoch: 0, batch: 45681, sum loss: 5626.535156, avg loss: 2.900276, ppl: 18.179161 +epoch: 0, batch: 45682, sum loss: 4911.080078, avg loss: 2.937249, ppl: 18.863880 +epoch: 0, batch: 45683, sum loss: 4108.238281, avg loss: 2.615047, ppl: 13.667855 +epoch: 0, batch: 45684, sum loss: 5637.128418, avg loss: 3.070331, ppl: 21.549042 +epoch: 0, batch: 45685, sum loss: 3891.004395, avg loss: 2.553153, ppl: 12.847543 +epoch: 0, batch: 45686, sum loss: 4864.477051, avg loss: 2.861457, ppl: 17.486994 +epoch: 0, batch: 45687, sum loss: 4763.130859, avg loss: 2.698658, ppl: 14.859773 +epoch: 0, batch: 45688, sum loss: 3470.829590, avg loss: 2.377281, ppl: 10.775561 +epoch: 0, batch: 45689, sum loss: 4754.286621, avg loss: 2.735493, ppl: 15.417341 +epoch: 0, batch: 45690, sum loss: 4899.977539, avg loss: 2.982336, ppl: 19.733852 +epoch: 0, batch: 45691, sum loss: 4467.451660, avg loss: 2.651307, ppl: 14.172544 +epoch: 0, batch: 45692, sum loss: 4232.577148, avg loss: 2.931148, ppl: 18.749134 +epoch: 0, batch: 45693, sum loss: 5548.401367, avg loss: 2.954420, ppl: 19.190596 +epoch: 0, batch: 45694, sum loss: 4173.314941, avg loss: 2.754663, ppl: 15.715751 +epoch: 0, batch: 45695, sum loss: 4479.827148, avg loss: 2.882772, ppl: 
17.863718 +epoch: 0, batch: 45696, sum loss: 4810.836426, avg loss: 2.872141, ppl: 17.674826 +epoch: 0, batch: 45697, sum loss: 5389.287109, avg loss: 2.974220, ppl: 19.574354 +epoch: 0, batch: 45698, sum loss: 4786.037598, avg loss: 3.011981, ppl: 20.327625 +epoch: 0, batch: 45699, sum loss: 5018.896484, avg loss: 2.864667, ppl: 17.543213 +epoch: 0, batch: 45700, sum loss: 5190.644531, avg loss: 2.974581, ppl: 19.581421 +epoch: 0, batch: 45701, sum loss: 3863.456055, avg loss: 2.757642, ppl: 15.762628 +epoch: 0, batch: 45702, sum loss: 5957.725586, avg loss: 3.133996, ppl: 22.965555 +epoch: 0, batch: 45703, sum loss: 4386.542480, avg loss: 2.924362, ppl: 18.622335 +epoch: 0, batch: 45704, sum loss: 4523.688477, avg loss: 2.689470, ppl: 14.723867 +epoch: 0, batch: 45705, sum loss: 4594.860352, avg loss: 2.871788, ppl: 17.668573 +epoch: 0, batch: 45706, sum loss: 4686.356445, avg loss: 2.892813, ppl: 18.043991 +epoch: 0, batch: 45707, sum loss: 3768.322266, avg loss: 2.644437, ppl: 14.075513 +epoch: 0, batch: 45708, sum loss: 4594.000977, avg loss: 2.782557, ppl: 16.160286 +epoch: 0, batch: 45709, sum loss: 4077.885254, avg loss: 2.667028, ppl: 14.397113 +epoch: 0, batch: 45710, sum loss: 4251.567383, avg loss: 2.605127, ppl: 13.532946 +epoch: 0, batch: 45711, sum loss: 4553.812012, avg loss: 2.797182, ppl: 16.398369 +epoch: 0, batch: 45712, sum loss: 4476.418945, avg loss: 2.778659, ppl: 16.097414 +epoch: 0, batch: 45713, sum loss: 4342.420898, avg loss: 2.729366, ppl: 15.323167 +epoch: 0, batch: 45714, sum loss: 4938.883789, avg loss: 2.768433, ppl: 15.933644 +epoch: 0, batch: 45715, sum loss: 4101.063965, avg loss: 2.496083, ppl: 12.134863 +epoch: 0, batch: 45716, sum loss: 5410.074219, avg loss: 2.891542, ppl: 18.021072 +epoch: 0, batch: 45717, sum loss: 5648.686523, avg loss: 2.963634, ppl: 19.368229 +epoch: 0, batch: 45718, sum loss: 4062.404785, avg loss: 2.489219, ppl: 12.051856 +epoch: 0, batch: 45719, sum loss: 4480.894531, avg loss: 2.654558, ppl: 
14.218706 +epoch: 0, batch: 45720, sum loss: 5100.668457, avg loss: 2.693067, ppl: 14.776925 +epoch: 0, batch: 45721, sum loss: 4705.596680, avg loss: 2.696617, ppl: 14.829477 +epoch: 0, batch: 45722, sum loss: 4908.045898, avg loss: 2.850201, ppl: 17.291256 +epoch: 0, batch: 45723, sum loss: 3857.200928, avg loss: 2.640110, ppl: 14.014749 +epoch: 0, batch: 45724, sum loss: 5065.832520, avg loss: 3.092694, ppl: 22.036354 +epoch: 0, batch: 45725, sum loss: 5822.100586, avg loss: 3.108436, ppl: 22.386007 +epoch: 0, batch: 45726, sum loss: 4390.708008, avg loss: 2.605762, ppl: 13.541533 +epoch: 0, batch: 45727, sum loss: 3686.869629, avg loss: 2.855825, ppl: 17.388771 +epoch: 0, batch: 45728, sum loss: 4475.314941, avg loss: 2.777973, ppl: 16.086388 +epoch: 0, batch: 45729, sum loss: 4814.596680, avg loss: 2.643930, ppl: 14.068387 +epoch: 0, batch: 45730, sum loss: 5579.096680, avg loss: 3.197190, ppl: 24.463692 +epoch: 0, batch: 45731, sum loss: 4527.475586, avg loss: 2.932303, ppl: 18.770809 +epoch: 0, batch: 45732, sum loss: 4704.826172, avg loss: 2.893497, ppl: 18.056343 +epoch: 0, batch: 45733, sum loss: 4921.536133, avg loss: 2.834986, ppl: 17.030165 +epoch: 0, batch: 45734, sum loss: 5319.280273, avg loss: 2.886207, ppl: 17.925196 +epoch: 0, batch: 45735, sum loss: 5263.875977, avg loss: 2.980677, ppl: 19.701153 +epoch: 0, batch: 45736, sum loss: 5362.416016, avg loss: 2.936701, ppl: 18.853546 +epoch: 0, batch: 45737, sum loss: 5337.968750, avg loss: 2.970489, ppl: 19.501453 +epoch: 0, batch: 45738, sum loss: 5791.607422, avg loss: 2.983826, ppl: 19.763294 +epoch: 0, batch: 45739, sum loss: 4267.532227, avg loss: 2.778341, ppl: 16.092306 +epoch: 0, batch: 45740, sum loss: 4227.582520, avg loss: 2.903559, ppl: 18.238951 +epoch: 0, batch: 45741, sum loss: 5280.150391, avg loss: 2.765925, ppl: 15.893734 +epoch: 0, batch: 45742, sum loss: 5289.979980, avg loss: 2.942147, ppl: 18.956497 +epoch: 0, batch: 45743, sum loss: 5607.606934, avg loss: 3.060921, ppl: 
21.347208 +epoch: 0, batch: 45744, sum loss: 4887.386719, avg loss: 2.885116, ppl: 17.905647 +epoch: 0, batch: 45745, sum loss: 3896.858398, avg loss: 2.779500, ppl: 16.110956 +epoch: 0, batch: 45746, sum loss: 4557.188477, avg loss: 2.746949, ppl: 15.594978 +epoch: 0, batch: 45747, sum loss: 5341.021973, avg loss: 2.960655, ppl: 19.310619 +epoch: 0, batch: 45748, sum loss: 5205.280762, avg loss: 2.901494, ppl: 18.201323 +epoch: 0, batch: 45749, sum loss: 4903.496094, avg loss: 2.750138, ppl: 15.644792 +epoch: 0, batch: 45750, sum loss: 5001.065430, avg loss: 2.943535, ppl: 18.982830 +epoch: 0, batch: 45751, sum loss: 5632.472656, avg loss: 2.884011, ppl: 17.885862 +epoch: 0, batch: 45752, sum loss: 4006.533203, avg loss: 2.663918, ppl: 14.352415 +epoch: 0, batch: 45753, sum loss: 4705.874512, avg loss: 2.802784, ppl: 16.490492 +epoch: 0, batch: 45754, sum loss: 5197.088379, avg loss: 3.196241, ppl: 24.440489 +epoch: 0, batch: 45755, sum loss: 4601.767090, avg loss: 2.635606, ppl: 13.951758 +epoch: 0, batch: 45756, sum loss: 4960.685547, avg loss: 3.065937, ppl: 21.454546 +epoch: 0, batch: 45757, sum loss: 5230.888184, avg loss: 2.917394, ppl: 18.493040 +epoch: 0, batch: 45758, sum loss: 5170.698242, avg loss: 2.956374, ppl: 19.228123 +epoch: 0, batch: 45759, sum loss: 5146.569336, avg loss: 2.843409, ppl: 17.174206 +epoch: 0, batch: 45760, sum loss: 4497.970703, avg loss: 2.907544, ppl: 18.311771 +epoch: 0, batch: 45761, sum loss: 4705.223633, avg loss: 2.893741, ppl: 18.060755 +epoch: 0, batch: 45762, sum loss: 4986.344727, avg loss: 2.741256, ppl: 15.506449 +epoch: 0, batch: 45763, sum loss: 4340.000488, avg loss: 2.796392, ppl: 16.385426 +epoch: 0, batch: 45764, sum loss: 4808.508789, avg loss: 2.713605, ppl: 15.083560 +epoch: 0, batch: 45765, sum loss: 4472.268555, avg loss: 2.770922, ppl: 15.973361 +epoch: 0, batch: 45766, sum loss: 4565.746094, avg loss: 2.841161, ppl: 17.135653 +epoch: 0, batch: 45767, sum loss: 4648.998047, avg loss: 2.718712, ppl: 
15.160788 +epoch: 0, batch: 45768, sum loss: 5515.002930, avg loss: 2.922630, ppl: 18.590117 +epoch: 0, batch: 45769, sum loss: 5058.626953, avg loss: 2.859597, ppl: 17.454491 +epoch: 0, batch: 45770, sum loss: 4044.303467, avg loss: 2.678347, ppl: 14.560999 +epoch: 0, batch: 45771, sum loss: 4519.067383, avg loss: 2.750497, ppl: 15.650414 +epoch: 0, batch: 45772, sum loss: 4911.563477, avg loss: 2.719581, ppl: 15.173965 +epoch: 0, batch: 45773, sum loss: 4365.934570, avg loss: 2.634843, ppl: 13.941122 +epoch: 0, batch: 45774, sum loss: 5303.810547, avg loss: 2.904606, ppl: 18.258051 +pass_id = 0 time_consumed = 26388.6190331 +epoch: 1, batch: 0, sum loss: 5060.735352, avg loss: 2.817781, ppl: 16.739672 +epoch: 1, batch: 1, sum loss: 5196.693359, avg loss: 2.927714, ppl: 18.684874 +epoch: 1, batch: 2, sum loss: 3898.017822, avg loss: 2.442367, ppl: 11.500231 +epoch: 1, batch: 3, sum loss: 4818.640137, avg loss: 2.965317, ppl: 19.400852 +epoch: 1, batch: 4, sum loss: 4386.920898, avg loss: 2.810327, ppl: 16.615355 +epoch: 1, batch: 5, sum loss: 5360.865234, avg loss: 2.991554, ppl: 19.916618 +epoch: 1, batch: 6, sum loss: 4477.905273, avg loss: 2.809225, ppl: 16.597055 +epoch: 1, batch: 7, sum loss: 5129.081543, avg loss: 2.930904, ppl: 18.744562 +epoch: 1, batch: 8, sum loss: 6282.841797, avg loss: 2.946924, ppl: 19.047274 +epoch: 1, batch: 9, sum loss: 3550.524414, avg loss: 2.565408, ppl: 13.005960 +epoch: 1, batch: 10, sum loss: 4398.693848, avg loss: 2.740619, ppl: 15.496577 +epoch: 1, batch: 11, sum loss: 4605.036621, avg loss: 2.849651, ppl: 17.281755 +epoch: 1, batch: 12, sum loss: 5795.287598, avg loss: 3.016808, ppl: 20.425983 +epoch: 1, batch: 13, sum loss: 5726.727539, avg loss: 3.077231, ppl: 21.698246 +epoch: 1, batch: 14, sum loss: 4160.286133, avg loss: 2.643130, ppl: 14.057128 +epoch: 1, batch: 15, sum loss: 5751.770508, avg loss: 2.966359, ppl: 19.421085 +epoch: 1, batch: 16, sum loss: 4083.345703, avg loss: 2.849509, ppl: 17.279291 +epoch: 1, 
batch: 17, sum loss: 5721.232910, avg loss: 3.023907, ppl: 20.571516 +epoch: 1, batch: 18, sum loss: 4075.772217, avg loss: 2.626142, ppl: 13.820349 +epoch: 1, batch: 19, sum loss: 3990.370117, avg loss: 2.596207, ppl: 13.412768 +epoch: 1, batch: 20, sum loss: 5142.294434, avg loss: 3.109005, ppl: 22.398752 +epoch: 1, batch: 21, sum loss: 4077.177246, avg loss: 2.559433, ppl: 12.928488 +epoch: 1, batch: 22, sum loss: 5645.179688, avg loss: 2.889038, ppl: 17.976002 +epoch: 1, batch: 23, sum loss: 5281.257812, avg loss: 3.066932, ppl: 21.475924 +epoch: 1, batch: 24, sum loss: 4261.425781, avg loss: 2.666725, ppl: 14.392751 +epoch: 1, batch: 25, sum loss: 4769.098633, avg loss: 2.949350, ppl: 19.093536 +epoch: 1, batch: 26, sum loss: 4312.577148, avg loss: 2.822367, ppl: 16.816612 +epoch: 1, batch: 27, sum loss: 4009.812988, avg loss: 2.510841, ppl: 12.315282 +epoch: 1, batch: 28, sum loss: 5429.818359, avg loss: 2.817757, ppl: 16.739269 +epoch: 1, batch: 29, sum loss: 5438.377930, avg loss: 3.014622, ppl: 20.381384 +epoch: 1, batch: 30, sum loss: 5152.975098, avg loss: 2.878757, ppl: 17.792143 +epoch: 1, batch: 31, sum loss: 5255.409668, avg loss: 2.934344, ppl: 18.809153 +epoch: 1, batch: 32, sum loss: 5385.583984, avg loss: 2.930133, ppl: 18.730114 +epoch: 1, batch: 33, sum loss: 4434.857422, avg loss: 2.776993, ppl: 16.070621 +epoch: 1, batch: 34, sum loss: 5036.272949, avg loss: 2.827778, ppl: 16.907856 +epoch: 1, batch: 35, sum loss: 5232.500488, avg loss: 2.811661, ppl: 16.637526 +epoch: 1, batch: 36, sum loss: 4593.430664, avg loss: 2.547660, ppl: 12.777168 +epoch: 1, batch: 37, sum loss: 4453.854492, avg loss: 2.815331, ppl: 16.698709 +epoch: 1, batch: 38, sum loss: 4505.199707, avg loss: 2.727118, ppl: 15.288769 +epoch: 1, batch: 39, sum loss: 4700.245117, avg loss: 2.867752, ppl: 17.597408 +epoch: 1, batch: 40, sum loss: 5354.469238, avg loss: 3.212039, ppl: 24.829662 +epoch: 1, batch: 41, sum loss: 4568.541016, avg loss: 2.532451, ppl: 12.584309 +epoch: 1, 
batch: 42, sum loss: 3998.575439, avg loss: 2.729403, ppl: 15.323736 +epoch: 1, batch: 43, sum loss: 5685.836914, avg loss: 3.107015, ppl: 22.354216 +epoch: 1, batch: 44, sum loss: 4423.332520, avg loss: 2.695510, ppl: 14.813077 +epoch: 1, batch: 45, sum loss: 3920.114990, avg loss: 2.645152, ppl: 14.085584 +epoch: 1, batch: 46, sum loss: 5643.618164, avg loss: 3.247191, ppl: 25.718002 +epoch: 1, batch: 47, sum loss: 5610.518555, avg loss: 3.134368, ppl: 22.974104 +epoch: 1, batch: 48, sum loss: 4848.747070, avg loss: 2.823965, ppl: 16.843496 +epoch: 1, batch: 49, sum loss: 5013.055664, avg loss: 2.935044, ppl: 18.822336 +epoch: 1, batch: 50, sum loss: 4397.749512, avg loss: 2.679920, ppl: 14.583933 +epoch: 1, batch: 51, sum loss: 6030.434082, avg loss: 3.031892, ppl: 20.736435 +epoch: 1, batch: 52, sum loss: 5508.449219, avg loss: 2.899184, ppl: 18.159321 +epoch: 1, batch: 53, sum loss: 4217.704102, avg loss: 2.800601, ppl: 16.454525 +epoch: 1, batch: 54, sum loss: 4728.329102, avg loss: 2.883127, ppl: 17.870073 +epoch: 1, batch: 55, sum loss: 5093.555664, avg loss: 2.842386, ppl: 17.156652 +epoch: 1, batch: 56, sum loss: 4548.464844, avg loss: 2.617068, ppl: 13.695514 +epoch: 1, batch: 57, sum loss: 4962.179688, avg loss: 2.756767, ppl: 15.748837 +epoch: 1, batch: 58, sum loss: 5797.327637, avg loss: 3.301440, ppl: 27.151697 +epoch: 1, batch: 59, sum loss: 4185.127930, avg loss: 2.803167, ppl: 16.496803 +epoch: 1, batch: 60, sum loss: 4186.065918, avg loss: 2.664587, ppl: 14.362017 +epoch: 1, batch: 61, sum loss: 3899.248535, avg loss: 2.704056, ppl: 14.940207 +epoch: 1, batch: 62, sum loss: 4159.819336, avg loss: 2.680296, ppl: 14.589414 +epoch: 1, batch: 63, sum loss: 4962.726562, avg loss: 2.902179, ppl: 18.213795 +epoch: 1, batch: 64, sum loss: 4630.744629, avg loss: 3.028610, ppl: 20.668484 +epoch: 1, batch: 65, sum loss: 4588.420898, avg loss: 2.711833, ppl: 15.056846 +epoch: 1, batch: 66, sum loss: 4754.401855, avg loss: 2.765795, ppl: 15.891672 +epoch: 1, 
batch: 67, sum loss: 4596.985352, avg loss: 2.855270, ppl: 17.379135 +epoch: 1, batch: 68, sum loss: 4795.237305, avg loss: 3.006418, ppl: 20.214870 +epoch: 1, batch: 69, sum loss: 4526.869141, avg loss: 2.847087, ppl: 17.237503 +epoch: 1, batch: 70, sum loss: 3788.684814, avg loss: 2.649430, ppl: 14.145973 +epoch: 1, batch: 71, sum loss: 4707.751953, avg loss: 2.743445, ppl: 15.540432 +epoch: 1, batch: 72, sum loss: 3517.607666, avg loss: 2.601781, ppl: 13.487737 +epoch: 1, batch: 73, sum loss: 3988.750244, avg loss: 2.851144, ppl: 17.307568 +epoch: 1, batch: 74, sum loss: 5944.192871, avg loss: 3.110514, ppl: 22.432581 +epoch: 1, batch: 75, sum loss: 4245.879395, avg loss: 2.688967, ppl: 14.716470 +epoch: 1, batch: 76, sum loss: 4331.041016, avg loss: 2.838166, ppl: 17.084404 +epoch: 1, batch: 77, sum loss: 4225.668457, avg loss: 2.667720, ppl: 14.407084 +epoch: 1, batch: 78, sum loss: 4754.884766, avg loss: 2.737412, ppl: 15.446959 +epoch: 1, batch: 79, sum loss: 4230.658203, avg loss: 2.889794, ppl: 17.989601 +epoch: 1, batch: 80, sum loss: 5274.778320, avg loss: 2.970033, ppl: 19.492561 +epoch: 1, batch: 81, sum loss: 4033.024414, avg loss: 2.648079, ppl: 14.126877 +epoch: 1, batch: 82, sum loss: 4803.972656, avg loss: 2.824205, ppl: 16.847544 +epoch: 1, batch: 83, sum loss: 3918.123779, avg loss: 2.908778, ppl: 18.334379 +epoch: 1, batch: 84, sum loss: 4824.839844, avg loss: 2.826503, ppl: 16.886299 +epoch: 1, batch: 85, sum loss: 4008.373291, avg loss: 2.579391, ppl: 13.189100 +epoch: 1, batch: 86, sum loss: 3915.115967, avg loss: 2.390181, ppl: 10.915465 +epoch: 1, batch: 87, sum loss: 4004.611816, avg loss: 2.632881, ppl: 13.913796 +epoch: 1, batch: 88, sum loss: 4785.684570, avg loss: 2.734677, ppl: 15.404764 +epoch: 1, batch: 89, sum loss: 5309.332520, avg loss: 2.991173, ppl: 19.909023 +epoch: 1, batch: 90, sum loss: 5669.742676, avg loss: 3.091463, ppl: 22.009245 +epoch: 1, batch: 91, sum loss: 4837.359863, avg loss: 2.783291, ppl: 16.172157 +epoch: 1, 
batch: 92, sum loss: 4948.528320, avg loss: 2.711522, ppl: 15.052173 +epoch: 1, batch: 93, sum loss: 3820.509766, avg loss: 2.690500, ppl: 14.739044 +epoch: 1, batch: 94, sum loss: 5001.377930, avg loss: 2.923073, ppl: 18.598358 +epoch: 1, batch: 95, sum loss: 3926.864746, avg loss: 2.509179, ppl: 12.294827 +epoch: 1, batch: 96, sum loss: 5134.009277, avg loss: 3.005860, ppl: 20.203585 +epoch: 1, batch: 97, sum loss: 5258.714844, avg loss: 2.924758, ppl: 18.629717 +epoch: 1, batch: 98, sum loss: 5373.773926, avg loss: 2.801759, ppl: 16.473598 +epoch: 1, batch: 99, sum loss: 4407.769043, avg loss: 2.686026, ppl: 14.673250 +epoch: 1, batch: 100, sum loss: 4857.895508, avg loss: 2.675052, ppl: 14.513110 +epoch: 1, batch: 101, sum loss: 4385.664062, avg loss: 2.834948, ppl: 17.029512 +epoch: 1, batch: 102, sum loss: 4730.516602, avg loss: 2.844568, ppl: 17.194130 +epoch: 1, batch: 103, sum loss: 4818.327148, avg loss: 3.112615, ppl: 22.479744 +epoch: 1, batch: 104, sum loss: 6068.947754, avg loss: 2.887225, ppl: 17.943455 +epoch: 1, batch: 105, sum loss: 3996.113281, avg loss: 2.703730, ppl: 14.935342 +epoch: 1, batch: 106, sum loss: 5502.450195, avg loss: 3.020006, ppl: 20.491409 +epoch: 1, batch: 107, sum loss: 5468.785645, avg loss: 3.003177, ppl: 20.149454 +epoch: 1, batch: 108, sum loss: 4388.609863, avg loss: 2.719089, ppl: 15.166503 +epoch: 1, batch: 109, sum loss: 4577.183105, avg loss: 2.828914, ppl: 16.927071 +epoch: 1, batch: 110, sum loss: 4600.300781, avg loss: 3.004769, ppl: 20.181551 +epoch: 1, batch: 111, sum loss: 4933.338867, avg loss: 2.733152, ppl: 15.381288 +epoch: 1, batch: 112, sum loss: 6385.921875, avg loss: 3.033692, ppl: 20.773790 +epoch: 1, batch: 113, sum loss: 4148.994141, avg loss: 2.671600, ppl: 14.463086 +epoch: 1, batch: 114, sum loss: 5298.323730, avg loss: 2.927251, ppl: 18.676216 +epoch: 1, batch: 115, sum loss: 4582.648438, avg loss: 2.909618, ppl: 18.349785 +epoch: 1, batch: 116, sum loss: 4682.212402, avg loss: 2.872523, ppl: 
17.681570 +epoch: 1, batch: 117, sum loss: 4940.312500, avg loss: 2.756871, ppl: 15.750483 +epoch: 1, batch: 118, sum loss: 4906.114258, avg loss: 2.768687, ppl: 15.937702 +epoch: 1, batch: 119, sum loss: 4659.015137, avg loss: 2.659255, ppl: 14.285646 +epoch: 1, batch: 120, sum loss: 4966.424805, avg loss: 2.912859, ppl: 18.409359 +epoch: 1, batch: 121, sum loss: 4545.806641, avg loss: 2.715536, ppl: 15.112706 +epoch: 1, batch: 122, sum loss: 4330.316895, avg loss: 2.645276, ppl: 14.087334 +epoch: 1, batch: 123, sum loss: 6205.022949, avg loss: 3.177175, ppl: 23.978918 +epoch: 1, batch: 124, sum loss: 4124.881836, avg loss: 2.671556, ppl: 14.462452 +epoch: 1, batch: 125, sum loss: 4721.067383, avg loss: 2.946983, ppl: 19.048405 +epoch: 1, batch: 126, sum loss: 4329.963867, avg loss: 2.904067, ppl: 18.248211 +epoch: 1, batch: 127, sum loss: 4065.467285, avg loss: 2.629668, ppl: 13.869171 +epoch: 1, batch: 128, sum loss: 4891.602539, avg loss: 3.055342, ppl: 21.228449 +epoch: 1, batch: 129, sum loss: 4882.455078, avg loss: 2.833694, ppl: 17.008177 +epoch: 1, batch: 130, sum loss: 4918.603027, avg loss: 2.945271, ppl: 19.015820 +epoch: 1, batch: 131, sum loss: 4982.894531, avg loss: 2.920806, ppl: 18.556232 +epoch: 1, batch: 132, sum loss: 3810.525879, avg loss: 2.698673, ppl: 14.859996 +epoch: 1, batch: 133, sum loss: 4295.766113, avg loss: 2.628988, ppl: 13.859734 +epoch: 1, batch: 134, sum loss: 3854.203857, avg loss: 2.815343, ppl: 16.698896 +epoch: 1, batch: 135, sum loss: 5169.312500, avg loss: 2.773236, ppl: 16.010365 +epoch: 1, batch: 136, sum loss: 4312.157715, avg loss: 2.724041, ppl: 15.241797 +epoch: 1, batch: 137, sum loss: 5709.407227, avg loss: 2.973650, ppl: 19.563190 +epoch: 1, batch: 138, sum loss: 4851.131836, avg loss: 2.836919, ppl: 17.063114 +epoch: 1, batch: 139, sum loss: 4004.851074, avg loss: 2.539538, ppl: 12.673813 +epoch: 1, batch: 140, sum loss: 4289.066895, avg loss: 2.531917, ppl: 12.577589 +epoch: 1, batch: 141, sum loss: 3904.930908, 
avg loss: 2.451306, ppl: 11.603495 +epoch: 1, batch: 142, sum loss: 4961.149414, avg loss: 3.004936, ppl: 20.184929 +epoch: 1, batch: 143, sum loss: 4485.525391, avg loss: 2.573451, ppl: 13.110993 +epoch: 1, batch: 144, sum loss: 5354.525391, avg loss: 2.940431, ppl: 18.924007 +epoch: 1, batch: 145, sum loss: 4278.567871, avg loss: 2.531697, ppl: 12.574828 +epoch: 1, batch: 146, sum loss: 5267.852539, avg loss: 2.736547, ppl: 15.433597 +epoch: 1, batch: 147, sum loss: 4849.304688, avg loss: 2.909001, ppl: 18.338467 +epoch: 1, batch: 148, sum loss: 3945.041016, avg loss: 2.445779, ppl: 11.539531 +epoch: 1, batch: 149, sum loss: 4970.790527, avg loss: 2.808356, ppl: 16.582638 +epoch: 1, batch: 150, sum loss: 5187.780273, avg loss: 2.799665, ppl: 16.439146 +epoch: 1, batch: 151, sum loss: 4141.335449, avg loss: 2.896039, ppl: 18.102295 +epoch: 1, batch: 152, sum loss: 4580.731445, avg loss: 2.838124, ppl: 17.083679 +epoch: 1, batch: 153, sum loss: 4578.004395, avg loss: 2.881060, ppl: 17.833168 +epoch: 1, batch: 154, sum loss: 3966.570801, avg loss: 2.555780, ppl: 12.881349 +epoch: 1, batch: 155, sum loss: 4761.867188, avg loss: 2.811019, ppl: 16.626860 +epoch: 1, batch: 156, sum loss: 5203.523438, avg loss: 2.818810, ppl: 16.756897 +epoch: 1, batch: 157, sum loss: 4340.490234, avg loss: 2.651491, ppl: 14.175154 +epoch: 1, batch: 158, sum loss: 4515.542969, avg loss: 2.937894, ppl: 18.876053 +epoch: 1, batch: 159, sum loss: 4658.413086, avg loss: 2.764637, ppl: 15.873273 +epoch: 1, batch: 160, sum loss: 4763.000000, avg loss: 2.841886, ppl: 17.148069 +epoch: 1, batch: 161, sum loss: 6359.806152, avg loss: 3.114499, ppl: 22.522135 +epoch: 1, batch: 162, sum loss: 4880.467285, avg loss: 2.889560, ppl: 17.985397 +epoch: 1, batch: 163, sum loss: 4721.156738, avg loss: 2.988074, ppl: 19.847416 +epoch: 1, batch: 164, sum loss: 5595.572266, avg loss: 2.925025, ppl: 18.634684 +epoch: 1, batch: 165, sum loss: 4721.819824, avg loss: 2.729376, ppl: 15.323317 +epoch: 1, batch: 
166, sum loss: 4057.245361, avg loss: 2.825380, ppl: 16.867352 +epoch: 1, batch: 167, sum loss: 4462.174805, avg loss: 2.744265, ppl: 15.553176 +epoch: 1, batch: 168, sum loss: 4947.850586, avg loss: 2.809682, ppl: 16.604643 +epoch: 1, batch: 169, sum loss: 4498.333008, avg loss: 2.792261, ppl: 16.317879 +epoch: 1, batch: 170, sum loss: 4061.742676, avg loss: 2.565851, ppl: 13.011729 +epoch: 1, batch: 171, sum loss: 4337.290039, avg loss: 2.678993, ppl: 14.570416 +epoch: 1, batch: 172, sum loss: 4392.207520, avg loss: 2.697916, ppl: 14.848755 +epoch: 1, batch: 173, sum loss: 5030.188477, avg loss: 2.931345, ppl: 18.752840 +epoch: 1, batch: 174, sum loss: 5022.779297, avg loss: 2.973819, ppl: 19.566492 +epoch: 1, batch: 175, sum loss: 4927.070801, avg loss: 2.864576, ppl: 17.541616 +epoch: 1, batch: 176, sum loss: 5191.145508, avg loss: 3.077146, ppl: 21.696400 +epoch: 1, batch: 177, sum loss: 4387.775879, avg loss: 2.591717, ppl: 13.352673 +epoch: 1, batch: 178, sum loss: 5609.886719, avg loss: 2.879819, ppl: 17.811043 +epoch: 1, batch: 179, sum loss: 5705.331543, avg loss: 3.099040, ppl: 22.176641 +epoch: 1, batch: 180, sum loss: 4771.241699, avg loss: 2.757943, ppl: 15.767379 +epoch: 1, batch: 181, sum loss: 4441.931641, avg loss: 2.633036, ppl: 13.915956 +epoch: 1, batch: 182, sum loss: 5178.090820, avg loss: 2.696923, ppl: 14.834010 +epoch: 1, batch: 183, sum loss: 4828.180664, avg loss: 3.153613, ppl: 23.420521 +epoch: 1, batch: 184, sum loss: 4638.516113, avg loss: 2.804423, ppl: 16.517548 +epoch: 1, batch: 185, sum loss: 3736.186523, avg loss: 2.590976, ppl: 13.342781 +epoch: 1, batch: 186, sum loss: 4462.522949, avg loss: 2.759755, ppl: 15.795967 +epoch: 1, batch: 187, sum loss: 4066.155029, avg loss: 2.867528, ppl: 17.593481 +epoch: 1, batch: 188, sum loss: 4686.325195, avg loss: 2.883892, ppl: 17.883747 +epoch: 1, batch: 189, sum loss: 4781.217773, avg loss: 2.797670, ppl: 16.406374 +epoch: 1, batch: 190, sum loss: 4715.285645, avg loss: 2.610900, ppl: 
13.611298 +epoch: 1, batch: 191, sum loss: 4694.775391, avg loss: 2.928743, ppl: 18.704107 +epoch: 1, batch: 192, sum loss: 4280.650879, avg loss: 3.095192, ppl: 22.091484 +epoch: 1, batch: 193, sum loss: 4953.287598, avg loss: 2.765655, ppl: 15.889438 +epoch: 1, batch: 194, sum loss: 3592.208008, avg loss: 2.556732, ppl: 12.893608 +epoch: 1, batch: 195, sum loss: 5308.772461, avg loss: 3.097300, ppl: 22.138103 +epoch: 1, batch: 196, sum loss: 5199.166016, avg loss: 2.775850, ppl: 16.052259 +epoch: 1, batch: 197, sum loss: 6107.760742, avg loss: 3.031147, ppl: 20.720985 +epoch: 1, batch: 198, sum loss: 4456.379883, avg loss: 2.740701, ppl: 15.497845 +epoch: 1, batch: 199, sum loss: 4953.720703, avg loss: 2.782989, ppl: 16.167276 +epoch: 1, batch: 200, sum loss: 5472.049316, avg loss: 3.188840, ppl: 24.260267 +epoch: 1, batch: 201, sum loss: 4938.758301, avg loss: 2.996819, ppl: 20.021757 +epoch: 1, batch: 202, sum loss: 4266.730469, avg loss: 2.554929, ppl: 12.870379 +epoch: 1, batch: 203, sum loss: 5185.315430, avg loss: 2.838158, ppl: 17.084274 +epoch: 1, batch: 204, sum loss: 4876.471680, avg loss: 3.004604, ppl: 20.178217 +epoch: 1, batch: 205, sum loss: 4683.359375, avg loss: 2.889179, ppl: 17.978542 +epoch: 1, batch: 206, sum loss: 4432.082031, avg loss: 2.837440, ppl: 17.072014 +epoch: 1, batch: 207, sum loss: 4651.680664, avg loss: 2.859054, ppl: 17.445013 +epoch: 1, batch: 208, sum loss: 5338.916992, avg loss: 3.063062, ppl: 21.392962 +epoch: 1, batch: 209, sum loss: 4138.187988, avg loss: 2.680174, ppl: 14.587626 +epoch: 1, batch: 210, sum loss: 4706.334961, avg loss: 2.753853, ppl: 15.703020 +epoch: 1, batch: 211, sum loss: 5300.817383, avg loss: 2.791373, ppl: 16.303389 +epoch: 1, batch: 212, sum loss: 4526.737305, avg loss: 2.681716, ppl: 14.610149 +epoch: 1, batch: 213, sum loss: 4594.065918, avg loss: 2.797848, ppl: 16.409292 +epoch: 1, batch: 214, sum loss: 4746.976074, avg loss: 2.798925, ppl: 16.426973 +epoch: 1, batch: 215, sum loss: 5634.121582, 
avg loss: 2.923779, ppl: 18.611483 +epoch: 1, batch: 216, sum loss: 5806.015137, avg loss: 3.096541, ppl: 22.121309 +epoch: 1, batch: 217, sum loss: 5188.860840, avg loss: 2.921656, ppl: 18.572016 +epoch: 1, batch: 218, sum loss: 5503.901367, avg loss: 2.996136, ppl: 20.008076 +epoch: 1, batch: 219, sum loss: 5104.682129, avg loss: 2.882373, ppl: 17.856590 +epoch: 1, batch: 220, sum loss: 3879.329590, avg loss: 2.794906, ppl: 16.361090 +epoch: 1, batch: 221, sum loss: 5059.643555, avg loss: 2.879706, ppl: 17.809034 +epoch: 1, batch: 222, sum loss: 5243.034180, avg loss: 2.826433, ppl: 16.885132 +epoch: 1, batch: 223, sum loss: 4226.645508, avg loss: 2.493596, ppl: 12.104728 +epoch: 1, batch: 224, sum loss: 4380.756348, avg loss: 2.653396, ppl: 14.202186 +epoch: 1, batch: 225, sum loss: 6421.413574, avg loss: 3.172635, ppl: 23.870308 +epoch: 1, batch: 226, sum loss: 5327.115234, avg loss: 2.830561, ppl: 16.954964 +epoch: 1, batch: 227, sum loss: 4883.437988, avg loss: 2.912008, ppl: 18.393702 +epoch: 1, batch: 228, sum loss: 3998.091797, avg loss: 2.640748, ppl: 14.023683 +epoch: 1, batch: 229, sum loss: 4749.034180, avg loss: 2.726196, ppl: 15.274676 +epoch: 1, batch: 230, sum loss: 4502.043457, avg loss: 2.649820, ppl: 14.151485 +epoch: 1, batch: 231, sum loss: 5275.647461, avg loss: 2.953890, ppl: 19.180422 +epoch: 1, batch: 232, sum loss: 4775.895996, avg loss: 2.692162, ppl: 14.763564 +epoch: 1, batch: 233, sum loss: 4524.996582, avg loss: 2.898781, ppl: 18.152000 +epoch: 1, batch: 234, sum loss: 4272.041016, avg loss: 2.788538, ppl: 16.257242 +epoch: 1, batch: 235, sum loss: 5639.944824, avg loss: 2.914700, ppl: 18.443279 +epoch: 1, batch: 236, sum loss: 5231.819824, avg loss: 2.879373, ppl: 17.803101 +epoch: 1, batch: 237, sum loss: 3999.581543, avg loss: 2.608990, ppl: 13.585322 +epoch: 1, batch: 238, sum loss: 4502.126953, avg loss: 2.738520, ppl: 15.464080 +epoch: 1, batch: 239, sum loss: 4568.411621, avg loss: 2.853474, ppl: 17.347942 +epoch: 1, batch: 
240, sum loss: 4172.380859, avg loss: 2.569200, ppl: 13.055376 +epoch: 1, batch: 241, sum loss: 3341.867188, avg loss: 2.293663, ppl: 9.911176 +epoch: 1, batch: 242, sum loss: 5140.447754, avg loss: 2.939078, ppl: 18.898417 +epoch: 1, batch: 243, sum loss: 4117.519531, avg loss: 2.607675, ppl: 13.567474 +epoch: 1, batch: 244, sum loss: 4836.750488, avg loss: 2.792581, ppl: 16.323097 +epoch: 1, batch: 245, sum loss: 3829.285156, avg loss: 2.830218, ppl: 16.949156 +epoch: 1, batch: 246, sum loss: 4638.341797, avg loss: 2.856122, ppl: 17.393938 +epoch: 1, batch: 247, sum loss: 4833.658203, avg loss: 2.976390, ppl: 19.616879 +epoch: 1, batch: 248, sum loss: 5782.284668, avg loss: 3.075683, ppl: 21.664682 +epoch: 1, batch: 249, sum loss: 5679.854004, avg loss: 2.944455, ppl: 19.000307 +epoch: 1, batch: 250, sum loss: 4936.198242, avg loss: 2.989823, ppl: 19.882170 +epoch: 1, batch: 251, sum loss: 5673.966797, avg loss: 3.203821, ppl: 24.626448 +epoch: 1, batch: 252, sum loss: 4310.469727, avg loss: 2.588871, ppl: 13.314728 +epoch: 1, batch: 253, sum loss: 3983.847168, avg loss: 2.668350, ppl: 14.416169 +epoch: 1, batch: 254, sum loss: 3930.450195, avg loss: 2.575655, ppl: 13.139927 +epoch: 1, batch: 255, sum loss: 3796.799561, avg loss: 2.459067, ppl: 11.693900 +epoch: 1, batch: 256, sum loss: 4479.531250, avg loss: 2.658475, ppl: 14.274509 +epoch: 1, batch: 257, sum loss: 4366.024414, avg loss: 2.747655, ppl: 15.605999 +epoch: 1, batch: 258, sum loss: 6053.224121, avg loss: 3.038767, ppl: 20.879490 +epoch: 1, batch: 259, sum loss: 4424.795410, avg loss: 2.777649, ppl: 16.081177 +epoch: 1, batch: 260, sum loss: 6266.905762, avg loss: 2.873409, ppl: 17.697250 +epoch: 1, batch: 261, sum loss: 5084.412598, avg loss: 2.976822, ppl: 19.625351 +epoch: 1, batch: 262, sum loss: 5515.470215, avg loss: 2.970097, ppl: 19.493807 +epoch: 1, batch: 263, sum loss: 5094.528809, avg loss: 3.078265, ppl: 21.720688 +epoch: 1, batch: 264, sum loss: 4501.444336, avg loss: 2.666733, ppl: 
14.392864 +epoch: 1, batch: 265, sum loss: 5748.708008, avg loss: 2.987894, ppl: 19.843849 +epoch: 1, batch: 266, sum loss: 5611.289551, avg loss: 2.984728, ppl: 19.781128 +epoch: 1, batch: 267, sum loss: 4141.783203, avg loss: 2.842679, ppl: 17.161676 +epoch: 1, batch: 268, sum loss: 5221.548340, avg loss: 2.816369, ppl: 16.716045 +epoch: 1, batch: 269, sum loss: 5971.323242, avg loss: 3.024986, ppl: 20.593721 +epoch: 1, batch: 270, sum loss: 4207.506836, avg loss: 2.654578, ppl: 14.218990 +epoch: 1, batch: 271, sum loss: 4482.427246, avg loss: 3.002295, ppl: 20.131697 +epoch: 1, batch: 272, sum loss: 5375.951172, avg loss: 3.091404, ppl: 22.007956 +epoch: 1, batch: 273, sum loss: 3671.667969, avg loss: 2.553316, ppl: 12.849638 +epoch: 1, batch: 274, sum loss: 4247.784180, avg loss: 2.534477, ppl: 12.609840 +epoch: 1, batch: 275, sum loss: 5409.408203, avg loss: 2.900487, ppl: 18.182997 +epoch: 1, batch: 276, sum loss: 4158.994141, avg loss: 2.632275, ppl: 13.905367 +epoch: 1, batch: 277, sum loss: 4585.943359, avg loss: 2.697614, ppl: 14.844271 +epoch: 1, batch: 278, sum loss: 4590.926758, avg loss: 2.894657, ppl: 18.077307 +epoch: 1, batch: 279, sum loss: 4599.074707, avg loss: 2.998093, ppl: 20.047268 +epoch: 1, batch: 280, sum loss: 5291.900391, avg loss: 2.789615, ppl: 16.274759 +epoch: 1, batch: 281, sum loss: 4455.702148, avg loss: 2.677706, ppl: 14.551670 +epoch: 1, batch: 282, sum loss: 4249.580078, avg loss: 2.528007, ppl: 12.528512 +epoch: 1, batch: 283, sum loss: 4371.014648, avg loss: 2.475093, ppl: 11.882813 +epoch: 1, batch: 284, sum loss: 4402.280762, avg loss: 2.559466, ppl: 12.928906 +epoch: 1, batch: 285, sum loss: 5806.772461, avg loss: 3.090353, ppl: 21.984833 +epoch: 1, batch: 286, sum loss: 4418.091309, avg loss: 2.826674, ppl: 16.889194 +epoch: 1, batch: 287, sum loss: 5857.348145, avg loss: 3.025490, ppl: 20.604090 +epoch: 1, batch: 288, sum loss: 3820.507324, avg loss: 2.506895, ppl: 12.266777 +epoch: 1, batch: 289, sum loss: 5498.094727, 
avg loss: 3.076718, ppl: 21.687105 +epoch: 1, batch: 290, sum loss: 5561.392578, avg loss: 2.917834, ppl: 18.501175 +epoch: 1, batch: 291, sum loss: 5228.580078, avg loss: 2.847811, ppl: 17.249971 +epoch: 1, batch: 292, sum loss: 5290.670898, avg loss: 2.699322, ppl: 14.869643 +epoch: 1, batch: 293, sum loss: 4471.548828, avg loss: 2.784277, ppl: 16.188108 +epoch: 1, batch: 294, sum loss: 4936.607422, avg loss: 2.952517, ppl: 19.154095 +epoch: 1, batch: 295, sum loss: 5500.401367, avg loss: 2.986103, ppl: 19.808334 +epoch: 1, batch: 296, sum loss: 4371.829102, avg loss: 2.742678, ppl: 15.528518 +epoch: 1, batch: 297, sum loss: 4841.835938, avg loss: 2.853174, ppl: 17.342739 +epoch: 1, batch: 298, sum loss: 3258.691650, avg loss: 2.457535, ppl: 11.675998 +epoch: 1, batch: 299, sum loss: 4996.491699, avg loss: 2.715484, ppl: 15.111928 +epoch: 1, batch: 300, sum loss: 4941.228027, avg loss: 2.974851, ppl: 19.586712 +epoch: 1, batch: 301, sum loss: 4630.318359, avg loss: 3.120161, ppl: 22.650017 +epoch: 1, batch: 302, sum loss: 5901.695312, avg loss: 3.080217, ppl: 21.763123 +epoch: 1, batch: 303, sum loss: 5038.584473, avg loss: 2.874264, ppl: 17.712383 +epoch: 1, batch: 304, sum loss: 5060.944336, avg loss: 2.814763, ppl: 16.689220 +epoch: 1, batch: 305, sum loss: 4574.999512, avg loss: 2.786236, ppl: 16.219854 +epoch: 1, batch: 306, sum loss: 4674.799805, avg loss: 2.953127, ppl: 19.165785 +epoch: 1, batch: 307, sum loss: 5795.738281, avg loss: 3.047181, ppl: 21.055906 +epoch: 1, batch: 308, sum loss: 4818.450195, avg loss: 2.847784, ppl: 17.249512 +epoch: 1, batch: 309, sum loss: 4443.292969, avg loss: 2.691274, ppl: 14.750455 +epoch: 1, batch: 310, sum loss: 4662.154297, avg loss: 2.947000, ppl: 19.048721 +epoch: 1, batch: 311, sum loss: 5076.326660, avg loss: 3.056187, ppl: 21.246387 +epoch: 1, batch: 312, sum loss: 4570.654297, avg loss: 2.960268, ppl: 19.303154 +epoch: 1, batch: 313, sum loss: 4388.558594, avg loss: 2.646899, ppl: 14.110215 +epoch: 1, batch: 
314, sum loss: 3889.973145, avg loss: 2.589862, ppl: 13.327933 +epoch: 1, batch: 315, sum loss: 4680.459961, avg loss: 2.603148, ppl: 13.506186 +epoch: 1, batch: 316, sum loss: 5237.541992, avg loss: 2.984354, ppl: 19.773729 +epoch: 1, batch: 317, sum loss: 4519.637695, avg loss: 2.796805, ppl: 16.392197 +epoch: 1, batch: 318, sum loss: 5024.868652, avg loss: 2.747331, ppl: 15.600944 +epoch: 1, batch: 319, sum loss: 5339.300293, avg loss: 3.068563, ppl: 21.510979 +epoch: 1, batch: 320, sum loss: 4892.612305, avg loss: 2.922708, ppl: 18.591557 +epoch: 1, batch: 321, sum loss: 4800.771484, avg loss: 2.885079, ppl: 17.904985 +epoch: 1, batch: 322, sum loss: 3880.653076, avg loss: 2.590556, ppl: 13.337184 +epoch: 1, batch: 323, sum loss: 4670.164551, avg loss: 3.018852, ppl: 20.467791 +epoch: 1, batch: 324, sum loss: 4913.457031, avg loss: 2.812511, ppl: 16.651682 +epoch: 1, batch: 325, sum loss: 4964.597656, avg loss: 2.874695, ppl: 17.720016 +epoch: 1, batch: 326, sum loss: 5325.590820, avg loss: 3.015623, ppl: 20.401804 +epoch: 1, batch: 327, sum loss: 4866.318359, avg loss: 2.801565, ppl: 16.470406 +epoch: 1, batch: 328, sum loss: 4280.641113, avg loss: 3.010296, ppl: 20.293407 +epoch: 1, batch: 329, sum loss: 4233.309082, avg loss: 2.947987, ppl: 19.067530 +epoch: 1, batch: 330, sum loss: 4308.922852, avg loss: 2.614638, ppl: 13.662265 +epoch: 1, batch: 331, sum loss: 4597.395020, avg loss: 2.815306, ppl: 16.698288 +epoch: 1, batch: 332, sum loss: 5028.522949, avg loss: 2.910025, ppl: 18.357256 +epoch: 1, batch: 333, sum loss: 5317.963867, avg loss: 2.888628, ppl: 17.968639 +epoch: 1, batch: 334, sum loss: 5211.168945, avg loss: 2.669656, ppl: 14.435007 +epoch: 1, batch: 335, sum loss: 4987.731445, avg loss: 2.899844, ppl: 18.171309 +epoch: 1, batch: 336, sum loss: 3917.392090, avg loss: 2.495154, ppl: 12.123602 +epoch: 1, batch: 337, sum loss: 4544.048340, avg loss: 2.975801, ppl: 19.605326 +epoch: 1, batch: 338, sum loss: 5041.058594, avg loss: 2.789739, ppl: 
16.276770 +epoch: 1, batch: 339, sum loss: 4608.652344, avg loss: 2.741613, ppl: 15.511992 +epoch: 1, batch: 340, sum loss: 5286.985840, avg loss: 2.845525, ppl: 17.210596 +epoch: 1, batch: 341, sum loss: 4710.969238, avg loss: 2.758179, ppl: 15.771093 +epoch: 1, batch: 342, sum loss: 4857.715820, avg loss: 2.933403, ppl: 18.791473 +epoch: 1, batch: 343, sum loss: 4905.694336, avg loss: 3.079532, ppl: 21.748220 +epoch: 1, batch: 344, sum loss: 5261.270020, avg loss: 2.994462, ppl: 19.974611 +epoch: 1, batch: 345, sum loss: 5914.999512, avg loss: 2.751163, ppl: 15.660828 +epoch: 1, batch: 346, sum loss: 4872.426270, avg loss: 2.691948, ppl: 14.760407 +epoch: 1, batch: 347, sum loss: 4969.378418, avg loss: 2.970340, ppl: 19.498543 +epoch: 1, batch: 348, sum loss: 5044.791016, avg loss: 2.716635, ppl: 15.129323 +epoch: 1, batch: 349, sum loss: 5268.931152, avg loss: 2.883925, ppl: 17.884335 +epoch: 1, batch: 350, sum loss: 4620.839844, avg loss: 2.793736, ppl: 16.341967 +epoch: 1, batch: 351, sum loss: 5290.030273, avg loss: 3.007408, ppl: 20.234880 +epoch: 1, batch: 352, sum loss: 4568.583984, avg loss: 2.732407, ppl: 15.369836 +epoch: 1, batch: 353, sum loss: 5873.481445, avg loss: 3.114253, ppl: 22.516611 +epoch: 1, batch: 354, sum loss: 4092.367432, avg loss: 2.648782, ppl: 14.136803 +epoch: 1, batch: 355, sum loss: 4000.070801, avg loss: 2.536507, ppl: 12.635454 +epoch: 1, batch: 356, sum loss: 5647.530762, avg loss: 3.146257, ppl: 23.248873 +epoch: 1, batch: 357, sum loss: 4804.451172, avg loss: 2.943904, ppl: 18.989836 +epoch: 1, batch: 358, sum loss: 5064.583984, avg loss: 2.691065, ppl: 14.747371 +epoch: 1, batch: 359, sum loss: 3726.949219, avg loss: 2.608082, ppl: 13.572993 +epoch: 1, batch: 360, sum loss: 4817.842285, avg loss: 2.694543, ppl: 14.798749 +epoch: 1, batch: 361, sum loss: 4312.345703, avg loss: 2.837070, ppl: 17.065685 +epoch: 1, batch: 362, sum loss: 4541.144531, avg loss: 2.679141, ppl: 14.572575 +epoch: 1, batch: 363, sum loss: 4627.722168, 
avg loss: 2.972204, ppl: 19.534935 +epoch: 1, batch: 364, sum loss: 4450.590332, avg loss: 2.715430, ppl: 15.111111 +epoch: 1, batch: 365, sum loss: 4538.574219, avg loss: 2.719337, ppl: 15.170268 +epoch: 1, batch: 366, sum loss: 5011.918945, avg loss: 2.990405, ppl: 19.893744 +epoch: 1, batch: 367, sum loss: 4233.126953, avg loss: 2.699698, ppl: 14.875242 +epoch: 1, batch: 368, sum loss: 4493.036621, avg loss: 2.655459, ppl: 14.231516 +epoch: 1, batch: 369, sum loss: 4751.245605, avg loss: 2.888295, ppl: 17.962660 +epoch: 1, batch: 370, sum loss: 4351.971680, avg loss: 2.733650, ppl: 15.388961 +epoch: 1, batch: 371, sum loss: 4878.522949, avg loss: 3.007721, ppl: 20.241211 +epoch: 1, batch: 372, sum loss: 5370.331543, avg loss: 2.988498, ppl: 19.855846 +epoch: 1, batch: 373, sum loss: 5058.700195, avg loss: 2.908971, ppl: 18.337915 +epoch: 1, batch: 374, sum loss: 4683.854492, avg loss: 2.938428, ppl: 18.886137 +epoch: 1, batch: 375, sum loss: 5738.022949, avg loss: 2.948624, ppl: 19.079689 +epoch: 1, batch: 376, sum loss: 4512.253418, avg loss: 2.792236, ppl: 16.317467 +epoch: 1, batch: 377, sum loss: 5033.014160, avg loss: 2.807035, ppl: 16.560745 +epoch: 1, batch: 378, sum loss: 5236.421875, avg loss: 3.023338, ppl: 20.559813 +epoch: 1, batch: 379, sum loss: 3499.450195, avg loss: 2.576915, ppl: 13.156485 +epoch: 1, batch: 380, sum loss: 4778.878418, avg loss: 2.783272, ppl: 16.171854 +epoch: 1, batch: 381, sum loss: 4722.120605, avg loss: 2.820861, ppl: 16.791300 +epoch: 1, batch: 382, sum loss: 5031.752930, avg loss: 2.905169, ppl: 18.268332 +epoch: 1, batch: 383, sum loss: 4574.448242, avg loss: 2.889734, ppl: 17.988516 +epoch: 1, batch: 384, sum loss: 5506.280762, avg loss: 3.081299, ppl: 21.786682 +epoch: 1, batch: 385, sum loss: 4621.782715, avg loss: 2.947566, ppl: 19.059498 +epoch: 1, batch: 386, sum loss: 4130.421875, avg loss: 2.649405, ppl: 14.145617 +epoch: 1, batch: 387, sum loss: 4059.722656, avg loss: 2.542093, ppl: 12.706238 +epoch: 1, batch: 
388, sum loss: 5448.028809, avg loss: 2.862863, ppl: 17.511597 +epoch: 1, batch: 389, sum loss: 4785.429688, avg loss: 2.720540, ppl: 15.188522 +epoch: 1, batch: 390, sum loss: 4346.063477, avg loss: 2.672856, ppl: 14.481263 +epoch: 1, batch: 391, sum loss: 5702.082520, avg loss: 2.994792, ppl: 19.981194 +epoch: 1, batch: 392, sum loss: 4638.282227, avg loss: 2.661091, ppl: 14.311899 +epoch: 1, batch: 393, sum loss: 3551.926025, avg loss: 2.449604, ppl: 11.583762 +epoch: 1, batch: 394, sum loss: 5038.594727, avg loss: 2.880843, ppl: 17.829300 +epoch: 1, batch: 395, sum loss: 4744.301758, avg loss: 2.830729, ppl: 16.957819 +epoch: 1, batch: 396, sum loss: 3814.400391, avg loss: 2.470467, ppl: 11.827968 +epoch: 1, batch: 397, sum loss: 4861.224121, avg loss: 2.885000, ppl: 17.903559 +epoch: 1, batch: 398, sum loss: 4661.795898, avg loss: 2.959870, ppl: 19.295469 +epoch: 1, batch: 399, sum loss: 5224.328613, avg loss: 3.042707, ppl: 20.961920 +epoch: 1, batch: 400, sum loss: 4511.264648, avg loss: 2.636625, ppl: 13.965986 +epoch: 1, batch: 401, sum loss: 4633.801758, avg loss: 2.943965, ppl: 18.991005 +epoch: 1, batch: 402, sum loss: 4729.925781, avg loss: 2.835687, ppl: 17.042103 +epoch: 1, batch: 403, sum loss: 4197.894531, avg loss: 2.635213, ppl: 13.946284 +epoch: 1, batch: 404, sum loss: 4821.005859, avg loss: 2.921822, ppl: 18.575098 +epoch: 1, batch: 405, sum loss: 4552.105469, avg loss: 3.044887, ppl: 21.007648 +epoch: 1, batch: 406, sum loss: 5212.267090, avg loss: 2.921674, ppl: 18.572361 +epoch: 1, batch: 407, sum loss: 5247.008789, avg loss: 2.859406, ppl: 17.451149 +epoch: 1, batch: 408, sum loss: 5673.840820, avg loss: 2.643915, ppl: 14.068169 +epoch: 1, batch: 409, sum loss: 4063.192139, avg loss: 2.680206, ppl: 14.588099 +epoch: 1, batch: 410, sum loss: 4373.907715, avg loss: 2.825522, ppl: 16.869747 +epoch: 1, batch: 411, sum loss: 4870.764160, avg loss: 2.750290, ppl: 15.647171 +epoch: 1, batch: 412, sum loss: 4584.600098, avg loss: 2.851119, ppl: 
17.307142 +epoch: 1, batch: 413, sum loss: 5006.845215, avg loss: 2.817583, ppl: 16.736351 +epoch: 1, batch: 414, sum loss: 4161.013672, avg loss: 2.571702, ppl: 13.088078 +epoch: 1, batch: 415, sum loss: 6686.251465, avg loss: 3.117134, ppl: 22.581560 +epoch: 1, batch: 416, sum loss: 3728.572021, avg loss: 2.411754, ppl: 11.153509 +epoch: 1, batch: 417, sum loss: 4754.963379, avg loss: 2.895837, ppl: 18.098637 +epoch: 1, batch: 418, sum loss: 4967.212402, avg loss: 2.867906, ppl: 17.600119 +epoch: 1, batch: 419, sum loss: 4749.501465, avg loss: 3.007917, ppl: 20.245193 +epoch: 1, batch: 420, sum loss: 4278.527832, avg loss: 2.665749, ppl: 14.378719 +epoch: 1, batch: 421, sum loss: 4331.835449, avg loss: 2.625355, ppl: 13.809472 +epoch: 1, batch: 422, sum loss: 6040.157227, avg loss: 3.099106, ppl: 22.178112 +epoch: 1, batch: 423, sum loss: 5003.285156, avg loss: 2.939651, ppl: 18.909237 +epoch: 1, batch: 424, sum loss: 4963.023926, avg loss: 2.902353, ppl: 18.216961 +epoch: 1, batch: 425, sum loss: 4151.115234, avg loss: 2.722043, ppl: 15.211365 +epoch: 1, batch: 426, sum loss: 5813.088379, avg loss: 2.846762, ppl: 17.231897 +epoch: 1, batch: 427, sum loss: 5873.730469, avg loss: 3.076863, ppl: 21.690245 +epoch: 1, batch: 428, sum loss: 4682.357910, avg loss: 2.908297, ppl: 18.325560 +epoch: 1, batch: 429, sum loss: 4836.937500, avg loss: 2.663512, ppl: 14.346582 +epoch: 1, batch: 430, sum loss: 4870.974121, avg loss: 2.825391, ppl: 16.867544 +epoch: 1, batch: 431, sum loss: 5012.283203, avg loss: 2.862526, ppl: 17.505693 +epoch: 1, batch: 432, sum loss: 5024.187988, avg loss: 2.828935, ppl: 16.927418 +epoch: 1, batch: 433, sum loss: 4852.171875, avg loss: 2.804724, ppl: 16.522507 +epoch: 1, batch: 434, sum loss: 3797.348877, avg loss: 2.546847, ppl: 12.766788 +epoch: 1, batch: 435, sum loss: 5454.080078, avg loss: 3.052087, ppl: 21.159464 +epoch: 1, batch: 436, sum loss: 5087.762207, avg loss: 2.871198, ppl: 17.658155 +epoch: 1, batch: 437, sum loss: 4237.619141, 
avg loss: 2.458016, ppl: 11.681611 +epoch: 1, batch: 438, sum loss: 5666.592773, avg loss: 3.165694, ppl: 23.705194 +epoch: 1, batch: 439, sum loss: 4568.558594, avg loss: 2.514342, ppl: 12.358469 +epoch: 1, batch: 440, sum loss: 4736.350586, avg loss: 2.868777, ppl: 17.615461 +epoch: 1, batch: 441, sum loss: 4110.019043, avg loss: 2.599633, ppl: 13.458791 +epoch: 1, batch: 442, sum loss: 3230.721680, avg loss: 2.590795, ppl: 13.340377 +epoch: 1, batch: 443, sum loss: 4255.903809, avg loss: 2.799937, ppl: 16.443607 +epoch: 1, batch: 444, sum loss: 4162.012695, avg loss: 2.634185, ppl: 13.931957 +epoch: 1, batch: 445, sum loss: 4530.022949, avg loss: 2.833035, ppl: 16.996969 +epoch: 1, batch: 446, sum loss: 4489.230469, avg loss: 2.662652, ppl: 14.334247 +epoch: 1, batch: 447, sum loss: 4812.282227, avg loss: 2.830755, ppl: 16.958252 +epoch: 1, batch: 448, sum loss: 4936.106445, avg loss: 2.896776, ppl: 18.115650 +epoch: 1, batch: 449, sum loss: 5212.924805, avg loss: 2.736444, ppl: 15.432004 +epoch: 1, batch: 450, sum loss: 4750.944336, avg loss: 2.771846, ppl: 15.988126 +epoch: 1, batch: 451, sum loss: 5282.102051, avg loss: 2.916677, ppl: 18.479782 +epoch: 1, batch: 452, sum loss: 4191.363281, avg loss: 2.698882, ppl: 14.863104 +epoch: 1, batch: 453, sum loss: 5397.836426, avg loss: 2.843960, ppl: 17.183683 +epoch: 1, batch: 454, sum loss: 5280.386230, avg loss: 3.173309, ppl: 23.886396 +epoch: 1, batch: 455, sum loss: 4966.963867, avg loss: 2.946005, ppl: 19.029770 +epoch: 1, batch: 456, sum loss: 4962.178711, avg loss: 2.853467, ppl: 17.347818 +epoch: 1, batch: 457, sum loss: 4493.919434, avg loss: 2.705550, ppl: 14.962550 +epoch: 1, batch: 458, sum loss: 4117.736816, avg loss: 2.529322, ppl: 12.545003 +epoch: 1, batch: 459, sum loss: 5844.774902, avg loss: 2.929712, ppl: 18.722229 +epoch: 1, batch: 460, sum loss: 4290.585449, avg loss: 2.581580, ppl: 13.218002 +epoch: 1, batch: 461, sum loss: 4903.860352, avg loss: 3.082250, ppl: 21.807423 +epoch: 1, batch: 
462, sum loss: 4017.953369, avg loss: 2.659135, ppl: 14.283933 +epoch: 1, batch: 463, sum loss: 4679.353516, avg loss: 2.592440, ppl: 13.362331 +epoch: 1, batch: 464, sum loss: 4915.315430, avg loss: 2.705182, ppl: 14.957036 +epoch: 1, batch: 465, sum loss: 4354.532227, avg loss: 2.645524, ppl: 14.090827 +epoch: 1, batch: 466, sum loss: 5115.267090, avg loss: 2.860888, ppl: 17.477036 +epoch: 1, batch: 467, sum loss: 3984.719238, avg loss: 2.600992, ppl: 13.477097 +epoch: 1, batch: 468, sum loss: 4764.437012, avg loss: 2.782965, ppl: 16.166891 +epoch: 1, batch: 469, sum loss: 5229.444824, avg loss: 2.916589, ppl: 18.478161 +epoch: 1, batch: 470, sum loss: 5537.695801, avg loss: 3.062885, ppl: 21.389172 +epoch: 1, batch: 471, sum loss: 3827.489990, avg loss: 2.634198, ppl: 13.932137 +epoch: 1, batch: 472, sum loss: 3632.982422, avg loss: 2.540547, ppl: 12.686610 +epoch: 1, batch: 473, sum loss: 4677.476562, avg loss: 2.817757, ppl: 16.739265 +epoch: 1, batch: 474, sum loss: 3585.833740, avg loss: 2.625061, ppl: 13.805420 +epoch: 1, batch: 475, sum loss: 4467.447266, avg loss: 2.671918, ppl: 14.467697 +epoch: 1, batch: 476, sum loss: 5110.180176, avg loss: 2.931830, ppl: 18.761936 +epoch: 1, batch: 477, sum loss: 5493.123535, avg loss: 2.871471, ppl: 17.662975 +epoch: 1, batch: 478, sum loss: 4564.560547, avg loss: 2.697731, ppl: 14.846004 +epoch: 1, batch: 479, sum loss: 4336.716797, avg loss: 2.922316, ppl: 18.584276 +epoch: 1, batch: 480, sum loss: 4630.561523, avg loss: 2.774453, ppl: 16.029852 +epoch: 1, batch: 481, sum loss: 4606.749023, avg loss: 2.870249, ppl: 17.641403 +epoch: 1, batch: 482, sum loss: 4615.844238, avg loss: 2.981812, ppl: 19.723515 +epoch: 1, batch: 483, sum loss: 4830.893555, avg loss: 2.934929, ppl: 18.820173 +epoch: 1, batch: 484, sum loss: 4541.562988, avg loss: 2.865339, ppl: 17.555008 +epoch: 1, batch: 485, sum loss: 4863.558594, avg loss: 2.826007, ppl: 16.877935 +epoch: 1, batch: 486, sum loss: 4901.028320, avg loss: 2.862750, ppl: 
17.509613 +epoch: 1, batch: 487, sum loss: 4277.616211, avg loss: 2.738551, ppl: 15.464556 +epoch: 1, batch: 488, sum loss: 5450.154297, avg loss: 2.912963, ppl: 18.411278 +epoch: 1, batch: 489, sum loss: 5250.039551, avg loss: 2.816545, ppl: 16.718983 +epoch: 1, batch: 490, sum loss: 5215.476562, avg loss: 2.742101, ppl: 15.519564 +epoch: 1, batch: 491, sum loss: 5204.237305, avg loss: 2.837643, ppl: 17.075470 +epoch: 1, batch: 492, sum loss: 4988.179688, avg loss: 2.871721, ppl: 17.667402 +epoch: 1, batch: 493, sum loss: 4605.247559, avg loss: 2.865742, ppl: 17.562082 +epoch: 1, batch: 494, sum loss: 4816.133301, avg loss: 2.960131, ppl: 19.300499 +epoch: 1, batch: 495, sum loss: 4434.520508, avg loss: 2.538363, ppl: 12.658934 +epoch: 1, batch: 496, sum loss: 4523.880371, avg loss: 2.765208, ppl: 15.882347 +epoch: 1, batch: 497, sum loss: 3864.442627, avg loss: 2.436597, ppl: 11.434063 +epoch: 1, batch: 498, sum loss: 3843.528809, avg loss: 2.366705, ppl: 10.662202 +epoch: 1, batch: 499, sum loss: 5133.769531, avg loss: 2.856856, ppl: 17.406708 +epoch: 1, batch: 500, sum loss: 4872.140625, avg loss: 3.028055, ppl: 20.657015 +epoch: 1, batch: 501, sum loss: 5448.228516, avg loss: 2.870511, ppl: 17.646025 +epoch: 1, batch: 502, sum loss: 5384.963867, avg loss: 3.096586, ppl: 22.122307 +epoch: 1, batch: 503, sum loss: 3780.909912, avg loss: 2.490718, ppl: 12.069941 +epoch: 1, batch: 504, sum loss: 5559.212891, avg loss: 3.019670, ppl: 20.484535 +epoch: 1, batch: 505, sum loss: 4773.103516, avg loss: 2.831022, ppl: 16.962791 +epoch: 1, batch: 506, sum loss: 5174.644043, avg loss: 2.678387, ppl: 14.561589 +epoch: 1, batch: 507, sum loss: 5099.117676, avg loss: 3.058859, ppl: 21.303228 +epoch: 1, batch: 508, sum loss: 4511.418945, avg loss: 2.619872, ppl: 13.733960 +epoch: 1, batch: 509, sum loss: 4981.546875, avg loss: 3.150883, ppl: 23.356689 +epoch: 1, batch: 510, sum loss: 3905.973877, avg loss: 2.598785, ppl: 13.447389 +epoch: 1, batch: 511, sum loss: 4420.938477, 
avg loss: 2.810514, ppl: 16.618458 +epoch: 1, batch: 512, sum loss: 4262.236328, avg loss: 2.970199, ppl: 19.495806 +epoch: 1, batch: 513, sum loss: 5172.907227, avg loss: 3.007504, ppl: 20.236830 +epoch: 1, batch: 514, sum loss: 4602.783203, avg loss: 2.767759, ppl: 15.922912 +epoch: 1, batch: 515, sum loss: 5161.999023, avg loss: 2.918032, ppl: 18.504841 +epoch: 1, batch: 516, sum loss: 4985.172852, avg loss: 2.997699, ppl: 20.039370 +epoch: 1, batch: 517, sum loss: 5274.453125, avg loss: 2.857234, ppl: 17.413286 +epoch: 1, batch: 518, sum loss: 4842.362793, avg loss: 2.703720, ppl: 14.935189 +epoch: 1, batch: 519, sum loss: 5832.054688, avg loss: 3.100507, ppl: 22.209219 +epoch: 1, batch: 520, sum loss: 4086.599609, avg loss: 2.655360, ppl: 14.230115 +epoch: 1, batch: 521, sum loss: 3943.817383, avg loss: 2.716128, ppl: 15.121653 +epoch: 1, batch: 522, sum loss: 5021.341797, avg loss: 2.825741, ppl: 16.873444 +epoch: 1, batch: 523, sum loss: 4014.464111, avg loss: 2.635892, ppl: 13.955761 +epoch: 1, batch: 524, sum loss: 4480.129883, avg loss: 2.988746, ppl: 19.860760 +epoch: 1, batch: 525, sum loss: 4559.430664, avg loss: 2.785236, ppl: 16.203636 +epoch: 1, batch: 526, sum loss: 4114.622070, avg loss: 2.770789, ppl: 15.971232 +epoch: 1, batch: 527, sum loss: 5116.897461, avg loss: 2.900736, ppl: 18.187519 +epoch: 1, batch: 528, sum loss: 4684.244141, avg loss: 2.636040, ppl: 13.957828 +epoch: 1, batch: 529, sum loss: 4234.772461, avg loss: 2.784203, ppl: 16.186909 +epoch: 1, batch: 530, sum loss: 4711.184570, avg loss: 2.904553, ppl: 18.257076 +epoch: 1, batch: 531, sum loss: 6662.677734, avg loss: 3.181794, ppl: 24.089941 +epoch: 1, batch: 532, sum loss: 4851.093262, avg loss: 2.745384, ppl: 15.570592 +epoch: 1, batch: 533, sum loss: 5344.416504, avg loss: 3.064459, ppl: 21.422865 +epoch: 1, batch: 534, sum loss: 4662.859375, avg loss: 2.787125, ppl: 16.234274 +epoch: 1, batch: 535, sum loss: 4725.744141, avg loss: 2.855435, ppl: 17.381990 +epoch: 1, batch: 
536, sum loss: 6025.474609, avg loss: 2.896863, ppl: 18.117218 +epoch: 1, batch: 537, sum loss: 3815.049805, avg loss: 2.534917, ppl: 12.615382 +epoch: 1, batch: 538, sum loss: 3755.493896, avg loss: 2.554758, ppl: 12.868183 +epoch: 1, batch: 539, sum loss: 5639.177246, avg loss: 2.977391, ppl: 19.636509 +epoch: 1, batch: 540, sum loss: 4740.278320, avg loss: 2.922490, ppl: 18.587505 +epoch: 1, batch: 541, sum loss: 5310.555664, avg loss: 2.995237, ppl: 19.990105 +epoch: 1, batch: 542, sum loss: 4322.250977, avg loss: 2.682962, ppl: 14.628354 +epoch: 1, batch: 543, sum loss: 5338.224121, avg loss: 2.822963, ppl: 16.826643 +epoch: 1, batch: 544, sum loss: 4742.886230, avg loss: 3.020947, ppl: 20.510700 +epoch: 1, batch: 545, sum loss: 4398.089844, avg loss: 2.534922, ppl: 12.615448 +epoch: 1, batch: 546, sum loss: 5747.895020, avg loss: 3.059018, ppl: 21.306627 +epoch: 1, batch: 547, sum loss: 4012.789795, avg loss: 2.814018, ppl: 16.676792 +epoch: 1, batch: 548, sum loss: 5080.901367, avg loss: 2.776449, ppl: 16.061884 +epoch: 1, batch: 549, sum loss: 4218.418945, avg loss: 2.583233, ppl: 13.239869 +epoch: 1, batch: 550, sum loss: 3760.301270, avg loss: 2.620419, ppl: 13.741481 +epoch: 1, batch: 551, sum loss: 5783.908203, avg loss: 3.099629, ppl: 22.189720 +epoch: 1, batch: 552, sum loss: 4932.513672, avg loss: 2.937769, ppl: 18.873686 +epoch: 1, batch: 553, sum loss: 4195.326660, avg loss: 2.703175, ppl: 14.927044 +epoch: 1, batch: 554, sum loss: 5348.220703, avg loss: 2.875387, ppl: 17.732292 +epoch: 1, batch: 555, sum loss: 4512.370605, avg loss: 2.771726, ppl: 15.986208 +epoch: 1, batch: 556, sum loss: 5207.532715, avg loss: 3.001460, ppl: 20.114880 +epoch: 1, batch: 557, sum loss: 4787.840332, avg loss: 2.560343, ppl: 12.940249 +epoch: 1, batch: 558, sum loss: 4474.297852, avg loss: 2.624222, ppl: 13.793836 +epoch: 1, batch: 559, sum loss: 4776.524414, avg loss: 2.946653, ppl: 19.042110 +epoch: 1, batch: 560, sum loss: 5915.042969, avg loss: 3.142956, ppl: 
23.172258 +epoch: 1, batch: 561, sum loss: 4870.244141, avg loss: 2.795777, ppl: 16.375353 +epoch: 1, batch: 562, sum loss: 5182.746094, avg loss: 3.022009, ppl: 20.532503 +epoch: 1, batch: 563, sum loss: 4832.705078, avg loss: 2.964850, ppl: 19.391788 +epoch: 1, batch: 564, sum loss: 5161.163574, avg loss: 2.723569, ppl: 15.234600 +epoch: 1, batch: 565, sum loss: 4040.732178, avg loss: 2.538148, ppl: 12.656215 +epoch: 1, batch: 566, sum loss: 4704.796875, avg loss: 2.829102, ppl: 16.930256 +epoch: 1, batch: 567, sum loss: 4534.964355, avg loss: 2.756817, ppl: 15.749634 +epoch: 1, batch: 568, sum loss: 4730.628418, avg loss: 3.079836, ppl: 21.754843 +epoch: 1, batch: 569, sum loss: 5972.889160, avg loss: 3.182147, ppl: 24.098427 +epoch: 1, batch: 570, sum loss: 4551.589355, avg loss: 2.910223, ppl: 18.360897 +epoch: 1, batch: 571, sum loss: 4753.792969, avg loss: 2.939884, ppl: 18.913660 +epoch: 1, batch: 572, sum loss: 3579.477295, avg loss: 2.620408, ppl: 13.741330 +epoch: 1, batch: 573, sum loss: 3794.017578, avg loss: 2.575708, ppl: 13.140616 +epoch: 1, batch: 574, sum loss: 4042.271484, avg loss: 2.838674, ppl: 17.093082 +epoch: 1, batch: 575, sum loss: 4273.211914, avg loss: 2.589825, ppl: 13.327444 +epoch: 1, batch: 576, sum loss: 5210.822754, avg loss: 2.979316, ppl: 19.674345 +epoch: 1, batch: 577, sum loss: 4523.523438, avg loss: 2.749862, ppl: 15.640476 +epoch: 1, batch: 578, sum loss: 3849.268311, avg loss: 2.636485, ppl: 13.964035 +epoch: 1, batch: 579, sum loss: 5549.552246, avg loss: 2.975631, ppl: 19.601994 +epoch: 1, batch: 580, sum loss: 5319.545898, avg loss: 2.940600, ppl: 18.927206 +epoch: 1, batch: 581, sum loss: 5210.365723, avg loss: 2.813372, ppl: 16.666027 +epoch: 1, batch: 582, sum loss: 5207.068359, avg loss: 2.987417, ppl: 19.834389 +epoch: 1, batch: 583, sum loss: 4473.641113, avg loss: 2.829628, ppl: 16.939150 +epoch: 1, batch: 584, sum loss: 4756.103027, avg loss: 2.697733, ppl: 14.846037 +epoch: 1, batch: 585, sum loss: 4353.683594, 
avg loss: 2.774814, ppl: 16.035650 +epoch: 1, batch: 586, sum loss: 5759.970703, avg loss: 3.080198, ppl: 21.762718 +epoch: 1, batch: 587, sum loss: 5104.092285, avg loss: 2.856235, ppl: 17.395912 +epoch: 1, batch: 588, sum loss: 5058.995117, avg loss: 2.892507, ppl: 18.038477 +epoch: 1, batch: 589, sum loss: 6065.868652, avg loss: 2.967646, ppl: 19.446093 +epoch: 1, batch: 590, sum loss: 4499.992188, avg loss: 2.701076, ppl: 14.895744 +epoch: 1, batch: 591, sum loss: 4672.566895, avg loss: 2.870127, ppl: 17.639257 +epoch: 1, batch: 592, sum loss: 4116.020996, avg loss: 2.718640, ppl: 15.159693 +epoch: 1, batch: 593, sum loss: 4982.368164, avg loss: 3.098488, ppl: 22.164404 +epoch: 1, batch: 594, sum loss: 4455.217285, avg loss: 2.628447, ppl: 13.852238 +epoch: 1, batch: 595, sum loss: 4589.841797, avg loss: 2.917891, ppl: 18.502216 +epoch: 1, batch: 596, sum loss: 3920.988281, avg loss: 2.476935, ppl: 11.904719 +epoch: 1, batch: 597, sum loss: 6052.263672, avg loss: 2.902764, ppl: 18.224455 +epoch: 1, batch: 598, sum loss: 4188.074219, avg loss: 2.795777, ppl: 16.375349 +epoch: 1, batch: 599, sum loss: 5350.069824, avg loss: 3.012427, ppl: 20.336695 +epoch: 1, batch: 600, sum loss: 5430.312500, avg loss: 3.015165, ppl: 20.392458 +epoch: 1, batch: 601, sum loss: 5413.724609, avg loss: 2.991008, ppl: 19.905743 +epoch: 1, batch: 602, sum loss: 3962.804932, avg loss: 2.731085, ppl: 15.349537 +epoch: 1, batch: 603, sum loss: 6062.642090, avg loss: 3.075922, ppl: 21.669847 +epoch: 1, batch: 604, sum loss: 4814.343750, avg loss: 2.792543, ppl: 16.322474 +epoch: 1, batch: 605, sum loss: 4446.138672, avg loss: 2.996050, ppl: 20.006363 +epoch: 1, batch: 606, sum loss: 4436.347656, avg loss: 2.631286, ppl: 13.891619 +epoch: 1, batch: 607, sum loss: 5155.723633, avg loss: 2.713539, ppl: 15.082557 +epoch: 1, batch: 608, sum loss: 4990.147461, avg loss: 2.989903, ppl: 19.883749 +epoch: 1, batch: 609, sum loss: 4500.977539, avg loss: 2.799115, ppl: 16.430107 +epoch: 1, batch: 
610, sum loss: 5115.334961, avg loss: 2.903141, ppl: 18.231329 +epoch: 1, batch: 611, sum loss: 5050.350586, avg loss: 3.002587, ppl: 20.137558 +epoch: 1, batch: 612, sum loss: 5294.431641, avg loss: 3.087132, ppl: 21.914137 +epoch: 1, batch: 613, sum loss: 5299.608887, avg loss: 3.070457, ppl: 21.551748 +epoch: 1, batch: 614, sum loss: 4152.021484, avg loss: 2.708429, ppl: 15.005681 +epoch: 1, batch: 615, sum loss: 3844.735107, avg loss: 2.786040, ppl: 16.216675 +epoch: 1, batch: 616, sum loss: 4935.760742, avg loss: 2.849746, ppl: 17.283400 +epoch: 1, batch: 617, sum loss: 5271.380371, avg loss: 3.026051, ppl: 20.615652 +epoch: 1, batch: 618, sum loss: 4457.166016, avg loss: 2.719442, ppl: 15.171856 +epoch: 1, batch: 619, sum loss: 4707.689941, avg loss: 2.920403, ppl: 18.548765 +epoch: 1, batch: 620, sum loss: 4313.405762, avg loss: 2.576706, ppl: 13.153741 +epoch: 1, batch: 621, sum loss: 4037.862061, avg loss: 2.825656, ppl: 16.872004 +epoch: 1, batch: 622, sum loss: 3917.280273, avg loss: 2.533817, ppl: 12.601509 +epoch: 1, batch: 623, sum loss: 4146.810547, avg loss: 2.653110, ppl: 14.198123 +epoch: 1, batch: 624, sum loss: 4108.173340, avg loss: 2.790879, ppl: 16.295334 +epoch: 1, batch: 625, sum loss: 5737.640137, avg loss: 2.933354, ppl: 18.790546 +epoch: 1, batch: 626, sum loss: 5887.371582, avg loss: 2.917429, ppl: 18.493679 +epoch: 1, batch: 627, sum loss: 4785.675293, avg loss: 2.852011, ppl: 17.322590 +epoch: 1, batch: 628, sum loss: 5016.930664, avg loss: 2.807460, ppl: 16.567780 +epoch: 1, batch: 629, sum loss: 3914.961670, avg loss: 2.690695, ppl: 14.741922 +epoch: 1, batch: 630, sum loss: 5144.902344, avg loss: 2.890395, ppl: 18.000412 +epoch: 1, batch: 631, sum loss: 4669.358398, avg loss: 2.887668, ppl: 17.951391 +epoch: 1, batch: 632, sum loss: 4268.079590, avg loss: 2.834050, ppl: 17.014233 +epoch: 1, batch: 633, sum loss: 4740.491699, avg loss: 3.070267, ppl: 21.547655 +epoch: 1, batch: 634, sum loss: 4020.815186, avg loss: 2.652253, ppl: 
14.185963 +epoch: 1, batch: 635, sum loss: 3910.562744, avg loss: 2.660247, ppl: 14.299819 +epoch: 1, batch: 636, sum loss: 4985.370605, avg loss: 2.799197, ppl: 16.431454 +epoch: 1, batch: 637, sum loss: 5054.971191, avg loss: 2.777457, ppl: 16.078079 +epoch: 1, batch: 638, sum loss: 4791.419434, avg loss: 2.909180, ppl: 18.341751 +epoch: 1, batch: 639, sum loss: 4481.385254, avg loss: 2.620693, ppl: 13.745249 +epoch: 1, batch: 640, sum loss: 5064.736816, avg loss: 3.009350, ppl: 20.274223 +epoch: 1, batch: 641, sum loss: 4442.090820, avg loss: 2.786757, ppl: 16.228306 +epoch: 1, batch: 642, sum loss: 4750.825684, avg loss: 2.786408, ppl: 16.222647 +epoch: 1, batch: 643, sum loss: 4446.705078, avg loss: 2.624973, ppl: 13.804206 +epoch: 1, batch: 644, sum loss: 4979.694336, avg loss: 2.881768, ppl: 17.845787 +epoch: 1, batch: 645, sum loss: 5241.871582, avg loss: 2.848843, ppl: 17.267794 +epoch: 1, batch: 646, sum loss: 4097.112305, avg loss: 2.966772, ppl: 19.429102 +epoch: 1, batch: 647, sum loss: 5433.751465, avg loss: 3.150001, ppl: 23.336088 +epoch: 1, batch: 648, sum loss: 4830.985840, avg loss: 2.770061, ppl: 15.959600 +epoch: 1, batch: 649, sum loss: 5139.981445, avg loss: 2.858721, ppl: 17.439217 +epoch: 1, batch: 650, sum loss: 4948.599609, avg loss: 2.761495, ppl: 15.823487 +epoch: 1, batch: 651, sum loss: 4633.821289, avg loss: 2.856856, ppl: 17.406719 +epoch: 1, batch: 652, sum loss: 4456.125000, avg loss: 2.790310, ppl: 16.286070 +epoch: 1, batch: 653, sum loss: 5476.895508, avg loss: 2.984684, ppl: 19.780260 +epoch: 1, batch: 654, sum loss: 4742.109863, avg loss: 2.812639, ppl: 16.653818 +epoch: 1, batch: 655, sum loss: 4966.508301, avg loss: 2.982888, ppl: 19.744757 +epoch: 1, batch: 656, sum loss: 4378.412598, avg loss: 2.686143, ppl: 14.674961 +epoch: 1, batch: 657, sum loss: 5427.111328, avg loss: 2.960781, ppl: 19.313047 +epoch: 1, batch: 658, sum loss: 4907.095703, avg loss: 2.770805, ppl: 15.971484 +epoch: 1, batch: 659, sum loss: 4629.520020, 
avg loss: 2.720047, ppl: 15.181036 +epoch: 1, batch: 660, sum loss: 4789.600586, avg loss: 2.824057, ppl: 16.845055 +epoch: 1, batch: 661, sum loss: 5514.109863, avg loss: 3.152721, ppl: 23.399658 +epoch: 1, batch: 662, sum loss: 4413.596680, avg loss: 2.486533, ppl: 12.019534 +epoch: 1, batch: 663, sum loss: 6035.191406, avg loss: 3.133537, ppl: 22.955019 +epoch: 1, batch: 664, sum loss: 3829.037598, avg loss: 2.592443, ppl: 13.362372 +epoch: 1, batch: 665, sum loss: 3839.500488, avg loss: 2.644284, ppl: 14.073365 +epoch: 1, batch: 666, sum loss: 4907.754883, avg loss: 2.880138, ppl: 17.816730 +epoch: 1, batch: 667, sum loss: 3826.228516, avg loss: 2.624299, ppl: 13.794905 +epoch: 1, batch: 668, sum loss: 5731.122559, avg loss: 3.017969, ppl: 20.449709 +epoch: 1, batch: 669, sum loss: 3346.618652, avg loss: 2.414588, ppl: 11.185158 +epoch: 1, batch: 670, sum loss: 4443.064941, avg loss: 2.638400, ppl: 13.990798 +epoch: 1, batch: 671, sum loss: 4108.447266, avg loss: 2.505151, ppl: 12.245405 +epoch: 1, batch: 672, sum loss: 4958.028809, avg loss: 3.023188, ppl: 20.556730 +epoch: 1, batch: 673, sum loss: 4604.036133, avg loss: 3.013113, ppl: 20.350649 +epoch: 1, batch: 674, sum loss: 4352.867188, avg loss: 2.514655, ppl: 12.362338 +epoch: 1, batch: 675, sum loss: 4963.347168, avg loss: 3.041267, ppl: 20.931740 +epoch: 1, batch: 676, sum loss: 4841.254883, avg loss: 2.914663, ppl: 18.442593 +epoch: 1, batch: 677, sum loss: 4921.852539, avg loss: 2.879961, ppl: 17.813570 +epoch: 1, batch: 678, sum loss: 4656.154785, avg loss: 2.608490, ppl: 13.578535 +epoch: 1, batch: 679, sum loss: 4720.838867, avg loss: 2.862849, ppl: 17.511354 +epoch: 1, batch: 680, sum loss: 5511.722656, avg loss: 2.753108, ppl: 15.691325 +epoch: 1, batch: 681, sum loss: 4412.469238, avg loss: 2.895321, ppl: 18.089310 +epoch: 1, batch: 682, sum loss: 3589.207764, avg loss: 2.714983, ppl: 15.104357 +epoch: 1, batch: 683, sum loss: 4850.831543, avg loss: 2.977797, ppl: 19.644493 +epoch: 1, batch: 
684, sum loss: 4092.947998, avg loss: 2.616974, ppl: 13.694227 +epoch: 1, batch: 685, sum loss: 4638.477051, avg loss: 2.777531, ppl: 16.079275 +epoch: 1, batch: 686, sum loss: 5040.810059, avg loss: 2.691303, ppl: 14.750880 +epoch: 1, batch: 687, sum loss: 4908.765625, avg loss: 2.916676, ppl: 18.479755 +epoch: 1, batch: 688, sum loss: 5070.083984, avg loss: 2.854777, ppl: 17.370565 +epoch: 1, batch: 689, sum loss: 4976.571289, avg loss: 2.944717, ppl: 19.005278 +epoch: 1, batch: 690, sum loss: 4937.467773, avg loss: 2.744562, ppl: 15.557804 +epoch: 1, batch: 691, sum loss: 4883.030273, avg loss: 2.948690, ppl: 19.080936 +epoch: 1, batch: 692, sum loss: 4652.706055, avg loss: 2.994019, ppl: 19.965769 +epoch: 1, batch: 693, sum loss: 5022.853516, avg loss: 3.135364, ppl: 22.997011 +epoch: 1, batch: 694, sum loss: 4884.688965, avg loss: 2.839936, ppl: 17.114662 +epoch: 1, batch: 695, sum loss: 4341.410156, avg loss: 2.609021, ppl: 13.585740 +epoch: 1, batch: 696, sum loss: 4694.303223, avg loss: 2.831305, ppl: 16.967585 +epoch: 1, batch: 697, sum loss: 4176.317383, avg loss: 2.729619, ppl: 15.327050 +epoch: 1, batch: 698, sum loss: 4876.055664, avg loss: 2.871647, ppl: 17.666084 +epoch: 1, batch: 699, sum loss: 5036.593750, avg loss: 3.035922, ppl: 20.820156 +epoch: 1, batch: 700, sum loss: 4992.177246, avg loss: 3.064565, ppl: 21.425150 +epoch: 1, batch: 701, sum loss: 4789.980469, avg loss: 2.945867, ppl: 19.027157 +epoch: 1, batch: 702, sum loss: 4713.264648, avg loss: 2.870441, ppl: 17.644806 +epoch: 1, batch: 703, sum loss: 4760.762207, avg loss: 2.640467, ppl: 14.019752 +epoch: 1, batch: 704, sum loss: 6846.048340, avg loss: 3.329790, ppl: 27.932478 +epoch: 1, batch: 705, sum loss: 4777.377441, avg loss: 2.835239, ppl: 17.034470 +epoch: 1, batch: 706, sum loss: 5778.794922, avg loss: 3.065674, ppl: 21.448910 +epoch: 1, batch: 707, sum loss: 4305.879883, avg loss: 2.523962, ppl: 12.477940 +epoch: 1, batch: 708, sum loss: 4405.811523, avg loss: 2.722998, ppl: 
15.225907 +epoch: 1, batch: 709, sum loss: 4762.146484, avg loss: 2.695046, ppl: 14.806203 +epoch: 1, batch: 710, sum loss: 4946.508301, avg loss: 2.954904, ppl: 19.199871 +epoch: 1, batch: 711, sum loss: 4716.307129, avg loss: 3.005932, ppl: 20.205036 +epoch: 1, batch: 712, sum loss: 4711.270020, avg loss: 2.769706, ppl: 15.953943 +epoch: 1, batch: 713, sum loss: 3771.014404, avg loss: 2.672583, ppl: 14.477314 +epoch: 1, batch: 714, sum loss: 4410.371094, avg loss: 2.727502, ppl: 15.294639 +epoch: 1, batch: 715, sum loss: 4990.407227, avg loss: 2.792617, ppl: 16.323689 +epoch: 1, batch: 716, sum loss: 4932.691895, avg loss: 3.024336, ppl: 20.580332 +epoch: 1, batch: 717, sum loss: 4528.553711, avg loss: 3.011006, ppl: 20.307827 +epoch: 1, batch: 718, sum loss: 4706.418945, avg loss: 2.986306, ppl: 19.812368 +epoch: 1, batch: 719, sum loss: 4793.727539, avg loss: 2.867062, ppl: 17.585278 +epoch: 1, batch: 720, sum loss: 4942.077148, avg loss: 2.737993, ppl: 15.455930 +epoch: 1, batch: 721, sum loss: 4680.174316, avg loss: 2.850289, ppl: 17.292780 +epoch: 1, batch: 722, sum loss: 4606.755859, avg loss: 2.802163, ppl: 16.480253 +epoch: 1, batch: 723, sum loss: 5091.710449, avg loss: 2.911212, ppl: 18.379068 +epoch: 1, batch: 724, sum loss: 4356.112305, avg loss: 2.760527, ppl: 15.808178 +epoch: 1, batch: 725, sum loss: 5207.787598, avg loss: 2.935619, ppl: 18.833151 +epoch: 1, batch: 726, sum loss: 4469.666016, avg loss: 2.879940, ppl: 17.813196 +epoch: 1, batch: 727, sum loss: 5602.701172, avg loss: 2.950343, ppl: 19.112507 +epoch: 1, batch: 728, sum loss: 5242.616699, avg loss: 2.935396, ppl: 18.828953 +epoch: 1, batch: 729, sum loss: 4345.160156, avg loss: 2.690502, ppl: 14.739068 +epoch: 1, batch: 730, sum loss: 3988.501465, avg loss: 2.726249, ppl: 15.275474 +epoch: 1, batch: 731, sum loss: 4552.209473, avg loss: 2.610212, ppl: 13.601933 +epoch: 1, batch: 732, sum loss: 4364.306152, avg loss: 2.772748, ppl: 16.002556 +epoch: 1, batch: 733, sum loss: 5715.081055, 
avg loss: 2.958117, ppl: 19.261660 +epoch: 1, batch: 734, sum loss: 3699.475098, avg loss: 2.482869, ppl: 11.975577 +epoch: 1, batch: 735, sum loss: 5027.299316, avg loss: 2.911001, ppl: 18.375191 +epoch: 1, batch: 736, sum loss: 4533.057617, avg loss: 2.994094, ppl: 19.967255 +epoch: 1, batch: 737, sum loss: 4183.209961, avg loss: 2.656006, ppl: 14.239305 +epoch: 1, batch: 738, sum loss: 5749.156738, avg loss: 2.764018, ppl: 15.863448 +epoch: 1, batch: 739, sum loss: 4914.055664, avg loss: 2.978215, ppl: 19.652714 +epoch: 1, batch: 740, sum loss: 5477.322754, avg loss: 2.932186, ppl: 18.768608 +epoch: 1, batch: 741, sum loss: 4263.613770, avg loss: 2.871120, ppl: 17.656786 +epoch: 1, batch: 742, sum loss: 3891.125244, avg loss: 2.521792, ppl: 12.450891 +epoch: 1, batch: 743, sum loss: 5581.633789, avg loss: 3.162399, ppl: 23.627205 +epoch: 1, batch: 744, sum loss: 4661.065430, avg loss: 2.978317, ppl: 19.654701 +epoch: 1, batch: 745, sum loss: 5394.051270, avg loss: 3.110756, ppl: 22.438005 +epoch: 1, batch: 746, sum loss: 4934.188965, avg loss: 2.720060, ppl: 15.181234 +epoch: 1, batch: 747, sum loss: 4299.141113, avg loss: 2.833976, ppl: 17.012962 +epoch: 1, batch: 748, sum loss: 4940.136230, avg loss: 2.660278, ppl: 14.300261 +epoch: 1, batch: 749, sum loss: 4642.320801, avg loss: 2.778169, ppl: 16.089540 +epoch: 1, batch: 750, sum loss: 6398.885742, avg loss: 3.061668, ppl: 21.363153 +epoch: 1, batch: 751, sum loss: 4354.126465, avg loss: 2.857038, ppl: 17.409887 +epoch: 1, batch: 752, sum loss: 5555.695312, avg loss: 3.001456, ppl: 20.114807 +epoch: 1, batch: 753, sum loss: 4760.823242, avg loss: 2.847383, ppl: 17.242592 +epoch: 1, batch: 754, sum loss: 3990.433105, avg loss: 2.720131, ppl: 15.182317 +epoch: 1, batch: 755, sum loss: 5100.152832, avg loss: 3.048507, ppl: 21.083853 +epoch: 1, batch: 756, sum loss: 3974.086182, avg loss: 2.688827, ppl: 14.714406 +epoch: 1, batch: 757, sum loss: 5007.437500, avg loss: 2.789659, ppl: 16.275465 +epoch: 1, batch: 
758, sum loss: 4595.112305, avg loss: 2.610859, ppl: 13.610740 +epoch: 1, batch: 759, sum loss: 4902.913086, avg loss: 2.821009, ppl: 16.793781 +epoch: 1, batch: 760, sum loss: 5561.176270, avg loss: 3.115505, ppl: 22.544811 +epoch: 1, batch: 761, sum loss: 4286.942871, avg loss: 2.684373, ppl: 14.649009 +epoch: 1, batch: 762, sum loss: 4478.351562, avg loss: 2.863396, ppl: 17.520929 +epoch: 1, batch: 763, sum loss: 4034.020996, avg loss: 2.600916, ppl: 13.476078 +epoch: 1, batch: 764, sum loss: 4598.755859, avg loss: 3.088486, ppl: 21.943834 +epoch: 1, batch: 765, sum loss: 5684.389160, avg loss: 2.962162, ppl: 19.339739 +epoch: 1, batch: 766, sum loss: 5279.700195, avg loss: 2.944618, ppl: 19.003407 +epoch: 1, batch: 767, sum loss: 6138.822754, avg loss: 3.045051, ppl: 21.011105 +epoch: 1, batch: 768, sum loss: 4474.684082, avg loss: 2.740162, ppl: 15.489489 +epoch: 1, batch: 769, sum loss: 5125.649414, avg loss: 3.087741, ppl: 21.927481 +epoch: 1, batch: 770, sum loss: 4720.454590, avg loss: 2.711347, ppl: 15.049528 +epoch: 1, batch: 771, sum loss: 4899.493652, avg loss: 2.911167, ppl: 18.378227 +epoch: 1, batch: 772, sum loss: 5184.803711, avg loss: 3.075210, ppl: 21.654427 +epoch: 1, batch: 773, sum loss: 5583.162109, avg loss: 3.232867, ppl: 25.352249 +epoch: 1, batch: 774, sum loss: 4578.201660, avg loss: 2.693060, ppl: 14.776823 +epoch: 1, batch: 775, sum loss: 3817.250732, avg loss: 2.498201, ppl: 12.160593 +epoch: 1, batch: 776, sum loss: 4370.020508, avg loss: 2.743265, ppl: 15.537627 +epoch: 1, batch: 777, sum loss: 5469.264160, avg loss: 2.820662, ppl: 16.787966 +epoch: 1, batch: 778, sum loss: 4181.670410, avg loss: 2.623382, ppl: 13.782251 +epoch: 1, batch: 779, sum loss: 4217.691406, avg loss: 2.679600, ppl: 14.579268 +epoch: 1, batch: 780, sum loss: 4930.884766, avg loss: 2.621417, ppl: 13.755195 +epoch: 1, batch: 781, sum loss: 3617.215332, avg loss: 2.455679, ppl: 11.654346 +epoch: 1, batch: 782, sum loss: 4897.674805, avg loss: 2.834303, ppl: 
17.018528 +epoch: 1, batch: 783, sum loss: 5034.296875, avg loss: 2.876741, ppl: 17.756310 +epoch: 1, batch: 784, sum loss: 4195.975586, avg loss: 2.654001, ppl: 14.210782 +epoch: 1, batch: 785, sum loss: 4547.617676, avg loss: 2.898418, ppl: 18.145416 +epoch: 1, batch: 786, sum loss: 4917.530273, avg loss: 2.918416, ppl: 18.511932 +epoch: 1, batch: 787, sum loss: 5289.497559, avg loss: 3.131733, ppl: 22.913664 +epoch: 1, batch: 788, sum loss: 4747.693848, avg loss: 2.947048, ppl: 19.049635 +epoch: 1, batch: 789, sum loss: 4296.862793, avg loss: 2.735113, ppl: 15.411490 +epoch: 1, batch: 790, sum loss: 4650.891602, avg loss: 2.737429, ppl: 15.447218 +epoch: 1, batch: 791, sum loss: 5111.186035, avg loss: 2.710067, ppl: 15.030283 +epoch: 1, batch: 792, sum loss: 3569.812500, avg loss: 2.351655, ppl: 10.502940 +epoch: 1, batch: 793, sum loss: 5345.781738, avg loss: 2.761251, ppl: 15.819616 +epoch: 1, batch: 794, sum loss: 3461.860840, avg loss: 2.377652, ppl: 10.779559 +epoch: 1, batch: 795, sum loss: 5412.379883, avg loss: 2.872813, ppl: 17.686705 +epoch: 1, batch: 796, sum loss: 4746.875000, avg loss: 2.910408, ppl: 18.364286 +epoch: 1, batch: 797, sum loss: 5483.333008, avg loss: 2.986565, ppl: 19.817486 +epoch: 1, batch: 798, sum loss: 5276.985840, avg loss: 3.091380, ppl: 22.007429 +epoch: 1, batch: 799, sum loss: 5318.929688, avg loss: 2.993207, ppl: 19.949568 +epoch: 1, batch: 800, sum loss: 4133.164551, avg loss: 2.538799, ppl: 12.664450 +epoch: 1, batch: 801, sum loss: 4166.542969, avg loss: 2.615532, ppl: 13.674495 +epoch: 1, batch: 802, sum loss: 4181.924805, avg loss: 2.904114, ppl: 18.249077 +epoch: 1, batch: 803, sum loss: 5109.407227, avg loss: 3.014400, ppl: 20.376852 +epoch: 1, batch: 804, sum loss: 4605.275391, avg loss: 2.769258, ppl: 15.946790 +epoch: 1, batch: 805, sum loss: 5076.915527, avg loss: 2.953412, ppl: 19.171261 +epoch: 1, batch: 806, sum loss: 4163.278320, avg loss: 2.680798, ppl: 14.596730 +epoch: 1, batch: 807, sum loss: 4267.461914, 
avg loss: 2.740823, ppl: 15.499741 +epoch: 1, batch: 808, sum loss: 5126.470703, avg loss: 2.809025, ppl: 16.593733 +epoch: 1, batch: 809, sum loss: 4455.536621, avg loss: 2.625537, ppl: 13.811992 +epoch: 1, batch: 810, sum loss: 4179.217773, avg loss: 2.825705, ppl: 16.872837 +epoch: 1, batch: 811, sum loss: 4712.039062, avg loss: 2.786540, ppl: 16.224785 +epoch: 1, batch: 812, sum loss: 4207.906738, avg loss: 2.772007, ppl: 15.990695 +epoch: 1, batch: 813, sum loss: 5598.965820, avg loss: 2.904028, ppl: 18.247501 +epoch: 1, batch: 814, sum loss: 4641.746582, avg loss: 2.720836, ppl: 15.193021 +epoch: 1, batch: 815, sum loss: 3901.907959, avg loss: 2.823378, ppl: 16.833612 +epoch: 1, batch: 816, sum loss: 4709.033691, avg loss: 2.921237, ppl: 18.564232 +epoch: 1, batch: 817, sum loss: 4575.955078, avg loss: 2.727029, ppl: 15.287406 +epoch: 1, batch: 818, sum loss: 4629.937500, avg loss: 2.849192, ppl: 17.273821 +epoch: 1, batch: 819, sum loss: 4239.402344, avg loss: 2.758232, ppl: 15.771932 +epoch: 1, batch: 820, sum loss: 4590.382812, avg loss: 2.737259, ppl: 15.444589 +epoch: 1, batch: 821, sum loss: 5005.016113, avg loss: 2.797661, ppl: 16.406233 +epoch: 1, batch: 822, sum loss: 4743.788574, avg loss: 2.777394, ppl: 16.077063 +epoch: 1, batch: 823, sum loss: 5352.676270, avg loss: 2.539220, ppl: 12.669789 +epoch: 1, batch: 824, sum loss: 4981.385254, avg loss: 2.792256, ppl: 16.317797 +epoch: 1, batch: 825, sum loss: 4977.377930, avg loss: 2.924429, ppl: 18.623583 +epoch: 1, batch: 826, sum loss: 4543.318359, avg loss: 2.735291, ppl: 15.414228 +epoch: 1, batch: 827, sum loss: 5041.017090, avg loss: 2.956608, ppl: 19.232630 +epoch: 1, batch: 828, sum loss: 6383.372070, avg loss: 3.260149, ppl: 26.053425 +epoch: 1, batch: 829, sum loss: 4650.826660, avg loss: 2.773301, ppl: 16.011406 +epoch: 1, batch: 830, sum loss: 4427.677734, avg loss: 2.678571, ppl: 14.564262 +epoch: 1, batch: 831, sum loss: 4400.302246, avg loss: 2.889233, ppl: 17.979521 +epoch: 1, batch: 
832, sum loss: 4885.661133, avg loss: 2.782267, ppl: 16.155609 +epoch: 1, batch: 833, sum loss: 4594.632324, avg loss: 2.472891, ppl: 11.856679 +epoch: 1, batch: 834, sum loss: 4799.457520, avg loss: 2.733176, ppl: 15.381669 +epoch: 1, batch: 835, sum loss: 4520.219727, avg loss: 2.629564, ppl: 13.867716 +epoch: 1, batch: 836, sum loss: 5822.715332, avg loss: 3.004497, ppl: 20.176071 +epoch: 1, batch: 837, sum loss: 4158.914062, avg loss: 2.714696, ppl: 15.100018 +epoch: 1, batch: 838, sum loss: 4528.903320, avg loss: 2.802539, ppl: 16.486454 +epoch: 1, batch: 839, sum loss: 4083.269775, avg loss: 2.644605, ppl: 14.077882 +epoch: 1, batch: 840, sum loss: 5875.512207, avg loss: 3.002306, ppl: 20.131903 +epoch: 1, batch: 841, sum loss: 4551.914551, avg loss: 2.722437, ppl: 15.217364 +epoch: 1, batch: 842, sum loss: 5200.363281, avg loss: 2.905231, ppl: 18.269459 +epoch: 1, batch: 843, sum loss: 4736.641602, avg loss: 2.858565, ppl: 17.436481 +epoch: 1, batch: 844, sum loss: 4606.000977, avg loss: 2.830978, ppl: 16.962040 +epoch: 1, batch: 845, sum loss: 4631.693359, avg loss: 2.885790, ppl: 17.917719 +epoch: 1, batch: 846, sum loss: 4475.440430, avg loss: 2.694425, ppl: 14.797013 +epoch: 1, batch: 847, sum loss: 4634.667969, avg loss: 2.817427, ppl: 16.733742 +epoch: 1, batch: 848, sum loss: 4499.724121, avg loss: 2.808816, ppl: 16.590271 +epoch: 1, batch: 849, sum loss: 4704.358887, avg loss: 2.825441, ppl: 16.868380 +epoch: 1, batch: 850, sum loss: 4697.246094, avg loss: 2.852001, ppl: 17.322412 +epoch: 1, batch: 851, sum loss: 4258.700195, avg loss: 2.685183, ppl: 14.660885 +epoch: 1, batch: 852, sum loss: 4874.822754, avg loss: 2.860811, ppl: 17.475702 +epoch: 1, batch: 853, sum loss: 4365.827148, avg loss: 2.842335, ppl: 17.155785 +epoch: 1, batch: 854, sum loss: 3969.481201, avg loss: 2.644558, ppl: 14.077217 +epoch: 1, batch: 855, sum loss: 4605.521484, avg loss: 2.924140, ppl: 18.618216 +epoch: 1, batch: 856, sum loss: 3943.195801, avg loss: 2.684272, ppl: 
14.647534 +epoch: 1, batch: 857, sum loss: 4518.901367, avg loss: 2.675489, ppl: 14.519454 +epoch: 1, batch: 858, sum loss: 4425.912598, avg loss: 2.677503, ppl: 14.548721 +epoch: 1, batch: 859, sum loss: 4282.678223, avg loss: 2.638742, ppl: 13.995585 +epoch: 1, batch: 860, sum loss: 4756.830078, avg loss: 2.916512, ppl: 18.476719 +epoch: 1, batch: 861, sum loss: 4303.264648, avg loss: 2.794328, ppl: 16.351633 +epoch: 1, batch: 862, sum loss: 5218.603516, avg loss: 2.976956, ppl: 19.627975 +epoch: 1, batch: 863, sum loss: 5888.561523, avg loss: 2.974021, ppl: 19.570450 +epoch: 1, batch: 864, sum loss: 4133.514160, avg loss: 2.673683, ppl: 14.493252 +epoch: 1, batch: 865, sum loss: 4573.441406, avg loss: 2.963993, ppl: 19.375189 +epoch: 1, batch: 866, sum loss: 5575.241211, avg loss: 2.848871, ppl: 17.268278 +epoch: 1, batch: 867, sum loss: 5068.000000, avg loss: 2.958552, ppl: 19.270050 +epoch: 1, batch: 868, sum loss: 4141.395996, avg loss: 2.706795, ppl: 14.981180 +epoch: 1, batch: 869, sum loss: 3599.165527, avg loss: 2.619480, ppl: 13.728584 +epoch: 1, batch: 870, sum loss: 4794.616699, avg loss: 2.698152, ppl: 14.852263 +epoch: 1, batch: 871, sum loss: 4451.494629, avg loss: 2.741068, ppl: 15.503536 +epoch: 1, batch: 872, sum loss: 3896.656738, avg loss: 2.738339, ppl: 15.461286 +epoch: 1, batch: 873, sum loss: 4506.673828, avg loss: 2.816671, ppl: 16.721096 +epoch: 1, batch: 874, sum loss: 5333.556641, avg loss: 3.070557, ppl: 21.553898 +epoch: 1, batch: 875, sum loss: 4688.489746, avg loss: 2.777541, ppl: 16.079439 +epoch: 1, batch: 876, sum loss: 6327.448242, avg loss: 3.394554, ppl: 29.801350 +epoch: 1, batch: 877, sum loss: 4854.297363, avg loss: 2.950941, ppl: 19.123934 +epoch: 1, batch: 878, sum loss: 5408.265625, avg loss: 3.016322, ppl: 20.416071 +epoch: 1, batch: 879, sum loss: 5308.381348, avg loss: 2.713896, ppl: 15.087948 +epoch: 1, batch: 880, sum loss: 4172.687500, avg loss: 2.662851, ppl: 14.337108 +epoch: 1, batch: 881, sum loss: 3407.099121, 
avg loss: 2.483309, ppl: 11.980838 +epoch: 1, batch: 882, sum loss: 3882.370605, avg loss: 2.637480, ppl: 13.977935 +epoch: 1, batch: 883, sum loss: 3933.747803, avg loss: 2.711060, ppl: 15.045212 +epoch: 1, batch: 884, sum loss: 5017.089355, avg loss: 2.731132, ppl: 15.350254 +epoch: 1, batch: 885, sum loss: 5015.026367, avg loss: 2.985135, ppl: 19.789171 +epoch: 1, batch: 886, sum loss: 4787.232422, avg loss: 2.786515, ppl: 16.224375 +epoch: 1, batch: 887, sum loss: 4241.950195, avg loss: 2.772516, ppl: 15.998844 +epoch: 1, batch: 888, sum loss: 5014.312500, avg loss: 2.711905, ppl: 15.057934 +epoch: 1, batch: 889, sum loss: 3938.891602, avg loss: 2.690500, ppl: 14.739037 +epoch: 1, batch: 890, sum loss: 3080.403076, avg loss: 2.417899, ppl: 11.222256 +epoch: 1, batch: 891, sum loss: 5736.973145, avg loss: 2.981795, ppl: 19.723185 +epoch: 1, batch: 892, sum loss: 4517.708008, avg loss: 2.584501, ppl: 13.256676 +epoch: 1, batch: 893, sum loss: 4846.875000, avg loss: 2.854461, ppl: 17.365078 +epoch: 1, batch: 894, sum loss: 4722.250977, avg loss: 2.962516, ppl: 19.346592 +epoch: 1, batch: 895, sum loss: 4486.433594, avg loss: 2.720700, ppl: 15.190946 +epoch: 1, batch: 896, sum loss: 5627.276367, avg loss: 3.230354, ppl: 25.288603 +epoch: 1, batch: 897, sum loss: 5455.082031, avg loss: 2.708581, ppl: 15.007964 +epoch: 1, batch: 898, sum loss: 4944.394043, avg loss: 2.810912, ppl: 16.625072 +epoch: 1, batch: 899, sum loss: 5091.829102, avg loss: 2.909617, ppl: 18.349764 +epoch: 1, batch: 900, sum loss: 6226.643066, avg loss: 3.059775, ppl: 21.322767 +epoch: 1, batch: 901, sum loss: 4474.309082, avg loss: 2.901627, ppl: 18.203739 +epoch: 1, batch: 902, sum loss: 4174.285156, avg loss: 2.536018, ppl: 12.629279 +epoch: 1, batch: 903, sum loss: 4147.165039, avg loss: 2.759258, ppl: 15.788124 +epoch: 1, batch: 904, sum loss: 3897.058105, avg loss: 2.575716, ppl: 13.140723 +epoch: 1, batch: 905, sum loss: 5047.025391, avg loss: 2.898923, ppl: 18.154589 +epoch: 1, batch: 
906, sum loss: 4512.604004, avg loss: 2.679694, ppl: 14.580626 +epoch: 1, batch: 907, sum loss: 4162.496582, avg loss: 2.884613, ppl: 17.896641 +epoch: 1, batch: 908, sum loss: 4493.491699, avg loss: 2.777189, ppl: 16.073774 +epoch: 1, batch: 909, sum loss: 4605.163086, avg loss: 2.816613, ppl: 16.720131 +epoch: 1, batch: 910, sum loss: 4831.011230, avg loss: 2.682405, ppl: 14.620209 +epoch: 1, batch: 911, sum loss: 5016.648438, avg loss: 2.730892, ppl: 15.346573 +epoch: 1, batch: 912, sum loss: 4963.697754, avg loss: 2.860921, ppl: 17.477615 +epoch: 1, batch: 913, sum loss: 4259.356445, avg loss: 2.746200, ppl: 15.583304 +epoch: 1, batch: 914, sum loss: 4917.033203, avg loss: 2.835659, ppl: 17.041632 +epoch: 1, batch: 915, sum loss: 5082.243652, avg loss: 2.899169, ppl: 18.159058 +epoch: 1, batch: 916, sum loss: 4666.338867, avg loss: 3.047903, ppl: 21.071108 +epoch: 1, batch: 917, sum loss: 4609.497070, avg loss: 2.745382, ppl: 15.570565 +epoch: 1, batch: 918, sum loss: 4283.409180, avg loss: 2.635944, ppl: 13.956483 +epoch: 1, batch: 919, sum loss: 5016.015137, avg loss: 2.712826, ppl: 15.071805 +epoch: 1, batch: 920, sum loss: 4456.998047, avg loss: 2.864395, ppl: 17.538433 +epoch: 1, batch: 921, sum loss: 5475.108887, avg loss: 3.146614, ppl: 23.257189 +epoch: 1, batch: 922, sum loss: 4511.777832, avg loss: 2.793670, ppl: 16.340887 +epoch: 1, batch: 923, sum loss: 4768.278320, avg loss: 2.709249, ppl: 15.017993 +epoch: 1, batch: 924, sum loss: 4410.250000, avg loss: 2.570076, ppl: 13.066814 +epoch: 1, batch: 925, sum loss: 4646.916992, avg loss: 2.735090, ppl: 15.411123 +epoch: 1, batch: 926, sum loss: 4617.210938, avg loss: 2.617466, ppl: 13.700968 +epoch: 1, batch: 927, sum loss: 4912.762695, avg loss: 2.644113, ppl: 14.070963 +epoch: 1, batch: 928, sum loss: 4847.409180, avg loss: 2.920126, ppl: 18.543623 +epoch: 1, batch: 929, sum loss: 4978.119141, avg loss: 3.057813, ppl: 21.280958 +epoch: 1, batch: 930, sum loss: 4506.822754, avg loss: 2.684230, ppl: 
14.646924 +epoch: 1, batch: 931, sum loss: 4351.805176, avg loss: 2.982732, ppl: 19.741669 +epoch: 1, batch: 932, sum loss: 5201.213379, avg loss: 2.960281, ppl: 19.303389 +epoch: 1, batch: 933, sum loss: 4535.972656, avg loss: 2.840309, ppl: 17.121050 +epoch: 1, batch: 934, sum loss: 4499.408691, avg loss: 2.833381, ppl: 17.002850 +epoch: 1, batch: 935, sum loss: 5579.440430, avg loss: 2.990054, ppl: 19.886749 +epoch: 1, batch: 936, sum loss: 5499.741211, avg loss: 3.052021, ppl: 21.158056 +epoch: 1, batch: 937, sum loss: 5121.772461, avg loss: 2.930076, ppl: 18.729052 +epoch: 1, batch: 938, sum loss: 4890.365234, avg loss: 2.897136, ppl: 18.122169 +epoch: 1, batch: 939, sum loss: 4382.133789, avg loss: 2.627179, ppl: 13.834682 +epoch: 1, batch: 940, sum loss: 4153.875000, avg loss: 2.440585, ppl: 11.479751 +epoch: 1, batch: 941, sum loss: 4585.102051, avg loss: 2.872871, ppl: 17.687725 +epoch: 1, batch: 942, sum loss: 4208.064453, avg loss: 2.894130, ppl: 18.067766 +epoch: 1, batch: 943, sum loss: 3645.911133, avg loss: 2.556740, ppl: 12.893713 +epoch: 1, batch: 944, sum loss: 4667.044434, avg loss: 2.929720, ppl: 18.722395 +epoch: 1, batch: 945, sum loss: 4677.114746, avg loss: 2.926855, ppl: 18.668831 +epoch: 1, batch: 946, sum loss: 4174.555176, avg loss: 2.816839, ppl: 16.723902 +epoch: 1, batch: 947, sum loss: 3971.352051, avg loss: 2.701600, ppl: 14.903560 +epoch: 1, batch: 948, sum loss: 2803.363281, avg loss: 2.143244, ppl: 8.527057 +epoch: 1, batch: 949, sum loss: 4568.205566, avg loss: 2.688761, ppl: 14.713442 +epoch: 1, batch: 950, sum loss: 4374.026367, avg loss: 2.782459, ppl: 16.158714 +epoch: 1, batch: 951, sum loss: 5185.125000, avg loss: 2.981671, ppl: 19.720736 +epoch: 1, batch: 952, sum loss: 4645.092285, avg loss: 2.556463, ppl: 12.890138 +epoch: 1, batch: 953, sum loss: 5831.710449, avg loss: 2.854484, ppl: 17.365471 +epoch: 1, batch: 954, sum loss: 5265.011719, avg loss: 2.902432, ppl: 18.218403 +epoch: 1, batch: 955, sum loss: 4086.795654, 
avg loss: 2.818480, ppl: 16.751366 +epoch: 1, batch: 956, sum loss: 4313.395996, avg loss: 2.800906, ppl: 16.459558 +epoch: 1, batch: 957, sum loss: 5270.347656, avg loss: 2.902174, ppl: 18.213694 +epoch: 1, batch: 958, sum loss: 5287.772461, avg loss: 2.603532, ppl: 13.511371 +epoch: 1, batch: 959, sum loss: 5730.509766, avg loss: 2.908888, ppl: 18.336403 +epoch: 1, batch: 960, sum loss: 5240.916992, avg loss: 3.093812, ppl: 22.061005 +epoch: 1, batch: 961, sum loss: 4614.393066, avg loss: 2.820534, ppl: 16.785809 +epoch: 1, batch: 962, sum loss: 4728.340820, avg loss: 2.973799, ppl: 19.566114 +epoch: 1, batch: 963, sum loss: 5990.116699, avg loss: 3.056182, ppl: 21.246283 +epoch: 1, batch: 964, sum loss: 4641.417969, avg loss: 2.873943, ppl: 17.706699 +epoch: 1, batch: 965, sum loss: 4955.936035, avg loss: 2.869679, ppl: 17.631361 +epoch: 1, batch: 966, sum loss: 5490.488770, avg loss: 3.205189, ppl: 24.660160 +epoch: 1, batch: 967, sum loss: 4246.573242, avg loss: 2.632717, ppl: 13.911521 +epoch: 1, batch: 968, sum loss: 3798.175293, avg loss: 2.667258, ppl: 14.400430 +epoch: 1, batch: 969, sum loss: 4014.714355, avg loss: 2.692632, ppl: 14.770504 +epoch: 1, batch: 970, sum loss: 5159.269531, avg loss: 2.779779, ppl: 16.115454 +epoch: 1, batch: 971, sum loss: 4057.343262, avg loss: 2.703094, ppl: 14.925834 +epoch: 1, batch: 972, sum loss: 6143.786133, avg loss: 3.155514, ppl: 23.465107 +epoch: 1, batch: 973, sum loss: 5537.522461, avg loss: 2.833942, ppl: 17.012390 +epoch: 1, batch: 974, sum loss: 3900.700195, avg loss: 2.628504, ppl: 13.853031 +epoch: 1, batch: 975, sum loss: 4486.883301, avg loss: 2.704571, ppl: 14.947903 +epoch: 1, batch: 976, sum loss: 4567.741211, avg loss: 2.887321, ppl: 17.945162 +epoch: 1, batch: 977, sum loss: 5916.049805, avg loss: 3.257736, ppl: 25.990639 +epoch: 1, batch: 978, sum loss: 4345.904297, avg loss: 2.597672, ppl: 13.432425 +epoch: 1, batch: 979, sum loss: 3729.518555, avg loss: 2.452018, ppl: 11.611751 +epoch: 1, batch: 
980, sum loss: 5625.110352, avg loss: 3.012914, ppl: 20.346603 +epoch: 1, batch: 981, sum loss: 4614.720703, avg loss: 2.673651, ppl: 14.492779 +epoch: 1, batch: 982, sum loss: 4618.428223, avg loss: 2.680458, ppl: 14.591769 +epoch: 1, batch: 983, sum loss: 4295.083496, avg loss: 2.701310, ppl: 14.899242 +epoch: 1, batch: 984, sum loss: 5179.047852, avg loss: 3.037565, ppl: 20.854395 +epoch: 1, batch: 985, sum loss: 5147.645996, avg loss: 2.906632, ppl: 18.295084 +epoch: 1, batch: 986, sum loss: 4290.733398, avg loss: 2.493163, ppl: 12.099484 +epoch: 1, batch: 987, sum loss: 5082.093750, avg loss: 3.030467, ppl: 20.706905 +epoch: 1, batch: 988, sum loss: 4477.627441, avg loss: 2.598739, ppl: 13.446773 +epoch: 1, batch: 989, sum loss: 4435.693848, avg loss: 2.460174, ppl: 11.706849 +epoch: 1, batch: 990, sum loss: 4243.650879, avg loss: 2.709866, ppl: 15.027266 +epoch: 1, batch: 991, sum loss: 4861.289062, avg loss: 2.760528, ppl: 15.808182 +epoch: 1, batch: 992, sum loss: 5305.099609, avg loss: 3.077204, ppl: 21.697657 +epoch: 1, batch: 993, sum loss: 4562.114258, avg loss: 2.768273, ppl: 15.931103 +epoch: 1, batch: 994, sum loss: 4953.515137, avg loss: 2.861649, ppl: 17.490349 +epoch: 1, batch: 995, sum loss: 5260.413086, avg loss: 2.937137, ppl: 18.861774 +epoch: 1, batch: 996, sum loss: 3398.805908, avg loss: 2.327949, ppl: 10.256886 +epoch: 1, batch: 997, sum loss: 3377.263672, avg loss: 2.355135, ppl: 10.539551 +epoch: 1, batch: 998, sum loss: 3899.522949, avg loss: 2.461820, ppl: 11.726135 +epoch: 1, batch: 999, sum loss: 5027.367676, avg loss: 2.841926, ppl: 17.148767 +epoch: 1, batch: 1000, sum loss: 3821.612305, avg loss: 2.457629, ppl: 11.677089 +epoch: 1, batch: 1001, sum loss: 4426.408203, avg loss: 2.564547, ppl: 12.994771 +epoch: 1, batch: 1002, sum loss: 4337.290039, avg loss: 2.783883, ppl: 16.181738 +epoch: 1, batch: 1003, sum loss: 4251.857910, avg loss: 2.771746, ppl: 15.986517 +epoch: 1, batch: 1004, sum loss: 4372.063965, avg loss: 2.782982, 
ppl: 16.167154 +epoch: 1, batch: 1005, sum loss: 4410.570312, avg loss: 2.663388, ppl: 14.344804 +epoch: 1, batch: 1006, sum loss: 5107.271484, avg loss: 2.923452, ppl: 18.605410 +epoch: 1, batch: 1007, sum loss: 5281.462891, avg loss: 3.179689, ppl: 24.039268 +epoch: 1, batch: 1008, sum loss: 4865.319336, avg loss: 3.100905, ppl: 22.218042 +epoch: 1, batch: 1009, sum loss: 4658.591309, avg loss: 2.811461, ppl: 16.634207 +epoch: 1, batch: 1010, sum loss: 4674.227051, avg loss: 2.770733, ppl: 15.970342 +epoch: 1, batch: 1011, sum loss: 4697.825684, avg loss: 2.676824, ppl: 14.538839 +epoch: 1, batch: 1012, sum loss: 5158.933594, avg loss: 2.932879, ppl: 18.781620 +epoch: 1, batch: 1013, sum loss: 3895.025635, avg loss: 2.760472, ppl: 15.807300 +epoch: 1, batch: 1014, sum loss: 5185.264648, avg loss: 3.004209, ppl: 20.170252 +epoch: 1, batch: 1015, sum loss: 4222.597168, avg loss: 2.765290, ppl: 15.883638 +epoch: 1, batch: 1016, sum loss: 4699.654297, avg loss: 2.884993, ppl: 17.903448 +epoch: 1, batch: 1017, sum loss: 4325.447754, avg loss: 2.548879, ppl: 12.792757 +epoch: 1, batch: 1018, sum loss: 4457.425293, avg loss: 2.789378, ppl: 16.270891 +epoch: 1, batch: 1019, sum loss: 3811.197266, avg loss: 2.571658, ppl: 13.087507 +epoch: 1, batch: 1020, sum loss: 4818.102539, avg loss: 2.859408, ppl: 17.451191 +epoch: 1, batch: 1021, sum loss: 5297.280273, avg loss: 2.962685, ppl: 19.349850 +epoch: 1, batch: 1022, sum loss: 4401.124023, avg loss: 2.914652, ppl: 18.442387 +epoch: 1, batch: 1023, sum loss: 5184.578125, avg loss: 2.925834, ppl: 18.649776 +epoch: 1, batch: 1024, sum loss: 4473.670898, avg loss: 2.732847, ppl: 15.376605 +epoch: 1, batch: 1025, sum loss: 4913.721680, avg loss: 2.976210, ppl: 19.613335 +epoch: 1, batch: 1026, sum loss: 4598.336426, avg loss: 2.856110, ppl: 17.393726 +epoch: 1, batch: 1027, sum loss: 5665.966797, avg loss: 2.986804, ppl: 19.822226 +epoch: 1, batch: 1028, sum loss: 4227.887207, avg loss: 2.622759, ppl: 13.773671 +epoch: 1, 
batch: 1029, sum loss: 5370.362793, avg loss: 2.942665, ppl: 18.966316 +epoch: 1, batch: 1030, sum loss: 4108.766602, avg loss: 2.627089, ppl: 13.833436 +epoch: 1, batch: 1031, sum loss: 5785.591797, avg loss: 2.862737, ppl: 17.509392 +epoch: 1, batch: 1032, sum loss: 4176.371094, avg loss: 2.751233, ppl: 15.661925 +epoch: 1, batch: 1033, sum loss: 5022.480469, avg loss: 3.066227, ppl: 21.460783 +epoch: 1, batch: 1034, sum loss: 5743.080078, avg loss: 3.131450, ppl: 22.907175 +epoch: 1, batch: 1035, sum loss: 4793.934570, avg loss: 2.601158, ppl: 13.479340 +epoch: 1, batch: 1036, sum loss: 4703.507812, avg loss: 2.545188, ppl: 12.745626 +epoch: 1, batch: 1037, sum loss: 4641.181641, avg loss: 2.564189, ppl: 12.990119 +epoch: 1, batch: 1038, sum loss: 4858.029297, avg loss: 2.738461, ppl: 15.463161 +epoch: 1, batch: 1039, sum loss: 3823.429688, avg loss: 2.996418, ppl: 20.013729 +epoch: 1, batch: 1040, sum loss: 4694.936523, avg loss: 2.658514, ppl: 14.275067 +epoch: 1, batch: 1041, sum loss: 4589.150879, avg loss: 2.798263, ppl: 16.416105 +epoch: 1, batch: 1042, sum loss: 4230.148438, avg loss: 2.665500, ppl: 14.375137 +epoch: 1, batch: 1043, sum loss: 4063.304932, avg loss: 2.485202, ppl: 12.003543 +epoch: 1, batch: 1044, sum loss: 4839.663086, avg loss: 2.740466, ppl: 15.494205 +epoch: 1, batch: 1045, sum loss: 4786.988281, avg loss: 2.933204, ppl: 18.787724 +epoch: 1, batch: 1046, sum loss: 4620.915527, avg loss: 3.018234, ppl: 20.455132 +epoch: 1, batch: 1047, sum loss: 4268.925781, avg loss: 2.686549, ppl: 14.680921 +epoch: 1, batch: 1048, sum loss: 5712.600098, avg loss: 2.754388, ppl: 15.711416 +epoch: 1, batch: 1049, sum loss: 4650.306641, avg loss: 2.751661, ppl: 15.668633 +epoch: 1, batch: 1050, sum loss: 4767.741699, avg loss: 2.959492, ppl: 19.288176 +epoch: 1, batch: 1051, sum loss: 4643.321289, avg loss: 2.726554, ppl: 15.280140 +epoch: 1, batch: 1052, sum loss: 5190.852539, avg loss: 2.917849, ppl: 18.501440 +epoch: 1, batch: 1053, sum loss: 
4404.439941, avg loss: 2.912990, ppl: 18.411760 +epoch: 1, batch: 1054, sum loss: 3643.507324, avg loss: 2.539029, ppl: 12.667370 +epoch: 1, batch: 1055, sum loss: 4233.585449, avg loss: 2.662632, ppl: 14.333970 +epoch: 1, batch: 1056, sum loss: 4854.354492, avg loss: 2.807608, ppl: 16.570236 +epoch: 1, batch: 1057, sum loss: 4800.763672, avg loss: 3.026963, ppl: 20.634476 +epoch: 1, batch: 1058, sum loss: 4572.742188, avg loss: 2.700970, ppl: 14.894175 +epoch: 1, batch: 1059, sum loss: 5749.180176, avg loss: 2.995925, ppl: 20.003855 +epoch: 1, batch: 1060, sum loss: 5714.570312, avg loss: 2.798516, ppl: 16.420265 +epoch: 1, batch: 1061, sum loss: 4266.533203, avg loss: 2.765090, ppl: 15.880466 +epoch: 1, batch: 1062, sum loss: 4202.129395, avg loss: 2.793969, ppl: 16.345766 +epoch: 1, batch: 1063, sum loss: 4387.010254, avg loss: 2.839489, ppl: 17.107021 +epoch: 1, batch: 1064, sum loss: 4515.383789, avg loss: 3.116207, ppl: 22.560648 +epoch: 1, batch: 1065, sum loss: 4406.425293, avg loss: 2.803070, ppl: 16.495203 +epoch: 1, batch: 1066, sum loss: 4643.715820, avg loss: 2.790695, ppl: 16.292334 +epoch: 1, batch: 1067, sum loss: 4923.591797, avg loss: 2.918549, ppl: 18.514400 +epoch: 1, batch: 1068, sum loss: 4476.351074, avg loss: 2.534740, ppl: 12.613151 +epoch: 1, batch: 1069, sum loss: 4816.285156, avg loss: 2.823145, ppl: 16.829691 +epoch: 1, batch: 1070, sum loss: 4576.181152, avg loss: 2.673003, ppl: 14.483397 +epoch: 1, batch: 1071, sum loss: 4630.520020, avg loss: 2.941880, ppl: 18.951450 +epoch: 1, batch: 1072, sum loss: 4778.467773, avg loss: 2.774952, ppl: 16.037861 +epoch: 1, batch: 1073, sum loss: 4464.301270, avg loss: 2.692582, ppl: 14.769764 +epoch: 1, batch: 1074, sum loss: 4557.814453, avg loss: 2.636099, ppl: 13.958639 +epoch: 1, batch: 1075, sum loss: 5212.347656, avg loss: 2.946494, ppl: 19.039087 +epoch: 1, batch: 1076, sum loss: 4574.056641, avg loss: 2.885840, ppl: 17.918615 +epoch: 1, batch: 1077, sum loss: 5137.042969, avg loss: 
2.719451, ppl: 15.171989 +epoch: 1, batch: 1078, sum loss: 5333.465820, avg loss: 3.009857, ppl: 20.284492 +epoch: 1, batch: 1079, sum loss: 4152.248535, avg loss: 2.534950, ppl: 12.615800 +epoch: 1, batch: 1080, sum loss: 5136.686523, avg loss: 2.941974, ppl: 18.953226 +epoch: 1, batch: 1081, sum loss: 4745.935547, avg loss: 2.808246, ppl: 16.580809 +epoch: 1, batch: 1082, sum loss: 4927.555664, avg loss: 3.039824, ppl: 20.901575 +epoch: 1, batch: 1083, sum loss: 4751.496094, avg loss: 2.907892, ppl: 18.318151 +epoch: 1, batch: 1084, sum loss: 5319.787598, avg loss: 2.772166, ppl: 15.993246 +epoch: 1, batch: 1085, sum loss: 4170.229492, avg loss: 2.603139, ppl: 13.506072 +epoch: 1, batch: 1086, sum loss: 4590.855469, avg loss: 2.855010, ppl: 17.374603 +epoch: 1, batch: 1087, sum loss: 4582.558594, avg loss: 2.760577, ppl: 15.808969 +epoch: 1, batch: 1088, sum loss: 5216.997559, avg loss: 2.696123, ppl: 14.822149 +epoch: 1, batch: 1089, sum loss: 4422.055664, avg loss: 2.721265, ppl: 15.199538 +epoch: 1, batch: 1090, sum loss: 4683.776855, avg loss: 2.811391, ppl: 16.633032 +epoch: 1, batch: 1091, sum loss: 3733.922363, avg loss: 2.568035, ppl: 13.040171 +epoch: 1, batch: 1092, sum loss: 4713.789551, avg loss: 2.824320, ppl: 16.849482 +epoch: 1, batch: 1093, sum loss: 5350.653320, avg loss: 3.036693, ppl: 20.836231 +epoch: 1, batch: 1094, sum loss: 4709.123535, avg loss: 2.745845, ppl: 15.577765 +epoch: 1, batch: 1095, sum loss: 4004.891602, avg loss: 2.614159, ppl: 13.655728 +epoch: 1, batch: 1096, sum loss: 4978.142578, avg loss: 2.880870, ppl: 17.829771 +epoch: 1, batch: 1097, sum loss: 4104.530273, avg loss: 2.557340, ppl: 12.901449 +epoch: 1, batch: 1098, sum loss: 5682.604492, avg loss: 3.088372, ppl: 21.941322 +epoch: 1, batch: 1099, sum loss: 4508.129883, avg loss: 2.891680, ppl: 18.023573 +epoch: 1, batch: 1100, sum loss: 4922.221680, avg loss: 2.768404, ppl: 15.933181 +epoch: 1, batch: 1101, sum loss: 4771.428223, avg loss: 2.818327, ppl: 16.748814 
+epoch: 1, batch: 1102, sum loss: 4703.590332, avg loss: 2.813152, ppl: 16.662361 +epoch: 1, batch: 1103, sum loss: 5133.426758, avg loss: 3.012574, ppl: 20.339695 +epoch: 1, batch: 1104, sum loss: 4546.528809, avg loss: 2.818679, ppl: 16.754702 +epoch: 1, batch: 1105, sum loss: 4997.671875, avg loss: 2.882164, ppl: 17.852856 +epoch: 1, batch: 1106, sum loss: 5125.279297, avg loss: 2.887481, ppl: 17.948046 +epoch: 1, batch: 1107, sum loss: 3729.742432, avg loss: 2.559878, ppl: 12.934240 +epoch: 1, batch: 1108, sum loss: 3840.176514, avg loss: 2.657562, ppl: 14.261471 +epoch: 1, batch: 1109, sum loss: 4344.039551, avg loss: 2.790006, ppl: 16.281120 +epoch: 1, batch: 1110, sum loss: 5703.156250, avg loss: 2.749834, ppl: 15.640040 +epoch: 1, batch: 1111, sum loss: 4947.133301, avg loss: 2.833410, ppl: 17.003340 +epoch: 1, batch: 1112, sum loss: 4868.837891, avg loss: 2.769532, ppl: 15.951170 +epoch: 1, batch: 1113, sum loss: 4880.268555, avg loss: 2.845638, ppl: 17.212530 +epoch: 1, batch: 1114, sum loss: 4760.931641, avg loss: 2.784171, ppl: 16.186388 +epoch: 1, batch: 1115, sum loss: 5465.328125, avg loss: 3.255109, ppl: 25.922441 +epoch: 1, batch: 1116, sum loss: 4745.102051, avg loss: 2.620156, ppl: 13.737864 +epoch: 1, batch: 1117, sum loss: 4900.021484, avg loss: 2.641521, ppl: 14.034534 +epoch: 1, batch: 1118, sum loss: 5129.279297, avg loss: 2.760645, ppl: 15.810043 +epoch: 1, batch: 1119, sum loss: 3986.606934, avg loss: 2.650669, ppl: 14.163516 +epoch: 1, batch: 1120, sum loss: 5040.635254, avg loss: 2.748438, ppl: 15.618216 +epoch: 1, batch: 1121, sum loss: 4261.245605, avg loss: 2.955094, ppl: 19.203529 +epoch: 1, batch: 1122, sum loss: 5449.748535, avg loss: 2.863767, ppl: 17.527428 +epoch: 1, batch: 1123, sum loss: 4970.322266, avg loss: 2.874680, ppl: 17.719757 +epoch: 1, batch: 1124, sum loss: 4282.276367, avg loss: 2.874011, ppl: 17.707903 +epoch: 1, batch: 1125, sum loss: 4621.744629, avg loss: 3.118586, ppl: 22.614388 +epoch: 1, batch: 1126, sum 
loss: 4649.867188, avg loss: 2.676953, ppl: 14.540721 +epoch: 1, batch: 1127, sum loss: 5936.687012, avg loss: 3.071230, ppl: 21.568415 +epoch: 1, batch: 1128, sum loss: 4428.868652, avg loss: 2.723781, ppl: 15.237833 +epoch: 1, batch: 1129, sum loss: 4967.939941, avg loss: 3.097219, ppl: 22.136314 +epoch: 1, batch: 1130, sum loss: 4814.535645, avg loss: 2.771754, ppl: 15.986643 +epoch: 1, batch: 1131, sum loss: 4569.047363, avg loss: 2.599003, ppl: 13.450322 +epoch: 1, batch: 1132, sum loss: 4483.387695, avg loss: 2.656035, ppl: 14.239722 +epoch: 1, batch: 1133, sum loss: 6183.584961, avg loss: 3.161342, ppl: 23.602247 +epoch: 1, batch: 1134, sum loss: 4260.613281, avg loss: 2.623530, ppl: 13.784302 +epoch: 1, batch: 1135, sum loss: 4717.356445, avg loss: 2.703356, ppl: 14.929752 +epoch: 1, batch: 1136, sum loss: 4979.720703, avg loss: 2.832606, ppl: 16.989672 +epoch: 1, batch: 1137, sum loss: 4953.334961, avg loss: 2.946660, ppl: 19.042238 +epoch: 1, batch: 1138, sum loss: 5026.703125, avg loss: 2.995651, ppl: 19.998384 +epoch: 1, batch: 1139, sum loss: 5221.505371, avg loss: 2.786289, ppl: 16.220709 +epoch: 1, batch: 1140, sum loss: 4901.463379, avg loss: 2.926247, ppl: 18.657475 +epoch: 1, batch: 1141, sum loss: 4713.518555, avg loss: 2.761288, ppl: 15.820205 +epoch: 1, batch: 1142, sum loss: 4146.982910, avg loss: 2.566202, ppl: 13.016296 +epoch: 1, batch: 1143, sum loss: 5500.559082, avg loss: 2.867862, ppl: 17.599350 +epoch: 1, batch: 1144, sum loss: 4832.358398, avg loss: 2.814420, ppl: 16.683489 +epoch: 1, batch: 1145, sum loss: 4872.857422, avg loss: 3.028501, ppl: 20.666222 +epoch: 1, batch: 1146, sum loss: 4553.791992, avg loss: 2.733368, ppl: 15.384622 +epoch: 1, batch: 1147, sum loss: 5179.310547, avg loss: 2.985193, ppl: 19.790331 +epoch: 1, batch: 1148, sum loss: 4495.375000, avg loss: 2.687014, ppl: 14.687757 +epoch: 1, batch: 1149, sum loss: 4303.060059, avg loss: 2.679365, ppl: 14.575833 +epoch: 1, batch: 1150, sum loss: 4046.259033, avg loss: 
2.660262, ppl: 14.300040 +epoch: 1, batch: 1151, sum loss: 3621.793457, avg loss: 2.509905, ppl: 12.303765 +epoch: 1, batch: 1152, sum loss: 4581.341309, avg loss: 2.685429, ppl: 14.664486 +epoch: 1, batch: 1153, sum loss: 5115.307617, avg loss: 3.017881, ppl: 20.447910 +epoch: 1, batch: 1154, sum loss: 4796.369629, avg loss: 2.860089, ppl: 17.463081 +epoch: 1, batch: 1155, sum loss: 4441.891113, avg loss: 2.795400, ppl: 16.369181 +epoch: 1, batch: 1156, sum loss: 5327.486816, avg loss: 2.712570, ppl: 15.067946 +epoch: 1, batch: 1157, sum loss: 4547.728516, avg loss: 2.928351, ppl: 18.696768 +epoch: 1, batch: 1158, sum loss: 5145.965332, avg loss: 3.072218, ppl: 21.589745 +epoch: 1, batch: 1159, sum loss: 4512.913086, avg loss: 2.728484, ppl: 15.309666 +epoch: 1, batch: 1160, sum loss: 4183.958008, avg loss: 2.770833, ppl: 15.971937 +epoch: 1, batch: 1161, sum loss: 5025.495605, avg loss: 2.966644, ppl: 19.426620 +epoch: 1, batch: 1162, sum loss: 4631.993164, avg loss: 2.762071, ppl: 15.832597 +epoch: 1, batch: 1163, sum loss: 4876.392578, avg loss: 2.713630, ppl: 15.083927 +epoch: 1, batch: 1164, sum loss: 4572.048340, avg loss: 2.861107, ppl: 17.480862 +epoch: 1, batch: 1165, sum loss: 3987.610107, avg loss: 2.707135, ppl: 14.986281 +epoch: 1, batch: 1166, sum loss: 5098.392090, avg loss: 2.854643, ppl: 17.368233 +epoch: 1, batch: 1167, sum loss: 3988.551270, avg loss: 2.569943, ppl: 13.065079 +epoch: 1, batch: 1168, sum loss: 5307.707031, avg loss: 2.892483, ppl: 18.038052 +epoch: 1, batch: 1169, sum loss: 5047.063965, avg loss: 3.075603, ppl: 21.662931 +epoch: 1, batch: 1170, sum loss: 4711.030762, avg loss: 2.753379, ppl: 15.695576 +epoch: 1, batch: 1171, sum loss: 4330.179199, avg loss: 2.582098, ppl: 13.224861 +epoch: 1, batch: 1172, sum loss: 5859.870117, avg loss: 3.128601, ppl: 22.842009 +epoch: 1, batch: 1173, sum loss: 4984.142578, avg loss: 3.052139, ppl: 21.160549 +epoch: 1, batch: 1174, sum loss: 5818.094727, avg loss: 3.096378, ppl: 22.117702 
+epoch: 1, batch: 1175, sum loss: 3941.750000, avg loss: 2.440712, ppl: 11.481215 +epoch: 1, batch: 1176, sum loss: 4518.329590, avg loss: 2.539814, ppl: 12.677316 +epoch: 1, batch: 1177, sum loss: 4276.684570, avg loss: 2.858747, ppl: 17.439653 +epoch: 1, batch: 1178, sum loss: 4780.448730, avg loss: 2.803782, ppl: 16.506966 +epoch: 1, batch: 1179, sum loss: 5666.566895, avg loss: 2.911905, ppl: 18.391802 +epoch: 1, batch: 1180, sum loss: 5667.252441, avg loss: 3.030616, ppl: 20.709991 +epoch: 1, batch: 1181, sum loss: 4443.770508, avg loss: 2.817863, ppl: 16.741045 +epoch: 1, batch: 1182, sum loss: 2932.351074, avg loss: 2.397671, ppl: 10.997528 +epoch: 1, batch: 1183, sum loss: 4122.878906, avg loss: 2.808501, ppl: 16.585032 +epoch: 1, batch: 1184, sum loss: 5435.179199, avg loss: 2.933178, ppl: 18.787245 +epoch: 1, batch: 1185, sum loss: 5209.916504, avg loss: 3.044954, ppl: 21.009071 +epoch: 1, batch: 1186, sum loss: 4469.279297, avg loss: 2.715237, ppl: 15.108185 +epoch: 1, batch: 1187, sum loss: 4987.936523, avg loss: 2.883200, ppl: 17.871372 +epoch: 1, batch: 1188, sum loss: 4914.366699, avg loss: 2.873899, ppl: 17.705910 +epoch: 1, batch: 1189, sum loss: 4500.655273, avg loss: 2.644333, ppl: 14.074059 +epoch: 1, batch: 1190, sum loss: 4141.166992, avg loss: 2.661418, ppl: 14.316582 +epoch: 1, batch: 1191, sum loss: 4564.108887, avg loss: 2.742854, ppl: 15.531250 +epoch: 1, batch: 1192, sum loss: 4319.187500, avg loss: 2.873711, ppl: 17.702589 +epoch: 1, batch: 1193, sum loss: 5081.421387, avg loss: 2.973330, ppl: 19.556940 +epoch: 1, batch: 1194, sum loss: 4683.617188, avg loss: 2.840277, ppl: 17.120510 +epoch: 1, batch: 1195, sum loss: 4572.233887, avg loss: 2.953640, ppl: 19.175617 +epoch: 1, batch: 1196, sum loss: 3586.814209, avg loss: 2.494308, ppl: 12.113342 +epoch: 1, batch: 1197, sum loss: 4346.822266, avg loss: 2.601330, ppl: 13.481657 +epoch: 1, batch: 1198, sum loss: 3789.067383, avg loss: 2.484634, ppl: 11.996734 +epoch: 1, batch: 1199, sum 
loss: 4326.745605, avg loss: 2.672480, ppl: 14.475830 +epoch: 1, batch: 1200, sum loss: 4764.171875, avg loss: 2.829081, ppl: 16.929892 +epoch: 1, batch: 1201, sum loss: 4783.486816, avg loss: 3.019878, ppl: 20.488794 +epoch: 1, batch: 1202, sum loss: 5862.098145, avg loss: 2.996983, ppl: 20.025023 +epoch: 1, batch: 1203, sum loss: 4499.105957, avg loss: 2.784100, ppl: 16.185249 +epoch: 1, batch: 1204, sum loss: 4428.663086, avg loss: 2.879495, ppl: 17.805277 +epoch: 1, batch: 1205, sum loss: 4290.723145, avg loss: 2.683379, ppl: 14.634459 +epoch: 1, batch: 1206, sum loss: 5696.070312, avg loss: 3.225408, ppl: 25.163836 +epoch: 1, batch: 1207, sum loss: 4882.462402, avg loss: 2.793171, ppl: 16.332727 +epoch: 1, batch: 1208, sum loss: 4834.656250, avg loss: 2.980676, ppl: 19.701124 +epoch: 1, batch: 1209, sum loss: 5512.010742, avg loss: 2.969833, ppl: 19.488672 +epoch: 1, batch: 1210, sum loss: 5434.045898, avg loss: 2.854016, ppl: 17.357346 +epoch: 1, batch: 1211, sum loss: 5111.098633, avg loss: 2.831634, ppl: 16.973166 +epoch: 1, batch: 1212, sum loss: 4270.480469, avg loss: 2.714864, ppl: 15.102549 +epoch: 1, batch: 1213, sum loss: 4614.452148, avg loss: 2.866119, ppl: 17.568708 +epoch: 1, batch: 1214, sum loss: 5655.851562, avg loss: 2.975198, ppl: 19.593502 +epoch: 1, batch: 1215, sum loss: 5086.361816, avg loss: 2.901519, ppl: 18.201765 +epoch: 1, batch: 1216, sum loss: 4654.729980, avg loss: 2.747774, ppl: 15.607856 +epoch: 1, batch: 1217, sum loss: 4662.900391, avg loss: 2.589062, ppl: 13.317270 +epoch: 1, batch: 1218, sum loss: 5456.044922, avg loss: 2.934935, ppl: 18.820286 +epoch: 1, batch: 1219, sum loss: 4510.178711, avg loss: 2.998789, ppl: 20.061224 +epoch: 1, batch: 1220, sum loss: 4657.864258, avg loss: 2.911165, ppl: 18.378197 +epoch: 1, batch: 1221, sum loss: 4355.887207, avg loss: 2.919496, ppl: 18.531937 +epoch: 1, batch: 1222, sum loss: 4073.861572, avg loss: 2.659178, ppl: 14.284550 +epoch: 1, batch: 1223, sum loss: 3655.885498, avg loss: 
2.763330, ppl: 15.852544 +epoch: 1, batch: 1224, sum loss: 4944.546875, avg loss: 2.941432, ppl: 18.942949 +epoch: 1, batch: 1225, sum loss: 4771.897461, avg loss: 2.878105, ppl: 17.780542 +epoch: 1, batch: 1226, sum loss: 4758.750000, avg loss: 2.820836, ppl: 16.790878 +epoch: 1, batch: 1227, sum loss: 4623.540527, avg loss: 2.632996, ppl: 13.915396 +epoch: 1, batch: 1228, sum loss: 4245.796875, avg loss: 2.791451, ppl: 16.304661 +epoch: 1, batch: 1229, sum loss: 3236.648682, avg loss: 2.119613, ppl: 8.327911 +epoch: 1, batch: 1230, sum loss: 5097.125488, avg loss: 2.692618, ppl: 14.770288 +epoch: 1, batch: 1231, sum loss: 5072.690918, avg loss: 2.870793, ppl: 17.651007 +epoch: 1, batch: 1232, sum loss: 4957.089844, avg loss: 2.832623, ppl: 16.989964 +epoch: 1, batch: 1233, sum loss: 5990.153809, avg loss: 3.100494, ppl: 22.208912 +epoch: 1, batch: 1234, sum loss: 5211.928711, avg loss: 2.901965, ppl: 18.209890 +epoch: 1, batch: 1235, sum loss: 6111.757324, avg loss: 2.911747, ppl: 18.388901 +epoch: 1, batch: 1236, sum loss: 4956.306641, avg loss: 2.987527, ppl: 19.836559 +epoch: 1, batch: 1237, sum loss: 4381.307617, avg loss: 2.492211, ppl: 12.087977 +epoch: 1, batch: 1238, sum loss: 4992.095215, avg loss: 3.049539, ppl: 21.105614 +epoch: 1, batch: 1239, sum loss: 4767.297852, avg loss: 2.794430, ppl: 16.353306 +epoch: 1, batch: 1240, sum loss: 5195.909668, avg loss: 2.894657, ppl: 18.077307 +epoch: 1, batch: 1241, sum loss: 5125.052246, avg loss: 2.907006, ppl: 18.301926 +epoch: 1, batch: 1242, sum loss: 4957.113281, avg loss: 2.682420, ppl: 14.620440 +epoch: 1, batch: 1243, sum loss: 5217.165527, avg loss: 3.008746, ppl: 20.261974 +epoch: 1, batch: 1244, sum loss: 4607.036621, avg loss: 2.782027, ppl: 16.151728 +epoch: 1, batch: 1245, sum loss: 4117.926270, avg loss: 2.653303, ppl: 14.200869 +epoch: 1, batch: 1246, sum loss: 4684.160156, avg loss: 2.608107, ppl: 13.573334 +epoch: 1, batch: 1247, sum loss: 4923.094727, avg loss: 2.827740, ppl: 16.907202 +epoch: 
1, batch: 1248, sum loss: 3878.400879, avg loss: 2.531593, ppl: 12.573524 +epoch: 1, batch: 1249, sum loss: 5887.540039, avg loss: 3.044230, ppl: 20.993849 +epoch: 1, batch: 1250, sum loss: 6260.663574, avg loss: 3.316030, ppl: 27.550745 +epoch: 1, batch: 1251, sum loss: 4845.004395, avg loss: 2.653343, ppl: 14.201438 +epoch: 1, batch: 1252, sum loss: 5474.562988, avg loss: 2.919767, ppl: 18.536966 +epoch: 1, batch: 1253, sum loss: 4976.104492, avg loss: 2.665294, ppl: 14.372177 +epoch: 1, batch: 1254, sum loss: 5284.850586, avg loss: 2.911763, ppl: 18.389198 +epoch: 1, batch: 1255, sum loss: 4370.492188, avg loss: 2.601484, ppl: 13.483727 +epoch: 1, batch: 1256, sum loss: 4698.581543, avg loss: 2.762247, ppl: 15.835379 +epoch: 1, batch: 1257, sum loss: 4636.653320, avg loss: 2.733876, ppl: 15.392428 +epoch: 1, batch: 1258, sum loss: 5195.041992, avg loss: 3.057706, ppl: 21.278685 +epoch: 1, batch: 1259, sum loss: 4908.577148, avg loss: 2.765395, ppl: 15.885320 +epoch: 1, batch: 1260, sum loss: 4796.928711, avg loss: 2.968396, ppl: 19.460688 +epoch: 1, batch: 1261, sum loss: 5620.555176, avg loss: 2.930425, ppl: 18.735590 +epoch: 1, batch: 1262, sum loss: 4879.598633, avg loss: 2.700387, ppl: 14.885484 +epoch: 1, batch: 1263, sum loss: 4208.946289, avg loss: 2.698043, ppl: 14.850636 +epoch: 1, batch: 1264, sum loss: 5439.783203, avg loss: 2.881241, ppl: 17.836395 +epoch: 1, batch: 1265, sum loss: 4620.157227, avg loss: 2.665988, ppl: 14.382152 +epoch: 1, batch: 1266, sum loss: 5076.741211, avg loss: 2.937929, ppl: 18.876711 +epoch: 1, batch: 1267, sum loss: 4488.305176, avg loss: 2.819287, ppl: 16.764894 +epoch: 1, batch: 1268, sum loss: 4825.790527, avg loss: 2.896633, ppl: 18.113054 +epoch: 1, batch: 1269, sum loss: 5315.027344, avg loss: 2.999451, ppl: 20.074516 +epoch: 1, batch: 1270, sum loss: 4689.163574, avg loss: 2.801173, ppl: 16.463951 +epoch: 1, batch: 1271, sum loss: 4602.714355, avg loss: 2.568479, ppl: 13.045967 +epoch: 1, batch: 1272, sum loss: 
4577.423828, avg loss: 2.995696, ppl: 19.999281 +epoch: 1, batch: 1273, sum loss: 5325.376465, avg loss: 2.776526, ppl: 16.063116 +epoch: 1, batch: 1274, sum loss: 4435.408203, avg loss: 2.787812, ppl: 16.245428 +epoch: 1, batch: 1275, sum loss: 5270.706543, avg loss: 2.883319, ppl: 17.873491 +epoch: 1, batch: 1276, sum loss: 5081.660156, avg loss: 2.944183, ppl: 18.995140 +epoch: 1, batch: 1277, sum loss: 5237.759766, avg loss: 2.922857, ppl: 18.594337 +epoch: 1, batch: 1278, sum loss: 3886.223145, avg loss: 2.663621, ppl: 14.348152 +epoch: 1, batch: 1279, sum loss: 5065.103516, avg loss: 2.760275, ppl: 15.804183 +epoch: 1, batch: 1280, sum loss: 3967.666016, avg loss: 2.568069, ppl: 13.040615 +epoch: 1, batch: 1281, sum loss: 4510.313477, avg loss: 2.580271, ppl: 13.200715 +epoch: 1, batch: 1282, sum loss: 5327.645508, avg loss: 3.163685, ppl: 23.657616 +epoch: 1, batch: 1283, sum loss: 3617.536133, avg loss: 2.561994, ppl: 12.961641 +epoch: 1, batch: 1284, sum loss: 5368.682617, avg loss: 2.813775, ppl: 16.672737 +epoch: 1, batch: 1285, sum loss: 5341.907715, avg loss: 2.855108, ppl: 17.376322 +epoch: 1, batch: 1286, sum loss: 4523.831055, avg loss: 2.673659, ppl: 14.492903 +epoch: 1, batch: 1287, sum loss: 4144.952148, avg loss: 2.613463, ppl: 13.646225 +epoch: 1, batch: 1288, sum loss: 5656.386719, avg loss: 2.878568, ppl: 17.788788 +epoch: 1, batch: 1289, sum loss: 4699.464844, avg loss: 2.856817, ppl: 17.406044 +epoch: 1, batch: 1290, sum loss: 4173.468262, avg loss: 2.704775, ppl: 14.950957 +epoch: 1, batch: 1291, sum loss: 5925.407715, avg loss: 3.099062, ppl: 22.177149 +epoch: 1, batch: 1292, sum loss: 5331.118164, avg loss: 2.704778, ppl: 14.951000 +epoch: 1, batch: 1293, sum loss: 3637.375000, avg loss: 2.267690, ppl: 9.657069 +epoch: 1, batch: 1294, sum loss: 5657.727539, avg loss: 2.945199, ppl: 19.014450 +epoch: 1, batch: 1295, sum loss: 4308.894043, avg loss: 2.619388, ppl: 13.727324 +epoch: 1, batch: 1296, sum loss: 5398.613770, avg loss: 2.900921, 
ppl: 18.190893 +epoch: 1, batch: 1297, sum loss: 4472.343750, avg loss: 2.720404, ppl: 15.186451 +epoch: 1, batch: 1298, sum loss: 4207.371094, avg loss: 2.662893, ppl: 14.337709 +epoch: 1, batch: 1299, sum loss: 5013.322266, avg loss: 2.966463, ppl: 19.423096 +epoch: 1, batch: 1300, sum loss: 5643.395508, avg loss: 3.030825, ppl: 20.714308 +epoch: 1, batch: 1301, sum loss: 4377.875488, avg loss: 2.783138, ppl: 16.169674 +epoch: 1, batch: 1302, sum loss: 5188.804199, avg loss: 2.926568, ppl: 18.663460 +epoch: 1, batch: 1303, sum loss: 5692.991699, avg loss: 2.958935, ppl: 19.277439 +epoch: 1, batch: 1304, sum loss: 4900.474609, avg loss: 2.822854, ppl: 16.824800 +epoch: 1, batch: 1305, sum loss: 4642.975098, avg loss: 2.791927, ppl: 16.312429 +epoch: 1, batch: 1306, sum loss: 5188.392578, avg loss: 2.771577, ppl: 15.983827 +epoch: 1, batch: 1307, sum loss: 3817.336914, avg loss: 2.434526, ppl: 11.410409 +epoch: 1, batch: 1308, sum loss: 5174.372559, avg loss: 2.871461, ppl: 17.662804 +epoch: 1, batch: 1309, sum loss: 5824.225098, avg loss: 2.953461, ppl: 19.172194 +epoch: 1, batch: 1310, sum loss: 4626.233398, avg loss: 2.795307, ppl: 16.367659 +epoch: 1, batch: 1311, sum loss: 4557.534668, avg loss: 2.895511, ppl: 18.092747 +epoch: 1, batch: 1312, sum loss: 4692.300293, avg loss: 2.816507, ppl: 16.718346 +epoch: 1, batch: 1313, sum loss: 4985.289062, avg loss: 2.915374, ppl: 18.455709 +epoch: 1, batch: 1314, sum loss: 4014.764160, avg loss: 2.776462, ppl: 16.062094 +epoch: 1, batch: 1315, sum loss: 5241.103027, avg loss: 2.886070, ppl: 17.922731 +epoch: 1, batch: 1316, sum loss: 4549.329102, avg loss: 2.631191, ppl: 13.890304 +epoch: 1, batch: 1317, sum loss: 5397.498047, avg loss: 3.015362, ppl: 20.396473 +epoch: 1, batch: 1318, sum loss: 3904.857178, avg loss: 2.636636, ppl: 13.966136 +epoch: 1, batch: 1319, sum loss: 4503.309570, avg loss: 2.816329, ppl: 16.715368 +epoch: 1, batch: 1320, sum loss: 4897.714844, avg loss: 2.865837, ppl: 17.563742 +epoch: 1, 
batch: 1321, sum loss: 5291.203125, avg loss: 3.049685, ppl: 21.108688 +epoch: 1, batch: 1322, sum loss: 5455.071777, avg loss: 3.245135, ppl: 25.665171 +epoch: 1, batch: 1323, sum loss: 5259.002930, avg loss: 3.128497, ppl: 22.839617 +epoch: 1, batch: 1324, sum loss: 4687.390137, avg loss: 2.877465, ppl: 17.769167 +epoch: 1, batch: 1325, sum loss: 5099.196289, avg loss: 2.712338, ppl: 15.064462 +epoch: 1, batch: 1326, sum loss: 4347.558105, avg loss: 2.915867, ppl: 18.464821 +epoch: 1, batch: 1327, sum loss: 4506.161621, avg loss: 2.828727, ppl: 16.923899 +epoch: 1, batch: 1328, sum loss: 4661.052246, avg loss: 2.642320, ppl: 14.045751 +epoch: 1, batch: 1329, sum loss: 4849.446289, avg loss: 2.700137, ppl: 14.881772 +epoch: 1, batch: 1330, sum loss: 5068.501953, avg loss: 2.917963, ppl: 18.503563 +epoch: 1, batch: 1331, sum loss: 3670.650146, avg loss: 2.527996, ppl: 12.528375 +epoch: 1, batch: 1332, sum loss: 4025.951172, avg loss: 2.564300, ppl: 12.991562 +epoch: 1, batch: 1333, sum loss: 4435.388184, avg loss: 2.821494, ppl: 16.801931 +epoch: 1, batch: 1334, sum loss: 5396.474121, avg loss: 2.899771, ppl: 18.169989 +epoch: 1, batch: 1335, sum loss: 4542.922852, avg loss: 2.753287, ppl: 15.694127 +epoch: 1, batch: 1336, sum loss: 5522.958008, avg loss: 2.909883, ppl: 18.354656 +epoch: 1, batch: 1337, sum loss: 5622.613281, avg loss: 3.153457, ppl: 23.416876 +epoch: 1, batch: 1338, sum loss: 4389.171387, avg loss: 2.621966, ppl: 13.762759 +epoch: 1, batch: 1339, sum loss: 3943.087646, avg loss: 2.630479, ppl: 13.880415 +epoch: 1, batch: 1340, sum loss: 4931.936035, avg loss: 2.674586, ppl: 14.506340 +epoch: 1, batch: 1341, sum loss: 4328.944824, avg loss: 2.563022, ppl: 12.974974 +epoch: 1, batch: 1342, sum loss: 4947.089844, avg loss: 2.693027, ppl: 14.776333 +epoch: 1, batch: 1343, sum loss: 5026.566406, avg loss: 2.875610, ppl: 17.736246 +epoch: 1, batch: 1344, sum loss: 4706.122559, avg loss: 2.974793, ppl: 19.585567 +epoch: 1, batch: 1345, sum loss: 
5376.228516, avg loss: 2.983479, ppl: 19.756426 +epoch: 1, batch: 1346, sum loss: 4367.002930, avg loss: 2.797568, ppl: 16.404696 +epoch: 1, batch: 1347, sum loss: 4619.862793, avg loss: 2.793146, ppl: 16.332315 +epoch: 1, batch: 1348, sum loss: 5147.655273, avg loss: 2.831494, ppl: 16.970789 +epoch: 1, batch: 1349, sum loss: 5465.902832, avg loss: 2.801590, ppl: 16.470821 +epoch: 1, batch: 1350, sum loss: 4939.965332, avg loss: 2.952759, ppl: 19.158735 +epoch: 1, batch: 1351, sum loss: 4354.430176, avg loss: 2.676355, ppl: 14.532033 +epoch: 1, batch: 1352, sum loss: 5311.546875, avg loss: 2.797023, ppl: 16.395765 +epoch: 1, batch: 1353, sum loss: 4146.948242, avg loss: 2.609785, ppl: 13.596126 +epoch: 1, batch: 1354, sum loss: 4627.833008, avg loss: 2.627957, ppl: 13.845460 +epoch: 1, batch: 1355, sum loss: 4550.473145, avg loss: 2.814146, ppl: 16.678919 +epoch: 1, batch: 1356, sum loss: 4701.692383, avg loss: 2.902279, ppl: 18.215618 +epoch: 1, batch: 1357, sum loss: 4113.619629, avg loss: 2.671181, ppl: 14.457039 +epoch: 1, batch: 1358, sum loss: 5291.568359, avg loss: 2.829716, ppl: 16.940645 +epoch: 1, batch: 1359, sum loss: 4578.528809, avg loss: 2.907002, ppl: 18.301851 +epoch: 1, batch: 1360, sum loss: 5437.423828, avg loss: 3.022470, ppl: 20.541973 +epoch: 1, batch: 1361, sum loss: 4051.555420, avg loss: 2.831276, ppl: 16.967093 +epoch: 1, batch: 1362, sum loss: 5581.281738, avg loss: 2.862196, ppl: 17.499910 +epoch: 1, batch: 1363, sum loss: 3912.542969, avg loss: 2.575736, ppl: 13.140983 +epoch: 1, batch: 1364, sum loss: 4136.360840, avg loss: 2.787305, ppl: 16.237204 +epoch: 1, batch: 1365, sum loss: 5241.381348, avg loss: 2.913497, ppl: 18.421108 +epoch: 1, batch: 1366, sum loss: 4500.479004, avg loss: 2.711132, ppl: 15.046299 +epoch: 1, batch: 1367, sum loss: 4081.657227, avg loss: 2.396745, ppl: 10.987356 +epoch: 1, batch: 1368, sum loss: 4673.676758, avg loss: 2.720417, ppl: 15.186658 +epoch: 1, batch: 1369, sum loss: 5588.649414, avg loss: 
2.903194, ppl: 18.232294 +epoch: 1, batch: 1370, sum loss: 5463.658691, avg loss: 2.964546, ppl: 19.385904 +epoch: 1, batch: 1371, sum loss: 5308.607910, avg loss: 3.023125, ppl: 20.555435 +epoch: 1, batch: 1372, sum loss: 4789.468262, avg loss: 2.799222, ppl: 16.431854 +epoch: 1, batch: 1373, sum loss: 5107.250000, avg loss: 2.945358, ppl: 19.017462 +epoch: 1, batch: 1374, sum loss: 4453.061523, avg loss: 2.897243, ppl: 18.124100 +epoch: 1, batch: 1375, sum loss: 5729.256836, avg loss: 2.789317, ppl: 16.269901 +epoch: 1, batch: 1376, sum loss: 5059.482422, avg loss: 2.922867, ppl: 18.594513 +epoch: 1, batch: 1377, sum loss: 4865.093262, avg loss: 2.939634, ppl: 18.908916 +epoch: 1, batch: 1378, sum loss: 5344.591309, avg loss: 2.892095, ppl: 18.031042 +epoch: 1, batch: 1379, sum loss: 5252.541504, avg loss: 2.921324, ppl: 18.565861 +epoch: 1, batch: 1380, sum loss: 4552.854492, avg loss: 2.771062, ppl: 15.975586 +epoch: 1, batch: 1381, sum loss: 4793.443359, avg loss: 2.782033, ppl: 16.151827 +epoch: 1, batch: 1382, sum loss: 4533.536133, avg loss: 2.885765, ppl: 17.917261 +epoch: 1, batch: 1383, sum loss: 4365.639160, avg loss: 2.953748, ppl: 19.177696 +epoch: 1, batch: 1384, sum loss: 4765.853516, avg loss: 2.674441, ppl: 14.504238 +epoch: 1, batch: 1385, sum loss: 4213.543945, avg loss: 2.732519, ppl: 15.371554 +epoch: 1, batch: 1386, sum loss: 5153.926270, avg loss: 3.008713, ppl: 20.261316 +epoch: 1, batch: 1387, sum loss: 4981.524414, avg loss: 2.755268, ppl: 15.725252 +epoch: 1, batch: 1388, sum loss: 4076.980469, avg loss: 2.743594, ppl: 15.542741 +epoch: 1, batch: 1389, sum loss: 4294.678223, avg loss: 2.699358, ppl: 14.870178 +epoch: 1, batch: 1390, sum loss: 4534.345215, avg loss: 2.761477, ppl: 15.823200 +epoch: 1, batch: 1391, sum loss: 4711.061035, avg loss: 2.865609, ppl: 17.559742 +epoch: 1, batch: 1392, sum loss: 4543.848145, avg loss: 2.638704, ppl: 13.995051 +epoch: 1, batch: 1393, sum loss: 5068.786621, avg loss: 2.916448, ppl: 18.475548 
+epoch: 1, batch: 1394, sum loss: 4856.544434, avg loss: 2.707104, ppl: 14.985813 +epoch: 1, batch: 1395, sum loss: 4653.018555, avg loss: 2.753265, ppl: 15.693794 +epoch: 1, batch: 1396, sum loss: 4679.426758, avg loss: 2.805412, ppl: 16.533884 +epoch: 1, batch: 1397, sum loss: 4693.750488, avg loss: 2.659349, ppl: 14.286982 +epoch: 1, batch: 1398, sum loss: 4476.056152, avg loss: 2.550459, ppl: 12.812988 +epoch: 1, batch: 1399, sum loss: 4912.571777, avg loss: 2.970116, ppl: 19.494183 +epoch: 1, batch: 1400, sum loss: 5352.354980, avg loss: 2.958737, ppl: 19.273617 +epoch: 1, batch: 1401, sum loss: 5636.382324, avg loss: 2.954079, ppl: 19.184040 +epoch: 1, batch: 1402, sum loss: 4872.879883, avg loss: 2.873160, ppl: 17.692846 +epoch: 1, batch: 1403, sum loss: 3942.473633, avg loss: 2.519152, ppl: 12.418067 +epoch: 1, batch: 1404, sum loss: 4821.401367, avg loss: 2.799885, ppl: 16.442749 +epoch: 1, batch: 1405, sum loss: 4896.978516, avg loss: 2.942896, ppl: 18.970703 +epoch: 1, batch: 1406, sum loss: 4951.372070, avg loss: 3.116030, ppl: 22.556656 +epoch: 1, batch: 1407, sum loss: 6053.522461, avg loss: 3.088532, ppl: 21.944834 +epoch: 1, batch: 1408, sum loss: 4758.167969, avg loss: 2.758358, ppl: 15.773929 +epoch: 1, batch: 1409, sum loss: 5386.822266, avg loss: 2.977790, ppl: 19.644361 +epoch: 1, batch: 1410, sum loss: 4141.423340, avg loss: 2.735418, ppl: 15.416191 +epoch: 1, batch: 1411, sum loss: 4940.816406, avg loss: 2.958573, ppl: 19.270451 +epoch: 1, batch: 1412, sum loss: 5914.364746, avg loss: 3.012922, ppl: 20.346758 +epoch: 1, batch: 1413, sum loss: 4857.612793, avg loss: 2.832427, ppl: 16.986641 +epoch: 1, batch: 1414, sum loss: 4787.974121, avg loss: 2.941016, ppl: 18.935074 +epoch: 1, batch: 1415, sum loss: 4880.444824, avg loss: 2.956054, ppl: 19.221972 +epoch: 1, batch: 1416, sum loss: 4855.659180, avg loss: 2.898901, ppl: 18.154186 +epoch: 1, batch: 1417, sum loss: 3896.183838, avg loss: 2.650465, ppl: 14.160625 +epoch: 1, batch: 1418, sum 
loss: 5745.321289, avg loss: 3.211471, ppl: 24.815559 +epoch: 1, batch: 1419, sum loss: 4818.604492, avg loss: 2.994782, ppl: 19.981009 +epoch: 1, batch: 1420, sum loss: 5409.297363, avg loss: 2.985263, ppl: 19.791714 +epoch: 1, batch: 1421, sum loss: 5509.497070, avg loss: 2.832646, ppl: 16.990364 +epoch: 1, batch: 1422, sum loss: 5219.681641, avg loss: 2.685022, ppl: 14.658519 +epoch: 1, batch: 1423, sum loss: 5548.103516, avg loss: 3.092588, ppl: 22.034039 +epoch: 1, batch: 1424, sum loss: 4756.920898, avg loss: 2.775333, ppl: 16.043972 +epoch: 1, batch: 1425, sum loss: 5394.676758, avg loss: 3.129163, ppl: 22.854843 +epoch: 1, batch: 1426, sum loss: 3968.109619, avg loss: 2.543660, ppl: 12.726166 +epoch: 1, batch: 1427, sum loss: 5394.859863, avg loss: 2.845390, ppl: 17.208275 +epoch: 1, batch: 1428, sum loss: 4100.859375, avg loss: 2.765246, ppl: 15.882945 +epoch: 1, batch: 1429, sum loss: 4936.510742, avg loss: 2.819252, ppl: 16.764315 +epoch: 1, batch: 1430, sum loss: 4418.787109, avg loss: 2.509249, ppl: 12.295690 +epoch: 1, batch: 1431, sum loss: 4317.326172, avg loss: 2.626111, ppl: 13.819917 +epoch: 1, batch: 1432, sum loss: 4338.658203, avg loss: 2.522476, ppl: 12.459405 +epoch: 1, batch: 1433, sum loss: 4513.587891, avg loss: 2.786165, ppl: 16.218710 +epoch: 1, batch: 1434, sum loss: 5513.241211, avg loss: 2.774656, ppl: 16.033108 +epoch: 1, batch: 1435, sum loss: 4510.397949, avg loss: 3.089314, ppl: 21.962002 +epoch: 1, batch: 1436, sum loss: 4885.683594, avg loss: 2.768093, ppl: 15.928224 +epoch: 1, batch: 1437, sum loss: 4489.263672, avg loss: 2.814585, ppl: 16.686256 +epoch: 1, batch: 1438, sum loss: 4473.067871, avg loss: 2.544407, ppl: 12.735675 +epoch: 1, batch: 1439, sum loss: 4325.712891, avg loss: 2.991503, ppl: 19.915588 +epoch: 1, batch: 1440, sum loss: 5665.375000, avg loss: 2.997553, ppl: 20.036446 +epoch: 1, batch: 1441, sum loss: 4319.643555, avg loss: 2.718467, ppl: 15.157065 +epoch: 1, batch: 1442, sum loss: 4555.115723, avg loss: 
2.509706, ppl: 12.301311 +epoch: 1, batch: 1443, sum loss: 5008.187012, avg loss: 2.944260, ppl: 18.996607 +epoch: 1, batch: 1444, sum loss: 4234.394043, avg loss: 2.886431, ppl: 17.929201 +epoch: 1, batch: 1445, sum loss: 4311.976562, avg loss: 2.626052, ppl: 13.819099 +epoch: 1, batch: 1446, sum loss: 4669.042480, avg loss: 2.951354, ppl: 19.131842 +epoch: 1, batch: 1447, sum loss: 5281.668457, avg loss: 3.011213, ppl: 20.312029 +epoch: 1, batch: 1448, sum loss: 4794.104004, avg loss: 2.833395, ppl: 17.003084 +epoch: 1, batch: 1449, sum loss: 3914.963379, avg loss: 2.557128, ppl: 12.898725 +epoch: 1, batch: 1450, sum loss: 4672.953125, avg loss: 2.880982, ppl: 17.831778 +epoch: 1, batch: 1451, sum loss: 4322.172852, avg loss: 2.562047, ppl: 12.962321 +epoch: 1, batch: 1452, sum loss: 4581.272949, avg loss: 2.738358, ppl: 15.461576 +epoch: 1, batch: 1453, sum loss: 5330.464844, avg loss: 2.984583, ppl: 19.778252 +epoch: 1, batch: 1454, sum loss: 4777.288574, avg loss: 2.759843, ppl: 15.797364 +epoch: 1, batch: 1455, sum loss: 5282.930664, avg loss: 3.032681, ppl: 20.752802 +epoch: 1, batch: 1456, sum loss: 4631.349609, avg loss: 2.742066, ppl: 15.519016 +epoch: 1, batch: 1457, sum loss: 4334.276367, avg loss: 2.692097, ppl: 14.762604 +epoch: 1, batch: 1458, sum loss: 4461.679688, avg loss: 2.654182, ppl: 14.213351 +epoch: 1, batch: 1459, sum loss: 5095.625000, avg loss: 3.054931, ppl: 21.219725 +epoch: 1, batch: 1460, sum loss: 4461.985352, avg loss: 2.897393, ppl: 18.126827 +epoch: 1, batch: 1461, sum loss: 4780.320312, avg loss: 2.672063, ppl: 14.469785 +epoch: 1, batch: 1462, sum loss: 6141.557129, avg loss: 3.000272, ppl: 20.091007 +epoch: 1, batch: 1463, sum loss: 4458.578613, avg loss: 2.677825, ppl: 14.553405 +epoch: 1, batch: 1464, sum loss: 4906.322266, avg loss: 2.854172, ppl: 17.360065 +epoch: 1, batch: 1465, sum loss: 4996.693359, avg loss: 2.743928, ppl: 15.547940 +epoch: 1, batch: 1466, sum loss: 4700.997070, avg loss: 2.771814, ppl: 15.987615 
+epoch: 1, batch: 1467, sum loss: 4995.720703, avg loss: 2.832041, ppl: 16.980085 +epoch: 1, batch: 1468, sum loss: 4816.962891, avg loss: 2.707680, ppl: 14.994448 +epoch: 1, batch: 1469, sum loss: 4234.332520, avg loss: 2.910194, ppl: 18.360363 +epoch: 1, batch: 1470, sum loss: 4394.629883, avg loss: 2.758713, ppl: 15.779522 +epoch: 1, batch: 1471, sum loss: 5283.342285, avg loss: 2.961515, ppl: 19.327229 +epoch: 1, batch: 1472, sum loss: 4453.729492, avg loss: 2.752614, ppl: 15.683576 +epoch: 1, batch: 1473, sum loss: 4257.502441, avg loss: 2.928131, ppl: 18.692663 +epoch: 1, batch: 1474, sum loss: 5320.314453, avg loss: 3.054142, ppl: 21.202976 +epoch: 1, batch: 1475, sum loss: 5306.750000, avg loss: 3.042861, ppl: 20.965143 +epoch: 1, batch: 1476, sum loss: 4554.562500, avg loss: 2.660375, ppl: 14.301653 +epoch: 1, batch: 1477, sum loss: 5988.020996, avg loss: 3.161574, ppl: 23.607729 +epoch: 1, batch: 1478, sum loss: 4387.928223, avg loss: 2.682108, ppl: 14.615870 +epoch: 1, batch: 1479, sum loss: 4030.853760, avg loss: 2.605594, ppl: 13.539264 +epoch: 1, batch: 1480, sum loss: 4355.295898, avg loss: 2.727174, ppl: 15.289611 +epoch: 1, batch: 1481, sum loss: 4806.862305, avg loss: 2.890476, ppl: 18.001884 +epoch: 1, batch: 1482, sum loss: 4485.216797, avg loss: 2.677742, ppl: 14.552191 +epoch: 1, batch: 1483, sum loss: 4904.942871, avg loss: 2.817314, ppl: 16.731844 +epoch: 1, batch: 1484, sum loss: 4240.363770, avg loss: 2.723419, ppl: 15.232315 +epoch: 1, batch: 1485, sum loss: 5099.339355, avg loss: 2.920584, ppl: 18.552118 +epoch: 1, batch: 1486, sum loss: 4490.925293, avg loss: 2.772176, ppl: 15.993402 +epoch: 1, batch: 1487, sum loss: 6121.482910, avg loss: 2.906687, ppl: 18.296083 +epoch: 1, batch: 1488, sum loss: 5047.187988, avg loss: 2.917450, ppl: 18.494062 +epoch: 1, batch: 1489, sum loss: 5141.370605, avg loss: 2.854731, ppl: 17.369761 +epoch: 1, batch: 1490, sum loss: 4969.944336, avg loss: 3.021243, ppl: 20.516771 +epoch: 1, batch: 1491, sum 
loss: 5108.980957, avg loss: 2.911100, ppl: 18.377005 +epoch: 1, batch: 1492, sum loss: 4531.171387, avg loss: 2.764595, ppl: 15.872611 +epoch: 1, batch: 1493, sum loss: 5493.703613, avg loss: 2.956783, ppl: 19.235996 +epoch: 1, batch: 1494, sum loss: 5232.092285, avg loss: 2.914815, ppl: 18.445395 +epoch: 1, batch: 1495, sum loss: 5730.572754, avg loss: 2.940263, ppl: 18.920828 +epoch: 1, batch: 1496, sum loss: 5203.655273, avg loss: 2.769375, ppl: 15.948661 +epoch: 1, batch: 1497, sum loss: 4687.727539, avg loss: 2.904416, ppl: 18.254581 +epoch: 1, batch: 1498, sum loss: 4738.747070, avg loss: 2.748693, ppl: 15.622208 +epoch: 1, batch: 1499, sum loss: 4750.015625, avg loss: 2.972476, ppl: 19.540232 +epoch: 1, batch: 1500, sum loss: 4642.230469, avg loss: 2.874446, ppl: 17.715611 +epoch: 1, batch: 1501, sum loss: 4086.290771, avg loss: 2.606053, ppl: 13.545476 +epoch: 1, batch: 1502, sum loss: 4289.961426, avg loss: 2.782076, ppl: 16.152521 +epoch: 1, batch: 1503, sum loss: 5099.965332, avg loss: 2.845963, ppl: 17.218132 +epoch: 1, batch: 1504, sum loss: 4348.666992, avg loss: 2.762812, ppl: 15.844340 +epoch: 1, batch: 1505, sum loss: 3993.652832, avg loss: 2.537263, ppl: 12.645020 +epoch: 1, batch: 1506, sum loss: 4829.892578, avg loss: 2.834444, ppl: 17.020935 +epoch: 1, batch: 1507, sum loss: 3357.334961, avg loss: 2.387863, ppl: 10.890193 +epoch: 1, batch: 1508, sum loss: 5449.973633, avg loss: 3.192720, ppl: 24.354591 +epoch: 1, batch: 1509, sum loss: 4327.825195, avg loss: 2.569968, ppl: 13.065400 +epoch: 1, batch: 1510, sum loss: 4044.333008, avg loss: 2.494962, ppl: 12.121270 +epoch: 1, batch: 1511, sum loss: 5460.342285, avg loss: 3.052176, ppl: 21.161331 +epoch: 1, batch: 1512, sum loss: 4456.406250, avg loss: 2.654203, ppl: 14.213649 +epoch: 1, batch: 1513, sum loss: 5302.541016, avg loss: 2.942587, ppl: 18.964838 +epoch: 1, batch: 1514, sum loss: 3754.197266, avg loss: 2.509490, ppl: 12.298659 +epoch: 1, batch: 1515, sum loss: 4623.229004, avg loss: 
2.628328, ppl: 13.850594 +epoch: 1, batch: 1516, sum loss: 4880.853027, avg loss: 3.067790, ppl: 21.494339 +epoch: 1, batch: 1517, sum loss: 4788.917969, avg loss: 2.738089, ppl: 15.457419 +epoch: 1, batch: 1518, sum loss: 3566.956543, avg loss: 2.359098, ppl: 10.581408 +epoch: 1, batch: 1519, sum loss: 5079.666504, avg loss: 3.043539, ppl: 20.979359 +epoch: 1, batch: 1520, sum loss: 3921.481934, avg loss: 2.631867, ppl: 13.899699 +epoch: 1, batch: 1521, sum loss: 5630.595703, avg loss: 2.743955, ppl: 15.548360 +epoch: 1, batch: 1522, sum loss: 3674.403320, avg loss: 2.574915, ppl: 13.130200 +epoch: 1, batch: 1523, sum loss: 4530.487793, avg loss: 2.760809, ppl: 15.812630 +epoch: 1, batch: 1524, sum loss: 4670.457031, avg loss: 2.766859, ppl: 15.908580 +epoch: 1, batch: 1525, sum loss: 4421.451172, avg loss: 2.825208, ppl: 16.864460 +epoch: 1, batch: 1526, sum loss: 4910.279297, avg loss: 3.157736, ppl: 23.517294 +epoch: 1, batch: 1527, sum loss: 4724.240234, avg loss: 3.226940, ppl: 25.202417 +epoch: 1, batch: 1528, sum loss: 4569.743652, avg loss: 2.703990, ppl: 14.939223 +epoch: 1, batch: 1529, sum loss: 4103.397461, avg loss: 2.810546, ppl: 16.618996 +epoch: 1, batch: 1530, sum loss: 4164.200684, avg loss: 2.561009, ppl: 12.948875 +epoch: 1, batch: 1531, sum loss: 5113.484863, avg loss: 2.877594, ppl: 17.771467 +epoch: 1, batch: 1532, sum loss: 5960.325195, avg loss: 3.294818, ppl: 26.972502 +epoch: 1, batch: 1533, sum loss: 4751.610840, avg loss: 2.803310, ppl: 16.499172 +epoch: 1, batch: 1534, sum loss: 5893.342285, avg loss: 2.936394, ppl: 18.847757 +epoch: 1, batch: 1535, sum loss: 4177.959473, avg loss: 2.521400, ppl: 12.446006 +epoch: 1, batch: 1536, sum loss: 4367.458984, avg loss: 2.499976, ppl: 12.182206 +epoch: 1, batch: 1537, sum loss: 5555.761719, avg loss: 3.049266, ppl: 21.099844 +epoch: 1, batch: 1538, sum loss: 5591.918457, avg loss: 2.983948, ppl: 19.765697 +epoch: 1, batch: 1539, sum loss: 4372.619629, avg loss: 2.763982, ppl: 15.862881 
+epoch: 1, batch: 1540, sum loss: 6016.141113, avg loss: 3.206898, ppl: 24.702345 +epoch: 1, batch: 1541, sum loss: 4964.383301, avg loss: 3.034464, ppl: 20.789839 +epoch: 1, batch: 1542, sum loss: 4556.874512, avg loss: 2.830357, ppl: 16.951509 +epoch: 1, batch: 1543, sum loss: 3716.157715, avg loss: 2.526280, ppl: 12.506893 +epoch: 1, batch: 1544, sum loss: 5665.666992, avg loss: 3.179387, ppl: 24.032015 +epoch: 1, batch: 1545, sum loss: 4677.065918, avg loss: 2.803996, ppl: 16.510496 +epoch: 1, batch: 1546, sum loss: 5209.325684, avg loss: 2.971663, ppl: 19.524370 +epoch: 1, batch: 1547, sum loss: 4194.867188, avg loss: 2.745332, ppl: 15.569782 +epoch: 1, batch: 1548, sum loss: 5365.750977, avg loss: 2.984288, ppl: 19.772429 +epoch: 1, batch: 1549, sum loss: 5666.838379, avg loss: 3.096633, ppl: 22.123341 +epoch: 1, batch: 1550, sum loss: 5021.980469, avg loss: 2.891181, ppl: 18.014563 +epoch: 1, batch: 1551, sum loss: 5637.915527, avg loss: 3.126964, ppl: 22.804630 +epoch: 1, batch: 1552, sum loss: 6232.335449, avg loss: 3.012245, ppl: 20.333000 +epoch: 1, batch: 1553, sum loss: 4106.402832, avg loss: 2.768984, ppl: 15.942426 +epoch: 1, batch: 1554, sum loss: 4412.814453, avg loss: 2.634516, ppl: 13.936569 +epoch: 1, batch: 1555, sum loss: 3861.347168, avg loss: 2.520462, ppl: 12.434335 +epoch: 1, batch: 1556, sum loss: 3746.065430, avg loss: 2.477557, ppl: 11.912124 +epoch: 1, batch: 1557, sum loss: 3985.460205, avg loss: 2.662298, ppl: 14.329179 +epoch: 1, batch: 1558, sum loss: 4501.263672, avg loss: 2.630780, ppl: 13.884592 +epoch: 1, batch: 1559, sum loss: 5943.830078, avg loss: 3.149883, ppl: 23.333342 +epoch: 1, batch: 1560, sum loss: 4572.085938, avg loss: 2.755929, ppl: 15.735649 +epoch: 1, batch: 1561, sum loss: 4579.338867, avg loss: 2.738839, ppl: 15.469017 +epoch: 1, batch: 1562, sum loss: 5107.053711, avg loss: 3.000619, ppl: 20.097973 +epoch: 1, batch: 1563, sum loss: 4045.945557, avg loss: 2.538234, ppl: 12.657301 +epoch: 1, batch: 1564, sum 
loss: 5060.556641, avg loss: 3.035727, ppl: 20.816101 +epoch: 1, batch: 1565, sum loss: 5064.590332, avg loss: 2.879244, ppl: 17.800812 +epoch: 1, batch: 1566, sum loss: 4780.351562, avg loss: 2.879730, ppl: 17.809464 +epoch: 1, batch: 1567, sum loss: 4889.477051, avg loss: 2.770242, ppl: 15.962496 +epoch: 1, batch: 1568, sum loss: 4663.951660, avg loss: 2.643964, ppl: 14.068856 +epoch: 1, batch: 1569, sum loss: 4480.710449, avg loss: 2.692735, ppl: 14.772018 +epoch: 1, batch: 1570, sum loss: 4884.062012, avg loss: 2.805320, ppl: 16.532366 +epoch: 1, batch: 1571, sum loss: 3905.891602, avg loss: 2.718087, ppl: 15.151317 +epoch: 1, batch: 1572, sum loss: 6535.396973, avg loss: 3.224172, ppl: 25.132759 +epoch: 1, batch: 1573, sum loss: 4770.258301, avg loss: 3.032586, ppl: 20.750832 +epoch: 1, batch: 1574, sum loss: 4236.675781, avg loss: 2.854903, ppl: 17.372747 +epoch: 1, batch: 1575, sum loss: 5574.908203, avg loss: 2.924925, ppl: 18.632835 +epoch: 1, batch: 1576, sum loss: 5298.803223, avg loss: 3.068213, ppl: 21.503437 +epoch: 1, batch: 1577, sum loss: 4055.071045, avg loss: 2.650373, ppl: 14.159322 +epoch: 1, batch: 1578, sum loss: 4462.102539, avg loss: 2.752685, ppl: 15.684690 +epoch: 1, batch: 1579, sum loss: 4388.743652, avg loss: 2.512160, ppl: 12.331538 +epoch: 1, batch: 1580, sum loss: 4373.272461, avg loss: 2.684636, ppl: 14.652868 +epoch: 1, batch: 1581, sum loss: 5210.865234, avg loss: 3.059815, ppl: 21.323616 +epoch: 1, batch: 1582, sum loss: 5665.238281, avg loss: 2.978569, ppl: 19.659668 +epoch: 1, batch: 1583, sum loss: 4743.381836, avg loss: 2.953538, ppl: 19.173664 +epoch: 1, batch: 1584, sum loss: 5179.041992, avg loss: 2.694611, ppl: 14.799758 +epoch: 1, batch: 1585, sum loss: 4961.014160, avg loss: 2.862674, ppl: 17.508282 +epoch: 1, batch: 1586, sum loss: 5019.065430, avg loss: 2.916366, ppl: 18.474024 +epoch: 1, batch: 1587, sum loss: 3058.708008, avg loss: 2.224515, ppl: 9.248996 +epoch: 1, batch: 1588, sum loss: 4792.681641, avg loss: 
2.721568, ppl: 15.204145 +epoch: 1, batch: 1589, sum loss: 4364.045898, avg loss: 2.698853, ppl: 14.862681 +epoch: 1, batch: 1590, sum loss: 3983.483887, avg loss: 2.538868, ppl: 12.665325 +epoch: 1, batch: 1591, sum loss: 4810.391113, avg loss: 2.785403, ppl: 16.206352 +epoch: 1, batch: 1592, sum loss: 4541.916504, avg loss: 2.365582, ppl: 10.650233 +epoch: 1, batch: 1593, sum loss: 4155.789062, avg loss: 2.790993, ppl: 16.297199 +epoch: 1, batch: 1594, sum loss: 3980.262207, avg loss: 2.501736, ppl: 12.203663 +epoch: 1, batch: 1595, sum loss: 4936.768555, avg loss: 2.822623, ppl: 16.820923 +epoch: 1, batch: 1596, sum loss: 4769.171875, avg loss: 2.714383, ppl: 15.095299 +epoch: 1, batch: 1597, sum loss: 5049.951660, avg loss: 2.867661, ppl: 17.595818 +epoch: 1, batch: 1598, sum loss: 5467.297363, avg loss: 2.865460, ppl: 17.557121 +epoch: 1, batch: 1599, sum loss: 4369.069336, avg loss: 2.662443, ppl: 14.331260 +epoch: 1, batch: 1600, sum loss: 4923.821777, avg loss: 2.756899, ppl: 15.750925 +epoch: 1, batch: 1601, sum loss: 4616.751465, avg loss: 2.695126, ppl: 14.807388 +epoch: 1, batch: 1602, sum loss: 4306.917969, avg loss: 2.559072, ppl: 12.923816 +epoch: 1, batch: 1603, sum loss: 4277.273926, avg loss: 2.695195, ppl: 14.808402 +epoch: 1, batch: 1604, sum loss: 4298.944336, avg loss: 2.702039, ppl: 14.910106 +epoch: 1, batch: 1605, sum loss: 5721.053223, avg loss: 2.898203, ppl: 18.141518 +epoch: 1, batch: 1606, sum loss: 5588.696289, avg loss: 3.239824, ppl: 25.529230 +epoch: 1, batch: 1607, sum loss: 4876.783203, avg loss: 2.630412, ppl: 13.879482 +epoch: 1, batch: 1608, sum loss: 4436.382812, avg loss: 2.656517, ppl: 14.246579 +epoch: 1, batch: 1609, sum loss: 3587.570068, avg loss: 2.628257, ppl: 13.849603 +epoch: 1, batch: 1610, sum loss: 4323.407227, avg loss: 2.604462, ppl: 13.523950 +epoch: 1, batch: 1611, sum loss: 5045.393555, avg loss: 2.853729, ppl: 17.352377 +epoch: 1, batch: 1612, sum loss: 4326.687500, avg loss: 2.432090, ppl: 11.382642 
+epoch: 1, batch: 1613, sum loss: 4876.709473, avg loss: 2.848545, ppl: 17.262648 +epoch: 1, batch: 1614, sum loss: 5506.568359, avg loss: 2.963707, ppl: 19.369650 +epoch: 1, batch: 1615, sum loss: 4787.747559, avg loss: 2.836344, ppl: 17.053297 +epoch: 1, batch: 1616, sum loss: 4951.941406, avg loss: 2.824838, ppl: 16.858221 +epoch: 1, batch: 1617, sum loss: 4872.277344, avg loss: 2.816345, ppl: 16.715647 +epoch: 1, batch: 1618, sum loss: 3650.533936, avg loss: 2.690150, ppl: 14.733890 +epoch: 1, batch: 1619, sum loss: 4839.515625, avg loss: 2.794178, ppl: 16.349176 +epoch: 1, batch: 1620, sum loss: 5684.944824, avg loss: 3.342119, ppl: 28.278992 +epoch: 1, batch: 1621, sum loss: 4429.776855, avg loss: 2.719323, ppl: 15.170044 +epoch: 1, batch: 1622, sum loss: 5077.657227, avg loss: 2.940161, ppl: 18.918882 +epoch: 1, batch: 1623, sum loss: 4482.927246, avg loss: 2.620063, ppl: 13.736587 +epoch: 1, batch: 1624, sum loss: 4584.282715, avg loss: 2.774990, ppl: 16.038460 +epoch: 1, batch: 1625, sum loss: 4867.685547, avg loss: 3.015914, ppl: 20.407740 +epoch: 1, batch: 1626, sum loss: 4830.479980, avg loss: 2.833126, ppl: 16.998520 +epoch: 1, batch: 1627, sum loss: 5622.887207, avg loss: 3.008500, ppl: 20.256998 +epoch: 1, batch: 1628, sum loss: 4554.798828, avg loss: 2.709577, ppl: 15.022917 +epoch: 1, batch: 1629, sum loss: 4003.537842, avg loss: 2.566370, ppl: 13.018488 +epoch: 1, batch: 1630, sum loss: 4887.251953, avg loss: 2.825001, ppl: 16.860962 +epoch: 1, batch: 1631, sum loss: 4507.746582, avg loss: 2.791174, ppl: 16.300152 +epoch: 1, batch: 1632, sum loss: 4486.381348, avg loss: 2.932276, ppl: 18.770294 +epoch: 1, batch: 1633, sum loss: 3601.832031, avg loss: 2.265303, ppl: 9.634045 +epoch: 1, batch: 1634, sum loss: 5132.947754, avg loss: 2.708679, ppl: 15.009441 +epoch: 1, batch: 1635, sum loss: 4157.112793, avg loss: 2.612893, ppl: 13.638452 +epoch: 1, batch: 1636, sum loss: 5318.793945, avg loss: 3.104959, ppl: 22.308294 +epoch: 1, batch: 1637, sum 
loss: 4971.696289, avg loss: 2.924527, ppl: 18.625422 +epoch: 1, batch: 1638, sum loss: 5112.039551, avg loss: 2.784335, ppl: 16.189051 +epoch: 1, batch: 1639, sum loss: 4028.012207, avg loss: 2.655249, ppl: 14.228523 +epoch: 1, batch: 1640, sum loss: 4900.509277, avg loss: 2.724019, ppl: 15.241448 +epoch: 1, batch: 1641, sum loss: 5497.492188, avg loss: 2.796283, ppl: 16.383636 +epoch: 1, batch: 1642, sum loss: 4564.322266, avg loss: 2.807086, ppl: 16.561592 +epoch: 1, batch: 1643, sum loss: 4154.341797, avg loss: 2.642711, ppl: 14.051248 +epoch: 1, batch: 1644, sum loss: 4987.332520, avg loss: 2.889532, ppl: 17.984892 +epoch: 1, batch: 1645, sum loss: 4737.271484, avg loss: 2.671896, ppl: 14.467373 +epoch: 1, batch: 1646, sum loss: 5596.736816, avg loss: 2.997716, ppl: 20.039722 +epoch: 1, batch: 1647, sum loss: 5801.822754, avg loss: 3.132734, ppl: 22.936592 +epoch: 1, batch: 1648, sum loss: 4837.752930, avg loss: 2.751850, ppl: 15.671603 +epoch: 1, batch: 1649, sum loss: 5154.731445, avg loss: 2.993456, ppl: 19.954529 +epoch: 1, batch: 1650, sum loss: 4571.813965, avg loss: 2.525864, ppl: 12.501694 +epoch: 1, batch: 1651, sum loss: 4640.590332, avg loss: 2.797221, ppl: 16.399017 +epoch: 1, batch: 1652, sum loss: 4629.911621, avg loss: 2.759185, ppl: 15.786965 +epoch: 1, batch: 1653, sum loss: 5408.166992, avg loss: 2.760678, ppl: 15.810553 +epoch: 1, batch: 1654, sum loss: 4832.389648, avg loss: 2.845930, ppl: 17.217569 +epoch: 1, batch: 1655, sum loss: 4532.583496, avg loss: 2.773919, ppl: 16.021296 +epoch: 1, batch: 1656, sum loss: 5235.466797, avg loss: 2.799715, ppl: 16.439957 +epoch: 1, batch: 1657, sum loss: 4642.595215, avg loss: 2.822246, ppl: 16.814579 +epoch: 1, batch: 1658, sum loss: 5030.541504, avg loss: 2.916256, ppl: 18.472002 +epoch: 1, batch: 1659, sum loss: 3710.815918, avg loss: 2.444543, ppl: 11.525280 +epoch: 1, batch: 1660, sum loss: 4383.120117, avg loss: 2.779404, ppl: 16.109419 +epoch: 1, batch: 1661, sum loss: 4611.675781, avg loss: 
2.827514, ppl: 16.903393 +epoch: 1, batch: 1662, sum loss: 4005.196533, avg loss: 2.631535, ppl: 13.895086 +epoch: 1, batch: 1663, sum loss: 5711.122070, avg loss: 2.954538, ppl: 19.192856 +epoch: 1, batch: 1664, sum loss: 4950.589844, avg loss: 3.005822, ppl: 20.202824 +epoch: 1, batch: 1665, sum loss: 4362.598145, avg loss: 2.663369, ppl: 14.344530 +epoch: 1, batch: 1666, sum loss: 5088.065430, avg loss: 2.954742, ppl: 19.196764 +epoch: 1, batch: 1667, sum loss: 3924.700439, avg loss: 2.599139, ppl: 13.452157 +epoch: 1, batch: 1668, sum loss: 4951.401855, avg loss: 2.848908, ppl: 17.268913 +epoch: 1, batch: 1669, sum loss: 3599.879395, avg loss: 2.544084, ppl: 12.731565 +epoch: 1, batch: 1670, sum loss: 5872.906250, avg loss: 2.979658, ppl: 19.681087 +epoch: 1, batch: 1671, sum loss: 5038.309082, avg loss: 3.079651, ppl: 21.750813 +epoch: 1, batch: 1672, sum loss: 4548.982422, avg loss: 2.634037, ppl: 13.929895 +epoch: 1, batch: 1673, sum loss: 4363.656250, avg loss: 2.710346, ppl: 15.034469 +epoch: 1, batch: 1674, sum loss: 4013.220947, avg loss: 2.752552, ppl: 15.682604 +epoch: 1, batch: 1675, sum loss: 4968.592285, avg loss: 2.815067, ppl: 16.694286 +epoch: 1, batch: 1676, sum loss: 5321.498535, avg loss: 3.148816, ppl: 23.308443 +epoch: 1, batch: 1677, sum loss: 4784.605957, avg loss: 2.794746, ppl: 16.358480 +epoch: 1, batch: 1678, sum loss: 5346.986328, avg loss: 3.114145, ppl: 22.514162 +epoch: 1, batch: 1679, sum loss: 5887.336914, avg loss: 2.905892, ppl: 18.281546 +epoch: 1, batch: 1680, sum loss: 4537.009766, avg loss: 2.498353, ppl: 12.162449 +epoch: 1, batch: 1681, sum loss: 4505.183105, avg loss: 2.531002, ppl: 12.566089 +epoch: 1, batch: 1682, sum loss: 4605.305176, avg loss: 2.818424, ppl: 16.750435 +epoch: 1, batch: 1683, sum loss: 4296.677246, avg loss: 2.728049, ppl: 15.303002 +epoch: 1, batch: 1684, sum loss: 4999.656250, avg loss: 3.162338, ppl: 23.625763 +epoch: 1, batch: 1685, sum loss: 3840.193848, avg loss: 2.506654, ppl: 12.263827 
+epoch: 1, batch: 1686, sum loss: 4139.333008, avg loss: 2.755881, ppl: 15.734895 +epoch: 1, batch: 1687, sum loss: 4008.408936, avg loss: 2.618164, ppl: 13.710528 +epoch: 1, batch: 1688, sum loss: 4959.138184, avg loss: 2.913712, ppl: 18.425070 +epoch: 1, batch: 1689, sum loss: 5331.094727, avg loss: 2.872357, ppl: 17.678640 +epoch: 1, batch: 1690, sum loss: 5194.708008, avg loss: 2.838638, ppl: 17.092476 +epoch: 1, batch: 1691, sum loss: 4710.206543, avg loss: 2.879099, ppl: 17.798237 +epoch: 1, batch: 1692, sum loss: 4448.424805, avg loss: 2.866253, ppl: 17.571062 +epoch: 1, batch: 1693, sum loss: 6271.511230, avg loss: 3.018052, ppl: 20.451406 +epoch: 1, batch: 1694, sum loss: 4813.917480, avg loss: 2.800417, ppl: 16.451513 +epoch: 1, batch: 1695, sum loss: 5552.713379, avg loss: 2.810078, ppl: 16.611208 +epoch: 1, batch: 1696, sum loss: 4986.564941, avg loss: 2.847838, ppl: 17.250452 +epoch: 1, batch: 1697, sum loss: 4285.493164, avg loss: 2.552408, ppl: 12.837980 +epoch: 1, batch: 1698, sum loss: 5149.852539, avg loss: 3.157482, ppl: 23.511312 +epoch: 1, batch: 1699, sum loss: 4983.506836, avg loss: 2.774781, ppl: 16.035118 +epoch: 1, batch: 1700, sum loss: 4722.825684, avg loss: 2.809533, ppl: 16.602169 +epoch: 1, batch: 1701, sum loss: 4859.100586, avg loss: 2.786181, ppl: 16.218969 +epoch: 1, batch: 1702, sum loss: 4364.217773, avg loss: 2.777987, ppl: 16.086611 +epoch: 1, batch: 1703, sum loss: 4549.200195, avg loss: 2.790920, ppl: 16.296009 +epoch: 1, batch: 1704, sum loss: 4559.025391, avg loss: 2.489910, ppl: 12.060192 +epoch: 1, batch: 1705, sum loss: 3875.099121, avg loss: 2.559511, ppl: 12.929489 +epoch: 1, batch: 1706, sum loss: 4478.175781, avg loss: 2.825347, ppl: 16.866804 +epoch: 1, batch: 1707, sum loss: 4922.495605, avg loss: 2.954679, ppl: 19.195564 +epoch: 1, batch: 1708, sum loss: 4077.902832, avg loss: 2.716791, ppl: 15.131681 +epoch: 1, batch: 1709, sum loss: 5421.779297, avg loss: 3.008757, ppl: 20.262192 +epoch: 1, batch: 1710, sum 
loss: 5305.625977, avg loss: 2.892926, ppl: 18.046030 +epoch: 1, batch: 1711, sum loss: 3838.661377, avg loss: 2.487791, ppl: 12.034663 +epoch: 1, batch: 1712, sum loss: 4720.110352, avg loss: 2.953761, ppl: 19.177954 +epoch: 1, batch: 1713, sum loss: 4970.832520, avg loss: 2.837233, ppl: 17.068478 +epoch: 1, batch: 1714, sum loss: 4454.512207, avg loss: 2.657824, ppl: 14.265208 +epoch: 1, batch: 1715, sum loss: 4160.821289, avg loss: 2.641791, ppl: 14.038325 +epoch: 1, batch: 1716, sum loss: 4626.694336, avg loss: 2.817719, ppl: 16.738626 +epoch: 1, batch: 1717, sum loss: 4124.185059, avg loss: 2.574398, ppl: 13.123408 +epoch: 1, batch: 1718, sum loss: 4745.020508, avg loss: 2.727023, ppl: 15.287314 +epoch: 1, batch: 1719, sum loss: 5379.885254, avg loss: 3.002168, ppl: 20.129133 +epoch: 1, batch: 1720, sum loss: 4694.871094, avg loss: 2.779675, ppl: 16.113783 +epoch: 1, batch: 1721, sum loss: 3750.638672, avg loss: 2.572454, ppl: 13.097933 +epoch: 1, batch: 1722, sum loss: 5043.638672, avg loss: 3.198249, ppl: 24.489614 +epoch: 1, batch: 1723, sum loss: 4676.412598, avg loss: 2.946700, ppl: 19.043005 +epoch: 1, batch: 1724, sum loss: 5116.998047, avg loss: 2.950979, ppl: 19.124672 +epoch: 1, batch: 1725, sum loss: 4899.805664, avg loss: 2.868739, ppl: 17.614786 +epoch: 1, batch: 1726, sum loss: 5109.426758, avg loss: 2.860821, ppl: 17.475874 +epoch: 1, batch: 1727, sum loss: 4934.549316, avg loss: 3.084093, ppl: 21.847649 +epoch: 1, batch: 1728, sum loss: 4505.333008, avg loss: 2.740470, ppl: 15.494268 +epoch: 1, batch: 1729, sum loss: 4905.978516, avg loss: 2.937712, ppl: 18.872610 +epoch: 1, batch: 1730, sum loss: 4398.794434, avg loss: 2.839764, ppl: 17.111725 +epoch: 1, batch: 1731, sum loss: 4661.071289, avg loss: 2.782729, ppl: 16.163076 +epoch: 1, batch: 1732, sum loss: 5665.965820, avg loss: 2.799390, ppl: 16.434624 +epoch: 1, batch: 1733, sum loss: 5589.069824, avg loss: 3.008111, ppl: 20.249109 +epoch: 1, batch: 1734, sum loss: 5267.802734, avg loss: 
3.106016, ppl: 22.331896 +epoch: 1, batch: 1735, sum loss: 4936.401855, avg loss: 2.809563, ppl: 16.602655 +epoch: 1, batch: 1736, sum loss: 4609.558105, avg loss: 2.760215, ppl: 15.803233 +epoch: 1, batch: 1737, sum loss: 4420.651855, avg loss: 2.695519, ppl: 14.813211 +epoch: 1, batch: 1738, sum loss: 4352.819336, avg loss: 2.691911, ppl: 14.759851 +epoch: 1, batch: 1739, sum loss: 3612.245117, avg loss: 2.385895, ppl: 10.868789 +epoch: 1, batch: 1740, sum loss: 4699.757812, avg loss: 2.794148, ppl: 16.348701 +epoch: 1, batch: 1741, sum loss: 5032.227539, avg loss: 3.006110, ppl: 20.208630 +epoch: 1, batch: 1742, sum loss: 4927.247070, avg loss: 2.891577, ppl: 18.021708 +epoch: 1, batch: 1743, sum loss: 5202.561523, avg loss: 3.062132, ppl: 21.373068 +epoch: 1, batch: 1744, sum loss: 4951.330078, avg loss: 2.731015, ppl: 15.348461 +epoch: 1, batch: 1745, sum loss: 4994.915039, avg loss: 2.732448, ppl: 15.370469 +epoch: 1, batch: 1746, sum loss: 4620.847656, avg loss: 2.735848, ppl: 15.422819 +epoch: 1, batch: 1747, sum loss: 6213.368652, avg loss: 3.086622, ppl: 21.902954 +epoch: 1, batch: 1748, sum loss: 5428.135254, avg loss: 3.040972, ppl: 20.925577 +epoch: 1, batch: 1749, sum loss: 4755.981934, avg loss: 2.797637, ppl: 16.405827 +epoch: 1, batch: 1750, sum loss: 4883.075195, avg loss: 2.979301, ppl: 19.674068 +epoch: 1, batch: 1751, sum loss: 6074.328613, avg loss: 3.077167, ppl: 21.696854 +epoch: 1, batch: 1752, sum loss: 4939.006348, avg loss: 2.859876, ppl: 17.459370 +epoch: 1, batch: 1753, sum loss: 5387.694336, avg loss: 3.037032, ppl: 20.843281 +epoch: 1, batch: 1754, sum loss: 3692.902344, avg loss: 2.582449, ppl: 13.229500 +epoch: 1, batch: 1755, sum loss: 4229.302734, avg loss: 2.789778, ppl: 16.277401 +epoch: 1, batch: 1756, sum loss: 4108.198242, avg loss: 2.762743, ppl: 15.843234 +epoch: 1, batch: 1757, sum loss: 4103.452637, avg loss: 2.697865, ppl: 14.847998 +epoch: 1, batch: 1758, sum loss: 4799.531738, avg loss: 2.782337, ppl: 16.156742 
+epoch: 1, batch: 1759, sum loss: 4737.433594, avg loss: 2.806537, ppl: 16.552492 +epoch: 1, batch: 1760, sum loss: 5131.328613, avg loss: 2.967802, ppl: 19.449116 +epoch: 1, batch: 1761, sum loss: 3976.187744, avg loss: 2.614193, ppl: 13.656194 +epoch: 1, batch: 1762, sum loss: 4725.879883, avg loss: 2.804677, ppl: 16.521730 +epoch: 1, batch: 1763, sum loss: 4876.006836, avg loss: 2.641391, ppl: 14.032703 +epoch: 1, batch: 1764, sum loss: 4604.422852, avg loss: 2.778770, ppl: 16.099215 +epoch: 1, batch: 1765, sum loss: 4451.140625, avg loss: 2.714110, ppl: 15.091175 +epoch: 1, batch: 1766, sum loss: 4775.290039, avg loss: 2.761880, ppl: 15.829570 +epoch: 1, batch: 1767, sum loss: 4235.184570, avg loss: 2.655288, ppl: 14.229087 +epoch: 1, batch: 1768, sum loss: 4467.938965, avg loss: 2.724353, ppl: 15.246548 +epoch: 1, batch: 1769, sum loss: 4708.664062, avg loss: 2.791147, ppl: 16.299698 +epoch: 1, batch: 1770, sum loss: 4601.449707, avg loss: 2.843912, ppl: 17.182856 +epoch: 1, batch: 1771, sum loss: 4539.333008, avg loss: 2.667058, ppl: 14.397552 +epoch: 1, batch: 1772, sum loss: 4838.857422, avg loss: 2.957737, ppl: 19.254349 +epoch: 1, batch: 1773, sum loss: 4311.595215, avg loss: 2.577164, ppl: 13.159763 +epoch: 1, batch: 1774, sum loss: 4978.084473, avg loss: 2.954353, ppl: 19.189301 +epoch: 1, batch: 1775, sum loss: 5108.838379, avg loss: 2.953086, ppl: 19.164999 +epoch: 1, batch: 1776, sum loss: 5058.182129, avg loss: 2.912022, ppl: 18.393955 +epoch: 1, batch: 1777, sum loss: 4202.821777, avg loss: 2.621848, ppl: 13.761126 +epoch: 1, batch: 1778, sum loss: 4556.583984, avg loss: 2.704204, ppl: 14.942419 +epoch: 1, batch: 1779, sum loss: 4137.439941, avg loss: 2.683165, ppl: 14.631322 +epoch: 1, batch: 1780, sum loss: 4395.264160, avg loss: 2.719842, ppl: 15.177919 +epoch: 1, batch: 1781, sum loss: 4705.960938, avg loss: 2.634917, ppl: 13.942149 +epoch: 1, batch: 1782, sum loss: 4330.501953, avg loss: 2.763562, ppl: 15.856230 +epoch: 1, batch: 1783, sum 
loss: 4400.231445, avg loss: 2.722915, ppl: 15.224644 +epoch: 1, batch: 1784, sum loss: 5227.145020, avg loss: 2.706963, ppl: 14.983698 +epoch: 1, batch: 1785, sum loss: 4789.194336, avg loss: 2.805621, ppl: 16.537336 +epoch: 1, batch: 1786, sum loss: 6035.502441, avg loss: 3.151698, ppl: 23.375731 +epoch: 1, batch: 1787, sum loss: 4408.341797, avg loss: 2.684739, ppl: 14.654381 +epoch: 1, batch: 1788, sum loss: 3856.779541, avg loss: 2.512560, ppl: 12.336470 +epoch: 1, batch: 1789, sum loss: 3821.976074, avg loss: 2.445282, ppl: 11.533804 +epoch: 1, batch: 1790, sum loss: 3964.650391, avg loss: 2.562799, ppl: 12.972078 +epoch: 1, batch: 1791, sum loss: 5186.588379, avg loss: 2.843524, ppl: 17.176195 +epoch: 1, batch: 1792, sum loss: 4169.521484, avg loss: 2.635601, ppl: 13.951699 +epoch: 1, batch: 1793, sum loss: 4977.312988, avg loss: 3.101130, ppl: 22.223040 +epoch: 1, batch: 1794, sum loss: 4772.731934, avg loss: 2.602362, ppl: 13.495576 +epoch: 1, batch: 1795, sum loss: 5545.702637, avg loss: 3.120823, ppl: 22.665028 +epoch: 1, batch: 1796, sum loss: 4391.112305, avg loss: 2.713914, ppl: 15.088210 +epoch: 1, batch: 1797, sum loss: 5009.603516, avg loss: 2.867546, ppl: 17.593796 +epoch: 1, batch: 1798, sum loss: 4167.334961, avg loss: 2.564514, ppl: 12.994337 +epoch: 1, batch: 1799, sum loss: 4683.799805, avg loss: 2.768203, ppl: 15.929986 +epoch: 1, batch: 1800, sum loss: 5235.332031, avg loss: 3.012274, ppl: 20.333586 +epoch: 1, batch: 1801, sum loss: 4993.062988, avg loss: 2.894529, ppl: 18.074993 +epoch: 1, batch: 1802, sum loss: 5382.187500, avg loss: 2.941086, ppl: 18.936401 +epoch: 1, batch: 1803, sum loss: 4606.344238, avg loss: 2.708021, ppl: 14.999564 +epoch: 1, batch: 1804, sum loss: 4005.964844, avg loss: 2.930479, ppl: 18.736605 +epoch: 1, batch: 1805, sum loss: 5353.647949, avg loss: 2.895429, ppl: 18.091259 +epoch: 1, batch: 1806, sum loss: 4474.953125, avg loss: 3.122787, ppl: 22.709578 +epoch: 1, batch: 1807, sum loss: 5015.126953, avg loss: 
2.601207, ppl: 13.479999 +epoch: 1, batch: 1808, sum loss: 4663.305664, avg loss: 2.741508, ppl: 15.510361 +epoch: 1, batch: 1809, sum loss: 5374.277344, avg loss: 2.890951, ppl: 18.010424 +epoch: 1, batch: 1810, sum loss: 4992.154785, avg loss: 2.862474, ppl: 17.504780 +epoch: 1, batch: 1811, sum loss: 3980.704590, avg loss: 2.709805, ppl: 15.026353 +epoch: 1, batch: 1812, sum loss: 4761.235352, avg loss: 2.761738, ppl: 15.827320 +epoch: 1, batch: 1813, sum loss: 3995.825928, avg loss: 2.655034, ppl: 14.225471 +epoch: 1, batch: 1814, sum loss: 4735.453125, avg loss: 2.766036, ppl: 15.895493 +epoch: 1, batch: 1815, sum loss: 5353.635254, avg loss: 2.850711, ppl: 17.300076 +epoch: 1, batch: 1816, sum loss: 4774.264648, avg loss: 2.912913, ppl: 18.410351 +epoch: 1, batch: 1817, sum loss: 4906.005371, avg loss: 2.837481, ppl: 17.072714 +epoch: 1, batch: 1818, sum loss: 4756.003418, avg loss: 2.769950, ppl: 15.957830 +epoch: 1, batch: 1819, sum loss: 4385.963867, avg loss: 2.739515, ppl: 15.479481 +epoch: 1, batch: 1820, sum loss: 6205.859375, avg loss: 3.134272, ppl: 22.971909 +epoch: 1, batch: 1821, sum loss: 5020.554688, avg loss: 2.910467, ppl: 18.365368 +epoch: 1, batch: 1822, sum loss: 4139.476562, avg loss: 2.884653, ppl: 17.897348 +epoch: 1, batch: 1823, sum loss: 4933.146484, avg loss: 2.905269, ppl: 18.270157 +epoch: 1, batch: 1824, sum loss: 4259.152344, avg loss: 2.957745, ppl: 19.254501 +epoch: 1, batch: 1825, sum loss: 5600.416016, avg loss: 3.038750, ppl: 20.879128 +epoch: 1, batch: 1826, sum loss: 4850.202148, avg loss: 2.688582, ppl: 14.710804 +epoch: 1, batch: 1827, sum loss: 5150.112793, avg loss: 2.862764, ppl: 17.509859 +epoch: 1, batch: 1828, sum loss: 5242.403809, avg loss: 3.014608, ppl: 20.381107 +epoch: 1, batch: 1829, sum loss: 3552.624023, avg loss: 2.741222, ppl: 15.505924 +epoch: 1, batch: 1830, sum loss: 4896.091309, avg loss: 2.960152, ppl: 19.300909 +epoch: 1, batch: 1831, sum loss: 4865.913574, avg loss: 2.732125, ppl: 15.365498 
+epoch: 1, batch: 1832, sum loss: 4630.984375, avg loss: 2.806657, ppl: 16.554489 +epoch: 1, batch: 1833, sum loss: 5008.771484, avg loss: 2.920566, ppl: 18.551790 +epoch: 1, batch: 1834, sum loss: 5247.979980, avg loss: 2.993714, ppl: 19.959667 +epoch: 1, batch: 1835, sum loss: 5102.243164, avg loss: 2.939080, ppl: 18.898457 +epoch: 1, batch: 1836, sum loss: 5195.490723, avg loss: 2.856235, ppl: 17.395901 +epoch: 1, batch: 1837, sum loss: 4789.711426, avg loss: 2.876704, ppl: 17.755650 +epoch: 1, batch: 1838, sum loss: 5730.885742, avg loss: 2.807881, ppl: 16.574760 +epoch: 1, batch: 1839, sum loss: 3704.143555, avg loss: 2.453075, ppl: 11.624038 +epoch: 1, batch: 1840, sum loss: 4333.829102, avg loss: 2.693492, ppl: 14.783212 +epoch: 1, batch: 1841, sum loss: 4718.520020, avg loss: 2.711793, ppl: 15.056250 +epoch: 1, batch: 1842, sum loss: 4491.548340, avg loss: 2.814253, ppl: 16.680716 +epoch: 1, batch: 1843, sum loss: 4397.237305, avg loss: 2.631501, ppl: 13.894607 +epoch: 1, batch: 1844, sum loss: 5188.574707, avg loss: 2.726524, ppl: 15.279676 +epoch: 1, batch: 1845, sum loss: 4187.247559, avg loss: 2.517888, ppl: 12.402373 +epoch: 1, batch: 1846, sum loss: 4671.611328, avg loss: 2.762633, ppl: 15.841493 +epoch: 1, batch: 1847, sum loss: 4351.132812, avg loss: 2.503529, ppl: 12.225557 +epoch: 1, batch: 1848, sum loss: 5076.385254, avg loss: 2.754414, ppl: 15.711832 +epoch: 1, batch: 1849, sum loss: 4944.187012, avg loss: 2.866195, ppl: 17.570045 +epoch: 1, batch: 1850, sum loss: 5325.417480, avg loss: 3.190784, ppl: 24.307465 +epoch: 1, batch: 1851, sum loss: 3977.593262, avg loss: 2.705846, ppl: 14.966970 +epoch: 1, batch: 1852, sum loss: 5025.368164, avg loss: 2.898136, ppl: 18.140303 +epoch: 1, batch: 1853, sum loss: 4664.358887, avg loss: 2.629289, ppl: 13.863908 +epoch: 1, batch: 1854, sum loss: 4613.002930, avg loss: 2.716727, ppl: 15.130726 +epoch: 1, batch: 1855, sum loss: 4997.750977, avg loss: 2.941584, ppl: 18.945835 +epoch: 1, batch: 1856, sum 
loss: 3883.370850, avg loss: 2.691179, ppl: 14.749048 +epoch: 1, batch: 1857, sum loss: 5973.125977, avg loss: 2.998558, ppl: 20.056585 +epoch: 1, batch: 1858, sum loss: 4919.749023, avg loss: 2.883792, ppl: 17.881952 +epoch: 1, batch: 1859, sum loss: 4224.067383, avg loss: 2.661668, ppl: 14.320160 +epoch: 1, batch: 1860, sum loss: 5724.989258, avg loss: 2.983319, ppl: 19.753269 +epoch: 1, batch: 1861, sum loss: 4619.720703, avg loss: 2.684323, ppl: 14.648286 +epoch: 1, batch: 1862, sum loss: 6610.477539, avg loss: 3.336939, ppl: 28.132889 +epoch: 1, batch: 1863, sum loss: 4152.222168, avg loss: 2.481902, ppl: 11.964000 +epoch: 1, batch: 1864, sum loss: 5265.767578, avg loss: 3.019362, ppl: 20.478222 +epoch: 1, batch: 1865, sum loss: 4369.732422, avg loss: 2.608796, ppl: 13.582683 +epoch: 1, batch: 1866, sum loss: 5062.571289, avg loss: 2.855370, ppl: 17.380873 +epoch: 1, batch: 1867, sum loss: 4651.774414, avg loss: 2.898302, ppl: 18.143309 +epoch: 1, batch: 1868, sum loss: 5271.274414, avg loss: 2.739748, ppl: 15.483075 +epoch: 1, batch: 1869, sum loss: 4812.527344, avg loss: 2.957915, ppl: 19.257774 +epoch: 1, batch: 1870, sum loss: 4250.865234, avg loss: 2.781980, ppl: 16.150965 +epoch: 1, batch: 1871, sum loss: 5168.091797, avg loss: 3.011708, ppl: 20.322090 +epoch: 1, batch: 1872, sum loss: 5615.478516, avg loss: 3.055211, ppl: 21.225670 +epoch: 1, batch: 1873, sum loss: 5515.714355, avg loss: 2.887809, ppl: 17.953922 +epoch: 1, batch: 1874, sum loss: 5013.831055, avg loss: 2.881512, ppl: 17.841232 +epoch: 1, batch: 1875, sum loss: 4434.137695, avg loss: 2.806416, ppl: 16.550499 +epoch: 1, batch: 1876, sum loss: 5225.982910, avg loss: 2.612992, ppl: 13.639794 +epoch: 1, batch: 1877, sum loss: 4717.723145, avg loss: 2.784961, ppl: 16.199179 +epoch: 1, batch: 1878, sum loss: 5296.846680, avg loss: 2.891292, ppl: 18.016569 +epoch: 1, batch: 1879, sum loss: 4768.511719, avg loss: 2.762753, ppl: 15.843400 +epoch: 1, batch: 1880, sum loss: 4710.292969, avg loss: 
2.754557, ppl: 15.714084 +epoch: 1, batch: 1881, sum loss: 5235.899902, avg loss: 3.058353, ppl: 21.292454 +epoch: 1, batch: 1882, sum loss: 4177.739746, avg loss: 2.644139, ppl: 14.071325 +epoch: 1, batch: 1883, sum loss: 4729.231934, avg loss: 2.899590, ppl: 18.166704 +epoch: 1, batch: 1884, sum loss: 5349.654785, avg loss: 2.853149, ppl: 17.342310 +epoch: 1, batch: 1885, sum loss: 3786.623047, avg loss: 2.590029, ppl: 13.330165 +epoch: 1, batch: 1886, sum loss: 5183.654297, avg loss: 2.891051, ppl: 18.012232 +epoch: 1, batch: 1887, sum loss: 4402.706543, avg loss: 2.732903, ppl: 15.377463 +epoch: 1, batch: 1888, sum loss: 4258.253906, avg loss: 2.708813, ppl: 15.011446 +epoch: 1, batch: 1889, sum loss: 5035.951660, avg loss: 2.819682, ppl: 16.771515 +epoch: 1, batch: 1890, sum loss: 4449.985840, avg loss: 2.705158, ppl: 14.956687 +epoch: 1, batch: 1891, sum loss: 4076.521484, avg loss: 2.604806, ppl: 13.528600 +epoch: 1, batch: 1892, sum loss: 4364.637695, avg loss: 2.643633, ppl: 14.064204 +epoch: 1, batch: 1893, sum loss: 5035.365234, avg loss: 2.872427, ppl: 17.679884 +epoch: 1, batch: 1894, sum loss: 5349.465820, avg loss: 3.037743, ppl: 20.858116 +epoch: 1, batch: 1895, sum loss: 4192.363770, avg loss: 2.969096, ppl: 19.474310 +epoch: 1, batch: 1896, sum loss: 4469.028320, avg loss: 2.669671, ppl: 14.435213 +epoch: 1, batch: 1897, sum loss: 4750.016602, avg loss: 2.856294, ppl: 17.396933 +epoch: 1, batch: 1898, sum loss: 5640.926270, avg loss: 3.199618, ppl: 24.523157 +epoch: 1, batch: 1899, sum loss: 4421.819336, avg loss: 2.665352, ppl: 14.373010 +epoch: 1, batch: 1900, sum loss: 4539.074219, avg loss: 2.891130, ppl: 18.013657 +epoch: 1, batch: 1901, sum loss: 4553.148926, avg loss: 2.664218, ppl: 14.356721 +epoch: 1, batch: 1902, sum loss: 4882.609375, avg loss: 2.858670, ppl: 17.438326 +epoch: 1, batch: 1903, sum loss: 3585.793457, avg loss: 2.555804, ppl: 12.881659 +epoch: 1, batch: 1904, sum loss: 5217.195312, avg loss: 3.052777, ppl: 21.174059 
+epoch: 1, batch: 1905, sum loss: 5283.085449, avg loss: 2.848025, ppl: 17.253666 +epoch: 1, batch: 1906, sum loss: 3527.736084, avg loss: 2.642499, ppl: 14.048266 +epoch: 1, batch: 1907, sum loss: 5255.862793, avg loss: 2.844082, ppl: 17.185768 +epoch: 1, batch: 1908, sum loss: 5180.611816, avg loss: 2.723771, ppl: 15.237673 +epoch: 1, batch: 1909, sum loss: 4200.018555, avg loss: 2.576699, ppl: 13.153640 +epoch: 1, batch: 1910, sum loss: 4549.835449, avg loss: 2.569077, ppl: 13.053770 +epoch: 1, batch: 1911, sum loss: 4547.357910, avg loss: 2.659274, ppl: 14.285909 +epoch: 1, batch: 1912, sum loss: 4451.443359, avg loss: 2.676755, ppl: 14.537841 +epoch: 1, batch: 1913, sum loss: 3887.346436, avg loss: 2.675393, ppl: 14.518060 +epoch: 1, batch: 1914, sum loss: 5502.965820, avg loss: 3.048734, ppl: 21.088638 +epoch: 1, batch: 1915, sum loss: 4900.322266, avg loss: 3.078092, ppl: 21.716925 +epoch: 1, batch: 1916, sum loss: 4362.095703, avg loss: 2.745183, ppl: 15.567462 +epoch: 1, batch: 1917, sum loss: 4757.521484, avg loss: 2.798542, ppl: 16.420692 +epoch: 1, batch: 1918, sum loss: 4891.129883, avg loss: 3.101541, ppl: 22.232180 +epoch: 1, batch: 1919, sum loss: 5097.562988, avg loss: 2.968878, ppl: 19.470058 +epoch: 1, batch: 1920, sum loss: 4149.305176, avg loss: 2.550280, ppl: 12.810685 +epoch: 1, batch: 1921, sum loss: 5205.333496, avg loss: 2.761450, ppl: 15.822770 +epoch: 1, batch: 1922, sum loss: 5154.720703, avg loss: 3.068286, ppl: 21.505014 +epoch: 1, batch: 1923, sum loss: 4577.290527, avg loss: 2.639729, ppl: 14.009410 +epoch: 1, batch: 1924, sum loss: 4232.648926, avg loss: 2.784638, ppl: 16.193949 +epoch: 1, batch: 1925, sum loss: 4802.650391, avg loss: 2.763320, ppl: 15.852389 +epoch: 1, batch: 1926, sum loss: 5122.291016, avg loss: 2.920348, ppl: 18.547745 +epoch: 1, batch: 1927, sum loss: 4931.027344, avg loss: 2.990314, ppl: 19.891924 +epoch: 1, batch: 1928, sum loss: 5216.426758, avg loss: 3.188525, ppl: 24.252628 +epoch: 1, batch: 1929, sum 
loss: 5463.595703, avg loss: 3.198826, ppl: 24.503754 +epoch: 1, batch: 1930, sum loss: 5778.291016, avg loss: 2.940606, ppl: 18.927315 +epoch: 1, batch: 1931, sum loss: 4674.591797, avg loss: 2.732082, ppl: 15.364841 +epoch: 1, batch: 1932, sum loss: 4112.696777, avg loss: 2.736325, ppl: 15.430175 +epoch: 1, batch: 1933, sum loss: 4577.030273, avg loss: 3.007247, ppl: 20.231634 +epoch: 1, batch: 1934, sum loss: 4672.522461, avg loss: 2.797918, ppl: 16.410439 +epoch: 1, batch: 1935, sum loss: 3344.673584, avg loss: 2.631529, ppl: 13.895000 +epoch: 1, batch: 1936, sum loss: 4926.557617, avg loss: 2.773963, ppl: 16.021999 +epoch: 1, batch: 1937, sum loss: 5010.956055, avg loss: 2.956316, ppl: 19.227013 +epoch: 1, batch: 1938, sum loss: 4177.884277, avg loss: 2.619363, ppl: 13.726981 +epoch: 1, batch: 1939, sum loss: 5283.726562, avg loss: 2.704057, ppl: 14.940214 +epoch: 1, batch: 1940, sum loss: 4468.390137, avg loss: 2.661340, ppl: 14.315459 +epoch: 1, batch: 1941, sum loss: 4385.674805, avg loss: 2.872086, ppl: 17.673840 +epoch: 1, batch: 1942, sum loss: 5156.112305, avg loss: 2.850256, ppl: 17.292204 +epoch: 1, batch: 1943, sum loss: 5096.724121, avg loss: 2.803479, ppl: 16.501953 +epoch: 1, batch: 1944, sum loss: 4605.994141, avg loss: 2.813680, ppl: 16.671158 +epoch: 1, batch: 1945, sum loss: 4212.319824, avg loss: 2.540603, ppl: 12.687321 +epoch: 1, batch: 1946, sum loss: 5462.002930, avg loss: 2.919296, ppl: 18.528244 +epoch: 1, batch: 1947, sum loss: 4180.022461, avg loss: 2.811044, ppl: 16.627268 +epoch: 1, batch: 1948, sum loss: 5922.167480, avg loss: 2.931766, ppl: 18.760738 +epoch: 1, batch: 1949, sum loss: 4054.801025, avg loss: 2.804150, ppl: 16.513035 +epoch: 1, batch: 1950, sum loss: 5744.625000, avg loss: 3.080228, ppl: 21.763361 +epoch: 1, batch: 1951, sum loss: 5350.706543, avg loss: 3.110876, ppl: 22.440691 +epoch: 1, batch: 1952, sum loss: 6640.828125, avg loss: 3.274570, ppl: 26.431864 +epoch: 1, batch: 1953, sum loss: 5087.664062, avg loss: 
2.944250, ppl: 18.996412 +epoch: 1, batch: 1954, sum loss: 6226.641113, avg loss: 3.008039, ppl: 20.247660 +epoch: 1, batch: 1955, sum loss: 3889.723145, avg loss: 2.697450, ppl: 14.841839 +epoch: 1, batch: 1956, sum loss: 3545.380859, avg loss: 2.534225, ppl: 12.606660 +epoch: 1, batch: 1957, sum loss: 4849.194336, avg loss: 2.809498, ppl: 16.601587 +epoch: 1, batch: 1958, sum loss: 4828.947754, avg loss: 2.939104, ppl: 18.898903 +epoch: 1, batch: 1959, sum loss: 5067.213867, avg loss: 2.908848, ppl: 18.335669 +epoch: 1, batch: 1960, sum loss: 4130.803711, avg loss: 2.629411, ppl: 13.865594 +epoch: 1, batch: 1961, sum loss: 5554.593262, avg loss: 3.017161, ppl: 20.433197 +epoch: 1, batch: 1962, sum loss: 4670.844727, avg loss: 2.877908, ppl: 17.777044 +epoch: 1, batch: 1963, sum loss: 4810.926758, avg loss: 2.937073, ppl: 18.860569 +epoch: 1, batch: 1964, sum loss: 4056.067383, avg loss: 2.909661, ppl: 18.350578 +epoch: 1, batch: 1965, sum loss: 5669.209961, avg loss: 2.907287, ppl: 18.307066 +epoch: 1, batch: 1966, sum loss: 4830.203613, avg loss: 2.710552, ppl: 15.037574 +epoch: 1, batch: 1967, sum loss: 4055.253418, avg loss: 2.754928, ppl: 15.719903 +epoch: 1, batch: 1968, sum loss: 4076.217041, avg loss: 2.706651, ppl: 14.979033 +epoch: 1, batch: 1969, sum loss: 3630.245117, avg loss: 2.474605, ppl: 11.877010 +epoch: 1, batch: 1970, sum loss: 5145.441895, avg loss: 2.989798, ppl: 19.881659 +epoch: 1, batch: 1971, sum loss: 4916.275391, avg loss: 2.802894, ppl: 16.492300 +epoch: 1, batch: 1972, sum loss: 5211.677246, avg loss: 2.903441, ppl: 18.236797 +epoch: 1, batch: 1973, sum loss: 4518.580566, avg loss: 2.932239, ppl: 18.769613 +epoch: 1, batch: 1974, sum loss: 4374.300781, avg loss: 3.025104, ppl: 20.596153 +epoch: 1, batch: 1975, sum loss: 5006.487305, avg loss: 2.846212, ppl: 17.222425 +epoch: 1, batch: 1976, sum loss: 4179.772461, avg loss: 2.872696, ppl: 17.684626 +epoch: 1, batch: 1977, sum loss: 4721.404785, avg loss: 2.854537, ppl: 17.366404 
+epoch: 1, batch: 1978, sum loss: 5025.602051, avg loss: 2.759804, ppl: 15.796740 +epoch: 1, batch: 1979, sum loss: 4794.178223, avg loss: 2.797070, ppl: 16.396540 +epoch: 1, batch: 1980, sum loss: 3855.640869, avg loss: 2.546659, ppl: 12.764380 +epoch: 1, batch: 1981, sum loss: 5019.208008, avg loss: 2.697049, ppl: 14.835885 +epoch: 1, batch: 1982, sum loss: 4414.311523, avg loss: 2.752065, ppl: 15.674963 +epoch: 1, batch: 1983, sum loss: 3851.289795, avg loss: 2.555600, ppl: 12.879030 +epoch: 1, batch: 1984, sum loss: 4544.957520, avg loss: 2.651667, ppl: 14.177655 +epoch: 1, batch: 1985, sum loss: 4812.370117, avg loss: 2.927233, ppl: 18.675873 +epoch: 1, batch: 1986, sum loss: 4754.766602, avg loss: 2.780565, ppl: 16.128136 +epoch: 1, batch: 1987, sum loss: 3877.353027, avg loss: 2.652088, ppl: 14.183629 +epoch: 1, batch: 1988, sum loss: 4359.080078, avg loss: 2.669369, ppl: 14.430863 +epoch: 1, batch: 1989, sum loss: 5324.782715, avg loss: 2.903371, ppl: 18.235516 +epoch: 1, batch: 1990, sum loss: 4746.165039, avg loss: 2.803405, ppl: 16.500740 +epoch: 1, batch: 1991, sum loss: 5084.041016, avg loss: 3.022616, ppl: 20.544975 +epoch: 1, batch: 1992, sum loss: 5523.300781, avg loss: 2.789546, ppl: 16.273623 +epoch: 1, batch: 1993, sum loss: 4496.471680, avg loss: 2.689277, ppl: 14.721035 +epoch: 1, batch: 1994, sum loss: 5104.509277, avg loss: 2.972923, ppl: 19.548983 +epoch: 1, batch: 1995, sum loss: 4934.667480, avg loss: 2.849115, ppl: 17.272491 +epoch: 1, batch: 1996, sum loss: 4401.183594, avg loss: 2.496417, ppl: 12.138926 +epoch: 1, batch: 1997, sum loss: 4303.283691, avg loss: 2.939401, ppl: 18.904526 +epoch: 1, batch: 1998, sum loss: 4389.949219, avg loss: 2.839553, ppl: 17.108118 +epoch: 1, batch: 1999, sum loss: 5139.411621, avg loss: 2.915151, ppl: 18.451601 +epoch: 1, batch: 2000, sum loss: 4340.739746, avg loss: 2.470541, ppl: 11.828839 +epoch: 1, batch: 2001, sum loss: 4726.029785, avg loss: 2.972346, ppl: 19.537699 +epoch: 1, batch: 2002, sum 
loss: 4903.141602, avg loss: 2.894416, ppl: 18.072950 +epoch: 1, batch: 2003, sum loss: 5407.619141, avg loss: 2.899528, ppl: 18.165562 +epoch: 1, batch: 2004, sum loss: 4677.553711, avg loss: 2.977437, ppl: 19.637426 +epoch: 1, batch: 2005, sum loss: 5575.424805, avg loss: 3.175071, ppl: 23.928524 +epoch: 1, batch: 2006, sum loss: 4711.333496, avg loss: 2.784476, ppl: 16.191332 +epoch: 1, batch: 2007, sum loss: 4340.324707, avg loss: 2.878199, ppl: 17.782225 +epoch: 1, batch: 2008, sum loss: 4829.058105, avg loss: 2.876151, ppl: 17.745844 +epoch: 1, batch: 2009, sum loss: 5276.315430, avg loss: 2.803568, ppl: 16.503428 +epoch: 1, batch: 2010, sum loss: 4832.416016, avg loss: 2.971966, ppl: 19.530270 +epoch: 1, batch: 2011, sum loss: 4816.669922, avg loss: 2.656740, ppl: 14.249765 +epoch: 1, batch: 2012, sum loss: 4559.484375, avg loss: 2.715595, ppl: 15.113604 +epoch: 1, batch: 2013, sum loss: 4405.794922, avg loss: 2.881488, ppl: 17.840801 +epoch: 1, batch: 2014, sum loss: 4406.587891, avg loss: 2.839297, ppl: 17.103729 +epoch: 1, batch: 2015, sum loss: 4603.572266, avg loss: 2.913653, ppl: 18.423985 +epoch: 1, batch: 2016, sum loss: 4700.004395, avg loss: 2.941179, ppl: 18.938166 +epoch: 1, batch: 2017, sum loss: 4307.553223, avg loss: 2.833917, ppl: 17.011961 +epoch: 1, batch: 2018, sum loss: 4259.572266, avg loss: 2.868399, ppl: 17.608799 +epoch: 1, batch: 2019, sum loss: 5534.606445, avg loss: 2.835352, ppl: 17.036396 +epoch: 1, batch: 2020, sum loss: 5024.378418, avg loss: 2.778970, ppl: 16.102430 +epoch: 1, batch: 2021, sum loss: 4430.494629, avg loss: 2.804111, ppl: 16.512383 +epoch: 1, batch: 2022, sum loss: 3994.512695, avg loss: 2.661234, ppl: 14.313947 +epoch: 1, batch: 2023, sum loss: 5666.324707, avg loss: 3.123663, ppl: 22.729490 +epoch: 1, batch: 2024, sum loss: 4710.342773, avg loss: 2.735391, ppl: 15.415765 +epoch: 1, batch: 2025, sum loss: 4257.291016, avg loss: 2.755528, ppl: 15.729347 +epoch: 1, batch: 2026, sum loss: 4100.413574, avg loss: 
2.685274, ppl: 14.662220 +epoch: 1, batch: 2027, sum loss: 4622.422363, avg loss: 2.667295, ppl: 14.400962 +epoch: 1, batch: 2028, sum loss: 4261.115723, avg loss: 2.740268, ppl: 15.491129 +epoch: 1, batch: 2029, sum loss: 5333.873047, avg loss: 2.971517, ppl: 19.521513 +epoch: 1, batch: 2030, sum loss: 5379.791504, avg loss: 2.825521, ppl: 16.869732 +epoch: 1, batch: 2031, sum loss: 4493.920898, avg loss: 2.964328, ppl: 19.381670 +epoch: 1, batch: 2032, sum loss: 3133.341064, avg loss: 2.049275, ppl: 7.762271 +epoch: 1, batch: 2033, sum loss: 5393.231445, avg loss: 3.152093, ppl: 23.384968 +epoch: 1, batch: 2034, sum loss: 4312.846191, avg loss: 2.506012, ppl: 12.255952 +epoch: 1, batch: 2035, sum loss: 4903.758301, avg loss: 2.887961, ppl: 17.956665 +epoch: 1, batch: 2036, sum loss: 4426.055176, avg loss: 2.713706, ppl: 15.085085 +epoch: 1, batch: 2037, sum loss: 5180.149414, avg loss: 2.961778, ppl: 19.332308 +epoch: 1, batch: 2038, sum loss: 4898.432129, avg loss: 2.915734, ppl: 18.462351 +epoch: 1, batch: 2039, sum loss: 3977.899902, avg loss: 2.479988, ppl: 11.941116 +epoch: 1, batch: 2040, sum loss: 3522.424805, avg loss: 2.534119, ppl: 12.605316 +epoch: 1, batch: 2041, sum loss: 5943.571289, avg loss: 2.952594, ppl: 19.155581 +epoch: 1, batch: 2042, sum loss: 5014.174805, avg loss: 2.739986, ppl: 15.486774 +epoch: 1, batch: 2043, sum loss: 4667.642578, avg loss: 2.851340, ppl: 17.310955 +epoch: 1, batch: 2044, sum loss: 4158.702637, avg loss: 2.962039, ppl: 19.337360 +epoch: 1, batch: 2045, sum loss: 4712.510742, avg loss: 2.960120, ppl: 19.300282 +epoch: 1, batch: 2046, sum loss: 4222.046387, avg loss: 2.743370, ppl: 15.539265 +epoch: 1, batch: 2047, sum loss: 4256.100586, avg loss: 2.957679, ppl: 19.253239 +epoch: 1, batch: 2048, sum loss: 4621.255859, avg loss: 2.707238, ppl: 14.987828 +epoch: 1, batch: 2049, sum loss: 4761.685059, avg loss: 3.204364, ppl: 24.639826 +epoch: 1, batch: 2050, sum loss: 4437.205078, avg loss: 2.750902, ppl: 15.656751 +epoch: 
1, batch: 2051, sum loss: 4733.222656, avg loss: 2.579413, ppl: 13.189393 +epoch: 1, batch: 2052, sum loss: 4195.594727, avg loss: 2.696398, ppl: 14.826224 +epoch: 1, batch: 2053, sum loss: 3935.889893, avg loss: 2.785485, ppl: 16.207670 +epoch: 1, batch: 2054, sum loss: 5146.671875, avg loss: 2.987041, ppl: 19.826933 +epoch: 1, batch: 2055, sum loss: 4368.379395, avg loss: 2.885323, ppl: 17.909357 +epoch: 1, batch: 2056, sum loss: 4592.895508, avg loss: 2.555868, ppl: 12.882482 +epoch: 1, batch: 2057, sum loss: 4824.017578, avg loss: 2.832659, ppl: 16.990570 +epoch: 1, batch: 2058, sum loss: 5603.356445, avg loss: 3.019050, ppl: 20.471832 +epoch: 1, batch: 2059, sum loss: 3727.524414, avg loss: 2.486674, ppl: 12.021228 +epoch: 1, batch: 2060, sum loss: 4326.554688, avg loss: 2.748764, ppl: 15.623310 +epoch: 1, batch: 2061, sum loss: 5041.681641, avg loss: 2.967441, ppl: 19.442101 +epoch: 1, batch: 2062, sum loss: 4344.168457, avg loss: 2.791882, ppl: 16.311689 +epoch: 1, batch: 2063, sum loss: 4914.240723, avg loss: 2.840601, ppl: 17.126062 +epoch: 1, batch: 2064, sum loss: 4549.324707, avg loss: 2.740557, ppl: 15.495617 +epoch: 1, batch: 2065, sum loss: 5473.818359, avg loss: 2.941332, ppl: 18.941057 +epoch: 1, batch: 2066, sum loss: 5255.244141, avg loss: 2.839138, ppl: 17.101015 +epoch: 1, batch: 2067, sum loss: 4566.163574, avg loss: 2.889977, ppl: 17.992895 +epoch: 1, batch: 2068, sum loss: 4875.220703, avg loss: 2.798634, ppl: 16.422199 +epoch: 1, batch: 2069, sum loss: 4294.129395, avg loss: 2.627986, ppl: 13.845859 +epoch: 1, batch: 2070, sum loss: 5461.150391, avg loss: 2.947194, ppl: 19.052416 +epoch: 1, batch: 2071, sum loss: 4337.034180, avg loss: 2.829116, ppl: 16.930481 +epoch: 1, batch: 2072, sum loss: 5211.767090, avg loss: 3.035392, ppl: 20.809124 +epoch: 1, batch: 2073, sum loss: 4403.105469, avg loss: 2.717967, ppl: 15.149486 +epoch: 1, batch: 2074, sum loss: 4363.416992, avg loss: 2.636506, ppl: 13.964325 +epoch: 1, batch: 2075, sum loss: 
4993.260254, avg loss: 3.101404, ppl: 22.229134 +epoch: 1, batch: 2076, sum loss: 4533.712402, avg loss: 2.885877, ppl: 17.919273 +epoch: 1, batch: 2077, sum loss: 4630.781738, avg loss: 2.945790, ppl: 19.025682 +epoch: 1, batch: 2078, sum loss: 4315.963379, avg loss: 2.649456, ppl: 14.146338 +epoch: 1, batch: 2079, sum loss: 5354.572266, avg loss: 2.650779, ppl: 14.165062 +epoch: 1, batch: 2080, sum loss: 4469.705566, avg loss: 2.643232, ppl: 14.058573 +epoch: 1, batch: 2081, sum loss: 4351.150879, avg loss: 2.760882, ppl: 15.813790 +epoch: 1, batch: 2082, sum loss: 4882.280273, avg loss: 2.794665, ppl: 16.357153 +epoch: 1, batch: 2083, sum loss: 4723.213379, avg loss: 2.698979, ppl: 14.864550 +epoch: 1, batch: 2084, sum loss: 4616.272461, avg loss: 2.894215, ppl: 18.069305 +epoch: 1, batch: 2085, sum loss: 4843.793457, avg loss: 2.712090, ppl: 15.060727 +epoch: 1, batch: 2086, sum loss: 4170.934570, avg loss: 2.673676, ppl: 14.493149 +epoch: 1, batch: 2087, sum loss: 4984.394531, avg loss: 2.773731, ppl: 16.018288 +epoch: 1, batch: 2088, sum loss: 4654.606445, avg loss: 2.726776, ppl: 15.283531 +epoch: 1, batch: 2089, sum loss: 5691.431152, avg loss: 2.979807, ppl: 19.684015 +epoch: 1, batch: 2090, sum loss: 3903.843262, avg loss: 2.606037, ppl: 13.545259 +epoch: 1, batch: 2091, sum loss: 4873.292969, avg loss: 2.770491, ppl: 15.966466 +epoch: 1, batch: 2092, sum loss: 3508.110352, avg loss: 2.721575, ppl: 15.204254 +epoch: 1, batch: 2093, sum loss: 4763.845215, avg loss: 2.857735, ppl: 17.422028 +epoch: 1, batch: 2094, sum loss: 4628.289062, avg loss: 2.698711, ppl: 14.860559 +epoch: 1, batch: 2095, sum loss: 5880.973633, avg loss: 3.063007, ppl: 21.391792 +epoch: 1, batch: 2096, sum loss: 3712.052734, avg loss: 2.615964, ppl: 13.680397 +epoch: 1, batch: 2097, sum loss: 4973.391602, avg loss: 2.845190, ppl: 17.204824 +epoch: 1, batch: 2098, sum loss: 4032.054443, avg loss: 2.566553, ppl: 13.020863 +epoch: 1, batch: 2099, sum loss: 4610.996582, avg loss: 
2.959561, ppl: 19.289509 +epoch: 1, batch: 2100, sum loss: 4785.762695, avg loss: 2.585501, ppl: 13.269938 +epoch: 1, batch: 2101, sum loss: 5021.017578, avg loss: 3.167834, ppl: 23.755980 +epoch: 1, batch: 2102, sum loss: 4290.059570, avg loss: 2.816848, ppl: 16.724054 +epoch: 1, batch: 2103, sum loss: 4482.241699, avg loss: 3.093335, ppl: 22.050488 +epoch: 1, batch: 2104, sum loss: 5117.148438, avg loss: 2.844440, ppl: 17.191936 +epoch: 1, batch: 2105, sum loss: 5986.582520, avg loss: 3.413103, ppl: 30.359299 +epoch: 1, batch: 2106, sum loss: 4347.296387, avg loss: 2.821088, ppl: 16.795111 +epoch: 1, batch: 2107, sum loss: 4655.935059, avg loss: 2.599629, ppl: 13.458739 +epoch: 1, batch: 2108, sum loss: 4589.774414, avg loss: 2.917848, ppl: 18.501423 +epoch: 1, batch: 2109, sum loss: 4968.942871, avg loss: 2.799404, ppl: 16.434855 +epoch: 1, batch: 2110, sum loss: 4618.634766, avg loss: 2.833518, ppl: 17.005184 +epoch: 1, batch: 2111, sum loss: 5647.191406, avg loss: 2.852117, ppl: 17.324411 +epoch: 1, batch: 2112, sum loss: 4680.639160, avg loss: 3.019767, ppl: 20.486523 +epoch: 1, batch: 2113, sum loss: 4272.158691, avg loss: 2.880754, ppl: 17.827719 +epoch: 1, batch: 2114, sum loss: 4339.508301, avg loss: 2.682020, ppl: 14.614584 +epoch: 1, batch: 2115, sum loss: 4216.895996, avg loss: 2.685921, ppl: 14.671707 +epoch: 1, batch: 2116, sum loss: 4839.414551, avg loss: 2.863559, ppl: 17.523779 +epoch: 1, batch: 2117, sum loss: 5644.943359, avg loss: 3.123931, ppl: 22.735577 +epoch: 1, batch: 2118, sum loss: 4191.268555, avg loss: 2.922781, ppl: 18.592932 +epoch: 1, batch: 2119, sum loss: 4303.201172, avg loss: 2.720102, ppl: 15.181869 +epoch: 1, batch: 2120, sum loss: 4685.112305, avg loss: 2.841184, ppl: 17.136042 +epoch: 1, batch: 2121, sum loss: 4209.725586, avg loss: 2.595392, ppl: 13.401836 +epoch: 1, batch: 2122, sum loss: 4205.002930, avg loss: 2.546943, ppl: 12.768015 +epoch: 1, batch: 2123, sum loss: 4906.855469, avg loss: 2.939997, ppl: 18.915798 
+epoch: 1, batch: 2124, sum loss: 4614.592285, avg loss: 2.896794, ppl: 18.115965 +epoch: 1, batch: 2125, sum loss: 5125.542480, avg loss: 2.844363, ppl: 17.190609 +epoch: 1, batch: 2126, sum loss: 4697.062012, avg loss: 2.631407, ppl: 13.893311 +epoch: 1, batch: 2127, sum loss: 4943.069824, avg loss: 2.912828, ppl: 18.408792 +epoch: 1, batch: 2128, sum loss: 4274.423828, avg loss: 2.720830, ppl: 15.192926 +epoch: 1, batch: 2129, sum loss: 4662.091797, avg loss: 2.865453, ppl: 17.557001 +epoch: 1, batch: 2130, sum loss: 4463.801270, avg loss: 2.911808, ppl: 18.390013 +epoch: 1, batch: 2131, sum loss: 4051.711426, avg loss: 2.511910, ppl: 12.328461 +epoch: 1, batch: 2132, sum loss: 5284.356934, avg loss: 2.901898, ppl: 18.208679 +epoch: 1, batch: 2133, sum loss: 4524.319824, avg loss: 2.803172, ppl: 16.496893 +epoch: 1, batch: 2134, sum loss: 4624.822266, avg loss: 2.821734, ppl: 16.805965 +epoch: 1, batch: 2135, sum loss: 5114.289551, avg loss: 2.935872, ppl: 18.837929 +epoch: 1, batch: 2136, sum loss: 3949.958008, avg loss: 2.538533, ppl: 12.661089 +epoch: 1, batch: 2137, sum loss: 4155.743164, avg loss: 2.527824, ppl: 12.526224 +epoch: 1, batch: 2138, sum loss: 4197.238281, avg loss: 2.851385, ppl: 17.311735 +epoch: 1, batch: 2139, sum loss: 5750.301270, avg loss: 3.050558, ppl: 21.127121 +epoch: 1, batch: 2140, sum loss: 5087.298828, avg loss: 2.832572, ppl: 16.989096 +epoch: 1, batch: 2141, sum loss: 4544.422852, avg loss: 2.595330, ppl: 13.401012 +epoch: 1, batch: 2142, sum loss: 4426.578125, avg loss: 2.746016, ppl: 15.580440 +epoch: 1, batch: 2143, sum loss: 4352.597656, avg loss: 2.660512, ppl: 14.303614 +epoch: 1, batch: 2144, sum loss: 5248.849609, avg loss: 2.982301, ppl: 19.733166 +epoch: 1, batch: 2145, sum loss: 3944.313721, avg loss: 2.659686, ppl: 14.291795 +epoch: 1, batch: 2146, sum loss: 4854.013184, avg loss: 2.853623, ppl: 17.350533 +epoch: 1, batch: 2147, sum loss: 4935.221680, avg loss: 2.836334, ppl: 17.053139 +epoch: 1, batch: 2148, sum 
loss: 5360.970703, avg loss: 3.113223, ppl: 22.493431 +epoch: 1, batch: 2149, sum loss: 4161.247070, avg loss: 2.743076, ppl: 15.534701 +epoch: 1, batch: 2150, sum loss: 4540.608398, avg loss: 2.746889, ppl: 15.594049 +epoch: 1, batch: 2151, sum loss: 4524.014160, avg loss: 2.881538, ppl: 17.841686 +epoch: 1, batch: 2152, sum loss: 4872.433594, avg loss: 2.816436, ppl: 16.717157 +epoch: 1, batch: 2153, sum loss: 4463.140137, avg loss: 2.625377, ppl: 13.809775 +epoch: 1, batch: 2154, sum loss: 3948.552002, avg loss: 2.429878, ppl: 11.357496 +epoch: 1, batch: 2155, sum loss: 5561.170898, avg loss: 3.165151, ppl: 23.692329 +epoch: 1, batch: 2156, sum loss: 4706.019043, avg loss: 2.848680, ppl: 17.264977 +epoch: 1, batch: 2157, sum loss: 4744.994141, avg loss: 2.778100, ppl: 16.088417 +epoch: 1, batch: 2158, sum loss: 4319.582520, avg loss: 2.704811, ppl: 14.951485 +epoch: 1, batch: 2159, sum loss: 3845.753906, avg loss: 2.513565, ppl: 12.348873 +epoch: 1, batch: 2160, sum loss: 4380.532227, avg loss: 2.788372, ppl: 16.254536 +epoch: 1, batch: 2161, sum loss: 4899.740723, avg loss: 2.833858, ppl: 17.010967 +epoch: 1, batch: 2162, sum loss: 4831.841309, avg loss: 2.801068, ppl: 16.462212 +epoch: 1, batch: 2163, sum loss: 5028.266602, avg loss: 2.694676, ppl: 14.800729 +epoch: 1, batch: 2164, sum loss: 5054.379883, avg loss: 2.945443, ppl: 19.019085 +epoch: 1, batch: 2165, sum loss: 4818.622559, avg loss: 2.772510, ppl: 15.998741 +epoch: 1, batch: 2166, sum loss: 4975.107422, avg loss: 2.796575, ppl: 16.388426 +epoch: 1, batch: 2167, sum loss: 4721.201172, avg loss: 2.618525, ppl: 13.715482 +epoch: 1, batch: 2168, sum loss: 4947.100586, avg loss: 2.806070, ppl: 16.544764 +epoch: 1, batch: 2169, sum loss: 6113.438965, avg loss: 2.960503, ppl: 19.307688 +epoch: 1, batch: 2170, sum loss: 5103.306152, avg loss: 2.637367, ppl: 13.976362 +epoch: 1, batch: 2171, sum loss: 5186.760254, avg loss: 2.741417, ppl: 15.508941 +epoch: 1, batch: 2172, sum loss: 4945.472656, avg loss: 
2.765924, ppl: 15.893723 +epoch: 1, batch: 2173, sum loss: 4559.761719, avg loss: 2.893250, ppl: 18.051878 +epoch: 1, batch: 2174, sum loss: 4734.557129, avg loss: 3.064438, ppl: 21.422426 +epoch: 1, batch: 2175, sum loss: 4970.086914, avg loss: 2.925302, ppl: 18.639847 +epoch: 1, batch: 2176, sum loss: 4059.000488, avg loss: 2.510205, ppl: 12.307447 +epoch: 1, batch: 2177, sum loss: 4123.868164, avg loss: 2.653712, ppl: 14.206674 +epoch: 1, batch: 2178, sum loss: 4880.415527, avg loss: 2.940009, ppl: 18.916023 +epoch: 1, batch: 2179, sum loss: 4850.649414, avg loss: 2.743580, ppl: 15.542529 +epoch: 1, batch: 2180, sum loss: 5215.966309, avg loss: 3.090028, ppl: 21.977684 +epoch: 1, batch: 2181, sum loss: 4004.732910, avg loss: 2.669822, ppl: 14.437399 +epoch: 1, batch: 2182, sum loss: 4346.867188, avg loss: 2.790030, ppl: 16.281515 +epoch: 1, batch: 2183, sum loss: 4982.376465, avg loss: 3.006866, ppl: 20.223913 +epoch: 1, batch: 2184, sum loss: 4615.730469, avg loss: 2.783915, ppl: 16.182243 +epoch: 1, batch: 2185, sum loss: 4859.607422, avg loss: 2.836899, ppl: 17.062765 +epoch: 1, batch: 2186, sum loss: 5437.455078, avg loss: 3.116020, ppl: 22.556425 +epoch: 1, batch: 2187, sum loss: 5287.236816, avg loss: 3.065065, ppl: 21.435854 +epoch: 1, batch: 2188, sum loss: 4987.543457, avg loss: 3.048621, ppl: 21.086245 +epoch: 1, batch: 2189, sum loss: 4627.295898, avg loss: 2.806122, ppl: 16.545635 +epoch: 1, batch: 2190, sum loss: 4946.505859, avg loss: 2.935612, ppl: 18.833021 +epoch: 1, batch: 2191, sum loss: 4147.137207, avg loss: 2.770299, ppl: 15.963402 +epoch: 1, batch: 2192, sum loss: 4576.910156, avg loss: 2.765505, ppl: 15.887058 +epoch: 1, batch: 2193, sum loss: 5355.808105, avg loss: 2.902877, ppl: 18.226505 +epoch: 1, batch: 2194, sum loss: 5486.871094, avg loss: 2.832664, ppl: 16.990671 +epoch: 1, batch: 2195, sum loss: 3979.416992, avg loss: 2.589081, ppl: 13.317525 +epoch: 1, batch: 2196, sum loss: 4014.764404, avg loss: 2.694473, ppl: 14.797715 
+epoch: 1, batch: 2197, sum loss: 4442.885742, avg loss: 2.553383, ppl: 12.850499 +epoch: 1, batch: 2198, sum loss: 4831.579590, avg loss: 2.734340, ppl: 15.399583 +epoch: 1, batch: 2199, sum loss: 3953.654053, avg loss: 2.597670, ppl: 13.432408 +epoch: 1, batch: 2200, sum loss: 4424.882812, avg loss: 2.791724, ppl: 16.309116 +epoch: 1, batch: 2201, sum loss: 4149.696289, avg loss: 2.899858, ppl: 18.171556 +epoch: 1, batch: 2202, sum loss: 4161.451172, avg loss: 2.776152, ppl: 16.057108 +epoch: 1, batch: 2203, sum loss: 4839.900391, avg loss: 2.762500, ppl: 15.839396 +epoch: 1, batch: 2204, sum loss: 5494.554199, avg loss: 2.830785, ppl: 16.958773 +epoch: 1, batch: 2205, sum loss: 4308.037598, avg loss: 2.799245, ppl: 16.432230 +epoch: 1, batch: 2206, sum loss: 5318.367676, avg loss: 2.912578, ppl: 18.404190 +epoch: 1, batch: 2207, sum loss: 4468.091309, avg loss: 2.771769, ppl: 15.986887 +epoch: 1, batch: 2208, sum loss: 4315.150391, avg loss: 2.813005, ppl: 16.659914 +epoch: 1, batch: 2209, sum loss: 4746.280273, avg loss: 2.951667, ppl: 19.137827 +epoch: 1, batch: 2210, sum loss: 4435.298340, avg loss: 2.739530, ppl: 15.479702 +epoch: 1, batch: 2211, sum loss: 3986.915771, avg loss: 2.657944, ppl: 14.266922 +epoch: 1, batch: 2212, sum loss: 4000.877197, avg loss: 2.809605, ppl: 16.603357 +epoch: 1, batch: 2213, sum loss: 3863.647461, avg loss: 2.479877, ppl: 11.939790 +epoch: 1, batch: 2214, sum loss: 4470.267090, avg loss: 2.968305, ppl: 19.458906 +epoch: 1, batch: 2215, sum loss: 3864.558105, avg loss: 2.584989, ppl: 13.263138 +epoch: 1, batch: 2216, sum loss: 4289.016602, avg loss: 2.680635, ppl: 14.594361 +epoch: 1, batch: 2217, sum loss: 4273.313477, avg loss: 2.569641, ppl: 13.061139 +epoch: 1, batch: 2218, sum loss: 5144.736328, avg loss: 2.836128, ppl: 17.049625 +epoch: 1, batch: 2219, sum loss: 4689.794434, avg loss: 2.784914, ppl: 16.198421 +epoch: 1, batch: 2220, sum loss: 4996.529297, avg loss: 2.724389, ppl: 15.247092 +epoch: 1, batch: 2221, sum 
loss: 4178.494629, avg loss: 2.844448, ppl: 17.192072 +epoch: 1, batch: 2222, sum loss: 5089.963379, avg loss: 2.948994, ppl: 19.086737 +epoch: 1, batch: 2223, sum loss: 4920.687500, avg loss: 2.899639, ppl: 18.167587 +epoch: 1, batch: 2224, sum loss: 5269.723633, avg loss: 2.990763, ppl: 19.900856 +epoch: 1, batch: 2225, sum loss: 5156.790039, avg loss: 2.712672, ppl: 15.069491 +epoch: 1, batch: 2226, sum loss: 4369.871094, avg loss: 2.895872, ppl: 18.099283 +epoch: 1, batch: 2227, sum loss: 4204.970215, avg loss: 2.517946, ppl: 12.403097 +epoch: 1, batch: 2228, sum loss: 4141.908203, avg loss: 2.794810, ppl: 16.359522 +epoch: 1, batch: 2229, sum loss: 4666.322266, avg loss: 2.861019, ppl: 17.479332 +epoch: 1, batch: 2230, sum loss: 4508.736816, avg loss: 2.873637, ppl: 17.701284 +epoch: 1, batch: 2231, sum loss: 5279.242188, avg loss: 2.873839, ppl: 17.704859 +epoch: 1, batch: 2232, sum loss: 5751.152344, avg loss: 3.093681, ppl: 22.058117 +epoch: 1, batch: 2233, sum loss: 5141.185547, avg loss: 2.800210, ppl: 16.448101 +epoch: 1, batch: 2234, sum loss: 5150.085938, avg loss: 2.983827, ppl: 19.763309 +epoch: 1, batch: 2235, sum loss: 4954.206055, avg loss: 3.092513, ppl: 22.032377 +epoch: 1, batch: 2236, sum loss: 4690.783203, avg loss: 2.798797, ppl: 16.424870 +epoch: 1, batch: 2237, sum loss: 4503.539062, avg loss: 2.747736, ppl: 15.607253 +epoch: 1, batch: 2238, sum loss: 5130.576660, avg loss: 2.831444, ppl: 16.969948 +epoch: 1, batch: 2239, sum loss: 4348.518555, avg loss: 2.627504, ppl: 13.839183 +epoch: 1, batch: 2240, sum loss: 4169.739258, avg loss: 2.752303, ppl: 15.678700 +epoch: 1, batch: 2241, sum loss: 4989.024414, avg loss: 2.946854, ppl: 19.045948 +epoch: 1, batch: 2242, sum loss: 5594.363770, avg loss: 2.871850, ppl: 17.669678 +epoch: 1, batch: 2243, sum loss: 4250.109863, avg loss: 2.774223, ppl: 16.026171 +epoch: 1, batch: 2244, sum loss: 5164.296875, avg loss: 3.030691, ppl: 20.711533 +epoch: 1, batch: 2245, sum loss: 4104.481445, avg loss: 
2.602715, ppl: 13.500342 +epoch: 1, batch: 2246, sum loss: 3952.117920, avg loss: 2.506099, ppl: 12.257022 +epoch: 1, batch: 2247, sum loss: 4470.374023, avg loss: 2.831142, ppl: 16.964830 +epoch: 1, batch: 2248, sum loss: 4263.584961, avg loss: 2.669746, ppl: 14.436308 +epoch: 1, batch: 2249, sum loss: 4787.389160, avg loss: 2.764081, ppl: 15.864462 +epoch: 1, batch: 2250, sum loss: 4329.932617, avg loss: 2.818967, ppl: 16.759523 +epoch: 1, batch: 2251, sum loss: 5587.640137, avg loss: 3.140888, ppl: 23.124397 +epoch: 1, batch: 2252, sum loss: 3936.893066, avg loss: 2.568097, ppl: 13.040988 +epoch: 1, batch: 2253, sum loss: 4857.429688, avg loss: 2.961847, ppl: 19.333654 +epoch: 1, batch: 2254, sum loss: 4499.179688, avg loss: 2.869375, ppl: 17.625994 +epoch: 1, batch: 2255, sum loss: 5106.166016, avg loss: 2.884840, ppl: 17.900696 +epoch: 1, batch: 2256, sum loss: 5057.494141, avg loss: 2.823838, ppl: 16.841364 +epoch: 1, batch: 2257, sum loss: 5239.849121, avg loss: 2.952028, ppl: 19.144732 +epoch: 1, batch: 2258, sum loss: 4159.078125, avg loss: 2.649094, ppl: 14.141226 +epoch: 1, batch: 2259, sum loss: 5571.251953, avg loss: 3.064495, ppl: 21.423643 +epoch: 1, batch: 2260, sum loss: 4171.706055, avg loss: 2.939892, ppl: 18.913795 +epoch: 1, batch: 2261, sum loss: 5275.291992, avg loss: 2.817998, ppl: 16.743296 +epoch: 1, batch: 2262, sum loss: 4739.715332, avg loss: 2.923945, ppl: 18.614580 +epoch: 1, batch: 2263, sum loss: 4721.093750, avg loss: 2.832090, ppl: 16.980907 +epoch: 1, batch: 2264, sum loss: 4712.923828, avg loss: 2.911009, ppl: 18.375336 +epoch: 1, batch: 2265, sum loss: 5318.355469, avg loss: 2.831925, ppl: 16.978115 +epoch: 1, batch: 2266, sum loss: 6023.030273, avg loss: 3.008507, ppl: 20.257124 +epoch: 1, batch: 2267, sum loss: 4743.825195, avg loss: 2.919277, ppl: 18.527885 +epoch: 1, batch: 2268, sum loss: 4555.145020, avg loss: 2.841638, ppl: 17.143818 +epoch: 1, batch: 2269, sum loss: 4021.081787, avg loss: 2.612789, ppl: 13.637027 
+epoch: 1, batch: 2270, sum loss: 4498.406250, avg loss: 2.940135, ppl: 18.918400 +epoch: 1, batch: 2271, sum loss: 4556.606445, avg loss: 2.826679, ppl: 16.889278 +epoch: 1, batch: 2272, sum loss: 4896.999023, avg loss: 2.819228, ppl: 16.763906 +epoch: 1, batch: 2273, sum loss: 4225.283203, avg loss: 2.868488, ppl: 17.610376 +epoch: 1, batch: 2274, sum loss: 3824.807617, avg loss: 2.568709, ppl: 13.048966 +epoch: 1, batch: 2275, sum loss: 4674.486328, avg loss: 2.841633, ppl: 17.143736 +epoch: 1, batch: 2276, sum loss: 4363.588867, avg loss: 2.571355, ppl: 13.083536 +epoch: 1, batch: 2277, sum loss: 3799.672852, avg loss: 2.478586, ppl: 11.924397 +epoch: 1, batch: 2278, sum loss: 5366.556641, avg loss: 2.960042, ppl: 19.298792 +epoch: 1, batch: 2279, sum loss: 4122.706543, avg loss: 2.622587, ppl: 13.771303 +epoch: 1, batch: 2280, sum loss: 5403.056152, avg loss: 2.928486, ppl: 18.699291 +epoch: 1, batch: 2281, sum loss: 5862.202637, avg loss: 2.959214, ppl: 19.282804 +epoch: 1, batch: 2282, sum loss: 5637.825195, avg loss: 2.981399, ppl: 19.715376 +epoch: 1, batch: 2283, sum loss: 4871.170410, avg loss: 2.936209, ppl: 18.844267 +epoch: 1, batch: 2284, sum loss: 4423.780273, avg loss: 2.806967, ppl: 16.559616 +epoch: 1, batch: 2285, sum loss: 3919.187988, avg loss: 2.649890, ppl: 14.152488 +epoch: 1, batch: 2286, sum loss: 4292.798828, avg loss: 2.730788, ppl: 15.344974 +epoch: 1, batch: 2287, sum loss: 3636.703125, avg loss: 2.575569, ppl: 13.138786 +epoch: 1, batch: 2288, sum loss: 4386.085449, avg loss: 2.906617, ppl: 18.294809 +epoch: 1, batch: 2289, sum loss: 3683.351807, avg loss: 2.455568, ppl: 11.653049 +epoch: 1, batch: 2290, sum loss: 5446.188965, avg loss: 2.931210, ppl: 18.750309 +epoch: 1, batch: 2291, sum loss: 4865.566895, avg loss: 2.769247, ppl: 15.946619 +epoch: 1, batch: 2292, sum loss: 5105.569336, avg loss: 2.935923, ppl: 18.838877 +epoch: 1, batch: 2293, sum loss: 5153.890625, avg loss: 2.879268, ppl: 17.801245 +epoch: 1, batch: 2294, sum 
loss: 5747.353516, avg loss: 2.845225, ppl: 17.205423 +epoch: 1, batch: 2295, sum loss: 4439.116211, avg loss: 2.629808, ppl: 13.871109 +epoch: 1, batch: 2296, sum loss: 4838.028809, avg loss: 2.881494, ppl: 17.840912 +epoch: 1, batch: 2297, sum loss: 5193.988281, avg loss: 3.234115, ppl: 25.383888 +epoch: 1, batch: 2298, sum loss: 5196.078125, avg loss: 3.031551, ppl: 20.729357 +epoch: 1, batch: 2299, sum loss: 5524.057617, avg loss: 3.040208, ppl: 20.909590 +epoch: 1, batch: 2300, sum loss: 4809.764648, avg loss: 2.881824, ppl: 17.846800 +epoch: 1, batch: 2301, sum loss: 3965.637451, avg loss: 2.497253, ppl: 12.149074 +epoch: 1, batch: 2302, sum loss: 4662.997070, avg loss: 2.783879, ppl: 16.181669 +epoch: 1, batch: 2303, sum loss: 5193.326172, avg loss: 2.864493, ppl: 17.540165 +epoch: 1, batch: 2304, sum loss: 4612.005859, avg loss: 3.008484, ppl: 20.256666 +epoch: 1, batch: 2305, sum loss: 5650.499512, avg loss: 2.955282, ppl: 19.207142 +epoch: 1, batch: 2306, sum loss: 4500.150391, avg loss: 2.864513, ppl: 17.540516 +epoch: 1, batch: 2307, sum loss: 5752.985840, avg loss: 3.002602, ppl: 20.137875 +epoch: 1, batch: 2308, sum loss: 4317.744629, avg loss: 2.831308, ppl: 16.967638 +epoch: 1, batch: 2309, sum loss: 5204.172363, avg loss: 3.063080, ppl: 21.393343 +epoch: 1, batch: 2310, sum loss: 4481.064941, avg loss: 2.776372, ppl: 16.060654 +epoch: 1, batch: 2311, sum loss: 5274.120605, avg loss: 3.018959, ppl: 20.469963 +epoch: 1, batch: 2312, sum loss: 4605.692871, avg loss: 2.659176, ppl: 14.284515 +epoch: 1, batch: 2313, sum loss: 5257.227051, avg loss: 2.917440, ppl: 18.493885 +epoch: 1, batch: 2314, sum loss: 4006.042725, avg loss: 2.625192, ppl: 13.807224 +epoch: 1, batch: 2315, sum loss: 4352.998047, avg loss: 2.581849, ppl: 13.221567 +epoch: 1, batch: 2316, sum loss: 6186.835938, avg loss: 3.044703, ppl: 21.003788 +epoch: 1, batch: 2317, sum loss: 4344.586914, avg loss: 2.636278, ppl: 13.961149 +epoch: 1, batch: 2318, sum loss: 5141.054688, avg loss: 
2.946163, ppl: 19.032787 +epoch: 1, batch: 2319, sum loss: 5137.676758, avg loss: 2.961197, ppl: 19.321083 +epoch: 1, batch: 2320, sum loss: 5420.170410, avg loss: 2.680598, ppl: 14.593825 +epoch: 1, batch: 2321, sum loss: 4478.689453, avg loss: 2.683457, ppl: 14.635600 +epoch: 1, batch: 2322, sum loss: 5172.546875, avg loss: 2.979578, ppl: 19.679506 +epoch: 1, batch: 2323, sum loss: 4161.980469, avg loss: 2.583477, ppl: 13.243098 +epoch: 1, batch: 2324, sum loss: 4362.391113, avg loss: 2.773294, ppl: 16.011284 +epoch: 1, batch: 2325, sum loss: 3596.061523, avg loss: 2.306646, ppl: 10.040695 +epoch: 1, batch: 2326, sum loss: 5338.932129, avg loss: 2.952949, ppl: 19.162386 +epoch: 1, batch: 2327, sum loss: 4634.201172, avg loss: 2.873032, ppl: 17.690580 +epoch: 1, batch: 2328, sum loss: 4708.707520, avg loss: 2.604374, ppl: 13.522754 +epoch: 1, batch: 2329, sum loss: 4804.898438, avg loss: 2.906775, ppl: 18.297684 +epoch: 1, batch: 2330, sum loss: 4221.128906, avg loss: 2.596020, ppl: 13.410262 +epoch: 1, batch: 2331, sum loss: 4531.460938, avg loss: 2.733089, ppl: 15.380320 +epoch: 1, batch: 2332, sum loss: 4358.568359, avg loss: 2.519404, ppl: 12.421187 +epoch: 1, batch: 2333, sum loss: 4526.827148, avg loss: 2.831036, ppl: 16.963032 +epoch: 1, batch: 2334, sum loss: 4357.078613, avg loss: 2.559976, ppl: 12.935502 +epoch: 1, batch: 2335, sum loss: 3873.368164, avg loss: 2.656631, ppl: 14.248206 +epoch: 1, batch: 2336, sum loss: 5081.728516, avg loss: 3.035680, ppl: 20.815134 +epoch: 1, batch: 2337, sum loss: 4964.997559, avg loss: 2.851808, ppl: 17.319063 +epoch: 1, batch: 2338, sum loss: 5051.385254, avg loss: 2.801656, ppl: 16.471897 +epoch: 1, batch: 2339, sum loss: 4809.133301, avg loss: 2.800893, ppl: 16.459339 +epoch: 1, batch: 2340, sum loss: 6065.431152, avg loss: 3.116871, ppl: 22.575632 +epoch: 1, batch: 2341, sum loss: 3398.223877, avg loss: 2.364805, ppl: 10.641958 +epoch: 1, batch: 2342, sum loss: 4979.701172, avg loss: 2.998014, ppl: 20.045687 
+epoch: 1, batch: 2343, sum loss: 4523.093750, avg loss: 2.714942, ppl: 15.103737 +epoch: 1, batch: 2344, sum loss: 4564.669922, avg loss: 2.912999, ppl: 18.411940 +epoch: 1, batch: 2345, sum loss: 5096.461914, avg loss: 2.949341, ppl: 19.093372 +epoch: 1, batch: 2346, sum loss: 5270.122559, avg loss: 2.851798, ppl: 17.318890 +epoch: 1, batch: 2347, sum loss: 5458.759766, avg loss: 2.941142, ppl: 18.937462 +epoch: 1, batch: 2348, sum loss: 5047.551270, avg loss: 2.851724, ppl: 17.317610 +epoch: 1, batch: 2349, sum loss: 4395.424316, avg loss: 2.641481, ppl: 14.033975 +epoch: 1, batch: 2350, sum loss: 3993.197754, avg loss: 2.725732, ppl: 15.267591 +epoch: 1, batch: 2351, sum loss: 4822.395020, avg loss: 2.674651, ppl: 14.507281 +epoch: 1, batch: 2352, sum loss: 5212.370605, avg loss: 2.929944, ppl: 18.726582 +epoch: 1, batch: 2353, sum loss: 4431.635742, avg loss: 2.890826, ppl: 18.008173 +epoch: 1, batch: 2354, sum loss: 3956.336426, avg loss: 2.782234, ppl: 16.155067 +epoch: 1, batch: 2355, sum loss: 3963.322510, avg loss: 2.742784, ppl: 15.530158 +epoch: 1, batch: 2356, sum loss: 4281.249023, avg loss: 2.580620, ppl: 13.205327 +epoch: 1, batch: 2357, sum loss: 4375.875000, avg loss: 2.595418, ppl: 13.402191 +epoch: 1, batch: 2358, sum loss: 4375.452148, avg loss: 2.707582, ppl: 14.992975 +epoch: 1, batch: 2359, sum loss: 4194.198730, avg loss: 2.662983, ppl: 14.339001 +epoch: 1, batch: 2360, sum loss: 5300.286133, avg loss: 3.030467, ppl: 20.706892 +epoch: 1, batch: 2361, sum loss: 3789.838867, avg loss: 2.630006, ppl: 13.873857 +epoch: 1, batch: 2362, sum loss: 4068.889160, avg loss: 2.802265, ppl: 16.481943 +epoch: 1, batch: 2363, sum loss: 5205.371094, avg loss: 2.838261, ppl: 17.086029 +epoch: 1, batch: 2364, sum loss: 3938.298096, avg loss: 2.522933, ppl: 12.465100 +epoch: 1, batch: 2365, sum loss: 4276.402344, avg loss: 2.817129, ppl: 16.728760 +epoch: 1, batch: 2366, sum loss: 4660.830078, avg loss: 2.855901, ppl: 17.390099 +epoch: 1, batch: 2367, sum 
loss: 3882.096436, avg loss: 2.458579, ppl: 11.688191 +epoch: 1, batch: 2368, sum loss: 5134.127441, avg loss: 3.096579, ppl: 22.122133 +epoch: 1, batch: 2369, sum loss: 4200.348633, avg loss: 2.820919, ppl: 16.792276 +epoch: 1, batch: 2370, sum loss: 4602.337891, avg loss: 2.860372, ppl: 17.468018 +epoch: 1, batch: 2371, sum loss: 5011.957520, avg loss: 2.728338, ppl: 15.307432 +epoch: 1, batch: 2372, sum loss: 4567.522461, avg loss: 2.876274, ppl: 17.748014 +epoch: 1, batch: 2373, sum loss: 5190.670898, avg loss: 2.793687, ppl: 16.341160 +epoch: 1, batch: 2374, sum loss: 4522.589844, avg loss: 2.646337, ppl: 14.102287 +epoch: 1, batch: 2375, sum loss: 4632.221191, avg loss: 2.994325, ppl: 19.971882 +epoch: 1, batch: 2376, sum loss: 5186.763672, avg loss: 3.033195, ppl: 20.763466 +epoch: 1, batch: 2377, sum loss: 4713.427734, avg loss: 2.749958, ppl: 15.641972 +epoch: 1, batch: 2378, sum loss: 4727.125977, avg loss: 2.764401, ppl: 15.869534 +epoch: 1, batch: 2379, sum loss: 4670.877930, avg loss: 2.860305, ppl: 17.466850 +epoch: 1, batch: 2380, sum loss: 3577.667480, avg loss: 2.687954, ppl: 14.701572 +epoch: 1, batch: 2381, sum loss: 4616.703125, avg loss: 2.853339, ppl: 17.345610 +epoch: 1, batch: 2382, sum loss: 4923.306641, avg loss: 2.832743, ppl: 16.992001 +epoch: 1, batch: 2383, sum loss: 5854.648926, avg loss: 2.953910, ppl: 19.180798 +epoch: 1, batch: 2384, sum loss: 5931.037109, avg loss: 2.907371, ppl: 18.308603 +epoch: 1, batch: 2385, sum loss: 4592.006836, avg loss: 2.852178, ppl: 17.325478 +epoch: 1, batch: 2386, sum loss: 5494.490723, avg loss: 2.861714, ppl: 17.491484 +epoch: 1, batch: 2387, sum loss: 4502.714844, avg loss: 2.752271, ppl: 15.678192 +epoch: 1, batch: 2388, sum loss: 4682.653320, avg loss: 2.817481, ppl: 16.734644 +epoch: 1, batch: 2389, sum loss: 4188.887207, avg loss: 2.766768, ppl: 15.907146 +epoch: 1, batch: 2390, sum loss: 3943.209717, avg loss: 2.673362, ppl: 14.488605 +epoch: 1, batch: 2391, sum loss: 5406.604980, avg loss: 
2.903655, ppl: 18.240690 +epoch: 1, batch: 2392, sum loss: 4577.278320, avg loss: 2.759059, ppl: 15.784978 +epoch: 1, batch: 2393, sum loss: 5031.111328, avg loss: 2.775020, ppl: 16.038950 +epoch: 1, batch: 2394, sum loss: 4847.926758, avg loss: 2.841692, ppl: 17.144749 +epoch: 1, batch: 2395, sum loss: 6100.570312, avg loss: 3.011141, ppl: 20.310553 +epoch: 1, batch: 2396, sum loss: 4492.725586, avg loss: 2.698334, ppl: 14.854959 +epoch: 1, batch: 2397, sum loss: 5367.613281, avg loss: 3.077760, ppl: 21.709713 +epoch: 1, batch: 2398, sum loss: 3998.854492, avg loss: 2.780845, ppl: 16.132643 +epoch: 1, batch: 2399, sum loss: 4856.115723, avg loss: 2.722038, ppl: 15.211291 +epoch: 1, batch: 2400, sum loss: 5450.073730, avg loss: 2.938045, ppl: 18.878902 +epoch: 1, batch: 2401, sum loss: 3854.941162, avg loss: 2.493494, ppl: 12.103487 +epoch: 1, batch: 2402, sum loss: 4031.507812, avg loss: 2.753762, ppl: 15.701591 +epoch: 1, batch: 2403, sum loss: 4775.159668, avg loss: 2.844050, ppl: 17.185219 +epoch: 1, batch: 2404, sum loss: 4983.736328, avg loss: 3.130488, ppl: 22.885138 +epoch: 1, batch: 2405, sum loss: 4777.565918, avg loss: 2.823621, ppl: 16.837702 +epoch: 1, batch: 2406, sum loss: 4512.516113, avg loss: 2.861456, ppl: 17.486973 +epoch: 1, batch: 2407, sum loss: 5450.871094, avg loss: 2.811177, ppl: 16.629473 +epoch: 1, batch: 2408, sum loss: 5034.635742, avg loss: 2.740684, ppl: 15.497579 +epoch: 1, batch: 2409, sum loss: 3631.149170, avg loss: 2.478600, ppl: 11.924559 +epoch: 1, batch: 2410, sum loss: 3969.237305, avg loss: 2.640876, ppl: 14.025489 +epoch: 1, batch: 2411, sum loss: 5606.135254, avg loss: 3.073539, ppl: 21.618275 +epoch: 1, batch: 2412, sum loss: 5026.265625, avg loss: 2.912089, ppl: 18.395184 +epoch: 1, batch: 2413, sum loss: 3987.108398, avg loss: 2.730896, ppl: 15.346635 +epoch: 1, batch: 2414, sum loss: 4693.497559, avg loss: 2.597398, ppl: 13.428745 +epoch: 1, batch: 2415, sum loss: 5224.122559, avg loss: 2.926679, ppl: 18.665546 
+epoch: 1, batch: 2416, sum loss: 5570.337891, avg loss: 3.125891, ppl: 22.780188 +epoch: 1, batch: 2417, sum loss: 6292.293945, avg loss: 3.047116, ppl: 21.054531 +epoch: 1, batch: 2418, sum loss: 3784.928223, avg loss: 2.550491, ppl: 12.813389 +epoch: 1, batch: 2419, sum loss: 3248.259277, avg loss: 2.415063, ppl: 11.190472 +epoch: 1, batch: 2420, sum loss: 5852.011719, avg loss: 3.073536, ppl: 21.618202 +epoch: 1, batch: 2421, sum loss: 5117.622559, avg loss: 2.822737, ppl: 16.822840 +epoch: 1, batch: 2422, sum loss: 4729.830566, avg loss: 2.777352, ppl: 16.076395 +epoch: 1, batch: 2423, sum loss: 4447.809570, avg loss: 2.807961, ppl: 16.576077 +epoch: 1, batch: 2424, sum loss: 5341.277344, avg loss: 2.997350, ppl: 20.032381 +epoch: 1, batch: 2425, sum loss: 5106.271484, avg loss: 2.886530, ppl: 17.930984 +epoch: 1, batch: 2426, sum loss: 4948.119629, avg loss: 2.898723, ppl: 18.150946 +epoch: 1, batch: 2427, sum loss: 5569.297363, avg loss: 2.828490, ppl: 16.919897 +epoch: 1, batch: 2428, sum loss: 4914.460938, avg loss: 2.750118, ppl: 15.644478 +epoch: 1, batch: 2429, sum loss: 5119.351074, avg loss: 2.814377, ppl: 16.682772 +epoch: 1, batch: 2430, sum loss: 5114.788574, avg loss: 2.914409, ppl: 18.437920 +epoch: 1, batch: 2431, sum loss: 4586.828125, avg loss: 2.888431, ppl: 17.965101 +epoch: 1, batch: 2432, sum loss: 4997.641602, avg loss: 2.815573, ppl: 16.702736 +epoch: 1, batch: 2433, sum loss: 4189.932617, avg loss: 2.797018, ppl: 16.395676 +epoch: 1, batch: 2434, sum loss: 5047.100586, avg loss: 3.098281, ppl: 22.159834 +epoch: 1, batch: 2435, sum loss: 4855.446777, avg loss: 2.730848, ppl: 15.345889 +epoch: 1, batch: 2436, sum loss: 4623.714355, avg loss: 2.747305, ppl: 15.600531 +epoch: 1, batch: 2437, sum loss: 3961.897705, avg loss: 2.597966, ppl: 13.436377 +epoch: 1, batch: 2438, sum loss: 4116.153320, avg loss: 2.699117, ppl: 14.866598 +epoch: 1, batch: 2439, sum loss: 3880.144043, avg loss: 2.619949, ppl: 13.735022 +epoch: 1, batch: 2440, sum 
loss: 4759.168457, avg loss: 2.879110, ppl: 17.798418 +epoch: 1, batch: 2441, sum loss: 5303.850586, avg loss: 2.988085, ppl: 19.847630 +epoch: 1, batch: 2442, sum loss: 5028.765137, avg loss: 2.833107, ppl: 16.998192 +epoch: 1, batch: 2443, sum loss: 3968.387695, avg loss: 2.792672, ppl: 16.324587 +epoch: 1, batch: 2444, sum loss: 5096.256836, avg loss: 3.001329, ppl: 20.112251 +epoch: 1, batch: 2445, sum loss: 4986.145508, avg loss: 3.034781, ppl: 20.796423 +epoch: 1, batch: 2446, sum loss: 4912.424316, avg loss: 2.898185, ppl: 18.141197 +epoch: 1, batch: 2447, sum loss: 5042.533203, avg loss: 2.709583, ppl: 15.023006 +epoch: 1, batch: 2448, sum loss: 4456.658203, avg loss: 2.665466, ppl: 14.374640 +epoch: 1, batch: 2449, sum loss: 4957.916016, avg loss: 2.930210, ppl: 18.731571 +epoch: 1, batch: 2450, sum loss: 4856.863770, avg loss: 2.776937, ppl: 16.069733 +epoch: 1, batch: 2451, sum loss: 4802.969727, avg loss: 3.011266, ppl: 20.313105 +epoch: 1, batch: 2452, sum loss: 4241.071289, avg loss: 2.589176, ppl: 13.318798 +epoch: 1, batch: 2453, sum loss: 5559.081055, avg loss: 3.037749, ppl: 20.858244 +epoch: 1, batch: 2454, sum loss: 3574.590332, avg loss: 2.614916, ppl: 13.666070 +epoch: 1, batch: 2455, sum loss: 3985.614258, avg loss: 2.497252, ppl: 12.149062 +epoch: 1, batch: 2456, sum loss: 4569.574707, avg loss: 3.012244, ppl: 20.332981 +epoch: 1, batch: 2457, sum loss: 4467.319336, avg loss: 2.686301, ppl: 14.677291 +epoch: 1, batch: 2458, sum loss: 4252.916992, avg loss: 2.635017, ppl: 13.943545 +epoch: 1, batch: 2459, sum loss: 4808.749512, avg loss: 2.939334, ppl: 18.903246 +epoch: 1, batch: 2460, sum loss: 3274.758545, avg loss: 2.397334, ppl: 10.993832 +epoch: 1, batch: 2461, sum loss: 4166.028809, avg loss: 2.613569, ppl: 13.647670 +epoch: 1, batch: 2462, sum loss: 5282.239746, avg loss: 3.136722, ppl: 23.028259 +epoch: 1, batch: 2463, sum loss: 4241.489746, avg loss: 2.639384, ppl: 14.004571 +epoch: 1, batch: 2464, sum loss: 4005.077637, avg loss: 
2.697022, ppl: 14.835485 +epoch: 1, batch: 2465, sum loss: 4544.636230, avg loss: 2.807064, ppl: 16.561220 +epoch: 1, batch: 2466, sum loss: 5126.419922, avg loss: 2.626240, ppl: 13.821699 +epoch: 1, batch: 2467, sum loss: 5065.607422, avg loss: 2.894633, ppl: 18.076862 +epoch: 1, batch: 2468, sum loss: 4994.224609, avg loss: 2.925732, ppl: 18.647873 +epoch: 1, batch: 2469, sum loss: 4792.740234, avg loss: 2.890676, ppl: 18.005474 +epoch: 1, batch: 2470, sum loss: 3974.022461, avg loss: 2.507270, ppl: 12.271379 +epoch: 1, batch: 2471, sum loss: 5611.072266, avg loss: 3.015085, ppl: 20.390820 +epoch: 1, batch: 2472, sum loss: 4239.352539, avg loss: 2.698506, ppl: 14.857516 +epoch: 1, batch: 2473, sum loss: 4882.604004, avg loss: 2.927221, ppl: 18.675652 +epoch: 1, batch: 2474, sum loss: 4958.909180, avg loss: 3.109034, ppl: 22.399397 +epoch: 1, batch: 2475, sum loss: 4630.441406, avg loss: 2.754575, ppl: 15.714369 +epoch: 1, batch: 2476, sum loss: 3501.534668, avg loss: 2.636698, ppl: 13.967009 +epoch: 1, batch: 2477, sum loss: 3658.706055, avg loss: 2.589318, ppl: 13.320682 +epoch: 1, batch: 2478, sum loss: 4382.643066, avg loss: 2.659371, ppl: 14.287298 +epoch: 1, batch: 2479, sum loss: 4600.269043, avg loss: 2.898721, ppl: 18.150906 +epoch: 1, batch: 2480, sum loss: 4493.679199, avg loss: 2.575174, ppl: 13.133607 +epoch: 1, batch: 2481, sum loss: 4710.561035, avg loss: 2.671901, ppl: 14.467443 +epoch: 1, batch: 2482, sum loss: 4526.061523, avg loss: 2.753079, ppl: 15.690866 +epoch: 1, batch: 2483, sum loss: 4126.936035, avg loss: 2.709741, ppl: 15.025389 +epoch: 1, batch: 2484, sum loss: 5165.153320, avg loss: 2.951516, ppl: 19.134943 +epoch: 1, batch: 2485, sum loss: 4917.115723, avg loss: 2.722656, ppl: 15.220688 +epoch: 1, batch: 2486, sum loss: 4702.507324, avg loss: 2.837965, ppl: 17.080963 +epoch: 1, batch: 2487, sum loss: 5128.420898, avg loss: 2.860246, ppl: 17.465822 +epoch: 1, batch: 2488, sum loss: 4172.375977, avg loss: 2.735984, ppl: 15.424919 
+epoch: 1, batch: 2489, sum loss: 5311.424805, avg loss: 2.954074, ppl: 19.183945 +epoch: 1, batch: 2490, sum loss: 4635.793945, avg loss: 2.695229, ppl: 14.808910 +epoch: 1, batch: 2491, sum loss: 4354.064453, avg loss: 2.736684, ppl: 15.435717 +epoch: 1, batch: 2492, sum loss: 4461.865234, avg loss: 2.742388, ppl: 15.524013 +epoch: 1, batch: 2493, sum loss: 3660.783936, avg loss: 2.760772, ppl: 15.812049 +epoch: 1, batch: 2494, sum loss: 4395.283203, avg loss: 2.735086, ppl: 15.411072 +epoch: 1, batch: 2495, sum loss: 5650.801270, avg loss: 2.946195, ppl: 19.033386 +epoch: 1, batch: 2496, sum loss: 3914.951416, avg loss: 2.709309, ppl: 15.018891 +epoch: 1, batch: 2497, sum loss: 5931.396484, avg loss: 3.084450, ppl: 21.855433 +epoch: 1, batch: 2498, sum loss: 5364.459473, avg loss: 2.779513, ppl: 16.111172 +epoch: 1, batch: 2499, sum loss: 4343.196289, avg loss: 2.874385, ppl: 17.714533 +epoch: 1, batch: 2500, sum loss: 5178.921875, avg loss: 2.808526, ppl: 16.585453 +epoch: 1, batch: 2501, sum loss: 5134.627930, avg loss: 2.863708, ppl: 17.526390 +epoch: 1, batch: 2502, sum loss: 4690.962402, avg loss: 2.691315, ppl: 14.751063 +epoch: 1, batch: 2503, sum loss: 4914.485352, avg loss: 2.842386, ppl: 17.156652 +epoch: 1, batch: 2504, sum loss: 4408.875488, avg loss: 2.840770, ppl: 17.128958 +epoch: 1, batch: 2505, sum loss: 5400.329102, avg loss: 2.817073, ppl: 16.727823 +epoch: 1, batch: 2506, sum loss: 5764.261719, avg loss: 3.095737, ppl: 22.103518 +epoch: 1, batch: 2507, sum loss: 5272.199219, avg loss: 3.029999, ppl: 20.697222 +epoch: 1, batch: 2508, sum loss: 5004.648926, avg loss: 2.775734, ppl: 16.050407 +epoch: 1, batch: 2509, sum loss: 5692.446289, avg loss: 2.899871, ppl: 18.171799 +epoch: 1, batch: 2510, sum loss: 5227.299805, avg loss: 3.014590, ppl: 20.380739 +epoch: 1, batch: 2511, sum loss: 5412.870605, avg loss: 3.002147, ppl: 20.128702 +epoch: 1, batch: 2512, sum loss: 4537.048828, avg loss: 2.835655, ppl: 17.041567 +epoch: 1, batch: 2513, sum 
loss: 4802.800781, avg loss: 2.701238, ppl: 14.898163 +epoch: 1, batch: 2514, sum loss: 4816.771973, avg loss: 2.858618, ppl: 17.437416 +epoch: 1, batch: 2515, sum loss: 5250.768066, avg loss: 2.741915, ppl: 15.516678 +epoch: 1, batch: 2516, sum loss: 5541.759766, avg loss: 3.177614, ppl: 23.989458 +epoch: 1, batch: 2517, sum loss: 4093.070801, avg loss: 2.902887, ppl: 18.226696 +epoch: 1, batch: 2518, sum loss: 4122.024414, avg loss: 2.770178, ppl: 15.961469 +epoch: 1, batch: 2519, sum loss: 4042.790771, avg loss: 2.770933, ppl: 15.973525 +epoch: 1, batch: 2520, sum loss: 5099.846680, avg loss: 3.083342, ppl: 21.831232 +epoch: 1, batch: 2521, sum loss: 3875.727051, avg loss: 2.508561, ppl: 12.287241 +epoch: 1, batch: 2522, sum loss: 5464.381348, avg loss: 3.056142, ppl: 21.245426 +epoch: 1, batch: 2523, sum loss: 5688.132812, avg loss: 3.017577, ppl: 20.441706 +epoch: 1, batch: 2524, sum loss: 4736.674805, avg loss: 2.975298, ppl: 19.595465 +epoch: 1, batch: 2525, sum loss: 4788.511230, avg loss: 2.833439, ppl: 17.003830 +epoch: 1, batch: 2526, sum loss: 5013.402344, avg loss: 2.838846, ppl: 17.096029 +epoch: 1, batch: 2527, sum loss: 3741.403076, avg loss: 2.435809, ppl: 11.425062 +epoch: 1, batch: 2528, sum loss: 5190.679199, avg loss: 2.878912, ppl: 17.794910 +epoch: 1, batch: 2529, sum loss: 4231.947754, avg loss: 2.719761, ppl: 15.176689 +epoch: 1, batch: 2530, sum loss: 4520.926758, avg loss: 2.929959, ppl: 18.726864 +epoch: 1, batch: 2531, sum loss: 4229.577148, avg loss: 2.730521, ppl: 15.340877 +epoch: 1, batch: 2532, sum loss: 4519.437012, avg loss: 2.923310, ppl: 18.602757 +epoch: 1, batch: 2533, sum loss: 4686.657227, avg loss: 2.925504, ppl: 18.643616 +epoch: 1, batch: 2534, sum loss: 5598.367676, avg loss: 2.952726, ppl: 19.158100 +epoch: 1, batch: 2535, sum loss: 3794.456055, avg loss: 2.583020, ppl: 13.237050 +epoch: 1, batch: 2536, sum loss: 4342.372559, avg loss: 2.920223, ppl: 18.545431 +epoch: 1, batch: 2537, sum loss: 4693.441406, avg loss: 
2.854891, ppl: 17.372549 +epoch: 1, batch: 2538, sum loss: 4573.584473, avg loss: 2.628497, ppl: 13.852932 +epoch: 1, batch: 2539, sum loss: 5595.869141, avg loss: 2.720403, ppl: 15.186440 +epoch: 1, batch: 2540, sum loss: 4291.770020, avg loss: 2.821677, ppl: 16.805004 +epoch: 1, batch: 2541, sum loss: 5958.519043, avg loss: 2.997243, ppl: 20.030235 +epoch: 1, batch: 2542, sum loss: 4514.394043, avg loss: 2.848198, ppl: 17.256660 +epoch: 1, batch: 2543, sum loss: 4762.350098, avg loss: 2.914535, ppl: 18.440237 +epoch: 1, batch: 2544, sum loss: 5061.563477, avg loss: 3.012836, ppl: 20.345007 +epoch: 1, batch: 2545, sum loss: 4816.629883, avg loss: 2.901584, ppl: 18.202963 +epoch: 1, batch: 2546, sum loss: 4151.436035, avg loss: 2.784330, ppl: 16.188971 +epoch: 1, batch: 2547, sum loss: 4278.580566, avg loss: 2.639470, ppl: 14.005774 +epoch: 1, batch: 2548, sum loss: 4772.727539, avg loss: 2.971810, ppl: 19.527239 +epoch: 1, batch: 2549, sum loss: 5105.435059, avg loss: 2.968276, ppl: 19.458349 +epoch: 1, batch: 2550, sum loss: 5284.854492, avg loss: 3.197129, ppl: 24.462198 +epoch: 1, batch: 2551, sum loss: 5307.512695, avg loss: 2.925862, ppl: 18.650288 +epoch: 1, batch: 2552, sum loss: 4914.615234, avg loss: 2.571750, ppl: 13.088715 +epoch: 1, batch: 2553, sum loss: 4131.777344, avg loss: 2.575921, ppl: 13.143417 +epoch: 1, batch: 2554, sum loss: 4494.480469, avg loss: 2.788139, ppl: 16.250755 +epoch: 1, batch: 2555, sum loss: 4408.962891, avg loss: 2.575329, ppl: 13.135635 +epoch: 1, batch: 2556, sum loss: 4246.220215, avg loss: 2.855561, ppl: 17.384195 +epoch: 1, batch: 2557, sum loss: 4404.291016, avg loss: 2.700362, ppl: 14.885126 +epoch: 1, batch: 2558, sum loss: 5302.941406, avg loss: 2.733475, ppl: 15.386261 +epoch: 1, batch: 2559, sum loss: 4774.942383, avg loss: 3.043303, ppl: 20.974407 +epoch: 1, batch: 2560, sum loss: 4190.492676, avg loss: 2.640512, ppl: 14.020384 +epoch: 1, batch: 2561, sum loss: 3885.069336, avg loss: 2.559334, ppl: 12.927209 
+epoch: 1, batch: 2562, sum loss: 4175.533203, avg loss: 2.757948, ppl: 15.767457 +epoch: 1, batch: 2563, sum loss: 4023.590332, avg loss: 2.438540, ppl: 11.456297 +epoch: 1, batch: 2564, sum loss: 5149.536133, avg loss: 2.744955, ppl: 15.563918 +epoch: 1, batch: 2565, sum loss: 5104.170898, avg loss: 2.955513, ppl: 19.211576 +epoch: 1, batch: 2566, sum loss: 5065.395020, avg loss: 2.760433, ppl: 15.806693 +epoch: 1, batch: 2567, sum loss: 3530.291992, avg loss: 2.408112, ppl: 11.112958 +epoch: 1, batch: 2568, sum loss: 4729.448730, avg loss: 3.010470, ppl: 20.296946 +epoch: 1, batch: 2569, sum loss: 5215.681152, avg loss: 3.032373, ppl: 20.746399 +epoch: 1, batch: 2570, sum loss: 4522.309570, avg loss: 2.658618, ppl: 14.276548 +epoch: 1, batch: 2571, sum loss: 4294.180664, avg loss: 2.786619, ppl: 16.226074 +epoch: 1, batch: 2572, sum loss: 4822.477051, avg loss: 2.733830, ppl: 15.391731 +epoch: 1, batch: 2573, sum loss: 4095.006348, avg loss: 2.452100, ppl: 11.612702 +epoch: 1, batch: 2574, sum loss: 5040.236328, avg loss: 3.023537, ppl: 20.563896 +epoch: 1, batch: 2575, sum loss: 4829.469727, avg loss: 2.725435, ppl: 15.263045 +epoch: 1, batch: 2576, sum loss: 5360.218262, avg loss: 2.919509, ppl: 18.532181 +epoch: 1, batch: 2577, sum loss: 4455.816406, avg loss: 2.664962, ppl: 14.367404 +epoch: 1, batch: 2578, sum loss: 4171.186035, avg loss: 2.620092, ppl: 13.736983 +epoch: 1, batch: 2579, sum loss: 4672.657227, avg loss: 2.598808, ppl: 13.447700 +epoch: 1, batch: 2580, sum loss: 6023.022461, avg loss: 3.158376, ppl: 23.532343 +epoch: 1, batch: 2581, sum loss: 4519.423828, avg loss: 2.940419, ppl: 18.923773 +epoch: 1, batch: 2582, sum loss: 4420.133789, avg loss: 2.752263, ppl: 15.678065 +epoch: 1, batch: 2583, sum loss: 5011.569824, avg loss: 2.898537, ppl: 18.147570 +epoch: 1, batch: 2584, sum loss: 4465.336914, avg loss: 2.820807, ppl: 16.790386 +epoch: 1, batch: 2585, sum loss: 3839.672363, avg loss: 2.679465, ppl: 14.577286 +epoch: 1, batch: 2586, sum 
loss: 5140.465820, avg loss: 2.924042, ppl: 18.616383 +epoch: 1, batch: 2587, sum loss: 4913.460449, avg loss: 2.931659, ppl: 18.758724 +epoch: 1, batch: 2588, sum loss: 4063.850342, avg loss: 2.578585, ppl: 13.178479 +epoch: 1, batch: 2589, sum loss: 4862.192383, avg loss: 3.063763, ppl: 21.407972 +epoch: 1, batch: 2590, sum loss: 3652.108398, avg loss: 2.320272, ppl: 10.178442 +epoch: 1, batch: 2591, sum loss: 3706.661133, avg loss: 2.363942, ppl: 10.632782 +epoch: 1, batch: 2592, sum loss: 4600.236328, avg loss: 2.714004, ppl: 15.089570 +epoch: 1, batch: 2593, sum loss: 4299.262695, avg loss: 2.490882, ppl: 12.071920 +epoch: 1, batch: 2594, sum loss: 3684.355225, avg loss: 2.587328, ppl: 13.294205 +epoch: 1, batch: 2595, sum loss: 5401.916504, avg loss: 2.932637, ppl: 18.777075 +epoch: 1, batch: 2596, sum loss: 4667.735840, avg loss: 2.758709, ppl: 15.779462 +epoch: 1, batch: 2597, sum loss: 3791.974609, avg loss: 2.745818, ppl: 15.577350 +epoch: 1, batch: 2598, sum loss: 5159.948242, avg loss: 2.998227, ppl: 20.049950 +epoch: 1, batch: 2599, sum loss: 4115.283691, avg loss: 2.795709, ppl: 16.374237 +epoch: 1, batch: 2600, sum loss: 5091.983398, avg loss: 2.871959, ppl: 17.671606 +epoch: 1, batch: 2601, sum loss: 4094.123535, avg loss: 2.541356, ppl: 12.696872 +epoch: 1, batch: 2602, sum loss: 4394.821289, avg loss: 2.686321, ppl: 14.677582 +epoch: 1, batch: 2603, sum loss: 4131.197754, avg loss: 2.613028, ppl: 13.640295 +epoch: 1, batch: 2604, sum loss: 4048.760498, avg loss: 2.446381, ppl: 11.546485 +epoch: 1, batch: 2605, sum loss: 5083.054688, avg loss: 2.956984, ppl: 19.239849 +epoch: 1, batch: 2606, sum loss: 4740.940918, avg loss: 2.726246, ppl: 15.275430 +epoch: 1, batch: 2607, sum loss: 4853.416016, avg loss: 2.810316, ppl: 16.615173 +epoch: 1, batch: 2608, sum loss: 5321.805664, avg loss: 2.780463, ppl: 16.126482 +epoch: 1, batch: 2609, sum loss: 4955.892090, avg loss: 2.893107, ppl: 18.049301 +epoch: 1, batch: 2610, sum loss: 4651.488770, avg loss: 
2.569883, ppl: 13.064301 +epoch: 1, batch: 2611, sum loss: 6812.457520, avg loss: 3.015696, ppl: 20.403292 +epoch: 1, batch: 2612, sum loss: 5434.944824, avg loss: 3.048203, ppl: 21.077423 +epoch: 1, batch: 2613, sum loss: 5081.388184, avg loss: 2.702866, ppl: 14.922440 +epoch: 1, batch: 2614, sum loss: 5108.230957, avg loss: 2.887638, ppl: 17.950857 +epoch: 1, batch: 2615, sum loss: 4104.514160, avg loss: 2.541495, ppl: 12.698639 +epoch: 1, batch: 2616, sum loss: 4501.467285, avg loss: 2.942136, ppl: 18.956285 +epoch: 1, batch: 2617, sum loss: 4785.696289, avg loss: 2.750400, ppl: 15.648891 +epoch: 1, batch: 2618, sum loss: 4891.956055, avg loss: 2.814704, ppl: 16.688242 +epoch: 1, batch: 2619, sum loss: 4000.071777, avg loss: 2.699104, ppl: 14.866403 +epoch: 1, batch: 2620, sum loss: 4909.282227, avg loss: 2.927419, ppl: 18.679359 +epoch: 1, batch: 2621, sum loss: 5651.726562, avg loss: 3.066591, ppl: 21.468582 +epoch: 1, batch: 2622, sum loss: 4854.437012, avg loss: 2.734894, ppl: 15.408111 +epoch: 1, batch: 2623, sum loss: 4889.084961, avg loss: 2.814672, ppl: 16.687700 +epoch: 1, batch: 2624, sum loss: 4818.247070, avg loss: 2.539930, ppl: 12.678782 +epoch: 1, batch: 2625, sum loss: 4152.689453, avg loss: 2.854082, ppl: 17.358496 +epoch: 1, batch: 2626, sum loss: 4742.849121, avg loss: 2.771975, ppl: 15.990184 +epoch: 1, batch: 2627, sum loss: 4483.743164, avg loss: 2.784934, ppl: 16.198742 +epoch: 1, batch: 2628, sum loss: 4941.056641, avg loss: 2.971171, ppl: 19.514751 +epoch: 1, batch: 2629, sum loss: 5538.715332, avg loss: 2.941431, ppl: 18.942940 +epoch: 1, batch: 2630, sum loss: 4510.213867, avg loss: 2.735121, ppl: 15.411601 +epoch: 1, batch: 2631, sum loss: 4768.117188, avg loss: 2.905617, ppl: 18.276512 +epoch: 1, batch: 2632, sum loss: 4603.075195, avg loss: 2.869748, ppl: 17.632568 +epoch: 1, batch: 2633, sum loss: 5009.050781, avg loss: 2.784353, ppl: 16.189337 +epoch: 1, batch: 2634, sum loss: 4362.197266, avg loss: 2.634177, ppl: 13.931842 
+epoch: 1, batch: 2635, sum loss: 5279.687500, avg loss: 2.908919, ppl: 18.336964 +epoch: 1, batch: 2636, sum loss: 4894.546875, avg loss: 2.634309, ppl: 13.933685 +epoch: 1, batch: 2637, sum loss: 4406.233398, avg loss: 2.662377, ppl: 14.330310 +epoch: 1, batch: 2638, sum loss: 4359.127441, avg loss: 2.762438, ppl: 15.838411 +epoch: 1, batch: 2639, sum loss: 5004.945312, avg loss: 3.116404, ppl: 22.565096 +epoch: 1, batch: 2640, sum loss: 4993.847168, avg loss: 2.807109, ppl: 16.561970 +epoch: 1, batch: 2641, sum loss: 5346.904297, avg loss: 3.015739, ppl: 20.404158 +epoch: 1, batch: 2642, sum loss: 4836.362793, avg loss: 3.009560, ppl: 20.278477 +epoch: 1, batch: 2643, sum loss: 4790.960938, avg loss: 2.851763, ppl: 17.318279 +epoch: 1, batch: 2644, sum loss: 4369.896973, avg loss: 2.687513, ppl: 14.695089 +epoch: 1, batch: 2645, sum loss: 3863.465332, avg loss: 2.679241, ppl: 14.574026 +epoch: 1, batch: 2646, sum loss: 4681.041992, avg loss: 2.574831, ppl: 13.129092 +epoch: 1, batch: 2647, sum loss: 4641.961914, avg loss: 2.567457, ppl: 13.032637 +epoch: 1, batch: 2648, sum loss: 4642.919922, avg loss: 2.718337, ppl: 15.155099 +epoch: 1, batch: 2649, sum loss: 3761.808594, avg loss: 2.727925, ppl: 15.301105 +epoch: 1, batch: 2650, sum loss: 5374.464844, avg loss: 2.891052, ppl: 18.012239 +epoch: 1, batch: 2651, sum loss: 5679.249512, avg loss: 3.003305, ppl: 20.152029 +epoch: 1, batch: 2652, sum loss: 5305.301758, avg loss: 3.014376, ppl: 20.376371 +epoch: 1, batch: 2653, sum loss: 4519.988281, avg loss: 2.772999, ppl: 16.006567 +epoch: 1, batch: 2654, sum loss: 5339.579590, avg loss: 3.052933, ppl: 21.177361 +epoch: 1, batch: 2655, sum loss: 3374.044922, avg loss: 2.328533, ppl: 10.262879 +epoch: 1, batch: 2656, sum loss: 4749.133789, avg loss: 2.706059, ppl: 14.970161 +epoch: 1, batch: 2657, sum loss: 4591.596680, avg loss: 2.716921, ppl: 15.133656 +epoch: 1, batch: 2658, sum loss: 4489.034180, avg loss: 2.861080, ppl: 17.480391 +epoch: 1, batch: 2659, sum 
loss: 5456.070312, avg loss: 2.792257, ppl: 16.317808 +epoch: 1, batch: 2660, sum loss: 4827.878418, avg loss: 2.695633, ppl: 14.814889 +epoch: 1, batch: 2661, sum loss: 5895.924805, avg loss: 2.964266, ppl: 19.380465 +epoch: 1, batch: 2662, sum loss: 4640.389160, avg loss: 2.802167, ppl: 16.480328 +epoch: 1, batch: 2663, sum loss: 4818.965332, avg loss: 2.889068, ppl: 17.976549 +epoch: 1, batch: 2664, sum loss: 4853.966309, avg loss: 2.827004, ppl: 16.894772 +epoch: 1, batch: 2665, sum loss: 4684.471191, avg loss: 2.909609, ppl: 18.349628 +epoch: 1, batch: 2666, sum loss: 5307.877441, avg loss: 2.990353, ppl: 19.892710 +epoch: 1, batch: 2667, sum loss: 4741.624512, avg loss: 2.656372, ppl: 14.244520 +epoch: 1, batch: 2668, sum loss: 5343.277344, avg loss: 2.731737, ppl: 15.359542 +epoch: 1, batch: 2669, sum loss: 4522.853516, avg loss: 2.651145, ppl: 14.170254 +epoch: 1, batch: 2670, sum loss: 4924.681641, avg loss: 2.755838, ppl: 15.734216 +epoch: 1, batch: 2671, sum loss: 5431.619141, avg loss: 3.060067, ppl: 21.328985 +epoch: 1, batch: 2672, sum loss: 5075.575195, avg loss: 2.753975, ppl: 15.704930 +epoch: 1, batch: 2673, sum loss: 4150.754395, avg loss: 2.787612, ppl: 16.242186 +epoch: 1, batch: 2674, sum loss: 3529.178711, avg loss: 2.480098, ppl: 11.942430 +epoch: 1, batch: 2675, sum loss: 4318.154785, avg loss: 3.053858, ppl: 21.196960 +epoch: 1, batch: 2676, sum loss: 6144.726562, avg loss: 3.188753, ppl: 24.258156 +epoch: 1, batch: 2677, sum loss: 5779.721680, avg loss: 2.861248, ppl: 17.483341 +epoch: 1, batch: 2678, sum loss: 4036.108398, avg loss: 2.631101, ppl: 13.889049 +epoch: 1, batch: 2679, sum loss: 6053.061035, avg loss: 3.012972, ppl: 20.347786 +epoch: 1, batch: 2680, sum loss: 5228.691895, avg loss: 2.880822, ppl: 17.828926 +epoch: 1, batch: 2681, sum loss: 4262.227539, avg loss: 2.830164, ppl: 16.948248 +epoch: 1, batch: 2682, sum loss: 4527.100586, avg loss: 2.702747, ppl: 14.920661 +epoch: 1, batch: 2683, sum loss: 3681.730469, avg loss: 
2.496089, ppl: 12.134935 +epoch: 1, batch: 2684, sum loss: 4183.394531, avg loss: 2.616257, ppl: 13.684403 +epoch: 1, batch: 2685, sum loss: 5126.981445, avg loss: 2.894964, ppl: 18.082850 +epoch: 1, batch: 2686, sum loss: 5553.896484, avg loss: 2.990790, ppl: 19.901392 +epoch: 1, batch: 2687, sum loss: 5110.715820, avg loss: 3.015172, ppl: 20.392588 +epoch: 1, batch: 2688, sum loss: 4138.423828, avg loss: 2.614292, ppl: 13.657538 +epoch: 1, batch: 2689, sum loss: 5346.279785, avg loss: 2.985081, ppl: 19.788099 +epoch: 1, batch: 2690, sum loss: 4639.251465, avg loss: 2.981524, ppl: 19.717844 +epoch: 1, batch: 2691, sum loss: 5142.923828, avg loss: 2.995296, ppl: 19.991285 +epoch: 1, batch: 2692, sum loss: 4694.359375, avg loss: 2.946867, ppl: 19.046192 +epoch: 1, batch: 2693, sum loss: 5137.687012, avg loss: 2.824457, ppl: 16.851791 +epoch: 1, batch: 2694, sum loss: 4149.065918, avg loss: 2.777153, ppl: 16.073189 +epoch: 1, batch: 2695, sum loss: 4411.769043, avg loss: 2.560516, ppl: 12.942495 +epoch: 1, batch: 2696, sum loss: 3493.758301, avg loss: 2.683378, ppl: 14.634448 +epoch: 1, batch: 2697, sum loss: 4460.458984, avg loss: 2.696771, ppl: 14.831761 +epoch: 1, batch: 2698, sum loss: 4243.531738, avg loss: 2.720213, ppl: 15.183552 +epoch: 1, batch: 2699, sum loss: 4764.546387, avg loss: 2.915879, ppl: 18.465036 +epoch: 1, batch: 2700, sum loss: 5252.197266, avg loss: 2.873193, ppl: 17.693428 +epoch: 1, batch: 2701, sum loss: 4992.888184, avg loss: 3.020501, ppl: 20.501558 +epoch: 1, batch: 2702, sum loss: 4375.109375, avg loss: 2.795597, ppl: 16.372402 +epoch: 1, batch: 2703, sum loss: 4533.179199, avg loss: 2.696716, ppl: 14.830941 +epoch: 1, batch: 2704, sum loss: 4552.087402, avg loss: 2.871979, ppl: 17.671965 +epoch: 1, batch: 2705, sum loss: 4603.732910, avg loss: 2.953004, ppl: 19.163431 +epoch: 1, batch: 2706, sum loss: 4305.351562, avg loss: 2.615645, ppl: 13.676034 +epoch: 1, batch: 2707, sum loss: 4342.667480, avg loss: 2.571147, ppl: 13.080822 
+epoch: 1, batch: 2708, sum loss: 3991.758057, avg loss: 2.698957, ppl: 14.864227 +epoch: 1, batch: 2709, sum loss: 4364.169922, avg loss: 2.727606, ppl: 15.296225 +epoch: 1, batch: 2710, sum loss: 3999.411133, avg loss: 2.643365, ppl: 14.060439 +epoch: 1, batch: 2711, sum loss: 6372.858398, avg loss: 3.150202, ppl: 23.340775 +epoch: 1, batch: 2712, sum loss: 4648.872070, avg loss: 2.775446, ppl: 16.045784 +epoch: 1, batch: 2713, sum loss: 4267.325684, avg loss: 2.858222, ppl: 17.430508 +epoch: 1, batch: 2714, sum loss: 5156.886230, avg loss: 2.877727, ppl: 17.773823 +epoch: 1, batch: 2715, sum loss: 4485.895508, avg loss: 2.837379, ppl: 17.070955 +epoch: 1, batch: 2716, sum loss: 4667.621094, avg loss: 2.789971, ppl: 16.280546 +epoch: 1, batch: 2717, sum loss: 4527.785156, avg loss: 2.896856, ppl: 18.117088 +epoch: 1, batch: 2718, sum loss: 4916.057617, avg loss: 2.607988, ppl: 13.571718 +epoch: 1, batch: 2719, sum loss: 5058.267578, avg loss: 2.870754, ppl: 17.650312 +epoch: 1, batch: 2720, sum loss: 5172.238281, avg loss: 2.857590, ppl: 17.419502 +epoch: 1, batch: 2721, sum loss: 5342.396484, avg loss: 2.878446, ppl: 17.786617 +epoch: 1, batch: 2722, sum loss: 4274.692871, avg loss: 2.761430, ppl: 15.822446 +epoch: 1, batch: 2723, sum loss: 4243.857910, avg loss: 2.553464, ppl: 12.851550 +epoch: 1, batch: 2724, sum loss: 4261.267578, avg loss: 2.573229, ppl: 13.108083 +epoch: 1, batch: 2725, sum loss: 5103.037109, avg loss: 2.892878, ppl: 18.045174 +epoch: 1, batch: 2726, sum loss: 4430.515625, avg loss: 3.153392, ppl: 23.415352 +epoch: 1, batch: 2727, sum loss: 4566.505859, avg loss: 2.796391, ppl: 16.385397 +epoch: 1, batch: 2728, sum loss: 4939.046875, avg loss: 2.917334, ppl: 18.491928 +epoch: 1, batch: 2729, sum loss: 5165.800293, avg loss: 2.863526, ppl: 17.523199 +epoch: 1, batch: 2730, sum loss: 4691.397461, avg loss: 2.688480, ppl: 14.709299 +epoch: 1, batch: 2731, sum loss: 4427.176270, avg loss: 2.887917, ppl: 17.955866 +epoch: 1, batch: 2732, sum 
loss: 4969.016113, avg loss: 2.912671, ppl: 18.405888 +epoch: 1, batch: 2733, sum loss: 4708.997070, avg loss: 2.838455, ppl: 17.089346 +epoch: 1, batch: 2734, sum loss: 4253.691406, avg loss: 2.666891, ppl: 14.395146 +epoch: 1, batch: 2735, sum loss: 5202.895996, avg loss: 2.794251, ppl: 16.350386 +epoch: 1, batch: 2736, sum loss: 5443.057617, avg loss: 3.010541, ppl: 20.298372 +epoch: 1, batch: 2737, sum loss: 5287.364746, avg loss: 3.021351, ppl: 20.519001 +epoch: 1, batch: 2738, sum loss: 4834.714355, avg loss: 3.021696, ppl: 20.526081 +epoch: 1, batch: 2739, sum loss: 4224.267578, avg loss: 2.755556, ppl: 15.729786 +epoch: 1, batch: 2740, sum loss: 4755.811035, avg loss: 2.923055, ppl: 18.598021 +epoch: 1, batch: 2741, sum loss: 4082.148682, avg loss: 2.516738, ppl: 12.388117 +epoch: 1, batch: 2742, sum loss: 4446.671875, avg loss: 2.758481, ppl: 15.775866 +epoch: 1, batch: 2743, sum loss: 5001.447754, avg loss: 2.911204, ppl: 18.378906 +epoch: 1, batch: 2744, sum loss: 4244.027344, avg loss: 2.741620, ppl: 15.512092 +epoch: 1, batch: 2745, sum loss: 4226.967773, avg loss: 2.673604, ppl: 14.492102 +epoch: 1, batch: 2746, sum loss: 4618.483887, avg loss: 2.915709, ppl: 18.461906 +epoch: 1, batch: 2747, sum loss: 5452.818359, avg loss: 2.919068, ppl: 18.524008 +epoch: 1, batch: 2748, sum loss: 4093.249268, avg loss: 2.732476, ppl: 15.370898 +epoch: 1, batch: 2749, sum loss: 5245.205078, avg loss: 2.792974, ppl: 16.329512 +epoch: 1, batch: 2750, sum loss: 4126.802734, avg loss: 2.626864, ppl: 13.830326 +epoch: 1, batch: 2751, sum loss: 4174.558594, avg loss: 2.520869, ppl: 12.439399 +epoch: 1, batch: 2752, sum loss: 4735.172852, avg loss: 2.664701, ppl: 14.363647 +epoch: 1, batch: 2753, sum loss: 4532.663086, avg loss: 2.861530, ppl: 17.488260 +epoch: 1, batch: 2754, sum loss: 4373.836426, avg loss: 2.910071, ppl: 18.358095 +epoch: 1, batch: 2755, sum loss: 4947.012695, avg loss: 3.042443, ppl: 20.956383 +epoch: 1, batch: 2756, sum loss: 4948.088379, avg loss: 
2.747411, ppl: 15.602193 +epoch: 1, batch: 2757, sum loss: 4713.558594, avg loss: 2.929496, ppl: 18.718185 +epoch: 1, batch: 2758, sum loss: 4626.419434, avg loss: 2.926261, ppl: 18.657747 +epoch: 1, batch: 2759, sum loss: 3592.904541, avg loss: 2.491612, ppl: 12.080737 +epoch: 1, batch: 2760, sum loss: 5372.057617, avg loss: 2.789230, ppl: 16.268494 +epoch: 1, batch: 2761, sum loss: 5351.050293, avg loss: 2.951490, ppl: 19.134438 +epoch: 1, batch: 2762, sum loss: 4354.581543, avg loss: 2.881920, ppl: 17.848515 +epoch: 1, batch: 2763, sum loss: 4588.053711, avg loss: 2.782325, ppl: 16.156538 +epoch: 1, batch: 2764, sum loss: 5218.737305, avg loss: 2.950106, ppl: 19.107977 +epoch: 1, batch: 2765, sum loss: 4182.581055, avg loss: 2.694962, ppl: 14.804960 +epoch: 1, batch: 2766, sum loss: 5225.062012, avg loss: 2.869337, ppl: 17.625322 +epoch: 1, batch: 2767, sum loss: 4412.793945, avg loss: 2.720588, ppl: 15.189250 +epoch: 1, batch: 2768, sum loss: 4906.275391, avg loss: 2.968104, ppl: 19.454990 +epoch: 1, batch: 2769, sum loss: 4655.392578, avg loss: 2.833471, ppl: 17.004377 +epoch: 1, batch: 2770, sum loss: 3500.018066, avg loss: 2.677902, ppl: 14.554529 +epoch: 1, batch: 2771, sum loss: 5542.108887, avg loss: 2.907717, ppl: 18.314938 +epoch: 1, batch: 2772, sum loss: 4914.875977, avg loss: 2.538676, ppl: 12.662889 +epoch: 1, batch: 2773, sum loss: 5361.225098, avg loss: 3.208393, ppl: 24.739307 +epoch: 1, batch: 2774, sum loss: 4794.285156, avg loss: 2.953965, ppl: 19.181858 +epoch: 1, batch: 2775, sum loss: 3409.548828, avg loss: 2.444121, ppl: 11.520420 +epoch: 1, batch: 2776, sum loss: 5672.801270, avg loss: 3.001482, ppl: 20.115332 +epoch: 1, batch: 2777, sum loss: 4516.051270, avg loss: 2.725438, ppl: 15.263103 +epoch: 1, batch: 2778, sum loss: 4951.356445, avg loss: 2.676409, ppl: 14.532812 +epoch: 1, batch: 2779, sum loss: 4417.457520, avg loss: 2.640441, ppl: 14.019384 +epoch: 1, batch: 2780, sum loss: 4261.411621, avg loss: 2.717737, ppl: 15.146008 
+epoch: 1, batch: 2781, sum loss: 4288.103516, avg loss: 2.517970, ppl: 12.403397 +epoch: 1, batch: 2782, sum loss: 4634.500977, avg loss: 2.602190, ppl: 13.493262 +epoch: 1, batch: 2783, sum loss: 3925.761963, avg loss: 2.758793, ppl: 15.780778 +epoch: 1, batch: 2784, sum loss: 5023.178711, avg loss: 2.963527, ppl: 19.366159 +epoch: 1, batch: 2785, sum loss: 4403.644043, avg loss: 2.808446, ppl: 16.584133 +epoch: 1, batch: 2786, sum loss: 5106.726562, avg loss: 2.913136, ppl: 18.414448 +epoch: 1, batch: 2787, sum loss: 5814.547852, avg loss: 3.200081, ppl: 24.534527 +epoch: 1, batch: 2788, sum loss: 4213.632324, avg loss: 2.671929, ppl: 14.467853 +epoch: 1, batch: 2789, sum loss: 5416.326172, avg loss: 3.030960, ppl: 20.717117 +epoch: 1, batch: 2790, sum loss: 3436.803955, avg loss: 2.534516, ppl: 12.610327 +epoch: 1, batch: 2791, sum loss: 4893.383301, avg loss: 2.721570, ppl: 15.204174 +epoch: 1, batch: 2792, sum loss: 4377.419434, avg loss: 2.536164, ppl: 12.631125 +epoch: 1, batch: 2793, sum loss: 5122.912109, avg loss: 2.679347, ppl: 14.575577 +epoch: 1, batch: 2794, sum loss: 4878.694824, avg loss: 2.937203, ppl: 18.863020 +epoch: 1, batch: 2795, sum loss: 4670.754883, avg loss: 2.853241, ppl: 17.343897 +epoch: 1, batch: 2796, sum loss: 4114.558594, avg loss: 2.733926, ppl: 15.393203 +epoch: 1, batch: 2797, sum loss: 5479.243164, avg loss: 2.936357, ppl: 18.847071 +epoch: 1, batch: 2798, sum loss: 4324.691406, avg loss: 2.815554, ppl: 16.702433 +epoch: 1, batch: 2799, sum loss: 4384.498535, avg loss: 2.735183, ppl: 15.412563 +epoch: 1, batch: 2800, sum loss: 5639.114746, avg loss: 2.704611, ppl: 14.948505 +epoch: 1, batch: 2801, sum loss: 5031.020020, avg loss: 2.943839, ppl: 18.988596 +epoch: 1, batch: 2802, sum loss: 4743.198242, avg loss: 2.862521, ppl: 17.505610 +epoch: 1, batch: 2803, sum loss: 4391.916992, avg loss: 2.824384, ppl: 16.850561 +epoch: 1, batch: 2804, sum loss: 3752.730957, avg loss: 2.663400, ppl: 14.344982 +epoch: 1, batch: 2805, sum 
loss: 4216.967285, avg loss: 2.565065, ppl: 13.001505 +epoch: 1, batch: 2806, sum loss: 5544.268555, avg loss: 2.833045, ppl: 16.997135 +epoch: 1, batch: 2807, sum loss: 4449.541016, avg loss: 2.883695, ppl: 17.880217 +epoch: 1, batch: 2808, sum loss: 6105.937988, avg loss: 3.235791, ppl: 25.426481 +epoch: 1, batch: 2809, sum loss: 4815.020996, avg loss: 2.772033, ppl: 15.991111 +epoch: 1, batch: 2810, sum loss: 4866.861816, avg loss: 2.740350, ppl: 15.492411 +epoch: 1, batch: 2811, sum loss: 4944.371094, avg loss: 2.716687, ppl: 15.130119 +epoch: 1, batch: 2812, sum loss: 4553.415527, avg loss: 2.913254, ppl: 18.416624 +epoch: 1, batch: 2813, sum loss: 5408.513672, avg loss: 2.960325, ppl: 19.304245 +epoch: 1, batch: 2814, sum loss: 3782.900879, avg loss: 2.528677, ppl: 12.536912 +epoch: 1, batch: 2815, sum loss: 4237.928711, avg loss: 2.723605, ppl: 15.235141 +epoch: 1, batch: 2816, sum loss: 4829.361328, avg loss: 2.893566, ppl: 18.057587 +epoch: 1, batch: 2817, sum loss: 4972.960449, avg loss: 2.894622, ppl: 18.076664 +epoch: 1, batch: 2818, sum loss: 3192.395508, avg loss: 2.313330, ppl: 10.108030 +epoch: 1, batch: 2819, sum loss: 4493.548340, avg loss: 2.617093, ppl: 13.695849 +epoch: 1, batch: 2820, sum loss: 4141.475098, avg loss: 2.677101, ppl: 14.542871 +epoch: 1, batch: 2821, sum loss: 5511.902344, avg loss: 2.968176, ppl: 19.456392 +epoch: 1, batch: 2822, sum loss: 3820.530762, avg loss: 2.725058, ppl: 15.257293 +epoch: 1, batch: 2823, sum loss: 4431.100098, avg loss: 2.785104, ppl: 16.201500 +epoch: 1, batch: 2824, sum loss: 4007.302246, avg loss: 2.575387, ppl: 13.136400 +epoch: 1, batch: 2825, sum loss: 4997.051758, avg loss: 3.107619, ppl: 22.367729 +epoch: 1, batch: 2826, sum loss: 4920.358398, avg loss: 2.897738, ppl: 18.133078 +epoch: 1, batch: 2827, sum loss: 5201.058105, avg loss: 2.832820, ppl: 16.993317 +epoch: 1, batch: 2828, sum loss: 4921.865723, avg loss: 2.807682, ppl: 16.571453 +epoch: 1, batch: 2829, sum loss: 4855.408691, avg loss: 
2.813099, ppl: 16.661470 +epoch: 1, batch: 2830, sum loss: 5477.339844, avg loss: 3.034537, ppl: 20.791355 +epoch: 1, batch: 2831, sum loss: 4630.673828, avg loss: 2.830485, ppl: 16.953688 +epoch: 1, batch: 2832, sum loss: 4334.729492, avg loss: 2.529014, ppl: 12.541130 +epoch: 1, batch: 2833, sum loss: 4729.100098, avg loss: 2.733584, ppl: 15.387938 +epoch: 1, batch: 2834, sum loss: 4783.456543, avg loss: 2.812144, ppl: 16.645561 +epoch: 1, batch: 2835, sum loss: 4718.408203, avg loss: 2.816960, ppl: 16.725931 +epoch: 1, batch: 2836, sum loss: 5235.128418, avg loss: 3.075869, ppl: 21.668695 +epoch: 1, batch: 2837, sum loss: 4772.929688, avg loss: 2.799372, ppl: 16.434330 +epoch: 1, batch: 2838, sum loss: 4005.857422, avg loss: 2.756956, ppl: 15.751826 +epoch: 1, batch: 2839, sum loss: 3652.801270, avg loss: 2.572395, ppl: 13.097158 +epoch: 1, batch: 2840, sum loss: 4688.905273, avg loss: 2.973307, ppl: 19.556488 +epoch: 1, batch: 2841, sum loss: 4393.757324, avg loss: 2.944878, ppl: 19.008337 +epoch: 1, batch: 2842, sum loss: 6239.139160, avg loss: 3.167076, ppl: 23.737970 +epoch: 1, batch: 2843, sum loss: 5343.963379, avg loss: 2.973825, ppl: 19.566618 +epoch: 1, batch: 2844, sum loss: 4807.347168, avg loss: 2.806391, ppl: 16.550074 +epoch: 1, batch: 2845, sum loss: 4736.740723, avg loss: 2.781410, ppl: 16.141760 +epoch: 1, batch: 2846, sum loss: 4700.391113, avg loss: 2.698273, ppl: 14.854053 +epoch: 1, batch: 2847, sum loss: 3823.921631, avg loss: 2.566391, ppl: 13.018748 +epoch: 1, batch: 2848, sum loss: 4752.190430, avg loss: 2.883611, ppl: 17.878712 +epoch: 1, batch: 2849, sum loss: 4610.274902, avg loss: 2.629935, ppl: 13.872862 +epoch: 1, batch: 2850, sum loss: 5816.842773, avg loss: 3.178603, ppl: 24.013176 +epoch: 1, batch: 2851, sum loss: 4467.474121, avg loss: 2.667149, ppl: 14.398857 +epoch: 1, batch: 2852, sum loss: 5158.208496, avg loss: 2.776216, ppl: 16.058134 +epoch: 1, batch: 2853, sum loss: 3852.064209, avg loss: 2.656596, ppl: 14.247710 
+epoch: 1, batch: 2854, sum loss: 4740.832031, avg loss: 2.951950, ppl: 19.143248 +epoch: 1, batch: 2855, sum loss: 5030.076660, avg loss: 2.763778, ppl: 15.859654 +epoch: 1, batch: 2856, sum loss: 5691.348145, avg loss: 3.134002, ppl: 22.965710 +epoch: 1, batch: 2857, sum loss: 4947.153320, avg loss: 2.965919, ppl: 19.412540 +epoch: 1, batch: 2858, sum loss: 3631.015137, avg loss: 2.819111, ppl: 16.761944 +epoch: 1, batch: 2859, sum loss: 4005.884766, avg loss: 2.586110, ppl: 13.278025 +epoch: 1, batch: 2860, sum loss: 4401.973633, avg loss: 2.752954, ppl: 15.688909 +epoch: 1, batch: 2861, sum loss: 4189.726562, avg loss: 2.610421, ppl: 13.604783 +epoch: 1, batch: 2862, sum loss: 4315.738281, avg loss: 2.677257, ppl: 14.545142 +epoch: 1, batch: 2863, sum loss: 4869.360840, avg loss: 2.795270, ppl: 16.367050 +epoch: 1, batch: 2864, sum loss: 4714.201172, avg loss: 2.890375, ppl: 18.000051 +epoch: 1, batch: 2865, sum loss: 4881.007812, avg loss: 2.931536, ppl: 18.756422 +epoch: 1, batch: 2866, sum loss: 4133.155273, avg loss: 2.571970, ppl: 13.091586 +epoch: 1, batch: 2867, sum loss: 4960.977539, avg loss: 2.901156, ppl: 18.195175 +epoch: 1, batch: 2868, sum loss: 5543.044922, avg loss: 3.010888, ppl: 20.305426 +epoch: 1, batch: 2869, sum loss: 4661.188477, avg loss: 2.806254, ppl: 16.547821 +epoch: 1, batch: 2870, sum loss: 4211.346680, avg loss: 2.685808, ppl: 14.670046 +epoch: 1, batch: 2871, sum loss: 5339.342285, avg loss: 3.035442, ppl: 20.810177 +epoch: 1, batch: 2872, sum loss: 5497.737305, avg loss: 3.235867, ppl: 25.428404 +epoch: 1, batch: 2873, sum loss: 5809.847656, avg loss: 3.152386, ppl: 23.391815 +epoch: 1, batch: 2874, sum loss: 4618.775879, avg loss: 2.779047, ppl: 16.103666 +epoch: 1, batch: 2875, sum loss: 3824.689453, avg loss: 2.645014, ppl: 14.083636 +epoch: 1, batch: 2876, sum loss: 4916.405762, avg loss: 2.693921, ppl: 14.789554 +epoch: 1, batch: 2877, sum loss: 5644.340332, avg loss: 2.969143, ppl: 19.475216 +epoch: 1, batch: 2878, sum 
loss: 3539.175293, avg loss: 2.655045, ppl: 14.225630 +epoch: 1, batch: 2879, sum loss: 4642.353027, avg loss: 3.004759, ppl: 20.181358 +epoch: 1, batch: 2880, sum loss: 4080.891113, avg loss: 2.530001, ppl: 12.553514 +epoch: 1, batch: 2881, sum loss: 3608.408203, avg loss: 2.438114, ppl: 11.451420 +epoch: 1, batch: 2882, sum loss: 3980.242188, avg loss: 2.567898, ppl: 13.038392 +epoch: 1, batch: 2883, sum loss: 4751.733398, avg loss: 2.680053, ppl: 14.585863 +epoch: 1, batch: 2884, sum loss: 5382.355469, avg loss: 2.733548, ppl: 15.387380 +epoch: 1, batch: 2885, sum loss: 4787.928711, avg loss: 3.015069, ppl: 20.390488 +epoch: 1, batch: 2886, sum loss: 3872.863525, avg loss: 2.604481, ppl: 13.524207 +epoch: 1, batch: 2887, sum loss: 3362.270996, avg loss: 2.293500, ppl: 9.909560 +epoch: 1, batch: 2888, sum loss: 5035.450195, avg loss: 2.902277, ppl: 18.215570 +epoch: 1, batch: 2889, sum loss: 4997.074707, avg loss: 2.686599, ppl: 14.681663 +epoch: 1, batch: 2890, sum loss: 4886.553711, avg loss: 2.689353, ppl: 14.722144 +epoch: 1, batch: 2891, sum loss: 3800.735107, avg loss: 2.686032, ppl: 14.673334 +epoch: 1, batch: 2892, sum loss: 4544.337891, avg loss: 2.682608, ppl: 14.623179 +epoch: 1, batch: 2893, sum loss: 4399.800781, avg loss: 2.652080, ppl: 14.183511 +epoch: 1, batch: 2894, sum loss: 5066.572754, avg loss: 2.893531, ppl: 18.056957 +epoch: 1, batch: 2895, sum loss: 4441.571289, avg loss: 2.666009, ppl: 14.382457 +epoch: 1, batch: 2896, sum loss: 5257.887207, avg loss: 2.937367, ppl: 18.866106 +epoch: 1, batch: 2897, sum loss: 3763.054199, avg loss: 2.502031, ppl: 12.207257 +epoch: 1, batch: 2898, sum loss: 4226.691406, avg loss: 2.947484, ppl: 19.057936 +epoch: 1, batch: 2899, sum loss: 5088.449219, avg loss: 2.911012, ppl: 18.375393 +epoch: 1, batch: 2900, sum loss: 4917.387695, avg loss: 2.829337, ppl: 16.934231 +epoch: 1, batch: 2901, sum loss: 4149.835449, avg loss: 2.955723, ppl: 19.215616 +epoch: 1, batch: 2902, sum loss: 4832.482910, avg loss: 
2.948434, ppl: 19.076050 +epoch: 1, batch: 2903, sum loss: 5725.104004, avg loss: 2.895854, ppl: 18.098955 +epoch: 1, batch: 2904, sum loss: 4626.732910, avg loss: 2.836746, ppl: 17.060165 +epoch: 1, batch: 2905, sum loss: 4729.479492, avg loss: 2.832024, ppl: 16.979786 +epoch: 1, batch: 2906, sum loss: 4277.157227, avg loss: 2.733008, ppl: 15.379073 +epoch: 1, batch: 2907, sum loss: 4451.225586, avg loss: 2.817231, ppl: 16.730467 +epoch: 1, batch: 2908, sum loss: 4429.595703, avg loss: 2.654042, ppl: 14.211362 +epoch: 1, batch: 2909, sum loss: 3574.862793, avg loss: 2.513968, ppl: 12.353856 +epoch: 1, batch: 2910, sum loss: 4950.213379, avg loss: 2.737950, ppl: 15.455267 +epoch: 1, batch: 2911, sum loss: 4984.292480, avg loss: 2.982820, ppl: 19.743420 +epoch: 1, batch: 2912, sum loss: 4519.184570, avg loss: 2.786180, ppl: 16.218948 +epoch: 1, batch: 2913, sum loss: 4597.964355, avg loss: 2.921197, ppl: 18.563494 +epoch: 1, batch: 2914, sum loss: 4479.009766, avg loss: 2.809918, ppl: 16.608559 +epoch: 1, batch: 2915, sum loss: 5073.793457, avg loss: 2.780161, ppl: 16.121614 +epoch: 1, batch: 2916, sum loss: 5125.465820, avg loss: 2.920493, ppl: 18.550438 +epoch: 1, batch: 2917, sum loss: 5239.537109, avg loss: 2.997447, ppl: 20.034325 +epoch: 1, batch: 2918, sum loss: 4608.427734, avg loss: 2.779510, ppl: 16.111124 +epoch: 1, batch: 2919, sum loss: 4470.759277, avg loss: 2.538762, ppl: 12.663979 +epoch: 1, batch: 2920, sum loss: 5484.003906, avg loss: 3.067116, ppl: 21.479872 +epoch: 1, batch: 2921, sum loss: 5095.383789, avg loss: 2.805828, ppl: 16.540764 +epoch: 1, batch: 2922, sum loss: 4989.184082, avg loss: 3.009158, ppl: 20.270329 +epoch: 1, batch: 2923, sum loss: 4876.796387, avg loss: 2.873775, ppl: 17.703728 +epoch: 1, batch: 2924, sum loss: 4955.758789, avg loss: 2.804617, ppl: 16.520754 +epoch: 1, batch: 2925, sum loss: 3981.682373, avg loss: 2.501057, ppl: 12.195374 +epoch: 1, batch: 2926, sum loss: 5014.409180, avg loss: 2.771924, ppl: 15.989361 
+epoch: 1, batch: 2927, sum loss: 3726.783936, avg loss: 2.789509, ppl: 16.273029 +epoch: 1, batch: 2928, sum loss: 4932.826172, avg loss: 3.073412, ppl: 21.615528 +epoch: 1, batch: 2929, sum loss: 5160.134277, avg loss: 2.784746, ppl: 16.195702 +epoch: 1, batch: 2930, sum loss: 4679.501465, avg loss: 3.082675, ppl: 21.816694 +epoch: 1, batch: 2931, sum loss: 4128.070801, avg loss: 2.783595, ppl: 16.177067 +epoch: 1, batch: 2932, sum loss: 4545.825195, avg loss: 2.669304, ppl: 14.429928 +epoch: 1, batch: 2933, sum loss: 4930.343750, avg loss: 2.931239, ppl: 18.750847 +epoch: 1, batch: 2934, sum loss: 4800.517090, avg loss: 2.977988, ppl: 19.648249 +epoch: 1, batch: 2935, sum loss: 4635.069824, avg loss: 2.732942, ppl: 15.378064 +epoch: 1, batch: 2936, sum loss: 5083.134277, avg loss: 2.965656, ppl: 19.407431 +epoch: 1, batch: 2937, sum loss: 5143.326172, avg loss: 2.889509, ppl: 17.984480 +epoch: 1, batch: 2938, sum loss: 5213.198242, avg loss: 2.958683, ppl: 19.272564 +epoch: 1, batch: 2939, sum loss: 4374.100586, avg loss: 2.764918, ppl: 15.877739 +epoch: 1, batch: 2940, sum loss: 4612.928223, avg loss: 2.736019, ppl: 15.425456 +epoch: 1, batch: 2941, sum loss: 4063.909424, avg loss: 2.781595, ppl: 16.144743 +epoch: 1, batch: 2942, sum loss: 4929.818848, avg loss: 2.794682, ppl: 16.357428 +epoch: 1, batch: 2943, sum loss: 5809.263184, avg loss: 3.191903, ppl: 24.334696 +epoch: 1, batch: 2944, sum loss: 4051.226074, avg loss: 2.939932, ppl: 18.914558 +epoch: 1, batch: 2945, sum loss: 4256.895508, avg loss: 2.600425, ppl: 13.469461 +epoch: 1, batch: 2946, sum loss: 4807.774902, avg loss: 2.725496, ppl: 15.263984 +epoch: 1, batch: 2947, sum loss: 4111.995605, avg loss: 2.698160, ppl: 14.852377 +epoch: 1, batch: 2948, sum loss: 5266.959961, avg loss: 2.783805, ppl: 16.180477 +epoch: 1, batch: 2949, sum loss: 4248.301270, avg loss: 2.685399, ppl: 14.664048 +epoch: 1, batch: 2950, sum loss: 5217.109375, avg loss: 2.877611, ppl: 17.771772 +epoch: 1, batch: 2951, sum 
loss: 4584.721680, avg loss: 2.660895, ppl: 14.309088 +epoch: 1, batch: 2952, sum loss: 4472.923828, avg loss: 2.991922, ppl: 19.923943 +epoch: 1, batch: 2953, sum loss: 5502.424805, avg loss: 2.763649, ppl: 15.857598 +epoch: 1, batch: 2954, sum loss: 4938.126953, avg loss: 3.023960, ppl: 20.572601 +epoch: 1, batch: 2955, sum loss: 4561.777344, avg loss: 2.659928, ppl: 14.295264 +epoch: 1, batch: 2956, sum loss: 5044.846680, avg loss: 2.943318, ppl: 18.978712 +epoch: 1, batch: 2957, sum loss: 4336.874512, avg loss: 2.719044, ppl: 15.165812 +epoch: 1, batch: 2958, sum loss: 5625.722168, avg loss: 2.950038, ppl: 19.106678 +epoch: 1, batch: 2959, sum loss: 4130.613770, avg loss: 2.675268, ppl: 14.516242 +epoch: 1, batch: 2960, sum loss: 4827.597656, avg loss: 3.034317, ppl: 20.786766 +epoch: 1, batch: 2961, sum loss: 5026.279297, avg loss: 2.748103, ppl: 15.612981 +epoch: 1, batch: 2962, sum loss: 5058.166992, avg loss: 2.949368, ppl: 19.093891 +epoch: 1, batch: 2963, sum loss: 5709.825195, avg loss: 2.922121, ppl: 18.580660 +epoch: 1, batch: 2964, sum loss: 3734.077148, avg loss: 2.474538, ppl: 11.876214 +epoch: 1, batch: 2965, sum loss: 5548.768066, avg loss: 2.849907, ppl: 17.286169 +epoch: 1, batch: 2966, sum loss: 5258.104980, avg loss: 2.786489, ppl: 16.223957 +epoch: 1, batch: 2967, sum loss: 4459.715820, avg loss: 2.684958, ppl: 14.657593 +epoch: 1, batch: 2968, sum loss: 5564.672852, avg loss: 2.836225, ppl: 17.051268 +epoch: 1, batch: 2969, sum loss: 4921.811035, avg loss: 2.945429, ppl: 19.018812 +epoch: 1, batch: 2970, sum loss: 5563.701172, avg loss: 2.807115, ppl: 16.562061 +epoch: 1, batch: 2971, sum loss: 4889.058594, avg loss: 2.918841, ppl: 18.519814 +epoch: 1, batch: 2972, sum loss: 5324.917480, avg loss: 2.739155, ppl: 15.473908 +epoch: 1, batch: 2973, sum loss: 4037.216797, avg loss: 2.780452, ppl: 16.126316 +epoch: 1, batch: 2974, sum loss: 4982.889648, avg loss: 2.800950, ppl: 16.460274 +epoch: 1, batch: 2975, sum loss: 3873.961914, avg loss: 
2.703393, ppl: 14.930305 +epoch: 1, batch: 2976, sum loss: 5858.584961, avg loss: 2.812571, ppl: 16.652674 +epoch: 1, batch: 2977, sum loss: 4553.814453, avg loss: 2.930382, ppl: 18.734795 +epoch: 1, batch: 2978, sum loss: 5414.034180, avg loss: 3.214985, ppl: 24.902916 +epoch: 1, batch: 2979, sum loss: 3825.507324, avg loss: 2.764095, ppl: 15.864674 +epoch: 1, batch: 2980, sum loss: 5255.621094, avg loss: 2.737303, ppl: 15.445270 +epoch: 1, batch: 2981, sum loss: 5780.976562, avg loss: 2.943471, ppl: 18.981617 +epoch: 1, batch: 2982, sum loss: 3833.557617, avg loss: 2.552302, ppl: 12.836618 +epoch: 1, batch: 2983, sum loss: 4907.937988, avg loss: 2.885325, ppl: 17.909386 +epoch: 1, batch: 2984, sum loss: 3642.407959, avg loss: 2.471104, ppl: 11.835510 +epoch: 1, batch: 2985, sum loss: 4155.661133, avg loss: 2.457517, ppl: 11.675786 +epoch: 1, batch: 2986, sum loss: 5520.486328, avg loss: 3.178173, ppl: 24.002861 +epoch: 1, batch: 2987, sum loss: 4112.232910, avg loss: 2.462415, ppl: 11.733112 +epoch: 1, batch: 2988, sum loss: 5828.011719, avg loss: 2.820916, ppl: 16.792221 +epoch: 1, batch: 2989, sum loss: 4590.414062, avg loss: 2.534740, ppl: 12.613153 +epoch: 1, batch: 2990, sum loss: 4616.977051, avg loss: 2.751476, ppl: 15.665742 +epoch: 1, batch: 2991, sum loss: 5671.568848, avg loss: 3.049231, ppl: 21.099104 +epoch: 1, batch: 2992, sum loss: 4995.360352, avg loss: 3.011067, ppl: 20.309061 +epoch: 1, batch: 2993, sum loss: 3705.632080, avg loss: 2.308805, ppl: 10.062393 +epoch: 1, batch: 2994, sum loss: 5688.587402, avg loss: 3.063321, ppl: 21.398506 +epoch: 1, batch: 2995, sum loss: 4715.071777, avg loss: 2.778475, ppl: 16.094456 +epoch: 1, batch: 2996, sum loss: 4896.611328, avg loss: 2.885452, ppl: 17.911654 +epoch: 1, batch: 2997, sum loss: 4464.239746, avg loss: 2.695797, ppl: 14.817323 +epoch: 1, batch: 2998, sum loss: 5649.081055, avg loss: 2.914903, ppl: 18.447018 +epoch: 1, batch: 2999, sum loss: 4495.679688, avg loss: 2.693637, ppl: 14.785351 
+epoch: 1, batch: 3000, sum loss: 4600.742676, avg loss: 2.500403, ppl: 12.187409 +epoch: 1, batch: 3001, sum loss: 5059.195801, avg loss: 2.958594, ppl: 19.270859 +epoch: 1, batch: 3002, sum loss: 4070.338623, avg loss: 2.737282, ppl: 15.444949 +epoch: 1, batch: 3003, sum loss: 3851.775879, avg loss: 2.439377, ppl: 11.465894 +epoch: 1, batch: 3004, sum loss: 5027.455078, avg loss: 2.850031, ppl: 17.288324 +epoch: 1, batch: 3005, sum loss: 4680.504883, avg loss: 2.855708, ppl: 17.386736 +epoch: 1, batch: 3006, sum loss: 4440.031250, avg loss: 2.838895, ppl: 17.096857 +epoch: 1, batch: 3007, sum loss: 4167.483398, avg loss: 2.572521, ppl: 13.098802 +epoch: 1, batch: 3008, sum loss: 4683.626465, avg loss: 2.802889, ppl: 16.492218 +epoch: 1, batch: 3009, sum loss: 4239.175781, avg loss: 2.578574, ppl: 13.178331 +epoch: 1, batch: 3010, sum loss: 5058.923340, avg loss: 2.700974, ppl: 14.894224 +epoch: 1, batch: 3011, sum loss: 4602.500977, avg loss: 2.834052, ppl: 17.014269 +epoch: 1, batch: 3012, sum loss: 4810.993164, avg loss: 2.889485, ppl: 17.984053 +epoch: 1, batch: 3013, sum loss: 4901.833496, avg loss: 2.963624, ppl: 19.368031 +epoch: 1, batch: 3014, sum loss: 4162.296875, avg loss: 2.596567, ppl: 13.417595 +epoch: 1, batch: 3015, sum loss: 3969.416016, avg loss: 2.696614, ppl: 14.829438 +epoch: 1, batch: 3016, sum loss: 4287.857422, avg loss: 2.778910, ppl: 16.101456 +epoch: 1, batch: 3017, sum loss: 5434.687988, avg loss: 3.156032, ppl: 23.477261 +epoch: 1, batch: 3018, sum loss: 4290.218750, avg loss: 2.593845, ppl: 13.381119 +epoch: 1, batch: 3019, sum loss: 4739.020508, avg loss: 2.688043, ppl: 14.702880 +epoch: 1, batch: 3020, sum loss: 4687.785156, avg loss: 2.928036, ppl: 18.690880 +epoch: 1, batch: 3021, sum loss: 4088.052246, avg loss: 2.852793, ppl: 17.336134 +epoch: 1, batch: 3022, sum loss: 4865.282227, avg loss: 2.690975, ppl: 14.746042 +epoch: 1, batch: 3023, sum loss: 4084.668945, avg loss: 2.826761, ppl: 16.890656 +epoch: 1, batch: 3024, sum 
loss: 5837.844727, avg loss: 3.042129, ppl: 20.949789 +epoch: 1, batch: 3025, sum loss: 4580.286621, avg loss: 2.673839, ppl: 14.495512 +epoch: 1, batch: 3026, sum loss: 4389.331055, avg loss: 2.837318, ppl: 17.069925 +epoch: 1, batch: 3027, sum loss: 6393.612793, avg loss: 3.097681, ppl: 22.146524 +epoch: 1, batch: 3028, sum loss: 4088.942383, avg loss: 2.827761, ppl: 16.907560 +epoch: 1, batch: 3029, sum loss: 5666.154297, avg loss: 3.082783, ppl: 21.819029 +epoch: 1, batch: 3030, sum loss: 4929.469727, avg loss: 2.781868, ppl: 16.149155 +epoch: 1, batch: 3031, sum loss: 5009.710938, avg loss: 2.921114, ppl: 18.561958 +epoch: 1, batch: 3032, sum loss: 5073.650391, avg loss: 2.839200, ppl: 17.102079 +epoch: 1, batch: 3033, sum loss: 5259.665039, avg loss: 3.022796, ppl: 20.548664 +epoch: 1, batch: 3034, sum loss: 4885.057129, avg loss: 2.878643, ppl: 17.790115 +epoch: 1, batch: 3035, sum loss: 4771.996582, avg loss: 2.734668, ppl: 15.404635 +epoch: 1, batch: 3036, sum loss: 3383.528564, avg loss: 2.423731, ppl: 11.287897 +epoch: 1, batch: 3037, sum loss: 3598.685059, avg loss: 2.528942, ppl: 12.540236 +epoch: 1, batch: 3038, sum loss: 5259.140137, avg loss: 3.066554, ppl: 21.467793 +epoch: 1, batch: 3039, sum loss: 5274.020508, avg loss: 2.889874, ppl: 17.991047 +epoch: 1, batch: 3040, sum loss: 5200.973633, avg loss: 2.879830, ppl: 17.811251 +epoch: 1, batch: 3041, sum loss: 3965.962158, avg loss: 2.497458, ppl: 12.151560 +epoch: 1, batch: 3042, sum loss: 4419.699219, avg loss: 2.643361, ppl: 14.060379 +epoch: 1, batch: 3043, sum loss: 4504.299316, avg loss: 2.854435, ppl: 17.364632 +epoch: 1, batch: 3044, sum loss: 5289.426270, avg loss: 2.938570, ppl: 18.888821 +epoch: 1, batch: 3045, sum loss: 3486.014404, avg loss: 2.574604, ppl: 13.126122 +epoch: 1, batch: 3046, sum loss: 3631.260986, avg loss: 2.586368, ppl: 13.281450 +epoch: 1, batch: 3047, sum loss: 4799.034668, avg loss: 2.928026, ppl: 18.690699 +epoch: 1, batch: 3048, sum loss: 6500.991211, avg loss: 
3.291641, ppl: 26.886955 +epoch: 1, batch: 3049, sum loss: 5270.963379, avg loss: 2.986382, ppl: 19.813856 +epoch: 1, batch: 3050, sum loss: 3921.695312, avg loss: 2.761757, ppl: 15.827634 +epoch: 1, batch: 3051, sum loss: 5258.015137, avg loss: 2.908194, ppl: 18.323681 +epoch: 1, batch: 3052, sum loss: 4534.747070, avg loss: 2.983386, ppl: 19.754599 +epoch: 1, batch: 3053, sum loss: 4651.639648, avg loss: 2.838096, ppl: 17.083210 +epoch: 1, batch: 3054, sum loss: 4284.083496, avg loss: 3.019086, ppl: 20.472578 +epoch: 1, batch: 3055, sum loss: 4511.752930, avg loss: 2.866425, ppl: 17.574076 +epoch: 1, batch: 3056, sum loss: 4624.262695, avg loss: 2.849207, ppl: 17.274073 +epoch: 1, batch: 3057, sum loss: 4196.144043, avg loss: 2.944662, ppl: 19.004244 +epoch: 1, batch: 3058, sum loss: 5083.172363, avg loss: 3.084449, ppl: 21.855427 +epoch: 1, batch: 3059, sum loss: 4889.869141, avg loss: 2.784664, ppl: 16.194370 +epoch: 1, batch: 3060, sum loss: 4755.331055, avg loss: 2.904906, ppl: 18.263527 +epoch: 1, batch: 3061, sum loss: 4883.916016, avg loss: 2.949225, ppl: 19.091146 +epoch: 1, batch: 3062, sum loss: 4293.147461, avg loss: 2.567672, ppl: 13.035442 +epoch: 1, batch: 3063, sum loss: 4712.781250, avg loss: 2.682289, ppl: 14.618511 +epoch: 1, batch: 3064, sum loss: 4115.513672, avg loss: 2.667216, ppl: 14.399821 +epoch: 1, batch: 3065, sum loss: 4497.045898, avg loss: 2.888276, ppl: 17.962318 +epoch: 1, batch: 3066, sum loss: 4172.477539, avg loss: 2.828798, ppl: 16.925110 +epoch: 1, batch: 3067, sum loss: 2826.063477, avg loss: 2.129663, ppl: 8.412035 +epoch: 1, batch: 3068, sum loss: 5634.263672, avg loss: 2.954517, ppl: 19.192448 +epoch: 1, batch: 3069, sum loss: 4192.238281, avg loss: 2.889206, ppl: 17.979033 +epoch: 1, batch: 3070, sum loss: 5472.393066, avg loss: 2.951668, ppl: 19.137854 +epoch: 1, batch: 3071, sum loss: 4485.128906, avg loss: 2.819063, ppl: 16.761133 +epoch: 1, batch: 3072, sum loss: 4360.992188, avg loss: 2.872854, ppl: 17.687426 +epoch: 
1, batch: 3073, sum loss: 4803.026855, avg loss: 2.675781, ppl: 14.523688 +epoch: 1, batch: 3074, sum loss: 4384.749023, avg loss: 2.792834, ppl: 16.327223 +epoch: 1, batch: 3075, sum loss: 3624.127930, avg loss: 2.568482, ppl: 13.046004 +epoch: 1, batch: 3076, sum loss: 4971.607422, avg loss: 2.985950, ppl: 19.805317 +epoch: 1, batch: 3077, sum loss: 6493.514648, avg loss: 3.169114, ppl: 23.786404 +epoch: 1, batch: 3078, sum loss: 4586.740723, avg loss: 2.715655, ppl: 15.114501 +epoch: 1, batch: 3079, sum loss: 4528.825195, avg loss: 2.848318, ppl: 17.258722 +epoch: 1, batch: 3080, sum loss: 4079.587402, avg loss: 2.586929, ppl: 13.288903 +epoch: 1, batch: 3081, sum loss: 4664.147461, avg loss: 2.799608, ppl: 16.438206 +epoch: 1, batch: 3082, sum loss: 5207.459961, avg loss: 2.948731, ppl: 19.081732 +epoch: 1, batch: 3083, sum loss: 4323.632812, avg loss: 2.722691, ppl: 15.221225 +epoch: 1, batch: 3084, sum loss: 5377.403809, avg loss: 3.029523, ppl: 20.687365 +epoch: 1, batch: 3085, sum loss: 4284.739258, avg loss: 2.795003, ppl: 16.362677 +epoch: 1, batch: 3086, sum loss: 4393.641113, avg loss: 2.530899, ppl: 12.564800 +epoch: 1, batch: 3087, sum loss: 5444.384766, avg loss: 3.128957, ppl: 22.850130 +epoch: 1, batch: 3088, sum loss: 5420.675293, avg loss: 2.986598, ppl: 19.818146 +epoch: 1, batch: 3089, sum loss: 4978.115234, avg loss: 2.738237, ppl: 15.459708 +epoch: 1, batch: 3090, sum loss: 5595.546387, avg loss: 2.925011, ppl: 18.634434 +epoch: 1, batch: 3091, sum loss: 4352.208496, avg loss: 2.523020, ppl: 12.466182 +epoch: 1, batch: 3092, sum loss: 4206.191406, avg loss: 2.632160, ppl: 13.903769 +epoch: 1, batch: 3093, sum loss: 4778.961426, avg loss: 2.880628, ppl: 17.825457 +epoch: 1, batch: 3094, sum loss: 4946.679688, avg loss: 2.891105, ppl: 18.013197 +epoch: 1, batch: 3095, sum loss: 4451.388672, avg loss: 2.792590, ppl: 16.323244 +epoch: 1, batch: 3096, sum loss: 4425.496094, avg loss: 2.816993, ppl: 16.726482 +epoch: 1, batch: 3097, sum loss: 
5106.530762, avg loss: 2.916351, ppl: 18.473759 +epoch: 1, batch: 3098, sum loss: 5115.010742, avg loss: 2.758905, ppl: 15.782558 +epoch: 1, batch: 3099, sum loss: 4727.969727, avg loss: 2.622279, ppl: 13.767069 +epoch: 1, batch: 3100, sum loss: 4931.752441, avg loss: 2.692005, ppl: 14.761238 +epoch: 1, batch: 3101, sum loss: 4953.244141, avg loss: 2.822361, ppl: 16.816511 +epoch: 1, batch: 3102, sum loss: 4677.330566, avg loss: 2.899771, ppl: 18.169983 +epoch: 1, batch: 3103, sum loss: 4521.132812, avg loss: 2.859666, ppl: 17.455702 +epoch: 1, batch: 3104, sum loss: 5922.394043, avg loss: 3.055931, ppl: 21.240953 +epoch: 1, batch: 3105, sum loss: 4876.248047, avg loss: 3.015614, ppl: 20.401619 +epoch: 1, batch: 3106, sum loss: 4164.592773, avg loss: 2.852461, ppl: 17.330378 +epoch: 1, batch: 3107, sum loss: 4504.989258, avg loss: 2.808597, ppl: 16.586628 +epoch: 1, batch: 3108, sum loss: 2857.613770, avg loss: 2.269749, ppl: 9.676970 +epoch: 1, batch: 3109, sum loss: 4857.767578, avg loss: 2.812836, ppl: 16.657089 +epoch: 1, batch: 3110, sum loss: 4479.793945, avg loss: 2.817480, ppl: 16.734632 +epoch: 1, batch: 3111, sum loss: 5493.749512, avg loss: 2.902139, ppl: 18.213064 +epoch: 1, batch: 3112, sum loss: 3376.251221, avg loss: 2.464417, ppl: 11.756626 +epoch: 1, batch: 3113, sum loss: 4151.377930, avg loss: 2.767585, ppl: 15.920145 +epoch: 1, batch: 3114, sum loss: 4740.299805, avg loss: 2.637897, ppl: 13.983758 +epoch: 1, batch: 3115, sum loss: 4826.101074, avg loss: 2.830558, ppl: 16.954916 +epoch: 1, batch: 3116, sum loss: 4619.329102, avg loss: 2.592216, ppl: 13.359346 +epoch: 1, batch: 3117, sum loss: 4405.045410, avg loss: 2.800410, ppl: 16.451395 +epoch: 1, batch: 3118, sum loss: 5140.786133, avg loss: 2.987092, ppl: 19.827950 +epoch: 1, batch: 3119, sum loss: 4768.469727, avg loss: 2.828274, ppl: 16.916239 +epoch: 1, batch: 3120, sum loss: 4669.165039, avg loss: 2.630515, ppl: 13.880921 +epoch: 1, batch: 3121, sum loss: 4014.726074, avg loss: 2.789942, 
ppl: 16.280067 +epoch: 1, batch: 3122, sum loss: 5338.454102, avg loss: 2.912414, ppl: 18.401157 +epoch: 1, batch: 3123, sum loss: 4443.542969, avg loss: 3.002394, ppl: 20.133678 +epoch: 1, batch: 3124, sum loss: 4674.187500, avg loss: 2.910453, ppl: 18.365112 +epoch: 1, batch: 3125, sum loss: 3997.958496, avg loss: 2.803617, ppl: 16.504242 +epoch: 1, batch: 3126, sum loss: 5029.166016, avg loss: 2.852618, ppl: 17.333105 +epoch: 1, batch: 3127, sum loss: 5483.179688, avg loss: 2.832221, ppl: 16.983135 +epoch: 1, batch: 3128, sum loss: 4737.641602, avg loss: 2.845430, ppl: 17.208963 +epoch: 1, batch: 3129, sum loss: 4838.545898, avg loss: 2.725941, ppl: 15.270780 +epoch: 1, batch: 3130, sum loss: 3995.736328, avg loss: 2.599698, ppl: 13.459677 +epoch: 1, batch: 3131, sum loss: 5090.551270, avg loss: 3.061065, ppl: 21.350286 +epoch: 1, batch: 3132, sum loss: 4599.824219, avg loss: 2.830661, ppl: 16.956667 +epoch: 1, batch: 3133, sum loss: 4555.758789, avg loss: 2.642552, ppl: 14.049006 +epoch: 1, batch: 3134, sum loss: 4845.331055, avg loss: 2.911858, ppl: 18.390930 +epoch: 1, batch: 3135, sum loss: 4662.800293, avg loss: 2.750915, ppl: 15.656945 +epoch: 1, batch: 3136, sum loss: 4376.506836, avg loss: 2.721708, ppl: 15.206273 +epoch: 1, batch: 3137, sum loss: 4732.721680, avg loss: 2.887567, ppl: 17.949577 +epoch: 1, batch: 3138, sum loss: 3987.696777, avg loss: 2.630407, ppl: 13.879416 +epoch: 1, batch: 3139, sum loss: 5633.213379, avg loss: 3.017254, ppl: 20.435102 +epoch: 1, batch: 3140, sum loss: 4091.415771, avg loss: 2.698823, ppl: 14.862232 +epoch: 1, batch: 3141, sum loss: 3971.121582, avg loss: 2.755810, ppl: 15.733777 +epoch: 1, batch: 3142, sum loss: 4807.874023, avg loss: 2.973330, ppl: 19.556932 +epoch: 1, batch: 3143, sum loss: 3936.257080, avg loss: 2.815634, ppl: 16.703772 +epoch: 1, batch: 3144, sum loss: 4133.879883, avg loss: 2.691328, ppl: 14.751253 +epoch: 1, batch: 3145, sum loss: 3901.746582, avg loss: 2.652445, ppl: 14.188688 +epoch: 1, 
batch: 3146, sum loss: 4612.555664, avg loss: 2.884650, ppl: 17.897303 +epoch: 1, batch: 3147, sum loss: 3882.574463, avg loss: 2.639412, ppl: 14.004962 +epoch: 1, batch: 3148, sum loss: 5076.482910, avg loss: 2.809343, ppl: 16.599007 +epoch: 1, batch: 3149, sum loss: 5599.363281, avg loss: 2.855361, ppl: 17.380714 +epoch: 1, batch: 3150, sum loss: 3769.344238, avg loss: 2.486375, ppl: 12.017632 +epoch: 1, batch: 3151, sum loss: 5474.839844, avg loss: 2.700957, ppl: 14.893979 +epoch: 1, batch: 3152, sum loss: 5123.131836, avg loss: 2.909217, ppl: 18.342438 +epoch: 1, batch: 3153, sum loss: 5537.834961, avg loss: 3.090310, ppl: 21.983889 +epoch: 1, batch: 3154, sum loss: 5272.022949, avg loss: 2.805760, ppl: 16.539644 +epoch: 1, batch: 3155, sum loss: 4360.404785, avg loss: 2.414399, ppl: 11.183049 +epoch: 1, batch: 3156, sum loss: 4387.014160, avg loss: 2.956209, ppl: 19.224951 +epoch: 1, batch: 3157, sum loss: 5139.323242, avg loss: 2.880787, ppl: 17.828293 +epoch: 1, batch: 3158, sum loss: 4978.832031, avg loss: 2.879602, ppl: 17.807188 +epoch: 1, batch: 3159, sum loss: 4413.223145, avg loss: 2.779108, ppl: 16.104647 +epoch: 1, batch: 3160, sum loss: 4795.708496, avg loss: 2.857991, ppl: 17.426477 +epoch: 1, batch: 3161, sum loss: 4210.601562, avg loss: 2.644850, ppl: 14.081336 +epoch: 1, batch: 3162, sum loss: 4937.623047, avg loss: 3.012583, ppl: 20.339861 +epoch: 1, batch: 3163, sum loss: 5423.475586, avg loss: 2.947541, ppl: 19.059031 +epoch: 1, batch: 3164, sum loss: 5680.112305, avg loss: 2.997421, ppl: 20.033798 +epoch: 1, batch: 3165, sum loss: 4072.681396, avg loss: 2.600691, ppl: 13.473040 +epoch: 1, batch: 3166, sum loss: 4035.938721, avg loss: 2.762450, ppl: 15.838593 +epoch: 1, batch: 3167, sum loss: 4997.006836, avg loss: 2.881780, ppl: 17.846014 +epoch: 1, batch: 3168, sum loss: 4480.703613, avg loss: 2.725489, ppl: 15.263875 +epoch: 1, batch: 3169, sum loss: 5507.233398, avg loss: 2.934062, ppl: 18.803848 +epoch: 1, batch: 3170, sum loss: 
3790.666260, avg loss: 2.591023, ppl: 13.343414 +epoch: 1, batch: 3171, sum loss: 4798.997070, avg loss: 2.882281, ppl: 17.854946 +epoch: 1, batch: 3172, sum loss: 4939.314941, avg loss: 2.796894, ppl: 16.393650 +epoch: 1, batch: 3173, sum loss: 4707.281250, avg loss: 2.628298, ppl: 13.850171 +epoch: 1, batch: 3174, sum loss: 4311.390137, avg loss: 2.718405, ppl: 15.156125 +epoch: 1, batch: 3175, sum loss: 5181.689453, avg loss: 2.834622, ppl: 17.023970 +epoch: 1, batch: 3176, sum loss: 4895.274902, avg loss: 2.692670, ppl: 14.771068 +epoch: 1, batch: 3177, sum loss: 5066.775391, avg loss: 2.767218, ppl: 15.914293 +epoch: 1, batch: 3178, sum loss: 5319.514648, avg loss: 2.914803, ppl: 18.445169 +epoch: 1, batch: 3179, sum loss: 4530.035645, avg loss: 2.883537, ppl: 17.877386 +epoch: 1, batch: 3180, sum loss: 3977.938965, avg loss: 2.588119, ppl: 13.304722 +epoch: 1, batch: 3181, sum loss: 4278.731934, avg loss: 2.684273, ppl: 14.647555 +epoch: 1, batch: 3182, sum loss: 4796.601562, avg loss: 2.848338, ppl: 17.259079 +epoch: 1, batch: 3183, sum loss: 5132.227051, avg loss: 2.838621, ppl: 17.092182 +epoch: 1, batch: 3184, sum loss: 5032.363281, avg loss: 2.763516, ppl: 15.855499 +epoch: 1, batch: 3185, sum loss: 4110.593750, avg loss: 2.681405, ppl: 14.605600 +epoch: 1, batch: 3186, sum loss: 4541.579590, avg loss: 2.706543, ppl: 14.977412 +epoch: 1, batch: 3187, sum loss: 4399.453125, avg loss: 2.832874, ppl: 16.994234 +epoch: 1, batch: 3188, sum loss: 4524.075195, avg loss: 2.670646, ppl: 14.449306 +epoch: 1, batch: 3189, sum loss: 4092.743896, avg loss: 2.583803, ppl: 13.247421 +epoch: 1, batch: 3190, sum loss: 4524.843262, avg loss: 2.949702, ppl: 19.100266 +epoch: 1, batch: 3191, sum loss: 4527.553711, avg loss: 2.854700, ppl: 17.369219 +epoch: 1, batch: 3192, sum loss: 4873.992188, avg loss: 2.964715, ppl: 19.389185 +epoch: 1, batch: 3193, sum loss: 5230.114746, avg loss: 2.828618, ppl: 16.922060 +epoch: 1, batch: 3194, sum loss: 4072.531982, avg loss: 
2.362258, ppl: 10.614890 +epoch: 1, batch: 3195, sum loss: 4885.140625, avg loss: 2.833608, ppl: 17.006718 +epoch: 1, batch: 3196, sum loss: 4438.610840, avg loss: 2.317812, ppl: 10.153439 +epoch: 1, batch: 3197, sum loss: 4802.482422, avg loss: 2.713267, ppl: 15.078454 +epoch: 1, batch: 3198, sum loss: 4543.839355, avg loss: 2.737253, ppl: 15.444496 +epoch: 1, batch: 3199, sum loss: 4545.734375, avg loss: 2.688193, ppl: 14.705081 +epoch: 1, batch: 3200, sum loss: 5052.702637, avg loss: 2.706322, ppl: 14.974095 +epoch: 1, batch: 3201, sum loss: 4344.586914, avg loss: 2.758468, ppl: 15.775651 +epoch: 1, batch: 3202, sum loss: 6043.583008, avg loss: 3.129769, ppl: 22.868687 +epoch: 1, batch: 3203, sum loss: 4627.073242, avg loss: 2.744409, ppl: 15.555415 +epoch: 1, batch: 3204, sum loss: 4329.125000, avg loss: 2.831344, ppl: 16.968252 +epoch: 1, batch: 3205, sum loss: 4169.689453, avg loss: 2.537851, ppl: 12.652452 +epoch: 1, batch: 3206, sum loss: 4009.681152, avg loss: 2.629299, ppl: 13.864050 +epoch: 1, batch: 3207, sum loss: 4331.052734, avg loss: 2.830754, ppl: 16.958235 +epoch: 1, batch: 3208, sum loss: 4754.836914, avg loss: 2.640109, ppl: 14.014735 +epoch: 1, batch: 3209, sum loss: 5015.812012, avg loss: 2.838603, ppl: 17.091875 +epoch: 1, batch: 3210, sum loss: 4912.915039, avg loss: 2.873050, ppl: 17.690889 +epoch: 1, batch: 3211, sum loss: 5428.664062, avg loss: 2.901478, ppl: 18.201019 +epoch: 1, batch: 3212, sum loss: 4559.220215, avg loss: 2.791929, ppl: 16.312456 +epoch: 1, batch: 3213, sum loss: 3895.430664, avg loss: 2.677272, ppl: 14.545357 +epoch: 1, batch: 3214, sum loss: 4559.868164, avg loss: 2.799182, ppl: 16.431208 +epoch: 1, batch: 3215, sum loss: 4036.366211, avg loss: 2.546603, ppl: 12.763675 +epoch: 1, batch: 3216, sum loss: 4529.106445, avg loss: 2.825394, ppl: 16.867588 +epoch: 1, batch: 3217, sum loss: 3982.509766, avg loss: 2.672825, ppl: 14.480824 +epoch: 1, batch: 3218, sum loss: 4928.730469, avg loss: 3.070860, ppl: 21.560434 
+epoch: 1, batch: 3219, sum loss: 4784.587891, avg loss: 2.865023, ppl: 17.549450 +epoch: 1, batch: 3220, sum loss: 4790.921387, avg loss: 2.806632, ppl: 16.554075 +epoch: 1, batch: 3221, sum loss: 4543.014160, avg loss: 2.816500, ppl: 16.718233 +epoch: 1, batch: 3222, sum loss: 5202.237793, avg loss: 2.771570, ppl: 15.983716 +epoch: 1, batch: 3223, sum loss: 5391.028320, avg loss: 2.971901, ppl: 19.529007 +epoch: 1, batch: 3224, sum loss: 5393.357422, avg loss: 3.304754, ppl: 27.241827 +epoch: 1, batch: 3225, sum loss: 5085.500000, avg loss: 2.914327, ppl: 18.436394 +epoch: 1, batch: 3226, sum loss: 4436.777344, avg loss: 2.572045, ppl: 13.092572 +epoch: 1, batch: 3227, sum loss: 4198.302246, avg loss: 2.825237, ppl: 16.864943 +epoch: 1, batch: 3228, sum loss: 4696.537109, avg loss: 2.908072, ppl: 18.321449 +epoch: 1, batch: 3229, sum loss: 3933.873535, avg loss: 2.612134, ppl: 13.628102 +epoch: 1, batch: 3230, sum loss: 4491.289062, avg loss: 2.429037, ppl: 11.347947 +epoch: 1, batch: 3231, sum loss: 4619.431152, avg loss: 2.772768, ppl: 16.002865 +epoch: 1, batch: 3232, sum loss: 4315.315918, avg loss: 2.957722, ppl: 19.254055 +epoch: 1, batch: 3233, sum loss: 4360.471680, avg loss: 2.759792, ppl: 15.796562 +epoch: 1, batch: 3234, sum loss: 5581.573242, avg loss: 3.193120, ppl: 24.364321 +epoch: 1, batch: 3235, sum loss: 4938.713867, avg loss: 2.783942, ppl: 16.182692 +epoch: 1, batch: 3236, sum loss: 5049.058594, avg loss: 2.812846, ppl: 16.657261 +epoch: 1, batch: 3237, sum loss: 4852.599121, avg loss: 2.937409, ppl: 18.866894 +epoch: 1, batch: 3238, sum loss: 5115.634766, avg loss: 2.957014, ppl: 19.240440 +epoch: 1, batch: 3239, sum loss: 4758.139160, avg loss: 2.861178, ppl: 17.482111 +epoch: 1, batch: 3240, sum loss: 4853.342285, avg loss: 2.808647, ppl: 16.587463 +epoch: 1, batch: 3241, sum loss: 5034.394043, avg loss: 2.807805, ppl: 16.573496 +epoch: 1, batch: 3242, sum loss: 4884.131348, avg loss: 2.841263, ppl: 17.137402 +epoch: 1, batch: 3243, sum 
loss: 4855.772461, avg loss: 2.720321, ppl: 15.185195 +epoch: 1, batch: 3244, sum loss: 4481.377441, avg loss: 3.013704, ppl: 20.362675 +epoch: 1, batch: 3245, sum loss: 4290.472656, avg loss: 2.748541, ppl: 15.619828 +epoch: 1, batch: 3246, sum loss: 4983.533691, avg loss: 2.912644, ppl: 18.405401 +epoch: 1, batch: 3247, sum loss: 4727.795898, avg loss: 2.756732, ppl: 15.748297 +epoch: 1, batch: 3248, sum loss: 4674.844727, avg loss: 2.740237, ppl: 15.490660 +epoch: 1, batch: 3249, sum loss: 5269.726074, avg loss: 2.950575, ppl: 19.116936 +epoch: 1, batch: 3250, sum loss: 5042.730469, avg loss: 2.957613, ppl: 19.251966 +epoch: 1, batch: 3251, sum loss: 4954.467285, avg loss: 2.835986, ppl: 17.047195 +epoch: 1, batch: 3252, sum loss: 3924.273193, avg loss: 2.334487, ppl: 10.324164 +epoch: 1, batch: 3253, sum loss: 4805.363770, avg loss: 2.833351, ppl: 17.002346 +epoch: 1, batch: 3254, sum loss: 4451.701172, avg loss: 2.770194, ppl: 15.961727 +epoch: 1, batch: 3255, sum loss: 4645.409180, avg loss: 2.747138, ppl: 15.597919 +epoch: 1, batch: 3256, sum loss: 4999.441895, avg loss: 2.918530, ppl: 18.514051 +epoch: 1, batch: 3257, sum loss: 4273.367676, avg loss: 2.490308, ppl: 12.064986 +epoch: 1, batch: 3258, sum loss: 4074.132568, avg loss: 2.935254, ppl: 18.826283 +epoch: 1, batch: 3259, sum loss: 4555.147461, avg loss: 2.687403, ppl: 14.693463 +epoch: 1, batch: 3260, sum loss: 4651.140137, avg loss: 2.788453, ppl: 16.255857 +epoch: 1, batch: 3261, sum loss: 5228.915039, avg loss: 3.136722, ppl: 23.028248 +epoch: 1, batch: 3262, sum loss: 4673.952637, avg loss: 2.707968, ppl: 14.998763 +epoch: 1, batch: 3263, sum loss: 5535.062500, avg loss: 3.052986, ppl: 21.178482 +epoch: 1, batch: 3264, sum loss: 4297.290527, avg loss: 2.735386, ppl: 15.415688 +epoch: 1, batch: 3265, sum loss: 3729.038086, avg loss: 2.467927, ppl: 11.797967 +epoch: 1, batch: 3266, sum loss: 4940.055176, avg loss: 3.105000, ppl: 22.309225 +epoch: 1, batch: 3267, sum loss: 5049.720215, avg loss: 
2.667575, ppl: 14.405000 +epoch: 1, batch: 3268, sum loss: 4074.969238, avg loss: 2.615513, ppl: 13.674231 +epoch: 1, batch: 3269, sum loss: 5362.662598, avg loss: 2.964435, ppl: 19.383751 +epoch: 1, batch: 3270, sum loss: 4836.919922, avg loss: 2.942165, ppl: 18.956850 +epoch: 1, batch: 3271, sum loss: 5521.777344, avg loss: 2.994456, ppl: 19.974497 +epoch: 1, batch: 3272, sum loss: 5585.401367, avg loss: 2.931969, ppl: 18.764545 +epoch: 1, batch: 3273, sum loss: 3585.150146, avg loss: 2.548081, ppl: 12.782553 +epoch: 1, batch: 3274, sum loss: 4950.655762, avg loss: 2.762643, ppl: 15.841655 +epoch: 1, batch: 3275, sum loss: 5368.856934, avg loss: 3.066166, ppl: 21.459478 +epoch: 1, batch: 3276, sum loss: 4506.371094, avg loss: 2.751142, ppl: 15.660506 +epoch: 1, batch: 3277, sum loss: 4939.369629, avg loss: 2.993557, ppl: 19.956551 +epoch: 1, batch: 3278, sum loss: 5034.376953, avg loss: 2.815647, ppl: 16.703981 +epoch: 1, batch: 3279, sum loss: 4596.764648, avg loss: 2.732916, ppl: 15.377661 +epoch: 1, batch: 3280, sum loss: 4258.181641, avg loss: 2.661363, ppl: 14.315793 +epoch: 1, batch: 3281, sum loss: 4656.737305, avg loss: 2.604439, ppl: 13.523640 +epoch: 1, batch: 3282, sum loss: 4427.444336, avg loss: 2.995565, ppl: 19.996649 +epoch: 1, batch: 3283, sum loss: 5226.487305, avg loss: 2.942842, ppl: 18.969681 +epoch: 1, batch: 3284, sum loss: 5402.019531, avg loss: 2.992809, ppl: 19.941612 +epoch: 1, batch: 3285, sum loss: 3678.677490, avg loss: 2.588795, ppl: 13.313715 +epoch: 1, batch: 3286, sum loss: 5124.605469, avg loss: 2.998599, ppl: 20.057413 +epoch: 1, batch: 3287, sum loss: 5164.108398, avg loss: 3.009387, ppl: 20.274973 +epoch: 1, batch: 3288, sum loss: 4455.106445, avg loss: 2.563352, ppl: 12.979256 +epoch: 1, batch: 3289, sum loss: 5926.975586, avg loss: 2.967940, ppl: 19.451805 +epoch: 1, batch: 3290, sum loss: 6226.008301, avg loss: 2.954916, ppl: 19.200109 +epoch: 1, batch: 3291, sum loss: 4457.097656, avg loss: 2.930373, ppl: 18.734625 
+epoch: 1, batch: 3292, sum loss: 5345.780273, avg loss: 2.867908, ppl: 17.600157 +epoch: 1, batch: 3293, sum loss: 4835.116211, avg loss: 2.914476, ppl: 18.439156 +epoch: 1, batch: 3294, sum loss: 3775.795410, avg loss: 2.627554, ppl: 13.839879 +epoch: 1, batch: 3295, sum loss: 4015.292480, avg loss: 2.636436, ppl: 13.963356 +epoch: 1, batch: 3296, sum loss: 5133.158203, avg loss: 2.800414, ppl: 16.451450 +epoch: 1, batch: 3297, sum loss: 4173.423340, avg loss: 2.661622, ppl: 14.319497 +epoch: 1, batch: 3298, sum loss: 4920.254395, avg loss: 2.879025, ppl: 17.796921 +epoch: 1, batch: 3299, sum loss: 4622.040527, avg loss: 2.784362, ppl: 16.189484 +epoch: 1, batch: 3300, sum loss: 4879.152344, avg loss: 2.873470, ppl: 17.698330 +epoch: 1, batch: 3301, sum loss: 5239.168945, avg loss: 2.973422, ppl: 19.558731 +epoch: 1, batch: 3302, sum loss: 5553.512695, avg loss: 3.018213, ppl: 20.454712 +epoch: 1, batch: 3303, sum loss: 5054.863770, avg loss: 2.727935, ppl: 15.301258 +epoch: 1, batch: 3304, sum loss: 5991.221191, avg loss: 3.156597, ppl: 23.490530 +epoch: 1, batch: 3305, sum loss: 4282.368164, avg loss: 2.603263, ppl: 13.507744 +epoch: 1, batch: 3306, sum loss: 4934.314453, avg loss: 2.839076, ppl: 17.099962 +epoch: 1, batch: 3307, sum loss: 5096.638184, avg loss: 2.803431, ppl: 16.501169 +epoch: 1, batch: 3308, sum loss: 5352.646484, avg loss: 3.184204, ppl: 24.148050 +epoch: 1, batch: 3309, sum loss: 4389.157227, avg loss: 2.820795, ppl: 16.790195 +epoch: 1, batch: 3310, sum loss: 5640.158691, avg loss: 2.937583, ppl: 18.870176 +epoch: 1, batch: 3311, sum loss: 4079.993408, avg loss: 2.574128, ppl: 13.119877 +epoch: 1, batch: 3312, sum loss: 5024.307129, avg loss: 2.841803, ppl: 17.146650 +epoch: 1, batch: 3313, sum loss: 4673.804199, avg loss: 2.634613, ppl: 13.937922 +epoch: 1, batch: 3314, sum loss: 4838.408203, avg loss: 2.842778, ppl: 17.163378 +epoch: 1, batch: 3315, sum loss: 5768.013184, avg loss: 2.763782, ppl: 15.859715 +epoch: 1, batch: 3316, sum 
loss: 4303.368652, avg loss: 2.561529, ppl: 12.955613 +epoch: 1, batch: 3317, sum loss: 4482.507324, avg loss: 2.655514, ppl: 14.232300 +epoch: 1, batch: 3318, sum loss: 4268.747559, avg loss: 2.752255, ppl: 15.677945 +epoch: 1, batch: 3319, sum loss: 4358.236816, avg loss: 2.670488, ppl: 14.447022 +epoch: 1, batch: 3320, sum loss: 4766.059082, avg loss: 2.790433, ppl: 16.288067 +epoch: 1, batch: 3321, sum loss: 6064.944824, avg loss: 3.115021, ppl: 22.533897 +epoch: 1, batch: 3322, sum loss: 4926.112793, avg loss: 2.969326, ppl: 19.478796 +epoch: 1, batch: 3323, sum loss: 4785.391602, avg loss: 2.754975, ppl: 15.720649 +epoch: 1, batch: 3324, sum loss: 4408.195312, avg loss: 2.805981, ppl: 16.543291 +epoch: 1, batch: 3325, sum loss: 4732.932617, avg loss: 2.923368, ppl: 18.603840 +epoch: 1, batch: 3326, sum loss: 4511.402344, avg loss: 2.739163, ppl: 15.474034 +epoch: 1, batch: 3327, sum loss: 4485.848633, avg loss: 2.655920, ppl: 14.238079 +epoch: 1, batch: 3328, sum loss: 5366.230469, avg loss: 2.861989, ppl: 17.496302 +epoch: 1, batch: 3329, sum loss: 4477.800781, avg loss: 2.752182, ppl: 15.676805 +epoch: 1, batch: 3330, sum loss: 4667.043945, avg loss: 2.773050, ppl: 16.007387 +epoch: 1, batch: 3331, sum loss: 3871.326172, avg loss: 2.724367, ppl: 15.246765 +epoch: 1, batch: 3332, sum loss: 4085.603027, avg loss: 2.630781, ppl: 13.884612 +epoch: 1, batch: 3333, sum loss: 4580.957520, avg loss: 2.572127, ppl: 13.093640 +epoch: 1, batch: 3334, sum loss: 5686.989258, avg loss: 2.908946, ppl: 18.337461 +epoch: 1, batch: 3335, sum loss: 4843.617188, avg loss: 2.696892, ppl: 14.833550 +epoch: 1, batch: 3336, sum loss: 3918.769043, avg loss: 2.579835, ppl: 13.194957 +epoch: 1, batch: 3337, sum loss: 4598.877930, avg loss: 2.745599, ppl: 15.573940 +epoch: 1, batch: 3338, sum loss: 3667.891113, avg loss: 2.659819, ppl: 14.293707 +epoch: 1, batch: 3339, sum loss: 4586.489258, avg loss: 2.880960, ppl: 17.831392 +epoch: 1, batch: 3340, sum loss: 3804.392090, avg loss: 
2.700065, ppl: 14.880705 +epoch: 1, batch: 3341, sum loss: 4490.991211, avg loss: 2.711951, ppl: 15.058630 +epoch: 1, batch: 3342, sum loss: 5146.914551, avg loss: 2.971660, ppl: 19.524296 +epoch: 1, batch: 3343, sum loss: 4942.104492, avg loss: 2.708003, ppl: 14.999286 +epoch: 1, batch: 3344, sum loss: 5069.111328, avg loss: 3.022726, ppl: 20.547224 +epoch: 1, batch: 3345, sum loss: 4565.070801, avg loss: 2.745082, ppl: 15.565885 +epoch: 1, batch: 3346, sum loss: 4266.349609, avg loss: 2.834784, ppl: 17.026722 +epoch: 1, batch: 3347, sum loss: 4639.420898, avg loss: 2.551937, ppl: 12.831930 +epoch: 1, batch: 3348, sum loss: 3510.048584, avg loss: 2.613588, ppl: 13.647933 +epoch: 1, batch: 3349, sum loss: 4209.438965, avg loss: 2.771191, ppl: 15.977651 +epoch: 1, batch: 3350, sum loss: 4731.213867, avg loss: 3.156247, ppl: 23.482300 +epoch: 1, batch: 3351, sum loss: 5747.700195, avg loss: 2.982719, ppl: 19.741428 +epoch: 1, batch: 3352, sum loss: 3348.347412, avg loss: 2.577635, ppl: 13.165958 +epoch: 1, batch: 3353, sum loss: 4991.692383, avg loss: 2.807476, ppl: 16.568048 +epoch: 1, batch: 3354, sum loss: 4460.779297, avg loss: 2.637954, ppl: 13.984555 +epoch: 1, batch: 3355, sum loss: 5221.287598, avg loss: 2.930016, ppl: 18.727922 +epoch: 1, batch: 3356, sum loss: 5054.400879, avg loss: 2.794030, ppl: 16.346771 +epoch: 1, batch: 3357, sum loss: 5053.986328, avg loss: 2.828196, ppl: 16.914919 +epoch: 1, batch: 3358, sum loss: 5182.143555, avg loss: 2.793608, ppl: 16.339874 +epoch: 1, batch: 3359, sum loss: 5653.619141, avg loss: 3.144393, ppl: 23.205589 +epoch: 1, batch: 3360, sum loss: 3523.167480, avg loss: 2.364542, ppl: 10.639165 +epoch: 1, batch: 3361, sum loss: 4561.840820, avg loss: 2.810746, ppl: 16.622313 +epoch: 1, batch: 3362, sum loss: 4369.624512, avg loss: 2.857832, ppl: 17.423706 +epoch: 1, batch: 3363, sum loss: 4318.601562, avg loss: 2.722952, ppl: 15.225199 +epoch: 1, batch: 3364, sum loss: 5170.528809, avg loss: 2.549571, ppl: 12.801605 
+epoch: 1, batch: 3365, sum loss: 3977.143066, avg loss: 2.826683, ppl: 16.889347 +epoch: 1, batch: 3366, sum loss: 6056.611816, avg loss: 3.079111, ppl: 21.739069 +epoch: 1, batch: 3367, sum loss: 4562.957031, avg loss: 2.886121, ppl: 17.923645 +epoch: 1, batch: 3368, sum loss: 6126.104980, avg loss: 2.978174, ppl: 19.651909 +epoch: 1, batch: 3369, sum loss: 4911.457520, avg loss: 2.962278, ppl: 19.341990 +epoch: 1, batch: 3370, sum loss: 4917.892090, avg loss: 2.867575, ppl: 17.594307 +epoch: 1, batch: 3371, sum loss: 5153.097656, avg loss: 2.864423, ppl: 17.538935 +epoch: 1, batch: 3372, sum loss: 4887.938965, avg loss: 2.660827, ppl: 14.308116 +epoch: 1, batch: 3373, sum loss: 4975.061035, avg loss: 2.723077, ppl: 15.227098 +epoch: 1, batch: 3374, sum loss: 3988.156738, avg loss: 2.816495, ppl: 16.718149 +epoch: 1, batch: 3375, sum loss: 4351.034668, avg loss: 2.836398, ppl: 17.054228 +epoch: 1, batch: 3376, sum loss: 3977.471924, avg loss: 2.490590, ppl: 12.068393 +epoch: 1, batch: 3377, sum loss: 4727.085449, avg loss: 2.856245, ppl: 17.396082 +epoch: 1, batch: 3378, sum loss: 4833.726562, avg loss: 2.911884, ppl: 18.391407 +epoch: 1, batch: 3379, sum loss: 4849.350586, avg loss: 2.955119, ppl: 19.204010 +epoch: 1, batch: 3380, sum loss: 4115.144531, avg loss: 2.480497, ppl: 11.947199 +epoch: 1, batch: 3381, sum loss: 4933.571777, avg loss: 2.663916, ppl: 14.352378 +epoch: 1, batch: 3382, sum loss: 3805.240723, avg loss: 2.653585, ppl: 14.204871 +epoch: 1, batch: 3383, sum loss: 4209.346680, avg loss: 2.591962, ppl: 13.355951 +epoch: 1, batch: 3384, sum loss: 5411.086426, avg loss: 3.016213, ppl: 20.413841 +epoch: 1, batch: 3385, sum loss: 4937.432129, avg loss: 2.850711, ppl: 17.300083 +epoch: 1, batch: 3386, sum loss: 5239.385254, avg loss: 2.829042, ppl: 16.929230 +epoch: 1, batch: 3387, sum loss: 5203.869629, avg loss: 2.845200, ppl: 17.204992 +epoch: 1, batch: 3388, sum loss: 3963.801270, avg loss: 2.568893, ppl: 13.051365 +epoch: 1, batch: 3389, sum 
loss: 3910.603516, avg loss: 2.476633, ppl: 11.901127 +epoch: 1, batch: 3390, sum loss: 4764.527344, avg loss: 2.794444, ppl: 16.353539 +epoch: 1, batch: 3391, sum loss: 4533.428711, avg loss: 2.769352, ppl: 15.948292 +epoch: 1, batch: 3392, sum loss: 4895.816406, avg loss: 2.736622, ppl: 15.434756 +epoch: 1, batch: 3393, sum loss: 5652.352051, avg loss: 3.040534, ppl: 20.916401 +epoch: 1, batch: 3394, sum loss: 4991.021484, avg loss: 2.622712, ppl: 13.773030 +epoch: 1, batch: 3395, sum loss: 4476.873047, avg loss: 2.545124, ppl: 12.744809 +epoch: 1, batch: 3396, sum loss: 4093.050537, avg loss: 2.712426, ppl: 15.065780 +epoch: 1, batch: 3397, sum loss: 4982.803223, avg loss: 2.785245, ppl: 16.203787 +epoch: 1, batch: 3398, sum loss: 4631.549316, avg loss: 2.597616, ppl: 13.431682 +epoch: 1, batch: 3399, sum loss: 4986.653320, avg loss: 2.798347, ppl: 16.417479 +epoch: 1, batch: 3400, sum loss: 5106.885742, avg loss: 3.020039, ppl: 20.492092 +epoch: 1, batch: 3401, sum loss: 3861.868408, avg loss: 2.550772, ppl: 12.816991 +epoch: 1, batch: 3402, sum loss: 4750.498535, avg loss: 2.972778, ppl: 19.546135 +epoch: 1, batch: 3403, sum loss: 4850.713379, avg loss: 2.883896, ppl: 17.883816 +epoch: 1, batch: 3404, sum loss: 5015.119141, avg loss: 2.784630, ppl: 16.193830 +epoch: 1, batch: 3405, sum loss: 4844.246094, avg loss: 2.885197, ppl: 17.907099 +epoch: 1, batch: 3406, sum loss: 4185.592773, avg loss: 2.671087, ppl: 14.455671 +epoch: 1, batch: 3407, sum loss: 4194.794434, avg loss: 2.615209, ppl: 13.670068 +epoch: 1, batch: 3408, sum loss: 5416.837891, avg loss: 2.934365, ppl: 18.809555 +epoch: 1, batch: 3409, sum loss: 4634.149414, avg loss: 2.763357, ppl: 15.852967 +epoch: 1, batch: 3410, sum loss: 4676.738281, avg loss: 2.672422, ppl: 14.474984 +epoch: 1, batch: 3411, sum loss: 5730.915527, avg loss: 2.966312, ppl: 19.420174 +epoch: 1, batch: 3412, sum loss: 4718.166992, avg loss: 2.967401, ppl: 19.441317 +epoch: 1, batch: 3413, sum loss: 4449.137207, avg loss: 
2.585205, ppl: 13.266003 +epoch: 1, batch: 3414, sum loss: 5769.169434, avg loss: 3.034808, ppl: 20.796978 +epoch: 1, batch: 3415, sum loss: 4345.694336, avg loss: 2.722866, ppl: 15.223892 +epoch: 1, batch: 3416, sum loss: 5172.715820, avg loss: 2.814317, ppl: 16.681786 +epoch: 1, batch: 3417, sum loss: 4814.888184, avg loss: 2.708036, ppl: 14.999790 +epoch: 1, batch: 3418, sum loss: 4917.917969, avg loss: 2.828015, ppl: 16.911860 +epoch: 1, batch: 3419, sum loss: 4591.840332, avg loss: 2.822274, ppl: 16.815048 +epoch: 1, batch: 3420, sum loss: 4624.903320, avg loss: 2.665650, ppl: 14.377297 +epoch: 1, batch: 3421, sum loss: 4612.427734, avg loss: 2.778571, ppl: 16.096001 +epoch: 1, batch: 3422, sum loss: 5214.327637, avg loss: 2.900071, ppl: 18.175434 +epoch: 1, batch: 3423, sum loss: 3957.759277, avg loss: 2.663364, ppl: 14.344465 +epoch: 1, batch: 3424, sum loss: 4774.489258, avg loss: 2.709699, ppl: 15.024751 +epoch: 1, batch: 3425, sum loss: 3774.263672, avg loss: 2.462012, ppl: 11.728380 +epoch: 1, batch: 3426, sum loss: 5386.046387, avg loss: 2.911376, ppl: 18.382084 +epoch: 1, batch: 3427, sum loss: 5748.631836, avg loss: 3.049672, ppl: 21.108423 +epoch: 1, batch: 3428, sum loss: 4141.475586, avg loss: 2.539225, ppl: 12.669847 +epoch: 1, batch: 3429, sum loss: 5337.300293, avg loss: 2.888149, ppl: 17.960043 +epoch: 1, batch: 3430, sum loss: 4352.441406, avg loss: 2.747753, ppl: 15.607529 +epoch: 1, batch: 3431, sum loss: 4708.771484, avg loss: 2.850346, ppl: 17.293766 +epoch: 1, batch: 3432, sum loss: 6035.986328, avg loss: 3.013473, ppl: 20.357981 +epoch: 1, batch: 3433, sum loss: 4956.478516, avg loss: 2.976864, ppl: 19.626175 +epoch: 1, batch: 3434, sum loss: 4171.781250, avg loss: 2.719544, ppl: 15.173408 +epoch: 1, batch: 3435, sum loss: 4569.162109, avg loss: 2.972780, ppl: 19.546173 +epoch: 1, batch: 3436, sum loss: 5806.950195, avg loss: 2.876152, ppl: 17.745848 +epoch: 1, batch: 3437, sum loss: 4135.509766, avg loss: 2.736936, ppl: 15.439600 
+epoch: 1, batch: 3438, sum loss: 4576.232422, avg loss: 2.725570, ppl: 15.265116 +epoch: 1, batch: 3439, sum loss: 5109.629883, avg loss: 2.931515, ppl: 18.756016 +epoch: 1, batch: 3440, sum loss: 4622.094238, avg loss: 2.696671, ppl: 14.830279 +epoch: 1, batch: 3441, sum loss: 5926.799316, avg loss: 2.979788, ppl: 19.683649 +epoch: 1, batch: 3442, sum loss: 3545.615479, avg loss: 2.475989, ppl: 11.893459 +epoch: 1, batch: 3443, sum loss: 3612.264893, avg loss: 2.665878, ppl: 14.380571 +epoch: 1, batch: 3444, sum loss: 4695.067383, avg loss: 2.673729, ppl: 14.493912 +epoch: 1, batch: 3445, sum loss: 4980.499023, avg loss: 2.796462, ppl: 16.386574 +epoch: 1, batch: 3446, sum loss: 4585.290039, avg loss: 2.648925, ppl: 14.138836 +epoch: 1, batch: 3447, sum loss: 5026.305664, avg loss: 2.784657, ppl: 16.194258 +epoch: 1, batch: 3448, sum loss: 4873.275879, avg loss: 2.909419, ppl: 18.346128 +epoch: 1, batch: 3449, sum loss: 4288.713867, avg loss: 2.673762, ppl: 14.494392 +epoch: 1, batch: 3450, sum loss: 4825.397461, avg loss: 2.913887, ppl: 18.428295 +epoch: 1, batch: 3451, sum loss: 4621.799316, avg loss: 2.999221, ppl: 20.069893 +epoch: 1, batch: 3452, sum loss: 5346.217773, avg loss: 2.960253, ppl: 19.302864 +epoch: 1, batch: 3453, sum loss: 4660.843262, avg loss: 2.797625, ppl: 16.405634 +epoch: 1, batch: 3454, sum loss: 5574.044434, avg loss: 2.996798, ppl: 20.021326 +epoch: 1, batch: 3455, sum loss: 3820.528809, avg loss: 2.541935, ppl: 12.704232 +epoch: 1, batch: 3456, sum loss: 4976.921875, avg loss: 2.768032, ppl: 15.927259 +epoch: 1, batch: 3457, sum loss: 5232.559082, avg loss: 2.961267, ppl: 19.322443 +epoch: 1, batch: 3458, sum loss: 4719.959961, avg loss: 2.862317, ppl: 17.502026 +epoch: 1, batch: 3459, sum loss: 4005.986572, avg loss: 2.712246, ppl: 15.063065 +epoch: 1, batch: 3460, sum loss: 5219.872559, avg loss: 2.770633, ppl: 15.968742 +epoch: 1, batch: 3461, sum loss: 5267.810547, avg loss: 2.966110, ppl: 19.416233 +epoch: 1, batch: 3462, sum 
loss: 4866.541504, avg loss: 2.859308, ppl: 17.449440 +epoch: 1, batch: 3463, sum loss: 5239.562988, avg loss: 2.832196, ppl: 16.982718 +epoch: 1, batch: 3464, sum loss: 4937.464355, avg loss: 2.899274, ppl: 18.160961 +epoch: 1, batch: 3465, sum loss: 4220.489258, avg loss: 2.776638, ppl: 16.064917 +epoch: 1, batch: 3466, sum loss: 4753.238281, avg loss: 2.598818, ppl: 13.447838 +epoch: 1, batch: 3467, sum loss: 5230.908203, avg loss: 2.942018, ppl: 18.954062 +epoch: 1, batch: 3468, sum loss: 4720.530273, avg loss: 2.791562, ppl: 16.306463 +epoch: 1, batch: 3469, sum loss: 4436.772461, avg loss: 2.707000, ppl: 14.984248 +epoch: 1, batch: 3470, sum loss: 4200.579590, avg loss: 2.740104, ppl: 15.488599 +epoch: 1, batch: 3471, sum loss: 4568.306641, avg loss: 2.844525, ppl: 17.193384 +epoch: 1, batch: 3472, sum loss: 4220.527832, avg loss: 2.627975, ppl: 13.845704 +epoch: 1, batch: 3473, sum loss: 5807.792969, avg loss: 3.050312, ppl: 21.121923 +epoch: 1, batch: 3474, sum loss: 4898.709473, avg loss: 2.999822, ppl: 20.081961 +epoch: 1, batch: 3475, sum loss: 5295.792969, avg loss: 2.951947, ppl: 19.143194 +epoch: 1, batch: 3476, sum loss: 4747.504883, avg loss: 2.797587, ppl: 16.405012 +epoch: 1, batch: 3477, sum loss: 4182.580078, avg loss: 2.599490, ppl: 13.456879 +epoch: 1, batch: 3478, sum loss: 5317.206543, avg loss: 3.057623, ppl: 21.276924 +epoch: 1, batch: 3479, sum loss: 4360.549316, avg loss: 2.735602, ppl: 15.419022 +epoch: 1, batch: 3480, sum loss: 3712.080811, avg loss: 2.368909, ppl: 10.685732 +epoch: 1, batch: 3481, sum loss: 5119.233398, avg loss: 3.231839, ppl: 25.326193 +epoch: 1, batch: 3482, sum loss: 5049.475586, avg loss: 2.851200, ppl: 17.308542 +epoch: 1, batch: 3483, sum loss: 5027.027344, avg loss: 3.095460, ppl: 22.097404 +epoch: 1, batch: 3484, sum loss: 4572.236816, avg loss: 2.561477, ppl: 12.954943 +epoch: 1, batch: 3485, sum loss: 4454.477051, avg loss: 2.716145, ppl: 15.121908 +epoch: 1, batch: 3486, sum loss: 4473.384277, avg loss: 
2.721037, ppl: 15.196068 +epoch: 1, batch: 3487, sum loss: 4571.487793, avg loss: 2.955066, ppl: 19.202999 +epoch: 1, batch: 3488, sum loss: 4665.418945, avg loss: 2.871027, ppl: 17.655140 +epoch: 1, batch: 3489, sum loss: 3794.503906, avg loss: 2.716180, ppl: 15.122449 +epoch: 1, batch: 3490, sum loss: 5053.071289, avg loss: 2.811949, ppl: 16.642315 +epoch: 1, batch: 3491, sum loss: 5353.622559, avg loss: 3.028067, ppl: 20.657267 +epoch: 1, batch: 3492, sum loss: 4952.876465, avg loss: 2.894726, ppl: 18.078552 +epoch: 1, batch: 3493, sum loss: 5150.407227, avg loss: 2.815969, ppl: 16.709364 +epoch: 1, batch: 3494, sum loss: 5282.622070, avg loss: 2.989599, ppl: 19.877720 +epoch: 1, batch: 3495, sum loss: 3955.174805, avg loss: 2.638542, ppl: 13.992793 +epoch: 1, batch: 3496, sum loss: 4012.873779, avg loss: 2.615954, ppl: 13.680264 +epoch: 1, batch: 3497, sum loss: 4690.355469, avg loss: 2.734901, ppl: 15.408217 +epoch: 1, batch: 3498, sum loss: 4348.775879, avg loss: 2.735079, ppl: 15.410961 +epoch: 1, batch: 3499, sum loss: 4881.798340, avg loss: 2.944390, ppl: 18.999065 +epoch: 1, batch: 3500, sum loss: 4195.446777, avg loss: 2.658711, ppl: 14.277880 +epoch: 1, batch: 3501, sum loss: 5835.842773, avg loss: 3.119104, ppl: 22.626091 +epoch: 1, batch: 3502, sum loss: 3682.846191, avg loss: 2.595381, ppl: 13.401699 +epoch: 1, batch: 3503, sum loss: 3671.933594, avg loss: 2.584049, ppl: 13.250682 +epoch: 1, batch: 3504, sum loss: 4450.957031, avg loss: 2.681299, ppl: 14.604058 +epoch: 1, batch: 3505, sum loss: 4384.865234, avg loss: 2.633553, ppl: 13.923148 +epoch: 1, batch: 3506, sum loss: 4440.528320, avg loss: 2.687971, ppl: 14.701821 +epoch: 1, batch: 3507, sum loss: 3983.489746, avg loss: 2.495921, ppl: 12.132901 +epoch: 1, batch: 3508, sum loss: 4392.272949, avg loss: 2.591311, ppl: 13.347264 +epoch: 1, batch: 3509, sum loss: 5092.479492, avg loss: 2.909988, ppl: 18.356581 +epoch: 1, batch: 3510, sum loss: 4614.449219, avg loss: 2.692211, ppl: 14.764278 
+epoch: 1, batch: 3511, sum loss: 4783.320801, avg loss: 2.764925, ppl: 15.877853 +epoch: 1, batch: 3512, sum loss: 4984.104492, avg loss: 2.919803, ppl: 18.537642 +epoch: 1, batch: 3513, sum loss: 5017.036621, avg loss: 2.771844, ppl: 15.988084 +epoch: 1, batch: 3514, sum loss: 3687.409180, avg loss: 2.515286, ppl: 12.370146 +epoch: 1, batch: 3515, sum loss: 4318.810059, avg loss: 2.682491, ppl: 14.621464 +epoch: 1, batch: 3516, sum loss: 4204.349121, avg loss: 2.522105, ppl: 12.454786 +epoch: 1, batch: 3517, sum loss: 4482.292969, avg loss: 2.685616, ppl: 14.667233 +epoch: 1, batch: 3518, sum loss: 4789.775391, avg loss: 2.880202, ppl: 17.817865 +epoch: 1, batch: 3519, sum loss: 4653.925293, avg loss: 2.707345, ppl: 14.989422 +epoch: 1, batch: 3520, sum loss: 4195.521484, avg loss: 2.580271, ppl: 13.200722 +epoch: 1, batch: 3521, sum loss: 4548.438477, avg loss: 2.721986, ppl: 15.210501 +epoch: 1, batch: 3522, sum loss: 4366.681641, avg loss: 2.774258, ppl: 16.026726 +epoch: 1, batch: 3523, sum loss: 4720.443848, avg loss: 2.857412, ppl: 17.416388 +epoch: 1, batch: 3524, sum loss: 4491.724121, avg loss: 2.733855, ppl: 15.392110 +epoch: 1, batch: 3525, sum loss: 4591.825195, avg loss: 2.646585, ppl: 14.105788 +epoch: 1, batch: 3526, sum loss: 3935.528076, avg loss: 2.701117, ppl: 14.896366 +epoch: 1, batch: 3527, sum loss: 4832.570801, avg loss: 2.701269, ppl: 14.898632 +epoch: 1, batch: 3528, sum loss: 4854.574707, avg loss: 2.917413, ppl: 18.493380 +epoch: 1, batch: 3529, sum loss: 4411.680176, avg loss: 2.654441, ppl: 14.217031 +epoch: 1, batch: 3530, sum loss: 3833.067139, avg loss: 2.634410, ppl: 13.935094 +epoch: 1, batch: 3531, sum loss: 4472.460449, avg loss: 2.708941, ppl: 15.013361 +epoch: 1, batch: 3532, sum loss: 4694.422852, avg loss: 2.769571, ppl: 15.951790 +epoch: 1, batch: 3533, sum loss: 5014.484375, avg loss: 2.844290, ppl: 17.189358 +epoch: 1, batch: 3534, sum loss: 4506.244629, avg loss: 2.839474, ppl: 17.106760 +epoch: 1, batch: 3535, sum 
loss: 4960.618164, avg loss: 2.911161, ppl: 18.378122 +epoch: 1, batch: 3536, sum loss: 4803.809082, avg loss: 2.873092, ppl: 17.691631 +epoch: 1, batch: 3537, sum loss: 5218.189941, avg loss: 2.772683, ppl: 16.001511 +epoch: 1, batch: 3538, sum loss: 3656.401855, avg loss: 2.378921, ppl: 10.793254 +epoch: 1, batch: 3539, sum loss: 4445.165527, avg loss: 2.468165, ppl: 11.800775 +epoch: 1, batch: 3540, sum loss: 5643.521973, avg loss: 3.149287, ppl: 23.319431 +epoch: 1, batch: 3541, sum loss: 4775.839355, avg loss: 2.878746, ppl: 17.791945 +epoch: 1, batch: 3542, sum loss: 4287.983398, avg loss: 2.746947, ppl: 15.594941 +epoch: 1, batch: 3543, sum loss: 5403.638184, avg loss: 3.107325, ppl: 22.361151 +epoch: 1, batch: 3544, sum loss: 4378.095215, avg loss: 2.850322, ppl: 17.293358 +epoch: 1, batch: 3545, sum loss: 4806.114258, avg loss: 2.872752, ppl: 17.685625 +epoch: 1, batch: 3546, sum loss: 4308.598633, avg loss: 2.887801, ppl: 17.953781 +epoch: 1, batch: 3547, sum loss: 4303.739258, avg loss: 2.814741, ppl: 16.688860 +epoch: 1, batch: 3548, sum loss: 4393.988281, avg loss: 2.724109, ppl: 15.242833 +epoch: 1, batch: 3549, sum loss: 4659.524902, avg loss: 3.021741, ppl: 20.527000 +epoch: 1, batch: 3550, sum loss: 4083.778320, avg loss: 2.584670, ppl: 13.258910 +epoch: 1, batch: 3551, sum loss: 4435.570312, avg loss: 2.633949, ppl: 13.928670 +epoch: 1, batch: 3552, sum loss: 5724.374023, avg loss: 2.889638, ppl: 17.986805 +epoch: 1, batch: 3553, sum loss: 4899.944336, avg loss: 2.719170, ppl: 15.167729 +epoch: 1, batch: 3554, sum loss: 4644.282227, avg loss: 2.882857, ppl: 17.865238 +epoch: 1, batch: 3555, sum loss: 4823.620605, avg loss: 2.862683, ppl: 17.508436 +epoch: 1, batch: 3556, sum loss: 4783.685059, avg loss: 2.672450, ppl: 14.475385 +epoch: 1, batch: 3557, sum loss: 4907.820312, avg loss: 2.711503, ppl: 15.051882 +epoch: 1, batch: 3558, sum loss: 4385.570312, avg loss: 2.640319, ppl: 14.017679 +epoch: 1, batch: 3559, sum loss: 4845.739258, avg loss: 
2.894707, ppl: 18.078203 +epoch: 1, batch: 3560, sum loss: 3603.213135, avg loss: 2.459531, ppl: 11.699324 +epoch: 1, batch: 3561, sum loss: 4226.602051, avg loss: 2.685262, ppl: 14.662038 +epoch: 1, batch: 3562, sum loss: 4296.026855, avg loss: 2.691746, ppl: 14.757423 +epoch: 1, batch: 3563, sum loss: 4582.056641, avg loss: 2.770288, ppl: 15.963234 +epoch: 1, batch: 3564, sum loss: 5219.770996, avg loss: 3.182787, ppl: 24.113871 +epoch: 1, batch: 3565, sum loss: 4652.767578, avg loss: 2.872079, ppl: 17.673721 +epoch: 1, batch: 3566, sum loss: 4503.582520, avg loss: 2.734416, ppl: 15.400740 +epoch: 1, batch: 3567, sum loss: 4286.405762, avg loss: 2.779770, ppl: 16.115320 +epoch: 1, batch: 3568, sum loss: 4400.701172, avg loss: 2.647835, ppl: 14.123425 +epoch: 1, batch: 3569, sum loss: 4698.337891, avg loss: 2.652929, ppl: 14.195561 +epoch: 1, batch: 3570, sum loss: 4190.653809, avg loss: 2.624079, ppl: 13.791862 +epoch: 1, batch: 3571, sum loss: 4725.952637, avg loss: 3.092901, ppl: 22.040926 +epoch: 1, batch: 3572, sum loss: 5251.498047, avg loss: 2.893388, ppl: 18.054367 +epoch: 1, batch: 3573, sum loss: 5343.977051, avg loss: 2.863868, ppl: 17.529190 +epoch: 1, batch: 3574, sum loss: 4493.731445, avg loss: 2.705437, ppl: 14.960859 +epoch: 1, batch: 3575, sum loss: 4809.126953, avg loss: 2.796004, ppl: 16.379066 +epoch: 1, batch: 3576, sum loss: 4337.357910, avg loss: 2.717643, ppl: 15.144581 +epoch: 1, batch: 3577, sum loss: 4614.686035, avg loss: 2.781607, ppl: 16.144943 +epoch: 1, batch: 3578, sum loss: 4709.490723, avg loss: 2.838753, ppl: 17.094431 +epoch: 1, batch: 3579, sum loss: 4035.972656, avg loss: 2.559273, ppl: 12.926411 +epoch: 1, batch: 3580, sum loss: 4030.310547, avg loss: 2.683296, ppl: 14.633245 +epoch: 1, batch: 3581, sum loss: 5128.697266, avg loss: 3.013336, ppl: 20.355186 +epoch: 1, batch: 3582, sum loss: 5375.247070, avg loss: 2.910259, ppl: 18.361546 +epoch: 1, batch: 3583, sum loss: 4329.813477, avg loss: 2.797037, ppl: 16.395996 
+epoch: 1, batch: 3584, sum loss: 6928.438477, avg loss: 3.299256, ppl: 27.092484 +epoch: 1, batch: 3585, sum loss: 4264.401367, avg loss: 2.965509, ppl: 19.404572 +epoch: 1, batch: 3586, sum loss: 4145.045410, avg loss: 2.519784, ppl: 12.425915 +epoch: 1, batch: 3587, sum loss: 4134.740723, avg loss: 2.572956, ppl: 13.104508 +epoch: 1, batch: 3588, sum loss: 4736.317871, avg loss: 2.771397, ppl: 15.980949 +epoch: 1, batch: 3589, sum loss: 4493.852539, avg loss: 2.780849, ppl: 16.132715 +epoch: 1, batch: 3590, sum loss: 3837.266602, avg loss: 2.672191, ppl: 14.471643 +epoch: 1, batch: 3591, sum loss: 4049.869385, avg loss: 2.589431, ppl: 13.322184 +epoch: 1, batch: 3592, sum loss: 5206.520996, avg loss: 2.905425, ppl: 18.273005 +epoch: 1, batch: 3593, sum loss: 2893.363770, avg loss: 2.415162, ppl: 11.191579 +epoch: 1, batch: 3594, sum loss: 4522.686523, avg loss: 2.803897, ppl: 16.508863 +epoch: 1, batch: 3595, sum loss: 4195.274414, avg loss: 2.903304, ppl: 18.234293 +epoch: 1, batch: 3596, sum loss: 4817.929199, avg loss: 2.762574, ppl: 15.840564 +epoch: 1, batch: 3597, sum loss: 4363.427734, avg loss: 2.551712, ppl: 12.829049 +epoch: 1, batch: 3598, sum loss: 4507.084961, avg loss: 2.879926, ppl: 17.812963 +epoch: 1, batch: 3599, sum loss: 3916.171387, avg loss: 2.559589, ppl: 12.930504 +epoch: 1, batch: 3600, sum loss: 4828.158691, avg loss: 2.664547, ppl: 14.361439 +epoch: 1, batch: 3601, sum loss: 4869.321777, avg loss: 3.157796, ppl: 23.518707 +epoch: 1, batch: 3602, sum loss: 5387.184082, avg loss: 2.811683, ppl: 16.637892 +epoch: 1, batch: 3603, sum loss: 4175.469727, avg loss: 2.617849, ppl: 13.706215 +epoch: 1, batch: 3604, sum loss: 5214.949219, avg loss: 2.826531, ppl: 16.886774 +epoch: 1, batch: 3605, sum loss: 5167.001465, avg loss: 2.961032, ppl: 19.317905 +epoch: 1, batch: 3606, sum loss: 3740.977539, avg loss: 2.740643, ppl: 15.496946 +epoch: 1, batch: 3607, sum loss: 4999.169922, avg loss: 3.022473, ppl: 20.542027 +epoch: 1, batch: 3608, sum 
loss: 5484.140625, avg loss: 2.838582, ppl: 17.091513 +epoch: 1, batch: 3609, sum loss: 5279.957520, avg loss: 2.857120, ppl: 17.411306 +epoch: 1, batch: 3610, sum loss: 4645.678711, avg loss: 2.696273, ppl: 14.824383 +epoch: 1, batch: 3611, sum loss: 5568.054688, avg loss: 2.886498, ppl: 17.930407 +epoch: 1, batch: 3612, sum loss: 4419.730469, avg loss: 2.718161, ppl: 15.152436 +epoch: 1, batch: 3613, sum loss: 5317.798340, avg loss: 3.066781, ppl: 21.472672 +epoch: 1, batch: 3614, sum loss: 6106.950195, avg loss: 3.123760, ppl: 22.731684 +epoch: 1, batch: 3615, sum loss: 4806.315918, avg loss: 2.884943, ppl: 17.902555 +epoch: 1, batch: 3616, sum loss: 5133.303711, avg loss: 2.546282, ppl: 12.759573 +epoch: 1, batch: 3617, sum loss: 3924.812500, avg loss: 2.614799, ppl: 13.664463 +epoch: 1, batch: 3618, sum loss: 4113.505371, avg loss: 2.625083, ppl: 13.805726 +epoch: 1, batch: 3619, sum loss: 5039.605957, avg loss: 2.847235, ppl: 17.240047 +epoch: 1, batch: 3620, sum loss: 4621.833984, avg loss: 2.849466, ppl: 17.278555 +epoch: 1, batch: 3621, sum loss: 4416.793945, avg loss: 2.622799, ppl: 13.774229 +epoch: 1, batch: 3622, sum loss: 4689.293457, avg loss: 2.655319, ppl: 14.229525 +epoch: 1, batch: 3623, sum loss: 4499.195801, avg loss: 2.927258, ppl: 18.676353 +epoch: 1, batch: 3624, sum loss: 4575.960449, avg loss: 2.715703, ppl: 15.115240 +epoch: 1, batch: 3625, sum loss: 5141.979004, avg loss: 2.895258, ppl: 18.088175 +epoch: 1, batch: 3626, sum loss: 4708.312988, avg loss: 2.834625, ppl: 17.024023 +epoch: 1, batch: 3627, sum loss: 4929.378906, avg loss: 2.842779, ppl: 17.163395 +epoch: 1, batch: 3628, sum loss: 5031.941406, avg loss: 2.966947, ppl: 19.432493 +epoch: 1, batch: 3629, sum loss: 5131.844727, avg loss: 2.822797, ppl: 16.823837 +epoch: 1, batch: 3630, sum loss: 4708.293457, avg loss: 2.815965, ppl: 16.709295 +epoch: 1, batch: 3631, sum loss: 4467.314941, avg loss: 2.818495, ppl: 16.751625 +epoch: 1, batch: 3632, sum loss: 4377.066895, avg loss: 
2.639968, ppl: 14.012754 +epoch: 1, batch: 3633, sum loss: 4000.024414, avg loss: 2.580661, ppl: 13.205865 +epoch: 1, batch: 3634, sum loss: 3953.955566, avg loss: 2.596163, ppl: 13.412170 +epoch: 1, batch: 3635, sum loss: 5613.573730, avg loss: 2.908587, ppl: 18.330887 +epoch: 1, batch: 3636, sum loss: 4569.331055, avg loss: 2.786177, ppl: 16.218903 +epoch: 1, batch: 3637, sum loss: 4551.608398, avg loss: 2.946025, ppl: 19.030157 +epoch: 1, batch: 3638, sum loss: 4079.182861, avg loss: 2.457339, ppl: 11.673707 +epoch: 1, batch: 3639, sum loss: 6949.180664, avg loss: 3.126037, ppl: 22.783512 +epoch: 1, batch: 3640, sum loss: 5400.859375, avg loss: 3.030785, ppl: 20.713493 +epoch: 1, batch: 3641, sum loss: 4952.427734, avg loss: 2.913193, ppl: 18.415504 +epoch: 1, batch: 3642, sum loss: 4847.645508, avg loss: 2.741881, ppl: 15.516146 +epoch: 1, batch: 3643, sum loss: 5299.715332, avg loss: 3.137783, ppl: 23.052704 +epoch: 1, batch: 3644, sum loss: 4539.723633, avg loss: 2.769813, ppl: 15.955651 +epoch: 1, batch: 3645, sum loss: 4759.708984, avg loss: 3.068800, ppl: 21.516068 +epoch: 1, batch: 3646, sum loss: 4883.732910, avg loss: 2.798701, ppl: 16.423296 +epoch: 1, batch: 3647, sum loss: 3908.785645, avg loss: 2.340590, ppl: 10.387365 +epoch: 1, batch: 3648, sum loss: 4417.061035, avg loss: 2.873820, ppl: 17.704517 +epoch: 1, batch: 3649, sum loss: 4750.483398, avg loss: 2.956119, ppl: 19.223227 +epoch: 1, batch: 3650, sum loss: 4176.470703, avg loss: 2.461091, ppl: 11.717584 +epoch: 1, batch: 3651, sum loss: 4068.656250, avg loss: 2.601443, ppl: 13.483174 +epoch: 1, batch: 3652, sum loss: 3958.119141, avg loss: 2.660026, ppl: 14.296665 +epoch: 1, batch: 3653, sum loss: 4198.641113, avg loss: 2.594957, ppl: 13.396016 +epoch: 1, batch: 3654, sum loss: 5067.253906, avg loss: 3.019818, ppl: 20.487555 +epoch: 1, batch: 3655, sum loss: 4584.089355, avg loss: 2.819243, ppl: 16.764158 +epoch: 1, batch: 3656, sum loss: 5077.007812, avg loss: 2.742846, ppl: 15.531124 
+epoch: 1, batch: 3657, sum loss: 4855.684082, avg loss: 2.675308, ppl: 14.516824 +epoch: 1, batch: 3658, sum loss: 3588.642822, avg loss: 2.457975, ppl: 11.681129 +epoch: 1, batch: 3659, sum loss: 4983.850586, avg loss: 2.591706, ppl: 13.352529 +epoch: 1, batch: 3660, sum loss: 5169.529785, avg loss: 2.912411, ppl: 18.401110 +epoch: 1, batch: 3661, sum loss: 4272.902344, avg loss: 2.706081, ppl: 14.970493 +epoch: 1, batch: 3662, sum loss: 3904.797363, avg loss: 2.451222, ppl: 11.602521 +epoch: 1, batch: 3663, sum loss: 4401.164062, avg loss: 2.652902, ppl: 14.195168 +epoch: 1, batch: 3664, sum loss: 5066.914551, avg loss: 2.874030, ppl: 17.708237 +epoch: 1, batch: 3665, sum loss: 5210.898926, avg loss: 2.962421, ppl: 19.344757 +epoch: 1, batch: 3666, sum loss: 3361.293213, avg loss: 2.449922, ppl: 11.587446 +epoch: 1, batch: 3667, sum loss: 5075.494141, avg loss: 2.770466, ppl: 15.966078 +epoch: 1, batch: 3668, sum loss: 4364.054199, avg loss: 2.692199, ppl: 14.764103 +epoch: 1, batch: 3669, sum loss: 4508.963867, avg loss: 2.668026, ppl: 14.411492 +epoch: 1, batch: 3670, sum loss: 5147.307617, avg loss: 2.916322, ppl: 18.473209 +epoch: 1, batch: 3671, sum loss: 5073.543945, avg loss: 2.842322, ppl: 17.155548 +epoch: 1, batch: 3672, sum loss: 4277.229004, avg loss: 2.507168, ppl: 12.270132 +epoch: 1, batch: 3673, sum loss: 6464.372070, avg loss: 3.187560, ppl: 24.229244 +epoch: 1, batch: 3674, sum loss: 4355.209473, avg loss: 2.639521, ppl: 14.006492 +epoch: 1, batch: 3675, sum loss: 4975.164551, avg loss: 2.991680, ppl: 19.919127 +epoch: 1, batch: 3676, sum loss: 3872.782715, avg loss: 2.493743, ppl: 12.106505 +epoch: 1, batch: 3677, sum loss: 5195.522461, avg loss: 3.245173, ppl: 25.666162 +epoch: 1, batch: 3678, sum loss: 4524.278320, avg loss: 2.806624, ppl: 16.553940 +epoch: 1, batch: 3679, sum loss: 4201.749023, avg loss: 2.626093, ppl: 13.819673 +epoch: 1, batch: 3680, sum loss: 3793.570801, avg loss: 2.656562, ppl: 14.247228 +epoch: 1, batch: 3681, sum 
loss: 4615.429199, avg loss: 2.884643, ppl: 17.897179 +epoch: 1, batch: 3682, sum loss: 4468.596680, avg loss: 2.871849, ppl: 17.669657 +epoch: 1, batch: 3683, sum loss: 4684.052734, avg loss: 2.896755, ppl: 18.115265 +epoch: 1, batch: 3684, sum loss: 4357.570312, avg loss: 2.895396, ppl: 18.090660 +epoch: 1, batch: 3685, sum loss: 4909.128418, avg loss: 2.829469, ppl: 16.936464 +epoch: 1, batch: 3686, sum loss: 5797.548828, avg loss: 2.960955, ppl: 19.316404 +epoch: 1, batch: 3687, sum loss: 5116.937988, avg loss: 2.944153, ppl: 18.994568 +epoch: 1, batch: 3688, sum loss: 3982.956055, avg loss: 2.869565, ppl: 17.629343 +epoch: 1, batch: 3689, sum loss: 5405.536133, avg loss: 2.644587, ppl: 14.077630 +epoch: 1, batch: 3690, sum loss: 4286.724609, avg loss: 2.744382, ppl: 15.554996 +epoch: 1, batch: 3691, sum loss: 4028.696777, avg loss: 2.536963, ppl: 12.641218 +epoch: 1, batch: 3692, sum loss: 5267.363770, avg loss: 2.638960, ppl: 13.998636 +epoch: 1, batch: 3693, sum loss: 4934.298828, avg loss: 2.829300, ppl: 16.933598 +epoch: 1, batch: 3694, sum loss: 5385.131836, avg loss: 2.832789, ppl: 16.992786 +epoch: 1, batch: 3695, sum loss: 4338.705566, avg loss: 2.590272, ppl: 13.333401 +epoch: 1, batch: 3696, sum loss: 5718.014648, avg loss: 2.909931, ppl: 18.355536 +epoch: 1, batch: 3697, sum loss: 4932.691895, avg loss: 2.874529, ppl: 17.717079 +epoch: 1, batch: 3698, sum loss: 4819.649902, avg loss: 2.739994, ppl: 15.486897 +epoch: 1, batch: 3699, sum loss: 5352.110352, avg loss: 2.781762, ppl: 16.147449 +epoch: 1, batch: 3700, sum loss: 4996.364258, avg loss: 3.028100, ppl: 20.657936 +epoch: 1, batch: 3701, sum loss: 5197.299805, avg loss: 2.806317, ppl: 16.548862 +epoch: 1, batch: 3702, sum loss: 4483.410156, avg loss: 2.777825, ppl: 16.084005 +epoch: 1, batch: 3703, sum loss: 5616.073242, avg loss: 2.943434, ppl: 18.980925 +epoch: 1, batch: 3704, sum loss: 4211.154297, avg loss: 2.675447, ppl: 14.518842 +epoch: 1, batch: 3705, sum loss: 3959.909668, avg loss: 
2.603491, ppl: 13.510823 +epoch: 1, batch: 3706, sum loss: 4419.486328, avg loss: 2.790080, ppl: 16.282320 +epoch: 1, batch: 3707, sum loss: 3760.173340, avg loss: 2.513485, ppl: 12.347887 +epoch: 1, batch: 3708, sum loss: 5113.612793, avg loss: 2.910423, ppl: 18.364557 +epoch: 1, batch: 3709, sum loss: 4712.146973, avg loss: 2.921356, ppl: 18.566446 +epoch: 1, batch: 3710, sum loss: 4996.671875, avg loss: 3.071095, ppl: 21.565510 +epoch: 1, batch: 3711, sum loss: 4349.784180, avg loss: 2.756517, ppl: 15.744910 +epoch: 1, batch: 3712, sum loss: 5116.352051, avg loss: 2.887332, ppl: 17.945366 +epoch: 1, batch: 3713, sum loss: 4239.847656, avg loss: 2.586850, ppl: 13.287851 +epoch: 1, batch: 3714, sum loss: 4322.656250, avg loss: 2.676567, ppl: 14.535113 +epoch: 1, batch: 3715, sum loss: 4389.431641, avg loss: 2.719598, ppl: 15.174226 +epoch: 1, batch: 3716, sum loss: 4076.380859, avg loss: 2.724854, ppl: 15.254179 +epoch: 1, batch: 3717, sum loss: 6071.772461, avg loss: 3.129780, ppl: 22.868944 +epoch: 1, batch: 3718, sum loss: 4822.584473, avg loss: 2.845183, ppl: 17.204700 +epoch: 1, batch: 3719, sum loss: 5649.131348, avg loss: 2.847344, ppl: 17.241934 +epoch: 1, batch: 3720, sum loss: 5301.998535, avg loss: 2.818713, ppl: 16.755268 +epoch: 1, batch: 3721, sum loss: 3505.516113, avg loss: 2.618011, ppl: 13.708424 +epoch: 1, batch: 3722, sum loss: 4781.796387, avg loss: 2.652133, ppl: 14.184264 +epoch: 1, batch: 3723, sum loss: 4374.744141, avg loss: 2.582494, ppl: 13.230087 +epoch: 1, batch: 3724, sum loss: 4565.060059, avg loss: 2.920704, ppl: 18.554342 +epoch: 1, batch: 3725, sum loss: 5023.428711, avg loss: 2.948022, ppl: 19.068193 +epoch: 1, batch: 3726, sum loss: 4892.534668, avg loss: 2.792543, ppl: 16.322470 +epoch: 1, batch: 3727, sum loss: 4968.603516, avg loss: 2.740543, ppl: 15.495392 +epoch: 1, batch: 3728, sum loss: 5104.171387, avg loss: 2.964095, ppl: 19.377157 +epoch: 1, batch: 3729, sum loss: 4437.000000, avg loss: 2.755901, ppl: 15.735206 
+epoch: 1, batch: 3730, sum loss: 5085.132324, avg loss: 2.818810, ppl: 16.756889 +epoch: 1, batch: 3731, sum loss: 4421.199707, avg loss: 2.580969, ppl: 13.209930 +epoch: 1, batch: 3732, sum loss: 3910.142578, avg loss: 2.661772, ppl: 14.321638 +epoch: 1, batch: 3733, sum loss: 4697.352051, avg loss: 2.792718, ppl: 16.325335 +epoch: 1, batch: 3734, sum loss: 4599.283203, avg loss: 2.660083, ppl: 14.297473 +epoch: 1, batch: 3735, sum loss: 4888.840820, avg loss: 2.792028, ppl: 16.314070 +epoch: 1, batch: 3736, sum loss: 4674.220703, avg loss: 2.704989, ppl: 14.954151 +epoch: 1, batch: 3737, sum loss: 6010.284180, avg loss: 3.096488, ppl: 22.120140 +epoch: 1, batch: 3738, sum loss: 6026.678711, avg loss: 3.098550, ppl: 22.165779 +epoch: 1, batch: 3739, sum loss: 4580.399902, avg loss: 2.827407, ppl: 16.901585 +epoch: 1, batch: 3740, sum loss: 4274.150391, avg loss: 2.826819, ppl: 16.891645 +epoch: 1, batch: 3741, sum loss: 5220.475098, avg loss: 2.846497, ppl: 17.227322 +epoch: 1, batch: 3742, sum loss: 5532.402344, avg loss: 2.941203, ppl: 18.938608 +epoch: 1, batch: 3743, sum loss: 4252.098145, avg loss: 2.739754, ppl: 15.483179 +epoch: 1, batch: 3744, sum loss: 4142.920898, avg loss: 2.665972, ppl: 14.381926 +epoch: 1, batch: 3745, sum loss: 3951.591797, avg loss: 2.601443, ppl: 13.483178 +epoch: 1, batch: 3746, sum loss: 4157.703125, avg loss: 2.781072, ppl: 16.136312 +epoch: 1, batch: 3747, sum loss: 5473.229004, avg loss: 2.933134, ppl: 18.786421 +epoch: 1, batch: 3748, sum loss: 3911.264893, avg loss: 2.669805, ppl: 14.437158 +epoch: 1, batch: 3749, sum loss: 5269.310547, avg loss: 2.770405, ppl: 15.965103 +epoch: 1, batch: 3750, sum loss: 3782.088379, avg loss: 2.545147, ppl: 12.745101 +epoch: 1, batch: 3751, sum loss: 4116.087891, avg loss: 2.693775, ppl: 14.787393 +epoch: 1, batch: 3752, sum loss: 3967.877441, avg loss: 2.548412, ppl: 12.786783 +epoch: 1, batch: 3753, sum loss: 4215.916992, avg loss: 2.633302, ppl: 13.919663 +epoch: 1, batch: 3754, sum 
loss: 3772.612305, avg loss: 2.401408, ppl: 11.038713 +epoch: 1, batch: 3755, sum loss: 4335.238281, avg loss: 2.766585, ppl: 15.904226 +epoch: 1, batch: 3756, sum loss: 4458.251465, avg loss: 2.569597, ppl: 13.060566 +epoch: 1, batch: 3757, sum loss: 5168.638184, avg loss: 2.844600, ppl: 17.194679 +epoch: 1, batch: 3758, sum loss: 5039.510742, avg loss: 2.891286, ppl: 18.016459 +epoch: 1, batch: 3759, sum loss: 5471.386719, avg loss: 2.924312, ppl: 18.621403 +epoch: 1, batch: 3760, sum loss: 4896.677734, avg loss: 2.711339, ppl: 15.049410 +epoch: 1, batch: 3761, sum loss: 4820.387207, avg loss: 3.084061, ppl: 21.846941 +epoch: 1, batch: 3762, sum loss: 5483.505859, avg loss: 3.160522, ppl: 23.582909 +epoch: 1, batch: 3763, sum loss: 3998.964111, avg loss: 2.568378, ppl: 13.044645 +epoch: 1, batch: 3764, sum loss: 4533.637695, avg loss: 2.739358, ppl: 15.477049 +epoch: 1, batch: 3765, sum loss: 4268.976562, avg loss: 2.693361, ppl: 14.781266 +epoch: 1, batch: 3766, sum loss: 5483.538574, avg loss: 2.838270, ppl: 17.086187 +epoch: 1, batch: 3767, sum loss: 4755.154297, avg loss: 3.030691, ppl: 20.711542 +epoch: 1, batch: 3768, sum loss: 4951.174316, avg loss: 2.866922, ppl: 17.582817 +epoch: 1, batch: 3769, sum loss: 4680.873047, avg loss: 2.799565, ppl: 16.437500 +epoch: 1, batch: 3770, sum loss: 6025.903809, avg loss: 2.909659, ppl: 18.350542 +epoch: 1, batch: 3771, sum loss: 4297.022949, avg loss: 2.941152, ppl: 18.937653 +epoch: 1, batch: 3772, sum loss: 3524.418213, avg loss: 2.508483, ppl: 12.286274 +epoch: 1, batch: 3773, sum loss: 6035.507812, avg loss: 3.117514, ppl: 22.590153 +epoch: 1, batch: 3774, sum loss: 5714.244629, avg loss: 3.018618, ppl: 20.462999 +epoch: 1, batch: 3775, sum loss: 4452.031250, avg loss: 2.631224, ppl: 13.890764 +epoch: 1, batch: 3776, sum loss: 6249.861816, avg loss: 3.205057, ppl: 24.656908 +epoch: 1, batch: 3777, sum loss: 5436.849609, avg loss: 3.017120, ppl: 20.432354 +epoch: 1, batch: 3778, sum loss: 5259.154785, avg loss: 
2.988156, ppl: 19.849049 +epoch: 1, batch: 3779, sum loss: 4218.945801, avg loss: 2.714894, ppl: 15.103014 +epoch: 1, batch: 3780, sum loss: 6048.687500, avg loss: 3.018307, ppl: 20.456633 +epoch: 1, batch: 3781, sum loss: 4615.184570, avg loss: 2.800476, ppl: 16.452478 +epoch: 1, batch: 3782, sum loss: 4937.557617, avg loss: 2.860694, ppl: 17.473644 +epoch: 1, batch: 3783, sum loss: 5655.750977, avg loss: 2.995631, ppl: 19.997969 +epoch: 1, batch: 3784, sum loss: 4864.550781, avg loss: 3.032763, ppl: 20.754488 +epoch: 1, batch: 3785, sum loss: 4970.843750, avg loss: 2.893390, ppl: 18.054409 +epoch: 1, batch: 3786, sum loss: 4402.199707, avg loss: 2.773913, ppl: 16.021200 +epoch: 1, batch: 3787, sum loss: 3840.303711, avg loss: 2.762808, ppl: 15.844276 +epoch: 1, batch: 3788, sum loss: 4453.569824, avg loss: 2.705693, ppl: 14.964677 +epoch: 1, batch: 3789, sum loss: 4836.215332, avg loss: 2.824892, ppl: 16.859125 +epoch: 1, batch: 3790, sum loss: 4228.059082, avg loss: 2.637591, ppl: 13.979491 +epoch: 1, batch: 3791, sum loss: 5310.340332, avg loss: 2.905000, ppl: 18.265247 +epoch: 1, batch: 3792, sum loss: 4897.816895, avg loss: 2.719498, ppl: 15.172709 +epoch: 1, batch: 3793, sum loss: 5016.555176, avg loss: 3.032984, ppl: 20.759081 +epoch: 1, batch: 3794, sum loss: 3584.574707, avg loss: 2.397709, ppl: 10.997948 +epoch: 1, batch: 3795, sum loss: 4729.581543, avg loss: 2.655576, ppl: 14.233189 +epoch: 1, batch: 3796, sum loss: 4836.300781, avg loss: 2.925772, ppl: 18.648611 +epoch: 1, batch: 3797, sum loss: 4413.387695, avg loss: 2.710926, ppl: 15.043200 +epoch: 1, batch: 3798, sum loss: 3376.121826, avg loss: 2.056103, ppl: 7.815457 +epoch: 1, batch: 3799, sum loss: 4954.194336, avg loss: 2.638016, ppl: 13.985432 +epoch: 1, batch: 3800, sum loss: 5195.498047, avg loss: 2.787284, ppl: 16.236866 +epoch: 1, batch: 3801, sum loss: 3935.255371, avg loss: 2.792942, ppl: 16.328989 +epoch: 1, batch: 3802, sum loss: 4947.108887, avg loss: 2.716699, ppl: 15.130297 +epoch: 
1, batch: 3803, sum loss: 5606.502930, avg loss: 2.950791, ppl: 19.121075 +epoch: 1, batch: 3804, sum loss: 4800.384766, avg loss: 2.957723, ppl: 19.254084 +epoch: 1, batch: 3805, sum loss: 4399.819336, avg loss: 2.763705, ppl: 15.858498 +epoch: 1, batch: 3806, sum loss: 4031.771240, avg loss: 2.491824, ppl: 12.083295 +epoch: 1, batch: 3807, sum loss: 5723.742676, avg loss: 2.971829, ppl: 19.527607 +epoch: 1, batch: 3808, sum loss: 5500.502441, avg loss: 2.860376, ppl: 17.468088 +epoch: 1, batch: 3809, sum loss: 4153.414062, avg loss: 2.608928, ppl: 13.584487 +epoch: 1, batch: 3810, sum loss: 4833.221680, avg loss: 2.625324, ppl: 13.809051 +epoch: 1, batch: 3811, sum loss: 5645.624023, avg loss: 2.929748, ppl: 18.722908 +epoch: 1, batch: 3812, sum loss: 4520.919922, avg loss: 2.733325, ppl: 15.383958 +epoch: 1, batch: 3813, sum loss: 4948.994141, avg loss: 2.867320, ppl: 17.589811 +epoch: 1, batch: 3814, sum loss: 3856.633301, avg loss: 2.560846, ppl: 12.946760 +epoch: 1, batch: 3815, sum loss: 4561.914062, avg loss: 2.909384, ppl: 18.345495 +epoch: 1, batch: 3816, sum loss: 4819.114258, avg loss: 2.890890, ppl: 18.009336 +epoch: 1, batch: 3817, sum loss: 4805.994629, avg loss: 2.955716, ppl: 19.215479 +epoch: 1, batch: 3818, sum loss: 4590.624512, avg loss: 2.775468, ppl: 16.046141 +epoch: 1, batch: 3819, sum loss: 4799.217285, avg loss: 3.124490, ppl: 22.748302 +epoch: 1, batch: 3820, sum loss: 4441.795898, avg loss: 2.884283, ppl: 17.890736 +epoch: 1, batch: 3821, sum loss: 4081.493652, avg loss: 2.770872, ppl: 15.972550 +epoch: 1, batch: 3822, sum loss: 5194.966797, avg loss: 2.668191, ppl: 14.413867 +epoch: 1, batch: 3823, sum loss: 5060.101074, avg loss: 2.889835, ppl: 17.990343 +epoch: 1, batch: 3824, sum loss: 4398.999023, avg loss: 2.825305, ppl: 16.866079 +epoch: 1, batch: 3825, sum loss: 5101.300781, avg loss: 2.969325, ppl: 19.478773 +epoch: 1, batch: 3826, sum loss: 4481.785156, avg loss: 2.574259, ppl: 13.121594 +epoch: 1, batch: 3827, sum loss: 
5145.389160, avg loss: 2.923517, ppl: 18.606604 +epoch: 1, batch: 3828, sum loss: 4436.233398, avg loss: 2.825626, ppl: 16.871510 +epoch: 1, batch: 3829, sum loss: 4763.956543, avg loss: 2.685432, ppl: 14.664535 +epoch: 1, batch: 3830, sum loss: 4484.729004, avg loss: 2.522345, ppl: 12.457774 +epoch: 1, batch: 3831, sum loss: 4781.465820, avg loss: 2.926234, ppl: 18.657230 +epoch: 1, batch: 3832, sum loss: 4680.132812, avg loss: 2.774234, ppl: 16.026346 +epoch: 1, batch: 3833, sum loss: 4965.875000, avg loss: 2.980717, ppl: 19.701941 +epoch: 1, batch: 3834, sum loss: 3991.966309, avg loss: 2.736098, ppl: 15.426666 +epoch: 1, batch: 3835, sum loss: 3771.970703, avg loss: 2.540047, ppl: 12.680272 +epoch: 1, batch: 3836, sum loss: 5307.491211, avg loss: 3.064371, ppl: 21.420992 +epoch: 1, batch: 3837, sum loss: 4677.268066, avg loss: 3.045096, ppl: 21.012056 +epoch: 1, batch: 3838, sum loss: 4201.725098, avg loss: 2.454279, ppl: 11.638036 +epoch: 1, batch: 3839, sum loss: 4471.992188, avg loss: 2.723503, ppl: 15.233598 +epoch: 1, batch: 3840, sum loss: 5130.139160, avg loss: 2.989592, ppl: 19.877563 +epoch: 1, batch: 3841, sum loss: 4680.062500, avg loss: 2.706803, ppl: 14.981305 +epoch: 1, batch: 3842, sum loss: 4447.534668, avg loss: 2.781447, ppl: 16.142368 +epoch: 1, batch: 3843, sum loss: 4375.972656, avg loss: 2.755651, ppl: 15.731271 +epoch: 1, batch: 3844, sum loss: 4983.897461, avg loss: 2.817353, ppl: 16.732502 +epoch: 1, batch: 3845, sum loss: 4976.200195, avg loss: 2.990505, ppl: 19.895727 +epoch: 1, batch: 3846, sum loss: 3845.135254, avg loss: 2.721256, ppl: 15.199405 +epoch: 1, batch: 3847, sum loss: 4553.447754, avg loss: 2.838808, ppl: 17.095373 +epoch: 1, batch: 3848, sum loss: 4312.244629, avg loss: 2.706996, ppl: 14.984195 +epoch: 1, batch: 3849, sum loss: 4500.061523, avg loss: 2.846339, ppl: 17.224598 +epoch: 1, batch: 3850, sum loss: 5971.960449, avg loss: 3.193562, ppl: 24.375092 +epoch: 1, batch: 3851, sum loss: 4872.347168, avg loss: 
2.738812, ppl: 15.468600 +epoch: 1, batch: 3852, sum loss: 4939.293945, avg loss: 2.900349, ppl: 18.180483 +epoch: 1, batch: 3853, sum loss: 4700.468262, avg loss: 2.823104, ppl: 16.829010 +epoch: 1, batch: 3854, sum loss: 4582.306641, avg loss: 2.670342, ppl: 14.444908 +epoch: 1, batch: 3855, sum loss: 4269.054199, avg loss: 2.511209, ppl: 12.319810 +epoch: 1, batch: 3856, sum loss: 4821.274414, avg loss: 2.750299, ppl: 15.647313 +epoch: 1, batch: 3857, sum loss: 4735.843750, avg loss: 2.717065, ppl: 15.135828 +epoch: 1, batch: 3858, sum loss: 4805.289551, avg loss: 2.760074, ppl: 15.801018 +epoch: 1, batch: 3859, sum loss: 4070.019287, avg loss: 2.698952, ppl: 14.864141 +epoch: 1, batch: 3860, sum loss: 4384.812500, avg loss: 2.762957, ppl: 15.846634 +epoch: 1, batch: 3861, sum loss: 3754.875488, avg loss: 2.531946, ppl: 12.577955 +epoch: 1, batch: 3862, sum loss: 4787.099609, avg loss: 2.693922, ppl: 14.789568 +epoch: 1, batch: 3863, sum loss: 4362.333008, avg loss: 2.743606, ppl: 15.542926 +epoch: 1, batch: 3864, sum loss: 3517.127441, avg loss: 2.335410, ppl: 10.333697 +epoch: 1, batch: 3865, sum loss: 5769.584961, avg loss: 3.159685, ppl: 23.563175 +epoch: 1, batch: 3866, sum loss: 4118.523438, avg loss: 2.723891, ppl: 15.239508 +epoch: 1, batch: 3867, sum loss: 4303.580078, avg loss: 2.898034, ppl: 18.138443 +epoch: 1, batch: 3868, sum loss: 5713.542480, avg loss: 3.057005, ppl: 21.263775 +epoch: 1, batch: 3869, sum loss: 4112.319824, avg loss: 2.687791, ppl: 14.699167 +epoch: 1, batch: 3870, sum loss: 4723.035645, avg loss: 2.634153, ppl: 13.931502 +epoch: 1, batch: 3871, sum loss: 4961.970215, avg loss: 2.820904, ppl: 16.792028 +epoch: 1, batch: 3872, sum loss: 3869.174316, avg loss: 2.696289, ppl: 14.824609 +epoch: 1, batch: 3873, sum loss: 4431.181641, avg loss: 2.747168, ppl: 15.598392 +epoch: 1, batch: 3874, sum loss: 4999.390625, avg loss: 2.679202, ppl: 14.573453 +epoch: 1, batch: 3875, sum loss: 5447.164062, avg loss: 3.065371, ppl: 21.442411 
+epoch: 1, batch: 3876, sum loss: 5196.616699, avg loss: 2.810501, ppl: 16.618244 +epoch: 1, batch: 3877, sum loss: 4928.250000, avg loss: 2.994077, ppl: 19.966917 +epoch: 1, batch: 3878, sum loss: 5310.000000, avg loss: 2.745605, ppl: 15.574033 +epoch: 1, batch: 3879, sum loss: 4431.301758, avg loss: 2.631414, ppl: 13.893407 +epoch: 1, batch: 3880, sum loss: 5158.666992, avg loss: 2.964751, ppl: 19.389879 +epoch: 1, batch: 3881, sum loss: 5239.191895, avg loss: 3.100114, ppl: 22.200474 +epoch: 1, batch: 3882, sum loss: 5437.013184, avg loss: 3.013865, ppl: 20.365973 +epoch: 1, batch: 3883, sum loss: 4775.040039, avg loss: 2.933071, ppl: 18.785234 +epoch: 1, batch: 3884, sum loss: 4579.702637, avg loss: 2.785707, ppl: 16.211279 +epoch: 1, batch: 3885, sum loss: 4237.998535, avg loss: 2.980308, ppl: 19.693888 +epoch: 1, batch: 3886, sum loss: 5010.337891, avg loss: 2.830699, ppl: 16.957317 +epoch: 1, batch: 3887, sum loss: 4110.403809, avg loss: 2.500246, ppl: 12.185486 +epoch: 1, batch: 3888, sum loss: 4217.713379, avg loss: 2.619698, ppl: 13.731573 +epoch: 1, batch: 3889, sum loss: 3833.413086, avg loss: 2.525305, ppl: 12.494706 +epoch: 1, batch: 3890, sum loss: 5157.339844, avg loss: 2.915399, ppl: 18.456167 +epoch: 1, batch: 3891, sum loss: 5062.341797, avg loss: 2.662989, ppl: 14.339084 +epoch: 1, batch: 3892, sum loss: 5064.492188, avg loss: 2.840433, ppl: 17.123180 +epoch: 1, batch: 3893, sum loss: 4066.614502, avg loss: 2.827965, ppl: 16.911020 +epoch: 1, batch: 3894, sum loss: 3927.297607, avg loss: 2.697320, ppl: 14.839904 +epoch: 1, batch: 3895, sum loss: 4051.854980, avg loss: 2.743301, ppl: 15.538187 +epoch: 1, batch: 3896, sum loss: 5529.815918, avg loss: 2.791426, ppl: 16.304260 +epoch: 1, batch: 3897, sum loss: 3715.065674, avg loss: 2.556824, ppl: 12.894801 +epoch: 1, batch: 3898, sum loss: 4351.575684, avg loss: 2.761152, ppl: 15.818055 +epoch: 1, batch: 3899, sum loss: 5071.811523, avg loss: 2.891569, ppl: 18.021557 +epoch: 1, batch: 3900, sum 
loss: 5008.982422, avg loss: 2.951669, ppl: 19.137873 +epoch: 1, batch: 3901, sum loss: 4372.285645, avg loss: 2.708975, ppl: 15.013880 +epoch: 1, batch: 3902, sum loss: 4565.991699, avg loss: 2.742337, ppl: 15.523228 +epoch: 1, batch: 3903, sum loss: 5258.443359, avg loss: 2.984361, ppl: 19.773857 +epoch: 1, batch: 3904, sum loss: 6030.098145, avg loss: 3.075012, ppl: 21.650141 +epoch: 1, batch: 3905, sum loss: 4721.710449, avg loss: 2.751580, ppl: 15.667359 +epoch: 1, batch: 3906, sum loss: 4775.676270, avg loss: 2.887350, ppl: 17.945683 +epoch: 1, batch: 3907, sum loss: 4131.833984, avg loss: 2.709399, ppl: 15.020249 +epoch: 1, batch: 3908, sum loss: 5027.501953, avg loss: 2.931488, ppl: 18.755514 +epoch: 1, batch: 3909, sum loss: 4278.314941, avg loss: 2.583524, ppl: 13.243720 +epoch: 1, batch: 3910, sum loss: 4331.099609, avg loss: 2.817892, ppl: 16.741520 +epoch: 1, batch: 3911, sum loss: 5410.768555, avg loss: 3.095406, ppl: 22.096199 +epoch: 1, batch: 3912, sum loss: 5297.513672, avg loss: 3.018526, ppl: 20.461117 +epoch: 1, batch: 3913, sum loss: 4230.209961, avg loss: 2.665539, ppl: 14.375693 +epoch: 1, batch: 3914, sum loss: 3639.696777, avg loss: 2.730455, ppl: 15.339868 +epoch: 1, batch: 3915, sum loss: 5128.136230, avg loss: 2.869690, ppl: 17.631556 +epoch: 1, batch: 3916, sum loss: 6154.109375, avg loss: 2.984534, ppl: 19.777288 +epoch: 1, batch: 3917, sum loss: 4826.367188, avg loss: 2.829055, ppl: 16.929447 +epoch: 1, batch: 3918, sum loss: 3443.481934, avg loss: 2.386335, ppl: 10.873573 +epoch: 1, batch: 3919, sum loss: 4233.647949, avg loss: 2.911725, ppl: 18.388487 +epoch: 1, batch: 3920, sum loss: 4442.168945, avg loss: 2.715262, ppl: 15.108574 +epoch: 1, batch: 3921, sum loss: 4281.801270, avg loss: 2.644720, ppl: 14.079500 +epoch: 1, batch: 3922, sum loss: 6016.714355, avg loss: 3.071319, ppl: 21.570343 +epoch: 1, batch: 3923, sum loss: 3776.721436, avg loss: 2.805885, ppl: 16.541710 +epoch: 1, batch: 3924, sum loss: 5470.009766, avg loss: 
3.059290, ppl: 21.312414 +epoch: 1, batch: 3925, sum loss: 4399.131348, avg loss: 2.775477, ppl: 16.046282 +epoch: 1, batch: 3926, sum loss: 4707.747070, avg loss: 2.897075, ppl: 18.121067 +epoch: 1, batch: 3927, sum loss: 5991.660645, avg loss: 3.341696, ppl: 28.267027 +epoch: 1, batch: 3928, sum loss: 4483.129883, avg loss: 2.624783, ppl: 13.801582 +epoch: 1, batch: 3929, sum loss: 5470.329590, avg loss: 2.969777, ppl: 19.487579 +epoch: 1, batch: 3930, sum loss: 4168.791016, avg loss: 2.584495, ppl: 13.256597 +epoch: 1, batch: 3931, sum loss: 4610.820312, avg loss: 2.869210, ppl: 17.623091 +epoch: 1, batch: 3932, sum loss: 4802.145508, avg loss: 2.703911, ppl: 14.938038 +epoch: 1, batch: 3933, sum loss: 4431.895508, avg loss: 2.874122, ppl: 17.709862 +epoch: 1, batch: 3934, sum loss: 4347.528809, avg loss: 2.646092, ppl: 14.098828 +epoch: 1, batch: 3935, sum loss: 4640.321777, avg loss: 2.841593, ppl: 17.143057 +epoch: 1, batch: 3936, sum loss: 4064.669678, avg loss: 2.733470, ppl: 15.386180 +epoch: 1, batch: 3937, sum loss: 5336.282715, avg loss: 2.847536, ppl: 17.245239 +epoch: 1, batch: 3938, sum loss: 4994.173828, avg loss: 2.920569, ppl: 18.551847 +epoch: 1, batch: 3939, sum loss: 5038.990723, avg loss: 2.926243, ppl: 18.657404 +epoch: 1, batch: 3940, sum loss: 4081.691650, avg loss: 2.522677, ppl: 12.461915 +epoch: 1, batch: 3941, sum loss: 5876.131348, avg loss: 3.021147, ppl: 20.514814 +epoch: 1, batch: 3942, sum loss: 3807.486328, avg loss: 2.638591, ppl: 13.993467 +epoch: 1, batch: 3943, sum loss: 4533.893555, avg loss: 2.996625, ppl: 20.017866 +epoch: 1, batch: 3944, sum loss: 4263.533691, avg loss: 2.788446, ppl: 16.255737 +epoch: 1, batch: 3945, sum loss: 5325.944336, avg loss: 2.942511, ppl: 18.963400 +epoch: 1, batch: 3946, sum loss: 3935.956543, avg loss: 2.435617, ppl: 11.422861 +epoch: 1, batch: 3947, sum loss: 5031.489258, avg loss: 2.810888, ppl: 16.624672 +epoch: 1, batch: 3948, sum loss: 4987.607910, avg loss: 2.954744, ppl: 19.196810 
+epoch: 1, batch: 3949, sum loss: 5246.930664, avg loss: 2.823967, ppl: 16.843536 +epoch: 1, batch: 3950, sum loss: 6146.178711, avg loss: 3.068487, ppl: 21.509327 +epoch: 1, batch: 3951, sum loss: 4019.044678, avg loss: 2.584595, ppl: 13.257915 +epoch: 1, batch: 3952, sum loss: 4315.634766, avg loss: 2.685523, ppl: 14.665863 +epoch: 1, batch: 3953, sum loss: 4923.952148, avg loss: 2.781894, ppl: 16.149578 +epoch: 1, batch: 3954, sum loss: 5293.814453, avg loss: 2.863069, ppl: 17.515196 +epoch: 1, batch: 3955, sum loss: 5632.022461, avg loss: 2.912111, ppl: 18.395588 +epoch: 1, batch: 3956, sum loss: 4984.352051, avg loss: 3.009874, ppl: 20.284851 +epoch: 1, batch: 3957, sum loss: 5060.111328, avg loss: 2.808053, ppl: 16.577610 +epoch: 1, batch: 3958, sum loss: 4197.500977, avg loss: 2.727421, ppl: 15.293395 +epoch: 1, batch: 3959, sum loss: 5025.696289, avg loss: 2.933856, ppl: 18.799994 +epoch: 1, batch: 3960, sum loss: 4552.542969, avg loss: 2.472864, ppl: 11.856357 +epoch: 1, batch: 3961, sum loss: 4207.264648, avg loss: 2.662826, ppl: 14.336745 +epoch: 1, batch: 3962, sum loss: 4595.590820, avg loss: 2.738731, ppl: 15.467347 +epoch: 1, batch: 3963, sum loss: 5083.680664, avg loss: 2.974652, ppl: 19.582813 +epoch: 1, batch: 3964, sum loss: 5835.236816, avg loss: 3.066336, ppl: 21.463110 +epoch: 1, batch: 3965, sum loss: 5304.090332, avg loss: 2.935302, ppl: 18.827179 +epoch: 1, batch: 3966, sum loss: 4223.001953, avg loss: 2.763745, ppl: 15.859121 +epoch: 1, batch: 3967, sum loss: 4594.388672, avg loss: 2.841304, ppl: 17.138100 +epoch: 1, batch: 3968, sum loss: 5620.772461, avg loss: 2.967673, ppl: 19.446615 +epoch: 1, batch: 3969, sum loss: 5050.394043, avg loss: 2.894208, ppl: 18.069193 +epoch: 1, batch: 3970, sum loss: 4767.774902, avg loss: 2.930409, ppl: 18.735287 +epoch: 1, batch: 3971, sum loss: 4911.313477, avg loss: 2.763823, ppl: 15.860362 +epoch: 1, batch: 3972, sum loss: 5207.066406, avg loss: 2.822258, ppl: 16.814779 +epoch: 1, batch: 3973, sum 
loss: 4514.456543, avg loss: 2.825067, ppl: 16.862072 +epoch: 1, batch: 3974, sum loss: 4489.847168, avg loss: 2.645756, ppl: 14.094090 +epoch: 1, batch: 3975, sum loss: 4111.983398, avg loss: 2.671854, ppl: 14.466766 +epoch: 1, batch: 3976, sum loss: 4542.055664, avg loss: 2.874719, ppl: 17.720442 +epoch: 1, batch: 3977, sum loss: 5325.576172, avg loss: 3.136382, ppl: 23.020426 +epoch: 1, batch: 3978, sum loss: 5253.205078, avg loss: 2.723279, ppl: 15.230180 +epoch: 1, batch: 3979, sum loss: 4588.759277, avg loss: 2.818648, ppl: 16.754185 +epoch: 1, batch: 3980, sum loss: 5192.577148, avg loss: 2.602796, ppl: 13.501430 +epoch: 1, batch: 3981, sum loss: 5733.691406, avg loss: 3.079319, ppl: 21.743586 +epoch: 1, batch: 3982, sum loss: 4329.420898, avg loss: 2.769943, ppl: 15.957725 +epoch: 1, batch: 3983, sum loss: 4460.284180, avg loss: 2.514252, ppl: 12.357368 +epoch: 1, batch: 3984, sum loss: 4456.000000, avg loss: 2.694075, ppl: 14.791832 +epoch: 1, batch: 3985, sum loss: 5472.354492, avg loss: 3.236165, ppl: 25.435982 +epoch: 1, batch: 3986, sum loss: 4161.082031, avg loss: 2.707275, ppl: 14.988382 +epoch: 1, batch: 3987, sum loss: 4788.593262, avg loss: 3.077502, ppl: 21.704119 +epoch: 1, batch: 3988, sum loss: 3469.917480, avg loss: 2.603089, ppl: 13.505390 +epoch: 1, batch: 3989, sum loss: 5039.730469, avg loss: 2.871641, ppl: 17.665987 +epoch: 1, batch: 3990, sum loss: 4440.295410, avg loss: 2.844520, ppl: 17.193302 +epoch: 1, batch: 3991, sum loss: 4814.051270, avg loss: 2.779475, ppl: 16.110563 +epoch: 1, batch: 3992, sum loss: 5386.700195, avg loss: 2.824699, ppl: 16.855865 +epoch: 1, batch: 3993, sum loss: 4313.827148, avg loss: 2.891305, ppl: 18.016809 +epoch: 1, batch: 3994, sum loss: 5283.971680, avg loss: 2.953589, ppl: 19.174656 +epoch: 1, batch: 3995, sum loss: 3579.327393, avg loss: 2.529560, ppl: 12.547982 +epoch: 1, batch: 3996, sum loss: 4906.501953, avg loss: 2.903255, ppl: 18.233408 +epoch: 1, batch: 3997, sum loss: 5079.327148, avg loss: 
2.847157, ppl: 17.238695 +epoch: 1, batch: 3998, sum loss: 4076.541504, avg loss: 2.372841, ppl: 10.727831 +epoch: 1, batch: 3999, sum loss: 4284.892578, avg loss: 2.755558, ppl: 15.729815 +epoch: 1, batch: 4000, sum loss: 4615.469238, avg loss: 2.646485, ppl: 14.104369 +epoch: 1, batch: 4001, sum loss: 4993.424805, avg loss: 2.808450, ppl: 16.584200 +epoch: 1, batch: 4002, sum loss: 4545.882324, avg loss: 2.799189, ppl: 16.431309 +epoch: 1, batch: 4003, sum loss: 3599.879883, avg loss: 2.684474, ppl: 14.650496 +epoch: 1, batch: 4004, sum loss: 4751.390137, avg loss: 2.929340, ppl: 18.715281 +epoch: 1, batch: 4005, sum loss: 4000.761475, avg loss: 2.478787, ppl: 11.926785 +epoch: 1, batch: 4006, sum loss: 5086.432617, avg loss: 2.843171, ppl: 17.170124 +epoch: 1, batch: 4007, sum loss: 4798.567871, avg loss: 2.692799, ppl: 14.772969 +epoch: 1, batch: 4008, sum loss: 4889.552246, avg loss: 2.906987, ppl: 18.301573 +epoch: 1, batch: 4009, sum loss: 4663.714355, avg loss: 2.974308, ppl: 19.576063 +epoch: 1, batch: 4010, sum loss: 4410.846191, avg loss: 2.697765, ppl: 14.846515 +epoch: 1, batch: 4011, sum loss: 4240.902832, avg loss: 2.808545, ppl: 16.585766 +epoch: 1, batch: 4012, sum loss: 4421.979980, avg loss: 2.670278, ppl: 14.443982 +epoch: 1, batch: 4013, sum loss: 4720.688965, avg loss: 2.914006, ppl: 18.430479 +epoch: 1, batch: 4014, sum loss: 5239.984375, avg loss: 2.911103, ppl: 18.377048 +epoch: 1, batch: 4015, sum loss: 4476.252930, avg loss: 2.690056, ppl: 14.732499 +epoch: 1, batch: 4016, sum loss: 3789.163818, avg loss: 2.694996, ppl: 14.805454 +epoch: 1, batch: 4017, sum loss: 4862.472656, avg loss: 3.141132, ppl: 23.130043 +epoch: 1, batch: 4018, sum loss: 5342.869141, avg loss: 2.784194, ppl: 16.186773 +epoch: 1, batch: 4019, sum loss: 5151.922852, avg loss: 3.126167, ppl: 22.786467 +epoch: 1, batch: 4020, sum loss: 4564.521973, avg loss: 2.763028, ppl: 15.847755 +epoch: 1, batch: 4021, sum loss: 5329.982422, avg loss: 2.763081, ppl: 15.848591 
+epoch: 1, batch: 4022, sum loss: 4625.834961, avg loss: 2.755113, ppl: 15.722819 +epoch: 1, batch: 4023, sum loss: 4291.493652, avg loss: 2.799409, ppl: 16.434929 +epoch: 1, batch: 4024, sum loss: 5234.170898, avg loss: 2.738970, ppl: 15.471035 +epoch: 1, batch: 4025, sum loss: 4411.632324, avg loss: 2.667251, ppl: 14.400323 +epoch: 1, batch: 4026, sum loss: 6344.516113, avg loss: 3.025520, ppl: 20.604723 +epoch: 1, batch: 4027, sum loss: 4519.784180, avg loss: 2.889888, ppl: 17.991287 +epoch: 1, batch: 4028, sum loss: 4977.940918, avg loss: 2.897521, ppl: 18.129148 +epoch: 1, batch: 4029, sum loss: 3939.116455, avg loss: 2.627830, ppl: 13.843690 +epoch: 1, batch: 4030, sum loss: 5113.910645, avg loss: 2.871371, ppl: 17.661207 +epoch: 1, batch: 4031, sum loss: 4295.875000, avg loss: 2.554028, ppl: 12.858792 +epoch: 1, batch: 4032, sum loss: 4200.224609, avg loss: 2.500134, ppl: 12.184123 +epoch: 1, batch: 4033, sum loss: 6280.499512, avg loss: 3.257521, ppl: 25.985031 +epoch: 1, batch: 4034, sum loss: 5355.307617, avg loss: 2.963645, ppl: 19.368450 +epoch: 1, batch: 4035, sum loss: 4219.063965, avg loss: 2.911707, ppl: 18.388168 +epoch: 1, batch: 4036, sum loss: 5232.251465, avg loss: 3.190397, ppl: 24.298079 +epoch: 1, batch: 4037, sum loss: 4960.601074, avg loss: 2.828165, ppl: 16.914391 +epoch: 1, batch: 4038, sum loss: 3758.534668, avg loss: 2.574339, ppl: 13.122639 +epoch: 1, batch: 4039, sum loss: 4957.409668, avg loss: 3.004491, ppl: 20.175936 +epoch: 1, batch: 4040, sum loss: 4942.136719, avg loss: 2.697673, ppl: 14.845144 +epoch: 1, batch: 4041, sum loss: 4645.327637, avg loss: 3.000858, ppl: 20.102774 +epoch: 1, batch: 4042, sum loss: 3638.777832, avg loss: 2.573393, ppl: 13.110233 +epoch: 1, batch: 4043, sum loss: 4429.773438, avg loss: 2.660525, ppl: 14.303791 +epoch: 1, batch: 4044, sum loss: 4357.408203, avg loss: 2.636060, ppl: 13.958107 +epoch: 1, batch: 4045, sum loss: 3875.771484, avg loss: 2.689640, ppl: 14.726377 +epoch: 1, batch: 4046, sum 
loss: 3747.749512, avg loss: 2.725636, ppl: 15.266120 +epoch: 1, batch: 4047, sum loss: 4197.796387, avg loss: 2.580084, ppl: 13.198245 +epoch: 1, batch: 4048, sum loss: 5126.284180, avg loss: 2.760519, ppl: 15.808045 +epoch: 1, batch: 4049, sum loss: 5426.162109, avg loss: 3.001196, ppl: 20.109571 +epoch: 1, batch: 4050, sum loss: 5065.315430, avg loss: 2.742456, ppl: 15.525064 +epoch: 1, batch: 4051, sum loss: 5709.649902, avg loss: 3.051657, ppl: 21.150360 +epoch: 1, batch: 4052, sum loss: 5015.282715, avg loss: 2.830295, ppl: 16.950462 +epoch: 1, batch: 4053, sum loss: 3954.600830, avg loss: 2.721680, ppl: 15.205846 +epoch: 1, batch: 4054, sum loss: 5441.075195, avg loss: 2.880400, ppl: 17.821400 +epoch: 1, batch: 4055, sum loss: 3984.561279, avg loss: 2.628339, ppl: 13.850739 +epoch: 1, batch: 4056, sum loss: 4856.269531, avg loss: 2.943194, ppl: 18.976355 +epoch: 1, batch: 4057, sum loss: 4244.637207, avg loss: 2.569393, ppl: 13.057898 +epoch: 1, batch: 4058, sum loss: 5394.940918, avg loss: 2.995525, ppl: 19.995857 +epoch: 1, batch: 4059, sum loss: 4551.126953, avg loss: 2.678709, ppl: 14.566280 +epoch: 1, batch: 4060, sum loss: 4185.556152, avg loss: 2.624173, ppl: 13.793165 +epoch: 1, batch: 4061, sum loss: 4685.597168, avg loss: 2.824350, ppl: 16.849995 +epoch: 1, batch: 4062, sum loss: 4879.338379, avg loss: 2.815544, ppl: 16.702261 +epoch: 1, batch: 4063, sum loss: 4475.758789, avg loss: 2.689759, ppl: 14.728127 +epoch: 1, batch: 4064, sum loss: 3953.139160, avg loss: 2.557011, ppl: 12.897211 +epoch: 1, batch: 4065, sum loss: 5103.866699, avg loss: 2.906530, ppl: 18.293213 +epoch: 1, batch: 4066, sum loss: 4683.754883, avg loss: 2.658204, ppl: 14.270634 +epoch: 1, batch: 4067, sum loss: 5366.039551, avg loss: 3.302178, ppl: 27.171759 +epoch: 1, batch: 4068, sum loss: 4123.498535, avg loss: 2.732604, ppl: 15.372859 +epoch: 1, batch: 4069, sum loss: 4412.085938, avg loss: 3.126921, ppl: 22.803663 +epoch: 1, batch: 4070, sum loss: 4275.556152, avg loss: 
2.675567, ppl: 14.520583 +epoch: 1, batch: 4071, sum loss: 4335.443359, avg loss: 2.841051, ppl: 17.133760 +epoch: 1, batch: 4072, sum loss: 4454.114258, avg loss: 2.694564, ppl: 14.799063 +epoch: 1, batch: 4073, sum loss: 4335.432129, avg loss: 2.605428, ppl: 13.537018 +epoch: 1, batch: 4074, sum loss: 4295.871094, avg loss: 2.744966, ppl: 15.564077 +epoch: 1, batch: 4075, sum loss: 4632.853516, avg loss: 2.457747, ppl: 11.678473 +epoch: 1, batch: 4076, sum loss: 4445.639648, avg loss: 2.616621, ppl: 13.689396 +epoch: 1, batch: 4077, sum loss: 3658.101562, avg loss: 2.384681, ppl: 10.855604 +epoch: 1, batch: 4078, sum loss: 6095.410645, avg loss: 3.332647, ppl: 28.012388 +epoch: 1, batch: 4079, sum loss: 4636.169922, avg loss: 3.036130, ppl: 20.824490 +epoch: 1, batch: 4080, sum loss: 5225.309570, avg loss: 2.869473, ppl: 17.627718 +epoch: 1, batch: 4081, sum loss: 5839.166992, avg loss: 3.027044, ppl: 20.636133 +epoch: 1, batch: 4082, sum loss: 4854.223633, avg loss: 2.802670, ppl: 16.488605 +epoch: 1, batch: 4083, sum loss: 4311.818848, avg loss: 2.751640, ppl: 15.668301 +epoch: 1, batch: 4084, sum loss: 4077.472412, avg loss: 2.764388, ppl: 15.869327 +epoch: 1, batch: 4085, sum loss: 4047.354736, avg loss: 2.503002, ppl: 12.219126 +epoch: 1, batch: 4086, sum loss: 4935.117676, avg loss: 2.882662, ppl: 17.861759 +epoch: 1, batch: 4087, sum loss: 4546.498047, avg loss: 2.879353, ppl: 17.802748 +epoch: 1, batch: 4088, sum loss: 5664.257812, avg loss: 2.894357, ppl: 18.071886 +epoch: 1, batch: 4089, sum loss: 4897.201172, avg loss: 2.790428, ppl: 16.287989 +epoch: 1, batch: 4090, sum loss: 5590.319336, avg loss: 3.054819, ppl: 21.217352 +epoch: 1, batch: 4091, sum loss: 4828.895508, avg loss: 3.014292, ppl: 20.374657 +epoch: 1, batch: 4092, sum loss: 4238.407227, avg loss: 2.879353, ppl: 17.802748 +epoch: 1, batch: 4093, sum loss: 4821.385742, avg loss: 2.689005, ppl: 14.717024 +epoch: 1, batch: 4094, sum loss: 4706.999023, avg loss: 2.731863, ppl: 15.361472 
+epoch: 1, batch: 4095, sum loss: 4496.854004, avg loss: 2.974110, ppl: 19.572193 +epoch: 1, batch: 4096, sum loss: 4704.175781, avg loss: 2.943790, ppl: 18.987669 +epoch: 1, batch: 4097, sum loss: 4318.304688, avg loss: 2.714208, ppl: 15.092650 +epoch: 1, batch: 4098, sum loss: 4948.606445, avg loss: 2.806924, ppl: 16.558899 +epoch: 1, batch: 4099, sum loss: 5283.506348, avg loss: 2.946741, ppl: 19.043787 +epoch: 1, batch: 4100, sum loss: 3659.443848, avg loss: 2.393358, ppl: 10.950200 +epoch: 1, batch: 4101, sum loss: 4655.625977, avg loss: 2.709910, ppl: 15.027929 +epoch: 1, batch: 4102, sum loss: 4275.147949, avg loss: 2.772470, ppl: 15.998093 +epoch: 1, batch: 4103, sum loss: 5113.315430, avg loss: 2.976319, ppl: 19.615471 +epoch: 1, batch: 4104, sum loss: 4374.022461, avg loss: 2.655751, ppl: 14.235676 +epoch: 1, batch: 4105, sum loss: 5525.634277, avg loss: 2.863023, ppl: 17.514393 +epoch: 1, batch: 4106, sum loss: 5801.522949, avg loss: 2.782505, ppl: 16.159451 +epoch: 1, batch: 4107, sum loss: 4817.117676, avg loss: 2.613737, ppl: 13.649967 +epoch: 1, batch: 4108, sum loss: 4965.777832, avg loss: 2.948799, ppl: 19.083029 +epoch: 1, batch: 4109, sum loss: 4953.532227, avg loss: 2.755023, ppl: 15.721410 +epoch: 1, batch: 4110, sum loss: 4035.734375, avg loss: 2.478952, ppl: 11.928759 +epoch: 1, batch: 4111, sum loss: 5326.346680, avg loss: 3.012640, ppl: 20.341024 +epoch: 1, batch: 4112, sum loss: 4329.093750, avg loss: 2.596937, ppl: 13.422558 +epoch: 1, batch: 4113, sum loss: 4643.500977, avg loss: 2.957644, ppl: 19.252560 +epoch: 1, batch: 4114, sum loss: 5030.044922, avg loss: 2.794469, ppl: 16.353949 +epoch: 1, batch: 4115, sum loss: 4052.096191, avg loss: 2.612570, ppl: 13.634046 +epoch: 1, batch: 4116, sum loss: 5107.513184, avg loss: 2.837507, ppl: 17.073158 +epoch: 1, batch: 4117, sum loss: 3807.053467, avg loss: 2.803427, ppl: 16.501095 +epoch: 1, batch: 4118, sum loss: 3983.626953, avg loss: 2.540578, ppl: 12.687007 +epoch: 1, batch: 4119, sum 
loss: 5075.535156, avg loss: 2.925380, ppl: 18.641317 +epoch: 1, batch: 4120, sum loss: 4553.162109, avg loss: 2.656454, ppl: 14.245686 +epoch: 1, batch: 4121, sum loss: 5743.601562, avg loss: 2.903742, ppl: 18.242273 +epoch: 1, batch: 4122, sum loss: 4114.514160, avg loss: 2.615711, ppl: 13.676944 +epoch: 1, batch: 4123, sum loss: 4436.964844, avg loss: 2.752460, ppl: 15.681152 +epoch: 1, batch: 4124, sum loss: 4793.371582, avg loss: 2.836314, ppl: 17.052801 +epoch: 1, batch: 4125, sum loss: 4845.002441, avg loss: 2.724973, ppl: 15.256009 +epoch: 1, batch: 4126, sum loss: 5808.446777, avg loss: 2.946954, ppl: 19.047850 +epoch: 1, batch: 4127, sum loss: 4037.361816, avg loss: 2.558531, ppl: 12.916826 +epoch: 1, batch: 4128, sum loss: 4345.952148, avg loss: 2.735023, ppl: 15.410102 +epoch: 1, batch: 4129, sum loss: 4813.366211, avg loss: 3.002724, ppl: 20.140318 +epoch: 1, batch: 4130, sum loss: 5294.430664, avg loss: 3.028851, ppl: 20.673456 +epoch: 1, batch: 4131, sum loss: 4059.470215, avg loss: 2.577441, ppl: 13.163413 +epoch: 1, batch: 4132, sum loss: 4714.391113, avg loss: 2.995166, ppl: 19.988674 +epoch: 1, batch: 4133, sum loss: 5132.347656, avg loss: 2.655120, ppl: 14.226699 +epoch: 1, batch: 4134, sum loss: 4908.826660, avg loss: 3.112763, ppl: 22.483072 +epoch: 1, batch: 4135, sum loss: 4734.597656, avg loss: 2.917189, ppl: 18.489239 +epoch: 1, batch: 4136, sum loss: 4078.967529, avg loss: 2.706681, ppl: 14.979469 +epoch: 1, batch: 4137, sum loss: 4735.203125, avg loss: 2.746638, ppl: 15.590127 +epoch: 1, batch: 4138, sum loss: 3951.079346, avg loss: 2.738101, ppl: 15.457600 +epoch: 1, batch: 4139, sum loss: 4242.602539, avg loss: 2.585376, ppl: 13.268281 +epoch: 1, batch: 4140, sum loss: 3900.833984, avg loss: 2.660869, ppl: 14.308720 +epoch: 1, batch: 4141, sum loss: 4271.941895, avg loss: 2.724453, ppl: 15.248067 +epoch: 1, batch: 4142, sum loss: 5134.537598, avg loss: 3.108074, ppl: 22.377897 +epoch: 1, batch: 4143, sum loss: 4189.715820, avg loss: 
2.655080, ppl: 14.226118 +epoch: 1, batch: 4144, sum loss: 5026.857910, avg loss: 2.811442, ppl: 16.633884 +epoch: 1, batch: 4145, sum loss: 5454.023926, avg loss: 2.938591, ppl: 18.889204 +epoch: 1, batch: 4146, sum loss: 4989.437500, avg loss: 2.774993, ppl: 16.038513 +epoch: 1, batch: 4147, sum loss: 4770.397461, avg loss: 2.693618, ppl: 14.785069 +epoch: 1, batch: 4148, sum loss: 4303.489746, avg loss: 2.899926, ppl: 18.172794 +epoch: 1, batch: 4149, sum loss: 4942.846680, avg loss: 3.089279, ppl: 21.961243 +epoch: 1, batch: 4150, sum loss: 4192.835938, avg loss: 2.694625, ppl: 14.799963 +epoch: 1, batch: 4151, sum loss: 4250.094727, avg loss: 2.492724, ppl: 12.094178 +epoch: 1, batch: 4152, sum loss: 4835.668457, avg loss: 2.722786, ppl: 15.222680 +epoch: 1, batch: 4153, sum loss: 4704.324219, avg loss: 2.683585, ppl: 14.637474 +epoch: 1, batch: 4154, sum loss: 5004.439941, avg loss: 2.928286, ppl: 18.695551 +epoch: 1, batch: 4155, sum loss: 4081.524170, avg loss: 2.618040, ppl: 13.708829 +epoch: 1, batch: 4156, sum loss: 5317.561035, avg loss: 2.820987, ppl: 16.793421 +epoch: 1, batch: 4157, sum loss: 4561.834961, avg loss: 2.828168, ppl: 16.914444 +epoch: 1, batch: 4158, sum loss: 4567.742676, avg loss: 2.677458, ppl: 14.548059 +epoch: 1, batch: 4159, sum loss: 4856.190918, avg loss: 2.626388, ppl: 13.823743 +epoch: 1, batch: 4160, sum loss: 4995.016602, avg loss: 2.854295, ppl: 17.362198 +epoch: 1, batch: 4161, sum loss: 5260.351562, avg loss: 2.678387, ppl: 14.561585 +epoch: 1, batch: 4162, sum loss: 4509.367188, avg loss: 2.816594, ppl: 16.719809 +epoch: 1, batch: 4163, sum loss: 5004.319824, avg loss: 3.117956, ppl: 22.600142 +epoch: 1, batch: 4164, sum loss: 5552.792969, avg loss: 2.969408, ppl: 19.480385 +epoch: 1, batch: 4165, sum loss: 5464.755371, avg loss: 2.886823, ppl: 17.936230 +epoch: 1, batch: 4166, sum loss: 4226.948242, avg loss: 2.663484, ppl: 14.346179 +epoch: 1, batch: 4167, sum loss: 4866.731445, avg loss: 2.731050, ppl: 15.348999 
+epoch: 1, batch: 4168, sum loss: 5547.177734, avg loss: 2.931912, ppl: 18.763475 +epoch: 1, batch: 4169, sum loss: 4959.323730, avg loss: 2.910401, ppl: 18.364168 +epoch: 1, batch: 4170, sum loss: 5038.471191, avg loss: 3.108249, ppl: 22.381813 +epoch: 1, batch: 4171, sum loss: 4198.663574, avg loss: 2.443925, ppl: 11.518166 +epoch: 1, batch: 4172, sum loss: 4403.264648, avg loss: 2.831681, ppl: 16.973978 +epoch: 1, batch: 4173, sum loss: 4772.279785, avg loss: 2.869681, ppl: 17.631395 +epoch: 1, batch: 4174, sum loss: 4503.315918, avg loss: 2.538509, ppl: 12.660785 +epoch: 1, batch: 4175, sum loss: 3547.791016, avg loss: 2.475779, ppl: 11.890965 +epoch: 1, batch: 4176, sum loss: 3910.924072, avg loss: 2.515064, ppl: 12.367397 +epoch: 1, batch: 4177, sum loss: 4579.157227, avg loss: 2.792169, ppl: 16.316374 +epoch: 1, batch: 4178, sum loss: 4670.648438, avg loss: 2.800149, ppl: 16.447096 +epoch: 1, batch: 4179, sum loss: 4489.612793, avg loss: 2.969321, ppl: 19.478685 +epoch: 1, batch: 4180, sum loss: 5525.500000, avg loss: 2.975498, ppl: 19.599380 +epoch: 1, batch: 4181, sum loss: 5110.295898, avg loss: 2.928536, ppl: 18.700237 +epoch: 1, batch: 4182, sum loss: 4476.386230, avg loss: 2.887991, ppl: 17.957201 +epoch: 1, batch: 4183, sum loss: 5351.315918, avg loss: 2.849476, ppl: 17.278723 +epoch: 1, batch: 4184, sum loss: 3700.843750, avg loss: 2.467229, ppl: 11.789734 +epoch: 1, batch: 4185, sum loss: 5038.301758, avg loss: 3.072135, ppl: 21.587948 +epoch: 1, batch: 4186, sum loss: 3624.608887, avg loss: 2.559752, ppl: 12.932610 +epoch: 1, batch: 4187, sum loss: 4529.816895, avg loss: 2.650566, ppl: 14.162050 +epoch: 1, batch: 4188, sum loss: 5044.505371, avg loss: 2.816586, ppl: 16.719669 +epoch: 1, batch: 4189, sum loss: 4496.322754, avg loss: 2.730008, ppl: 15.333004 +epoch: 1, batch: 4190, sum loss: 5341.891602, avg loss: 2.909527, ppl: 18.348120 +epoch: 1, batch: 4191, sum loss: 4744.920898, avg loss: 2.741144, ppl: 15.504715 +epoch: 1, batch: 4192, sum 
loss: 5170.958496, avg loss: 2.796624, ppl: 16.389231 +epoch: 1, batch: 4193, sum loss: 4425.225098, avg loss: 2.681955, ppl: 14.613629 +epoch: 1, batch: 4194, sum loss: 5591.883789, avg loss: 2.966516, ppl: 19.424137 +epoch: 1, batch: 4195, sum loss: 5253.754395, avg loss: 2.974946, ppl: 19.588562 +epoch: 1, batch: 4196, sum loss: 5265.930176, avg loss: 2.912572, ppl: 18.404072 +epoch: 1, batch: 4197, sum loss: 5249.035156, avg loss: 3.016687, ppl: 20.423515 +epoch: 1, batch: 4198, sum loss: 5023.081543, avg loss: 2.901838, ppl: 18.207577 +epoch: 1, batch: 4199, sum loss: 5258.852539, avg loss: 2.867422, ppl: 17.591610 +epoch: 1, batch: 4200, sum loss: 4060.449951, avg loss: 2.548933, ppl: 12.793444 +epoch: 1, batch: 4201, sum loss: 5102.745605, avg loss: 2.909205, ppl: 18.342211 +epoch: 1, batch: 4202, sum loss: 6058.090820, avg loss: 3.186792, ppl: 24.210627 +epoch: 1, batch: 4203, sum loss: 4967.158203, avg loss: 2.928749, ppl: 18.704218 +epoch: 1, batch: 4204, sum loss: 4403.336914, avg loss: 2.689882, ppl: 14.729939 +epoch: 1, batch: 4205, sum loss: 5031.911133, avg loss: 2.927232, ppl: 18.675859 +epoch: 1, batch: 4206, sum loss: 6024.372070, avg loss: 3.142604, ppl: 23.164116 +epoch: 1, batch: 4207, sum loss: 3715.770508, avg loss: 2.734195, ppl: 15.397336 +epoch: 1, batch: 4208, sum loss: 3932.082275, avg loss: 2.575037, ppl: 13.131809 +epoch: 1, batch: 4209, sum loss: 5397.472656, avg loss: 2.983678, ppl: 19.760355 +epoch: 1, batch: 4210, sum loss: 5963.347656, avg loss: 3.044077, ppl: 20.990656 +epoch: 1, batch: 4211, sum loss: 4615.056152, avg loss: 2.581128, ppl: 13.212029 +epoch: 1, batch: 4212, sum loss: 5034.349609, avg loss: 2.970118, ppl: 19.494217 +epoch: 1, batch: 4213, sum loss: 5074.508301, avg loss: 2.809805, ppl: 16.606686 +epoch: 1, batch: 4214, sum loss: 4409.889648, avg loss: 2.725519, ppl: 15.264333 +epoch: 1, batch: 4215, sum loss: 4491.168457, avg loss: 2.891931, ppl: 18.028086 +epoch: 1, batch: 4216, sum loss: 3865.071777, avg loss: 
2.480791, ppl: 11.950709 +epoch: 1, batch: 4217, sum loss: 5411.548828, avg loss: 3.160951, ppl: 23.593031 +epoch: 1, batch: 4218, sum loss: 4518.312012, avg loss: 2.777082, ppl: 16.072050 +epoch: 1, batch: 4219, sum loss: 3643.858887, avg loss: 2.497504, ppl: 12.152130 +epoch: 1, batch: 4220, sum loss: 3799.345215, avg loss: 2.529524, ppl: 12.547533 +epoch: 1, batch: 4221, sum loss: 3740.610596, avg loss: 2.485456, ppl: 12.006591 +epoch: 1, batch: 4222, sum loss: 3893.889648, avg loss: 2.781350, ppl: 16.140791 +epoch: 1, batch: 4223, sum loss: 3796.056885, avg loss: 2.554547, ppl: 12.865471 +epoch: 1, batch: 4224, sum loss: 5614.222656, avg loss: 3.182666, ppl: 24.110949 +epoch: 1, batch: 4225, sum loss: 5114.541016, avg loss: 2.732127, ppl: 15.365530 +epoch: 1, batch: 4226, sum loss: 4473.685547, avg loss: 2.960745, ppl: 19.312351 +epoch: 1, batch: 4227, sum loss: 5544.098633, avg loss: 2.977497, ppl: 19.638592 +epoch: 1, batch: 4228, sum loss: 4323.354004, avg loss: 2.606000, ppl: 13.544763 +epoch: 1, batch: 4229, sum loss: 4758.345215, avg loss: 2.915653, ppl: 18.460859 +epoch: 1, batch: 4230, sum loss: 4643.818359, avg loss: 2.769122, ppl: 15.944634 +epoch: 1, batch: 4231, sum loss: 4707.391602, avg loss: 2.788739, ppl: 16.260506 +epoch: 1, batch: 4232, sum loss: 5555.848145, avg loss: 3.066141, ppl: 21.458941 +epoch: 1, batch: 4233, sum loss: 4238.297852, avg loss: 2.709909, ppl: 15.027911 +epoch: 1, batch: 4234, sum loss: 4161.114258, avg loss: 2.660559, ppl: 14.304282 +epoch: 1, batch: 4235, sum loss: 4593.430664, avg loss: 2.705201, ppl: 14.957318 +epoch: 1, batch: 4236, sum loss: 3556.326660, avg loss: 2.675942, ppl: 14.526026 +epoch: 1, batch: 4237, sum loss: 4525.906738, avg loss: 2.958109, ppl: 19.261517 +epoch: 1, batch: 4238, sum loss: 4395.436035, avg loss: 2.709887, ppl: 15.027571 +epoch: 1, batch: 4239, sum loss: 4149.441895, avg loss: 2.757104, ppl: 15.754159 +epoch: 1, batch: 4240, sum loss: 4904.422363, avg loss: 2.706635, ppl: 14.978784 
+epoch: 1, batch: 4241, sum loss: 4334.922852, avg loss: 2.609827, ppl: 13.596699 +epoch: 1, batch: 4242, sum loss: 4258.674805, avg loss: 2.765373, ppl: 15.884968 +epoch: 1, batch: 4243, sum loss: 4441.235352, avg loss: 2.733068, ppl: 15.379996 +epoch: 1, batch: 4244, sum loss: 4431.480957, avg loss: 2.437558, ppl: 11.445060 +epoch: 1, batch: 4245, sum loss: 4269.028809, avg loss: 2.530545, ppl: 12.560347 +epoch: 1, batch: 4246, sum loss: 5357.397949, avg loss: 2.796137, ppl: 16.381241 +epoch: 1, batch: 4247, sum loss: 4776.488281, avg loss: 2.821316, ppl: 16.798948 +epoch: 1, batch: 4248, sum loss: 4639.918457, avg loss: 2.825773, ppl: 16.873976 +epoch: 1, batch: 4249, sum loss: 5863.716309, avg loss: 2.951040, ppl: 19.125830 +epoch: 1, batch: 4250, sum loss: 4070.296143, avg loss: 2.755786, ppl: 15.733409 +epoch: 1, batch: 4251, sum loss: 3799.856934, avg loss: 2.569207, ppl: 13.055464 +epoch: 1, batch: 4252, sum loss: 5132.805176, avg loss: 2.960095, ppl: 19.299809 +epoch: 1, batch: 4253, sum loss: 3498.806641, avg loss: 2.451862, ppl: 11.609942 +epoch: 1, batch: 4254, sum loss: 5147.592773, avg loss: 3.272468, ppl: 26.376364 +epoch: 1, batch: 4255, sum loss: 4597.999023, avg loss: 2.558708, ppl: 12.919121 +epoch: 1, batch: 4256, sum loss: 3737.048096, avg loss: 2.644762, ppl: 14.080088 +epoch: 1, batch: 4257, sum loss: 4714.309082, avg loss: 2.726610, ppl: 15.280999 +epoch: 1, batch: 4258, sum loss: 4999.295898, avg loss: 2.903192, ppl: 18.232243 +epoch: 1, batch: 4259, sum loss: 3685.705322, avg loss: 2.431204, ppl: 11.372567 +epoch: 1, batch: 4260, sum loss: 5236.172363, avg loss: 2.894512, ppl: 18.074682 +epoch: 1, batch: 4261, sum loss: 5002.319824, avg loss: 2.724575, ppl: 15.249932 +epoch: 1, batch: 4262, sum loss: 4362.734375, avg loss: 2.658583, ppl: 14.276041 +epoch: 1, batch: 4263, sum loss: 4181.966797, avg loss: 2.587851, ppl: 13.301154 +epoch: 1, batch: 4264, sum loss: 3738.565430, avg loss: 2.512477, ppl: 12.335443 +epoch: 1, batch: 4265, sum 
loss: 4232.987793, avg loss: 2.772094, ppl: 15.992090 +epoch: 1, batch: 4266, sum loss: 5233.774902, avg loss: 2.804810, ppl: 16.523930 +epoch: 1, batch: 4267, sum loss: 5451.853516, avg loss: 2.918551, ppl: 18.514444 +epoch: 1, batch: 4268, sum loss: 5031.381836, avg loss: 2.818702, ppl: 16.755079 +epoch: 1, batch: 4269, sum loss: 5258.596680, avg loss: 3.032639, ppl: 20.751930 +epoch: 1, batch: 4270, sum loss: 4711.337402, avg loss: 2.663277, ppl: 14.343221 +epoch: 1, batch: 4271, sum loss: 4949.813965, avg loss: 2.780794, ppl: 16.131830 +epoch: 1, batch: 4272, sum loss: 5541.888672, avg loss: 2.950952, ppl: 19.124157 +epoch: 1, batch: 4273, sum loss: 5197.998535, avg loss: 2.958451, ppl: 19.268103 +epoch: 1, batch: 4274, sum loss: 3835.813477, avg loss: 2.468349, ppl: 11.802938 +epoch: 1, batch: 4275, sum loss: 4118.842285, avg loss: 2.590467, ppl: 13.335995 +epoch: 1, batch: 4276, sum loss: 5225.146484, avg loss: 2.958746, ppl: 19.273796 +epoch: 1, batch: 4277, sum loss: 3981.608398, avg loss: 2.663283, ppl: 14.343303 +epoch: 1, batch: 4278, sum loss: 5668.357422, avg loss: 2.983346, ppl: 19.753807 +epoch: 1, batch: 4279, sum loss: 4333.761230, avg loss: 2.896899, ppl: 18.117878 +epoch: 1, batch: 4280, sum loss: 4218.243164, avg loss: 2.873463, ppl: 17.698195 +epoch: 1, batch: 4281, sum loss: 4705.250977, avg loss: 3.043500, ppl: 20.978539 +epoch: 1, batch: 4282, sum loss: 4938.299316, avg loss: 2.867769, ppl: 17.597723 +epoch: 1, batch: 4283, sum loss: 4582.068359, avg loss: 2.838952, ppl: 17.097839 +epoch: 1, batch: 4284, sum loss: 4777.105957, avg loss: 2.978246, ppl: 19.653309 +epoch: 1, batch: 4285, sum loss: 4607.652832, avg loss: 2.819861, ppl: 16.774517 +epoch: 1, batch: 4286, sum loss: 4631.367676, avg loss: 2.864173, ppl: 17.534550 +epoch: 1, batch: 4287, sum loss: 5003.643066, avg loss: 2.697382, ppl: 14.840823 +epoch: 1, batch: 4288, sum loss: 4423.599609, avg loss: 2.653629, ppl: 14.205498 +epoch: 1, batch: 4289, sum loss: 4956.886719, avg loss: 
3.015138, ppl: 20.391903 +epoch: 1, batch: 4290, sum loss: 4529.954590, avg loss: 2.775707, ppl: 16.049978 +epoch: 1, batch: 4291, sum loss: 4483.607422, avg loss: 2.600701, ppl: 13.473174 +epoch: 1, batch: 4292, sum loss: 4536.756348, avg loss: 2.837246, ppl: 17.068693 +epoch: 1, batch: 4293, sum loss: 4347.505371, avg loss: 2.672099, ppl: 14.470312 +epoch: 1, batch: 4294, sum loss: 4984.445312, avg loss: 3.013570, ppl: 20.359962 +epoch: 1, batch: 4295, sum loss: 4571.646484, avg loss: 2.808137, ppl: 16.578999 +epoch: 1, batch: 4296, sum loss: 4666.630371, avg loss: 2.861208, ppl: 17.482637 +epoch: 1, batch: 4297, sum loss: 4499.322266, avg loss: 2.735150, ppl: 15.412057 +epoch: 1, batch: 4298, sum loss: 4604.185547, avg loss: 2.503636, ppl: 12.226866 +epoch: 1, batch: 4299, sum loss: 4798.485352, avg loss: 2.568782, ppl: 13.049924 +epoch: 1, batch: 4300, sum loss: 3973.059082, avg loss: 2.734383, ppl: 15.400244 +epoch: 1, batch: 4301, sum loss: 5011.692383, avg loss: 2.804528, ppl: 16.519285 +epoch: 1, batch: 4302, sum loss: 4162.408203, avg loss: 2.657987, ppl: 14.267545 +epoch: 1, batch: 4303, sum loss: 4882.122070, avg loss: 2.695816, ppl: 14.817602 +epoch: 1, batch: 4304, sum loss: 4859.391602, avg loss: 3.073619, ppl: 21.620001 +epoch: 1, batch: 4305, sum loss: 5574.861328, avg loss: 2.952787, ppl: 19.159269 +epoch: 1, batch: 4306, sum loss: 4686.059570, avg loss: 2.916030, ppl: 18.467819 +epoch: 1, batch: 4307, sum loss: 5064.016113, avg loss: 2.937365, ppl: 18.866060 +epoch: 1, batch: 4308, sum loss: 4773.644043, avg loss: 2.829665, ppl: 16.939779 +epoch: 1, batch: 4309, sum loss: 4737.295898, avg loss: 2.676438, ppl: 14.533238 +epoch: 1, batch: 4310, sum loss: 4791.782227, avg loss: 2.891842, ppl: 18.026482 +epoch: 1, batch: 4311, sum loss: 4329.686035, avg loss: 2.811484, ppl: 16.634592 +epoch: 1, batch: 4312, sum loss: 4277.756836, avg loss: 2.692106, ppl: 14.762737 +epoch: 1, batch: 4313, sum loss: 4019.528320, avg loss: 2.545616, ppl: 12.751085 
+epoch: 1, batch: 4314, sum loss: 5201.645996, avg loss: 2.765362, ppl: 15.884789 +epoch: 1, batch: 4315, sum loss: 4833.866699, avg loss: 2.840110, ppl: 17.117641 +epoch: 1, batch: 4316, sum loss: 4915.799805, avg loss: 2.793068, ppl: 16.331045 +epoch: 1, batch: 4317, sum loss: 3915.812744, avg loss: 2.749869, ppl: 15.640577 +epoch: 1, batch: 4318, sum loss: 4781.343750, avg loss: 2.749479, ppl: 15.634485 +epoch: 1, batch: 4319, sum loss: 4871.709473, avg loss: 2.860663, ppl: 17.473110 +epoch: 1, batch: 4320, sum loss: 4403.035156, avg loss: 2.806268, ppl: 16.548054 +epoch: 1, batch: 4321, sum loss: 3973.681641, avg loss: 2.573628, ppl: 13.113316 +epoch: 1, batch: 4322, sum loss: 5004.960449, avg loss: 2.763645, ppl: 15.857537 +epoch: 1, batch: 4323, sum loss: 4553.007812, avg loss: 2.664136, ppl: 14.355536 +epoch: 1, batch: 4324, sum loss: 4454.569336, avg loss: 2.879489, ppl: 17.805172 +epoch: 1, batch: 4325, sum loss: 3785.062012, avg loss: 2.543725, ppl: 12.726985 +epoch: 1, batch: 4326, sum loss: 5022.461426, avg loss: 2.826371, ppl: 16.884077 +epoch: 1, batch: 4327, sum loss: 4313.245117, avg loss: 2.517948, ppl: 12.403121 +epoch: 1, batch: 4328, sum loss: 4044.809082, avg loss: 2.602837, ppl: 13.501993 +epoch: 1, batch: 4329, sum loss: 4308.906738, avg loss: 2.756818, ppl: 15.749649 +epoch: 1, batch: 4330, sum loss: 5669.687500, avg loss: 2.937662, ppl: 18.871675 +epoch: 1, batch: 4331, sum loss: 5294.332031, avg loss: 2.843358, ppl: 17.173334 +epoch: 1, batch: 4332, sum loss: 5558.666504, avg loss: 2.852061, ppl: 17.323444 +epoch: 1, batch: 4333, sum loss: 4454.048828, avg loss: 2.729197, ppl: 15.320577 +epoch: 1, batch: 4334, sum loss: 4319.502930, avg loss: 2.836181, ppl: 17.050520 +epoch: 1, batch: 4335, sum loss: 5115.652832, avg loss: 2.877195, ppl: 17.764376 +epoch: 1, batch: 4336, sum loss: 4794.106934, avg loss: 2.632678, ppl: 13.910977 +epoch: 1, batch: 4337, sum loss: 4666.915039, avg loss: 2.837030, ppl: 17.065010 +epoch: 1, batch: 4338, sum 
loss: 5281.348633, avg loss: 3.145532, ppl: 23.232035 +epoch: 1, batch: 4339, sum loss: 4015.717285, avg loss: 2.628087, ppl: 13.847259 +epoch: 1, batch: 4340, sum loss: 5382.649414, avg loss: 2.955876, ppl: 19.218544 +epoch: 1, batch: 4341, sum loss: 4266.906738, avg loss: 2.754620, ppl: 15.715073 +epoch: 1, batch: 4342, sum loss: 4976.343262, avg loss: 2.822656, ppl: 16.821476 +epoch: 1, batch: 4343, sum loss: 3890.244873, avg loss: 2.498552, ppl: 12.164862 +epoch: 1, batch: 4344, sum loss: 4535.821289, avg loss: 2.690285, ppl: 14.735882 +epoch: 1, batch: 4345, sum loss: 4808.536621, avg loss: 2.955462, ppl: 19.210596 +epoch: 1, batch: 4346, sum loss: 4573.009766, avg loss: 2.828083, ppl: 16.913004 +epoch: 1, batch: 4347, sum loss: 3863.069824, avg loss: 2.554940, ppl: 12.870533 +epoch: 1, batch: 4348, sum loss: 4725.765137, avg loss: 2.824725, ppl: 16.856312 +epoch: 1, batch: 4349, sum loss: 4896.644043, avg loss: 2.747836, ppl: 15.608824 +epoch: 1, batch: 4350, sum loss: 4976.344238, avg loss: 3.012315, ppl: 20.334421 +epoch: 1, batch: 4351, sum loss: 5012.287109, avg loss: 2.825415, ppl: 16.867950 +epoch: 1, batch: 4352, sum loss: 4762.173828, avg loss: 2.702709, ppl: 14.920102 +epoch: 1, batch: 4353, sum loss: 5699.408691, avg loss: 3.131543, ppl: 22.909311 +epoch: 1, batch: 4354, sum loss: 5077.921387, avg loss: 2.926756, ppl: 18.666975 +epoch: 1, batch: 4355, sum loss: 5306.831055, avg loss: 2.768300, ppl: 15.931528 +epoch: 1, batch: 4356, sum loss: 5795.608398, avg loss: 2.931517, ppl: 18.756056 +epoch: 1, batch: 4357, sum loss: 4401.212402, avg loss: 2.522185, ppl: 12.455778 +epoch: 1, batch: 4358, sum loss: 4449.363281, avg loss: 2.564475, ppl: 12.993830 +epoch: 1, batch: 4359, sum loss: 5471.793945, avg loss: 3.108974, ppl: 22.398046 +epoch: 1, batch: 4360, sum loss: 4858.179688, avg loss: 2.670797, ppl: 14.451481 +epoch: 1, batch: 4361, sum loss: 4946.745605, avg loss: 2.859390, ppl: 17.450888 +epoch: 1, batch: 4362, sum loss: 5033.875000, avg loss: 
2.649408, ppl: 14.145663 +epoch: 1, batch: 4363, sum loss: 3454.525146, avg loss: 2.705188, ppl: 14.957129 +epoch: 1, batch: 4364, sum loss: 3523.946777, avg loss: 2.716998, ppl: 15.134821 +epoch: 1, batch: 4365, sum loss: 5231.784180, avg loss: 3.036439, ppl: 20.830931 +epoch: 1, batch: 4366, sum loss: 4750.893555, avg loss: 2.781554, ppl: 16.144081 +epoch: 1, batch: 4367, sum loss: 4262.063965, avg loss: 2.815102, ppl: 16.694876 +epoch: 1, batch: 4368, sum loss: 4877.606445, avg loss: 2.887866, ppl: 17.954962 +epoch: 1, batch: 4369, sum loss: 4889.908203, avg loss: 2.974397, ppl: 19.577808 +epoch: 1, batch: 4370, sum loss: 4071.407715, avg loss: 2.619954, ppl: 13.735086 +epoch: 1, batch: 4371, sum loss: 4607.259277, avg loss: 2.785526, ppl: 16.208334 +epoch: 1, batch: 4372, sum loss: 5131.977539, avg loss: 2.840054, ppl: 17.116686 +epoch: 1, batch: 4373, sum loss: 4447.736328, avg loss: 2.802607, ppl: 16.487568 +epoch: 1, batch: 4374, sum loss: 4857.305664, avg loss: 2.857239, ppl: 17.413378 +epoch: 1, batch: 4375, sum loss: 4579.144043, avg loss: 2.809291, ppl: 16.598145 +epoch: 1, batch: 4376, sum loss: 3995.898926, avg loss: 2.498999, ppl: 12.170301 +epoch: 1, batch: 4377, sum loss: 4049.594238, avg loss: 2.422006, ppl: 11.268445 +epoch: 1, batch: 4378, sum loss: 4952.853027, avg loss: 2.941124, ppl: 18.937124 +epoch: 1, batch: 4379, sum loss: 4764.582031, avg loss: 2.768496, ppl: 15.934654 +epoch: 1, batch: 4380, sum loss: 4296.129883, avg loss: 2.686760, ppl: 14.684029 +epoch: 1, batch: 4381, sum loss: 5089.429688, avg loss: 3.235492, ppl: 25.418886 +epoch: 1, batch: 4382, sum loss: 5041.615234, avg loss: 2.767077, ppl: 15.912062 +epoch: 1, batch: 4383, sum loss: 5307.700195, avg loss: 2.773093, ppl: 16.008070 +epoch: 1, batch: 4384, sum loss: 4033.498779, avg loss: 2.751364, ppl: 15.663976 +epoch: 1, batch: 4385, sum loss: 5564.000000, avg loss: 3.047098, ppl: 21.054150 +epoch: 1, batch: 4386, sum loss: 5395.293945, avg loss: 3.010767, ppl: 20.302961 
+epoch: 1, batch: 4387, sum loss: 3751.491943, avg loss: 2.619757, ppl: 13.732388 +epoch: 1, batch: 4388, sum loss: 4038.050537, avg loss: 2.565470, ppl: 13.006776 +epoch: 1, batch: 4389, sum loss: 4023.432617, avg loss: 2.638316, ppl: 13.989631 +epoch: 1, batch: 4390, sum loss: 4867.310059, avg loss: 2.795698, ppl: 16.374052 +epoch: 1, batch: 4391, sum loss: 5313.321777, avg loss: 2.892391, ppl: 18.036383 +epoch: 1, batch: 4392, sum loss: 5103.403320, avg loss: 2.714576, ppl: 15.098210 +epoch: 1, batch: 4393, sum loss: 5099.602051, avg loss: 2.823700, ppl: 16.839039 +epoch: 1, batch: 4394, sum loss: 4784.483398, avg loss: 2.692450, ppl: 14.767810 +epoch: 1, batch: 4395, sum loss: 6149.607422, avg loss: 3.107432, ppl: 22.363533 +epoch: 1, batch: 4396, sum loss: 5185.259277, avg loss: 2.762525, ppl: 15.839786 +epoch: 1, batch: 4397, sum loss: 5253.387207, avg loss: 2.886477, ppl: 17.930021 +epoch: 1, batch: 4398, sum loss: 4264.777344, avg loss: 2.811323, ppl: 16.631910 +epoch: 1, batch: 4399, sum loss: 4157.642090, avg loss: 2.973993, ppl: 19.569908 +epoch: 1, batch: 4400, sum loss: 3607.492432, avg loss: 2.554881, ppl: 12.869772 +epoch: 1, batch: 4401, sum loss: 4725.162109, avg loss: 2.923986, ppl: 18.615349 +epoch: 1, batch: 4402, sum loss: 4095.630615, avg loss: 2.617016, ppl: 13.694801 +epoch: 1, batch: 4403, sum loss: 6000.898926, avg loss: 3.043052, ppl: 20.969147 +epoch: 1, batch: 4404, sum loss: 4941.165039, avg loss: 2.783755, ppl: 16.179659 +epoch: 1, batch: 4405, sum loss: 4235.181641, avg loss: 2.757280, ppl: 15.756924 +epoch: 1, batch: 4406, sum loss: 4014.047607, avg loss: 2.686779, ppl: 14.684299 +epoch: 1, batch: 4407, sum loss: 3527.260010, avg loss: 2.578407, ppl: 13.176126 +epoch: 1, batch: 4408, sum loss: 4743.205078, avg loss: 2.823336, ppl: 16.832918 +epoch: 1, batch: 4409, sum loss: 4098.134766, avg loss: 2.730270, ppl: 15.337023 +epoch: 1, batch: 4410, sum loss: 4738.392578, avg loss: 2.723214, ppl: 15.229193 +epoch: 1, batch: 4411, sum 
loss: 4297.166504, avg loss: 2.724900, ppl: 15.254881 +epoch: 1, batch: 4412, sum loss: 5011.073730, avg loss: 2.884902, ppl: 17.901804 +epoch: 1, batch: 4413, sum loss: 4325.507812, avg loss: 2.762138, ppl: 15.833653 +epoch: 1, batch: 4414, sum loss: 5197.124023, avg loss: 2.980002, ppl: 19.687859 +epoch: 1, batch: 4415, sum loss: 4828.659180, avg loss: 2.900096, ppl: 18.175884 +epoch: 1, batch: 4416, sum loss: 3953.975342, avg loss: 2.579240, ppl: 13.187117 +epoch: 1, batch: 4417, sum loss: 5101.685059, avg loss: 2.795444, ppl: 16.369892 +epoch: 1, batch: 4418, sum loss: 3813.710938, avg loss: 2.556107, ppl: 12.885550 +epoch: 1, batch: 4419, sum loss: 4584.849609, avg loss: 2.785450, ppl: 16.207102 +epoch: 1, batch: 4420, sum loss: 4458.248535, avg loss: 2.757111, ppl: 15.754264 +epoch: 1, batch: 4421, sum loss: 5063.255859, avg loss: 2.809798, ppl: 16.606564 +epoch: 1, batch: 4422, sum loss: 4798.104492, avg loss: 2.590769, ppl: 13.340027 +epoch: 1, batch: 4423, sum loss: 4024.586914, avg loss: 2.656493, ppl: 14.246243 +epoch: 1, batch: 4424, sum loss: 5520.938965, avg loss: 2.973042, ppl: 19.551300 +epoch: 1, batch: 4425, sum loss: 5008.364258, avg loss: 2.944365, ppl: 18.998587 +epoch: 1, batch: 4426, sum loss: 4683.885742, avg loss: 2.583500, ppl: 13.243414 +epoch: 1, batch: 4427, sum loss: 5150.452148, avg loss: 2.891888, ppl: 18.027311 +epoch: 1, batch: 4428, sum loss: 5005.125488, avg loss: 3.167801, ppl: 23.755186 +epoch: 1, batch: 4429, sum loss: 5178.830078, avg loss: 2.967811, ppl: 19.449295 +epoch: 1, batch: 4430, sum loss: 4262.625000, avg loss: 2.750081, ppl: 15.643896 +epoch: 1, batch: 4431, sum loss: 5299.858887, avg loss: 3.126760, ppl: 22.799999 +epoch: 1, batch: 4432, sum loss: 4882.840820, avg loss: 2.601407, ppl: 13.482696 +epoch: 1, batch: 4433, sum loss: 4379.962891, avg loss: 2.756427, ppl: 15.743495 +epoch: 1, batch: 4434, sum loss: 4801.436523, avg loss: 2.801305, ppl: 16.466118 +epoch: 1, batch: 4435, sum loss: 4533.541504, avg loss: 
2.926754, ppl: 18.666935 +epoch: 1, batch: 4436, sum loss: 4619.816406, avg loss: 2.730388, ppl: 15.338837 +epoch: 1, batch: 4437, sum loss: 5152.113770, avg loss: 2.957585, ppl: 19.251431 +epoch: 1, batch: 4438, sum loss: 3722.373291, avg loss: 2.614026, ppl: 13.653915 +epoch: 1, batch: 4439, sum loss: 4671.681641, avg loss: 2.666485, ppl: 14.389303 +epoch: 1, batch: 4440, sum loss: 4820.193848, avg loss: 2.877728, ppl: 17.773840 +epoch: 1, batch: 4441, sum loss: 4607.561035, avg loss: 2.959256, ppl: 19.283613 +epoch: 1, batch: 4442, sum loss: 4592.290039, avg loss: 2.879179, ppl: 17.799650 +epoch: 1, batch: 4443, sum loss: 4585.368164, avg loss: 2.653569, ppl: 14.204651 +epoch: 1, batch: 4444, sum loss: 5235.410156, avg loss: 2.865577, ppl: 17.559191 +epoch: 1, batch: 4445, sum loss: 4672.003906, avg loss: 2.713126, ppl: 15.076337 +epoch: 1, batch: 4446, sum loss: 4908.911621, avg loss: 2.913301, ppl: 18.417490 +epoch: 1, batch: 4447, sum loss: 5882.302734, avg loss: 2.791790, ppl: 16.310196 +epoch: 1, batch: 4448, sum loss: 4619.409668, avg loss: 2.849728, ppl: 17.283087 +epoch: 1, batch: 4449, sum loss: 4311.583008, avg loss: 2.527305, ppl: 12.519725 +epoch: 1, batch: 4450, sum loss: 5499.490723, avg loss: 2.846527, ppl: 17.227852 +epoch: 1, batch: 4451, sum loss: 6343.597168, avg loss: 3.103521, ppl: 22.276251 +epoch: 1, batch: 4452, sum loss: 4640.009766, avg loss: 2.678989, ppl: 14.570361 +epoch: 1, batch: 4453, sum loss: 4081.341309, avg loss: 2.579862, ppl: 13.195312 +epoch: 1, batch: 4454, sum loss: 3782.459961, avg loss: 2.734967, ppl: 15.409243 +epoch: 1, batch: 4455, sum loss: 5051.606445, avg loss: 3.085893, ppl: 21.887001 +epoch: 1, batch: 4456, sum loss: 3952.136475, avg loss: 2.612119, ppl: 13.627904 +epoch: 1, batch: 4457, sum loss: 5322.766113, avg loss: 2.981942, ppl: 19.726086 +epoch: 1, batch: 4458, sum loss: 4854.362793, avg loss: 2.835492, ppl: 17.038784 +epoch: 1, batch: 4459, sum loss: 4500.405273, avg loss: 2.841165, ppl: 17.135714 
+epoch: 1, batch: 4460, sum loss: 4587.283691, avg loss: 2.790318, ppl: 16.286205 +epoch: 1, batch: 4461, sum loss: 5306.800293, avg loss: 2.974664, ppl: 19.583042 +epoch: 1, batch: 4462, sum loss: 4608.991699, avg loss: 2.730445, ppl: 15.339718 +epoch: 1, batch: 4463, sum loss: 4731.256836, avg loss: 2.758750, ppl: 15.780108 +epoch: 1, batch: 4464, sum loss: 4281.117676, avg loss: 2.516824, ppl: 12.389186 +epoch: 1, batch: 4465, sum loss: 5185.171387, avg loss: 2.733353, ppl: 15.384390 +epoch: 1, batch: 4466, sum loss: 3772.242432, avg loss: 2.496521, ppl: 12.140179 +epoch: 1, batch: 4467, sum loss: 4138.293945, avg loss: 2.649356, ppl: 14.144925 +epoch: 1, batch: 4468, sum loss: 4167.275391, avg loss: 2.707781, ppl: 14.995967 +epoch: 1, batch: 4469, sum loss: 5296.712891, avg loss: 2.891219, ppl: 18.015255 +epoch: 1, batch: 4470, sum loss: 3638.457031, avg loss: 2.517963, ppl: 12.403311 +epoch: 1, batch: 4471, sum loss: 4704.404785, avg loss: 2.782025, ppl: 16.151701 +epoch: 1, batch: 4472, sum loss: 5267.419922, avg loss: 3.219694, ppl: 25.020472 +epoch: 1, batch: 4473, sum loss: 4053.786133, avg loss: 2.560825, ppl: 12.946492 +epoch: 1, batch: 4474, sum loss: 5333.298340, avg loss: 2.842910, ppl: 17.165638 +epoch: 1, batch: 4475, sum loss: 4156.055664, avg loss: 2.691746, ppl: 14.757420 +epoch: 1, batch: 4476, sum loss: 4109.832031, avg loss: 2.725353, ppl: 15.261797 +epoch: 1, batch: 4477, sum loss: 4424.120117, avg loss: 2.695990, ppl: 14.820189 +epoch: 1, batch: 4478, sum loss: 5227.936523, avg loss: 2.946976, ppl: 19.048273 +epoch: 1, batch: 4479, sum loss: 4606.142090, avg loss: 2.736864, ppl: 15.438492 +epoch: 1, batch: 4480, sum loss: 4033.440918, avg loss: 2.721620, ppl: 15.204936 +epoch: 1, batch: 4481, sum loss: 4829.869141, avg loss: 2.834430, ppl: 17.020699 +epoch: 1, batch: 4482, sum loss: 5214.753906, avg loss: 3.107720, ppl: 22.369980 +epoch: 1, batch: 4483, sum loss: 5340.607910, avg loss: 2.828712, ppl: 16.923645 +epoch: 1, batch: 4484, sum 
loss: 3854.016113, avg loss: 2.597046, ppl: 13.424024 +epoch: 1, batch: 4485, sum loss: 5364.483398, avg loss: 2.888790, ppl: 17.971554 +epoch: 1, batch: 4486, sum loss: 4472.834961, avg loss: 2.555906, ppl: 12.882961 +epoch: 1, batch: 4487, sum loss: 4689.230469, avg loss: 2.818047, ppl: 16.744122 +epoch: 1, batch: 4488, sum loss: 5635.427734, avg loss: 2.980131, ppl: 19.690393 +epoch: 1, batch: 4489, sum loss: 3993.121582, avg loss: 2.897766, ppl: 18.133593 +epoch: 1, batch: 4490, sum loss: 4393.066895, avg loss: 2.621162, ppl: 13.751690 +epoch: 1, batch: 4491, sum loss: 3937.049561, avg loss: 2.509273, ppl: 12.295988 +epoch: 1, batch: 4492, sum loss: 4798.260742, avg loss: 2.797820, ppl: 16.408831 +epoch: 1, batch: 4493, sum loss: 4287.015625, avg loss: 2.676040, ppl: 14.527446 +epoch: 1, batch: 4494, sum loss: 5048.753418, avg loss: 2.847577, ppl: 17.245941 +epoch: 1, batch: 4495, sum loss: 5236.983398, avg loss: 2.927324, ppl: 18.677588 +epoch: 1, batch: 4496, sum loss: 3922.951660, avg loss: 2.712968, ppl: 15.073951 +epoch: 1, batch: 4497, sum loss: 5077.501953, avg loss: 2.773076, ppl: 16.007799 +epoch: 1, batch: 4498, sum loss: 4861.878906, avg loss: 2.674301, ppl: 14.502208 +epoch: 1, batch: 4499, sum loss: 4881.011230, avg loss: 2.871183, ppl: 17.657898 +epoch: 1, batch: 4500, sum loss: 4389.653320, avg loss: 2.766007, ppl: 15.895041 +epoch: 1, batch: 4501, sum loss: 5134.857422, avg loss: 2.659170, ppl: 14.284431 +epoch: 1, batch: 4502, sum loss: 5406.791016, avg loss: 3.042651, ppl: 20.960741 +epoch: 1, batch: 4503, sum loss: 3915.187744, avg loss: 2.580875, ppl: 13.208693 +epoch: 1, batch: 4504, sum loss: 4388.571777, avg loss: 2.584553, ppl: 13.257368 +epoch: 1, batch: 4505, sum loss: 4434.388672, avg loss: 2.692404, ppl: 14.767127 +epoch: 1, batch: 4506, sum loss: 5233.405273, avg loss: 2.732849, ppl: 15.376627 +epoch: 1, batch: 4507, sum loss: 4152.156250, avg loss: 2.659934, ppl: 14.295340 +epoch: 1, batch: 4508, sum loss: 3763.998047, avg loss: 
2.729513, ppl: 15.325417 +epoch: 1, batch: 4509, sum loss: 4136.550781, avg loss: 2.696578, ppl: 14.828900 +epoch: 1, batch: 4510, sum loss: 6197.082520, avg loss: 2.890430, ppl: 18.001057 +epoch: 1, batch: 4511, sum loss: 4285.739258, avg loss: 2.558650, ppl: 12.918372 +epoch: 1, batch: 4512, sum loss: 4682.016602, avg loss: 2.757371, ppl: 15.758367 +epoch: 1, batch: 4513, sum loss: 5114.157227, avg loss: 3.084534, ppl: 21.857277 +epoch: 1, batch: 4514, sum loss: 4150.613770, avg loss: 2.453081, ppl: 11.624110 +epoch: 1, batch: 4515, sum loss: 4418.686523, avg loss: 2.666679, ppl: 14.392089 +epoch: 1, batch: 4516, sum loss: 4048.257812, avg loss: 2.386945, ppl: 10.880199 +epoch: 1, batch: 4517, sum loss: 4653.023926, avg loss: 2.769657, ppl: 15.953163 +epoch: 1, batch: 4518, sum loss: 5345.121582, avg loss: 3.129462, ppl: 22.861683 +epoch: 1, batch: 4519, sum loss: 5004.138672, avg loss: 2.660361, ppl: 14.301448 +epoch: 1, batch: 4520, sum loss: 4659.289551, avg loss: 2.861971, ppl: 17.495985 +epoch: 1, batch: 4521, sum loss: 5833.777344, avg loss: 2.993216, ppl: 19.949734 +epoch: 1, batch: 4522, sum loss: 4783.321289, avg loss: 2.684243, ppl: 14.647112 +epoch: 1, batch: 4523, sum loss: 5969.793945, avg loss: 2.871474, ppl: 17.663031 +epoch: 1, batch: 4524, sum loss: 4044.475586, avg loss: 2.433499, ppl: 11.398698 +epoch: 1, batch: 4525, sum loss: 4743.550293, avg loss: 2.823542, ppl: 16.836378 +epoch: 1, batch: 4526, sum loss: 4873.923828, avg loss: 2.809178, ppl: 16.596272 +epoch: 1, batch: 4527, sum loss: 4080.496094, avg loss: 2.423098, ppl: 11.280748 +epoch: 1, batch: 4528, sum loss: 4079.251221, avg loss: 2.683718, ppl: 14.639421 +epoch: 1, batch: 4529, sum loss: 5382.355469, avg loss: 2.828353, ppl: 16.917574 +epoch: 1, batch: 4530, sum loss: 4286.012695, avg loss: 2.583492, ppl: 13.243298 +epoch: 1, batch: 4531, sum loss: 4447.421387, avg loss: 2.637854, ppl: 13.983162 +epoch: 1, batch: 4532, sum loss: 4664.131836, avg loss: 2.702278, ppl: 14.913665 
+epoch: 1, batch: 4533, sum loss: 5511.373535, avg loss: 3.124362, ppl: 22.745367 +epoch: 1, batch: 4534, sum loss: 4942.989258, avg loss: 2.756826, ppl: 15.749776 +epoch: 1, batch: 4535, sum loss: 4656.822266, avg loss: 2.630973, ppl: 13.887277 +epoch: 1, batch: 4536, sum loss: 5165.721680, avg loss: 2.816642, ppl: 16.720610 +epoch: 1, batch: 4537, sum loss: 4795.503906, avg loss: 2.934825, ppl: 18.818209 +epoch: 1, batch: 4538, sum loss: 4785.213379, avg loss: 2.800008, ppl: 16.444775 +epoch: 1, batch: 4539, sum loss: 4671.817871, avg loss: 2.894559, ppl: 18.075527 +epoch: 1, batch: 4540, sum loss: 5345.522949, avg loss: 2.906755, ppl: 18.297335 +epoch: 1, batch: 4541, sum loss: 4960.296875, avg loss: 2.701687, ppl: 14.904849 +epoch: 1, batch: 4542, sum loss: 5944.607910, avg loss: 3.064231, ppl: 21.417984 +epoch: 1, batch: 4543, sum loss: 4560.376465, avg loss: 2.696852, ppl: 14.832964 +epoch: 1, batch: 4544, sum loss: 5326.430664, avg loss: 2.913802, ppl: 18.426731 +epoch: 1, batch: 4545, sum loss: 4953.051758, avg loss: 2.793600, ppl: 16.339731 +epoch: 1, batch: 4546, sum loss: 3857.765381, avg loss: 2.787403, ppl: 16.238787 +epoch: 1, batch: 4547, sum loss: 4480.786133, avg loss: 2.843138, ppl: 17.169567 +epoch: 1, batch: 4548, sum loss: 4805.712891, avg loss: 2.818600, ppl: 16.753378 +epoch: 1, batch: 4549, sum loss: 5096.790039, avg loss: 2.856945, ppl: 17.408268 +epoch: 1, batch: 4550, sum loss: 5057.661133, avg loss: 2.893399, ppl: 18.054573 +epoch: 1, batch: 4551, sum loss: 4489.933594, avg loss: 2.708042, ppl: 14.999876 +epoch: 1, batch: 4552, sum loss: 4067.537109, avg loss: 2.693733, ppl: 14.786776 +epoch: 1, batch: 4553, sum loss: 4571.625488, avg loss: 2.763982, ppl: 15.862877 +epoch: 1, batch: 4554, sum loss: 5235.799805, avg loss: 2.718484, ppl: 15.157325 +epoch: 1, batch: 4555, sum loss: 4481.562500, avg loss: 2.439610, ppl: 11.468562 +epoch: 1, batch: 4556, sum loss: 4423.619141, avg loss: 2.558484, ppl: 12.916225 +epoch: 1, batch: 4557, sum 
loss: 4628.959473, avg loss: 2.831168, ppl: 16.965260 +epoch: 1, batch: 4558, sum loss: 4370.642090, avg loss: 2.536647, ppl: 12.637222 +epoch: 1, batch: 4559, sum loss: 5107.686523, avg loss: 3.000991, ppl: 20.105450 +epoch: 1, batch: 4560, sum loss: 3975.271484, avg loss: 2.720925, ppl: 15.194372 +epoch: 1, batch: 4561, sum loss: 3807.838379, avg loss: 2.490411, ppl: 12.066235 +epoch: 1, batch: 4562, sum loss: 4682.693848, avg loss: 2.688113, ppl: 14.703910 +epoch: 1, batch: 4563, sum loss: 4862.367676, avg loss: 2.872042, ppl: 17.673077 +epoch: 1, batch: 4564, sum loss: 4732.122559, avg loss: 2.664484, ppl: 14.360531 +epoch: 1, batch: 4565, sum loss: 4715.191895, avg loss: 2.808333, ppl: 16.582258 +epoch: 1, batch: 4566, sum loss: 4064.699463, avg loss: 2.466444, ppl: 11.780478 +epoch: 1, batch: 4567, sum loss: 5035.601562, avg loss: 2.701503, ppl: 14.902113 +epoch: 1, batch: 4568, sum loss: 4241.323242, avg loss: 2.901042, ppl: 18.193092 +epoch: 1, batch: 4569, sum loss: 5176.442383, avg loss: 2.734518, ppl: 15.402315 +epoch: 1, batch: 4570, sum loss: 4808.418945, avg loss: 2.742966, ppl: 15.532987 +epoch: 1, batch: 4571, sum loss: 5089.972168, avg loss: 2.779887, ppl: 16.117195 +epoch: 1, batch: 4572, sum loss: 5179.565430, avg loss: 2.958061, ppl: 19.260597 +epoch: 1, batch: 4573, sum loss: 4930.956543, avg loss: 3.032568, ppl: 20.750462 +epoch: 1, batch: 4574, sum loss: 5571.327148, avg loss: 2.932278, ppl: 18.770334 +epoch: 1, batch: 4575, sum loss: 4569.598633, avg loss: 2.855999, ppl: 17.391804 +epoch: 1, batch: 4576, sum loss: 5770.048828, avg loss: 2.825685, ppl: 16.872499 +epoch: 1, batch: 4577, sum loss: 4745.026367, avg loss: 2.882762, ppl: 17.863548 +epoch: 1, batch: 4578, sum loss: 5887.129395, avg loss: 3.037735, ppl: 20.857937 +epoch: 1, batch: 4579, sum loss: 5629.698730, avg loss: 3.101763, ppl: 22.237110 +epoch: 1, batch: 4580, sum loss: 4303.624512, avg loss: 2.758734, ppl: 15.779849 +epoch: 1, batch: 4581, sum loss: 4409.562012, avg loss: 
2.783814, ppl: 16.180624 +epoch: 1, batch: 4582, sum loss: 4298.315918, avg loss: 2.705045, ppl: 14.954983 +epoch: 1, batch: 4583, sum loss: 3823.794678, avg loss: 2.561148, ppl: 12.950682 +epoch: 1, batch: 4584, sum loss: 4272.996582, avg loss: 2.682358, ppl: 14.619530 +epoch: 1, batch: 4585, sum loss: 4719.464355, avg loss: 2.804197, ppl: 16.513815 +epoch: 1, batch: 4586, sum loss: 4519.947266, avg loss: 2.849904, ppl: 17.286116 +epoch: 1, batch: 4587, sum loss: 4429.796875, avg loss: 2.639927, ppl: 14.012177 +epoch: 1, batch: 4588, sum loss: 4204.269043, avg loss: 2.534219, ppl: 12.606579 +epoch: 1, batch: 4589, sum loss: 5069.443359, avg loss: 2.749156, ppl: 15.629432 +epoch: 1, batch: 4590, sum loss: 4555.271484, avg loss: 2.543423, ppl: 12.723153 +epoch: 1, batch: 4591, sum loss: 4318.890137, avg loss: 2.717993, ppl: 15.149879 +epoch: 1, batch: 4592, sum loss: 4355.500488, avg loss: 2.577219, ppl: 13.160491 +epoch: 1, batch: 4593, sum loss: 4438.247070, avg loss: 2.574389, ppl: 13.123302 +epoch: 1, batch: 4594, sum loss: 6028.526855, avg loss: 3.102690, ppl: 22.257750 +epoch: 1, batch: 4595, sum loss: 4975.815430, avg loss: 2.920079, ppl: 18.542761 +epoch: 1, batch: 4596, sum loss: 4770.364258, avg loss: 2.722811, ppl: 15.223050 +epoch: 1, batch: 4597, sum loss: 4692.578125, avg loss: 2.837109, ppl: 17.066357 +epoch: 1, batch: 4598, sum loss: 4996.208984, avg loss: 3.065159, ppl: 21.437866 +epoch: 1, batch: 4599, sum loss: 3851.446289, avg loss: 2.547253, ppl: 12.771970 +epoch: 1, batch: 4600, sum loss: 4600.217285, avg loss: 2.728480, ppl: 15.309600 +epoch: 1, batch: 4601, sum loss: 3809.812012, avg loss: 2.694351, ppl: 14.795909 +epoch: 1, batch: 4602, sum loss: 3960.651367, avg loss: 2.833084, ppl: 16.997803 +epoch: 1, batch: 4603, sum loss: 3298.296875, avg loss: 2.470634, ppl: 11.829950 +epoch: 1, batch: 4604, sum loss: 5186.824707, avg loss: 2.809764, ppl: 16.606001 +epoch: 1, batch: 4605, sum loss: 5397.979492, avg loss: 3.007231, ppl: 20.231302 
+epoch: 1, batch: 4606, sum loss: 4800.569824, avg loss: 2.698465, ppl: 14.856914 +epoch: 1, batch: 4607, sum loss: 5309.000000, avg loss: 2.839037, ppl: 17.099298 +epoch: 1, batch: 4608, sum loss: 4418.784180, avg loss: 2.724281, ppl: 15.245450 +epoch: 1, batch: 4609, sum loss: 5060.133301, avg loss: 2.913145, ppl: 18.414627 +epoch: 1, batch: 4610, sum loss: 4643.134766, avg loss: 2.820860, ppl: 16.791279 +epoch: 1, batch: 4611, sum loss: 5249.812012, avg loss: 2.655444, ppl: 14.231298 +epoch: 1, batch: 4612, sum loss: 4816.878418, avg loss: 2.944302, ppl: 18.997404 +epoch: 1, batch: 4613, sum loss: 4982.943359, avg loss: 2.617092, ppl: 13.695840 +epoch: 1, batch: 4614, sum loss: 4809.020996, avg loss: 3.035998, ppl: 20.821751 +epoch: 1, batch: 4615, sum loss: 4355.151855, avg loss: 2.622006, ppl: 13.763305 +epoch: 1, batch: 4616, sum loss: 5385.290527, avg loss: 2.695341, ppl: 14.810563 +epoch: 1, batch: 4617, sum loss: 3746.049316, avg loss: 2.617784, ppl: 13.705322 +epoch: 1, batch: 4618, sum loss: 4268.932129, avg loss: 3.060167, ppl: 21.331110 +epoch: 1, batch: 4619, sum loss: 5462.215332, avg loss: 2.883958, ppl: 17.884914 +epoch: 1, batch: 4620, sum loss: 3220.730225, avg loss: 2.350898, ppl: 10.494990 +epoch: 1, batch: 4621, sum loss: 4208.930664, avg loss: 2.593303, ppl: 13.373872 +epoch: 1, batch: 4622, sum loss: 5523.119629, avg loss: 2.936268, ppl: 18.845377 +epoch: 1, batch: 4623, sum loss: 4464.619629, avg loss: 2.945000, ppl: 19.010660 +epoch: 1, batch: 4624, sum loss: 4289.168457, avg loss: 2.560698, ppl: 12.944847 +epoch: 1, batch: 4625, sum loss: 4198.991699, avg loss: 2.622731, ppl: 13.773283 +epoch: 1, batch: 4626, sum loss: 4678.714844, avg loss: 2.815111, ppl: 16.695036 +epoch: 1, batch: 4627, sum loss: 4549.112793, avg loss: 2.674376, ppl: 14.503290 +epoch: 1, batch: 4628, sum loss: 4195.819824, avg loss: 2.487149, ppl: 12.026936 +epoch: 1, batch: 4629, sum loss: 5022.809570, avg loss: 2.677404, ppl: 14.547278 +epoch: 1, batch: 4630, sum 
loss: 3826.900391, avg loss: 2.860165, ppl: 17.464401 +epoch: 1, batch: 4631, sum loss: 5484.803711, avg loss: 2.880674, ppl: 17.826290 +epoch: 1, batch: 4632, sum loss: 5574.196289, avg loss: 3.034402, ppl: 20.788546 +epoch: 1, batch: 4633, sum loss: 4871.357910, avg loss: 2.660491, ppl: 14.303304 +epoch: 1, batch: 4634, sum loss: 4809.244141, avg loss: 2.835639, ppl: 17.041290 +epoch: 1, batch: 4635, sum loss: 5109.625000, avg loss: 2.783020, ppl: 16.167774 +epoch: 1, batch: 4636, sum loss: 4107.715332, avg loss: 2.723949, ppl: 15.240391 +epoch: 1, batch: 4637, sum loss: 5098.018066, avg loss: 2.867277, ppl: 17.589056 +epoch: 1, batch: 4638, sum loss: 4326.722656, avg loss: 2.782459, ppl: 16.158699 +epoch: 1, batch: 4639, sum loss: 4349.984863, avg loss: 2.884605, ppl: 17.896503 +epoch: 1, batch: 4640, sum loss: 5281.681641, avg loss: 2.797501, ppl: 16.403601 +epoch: 1, batch: 4641, sum loss: 4574.721680, avg loss: 2.878994, ppl: 17.796366 +epoch: 1, batch: 4642, sum loss: 4366.158691, avg loss: 2.804212, ppl: 16.514063 +epoch: 1, batch: 4643, sum loss: 4935.228516, avg loss: 2.841237, ppl: 17.136951 +epoch: 1, batch: 4644, sum loss: 5069.546875, avg loss: 3.068733, ppl: 21.514631 +epoch: 1, batch: 4645, sum loss: 4443.969238, avg loss: 2.789686, ppl: 16.275904 +epoch: 1, batch: 4646, sum loss: 5098.499023, avg loss: 2.861111, ppl: 17.480932 +epoch: 1, batch: 4647, sum loss: 4122.134277, avg loss: 2.657727, ppl: 14.263827 +epoch: 1, batch: 4648, sum loss: 4731.704590, avg loss: 2.845282, ppl: 17.206415 +epoch: 1, batch: 4649, sum loss: 4350.982422, avg loss: 2.532586, ppl: 12.586010 +epoch: 1, batch: 4650, sum loss: 4870.801758, avg loss: 2.920145, ppl: 18.543978 +epoch: 1, batch: 4651, sum loss: 4661.138672, avg loss: 2.880803, ppl: 17.828577 +epoch: 1, batch: 4652, sum loss: 3926.620361, avg loss: 2.598690, ppl: 13.446110 +epoch: 1, batch: 4653, sum loss: 5366.505859, avg loss: 3.070084, ppl: 21.543705 +epoch: 1, batch: 4654, sum loss: 3988.875977, avg loss: 
2.624261, ppl: 13.794372 +epoch: 1, batch: 4655, sum loss: 4061.211670, avg loss: 2.541434, ppl: 12.697867 +epoch: 1, batch: 4656, sum loss: 4118.595703, avg loss: 2.548636, ppl: 12.789646 +epoch: 1, batch: 4657, sum loss: 4787.387695, avg loss: 2.673025, ppl: 14.483715 +epoch: 1, batch: 4658, sum loss: 4755.902832, avg loss: 2.835959, ppl: 17.046736 +epoch: 1, batch: 4659, sum loss: 4035.432861, avg loss: 2.693880, ppl: 14.788951 +epoch: 1, batch: 4660, sum loss: 4569.490234, avg loss: 2.879326, ppl: 17.802273 +epoch: 1, batch: 4661, sum loss: 3716.361816, avg loss: 2.424241, ppl: 11.293657 +epoch: 1, batch: 4662, sum loss: 4583.789062, avg loss: 2.845307, ppl: 17.206839 +epoch: 1, batch: 4663, sum loss: 4490.294922, avg loss: 2.811706, ppl: 16.638285 +epoch: 1, batch: 4664, sum loss: 4825.567383, avg loss: 2.597184, ppl: 13.425874 +epoch: 1, batch: 4665, sum loss: 5028.073242, avg loss: 2.909765, ppl: 18.352476 +epoch: 1, batch: 4666, sum loss: 5332.499023, avg loss: 2.894951, ppl: 18.082609 +epoch: 1, batch: 4667, sum loss: 5346.359863, avg loss: 2.957057, ppl: 19.241270 +epoch: 1, batch: 4668, sum loss: 5166.037109, avg loss: 2.832257, ppl: 16.983755 +epoch: 1, batch: 4669, sum loss: 3951.862061, avg loss: 2.855392, ppl: 17.381241 +epoch: 1, batch: 4670, sum loss: 4562.905762, avg loss: 2.740484, ppl: 15.494483 +epoch: 1, batch: 4671, sum loss: 4018.948975, avg loss: 2.642307, ppl: 14.045570 +epoch: 1, batch: 4672, sum loss: 5052.641602, avg loss: 2.757992, ppl: 15.768153 +epoch: 1, batch: 4673, sum loss: 5751.461426, avg loss: 2.860001, ppl: 17.461542 +epoch: 1, batch: 4674, sum loss: 5274.412109, avg loss: 3.040007, ppl: 20.905392 +epoch: 1, batch: 4675, sum loss: 4985.314453, avg loss: 2.832565, ppl: 16.988979 +epoch: 1, batch: 4676, sum loss: 4455.649902, avg loss: 2.666457, ppl: 14.388905 +epoch: 1, batch: 4677, sum loss: 4831.292480, avg loss: 2.808891, ppl: 16.591509 +epoch: 1, batch: 4678, sum loss: 4450.233398, avg loss: 2.895402, ppl: 18.090780 
+epoch: 1, batch: 4679, sum loss: 5006.728027, avg loss: 2.582119, ppl: 13.225129 +epoch: 1, batch: 4680, sum loss: 4714.017090, avg loss: 2.777853, ppl: 16.084459 +epoch: 1, batch: 4681, sum loss: 3396.166016, avg loss: 2.313465, ppl: 10.109389 +epoch: 1, batch: 4682, sum loss: 4563.723633, avg loss: 2.734406, ppl: 15.400597 +epoch: 1, batch: 4683, sum loss: 4128.013672, avg loss: 2.464486, ppl: 11.757436 +epoch: 1, batch: 4684, sum loss: 4588.441895, avg loss: 2.973715, ppl: 19.564463 +epoch: 1, batch: 4685, sum loss: 5118.625000, avg loss: 2.664563, ppl: 14.361671 +epoch: 1, batch: 4686, sum loss: 5005.370117, avg loss: 3.033558, ppl: 20.770998 +epoch: 1, batch: 4687, sum loss: 3882.513184, avg loss: 2.598737, ppl: 13.446744 +epoch: 1, batch: 4688, sum loss: 5066.842285, avg loss: 2.708093, ppl: 15.000645 +epoch: 1, batch: 4689, sum loss: 4457.372070, avg loss: 2.653198, ppl: 14.199372 +epoch: 1, batch: 4690, sum loss: 4642.026367, avg loss: 2.964257, ppl: 19.380293 +epoch: 1, batch: 4691, sum loss: 4477.579590, avg loss: 2.606275, ppl: 13.548483 +epoch: 1, batch: 4692, sum loss: 5247.999512, avg loss: 3.116390, ppl: 22.564762 +epoch: 1, batch: 4693, sum loss: 5672.380859, avg loss: 2.999673, ppl: 20.078962 +epoch: 1, batch: 4694, sum loss: 4463.242188, avg loss: 2.658274, ppl: 14.271634 +epoch: 1, batch: 4695, sum loss: 5411.325195, avg loss: 3.138820, ppl: 23.076618 +epoch: 1, batch: 4696, sum loss: 4579.100098, avg loss: 2.903678, ppl: 18.241112 +epoch: 1, batch: 4697, sum loss: 4461.200195, avg loss: 2.726895, ppl: 15.285354 +epoch: 1, batch: 4698, sum loss: 4087.502930, avg loss: 2.703375, ppl: 14.930037 +epoch: 1, batch: 4699, sum loss: 4811.541016, avg loss: 2.864013, ppl: 17.531736 +epoch: 1, batch: 4700, sum loss: 5300.585938, avg loss: 2.725237, ppl: 15.260032 +epoch: 1, batch: 4701, sum loss: 5317.726562, avg loss: 2.752446, ppl: 15.680947 +epoch: 1, batch: 4702, sum loss: 3405.696045, avg loss: 2.541564, ppl: 12.699520 +epoch: 1, batch: 4703, sum 
loss: 6423.929199, avg loss: 3.098856, ppl: 22.172581 +epoch: 1, batch: 4704, sum loss: 4669.171387, avg loss: 2.795911, ppl: 16.377542 +epoch: 1, batch: 4705, sum loss: 3850.953125, avg loss: 2.339583, ppl: 10.376905 +epoch: 1, batch: 4706, sum loss: 5383.416992, avg loss: 2.732699, ppl: 15.374329 +epoch: 1, batch: 4707, sum loss: 3758.214111, avg loss: 2.401415, ppl: 11.038781 +epoch: 1, batch: 4708, sum loss: 4187.856445, avg loss: 2.623970, ppl: 13.790366 +epoch: 1, batch: 4709, sum loss: 5320.818848, avg loss: 2.947822, ppl: 19.064388 +epoch: 1, batch: 4710, sum loss: 4570.840332, avg loss: 2.723981, ppl: 15.240878 +epoch: 1, batch: 4711, sum loss: 3824.886719, avg loss: 2.518029, ppl: 12.404130 +epoch: 1, batch: 4712, sum loss: 4592.198242, avg loss: 2.786528, ppl: 16.224592 +epoch: 1, batch: 4713, sum loss: 4771.956543, avg loss: 2.931177, ppl: 18.749689 +epoch: 1, batch: 4714, sum loss: 5053.659180, avg loss: 2.733185, ppl: 15.381801 +epoch: 1, batch: 4715, sum loss: 4953.793945, avg loss: 2.709953, ppl: 15.028570 +epoch: 1, batch: 4716, sum loss: 4927.484863, avg loss: 3.008232, ppl: 20.251570 +epoch: 1, batch: 4717, sum loss: 4558.684082, avg loss: 2.565382, ppl: 13.005626 +epoch: 1, batch: 4718, sum loss: 4833.749023, avg loss: 2.835043, ppl: 17.031139 +epoch: 1, batch: 4719, sum loss: 5380.827637, avg loss: 3.078277, ppl: 21.720942 +epoch: 1, batch: 4720, sum loss: 4801.004883, avg loss: 2.630688, ppl: 13.883314 +epoch: 1, batch: 4721, sum loss: 3982.734619, avg loss: 2.664037, ppl: 14.354113 +epoch: 1, batch: 4722, sum loss: 4220.718750, avg loss: 2.749654, ppl: 15.637218 +epoch: 1, batch: 4723, sum loss: 4478.485352, avg loss: 2.686554, ppl: 14.680994 +epoch: 1, batch: 4724, sum loss: 4656.285156, avg loss: 2.798248, ppl: 16.415865 +epoch: 1, batch: 4725, sum loss: 4551.831055, avg loss: 2.698181, ppl: 14.852689 +epoch: 1, batch: 4726, sum loss: 4678.404785, avg loss: 2.918531, ppl: 18.514065 +epoch: 1, batch: 4727, sum loss: 4687.583008, avg loss: 
2.711153, ppl: 15.046611 +epoch: 1, batch: 4728, sum loss: 4974.795898, avg loss: 2.766850, ppl: 15.908440 +epoch: 1, batch: 4729, sum loss: 4357.235352, avg loss: 2.650386, ppl: 14.159508 +epoch: 1, batch: 4730, sum loss: 5569.362305, avg loss: 2.979862, ppl: 19.685099 +epoch: 1, batch: 4731, sum loss: 5327.733398, avg loss: 2.911330, ppl: 18.381226 +epoch: 1, batch: 4732, sum loss: 4546.645996, avg loss: 2.881271, ppl: 17.836931 +epoch: 1, batch: 4733, sum loss: 4108.276855, avg loss: 2.460046, ppl: 11.705351 +epoch: 1, batch: 4734, sum loss: 5264.314941, avg loss: 2.952504, ppl: 19.153862 +epoch: 1, batch: 4735, sum loss: 5167.103027, avg loss: 2.745538, ppl: 15.572993 +epoch: 1, batch: 4736, sum loss: 4837.216797, avg loss: 2.735982, ppl: 15.424890 +epoch: 1, batch: 4737, sum loss: 4494.595703, avg loss: 2.636127, ppl: 13.959028 +epoch: 1, batch: 4738, sum loss: 4313.035156, avg loss: 2.886904, ppl: 17.937696 +epoch: 1, batch: 4739, sum loss: 4389.814941, avg loss: 2.634943, ppl: 13.942514 +epoch: 1, batch: 4740, sum loss: 6567.473145, avg loss: 3.213049, ppl: 24.854763 +epoch: 1, batch: 4741, sum loss: 4933.792969, avg loss: 2.798521, ppl: 16.420347 +epoch: 1, batch: 4742, sum loss: 4142.046875, avg loss: 2.735830, ppl: 15.422544 +epoch: 1, batch: 4743, sum loss: 4236.115723, avg loss: 2.856450, ppl: 17.399654 +epoch: 1, batch: 4744, sum loss: 4500.892090, avg loss: 2.410762, ppl: 11.142444 +epoch: 1, batch: 4745, sum loss: 4836.846680, avg loss: 2.792637, ppl: 16.324003 +epoch: 1, batch: 4746, sum loss: 5560.397461, avg loss: 3.015400, ppl: 20.397247 +epoch: 1, batch: 4747, sum loss: 5069.349609, avg loss: 2.838382, ppl: 17.088091 +epoch: 1, batch: 4748, sum loss: 4721.529297, avg loss: 2.757903, ppl: 15.766740 +epoch: 1, batch: 4749, sum loss: 3728.685547, avg loss: 2.591164, ppl: 13.345302 +epoch: 1, batch: 4750, sum loss: 4132.703125, avg loss: 2.736890, ppl: 15.438890 +epoch: 1, batch: 4751, sum loss: 4925.537598, avg loss: 2.827519, ppl: 16.903465 
+epoch: 1, batch: 4752, sum loss: 4613.424316, avg loss: 2.734691, ppl: 15.404988 +epoch: 1, batch: 4753, sum loss: 4753.643555, avg loss: 2.980341, ppl: 19.694527 +epoch: 1, batch: 4754, sum loss: 4241.478516, avg loss: 2.603731, ppl: 13.514071 +epoch: 1, batch: 4755, sum loss: 5023.981445, avg loss: 2.748349, ppl: 15.616823 +epoch: 1, batch: 4756, sum loss: 4234.878906, avg loss: 2.695658, ppl: 14.815267 +epoch: 1, batch: 4757, sum loss: 4322.737793, avg loss: 2.838305, ppl: 17.086771 +epoch: 1, batch: 4758, sum loss: 4488.299805, avg loss: 2.884511, ppl: 17.894823 +epoch: 1, batch: 4759, sum loss: 4078.184814, avg loss: 2.442027, ppl: 11.496319 +epoch: 1, batch: 4760, sum loss: 3618.663086, avg loss: 2.509475, ppl: 12.298474 +epoch: 1, batch: 4761, sum loss: 4233.672363, avg loss: 2.852879, ppl: 17.337622 +epoch: 1, batch: 4762, sum loss: 4048.314697, avg loss: 2.772818, ppl: 16.003674 +epoch: 1, batch: 4763, sum loss: 4758.946289, avg loss: 2.972484, ppl: 19.540396 +epoch: 1, batch: 4764, sum loss: 4259.166992, avg loss: 2.556523, ppl: 12.890912 +epoch: 1, batch: 4765, sum loss: 4981.172363, avg loss: 2.931826, ppl: 18.761860 +epoch: 1, batch: 4766, sum loss: 4969.547363, avg loss: 2.937085, ppl: 18.860781 +epoch: 1, batch: 4767, sum loss: 4835.386719, avg loss: 2.816183, ppl: 16.712940 +epoch: 1, batch: 4768, sum loss: 4073.994629, avg loss: 2.775201, ppl: 16.041845 +epoch: 1, batch: 4769, sum loss: 4437.230957, avg loss: 2.591841, ppl: 13.354328 +epoch: 1, batch: 4770, sum loss: 6256.753906, avg loss: 3.205304, ppl: 24.663006 +epoch: 1, batch: 4771, sum loss: 4993.236328, avg loss: 2.864737, ppl: 17.544436 +epoch: 1, batch: 4772, sum loss: 5928.933594, avg loss: 2.883723, ppl: 17.880711 +epoch: 1, batch: 4773, sum loss: 4985.567383, avg loss: 2.635078, ppl: 13.944400 +epoch: 1, batch: 4774, sum loss: 4528.463867, avg loss: 2.538377, ppl: 12.659106 +epoch: 1, batch: 4775, sum loss: 4614.974609, avg loss: 2.706730, ppl: 14.980212 +epoch: 1, batch: 4776, sum 
loss: 5148.707520, avg loss: 2.953934, ppl: 19.181269 +epoch: 1, batch: 4777, sum loss: 5772.741699, avg loss: 3.139066, ppl: 23.082293 +epoch: 1, batch: 4778, sum loss: 4189.308105, avg loss: 2.639766, ppl: 14.009921 +epoch: 1, batch: 4779, sum loss: 4354.842285, avg loss: 2.539267, ppl: 12.670375 +epoch: 1, batch: 4780, sum loss: 5034.488281, avg loss: 2.886748, ppl: 17.934887 +epoch: 1, batch: 4781, sum loss: 4497.241699, avg loss: 2.715726, ppl: 15.115575 +epoch: 1, batch: 4782, sum loss: 3407.843750, avg loss: 2.503926, ppl: 12.230420 +epoch: 1, batch: 4783, sum loss: 5485.684570, avg loss: 2.840852, ppl: 17.130350 +epoch: 1, batch: 4784, sum loss: 5624.250000, avg loss: 3.027045, ppl: 20.636168 +epoch: 1, batch: 4785, sum loss: 5199.152344, avg loss: 2.949037, ppl: 19.087564 +epoch: 1, batch: 4786, sum loss: 3982.052979, avg loss: 2.668936, ppl: 14.424621 +epoch: 1, batch: 4787, sum loss: 5518.622070, avg loss: 2.781564, ppl: 16.144243 +epoch: 1, batch: 4788, sum loss: 5048.143555, avg loss: 2.812336, ppl: 16.648767 +epoch: 1, batch: 4789, sum loss: 4351.541992, avg loss: 2.754141, ppl: 15.707537 +epoch: 1, batch: 4790, sum loss: 5305.976074, avg loss: 2.969209, ppl: 19.476501 +epoch: 1, batch: 4791, sum loss: 4691.354492, avg loss: 2.689997, ppl: 14.731627 +epoch: 1, batch: 4792, sum loss: 4494.718262, avg loss: 2.672246, ppl: 14.472441 +epoch: 1, batch: 4793, sum loss: 4491.822754, avg loss: 2.877529, ppl: 17.770311 +epoch: 1, batch: 4794, sum loss: 4156.981934, avg loss: 2.495187, ppl: 12.124002 +epoch: 1, batch: 4795, sum loss: 5488.206055, avg loss: 3.090206, ppl: 21.981609 +epoch: 1, batch: 4796, sum loss: 6361.880859, avg loss: 3.073373, ppl: 21.614677 +epoch: 1, batch: 4797, sum loss: 3221.770508, avg loss: 2.327869, ppl: 10.256060 +epoch: 1, batch: 4798, sum loss: 5451.604004, avg loss: 3.145761, ppl: 23.237352 +epoch: 1, batch: 4799, sum loss: 5961.083008, avg loss: 3.191158, ppl: 24.316565 +epoch: 1, batch: 4800, sum loss: 4720.684570, avg loss: 
2.750982, ppl: 15.657997 +epoch: 1, batch: 4801, sum loss: 3828.995605, avg loss: 2.519076, ppl: 12.417119 +epoch: 1, batch: 4802, sum loss: 4255.296875, avg loss: 2.827440, ppl: 16.902136 +epoch: 1, batch: 4803, sum loss: 3887.369873, avg loss: 2.679097, ppl: 14.571932 +epoch: 1, batch: 4804, sum loss: 5625.357910, avg loss: 3.008213, ppl: 20.251175 +epoch: 1, batch: 4805, sum loss: 4431.378418, avg loss: 2.754120, ppl: 15.707207 +epoch: 1, batch: 4806, sum loss: 4121.510742, avg loss: 2.844383, ppl: 17.190948 +epoch: 1, batch: 4807, sum loss: 5047.720215, avg loss: 3.200837, ppl: 24.553076 +epoch: 1, batch: 4808, sum loss: 4783.714844, avg loss: 2.673960, ppl: 14.497268 +epoch: 1, batch: 4809, sum loss: 4895.933594, avg loss: 2.791296, ppl: 16.302137 +epoch: 1, batch: 4810, sum loss: 5370.540039, avg loss: 2.898295, ppl: 18.143179 +epoch: 1, batch: 4811, sum loss: 3696.013184, avg loss: 2.485550, ppl: 12.007725 +epoch: 1, batch: 4812, sum loss: 4869.014648, avg loss: 2.931376, ppl: 18.753408 +epoch: 1, batch: 4813, sum loss: 5088.286133, avg loss: 2.924302, ppl: 18.621231 +epoch: 1, batch: 4814, sum loss: 4800.928711, avg loss: 2.778315, ppl: 16.091888 +epoch: 1, batch: 4815, sum loss: 4579.587891, avg loss: 2.674993, ppl: 14.512245 +epoch: 1, batch: 4816, sum loss: 4476.748047, avg loss: 2.761720, ppl: 15.827041 +epoch: 1, batch: 4817, sum loss: 4241.868652, avg loss: 2.862260, ppl: 17.501028 +epoch: 1, batch: 4818, sum loss: 5157.838379, avg loss: 2.942292, ppl: 18.959259 +epoch: 1, batch: 4819, sum loss: 5171.309570, avg loss: 2.799843, ppl: 16.442066 +epoch: 1, batch: 4820, sum loss: 3682.632812, avg loss: 2.471566, ppl: 11.840972 +epoch: 1, batch: 4821, sum loss: 4303.001953, avg loss: 2.921251, ppl: 18.564489 +epoch: 1, batch: 4822, sum loss: 5911.454590, avg loss: 3.008374, ppl: 20.254435 +epoch: 1, batch: 4823, sum loss: 5105.883789, avg loss: 2.739208, ppl: 15.474724 +epoch: 1, batch: 4824, sum loss: 4290.184570, avg loss: 2.760736, ppl: 15.811483 
+epoch: 1, batch: 4825, sum loss: 4747.381348, avg loss: 2.774624, ppl: 16.032598 +epoch: 1, batch: 4826, sum loss: 4917.404785, avg loss: 2.834239, ppl: 17.017445 +epoch: 1, batch: 4827, sum loss: 4316.350098, avg loss: 2.726690, ppl: 15.282216 +epoch: 1, batch: 4828, sum loss: 4200.676270, avg loss: 2.791147, ppl: 16.299709 +epoch: 1, batch: 4829, sum loss: 3965.035156, avg loss: 2.759245, ppl: 15.787921 +epoch: 1, batch: 4830, sum loss: 4848.210938, avg loss: 2.949033, ppl: 19.087496 +epoch: 1, batch: 4831, sum loss: 4860.589355, avg loss: 2.735278, ppl: 15.414030 +epoch: 1, batch: 4832, sum loss: 4358.433105, avg loss: 2.600497, ppl: 13.470434 +epoch: 1, batch: 4833, sum loss: 4542.173828, avg loss: 2.716611, ppl: 15.128969 +epoch: 1, batch: 4834, sum loss: 4432.796387, avg loss: 2.694709, ppl: 14.801208 +epoch: 1, batch: 4835, sum loss: 3715.175049, avg loss: 2.663208, ppl: 14.342225 +epoch: 1, batch: 4836, sum loss: 4490.903320, avg loss: 2.720111, ppl: 15.182010 +epoch: 1, batch: 4837, sum loss: 4607.361328, avg loss: 2.879601, ppl: 17.807163 +epoch: 1, batch: 4838, sum loss: 4424.806641, avg loss: 2.753458, ppl: 15.696815 +epoch: 1, batch: 4839, sum loss: 3847.443848, avg loss: 2.532879, ppl: 12.589705 +epoch: 1, batch: 4840, sum loss: 6013.054688, avg loss: 3.205253, ppl: 24.661741 +epoch: 1, batch: 4841, sum loss: 5271.107422, avg loss: 3.039854, ppl: 20.902197 +epoch: 1, batch: 4842, sum loss: 4974.844727, avg loss: 2.895719, ppl: 18.096504 +epoch: 1, batch: 4843, sum loss: 3895.705566, avg loss: 2.439390, ppl: 11.466043 +epoch: 1, batch: 4844, sum loss: 6117.060059, avg loss: 3.200973, ppl: 24.556419 +epoch: 1, batch: 4845, sum loss: 4567.875000, avg loss: 2.847802, ppl: 17.249832 +epoch: 1, batch: 4846, sum loss: 3994.585449, avg loss: 2.688146, ppl: 14.704394 +epoch: 1, batch: 4847, sum loss: 5853.381836, avg loss: 3.184647, ppl: 24.158766 +epoch: 1, batch: 4848, sum loss: 4900.470215, avg loss: 2.955652, ppl: 19.214237 +epoch: 1, batch: 4849, sum 
loss: 4947.086426, avg loss: 2.774586, ppl: 16.031988 +epoch: 1, batch: 4850, sum loss: 4902.106445, avg loss: 2.665637, ppl: 14.377105 +epoch: 1, batch: 4851, sum loss: 4473.217285, avg loss: 2.612860, ppl: 13.638006 +epoch: 1, batch: 4852, sum loss: 4802.390625, avg loss: 2.648864, ppl: 14.137973 +epoch: 1, batch: 4853, sum loss: 5060.254883, avg loss: 2.880054, ppl: 17.815235 +epoch: 1, batch: 4854, sum loss: 5483.250488, avg loss: 3.202833, ppl: 24.602133 +epoch: 1, batch: 4855, sum loss: 4753.580566, avg loss: 2.848161, ppl: 17.256018 +epoch: 1, batch: 4856, sum loss: 3563.738770, avg loss: 2.724571, ppl: 15.249870 +epoch: 1, batch: 4857, sum loss: 4536.102539, avg loss: 2.808732, ppl: 16.588875 +epoch: 1, batch: 4858, sum loss: 4644.457031, avg loss: 2.882965, ppl: 17.867176 +epoch: 1, batch: 4859, sum loss: 4380.921875, avg loss: 2.709290, ppl: 15.018609 +epoch: 1, batch: 4860, sum loss: 4152.390625, avg loss: 2.618153, ppl: 13.710379 +epoch: 1, batch: 4861, sum loss: 4018.434570, avg loss: 2.557883, ppl: 12.908464 +epoch: 1, batch: 4862, sum loss: 4221.215820, avg loss: 2.678436, ppl: 14.562304 +epoch: 1, batch: 4863, sum loss: 4895.399414, avg loss: 2.898401, ppl: 18.145109 +epoch: 1, batch: 4864, sum loss: 3639.467773, avg loss: 2.529164, ppl: 12.543019 +epoch: 1, batch: 4865, sum loss: 4109.618652, avg loss: 2.560510, ppl: 12.942415 +epoch: 1, batch: 4866, sum loss: 4142.664062, avg loss: 2.718283, ppl: 15.154286 +epoch: 1, batch: 4867, sum loss: 4058.299316, avg loss: 2.808512, ppl: 16.585217 +epoch: 1, batch: 4868, sum loss: 4758.373535, avg loss: 2.910320, ppl: 18.362675 +epoch: 1, batch: 4869, sum loss: 4877.556152, avg loss: 2.988699, ppl: 19.859823 +epoch: 1, batch: 4870, sum loss: 5349.382812, avg loss: 2.876012, ppl: 17.743378 +epoch: 1, batch: 4871, sum loss: 4573.744629, avg loss: 2.727337, ppl: 15.292111 +epoch: 1, batch: 4872, sum loss: 3334.687988, avg loss: 2.353344, ppl: 10.520689 +epoch: 1, batch: 4873, sum loss: 5233.396484, avg loss: 
2.956721, ppl: 19.234804 +epoch: 1, batch: 4874, sum loss: 4198.892090, avg loss: 2.595113, ppl: 13.398095 +epoch: 1, batch: 4875, sum loss: 5229.437012, avg loss: 2.969584, ppl: 19.483809 +epoch: 1, batch: 4876, sum loss: 4584.644043, avg loss: 2.693680, ppl: 14.785993 +epoch: 1, batch: 4877, sum loss: 4281.418945, avg loss: 2.571423, ppl: 13.084428 +epoch: 1, batch: 4878, sum loss: 5168.925293, avg loss: 2.940230, ppl: 18.920204 +epoch: 1, batch: 4879, sum loss: 4559.071289, avg loss: 2.514656, ppl: 12.362356 +epoch: 1, batch: 4880, sum loss: 4491.940430, avg loss: 2.623797, ppl: 13.787976 +epoch: 1, batch: 4881, sum loss: 5016.789551, avg loss: 2.858569, ppl: 17.436563 +epoch: 1, batch: 4882, sum loss: 4753.386230, avg loss: 2.659981, ppl: 14.296021 +epoch: 1, batch: 4883, sum loss: 5054.238281, avg loss: 2.705695, ppl: 14.964712 +epoch: 1, batch: 4884, sum loss: 4608.205078, avg loss: 2.734840, ppl: 15.407273 +epoch: 1, batch: 4885, sum loss: 5057.650391, avg loss: 2.842974, ppl: 17.166739 +epoch: 1, batch: 4886, sum loss: 4326.734863, avg loss: 2.736708, ppl: 15.436081 +epoch: 1, batch: 4887, sum loss: 4303.356445, avg loss: 2.730556, ppl: 15.341414 +epoch: 1, batch: 4888, sum loss: 5710.084473, avg loss: 3.005308, ppl: 20.192432 +epoch: 1, batch: 4889, sum loss: 4296.370117, avg loss: 2.856629, ppl: 17.402761 +epoch: 1, batch: 4890, sum loss: 5060.319824, avg loss: 2.723531, ppl: 15.234012 +epoch: 1, batch: 4891, sum loss: 4395.274902, avg loss: 2.961776, ppl: 19.332266 +epoch: 1, batch: 4892, sum loss: 4286.032227, avg loss: 2.682123, ppl: 14.616090 +epoch: 1, batch: 4893, sum loss: 3671.901611, avg loss: 2.516725, ppl: 12.387958 +epoch: 1, batch: 4894, sum loss: 4945.815918, avg loss: 2.723467, ppl: 15.233042 +epoch: 1, batch: 4895, sum loss: 4789.554199, avg loss: 2.830706, ppl: 16.957426 +epoch: 1, batch: 4896, sum loss: 4261.625977, avg loss: 2.831645, ppl: 16.973364 +epoch: 1, batch: 4897, sum loss: 5109.241211, avg loss: 2.801119, ppl: 16.463060 
+epoch: 1, batch: 4898, sum loss: 4349.815918, avg loss: 2.468681, ppl: 11.806865 +epoch: 1, batch: 4899, sum loss: 5397.347168, avg loss: 2.949370, ppl: 19.093924 +epoch: 1, batch: 4900, sum loss: 4584.469727, avg loss: 2.648452, ppl: 14.132139 +epoch: 1, batch: 4901, sum loss: 4596.716797, avg loss: 2.809729, ppl: 16.605419 +epoch: 1, batch: 4902, sum loss: 5313.420898, avg loss: 2.925892, ppl: 18.650862 +epoch: 1, batch: 4903, sum loss: 4132.725586, avg loss: 2.898124, ppl: 18.140091 +epoch: 1, batch: 4904, sum loss: 4938.118652, avg loss: 2.942860, ppl: 18.970016 +epoch: 1, batch: 4905, sum loss: 3857.144287, avg loss: 2.645504, ppl: 14.090538 +epoch: 1, batch: 4906, sum loss: 4562.449707, avg loss: 2.790489, ppl: 16.288982 +epoch: 1, batch: 4907, sum loss: 4746.344727, avg loss: 2.911868, ppl: 18.391119 +epoch: 1, batch: 4908, sum loss: 3985.625000, avg loss: 2.667754, ppl: 14.407579 +epoch: 1, batch: 4909, sum loss: 4471.095703, avg loss: 2.726278, ppl: 15.275922 +epoch: 1, batch: 4910, sum loss: 4766.429199, avg loss: 2.911686, ppl: 18.387770 +epoch: 1, batch: 4911, sum loss: 5328.887207, avg loss: 2.936026, ppl: 18.840826 +epoch: 1, batch: 4912, sum loss: 4819.975586, avg loss: 2.762164, ppl: 15.834065 +epoch: 1, batch: 4913, sum loss: 5073.421387, avg loss: 2.847038, ppl: 17.236652 +epoch: 1, batch: 4914, sum loss: 4410.371094, avg loss: 2.765123, ppl: 15.880992 +epoch: 1, batch: 4915, sum loss: 4002.546631, avg loss: 2.592323, ppl: 13.360773 +epoch: 1, batch: 4916, sum loss: 3800.470703, avg loss: 2.648412, ppl: 14.131573 +epoch: 1, batch: 4917, sum loss: 5398.391602, avg loss: 2.854781, ppl: 17.370640 +epoch: 1, batch: 4918, sum loss: 5070.631836, avg loss: 2.877771, ppl: 17.774603 +epoch: 1, batch: 4919, sum loss: 4821.543945, avg loss: 2.698122, ppl: 14.851814 +epoch: 1, batch: 4920, sum loss: 5587.905762, avg loss: 2.930207, ppl: 18.731516 +epoch: 1, batch: 4921, sum loss: 6103.310059, avg loss: 3.036473, ppl: 20.831636 +epoch: 1, batch: 4922, sum 
loss: 4324.298340, avg loss: 2.723110, ppl: 15.227606 +epoch: 1, batch: 4923, sum loss: 3782.580566, avg loss: 2.501707, ppl: 12.203305 +epoch: 1, batch: 4924, sum loss: 4878.992188, avg loss: 2.841580, ppl: 17.142824 +epoch: 1, batch: 4925, sum loss: 4263.154785, avg loss: 2.565075, ppl: 13.001636 +epoch: 1, batch: 4926, sum loss: 4427.544434, avg loss: 2.763761, ppl: 15.859371 +epoch: 1, batch: 4927, sum loss: 5108.602051, avg loss: 3.042646, ppl: 20.960625 +epoch: 1, batch: 4928, sum loss: 4792.825195, avg loss: 3.003023, ppl: 20.146355 +epoch: 1, batch: 4929, sum loss: 5363.361328, avg loss: 2.793417, ppl: 16.336754 +epoch: 1, batch: 4930, sum loss: 4687.475586, avg loss: 2.793490, ppl: 16.337934 +epoch: 1, batch: 4931, sum loss: 4417.253906, avg loss: 2.693448, ppl: 14.782553 +epoch: 1, batch: 4932, sum loss: 5079.516113, avg loss: 2.826665, ppl: 16.889036 +epoch: 1, batch: 4933, sum loss: 3477.473389, avg loss: 2.483910, ppl: 11.988042 +epoch: 1, batch: 4934, sum loss: 4443.799805, avg loss: 2.582103, ppl: 13.224924 +epoch: 1, batch: 4935, sum loss: 4234.541992, avg loss: 2.560183, ppl: 12.938179 +epoch: 1, batch: 4936, sum loss: 5770.659180, avg loss: 2.881008, ppl: 17.832241 +epoch: 1, batch: 4937, sum loss: 4700.081543, avg loss: 2.745375, ppl: 15.570447 +epoch: 1, batch: 4938, sum loss: 3734.942139, avg loss: 2.449142, ppl: 11.578413 +epoch: 1, batch: 4939, sum loss: 3995.173828, avg loss: 2.594269, ppl: 13.386792 +epoch: 1, batch: 4940, sum loss: 4768.916992, avg loss: 2.805245, ppl: 16.531132 +epoch: 1, batch: 4941, sum loss: 4726.484375, avg loss: 2.752757, ppl: 15.685823 +epoch: 1, batch: 4942, sum loss: 4551.158691, avg loss: 2.520021, ppl: 12.428864 +epoch: 1, batch: 4943, sum loss: 4756.769043, avg loss: 3.088811, ppl: 21.950960 +epoch: 1, batch: 4944, sum loss: 5487.915039, avg loss: 2.953668, ppl: 19.176161 +epoch: 1, batch: 4945, sum loss: 4051.352539, avg loss: 2.665364, ppl: 14.373174 +epoch: 1, batch: 4946, sum loss: 5110.208984, avg loss: 
2.957297, ppl: 19.245876 +epoch: 1, batch: 4947, sum loss: 4638.274414, avg loss: 2.805974, ppl: 16.543173 +epoch: 1, batch: 4948, sum loss: 5306.382812, avg loss: 2.926852, ppl: 18.668774 +epoch: 1, batch: 4949, sum loss: 4828.408691, avg loss: 2.894730, ppl: 18.078613 +epoch: 1, batch: 4950, sum loss: 6136.372070, avg loss: 2.929056, ppl: 18.709959 +epoch: 1, batch: 4951, sum loss: 4517.698242, avg loss: 2.853884, ppl: 17.355053 +epoch: 1, batch: 4952, sum loss: 4849.533691, avg loss: 2.966076, ppl: 19.415577 +epoch: 1, batch: 4953, sum loss: 3943.770020, avg loss: 2.584384, ppl: 13.255124 +epoch: 1, batch: 4954, sum loss: 4491.563477, avg loss: 2.727118, ppl: 15.288761 +epoch: 1, batch: 4955, sum loss: 4358.602539, avg loss: 2.632006, ppl: 13.901634 +epoch: 1, batch: 4956, sum loss: 4627.145508, avg loss: 2.605375, ppl: 13.536298 +epoch: 1, batch: 4957, sum loss: 4392.666016, avg loss: 2.624054, ppl: 13.791521 +epoch: 1, batch: 4958, sum loss: 4975.187012, avg loss: 2.785659, ppl: 16.210499 +epoch: 1, batch: 4959, sum loss: 4365.048340, avg loss: 2.716272, ppl: 15.123830 +epoch: 1, batch: 4960, sum loss: 4426.172852, avg loss: 2.815632, ppl: 16.703724 +epoch: 1, batch: 4961, sum loss: 4213.087891, avg loss: 2.688633, ppl: 14.711555 +epoch: 1, batch: 4962, sum loss: 5053.435547, avg loss: 2.905943, ppl: 18.282482 +epoch: 1, batch: 4963, sum loss: 4545.386719, avg loss: 2.703978, ppl: 14.939038 +epoch: 1, batch: 4964, sum loss: 3225.269531, avg loss: 2.343946, ppl: 10.422281 +epoch: 1, batch: 4965, sum loss: 4058.201660, avg loss: 2.894580, ppl: 18.075914 +epoch: 1, batch: 4966, sum loss: 3831.582275, avg loss: 2.606519, ppl: 13.551788 +epoch: 1, batch: 4967, sum loss: 4367.732910, avg loss: 2.706154, ppl: 14.971589 +epoch: 1, batch: 4968, sum loss: 4763.517578, avg loss: 2.975339, ppl: 19.596264 +epoch: 1, batch: 4969, sum loss: 4529.359375, avg loss: 2.766866, ppl: 15.908697 +epoch: 1, batch: 4970, sum loss: 4792.082520, avg loss: 2.647560, ppl: 14.119540 
+epoch: 1, batch: 4971, sum loss: 5298.112793, avg loss: 2.880975, ppl: 17.831650 +epoch: 1, batch: 4972, sum loss: 4541.762207, avg loss: 2.671625, ppl: 14.463451 +epoch: 1, batch: 4973, sum loss: 3984.749023, avg loss: 2.663602, ppl: 14.347882 +epoch: 1, batch: 4974, sum loss: 5647.269531, avg loss: 2.939755, ppl: 18.911217 +epoch: 1, batch: 4975, sum loss: 4567.231445, avg loss: 2.721830, ppl: 15.208134 +epoch: 1, batch: 4976, sum loss: 4673.210938, avg loss: 2.710679, ppl: 15.039488 +epoch: 1, batch: 4977, sum loss: 3603.129395, avg loss: 2.653262, ppl: 14.200279 +epoch: 1, batch: 4978, sum loss: 3574.960205, avg loss: 2.412254, ppl: 11.159086 +epoch: 1, batch: 4979, sum loss: 4006.572754, avg loss: 2.488554, ppl: 12.043854 +epoch: 1, batch: 4980, sum loss: 5214.472168, avg loss: 3.056549, ppl: 21.254074 +epoch: 1, batch: 4981, sum loss: 5101.077637, avg loss: 2.804331, ppl: 16.516020 +epoch: 1, batch: 4982, sum loss: 5017.181641, avg loss: 2.845820, ppl: 17.215677 +epoch: 1, batch: 4983, sum loss: 5088.734375, avg loss: 2.764114, ppl: 15.864984 +epoch: 1, batch: 4984, sum loss: 4445.972168, avg loss: 2.678297, ppl: 14.560269 +epoch: 1, batch: 4985, sum loss: 4401.305176, avg loss: 2.749098, ppl: 15.628522 +epoch: 1, batch: 4986, sum loss: 5401.063477, avg loss: 2.988967, ppl: 19.865150 +epoch: 1, batch: 4987, sum loss: 4386.155273, avg loss: 2.639083, ppl: 14.000355 +epoch: 1, batch: 4988, sum loss: 6084.354492, avg loss: 3.200607, ppl: 24.547434 +epoch: 1, batch: 4989, sum loss: 4653.396973, avg loss: 2.895705, ppl: 18.096247 +epoch: 1, batch: 4990, sum loss: 4996.503418, avg loss: 2.876513, ppl: 17.752268 +epoch: 1, batch: 4991, sum loss: 4581.378418, avg loss: 3.006154, ppl: 20.209520 +epoch: 1, batch: 4992, sum loss: 4532.165527, avg loss: 2.799361, ppl: 16.434141 +epoch: 1, batch: 4993, sum loss: 4876.404785, avg loss: 2.728822, ppl: 15.314836 +epoch: 1, batch: 4994, sum loss: 5423.888184, avg loss: 3.078257, ppl: 21.720507 +epoch: 1, batch: 4995, sum 
loss: 4543.550781, avg loss: 2.758683, ppl: 15.779044 +epoch: 1, batch: 4996, sum loss: 5359.637207, avg loss: 3.082023, ppl: 21.802458 +epoch: 1, batch: 4997, sum loss: 4502.963867, avg loss: 2.712629, ppl: 15.068837 +epoch: 1, batch: 4998, sum loss: 4316.749512, avg loss: 2.795822, ppl: 16.376091 +epoch: 1, batch: 4999, sum loss: 4960.757812, avg loss: 2.829868, ppl: 16.943218 +epoch: 1, batch: 5000, sum loss: 3829.774170, avg loss: 2.650363, ppl: 14.159174 +epoch: 1, batch: 5001, sum loss: 5483.514160, avg loss: 3.078896, ppl: 21.734400 +epoch: 1, batch: 5002, sum loss: 4702.862793, avg loss: 2.879891, ppl: 17.812338 +epoch: 1, batch: 5003, sum loss: 4853.882812, avg loss: 2.897841, ppl: 18.134945 +epoch: 1, batch: 5004, sum loss: 3866.098633, avg loss: 2.793424, ppl: 16.336861 +epoch: 1, batch: 5005, sum loss: 4346.168945, avg loss: 2.702841, ppl: 14.922070 +epoch: 1, batch: 5006, sum loss: 5151.994629, avg loss: 2.904168, ppl: 18.250055 +epoch: 1, batch: 5007, sum loss: 4475.592285, avg loss: 2.576622, ppl: 13.152634 +epoch: 1, batch: 5008, sum loss: 5749.197266, avg loss: 2.868861, ppl: 17.616940 +epoch: 1, batch: 5009, sum loss: 4455.134277, avg loss: 2.791437, ppl: 16.304440 +epoch: 1, batch: 5010, sum loss: 5239.875977, avg loss: 2.940447, ppl: 18.924301 +epoch: 1, batch: 5011, sum loss: 4552.757324, avg loss: 2.894315, ppl: 18.071114 +epoch: 1, batch: 5012, sum loss: 4687.124023, avg loss: 2.696849, ppl: 14.832925 +epoch: 1, batch: 5013, sum loss: 4817.696777, avg loss: 2.934042, ppl: 18.803480 +epoch: 1, batch: 5014, sum loss: 5150.830566, avg loss: 2.788755, ppl: 16.260765 +epoch: 1, batch: 5015, sum loss: 4557.590820, avg loss: 2.802946, ppl: 16.493170 +epoch: 1, batch: 5016, sum loss: 4275.167480, avg loss: 2.614781, ppl: 13.664228 +epoch: 1, batch: 5017, sum loss: 5215.649414, avg loss: 2.887957, ppl: 17.956579 +epoch: 1, batch: 5018, sum loss: 4959.899414, avg loss: 2.933116, ppl: 18.786081 +epoch: 1, batch: 5019, sum loss: 4882.159668, avg loss: 
2.785031, ppl: 16.200325 +epoch: 1, batch: 5020, sum loss: 5778.923828, avg loss: 3.228449, ppl: 25.240475 +epoch: 1, batch: 5021, sum loss: 5101.558105, avg loss: 3.122129, ppl: 22.694639 +epoch: 1, batch: 5022, sum loss: 4379.031250, avg loss: 2.962809, ppl: 19.352253 +epoch: 1, batch: 5023, sum loss: 5336.146484, avg loss: 2.961236, ppl: 19.321835 +epoch: 1, batch: 5024, sum loss: 3895.299316, avg loss: 2.784346, ppl: 16.189220 +epoch: 1, batch: 5025, sum loss: 5140.902344, avg loss: 3.038358, ppl: 20.870955 +epoch: 1, batch: 5026, sum loss: 4648.476074, avg loss: 2.896247, ppl: 18.106060 +epoch: 1, batch: 5027, sum loss: 5011.679688, avg loss: 2.896924, ppl: 18.118336 +epoch: 1, batch: 5028, sum loss: 4731.250488, avg loss: 2.843300, ppl: 17.172335 +epoch: 1, batch: 5029, sum loss: 4328.146484, avg loss: 2.631092, ppl: 13.888929 +epoch: 1, batch: 5030, sum loss: 4183.881836, avg loss: 2.633028, ppl: 13.915847 +epoch: 1, batch: 5031, sum loss: 5354.464844, avg loss: 2.867951, ppl: 17.600920 +epoch: 1, batch: 5032, sum loss: 6307.194336, avg loss: 3.262905, ppl: 26.125311 +epoch: 1, batch: 5033, sum loss: 5386.027344, avg loss: 2.992238, ppl: 19.930227 +epoch: 1, batch: 5034, sum loss: 5260.571777, avg loss: 2.859006, ppl: 17.444181 +epoch: 1, batch: 5035, sum loss: 5218.719238, avg loss: 2.854879, ppl: 17.372341 +epoch: 1, batch: 5036, sum loss: 5412.225098, avg loss: 3.033759, ppl: 20.775173 +epoch: 1, batch: 5037, sum loss: 4875.019531, avg loss: 2.829379, ppl: 16.934938 +epoch: 1, batch: 5038, sum loss: 4924.805176, avg loss: 2.982923, ppl: 19.745443 +epoch: 1, batch: 5039, sum loss: 4072.307617, avg loss: 2.622220, ppl: 13.766252 +epoch: 1, batch: 5040, sum loss: 4766.704102, avg loss: 2.931552, ppl: 18.756721 +epoch: 1, batch: 5041, sum loss: 3966.245850, avg loss: 2.720333, ppl: 15.185383 +epoch: 1, batch: 5042, sum loss: 4630.342285, avg loss: 2.895774, ppl: 18.097498 +epoch: 1, batch: 5043, sum loss: 5125.232910, avg loss: 2.855283, ppl: 17.379354 
+epoch: 1, batch: 5044, sum loss: 5003.933105, avg loss: 2.900831, ppl: 18.189255 +epoch: 1, batch: 5045, sum loss: 4510.497070, avg loss: 2.946112, ppl: 19.031813 +epoch: 1, batch: 5046, sum loss: 5218.638672, avg loss: 2.970198, ppl: 19.495787 +epoch: 1, batch: 5047, sum loss: 4992.880859, avg loss: 2.803414, ppl: 16.500891 +epoch: 1, batch: 5048, sum loss: 5288.129883, avg loss: 2.974201, ppl: 19.573986 +epoch: 1, batch: 5049, sum loss: 5238.294434, avg loss: 2.894085, ppl: 18.066971 +epoch: 1, batch: 5050, sum loss: 3887.631836, avg loss: 2.527719, ppl: 12.524904 +epoch: 1, batch: 5051, sum loss: 3917.989014, avg loss: 2.569173, ppl: 13.055025 +epoch: 1, batch: 5052, sum loss: 4901.904297, avg loss: 2.994444, ppl: 19.974245 +epoch: 1, batch: 5053, sum loss: 4802.409668, avg loss: 2.957149, ppl: 19.243027 +epoch: 1, batch: 5054, sum loss: 4282.695312, avg loss: 2.559890, ppl: 12.934392 +epoch: 1, batch: 5055, sum loss: 4350.220703, avg loss: 2.581733, ppl: 13.220032 +epoch: 1, batch: 5056, sum loss: 4204.125977, avg loss: 2.603174, ppl: 13.506539 +epoch: 1, batch: 5057, sum loss: 4204.210938, avg loss: 2.802807, ppl: 16.490877 +epoch: 1, batch: 5058, sum loss: 4367.667480, avg loss: 2.905966, ppl: 18.282902 +epoch: 1, batch: 5059, sum loss: 4922.496094, avg loss: 2.622534, ppl: 13.770572 +epoch: 1, batch: 5060, sum loss: 4969.293945, avg loss: 2.949136, ppl: 19.089449 +epoch: 1, batch: 5061, sum loss: 4589.860840, avg loss: 2.582927, ppl: 13.235820 +epoch: 1, batch: 5062, sum loss: 3966.401123, avg loss: 2.760196, ppl: 15.802936 +epoch: 1, batch: 5063, sum loss: 4050.713867, avg loss: 2.586663, ppl: 13.285361 +epoch: 1, batch: 5064, sum loss: 4410.915527, avg loss: 2.873561, ppl: 17.699930 +epoch: 1, batch: 5065, sum loss: 3655.439453, avg loss: 2.490081, ppl: 12.062257 +epoch: 1, batch: 5066, sum loss: 5245.010254, avg loss: 2.783976, ppl: 16.183235 +epoch: 1, batch: 5067, sum loss: 4887.834473, avg loss: 2.974945, ppl: 19.588543 +epoch: 1, batch: 5068, sum 
loss: 5239.208008, avg loss: 2.889801, ppl: 17.989721 +epoch: 1, batch: 5069, sum loss: 4671.907715, avg loss: 2.932773, ppl: 18.779640 +epoch: 1, batch: 5070, sum loss: 5009.810059, avg loss: 2.885835, ppl: 17.918526 +epoch: 1, batch: 5071, sum loss: 5127.083496, avg loss: 3.001805, ppl: 20.121830 +epoch: 1, batch: 5072, sum loss: 5110.536133, avg loss: 2.729987, ppl: 15.332693 +epoch: 1, batch: 5073, sum loss: 4765.441895, avg loss: 2.608343, ppl: 13.576531 +epoch: 1, batch: 5074, sum loss: 4277.068848, avg loss: 2.878243, ppl: 17.782997 +epoch: 1, batch: 5075, sum loss: 4032.642822, avg loss: 2.467958, ppl: 11.798326 +epoch: 1, batch: 5076, sum loss: 4440.145508, avg loss: 2.563594, ppl: 12.982397 +epoch: 1, batch: 5077, sum loss: 4712.160156, avg loss: 2.712815, ppl: 15.071647 +epoch: 1, batch: 5078, sum loss: 4571.680176, avg loss: 2.860876, ppl: 17.476835 +epoch: 1, batch: 5079, sum loss: 4260.451660, avg loss: 2.558830, ppl: 12.920689 +epoch: 1, batch: 5080, sum loss: 4514.731445, avg loss: 2.907104, ppl: 18.303705 +epoch: 1, batch: 5081, sum loss: 4494.055664, avg loss: 2.651360, ppl: 14.173306 +epoch: 1, batch: 5082, sum loss: 5185.596680, avg loss: 2.944689, ppl: 19.004742 +epoch: 1, batch: 5083, sum loss: 4600.361328, avg loss: 2.777996, ppl: 16.086748 +epoch: 1, batch: 5084, sum loss: 4808.135742, avg loss: 2.805213, ppl: 16.530603 +epoch: 1, batch: 5085, sum loss: 4688.600098, avg loss: 2.826160, ppl: 16.880522 +epoch: 1, batch: 5086, sum loss: 4357.049805, avg loss: 2.692861, ppl: 14.773889 +epoch: 1, batch: 5087, sum loss: 3194.330566, avg loss: 2.345323, ppl: 10.436646 +epoch: 1, batch: 5088, sum loss: 4441.655762, avg loss: 2.664460, ppl: 14.360199 +epoch: 1, batch: 5089, sum loss: 4620.551270, avg loss: 2.641825, ppl: 14.038797 +epoch: 1, batch: 5090, sum loss: 4510.422363, avg loss: 2.856506, ppl: 17.400616 +epoch: 1, batch: 5091, sum loss: 4002.251953, avg loss: 2.680678, ppl: 14.594980 +epoch: 1, batch: 5092, sum loss: 4246.304688, avg loss: 
2.562646, ppl: 12.970093 +epoch: 1, batch: 5093, sum loss: 5252.321289, avg loss: 2.984273, ppl: 19.772131 +epoch: 1, batch: 5094, sum loss: 5104.439941, avg loss: 2.964251, ppl: 19.380178 +epoch: 1, batch: 5095, sum loss: 4590.391602, avg loss: 2.837078, ppl: 17.065821 +epoch: 1, batch: 5096, sum loss: 6283.954102, avg loss: 3.215944, ppl: 24.926807 +epoch: 1, batch: 5097, sum loss: 4843.598633, avg loss: 2.794921, ppl: 16.361340 +epoch: 1, batch: 5098, sum loss: 5426.814453, avg loss: 3.059083, ppl: 21.308018 +epoch: 1, batch: 5099, sum loss: 4618.429688, avg loss: 2.799048, ppl: 16.429007 +epoch: 1, batch: 5100, sum loss: 3983.923340, avg loss: 2.708310, ppl: 15.003892 +epoch: 1, batch: 5101, sum loss: 5811.208496, avg loss: 3.242862, ppl: 25.606903 +epoch: 1, batch: 5102, sum loss: 4758.276367, avg loss: 2.873355, ppl: 17.696293 +epoch: 1, batch: 5103, sum loss: 4385.495605, avg loss: 2.610414, ppl: 13.604686 +epoch: 1, batch: 5104, sum loss: 3926.012451, avg loss: 2.728292, ppl: 15.306721 +epoch: 1, batch: 5105, sum loss: 4235.024902, avg loss: 2.746449, ppl: 15.587191 +epoch: 1, batch: 5106, sum loss: 5430.225586, avg loss: 3.113661, ppl: 22.503284 +epoch: 1, batch: 5107, sum loss: 5394.124512, avg loss: 2.957305, ppl: 19.246037 +epoch: 1, batch: 5108, sum loss: 5548.830078, avg loss: 3.124341, ppl: 22.744907 +epoch: 1, batch: 5109, sum loss: 4061.382812, avg loss: 2.684325, ppl: 14.648303 +epoch: 1, batch: 5110, sum loss: 6649.719727, avg loss: 3.346613, ppl: 28.406349 +epoch: 1, batch: 5111, sum loss: 4670.931152, avg loss: 2.855093, ppl: 17.376045 +epoch: 1, batch: 5112, sum loss: 5372.207031, avg loss: 3.023189, ppl: 20.556744 +epoch: 1, batch: 5113, sum loss: 4707.191406, avg loss: 2.720920, ppl: 15.194292 +epoch: 1, batch: 5114, sum loss: 4030.151367, avg loss: 2.674288, ppl: 14.502014 +epoch: 1, batch: 5115, sum loss: 5174.961914, avg loss: 2.814009, ppl: 16.676636 +epoch: 1, batch: 5116, sum loss: 4820.557617, avg loss: 2.794526, ppl: 16.354879 
+epoch: 1, batch: 5117, sum loss: 4471.514160, avg loss: 2.685594, ppl: 14.666912 +epoch: 1, batch: 5118, sum loss: 4139.857422, avg loss: 2.770989, ppl: 15.974424 +epoch: 1, batch: 5119, sum loss: 4666.273438, avg loss: 2.680226, ppl: 14.588391 +epoch: 1, batch: 5120, sum loss: 3282.835938, avg loss: 2.384049, ppl: 10.848745 +epoch: 1, batch: 5121, sum loss: 4435.338867, avg loss: 2.839525, ppl: 17.107645 +epoch: 1, batch: 5122, sum loss: 4481.095215, avg loss: 2.876184, ppl: 17.746431 +epoch: 1, batch: 5123, sum loss: 5851.332031, avg loss: 3.127382, ppl: 22.814180 +epoch: 1, batch: 5124, sum loss: 4486.273926, avg loss: 2.526055, ppl: 12.504082 +epoch: 1, batch: 5125, sum loss: 3733.824951, avg loss: 2.620228, ppl: 13.738853 +epoch: 1, batch: 5126, sum loss: 5763.521973, avg loss: 3.075519, ppl: 21.661112 +epoch: 1, batch: 5127, sum loss: 4688.504395, avg loss: 2.772623, ppl: 16.000542 +epoch: 1, batch: 5128, sum loss: 4806.394531, avg loss: 2.779870, ppl: 16.116919 +epoch: 1, batch: 5129, sum loss: 4090.502197, avg loss: 2.635633, ppl: 13.952141 +epoch: 1, batch: 5130, sum loss: 4458.497559, avg loss: 2.758971, ppl: 15.783597 +epoch: 1, batch: 5131, sum loss: 5314.324707, avg loss: 3.071864, ppl: 21.582092 +epoch: 1, batch: 5132, sum loss: 4292.687988, avg loss: 2.556693, ppl: 12.893113 +epoch: 1, batch: 5133, sum loss: 5652.591309, avg loss: 3.075403, ppl: 21.658613 +epoch: 1, batch: 5134, sum loss: 4772.694824, avg loss: 2.754007, ppl: 15.705443 +epoch: 1, batch: 5135, sum loss: 3562.305664, avg loss: 2.583253, ppl: 13.240134 +epoch: 1, batch: 5136, sum loss: 5289.150391, avg loss: 2.863644, ppl: 17.525274 +epoch: 1, batch: 5137, sum loss: 4570.036621, avg loss: 2.664744, ppl: 14.364277 +epoch: 1, batch: 5138, sum loss: 4129.815918, avg loss: 2.613808, ppl: 13.650930 +epoch: 1, batch: 5139, sum loss: 4784.438965, avg loss: 3.035811, ppl: 20.817863 +epoch: 1, batch: 5140, sum loss: 3752.409424, avg loss: 2.589655, ppl: 13.325170 +epoch: 1, batch: 5141, sum 
loss: 3888.343994, avg loss: 2.536428, ppl: 12.634459 +epoch: 1, batch: 5142, sum loss: 5694.040527, avg loss: 2.885981, ppl: 17.921141 +epoch: 1, batch: 5143, sum loss: 4056.903320, avg loss: 2.454267, ppl: 11.637900 +epoch: 1, batch: 5144, sum loss: 4910.348633, avg loss: 2.702448, ppl: 14.916207 +epoch: 1, batch: 5145, sum loss: 5717.678223, avg loss: 3.224861, ppl: 25.150076 +epoch: 1, batch: 5146, sum loss: 4481.543945, avg loss: 2.781840, ppl: 16.148705 +epoch: 1, batch: 5147, sum loss: 4466.779785, avg loss: 2.647765, ppl: 14.122442 +epoch: 1, batch: 5148, sum loss: 4878.746094, avg loss: 2.993096, ppl: 19.947336 +epoch: 1, batch: 5149, sum loss: 4115.288086, avg loss: 2.458356, ppl: 11.685586 +epoch: 1, batch: 5150, sum loss: 3875.039062, avg loss: 2.623588, ppl: 13.785093 +epoch: 1, batch: 5151, sum loss: 4684.071777, avg loss: 2.658384, ppl: 14.273199 +epoch: 1, batch: 5152, sum loss: 3719.081055, avg loss: 2.654590, ppl: 14.219160 +epoch: 1, batch: 5153, sum loss: 4331.312988, avg loss: 2.851424, ppl: 17.312416 +epoch: 1, batch: 5154, sum loss: 5962.547852, avg loss: 3.022072, ppl: 20.533794 +epoch: 1, batch: 5155, sum loss: 4558.970703, avg loss: 2.704016, ppl: 14.939609 +epoch: 1, batch: 5156, sum loss: 4959.449219, avg loss: 2.953811, ppl: 19.178909 +epoch: 1, batch: 5157, sum loss: 4952.217773, avg loss: 2.748179, ppl: 15.614168 +epoch: 1, batch: 5158, sum loss: 4396.548828, avg loss: 2.777352, ppl: 16.076399 +epoch: 1, batch: 5159, sum loss: 5185.412598, avg loss: 2.839766, ppl: 17.111761 +epoch: 1, batch: 5160, sum loss: 6342.366699, avg loss: 3.086310, ppl: 21.896124 +epoch: 1, batch: 5161, sum loss: 4639.147949, avg loss: 2.917703, ppl: 18.498751 +epoch: 1, batch: 5162, sum loss: 4337.935059, avg loss: 2.750751, ppl: 15.654391 +epoch: 1, batch: 5163, sum loss: 3659.078613, avg loss: 2.712438, ppl: 15.065956 +epoch: 1, batch: 5164, sum loss: 4052.175293, avg loss: 2.622767, ppl: 13.773786 +epoch: 1, batch: 5165, sum loss: 4217.536133, avg loss: 
2.686329, ppl: 14.677693 +epoch: 1, batch: 5166, sum loss: 4474.629883, avg loss: 2.772385, ppl: 15.996746 +epoch: 1, batch: 5167, sum loss: 4527.123047, avg loss: 2.847247, ppl: 17.240255 +epoch: 1, batch: 5168, sum loss: 5150.752930, avg loss: 2.905106, ppl: 18.267181 +epoch: 1, batch: 5169, sum loss: 3560.558105, avg loss: 2.600846, ppl: 13.475134 +epoch: 1, batch: 5170, sum loss: 3561.471680, avg loss: 2.372733, ppl: 10.726665 +epoch: 1, batch: 5171, sum loss: 5221.186523, avg loss: 2.899049, ppl: 18.156862 +epoch: 1, batch: 5172, sum loss: 4228.675293, avg loss: 2.849512, ppl: 17.279341 +epoch: 1, batch: 5173, sum loss: 4460.307129, avg loss: 2.819410, ppl: 16.766956 +epoch: 1, batch: 5174, sum loss: 4433.995605, avg loss: 2.842305, ppl: 17.155262 +epoch: 1, batch: 5175, sum loss: 5919.178711, avg loss: 3.112082, ppl: 22.467764 +epoch: 1, batch: 5176, sum loss: 4671.734863, avg loss: 2.876684, ppl: 17.755299 +epoch: 1, batch: 5177, sum loss: 5343.893066, avg loss: 2.836462, ppl: 17.055309 +epoch: 1, batch: 5178, sum loss: 5134.867188, avg loss: 2.939249, ppl: 18.901638 +epoch: 1, batch: 5179, sum loss: 4696.314941, avg loss: 2.853168, ppl: 17.342640 +epoch: 1, batch: 5180, sum loss: 4939.576660, avg loss: 2.742686, ppl: 15.528632 +epoch: 1, batch: 5181, sum loss: 4309.732910, avg loss: 2.829766, ppl: 16.941488 +epoch: 1, batch: 5182, sum loss: 4325.020508, avg loss: 2.746045, ppl: 15.580882 +epoch: 1, batch: 5183, sum loss: 5761.331055, avg loss: 3.124366, ppl: 22.745470 +epoch: 1, batch: 5184, sum loss: 5516.580566, avg loss: 2.999772, ppl: 20.080959 +epoch: 1, batch: 5185, sum loss: 3751.256104, avg loss: 2.442224, ppl: 11.498586 +epoch: 1, batch: 5186, sum loss: 3779.347900, avg loss: 2.593924, ppl: 13.382184 +epoch: 1, batch: 5187, sum loss: 5183.388184, avg loss: 2.915292, ppl: 18.454191 +epoch: 1, batch: 5188, sum loss: 5810.284180, avg loss: 3.128855, ppl: 22.847803 +epoch: 1, batch: 5189, sum loss: 4874.665039, avg loss: 2.772847, ppl: 16.004131 
+epoch: 1, batch: 5190, sum loss: 5310.080566, avg loss: 2.727314, ppl: 15.291761 +epoch: 1, batch: 5191, sum loss: 4271.748535, avg loss: 2.902003, ppl: 18.210585 +epoch: 1, batch: 5192, sum loss: 5012.035156, avg loss: 2.801585, ppl: 16.470728 +epoch: 1, batch: 5193, sum loss: 3744.332520, avg loss: 2.651793, ppl: 14.179446 +epoch: 1, batch: 5194, sum loss: 4815.016602, avg loss: 2.738917, ppl: 15.470223 +epoch: 1, batch: 5195, sum loss: 4840.418945, avg loss: 2.884636, ppl: 17.897051 +epoch: 1, batch: 5196, sum loss: 4798.332031, avg loss: 2.658356, ppl: 14.272801 +epoch: 1, batch: 5197, sum loss: 3801.273926, avg loss: 2.742622, ppl: 15.527643 +epoch: 1, batch: 5198, sum loss: 5590.854004, avg loss: 2.899821, ppl: 18.170889 +epoch: 1, batch: 5199, sum loss: 5083.628418, avg loss: 3.006285, ppl: 20.212181 +epoch: 1, batch: 5200, sum loss: 4900.919922, avg loss: 2.854351, ppl: 17.363157 +epoch: 1, batch: 5201, sum loss: 4926.921387, avg loss: 2.901603, ppl: 18.203297 +epoch: 1, batch: 5202, sum loss: 4930.293457, avg loss: 3.079509, ppl: 21.747717 +epoch: 1, batch: 5203, sum loss: 4477.607422, avg loss: 2.903766, ppl: 18.242722 +epoch: 1, batch: 5204, sum loss: 4491.073242, avg loss: 2.668493, ppl: 14.418219 +epoch: 1, batch: 5205, sum loss: 4671.794434, avg loss: 2.578253, ppl: 13.174104 +epoch: 1, batch: 5206, sum loss: 4821.532227, avg loss: 2.902789, ppl: 18.224901 +epoch: 1, batch: 5207, sum loss: 4291.542969, avg loss: 2.597786, ppl: 13.433968 +epoch: 1, batch: 5208, sum loss: 4349.939453, avg loss: 2.749645, ppl: 15.637087 +epoch: 1, batch: 5209, sum loss: 3828.994629, avg loss: 2.544183, ppl: 12.732815 +epoch: 1, batch: 5210, sum loss: 5884.617188, avg loss: 2.960069, ppl: 19.299303 +epoch: 1, batch: 5211, sum loss: 4164.419434, avg loss: 2.667790, ppl: 14.408088 +epoch: 1, batch: 5212, sum loss: 4224.957520, avg loss: 2.674024, ppl: 14.498191 +epoch: 1, batch: 5213, sum loss: 4011.660645, avg loss: 2.473280, ppl: 11.861290 +epoch: 1, batch: 5214, sum 
loss: 5219.788574, avg loss: 2.967475, ppl: 19.442768 +epoch: 1, batch: 5215, sum loss: 4639.628418, avg loss: 2.995241, ppl: 19.990181 +epoch: 1, batch: 5216, sum loss: 4381.718750, avg loss: 2.771486, ppl: 15.982359 +epoch: 1, batch: 5217, sum loss: 4369.928223, avg loss: 2.783394, ppl: 16.173820 +epoch: 1, batch: 5218, sum loss: 5513.416016, avg loss: 2.953088, ppl: 19.165049 +epoch: 1, batch: 5219, sum loss: 4081.690918, avg loss: 2.816902, ppl: 16.724958 +epoch: 1, batch: 5220, sum loss: 5522.084473, avg loss: 2.975261, ppl: 19.594736 +epoch: 1, batch: 5221, sum loss: 5173.019043, avg loss: 2.878697, ppl: 17.791082 +epoch: 1, batch: 5222, sum loss: 3165.315186, avg loss: 2.342942, ppl: 10.411825 +epoch: 1, batch: 5223, sum loss: 4501.939453, avg loss: 2.840340, ppl: 17.121592 +epoch: 1, batch: 5224, sum loss: 4967.512695, avg loss: 2.889769, ppl: 17.989155 +epoch: 1, batch: 5225, sum loss: 4578.552734, avg loss: 2.714021, ppl: 15.089826 +epoch: 1, batch: 5226, sum loss: 4882.470703, avg loss: 2.906233, ppl: 18.287771 +epoch: 1, batch: 5227, sum loss: 5288.359375, avg loss: 2.742925, ppl: 15.532353 +epoch: 1, batch: 5228, sum loss: 4792.010742, avg loss: 2.732047, ppl: 15.364306 +epoch: 1, batch: 5229, sum loss: 4173.422852, avg loss: 2.676987, ppl: 14.541217 +epoch: 1, batch: 5230, sum loss: 5661.686523, avg loss: 3.143635, ppl: 23.187998 +epoch: 1, batch: 5231, sum loss: 4083.592285, avg loss: 2.636276, ppl: 13.961123 +epoch: 1, batch: 5232, sum loss: 5278.099121, avg loss: 2.963559, ppl: 19.366783 +epoch: 1, batch: 5233, sum loss: 4603.217285, avg loss: 2.893286, ppl: 18.052525 +epoch: 1, batch: 5234, sum loss: 3524.226074, avg loss: 2.402335, ppl: 11.048950 +epoch: 1, batch: 5235, sum loss: 5526.630859, avg loss: 2.902642, ppl: 18.222235 +epoch: 1, batch: 5236, sum loss: 4840.596191, avg loss: 2.874463, ppl: 17.715914 +epoch: 1, batch: 5237, sum loss: 4204.900879, avg loss: 2.554618, ppl: 12.866383 +epoch: 1, batch: 5238, sum loss: 3853.219727, avg loss: 
2.690796, ppl: 14.743409 +epoch: 1, batch: 5239, sum loss: 5562.239746, avg loss: 2.922879, ppl: 18.594753 +epoch: 1, batch: 5240, sum loss: 4287.924805, avg loss: 2.782560, ppl: 16.160336 +epoch: 1, batch: 5241, sum loss: 4971.206055, avg loss: 2.819742, ppl: 16.772530 +epoch: 1, batch: 5242, sum loss: 4524.880371, avg loss: 2.947805, ppl: 19.064056 +epoch: 1, batch: 5243, sum loss: 4624.878906, avg loss: 2.709361, ppl: 15.019672 +epoch: 1, batch: 5244, sum loss: 5468.073730, avg loss: 2.905459, ppl: 18.273628 +epoch: 1, batch: 5245, sum loss: 4695.031250, avg loss: 2.806355, ppl: 16.549480 +epoch: 1, batch: 5246, sum loss: 5095.752930, avg loss: 2.781525, ppl: 16.143616 +epoch: 1, batch: 5247, sum loss: 4802.500488, avg loss: 2.836681, ppl: 17.059052 +epoch: 1, batch: 5248, sum loss: 4936.799805, avg loss: 3.025000, ppl: 20.594007 +epoch: 1, batch: 5249, sum loss: 3351.995117, avg loss: 2.497761, ppl: 12.155248 +epoch: 1, batch: 5250, sum loss: 4130.335938, avg loss: 2.584691, ppl: 13.259189 +epoch: 1, batch: 5251, sum loss: 3941.724121, avg loss: 2.418236, ppl: 11.226034 +epoch: 1, batch: 5252, sum loss: 4557.915039, avg loss: 2.799702, ppl: 16.439751 +epoch: 1, batch: 5253, sum loss: 4521.737305, avg loss: 2.727224, ppl: 15.290380 +epoch: 1, batch: 5254, sum loss: 5857.417969, avg loss: 2.979358, ppl: 19.675184 +epoch: 1, batch: 5255, sum loss: 4335.631836, avg loss: 2.789982, ppl: 16.280724 +epoch: 1, batch: 5256, sum loss: 4906.143555, avg loss: 2.767143, ppl: 15.913097 +epoch: 1, batch: 5257, sum loss: 4952.652832, avg loss: 2.771490, ppl: 15.982431 +epoch: 1, batch: 5258, sum loss: 4177.578613, avg loss: 2.705686, ppl: 14.964577 +epoch: 1, batch: 5259, sum loss: 4978.839844, avg loss: 2.863048, ppl: 17.514828 +epoch: 1, batch: 5260, sum loss: 4573.372070, avg loss: 2.748421, ppl: 15.617948 +epoch: 1, batch: 5261, sum loss: 5009.075195, avg loss: 2.818838, ppl: 16.757370 +epoch: 1, batch: 5262, sum loss: 4262.091309, avg loss: 2.648907, ppl: 14.138576 
+epoch: 1, batch: 5263, sum loss: 4287.808105, avg loss: 2.710371, ppl: 15.034860 +epoch: 1, batch: 5264, sum loss: 4202.394531, avg loss: 2.735934, ppl: 15.424143 +epoch: 1, batch: 5265, sum loss: 4809.975098, avg loss: 2.769128, ppl: 15.944721 +epoch: 1, batch: 5266, sum loss: 3788.051514, avg loss: 2.483968, ppl: 11.988745 +epoch: 1, batch: 5267, sum loss: 4288.110352, avg loss: 2.759402, ppl: 15.790394 +epoch: 1, batch: 5268, sum loss: 4012.662354, avg loss: 2.546105, ppl: 12.757322 +epoch: 1, batch: 5269, sum loss: 4462.225098, avg loss: 2.697839, ppl: 14.847609 +epoch: 1, batch: 5270, sum loss: 3654.124268, avg loss: 2.615694, ppl: 13.676702 +epoch: 1, batch: 5271, sum loss: 4838.041016, avg loss: 2.821015, ppl: 16.793890 +epoch: 1, batch: 5272, sum loss: 3793.886230, avg loss: 2.688792, ppl: 14.713898 +epoch: 1, batch: 5273, sum loss: 4639.380859, avg loss: 2.786415, ppl: 16.222759 +epoch: 1, batch: 5274, sum loss: 4940.145996, avg loss: 3.043836, ppl: 20.985592 +epoch: 1, batch: 5275, sum loss: 4417.334961, avg loss: 2.736887, ppl: 15.438845 +epoch: 1, batch: 5276, sum loss: 4063.410889, avg loss: 2.536461, ppl: 12.634878 +epoch: 1, batch: 5277, sum loss: 5463.736328, avg loss: 3.225346, ppl: 25.162281 +epoch: 1, batch: 5278, sum loss: 4437.857910, avg loss: 2.724283, ppl: 15.245486 +epoch: 1, batch: 5279, sum loss: 3978.323486, avg loss: 2.747461, ppl: 15.602967 +epoch: 1, batch: 5280, sum loss: 4351.943359, avg loss: 2.759634, ppl: 15.794069 +epoch: 1, batch: 5281, sum loss: 4548.480469, avg loss: 2.652175, ppl: 14.184860 +epoch: 1, batch: 5282, sum loss: 3633.072266, avg loss: 2.420435, ppl: 11.250749 +epoch: 1, batch: 5283, sum loss: 4221.155273, avg loss: 2.702404, ppl: 14.915549 +epoch: 1, batch: 5284, sum loss: 3931.399658, avg loss: 2.756942, ppl: 15.751605 +epoch: 1, batch: 5285, sum loss: 4566.180176, avg loss: 2.901004, ppl: 18.192398 +epoch: 1, batch: 5286, sum loss: 5098.195312, avg loss: 2.898349, ppl: 18.144161 +epoch: 1, batch: 5287, sum 
loss: 5362.231445, avg loss: 2.952770, ppl: 19.158960 +epoch: 1, batch: 5288, sum loss: 4042.502197, avg loss: 2.534484, ppl: 12.609924 +epoch: 1, batch: 5289, sum loss: 5286.672852, avg loss: 2.848423, ppl: 17.260536 +epoch: 1, batch: 5290, sum loss: 3778.312012, avg loss: 2.489007, ppl: 12.049299 +epoch: 1, batch: 5291, sum loss: 5131.128906, avg loss: 2.937109, ppl: 18.861231 +epoch: 1, batch: 5292, sum loss: 3795.663086, avg loss: 2.517018, ppl: 12.391588 +epoch: 1, batch: 5293, sum loss: 4323.709961, avg loss: 2.628395, ppl: 13.851521 +epoch: 1, batch: 5294, sum loss: 4430.477051, avg loss: 2.527369, ppl: 12.520515 +epoch: 1, batch: 5295, sum loss: 4230.395020, avg loss: 2.632480, ppl: 13.908218 +epoch: 1, batch: 5296, sum loss: 5310.741211, avg loss: 2.733269, ppl: 15.383092 +epoch: 1, batch: 5297, sum loss: 5091.865234, avg loss: 2.912966, ppl: 18.411335 +epoch: 1, batch: 5298, sum loss: 5659.283691, avg loss: 3.008657, ppl: 20.260172 +epoch: 1, batch: 5299, sum loss: 4701.460938, avg loss: 2.810198, ppl: 16.613205 +epoch: 1, batch: 5300, sum loss: 3507.635010, avg loss: 2.596325, ppl: 13.414349 +epoch: 1, batch: 5301, sum loss: 4266.774414, avg loss: 2.435374, ppl: 11.420087 +epoch: 1, batch: 5302, sum loss: 5593.004883, avg loss: 2.923682, ppl: 18.609692 +epoch: 1, batch: 5303, sum loss: 5240.401855, avg loss: 2.982585, ppl: 19.738775 +epoch: 1, batch: 5304, sum loss: 4405.884766, avg loss: 2.671853, ppl: 14.466745 +epoch: 1, batch: 5305, sum loss: 5601.899414, avg loss: 2.823538, ppl: 16.836309 +epoch: 1, batch: 5306, sum loss: 4487.858398, avg loss: 2.810181, ppl: 16.612919 +epoch: 1, batch: 5307, sum loss: 4300.741211, avg loss: 2.587690, ppl: 13.299020 +epoch: 1, batch: 5308, sum loss: 3974.362793, avg loss: 2.707332, ppl: 14.989225 +epoch: 1, batch: 5309, sum loss: 4949.999512, avg loss: 2.790304, ppl: 16.285969 +epoch: 1, batch: 5310, sum loss: 4334.935059, avg loss: 2.672586, ppl: 14.477363 +epoch: 1, batch: 5311, sum loss: 5580.582031, avg loss: 
2.971556, ppl: 19.522272 +epoch: 1, batch: 5312, sum loss: 5721.267578, avg loss: 3.276786, ppl: 26.490484 +epoch: 1, batch: 5313, sum loss: 6207.812500, avg loss: 2.971667, ppl: 19.524441 +epoch: 1, batch: 5314, sum loss: 4674.971191, avg loss: 2.821347, ppl: 16.799456 +epoch: 1, batch: 5315, sum loss: 3838.012695, avg loss: 2.609118, ppl: 13.587062 +epoch: 1, batch: 5316, sum loss: 4514.831543, avg loss: 2.731296, ppl: 15.352765 +epoch: 1, batch: 5317, sum loss: 5256.744141, avg loss: 2.938370, ppl: 18.885038 +epoch: 1, batch: 5318, sum loss: 4052.118652, avg loss: 2.574408, ppl: 13.123549 +epoch: 1, batch: 5319, sum loss: 5172.011230, avg loss: 2.815466, ppl: 16.700960 +epoch: 1, batch: 5320, sum loss: 5150.058594, avg loss: 2.909638, ppl: 18.350149 +epoch: 1, batch: 5321, sum loss: 4333.181152, avg loss: 2.813754, ppl: 16.672386 +epoch: 1, batch: 5322, sum loss: 4664.245117, avg loss: 2.672920, ppl: 14.482192 +epoch: 1, batch: 5323, sum loss: 3900.162598, avg loss: 2.302339, ppl: 9.997540 +epoch: 1, batch: 5324, sum loss: 5396.611816, avg loss: 2.724185, ppl: 15.243992 +epoch: 1, batch: 5325, sum loss: 5094.114258, avg loss: 2.632617, ppl: 13.910125 +epoch: 1, batch: 5326, sum loss: 4919.943359, avg loss: 2.754728, ppl: 15.716759 +epoch: 1, batch: 5327, sum loss: 5833.201172, avg loss: 3.016133, ppl: 20.412201 +epoch: 1, batch: 5328, sum loss: 4289.839844, avg loss: 2.698012, ppl: 14.850185 +epoch: 1, batch: 5329, sum loss: 4338.665039, avg loss: 2.917730, ppl: 18.499252 +epoch: 1, batch: 5330, sum loss: 5051.277344, avg loss: 2.911399, ppl: 18.382500 +epoch: 1, batch: 5331, sum loss: 4674.421875, avg loss: 2.678752, ppl: 14.566898 +epoch: 1, batch: 5332, sum loss: 4487.784180, avg loss: 2.692132, ppl: 14.763117 +epoch: 1, batch: 5333, sum loss: 5391.263672, avg loss: 2.794849, ppl: 16.360157 +epoch: 1, batch: 5334, sum loss: 4753.461426, avg loss: 2.762034, ppl: 15.832019 +epoch: 1, batch: 5335, sum loss: 4861.923828, avg loss: 2.737570, ppl: 15.449394 +epoch: 
1, batch: 5336, sum loss: 5319.260742, avg loss: 2.856746, ppl: 17.404799 +epoch: 1, batch: 5337, sum loss: 4457.177734, avg loss: 2.709530, ppl: 15.022219 +epoch: 1, batch: 5338, sum loss: 5019.189453, avg loss: 2.723380, ppl: 15.231720 +epoch: 1, batch: 5339, sum loss: 3437.117432, avg loss: 2.691556, ppl: 14.754620 +epoch: 1, batch: 5340, sum loss: 4715.175293, avg loss: 2.945144, ppl: 19.013399 +epoch: 1, batch: 5341, sum loss: 5249.616211, avg loss: 2.664780, ppl: 14.364787 +epoch: 1, batch: 5342, sum loss: 5921.200195, avg loss: 3.118062, ppl: 22.602539 +epoch: 1, batch: 5343, sum loss: 4311.085449, avg loss: 2.761746, ppl: 15.827453 +epoch: 1, batch: 5344, sum loss: 5018.950195, avg loss: 2.886113, ppl: 17.923504 +epoch: 1, batch: 5345, sum loss: 4554.244141, avg loss: 2.886086, ppl: 17.923025 +epoch: 1, batch: 5346, sum loss: 5074.533691, avg loss: 2.873462, ppl: 17.698179 +epoch: 1, batch: 5347, sum loss: 4046.181641, avg loss: 2.765675, ppl: 15.889755 +epoch: 1, batch: 5348, sum loss: 5023.576660, avg loss: 2.838179, ppl: 17.084623 +epoch: 1, batch: 5349, sum loss: 4156.002930, avg loss: 2.615483, ppl: 13.673823 +epoch: 1, batch: 5350, sum loss: 4222.464844, avg loss: 2.732987, ppl: 15.378757 +epoch: 1, batch: 5351, sum loss: 4254.570801, avg loss: 2.605371, ppl: 13.536246 +epoch: 1, batch: 5352, sum loss: 4329.981934, avg loss: 2.773851, ppl: 16.020216 +epoch: 1, batch: 5353, sum loss: 5207.295898, avg loss: 2.880141, ppl: 17.816793 +epoch: 1, batch: 5354, sum loss: 5179.743164, avg loss: 2.783312, ppl: 16.172493 +epoch: 1, batch: 5355, sum loss: 4027.282471, avg loss: 2.658272, ppl: 14.271610 +epoch: 1, batch: 5356, sum loss: 3844.104248, avg loss: 2.620384, ppl: 13.741005 +epoch: 1, batch: 5357, sum loss: 4149.185547, avg loss: 2.682085, ppl: 14.615536 +epoch: 1, batch: 5358, sum loss: 5429.060547, avg loss: 2.840953, ppl: 17.132078 +epoch: 1, batch: 5359, sum loss: 5035.236816, avg loss: 2.724695, ppl: 15.251764 +epoch: 1, batch: 5360, sum loss: 
4418.167480, avg loss: 2.744203, ppl: 15.552219 +epoch: 1, batch: 5361, sum loss: 5618.313477, avg loss: 2.990055, ppl: 19.886778 +epoch: 1, batch: 5362, sum loss: 4522.071777, avg loss: 2.787960, ppl: 16.247845 +epoch: 1, batch: 5363, sum loss: 4606.635742, avg loss: 2.500888, ppl: 12.193318 +epoch: 1, batch: 5364, sum loss: 3844.827148, avg loss: 2.721038, ppl: 15.196093 +epoch: 1, batch: 5365, sum loss: 4976.646484, avg loss: 3.055032, ppl: 21.221855 +epoch: 1, batch: 5366, sum loss: 4752.668945, avg loss: 2.708073, ppl: 15.000348 +epoch: 1, batch: 5367, sum loss: 4760.392090, avg loss: 2.830197, ppl: 16.948805 +epoch: 1, batch: 5368, sum loss: 4451.147461, avg loss: 2.719088, ppl: 15.166489 +epoch: 1, batch: 5369, sum loss: 5301.902344, avg loss: 2.868995, ppl: 17.619297 +epoch: 1, batch: 5370, sum loss: 4251.484375, avg loss: 2.542754, ppl: 12.714638 +epoch: 1, batch: 5371, sum loss: 4516.063477, avg loss: 2.589486, ppl: 13.322920 +epoch: 1, batch: 5372, sum loss: 4355.295410, avg loss: 3.169793, ppl: 23.802553 +epoch: 1, batch: 5373, sum loss: 4439.759277, avg loss: 2.855151, ppl: 17.377060 +epoch: 1, batch: 5374, sum loss: 5930.246582, avg loss: 2.934313, ppl: 18.808582 +epoch: 1, batch: 5375, sum loss: 5748.134766, avg loss: 2.922285, ppl: 18.583704 +epoch: 1, batch: 5376, sum loss: 5536.078125, avg loss: 2.932245, ppl: 18.769716 +epoch: 1, batch: 5377, sum loss: 3556.465820, avg loss: 2.478373, ppl: 11.921855 +epoch: 1, batch: 5378, sum loss: 3646.684082, avg loss: 2.528907, ppl: 12.539797 +epoch: 1, batch: 5379, sum loss: 4296.860840, avg loss: 2.599432, ppl: 13.456089 +epoch: 1, batch: 5380, sum loss: 5126.976562, avg loss: 2.875478, ppl: 17.733894 +epoch: 1, batch: 5381, sum loss: 5214.759277, avg loss: 2.692183, ppl: 14.763878 +epoch: 1, batch: 5382, sum loss: 4927.749023, avg loss: 2.986515, ppl: 19.816492 +epoch: 1, batch: 5383, sum loss: 5338.447754, avg loss: 2.859372, ppl: 17.450567 +epoch: 1, batch: 5384, sum loss: 6074.681641, avg loss: 
3.155679, ppl: 23.468962 +epoch: 1, batch: 5385, sum loss: 5357.089844, avg loss: 3.154941, ppl: 23.451654 +epoch: 1, batch: 5386, sum loss: 4588.748535, avg loss: 2.685049, ppl: 14.658917 +epoch: 1, batch: 5387, sum loss: 4871.476562, avg loss: 2.782111, ppl: 16.153088 +epoch: 1, batch: 5388, sum loss: 3561.897461, avg loss: 2.571767, ppl: 13.088933 +epoch: 1, batch: 5389, sum loss: 4148.833984, avg loss: 2.654404, ppl: 14.216516 +epoch: 1, batch: 5390, sum loss: 4564.856445, avg loss: 2.870979, ppl: 17.654291 +epoch: 1, batch: 5391, sum loss: 4911.264160, avg loss: 2.784163, ppl: 16.186272 +epoch: 1, batch: 5392, sum loss: 4833.327148, avg loss: 2.747770, ppl: 15.607785 +epoch: 1, batch: 5393, sum loss: 4574.361328, avg loss: 2.891505, ppl: 18.020411 +epoch: 1, batch: 5394, sum loss: 4727.104492, avg loss: 2.705841, ppl: 14.966900 +epoch: 1, batch: 5395, sum loss: 4323.663086, avg loss: 2.655813, ppl: 14.236552 +epoch: 1, batch: 5396, sum loss: 5083.867676, avg loss: 2.990510, ppl: 19.895836 +epoch: 1, batch: 5397, sum loss: 3855.532715, avg loss: 2.638968, ppl: 13.998756 +epoch: 1, batch: 5398, sum loss: 4633.770508, avg loss: 2.719349, ppl: 15.170442 +epoch: 1, batch: 5399, sum loss: 4930.342773, avg loss: 2.774532, ppl: 16.031116 +epoch: 1, batch: 5400, sum loss: 4393.789062, avg loss: 2.669374, ppl: 14.430929 +epoch: 1, batch: 5401, sum loss: 4821.418945, avg loss: 2.803151, ppl: 16.496540 +epoch: 1, batch: 5402, sum loss: 4677.991699, avg loss: 2.796170, ppl: 16.381781 +epoch: 1, batch: 5403, sum loss: 4489.020508, avg loss: 2.817966, ppl: 16.742769 +epoch: 1, batch: 5404, sum loss: 4215.359375, avg loss: 2.717833, ppl: 15.147467 +epoch: 1, batch: 5405, sum loss: 3651.710205, avg loss: 2.532393, ppl: 12.583579 +epoch: 1, batch: 5406, sum loss: 4409.007324, avg loss: 2.716579, ppl: 15.128475 +epoch: 1, batch: 5407, sum loss: 4649.761230, avg loss: 3.067125, ppl: 21.480055 +epoch: 1, batch: 5408, sum loss: 4379.345703, avg loss: 2.782303, ppl: 16.156191 
+epoch: 1, batch: 5409, sum loss: 4695.521484, avg loss: 2.652837, ppl: 14.194251 +epoch: 1, batch: 5410, sum loss: 4964.342285, avg loss: 2.819047, ppl: 16.760874 +epoch: 1, batch: 5411, sum loss: 4173.664551, avg loss: 2.618359, ppl: 13.713203 +epoch: 1, batch: 5412, sum loss: 4887.472656, avg loss: 2.784885, ppl: 16.197950 +epoch: 1, batch: 5413, sum loss: 4146.695312, avg loss: 2.641207, ppl: 14.030130 +epoch: 1, batch: 5414, sum loss: 5378.986816, avg loss: 2.788485, ppl: 16.256367 +epoch: 1, batch: 5415, sum loss: 5331.064453, avg loss: 2.884775, ppl: 17.899542 +epoch: 1, batch: 5416, sum loss: 4700.349121, avg loss: 2.845248, ppl: 17.205820 +epoch: 1, batch: 5417, sum loss: 4690.626953, avg loss: 2.870641, ppl: 17.648323 +epoch: 1, batch: 5418, sum loss: 5100.264648, avg loss: 2.922788, ppl: 18.593046 +epoch: 1, batch: 5419, sum loss: 3131.510498, avg loss: 2.475502, ppl: 11.887676 +epoch: 1, batch: 5420, sum loss: 5070.712891, avg loss: 2.856740, ppl: 17.404686 +epoch: 1, batch: 5421, sum loss: 4512.862305, avg loss: 2.552524, ppl: 12.839471 +epoch: 1, batch: 5422, sum loss: 5249.808594, avg loss: 2.941069, ppl: 18.936085 +epoch: 1, batch: 5423, sum loss: 5376.274414, avg loss: 3.113071, ppl: 22.490015 +epoch: 1, batch: 5424, sum loss: 5065.236816, avg loss: 2.955214, ppl: 19.205833 +epoch: 1, batch: 5425, sum loss: 4673.627930, avg loss: 2.628587, ppl: 13.854183 +epoch: 1, batch: 5426, sum loss: 5386.012695, avg loss: 2.916087, ppl: 18.468880 +epoch: 1, batch: 5427, sum loss: 5691.294922, avg loss: 2.742793, ppl: 15.530294 +epoch: 1, batch: 5428, sum loss: 5810.394043, avg loss: 2.752437, ppl: 15.680794 +epoch: 1, batch: 5429, sum loss: 4778.568359, avg loss: 2.722831, ppl: 15.223359 +epoch: 1, batch: 5430, sum loss: 3680.144287, avg loss: 2.575328, ppl: 13.135630 +epoch: 1, batch: 5431, sum loss: 4574.404785, avg loss: 2.882423, ppl: 17.857489 +epoch: 1, batch: 5432, sum loss: 5666.433105, avg loss: 2.761420, ppl: 15.822291 +epoch: 1, batch: 5433, sum 
loss: 4450.778809, avg loss: 2.666734, ppl: 14.392885 +epoch: 1, batch: 5434, sum loss: 5863.245117, avg loss: 3.221563, ppl: 25.067278 +epoch: 1, batch: 5435, sum loss: 3781.536621, avg loss: 2.574225, ppl: 13.121143 +epoch: 1, batch: 5436, sum loss: 3649.342041, avg loss: 2.337823, ppl: 10.358665 +epoch: 1, batch: 5437, sum loss: 5029.494141, avg loss: 2.820805, ppl: 16.790354 +epoch: 1, batch: 5438, sum loss: 4663.811035, avg loss: 2.786028, ppl: 16.216486 +epoch: 1, batch: 5439, sum loss: 4372.773926, avg loss: 2.765828, ppl: 15.892192 +epoch: 1, batch: 5440, sum loss: 4329.015625, avg loss: 2.617301, ppl: 13.698701 +epoch: 1, batch: 5441, sum loss: 5159.500488, avg loss: 2.975490, ppl: 19.599232 +epoch: 1, batch: 5442, sum loss: 4737.060547, avg loss: 3.096118, ppl: 22.111950 +epoch: 1, batch: 5443, sum loss: 4608.835449, avg loss: 2.915139, ppl: 18.451385 +epoch: 1, batch: 5444, sum loss: 4946.147461, avg loss: 2.731169, ppl: 15.350829 +epoch: 1, batch: 5445, sum loss: 3501.927734, avg loss: 2.673227, ppl: 14.486647 +epoch: 1, batch: 5446, sum loss: 4701.450195, avg loss: 2.886096, ppl: 17.923197 +epoch: 1, batch: 5447, sum loss: 4773.049805, avg loss: 2.770197, ppl: 15.961781 +epoch: 1, batch: 5448, sum loss: 5197.382324, avg loss: 2.823130, ppl: 16.829447 +epoch: 1, batch: 5449, sum loss: 4472.158691, avg loss: 2.615298, ppl: 13.671284 +epoch: 1, batch: 5450, sum loss: 6205.803223, avg loss: 3.187367, ppl: 24.224560 +epoch: 1, batch: 5451, sum loss: 4941.524414, avg loss: 2.929179, ppl: 18.712255 +epoch: 1, batch: 5452, sum loss: 4979.662598, avg loss: 2.983621, ppl: 19.759228 +epoch: 1, batch: 5453, sum loss: 4735.417480, avg loss: 3.023894, ppl: 20.571232 +epoch: 1, batch: 5454, sum loss: 4006.507812, avg loss: 2.705272, ppl: 14.958385 +epoch: 1, batch: 5455, sum loss: 4246.745605, avg loss: 2.670909, ppl: 14.453103 +epoch: 1, batch: 5456, sum loss: 4799.873047, avg loss: 2.617161, ppl: 13.696780 +epoch: 1, batch: 5457, sum loss: 4247.885254, avg loss: 
2.668270, ppl: 14.415005 +epoch: 1, batch: 5458, sum loss: 4741.278320, avg loss: 2.802174, ppl: 16.480434 +epoch: 1, batch: 5459, sum loss: 5174.296387, avg loss: 2.868235, ppl: 17.605919 +epoch: 1, batch: 5460, sum loss: 4534.520508, avg loss: 2.653318, ppl: 14.201082 +epoch: 1, batch: 5461, sum loss: 4078.938965, avg loss: 2.522535, ppl: 12.460144 +epoch: 1, batch: 5462, sum loss: 4248.110352, avg loss: 2.661723, ppl: 14.320949 +epoch: 1, batch: 5463, sum loss: 4748.864258, avg loss: 2.908061, ppl: 18.321243 +epoch: 1, batch: 5464, sum loss: 5248.615234, avg loss: 2.823354, ppl: 16.833216 +epoch: 1, batch: 5465, sum loss: 3214.851562, avg loss: 2.267173, ppl: 9.652079 +epoch: 1, batch: 5466, sum loss: 5119.666016, avg loss: 2.743658, ppl: 15.543741 +epoch: 1, batch: 5467, sum loss: 5009.760254, avg loss: 2.719740, ppl: 15.176371 +epoch: 1, batch: 5468, sum loss: 4981.724609, avg loss: 2.954760, ppl: 19.197107 +epoch: 1, batch: 5469, sum loss: 5010.119141, avg loss: 2.598610, ppl: 13.445032 +epoch: 1, batch: 5470, sum loss: 4317.548828, avg loss: 2.637476, ppl: 13.977885 +epoch: 1, batch: 5471, sum loss: 4739.094727, avg loss: 2.586842, ppl: 13.287744 +epoch: 1, batch: 5472, sum loss: 4051.116943, avg loss: 2.615311, ppl: 13.671470 +epoch: 1, batch: 5473, sum loss: 4510.074707, avg loss: 2.657675, ppl: 14.263093 +epoch: 1, batch: 5474, sum loss: 4572.129395, avg loss: 2.857581, ppl: 17.419336 +epoch: 1, batch: 5475, sum loss: 5096.356445, avg loss: 2.969905, ppl: 19.490061 +epoch: 1, batch: 5476, sum loss: 4947.099609, avg loss: 2.690103, ppl: 14.733197 +epoch: 1, batch: 5477, sum loss: 4547.916016, avg loss: 2.766372, ppl: 15.900845 +epoch: 1, batch: 5478, sum loss: 4588.690918, avg loss: 2.806539, ppl: 16.552528 +epoch: 1, batch: 5479, sum loss: 5386.807617, avg loss: 2.826237, ppl: 16.881811 +epoch: 1, batch: 5480, sum loss: 4898.854004, avg loss: 2.967204, ppl: 19.437502 +epoch: 1, batch: 5481, sum loss: 5479.430664, avg loss: 3.035696, ppl: 20.815451 +epoch: 
1, batch: 5482, sum loss: 4231.644531, avg loss: 2.492135, ppl: 12.087049 +epoch: 1, batch: 5483, sum loss: 5238.350586, avg loss: 3.024452, ppl: 20.582716 +epoch: 1, batch: 5484, sum loss: 4953.727051, avg loss: 2.827470, ppl: 16.902643 +epoch: 1, batch: 5485, sum loss: 3316.262695, avg loss: 2.429497, ppl: 11.353165 +epoch: 1, batch: 5486, sum loss: 5367.763672, avg loss: 2.923619, ppl: 18.608501 +epoch: 1, batch: 5487, sum loss: 4242.265625, avg loss: 2.708982, ppl: 15.013980 +epoch: 1, batch: 5488, sum loss: 3488.842041, avg loss: 2.409421, ppl: 11.127522 +epoch: 1, batch: 5489, sum loss: 4924.566406, avg loss: 2.828585, ppl: 16.921499 +epoch: 1, batch: 5490, sum loss: 5027.023926, avg loss: 3.035642, ppl: 20.814344 +epoch: 1, batch: 5491, sum loss: 4306.502930, avg loss: 2.645272, ppl: 14.087276 +epoch: 1, batch: 5492, sum loss: 4048.724365, avg loss: 2.519430, ppl: 12.421519 +epoch: 1, batch: 5493, sum loss: 5342.009766, avg loss: 2.892263, ppl: 18.034079 +epoch: 1, batch: 5494, sum loss: 4104.564941, avg loss: 2.709284, ppl: 15.018516 +epoch: 1, batch: 5495, sum loss: 4486.472656, avg loss: 2.636000, ppl: 13.957269 +epoch: 1, batch: 5496, sum loss: 4729.699707, avg loss: 2.608770, ppl: 13.582333 +epoch: 1, batch: 5497, sum loss: 4576.269043, avg loss: 2.833603, ppl: 17.006628 +epoch: 1, batch: 5498, sum loss: 4331.564453, avg loss: 2.741497, ppl: 15.510180 +epoch: 1, batch: 5499, sum loss: 4917.252930, avg loss: 3.056093, ppl: 21.244383 +epoch: 1, batch: 5500, sum loss: 4044.838867, avg loss: 2.707389, ppl: 14.990084 +epoch: 1, batch: 5501, sum loss: 5140.277832, avg loss: 2.912339, ppl: 18.399780 +epoch: 1, batch: 5502, sum loss: 4848.578125, avg loss: 2.867285, ppl: 17.589195 +epoch: 1, batch: 5503, sum loss: 4250.596191, avg loss: 2.792770, ppl: 16.326187 +epoch: 1, batch: 5504, sum loss: 3540.555664, avg loss: 2.421721, ppl: 11.265227 +epoch: 1, batch: 5505, sum loss: 4836.216309, avg loss: 2.904634, ppl: 18.258568 +epoch: 1, batch: 5506, sum loss: 
6326.178223, avg loss: 3.105635, ppl: 22.323383 +epoch: 1, batch: 5507, sum loss: 5245.702148, avg loss: 2.841659, ppl: 17.144182 +epoch: 1, batch: 5508, sum loss: 4027.166992, avg loss: 2.493602, ppl: 12.104796 +epoch: 1, batch: 5509, sum loss: 4920.715332, avg loss: 2.807025, ppl: 16.560585 +epoch: 1, batch: 5510, sum loss: 4640.351562, avg loss: 2.763759, ppl: 15.859348 +epoch: 1, batch: 5511, sum loss: 4299.266602, avg loss: 2.545451, ppl: 12.748976 +epoch: 1, batch: 5512, sum loss: 4256.192871, avg loss: 2.541011, ppl: 12.692494 +epoch: 1, batch: 5513, sum loss: 4521.368164, avg loss: 2.621083, ppl: 13.750607 +epoch: 1, batch: 5514, sum loss: 5044.135742, avg loss: 2.934343, ppl: 18.809143 +epoch: 1, batch: 5515, sum loss: 5006.196289, avg loss: 3.092153, ppl: 22.024452 +epoch: 1, batch: 5516, sum loss: 4826.079590, avg loss: 2.889868, ppl: 17.990934 +epoch: 1, batch: 5517, sum loss: 4830.525391, avg loss: 2.792211, ppl: 16.317059 +epoch: 1, batch: 5518, sum loss: 5123.059082, avg loss: 2.900939, ppl: 18.191227 +epoch: 1, batch: 5519, sum loss: 4705.859375, avg loss: 2.669234, ppl: 14.428909 +epoch: 1, batch: 5520, sum loss: 4370.596191, avg loss: 2.570939, ppl: 13.078100 +epoch: 1, batch: 5521, sum loss: 5147.153320, avg loss: 2.854772, ppl: 17.370470 +epoch: 1, batch: 5522, sum loss: 4290.922363, avg loss: 2.566341, ppl: 13.018109 +epoch: 1, batch: 5523, sum loss: 4003.896973, avg loss: 2.701685, ppl: 14.904824 +epoch: 1, batch: 5524, sum loss: 4233.927734, avg loss: 2.526210, ppl: 12.506020 +epoch: 1, batch: 5525, sum loss: 5533.895508, avg loss: 2.973614, ppl: 19.562490 +epoch: 1, batch: 5526, sum loss: 4100.577637, avg loss: 2.575740, ppl: 13.141033 +epoch: 1, batch: 5527, sum loss: 5434.213867, avg loss: 2.997360, ppl: 20.032585 +epoch: 1, batch: 5528, sum loss: 4312.315430, avg loss: 2.520348, ppl: 12.432923 +epoch: 1, batch: 5529, sum loss: 4510.850586, avg loss: 2.882333, ppl: 17.855875 +epoch: 1, batch: 5530, sum loss: 5015.629395, avg loss: 
3.016013, ppl: 20.409754 +epoch: 1, batch: 5531, sum loss: 4716.544922, avg loss: 2.678333, ppl: 14.560804 +epoch: 1, batch: 5532, sum loss: 4602.139160, avg loss: 2.598610, ppl: 13.445042 +epoch: 1, batch: 5533, sum loss: 4826.444336, avg loss: 2.835749, ppl: 17.043156 +epoch: 1, batch: 5534, sum loss: 3791.399414, avg loss: 2.596849, ppl: 13.421380 +epoch: 1, batch: 5535, sum loss: 4955.549316, avg loss: 2.740901, ppl: 15.500949 +epoch: 1, batch: 5536, sum loss: 4893.411133, avg loss: 2.727654, ppl: 15.296958 +epoch: 1, batch: 5537, sum loss: 4135.047852, avg loss: 2.599025, ppl: 13.450611 +epoch: 1, batch: 5538, sum loss: 4689.542969, avg loss: 2.532151, ppl: 12.580531 +epoch: 1, batch: 5539, sum loss: 3975.693359, avg loss: 2.631167, ppl: 13.889969 +epoch: 1, batch: 5540, sum loss: 3914.342773, avg loss: 2.743057, ppl: 15.534409 +epoch: 1, batch: 5541, sum loss: 4043.657959, avg loss: 2.653319, ppl: 14.201093 +epoch: 1, batch: 5542, sum loss: 4792.063965, avg loss: 2.936314, ppl: 18.846249 +epoch: 1, batch: 5543, sum loss: 5180.335938, avg loss: 2.863646, ppl: 17.525312 +epoch: 1, batch: 5544, sum loss: 4607.797852, avg loss: 2.839062, ppl: 17.099718 +epoch: 1, batch: 5545, sum loss: 4438.375977, avg loss: 2.818016, ppl: 16.743603 +epoch: 1, batch: 5546, sum loss: 4618.524414, avg loss: 2.718378, ppl: 15.155725 +epoch: 1, batch: 5547, sum loss: 3442.100342, avg loss: 2.382077, ppl: 10.827363 +epoch: 1, batch: 5548, sum loss: 4699.531250, avg loss: 2.824238, ppl: 16.848095 +epoch: 1, batch: 5549, sum loss: 4645.370605, avg loss: 2.934536, ppl: 18.812771 +epoch: 1, batch: 5550, sum loss: 4564.791992, avg loss: 2.663239, ppl: 14.342673 +epoch: 1, batch: 5551, sum loss: 5414.721680, avg loss: 2.890935, ppl: 18.010145 +epoch: 1, batch: 5552, sum loss: 5481.058594, avg loss: 2.982078, ppl: 19.728762 +epoch: 1, batch: 5553, sum loss: 4240.409668, avg loss: 2.612699, ppl: 13.635798 +epoch: 1, batch: 5554, sum loss: 4403.378906, avg loss: 2.706441, ppl: 14.975877 
+epoch: 1, batch: 5555, sum loss: 4118.848633, avg loss: 2.618467, ppl: 13.714681 +epoch: 1, batch: 5556, sum loss: 5219.715820, avg loss: 3.204245, ppl: 24.636900 +epoch: 1, batch: 5557, sum loss: 4503.708496, avg loss: 2.863133, ppl: 17.516323 +epoch: 1, batch: 5558, sum loss: 4369.775879, avg loss: 2.576519, ppl: 13.151276 +epoch: 1, batch: 5559, sum loss: 4427.290039, avg loss: 2.734583, ppl: 15.403321 +epoch: 1, batch: 5560, sum loss: 5332.215332, avg loss: 3.247391, ppl: 25.723135 +epoch: 1, batch: 5561, sum loss: 4544.787109, avg loss: 2.786504, ppl: 16.224194 +epoch: 1, batch: 5562, sum loss: 4837.482422, avg loss: 2.725342, ppl: 15.261633 +epoch: 1, batch: 5563, sum loss: 5044.393555, avg loss: 2.843514, ppl: 17.176012 +epoch: 1, batch: 5564, sum loss: 4988.831055, avg loss: 2.883717, ppl: 17.880617 +epoch: 1, batch: 5565, sum loss: 4675.613281, avg loss: 2.887964, ppl: 17.956709 +epoch: 1, batch: 5566, sum loss: 3942.513184, avg loss: 2.576806, ppl: 13.155055 +epoch: 1, batch: 5567, sum loss: 3929.821045, avg loss: 2.614651, ppl: 13.662450 +epoch: 1, batch: 5568, sum loss: 5511.599121, avg loss: 3.098145, ppl: 22.156801 +epoch: 1, batch: 5569, sum loss: 4362.664551, avg loss: 2.679769, ppl: 14.581729 +epoch: 1, batch: 5570, sum loss: 4159.096680, avg loss: 2.548466, ppl: 12.787476 +epoch: 1, batch: 5571, sum loss: 4087.790283, avg loss: 2.777031, ppl: 16.071241 +epoch: 1, batch: 5572, sum loss: 4883.498047, avg loss: 2.793763, ppl: 16.342407 +epoch: 1, batch: 5573, sum loss: 4449.584473, avg loss: 2.782729, ppl: 16.163076 +epoch: 1, batch: 5574, sum loss: 5204.127441, avg loss: 2.938525, ppl: 18.887960 +epoch: 1, batch: 5575, sum loss: 4569.348633, avg loss: 2.784490, ppl: 16.191565 +epoch: 1, batch: 5576, sum loss: 4574.582031, avg loss: 2.721346, ppl: 15.200764 +epoch: 1, batch: 5577, sum loss: 3571.170654, avg loss: 2.538146, ppl: 12.656178 +epoch: 1, batch: 5578, sum loss: 4546.752930, avg loss: 2.709626, ppl: 15.023659 +epoch: 1, batch: 5579, sum 
loss: 4736.251953, avg loss: 2.668311, ppl: 14.415599 +epoch: 1, batch: 5580, sum loss: 4856.165039, avg loss: 2.807032, ppl: 16.560692 +epoch: 1, batch: 5581, sum loss: 4990.800781, avg loss: 2.803821, ppl: 16.507595 +epoch: 1, batch: 5582, sum loss: 4237.205078, avg loss: 2.747863, ppl: 15.609243 +epoch: 1, batch: 5583, sum loss: 4320.936523, avg loss: 2.789501, ppl: 16.272892 +epoch: 1, batch: 5584, sum loss: 4231.449707, avg loss: 2.702075, ppl: 14.910640 +epoch: 1, batch: 5585, sum loss: 4208.745605, avg loss: 2.524742, ppl: 12.487678 +epoch: 1, batch: 5586, sum loss: 4411.546875, avg loss: 2.743499, ppl: 15.541273 +epoch: 1, batch: 5587, sum loss: 5099.008301, avg loss: 2.959378, ppl: 19.285973 +epoch: 1, batch: 5588, sum loss: 4078.762695, avg loss: 2.570109, ppl: 13.067247 +epoch: 1, batch: 5589, sum loss: 4137.022949, avg loss: 2.864974, ppl: 17.548601 +epoch: 1, batch: 5590, sum loss: 5161.971680, avg loss: 2.776746, ppl: 16.066660 +epoch: 1, batch: 5591, sum loss: 6349.446289, avg loss: 3.026428, ppl: 20.623438 +epoch: 1, batch: 5592, sum loss: 4355.467285, avg loss: 2.703580, ppl: 14.933099 +epoch: 1, batch: 5593, sum loss: 4524.752930, avg loss: 2.646054, ppl: 14.098301 +epoch: 1, batch: 5594, sum loss: 4700.806152, avg loss: 2.646850, ppl: 14.109529 +epoch: 1, batch: 5595, sum loss: 4507.423828, avg loss: 2.619072, ppl: 13.722989 +epoch: 1, batch: 5596, sum loss: 4572.277832, avg loss: 2.855889, ppl: 17.389887 +epoch: 1, batch: 5597, sum loss: 3161.905518, avg loss: 2.421061, ppl: 11.257798 +epoch: 1, batch: 5598, sum loss: 4375.020020, avg loss: 2.786637, ppl: 16.226360 +epoch: 1, batch: 5599, sum loss: 3383.639893, avg loss: 2.205763, ppl: 9.077171 +epoch: 1, batch: 5600, sum loss: 4729.298340, avg loss: 2.752793, ppl: 15.686384 +epoch: 1, batch: 5601, sum loss: 4869.669922, avg loss: 2.754338, ppl: 15.710641 +epoch: 1, batch: 5602, sum loss: 5458.708008, avg loss: 2.898942, ppl: 18.154926 +epoch: 1, batch: 5603, sum loss: 4987.100098, avg loss: 
2.911325, ppl: 18.381142 +epoch: 1, batch: 5604, sum loss: 5152.061523, avg loss: 3.068530, ppl: 21.510256 +epoch: 1, batch: 5605, sum loss: 4745.186035, avg loss: 2.814464, ppl: 16.684229 +epoch: 1, batch: 5606, sum loss: 4789.974121, avg loss: 2.856276, ppl: 17.396614 +epoch: 1, batch: 5607, sum loss: 5035.791504, avg loss: 2.782205, ppl: 16.154608 +epoch: 1, batch: 5608, sum loss: 5237.359863, avg loss: 2.782869, ppl: 16.165335 +epoch: 1, batch: 5609, sum loss: 4061.514893, avg loss: 2.635636, ppl: 13.952181 +epoch: 1, batch: 5610, sum loss: 4478.479492, avg loss: 2.696255, ppl: 14.824111 +epoch: 1, batch: 5611, sum loss: 5747.487305, avg loss: 2.708524, ppl: 15.007105 +epoch: 1, batch: 5612, sum loss: 4731.331055, avg loss: 2.762015, ppl: 15.831706 +epoch: 1, batch: 5613, sum loss: 4899.061035, avg loss: 2.791488, ppl: 16.305260 +epoch: 1, batch: 5614, sum loss: 4634.758301, avg loss: 2.648433, ppl: 14.131880 +epoch: 1, batch: 5615, sum loss: 5369.829102, avg loss: 2.993216, ppl: 19.949734 +epoch: 1, batch: 5616, sum loss: 5458.666504, avg loss: 2.853459, ppl: 17.347677 +epoch: 1, batch: 5617, sum loss: 4373.324707, avg loss: 2.511961, ppl: 12.329089 +epoch: 1, batch: 5618, sum loss: 4583.632812, avg loss: 2.569301, ppl: 13.056696 +epoch: 1, batch: 5619, sum loss: 3928.070801, avg loss: 2.575784, ppl: 13.141619 +epoch: 1, batch: 5620, sum loss: 5907.938965, avg loss: 3.028160, ppl: 20.659176 +epoch: 1, batch: 5621, sum loss: 5692.411133, avg loss: 3.021450, ppl: 20.521021 +epoch: 1, batch: 5622, sum loss: 4834.102051, avg loss: 2.657560, ppl: 14.261454 +epoch: 1, batch: 5623, sum loss: 5136.162598, avg loss: 2.900148, ppl: 18.176838 +epoch: 1, batch: 5624, sum loss: 4264.564941, avg loss: 2.598760, ppl: 13.447052 +epoch: 1, batch: 5625, sum loss: 4347.798340, avg loss: 3.070479, ppl: 21.552227 +epoch: 1, batch: 5626, sum loss: 4572.275879, avg loss: 2.769398, ppl: 15.949029 +epoch: 1, batch: 5627, sum loss: 4942.339844, avg loss: 2.705167, ppl: 14.956812 
+epoch: 1, batch: 5628, sum loss: 4770.169434, avg loss: 2.693489, ppl: 14.783166 +epoch: 1, batch: 5629, sum loss: 4399.560059, avg loss: 2.834768, ppl: 17.026455 +epoch: 1, batch: 5630, sum loss: 4966.378906, avg loss: 2.828234, ppl: 16.915565 +epoch: 1, batch: 5631, sum loss: 4805.326172, avg loss: 2.915853, ppl: 18.464561 +epoch: 1, batch: 5632, sum loss: 4120.441406, avg loss: 2.617815, ppl: 13.705748 +epoch: 1, batch: 5633, sum loss: 5192.262695, avg loss: 2.800573, ppl: 16.454075 +epoch: 1, batch: 5634, sum loss: 4107.056152, avg loss: 2.475621, ppl: 11.889093 +epoch: 1, batch: 5635, sum loss: 4406.740234, avg loss: 2.715182, ppl: 15.107357 +epoch: 1, batch: 5636, sum loss: 5028.616699, avg loss: 2.910079, ppl: 18.358253 +epoch: 1, batch: 5637, sum loss: 5023.447266, avg loss: 2.918912, ppl: 18.521120 +epoch: 1, batch: 5638, sum loss: 5531.227539, avg loss: 2.775327, ppl: 16.043880 +epoch: 1, batch: 5639, sum loss: 4029.025391, avg loss: 2.427124, ppl: 11.326259 +epoch: 1, batch: 5640, sum loss: 4135.623047, avg loss: 2.760763, ppl: 15.811902 +epoch: 1, batch: 5641, sum loss: 3280.750732, avg loss: 2.318552, ppl: 10.160949 +epoch: 1, batch: 5642, sum loss: 5063.082520, avg loss: 2.863734, ppl: 17.526859 +epoch: 1, batch: 5643, sum loss: 3544.067383, avg loss: 2.598290, ppl: 13.440734 +epoch: 1, batch: 5644, sum loss: 4441.372070, avg loss: 2.897177, ppl: 18.122911 +epoch: 1, batch: 5645, sum loss: 3849.847412, avg loss: 2.573427, ppl: 13.110683 +epoch: 1, batch: 5646, sum loss: 4665.360840, avg loss: 2.673559, ppl: 14.491455 +epoch: 1, batch: 5647, sum loss: 5228.620605, avg loss: 2.929199, ppl: 18.712645 +epoch: 1, batch: 5648, sum loss: 4636.141113, avg loss: 2.626709, ppl: 13.828183 +epoch: 1, batch: 5649, sum loss: 5834.948242, avg loss: 2.871530, ppl: 17.664017 +epoch: 1, batch: 5650, sum loss: 4519.070312, avg loss: 2.639644, ppl: 14.008215 +epoch: 1, batch: 5651, sum loss: 4882.699219, avg loss: 2.758587, ppl: 15.777535 +epoch: 1, batch: 5652, sum 
loss: 3987.104980, avg loss: 2.600851, ppl: 13.475204 +epoch: 1, batch: 5653, sum loss: 4788.627930, avg loss: 2.782468, ppl: 16.158857 +epoch: 1, batch: 5654, sum loss: 3443.523926, avg loss: 2.511688, ppl: 12.325715 +epoch: 1, batch: 5655, sum loss: 4229.434570, avg loss: 2.604331, ppl: 13.522182 +epoch: 1, batch: 5656, sum loss: 5413.282715, avg loss: 2.938807, ppl: 18.893297 +epoch: 1, batch: 5657, sum loss: 5083.398926, avg loss: 3.009709, ppl: 20.281504 +epoch: 1, batch: 5658, sum loss: 5351.040039, avg loss: 2.956376, ppl: 19.228159 +epoch: 1, batch: 5659, sum loss: 4509.819336, avg loss: 2.674863, ppl: 14.510366 +epoch: 1, batch: 5660, sum loss: 4977.102539, avg loss: 2.865344, ppl: 17.555092 +epoch: 1, batch: 5661, sum loss: 5129.198242, avg loss: 2.840088, ppl: 17.117266 +epoch: 1, batch: 5662, sum loss: 3951.233154, avg loss: 2.390341, ppl: 10.917212 +epoch: 1, batch: 5663, sum loss: 4634.510254, avg loss: 2.808794, ppl: 16.589899 +epoch: 1, batch: 5664, sum loss: 4451.110352, avg loss: 2.687869, ppl: 14.700310 +epoch: 1, batch: 5665, sum loss: 4480.683594, avg loss: 2.750573, ppl: 15.651600 +epoch: 1, batch: 5666, sum loss: 5251.942383, avg loss: 2.990856, ppl: 19.902706 +epoch: 1, batch: 5667, sum loss: 4511.095703, avg loss: 2.625783, ppl: 13.815393 +epoch: 1, batch: 5668, sum loss: 5414.130859, avg loss: 2.921819, ppl: 18.575048 +epoch: 1, batch: 5669, sum loss: 3887.021729, avg loss: 2.610492, ppl: 13.605737 +epoch: 1, batch: 5670, sum loss: 3566.781982, avg loss: 2.735262, ppl: 15.413784 +epoch: 1, batch: 5671, sum loss: 5052.747070, avg loss: 2.879058, ppl: 17.797503 +epoch: 1, batch: 5672, sum loss: 5004.405762, avg loss: 2.674723, ppl: 14.508326 +epoch: 1, batch: 5673, sum loss: 3360.270996, avg loss: 2.440284, ppl: 11.476299 +epoch: 1, batch: 5674, sum loss: 4302.296387, avg loss: 2.752589, ppl: 15.683179 +epoch: 1, batch: 5675, sum loss: 5048.373047, avg loss: 2.672511, ppl: 14.476272 +epoch: 1, batch: 5676, sum loss: 5318.423828, avg loss: 
2.994608, ppl: 19.977530 +epoch: 1, batch: 5677, sum loss: 4672.325195, avg loss: 2.794453, ppl: 16.353683 +epoch: 1, batch: 5678, sum loss: 5105.593750, avg loss: 2.844342, ppl: 17.190243 +epoch: 1, batch: 5679, sum loss: 5231.484375, avg loss: 3.106582, ppl: 22.344549 +epoch: 1, batch: 5680, sum loss: 4779.863770, avg loss: 3.021405, ppl: 20.520111 +epoch: 1, batch: 5681, sum loss: 5527.158691, avg loss: 2.950966, ppl: 19.124413 +epoch: 1, batch: 5682, sum loss: 4509.191406, avg loss: 2.778306, ppl: 16.091745 +epoch: 1, batch: 5683, sum loss: 4865.227539, avg loss: 2.641274, ppl: 14.031075 +epoch: 1, batch: 5684, sum loss: 4635.836426, avg loss: 2.749606, ppl: 15.636476 +epoch: 1, batch: 5685, sum loss: 4719.616211, avg loss: 2.677037, ppl: 14.541942 +epoch: 1, batch: 5686, sum loss: 4863.336914, avg loss: 2.592397, ppl: 13.361764 +epoch: 1, batch: 5687, sum loss: 4043.061523, avg loss: 2.621959, ppl: 13.762665 +epoch: 1, batch: 5688, sum loss: 4711.934082, avg loss: 2.811417, ppl: 16.633465 +epoch: 1, batch: 5689, sum loss: 4343.962891, avg loss: 2.665008, ppl: 14.368062 +epoch: 1, batch: 5690, sum loss: 3384.826660, avg loss: 2.516600, ppl: 12.386407 +epoch: 1, batch: 5691, sum loss: 4443.229492, avg loss: 2.780494, ppl: 16.126989 +epoch: 1, batch: 5692, sum loss: 3904.186523, avg loss: 2.556769, ppl: 12.894091 +epoch: 1, batch: 5693, sum loss: 5108.160156, avg loss: 2.934038, ppl: 18.803404 +epoch: 1, batch: 5694, sum loss: 4515.370117, avg loss: 2.848814, ppl: 17.267290 +epoch: 1, batch: 5695, sum loss: 4731.540527, avg loss: 2.808036, ppl: 16.577326 +epoch: 1, batch: 5696, sum loss: 4804.085938, avg loss: 2.931108, ppl: 18.748388 +epoch: 1, batch: 5697, sum loss: 3831.865967, avg loss: 2.708032, ppl: 14.999733 +epoch: 1, batch: 5698, sum loss: 5536.174316, avg loss: 2.909183, ppl: 18.341799 +epoch: 1, batch: 5699, sum loss: 4296.442383, avg loss: 2.563510, ppl: 12.981301 +epoch: 1, batch: 5700, sum loss: 4426.847656, avg loss: 2.597915, ppl: 13.435701 
+epoch: 1, batch: 5701, sum loss: 4778.142090, avg loss: 2.733491, ppl: 15.386507 +epoch: 1, batch: 5702, sum loss: 4363.258789, avg loss: 2.786244, ppl: 16.219990 +epoch: 1, batch: 5703, sum loss: 4420.810547, avg loss: 2.625185, ppl: 13.807122 +epoch: 1, batch: 5704, sum loss: 5203.042969, avg loss: 3.023267, ppl: 20.558352 +epoch: 1, batch: 5705, sum loss: 4218.600098, avg loss: 2.661577, ppl: 14.318855 +epoch: 1, batch: 5706, sum loss: 4523.052246, avg loss: 2.632743, ppl: 13.911877 +epoch: 1, batch: 5707, sum loss: 4238.119141, avg loss: 2.721978, ppl: 15.210374 +epoch: 1, batch: 5708, sum loss: 4751.559082, avg loss: 2.696685, ppl: 14.830488 +epoch: 1, batch: 5709, sum loss: 4999.860352, avg loss: 2.900151, ppl: 18.176895 +epoch: 1, batch: 5710, sum loss: 4112.946777, avg loss: 2.551456, ppl: 12.825761 +epoch: 1, batch: 5711, sum loss: 4891.255859, avg loss: 2.731019, ppl: 15.348523 +epoch: 1, batch: 5712, sum loss: 4347.794922, avg loss: 2.917983, ppl: 18.503933 +epoch: 1, batch: 5713, sum loss: 4562.433594, avg loss: 2.704466, ppl: 14.946331 +epoch: 1, batch: 5714, sum loss: 4560.779297, avg loss: 2.789467, ppl: 16.272350 +epoch: 1, batch: 5715, sum loss: 4537.303223, avg loss: 2.844704, ppl: 17.196470 +epoch: 1, batch: 5716, sum loss: 4012.590332, avg loss: 2.895087, ppl: 18.085070 +epoch: 1, batch: 5717, sum loss: 4552.709961, avg loss: 2.754210, ppl: 15.708633 +epoch: 1, batch: 5718, sum loss: 4370.392090, avg loss: 2.764321, ppl: 15.868267 +epoch: 1, batch: 5719, sum loss: 3935.540527, avg loss: 2.411483, ppl: 11.150488 +epoch: 1, batch: 5720, sum loss: 5184.227051, avg loss: 2.942240, ppl: 18.958269 +epoch: 1, batch: 5721, sum loss: 4553.658203, avg loss: 2.423448, ppl: 11.284700 +epoch: 1, batch: 5722, sum loss: 3817.896973, avg loss: 2.611421, ppl: 13.618383 +epoch: 1, batch: 5723, sum loss: 4214.293457, avg loss: 2.710157, ppl: 15.031631 +epoch: 1, batch: 5724, sum loss: 4235.856934, avg loss: 2.694566, ppl: 14.799088 +epoch: 1, batch: 5725, sum 
loss: 3992.408203, avg loss: 2.805628, ppl: 16.537451 +epoch: 1, batch: 5726, sum loss: 4025.984131, avg loss: 2.469929, ppl: 11.821608 +epoch: 1, batch: 5727, sum loss: 4630.937012, avg loss: 2.883522, ppl: 17.877131 +epoch: 1, batch: 5728, sum loss: 4977.198242, avg loss: 2.748315, ppl: 15.616298 +epoch: 1, batch: 5729, sum loss: 4860.813477, avg loss: 2.790364, ppl: 16.286940 +epoch: 1, batch: 5730, sum loss: 4568.302734, avg loss: 2.711159, ppl: 15.046704 +epoch: 1, batch: 5731, sum loss: 4880.003418, avg loss: 2.934458, ppl: 18.811300 +epoch: 1, batch: 5732, sum loss: 4278.044922, avg loss: 2.631024, ppl: 13.887982 +epoch: 1, batch: 5733, sum loss: 4413.571289, avg loss: 2.782832, ppl: 16.164734 +epoch: 1, batch: 5734, sum loss: 6026.046875, avg loss: 3.215607, ppl: 24.918406 +epoch: 1, batch: 5735, sum loss: 4580.521973, avg loss: 2.746116, ppl: 15.582000 +epoch: 1, batch: 5736, sum loss: 4030.787598, avg loss: 2.625920, ppl: 13.817285 +epoch: 1, batch: 5737, sum loss: 4730.832031, avg loss: 2.671277, ppl: 14.458425 +epoch: 1, batch: 5738, sum loss: 4900.277832, avg loss: 3.032350, ppl: 20.745930 +epoch: 1, batch: 5739, sum loss: 4748.083984, avg loss: 2.875884, ppl: 17.741096 +epoch: 1, batch: 5740, sum loss: 4593.070312, avg loss: 2.671943, ppl: 14.468056 +epoch: 1, batch: 5741, sum loss: 5041.114258, avg loss: 2.997095, ppl: 20.027281 +epoch: 1, batch: 5742, sum loss: 4389.985352, avg loss: 2.730090, ppl: 15.334273 +epoch: 1, batch: 5743, sum loss: 5076.663086, avg loss: 2.817238, ppl: 16.730579 +epoch: 1, batch: 5744, sum loss: 3747.957275, avg loss: 2.581238, ppl: 13.213484 +epoch: 1, batch: 5745, sum loss: 4693.896973, avg loss: 2.825946, ppl: 16.876909 +epoch: 1, batch: 5746, sum loss: 4169.601562, avg loss: 2.609263, ppl: 13.589028 +epoch: 1, batch: 5747, sum loss: 5230.177734, avg loss: 2.964953, ppl: 19.393801 +epoch: 1, batch: 5748, sum loss: 3828.386719, avg loss: 2.344389, ppl: 10.426897 +epoch: 1, batch: 5749, sum loss: 6088.029297, avg loss: 
3.308712, ppl: 27.349863 +epoch: 1, batch: 5750, sum loss: 3938.288818, avg loss: 2.666411, ppl: 14.388236 +epoch: 1, batch: 5751, sum loss: 4554.626465, avg loss: 2.752040, ppl: 15.674578 +epoch: 1, batch: 5752, sum loss: 5227.691895, avg loss: 3.004421, ppl: 20.174522 +epoch: 1, batch: 5753, sum loss: 4212.876465, avg loss: 2.654617, ppl: 14.219533 +epoch: 1, batch: 5754, sum loss: 4794.798828, avg loss: 3.004260, ppl: 20.171280 +epoch: 1, batch: 5755, sum loss: 3763.007812, avg loss: 2.470787, ppl: 11.831750 +epoch: 1, batch: 5756, sum loss: 5422.146484, avg loss: 3.141452, ppl: 23.137440 +epoch: 1, batch: 5757, sum loss: 4173.044434, avg loss: 2.613052, ppl: 13.640624 +epoch: 1, batch: 5758, sum loss: 4561.378418, avg loss: 2.791541, ppl: 16.306133 +epoch: 1, batch: 5759, sum loss: 4822.209961, avg loss: 2.805242, ppl: 16.531069 +epoch: 1, batch: 5760, sum loss: 4160.967773, avg loss: 2.579645, ppl: 13.192459 +epoch: 1, batch: 5761, sum loss: 5483.426270, avg loss: 2.830886, ppl: 16.960484 +epoch: 1, batch: 5762, sum loss: 3922.629639, avg loss: 2.636176, ppl: 13.959718 +epoch: 1, batch: 5763, sum loss: 4623.175293, avg loss: 2.790088, ppl: 16.282448 +epoch: 1, batch: 5764, sum loss: 4668.907715, avg loss: 2.671000, ppl: 14.454416 +epoch: 1, batch: 5765, sum loss: 5132.919922, avg loss: 2.752236, ppl: 15.677647 +epoch: 1, batch: 5766, sum loss: 4327.164551, avg loss: 2.591116, ppl: 13.344662 +epoch: 1, batch: 5767, sum loss: 4524.012207, avg loss: 2.616549, ppl: 13.688397 +epoch: 1, batch: 5768, sum loss: 4563.500977, avg loss: 2.679683, ppl: 14.580477 +epoch: 1, batch: 5769, sum loss: 4887.090820, avg loss: 2.841332, ppl: 17.138578 +epoch: 1, batch: 5770, sum loss: 4790.956055, avg loss: 2.893089, ppl: 18.048983 +epoch: 1, batch: 5771, sum loss: 4384.837402, avg loss: 2.757759, ppl: 15.764481 +epoch: 1, batch: 5772, sum loss: 4381.564941, avg loss: 2.731649, ppl: 15.358194 +epoch: 1, batch: 5773, sum loss: 4304.041504, avg loss: 2.802110, ppl: 16.479389 
+epoch: 1, batch: 5774, sum loss: 4550.241699, avg loss: 2.814003, ppl: 16.676533 +epoch: 1, batch: 5775, sum loss: 5481.670898, avg loss: 3.177780, ppl: 23.993439 +epoch: 1, batch: 5776, sum loss: 4860.874023, avg loss: 2.879665, ppl: 17.808304 +epoch: 1, batch: 5777, sum loss: 4146.732422, avg loss: 2.738925, ppl: 15.470345 +epoch: 1, batch: 5778, sum loss: 5016.258789, avg loss: 2.712958, ppl: 15.073792 +epoch: 1, batch: 5779, sum loss: 4530.829102, avg loss: 2.731060, ppl: 15.349153 +epoch: 1, batch: 5780, sum loss: 4724.229980, avg loss: 3.009064, ppl: 20.268414 +epoch: 1, batch: 5781, sum loss: 4725.650879, avg loss: 2.589398, ppl: 13.321745 +epoch: 1, batch: 5782, sum loss: 3607.793457, avg loss: 2.571485, ppl: 13.085242 +epoch: 1, batch: 5783, sum loss: 5313.555664, avg loss: 2.838438, ppl: 17.089052 +epoch: 1, batch: 5784, sum loss: 4428.186523, avg loss: 2.820501, ppl: 16.785259 +epoch: 1, batch: 5785, sum loss: 4605.641113, avg loss: 2.767813, ppl: 15.923774 +epoch: 1, batch: 5786, sum loss: 5489.374023, avg loss: 2.905968, ppl: 18.282936 +epoch: 1, batch: 5787, sum loss: 4606.400391, avg loss: 2.759976, ppl: 15.799470 +epoch: 1, batch: 5788, sum loss: 4560.318359, avg loss: 2.579366, ppl: 13.188770 +epoch: 1, batch: 5789, sum loss: 3909.647461, avg loss: 2.622165, ppl: 13.765490 +epoch: 1, batch: 5790, sum loss: 5216.573730, avg loss: 2.771825, ppl: 15.987779 +epoch: 1, batch: 5791, sum loss: 3528.006104, avg loss: 2.613338, ppl: 13.644517 +epoch: 1, batch: 5792, sum loss: 6207.753418, avg loss: 2.961715, ppl: 19.331087 +epoch: 1, batch: 5793, sum loss: 4286.449707, avg loss: 2.667361, ppl: 14.401919 +epoch: 1, batch: 5794, sum loss: 4257.018066, avg loss: 2.635925, ppl: 13.956210 +epoch: 1, batch: 5795, sum loss: 4696.794922, avg loss: 2.837943, ppl: 17.080587 +epoch: 1, batch: 5796, sum loss: 4652.675293, avg loss: 2.639067, ppl: 14.000138 +epoch: 1, batch: 5797, sum loss: 5368.688477, avg loss: 2.933710, ppl: 18.797232 +epoch: 1, batch: 5798, sum 
loss: 4406.854980, avg loss: 2.903067, ppl: 18.229965 +epoch: 1, batch: 5799, sum loss: 5149.234863, avg loss: 2.925702, ppl: 18.647305 +epoch: 1, batch: 5800, sum loss: 4292.404297, avg loss: 2.735758, ppl: 15.421430 +epoch: 1, batch: 5801, sum loss: 4153.323242, avg loss: 2.648803, ppl: 14.137106 +epoch: 1, batch: 5802, sum loss: 4474.775879, avg loss: 2.616828, ppl: 13.692223 +epoch: 1, batch: 5803, sum loss: 4414.679688, avg loss: 2.736937, ppl: 15.439626 +epoch: 1, batch: 5804, sum loss: 4805.978516, avg loss: 2.805592, ppl: 16.536856 +epoch: 1, batch: 5805, sum loss: 3738.317383, avg loss: 2.488893, ppl: 12.047932 +epoch: 1, batch: 5806, sum loss: 5110.562500, avg loss: 2.771455, ppl: 15.981868 +epoch: 1, batch: 5807, sum loss: 5306.043945, avg loss: 2.843539, ppl: 17.176445 +epoch: 1, batch: 5808, sum loss: 4692.211914, avg loss: 2.745589, ppl: 15.573789 +epoch: 1, batch: 5809, sum loss: 5007.560547, avg loss: 2.731893, ppl: 15.361944 +epoch: 1, batch: 5810, sum loss: 4069.143555, avg loss: 2.507174, ppl: 12.270205 +epoch: 1, batch: 5811, sum loss: 3910.650879, avg loss: 2.572797, ppl: 13.102418 +epoch: 1, batch: 5812, sum loss: 4229.494141, avg loss: 2.564884, ppl: 12.999153 +epoch: 1, batch: 5813, sum loss: 4743.617188, avg loss: 3.017568, ppl: 20.441519 +epoch: 1, batch: 5814, sum loss: 4663.519531, avg loss: 2.891209, ppl: 18.015070 +epoch: 1, batch: 5815, sum loss: 4932.851074, avg loss: 3.114174, ppl: 22.514818 +epoch: 1, batch: 5816, sum loss: 5110.361328, avg loss: 2.945453, ppl: 19.019279 +epoch: 1, batch: 5817, sum loss: 4718.132324, avg loss: 2.889242, ppl: 17.979675 +epoch: 1, batch: 5818, sum loss: 4148.060547, avg loss: 2.655609, ppl: 14.233646 +epoch: 1, batch: 5819, sum loss: 5536.548340, avg loss: 2.885122, ppl: 17.905745 +epoch: 1, batch: 5820, sum loss: 4709.047363, avg loss: 2.818101, ppl: 16.745028 +epoch: 1, batch: 5821, sum loss: 5251.247559, avg loss: 3.047735, ppl: 21.067577 +epoch: 1, batch: 5822, sum loss: 4593.322754, avg loss: 
2.879826, ppl: 17.811180 +epoch: 1, batch: 5823, sum loss: 3911.031982, avg loss: 2.510290, ppl: 12.308501 +epoch: 1, batch: 5824, sum loss: 3600.096191, avg loss: 2.486254, ppl: 12.016185 +epoch: 1, batch: 5825, sum loss: 5553.176270, avg loss: 2.887767, ppl: 17.953176 +epoch: 1, batch: 5826, sum loss: 3709.619141, avg loss: 2.483011, ppl: 11.977280 +epoch: 1, batch: 5827, sum loss: 4985.538086, avg loss: 2.831084, ppl: 16.963831 +epoch: 1, batch: 5828, sum loss: 4200.809570, avg loss: 2.670572, ppl: 14.448229 +epoch: 1, batch: 5829, sum loss: 4138.330078, avg loss: 2.624179, ppl: 13.793243 +epoch: 1, batch: 5830, sum loss: 4369.193359, avg loss: 2.561074, ppl: 12.949724 +epoch: 1, batch: 5831, sum loss: 5315.599609, avg loss: 2.901528, ppl: 18.201939 +epoch: 1, batch: 5832, sum loss: 4440.132324, avg loss: 2.524237, ppl: 12.481364 +epoch: 1, batch: 5833, sum loss: 5839.844238, avg loss: 2.946440, ppl: 19.038057 +epoch: 1, batch: 5834, sum loss: 4276.234863, avg loss: 2.672647, ppl: 14.478239 +epoch: 1, batch: 5835, sum loss: 4084.859863, avg loss: 2.562647, ppl: 12.970108 +epoch: 1, batch: 5836, sum loss: 4544.838379, avg loss: 2.856592, ppl: 17.402126 +epoch: 1, batch: 5837, sum loss: 5145.802734, avg loss: 2.922091, ppl: 18.580103 +epoch: 1, batch: 5838, sum loss: 5482.291992, avg loss: 2.877844, ppl: 17.775900 +epoch: 1, batch: 5839, sum loss: 4040.348633, avg loss: 2.573471, ppl: 13.111249 +epoch: 1, batch: 5840, sum loss: 4355.358398, avg loss: 2.589393, ppl: 13.321678 +epoch: 1, batch: 5841, sum loss: 4793.157227, avg loss: 2.915546, ppl: 18.458883 +epoch: 1, batch: 5842, sum loss: 6201.162109, avg loss: 3.053256, ppl: 21.184198 +epoch: 1, batch: 5843, sum loss: 3409.701904, avg loss: 2.396136, ppl: 10.980668 +epoch: 1, batch: 5844, sum loss: 5427.644531, avg loss: 3.030510, ppl: 20.707800 +epoch: 1, batch: 5845, sum loss: 4953.795410, avg loss: 2.856860, ppl: 17.406782 +epoch: 1, batch: 5846, sum loss: 5053.966309, avg loss: 2.950360, ppl: 19.112829 
+epoch: 1, batch: 5847, sum loss: 4424.050293, avg loss: 2.602383, ppl: 13.495855 +epoch: 1, batch: 5848, sum loss: 5007.331055, avg loss: 2.719898, ppl: 15.178766 +epoch: 1, batch: 5849, sum loss: 4640.096680, avg loss: 2.993611, ppl: 19.957617 +epoch: 1, batch: 5850, sum loss: 4458.464844, avg loss: 2.668142, ppl: 14.413162 +epoch: 1, batch: 5851, sum loss: 5154.138672, avg loss: 2.987907, ppl: 19.844099 +epoch: 1, batch: 5852, sum loss: 5747.759766, avg loss: 3.243657, ppl: 25.627266 +epoch: 1, batch: 5853, sum loss: 5756.978516, avg loss: 3.036381, ppl: 20.829729 +epoch: 1, batch: 5854, sum loss: 4891.634766, avg loss: 2.848943, ppl: 17.269514 +epoch: 1, batch: 5855, sum loss: 3589.405029, avg loss: 2.621917, ppl: 13.762087 +epoch: 1, batch: 5856, sum loss: 4375.270020, avg loss: 2.705795, ppl: 14.966208 +epoch: 1, batch: 5857, sum loss: 3897.317383, avg loss: 2.574186, ppl: 13.120630 +epoch: 1, batch: 5858, sum loss: 5958.642578, avg loss: 2.980812, ppl: 19.703806 +epoch: 1, batch: 5859, sum loss: 4633.309570, avg loss: 2.767807, ppl: 15.923683 +epoch: 1, batch: 5860, sum loss: 4794.225586, avg loss: 2.734869, ppl: 15.407728 +epoch: 1, batch: 5861, sum loss: 4919.755371, avg loss: 2.612722, ppl: 13.636117 +epoch: 1, batch: 5862, sum loss: 4706.818359, avg loss: 2.739708, ppl: 15.482463 +epoch: 1, batch: 5863, sum loss: 4049.436035, avg loss: 2.424812, ppl: 11.300106 +epoch: 1, batch: 5864, sum loss: 5930.659668, avg loss: 3.055466, ppl: 21.231075 +epoch: 1, batch: 5865, sum loss: 4274.163574, avg loss: 2.470615, ppl: 11.829716 +epoch: 1, batch: 5866, sum loss: 4018.239746, avg loss: 2.694997, ppl: 14.805469 +epoch: 1, batch: 5867, sum loss: 5632.193848, avg loss: 2.948793, ppl: 19.082901 +epoch: 1, batch: 5868, sum loss: 3496.867676, avg loss: 2.504920, ppl: 12.242574 +epoch: 1, batch: 5869, sum loss: 4413.062988, avg loss: 2.909072, ppl: 18.339779 +epoch: 1, batch: 5870, sum loss: 5221.967773, avg loss: 2.885065, ppl: 17.904737 +epoch: 1, batch: 5871, sum 
loss: 4611.982910, avg loss: 2.831174, ppl: 16.965368 +epoch: 1, batch: 5872, sum loss: 5854.962402, avg loss: 2.905689, ppl: 18.277828 +epoch: 1, batch: 5873, sum loss: 4346.169922, avg loss: 2.822188, ppl: 16.813601 +epoch: 1, batch: 5874, sum loss: 5469.892090, avg loss: 3.054099, ppl: 21.202080 +epoch: 1, batch: 5875, sum loss: 5925.839355, avg loss: 2.878018, ppl: 17.779003 +epoch: 1, batch: 5876, sum loss: 4974.541016, avg loss: 2.751405, ppl: 15.664633 +epoch: 1, batch: 5877, sum loss: 4343.584961, avg loss: 2.932873, ppl: 18.781513 +epoch: 1, batch: 5878, sum loss: 4489.981934, avg loss: 2.562775, ppl: 12.971766 +epoch: 1, batch: 5879, sum loss: 5559.900879, avg loss: 2.894274, ppl: 18.070387 +epoch: 1, batch: 5880, sum loss: 5595.286621, avg loss: 2.988935, ppl: 19.864521 +epoch: 1, batch: 5881, sum loss: 4880.011719, avg loss: 2.946867, ppl: 19.046188 +epoch: 1, batch: 5882, sum loss: 4033.402832, avg loss: 2.690729, ppl: 14.742421 +epoch: 1, batch: 5883, sum loss: 5442.689453, avg loss: 3.000380, ppl: 20.093176 +epoch: 1, batch: 5884, sum loss: 4187.484863, avg loss: 2.729781, ppl: 15.329535 +epoch: 1, batch: 5885, sum loss: 4384.735352, avg loss: 2.665493, ppl: 14.375028 +epoch: 1, batch: 5886, sum loss: 3608.041504, avg loss: 2.528410, ppl: 12.533567 +epoch: 1, batch: 5887, sum loss: 3603.270752, avg loss: 2.312754, ppl: 10.102209 +epoch: 1, batch: 5888, sum loss: 6198.496094, avg loss: 3.038478, ppl: 20.873457 +epoch: 1, batch: 5889, sum loss: 4476.347168, avg loss: 2.696595, ppl: 14.829148 +epoch: 1, batch: 5890, sum loss: 4968.623535, avg loss: 3.086102, ppl: 21.891567 +epoch: 1, batch: 5891, sum loss: 3836.020020, avg loss: 2.688171, ppl: 14.704759 +epoch: 1, batch: 5892, sum loss: 3525.108398, avg loss: 2.404576, ppl: 11.073734 +epoch: 1, batch: 5893, sum loss: 5280.110352, avg loss: 2.931766, ppl: 18.760729 +epoch: 1, batch: 5894, sum loss: 4206.082031, avg loss: 2.665451, ppl: 14.374435 +epoch: 1, batch: 5895, sum loss: 4865.534668, avg loss: 
3.137031, ppl: 23.035364 +epoch: 1, batch: 5896, sum loss: 5537.962891, avg loss: 3.118222, ppl: 22.606161 +epoch: 1, batch: 5897, sum loss: 4127.682617, avg loss: 2.620751, ppl: 13.746038 +epoch: 1, batch: 5898, sum loss: 5155.701660, avg loss: 2.891588, ppl: 18.021910 +epoch: 1, batch: 5899, sum loss: 5315.196289, avg loss: 2.731344, ppl: 15.353516 +epoch: 1, batch: 5900, sum loss: 4643.940918, avg loss: 2.823064, ppl: 16.828339 +epoch: 1, batch: 5901, sum loss: 5203.455566, avg loss: 2.730040, ppl: 15.333494 +epoch: 1, batch: 5902, sum loss: 5464.994629, avg loss: 2.955649, ppl: 19.214182 +epoch: 1, batch: 5903, sum loss: 5753.819336, avg loss: 2.884120, ppl: 17.887819 +epoch: 1, batch: 5904, sum loss: 4322.133789, avg loss: 2.686224, ppl: 14.676147 +epoch: 1, batch: 5905, sum loss: 4544.950684, avg loss: 2.847713, ppl: 17.248299 +epoch: 1, batch: 5906, sum loss: 4747.495117, avg loss: 2.741048, ppl: 15.503222 +epoch: 1, batch: 5907, sum loss: 4873.255859, avg loss: 2.493990, ppl: 12.109493 +epoch: 1, batch: 5908, sum loss: 5201.961914, avg loss: 2.974249, ppl: 19.574919 +epoch: 1, batch: 5909, sum loss: 4835.950684, avg loss: 2.946953, ppl: 19.047832 +epoch: 1, batch: 5910, sum loss: 4676.338867, avg loss: 2.657011, ppl: 14.253615 +epoch: 1, batch: 5911, sum loss: 4075.109863, avg loss: 2.589015, ppl: 13.316648 +epoch: 1, batch: 5912, sum loss: 4661.662598, avg loss: 2.828679, ppl: 16.923088 +epoch: 1, batch: 5913, sum loss: 4245.636230, avg loss: 2.609488, ppl: 13.592084 +epoch: 1, batch: 5914, sum loss: 5536.698242, avg loss: 2.765584, ppl: 15.888309 +epoch: 1, batch: 5915, sum loss: 4072.944336, avg loss: 2.458023, ppl: 11.681695 +epoch: 1, batch: 5916, sum loss: 4029.105957, avg loss: 2.700473, ppl: 14.886776 +epoch: 1, batch: 5917, sum loss: 5602.800781, avg loss: 2.687195, ppl: 14.690406 +epoch: 1, batch: 5918, sum loss: 4759.011719, avg loss: 2.681133, ppl: 14.601631 +epoch: 1, batch: 5919, sum loss: 4357.820312, avg loss: 2.655588, ppl: 14.233355 
+epoch: 1, batch: 5920, sum loss: 3628.997070, avg loss: 2.272384, ppl: 9.702503 +epoch: 1, batch: 5921, sum loss: 4805.273438, avg loss: 2.875687, ppl: 17.737612 +epoch: 1, batch: 5922, sum loss: 4355.834473, avg loss: 2.503353, ppl: 12.223412 +epoch: 1, batch: 5923, sum loss: 4434.976562, avg loss: 2.771860, ppl: 15.988351 +epoch: 1, batch: 5924, sum loss: 4562.201172, avg loss: 2.691564, ppl: 14.754736 +epoch: 1, batch: 5925, sum loss: 4645.160156, avg loss: 2.941837, ppl: 18.950619 +epoch: 1, batch: 5926, sum loss: 5567.067383, avg loss: 2.815917, ppl: 16.708487 +epoch: 1, batch: 5927, sum loss: 4706.699219, avg loss: 2.691080, ppl: 14.747596 +epoch: 1, batch: 5928, sum loss: 4110.241211, avg loss: 2.653481, ppl: 14.203388 +epoch: 1, batch: 5929, sum loss: 3693.739746, avg loss: 2.554454, ppl: 12.864269 +epoch: 1, batch: 5930, sum loss: 4836.406250, avg loss: 2.821707, ppl: 16.805517 +epoch: 1, batch: 5931, sum loss: 5234.333984, avg loss: 2.879172, ppl: 17.799522 +epoch: 1, batch: 5932, sum loss: 5118.562500, avg loss: 3.009149, ppl: 20.270140 +epoch: 1, batch: 5933, sum loss: 4570.275391, avg loss: 2.567570, ppl: 13.034119 +epoch: 1, batch: 5934, sum loss: 4380.322266, avg loss: 2.908581, ppl: 18.330761 +epoch: 1, batch: 5935, sum loss: 4488.888672, avg loss: 2.870133, ppl: 17.639372 +epoch: 1, batch: 5936, sum loss: 4736.727051, avg loss: 2.784672, ppl: 16.194506 +epoch: 1, batch: 5937, sum loss: 3958.234375, avg loss: 2.503627, ppl: 12.226761 +epoch: 1, batch: 5938, sum loss: 4486.947754, avg loss: 2.778296, ppl: 16.091574 +epoch: 1, batch: 5939, sum loss: 4219.195312, avg loss: 2.622247, ppl: 13.766623 +epoch: 1, batch: 5940, sum loss: 4181.939453, avg loss: 2.802909, ppl: 16.492548 +epoch: 1, batch: 5941, sum loss: 4989.104980, avg loss: 2.890559, ppl: 18.003370 +epoch: 1, batch: 5942, sum loss: 4466.267090, avg loss: 2.817834, ppl: 16.740553 +epoch: 1, batch: 5943, sum loss: 5205.060547, avg loss: 3.054613, ppl: 21.212976 +epoch: 1, batch: 5944, sum 
loss: 4558.405762, avg loss: 2.627323, ppl: 13.836681 +epoch: 1, batch: 5945, sum loss: 4831.067383, avg loss: 2.851870, ppl: 17.320133 +epoch: 1, batch: 5946, sum loss: 4360.541992, avg loss: 2.798808, ppl: 16.425051 +epoch: 1, batch: 5947, sum loss: 4607.594727, avg loss: 2.894218, ppl: 18.069361 +epoch: 1, batch: 5948, sum loss: 3728.714111, avg loss: 2.751818, ppl: 15.671103 +epoch: 1, batch: 5949, sum loss: 3965.860840, avg loss: 2.376190, ppl: 10.763814 +epoch: 1, batch: 5950, sum loss: 4821.374023, avg loss: 2.831106, ppl: 16.964216 +epoch: 1, batch: 5951, sum loss: 5223.185547, avg loss: 3.020929, ppl: 20.510328 +epoch: 1, batch: 5952, sum loss: 3705.650635, avg loss: 2.593178, ppl: 13.372195 +epoch: 1, batch: 5953, sum loss: 4978.490723, avg loss: 2.916515, ppl: 18.476782 +epoch: 1, batch: 5954, sum loss: 4668.542969, avg loss: 2.640579, ppl: 14.021317 +epoch: 1, batch: 5955, sum loss: 5282.652344, avg loss: 2.888274, ppl: 17.962275 +epoch: 1, batch: 5956, sum loss: 4653.318359, avg loss: 2.611290, ppl: 13.616605 +epoch: 1, batch: 5957, sum loss: 4522.489258, avg loss: 2.737584, ppl: 15.449619 +epoch: 1, batch: 5958, sum loss: 3313.299561, avg loss: 2.525381, ppl: 12.495650 +epoch: 1, batch: 5959, sum loss: 5458.104004, avg loss: 3.017194, ppl: 20.433880 +epoch: 1, batch: 5960, sum loss: 3727.525146, avg loss: 2.417331, ppl: 11.215889 +epoch: 1, batch: 5961, sum loss: 4493.527832, avg loss: 2.685910, ppl: 14.671550 +epoch: 1, batch: 5962, sum loss: 3879.945801, avg loss: 2.479198, ppl: 11.931697 +epoch: 1, batch: 5963, sum loss: 4237.487305, avg loss: 2.702479, ppl: 14.916666 +epoch: 1, batch: 5964, sum loss: 4121.951172, avg loss: 2.632153, ppl: 13.903666 +epoch: 1, batch: 5965, sum loss: 4616.557617, avg loss: 2.620067, ppl: 13.736642 +epoch: 1, batch: 5966, sum loss: 5541.812500, avg loss: 3.136283, ppl: 23.018158 +epoch: 1, batch: 5967, sum loss: 4341.186035, avg loss: 2.829978, ppl: 16.945084 +epoch: 1, batch: 5968, sum loss: 4859.598145, avg loss: 
2.792872, ppl: 16.327852 +epoch: 1, batch: 5969, sum loss: 4917.200195, avg loss: 2.994641, ppl: 19.978184 +epoch: 1, batch: 5970, sum loss: 4706.695801, avg loss: 2.930695, ppl: 18.740643 +epoch: 1, batch: 5971, sum loss: 4528.930664, avg loss: 2.753149, ppl: 15.691973 +epoch: 1, batch: 5972, sum loss: 4833.207031, avg loss: 2.721400, ppl: 15.201597 +epoch: 1, batch: 5973, sum loss: 5092.617188, avg loss: 2.651024, ppl: 14.168541 +epoch: 1, batch: 5974, sum loss: 4072.887939, avg loss: 2.731649, ppl: 15.358187 +epoch: 1, batch: 5975, sum loss: 4568.510254, avg loss: 2.607597, ppl: 13.566417 +epoch: 1, batch: 5976, sum loss: 5608.178711, avg loss: 2.919406, ppl: 18.530275 +epoch: 1, batch: 5977, sum loss: 3619.510742, avg loss: 2.550748, ppl: 12.816683 +epoch: 1, batch: 5978, sum loss: 4325.262695, avg loss: 2.828818, ppl: 16.925444 +epoch: 1, batch: 5979, sum loss: 7028.058594, avg loss: 3.374008, ppl: 29.195307 +epoch: 1, batch: 5980, sum loss: 4067.779297, avg loss: 2.624374, ppl: 13.795934 +epoch: 1, batch: 5981, sum loss: 5236.854004, avg loss: 2.833796, ppl: 17.009901 +epoch: 1, batch: 5982, sum loss: 4657.912598, avg loss: 2.911195, ppl: 18.378754 +epoch: 1, batch: 5983, sum loss: 4042.352295, avg loss: 2.483017, ppl: 11.977351 +epoch: 1, batch: 5984, sum loss: 5526.901367, avg loss: 2.815538, ppl: 16.702162 +epoch: 1, batch: 5985, sum loss: 5067.656250, avg loss: 2.970490, ppl: 19.501472 +epoch: 1, batch: 5986, sum loss: 4797.800293, avg loss: 2.752611, ppl: 15.683523 +epoch: 1, batch: 5987, sum loss: 5164.853027, avg loss: 2.994118, ppl: 19.967741 +epoch: 1, batch: 5988, sum loss: 4536.891602, avg loss: 2.684551, ppl: 14.651625 +epoch: 1, batch: 5989, sum loss: 4683.798340, avg loss: 2.799640, ppl: 16.438734 +epoch: 1, batch: 5990, sum loss: 4484.408691, avg loss: 2.636337, ppl: 13.961961 +epoch: 1, batch: 5991, sum loss: 4869.246094, avg loss: 3.026256, ppl: 20.619888 +epoch: 1, batch: 5992, sum loss: 3741.321289, avg loss: 2.215110, ppl: 9.162419 +epoch: 
1, batch: 5993, sum loss: 4582.854492, avg loss: 2.791020, ppl: 16.297634 +epoch: 1, batch: 5994, sum loss: 5034.139648, avg loss: 2.969994, ppl: 19.491798 +epoch: 1, batch: 5995, sum loss: 4537.582520, avg loss: 2.850240, ppl: 17.291935 +epoch: 1, batch: 5996, sum loss: 4828.472168, avg loss: 2.689957, ppl: 14.731037 +epoch: 1, batch: 5997, sum loss: 4361.312500, avg loss: 2.497888, ppl: 12.156793 +epoch: 1, batch: 5998, sum loss: 4766.454102, avg loss: 2.820387, ppl: 16.783346 +epoch: 1, batch: 5999, sum loss: 4318.119141, avg loss: 2.863474, ppl: 17.522297 +epoch: 1, batch: 6000, sum loss: 4904.671875, avg loss: 2.940451, ppl: 18.924377 +epoch: 1, batch: 6001, sum loss: 4805.609863, avg loss: 2.793959, ppl: 16.345606 +epoch: 1, batch: 6002, sum loss: 5519.113770, avg loss: 3.071293, ppl: 21.569782 +epoch: 1, batch: 6003, sum loss: 4848.409180, avg loss: 2.742313, ppl: 15.522846 +epoch: 1, batch: 6004, sum loss: 4824.712402, avg loss: 2.733548, ppl: 15.387387 +epoch: 1, batch: 6005, sum loss: 4403.643066, avg loss: 2.842894, ppl: 17.165371 +epoch: 1, batch: 6006, sum loss: 4496.660645, avg loss: 2.681372, ppl: 14.605117 +epoch: 1, batch: 6007, sum loss: 4533.612305, avg loss: 3.103089, ppl: 22.266619 +epoch: 1, batch: 6008, sum loss: 4619.960449, avg loss: 2.860657, ppl: 17.472998 +epoch: 1, batch: 6009, sum loss: 5082.210938, avg loss: 2.821883, ppl: 16.808466 +epoch: 1, batch: 6010, sum loss: 4785.528320, avg loss: 2.836709, ppl: 17.059534 +epoch: 1, batch: 6011, sum loss: 4271.174805, avg loss: 2.730930, ppl: 15.347155 +epoch: 1, batch: 6012, sum loss: 4766.741211, avg loss: 2.691553, ppl: 14.754578 +epoch: 1, batch: 6013, sum loss: 4281.338867, avg loss: 2.861858, ppl: 17.493994 +epoch: 1, batch: 6014, sum loss: 4563.511230, avg loss: 2.651663, ppl: 14.177590 +epoch: 1, batch: 6015, sum loss: 3598.526367, avg loss: 2.555772, ppl: 12.881235 +epoch: 1, batch: 6016, sum loss: 4543.485352, avg loss: 3.006939, ppl: 20.225403 +epoch: 1, batch: 6017, sum loss: 
4341.490234, avg loss: 2.609069, ppl: 13.586391 +epoch: 1, batch: 6018, sum loss: 5558.606934, avg loss: 2.886089, ppl: 17.923073 +epoch: 1, batch: 6019, sum loss: 4414.722656, avg loss: 2.538656, ppl: 12.662641 +epoch: 1, batch: 6020, sum loss: 4484.064941, avg loss: 2.669086, ppl: 14.426784 +epoch: 1, batch: 6021, sum loss: 5526.504883, avg loss: 2.899530, ppl: 18.165609 +epoch: 1, batch: 6022, sum loss: 4109.599609, avg loss: 2.602660, ppl: 13.499596 +epoch: 1, batch: 6023, sum loss: 5083.022461, avg loss: 2.839677, ppl: 17.110243 +epoch: 1, batch: 6024, sum loss: 4130.232910, avg loss: 2.790698, ppl: 16.292389 +epoch: 1, batch: 6025, sum loss: 4568.438477, avg loss: 2.738872, ppl: 15.469526 +epoch: 1, batch: 6026, sum loss: 5288.205078, avg loss: 3.242308, ppl: 25.592731 +epoch: 1, batch: 6027, sum loss: 4406.447266, avg loss: 2.781848, ppl: 16.148836 +epoch: 1, batch: 6028, sum loss: 4229.997559, avg loss: 2.838925, ppl: 17.097370 +epoch: 1, batch: 6029, sum loss: 5471.515625, avg loss: 2.884299, ppl: 17.891027 +epoch: 1, batch: 6030, sum loss: 3945.861816, avg loss: 2.522929, ppl: 12.465058 +epoch: 1, batch: 6031, sum loss: 5464.366699, avg loss: 2.835686, ppl: 17.042088 +epoch: 1, batch: 6032, sum loss: 5029.988281, avg loss: 2.845016, ppl: 17.201834 +epoch: 1, batch: 6033, sum loss: 5372.526367, avg loss: 2.885353, ppl: 17.909882 +epoch: 1, batch: 6034, sum loss: 4475.535156, avg loss: 2.747413, ppl: 15.602212 +epoch: 1, batch: 6035, sum loss: 4784.447266, avg loss: 2.922692, ppl: 18.591274 +epoch: 1, batch: 6036, sum loss: 4346.075195, avg loss: 2.711213, ppl: 15.047523 +epoch: 1, batch: 6037, sum loss: 5248.319824, avg loss: 2.803590, ppl: 16.503786 +epoch: 1, batch: 6038, sum loss: 4473.604004, avg loss: 2.776911, ppl: 16.069311 +epoch: 1, batch: 6039, sum loss: 4961.820312, avg loss: 2.687877, ppl: 14.700429 +epoch: 1, batch: 6040, sum loss: 4664.382324, avg loss: 2.578432, ppl: 13.176456 +epoch: 1, batch: 6041, sum loss: 4238.500000, avg loss: 
2.539545, ppl: 12.673901 +epoch: 1, batch: 6042, sum loss: 3863.699707, avg loss: 2.677547, ppl: 14.549353 +epoch: 1, batch: 6043, sum loss: 4343.370605, avg loss: 2.807609, ppl: 16.570250 +epoch: 1, batch: 6044, sum loss: 5357.446777, avg loss: 2.897483, ppl: 18.128466 +epoch: 1, batch: 6045, sum loss: 5272.702148, avg loss: 2.950589, ppl: 19.117208 +epoch: 1, batch: 6046, sum loss: 4103.947754, avg loss: 2.746953, ppl: 15.595041 +epoch: 1, batch: 6047, sum loss: 5527.579590, avg loss: 2.898573, ppl: 18.148237 +epoch: 1, batch: 6048, sum loss: 4153.503418, avg loss: 2.747026, ppl: 15.596183 +epoch: 1, batch: 6049, sum loss: 4160.479980, avg loss: 2.656756, ppl: 14.249986 +epoch: 1, batch: 6050, sum loss: 5260.624512, avg loss: 2.965403, ppl: 19.402514 +epoch: 1, batch: 6051, sum loss: 5127.113770, avg loss: 2.717071, ppl: 15.135928 +epoch: 1, batch: 6052, sum loss: 5231.293945, avg loss: 2.912747, ppl: 18.407301 +epoch: 1, batch: 6053, sum loss: 3675.035645, avg loss: 2.769432, ppl: 15.949566 +epoch: 1, batch: 6054, sum loss: 5391.209961, avg loss: 3.073666, ppl: 21.621017 +epoch: 1, batch: 6055, sum loss: 4965.763184, avg loss: 2.720966, ppl: 15.194995 +epoch: 1, batch: 6056, sum loss: 5360.535645, avg loss: 2.979731, ppl: 19.682514 +epoch: 1, batch: 6057, sum loss: 4818.508301, avg loss: 2.906217, ppl: 18.287493 +epoch: 1, batch: 6058, sum loss: 4518.998535, avg loss: 2.648885, ppl: 14.138269 +epoch: 1, batch: 6059, sum loss: 4256.028809, avg loss: 2.735237, ppl: 15.413398 +epoch: 1, batch: 6060, sum loss: 3707.671875, avg loss: 2.376713, ppl: 10.769444 +epoch: 1, batch: 6061, sum loss: 4538.047852, avg loss: 2.706051, ppl: 14.970043 +epoch: 1, batch: 6062, sum loss: 4421.267578, avg loss: 2.623898, ppl: 13.789364 +epoch: 1, batch: 6063, sum loss: 5961.402832, avg loss: 2.982193, ppl: 19.731030 +epoch: 1, batch: 6064, sum loss: 4053.539795, avg loss: 2.620259, ppl: 13.739276 +epoch: 1, batch: 6065, sum loss: 5341.207031, avg loss: 2.925086, ppl: 18.635830 
+epoch: 1, batch: 6066, sum loss: 5016.884277, avg loss: 2.782520, ppl: 16.159697 +epoch: 1, batch: 6067, sum loss: 4139.619141, avg loss: 2.772685, ppl: 16.001545 +epoch: 1, batch: 6068, sum loss: 3611.591553, avg loss: 2.561413, ppl: 12.954103 +epoch: 1, batch: 6069, sum loss: 4813.234863, avg loss: 2.569800, ppl: 13.063207 +epoch: 1, batch: 6070, sum loss: 4410.133301, avg loss: 2.810792, ppl: 16.623085 +epoch: 1, batch: 6071, sum loss: 4077.734619, avg loss: 2.665186, ppl: 14.370625 +epoch: 1, batch: 6072, sum loss: 4358.587402, avg loss: 2.630409, ppl: 13.879442 +epoch: 1, batch: 6073, sum loss: 5278.021484, avg loss: 3.000581, ppl: 20.097206 +epoch: 1, batch: 6074, sum loss: 4211.253906, avg loss: 2.632034, ppl: 13.902012 +epoch: 1, batch: 6075, sum loss: 3924.461426, avg loss: 2.783306, ppl: 16.172401 +epoch: 1, batch: 6076, sum loss: 5212.617188, avg loss: 2.892684, ppl: 18.041677 +epoch: 1, batch: 6077, sum loss: 4556.253906, avg loss: 2.664476, ppl: 14.360421 +epoch: 1, batch: 6078, sum loss: 4064.152344, avg loss: 2.543274, ppl: 12.721257 +epoch: 1, batch: 6079, sum loss: 3679.988037, avg loss: 2.430640, ppl: 11.366148 +epoch: 1, batch: 6080, sum loss: 4801.967773, avg loss: 2.761339, ppl: 15.821009 +epoch: 1, batch: 6081, sum loss: 5111.581543, avg loss: 2.833471, ppl: 17.004377 +epoch: 1, batch: 6082, sum loss: 4334.456543, avg loss: 2.636531, ppl: 13.964671 +epoch: 1, batch: 6083, sum loss: 3509.015381, avg loss: 2.369355, ppl: 10.690499 +epoch: 1, batch: 6084, sum loss: 4552.168457, avg loss: 2.780799, ppl: 16.131912 +epoch: 1, batch: 6085, sum loss: 5150.360840, avg loss: 2.685277, ppl: 14.662258 +epoch: 1, batch: 6086, sum loss: 4873.652344, avg loss: 2.883818, ppl: 17.882416 +epoch: 1, batch: 6087, sum loss: 5561.642090, avg loss: 2.930265, ppl: 18.732588 +epoch: 1, batch: 6088, sum loss: 5573.352051, avg loss: 3.067338, ppl: 21.484625 +epoch: 1, batch: 6089, sum loss: 4003.062500, avg loss: 2.572662, ppl: 13.100657 +epoch: 1, batch: 6090, sum 
loss: 4994.000000, avg loss: 2.910256, ppl: 18.361506 +epoch: 1, batch: 6091, sum loss: 4666.468750, avg loss: 2.691158, ppl: 14.748750 +epoch: 1, batch: 6092, sum loss: 4890.301270, avg loss: 3.033686, ppl: 20.773657 +epoch: 1, batch: 6093, sum loss: 5455.516113, avg loss: 2.964954, ppl: 19.393818 +epoch: 1, batch: 6094, sum loss: 4763.831543, avg loss: 2.828879, ppl: 16.926470 +epoch: 1, batch: 6095, sum loss: 3709.207031, avg loss: 2.530155, ppl: 12.555451 +epoch: 1, batch: 6096, sum loss: 5237.119629, avg loss: 2.826292, ppl: 16.882748 +epoch: 1, batch: 6097, sum loss: 3837.016113, avg loss: 2.696427, ppl: 14.826667 +epoch: 1, batch: 6098, sum loss: 3839.005371, avg loss: 2.609793, ppl: 13.596235 +epoch: 1, batch: 6099, sum loss: 4429.804199, avg loss: 2.867187, ppl: 17.587479 +epoch: 1, batch: 6100, sum loss: 4678.495117, avg loss: 2.814979, ppl: 16.692827 +epoch: 1, batch: 6101, sum loss: 4842.934082, avg loss: 2.809127, ppl: 16.595417 +epoch: 1, batch: 6102, sum loss: 6202.337402, avg loss: 3.374503, ppl: 29.209774 +epoch: 1, batch: 6103, sum loss: 4233.988770, avg loss: 2.905963, ppl: 18.282848 +epoch: 1, batch: 6104, sum loss: 4609.989746, avg loss: 2.747312, ppl: 15.600642 +epoch: 1, batch: 6105, sum loss: 4157.928711, avg loss: 2.708748, ppl: 15.010476 +epoch: 1, batch: 6106, sum loss: 4965.546875, avg loss: 2.726824, ppl: 15.284271 +epoch: 1, batch: 6107, sum loss: 3895.289062, avg loss: 2.616044, ppl: 13.681486 +epoch: 1, batch: 6108, sum loss: 4589.550293, avg loss: 2.594432, ppl: 13.388982 +epoch: 1, batch: 6109, sum loss: 4414.428711, avg loss: 2.866512, ppl: 17.575609 +epoch: 1, batch: 6110, sum loss: 4338.720215, avg loss: 2.615262, ppl: 13.670801 +epoch: 1, batch: 6111, sum loss: 4751.806641, avg loss: 2.608017, ppl: 13.572110 +epoch: 1, batch: 6112, sum loss: 4676.957031, avg loss: 2.903139, ppl: 18.231285 +epoch: 1, batch: 6113, sum loss: 4653.916992, avg loss: 3.035823, ppl: 20.818111 +epoch: 1, batch: 6114, sum loss: 4355.538086, avg loss: 
2.678683, ppl: 14.565891 +epoch: 1, batch: 6115, sum loss: 5633.717285, avg loss: 2.929650, ppl: 18.721079 +epoch: 1, batch: 6116, sum loss: 4986.036133, avg loss: 2.815379, ppl: 16.699503 +epoch: 1, batch: 6117, sum loss: 4468.280273, avg loss: 2.599349, ppl: 13.454973 +epoch: 1, batch: 6118, sum loss: 3509.908691, avg loss: 2.454482, ppl: 11.640398 +epoch: 1, batch: 6119, sum loss: 4522.823242, avg loss: 2.726235, ppl: 15.275262 +epoch: 1, batch: 6120, sum loss: 4552.670898, avg loss: 2.653072, ppl: 14.197581 +epoch: 1, batch: 6121, sum loss: 4182.251953, avg loss: 2.604142, ppl: 13.519620 +epoch: 1, batch: 6122, sum loss: 6035.264160, avg loss: 3.038904, ppl: 20.882353 +epoch: 1, batch: 6123, sum loss: 4850.883789, avg loss: 2.897780, ppl: 18.133846 +epoch: 1, batch: 6124, sum loss: 5708.107910, avg loss: 2.787162, ppl: 16.234880 +epoch: 1, batch: 6125, sum loss: 4404.026367, avg loss: 2.599779, ppl: 13.460768 +epoch: 1, batch: 6126, sum loss: 4194.481934, avg loss: 2.785181, ppl: 16.202744 +epoch: 1, batch: 6127, sum loss: 4448.302734, avg loss: 2.577232, ppl: 13.160661 +epoch: 1, batch: 6128, sum loss: 5297.388672, avg loss: 2.905863, ppl: 18.281019 +epoch: 1, batch: 6129, sum loss: 4841.319336, avg loss: 2.692614, ppl: 14.770229 +epoch: 1, batch: 6130, sum loss: 4255.185547, avg loss: 2.597793, ppl: 13.434057 +epoch: 1, batch: 6131, sum loss: 4421.588867, avg loss: 2.736132, ppl: 15.427192 +epoch: 1, batch: 6132, sum loss: 4764.363770, avg loss: 2.622104, ppl: 13.764660 +epoch: 1, batch: 6133, sum loss: 5304.191895, avg loss: 3.003506, ppl: 20.156084 +epoch: 1, batch: 6134, sum loss: 4693.528320, avg loss: 2.938966, ppl: 18.896294 +epoch: 1, batch: 6135, sum loss: 4915.526855, avg loss: 2.922430, ppl: 18.586393 +epoch: 1, batch: 6136, sum loss: 5483.409668, avg loss: 2.790539, ppl: 16.289803 +epoch: 1, batch: 6137, sum loss: 4197.726562, avg loss: 2.605665, ppl: 13.540230 +epoch: 1, batch: 6138, sum loss: 5044.315430, avg loss: 2.872617, ppl: 17.683235 
+epoch: 1, batch: 6139, sum loss: 4921.093750, avg loss: 2.872792, ppl: 17.686338 +epoch: 1, batch: 6140, sum loss: 4280.785156, avg loss: 2.627861, ppl: 13.844123 +epoch: 1, batch: 6141, sum loss: 5130.207031, avg loss: 2.853285, ppl: 17.344671 +epoch: 1, batch: 6142, sum loss: 4681.236816, avg loss: 2.798109, ppl: 16.413584 +epoch: 1, batch: 6143, sum loss: 4836.279297, avg loss: 2.895976, ppl: 18.101152 +epoch: 1, batch: 6144, sum loss: 4647.268066, avg loss: 2.919138, ppl: 18.525316 +epoch: 1, batch: 6145, sum loss: 5513.263672, avg loss: 2.956173, ppl: 19.224268 +epoch: 1, batch: 6146, sum loss: 4143.981445, avg loss: 2.687407, ppl: 14.693526 +epoch: 1, batch: 6147, sum loss: 4523.092285, avg loss: 2.912487, ppl: 18.402510 +epoch: 1, batch: 6148, sum loss: 4550.921875, avg loss: 2.633635, ppl: 13.924296 +epoch: 1, batch: 6149, sum loss: 5429.420410, avg loss: 2.860601, ppl: 17.472023 +epoch: 1, batch: 6150, sum loss: 3989.041016, avg loss: 2.597032, ppl: 13.423838 +epoch: 1, batch: 6151, sum loss: 5431.193359, avg loss: 2.924714, ppl: 18.628891 +epoch: 1, batch: 6152, sum loss: 5059.462891, avg loss: 2.909410, ppl: 18.345963 +epoch: 1, batch: 6153, sum loss: 4978.560059, avg loss: 2.876118, ppl: 17.745247 +epoch: 1, batch: 6154, sum loss: 3249.457764, avg loss: 2.471071, ppl: 11.835110 +epoch: 1, batch: 6155, sum loss: 5112.404297, avg loss: 2.827657, ppl: 16.905807 +epoch: 1, batch: 6156, sum loss: 4590.877930, avg loss: 2.740823, ppl: 15.499733 +epoch: 1, batch: 6157, sum loss: 4377.669434, avg loss: 2.727520, ppl: 15.294905 +epoch: 1, batch: 6158, sum loss: 4788.348633, avg loss: 2.862133, ppl: 17.498817 +epoch: 1, batch: 6159, sum loss: 4391.335938, avg loss: 2.648574, ppl: 14.133871 +epoch: 1, batch: 6160, sum loss: 5315.833984, avg loss: 3.067417, ppl: 21.486336 +epoch: 1, batch: 6161, sum loss: 3824.103027, avg loss: 2.582109, ppl: 13.224997 +epoch: 1, batch: 6162, sum loss: 4365.400391, avg loss: 2.598453, ppl: 13.442924 +epoch: 1, batch: 6163, sum 
loss: 4124.963379, avg loss: 2.768432, ppl: 15.933629 +epoch: 1, batch: 6164, sum loss: 5574.753906, avg loss: 2.924845, ppl: 18.631329 +epoch: 1, batch: 6165, sum loss: 4331.810547, avg loss: 2.660817, ppl: 14.307976 +epoch: 1, batch: 6166, sum loss: 5114.316895, avg loss: 2.915802, ppl: 18.463614 +epoch: 1, batch: 6167, sum loss: 4665.055664, avg loss: 2.653615, ppl: 14.205301 +epoch: 1, batch: 6168, sum loss: 5740.450684, avg loss: 3.000758, ppl: 20.100771 +epoch: 1, batch: 6169, sum loss: 3913.441895, avg loss: 2.499005, ppl: 12.170380 +epoch: 1, batch: 6170, sum loss: 3882.607178, avg loss: 2.463583, ppl: 11.746828 +epoch: 1, batch: 6171, sum loss: 5747.831055, avg loss: 3.106936, ppl: 22.352446 +epoch: 1, batch: 6172, sum loss: 4610.462891, avg loss: 2.835463, ppl: 17.038288 +epoch: 1, batch: 6173, sum loss: 5523.068359, avg loss: 2.739617, ppl: 15.481060 +epoch: 1, batch: 6174, sum loss: 5097.978516, avg loss: 3.050855, ppl: 21.133410 +epoch: 1, batch: 6175, sum loss: 4672.214844, avg loss: 2.889434, ppl: 17.983130 +epoch: 1, batch: 6176, sum loss: 4397.072754, avg loss: 2.607991, ppl: 13.571757 +epoch: 1, batch: 6177, sum loss: 4320.811035, avg loss: 2.680404, ppl: 14.590986 +epoch: 1, batch: 6178, sum loss: 5018.125488, avg loss: 2.856076, ppl: 17.393143 +epoch: 1, batch: 6179, sum loss: 4800.680664, avg loss: 2.710717, ppl: 15.040062 +epoch: 1, batch: 6180, sum loss: 4341.997070, avg loss: 2.850950, ppl: 17.304216 +epoch: 1, batch: 6181, sum loss: 4428.101074, avg loss: 2.730025, ppl: 15.333275 +epoch: 1, batch: 6182, sum loss: 3784.719971, avg loss: 2.465616, ppl: 11.770725 +epoch: 1, batch: 6183, sum loss: 5498.333008, avg loss: 2.937144, ppl: 18.861897 +epoch: 1, batch: 6184, sum loss: 5165.095703, avg loss: 3.002963, ppl: 20.145130 +epoch: 1, batch: 6185, sum loss: 4584.553711, avg loss: 2.828226, ppl: 16.915421 +epoch: 1, batch: 6186, sum loss: 4815.240723, avg loss: 3.049551, ppl: 21.105862 +epoch: 1, batch: 6187, sum loss: 4467.748047, avg loss: 
2.776723, ppl: 16.066292 +epoch: 1, batch: 6188, sum loss: 4392.414551, avg loss: 2.630188, ppl: 13.876385 +epoch: 1, batch: 6189, sum loss: 4055.716309, avg loss: 2.795118, ppl: 16.364561 +epoch: 1, batch: 6190, sum loss: 6701.676758, avg loss: 3.188238, ppl: 24.245672 +epoch: 1, batch: 6191, sum loss: 4395.228516, avg loss: 2.736755, ppl: 15.436810 +epoch: 1, batch: 6192, sum loss: 4276.309082, avg loss: 2.909054, ppl: 18.339437 +epoch: 1, batch: 6193, sum loss: 4163.120117, avg loss: 2.512444, ppl: 12.335043 +epoch: 1, batch: 6194, sum loss: 5110.115723, avg loss: 3.007720, ppl: 20.241192 +epoch: 1, batch: 6195, sum loss: 4255.189453, avg loss: 2.715501, ppl: 15.112177 +epoch: 1, batch: 6196, sum loss: 5193.669434, avg loss: 2.926011, ppl: 18.653072 +epoch: 1, batch: 6197, sum loss: 3623.050537, avg loss: 2.608388, ppl: 13.577143 +epoch: 1, batch: 6198, sum loss: 4048.955811, avg loss: 2.726569, ppl: 15.280376 +epoch: 1, batch: 6199, sum loss: 4859.740723, avg loss: 2.915261, ppl: 18.453632 +epoch: 1, batch: 6200, sum loss: 4692.058594, avg loss: 2.710606, ppl: 15.038381 +epoch: 1, batch: 6201, sum loss: 4315.372070, avg loss: 2.698794, ppl: 14.861799 +epoch: 1, batch: 6202, sum loss: 4996.476562, avg loss: 2.935650, ppl: 18.833744 +epoch: 1, batch: 6203, sum loss: 3785.329102, avg loss: 2.750966, ppl: 15.657747 +epoch: 1, batch: 6204, sum loss: 5109.556641, avg loss: 2.813632, ppl: 16.670362 +epoch: 1, batch: 6205, sum loss: 4876.966797, avg loss: 2.827227, ppl: 16.898542 +epoch: 1, batch: 6206, sum loss: 4789.092285, avg loss: 2.753935, ppl: 15.704301 +epoch: 1, batch: 6207, sum loss: 5199.835449, avg loss: 2.882392, ppl: 17.856939 +epoch: 1, batch: 6208, sum loss: 4447.362305, avg loss: 2.827312, ppl: 16.899977 +epoch: 1, batch: 6209, sum loss: 4047.336914, avg loss: 2.493738, ppl: 12.106444 +epoch: 1, batch: 6210, sum loss: 4317.954590, avg loss: 2.734613, ppl: 15.403788 +epoch: 1, batch: 6211, sum loss: 4185.121582, avg loss: 2.458943, ppl: 11.692451 
+epoch: 1, batch: 6212, sum loss: 4611.748535, avg loss: 2.617337, ppl: 13.699201 +epoch: 1, batch: 6213, sum loss: 5255.099121, avg loss: 2.619690, ppl: 13.731472 +epoch: 1, batch: 6214, sum loss: 4073.402588, avg loss: 2.606144, ppl: 13.546710 +epoch: 1, batch: 6215, sum loss: 4206.804688, avg loss: 2.821465, ppl: 16.801455 +epoch: 1, batch: 6216, sum loss: 3411.568848, avg loss: 2.602264, ppl: 13.494260 +epoch: 1, batch: 6217, sum loss: 4223.815430, avg loss: 2.949592, ppl: 19.098158 +epoch: 1, batch: 6218, sum loss: 4832.702148, avg loss: 2.852835, ppl: 17.336857 +epoch: 1, batch: 6219, sum loss: 5086.973145, avg loss: 2.867516, ppl: 17.593258 +epoch: 1, batch: 6220, sum loss: 4877.121094, avg loss: 3.128365, ppl: 22.836613 +epoch: 1, batch: 6221, sum loss: 5192.040039, avg loss: 2.783936, ppl: 16.182583 +epoch: 1, batch: 6222, sum loss: 4896.144043, avg loss: 2.775592, ppl: 16.048122 +epoch: 1, batch: 6223, sum loss: 4300.025391, avg loss: 2.781388, ppl: 16.141407 +epoch: 1, batch: 6224, sum loss: 4702.940430, avg loss: 2.801037, ppl: 16.461702 +epoch: 1, batch: 6225, sum loss: 4699.648438, avg loss: 2.653669, ppl: 14.206070 +epoch: 1, batch: 6226, sum loss: 4650.284180, avg loss: 2.722649, ppl: 15.220582 +epoch: 1, batch: 6227, sum loss: 5763.490234, avg loss: 3.082080, ppl: 21.803715 +epoch: 1, batch: 6228, sum loss: 5617.130371, avg loss: 2.772522, ppl: 15.998940 +epoch: 1, batch: 6229, sum loss: 3780.115234, avg loss: 2.619622, ppl: 13.730539 +epoch: 1, batch: 6230, sum loss: 5643.973633, avg loss: 2.921311, ppl: 18.565619 +epoch: 1, batch: 6231, sum loss: 4664.589844, avg loss: 2.645825, ppl: 14.095071 +epoch: 1, batch: 6232, sum loss: 4315.748047, avg loss: 2.740157, ppl: 15.489423 +epoch: 1, batch: 6233, sum loss: 4668.889648, avg loss: 2.708173, ppl: 15.001839 +epoch: 1, batch: 6234, sum loss: 5246.054199, avg loss: 2.996033, ppl: 20.006023 +epoch: 1, batch: 6235, sum loss: 5045.378906, avg loss: 2.799877, ppl: 16.442631 +epoch: 1, batch: 6236, sum 
loss: 4701.832520, avg loss: 2.793721, ppl: 16.341713 +epoch: 1, batch: 6237, sum loss: 4411.925781, avg loss: 2.742030, ppl: 15.518451 +epoch: 1, batch: 6238, sum loss: 4944.352539, avg loss: 2.768394, ppl: 15.933032 +epoch: 1, batch: 6239, sum loss: 4970.917480, avg loss: 2.843775, ppl: 17.180492 +epoch: 1, batch: 6240, sum loss: 4064.380371, avg loss: 2.548201, ppl: 12.784082 +epoch: 1, batch: 6241, sum loss: 5289.166992, avg loss: 2.944971, ppl: 19.010103 +epoch: 1, batch: 6242, sum loss: 4784.166016, avg loss: 2.696824, ppl: 14.832550 +epoch: 1, batch: 6243, sum loss: 5035.475098, avg loss: 2.944722, ppl: 19.005381 +epoch: 1, batch: 6244, sum loss: 5168.665527, avg loss: 2.982496, ppl: 19.737015 +epoch: 1, batch: 6245, sum loss: 4261.863770, avg loss: 2.765648, ppl: 15.889335 +epoch: 1, batch: 6246, sum loss: 6128.927246, avg loss: 2.988263, ppl: 19.851170 +epoch: 1, batch: 6247, sum loss: 5706.693848, avg loss: 2.973785, ppl: 19.565840 +epoch: 1, batch: 6248, sum loss: 4717.270508, avg loss: 2.985614, ppl: 19.798660 +epoch: 1, batch: 6249, sum loss: 5097.607910, avg loss: 2.777988, ppl: 16.086626 +epoch: 1, batch: 6250, sum loss: 5954.819336, avg loss: 3.031986, ppl: 20.738369 +epoch: 1, batch: 6251, sum loss: 4957.769043, avg loss: 2.669773, ppl: 14.436693 +epoch: 1, batch: 6252, sum loss: 4511.475586, avg loss: 2.749223, ppl: 15.630486 +epoch: 1, batch: 6253, sum loss: 4095.510742, avg loss: 2.397840, ppl: 10.999393 +epoch: 1, batch: 6254, sum loss: 4304.610352, avg loss: 2.676996, ppl: 14.541352 +epoch: 1, batch: 6255, sum loss: 4161.800293, avg loss: 2.686766, ppl: 14.684110 +epoch: 1, batch: 6256, sum loss: 5024.660645, avg loss: 2.871235, ppl: 17.658808 +epoch: 1, batch: 6257, sum loss: 3575.467773, avg loss: 2.420764, ppl: 11.254452 +epoch: 1, batch: 6258, sum loss: 4873.690918, avg loss: 2.809044, ppl: 16.594046 +epoch: 1, batch: 6259, sum loss: 5392.569824, avg loss: 3.065702, ppl: 21.449518 +epoch: 1, batch: 6260, sum loss: 3900.505127, avg loss: 
2.646204, ppl: 14.100415 +epoch: 1, batch: 6261, sum loss: 6120.525391, avg loss: 3.201112, ppl: 24.559814 +epoch: 1, batch: 6262, sum loss: 5938.555664, avg loss: 3.385721, ppl: 29.539295 +epoch: 1, batch: 6263, sum loss: 4765.343750, avg loss: 2.816397, ppl: 16.716511 +epoch: 1, batch: 6264, sum loss: 4801.003906, avg loss: 2.775147, ppl: 16.040981 +epoch: 1, batch: 6265, sum loss: 4635.759277, avg loss: 2.678082, ppl: 14.557138 +epoch: 1, batch: 6266, sum loss: 4950.575684, avg loss: 2.709675, ppl: 15.024389 +epoch: 1, batch: 6267, sum loss: 5859.432129, avg loss: 2.931182, ppl: 18.749773 +epoch: 1, batch: 6268, sum loss: 4012.209961, avg loss: 2.526581, ppl: 12.510656 +epoch: 1, batch: 6269, sum loss: 4418.684570, avg loss: 2.899399, ppl: 18.163231 +epoch: 1, batch: 6270, sum loss: 4404.862793, avg loss: 2.563948, ppl: 12.986991 +epoch: 1, batch: 6271, sum loss: 4959.543945, avg loss: 2.883456, ppl: 17.875942 +epoch: 1, batch: 6272, sum loss: 4409.043457, avg loss: 2.788769, ppl: 16.260986 +epoch: 1, batch: 6273, sum loss: 5020.131836, avg loss: 2.935750, ppl: 18.835615 +epoch: 1, batch: 6274, sum loss: 3811.520996, avg loss: 2.539321, ppl: 12.671066 +epoch: 1, batch: 6275, sum loss: 4077.313965, avg loss: 2.689521, ppl: 14.724626 +epoch: 1, batch: 6276, sum loss: 5106.961914, avg loss: 2.706392, ppl: 14.975148 +epoch: 1, batch: 6277, sum loss: 4860.266113, avg loss: 2.692668, ppl: 14.771035 +epoch: 1, batch: 6278, sum loss: 4187.514160, avg loss: 2.405235, ppl: 11.081035 +epoch: 1, batch: 6279, sum loss: 4520.604492, avg loss: 2.814822, ppl: 16.690208 +epoch: 1, batch: 6280, sum loss: 4840.770996, avg loss: 2.769320, ppl: 15.947786 +epoch: 1, batch: 6281, sum loss: 3823.786133, avg loss: 2.745001, ppl: 15.564627 +epoch: 1, batch: 6282, sum loss: 4474.353027, avg loss: 2.741638, ppl: 15.512373 +epoch: 1, batch: 6283, sum loss: 4419.747070, avg loss: 2.568127, ppl: 13.041377 +epoch: 1, batch: 6284, sum loss: 4868.941406, avg loss: 2.893013, ppl: 18.047609 
+epoch: 1, batch: 6285, sum loss: 4385.925781, avg loss: 2.654919, ppl: 14.223832 +epoch: 1, batch: 6286, sum loss: 4692.649414, avg loss: 2.833725, ppl: 17.008709 +epoch: 1, batch: 6287, sum loss: 4464.165527, avg loss: 2.633726, ppl: 13.925558 +epoch: 1, batch: 6288, sum loss: 4483.465820, avg loss: 2.585620, ppl: 13.271521 +epoch: 1, batch: 6289, sum loss: 4614.790039, avg loss: 2.832897, ppl: 16.994631 +epoch: 1, batch: 6290, sum loss: 6131.297852, avg loss: 2.987962, ppl: 19.845198 +epoch: 1, batch: 6291, sum loss: 5472.166016, avg loss: 3.091619, ppl: 22.012693 +epoch: 1, batch: 6292, sum loss: 4510.486816, avg loss: 2.654789, ppl: 14.221988 +epoch: 1, batch: 6293, sum loss: 4676.210449, avg loss: 2.808535, ppl: 16.585600 +epoch: 1, batch: 6294, sum loss: 3972.071777, avg loss: 2.549469, ppl: 12.800299 +epoch: 1, batch: 6295, sum loss: 5357.368652, avg loss: 2.818184, ppl: 16.746418 +epoch: 1, batch: 6296, sum loss: 4959.437500, avg loss: 2.915601, ppl: 18.459904 +epoch: 1, batch: 6297, sum loss: 4874.271484, avg loss: 2.882479, ppl: 17.858488 +epoch: 1, batch: 6298, sum loss: 4947.960938, avg loss: 2.721651, ppl: 15.205400 +epoch: 1, batch: 6299, sum loss: 4444.670898, avg loss: 2.705216, ppl: 14.957553 +epoch: 1, batch: 6300, sum loss: 5817.868164, avg loss: 2.929440, ppl: 18.717152 +epoch: 1, batch: 6301, sum loss: 5045.489258, avg loss: 2.689493, ppl: 14.724212 +epoch: 1, batch: 6302, sum loss: 4035.043457, avg loss: 2.603254, ppl: 13.507618 +epoch: 1, batch: 6303, sum loss: 5347.527344, avg loss: 2.912596, ppl: 18.404524 +epoch: 1, batch: 6304, sum loss: 4874.055176, avg loss: 2.712329, ppl: 15.064322 +epoch: 1, batch: 6305, sum loss: 3602.642578, avg loss: 2.620104, ppl: 13.737146 +epoch: 1, batch: 6306, sum loss: 5150.851562, avg loss: 2.776739, ppl: 16.066547 +epoch: 1, batch: 6307, sum loss: 5208.929688, avg loss: 2.939577, ppl: 18.907839 +epoch: 1, batch: 6308, sum loss: 4197.632324, avg loss: 2.699442, ppl: 14.871433 +epoch: 1, batch: 6309, sum 
loss: 5651.298340, avg loss: 2.907047, ppl: 18.302668 +epoch: 1, batch: 6310, sum loss: 4962.138672, avg loss: 2.713034, ppl: 15.074943 +epoch: 1, batch: 6311, sum loss: 4464.207031, avg loss: 2.854352, ppl: 17.363186 +epoch: 1, batch: 6312, sum loss: 4521.800781, avg loss: 2.585363, ppl: 13.268109 +epoch: 1, batch: 6313, sum loss: 4291.812988, avg loss: 2.514243, ppl: 12.357252 +epoch: 1, batch: 6314, sum loss: 4113.888184, avg loss: 2.650701, ppl: 14.163968 +epoch: 1, batch: 6315, sum loss: 4761.085449, avg loss: 2.597428, ppl: 13.429155 +epoch: 1, batch: 6316, sum loss: 3303.417480, avg loss: 2.379984, ppl: 10.804727 +epoch: 1, batch: 6317, sum loss: 4593.586426, avg loss: 2.883607, ppl: 17.878653 +epoch: 1, batch: 6318, sum loss: 5795.839355, avg loss: 2.719774, ppl: 15.176899 +epoch: 1, batch: 6319, sum loss: 4811.988770, avg loss: 2.883157, ppl: 17.870602 +epoch: 1, batch: 6320, sum loss: 4942.643066, avg loss: 2.942050, ppl: 18.954655 +epoch: 1, batch: 6321, sum loss: 3792.319092, avg loss: 2.488398, ppl: 12.041973 +epoch: 1, batch: 6322, sum loss: 4115.583984, avg loss: 2.941804, ppl: 18.950005 +epoch: 1, batch: 6323, sum loss: 5590.262207, avg loss: 3.244493, ppl: 25.648716 +epoch: 1, batch: 6324, sum loss: 4398.157715, avg loss: 2.755738, ppl: 15.732644 +epoch: 1, batch: 6325, sum loss: 5012.685547, avg loss: 2.702256, ppl: 14.913345 +epoch: 1, batch: 6326, sum loss: 4621.174805, avg loss: 2.737663, ppl: 15.450831 +epoch: 1, batch: 6327, sum loss: 4656.540039, avg loss: 2.694757, ppl: 14.801921 +epoch: 1, batch: 6328, sum loss: 4779.694336, avg loss: 2.935930, ppl: 18.839016 +epoch: 1, batch: 6329, sum loss: 5290.830078, avg loss: 2.900674, ppl: 18.186405 +epoch: 1, batch: 6330, sum loss: 5179.277344, avg loss: 2.931113, ppl: 18.748495 +epoch: 1, batch: 6331, sum loss: 3986.124268, avg loss: 2.535702, ppl: 12.625297 +epoch: 1, batch: 6332, sum loss: 5282.658203, avg loss: 2.979503, ppl: 19.678028 +epoch: 1, batch: 6333, sum loss: 5163.563477, avg loss: 
2.803238, ppl: 16.497976 +epoch: 1, batch: 6334, sum loss: 3871.521729, avg loss: 2.553774, ppl: 12.855534 +epoch: 1, batch: 6335, sum loss: 4453.949707, avg loss: 2.811837, ppl: 16.640457 +epoch: 1, batch: 6336, sum loss: 4417.477051, avg loss: 2.667559, ppl: 14.404759 +epoch: 1, batch: 6337, sum loss: 5213.458496, avg loss: 3.025803, ppl: 20.610550 +epoch: 1, batch: 6338, sum loss: 4959.584473, avg loss: 2.665011, ppl: 14.368103 +epoch: 1, batch: 6339, sum loss: 4570.016602, avg loss: 2.581930, ppl: 13.222638 +epoch: 1, batch: 6340, sum loss: 4171.956055, avg loss: 2.777601, ppl: 16.080391 +epoch: 1, batch: 6341, sum loss: 4844.832520, avg loss: 2.603349, ppl: 13.508906 +epoch: 1, batch: 6342, sum loss: 4564.988770, avg loss: 2.776757, ppl: 16.066835 +epoch: 1, batch: 6343, sum loss: 4965.652832, avg loss: 2.920972, ppl: 18.559324 +epoch: 1, batch: 6344, sum loss: 4279.634766, avg loss: 2.412421, ppl: 11.160946 +epoch: 1, batch: 6345, sum loss: 4193.968750, avg loss: 2.874550, ppl: 17.717455 +epoch: 1, batch: 6346, sum loss: 4648.976562, avg loss: 2.701323, ppl: 14.899427 +epoch: 1, batch: 6347, sum loss: 4714.300293, avg loss: 2.848520, ppl: 17.262215 +epoch: 1, batch: 6348, sum loss: 4902.899902, avg loss: 2.695382, ppl: 14.811177 +epoch: 1, batch: 6349, sum loss: 5011.744141, avg loss: 2.821928, ppl: 16.809229 +epoch: 1, batch: 6350, sum loss: 4049.240234, avg loss: 2.597332, ppl: 13.427861 +epoch: 1, batch: 6351, sum loss: 4430.213867, avg loss: 2.827195, ppl: 16.897989 +epoch: 1, batch: 6352, sum loss: 4982.985840, avg loss: 2.748476, ppl: 15.618804 +epoch: 1, batch: 6353, sum loss: 4832.250977, avg loss: 2.861013, ppl: 17.479227 +epoch: 1, batch: 6354, sum loss: 3817.405029, avg loss: 2.538168, ppl: 12.656466 +epoch: 1, batch: 6355, sum loss: 4179.742188, avg loss: 2.586474, ppl: 13.282856 +epoch: 1, batch: 6356, sum loss: 5011.463379, avg loss: 2.839356, ppl: 17.104746 +epoch: 1, batch: 6357, sum loss: 4397.720703, avg loss: 2.675013, ppl: 14.512532 
+epoch: 1, batch: 6358, sum loss: 4270.560547, avg loss: 2.828186, ppl: 16.914745 +epoch: 1, batch: 6359, sum loss: 4337.308594, avg loss: 2.677351, ppl: 14.546509 +epoch: 1, batch: 6360, sum loss: 5182.090820, avg loss: 2.700412, ppl: 14.885864 +epoch: 1, batch: 6361, sum loss: 4847.399902, avg loss: 2.816618, ppl: 16.720211 +epoch: 1, batch: 6362, sum loss: 4571.522949, avg loss: 2.825416, ppl: 16.867958 +epoch: 1, batch: 6363, sum loss: 3754.849365, avg loss: 2.346781, ppl: 10.451869 +epoch: 1, batch: 6364, sum loss: 4369.385254, avg loss: 2.732574, ppl: 15.372397 +epoch: 1, batch: 6365, sum loss: 5739.449219, avg loss: 3.020763, ppl: 20.506931 +epoch: 1, batch: 6366, sum loss: 4559.144043, avg loss: 2.776580, ppl: 16.063986 +epoch: 1, batch: 6367, sum loss: 4827.664551, avg loss: 2.806782, ppl: 16.556549 +epoch: 1, batch: 6368, sum loss: 6481.020508, avg loss: 3.261711, ppl: 26.094149 +epoch: 1, batch: 6369, sum loss: 4581.689941, avg loss: 2.802257, ppl: 16.481802 +epoch: 1, batch: 6370, sum loss: 4195.481445, avg loss: 2.861856, ppl: 17.493973 +epoch: 1, batch: 6371, sum loss: 4568.167480, avg loss: 2.797408, ppl: 16.402079 +epoch: 1, batch: 6372, sum loss: 4955.029785, avg loss: 2.694415, ppl: 14.796865 +epoch: 1, batch: 6373, sum loss: 4428.453125, avg loss: 2.672573, ppl: 14.477169 +epoch: 1, batch: 6374, sum loss: 4680.351562, avg loss: 2.762899, ppl: 15.845719 +epoch: 1, batch: 6375, sum loss: 5600.892578, avg loss: 2.835895, ppl: 17.045650 +epoch: 1, batch: 6376, sum loss: 4548.759277, avg loss: 2.725440, ppl: 15.263128 +epoch: 1, batch: 6377, sum loss: 5411.422363, avg loss: 2.790832, ppl: 16.294563 +epoch: 1, batch: 6378, sum loss: 4700.390137, avg loss: 2.816291, ppl: 16.714746 +epoch: 1, batch: 6379, sum loss: 5086.727539, avg loss: 3.120692, ppl: 22.662052 +epoch: 1, batch: 6380, sum loss: 4424.555664, avg loss: 2.727839, ppl: 15.299795 +epoch: 1, batch: 6381, sum loss: 3468.655029, avg loss: 2.561784, ppl: 12.958910 +epoch: 1, batch: 6382, sum 
loss: 5166.375488, avg loss: 2.913918, ppl: 18.428854 +epoch: 1, batch: 6383, sum loss: 3564.532959, avg loss: 2.421558, ppl: 11.263391 +epoch: 1, batch: 6384, sum loss: 3953.907959, avg loss: 2.774672, ppl: 16.033367 +epoch: 1, batch: 6385, sum loss: 4564.249023, avg loss: 2.961875, ppl: 19.334185 +epoch: 1, batch: 6386, sum loss: 4588.985352, avg loss: 2.723433, ppl: 15.232533 +epoch: 1, batch: 6387, sum loss: 4629.105469, avg loss: 2.696043, ppl: 14.820966 +epoch: 1, batch: 6388, sum loss: 4385.774902, avg loss: 2.715650, ppl: 15.114432 +epoch: 1, batch: 6389, sum loss: 4954.114258, avg loss: 2.729540, ppl: 15.325830 +epoch: 1, batch: 6390, sum loss: 5376.946777, avg loss: 2.847959, ppl: 17.252535 +epoch: 1, batch: 6391, sum loss: 4910.360840, avg loss: 2.749362, ppl: 15.632659 +epoch: 1, batch: 6392, sum loss: 3978.381836, avg loss: 2.657570, ppl: 14.261586 +epoch: 1, batch: 6393, sum loss: 4342.917969, avg loss: 2.741741, ppl: 15.513974 +epoch: 1, batch: 6394, sum loss: 4236.461426, avg loss: 2.759910, ppl: 15.798415 +epoch: 1, batch: 6395, sum loss: 4749.038086, avg loss: 2.818420, ppl: 16.750366 +epoch: 1, batch: 6396, sum loss: 4922.556152, avg loss: 2.800089, ppl: 16.446104 +epoch: 1, batch: 6397, sum loss: 5267.929688, avg loss: 2.896058, ppl: 18.102646 +epoch: 1, batch: 6398, sum loss: 4970.556641, avg loss: 2.963957, ppl: 19.374491 +epoch: 1, batch: 6399, sum loss: 4747.064453, avg loss: 2.777685, ppl: 16.081755 +epoch: 1, batch: 6400, sum loss: 5264.256348, avg loss: 3.049975, ppl: 21.114809 +epoch: 1, batch: 6401, sum loss: 4093.754883, avg loss: 2.612479, ppl: 13.632811 +epoch: 1, batch: 6402, sum loss: 4349.497559, avg loss: 2.663501, ppl: 14.346432 +epoch: 1, batch: 6403, sum loss: 4990.304199, avg loss: 2.891254, ppl: 18.015882 +epoch: 1, batch: 6404, sum loss: 3837.240723, avg loss: 2.599757, ppl: 13.460463 +epoch: 1, batch: 6405, sum loss: 4578.803223, avg loss: 2.764978, ppl: 15.878686 +epoch: 1, batch: 6406, sum loss: 4973.945312, avg loss: 
2.738956, ppl: 15.470832 +epoch: 1, batch: 6407, sum loss: 5436.750977, avg loss: 2.772438, ppl: 15.997589 +epoch: 1, batch: 6408, sum loss: 5064.634766, avg loss: 2.872737, ppl: 17.685352 +epoch: 1, batch: 6409, sum loss: 4140.674805, avg loss: 2.642422, ppl: 14.047184 +epoch: 1, batch: 6410, sum loss: 3919.868164, avg loss: 2.776111, ppl: 16.056450 +epoch: 1, batch: 6411, sum loss: 4990.868652, avg loss: 2.829291, ppl: 16.933445 +epoch: 1, batch: 6412, sum loss: 5572.949219, avg loss: 2.947091, ppl: 19.050457 +epoch: 1, batch: 6413, sum loss: 4662.802246, avg loss: 2.890764, ppl: 18.007061 +epoch: 1, batch: 6414, sum loss: 4213.980469, avg loss: 2.508322, ppl: 12.284297 +epoch: 1, batch: 6415, sum loss: 4928.220215, avg loss: 2.687143, ppl: 14.689646 +epoch: 1, batch: 6416, sum loss: 5383.680664, avg loss: 2.953198, ppl: 19.167160 +epoch: 1, batch: 6417, sum loss: 5214.382324, avg loss: 2.950980, ppl: 19.124695 +epoch: 1, batch: 6418, sum loss: 4379.233887, avg loss: 2.823490, ppl: 16.835510 +epoch: 1, batch: 6419, sum loss: 4753.123535, avg loss: 2.889437, ppl: 17.983177 +epoch: 1, batch: 6420, sum loss: 4078.722900, avg loss: 2.546019, ppl: 12.756224 +epoch: 1, batch: 6421, sum loss: 5566.698242, avg loss: 3.077224, ppl: 21.698090 +epoch: 1, batch: 6422, sum loss: 5241.187012, avg loss: 2.977947, ppl: 19.647438 +epoch: 1, batch: 6423, sum loss: 4911.287109, avg loss: 2.677910, ppl: 14.554644 +epoch: 1, batch: 6424, sum loss: 3806.035400, avg loss: 2.621237, ppl: 13.752719 +epoch: 1, batch: 6425, sum loss: 3997.382568, avg loss: 2.544483, ppl: 12.736641 +epoch: 1, batch: 6426, sum loss: 4629.183594, avg loss: 2.902309, ppl: 18.216166 +epoch: 1, batch: 6427, sum loss: 4511.899414, avg loss: 2.679275, ppl: 14.574527 +epoch: 1, batch: 6428, sum loss: 4452.388672, avg loss: 2.667699, ppl: 14.406775 +epoch: 1, batch: 6429, sum loss: 5216.665039, avg loss: 3.052466, ppl: 21.167488 +epoch: 1, batch: 6430, sum loss: 4682.678711, avg loss: 2.822591, ppl: 16.820377 
+epoch: 1, batch: 6431, sum loss: 5498.724121, avg loss: 2.934218, ppl: 18.806786 +epoch: 1, batch: 6432, sum loss: 5242.017578, avg loss: 2.859802, ppl: 17.458075 +epoch: 1, batch: 6433, sum loss: 4902.309082, avg loss: 2.918041, ppl: 18.505005 +epoch: 1, batch: 6434, sum loss: 4661.228516, avg loss: 2.889788, ppl: 17.989502 +epoch: 1, batch: 6435, sum loss: 4334.673340, avg loss: 2.769759, ppl: 15.954791 +epoch: 1, batch: 6436, sum loss: 4239.544434, avg loss: 2.516050, ppl: 12.379601 +epoch: 1, batch: 6437, sum loss: 4933.428711, avg loss: 3.138314, ppl: 23.064936 +epoch: 1, batch: 6438, sum loss: 5136.817871, avg loss: 2.880997, ppl: 17.832047 +epoch: 1, batch: 6439, sum loss: 4075.705566, avg loss: 2.692012, ppl: 14.761340 +epoch: 1, batch: 6440, sum loss: 4862.657227, avg loss: 2.877312, ppl: 17.766451 +epoch: 1, batch: 6441, sum loss: 5799.724609, avg loss: 3.136682, ppl: 23.027330 +epoch: 1, batch: 6442, sum loss: 4990.998535, avg loss: 2.580661, ppl: 13.205865 +epoch: 1, batch: 6443, sum loss: 5200.955078, avg loss: 2.912069, ppl: 18.394819 +epoch: 1, batch: 6444, sum loss: 4173.078613, avg loss: 2.571213, ppl: 13.081683 +epoch: 1, batch: 6445, sum loss: 4501.785156, avg loss: 2.756757, ppl: 15.748694 +epoch: 1, batch: 6446, sum loss: 5188.448242, avg loss: 2.978443, ppl: 19.657190 +epoch: 1, batch: 6447, sum loss: 4706.448242, avg loss: 2.804796, ppl: 16.523708 +epoch: 1, batch: 6448, sum loss: 4954.097168, avg loss: 2.659204, ppl: 14.284914 +epoch: 1, batch: 6449, sum loss: 4614.699219, avg loss: 2.741948, ppl: 15.517189 +epoch: 1, batch: 6450, sum loss: 5052.205078, avg loss: 2.778991, ppl: 16.102760 +epoch: 1, batch: 6451, sum loss: 6445.021484, avg loss: 3.305139, ppl: 27.252331 +epoch: 1, batch: 6452, sum loss: 4171.209473, avg loss: 2.557455, ppl: 12.902942 +epoch: 1, batch: 6453, sum loss: 3592.487549, avg loss: 2.551483, ppl: 12.826107 +epoch: 1, batch: 6454, sum loss: 5201.932617, avg loss: 3.033197, ppl: 20.763500 +epoch: 1, batch: 6455, sum 
loss: 3897.463379, avg loss: 2.473010, ppl: 11.858084 +epoch: 1, batch: 6456, sum loss: 4969.929688, avg loss: 3.106206, ppl: 22.336140 +epoch: 1, batch: 6457, sum loss: 4345.771973, avg loss: 2.986785, ppl: 19.821846 +epoch: 1, batch: 6458, sum loss: 5854.678711, avg loss: 3.102638, ppl: 22.256592 +epoch: 1, batch: 6459, sum loss: 4537.135742, avg loss: 2.731569, ppl: 15.356960 +epoch: 1, batch: 6460, sum loss: 4702.947266, avg loss: 2.350299, ppl: 10.488702 +epoch: 1, batch: 6461, sum loss: 6892.103516, avg loss: 3.360363, ppl: 28.799629 +epoch: 1, batch: 6462, sum loss: 4567.977051, avg loss: 2.661991, ppl: 14.324786 +epoch: 1, batch: 6463, sum loss: 5570.934570, avg loss: 2.956972, ppl: 19.239624 +epoch: 1, batch: 6464, sum loss: 5651.366211, avg loss: 2.874551, ppl: 17.717461 +epoch: 1, batch: 6465, sum loss: 4514.885742, avg loss: 2.744611, ppl: 15.558564 +epoch: 1, batch: 6466, sum loss: 5251.557129, avg loss: 2.877566, ppl: 17.770960 +epoch: 1, batch: 6467, sum loss: 5416.018555, avg loss: 3.282435, ppl: 26.640575 +epoch: 1, batch: 6468, sum loss: 4602.586426, avg loss: 2.779340, ppl: 16.108379 +epoch: 1, batch: 6469, sum loss: 3776.725586, avg loss: 2.455608, ppl: 11.653521 +epoch: 1, batch: 6470, sum loss: 4486.535156, avg loss: 2.593373, ppl: 13.374806 +epoch: 1, batch: 6471, sum loss: 4872.014648, avg loss: 2.837516, ppl: 17.073299 +epoch: 1, batch: 6472, sum loss: 5836.661621, avg loss: 3.014804, ppl: 20.385103 +epoch: 1, batch: 6473, sum loss: 5267.711914, avg loss: 3.032650, ppl: 20.752144 +epoch: 1, batch: 6474, sum loss: 4623.127441, avg loss: 2.683185, ppl: 14.631618 +epoch: 1, batch: 6475, sum loss: 4962.666992, avg loss: 2.689792, ppl: 14.728618 +epoch: 1, batch: 6476, sum loss: 3782.305664, avg loss: 2.480201, ppl: 11.943659 +epoch: 1, batch: 6477, sum loss: 4996.309082, avg loss: 2.951157, ppl: 19.128078 +epoch: 1, batch: 6478, sum loss: 4275.055176, avg loss: 2.888551, ppl: 17.967253 +epoch: 1, batch: 6479, sum loss: 4141.880859, avg loss: 
2.623104, ppl: 13.778423 +epoch: 1, batch: 6480, sum loss: 4480.521484, avg loss: 2.720414, ppl: 15.186604 +epoch: 1, batch: 6481, sum loss: 5446.841309, avg loss: 2.857734, ppl: 17.422003 +epoch: 1, batch: 6482, sum loss: 5051.395508, avg loss: 2.752804, ppl: 15.686560 +epoch: 1, batch: 6483, sum loss: 4699.728516, avg loss: 2.711903, ppl: 15.057908 +epoch: 1, batch: 6484, sum loss: 5397.723633, avg loss: 2.831964, ppl: 16.978775 +epoch: 1, batch: 6485, sum loss: 4751.548340, avg loss: 2.886724, ppl: 17.934469 +epoch: 1, batch: 6486, sum loss: 5011.358398, avg loss: 2.898414, ppl: 18.145351 +epoch: 1, batch: 6487, sum loss: 4963.536133, avg loss: 3.004562, ppl: 20.177374 +epoch: 1, batch: 6488, sum loss: 4679.117188, avg loss: 2.899081, ppl: 18.157455 +epoch: 1, batch: 6489, sum loss: 4987.051270, avg loss: 2.949173, ppl: 19.090158 +epoch: 1, batch: 6490, sum loss: 5569.037598, avg loss: 2.759682, ppl: 15.794815 +epoch: 1, batch: 6491, sum loss: 4920.037598, avg loss: 2.765620, ppl: 15.888885 +epoch: 1, batch: 6492, sum loss: 3797.197998, avg loss: 2.798230, ppl: 16.415564 +epoch: 1, batch: 6493, sum loss: 4563.652832, avg loss: 2.654830, ppl: 14.222571 +epoch: 1, batch: 6494, sum loss: 3795.484131, avg loss: 2.440826, ppl: 11.482521 +epoch: 1, batch: 6495, sum loss: 5354.243652, avg loss: 2.994544, ppl: 19.976240 +epoch: 1, batch: 6496, sum loss: 4439.193359, avg loss: 2.605160, ppl: 13.533397 +epoch: 1, batch: 6497, sum loss: 4070.588379, avg loss: 2.587787, ppl: 13.300302 +epoch: 1, batch: 6498, sum loss: 4212.096191, avg loss: 2.754805, ppl: 15.717970 +epoch: 1, batch: 6499, sum loss: 4107.908691, avg loss: 2.722272, ppl: 15.214857 +epoch: 1, batch: 6500, sum loss: 4951.650879, avg loss: 2.723680, ppl: 15.236293 +epoch: 1, batch: 6501, sum loss: 4457.008789, avg loss: 2.727668, ppl: 15.297166 +epoch: 1, batch: 6502, sum loss: 4376.018066, avg loss: 2.719713, ppl: 15.175965 +epoch: 1, batch: 6503, sum loss: 4224.465332, avg loss: 2.970791, ppl: 19.507351 
+epoch: 1, batch: 6504, sum loss: 4955.879883, avg loss: 2.841674, ppl: 17.144444 +epoch: 1, batch: 6505, sum loss: 4870.385742, avg loss: 2.861566, ppl: 17.488899 +epoch: 1, batch: 6506, sum loss: 4237.487793, avg loss: 2.658399, ppl: 14.273417 +epoch: 1, batch: 6507, sum loss: 4329.643555, avg loss: 2.707720, ppl: 14.995041 +epoch: 1, batch: 6508, sum loss: 4393.552734, avg loss: 2.578376, ppl: 13.175724 +epoch: 1, batch: 6509, sum loss: 4185.032715, avg loss: 2.617281, ppl: 13.698430 +epoch: 1, batch: 6510, sum loss: 5106.251465, avg loss: 2.975671, ppl: 19.602774 +epoch: 1, batch: 6511, sum loss: 4038.886963, avg loss: 2.732671, ppl: 15.373889 +epoch: 1, batch: 6512, sum loss: 4320.540039, avg loss: 2.612177, ppl: 13.628683 +epoch: 1, batch: 6513, sum loss: 4256.315918, avg loss: 2.681989, ppl: 14.614128 +epoch: 1, batch: 6514, sum loss: 4413.538086, avg loss: 2.717696, ppl: 15.145383 +epoch: 1, batch: 6515, sum loss: 4751.684570, avg loss: 2.732424, ppl: 15.370092 +epoch: 1, batch: 6516, sum loss: 4026.830322, avg loss: 2.815965, ppl: 16.709299 +epoch: 1, batch: 6517, sum loss: 5037.328125, avg loss: 3.023606, ppl: 20.565323 +epoch: 1, batch: 6518, sum loss: 5122.837402, avg loss: 3.104750, ppl: 22.303640 +epoch: 1, batch: 6519, sum loss: 3425.190918, avg loss: 2.400274, ppl: 11.026197 +epoch: 1, batch: 6520, sum loss: 4240.760742, avg loss: 2.461266, ppl: 11.719637 +epoch: 1, batch: 6521, sum loss: 4352.506348, avg loss: 2.811697, ppl: 16.638124 +epoch: 1, batch: 6522, sum loss: 4718.296875, avg loss: 2.773837, ppl: 16.019987 +epoch: 1, batch: 6523, sum loss: 5019.622070, avg loss: 2.842368, ppl: 17.156345 +epoch: 1, batch: 6524, sum loss: 4976.130859, avg loss: 2.881373, ppl: 17.838747 +epoch: 1, batch: 6525, sum loss: 5049.519531, avg loss: 2.922176, ppl: 18.581671 +epoch: 1, batch: 6526, sum loss: 4059.799316, avg loss: 2.784499, ppl: 16.191702 +epoch: 1, batch: 6527, sum loss: 3898.063965, avg loss: 2.500362, ppl: 12.186904 +epoch: 1, batch: 6528, sum 
loss: 4769.817871, avg loss: 2.830752, ppl: 16.958216 +epoch: 1, batch: 6529, sum loss: 5159.439453, avg loss: 2.984060, ppl: 19.767908 +epoch: 1, batch: 6530, sum loss: 4336.230469, avg loss: 2.670093, ppl: 14.441306 +epoch: 1, batch: 6531, sum loss: 5307.916992, avg loss: 3.005615, ppl: 20.198645 +epoch: 1, batch: 6532, sum loss: 5244.725098, avg loss: 2.741623, ppl: 15.512143 +epoch: 1, batch: 6533, sum loss: 4495.767090, avg loss: 2.705034, ppl: 14.954832 +epoch: 1, batch: 6534, sum loss: 5145.501953, avg loss: 2.884250, ppl: 17.890148 +epoch: 1, batch: 6535, sum loss: 4803.971680, avg loss: 2.890476, ppl: 18.001884 +epoch: 1, batch: 6536, sum loss: 5214.198242, avg loss: 3.027990, ppl: 20.655664 +epoch: 1, batch: 6537, sum loss: 4509.870117, avg loss: 2.905844, ppl: 18.280670 +epoch: 1, batch: 6538, sum loss: 4488.544434, avg loss: 2.576662, ppl: 13.153154 +epoch: 1, batch: 6539, sum loss: 4787.180664, avg loss: 2.743370, ppl: 15.539265 +epoch: 1, batch: 6540, sum loss: 4841.792480, avg loss: 2.727770, ppl: 15.298738 +epoch: 1, batch: 6541, sum loss: 4377.274902, avg loss: 2.838700, ppl: 17.093523 +epoch: 1, batch: 6542, sum loss: 4455.996094, avg loss: 2.970664, ppl: 19.504868 +epoch: 1, batch: 6543, sum loss: 3700.278809, avg loss: 2.603996, ppl: 13.517651 +epoch: 1, batch: 6544, sum loss: 4284.377930, avg loss: 2.718514, ppl: 15.157781 +epoch: 1, batch: 6545, sum loss: 4647.458984, avg loss: 2.801362, ppl: 16.467052 +epoch: 1, batch: 6546, sum loss: 4312.880371, avg loss: 2.663916, ppl: 14.352388 +epoch: 1, batch: 6547, sum loss: 3617.235596, avg loss: 2.403479, ppl: 11.061591 +epoch: 1, batch: 6548, sum loss: 2966.901855, avg loss: 2.321519, ppl: 10.191147 +epoch: 1, batch: 6549, sum loss: 4310.326660, avg loss: 2.601283, ppl: 13.481028 +epoch: 1, batch: 6550, sum loss: 4669.833008, avg loss: 2.857915, ppl: 17.425156 +epoch: 1, batch: 6551, sum loss: 5660.571289, avg loss: 2.919325, ppl: 18.528778 +epoch: 1, batch: 6552, sum loss: 4897.360352, avg loss: 
2.599448, ppl: 13.456311 +epoch: 1, batch: 6553, sum loss: 3631.727783, avg loss: 2.400349, ppl: 11.027023 +epoch: 1, batch: 6554, sum loss: 4808.188965, avg loss: 2.813452, ppl: 16.667351 +epoch: 1, batch: 6555, sum loss: 4079.136719, avg loss: 2.666102, ppl: 14.383798 +epoch: 1, batch: 6556, sum loss: 4324.265137, avg loss: 2.770189, ppl: 15.961651 +epoch: 1, batch: 6557, sum loss: 4142.981934, avg loss: 2.618825, ppl: 13.719600 +epoch: 1, batch: 6558, sum loss: 4093.745117, avg loss: 2.581176, ppl: 13.212667 +epoch: 1, batch: 6559, sum loss: 4562.925293, avg loss: 2.556261, ppl: 12.887538 +epoch: 1, batch: 6560, sum loss: 4203.297852, avg loss: 2.548998, ppl: 12.794279 +epoch: 1, batch: 6561, sum loss: 5385.201660, avg loss: 3.091390, ppl: 22.007650 +epoch: 1, batch: 6562, sum loss: 3958.656738, avg loss: 2.540858, ppl: 12.690556 +epoch: 1, batch: 6563, sum loss: 4570.057129, avg loss: 2.781532, ppl: 16.143732 +epoch: 1, batch: 6564, sum loss: 5100.615723, avg loss: 2.914638, ppl: 18.442127 +epoch: 1, batch: 6565, sum loss: 5088.656738, avg loss: 3.104733, ppl: 22.303253 +epoch: 1, batch: 6566, sum loss: 5446.921387, avg loss: 3.014345, ppl: 20.375734 +epoch: 1, batch: 6567, sum loss: 4701.679688, avg loss: 2.889785, ppl: 17.989433 +epoch: 1, batch: 6568, sum loss: 4183.449707, avg loss: 2.788966, ppl: 16.264200 +epoch: 1, batch: 6569, sum loss: 5521.119141, avg loss: 2.872591, ppl: 17.682772 +epoch: 1, batch: 6570, sum loss: 4754.729004, avg loss: 2.696954, ppl: 14.834470 +epoch: 1, batch: 6571, sum loss: 3974.282715, avg loss: 2.637215, ppl: 13.974230 +epoch: 1, batch: 6572, sum loss: 4665.509766, avg loss: 2.896034, ppl: 18.102201 +epoch: 1, batch: 6573, sum loss: 4530.393555, avg loss: 2.704713, ppl: 14.950024 +epoch: 1, batch: 6574, sum loss: 5108.077148, avg loss: 2.887551, ppl: 17.949295 +epoch: 1, batch: 6575, sum loss: 5449.773926, avg loss: 2.992737, ppl: 19.940186 +epoch: 1, batch: 6576, sum loss: 5261.797363, avg loss: 3.095175, ppl: 22.091105 
+epoch: 1, batch: 6577, sum loss: 4779.592773, avg loss: 2.796719, ppl: 16.390781 +epoch: 1, batch: 6578, sum loss: 4827.064941, avg loss: 2.994457, ppl: 19.974516 +epoch: 1, batch: 6579, sum loss: 5313.895508, avg loss: 2.750463, ppl: 15.649883 +epoch: 1, batch: 6580, sum loss: 4645.736328, avg loss: 2.843168, ppl: 17.170074 +epoch: 1, batch: 6581, sum loss: 4872.973633, avg loss: 2.776623, ppl: 16.064678 +epoch: 1, batch: 6582, sum loss: 4342.398438, avg loss: 2.638152, ppl: 13.987336 +epoch: 1, batch: 6583, sum loss: 5819.103027, avg loss: 3.157408, ppl: 23.509581 +epoch: 1, batch: 6584, sum loss: 5460.503906, avg loss: 3.036988, ppl: 20.842361 +epoch: 1, batch: 6585, sum loss: 5114.454102, avg loss: 2.745279, ppl: 15.568951 +epoch: 1, batch: 6586, sum loss: 4993.058105, avg loss: 3.015132, ppl: 20.391777 +epoch: 1, batch: 6587, sum loss: 4029.354248, avg loss: 2.581265, ppl: 13.213840 +epoch: 1, batch: 6588, sum loss: 5972.432129, avg loss: 2.927663, ppl: 18.683908 +epoch: 1, batch: 6589, sum loss: 4201.726562, avg loss: 2.642595, ppl: 14.049619 +epoch: 1, batch: 6590, sum loss: 5267.193359, avg loss: 3.018449, ppl: 20.459532 +epoch: 1, batch: 6591, sum loss: 4952.087891, avg loss: 2.660982, ppl: 14.310340 +epoch: 1, batch: 6592, sum loss: 4642.903809, avg loss: 2.620149, ppl: 13.737769 +epoch: 1, batch: 6593, sum loss: 4946.072266, avg loss: 2.892440, ppl: 18.037264 +epoch: 1, batch: 6594, sum loss: 5969.152832, avg loss: 3.045486, ppl: 21.020245 +epoch: 1, batch: 6595, sum loss: 4110.796875, avg loss: 2.498965, ppl: 12.169886 +epoch: 1, batch: 6596, sum loss: 4520.673828, avg loss: 2.678124, ppl: 14.557763 +epoch: 1, batch: 6597, sum loss: 4032.205811, avg loss: 2.654513, ppl: 14.218065 +epoch: 1, batch: 6598, sum loss: 3315.274170, avg loss: 2.586017, ppl: 13.276790 +epoch: 1, batch: 6599, sum loss: 4272.690430, avg loss: 2.716269, ppl: 15.123783 +epoch: 1, batch: 6600, sum loss: 4791.964355, avg loss: 2.781175, ppl: 16.137970 +epoch: 1, batch: 6601, sum 
loss: 4695.109863, avg loss: 2.721803, ppl: 15.207716 +epoch: 1, batch: 6602, sum loss: 4047.419434, avg loss: 2.787479, ppl: 16.240026 +epoch: 1, batch: 6603, sum loss: 5769.330566, avg loss: 3.001733, ppl: 20.120377 +epoch: 1, batch: 6604, sum loss: 4482.377441, avg loss: 2.980304, ppl: 19.693802 +epoch: 1, batch: 6605, sum loss: 4592.672363, avg loss: 2.940251, ppl: 18.920601 +epoch: 1, batch: 6606, sum loss: 4323.252441, avg loss: 2.807307, ppl: 16.565245 +epoch: 1, batch: 6607, sum loss: 5069.331055, avg loss: 2.890154, ppl: 17.996088 +epoch: 1, batch: 6608, sum loss: 5323.150879, avg loss: 2.954024, ppl: 19.182987 +epoch: 1, batch: 6609, sum loss: 4482.135254, avg loss: 2.607409, ppl: 13.563855 +epoch: 1, batch: 6610, sum loss: 5025.041504, avg loss: 2.633669, ppl: 13.924771 +epoch: 1, batch: 6611, sum loss: 4524.630859, avg loss: 2.680469, ppl: 14.591928 +epoch: 1, batch: 6612, sum loss: 5107.231934, avg loss: 2.750259, ppl: 15.646690 +epoch: 1, batch: 6613, sum loss: 4774.018066, avg loss: 2.798369, ppl: 16.417850 +epoch: 1, batch: 6614, sum loss: 5055.692383, avg loss: 2.935942, ppl: 18.839245 +epoch: 1, batch: 6615, sum loss: 5061.247070, avg loss: 2.765709, ppl: 15.890301 +epoch: 1, batch: 6616, sum loss: 4100.195801, avg loss: 2.759217, ppl: 15.787469 +epoch: 1, batch: 6617, sum loss: 4859.757324, avg loss: 2.812360, ppl: 16.649157 +epoch: 1, batch: 6618, sum loss: 4251.813477, avg loss: 2.697851, ppl: 14.847793 +epoch: 1, batch: 6619, sum loss: 5574.706543, avg loss: 3.033029, ppl: 20.760012 +epoch: 1, batch: 6620, sum loss: 4258.443359, avg loss: 2.392384, ppl: 10.939544 +epoch: 1, batch: 6621, sum loss: 4690.523438, avg loss: 2.617480, ppl: 13.701150 +epoch: 1, batch: 6622, sum loss: 5228.020020, avg loss: 2.773485, ppl: 16.014353 +epoch: 1, batch: 6623, sum loss: 4012.006592, avg loss: 2.664015, ppl: 14.353805 +epoch: 1, batch: 6624, sum loss: 4578.411621, avg loss: 2.969139, ppl: 19.475138 +epoch: 1, batch: 6625, sum loss: 4740.994141, avg loss: 
2.866381, ppl: 17.573309 +epoch: 1, batch: 6626, sum loss: 4307.310547, avg loss: 2.743510, ppl: 15.541440 +epoch: 1, batch: 6627, sum loss: 5348.500977, avg loss: 2.917895, ppl: 18.502296 +epoch: 1, batch: 6628, sum loss: 5047.139648, avg loss: 2.997114, ppl: 20.027653 +epoch: 1, batch: 6629, sum loss: 4035.856689, avg loss: 2.676297, ppl: 14.531191 +epoch: 1, batch: 6630, sum loss: 4514.692383, avg loss: 2.816402, ppl: 16.716595 +epoch: 1, batch: 6631, sum loss: 4869.880371, avg loss: 2.980343, ppl: 19.694572 +epoch: 1, batch: 6632, sum loss: 4305.009766, avg loss: 2.551873, ppl: 12.831117 +epoch: 1, batch: 6633, sum loss: 4448.538086, avg loss: 2.649516, ppl: 14.147195 +epoch: 1, batch: 6634, sum loss: 5058.926270, avg loss: 2.858150, ppl: 17.429262 +epoch: 1, batch: 6635, sum loss: 4653.668945, avg loss: 2.665332, ppl: 14.372715 +epoch: 1, batch: 6636, sum loss: 5552.286133, avg loss: 2.943948, ppl: 18.990679 +epoch: 1, batch: 6637, sum loss: 5170.853516, avg loss: 2.888745, ppl: 17.970739 +epoch: 1, batch: 6638, sum loss: 5013.046875, avg loss: 2.772703, ppl: 16.001827 +epoch: 1, batch: 6639, sum loss: 4448.869141, avg loss: 2.763273, ppl: 15.851637 +epoch: 1, batch: 6640, sum loss: 3762.289307, avg loss: 2.594682, ppl: 13.392334 +epoch: 1, batch: 6641, sum loss: 4768.581543, avg loss: 2.700216, ppl: 14.882947 +epoch: 1, batch: 6642, sum loss: 4963.020508, avg loss: 3.082621, ppl: 21.815514 +epoch: 1, batch: 6643, sum loss: 4832.146973, avg loss: 2.942842, ppl: 18.969690 +epoch: 1, batch: 6644, sum loss: 3377.654053, avg loss: 2.560769, ppl: 12.945766 +epoch: 1, batch: 6645, sum loss: 4844.297852, avg loss: 2.716937, ppl: 15.133890 +epoch: 1, batch: 6646, sum loss: 5186.093262, avg loss: 3.004689, ppl: 20.179939 +epoch: 1, batch: 6647, sum loss: 4742.550781, avg loss: 2.843256, ppl: 17.171585 +epoch: 1, batch: 6648, sum loss: 4011.332520, avg loss: 2.413558, ppl: 11.173641 +epoch: 1, batch: 6649, sum loss: 4889.796387, avg loss: 2.790980, ppl: 16.296980 
+epoch: 1, batch: 6650, sum loss: 3499.938965, avg loss: 2.473455, ppl: 11.863363 +epoch: 1, batch: 6651, sum loss: 4570.058594, avg loss: 2.937056, ppl: 18.860231 +epoch: 1, batch: 6652, sum loss: 5854.550293, avg loss: 3.062003, ppl: 21.370321 +epoch: 1, batch: 6653, sum loss: 4326.854980, avg loss: 2.800554, ppl: 16.453753 +epoch: 1, batch: 6654, sum loss: 4675.225586, avg loss: 2.861215, ppl: 17.482758 +epoch: 1, batch: 6655, sum loss: 5231.175781, avg loss: 2.930631, ppl: 18.739450 +epoch: 1, batch: 6656, sum loss: 5527.676270, avg loss: 2.888023, ppl: 17.957775 +epoch: 1, batch: 6657, sum loss: 3872.334961, avg loss: 2.567861, ppl: 13.037910 +epoch: 1, batch: 6658, sum loss: 4234.318359, avg loss: 2.394977, ppl: 10.967941 +epoch: 1, batch: 6659, sum loss: 4947.632812, avg loss: 2.638737, ppl: 13.995522 +epoch: 1, batch: 6660, sum loss: 4806.298828, avg loss: 2.883203, ppl: 17.871416 +epoch: 1, batch: 6661, sum loss: 4674.026367, avg loss: 2.805538, ppl: 16.535969 +epoch: 1, batch: 6662, sum loss: 5195.991699, avg loss: 2.998264, ppl: 20.050699 +epoch: 1, batch: 6663, sum loss: 4640.523926, avg loss: 3.036992, ppl: 20.842457 +epoch: 1, batch: 6664, sum loss: 5196.480469, avg loss: 2.935865, ppl: 18.837786 +epoch: 1, batch: 6665, sum loss: 3962.316895, avg loss: 2.749700, ppl: 15.637934 +epoch: 1, batch: 6666, sum loss: 3721.857422, avg loss: 2.358592, ppl: 10.576045 +epoch: 1, batch: 6667, sum loss: 3948.294922, avg loss: 2.578899, ppl: 13.182621 +epoch: 1, batch: 6668, sum loss: 4239.372070, avg loss: 2.469058, ppl: 11.811313 +epoch: 1, batch: 6669, sum loss: 4365.817383, avg loss: 2.689968, ppl: 14.731199 +epoch: 1, batch: 6670, sum loss: 5792.292969, avg loss: 3.015249, ppl: 20.394169 +epoch: 1, batch: 6671, sum loss: 4048.597168, avg loss: 2.488382, ppl: 12.041772 +epoch: 1, batch: 6672, sum loss: 3668.477539, avg loss: 2.519559, ppl: 12.423116 +epoch: 1, batch: 6673, sum loss: 3757.257324, avg loss: 2.414690, ppl: 11.186299 +epoch: 1, batch: 6674, sum 
loss: 5566.889648, avg loss: 2.850430, ppl: 17.295218 +epoch: 1, batch: 6675, sum loss: 4735.388672, avg loss: 2.652879, ppl: 14.194847 +epoch: 1, batch: 6676, sum loss: 5173.455566, avg loss: 2.969837, ppl: 19.488737 +epoch: 1, batch: 6677, sum loss: 4784.109375, avg loss: 3.043327, ppl: 20.974903 +epoch: 1, batch: 6678, sum loss: 4513.493652, avg loss: 2.878504, ppl: 17.787636 +epoch: 1, batch: 6679, sum loss: 5018.556641, avg loss: 2.866109, ppl: 17.568523 +epoch: 1, batch: 6680, sum loss: 5457.642090, avg loss: 2.997058, ppl: 20.026525 +epoch: 1, batch: 6681, sum loss: 4412.155762, avg loss: 2.607657, ppl: 13.567225 +epoch: 1, batch: 6682, sum loss: 6108.946777, avg loss: 2.982884, ppl: 19.744682 +epoch: 1, batch: 6683, sum loss: 5292.438965, avg loss: 3.041631, ppl: 20.939377 +epoch: 1, batch: 6684, sum loss: 4344.571289, avg loss: 2.703529, ppl: 14.932337 +epoch: 1, batch: 6685, sum loss: 3978.178223, avg loss: 2.387862, ppl: 10.890185 +epoch: 1, batch: 6686, sum loss: 3882.728027, avg loss: 2.392315, ppl: 10.938792 +epoch: 1, batch: 6687, sum loss: 4833.019531, avg loss: 3.009352, ppl: 20.274258 +epoch: 1, batch: 6688, sum loss: 4463.377441, avg loss: 2.775732, ppl: 16.050377 +epoch: 1, batch: 6689, sum loss: 4161.841797, avg loss: 2.821588, ppl: 16.803507 +epoch: 1, batch: 6690, sum loss: 4937.049805, avg loss: 2.798781, ppl: 16.424616 +epoch: 1, batch: 6691, sum loss: 4992.783691, avg loss: 2.803360, ppl: 16.499990 +epoch: 1, batch: 6692, sum loss: 4271.890625, avg loss: 2.747197, ppl: 15.598842 +epoch: 1, batch: 6693, sum loss: 4727.666504, avg loss: 2.769576, ppl: 15.951870 +epoch: 1, batch: 6694, sum loss: 3946.559326, avg loss: 2.524990, ppl: 12.490771 +epoch: 1, batch: 6695, sum loss: 3967.489258, avg loss: 2.541633, ppl: 12.700396 +epoch: 1, batch: 6696, sum loss: 4275.513184, avg loss: 2.712889, ppl: 15.072757 +epoch: 1, batch: 6697, sum loss: 4746.179199, avg loss: 2.994435, ppl: 19.974068 +epoch: 1, batch: 6698, sum loss: 4387.796875, avg loss: 
2.785903, ppl: 16.214449 +epoch: 1, batch: 6699, sum loss: 4150.478027, avg loss: 2.569955, ppl: 13.065241 +epoch: 1, batch: 6700, sum loss: 5134.393555, avg loss: 3.177224, ppl: 23.980085 +epoch: 1, batch: 6701, sum loss: 4432.875977, avg loss: 2.727924, ppl: 15.301084 +epoch: 1, batch: 6702, sum loss: 4961.687012, avg loss: 2.951628, ppl: 19.137087 +epoch: 1, batch: 6703, sum loss: 5138.518555, avg loss: 2.792673, ppl: 16.324598 +epoch: 1, batch: 6704, sum loss: 5095.645508, avg loss: 2.781466, ppl: 16.142670 +epoch: 1, batch: 6705, sum loss: 5566.786133, avg loss: 2.934521, ppl: 18.812489 +epoch: 1, batch: 6706, sum loss: 4757.643555, avg loss: 2.938631, ppl: 18.889969 +epoch: 1, batch: 6707, sum loss: 3985.114502, avg loss: 2.812360, ppl: 16.649168 +epoch: 1, batch: 6708, sum loss: 5354.924805, avg loss: 2.816899, ppl: 16.724903 +epoch: 1, batch: 6709, sum loss: 4387.714355, avg loss: 2.907697, ppl: 18.314571 +epoch: 1, batch: 6710, sum loss: 5118.048828, avg loss: 3.090609, ppl: 21.990467 +epoch: 1, batch: 6711, sum loss: 4107.797852, avg loss: 2.660491, ppl: 14.303310 +epoch: 1, batch: 6712, sum loss: 5667.993164, avg loss: 3.129759, ppl: 22.868469 +epoch: 1, batch: 6713, sum loss: 4683.079102, avg loss: 2.934260, ppl: 18.807583 +epoch: 1, batch: 6714, sum loss: 4919.263672, avg loss: 2.912530, ppl: 18.403307 +epoch: 1, batch: 6715, sum loss: 3905.434814, avg loss: 2.513150, ppl: 12.343748 +epoch: 1, batch: 6716, sum loss: 4705.169434, avg loss: 2.832733, ppl: 16.991835 +epoch: 1, batch: 6717, sum loss: 4732.784668, avg loss: 2.698281, ppl: 14.854173 +epoch: 1, batch: 6718, sum loss: 4002.837891, avg loss: 2.684667, ppl: 14.653316 +epoch: 1, batch: 6719, sum loss: 4303.267090, avg loss: 2.471721, ppl: 11.842815 +epoch: 1, batch: 6720, sum loss: 5236.909180, avg loss: 2.980597, ppl: 19.699574 +epoch: 1, batch: 6721, sum loss: 3921.025879, avg loss: 2.605333, ppl: 13.535730 +epoch: 1, batch: 6722, sum loss: 4371.578125, avg loss: 2.639842, ppl: 14.010987 
+epoch: 1, batch: 6723, sum loss: 4913.188477, avg loss: 2.732585, ppl: 15.372577 +epoch: 1, batch: 6724, sum loss: 4705.525391, avg loss: 2.867474, ppl: 17.592529 +epoch: 1, batch: 6725, sum loss: 4518.801270, avg loss: 2.827785, ppl: 16.907976 +epoch: 1, batch: 6726, sum loss: 4641.225098, avg loss: 2.838670, ppl: 17.093014 +epoch: 1, batch: 6727, sum loss: 4556.830566, avg loss: 2.589108, ppl: 13.317890 +epoch: 1, batch: 6728, sum loss: 5117.092285, avg loss: 2.808503, ppl: 16.585070 +epoch: 1, batch: 6729, sum loss: 4552.939453, avg loss: 2.627201, ppl: 13.834992 +epoch: 1, batch: 6730, sum loss: 4904.284668, avg loss: 2.838128, ppl: 17.083748 +epoch: 1, batch: 6731, sum loss: 4013.510742, avg loss: 2.663245, ppl: 14.342762 +epoch: 1, batch: 6732, sum loss: 4153.507812, avg loss: 2.640501, ppl: 14.020223 +epoch: 1, batch: 6733, sum loss: 5608.652344, avg loss: 2.831223, ppl: 16.966190 +epoch: 1, batch: 6734, sum loss: 4637.953613, avg loss: 2.607057, ppl: 13.559082 +epoch: 1, batch: 6735, sum loss: 4797.371094, avg loss: 2.817012, ppl: 16.726793 +epoch: 1, batch: 6736, sum loss: 3664.066162, avg loss: 2.415337, ppl: 11.193541 +epoch: 1, batch: 6737, sum loss: 5164.359375, avg loss: 2.776537, ppl: 16.063305 +epoch: 1, batch: 6738, sum loss: 4316.700195, avg loss: 2.554260, ppl: 12.861785 +epoch: 1, batch: 6739, sum loss: 5158.812012, avg loss: 2.840755, ppl: 17.128700 +epoch: 1, batch: 6740, sum loss: 4298.591797, avg loss: 2.476147, ppl: 11.895349 +epoch: 1, batch: 6741, sum loss: 3375.576904, avg loss: 2.327984, ppl: 10.257243 +epoch: 1, batch: 6742, sum loss: 4272.640625, avg loss: 2.627700, ppl: 13.841902 +epoch: 1, batch: 6743, sum loss: 3852.920898, avg loss: 2.289317, ppl: 9.868196 +epoch: 1, batch: 6744, sum loss: 4800.287598, avg loss: 2.761961, ppl: 15.830853 +epoch: 1, batch: 6745, sum loss: 5069.542480, avg loss: 2.762694, ppl: 15.842460 +epoch: 1, batch: 6746, sum loss: 4131.823242, avg loss: 2.782372, ppl: 16.157309 +epoch: 1, batch: 6747, sum 
loss: 5118.687012, avg loss: 2.900106, ppl: 18.176071 +epoch: 1, batch: 6748, sum loss: 4497.121094, avg loss: 2.720581, ppl: 15.189149 +epoch: 1, batch: 6749, sum loss: 4347.929688, avg loss: 2.731112, ppl: 15.349939 +epoch: 1, batch: 6750, sum loss: 4700.487793, avg loss: 2.878437, ppl: 17.786451 +epoch: 1, batch: 6751, sum loss: 5326.050293, avg loss: 2.900899, ppl: 18.190485 +epoch: 1, batch: 6752, sum loss: 2958.012207, avg loss: 2.178212, ppl: 8.830504 +epoch: 1, batch: 6753, sum loss: 4438.735352, avg loss: 2.893569, ppl: 18.057646 +epoch: 1, batch: 6754, sum loss: 4504.428223, avg loss: 2.698879, ppl: 14.863054 +epoch: 1, batch: 6755, sum loss: 4368.134766, avg loss: 2.742081, ppl: 15.519246 +epoch: 1, batch: 6756, sum loss: 4432.330566, avg loss: 2.630463, ppl: 13.880197 +epoch: 1, batch: 6757, sum loss: 4459.115234, avg loss: 2.629195, ppl: 13.862613 +epoch: 1, batch: 6758, sum loss: 4829.500488, avg loss: 2.656491, ppl: 14.246208 +epoch: 1, batch: 6759, sum loss: 4355.524414, avg loss: 2.977119, ppl: 19.631172 +epoch: 1, batch: 6760, sum loss: 4440.156250, avg loss: 2.635108, ppl: 13.944812 +epoch: 1, batch: 6761, sum loss: 5323.746094, avg loss: 3.016287, ppl: 20.415346 +epoch: 1, batch: 6762, sum loss: 4275.581543, avg loss: 2.523956, ppl: 12.477862 +epoch: 1, batch: 6763, sum loss: 5265.852051, avg loss: 3.106697, ppl: 22.347116 +epoch: 1, batch: 6764, sum loss: 5975.783203, avg loss: 2.927870, ppl: 18.687784 +epoch: 1, batch: 6765, sum loss: 4184.661133, avg loss: 2.670492, ppl: 14.447078 +epoch: 1, batch: 6766, sum loss: 4133.523438, avg loss: 2.509729, ppl: 12.301592 +epoch: 1, batch: 6767, sum loss: 5134.046875, avg loss: 2.935418, ppl: 18.829380 +epoch: 1, batch: 6768, sum loss: 4591.122070, avg loss: 2.681730, ppl: 14.610348 +epoch: 1, batch: 6769, sum loss: 4532.888672, avg loss: 2.840156, ppl: 17.118433 +epoch: 1, batch: 6770, sum loss: 4297.150879, avg loss: 2.827073, ppl: 16.895935 +epoch: 1, batch: 6771, sum loss: 3925.906006, avg loss: 
2.480042, ppl: 11.941760 +epoch: 1, batch: 6772, sum loss: 5110.302246, avg loss: 2.754880, ppl: 15.719153 +epoch: 1, batch: 6773, sum loss: 4348.402832, avg loss: 2.548888, ppl: 12.792870 +epoch: 1, batch: 6774, sum loss: 4087.479004, avg loss: 2.620179, ppl: 13.738182 +epoch: 1, batch: 6775, sum loss: 4241.408691, avg loss: 2.722342, ppl: 15.215917 +epoch: 1, batch: 6776, sum loss: 4286.838867, avg loss: 2.553210, ppl: 12.848275 +epoch: 1, batch: 6777, sum loss: 4885.647949, avg loss: 2.955625, ppl: 19.213724 +epoch: 1, batch: 6778, sum loss: 5265.341797, avg loss: 2.907423, ppl: 18.309546 +epoch: 1, batch: 6779, sum loss: 4271.385254, avg loss: 2.496426, ppl: 12.139035 +epoch: 1, batch: 6780, sum loss: 5099.434082, avg loss: 2.782015, ppl: 16.151539 +epoch: 1, batch: 6781, sum loss: 4043.490479, avg loss: 2.525603, ppl: 12.498430 +epoch: 1, batch: 6782, sum loss: 4327.487305, avg loss: 2.569767, ppl: 13.062780 +epoch: 1, batch: 6783, sum loss: 4754.306641, avg loss: 2.806556, ppl: 16.552820 +epoch: 1, batch: 6784, sum loss: 3544.889648, avg loss: 2.539319, ppl: 12.671042 +epoch: 1, batch: 6785, sum loss: 4673.623047, avg loss: 3.060657, ppl: 21.341570 +epoch: 1, batch: 6786, sum loss: 4880.202148, avg loss: 2.747862, ppl: 15.609218 +epoch: 1, batch: 6787, sum loss: 4845.500977, avg loss: 2.875668, ppl: 17.737274 +epoch: 1, batch: 6788, sum loss: 3704.169189, avg loss: 2.701801, ppl: 14.906555 +epoch: 1, batch: 6789, sum loss: 5137.073242, avg loss: 2.794925, ppl: 16.361408 +epoch: 1, batch: 6790, sum loss: 4370.806152, avg loss: 2.724941, ppl: 15.255522 +epoch: 1, batch: 6791, sum loss: 4830.426270, avg loss: 2.892471, ppl: 18.037823 +epoch: 1, batch: 6792, sum loss: 5126.215332, avg loss: 2.669904, ppl: 14.438583 +epoch: 1, batch: 6793, sum loss: 3915.784424, avg loss: 2.594953, ppl: 13.395962 +epoch: 1, batch: 6794, sum loss: 4366.645020, avg loss: 2.758462, ppl: 15.775557 +epoch: 1, batch: 6795, sum loss: 4838.586426, avg loss: 2.827929, ppl: 16.910408 
+epoch: 1, batch: 6796, sum loss: 4726.688477, avg loss: 2.685618, ppl: 14.667269 +epoch: 1, batch: 6797, sum loss: 4224.902344, avg loss: 2.734565, ppl: 15.403038 +epoch: 1, batch: 6798, sum loss: 5078.428711, avg loss: 2.971579, ppl: 19.522722 +epoch: 1, batch: 6799, sum loss: 4820.178711, avg loss: 2.926641, ppl: 18.664839 +epoch: 1, batch: 6800, sum loss: 4532.907227, avg loss: 2.779220, ppl: 16.106447 +epoch: 1, batch: 6801, sum loss: 5277.998047, avg loss: 2.930593, ppl: 18.738739 +epoch: 1, batch: 6802, sum loss: 4785.030273, avg loss: 2.783613, ppl: 16.177361 +epoch: 1, batch: 6803, sum loss: 4053.906738, avg loss: 2.625587, ppl: 13.812686 +epoch: 1, batch: 6804, sum loss: 4792.585449, avg loss: 2.738620, ppl: 15.465632 +epoch: 1, batch: 6805, sum loss: 4763.278809, avg loss: 3.030076, ppl: 20.698797 +epoch: 1, batch: 6806, sum loss: 4703.657715, avg loss: 2.804805, ppl: 16.523849 +epoch: 1, batch: 6807, sum loss: 5632.908203, avg loss: 2.963129, ppl: 19.358450 +epoch: 1, batch: 6808, sum loss: 4452.583984, avg loss: 2.789840, ppl: 16.278406 +epoch: 1, batch: 6809, sum loss: 5340.210938, avg loss: 3.051549, ppl: 21.148077 +epoch: 1, batch: 6810, sum loss: 4092.543213, avg loss: 2.620066, ppl: 13.736632 +epoch: 1, batch: 6811, sum loss: 4563.679199, avg loss: 2.843414, ppl: 17.174295 +epoch: 1, batch: 6812, sum loss: 3959.702637, avg loss: 2.655736, ppl: 14.235462 +epoch: 1, batch: 6813, sum loss: 5270.221680, avg loss: 2.982582, ppl: 19.738708 +epoch: 1, batch: 6814, sum loss: 4759.754883, avg loss: 2.650198, ppl: 14.156837 +epoch: 1, batch: 6815, sum loss: 3688.326172, avg loss: 2.531452, ppl: 12.571753 +epoch: 1, batch: 6816, sum loss: 3257.308838, avg loss: 2.618415, ppl: 13.713975 +epoch: 1, batch: 6817, sum loss: 4480.147461, avg loss: 2.555703, ppl: 12.880354 +epoch: 1, batch: 6818, sum loss: 3942.240479, avg loss: 2.624661, ppl: 13.799891 +epoch: 1, batch: 6819, sum loss: 4253.764160, avg loss: 2.939713, ppl: 18.910410 +epoch: 1, batch: 6820, sum 
loss: 4107.559570, avg loss: 2.651749, ppl: 14.178818 +epoch: 1, batch: 6821, sum loss: 5002.720703, avg loss: 3.013687, ppl: 20.362341 +epoch: 1, batch: 6822, sum loss: 5096.084473, avg loss: 2.898797, ppl: 18.152287 +epoch: 1, batch: 6823, sum loss: 5155.844727, avg loss: 2.818942, ppl: 16.759111 +epoch: 1, batch: 6824, sum loss: 4618.463379, avg loss: 2.696126, ppl: 14.822195 +epoch: 1, batch: 6825, sum loss: 4647.121094, avg loss: 2.698677, ppl: 14.860053 +epoch: 1, batch: 6826, sum loss: 4640.955078, avg loss: 2.661098, ppl: 14.311996 +epoch: 1, batch: 6827, sum loss: 5315.647461, avg loss: 3.018539, ppl: 20.461380 +epoch: 1, batch: 6828, sum loss: 4437.745605, avg loss: 2.746130, ppl: 15.582208 +epoch: 1, batch: 6829, sum loss: 5696.914062, avg loss: 2.950240, ppl: 19.110546 +epoch: 1, batch: 6830, sum loss: 5272.138184, avg loss: 3.029964, ppl: 20.696497 +epoch: 1, batch: 6831, sum loss: 4236.957520, avg loss: 2.613792, ppl: 13.650723 +epoch: 1, batch: 6832, sum loss: 4012.800537, avg loss: 2.542966, ppl: 12.717337 +epoch: 1, batch: 6833, sum loss: 4289.952148, avg loss: 2.482611, ppl: 11.972486 +epoch: 1, batch: 6834, sum loss: 4666.900879, avg loss: 2.740400, ppl: 15.493178 +epoch: 1, batch: 6835, sum loss: 4894.976562, avg loss: 2.779657, ppl: 16.113499 +epoch: 1, batch: 6836, sum loss: 4013.895020, avg loss: 2.596310, ppl: 13.414147 +epoch: 1, batch: 6837, sum loss: 3950.177002, avg loss: 2.522463, ppl: 12.459244 +epoch: 1, batch: 6838, sum loss: 4304.420898, avg loss: 2.695317, ppl: 14.810210 +epoch: 1, batch: 6839, sum loss: 4769.624023, avg loss: 3.011126, ppl: 20.310263 +epoch: 1, batch: 6840, sum loss: 5535.359863, avg loss: 2.857697, ppl: 17.421364 +epoch: 1, batch: 6841, sum loss: 5375.397461, avg loss: 2.857734, ppl: 17.422003 +epoch: 1, batch: 6842, sum loss: 4546.184082, avg loss: 2.468070, ppl: 11.799646 +epoch: 1, batch: 6843, sum loss: 5552.793457, avg loss: 3.032656, ppl: 20.752287 +epoch: 1, batch: 6844, sum loss: 4596.137207, avg loss: 
2.664428, ppl: 14.359727 +epoch: 1, batch: 6845, sum loss: 3882.734863, avg loss: 2.685156, ppl: 14.660482 +epoch: 1, batch: 6846, sum loss: 5025.066406, avg loss: 2.704557, ppl: 14.947689 +epoch: 1, batch: 6847, sum loss: 4511.117188, avg loss: 2.878824, ppl: 17.793341 +epoch: 1, batch: 6848, sum loss: 4770.539551, avg loss: 2.666596, ppl: 14.390895 +epoch: 1, batch: 6849, sum loss: 5119.900391, avg loss: 2.940781, ppl: 18.930632 +epoch: 1, batch: 6850, sum loss: 4502.986328, avg loss: 2.873635, ppl: 17.701250 +epoch: 1, batch: 6851, sum loss: 5910.966797, avg loss: 2.948113, ppl: 19.069935 +epoch: 1, batch: 6852, sum loss: 5440.187500, avg loss: 2.782705, ppl: 16.162676 +epoch: 1, batch: 6853, sum loss: 4643.969727, avg loss: 2.707854, ppl: 14.997058 +epoch: 1, batch: 6854, sum loss: 4479.583496, avg loss: 2.592352, ppl: 13.361156 +epoch: 1, batch: 6855, sum loss: 4812.351074, avg loss: 2.753061, ppl: 15.690592 +epoch: 1, batch: 6856, sum loss: 5344.842285, avg loss: 2.691260, ppl: 14.750248 +epoch: 1, batch: 6857, sum loss: 5355.571289, avg loss: 2.904323, ppl: 18.252880 +epoch: 1, batch: 6858, sum loss: 4832.250488, avg loss: 3.039151, ppl: 20.887506 +epoch: 1, batch: 6859, sum loss: 4347.400879, avg loss: 2.623658, ppl: 13.786057 +epoch: 1, batch: 6860, sum loss: 5443.030273, avg loss: 2.950152, ppl: 19.108856 +epoch: 1, batch: 6861, sum loss: 3569.068359, avg loss: 2.223719, ppl: 9.241633 +epoch: 1, batch: 6862, sum loss: 5496.286621, avg loss: 2.925113, ppl: 18.636326 +epoch: 1, batch: 6863, sum loss: 4636.960938, avg loss: 2.678776, ppl: 14.567249 +epoch: 1, batch: 6864, sum loss: 4529.356445, avg loss: 2.830848, ppl: 16.959833 +epoch: 1, batch: 6865, sum loss: 5123.003418, avg loss: 2.764708, ppl: 15.874401 +epoch: 1, batch: 6866, sum loss: 4097.238770, avg loss: 2.715201, ppl: 15.107652 +epoch: 1, batch: 6867, sum loss: 4331.212402, avg loss: 2.799750, ppl: 16.440529 +epoch: 1, batch: 6868, sum loss: 4679.833008, avg loss: 2.648463, ppl: 14.132294 +epoch: 
1, batch: 6869, sum loss: 4723.915527, avg loss: 2.835483, ppl: 17.038633 +epoch: 1, batch: 6870, sum loss: 4845.941406, avg loss: 2.869119, ppl: 17.621481 +epoch: 1, batch: 6871, sum loss: 4537.774414, avg loss: 2.813251, ppl: 16.664013 +epoch: 1, batch: 6872, sum loss: 4780.928223, avg loss: 2.615387, ppl: 13.672513 +epoch: 1, batch: 6873, sum loss: 4331.437012, avg loss: 2.690334, ppl: 14.736591 +epoch: 1, batch: 6874, sum loss: 3918.860107, avg loss: 2.447758, ppl: 11.562391 +epoch: 1, batch: 6875, sum loss: 4546.992676, avg loss: 2.874205, ppl: 17.711340 +epoch: 1, batch: 6876, sum loss: 5162.044434, avg loss: 2.999445, ppl: 20.074387 +epoch: 1, batch: 6877, sum loss: 3621.034668, avg loss: 2.396449, ppl: 10.984103 +epoch: 1, batch: 6878, sum loss: 3630.395020, avg loss: 2.531656, ppl: 12.574315 +epoch: 1, batch: 6879, sum loss: 4412.476562, avg loss: 2.893427, ppl: 18.055084 +epoch: 1, batch: 6880, sum loss: 4790.626953, avg loss: 2.658506, ppl: 14.274941 +epoch: 1, batch: 6881, sum loss: 4625.736328, avg loss: 2.995943, ppl: 20.004221 +epoch: 1, batch: 6882, sum loss: 3577.421387, avg loss: 2.588583, ppl: 13.310893 +epoch: 1, batch: 6883, sum loss: 4490.711914, avg loss: 2.753349, ppl: 15.695105 +epoch: 1, batch: 6884, sum loss: 5400.624023, avg loss: 2.967376, ppl: 19.440840 +epoch: 1, batch: 6885, sum loss: 4634.275391, avg loss: 2.805252, ppl: 16.531235 +epoch: 1, batch: 6886, sum loss: 5051.153320, avg loss: 3.033726, ppl: 20.774488 +epoch: 1, batch: 6887, sum loss: 4837.117676, avg loss: 2.825419, ppl: 16.868015 +epoch: 1, batch: 6888, sum loss: 5823.579590, avg loss: 2.948648, ppl: 19.080139 +epoch: 1, batch: 6889, sum loss: 4388.666992, avg loss: 2.885383, ppl: 17.910419 +epoch: 1, batch: 6890, sum loss: 3745.074951, avg loss: 2.498382, ppl: 12.162803 +epoch: 1, batch: 6891, sum loss: 4972.620605, avg loss: 2.841497, ppl: 17.141415 +epoch: 1, batch: 6892, sum loss: 3927.453369, avg loss: 2.664487, ppl: 14.360576 +epoch: 1, batch: 6893, sum loss: 
4583.973145, avg loss: 2.863194, ppl: 17.517384 +epoch: 1, batch: 6894, sum loss: 4061.708740, avg loss: 2.652978, ppl: 14.196248 +epoch: 1, batch: 6895, sum loss: 4711.497559, avg loss: 2.774734, ppl: 16.034353 +epoch: 1, batch: 6896, sum loss: 4090.024414, avg loss: 2.578830, ppl: 13.181706 +epoch: 1, batch: 6897, sum loss: 4220.383301, avg loss: 2.769280, ppl: 15.947155 +epoch: 1, batch: 6898, sum loss: 4990.603027, avg loss: 2.869812, ppl: 17.633703 +epoch: 1, batch: 6899, sum loss: 4299.472656, avg loss: 2.754307, ppl: 15.710147 +epoch: 1, batch: 6900, sum loss: 4265.154785, avg loss: 2.616659, ppl: 13.689915 +epoch: 1, batch: 6901, sum loss: 5519.756836, avg loss: 2.851114, ppl: 17.307053 +epoch: 1, batch: 6902, sum loss: 4380.100586, avg loss: 2.959527, ppl: 19.288855 +epoch: 1, batch: 6903, sum loss: 4940.358887, avg loss: 2.747697, ppl: 15.606643 +epoch: 1, batch: 6904, sum loss: 5311.269531, avg loss: 2.874063, ppl: 17.708832 +epoch: 1, batch: 6905, sum loss: 5238.785645, avg loss: 3.028200, ppl: 20.660004 +epoch: 1, batch: 6906, sum loss: 4596.889648, avg loss: 2.830597, ppl: 16.955580 +epoch: 1, batch: 6907, sum loss: 4155.106445, avg loss: 2.749905, ppl: 15.641144 +epoch: 1, batch: 6908, sum loss: 4421.122559, avg loss: 2.641053, ppl: 14.027966 +epoch: 1, batch: 6909, sum loss: 3310.936523, avg loss: 2.394025, ppl: 10.957510 +epoch: 1, batch: 6910, sum loss: 5690.512207, avg loss: 3.061061, ppl: 21.350191 +epoch: 1, batch: 6911, sum loss: 5404.074707, avg loss: 2.936997, ppl: 18.859125 +epoch: 1, batch: 6912, sum loss: 4310.202637, avg loss: 2.655701, ppl: 14.234961 +epoch: 1, batch: 6913, sum loss: 4839.179688, avg loss: 2.752662, ppl: 15.684327 +epoch: 1, batch: 6914, sum loss: 4842.292969, avg loss: 2.757570, ppl: 15.761497 +epoch: 1, batch: 6915, sum loss: 4214.415039, avg loss: 2.878699, ppl: 17.791109 +epoch: 1, batch: 6916, sum loss: 4970.961914, avg loss: 3.154164, ppl: 23.433430 +epoch: 1, batch: 6917, sum loss: 4334.894531, avg loss: 
2.665987, ppl: 14.382134 +epoch: 1, batch: 6918, sum loss: 4529.980957, avg loss: 2.607934, ppl: 13.570981 +epoch: 1, batch: 6919, sum loss: 4514.328125, avg loss: 2.786623, ppl: 16.226124 +epoch: 1, batch: 6920, sum loss: 5466.135254, avg loss: 2.843983, ppl: 17.184076 +epoch: 1, batch: 6921, sum loss: 3751.177246, avg loss: 2.507472, ppl: 12.273857 +epoch: 1, batch: 6922, sum loss: 4790.770996, avg loss: 2.756485, ppl: 15.744404 +epoch: 1, batch: 6923, sum loss: 4739.170410, avg loss: 2.733085, ppl: 15.380268 +epoch: 1, batch: 6924, sum loss: 4139.004395, avg loss: 2.588495, ppl: 13.309731 +epoch: 1, batch: 6925, sum loss: 5085.531250, avg loss: 2.661188, ppl: 14.313289 +epoch: 1, batch: 6926, sum loss: 4216.506348, avg loss: 2.607611, ppl: 13.566597 +epoch: 1, batch: 6927, sum loss: 4970.033203, avg loss: 3.131716, ppl: 22.913265 +epoch: 1, batch: 6928, sum loss: 5479.974609, avg loss: 2.749611, ppl: 15.636546 +epoch: 1, batch: 6929, sum loss: 4278.510254, avg loss: 2.805581, ppl: 16.536674 +epoch: 1, batch: 6930, sum loss: 3894.510254, avg loss: 2.557131, ppl: 12.898755 +epoch: 1, batch: 6931, sum loss: 4876.171875, avg loss: 2.825129, ppl: 16.863113 +epoch: 1, batch: 6932, sum loss: 4500.774414, avg loss: 2.716219, ppl: 15.123030 +epoch: 1, batch: 6933, sum loss: 4197.887207, avg loss: 2.594491, ppl: 13.389776 +epoch: 1, batch: 6934, sum loss: 5931.822266, avg loss: 2.995870, ppl: 20.002747 +epoch: 1, batch: 6935, sum loss: 4333.809570, avg loss: 2.558329, ppl: 12.914221 +epoch: 1, batch: 6936, sum loss: 5645.147949, avg loss: 3.025267, ppl: 20.599497 +epoch: 1, batch: 6937, sum loss: 4488.520508, avg loss: 2.654359, ppl: 14.215869 +epoch: 1, batch: 6938, sum loss: 3855.788330, avg loss: 2.533370, ppl: 12.595877 +epoch: 1, batch: 6939, sum loss: 4168.009766, avg loss: 2.898477, ppl: 18.146484 +epoch: 1, batch: 6940, sum loss: 4724.376953, avg loss: 2.762794, ppl: 15.844042 +epoch: 1, batch: 6941, sum loss: 3900.221924, avg loss: 2.363771, ppl: 10.630965 
+epoch: 1, batch: 6942, sum loss: 4535.181641, avg loss: 2.629091, ppl: 13.861165 +epoch: 1, batch: 6943, sum loss: 4361.022461, avg loss: 2.657540, ppl: 14.261158 +epoch: 1, batch: 6944, sum loss: 3718.067871, avg loss: 2.527578, ppl: 12.523143 +epoch: 1, batch: 6945, sum loss: 4833.406738, avg loss: 2.757220, ppl: 15.755980 +epoch: 1, batch: 6946, sum loss: 4173.668945, avg loss: 2.631569, ppl: 13.895560 +epoch: 1, batch: 6947, sum loss: 4255.849609, avg loss: 2.761745, ppl: 15.827445 +epoch: 1, batch: 6948, sum loss: 4612.840332, avg loss: 2.861563, ppl: 17.488848 +epoch: 1, batch: 6949, sum loss: 5479.005371, avg loss: 2.925256, ppl: 18.639006 +epoch: 1, batch: 6950, sum loss: 4996.956055, avg loss: 2.816773, ppl: 16.722801 +epoch: 1, batch: 6951, sum loss: 5267.620605, avg loss: 2.818417, ppl: 16.750307 +epoch: 1, batch: 6952, sum loss: 4407.718262, avg loss: 2.737713, ppl: 15.451609 +epoch: 1, batch: 6953, sum loss: 5171.911133, avg loss: 2.840149, ppl: 17.118315 +epoch: 1, batch: 6954, sum loss: 4483.837402, avg loss: 2.573959, ppl: 13.117660 +epoch: 1, batch: 6955, sum loss: 4429.986816, avg loss: 2.683214, ppl: 14.632051 +epoch: 1, batch: 6956, sum loss: 6126.477051, avg loss: 3.116214, ppl: 22.560808 +epoch: 1, batch: 6957, sum loss: 5329.644531, avg loss: 2.928376, ppl: 18.697245 +epoch: 1, batch: 6958, sum loss: 3352.124268, avg loss: 2.367319, ppl: 10.668755 +epoch: 1, batch: 6959, sum loss: 4494.647949, avg loss: 2.907275, ppl: 18.306852 +epoch: 1, batch: 6960, sum loss: 4456.469238, avg loss: 2.638525, ppl: 13.992553 +epoch: 1, batch: 6961, sum loss: 4147.545898, avg loss: 2.630023, ppl: 13.874085 +epoch: 1, batch: 6962, sum loss: 4728.906738, avg loss: 2.848739, ppl: 17.265999 +epoch: 1, batch: 6963, sum loss: 4272.500488, avg loss: 2.668645, ppl: 14.420415 +epoch: 1, batch: 6964, sum loss: 5505.600586, avg loss: 2.800407, ppl: 16.451344 +epoch: 1, batch: 6965, sum loss: 4811.709473, avg loss: 2.975702, ppl: 19.603376 +epoch: 1, batch: 6966, sum 
loss: 5059.351562, avg loss: 2.868113, ppl: 17.603773 +epoch: 1, batch: 6967, sum loss: 4836.146973, avg loss: 2.994518, ppl: 19.975735 +epoch: 1, batch: 6968, sum loss: 5020.968262, avg loss: 2.758774, ppl: 15.780481 +epoch: 1, batch: 6969, sum loss: 4448.324219, avg loss: 2.654131, ppl: 14.212636 +epoch: 1, batch: 6970, sum loss: 4355.458008, avg loss: 2.753134, ppl: 15.691730 +epoch: 1, batch: 6971, sum loss: 4138.559082, avg loss: 2.664881, ppl: 14.366233 +epoch: 1, batch: 6972, sum loss: 4573.645508, avg loss: 2.698316, ppl: 14.854693 +epoch: 1, batch: 6973, sum loss: 4887.970215, avg loss: 2.645005, ppl: 14.083522 +epoch: 1, batch: 6974, sum loss: 4594.566895, avg loss: 2.619479, ppl: 13.728575 +epoch: 1, batch: 6975, sum loss: 4085.142578, avg loss: 2.738031, ppl: 15.456527 +epoch: 1, batch: 6976, sum loss: 4954.634766, avg loss: 2.807158, ppl: 16.562788 +epoch: 1, batch: 6977, sum loss: 5112.328613, avg loss: 3.014345, ppl: 20.375734 +epoch: 1, batch: 6978, sum loss: 4178.521484, avg loss: 2.601819, ppl: 13.488252 +epoch: 1, batch: 6979, sum loss: 3752.230225, avg loss: 2.478356, ppl: 11.921645 +epoch: 1, batch: 6980, sum loss: 5329.874023, avg loss: 2.941432, ppl: 18.942944 +epoch: 1, batch: 6981, sum loss: 4335.298828, avg loss: 2.957230, ppl: 19.244587 +epoch: 1, batch: 6982, sum loss: 5517.720215, avg loss: 2.939649, ppl: 18.909201 +epoch: 1, batch: 6983, sum loss: 3877.530029, avg loss: 2.618184, ppl: 13.710800 +epoch: 1, batch: 6984, sum loss: 5473.155273, avg loss: 3.013852, ppl: 20.365696 +epoch: 1, batch: 6985, sum loss: 3640.354980, avg loss: 2.556429, ppl: 12.889708 +epoch: 1, batch: 6986, sum loss: 4112.094727, avg loss: 2.725046, ppl: 15.257122 +epoch: 1, batch: 6987, sum loss: 5757.479492, avg loss: 2.909287, ppl: 18.343719 +epoch: 1, batch: 6988, sum loss: 5152.965820, avg loss: 3.142052, ppl: 23.151335 +epoch: 1, batch: 6989, sum loss: 4516.019043, avg loss: 2.740303, ppl: 15.491675 +epoch: 1, batch: 6990, sum loss: 5265.541504, avg loss: 
2.915582, ppl: 18.459557 +epoch: 1, batch: 6991, sum loss: 5447.272461, avg loss: 2.952451, ppl: 19.152843 +epoch: 1, batch: 6992, sum loss: 4026.008301, avg loss: 2.482126, ppl: 11.966679 +epoch: 1, batch: 6993, sum loss: 4650.255859, avg loss: 2.651229, ppl: 14.171447 +epoch: 1, batch: 6994, sum loss: 5570.104004, avg loss: 2.939369, ppl: 18.903913 +epoch: 1, batch: 6995, sum loss: 6158.163086, avg loss: 2.953556, ppl: 19.174007 +epoch: 1, batch: 6996, sum loss: 5146.826660, avg loss: 2.911101, ppl: 18.377022 +epoch: 1, batch: 6997, sum loss: 4580.154297, avg loss: 2.829002, ppl: 16.928560 +epoch: 1, batch: 6998, sum loss: 4868.740234, avg loss: 2.698858, ppl: 14.862753 +epoch: 1, batch: 6999, sum loss: 5424.756836, avg loss: 2.967591, ppl: 19.445026 +epoch: 1, batch: 7000, sum loss: 4274.447266, avg loss: 2.455168, ppl: 11.648391 +epoch: 1, batch: 7001, sum loss: 5712.518066, avg loss: 2.888027, ppl: 17.957844 +epoch: 1, batch: 7002, sum loss: 4125.311035, avg loss: 2.815912, ppl: 16.708406 +epoch: 1, batch: 7003, sum loss: 4644.142578, avg loss: 2.812927, ppl: 16.658611 +epoch: 1, batch: 7004, sum loss: 4754.580078, avg loss: 2.693813, ppl: 14.787956 +epoch: 1, batch: 7005, sum loss: 6990.253906, avg loss: 3.171622, ppl: 23.846121 +epoch: 1, batch: 7006, sum loss: 4832.767090, avg loss: 2.822878, ppl: 16.825203 +epoch: 1, batch: 7007, sum loss: 4938.359375, avg loss: 2.915206, ppl: 18.452618 +epoch: 1, batch: 7008, sum loss: 4339.834473, avg loss: 2.912641, ppl: 18.405340 +epoch: 1, batch: 7009, sum loss: 3831.731934, avg loss: 2.564747, ppl: 12.997368 +epoch: 1, batch: 7010, sum loss: 4529.739746, avg loss: 2.775576, ppl: 16.047867 +epoch: 1, batch: 7011, sum loss: 4698.136230, avg loss: 2.717256, ppl: 15.138729 +epoch: 1, batch: 7012, sum loss: 4383.544922, avg loss: 2.607701, ppl: 13.567817 +epoch: 1, batch: 7013, sum loss: 4197.236328, avg loss: 2.598908, ppl: 13.449043 +epoch: 1, batch: 7014, sum loss: 4782.318848, avg loss: 2.724968, ppl: 15.255925 
+epoch: 1, batch: 7015, sum loss: 4709.376465, avg loss: 2.971215, ppl: 19.515627 +epoch: 1, batch: 7016, sum loss: 4476.678711, avg loss: 2.831549, ppl: 16.971724 +epoch: 1, batch: 7017, sum loss: 4957.684570, avg loss: 2.721012, ppl: 15.195698 +epoch: 1, batch: 7018, sum loss: 4148.923828, avg loss: 2.652765, ppl: 14.193222 +epoch: 1, batch: 7019, sum loss: 5854.716797, avg loss: 3.134217, ppl: 22.970633 +epoch: 1, batch: 7020, sum loss: 5678.293945, avg loss: 2.922436, ppl: 18.586517 +epoch: 1, batch: 7021, sum loss: 5309.837402, avg loss: 2.966389, ppl: 19.421669 +epoch: 1, batch: 7022, sum loss: 4708.522949, avg loss: 2.732747, ppl: 15.375062 +epoch: 1, batch: 7023, sum loss: 4638.958008, avg loss: 2.865323, ppl: 17.554724 +epoch: 1, batch: 7024, sum loss: 4090.963867, avg loss: 2.687887, ppl: 14.700583 +epoch: 1, batch: 7025, sum loss: 4371.193359, avg loss: 2.716714, ppl: 15.130527 +epoch: 1, batch: 7026, sum loss: 3902.700195, avg loss: 2.626312, ppl: 13.822701 +epoch: 1, batch: 7027, sum loss: 5074.236816, avg loss: 2.897908, ppl: 18.136164 +epoch: 1, batch: 7028, sum loss: 4809.867188, avg loss: 2.906264, ppl: 18.288351 +epoch: 1, batch: 7029, sum loss: 5089.516602, avg loss: 2.794902, ppl: 16.361027 +epoch: 1, batch: 7030, sum loss: 4800.844727, avg loss: 2.670103, ppl: 14.441451 +epoch: 1, batch: 7031, sum loss: 4892.902344, avg loss: 2.647674, ppl: 14.121160 +epoch: 1, batch: 7032, sum loss: 5639.480957, avg loss: 2.949519, ppl: 19.096769 +epoch: 1, batch: 7033, sum loss: 4833.232422, avg loss: 2.838069, ppl: 17.082754 +epoch: 1, batch: 7034, sum loss: 4130.163086, avg loss: 2.715426, ppl: 15.111049 +epoch: 1, batch: 7035, sum loss: 5522.449219, avg loss: 2.980275, ppl: 19.693226 +epoch: 1, batch: 7036, sum loss: 5215.399902, avg loss: 2.772674, ppl: 16.001366 +epoch: 1, batch: 7037, sum loss: 5156.416016, avg loss: 2.966868, ppl: 19.430960 +epoch: 1, batch: 7038, sum loss: 4956.505371, avg loss: 2.787686, ppl: 16.243387 +epoch: 1, batch: 7039, sum 
loss: 4198.366211, avg loss: 2.606062, ppl: 13.545609 +epoch: 1, batch: 7040, sum loss: 5489.108398, avg loss: 2.967086, ppl: 19.435196 +epoch: 1, batch: 7041, sum loss: 3832.271240, avg loss: 2.708319, ppl: 15.004028 +epoch: 1, batch: 7042, sum loss: 4018.107910, avg loss: 2.474204, ppl: 11.872256 +epoch: 1, batch: 7043, sum loss: 4624.067871, avg loss: 2.734517, ppl: 15.402300 +epoch: 1, batch: 7044, sum loss: 4544.666504, avg loss: 2.872735, ppl: 17.685318 +epoch: 1, batch: 7045, sum loss: 4329.347656, avg loss: 2.584685, ppl: 13.259116 +epoch: 1, batch: 7046, sum loss: 4954.268066, avg loss: 2.750843, ppl: 15.655821 +epoch: 1, batch: 7047, sum loss: 4316.006836, avg loss: 2.585984, ppl: 13.276343 +epoch: 1, batch: 7048, sum loss: 5028.223633, avg loss: 2.848852, ppl: 17.267946 +epoch: 1, batch: 7049, sum loss: 5316.220703, avg loss: 2.818781, ppl: 16.756411 +epoch: 1, batch: 7050, sum loss: 3862.275635, avg loss: 2.614946, ppl: 13.666483 +epoch: 1, batch: 7051, sum loss: 4744.583984, avg loss: 2.619870, ppl: 13.733937 +epoch: 1, batch: 7052, sum loss: 4725.928223, avg loss: 2.910054, ppl: 18.357794 +epoch: 1, batch: 7053, sum loss: 4270.649414, avg loss: 2.592987, ppl: 13.369645 +epoch: 1, batch: 7054, sum loss: 4665.009766, avg loss: 2.879636, ppl: 17.807787 +epoch: 1, batch: 7055, sum loss: 4011.637695, avg loss: 2.701439, ppl: 14.901165 +epoch: 1, batch: 7056, sum loss: 3776.051758, avg loss: 2.801225, ppl: 16.464811 +epoch: 1, batch: 7057, sum loss: 4997.641602, avg loss: 2.863978, ppl: 17.531126 +epoch: 1, batch: 7058, sum loss: 5481.722656, avg loss: 2.853578, ppl: 17.349747 +epoch: 1, batch: 7059, sum loss: 5527.823242, avg loss: 2.880575, ppl: 17.824514 +epoch: 1, batch: 7060, sum loss: 4366.380371, avg loss: 2.712038, ppl: 15.059930 +epoch: 1, batch: 7061, sum loss: 5086.136230, avg loss: 2.811574, ppl: 16.636074 +epoch: 1, batch: 7062, sum loss: 4807.698730, avg loss: 2.737870, ppl: 15.454025 +epoch: 1, batch: 7063, sum loss: 3797.958252, avg loss: 
2.695499, ppl: 14.812911 +epoch: 1, batch: 7064, sum loss: 5521.054688, avg loss: 2.918105, ppl: 18.506184 +epoch: 1, batch: 7065, sum loss: 4841.367188, avg loss: 2.611309, ppl: 13.616864 +epoch: 1, batch: 7066, sum loss: 5641.705078, avg loss: 2.991360, ppl: 19.912748 +epoch: 1, batch: 7067, sum loss: 4402.016113, avg loss: 2.896063, ppl: 18.102741 +epoch: 1, batch: 7068, sum loss: 5575.707031, avg loss: 2.942326, ppl: 18.959888 +epoch: 1, batch: 7069, sum loss: 4766.165039, avg loss: 2.855701, ppl: 17.386625 +epoch: 1, batch: 7070, sum loss: 4347.150879, avg loss: 2.572278, ppl: 13.095629 +epoch: 1, batch: 7071, sum loss: 4404.934570, avg loss: 2.811062, ppl: 16.627573 +epoch: 1, batch: 7072, sum loss: 4196.415527, avg loss: 2.640916, ppl: 14.026047 +epoch: 1, batch: 7073, sum loss: 4405.394043, avg loss: 2.530381, ppl: 12.558295 +epoch: 1, batch: 7074, sum loss: 5091.741211, avg loss: 3.027194, ppl: 20.639248 +epoch: 1, batch: 7075, sum loss: 5348.935059, avg loss: 2.888194, ppl: 17.960840 +epoch: 1, batch: 7076, sum loss: 4317.979004, avg loss: 2.809355, ppl: 16.599213 +epoch: 1, batch: 7077, sum loss: 5266.841797, avg loss: 3.131297, ppl: 22.903669 +epoch: 1, batch: 7078, sum loss: 4393.479492, avg loss: 2.712024, ppl: 15.059732 +epoch: 1, batch: 7079, sum loss: 4741.104492, avg loss: 3.188369, ppl: 24.248840 +epoch: 1, batch: 7080, sum loss: 4994.446777, avg loss: 2.915614, ppl: 18.460142 +epoch: 1, batch: 7081, sum loss: 4869.114258, avg loss: 2.760269, ppl: 15.804092 +epoch: 1, batch: 7082, sum loss: 5149.846680, avg loss: 3.047246, ppl: 21.057281 +epoch: 1, batch: 7083, sum loss: 4806.728027, avg loss: 2.878280, ppl: 17.783667 +epoch: 1, batch: 7084, sum loss: 4536.423828, avg loss: 2.713172, ppl: 15.077027 +epoch: 1, batch: 7085, sum loss: 3791.596680, avg loss: 2.668259, ppl: 14.414857 +epoch: 1, batch: 7086, sum loss: 4016.476318, avg loss: 2.674085, ppl: 14.499083 +epoch: 1, batch: 7087, sum loss: 5511.506348, avg loss: 2.953647, ppl: 19.175768 
+epoch: 1, batch: 7088, sum loss: 4728.420410, avg loss: 2.708145, ppl: 15.001417 +epoch: 1, batch: 7089, sum loss: 4043.022949, avg loss: 2.425329, ppl: 11.305945 +epoch: 1, batch: 7090, sum loss: 4708.573242, avg loss: 2.547929, ppl: 12.780611 +epoch: 1, batch: 7091, sum loss: 4925.268555, avg loss: 2.770118, ppl: 15.960509 +epoch: 1, batch: 7092, sum loss: 4303.352539, avg loss: 2.672890, ppl: 14.481757 +epoch: 1, batch: 7093, sum loss: 4548.576172, avg loss: 2.564023, ppl: 12.987957 +epoch: 1, batch: 7094, sum loss: 4143.080566, avg loss: 2.549588, ppl: 12.801827 +epoch: 1, batch: 7095, sum loss: 4997.604492, avg loss: 2.842778, ppl: 17.163387 +epoch: 1, batch: 7096, sum loss: 4366.664551, avg loss: 2.844733, ppl: 17.196959 +epoch: 1, batch: 7097, sum loss: 4330.741211, avg loss: 2.746190, ppl: 15.583144 +epoch: 1, batch: 7098, sum loss: 5010.065918, avg loss: 2.889311, ppl: 17.980923 +epoch: 1, batch: 7099, sum loss: 4401.002441, avg loss: 2.790744, ppl: 16.293131 +epoch: 1, batch: 7100, sum loss: 4410.860352, avg loss: 2.692833, ppl: 14.773465 +epoch: 1, batch: 7101, sum loss: 4670.526367, avg loss: 2.801755, ppl: 16.473539 +epoch: 1, batch: 7102, sum loss: 5068.710449, avg loss: 2.933281, ppl: 18.789185 +epoch: 1, batch: 7103, sum loss: 3938.214111, avg loss: 2.542423, ppl: 12.710437 +epoch: 1, batch: 7104, sum loss: 3570.091797, avg loss: 2.370579, ppl: 10.703588 +epoch: 1, batch: 7105, sum loss: 4792.031250, avg loss: 2.909551, ppl: 18.348566 +epoch: 1, batch: 7106, sum loss: 5481.725098, avg loss: 3.016910, ppl: 20.428068 +epoch: 1, batch: 7107, sum loss: 4243.625977, avg loss: 2.682444, ppl: 14.620777 +epoch: 1, batch: 7108, sum loss: 5236.763672, avg loss: 3.076829, ppl: 21.689526 +epoch: 1, batch: 7109, sum loss: 4708.797852, avg loss: 2.675453, ppl: 14.518929 +epoch: 1, batch: 7110, sum loss: 4732.005371, avg loss: 2.806646, ppl: 16.554304 +epoch: 1, batch: 7111, sum loss: 3837.134521, avg loss: 2.566645, ppl: 13.022064 +epoch: 1, batch: 7112, sum 
loss: 4406.319824, avg loss: 2.665650, ppl: 14.377294 +epoch: 1, batch: 7113, sum loss: 4405.150391, avg loss: 2.697581, ppl: 14.843785 +epoch: 1, batch: 7114, sum loss: 5058.786133, avg loss: 2.986296, ppl: 19.812170 +epoch: 1, batch: 7115, sum loss: 4395.525391, avg loss: 2.733536, ppl: 15.387193 +epoch: 1, batch: 7116, sum loss: 5171.008789, avg loss: 2.839653, ppl: 17.109837 +epoch: 1, batch: 7117, sum loss: 4969.925781, avg loss: 2.696650, ppl: 14.829965 +epoch: 1, batch: 7118, sum loss: 5343.623535, avg loss: 2.816881, ppl: 16.724607 +epoch: 1, batch: 7119, sum loss: 4933.093750, avg loss: 2.830232, ppl: 16.949387 +epoch: 1, batch: 7120, sum loss: 4589.044922, avg loss: 2.712202, ppl: 15.062401 +epoch: 1, batch: 7121, sum loss: 3984.286377, avg loss: 2.661514, ppl: 14.317947 +epoch: 1, batch: 7122, sum loss: 4526.200195, avg loss: 2.823581, ppl: 16.837032 +epoch: 1, batch: 7123, sum loss: 4189.065430, avg loss: 2.577886, ppl: 13.169273 +epoch: 1, batch: 7124, sum loss: 3776.656006, avg loss: 2.548351, ppl: 12.786003 +epoch: 1, batch: 7125, sum loss: 4021.713135, avg loss: 2.564868, ppl: 12.998942 +epoch: 1, batch: 7126, sum loss: 5450.391113, avg loss: 2.908426, ppl: 18.327932 +epoch: 1, batch: 7127, sum loss: 4978.565430, avg loss: 2.872802, ppl: 17.686499 +epoch: 1, batch: 7128, sum loss: 3974.798828, avg loss: 2.517289, ppl: 12.394944 +epoch: 1, batch: 7129, sum loss: 4746.725586, avg loss: 2.703147, ppl: 14.926627 +epoch: 1, batch: 7130, sum loss: 5151.917969, avg loss: 3.041274, ppl: 20.931890 +epoch: 1, batch: 7131, sum loss: 4520.716309, avg loss: 2.726608, ppl: 15.280971 +epoch: 1, batch: 7132, sum loss: 5178.907715, avg loss: 2.878770, ppl: 17.792377 +epoch: 1, batch: 7133, sum loss: 4537.715820, avg loss: 2.852116, ppl: 17.324394 +epoch: 1, batch: 7134, sum loss: 4298.373047, avg loss: 2.826018, ppl: 16.878117 +epoch: 1, batch: 7135, sum loss: 5484.435059, avg loss: 2.985539, ppl: 19.797174 +epoch: 1, batch: 7136, sum loss: 4346.698242, avg loss: 
2.725203, ppl: 15.259508 +epoch: 1, batch: 7137, sum loss: 4243.429688, avg loss: 2.663798, ppl: 14.350684 +epoch: 1, batch: 7138, sum loss: 5539.964355, avg loss: 3.030615, ppl: 20.709967 +epoch: 1, batch: 7139, sum loss: 5098.620605, avg loss: 2.856370, ppl: 17.398260 +epoch: 1, batch: 7140, sum loss: 3887.721191, avg loss: 2.644708, ppl: 14.079339 +epoch: 1, batch: 7141, sum loss: 4861.708008, avg loss: 2.838125, ppl: 17.083704 +epoch: 1, batch: 7142, sum loss: 4389.330078, avg loss: 2.583479, ppl: 13.243127 +epoch: 1, batch: 7143, sum loss: 3939.351807, avg loss: 2.321362, ppl: 10.189546 +epoch: 1, batch: 7144, sum loss: 4077.205078, avg loss: 2.545072, ppl: 12.744143 +epoch: 1, batch: 7145, sum loss: 4414.019043, avg loss: 2.639964, ppl: 14.012694 +epoch: 1, batch: 7146, sum loss: 4804.547852, avg loss: 2.870100, ppl: 17.638790 +epoch: 1, batch: 7147, sum loss: 5322.743164, avg loss: 3.022568, ppl: 20.543991 +epoch: 1, batch: 7148, sum loss: 4526.274902, avg loss: 2.720117, ppl: 15.182103 +epoch: 1, batch: 7149, sum loss: 5402.362793, avg loss: 2.910756, ppl: 18.370680 +epoch: 1, batch: 7150, sum loss: 4779.479004, avg loss: 2.848319, ppl: 17.258741 +epoch: 1, batch: 7151, sum loss: 4637.902832, avg loss: 2.688639, ppl: 14.711646 +epoch: 1, batch: 7152, sum loss: 5143.979004, avg loss: 3.004661, ppl: 20.179367 +epoch: 1, batch: 7153, sum loss: 3682.486328, avg loss: 2.660756, ppl: 14.307099 +epoch: 1, batch: 7154, sum loss: 4602.587402, avg loss: 2.642128, ppl: 14.043058 +epoch: 1, batch: 7155, sum loss: 4712.458496, avg loss: 2.660903, ppl: 14.309197 +epoch: 1, batch: 7156, sum loss: 5155.077148, avg loss: 3.088722, ppl: 21.949015 +epoch: 1, batch: 7157, sum loss: 4528.130859, avg loss: 2.654238, ppl: 14.214157 +epoch: 1, batch: 7158, sum loss: 5618.761719, avg loss: 3.187046, ppl: 24.216782 +epoch: 1, batch: 7159, sum loss: 4585.167480, avg loss: 2.956265, ppl: 19.226032 +epoch: 1, batch: 7160, sum loss: 5134.166016, avg loss: 3.113503, ppl: 22.499718 
+epoch: 1, batch: 7161, sum loss: 4341.939453, avg loss: 2.618781, ppl: 13.718994 +epoch: 1, batch: 7162, sum loss: 5047.434570, avg loss: 2.791723, ppl: 16.309088 +epoch: 1, batch: 7163, sum loss: 4574.863281, avg loss: 2.760931, ppl: 15.814564 +epoch: 1, batch: 7164, sum loss: 5487.621094, avg loss: 2.955100, ppl: 19.203644 +epoch: 1, batch: 7165, sum loss: 4340.550781, avg loss: 2.807601, ppl: 16.570114 +epoch: 1, batch: 7166, sum loss: 5181.788574, avg loss: 2.881974, ppl: 17.849464 +epoch: 1, batch: 7167, sum loss: 4546.865723, avg loss: 2.620672, ppl: 13.744957 +epoch: 1, batch: 7168, sum loss: 4663.346680, avg loss: 2.759377, ppl: 15.789999 +epoch: 1, batch: 7169, sum loss: 4448.666992, avg loss: 2.707649, ppl: 14.993979 +epoch: 1, batch: 7170, sum loss: 4374.796387, avg loss: 2.727429, ppl: 15.293519 +epoch: 1, batch: 7171, sum loss: 4479.979004, avg loss: 2.692295, ppl: 14.765525 +epoch: 1, batch: 7172, sum loss: 3566.495605, avg loss: 2.439464, ppl: 11.466888 +epoch: 1, batch: 7173, sum loss: 5217.839355, avg loss: 2.911741, ppl: 18.388781 +epoch: 1, batch: 7174, sum loss: 4800.023438, avg loss: 2.930417, ppl: 18.735443 +epoch: 1, batch: 7175, sum loss: 5432.501953, avg loss: 2.866756, ppl: 17.579891 +epoch: 1, batch: 7176, sum loss: 3937.715820, avg loss: 2.725063, ppl: 15.257373 +epoch: 1, batch: 7177, sum loss: 5497.265137, avg loss: 2.931875, ppl: 18.762774 +epoch: 1, batch: 7178, sum loss: 3892.590332, avg loss: 2.530943, ppl: 12.565349 +epoch: 1, batch: 7179, sum loss: 4225.788086, avg loss: 2.619832, ppl: 13.733410 +epoch: 1, batch: 7180, sum loss: 4795.346680, avg loss: 2.878359, ppl: 17.785070 +epoch: 1, batch: 7181, sum loss: 3864.067383, avg loss: 2.545499, ppl: 12.749587 +epoch: 1, batch: 7182, sum loss: 4077.104248, avg loss: 2.712644, ppl: 15.069067 +epoch: 1, batch: 7183, sum loss: 5139.809570, avg loss: 2.830292, ppl: 16.950401 +epoch: 1, batch: 7184, sum loss: 4145.631836, avg loss: 2.695469, ppl: 14.812469 +epoch: 1, batch: 7185, sum 
loss: 3129.569336, avg loss: 2.307942, ppl: 10.053712 +epoch: 1, batch: 7186, sum loss: 3819.761719, avg loss: 2.362252, ppl: 10.614832 +epoch: 1, batch: 7187, sum loss: 4320.877441, avg loss: 2.778699, ppl: 16.098070 +epoch: 1, batch: 7188, sum loss: 3529.579346, avg loss: 2.315997, ppl: 10.135021 +epoch: 1, batch: 7189, sum loss: 5204.924805, avg loss: 3.013854, ppl: 20.365728 +epoch: 1, batch: 7190, sum loss: 3661.055664, avg loss: 2.721974, ppl: 15.210323 +epoch: 1, batch: 7191, sum loss: 4359.073242, avg loss: 2.892550, ppl: 18.039255 +epoch: 1, batch: 7192, sum loss: 5076.951172, avg loss: 2.792602, ppl: 16.323442 +epoch: 1, batch: 7193, sum loss: 4654.756836, avg loss: 2.725267, ppl: 15.260494 +epoch: 1, batch: 7194, sum loss: 5093.746094, avg loss: 2.836162, ppl: 17.050194 +epoch: 1, batch: 7195, sum loss: 4708.372559, avg loss: 2.508456, ppl: 12.285952 +epoch: 1, batch: 7196, sum loss: 4653.707520, avg loss: 2.829001, ppl: 16.928549 +epoch: 1, batch: 7197, sum loss: 3928.141602, avg loss: 2.697900, ppl: 14.848511 +epoch: 1, batch: 7198, sum loss: 4728.945312, avg loss: 2.793234, ppl: 16.333759 +epoch: 1, batch: 7199, sum loss: 4666.686523, avg loss: 2.614390, ppl: 13.658887 +epoch: 1, batch: 7200, sum loss: 4797.511719, avg loss: 2.954133, ppl: 19.185078 +epoch: 1, batch: 7201, sum loss: 5203.119141, avg loss: 3.014553, ppl: 20.379986 +epoch: 1, batch: 7202, sum loss: 4197.924805, avg loss: 2.548831, ppl: 12.792141 +epoch: 1, batch: 7203, sum loss: 4538.445312, avg loss: 2.716006, ppl: 15.119810 +epoch: 1, batch: 7204, sum loss: 4958.243652, avg loss: 2.737849, ppl: 15.453705 +epoch: 1, batch: 7205, sum loss: 4355.118652, avg loss: 2.865210, ppl: 17.552736 +epoch: 1, batch: 7206, sum loss: 5125.713867, avg loss: 2.820976, ppl: 16.793238 +epoch: 1, batch: 7207, sum loss: 4027.959473, avg loss: 2.690688, ppl: 14.741810 +epoch: 1, batch: 7208, sum loss: 4400.185059, avg loss: 2.765673, ppl: 15.889725 +epoch: 1, batch: 7209, sum loss: 4750.742188, avg loss: 
2.778212, ppl: 16.090223 +epoch: 1, batch: 7210, sum loss: 4146.262695, avg loss: 2.525130, ppl: 12.492514 +epoch: 1, batch: 7211, sum loss: 4380.554688, avg loss: 2.650063, ppl: 14.154934 +epoch: 1, batch: 7212, sum loss: 4508.859863, avg loss: 2.846503, ppl: 17.227425 +epoch: 1, batch: 7213, sum loss: 4799.539062, avg loss: 2.774300, ppl: 16.027401 +epoch: 1, batch: 7214, sum loss: 5616.455078, avg loss: 2.832302, ppl: 16.984516 +epoch: 1, batch: 7215, sum loss: 4664.039551, avg loss: 2.850880, ppl: 17.303005 +epoch: 1, batch: 7216, sum loss: 4215.279297, avg loss: 2.861697, ppl: 17.491184 +epoch: 1, batch: 7217, sum loss: 4985.618164, avg loss: 3.007007, ppl: 20.226778 +epoch: 1, batch: 7218, sum loss: 5551.425781, avg loss: 2.965505, ppl: 19.404507 +epoch: 1, batch: 7219, sum loss: 5603.364258, avg loss: 2.848685, ppl: 17.265068 +epoch: 1, batch: 7220, sum loss: 4134.783691, avg loss: 2.799448, ppl: 16.435564 +epoch: 1, batch: 7221, sum loss: 5141.134766, avg loss: 2.894783, ppl: 18.079582 +epoch: 1, batch: 7222, sum loss: 4923.627930, avg loss: 2.672979, ppl: 14.483055 +epoch: 1, batch: 7223, sum loss: 4830.586426, avg loss: 2.846545, ppl: 17.228151 +epoch: 1, batch: 7224, sum loss: 4782.510742, avg loss: 2.898491, ppl: 18.146748 +epoch: 1, batch: 7225, sum loss: 5073.182617, avg loss: 2.989501, ppl: 19.875757 +epoch: 1, batch: 7226, sum loss: 4956.356934, avg loss: 2.765824, ppl: 15.892135 +epoch: 1, batch: 7227, sum loss: 4702.268555, avg loss: 3.128588, ppl: 22.841715 +epoch: 1, batch: 7228, sum loss: 5135.894531, avg loss: 2.911505, ppl: 18.384445 +epoch: 1, batch: 7229, sum loss: 4127.990234, avg loss: 2.538739, ppl: 12.663697 +epoch: 1, batch: 7230, sum loss: 5043.210449, avg loss: 2.789386, ppl: 16.271030 +epoch: 1, batch: 7231, sum loss: 5151.876953, avg loss: 2.547911, ppl: 12.780383 +epoch: 1, batch: 7232, sum loss: 5368.386719, avg loss: 3.128431, ppl: 22.838110 +epoch: 1, batch: 7233, sum loss: 3505.561279, avg loss: 2.637744, ppl: 13.981622 
+epoch: 1, batch: 7234, sum loss: 4706.520020, avg loss: 2.796506, ppl: 16.387293 +epoch: 1, batch: 7235, sum loss: 4268.704590, avg loss: 2.630132, ppl: 13.875604 +epoch: 1, batch: 7236, sum loss: 6384.631348, avg loss: 3.173276, ppl: 23.885611 +epoch: 1, batch: 7237, sum loss: 4154.560059, avg loss: 2.556652, ppl: 12.892585 +epoch: 1, batch: 7238, sum loss: 5482.704102, avg loss: 2.866024, ppl: 17.567034 +epoch: 1, batch: 7239, sum loss: 4711.632812, avg loss: 2.687754, ppl: 14.698628 +epoch: 1, batch: 7240, sum loss: 4959.389160, avg loss: 2.952018, ppl: 19.144539 +epoch: 1, batch: 7241, sum loss: 4433.953125, avg loss: 2.580881, ppl: 13.208769 +epoch: 1, batch: 7242, sum loss: 4944.180664, avg loss: 2.951750, ppl: 19.139414 +epoch: 1, batch: 7243, sum loss: 4301.823242, avg loss: 2.791579, ppl: 16.306747 +epoch: 1, batch: 7244, sum loss: 4314.133789, avg loss: 2.611461, ppl: 13.618936 +epoch: 1, batch: 7245, sum loss: 4189.473633, avg loss: 2.692464, ppl: 14.768018 +epoch: 1, batch: 7246, sum loss: 4708.264160, avg loss: 2.501734, ppl: 12.203643 +epoch: 1, batch: 7247, sum loss: 4839.000977, avg loss: 2.797110, ppl: 16.397196 +epoch: 1, batch: 7248, sum loss: 5261.059082, avg loss: 2.957313, ppl: 19.246180 +epoch: 1, batch: 7249, sum loss: 5886.456055, avg loss: 2.854731, ppl: 17.369770 +epoch: 1, batch: 7250, sum loss: 5326.733398, avg loss: 2.984164, ppl: 19.769978 +epoch: 1, batch: 7251, sum loss: 4675.313477, avg loss: 2.657938, ppl: 14.266848 +epoch: 1, batch: 7252, sum loss: 4532.310059, avg loss: 2.588413, ppl: 13.308628 +epoch: 1, batch: 7253, sum loss: 4787.466309, avg loss: 2.768922, ppl: 15.941442 +epoch: 1, batch: 7254, sum loss: 4921.028320, avg loss: 2.872754, ppl: 17.685663 +epoch: 1, batch: 7255, sum loss: 4398.711914, avg loss: 2.828754, ppl: 16.924355 +epoch: 1, batch: 7256, sum loss: 5368.754883, avg loss: 2.725256, ppl: 15.260326 +epoch: 1, batch: 7257, sum loss: 4680.511230, avg loss: 2.663922, ppl: 14.352470 +epoch: 1, batch: 7258, sum 
loss: 5270.788574, avg loss: 2.788777, ppl: 16.261122 +epoch: 1, batch: 7259, sum loss: 5210.560547, avg loss: 2.680330, ppl: 14.589904 +epoch: 1, batch: 7260, sum loss: 4815.540039, avg loss: 2.736102, ppl: 15.426736 +epoch: 1, batch: 7261, sum loss: 3594.045898, avg loss: 2.463362, ppl: 11.744235 +epoch: 1, batch: 7262, sum loss: 4937.801270, avg loss: 2.646196, ppl: 14.100294 +epoch: 1, batch: 7263, sum loss: 4296.227539, avg loss: 2.703730, ppl: 14.935342 +epoch: 1, batch: 7264, sum loss: 5019.169434, avg loss: 2.753247, ppl: 15.693510 +epoch: 1, batch: 7265, sum loss: 4371.059082, avg loss: 2.678345, ppl: 14.560978 +epoch: 1, batch: 7266, sum loss: 5106.311035, avg loss: 2.865494, ppl: 17.557734 +epoch: 1, batch: 7267, sum loss: 5499.222168, avg loss: 3.016578, ppl: 20.421299 +epoch: 1, batch: 7268, sum loss: 4397.333008, avg loss: 2.949251, ppl: 19.091648 +epoch: 1, batch: 7269, sum loss: 4639.996094, avg loss: 2.742315, ppl: 15.522872 +epoch: 1, batch: 7270, sum loss: 4428.450684, avg loss: 2.886865, ppl: 17.936987 +epoch: 1, batch: 7271, sum loss: 4474.712891, avg loss: 2.819605, ppl: 16.770227 +epoch: 1, batch: 7272, sum loss: 4453.744629, avg loss: 2.699239, ppl: 14.868417 +epoch: 1, batch: 7273, sum loss: 4493.343750, avg loss: 2.690625, ppl: 14.740886 +epoch: 1, batch: 7274, sum loss: 4583.399414, avg loss: 2.737993, ppl: 15.455930 +epoch: 1, batch: 7275, sum loss: 3834.499023, avg loss: 2.411635, ppl: 11.152177 +epoch: 1, batch: 7276, sum loss: 4527.470215, avg loss: 2.667926, ppl: 14.410053 +epoch: 1, batch: 7277, sum loss: 4735.234863, avg loss: 2.735549, ppl: 15.418201 +epoch: 1, batch: 7278, sum loss: 4448.015625, avg loss: 2.728844, ppl: 15.315171 +epoch: 1, batch: 7279, sum loss: 3905.586670, avg loss: 2.498776, ppl: 12.167591 +epoch: 1, batch: 7280, sum loss: 4176.457520, avg loss: 2.831497, ppl: 16.970842 +epoch: 1, batch: 7281, sum loss: 4800.970703, avg loss: 2.749697, ppl: 15.637893 +epoch: 1, batch: 7282, sum loss: 4055.515381, avg loss: 
2.668102, ppl: 14.412592 +epoch: 1, batch: 7283, sum loss: 5340.325684, avg loss: 2.937473, ppl: 18.868103 +epoch: 1, batch: 7284, sum loss: 4934.524902, avg loss: 2.702369, ppl: 14.915020 +epoch: 1, batch: 7285, sum loss: 4887.408203, avg loss: 2.849801, ppl: 17.284334 +epoch: 1, batch: 7286, sum loss: 4213.786133, avg loss: 2.628687, ppl: 13.855571 +epoch: 1, batch: 7287, sum loss: 5062.993164, avg loss: 2.643861, ppl: 14.067408 +epoch: 1, batch: 7288, sum loss: 4854.352051, avg loss: 2.680482, ppl: 14.592120 +epoch: 1, batch: 7289, sum loss: 4863.974609, avg loss: 2.652112, ppl: 14.183970 +epoch: 1, batch: 7290, sum loss: 4641.677734, avg loss: 2.679952, ppl: 14.584399 +epoch: 1, batch: 7291, sum loss: 4315.096191, avg loss: 2.542779, ppl: 12.714959 +epoch: 1, batch: 7292, sum loss: 5163.092773, avg loss: 3.093525, ppl: 22.054682 +epoch: 1, batch: 7293, sum loss: 4510.648438, avg loss: 2.803386, ppl: 16.500425 +epoch: 1, batch: 7294, sum loss: 4578.144043, avg loss: 2.700970, ppl: 14.894171 +epoch: 1, batch: 7295, sum loss: 4653.102539, avg loss: 2.642307, ppl: 14.045567 +epoch: 1, batch: 7296, sum loss: 3506.790527, avg loss: 2.567197, ppl: 13.029247 +epoch: 1, batch: 7297, sum loss: 4374.752930, avg loss: 2.574899, ppl: 13.129991 +epoch: 1, batch: 7298, sum loss: 5240.856934, avg loss: 3.008529, ppl: 20.257574 +epoch: 1, batch: 7299, sum loss: 3995.576660, avg loss: 2.538486, ppl: 12.660485 +epoch: 1, batch: 7300, sum loss: 4568.207031, avg loss: 2.770289, ppl: 15.963249 +epoch: 1, batch: 7301, sum loss: 5109.934082, avg loss: 2.820052, ppl: 16.777721 +epoch: 1, batch: 7302, sum loss: 4613.881836, avg loss: 2.832340, ppl: 16.985159 +epoch: 1, batch: 7303, sum loss: 4960.925781, avg loss: 2.620669, ppl: 13.744911 +epoch: 1, batch: 7304, sum loss: 4612.334473, avg loss: 2.735667, ppl: 15.420025 +epoch: 1, batch: 7305, sum loss: 4336.964844, avg loss: 2.472614, ppl: 11.853389 +epoch: 1, batch: 7306, sum loss: 5345.289062, avg loss: 2.938587, ppl: 18.889132 
+epoch: 1, batch: 7307, sum loss: 4961.649902, avg loss: 2.815919, ppl: 16.708530 +epoch: 1, batch: 7308, sum loss: 3934.322998, avg loss: 2.621135, ppl: 13.751319 +epoch: 1, batch: 7309, sum loss: 3800.858154, avg loss: 2.701392, ppl: 14.900455 +epoch: 1, batch: 7310, sum loss: 4020.210205, avg loss: 2.487754, ppl: 12.034215 +epoch: 1, batch: 7311, sum loss: 5747.513672, avg loss: 3.103409, ppl: 22.273754 +epoch: 1, batch: 7312, sum loss: 3677.866699, avg loss: 2.513921, ppl: 12.353275 +epoch: 1, batch: 7313, sum loss: 4918.636719, avg loss: 2.853038, ppl: 17.340380 +epoch: 1, batch: 7314, sum loss: 4727.249023, avg loss: 2.666243, ppl: 14.385824 +epoch: 1, batch: 7315, sum loss: 4637.500977, avg loss: 2.711989, ppl: 15.059197 +epoch: 1, batch: 7316, sum loss: 4545.864258, avg loss: 2.562494, ppl: 12.968117 +epoch: 1, batch: 7317, sum loss: 4087.009521, avg loss: 2.599879, ppl: 13.462106 +epoch: 1, batch: 7318, sum loss: 3538.268066, avg loss: 2.448628, ppl: 11.572463 +epoch: 1, batch: 7319, sum loss: 5065.783691, avg loss: 3.033403, ppl: 20.767794 +epoch: 1, batch: 7320, sum loss: 4717.607422, avg loss: 2.730097, ppl: 15.334372 +epoch: 1, batch: 7321, sum loss: 5178.458008, avg loss: 2.763318, ppl: 15.852355 +epoch: 1, batch: 7322, sum loss: 5430.477051, avg loss: 2.823961, ppl: 16.843435 +epoch: 1, batch: 7323, sum loss: 4847.125000, avg loss: 2.710920, ppl: 15.043110 +epoch: 1, batch: 7324, sum loss: 4163.792480, avg loss: 2.500776, ppl: 12.191955 +epoch: 1, batch: 7325, sum loss: 4452.095215, avg loss: 2.756715, ppl: 15.748031 +epoch: 1, batch: 7326, sum loss: 5800.743164, avg loss: 3.041816, ppl: 20.943241 +epoch: 1, batch: 7327, sum loss: 4563.406738, avg loss: 2.690688, ppl: 14.741814 +epoch: 1, batch: 7328, sum loss: 4118.995117, avg loss: 2.438718, ppl: 11.458345 +epoch: 1, batch: 7329, sum loss: 3734.543213, avg loss: 2.360647, ppl: 10.597803 +epoch: 1, batch: 7330, sum loss: 5134.229492, avg loss: 2.873100, ppl: 17.691778 +epoch: 1, batch: 7331, sum 
loss: 4617.417480, avg loss: 2.572377, ppl: 13.096925 +epoch: 1, batch: 7332, sum loss: 4290.056152, avg loss: 2.667945, ppl: 14.410331 +epoch: 1, batch: 7333, sum loss: 5182.079102, avg loss: 2.983350, ppl: 19.753887 +epoch: 1, batch: 7334, sum loss: 4813.847168, avg loss: 2.833342, ppl: 17.002180 +epoch: 1, batch: 7335, sum loss: 3377.106445, avg loss: 2.468645, ppl: 11.806439 +epoch: 1, batch: 7336, sum loss: 5134.545898, avg loss: 2.846200, ppl: 17.222221 +epoch: 1, batch: 7337, sum loss: 5102.229492, avg loss: 3.165155, ppl: 23.692410 +epoch: 1, batch: 7338, sum loss: 4420.530273, avg loss: 2.685620, ppl: 14.667294 +epoch: 1, batch: 7339, sum loss: 4401.336426, avg loss: 2.486631, ppl: 12.020709 +epoch: 1, batch: 7340, sum loss: 3816.438721, avg loss: 2.481430, ppl: 11.958347 +epoch: 1, batch: 7341, sum loss: 5470.322266, avg loss: 3.018942, ppl: 20.469616 +epoch: 1, batch: 7342, sum loss: 4128.383789, avg loss: 2.857013, ppl: 17.409447 +epoch: 1, batch: 7343, sum loss: 4296.626953, avg loss: 2.479300, ppl: 11.932909 +epoch: 1, batch: 7344, sum loss: 4357.266602, avg loss: 2.868510, ppl: 17.610754 +epoch: 1, batch: 7345, sum loss: 4972.440918, avg loss: 2.814058, ppl: 16.677464 +epoch: 1, batch: 7346, sum loss: 4577.987305, avg loss: 2.859455, ppl: 17.452015 +epoch: 1, batch: 7347, sum loss: 4047.713867, avg loss: 2.846493, ppl: 17.227268 +epoch: 1, batch: 7348, sum loss: 3647.007080, avg loss: 2.354427, ppl: 10.532091 +epoch: 1, batch: 7349, sum loss: 4616.261719, avg loss: 3.047038, ppl: 21.052885 +epoch: 1, batch: 7350, sum loss: 5168.421387, avg loss: 2.786211, ppl: 16.219448 +epoch: 1, batch: 7351, sum loss: 4956.442383, avg loss: 2.946755, ppl: 19.044058 +epoch: 1, batch: 7352, sum loss: 4707.388672, avg loss: 2.840910, ppl: 17.131355 +epoch: 1, batch: 7353, sum loss: 5057.232422, avg loss: 2.916512, ppl: 18.476732 +epoch: 1, batch: 7354, sum loss: 4353.676758, avg loss: 2.559481, ppl: 12.929101 +epoch: 1, batch: 7355, sum loss: 5020.354004, avg loss: 
3.070553, ppl: 21.553816 +epoch: 1, batch: 7356, sum loss: 4128.663086, avg loss: 2.694950, ppl: 14.804776 +epoch: 1, batch: 7357, sum loss: 4172.879883, avg loss: 2.713186, ppl: 15.077232 +epoch: 1, batch: 7358, sum loss: 4510.907227, avg loss: 2.760653, ppl: 15.810167 +epoch: 1, batch: 7359, sum loss: 4190.379395, avg loss: 2.686141, ppl: 14.674932 +epoch: 1, batch: 7360, sum loss: 4687.943359, avg loss: 2.540891, ppl: 12.690969 +epoch: 1, batch: 7361, sum loss: 5466.227051, avg loss: 3.065747, ppl: 21.450485 +epoch: 1, batch: 7362, sum loss: 5084.473633, avg loss: 2.666216, ppl: 14.385430 +epoch: 1, batch: 7363, sum loss: 4870.401855, avg loss: 2.549949, ppl: 12.806447 +epoch: 1, batch: 7364, sum loss: 4035.983887, avg loss: 2.495970, ppl: 12.133500 +epoch: 1, batch: 7365, sum loss: 4866.049805, avg loss: 2.796580, ppl: 16.388508 +epoch: 1, batch: 7366, sum loss: 4287.976074, avg loss: 2.755769, ppl: 15.733131 +epoch: 1, batch: 7367, sum loss: 5332.005859, avg loss: 3.087438, ppl: 21.920853 +epoch: 1, batch: 7368, sum loss: 4702.547852, avg loss: 2.872662, ppl: 17.684036 +epoch: 1, batch: 7369, sum loss: 5110.036621, avg loss: 2.870807, ppl: 17.651260 +epoch: 1, batch: 7370, sum loss: 4295.002441, avg loss: 2.746165, ppl: 15.582762 +epoch: 1, batch: 7371, sum loss: 4712.130859, avg loss: 2.896208, ppl: 18.105364 +epoch: 1, batch: 7372, sum loss: 4899.158203, avg loss: 2.703730, ppl: 14.935331 +epoch: 1, batch: 7373, sum loss: 5055.727051, avg loss: 2.767229, ppl: 15.914472 +epoch: 1, batch: 7374, sum loss: 5357.460938, avg loss: 3.014891, ppl: 20.386862 +epoch: 1, batch: 7375, sum loss: 4783.346191, avg loss: 2.847230, ppl: 17.239960 +epoch: 1, batch: 7376, sum loss: 4552.137207, avg loss: 2.903149, ppl: 18.231459 +epoch: 1, batch: 7377, sum loss: 5801.973145, avg loss: 2.937708, ppl: 18.872540 +epoch: 1, batch: 7378, sum loss: 4598.630371, avg loss: 2.745451, ppl: 15.571638 +epoch: 1, batch: 7379, sum loss: 3700.175781, avg loss: 2.562449, ppl: 12.967529 
+epoch: 1, batch: 7380, sum loss: 4193.197754, avg loss: 2.532124, ppl: 12.580202 +epoch: 1, batch: 7381, sum loss: 4766.194824, avg loss: 2.886853, ppl: 17.936781 +epoch: 1, batch: 7382, sum loss: 4115.717285, avg loss: 2.727447, ppl: 15.293793 +epoch: 1, batch: 7383, sum loss: 3706.574219, avg loss: 2.449818, ppl: 11.586237 +epoch: 1, batch: 7384, sum loss: 4512.682617, avg loss: 2.761740, ppl: 15.827354 +epoch: 1, batch: 7385, sum loss: 4570.771484, avg loss: 2.874699, ppl: 17.720091 +epoch: 1, batch: 7386, sum loss: 4196.963867, avg loss: 2.866779, ppl: 17.580294 +epoch: 1, batch: 7387, sum loss: 4757.952637, avg loss: 2.942457, ppl: 18.962379 +epoch: 1, batch: 7388, sum loss: 4568.078125, avg loss: 2.776947, ppl: 16.069885 +epoch: 1, batch: 7389, sum loss: 4553.954590, avg loss: 2.678797, ppl: 14.567558 +epoch: 1, batch: 7390, sum loss: 4969.979980, avg loss: 2.942558, ppl: 18.964291 +epoch: 1, batch: 7391, sum loss: 4108.092773, avg loss: 2.406615, ppl: 11.096342 +epoch: 1, batch: 7392, sum loss: 4808.713867, avg loss: 2.537580, ppl: 12.649020 +epoch: 1, batch: 7393, sum loss: 4516.125000, avg loss: 2.796362, ppl: 16.384932 +epoch: 1, batch: 7394, sum loss: 4765.677734, avg loss: 2.660903, ppl: 14.309208 +epoch: 1, batch: 7395, sum loss: 5385.746094, avg loss: 2.944640, ppl: 19.003819 +epoch: 1, batch: 7396, sum loss: 5031.571289, avg loss: 2.719768, ppl: 15.176805 +epoch: 1, batch: 7397, sum loss: 5337.698730, avg loss: 2.875915, ppl: 17.741655 +epoch: 1, batch: 7398, sum loss: 4671.023438, avg loss: 2.807106, ppl: 16.561911 +epoch: 1, batch: 7399, sum loss: 5049.376953, avg loss: 3.149954, ppl: 23.334999 +epoch: 1, batch: 7400, sum loss: 4847.500000, avg loss: 2.819954, ppl: 16.776073 +epoch: 1, batch: 7401, sum loss: 4720.197754, avg loss: 2.686510, ppl: 14.680349 +epoch: 1, batch: 7402, sum loss: 5415.192383, avg loss: 2.970484, ppl: 19.501362 +epoch: 1, batch: 7403, sum loss: 5048.363770, avg loss: 2.784536, ppl: 16.192305 +epoch: 1, batch: 7404, sum 
loss: 4574.860840, avg loss: 2.691095, ppl: 14.747814 +epoch: 1, batch: 7405, sum loss: 5063.443848, avg loss: 2.830321, ppl: 16.950899 +epoch: 1, batch: 7406, sum loss: 5073.037598, avg loss: 2.956316, ppl: 19.227005 +epoch: 1, batch: 7407, sum loss: 4124.270020, avg loss: 2.602063, ppl: 13.491542 +epoch: 1, batch: 7408, sum loss: 3459.171875, avg loss: 2.246215, ppl: 9.451896 +epoch: 1, batch: 7409, sum loss: 4252.288574, avg loss: 2.649401, ppl: 14.145562 +epoch: 1, batch: 7410, sum loss: 5430.178711, avg loss: 2.799061, ppl: 16.429218 +epoch: 1, batch: 7411, sum loss: 5052.950684, avg loss: 2.841930, ppl: 17.148825 +epoch: 1, batch: 7412, sum loss: 3992.271240, avg loss: 2.713984, ppl: 15.089279 +epoch: 1, batch: 7413, sum loss: 5323.243164, avg loss: 2.729868, ppl: 15.330866 +epoch: 1, batch: 7414, sum loss: 4081.168457, avg loss: 2.561939, ppl: 12.960921 +epoch: 1, batch: 7415, sum loss: 4512.582520, avg loss: 2.928347, ppl: 18.696697 +epoch: 1, batch: 7416, sum loss: 4601.062988, avg loss: 2.737099, ppl: 15.442118 +epoch: 1, batch: 7417, sum loss: 5299.718750, avg loss: 3.000973, ppl: 20.105095 +epoch: 1, batch: 7418, sum loss: 4429.411621, avg loss: 2.699215, ppl: 14.868052 +epoch: 1, batch: 7419, sum loss: 5920.052246, avg loss: 3.014283, ppl: 20.374485 +epoch: 1, batch: 7420, sum loss: 4943.880371, avg loss: 2.846218, ppl: 17.222521 +epoch: 1, batch: 7421, sum loss: 4432.804199, avg loss: 2.716179, ppl: 15.122431 +epoch: 1, batch: 7422, sum loss: 4746.635742, avg loss: 2.833812, ppl: 17.010189 +epoch: 1, batch: 7423, sum loss: 4486.407715, avg loss: 2.608377, ppl: 13.576991 +epoch: 1, batch: 7424, sum loss: 4830.836426, avg loss: 2.751046, ppl: 15.659001 +epoch: 1, batch: 7425, sum loss: 5643.083496, avg loss: 3.012858, ppl: 20.345472 +epoch: 1, batch: 7426, sum loss: 3996.511719, avg loss: 2.660793, ppl: 14.307635 +epoch: 1, batch: 7427, sum loss: 5718.913574, avg loss: 2.991064, ppl: 19.906843 +epoch: 1, batch: 7428, sum loss: 5137.432617, avg loss: 
2.857304, ppl: 17.414511 +epoch: 1, batch: 7429, sum loss: 5442.574219, avg loss: 3.177218, ppl: 23.979942 +epoch: 1, batch: 7430, sum loss: 4164.573242, avg loss: 2.655978, ppl: 14.238901 +epoch: 1, batch: 7431, sum loss: 4910.721191, avg loss: 2.704142, ppl: 14.941485 +epoch: 1, batch: 7432, sum loss: 3645.734375, avg loss: 2.512567, ppl: 12.336555 +epoch: 1, batch: 7433, sum loss: 4950.018555, avg loss: 2.669913, ppl: 14.438710 +epoch: 1, batch: 7434, sum loss: 3647.763428, avg loss: 2.664546, ppl: 14.361428 +epoch: 1, batch: 7435, sum loss: 4072.777344, avg loss: 2.498636, ppl: 12.165895 +epoch: 1, batch: 7436, sum loss: 5131.982422, avg loss: 2.781562, ppl: 16.144224 +epoch: 1, batch: 7437, sum loss: 4079.020508, avg loss: 2.719347, ppl: 15.170413 +epoch: 1, batch: 7438, sum loss: 4172.985840, avg loss: 2.518398, ppl: 12.408703 +epoch: 1, batch: 7439, sum loss: 5506.488770, avg loss: 3.037225, ppl: 20.847311 +epoch: 1, batch: 7440, sum loss: 3871.244629, avg loss: 2.642488, ppl: 14.048108 +epoch: 1, batch: 7441, sum loss: 6208.363770, avg loss: 2.977633, ppl: 19.641262 +epoch: 1, batch: 7442, sum loss: 4736.764648, avg loss: 2.647716, ppl: 14.121752 +epoch: 1, batch: 7443, sum loss: 5147.229980, avg loss: 2.854814, ppl: 17.371210 +epoch: 1, batch: 7444, sum loss: 5383.951172, avg loss: 2.953347, ppl: 19.170008 +epoch: 1, batch: 7445, sum loss: 4323.124512, avg loss: 2.657114, ppl: 14.255090 +epoch: 1, batch: 7446, sum loss: 4459.442871, avg loss: 2.754443, ppl: 15.712286 +epoch: 1, batch: 7447, sum loss: 4576.646484, avg loss: 2.758678, ppl: 15.778965 +epoch: 1, batch: 7448, sum loss: 4814.998047, avg loss: 2.832352, ppl: 16.985363 +epoch: 1, batch: 7449, sum loss: 4223.477539, avg loss: 2.691828, ppl: 14.758623 +epoch: 1, batch: 7450, sum loss: 6366.431641, avg loss: 2.988935, ppl: 19.864521 +epoch: 1, batch: 7451, sum loss: 5424.623047, avg loss: 2.885438, ppl: 17.911406 +epoch: 1, batch: 7452, sum loss: 5093.155762, avg loss: 2.867768, ppl: 17.597696 
+epoch: 1, batch: 7453, sum loss: 5250.712891, avg loss: 2.872381, ppl: 17.679066 +epoch: 1, batch: 7454, sum loss: 4614.994629, avg loss: 2.713107, ppl: 15.076038 +epoch: 1, batch: 7455, sum loss: 3966.449219, avg loss: 2.456006, ppl: 11.658154 +epoch: 1, batch: 7456, sum loss: 4783.192383, avg loss: 2.816957, ppl: 16.725872 +epoch: 1, batch: 7457, sum loss: 6035.716309, avg loss: 3.074741, ppl: 21.644274 +epoch: 1, batch: 7458, sum loss: 4885.313965, avg loss: 2.676884, ppl: 14.539722 +epoch: 1, batch: 7459, sum loss: 4142.678223, avg loss: 2.573092, ppl: 13.106286 +epoch: 1, batch: 7460, sum loss: 5042.631836, avg loss: 2.736100, ppl: 15.426699 +epoch: 1, batch: 7461, sum loss: 3924.852295, avg loss: 2.512710, ppl: 12.338317 +epoch: 1, batch: 7462, sum loss: 5128.151367, avg loss: 3.009479, ppl: 20.276825 +epoch: 1, batch: 7463, sum loss: 4965.201660, avg loss: 2.855205, ppl: 17.378000 +epoch: 1, batch: 7464, sum loss: 4614.755859, avg loss: 2.635498, ppl: 13.950252 +epoch: 1, batch: 7465, sum loss: 4685.983398, avg loss: 2.690002, ppl: 14.731705 +epoch: 1, batch: 7466, sum loss: 5004.895996, avg loss: 2.792911, ppl: 16.328480 +epoch: 1, batch: 7467, sum loss: 3492.748291, avg loss: 2.500178, ppl: 12.184658 +epoch: 1, batch: 7468, sum loss: 5039.210938, avg loss: 2.816775, ppl: 16.722837 +epoch: 1, batch: 7469, sum loss: 4380.429688, avg loss: 2.763678, ppl: 15.858063 +epoch: 1, batch: 7470, sum loss: 6302.436035, avg loss: 3.151218, ppl: 23.364510 +epoch: 1, batch: 7471, sum loss: 3916.037598, avg loss: 2.811226, ppl: 16.630293 +epoch: 1, batch: 7472, sum loss: 3327.116943, avg loss: 2.254144, ppl: 9.527139 +epoch: 1, batch: 7473, sum loss: 5147.557617, avg loss: 3.015558, ppl: 20.400471 +epoch: 1, batch: 7474, sum loss: 4870.898438, avg loss: 2.845151, ppl: 17.204155 +epoch: 1, batch: 7475, sum loss: 4744.852539, avg loss: 2.544157, ppl: 12.732488 +epoch: 1, batch: 7476, sum loss: 4768.418457, avg loss: 2.809911, ppl: 16.608437 +epoch: 1, batch: 7477, sum 
loss: 3940.752441, avg loss: 2.590896, ppl: 13.341719 +epoch: 1, batch: 7478, sum loss: 4570.064941, avg loss: 2.587806, ppl: 13.300555 +epoch: 1, batch: 7479, sum loss: 3920.743896, avg loss: 2.610349, ppl: 13.603794 +epoch: 1, batch: 7480, sum loss: 5384.707520, avg loss: 2.885695, ppl: 17.916019 +epoch: 1, batch: 7481, sum loss: 4991.536133, avg loss: 2.679300, ppl: 14.574888 +epoch: 1, batch: 7482, sum loss: 4214.822266, avg loss: 2.456190, ppl: 11.660302 +epoch: 1, batch: 7483, sum loss: 4630.743652, avg loss: 2.786248, ppl: 16.220043 +epoch: 1, batch: 7484, sum loss: 4668.658203, avg loss: 2.762520, ppl: 15.839703 +epoch: 1, batch: 7485, sum loss: 4237.718750, avg loss: 2.546706, ppl: 12.764986 +epoch: 1, batch: 7486, sum loss: 5583.240234, avg loss: 2.851502, ppl: 17.313763 +epoch: 1, batch: 7487, sum loss: 4884.741211, avg loss: 2.745779, ppl: 15.576744 +epoch: 1, batch: 7488, sum loss: 4149.713379, avg loss: 2.803860, ppl: 16.508253 +epoch: 1, batch: 7489, sum loss: 4678.638672, avg loss: 2.744070, ppl: 15.550142 +epoch: 1, batch: 7490, sum loss: 4439.444824, avg loss: 2.811555, ppl: 16.635761 +epoch: 1, batch: 7491, sum loss: 4386.140137, avg loss: 2.658267, ppl: 14.271532 +epoch: 1, batch: 7492, sum loss: 4172.123047, avg loss: 2.643931, ppl: 14.068397 +epoch: 1, batch: 7493, sum loss: 4392.791016, avg loss: 2.620997, ppl: 13.749428 +epoch: 1, batch: 7494, sum loss: 4764.644531, avg loss: 2.739876, ppl: 15.485069 +epoch: 1, batch: 7495, sum loss: 5680.687988, avg loss: 2.926681, ppl: 18.665577 +epoch: 1, batch: 7496, sum loss: 5124.620117, avg loss: 3.007406, ppl: 20.234848 +epoch: 1, batch: 7497, sum loss: 4597.672363, avg loss: 2.710892, ppl: 15.042684 +epoch: 1, batch: 7498, sum loss: 5120.098145, avg loss: 2.945971, ppl: 19.029135 +epoch: 1, batch: 7499, sum loss: 3928.420898, avg loss: 2.410074, ppl: 11.134788 +epoch: 1, batch: 7500, sum loss: 5510.291992, avg loss: 2.859519, ppl: 17.453121 +epoch: 1, batch: 7501, sum loss: 5104.267578, avg loss: 
2.704964, ppl: 14.953781 +epoch: 1, batch: 7502, sum loss: 4277.420898, avg loss: 2.603421, ppl: 13.509876 +epoch: 1, batch: 7503, sum loss: 4872.662109, avg loss: 2.824732, ppl: 16.856424 +epoch: 1, batch: 7504, sum loss: 4480.566895, avg loss: 2.762372, ppl: 15.837358 +epoch: 1, batch: 7505, sum loss: 4184.258301, avg loss: 2.783937, ppl: 16.182613 +epoch: 1, batch: 7506, sum loss: 4531.096680, avg loss: 2.714857, ppl: 15.102455 +epoch: 1, batch: 7507, sum loss: 3780.860107, avg loss: 2.436121, ppl: 11.428628 +epoch: 1, batch: 7508, sum loss: 4676.558105, avg loss: 2.752536, ppl: 15.682349 +epoch: 1, batch: 7509, sum loss: 4771.756348, avg loss: 2.703545, ppl: 14.932569 +epoch: 1, batch: 7510, sum loss: 4397.173828, avg loss: 2.719341, ppl: 15.170319 +epoch: 1, batch: 7511, sum loss: 5177.011230, avg loss: 2.931490, ppl: 18.755554 +epoch: 1, batch: 7512, sum loss: 5035.374512, avg loss: 2.927543, ppl: 18.681681 +epoch: 1, batch: 7513, sum loss: 5155.737305, avg loss: 2.909558, ppl: 18.348692 +epoch: 1, batch: 7514, sum loss: 4611.979980, avg loss: 2.895154, ppl: 18.086283 +epoch: 1, batch: 7515, sum loss: 4783.110352, avg loss: 2.602345, ppl: 13.495347 +epoch: 1, batch: 7516, sum loss: 4913.014160, avg loss: 2.881533, ppl: 17.841610 +epoch: 1, batch: 7517, sum loss: 3776.628418, avg loss: 2.562163, ppl: 12.963829 +epoch: 1, batch: 7518, sum loss: 5483.822266, avg loss: 3.019726, ppl: 20.485674 +epoch: 1, batch: 7519, sum loss: 4192.083984, avg loss: 2.498262, ppl: 12.161342 +epoch: 1, batch: 7520, sum loss: 4848.521484, avg loss: 2.726953, ppl: 15.286232 +epoch: 1, batch: 7521, sum loss: 4761.391113, avg loss: 2.840926, ppl: 17.131617 +epoch: 1, batch: 7522, sum loss: 5300.992188, avg loss: 2.733880, ppl: 15.392488 +epoch: 1, batch: 7523, sum loss: 5166.970703, avg loss: 2.904424, ppl: 18.254730 +epoch: 1, batch: 7524, sum loss: 5001.376953, avg loss: 2.899349, ppl: 18.162321 +epoch: 1, batch: 7525, sum loss: 5802.688477, avg loss: 2.991077, ppl: 19.907104 
+epoch: 1, batch: 7526, sum loss: 4639.791504, avg loss: 2.722882, ppl: 15.224139 +epoch: 1, batch: 7527, sum loss: 4184.652832, avg loss: 2.612143, ppl: 13.628222 +epoch: 1, batch: 7528, sum loss: 4786.646484, avg loss: 2.735227, ppl: 15.413237 +epoch: 1, batch: 7529, sum loss: 4277.213379, avg loss: 2.570441, ppl: 13.071585 +epoch: 1, batch: 7530, sum loss: 5643.563477, avg loss: 2.889689, ppl: 17.987722 +epoch: 1, batch: 7531, sum loss: 4069.486084, avg loss: 2.698598, ppl: 14.858887 +epoch: 1, batch: 7532, sum loss: 4348.314453, avg loss: 2.667678, ppl: 14.406473 +epoch: 1, batch: 7533, sum loss: 4789.053711, avg loss: 2.965358, ppl: 19.401653 +epoch: 1, batch: 7534, sum loss: 4507.067871, avg loss: 2.682779, ppl: 14.625676 +epoch: 1, batch: 7535, sum loss: 4985.387695, avg loss: 2.839059, ppl: 17.099669 +epoch: 1, batch: 7536, sum loss: 4238.697266, avg loss: 2.654162, ppl: 14.213077 +epoch: 1, batch: 7537, sum loss: 5583.264648, avg loss: 2.786060, ppl: 16.217001 +epoch: 1, batch: 7538, sum loss: 4168.842285, avg loss: 2.694792, ppl: 14.802433 +epoch: 1, batch: 7539, sum loss: 4916.180176, avg loss: 2.830271, ppl: 16.950050 +epoch: 1, batch: 7540, sum loss: 4550.533203, avg loss: 2.819413, ppl: 16.767014 +epoch: 1, batch: 7541, sum loss: 3576.354248, avg loss: 2.558193, ppl: 12.912469 +epoch: 1, batch: 7542, sum loss: 5151.502930, avg loss: 2.697122, ppl: 14.836971 +epoch: 1, batch: 7543, sum loss: 4797.644531, avg loss: 2.879739, ppl: 17.809616 +epoch: 1, batch: 7544, sum loss: 5689.718262, avg loss: 3.085531, ppl: 21.879070 +epoch: 1, batch: 7545, sum loss: 3859.019531, avg loss: 2.696729, ppl: 14.831142 +epoch: 1, batch: 7546, sum loss: 4588.774902, avg loss: 2.782762, ppl: 16.163609 +epoch: 1, batch: 7547, sum loss: 4766.009766, avg loss: 3.072862, ppl: 21.603653 +epoch: 1, batch: 7548, sum loss: 5432.098145, avg loss: 3.147218, ppl: 23.271229 +epoch: 1, batch: 7549, sum loss: 4246.156738, avg loss: 2.511033, ppl: 12.317648 +epoch: 1, batch: 7550, sum 
loss: 4384.261719, avg loss: 2.773094, ppl: 16.008085 +epoch: 1, batch: 7551, sum loss: 3991.111816, avg loss: 2.466695, ppl: 11.783433 +epoch: 1, batch: 7552, sum loss: 4005.677734, avg loss: 2.708369, ppl: 15.004783 +epoch: 1, batch: 7553, sum loss: 5256.702637, avg loss: 2.830750, ppl: 16.958170 +epoch: 1, batch: 7554, sum loss: 4627.900391, avg loss: 2.771198, ppl: 15.977760 +epoch: 1, batch: 7555, sum loss: 4800.181152, avg loss: 2.755558, ppl: 15.729809 +epoch: 1, batch: 7556, sum loss: 5326.589355, avg loss: 2.813835, ppl: 16.673735 +epoch: 1, batch: 7557, sum loss: 4661.760254, avg loss: 2.576982, ppl: 13.157370 +epoch: 1, batch: 7558, sum loss: 3523.890869, avg loss: 2.453963, ppl: 11.634363 +epoch: 1, batch: 7559, sum loss: 4535.643066, avg loss: 2.960603, ppl: 19.309603 +epoch: 1, batch: 7560, sum loss: 5267.955078, avg loss: 3.118979, ppl: 22.623264 +epoch: 1, batch: 7561, sum loss: 5374.933105, avg loss: 3.089042, ppl: 21.956034 +epoch: 1, batch: 7562, sum loss: 4294.389648, avg loss: 2.662362, ppl: 14.330095 +epoch: 1, batch: 7563, sum loss: 4391.432617, avg loss: 3.043266, ppl: 20.973627 +epoch: 1, batch: 7564, sum loss: 3605.134521, avg loss: 2.529919, ppl: 12.552488 +epoch: 1, batch: 7565, sum loss: 4364.069824, avg loss: 2.703885, ppl: 14.937646 +epoch: 1, batch: 7566, sum loss: 4270.406250, avg loss: 2.554071, ppl: 12.859347 +epoch: 1, batch: 7567, sum loss: 4234.625000, avg loss: 2.733780, ppl: 15.390954 +epoch: 1, batch: 7568, sum loss: 5366.135742, avg loss: 2.883469, ppl: 17.876181 +epoch: 1, batch: 7569, sum loss: 5298.509277, avg loss: 2.766845, ppl: 15.908371 +epoch: 1, batch: 7570, sum loss: 4548.452637, avg loss: 2.621587, ppl: 13.757533 +epoch: 1, batch: 7571, sum loss: 4556.320801, avg loss: 2.814281, ppl: 16.681177 +epoch: 1, batch: 7572, sum loss: 4141.182617, avg loss: 2.591479, ppl: 13.349495 +epoch: 1, batch: 7573, sum loss: 5426.372559, avg loss: 2.986446, ppl: 19.815138 +epoch: 1, batch: 7574, sum loss: 5253.312500, avg loss: 
2.954619, ppl: 19.194412 +epoch: 1, batch: 7575, sum loss: 4389.224609, avg loss: 2.673097, ppl: 14.484755 +epoch: 1, batch: 7576, sum loss: 4171.270508, avg loss: 2.797633, ppl: 16.405767 +epoch: 1, batch: 7577, sum loss: 3731.627686, avg loss: 2.562931, ppl: 12.973788 +epoch: 1, batch: 7578, sum loss: 4347.449219, avg loss: 2.668784, ppl: 14.422420 +epoch: 1, batch: 7579, sum loss: 4206.688477, avg loss: 2.612850, ppl: 13.637863 +epoch: 1, batch: 7580, sum loss: 4653.062500, avg loss: 2.833777, ppl: 17.009592 +epoch: 1, batch: 7581, sum loss: 4337.078125, avg loss: 2.729439, ppl: 15.324285 +epoch: 1, batch: 7582, sum loss: 4226.295410, avg loss: 2.705695, ppl: 14.964712 +epoch: 1, batch: 7583, sum loss: 4155.486328, avg loss: 2.686158, ppl: 14.675184 +epoch: 1, batch: 7584, sum loss: 5007.918945, avg loss: 2.791482, ppl: 16.305170 +epoch: 1, batch: 7585, sum loss: 4175.289551, avg loss: 2.704203, ppl: 14.942409 +epoch: 1, batch: 7586, sum loss: 5908.738770, avg loss: 3.169924, ppl: 23.805681 +epoch: 1, batch: 7587, sum loss: 4642.407227, avg loss: 2.956947, ppl: 19.239151 +epoch: 1, batch: 7588, sum loss: 4905.669922, avg loss: 2.840573, ppl: 17.125582 +epoch: 1, batch: 7589, sum loss: 4659.324219, avg loss: 2.761899, ppl: 15.829883 +epoch: 1, batch: 7590, sum loss: 4595.225586, avg loss: 2.696729, ppl: 14.831136 +epoch: 1, batch: 7591, sum loss: 3270.783691, avg loss: 2.295287, ppl: 9.927281 +epoch: 1, batch: 7592, sum loss: 4162.711914, avg loss: 2.567990, ppl: 13.039589 +epoch: 1, batch: 7593, sum loss: 4809.318848, avg loss: 2.694296, ppl: 14.795105 +epoch: 1, batch: 7594, sum loss: 4454.022461, avg loss: 2.840575, ppl: 17.125618 +epoch: 1, batch: 7595, sum loss: 5360.407227, avg loss: 2.902224, ppl: 18.214611 +epoch: 1, batch: 7596, sum loss: 4523.073730, avg loss: 2.646620, ppl: 14.106279 +epoch: 1, batch: 7597, sum loss: 4477.275391, avg loss: 2.513911, ppl: 12.353149 +epoch: 1, batch: 7598, sum loss: 4576.395020, avg loss: 2.838955, ppl: 17.097883 +epoch: 
1, batch: 7599, sum loss: 4720.494629, avg loss: 2.597961, ppl: 13.436310 +epoch: 1, batch: 7600, sum loss: 4096.183594, avg loss: 2.558516, ppl: 12.916632 +epoch: 1, batch: 7601, sum loss: 4247.013184, avg loss: 2.591222, ppl: 13.346071 +epoch: 1, batch: 7602, sum loss: 4441.752441, avg loss: 2.735069, ppl: 15.410811 +epoch: 1, batch: 7603, sum loss: 5154.278809, avg loss: 2.858724, ppl: 17.439255 +epoch: 1, batch: 7604, sum loss: 4512.142578, avg loss: 2.881317, ppl: 17.837753 +epoch: 1, batch: 7605, sum loss: 3828.755371, avg loss: 2.500820, ppl: 12.192490 +epoch: 1, batch: 7606, sum loss: 4720.362305, avg loss: 2.799741, ppl: 16.440380 +epoch: 1, batch: 7607, sum loss: 5760.835938, avg loss: 3.177516, ppl: 23.987095 +epoch: 1, batch: 7608, sum loss: 5706.667969, avg loss: 2.938552, ppl: 18.888479 +epoch: 1, batch: 7609, sum loss: 4048.592041, avg loss: 2.658301, ppl: 14.272018 +epoch: 1, batch: 7610, sum loss: 5615.867188, avg loss: 3.099264, ppl: 22.181627 +epoch: 1, batch: 7611, sum loss: 3740.120605, avg loss: 2.637603, ppl: 13.979651 +epoch: 1, batch: 7612, sum loss: 4502.634277, avg loss: 2.692963, ppl: 14.775396 +epoch: 1, batch: 7613, sum loss: 3642.986328, avg loss: 2.552899, ppl: 12.844281 +epoch: 1, batch: 7614, sum loss: 3728.423340, avg loss: 2.461006, ppl: 11.716589 +epoch: 1, batch: 7615, sum loss: 4250.857910, avg loss: 2.771094, ppl: 15.976100 +epoch: 1, batch: 7616, sum loss: 4443.535645, avg loss: 2.635549, ppl: 13.950971 +epoch: 1, batch: 7617, sum loss: 3487.578857, avg loss: 2.269082, ppl: 9.670520 +epoch: 1, batch: 7618, sum loss: 5121.169434, avg loss: 2.780222, ppl: 16.122606 +epoch: 1, batch: 7619, sum loss: 5132.245605, avg loss: 2.769695, ppl: 15.953772 +epoch: 1, batch: 7620, sum loss: 4534.782227, avg loss: 2.631911, ppl: 13.900305 +epoch: 1, batch: 7621, sum loss: 5167.226074, avg loss: 2.769146, ppl: 15.945006 +epoch: 1, batch: 7622, sum loss: 4071.434570, avg loss: 2.666296, ppl: 14.386589 +epoch: 1, batch: 7623, sum loss: 
4030.215332, avg loss: 2.407536, ppl: 11.106564 +epoch: 1, batch: 7624, sum loss: 5044.451660, avg loss: 2.867795, ppl: 17.598175 +epoch: 1, batch: 7625, sum loss: 4478.024902, avg loss: 2.757405, ppl: 15.758888 +epoch: 1, batch: 7626, sum loss: 4078.385254, avg loss: 2.605997, ppl: 13.544721 +epoch: 1, batch: 7627, sum loss: 4197.180176, avg loss: 2.676773, ppl: 14.538104 +epoch: 1, batch: 7628, sum loss: 5024.452637, avg loss: 2.882646, ppl: 17.861479 +epoch: 1, batch: 7629, sum loss: 5342.369141, avg loss: 2.801452, ppl: 16.468544 +epoch: 1, batch: 7630, sum loss: 5288.252930, avg loss: 2.830971, ppl: 16.961914 +epoch: 1, batch: 7631, sum loss: 4129.754395, avg loss: 2.736749, ppl: 15.436721 +epoch: 1, batch: 7632, sum loss: 5215.479004, avg loss: 2.926756, ppl: 18.666979 +epoch: 1, batch: 7633, sum loss: 4984.762207, avg loss: 2.821031, ppl: 16.794163 +epoch: 1, batch: 7634, sum loss: 5787.919434, avg loss: 2.868146, ppl: 17.604357 +epoch: 1, batch: 7635, sum loss: 4634.814941, avg loss: 3.051228, ppl: 21.141281 +epoch: 1, batch: 7636, sum loss: 5466.743164, avg loss: 2.833978, ppl: 17.012999 +epoch: 1, batch: 7637, sum loss: 4221.724121, avg loss: 2.711448, ppl: 15.051049 +epoch: 1, batch: 7638, sum loss: 4718.805176, avg loss: 2.661481, ppl: 14.317472 +epoch: 1, batch: 7639, sum loss: 4256.184570, avg loss: 2.712673, ppl: 15.069509 +epoch: 1, batch: 7640, sum loss: 4175.793945, avg loss: 2.535394, ppl: 12.621402 +epoch: 1, batch: 7641, sum loss: 4553.836426, avg loss: 2.725216, ppl: 15.259716 +epoch: 1, batch: 7642, sum loss: 4686.417969, avg loss: 2.774670, ppl: 16.033340 +epoch: 1, batch: 7643, sum loss: 5222.508789, avg loss: 2.938947, ppl: 18.895937 +epoch: 1, batch: 7644, sum loss: 5107.261230, avg loss: 2.798499, ppl: 16.419987 +epoch: 1, batch: 7645, sum loss: 4879.556152, avg loss: 2.902770, ppl: 18.224554 +epoch: 1, batch: 7646, sum loss: 3720.801270, avg loss: 2.462476, ppl: 11.733829 +epoch: 1, batch: 7647, sum loss: 4464.559082, avg loss: 
2.869254, ppl: 17.623863 +epoch: 1, batch: 7648, sum loss: 5278.603516, avg loss: 2.871928, ppl: 17.671051 +epoch: 1, batch: 7649, sum loss: 4781.725586, avg loss: 2.672848, ppl: 14.481156 +epoch: 1, batch: 7650, sum loss: 4756.231445, avg loss: 2.645290, ppl: 14.087528 +epoch: 1, batch: 7651, sum loss: 3976.010498, avg loss: 2.442267, ppl: 11.499079 +epoch: 1, batch: 7652, sum loss: 5346.235352, avg loss: 3.027313, ppl: 20.641699 +epoch: 1, batch: 7653, sum loss: 5363.640625, avg loss: 3.025178, ppl: 20.597675 +epoch: 1, batch: 7654, sum loss: 5642.929199, avg loss: 3.145446, ppl: 23.230024 +epoch: 1, batch: 7655, sum loss: 5890.803711, avg loss: 3.203265, ppl: 24.612753 +epoch: 1, batch: 7656, sum loss: 4720.469238, avg loss: 2.723871, ppl: 15.239202 +epoch: 1, batch: 7657, sum loss: 4589.001953, avg loss: 2.720215, ppl: 15.183580 +epoch: 1, batch: 7658, sum loss: 4640.720703, avg loss: 2.721830, ppl: 15.208134 +epoch: 1, batch: 7659, sum loss: 4117.376465, avg loss: 2.432000, ppl: 11.381627 +epoch: 1, batch: 7660, sum loss: 4908.273438, avg loss: 2.833876, ppl: 17.011271 +epoch: 1, batch: 7661, sum loss: 4048.762451, avg loss: 2.582119, ppl: 13.225129 +epoch: 1, batch: 7662, sum loss: 4382.685059, avg loss: 2.700361, ppl: 14.885098 +epoch: 1, batch: 7663, sum loss: 5700.479004, avg loss: 3.041878, ppl: 20.944534 +epoch: 1, batch: 7664, sum loss: 4058.050293, avg loss: 2.437268, ppl: 11.441734 +epoch: 1, batch: 7665, sum loss: 4873.153809, avg loss: 2.786252, ppl: 16.220106 +epoch: 1, batch: 7666, sum loss: 5064.100098, avg loss: 2.788601, ppl: 16.258261 +epoch: 1, batch: 7667, sum loss: 4601.231445, avg loss: 2.705015, ppl: 14.954547 +epoch: 1, batch: 7668, sum loss: 4938.162598, avg loss: 2.669277, ppl: 14.429536 +epoch: 1, batch: 7669, sum loss: 5341.657715, avg loss: 2.851926, ppl: 17.321115 +epoch: 1, batch: 7670, sum loss: 4594.556641, avg loss: 2.535627, ppl: 12.624349 +epoch: 1, batch: 7671, sum loss: 5191.377441, avg loss: 2.864999, ppl: 17.549028 
+epoch: 1, batch: 7672, sum loss: 4600.912598, avg loss: 2.722434, ppl: 15.217310 +epoch: 1, batch: 7673, sum loss: 4587.734863, avg loss: 2.641183, ppl: 14.029793 +epoch: 1, batch: 7674, sum loss: 4355.556641, avg loss: 2.786664, ppl: 16.226805 +epoch: 1, batch: 7675, sum loss: 3935.553711, avg loss: 2.565550, ppl: 13.007809 +epoch: 1, batch: 7676, sum loss: 4741.132812, avg loss: 2.813729, ppl: 16.671965 +epoch: 1, batch: 7677, sum loss: 5495.552734, avg loss: 2.826931, ppl: 16.893532 +epoch: 1, batch: 7678, sum loss: 5291.962891, avg loss: 2.918899, ppl: 18.520878 +epoch: 1, batch: 7679, sum loss: 4093.572998, avg loss: 2.740009, ppl: 15.487118 +epoch: 1, batch: 7680, sum loss: 4431.021484, avg loss: 2.489338, ppl: 12.053293 +epoch: 1, batch: 7681, sum loss: 4994.605957, avg loss: 2.847552, ppl: 17.245510 +epoch: 1, batch: 7682, sum loss: 4404.970703, avg loss: 2.725848, ppl: 15.269360 +epoch: 1, batch: 7683, sum loss: 3405.958496, avg loss: 2.391825, ppl: 10.933426 +epoch: 1, batch: 7684, sum loss: 4423.884766, avg loss: 2.467309, ppl: 11.790675 +epoch: 1, batch: 7685, sum loss: 3721.750488, avg loss: 2.394949, ppl: 10.967638 +epoch: 1, batch: 7686, sum loss: 4842.566895, avg loss: 2.662214, ppl: 14.327973 +epoch: 1, batch: 7687, sum loss: 4157.833496, avg loss: 2.606792, ppl: 13.555498 +epoch: 1, batch: 7688, sum loss: 4537.375488, avg loss: 2.746595, ppl: 15.589465 +epoch: 1, batch: 7689, sum loss: 4868.671387, avg loss: 2.721448, ppl: 15.202326 +epoch: 1, batch: 7690, sum loss: 5173.184570, avg loss: 2.828423, ppl: 16.918751 +epoch: 1, batch: 7691, sum loss: 4500.195801, avg loss: 2.819672, ppl: 16.771343 +epoch: 1, batch: 7692, sum loss: 3537.899170, avg loss: 2.480995, ppl: 11.953154 +epoch: 1, batch: 7693, sum loss: 5114.340332, avg loss: 2.842880, ppl: 17.165121 +epoch: 1, batch: 7694, sum loss: 3994.946289, avg loss: 2.693828, ppl: 14.788172 +epoch: 1, batch: 7695, sum loss: 4264.941406, avg loss: 2.850897, ppl: 17.303293 +epoch: 1, batch: 7696, sum 
loss: 5404.276855, avg loss: 3.039526, ppl: 20.895336 +epoch: 1, batch: 7697, sum loss: 4183.303223, avg loss: 2.654380, ppl: 14.216170 +epoch: 1, batch: 7698, sum loss: 4028.752930, avg loss: 2.467087, ppl: 11.788055 +epoch: 1, batch: 7699, sum loss: 5245.333496, avg loss: 2.807994, ppl: 16.576637 +epoch: 1, batch: 7700, sum loss: 4162.252441, avg loss: 2.848907, ppl: 17.268888 +epoch: 1, batch: 7701, sum loss: 4218.979492, avg loss: 2.799588, ppl: 16.437876 +epoch: 1, batch: 7702, sum loss: 5865.032227, avg loss: 2.707771, ppl: 14.995813 +epoch: 1, batch: 7703, sum loss: 4342.972656, avg loss: 2.736593, ppl: 15.434307 +epoch: 1, batch: 7704, sum loss: 4090.740723, avg loss: 2.563121, ppl: 12.976251 +epoch: 1, batch: 7705, sum loss: 4224.696289, avg loss: 2.708139, ppl: 15.001328 +epoch: 1, batch: 7706, sum loss: 4321.873047, avg loss: 2.430750, ppl: 11.367403 +epoch: 1, batch: 7707, sum loss: 4901.148438, avg loss: 2.619534, ppl: 13.729328 +epoch: 1, batch: 7708, sum loss: 4144.725098, avg loss: 2.600204, ppl: 13.466485 +epoch: 1, batch: 7709, sum loss: 4371.527344, avg loss: 2.720303, ppl: 15.184927 +epoch: 1, batch: 7710, sum loss: 6115.115723, avg loss: 3.225272, ppl: 25.160421 +epoch: 1, batch: 7711, sum loss: 5050.740234, avg loss: 2.747954, ppl: 15.610665 +epoch: 1, batch: 7712, sum loss: 4207.700195, avg loss: 2.681772, ppl: 14.610961 +epoch: 1, batch: 7713, sum loss: 4173.683594, avg loss: 2.852826, ppl: 17.336700 +epoch: 1, batch: 7714, sum loss: 4120.997559, avg loss: 2.623168, ppl: 13.779314 +epoch: 1, batch: 7715, sum loss: 4833.399902, avg loss: 2.757216, ppl: 15.755920 +epoch: 1, batch: 7716, sum loss: 4156.921875, avg loss: 2.929473, ppl: 18.717762 +epoch: 1, batch: 7717, sum loss: 5583.245605, avg loss: 3.032724, ppl: 20.753696 +epoch: 1, batch: 7718, sum loss: 3685.328369, avg loss: 2.450351, ppl: 11.592418 +epoch: 1, batch: 7719, sum loss: 4206.957520, avg loss: 2.593685, ppl: 13.378984 +epoch: 1, batch: 7720, sum loss: 5185.643555, avg loss: 
2.850821, ppl: 17.301981 +epoch: 1, batch: 7721, sum loss: 5695.427734, avg loss: 2.882302, ppl: 17.855322 +epoch: 1, batch: 7722, sum loss: 5149.491699, avg loss: 2.849746, ppl: 17.283396 +epoch: 1, batch: 7723, sum loss: 5660.018555, avg loss: 3.077770, ppl: 21.709934 +epoch: 1, batch: 7724, sum loss: 4416.002441, avg loss: 2.631706, ppl: 13.897455 +epoch: 1, batch: 7725, sum loss: 4577.223633, avg loss: 2.700427, ppl: 14.886084 +epoch: 1, batch: 7726, sum loss: 4963.846680, avg loss: 2.801268, ppl: 16.465509 +epoch: 1, batch: 7727, sum loss: 5127.957520, avg loss: 2.724738, ppl: 15.252422 +epoch: 1, batch: 7728, sum loss: 4668.073242, avg loss: 2.682801, ppl: 14.626000 +epoch: 1, batch: 7729, sum loss: 5199.034668, avg loss: 2.907737, ppl: 18.315313 +epoch: 1, batch: 7730, sum loss: 4658.994629, avg loss: 2.743813, ppl: 15.546154 +epoch: 1, batch: 7731, sum loss: 4860.124023, avg loss: 2.892931, ppl: 18.046125 +epoch: 1, batch: 7732, sum loss: 3946.776367, avg loss: 2.572866, ppl: 13.103324 +epoch: 1, batch: 7733, sum loss: 6139.862305, avg loss: 3.093130, ppl: 22.045965 +epoch: 1, batch: 7734, sum loss: 4774.272461, avg loss: 2.967230, ppl: 19.437994 +epoch: 1, batch: 7735, sum loss: 4545.663086, avg loss: 2.828664, ppl: 16.922838 +epoch: 1, batch: 7736, sum loss: 4721.187500, avg loss: 2.796912, ppl: 16.393948 +epoch: 1, batch: 7737, sum loss: 4489.092285, avg loss: 2.835813, ppl: 17.044252 +epoch: 1, batch: 7738, sum loss: 5003.391113, avg loss: 2.684223, ppl: 14.646811 +epoch: 1, batch: 7739, sum loss: 4013.953613, avg loss: 2.697550, ppl: 14.843315 +epoch: 1, batch: 7740, sum loss: 4746.640625, avg loss: 2.783954, ppl: 16.182877 +epoch: 1, batch: 7741, sum loss: 5631.404297, avg loss: 3.048947, ppl: 21.093119 +epoch: 1, batch: 7742, sum loss: 4394.532715, avg loss: 2.611130, ppl: 13.614433 +epoch: 1, batch: 7743, sum loss: 5632.194336, avg loss: 2.889787, ppl: 17.989473 +epoch: 1, batch: 7744, sum loss: 4466.681152, avg loss: 2.750419, ppl: 15.649194 
+epoch: 1, batch: 7745, sum loss: 5274.813477, avg loss: 2.814735, ppl: 16.688751 +epoch: 1, batch: 7746, sum loss: 5002.025391, avg loss: 2.795990, ppl: 16.378828 +epoch: 1, batch: 7747, sum loss: 5698.584473, avg loss: 3.002416, ppl: 20.134115 +epoch: 1, batch: 7748, sum loss: 5387.142578, avg loss: 2.888548, ppl: 17.967209 +epoch: 1, batch: 7749, sum loss: 4257.263672, avg loss: 2.795314, ppl: 16.367773 +epoch: 1, batch: 7750, sum loss: 4472.803711, avg loss: 2.776415, ppl: 16.061333 +epoch: 1, batch: 7751, sum loss: 4210.383301, avg loss: 2.784645, ppl: 16.194073 +epoch: 1, batch: 7752, sum loss: 5846.177246, avg loss: 3.076936, ppl: 21.691826 +epoch: 1, batch: 7753, sum loss: 6035.378418, avg loss: 3.073004, ppl: 21.606701 +epoch: 1, batch: 7754, sum loss: 4727.093750, avg loss: 2.926993, ppl: 18.671404 +epoch: 1, batch: 7755, sum loss: 4900.066895, avg loss: 2.880698, ppl: 17.826708 +epoch: 1, batch: 7756, sum loss: 4796.874023, avg loss: 2.761585, ppl: 15.824913 +epoch: 1, batch: 7757, sum loss: 4604.362793, avg loss: 2.726088, ppl: 15.273023 +epoch: 1, batch: 7758, sum loss: 4119.497559, avg loss: 2.592510, ppl: 13.363265 +epoch: 1, batch: 7759, sum loss: 5904.195312, avg loss: 2.915652, ppl: 18.460846 +epoch: 1, batch: 7760, sum loss: 5060.375977, avg loss: 2.681704, ppl: 14.609972 +epoch: 1, batch: 7761, sum loss: 4594.280762, avg loss: 2.663351, ppl: 14.344280 +epoch: 1, batch: 7762, sum loss: 4574.285156, avg loss: 2.727659, ppl: 15.297042 +epoch: 1, batch: 7763, sum loss: 4184.686523, avg loss: 2.787932, ppl: 16.247393 +epoch: 1, batch: 7764, sum loss: 4191.524902, avg loss: 2.608292, ppl: 13.575842 +epoch: 1, batch: 7765, sum loss: 4819.208008, avg loss: 2.777642, ppl: 16.081049 +epoch: 1, batch: 7766, sum loss: 4282.181641, avg loss: 2.698287, ppl: 14.854268 +epoch: 1, batch: 7767, sum loss: 4435.795410, avg loss: 2.728041, ppl: 15.302886 +epoch: 1, batch: 7768, sum loss: 3840.175537, avg loss: 2.568679, ppl: 13.048580 +epoch: 1, batch: 7769, sum 
loss: 4679.799805, avg loss: 2.543369, ppl: 12.722465 +epoch: 1, batch: 7770, sum loss: 4443.018555, avg loss: 2.674906, ppl: 14.510983 +epoch: 1, batch: 7771, sum loss: 4597.001953, avg loss: 2.782689, ppl: 16.162420 +epoch: 1, batch: 7772, sum loss: 4488.206055, avg loss: 2.772209, ppl: 15.993924 +epoch: 1, batch: 7773, sum loss: 3577.804199, avg loss: 2.634613, ppl: 13.937915 +epoch: 1, batch: 7774, sum loss: 4032.897949, avg loss: 2.637605, ppl: 13.979681 +epoch: 1, batch: 7775, sum loss: 5205.903809, avg loss: 2.680692, ppl: 14.595193 +epoch: 1, batch: 7776, sum loss: 4061.999756, avg loss: 2.583969, ppl: 13.249626 +epoch: 1, batch: 7777, sum loss: 4845.131836, avg loss: 2.850078, ppl: 17.289124 +epoch: 1, batch: 7778, sum loss: 4716.225098, avg loss: 2.694986, ppl: 14.805306 +epoch: 1, batch: 7779, sum loss: 4421.815918, avg loss: 2.654151, ppl: 14.212917 +epoch: 1, batch: 7780, sum loss: 4663.348145, avg loss: 2.804178, ppl: 16.513496 +epoch: 1, batch: 7781, sum loss: 5060.330566, avg loss: 3.061301, ppl: 21.355322 +epoch: 1, batch: 7782, sum loss: 5531.039062, avg loss: 2.957775, ppl: 19.255079 +epoch: 1, batch: 7783, sum loss: 4471.227539, avg loss: 2.945473, ppl: 19.019651 +epoch: 1, batch: 7784, sum loss: 4408.536621, avg loss: 2.890844, ppl: 18.008495 +epoch: 1, batch: 7785, sum loss: 5952.978516, avg loss: 3.131498, ppl: 22.908278 +epoch: 1, batch: 7786, sum loss: 4050.522461, avg loss: 2.566871, ppl: 13.025004 +epoch: 1, batch: 7787, sum loss: 4319.901855, avg loss: 2.616537, ppl: 13.688233 +epoch: 1, batch: 7788, sum loss: 4653.308594, avg loss: 2.753437, ppl: 15.696488 +epoch: 1, batch: 7789, sum loss: 5083.899902, avg loss: 2.867400, ppl: 17.591221 +epoch: 1, batch: 7790, sum loss: 4662.159668, avg loss: 2.650460, ppl: 14.160558 +epoch: 1, batch: 7791, sum loss: 4904.439941, avg loss: 2.642478, ppl: 14.047978 +epoch: 1, batch: 7792, sum loss: 4374.832031, avg loss: 2.730856, ppl: 15.346024 +epoch: 1, batch: 7793, sum loss: 4054.309082, avg loss: 
2.580719, ppl: 13.206627 +epoch: 1, batch: 7794, sum loss: 4394.964844, avg loss: 2.653964, ppl: 14.210260 +epoch: 1, batch: 7795, sum loss: 5558.592285, avg loss: 2.974100, ppl: 19.571993 +epoch: 1, batch: 7796, sum loss: 4132.148438, avg loss: 2.686702, ppl: 14.683178 +epoch: 1, batch: 7797, sum loss: 3926.832031, avg loss: 2.619635, ppl: 13.730706 +epoch: 1, batch: 7798, sum loss: 4726.458496, avg loss: 2.798377, ppl: 16.417976 +epoch: 1, batch: 7799, sum loss: 4387.013184, avg loss: 2.676640, ppl: 14.536170 +epoch: 1, batch: 7800, sum loss: 4235.618652, avg loss: 2.629186, ppl: 13.862483 +epoch: 1, batch: 7801, sum loss: 5768.821289, avg loss: 2.920922, ppl: 18.558395 +epoch: 1, batch: 7802, sum loss: 5389.030762, avg loss: 2.979011, ppl: 19.668356 +epoch: 1, batch: 7803, sum loss: 4641.560059, avg loss: 2.655355, ppl: 14.230037 +epoch: 1, batch: 7804, sum loss: 5527.259277, avg loss: 3.013773, ppl: 20.364082 +epoch: 1, batch: 7805, sum loss: 5166.300293, avg loss: 2.935398, ppl: 18.828993 +epoch: 1, batch: 7806, sum loss: 4617.424805, avg loss: 2.625028, ppl: 13.804966 +epoch: 1, batch: 7807, sum loss: 4504.172852, avg loss: 2.749800, ppl: 15.639507 +epoch: 1, batch: 7808, sum loss: 4281.155273, avg loss: 2.762036, ppl: 15.832042 +epoch: 1, batch: 7809, sum loss: 4278.315918, avg loss: 2.718117, ppl: 15.151761 +epoch: 1, batch: 7810, sum loss: 4481.017090, avg loss: 2.730662, ppl: 15.343046 +epoch: 1, batch: 7811, sum loss: 5383.255371, avg loss: 2.818458, ppl: 16.751005 +epoch: 1, batch: 7812, sum loss: 5222.894531, avg loss: 2.925992, ppl: 18.652712 +epoch: 1, batch: 7813, sum loss: 4104.374512, avg loss: 2.459182, ppl: 11.695241 +epoch: 1, batch: 7814, sum loss: 5274.591309, avg loss: 3.003754, ppl: 20.161074 +epoch: 1, batch: 7815, sum loss: 5387.346680, avg loss: 3.021507, ppl: 20.522196 +epoch: 1, batch: 7816, sum loss: 4482.146484, avg loss: 2.574467, ppl: 13.124316 +epoch: 1, batch: 7817, sum loss: 4084.913818, avg loss: 2.484741, ppl: 11.998009 
+epoch: 1, batch: 7818, sum loss: 4532.000977, avg loss: 2.813160, ppl: 16.662491 +epoch: 1, batch: 7819, sum loss: 4906.547852, avg loss: 2.765810, ppl: 15.891915 +epoch: 1, batch: 7820, sum loss: 3950.183594, avg loss: 2.438385, ppl: 11.454527 +epoch: 1, batch: 7821, sum loss: 4128.535156, avg loss: 2.696627, ppl: 14.829622 +epoch: 1, batch: 7822, sum loss: 5928.501465, avg loss: 3.021662, ppl: 20.525381 +epoch: 1, batch: 7823, sum loss: 4172.552246, avg loss: 2.545791, ppl: 12.753317 +epoch: 1, batch: 7824, sum loss: 3893.454102, avg loss: 2.705666, ppl: 14.964284 +epoch: 1, batch: 7825, sum loss: 3629.263184, avg loss: 2.603489, ppl: 13.510791 +epoch: 1, batch: 7826, sum loss: 4112.848633, avg loss: 2.461310, ppl: 11.720154 +epoch: 1, batch: 7827, sum loss: 4215.066406, avg loss: 2.795137, ppl: 16.364870 +epoch: 1, batch: 7828, sum loss: 4491.978027, avg loss: 2.720762, ppl: 15.191895 +epoch: 1, batch: 7829, sum loss: 3669.417969, avg loss: 2.525408, ppl: 12.495996 +epoch: 1, batch: 7830, sum loss: 4617.138672, avg loss: 2.624866, ppl: 13.802721 +epoch: 1, batch: 7831, sum loss: 3723.335938, avg loss: 2.328540, ppl: 10.262950 +epoch: 1, batch: 7832, sum loss: 4828.544434, avg loss: 2.830331, ppl: 16.951069 +epoch: 1, batch: 7833, sum loss: 3538.794189, avg loss: 2.389463, ppl: 10.907632 +epoch: 1, batch: 7834, sum loss: 4676.115723, avg loss: 2.690515, ppl: 14.739269 +epoch: 1, batch: 7835, sum loss: 3924.998535, avg loss: 2.575458, ppl: 13.137337 +epoch: 1, batch: 7836, sum loss: 5010.284668, avg loss: 2.754417, ppl: 15.711881 +epoch: 1, batch: 7837, sum loss: 5341.405273, avg loss: 2.944546, ppl: 19.002024 +epoch: 1, batch: 7838, sum loss: 5127.220215, avg loss: 2.810976, ppl: 16.626139 +epoch: 1, batch: 7839, sum loss: 4285.099121, avg loss: 2.700126, ppl: 14.881602 +epoch: 1, batch: 7840, sum loss: 4271.479492, avg loss: 2.788172, ppl: 16.251286 +epoch: 1, batch: 7841, sum loss: 4305.238281, avg loss: 2.795609, ppl: 16.372601 +epoch: 1, batch: 7842, sum 
loss: 4162.590820, avg loss: 2.824010, ppl: 16.844263 +epoch: 1, batch: 7843, sum loss: 5416.442383, avg loss: 2.954960, ppl: 19.200962 +epoch: 1, batch: 7844, sum loss: 4374.764648, avg loss: 2.507028, ppl: 12.268418 +epoch: 1, batch: 7845, sum loss: 4738.625000, avg loss: 2.790710, ppl: 16.292580 +epoch: 1, batch: 7846, sum loss: 5719.540527, avg loss: 2.999235, ppl: 20.070171 +epoch: 1, batch: 7847, sum loss: 5282.910156, avg loss: 3.057240, ppl: 21.268770 +epoch: 1, batch: 7848, sum loss: 4638.207520, avg loss: 2.794101, ppl: 16.347925 +epoch: 1, batch: 7849, sum loss: 3851.971680, avg loss: 2.557750, ppl: 12.906748 +epoch: 1, batch: 7850, sum loss: 4742.733887, avg loss: 2.693205, ppl: 14.778965 +epoch: 1, batch: 7851, sum loss: 4459.894043, avg loss: 3.013442, ppl: 20.357349 +epoch: 1, batch: 7852, sum loss: 5722.977539, avg loss: 3.026429, ppl: 20.623459 +epoch: 1, batch: 7853, sum loss: 5211.492188, avg loss: 2.957714, ppl: 19.253914 +epoch: 1, batch: 7854, sum loss: 3487.087646, avg loss: 2.769728, ppl: 15.954297 +epoch: 1, batch: 7855, sum loss: 3978.070312, avg loss: 2.519361, ppl: 12.420651 +epoch: 1, batch: 7856, sum loss: 3826.581055, avg loss: 2.437313, ppl: 11.442252 +epoch: 1, batch: 7857, sum loss: 4656.567871, avg loss: 2.671582, ppl: 14.462834 +epoch: 1, batch: 7858, sum loss: 4530.863281, avg loss: 2.822968, ppl: 16.826714 +epoch: 1, batch: 7859, sum loss: 6008.597168, avg loss: 3.292382, ppl: 26.906879 +epoch: 1, batch: 7860, sum loss: 6686.717773, avg loss: 3.166059, ppl: 23.713856 +epoch: 1, batch: 7861, sum loss: 4660.173340, avg loss: 2.640325, ppl: 14.017756 +epoch: 1, batch: 7862, sum loss: 4614.956055, avg loss: 2.742101, ppl: 15.519557 +epoch: 1, batch: 7863, sum loss: 3820.090332, avg loss: 2.420843, ppl: 11.255345 +epoch: 1, batch: 7864, sum loss: 4620.669922, avg loss: 2.716443, ppl: 15.126423 +epoch: 1, batch: 7865, sum loss: 4571.575684, avg loss: 2.844789, ppl: 17.197929 +epoch: 1, batch: 7866, sum loss: 5682.991699, avg loss: 
2.880381, ppl: 17.821064 +epoch: 1, batch: 7867, sum loss: 5314.447266, avg loss: 2.866476, ppl: 17.574980 +epoch: 1, batch: 7868, sum loss: 4494.402344, avg loss: 2.860855, ppl: 17.476456 +epoch: 1, batch: 7869, sum loss: 4998.244141, avg loss: 2.889158, ppl: 17.978174 +epoch: 1, batch: 7870, sum loss: 4759.067383, avg loss: 2.870366, ppl: 17.643480 +epoch: 1, batch: 7871, sum loss: 4635.297852, avg loss: 2.693375, ppl: 14.781474 +epoch: 1, batch: 7872, sum loss: 4102.202637, avg loss: 2.702373, ppl: 14.915088 +epoch: 1, batch: 7873, sum loss: 4069.411377, avg loss: 2.732983, ppl: 15.378692 +epoch: 1, batch: 7874, sum loss: 3708.075684, avg loss: 2.300295, ppl: 9.977126 +epoch: 1, batch: 7875, sum loss: 5531.200195, avg loss: 3.091783, ppl: 22.016304 +epoch: 1, batch: 7876, sum loss: 4745.686035, avg loss: 2.735266, ppl: 15.413838 +epoch: 1, batch: 7877, sum loss: 4508.323242, avg loss: 2.707702, ppl: 14.994773 +epoch: 1, batch: 7878, sum loss: 4811.383301, avg loss: 2.750934, ppl: 15.657248 +epoch: 1, batch: 7879, sum loss: 3950.118652, avg loss: 2.602186, ppl: 13.493205 +epoch: 1, batch: 7880, sum loss: 4772.889160, avg loss: 2.852893, ppl: 17.337862 +epoch: 1, batch: 7881, sum loss: 5193.919434, avg loss: 3.084275, ppl: 21.851624 +epoch: 1, batch: 7882, sum loss: 4264.308594, avg loss: 2.425659, ppl: 11.309679 +epoch: 1, batch: 7883, sum loss: 4538.774414, avg loss: 2.993915, ppl: 19.963680 +epoch: 1, batch: 7884, sum loss: 4270.542480, avg loss: 2.753412, ppl: 15.696099 +epoch: 1, batch: 7885, sum loss: 4972.281738, avg loss: 2.949159, ppl: 19.089891 +epoch: 1, batch: 7886, sum loss: 4831.316406, avg loss: 2.855388, ppl: 17.381178 +epoch: 1, batch: 7887, sum loss: 4782.735840, avg loss: 2.743968, ppl: 15.548556 +epoch: 1, batch: 7888, sum loss: 4288.792969, avg loss: 2.745706, ppl: 15.575607 +epoch: 1, batch: 7889, sum loss: 3940.049316, avg loss: 2.495281, ppl: 12.125146 +epoch: 1, batch: 7890, sum loss: 3517.046387, avg loss: 2.392549, ppl: 10.941343 +epoch: 
1, batch: 7891, sum loss: 5272.131348, avg loss: 2.877801, ppl: 17.775141 +epoch: 1, batch: 7892, sum loss: 5027.735352, avg loss: 2.835722, ppl: 17.042704 +epoch: 1, batch: 7893, sum loss: 4969.304688, avg loss: 2.870771, ppl: 17.650620 +epoch: 1, batch: 7894, sum loss: 4997.392090, avg loss: 2.810682, ppl: 16.621246 +epoch: 1, batch: 7895, sum loss: 5906.208984, avg loss: 2.849112, ppl: 17.272438 +epoch: 1, batch: 7896, sum loss: 4674.501465, avg loss: 2.736827, ppl: 15.437928 +epoch: 1, batch: 7897, sum loss: 4338.174805, avg loss: 2.541403, ppl: 12.697474 +epoch: 1, batch: 7898, sum loss: 4616.458984, avg loss: 2.784354, ppl: 16.189356 +epoch: 1, batch: 7899, sum loss: 3630.373779, avg loss: 2.691159, ppl: 14.748760 +epoch: 1, batch: 7900, sum loss: 4556.739746, avg loss: 2.766691, ppl: 15.905910 +epoch: 1, batch: 7901, sum loss: 5216.376953, avg loss: 2.758528, ppl: 15.776607 +epoch: 1, batch: 7902, sum loss: 4727.422852, avg loss: 2.812268, ppl: 16.647633 +epoch: 1, batch: 7903, sum loss: 4672.783203, avg loss: 2.847522, ppl: 17.244987 +epoch: 1, batch: 7904, sum loss: 4759.512207, avg loss: 2.727514, ppl: 15.294817 +epoch: 1, batch: 7905, sum loss: 3605.959229, avg loss: 2.592350, ppl: 13.361136 +epoch: 1, batch: 7906, sum loss: 4149.758301, avg loss: 2.492347, ppl: 12.089622 +epoch: 1, batch: 7907, sum loss: 5225.666992, avg loss: 2.738819, ppl: 15.468708 +epoch: 1, batch: 7908, sum loss: 4614.091797, avg loss: 2.793034, ppl: 16.330488 +epoch: 1, batch: 7909, sum loss: 4735.267578, avg loss: 2.801934, ppl: 16.476473 +epoch: 1, batch: 7910, sum loss: 4671.319824, avg loss: 2.648140, ppl: 14.127743 +epoch: 1, batch: 7911, sum loss: 3931.215820, avg loss: 2.674297, ppl: 14.502146 +epoch: 1, batch: 7912, sum loss: 5043.156738, avg loss: 2.798644, ppl: 16.422367 +epoch: 1, batch: 7913, sum loss: 4807.156250, avg loss: 2.590063, ppl: 13.330606 +epoch: 1, batch: 7914, sum loss: 4820.326172, avg loss: 2.823858, ppl: 16.841705 +epoch: 1, batch: 7915, sum loss: 
5780.934570, avg loss: 2.996855, ppl: 20.022476 +epoch: 1, batch: 7916, sum loss: 4486.371094, avg loss: 2.681633, ppl: 14.608923 +epoch: 1, batch: 7917, sum loss: 4310.443848, avg loss: 2.467340, ppl: 11.791046 +epoch: 1, batch: 7918, sum loss: 4261.013184, avg loss: 2.598179, ppl: 13.439241 +epoch: 1, batch: 7919, sum loss: 4800.004883, avg loss: 2.674098, ppl: 14.499259 +epoch: 1, batch: 7920, sum loss: 4433.808594, avg loss: 2.738610, ppl: 15.465466 +epoch: 1, batch: 7921, sum loss: 4793.520508, avg loss: 2.697536, ppl: 14.843106 +epoch: 1, batch: 7922, sum loss: 3991.964844, avg loss: 2.670211, ppl: 14.443010 +epoch: 1, batch: 7923, sum loss: 5693.567871, avg loss: 2.966945, ppl: 19.432467 +epoch: 1, batch: 7924, sum loss: 4872.177246, avg loss: 2.850894, ppl: 17.303240 +epoch: 1, batch: 7925, sum loss: 5044.896484, avg loss: 2.887748, ppl: 17.952839 +epoch: 1, batch: 7926, sum loss: 4423.769531, avg loss: 2.510653, ppl: 12.312963 +epoch: 1, batch: 7927, sum loss: 4853.870605, avg loss: 2.669896, ppl: 14.438466 +epoch: 1, batch: 7928, sum loss: 4606.124512, avg loss: 2.602330, ppl: 13.495148 +epoch: 1, batch: 7929, sum loss: 4323.551758, avg loss: 2.582767, ppl: 13.233706 +epoch: 1, batch: 7930, sum loss: 4421.658203, avg loss: 2.679793, ppl: 14.582073 +epoch: 1, batch: 7931, sum loss: 3849.986328, avg loss: 2.450660, ppl: 11.595995 +epoch: 1, batch: 7932, sum loss: 4890.657715, avg loss: 2.914576, ppl: 18.440985 +epoch: 1, batch: 7933, sum loss: 4902.217773, avg loss: 3.037310, ppl: 20.849077 +epoch: 1, batch: 7934, sum loss: 4234.268066, avg loss: 2.502522, ppl: 12.213263 +epoch: 1, batch: 7935, sum loss: 4155.074219, avg loss: 2.561698, ppl: 12.957801 +epoch: 1, batch: 7936, sum loss: 4333.117676, avg loss: 2.648605, ppl: 14.134309 +epoch: 1, batch: 7937, sum loss: 4438.552734, avg loss: 2.795058, ppl: 16.363586 +epoch: 1, batch: 7938, sum loss: 5045.950195, avg loss: 2.968206, ppl: 19.456985 +epoch: 1, batch: 7939, sum loss: 4386.397461, avg loss: 
2.671375, ppl: 14.459838 +epoch: 1, batch: 7940, sum loss: 4793.156738, avg loss: 2.728034, ppl: 15.302780 +epoch: 1, batch: 7941, sum loss: 4416.131348, avg loss: 2.650739, ppl: 14.164502 +epoch: 1, batch: 7942, sum loss: 4103.349121, avg loss: 2.875508, ppl: 17.734425 +epoch: 1, batch: 7943, sum loss: 5145.489258, avg loss: 2.831860, ppl: 16.977005 +epoch: 1, batch: 7944, sum loss: 3775.214111, avg loss: 2.647415, ppl: 14.117500 +epoch: 1, batch: 7945, sum loss: 5386.997070, avg loss: 2.798440, ppl: 16.419012 +epoch: 1, batch: 7946, sum loss: 5208.400879, avg loss: 2.863332, ppl: 17.519806 +epoch: 1, batch: 7947, sum loss: 3492.358398, avg loss: 2.577386, ppl: 13.162687 +epoch: 1, batch: 7948, sum loss: 5355.468262, avg loss: 2.871565, ppl: 17.664635 +epoch: 1, batch: 7949, sum loss: 3644.422852, avg loss: 2.313919, ppl: 10.113984 +epoch: 1, batch: 7950, sum loss: 3960.207764, avg loss: 2.524033, ppl: 12.478824 +epoch: 1, batch: 7951, sum loss: 4817.372559, avg loss: 2.770197, ppl: 15.961781 +epoch: 1, batch: 7952, sum loss: 3598.893311, avg loss: 2.566971, ppl: 13.026309 +epoch: 1, batch: 7953, sum loss: 5055.691406, avg loss: 2.797837, ppl: 16.409111 +epoch: 1, batch: 7954, sum loss: 5149.948730, avg loss: 2.926107, ppl: 18.654869 +epoch: 1, batch: 7955, sum loss: 4796.753418, avg loss: 2.901847, ppl: 18.207747 +epoch: 1, batch: 7956, sum loss: 4976.187012, avg loss: 2.811405, ppl: 16.633274 +epoch: 1, batch: 7957, sum loss: 4246.643555, avg loss: 2.720464, ppl: 15.187360 +epoch: 1, batch: 7958, sum loss: 4560.116211, avg loss: 2.928784, ppl: 18.704865 +epoch: 1, batch: 7959, sum loss: 4511.938965, avg loss: 2.724601, ppl: 15.250324 +epoch: 1, batch: 7960, sum loss: 5043.940430, avg loss: 2.902152, ppl: 18.213299 +epoch: 1, batch: 7961, sum loss: 4838.677246, avg loss: 2.729090, ppl: 15.318948 +epoch: 1, batch: 7962, sum loss: 5938.606445, avg loss: 2.981228, ppl: 19.712015 +epoch: 1, batch: 7963, sum loss: 5324.979980, avg loss: 3.018696, ppl: 20.464590 
+epoch: 1, batch: 7964, sum loss: 4715.414551, avg loss: 2.793492, ppl: 16.337975 +epoch: 1, batch: 7965, sum loss: 4523.413574, avg loss: 2.872009, ppl: 17.672480 +epoch: 1, batch: 7966, sum loss: 4740.004395, avg loss: 3.032632, ppl: 20.751787 +epoch: 1, batch: 7967, sum loss: 5223.713379, avg loss: 2.878079, ppl: 17.780088 +epoch: 1, batch: 7968, sum loss: 4319.576660, avg loss: 2.677977, ppl: 14.555615 +epoch: 1, batch: 7969, sum loss: 5113.132324, avg loss: 2.719751, ppl: 15.176544 +epoch: 1, batch: 7970, sum loss: 5401.059082, avg loss: 3.203475, ppl: 24.617935 +epoch: 1, batch: 7971, sum loss: 5190.118164, avg loss: 2.817654, ppl: 16.737537 +epoch: 1, batch: 7972, sum loss: 4086.250488, avg loss: 2.433741, ppl: 11.401451 +epoch: 1, batch: 7973, sum loss: 4055.291016, avg loss: 2.734519, ppl: 15.402329 +epoch: 1, batch: 7974, sum loss: 5487.161133, avg loss: 2.921811, ppl: 18.574898 +epoch: 1, batch: 7975, sum loss: 4032.951904, avg loss: 2.469658, ppl: 11.818406 +epoch: 1, batch: 7976, sum loss: 5039.219727, avg loss: 2.722431, ppl: 15.217270 +epoch: 1, batch: 7977, sum loss: 4096.990723, avg loss: 2.530569, ppl: 12.560649 +epoch: 1, batch: 7978, sum loss: 5279.225098, avg loss: 2.867586, ppl: 17.594488 +epoch: 1, batch: 7979, sum loss: 5743.394043, avg loss: 3.111264, ppl: 22.449413 +epoch: 1, batch: 7980, sum loss: 5045.671387, avg loss: 2.745197, ppl: 15.567674 +epoch: 1, batch: 7981, sum loss: 4691.936523, avg loss: 2.801156, ppl: 16.463673 +epoch: 1, batch: 7982, sum loss: 4296.297363, avg loss: 2.594383, ppl: 13.388318 +epoch: 1, batch: 7983, sum loss: 5037.603027, avg loss: 2.876987, ppl: 17.760672 +epoch: 1, batch: 7984, sum loss: 4076.520020, avg loss: 2.522599, ppl: 12.460940 +epoch: 1, batch: 7985, sum loss: 3930.196777, avg loss: 2.500125, ppl: 12.184019 +epoch: 1, batch: 7986, sum loss: 5230.941406, avg loss: 2.978896, ppl: 19.666096 +epoch: 1, batch: 7987, sum loss: 4639.752441, avg loss: 2.702244, ppl: 14.913157 +epoch: 1, batch: 7988, sum 
loss: 4574.057129, avg loss: 2.820010, ppl: 16.777025 +epoch: 1, batch: 7989, sum loss: 5587.197266, avg loss: 2.722806, ppl: 15.222974 +epoch: 1, batch: 7990, sum loss: 3697.599609, avg loss: 2.429435, ppl: 11.352464 +epoch: 1, batch: 7991, sum loss: 4374.502930, avg loss: 2.394364, ppl: 10.961224 +epoch: 1, batch: 7992, sum loss: 4407.334961, avg loss: 2.771909, ppl: 15.989124 +epoch: 1, batch: 7993, sum loss: 4636.764648, avg loss: 2.669410, ppl: 14.431449 +epoch: 1, batch: 7994, sum loss: 3903.332031, avg loss: 2.442636, ppl: 11.503321 +epoch: 1, batch: 7995, sum loss: 3507.844971, avg loss: 2.461645, ppl: 11.724086 +epoch: 1, batch: 7996, sum loss: 4442.115723, avg loss: 2.961411, ppl: 19.325211 +epoch: 1, batch: 7997, sum loss: 3928.023682, avg loss: 2.746870, ppl: 15.593744 +epoch: 1, batch: 7998, sum loss: 5251.420410, avg loss: 2.769737, ppl: 15.954433 +epoch: 1, batch: 7999, sum loss: 4738.264160, avg loss: 2.823757, ppl: 16.839998 +epoch: 1, batch: 8000, sum loss: 4855.688477, avg loss: 2.911084, ppl: 18.376711 +epoch: 1, batch: 8001, sum loss: 4180.584473, avg loss: 2.800123, ppl: 16.446676 +epoch: 1, batch: 8002, sum loss: 3998.951660, avg loss: 2.680263, ppl: 14.588923 +epoch: 1, batch: 8003, sum loss: 5572.840820, avg loss: 2.862271, ppl: 17.501225 +epoch: 1, batch: 8004, sum loss: 4854.997559, avg loss: 2.788626, ppl: 16.258665 +epoch: 1, batch: 8005, sum loss: 5222.335938, avg loss: 2.898078, ppl: 18.139244 +epoch: 1, batch: 8006, sum loss: 4154.858398, avg loss: 2.824513, ppl: 16.852736 +epoch: 1, batch: 8007, sum loss: 4083.776611, avg loss: 2.528654, ppl: 12.536625 +epoch: 1, batch: 8008, sum loss: 4733.463867, avg loss: 2.698668, ppl: 14.859925 +epoch: 1, batch: 8009, sum loss: 4584.869141, avg loss: 2.812803, ppl: 16.656542 +epoch: 1, batch: 8010, sum loss: 4922.841797, avg loss: 2.764089, ppl: 15.864575 +epoch: 1, batch: 8011, sum loss: 4688.482422, avg loss: 2.767699, ppl: 15.921955 +epoch: 1, batch: 8012, sum loss: 5131.281738, avg loss: 
3.027305, ppl: 20.641527 +epoch: 1, batch: 8013, sum loss: 4226.187012, avg loss: 2.693554, ppl: 14.784131 +epoch: 1, batch: 8014, sum loss: 4226.214355, avg loss: 2.793268, ppl: 16.334312 +epoch: 1, batch: 8015, sum loss: 4321.006836, avg loss: 2.503480, ppl: 12.224965 +epoch: 1, batch: 8016, sum loss: 4131.492188, avg loss: 2.703856, ppl: 14.937222 +epoch: 1, batch: 8017, sum loss: 4553.830078, avg loss: 2.704175, ppl: 14.941980 +epoch: 1, batch: 8018, sum loss: 5391.712891, avg loss: 3.047888, ppl: 21.070787 +epoch: 1, batch: 8019, sum loss: 5387.498535, avg loss: 2.856574, ppl: 17.401808 +epoch: 1, batch: 8020, sum loss: 3993.184326, avg loss: 2.549926, ppl: 12.806157 +epoch: 1, batch: 8021, sum loss: 4742.558594, avg loss: 2.804588, ppl: 16.520273 +epoch: 1, batch: 8022, sum loss: 3883.434570, avg loss: 2.500602, ppl: 12.189830 +epoch: 1, batch: 8023, sum loss: 4920.859375, avg loss: 2.841143, ppl: 17.135338 +epoch: 1, batch: 8024, sum loss: 5183.908203, avg loss: 2.923806, ppl: 18.611994 +epoch: 1, batch: 8025, sum loss: 5218.483398, avg loss: 2.805636, ppl: 16.537598 +epoch: 1, batch: 8026, sum loss: 4744.008789, avg loss: 2.823815, ppl: 16.840975 +epoch: 1, batch: 8027, sum loss: 4829.802734, avg loss: 2.878309, ppl: 17.784174 +epoch: 1, batch: 8028, sum loss: 4628.883301, avg loss: 2.550349, ppl: 12.811574 +epoch: 1, batch: 8029, sum loss: 5904.161133, avg loss: 2.863318, ppl: 17.519556 +epoch: 1, batch: 8030, sum loss: 4550.600098, avg loss: 2.889270, ppl: 17.980177 +epoch: 1, batch: 8031, sum loss: 4730.812012, avg loss: 2.520411, ppl: 12.433709 +epoch: 1, batch: 8032, sum loss: 4075.397461, avg loss: 2.627593, ppl: 13.840420 +epoch: 1, batch: 8033, sum loss: 5047.631836, avg loss: 2.932964, ppl: 18.783228 +epoch: 1, batch: 8034, sum loss: 4597.217285, avg loss: 2.837789, ppl: 17.077957 +epoch: 1, batch: 8035, sum loss: 4596.885254, avg loss: 2.623793, ppl: 13.787924 +epoch: 1, batch: 8036, sum loss: 4271.817871, avg loss: 2.902050, ppl: 18.211445 
+epoch: 1, batch: 8037, sum loss: 4567.022461, avg loss: 2.857962, ppl: 17.425970 +epoch: 1, batch: 8038, sum loss: 5635.806152, avg loss: 3.046382, ppl: 21.039082 +epoch: 1, batch: 8039, sum loss: 4525.246094, avg loss: 2.665045, ppl: 14.368596 +epoch: 1, batch: 8040, sum loss: 5399.083008, avg loss: 2.859684, ppl: 17.456005 +epoch: 1, batch: 8041, sum loss: 4414.004883, avg loss: 2.659039, ppl: 14.282560 +epoch: 1, batch: 8042, sum loss: 4234.353027, avg loss: 2.749580, ppl: 15.636062 +epoch: 1, batch: 8043, sum loss: 4280.568848, avg loss: 2.786829, ppl: 16.229471 +epoch: 1, batch: 8044, sum loss: 4871.200684, avg loss: 2.730494, ppl: 15.340460 +epoch: 1, batch: 8045, sum loss: 4832.843750, avg loss: 2.704445, ppl: 14.946025 +epoch: 1, batch: 8046, sum loss: 4424.835938, avg loss: 2.854733, ppl: 17.369799 +epoch: 1, batch: 8047, sum loss: 4604.906250, avg loss: 2.851335, ppl: 17.310877 +epoch: 1, batch: 8048, sum loss: 4613.161133, avg loss: 2.861763, ppl: 17.492331 +epoch: 1, batch: 8049, sum loss: 4712.833496, avg loss: 2.697672, ppl: 14.845134 +epoch: 1, batch: 8050, sum loss: 4749.934570, avg loss: 2.689657, ppl: 14.726627 +epoch: 1, batch: 8051, sum loss: 4276.675293, avg loss: 2.659624, ppl: 14.290913 +epoch: 1, batch: 8052, sum loss: 4319.137207, avg loss: 2.795558, ppl: 16.371765 +epoch: 1, batch: 8053, sum loss: 4274.053223, avg loss: 2.895700, ppl: 18.096165 +epoch: 1, batch: 8054, sum loss: 3895.723389, avg loss: 2.673798, ppl: 14.494911 +epoch: 1, batch: 8055, sum loss: 4800.861328, avg loss: 2.861062, ppl: 17.480074 +epoch: 1, batch: 8056, sum loss: 4537.075195, avg loss: 2.733178, ppl: 15.381691 +epoch: 1, batch: 8057, sum loss: 4563.478516, avg loss: 2.843289, ppl: 17.172146 +epoch: 1, batch: 8058, sum loss: 4551.218262, avg loss: 2.741698, ppl: 15.513301 +epoch: 1, batch: 8059, sum loss: 4858.900879, avg loss: 2.880202, ppl: 17.817873 +epoch: 1, batch: 8060, sum loss: 4940.314941, avg loss: 2.870607, ppl: 17.647730 +epoch: 1, batch: 8061, sum 
loss: 3874.136719, avg loss: 2.608846, ppl: 13.583369 +epoch: 1, batch: 8062, sum loss: 3713.975586, avg loss: 2.410107, ppl: 11.135157 +epoch: 1, batch: 8063, sum loss: 4977.142578, avg loss: 2.824712, ppl: 16.856091 +epoch: 1, batch: 8064, sum loss: 5371.444824, avg loss: 3.041588, ppl: 20.938469 +epoch: 1, batch: 8065, sum loss: 4855.669434, avg loss: 2.678251, ppl: 14.559610 +epoch: 1, batch: 8066, sum loss: 4424.536133, avg loss: 2.955602, ppl: 19.213284 +epoch: 1, batch: 8067, sum loss: 4032.108643, avg loss: 2.591329, ppl: 13.347503 +epoch: 1, batch: 8068, sum loss: 3919.953125, avg loss: 2.456111, ppl: 11.659379 +epoch: 1, batch: 8069, sum loss: 5020.949219, avg loss: 2.882290, ppl: 17.855112 +epoch: 1, batch: 8070, sum loss: 4394.573730, avg loss: 2.729549, ppl: 15.325973 +epoch: 1, batch: 8071, sum loss: 6730.680176, avg loss: 3.215805, ppl: 24.923355 +epoch: 1, batch: 8072, sum loss: 4664.639648, avg loss: 2.870548, ppl: 17.646677 +epoch: 1, batch: 8073, sum loss: 4684.806152, avg loss: 2.810322, ppl: 16.615261 +epoch: 1, batch: 8074, sum loss: 4839.084961, avg loss: 2.854917, ppl: 17.373001 +epoch: 1, batch: 8075, sum loss: 5556.430176, avg loss: 2.849451, ppl: 17.278299 +epoch: 1, batch: 8076, sum loss: 4678.096680, avg loss: 2.759939, ppl: 15.798879 +epoch: 1, batch: 8077, sum loss: 4739.307129, avg loss: 2.594038, ppl: 13.383703 +epoch: 1, batch: 8078, sum loss: 5585.492676, avg loss: 3.348617, ppl: 28.463337 +epoch: 1, batch: 8079, sum loss: 3860.391846, avg loss: 2.405228, ppl: 11.080960 +epoch: 1, batch: 8080, sum loss: 5547.233398, avg loss: 2.968022, ppl: 19.453405 +epoch: 1, batch: 8081, sum loss: 4819.257812, avg loss: 2.829864, ppl: 16.943150 +epoch: 1, batch: 8082, sum loss: 3605.183838, avg loss: 2.267411, ppl: 9.654375 +epoch: 1, batch: 8083, sum loss: 3611.010498, avg loss: 2.659065, ppl: 14.282932 +epoch: 1, batch: 8084, sum loss: 5246.015137, avg loss: 2.990886, ppl: 19.903318 +epoch: 1, batch: 8085, sum loss: 4498.891602, avg loss: 
2.593021, ppl: 13.370104 +epoch: 1, batch: 8086, sum loss: 3351.622803, avg loss: 2.455401, ppl: 11.651110 +epoch: 1, batch: 8087, sum loss: 4511.577148, avg loss: 2.703162, ppl: 14.926855 +epoch: 1, batch: 8088, sum loss: 5918.761230, avg loss: 3.016698, ppl: 20.423735 +epoch: 1, batch: 8089, sum loss: 4025.864990, avg loss: 2.597332, ppl: 13.427868 +epoch: 1, batch: 8090, sum loss: 4675.211914, avg loss: 2.656370, ppl: 14.244493 +epoch: 1, batch: 8091, sum loss: 4924.372559, avg loss: 2.799530, ppl: 16.436916 +epoch: 1, batch: 8092, sum loss: 4376.646484, avg loss: 2.706646, ppl: 14.978951 +epoch: 1, batch: 8093, sum loss: 5097.210449, avg loss: 2.965219, ppl: 19.398943 +epoch: 1, batch: 8094, sum loss: 4512.509766, avg loss: 2.698870, ppl: 14.862923 +epoch: 1, batch: 8095, sum loss: 5300.016602, avg loss: 3.001142, ppl: 20.108484 +epoch: 1, batch: 8096, sum loss: 4974.490234, avg loss: 2.733237, ppl: 15.382593 +epoch: 1, batch: 8097, sum loss: 4530.462402, avg loss: 2.568289, ppl: 13.043491 +epoch: 1, batch: 8098, sum loss: 4961.089844, avg loss: 2.596070, ppl: 13.410930 +epoch: 1, batch: 8099, sum loss: 5526.236328, avg loss: 2.695725, ppl: 14.816257 +epoch: 1, batch: 8100, sum loss: 4871.353516, avg loss: 2.657585, ppl: 14.261807 +epoch: 1, batch: 8101, sum loss: 4715.997559, avg loss: 2.798812, ppl: 16.425117 +epoch: 1, batch: 8102, sum loss: 4242.831543, avg loss: 2.534547, ppl: 12.610718 +epoch: 1, batch: 8103, sum loss: 4514.136719, avg loss: 2.724283, ppl: 15.245475 +epoch: 1, batch: 8104, sum loss: 4735.293945, avg loss: 2.856028, ppl: 17.392305 +epoch: 1, batch: 8105, sum loss: 5252.497559, avg loss: 3.080644, ppl: 21.772417 +epoch: 1, batch: 8106, sum loss: 5304.783691, avg loss: 2.735835, ppl: 15.422613 +epoch: 1, batch: 8107, sum loss: 3922.614258, avg loss: 2.555449, ppl: 12.877080 +epoch: 1, batch: 8108, sum loss: 5401.778320, avg loss: 2.793060, ppl: 16.330917 +epoch: 1, batch: 8109, sum loss: 4722.527832, avg loss: 2.844896, ppl: 17.199776 
+epoch: 1, batch: 8110, sum loss: 4866.602051, avg loss: 2.844303, ppl: 17.189571 +epoch: 1, batch: 8111, sum loss: 4443.152832, avg loss: 3.034940, ppl: 20.799740 +epoch: 1, batch: 8112, sum loss: 3883.586426, avg loss: 2.464205, ppl: 11.754128 +epoch: 1, batch: 8113, sum loss: 4395.242188, avg loss: 2.787091, ppl: 16.233723 +epoch: 1, batch: 8114, sum loss: 4451.714844, avg loss: 2.688234, ppl: 14.705677 +epoch: 1, batch: 8115, sum loss: 4126.877441, avg loss: 2.446282, ppl: 11.545341 +epoch: 1, batch: 8116, sum loss: 4781.843262, avg loss: 2.786622, ppl: 16.226116 +epoch: 1, batch: 8117, sum loss: 4651.558594, avg loss: 2.679469, ppl: 14.577355 +epoch: 1, batch: 8118, sum loss: 4609.958008, avg loss: 2.555409, ppl: 12.876565 +epoch: 1, batch: 8119, sum loss: 4401.582031, avg loss: 2.936346, ppl: 18.846846 +epoch: 1, batch: 8120, sum loss: 4672.561523, avg loss: 2.697784, ppl: 14.846790 +epoch: 1, batch: 8121, sum loss: 5111.341797, avg loss: 2.866709, ppl: 17.579069 +epoch: 1, batch: 8122, sum loss: 4880.230957, avg loss: 2.744787, ppl: 15.561298 +epoch: 1, batch: 8123, sum loss: 4091.174805, avg loss: 2.648010, ppl: 14.125897 +epoch: 1, batch: 8124, sum loss: 3818.142578, avg loss: 2.490634, ppl: 12.068931 +epoch: 1, batch: 8125, sum loss: 4492.058594, avg loss: 2.719164, ppl: 15.167635 +epoch: 1, batch: 8126, sum loss: 4857.292480, avg loss: 2.770846, ppl: 15.972139 +epoch: 1, batch: 8127, sum loss: 5056.922852, avg loss: 2.848970, ppl: 17.269991 +epoch: 1, batch: 8128, sum loss: 4330.455566, avg loss: 2.533912, ppl: 12.602711 +epoch: 1, batch: 8129, sum loss: 4322.584473, avg loss: 2.428418, ppl: 11.340928 +epoch: 1, batch: 8130, sum loss: 4918.485840, avg loss: 2.846346, ppl: 17.224726 +epoch: 1, batch: 8131, sum loss: 4424.884277, avg loss: 2.779450, ppl: 16.110157 +epoch: 1, batch: 8132, sum loss: 4866.863281, avg loss: 2.829572, ppl: 16.938206 +epoch: 1, batch: 8133, sum loss: 5084.567383, avg loss: 2.880775, ppl: 17.828079 +epoch: 1, batch: 8134, sum 
loss: 4078.540039, avg loss: 2.433496, ppl: 11.398667 +epoch: 1, batch: 8135, sum loss: 4539.836914, avg loss: 2.710351, ppl: 15.034544 +epoch: 1, batch: 8136, sum loss: 5116.748535, avg loss: 2.890818, ppl: 18.008041 +epoch: 1, batch: 8137, sum loss: 4922.195312, avg loss: 2.912542, ppl: 18.403515 +epoch: 1, batch: 8138, sum loss: 4861.819824, avg loss: 2.698013, ppl: 14.850200 +epoch: 1, batch: 8139, sum loss: 4108.935059, avg loss: 2.699695, ppl: 14.875189 +epoch: 1, batch: 8140, sum loss: 5185.064453, avg loss: 2.979922, ppl: 19.686283 +epoch: 1, batch: 8141, sum loss: 4271.724121, avg loss: 2.733029, ppl: 15.379399 +epoch: 1, batch: 8142, sum loss: 4345.840820, avg loss: 2.603859, ppl: 13.515798 +epoch: 1, batch: 8143, sum loss: 6046.166992, avg loss: 2.752010, ppl: 15.674110 +epoch: 1, batch: 8144, sum loss: 4514.249512, avg loss: 2.779710, ppl: 16.114351 +epoch: 1, batch: 8145, sum loss: 4595.298340, avg loss: 2.651644, ppl: 14.177320 +epoch: 1, batch: 8146, sum loss: 4593.655273, avg loss: 2.672284, ppl: 14.472982 +epoch: 1, batch: 8147, sum loss: 5188.935059, avg loss: 2.635315, ppl: 13.947704 +epoch: 1, batch: 8148, sum loss: 4417.324219, avg loss: 2.613801, ppl: 13.650843 +epoch: 1, batch: 8149, sum loss: 4772.032227, avg loss: 2.867808, ppl: 17.598398 +epoch: 1, batch: 8150, sum loss: 3754.395508, avg loss: 2.651409, ppl: 14.173998 +epoch: 1, batch: 8151, sum loss: 4868.556641, avg loss: 2.892785, ppl: 18.043484 +epoch: 1, batch: 8152, sum loss: 4423.434082, avg loss: 2.647178, ppl: 14.114151 +epoch: 1, batch: 8153, sum loss: 4624.294922, avg loss: 2.614073, ppl: 13.654553 +epoch: 1, batch: 8154, sum loss: 4111.500000, avg loss: 2.548977, ppl: 12.794010 +epoch: 1, batch: 8155, sum loss: 4577.098633, avg loss: 2.642667, ppl: 14.050621 +epoch: 1, batch: 8156, sum loss: 4296.106445, avg loss: 2.461952, ppl: 11.727681 +epoch: 1, batch: 8157, sum loss: 4620.051758, avg loss: 2.751669, ppl: 15.668756 +epoch: 1, batch: 8158, sum loss: 4513.303223, avg loss: 
2.601327, ppl: 13.481622 +epoch: 1, batch: 8159, sum loss: 4424.823242, avg loss: 2.814773, ppl: 16.689388 +epoch: 1, batch: 8160, sum loss: 5116.477539, avg loss: 2.770156, ppl: 15.961123 +epoch: 1, batch: 8161, sum loss: 4682.015625, avg loss: 2.706367, ppl: 14.974777 +epoch: 1, batch: 8162, sum loss: 5340.615234, avg loss: 2.957151, ppl: 19.243078 +epoch: 1, batch: 8163, sum loss: 4139.356934, avg loss: 2.759571, ppl: 15.793071 +epoch: 1, batch: 8164, sum loss: 4615.216309, avg loss: 2.850659, ppl: 17.299171 +epoch: 1, batch: 8165, sum loss: 5487.520508, avg loss: 2.961425, ppl: 19.325493 +epoch: 1, batch: 8166, sum loss: 4880.894531, avg loss: 2.656992, ppl: 14.253353 +epoch: 1, batch: 8167, sum loss: 3778.307617, avg loss: 2.579050, ppl: 13.184602 +epoch: 1, batch: 8168, sum loss: 3539.723633, avg loss: 2.521171, ppl: 12.443154 +epoch: 1, batch: 8169, sum loss: 4219.892578, avg loss: 2.733091, ppl: 15.380356 +epoch: 1, batch: 8170, sum loss: 5158.485352, avg loss: 2.837451, ppl: 17.072189 +epoch: 1, batch: 8171, sum loss: 4883.469727, avg loss: 2.867569, ppl: 17.594191 +epoch: 1, batch: 8172, sum loss: 4373.763672, avg loss: 2.571290, ppl: 13.082685 +epoch: 1, batch: 8173, sum loss: 4865.201660, avg loss: 2.892510, ppl: 18.038528 +epoch: 1, batch: 8174, sum loss: 4043.879395, avg loss: 2.488541, ppl: 12.043694 +epoch: 1, batch: 8175, sum loss: 4502.799316, avg loss: 2.805482, ppl: 16.535049 +epoch: 1, batch: 8176, sum loss: 4673.066406, avg loss: 2.699634, ppl: 14.874285 +epoch: 1, batch: 8177, sum loss: 5051.086914, avg loss: 3.114110, ppl: 22.513390 +epoch: 1, batch: 8178, sum loss: 5068.273926, avg loss: 2.863432, ppl: 17.521553 +epoch: 1, batch: 8179, sum loss: 3332.277832, avg loss: 2.302887, ppl: 10.003021 +epoch: 1, batch: 8180, sum loss: 4808.553711, avg loss: 3.005346, ppl: 20.193203 +epoch: 1, batch: 8181, sum loss: 4693.399902, avg loss: 2.723970, ppl: 15.240704 +epoch: 1, batch: 8182, sum loss: 4827.405762, avg loss: 2.752227, ppl: 15.677501 
+epoch: 1, batch: 8183, sum loss: 3992.844238, avg loss: 2.457135, ppl: 11.671325 +epoch: 1, batch: 8184, sum loss: 4898.305664, avg loss: 2.694338, ppl: 14.795715 +epoch: 1, batch: 8185, sum loss: 4809.180664, avg loss: 2.551290, ppl: 12.823630 +epoch: 1, batch: 8186, sum loss: 4344.711426, avg loss: 2.710363, ppl: 15.034727 +epoch: 1, batch: 8187, sum loss: 4256.565918, avg loss: 2.716379, ppl: 15.125456 +epoch: 1, batch: 8188, sum loss: 3820.343262, avg loss: 2.475919, ppl: 11.892634 +epoch: 1, batch: 8189, sum loss: 4252.848633, avg loss: 2.734951, ppl: 15.408989 +epoch: 1, batch: 8190, sum loss: 4075.981445, avg loss: 2.609463, ppl: 13.591753 +epoch: 1, batch: 8191, sum loss: 4829.140625, avg loss: 2.716052, ppl: 15.120513 +epoch: 1, batch: 8192, sum loss: 5124.451660, avg loss: 2.582889, ppl: 13.235318 +epoch: 1, batch: 8193, sum loss: 3711.068359, avg loss: 2.505785, ppl: 12.253180 +epoch: 1, batch: 8194, sum loss: 4572.351562, avg loss: 2.805124, ppl: 16.529118 +epoch: 1, batch: 8195, sum loss: 4504.880859, avg loss: 2.741863, ppl: 15.515861 +epoch: 1, batch: 8196, sum loss: 4873.532715, avg loss: 2.904370, ppl: 18.253736 +epoch: 1, batch: 8197, sum loss: 4281.709961, avg loss: 2.612391, ppl: 13.631612 +epoch: 1, batch: 8198, sum loss: 3746.552246, avg loss: 2.421818, ppl: 11.266323 +epoch: 1, batch: 8199, sum loss: 4203.983398, avg loss: 2.736969, ppl: 15.440107 +epoch: 1, batch: 8200, sum loss: 4803.273438, avg loss: 2.981548, ppl: 19.718313 +epoch: 1, batch: 8201, sum loss: 4166.448730, avg loss: 2.586250, ppl: 13.279880 +epoch: 1, batch: 8202, sum loss: 4561.938477, avg loss: 2.715440, ppl: 15.111251 +epoch: 1, batch: 8203, sum loss: 4787.370117, avg loss: 2.639124, ppl: 14.000929 +epoch: 1, batch: 8204, sum loss: 4223.541504, avg loss: 2.497659, ppl: 12.154011 +epoch: 1, batch: 8205, sum loss: 5730.961914, avg loss: 3.050006, ppl: 21.115479 +epoch: 1, batch: 8206, sum loss: 4105.852539, avg loss: 2.520474, ppl: 12.434492 +epoch: 1, batch: 8207, sum 
loss: 4550.620117, avg loss: 2.771389, ppl: 15.980812 +epoch: 1, batch: 8208, sum loss: 4719.583984, avg loss: 2.817662, ppl: 16.737677 +epoch: 1, batch: 8209, sum loss: 3958.069336, avg loss: 2.631695, ppl: 13.897306 +epoch: 1, batch: 8210, sum loss: 3954.762695, avg loss: 2.553107, ppl: 12.846957 +epoch: 1, batch: 8211, sum loss: 5378.838379, avg loss: 2.798563, ppl: 16.421038 +epoch: 1, batch: 8212, sum loss: 3550.869141, avg loss: 2.370407, ppl: 10.701743 +epoch: 1, batch: 8213, sum loss: 4443.383301, avg loss: 2.780590, ppl: 16.128538 +epoch: 1, batch: 8214, sum loss: 4592.954590, avg loss: 2.809146, ppl: 16.595747 +epoch: 1, batch: 8215, sum loss: 3740.208008, avg loss: 2.641390, ppl: 14.032693 +epoch: 1, batch: 8216, sum loss: 5690.090820, avg loss: 2.889838, ppl: 17.990395 +epoch: 1, batch: 8217, sum loss: 4396.332520, avg loss: 2.727253, ppl: 15.290833 +epoch: 1, batch: 8218, sum loss: 4812.779297, avg loss: 2.764376, ppl: 15.869141 +epoch: 1, batch: 8219, sum loss: 4042.719482, avg loss: 2.576622, ppl: 13.152631 +epoch: 1, batch: 8220, sum loss: 4602.487305, avg loss: 2.787697, ppl: 16.243565 +epoch: 1, batch: 8221, sum loss: 5207.166992, avg loss: 2.867383, ppl: 17.590914 +epoch: 1, batch: 8222, sum loss: 3905.539551, avg loss: 2.431843, ppl: 11.379833 +epoch: 1, batch: 8223, sum loss: 4992.136719, avg loss: 2.738419, ppl: 15.462513 +epoch: 1, batch: 8224, sum loss: 4749.681152, avg loss: 2.764657, ppl: 15.873598 +epoch: 1, batch: 8225, sum loss: 4311.459473, avg loss: 2.616177, ppl: 13.683310 +epoch: 1, batch: 8226, sum loss: 4078.293945, avg loss: 2.522136, ppl: 12.455175 +epoch: 1, batch: 8227, sum loss: 4554.013672, avg loss: 2.688320, ppl: 14.706943 +epoch: 1, batch: 8228, sum loss: 4878.527344, avg loss: 2.881588, ppl: 17.842575 +epoch: 1, batch: 8229, sum loss: 5344.500977, avg loss: 2.914123, ppl: 18.432632 +epoch: 1, batch: 8230, sum loss: 4399.616211, avg loss: 2.677794, ppl: 14.552958 +epoch: 1, batch: 8231, sum loss: 3519.517578, avg loss: 
2.607050, ppl: 13.558992 +epoch: 1, batch: 8232, sum loss: 4745.703125, avg loss: 2.754326, ppl: 15.710443 +epoch: 1, batch: 8233, sum loss: 4193.180664, avg loss: 2.657275, ppl: 14.257391 +epoch: 1, batch: 8234, sum loss: 4893.555664, avg loss: 2.965791, ppl: 19.410055 +epoch: 1, batch: 8235, sum loss: 5616.531250, avg loss: 2.990698, ppl: 19.899570 +epoch: 1, batch: 8236, sum loss: 4373.055176, avg loss: 2.694427, ppl: 14.797038 +epoch: 1, batch: 8237, sum loss: 4392.662109, avg loss: 2.611571, ppl: 13.620429 +epoch: 1, batch: 8238, sum loss: 5686.041016, avg loss: 3.249166, ppl: 25.768847 +epoch: 1, batch: 8239, sum loss: 4785.604004, avg loss: 2.550962, ppl: 12.819427 +epoch: 1, batch: 8240, sum loss: 5130.708984, avg loss: 2.734920, ppl: 15.408504 +epoch: 1, batch: 8241, sum loss: 4512.906738, avg loss: 2.849057, ppl: 17.271490 +epoch: 1, batch: 8242, sum loss: 5372.163574, avg loss: 2.914901, ppl: 18.446995 +epoch: 1, batch: 8243, sum loss: 4386.780762, avg loss: 2.577427, ppl: 13.163224 +epoch: 1, batch: 8244, sum loss: 4425.427734, avg loss: 2.603193, ppl: 13.506797 +epoch: 1, batch: 8245, sum loss: 4258.176758, avg loss: 2.715674, ppl: 15.114793 +epoch: 1, batch: 8246, sum loss: 4672.132324, avg loss: 2.494465, ppl: 12.115246 +epoch: 1, batch: 8247, sum loss: 4763.299805, avg loss: 2.785555, ppl: 16.208817 +epoch: 1, batch: 8248, sum loss: 5777.757812, avg loss: 2.763155, ppl: 15.849777 +epoch: 1, batch: 8249, sum loss: 4505.813965, avg loss: 2.828508, ppl: 16.920204 +epoch: 1, batch: 8250, sum loss: 5469.738281, avg loss: 2.900180, ppl: 18.177410 +epoch: 1, batch: 8251, sum loss: 5122.807129, avg loss: 3.086028, ppl: 21.889965 +epoch: 1, batch: 8252, sum loss: 3793.224609, avg loss: 2.356040, ppl: 10.549094 +epoch: 1, batch: 8253, sum loss: 4573.161133, avg loss: 2.797040, ppl: 16.396049 +epoch: 1, batch: 8254, sum loss: 4341.217773, avg loss: 2.618346, ppl: 13.713023 +epoch: 1, batch: 8255, sum loss: 4443.832031, avg loss: 2.618640, ppl: 13.717055 
+epoch: 1, batch: 8256, sum loss: 3538.361572, avg loss: 2.314167, ppl: 10.116495 +epoch: 1, batch: 8257, sum loss: 5299.638672, avg loss: 2.783424, ppl: 16.174305 +epoch: 1, batch: 8258, sum loss: 4222.003906, avg loss: 2.467565, ppl: 11.793697 +epoch: 1, batch: 8259, sum loss: 4052.763916, avg loss: 2.725463, ppl: 15.263481 +epoch: 1, batch: 8260, sum loss: 4867.243164, avg loss: 2.914517, ppl: 18.439903 +epoch: 1, batch: 8261, sum loss: 4701.337402, avg loss: 2.650134, ppl: 14.155930 +epoch: 1, batch: 8262, sum loss: 4278.451660, avg loss: 2.888894, ppl: 17.973417 +epoch: 1, batch: 8263, sum loss: 4548.901855, avg loss: 2.723893, ppl: 15.239541 +epoch: 1, batch: 8264, sum loss: 4224.788086, avg loss: 2.548123, ppl: 12.783089 +epoch: 1, batch: 8265, sum loss: 4001.629883, avg loss: 2.568440, ppl: 13.045461 +epoch: 1, batch: 8266, sum loss: 4478.078125, avg loss: 2.740562, ppl: 15.495690 +epoch: 1, batch: 8267, sum loss: 4229.155273, avg loss: 2.566235, ppl: 13.016725 +epoch: 1, batch: 8268, sum loss: 4164.319824, avg loss: 2.750542, ppl: 15.651108 +epoch: 1, batch: 8269, sum loss: 4570.927246, avg loss: 2.737082, ppl: 15.441864 +epoch: 1, batch: 8270, sum loss: 4780.775879, avg loss: 2.779521, ppl: 16.111298 +epoch: 1, batch: 8271, sum loss: 4295.556641, avg loss: 2.649942, ppl: 14.153223 +epoch: 1, batch: 8272, sum loss: 3721.377930, avg loss: 2.485890, ppl: 12.011810 +epoch: 1, batch: 8273, sum loss: 5046.117676, avg loss: 2.893416, ppl: 18.054878 +epoch: 1, batch: 8274, sum loss: 5136.544434, avg loss: 2.847308, ppl: 17.241312 +epoch: 1, batch: 8275, sum loss: 4304.342285, avg loss: 2.807790, ppl: 16.573256 +epoch: 1, batch: 8276, sum loss: 4120.988281, avg loss: 2.767621, ppl: 15.920718 +epoch: 1, batch: 8277, sum loss: 5924.654297, avg loss: 2.986217, ppl: 19.810593 +epoch: 1, batch: 8278, sum loss: 5047.068359, avg loss: 2.744464, ppl: 15.556269 +epoch: 1, batch: 8279, sum loss: 4725.464844, avg loss: 2.802767, ppl: 16.490210 +epoch: 1, batch: 8280, sum 
loss: 3708.690674, avg loss: 2.431928, ppl: 11.380808 +epoch: 1, batch: 8281, sum loss: 3782.154053, avg loss: 2.707340, ppl: 14.989351 +epoch: 1, batch: 8282, sum loss: 5433.150391, avg loss: 2.903875, ppl: 18.244713 +epoch: 1, batch: 8283, sum loss: 5316.180664, avg loss: 2.578167, ppl: 13.172973 +epoch: 1, batch: 8284, sum loss: 4608.994141, avg loss: 2.793330, ppl: 16.335321 +epoch: 1, batch: 8285, sum loss: 4014.708496, avg loss: 2.368560, ppl: 10.681994 +epoch: 1, batch: 8286, sum loss: 4461.103516, avg loss: 2.507647, ppl: 12.276005 +epoch: 1, batch: 8287, sum loss: 4904.107422, avg loss: 2.844610, ppl: 17.194851 +epoch: 1, batch: 8288, sum loss: 4982.443848, avg loss: 2.965740, ppl: 19.409069 +epoch: 1, batch: 8289, sum loss: 4713.137695, avg loss: 2.770804, ppl: 15.971469 +epoch: 1, batch: 8290, sum loss: 5147.931641, avg loss: 2.929955, ppl: 18.726793 +epoch: 1, batch: 8291, sum loss: 4024.143799, avg loss: 2.540495, ppl: 12.685948 +epoch: 1, batch: 8292, sum loss: 4953.725098, avg loss: 2.742926, ppl: 15.532372 +epoch: 1, batch: 8293, sum loss: 4327.285645, avg loss: 2.745739, ppl: 15.576128 +epoch: 1, batch: 8294, sum loss: 4232.737305, avg loss: 2.859958, ppl: 17.460789 +epoch: 1, batch: 8295, sum loss: 4377.639648, avg loss: 2.656335, ppl: 14.243987 +epoch: 1, batch: 8296, sum loss: 3909.861572, avg loss: 2.709537, ppl: 15.022315 +epoch: 1, batch: 8297, sum loss: 4424.852051, avg loss: 2.607456, ppl: 13.564495 +epoch: 1, batch: 8298, sum loss: 5606.528809, avg loss: 2.918547, ppl: 18.514370 +epoch: 1, batch: 8299, sum loss: 4819.600586, avg loss: 2.926291, ppl: 18.658289 +epoch: 1, batch: 8300, sum loss: 5118.446289, avg loss: 2.718240, ppl: 15.153628 +epoch: 1, batch: 8301, sum loss: 5495.331543, avg loss: 2.932407, ppl: 18.772770 +epoch: 1, batch: 8302, sum loss: 3774.893311, avg loss: 2.498275, ppl: 12.161495 +epoch: 1, batch: 8303, sum loss: 4045.976562, avg loss: 2.554278, ppl: 12.862012 +epoch: 1, batch: 8304, sum loss: 4502.807617, avg loss: 
2.633221, ppl: 13.918529 +epoch: 1, batch: 8305, sum loss: 4997.258789, avg loss: 2.798017, ppl: 16.412073 +epoch: 1, batch: 8306, sum loss: 4245.026855, avg loss: 2.968550, ppl: 19.463686 +epoch: 1, batch: 8307, sum loss: 4562.426270, avg loss: 2.668086, ppl: 14.412352 +epoch: 1, batch: 8308, sum loss: 4671.664062, avg loss: 2.663435, ppl: 14.345474 +epoch: 1, batch: 8309, sum loss: 4303.383301, avg loss: 2.805335, ppl: 16.532606 +epoch: 1, batch: 8310, sum loss: 4141.158691, avg loss: 2.601230, ppl: 13.480314 +epoch: 1, batch: 8311, sum loss: 5714.667969, avg loss: 2.834657, ppl: 17.024559 +epoch: 1, batch: 8312, sum loss: 4927.563477, avg loss: 2.656368, ppl: 14.244466 +epoch: 1, batch: 8313, sum loss: 4399.318359, avg loss: 2.589358, ppl: 13.321212 +epoch: 1, batch: 8314, sum loss: 4314.862305, avg loss: 2.658572, ppl: 14.275888 +epoch: 1, batch: 8315, sum loss: 4456.723633, avg loss: 2.652812, ppl: 14.193892 +epoch: 1, batch: 8316, sum loss: 5348.187988, avg loss: 2.943417, ppl: 18.980585 +epoch: 1, batch: 8317, sum loss: 5410.326172, avg loss: 2.972707, ppl: 19.544752 +epoch: 1, batch: 8318, sum loss: 4387.999023, avg loss: 2.794904, ppl: 16.361057 +epoch: 1, batch: 8319, sum loss: 5384.083984, avg loss: 2.826291, ppl: 16.882725 +epoch: 1, batch: 8320, sum loss: 4374.223145, avg loss: 2.696808, ppl: 14.832316 +epoch: 1, batch: 8321, sum loss: 4259.328613, avg loss: 2.617903, ppl: 13.706953 +epoch: 1, batch: 8322, sum loss: 4575.079590, avg loss: 2.720023, ppl: 15.180677 +epoch: 1, batch: 8323, sum loss: 3992.769043, avg loss: 2.445051, ppl: 11.531142 +epoch: 1, batch: 8324, sum loss: 4152.025879, avg loss: 2.729800, ppl: 15.329824 +epoch: 1, batch: 8325, sum loss: 3444.174316, avg loss: 2.461883, ppl: 11.726873 +epoch: 1, batch: 8326, sum loss: 4281.100586, avg loss: 2.496269, ppl: 12.137120 +epoch: 1, batch: 8327, sum loss: 4773.138672, avg loss: 2.684555, ppl: 14.651681 +epoch: 1, batch: 8328, sum loss: 3943.819824, avg loss: 2.577660, ppl: 13.166294 
+epoch: 1, batch: 8329, sum loss: 4819.214844, avg loss: 2.614875, ppl: 13.665509 +epoch: 1, batch: 8330, sum loss: 4054.545410, avg loss: 2.619215, ppl: 13.724952 +epoch: 1, batch: 8331, sum loss: 4015.223877, avg loss: 2.401450, ppl: 11.039170 +epoch: 1, batch: 8332, sum loss: 5079.075684, avg loss: 2.874406, ppl: 17.714905 +epoch: 1, batch: 8333, sum loss: 4312.373047, avg loss: 2.700296, ppl: 14.884139 +epoch: 1, batch: 8334, sum loss: 4449.894043, avg loss: 2.889542, ppl: 17.985064 +epoch: 1, batch: 8335, sum loss: 4596.134766, avg loss: 2.705200, ppl: 14.957311 +epoch: 1, batch: 8336, sum loss: 4938.978027, avg loss: 2.793540, ppl: 16.338753 +epoch: 1, batch: 8337, sum loss: 4100.721191, avg loss: 2.593751, ppl: 13.379871 +epoch: 1, batch: 8338, sum loss: 3848.775879, avg loss: 2.384619, ppl: 10.854931 +epoch: 1, batch: 8339, sum loss: 4217.016602, avg loss: 2.607926, ppl: 13.570881 +epoch: 1, batch: 8340, sum loss: 4451.688477, avg loss: 2.594224, ppl: 13.386198 +epoch: 1, batch: 8341, sum loss: 4008.116943, avg loss: 2.675645, ppl: 14.521718 +epoch: 1, batch: 8342, sum loss: 5772.138672, avg loss: 3.086705, ppl: 21.904787 +epoch: 1, batch: 8343, sum loss: 5211.538574, avg loss: 2.869790, ppl: 17.633312 +epoch: 1, batch: 8344, sum loss: 4524.505371, avg loss: 2.757164, ppl: 15.755091 +epoch: 1, batch: 8345, sum loss: 4239.743164, avg loss: 2.586787, ppl: 13.287005 +epoch: 1, batch: 8346, sum loss: 4914.870117, avg loss: 2.834412, ppl: 17.020384 +epoch: 1, batch: 8347, sum loss: 4907.396484, avg loss: 2.794645, ppl: 16.356823 +epoch: 1, batch: 8348, sum loss: 4685.081543, avg loss: 2.584160, ppl: 13.252151 +epoch: 1, batch: 8349, sum loss: 4446.163086, avg loss: 2.684881, ppl: 14.656457 +epoch: 1, batch: 8350, sum loss: 4311.130371, avg loss: 2.772431, ppl: 15.997482 +epoch: 1, batch: 8351, sum loss: 5232.025391, avg loss: 3.061454, ppl: 21.358597 +epoch: 1, batch: 8352, sum loss: 4674.793945, avg loss: 2.814445, ppl: 16.683922 +epoch: 1, batch: 8353, sum 
loss: 3804.769531, avg loss: 2.709950, ppl: 15.028520 +epoch: 1, batch: 8354, sum loss: 3589.196777, avg loss: 2.404016, ppl: 11.067540 +epoch: 1, batch: 8355, sum loss: 4751.414062, avg loss: 2.845158, ppl: 17.204279 +epoch: 1, batch: 8356, sum loss: 5137.685059, avg loss: 2.881484, ppl: 17.840721 +epoch: 1, batch: 8357, sum loss: 4703.121582, avg loss: 2.769801, ppl: 15.955457 +epoch: 1, batch: 8358, sum loss: 3782.400879, avg loss: 2.533423, ppl: 12.596553 +epoch: 1, batch: 8359, sum loss: 5329.690430, avg loss: 3.004335, ppl: 20.172794 +epoch: 1, batch: 8360, sum loss: 4904.843750, avg loss: 3.069364, ppl: 21.528208 +epoch: 1, batch: 8361, sum loss: 5029.783203, avg loss: 3.135775, ppl: 23.006460 +epoch: 1, batch: 8362, sum loss: 4176.682129, avg loss: 2.972728, ppl: 19.545156 +epoch: 1, batch: 8363, sum loss: 5629.700195, avg loss: 3.015372, ppl: 20.396683 +epoch: 1, batch: 8364, sum loss: 3996.299316, avg loss: 2.483716, ppl: 11.985723 +epoch: 1, batch: 8365, sum loss: 4745.989258, avg loss: 2.808278, ppl: 16.581333 +epoch: 1, batch: 8366, sum loss: 5055.717285, avg loss: 3.060362, ppl: 21.335272 +epoch: 1, batch: 8367, sum loss: 4147.450684, avg loss: 2.708982, ppl: 15.013980 +epoch: 1, batch: 8368, sum loss: 3660.721680, avg loss: 2.375550, ppl: 10.756923 +epoch: 1, batch: 8369, sum loss: 4700.108887, avg loss: 2.742187, ppl: 15.520896 +epoch: 1, batch: 8370, sum loss: 4940.308105, avg loss: 3.025296, ppl: 20.600096 +epoch: 1, batch: 8371, sum loss: 5178.547363, avg loss: 2.902773, ppl: 18.224619 +epoch: 1, batch: 8372, sum loss: 3922.420410, avg loss: 2.499949, ppl: 12.181875 +epoch: 1, batch: 8373, sum loss: 4842.379395, avg loss: 2.987279, ppl: 19.831646 +epoch: 1, batch: 8374, sum loss: 5126.518066, avg loss: 2.751754, ppl: 15.670094 +epoch: 1, batch: 8375, sum loss: 4054.760010, avg loss: 2.571186, ppl: 13.081327 +epoch: 1, batch: 8376, sum loss: 5176.918945, avg loss: 2.860176, ppl: 17.464605 +epoch: 1, batch: 8377, sum loss: 5342.423340, avg loss: 
2.783962, ppl: 16.183012 +epoch: 1, batch: 8378, sum loss: 5538.089355, avg loss: 2.821238, ppl: 16.797626 +epoch: 1, batch: 8379, sum loss: 5312.031250, avg loss: 2.921909, ppl: 18.576723 +epoch: 1, batch: 8380, sum loss: 4470.404297, avg loss: 2.801005, ppl: 16.461184 +epoch: 1, batch: 8381, sum loss: 4284.246582, avg loss: 2.713266, ppl: 15.078436 +epoch: 1, batch: 8382, sum loss: 4123.571777, avg loss: 2.546987, ppl: 12.768575 +epoch: 1, batch: 8383, sum loss: 5208.694824, avg loss: 2.966227, ppl: 19.418520 +epoch: 1, batch: 8384, sum loss: 5346.239746, avg loss: 2.983393, ppl: 19.754730 +epoch: 1, batch: 8385, sum loss: 5136.531738, avg loss: 2.797675, ppl: 16.406460 +epoch: 1, batch: 8386, sum loss: 3290.861084, avg loss: 2.461377, ppl: 11.720942 +epoch: 1, batch: 8387, sum loss: 5599.433594, avg loss: 2.726112, ppl: 15.273387 +epoch: 1, batch: 8388, sum loss: 4073.067383, avg loss: 2.550449, ppl: 12.812860 +epoch: 1, batch: 8389, sum loss: 4465.783203, avg loss: 2.805140, ppl: 16.529394 +epoch: 1, batch: 8390, sum loss: 4821.888672, avg loss: 2.885631, ppl: 17.914865 +epoch: 1, batch: 8391, sum loss: 4498.423828, avg loss: 2.666523, ppl: 14.389845 +epoch: 1, batch: 8392, sum loss: 5090.550781, avg loss: 2.920568, ppl: 18.551830 +epoch: 1, batch: 8393, sum loss: 5746.724609, avg loss: 3.019824, ppl: 20.487680 +epoch: 1, batch: 8394, sum loss: 4336.798340, avg loss: 2.595331, ppl: 13.401025 +epoch: 1, batch: 8395, sum loss: 4848.480469, avg loss: 2.659616, ppl: 14.290807 +epoch: 1, batch: 8396, sum loss: 4170.840820, avg loss: 2.694342, ppl: 14.795774 +epoch: 1, batch: 8397, sum loss: 5643.011719, avg loss: 3.051926, ppl: 21.156055 +epoch: 1, batch: 8398, sum loss: 5042.448242, avg loss: 2.599200, ppl: 13.452974 +epoch: 1, batch: 8399, sum loss: 4713.201172, avg loss: 2.813852, ppl: 16.674017 +epoch: 1, batch: 8400, sum loss: 5648.073242, avg loss: 2.842513, ppl: 17.158829 +epoch: 1, batch: 8401, sum loss: 4960.062988, avg loss: 2.659551, ppl: 14.289873 
+epoch: 1, batch: 8402, sum loss: 4241.179688, avg loss: 2.697952, ppl: 14.849282 +epoch: 1, batch: 8403, sum loss: 5937.323242, avg loss: 2.850371, ppl: 17.294203 +epoch: 1, batch: 8404, sum loss: 4763.668457, avg loss: 2.736168, ppl: 15.427752 +epoch: 1, batch: 8405, sum loss: 5428.287109, avg loss: 2.814042, ppl: 16.677193 +epoch: 1, batch: 8406, sum loss: 4535.241699, avg loss: 2.846982, ppl: 17.235678 +epoch: 1, batch: 8407, sum loss: 5123.723633, avg loss: 2.973722, ppl: 19.564613 +epoch: 1, batch: 8408, sum loss: 4983.610352, avg loss: 2.846151, ppl: 17.221371 +epoch: 1, batch: 8409, sum loss: 4649.552246, avg loss: 2.659927, ppl: 14.295247 +epoch: 1, batch: 8410, sum loss: 4640.064453, avg loss: 2.648439, ppl: 14.131957 +epoch: 1, batch: 8411, sum loss: 4895.518555, avg loss: 2.959806, ppl: 19.294224 +epoch: 1, batch: 8412, sum loss: 5018.582031, avg loss: 2.946907, ppl: 19.046942 +epoch: 1, batch: 8413, sum loss: 4865.672852, avg loss: 2.783566, ppl: 16.176600 +epoch: 1, batch: 8414, sum loss: 4493.070312, avg loss: 2.783811, ppl: 16.180561 +epoch: 1, batch: 8415, sum loss: 3683.358398, avg loss: 2.554340, ppl: 12.862809 +epoch: 1, batch: 8416, sum loss: 4619.606445, avg loss: 2.730264, ppl: 15.336935 +epoch: 1, batch: 8417, sum loss: 5174.158203, avg loss: 2.853921, ppl: 17.355700 +epoch: 1, batch: 8418, sum loss: 4246.082520, avg loss: 2.629153, ppl: 13.862027 +epoch: 1, batch: 8419, sum loss: 4562.876465, avg loss: 2.728993, ppl: 15.317457 +epoch: 1, batch: 8420, sum loss: 5095.404297, avg loss: 2.829208, ppl: 16.932053 +epoch: 1, batch: 8421, sum loss: 4265.090332, avg loss: 2.767742, ppl: 15.922639 +epoch: 1, batch: 8422, sum loss: 4747.312500, avg loss: 2.753662, ppl: 15.700014 +epoch: 1, batch: 8423, sum loss: 3797.287598, avg loss: 2.516427, ppl: 12.384263 +epoch: 1, batch: 8424, sum loss: 4042.429688, avg loss: 2.347520, ppl: 10.459599 +epoch: 1, batch: 8425, sum loss: 4753.254883, avg loss: 2.661397, ppl: 14.316275 +epoch: 1, batch: 8426, sum 
loss: 4209.284180, avg loss: 2.782078, ppl: 16.152555 +epoch: 1, batch: 8427, sum loss: 4476.654785, avg loss: 2.729668, ppl: 15.327792 +epoch: 1, batch: 8428, sum loss: 3706.679199, avg loss: 2.509600, ppl: 12.300011 +epoch: 1, batch: 8429, sum loss: 4852.818359, avg loss: 2.939321, ppl: 18.903004 +epoch: 1, batch: 8430, sum loss: 4599.613770, avg loss: 2.720056, ppl: 15.181166 +epoch: 1, batch: 8431, sum loss: 4521.428711, avg loss: 2.730331, ppl: 15.337970 +epoch: 1, batch: 8432, sum loss: 4987.423340, avg loss: 2.758531, ppl: 15.776644 +epoch: 1, batch: 8433, sum loss: 4309.241211, avg loss: 2.610080, ppl: 13.600132 +epoch: 1, batch: 8434, sum loss: 4794.605957, avg loss: 2.860744, ppl: 17.474514 +epoch: 1, batch: 8435, sum loss: 3890.143799, avg loss: 2.500093, ppl: 12.183621 +epoch: 1, batch: 8436, sum loss: 4318.380371, avg loss: 2.637985, ppl: 13.984988 +epoch: 1, batch: 8437, sum loss: 5669.776855, avg loss: 3.014235, ppl: 20.373508 +epoch: 1, batch: 8438, sum loss: 3946.840576, avg loss: 2.641794, ppl: 14.038369 +epoch: 1, batch: 8439, sum loss: 5362.302246, avg loss: 2.819297, ppl: 16.765059 +epoch: 1, batch: 8440, sum loss: 4205.344238, avg loss: 2.313171, ppl: 10.106418 +epoch: 1, batch: 8441, sum loss: 4826.637695, avg loss: 2.690434, ppl: 14.738064 +epoch: 1, batch: 8442, sum loss: 5417.750000, avg loss: 2.950844, ppl: 19.122087 +epoch: 1, batch: 8443, sum loss: 4377.679688, avg loss: 2.508699, ppl: 12.288931 +epoch: 1, batch: 8444, sum loss: 4621.971191, avg loss: 2.709244, ppl: 15.017925 +epoch: 1, batch: 8445, sum loss: 4946.105469, avg loss: 2.770928, ppl: 15.973445 +epoch: 1, batch: 8446, sum loss: 4091.089844, avg loss: 2.642823, ppl: 14.052818 +epoch: 1, batch: 8447, sum loss: 4329.163086, avg loss: 2.759186, ppl: 15.786987 +epoch: 1, batch: 8448, sum loss: 5156.539062, avg loss: 2.667635, ppl: 14.405866 +epoch: 1, batch: 8449, sum loss: 4921.203125, avg loss: 2.775636, ppl: 16.048838 +epoch: 1, batch: 8450, sum loss: 3600.446777, avg loss: 
2.155956, ppl: 8.636145 +epoch: 1, batch: 8451, sum loss: 4378.487793, avg loss: 2.620280, ppl: 13.739571 +epoch: 1, batch: 8452, sum loss: 4214.704102, avg loss: 2.657443, ppl: 14.259777 +epoch: 1, batch: 8453, sum loss: 3932.734375, avg loss: 2.553724, ppl: 12.854881 +epoch: 1, batch: 8454, sum loss: 4325.847656, avg loss: 2.532698, ppl: 12.587418 +epoch: 1, batch: 8455, sum loss: 4427.745117, avg loss: 2.654524, ppl: 14.218211 +epoch: 1, batch: 8456, sum loss: 3812.125000, avg loss: 2.399072, ppl: 11.012948 +epoch: 1, batch: 8457, sum loss: 4713.989258, avg loss: 2.821059, ppl: 16.794626 +epoch: 1, batch: 8458, sum loss: 4633.086426, avg loss: 2.726949, ppl: 15.286180 +epoch: 1, batch: 8459, sum loss: 5368.812012, avg loss: 3.034942, ppl: 20.799770 +epoch: 1, batch: 8460, sum loss: 5386.297363, avg loss: 2.895859, ppl: 18.099037 +epoch: 1, batch: 8461, sum loss: 5403.727051, avg loss: 3.261151, ppl: 26.079533 +epoch: 1, batch: 8462, sum loss: 4791.078613, avg loss: 2.767810, ppl: 15.923717 +epoch: 1, batch: 8463, sum loss: 5497.326660, avg loss: 2.955552, ppl: 19.212326 +epoch: 1, batch: 8464, sum loss: 4631.198730, avg loss: 2.959232, ppl: 19.283163 +epoch: 1, batch: 8465, sum loss: 4595.431152, avg loss: 2.838438, ppl: 17.089052 +epoch: 1, batch: 8466, sum loss: 4443.027344, avg loss: 2.864621, ppl: 17.542402 +epoch: 1, batch: 8467, sum loss: 5509.272461, avg loss: 2.822373, ppl: 16.816715 +epoch: 1, batch: 8468, sum loss: 4176.404297, avg loss: 2.701426, ppl: 14.900962 +epoch: 1, batch: 8469, sum loss: 4673.041016, avg loss: 2.842482, ppl: 17.158306 +epoch: 1, batch: 8470, sum loss: 4450.880859, avg loss: 2.663603, ppl: 14.347896 +epoch: 1, batch: 8471, sum loss: 4870.727539, avg loss: 2.673286, ppl: 14.487500 +epoch: 1, batch: 8472, sum loss: 5591.057129, avg loss: 2.931860, ppl: 18.762495 +epoch: 1, batch: 8473, sum loss: 4594.061035, avg loss: 2.785968, ppl: 16.215504 +epoch: 1, batch: 8474, sum loss: 4533.806641, avg loss: 2.430995, ppl: 11.370195 +epoch: 
1, batch: 8475, sum loss: 5319.192871, avg loss: 2.951828, ppl: 19.140902 +epoch: 1, batch: 8476, sum loss: 4502.691406, avg loss: 2.653324, ppl: 14.201170 +epoch: 1, batch: 8477, sum loss: 5045.522461, avg loss: 2.779902, ppl: 16.117445 +epoch: 1, batch: 8478, sum loss: 4958.301758, avg loss: 2.946109, ppl: 19.031757 +epoch: 1, batch: 8479, sum loss: 4949.290039, avg loss: 2.812097, ppl: 16.644779 +epoch: 1, batch: 8480, sum loss: 3647.458984, avg loss: 2.292557, ppl: 9.900225 +epoch: 1, batch: 8481, sum loss: 4056.354492, avg loss: 2.700635, ppl: 14.889190 +epoch: 1, batch: 8482, sum loss: 4797.467285, avg loss: 2.766705, ppl: 15.906141 +epoch: 1, batch: 8483, sum loss: 4614.791504, avg loss: 2.720986, ppl: 15.195292 +epoch: 1, batch: 8484, sum loss: 5099.044922, avg loss: 2.679477, ppl: 14.577467 +epoch: 1, batch: 8485, sum loss: 4107.318359, avg loss: 2.648174, ppl: 14.128221 +epoch: 1, batch: 8486, sum loss: 4688.883789, avg loss: 2.589113, ppl: 13.317957 +epoch: 1, batch: 8487, sum loss: 5431.522461, avg loss: 2.793993, ppl: 16.346163 +epoch: 1, batch: 8488, sum loss: 5985.632812, avg loss: 3.094950, ppl: 22.086128 +epoch: 1, batch: 8489, sum loss: 4986.015625, avg loss: 2.695144, ppl: 14.807647 +epoch: 1, batch: 8490, sum loss: 5320.314453, avg loss: 3.012635, ppl: 20.340937 +epoch: 1, batch: 8491, sum loss: 4928.118164, avg loss: 2.658100, ppl: 14.269157 +epoch: 1, batch: 8492, sum loss: 4499.765625, avg loss: 2.891880, ppl: 18.027174 +epoch: 1, batch: 8493, sum loss: 5166.865723, avg loss: 3.055509, ppl: 21.231993 +epoch: 1, batch: 8494, sum loss: 5311.618652, avg loss: 2.937842, ppl: 18.875072 +epoch: 1, batch: 8495, sum loss: 5262.157715, avg loss: 2.772475, ppl: 15.998184 +epoch: 1, batch: 8496, sum loss: 3653.023193, avg loss: 2.461606, ppl: 11.723622 +epoch: 1, batch: 8497, sum loss: 4500.791504, avg loss: 2.598609, ppl: 13.445029 +epoch: 1, batch: 8498, sum loss: 5291.425293, avg loss: 3.013340, ppl: 20.355278 +epoch: 1, batch: 8499, sum loss: 
4364.037598, avg loss: 2.719027, ppl: 15.165552 +epoch: 1, batch: 8500, sum loss: 4893.150879, avg loss: 2.712390, ppl: 15.065234 +epoch: 1, batch: 8501, sum loss: 4373.659668, avg loss: 2.658760, ppl: 14.278567 +epoch: 1, batch: 8502, sum loss: 4590.028809, avg loss: 2.418350, ppl: 11.227324 +epoch: 1, batch: 8503, sum loss: 4731.399902, avg loss: 2.703657, ppl: 14.934249 +epoch: 1, batch: 8504, sum loss: 4348.392578, avg loss: 2.821799, ppl: 16.807064 +epoch: 1, batch: 8505, sum loss: 4211.125488, avg loss: 2.604283, ppl: 13.521528 +epoch: 1, batch: 8506, sum loss: 4691.444336, avg loss: 2.963641, ppl: 19.368366 +epoch: 1, batch: 8507, sum loss: 4204.020020, avg loss: 2.696613, ppl: 14.829424 +epoch: 1, batch: 8508, sum loss: 5041.394043, avg loss: 3.035156, ppl: 20.804224 +epoch: 1, batch: 8509, sum loss: 4336.081055, avg loss: 2.720252, ppl: 15.184141 +epoch: 1, batch: 8510, sum loss: 4627.673828, avg loss: 2.738269, ppl: 15.460195 +epoch: 1, batch: 8511, sum loss: 4449.673828, avg loss: 2.881913, ppl: 17.848389 +epoch: 1, batch: 8512, sum loss: 4376.026367, avg loss: 2.681389, ppl: 14.605364 +epoch: 1, batch: 8513, sum loss: 3801.026855, avg loss: 2.549314, ppl: 12.798322 +epoch: 1, batch: 8514, sum loss: 5278.778809, avg loss: 2.892482, ppl: 18.038017 +epoch: 1, batch: 8515, sum loss: 4988.983887, avg loss: 2.919242, ppl: 18.527233 +epoch: 1, batch: 8516, sum loss: 4618.901367, avg loss: 2.718600, ppl: 15.159085 +epoch: 1, batch: 8517, sum loss: 4604.244629, avg loss: 2.414392, ppl: 11.182963 +epoch: 1, batch: 8518, sum loss: 4402.754395, avg loss: 2.806089, ppl: 16.545090 +epoch: 1, batch: 8519, sum loss: 4754.253906, avg loss: 2.843453, ppl: 17.174976 +epoch: 1, batch: 8520, sum loss: 4716.864258, avg loss: 2.601690, ppl: 13.486515 +epoch: 1, batch: 8521, sum loss: 4645.025879, avg loss: 2.689650, ppl: 14.726525 +epoch: 1, batch: 8522, sum loss: 5300.970215, avg loss: 2.909424, ppl: 18.346226 +epoch: 1, batch: 8523, sum loss: 4109.948730, avg loss: 
2.729050, ppl: 15.318323 +epoch: 1, batch: 8524, sum loss: 5487.700684, avg loss: 2.905082, ppl: 18.266750 +epoch: 1, batch: 8525, sum loss: 4936.914551, avg loss: 2.798704, ppl: 16.423355 +epoch: 1, batch: 8526, sum loss: 4459.881348, avg loss: 2.492946, ppl: 12.096866 +epoch: 1, batch: 8527, sum loss: 5281.918457, avg loss: 2.772660, ppl: 16.001148 +epoch: 1, batch: 8528, sum loss: 4451.807129, avg loss: 2.765098, ppl: 15.880590 +epoch: 1, batch: 8529, sum loss: 4845.326660, avg loss: 2.678456, ppl: 14.562593 +epoch: 1, batch: 8530, sum loss: 4752.463867, avg loss: 2.798860, ppl: 16.425907 +epoch: 1, batch: 8531, sum loss: 4585.876953, avg loss: 2.784382, ppl: 16.189808 +epoch: 1, batch: 8532, sum loss: 4493.570312, avg loss: 2.677932, ppl: 14.554966 +epoch: 1, batch: 8533, sum loss: 4577.401855, avg loss: 2.887951, ppl: 17.956472 +epoch: 1, batch: 8534, sum loss: 4382.900391, avg loss: 2.947478, ppl: 19.057835 +epoch: 1, batch: 8535, sum loss: 4464.883789, avg loss: 3.018853, ppl: 20.467800 +epoch: 1, batch: 8536, sum loss: 4541.669434, avg loss: 2.836771, ppl: 17.060579 +epoch: 1, batch: 8537, sum loss: 4427.642090, avg loss: 2.701429, ppl: 14.901009 +epoch: 1, batch: 8538, sum loss: 5343.659180, avg loss: 2.962117, ppl: 19.338867 +epoch: 1, batch: 8539, sum loss: 4553.123047, avg loss: 2.651790, ppl: 14.179396 +epoch: 1, batch: 8540, sum loss: 4409.466309, avg loss: 2.839322, ppl: 17.104158 +epoch: 1, batch: 8541, sum loss: 4138.398926, avg loss: 2.755259, ppl: 15.725110 +epoch: 1, batch: 8542, sum loss: 4394.236328, avg loss: 2.775892, ppl: 16.052933 +epoch: 1, batch: 8543, sum loss: 3759.716064, avg loss: 2.484941, ppl: 12.000415 +epoch: 1, batch: 8544, sum loss: 4666.591797, avg loss: 2.781044, ppl: 16.135859 +epoch: 1, batch: 8545, sum loss: 5068.900391, avg loss: 2.819188, ppl: 16.763235 +epoch: 1, batch: 8546, sum loss: 4988.016602, avg loss: 2.675975, ppl: 14.526501 +epoch: 1, batch: 8547, sum loss: 5495.437500, avg loss: 2.869680, ppl: 17.631378 
+epoch: 1, batch: 8548, sum loss: 3728.154297, avg loss: 2.512233, ppl: 12.332440 +epoch: 1, batch: 8549, sum loss: 4452.550293, avg loss: 2.606879, ppl: 13.556681 +epoch: 1, batch: 8550, sum loss: 3923.370117, avg loss: 2.654513, ppl: 14.218061 +epoch: 1, batch: 8551, sum loss: 4491.570312, avg loss: 2.855417, ppl: 17.381676 +epoch: 1, batch: 8552, sum loss: 4078.587402, avg loss: 2.720872, ppl: 15.193567 +epoch: 1, batch: 8553, sum loss: 5379.021484, avg loss: 2.945795, ppl: 19.025784 +epoch: 1, batch: 8554, sum loss: 3542.847900, avg loss: 2.505550, ppl: 12.250297 +epoch: 1, batch: 8555, sum loss: 4499.136719, avg loss: 2.723449, ppl: 15.232762 +epoch: 1, batch: 8556, sum loss: 5254.035156, avg loss: 2.843093, ppl: 17.168781 +epoch: 1, batch: 8557, sum loss: 4856.123535, avg loss: 2.871747, ppl: 17.667852 +epoch: 1, batch: 8558, sum loss: 4921.413086, avg loss: 2.788336, ppl: 16.253952 +epoch: 1, batch: 8559, sum loss: 4767.093262, avg loss: 2.682664, ppl: 14.623995 +epoch: 1, batch: 8560, sum loss: 4699.422852, avg loss: 2.689996, ppl: 14.731613 +epoch: 1, batch: 8561, sum loss: 4404.652344, avg loss: 2.901616, ppl: 18.203531 +epoch: 1, batch: 8562, sum loss: 3552.130615, avg loss: 2.343094, ppl: 10.413407 +epoch: 1, batch: 8563, sum loss: 3789.799316, avg loss: 2.585129, ppl: 13.265003 +epoch: 1, batch: 8564, sum loss: 4816.882812, avg loss: 3.121765, ppl: 22.686378 +epoch: 1, batch: 8565, sum loss: 4669.227051, avg loss: 2.826409, ppl: 16.884716 +epoch: 1, batch: 8566, sum loss: 4087.486572, avg loss: 2.735935, ppl: 15.424154 +epoch: 1, batch: 8567, sum loss: 5086.740234, avg loss: 2.702837, ppl: 14.922012 +epoch: 1, batch: 8568, sum loss: 4263.662109, avg loss: 2.648237, ppl: 14.129110 +epoch: 1, batch: 8569, sum loss: 4423.715820, avg loss: 2.776972, ppl: 16.070280 +epoch: 1, batch: 8570, sum loss: 4927.218750, avg loss: 2.657615, ppl: 14.262236 +epoch: 1, batch: 8571, sum loss: 4126.198242, avg loss: 2.728967, ppl: 15.317060 +epoch: 1, batch: 8572, sum 
loss: 5306.269531, avg loss: 2.946290, ppl: 19.035212 +epoch: 1, batch: 8573, sum loss: 4430.814941, avg loss: 2.631125, ppl: 13.889393 +epoch: 1, batch: 8574, sum loss: 5471.723633, avg loss: 2.960889, ppl: 19.315142 +epoch: 1, batch: 8575, sum loss: 4543.907227, avg loss: 3.045514, ppl: 21.020840 +epoch: 1, batch: 8576, sum loss: 4977.165527, avg loss: 2.912326, ppl: 18.399553 +epoch: 1, batch: 8577, sum loss: 4530.584961, avg loss: 2.777796, ppl: 16.083530 +epoch: 1, batch: 8578, sum loss: 4037.030273, avg loss: 2.461604, ppl: 11.723599 +epoch: 1, batch: 8579, sum loss: 4556.177246, avg loss: 2.632107, ppl: 13.903033 +epoch: 1, batch: 8580, sum loss: 4654.463867, avg loss: 2.690441, ppl: 14.738179 +epoch: 1, batch: 8581, sum loss: 5128.627930, avg loss: 2.665607, ppl: 14.376673 +epoch: 1, batch: 8582, sum loss: 4596.271973, avg loss: 2.667598, ppl: 14.405333 +epoch: 1, batch: 8583, sum loss: 3931.065674, avg loss: 2.610270, ppl: 13.602716 +epoch: 1, batch: 8584, sum loss: 4559.771484, avg loss: 2.629626, ppl: 13.868579 +epoch: 1, batch: 8585, sum loss: 4798.481934, avg loss: 3.035093, ppl: 20.802910 +epoch: 1, batch: 8586, sum loss: 4817.772949, avg loss: 2.663225, ppl: 14.342464 +epoch: 1, batch: 8587, sum loss: 4018.473145, avg loss: 2.854029, ppl: 17.357582 +epoch: 1, batch: 8588, sum loss: 5400.886230, avg loss: 2.796937, ppl: 16.394361 +epoch: 1, batch: 8589, sum loss: 5028.606445, avg loss: 2.891666, ppl: 18.023306 +epoch: 1, batch: 8590, sum loss: 4964.523438, avg loss: 3.072106, ppl: 21.587320 +epoch: 1, batch: 8591, sum loss: 4279.162109, avg loss: 2.722113, ppl: 15.212438 +epoch: 1, batch: 8592, sum loss: 4360.313965, avg loss: 2.833212, ppl: 16.999983 +epoch: 1, batch: 8593, sum loss: 4531.982910, avg loss: 2.702435, ppl: 14.916001 +epoch: 1, batch: 8594, sum loss: 4819.812500, avg loss: 2.712331, ppl: 15.064350 +epoch: 1, batch: 8595, sum loss: 5733.064941, avg loss: 2.956712, ppl: 19.234629 +epoch: 1, batch: 8596, sum loss: 5207.563965, avg loss: 
2.923955, ppl: 18.614767 +epoch: 1, batch: 8597, sum loss: 3950.475098, avg loss: 2.470591, ppl: 11.829437 +epoch: 1, batch: 8598, sum loss: 3966.098389, avg loss: 2.480362, ppl: 11.945584 +epoch: 1, batch: 8599, sum loss: 4766.520508, avg loss: 2.977215, ppl: 19.633055 +epoch: 1, batch: 8600, sum loss: 5384.430664, avg loss: 2.896412, ppl: 18.109060 +epoch: 1, batch: 8601, sum loss: 3242.965576, avg loss: 2.539519, ppl: 12.673572 +epoch: 1, batch: 8602, sum loss: 5542.491211, avg loss: 2.959152, ppl: 19.281609 +epoch: 1, batch: 8603, sum loss: 4024.859863, avg loss: 2.756753, ppl: 15.748631 +epoch: 1, batch: 8604, sum loss: 3734.282715, avg loss: 2.540329, ppl: 12.683837 +epoch: 1, batch: 8605, sum loss: 4173.764160, avg loss: 2.753143, ppl: 15.691868 +epoch: 1, batch: 8606, sum loss: 3417.742188, avg loss: 2.684794, ppl: 14.655178 +epoch: 1, batch: 8607, sum loss: 5068.244141, avg loss: 2.690151, ppl: 14.733900 +epoch: 1, batch: 8608, sum loss: 5203.381348, avg loss: 2.949763, ppl: 19.101423 +epoch: 1, batch: 8609, sum loss: 4163.033691, avg loss: 2.663489, ppl: 14.346261 +epoch: 1, batch: 8610, sum loss: 4404.484375, avg loss: 2.600050, ppl: 13.464408 +epoch: 1, batch: 8611, sum loss: 5114.650391, avg loss: 2.977096, ppl: 19.630724 +epoch: 1, batch: 8612, sum loss: 4032.129395, avg loss: 2.670285, ppl: 14.444078 +epoch: 1, batch: 8613, sum loss: 4444.290039, avg loss: 2.691878, ppl: 14.759366 +epoch: 1, batch: 8614, sum loss: 4254.826172, avg loss: 2.522126, ppl: 12.455048 +epoch: 1, batch: 8615, sum loss: 5255.061523, avg loss: 2.814709, ppl: 16.688318 +epoch: 1, batch: 8616, sum loss: 4184.071289, avg loss: 2.559065, ppl: 12.923726 +epoch: 1, batch: 8617, sum loss: 5297.450684, avg loss: 2.825307, ppl: 16.866125 +epoch: 1, batch: 8618, sum loss: 3599.131592, avg loss: 2.438436, ppl: 11.455112 +epoch: 1, batch: 8619, sum loss: 3817.800293, avg loss: 2.505118, ppl: 12.245008 +epoch: 1, batch: 8620, sum loss: 4327.546875, avg loss: 2.718308, ppl: 15.154662 
+epoch: 1, batch: 8621, sum loss: 4957.191406, avg loss: 3.009831, ppl: 20.283966 +epoch: 1, batch: 8622, sum loss: 3911.064941, avg loss: 2.775774, ppl: 16.051039 +epoch: 1, batch: 8623, sum loss: 4628.974121, avg loss: 2.677255, ppl: 14.545115 +epoch: 1, batch: 8624, sum loss: 4058.513916, avg loss: 2.774104, ppl: 16.024261 +epoch: 1, batch: 8625, sum loss: 3590.544678, avg loss: 2.542879, ppl: 12.716224 +epoch: 1, batch: 8626, sum loss: 5598.814453, avg loss: 3.054454, ppl: 21.209604 +epoch: 1, batch: 8627, sum loss: 4767.796875, avg loss: 2.831234, ppl: 16.966375 +epoch: 1, batch: 8628, sum loss: 4019.034424, avg loss: 2.548532, ppl: 12.788314 +epoch: 1, batch: 8629, sum loss: 5070.717285, avg loss: 2.723264, ppl: 15.229952 +epoch: 1, batch: 8630, sum loss: 4010.799561, avg loss: 2.485006, ppl: 12.001190 +epoch: 1, batch: 8631, sum loss: 4568.107910, avg loss: 2.663620, ppl: 14.348132 +epoch: 1, batch: 8632, sum loss: 4130.049316, avg loss: 2.620590, ppl: 13.743826 +epoch: 1, batch: 8633, sum loss: 4268.137695, avg loss: 2.767923, ppl: 15.925528 +epoch: 1, batch: 8634, sum loss: 3697.088867, avg loss: 2.491300, ppl: 12.076962 +epoch: 1, batch: 8635, sum loss: 5082.804199, avg loss: 2.984618, ppl: 19.778944 +epoch: 1, batch: 8636, sum loss: 4945.627930, avg loss: 2.834171, ppl: 17.016281 +epoch: 1, batch: 8637, sum loss: 3674.422852, avg loss: 2.492824, ppl: 12.095389 +epoch: 1, batch: 8638, sum loss: 5062.527344, avg loss: 2.970967, ppl: 19.510775 +epoch: 1, batch: 8639, sum loss: 5180.741699, avg loss: 2.865454, ppl: 17.557030 +epoch: 1, batch: 8640, sum loss: 3953.309326, avg loss: 2.621558, ppl: 13.757140 +epoch: 1, batch: 8641, sum loss: 5210.937012, avg loss: 2.863152, ppl: 17.516657 +epoch: 1, batch: 8642, sum loss: 4664.538086, avg loss: 2.825281, ppl: 16.865679 +epoch: 1, batch: 8643, sum loss: 4506.150879, avg loss: 2.598703, ppl: 13.446285 +epoch: 1, batch: 8644, sum loss: 5249.036621, avg loss: 2.892031, ppl: 18.029894 +epoch: 1, batch: 8645, sum 
loss: 4275.868652, avg loss: 2.578932, ppl: 13.183045 +epoch: 1, batch: 8646, sum loss: 5252.467773, avg loss: 2.845324, ppl: 17.207129 +epoch: 1, batch: 8647, sum loss: 5416.943848, avg loss: 2.792239, ppl: 16.317516 +epoch: 1, batch: 8648, sum loss: 4587.866211, avg loss: 2.819832, ppl: 16.774025 +epoch: 1, batch: 8649, sum loss: 5186.714355, avg loss: 3.133966, ppl: 22.964888 +epoch: 1, batch: 8650, sum loss: 4685.304688, avg loss: 2.969141, ppl: 19.475183 +epoch: 1, batch: 8651, sum loss: 5630.624512, avg loss: 2.955709, ppl: 19.215332 +epoch: 1, batch: 8652, sum loss: 4745.430664, avg loss: 2.855253, ppl: 17.378838 +epoch: 1, batch: 8653, sum loss: 5365.704102, avg loss: 2.908241, ppl: 18.324530 +epoch: 1, batch: 8654, sum loss: 5161.050293, avg loss: 2.990180, ppl: 19.889257 +epoch: 1, batch: 8655, sum loss: 5408.433594, avg loss: 3.019784, ppl: 20.486870 +epoch: 1, batch: 8656, sum loss: 4570.073730, avg loss: 2.723524, ppl: 15.233917 +epoch: 1, batch: 8657, sum loss: 4720.151367, avg loss: 2.616492, ppl: 13.687620 +epoch: 1, batch: 8658, sum loss: 4912.983398, avg loss: 2.692046, ppl: 14.761843 +epoch: 1, batch: 8659, sum loss: 5660.557617, avg loss: 2.890990, ppl: 18.011127 +epoch: 1, batch: 8660, sum loss: 4752.790527, avg loss: 2.585849, ppl: 13.274555 +epoch: 1, batch: 8661, sum loss: 4735.445312, avg loss: 2.838996, ppl: 17.098593 +epoch: 1, batch: 8662, sum loss: 4967.382812, avg loss: 3.153894, ppl: 23.427105 +epoch: 1, batch: 8663, sum loss: 4410.731445, avg loss: 2.796913, ppl: 16.393959 +epoch: 1, batch: 8664, sum loss: 5077.035156, avg loss: 2.783462, ppl: 16.174927 +epoch: 1, batch: 8665, sum loss: 4736.678711, avg loss: 2.804428, ppl: 16.517622 +epoch: 1, batch: 8666, sum loss: 5860.637695, avg loss: 2.836708, ppl: 17.059505 +epoch: 1, batch: 8667, sum loss: 5255.875000, avg loss: 2.934603, ppl: 18.814041 +epoch: 1, batch: 8668, sum loss: 4282.722656, avg loss: 2.757710, ppl: 15.763695 +epoch: 1, batch: 8669, sum loss: 3796.426270, avg loss: 
2.349274, ppl: 10.477957 +epoch: 1, batch: 8670, sum loss: 4610.041992, avg loss: 2.663225, ppl: 14.342464 +epoch: 1, batch: 8671, sum loss: 5474.434082, avg loss: 2.805963, ppl: 16.543003 +epoch: 1, batch: 8672, sum loss: 4513.119629, avg loss: 2.697621, ppl: 14.844373 +epoch: 1, batch: 8673, sum loss: 4226.389648, avg loss: 2.583368, ppl: 13.241659 +epoch: 1, batch: 8674, sum loss: 5009.066895, avg loss: 2.657330, ppl: 14.258169 +epoch: 1, batch: 8675, sum loss: 4287.102051, avg loss: 2.599819, ppl: 13.461304 +epoch: 1, batch: 8676, sum loss: 4787.369629, avg loss: 2.631869, ppl: 13.899725 +epoch: 1, batch: 8677, sum loss: 4776.615723, avg loss: 2.646324, ppl: 14.102110 +epoch: 1, batch: 8678, sum loss: 4020.566895, avg loss: 2.560871, ppl: 12.947084 +epoch: 1, batch: 8679, sum loss: 5034.419434, avg loss: 2.639968, ppl: 14.012757 +epoch: 1, batch: 8680, sum loss: 4132.855469, avg loss: 2.474764, ppl: 11.878901 +epoch: 1, batch: 8681, sum loss: 5114.151367, avg loss: 2.947638, ppl: 19.060875 +epoch: 1, batch: 8682, sum loss: 4627.826172, avg loss: 2.833941, ppl: 17.012379 +epoch: 1, batch: 8683, sum loss: 4727.354492, avg loss: 2.718433, ppl: 15.156552 +epoch: 1, batch: 8684, sum loss: 5343.442871, avg loss: 2.616769, ppl: 13.691420 +epoch: 1, batch: 8685, sum loss: 4558.065430, avg loss: 2.777614, ppl: 16.080612 +epoch: 1, batch: 8686, sum loss: 4088.675293, avg loss: 2.738564, ppl: 15.464755 +epoch: 1, batch: 8687, sum loss: 4952.819824, avg loss: 2.756160, ppl: 15.739292 +epoch: 1, batch: 8688, sum loss: 4113.792969, avg loss: 2.678251, ppl: 14.559603 +epoch: 1, batch: 8689, sum loss: 4853.859863, avg loss: 2.943517, ppl: 18.982494 +epoch: 1, batch: 8690, sum loss: 4101.545898, avg loss: 2.534948, ppl: 12.615776 +epoch: 1, batch: 8691, sum loss: 4816.447754, avg loss: 2.811703, ppl: 16.638233 +epoch: 1, batch: 8692, sum loss: 4168.947754, avg loss: 2.491899, ppl: 12.084208 +epoch: 1, batch: 8693, sum loss: 4294.059570, avg loss: 2.685466, ppl: 14.665028 
+epoch: 1, batch: 8694, sum loss: 3549.701172, avg loss: 2.344585, ppl: 10.428941 +epoch: 1, batch: 8695, sum loss: 4640.452637, avg loss: 2.657762, ppl: 14.264334 +epoch: 1, batch: 8696, sum loss: 5175.504883, avg loss: 2.695576, ppl: 14.814045 +epoch: 1, batch: 8697, sum loss: 3846.423340, avg loss: 2.475176, ppl: 11.883797 +epoch: 1, batch: 8698, sum loss: 4485.703125, avg loss: 2.665302, ppl: 14.372287 +epoch: 1, batch: 8699, sum loss: 4160.323730, avg loss: 2.332020, ppl: 10.298724 +epoch: 1, batch: 8700, sum loss: 4454.066406, avg loss: 2.768220, ppl: 15.930256 +epoch: 1, batch: 8701, sum loss: 3732.766357, avg loss: 2.595804, ppl: 13.407366 +epoch: 1, batch: 8702, sum loss: 4846.018555, avg loss: 2.767572, ppl: 15.919936 +epoch: 1, batch: 8703, sum loss: 4830.594727, avg loss: 2.757189, ppl: 15.755492 +epoch: 1, batch: 8704, sum loss: 5009.136719, avg loss: 2.592721, ppl: 13.366091 +epoch: 1, batch: 8705, sum loss: 4280.349609, avg loss: 2.840312, ppl: 17.121098 +epoch: 1, batch: 8706, sum loss: 6159.219727, avg loss: 3.044597, ppl: 21.001564 +epoch: 1, batch: 8707, sum loss: 3848.455566, avg loss: 2.530214, ppl: 12.556193 +epoch: 1, batch: 8708, sum loss: 4637.476074, avg loss: 2.666749, ppl: 14.393098 +epoch: 1, batch: 8709, sum loss: 5005.552246, avg loss: 2.549950, ppl: 12.806468 +epoch: 1, batch: 8710, sum loss: 4788.765137, avg loss: 2.785785, ppl: 16.212547 +epoch: 1, batch: 8711, sum loss: 4746.263184, avg loss: 2.825157, ppl: 16.863586 +epoch: 1, batch: 8712, sum loss: 5951.934570, avg loss: 2.953814, ppl: 19.178959 +epoch: 1, batch: 8713, sum loss: 5000.364258, avg loss: 2.912268, ppl: 18.398481 +epoch: 1, batch: 8714, sum loss: 3716.762695, avg loss: 2.408790, ppl: 11.120496 +epoch: 1, batch: 8715, sum loss: 4203.125488, avg loss: 2.748938, ppl: 15.626022 +epoch: 1, batch: 8716, sum loss: 4004.015137, avg loss: 2.499385, ppl: 12.175006 +epoch: 1, batch: 8717, sum loss: 4356.223633, avg loss: 2.787091, ppl: 16.233732 +epoch: 1, batch: 8718, sum 
loss: 3961.629395, avg loss: 2.825699, ppl: 16.872728 +epoch: 1, batch: 8719, sum loss: 5108.642578, avg loss: 2.742159, ppl: 15.520459 +epoch: 1, batch: 8720, sum loss: 4904.393066, avg loss: 2.678533, ppl: 14.563706 +epoch: 1, batch: 8721, sum loss: 4668.670410, avg loss: 2.658696, ppl: 14.277661 +epoch: 1, batch: 8722, sum loss: 4514.940918, avg loss: 2.527962, ppl: 12.527954 +epoch: 1, batch: 8723, sum loss: 4380.957031, avg loss: 2.535276, ppl: 12.619916 +epoch: 1, batch: 8724, sum loss: 5213.263184, avg loss: 2.811900, ppl: 16.641510 +epoch: 1, batch: 8725, sum loss: 4952.021973, avg loss: 2.852547, ppl: 17.331873 +epoch: 1, batch: 8726, sum loss: 4632.961914, avg loss: 2.863388, ppl: 17.520788 +epoch: 1, batch: 8727, sum loss: 5890.620605, avg loss: 2.987130, ppl: 19.828695 +epoch: 1, batch: 8728, sum loss: 4812.949219, avg loss: 2.654688, ppl: 14.220551 +epoch: 1, batch: 8729, sum loss: 3398.302246, avg loss: 2.255012, ppl: 9.535403 +epoch: 1, batch: 8730, sum loss: 4114.721191, avg loss: 2.439076, ppl: 11.462446 +epoch: 1, batch: 8731, sum loss: 4145.449219, avg loss: 2.647158, ppl: 14.113869 +epoch: 1, batch: 8732, sum loss: 5081.098633, avg loss: 2.920171, ppl: 18.544468 +epoch: 1, batch: 8733, sum loss: 3840.807129, avg loss: 2.574268, ppl: 13.121704 +epoch: 1, batch: 8734, sum loss: 4610.097656, avg loss: 2.647960, ppl: 14.125190 +epoch: 1, batch: 8735, sum loss: 4391.095703, avg loss: 2.669359, ppl: 14.430716 +epoch: 1, batch: 8736, sum loss: 4755.758789, avg loss: 2.774655, ppl: 16.033096 +epoch: 1, batch: 8737, sum loss: 4868.050781, avg loss: 2.941421, ppl: 18.942736 +epoch: 1, batch: 8738, sum loss: 4844.010742, avg loss: 2.926895, ppl: 18.669569 +epoch: 1, batch: 8739, sum loss: 5141.406738, avg loss: 2.636619, ppl: 13.965903 +epoch: 1, batch: 8740, sum loss: 4068.847900, avg loss: 2.555809, ppl: 12.881717 +epoch: 1, batch: 8741, sum loss: 5411.006836, avg loss: 2.966561, ppl: 19.424999 +epoch: 1, batch: 8742, sum loss: 4344.180176, avg loss: 
2.679938, ppl: 14.584193 +epoch: 1, batch: 8743, sum loss: 4886.970703, avg loss: 2.933356, ppl: 18.790579 +epoch: 1, batch: 8744, sum loss: 5205.297852, avg loss: 2.643625, ppl: 14.064097 +epoch: 1, batch: 8745, sum loss: 4788.256836, avg loss: 2.675004, ppl: 14.512404 +epoch: 1, batch: 8746, sum loss: 3973.809326, avg loss: 2.592178, ppl: 13.358840 +epoch: 1, batch: 8747, sum loss: 3591.547607, avg loss: 2.407204, ppl: 11.102870 +epoch: 1, batch: 8748, sum loss: 4604.726074, avg loss: 2.721469, ppl: 15.202644 +epoch: 1, batch: 8749, sum loss: 5151.645020, avg loss: 2.695785, ppl: 14.817142 +epoch: 1, batch: 8750, sum loss: 5248.981445, avg loss: 2.999418, ppl: 20.073851 +epoch: 1, batch: 8751, sum loss: 4627.561035, avg loss: 2.801187, ppl: 16.464182 +epoch: 1, batch: 8752, sum loss: 4604.112305, avg loss: 2.652138, ppl: 14.184339 +epoch: 1, batch: 8753, sum loss: 4598.583496, avg loss: 2.638315, ppl: 13.989614 +epoch: 1, batch: 8754, sum loss: 4877.531738, avg loss: 2.835774, ppl: 17.043591 +epoch: 1, batch: 8755, sum loss: 4136.642090, avg loss: 2.694881, ppl: 14.803753 +epoch: 1, batch: 8756, sum loss: 5216.433594, avg loss: 2.960519, ppl: 19.307981 +epoch: 1, batch: 8757, sum loss: 5772.807617, avg loss: 2.763431, ppl: 15.854146 +epoch: 1, batch: 8758, sum loss: 4753.019531, avg loss: 2.634712, ppl: 13.939290 +epoch: 1, batch: 8759, sum loss: 6021.496094, avg loss: 3.012254, ppl: 20.333183 +epoch: 1, batch: 8760, sum loss: 4130.691406, avg loss: 2.817661, ppl: 16.737659 +epoch: 1, batch: 8761, sum loss: 4088.090332, avg loss: 2.605539, ppl: 13.538519 +epoch: 1, batch: 8762, sum loss: 5317.495117, avg loss: 2.745222, ppl: 15.568071 +epoch: 1, batch: 8763, sum loss: 4943.391602, avg loss: 2.857452, ppl: 17.417086 +epoch: 1, batch: 8764, sum loss: 3874.540527, avg loss: 2.540682, ppl: 12.688326 +epoch: 1, batch: 8765, sum loss: 4367.520020, avg loss: 2.543693, ppl: 12.726579 +epoch: 1, batch: 8766, sum loss: 4499.988770, avg loss: 2.767521, ppl: 15.919116 
+epoch: 1, batch: 8767, sum loss: 4752.887695, avg loss: 2.620115, ppl: 13.737297 +epoch: 1, batch: 8768, sum loss: 4483.409180, avg loss: 2.715572, ppl: 15.113255 +epoch: 1, batch: 8769, sum loss: 4186.411621, avg loss: 2.663112, ppl: 14.340844 +epoch: 1, batch: 8770, sum loss: 5248.826660, avg loss: 2.838738, ppl: 17.094179 +epoch: 1, batch: 8771, sum loss: 4678.045898, avg loss: 2.889466, ppl: 17.983709 +epoch: 1, batch: 8772, sum loss: 3838.094971, avg loss: 2.672768, ppl: 14.479996 +epoch: 1, batch: 8773, sum loss: 5489.999512, avg loss: 2.619275, ppl: 13.725763 +epoch: 1, batch: 8774, sum loss: 5956.190430, avg loss: 2.887150, ppl: 17.942102 +epoch: 1, batch: 8775, sum loss: 4486.495605, avg loss: 2.837758, ppl: 17.077436 +epoch: 1, batch: 8776, sum loss: 3896.462158, avg loss: 2.690927, ppl: 14.745339 +epoch: 1, batch: 8777, sum loss: 5854.315918, avg loss: 2.835020, ppl: 17.030739 +epoch: 1, batch: 8778, sum loss: 4500.952637, avg loss: 2.970926, ppl: 19.509974 +epoch: 1, batch: 8779, sum loss: 4926.407227, avg loss: 2.739937, ppl: 15.486011 +epoch: 1, batch: 8780, sum loss: 3942.208740, avg loss: 2.479377, ppl: 11.933822 +epoch: 1, batch: 8781, sum loss: 4092.061768, avg loss: 2.609733, ppl: 13.595422 +epoch: 1, batch: 8782, sum loss: 4374.512695, avg loss: 2.522787, ppl: 12.463282 +epoch: 1, batch: 8783, sum loss: 4701.322266, avg loss: 2.887790, ppl: 17.953588 +epoch: 1, batch: 8784, sum loss: 4112.779297, avg loss: 2.622946, ppl: 13.776249 +epoch: 1, batch: 8785, sum loss: 4699.963379, avg loss: 2.743703, ppl: 15.544441 +epoch: 1, batch: 8786, sum loss: 5178.986328, avg loss: 2.772477, ppl: 15.998207 +epoch: 1, batch: 8787, sum loss: 5480.950195, avg loss: 2.728198, ppl: 15.305286 +epoch: 1, batch: 8788, sum loss: 4281.184082, avg loss: 2.672400, ppl: 14.474660 +epoch: 1, batch: 8789, sum loss: 4866.998047, avg loss: 2.836246, ppl: 17.051634 +epoch: 1, batch: 8790, sum loss: 5106.024902, avg loss: 2.791703, ppl: 16.308773 +epoch: 1, batch: 8791, sum 
loss: 4181.564941, avg loss: 2.751030, ppl: 15.658748 +epoch: 1, batch: 8792, sum loss: 4383.115723, avg loss: 2.833301, ppl: 17.001484 +epoch: 1, batch: 8793, sum loss: 4241.270020, avg loss: 2.503701, ppl: 12.227668 +epoch: 1, batch: 8794, sum loss: 4550.791992, avg loss: 2.846024, ppl: 17.219175 +epoch: 1, batch: 8795, sum loss: 5792.407227, avg loss: 2.714343, ppl: 15.094684 +epoch: 1, batch: 8796, sum loss: 3958.446289, avg loss: 2.534217, ppl: 12.606551 +epoch: 1, batch: 8797, sum loss: 5004.027344, avg loss: 2.673092, ppl: 14.484681 +epoch: 1, batch: 8798, sum loss: 3609.845459, avg loss: 2.505098, ppl: 12.244755 +epoch: 1, batch: 8799, sum loss: 3904.379639, avg loss: 2.528744, ppl: 12.537743 +epoch: 1, batch: 8800, sum loss: 5009.128418, avg loss: 2.636384, ppl: 13.962617 +epoch: 1, batch: 8801, sum loss: 4764.265625, avg loss: 2.859703, ppl: 17.456343 +epoch: 1, batch: 8802, sum loss: 4689.111328, avg loss: 2.899884, ppl: 18.172028 +epoch: 1, batch: 8803, sum loss: 3537.183838, avg loss: 2.317945, ppl: 10.154785 +epoch: 1, batch: 8804, sum loss: 3677.116455, avg loss: 2.634038, ppl: 13.929902 +epoch: 1, batch: 8805, sum loss: 4537.495605, avg loss: 2.712191, ppl: 15.062243 +epoch: 1, batch: 8806, sum loss: 5167.068359, avg loss: 2.630890, ppl: 13.886128 +epoch: 1, batch: 8807, sum loss: 5776.910645, avg loss: 3.099201, ppl: 22.180222 +epoch: 1, batch: 8808, sum loss: 5822.041992, avg loss: 3.227296, ppl: 25.211390 +epoch: 1, batch: 8809, sum loss: 4099.573730, avg loss: 2.663791, ppl: 14.350585 +epoch: 1, batch: 8810, sum loss: 3443.503906, avg loss: 2.314183, ppl: 10.116652 +epoch: 1, batch: 8811, sum loss: 4889.696289, avg loss: 2.691082, ppl: 14.747628 +epoch: 1, batch: 8812, sum loss: 3317.485840, avg loss: 2.479436, ppl: 11.934536 +epoch: 1, batch: 8813, sum loss: 5125.792969, avg loss: 2.891028, ppl: 18.011824 +epoch: 1, batch: 8814, sum loss: 5506.452637, avg loss: 2.986146, ppl: 19.809185 +epoch: 1, batch: 8815, sum loss: 4716.348633, avg loss: 
2.790739, ppl: 16.293053 +epoch: 1, batch: 8816, sum loss: 5053.693359, avg loss: 2.664045, ppl: 14.354236 +epoch: 1, batch: 8817, sum loss: 4529.958008, avg loss: 2.597453, ppl: 13.429488 +epoch: 1, batch: 8818, sum loss: 4569.521973, avg loss: 2.673799, ppl: 14.494928 +epoch: 1, batch: 8819, sum loss: 4972.607422, avg loss: 2.887693, ppl: 17.951845 +epoch: 1, batch: 8820, sum loss: 5182.544922, avg loss: 2.778844, ppl: 16.100405 +epoch: 1, batch: 8821, sum loss: 4171.247559, avg loss: 2.650094, ppl: 14.155363 +epoch: 1, batch: 8822, sum loss: 5856.628906, avg loss: 3.021996, ppl: 20.532244 +epoch: 1, batch: 8823, sum loss: 4412.804688, avg loss: 3.018334, ppl: 20.457190 +epoch: 1, batch: 8824, sum loss: 4475.607422, avg loss: 2.623451, ppl: 13.783207 +epoch: 1, batch: 8825, sum loss: 4550.913086, avg loss: 2.667593, ppl: 14.405247 +epoch: 1, batch: 8826, sum loss: 3923.589844, avg loss: 2.598404, ppl: 13.442266 +epoch: 1, batch: 8827, sum loss: 5019.522949, avg loss: 2.865025, ppl: 17.549488 +epoch: 1, batch: 8828, sum loss: 3935.800293, avg loss: 2.608218, ppl: 13.574835 +epoch: 1, batch: 8829, sum loss: 4501.925781, avg loss: 2.611326, ppl: 13.617095 +epoch: 1, batch: 8830, sum loss: 4839.555176, avg loss: 2.768624, ppl: 15.936695 +epoch: 1, batch: 8831, sum loss: 3866.780029, avg loss: 2.625105, ppl: 13.806029 +epoch: 1, batch: 8832, sum loss: 4936.692871, avg loss: 3.013854, ppl: 20.365734 +epoch: 1, batch: 8833, sum loss: 4094.152344, avg loss: 2.764451, ppl: 15.870329 +epoch: 1, batch: 8834, sum loss: 4145.257812, avg loss: 2.532228, ppl: 12.581512 +epoch: 1, batch: 8835, sum loss: 5028.737305, avg loss: 2.913521, ppl: 18.421547 +epoch: 1, batch: 8836, sum loss: 3198.020996, avg loss: 2.237943, ppl: 9.374033 +epoch: 1, batch: 8837, sum loss: 4163.267090, avg loss: 2.869240, ppl: 17.623615 +epoch: 1, batch: 8838, sum loss: 4744.006836, avg loss: 2.698525, ppl: 14.857800 +epoch: 1, batch: 8839, sum loss: 4125.056152, avg loss: 2.659611, ppl: 14.290722 +epoch: 
1, batch: 8840, sum loss: 4710.054688, avg loss: 2.738404, ppl: 15.462284 +epoch: 1, batch: 8841, sum loss: 5103.504883, avg loss: 2.766127, ppl: 15.896952 +epoch: 1, batch: 8842, sum loss: 3964.790283, avg loss: 2.786219, ppl: 16.219584 +epoch: 1, batch: 8843, sum loss: 5189.870117, avg loss: 3.001660, ppl: 20.118908 +epoch: 1, batch: 8844, sum loss: 4779.054199, avg loss: 2.621533, ppl: 13.756796 +epoch: 1, batch: 8845, sum loss: 5921.130859, avg loss: 3.126257, ppl: 22.788527 +epoch: 1, batch: 8846, sum loss: 4653.185547, avg loss: 2.818404, ppl: 16.750103 +epoch: 1, batch: 8847, sum loss: 4210.917480, avg loss: 2.670208, ppl: 14.442969 +epoch: 1, batch: 8848, sum loss: 4343.744629, avg loss: 2.682980, ppl: 14.628623 +epoch: 1, batch: 8849, sum loss: 4989.190430, avg loss: 2.765627, ppl: 15.888994 +epoch: 1, batch: 8850, sum loss: 5346.802734, avg loss: 2.942654, ppl: 18.966124 +epoch: 1, batch: 8851, sum loss: 4447.148926, avg loss: 2.659778, ppl: 14.293118 +epoch: 1, batch: 8852, sum loss: 4717.607422, avg loss: 2.668330, ppl: 14.415877 +epoch: 1, batch: 8853, sum loss: 5279.473145, avg loss: 2.936303, ppl: 18.846046 +epoch: 1, batch: 8854, sum loss: 4695.568848, avg loss: 2.707940, ppl: 14.998352 +epoch: 1, batch: 8855, sum loss: 4404.996582, avg loss: 2.775675, ppl: 16.049461 +epoch: 1, batch: 8856, sum loss: 5527.564453, avg loss: 2.781864, ppl: 16.149097 +epoch: 1, batch: 8857, sum loss: 4458.546875, avg loss: 2.665001, ppl: 14.367966 +epoch: 1, batch: 8858, sum loss: 3624.561523, avg loss: 2.457330, ppl: 11.673598 +epoch: 1, batch: 8859, sum loss: 4383.294434, avg loss: 2.674371, ppl: 14.503224 +epoch: 1, batch: 8860, sum loss: 4606.785645, avg loss: 2.722687, ppl: 15.221159 +epoch: 1, batch: 8861, sum loss: 4093.063232, avg loss: 2.739667, ppl: 15.481835 +epoch: 1, batch: 8862, sum loss: 5803.068359, avg loss: 2.945720, ppl: 19.024355 +epoch: 1, batch: 8863, sum loss: 4154.874023, avg loss: 2.593554, ppl: 13.377234 +epoch: 1, batch: 8864, sum loss: 
4647.110352, avg loss: 2.908079, ppl: 18.321571 +epoch: 1, batch: 8865, sum loss: 3236.520264, avg loss: 2.130691, ppl: 8.420686 +epoch: 1, batch: 8866, sum loss: 4476.760254, avg loss: 2.671098, ppl: 14.455832 +epoch: 1, batch: 8867, sum loss: 3958.222900, avg loss: 2.480089, ppl: 11.942332 +epoch: 1, batch: 8868, sum loss: 4460.709961, avg loss: 2.565101, ppl: 13.001967 +epoch: 1, batch: 8869, sum loss: 4832.177734, avg loss: 2.992061, ppl: 19.926702 +epoch: 1, batch: 8870, sum loss: 4274.001953, avg loss: 2.841757, ppl: 17.145857 +epoch: 1, batch: 8871, sum loss: 5819.645996, avg loss: 2.898230, ppl: 18.142008 +epoch: 1, batch: 8872, sum loss: 4886.983398, avg loss: 2.671943, ppl: 14.468053 +epoch: 1, batch: 8873, sum loss: 5287.000977, avg loss: 2.702966, ppl: 14.923926 +epoch: 1, batch: 8874, sum loss: 4402.810059, avg loss: 2.731272, ppl: 15.352399 +epoch: 1, batch: 8875, sum loss: 4663.444824, avg loss: 2.562332, ppl: 12.966024 +epoch: 1, batch: 8876, sum loss: 3817.023926, avg loss: 2.291131, ppl: 9.886110 +epoch: 1, batch: 8877, sum loss: 5349.123047, avg loss: 2.937465, ppl: 18.867950 +epoch: 1, batch: 8878, sum loss: 4091.717773, avg loss: 2.806391, ppl: 16.550077 +epoch: 1, batch: 8879, sum loss: 4479.066406, avg loss: 2.986044, ppl: 19.807173 +epoch: 1, batch: 8880, sum loss: 4627.816406, avg loss: 2.925295, ppl: 18.639717 +epoch: 1, batch: 8881, sum loss: 5013.087402, avg loss: 2.881085, ppl: 17.833607 +epoch: 1, batch: 8882, sum loss: 4807.690918, avg loss: 2.949504, ppl: 19.096474 +epoch: 1, batch: 8883, sum loss: 4627.765625, avg loss: 2.748080, ppl: 15.612619 +epoch: 1, batch: 8884, sum loss: 5168.937500, avg loss: 2.576739, ppl: 13.154167 +epoch: 1, batch: 8885, sum loss: 5112.332520, avg loss: 2.851273, ppl: 17.309805 +epoch: 1, batch: 8886, sum loss: 5488.852539, avg loss: 2.867739, ppl: 17.597185 +epoch: 1, batch: 8887, sum loss: 4140.614258, avg loss: 2.674815, ppl: 14.509671 +epoch: 1, batch: 8888, sum loss: 4536.514648, avg loss: 2.552906, 
ppl: 12.844378 +epoch: 1, batch: 8889, sum loss: 4466.822754, avg loss: 2.715394, ppl: 15.110556 +epoch: 1, batch: 8890, sum loss: 3748.419434, avg loss: 2.669814, ppl: 14.437288 +epoch: 1, batch: 8891, sum loss: 4433.148926, avg loss: 2.784641, ppl: 16.194008 +epoch: 1, batch: 8892, sum loss: 4740.013184, avg loss: 2.900865, ppl: 18.189871 +epoch: 1, batch: 8893, sum loss: 4926.296387, avg loss: 2.677335, ppl: 14.546272 +epoch: 1, batch: 8894, sum loss: 4543.067871, avg loss: 2.597523, ppl: 13.430429 +epoch: 1, batch: 8895, sum loss: 4583.938477, avg loss: 2.717213, ppl: 15.138076 +epoch: 1, batch: 8896, sum loss: 5550.673828, avg loss: 3.054856, ppl: 21.218136 +epoch: 1, batch: 8897, sum loss: 4031.366455, avg loss: 2.537046, ppl: 12.642273 +epoch: 1, batch: 8898, sum loss: 4861.104492, avg loss: 2.649103, ppl: 14.141354 +epoch: 1, batch: 8899, sum loss: 5010.224121, avg loss: 2.783458, ppl: 16.174856 +epoch: 1, batch: 8900, sum loss: 5759.492188, avg loss: 2.833001, ppl: 16.996397 +epoch: 1, batch: 8901, sum loss: 4046.802246, avg loss: 2.589125, ppl: 13.318112 +epoch: 1, batch: 8902, sum loss: 4177.078613, avg loss: 2.647071, ppl: 14.112647 +epoch: 1, batch: 8903, sum loss: 4694.140625, avg loss: 2.740304, ppl: 15.491690 +epoch: 1, batch: 8904, sum loss: 5586.928711, avg loss: 2.981285, ppl: 19.713139 +epoch: 1, batch: 8905, sum loss: 4141.420410, avg loss: 2.777613, ppl: 16.080585 +epoch: 1, batch: 8906, sum loss: 4069.440918, avg loss: 2.794946, ppl: 16.361740 +epoch: 1, batch: 8907, sum loss: 4086.711670, avg loss: 2.454482, ppl: 11.640398 +epoch: 1, batch: 8908, sum loss: 5061.259277, avg loss: 2.912117, ppl: 18.395700 +epoch: 1, batch: 8909, sum loss: 5062.397461, avg loss: 2.799999, ppl: 16.444622 +epoch: 1, batch: 8910, sum loss: 4129.653320, avg loss: 2.496768, ppl: 12.143178 +epoch: 1, batch: 8911, sum loss: 5394.323242, avg loss: 2.986890, ppl: 19.823936 +epoch: 1, batch: 8912, sum loss: 4347.165527, avg loss: 2.742691, ppl: 15.528717 +epoch: 1, 
batch: 8913, sum loss: 4082.818359, avg loss: 2.447733, ppl: 11.562105 +epoch: 1, batch: 8914, sum loss: 4556.398438, avg loss: 2.744818, ppl: 15.561789 +epoch: 1, batch: 8915, sum loss: 4799.729004, avg loss: 2.774410, ppl: 16.029163 +epoch: 1, batch: 8916, sum loss: 4066.984619, avg loss: 2.735027, ppl: 15.410153 +epoch: 1, batch: 8917, sum loss: 5053.466797, avg loss: 2.716918, ppl: 15.133605 +epoch: 1, batch: 8918, sum loss: 3877.542480, avg loss: 2.509736, ppl: 12.301685 +epoch: 1, batch: 8919, sum loss: 5268.143066, avg loss: 2.874055, ppl: 17.708685 +epoch: 1, batch: 8920, sum loss: 4804.392578, avg loss: 2.722035, ppl: 15.211252 +epoch: 1, batch: 8921, sum loss: 5345.099121, avg loss: 2.810252, ppl: 16.614103 +epoch: 1, batch: 8922, sum loss: 4806.187012, avg loss: 2.732341, ppl: 15.368816 +epoch: 1, batch: 8923, sum loss: 4232.362793, avg loss: 2.883081, ppl: 17.869244 +epoch: 1, batch: 8924, sum loss: 4848.515137, avg loss: 2.639366, ppl: 14.004324 +epoch: 1, batch: 8925, sum loss: 5110.011230, avg loss: 2.813883, ppl: 16.674538 +epoch: 1, batch: 8926, sum loss: 4413.818359, avg loss: 2.811349, ppl: 16.632347 +epoch: 1, batch: 8927, sum loss: 4219.223145, avg loss: 2.612522, ppl: 13.633392 +epoch: 1, batch: 8928, sum loss: 3424.161133, avg loss: 2.468754, ppl: 11.807720 +epoch: 1, batch: 8929, sum loss: 4210.780762, avg loss: 2.707898, ppl: 14.997712 +epoch: 1, batch: 8930, sum loss: 5114.080078, avg loss: 2.793053, ppl: 16.330805 +epoch: 1, batch: 8931, sum loss: 4592.756348, avg loss: 2.997883, ppl: 20.043058 +epoch: 1, batch: 8932, sum loss: 4005.331055, avg loss: 2.559317, ppl: 12.926984 +epoch: 1, batch: 8933, sum loss: 4412.970215, avg loss: 2.793019, ppl: 16.330248 +epoch: 1, batch: 8934, sum loss: 4374.964844, avg loss: 2.703934, ppl: 14.938379 +epoch: 1, batch: 8935, sum loss: 5431.154297, avg loss: 3.025713, ppl: 20.608688 +epoch: 1, batch: 8936, sum loss: 5046.194336, avg loss: 2.903449, ppl: 18.236937 +epoch: 1, batch: 8937, sum loss: 
4623.270996, avg loss: 2.783426, ppl: 16.174343 +epoch: 1, batch: 8938, sum loss: 5160.763184, avg loss: 2.818549, ppl: 16.752527 +epoch: 1, batch: 8939, sum loss: 4475.108398, avg loss: 2.486171, ppl: 12.015187 +epoch: 1, batch: 8940, sum loss: 5128.208984, avg loss: 2.841113, ppl: 17.134827 +epoch: 1, batch: 8941, sum loss: 5845.982422, avg loss: 2.797121, ppl: 16.397367 +epoch: 1, batch: 8942, sum loss: 4653.514648, avg loss: 2.822022, ppl: 16.810812 +epoch: 1, batch: 8943, sum loss: 3656.456299, avg loss: 2.314213, ppl: 10.116956 +epoch: 1, batch: 8944, sum loss: 4849.362305, avg loss: 2.814488, ppl: 16.684626 +epoch: 1, batch: 8945, sum loss: 4512.151855, avg loss: 2.802579, ppl: 16.487106 +epoch: 1, batch: 8946, sum loss: 4790.063477, avg loss: 2.647907, ppl: 14.124446 +epoch: 1, batch: 8947, sum loss: 4429.999023, avg loss: 2.816274, ppl: 16.714455 +epoch: 1, batch: 8948, sum loss: 4059.615967, avg loss: 2.799735, ppl: 16.440294 +epoch: 1, batch: 8949, sum loss: 4174.171875, avg loss: 2.551450, ppl: 12.825687 +epoch: 1, batch: 8950, sum loss: 4370.429688, avg loss: 2.366232, ppl: 10.657157 +epoch: 1, batch: 8951, sum loss: 5048.376953, avg loss: 2.868396, ppl: 17.608747 +epoch: 1, batch: 8952, sum loss: 5039.718750, avg loss: 2.926666, ppl: 18.665293 +epoch: 1, batch: 8953, sum loss: 5314.945312, avg loss: 2.840698, ppl: 17.127712 +epoch: 1, batch: 8954, sum loss: 4115.117188, avg loss: 2.752587, ppl: 15.683146 +epoch: 1, batch: 8955, sum loss: 4554.678223, avg loss: 2.820234, ppl: 16.780783 +epoch: 1, batch: 8956, sum loss: 4789.196289, avg loss: 2.739815, ppl: 15.484120 +epoch: 1, batch: 8957, sum loss: 5273.371094, avg loss: 2.919918, ppl: 18.539759 +epoch: 1, batch: 8958, sum loss: 4371.603516, avg loss: 2.773860, ppl: 16.020353 +epoch: 1, batch: 8959, sum loss: 4178.265625, avg loss: 2.681814, ppl: 14.611570 +epoch: 1, batch: 8960, sum loss: 4707.994629, avg loss: 2.586810, ppl: 13.287322 +epoch: 1, batch: 8961, sum loss: 4927.896484, avg loss: 
2.853443, ppl: 17.347412 +epoch: 1, batch: 8962, sum loss: 4833.280273, avg loss: 2.875241, ppl: 17.729696 +epoch: 1, batch: 8963, sum loss: 4908.854004, avg loss: 3.037657, ppl: 20.856319 +epoch: 1, batch: 8964, sum loss: 5375.030762, avg loss: 2.943609, ppl: 18.984245 +epoch: 1, batch: 8965, sum loss: 4751.985352, avg loss: 2.764390, ppl: 15.869349 +epoch: 1, batch: 8966, sum loss: 4537.394043, avg loss: 2.880885, ppl: 17.830044 +epoch: 1, batch: 8967, sum loss: 5483.816406, avg loss: 3.019722, ppl: 20.485605 +epoch: 1, batch: 8968, sum loss: 3997.045898, avg loss: 2.536196, ppl: 12.631536 +epoch: 1, batch: 8969, sum loss: 4673.970703, avg loss: 2.630259, ppl: 13.877367 +epoch: 1, batch: 8970, sum loss: 5023.114746, avg loss: 2.845957, ppl: 17.218033 +epoch: 1, batch: 8971, sum loss: 4770.917969, avg loss: 2.875779, ppl: 17.739244 +epoch: 1, batch: 8972, sum loss: 3967.341309, avg loss: 2.637860, ppl: 13.983245 +epoch: 1, batch: 8973, sum loss: 5158.955078, avg loss: 2.755852, ppl: 15.734445 +epoch: 1, batch: 8974, sum loss: 4173.912109, avg loss: 2.502346, ppl: 12.211102 +epoch: 1, batch: 8975, sum loss: 4847.790039, avg loss: 2.911586, ppl: 18.385927 +epoch: 1, batch: 8976, sum loss: 3954.832520, avg loss: 2.482632, ppl: 11.972734 +epoch: 1, batch: 8977, sum loss: 4797.323730, avg loss: 3.063425, ppl: 21.400730 +epoch: 1, batch: 8978, sum loss: 3488.951172, avg loss: 2.586324, ppl: 13.280861 +epoch: 1, batch: 8979, sum loss: 5443.817871, avg loss: 2.833846, ppl: 17.010756 +epoch: 1, batch: 8980, sum loss: 4395.617676, avg loss: 2.540819, ppl: 12.690065 +epoch: 1, batch: 8981, sum loss: 4599.885742, avg loss: 2.916858, ppl: 18.483130 +epoch: 1, batch: 8982, sum loss: 4019.056885, avg loss: 2.553403, ppl: 12.850762 +epoch: 1, batch: 8983, sum loss: 3890.419189, avg loss: 2.562859, ppl: 12.972848 +epoch: 1, batch: 8984, sum loss: 4117.553711, avg loss: 2.492466, ppl: 12.091056 +epoch: 1, batch: 8985, sum loss: 4610.345703, avg loss: 2.826699, ppl: 16.889612 
+epoch: 1, batch: 8986, sum loss: 4046.673828, avg loss: 2.646615, ppl: 14.106205 +epoch: 1, batch: 8987, sum loss: 5000.449707, avg loss: 2.953603, ppl: 19.174917 +epoch: 1, batch: 8988, sum loss: 4866.769043, avg loss: 2.690309, ppl: 14.736233 +epoch: 1, batch: 8989, sum loss: 4719.368164, avg loss: 2.660298, ppl: 14.300545 +epoch: 1, batch: 8990, sum loss: 4077.081787, avg loss: 2.671744, ppl: 14.465179 +epoch: 1, batch: 8991, sum loss: 4613.966309, avg loss: 2.731774, ppl: 15.360113 +epoch: 1, batch: 8992, sum loss: 5192.472656, avg loss: 2.917120, ppl: 18.487957 +epoch: 1, batch: 8993, sum loss: 4984.057129, avg loss: 2.713151, ppl: 15.076700 +epoch: 1, batch: 8994, sum loss: 4975.368652, avg loss: 2.683586, ppl: 14.637491 +epoch: 1, batch: 8995, sum loss: 5145.407227, avg loss: 2.910298, ppl: 18.362276 +epoch: 1, batch: 8996, sum loss: 4901.418457, avg loss: 2.844700, ppl: 17.196405 +epoch: 1, batch: 8997, sum loss: 5001.038574, avg loss: 2.817486, ppl: 16.734732 +epoch: 1, batch: 8998, sum loss: 6039.747559, avg loss: 2.931916, ppl: 18.763552 +epoch: 1, batch: 8999, sum loss: 4254.263184, avg loss: 2.766101, ppl: 15.896530 +epoch: 1, batch: 9000, sum loss: 4850.868652, avg loss: 2.675603, ppl: 14.521109 +epoch: 1, batch: 9001, sum loss: 4503.661133, avg loss: 2.680751, ppl: 14.596048 +epoch: 1, batch: 9002, sum loss: 4944.381836, avg loss: 2.687164, ppl: 14.689954 +epoch: 1, batch: 9003, sum loss: 4626.664062, avg loss: 2.798950, ppl: 16.427383 +epoch: 1, batch: 9004, sum loss: 4456.208008, avg loss: 2.865729, ppl: 17.561848 +epoch: 1, batch: 9005, sum loss: 4753.041016, avg loss: 2.734776, ppl: 15.406296 +epoch: 1, batch: 9006, sum loss: 5105.650879, avg loss: 2.774810, ppl: 16.035582 +epoch: 1, batch: 9007, sum loss: 4295.398438, avg loss: 2.567483, ppl: 13.032975 +epoch: 1, batch: 9008, sum loss: 4754.114258, avg loss: 2.836584, ppl: 17.057396 +epoch: 1, batch: 9009, sum loss: 3742.445312, avg loss: 2.346361, ppl: 10.447479 +epoch: 1, batch: 9010, sum 
loss: 4785.954102, avg loss: 2.860702, ppl: 17.473785 +epoch: 1, batch: 9011, sum loss: 5639.702148, avg loss: 2.890673, ppl: 18.005417 +epoch: 1, batch: 9012, sum loss: 4050.601562, avg loss: 2.820753, ppl: 16.789490 +epoch: 1, batch: 9013, sum loss: 4581.141113, avg loss: 2.573675, ppl: 13.113929 +epoch: 1, batch: 9014, sum loss: 5072.491699, avg loss: 2.875562, ppl: 17.735392 +epoch: 1, batch: 9015, sum loss: 4923.697754, avg loss: 2.918612, ppl: 18.515566 +epoch: 1, batch: 9016, sum loss: 5023.217285, avg loss: 2.643799, ppl: 14.066539 +epoch: 1, batch: 9017, sum loss: 5019.313965, avg loss: 2.797834, ppl: 16.409065 +epoch: 1, batch: 9018, sum loss: 3994.419922, avg loss: 2.577045, ppl: 13.158201 +epoch: 1, batch: 9019, sum loss: 4427.671875, avg loss: 2.637089, ppl: 13.972464 +epoch: 1, batch: 9020, sum loss: 4105.812012, avg loss: 2.491391, ppl: 12.078061 +epoch: 1, batch: 9021, sum loss: 3860.417725, avg loss: 2.377105, ppl: 10.773663 +epoch: 1, batch: 9022, sum loss: 6097.508301, avg loss: 3.035096, ppl: 20.802979 +epoch: 1, batch: 9023, sum loss: 5615.348633, avg loss: 2.977386, ppl: 19.636410 +epoch: 1, batch: 9024, sum loss: 4774.372070, avg loss: 2.546332, ppl: 12.760212 +epoch: 1, batch: 9025, sum loss: 3659.799316, avg loss: 2.466172, ppl: 11.777277 +epoch: 1, batch: 9026, sum loss: 4218.001465, avg loss: 2.751469, ppl: 15.665623 +epoch: 1, batch: 9027, sum loss: 4502.753418, avg loss: 2.613322, ppl: 13.644300 +epoch: 1, batch: 9028, sum loss: 5049.343262, avg loss: 2.780475, ppl: 16.126682 +epoch: 1, batch: 9029, sum loss: 5066.507324, avg loss: 2.898460, ppl: 18.146173 +epoch: 1, batch: 9030, sum loss: 3952.403076, avg loss: 2.787308, ppl: 16.237253 +epoch: 1, batch: 9031, sum loss: 4203.407227, avg loss: 2.908932, ppl: 18.337212 +epoch: 1, batch: 9032, sum loss: 5075.732422, avg loss: 2.779700, ppl: 16.114191 +epoch: 1, batch: 9033, sum loss: 4475.868164, avg loss: 2.756076, ppl: 15.737971 +epoch: 1, batch: 9034, sum loss: 5176.534180, avg loss: 
2.985314, ppl: 19.792709 +epoch: 1, batch: 9035, sum loss: 4822.645996, avg loss: 2.763694, ppl: 15.858313 +epoch: 1, batch: 9036, sum loss: 4368.869141, avg loss: 2.609838, ppl: 13.596851 +epoch: 1, batch: 9037, sum loss: 4518.449707, avg loss: 2.593829, ppl: 13.380905 +epoch: 1, batch: 9038, sum loss: 5231.364746, avg loss: 2.659565, ppl: 14.290071 +epoch: 1, batch: 9039, sum loss: 4666.018066, avg loss: 2.719125, ppl: 15.167042 +epoch: 1, batch: 9040, sum loss: 4149.508301, avg loss: 2.588589, ppl: 13.310976 +epoch: 1, batch: 9041, sum loss: 4925.440918, avg loss: 2.895615, ppl: 18.094624 +epoch: 1, batch: 9042, sum loss: 4811.991211, avg loss: 2.992532, ppl: 19.936092 +epoch: 1, batch: 9043, sum loss: 3579.610107, avg loss: 2.355007, ppl: 10.538199 +epoch: 1, batch: 9044, sum loss: 5392.772461, avg loss: 3.022855, ppl: 20.549870 +epoch: 1, batch: 9045, sum loss: 4504.972168, avg loss: 2.725331, ppl: 15.261462 +epoch: 1, batch: 9046, sum loss: 5304.823242, avg loss: 2.805300, ppl: 16.532043 +epoch: 1, batch: 9047, sum loss: 4838.144531, avg loss: 2.785345, ppl: 16.205414 +epoch: 1, batch: 9048, sum loss: 5345.880859, avg loss: 3.051302, ppl: 21.142853 +epoch: 1, batch: 9049, sum loss: 3687.012695, avg loss: 2.458009, ppl: 11.681525 +epoch: 1, batch: 9050, sum loss: 4409.366699, avg loss: 2.764493, ppl: 15.870995 +epoch: 1, batch: 9051, sum loss: 4525.525879, avg loss: 2.732805, ppl: 15.375963 +epoch: 1, batch: 9052, sum loss: 3948.424805, avg loss: 2.565578, ppl: 13.008178 +epoch: 1, batch: 9053, sum loss: 5170.446289, avg loss: 3.006073, ppl: 20.207897 +epoch: 1, batch: 9054, sum loss: 3944.490234, avg loss: 2.750690, ppl: 15.653437 +epoch: 1, batch: 9055, sum loss: 4492.984375, avg loss: 2.764913, ppl: 15.877664 +epoch: 1, batch: 9056, sum loss: 5196.476562, avg loss: 2.959269, ppl: 19.283876 +epoch: 1, batch: 9057, sum loss: 4489.035156, avg loss: 2.793426, ppl: 16.336891 +epoch: 1, batch: 9058, sum loss: 4752.671387, avg loss: 2.680582, ppl: 14.593581 
+epoch: 1, batch: 9059, sum loss: 4911.121094, avg loss: 2.806355, ppl: 16.549486 +epoch: 1, batch: 9060, sum loss: 5519.749023, avg loss: 3.036165, ppl: 20.825235 +epoch: 1, batch: 9061, sum loss: 4600.334961, avg loss: 2.668408, ppl: 14.416998 +epoch: 1, batch: 9062, sum loss: 3765.128418, avg loss: 2.746264, ppl: 15.584307 +epoch: 1, batch: 9063, sum loss: 4628.324219, avg loss: 2.835983, ppl: 17.047150 +epoch: 1, batch: 9064, sum loss: 4551.997070, avg loss: 2.631212, ppl: 13.890598 +epoch: 1, batch: 9065, sum loss: 4132.462402, avg loss: 2.477495, ppl: 11.911394 +epoch: 1, batch: 9066, sum loss: 3947.111328, avg loss: 2.512483, ppl: 12.335526 +epoch: 1, batch: 9067, sum loss: 4545.967285, avg loss: 2.641469, ppl: 14.033797 +epoch: 1, batch: 9068, sum loss: 4241.025391, avg loss: 2.570318, ppl: 13.069986 +epoch: 1, batch: 9069, sum loss: 5829.484375, avg loss: 3.120709, ppl: 22.662441 +epoch: 1, batch: 9070, sum loss: 4394.361328, avg loss: 2.729417, ppl: 15.323948 +epoch: 1, batch: 9071, sum loss: 4606.056641, avg loss: 2.677940, ppl: 14.555078 +epoch: 1, batch: 9072, sum loss: 5048.907227, avg loss: 2.733572, ppl: 15.387754 +epoch: 1, batch: 9073, sum loss: 4540.858398, avg loss: 2.723970, ppl: 15.240710 +epoch: 1, batch: 9074, sum loss: 4861.921387, avg loss: 2.861637, ppl: 17.490137 +epoch: 1, batch: 9075, sum loss: 5277.196289, avg loss: 2.863373, ppl: 17.520521 +epoch: 1, batch: 9076, sum loss: 5859.299805, avg loss: 3.090348, ppl: 21.984734 +epoch: 1, batch: 9077, sum loss: 4158.610840, avg loss: 2.732333, ppl: 15.368703 +epoch: 1, batch: 9078, sum loss: 4879.387695, avg loss: 2.791412, ppl: 16.304024 +epoch: 1, batch: 9079, sum loss: 4665.862305, avg loss: 2.812455, ppl: 16.650742 +epoch: 1, batch: 9080, sum loss: 4075.777344, avg loss: 2.617712, ppl: 13.704329 +epoch: 1, batch: 9081, sum loss: 4580.680176, avg loss: 2.583576, ppl: 13.244418 +epoch: 1, batch: 9082, sum loss: 4433.249023, avg loss: 2.823725, ppl: 16.839468 +epoch: 1, batch: 9083, sum 
loss: 4933.995117, avg loss: 2.814601, ppl: 16.686516 +epoch: 1, batch: 9084, sum loss: 5386.262207, avg loss: 2.877277, ppl: 17.765829 +epoch: 1, batch: 9085, sum loss: 5341.898926, avg loss: 2.845977, ppl: 17.218374 +epoch: 1, batch: 9086, sum loss: 3642.461426, avg loss: 2.402679, ppl: 11.052750 +epoch: 1, batch: 9087, sum loss: 4997.850586, avg loss: 3.135414, ppl: 22.998163 +epoch: 1, batch: 9088, sum loss: 4500.057129, avg loss: 2.748966, ppl: 15.626466 +epoch: 1, batch: 9089, sum loss: 5121.871094, avg loss: 2.755175, ppl: 15.723798 +epoch: 1, batch: 9090, sum loss: 4622.335938, avg loss: 2.703120, ppl: 14.926236 +epoch: 1, batch: 9091, sum loss: 4883.299316, avg loss: 2.898100, ppl: 18.139654 +epoch: 1, batch: 9092, sum loss: 4038.536377, avg loss: 2.789045, ppl: 16.265472 +epoch: 1, batch: 9093, sum loss: 4755.512207, avg loss: 2.722102, ppl: 15.212264 +epoch: 1, batch: 9094, sum loss: 5021.436523, avg loss: 2.683825, ppl: 14.640988 +epoch: 1, batch: 9095, sum loss: 4476.809082, avg loss: 2.748195, ppl: 15.614418 +epoch: 1, batch: 9096, sum loss: 4684.654785, avg loss: 2.758925, ppl: 15.782870 +epoch: 1, batch: 9097, sum loss: 4365.433594, avg loss: 2.632951, ppl: 13.914779 +epoch: 1, batch: 9098, sum loss: 6146.155273, avg loss: 3.110403, ppl: 22.430073 +epoch: 1, batch: 9099, sum loss: 4647.824707, avg loss: 2.593652, ppl: 13.378544 +epoch: 1, batch: 9100, sum loss: 3744.147949, avg loss: 2.404719, ppl: 11.075321 +epoch: 1, batch: 9101, sum loss: 4266.026855, avg loss: 2.791902, ppl: 16.312021 +epoch: 1, batch: 9102, sum loss: 4203.428223, avg loss: 2.640344, ppl: 14.018030 +epoch: 1, batch: 9103, sum loss: 4479.618164, avg loss: 2.846009, ppl: 17.218920 +epoch: 1, batch: 9104, sum loss: 4202.971680, avg loss: 2.633441, ppl: 13.921592 +epoch: 1, batch: 9105, sum loss: 3769.104492, avg loss: 2.667448, ppl: 14.403170 +epoch: 1, batch: 9106, sum loss: 3981.802734, avg loss: 2.501132, ppl: 12.196296 +epoch: 1, batch: 9107, sum loss: 3897.919922, avg loss: 
2.534408, ppl: 12.608969 +epoch: 1, batch: 9108, sum loss: 5193.342285, avg loss: 2.717604, ppl: 15.144000 +epoch: 1, batch: 9109, sum loss: 4479.560059, avg loss: 2.829792, ppl: 16.941929 +epoch: 1, batch: 9110, sum loss: 5205.663086, avg loss: 2.803265, ppl: 16.498425 +epoch: 1, batch: 9111, sum loss: 5480.581055, avg loss: 2.879969, ppl: 17.813723 +epoch: 1, batch: 9112, sum loss: 3989.647949, avg loss: 2.677616, ppl: 14.550365 +epoch: 1, batch: 9113, sum loss: 4992.020020, avg loss: 2.962623, ppl: 19.348660 +epoch: 1, batch: 9114, sum loss: 4921.700195, avg loss: 2.878187, ppl: 17.782009 +epoch: 1, batch: 9115, sum loss: 4538.069824, avg loss: 2.568234, ppl: 13.042773 +epoch: 1, batch: 9116, sum loss: 4603.176758, avg loss: 2.630387, ppl: 13.879134 +epoch: 1, batch: 9117, sum loss: 4966.907227, avg loss: 2.851267, ppl: 17.309702 +epoch: 1, batch: 9118, sum loss: 4306.275879, avg loss: 2.570911, ppl: 13.077735 +epoch: 1, batch: 9119, sum loss: 4760.731934, avg loss: 2.788947, ppl: 16.263880 +epoch: 1, batch: 9120, sum loss: 4211.064453, avg loss: 2.759544, ppl: 15.792645 +epoch: 1, batch: 9121, sum loss: 4238.677734, avg loss: 2.556500, ppl: 12.890627 +epoch: 1, batch: 9122, sum loss: 3952.159180, avg loss: 2.692206, ppl: 14.764215 +epoch: 1, batch: 9123, sum loss: 4068.892090, avg loss: 2.873511, ppl: 17.699057 +epoch: 1, batch: 9124, sum loss: 4628.258301, avg loss: 2.898095, ppl: 18.139563 +epoch: 1, batch: 9125, sum loss: 4786.587891, avg loss: 2.724296, ppl: 15.245675 +epoch: 1, batch: 9126, sum loss: 5259.368164, avg loss: 2.931643, ppl: 18.758434 +epoch: 1, batch: 9127, sum loss: 3908.741211, avg loss: 2.379027, ppl: 10.794394 +epoch: 1, batch: 9128, sum loss: 3526.551025, avg loss: 2.445597, ppl: 11.537437 +epoch: 1, batch: 9129, sum loss: 4380.186035, avg loss: 2.564512, ppl: 12.994309 +epoch: 1, batch: 9130, sum loss: 4206.295410, avg loss: 2.580549, ppl: 13.204389 +epoch: 1, batch: 9131, sum loss: 4127.437988, avg loss: 2.445165, ppl: 11.532449 
+epoch: 1, batch: 9132, sum loss: 5316.902832, avg loss: 3.002204, ppl: 20.129848 +epoch: 1, batch: 9133, sum loss: 5401.477539, avg loss: 2.847379, ppl: 17.242525 +epoch: 1, batch: 9134, sum loss: 5651.090820, avg loss: 2.938685, ppl: 18.890984 +epoch: 1, batch: 9135, sum loss: 5130.326172, avg loss: 2.870916, ppl: 17.653175 +epoch: 1, batch: 9136, sum loss: 4723.628418, avg loss: 2.952268, ppl: 19.149328 +epoch: 1, batch: 9137, sum loss: 6150.314453, avg loss: 3.180100, ppl: 24.049170 +epoch: 1, batch: 9138, sum loss: 4106.891602, avg loss: 2.620863, ppl: 13.747579 +epoch: 1, batch: 9139, sum loss: 4220.917480, avg loss: 2.663039, ppl: 14.339808 +epoch: 1, batch: 9140, sum loss: 4296.556152, avg loss: 2.589847, ppl: 13.327733 +epoch: 1, batch: 9141, sum loss: 4261.772949, avg loss: 2.606589, ppl: 13.552741 +epoch: 1, batch: 9142, sum loss: 4264.576660, avg loss: 2.776417, ppl: 16.061375 +epoch: 1, batch: 9143, sum loss: 5013.646484, avg loss: 2.786907, ppl: 16.230747 +epoch: 1, batch: 9144, sum loss: 3079.059570, avg loss: 2.453434, ppl: 11.628209 +epoch: 1, batch: 9145, sum loss: 5015.374023, avg loss: 2.733174, ppl: 15.381632 +epoch: 1, batch: 9146, sum loss: 4808.841309, avg loss: 2.946594, ppl: 19.040989 +epoch: 1, batch: 9147, sum loss: 4939.710449, avg loss: 2.897191, ppl: 18.123167 +epoch: 1, batch: 9148, sum loss: 5194.157227, avg loss: 2.927935, ppl: 18.689001 +epoch: 1, batch: 9149, sum loss: 5749.597656, avg loss: 2.965239, ppl: 19.399330 +epoch: 1, batch: 9150, sum loss: 5390.875488, avg loss: 3.049138, ppl: 21.097147 +epoch: 1, batch: 9151, sum loss: 4701.847168, avg loss: 2.700659, ppl: 14.889542 +epoch: 1, batch: 9152, sum loss: 3919.371094, avg loss: 2.538453, ppl: 12.660069 +epoch: 1, batch: 9153, sum loss: 4032.728516, avg loss: 2.426431, ppl: 11.318417 +epoch: 1, batch: 9154, sum loss: 4855.063965, avg loss: 2.809644, ppl: 16.603998 +epoch: 1, batch: 9155, sum loss: 4124.193848, avg loss: 2.608598, ppl: 13.580002 +epoch: 1, batch: 9156, sum 
loss: 6015.240723, avg loss: 2.970489, ppl: 19.501459 +epoch: 1, batch: 9157, sum loss: 4614.267578, avg loss: 2.754787, ppl: 15.717689 +epoch: 1, batch: 9158, sum loss: 5007.265625, avg loss: 2.882709, ppl: 17.862598 +epoch: 1, batch: 9159, sum loss: 4584.156738, avg loss: 2.901365, ppl: 18.198971 +epoch: 1, batch: 9160, sum loss: 5403.014648, avg loss: 3.000008, ppl: 20.085699 +epoch: 1, batch: 9161, sum loss: 5612.287109, avg loss: 2.933762, ppl: 18.798218 +epoch: 1, batch: 9162, sum loss: 3613.929688, avg loss: 2.466846, ppl: 11.785220 +epoch: 1, batch: 9163, sum loss: 4780.002930, avg loss: 2.806813, ppl: 16.557070 +epoch: 1, batch: 9164, sum loss: 3639.250488, avg loss: 2.294609, ppl: 9.920559 +epoch: 1, batch: 9165, sum loss: 4703.845215, avg loss: 2.698706, ppl: 14.860496 +epoch: 1, batch: 9166, sum loss: 4401.257812, avg loss: 2.578358, ppl: 13.175491 +epoch: 1, batch: 9167, sum loss: 5306.653320, avg loss: 3.013432, ppl: 20.357141 +epoch: 1, batch: 9168, sum loss: 4488.881836, avg loss: 2.762389, ppl: 15.837629 +epoch: 1, batch: 9169, sum loss: 4934.289062, avg loss: 2.897410, ppl: 18.127134 +epoch: 1, batch: 9170, sum loss: 4107.643555, avg loss: 2.643271, ppl: 14.059119 +epoch: 1, batch: 9171, sum loss: 4163.859375, avg loss: 2.667431, ppl: 14.402919 +epoch: 1, batch: 9172, sum loss: 4358.704590, avg loss: 2.662617, ppl: 14.333757 +epoch: 1, batch: 9173, sum loss: 4600.510742, avg loss: 2.756448, ppl: 15.743814 +epoch: 1, batch: 9174, sum loss: 3758.080078, avg loss: 2.304157, ppl: 10.015732 +epoch: 1, batch: 9175, sum loss: 6259.483398, avg loss: 3.180632, ppl: 24.061958 +epoch: 1, batch: 9176, sum loss: 4915.461426, avg loss: 2.836389, ppl: 17.054066 +epoch: 1, batch: 9177, sum loss: 4314.684082, avg loss: 2.725637, ppl: 15.266142 +epoch: 1, batch: 9178, sum loss: 5127.546387, avg loss: 2.885507, ppl: 17.912649 +epoch: 1, batch: 9179, sum loss: 4775.411133, avg loss: 2.705615, ppl: 14.963521 +epoch: 1, batch: 9180, sum loss: 4702.491211, avg loss: 
2.682539, ppl: 14.622175 +epoch: 1, batch: 9181, sum loss: 4401.411133, avg loss: 2.632423, ppl: 13.907429 +epoch: 1, batch: 9182, sum loss: 4586.595215, avg loss: 2.661982, ppl: 14.324654 +epoch: 1, batch: 9183, sum loss: 4160.346191, avg loss: 2.755196, ppl: 15.724124 +epoch: 1, batch: 9184, sum loss: 5007.501953, avg loss: 2.706758, ppl: 14.980626 +epoch: 1, batch: 9185, sum loss: 4315.974121, avg loss: 2.705940, ppl: 14.968381 +epoch: 1, batch: 9186, sum loss: 3402.709961, avg loss: 2.535551, ppl: 12.623388 +epoch: 1, batch: 9187, sum loss: 4352.642090, avg loss: 2.833751, ppl: 17.009150 +epoch: 1, batch: 9188, sum loss: 4811.471680, avg loss: 2.536358, ppl: 12.633580 +epoch: 1, batch: 9189, sum loss: 4850.053223, avg loss: 2.733964, ppl: 15.393794 +epoch: 1, batch: 9190, sum loss: 5231.849609, avg loss: 2.811311, ppl: 16.631708 +epoch: 1, batch: 9191, sum loss: 5182.025391, avg loss: 2.781549, ppl: 16.144005 +epoch: 1, batch: 9192, sum loss: 4212.137207, avg loss: 2.740493, ppl: 15.494619 +epoch: 1, batch: 9193, sum loss: 5936.874023, avg loss: 3.068152, ppl: 21.502123 +epoch: 1, batch: 9194, sum loss: 5429.867676, avg loss: 3.129607, ppl: 22.864986 +epoch: 1, batch: 9195, sum loss: 4837.039551, avg loss: 2.815506, ppl: 16.701628 +epoch: 1, batch: 9196, sum loss: 5009.725586, avg loss: 3.192942, ppl: 24.359982 +epoch: 1, batch: 9197, sum loss: 4529.455078, avg loss: 2.588260, ppl: 13.306597 +epoch: 1, batch: 9198, sum loss: 4291.957031, avg loss: 2.535120, ppl: 12.617939 +epoch: 1, batch: 9199, sum loss: 5234.074219, avg loss: 2.935544, ppl: 18.831741 +epoch: 1, batch: 9200, sum loss: 5126.976562, avg loss: 2.752000, ppl: 15.673954 +epoch: 1, batch: 9201, sum loss: 5010.953125, avg loss: 2.965061, ppl: 19.395885 +epoch: 1, batch: 9202, sum loss: 4136.445801, avg loss: 2.583664, ppl: 13.245581 +epoch: 1, batch: 9203, sum loss: 4254.820312, avg loss: 2.706629, ppl: 14.978694 +epoch: 1, batch: 9204, sum loss: 3960.452637, avg loss: 2.665177, ppl: 14.370487 
+epoch: 1, batch: 9205, sum loss: 5721.523438, avg loss: 2.862193, ppl: 17.499859 +epoch: 1, batch: 9206, sum loss: 3909.628662, avg loss: 2.388289, ppl: 10.894835 +epoch: 1, batch: 9207, sum loss: 5013.812988, avg loss: 2.788550, ppl: 16.257427 +epoch: 1, batch: 9208, sum loss: 5276.479004, avg loss: 2.758222, ppl: 15.771777 +epoch: 1, batch: 9209, sum loss: 4994.209961, avg loss: 2.741059, ppl: 15.503399 +epoch: 1, batch: 9210, sum loss: 3660.794189, avg loss: 2.758699, ppl: 15.779307 +epoch: 1, batch: 9211, sum loss: 4244.498535, avg loss: 2.485069, ppl: 12.001951 +epoch: 1, batch: 9212, sum loss: 4900.191895, avg loss: 2.671860, ppl: 14.466856 +epoch: 1, batch: 9213, sum loss: 3509.416992, avg loss: 2.287755, ppl: 9.852798 +epoch: 1, batch: 9214, sum loss: 4986.809082, avg loss: 2.897623, ppl: 18.130993 +epoch: 1, batch: 9215, sum loss: 5526.830566, avg loss: 2.981030, ppl: 19.708115 +epoch: 1, batch: 9216, sum loss: 4811.147949, avg loss: 2.843468, ppl: 17.175224 +epoch: 1, batch: 9217, sum loss: 4594.336426, avg loss: 2.641942, ppl: 14.040438 +epoch: 1, batch: 9218, sum loss: 3273.786621, avg loss: 2.499074, ppl: 12.171215 +epoch: 1, batch: 9219, sum loss: 5543.047852, avg loss: 2.829529, ppl: 16.937487 +epoch: 1, batch: 9220, sum loss: 3440.771973, avg loss: 2.411193, ppl: 11.147251 +epoch: 1, batch: 9221, sum loss: 4522.223633, avg loss: 2.639944, ppl: 14.012413 +epoch: 1, batch: 9222, sum loss: 5648.073242, avg loss: 2.829696, ppl: 16.940310 +epoch: 1, batch: 9223, sum loss: 4414.599121, avg loss: 2.735192, ppl: 15.412696 +epoch: 1, batch: 9224, sum loss: 3896.517822, avg loss: 2.540103, ppl: 12.680973 +epoch: 1, batch: 9225, sum loss: 5254.972168, avg loss: 2.660745, ppl: 14.306950 +epoch: 1, batch: 9226, sum loss: 3720.443359, avg loss: 2.507037, ppl: 12.268526 +epoch: 1, batch: 9227, sum loss: 4045.924561, avg loss: 2.622116, ppl: 13.764817 +epoch: 1, batch: 9228, sum loss: 5220.310547, avg loss: 2.967772, ppl: 19.448536 +epoch: 1, batch: 9229, sum 
loss: 3831.183350, avg loss: 2.466957, ppl: 11.786521 +epoch: 1, batch: 9230, sum loss: 5181.324219, avg loss: 2.856298, ppl: 17.397003 +epoch: 1, batch: 9231, sum loss: 5022.730469, avg loss: 2.543155, ppl: 12.719735 +epoch: 1, batch: 9232, sum loss: 5959.335938, avg loss: 3.049814, ppl: 21.111412 +epoch: 1, batch: 9233, sum loss: 4989.388672, avg loss: 2.926328, ppl: 18.658983 +epoch: 1, batch: 9234, sum loss: 3690.652344, avg loss: 2.497059, ppl: 12.146713 +epoch: 1, batch: 9235, sum loss: 4319.618164, avg loss: 2.678003, ppl: 14.555990 +epoch: 1, batch: 9236, sum loss: 4610.285156, avg loss: 2.758998, ppl: 15.784019 +epoch: 1, batch: 9237, sum loss: 4933.548340, avg loss: 2.781030, ppl: 16.135639 +epoch: 1, batch: 9238, sum loss: 6057.838867, avg loss: 3.070369, ppl: 21.549864 +epoch: 1, batch: 9239, sum loss: 4700.534180, avg loss: 2.768277, ppl: 15.931160 +epoch: 1, batch: 9240, sum loss: 4883.501953, avg loss: 2.867588, ppl: 17.594526 +epoch: 1, batch: 9241, sum loss: 4176.411133, avg loss: 2.563788, ppl: 12.984914 +epoch: 1, batch: 9242, sum loss: 3845.631592, avg loss: 2.556936, ppl: 12.896240 +epoch: 1, batch: 9243, sum loss: 4722.470703, avg loss: 2.524036, ppl: 12.478856 +epoch: 1, batch: 9244, sum loss: 4211.077637, avg loss: 2.583483, ppl: 13.243187 +epoch: 1, batch: 9245, sum loss: 4341.495605, avg loss: 2.698257, ppl: 14.853819 +epoch: 1, batch: 9246, sum loss: 4606.170898, avg loss: 2.741769, ppl: 15.514400 +epoch: 1, batch: 9247, sum loss: 4605.416992, avg loss: 2.625665, ppl: 13.813760 +epoch: 1, batch: 9248, sum loss: 4323.854492, avg loss: 2.550947, ppl: 12.819234 +epoch: 1, batch: 9249, sum loss: 3676.466797, avg loss: 2.631687, ppl: 13.897194 +epoch: 1, batch: 9250, sum loss: 4430.776855, avg loss: 2.882744, ppl: 17.863216 +epoch: 1, batch: 9251, sum loss: 4918.798828, avg loss: 2.774280, ppl: 16.027088 +epoch: 1, batch: 9252, sum loss: 4477.858398, avg loss: 2.676544, ppl: 14.534777 +epoch: 1, batch: 9253, sum loss: 4930.419434, avg loss: 
2.861532, ppl: 17.488298 +epoch: 1, batch: 9254, sum loss: 4445.750000, avg loss: 2.759621, ppl: 15.793862 +epoch: 1, batch: 9255, sum loss: 5068.504883, avg loss: 2.795645, ppl: 16.373194 +epoch: 1, batch: 9256, sum loss: 4665.766113, avg loss: 2.807320, ppl: 16.565470 +epoch: 1, batch: 9257, sum loss: 4551.989258, avg loss: 2.843216, ppl: 17.170906 +epoch: 1, batch: 9258, sum loss: 4394.988770, avg loss: 2.963580, ppl: 19.367180 +epoch: 1, batch: 9259, sum loss: 3957.482666, avg loss: 2.493688, ppl: 12.105842 +epoch: 1, batch: 9260, sum loss: 4129.024414, avg loss: 2.769299, ppl: 15.947448 +epoch: 1, batch: 9261, sum loss: 3662.493896, avg loss: 2.427100, ppl: 11.325991 +epoch: 1, batch: 9262, sum loss: 4388.400391, avg loss: 2.637260, ppl: 13.974859 +epoch: 1, batch: 9263, sum loss: 5325.723145, avg loss: 2.797124, ppl: 16.397415 +epoch: 1, batch: 9264, sum loss: 4699.367188, avg loss: 2.829240, ppl: 16.932581 +epoch: 1, batch: 9265, sum loss: 3722.108643, avg loss: 2.798578, ppl: 16.421280 +epoch: 1, batch: 9266, sum loss: 4367.773438, avg loss: 2.955192, ppl: 19.205406 +epoch: 1, batch: 9267, sum loss: 4743.382812, avg loss: 2.899378, ppl: 18.162849 +epoch: 1, batch: 9268, sum loss: 4446.849609, avg loss: 2.948839, ppl: 19.083788 +epoch: 1, batch: 9269, sum loss: 3932.394531, avg loss: 2.715742, ppl: 15.115824 +epoch: 1, batch: 9270, sum loss: 4410.108887, avg loss: 2.850749, ppl: 17.300739 +epoch: 1, batch: 9271, sum loss: 4681.959473, avg loss: 2.669304, ppl: 14.429924 +epoch: 1, batch: 9272, sum loss: 4621.845215, avg loss: 2.621580, ppl: 13.757442 +epoch: 1, batch: 9273, sum loss: 5591.625000, avg loss: 2.909274, ppl: 18.343483 +epoch: 1, batch: 9274, sum loss: 5705.089844, avg loss: 2.924188, ppl: 18.619095 +epoch: 1, batch: 9275, sum loss: 4679.791992, avg loss: 2.618798, ppl: 13.719223 +epoch: 1, batch: 9276, sum loss: 5316.001465, avg loss: 3.225729, ppl: 25.171917 +epoch: 1, batch: 9277, sum loss: 4715.241211, avg loss: 2.859455, ppl: 17.452011 
+epoch: 1, batch: 9278, sum loss: 4621.443359, avg loss: 2.908397, ppl: 18.327404 +epoch: 1, batch: 9279, sum loss: 5984.079590, avg loss: 2.846850, ppl: 17.233418 +epoch: 1, batch: 9280, sum loss: 4633.751465, avg loss: 2.728947, ppl: 15.316745 +epoch: 1, batch: 9281, sum loss: 4638.095703, avg loss: 2.677884, ppl: 14.554269 +epoch: 1, batch: 9282, sum loss: 4902.597656, avg loss: 2.822451, ppl: 16.818027 +epoch: 1, batch: 9283, sum loss: 4996.041992, avg loss: 2.752640, ppl: 15.683987 +epoch: 1, batch: 9284, sum loss: 4957.308105, avg loss: 2.842493, ppl: 17.158489 +epoch: 1, batch: 9285, sum loss: 4621.799316, avg loss: 2.635005, ppl: 13.943385 +epoch: 1, batch: 9286, sum loss: 5357.024414, avg loss: 3.066413, ppl: 21.464779 +epoch: 1, batch: 9287, sum loss: 5596.230469, avg loss: 2.800916, ppl: 16.459721 +epoch: 1, batch: 9288, sum loss: 3595.335693, avg loss: 2.462559, ppl: 11.734799 +epoch: 1, batch: 9289, sum loss: 4979.024902, avg loss: 2.845157, ppl: 17.204262 +epoch: 1, batch: 9290, sum loss: 4553.386719, avg loss: 2.535293, ppl: 12.620132 +epoch: 1, batch: 9291, sum loss: 5174.694824, avg loss: 2.900614, ppl: 18.185305 +epoch: 1, batch: 9292, sum loss: 4652.100586, avg loss: 2.664433, ppl: 14.359812 +epoch: 1, batch: 9293, sum loss: 4009.109131, avg loss: 2.759194, ppl: 15.787115 +epoch: 1, batch: 9294, sum loss: 4720.371582, avg loss: 2.876521, ppl: 17.752411 +epoch: 1, batch: 9295, sum loss: 3982.405029, avg loss: 2.580950, ppl: 13.209676 +epoch: 1, batch: 9296, sum loss: 4673.391113, avg loss: 2.785096, ppl: 16.201372 +epoch: 1, batch: 9297, sum loss: 5450.823730, avg loss: 2.976966, ppl: 19.628168 +epoch: 1, batch: 9298, sum loss: 4026.119873, avg loss: 2.538537, ppl: 12.661135 +epoch: 1, batch: 9299, sum loss: 4364.911621, avg loss: 2.648612, ppl: 14.134400 +epoch: 1, batch: 9300, sum loss: 5372.852539, avg loss: 2.890184, ppl: 17.996624 +epoch: 1, batch: 9301, sum loss: 4141.221680, avg loss: 2.609466, ppl: 13.591785 +epoch: 1, batch: 9302, sum 
loss: 4712.349609, avg loss: 2.657840, ppl: 14.265439 +epoch: 1, batch: 9303, sum loss: 4356.764648, avg loss: 2.726386, ppl: 15.277572 +epoch: 1, batch: 9304, sum loss: 5156.779785, avg loss: 2.711241, ppl: 15.047935 +epoch: 1, batch: 9305, sum loss: 5057.778320, avg loss: 2.741343, ppl: 15.507802 +epoch: 1, batch: 9306, sum loss: 4898.738281, avg loss: 2.699029, ppl: 14.865297 +epoch: 1, batch: 9307, sum loss: 5650.046387, avg loss: 3.109547, ppl: 22.410883 +epoch: 1, batch: 9308, sum loss: 3675.132080, avg loss: 2.360393, ppl: 10.595115 +epoch: 1, batch: 9309, sum loss: 3583.211670, avg loss: 2.585290, ppl: 13.267132 +epoch: 1, batch: 9310, sum loss: 5191.064941, avg loss: 3.023334, ppl: 20.559729 +epoch: 1, batch: 9311, sum loss: 5407.117188, avg loss: 2.979128, ppl: 19.670658 +epoch: 1, batch: 9312, sum loss: 4261.987305, avg loss: 2.565917, ppl: 13.012579 +epoch: 1, batch: 9313, sum loss: 4543.036133, avg loss: 2.811285, ppl: 16.631271 +epoch: 1, batch: 9314, sum loss: 3908.995117, avg loss: 2.653765, ppl: 14.207425 +epoch: 1, batch: 9315, sum loss: 4718.265137, avg loss: 2.984355, ppl: 19.773739 +epoch: 1, batch: 9316, sum loss: 4194.772949, avg loss: 2.757905, ppl: 15.766773 +epoch: 1, batch: 9317, sum loss: 5303.965332, avg loss: 2.759608, ppl: 15.793644 +epoch: 1, batch: 9318, sum loss: 5337.449707, avg loss: 3.012105, ppl: 20.330149 +epoch: 1, batch: 9319, sum loss: 4990.173340, avg loss: 2.766171, ppl: 15.897653 +epoch: 1, batch: 9320, sum loss: 3945.096436, avg loss: 2.653057, ppl: 14.197378 +epoch: 1, batch: 9321, sum loss: 4435.896484, avg loss: 2.580510, ppl: 13.203869 +epoch: 1, batch: 9322, sum loss: 4829.338379, avg loss: 2.900504, ppl: 18.183300 +epoch: 1, batch: 9323, sum loss: 4306.380859, avg loss: 2.555716, ppl: 12.880513 +epoch: 1, batch: 9324, sum loss: 5310.753418, avg loss: 2.730465, ppl: 15.340014 +epoch: 1, batch: 9325, sum loss: 4736.912598, avg loss: 2.888361, ppl: 17.963850 +epoch: 1, batch: 9326, sum loss: 3848.899658, avg loss: 
2.648933, ppl: 14.138947 +epoch: 1, batch: 9327, sum loss: 3763.140137, avg loss: 2.515468, ppl: 12.372399 +epoch: 1, batch: 9328, sum loss: 5765.712402, avg loss: 2.959811, ppl: 19.294333 +epoch: 1, batch: 9329, sum loss: 4242.149902, avg loss: 2.845171, ppl: 17.204504 +epoch: 1, batch: 9330, sum loss: 5116.651367, avg loss: 2.633377, ppl: 13.920702 +epoch: 1, batch: 9331, sum loss: 4570.050293, avg loss: 2.788316, ppl: 16.253626 +epoch: 1, batch: 9332, sum loss: 4085.327148, avg loss: 2.483481, ppl: 11.982909 +epoch: 1, batch: 9333, sum loss: 4853.047363, avg loss: 2.617609, ppl: 13.702921 +epoch: 1, batch: 9334, sum loss: 5007.411621, avg loss: 2.670619, ppl: 14.448917 +epoch: 1, batch: 9335, sum loss: 4025.426758, avg loss: 2.587035, ppl: 13.290310 +epoch: 1, batch: 9336, sum loss: 4650.551758, avg loss: 2.703809, ppl: 14.936521 +epoch: 1, batch: 9337, sum loss: 4096.959961, avg loss: 2.447407, ppl: 11.558343 +epoch: 1, batch: 9338, sum loss: 4253.018066, avg loss: 2.651508, ppl: 14.175394 +epoch: 1, batch: 9339, sum loss: 4719.802734, avg loss: 2.684757, ppl: 14.654640 +epoch: 1, batch: 9340, sum loss: 5632.155273, avg loss: 2.994235, ppl: 19.970068 +epoch: 1, batch: 9341, sum loss: 4755.845215, avg loss: 2.722293, ppl: 15.215165 +epoch: 1, batch: 9342, sum loss: 5651.247559, avg loss: 2.954128, ppl: 19.184992 +epoch: 1, batch: 9343, sum loss: 3611.017578, avg loss: 2.584837, ppl: 13.261130 +epoch: 1, batch: 9344, sum loss: 5341.966309, avg loss: 2.793915, ppl: 16.344889 +epoch: 1, batch: 9345, sum loss: 4193.267578, avg loss: 2.717607, ppl: 15.144039 +epoch: 1, batch: 9346, sum loss: 4447.093262, avg loss: 2.579521, ppl: 13.190814 +epoch: 1, batch: 9347, sum loss: 3096.484619, avg loss: 2.065700, ppl: 7.890822 +epoch: 1, batch: 9348, sum loss: 4195.556152, avg loss: 2.758420, ppl: 15.774895 +epoch: 1, batch: 9349, sum loss: 3826.413574, avg loss: 2.459135, ppl: 11.694686 +epoch: 1, batch: 9350, sum loss: 5000.986816, avg loss: 2.701776, ppl: 14.906178 +epoch: 
1, batch: 9351, sum loss: 5157.761719, avg loss: 2.918937, ppl: 18.521587 +epoch: 1, batch: 9352, sum loss: 5210.182129, avg loss: 2.848651, ppl: 17.264475 +epoch: 1, batch: 9353, sum loss: 4109.889648, avg loss: 2.559084, ppl: 12.923979 +epoch: 1, batch: 9354, sum loss: 4868.883301, avg loss: 2.852304, ppl: 17.327663 +epoch: 1, batch: 9355, sum loss: 5228.438965, avg loss: 2.874348, ppl: 17.713869 +epoch: 1, batch: 9356, sum loss: 4876.868164, avg loss: 2.884014, ppl: 17.885929 +epoch: 1, batch: 9357, sum loss: 4597.279785, avg loss: 2.836076, ppl: 17.048738 +epoch: 1, batch: 9358, sum loss: 4192.809570, avg loss: 2.708533, ppl: 15.007248 +epoch: 1, batch: 9359, sum loss: 3997.206299, avg loss: 2.689910, ppl: 14.730350 +epoch: 1, batch: 9360, sum loss: 4318.238770, avg loss: 2.721008, ppl: 15.195625 +epoch: 1, batch: 9361, sum loss: 4215.033203, avg loss: 2.869321, ppl: 17.625053 +epoch: 1, batch: 9362, sum loss: 4138.702148, avg loss: 2.484215, ppl: 11.991700 +epoch: 1, batch: 9363, sum loss: 4753.071777, avg loss: 2.790999, ppl: 16.297295 +epoch: 1, batch: 9364, sum loss: 5072.684570, avg loss: 2.910318, ppl: 18.362640 +epoch: 1, batch: 9365, sum loss: 4898.468262, avg loss: 2.697394, ppl: 14.841011 +epoch: 1, batch: 9366, sum loss: 4248.086914, avg loss: 2.933762, ppl: 18.798214 +epoch: 1, batch: 9367, sum loss: 5273.396484, avg loss: 2.753732, ppl: 15.701115 +epoch: 1, batch: 9368, sum loss: 5113.168457, avg loss: 2.818726, ppl: 16.755487 +epoch: 1, batch: 9369, sum loss: 4472.416992, avg loss: 2.737097, ppl: 15.442096 +epoch: 1, batch: 9370, sum loss: 4477.011230, avg loss: 2.895868, ppl: 18.099197 +epoch: 1, batch: 9371, sum loss: 5028.825684, avg loss: 2.782969, ppl: 16.166954 +epoch: 1, batch: 9372, sum loss: 4136.707031, avg loss: 2.798855, ppl: 16.425821 +epoch: 1, batch: 9373, sum loss: 4240.321289, avg loss: 2.620718, ppl: 13.745586 +epoch: 1, batch: 9374, sum loss: 4779.358398, avg loss: 2.824680, ppl: 16.855547 +epoch: 1, batch: 9375, sum loss: 
4402.913574, avg loss: 2.657160, ppl: 14.255739 +epoch: 1, batch: 9376, sum loss: 4468.040039, avg loss: 2.741129, ppl: 15.504478 +epoch: 1, batch: 9377, sum loss: 3517.411133, avg loss: 2.320192, ppl: 10.177629 +epoch: 1, batch: 9378, sum loss: 4432.829102, avg loss: 2.751601, ppl: 15.667695 +epoch: 1, batch: 9379, sum loss: 5254.491699, avg loss: 2.932194, ppl: 18.768768 +epoch: 1, batch: 9380, sum loss: 4381.318359, avg loss: 2.781789, ppl: 16.147888 +epoch: 1, batch: 9381, sum loss: 4806.899902, avg loss: 2.745231, ppl: 15.568215 +epoch: 1, batch: 9382, sum loss: 4929.643066, avg loss: 2.810515, ppl: 16.618469 +epoch: 1, batch: 9383, sum loss: 4640.223145, avg loss: 2.880337, ppl: 17.820282 +epoch: 1, batch: 9384, sum loss: 4007.467285, avg loss: 2.364287, ppl: 10.636456 +epoch: 1, batch: 9385, sum loss: 4565.688477, avg loss: 2.760392, ppl: 15.806041 +epoch: 1, batch: 9386, sum loss: 3830.225098, avg loss: 2.516574, ppl: 12.386088 +epoch: 1, batch: 9387, sum loss: 5048.431641, avg loss: 2.675375, ppl: 14.517786 +epoch: 1, batch: 9388, sum loss: 4526.870117, avg loss: 2.696170, ppl: 14.822856 +epoch: 1, batch: 9389, sum loss: 4363.546875, avg loss: 2.657459, ppl: 14.260002 +epoch: 1, batch: 9390, sum loss: 5148.180176, avg loss: 2.839592, ppl: 17.108784 +epoch: 1, batch: 9391, sum loss: 4746.324707, avg loss: 2.732484, ppl: 15.371023 +epoch: 1, batch: 9392, sum loss: 3413.522461, avg loss: 2.333235, ppl: 10.311242 +epoch: 1, batch: 9393, sum loss: 3786.862061, avg loss: 2.415091, ppl: 11.190784 +epoch: 1, batch: 9394, sum loss: 4514.266113, avg loss: 2.600384, ppl: 13.468905 +epoch: 1, batch: 9395, sum loss: 5276.902344, avg loss: 2.905783, ppl: 18.279554 +epoch: 1, batch: 9396, sum loss: 4976.445801, avg loss: 2.893282, ppl: 18.052467 +epoch: 1, batch: 9397, sum loss: 5302.405273, avg loss: 2.997403, ppl: 20.033436 +epoch: 1, batch: 9398, sum loss: 5179.747070, avg loss: 2.832011, ppl: 16.979565 +epoch: 1, batch: 9399, sum loss: 4514.324707, avg loss: 
2.600417, ppl: 13.469358 +epoch: 1, batch: 9400, sum loss: 5226.464844, avg loss: 3.014109, ppl: 20.370930 +epoch: 1, batch: 9401, sum loss: 4741.435059, avg loss: 2.840884, ppl: 17.130901 +epoch: 1, batch: 9402, sum loss: 5777.991699, avg loss: 2.870339, ppl: 17.642992 +epoch: 1, batch: 9403, sum loss: 5124.071777, avg loss: 2.916375, ppl: 18.474205 +epoch: 1, batch: 9404, sum loss: 5570.564453, avg loss: 2.933420, ppl: 18.791788 +epoch: 1, batch: 9405, sum loss: 5132.483398, avg loss: 2.682950, ppl: 14.628183 +epoch: 1, batch: 9406, sum loss: 4528.844238, avg loss: 2.811201, ppl: 16.629877 +epoch: 1, batch: 9407, sum loss: 4396.927246, avg loss: 2.681053, ppl: 14.600462 +epoch: 1, batch: 9408, sum loss: 3755.416992, avg loss: 2.364872, ppl: 10.642679 +epoch: 1, batch: 9409, sum loss: 4892.059570, avg loss: 2.743724, ppl: 15.544765 +epoch: 1, batch: 9410, sum loss: 5134.596191, avg loss: 2.897628, ppl: 18.131084 +epoch: 1, batch: 9411, sum loss: 4041.534180, avg loss: 2.671206, ppl: 14.457391 +epoch: 1, batch: 9412, sum loss: 5001.873047, avg loss: 2.783458, ppl: 16.174849 +epoch: 1, batch: 9413, sum loss: 3841.622070, avg loss: 2.419158, ppl: 11.236389 +epoch: 1, batch: 9414, sum loss: 5508.750977, avg loss: 3.115809, ppl: 22.551678 +epoch: 1, batch: 9415, sum loss: 4917.087402, avg loss: 2.850486, ppl: 17.296179 +epoch: 1, batch: 9416, sum loss: 3848.024902, avg loss: 2.500341, ppl: 12.186651 +epoch: 1, batch: 9417, sum loss: 4226.889648, avg loss: 2.737623, ppl: 15.450216 +epoch: 1, batch: 9418, sum loss: 5093.713379, avg loss: 2.640598, ppl: 14.021584 +epoch: 1, batch: 9419, sum loss: 4684.699707, avg loss: 2.678502, ppl: 14.563259 +epoch: 1, batch: 9420, sum loss: 4818.667969, avg loss: 2.866548, ppl: 17.576246 +epoch: 1, batch: 9421, sum loss: 4676.416016, avg loss: 3.009277, ppl: 20.272736 +epoch: 1, batch: 9422, sum loss: 4106.304199, avg loss: 2.789609, ppl: 16.274654 +epoch: 1, batch: 9423, sum loss: 4676.181641, avg loss: 2.698316, ppl: 14.854693 
+epoch: 1, batch: 9424, sum loss: 4697.929688, avg loss: 2.740916, ppl: 15.501174 +epoch: 1, batch: 9425, sum loss: 6071.315918, avg loss: 3.292471, ppl: 26.909266 +epoch: 1, batch: 9426, sum loss: 4945.510742, avg loss: 2.835728, ppl: 17.042810 +epoch: 1, batch: 9427, sum loss: 4518.398926, avg loss: 2.626976, ppl: 13.831880 +epoch: 1, batch: 9428, sum loss: 3880.013184, avg loss: 2.759611, ppl: 15.793700 +epoch: 1, batch: 9429, sum loss: 4084.371094, avg loss: 2.461948, ppl: 11.727631 +epoch: 1, batch: 9430, sum loss: 4532.130859, avg loss: 2.621244, ppl: 13.752821 +epoch: 1, batch: 9431, sum loss: 5569.035645, avg loss: 2.978094, ppl: 19.650330 +epoch: 1, batch: 9432, sum loss: 4682.745605, avg loss: 2.728873, ppl: 15.315613 +epoch: 1, batch: 9433, sum loss: 3833.799805, avg loss: 2.573020, ppl: 13.105346 +epoch: 1, batch: 9434, sum loss: 4970.977051, avg loss: 2.749434, ppl: 15.633784 +epoch: 1, batch: 9435, sum loss: 5462.550293, avg loss: 2.954327, ppl: 19.188803 +epoch: 1, batch: 9436, sum loss: 4109.073242, avg loss: 2.494884, ppl: 12.120322 +epoch: 1, batch: 9437, sum loss: 5286.452637, avg loss: 2.936918, ppl: 18.857643 +epoch: 1, batch: 9438, sum loss: 4820.583984, avg loss: 2.855796, ppl: 17.388279 +epoch: 1, batch: 9439, sum loss: 4323.262207, avg loss: 2.707115, ppl: 14.985974 +epoch: 1, batch: 9440, sum loss: 4412.110352, avg loss: 2.677251, ppl: 14.545059 +epoch: 1, batch: 9441, sum loss: 3548.874512, avg loss: 2.558669, ppl: 12.918615 +epoch: 1, batch: 9442, sum loss: 4982.337891, avg loss: 2.740560, ppl: 15.495657 +epoch: 1, batch: 9443, sum loss: 5129.472656, avg loss: 2.804523, ppl: 16.519197 +epoch: 1, batch: 9444, sum loss: 5190.258789, avg loss: 2.854928, ppl: 17.373182 +epoch: 1, batch: 9445, sum loss: 4434.775879, avg loss: 2.717387, ppl: 15.140711 +epoch: 1, batch: 9446, sum loss: 4907.375000, avg loss: 2.859776, ppl: 17.457613 +epoch: 1, batch: 9447, sum loss: 4868.157227, avg loss: 2.661650, ppl: 14.319893 +epoch: 1, batch: 9448, sum 
loss: 4965.961426, avg loss: 2.791434, ppl: 16.304384 +epoch: 1, batch: 9449, sum loss: 5961.663574, avg loss: 2.871707, ppl: 17.667145 +epoch: 1, batch: 9450, sum loss: 5077.111816, avg loss: 2.771349, ppl: 15.980180 +epoch: 1, batch: 9451, sum loss: 4492.963379, avg loss: 2.508634, ppl: 12.288131 +epoch: 1, batch: 9452, sum loss: 4029.156738, avg loss: 2.553331, ppl: 12.849837 +epoch: 1, batch: 9453, sum loss: 5374.723633, avg loss: 2.794968, ppl: 16.362103 +epoch: 1, batch: 9454, sum loss: 4753.588867, avg loss: 2.809450, ppl: 16.600780 +epoch: 1, batch: 9455, sum loss: 4448.356934, avg loss: 2.737450, ppl: 15.447549 +epoch: 1, batch: 9456, sum loss: 5009.611328, avg loss: 2.783118, ppl: 16.169352 +epoch: 1, batch: 9457, sum loss: 4527.109375, avg loss: 2.765491, ppl: 15.886847 +epoch: 1, batch: 9458, sum loss: 5257.967773, avg loss: 3.041046, ppl: 20.927114 +epoch: 1, batch: 9459, sum loss: 5648.291016, avg loss: 2.906995, ppl: 18.301720 +epoch: 1, batch: 9460, sum loss: 4394.615234, avg loss: 2.668254, ppl: 14.414784 +epoch: 1, batch: 9461, sum loss: 3712.874512, avg loss: 2.673056, ppl: 14.484160 +epoch: 1, batch: 9462, sum loss: 4473.789062, avg loss: 2.829721, ppl: 16.940733 +epoch: 1, batch: 9463, sum loss: 3921.304688, avg loss: 2.738341, ppl: 15.461319 +epoch: 1, batch: 9464, sum loss: 5546.303223, avg loss: 3.034083, ppl: 20.781904 +epoch: 1, batch: 9465, sum loss: 5773.492188, avg loss: 2.901252, ppl: 18.196918 +epoch: 1, batch: 9466, sum loss: 5672.439941, avg loss: 2.919424, ppl: 18.530603 +epoch: 1, batch: 9467, sum loss: 4148.421387, avg loss: 2.581470, ppl: 13.216546 +epoch: 1, batch: 9468, sum loss: 4928.647461, avg loss: 2.694723, ppl: 14.801413 +epoch: 1, batch: 9469, sum loss: 4450.343750, avg loss: 2.814892, ppl: 16.691366 +epoch: 1, batch: 9470, sum loss: 4114.937012, avg loss: 2.601098, ppl: 13.478527 +epoch: 1, batch: 9471, sum loss: 4960.406738, avg loss: 2.961437, ppl: 19.325722 +epoch: 1, batch: 9472, sum loss: 5396.808594, avg loss: 
2.544464, ppl: 12.736401 +epoch: 1, batch: 9473, sum loss: 4515.942383, avg loss: 2.499138, ppl: 12.171996 +epoch: 1, batch: 9474, sum loss: 6532.238770, avg loss: 2.988215, ppl: 19.850227 +epoch: 1, batch: 9475, sum loss: 4472.955078, avg loss: 2.692929, ppl: 14.774889 +epoch: 1, batch: 9476, sum loss: 4470.532715, avg loss: 2.685005, ppl: 14.658271 +epoch: 1, batch: 9477, sum loss: 4325.921875, avg loss: 2.516534, ppl: 12.385594 +epoch: 1, batch: 9478, sum loss: 4135.581543, avg loss: 2.624100, ppl: 13.792155 +epoch: 1, batch: 9479, sum loss: 4367.516602, avg loss: 2.546657, ppl: 12.764359 +epoch: 1, batch: 9480, sum loss: 3810.837402, avg loss: 2.369924, ppl: 10.696575 +epoch: 1, batch: 9481, sum loss: 4782.858398, avg loss: 2.810140, ppl: 16.612246 +epoch: 1, batch: 9482, sum loss: 4750.502441, avg loss: 2.763527, ppl: 15.855666 +epoch: 1, batch: 9483, sum loss: 4584.512207, avg loss: 2.622719, ppl: 13.773119 +epoch: 1, batch: 9484, sum loss: 4687.230957, avg loss: 2.621494, ppl: 13.756258 +epoch: 1, batch: 9485, sum loss: 5054.274414, avg loss: 2.881570, ppl: 17.842270 +epoch: 1, batch: 9486, sum loss: 4400.189453, avg loss: 2.765675, ppl: 15.889767 +epoch: 1, batch: 9487, sum loss: 4044.749512, avg loss: 2.687541, ppl: 14.695498 +epoch: 1, batch: 9488, sum loss: 5009.793945, avg loss: 2.856211, ppl: 17.395489 +epoch: 1, batch: 9489, sum loss: 3294.154541, avg loss: 2.375021, ppl: 10.751242 +epoch: 1, batch: 9490, sum loss: 4726.745605, avg loss: 2.636222, ppl: 13.960360 +epoch: 1, batch: 9491, sum loss: 4900.647461, avg loss: 2.789213, ppl: 16.268211 +epoch: 1, batch: 9492, sum loss: 4710.092773, avg loss: 2.727326, ppl: 15.291947 +epoch: 1, batch: 9493, sum loss: 4486.101074, avg loss: 2.546028, ppl: 12.756333 +epoch: 1, batch: 9494, sum loss: 5264.599609, avg loss: 2.713711, ppl: 15.085156 +epoch: 1, batch: 9495, sum loss: 4487.554199, avg loss: 2.809990, ppl: 16.609755 +epoch: 1, batch: 9496, sum loss: 4349.586914, avg loss: 2.686589, ppl: 14.681505 
+epoch: 1, batch: 9497, sum loss: 4658.410645, avg loss: 2.511273, ppl: 12.320601 +epoch: 1, batch: 9498, sum loss: 4535.259766, avg loss: 2.486436, ppl: 12.018371 +epoch: 1, batch: 9499, sum loss: 5090.812988, avg loss: 2.978826, ppl: 19.664709 +epoch: 1, batch: 9500, sum loss: 4008.779053, avg loss: 2.703155, ppl: 14.926756 +epoch: 1, batch: 9501, sum loss: 4233.430176, avg loss: 2.573514, ppl: 13.111815 +epoch: 1, batch: 9502, sum loss: 4689.121094, avg loss: 2.817981, ppl: 16.743021 +epoch: 1, batch: 9503, sum loss: 4354.332520, avg loss: 2.626256, ppl: 13.821923 +epoch: 1, batch: 9504, sum loss: 5261.786133, avg loss: 2.757750, ppl: 15.764326 +epoch: 1, batch: 9505, sum loss: 3521.214844, avg loss: 2.355328, ppl: 10.541582 +epoch: 1, batch: 9506, sum loss: 4248.267090, avg loss: 2.699026, ppl: 14.865248 +epoch: 1, batch: 9507, sum loss: 4593.125488, avg loss: 2.665772, ppl: 14.379048 +epoch: 1, batch: 9508, sum loss: 4422.008301, avg loss: 2.630582, ppl: 13.881845 +epoch: 1, batch: 9509, sum loss: 5044.916992, avg loss: 2.866430, ppl: 17.574167 +epoch: 1, batch: 9510, sum loss: 5193.594727, avg loss: 3.098804, ppl: 22.171413 +epoch: 1, batch: 9511, sum loss: 4567.642578, avg loss: 2.812588, ppl: 16.652956 +epoch: 1, batch: 9512, sum loss: 4313.469727, avg loss: 2.634985, ppl: 13.943100 +epoch: 1, batch: 9513, sum loss: 4387.095703, avg loss: 2.691470, ppl: 14.753343 +epoch: 1, batch: 9514, sum loss: 5086.380371, avg loss: 2.762836, ppl: 15.844710 +epoch: 1, batch: 9515, sum loss: 4696.555664, avg loss: 2.879556, ppl: 17.806364 +epoch: 1, batch: 9516, sum loss: 4629.903320, avg loss: 2.772397, ppl: 15.996937 +epoch: 1, batch: 9517, sum loss: 4187.176758, avg loss: 2.486447, ppl: 12.018499 +epoch: 1, batch: 9518, sum loss: 4168.267578, avg loss: 2.546284, ppl: 12.759606 +epoch: 1, batch: 9519, sum loss: 5110.137695, avg loss: 2.692380, ppl: 14.766786 +epoch: 1, batch: 9520, sum loss: 4753.000977, avg loss: 2.703641, ppl: 14.934007 +epoch: 1, batch: 9521, sum 
loss: 5115.852051, avg loss: 2.988231, ppl: 19.850540 +epoch: 1, batch: 9522, sum loss: 4641.821289, avg loss: 2.550451, ppl: 12.812884 +epoch: 1, batch: 9523, sum loss: 4899.687988, avg loss: 2.708507, ppl: 15.006847 +epoch: 1, batch: 9524, sum loss: 4264.050781, avg loss: 2.463345, ppl: 11.744033 +epoch: 1, batch: 9525, sum loss: 4138.500488, avg loss: 2.500605, ppl: 12.189862 +epoch: 1, batch: 9526, sum loss: 4388.946777, avg loss: 2.655140, ppl: 14.226980 +epoch: 1, batch: 9527, sum loss: 4449.775391, avg loss: 2.696834, ppl: 14.832691 +epoch: 1, batch: 9528, sum loss: 3818.658936, avg loss: 2.642671, ppl: 14.050677 +epoch: 1, batch: 9529, sum loss: 3930.450195, avg loss: 2.668330, ppl: 14.415877 +epoch: 1, batch: 9530, sum loss: 3807.364746, avg loss: 2.369238, ppl: 10.689240 +epoch: 1, batch: 9531, sum loss: 4058.072510, avg loss: 2.325543, ppl: 10.232234 +epoch: 1, batch: 9532, sum loss: 4089.671875, avg loss: 2.710187, ppl: 15.032085 +epoch: 1, batch: 9533, sum loss: 5250.735352, avg loss: 2.968194, ppl: 19.456755 +epoch: 1, batch: 9534, sum loss: 4105.312012, avg loss: 2.693774, ppl: 14.787381 +epoch: 1, batch: 9535, sum loss: 4084.282471, avg loss: 2.535247, ppl: 12.619546 +epoch: 1, batch: 9536, sum loss: 5154.642090, avg loss: 2.763883, ppl: 15.861315 +epoch: 1, batch: 9537, sum loss: 3902.906982, avg loss: 2.644246, ppl: 14.072828 +epoch: 1, batch: 9538, sum loss: 3374.719727, avg loss: 2.438381, ppl: 11.454483 +epoch: 1, batch: 9539, sum loss: 3651.839355, avg loss: 2.632905, ppl: 13.914132 +epoch: 1, batch: 9540, sum loss: 5248.243652, avg loss: 2.800557, ppl: 16.453808 +epoch: 1, batch: 9541, sum loss: 4638.865234, avg loss: 2.727140, ppl: 15.289097 +epoch: 1, batch: 9542, sum loss: 4626.070312, avg loss: 2.718020, ppl: 15.150298 +epoch: 1, batch: 9543, sum loss: 4770.720703, avg loss: 2.738646, ppl: 15.466023 +epoch: 1, batch: 9544, sum loss: 5811.277832, avg loss: 2.929071, ppl: 18.710249 +epoch: 1, batch: 9545, sum loss: 4085.349365, avg loss: 
2.368319, ppl: 10.679420 +epoch: 1, batch: 9546, sum loss: 4280.322754, avg loss: 2.541759, ppl: 12.702001 +epoch: 1, batch: 9547, sum loss: 4112.623047, avg loss: 2.686233, ppl: 14.676291 +epoch: 1, batch: 9548, sum loss: 5583.359863, avg loss: 3.067780, ppl: 21.494139 +epoch: 1, batch: 9549, sum loss: 4462.435547, avg loss: 2.853220, ppl: 17.343534 +epoch: 1, batch: 9550, sum loss: 5451.751953, avg loss: 2.858811, ppl: 17.440771 +epoch: 1, batch: 9551, sum loss: 4301.503418, avg loss: 2.525839, ppl: 12.501378 +epoch: 1, batch: 9552, sum loss: 5860.674316, avg loss: 2.843607, ppl: 17.177616 +epoch: 1, batch: 9553, sum loss: 4836.904785, avg loss: 2.875686, ppl: 17.737595 +epoch: 1, batch: 9554, sum loss: 4459.266602, avg loss: 2.802807, ppl: 16.490877 +epoch: 1, batch: 9555, sum loss: 4908.955078, avg loss: 2.819618, ppl: 16.770447 +epoch: 1, batch: 9556, sum loss: 4235.654297, avg loss: 2.589031, ppl: 13.316858 +epoch: 1, batch: 9557, sum loss: 3547.202637, avg loss: 2.579784, ppl: 13.194283 +epoch: 1, batch: 9558, sum loss: 5042.683594, avg loss: 2.891447, ppl: 18.019361 +epoch: 1, batch: 9559, sum loss: 4524.958984, avg loss: 2.924990, ppl: 18.634033 +epoch: 1, batch: 9560, sum loss: 3955.644531, avg loss: 2.805422, ppl: 16.534048 +epoch: 1, batch: 9561, sum loss: 5138.452148, avg loss: 2.800246, ppl: 16.448700 +epoch: 1, batch: 9562, sum loss: 4291.942871, avg loss: 2.568488, ppl: 13.046082 +epoch: 1, batch: 9563, sum loss: 3894.689209, avg loss: 2.647647, ppl: 14.120779 +epoch: 1, batch: 9564, sum loss: 5213.065430, avg loss: 2.967026, ppl: 19.434042 +epoch: 1, batch: 9565, sum loss: 4861.854492, avg loss: 2.773448, ppl: 16.013758 +epoch: 1, batch: 9566, sum loss: 5546.413086, avg loss: 2.890262, ppl: 17.998026 +epoch: 1, batch: 9567, sum loss: 4975.464844, avg loss: 2.866051, ppl: 17.567511 +epoch: 1, batch: 9568, sum loss: 5422.620117, avg loss: 3.029397, ppl: 20.684746 +epoch: 1, batch: 9569, sum loss: 5425.562500, avg loss: 2.910709, ppl: 18.369825 
+epoch: 1, batch: 9570, sum loss: 5224.610352, avg loss: 2.799898, ppl: 16.442972 +epoch: 1, batch: 9571, sum loss: 3477.537842, avg loss: 2.566448, ppl: 13.019503 +epoch: 1, batch: 9572, sum loss: 4250.657227, avg loss: 2.590285, ppl: 13.333566 +epoch: 1, batch: 9573, sum loss: 4003.665039, avg loss: 2.578020, ppl: 13.171035 +epoch: 1, batch: 9574, sum loss: 4825.679688, avg loss: 2.648562, ppl: 14.133699 +epoch: 1, batch: 9575, sum loss: 3786.024658, avg loss: 2.426939, ppl: 11.324166 +epoch: 1, batch: 9576, sum loss: 4845.356445, avg loss: 2.676993, ppl: 14.541297 +epoch: 1, batch: 9577, sum loss: 4460.888672, avg loss: 2.817997, ppl: 16.743273 +epoch: 1, batch: 9578, sum loss: 3916.987793, avg loss: 2.440491, ppl: 11.478672 +epoch: 1, batch: 9579, sum loss: 5317.695312, avg loss: 3.006046, ppl: 20.207342 +epoch: 1, batch: 9580, sum loss: 4812.269531, avg loss: 2.676457, ppl: 14.533505 +epoch: 1, batch: 9581, sum loss: 3991.016846, avg loss: 2.687553, ppl: 14.695678 +epoch: 1, batch: 9582, sum loss: 4502.294434, avg loss: 2.826299, ppl: 16.882864 +epoch: 1, batch: 9583, sum loss: 4282.734375, avg loss: 2.640403, ppl: 14.018856 +epoch: 1, batch: 9584, sum loss: 4092.929443, avg loss: 2.638897, ppl: 13.997755 +epoch: 1, batch: 9585, sum loss: 4486.444336, avg loss: 2.689715, ppl: 14.727477 +epoch: 1, batch: 9586, sum loss: 4417.758301, avg loss: 2.972919, ppl: 19.548908 +epoch: 1, batch: 9587, sum loss: 4746.668457, avg loss: 2.892546, ppl: 18.039183 +epoch: 1, batch: 9588, sum loss: 3992.432373, avg loss: 2.723351, ppl: 15.231277 +epoch: 1, batch: 9589, sum loss: 5667.299805, avg loss: 2.978087, ppl: 19.650194 +epoch: 1, batch: 9590, sum loss: 3601.960205, avg loss: 2.506583, ppl: 12.262961 +epoch: 1, batch: 9591, sum loss: 4246.424805, avg loss: 2.849950, ppl: 17.286911 +epoch: 1, batch: 9592, sum loss: 4824.353027, avg loss: 2.750486, ppl: 15.650242 +epoch: 1, batch: 9593, sum loss: 4423.526367, avg loss: 2.722170, ppl: 15.213301 +epoch: 1, batch: 9594, sum 
loss: 4286.490234, avg loss: 2.624918, ppl: 13.803435 +epoch: 1, batch: 9595, sum loss: 5382.704102, avg loss: 2.799118, ppl: 16.430149 +epoch: 1, batch: 9596, sum loss: 4409.350586, avg loss: 2.648259, ppl: 14.129414 +epoch: 1, batch: 9597, sum loss: 5047.939453, avg loss: 2.842308, ppl: 17.155319 +epoch: 1, batch: 9598, sum loss: 5933.844727, avg loss: 3.032113, ppl: 20.741009 +epoch: 1, batch: 9599, sum loss: 4038.297852, avg loss: 2.623975, ppl: 13.790436 +epoch: 1, batch: 9600, sum loss: 4939.372559, avg loss: 2.669931, ppl: 14.438975 +epoch: 1, batch: 9601, sum loss: 5312.523926, avg loss: 2.969549, ppl: 19.483139 +epoch: 1, batch: 9602, sum loss: 4192.494629, avg loss: 2.551731, ppl: 12.829296 +epoch: 1, batch: 9603, sum loss: 4381.253906, avg loss: 2.696156, ppl: 14.822648 +epoch: 1, batch: 9604, sum loss: 5352.605957, avg loss: 3.065639, ppl: 21.448168 +epoch: 1, batch: 9605, sum loss: 4958.513672, avg loss: 2.908219, ppl: 18.324141 +epoch: 1, batch: 9606, sum loss: 5195.903809, avg loss: 2.848631, ppl: 17.264137 +epoch: 1, batch: 9607, sum loss: 5338.872559, avg loss: 2.989290, ppl: 19.871578 +epoch: 1, batch: 9608, sum loss: 3759.840576, avg loss: 2.436708, ppl: 11.435336 +epoch: 1, batch: 9609, sum loss: 5014.317871, avg loss: 2.858790, ppl: 17.440414 +epoch: 1, batch: 9610, sum loss: 5129.107910, avg loss: 2.976847, ppl: 19.625847 +epoch: 1, batch: 9611, sum loss: 4930.733398, avg loss: 2.832127, ppl: 16.981544 +epoch: 1, batch: 9612, sum loss: 4571.082031, avg loss: 2.643772, ppl: 14.066163 +epoch: 1, batch: 9613, sum loss: 4457.486328, avg loss: 2.698236, ppl: 14.853514 +epoch: 1, batch: 9614, sum loss: 3946.836182, avg loss: 2.627720, ppl: 13.842178 +epoch: 1, batch: 9615, sum loss: 4679.612793, avg loss: 2.773926, ppl: 16.021408 +epoch: 1, batch: 9616, sum loss: 2814.048828, avg loss: 2.284131, ppl: 9.817147 +epoch: 1, batch: 9617, sum loss: 4252.088867, avg loss: 2.791917, ppl: 16.312254 +epoch: 1, batch: 9618, sum loss: 4950.083984, avg loss: 
2.810951, ppl: 16.625713 +epoch: 1, batch: 9619, sum loss: 4131.173828, avg loss: 2.526712, ppl: 12.512294 +epoch: 1, batch: 9620, sum loss: 3188.462402, avg loss: 2.413673, ppl: 11.174936 +epoch: 1, batch: 9621, sum loss: 4810.048828, avg loss: 2.831106, ppl: 16.964212 +epoch: 1, batch: 9622, sum loss: 4096.396973, avg loss: 2.694998, ppl: 14.805490 +epoch: 1, batch: 9623, sum loss: 5307.580078, avg loss: 2.951935, ppl: 19.142965 +epoch: 1, batch: 9624, sum loss: 4690.444336, avg loss: 2.839252, ppl: 17.102972 +epoch: 1, batch: 9625, sum loss: 5238.102051, avg loss: 2.913294, ppl: 18.417358 +epoch: 1, batch: 9626, sum loss: 4769.470703, avg loss: 2.766515, ppl: 15.903108 +epoch: 1, batch: 9627, sum loss: 5028.737793, avg loss: 2.667765, ppl: 14.407738 +epoch: 1, batch: 9628, sum loss: 4422.728027, avg loss: 2.593976, ppl: 13.382870 +epoch: 1, batch: 9629, sum loss: 5005.068848, avg loss: 2.796128, ppl: 16.381092 +epoch: 1, batch: 9630, sum loss: 4155.334473, avg loss: 2.650086, ppl: 14.155252 +epoch: 1, batch: 9631, sum loss: 6225.013672, avg loss: 3.075600, ppl: 21.662868 +epoch: 1, batch: 9632, sum loss: 4137.237305, avg loss: 2.789776, ppl: 16.277370 +epoch: 1, batch: 9633, sum loss: 4112.221680, avg loss: 2.571746, ppl: 13.088655 +epoch: 1, batch: 9634, sum loss: 4716.295898, avg loss: 2.848005, ppl: 17.253325 +epoch: 1, batch: 9635, sum loss: 4571.155762, avg loss: 2.858759, ppl: 17.439869 +epoch: 1, batch: 9636, sum loss: 5343.095703, avg loss: 2.948728, ppl: 19.081673 +epoch: 1, batch: 9637, sum loss: 4164.380859, avg loss: 2.553268, ppl: 12.849031 +epoch: 1, batch: 9638, sum loss: 4950.919922, avg loss: 2.684881, ppl: 14.656453 +epoch: 1, batch: 9639, sum loss: 4910.286133, avg loss: 2.677364, ppl: 14.546700 +epoch: 1, batch: 9640, sum loss: 4252.455078, avg loss: 2.724186, ppl: 15.244007 +epoch: 1, batch: 9641, sum loss: 5089.989746, avg loss: 2.772326, ppl: 15.995790 +epoch: 1, batch: 9642, sum loss: 4917.566406, avg loss: 2.806830, ppl: 16.557354 
+epoch: 1, batch: 9643, sum loss: 4457.269531, avg loss: 2.714537, ppl: 15.097617 +epoch: 1, batch: 9644, sum loss: 4142.967773, avg loss: 2.627120, ppl: 13.833868 +epoch: 1, batch: 9645, sum loss: 4595.720703, avg loss: 2.606762, ppl: 13.555084 +epoch: 1, batch: 9646, sum loss: 6106.454590, avg loss: 2.868227, ppl: 17.605772 +epoch: 1, batch: 9647, sum loss: 4880.116699, avg loss: 2.565782, ppl: 13.010827 +epoch: 1, batch: 9648, sum loss: 4847.058594, avg loss: 2.625709, ppl: 13.814363 +epoch: 1, batch: 9649, sum loss: 4811.838867, avg loss: 2.883067, ppl: 17.868996 +epoch: 1, batch: 9650, sum loss: 5020.695312, avg loss: 2.841367, ppl: 17.139179 +epoch: 1, batch: 9651, sum loss: 4598.774902, avg loss: 2.639940, ppl: 14.012357 +epoch: 1, batch: 9652, sum loss: 4600.123047, avg loss: 2.898628, ppl: 18.149237 +epoch: 1, batch: 9653, sum loss: 4221.419922, avg loss: 2.502324, ppl: 12.210835 +epoch: 1, batch: 9654, sum loss: 4527.666992, avg loss: 2.789690, ppl: 16.275974 +epoch: 1, batch: 9655, sum loss: 4259.368652, avg loss: 2.479260, ppl: 11.932431 +epoch: 1, batch: 9656, sum loss: 5251.898438, avg loss: 2.985730, ppl: 19.800945 +epoch: 1, batch: 9657, sum loss: 5144.101562, avg loss: 2.643423, ppl: 14.061257 +epoch: 1, batch: 9658, sum loss: 5029.474609, avg loss: 2.865798, ppl: 17.563055 +epoch: 1, batch: 9659, sum loss: 4623.666992, avg loss: 2.716608, ppl: 15.128919 +epoch: 1, batch: 9660, sum loss: 4160.767578, avg loss: 2.592378, ppl: 13.361512 +epoch: 1, batch: 9661, sum loss: 4615.686035, avg loss: 2.686662, ppl: 14.682590 +epoch: 1, batch: 9662, sum loss: 4299.111328, avg loss: 2.668598, ppl: 14.419741 +epoch: 1, batch: 9663, sum loss: 4245.402344, avg loss: 2.880192, ppl: 17.817686 +epoch: 1, batch: 9664, sum loss: 4392.889160, avg loss: 2.761087, ppl: 15.817026 +epoch: 1, batch: 9665, sum loss: 4556.130859, avg loss: 2.824632, ppl: 16.854736 +epoch: 1, batch: 9666, sum loss: 4791.856445, avg loss: 2.534033, ppl: 12.604238 +epoch: 1, batch: 9667, sum 
loss: 4466.264648, avg loss: 3.071709, ppl: 21.578747 +epoch: 1, batch: 9668, sum loss: 3393.562012, avg loss: 2.179552, ppl: 8.842342 +epoch: 1, batch: 9669, sum loss: 3990.175293, avg loss: 2.811963, ppl: 16.642557 +epoch: 1, batch: 9670, sum loss: 4352.156250, avg loss: 2.613908, ppl: 13.652297 +epoch: 1, batch: 9671, sum loss: 4251.520508, avg loss: 2.481915, ppl: 11.964154 +epoch: 1, batch: 9672, sum loss: 4638.854980, avg loss: 2.754665, ppl: 15.715770 +epoch: 1, batch: 9673, sum loss: 5578.666016, avg loss: 2.683341, ppl: 14.633904 +epoch: 1, batch: 9674, sum loss: 3880.512451, avg loss: 2.431399, ppl: 11.374783 +epoch: 1, batch: 9675, sum loss: 4721.599609, avg loss: 2.934493, ppl: 18.811968 +epoch: 1, batch: 9676, sum loss: 4293.684570, avg loss: 2.741816, ppl: 15.515140 +epoch: 1, batch: 9677, sum loss: 5403.499023, avg loss: 2.884943, ppl: 17.902555 +epoch: 1, batch: 9678, sum loss: 5134.879883, avg loss: 2.760688, ppl: 15.810718 +epoch: 1, batch: 9679, sum loss: 5674.144531, avg loss: 2.933891, ppl: 18.800634 +epoch: 1, batch: 9680, sum loss: 4062.580078, avg loss: 2.521775, ppl: 12.450683 +epoch: 1, batch: 9681, sum loss: 5475.704102, avg loss: 3.040369, ppl: 20.912949 +epoch: 1, batch: 9682, sum loss: 4638.868164, avg loss: 2.821696, ppl: 16.805328 +epoch: 1, batch: 9683, sum loss: 4821.440430, avg loss: 2.714775, ppl: 15.101213 +epoch: 1, batch: 9684, sum loss: 4947.686035, avg loss: 2.989538, ppl: 19.876507 +epoch: 1, batch: 9685, sum loss: 5093.016113, avg loss: 2.786114, ppl: 16.217875 +epoch: 1, batch: 9686, sum loss: 5010.819824, avg loss: 2.845440, ppl: 17.209127 +epoch: 1, batch: 9687, sum loss: 4121.025391, avg loss: 2.639991, ppl: 14.013075 +epoch: 1, batch: 9688, sum loss: 4886.148438, avg loss: 2.696550, ppl: 14.828484 +epoch: 1, batch: 9689, sum loss: 4125.635742, avg loss: 2.622782, ppl: 13.773986 +epoch: 1, batch: 9690, sum loss: 4004.762451, avg loss: 2.515554, ppl: 12.373465 +epoch: 1, batch: 9691, sum loss: 5075.565918, avg loss: 
2.764469, ppl: 15.870617 +epoch: 1, batch: 9692, sum loss: 4389.455078, avg loss: 2.649037, ppl: 14.140420 +epoch: 1, batch: 9693, sum loss: 4807.669434, avg loss: 2.848145, ppl: 17.255751 +epoch: 1, batch: 9694, sum loss: 5244.787598, avg loss: 2.949824, ppl: 19.102598 +epoch: 1, batch: 9695, sum loss: 4179.707031, avg loss: 2.746194, ppl: 15.583211 +epoch: 1, batch: 9696, sum loss: 4483.574219, avg loss: 2.638949, ppl: 13.998486 +epoch: 1, batch: 9697, sum loss: 4964.859375, avg loss: 2.672153, ppl: 14.471085 +epoch: 1, batch: 9698, sum loss: 5608.900879, avg loss: 3.142241, ppl: 23.155712 +epoch: 1, batch: 9699, sum loss: 4403.652832, avg loss: 2.500654, ppl: 12.190470 +epoch: 1, batch: 9700, sum loss: 4334.005859, avg loss: 2.715543, ppl: 15.112807 +epoch: 1, batch: 9701, sum loss: 4089.250732, avg loss: 2.612940, ppl: 13.639089 +epoch: 1, batch: 9702, sum loss: 4802.422852, avg loss: 2.772761, ppl: 16.002762 +epoch: 1, batch: 9703, sum loss: 4829.750488, avg loss: 2.824415, ppl: 16.851091 +epoch: 1, batch: 9704, sum loss: 4762.218750, avg loss: 2.783296, ppl: 16.172235 +epoch: 1, batch: 9705, sum loss: 4404.881348, avg loss: 2.595687, ppl: 13.405799 +epoch: 1, batch: 9706, sum loss: 4064.862061, avg loss: 2.487676, ppl: 12.033275 +epoch: 1, batch: 9707, sum loss: 4819.087891, avg loss: 2.744355, ppl: 15.554585 +epoch: 1, batch: 9708, sum loss: 4950.132324, avg loss: 2.983805, ppl: 19.762871 +epoch: 1, batch: 9709, sum loss: 4100.997070, avg loss: 2.579243, ppl: 13.187158 +epoch: 1, batch: 9710, sum loss: 4741.943848, avg loss: 2.887908, ppl: 17.955698 +epoch: 1, batch: 9711, sum loss: 4867.698242, avg loss: 2.778367, ppl: 16.092716 +epoch: 1, batch: 9712, sum loss: 5381.272949, avg loss: 2.934173, ppl: 18.805937 +epoch: 1, batch: 9713, sum loss: 4532.175293, avg loss: 2.834381, ppl: 17.019859 +epoch: 1, batch: 9714, sum loss: 4179.342773, avg loss: 2.640141, ppl: 14.015173 +epoch: 1, batch: 9715, sum loss: 4783.404297, avg loss: 2.717843, ppl: 15.147618 
+epoch: 1, batch: 9716, sum loss: 4108.568359, avg loss: 2.719105, ppl: 15.166749 +epoch: 1, batch: 9717, sum loss: 5481.679688, avg loss: 2.980794, ppl: 19.703455 +epoch: 1, batch: 9718, sum loss: 5587.990234, avg loss: 3.055216, ppl: 21.225777 +epoch: 1, batch: 9719, sum loss: 4047.411621, avg loss: 2.441141, ppl: 11.486137 +epoch: 1, batch: 9720, sum loss: 5445.800293, avg loss: 2.918435, ppl: 18.512295 +epoch: 1, batch: 9721, sum loss: 4645.964355, avg loss: 3.010994, ppl: 20.307585 +epoch: 1, batch: 9722, sum loss: 4764.214844, avg loss: 2.741205, ppl: 15.505665 +epoch: 1, batch: 9723, sum loss: 5836.151855, avg loss: 2.833083, ppl: 16.997791 +epoch: 1, batch: 9724, sum loss: 4788.503906, avg loss: 2.744128, ppl: 15.551051 +epoch: 1, batch: 9725, sum loss: 4042.947266, avg loss: 2.491033, ppl: 12.073746 +epoch: 1, batch: 9726, sum loss: 4526.188965, avg loss: 2.584917, ppl: 13.262186 +epoch: 1, batch: 9727, sum loss: 4934.342773, avg loss: 2.937109, ppl: 18.861240 +epoch: 1, batch: 9728, sum loss: 4047.704834, avg loss: 2.586393, ppl: 13.281776 +epoch: 1, batch: 9729, sum loss: 5399.395020, avg loss: 2.816586, ppl: 16.719673 +epoch: 1, batch: 9730, sum loss: 4603.228516, avg loss: 2.657753, ppl: 14.264205 +epoch: 1, batch: 9731, sum loss: 5958.738281, avg loss: 2.829410, ppl: 16.935471 +epoch: 1, batch: 9732, sum loss: 5601.550293, avg loss: 3.041015, ppl: 20.926481 +epoch: 1, batch: 9733, sum loss: 3733.631836, avg loss: 2.526138, ppl: 12.505118 +epoch: 1, batch: 9734, sum loss: 5190.147461, avg loss: 2.920736, ppl: 18.554935 +epoch: 1, batch: 9735, sum loss: 3552.256836, avg loss: 2.570374, ppl: 13.070712 +epoch: 1, batch: 9736, sum loss: 4893.133789, avg loss: 2.724462, ppl: 15.248209 +epoch: 1, batch: 9737, sum loss: 5000.578125, avg loss: 2.717705, ppl: 15.145531 +epoch: 1, batch: 9738, sum loss: 5061.458496, avg loss: 2.785613, ppl: 16.209749 +epoch: 1, batch: 9739, sum loss: 4465.339355, avg loss: 2.736115, ppl: 15.426934 +epoch: 1, batch: 9740, sum 
loss: 4368.023438, avg loss: 2.546952, ppl: 12.768130 +epoch: 1, batch: 9741, sum loss: 5098.694824, avg loss: 2.729494, ppl: 15.325132 +epoch: 1, batch: 9742, sum loss: 6110.907227, avg loss: 3.034214, ppl: 20.784636 +epoch: 1, batch: 9743, sum loss: 4737.421875, avg loss: 2.893966, ppl: 18.064808 +epoch: 1, batch: 9744, sum loss: 3907.516113, avg loss: 2.569044, ppl: 13.053344 +epoch: 1, batch: 9745, sum loss: 4894.035645, avg loss: 2.681663, ppl: 14.609372 +epoch: 1, batch: 9746, sum loss: 4422.215820, avg loss: 2.644866, ppl: 14.081558 +epoch: 1, batch: 9747, sum loss: 4649.141602, avg loss: 2.528081, ppl: 12.529445 +epoch: 1, batch: 9748, sum loss: 4547.507812, avg loss: 2.718176, ppl: 15.152654 +epoch: 1, batch: 9749, sum loss: 3949.478516, avg loss: 2.476162, ppl: 11.895524 +epoch: 1, batch: 9750, sum loss: 5185.750000, avg loss: 2.895449, ppl: 18.091631 +epoch: 1, batch: 9751, sum loss: 4194.663086, avg loss: 2.586105, ppl: 13.277958 +epoch: 1, batch: 9752, sum loss: 4872.289062, avg loss: 2.840985, ppl: 17.132629 +epoch: 1, batch: 9753, sum loss: 3958.114990, avg loss: 2.576898, ppl: 13.156262 +epoch: 1, batch: 9754, sum loss: 4401.959961, avg loss: 2.658188, ppl: 14.270412 +epoch: 1, batch: 9755, sum loss: 4469.267090, avg loss: 2.950011, ppl: 19.106169 +epoch: 1, batch: 9756, sum loss: 4319.667969, avg loss: 2.713359, ppl: 15.079849 +epoch: 1, batch: 9757, sum loss: 4440.738281, avg loss: 2.699537, ppl: 14.872841 +epoch: 1, batch: 9758, sum loss: 5396.915039, avg loss: 3.123215, ppl: 22.719299 +epoch: 1, batch: 9759, sum loss: 4525.545410, avg loss: 2.810898, ppl: 16.624838 +epoch: 1, batch: 9760, sum loss: 4904.253418, avg loss: 3.031059, ppl: 20.719164 +epoch: 1, batch: 9761, sum loss: 4776.461914, avg loss: 2.738797, ppl: 15.468365 +epoch: 1, batch: 9762, sum loss: 3854.322021, avg loss: 2.776889, ppl: 16.068954 +epoch: 1, batch: 9763, sum loss: 5334.993164, avg loss: 3.022659, ppl: 20.545853 +epoch: 1, batch: 9764, sum loss: 5038.641602, avg loss: 
3.057428, ppl: 21.272776 +epoch: 1, batch: 9765, sum loss: 4239.515625, avg loss: 2.605726, ppl: 13.541046 +epoch: 1, batch: 9766, sum loss: 4936.841797, avg loss: 2.840530, ppl: 17.124846 +epoch: 1, batch: 9767, sum loss: 4662.071777, avg loss: 2.756991, ppl: 15.752375 +epoch: 1, batch: 9768, sum loss: 4655.154297, avg loss: 2.679997, ppl: 14.585046 +epoch: 1, batch: 9769, sum loss: 3894.525879, avg loss: 2.480590, ppl: 11.948310 +epoch: 1, batch: 9770, sum loss: 4082.913818, avg loss: 2.747587, ppl: 15.604927 +epoch: 1, batch: 9771, sum loss: 4684.289551, avg loss: 2.669111, ppl: 14.427135 +epoch: 1, batch: 9772, sum loss: 4660.113281, avg loss: 2.919870, ppl: 18.538885 +epoch: 1, batch: 9773, sum loss: 4956.155273, avg loss: 2.886520, ppl: 17.930809 +epoch: 1, batch: 9774, sum loss: 3704.573730, avg loss: 2.416552, ppl: 11.207149 +epoch: 1, batch: 9775, sum loss: 5052.532227, avg loss: 2.925612, ppl: 18.645638 +epoch: 1, batch: 9776, sum loss: 4714.617676, avg loss: 2.656122, ppl: 14.240962 +epoch: 1, batch: 9777, sum loss: 4747.826172, avg loss: 2.774884, ppl: 16.036766 +epoch: 1, batch: 9778, sum loss: 4025.188477, avg loss: 2.692434, ppl: 14.767570 +epoch: 1, batch: 9779, sum loss: 4968.743164, avg loss: 2.897226, ppl: 18.123802 +epoch: 1, batch: 9780, sum loss: 4249.629395, avg loss: 2.563106, ppl: 12.976056 +epoch: 1, batch: 9781, sum loss: 5195.284180, avg loss: 2.951866, ppl: 19.141638 +epoch: 1, batch: 9782, sum loss: 5312.985352, avg loss: 2.882792, ppl: 17.864077 +epoch: 1, batch: 9783, sum loss: 4686.661133, avg loss: 2.819892, ppl: 16.775045 +epoch: 1, batch: 9784, sum loss: 4755.110352, avg loss: 2.591341, ppl: 13.347659 +epoch: 1, batch: 9785, sum loss: 4422.087402, avg loss: 2.800562, ppl: 16.453890 +epoch: 1, batch: 9786, sum loss: 5399.409668, avg loss: 2.892025, ppl: 18.029774 +epoch: 1, batch: 9787, sum loss: 4764.444336, avg loss: 2.755607, ppl: 15.730585 +epoch: 1, batch: 9788, sum loss: 4930.329590, avg loss: 2.888301, ppl: 17.962763 
+epoch: 1, batch: 9789, sum loss: 3603.148438, avg loss: 2.592193, ppl: 13.359037 +epoch: 1, batch: 9790, sum loss: 5272.341309, avg loss: 2.909681, ppl: 18.350937 +epoch: 1, batch: 9791, sum loss: 4467.877930, avg loss: 2.720998, ppl: 15.195474 +epoch: 1, batch: 9792, sum loss: 3746.042480, avg loss: 2.532821, ppl: 12.588972 +epoch: 1, batch: 9793, sum loss: 4772.779297, avg loss: 2.632531, ppl: 13.908935 +epoch: 1, batch: 9794, sum loss: 5102.958496, avg loss: 2.734704, ppl: 15.405187 +epoch: 1, batch: 9795, sum loss: 4204.412109, avg loss: 2.587331, ppl: 13.294236 +epoch: 1, batch: 9796, sum loss: 4569.570312, avg loss: 2.534426, ppl: 12.609194 +epoch: 1, batch: 9797, sum loss: 4195.471191, avg loss: 2.599425, ppl: 13.455994 +epoch: 1, batch: 9798, sum loss: 4170.462402, avg loss: 2.608169, ppl: 13.574175 +epoch: 1, batch: 9799, sum loss: 4148.958008, avg loss: 2.550066, ppl: 12.807952 +epoch: 1, batch: 9800, sum loss: 5546.168457, avg loss: 2.767549, ppl: 15.919568 +epoch: 1, batch: 9801, sum loss: 5973.786133, avg loss: 2.938409, ppl: 18.885777 +epoch: 1, batch: 9802, sum loss: 4260.278320, avg loss: 2.537390, ppl: 12.646623 +epoch: 1, batch: 9803, sum loss: 3793.674316, avg loss: 2.499127, ppl: 12.171860 +epoch: 1, batch: 9804, sum loss: 4137.107422, avg loss: 2.748909, ppl: 15.625571 +epoch: 1, batch: 9805, sum loss: 4267.439453, avg loss: 2.835508, ppl: 17.039055 +epoch: 1, batch: 9806, sum loss: 5245.504883, avg loss: 2.639912, ppl: 14.011966 +epoch: 1, batch: 9807, sum loss: 4207.955078, avg loss: 2.702605, ppl: 14.918537 +epoch: 1, batch: 9808, sum loss: 4969.010742, avg loss: 2.688859, ppl: 14.714869 +epoch: 1, batch: 9809, sum loss: 4797.140625, avg loss: 3.063308, ppl: 21.398226 +epoch: 1, batch: 9810, sum loss: 4561.738281, avg loss: 2.817627, ppl: 16.737089 +epoch: 1, batch: 9811, sum loss: 4498.628906, avg loss: 2.665065, ppl: 14.368877 +epoch: 1, batch: 9812, sum loss: 4173.319336, avg loss: 2.683807, ppl: 14.640723 +epoch: 1, batch: 9813, sum 
loss: 4656.793945, avg loss: 2.851680, ppl: 17.316854 +epoch: 1, batch: 9814, sum loss: 4348.072266, avg loss: 2.652881, ppl: 14.194878 +epoch: 1, batch: 9815, sum loss: 3771.076660, avg loss: 2.750603, ppl: 15.652066 +epoch: 1, batch: 9816, sum loss: 4407.916992, avg loss: 2.862284, ppl: 17.501450 +epoch: 1, batch: 9817, sum loss: 4832.309570, avg loss: 2.687603, ppl: 14.696400 +epoch: 1, batch: 9818, sum loss: 4331.410645, avg loss: 2.485032, ppl: 12.001502 +epoch: 1, batch: 9819, sum loss: 4112.729980, avg loss: 2.698642, ppl: 14.859535 +epoch: 1, batch: 9820, sum loss: 4961.843750, avg loss: 2.753520, ppl: 15.697799 +epoch: 1, batch: 9821, sum loss: 5697.350586, avg loss: 3.115009, ppl: 22.533625 +epoch: 1, batch: 9822, sum loss: 4307.023926, avg loss: 2.615072, ppl: 13.668200 +epoch: 1, batch: 9823, sum loss: 4506.712891, avg loss: 2.641684, ppl: 14.036819 +epoch: 1, batch: 9824, sum loss: 4297.953125, avg loss: 2.801795, ppl: 16.474188 +epoch: 1, batch: 9825, sum loss: 4178.865723, avg loss: 2.487420, ppl: 12.030199 +epoch: 1, batch: 9826, sum loss: 4690.319336, avg loss: 2.775337, ppl: 16.044033 +epoch: 1, batch: 9827, sum loss: 3857.057129, avg loss: 2.665554, ppl: 14.375916 +epoch: 1, batch: 9828, sum loss: 4566.825684, avg loss: 2.567075, ppl: 13.027657 +epoch: 1, batch: 9829, sum loss: 4538.661133, avg loss: 2.805106, ppl: 16.528822 +epoch: 1, batch: 9830, sum loss: 4328.736816, avg loss: 2.665478, ppl: 14.374823 +epoch: 1, batch: 9831, sum loss: 3957.664551, avg loss: 2.688631, ppl: 14.711519 +epoch: 1, batch: 9832, sum loss: 5527.477539, avg loss: 3.007333, ppl: 20.233356 +epoch: 1, batch: 9833, sum loss: 5153.183594, avg loss: 2.948046, ppl: 19.068651 +epoch: 1, batch: 9834, sum loss: 4056.174316, avg loss: 2.538282, ppl: 12.657905 +epoch: 1, batch: 9835, sum loss: 4954.663574, avg loss: 2.921382, ppl: 18.566929 +epoch: 1, batch: 9836, sum loss: 4535.350586, avg loss: 2.720666, ppl: 15.190438 +epoch: 1, batch: 9837, sum loss: 4666.745117, avg loss: 
2.727496, ppl: 15.294540 +epoch: 1, batch: 9838, sum loss: 4806.122070, avg loss: 2.732304, ppl: 15.368249 +epoch: 1, batch: 9839, sum loss: 4414.434082, avg loss: 2.728328, ppl: 15.307264 +epoch: 1, batch: 9840, sum loss: 3995.724121, avg loss: 2.468020, ppl: 11.799061 +epoch: 1, batch: 9841, sum loss: 4208.120605, avg loss: 2.644953, ppl: 14.082787 +epoch: 1, batch: 9842, sum loss: 3921.007812, avg loss: 2.608788, ppl: 13.582573 +epoch: 1, batch: 9843, sum loss: 5329.421875, avg loss: 3.131270, ppl: 22.903051 +epoch: 1, batch: 9844, sum loss: 4757.266113, avg loss: 2.672621, ppl: 14.477873 +epoch: 1, batch: 9845, sum loss: 4002.989746, avg loss: 2.546431, ppl: 12.761477 +epoch: 1, batch: 9846, sum loss: 4747.911621, avg loss: 3.177986, ppl: 23.998381 +epoch: 1, batch: 9847, sum loss: 3992.244385, avg loss: 2.578969, ppl: 13.183542 +epoch: 1, batch: 9848, sum loss: 5026.337891, avg loss: 2.817454, ppl: 16.734192 +epoch: 1, batch: 9849, sum loss: 4512.232910, avg loss: 2.700319, ppl: 14.884480 +epoch: 1, batch: 9850, sum loss: 5072.479004, avg loss: 2.942273, ppl: 18.958899 +epoch: 1, batch: 9851, sum loss: 4519.620117, avg loss: 2.904640, ppl: 18.258673 +epoch: 1, batch: 9852, sum loss: 4558.748535, avg loss: 2.791640, ppl: 16.307747 +epoch: 1, batch: 9853, sum loss: 4829.431152, avg loss: 2.681527, ppl: 14.607388 +epoch: 1, batch: 9854, sum loss: 5753.873047, avg loss: 3.095144, ppl: 22.090420 +epoch: 1, batch: 9855, sum loss: 4127.204590, avg loss: 2.577892, ppl: 13.169346 +epoch: 1, batch: 9856, sum loss: 6199.792969, avg loss: 3.256194, ppl: 25.950579 +epoch: 1, batch: 9857, sum loss: 4810.106934, avg loss: 2.759671, ppl: 15.794649 +epoch: 1, batch: 9858, sum loss: 4423.688477, avg loss: 2.700664, ppl: 14.889619 +epoch: 1, batch: 9859, sum loss: 4892.288574, avg loss: 2.795594, ppl: 16.372343 +epoch: 1, batch: 9860, sum loss: 3859.980713, avg loss: 2.356520, ppl: 10.554163 +epoch: 1, batch: 9861, sum loss: 4421.281738, avg loss: 2.660218, ppl: 14.299402 
+epoch: 1, batch: 9862, sum loss: 3914.782959, avg loss: 2.555341, ppl: 12.875696 +epoch: 1, batch: 9863, sum loss: 3437.208740, avg loss: 2.390270, ppl: 10.916446 +epoch: 1, batch: 9864, sum loss: 4740.620605, avg loss: 2.949982, ppl: 19.105604 +epoch: 1, batch: 9865, sum loss: 4852.482422, avg loss: 2.746170, ppl: 15.582836 +epoch: 1, batch: 9866, sum loss: 4597.803223, avg loss: 2.880829, ppl: 17.829048 +epoch: 1, batch: 9867, sum loss: 4919.695312, avg loss: 2.911062, ppl: 18.376308 +epoch: 1, batch: 9868, sum loss: 5142.874023, avg loss: 2.766473, ppl: 15.902452 +epoch: 1, batch: 9869, sum loss: 4614.990234, avg loss: 2.815735, ppl: 16.705456 +epoch: 1, batch: 9870, sum loss: 5316.699707, avg loss: 2.937403, ppl: 18.866795 +epoch: 1, batch: 9871, sum loss: 4212.020020, avg loss: 2.782048, ppl: 16.152063 +epoch: 1, batch: 9872, sum loss: 3916.324707, avg loss: 2.478687, ppl: 11.925591 +epoch: 1, batch: 9873, sum loss: 4691.579590, avg loss: 2.679372, ppl: 14.575931 +epoch: 1, batch: 9874, sum loss: 5692.623047, avg loss: 2.877969, ppl: 17.778130 +epoch: 1, batch: 9875, sum loss: 4795.669922, avg loss: 2.947554, ppl: 19.059271 +epoch: 1, batch: 9876, sum loss: 2901.245361, avg loss: 2.216383, ppl: 9.174088 +epoch: 1, batch: 9877, sum loss: 4265.354980, avg loss: 2.537391, ppl: 12.646638 +epoch: 1, batch: 9878, sum loss: 4858.684082, avg loss: 2.712833, ppl: 15.071913 +epoch: 1, batch: 9879, sum loss: 4006.084717, avg loss: 2.719677, ppl: 15.175426 +epoch: 1, batch: 9880, sum loss: 4979.125000, avg loss: 2.868160, ppl: 17.604588 +epoch: 1, batch: 9881, sum loss: 4824.026367, avg loss: 2.772429, ppl: 15.997444 +epoch: 1, batch: 9882, sum loss: 4751.969238, avg loss: 2.830238, ppl: 16.949488 +epoch: 1, batch: 9883, sum loss: 4775.828613, avg loss: 2.796152, ppl: 16.381495 +epoch: 1, batch: 9884, sum loss: 4265.564453, avg loss: 2.563440, ppl: 12.980397 +epoch: 1, batch: 9885, sum loss: 3854.030762, avg loss: 2.595307, ppl: 13.400699 +epoch: 1, batch: 9886, sum 
loss: 4840.459961, avg loss: 2.737817, ppl: 15.453211 +epoch: 1, batch: 9887, sum loss: 3723.157227, avg loss: 2.522464, ppl: 12.459262 +epoch: 1, batch: 9888, sum loss: 4853.403320, avg loss: 2.576117, ppl: 13.145988 +epoch: 1, batch: 9889, sum loss: 5388.819336, avg loss: 2.833239, ppl: 17.000429 +epoch: 1, batch: 9890, sum loss: 5059.287109, avg loss: 2.676872, ppl: 14.539536 +epoch: 1, batch: 9891, sum loss: 4878.907227, avg loss: 2.670447, ppl: 14.446430 +epoch: 1, batch: 9892, sum loss: 5319.739746, avg loss: 2.900621, ppl: 18.185442 +epoch: 1, batch: 9893, sum loss: 3730.250000, avg loss: 2.428548, ppl: 11.342404 +epoch: 1, batch: 9894, sum loss: 4689.326172, avg loss: 2.927170, ppl: 18.674702 +epoch: 1, batch: 9895, sum loss: 5199.749512, avg loss: 2.914658, ppl: 18.442501 +epoch: 1, batch: 9896, sum loss: 4673.222168, avg loss: 2.920764, ppl: 18.555454 +epoch: 1, batch: 9897, sum loss: 5067.555664, avg loss: 3.069386, ppl: 21.528675 +epoch: 1, batch: 9898, sum loss: 4627.918945, avg loss: 2.706385, ppl: 14.975048 +epoch: 1, batch: 9899, sum loss: 5163.399902, avg loss: 3.195173, ppl: 24.414404 +epoch: 1, batch: 9900, sum loss: 4548.777832, avg loss: 2.456143, ppl: 11.659758 +epoch: 1, batch: 9901, sum loss: 4249.630859, avg loss: 2.621611, ppl: 13.757865 +epoch: 1, batch: 9902, sum loss: 4938.931641, avg loss: 2.674029, ppl: 14.498270 +epoch: 1, batch: 9903, sum loss: 3932.798340, avg loss: 2.662694, ppl: 14.334851 +epoch: 1, batch: 9904, sum loss: 5024.406738, avg loss: 2.529913, ppl: 12.552410 +epoch: 1, batch: 9905, sum loss: 3923.732422, avg loss: 2.633378, ppl: 13.920709 +epoch: 1, batch: 9906, sum loss: 4315.563965, avg loss: 2.626637, ppl: 13.827184 +epoch: 1, batch: 9907, sum loss: 4944.895508, avg loss: 2.832128, ppl: 16.981560 +epoch: 1, batch: 9908, sum loss: 4761.418945, avg loss: 2.734876, ppl: 15.407831 +epoch: 1, batch: 9909, sum loss: 4208.107910, avg loss: 2.494433, ppl: 12.114859 +epoch: 1, batch: 9910, sum loss: 5266.727051, avg loss: 
2.862351, ppl: 17.502634 +epoch: 1, batch: 9911, sum loss: 5296.541016, avg loss: 3.028325, ppl: 20.662600 +epoch: 1, batch: 9912, sum loss: 4654.469727, avg loss: 2.843292, ppl: 17.172211 +epoch: 1, batch: 9913, sum loss: 4627.374512, avg loss: 2.899358, ppl: 18.162474 +epoch: 1, batch: 9914, sum loss: 4296.796875, avg loss: 2.594684, ppl: 13.392356 +epoch: 1, batch: 9915, sum loss: 3864.709961, avg loss: 2.436765, ppl: 11.435990 +epoch: 1, batch: 9916, sum loss: 3255.118652, avg loss: 2.265218, ppl: 9.633227 +epoch: 1, batch: 9917, sum loss: 4173.662109, avg loss: 2.611804, ppl: 13.623599 +epoch: 1, batch: 9918, sum loss: 4346.254883, avg loss: 2.692847, ppl: 14.773677 +epoch: 1, batch: 9919, sum loss: 5104.275879, avg loss: 2.957286, ppl: 19.245670 +epoch: 1, batch: 9920, sum loss: 4191.442383, avg loss: 2.566713, ppl: 13.022945 +epoch: 1, batch: 9921, sum loss: 5062.394043, avg loss: 2.886199, ppl: 17.925055 +epoch: 1, batch: 9922, sum loss: 5039.943848, avg loss: 2.870128, ppl: 17.639271 +epoch: 1, batch: 9923, sum loss: 5022.153809, avg loss: 2.711746, ppl: 15.055539 +epoch: 1, batch: 9924, sum loss: 5305.031738, avg loss: 2.764477, ppl: 15.870738 +epoch: 1, batch: 9925, sum loss: 5375.051758, avg loss: 2.875897, ppl: 17.741329 +epoch: 1, batch: 9926, sum loss: 4591.231445, avg loss: 2.650826, ppl: 14.165741 +epoch: 1, batch: 9927, sum loss: 4071.249512, avg loss: 2.479446, ppl: 11.934648 +epoch: 1, batch: 9928, sum loss: 4962.342773, avg loss: 2.886761, ppl: 17.935131 +epoch: 1, batch: 9929, sum loss: 4290.859375, avg loss: 2.629203, ppl: 13.862718 +epoch: 1, batch: 9930, sum loss: 4978.006836, avg loss: 2.660613, ppl: 14.305057 +epoch: 1, batch: 9931, sum loss: 4721.643555, avg loss: 2.805492, ppl: 16.535212 +epoch: 1, batch: 9932, sum loss: 4941.613281, avg loss: 2.645403, ppl: 14.089124 +epoch: 1, batch: 9933, sum loss: 4827.439941, avg loss: 2.768027, ppl: 15.927183 +epoch: 1, batch: 9934, sum loss: 4081.944580, avg loss: 2.613281, ppl: 13.643740 +epoch: 
1, batch: 9935, sum loss: 5243.919922, avg loss: 2.892400, ppl: 18.036537 +epoch: 1, batch: 9936, sum loss: 3983.233887, avg loss: 2.630934, ppl: 13.886734 +epoch: 1, batch: 9937, sum loss: 4307.605957, avg loss: 2.956490, ppl: 19.230356 +epoch: 1, batch: 9938, sum loss: 5259.039062, avg loss: 2.881665, ppl: 17.843964 +epoch: 1, batch: 9939, sum loss: 4225.319336, avg loss: 2.562352, ppl: 12.966284 +epoch: 1, batch: 9940, sum loss: 4988.683105, avg loss: 2.834479, ppl: 17.021528 +epoch: 1, batch: 9941, sum loss: 5425.726074, avg loss: 2.846656, ppl: 17.230061 +epoch: 1, batch: 9942, sum loss: 4504.839844, avg loss: 2.619093, ppl: 13.723270 +epoch: 1, batch: 9943, sum loss: 4873.132324, avg loss: 2.962390, ppl: 19.344158 +epoch: 1, batch: 9944, sum loss: 4678.100586, avg loss: 2.665584, ppl: 14.376348 +epoch: 1, batch: 9945, sum loss: 3938.291748, avg loss: 2.532664, ppl: 12.586988 +epoch: 1, batch: 9946, sum loss: 3596.534180, avg loss: 2.373950, ppl: 10.739731 +epoch: 1, batch: 9947, sum loss: 4783.139648, avg loss: 2.993204, ppl: 19.949495 +epoch: 1, batch: 9948, sum loss: 5155.957520, avg loss: 2.899864, ppl: 18.171669 +epoch: 1, batch: 9949, sum loss: 4821.060059, avg loss: 2.796439, ppl: 16.386187 +epoch: 1, batch: 9950, sum loss: 3497.717529, avg loss: 2.440836, ppl: 11.482633 +epoch: 1, batch: 9951, sum loss: 4675.102051, avg loss: 2.700810, ppl: 14.891788 +epoch: 1, batch: 9952, sum loss: 5373.295898, avg loss: 2.960494, ppl: 19.307503 +epoch: 1, batch: 9953, sum loss: 4941.499023, avg loss: 2.810864, ppl: 16.624275 +epoch: 1, batch: 9954, sum loss: 5118.462891, avg loss: 2.903269, ppl: 18.233650 +epoch: 1, batch: 9955, sum loss: 4355.839844, avg loss: 2.657620, ppl: 14.262307 +epoch: 1, batch: 9956, sum loss: 3894.053711, avg loss: 2.746159, ppl: 15.582668 +epoch: 1, batch: 9957, sum loss: 4143.530273, avg loss: 2.797792, ppl: 16.408381 +epoch: 1, batch: 9958, sum loss: 5590.721191, avg loss: 2.927079, ppl: 18.673010 +epoch: 1, batch: 9959, sum loss: 
4273.342285, avg loss: 2.765918, ppl: 15.893617 +epoch: 1, batch: 9960, sum loss: 5549.513184, avg loss: 3.124726, ppl: 22.753660 +epoch: 1, batch: 9961, sum loss: 4318.164062, avg loss: 2.724394, ppl: 15.247166 +epoch: 1, batch: 9962, sum loss: 4636.164551, avg loss: 2.690751, ppl: 14.742748 +epoch: 1, batch: 9963, sum loss: 4962.507324, avg loss: 2.876816, ppl: 17.757643 +epoch: 1, batch: 9964, sum loss: 4517.403809, avg loss: 2.718053, ppl: 15.150793 +epoch: 1, batch: 9965, sum loss: 4660.388672, avg loss: 2.606481, ppl: 13.551284 +epoch: 1, batch: 9966, sum loss: 5646.853027, avg loss: 2.892855, ppl: 18.044758 +epoch: 1, batch: 9967, sum loss: 4737.624512, avg loss: 2.808313, ppl: 16.581926 +epoch: 1, batch: 9968, sum loss: 5044.951172, avg loss: 2.662243, ppl: 14.328397 +epoch: 1, batch: 9969, sum loss: 3458.874023, avg loss: 2.500993, ppl: 12.194603 +epoch: 1, batch: 9970, sum loss: 3868.382812, avg loss: 2.757222, ppl: 15.756019 +epoch: 1, batch: 9971, sum loss: 5023.444824, avg loss: 2.863994, ppl: 17.531401 +epoch: 1, batch: 9972, sum loss: 4793.872559, avg loss: 2.843341, ppl: 17.173042 +epoch: 1, batch: 9973, sum loss: 4290.287598, avg loss: 2.581401, ppl: 13.215635 +epoch: 1, batch: 9974, sum loss: 3992.376221, avg loss: 2.619669, ppl: 13.731184 +epoch: 1, batch: 9975, sum loss: 4533.603027, avg loss: 2.812409, ppl: 16.649975 +epoch: 1, batch: 9976, sum loss: 5114.326660, avg loss: 2.939268, ppl: 18.902006 +epoch: 1, batch: 9977, sum loss: 4864.538086, avg loss: 2.558936, ppl: 12.922066 +epoch: 1, batch: 9978, sum loss: 4800.821289, avg loss: 2.768640, ppl: 15.936938 +epoch: 1, batch: 9979, sum loss: 5416.618164, avg loss: 2.818220, ppl: 16.747013 +epoch: 1, batch: 9980, sum loss: 4970.919922, avg loss: 2.957121, ppl: 19.242487 +epoch: 1, batch: 9981, sum loss: 4580.969238, avg loss: 2.751333, ppl: 15.663497 +epoch: 1, batch: 9982, sum loss: 4586.949707, avg loss: 2.746676, ppl: 15.590729 +epoch: 1, batch: 9983, sum loss: 4396.843262, avg loss: 
2.554819, ppl: 12.868968 +epoch: 1, batch: 9984, sum loss: 5130.099609, avg loss: 2.904926, ppl: 18.263893 +epoch: 1, batch: 9985, sum loss: 5484.041016, avg loss: 2.957951, ppl: 19.258467 +epoch: 1, batch: 9986, sum loss: 4402.475098, avg loss: 2.640957, ppl: 14.026619 +epoch: 1, batch: 9987, sum loss: 4317.969238, avg loss: 2.653945, ppl: 14.209993 +epoch: 1, batch: 9988, sum loss: 4628.829590, avg loss: 2.747080, ppl: 15.597019 +epoch: 1, batch: 9989, sum loss: 5565.071289, avg loss: 2.719976, ppl: 15.179961 +epoch: 1, batch: 9990, sum loss: 5767.981934, avg loss: 3.079542, ppl: 21.748438 +epoch: 1, batch: 9991, sum loss: 3581.949707, avg loss: 2.453390, ppl: 11.627702 +epoch: 1, batch: 9992, sum loss: 4128.936523, avg loss: 2.829977, ppl: 16.945072 +epoch: 1, batch: 9993, sum loss: 4523.594727, avg loss: 2.662504, ppl: 14.332138 +epoch: 1, batch: 9994, sum loss: 5160.584473, avg loss: 2.750845, ppl: 15.655851 +epoch: 1, batch: 9995, sum loss: 4516.385254, avg loss: 2.820978, ppl: 16.793261 +epoch: 1, batch: 9996, sum loss: 4415.391113, avg loss: 2.523081, ppl: 12.466943 +epoch: 1, batch: 9997, sum loss: 5221.355957, avg loss: 2.772892, ppl: 16.004852 +epoch: 1, batch: 9998, sum loss: 4703.849121, avg loss: 2.862963, ppl: 17.513350 +epoch: 1, batch: 9999, sum loss: 4485.364258, avg loss: 2.676232, ppl: 14.530238 +epoch: 1, batch: 10000, sum loss: 4837.749023, avg loss: 2.817559, ppl: 16.735952 +epoch: 1, batch: 10001, sum loss: 5810.288086, avg loss: 2.800139, ppl: 16.446928 +epoch: 1, batch: 10002, sum loss: 4443.564453, avg loss: 2.780704, ppl: 16.130369 +epoch: 1, batch: 10003, sum loss: 4896.333984, avg loss: 2.796307, ppl: 16.384033 +epoch: 1, batch: 10004, sum loss: 4087.997559, avg loss: 2.558196, ppl: 12.912506 +epoch: 1, batch: 10005, sum loss: 4187.032227, avg loss: 2.587783, ppl: 13.300247 +epoch: 1, batch: 10006, sum loss: 4251.081055, avg loss: 2.551669, ppl: 12.828498 +epoch: 1, batch: 10007, sum loss: 4208.732910, avg loss: 2.583630, ppl: 
13.245129 +epoch: 1, batch: 10008, sum loss: 3287.350830, avg loss: 2.440498, ppl: 11.478757 +epoch: 1, batch: 10009, sum loss: 5262.774414, avg loss: 2.707189, ppl: 14.987082 +epoch: 1, batch: 10010, sum loss: 4725.560547, avg loss: 2.768342, ppl: 15.932204 +epoch: 1, batch: 10011, sum loss: 4427.913574, avg loss: 2.779607, ppl: 16.112684 +epoch: 1, batch: 10012, sum loss: 3613.499268, avg loss: 2.340349, ppl: 10.384865 +epoch: 1, batch: 10013, sum loss: 4438.369141, avg loss: 2.587970, ppl: 13.302743 +epoch: 1, batch: 10014, sum loss: 4674.254395, avg loss: 2.785610, ppl: 16.209709 +epoch: 1, batch: 10015, sum loss: 5894.427246, avg loss: 3.032113, ppl: 20.741014 +epoch: 1, batch: 10016, sum loss: 4827.174316, avg loss: 2.752095, ppl: 15.675433 +epoch: 1, batch: 10017, sum loss: 4437.982422, avg loss: 2.709391, ppl: 15.020123 +epoch: 1, batch: 10018, sum loss: 4305.472656, avg loss: 2.823261, ppl: 16.831646 +epoch: 1, batch: 10019, sum loss: 5274.079590, avg loss: 2.802380, ppl: 16.483833 +epoch: 1, batch: 10020, sum loss: 4178.665039, avg loss: 2.569905, ppl: 13.064577 +epoch: 1, batch: 10021, sum loss: 4687.828125, avg loss: 2.666569, ppl: 14.390507 +epoch: 1, batch: 10022, sum loss: 4653.455078, avg loss: 2.648523, ppl: 14.133150 +epoch: 1, batch: 10023, sum loss: 5329.289551, avg loss: 2.855996, ppl: 17.391758 +epoch: 1, batch: 10024, sum loss: 4316.491699, avg loss: 2.793846, ppl: 16.343756 +epoch: 1, batch: 10025, sum loss: 4163.952148, avg loss: 2.540544, ppl: 12.686574 +epoch: 1, batch: 10026, sum loss: 4695.734375, avg loss: 2.886131, ppl: 17.923820 +epoch: 1, batch: 10027, sum loss: 3955.175293, avg loss: 2.685116, ppl: 14.659899 +epoch: 1, batch: 10028, sum loss: 4688.189453, avg loss: 2.841327, ppl: 17.138493 +epoch: 1, batch: 10029, sum loss: 5211.528809, avg loss: 3.022929, ppl: 20.551394 +epoch: 1, batch: 10030, sum loss: 3666.495361, avg loss: 2.447594, ppl: 11.560495 +epoch: 1, batch: 10031, sum loss: 5647.422852, avg loss: 3.013566, ppl: 
20.359875 +epoch: 1, batch: 10032, sum loss: 5080.206543, avg loss: 2.765491, ppl: 15.886839 +epoch: 1, batch: 10033, sum loss: 4577.644043, avg loss: 2.580408, ppl: 13.202525 +epoch: 1, batch: 10034, sum loss: 4361.475586, avg loss: 2.627395, ppl: 13.837674 +epoch: 1, batch: 10035, sum loss: 4101.730957, avg loss: 2.714580, ppl: 15.098272 +epoch: 1, batch: 10036, sum loss: 5046.115723, avg loss: 2.686963, ppl: 14.686997 +epoch: 1, batch: 10037, sum loss: 3782.857910, avg loss: 2.466009, ppl: 11.775359 +epoch: 1, batch: 10038, sum loss: 4541.437988, avg loss: 2.779338, ppl: 16.108353 +epoch: 1, batch: 10039, sum loss: 4447.894043, avg loss: 2.847564, ppl: 17.245712 +epoch: 1, batch: 10040, sum loss: 4707.548340, avg loss: 2.822271, ppl: 16.815001 +epoch: 1, batch: 10041, sum loss: 4749.134766, avg loss: 2.857482, ppl: 17.417614 +epoch: 1, batch: 10042, sum loss: 5123.698242, avg loss: 3.107155, ppl: 22.357344 +epoch: 1, batch: 10043, sum loss: 4338.458008, avg loss: 2.585493, ppl: 13.269834 +epoch: 1, batch: 10044, sum loss: 5318.195312, avg loss: 2.888754, ppl: 17.970898 +epoch: 1, batch: 10045, sum loss: 5277.958984, avg loss: 2.990345, ppl: 19.892544 +epoch: 1, batch: 10046, sum loss: 5002.079102, avg loss: 2.944131, ppl: 18.994156 +epoch: 1, batch: 10047, sum loss: 3389.198975, avg loss: 2.523603, ppl: 12.473460 +epoch: 1, batch: 10048, sum loss: 4758.627930, avg loss: 2.665898, ppl: 14.380859 +epoch: 1, batch: 10049, sum loss: 4627.363281, avg loss: 2.738085, ppl: 15.457353 +epoch: 1, batch: 10050, sum loss: 5115.982422, avg loss: 2.856495, ppl: 17.400431 +epoch: 1, batch: 10051, sum loss: 5145.559570, avg loss: 2.853888, ppl: 17.355124 +epoch: 1, batch: 10052, sum loss: 4331.529785, avg loss: 2.719102, ppl: 15.166698 +epoch: 1, batch: 10053, sum loss: 4574.648926, avg loss: 2.820375, ppl: 16.783150 +epoch: 1, batch: 10054, sum loss: 3705.511719, avg loss: 2.425073, ppl: 11.303056 +epoch: 1, batch: 10055, sum loss: 5422.773438, avg loss: 2.892146, ppl: 
18.031963 +epoch: 1, batch: 10056, sum loss: 4977.099609, avg loss: 3.057186, ppl: 21.267633 +epoch: 1, batch: 10057, sum loss: 4247.156738, avg loss: 2.796021, ppl: 16.379351 +epoch: 1, batch: 10058, sum loss: 3611.800781, avg loss: 2.769786, ppl: 15.955217 +epoch: 1, batch: 10059, sum loss: 3964.878906, avg loss: 2.566265, ppl: 13.017113 +epoch: 1, batch: 10060, sum loss: 4550.422363, avg loss: 2.698946, ppl: 14.864053 +epoch: 1, batch: 10061, sum loss: 4491.895996, avg loss: 2.675340, ppl: 14.517284 +epoch: 1, batch: 10062, sum loss: 4449.937500, avg loss: 2.769096, ppl: 15.944216 +epoch: 1, batch: 10063, sum loss: 4296.673340, avg loss: 2.797313, ppl: 16.400526 +epoch: 1, batch: 10064, sum loss: 5084.511719, avg loss: 2.732140, ppl: 15.365728 +epoch: 1, batch: 10065, sum loss: 4732.782227, avg loss: 2.764476, ppl: 15.870715 +epoch: 1, batch: 10066, sum loss: 5038.665039, avg loss: 2.986761, ppl: 19.821369 +epoch: 1, batch: 10067, sum loss: 4334.471680, avg loss: 2.623772, ppl: 13.787638 +epoch: 1, batch: 10068, sum loss: 4304.691406, avg loss: 2.710763, ppl: 15.040747 +epoch: 1, batch: 10069, sum loss: 4188.720215, avg loss: 2.900776, ppl: 18.188248 +epoch: 1, batch: 10070, sum loss: 5249.759277, avg loss: 3.031039, ppl: 20.718742 +epoch: 1, batch: 10071, sum loss: 4662.332520, avg loss: 2.701236, ppl: 14.898128 +epoch: 1, batch: 10072, sum loss: 3202.237549, avg loss: 2.389729, ppl: 10.910542 +epoch: 1, batch: 10073, sum loss: 4334.654297, avg loss: 2.842396, ppl: 17.156828 +epoch: 1, batch: 10074, sum loss: 4874.837891, avg loss: 2.743296, ppl: 15.538120 +epoch: 1, batch: 10075, sum loss: 4546.515137, avg loss: 2.785855, ppl: 16.213676 +epoch: 1, batch: 10076, sum loss: 5152.734863, avg loss: 2.864222, ppl: 17.535398 +epoch: 1, batch: 10077, sum loss: 4396.077148, avg loss: 2.449068, ppl: 11.577552 +epoch: 1, batch: 10078, sum loss: 4732.542969, avg loss: 2.793709, ppl: 16.341515 +epoch: 1, batch: 10079, sum loss: 4649.897461, avg loss: 2.897132, ppl: 
18.122105 +epoch: 1, batch: 10080, sum loss: 5644.238281, avg loss: 2.875313, ppl: 17.730965 +epoch: 1, batch: 10081, sum loss: 4848.444336, avg loss: 2.797717, ppl: 16.407152 +epoch: 1, batch: 10082, sum loss: 4863.603027, avg loss: 2.699003, ppl: 14.864903 +epoch: 1, batch: 10083, sum loss: 4265.158203, avg loss: 2.449832, ppl: 11.586405 +epoch: 1, batch: 10084, sum loss: 5310.998047, avg loss: 2.821997, ppl: 16.810383 +epoch: 1, batch: 10085, sum loss: 3734.588867, avg loss: 2.444103, ppl: 11.520208 +epoch: 1, batch: 10086, sum loss: 4021.221680, avg loss: 2.459463, ppl: 11.698524 +epoch: 1, batch: 10087, sum loss: 3807.352539, avg loss: 2.533169, ppl: 12.593346 +epoch: 1, batch: 10088, sum loss: 5066.684570, avg loss: 2.732840, ppl: 15.376488 +epoch: 1, batch: 10089, sum loss: 5414.185547, avg loss: 2.991263, ppl: 19.910812 +epoch: 1, batch: 10090, sum loss: 4327.554199, avg loss: 2.568281, ppl: 13.043389 +epoch: 1, batch: 10091, sum loss: 4343.503906, avg loss: 2.756030, ppl: 15.737247 +epoch: 1, batch: 10092, sum loss: 4730.305664, avg loss: 2.893153, ppl: 18.050135 +epoch: 1, batch: 10093, sum loss: 3736.192139, avg loss: 2.451570, ppl: 11.606549 +epoch: 1, batch: 10094, sum loss: 5083.125488, avg loss: 2.911298, ppl: 18.380632 +epoch: 1, batch: 10095, sum loss: 5629.178711, avg loss: 2.915163, ppl: 18.451811 +epoch: 1, batch: 10096, sum loss: 5756.609863, avg loss: 3.068555, ppl: 21.510805 +epoch: 1, batch: 10097, sum loss: 4722.701172, avg loss: 2.881453, ppl: 17.840172 +epoch: 1, batch: 10098, sum loss: 4806.313477, avg loss: 2.773407, ppl: 16.013090 +epoch: 1, batch: 10099, sum loss: 4202.987305, avg loss: 2.715108, ppl: 15.106244 +epoch: 1, batch: 10100, sum loss: 4427.826172, avg loss: 2.878951, ppl: 17.795589 +epoch: 1, batch: 10101, sum loss: 5892.217773, avg loss: 3.024753, ppl: 20.588911 +epoch: 1, batch: 10102, sum loss: 4995.014648, avg loss: 2.914244, ppl: 18.434874 +epoch: 1, batch: 10103, sum loss: 3613.759766, avg loss: 2.269950, ppl: 
9.678914 +epoch: 1, batch: 10104, sum loss: 4258.381836, avg loss: 2.559124, ppl: 12.924487 +epoch: 1, batch: 10105, sum loss: 3745.699463, avg loss: 2.330865, ppl: 10.286833 +epoch: 1, batch: 10106, sum loss: 4006.458008, avg loss: 2.672754, ppl: 14.479789 +epoch: 1, batch: 10107, sum loss: 4030.891602, avg loss: 2.681897, ppl: 14.612790 +epoch: 1, batch: 10108, sum loss: 3918.291748, avg loss: 2.589750, ppl: 13.326440 +epoch: 1, batch: 10109, sum loss: 4822.484863, avg loss: 2.710784, ppl: 15.041062 +epoch: 1, batch: 10110, sum loss: 4530.658203, avg loss: 2.838758, ppl: 17.094524 +epoch: 1, batch: 10111, sum loss: 4138.246582, avg loss: 2.362013, ppl: 10.612293 +epoch: 1, batch: 10112, sum loss: 4003.762695, avg loss: 2.394595, ppl: 10.963758 +epoch: 1, batch: 10113, sum loss: 4437.425781, avg loss: 2.588930, ppl: 13.315515 +epoch: 1, batch: 10114, sum loss: 4953.501953, avg loss: 2.703877, ppl: 14.937529 +epoch: 1, batch: 10115, sum loss: 4517.908691, avg loss: 2.684438, ppl: 14.649962 +epoch: 1, batch: 10116, sum loss: 3742.067383, avg loss: 2.395690, ppl: 10.975766 +epoch: 1, batch: 10117, sum loss: 5350.711914, avg loss: 2.895407, ppl: 18.090862 +epoch: 1, batch: 10118, sum loss: 3349.333740, avg loss: 2.229916, ppl: 9.299084 +epoch: 1, batch: 10119, sum loss: 4178.145508, avg loss: 2.725470, ppl: 15.263587 +epoch: 1, batch: 10120, sum loss: 4216.861328, avg loss: 2.625692, ppl: 13.814129 +epoch: 1, batch: 10121, sum loss: 4836.928711, avg loss: 2.502291, ppl: 12.210439 +epoch: 1, batch: 10122, sum loss: 3933.107178, avg loss: 2.393857, ppl: 10.955668 +epoch: 1, batch: 10123, sum loss: 5906.430664, avg loss: 2.687184, ppl: 14.690251 +epoch: 1, batch: 10124, sum loss: 5131.612793, avg loss: 2.755968, ppl: 15.736272 +epoch: 1, batch: 10125, sum loss: 3938.090332, avg loss: 2.459769, ppl: 11.702111 +epoch: 1, batch: 10126, sum loss: 4954.281738, avg loss: 2.740200, ppl: 15.490084 +epoch: 1, batch: 10127, sum loss: 4594.855957, avg loss: 2.808592, ppl: 16.586546 
+epoch: 1, batch: 10128, sum loss: 5098.587891, avg loss: 2.802962, ppl: 16.493429 +epoch: 1, batch: 10129, sum loss: 4548.480957, avg loss: 2.689817, ppl: 14.728987 +epoch: 1, batch: 10130, sum loss: 4927.212891, avg loss: 2.611135, ppl: 13.614501 +epoch: 1, batch: 10131, sum loss: 3939.821777, avg loss: 2.786295, ppl: 16.220818 +epoch: 1, batch: 10132, sum loss: 4260.277832, avg loss: 2.558726, ppl: 12.919342 +epoch: 1, batch: 10133, sum loss: 4446.587402, avg loss: 2.767012, ppl: 15.911016 +epoch: 1, batch: 10134, sum loss: 4082.230225, avg loss: 2.858705, ppl: 17.438925 +epoch: 1, batch: 10135, sum loss: 3951.645996, avg loss: 2.695529, ppl: 14.813360 +epoch: 1, batch: 10136, sum loss: 3872.928223, avg loss: 2.645443, ppl: 14.089682 +epoch: 1, batch: 10137, sum loss: 4723.060059, avg loss: 2.686610, ppl: 14.681816 +epoch: 1, batch: 10138, sum loss: 4818.029785, avg loss: 2.888507, ppl: 17.966469 +epoch: 1, batch: 10139, sum loss: 4589.881348, avg loss: 2.542871, ppl: 12.716121 +epoch: 1, batch: 10140, sum loss: 4970.187500, avg loss: 2.762750, ppl: 15.843354 +epoch: 1, batch: 10141, sum loss: 5760.047852, avg loss: 2.819407, ppl: 16.766901 +epoch: 1, batch: 10142, sum loss: 4826.723633, avg loss: 2.511303, ppl: 12.320971 +epoch: 1, batch: 10143, sum loss: 4969.543945, avg loss: 2.768548, ppl: 15.935483 +epoch: 1, batch: 10144, sum loss: 4368.300781, avg loss: 2.745632, ppl: 15.574456 +epoch: 1, batch: 10145, sum loss: 5048.831543, avg loss: 2.569380, ppl: 13.057727 +epoch: 1, batch: 10146, sum loss: 4469.035156, avg loss: 2.706866, ppl: 14.982244 +epoch: 1, batch: 10147, sum loss: 4367.130859, avg loss: 2.528738, ppl: 12.537677 +epoch: 1, batch: 10148, sum loss: 4864.499023, avg loss: 2.707011, ppl: 14.984424 +epoch: 1, batch: 10149, sum loss: 5634.431641, avg loss: 3.083980, ppl: 21.845175 +epoch: 1, batch: 10150, sum loss: 4835.200684, avg loss: 2.699721, ppl: 14.875583 +epoch: 1, batch: 10151, sum loss: 5167.631836, avg loss: 2.718375, ppl: 15.155681 +epoch: 
1, batch: 10152, sum loss: 4276.025879, avg loss: 2.504995, ppl: 12.243493 +epoch: 1, batch: 10153, sum loss: 3465.354492, avg loss: 2.273855, ppl: 9.716784 +epoch: 1, batch: 10154, sum loss: 4034.846680, avg loss: 2.623437, ppl: 13.783020 +epoch: 1, batch: 10155, sum loss: 4853.231445, avg loss: 2.784413, ppl: 16.190306 +epoch: 1, batch: 10156, sum loss: 3959.034912, avg loss: 2.427367, ppl: 11.329010 +epoch: 1, batch: 10157, sum loss: 4325.688965, avg loss: 2.701867, ppl: 14.907539 +epoch: 1, batch: 10158, sum loss: 4522.238281, avg loss: 2.752427, ppl: 15.680648 +epoch: 1, batch: 10159, sum loss: 5063.098633, avg loss: 2.935130, ppl: 18.823948 +epoch: 1, batch: 10160, sum loss: 4621.730469, avg loss: 2.870640, ppl: 17.648310 +epoch: 1, batch: 10161, sum loss: 4715.235840, avg loss: 2.751013, ppl: 15.658483 +epoch: 1, batch: 10162, sum loss: 5617.452148, avg loss: 2.948794, ppl: 19.082920 +epoch: 1, batch: 10163, sum loss: 4139.255859, avg loss: 2.672212, ppl: 14.471941 +epoch: 1, batch: 10164, sum loss: 4727.484863, avg loss: 2.880856, ppl: 17.829529 +epoch: 1, batch: 10165, sum loss: 4739.709961, avg loss: 2.914951, ppl: 18.447901 +epoch: 1, batch: 10166, sum loss: 5013.427246, avg loss: 2.832445, ppl: 16.986942 +epoch: 1, batch: 10167, sum loss: 5068.185059, avg loss: 2.771014, ppl: 15.974832 +epoch: 1, batch: 10168, sum loss: 3800.252686, avg loss: 2.583448, ppl: 13.242726 +epoch: 1, batch: 10169, sum loss: 4316.002441, avg loss: 2.694134, ppl: 14.792699 +epoch: 1, batch: 10170, sum loss: 3583.016602, avg loss: 2.307158, ppl: 10.045836 +epoch: 1, batch: 10171, sum loss: 4408.597168, avg loss: 2.739961, ppl: 15.486380 +epoch: 1, batch: 10172, sum loss: 6138.672363, avg loss: 3.159379, ppl: 23.555952 +epoch: 1, batch: 10173, sum loss: 4294.732910, avg loss: 2.709611, ppl: 15.023426 +epoch: 1, batch: 10174, sum loss: 4985.693848, avg loss: 2.730391, ppl: 15.338883 +epoch: 1, batch: 10175, sum loss: 4298.356445, avg loss: 2.635412, ppl: 13.949055 +epoch: 1, 
batch: 10176, sum loss: 4563.320312, avg loss: 2.760629, ppl: 15.809787 +epoch: 1, batch: 10177, sum loss: 4826.107910, avg loss: 2.708254, ppl: 15.003052 +epoch: 1, batch: 10178, sum loss: 4224.572754, avg loss: 2.551070, ppl: 12.820821 +epoch: 1, batch: 10179, sum loss: 4790.076660, avg loss: 2.927920, ppl: 18.688715 +epoch: 1, batch: 10180, sum loss: 4873.767578, avg loss: 2.766043, ppl: 15.895610 +epoch: 1, batch: 10181, sum loss: 4872.776367, avg loss: 2.639641, ppl: 14.008167 +epoch: 1, batch: 10182, sum loss: 5343.408691, avg loss: 2.896156, ppl: 18.104424 +epoch: 1, batch: 10183, sum loss: 4742.833008, avg loss: 2.927675, ppl: 18.684135 +epoch: 1, batch: 10184, sum loss: 4701.564453, avg loss: 2.866808, ppl: 17.580805 +epoch: 1, batch: 10185, sum loss: 5550.362305, avg loss: 3.168015, ppl: 23.760279 +epoch: 1, batch: 10186, sum loss: 4231.581055, avg loss: 2.538441, ppl: 12.659918 +epoch: 1, batch: 10187, sum loss: 4103.624512, avg loss: 2.645793, ppl: 14.094614 +epoch: 1, batch: 10188, sum loss: 4629.581543, avg loss: 2.653055, ppl: 14.197348 +epoch: 1, batch: 10189, sum loss: 4611.489746, avg loss: 2.608309, ppl: 13.576072 +epoch: 1, batch: 10190, sum loss: 4681.663086, avg loss: 2.679830, ppl: 14.582615 +epoch: 1, batch: 10191, sum loss: 4100.970215, avg loss: 2.630513, ppl: 13.880895 +epoch: 1, batch: 10192, sum loss: 4810.437988, avg loss: 2.727006, ppl: 15.287045 +epoch: 1, batch: 10193, sum loss: 4036.369385, avg loss: 2.698108, ppl: 14.851605 +epoch: 1, batch: 10194, sum loss: 4854.774414, avg loss: 2.744361, ppl: 15.554670 +epoch: 1, batch: 10195, sum loss: 4685.890625, avg loss: 2.865988, ppl: 17.566401 +epoch: 1, batch: 10196, sum loss: 4072.070312, avg loss: 2.644202, ppl: 14.072205 +epoch: 1, batch: 10197, sum loss: 4443.026367, avg loss: 2.749398, ppl: 15.633210 +epoch: 1, batch: 10198, sum loss: 4225.778320, avg loss: 2.666106, ppl: 14.383853 +epoch: 1, batch: 10199, sum loss: 4467.288086, avg loss: 2.453206, ppl: 11.625559 +epoch: 1, batch: 
10200, sum loss: 4112.705078, avg loss: 2.529339, ppl: 12.545209 +epoch: 1, batch: 10201, sum loss: 4276.151855, avg loss: 2.402333, ppl: 11.048919 +epoch: 1, batch: 10202, sum loss: 5859.805664, avg loss: 3.087358, ppl: 21.919096 +epoch: 1, batch: 10203, sum loss: 4856.343262, avg loss: 2.880394, ppl: 17.821285 +epoch: 1, batch: 10204, sum loss: 4324.355469, avg loss: 2.850597, ppl: 17.298103 +epoch: 1, batch: 10205, sum loss: 3776.627930, avg loss: 2.593838, ppl: 13.381029 +epoch: 1, batch: 10206, sum loss: 3483.137695, avg loss: 2.495084, ppl: 12.122755 +epoch: 1, batch: 10207, sum loss: 4823.359863, avg loss: 2.783243, ppl: 16.171375 +epoch: 1, batch: 10208, sum loss: 5041.256836, avg loss: 2.663104, ppl: 14.340738 +epoch: 1, batch: 10209, sum loss: 4110.057129, avg loss: 2.682805, ppl: 14.626062 +epoch: 1, batch: 10210, sum loss: 4357.067871, avg loss: 2.624740, ppl: 13.800981 +epoch: 1, batch: 10211, sum loss: 4534.873047, avg loss: 2.756762, ppl: 15.748758 +epoch: 1, batch: 10212, sum loss: 4487.169434, avg loss: 2.711281, ppl: 15.048537 +epoch: 1, batch: 10213, sum loss: 4053.292480, avg loss: 2.591619, ppl: 13.351374 +epoch: 1, batch: 10214, sum loss: 5550.551270, avg loss: 2.780837, ppl: 16.132523 +epoch: 1, batch: 10215, sum loss: 4361.816895, avg loss: 2.727840, ppl: 15.299810 +epoch: 1, batch: 10216, sum loss: 4530.904297, avg loss: 2.608465, ppl: 13.578198 +epoch: 1, batch: 10217, sum loss: 5115.895996, avg loss: 2.806306, ppl: 16.548676 +epoch: 1, batch: 10218, sum loss: 4667.088379, avg loss: 2.823405, ppl: 16.834070 +epoch: 1, batch: 10219, sum loss: 3395.277588, avg loss: 2.275655, ppl: 9.734296 +epoch: 1, batch: 10220, sum loss: 4508.983887, avg loss: 2.643015, ppl: 14.055519 +epoch: 1, batch: 10221, sum loss: 5962.256348, avg loss: 3.188372, ppl: 24.248926 +epoch: 1, batch: 10222, sum loss: 4925.663574, avg loss: 2.944210, ppl: 18.995655 +epoch: 1, batch: 10223, sum loss: 4505.333008, avg loss: 2.865988, ppl: 17.566401 +epoch: 1, batch: 10224, 
sum loss: 4889.127930, avg loss: 2.729831, ppl: 15.330299 +epoch: 1, batch: 10225, sum loss: 4297.558105, avg loss: 2.539928, ppl: 12.678758 +epoch: 1, batch: 10226, sum loss: 4870.737305, avg loss: 2.815455, ppl: 16.700771 +epoch: 1, batch: 10227, sum loss: 4190.747070, avg loss: 2.571010, ppl: 13.079032 +epoch: 1, batch: 10228, sum loss: 4971.260254, avg loss: 2.769504, ppl: 15.950726 +epoch: 1, batch: 10229, sum loss: 4178.387695, avg loss: 2.693996, ppl: 14.790661 +epoch: 1, batch: 10230, sum loss: 4481.327148, avg loss: 2.730851, ppl: 15.345947 +epoch: 1, batch: 10231, sum loss: 4986.829590, avg loss: 2.952534, ppl: 19.154428 +epoch: 1, batch: 10232, sum loss: 5017.146484, avg loss: 2.923745, ppl: 18.610857 +epoch: 1, batch: 10233, sum loss: 4077.043213, avg loss: 2.676982, ppl: 14.541138 +epoch: 1, batch: 10234, sum loss: 4349.466309, avg loss: 2.609158, ppl: 13.587603 +epoch: 1, batch: 10235, sum loss: 4345.662109, avg loss: 2.643347, ppl: 14.060182 +epoch: 1, batch: 10236, sum loss: 4631.655762, avg loss: 2.864351, ppl: 17.537672 +epoch: 1, batch: 10237, sum loss: 5286.059082, avg loss: 2.835869, ppl: 17.045198 +epoch: 1, batch: 10238, sum loss: 4487.367188, avg loss: 2.683832, ppl: 14.641093 +epoch: 1, batch: 10239, sum loss: 5105.348145, avg loss: 2.925701, ppl: 18.647291 +epoch: 1, batch: 10240, sum loss: 4305.713867, avg loss: 2.682688, ppl: 14.624348 +epoch: 1, batch: 10241, sum loss: 3752.114014, avg loss: 2.547260, ppl: 12.772061 +epoch: 1, batch: 10242, sum loss: 4960.767578, avg loss: 2.677155, ppl: 14.543651 +epoch: 1, batch: 10243, sum loss: 5424.791504, avg loss: 3.045925, ppl: 21.029467 +epoch: 1, batch: 10244, sum loss: 5015.819336, avg loss: 2.809983, ppl: 16.609636 +epoch: 1, batch: 10245, sum loss: 4361.055176, avg loss: 2.568348, ppl: 13.044260 +epoch: 1, batch: 10246, sum loss: 4285.680664, avg loss: 2.690320, ppl: 14.736398 +epoch: 1, batch: 10247, sum loss: 5276.514648, avg loss: 2.823175, ppl: 16.830206 +epoch: 1, batch: 10248, sum 
loss: 4154.531738, avg loss: 2.598206, ppl: 13.439607 +epoch: 1, batch: 10249, sum loss: 4544.302246, avg loss: 2.719511, ppl: 15.172897 +epoch: 1, batch: 10250, sum loss: 5831.161621, avg loss: 2.961484, ppl: 19.326630 +epoch: 1, batch: 10251, sum loss: 4020.340088, avg loss: 2.546130, ppl: 12.757642 +epoch: 1, batch: 10252, sum loss: 5143.390625, avg loss: 2.934051, ppl: 18.803642 +epoch: 1, batch: 10253, sum loss: 4938.414551, avg loss: 2.772833, ppl: 16.003902 +epoch: 1, batch: 10254, sum loss: 3943.395508, avg loss: 2.510118, ppl: 12.306385 +epoch: 1, batch: 10255, sum loss: 3580.249756, avg loss: 2.402852, ppl: 11.054663 +epoch: 1, batch: 10256, sum loss: 4849.281738, avg loss: 2.832524, ppl: 16.988291 +epoch: 1, batch: 10257, sum loss: 3995.407715, avg loss: 2.383895, ppl: 10.847069 +epoch: 1, batch: 10258, sum loss: 5012.540039, avg loss: 2.794058, ppl: 16.347223 +epoch: 1, batch: 10259, sum loss: 4513.163574, avg loss: 2.738570, ppl: 15.464858 +epoch: 1, batch: 10260, sum loss: 4500.863770, avg loss: 2.661658, ppl: 14.320016 +epoch: 1, batch: 10261, sum loss: 4054.770752, avg loss: 2.646717, ppl: 14.107652 +epoch: 1, batch: 10262, sum loss: 5060.653809, avg loss: 2.677595, ppl: 14.550054 +epoch: 1, batch: 10263, sum loss: 4906.324707, avg loss: 2.728768, ppl: 15.314007 +epoch: 1, batch: 10264, sum loss: 5153.706543, avg loss: 3.035163, ppl: 20.804367 +epoch: 1, batch: 10265, sum loss: 5028.766602, avg loss: 2.728577, ppl: 15.311079 +epoch: 1, batch: 10266, sum loss: 5272.718750, avg loss: 2.791275, ppl: 16.301796 +epoch: 1, batch: 10267, sum loss: 4250.781250, avg loss: 2.539296, ppl: 12.670747 +epoch: 1, batch: 10268, sum loss: 5698.457031, avg loss: 3.013462, ppl: 20.357763 +epoch: 1, batch: 10269, sum loss: 4539.601562, avg loss: 2.759636, ppl: 15.794095 +epoch: 1, batch: 10270, sum loss: 5335.195312, avg loss: 2.846956, ppl: 17.235233 +epoch: 1, batch: 10271, sum loss: 2851.989258, avg loss: 2.217721, ppl: 9.186371 +epoch: 1, batch: 10272, sum loss: 
4721.763184, avg loss: 2.373938, ppl: 10.739603 +epoch: 1, batch: 10273, sum loss: 4624.262695, avg loss: 2.830026, ppl: 16.945904 +epoch: 1, batch: 10274, sum loss: 3954.280518, avg loss: 2.515446, ppl: 12.372122 +epoch: 1, batch: 10275, sum loss: 3923.606934, avg loss: 2.541196, ppl: 12.694849 +epoch: 1, batch: 10276, sum loss: 4393.734863, avg loss: 2.693890, ppl: 14.789095 +epoch: 1, batch: 10277, sum loss: 5584.918457, avg loss: 2.905785, ppl: 18.279589 +epoch: 1, batch: 10278, sum loss: 4775.165039, avg loss: 2.711621, ppl: 15.053662 +epoch: 1, batch: 10279, sum loss: 4770.170898, avg loss: 2.721147, ppl: 15.197748 +epoch: 1, batch: 10280, sum loss: 4023.623047, avg loss: 2.511625, ppl: 12.324940 +epoch: 1, batch: 10281, sum loss: 4676.934082, avg loss: 2.785547, ppl: 16.208685 +epoch: 1, batch: 10282, sum loss: 5041.394043, avg loss: 2.821150, ppl: 16.796148 +epoch: 1, batch: 10283, sum loss: 4684.687500, avg loss: 2.746007, ppl: 15.580291 +epoch: 1, batch: 10284, sum loss: 3727.726074, avg loss: 2.517033, ppl: 12.391777 +epoch: 1, batch: 10285, sum loss: 4534.552246, avg loss: 2.664249, ppl: 14.357166 +epoch: 1, batch: 10286, sum loss: 4997.601562, avg loss: 2.717565, ppl: 15.143400 +epoch: 1, batch: 10287, sum loss: 5464.821289, avg loss: 2.934920, ppl: 18.820004 +epoch: 1, batch: 10288, sum loss: 4345.617188, avg loss: 2.613119, ppl: 13.641534 +epoch: 1, batch: 10289, sum loss: 4857.020020, avg loss: 2.597337, ppl: 13.427932 +epoch: 1, batch: 10290, sum loss: 5256.723633, avg loss: 2.778395, ppl: 16.093174 +epoch: 1, batch: 10291, sum loss: 4937.278809, avg loss: 3.016053, ppl: 20.410576 +epoch: 1, batch: 10292, sum loss: 5336.005371, avg loss: 2.783519, ppl: 16.175840 +epoch: 1, batch: 10293, sum loss: 5171.761230, avg loss: 2.863655, ppl: 17.525467 +epoch: 1, batch: 10294, sum loss: 4031.229492, avg loss: 2.614286, ppl: 13.657467 +epoch: 1, batch: 10295, sum loss: 4086.276611, avg loss: 2.573222, ppl: 13.107992 +epoch: 1, batch: 10296, sum loss: 
4856.602539, avg loss: 2.812161, ppl: 16.645859 +epoch: 1, batch: 10297, sum loss: 4727.816406, avg loss: 2.814177, ppl: 16.679436 +epoch: 1, batch: 10298, sum loss: 3949.638184, avg loss: 2.603585, ppl: 13.512089 +epoch: 1, batch: 10299, sum loss: 5060.489258, avg loss: 2.714855, ppl: 15.102416 +epoch: 1, batch: 10300, sum loss: 4134.663086, avg loss: 2.618533, ppl: 13.715584 +epoch: 1, batch: 10301, sum loss: 5960.779785, avg loss: 2.930570, ppl: 18.738306 +epoch: 1, batch: 10302, sum loss: 4796.764648, avg loss: 2.624051, ppl: 13.791474 +epoch: 1, batch: 10303, sum loss: 4818.052246, avg loss: 2.688645, ppl: 14.711734 +epoch: 1, batch: 10304, sum loss: 4477.771973, avg loss: 2.460314, ppl: 11.708490 +epoch: 1, batch: 10305, sum loss: 5575.478027, avg loss: 2.873958, ppl: 17.706961 +epoch: 1, batch: 10306, sum loss: 4330.731934, avg loss: 2.858569, ppl: 17.436556 +epoch: 1, batch: 10307, sum loss: 4485.995605, avg loss: 2.640374, ppl: 14.018448 +epoch: 1, batch: 10308, sum loss: 5337.949707, avg loss: 2.883819, ppl: 17.882442 +epoch: 1, batch: 10309, sum loss: 3956.726807, avg loss: 2.594575, ppl: 13.390894 +epoch: 1, batch: 10310, sum loss: 4508.862793, avg loss: 2.830422, ppl: 16.952620 +epoch: 1, batch: 10311, sum loss: 3946.665039, avg loss: 2.483741, ppl: 11.986024 +epoch: 1, batch: 10312, sum loss: 4538.339355, avg loss: 2.615757, ppl: 13.677573 +epoch: 1, batch: 10313, sum loss: 4059.102295, avg loss: 2.511821, ppl: 12.327355 +epoch: 1, batch: 10314, sum loss: 4455.661621, avg loss: 2.752107, ppl: 15.675632 +epoch: 1, batch: 10315, sum loss: 3938.571289, avg loss: 2.679300, ppl: 14.574892 +epoch: 1, batch: 10316, sum loss: 4854.305664, avg loss: 2.734820, ppl: 15.406968 +epoch: 1, batch: 10317, sum loss: 4969.050293, avg loss: 2.716813, ppl: 15.132017 +epoch: 1, batch: 10318, sum loss: 4834.211914, avg loss: 2.688661, ppl: 14.711958 +epoch: 1, batch: 10319, sum loss: 4884.484375, avg loss: 2.783182, ppl: 16.170391 +epoch: 1, batch: 10320, sum loss: 
4692.149902, avg loss: 2.672067, ppl: 14.469850 +epoch: 1, batch: 10321, sum loss: 5174.212402, avg loss: 2.724704, ppl: 15.251896 +epoch: 1, batch: 10322, sum loss: 4411.020508, avg loss: 2.578037, ppl: 13.171255 +epoch: 1, batch: 10323, sum loss: 5005.842773, avg loss: 2.779480, ppl: 16.110634 +epoch: 1, batch: 10324, sum loss: 4715.744141, avg loss: 2.673324, ppl: 14.488052 +epoch: 1, batch: 10325, sum loss: 5818.789062, avg loss: 3.248905, ppl: 25.762114 +epoch: 1, batch: 10326, sum loss: 4833.556641, avg loss: 2.808574, ppl: 16.586256 +epoch: 1, batch: 10327, sum loss: 4604.469727, avg loss: 2.783839, ppl: 16.181021 +epoch: 1, batch: 10328, sum loss: 5322.015625, avg loss: 2.956676, ppl: 19.233923 +epoch: 1, batch: 10329, sum loss: 3893.398926, avg loss: 2.505405, ppl: 12.248515 +epoch: 1, batch: 10330, sum loss: 5994.503906, avg loss: 3.176738, ppl: 23.968430 +epoch: 1, batch: 10331, sum loss: 5087.606445, avg loss: 2.939114, ppl: 18.899092 +epoch: 1, batch: 10332, sum loss: 4112.676270, avg loss: 2.429224, ppl: 11.350071 +epoch: 1, batch: 10333, sum loss: 4633.140625, avg loss: 2.767707, ppl: 15.922077 +epoch: 1, batch: 10334, sum loss: 4150.094727, avg loss: 2.708939, ppl: 15.013339 +epoch: 1, batch: 10335, sum loss: 4680.692871, avg loss: 2.782814, ppl: 16.164440 +epoch: 1, batch: 10336, sum loss: 4271.702637, avg loss: 2.489337, ppl: 12.053285 +epoch: 1, batch: 10337, sum loss: 5258.105469, avg loss: 2.782066, ppl: 16.152363 +epoch: 1, batch: 10338, sum loss: 3160.564697, avg loss: 2.337696, ppl: 10.357344 +epoch: 1, batch: 10339, sum loss: 4203.633301, avg loss: 2.793112, ppl: 16.331766 +epoch: 1, batch: 10340, sum loss: 4697.853027, avg loss: 2.845459, ppl: 17.209459 +epoch: 1, batch: 10341, sum loss: 5712.855469, avg loss: 2.905827, ppl: 18.280352 +epoch: 1, batch: 10342, sum loss: 4759.988770, avg loss: 2.723106, ppl: 15.227544 +epoch: 1, batch: 10343, sum loss: 4519.328613, avg loss: 2.873063, ppl: 17.691130 +epoch: 1, batch: 10344, sum loss: 
4830.266113, avg loss: 2.653992, ppl: 14.210660 +epoch: 1, batch: 10345, sum loss: 4445.748535, avg loss: 2.679776, ppl: 14.581826 +epoch: 1, batch: 10346, sum loss: 5380.787109, avg loss: 2.760794, ppl: 15.812392 +epoch: 1, batch: 10347, sum loss: 5125.872070, avg loss: 2.788831, ppl: 16.262003 +epoch: 1, batch: 10348, sum loss: 4037.255127, avg loss: 2.547164, ppl: 12.770834 +epoch: 1, batch: 10349, sum loss: 4581.146973, avg loss: 2.781510, ppl: 16.143377 +epoch: 1, batch: 10350, sum loss: 5918.247070, avg loss: 2.859057, ppl: 17.445063 +epoch: 1, batch: 10351, sum loss: 4419.429688, avg loss: 2.684951, ppl: 14.657488 +epoch: 1, batch: 10352, sum loss: 4241.152832, avg loss: 2.795750, ppl: 16.374905 +epoch: 1, batch: 10353, sum loss: 4259.897461, avg loss: 2.537163, ppl: 12.643756 +epoch: 1, batch: 10354, sum loss: 4723.683594, avg loss: 2.783550, ppl: 16.176338 +epoch: 1, batch: 10355, sum loss: 4759.928711, avg loss: 2.561856, ppl: 12.959849 +epoch: 1, batch: 10356, sum loss: 4442.419922, avg loss: 2.692376, ppl: 14.766715 +epoch: 1, batch: 10357, sum loss: 5171.445312, avg loss: 2.635803, ppl: 13.954510 +epoch: 1, batch: 10358, sum loss: 4474.559082, avg loss: 2.506756, ppl: 12.265076 +epoch: 1, batch: 10359, sum loss: 3836.408203, avg loss: 2.369616, ppl: 10.693286 +epoch: 1, batch: 10360, sum loss: 5104.027344, avg loss: 2.790611, ppl: 16.290972 +epoch: 1, batch: 10361, sum loss: 3591.885254, avg loss: 2.542028, ppl: 12.705408 +epoch: 1, batch: 10362, sum loss: 5150.833984, avg loss: 2.929940, ppl: 18.726498 +epoch: 1, batch: 10363, sum loss: 3570.354004, avg loss: 2.427161, ppl: 11.326682 +epoch: 1, batch: 10364, sum loss: 4869.830566, avg loss: 2.770097, ppl: 15.960182 +epoch: 1, batch: 10365, sum loss: 4384.756836, avg loss: 2.606871, ppl: 13.556564 +epoch: 1, batch: 10366, sum loss: 4954.032227, avg loss: 2.769163, ppl: 15.945276 +epoch: 1, batch: 10367, sum loss: 3971.531006, avg loss: 2.557328, ppl: 12.901305 +epoch: 1, batch: 10368, sum loss: 
3611.365479, avg loss: 2.352681, ppl: 10.513721 +epoch: 1, batch: 10369, sum loss: 5566.893555, avg loss: 2.914604, ppl: 18.441507 +epoch: 1, batch: 10370, sum loss: 4860.851562, avg loss: 2.633181, ppl: 13.917967 +epoch: 1, batch: 10371, sum loss: 4356.722656, avg loss: 2.711091, ppl: 15.045678 +epoch: 1, batch: 10372, sum loss: 5508.552734, avg loss: 2.906888, ppl: 18.299761 +epoch: 1, batch: 10373, sum loss: 6080.831543, avg loss: 2.767789, ppl: 15.923383 +epoch: 1, batch: 10374, sum loss: 3696.260254, avg loss: 2.509342, ppl: 12.296833 +epoch: 1, batch: 10375, sum loss: 4493.717773, avg loss: 2.935152, ppl: 18.824369 +epoch: 1, batch: 10376, sum loss: 4037.908447, avg loss: 2.490998, ppl: 12.073322 +epoch: 1, batch: 10377, sum loss: 4887.592773, avg loss: 2.972988, ppl: 19.550255 +epoch: 1, batch: 10378, sum loss: 4427.651367, avg loss: 2.756943, ppl: 15.751624 +epoch: 1, batch: 10379, sum loss: 4052.328125, avg loss: 2.572907, ppl: 13.103859 +epoch: 1, batch: 10380, sum loss: 4217.822266, avg loss: 2.656060, ppl: 14.240066 +epoch: 1, batch: 10381, sum loss: 5242.142578, avg loss: 2.845897, ppl: 17.216999 +epoch: 1, batch: 10382, sum loss: 4554.013672, avg loss: 2.670976, ppl: 14.454068 +epoch: 1, batch: 10383, sum loss: 4810.494629, avg loss: 2.405247, ppl: 11.081172 +epoch: 1, batch: 10384, sum loss: 4775.603027, avg loss: 2.757277, ppl: 15.756871 +epoch: 1, batch: 10385, sum loss: 4375.795410, avg loss: 2.806796, ppl: 16.556791 +epoch: 1, batch: 10386, sum loss: 4741.029785, avg loss: 2.852605, ppl: 17.332882 +epoch: 1, batch: 10387, sum loss: 4330.856934, avg loss: 2.660232, ppl: 14.299601 +epoch: 1, batch: 10388, sum loss: 4200.934570, avg loss: 2.682589, ppl: 14.622904 +epoch: 1, batch: 10389, sum loss: 4193.743652, avg loss: 2.757228, ppl: 15.756108 +epoch: 1, batch: 10390, sum loss: 4172.839844, avg loss: 2.603144, ppl: 13.506134 +epoch: 1, batch: 10391, sum loss: 5098.889160, avg loss: 2.690707, ppl: 14.742091 +epoch: 1, batch: 10392, sum loss: 
4355.655762, avg loss: 2.758490, ppl: 15.776005 +epoch: 1, batch: 10393, sum loss: 3662.605957, avg loss: 2.325464, ppl: 10.231426 +epoch: 1, batch: 10394, sum loss: 5655.523438, avg loss: 3.195211, ppl: 24.415329 +epoch: 1, batch: 10395, sum loss: 5260.898438, avg loss: 3.035717, ppl: 20.815908 +epoch: 1, batch: 10396, sum loss: 5338.694336, avg loss: 2.805409, ppl: 16.533844 +epoch: 1, batch: 10397, sum loss: 5929.134277, avg loss: 2.985465, ppl: 19.795712 +epoch: 1, batch: 10398, sum loss: 3913.073730, avg loss: 2.654731, ppl: 14.221164 +epoch: 1, batch: 10399, sum loss: 5802.008789, avg loss: 3.092755, ppl: 22.037716 +epoch: 1, batch: 10400, sum loss: 3772.136230, avg loss: 2.819235, ppl: 16.764019 +epoch: 1, batch: 10401, sum loss: 4140.586914, avg loss: 2.467573, ppl: 11.793785 +epoch: 1, batch: 10402, sum loss: 3837.159912, avg loss: 2.568380, ppl: 13.044677 +epoch: 1, batch: 10403, sum loss: 3804.113281, avg loss: 2.738742, ppl: 15.467520 +epoch: 1, batch: 10404, sum loss: 4209.383789, avg loss: 2.669235, ppl: 14.428930 +epoch: 1, batch: 10405, sum loss: 5038.243652, avg loss: 2.791271, ppl: 16.301722 +epoch: 1, batch: 10406, sum loss: 4590.359863, avg loss: 2.842328, ppl: 17.155659 +epoch: 1, batch: 10407, sum loss: 4168.195312, avg loss: 2.671920, ppl: 14.467721 +epoch: 1, batch: 10408, sum loss: 4009.862305, avg loss: 2.588678, ppl: 13.312163 +epoch: 1, batch: 10409, sum loss: 4315.209961, avg loss: 2.642504, ppl: 14.048343 +epoch: 1, batch: 10410, sum loss: 4721.795898, avg loss: 2.754840, ppl: 15.718528 +epoch: 1, batch: 10411, sum loss: 4671.349121, avg loss: 2.631746, ppl: 13.898012 +epoch: 1, batch: 10412, sum loss: 4540.298828, avg loss: 2.514008, ppl: 12.354351 +epoch: 1, batch: 10413, sum loss: 4713.383789, avg loss: 2.643513, ppl: 14.062521 +epoch: 1, batch: 10414, sum loss: 5304.338867, avg loss: 3.164880, ppl: 23.685902 +epoch: 1, batch: 10415, sum loss: 4217.358398, avg loss: 2.684506, ppl: 14.650958 +epoch: 1, batch: 10416, sum loss: 
4034.355469, avg loss: 2.415782, ppl: 11.198522 +epoch: 1, batch: 10417, sum loss: 5045.426758, avg loss: 2.891362, ppl: 18.017832 +epoch: 1, batch: 10418, sum loss: 4999.421387, avg loss: 3.013515, ppl: 20.358835 +epoch: 1, batch: 10419, sum loss: 5213.351562, avg loss: 2.801371, ppl: 16.467209 +epoch: 1, batch: 10420, sum loss: 4496.869141, avg loss: 2.765602, ppl: 15.888604 +epoch: 1, batch: 10421, sum loss: 4177.114746, avg loss: 2.600943, ppl: 13.476441 +epoch: 1, batch: 10422, sum loss: 4557.394043, avg loss: 2.790811, ppl: 16.294226 +epoch: 1, batch: 10423, sum loss: 4417.535156, avg loss: 2.762686, ppl: 15.842339 +epoch: 1, batch: 10424, sum loss: 4790.716309, avg loss: 2.657081, ppl: 14.254614 +epoch: 1, batch: 10425, sum loss: 5137.986816, avg loss: 2.813794, ppl: 16.673050 +epoch: 1, batch: 10426, sum loss: 4896.630371, avg loss: 2.723376, ppl: 15.231658 +epoch: 1, batch: 10427, sum loss: 4947.282227, avg loss: 2.831873, ppl: 16.977228 +epoch: 1, batch: 10428, sum loss: 5006.817383, avg loss: 2.823924, ppl: 16.842817 +epoch: 1, batch: 10429, sum loss: 4718.298340, avg loss: 2.564292, ppl: 12.991463 +epoch: 1, batch: 10430, sum loss: 4974.685059, avg loss: 2.765250, ppl: 15.883014 +epoch: 1, batch: 10431, sum loss: 4243.789062, avg loss: 2.771907, ppl: 15.989094 +epoch: 1, batch: 10432, sum loss: 5387.868652, avg loss: 2.973438, ppl: 19.559040 +epoch: 1, batch: 10433, sum loss: 4333.969238, avg loss: 2.639446, ppl: 14.005436 +epoch: 1, batch: 10434, sum loss: 5112.210449, avg loss: 3.050245, ppl: 21.120520 +epoch: 1, batch: 10435, sum loss: 4608.735352, avg loss: 2.560409, ppl: 12.941104 +epoch: 1, batch: 10436, sum loss: 5582.037109, avg loss: 2.910343, ppl: 18.363091 +epoch: 1, batch: 10437, sum loss: 4950.031250, avg loss: 2.849759, ppl: 17.283617 +epoch: 1, batch: 10438, sum loss: 4865.393555, avg loss: 2.715064, ppl: 15.105571 +epoch: 1, batch: 10439, sum loss: 4088.002197, avg loss: 2.684177, ppl: 14.646149 +epoch: 1, batch: 10440, sum loss: 
3869.774902, avg loss: 2.506331, ppl: 12.259866 +epoch: 1, batch: 10441, sum loss: 4478.726074, avg loss: 2.550528, ppl: 12.813862 +epoch: 1, batch: 10442, sum loss: 5307.900879, avg loss: 2.715039, ppl: 15.105196 +epoch: 1, batch: 10443, sum loss: 4307.571777, avg loss: 2.734966, ppl: 15.409220 +epoch: 1, batch: 10444, sum loss: 3596.313477, avg loss: 2.302377, ppl: 9.997924 +epoch: 1, batch: 10445, sum loss: 4966.558105, avg loss: 2.757667, ppl: 15.763022 +epoch: 1, batch: 10446, sum loss: 4795.486328, avg loss: 2.607660, ppl: 13.567264 +epoch: 1, batch: 10447, sum loss: 4519.760254, avg loss: 2.744238, ppl: 15.552760 +epoch: 1, batch: 10448, sum loss: 4875.712891, avg loss: 2.931878, ppl: 18.762831 +epoch: 1, batch: 10449, sum loss: 4820.745605, avg loss: 2.842421, ppl: 17.157249 +epoch: 1, batch: 10450, sum loss: 5349.216797, avg loss: 2.919878, ppl: 18.539030 +epoch: 1, batch: 10451, sum loss: 4539.251465, avg loss: 2.800278, ppl: 16.449226 +epoch: 1, batch: 10452, sum loss: 4062.313477, avg loss: 2.856761, ppl: 17.405052 +epoch: 1, batch: 10453, sum loss: 4311.312500, avg loss: 2.494972, ppl: 12.121400 +epoch: 1, batch: 10454, sum loss: 5040.118652, avg loss: 2.647121, ppl: 14.113351 +epoch: 1, batch: 10455, sum loss: 4831.872070, avg loss: 2.736054, ppl: 15.426001 +epoch: 1, batch: 10456, sum loss: 3394.465820, avg loss: 2.341011, ppl: 10.391738 +epoch: 1, batch: 10457, sum loss: 5192.076172, avg loss: 2.744226, ppl: 15.552575 +epoch: 1, batch: 10458, sum loss: 4233.371094, avg loss: 2.398510, ppl: 11.006769 +epoch: 1, batch: 10459, sum loss: 4420.773438, avg loss: 2.821170, ppl: 16.796497 +epoch: 1, batch: 10460, sum loss: 5095.329590, avg loss: 2.662137, ppl: 14.326866 +epoch: 1, batch: 10461, sum loss: 4552.425781, avg loss: 2.786062, ppl: 16.217035 +epoch: 1, batch: 10462, sum loss: 4796.664551, avg loss: 2.828222, ppl: 16.915359 +epoch: 1, batch: 10463, sum loss: 4372.676270, avg loss: 2.598144, ppl: 13.438770 +epoch: 1, batch: 10464, sum loss: 
4198.869141, avg loss: 2.825619, ppl: 16.871376 +epoch: 1, batch: 10465, sum loss: 3812.015625, avg loss: 2.353096, ppl: 10.518085 +epoch: 1, batch: 10466, sum loss: 4897.077148, avg loss: 2.728177, ppl: 15.304958 +epoch: 1, batch: 10467, sum loss: 4925.621094, avg loss: 2.565428, ppl: 13.006221 +epoch: 1, batch: 10468, sum loss: 4879.926758, avg loss: 2.820767, ppl: 16.789722 +epoch: 1, batch: 10469, sum loss: 6083.245117, avg loss: 3.064607, ppl: 21.426043 +epoch: 1, batch: 10470, sum loss: 4355.759277, avg loss: 2.790365, ppl: 16.286959 +epoch: 1, batch: 10471, sum loss: 4795.392090, avg loss: 2.741791, ppl: 15.514744 +epoch: 1, batch: 10472, sum loss: 4228.738281, avg loss: 2.703797, ppl: 14.936336 +epoch: 1, batch: 10473, sum loss: 5309.680176, avg loss: 2.940022, ppl: 18.916267 +epoch: 1, batch: 10474, sum loss: 4350.311035, avg loss: 2.688697, ppl: 14.712487 +epoch: 1, batch: 10475, sum loss: 5476.582031, avg loss: 2.799889, ppl: 16.442816 +epoch: 1, batch: 10476, sum loss: 5449.202637, avg loss: 2.778788, ppl: 16.099495 +epoch: 1, batch: 10477, sum loss: 5019.060547, avg loss: 2.834026, ppl: 17.013826 +epoch: 1, batch: 10478, sum loss: 4237.532715, avg loss: 2.753433, ppl: 15.696418 +epoch: 1, batch: 10479, sum loss: 5029.757324, avg loss: 2.838464, ppl: 17.089489 +epoch: 1, batch: 10480, sum loss: 4627.264648, avg loss: 2.739648, ppl: 15.481529 +epoch: 1, batch: 10481, sum loss: 4616.865234, avg loss: 2.806605, ppl: 16.553621 +epoch: 1, batch: 10482, sum loss: 4362.650391, avg loss: 2.665028, ppl: 14.368350 +epoch: 1, batch: 10483, sum loss: 4630.562500, avg loss: 2.622062, ppl: 13.764082 +epoch: 1, batch: 10484, sum loss: 5625.415039, avg loss: 2.936020, ppl: 18.840717 +epoch: 1, batch: 10485, sum loss: 4513.227051, avg loss: 2.578987, ppl: 13.183775 +epoch: 1, batch: 10486, sum loss: 5074.498535, avg loss: 2.703516, ppl: 14.932134 +epoch: 1, batch: 10487, sum loss: 5353.530273, avg loss: 2.951230, ppl: 19.129459 +epoch: 1, batch: 10488, sum loss: 
5199.948730, avg loss: 2.906623, ppl: 18.294914 +epoch: 1, batch: 10489, sum loss: 4681.072266, avg loss: 2.576264, ppl: 13.147931 +epoch: 1, batch: 10490, sum loss: 4687.839355, avg loss: 2.712870, ppl: 15.072474 +epoch: 1, batch: 10491, sum loss: 5179.585449, avg loss: 2.796752, ppl: 16.391325 +epoch: 1, batch: 10492, sum loss: 4633.353516, avg loss: 2.781124, ppl: 16.137156 +epoch: 1, batch: 10493, sum loss: 4144.839355, avg loss: 2.542846, ppl: 12.715812 +epoch: 1, batch: 10494, sum loss: 4533.021484, avg loss: 2.817291, ppl: 16.731464 +epoch: 1, batch: 10495, sum loss: 4271.291992, avg loss: 2.643126, ppl: 14.057081 +epoch: 1, batch: 10496, sum loss: 4294.187988, avg loss: 2.756218, ppl: 15.740204 +epoch: 1, batch: 10497, sum loss: 3929.372803, avg loss: 2.646042, ppl: 14.098129 +epoch: 1, batch: 10498, sum loss: 4047.974121, avg loss: 2.454805, ppl: 11.644167 +epoch: 1, batch: 10499, sum loss: 5941.456055, avg loss: 2.911051, ppl: 18.376106 +epoch: 1, batch: 10500, sum loss: 3888.201172, avg loss: 2.465568, ppl: 11.770169 +epoch: 1, batch: 10501, sum loss: 4401.866211, avg loss: 2.708841, ppl: 15.011861 +epoch: 1, batch: 10502, sum loss: 3788.364502, avg loss: 2.498921, ppl: 12.169361 +epoch: 1, batch: 10503, sum loss: 5522.425293, avg loss: 2.848079, ppl: 17.254608 +epoch: 1, batch: 10504, sum loss: 4689.809082, avg loss: 2.945860, ppl: 19.027018 +epoch: 1, batch: 10505, sum loss: 5281.118164, avg loss: 2.806120, ppl: 16.545599 +epoch: 1, batch: 10506, sum loss: 5809.176758, avg loss: 3.073638, ppl: 21.620424 +epoch: 1, batch: 10507, sum loss: 3813.773193, avg loss: 2.612174, ppl: 13.628641 +epoch: 1, batch: 10508, sum loss: 4824.654785, avg loss: 2.539292, ppl: 12.670698 +epoch: 1, batch: 10509, sum loss: 3315.941650, avg loss: 2.290015, ppl: 9.875086 +epoch: 1, batch: 10510, sum loss: 4826.503906, avg loss: 2.601889, ppl: 13.489194 +epoch: 1, batch: 10511, sum loss: 4306.617676, avg loss: 2.705162, ppl: 14.956737 +epoch: 1, batch: 10512, sum loss: 
4877.780762, avg loss: 2.709878, ppl: 15.027445 +epoch: 1, batch: 10513, sum loss: 4547.735840, avg loss: 2.918958, ppl: 18.521973 +epoch: 1, batch: 10514, sum loss: 5444.671875, avg loss: 3.118369, ppl: 22.609470 +epoch: 1, batch: 10515, sum loss: 5539.972656, avg loss: 2.888411, ppl: 17.964746 +epoch: 1, batch: 10516, sum loss: 4110.553223, avg loss: 2.492755, ppl: 12.094553 +epoch: 1, batch: 10517, sum loss: 4094.918213, avg loss: 2.518400, ppl: 12.408727 +epoch: 1, batch: 10518, sum loss: 4025.372070, avg loss: 2.600370, ppl: 13.468717 +epoch: 1, batch: 10519, sum loss: 4766.743652, avg loss: 2.919010, ppl: 18.522943 +epoch: 1, batch: 10520, sum loss: 4181.529785, avg loss: 2.570086, ppl: 13.066945 +epoch: 1, batch: 10521, sum loss: 4849.358887, avg loss: 2.842531, ppl: 17.159147 +epoch: 1, batch: 10522, sum loss: 5288.724609, avg loss: 2.776233, ppl: 16.058422 +epoch: 1, batch: 10523, sum loss: 4980.544922, avg loss: 2.815458, ppl: 16.700825 +epoch: 1, batch: 10524, sum loss: 3554.240479, avg loss: 2.330650, ppl: 10.284620 +epoch: 1, batch: 10525, sum loss: 4069.816650, avg loss: 2.334949, ppl: 10.328936 +epoch: 1, batch: 10526, sum loss: 3806.695312, avg loss: 2.662025, ppl: 14.325265 +epoch: 1, batch: 10527, sum loss: 5240.730469, avg loss: 2.738104, ppl: 15.457644 +epoch: 1, batch: 10528, sum loss: 4006.034180, avg loss: 2.465252, ppl: 11.766443 +epoch: 1, batch: 10529, sum loss: 4526.229980, avg loss: 2.410133, ppl: 11.135444 +epoch: 1, batch: 10530, sum loss: 5921.466797, avg loss: 2.921296, ppl: 18.565331 +epoch: 1, batch: 10531, sum loss: 4516.780273, avg loss: 2.604833, ppl: 13.528964 +epoch: 1, batch: 10532, sum loss: 4590.042969, avg loss: 2.727298, ppl: 15.291517 +epoch: 1, batch: 10533, sum loss: 5174.086914, avg loss: 2.997733, ppl: 20.040052 +epoch: 1, batch: 10534, sum loss: 3695.908203, avg loss: 2.611949, ppl: 13.625584 +epoch: 1, batch: 10535, sum loss: 4619.182129, avg loss: 2.991699, ppl: 19.919487 +epoch: 1, batch: 10536, sum loss: 
4763.975586, avg loss: 2.856101, ppl: 17.393570 +epoch: 1, batch: 10537, sum loss: 3638.444336, avg loss: 2.320436, ppl: 10.180117 +epoch: 1, batch: 10538, sum loss: 4764.475586, avg loss: 2.694839, ppl: 14.803139 +epoch: 1, batch: 10539, sum loss: 4921.558105, avg loss: 2.741815, ppl: 15.515121 +epoch: 1, batch: 10540, sum loss: 3951.659180, avg loss: 2.566012, ppl: 13.013826 +epoch: 1, batch: 10541, sum loss: 5677.141113, avg loss: 3.052227, ppl: 21.162411 +epoch: 1, batch: 10542, sum loss: 4707.793457, avg loss: 2.719696, ppl: 15.175705 +epoch: 1, batch: 10543, sum loss: 4607.089844, avg loss: 2.732556, ppl: 15.372133 +epoch: 1, batch: 10544, sum loss: 5008.966309, avg loss: 2.907119, ppl: 18.303993 +epoch: 1, batch: 10545, sum loss: 4928.770020, avg loss: 2.814832, ppl: 16.690367 +epoch: 1, batch: 10546, sum loss: 4885.475098, avg loss: 2.870432, ppl: 17.644638 +epoch: 1, batch: 10547, sum loss: 4855.075684, avg loss: 2.808025, ppl: 16.577152 +epoch: 1, batch: 10548, sum loss: 4957.101074, avg loss: 2.649440, ppl: 14.146109 +epoch: 1, batch: 10549, sum loss: 5229.314941, avg loss: 2.740731, ppl: 15.498310 +epoch: 1, batch: 10550, sum loss: 4510.959961, avg loss: 2.596983, ppl: 13.423185 +epoch: 1, batch: 10551, sum loss: 4423.845703, avg loss: 2.619210, ppl: 13.724876 +epoch: 1, batch: 10552, sum loss: 3869.521484, avg loss: 2.664960, ppl: 14.367370 +epoch: 1, batch: 10553, sum loss: 4647.661133, avg loss: 2.928583, ppl: 18.701115 +epoch: 1, batch: 10554, sum loss: 4432.001953, avg loss: 2.754507, ppl: 15.713293 +epoch: 1, batch: 10555, sum loss: 4324.940430, avg loss: 2.745994, ppl: 15.580091 +epoch: 1, batch: 10556, sum loss: 3737.920654, avg loss: 2.606639, ppl: 13.553423 +epoch: 1, batch: 10557, sum loss: 5268.855469, avg loss: 2.817570, ppl: 16.736128 +epoch: 1, batch: 10558, sum loss: 3331.967773, avg loss: 2.307457, ppl: 10.048835 +epoch: 1, batch: 10559, sum loss: 4789.633301, avg loss: 2.732249, ppl: 15.367417 +epoch: 1, batch: 10560, sum loss: 
5364.388184, avg loss: 2.955586, ppl: 19.212976 +epoch: 1, batch: 10561, sum loss: 4925.014648, avg loss: 2.618296, ppl: 13.712337 +epoch: 1, batch: 10562, sum loss: 4191.587402, avg loss: 2.676620, ppl: 14.535882 +epoch: 1, batch: 10563, sum loss: 4093.146240, avg loss: 2.602127, ppl: 13.492410 +epoch: 1, batch: 10564, sum loss: 3997.157715, avg loss: 2.693502, ppl: 14.783363 +epoch: 1, batch: 10565, sum loss: 5193.019043, avg loss: 2.663087, ppl: 14.340485 +epoch: 1, batch: 10566, sum loss: 5100.254395, avg loss: 2.875002, ppl: 17.725466 +epoch: 1, batch: 10567, sum loss: 5382.354492, avg loss: 2.949235, ppl: 19.091352 +epoch: 1, batch: 10568, sum loss: 4360.369141, avg loss: 2.798697, ppl: 16.423225 +epoch: 1, batch: 10569, sum loss: 5608.610352, avg loss: 3.054799, ppl: 21.216911 +epoch: 1, batch: 10570, sum loss: 4516.061035, avg loss: 2.665915, ppl: 14.381109 +epoch: 1, batch: 10571, sum loss: 3428.964355, avg loss: 2.228047, ppl: 9.281722 +epoch: 1, batch: 10572, sum loss: 5161.365723, avg loss: 2.935930, ppl: 18.839020 +epoch: 1, batch: 10573, sum loss: 4632.297363, avg loss: 2.715297, ppl: 15.109100 +epoch: 1, batch: 10574, sum loss: 4079.594238, avg loss: 2.661184, ppl: 14.313220 +epoch: 1, batch: 10575, sum loss: 5263.837891, avg loss: 2.795453, ppl: 16.370041 +epoch: 1, batch: 10576, sum loss: 5265.676758, avg loss: 2.844774, ppl: 17.197672 +epoch: 1, batch: 10577, sum loss: 4461.667480, avg loss: 2.790286, ppl: 16.285677 +epoch: 1, batch: 10578, sum loss: 4997.360352, avg loss: 2.891991, ppl: 18.029169 +epoch: 1, batch: 10579, sum loss: 4026.510742, avg loss: 2.684340, ppl: 14.648537 +epoch: 1, batch: 10580, sum loss: 5090.946777, avg loss: 2.902478, ppl: 18.219240 +epoch: 1, batch: 10581, sum loss: 5137.908203, avg loss: 2.848064, ppl: 17.254353 +epoch: 1, batch: 10582, sum loss: 4842.711914, avg loss: 2.789581, ppl: 16.274193 +epoch: 1, batch: 10583, sum loss: 4323.303223, avg loss: 2.497576, ppl: 12.152993 +epoch: 1, batch: 10584, sum loss: 
3986.380859, avg loss: 2.555372, ppl: 12.876095 +epoch: 1, batch: 10585, sum loss: 4639.425781, avg loss: 2.763208, ppl: 15.850605 +epoch: 1, batch: 10586, sum loss: 5159.017578, avg loss: 3.032932, ppl: 20.758007 +epoch: 1, batch: 10587, sum loss: 4995.657227, avg loss: 2.690176, ppl: 14.734269 +epoch: 1, batch: 10588, sum loss: 4711.734375, avg loss: 2.887092, ppl: 17.941067 +epoch: 1, batch: 10589, sum loss: 4433.416016, avg loss: 2.890102, ppl: 17.995138 +epoch: 1, batch: 10590, sum loss: 5138.010254, avg loss: 2.753489, ppl: 15.697301 +epoch: 1, batch: 10591, sum loss: 4541.790039, avg loss: 2.634449, ppl: 13.935636 +epoch: 1, batch: 10592, sum loss: 4566.614258, avg loss: 2.811954, ppl: 16.642414 +epoch: 1, batch: 10593, sum loss: 4078.704102, avg loss: 2.592946, ppl: 13.369100 +epoch: 1, batch: 10594, sum loss: 5062.569336, avg loss: 2.798546, ppl: 16.420755 +epoch: 1, batch: 10595, sum loss: 4710.179199, avg loss: 2.702340, ppl: 14.914597 +epoch: 1, batch: 10596, sum loss: 3684.866699, avg loss: 2.456578, ppl: 11.664824 +epoch: 1, batch: 10597, sum loss: 4763.187988, avg loss: 2.652109, ppl: 14.183923 +epoch: 1, batch: 10598, sum loss: 5551.035156, avg loss: 2.864311, ppl: 17.536974 +epoch: 1, batch: 10599, sum loss: 5045.020508, avg loss: 2.976413, ppl: 19.617329 +epoch: 1, batch: 10600, sum loss: 3408.604004, avg loss: 2.419165, ppl: 11.236477 +epoch: 1, batch: 10601, sum loss: 3935.910889, avg loss: 2.758172, ppl: 15.770984 +epoch: 1, batch: 10602, sum loss: 4038.062744, avg loss: 2.565478, ppl: 13.006875 +epoch: 1, batch: 10603, sum loss: 4000.891113, avg loss: 2.519453, ppl: 12.421801 +epoch: 1, batch: 10604, sum loss: 3517.187012, avg loss: 2.514072, ppl: 12.355140 +epoch: 1, batch: 10605, sum loss: 3962.607422, avg loss: 2.636465, ppl: 13.963758 +epoch: 1, batch: 10606, sum loss: 4542.993164, avg loss: 2.717101, ppl: 15.136383 +epoch: 1, batch: 10607, sum loss: 4993.301758, avg loss: 2.767906, ppl: 15.925247 +epoch: 1, batch: 10608, sum loss: 
4662.418457, avg loss: 2.720197, ppl: 15.183320 +epoch: 1, batch: 10609, sum loss: 5264.823242, avg loss: 2.954446, ppl: 19.191093 +epoch: 1, batch: 10610, sum loss: 5384.726074, avg loss: 2.931261, ppl: 18.751263 +epoch: 1, batch: 10611, sum loss: 4142.916504, avg loss: 2.667686, ppl: 14.406597 +epoch: 1, batch: 10612, sum loss: 4243.170410, avg loss: 2.637147, ppl: 13.973287 +epoch: 1, batch: 10613, sum loss: 4504.142578, avg loss: 2.576741, ppl: 13.154195 +epoch: 1, batch: 10614, sum loss: 3452.354736, avg loss: 2.538496, ppl: 12.660619 +epoch: 1, batch: 10615, sum loss: 4628.152344, avg loss: 2.846342, ppl: 17.224661 +epoch: 1, batch: 10616, sum loss: 4859.073242, avg loss: 2.900939, ppl: 18.191227 +epoch: 1, batch: 10617, sum loss: 3330.261719, avg loss: 2.397597, ppl: 10.996715 +epoch: 1, batch: 10618, sum loss: 4028.771240, avg loss: 2.571009, ppl: 13.079017 +epoch: 1, batch: 10619, sum loss: 4147.211426, avg loss: 2.707057, ppl: 14.985109 +epoch: 1, batch: 10620, sum loss: 4409.136719, avg loss: 2.615146, ppl: 13.669217 +epoch: 1, batch: 10621, sum loss: 3869.878418, avg loss: 2.519452, ppl: 12.421789 +epoch: 1, batch: 10622, sum loss: 5190.000488, avg loss: 3.268262, ppl: 26.265659 +epoch: 1, batch: 10623, sum loss: 4409.588867, avg loss: 2.540086, ppl: 12.680758 +epoch: 1, batch: 10624, sum loss: 4529.493164, avg loss: 2.630368, ppl: 13.878870 +epoch: 1, batch: 10625, sum loss: 5157.208496, avg loss: 2.869899, ppl: 17.635237 +epoch: 1, batch: 10626, sum loss: 4799.500000, avg loss: 2.644353, ppl: 14.074331 +epoch: 1, batch: 10627, sum loss: 3109.670410, avg loss: 2.403146, ppl: 11.057905 +epoch: 1, batch: 10628, sum loss: 3885.324951, avg loss: 2.598880, ppl: 13.448662 +epoch: 1, batch: 10629, sum loss: 4773.771484, avg loss: 2.790048, ppl: 16.281799 +epoch: 1, batch: 10630, sum loss: 4341.408691, avg loss: 2.550769, ppl: 12.816955 +epoch: 1, batch: 10631, sum loss: 4745.749512, avg loss: 2.672156, ppl: 14.471140 +epoch: 1, batch: 10632, sum loss: 
3774.789062, avg loss: 2.443229, ppl: 11.510149 +epoch: 1, batch: 10633, sum loss: 4442.102539, avg loss: 2.895764, ppl: 18.097328 +epoch: 1, batch: 10634, sum loss: 4797.248047, avg loss: 2.689040, ppl: 14.717546 +epoch: 1, batch: 10635, sum loss: 3757.080078, avg loss: 2.629167, ppl: 13.862223 +epoch: 1, batch: 10636, sum loss: 4480.312012, avg loss: 2.819580, ppl: 16.769798 +epoch: 1, batch: 10637, sum loss: 5398.224121, avg loss: 2.775437, ppl: 16.045631 +epoch: 1, batch: 10638, sum loss: 4512.366211, avg loss: 2.677962, ppl: 14.555400 +epoch: 1, batch: 10639, sum loss: 4216.648438, avg loss: 2.296649, ppl: 9.940820 +epoch: 1, batch: 10640, sum loss: 4808.535156, avg loss: 2.699908, ppl: 14.878356 +epoch: 1, batch: 10641, sum loss: 5039.845703, avg loss: 2.761559, ppl: 15.824498 +epoch: 1, batch: 10642, sum loss: 4477.971680, avg loss: 2.846771, ppl: 17.232058 +epoch: 1, batch: 10643, sum loss: 5160.704102, avg loss: 3.075509, ppl: 21.660906 +epoch: 1, batch: 10644, sum loss: 4934.927734, avg loss: 2.930480, ppl: 18.736618 +epoch: 1, batch: 10645, sum loss: 4793.209473, avg loss: 2.521415, ppl: 12.446193 +epoch: 1, batch: 10646, sum loss: 5289.333008, avg loss: 2.907825, ppl: 18.316912 +epoch: 1, batch: 10647, sum loss: 4975.848633, avg loss: 3.189647, ppl: 24.279848 +epoch: 1, batch: 10648, sum loss: 5648.552246, avg loss: 2.832774, ppl: 16.992540 +epoch: 1, batch: 10649, sum loss: 4292.769531, avg loss: 2.671294, ppl: 14.458670 +epoch: 1, batch: 10650, sum loss: 5561.774414, avg loss: 2.860995, ppl: 17.478914 +epoch: 1, batch: 10651, sum loss: 3455.412109, avg loss: 2.858075, ppl: 17.427940 +epoch: 1, batch: 10652, sum loss: 5483.870117, avg loss: 2.970677, ppl: 19.505123 +epoch: 1, batch: 10653, sum loss: 5473.250977, avg loss: 3.111570, ppl: 22.456264 +epoch: 1, batch: 10654, sum loss: 4420.884277, avg loss: 2.708875, ppl: 15.012380 +epoch: 1, batch: 10655, sum loss: 4829.629395, avg loss: 3.005370, ppl: 20.193684 +epoch: 1, batch: 10656, sum loss: 
5368.018555, avg loss: 3.031066, ppl: 20.719316 +epoch: 1, batch: 10657, sum loss: 4368.012207, avg loss: 2.392121, ppl: 10.936662 +epoch: 1, batch: 10658, sum loss: 4077.974609, avg loss: 2.452180, ppl: 11.613633 +epoch: 1, batch: 10659, sum loss: 4466.812012, avg loss: 2.712090, ppl: 15.060717 +epoch: 1, batch: 10660, sum loss: 3603.964111, avg loss: 2.473551, ppl: 11.864504 +epoch: 1, batch: 10661, sum loss: 4968.953125, avg loss: 2.748315, ppl: 15.616294 +epoch: 1, batch: 10662, sum loss: 5287.235352, avg loss: 2.678437, ppl: 14.562318 +epoch: 1, batch: 10663, sum loss: 3733.673828, avg loss: 2.598242, ppl: 13.440090 +epoch: 1, batch: 10664, sum loss: 3961.930176, avg loss: 2.646580, ppl: 14.105711 +epoch: 1, batch: 10665, sum loss: 4696.284668, avg loss: 2.677471, ppl: 14.548257 +epoch: 1, batch: 10666, sum loss: 4584.997559, avg loss: 2.584553, ppl: 13.257365 +epoch: 1, batch: 10667, sum loss: 4296.178223, avg loss: 2.575646, ppl: 13.139808 +epoch: 1, batch: 10668, sum loss: 4719.049316, avg loss: 2.560526, ppl: 12.942622 +epoch: 1, batch: 10669, sum loss: 3314.536865, avg loss: 2.292211, ppl: 9.896793 +epoch: 1, batch: 10670, sum loss: 4708.312988, avg loss: 2.695085, ppl: 14.806771 +epoch: 1, batch: 10671, sum loss: 4567.662109, avg loss: 2.756585, ppl: 15.745984 +epoch: 1, batch: 10672, sum loss: 4991.459961, avg loss: 2.878581, ppl: 17.789019 +epoch: 1, batch: 10673, sum loss: 4719.381348, avg loss: 2.727966, ppl: 15.301733 +epoch: 1, batch: 10674, sum loss: 4845.694824, avg loss: 3.006014, ppl: 20.206697 +epoch: 1, batch: 10675, sum loss: 4587.317383, avg loss: 2.732172, ppl: 15.366230 +epoch: 1, batch: 10676, sum loss: 4199.166016, avg loss: 2.636011, ppl: 13.957421 +epoch: 1, batch: 10677, sum loss: 3916.632324, avg loss: 2.486751, ppl: 12.022148 +epoch: 1, batch: 10678, sum loss: 5660.129883, avg loss: 3.034922, ppl: 20.799358 +epoch: 1, batch: 10679, sum loss: 3750.900391, avg loss: 2.367993, ppl: 10.675941 +epoch: 1, batch: 10680, sum loss: 
5228.358887, avg loss: 2.770725, ppl: 15.970216 +epoch: 1, batch: 10681, sum loss: 4401.720703, avg loss: 2.717112, ppl: 15.136539 +epoch: 1, batch: 10682, sum loss: 3863.199951, avg loss: 2.671646, ppl: 14.463756 +epoch: 1, batch: 10683, sum loss: 4573.579590, avg loss: 2.603062, ppl: 13.505023 +epoch: 1, batch: 10684, sum loss: 4516.418945, avg loss: 2.652037, ppl: 14.182899 +epoch: 1, batch: 10685, sum loss: 5193.660156, avg loss: 2.924358, ppl: 18.622274 +epoch: 1, batch: 10686, sum loss: 3965.302246, avg loss: 2.615635, ppl: 13.675897 +epoch: 1, batch: 10687, sum loss: 4667.949219, avg loss: 2.667399, ppl: 14.402466 +epoch: 1, batch: 10688, sum loss: 4280.113281, avg loss: 2.772094, ppl: 15.992090 +epoch: 1, batch: 10689, sum loss: 4886.850098, avg loss: 2.781360, ppl: 16.140961 +epoch: 1, batch: 10690, sum loss: 4238.603027, avg loss: 2.711838, ppl: 15.056929 +epoch: 1, batch: 10691, sum loss: 4188.287109, avg loss: 2.744618, ppl: 15.558672 +epoch: 1, batch: 10692, sum loss: 4606.032227, avg loss: 2.815423, ppl: 16.700243 +epoch: 1, batch: 10693, sum loss: 4698.250977, avg loss: 2.845700, ppl: 17.213608 +epoch: 1, batch: 10694, sum loss: 4761.859863, avg loss: 2.774977, ppl: 16.038254 +epoch: 1, batch: 10695, sum loss: 4822.410645, avg loss: 2.724526, ppl: 15.249183 +epoch: 1, batch: 10696, sum loss: 3747.634521, avg loss: 2.650378, ppl: 14.159389 +epoch: 1, batch: 10697, sum loss: 3610.396729, avg loss: 2.398935, ppl: 11.011441 +epoch: 1, batch: 10698, sum loss: 5284.094727, avg loss: 2.930724, ppl: 18.741188 +epoch: 1, batch: 10699, sum loss: 4874.871094, avg loss: 2.852470, ppl: 17.330534 +epoch: 1, batch: 10700, sum loss: 4436.124512, avg loss: 2.662740, ppl: 14.335511 +epoch: 1, batch: 10701, sum loss: 4871.037598, avg loss: 2.695649, ppl: 14.815125 +epoch: 1, batch: 10702, sum loss: 5285.476074, avg loss: 2.731512, ppl: 15.356089 +epoch: 1, batch: 10703, sum loss: 5187.962891, avg loss: 3.057138, ppl: 21.266603 +epoch: 1, batch: 10704, sum loss: 
4256.145020, avg loss: 2.615947, ppl: 13.680159 +epoch: 1, batch: 10705, sum loss: 3614.360107, avg loss: 2.578003, ppl: 13.170809 +epoch: 1, batch: 10706, sum loss: 5014.692383, avg loss: 2.700427, ppl: 14.886081 +epoch: 1, batch: 10707, sum loss: 5342.217773, avg loss: 2.917650, ppl: 18.497776 +epoch: 1, batch: 10708, sum loss: 5144.071289, avg loss: 2.968304, ppl: 19.458893 +epoch: 1, batch: 10709, sum loss: 4393.393066, avg loss: 2.466813, ppl: 11.784824 +epoch: 1, batch: 10710, sum loss: 4650.785156, avg loss: 2.751944, ppl: 15.673068 +epoch: 1, batch: 10711, sum loss: 4825.404297, avg loss: 2.701794, ppl: 14.906452 +epoch: 1, batch: 10712, sum loss: 3856.550293, avg loss: 2.442400, ppl: 11.500614 +epoch: 1, batch: 10713, sum loss: 4139.728516, avg loss: 2.525765, ppl: 12.500451 +epoch: 1, batch: 10714, sum loss: 4498.287598, avg loss: 2.642942, ppl: 14.054494 +epoch: 1, batch: 10715, sum loss: 3211.154785, avg loss: 2.430851, ppl: 11.368558 +epoch: 1, batch: 10716, sum loss: 5259.951172, avg loss: 2.725363, ppl: 15.261961 +epoch: 1, batch: 10717, sum loss: 3683.301025, avg loss: 2.440889, ppl: 11.483243 +epoch: 1, batch: 10718, sum loss: 4402.309570, avg loss: 2.653592, ppl: 14.204976 +epoch: 1, batch: 10719, sum loss: 3808.644287, avg loss: 2.596213, ppl: 13.412842 +epoch: 1, batch: 10720, sum loss: 5046.217285, avg loss: 2.707198, ppl: 14.987225 +epoch: 1, batch: 10721, sum loss: 4159.100098, avg loss: 2.782007, ppl: 16.151400 +epoch: 1, batch: 10722, sum loss: 4037.948975, avg loss: 2.512725, ppl: 12.338509 +epoch: 1, batch: 10723, sum loss: 4499.514648, avg loss: 2.786077, ppl: 16.217279 +epoch: 1, batch: 10724, sum loss: 4354.287598, avg loss: 2.656673, ppl: 14.248807 +epoch: 1, batch: 10725, sum loss: 4765.750488, avg loss: 2.762754, ppl: 15.843415 +epoch: 1, batch: 10726, sum loss: 4764.726562, avg loss: 3.089965, ppl: 21.976316 +epoch: 1, batch: 10727, sum loss: 3751.076172, avg loss: 2.546556, ppl: 12.763066 +epoch: 1, batch: 10728, sum loss: 
5181.094238, avg loss: 2.709777, ppl: 15.025929 +epoch: 1, batch: 10729, sum loss: 4973.130371, avg loss: 2.882974, ppl: 17.867338 +epoch: 1, batch: 10730, sum loss: 4733.289551, avg loss: 2.807408, ppl: 16.566919 +epoch: 1, batch: 10731, sum loss: 4960.856445, avg loss: 2.619248, ppl: 13.725404 +epoch: 1, batch: 10732, sum loss: 5083.831055, avg loss: 2.698424, ppl: 14.856305 +epoch: 1, batch: 10733, sum loss: 4631.895020, avg loss: 3.005772, ppl: 20.201813 +epoch: 1, batch: 10734, sum loss: 4166.734375, avg loss: 2.493557, ppl: 12.104259 +epoch: 1, batch: 10735, sum loss: 4048.821289, avg loss: 2.656707, ppl: 14.249286 +epoch: 1, batch: 10736, sum loss: 4947.930664, avg loss: 2.809728, ppl: 16.605396 +epoch: 1, batch: 10737, sum loss: 5639.513672, avg loss: 3.112314, ppl: 22.472998 +epoch: 1, batch: 10738, sum loss: 4536.720215, avg loss: 2.776451, ppl: 16.061911 +epoch: 1, batch: 10739, sum loss: 5250.436523, avg loss: 3.005401, ppl: 20.194321 +epoch: 1, batch: 10740, sum loss: 4926.820312, avg loss: 2.715998, ppl: 15.119695 +epoch: 1, batch: 10741, sum loss: 4930.232910, avg loss: 2.854796, ppl: 17.370895 +epoch: 1, batch: 10742, sum loss: 5075.017578, avg loss: 2.841555, ppl: 17.142406 +epoch: 1, batch: 10743, sum loss: 4217.216797, avg loss: 2.670815, ppl: 14.451742 +epoch: 1, batch: 10744, sum loss: 4592.862793, avg loss: 2.695342, ppl: 14.810584 +epoch: 1, batch: 10745, sum loss: 5085.507324, avg loss: 2.707938, ppl: 14.998313 +epoch: 1, batch: 10746, sum loss: 4834.991211, avg loss: 2.898676, ppl: 18.150097 +epoch: 1, batch: 10747, sum loss: 2803.803955, avg loss: 2.350213, ppl: 10.487802 +epoch: 1, batch: 10748, sum loss: 4014.868164, avg loss: 2.674796, ppl: 14.509384 +epoch: 1, batch: 10749, sum loss: 3985.971191, avg loss: 2.529170, ppl: 12.543085 +epoch: 1, batch: 10750, sum loss: 3863.798584, avg loss: 2.327590, ppl: 10.253197 +epoch: 1, batch: 10751, sum loss: 5332.374023, avg loss: 3.107444, ppl: 22.363810 +epoch: 1, batch: 10752, sum loss: 
4538.131836, avg loss: 2.728883, ppl: 15.315763 +epoch: 1, batch: 10753, sum loss: 4998.544434, avg loss: 2.940320, ppl: 18.921909 +epoch: 1, batch: 10754, sum loss: 4454.393555, avg loss: 2.711134, ppl: 15.046331 +epoch: 1, batch: 10755, sum loss: 5763.392578, avg loss: 2.957103, ppl: 19.242138 +epoch: 1, batch: 10756, sum loss: 4125.404297, avg loss: 2.458525, ppl: 11.687556 +epoch: 1, batch: 10757, sum loss: 5237.812988, avg loss: 3.010237, ppl: 20.292217 +epoch: 1, batch: 10758, sum loss: 4503.045898, avg loss: 2.749112, ppl: 15.628750 +epoch: 1, batch: 10759, sum loss: 6368.261230, avg loss: 3.146374, ppl: 23.251606 +epoch: 1, batch: 10760, sum loss: 4536.071777, avg loss: 2.777754, ppl: 16.082851 +epoch: 1, batch: 10761, sum loss: 4476.957520, avg loss: 2.772110, ppl: 15.992342 +epoch: 1, batch: 10762, sum loss: 4815.744141, avg loss: 2.710042, ppl: 15.029903 +epoch: 1, batch: 10763, sum loss: 4753.280762, avg loss: 2.733341, ppl: 15.384207 +epoch: 1, batch: 10764, sum loss: 4296.020996, avg loss: 2.813373, ppl: 16.666044 +epoch: 1, batch: 10765, sum loss: 5074.148926, avg loss: 2.747238, ppl: 15.599493 +epoch: 1, batch: 10766, sum loss: 4389.368164, avg loss: 2.585023, ppl: 13.263590 +epoch: 1, batch: 10767, sum loss: 4767.359375, avg loss: 2.871903, ppl: 17.670616 +epoch: 1, batch: 10768, sum loss: 5303.961914, avg loss: 2.961453, ppl: 19.326027 +epoch: 1, batch: 10769, sum loss: 5048.730957, avg loss: 2.893255, ppl: 18.051981 +epoch: 1, batch: 10770, sum loss: 4329.153320, avg loss: 2.909377, ppl: 18.345371 +epoch: 1, batch: 10771, sum loss: 5204.727051, avg loss: 2.875540, ppl: 17.734999 +epoch: 1, batch: 10772, sum loss: 4933.060547, avg loss: 2.707498, ppl: 14.991714 +epoch: 1, batch: 10773, sum loss: 4422.352051, avg loss: 2.770897, ppl: 15.972958 +epoch: 1, batch: 10774, sum loss: 5705.126953, avg loss: 2.936247, ppl: 18.844982 +epoch: 1, batch: 10775, sum loss: 5372.322754, avg loss: 2.905529, ppl: 18.274904 +epoch: 1, batch: 10776, sum loss: 
4304.231934, avg loss: 2.557476, ppl: 12.903206 +epoch: 1, batch: 10777, sum loss: 4177.215820, avg loss: 2.728423, ppl: 15.308732 +epoch: 1, batch: 10778, sum loss: 3794.232910, avg loss: 2.556761, ppl: 12.893980 +epoch: 1, batch: 10779, sum loss: 4996.879395, avg loss: 2.627171, ppl: 13.834580 +epoch: 1, batch: 10780, sum loss: 4913.509766, avg loss: 2.626141, ppl: 13.820335 +epoch: 1, batch: 10781, sum loss: 3487.104248, avg loss: 2.602317, ppl: 13.494965 +epoch: 1, batch: 10782, sum loss: 4239.709473, avg loss: 2.633360, ppl: 13.920463 +epoch: 1, batch: 10783, sum loss: 4387.292969, avg loss: 2.728416, ppl: 15.308619 +epoch: 1, batch: 10784, sum loss: 4558.718750, avg loss: 2.739615, ppl: 15.481020 +epoch: 1, batch: 10785, sum loss: 4141.384766, avg loss: 2.557989, ppl: 12.909834 +epoch: 1, batch: 10786, sum loss: 5435.559082, avg loss: 2.810527, ppl: 16.618671 +epoch: 1, batch: 10787, sum loss: 4766.416504, avg loss: 2.890489, ppl: 18.002111 +epoch: 1, batch: 10788, sum loss: 5076.632324, avg loss: 2.748583, ppl: 15.620480 +epoch: 1, batch: 10789, sum loss: 4683.622070, avg loss: 2.542683, ppl: 12.713737 +epoch: 1, batch: 10790, sum loss: 4251.076172, avg loss: 2.441744, ppl: 11.493066 +epoch: 1, batch: 10791, sum loss: 4858.866699, avg loss: 2.658023, ppl: 14.268059 +epoch: 1, batch: 10792, sum loss: 4436.618652, avg loss: 2.788572, ppl: 16.257792 +epoch: 1, batch: 10793, sum loss: 3631.140869, avg loss: 2.407918, ppl: 11.110806 +epoch: 1, batch: 10794, sum loss: 4992.030273, avg loss: 2.987451, ppl: 19.835060 +epoch: 1, batch: 10795, sum loss: 3863.734619, avg loss: 2.593111, ppl: 13.371299 +epoch: 1, batch: 10796, sum loss: 4917.931152, avg loss: 2.783210, ppl: 16.170839 +epoch: 1, batch: 10797, sum loss: 4628.861816, avg loss: 2.918576, ppl: 18.514908 +epoch: 1, batch: 10798, sum loss: 5137.846680, avg loss: 2.746043, ppl: 15.580859 +epoch: 1, batch: 10799, sum loss: 4992.714355, avg loss: 2.839997, ppl: 17.115711 +epoch: 1, batch: 10800, sum loss: 
4466.238281, avg loss: 2.775785, ppl: 16.051226 +epoch: 1, batch: 10801, sum loss: 4698.310059, avg loss: 2.712650, ppl: 15.069157 +epoch: 1, batch: 10802, sum loss: 3892.844727, avg loss: 2.758926, ppl: 15.782886 +epoch: 1, batch: 10803, sum loss: 5442.671387, avg loss: 2.733637, ppl: 15.388756 +epoch: 1, batch: 10804, sum loss: 5392.956543, avg loss: 2.823538, ppl: 16.836306 +epoch: 1, batch: 10805, sum loss: 5304.011719, avg loss: 2.790117, ppl: 16.282917 +epoch: 1, batch: 10806, sum loss: 5457.855957, avg loss: 2.959792, ppl: 19.293951 +epoch: 1, batch: 10807, sum loss: 4982.499512, avg loss: 2.769594, ppl: 15.952155 +epoch: 1, batch: 10808, sum loss: 5499.721680, avg loss: 2.808847, ppl: 16.590773 +epoch: 1, batch: 10809, sum loss: 4678.014648, avg loss: 2.729297, ppl: 15.322107 +epoch: 1, batch: 10810, sum loss: 4201.590332, avg loss: 2.402282, ppl: 11.048355 +epoch: 1, batch: 10811, sum loss: 5042.442871, avg loss: 2.940200, ppl: 18.919626 +epoch: 1, batch: 10812, sum loss: 4807.987305, avg loss: 2.821589, ppl: 16.803526 +epoch: 1, batch: 10813, sum loss: 4572.014648, avg loss: 2.651981, ppl: 14.182104 +epoch: 1, batch: 10814, sum loss: 5208.740234, avg loss: 2.724236, ppl: 15.244770 +epoch: 1, batch: 10815, sum loss: 5595.801758, avg loss: 2.960742, ppl: 19.312292 +epoch: 1, batch: 10816, sum loss: 4883.547363, avg loss: 2.801806, ppl: 16.474369 +epoch: 1, batch: 10817, sum loss: 4308.245605, avg loss: 2.772359, ppl: 15.996324 +epoch: 1, batch: 10818, sum loss: 4694.384766, avg loss: 2.753305, ppl: 15.694416 +epoch: 1, batch: 10819, sum loss: 5004.038086, avg loss: 2.811257, ppl: 16.630816 +epoch: 1, batch: 10820, sum loss: 4659.492676, avg loss: 2.622112, ppl: 13.764761 +epoch: 1, batch: 10821, sum loss: 3920.875977, avg loss: 2.280905, ppl: 9.785535 +epoch: 1, batch: 10822, sum loss: 4161.378418, avg loss: 2.517470, ppl: 12.397194 +epoch: 1, batch: 10823, sum loss: 4944.052734, avg loss: 2.826788, ppl: 16.891123 +epoch: 1, batch: 10824, sum loss: 
4463.883789, avg loss: 2.622728, ppl: 13.773251 +epoch: 1, batch: 10825, sum loss: 5253.058105, avg loss: 2.819677, ppl: 16.771431 +epoch: 1, batch: 10826, sum loss: 4467.228516, avg loss: 2.594209, ppl: 13.385998 +epoch: 1, batch: 10827, sum loss: 5508.661621, avg loss: 2.870589, ppl: 17.647419 +epoch: 1, batch: 10828, sum loss: 4772.017090, avg loss: 2.756798, ppl: 15.749330 +epoch: 1, batch: 10829, sum loss: 4910.259766, avg loss: 2.740100, ppl: 15.488540 +epoch: 1, batch: 10830, sum loss: 5465.008789, avg loss: 2.740727, ppl: 15.498243 +epoch: 1, batch: 10831, sum loss: 5574.541992, avg loss: 2.960458, ppl: 19.306808 +epoch: 1, batch: 10832, sum loss: 4374.477539, avg loss: 2.701963, ppl: 14.908965 +epoch: 1, batch: 10833, sum loss: 4530.933594, avg loss: 2.595037, ppl: 13.397077 +epoch: 1, batch: 10834, sum loss: 5093.950684, avg loss: 2.715326, ppl: 15.109529 +epoch: 1, batch: 10835, sum loss: 5118.663574, avg loss: 2.842123, ppl: 17.152142 +epoch: 1, batch: 10836, sum loss: 4715.442871, avg loss: 2.747927, ppl: 15.610241 +epoch: 1, batch: 10837, sum loss: 4985.972656, avg loss: 2.807417, ppl: 16.567072 +epoch: 1, batch: 10838, sum loss: 5919.565918, avg loss: 2.961264, ppl: 19.322372 +epoch: 1, batch: 10839, sum loss: 5100.966797, avg loss: 2.730710, ppl: 15.343781 +epoch: 1, batch: 10840, sum loss: 5027.810547, avg loss: 2.847005, ppl: 17.236076 +epoch: 1, batch: 10841, sum loss: 6822.606934, avg loss: 3.245769, ppl: 25.681458 +epoch: 1, batch: 10842, sum loss: 4475.641602, avg loss: 2.999760, ppl: 20.080715 +epoch: 1, batch: 10843, sum loss: 5398.528320, avg loss: 2.807347, ppl: 16.565908 +epoch: 1, batch: 10844, sum loss: 4103.916016, avg loss: 2.618964, ppl: 13.721497 +epoch: 1, batch: 10845, sum loss: 4798.131836, avg loss: 2.759133, ppl: 15.786148 +epoch: 1, batch: 10846, sum loss: 4748.726562, avg loss: 2.660351, ppl: 14.301309 +epoch: 1, batch: 10847, sum loss: 4665.828125, avg loss: 2.695452, ppl: 14.812216 +epoch: 1, batch: 10848, sum loss: 
4554.112793, avg loss: 2.660113, ppl: 14.297899 +epoch: 1, batch: 10849, sum loss: 4371.980469, avg loss: 2.537423, ppl: 12.647042 +epoch: 1, batch: 10850, sum loss: 5175.045898, avg loss: 2.857563, ppl: 17.419022 +epoch: 1, batch: 10851, sum loss: 5037.932617, avg loss: 2.812916, ppl: 16.658421 +epoch: 1, batch: 10852, sum loss: 5087.789062, avg loss: 2.971839, ppl: 19.527807 +epoch: 1, batch: 10853, sum loss: 5468.375488, avg loss: 2.885686, ppl: 17.915857 +epoch: 1, batch: 10854, sum loss: 4071.674316, avg loss: 2.740023, ppl: 15.487344 +epoch: 1, batch: 10855, sum loss: 5162.282227, avg loss: 2.747356, ppl: 15.601330 +epoch: 1, batch: 10856, sum loss: 4432.560547, avg loss: 2.697846, ppl: 14.847711 +epoch: 1, batch: 10857, sum loss: 4710.778320, avg loss: 2.782504, ppl: 16.159428 +epoch: 1, batch: 10858, sum loss: 4466.617188, avg loss: 2.703763, ppl: 14.935837 +epoch: 1, batch: 10859, sum loss: 4697.224121, avg loss: 2.910300, ppl: 18.362307 +epoch: 1, batch: 10860, sum loss: 4506.468262, avg loss: 2.943480, ppl: 18.981798 +epoch: 1, batch: 10861, sum loss: 4305.454102, avg loss: 2.468724, ppl: 11.807365 +epoch: 1, batch: 10862, sum loss: 4207.041016, avg loss: 2.730072, ppl: 15.333991 +epoch: 1, batch: 10863, sum loss: 5377.252441, avg loss: 2.799195, ppl: 16.431408 +epoch: 1, batch: 10864, sum loss: 5123.298340, avg loss: 2.971751, ppl: 19.526079 +epoch: 1, batch: 10865, sum loss: 4886.342773, avg loss: 2.590850, ppl: 13.341105 +epoch: 1, batch: 10866, sum loss: 4000.998047, avg loss: 2.606513, ppl: 13.551717 +epoch: 1, batch: 10867, sum loss: 5134.300781, avg loss: 2.726660, ppl: 15.281761 +epoch: 1, batch: 10868, sum loss: 4594.455566, avg loss: 2.746238, ppl: 15.583891 +epoch: 1, batch: 10869, sum loss: 5005.503906, avg loss: 2.910177, ppl: 18.360043 +epoch: 1, batch: 10870, sum loss: 5681.366211, avg loss: 2.704125, ppl: 14.941236 +epoch: 1, batch: 10871, sum loss: 4446.421875, avg loss: 2.648256, ppl: 14.129376 +epoch: 1, batch: 10872, sum loss: 
5769.441895, avg loss: 2.806149, ppl: 16.546072 +epoch: 1, batch: 10873, sum loss: 4360.579102, avg loss: 2.831545, ppl: 16.971661 +epoch: 1, batch: 10874, sum loss: 4383.375488, avg loss: 2.640588, ppl: 14.021443 +epoch: 1, batch: 10875, sum loss: 4527.314941, avg loss: 2.811997, ppl: 16.643116 +epoch: 1, batch: 10876, sum loss: 4492.556641, avg loss: 2.911573, ppl: 18.385695 +epoch: 1, batch: 10877, sum loss: 6131.232422, avg loss: 3.095019, ppl: 22.087650 +epoch: 1, batch: 10878, sum loss: 4582.594238, avg loss: 2.962246, ppl: 19.341362 +epoch: 1, batch: 10879, sum loss: 5097.407227, avg loss: 3.007320, ppl: 20.233101 +epoch: 1, batch: 10880, sum loss: 4494.049805, avg loss: 2.748654, ppl: 15.621596 +epoch: 1, batch: 10881, sum loss: 5160.352051, avg loss: 2.789380, ppl: 16.270922 +epoch: 1, batch: 10882, sum loss: 4838.648438, avg loss: 2.878434, ppl: 17.786406 +epoch: 1, batch: 10883, sum loss: 5312.928711, avg loss: 2.870302, ppl: 17.642345 +epoch: 1, batch: 10884, sum loss: 4823.231934, avg loss: 3.046893, ppl: 21.049843 +epoch: 1, batch: 10885, sum loss: 5618.081543, avg loss: 2.989932, ppl: 19.884327 +epoch: 1, batch: 10886, sum loss: 5830.539062, avg loss: 3.126294, ppl: 22.789373 +epoch: 1, batch: 10887, sum loss: 5268.663086, avg loss: 2.941744, ppl: 18.948856 +epoch: 1, batch: 10888, sum loss: 4683.922852, avg loss: 2.776481, ppl: 16.062393 +epoch: 1, batch: 10889, sum loss: 4251.459961, avg loss: 2.457491, ppl: 11.675483 +epoch: 1, batch: 10890, sum loss: 4828.078613, avg loss: 2.751042, ppl: 15.658938 +epoch: 1, batch: 10891, sum loss: 4916.808594, avg loss: 2.776289, ppl: 16.059322 +epoch: 1, batch: 10892, sum loss: 4568.192871, avg loss: 2.889432, ppl: 17.983101 +epoch: 1, batch: 10893, sum loss: 4978.536133, avg loss: 3.054317, ppl: 21.206686 +epoch: 1, batch: 10894, sum loss: 3834.831543, avg loss: 2.646537, ppl: 14.105105 +epoch: 1, batch: 10895, sum loss: 5281.831543, avg loss: 2.950744, ppl: 19.120167 +epoch: 1, batch: 10896, sum loss: 
4733.541992, avg loss: 2.737734, ppl: 15.451933 +epoch: 1, batch: 10897, sum loss: 3480.996338, avg loss: 2.495338, ppl: 12.125831 +epoch: 1, batch: 10898, sum loss: 4387.264648, avg loss: 2.714891, ppl: 15.102970 +epoch: 1, batch: 10899, sum loss: 4544.508789, avg loss: 2.657608, ppl: 14.262127 +epoch: 1, batch: 10900, sum loss: 5210.151855, avg loss: 2.724975, ppl: 15.256027 +epoch: 1, batch: 10901, sum loss: 4503.484375, avg loss: 2.582273, ppl: 13.227169 +epoch: 1, batch: 10902, sum loss: 4847.097168, avg loss: 2.948356, ppl: 19.074568 +epoch: 1, batch: 10903, sum loss: 5404.670898, avg loss: 2.923024, ppl: 18.597435 +epoch: 1, batch: 10904, sum loss: 5168.082031, avg loss: 2.853718, ppl: 17.352169 +epoch: 1, batch: 10905, sum loss: 4523.570801, avg loss: 2.713600, ppl: 15.083473 +epoch: 1, batch: 10906, sum loss: 4046.854492, avg loss: 2.756713, ppl: 15.747993 +epoch: 1, batch: 10907, sum loss: 4282.884277, avg loss: 2.642125, ppl: 14.043009 +epoch: 1, batch: 10908, sum loss: 4974.890137, avg loss: 2.928128, ppl: 18.692614 +epoch: 1, batch: 10909, sum loss: 5180.076172, avg loss: 3.148982, ppl: 23.312326 +epoch: 1, batch: 10910, sum loss: 4565.388184, avg loss: 2.873120, ppl: 17.692137 +epoch: 1, batch: 10911, sum loss: 4835.981934, avg loss: 2.674769, ppl: 14.508993 +epoch: 1, batch: 10912, sum loss: 4576.914062, avg loss: 2.498316, ppl: 12.161991 +epoch: 1, batch: 10913, sum loss: 5734.017578, avg loss: 2.988024, ppl: 19.846422 +epoch: 1, batch: 10914, sum loss: 6403.234375, avg loss: 3.052066, ppl: 21.159016 +epoch: 1, batch: 10915, sum loss: 3923.181396, avg loss: 2.432227, ppl: 11.384203 +epoch: 1, batch: 10916, sum loss: 4436.383301, avg loss: 2.804288, ppl: 16.515306 +epoch: 1, batch: 10917, sum loss: 4308.016602, avg loss: 2.670810, ppl: 14.451670 +epoch: 1, batch: 10918, sum loss: 4131.829102, avg loss: 2.474149, ppl: 11.871603 +epoch: 1, batch: 10919, sum loss: 5605.467773, avg loss: 3.000786, ppl: 20.101326 +epoch: 1, batch: 10920, sum loss: 
3481.323975, avg loss: 2.539259, ppl: 12.670279 +epoch: 1, batch: 10921, sum loss: 4382.454590, avg loss: 2.668974, ppl: 14.425157 +epoch: 1, batch: 10922, sum loss: 4874.099121, avg loss: 2.815771, ppl: 16.706045 +epoch: 1, batch: 10923, sum loss: 5824.331055, avg loss: 3.050986, ppl: 21.136166 +epoch: 1, batch: 10924, sum loss: 4749.764160, avg loss: 2.825559, ppl: 16.870367 +epoch: 1, batch: 10925, sum loss: 4851.011719, avg loss: 2.751566, ppl: 15.667154 +epoch: 1, batch: 10926, sum loss: 4155.345703, avg loss: 2.665392, ppl: 14.373582 +epoch: 1, batch: 10927, sum loss: 3923.302002, avg loss: 2.620776, ppl: 13.746389 +epoch: 1, batch: 10928, sum loss: 4076.025391, avg loss: 2.606154, ppl: 13.546852 +epoch: 1, batch: 10929, sum loss: 4337.840820, avg loss: 3.024994, ppl: 20.593874 +epoch: 1, batch: 10930, sum loss: 3871.214600, avg loss: 2.626333, ppl: 13.822985 +epoch: 1, batch: 10931, sum loss: 3625.505371, avg loss: 2.441418, ppl: 11.489318 +epoch: 1, batch: 10932, sum loss: 4784.206543, avg loss: 2.846048, ppl: 17.219593 +epoch: 1, batch: 10933, sum loss: 4741.411621, avg loss: 2.742285, ppl: 15.522421 +epoch: 1, batch: 10934, sum loss: 4194.357910, avg loss: 2.730702, ppl: 15.343654 +epoch: 1, batch: 10935, sum loss: 4771.992676, avg loss: 2.725296, ppl: 15.260927 +epoch: 1, batch: 10936, sum loss: 3543.979980, avg loss: 2.596322, ppl: 13.414313 +epoch: 1, batch: 10937, sum loss: 4836.727051, avg loss: 2.607400, ppl: 13.563739 +epoch: 1, batch: 10938, sum loss: 4975.021484, avg loss: 2.813926, ppl: 16.675261 +epoch: 1, batch: 10939, sum loss: 4441.810547, avg loss: 2.634526, ppl: 13.936702 +epoch: 1, batch: 10940, sum loss: 4424.594238, avg loss: 2.663814, ppl: 14.350913 +epoch: 1, batch: 10941, sum loss: 4275.987793, avg loss: 2.400892, ppl: 11.033009 +epoch: 1, batch: 10942, sum loss: 5443.141113, avg loss: 2.827606, ppl: 16.904938 +epoch: 1, batch: 10943, sum loss: 4934.458008, avg loss: 2.821302, ppl: 16.798716 +epoch: 1, batch: 10944, sum loss: 
4148.892090, avg loss: 2.576952, ppl: 13.156968 +epoch: 1, batch: 10945, sum loss: 4466.339844, avg loss: 2.517666, ppl: 12.399624 +epoch: 1, batch: 10946, sum loss: 4655.848633, avg loss: 2.904459, ppl: 18.255373 +epoch: 1, batch: 10947, sum loss: 4723.160645, avg loss: 2.985563, ppl: 19.797642 +epoch: 1, batch: 10948, sum loss: 4317.891602, avg loss: 2.760800, ppl: 15.812490 +epoch: 1, batch: 10949, sum loss: 4818.719727, avg loss: 2.809749, ppl: 16.605747 +epoch: 1, batch: 10950, sum loss: 4267.943848, avg loss: 2.339881, ppl: 10.380006 +epoch: 1, batch: 10951, sum loss: 4693.024414, avg loss: 2.666491, ppl: 14.389389 +epoch: 1, batch: 10952, sum loss: 4579.203125, avg loss: 2.773594, ppl: 16.016090 +epoch: 1, batch: 10953, sum loss: 4372.037598, avg loss: 2.494032, ppl: 12.110004 +epoch: 1, batch: 10954, sum loss: 3967.685303, avg loss: 2.662876, ppl: 14.337466 +epoch: 1, batch: 10955, sum loss: 4803.337402, avg loss: 2.636299, ppl: 13.961442 +epoch: 1, batch: 10956, sum loss: 4459.125000, avg loss: 2.646365, ppl: 14.102681 +epoch: 1, batch: 10957, sum loss: 4699.289551, avg loss: 2.848054, ppl: 17.254179 +epoch: 1, batch: 10958, sum loss: 3695.983154, avg loss: 2.772681, ppl: 16.001469 +epoch: 1, batch: 10959, sum loss: 4589.195312, avg loss: 2.637469, ppl: 13.977776 +epoch: 1, batch: 10960, sum loss: 3931.529785, avg loss: 2.504159, ppl: 12.233269 +epoch: 1, batch: 10961, sum loss: 5133.742188, avg loss: 3.059441, ppl: 21.315639 +epoch: 1, batch: 10962, sum loss: 4766.051270, avg loss: 2.671554, ppl: 14.462420 +epoch: 1, batch: 10963, sum loss: 4598.792969, avg loss: 2.812717, ppl: 16.655115 +epoch: 1, batch: 10964, sum loss: 4412.114746, avg loss: 2.563692, ppl: 12.983669 +epoch: 1, batch: 10965, sum loss: 5728.534668, avg loss: 2.965080, ppl: 19.396255 +epoch: 1, batch: 10966, sum loss: 4404.389648, avg loss: 2.740753, ppl: 15.498651 +epoch: 1, batch: 10967, sum loss: 6319.119141, avg loss: 3.035120, ppl: 20.803469 +epoch: 1, batch: 10968, sum loss: 
4357.307617, avg loss: 2.691357, ppl: 14.751686 +epoch: 1, batch: 10969, sum loss: 5080.876953, avg loss: 2.828996, ppl: 16.928459 +epoch: 1, batch: 10970, sum loss: 4275.102539, avg loss: 2.754576, ppl: 15.714383 +epoch: 1, batch: 10971, sum loss: 5000.367188, avg loss: 2.796626, ppl: 16.389254 +epoch: 1, batch: 10972, sum loss: 4157.096191, avg loss: 2.461277, ppl: 11.719766 +epoch: 1, batch: 10973, sum loss: 3971.660645, avg loss: 2.625024, ppl: 13.804900 +epoch: 1, batch: 10974, sum loss: 5400.308594, avg loss: 2.941344, ppl: 18.941296 +epoch: 1, batch: 10975, sum loss: 5121.586914, avg loss: 2.623764, ppl: 13.787523 +epoch: 1, batch: 10976, sum loss: 6248.797363, avg loss: 3.140099, ppl: 23.106155 +epoch: 1, batch: 10977, sum loss: 3752.242432, avg loss: 2.661165, ppl: 14.312954 +epoch: 1, batch: 10978, sum loss: 4510.479492, avg loss: 2.981150, ppl: 19.710468 +epoch: 1, batch: 10979, sum loss: 4466.144043, avg loss: 2.877670, ppl: 17.772820 +epoch: 1, batch: 10980, sum loss: 4297.719238, avg loss: 2.652913, ppl: 14.195334 +epoch: 1, batch: 10981, sum loss: 4979.204102, avg loss: 2.784790, ppl: 16.196413 +epoch: 1, batch: 10982, sum loss: 4195.596680, avg loss: 2.622248, ppl: 13.766635 +epoch: 1, batch: 10983, sum loss: 5258.246094, avg loss: 2.950756, ppl: 19.120396 +epoch: 1, batch: 10984, sum loss: 3458.577637, avg loss: 2.530050, ppl: 12.554129 +epoch: 1, batch: 10985, sum loss: 5019.590332, avg loss: 2.758017, ppl: 15.768540 +epoch: 1, batch: 10986, sum loss: 4082.399170, avg loss: 2.790430, ppl: 16.288027 +epoch: 1, batch: 10987, sum loss: 5061.831055, avg loss: 2.832586, ppl: 16.989340 +epoch: 1, batch: 10988, sum loss: 4688.851074, avg loss: 2.794309, ppl: 16.351332 +epoch: 1, batch: 10989, sum loss: 5102.182617, avg loss: 2.945833, ppl: 19.026505 +epoch: 1, batch: 10990, sum loss: 5538.447754, avg loss: 2.840230, ppl: 17.119694 +epoch: 1, batch: 10991, sum loss: 4596.831055, avg loss: 2.732955, ppl: 15.378270 +epoch: 1, batch: 10992, sum loss: 
4784.555664, avg loss: 2.783337, ppl: 16.172894 +epoch: 1, batch: 10993, sum loss: 5328.119141, avg loss: 2.680140, ppl: 14.587139 +epoch: 1, batch: 10994, sum loss: 5668.864258, avg loss: 3.047776, ppl: 21.068445 +epoch: 1, batch: 10995, sum loss: 5270.247559, avg loss: 2.810799, ppl: 16.623188 +epoch: 1, batch: 10996, sum loss: 4521.578613, avg loss: 2.545934, ppl: 12.755136 +epoch: 1, batch: 10997, sum loss: 4222.030273, avg loss: 2.722134, ppl: 15.212753 +epoch: 1, batch: 10998, sum loss: 3967.002197, avg loss: 2.674985, ppl: 14.512128 +epoch: 1, batch: 10999, sum loss: 4785.079102, avg loss: 2.900048, ppl: 18.175018 +epoch: 1, batch: 11000, sum loss: 4562.980469, avg loss: 2.837674, ppl: 17.076008 +epoch: 1, batch: 11001, sum loss: 5594.733398, avg loss: 2.991836, ppl: 19.922228 +epoch: 1, batch: 11002, sum loss: 4502.826660, avg loss: 2.538234, ppl: 12.657292 +epoch: 1, batch: 11003, sum loss: 5267.982422, avg loss: 2.912097, ppl: 18.395329 +epoch: 1, batch: 11004, sum loss: 4406.733887, avg loss: 2.534062, ppl: 12.604604 +epoch: 1, batch: 11005, sum loss: 4649.537598, avg loss: 2.714266, ppl: 15.093525 +epoch: 1, batch: 11006, sum loss: 6006.207520, avg loss: 2.884826, ppl: 17.900452 +epoch: 1, batch: 11007, sum loss: 4620.702637, avg loss: 2.680222, ppl: 14.588332 +epoch: 1, batch: 11008, sum loss: 4889.331543, avg loss: 2.701288, ppl: 14.898912 +epoch: 1, batch: 11009, sum loss: 4241.773926, avg loss: 2.765172, ppl: 15.881771 +epoch: 1, batch: 11010, sum loss: 4928.076660, avg loss: 2.816044, ppl: 16.710609 +epoch: 1, batch: 11011, sum loss: 4374.682129, avg loss: 2.656152, ppl: 14.241380 +epoch: 1, batch: 11012, sum loss: 4529.955078, avg loss: 2.635227, ppl: 13.946478 +epoch: 1, batch: 11013, sum loss: 4898.987793, avg loss: 2.880063, ppl: 17.815401 +epoch: 1, batch: 11014, sum loss: 3876.120605, avg loss: 2.421062, ppl: 11.257812 +epoch: 1, batch: 11015, sum loss: 4396.808105, avg loss: 2.829349, ppl: 16.934429 +epoch: 1, batch: 11016, sum loss: 
4566.349609, avg loss: 2.426328, ppl: 11.317251 +epoch: 1, batch: 11017, sum loss: 4994.205566, avg loss: 2.890165, ppl: 17.996284 +epoch: 1, batch: 11018, sum loss: 4443.730469, avg loss: 2.609354, ppl: 13.590272 +epoch: 1, batch: 11019, sum loss: 4352.619141, avg loss: 2.592388, ppl: 13.361637 +epoch: 1, batch: 11020, sum loss: 4188.985352, avg loss: 2.654617, ppl: 14.219537 +epoch: 1, batch: 11021, sum loss: 4909.582520, avg loss: 2.810293, ppl: 16.614788 +epoch: 1, batch: 11022, sum loss: 5544.657227, avg loss: 2.944587, ppl: 19.002808 +epoch: 1, batch: 11023, sum loss: 4472.660645, avg loss: 2.624801, ppl: 13.801826 +epoch: 1, batch: 11024, sum loss: 4495.100098, avg loss: 2.735910, ppl: 15.423772 +epoch: 1, batch: 11025, sum loss: 5526.077637, avg loss: 2.951965, ppl: 19.143526 +epoch: 1, batch: 11026, sum loss: 5012.709473, avg loss: 2.824062, ppl: 16.845131 +epoch: 1, batch: 11027, sum loss: 4081.496582, avg loss: 2.520998, ppl: 12.441012 +epoch: 1, batch: 11028, sum loss: 5051.243164, avg loss: 2.973068, ppl: 19.551823 +epoch: 1, batch: 11029, sum loss: 4692.660645, avg loss: 2.581222, ppl: 13.213269 +epoch: 1, batch: 11030, sum loss: 5177.023438, avg loss: 2.807497, ppl: 16.568388 +epoch: 1, batch: 11031, sum loss: 4521.687012, avg loss: 2.618232, ppl: 13.711464 +epoch: 1, batch: 11032, sum loss: 4486.801270, avg loss: 2.807760, ppl: 16.572762 +epoch: 1, batch: 11033, sum loss: 5215.395020, avg loss: 3.089689, ppl: 21.970245 +epoch: 1, batch: 11034, sum loss: 3815.030518, avg loss: 2.416106, ppl: 11.202147 +epoch: 1, batch: 11035, sum loss: 4746.324219, avg loss: 2.815139, ppl: 16.695498 +epoch: 1, batch: 11036, sum loss: 3818.375000, avg loss: 2.640647, ppl: 14.022269 +epoch: 1, batch: 11037, sum loss: 5594.555664, avg loss: 2.921439, ppl: 18.567986 +epoch: 1, batch: 11038, sum loss: 4575.244141, avg loss: 2.653854, ppl: 14.208695 +epoch: 1, batch: 11039, sum loss: 3792.156250, avg loss: 2.631614, ppl: 13.896187 +epoch: 1, batch: 11040, sum loss: 
3940.445801, avg loss: 2.682400, ppl: 14.620139 +epoch: 1, batch: 11041, sum loss: 5731.050293, avg loss: 2.668087, ppl: 14.412369 +epoch: 1, batch: 11042, sum loss: 5041.926270, avg loss: 2.689027, ppl: 14.717354 +epoch: 1, batch: 11043, sum loss: 3744.659912, avg loss: 2.559576, ppl: 12.930337 +epoch: 1, batch: 11044, sum loss: 4430.722168, avg loss: 2.546392, ppl: 12.760979 +epoch: 1, batch: 11045, sum loss: 4982.753906, avg loss: 2.896950, ppl: 18.118799 +epoch: 1, batch: 11046, sum loss: 3782.959717, avg loss: 2.545733, ppl: 12.752575 +epoch: 1, batch: 11047, sum loss: 5429.365723, avg loss: 2.871161, ppl: 17.657511 +epoch: 1, batch: 11048, sum loss: 5215.653320, avg loss: 2.813189, ppl: 16.662975 +epoch: 1, batch: 11049, sum loss: 4731.381836, avg loss: 2.892043, ppl: 18.030106 +epoch: 1, batch: 11050, sum loss: 4808.833008, avg loss: 2.799088, ppl: 16.429657 +epoch: 1, batch: 11051, sum loss: 4753.733887, avg loss: 2.732031, ppl: 15.364058 +epoch: 1, batch: 11052, sum loss: 3538.592773, avg loss: 2.314318, ppl: 10.118024 +epoch: 1, batch: 11053, sum loss: 4352.959473, avg loss: 2.556054, ppl: 12.884869 +epoch: 1, batch: 11054, sum loss: 4377.628906, avg loss: 2.724100, ppl: 15.242691 +epoch: 1, batch: 11055, sum loss: 3880.358398, avg loss: 2.544497, ppl: 12.736823 +epoch: 1, batch: 11056, sum loss: 5213.746094, avg loss: 2.849042, ppl: 17.271223 +epoch: 1, batch: 11057, sum loss: 5309.820801, avg loss: 2.941729, ppl: 18.948582 +epoch: 1, batch: 11058, sum loss: 3709.143799, avg loss: 2.550993, ppl: 12.819827 +epoch: 1, batch: 11059, sum loss: 4517.485352, avg loss: 2.572600, ppl: 13.099838 +epoch: 1, batch: 11060, sum loss: 5321.582520, avg loss: 3.205773, ppl: 24.674557 +epoch: 1, batch: 11061, sum loss: 4537.016113, avg loss: 2.720034, ppl: 15.180833 +epoch: 1, batch: 11062, sum loss: 5210.470215, avg loss: 2.912504, ppl: 18.402824 +epoch: 1, batch: 11063, sum loss: 5159.724609, avg loss: 2.911808, ppl: 18.390026 +epoch: 1, batch: 11064, sum loss: 
5106.439453, avg loss: 3.034129, ppl: 20.782871 +epoch: 1, batch: 11065, sum loss: 4787.403809, avg loss: 2.969853, ppl: 19.489063 +epoch: 1, batch: 11066, sum loss: 4064.073242, avg loss: 2.611872, ppl: 13.624535 +epoch: 1, batch: 11067, sum loss: 4706.364258, avg loss: 2.809770, ppl: 16.606096 +epoch: 1, batch: 11068, sum loss: 5019.827148, avg loss: 2.801243, ppl: 16.465101 +epoch: 1, batch: 11069, sum loss: 4128.218262, avg loss: 2.573702, ppl: 13.114288 +epoch: 1, batch: 11070, sum loss: 4769.339844, avg loss: 3.158503, ppl: 23.535343 +epoch: 1, batch: 11071, sum loss: 5175.909180, avg loss: 2.898046, ppl: 18.138659 +epoch: 1, batch: 11072, sum loss: 5114.810547, avg loss: 2.851065, ppl: 17.306206 +epoch: 1, batch: 11073, sum loss: 5375.204102, avg loss: 2.943704, ppl: 18.986048 +epoch: 1, batch: 11074, sum loss: 4540.551758, avg loss: 2.669343, ppl: 14.430478 +epoch: 1, batch: 11075, sum loss: 4430.189941, avg loss: 2.605994, ppl: 13.544685 +epoch: 1, batch: 11076, sum loss: 4785.413086, avg loss: 2.881043, ppl: 17.832870 +epoch: 1, batch: 11077, sum loss: 5198.195801, avg loss: 2.722994, ppl: 15.225845 +epoch: 1, batch: 11078, sum loss: 4849.119629, avg loss: 2.715073, ppl: 15.105707 +epoch: 1, batch: 11079, sum loss: 4542.150391, avg loss: 2.567637, ppl: 13.034992 +epoch: 1, batch: 11080, sum loss: 4466.376953, avg loss: 2.489619, ppl: 12.056684 +epoch: 1, batch: 11081, sum loss: 4600.851074, avg loss: 2.568873, ppl: 13.051104 +epoch: 1, batch: 11082, sum loss: 4254.553711, avg loss: 2.819453, ppl: 16.767668 +epoch: 1, batch: 11083, sum loss: 3892.362305, avg loss: 2.469773, ppl: 11.819764 +epoch: 1, batch: 11084, sum loss: 4885.490234, avg loss: 2.602818, ppl: 13.501739 +epoch: 1, batch: 11085, sum loss: 4404.128418, avg loss: 2.727015, ppl: 15.287180 +epoch: 1, batch: 11086, sum loss: 3964.384766, avg loss: 2.582661, ppl: 13.232305 +epoch: 1, batch: 11087, sum loss: 4759.215332, avg loss: 2.617830, ppl: 13.705954 +epoch: 1, batch: 11088, sum loss: 
5334.562988, avg loss: 2.911879, ppl: 18.391329 +epoch: 1, batch: 11089, sum loss: 4898.899414, avg loss: 2.652355, ppl: 14.187410 +epoch: 1, batch: 11090, sum loss: 4327.751953, avg loss: 2.626063, ppl: 13.819258 +epoch: 1, batch: 11091, sum loss: 4436.989258, avg loss: 2.663259, ppl: 14.342954 +epoch: 1, batch: 11092, sum loss: 4203.500000, avg loss: 2.619003, ppl: 13.722036 +epoch: 1, batch: 11093, sum loss: 4808.218262, avg loss: 2.710382, ppl: 15.035021 +epoch: 1, batch: 11094, sum loss: 3562.478271, avg loss: 2.811743, ppl: 16.638891 +epoch: 1, batch: 11095, sum loss: 4094.461426, avg loss: 2.629712, ppl: 13.869773 +epoch: 1, batch: 11096, sum loss: 4903.185547, avg loss: 3.026658, ppl: 20.628174 +epoch: 1, batch: 11097, sum loss: 3685.296387, avg loss: 2.562793, ppl: 12.971998 +epoch: 1, batch: 11098, sum loss: 4652.696777, avg loss: 2.645081, ppl: 14.084583 +epoch: 1, batch: 11099, sum loss: 4481.089355, avg loss: 2.857837, ppl: 17.423805 +epoch: 1, batch: 11100, sum loss: 4705.194336, avg loss: 2.509437, ppl: 12.298005 +epoch: 1, batch: 11101, sum loss: 5078.134277, avg loss: 3.064656, ppl: 21.427080 +epoch: 1, batch: 11102, sum loss: 4737.189941, avg loss: 2.621577, ppl: 13.757402 +epoch: 1, batch: 11103, sum loss: 4729.985352, avg loss: 2.847673, ppl: 17.247606 +epoch: 1, batch: 11104, sum loss: 5014.552246, avg loss: 2.900262, ppl: 18.178902 +epoch: 1, batch: 11105, sum loss: 3576.405273, avg loss: 2.556401, ppl: 12.889348 +epoch: 1, batch: 11106, sum loss: 5568.034180, avg loss: 3.107162, ppl: 22.357504 +epoch: 1, batch: 11107, sum loss: 4481.460449, avg loss: 2.675499, ppl: 14.519593 +epoch: 1, batch: 11108, sum loss: 5194.505859, avg loss: 2.910087, ppl: 18.358402 +epoch: 1, batch: 11109, sum loss: 4898.827148, avg loss: 2.858125, ppl: 17.428825 +epoch: 1, batch: 11110, sum loss: 4653.072266, avg loss: 2.702132, ppl: 14.911496 +epoch: 1, batch: 11111, sum loss: 4990.015625, avg loss: 2.894441, ppl: 18.073393 +epoch: 1, batch: 11112, sum loss: 
4862.469238, avg loss: 2.856915, ppl: 17.407742 +epoch: 1, batch: 11113, sum loss: 4871.277344, avg loss: 2.904757, ppl: 18.260801 +epoch: 1, batch: 11114, sum loss: 5252.520020, avg loss: 2.889175, ppl: 17.978470 +epoch: 1, batch: 11115, sum loss: 5657.024902, avg loss: 2.944834, ppl: 19.007498 +epoch: 1, batch: 11116, sum loss: 4343.460449, avg loss: 2.782486, ppl: 16.159142 +epoch: 1, batch: 11117, sum loss: 5355.392578, avg loss: 2.907379, ppl: 18.308756 +epoch: 1, batch: 11118, sum loss: 5193.951660, avg loss: 2.647274, ppl: 14.115507 +epoch: 1, batch: 11119, sum loss: 4227.974121, avg loss: 2.757974, ppl: 15.767867 +epoch: 1, batch: 11120, sum loss: 4966.664062, avg loss: 2.813974, ppl: 16.676056 +epoch: 1, batch: 11121, sum loss: 4352.807129, avg loss: 2.695237, ppl: 14.809024 +epoch: 1, batch: 11122, sum loss: 4381.001465, avg loss: 2.613963, ppl: 13.653049 +epoch: 1, batch: 11123, sum loss: 3806.842529, avg loss: 2.334054, ppl: 10.319695 +epoch: 1, batch: 11124, sum loss: 4673.000000, avg loss: 2.872157, ppl: 17.675108 +epoch: 1, batch: 11125, sum loss: 3423.647949, avg loss: 2.405937, ppl: 11.088813 +epoch: 1, batch: 11126, sum loss: 4062.019531, avg loss: 2.715254, ppl: 15.108444 +epoch: 1, batch: 11127, sum loss: 5267.885254, avg loss: 2.765294, ppl: 15.883710 +epoch: 1, batch: 11128, sum loss: 4706.252930, avg loss: 2.828277, ppl: 16.916290 +epoch: 1, batch: 11129, sum loss: 5503.006836, avg loss: 3.042016, ppl: 20.947435 +epoch: 1, batch: 11130, sum loss: 5200.799316, avg loss: 2.764912, ppl: 15.877641 +epoch: 1, batch: 11131, sum loss: 5113.291992, avg loss: 2.990229, ppl: 19.890240 +epoch: 1, batch: 11132, sum loss: 4180.341797, avg loss: 2.466278, ppl: 11.778529 +epoch: 1, batch: 11133, sum loss: 4467.319824, avg loss: 2.686302, ppl: 14.677295 +epoch: 1, batch: 11134, sum loss: 4790.619629, avg loss: 2.715771, ppl: 15.116256 +epoch: 1, batch: 11135, sum loss: 4446.576660, avg loss: 2.810731, ppl: 16.622063 +epoch: 1, batch: 11136, sum loss: 
4724.185059, avg loss: 2.567492, ppl: 13.033093 +epoch: 1, batch: 11137, sum loss: 4176.068848, avg loss: 2.817860, ppl: 16.740993 +epoch: 1, batch: 11138, sum loss: 4250.102539, avg loss: 2.796120, ppl: 16.380968 +epoch: 1, batch: 11139, sum loss: 4771.549316, avg loss: 2.747006, ppl: 15.595867 +epoch: 1, batch: 11140, sum loss: 4680.231934, avg loss: 2.777586, ppl: 16.080153 +epoch: 1, batch: 11141, sum loss: 4623.113281, avg loss: 2.790050, ppl: 16.281837 +epoch: 1, batch: 11142, sum loss: 4525.786133, avg loss: 2.771455, ppl: 15.981872 +epoch: 1, batch: 11143, sum loss: 4187.924805, avg loss: 2.635573, ppl: 13.951300 +epoch: 1, batch: 11144, sum loss: 4329.260742, avg loss: 2.750483, ppl: 15.650193 +epoch: 1, batch: 11145, sum loss: 4849.898926, avg loss: 2.829579, ppl: 16.938334 +epoch: 1, batch: 11146, sum loss: 5155.787598, avg loss: 2.754160, ppl: 15.707844 +epoch: 1, batch: 11147, sum loss: 5215.847168, avg loss: 2.840875, ppl: 17.130754 +epoch: 1, batch: 11148, sum loss: 4301.082031, avg loss: 2.769531, ppl: 15.951159 +epoch: 1, batch: 11149, sum loss: 5030.302734, avg loss: 2.911055, ppl: 18.376173 +epoch: 1, batch: 11150, sum loss: 4755.507324, avg loss: 2.883873, ppl: 17.883410 +epoch: 1, batch: 11151, sum loss: 5176.124023, avg loss: 2.825395, ppl: 16.867613 +epoch: 1, batch: 11152, sum loss: 4663.979980, avg loss: 2.725880, ppl: 15.269840 +epoch: 1, batch: 11153, sum loss: 3378.779053, avg loss: 2.362783, ppl: 10.620464 +epoch: 1, batch: 11154, sum loss: 5213.248535, avg loss: 2.977298, ppl: 19.634693 +epoch: 1, batch: 11155, sum loss: 4735.324707, avg loss: 2.635128, ppl: 13.945098 +epoch: 1, batch: 11156, sum loss: 4945.917969, avg loss: 2.878881, ppl: 17.794353 +epoch: 1, batch: 11157, sum loss: 4191.310547, avg loss: 2.707565, ppl: 14.992724 +epoch: 1, batch: 11158, sum loss: 4574.865723, avg loss: 2.741082, ppl: 15.503751 +epoch: 1, batch: 11159, sum loss: 4318.100586, avg loss: 2.649142, ppl: 14.141893 +epoch: 1, batch: 11160, sum loss: 
6121.440918, avg loss: 3.022934, ppl: 20.551497 +epoch: 1, batch: 11161, sum loss: 5049.166016, avg loss: 2.825499, ppl: 16.869354 +epoch: 1, batch: 11162, sum loss: 5108.473145, avg loss: 2.852302, ppl: 17.327625 +epoch: 1, batch: 11163, sum loss: 4764.082520, avg loss: 2.688534, ppl: 14.710099 +epoch: 1, batch: 11164, sum loss: 4609.916016, avg loss: 2.661614, ppl: 14.319388 +epoch: 1, batch: 11165, sum loss: 4295.836426, avg loss: 2.698390, ppl: 14.855791 +epoch: 1, batch: 11166, sum loss: 3934.747559, avg loss: 2.631938, ppl: 13.900683 +epoch: 1, batch: 11167, sum loss: 3828.593262, avg loss: 2.499082, ppl: 12.171313 +epoch: 1, batch: 11168, sum loss: 5202.440430, avg loss: 2.610357, ppl: 13.603901 +epoch: 1, batch: 11169, sum loss: 4804.382812, avg loss: 2.657291, ppl: 14.257619 +epoch: 1, batch: 11170, sum loss: 4207.219238, avg loss: 2.522314, ppl: 12.457388 +epoch: 1, batch: 11171, sum loss: 5206.361328, avg loss: 2.951452, ppl: 19.133717 +epoch: 1, batch: 11172, sum loss: 4640.067383, avg loss: 2.685224, ppl: 14.661489 +epoch: 1, batch: 11173, sum loss: 4772.388672, avg loss: 2.663163, ppl: 14.341586 +epoch: 1, batch: 11174, sum loss: 4877.630859, avg loss: 2.915500, ppl: 18.458033 +epoch: 1, batch: 11175, sum loss: 4757.644531, avg loss: 2.840385, ppl: 17.122356 +epoch: 1, batch: 11176, sum loss: 4670.343262, avg loss: 2.815156, ppl: 16.695776 +epoch: 1, batch: 11177, sum loss: 4568.622070, avg loss: 2.645410, ppl: 14.089214 +epoch: 1, batch: 11178, sum loss: 4941.652344, avg loss: 2.884794, ppl: 17.899879 +epoch: 1, batch: 11179, sum loss: 4686.865234, avg loss: 2.681273, ppl: 14.603675 +epoch: 1, batch: 11180, sum loss: 4896.485840, avg loss: 2.856760, ppl: 17.405035 +epoch: 1, batch: 11181, sum loss: 4074.266357, avg loss: 2.530600, ppl: 12.561042 +epoch: 1, batch: 11182, sum loss: 4433.297363, avg loss: 2.623253, ppl: 13.780477 +epoch: 1, batch: 11183, sum loss: 5784.328125, avg loss: 2.911086, ppl: 18.376747 +epoch: 1, batch: 11184, sum loss: 
4313.835938, avg loss: 2.631993, ppl: 13.901442 +epoch: 1, batch: 11185, sum loss: 4838.360840, avg loss: 2.831106, ppl: 16.964220 +epoch: 1, batch: 11186, sum loss: 5015.762695, avg loss: 2.742353, ppl: 15.523464 +epoch: 1, batch: 11187, sum loss: 4719.612305, avg loss: 2.772980, ppl: 16.006266 +epoch: 1, batch: 11188, sum loss: 5315.067383, avg loss: 3.065206, ppl: 21.438879 +epoch: 1, batch: 11189, sum loss: 5909.595215, avg loss: 2.810079, ppl: 16.611223 +epoch: 1, batch: 11190, sum loss: 5831.614746, avg loss: 3.227235, ppl: 25.209864 +epoch: 1, batch: 11191, sum loss: 4507.320312, avg loss: 2.790910, ppl: 16.295851 +epoch: 1, batch: 11192, sum loss: 4397.477539, avg loss: 2.536031, ppl: 12.629442 +epoch: 1, batch: 11193, sum loss: 6051.249023, avg loss: 3.062373, ppl: 21.378231 +epoch: 1, batch: 11194, sum loss: 4953.934082, avg loss: 2.821147, ppl: 16.796108 +epoch: 1, batch: 11195, sum loss: 5743.999023, avg loss: 2.844972, ppl: 17.201078 +epoch: 1, batch: 11196, sum loss: 4350.048340, avg loss: 2.644406, ppl: 14.075083 +epoch: 1, batch: 11197, sum loss: 4804.204102, avg loss: 2.610980, ppl: 13.612389 +epoch: 1, batch: 11198, sum loss: 4764.077637, avg loss: 2.747450, ppl: 15.602788 +epoch: 1, batch: 11199, sum loss: 4365.058594, avg loss: 2.704497, ppl: 14.946801 +epoch: 1, batch: 11200, sum loss: 4714.072754, avg loss: 2.909922, ppl: 18.355360 +epoch: 1, batch: 11201, sum loss: 4611.873535, avg loss: 2.741899, ppl: 15.516416 +epoch: 1, batch: 11202, sum loss: 4389.394043, avg loss: 2.692880, ppl: 14.774159 +epoch: 1, batch: 11203, sum loss: 4703.300781, avg loss: 2.773173, ppl: 16.009344 +epoch: 1, batch: 11204, sum loss: 4571.573242, avg loss: 2.698685, ppl: 14.860184 +epoch: 1, batch: 11205, sum loss: 4741.208496, avg loss: 2.731111, ppl: 15.349929 +epoch: 1, batch: 11206, sum loss: 5370.695312, avg loss: 2.904649, ppl: 18.258825 +epoch: 1, batch: 11207, sum loss: 4845.052246, avg loss: 2.897759, ppl: 18.133457 +epoch: 1, batch: 11208, sum loss: 
4898.280762, avg loss: 2.857807, ppl: 17.423271 +epoch: 1, batch: 11209, sum loss: 5093.879883, avg loss: 2.812744, ppl: 16.655565 +epoch: 1, batch: 11210, sum loss: 4456.903320, avg loss: 2.706074, ppl: 14.970379 +epoch: 1, batch: 11211, sum loss: 5571.942383, avg loss: 3.000507, ppl: 20.095730 +epoch: 1, batch: 11212, sum loss: 5357.884766, avg loss: 2.993232, ppl: 19.950048 +epoch: 1, batch: 11213, sum loss: 4752.448730, avg loss: 2.864647, ppl: 17.542854 +epoch: 1, batch: 11214, sum loss: 4226.269043, avg loss: 2.678244, ppl: 14.559503 +epoch: 1, batch: 11215, sum loss: 4454.084961, avg loss: 2.886640, ppl: 17.932951 +epoch: 1, batch: 11216, sum loss: 4365.906250, avg loss: 2.563656, ppl: 12.983198 +epoch: 1, batch: 11217, sum loss: 5765.756348, avg loss: 3.103206, ppl: 22.269224 +epoch: 1, batch: 11218, sum loss: 4790.543457, avg loss: 2.622082, ppl: 13.764348 +epoch: 1, batch: 11219, sum loss: 3921.504639, avg loss: 2.469461, ppl: 11.816081 +epoch: 1, batch: 11220, sum loss: 5172.638184, avg loss: 2.974490, ppl: 19.579638 +epoch: 1, batch: 11221, sum loss: 4080.516357, avg loss: 2.673995, ppl: 14.497772 +epoch: 1, batch: 11222, sum loss: 5107.257324, avg loss: 2.729694, ppl: 15.328197 +epoch: 1, batch: 11223, sum loss: 4873.583984, avg loss: 2.739507, ppl: 15.479359 +epoch: 1, batch: 11224, sum loss: 4693.231445, avg loss: 2.791928, ppl: 16.312445 +epoch: 1, batch: 11225, sum loss: 4705.382324, avg loss: 2.815908, ppl: 16.708344 +epoch: 1, batch: 11226, sum loss: 5689.430176, avg loss: 2.876355, ppl: 17.749453 +epoch: 1, batch: 11227, sum loss: 4032.106201, avg loss: 2.561694, ppl: 12.957748 +epoch: 1, batch: 11228, sum loss: 6482.850098, avg loss: 3.241425, ppl: 25.570141 +epoch: 1, batch: 11229, sum loss: 4781.409180, avg loss: 2.751098, ppl: 15.659823 +epoch: 1, batch: 11230, sum loss: 4630.562500, avg loss: 2.722259, ppl: 15.214650 +epoch: 1, batch: 11231, sum loss: 3996.187988, avg loss: 2.658808, ppl: 14.279251 +epoch: 1, batch: 11232, sum loss: 
5277.532227, avg loss: 2.606189, ppl: 13.547320 +epoch: 1, batch: 11233, sum loss: 4364.701172, avg loss: 2.776527, ppl: 16.063143 +epoch: 1, batch: 11234, sum loss: 5279.021973, avg loss: 2.855069, ppl: 17.375631 +epoch: 1, batch: 11235, sum loss: 4206.482422, avg loss: 2.594992, ppl: 13.396482 +epoch: 1, batch: 11236, sum loss: 5925.864746, avg loss: 3.086388, ppl: 21.897842 +epoch: 1, batch: 11237, sum loss: 5196.047852, avg loss: 2.858112, ppl: 17.428593 +epoch: 1, batch: 11238, sum loss: 4980.550293, avg loss: 2.828251, ppl: 16.915852 +epoch: 1, batch: 11239, sum loss: 4842.104492, avg loss: 2.667826, ppl: 14.408613 +epoch: 1, batch: 11240, sum loss: 4192.091309, avg loss: 2.417584, ppl: 11.218724 +epoch: 1, batch: 11241, sum loss: 5514.153320, avg loss: 3.083978, ppl: 21.845139 +epoch: 1, batch: 11242, sum loss: 5130.810547, avg loss: 2.858390, ppl: 17.433443 +epoch: 1, batch: 11243, sum loss: 4695.986328, avg loss: 2.671209, ppl: 14.457442 +epoch: 1, batch: 11244, sum loss: 4766.555664, avg loss: 2.606099, ppl: 13.546109 +epoch: 1, batch: 11245, sum loss: 4780.853516, avg loss: 2.752362, ppl: 15.679631 +epoch: 1, batch: 11246, sum loss: 5043.799805, avg loss: 2.766758, ppl: 15.906983 +epoch: 1, batch: 11247, sum loss: 4092.828369, avg loss: 2.824588, ppl: 16.854004 +epoch: 1, batch: 11248, sum loss: 5316.384766, avg loss: 2.929138, ppl: 18.711489 +epoch: 1, batch: 11249, sum loss: 4252.882812, avg loss: 2.445591, ppl: 11.537369 +epoch: 1, batch: 11250, sum loss: 4507.002930, avg loss: 2.620351, ppl: 13.740541 +epoch: 1, batch: 11251, sum loss: 3789.380859, avg loss: 2.512852, ppl: 12.340074 +epoch: 1, batch: 11252, sum loss: 4997.068359, avg loss: 2.672229, ppl: 14.472193 +epoch: 1, batch: 11253, sum loss: 4840.385742, avg loss: 2.722377, ppl: 15.216446 +epoch: 1, batch: 11254, sum loss: 4605.862793, avg loss: 2.781318, ppl: 16.140284 +epoch: 1, batch: 11255, sum loss: 4866.592773, avg loss: 2.884762, ppl: 17.899300 +epoch: 1, batch: 11256, sum loss: 
4186.718750, avg loss: 2.687239, ppl: 14.691064 +epoch: 1, batch: 11257, sum loss: 4917.262695, avg loss: 2.882334, ppl: 17.855909 +epoch: 1, batch: 11258, sum loss: 4473.541016, avg loss: 2.734438, ppl: 15.401092 +epoch: 1, batch: 11259, sum loss: 3983.480957, avg loss: 2.571647, ppl: 13.087360 +epoch: 1, batch: 11260, sum loss: 5043.340820, avg loss: 2.855799, ppl: 17.388321 +epoch: 1, batch: 11261, sum loss: 5480.414062, avg loss: 2.993126, ppl: 19.947950 +epoch: 1, batch: 11262, sum loss: 4770.411133, avg loss: 2.668015, ppl: 14.411338 +epoch: 1, batch: 11263, sum loss: 4515.794434, avg loss: 2.534116, ppl: 12.605283 +epoch: 1, batch: 11264, sum loss: 3601.601074, avg loss: 2.367917, ppl: 10.675128 +epoch: 1, batch: 11265, sum loss: 4968.141602, avg loss: 2.692760, ppl: 14.772385 +epoch: 1, batch: 11266, sum loss: 5112.771484, avg loss: 2.799984, ppl: 16.444391 +epoch: 1, batch: 11267, sum loss: 4437.590332, avg loss: 2.707499, ppl: 14.991728 +epoch: 1, batch: 11268, sum loss: 4884.372070, avg loss: 2.742489, ppl: 15.525575 +epoch: 1, batch: 11269, sum loss: 4128.123535, avg loss: 2.649630, ppl: 14.148804 +epoch: 1, batch: 11270, sum loss: 4657.124512, avg loss: 3.067935, ppl: 21.497454 +epoch: 1, batch: 11271, sum loss: 5069.364746, avg loss: 2.752098, ppl: 15.675489 +epoch: 1, batch: 11272, sum loss: 4073.353760, avg loss: 2.434760, ppl: 11.413083 +epoch: 1, batch: 11273, sum loss: 5387.116211, avg loss: 3.094265, ppl: 22.071020 +epoch: 1, batch: 11274, sum loss: 4527.377930, avg loss: 2.735576, ppl: 15.418621 +epoch: 1, batch: 11275, sum loss: 4272.812500, avg loss: 2.566254, ppl: 13.016970 +epoch: 1, batch: 11276, sum loss: 4715.111328, avg loss: 2.948787, ppl: 19.082800 +epoch: 1, batch: 11277, sum loss: 4527.169434, avg loss: 2.701175, ppl: 14.897229 +epoch: 1, batch: 11278, sum loss: 4337.022461, avg loss: 2.796275, ppl: 16.383503 +epoch: 1, batch: 11279, sum loss: 4322.451660, avg loss: 2.630829, ppl: 13.885271 +epoch: 1, batch: 11280, sum loss: 
4114.276855, avg loss: 2.533422, ppl: 12.596535 +epoch: 1, batch: 11281, sum loss: 4421.193359, avg loss: 2.937670, ppl: 18.871828 +epoch: 1, batch: 11282, sum loss: 5429.679199, avg loss: 2.859231, ppl: 17.448099 +epoch: 1, batch: 11283, sum loss: 4779.601562, avg loss: 2.613232, ppl: 13.643079 +epoch: 1, batch: 11284, sum loss: 4880.994629, avg loss: 3.043014, ppl: 20.968353 +epoch: 1, batch: 11285, sum loss: 4980.189453, avg loss: 2.919220, ppl: 18.526831 +epoch: 1, batch: 11286, sum loss: 5627.600586, avg loss: 3.053500, ppl: 21.189377 +epoch: 1, batch: 11287, sum loss: 5342.598145, avg loss: 2.716115, ppl: 15.121457 +epoch: 1, batch: 11288, sum loss: 4114.776855, avg loss: 2.615878, ppl: 13.679226 +epoch: 1, batch: 11289, sum loss: 5416.183105, avg loss: 3.098503, ppl: 22.164749 +epoch: 1, batch: 11290, sum loss: 4589.575195, avg loss: 2.637687, ppl: 13.980828 +epoch: 1, batch: 11291, sum loss: 4367.794922, avg loss: 2.689529, ppl: 14.724738 +epoch: 1, batch: 11292, sum loss: 5418.626465, avg loss: 2.859433, ppl: 17.451635 +epoch: 1, batch: 11293, sum loss: 4319.441406, avg loss: 2.694598, ppl: 14.799575 +epoch: 1, batch: 11294, sum loss: 4732.726562, avg loss: 2.513397, ppl: 12.346801 +epoch: 1, batch: 11295, sum loss: 4835.775879, avg loss: 2.961283, ppl: 19.322756 +epoch: 1, batch: 11296, sum loss: 4553.263184, avg loss: 2.726505, ppl: 15.279393 +epoch: 1, batch: 11297, sum loss: 4351.485352, avg loss: 2.570281, ppl: 13.069493 +epoch: 1, batch: 11298, sum loss: 3843.821289, avg loss: 2.333832, ppl: 10.317400 +epoch: 1, batch: 11299, sum loss: 3630.413818, avg loss: 2.474720, ppl: 11.878375 +epoch: 1, batch: 11300, sum loss: 5248.652832, avg loss: 2.620396, ppl: 13.741162 +epoch: 1, batch: 11301, sum loss: 5091.463867, avg loss: 2.716896, ppl: 15.133280 +epoch: 1, batch: 11302, sum loss: 4712.488770, avg loss: 2.657918, ppl: 14.266559 +epoch: 1, batch: 11303, sum loss: 4580.479004, avg loss: 2.650740, ppl: 14.164518 +epoch: 1, batch: 11304, sum loss: 
4386.021973, avg loss: 2.677669, ppl: 14.551136 +epoch: 1, batch: 11305, sum loss: 5229.374023, avg loss: 2.906823, ppl: 18.298565 +epoch: 1, batch: 11306, sum loss: 4596.555664, avg loss: 2.855003, ppl: 17.374496 +epoch: 1, batch: 11307, sum loss: 5115.815918, avg loss: 2.734268, ppl: 15.398474 +epoch: 1, batch: 11308, sum loss: 4252.238281, avg loss: 2.611940, ppl: 13.625458 +epoch: 1, batch: 11309, sum loss: 5629.078613, avg loss: 2.924196, ppl: 18.619259 +epoch: 1, batch: 11310, sum loss: 4169.202637, avg loss: 2.495035, ppl: 12.122154 +epoch: 1, batch: 11311, sum loss: 3264.189697, avg loss: 2.298725, ppl: 9.961477 +epoch: 1, batch: 11312, sum loss: 5181.825684, avg loss: 2.799474, ppl: 16.435995 +epoch: 1, batch: 11313, sum loss: 3943.353516, avg loss: 2.425187, ppl: 11.304339 +epoch: 1, batch: 11314, sum loss: 4782.344238, avg loss: 2.886146, ppl: 17.924099 +epoch: 1, batch: 11315, sum loss: 3970.557617, avg loss: 2.776614, ppl: 16.064529 +epoch: 1, batch: 11316, sum loss: 4103.751953, avg loss: 2.577734, ppl: 13.167261 +epoch: 1, batch: 11317, sum loss: 4616.979492, avg loss: 2.774627, ppl: 16.032650 +epoch: 1, batch: 11318, sum loss: 4638.302246, avg loss: 2.854340, ppl: 17.362970 +epoch: 1, batch: 11319, sum loss: 4287.750000, avg loss: 2.608120, ppl: 13.573515 +epoch: 1, batch: 11320, sum loss: 4785.331055, avg loss: 2.714311, ppl: 15.094212 +epoch: 1, batch: 11321, sum loss: 4044.312012, avg loss: 2.687251, ppl: 14.691229 +epoch: 1, batch: 11322, sum loss: 4625.209961, avg loss: 2.642977, ppl: 14.054983 +epoch: 1, batch: 11323, sum loss: 4392.161133, avg loss: 2.670007, ppl: 14.440066 +epoch: 1, batch: 11324, sum loss: 4521.220703, avg loss: 2.684810, ppl: 14.655422 +epoch: 1, batch: 11325, sum loss: 4765.438965, avg loss: 2.738758, ppl: 15.467764 +epoch: 1, batch: 11326, sum loss: 4764.055176, avg loss: 2.847612, ppl: 17.246555 +epoch: 1, batch: 11327, sum loss: 4921.134766, avg loss: 2.788178, ppl: 16.251390 +epoch: 1, batch: 11328, sum loss: 
5339.987793, avg loss: 2.942142, ppl: 18.956411 +epoch: 1, batch: 11329, sum loss: 5575.480469, avg loss: 3.134053, ppl: 22.966875 +epoch: 1, batch: 11330, sum loss: 4729.501465, avg loss: 2.718104, ppl: 15.151573 +epoch: 1, batch: 11331, sum loss: 4631.288574, avg loss: 2.995659, ppl: 19.998531 +epoch: 1, batch: 11332, sum loss: 4970.430664, avg loss: 2.683818, ppl: 14.640883 +epoch: 1, batch: 11333, sum loss: 5006.416504, avg loss: 2.740239, ppl: 15.490685 +epoch: 1, batch: 11334, sum loss: 4149.425293, avg loss: 2.612988, ppl: 13.639752 +epoch: 1, batch: 11335, sum loss: 5373.415527, avg loss: 2.849107, ppl: 17.272350 +epoch: 1, batch: 11336, sum loss: 4279.209961, avg loss: 2.550185, ppl: 12.809470 +epoch: 1, batch: 11337, sum loss: 5597.293945, avg loss: 2.818376, ppl: 16.749620 +epoch: 1, batch: 11338, sum loss: 6101.418457, avg loss: 3.283864, ppl: 26.678648 +epoch: 1, batch: 11339, sum loss: 4786.563477, avg loss: 2.651836, ppl: 14.180045 +epoch: 1, batch: 11340, sum loss: 5219.266113, avg loss: 2.979033, ppl: 19.668793 +epoch: 1, batch: 11341, sum loss: 4294.484863, avg loss: 2.609043, ppl: 13.586044 +epoch: 1, batch: 11342, sum loss: 3654.025391, avg loss: 2.134361, ppl: 8.451640 +epoch: 1, batch: 11343, sum loss: 4605.612793, avg loss: 2.723603, ppl: 15.235120 +epoch: 1, batch: 11344, sum loss: 4750.335938, avg loss: 2.854769, ppl: 17.370428 +epoch: 1, batch: 11345, sum loss: 5244.843262, avg loss: 2.756092, ppl: 15.738218 +epoch: 1, batch: 11346, sum loss: 4013.861816, avg loss: 2.554973, ppl: 12.870948 +epoch: 1, batch: 11347, sum loss: 3729.500488, avg loss: 2.779062, ppl: 16.103901 +epoch: 1, batch: 11348, sum loss: 4403.220215, avg loss: 2.671857, ppl: 14.466807 +epoch: 1, batch: 11349, sum loss: 4968.598145, avg loss: 2.769564, ppl: 15.951680 +epoch: 1, batch: 11350, sum loss: 5204.721680, avg loss: 2.756738, ppl: 15.748391 +epoch: 1, batch: 11351, sum loss: 5547.331543, avg loss: 2.918112, ppl: 18.506319 +epoch: 1, batch: 11352, sum loss: 
4639.911621, avg loss: 2.755292, ppl: 15.725631 +epoch: 1, batch: 11353, sum loss: 4318.656250, avg loss: 2.724704, ppl: 15.251903 +epoch: 1, batch: 11354, sum loss: 4749.995117, avg loss: 2.753621, ppl: 15.699370 +epoch: 1, batch: 11355, sum loss: 4108.216797, avg loss: 2.418020, ppl: 11.223620 +epoch: 1, batch: 11356, sum loss: 3745.481201, avg loss: 2.490346, ppl: 12.065455 +epoch: 1, batch: 11357, sum loss: 4036.383545, avg loss: 2.510189, ppl: 12.307254 +epoch: 1, batch: 11358, sum loss: 4345.197754, avg loss: 2.507327, ppl: 12.272081 +epoch: 1, batch: 11359, sum loss: 4373.524414, avg loss: 2.676576, ppl: 14.535234 +epoch: 1, batch: 11360, sum loss: 4751.134766, avg loss: 2.851821, ppl: 17.319300 +epoch: 1, batch: 11361, sum loss: 4945.166992, avg loss: 2.686131, ppl: 14.674789 +epoch: 1, batch: 11362, sum loss: 4411.972168, avg loss: 2.983078, ppl: 19.748505 +epoch: 1, batch: 11363, sum loss: 5781.988281, avg loss: 2.839876, ppl: 17.113651 +epoch: 1, batch: 11364, sum loss: 4990.158691, avg loss: 2.685769, ppl: 14.669476 +epoch: 1, batch: 11365, sum loss: 4361.335938, avg loss: 2.876871, ppl: 17.758617 +epoch: 1, batch: 11366, sum loss: 4767.002930, avg loss: 2.760280, ppl: 15.804262 +epoch: 1, batch: 11367, sum loss: 5806.588867, avg loss: 3.135307, ppl: 22.995697 +epoch: 1, batch: 11368, sum loss: 4952.049316, avg loss: 2.854207, ppl: 17.360666 +epoch: 1, batch: 11369, sum loss: 4322.761719, avg loss: 2.507403, ppl: 12.273011 +epoch: 1, batch: 11370, sum loss: 4954.491699, avg loss: 2.766327, ppl: 15.900124 +epoch: 1, batch: 11371, sum loss: 5303.395020, avg loss: 2.814966, ppl: 16.692604 +epoch: 1, batch: 11372, sum loss: 5371.725098, avg loss: 3.050383, ppl: 21.123440 +epoch: 1, batch: 11373, sum loss: 4595.887695, avg loss: 2.564670, ppl: 12.996364 +epoch: 1, batch: 11374, sum loss: 4577.084961, avg loss: 2.907932, ppl: 18.318872 +epoch: 1, batch: 11375, sum loss: 5247.372070, avg loss: 2.880007, ppl: 17.814390 +epoch: 1, batch: 11376, sum loss: 
4275.459961, avg loss: 2.823950, ppl: 16.843246 +epoch: 1, batch: 11377, sum loss: 4778.751465, avg loss: 2.839424, ppl: 17.105917 +epoch: 1, batch: 11378, sum loss: 4232.394531, avg loss: 2.627185, ppl: 13.834768 +epoch: 1, batch: 11379, sum loss: 4105.135254, avg loss: 2.650184, ppl: 14.156646 +epoch: 1, batch: 11380, sum loss: 3687.347900, avg loss: 2.670056, ppl: 14.440783 +epoch: 1, batch: 11381, sum loss: 4610.085449, avg loss: 2.601628, ppl: 13.485679 +epoch: 1, batch: 11382, sum loss: 3958.756836, avg loss: 2.664036, ppl: 14.354099 +epoch: 1, batch: 11383, sum loss: 4982.539062, avg loss: 2.856960, ppl: 17.408529 +epoch: 1, batch: 11384, sum loss: 4775.895020, avg loss: 2.851281, ppl: 17.309937 +epoch: 1, batch: 11385, sum loss: 5322.804688, avg loss: 2.766531, ppl: 15.903361 +epoch: 1, batch: 11386, sum loss: 4070.631836, avg loss: 2.713754, ppl: 15.085808 +epoch: 1, batch: 11387, sum loss: 4717.681152, avg loss: 2.708198, ppl: 15.002218 +epoch: 1, batch: 11388, sum loss: 5132.976562, avg loss: 2.734671, ppl: 15.404669 +epoch: 1, batch: 11389, sum loss: 5891.314453, avg loss: 2.960459, ppl: 19.306841 +epoch: 1, batch: 11390, sum loss: 5766.374023, avg loss: 2.943529, ppl: 18.982725 +epoch: 1, batch: 11391, sum loss: 3086.009033, avg loss: 2.315086, ppl: 10.125791 +epoch: 1, batch: 11392, sum loss: 4871.336426, avg loss: 2.759964, ppl: 15.799274 +epoch: 1, batch: 11393, sum loss: 5135.118164, avg loss: 2.754892, ppl: 15.719337 +epoch: 1, batch: 11394, sum loss: 4849.945312, avg loss: 2.883440, ppl: 17.875652 +epoch: 1, batch: 11395, sum loss: 4153.719727, avg loss: 2.776551, ppl: 16.063519 +epoch: 1, batch: 11396, sum loss: 4806.022461, avg loss: 2.865845, ppl: 17.563892 +epoch: 1, batch: 11397, sum loss: 5442.669434, avg loss: 2.924594, ppl: 18.626665 +epoch: 1, batch: 11398, sum loss: 4519.148438, avg loss: 2.706077, ppl: 14.970429 +epoch: 1, batch: 11399, sum loss: 4323.300293, avg loss: 2.791027, ppl: 16.297743 +epoch: 1, batch: 11400, sum loss: 
3777.371338, avg loss: 2.467258, ppl: 11.790071 +epoch: 1, batch: 11401, sum loss: 4420.378906, avg loss: 2.669311, ppl: 14.430020 +epoch: 1, batch: 11402, sum loss: 4233.183594, avg loss: 2.654034, ppl: 14.211246 +epoch: 1, batch: 11403, sum loss: 5094.138184, avg loss: 2.802056, ppl: 16.478497 +epoch: 1, batch: 11404, sum loss: 4092.540771, avg loss: 2.603397, ppl: 13.509557 +epoch: 1, batch: 11405, sum loss: 5602.168457, avg loss: 2.784378, ppl: 16.189745 +epoch: 1, batch: 11406, sum loss: 4588.830566, avg loss: 2.644859, ppl: 14.081461 +epoch: 1, batch: 11407, sum loss: 4640.631836, avg loss: 2.726576, ppl: 15.280471 +epoch: 1, batch: 11408, sum loss: 5153.407227, avg loss: 2.864595, ppl: 17.541954 +epoch: 1, batch: 11409, sum loss: 4650.147461, avg loss: 2.717795, ppl: 15.146893 +epoch: 1, batch: 11410, sum loss: 3783.112549, avg loss: 2.603656, ppl: 13.513056 +epoch: 1, batch: 11411, sum loss: 4714.415039, avg loss: 2.712552, ppl: 15.067677 +epoch: 1, batch: 11412, sum loss: 5364.132812, avg loss: 2.896400, ppl: 18.108835 +epoch: 1, batch: 11413, sum loss: 4973.189453, avg loss: 2.698421, ppl: 14.856252 +epoch: 1, batch: 11414, sum loss: 4159.424805, avg loss: 2.808525, ppl: 16.585430 +epoch: 1, batch: 11415, sum loss: 4194.349121, avg loss: 2.781399, ppl: 16.141581 +epoch: 1, batch: 11416, sum loss: 4793.848145, avg loss: 2.788743, ppl: 16.260559 +epoch: 1, batch: 11417, sum loss: 4800.337891, avg loss: 2.610298, ppl: 13.603106 +epoch: 1, batch: 11418, sum loss: 3644.539062, avg loss: 2.614447, ppl: 13.659658 +epoch: 1, batch: 11419, sum loss: 4158.845703, avg loss: 2.721758, ppl: 15.207027 +epoch: 1, batch: 11420, sum loss: 4033.074219, avg loss: 2.500356, ppl: 12.186831 +epoch: 1, batch: 11421, sum loss: 4413.835449, avg loss: 2.438583, ppl: 11.456800 +epoch: 1, batch: 11422, sum loss: 5588.011719, avg loss: 3.025453, ppl: 20.603342 +epoch: 1, batch: 11423, sum loss: 5871.908203, avg loss: 2.906885, ppl: 18.299713 +epoch: 1, batch: 11424, sum loss: 
4913.898438, avg loss: 2.612386, ppl: 13.631540 +epoch: 1, batch: 11425, sum loss: 4862.506348, avg loss: 2.855259, ppl: 17.378941 +epoch: 1, batch: 11426, sum loss: 4736.937500, avg loss: 2.966147, ppl: 19.416969 +epoch: 1, batch: 11427, sum loss: 4519.102539, avg loss: 2.799940, ppl: 16.443655 +epoch: 1, batch: 11428, sum loss: 5687.484375, avg loss: 2.878282, ppl: 17.783688 +epoch: 1, batch: 11429, sum loss: 3827.368896, avg loss: 2.558402, ppl: 12.915160 +epoch: 1, batch: 11430, sum loss: 4607.754883, avg loss: 2.757484, ppl: 15.760136 +epoch: 1, batch: 11431, sum loss: 4294.422852, avg loss: 2.687374, ppl: 14.693036 +epoch: 1, batch: 11432, sum loss: 4841.586426, avg loss: 2.943213, ppl: 18.976730 +epoch: 1, batch: 11433, sum loss: 4258.329102, avg loss: 2.646569, ppl: 14.105556 +epoch: 1, batch: 11434, sum loss: 3844.979004, avg loss: 2.449031, ppl: 11.577127 +epoch: 1, batch: 11435, sum loss: 5462.795898, avg loss: 2.999888, ppl: 20.083286 +epoch: 1, batch: 11436, sum loss: 4729.277344, avg loss: 2.670399, ppl: 14.445738 +epoch: 1, batch: 11437, sum loss: 4773.020020, avg loss: 2.797784, ppl: 16.408251 +epoch: 1, batch: 11438, sum loss: 4585.898926, avg loss: 2.859039, ppl: 17.444759 +epoch: 1, batch: 11439, sum loss: 4942.194336, avg loss: 2.819278, ppl: 16.764742 +epoch: 1, batch: 11440, sum loss: 4883.512695, avg loss: 3.142544, ppl: 23.162708 +epoch: 1, batch: 11441, sum loss: 5128.370117, avg loss: 2.866613, ppl: 17.577377 +epoch: 1, batch: 11442, sum loss: 4635.676270, avg loss: 2.544279, ppl: 12.734045 +epoch: 1, batch: 11443, sum loss: 4647.691895, avg loss: 2.849597, ppl: 17.280807 +epoch: 1, batch: 11444, sum loss: 4949.305664, avg loss: 2.932053, ppl: 18.766125 +epoch: 1, batch: 11445, sum loss: 4273.570801, avg loss: 2.762489, ppl: 15.839223 +epoch: 1, batch: 11446, sum loss: 4188.515625, avg loss: 2.772016, ppl: 15.990832 +epoch: 1, batch: 11447, sum loss: 5168.367676, avg loss: 3.069102, ppl: 21.522568 +epoch: 1, batch: 11448, sum loss: 
4666.568359, avg loss: 2.905709, ppl: 18.278194 +epoch: 1, batch: 11449, sum loss: 5162.455078, avg loss: 3.091290, ppl: 22.005457 +epoch: 1, batch: 11450, sum loss: 5398.986328, avg loss: 2.841572, ppl: 17.142693 +epoch: 1, batch: 11451, sum loss: 4608.051270, avg loss: 2.752719, ppl: 15.685221 +epoch: 1, batch: 11452, sum loss: 4484.913086, avg loss: 2.810096, ppl: 16.611509 +epoch: 1, batch: 11453, sum loss: 3782.584961, avg loss: 2.583733, ppl: 13.246493 +epoch: 1, batch: 11454, sum loss: 5694.856934, avg loss: 3.037257, ppl: 20.847979 +epoch: 1, batch: 11455, sum loss: 5659.395020, avg loss: 2.953755, ppl: 19.177839 +epoch: 1, batch: 11456, sum loss: 4533.714844, avg loss: 2.605583, ppl: 13.539119 +epoch: 1, batch: 11457, sum loss: 4803.414551, avg loss: 2.709202, ppl: 15.017284 +epoch: 1, batch: 11458, sum loss: 3593.386719, avg loss: 2.446145, ppl: 11.543755 +epoch: 1, batch: 11459, sum loss: 4216.810059, avg loss: 2.481937, ppl: 11.964414 +epoch: 1, batch: 11460, sum loss: 4615.090820, avg loss: 2.841805, ppl: 17.146683 +epoch: 1, batch: 11461, sum loss: 4969.870605, avg loss: 2.788929, ppl: 16.263588 +epoch: 1, batch: 11462, sum loss: 4437.120117, avg loss: 2.623962, ppl: 13.790258 +epoch: 1, batch: 11463, sum loss: 5257.286621, avg loss: 2.683658, ppl: 14.638549 +epoch: 1, batch: 11464, sum loss: 3992.334961, avg loss: 2.542889, ppl: 12.716351 +epoch: 1, batch: 11465, sum loss: 4464.807617, avg loss: 2.785282, ppl: 16.204393 +epoch: 1, batch: 11466, sum loss: 5055.591309, avg loss: 2.622195, ppl: 13.765903 +epoch: 1, batch: 11467, sum loss: 4996.088867, avg loss: 2.891255, ppl: 18.015907 +epoch: 1, batch: 11468, sum loss: 5381.426758, avg loss: 3.018187, ppl: 20.454170 +epoch: 1, batch: 11469, sum loss: 4860.367676, avg loss: 2.664675, ppl: 14.363284 +epoch: 1, batch: 11470, sum loss: 4509.284180, avg loss: 2.853977, ppl: 17.356680 +epoch: 1, batch: 11471, sum loss: 4437.736328, avg loss: 2.761504, ppl: 15.823619 +epoch: 1, batch: 11472, sum loss: 
4461.177246, avg loss: 2.641313, ppl: 14.031612 +epoch: 1, batch: 11473, sum loss: 5865.994629, avg loss: 2.971628, ppl: 19.523682 +epoch: 1, batch: 11474, sum loss: 5562.689941, avg loss: 3.102448, ppl: 22.252369 +epoch: 1, batch: 11475, sum loss: 4940.840332, avg loss: 2.849389, ppl: 17.277220 +epoch: 1, batch: 11476, sum loss: 4386.764160, avg loss: 2.671598, ppl: 14.463065 +epoch: 1, batch: 11477, sum loss: 6063.779785, avg loss: 3.006336, ppl: 20.213207 +epoch: 1, batch: 11478, sum loss: 5170.456055, avg loss: 2.680382, ppl: 14.590659 +epoch: 1, batch: 11479, sum loss: 5328.440918, avg loss: 2.971802, ppl: 19.527075 +epoch: 1, batch: 11480, sum loss: 5032.621582, avg loss: 2.768219, ppl: 15.930233 +epoch: 1, batch: 11481, sum loss: 5127.368164, avg loss: 2.826554, ppl: 16.887159 +epoch: 1, batch: 11482, sum loss: 4830.115723, avg loss: 3.066740, ppl: 21.471790 +epoch: 1, batch: 11483, sum loss: 5126.145020, avg loss: 2.876625, ppl: 17.754248 +epoch: 1, batch: 11484, sum loss: 4351.551758, avg loss: 2.682831, ppl: 14.626439 +epoch: 1, batch: 11485, sum loss: 5046.103516, avg loss: 2.748422, ppl: 15.617973 +epoch: 1, batch: 11486, sum loss: 4525.307129, avg loss: 2.910166, ppl: 18.359838 +epoch: 1, batch: 11487, sum loss: 4743.228027, avg loss: 2.826715, ppl: 16.889891 +epoch: 1, batch: 11488, sum loss: 4917.813965, avg loss: 2.803771, ppl: 16.506773 +epoch: 1, batch: 11489, sum loss: 4890.195312, avg loss: 2.868150, ppl: 17.604416 +epoch: 1, batch: 11490, sum loss: 5220.899414, avg loss: 2.831290, ppl: 16.967342 +epoch: 1, batch: 11491, sum loss: 4762.174805, avg loss: 2.732171, ppl: 15.366216 +epoch: 1, batch: 11492, sum loss: 4496.913086, avg loss: 2.715527, ppl: 15.112576 +epoch: 1, batch: 11493, sum loss: 4356.941406, avg loss: 2.740215, ppl: 15.490309 +epoch: 1, batch: 11494, sum loss: 4732.101074, avg loss: 3.092877, ppl: 22.040390 +epoch: 1, batch: 11495, sum loss: 4628.851562, avg loss: 2.606335, ppl: 13.549307 +epoch: 1, batch: 11496, sum loss: 
4637.823242, avg loss: 2.785480, ppl: 16.207588 +epoch: 1, batch: 11497, sum loss: 4165.448242, avg loss: 2.634692, ppl: 13.939018 +epoch: 1, batch: 11498, sum loss: 4632.111328, avg loss: 2.677521, ppl: 14.548982 +epoch: 1, batch: 11499, sum loss: 4712.362793, avg loss: 2.685107, ppl: 14.659770 +epoch: 1, batch: 11500, sum loss: 5636.868164, avg loss: 2.877421, ppl: 17.768396 +epoch: 1, batch: 11501, sum loss: 3729.800293, avg loss: 2.599164, ppl: 13.452487 +epoch: 1, batch: 11502, sum loss: 4748.217285, avg loss: 2.826320, ppl: 16.883215 +epoch: 1, batch: 11503, sum loss: 4220.652832, avg loss: 2.584600, ppl: 13.257991 +epoch: 1, batch: 11504, sum loss: 3856.576660, avg loss: 2.459551, ppl: 11.699561 +epoch: 1, batch: 11505, sum loss: 3804.129883, avg loss: 2.525983, ppl: 12.503175 +epoch: 1, batch: 11506, sum loss: 3365.745605, avg loss: 2.291181, ppl: 9.886610 +epoch: 1, batch: 11507, sum loss: 4568.502441, avg loss: 2.787372, ppl: 16.238283 +epoch: 1, batch: 11508, sum loss: 4828.671387, avg loss: 2.773504, ppl: 16.014660 +epoch: 1, batch: 11509, sum loss: 5310.159668, avg loss: 2.917670, ppl: 18.498140 +epoch: 1, batch: 11510, sum loss: 4241.143555, avg loss: 2.606726, ppl: 13.554603 +epoch: 1, batch: 11511, sum loss: 4499.654785, avg loss: 2.679961, ppl: 14.584527 +epoch: 1, batch: 11512, sum loss: 4715.346680, avg loss: 2.661031, ppl: 14.311036 +epoch: 1, batch: 11513, sum loss: 4766.740234, avg loss: 2.815558, ppl: 16.702501 +epoch: 1, batch: 11514, sum loss: 4882.782715, avg loss: 2.860447, ppl: 17.469334 +epoch: 1, batch: 11515, sum loss: 4472.177734, avg loss: 2.716998, ppl: 15.134813 +epoch: 1, batch: 11516, sum loss: 4334.419434, avg loss: 2.752012, ppl: 15.674141 +epoch: 1, batch: 11517, sum loss: 5070.671875, avg loss: 2.801476, ppl: 16.468941 +epoch: 1, batch: 11518, sum loss: 4740.083984, avg loss: 2.791569, ppl: 16.306585 +epoch: 1, batch: 11519, sum loss: 5391.641602, avg loss: 2.739655, ppl: 15.481650 +epoch: 1, batch: 11520, sum loss: 
4740.936035, avg loss: 2.919296, ppl: 18.528231 +epoch: 1, batch: 11521, sum loss: 6203.059570, avg loss: 2.938446, ppl: 18.886475 +epoch: 1, batch: 11522, sum loss: 5018.589844, avg loss: 2.854715, ppl: 17.369492 +epoch: 1, batch: 11523, sum loss: 4658.542480, avg loss: 2.821649, ppl: 16.804541 +epoch: 1, batch: 11524, sum loss: 4763.795898, avg loss: 2.828858, ppl: 16.926115 +epoch: 1, batch: 11525, sum loss: 4142.176758, avg loss: 2.721535, ppl: 15.203649 +epoch: 1, batch: 11526, sum loss: 5503.109863, avg loss: 3.220076, ppl: 25.030024 +epoch: 1, batch: 11527, sum loss: 5064.124023, avg loss: 2.958016, ppl: 19.259726 +epoch: 1, batch: 11528, sum loss: 3878.364258, avg loss: 2.484538, ppl: 11.995581 +epoch: 1, batch: 11529, sum loss: 4187.942871, avg loss: 2.696679, ppl: 14.830403 +epoch: 1, batch: 11530, sum loss: 4740.695801, avg loss: 2.842144, ppl: 17.152496 +epoch: 1, batch: 11531, sum loss: 4582.292969, avg loss: 2.855011, ppl: 17.374628 +epoch: 1, batch: 11532, sum loss: 3765.219971, avg loss: 2.584228, ppl: 13.253051 +epoch: 1, batch: 11533, sum loss: 4392.975098, avg loss: 2.604016, ppl: 13.517918 +epoch: 1, batch: 11534, sum loss: 4451.638672, avg loss: 2.630992, ppl: 13.887542 +epoch: 1, batch: 11535, sum loss: 3315.576172, avg loss: 2.385307, ppl: 10.862392 +epoch: 1, batch: 11536, sum loss: 4579.763184, avg loss: 2.848111, ppl: 17.255163 +epoch: 1, batch: 11537, sum loss: 4597.224121, avg loss: 2.915171, ppl: 18.451962 +epoch: 1, batch: 11538, sum loss: 5093.410156, avg loss: 2.930616, ppl: 18.739164 +epoch: 1, batch: 11539, sum loss: 5314.923828, avg loss: 2.779772, ppl: 16.115343 +epoch: 1, batch: 11540, sum loss: 5957.888672, avg loss: 3.289834, ppl: 26.838402 +epoch: 1, batch: 11541, sum loss: 4493.686523, avg loss: 2.676406, ppl: 14.532774 +epoch: 1, batch: 11542, sum loss: 3874.642090, avg loss: 2.616234, ppl: 13.684090 +epoch: 1, batch: 11543, sum loss: 5288.667969, avg loss: 2.776204, ppl: 16.057943 +epoch: 1, batch: 11544, sum loss: 
4154.886230, avg loss: 2.308270, ppl: 10.057014 +epoch: 1, batch: 11545, sum loss: 5046.392578, avg loss: 2.742604, ppl: 15.527373 +epoch: 1, batch: 11546, sum loss: 4740.696777, avg loss: 2.831958, ppl: 16.978666 +epoch: 1, batch: 11547, sum loss: 4977.621582, avg loss: 2.785463, ppl: 16.207314 +epoch: 1, batch: 11548, sum loss: 5513.814941, avg loss: 2.993385, ppl: 19.953112 +epoch: 1, batch: 11549, sum loss: 4395.032227, avg loss: 2.646016, ppl: 14.097759 +epoch: 1, batch: 11550, sum loss: 4100.090820, avg loss: 2.887388, ppl: 17.946373 +epoch: 1, batch: 11551, sum loss: 5467.698242, avg loss: 2.733849, ppl: 15.392021 +epoch: 1, batch: 11552, sum loss: 4765.776855, avg loss: 2.751603, ppl: 15.667733 +epoch: 1, batch: 11553, sum loss: 4306.949707, avg loss: 2.594548, ppl: 13.390533 +epoch: 1, batch: 11554, sum loss: 4844.986328, avg loss: 2.873657, ppl: 17.701643 +epoch: 1, batch: 11555, sum loss: 3813.173584, avg loss: 2.466477, ppl: 11.780871 +epoch: 1, batch: 11556, sum loss: 4159.947754, avg loss: 2.847329, ppl: 17.241671 +epoch: 1, batch: 11557, sum loss: 4552.851074, avg loss: 2.671861, ppl: 14.466866 +epoch: 1, batch: 11558, sum loss: 3802.901855, avg loss: 2.557432, ppl: 12.902643 +epoch: 1, batch: 11559, sum loss: 3280.312500, avg loss: 2.370168, ppl: 10.699189 +epoch: 1, batch: 11560, sum loss: 5195.169922, avg loss: 2.694590, ppl: 14.799455 +epoch: 1, batch: 11561, sum loss: 4206.781738, avg loss: 2.708810, ppl: 15.011403 +epoch: 1, batch: 11562, sum loss: 4106.514160, avg loss: 2.960717, ppl: 19.311808 +epoch: 1, batch: 11563, sum loss: 4806.169922, avg loss: 2.766937, ppl: 15.909832 +epoch: 1, batch: 11564, sum loss: 4104.756348, avg loss: 2.651651, ppl: 14.177432 +epoch: 1, batch: 11565, sum loss: 4941.588379, avg loss: 2.868014, ppl: 17.602028 +epoch: 1, batch: 11566, sum loss: 4523.276855, avg loss: 2.828816, ppl: 16.925407 +epoch: 1, batch: 11567, sum loss: 5265.680176, avg loss: 2.996972, ppl: 20.024811 +epoch: 1, batch: 11568, sum loss: 
4224.190918, avg loss: 2.385201, ppl: 10.861245 +epoch: 1, batch: 11569, sum loss: 5421.284668, avg loss: 2.733880, ppl: 15.392499 +epoch: 1, batch: 11570, sum loss: 5610.819336, avg loss: 2.917743, ppl: 18.499481 +epoch: 1, batch: 11571, sum loss: 4856.055664, avg loss: 2.782840, ppl: 16.164860 +epoch: 1, batch: 11572, sum loss: 5029.727539, avg loss: 2.970896, ppl: 19.509401 +epoch: 1, batch: 11573, sum loss: 4619.847656, avg loss: 2.633893, ppl: 13.927879 +epoch: 1, batch: 11574, sum loss: 4091.415039, avg loss: 2.781384, ppl: 16.141336 +epoch: 1, batch: 11575, sum loss: 3763.100830, avg loss: 2.423117, ppl: 11.280970 +epoch: 1, batch: 11576, sum loss: 5176.162109, avg loss: 3.082884, ppl: 21.821241 +epoch: 1, batch: 11577, sum loss: 4771.349609, avg loss: 2.739007, ppl: 15.471606 +epoch: 1, batch: 11578, sum loss: 4208.928223, avg loss: 2.756338, ppl: 15.742091 +epoch: 1, batch: 11579, sum loss: 3626.871826, avg loss: 2.344455, ppl: 10.427588 +epoch: 1, batch: 11580, sum loss: 4208.344727, avg loss: 2.538206, ppl: 12.656940 +epoch: 1, batch: 11581, sum loss: 4890.430664, avg loss: 2.698913, ppl: 14.863567 +epoch: 1, batch: 11582, sum loss: 4521.637207, avg loss: 2.789412, ppl: 16.271450 +epoch: 1, batch: 11583, sum loss: 4919.583496, avg loss: 2.710514, ppl: 15.037007 +epoch: 1, batch: 11584, sum loss: 4211.950195, avg loss: 2.448808, ppl: 11.574544 +epoch: 1, batch: 11585, sum loss: 4077.353516, avg loss: 2.475624, ppl: 11.889128 +epoch: 1, batch: 11586, sum loss: 5225.125000, avg loss: 2.651002, ppl: 14.168227 +epoch: 1, batch: 11587, sum loss: 5383.690430, avg loss: 2.905391, ppl: 18.272394 +epoch: 1, batch: 11588, sum loss: 3725.047363, avg loss: 2.537498, ppl: 12.647989 +epoch: 1, batch: 11589, sum loss: 4759.522949, avg loss: 2.856856, ppl: 17.406719 +epoch: 1, batch: 11590, sum loss: 5333.041016, avg loss: 2.899968, ppl: 18.173567 +epoch: 1, batch: 11591, sum loss: 4676.474121, avg loss: 2.726807, ppl: 15.284005 +epoch: 1, batch: 11592, sum loss: 
4609.720703, avg loss: 2.667662, ppl: 14.406254 +epoch: 1, batch: 11593, sum loss: 4345.266113, avg loss: 2.569643, ppl: 13.061161 +epoch: 1, batch: 11594, sum loss: 5573.274414, avg loss: 2.993166, ppl: 19.948734 +epoch: 1, batch: 11595, sum loss: 5066.210938, avg loss: 2.817692, ppl: 16.738178 +epoch: 1, batch: 11596, sum loss: 4888.859375, avg loss: 2.728158, ppl: 15.304673 +epoch: 1, batch: 11597, sum loss: 4142.968750, avg loss: 2.747327, ppl: 15.600869 +epoch: 1, batch: 11598, sum loss: 4259.525879, avg loss: 2.547564, ppl: 12.775938 +epoch: 1, batch: 11599, sum loss: 5745.299316, avg loss: 2.919360, ppl: 18.529419 +epoch: 1, batch: 11600, sum loss: 4398.519043, avg loss: 2.605758, ppl: 13.541482 +epoch: 1, batch: 11601, sum loss: 4644.003418, avg loss: 2.795908, ppl: 16.377497 +epoch: 1, batch: 11602, sum loss: 5340.485840, avg loss: 2.888310, ppl: 17.962931 +epoch: 1, batch: 11603, sum loss: 4998.212891, avg loss: 2.728282, ppl: 15.306571 +epoch: 1, batch: 11604, sum loss: 4641.165527, avg loss: 2.675023, ppl: 14.512689 +epoch: 1, batch: 11605, sum loss: 3899.632080, avg loss: 2.667327, ppl: 14.401425 +epoch: 1, batch: 11606, sum loss: 4344.956055, avg loss: 2.788804, ppl: 16.261557 +epoch: 1, batch: 11607, sum loss: 3955.953125, avg loss: 2.607748, ppl: 13.568454 +epoch: 1, batch: 11608, sum loss: 3242.421387, avg loss: 2.447110, ppl: 11.554910 +epoch: 1, batch: 11609, sum loss: 4304.694336, avg loss: 2.448632, ppl: 11.572499 +epoch: 1, batch: 11610, sum loss: 5083.095215, avg loss: 2.774615, ppl: 16.032457 +epoch: 1, batch: 11611, sum loss: 3410.204102, avg loss: 2.371491, ppl: 10.713353 +epoch: 1, batch: 11612, sum loss: 4587.173828, avg loss: 2.786861, ppl: 16.230001 +epoch: 1, batch: 11613, sum loss: 3672.620117, avg loss: 2.435424, ppl: 11.420664 +epoch: 1, batch: 11614, sum loss: 3759.129883, avg loss: 2.841368, ppl: 17.139194 +epoch: 1, batch: 11615, sum loss: 5510.124512, avg loss: 3.020902, ppl: 20.509777 +epoch: 1, batch: 11616, sum loss: 
4319.827637, avg loss: 2.602306, ppl: 13.494820 +epoch: 1, batch: 11617, sum loss: 5518.640625, avg loss: 2.703891, ppl: 14.937735 +epoch: 1, batch: 11618, sum loss: 5315.568359, avg loss: 2.896768, ppl: 18.115494 +epoch: 1, batch: 11619, sum loss: 5094.945312, avg loss: 2.903103, ppl: 18.230621 +epoch: 1, batch: 11620, sum loss: 5109.884277, avg loss: 2.906646, ppl: 18.295338 +epoch: 1, batch: 11621, sum loss: 4736.389648, avg loss: 2.870539, ppl: 17.646530 +epoch: 1, batch: 11622, sum loss: 4390.577148, avg loss: 2.810869, ppl: 16.624357 +epoch: 1, batch: 11623, sum loss: 4817.387207, avg loss: 2.964546, ppl: 19.385899 +epoch: 1, batch: 11624, sum loss: 5582.785156, avg loss: 2.823867, ppl: 16.841854 +epoch: 1, batch: 11625, sum loss: 3674.153076, avg loss: 2.396708, ppl: 10.986945 +epoch: 1, batch: 11626, sum loss: 4101.649414, avg loss: 2.566739, ppl: 13.023291 +epoch: 1, batch: 11627, sum loss: 4201.248047, avg loss: 2.579035, ppl: 13.184410 +epoch: 1, batch: 11628, sum loss: 3967.929199, avg loss: 2.600216, ppl: 13.466645 +epoch: 1, batch: 11629, sum loss: 5010.825195, avg loss: 2.815070, ppl: 16.694351 +epoch: 1, batch: 11630, sum loss: 5347.611328, avg loss: 2.979171, ppl: 19.671497 +epoch: 1, batch: 11631, sum loss: 5770.370117, avg loss: 2.915801, ppl: 18.463596 +epoch: 1, batch: 11632, sum loss: 4967.991699, avg loss: 2.906958, ppl: 18.301048 +epoch: 1, batch: 11633, sum loss: 5189.265137, avg loss: 3.120424, ppl: 22.655985 +epoch: 1, batch: 11634, sum loss: 4153.673828, avg loss: 2.512809, ppl: 12.339546 +epoch: 1, batch: 11635, sum loss: 5182.875977, avg loss: 2.733585, ppl: 15.387949 +epoch: 1, batch: 11636, sum loss: 6005.360352, avg loss: 3.022325, ppl: 20.538990 +epoch: 1, batch: 11637, sum loss: 5401.364746, avg loss: 2.964525, ppl: 19.385498 +epoch: 1, batch: 11638, sum loss: 5195.315430, avg loss: 2.609400, ppl: 13.590895 +epoch: 1, batch: 11639, sum loss: 5103.881348, avg loss: 2.694763, ppl: 14.802013 +epoch: 1, batch: 11640, sum loss: 
3844.084229, avg loss: 2.351122, ppl: 10.497340 +epoch: 1, batch: 11641, sum loss: 4113.927734, avg loss: 2.548902, ppl: 12.793050 +epoch: 1, batch: 11642, sum loss: 4715.830078, avg loss: 2.578365, ppl: 13.175583 +epoch: 1, batch: 11643, sum loss: 4340.864746, avg loss: 2.757855, ppl: 15.765995 +epoch: 1, batch: 11644, sum loss: 4327.439453, avg loss: 2.761608, ppl: 15.825268 +epoch: 1, batch: 11645, sum loss: 5269.958496, avg loss: 2.968991, ppl: 19.472254 +epoch: 1, batch: 11646, sum loss: 4478.563965, avg loss: 2.930997, ppl: 18.746319 +epoch: 1, batch: 11647, sum loss: 5566.875488, avg loss: 2.919180, ppl: 18.526083 +epoch: 1, batch: 11648, sum loss: 5277.889160, avg loss: 2.925659, ppl: 18.646513 +epoch: 1, batch: 11649, sum loss: 4613.207520, avg loss: 2.745957, ppl: 15.579515 +epoch: 1, batch: 11650, sum loss: 4512.555176, avg loss: 2.596407, ppl: 13.415449 +epoch: 1, batch: 11651, sum loss: 4016.575684, avg loss: 2.534117, ppl: 12.605298 +epoch: 1, batch: 11652, sum loss: 5489.937988, avg loss: 3.225580, ppl: 25.168179 +epoch: 1, batch: 11653, sum loss: 5266.179199, avg loss: 2.975243, ppl: 19.594376 +epoch: 1, batch: 11654, sum loss: 4593.850098, avg loss: 2.622061, ppl: 13.764059 +epoch: 1, batch: 11655, sum loss: 5260.003418, avg loss: 3.014328, ppl: 20.375404 +epoch: 1, batch: 11656, sum loss: 3737.842285, avg loss: 2.328874, ppl: 10.266372 +epoch: 1, batch: 11657, sum loss: 4141.037109, avg loss: 2.564110, ppl: 12.989090 +epoch: 1, batch: 11658, sum loss: 4760.105957, avg loss: 2.603997, ppl: 13.517657 +epoch: 1, batch: 11659, sum loss: 4428.370117, avg loss: 2.903849, ppl: 18.244240 +epoch: 1, batch: 11660, sum loss: 4991.955078, avg loss: 2.957319, ppl: 19.246313 +epoch: 1, batch: 11661, sum loss: 4468.826660, avg loss: 2.675944, ppl: 14.526057 +epoch: 1, batch: 11662, sum loss: 4858.157715, avg loss: 2.714055, ppl: 15.090337 +epoch: 1, batch: 11663, sum loss: 5322.501953, avg loss: 2.869273, ppl: 17.624205 +epoch: 1, batch: 11664, sum loss: 
5248.238770, avg loss: 2.782735, ppl: 16.163172 +epoch: 1, batch: 11665, sum loss: 4738.774414, avg loss: 2.748709, ppl: 15.622454 +epoch: 1, batch: 11666, sum loss: 5504.469238, avg loss: 2.794147, ppl: 16.348673 +epoch: 1, batch: 11667, sum loss: 5307.541504, avg loss: 2.786111, ppl: 16.217827 +epoch: 1, batch: 11668, sum loss: 5949.065430, avg loss: 3.000033, ppl: 20.086197 +epoch: 1, batch: 11669, sum loss: 5194.780762, avg loss: 3.013214, ppl: 20.352711 +epoch: 1, batch: 11670, sum loss: 4529.480469, avg loss: 2.634951, ppl: 13.942628 +epoch: 1, batch: 11671, sum loss: 5226.061035, avg loss: 2.840250, ppl: 17.120054 +epoch: 1, batch: 11672, sum loss: 4790.570801, avg loss: 2.626410, ppl: 13.824046 +epoch: 1, batch: 11673, sum loss: 4571.725098, avg loss: 2.554036, ppl: 12.858903 +epoch: 1, batch: 11674, sum loss: 3682.601074, avg loss: 2.397527, ppl: 10.995947 +epoch: 1, batch: 11675, sum loss: 4223.641113, avg loss: 2.631552, ppl: 13.895318 +epoch: 1, batch: 11676, sum loss: 3927.095215, avg loss: 2.496564, ppl: 12.140706 +epoch: 1, batch: 11677, sum loss: 3953.925293, avg loss: 2.664370, ppl: 14.358901 +epoch: 1, batch: 11678, sum loss: 5594.973633, avg loss: 2.889966, ppl: 17.992689 +epoch: 1, batch: 11679, sum loss: 4428.729004, avg loss: 2.759333, ppl: 15.789303 +epoch: 1, batch: 11680, sum loss: 5486.958496, avg loss: 2.835637, ppl: 17.041258 +epoch: 1, batch: 11681, sum loss: 4979.657227, avg loss: 2.880080, ppl: 17.815689 +epoch: 1, batch: 11682, sum loss: 4398.194336, avg loss: 2.757489, ppl: 15.760215 +epoch: 1, batch: 11683, sum loss: 4451.497070, avg loss: 2.785668, ppl: 16.210642 +epoch: 1, batch: 11684, sum loss: 4376.275879, avg loss: 2.780353, ppl: 16.124714 +epoch: 1, batch: 11685, sum loss: 4067.245117, avg loss: 2.603870, ppl: 13.515945 +epoch: 1, batch: 11686, sum loss: 4569.933105, avg loss: 2.793358, ppl: 16.335781 +epoch: 1, batch: 11687, sum loss: 4310.685547, avg loss: 2.707717, ppl: 14.995002 +epoch: 1, batch: 11688, sum loss: 
5724.440918, avg loss: 3.114494, ppl: 22.522039 +epoch: 1, batch: 11689, sum loss: 4869.581543, avg loss: 2.581963, ppl: 13.223067 +epoch: 1, batch: 11690, sum loss: 5279.312500, avg loss: 2.762592, ppl: 15.840843 +epoch: 1, batch: 11691, sum loss: 4307.128418, avg loss: 2.577575, ppl: 13.165180 +epoch: 1, batch: 11692, sum loss: 5033.799805, avg loss: 2.779569, ppl: 16.112082 +epoch: 1, batch: 11693, sum loss: 3416.872559, avg loss: 2.287063, ppl: 9.845981 +epoch: 1, batch: 11694, sum loss: 4004.678223, avg loss: 2.552376, ppl: 12.837573 +epoch: 1, batch: 11695, sum loss: 3851.462891, avg loss: 2.583141, ppl: 13.238653 +epoch: 1, batch: 11696, sum loss: 4493.749023, avg loss: 2.550369, ppl: 12.811825 +epoch: 1, batch: 11697, sum loss: 5159.339844, avg loss: 2.861531, ppl: 17.488277 +epoch: 1, batch: 11698, sum loss: 5199.413574, avg loss: 3.154984, ppl: 23.452662 +epoch: 1, batch: 11699, sum loss: 4555.142578, avg loss: 2.912495, ppl: 18.402658 +epoch: 1, batch: 11700, sum loss: 4800.750977, avg loss: 2.586612, ppl: 13.284680 +epoch: 1, batch: 11701, sum loss: 4212.228516, avg loss: 2.411121, ppl: 11.146448 +epoch: 1, batch: 11702, sum loss: 3965.329102, avg loss: 2.619108, ppl: 13.723473 +epoch: 1, batch: 11703, sum loss: 4978.412109, avg loss: 3.019049, ppl: 20.471817 +epoch: 1, batch: 11704, sum loss: 5292.539551, avg loss: 2.821183, ppl: 16.796713 +epoch: 1, batch: 11705, sum loss: 4649.637695, avg loss: 2.929829, ppl: 18.724422 +epoch: 1, batch: 11706, sum loss: 5346.468262, avg loss: 2.985186, ppl: 19.790180 +epoch: 1, batch: 11707, sum loss: 4990.911133, avg loss: 2.855212, ppl: 17.378128 +epoch: 1, batch: 11708, sum loss: 3943.132812, avg loss: 2.262268, ppl: 9.604847 +epoch: 1, batch: 11709, sum loss: 4428.137695, avg loss: 2.829481, ppl: 16.936666 +epoch: 1, batch: 11710, sum loss: 4554.943848, avg loss: 2.607295, ppl: 13.562309 +epoch: 1, batch: 11711, sum loss: 4287.842285, avg loss: 2.586154, ppl: 13.278597 +epoch: 1, batch: 11712, sum loss: 
5891.817871, avg loss: 2.937098, ppl: 18.861029 +epoch: 1, batch: 11713, sum loss: 5199.865234, avg loss: 2.876032, ppl: 17.743719 +epoch: 1, batch: 11714, sum loss: 5266.635742, avg loss: 2.630687, ppl: 13.883305 +epoch: 1, batch: 11715, sum loss: 5435.532227, avg loss: 2.980007, ppl: 19.687948 +epoch: 1, batch: 11716, sum loss: 5198.649902, avg loss: 2.986014, ppl: 19.806568 +epoch: 1, batch: 11717, sum loss: 4285.651367, avg loss: 2.685245, ppl: 14.661798 +epoch: 1, batch: 11718, sum loss: 3922.246826, avg loss: 2.490315, ppl: 12.065082 +epoch: 1, batch: 11719, sum loss: 4762.173828, avg loss: 2.735310, ppl: 15.414515 +epoch: 1, batch: 11720, sum loss: 4557.588867, avg loss: 2.767206, ppl: 15.914111 +epoch: 1, batch: 11721, sum loss: 3930.818604, avg loss: 2.424934, ppl: 11.301488 +epoch: 1, batch: 11722, sum loss: 5316.427734, avg loss: 2.830899, ppl: 16.960693 +epoch: 1, batch: 11723, sum loss: 5105.933594, avg loss: 3.046500, ppl: 21.041569 +epoch: 1, batch: 11724, sum loss: 4881.218262, avg loss: 2.635647, ppl: 13.952334 +epoch: 1, batch: 11725, sum loss: 4837.785156, avg loss: 2.783536, ppl: 16.176115 +epoch: 1, batch: 11726, sum loss: 3753.502197, avg loss: 2.551667, ppl: 12.828471 +epoch: 1, batch: 11727, sum loss: 5620.255859, avg loss: 3.224473, ppl: 25.140310 +epoch: 1, batch: 11728, sum loss: 4121.825684, avg loss: 2.433191, ppl: 11.395186 +epoch: 1, batch: 11729, sum loss: 5362.255859, avg loss: 2.915854, ppl: 18.464579 +epoch: 1, batch: 11730, sum loss: 3809.095947, avg loss: 2.502691, ppl: 12.215324 +epoch: 1, batch: 11731, sum loss: 4222.321777, avg loss: 2.532887, ppl: 12.589794 +epoch: 1, batch: 11732, sum loss: 6097.047363, avg loss: 3.096520, ppl: 22.120831 +epoch: 1, batch: 11733, sum loss: 5427.886230, avg loss: 2.870379, ppl: 17.643700 +epoch: 1, batch: 11734, sum loss: 4584.378418, avg loss: 2.733678, ppl: 15.389387 +epoch: 1, batch: 11735, sum loss: 5097.491699, avg loss: 2.953356, ppl: 19.170172 +epoch: 1, batch: 11736, sum loss: 
4558.558105, avg loss: 2.761089, ppl: 15.817063 +epoch: 1, batch: 11737, sum loss: 4911.280273, avg loss: 3.063806, ppl: 21.408876 +epoch: 1, batch: 11738, sum loss: 5416.870117, avg loss: 3.076019, ppl: 21.671961 +epoch: 1, batch: 11739, sum loss: 5054.177734, avg loss: 2.674168, ppl: 14.500282 +epoch: 1, batch: 11740, sum loss: 3574.262207, avg loss: 2.517086, ppl: 12.392436 +epoch: 1, batch: 11741, sum loss: 5369.690430, avg loss: 3.230861, ppl: 25.301424 +epoch: 1, batch: 11742, sum loss: 4601.819336, avg loss: 2.691122, ppl: 14.748219 +epoch: 1, batch: 11743, sum loss: 5420.116211, avg loss: 2.914041, ppl: 18.431129 +epoch: 1, batch: 11744, sum loss: 5095.223145, avg loss: 2.967515, ppl: 19.443542 +epoch: 1, batch: 11745, sum loss: 4460.170410, avg loss: 2.724600, ppl: 15.250318 +epoch: 1, batch: 11746, sum loss: 4126.073242, avg loss: 2.482595, ppl: 11.972295 +epoch: 1, batch: 11747, sum loss: 4429.544434, avg loss: 2.756406, ppl: 15.743161 +epoch: 1, batch: 11748, sum loss: 5945.167969, avg loss: 3.165691, ppl: 23.705122 +epoch: 1, batch: 11749, sum loss: 4535.758789, avg loss: 2.870733, ppl: 17.649960 +epoch: 1, batch: 11750, sum loss: 5508.562500, avg loss: 2.922314, ppl: 18.584249 +epoch: 1, batch: 11751, sum loss: 4252.377930, avg loss: 2.459444, ppl: 11.698300 +epoch: 1, batch: 11752, sum loss: 5656.581055, avg loss: 2.936958, ppl: 18.858393 +epoch: 1, batch: 11753, sum loss: 4838.466797, avg loss: 2.645417, ppl: 14.089315 +epoch: 1, batch: 11754, sum loss: 5282.029297, avg loss: 2.755362, ppl: 15.726737 +epoch: 1, batch: 11755, sum loss: 4668.975098, avg loss: 2.671039, ppl: 14.454975 +epoch: 1, batch: 11756, sum loss: 4772.609375, avg loss: 2.830729, ppl: 16.957823 +epoch: 1, batch: 11757, sum loss: 4997.902344, avg loss: 2.798378, ppl: 16.417992 +epoch: 1, batch: 11758, sum loss: 3783.630615, avg loss: 2.463301, ppl: 11.743515 +epoch: 1, batch: 11759, sum loss: 6106.435059, avg loss: 2.980203, ppl: 19.691807 +epoch: 1, batch: 11760, sum loss: 
5811.723633, avg loss: 2.709428, ppl: 15.020686 +epoch: 1, batch: 11761, sum loss: 5433.308105, avg loss: 2.858132, ppl: 17.428932 +epoch: 1, batch: 11762, sum loss: 5383.283691, avg loss: 2.803794, ppl: 16.507151 +epoch: 1, batch: 11763, sum loss: 5122.374023, avg loss: 2.751007, ppl: 15.658385 +epoch: 1, batch: 11764, sum loss: 4327.557617, avg loss: 2.754652, ppl: 15.715568 +epoch: 1, batch: 11765, sum loss: 6339.992188, avg loss: 3.166829, ppl: 23.732113 +epoch: 1, batch: 11766, sum loss: 5548.079590, avg loss: 3.008720, ppl: 20.261448 +epoch: 1, batch: 11767, sum loss: 4634.849609, avg loss: 2.657597, ppl: 14.261977 +epoch: 1, batch: 11768, sum loss: 4748.685547, avg loss: 2.999801, ppl: 20.081543 +epoch: 1, batch: 11769, sum loss: 5010.119141, avg loss: 2.798949, ppl: 16.427376 +epoch: 1, batch: 11770, sum loss: 4551.826172, avg loss: 2.714267, ppl: 15.093546 +epoch: 1, batch: 11771, sum loss: 5227.145020, avg loss: 2.850133, ppl: 17.290089 +epoch: 1, batch: 11772, sum loss: 5125.226562, avg loss: 2.844188, ppl: 17.187595 +epoch: 1, batch: 11773, sum loss: 4408.278320, avg loss: 2.555524, ppl: 12.878044 +epoch: 1, batch: 11774, sum loss: 4497.576172, avg loss: 2.446995, ppl: 11.553571 +epoch: 1, batch: 11775, sum loss: 5049.195312, avg loss: 2.980635, ppl: 19.700315 +epoch: 1, batch: 11776, sum loss: 4408.219727, avg loss: 2.555490, ppl: 12.877605 +epoch: 1, batch: 11777, sum loss: 4080.262207, avg loss: 2.812035, ppl: 16.643747 +epoch: 1, batch: 11778, sum loss: 4878.030273, avg loss: 2.759067, ppl: 15.785110 +epoch: 1, batch: 11779, sum loss: 4950.819824, avg loss: 2.819374, ppl: 16.766346 +epoch: 1, batch: 11780, sum loss: 4866.095215, avg loss: 2.870853, ppl: 17.652063 +epoch: 1, batch: 11781, sum loss: 4008.136230, avg loss: 2.609464, ppl: 13.591760 +epoch: 1, batch: 11782, sum loss: 5010.176758, avg loss: 2.791185, ppl: 16.300323 +epoch: 1, batch: 11783, sum loss: 5050.865234, avg loss: 2.554813, ppl: 12.868892 +epoch: 1, batch: 11784, sum loss: 
4254.289062, avg loss: 2.461973, ppl: 11.727927 +epoch: 1, batch: 11785, sum loss: 4437.031250, avg loss: 2.703858, ppl: 14.937251 +epoch: 1, batch: 11786, sum loss: 4315.260742, avg loss: 2.697038, ppl: 14.835722 +epoch: 1, batch: 11787, sum loss: 4455.947754, avg loss: 2.788453, ppl: 16.255850 +epoch: 1, batch: 11788, sum loss: 5634.614258, avg loss: 3.174430, ppl: 23.913195 +epoch: 1, batch: 11789, sum loss: 4178.337891, avg loss: 2.491555, ppl: 12.080046 +epoch: 1, batch: 11790, sum loss: 5105.928223, avg loss: 2.978955, ppl: 19.667244 +epoch: 1, batch: 11791, sum loss: 4196.913086, avg loss: 2.735928, ppl: 15.424044 +epoch: 1, batch: 11792, sum loss: 4616.506348, avg loss: 2.797883, ppl: 16.409863 +epoch: 1, batch: 11793, sum loss: 4537.708008, avg loss: 2.730270, ppl: 15.337023 +epoch: 1, batch: 11794, sum loss: 4075.369629, avg loss: 2.462459, ppl: 11.733630 +epoch: 1, batch: 11795, sum loss: 3696.930176, avg loss: 2.497926, ppl: 12.157250 +epoch: 1, batch: 11796, sum loss: 4715.688965, avg loss: 2.782117, ppl: 16.153187 +epoch: 1, batch: 11797, sum loss: 4381.361328, avg loss: 2.648949, ppl: 14.139170 +epoch: 1, batch: 11798, sum loss: 5668.034668, avg loss: 3.150658, ppl: 23.351433 +epoch: 1, batch: 11799, sum loss: 4324.023926, avg loss: 2.576891, ppl: 13.156178 +epoch: 1, batch: 11800, sum loss: 3648.039795, avg loss: 2.587262, ppl: 13.293330 +epoch: 1, batch: 11801, sum loss: 3821.617676, avg loss: 2.546048, ppl: 12.756589 +epoch: 1, batch: 11802, sum loss: 4073.820557, avg loss: 2.510056, ppl: 12.305616 +epoch: 1, batch: 11803, sum loss: 4560.728516, avg loss: 2.932945, ppl: 18.782856 +epoch: 1, batch: 11804, sum loss: 4736.051270, avg loss: 2.631140, ppl: 13.889591 +epoch: 1, batch: 11805, sum loss: 4877.153809, avg loss: 2.723145, ppl: 15.228147 +epoch: 1, batch: 11806, sum loss: 3914.496582, avg loss: 2.555155, ppl: 12.873289 +epoch: 1, batch: 11807, sum loss: 4547.051270, avg loss: 2.625318, ppl: 13.808969 +epoch: 1, batch: 11808, sum loss: 
5168.783203, avg loss: 2.849385, ppl: 17.277145 +epoch: 1, batch: 11809, sum loss: 5943.015625, avg loss: 2.899032, ppl: 18.156563 +epoch: 1, batch: 11810, sum loss: 3937.628906, avg loss: 2.457946, ppl: 11.680790 +epoch: 1, batch: 11811, sum loss: 4760.377441, avg loss: 2.785475, ppl: 16.207523 +epoch: 1, batch: 11812, sum loss: 3953.347656, avg loss: 2.548902, ppl: 12.793053 +epoch: 1, batch: 11813, sum loss: 5281.408203, avg loss: 2.911471, ppl: 18.383820 +epoch: 1, batch: 11814, sum loss: 4677.023438, avg loss: 2.937829, ppl: 18.874821 +epoch: 1, batch: 11815, sum loss: 6192.250488, avg loss: 3.208420, ppl: 24.739967 +epoch: 1, batch: 11816, sum loss: 4363.375488, avg loss: 2.768639, ppl: 15.936934 +epoch: 1, batch: 11817, sum loss: 3986.482666, avg loss: 2.539161, ppl: 12.669037 +epoch: 1, batch: 11818, sum loss: 4226.750000, avg loss: 2.648340, ppl: 14.130555 +epoch: 1, batch: 11819, sum loss: 5360.066406, avg loss: 2.981127, ppl: 19.710012 +epoch: 1, batch: 11820, sum loss: 5331.244629, avg loss: 2.932478, ppl: 18.774090 +epoch: 1, batch: 11821, sum loss: 3616.835693, avg loss: 2.518688, ppl: 12.412297 +epoch: 1, batch: 11822, sum loss: 5280.619141, avg loss: 2.998648, ppl: 20.058397 +epoch: 1, batch: 11823, sum loss: 4916.869629, avg loss: 2.654897, ppl: 14.223524 +epoch: 1, batch: 11824, sum loss: 4493.249023, avg loss: 2.552982, ppl: 12.845356 +epoch: 1, batch: 11825, sum loss: 4917.152832, avg loss: 2.983709, ppl: 19.760981 +epoch: 1, batch: 11826, sum loss: 5343.222168, avg loss: 2.668942, ppl: 14.424700 +epoch: 1, batch: 11827, sum loss: 5169.276855, avg loss: 2.732176, ppl: 15.366285 +epoch: 1, batch: 11828, sum loss: 4995.120605, avg loss: 2.849470, ppl: 17.278620 +epoch: 1, batch: 11829, sum loss: 4074.560303, avg loss: 2.569080, ppl: 13.053804 +epoch: 1, batch: 11830, sum loss: 4360.210449, avg loss: 2.634568, ppl: 13.937297 +epoch: 1, batch: 11831, sum loss: 4233.423340, avg loss: 2.647544, ppl: 14.119321 +epoch: 1, batch: 11832, sum loss: 
4987.042969, avg loss: 2.872721, ppl: 17.685068 +epoch: 1, batch: 11833, sum loss: 4521.334473, avg loss: 2.777232, ppl: 16.074472 +epoch: 1, batch: 11834, sum loss: 4812.886719, avg loss: 2.647352, ppl: 14.116614 +epoch: 1, batch: 11835, sum loss: 4668.334473, avg loss: 2.858747, ppl: 17.439665 +epoch: 1, batch: 11836, sum loss: 4354.491211, avg loss: 2.620031, ppl: 13.736148 +epoch: 1, batch: 11837, sum loss: 5446.111328, avg loss: 2.854356, ppl: 17.363253 +epoch: 1, batch: 11838, sum loss: 4369.220215, avg loss: 2.617867, ppl: 13.706460 +epoch: 1, batch: 11839, sum loss: 4222.397461, avg loss: 2.653927, ppl: 14.209728 +epoch: 1, batch: 11840, sum loss: 5589.964844, avg loss: 2.932825, ppl: 18.780613 +epoch: 1, batch: 11841, sum loss: 5370.836914, avg loss: 2.951010, ppl: 19.125252 +epoch: 1, batch: 11842, sum loss: 5150.717285, avg loss: 2.793231, ppl: 16.333706 +epoch: 1, batch: 11843, sum loss: 4054.447754, avg loss: 2.692196, ppl: 14.764068 +epoch: 1, batch: 11844, sum loss: 5211.171387, avg loss: 2.771900, ppl: 15.988979 +epoch: 1, batch: 11845, sum loss: 5015.948730, avg loss: 2.685197, ppl: 14.661095 +epoch: 1, batch: 11846, sum loss: 5089.240723, avg loss: 2.913131, ppl: 18.414373 +epoch: 1, batch: 11847, sum loss: 4161.280273, avg loss: 2.538914, ppl: 12.665908 +epoch: 1, batch: 11848, sum loss: 4556.663086, avg loss: 2.629350, ppl: 13.864751 +epoch: 1, batch: 11849, sum loss: 4964.960449, avg loss: 2.871579, ppl: 17.664892 +epoch: 1, batch: 11850, sum loss: 5343.640625, avg loss: 3.058753, ppl: 21.300968 +epoch: 1, batch: 11851, sum loss: 5387.897461, avg loss: 2.867428, ppl: 17.591719 +epoch: 1, batch: 11852, sum loss: 3386.127686, avg loss: 2.911546, ppl: 18.385195 +epoch: 1, batch: 11853, sum loss: 4370.148926, avg loss: 2.689322, ppl: 14.721698 +epoch: 1, batch: 11854, sum loss: 4198.223633, avg loss: 2.439409, ppl: 11.466265 +epoch: 1, batch: 11855, sum loss: 5537.139160, avg loss: 3.002787, ppl: 20.141596 +epoch: 1, batch: 11856, sum loss: 
4814.745605, avg loss: 2.712533, ppl: 15.067386 +epoch: 1, batch: 11857, sum loss: 4462.745605, avg loss: 2.547229, ppl: 12.771668 +epoch: 1, batch: 11858, sum loss: 4821.887207, avg loss: 3.040282, ppl: 20.911140 +epoch: 1, batch: 11859, sum loss: 3926.871094, avg loss: 2.700737, ppl: 14.890698 +epoch: 1, batch: 11860, sum loss: 4460.390137, avg loss: 2.609942, ppl: 13.598258 +epoch: 1, batch: 11861, sum loss: 4476.660645, avg loss: 2.695160, ppl: 14.807886 +epoch: 1, batch: 11862, sum loss: 4546.860352, avg loss: 2.516248, ppl: 12.382051 +epoch: 1, batch: 11863, sum loss: 4679.135742, avg loss: 2.874162, ppl: 17.710575 +epoch: 1, batch: 11864, sum loss: 4067.375244, avg loss: 2.571033, ppl: 13.079334 +epoch: 1, batch: 11865, sum loss: 4675.977051, avg loss: 2.861675, ppl: 17.490801 +epoch: 1, batch: 11866, sum loss: 3606.314941, avg loss: 2.536086, ppl: 12.630144 +epoch: 1, batch: 11867, sum loss: 5408.306641, avg loss: 2.881357, ppl: 17.838459 +epoch: 1, batch: 11868, sum loss: 5499.451172, avg loss: 2.900555, ppl: 18.184229 +epoch: 1, batch: 11869, sum loss: 4429.408691, avg loss: 2.775319, ppl: 16.043737 +epoch: 1, batch: 11870, sum loss: 4410.831543, avg loss: 2.711021, ppl: 15.044631 +epoch: 1, batch: 11871, sum loss: 4239.405273, avg loss: 2.666293, ppl: 14.386535 +epoch: 1, batch: 11872, sum loss: 4987.200684, avg loss: 2.706023, ppl: 14.969626 +epoch: 1, batch: 11873, sum loss: 4665.617676, avg loss: 2.692220, ppl: 14.764420 +epoch: 1, batch: 11874, sum loss: 5077.682617, avg loss: 2.734347, ppl: 15.399686 +epoch: 1, batch: 11875, sum loss: 4477.799805, avg loss: 2.713818, ppl: 15.086768 +epoch: 1, batch: 11876, sum loss: 4525.405273, avg loss: 2.665139, ppl: 14.369943 +epoch: 1, batch: 11877, sum loss: 5224.078613, avg loss: 2.772866, ppl: 16.004429 +epoch: 1, batch: 11878, sum loss: 4339.612305, avg loss: 2.771144, ppl: 15.976908 +epoch: 1, batch: 11879, sum loss: 4641.443359, avg loss: 2.692253, ppl: 14.764898 +epoch: 1, batch: 11880, sum loss: 
4332.262695, avg loss: 2.695870, ppl: 14.818404 +epoch: 1, batch: 11881, sum loss: 4911.969727, avg loss: 2.903055, ppl: 18.229759 +epoch: 1, batch: 11882, sum loss: 4371.729004, avg loss: 2.670574, ppl: 14.448256 +epoch: 1, batch: 11883, sum loss: 5141.816895, avg loss: 2.751106, ppl: 15.659939 +epoch: 1, batch: 11884, sum loss: 3838.213135, avg loss: 2.611029, ppl: 13.613057 +epoch: 1, batch: 11885, sum loss: 5068.447754, avg loss: 2.732317, ppl: 15.368450 +epoch: 1, batch: 11886, sum loss: 4634.604492, avg loss: 2.696105, ppl: 14.821888 +epoch: 1, batch: 11887, sum loss: 4456.245605, avg loss: 2.713913, ppl: 15.088207 +epoch: 1, batch: 11888, sum loss: 4831.615723, avg loss: 2.817269, ppl: 16.731089 +epoch: 1, batch: 11889, sum loss: 4296.286621, avg loss: 2.745231, ppl: 15.568209 +epoch: 1, batch: 11890, sum loss: 4252.663086, avg loss: 2.786804, ppl: 16.229073 +epoch: 1, batch: 11891, sum loss: 4283.962402, avg loss: 2.753189, ppl: 15.692597 +epoch: 1, batch: 11892, sum loss: 5317.291016, avg loss: 2.940979, ppl: 18.934383 +epoch: 1, batch: 11893, sum loss: 4184.512207, avg loss: 2.575084, ppl: 13.132426 +epoch: 1, batch: 11894, sum loss: 3269.481934, avg loss: 2.253261, ppl: 9.518726 +epoch: 1, batch: 11895, sum loss: 4393.520508, avg loss: 2.761484, ppl: 15.823302 +epoch: 1, batch: 11896, sum loss: 4881.094727, avg loss: 2.872922, ppl: 17.688633 +epoch: 1, batch: 11897, sum loss: 4637.667969, avg loss: 2.726436, ppl: 15.278340 +epoch: 1, batch: 11898, sum loss: 4753.585938, avg loss: 2.543385, ppl: 12.722659 +epoch: 1, batch: 11899, sum loss: 4743.932129, avg loss: 2.626762, ppl: 13.828918 +epoch: 1, batch: 11900, sum loss: 4116.322266, avg loss: 2.796415, ppl: 16.385792 +epoch: 1, batch: 11901, sum loss: 4687.779297, avg loss: 2.778767, ppl: 16.099152 +epoch: 1, batch: 11902, sum loss: 4884.321289, avg loss: 3.041296, ppl: 20.932348 +epoch: 1, batch: 11903, sum loss: 4881.895996, avg loss: 2.761254, ppl: 15.819662 +epoch: 1, batch: 11904, sum loss: 
4113.755371, avg loss: 2.579157, ppl: 13.186019 +epoch: 1, batch: 11905, sum loss: 4527.156250, avg loss: 2.639741, ppl: 14.009577 +epoch: 1, batch: 11906, sum loss: 4370.258301, avg loss: 2.601344, ppl: 13.481850 +epoch: 1, batch: 11907, sum loss: 4422.331543, avg loss: 2.670490, ppl: 14.447047 +epoch: 1, batch: 11908, sum loss: 4582.137695, avg loss: 2.615376, ppl: 13.672350 +epoch: 1, batch: 11909, sum loss: 4522.613281, avg loss: 2.506992, ppl: 12.267971 +epoch: 1, batch: 11910, sum loss: 4217.838867, avg loss: 2.664459, ppl: 14.360178 +epoch: 1, batch: 11911, sum loss: 3550.862305, avg loss: 2.357810, ppl: 10.567785 +epoch: 1, batch: 11912, sum loss: 4291.392090, avg loss: 2.768640, ppl: 15.936950 +epoch: 1, batch: 11913, sum loss: 5596.367188, avg loss: 2.931570, ppl: 18.757057 +epoch: 1, batch: 11914, sum loss: 5036.227539, avg loss: 2.750534, ppl: 15.650989 +epoch: 1, batch: 11915, sum loss: 3564.850586, avg loss: 2.461914, ppl: 11.727231 +epoch: 1, batch: 11916, sum loss: 5148.448730, avg loss: 2.947023, ppl: 19.049154 +epoch: 1, batch: 11917, sum loss: 4982.984863, avg loss: 2.812068, ppl: 16.644306 +epoch: 1, batch: 11918, sum loss: 5367.342773, avg loss: 3.054834, ppl: 21.217655 +epoch: 1, batch: 11919, sum loss: 5178.402344, avg loss: 2.907581, ppl: 18.312452 +epoch: 1, batch: 11920, sum loss: 3624.447998, avg loss: 2.489319, ppl: 12.053063 +epoch: 1, batch: 11921, sum loss: 5026.288574, avg loss: 2.912102, ppl: 18.395430 +epoch: 1, batch: 11922, sum loss: 4969.876465, avg loss: 2.887784, ppl: 17.953480 +epoch: 1, batch: 11923, sum loss: 5455.405273, avg loss: 3.113816, ppl: 22.506767 +epoch: 1, batch: 11924, sum loss: 4517.280273, avg loss: 2.762862, ppl: 15.845134 +epoch: 1, batch: 11925, sum loss: 4398.180664, avg loss: 2.607102, ppl: 13.559697 +epoch: 1, batch: 11926, sum loss: 5351.074219, avg loss: 3.007911, ppl: 20.245068 +epoch: 1, batch: 11927, sum loss: 5120.691406, avg loss: 2.835377, ppl: 17.036825 +epoch: 1, batch: 11928, sum loss: 
4818.641602, avg loss: 2.956222, ppl: 19.225199 +epoch: 1, batch: 11929, sum loss: 4506.958984, avg loss: 2.705257, ppl: 14.958167 +epoch: 1, batch: 11930, sum loss: 4654.562500, avg loss: 2.767278, ppl: 15.915260 +epoch: 1, batch: 11931, sum loss: 3909.968750, avg loss: 2.632976, ppl: 13.915113 +epoch: 1, batch: 11932, sum loss: 5716.161621, avg loss: 2.691225, ppl: 14.749730 +epoch: 1, batch: 11933, sum loss: 4941.336914, avg loss: 2.613081, ppl: 13.641021 +epoch: 1, batch: 11934, sum loss: 4165.309082, avg loss: 2.687296, ppl: 14.691901 +epoch: 1, batch: 11935, sum loss: 5505.193359, avg loss: 3.142234, ppl: 23.155535 +epoch: 1, batch: 11936, sum loss: 3931.696289, avg loss: 2.680093, ppl: 14.586447 +epoch: 1, batch: 11937, sum loss: 5288.453125, avg loss: 2.767375, ppl: 15.916794 +epoch: 1, batch: 11938, sum loss: 4589.967285, avg loss: 2.758394, ppl: 15.774488 +epoch: 1, batch: 11939, sum loss: 4394.997559, avg loss: 2.790474, ppl: 16.288746 +epoch: 1, batch: 11940, sum loss: 5179.173828, avg loss: 2.725881, ppl: 15.269862 +epoch: 1, batch: 11941, sum loss: 5580.992188, avg loss: 3.002147, ppl: 20.128716 +epoch: 1, batch: 11942, sum loss: 5446.343750, avg loss: 2.887775, ppl: 17.953321 +epoch: 1, batch: 11943, sum loss: 4036.910645, avg loss: 2.420210, ppl: 11.248225 +epoch: 1, batch: 11944, sum loss: 5652.477051, avg loss: 2.927228, ppl: 18.675789 +epoch: 1, batch: 11945, sum loss: 4588.888184, avg loss: 2.538102, ppl: 12.655626 +epoch: 1, batch: 11946, sum loss: 4822.246582, avg loss: 2.818379, ppl: 16.749680 +epoch: 1, batch: 11947, sum loss: 4342.567383, avg loss: 2.627082, ppl: 13.833350 +epoch: 1, batch: 11948, sum loss: 4730.114258, avg loss: 2.636630, ppl: 13.966060 +epoch: 1, batch: 11949, sum loss: 4611.212891, avg loss: 2.690323, ppl: 14.736429 +epoch: 1, batch: 11950, sum loss: 4729.364746, avg loss: 2.711792, ppl: 15.056225 +epoch: 1, batch: 11951, sum loss: 3841.549561, avg loss: 2.542389, ppl: 12.709998 +epoch: 1, batch: 11952, sum loss: 
4356.633301, avg loss: 2.840048, ppl: 17.116585 +epoch: 1, batch: 11953, sum loss: 4178.557129, avg loss: 2.541701, ppl: 12.701262 +epoch: 1, batch: 11954, sum loss: 4234.526367, avg loss: 2.359068, ppl: 10.581082 +epoch: 1, batch: 11955, sum loss: 4452.167480, avg loss: 2.874220, ppl: 17.711611 +epoch: 1, batch: 11956, sum loss: 5213.333496, avg loss: 2.647706, ppl: 14.121611 +epoch: 1, batch: 11957, sum loss: 5002.952148, avg loss: 2.905315, ppl: 18.270992 +epoch: 1, batch: 11958, sum loss: 5163.557617, avg loss: 2.832451, ppl: 16.987043 +epoch: 1, batch: 11959, sum loss: 4723.324707, avg loss: 2.803160, ppl: 16.496693 +epoch: 1, batch: 11960, sum loss: 3723.508301, avg loss: 2.444851, ppl: 11.528833 +epoch: 1, batch: 11961, sum loss: 4679.978027, avg loss: 2.769218, ppl: 15.946155 +epoch: 1, batch: 11962, sum loss: 4855.502930, avg loss: 2.914467, ppl: 18.438988 +epoch: 1, batch: 11963, sum loss: 4842.378418, avg loss: 2.723498, ppl: 15.233510 +epoch: 1, batch: 11964, sum loss: 5082.052246, avg loss: 2.942705, ppl: 18.967091 +epoch: 1, batch: 11965, sum loss: 5314.140137, avg loss: 2.987150, ppl: 19.829092 +epoch: 1, batch: 11966, sum loss: 5206.873047, avg loss: 2.791889, ppl: 16.311804 +epoch: 1, batch: 11967, sum loss: 4233.242188, avg loss: 2.397079, ppl: 10.991027 +epoch: 1, batch: 11968, sum loss: 4399.997559, avg loss: 2.603549, ppl: 13.511602 +epoch: 1, batch: 11969, sum loss: 4010.929199, avg loss: 2.619810, ppl: 13.733115 +epoch: 1, batch: 11970, sum loss: 4604.831543, avg loss: 2.785742, ppl: 16.211843 +epoch: 1, batch: 11971, sum loss: 4642.445312, avg loss: 2.584881, ppl: 13.261705 +epoch: 1, batch: 11972, sum loss: 4223.913086, avg loss: 2.532322, ppl: 12.582691 +epoch: 1, batch: 11973, sum loss: 3985.117676, avg loss: 2.584382, ppl: 13.255099 +epoch: 1, batch: 11974, sum loss: 4820.954102, avg loss: 2.794756, ppl: 16.358639 +epoch: 1, batch: 11975, sum loss: 5933.501465, avg loss: 2.851274, ppl: 17.309824 +epoch: 1, batch: 11976, sum loss: 
3716.490723, avg loss: 2.620939, ppl: 13.748621 +epoch: 1, batch: 11977, sum loss: 5420.527344, avg loss: 2.903335, ppl: 18.234863 +epoch: 1, batch: 11978, sum loss: 4210.419922, avg loss: 2.539457, ppl: 12.672789 +epoch: 1, batch: 11979, sum loss: 4746.147949, avg loss: 2.600629, ppl: 13.472211 +epoch: 1, batch: 11980, sum loss: 4690.505859, avg loss: 2.570140, ppl: 13.067656 +epoch: 1, batch: 11981, sum loss: 4997.375977, avg loss: 2.824972, ppl: 16.860479 +epoch: 1, batch: 11982, sum loss: 4460.915039, avg loss: 2.625612, ppl: 13.813028 +epoch: 1, batch: 11983, sum loss: 4727.648438, avg loss: 2.774442, ppl: 16.029675 +epoch: 1, batch: 11984, sum loss: 4729.128906, avg loss: 2.830119, ppl: 16.947479 +epoch: 1, batch: 11985, sum loss: 4492.054688, avg loss: 2.709321, ppl: 15.019078 +epoch: 1, batch: 11986, sum loss: 4881.583008, avg loss: 3.020782, ppl: 20.507313 +epoch: 1, batch: 11987, sum loss: 4194.666016, avg loss: 2.616760, ppl: 13.691289 +epoch: 1, batch: 11988, sum loss: 4696.287109, avg loss: 2.847961, ppl: 17.252563 +epoch: 1, batch: 11989, sum loss: 4948.827637, avg loss: 2.805458, ppl: 16.534645 +epoch: 1, batch: 11990, sum loss: 4566.525879, avg loss: 2.888378, ppl: 17.964151 +epoch: 1, batch: 11991, sum loss: 3483.753662, avg loss: 2.436192, ppl: 11.429429 +epoch: 1, batch: 11992, sum loss: 4681.531250, avg loss: 2.715506, ppl: 15.112248 +epoch: 1, batch: 11993, sum loss: 4923.971680, avg loss: 2.777198, ppl: 16.073915 +epoch: 1, batch: 11994, sum loss: 5273.111816, avg loss: 2.782645, ppl: 16.161709 +epoch: 1, batch: 11995, sum loss: 5272.274414, avg loss: 2.696815, ppl: 14.832422 +epoch: 1, batch: 11996, sum loss: 4292.797363, avg loss: 2.540117, ppl: 12.681152 +epoch: 1, batch: 11997, sum loss: 3806.999023, avg loss: 2.598634, ppl: 13.445359 +epoch: 1, batch: 11998, sum loss: 5692.648438, avg loss: 2.778257, ppl: 16.090948 +epoch: 1, batch: 11999, sum loss: 4559.679688, avg loss: 2.797349, ppl: 16.401117 +epoch: 1, batch: 12000, sum loss: 
4566.234863, avg loss: 2.705116, ppl: 14.956045 +epoch: 1, batch: 12001, sum loss: 4777.379395, avg loss: 2.864136, ppl: 17.533905 +epoch: 1, batch: 12002, sum loss: 4453.073242, avg loss: 2.610242, ppl: 13.602344 +epoch: 1, batch: 12003, sum loss: 4118.377930, avg loss: 2.858000, ppl: 17.426640 +epoch: 1, batch: 12004, sum loss: 4758.832031, avg loss: 2.842791, ppl: 17.163603 +epoch: 1, batch: 12005, sum loss: 5451.802734, avg loss: 2.962936, ppl: 19.354717 +epoch: 1, batch: 12006, sum loss: 5372.081543, avg loss: 2.958194, ppl: 19.263161 +epoch: 1, batch: 12007, sum loss: 5047.472168, avg loss: 2.846854, ppl: 17.233484 +epoch: 1, batch: 12008, sum loss: 4397.398438, avg loss: 2.724535, ppl: 15.249314 +epoch: 1, batch: 12009, sum loss: 5572.665527, avg loss: 3.061904, ppl: 21.368208 +epoch: 1, batch: 12010, sum loss: 4194.446777, avg loss: 2.651357, ppl: 14.173258 +epoch: 1, batch: 12011, sum loss: 4248.394531, avg loss: 2.972984, ppl: 19.550177 +epoch: 1, batch: 12012, sum loss: 4346.464355, avg loss: 2.757909, ppl: 15.766837 +epoch: 1, batch: 12013, sum loss: 4833.149414, avg loss: 2.864938, ppl: 17.547956 +epoch: 1, batch: 12014, sum loss: 4949.869141, avg loss: 2.939352, ppl: 18.903599 +epoch: 1, batch: 12015, sum loss: 4405.822266, avg loss: 2.532082, ppl: 12.579668 +epoch: 1, batch: 12016, sum loss: 4763.460938, avg loss: 2.664128, ppl: 14.355427 +epoch: 1, batch: 12017, sum loss: 5011.292480, avg loss: 2.725010, ppl: 15.256561 +epoch: 1, batch: 12018, sum loss: 4504.100586, avg loss: 2.896528, ppl: 18.111151 +epoch: 1, batch: 12019, sum loss: 4718.481934, avg loss: 2.569979, ppl: 13.065553 +epoch: 1, batch: 12020, sum loss: 5451.276855, avg loss: 2.767146, ppl: 15.913147 +epoch: 1, batch: 12021, sum loss: 4839.226562, avg loss: 2.567229, ppl: 13.029670 +epoch: 1, batch: 12022, sum loss: 5559.199707, avg loss: 3.036155, ppl: 20.825022 +epoch: 1, batch: 12023, sum loss: 3989.838379, avg loss: 2.447754, ppl: 11.562345 +epoch: 1, batch: 12024, sum loss: 
5003.640137, avg loss: 2.709064, ppl: 15.015207 +epoch: 1, batch: 12025, sum loss: 4250.231445, avg loss: 2.543526, ppl: 12.724458 +epoch: 1, batch: 12026, sum loss: 4255.650391, avg loss: 2.696863, ppl: 14.833134 +epoch: 1, batch: 12027, sum loss: 5685.954102, avg loss: 2.969167, ppl: 19.475679 +epoch: 1, batch: 12028, sum loss: 3371.765137, avg loss: 2.346392, ppl: 10.447805 +epoch: 1, batch: 12029, sum loss: 5254.733398, avg loss: 2.707230, ppl: 14.987700 +epoch: 1, batch: 12030, sum loss: 4800.975586, avg loss: 2.804308, ppl: 16.515646 +epoch: 1, batch: 12031, sum loss: 4772.230469, avg loss: 2.847393, ppl: 17.242771 +epoch: 1, batch: 12032, sum loss: 4929.892578, avg loss: 2.828395, ppl: 16.918287 +epoch: 1, batch: 12033, sum loss: 4669.666504, avg loss: 2.852576, ppl: 17.332369 +epoch: 1, batch: 12034, sum loss: 4613.092773, avg loss: 2.739366, ppl: 15.477174 +epoch: 1, batch: 12035, sum loss: 4628.317383, avg loss: 2.695584, ppl: 14.814165 +epoch: 1, batch: 12036, sum loss: 4194.746094, avg loss: 2.776139, ppl: 16.056906 +epoch: 1, batch: 12037, sum loss: 4319.865234, avg loss: 2.582107, ppl: 13.224977 +epoch: 1, batch: 12038, sum loss: 4169.292969, avg loss: 2.652222, ppl: 14.185523 +epoch: 1, batch: 12039, sum loss: 3807.662109, avg loss: 2.662701, ppl: 14.334954 +epoch: 1, batch: 12040, sum loss: 4351.166016, avg loss: 2.860727, ppl: 17.474232 +epoch: 1, batch: 12041, sum loss: 3788.016357, avg loss: 2.523662, ppl: 12.474192 +epoch: 1, batch: 12042, sum loss: 4633.479980, avg loss: 2.735230, ppl: 15.413291 +epoch: 1, batch: 12043, sum loss: 6117.391113, avg loss: 3.223073, ppl: 25.105139 +epoch: 1, batch: 12044, sum loss: 5002.139160, avg loss: 2.754482, ppl: 15.712896 +epoch: 1, batch: 12045, sum loss: 5318.200195, avg loss: 2.825824, ppl: 16.874836 +epoch: 1, batch: 12046, sum loss: 4846.051758, avg loss: 2.622322, ppl: 13.767659 +epoch: 1, batch: 12047, sum loss: 4548.905762, avg loss: 2.760258, ppl: 15.803926 +epoch: 1, batch: 12048, sum loss: 
4543.415527, avg loss: 2.785662, ppl: 16.210552 +epoch: 1, batch: 12049, sum loss: 4469.552246, avg loss: 2.738696, ppl: 15.466808 +epoch: 1, batch: 12050, sum loss: 3746.320312, avg loss: 2.560711, ppl: 12.945020 +epoch: 1, batch: 12051, sum loss: 3866.639893, avg loss: 2.439520, ppl: 11.467539 +epoch: 1, batch: 12052, sum loss: 4990.555176, avg loss: 2.746591, ppl: 15.589391 +epoch: 1, batch: 12053, sum loss: 4032.988525, avg loss: 2.617124, ppl: 13.696280 +epoch: 1, batch: 12054, sum loss: 4235.665039, avg loss: 2.593794, ppl: 13.380436 +epoch: 1, batch: 12055, sum loss: 5571.006348, avg loss: 2.830796, ppl: 16.958960 +epoch: 1, batch: 12056, sum loss: 5432.078613, avg loss: 2.794279, ppl: 16.350842 +epoch: 1, batch: 12057, sum loss: 4643.171875, avg loss: 2.618822, ppl: 13.719557 +epoch: 1, batch: 12058, sum loss: 5237.916504, avg loss: 2.922945, ppl: 18.595964 +epoch: 1, batch: 12059, sum loss: 6337.352051, avg loss: 3.020663, ppl: 20.504892 +epoch: 1, batch: 12060, sum loss: 4477.922363, avg loss: 2.715538, ppl: 15.112739 +epoch: 1, batch: 12061, sum loss: 4122.726074, avg loss: 2.554353, ppl: 12.862978 +epoch: 1, batch: 12062, sum loss: 3800.983398, avg loss: 2.505592, ppl: 12.250811 +epoch: 1, batch: 12063, sum loss: 3343.353271, avg loss: 2.338009, ppl: 10.360592 +epoch: 1, batch: 12064, sum loss: 4811.608887, avg loss: 2.691056, ppl: 14.747248 +epoch: 1, batch: 12065, sum loss: 4101.792969, avg loss: 2.576503, ppl: 13.151069 +epoch: 1, batch: 12066, sum loss: 4896.245117, avg loss: 2.774076, ppl: 16.023821 +epoch: 1, batch: 12067, sum loss: 4547.135254, avg loss: 2.548843, ppl: 12.792291 +epoch: 1, batch: 12068, sum loss: 4920.008789, avg loss: 2.906089, ppl: 18.285147 +epoch: 1, batch: 12069, sum loss: 4436.662109, avg loss: 2.708585, ppl: 15.008021 +epoch: 1, batch: 12070, sum loss: 4640.508301, avg loss: 2.736149, ppl: 15.427457 +epoch: 1, batch: 12071, sum loss: 6172.863770, avg loss: 3.195064, ppl: 24.411737 +epoch: 1, batch: 12072, sum loss: 
3750.672119, avg loss: 2.467548, ppl: 11.793489 +epoch: 1, batch: 12073, sum loss: 4203.229980, avg loss: 2.706523, ppl: 14.977109 +epoch: 1, batch: 12074, sum loss: 4968.367188, avg loss: 2.641344, ppl: 14.032044 +epoch: 1, batch: 12075, sum loss: 4751.249023, avg loss: 2.747975, ppl: 15.610990 +epoch: 1, batch: 12076, sum loss: 4473.226074, avg loss: 2.522970, ppl: 12.465567 +epoch: 1, batch: 12077, sum loss: 4693.384277, avg loss: 2.829044, ppl: 16.929270 +epoch: 1, batch: 12078, sum loss: 4722.007324, avg loss: 2.597364, ppl: 13.428290 +epoch: 1, batch: 12079, sum loss: 5225.735840, avg loss: 2.769335, ppl: 15.948030 +epoch: 1, batch: 12080, sum loss: 3846.364014, avg loss: 2.598895, ppl: 13.448863 +epoch: 1, batch: 12081, sum loss: 3437.265869, avg loss: 2.373802, ppl: 10.738146 +epoch: 1, batch: 12082, sum loss: 5559.250977, avg loss: 2.938293, ppl: 18.883589 +epoch: 1, batch: 12083, sum loss: 4478.342285, avg loss: 2.603687, ppl: 13.513474 +epoch: 1, batch: 12084, sum loss: 3563.716797, avg loss: 2.635885, ppl: 13.955661 +epoch: 1, batch: 12085, sum loss: 5191.854004, avg loss: 2.881162, ppl: 17.834988 +epoch: 1, batch: 12086, sum loss: 4288.812012, avg loss: 2.766976, ppl: 15.910442 +epoch: 1, batch: 12087, sum loss: 4573.774414, avg loss: 2.543812, ppl: 12.728099 +epoch: 1, batch: 12088, sum loss: 4984.716797, avg loss: 2.871381, ppl: 17.661385 +epoch: 1, batch: 12089, sum loss: 5737.511719, avg loss: 2.972804, ppl: 19.546654 +epoch: 1, batch: 12090, sum loss: 4612.160156, avg loss: 2.740440, ppl: 15.493795 +epoch: 1, batch: 12091, sum loss: 5631.643066, avg loss: 3.049076, ppl: 21.095844 +epoch: 1, batch: 12092, sum loss: 4633.766602, avg loss: 2.598860, ppl: 13.448396 +epoch: 1, batch: 12093, sum loss: 4566.929688, avg loss: 2.650569, ppl: 14.162090 +epoch: 1, batch: 12094, sum loss: 4367.666016, avg loss: 2.748688, ppl: 15.622129 +epoch: 1, batch: 12095, sum loss: 4546.964355, avg loss: 2.781018, ppl: 16.135435 +epoch: 1, batch: 12096, sum loss: 
4795.929199, avg loss: 2.868379, ppl: 17.608450 +epoch: 1, batch: 12097, sum loss: 5401.905762, avg loss: 2.864213, ppl: 17.535252 +epoch: 1, batch: 12098, sum loss: 5068.582031, avg loss: 2.777305, ppl: 16.075642 +epoch: 1, batch: 12099, sum loss: 4795.301758, avg loss: 2.943709, ppl: 18.986134 +epoch: 1, batch: 12100, sum loss: 3804.723877, avg loss: 2.539869, ppl: 12.678011 +epoch: 1, batch: 12101, sum loss: 4418.184570, avg loss: 2.648792, ppl: 14.136948 +epoch: 1, batch: 12102, sum loss: 5003.505371, avg loss: 2.708991, ppl: 15.014112 +epoch: 1, batch: 12103, sum loss: 4857.890137, avg loss: 2.860948, ppl: 17.478094 +epoch: 1, batch: 12104, sum loss: 5224.120605, avg loss: 3.082077, ppl: 21.803642 +epoch: 1, batch: 12105, sum loss: 4208.890137, avg loss: 2.481657, ppl: 11.961068 +epoch: 1, batch: 12106, sum loss: 5035.939453, avg loss: 2.799299, ppl: 16.433126 +epoch: 1, batch: 12107, sum loss: 4056.014160, avg loss: 2.560615, ppl: 12.943776 +epoch: 1, batch: 12108, sum loss: 5999.920410, avg loss: 3.019587, ppl: 20.482836 +epoch: 1, batch: 12109, sum loss: 4334.551270, avg loss: 2.805535, ppl: 16.535917 +epoch: 1, batch: 12110, sum loss: 3733.460205, avg loss: 2.745191, ppl: 15.567593 +epoch: 1, batch: 12111, sum loss: 5554.357910, avg loss: 2.880891, ppl: 17.830154 +epoch: 1, batch: 12112, sum loss: 4082.720703, avg loss: 2.546925, ppl: 12.767780 +epoch: 1, batch: 12113, sum loss: 5081.723633, avg loss: 2.716047, ppl: 15.120434 +epoch: 1, batch: 12114, sum loss: 5063.369629, avg loss: 2.736957, ppl: 15.439923 +epoch: 1, batch: 12115, sum loss: 4838.568359, avg loss: 2.800097, ppl: 16.446249 +epoch: 1, batch: 12116, sum loss: 4602.269043, avg loss: 2.752553, ppl: 15.682626 +epoch: 1, batch: 12117, sum loss: 5019.158203, avg loss: 2.892887, ppl: 18.045324 +epoch: 1, batch: 12118, sum loss: 3681.702393, avg loss: 2.507972, ppl: 12.279995 +epoch: 1, batch: 12119, sum loss: 5053.234863, avg loss: 2.693622, ppl: 14.785132 +epoch: 1, batch: 12120, sum loss: 
4498.293945, avg loss: 2.847022, ppl: 17.236368 +epoch: 1, batch: 12121, sum loss: 5607.506348, avg loss: 3.089535, ppl: 21.966866 +epoch: 1, batch: 12122, sum loss: 4428.903809, avg loss: 3.021080, ppl: 20.513445 +epoch: 1, batch: 12123, sum loss: 3850.894775, avg loss: 2.328232, ppl: 10.259782 +epoch: 1, batch: 12124, sum loss: 5203.920410, avg loss: 3.117987, ppl: 22.600842 +epoch: 1, batch: 12125, sum loss: 4119.973633, avg loss: 2.544765, ppl: 12.740228 +epoch: 1, batch: 12126, sum loss: 3543.771240, avg loss: 2.313167, ppl: 10.106377 +epoch: 1, batch: 12127, sum loss: 4512.376465, avg loss: 2.468477, ppl: 11.804458 +epoch: 1, batch: 12128, sum loss: 3942.631348, avg loss: 2.420277, ppl: 11.248976 +epoch: 1, batch: 12129, sum loss: 5339.751953, avg loss: 2.937157, ppl: 18.862152 +epoch: 1, batch: 12130, sum loss: 4150.103027, avg loss: 2.650130, ppl: 14.155872 +epoch: 1, batch: 12131, sum loss: 5380.412109, avg loss: 2.845273, ppl: 17.206264 +epoch: 1, batch: 12132, sum loss: 4683.259766, avg loss: 2.503078, ppl: 12.220055 +epoch: 1, batch: 12133, sum loss: 3421.270264, avg loss: 2.351388, ppl: 10.500138 +epoch: 1, batch: 12134, sum loss: 5068.842773, avg loss: 2.803563, ppl: 16.503349 +epoch: 1, batch: 12135, sum loss: 5333.188965, avg loss: 2.818810, ppl: 16.756903 +epoch: 1, batch: 12136, sum loss: 4357.116211, avg loss: 2.485520, ppl: 12.007361 +epoch: 1, batch: 12137, sum loss: 5622.071777, avg loss: 3.008064, ppl: 20.248161 +epoch: 1, batch: 12138, sum loss: 4738.354004, avg loss: 2.950407, ppl: 19.113731 +epoch: 1, batch: 12139, sum loss: 4992.022949, avg loss: 2.686772, ppl: 14.684204 +epoch: 1, batch: 12140, sum loss: 5331.574219, avg loss: 2.832930, ppl: 16.995182 +epoch: 1, batch: 12141, sum loss: 4412.988281, avg loss: 2.732501, ppl: 15.371276 +epoch: 1, batch: 12142, sum loss: 4993.714355, avg loss: 2.988459, ppl: 19.855061 +epoch: 1, batch: 12143, sum loss: 4949.599609, avg loss: 2.722552, ppl: 15.219112 +epoch: 1, batch: 12144, sum loss: 
5014.436523, avg loss: 2.626735, ppl: 13.828542 +epoch: 1, batch: 12145, sum loss: 6296.859375, avg loss: 2.935599, ppl: 18.832777 +epoch: 1, batch: 12146, sum loss: 3901.392578, avg loss: 2.650403, ppl: 14.159738 +epoch: 1, batch: 12147, sum loss: 3514.190186, avg loss: 2.415251, ppl: 11.192579 +epoch: 1, batch: 12148, sum loss: 5009.218262, avg loss: 2.800010, ppl: 16.444815 +epoch: 1, batch: 12149, sum loss: 3615.801758, avg loss: 2.463080, ppl: 11.740920 +epoch: 1, batch: 12150, sum loss: 4394.176270, avg loss: 2.739511, ppl: 15.479422 +epoch: 1, batch: 12151, sum loss: 3836.227051, avg loss: 2.538866, ppl: 12.665304 +epoch: 1, batch: 12152, sum loss: 4619.056152, avg loss: 2.702783, ppl: 14.921202 +epoch: 1, batch: 12153, sum loss: 4989.466309, avg loss: 2.589241, ppl: 13.319653 +epoch: 1, batch: 12154, sum loss: 4435.132812, avg loss: 2.758167, ppl: 15.770908 +epoch: 1, batch: 12155, sum loss: 5190.059570, avg loss: 2.933895, ppl: 18.800711 +epoch: 1, batch: 12156, sum loss: 4196.581543, avg loss: 2.555774, ppl: 12.881271 +epoch: 1, batch: 12157, sum loss: 5048.673828, avg loss: 2.812632, ppl: 16.653690 +epoch: 1, batch: 12158, sum loss: 3464.931641, avg loss: 2.647007, ppl: 14.111735 +epoch: 1, batch: 12159, sum loss: 4832.861328, avg loss: 2.834523, ppl: 17.022278 +epoch: 1, batch: 12160, sum loss: 4153.604004, avg loss: 2.817913, ppl: 16.741879 +epoch: 1, batch: 12161, sum loss: 4393.277344, avg loss: 2.818010, ppl: 16.743496 +epoch: 1, batch: 12162, sum loss: 4752.478516, avg loss: 2.755060, ppl: 15.721988 +epoch: 1, batch: 12163, sum loss: 4463.894043, avg loss: 2.661833, ppl: 14.322519 +epoch: 1, batch: 12164, sum loss: 5486.568359, avg loss: 2.875560, ppl: 17.735350 +epoch: 1, batch: 12165, sum loss: 6838.268555, avg loss: 3.300323, ppl: 27.121386 +epoch: 1, batch: 12166, sum loss: 4286.172363, avg loss: 2.688941, ppl: 14.716087 +epoch: 1, batch: 12167, sum loss: 4642.740723, avg loss: 2.908985, ppl: 18.338182 +epoch: 1, batch: 12168, sum loss: 
4817.940430, avg loss: 2.776911, ppl: 16.069307 +epoch: 1, batch: 12169, sum loss: 5477.791504, avg loss: 2.954580, ppl: 19.193661 +epoch: 1, batch: 12170, sum loss: 4622.347168, avg loss: 2.792959, ppl: 16.329269 +epoch: 1, batch: 12171, sum loss: 4937.144043, avg loss: 2.917934, ppl: 18.503019 +epoch: 1, batch: 12172, sum loss: 4021.651611, avg loss: 2.745155, ppl: 15.567020 +epoch: 1, batch: 12173, sum loss: 4961.315918, avg loss: 2.690518, ppl: 14.739314 +epoch: 1, batch: 12174, sum loss: 5018.185059, avg loss: 2.751198, ppl: 15.661384 +epoch: 1, batch: 12175, sum loss: 5335.817383, avg loss: 2.914155, ppl: 18.433229 +epoch: 1, batch: 12176, sum loss: 4643.440918, avg loss: 2.660998, ppl: 14.310558 +epoch: 1, batch: 12177, sum loss: 4634.336426, avg loss: 2.642153, ppl: 14.043406 +epoch: 1, batch: 12178, sum loss: 4591.771973, avg loss: 2.796451, ppl: 16.386385 +epoch: 1, batch: 12179, sum loss: 4017.091797, avg loss: 2.469018, ppl: 11.810840 +epoch: 1, batch: 12180, sum loss: 4224.399902, avg loss: 2.860122, ppl: 17.463657 +epoch: 1, batch: 12181, sum loss: 3970.782715, avg loss: 2.474008, ppl: 11.869925 +epoch: 1, batch: 12182, sum loss: 3464.759277, avg loss: 2.460767, ppl: 11.713787 +epoch: 1, batch: 12183, sum loss: 3639.782715, avg loss: 2.498135, ppl: 12.159793 +epoch: 1, batch: 12184, sum loss: 4613.257812, avg loss: 2.964819, ppl: 19.391188 +epoch: 1, batch: 12185, sum loss: 4220.755859, avg loss: 2.767709, ppl: 15.922112 +epoch: 1, batch: 12186, sum loss: 4548.546387, avg loss: 2.680346, ppl: 14.590134 +epoch: 1, batch: 12187, sum loss: 5126.821777, avg loss: 2.711170, ppl: 15.046865 +epoch: 1, batch: 12188, sum loss: 4434.185059, avg loss: 2.663174, ppl: 14.341743 +epoch: 1, batch: 12189, sum loss: 4827.409180, avg loss: 2.745966, ppl: 15.579663 +epoch: 1, batch: 12190, sum loss: 5142.482910, avg loss: 3.037498, ppl: 20.852993 +epoch: 1, batch: 12191, sum loss: 3653.097412, avg loss: 2.414473, ppl: 11.183875 +epoch: 1, batch: 12192, sum loss: 
4584.046387, avg loss: 2.735111, ppl: 15.411458 +epoch: 1, batch: 12193, sum loss: 4205.940430, avg loss: 2.672135, ppl: 14.470830 +epoch: 1, batch: 12194, sum loss: 4960.649902, avg loss: 2.774413, ppl: 16.029209 +epoch: 1, batch: 12195, sum loss: 5585.310547, avg loss: 3.123775, ppl: 22.732042 +epoch: 1, batch: 12196, sum loss: 4560.267578, avg loss: 2.496041, ppl: 12.134362 +epoch: 1, batch: 12197, sum loss: 5067.538086, avg loss: 2.863016, ppl: 17.514269 +epoch: 1, batch: 12198, sum loss: 3830.434082, avg loss: 2.697489, ppl: 14.842416 +epoch: 1, batch: 12199, sum loss: 3951.930664, avg loss: 2.509162, ppl: 12.294625 +epoch: 1, batch: 12200, sum loss: 4860.486328, avg loss: 2.756941, ppl: 15.751579 +epoch: 1, batch: 12201, sum loss: 4206.122559, avg loss: 2.604410, ppl: 13.523247 +epoch: 1, batch: 12202, sum loss: 3810.237793, avg loss: 2.434657, ppl: 11.411900 +epoch: 1, batch: 12203, sum loss: 4073.145264, avg loss: 2.545716, ppl: 12.752353 +epoch: 1, batch: 12204, sum loss: 5065.801758, avg loss: 2.918089, ppl: 18.505882 +epoch: 1, batch: 12205, sum loss: 4757.718262, avg loss: 2.876493, ppl: 17.751900 +epoch: 1, batch: 12206, sum loss: 3932.139160, avg loss: 2.728757, ppl: 15.313846 +epoch: 1, batch: 12207, sum loss: 4176.924316, avg loss: 2.641951, ppl: 14.040568 +epoch: 1, batch: 12208, sum loss: 5076.840820, avg loss: 3.062027, ppl: 21.370831 +epoch: 1, batch: 12209, sum loss: 3713.913574, avg loss: 2.506015, ppl: 12.255987 +epoch: 1, batch: 12210, sum loss: 4494.792969, avg loss: 2.724117, ppl: 15.242949 +epoch: 1, batch: 12211, sum loss: 4543.085938, avg loss: 2.655223, ppl: 14.228157 +epoch: 1, batch: 12212, sum loss: 4771.791504, avg loss: 2.848831, ppl: 17.267584 +epoch: 1, batch: 12213, sum loss: 3838.637207, avg loss: 2.613095, ppl: 13.641209 +epoch: 1, batch: 12214, sum loss: 4047.156738, avg loss: 2.526315, ppl: 12.507331 +epoch: 1, batch: 12215, sum loss: 4497.219727, avg loss: 2.757339, ppl: 15.757855 +epoch: 1, batch: 12216, sum loss: 
4997.743164, avg loss: 2.865678, ppl: 17.560961 +epoch: 1, batch: 12217, sum loss: 4786.270996, avg loss: 2.899014, ppl: 18.156225 +epoch: 1, batch: 12218, sum loss: 3746.427246, avg loss: 2.583743, ppl: 13.246629 +epoch: 1, batch: 12219, sum loss: 5128.447754, avg loss: 2.787200, ppl: 16.235493 +epoch: 1, batch: 12220, sum loss: 5592.673828, avg loss: 3.018173, ppl: 20.453878 +epoch: 1, batch: 12221, sum loss: 4892.657715, avg loss: 2.871278, ppl: 17.659575 +epoch: 1, batch: 12222, sum loss: 4898.538574, avg loss: 2.704881, ppl: 14.952533 +epoch: 1, batch: 12223, sum loss: 4309.115723, avg loss: 2.699947, ppl: 14.878945 +epoch: 1, batch: 12224, sum loss: 4524.291016, avg loss: 2.575009, ppl: 13.131436 +epoch: 1, batch: 12225, sum loss: 3926.821289, avg loss: 2.558190, ppl: 12.912420 +epoch: 1, batch: 12226, sum loss: 4380.271484, avg loss: 2.747975, ppl: 15.610982 +epoch: 1, batch: 12227, sum loss: 4900.718750, avg loss: 2.870954, ppl: 17.653856 +epoch: 1, batch: 12228, sum loss: 3570.546875, avg loss: 2.598651, ppl: 13.445593 +epoch: 1, batch: 12229, sum loss: 4410.948730, avg loss: 2.568986, ppl: 13.052582 +epoch: 1, batch: 12230, sum loss: 4377.332031, avg loss: 2.772218, ppl: 15.994065 +epoch: 1, batch: 12231, sum loss: 5358.293945, avg loss: 2.879255, ppl: 17.801012 +epoch: 1, batch: 12232, sum loss: 4143.335938, avg loss: 2.548177, ppl: 12.783778 +epoch: 1, batch: 12233, sum loss: 4492.109375, avg loss: 2.920747, ppl: 18.555149 +epoch: 1, batch: 12234, sum loss: 5108.338867, avg loss: 2.930774, ppl: 18.742126 +epoch: 1, batch: 12235, sum loss: 4858.676270, avg loss: 2.691787, ppl: 14.758032 +epoch: 1, batch: 12236, sum loss: 5963.950195, avg loss: 3.196115, ppl: 24.437395 +epoch: 1, batch: 12237, sum loss: 4485.002441, avg loss: 2.708335, ppl: 15.004268 +epoch: 1, batch: 12238, sum loss: 4647.395020, avg loss: 2.763017, ppl: 15.847586 +epoch: 1, batch: 12239, sum loss: 4074.226562, avg loss: 2.605004, ppl: 13.531281 +epoch: 1, batch: 12240, sum loss: 
4323.938477, avg loss: 2.747102, ppl: 15.597362 +epoch: 1, batch: 12241, sum loss: 4579.855957, avg loss: 2.664256, ppl: 14.357265 +epoch: 1, batch: 12242, sum loss: 4102.946777, avg loss: 2.580470, ppl: 13.203337 +epoch: 1, batch: 12243, sum loss: 3398.546143, avg loss: 2.383272, ppl: 10.840316 +epoch: 1, batch: 12244, sum loss: 5039.534668, avg loss: 2.694938, ppl: 14.804607 +epoch: 1, batch: 12245, sum loss: 3991.430420, avg loss: 2.846955, ppl: 17.235218 +epoch: 1, batch: 12246, sum loss: 5507.254395, avg loss: 2.828585, ppl: 16.921499 +epoch: 1, batch: 12247, sum loss: 5833.249023, avg loss: 3.132787, ppl: 22.937819 +epoch: 1, batch: 12248, sum loss: 4489.842773, avg loss: 2.823800, ppl: 16.840729 +epoch: 1, batch: 12249, sum loss: 4237.906250, avg loss: 2.647037, ppl: 14.112163 +epoch: 1, batch: 12250, sum loss: 3945.322998, avg loss: 2.522585, ppl: 12.460765 +epoch: 1, batch: 12251, sum loss: 4286.213867, avg loss: 2.591423, ppl: 13.348757 +epoch: 1, batch: 12252, sum loss: 4823.625977, avg loss: 2.751641, ppl: 15.668320 +epoch: 1, batch: 12253, sum loss: 4241.844727, avg loss: 2.816630, ppl: 16.720409 +epoch: 1, batch: 12254, sum loss: 5045.341309, avg loss: 2.848866, ppl: 17.268185 +epoch: 1, batch: 12255, sum loss: 6016.195312, avg loss: 2.960726, ppl: 19.311987 +epoch: 1, batch: 12256, sum loss: 5567.264648, avg loss: 2.833214, ppl: 17.000004 +epoch: 1, batch: 12257, sum loss: 4501.911133, avg loss: 2.687708, ppl: 14.697955 +epoch: 1, batch: 12258, sum loss: 4978.157715, avg loss: 2.846288, ppl: 17.223728 +epoch: 1, batch: 12259, sum loss: 5058.329102, avg loss: 2.841758, ppl: 17.145882 +epoch: 1, batch: 12260, sum loss: 4398.506836, avg loss: 2.627543, ppl: 13.839724 +epoch: 1, batch: 12261, sum loss: 5541.492676, avg loss: 3.104478, ppl: 22.297573 +epoch: 1, batch: 12262, sum loss: 4226.527344, avg loss: 2.995413, ppl: 19.993607 +epoch: 1, batch: 12263, sum loss: 5492.520020, avg loss: 2.980206, ppl: 19.691877 +epoch: 1, batch: 12264, sum loss: 
4852.941895, avg loss: 2.892099, ppl: 18.031115 +epoch: 1, batch: 12265, sum loss: 3728.378418, avg loss: 2.640495, ppl: 14.020136 +epoch: 1, batch: 12266, sum loss: 4266.799316, avg loss: 2.596956, ppl: 13.422820 +epoch: 1, batch: 12267, sum loss: 4724.891602, avg loss: 2.814111, ppl: 16.678333 +epoch: 1, batch: 12268, sum loss: 4765.185547, avg loss: 2.863693, ppl: 17.526136 +epoch: 1, batch: 12269, sum loss: 4933.436523, avg loss: 2.841841, ppl: 17.147308 +epoch: 1, batch: 12270, sum loss: 5007.944336, avg loss: 2.879784, ppl: 17.810427 +epoch: 1, batch: 12271, sum loss: 3946.542969, avg loss: 2.531458, ppl: 12.571824 +epoch: 1, batch: 12272, sum loss: 4363.251465, avg loss: 2.770318, ppl: 15.963714 +epoch: 1, batch: 12273, sum loss: 4205.445801, avg loss: 2.486958, ppl: 12.024642 +epoch: 1, batch: 12274, sum loss: 4777.945312, avg loss: 2.639749, ppl: 14.009687 +epoch: 1, batch: 12275, sum loss: 4227.669922, avg loss: 2.667300, ppl: 14.401027 +epoch: 1, batch: 12276, sum loss: 4814.081543, avg loss: 2.729071, ppl: 15.318651 +epoch: 1, batch: 12277, sum loss: 4679.788086, avg loss: 2.783931, ppl: 16.182510 +epoch: 1, batch: 12278, sum loss: 4663.316895, avg loss: 2.819418, ppl: 16.767084 +epoch: 1, batch: 12279, sum loss: 5132.405273, avg loss: 2.766795, ppl: 15.907575 +epoch: 1, batch: 12280, sum loss: 4467.852539, avg loss: 2.840338, ppl: 17.121559 +epoch: 1, batch: 12281, sum loss: 4599.418945, avg loss: 2.600011, ppl: 13.463884 +epoch: 1, batch: 12282, sum loss: 4337.674805, avg loss: 2.523371, ppl: 12.470567 +epoch: 1, batch: 12283, sum loss: 3409.980713, avg loss: 2.384602, ppl: 10.854742 +epoch: 1, batch: 12284, sum loss: 4895.690430, avg loss: 2.751934, ppl: 15.672911 +epoch: 1, batch: 12285, sum loss: 4506.903809, avg loss: 2.631001, ppl: 13.887658 +epoch: 1, batch: 12286, sum loss: 5064.383789, avg loss: 2.848360, ppl: 17.259455 +epoch: 1, batch: 12287, sum loss: 4178.442871, avg loss: 2.554060, ppl: 12.859212 +epoch: 1, batch: 12288, sum loss: 
5596.667969, avg loss: 2.933264, ppl: 18.788858 +epoch: 1, batch: 12289, sum loss: 4803.618164, avg loss: 2.879867, ppl: 17.811905 +epoch: 1, batch: 12290, sum loss: 4387.521484, avg loss: 2.810712, ppl: 16.621750 +epoch: 1, batch: 12291, sum loss: 4239.659668, avg loss: 2.688434, ppl: 14.708619 +epoch: 1, batch: 12292, sum loss: 4234.597656, avg loss: 2.849662, ppl: 17.281937 +epoch: 1, batch: 12293, sum loss: 4377.181641, avg loss: 2.635269, ppl: 13.947062 +epoch: 1, batch: 12294, sum loss: 3938.047363, avg loss: 2.331585, ppl: 10.294247 +epoch: 1, batch: 12295, sum loss: 4209.490723, avg loss: 2.798863, ppl: 16.425966 +epoch: 1, batch: 12296, sum loss: 4728.142578, avg loss: 2.642897, ppl: 14.053857 +epoch: 1, batch: 12297, sum loss: 5155.501953, avg loss: 2.667099, ppl: 14.398140 +epoch: 1, batch: 12298, sum loss: 4263.340332, avg loss: 2.732911, ppl: 15.377581 +epoch: 1, batch: 12299, sum loss: 3757.517578, avg loss: 2.438363, ppl: 11.454275 +epoch: 1, batch: 12300, sum loss: 4625.957520, avg loss: 2.763416, ppl: 15.853901 +epoch: 1, batch: 12301, sum loss: 5015.613770, avg loss: 3.063906, ppl: 21.411024 +epoch: 1, batch: 12302, sum loss: 4167.409180, avg loss: 2.569303, ppl: 13.056718 +epoch: 1, batch: 12303, sum loss: 5222.664062, avg loss: 2.772115, ppl: 15.992418 +epoch: 1, batch: 12304, sum loss: 4739.815918, avg loss: 2.960535, ppl: 19.308296 +epoch: 1, batch: 12305, sum loss: 5461.717285, avg loss: 2.986177, ppl: 19.809805 +epoch: 1, batch: 12306, sum loss: 6303.468750, avg loss: 3.356480, ppl: 28.688021 +epoch: 1, batch: 12307, sum loss: 4619.895020, avg loss: 2.711206, ppl: 15.047411 +epoch: 1, batch: 12308, sum loss: 4180.026855, avg loss: 2.594678, ppl: 13.392280 +epoch: 1, batch: 12309, sum loss: 3948.389893, avg loss: 2.776645, ppl: 16.065039 +epoch: 1, batch: 12310, sum loss: 4514.781738, avg loss: 2.980054, ppl: 19.688883 +epoch: 1, batch: 12311, sum loss: 5207.959961, avg loss: 2.790975, ppl: 16.296904 +epoch: 1, batch: 12312, sum loss: 
5235.637695, avg loss: 2.939718, ppl: 18.910513 +epoch: 1, batch: 12313, sum loss: 5034.746582, avg loss: 2.781628, ppl: 16.145287 +epoch: 1, batch: 12314, sum loss: 3770.523926, avg loss: 2.589646, ppl: 13.325048 +epoch: 1, batch: 12315, sum loss: 4905.388184, avg loss: 2.811111, ppl: 16.628378 +epoch: 1, batch: 12316, sum loss: 3740.207031, avg loss: 2.675399, ppl: 14.518135 +epoch: 1, batch: 12317, sum loss: 4201.608887, avg loss: 2.726547, ppl: 15.280034 +epoch: 1, batch: 12318, sum loss: 5014.069824, avg loss: 2.758014, ppl: 15.768498 +epoch: 1, batch: 12319, sum loss: 4549.299316, avg loss: 2.735598, ppl: 15.418959 +epoch: 1, batch: 12320, sum loss: 4028.340820, avg loss: 2.605654, ppl: 13.540074 +epoch: 1, batch: 12321, sum loss: 5072.832031, avg loss: 2.694016, ppl: 14.790957 +epoch: 1, batch: 12322, sum loss: 5208.251953, avg loss: 2.812231, ppl: 16.647017 +epoch: 1, batch: 12323, sum loss: 5050.829590, avg loss: 2.711127, ppl: 15.046224 +epoch: 1, batch: 12324, sum loss: 4136.843262, avg loss: 2.759735, ppl: 15.795662 +epoch: 1, batch: 12325, sum loss: 4673.894043, avg loss: 2.757460, ppl: 15.759757 +epoch: 1, batch: 12326, sum loss: 4490.226562, avg loss: 2.739614, ppl: 15.481001 +epoch: 1, batch: 12327, sum loss: 4705.760742, avg loss: 2.579913, ppl: 13.195986 +epoch: 1, batch: 12328, sum loss: 4939.059082, avg loss: 2.651132, ppl: 14.170072 +epoch: 1, batch: 12329, sum loss: 4688.216797, avg loss: 2.819132, ppl: 16.762297 +epoch: 1, batch: 12330, sum loss: 4703.624023, avg loss: 2.617487, ppl: 13.701248 +epoch: 1, batch: 12331, sum loss: 5857.137695, avg loss: 3.052182, ppl: 21.161472 +epoch: 1, batch: 12332, sum loss: 4238.006348, avg loss: 2.682282, ppl: 14.618421 +epoch: 1, batch: 12333, sum loss: 4503.371094, avg loss: 2.591123, ppl: 13.344745 +epoch: 1, batch: 12334, sum loss: 4572.516602, avg loss: 2.777957, ppl: 16.086119 +epoch: 1, batch: 12335, sum loss: 4295.993652, avg loss: 2.841266, ppl: 17.137445 +epoch: 1, batch: 12336, sum loss: 
4913.102539, avg loss: 2.712923, ppl: 15.073264 +epoch: 1, batch: 12337, sum loss: 5837.770996, avg loss: 2.924735, ppl: 18.629290 +epoch: 1, batch: 12338, sum loss: 5277.303223, avg loss: 2.760096, ppl: 15.801357 +epoch: 1, batch: 12339, sum loss: 6269.480469, avg loss: 3.123807, ppl: 22.732758 +epoch: 1, batch: 12340, sum loss: 4142.040039, avg loss: 2.566320, ppl: 13.017827 +epoch: 1, batch: 12341, sum loss: 4468.857422, avg loss: 2.523353, ppl: 12.470335 +epoch: 1, batch: 12342, sum loss: 4867.447754, avg loss: 2.813554, ppl: 16.669048 +epoch: 1, batch: 12343, sum loss: 3952.366211, avg loss: 2.657946, ppl: 14.266959 +epoch: 1, batch: 12344, sum loss: 4483.422363, avg loss: 2.606641, ppl: 13.553446 +epoch: 1, batch: 12345, sum loss: 6044.245117, avg loss: 2.926995, ppl: 18.671444 +epoch: 1, batch: 12346, sum loss: 3725.734863, avg loss: 2.610887, ppl: 13.611113 +epoch: 1, batch: 12347, sum loss: 5683.140137, avg loss: 2.958428, ppl: 19.267662 +epoch: 1, batch: 12348, sum loss: 4417.416504, avg loss: 2.423158, ppl: 11.281429 +epoch: 1, batch: 12349, sum loss: 5499.345215, avg loss: 2.928299, ppl: 18.695797 +epoch: 1, batch: 12350, sum loss: 4092.825684, avg loss: 2.642237, ppl: 14.044592 +epoch: 1, batch: 12351, sum loss: 3765.003418, avg loss: 2.435319, ppl: 11.419463 +epoch: 1, batch: 12352, sum loss: 5472.747559, avg loss: 2.920356, ppl: 18.547890 +epoch: 1, batch: 12353, sum loss: 4239.785156, avg loss: 2.633407, ppl: 13.921117 +epoch: 1, batch: 12354, sum loss: 5225.652832, avg loss: 3.022356, ppl: 20.539621 +epoch: 1, batch: 12355, sum loss: 5031.382324, avg loss: 2.700688, ppl: 14.889975 +epoch: 1, batch: 12356, sum loss: 5220.494141, avg loss: 2.890639, ppl: 18.004812 +epoch: 1, batch: 12357, sum loss: 4135.349121, avg loss: 2.650865, ppl: 14.166285 +epoch: 1, batch: 12358, sum loss: 4975.311035, avg loss: 2.643630, ppl: 14.064157 +epoch: 1, batch: 12359, sum loss: 4187.947266, avg loss: 2.466400, ppl: 11.779964 +epoch: 1, batch: 12360, sum loss: 
5508.244629, avg loss: 2.882389, ppl: 17.856874 +epoch: 1, batch: 12361, sum loss: 4478.305664, avg loss: 2.541604, ppl: 12.700023 +epoch: 1, batch: 12362, sum loss: 4239.548828, avg loss: 2.816976, ppl: 16.726194 +epoch: 1, batch: 12363, sum loss: 5507.999023, avg loss: 3.008192, ppl: 20.250750 +epoch: 1, batch: 12364, sum loss: 5066.666992, avg loss: 2.673703, ppl: 14.493539 +epoch: 1, batch: 12365, sum loss: 4211.622070, avg loss: 2.890612, ppl: 18.004328 +epoch: 1, batch: 12366, sum loss: 4901.089355, avg loss: 2.851128, ppl: 17.307299 +epoch: 1, batch: 12367, sum loss: 4935.133789, avg loss: 2.640521, ppl: 14.020504 +epoch: 1, batch: 12368, sum loss: 4469.737305, avg loss: 2.561454, ppl: 12.954640 +epoch: 1, batch: 12369, sum loss: 3898.101807, avg loss: 2.659005, ppl: 14.282077 +epoch: 1, batch: 12370, sum loss: 4747.186523, avg loss: 2.790821, ppl: 16.294390 +epoch: 1, batch: 12371, sum loss: 4179.773438, avg loss: 2.733665, ppl: 15.389185 +epoch: 1, batch: 12372, sum loss: 4209.958496, avg loss: 2.654450, ppl: 14.217171 +epoch: 1, batch: 12373, sum loss: 4095.754883, avg loss: 2.710625, ppl: 15.038678 +epoch: 1, batch: 12374, sum loss: 4518.046387, avg loss: 2.813229, ppl: 16.663643 +epoch: 1, batch: 12375, sum loss: 4785.482910, avg loss: 2.660079, ppl: 14.297425 +epoch: 1, batch: 12376, sum loss: 5933.062500, avg loss: 3.219242, ppl: 25.009146 +epoch: 1, batch: 12377, sum loss: 4460.774414, avg loss: 2.490661, ppl: 12.069253 +epoch: 1, batch: 12378, sum loss: 4440.354492, avg loss: 2.514357, ppl: 12.358658 +epoch: 1, batch: 12379, sum loss: 3818.982666, avg loss: 2.779464, ppl: 16.110376 +epoch: 1, batch: 12380, sum loss: 5155.422363, avg loss: 2.864124, ppl: 17.533680 +epoch: 1, batch: 12381, sum loss: 5216.404785, avg loss: 2.798500, ppl: 16.420004 +epoch: 1, batch: 12382, sum loss: 4080.564941, avg loss: 2.632623, ppl: 13.910205 +epoch: 1, batch: 12383, sum loss: 4794.562988, avg loss: 2.914628, ppl: 18.441946 +epoch: 1, batch: 12384, sum loss: 
4964.161621, avg loss: 2.696449, ppl: 14.826981 +epoch: 1, batch: 12385, sum loss: 3941.250000, avg loss: 2.552623, ppl: 12.840744 +epoch: 1, batch: 12386, sum loss: 4501.570801, avg loss: 2.665228, ppl: 14.371231 +epoch: 1, batch: 12387, sum loss: 5123.162109, avg loss: 2.878181, ppl: 17.781898 +epoch: 1, batch: 12388, sum loss: 4113.126465, avg loss: 2.686562, ppl: 14.681116 +epoch: 1, batch: 12389, sum loss: 4526.155762, avg loss: 2.804310, ppl: 16.515669 +epoch: 1, batch: 12390, sum loss: 4260.227051, avg loss: 2.849650, ppl: 17.281734 +epoch: 1, batch: 12391, sum loss: 4998.831055, avg loss: 2.998699, ppl: 20.059418 +epoch: 1, batch: 12392, sum loss: 5331.032715, avg loss: 2.925924, ppl: 18.651443 +epoch: 1, batch: 12393, sum loss: 4925.977539, avg loss: 2.802035, ppl: 16.478144 +epoch: 1, batch: 12394, sum loss: 5058.796875, avg loss: 2.774985, ppl: 16.038380 +epoch: 1, batch: 12395, sum loss: 4680.447266, avg loss: 2.725945, ppl: 15.270834 +epoch: 1, batch: 12396, sum loss: 4453.775391, avg loss: 2.640057, ppl: 14.013997 +epoch: 1, batch: 12397, sum loss: 3702.373535, avg loss: 2.542839, ppl: 12.715720 +epoch: 1, batch: 12398, sum loss: 4100.352539, avg loss: 2.751915, ppl: 15.672608 +epoch: 1, batch: 12399, sum loss: 4037.989502, avg loss: 2.857742, ppl: 17.422140 +epoch: 1, batch: 12400, sum loss: 4856.899414, avg loss: 2.892733, ppl: 18.042559 +epoch: 1, batch: 12401, sum loss: 4567.447754, avg loss: 2.828141, ppl: 16.913988 +epoch: 1, batch: 12402, sum loss: 4461.320801, avg loss: 2.814713, ppl: 16.688393 +epoch: 1, batch: 12403, sum loss: 4693.362305, avg loss: 2.674280, ppl: 14.501910 +epoch: 1, batch: 12404, sum loss: 5360.236328, avg loss: 2.783093, ppl: 16.168949 +epoch: 1, batch: 12405, sum loss: 5494.354492, avg loss: 3.047340, ppl: 21.059261 +epoch: 1, batch: 12406, sum loss: 4436.157227, avg loss: 2.669168, ppl: 14.427960 +epoch: 1, batch: 12407, sum loss: 5726.225586, avg loss: 3.015390, ppl: 20.397043 +epoch: 1, batch: 12408, sum loss: 
4778.291992, avg loss: 3.072857, ppl: 21.603529 +epoch: 1, batch: 12409, sum loss: 5283.794922, avg loss: 2.862294, ppl: 17.501633 +epoch: 1, batch: 12410, sum loss: 4545.506836, avg loss: 2.653536, ppl: 14.204174 +epoch: 1, batch: 12411, sum loss: 4793.593262, avg loss: 2.690007, ppl: 14.731786 +epoch: 1, batch: 12412, sum loss: 4532.775391, avg loss: 2.743811, ppl: 15.546117 +epoch: 1, batch: 12413, sum loss: 4261.486328, avg loss: 2.496477, ppl: 12.139652 +epoch: 1, batch: 12414, sum loss: 4681.251953, avg loss: 2.820031, ppl: 16.777378 +epoch: 1, batch: 12415, sum loss: 4208.751953, avg loss: 2.657040, ppl: 14.254040 +epoch: 1, batch: 12416, sum loss: 4751.060547, avg loss: 2.940013, ppl: 18.916086 +epoch: 1, batch: 12417, sum loss: 5239.227051, avg loss: 2.993844, ppl: 19.962271 +epoch: 1, batch: 12418, sum loss: 4483.213867, avg loss: 2.819631, ppl: 16.770666 +epoch: 1, batch: 12419, sum loss: 4858.007324, avg loss: 2.752412, ppl: 15.680408 +epoch: 1, batch: 12420, sum loss: 4428.893066, avg loss: 2.579437, ppl: 13.189708 +epoch: 1, batch: 12421, sum loss: 4222.070801, avg loss: 2.790529, ppl: 16.289642 +epoch: 1, batch: 12422, sum loss: 4314.508301, avg loss: 2.681484, ppl: 14.606757 +epoch: 1, batch: 12423, sum loss: 4543.874512, avg loss: 2.725780, ppl: 15.268312 +epoch: 1, batch: 12424, sum loss: 4445.050293, avg loss: 2.655347, ppl: 14.229921 +epoch: 1, batch: 12425, sum loss: 5574.691895, avg loss: 2.911066, ppl: 18.376383 +epoch: 1, batch: 12426, sum loss: 5094.616211, avg loss: 2.839808, ppl: 17.112488 +epoch: 1, batch: 12427, sum loss: 5440.056641, avg loss: 2.864696, ppl: 17.543711 +epoch: 1, batch: 12428, sum loss: 4680.056641, avg loss: 2.616018, ppl: 13.681141 +epoch: 1, batch: 12429, sum loss: 4667.261719, avg loss: 2.622057, ppl: 13.764009 +epoch: 1, batch: 12430, sum loss: 3777.520264, avg loss: 2.601598, ppl: 13.485271 +epoch: 1, batch: 12431, sum loss: 3870.432373, avg loss: 2.746936, ppl: 15.594770 +epoch: 1, batch: 12432, sum loss: 
4545.502930, avg loss: 2.667549, ppl: 14.404615 +epoch: 1, batch: 12433, sum loss: 4396.487305, avg loss: 2.703867, ppl: 14.937379 +epoch: 1, batch: 12434, sum loss: 4497.410156, avg loss: 2.774466, ppl: 16.030069 +epoch: 1, batch: 12435, sum loss: 5200.213379, avg loss: 2.846313, ppl: 17.224155 +epoch: 1, batch: 12436, sum loss: 4689.486816, avg loss: 2.884063, ppl: 17.886805 +epoch: 1, batch: 12437, sum loss: 3949.856445, avg loss: 2.549940, ppl: 12.806331 +epoch: 1, batch: 12438, sum loss: 4447.061523, avg loss: 2.819950, ppl: 16.776018 +epoch: 1, batch: 12439, sum loss: 4832.839844, avg loss: 2.699910, ppl: 14.878399 +epoch: 1, batch: 12440, sum loss: 4400.821777, avg loss: 2.590242, ppl: 13.333003 +epoch: 1, batch: 12441, sum loss: 4104.218750, avg loss: 2.758212, ppl: 15.771612 +epoch: 1, batch: 12442, sum loss: 4766.175293, avg loss: 2.821892, ppl: 16.808619 +epoch: 1, batch: 12443, sum loss: 6122.582031, avg loss: 3.123766, ppl: 22.731831 +epoch: 1, batch: 12444, sum loss: 5172.435059, avg loss: 2.996776, ppl: 20.020878 +epoch: 1, batch: 12445, sum loss: 4511.534668, avg loss: 2.735922, ppl: 15.423952 +epoch: 1, batch: 12446, sum loss: 3990.664062, avg loss: 2.519359, ppl: 12.420628 +epoch: 1, batch: 12447, sum loss: 5352.249023, avg loss: 2.789082, ppl: 16.266085 +epoch: 1, batch: 12448, sum loss: 4436.278320, avg loss: 2.548121, ppl: 12.783058 +epoch: 1, batch: 12449, sum loss: 4717.170898, avg loss: 2.718831, ppl: 15.162580 +epoch: 1, batch: 12450, sum loss: 5464.177246, avg loss: 2.915783, ppl: 18.463263 +epoch: 1, batch: 12451, sum loss: 4368.267090, avg loss: 2.735296, ppl: 15.414302 +epoch: 1, batch: 12452, sum loss: 4666.221680, avg loss: 2.909116, ppl: 18.340574 +epoch: 1, batch: 12453, sum loss: 4497.164062, avg loss: 2.558114, ppl: 12.911441 +epoch: 1, batch: 12454, sum loss: 5522.347656, avg loss: 2.782039, ppl: 16.151924 +epoch: 1, batch: 12455, sum loss: 3665.143799, avg loss: 2.529430, ppl: 12.546351 +epoch: 1, batch: 12456, sum loss: 
3812.483887, avg loss: 2.300835, ppl: 9.982515 +epoch: 1, batch: 12457, sum loss: 4593.245117, avg loss: 2.852947, ppl: 17.338808 +epoch: 1, batch: 12458, sum loss: 4950.594238, avg loss: 3.009480, ppl: 20.276848 +epoch: 1, batch: 12459, sum loss: 4931.744141, avg loss: 2.610770, ppl: 13.609523 +epoch: 1, batch: 12460, sum loss: 4655.827148, avg loss: 2.769677, ppl: 15.953479 +epoch: 1, batch: 12461, sum loss: 5799.104980, avg loss: 2.945203, ppl: 19.014528 +epoch: 1, batch: 12462, sum loss: 4331.623535, avg loss: 2.636411, ppl: 13.962999 +epoch: 1, batch: 12463, sum loss: 4880.661621, avg loss: 2.618381, ppl: 13.713500 +epoch: 1, batch: 12464, sum loss: 5768.413086, avg loss: 3.141837, ppl: 23.146351 +epoch: 1, batch: 12465, sum loss: 5126.524414, avg loss: 2.772593, ppl: 16.000069 +epoch: 1, batch: 12466, sum loss: 4608.774414, avg loss: 2.815378, ppl: 16.699493 +epoch: 1, batch: 12467, sum loss: 4502.442871, avg loss: 2.667324, ppl: 14.401381 +epoch: 1, batch: 12468, sum loss: 5089.294434, avg loss: 2.913162, ppl: 18.414938 +epoch: 1, batch: 12469, sum loss: 3666.260254, avg loss: 2.576430, ppl: 13.150112 +epoch: 1, batch: 12470, sum loss: 4762.649414, avg loss: 2.608242, ppl: 13.575162 +epoch: 1, batch: 12471, sum loss: 4839.806641, avg loss: 2.833610, ppl: 17.006750 +epoch: 1, batch: 12472, sum loss: 4012.580078, avg loss: 2.577123, ppl: 13.159221 +epoch: 1, batch: 12473, sum loss: 3727.526367, avg loss: 2.374221, ppl: 10.742638 +epoch: 1, batch: 12474, sum loss: 5667.251953, avg loss: 3.180276, ppl: 24.053394 +epoch: 1, batch: 12475, sum loss: 4167.054688, avg loss: 2.709398, ppl: 15.020235 +epoch: 1, batch: 12476, sum loss: 4103.889648, avg loss: 2.642556, ppl: 14.049069 +epoch: 1, batch: 12477, sum loss: 5369.148438, avg loss: 3.023169, ppl: 20.556337 +epoch: 1, batch: 12478, sum loss: 4515.968262, avg loss: 2.653330, ppl: 14.201255 +epoch: 1, batch: 12479, sum loss: 4080.543457, avg loss: 2.585896, ppl: 13.275175 +epoch: 1, batch: 12480, sum loss: 
4355.738281, avg loss: 2.525066, ppl: 12.491718 +epoch: 1, batch: 12481, sum loss: 4276.740234, avg loss: 2.705086, ppl: 14.955596 +epoch: 1, batch: 12482, sum loss: 5383.050781, avg loss: 2.980648, ppl: 19.700584 +epoch: 1, batch: 12483, sum loss: 4941.398926, avg loss: 2.768291, ppl: 15.931380 +epoch: 1, batch: 12484, sum loss: 4512.960449, avg loss: 2.683092, ppl: 14.630259 +epoch: 1, batch: 12485, sum loss: 4912.809570, avg loss: 2.592512, ppl: 13.363294 +epoch: 1, batch: 12486, sum loss: 3422.792236, avg loss: 2.455375, ppl: 11.650799 +epoch: 1, batch: 12487, sum loss: 5536.137695, avg loss: 3.201931, ppl: 24.579937 +epoch: 1, batch: 12488, sum loss: 4463.441895, avg loss: 2.762031, ppl: 15.831963 +epoch: 1, batch: 12489, sum loss: 4770.787109, avg loss: 2.690799, ppl: 14.743458 +epoch: 1, batch: 12490, sum loss: 4945.319336, avg loss: 2.798709, ppl: 16.423437 +epoch: 1, batch: 12491, sum loss: 4746.574219, avg loss: 2.835469, ppl: 17.038385 +epoch: 1, batch: 12492, sum loss: 4191.749512, avg loss: 2.725455, ppl: 15.263354 +epoch: 1, batch: 12493, sum loss: 5590.755859, avg loss: 2.883319, ppl: 17.873499 +epoch: 1, batch: 12494, sum loss: 3811.627197, avg loss: 2.510953, ppl: 12.316668 +epoch: 1, batch: 12495, sum loss: 4279.820312, avg loss: 2.584433, ppl: 13.255766 +epoch: 1, batch: 12496, sum loss: 5004.408203, avg loss: 2.684768, ppl: 14.654807 +epoch: 1, batch: 12497, sum loss: 3670.834961, avg loss: 2.559857, ppl: 12.933969 +epoch: 1, batch: 12498, sum loss: 3749.865234, avg loss: 2.538839, ppl: 12.664960 +epoch: 1, batch: 12499, sum loss: 4982.413086, avg loss: 2.805413, ppl: 16.533899 +epoch: 1, batch: 12500, sum loss: 4043.953613, avg loss: 2.522741, ppl: 12.462708 +epoch: 1, batch: 12501, sum loss: 4803.804199, avg loss: 2.930936, ppl: 18.745165 +epoch: 1, batch: 12502, sum loss: 4221.934082, avg loss: 2.670420, ppl: 14.446034 +epoch: 1, batch: 12503, sum loss: 4233.790527, avg loss: 2.710493, ppl: 15.036692 +epoch: 1, batch: 12504, sum loss: 
4651.695801, avg loss: 2.594365, ppl: 13.388079 +epoch: 1, batch: 12505, sum loss: 4706.100586, avg loss: 2.737697, ppl: 15.451358 +epoch: 1, batch: 12506, sum loss: 4858.496582, avg loss: 2.874850, ppl: 17.722767 +epoch: 1, batch: 12507, sum loss: 4595.677734, avg loss: 2.737152, ppl: 15.442935 +epoch: 1, batch: 12508, sum loss: 4197.601074, avg loss: 2.910958, ppl: 18.374390 +epoch: 1, batch: 12509, sum loss: 4350.702637, avg loss: 2.810531, ppl: 16.618746 +epoch: 1, batch: 12510, sum loss: 4539.477051, avg loss: 2.705290, ppl: 14.958659 +epoch: 1, batch: 12511, sum loss: 4964.156250, avg loss: 2.806194, ppl: 16.546814 +epoch: 1, batch: 12512, sum loss: 4296.175293, avg loss: 2.762814, ppl: 15.844363 +epoch: 1, batch: 12513, sum loss: 4800.385254, avg loss: 2.895287, ppl: 18.088684 +epoch: 1, batch: 12514, sum loss: 6016.885254, avg loss: 2.885796, ppl: 17.917830 +epoch: 1, batch: 12515, sum loss: 4323.944824, avg loss: 2.626941, ppl: 13.831394 +epoch: 1, batch: 12516, sum loss: 4666.566406, avg loss: 2.629051, ppl: 13.860616 +epoch: 1, batch: 12517, sum loss: 5309.231445, avg loss: 3.154624, ppl: 23.444208 +epoch: 1, batch: 12518, sum loss: 4942.708008, avg loss: 2.762833, ppl: 15.844666 +epoch: 1, batch: 12519, sum loss: 4677.355469, avg loss: 2.615971, ppl: 13.680489 +epoch: 1, batch: 12520, sum loss: 5655.413574, avg loss: 2.954762, ppl: 19.197144 +epoch: 1, batch: 12521, sum loss: 4645.539062, avg loss: 2.696192, ppl: 14.823181 +epoch: 1, batch: 12522, sum loss: 4714.959473, avg loss: 2.760515, ppl: 15.807978 +epoch: 1, batch: 12523, sum loss: 3999.852539, avg loss: 2.734007, ppl: 15.394455 +epoch: 1, batch: 12524, sum loss: 4313.896973, avg loss: 2.622430, ppl: 13.769136 +epoch: 1, batch: 12525, sum loss: 4442.186523, avg loss: 2.715273, ppl: 15.108736 +epoch: 1, batch: 12526, sum loss: 4599.880859, avg loss: 2.759377, ppl: 15.789995 +epoch: 1, batch: 12527, sum loss: 4226.714844, avg loss: 2.538568, ppl: 12.661521 +epoch: 1, batch: 12528, sum loss: 
5922.403809, avg loss: 2.789639, ppl: 16.275143 +epoch: 1, batch: 12529, sum loss: 4166.035156, avg loss: 2.384680, ppl: 10.855583 +epoch: 1, batch: 12530, sum loss: 5231.619629, avg loss: 3.190012, ppl: 24.288719 +epoch: 1, batch: 12531, sum loss: 4308.186035, avg loss: 2.570517, ppl: 13.072579 +epoch: 1, batch: 12532, sum loss: 4892.301758, avg loss: 2.961442, ppl: 19.325815 +epoch: 1, batch: 12533, sum loss: 4642.622559, avg loss: 2.853487, ppl: 17.348162 +epoch: 1, batch: 12534, sum loss: 4929.971680, avg loss: 2.752636, ppl: 15.683924 +epoch: 1, batch: 12535, sum loss: 4374.594727, avg loss: 2.638477, ppl: 13.991876 +epoch: 1, batch: 12536, sum loss: 4293.243164, avg loss: 2.433811, ppl: 11.402258 +epoch: 1, batch: 12537, sum loss: 4874.972656, avg loss: 2.741829, ppl: 15.515343 +epoch: 1, batch: 12538, sum loss: 3707.456543, avg loss: 2.548080, ppl: 12.782537 +epoch: 1, batch: 12539, sum loss: 3662.294189, avg loss: 2.726950, ppl: 15.286200 +epoch: 1, batch: 12540, sum loss: 5939.452148, avg loss: 3.001239, ppl: 20.110439 +epoch: 1, batch: 12541, sum loss: 4959.616211, avg loss: 2.900360, ppl: 18.180695 +epoch: 1, batch: 12542, sum loss: 3602.996094, avg loss: 2.523107, ppl: 12.467267 +epoch: 1, batch: 12543, sum loss: 4588.658691, avg loss: 2.841275, ppl: 17.137598 +epoch: 1, batch: 12544, sum loss: 4522.185059, avg loss: 2.752395, ppl: 15.680140 +epoch: 1, batch: 12545, sum loss: 5475.434082, avg loss: 2.800734, ppl: 16.456715 +epoch: 1, batch: 12546, sum loss: 4910.991211, avg loss: 2.921470, ppl: 18.568562 +epoch: 1, batch: 12547, sum loss: 3485.598145, avg loss: 2.465062, ppl: 11.764216 +epoch: 1, batch: 12548, sum loss: 5323.331055, avg loss: 3.105794, ppl: 22.326944 +epoch: 1, batch: 12549, sum loss: 4740.612305, avg loss: 2.672273, ppl: 14.472827 +epoch: 1, batch: 12550, sum loss: 5438.170898, avg loss: 2.903455, ppl: 18.237045 +epoch: 1, batch: 12551, sum loss: 4517.660645, avg loss: 2.523833, ppl: 12.476324 +epoch: 1, batch: 12552, sum loss: 
5098.726562, avg loss: 3.004553, ppl: 20.177197 +epoch: 1, batch: 12553, sum loss: 4247.586914, avg loss: 2.749247, ppl: 15.630862 +epoch: 1, batch: 12554, sum loss: 4252.133789, avg loss: 2.669262, ppl: 14.429312 +epoch: 1, batch: 12555, sum loss: 4484.152832, avg loss: 2.942358, ppl: 18.960493 +epoch: 1, batch: 12556, sum loss: 4922.895996, avg loss: 2.790757, ppl: 16.293356 +epoch: 1, batch: 12557, sum loss: 3998.998535, avg loss: 2.552009, ppl: 12.832864 +epoch: 1, batch: 12558, sum loss: 5118.448242, avg loss: 2.757785, ppl: 15.764879 +epoch: 1, batch: 12559, sum loss: 4152.251465, avg loss: 2.452600, ppl: 11.618516 +epoch: 1, batch: 12560, sum loss: 5546.673828, avg loss: 3.110866, ppl: 22.440472 +epoch: 1, batch: 12561, sum loss: 5582.044922, avg loss: 3.143043, ppl: 23.174286 +epoch: 1, batch: 12562, sum loss: 4342.908203, avg loss: 2.746938, ppl: 15.594800 +epoch: 1, batch: 12563, sum loss: 4969.393555, avg loss: 2.813926, ppl: 16.675257 +epoch: 1, batch: 12564, sum loss: 4675.032227, avg loss: 2.574357, ppl: 13.122873 +epoch: 1, batch: 12565, sum loss: 3420.924316, avg loss: 2.421036, ppl: 11.257519 +epoch: 1, batch: 12566, sum loss: 4512.850586, avg loss: 2.720223, ppl: 15.183714 +epoch: 1, batch: 12567, sum loss: 4498.897461, avg loss: 2.566399, ppl: 13.018860 +epoch: 1, batch: 12568, sum loss: 4769.131836, avg loss: 2.922262, ppl: 18.583279 +epoch: 1, batch: 12569, sum loss: 4221.846680, avg loss: 2.594866, ppl: 13.394790 +epoch: 1, batch: 12570, sum loss: 5420.210449, avg loss: 2.939377, ppl: 18.904058 +epoch: 1, batch: 12571, sum loss: 3845.361816, avg loss: 2.659310, ppl: 14.286427 +epoch: 1, batch: 12572, sum loss: 5232.553223, avg loss: 2.886130, ppl: 17.923807 +epoch: 1, batch: 12573, sum loss: 4178.411133, avg loss: 2.595286, ppl: 13.400424 +epoch: 1, batch: 12574, sum loss: 4439.002930, avg loss: 2.716648, ppl: 15.129524 +epoch: 1, batch: 12575, sum loss: 5461.703125, avg loss: 2.812412, ppl: 16.650023 +epoch: 1, batch: 12576, sum loss: 
5004.291504, avg loss: 2.761750, ppl: 15.827520 +epoch: 1, batch: 12577, sum loss: 4551.144043, avg loss: 2.812821, ppl: 16.656836 +epoch: 1, batch: 12578, sum loss: 4634.626465, avg loss: 2.757065, ppl: 15.753539 +epoch: 1, batch: 12579, sum loss: 3987.923828, avg loss: 2.740841, ppl: 15.500017 +epoch: 1, batch: 12580, sum loss: 5386.159180, avg loss: 2.967581, ppl: 19.444826 +epoch: 1, batch: 12581, sum loss: 4175.011719, avg loss: 2.570820, ppl: 13.076544 +epoch: 1, batch: 12582, sum loss: 4613.593750, avg loss: 2.649968, ppl: 14.153581 +epoch: 1, batch: 12583, sum loss: 4528.705566, avg loss: 2.520148, ppl: 12.430434 +epoch: 1, batch: 12584, sum loss: 5304.767090, avg loss: 2.734416, ppl: 15.400747 +epoch: 1, batch: 12585, sum loss: 5115.034180, avg loss: 2.650277, ppl: 14.157958 +epoch: 1, batch: 12586, sum loss: 4412.503906, avg loss: 2.690551, ppl: 14.739799 +epoch: 1, batch: 12587, sum loss: 4370.659668, avg loss: 2.629759, ppl: 13.870431 +epoch: 1, batch: 12588, sum loss: 4311.343750, avg loss: 2.691226, ppl: 14.749744 +epoch: 1, batch: 12589, sum loss: 3771.889160, avg loss: 2.470130, ppl: 11.823989 +epoch: 1, batch: 12590, sum loss: 5393.583496, avg loss: 3.068022, ppl: 21.499346 +epoch: 1, batch: 12591, sum loss: 4257.955566, avg loss: 2.696615, ppl: 14.829453 +epoch: 1, batch: 12592, sum loss: 3907.466309, avg loss: 2.514457, ppl: 12.359899 +epoch: 1, batch: 12593, sum loss: 4604.678223, avg loss: 2.881526, ppl: 17.841475 +epoch: 1, batch: 12594, sum loss: 3663.004150, avg loss: 2.596034, ppl: 13.410447 +epoch: 1, batch: 12595, sum loss: 4462.674316, avg loss: 2.617404, ppl: 13.700118 +epoch: 1, batch: 12596, sum loss: 3385.165771, avg loss: 2.267358, ppl: 9.653862 +epoch: 1, batch: 12597, sum loss: 5263.524414, avg loss: 2.914465, ppl: 18.438953 +epoch: 1, batch: 12598, sum loss: 4849.204102, avg loss: 2.670266, ppl: 14.443816 +epoch: 1, batch: 12599, sum loss: 4985.819336, avg loss: 2.756119, ppl: 15.738647 +epoch: 1, batch: 12600, sum loss: 
3300.793701, avg loss: 2.513933, ppl: 12.353420 +epoch: 1, batch: 12601, sum loss: 4385.464355, avg loss: 2.573629, ppl: 13.113332 +epoch: 1, batch: 12602, sum loss: 4865.737305, avg loss: 2.680847, ppl: 14.597455 +epoch: 1, batch: 12603, sum loss: 4353.104980, avg loss: 2.765632, ppl: 15.889077 +epoch: 1, batch: 12604, sum loss: 5349.451660, avg loss: 2.741903, ppl: 15.516479 +epoch: 1, batch: 12605, sum loss: 4758.717773, avg loss: 2.782876, ppl: 16.165442 +epoch: 1, batch: 12606, sum loss: 3721.289551, avg loss: 2.499187, ppl: 12.172593 +epoch: 1, batch: 12607, sum loss: 4915.976562, avg loss: 2.922697, ppl: 18.591362 +epoch: 1, batch: 12608, sum loss: 4656.273438, avg loss: 2.564027, ppl: 12.988016 +epoch: 1, batch: 12609, sum loss: 5119.425293, avg loss: 2.641603, ppl: 14.035678 +epoch: 1, batch: 12610, sum loss: 4162.469727, avg loss: 2.619553, ppl: 13.729587 +epoch: 1, batch: 12611, sum loss: 4412.047363, avg loss: 2.900754, ppl: 18.187862 +epoch: 1, batch: 12612, sum loss: 4787.557617, avg loss: 2.727953, ppl: 15.301532 +epoch: 1, batch: 12613, sum loss: 3774.679443, avg loss: 2.473578, ppl: 11.864820 +epoch: 1, batch: 12614, sum loss: 4572.676758, avg loss: 2.630999, ppl: 13.887641 +epoch: 1, batch: 12615, sum loss: 4322.796387, avg loss: 2.593159, ppl: 13.371949 +epoch: 1, batch: 12616, sum loss: 4320.882324, avg loss: 2.778702, ppl: 16.098120 +epoch: 1, batch: 12617, sum loss: 4131.430664, avg loss: 2.730622, ppl: 15.342424 +epoch: 1, batch: 12618, sum loss: 4010.222656, avg loss: 2.551032, ppl: 12.820332 +epoch: 1, batch: 12619, sum loss: 3500.925293, avg loss: 2.268908, ppl: 9.668839 +epoch: 1, batch: 12620, sum loss: 5192.075195, avg loss: 2.788440, ppl: 16.255642 +epoch: 1, batch: 12621, sum loss: 3878.864258, avg loss: 2.716292, ppl: 15.124133 +epoch: 1, batch: 12622, sum loss: 3655.970215, avg loss: 2.330127, ppl: 10.279252 +epoch: 1, batch: 12623, sum loss: 4034.860840, avg loss: 2.679191, ppl: 14.573294 +epoch: 1, batch: 12624, sum loss: 
4687.692871, avg loss: 2.666492, ppl: 14.389402 +epoch: 1, batch: 12625, sum loss: 4472.581543, avg loss: 2.659085, ppl: 14.283217 +epoch: 1, batch: 12626, sum loss: 5185.817383, avg loss: 2.809218, ppl: 16.596941 +epoch: 1, batch: 12627, sum loss: 5131.335449, avg loss: 2.796368, ppl: 16.385031 +epoch: 1, batch: 12628, sum loss: 5243.818848, avg loss: 2.736857, ppl: 15.438393 +epoch: 1, batch: 12629, sum loss: 4831.004395, avg loss: 2.798959, ppl: 16.427546 +epoch: 1, batch: 12630, sum loss: 5370.907227, avg loss: 2.775662, ppl: 16.049255 +epoch: 1, batch: 12631, sum loss: 4430.229004, avg loss: 2.626099, ppl: 13.819756 +epoch: 1, batch: 12632, sum loss: 5417.046875, avg loss: 2.815513, ppl: 16.701740 +epoch: 1, batch: 12633, sum loss: 5520.057129, avg loss: 2.810620, ppl: 16.620216 +epoch: 1, batch: 12634, sum loss: 5375.100586, avg loss: 3.090915, ppl: 21.997190 +epoch: 1, batch: 12635, sum loss: 5375.562988, avg loss: 2.896316, ppl: 18.107321 +epoch: 1, batch: 12636, sum loss: 3931.314453, avg loss: 2.556121, ppl: 12.885738 +epoch: 1, batch: 12637, sum loss: 4101.220215, avg loss: 2.497698, ppl: 12.154483 +epoch: 1, batch: 12638, sum loss: 4896.511230, avg loss: 2.828718, ppl: 16.923754 +epoch: 1, batch: 12639, sum loss: 4823.629395, avg loss: 2.732934, ppl: 15.377947 +epoch: 1, batch: 12640, sum loss: 4726.942871, avg loss: 2.666070, ppl: 14.383338 +epoch: 1, batch: 12641, sum loss: 5669.184082, avg loss: 2.932842, ppl: 18.780939 +epoch: 1, batch: 12642, sum loss: 4886.576172, avg loss: 2.957976, ppl: 19.258955 +epoch: 1, batch: 12643, sum loss: 4012.892090, avg loss: 2.680623, ppl: 14.594176 +epoch: 1, batch: 12644, sum loss: 4296.452148, avg loss: 2.616597, ppl: 13.689059 +epoch: 1, batch: 12645, sum loss: 5132.842773, avg loss: 2.708624, ppl: 15.008612 +epoch: 1, batch: 12646, sum loss: 3840.943359, avg loss: 2.621804, ppl: 13.760529 +epoch: 1, batch: 12647, sum loss: 4725.951660, avg loss: 2.757265, ppl: 15.756683 +epoch: 1, batch: 12648, sum loss: 
5015.325195, avg loss: 2.703679, ppl: 14.934580 +epoch: 1, batch: 12649, sum loss: 5148.419922, avg loss: 2.877820, ppl: 17.775480 +epoch: 1, batch: 12650, sum loss: 4097.804688, avg loss: 2.543641, ppl: 12.725917 +epoch: 1, batch: 12651, sum loss: 3704.599854, avg loss: 2.374743, ppl: 10.748256 +epoch: 1, batch: 12652, sum loss: 4636.101074, avg loss: 2.847728, ppl: 17.248545 +epoch: 1, batch: 12653, sum loss: 3915.433105, avg loss: 2.570869, ppl: 13.077180 +epoch: 1, batch: 12654, sum loss: 4325.528320, avg loss: 2.670079, ppl: 14.441113 +epoch: 1, batch: 12655, sum loss: 4849.234375, avg loss: 2.612734, ppl: 13.636283 +epoch: 1, batch: 12656, sum loss: 3900.076660, avg loss: 2.488881, ppl: 12.047791 +epoch: 1, batch: 12657, sum loss: 3856.748291, avg loss: 2.798801, ppl: 16.424948 +epoch: 1, batch: 12658, sum loss: 5826.623535, avg loss: 2.977324, ppl: 19.635208 +epoch: 1, batch: 12659, sum loss: 4838.621582, avg loss: 2.761770, ppl: 15.827841 +epoch: 1, batch: 12660, sum loss: 4022.973877, avg loss: 2.511220, ppl: 12.319945 +epoch: 1, batch: 12661, sum loss: 4782.382812, avg loss: 2.948448, ppl: 19.076323 +epoch: 1, batch: 12662, sum loss: 4943.358887, avg loss: 2.997792, ppl: 20.041237 +epoch: 1, batch: 12663, sum loss: 3775.586426, avg loss: 2.647676, ppl: 14.121186 +epoch: 1, batch: 12664, sum loss: 4023.045654, avg loss: 2.698220, ppl: 14.853266 +epoch: 1, batch: 12665, sum loss: 4075.009033, avg loss: 2.390035, ppl: 10.913873 +epoch: 1, batch: 12666, sum loss: 4475.189941, avg loss: 2.511330, ppl: 12.321308 +epoch: 1, batch: 12667, sum loss: 4051.423340, avg loss: 2.560950, ppl: 12.948112 +epoch: 1, batch: 12668, sum loss: 5405.963867, avg loss: 2.955694, ppl: 19.215052 +epoch: 1, batch: 12669, sum loss: 5206.936523, avg loss: 2.788932, ppl: 16.263647 +epoch: 1, batch: 12670, sum loss: 4189.921875, avg loss: 2.576828, ppl: 13.155340 +epoch: 1, batch: 12671, sum loss: 4357.396484, avg loss: 2.716581, ppl: 15.128514 +epoch: 1, batch: 12672, sum loss: 
4575.585938, avg loss: 2.897774, ppl: 18.133743 +epoch: 1, batch: 12673, sum loss: 4284.999512, avg loss: 2.724094, ppl: 15.242593 +epoch: 1, batch: 12674, sum loss: 5339.772949, avg loss: 2.849399, ppl: 17.277388 +epoch: 1, batch: 12675, sum loss: 4144.793945, avg loss: 2.521164, ppl: 12.443074 +epoch: 1, batch: 12676, sum loss: 4998.025391, avg loss: 2.679906, ppl: 14.583728 +epoch: 1, batch: 12677, sum loss: 3461.480713, avg loss: 2.668836, ppl: 14.423176 +epoch: 1, batch: 12678, sum loss: 4953.321777, avg loss: 2.687641, ppl: 14.696959 +epoch: 1, batch: 12679, sum loss: 4887.646973, avg loss: 2.831777, ppl: 16.975597 +epoch: 1, batch: 12680, sum loss: 5385.520508, avg loss: 3.089799, ppl: 21.972670 +epoch: 1, batch: 12681, sum loss: 4025.960938, avg loss: 2.744349, ppl: 15.554492 +epoch: 1, batch: 12682, sum loss: 4152.759766, avg loss: 2.603611, ppl: 13.512446 +epoch: 1, batch: 12683, sum loss: 4269.048340, avg loss: 2.678198, ppl: 14.558840 +epoch: 1, batch: 12684, sum loss: 4858.431641, avg loss: 2.678298, ppl: 14.560284 +epoch: 1, batch: 12685, sum loss: 4808.315918, avg loss: 2.815173, ppl: 16.696066 +epoch: 1, batch: 12686, sum loss: 3812.037842, avg loss: 2.561853, ppl: 12.959815 +epoch: 1, batch: 12687, sum loss: 4683.960449, avg loss: 2.735958, ppl: 15.424515 +epoch: 1, batch: 12688, sum loss: 4172.257324, avg loss: 2.792675, ppl: 16.324638 +epoch: 1, batch: 12689, sum loss: 4588.235352, avg loss: 2.583466, ppl: 13.242956 +epoch: 1, batch: 12690, sum loss: 4562.361328, avg loss: 2.604088, ppl: 13.518885 +epoch: 1, batch: 12691, sum loss: 3939.698730, avg loss: 2.502985, ppl: 12.218913 +epoch: 1, batch: 12692, sum loss: 3846.738037, avg loss: 2.443925, ppl: 11.518160 +epoch: 1, batch: 12693, sum loss: 4946.339844, avg loss: 2.889217, ppl: 17.979229 +epoch: 1, batch: 12694, sum loss: 3544.860596, avg loss: 2.470286, ppl: 11.825830 +epoch: 1, batch: 12695, sum loss: 5176.993164, avg loss: 2.750793, ppl: 15.655048 +epoch: 1, batch: 12696, sum loss: 
4512.041992, avg loss: 2.718098, ppl: 15.151472 +epoch: 1, batch: 12697, sum loss: 4271.652344, avg loss: 2.748811, ppl: 15.624044 +epoch: 1, batch: 12698, sum loss: 4958.911133, avg loss: 2.822374, ppl: 16.816725 +epoch: 1, batch: 12699, sum loss: 4248.993164, avg loss: 2.680753, ppl: 14.596076 +epoch: 1, batch: 12700, sum loss: 5318.535156, avg loss: 3.023613, ppl: 20.565460 +epoch: 1, batch: 12701, sum loss: 4145.672852, avg loss: 2.576552, ppl: 13.151718 +epoch: 1, batch: 12702, sum loss: 4875.125977, avg loss: 2.514248, ppl: 12.357308 +epoch: 1, batch: 12703, sum loss: 4031.562012, avg loss: 2.459769, ppl: 11.702111 +epoch: 1, batch: 12704, sum loss: 4633.137207, avg loss: 2.565414, ppl: 13.006038 +epoch: 1, batch: 12705, sum loss: 5635.258789, avg loss: 3.013507, ppl: 20.358681 +epoch: 1, batch: 12706, sum loss: 4529.279785, avg loss: 2.694396, ppl: 14.796579 +epoch: 1, batch: 12707, sum loss: 4503.296875, avg loss: 2.786694, ppl: 16.227276 +epoch: 1, batch: 12708, sum loss: 4867.657227, avg loss: 2.728508, ppl: 15.310020 +epoch: 1, batch: 12709, sum loss: 5354.562988, avg loss: 2.825627, ppl: 16.871517 +epoch: 1, batch: 12710, sum loss: 4760.880371, avg loss: 2.711208, ppl: 15.047437 +epoch: 1, batch: 12711, sum loss: 5457.547363, avg loss: 2.898326, ppl: 18.143742 +epoch: 1, batch: 12712, sum loss: 4534.196289, avg loss: 2.640767, ppl: 14.023951 +epoch: 1, batch: 12713, sum loss: 4109.187988, avg loss: 2.371141, ppl: 10.709607 +epoch: 1, batch: 12714, sum loss: 5283.832031, avg loss: 2.917633, ppl: 18.497444 +epoch: 1, batch: 12715, sum loss: 4355.303223, avg loss: 2.706839, ppl: 14.981837 +epoch: 1, batch: 12716, sum loss: 4664.352539, avg loss: 2.648695, ppl: 14.135583 +epoch: 1, batch: 12717, sum loss: 5150.028809, avg loss: 2.751084, ppl: 15.659595 +epoch: 1, batch: 12718, sum loss: 4868.839355, avg loss: 2.697418, ppl: 14.841361 +epoch: 1, batch: 12719, sum loss: 4549.533691, avg loss: 2.700020, ppl: 14.880031 +epoch: 1, batch: 12720, sum loss: 
5148.869141, avg loss: 2.772681, ppl: 16.001480 +epoch: 1, batch: 12721, sum loss: 4640.955078, avg loss: 2.784016, ppl: 16.183887 +epoch: 1, batch: 12722, sum loss: 4448.540527, avg loss: 2.712525, ppl: 15.067267 +epoch: 1, batch: 12723, sum loss: 4644.227539, avg loss: 2.799414, ppl: 16.435011 +epoch: 1, batch: 12724, sum loss: 3624.958740, avg loss: 2.405414, ppl: 11.083016 +epoch: 1, batch: 12725, sum loss: 4527.558594, avg loss: 3.010345, ppl: 20.294395 +epoch: 1, batch: 12726, sum loss: 5028.549316, avg loss: 2.624504, ppl: 13.797727 +epoch: 1, batch: 12727, sum loss: 5276.397461, avg loss: 2.845953, ppl: 17.217964 +epoch: 1, batch: 12728, sum loss: 4409.003418, avg loss: 2.611969, ppl: 13.625854 +epoch: 1, batch: 12729, sum loss: 4960.845215, avg loss: 2.610971, ppl: 13.612268 +epoch: 1, batch: 12730, sum loss: 3735.699219, avg loss: 2.452856, ppl: 11.621489 +epoch: 1, batch: 12731, sum loss: 3803.036621, avg loss: 2.661327, ppl: 14.315278 +epoch: 1, batch: 12732, sum loss: 4023.217529, avg loss: 2.605711, ppl: 13.540849 +epoch: 1, batch: 12733, sum loss: 4184.528320, avg loss: 2.640081, ppl: 14.014338 +epoch: 1, batch: 12734, sum loss: 4558.764160, avg loss: 2.720026, ppl: 15.180724 +epoch: 1, batch: 12735, sum loss: 4804.769531, avg loss: 2.522189, ppl: 12.455829 +epoch: 1, batch: 12736, sum loss: 2740.222168, avg loss: 2.340070, ppl: 10.381966 +epoch: 1, batch: 12737, sum loss: 5822.893555, avg loss: 3.017043, ppl: 20.430796 +epoch: 1, batch: 12738, sum loss: 4324.670410, avg loss: 2.542428, ppl: 12.710498 +epoch: 1, batch: 12739, sum loss: 4286.797852, avg loss: 2.716602, ppl: 15.128825 +epoch: 1, batch: 12740, sum loss: 4132.967285, avg loss: 2.579880, ppl: 13.195552 +epoch: 1, batch: 12741, sum loss: 5039.403320, avg loss: 2.855186, ppl: 17.377668 +epoch: 1, batch: 12742, sum loss: 5318.202148, avg loss: 2.837888, ppl: 17.079655 +epoch: 1, batch: 12743, sum loss: 5610.099609, avg loss: 2.887339, ppl: 17.945496 +epoch: 1, batch: 12744, sum loss: 
4489.212891, avg loss: 2.643824, ppl: 14.066891 +epoch: 1, batch: 12745, sum loss: 4306.545898, avg loss: 2.527316, ppl: 12.519853 +epoch: 1, batch: 12746, sum loss: 5027.044922, avg loss: 2.811546, ppl: 16.635622 +epoch: 1, batch: 12747, sum loss: 4007.334961, avg loss: 2.450969, ppl: 11.599584 +epoch: 1, batch: 12748, sum loss: 4611.189453, avg loss: 2.566049, ppl: 13.014301 +epoch: 1, batch: 12749, sum loss: 4869.151855, avg loss: 2.859161, ppl: 17.446890 +epoch: 1, batch: 12750, sum loss: 4938.046387, avg loss: 2.857666, ppl: 17.420815 +epoch: 1, batch: 12751, sum loss: 4717.899902, avg loss: 2.680625, ppl: 14.594212 +epoch: 1, batch: 12752, sum loss: 4702.526367, avg loss: 2.826038, ppl: 16.878450 +epoch: 1, batch: 12753, sum loss: 4597.145508, avg loss: 2.909586, ppl: 18.349195 +epoch: 1, batch: 12754, sum loss: 3767.769287, avg loss: 2.438686, ppl: 11.457972 +epoch: 1, batch: 12755, sum loss: 4688.838867, avg loss: 2.782694, ppl: 16.162498 +epoch: 1, batch: 12756, sum loss: 4058.767334, avg loss: 2.637276, ppl: 13.975080 +epoch: 1, batch: 12757, sum loss: 4380.800293, avg loss: 2.839145, ppl: 17.101137 +epoch: 1, batch: 12758, sum loss: 5197.965332, avg loss: 2.815799, ppl: 16.706522 +epoch: 1, batch: 12759, sum loss: 3569.646484, avg loss: 2.415187, ppl: 11.191864 +epoch: 1, batch: 12760, sum loss: 3959.289307, avg loss: 2.604796, ppl: 13.528461 +epoch: 1, batch: 12761, sum loss: 5384.374023, avg loss: 2.845864, ppl: 17.216419 +epoch: 1, batch: 12762, sum loss: 4614.646484, avg loss: 2.700203, ppl: 14.882749 +epoch: 1, batch: 12763, sum loss: 5449.437500, avg loss: 2.756418, ppl: 15.743341 +epoch: 1, batch: 12764, sum loss: 4355.331543, avg loss: 2.675265, ppl: 14.516197 +epoch: 1, batch: 12765, sum loss: 4944.618652, avg loss: 2.861469, ppl: 17.487198 +epoch: 1, batch: 12766, sum loss: 4938.494141, avg loss: 2.740563, ppl: 15.495706 +epoch: 1, batch: 12767, sum loss: 5346.754883, avg loss: 3.007174, ppl: 20.230145 +epoch: 1, batch: 12768, sum loss: 
5241.443848, avg loss: 2.978093, ppl: 19.650307 +epoch: 1, batch: 12769, sum loss: 3842.045410, avg loss: 2.478739, ppl: 11.926216 +epoch: 1, batch: 12770, sum loss: 5078.350098, avg loss: 2.770513, ppl: 15.966820 +epoch: 1, batch: 12771, sum loss: 3787.644287, avg loss: 2.594277, ppl: 13.386904 +epoch: 1, batch: 12772, sum loss: 5596.372070, avg loss: 2.856750, ppl: 17.404861 +epoch: 1, batch: 12773, sum loss: 5258.111328, avg loss: 2.919551, ppl: 18.532963 +epoch: 1, batch: 12774, sum loss: 4758.096191, avg loss: 2.789036, ppl: 16.265337 +epoch: 1, batch: 12775, sum loss: 4224.405273, avg loss: 2.895411, ppl: 18.090944 +epoch: 1, batch: 12776, sum loss: 5131.252441, avg loss: 2.892476, ppl: 18.037914 +epoch: 1, batch: 12777, sum loss: 5225.662109, avg loss: 2.890300, ppl: 17.998705 +epoch: 1, batch: 12778, sum loss: 4416.286133, avg loss: 2.670064, ppl: 14.440896 +epoch: 1, batch: 12779, sum loss: 4377.707031, avg loss: 2.765450, ppl: 15.886184 +epoch: 1, batch: 12780, sum loss: 4413.598145, avg loss: 2.605430, ppl: 13.537041 +epoch: 1, batch: 12781, sum loss: 4116.777344, avg loss: 2.710189, ppl: 15.032118 +epoch: 1, batch: 12782, sum loss: 4968.709961, avg loss: 2.892148, ppl: 18.031998 +epoch: 1, batch: 12783, sum loss: 3852.649902, avg loss: 2.394438, ppl: 10.962030 +epoch: 1, batch: 12784, sum loss: 4973.452148, avg loss: 2.859950, ppl: 17.460648 +epoch: 1, batch: 12785, sum loss: 4281.752441, avg loss: 2.720300, ppl: 15.184876 +epoch: 1, batch: 12786, sum loss: 4289.381348, avg loss: 2.733831, ppl: 15.391747 +epoch: 1, batch: 12787, sum loss: 4196.906250, avg loss: 2.746666, ppl: 15.590573 +epoch: 1, batch: 12788, sum loss: 5276.173340, avg loss: 2.762394, ppl: 15.837720 +epoch: 1, batch: 12789, sum loss: 4250.597168, avg loss: 2.598165, ppl: 13.439049 +epoch: 1, batch: 12790, sum loss: 4528.199219, avg loss: 2.744363, ppl: 15.554704 +epoch: 1, batch: 12791, sum loss: 6076.697266, avg loss: 3.061308, ppl: 21.355480 +epoch: 1, batch: 12792, sum loss: 
4403.789062, avg loss: 2.598106, ppl: 13.438257 +epoch: 1, batch: 12793, sum loss: 5718.285156, avg loss: 3.041641, ppl: 20.939577 +epoch: 1, batch: 12794, sum loss: 4670.302246, avg loss: 2.750473, ppl: 15.650025 +epoch: 1, batch: 12795, sum loss: 3849.451416, avg loss: 2.557775, ppl: 12.907070 +epoch: 1, batch: 12796, sum loss: 4087.829834, avg loss: 2.418834, ppl: 11.232757 +epoch: 1, batch: 12797, sum loss: 3483.305664, avg loss: 2.538853, ppl: 12.665132 +epoch: 1, batch: 12798, sum loss: 5043.014160, avg loss: 2.641705, ppl: 14.037113 +epoch: 1, batch: 12799, sum loss: 5079.810059, avg loss: 2.683470, ppl: 14.635798 +epoch: 1, batch: 12800, sum loss: 4733.565430, avg loss: 2.820957, ppl: 16.792910 +epoch: 1, batch: 12801, sum loss: 4674.917480, avg loss: 2.779380, ppl: 16.109032 +epoch: 1, batch: 12802, sum loss: 4312.084473, avg loss: 2.611802, ppl: 13.623576 +epoch: 1, batch: 12803, sum loss: 4232.292480, avg loss: 2.618993, ppl: 13.721896 +epoch: 1, batch: 12804, sum loss: 3835.707520, avg loss: 2.543573, ppl: 12.725052 +epoch: 1, batch: 12805, sum loss: 4808.741211, avg loss: 2.889869, ppl: 17.990948 +epoch: 1, batch: 12806, sum loss: 5066.701172, avg loss: 2.833725, ppl: 17.008709 +epoch: 1, batch: 12807, sum loss: 4909.600098, avg loss: 2.810303, ppl: 16.614960 +epoch: 1, batch: 12808, sum loss: 4721.177734, avg loss: 2.928770, ppl: 18.704615 +epoch: 1, batch: 12809, sum loss: 4254.930176, avg loss: 2.676057, ppl: 14.527692 +epoch: 1, batch: 12810, sum loss: 4220.336914, avg loss: 2.540841, ppl: 12.690340 +epoch: 1, batch: 12811, sum loss: 3886.790039, avg loss: 2.439919, ppl: 11.472106 +epoch: 1, batch: 12812, sum loss: 4292.829102, avg loss: 2.467143, ppl: 11.788719 +epoch: 1, batch: 12813, sum loss: 4396.748535, avg loss: 2.565197, ppl: 13.003226 +epoch: 1, batch: 12814, sum loss: 4593.608887, avg loss: 2.574893, ppl: 13.129912 +epoch: 1, batch: 12815, sum loss: 4433.575684, avg loss: 2.562760, ppl: 12.971574 +epoch: 1, batch: 12816, sum loss: 
4758.961914, avg loss: 2.675077, ppl: 14.513467 +epoch: 1, batch: 12817, sum loss: 4931.723633, avg loss: 2.724709, ppl: 15.251979 +epoch: 1, batch: 12818, sum loss: 4158.779785, avg loss: 2.500770, ppl: 12.191874 +epoch: 1, batch: 12819, sum loss: 4592.588867, avg loss: 2.670110, ppl: 14.441554 +epoch: 1, batch: 12820, sum loss: 4532.675293, avg loss: 2.620044, ppl: 13.736321 +epoch: 1, batch: 12821, sum loss: 4660.166992, avg loss: 2.773909, ppl: 16.021139 +epoch: 1, batch: 12822, sum loss: 5556.095215, avg loss: 2.862491, ppl: 17.505075 +epoch: 1, batch: 12823, sum loss: 4558.675293, avg loss: 2.669014, ppl: 14.425732 +epoch: 1, batch: 12824, sum loss: 4974.800781, avg loss: 2.777666, ppl: 16.081448 +epoch: 1, batch: 12825, sum loss: 4313.665039, avg loss: 2.737097, ppl: 15.442092 +epoch: 1, batch: 12826, sum loss: 4418.323730, avg loss: 2.567300, ppl: 13.030596 +epoch: 1, batch: 12827, sum loss: 3850.476074, avg loss: 2.441646, ppl: 11.491945 +epoch: 1, batch: 12828, sum loss: 4247.453613, avg loss: 2.546435, ppl: 12.761529 +epoch: 1, batch: 12829, sum loss: 4686.952148, avg loss: 2.645007, ppl: 14.083542 +epoch: 1, batch: 12830, sum loss: 4894.248047, avg loss: 2.913243, ppl: 18.416428 +epoch: 1, batch: 12831, sum loss: 3526.604736, avg loss: 2.596911, ppl: 13.422209 +epoch: 1, batch: 12832, sum loss: 4151.062988, avg loss: 2.760015, ppl: 15.800084 +epoch: 1, batch: 12833, sum loss: 4860.715332, avg loss: 2.799951, ppl: 16.443846 +epoch: 1, batch: 12834, sum loss: 4511.934570, avg loss: 2.711499, ppl: 15.051825 +epoch: 1, batch: 12835, sum loss: 4605.527344, avg loss: 2.693291, ppl: 14.780233 +epoch: 1, batch: 12836, sum loss: 4472.291992, avg loss: 2.783007, ppl: 16.167562 +epoch: 1, batch: 12837, sum loss: 5463.479492, avg loss: 2.950043, ppl: 19.106770 +epoch: 1, batch: 12838, sum loss: 4043.137695, avg loss: 2.488085, ppl: 12.038198 +epoch: 1, batch: 12839, sum loss: 4583.526367, avg loss: 2.761161, ppl: 15.818192 +epoch: 1, batch: 12840, sum loss: 
3875.387207, avg loss: 2.387792, ppl: 10.889427 +epoch: 1, batch: 12841, sum loss: 4806.962891, avg loss: 2.835966, ppl: 17.046865 +epoch: 1, batch: 12842, sum loss: 4103.078125, avg loss: 2.601825, ppl: 13.488332 +epoch: 1, batch: 12843, sum loss: 4155.405762, avg loss: 2.636679, ppl: 13.966739 +epoch: 1, batch: 12844, sum loss: 3449.964111, avg loss: 2.509065, ppl: 12.293427 +epoch: 1, batch: 12845, sum loss: 4494.218262, avg loss: 2.565193, ppl: 13.003170 +epoch: 1, batch: 12846, sum loss: 3716.623047, avg loss: 2.523166, ppl: 12.468004 +epoch: 1, batch: 12847, sum loss: 4205.717773, avg loss: 2.697702, ppl: 14.845583 +epoch: 1, batch: 12848, sum loss: 4902.415527, avg loss: 2.723564, ppl: 15.234524 +epoch: 1, batch: 12849, sum loss: 5675.094238, avg loss: 2.925306, ppl: 18.639935 +epoch: 1, batch: 12850, sum loss: 4116.587891, avg loss: 2.817651, ppl: 16.737488 +epoch: 1, batch: 12851, sum loss: 4003.566895, avg loss: 2.494434, ppl: 12.114876 +epoch: 1, batch: 12852, sum loss: 4120.635742, avg loss: 2.649927, ppl: 14.153001 +epoch: 1, batch: 12853, sum loss: 5326.768555, avg loss: 2.910803, ppl: 18.371538 +epoch: 1, batch: 12854, sum loss: 4752.612793, avg loss: 2.792369, ppl: 16.319641 +epoch: 1, batch: 12855, sum loss: 4048.419922, avg loss: 2.437339, ppl: 11.442550 +epoch: 1, batch: 12856, sum loss: 4887.712402, avg loss: 2.826901, ppl: 16.893036 +epoch: 1, batch: 12857, sum loss: 4706.071289, avg loss: 2.926661, ppl: 18.665209 +epoch: 1, batch: 12858, sum loss: 4339.478027, avg loss: 2.677037, ppl: 14.541948 +epoch: 1, batch: 12859, sum loss: 4107.600586, avg loss: 2.711287, ppl: 15.048638 +epoch: 1, batch: 12860, sum loss: 3357.018799, avg loss: 2.197002, ppl: 8.997996 +epoch: 1, batch: 12861, sum loss: 4020.768066, avg loss: 2.757728, ppl: 15.763991 +epoch: 1, batch: 12862, sum loss: 4584.543945, avg loss: 2.659248, ppl: 14.285547 +epoch: 1, batch: 12863, sum loss: 3368.056641, avg loss: 2.437089, ppl: 11.439688 +epoch: 1, batch: 12864, sum loss: 
4393.731445, avg loss: 2.684014, ppl: 14.643761 +epoch: 1, batch: 12865, sum loss: 3823.824463, avg loss: 2.449599, ppl: 11.583704 +epoch: 1, batch: 12866, sum loss: 4804.096680, avg loss: 2.751487, ppl: 15.665914 +epoch: 1, batch: 12867, sum loss: 4880.873047, avg loss: 2.849313, ppl: 17.275906 +epoch: 1, batch: 12868, sum loss: 4393.866699, avg loss: 2.572521, ppl: 13.098811 +epoch: 1, batch: 12869, sum loss: 4020.627197, avg loss: 2.650380, ppl: 14.159424 +epoch: 1, batch: 12870, sum loss: 5140.885254, avg loss: 3.080219, ppl: 21.763168 +epoch: 1, batch: 12871, sum loss: 4415.347656, avg loss: 2.571548, ppl: 13.086066 +epoch: 1, batch: 12872, sum loss: 4309.499512, avg loss: 2.468213, ppl: 11.801337 +epoch: 1, batch: 12873, sum loss: 3339.103271, avg loss: 2.590460, ppl: 13.335909 +epoch: 1, batch: 12874, sum loss: 4818.663574, avg loss: 2.733218, ppl: 15.382311 +epoch: 1, batch: 12875, sum loss: 4853.903320, avg loss: 2.828615, ppl: 16.922007 +epoch: 1, batch: 12876, sum loss: 3921.720703, avg loss: 2.497911, ppl: 12.157074 +epoch: 1, batch: 12877, sum loss: 5028.331543, avg loss: 2.871691, ppl: 17.666876 +epoch: 1, batch: 12878, sum loss: 4543.939941, avg loss: 2.520211, ppl: 12.431216 +epoch: 1, batch: 12879, sum loss: 5082.027344, avg loss: 2.975426, ppl: 19.597965 +epoch: 1, batch: 12880, sum loss: 4490.102539, avg loss: 2.639684, ppl: 14.008776 +epoch: 1, batch: 12881, sum loss: 3758.958984, avg loss: 2.431409, ppl: 11.374902 +epoch: 1, batch: 12882, sum loss: 3825.837891, avg loss: 2.778386, ppl: 16.093031 +epoch: 1, batch: 12883, sum loss: 4540.630859, avg loss: 2.678838, ppl: 14.568159 +epoch: 1, batch: 12884, sum loss: 4307.916992, avg loss: 2.680720, ppl: 14.595600 +epoch: 1, batch: 12885, sum loss: 4053.733398, avg loss: 2.446429, ppl: 11.547042 +epoch: 1, batch: 12886, sum loss: 4265.312012, avg loss: 2.465498, ppl: 11.769344 +epoch: 1, batch: 12887, sum loss: 4818.328125, avg loss: 2.729931, ppl: 15.331827 +epoch: 1, batch: 12888, sum loss: 
6295.585938, avg loss: 3.057594, ppl: 21.276300 +epoch: 1, batch: 12889, sum loss: 3999.600342, avg loss: 2.461293, ppl: 11.719950 +epoch: 1, batch: 12890, sum loss: 4661.194336, avg loss: 2.565324, ppl: 13.004875 +epoch: 1, batch: 12891, sum loss: 4821.088867, avg loss: 2.845979, ppl: 17.218410 +epoch: 1, batch: 12892, sum loss: 4609.952148, avg loss: 2.695879, ppl: 14.818531 +epoch: 1, batch: 12893, sum loss: 4042.654053, avg loss: 2.378032, ppl: 10.783659 +epoch: 1, batch: 12894, sum loss: 3863.700684, avg loss: 2.484695, ppl: 11.997463 +epoch: 1, batch: 12895, sum loss: 5289.659180, avg loss: 2.932184, ppl: 18.768566 +epoch: 1, batch: 12896, sum loss: 5159.860352, avg loss: 2.665217, ppl: 14.371067 +epoch: 1, batch: 12897, sum loss: 3906.316895, avg loss: 2.518579, ppl: 12.410954 +epoch: 1, batch: 12898, sum loss: 5425.692383, avg loss: 3.053288, ppl: 21.184881 +epoch: 1, batch: 12899, sum loss: 5266.811523, avg loss: 2.831619, ppl: 16.972921 +epoch: 1, batch: 12900, sum loss: 4219.447266, avg loss: 2.640455, ppl: 14.019585 +epoch: 1, batch: 12901, sum loss: 5255.634277, avg loss: 2.833226, ppl: 17.000214 +epoch: 1, batch: 12902, sum loss: 4012.627930, avg loss: 2.600537, ppl: 13.470964 +epoch: 1, batch: 12903, sum loss: 4568.940430, avg loss: 2.612316, ppl: 13.630581 +epoch: 1, batch: 12904, sum loss: 3697.756592, avg loss: 2.525790, ppl: 12.500767 +epoch: 1, batch: 12905, sum loss: 4515.713867, avg loss: 2.858047, ppl: 17.427454 +epoch: 1, batch: 12906, sum loss: 5291.092285, avg loss: 3.032145, ppl: 20.741667 +epoch: 1, batch: 12907, sum loss: 3422.417480, avg loss: 2.453346, ppl: 11.627187 +epoch: 1, batch: 12908, sum loss: 3797.724609, avg loss: 2.628183, ppl: 13.848586 +epoch: 1, batch: 12909, sum loss: 5134.970703, avg loss: 2.855935, ppl: 17.390684 +epoch: 1, batch: 12910, sum loss: 4869.486328, avg loss: 2.976459, ppl: 19.618221 +epoch: 1, batch: 12911, sum loss: 3824.118164, avg loss: 2.428011, ppl: 11.336316 +epoch: 1, batch: 12912, sum loss: 
5121.126953, avg loss: 2.883518, ppl: 17.877045 +epoch: 1, batch: 12913, sum loss: 4480.739746, avg loss: 2.793479, ppl: 16.337755 +epoch: 1, batch: 12914, sum loss: 4281.381836, avg loss: 2.637943, ppl: 13.984412 +epoch: 1, batch: 12915, sum loss: 4864.187500, avg loss: 2.706838, ppl: 14.981827 +epoch: 1, batch: 12916, sum loss: 4519.991699, avg loss: 2.532208, ppl: 12.581261 +epoch: 1, batch: 12917, sum loss: 4757.012695, avg loss: 2.836621, ppl: 17.058022 +epoch: 1, batch: 12918, sum loss: 4989.857422, avg loss: 2.719269, ppl: 15.169223 +epoch: 1, batch: 12919, sum loss: 3882.939209, avg loss: 2.527955, ppl: 12.527864 +epoch: 1, batch: 12920, sum loss: 5344.795410, avg loss: 2.835435, ppl: 17.037813 +epoch: 1, batch: 12921, sum loss: 4783.025879, avg loss: 2.805294, ppl: 16.531933 +epoch: 1, batch: 12922, sum loss: 3963.168457, avg loss: 2.521100, ppl: 12.442270 +epoch: 1, batch: 12923, sum loss: 5874.239746, avg loss: 2.932721, ppl: 18.778656 +epoch: 1, batch: 12924, sum loss: 4437.333008, avg loss: 2.702395, ppl: 14.915418 +epoch: 1, batch: 12925, sum loss: 5140.818359, avg loss: 2.994070, ppl: 19.966784 +epoch: 1, batch: 12926, sum loss: 4322.263672, avg loss: 2.770682, ppl: 15.969520 +epoch: 1, batch: 12927, sum loss: 4313.738281, avg loss: 2.483442, ppl: 11.982435 +epoch: 1, batch: 12928, sum loss: 4793.263672, avg loss: 2.705002, ppl: 14.954348 +epoch: 1, batch: 12929, sum loss: 3879.440430, avg loss: 2.637281, ppl: 13.975156 +epoch: 1, batch: 12930, sum loss: 4545.825195, avg loss: 2.823494, ppl: 16.835571 +epoch: 1, batch: 12931, sum loss: 4500.583496, avg loss: 2.535540, ppl: 12.623244 +epoch: 1, batch: 12932, sum loss: 3716.812012, avg loss: 2.626722, ppl: 13.828367 +epoch: 1, batch: 12933, sum loss: 5387.991699, avg loss: 3.005015, ppl: 20.186518 +epoch: 1, batch: 12934, sum loss: 4875.417480, avg loss: 2.699567, ppl: 14.873288 +epoch: 1, batch: 12935, sum loss: 3546.191162, avg loss: 2.394457, ppl: 10.962248 +epoch: 1, batch: 12936, sum loss: 
4129.655273, avg loss: 2.557062, ppl: 12.897869 +epoch: 1, batch: 12937, sum loss: 4432.545898, avg loss: 2.870820, ppl: 17.651487 +epoch: 1, batch: 12938, sum loss: 5164.082031, avg loss: 2.801998, ppl: 16.477535 +epoch: 1, batch: 12939, sum loss: 5608.878906, avg loss: 2.804440, ppl: 16.517817 +epoch: 1, batch: 12940, sum loss: 5321.095215, avg loss: 2.884062, ppl: 17.886787 +epoch: 1, batch: 12941, sum loss: 3993.987061, avg loss: 2.632819, ppl: 13.912941 +epoch: 1, batch: 12942, sum loss: 4285.305664, avg loss: 2.795372, ppl: 16.368721 +epoch: 1, batch: 12943, sum loss: 4417.013672, avg loss: 2.619818, ppl: 13.733230 +epoch: 1, batch: 12944, sum loss: 4319.522461, avg loss: 2.637071, ppl: 13.972218 +epoch: 1, batch: 12945, sum loss: 4990.796875, avg loss: 2.749750, ppl: 15.638727 +epoch: 1, batch: 12946, sum loss: 4517.585938, avg loss: 2.736273, ppl: 15.429366 +epoch: 1, batch: 12947, sum loss: 4039.712891, avg loss: 2.564897, ppl: 12.999320 +epoch: 1, batch: 12948, sum loss: 4306.354492, avg loss: 2.656604, ppl: 14.247815 +epoch: 1, batch: 12949, sum loss: 4470.115723, avg loss: 2.804338, ppl: 16.516146 +epoch: 1, batch: 12950, sum loss: 4346.823730, avg loss: 2.763397, ppl: 15.853606 +epoch: 1, batch: 12951, sum loss: 5087.843262, avg loss: 2.798594, ppl: 16.421537 +epoch: 1, batch: 12952, sum loss: 3681.135254, avg loss: 2.592349, ppl: 13.361117 +epoch: 1, batch: 12953, sum loss: 5243.963867, avg loss: 3.052366, ppl: 21.165352 +epoch: 1, batch: 12954, sum loss: 4696.401367, avg loss: 2.736831, ppl: 15.437980 +epoch: 1, batch: 12955, sum loss: 5336.491699, avg loss: 2.884590, ppl: 17.896231 +epoch: 1, batch: 12956, sum loss: 3691.474121, avg loss: 2.457706, ppl: 11.677988 +epoch: 1, batch: 12957, sum loss: 5019.869629, avg loss: 2.809104, ppl: 16.595049 +epoch: 1, batch: 12958, sum loss: 3758.586426, avg loss: 2.338884, ppl: 10.369657 +epoch: 1, batch: 12959, sum loss: 3933.772705, avg loss: 2.546131, ppl: 12.757653 +epoch: 1, batch: 12960, sum loss: 
4547.094727, avg loss: 2.595374, ppl: 13.401597 +epoch: 1, batch: 12961, sum loss: 3976.489746, avg loss: 2.459177, ppl: 11.695189 +epoch: 1, batch: 12962, sum loss: 4046.983398, avg loss: 2.529365, ppl: 12.545532 +epoch: 1, batch: 12963, sum loss: 4781.899414, avg loss: 2.896366, ppl: 18.108213 +epoch: 1, batch: 12964, sum loss: 4324.576660, avg loss: 2.527514, ppl: 12.522340 +epoch: 1, batch: 12965, sum loss: 3996.583984, avg loss: 2.676881, ppl: 14.539678 +epoch: 1, batch: 12966, sum loss: 5949.297363, avg loss: 3.171267, ppl: 23.837673 +epoch: 1, batch: 12967, sum loss: 4032.139160, avg loss: 2.733654, ppl: 15.389012 +epoch: 1, batch: 12968, sum loss: 4509.543457, avg loss: 2.721511, ppl: 15.203276 +epoch: 1, batch: 12969, sum loss: 4750.843750, avg loss: 2.925396, ppl: 18.641615 +epoch: 1, batch: 12970, sum loss: 3922.195557, avg loss: 2.381418, ppl: 10.820235 +epoch: 1, batch: 12971, sum loss: 4769.866211, avg loss: 2.928095, ppl: 18.691982 +epoch: 1, batch: 12972, sum loss: 5588.724121, avg loss: 2.939887, ppl: 18.913700 +epoch: 1, batch: 12973, sum loss: 5813.244629, avg loss: 2.999610, ppl: 20.077713 +epoch: 1, batch: 12974, sum loss: 4074.582764, avg loss: 2.654451, ppl: 14.217184 +epoch: 1, batch: 12975, sum loss: 4088.729492, avg loss: 2.594372, ppl: 13.388171 +epoch: 1, batch: 12976, sum loss: 3955.719238, avg loss: 2.773997, ppl: 16.022541 +epoch: 1, batch: 12977, sum loss: 4166.927734, avg loss: 2.705797, ppl: 14.966243 +epoch: 1, batch: 12978, sum loss: 4927.468750, avg loss: 2.849895, ppl: 17.285971 +epoch: 1, batch: 12979, sum loss: 5395.680176, avg loss: 3.072711, ppl: 21.600382 +epoch: 1, batch: 12980, sum loss: 4706.388672, avg loss: 2.926859, ppl: 18.668894 +epoch: 1, batch: 12981, sum loss: 4849.260254, avg loss: 2.829207, ppl: 16.932024 +epoch: 1, batch: 12982, sum loss: 3412.266602, avg loss: 2.361430, ppl: 10.606109 +epoch: 1, batch: 12983, sum loss: 5095.192383, avg loss: 2.938404, ppl: 18.885677 +epoch: 1, batch: 12984, sum loss: 
4969.885742, avg loss: 2.814205, ppl: 16.679905 +epoch: 1, batch: 12985, sum loss: 4116.640625, avg loss: 2.671409, ppl: 14.460324 +epoch: 1, batch: 12986, sum loss: 3941.826172, avg loss: 2.600149, ppl: 13.465750 +epoch: 1, batch: 12987, sum loss: 5267.374512, avg loss: 2.918213, ppl: 18.508183 +epoch: 1, batch: 12988, sum loss: 4807.826660, avg loss: 2.851617, ppl: 17.315765 +epoch: 1, batch: 12989, sum loss: 4294.726074, avg loss: 2.751266, ppl: 15.662448 +epoch: 1, batch: 12990, sum loss: 3558.647949, avg loss: 2.350494, ppl: 10.490752 +epoch: 1, batch: 12991, sum loss: 4113.911621, avg loss: 2.598807, ppl: 13.447683 +epoch: 1, batch: 12992, sum loss: 3952.151123, avg loss: 2.652451, ppl: 14.188766 +epoch: 1, batch: 12993, sum loss: 3244.799316, avg loss: 2.293144, ppl: 9.906035 +epoch: 1, batch: 12994, sum loss: 3734.001709, avg loss: 2.349907, ppl: 10.484591 +epoch: 1, batch: 12995, sum loss: 4690.569336, avg loss: 2.659053, ppl: 14.282755 +epoch: 1, batch: 12996, sum loss: 4253.799805, avg loss: 2.650342, ppl: 14.158887 +epoch: 1, batch: 12997, sum loss: 5279.951172, avg loss: 3.037947, ppl: 20.862362 +epoch: 1, batch: 12998, sum loss: 3111.797607, avg loss: 2.552746, ppl: 12.842324 +epoch: 1, batch: 12999, sum loss: 3791.139160, avg loss: 2.356208, ppl: 10.550870 +epoch: 1, batch: 13000, sum loss: 4708.337891, avg loss: 2.836348, ppl: 17.053377 +epoch: 1, batch: 13001, sum loss: 3856.726562, avg loss: 2.411962, ppl: 11.155823 +epoch: 1, batch: 13002, sum loss: 4731.431641, avg loss: 2.671616, ppl: 14.463321 +epoch: 1, batch: 13003, sum loss: 4751.355957, avg loss: 2.578055, ppl: 13.171499 +epoch: 1, batch: 13004, sum loss: 5043.761230, avg loss: 2.781997, ppl: 16.151251 +epoch: 1, batch: 13005, sum loss: 4285.964355, avg loss: 2.714354, ppl: 15.094849 +epoch: 1, batch: 13006, sum loss: 5031.994629, avg loss: 2.655406, ppl: 14.230766 +epoch: 1, batch: 13007, sum loss: 4623.276855, avg loss: 2.710010, ppl: 15.029423 +epoch: 1, batch: 13008, sum loss: 
4006.222900, avg loss: 2.686937, ppl: 14.686624 +epoch: 1, batch: 13009, sum loss: 4119.843262, avg loss: 2.701537, ppl: 14.902615 +epoch: 1, batch: 13010, sum loss: 4513.304688, avg loss: 2.746990, ppl: 15.595618 +epoch: 1, batch: 13011, sum loss: 4870.684570, avg loss: 2.885477, ppl: 17.912107 +epoch: 1, batch: 13012, sum loss: 3953.617676, avg loss: 2.549076, ppl: 12.795280 +epoch: 1, batch: 13013, sum loss: 3789.366211, avg loss: 2.649907, ppl: 14.152718 +epoch: 1, batch: 13014, sum loss: 4609.205078, avg loss: 2.703346, ppl: 14.929606 +epoch: 1, batch: 13015, sum loss: 4034.637207, avg loss: 2.650879, ppl: 14.166481 +epoch: 1, batch: 13016, sum loss: 4627.316406, avg loss: 2.569304, ppl: 13.056730 +epoch: 1, batch: 13017, sum loss: 4084.067139, avg loss: 2.810783, ppl: 16.622923 +epoch: 1, batch: 13018, sum loss: 3474.044922, avg loss: 2.645884, ppl: 14.095894 +epoch: 1, batch: 13019, sum loss: 5411.666504, avg loss: 2.931564, ppl: 18.756937 +epoch: 1, batch: 13020, sum loss: 4013.290771, avg loss: 2.602653, ppl: 13.499502 +epoch: 1, batch: 13021, sum loss: 4057.923828, avg loss: 2.606245, ppl: 13.548086 +epoch: 1, batch: 13022, sum loss: 4952.531738, avg loss: 2.616234, ppl: 13.684096 +epoch: 1, batch: 13023, sum loss: 4181.280273, avg loss: 2.359639, ppl: 10.587128 +epoch: 1, batch: 13024, sum loss: 4438.060547, avg loss: 2.910204, ppl: 18.360538 +epoch: 1, batch: 13025, sum loss: 4479.375488, avg loss: 2.698419, ppl: 14.856227 +epoch: 1, batch: 13026, sum loss: 4704.806152, avg loss: 2.825710, ppl: 16.872913 +epoch: 1, batch: 13027, sum loss: 4404.577148, avg loss: 2.782424, ppl: 16.158136 +epoch: 1, batch: 13028, sum loss: 5183.973633, avg loss: 2.817377, ppl: 16.732901 +epoch: 1, batch: 13029, sum loss: 4889.650391, avg loss: 2.638775, ppl: 13.996050 +epoch: 1, batch: 13030, sum loss: 4849.554199, avg loss: 2.921418, ppl: 18.567600 +epoch: 1, batch: 13031, sum loss: 4752.594727, avg loss: 2.875133, ppl: 17.727777 +epoch: 1, batch: 13032, sum loss: 
4038.681885, avg loss: 2.565871, ppl: 13.011993 +epoch: 1, batch: 13033, sum loss: 4798.077148, avg loss: 2.783108, ppl: 16.169189 +epoch: 1, batch: 13034, sum loss: 4073.188965, avg loss: 2.458171, ppl: 11.683419 +epoch: 1, batch: 13035, sum loss: 4433.619629, avg loss: 2.760660, ppl: 15.810269 +epoch: 1, batch: 13036, sum loss: 4505.955078, avg loss: 2.793525, ppl: 16.338503 +epoch: 1, batch: 13037, sum loss: 3510.331543, avg loss: 2.530881, ppl: 12.564567 +epoch: 1, batch: 13038, sum loss: 3298.034424, avg loss: 2.611270, ppl: 13.616339 +epoch: 1, batch: 13039, sum loss: 4202.661133, avg loss: 2.636550, ppl: 13.964944 +epoch: 1, batch: 13040, sum loss: 5121.839844, avg loss: 2.791193, ppl: 16.300463 +epoch: 1, batch: 13041, sum loss: 4630.473145, avg loss: 2.670400, ppl: 14.445742 +epoch: 1, batch: 13042, sum loss: 4965.313477, avg loss: 2.755446, ppl: 15.728053 +epoch: 1, batch: 13043, sum loss: 4316.845703, avg loss: 2.432026, ppl: 11.381915 +epoch: 1, batch: 13044, sum loss: 4392.787109, avg loss: 2.808687, ppl: 16.588131 +epoch: 1, batch: 13045, sum loss: 3944.616943, avg loss: 2.593437, ppl: 13.375661 +epoch: 1, batch: 13046, sum loss: 4600.338867, avg loss: 2.645393, ppl: 14.088986 +epoch: 1, batch: 13047, sum loss: 5188.893555, avg loss: 2.820051, ppl: 16.777702 +epoch: 1, batch: 13048, sum loss: 5525.607910, avg loss: 2.917428, ppl: 18.493656 +epoch: 1, batch: 13049, sum loss: 5257.146973, avg loss: 3.016149, ppl: 20.412523 +epoch: 1, batch: 13050, sum loss: 4214.812500, avg loss: 2.566877, ppl: 13.025088 +epoch: 1, batch: 13051, sum loss: 4519.881836, avg loss: 2.563745, ppl: 12.984347 +epoch: 1, batch: 13052, sum loss: 4848.723145, avg loss: 2.656835, ppl: 14.251107 +epoch: 1, batch: 13053, sum loss: 4844.620605, avg loss: 2.560582, ppl: 12.943344 +epoch: 1, batch: 13054, sum loss: 4630.022461, avg loss: 2.660933, ppl: 14.309628 +epoch: 1, batch: 13055, sum loss: 5032.406738, avg loss: 2.846384, ppl: 17.225384 +epoch: 1, batch: 13056, sum loss: 
4562.404297, avg loss: 2.679039, ppl: 14.571091 +epoch: 1, batch: 13057, sum loss: 4552.100098, avg loss: 2.698340, ppl: 14.855058 +epoch: 1, batch: 13058, sum loss: 5022.964355, avg loss: 2.920328, ppl: 18.547373 +epoch: 1, batch: 13059, sum loss: 3919.717773, avg loss: 2.541970, ppl: 12.704675 +epoch: 1, batch: 13060, sum loss: 4662.048828, avg loss: 2.710494, ppl: 15.036695 +epoch: 1, batch: 13061, sum loss: 4301.881836, avg loss: 2.764706, ppl: 15.874367 +epoch: 1, batch: 13062, sum loss: 4270.118164, avg loss: 2.658853, ppl: 14.279902 +epoch: 1, batch: 13063, sum loss: 5529.067871, avg loss: 2.922340, ppl: 18.584728 +epoch: 1, batch: 13064, sum loss: 3799.822266, avg loss: 2.635106, ppl: 13.944785 +epoch: 1, batch: 13065, sum loss: 4048.398438, avg loss: 2.375821, ppl: 10.759839 +epoch: 1, batch: 13066, sum loss: 4660.949707, avg loss: 2.661879, ppl: 14.323174 +epoch: 1, batch: 13067, sum loss: 4356.356934, avg loss: 2.774750, ppl: 16.034615 +epoch: 1, batch: 13068, sum loss: 5466.455566, avg loss: 2.972515, ppl: 19.541010 +epoch: 1, batch: 13069, sum loss: 4465.976562, avg loss: 2.674237, ppl: 14.501288 +epoch: 1, batch: 13070, sum loss: 4298.980957, avg loss: 2.594436, ppl: 13.389036 +epoch: 1, batch: 13071, sum loss: 5038.050781, avg loss: 2.902103, ppl: 18.212404 +epoch: 1, batch: 13072, sum loss: 4496.879395, avg loss: 2.686308, ppl: 14.677389 +epoch: 1, batch: 13073, sum loss: 4726.423340, avg loss: 2.585571, ppl: 13.270862 +epoch: 1, batch: 13074, sum loss: 4319.856445, avg loss: 2.610185, ppl: 13.601569 +epoch: 1, batch: 13075, sum loss: 4143.442383, avg loss: 2.893466, ppl: 18.055775 +epoch: 1, batch: 13076, sum loss: 4466.935547, avg loss: 2.710519, ppl: 15.037079 +epoch: 1, batch: 13077, sum loss: 4654.746094, avg loss: 2.652277, ppl: 14.186304 +epoch: 1, batch: 13078, sum loss: 4332.730957, avg loss: 2.749195, ppl: 15.630038 +epoch: 1, batch: 13079, sum loss: 4381.685059, avg loss: 2.615932, ppl: 13.679954 +epoch: 1, batch: 13080, sum loss: 
4659.913574, avg loss: 2.736297, ppl: 15.429741 +epoch: 1, batch: 13081, sum loss: 6109.905762, avg loss: 3.117299, ppl: 22.585291 +epoch: 1, batch: 13082, sum loss: 4994.325684, avg loss: 2.912143, ppl: 18.396183 +epoch: 1, batch: 13083, sum loss: 4459.883789, avg loss: 2.742856, ppl: 15.531276 +epoch: 1, batch: 13084, sum loss: 4404.319824, avg loss: 2.606106, ppl: 13.546203 +epoch: 1, batch: 13085, sum loss: 4915.514648, avg loss: 2.729325, ppl: 15.322541 +epoch: 1, batch: 13086, sum loss: 3959.731689, avg loss: 2.769043, ppl: 15.943372 +epoch: 1, batch: 13087, sum loss: 4594.222168, avg loss: 2.672613, ppl: 14.477756 +epoch: 1, batch: 13088, sum loss: 5005.156250, avg loss: 2.837390, ppl: 17.071156 +epoch: 1, batch: 13089, sum loss: 4178.926270, avg loss: 2.620017, ppl: 13.735951 +epoch: 1, batch: 13090, sum loss: 4875.276367, avg loss: 2.908876, ppl: 18.336180 +epoch: 1, batch: 13091, sum loss: 4522.632324, avg loss: 2.556604, ppl: 12.891960 +epoch: 1, batch: 13092, sum loss: 4362.386719, avg loss: 2.668126, ppl: 14.412939 +epoch: 1, batch: 13093, sum loss: 4055.833984, avg loss: 2.633658, ppl: 13.924619 +epoch: 1, batch: 13094, sum loss: 5332.048340, avg loss: 2.929697, ppl: 18.721958 +epoch: 1, batch: 13095, sum loss: 4126.632812, avg loss: 2.765840, ppl: 15.892381 +epoch: 1, batch: 13096, sum loss: 5096.353516, avg loss: 2.839194, ppl: 17.101980 +epoch: 1, batch: 13097, sum loss: 5470.527344, avg loss: 2.945895, ppl: 19.027683 +epoch: 1, batch: 13098, sum loss: 3705.464600, avg loss: 2.677359, ppl: 14.546623 +epoch: 1, batch: 13099, sum loss: 4977.241211, avg loss: 2.880348, ppl: 17.820473 +epoch: 1, batch: 13100, sum loss: 4175.704102, avg loss: 2.550827, ppl: 12.817703 +epoch: 1, batch: 13101, sum loss: 4622.958008, avg loss: 2.610366, ppl: 13.604027 +epoch: 1, batch: 13102, sum loss: 4813.059570, avg loss: 2.568335, ppl: 13.044086 +epoch: 1, batch: 13103, sum loss: 4218.176758, avg loss: 2.518315, ppl: 12.407667 +epoch: 1, batch: 13104, sum loss: 
5030.665039, avg loss: 2.722221, ppl: 15.214081 +epoch: 1, batch: 13105, sum loss: 4602.634277, avg loss: 2.736406, ppl: 15.431418 +epoch: 1, batch: 13106, sum loss: 4375.897949, avg loss: 2.625014, ppl: 13.804762 +epoch: 1, batch: 13107, sum loss: 4621.999023, avg loss: 2.699766, ppl: 14.876245 +epoch: 1, batch: 13108, sum loss: 4737.582031, avg loss: 2.791740, ppl: 16.309368 +epoch: 1, batch: 13109, sum loss: 4978.559082, avg loss: 2.691113, ppl: 14.748081 +epoch: 1, batch: 13110, sum loss: 4304.062500, avg loss: 2.663405, ppl: 14.345050 +epoch: 1, batch: 13111, sum loss: 4089.357422, avg loss: 2.521182, ppl: 12.443296 +epoch: 1, batch: 13112, sum loss: 4755.619141, avg loss: 2.683758, ppl: 14.640007 +epoch: 1, batch: 13113, sum loss: 5085.058105, avg loss: 2.965048, ppl: 19.395639 +epoch: 1, batch: 13114, sum loss: 5059.320312, avg loss: 2.801396, ppl: 16.467613 +epoch: 1, batch: 13115, sum loss: 3947.239990, avg loss: 2.549897, ppl: 12.805781 +epoch: 1, batch: 13116, sum loss: 4124.726074, avg loss: 2.563534, ppl: 12.981610 +epoch: 1, batch: 13117, sum loss: 4012.210938, avg loss: 2.643090, ppl: 14.056575 +epoch: 1, batch: 13118, sum loss: 5108.400879, avg loss: 2.740558, ppl: 15.495635 +epoch: 1, batch: 13119, sum loss: 3931.910156, avg loss: 2.609098, ppl: 13.586786 +epoch: 1, batch: 13120, sum loss: 4270.351562, avg loss: 2.481320, ppl: 11.957036 +epoch: 1, batch: 13121, sum loss: 3869.306885, avg loss: 2.560759, ppl: 12.945641 +epoch: 1, batch: 13122, sum loss: 4420.375000, avg loss: 2.643765, ppl: 14.066066 +epoch: 1, batch: 13123, sum loss: 4441.420898, avg loss: 2.623403, ppl: 13.782543 +epoch: 1, batch: 13124, sum loss: 4856.577148, avg loss: 2.672855, ppl: 14.481253 +epoch: 1, batch: 13125, sum loss: 4359.549316, avg loss: 2.672930, ppl: 14.482344 +epoch: 1, batch: 13126, sum loss: 4241.877441, avg loss: 2.641268, ppl: 14.030991 +epoch: 1, batch: 13127, sum loss: 5041.084961, avg loss: 2.929160, ppl: 18.711912 +epoch: 1, batch: 13128, sum loss: 
5441.908203, avg loss: 2.985139, ppl: 19.789255 +epoch: 1, batch: 13129, sum loss: 4238.522461, avg loss: 2.734531, ppl: 15.402514 +epoch: 1, batch: 13130, sum loss: 4945.459961, avg loss: 2.764371, ppl: 15.869058 +epoch: 1, batch: 13131, sum loss: 5320.888672, avg loss: 3.057982, ppl: 21.284561 +epoch: 1, batch: 13132, sum loss: 4000.493896, avg loss: 2.645829, ppl: 14.095131 +epoch: 1, batch: 13133, sum loss: 4785.144043, avg loss: 2.748503, ppl: 15.619236 +epoch: 1, batch: 13134, sum loss: 3884.164062, avg loss: 2.792354, ppl: 16.319387 +epoch: 1, batch: 13135, sum loss: 3657.396484, avg loss: 2.486333, ppl: 12.017133 +epoch: 1, batch: 13136, sum loss: 3072.742920, avg loss: 2.480018, ppl: 11.941483 +epoch: 1, batch: 13137, sum loss: 5083.701172, avg loss: 2.777979, ppl: 16.086475 +epoch: 1, batch: 13138, sum loss: 5122.578613, avg loss: 2.774961, ppl: 16.038006 +epoch: 1, batch: 13139, sum loss: 4732.928223, avg loss: 2.622121, ppl: 13.764886 +epoch: 1, batch: 13140, sum loss: 4176.420410, avg loss: 2.717255, ppl: 15.138704 +epoch: 1, batch: 13141, sum loss: 4516.523438, avg loss: 2.854945, ppl: 17.373486 +epoch: 1, batch: 13142, sum loss: 4079.157227, avg loss: 2.524231, ppl: 12.481293 +epoch: 1, batch: 13143, sum loss: 4913.590820, avg loss: 2.791813, ppl: 16.310562 +epoch: 1, batch: 13144, sum loss: 5180.664551, avg loss: 2.854361, ppl: 17.363335 +epoch: 1, batch: 13145, sum loss: 4784.085938, avg loss: 2.986321, ppl: 19.812653 +epoch: 1, batch: 13146, sum loss: 4292.740234, avg loss: 2.457207, ppl: 11.672162 +epoch: 1, batch: 13147, sum loss: 3987.184082, avg loss: 2.614547, ppl: 13.661027 +epoch: 1, batch: 13148, sum loss: 4280.462402, avg loss: 2.491538, ppl: 12.079842 +epoch: 1, batch: 13149, sum loss: 4271.705078, avg loss: 2.596781, ppl: 13.420468 +epoch: 1, batch: 13150, sum loss: 5124.300293, avg loss: 2.872366, ppl: 17.678791 +epoch: 1, batch: 13151, sum loss: 5260.494629, avg loss: 2.694926, ppl: 14.804420 +epoch: 1, batch: 13152, sum loss: 
3888.019043, avg loss: 2.460772, ppl: 11.713846 +epoch: 1, batch: 13153, sum loss: 4417.440918, avg loss: 2.838972, ppl: 17.098185 +epoch: 1, batch: 13154, sum loss: 4727.836426, avg loss: 2.629497, ppl: 13.866797 +epoch: 1, batch: 13155, sum loss: 5577.635742, avg loss: 3.000342, ppl: 20.092405 +epoch: 1, batch: 13156, sum loss: 4370.169922, avg loss: 2.792441, ppl: 16.320808 +epoch: 1, batch: 13157, sum loss: 3271.904297, avg loss: 2.520727, ppl: 12.437640 +epoch: 1, batch: 13158, sum loss: 5305.456055, avg loss: 2.990674, ppl: 19.899090 +epoch: 1, batch: 13159, sum loss: 5488.083008, avg loss: 2.933235, ppl: 18.788321 +epoch: 1, batch: 13160, sum loss: 4495.409180, avg loss: 2.732771, ppl: 15.375440 +epoch: 1, batch: 13161, sum loss: 4791.411133, avg loss: 2.651583, ppl: 14.176465 +epoch: 1, batch: 13162, sum loss: 3779.356201, avg loss: 2.553619, ppl: 12.853539 +epoch: 1, batch: 13163, sum loss: 5534.077148, avg loss: 2.835081, ppl: 17.031773 +epoch: 1, batch: 13164, sum loss: 3679.553223, avg loss: 2.587590, ppl: 13.297688 +epoch: 1, batch: 13165, sum loss: 4914.453613, avg loss: 2.721181, ppl: 15.198267 +epoch: 1, batch: 13166, sum loss: 4928.492188, avg loss: 2.885534, ppl: 17.913132 +epoch: 1, batch: 13167, sum loss: 4834.471680, avg loss: 2.823874, ppl: 16.841963 +epoch: 1, batch: 13168, sum loss: 5238.854980, avg loss: 2.813563, ppl: 16.669210 +epoch: 1, batch: 13169, sum loss: 4769.245117, avg loss: 2.817038, ppl: 16.727232 +epoch: 1, batch: 13170, sum loss: 5599.921387, avg loss: 2.950433, ppl: 19.114229 +epoch: 1, batch: 13171, sum loss: 4580.924805, avg loss: 2.655609, ppl: 14.233646 +epoch: 1, batch: 13172, sum loss: 5308.289062, avg loss: 2.810105, ppl: 16.611668 +epoch: 1, batch: 13173, sum loss: 4468.957520, avg loss: 2.663264, ppl: 14.343033 +epoch: 1, batch: 13174, sum loss: 3884.332764, avg loss: 2.403671, ppl: 11.063720 +epoch: 1, batch: 13175, sum loss: 4482.054199, avg loss: 2.666302, ppl: 14.386672 +epoch: 1, batch: 13176, sum loss: 
5001.601074, avg loss: 2.943850, ppl: 18.988813 +epoch: 1, batch: 13177, sum loss: 4186.667969, avg loss: 2.467100, ppl: 11.788207 +epoch: 1, batch: 13178, sum loss: 4916.743652, avg loss: 2.695583, ppl: 14.814155 +epoch: 1, batch: 13179, sum loss: 4272.190430, avg loss: 2.472332, ppl: 11.850055 +epoch: 1, batch: 13180, sum loss: 5178.190918, avg loss: 2.937147, ppl: 18.861963 +epoch: 1, batch: 13181, sum loss: 4288.188965, avg loss: 2.621143, ppl: 13.751428 +epoch: 1, batch: 13182, sum loss: 4939.931152, avg loss: 2.835781, ppl: 17.043713 +epoch: 1, batch: 13183, sum loss: 5756.117188, avg loss: 2.836923, ppl: 17.063187 +epoch: 1, batch: 13184, sum loss: 3897.784180, avg loss: 2.445285, ppl: 11.533834 +epoch: 1, batch: 13185, sum loss: 5248.909668, avg loss: 2.762584, ppl: 15.840726 +epoch: 1, batch: 13186, sum loss: 4725.848633, avg loss: 2.714445, ppl: 15.096228 +epoch: 1, batch: 13187, sum loss: 3966.003418, avg loss: 2.565332, ppl: 13.004977 +epoch: 1, batch: 13188, sum loss: 5295.865723, avg loss: 2.900255, ppl: 18.178783 +epoch: 1, batch: 13189, sum loss: 4699.528809, avg loss: 2.777499, ppl: 16.078760 +epoch: 1, batch: 13190, sum loss: 4914.558105, avg loss: 2.835867, ppl: 17.045176 +epoch: 1, batch: 13191, sum loss: 5246.487793, avg loss: 2.940856, ppl: 18.932055 +epoch: 1, batch: 13192, sum loss: 4508.670410, avg loss: 2.671013, ppl: 14.454610 +epoch: 1, batch: 13193, sum loss: 5384.889648, avg loss: 2.792993, ppl: 16.329815 +epoch: 1, batch: 13194, sum loss: 5752.331543, avg loss: 2.860434, ppl: 17.469099 +epoch: 1, batch: 13195, sum loss: 4343.455078, avg loss: 2.607116, ppl: 13.559884 +epoch: 1, batch: 13196, sum loss: 4436.624512, avg loss: 2.772890, ppl: 16.004826 +epoch: 1, batch: 13197, sum loss: 4974.146973, avg loss: 2.761881, ppl: 15.829585 +epoch: 1, batch: 13198, sum loss: 4728.517578, avg loss: 2.865768, ppl: 17.562540 +epoch: 1, batch: 13199, sum loss: 4501.123047, avg loss: 2.528721, ppl: 12.537458 +epoch: 1, batch: 13200, sum loss: 
4273.916016, avg loss: 2.691383, ppl: 14.752066 +epoch: 1, batch: 13201, sum loss: 4696.400391, avg loss: 2.790493, ppl: 16.289053 +epoch: 1, batch: 13202, sum loss: 5549.576660, avg loss: 2.725725, ppl: 15.267485 +epoch: 1, batch: 13203, sum loss: 5208.081543, avg loss: 2.833559, ppl: 17.005878 +epoch: 1, batch: 13204, sum loss: 3857.154297, avg loss: 2.290472, ppl: 9.879598 +epoch: 1, batch: 13205, sum loss: 5154.612305, avg loss: 2.863674, ppl: 17.525793 +epoch: 1, batch: 13206, sum loss: 5257.290039, avg loss: 2.753950, ppl: 15.704540 +epoch: 1, batch: 13207, sum loss: 3369.765869, avg loss: 2.463279, ppl: 11.743258 +epoch: 1, batch: 13208, sum loss: 5219.568359, avg loss: 2.729900, ppl: 15.331348 +epoch: 1, batch: 13209, sum loss: 4871.893555, avg loss: 2.642025, ppl: 14.041606 +epoch: 1, batch: 13210, sum loss: 4649.278809, avg loss: 2.590127, ppl: 13.331470 +epoch: 1, batch: 13211, sum loss: 4880.995605, avg loss: 2.856054, ppl: 17.392752 +epoch: 1, batch: 13212, sum loss: 3858.791016, avg loss: 2.368810, ppl: 10.684667 +epoch: 1, batch: 13213, sum loss: 4300.580566, avg loss: 2.553789, ppl: 12.855723 +epoch: 1, batch: 13214, sum loss: 3743.633545, avg loss: 2.350052, ppl: 10.486118 +epoch: 1, batch: 13215, sum loss: 4847.561523, avg loss: 2.679692, ppl: 14.580595 +epoch: 1, batch: 13216, sum loss: 4138.063477, avg loss: 2.503366, ppl: 12.223563 +epoch: 1, batch: 13217, sum loss: 5511.285156, avg loss: 2.940921, ppl: 18.933268 +epoch: 1, batch: 13218, sum loss: 4232.671387, avg loss: 2.660384, ppl: 14.301785 +epoch: 1, batch: 13219, sum loss: 3983.216797, avg loss: 2.449703, ppl: 11.584903 +epoch: 1, batch: 13220, sum loss: 5374.477051, avg loss: 2.884851, ppl: 17.900896 +epoch: 1, batch: 13221, sum loss: 4557.244629, avg loss: 2.699790, ppl: 14.876600 +epoch: 1, batch: 13222, sum loss: 5650.130859, avg loss: 3.002195, ppl: 20.129671 +epoch: 1, batch: 13223, sum loss: 4754.830566, avg loss: 2.693955, ppl: 14.790054 +epoch: 1, batch: 13224, sum loss: 
3705.415527, avg loss: 2.558989, ppl: 12.922744 +epoch: 1, batch: 13225, sum loss: 4262.153320, avg loss: 2.662182, ppl: 14.327519 +epoch: 1, batch: 13226, sum loss: 5208.451660, avg loss: 2.924453, ppl: 18.624044 +epoch: 1, batch: 13227, sum loss: 4376.009766, avg loss: 2.776656, ppl: 16.065208 +epoch: 1, batch: 13228, sum loss: 4533.222168, avg loss: 2.665034, ppl: 14.368432 +epoch: 1, batch: 13229, sum loss: 5638.573730, avg loss: 2.827770, ppl: 16.907719 +epoch: 1, batch: 13230, sum loss: 3522.864990, avg loss: 2.617285, ppl: 13.698475 +epoch: 1, batch: 13231, sum loss: 4148.051270, avg loss: 2.366259, ppl: 10.657444 +epoch: 1, batch: 13232, sum loss: 3750.180176, avg loss: 2.459135, ppl: 11.694686 +epoch: 1, batch: 13233, sum loss: 4317.561035, avg loss: 2.608798, ppl: 13.582718 +epoch: 1, batch: 13234, sum loss: 5333.629883, avg loss: 2.901866, ppl: 18.208090 +epoch: 1, batch: 13235, sum loss: 4273.649902, avg loss: 2.796891, ppl: 16.393602 +epoch: 1, batch: 13236, sum loss: 5151.054688, avg loss: 2.772365, ppl: 15.996427 +epoch: 1, batch: 13237, sum loss: 4467.357910, avg loss: 2.479111, ppl: 11.930653 +epoch: 1, batch: 13238, sum loss: 6157.080078, avg loss: 2.961559, ppl: 19.328074 +epoch: 1, batch: 13239, sum loss: 5370.994141, avg loss: 2.844806, ppl: 17.198225 +epoch: 1, batch: 13240, sum loss: 5325.848633, avg loss: 2.831392, ppl: 16.969067 +epoch: 1, batch: 13241, sum loss: 4207.114258, avg loss: 2.502745, ppl: 12.215980 +epoch: 1, batch: 13242, sum loss: 4251.819824, avg loss: 2.739575, ppl: 15.480403 +epoch: 1, batch: 13243, sum loss: 4756.495605, avg loss: 2.636638, ppl: 13.966176 +epoch: 1, batch: 13244, sum loss: 3825.802002, avg loss: 2.220431, ppl: 9.211296 +epoch: 1, batch: 13245, sum loss: 5266.229492, avg loss: 2.849691, ppl: 17.282448 +epoch: 1, batch: 13246, sum loss: 4161.679199, avg loss: 2.627323, ppl: 13.836675 +epoch: 1, batch: 13247, sum loss: 3778.830078, avg loss: 2.441105, ppl: 11.485722 +epoch: 1, batch: 13248, sum loss: 
4058.006836, avg loss: 2.754927, ppl: 15.719889 +epoch: 1, batch: 13249, sum loss: 4420.171875, avg loss: 2.804677, ppl: 16.521746 +epoch: 1, batch: 13250, sum loss: 4449.487793, avg loss: 2.931151, ppl: 18.749207 +epoch: 1, batch: 13251, sum loss: 4026.662354, avg loss: 2.737364, ppl: 15.446216 +epoch: 1, batch: 13252, sum loss: 4980.370117, avg loss: 2.902314, ppl: 18.216244 +epoch: 1, batch: 13253, sum loss: 4353.169922, avg loss: 2.758663, ppl: 15.778728 +epoch: 1, batch: 13254, sum loss: 4416.551270, avg loss: 2.701255, ppl: 14.898412 +epoch: 1, batch: 13255, sum loss: 4784.894043, avg loss: 2.680613, ppl: 14.594037 +epoch: 1, batch: 13256, sum loss: 4560.476562, avg loss: 2.882728, ppl: 17.862944 +epoch: 1, batch: 13257, sum loss: 5022.242188, avg loss: 2.848691, ppl: 17.265167 +epoch: 1, batch: 13258, sum loss: 4150.375000, avg loss: 2.808102, ppl: 16.578428 +epoch: 1, batch: 13259, sum loss: 4612.951172, avg loss: 2.724720, ppl: 15.252147 +epoch: 1, batch: 13260, sum loss: 4106.245605, avg loss: 2.566403, ppl: 13.018916 +epoch: 1, batch: 13261, sum loss: 5082.884277, avg loss: 3.159033, ppl: 23.547815 +epoch: 1, batch: 13262, sum loss: 4657.211426, avg loss: 2.591659, ppl: 13.351908 +epoch: 1, batch: 13263, sum loss: 4695.585449, avg loss: 2.788352, ppl: 16.254219 +epoch: 1, batch: 13264, sum loss: 4464.829590, avg loss: 2.528216, ppl: 12.531132 +epoch: 1, batch: 13265, sum loss: 4461.019531, avg loss: 2.690603, ppl: 14.740559 +epoch: 1, batch: 13266, sum loss: 4930.184082, avg loss: 2.557150, ppl: 12.898998 +epoch: 1, batch: 13267, sum loss: 4782.174805, avg loss: 2.587757, ppl: 13.299905 +epoch: 1, batch: 13268, sum loss: 4244.889648, avg loss: 2.478044, ppl: 11.917931 +epoch: 1, batch: 13269, sum loss: 5367.914062, avg loss: 2.829686, ppl: 16.940140 +epoch: 1, batch: 13270, sum loss: 4912.150391, avg loss: 2.721413, ppl: 15.201786 +epoch: 1, batch: 13271, sum loss: 4851.665039, avg loss: 2.714978, ppl: 15.104274 +epoch: 1, batch: 13272, sum loss: 
4409.295898, avg loss: 2.619902, ppl: 13.734383 +epoch: 1, batch: 13273, sum loss: 4012.139648, avg loss: 2.637830, ppl: 13.982832 +epoch: 1, batch: 13274, sum loss: 4552.308105, avg loss: 2.616269, ppl: 13.684572 +epoch: 1, batch: 13275, sum loss: 4781.247559, avg loss: 2.845981, ppl: 17.218439 +epoch: 1, batch: 13276, sum loss: 5292.504883, avg loss: 3.013955, ppl: 20.367798 +epoch: 1, batch: 13277, sum loss: 4424.114746, avg loss: 2.823303, ppl: 16.832348 +epoch: 1, batch: 13278, sum loss: 5118.768555, avg loss: 2.877329, ppl: 17.766756 +epoch: 1, batch: 13279, sum loss: 4752.543457, avg loss: 2.559259, ppl: 12.926231 +epoch: 1, batch: 13280, sum loss: 4484.838867, avg loss: 2.630404, ppl: 13.879379 +epoch: 1, batch: 13281, sum loss: 4630.424316, avg loss: 2.956848, ppl: 19.237244 +epoch: 1, batch: 13282, sum loss: 5051.642090, avg loss: 2.815854, ppl: 16.707439 +epoch: 1, batch: 13283, sum loss: 5161.491211, avg loss: 2.971498, ppl: 19.521132 +epoch: 1, batch: 13284, sum loss: 4190.716309, avg loss: 2.450711, ppl: 11.596592 +epoch: 1, batch: 13285, sum loss: 5059.558105, avg loss: 2.863361, ppl: 17.520308 +epoch: 1, batch: 13286, sum loss: 4297.533203, avg loss: 2.726861, ppl: 15.284833 +epoch: 1, batch: 13287, sum loss: 4271.156250, avg loss: 2.512445, ppl: 12.335052 +epoch: 1, batch: 13288, sum loss: 4445.520020, avg loss: 2.666779, ppl: 14.393527 +epoch: 1, batch: 13289, sum loss: 3586.397949, avg loss: 2.515006, ppl: 12.366678 +epoch: 1, batch: 13290, sum loss: 4538.810059, avg loss: 3.077159, ppl: 21.696684 +epoch: 1, batch: 13291, sum loss: 4715.337402, avg loss: 2.821866, ppl: 16.808182 +epoch: 1, batch: 13292, sum loss: 4348.768066, avg loss: 2.679463, ppl: 14.577258 +epoch: 1, batch: 13293, sum loss: 4600.137695, avg loss: 2.767833, ppl: 15.924082 +epoch: 1, batch: 13294, sum loss: 4831.172852, avg loss: 2.836860, ppl: 17.062109 +epoch: 1, batch: 13295, sum loss: 5322.889648, avg loss: 2.714375, ppl: 15.095177 +epoch: 1, batch: 13296, sum loss: 
4312.809570, avg loss: 2.655671, ppl: 14.234532 +epoch: 1, batch: 13297, sum loss: 4512.501953, avg loss: 2.537965, ppl: 12.653898 +epoch: 1, batch: 13298, sum loss: 4934.689453, avg loss: 2.974496, ppl: 19.579760 +epoch: 1, batch: 13299, sum loss: 4840.634766, avg loss: 2.824174, ppl: 16.847031 +epoch: 1, batch: 13300, sum loss: 3887.460693, avg loss: 2.699625, ppl: 14.874160 +epoch: 1, batch: 13301, sum loss: 4115.838867, avg loss: 2.432529, ppl: 11.387645 +epoch: 1, batch: 13302, sum loss: 4257.178711, avg loss: 2.682532, ppl: 14.622074 +epoch: 1, batch: 13303, sum loss: 5346.905273, avg loss: 2.899623, ppl: 18.167303 +epoch: 1, batch: 13304, sum loss: 3989.005127, avg loss: 2.697096, ppl: 14.836585 +epoch: 1, batch: 13305, sum loss: 4410.835449, avg loss: 2.802310, ppl: 16.482670 +epoch: 1, batch: 13306, sum loss: 4377.560059, avg loss: 2.425241, ppl: 11.304954 +epoch: 1, batch: 13307, sum loss: 4612.973145, avg loss: 2.888524, ppl: 17.966776 +epoch: 1, batch: 13308, sum loss: 4163.473633, avg loss: 2.529449, ppl: 12.546597 +epoch: 1, batch: 13309, sum loss: 3348.934570, avg loss: 2.358405, ppl: 10.574068 +epoch: 1, batch: 13310, sum loss: 3429.472900, avg loss: 2.648242, ppl: 14.129171 +epoch: 1, batch: 13311, sum loss: 3830.162598, avg loss: 2.563696, ppl: 12.983722 +epoch: 1, batch: 13312, sum loss: 4030.077148, avg loss: 2.314806, ppl: 10.122959 +epoch: 1, batch: 13313, sum loss: 5556.924805, avg loss: 3.028297, ppl: 20.662014 +epoch: 1, batch: 13314, sum loss: 5032.244141, avg loss: 2.627804, ppl: 13.843334 +epoch: 1, batch: 13315, sum loss: 3881.992676, avg loss: 2.522412, ppl: 12.458614 +epoch: 1, batch: 13316, sum loss: 5728.326172, avg loss: 2.819058, ppl: 16.761057 +epoch: 1, batch: 13317, sum loss: 4237.658691, avg loss: 2.609396, ppl: 13.590836 +epoch: 1, batch: 13318, sum loss: 5248.416016, avg loss: 2.894879, ppl: 18.081320 +epoch: 1, batch: 13319, sum loss: 3219.862549, avg loss: 2.328172, ppl: 10.259175 +epoch: 1, batch: 13320, sum loss: 
3912.074707, avg loss: 2.629082, ppl: 13.861046 +epoch: 1, batch: 13321, sum loss: 4400.343262, avg loss: 2.824354, ppl: 16.850056 +epoch: 1, batch: 13322, sum loss: 4319.424316, avg loss: 2.756493, ppl: 15.744532 +epoch: 1, batch: 13323, sum loss: 5026.991699, avg loss: 2.711430, ppl: 15.050784 +epoch: 1, batch: 13324, sum loss: 3561.566650, avg loss: 2.471594, ppl: 11.841305 +epoch: 1, batch: 13325, sum loss: 3949.100342, avg loss: 2.691957, ppl: 14.760527 +epoch: 1, batch: 13326, sum loss: 3488.857422, avg loss: 2.528158, ppl: 12.530400 +epoch: 1, batch: 13327, sum loss: 4310.890625, avg loss: 2.478948, ppl: 11.928710 +epoch: 1, batch: 13328, sum loss: 4955.244141, avg loss: 2.841310, ppl: 17.138195 +epoch: 1, batch: 13329, sum loss: 4161.812500, avg loss: 2.575379, ppl: 13.136293 +epoch: 1, batch: 13330, sum loss: 4943.792969, avg loss: 2.837998, ppl: 17.081537 +epoch: 1, batch: 13331, sum loss: 4036.625488, avg loss: 2.553210, ppl: 12.848284 +epoch: 1, batch: 13332, sum loss: 6099.819336, avg loss: 2.861079, ppl: 17.480370 +epoch: 1, batch: 13333, sum loss: 5470.070312, avg loss: 2.886581, ppl: 17.931890 +epoch: 1, batch: 13334, sum loss: 5145.985352, avg loss: 2.753336, ppl: 15.694906 +epoch: 1, batch: 13335, sum loss: 5058.345703, avg loss: 2.710796, ppl: 15.041245 +epoch: 1, batch: 13336, sum loss: 4832.521973, avg loss: 2.710332, ppl: 15.034268 +epoch: 1, batch: 13337, sum loss: 4432.909180, avg loss: 2.901119, ppl: 18.194485 +epoch: 1, batch: 13338, sum loss: 3933.682373, avg loss: 2.668713, ppl: 14.421391 +epoch: 1, batch: 13339, sum loss: 4415.626953, avg loss: 2.699039, ppl: 14.865432 +epoch: 1, batch: 13340, sum loss: 4039.118164, avg loss: 2.779847, ppl: 16.116562 +epoch: 1, batch: 13341, sum loss: 5305.948242, avg loss: 3.001102, ppl: 20.107683 +epoch: 1, batch: 13342, sum loss: 3612.823730, avg loss: 2.493322, ppl: 12.101412 +epoch: 1, batch: 13343, sum loss: 3652.907227, avg loss: 2.526215, ppl: 12.506084 +epoch: 1, batch: 13344, sum loss: 
4665.226074, avg loss: 2.726608, ppl: 15.280967 +epoch: 1, batch: 13345, sum loss: 4563.978516, avg loss: 2.605011, ppl: 13.531371 +epoch: 1, batch: 13346, sum loss: 4810.321289, avg loss: 2.610049, ppl: 13.599724 +epoch: 1, batch: 13347, sum loss: 5498.335449, avg loss: 2.909172, ppl: 18.341610 +epoch: 1, batch: 13348, sum loss: 6261.406738, avg loss: 2.974540, ppl: 19.580614 +epoch: 1, batch: 13349, sum loss: 5260.995605, avg loss: 2.979046, ppl: 19.669046 +epoch: 1, batch: 13350, sum loss: 4138.845703, avg loss: 2.426053, ppl: 11.314132 +epoch: 1, batch: 13351, sum loss: 3974.357910, avg loss: 2.445759, ppl: 11.539300 +epoch: 1, batch: 13352, sum loss: 4749.648926, avg loss: 2.704812, ppl: 14.951499 +epoch: 1, batch: 13353, sum loss: 3671.564941, avg loss: 2.485826, ppl: 12.011038 +epoch: 1, batch: 13354, sum loss: 5088.753906, avg loss: 2.762624, ppl: 15.841360 +epoch: 1, batch: 13355, sum loss: 4211.012207, avg loss: 2.811090, ppl: 16.628025 +epoch: 1, batch: 13356, sum loss: 4011.884033, avg loss: 2.540775, ppl: 12.689503 +epoch: 1, batch: 13357, sum loss: 5174.674805, avg loss: 3.001552, ppl: 20.116726 +epoch: 1, batch: 13358, sum loss: 4313.579102, avg loss: 2.775791, ppl: 16.051317 +epoch: 1, batch: 13359, sum loss: 4699.391602, avg loss: 2.711709, ppl: 15.054979 +epoch: 1, batch: 13360, sum loss: 4832.639648, avg loss: 2.719550, ppl: 15.173488 +epoch: 1, batch: 13361, sum loss: 4698.816406, avg loss: 2.786961, ppl: 16.231619 +epoch: 1, batch: 13362, sum loss: 4123.213867, avg loss: 2.503469, ppl: 12.224832 +epoch: 1, batch: 13363, sum loss: 4138.510742, avg loss: 2.509709, ppl: 12.301354 +epoch: 1, batch: 13364, sum loss: 4179.542480, avg loss: 2.641936, ppl: 14.040354 +epoch: 1, batch: 13365, sum loss: 5717.153320, avg loss: 2.852871, ppl: 17.337486 +epoch: 1, batch: 13366, sum loss: 3979.233887, avg loss: 2.665260, ppl: 14.371690 +epoch: 1, batch: 13367, sum loss: 3585.813232, avg loss: 2.483250, ppl: 11.980139 +epoch: 1, batch: 13368, sum loss: 
4924.970703, avg loss: 3.060889, ppl: 21.346531 +epoch: 1, batch: 13369, sum loss: 4606.313477, avg loss: 2.612770, ppl: 13.636773 +epoch: 1, batch: 13370, sum loss: 5112.203125, avg loss: 2.719257, ppl: 15.169045 +epoch: 1, batch: 13371, sum loss: 6136.316406, avg loss: 3.143605, ppl: 23.187307 +epoch: 1, batch: 13372, sum loss: 4015.393066, avg loss: 2.414548, ppl: 11.184712 +epoch: 1, batch: 13373, sum loss: 3886.665039, avg loss: 2.514014, ppl: 12.354416 +epoch: 1, batch: 13374, sum loss: 4802.065918, avg loss: 2.759808, ppl: 15.796811 +epoch: 1, batch: 13375, sum loss: 4156.165527, avg loss: 2.602483, ppl: 13.497213 +epoch: 1, batch: 13376, sum loss: 4814.365723, avg loss: 2.765288, ppl: 15.883608 +epoch: 1, batch: 13377, sum loss: 4839.588379, avg loss: 2.954571, ppl: 19.193497 +epoch: 1, batch: 13378, sum loss: 3866.060791, avg loss: 2.695998, ppl: 14.820298 +epoch: 1, batch: 13379, sum loss: 3795.955811, avg loss: 2.564835, ppl: 12.998514 +epoch: 1, batch: 13380, sum loss: 4519.987305, avg loss: 2.798754, ppl: 16.424166 +epoch: 1, batch: 13381, sum loss: 4692.666992, avg loss: 2.518877, ppl: 12.414641 +epoch: 1, batch: 13382, sum loss: 4610.802246, avg loss: 2.762614, ppl: 15.841194 +epoch: 1, batch: 13383, sum loss: 3843.622559, avg loss: 2.408285, ppl: 11.114882 +epoch: 1, batch: 13384, sum loss: 4603.410156, avg loss: 2.781517, ppl: 16.143488 +epoch: 1, batch: 13385, sum loss: 4316.373535, avg loss: 2.823004, ppl: 16.827332 +epoch: 1, batch: 13386, sum loss: 4893.124023, avg loss: 2.695936, ppl: 14.819386 +epoch: 1, batch: 13387, sum loss: 4413.165039, avg loss: 2.918760, ppl: 18.518312 +epoch: 1, batch: 13388, sum loss: 4357.372559, avg loss: 2.653698, ppl: 14.206484 +epoch: 1, batch: 13389, sum loss: 4784.923828, avg loss: 2.801477, ppl: 16.468960 +epoch: 1, batch: 13390, sum loss: 5336.437012, avg loss: 2.739444, ppl: 15.478381 +epoch: 1, batch: 13391, sum loss: 4210.827148, avg loss: 2.725455, ppl: 15.263350 +epoch: 1, batch: 13392, sum loss: 
4719.490234, avg loss: 2.790946, ppl: 16.296436 +epoch: 1, batch: 13393, sum loss: 4583.739258, avg loss: 2.705867, ppl: 14.967292 +epoch: 1, batch: 13394, sum loss: 3480.908936, avg loss: 2.314434, ppl: 10.119194 +epoch: 1, batch: 13395, sum loss: 4934.548340, avg loss: 2.938981, ppl: 18.896572 +epoch: 1, batch: 13396, sum loss: 5778.016113, avg loss: 3.007817, ppl: 20.243162 +epoch: 1, batch: 13397, sum loss: 5340.229004, avg loss: 2.821040, ppl: 16.794310 +epoch: 1, batch: 13398, sum loss: 5056.558594, avg loss: 2.715660, ppl: 15.114580 +epoch: 1, batch: 13399, sum loss: 3811.009277, avg loss: 2.520509, ppl: 12.434924 +epoch: 1, batch: 13400, sum loss: 4766.838379, avg loss: 2.539605, ppl: 12.674663 +epoch: 1, batch: 13401, sum loss: 4144.493164, avg loss: 2.499694, ppl: 12.178771 +epoch: 1, batch: 13402, sum loss: 4903.537598, avg loss: 2.644842, ppl: 14.081223 +epoch: 1, batch: 13403, sum loss: 5513.754395, avg loss: 2.706801, ppl: 14.981277 +epoch: 1, batch: 13404, sum loss: 3831.672363, avg loss: 2.330701, ppl: 10.285148 +epoch: 1, batch: 13405, sum loss: 4295.852539, avg loss: 2.736212, ppl: 15.428428 +epoch: 1, batch: 13406, sum loss: 5327.740234, avg loss: 2.881417, ppl: 17.839535 +epoch: 1, batch: 13407, sum loss: 4921.316895, avg loss: 2.881333, ppl: 17.838034 +epoch: 1, batch: 13408, sum loss: 4716.830566, avg loss: 2.764848, ppl: 15.876623 +epoch: 1, batch: 13409, sum loss: 5751.919922, avg loss: 2.769340, ppl: 15.948110 +epoch: 1, batch: 13410, sum loss: 4994.365723, avg loss: 2.915567, ppl: 18.459270 +epoch: 1, batch: 13411, sum loss: 4327.891602, avg loss: 2.708318, ppl: 15.004014 +epoch: 1, batch: 13412, sum loss: 5365.013672, avg loss: 2.972307, ppl: 19.536934 +epoch: 1, batch: 13413, sum loss: 5171.786621, avg loss: 2.756816, ppl: 15.749615 +epoch: 1, batch: 13414, sum loss: 4836.021484, avg loss: 2.909760, ppl: 18.352398 +epoch: 1, batch: 13415, sum loss: 3663.528809, avg loss: 2.581768, ppl: 13.220491 +epoch: 1, batch: 13416, sum loss: 
3670.744873, avg loss: 2.455348, ppl: 11.650485 +epoch: 1, batch: 13417, sum loss: 4438.128418, avg loss: 2.593880, ppl: 13.381591 +epoch: 1, batch: 13418, sum loss: 4563.286133, avg loss: 2.633171, ppl: 13.917838 +epoch: 1, batch: 13419, sum loss: 3975.216797, avg loss: 2.446287, ppl: 11.545401 +epoch: 1, batch: 13420, sum loss: 4177.388672, avg loss: 2.550298, ppl: 12.810924 +epoch: 1, batch: 13421, sum loss: 4822.711426, avg loss: 2.737067, ppl: 15.441625 +epoch: 1, batch: 13422, sum loss: 5215.877441, avg loss: 2.992471, ppl: 19.934885 +epoch: 1, batch: 13423, sum loss: 3918.046143, avg loss: 2.424533, ppl: 11.296957 +epoch: 1, batch: 13424, sum loss: 4737.491699, avg loss: 2.551153, ppl: 12.821881 +epoch: 1, batch: 13425, sum loss: 5290.016602, avg loss: 2.890720, ppl: 18.006264 +epoch: 1, batch: 13426, sum loss: 5328.455078, avg loss: 2.930943, ppl: 18.745308 +epoch: 1, batch: 13427, sum loss: 4587.824707, avg loss: 2.729223, ppl: 15.320986 +epoch: 1, batch: 13428, sum loss: 3910.553223, avg loss: 2.413922, ppl: 11.177712 +epoch: 1, batch: 13429, sum loss: 4209.872070, avg loss: 2.649384, ppl: 14.145329 +epoch: 1, batch: 13430, sum loss: 4294.273438, avg loss: 2.636141, ppl: 13.959229 +epoch: 1, batch: 13431, sum loss: 5178.328613, avg loss: 2.927263, ppl: 18.676449 +epoch: 1, batch: 13432, sum loss: 4363.786133, avg loss: 2.682106, ppl: 14.615839 +epoch: 1, batch: 13433, sum loss: 5005.497070, avg loss: 2.972386, ppl: 19.538475 +epoch: 1, batch: 13434, sum loss: 5124.882324, avg loss: 2.662277, ppl: 14.328872 +epoch: 1, batch: 13435, sum loss: 5673.969238, avg loss: 2.911221, ppl: 18.379223 +epoch: 1, batch: 13436, sum loss: 4718.487793, avg loss: 2.790354, ppl: 16.286781 +epoch: 1, batch: 13437, sum loss: 4766.398438, avg loss: 2.658337, ppl: 14.272535 +epoch: 1, batch: 13438, sum loss: 3847.815186, avg loss: 2.460240, ppl: 11.707620 +epoch: 1, batch: 13439, sum loss: 5042.647461, avg loss: 2.683687, ppl: 14.638968 +epoch: 1, batch: 13440, sum loss: 
4546.710938, avg loss: 2.691954, ppl: 14.760495 +epoch: 1, batch: 13441, sum loss: 4832.882812, avg loss: 2.868180, ppl: 17.604944 +epoch: 1, batch: 13442, sum loss: 4977.403320, avg loss: 2.980481, ppl: 19.697292 +epoch: 1, batch: 13443, sum loss: 3961.438232, avg loss: 2.518397, ppl: 12.408688 +epoch: 1, batch: 13444, sum loss: 4120.970703, avg loss: 2.596705, ppl: 13.419448 +epoch: 1, batch: 13445, sum loss: 3540.439697, avg loss: 2.458639, ppl: 11.688889 +epoch: 1, batch: 13446, sum loss: 4153.567383, avg loss: 2.674545, ppl: 14.505742 +epoch: 1, batch: 13447, sum loss: 4686.152832, avg loss: 2.670172, ppl: 14.442460 +epoch: 1, batch: 13448, sum loss: 4406.416992, avg loss: 2.604265, ppl: 13.521290 +epoch: 1, batch: 13449, sum loss: 4054.947510, avg loss: 2.624562, ppl: 13.798523 +epoch: 1, batch: 13450, sum loss: 4459.007812, avg loss: 2.713943, ppl: 15.088646 +epoch: 1, batch: 13451, sum loss: 4734.634277, avg loss: 2.595742, ppl: 13.406538 +epoch: 1, batch: 13452, sum loss: 4025.102783, avg loss: 2.632507, ppl: 13.908593 +epoch: 1, batch: 13453, sum loss: 4861.949219, avg loss: 2.686160, ppl: 14.675213 +epoch: 1, batch: 13454, sum loss: 4406.782227, avg loss: 2.711866, ppl: 15.057344 +epoch: 1, batch: 13455, sum loss: 4091.562500, avg loss: 2.554034, ppl: 12.858871 +epoch: 1, batch: 13456, sum loss: 3988.532227, avg loss: 2.555114, ppl: 12.872762 +epoch: 1, batch: 13457, sum loss: 4050.813477, avg loss: 2.578494, ppl: 13.177276 +epoch: 1, batch: 13458, sum loss: 4161.070801, avg loss: 2.805847, ppl: 16.541079 +epoch: 1, batch: 13459, sum loss: 4941.895020, avg loss: 2.652654, ppl: 14.191656 +epoch: 1, batch: 13460, sum loss: 3835.868896, avg loss: 2.468384, ppl: 11.803357 +epoch: 1, batch: 13461, sum loss: 4066.036377, avg loss: 2.647159, ppl: 14.113886 +epoch: 1, batch: 13462, sum loss: 4196.005371, avg loss: 2.625786, ppl: 13.815423 +epoch: 1, batch: 13463, sum loss: 4664.506348, avg loss: 2.789777, ppl: 16.277386 +epoch: 1, batch: 13464, sum loss: 
4214.673340, avg loss: 2.612941, ppl: 13.639102 +epoch: 1, batch: 13465, sum loss: 4291.622070, avg loss: 2.506788, ppl: 12.265476 +epoch: 1, batch: 13466, sum loss: 5446.663086, avg loss: 2.942552, ppl: 18.964174 +epoch: 1, batch: 13467, sum loss: 6136.258789, avg loss: 3.019812, ppl: 20.487446 +epoch: 1, batch: 13468, sum loss: 4697.736816, avg loss: 2.908815, ppl: 18.335066 +epoch: 1, batch: 13469, sum loss: 4282.235840, avg loss: 2.773469, ppl: 16.014090 +epoch: 1, batch: 13470, sum loss: 4100.201660, avg loss: 2.685135, ppl: 14.660186 +epoch: 1, batch: 13471, sum loss: 4913.192383, avg loss: 2.695114, ppl: 14.807205 +epoch: 1, batch: 13472, sum loss: 4667.473633, avg loss: 2.546358, ppl: 12.760540 +epoch: 1, batch: 13473, sum loss: 4658.080078, avg loss: 2.732012, ppl: 15.363765 +epoch: 1, batch: 13474, sum loss: 4509.686523, avg loss: 2.768377, ppl: 15.932759 +epoch: 1, batch: 13475, sum loss: 5162.090820, avg loss: 2.965015, ppl: 19.394993 +epoch: 1, batch: 13476, sum loss: 4548.227539, avg loss: 2.898807, ppl: 18.152470 +epoch: 1, batch: 13477, sum loss: 4361.377930, avg loss: 2.682274, ppl: 14.618299 +epoch: 1, batch: 13478, sum loss: 3991.167236, avg loss: 2.651938, ppl: 14.181502 +epoch: 1, batch: 13479, sum loss: 5463.550781, avg loss: 2.755195, ppl: 15.724101 +epoch: 1, batch: 13480, sum loss: 4693.085449, avg loss: 2.852939, ppl: 17.338673 +epoch: 1, batch: 13481, sum loss: 4624.550781, avg loss: 2.715532, ppl: 15.112645 +epoch: 1, batch: 13482, sum loss: 3576.244141, avg loss: 2.486957, ppl: 12.024631 +epoch: 1, batch: 13483, sum loss: 4811.325195, avg loss: 2.958995, ppl: 19.278580 +epoch: 1, batch: 13484, sum loss: 5068.037598, avg loss: 2.824993, ppl: 16.860828 +epoch: 1, batch: 13485, sum loss: 4524.505371, avg loss: 2.768975, ppl: 15.942289 +epoch: 1, batch: 13486, sum loss: 4790.800293, avg loss: 2.564668, ppl: 12.996346 +epoch: 1, batch: 13487, sum loss: 4826.240723, avg loss: 2.948223, ppl: 19.072031 +epoch: 1, batch: 13488, sum loss: 
4694.447266, avg loss: 2.529336, ppl: 12.545170 +epoch: 1, batch: 13489, sum loss: 4219.945801, avg loss: 2.514866, ppl: 12.364956 +epoch: 1, batch: 13490, sum loss: 4866.799316, avg loss: 2.809930, ppl: 16.608761 +epoch: 1, batch: 13491, sum loss: 4533.221191, avg loss: 2.831494, ppl: 16.970789 +epoch: 1, batch: 13492, sum loss: 5684.326660, avg loss: 2.991751, ppl: 19.920532 +epoch: 1, batch: 13493, sum loss: 4385.037598, avg loss: 2.402760, ppl: 11.053645 +epoch: 1, batch: 13494, sum loss: 4194.634766, avg loss: 2.770565, ppl: 15.967646 +epoch: 1, batch: 13495, sum loss: 5202.750977, avg loss: 2.725380, ppl: 15.262218 +epoch: 1, batch: 13496, sum loss: 3744.948730, avg loss: 2.400608, ppl: 11.029883 +epoch: 1, batch: 13497, sum loss: 4639.507324, avg loss: 2.791521, ppl: 16.305799 +epoch: 1, batch: 13498, sum loss: 6221.119141, avg loss: 3.072158, ppl: 21.588432 +epoch: 1, batch: 13499, sum loss: 4088.330322, avg loss: 2.443712, ppl: 11.515711 +epoch: 1, batch: 13500, sum loss: 4135.878906, avg loss: 2.746268, ppl: 15.584355 +epoch: 1, batch: 13501, sum loss: 5168.908203, avg loss: 2.843184, ppl: 17.170345 +epoch: 1, batch: 13502, sum loss: 4884.568848, avg loss: 2.810454, ppl: 16.617458 +epoch: 1, batch: 13503, sum loss: 4184.141113, avg loss: 2.603697, ppl: 13.513606 +epoch: 1, batch: 13504, sum loss: 4295.236816, avg loss: 2.735820, ppl: 15.422379 +epoch: 1, batch: 13505, sum loss: 5460.378906, avg loss: 3.060751, ppl: 21.343575 +epoch: 1, batch: 13506, sum loss: 5073.044922, avg loss: 2.717217, ppl: 15.138141 +epoch: 1, batch: 13507, sum loss: 3524.308105, avg loss: 2.309507, ppl: 10.069463 +epoch: 1, batch: 13508, sum loss: 5079.078125, avg loss: 2.835889, ppl: 17.045553 +epoch: 1, batch: 13509, sum loss: 3412.499756, avg loss: 2.542846, ppl: 12.715815 +epoch: 1, batch: 13510, sum loss: 4534.381836, avg loss: 2.906655, ppl: 18.295500 +epoch: 1, batch: 13511, sum loss: 4110.927734, avg loss: 2.556547, ppl: 12.891226 +epoch: 1, batch: 13512, sum loss: 
4701.891602, avg loss: 2.635590, ppl: 13.951536 +epoch: 1, batch: 13513, sum loss: 4290.335449, avg loss: 2.612872, ppl: 13.638162 +epoch: 1, batch: 13514, sum loss: 5005.463379, avg loss: 2.815221, ppl: 16.696871 +epoch: 1, batch: 13515, sum loss: 4058.791992, avg loss: 2.577011, ppl: 13.157746 +epoch: 1, batch: 13516, sum loss: 4226.522949, avg loss: 2.454427, ppl: 11.639759 +epoch: 1, batch: 13517, sum loss: 4849.341797, avg loss: 2.783778, ppl: 16.180037 +epoch: 1, batch: 13518, sum loss: 5181.996582, avg loss: 3.039294, ppl: 20.890495 +epoch: 1, batch: 13519, sum loss: 4006.495361, avg loss: 2.558426, ppl: 12.915474 +epoch: 1, batch: 13520, sum loss: 6084.959961, avg loss: 3.007889, ppl: 20.244619 +epoch: 1, batch: 13521, sum loss: 3734.303711, avg loss: 2.366479, ppl: 10.659792 +epoch: 1, batch: 13522, sum loss: 4886.249512, avg loss: 2.716092, ppl: 15.121111 +epoch: 1, batch: 13523, sum loss: 4619.915527, avg loss: 2.843025, ppl: 17.167618 +epoch: 1, batch: 13524, sum loss: 4645.369141, avg loss: 2.858689, ppl: 17.438643 +epoch: 1, batch: 13525, sum loss: 4223.211426, avg loss: 2.595705, ppl: 13.406030 +epoch: 1, batch: 13526, sum loss: 6015.649902, avg loss: 2.989886, ppl: 19.883411 +epoch: 1, batch: 13527, sum loss: 5223.854980, avg loss: 2.926530, ppl: 18.662752 +epoch: 1, batch: 13528, sum loss: 4435.480469, avg loss: 2.923850, ppl: 18.612806 +epoch: 1, batch: 13529, sum loss: 4477.652344, avg loss: 2.674822, ppl: 14.509771 +epoch: 1, batch: 13530, sum loss: 5107.276855, avg loss: 2.699406, ppl: 14.870901 +epoch: 1, batch: 13531, sum loss: 4727.064453, avg loss: 2.891171, ppl: 18.014387 +epoch: 1, batch: 13532, sum loss: 4992.834961, avg loss: 3.002306, ppl: 20.131907 +epoch: 1, batch: 13533, sum loss: 4554.731445, avg loss: 2.733932, ppl: 15.393298 +epoch: 1, batch: 13534, sum loss: 4140.262207, avg loss: 2.465909, ppl: 11.774186 +epoch: 1, batch: 13535, sum loss: 4144.212891, avg loss: 2.507086, ppl: 12.269123 +epoch: 1, batch: 13536, sum loss: 
3660.605713, avg loss: 2.414648, ppl: 11.185827 +epoch: 1, batch: 13537, sum loss: 4604.599609, avg loss: 2.883281, ppl: 17.872818 +epoch: 1, batch: 13538, sum loss: 4148.444824, avg loss: 2.373252, ppl: 10.732241 +epoch: 1, batch: 13539, sum loss: 4997.147461, avg loss: 2.837676, ppl: 17.076036 +epoch: 1, batch: 13540, sum loss: 3561.893555, avg loss: 2.388929, ppl: 10.901815 +epoch: 1, batch: 13541, sum loss: 3564.276367, avg loss: 2.501246, ppl: 12.197688 +epoch: 1, batch: 13542, sum loss: 4443.883301, avg loss: 2.556895, ppl: 12.895711 +epoch: 1, batch: 13543, sum loss: 5028.709473, avg loss: 2.728546, ppl: 15.310604 +epoch: 1, batch: 13544, sum loss: 4814.812500, avg loss: 2.952062, ppl: 19.145384 +epoch: 1, batch: 13545, sum loss: 3416.445557, avg loss: 2.244708, ppl: 9.437660 +epoch: 1, batch: 13546, sum loss: 4605.295898, avg loss: 2.469327, ppl: 11.814498 +epoch: 1, batch: 13547, sum loss: 5300.172852, avg loss: 2.668768, ppl: 14.422186 +epoch: 1, batch: 13548, sum loss: 4704.552246, avg loss: 2.699112, ppl: 14.866524 +epoch: 1, batch: 13549, sum loss: 4923.103516, avg loss: 2.798808, ppl: 16.425062 +epoch: 1, batch: 13550, sum loss: 5481.302246, avg loss: 2.982210, ppl: 19.731379 +epoch: 1, batch: 13551, sum loss: 4619.077637, avg loss: 2.614079, ppl: 13.654634 +epoch: 1, batch: 13552, sum loss: 4720.537109, avg loss: 2.535197, ppl: 12.618919 +epoch: 1, batch: 13553, sum loss: 4963.023438, avg loss: 2.873783, ppl: 17.703867 +epoch: 1, batch: 13554, sum loss: 4335.079102, avg loss: 2.622552, ppl: 13.770827 +epoch: 1, batch: 13555, sum loss: 3884.132080, avg loss: 2.567173, ppl: 13.028936 +epoch: 1, batch: 13556, sum loss: 3720.598633, avg loss: 2.690238, ppl: 14.735179 +epoch: 1, batch: 13557, sum loss: 4907.321289, avg loss: 2.541337, ppl: 12.696632 +epoch: 1, batch: 13558, sum loss: 5560.199219, avg loss: 2.949708, ppl: 19.100370 +epoch: 1, batch: 13559, sum loss: 4054.758301, avg loss: 2.333003, ppl: 10.308848 +epoch: 1, batch: 13560, sum loss: 
4537.380859, avg loss: 2.942530, ppl: 18.963758 +epoch: 1, batch: 13561, sum loss: 3736.252197, avg loss: 2.623773, ppl: 13.787642 +epoch: 1, batch: 13562, sum loss: 4246.313477, avg loss: 2.675686, ppl: 14.522310 +epoch: 1, batch: 13563, sum loss: 4794.365723, avg loss: 2.934128, ppl: 18.805103 +epoch: 1, batch: 13564, sum loss: 4581.052734, avg loss: 2.717113, ppl: 15.136563 +epoch: 1, batch: 13565, sum loss: 4936.245117, avg loss: 2.934747, ppl: 18.816750 +epoch: 1, batch: 13566, sum loss: 3943.976807, avg loss: 2.401935, ppl: 11.044525 +epoch: 1, batch: 13567, sum loss: 5467.931641, avg loss: 2.930295, ppl: 18.733164 +epoch: 1, batch: 13568, sum loss: 3905.664551, avg loss: 2.547726, ppl: 12.778019 +epoch: 1, batch: 13569, sum loss: 4787.555664, avg loss: 2.866800, ppl: 17.580667 +epoch: 1, batch: 13570, sum loss: 4526.767090, avg loss: 2.730258, ppl: 15.336836 +epoch: 1, batch: 13571, sum loss: 5616.708008, avg loss: 3.089498, ppl: 21.966055 +epoch: 1, batch: 13572, sum loss: 4631.733887, avg loss: 2.716560, ppl: 15.128186 +epoch: 1, batch: 13573, sum loss: 4431.957031, avg loss: 2.554442, ppl: 12.864121 +epoch: 1, batch: 13574, sum loss: 4996.740234, avg loss: 2.740944, ppl: 15.501607 +epoch: 1, batch: 13575, sum loss: 5072.480957, avg loss: 2.804025, ppl: 16.510969 +epoch: 1, batch: 13576, sum loss: 3664.449219, avg loss: 2.449498, ppl: 11.582533 +epoch: 1, batch: 13577, sum loss: 4856.893066, avg loss: 2.850289, ppl: 17.292786 +epoch: 1, batch: 13578, sum loss: 5123.685547, avg loss: 2.801359, ppl: 16.467012 +epoch: 1, batch: 13579, sum loss: 4282.529297, avg loss: 2.498559, ppl: 12.164946 +epoch: 1, batch: 13580, sum loss: 4345.916016, avg loss: 2.556421, ppl: 12.889606 +epoch: 1, batch: 13581, sum loss: 4372.802246, avg loss: 2.595135, ppl: 13.398393 +epoch: 1, batch: 13582, sum loss: 4424.940918, avg loss: 2.676915, ppl: 14.540170 +epoch: 1, batch: 13583, sum loss: 4562.898438, avg loss: 2.738835, ppl: 15.468947 +epoch: 1, batch: 13584, sum loss: 
4074.958984, avg loss: 2.457756, ppl: 11.678573 +epoch: 1, batch: 13585, sum loss: 4435.831055, avg loss: 2.544940, ppl: 12.742467 +epoch: 1, batch: 13586, sum loss: 5778.206543, avg loss: 3.065361, ppl: 21.442211 +epoch: 1, batch: 13587, sum loss: 4458.600098, avg loss: 2.596739, ppl: 13.419899 +epoch: 1, batch: 13588, sum loss: 4707.237305, avg loss: 2.830570, ppl: 16.955118 +epoch: 1, batch: 13589, sum loss: 3779.298096, avg loss: 2.479854, ppl: 11.939526 +epoch: 1, batch: 13590, sum loss: 4452.543945, avg loss: 2.475010, ppl: 11.881830 +epoch: 1, batch: 13591, sum loss: 4170.892090, avg loss: 2.699607, ppl: 14.873880 +epoch: 1, batch: 13592, sum loss: 5179.934570, avg loss: 2.616128, ppl: 13.682648 +epoch: 1, batch: 13593, sum loss: 3655.255371, avg loss: 2.631573, ppl: 13.895614 +epoch: 1, batch: 13594, sum loss: 4651.464355, avg loss: 2.807160, ppl: 16.562815 +epoch: 1, batch: 13595, sum loss: 4863.634766, avg loss: 2.610647, ppl: 13.607848 +epoch: 1, batch: 13596, sum loss: 5085.461914, avg loss: 2.886187, ppl: 17.924837 +epoch: 1, batch: 13597, sum loss: 4177.401367, avg loss: 2.757361, ppl: 15.758198 +epoch: 1, batch: 13598, sum loss: 3842.616699, avg loss: 2.447527, ppl: 11.559721 +epoch: 1, batch: 13599, sum loss: 4759.635742, avg loss: 2.833117, ppl: 16.998354 +epoch: 1, batch: 13600, sum loss: 4822.403809, avg loss: 2.559662, ppl: 12.931450 +epoch: 1, batch: 13601, sum loss: 4442.467773, avg loss: 2.607082, ppl: 13.559428 +epoch: 1, batch: 13602, sum loss: 4553.077148, avg loss: 2.659508, ppl: 14.289250 +epoch: 1, batch: 13603, sum loss: 5274.233398, avg loss: 2.874242, ppl: 17.711990 +epoch: 1, batch: 13604, sum loss: 4024.562744, avg loss: 2.569963, ppl: 13.065347 +epoch: 1, batch: 13605, sum loss: 5091.153809, avg loss: 2.942863, ppl: 18.970089 +epoch: 1, batch: 13606, sum loss: 3691.539062, avg loss: 2.625561, ppl: 13.812324 +epoch: 1, batch: 13607, sum loss: 3859.898926, avg loss: 2.444521, ppl: 11.525030 +epoch: 1, batch: 13608, sum loss: 
3846.973145, avg loss: 2.466008, ppl: 11.775351 +epoch: 1, batch: 13609, sum loss: 4956.811523, avg loss: 2.725020, ppl: 15.256721 +epoch: 1, batch: 13610, sum loss: 5142.453125, avg loss: 2.715128, ppl: 15.106550 +epoch: 1, batch: 13611, sum loss: 4650.250000, avg loss: 2.791266, ppl: 16.301651 +epoch: 1, batch: 13612, sum loss: 4493.839355, avg loss: 2.670136, ppl: 14.441936 +epoch: 1, batch: 13613, sum loss: 5104.984375, avg loss: 3.008241, ppl: 20.251740 +epoch: 1, batch: 13614, sum loss: 4208.054688, avg loss: 2.633326, ppl: 13.919989 +epoch: 1, batch: 13615, sum loss: 4507.160156, avg loss: 2.571113, ppl: 13.080370 +epoch: 1, batch: 13616, sum loss: 3802.458496, avg loss: 2.590230, ppl: 13.332844 +epoch: 1, batch: 13617, sum loss: 5108.348633, avg loss: 2.762763, ppl: 15.843554 +epoch: 1, batch: 13618, sum loss: 4510.856934, avg loss: 2.554279, ppl: 12.862024 +epoch: 1, batch: 13619, sum loss: 4091.980469, avg loss: 2.779878, ppl: 16.117056 +epoch: 1, batch: 13620, sum loss: 4663.058594, avg loss: 2.780595, ppl: 16.128620 +epoch: 1, batch: 13621, sum loss: 5593.041992, avg loss: 2.956153, ppl: 19.223879 +epoch: 1, batch: 13622, sum loss: 4247.695801, avg loss: 2.671507, ppl: 14.461741 +epoch: 1, batch: 13623, sum loss: 4448.539551, avg loss: 2.971636, ppl: 19.523836 +epoch: 1, batch: 13624, sum loss: 4241.951660, avg loss: 2.529488, ppl: 12.547082 +epoch: 1, batch: 13625, sum loss: 4193.558105, avg loss: 2.596630, ppl: 13.418447 +epoch: 1, batch: 13626, sum loss: 4111.471191, avg loss: 2.717430, ppl: 15.141357 +epoch: 1, batch: 13627, sum loss: 4873.799805, avg loss: 2.941339, ppl: 18.941200 +epoch: 1, batch: 13628, sum loss: 5333.626465, avg loss: 3.124562, ppl: 22.749918 +epoch: 1, batch: 13629, sum loss: 2829.083252, avg loss: 2.276012, ppl: 9.737770 +epoch: 1, batch: 13630, sum loss: 5185.566406, avg loss: 2.981924, ppl: 19.725739 +epoch: 1, batch: 13631, sum loss: 4594.283691, avg loss: 2.827251, ppl: 16.898949 +epoch: 1, batch: 13632, sum loss: 
4843.264648, avg loss: 2.832319, ppl: 16.984795 +epoch: 1, batch: 13633, sum loss: 5043.437988, avg loss: 2.752968, ppl: 15.689133 +epoch: 1, batch: 13634, sum loss: 4730.908203, avg loss: 2.688016, ppl: 14.702477 +epoch: 1, batch: 13635, sum loss: 4769.597656, avg loss: 2.593582, ppl: 13.377609 +epoch: 1, batch: 13636, sum loss: 5139.749023, avg loss: 2.785772, ppl: 16.212326 +epoch: 1, batch: 13637, sum loss: 4072.851562, avg loss: 2.644709, ppl: 14.079346 +epoch: 1, batch: 13638, sum loss: 4854.512695, avg loss: 2.804456, ppl: 16.518080 +epoch: 1, batch: 13639, sum loss: 3947.226318, avg loss: 2.531896, ppl: 12.577334 +epoch: 1, batch: 13640, sum loss: 3973.899658, avg loss: 2.553920, ppl: 12.857407 +epoch: 1, batch: 13641, sum loss: 4099.243652, avg loss: 2.601043, ppl: 13.477788 +epoch: 1, batch: 13642, sum loss: 4372.029297, avg loss: 2.693795, ppl: 14.787688 +epoch: 1, batch: 13643, sum loss: 4748.162109, avg loss: 2.563802, ppl: 12.985099 +epoch: 1, batch: 13644, sum loss: 5183.753906, avg loss: 2.953706, ppl: 19.176888 +epoch: 1, batch: 13645, sum loss: 4196.312012, avg loss: 2.671109, ppl: 14.455992 +epoch: 1, batch: 13646, sum loss: 4177.041504, avg loss: 2.556329, ppl: 12.888417 +epoch: 1, batch: 13647, sum loss: 5059.187500, avg loss: 2.975993, ppl: 19.609083 +epoch: 1, batch: 13648, sum loss: 3447.618896, avg loss: 2.535014, ppl: 12.616607 +epoch: 1, batch: 13649, sum loss: 5279.019043, avg loss: 3.182049, ppl: 24.096071 +epoch: 1, batch: 13650, sum loss: 3926.177246, avg loss: 2.572855, ppl: 13.103187 +epoch: 1, batch: 13651, sum loss: 4407.821289, avg loss: 2.558225, ppl: 12.912872 +epoch: 1, batch: 13652, sum loss: 4356.324219, avg loss: 2.621134, ppl: 13.751306 +epoch: 1, batch: 13653, sum loss: 4508.157227, avg loss: 2.648741, ppl: 14.136230 +epoch: 1, batch: 13654, sum loss: 4877.854980, avg loss: 2.626739, ppl: 13.828605 +epoch: 1, batch: 13655, sum loss: 4792.174316, avg loss: 2.696778, ppl: 14.831863 +epoch: 1, batch: 13656, sum loss: 
4994.334961, avg loss: 2.837690, ppl: 17.076275 +epoch: 1, batch: 13657, sum loss: 5084.850098, avg loss: 2.820216, ppl: 16.780481 +epoch: 1, batch: 13658, sum loss: 4783.250977, avg loss: 2.859086, ppl: 17.445576 +epoch: 1, batch: 13659, sum loss: 5538.883301, avg loss: 3.068633, ppl: 21.512482 +epoch: 1, batch: 13660, sum loss: 5464.247070, avg loss: 2.789304, ppl: 16.269697 +epoch: 1, batch: 13661, sum loss: 3832.274170, avg loss: 2.289292, ppl: 9.867948 +epoch: 1, batch: 13662, sum loss: 4267.906738, avg loss: 2.697792, ppl: 14.846910 +epoch: 1, batch: 13663, sum loss: 4997.453613, avg loss: 2.771744, ppl: 15.986483 +epoch: 1, batch: 13664, sum loss: 4010.264160, avg loss: 2.698697, ppl: 14.860361 +epoch: 1, batch: 13665, sum loss: 4073.774170, avg loss: 2.844815, ppl: 17.198368 +epoch: 1, batch: 13666, sum loss: 4391.675781, avg loss: 2.583339, ppl: 13.241274 +epoch: 1, batch: 13667, sum loss: 4749.872070, avg loss: 2.731381, ppl: 15.354083 +epoch: 1, batch: 13668, sum loss: 5032.374023, avg loss: 2.822420, ppl: 16.817493 +epoch: 1, batch: 13669, sum loss: 4861.325684, avg loss: 2.874823, ppl: 17.722288 +epoch: 1, batch: 13670, sum loss: 4922.900879, avg loss: 2.800285, ppl: 16.449331 +epoch: 1, batch: 13671, sum loss: 4283.108398, avg loss: 2.731574, ppl: 15.357041 +epoch: 1, batch: 13672, sum loss: 5319.937500, avg loss: 3.237941, ppl: 25.481203 +epoch: 1, batch: 13673, sum loss: 3473.739014, avg loss: 2.604002, ppl: 13.517731 +epoch: 1, batch: 13674, sum loss: 3747.373535, avg loss: 2.591545, ppl: 13.350377 +epoch: 1, batch: 13675, sum loss: 4967.355957, avg loss: 3.108483, ppl: 22.387058 +epoch: 1, batch: 13676, sum loss: 4639.296875, avg loss: 2.543474, ppl: 12.723800 +epoch: 1, batch: 13677, sum loss: 4150.164551, avg loss: 2.691417, ppl: 14.752562 +epoch: 1, batch: 13678, sum loss: 5084.478516, avg loss: 2.821575, ppl: 16.803301 +epoch: 1, batch: 13679, sum loss: 3516.001465, avg loss: 2.277203, ppl: 9.749374 +epoch: 1, batch: 13680, sum loss: 
4448.610840, avg loss: 2.665435, ppl: 14.374202 +epoch: 1, batch: 13681, sum loss: 4644.807617, avg loss: 2.600676, ppl: 13.472847 +epoch: 1, batch: 13682, sum loss: 5261.641113, avg loss: 2.706606, ppl: 14.978348 +epoch: 1, batch: 13683, sum loss: 4599.081543, avg loss: 2.610149, ppl: 13.601072 +epoch: 1, batch: 13684, sum loss: 3823.182129, avg loss: 2.569343, ppl: 13.057241 +epoch: 1, batch: 13685, sum loss: 4692.868164, avg loss: 2.687783, ppl: 14.699045 +epoch: 1, batch: 13686, sum loss: 4446.453125, avg loss: 2.529268, ppl: 12.544321 +epoch: 1, batch: 13687, sum loss: 3934.224609, avg loss: 2.566357, ppl: 13.018308 +epoch: 1, batch: 13688, sum loss: 3963.900635, avg loss: 2.795417, ppl: 16.369446 +epoch: 1, batch: 13689, sum loss: 4462.088867, avg loss: 2.701022, ppl: 14.894953 +epoch: 1, batch: 13690, sum loss: 5450.426758, avg loss: 2.828452, ppl: 16.919247 +epoch: 1, batch: 13691, sum loss: 3893.625000, avg loss: 2.630828, ppl: 13.885258 +epoch: 1, batch: 13692, sum loss: 4722.502930, avg loss: 2.677156, ppl: 14.543672 +epoch: 1, batch: 13693, sum loss: 4136.468262, avg loss: 2.389641, ppl: 10.909575 +epoch: 1, batch: 13694, sum loss: 5074.336914, avg loss: 2.833242, ppl: 17.000490 +epoch: 1, batch: 13695, sum loss: 4634.423828, avg loss: 2.689741, ppl: 14.727863 +epoch: 1, batch: 13696, sum loss: 4321.036621, avg loss: 2.642836, ppl: 14.052999 +epoch: 1, batch: 13697, sum loss: 4810.894531, avg loss: 2.811744, ppl: 16.638918 +epoch: 1, batch: 13698, sum loss: 3934.766113, avg loss: 2.503032, ppl: 12.219487 +epoch: 1, batch: 13699, sum loss: 5285.735352, avg loss: 2.861795, ppl: 17.492901 +epoch: 1, batch: 13700, sum loss: 4286.260254, avg loss: 2.774279, ppl: 16.027061 +epoch: 1, batch: 13701, sum loss: 4595.978027, avg loss: 2.896017, ppl: 18.101894 +epoch: 1, batch: 13702, sum loss: 4154.167969, avg loss: 2.409610, ppl: 11.129622 +epoch: 1, batch: 13703, sum loss: 4601.544922, avg loss: 2.757067, ppl: 15.753573 +epoch: 1, batch: 13704, sum loss: 
4505.673340, avg loss: 2.672404, ppl: 14.474729 +epoch: 1, batch: 13705, sum loss: 3998.590332, avg loss: 2.641077, ppl: 14.028301 +epoch: 1, batch: 13706, sum loss: 4952.386719, avg loss: 2.725584, ppl: 15.265330 +epoch: 1, batch: 13707, sum loss: 3659.042236, avg loss: 2.626735, ppl: 13.828548 +epoch: 1, batch: 13708, sum loss: 3986.433105, avg loss: 2.553769, ppl: 12.855463 +epoch: 1, batch: 13709, sum loss: 4097.468750, avg loss: 2.800731, ppl: 16.456667 +epoch: 1, batch: 13710, sum loss: 4740.616699, avg loss: 2.664765, ppl: 14.364572 +epoch: 1, batch: 13711, sum loss: 3825.138184, avg loss: 2.505002, ppl: 12.243584 +epoch: 1, batch: 13712, sum loss: 4752.995605, avg loss: 2.846105, ppl: 17.220579 +epoch: 1, batch: 13713, sum loss: 4311.255859, avg loss: 2.932827, ppl: 18.780653 +epoch: 1, batch: 13714, sum loss: 4349.561035, avg loss: 2.703270, ppl: 14.928464 +epoch: 1, batch: 13715, sum loss: 5115.996094, avg loss: 2.981350, ppl: 19.714407 +epoch: 1, batch: 13716, sum loss: 4340.560547, avg loss: 3.033236, ppl: 20.764313 +epoch: 1, batch: 13717, sum loss: 4749.453125, avg loss: 2.847394, ppl: 17.242788 +epoch: 1, batch: 13718, sum loss: 4537.583496, avg loss: 2.661339, ppl: 14.315449 +epoch: 1, batch: 13719, sum loss: 3923.844482, avg loss: 2.561256, ppl: 12.952077 +epoch: 1, batch: 13720, sum loss: 4202.725098, avg loss: 2.906449, ppl: 18.291731 +epoch: 1, batch: 13721, sum loss: 4187.145996, avg loss: 2.418917, ppl: 11.233691 +epoch: 1, batch: 13722, sum loss: 4484.104980, avg loss: 2.715993, ppl: 15.119622 +epoch: 1, batch: 13723, sum loss: 4128.209961, avg loss: 2.452888, ppl: 11.621857 +epoch: 1, batch: 13724, sum loss: 4906.083984, avg loss: 2.754679, ppl: 15.716002 +epoch: 1, batch: 13725, sum loss: 4979.635254, avg loss: 2.883402, ppl: 17.874983 +epoch: 1, batch: 13726, sum loss: 5180.291016, avg loss: 2.874745, ppl: 17.720911 +epoch: 1, batch: 13727, sum loss: 4131.880371, avg loss: 2.514839, ppl: 12.364617 +epoch: 1, batch: 13728, sum loss: 
4562.423340, avg loss: 2.712499, ppl: 15.066879 +epoch: 1, batch: 13729, sum loss: 4246.667969, avg loss: 2.360571, ppl: 10.597005 +epoch: 1, batch: 13730, sum loss: 5027.157227, avg loss: 2.945025, ppl: 19.011133 +epoch: 1, batch: 13731, sum loss: 4199.247070, avg loss: 2.569919, ppl: 13.064761 +epoch: 1, batch: 13732, sum loss: 3912.658447, avg loss: 2.658056, ppl: 14.268524 +epoch: 1, batch: 13733, sum loss: 4461.752441, avg loss: 2.822108, ppl: 16.812250 +epoch: 1, batch: 13734, sum loss: 4836.044922, avg loss: 2.763454, ppl: 15.854513 +epoch: 1, batch: 13735, sum loss: 3860.351074, avg loss: 2.590840, ppl: 13.340972 +epoch: 1, batch: 13736, sum loss: 3951.527832, avg loss: 2.463546, ppl: 11.746391 +epoch: 1, batch: 13737, sum loss: 3963.982178, avg loss: 2.544276, ppl: 12.734009 +epoch: 1, batch: 13738, sum loss: 3991.566895, avg loss: 2.479234, ppl: 11.932120 +epoch: 1, batch: 13739, sum loss: 4170.023926, avg loss: 2.373377, ppl: 10.733582 +epoch: 1, batch: 13740, sum loss: 4615.591309, avg loss: 2.941741, ppl: 18.948803 +epoch: 1, batch: 13741, sum loss: 3472.854492, avg loss: 2.367317, ppl: 10.668732 +epoch: 1, batch: 13742, sum loss: 5047.956055, avg loss: 2.787386, ppl: 16.238520 +epoch: 1, batch: 13743, sum loss: 5019.696289, avg loss: 2.826406, ppl: 16.884661 +epoch: 1, batch: 13744, sum loss: 4028.969238, avg loss: 2.602693, ppl: 13.500049 +epoch: 1, batch: 13745, sum loss: 4067.350342, avg loss: 2.731599, ppl: 15.357418 +epoch: 1, batch: 13746, sum loss: 4756.936035, avg loss: 2.626690, ppl: 13.827929 +epoch: 1, batch: 13747, sum loss: 4897.348145, avg loss: 2.670310, ppl: 14.444443 +epoch: 1, batch: 13748, sum loss: 5667.153809, avg loss: 2.888458, ppl: 17.965576 +epoch: 1, batch: 13749, sum loss: 4827.583984, avg loss: 2.828110, ppl: 16.913467 +epoch: 1, batch: 13750, sum loss: 4188.100098, avg loss: 2.497376, ppl: 12.150572 +epoch: 1, batch: 13751, sum loss: 4149.836426, avg loss: 2.712312, ppl: 15.064056 +epoch: 1, batch: 13752, sum loss: 
4503.173828, avg loss: 2.864614, ppl: 17.542290 +epoch: 1, batch: 13753, sum loss: 4293.700195, avg loss: 2.655350, ppl: 14.229961 +epoch: 1, batch: 13754, sum loss: 3987.432129, avg loss: 2.656517, ppl: 14.246582 +epoch: 1, batch: 13755, sum loss: 3939.034424, avg loss: 2.442055, ppl: 11.496639 +epoch: 1, batch: 13756, sum loss: 4354.469727, avg loss: 2.768258, ppl: 15.930856 +epoch: 1, batch: 13757, sum loss: 5070.431641, avg loss: 2.650513, ppl: 14.161304 +epoch: 1, batch: 13758, sum loss: 5056.911133, avg loss: 2.766363, ppl: 15.900697 +epoch: 1, batch: 13759, sum loss: 5435.758789, avg loss: 3.115048, ppl: 22.534510 +epoch: 1, batch: 13760, sum loss: 4291.795898, avg loss: 2.730150, ppl: 15.335187 +epoch: 1, batch: 13761, sum loss: 3739.630615, avg loss: 2.600578, ppl: 13.471517 +epoch: 1, batch: 13762, sum loss: 4785.025391, avg loss: 2.785230, ppl: 16.203547 +epoch: 1, batch: 13763, sum loss: 5179.518066, avg loss: 2.747755, ppl: 15.607554 +epoch: 1, batch: 13764, sum loss: 4087.515625, avg loss: 2.758108, ppl: 15.769977 +epoch: 1, batch: 13765, sum loss: 5048.015137, avg loss: 2.761496, ppl: 15.823502 +epoch: 1, batch: 13766, sum loss: 4253.418457, avg loss: 2.562300, ppl: 12.965610 +epoch: 1, batch: 13767, sum loss: 4388.639160, avg loss: 2.586116, ppl: 13.278103 +epoch: 1, batch: 13768, sum loss: 5276.778809, avg loss: 3.032631, ppl: 20.751766 +epoch: 1, batch: 13769, sum loss: 4711.952637, avg loss: 2.850546, ppl: 17.297222 +epoch: 1, batch: 13770, sum loss: 4545.787109, avg loss: 2.802581, ppl: 16.487150 +epoch: 1, batch: 13771, sum loss: 4034.609619, avg loss: 2.314750, ppl: 10.122394 +epoch: 1, batch: 13772, sum loss: 5263.514648, avg loss: 2.973737, ppl: 19.564901 +epoch: 1, batch: 13773, sum loss: 4829.772461, avg loss: 2.731772, ppl: 15.360080 +epoch: 1, batch: 13774, sum loss: 4523.276855, avg loss: 2.602576, ppl: 13.498463 +epoch: 1, batch: 13775, sum loss: 4151.951660, avg loss: 2.644555, ppl: 14.077184 +epoch: 1, batch: 13776, sum loss: 
4178.895508, avg loss: 2.727739, ppl: 15.298253 +epoch: 1, batch: 13777, sum loss: 5145.421387, avg loss: 3.046431, ppl: 21.040115 +epoch: 1, batch: 13778, sum loss: 4066.065674, avg loss: 2.506822, ppl: 12.265888 +epoch: 1, batch: 13779, sum loss: 5254.916016, avg loss: 3.039281, ppl: 20.890211 +epoch: 1, batch: 13780, sum loss: 4872.583008, avg loss: 2.819782, ppl: 16.773190 +epoch: 1, batch: 13781, sum loss: 5000.219727, avg loss: 2.878653, ppl: 17.790291 +epoch: 1, batch: 13782, sum loss: 4305.625977, avg loss: 2.467407, ppl: 11.791837 +epoch: 1, batch: 13783, sum loss: 3907.589844, avg loss: 2.465356, ppl: 11.767674 +epoch: 1, batch: 13784, sum loss: 3947.270996, avg loss: 2.807447, ppl: 16.567574 +epoch: 1, batch: 13785, sum loss: 5646.092285, avg loss: 3.102249, ppl: 22.247923 +epoch: 1, batch: 13786, sum loss: 5428.152344, avg loss: 2.959734, ppl: 19.292839 +epoch: 1, batch: 13787, sum loss: 4819.820801, avg loss: 2.815316, ppl: 16.698446 +epoch: 1, batch: 13788, sum loss: 4595.155273, avg loss: 2.720637, ppl: 15.189989 +epoch: 1, batch: 13789, sum loss: 5227.329590, avg loss: 2.744005, ppl: 15.549134 +epoch: 1, batch: 13790, sum loss: 5372.399902, avg loss: 2.835040, ppl: 17.031075 +epoch: 1, batch: 13791, sum loss: 5193.049805, avg loss: 2.669949, ppl: 14.439227 +epoch: 1, batch: 13792, sum loss: 4324.062988, avg loss: 2.721248, ppl: 15.199278 +epoch: 1, batch: 13793, sum loss: 4541.881836, avg loss: 2.953109, ppl: 19.165447 +epoch: 1, batch: 13794, sum loss: 5252.833496, avg loss: 2.836303, ppl: 17.052605 +epoch: 1, batch: 13795, sum loss: 6006.372559, avg loss: 3.088109, ppl: 21.935570 +epoch: 1, batch: 13796, sum loss: 5551.075195, avg loss: 2.874715, ppl: 17.720379 +epoch: 1, batch: 13797, sum loss: 4078.587891, avg loss: 2.621200, ppl: 13.752221 +epoch: 1, batch: 13798, sum loss: 3695.797119, avg loss: 2.656935, ppl: 14.252544 +epoch: 1, batch: 13799, sum loss: 4157.674805, avg loss: 2.609965, ppl: 13.598579 +epoch: 1, batch: 13800, sum loss: 
4044.174805, avg loss: 2.701519, ppl: 14.902358 +epoch: 1, batch: 13801, sum loss: 4586.112793, avg loss: 2.742891, ppl: 15.531816 +epoch: 1, batch: 13802, sum loss: 4909.642578, avg loss: 2.833031, ppl: 16.996895 +epoch: 1, batch: 13803, sum loss: 4616.955566, avg loss: 2.738408, ppl: 15.462347 +epoch: 1, batch: 13804, sum loss: 6096.252930, avg loss: 3.068069, ppl: 21.500340 +epoch: 1, batch: 13805, sum loss: 4657.253906, avg loss: 2.709281, ppl: 15.018473 +epoch: 1, batch: 13806, sum loss: 3677.113770, avg loss: 2.584057, ppl: 13.250793 +epoch: 1, batch: 13807, sum loss: 4754.414062, avg loss: 2.769024, ppl: 15.943065 +epoch: 1, batch: 13808, sum loss: 4251.732910, avg loss: 2.541383, ppl: 12.697213 +epoch: 1, batch: 13809, sum loss: 4546.660645, avg loss: 2.803120, ppl: 16.496033 +epoch: 1, batch: 13810, sum loss: 3962.091064, avg loss: 2.659122, ppl: 14.283735 +epoch: 1, batch: 13811, sum loss: 4819.541992, avg loss: 2.942333, ppl: 18.960033 +epoch: 1, batch: 13812, sum loss: 4364.173828, avg loss: 2.486709, ppl: 12.021644 +epoch: 1, batch: 13813, sum loss: 4593.399414, avg loss: 2.678367, ppl: 14.561294 +epoch: 1, batch: 13814, sum loss: 3847.922119, avg loss: 2.518274, ppl: 12.407158 +epoch: 1, batch: 13815, sum loss: 4396.280273, avg loss: 2.475383, ppl: 11.886259 +epoch: 1, batch: 13816, sum loss: 3773.212402, avg loss: 2.607610, ppl: 13.566594 +epoch: 1, batch: 13817, sum loss: 4743.795898, avg loss: 2.677086, ppl: 14.542652 +epoch: 1, batch: 13818, sum loss: 5171.499023, avg loss: 2.672609, ppl: 14.477694 +epoch: 1, batch: 13819, sum loss: 4428.428223, avg loss: 2.617274, ppl: 13.698335 +epoch: 1, batch: 13820, sum loss: 4426.911621, avg loss: 2.702632, ppl: 14.918949 +epoch: 1, batch: 13821, sum loss: 4977.024902, avg loss: 2.613984, ppl: 13.653336 +epoch: 1, batch: 13822, sum loss: 4038.563232, avg loss: 2.522526, ppl: 12.460026 +epoch: 1, batch: 13823, sum loss: 4503.710938, avg loss: 2.764709, ppl: 15.874420 +epoch: 1, batch: 13824, sum loss: 
4490.102539, avg loss: 2.573125, ppl: 13.106714 +epoch: 1, batch: 13825, sum loss: 4272.937012, avg loss: 2.835393, ppl: 17.037094 +epoch: 1, batch: 13826, sum loss: 4514.435547, avg loss: 2.682374, ppl: 14.619760 +epoch: 1, batch: 13827, sum loss: 5596.962891, avg loss: 3.060122, ppl: 21.330160 +epoch: 1, batch: 13828, sum loss: 3563.717773, avg loss: 2.427601, ppl: 11.331660 +epoch: 1, batch: 13829, sum loss: 4987.051270, avg loss: 2.782953, ppl: 16.166687 +epoch: 1, batch: 13830, sum loss: 3951.892090, avg loss: 2.202838, ppl: 9.050667 +epoch: 1, batch: 13831, sum loss: 5043.210449, avg loss: 2.910104, ppl: 18.358709 +epoch: 1, batch: 13832, sum loss: 4788.415039, avg loss: 2.825024, ppl: 16.861343 +epoch: 1, batch: 13833, sum loss: 4239.565430, avg loss: 2.567878, ppl: 13.038122 +epoch: 1, batch: 13834, sum loss: 3889.054199, avg loss: 2.674728, ppl: 14.508402 +epoch: 1, batch: 13835, sum loss: 4054.839844, avg loss: 2.550214, ppl: 12.809842 +epoch: 1, batch: 13836, sum loss: 4098.564941, avg loss: 2.590749, ppl: 13.339757 +epoch: 1, batch: 13837, sum loss: 5330.396973, avg loss: 3.119015, ppl: 22.624088 +epoch: 1, batch: 13838, sum loss: 5106.166992, avg loss: 2.914479, ppl: 18.439203 +epoch: 1, batch: 13839, sum loss: 5366.335938, avg loss: 2.969748, ppl: 19.487019 +epoch: 1, batch: 13840, sum loss: 5049.706055, avg loss: 2.979178, ppl: 19.671635 +epoch: 1, batch: 13841, sum loss: 4643.050781, avg loss: 2.724795, ppl: 15.253288 +epoch: 1, batch: 13842, sum loss: 4799.167480, avg loss: 2.702234, ppl: 14.913011 +epoch: 1, batch: 13843, sum loss: 5740.844238, avg loss: 3.143945, ppl: 23.195202 +epoch: 1, batch: 13844, sum loss: 4536.901367, avg loss: 2.427449, ppl: 11.329937 +epoch: 1, batch: 13845, sum loss: 4508.248047, avg loss: 2.812382, ppl: 16.649527 +epoch: 1, batch: 13846, sum loss: 5441.601074, avg loss: 2.963835, ppl: 19.372122 +epoch: 1, batch: 13847, sum loss: 4474.550781, avg loss: 2.803603, ppl: 16.504007 +epoch: 1, batch: 13848, sum loss: 
4138.973633, avg loss: 2.522226, ppl: 12.456298 +epoch: 1, batch: 13849, sum loss: 5103.509766, avg loss: 2.686058, ppl: 14.673718 +epoch: 1, batch: 13850, sum loss: 4176.479492, avg loss: 2.706727, ppl: 14.980165 +epoch: 1, batch: 13851, sum loss: 4966.987305, avg loss: 2.861168, ppl: 17.481932 +epoch: 1, batch: 13852, sum loss: 3778.143311, avg loss: 2.386698, ppl: 10.877520 +epoch: 1, batch: 13853, sum loss: 4194.644531, avg loss: 2.714980, ppl: 15.104314 +epoch: 1, batch: 13854, sum loss: 4251.864258, avg loss: 2.589442, ppl: 13.322342 +epoch: 1, batch: 13855, sum loss: 3645.608154, avg loss: 2.440166, ppl: 11.474945 +epoch: 1, batch: 13856, sum loss: 4189.975098, avg loss: 2.618734, ppl: 13.718350 +epoch: 1, batch: 13857, sum loss: 4341.248047, avg loss: 2.735506, ppl: 15.417543 +epoch: 1, batch: 13858, sum loss: 3541.323730, avg loss: 2.325229, ppl: 10.229021 +epoch: 1, batch: 13859, sum loss: 5266.074219, avg loss: 2.846527, ppl: 17.227839 +epoch: 1, batch: 13860, sum loss: 6644.092285, avg loss: 2.916634, ppl: 18.478985 +epoch: 1, batch: 13861, sum loss: 4863.135254, avg loss: 2.886134, ppl: 17.923876 +epoch: 1, batch: 13862, sum loss: 5321.184082, avg loss: 3.070504, ppl: 21.552767 +epoch: 1, batch: 13863, sum loss: 4855.067383, avg loss: 2.724505, ppl: 15.248859 +epoch: 1, batch: 13864, sum loss: 3955.514160, avg loss: 2.340541, ppl: 10.386853 +epoch: 1, batch: 13865, sum loss: 5642.215820, avg loss: 2.838137, ppl: 17.083902 +epoch: 1, batch: 13866, sum loss: 4326.850098, avg loss: 2.859782, ppl: 17.457720 +epoch: 1, batch: 13867, sum loss: 4521.853516, avg loss: 2.867377, ppl: 17.590818 +epoch: 1, batch: 13868, sum loss: 4375.704590, avg loss: 2.418853, ppl: 11.232965 +epoch: 1, batch: 13869, sum loss: 4033.351807, avg loss: 2.681750, ppl: 14.610637 +epoch: 1, batch: 13870, sum loss: 3692.843750, avg loss: 2.510431, ppl: 12.310232 +epoch: 1, batch: 13871, sum loss: 5355.789062, avg loss: 2.788022, ppl: 16.248840 +epoch: 1, batch: 13872, sum loss: 
4052.904053, avg loss: 2.576544, ppl: 13.151608 +epoch: 1, batch: 13873, sum loss: 4724.321777, avg loss: 2.834026, ppl: 17.013823 +epoch: 1, batch: 13874, sum loss: 5171.573242, avg loss: 2.973878, ppl: 19.567650 +epoch: 1, batch: 13875, sum loss: 4483.843262, avg loss: 2.797157, ppl: 16.397966 +epoch: 1, batch: 13876, sum loss: 4078.751465, avg loss: 2.549220, ppl: 12.797113 +epoch: 1, batch: 13877, sum loss: 4458.595215, avg loss: 2.613479, ppl: 13.646443 +epoch: 1, batch: 13878, sum loss: 4405.910156, avg loss: 2.723059, ppl: 15.226836 +epoch: 1, batch: 13879, sum loss: 4247.815918, avg loss: 2.590132, ppl: 13.331528 +epoch: 1, batch: 13880, sum loss: 4962.523438, avg loss: 2.641045, ppl: 14.027857 +epoch: 1, batch: 13881, sum loss: 4451.773438, avg loss: 2.900178, ppl: 18.177383 +epoch: 1, batch: 13882, sum loss: 4732.033203, avg loss: 2.741618, ppl: 15.512066 +epoch: 1, batch: 13883, sum loss: 4003.876221, avg loss: 2.545376, ppl: 12.748018 +epoch: 1, batch: 13884, sum loss: 4355.081543, avg loss: 2.649076, ppl: 14.140973 +epoch: 1, batch: 13885, sum loss: 4967.474609, avg loss: 2.951559, ppl: 19.135769 +epoch: 1, batch: 13886, sum loss: 4505.571289, avg loss: 2.544083, ppl: 12.731549 +epoch: 1, batch: 13887, sum loss: 4753.661133, avg loss: 2.889764, ppl: 17.989056 +epoch: 1, batch: 13888, sum loss: 4207.210938, avg loss: 2.678046, ppl: 14.556628 +epoch: 1, batch: 13889, sum loss: 5125.029785, avg loss: 2.681858, ppl: 14.612211 +epoch: 1, batch: 13890, sum loss: 4117.048828, avg loss: 2.492160, ppl: 12.087360 +epoch: 1, batch: 13891, sum loss: 5136.921875, avg loss: 2.925354, ppl: 18.640829 +epoch: 1, batch: 13892, sum loss: 5093.257812, avg loss: 2.834312, ppl: 17.018682 +epoch: 1, batch: 13893, sum loss: 4189.824707, avg loss: 2.573602, ppl: 13.112978 +epoch: 1, batch: 13894, sum loss: 4481.452637, avg loss: 2.513434, ppl: 12.347257 +epoch: 1, batch: 13895, sum loss: 3266.616211, avg loss: 2.348394, ppl: 10.468745 +epoch: 1, batch: 13896, sum loss: 
6009.500977, avg loss: 2.979425, ppl: 19.676508 +epoch: 1, batch: 13897, sum loss: 4071.455566, avg loss: 2.522587, ppl: 12.460792 +epoch: 1, batch: 13898, sum loss: 3880.747803, avg loss: 2.816218, ppl: 16.713514 +epoch: 1, batch: 13899, sum loss: 4297.606934, avg loss: 2.718284, ppl: 15.154293 +epoch: 1, batch: 13900, sum loss: 4694.601562, avg loss: 2.726249, ppl: 15.275488 +epoch: 1, batch: 13901, sum loss: 4015.232910, avg loss: 2.377284, ppl: 10.775599 +epoch: 1, batch: 13902, sum loss: 3988.569824, avg loss: 2.595036, ppl: 13.397067 +epoch: 1, batch: 13903, sum loss: 4093.577148, avg loss: 2.574577, ppl: 13.125762 +epoch: 1, batch: 13904, sum loss: 4499.498047, avg loss: 2.728622, ppl: 15.311776 +epoch: 1, batch: 13905, sum loss: 4190.722656, avg loss: 2.574154, ppl: 13.120212 +epoch: 1, batch: 13906, sum loss: 4787.113770, avg loss: 2.825923, ppl: 16.876514 +epoch: 1, batch: 13907, sum loss: 4255.024902, avg loss: 2.676116, ppl: 14.528558 +epoch: 1, batch: 13908, sum loss: 4844.635742, avg loss: 2.685497, ppl: 14.665482 +epoch: 1, batch: 13909, sum loss: 4507.375000, avg loss: 2.772063, ppl: 15.991594 +epoch: 1, batch: 13910, sum loss: 4813.657715, avg loss: 2.761708, ppl: 15.826857 +epoch: 1, batch: 13911, sum loss: 4076.331787, avg loss: 2.653862, ppl: 14.208807 +epoch: 1, batch: 13912, sum loss: 3992.598145, avg loss: 2.565937, ppl: 13.012846 +epoch: 1, batch: 13913, sum loss: 4982.125977, avg loss: 2.881507, ppl: 17.841146 +epoch: 1, batch: 13914, sum loss: 6003.270996, avg loss: 2.952912, ppl: 19.161678 +epoch: 1, batch: 13915, sum loss: 3041.384766, avg loss: 2.274783, ppl: 9.725807 +epoch: 1, batch: 13916, sum loss: 3436.343750, avg loss: 2.607241, ppl: 13.561584 +epoch: 1, batch: 13917, sum loss: 4603.929688, avg loss: 2.720999, ppl: 15.195488 +epoch: 1, batch: 13918, sum loss: 4138.715820, avg loss: 2.647931, ppl: 14.124783 +epoch: 1, batch: 13919, sum loss: 3880.107666, avg loss: 2.537677, ppl: 12.650248 +epoch: 1, batch: 13920, sum loss: 
4396.291016, avg loss: 2.656369, ppl: 14.244476 +epoch: 1, batch: 13921, sum loss: 4806.527344, avg loss: 2.654074, ppl: 14.211816 +epoch: 1, batch: 13922, sum loss: 4274.242188, avg loss: 2.862855, ppl: 17.511446 +epoch: 1, batch: 13923, sum loss: 5033.010254, avg loss: 2.988724, ppl: 19.860315 +epoch: 1, batch: 13924, sum loss: 4632.520508, avg loss: 2.882714, ppl: 17.862679 +epoch: 1, batch: 13925, sum loss: 4661.102539, avg loss: 2.791080, ppl: 16.298605 +epoch: 1, batch: 13926, sum loss: 5372.279297, avg loss: 2.826028, ppl: 16.878288 +epoch: 1, batch: 13927, sum loss: 4485.186523, avg loss: 2.813793, ppl: 16.673042 +epoch: 1, batch: 13928, sum loss: 4039.857666, avg loss: 2.650825, ppl: 14.165724 +epoch: 1, batch: 13929, sum loss: 4070.534180, avg loss: 2.657007, ppl: 14.253561 +epoch: 1, batch: 13930, sum loss: 4489.328125, avg loss: 2.673811, ppl: 14.495101 +epoch: 1, batch: 13931, sum loss: 4722.261719, avg loss: 2.699978, ppl: 14.879406 +epoch: 1, batch: 13932, sum loss: 4170.630859, avg loss: 2.424785, ppl: 11.299804 +epoch: 1, batch: 13933, sum loss: 4094.753662, avg loss: 2.443171, ppl: 11.509475 +epoch: 1, batch: 13934, sum loss: 4457.234375, avg loss: 2.805056, ppl: 16.528008 +epoch: 1, batch: 13935, sum loss: 5366.986328, avg loss: 2.835175, ppl: 17.033377 +epoch: 1, batch: 13936, sum loss: 4044.598145, avg loss: 2.725470, ppl: 15.263591 +epoch: 1, batch: 13937, sum loss: 5029.376953, avg loss: 2.662455, ppl: 14.331428 +epoch: 1, batch: 13938, sum loss: 4038.171387, avg loss: 2.591894, ppl: 13.355047 +epoch: 1, batch: 13939, sum loss: 4549.791992, avg loss: 2.654488, ppl: 14.217703 +epoch: 1, batch: 13940, sum loss: 4124.727539, avg loss: 2.590909, ppl: 13.341897 +epoch: 1, batch: 13941, sum loss: 4769.725098, avg loss: 2.714698, ppl: 15.100054 +epoch: 1, batch: 13942, sum loss: 4470.252930, avg loss: 2.579488, ppl: 13.190384 +epoch: 1, batch: 13943, sum loss: 4249.789551, avg loss: 2.859885, ppl: 17.459522 +epoch: 1, batch: 13944, sum loss: 
3698.467285, avg loss: 2.711486, ppl: 15.051631 +epoch: 1, batch: 13945, sum loss: 4231.024902, avg loss: 2.564258, ppl: 12.991011 +epoch: 1, batch: 13946, sum loss: 3851.678467, avg loss: 2.483352, ppl: 11.981356 +epoch: 1, batch: 13947, sum loss: 4921.779297, avg loss: 2.755756, ppl: 15.732925 +epoch: 1, batch: 13948, sum loss: 5043.219727, avg loss: 2.826917, ppl: 16.893297 +epoch: 1, batch: 13949, sum loss: 4227.138672, avg loss: 2.646925, ppl: 14.110579 +epoch: 1, batch: 13950, sum loss: 4695.776367, avg loss: 2.849379, ppl: 17.277046 +epoch: 1, batch: 13951, sum loss: 4238.275879, avg loss: 2.614606, ppl: 13.661828 +epoch: 1, batch: 13952, sum loss: 3865.456543, avg loss: 2.703117, ppl: 14.926179 +epoch: 1, batch: 13953, sum loss: 4705.736328, avg loss: 2.690530, ppl: 14.739480 +epoch: 1, batch: 13954, sum loss: 4824.257812, avg loss: 2.793432, ppl: 16.337000 +epoch: 1, batch: 13955, sum loss: 4016.688721, avg loss: 2.560031, ppl: 12.936217 +epoch: 1, batch: 13956, sum loss: 3995.472656, avg loss: 2.616550, ppl: 13.688423 +epoch: 1, batch: 13957, sum loss: 5522.266602, avg loss: 2.980176, ppl: 19.691286 +epoch: 1, batch: 13958, sum loss: 3446.801270, avg loss: 2.273616, ppl: 9.714461 +epoch: 1, batch: 13959, sum loss: 5048.251465, avg loss: 2.888016, ppl: 17.957645 +epoch: 1, batch: 13960, sum loss: 4395.721191, avg loss: 2.629020, ppl: 13.860180 +epoch: 1, batch: 13961, sum loss: 5156.765137, avg loss: 2.887327, ppl: 17.945272 +epoch: 1, batch: 13962, sum loss: 6100.777832, avg loss: 3.070346, ppl: 21.549360 +epoch: 1, batch: 13963, sum loss: 4866.655273, avg loss: 2.703697, ppl: 14.934851 +epoch: 1, batch: 13964, sum loss: 5459.291016, avg loss: 2.994674, ppl: 19.978849 +epoch: 1, batch: 13965, sum loss: 4721.947266, avg loss: 2.605931, ppl: 13.543829 +epoch: 1, batch: 13966, sum loss: 5175.459961, avg loss: 2.902670, ppl: 18.222729 +epoch: 1, batch: 13967, sum loss: 4869.286133, avg loss: 2.780860, ppl: 16.132893 +epoch: 1, batch: 13968, sum loss: 
4964.161621, avg loss: 2.674656, ppl: 14.507360 +epoch: 1, batch: 13969, sum loss: 4405.482910, avg loss: 2.724479, ppl: 15.248474 +epoch: 1, batch: 13970, sum loss: 4267.496582, avg loss: 2.690729, ppl: 14.742421 +epoch: 1, batch: 13971, sum loss: 4631.746582, avg loss: 2.795260, ppl: 16.366890 +epoch: 1, batch: 13972, sum loss: 3663.164551, avg loss: 2.624043, ppl: 13.791376 +epoch: 1, batch: 13973, sum loss: 3823.439941, avg loss: 2.418368, ppl: 11.227522 +epoch: 1, batch: 13974, sum loss: 4724.978516, avg loss: 2.893435, ppl: 18.055214 +epoch: 1, batch: 13975, sum loss: 4509.937012, avg loss: 2.816950, ppl: 16.725761 +epoch: 1, batch: 13976, sum loss: 4794.992676, avg loss: 2.833920, ppl: 17.012018 +epoch: 1, batch: 13977, sum loss: 5122.099121, avg loss: 2.948819, ppl: 19.083410 +epoch: 1, batch: 13978, sum loss: 4310.606934, avg loss: 2.772095, ppl: 15.992098 +epoch: 1, batch: 13979, sum loss: 4848.020996, avg loss: 2.737448, ppl: 15.447516 +epoch: 1, batch: 13980, sum loss: 4667.178711, avg loss: 2.696233, ppl: 14.823779 +epoch: 1, batch: 13981, sum loss: 4390.891113, avg loss: 2.723878, ppl: 15.239305 +epoch: 1, batch: 13982, sum loss: 4171.670898, avg loss: 2.438148, ppl: 11.451813 +epoch: 1, batch: 13983, sum loss: 4114.472656, avg loss: 2.505769, ppl: 12.252978 +epoch: 1, batch: 13984, sum loss: 5419.555664, avg loss: 2.921593, ppl: 18.570856 +epoch: 1, batch: 13985, sum loss: 4180.438477, avg loss: 2.718100, ppl: 15.151512 +epoch: 1, batch: 13986, sum loss: 4448.443359, avg loss: 2.742567, ppl: 15.526789 +epoch: 1, batch: 13987, sum loss: 4981.931152, avg loss: 2.848445, ppl: 17.260927 +epoch: 1, batch: 13988, sum loss: 4831.816406, avg loss: 2.711457, ppl: 15.051193 +epoch: 1, batch: 13989, sum loss: 4450.548828, avg loss: 2.679439, ppl: 14.576918 +epoch: 1, batch: 13990, sum loss: 3577.876221, avg loss: 2.592664, ppl: 13.365330 +epoch: 1, batch: 13991, sum loss: 5070.768066, avg loss: 2.757351, ppl: 15.758043 +epoch: 1, batch: 13992, sum loss: 
5631.463867, avg loss: 3.095912, ppl: 22.107391 +epoch: 1, batch: 13993, sum loss: 4424.785645, avg loss: 2.758595, ppl: 15.777656 +epoch: 1, batch: 13994, sum loss: 3904.935547, avg loss: 2.391265, ppl: 10.927307 +epoch: 1, batch: 13995, sum loss: 4644.003906, avg loss: 2.882684, ppl: 17.862156 +epoch: 1, batch: 13996, sum loss: 4487.049805, avg loss: 2.455966, ppl: 11.657689 +epoch: 1, batch: 13997, sum loss: 4056.381836, avg loss: 2.622096, ppl: 13.764538 +epoch: 1, batch: 13998, sum loss: 3905.038818, avg loss: 2.658297, ppl: 14.271968 +epoch: 1, batch: 13999, sum loss: 3451.999023, avg loss: 2.420757, ppl: 11.254371 +epoch: 1, batch: 14000, sum loss: 3340.696777, avg loss: 2.251143, ppl: 9.498589 +epoch: 1, batch: 14001, sum loss: 4436.305176, avg loss: 2.728355, ppl: 15.307684 +epoch: 1, batch: 14002, sum loss: 5724.083008, avg loss: 2.996902, ppl: 20.023413 +epoch: 1, batch: 14003, sum loss: 5085.540527, avg loss: 2.725370, ppl: 15.262059 +epoch: 1, batch: 14004, sum loss: 4119.881348, avg loss: 2.825707, ppl: 16.872877 +epoch: 1, batch: 14005, sum loss: 4967.916016, avg loss: 2.729624, ppl: 15.327127 +epoch: 1, batch: 14006, sum loss: 5378.242188, avg loss: 2.904018, ppl: 18.247324 +epoch: 1, batch: 14007, sum loss: 4994.149414, avg loss: 2.747057, ppl: 15.596663 +epoch: 1, batch: 14008, sum loss: 5835.297363, avg loss: 3.012544, ppl: 20.339069 +epoch: 1, batch: 14009, sum loss: 4607.854980, avg loss: 2.779165, ppl: 16.105564 +epoch: 1, batch: 14010, sum loss: 5704.434570, avg loss: 2.919363, ppl: 18.529472 +epoch: 1, batch: 14011, sum loss: 4829.237305, avg loss: 2.884849, ppl: 17.900869 +epoch: 1, batch: 14012, sum loss: 2931.941650, avg loss: 2.157426, ppl: 8.648846 +epoch: 1, batch: 14013, sum loss: 4123.061035, avg loss: 2.637915, ppl: 13.984015 +epoch: 1, batch: 14014, sum loss: 5057.136719, avg loss: 2.650491, ppl: 14.160987 +epoch: 1, batch: 14015, sum loss: 4521.230957, avg loss: 2.667393, ppl: 14.402372 +epoch: 1, batch: 14016, sum loss: 
4156.763672, avg loss: 2.596355, ppl: 13.414748 +epoch: 1, batch: 14017, sum loss: 3977.943115, avg loss: 2.307392, ppl: 10.048182 +epoch: 1, batch: 14018, sum loss: 4200.234375, avg loss: 2.781612, ppl: 16.145029 +epoch: 1, batch: 14019, sum loss: 4621.735840, avg loss: 2.816414, ppl: 16.716799 +epoch: 1, batch: 14020, sum loss: 4010.931885, avg loss: 2.400318, ppl: 11.026687 +epoch: 1, batch: 14021, sum loss: 4021.152832, avg loss: 2.724358, ppl: 15.246628 +epoch: 1, batch: 14022, sum loss: 4499.608398, avg loss: 2.824613, ppl: 16.854418 +epoch: 1, batch: 14023, sum loss: 4162.216797, avg loss: 2.583623, ppl: 13.245040 +epoch: 1, batch: 14024, sum loss: 4297.775879, avg loss: 2.676075, ppl: 14.527952 +epoch: 1, batch: 14025, sum loss: 4425.068848, avg loss: 2.884660, ppl: 17.897486 +epoch: 1, batch: 14026, sum loss: 4166.732910, avg loss: 2.455352, ppl: 11.650537 +epoch: 1, batch: 14027, sum loss: 4610.417969, avg loss: 2.879712, ppl: 17.809137 +epoch: 1, batch: 14028, sum loss: 3687.121826, avg loss: 2.435351, ppl: 11.419830 +epoch: 1, batch: 14029, sum loss: 5013.534668, avg loss: 2.838921, ppl: 17.097309 +epoch: 1, batch: 14030, sum loss: 4479.687012, avg loss: 2.886396, ppl: 17.928585 +epoch: 1, batch: 14031, sum loss: 4183.417969, avg loss: 2.626126, ppl: 13.820121 +epoch: 1, batch: 14032, sum loss: 4634.782715, avg loss: 2.852174, ppl: 17.325407 +epoch: 1, batch: 14033, sum loss: 3696.262695, avg loss: 2.531687, ppl: 12.574699 +epoch: 1, batch: 14034, sum loss: 3608.204102, avg loss: 2.307036, ppl: 10.044607 +epoch: 1, batch: 14035, sum loss: 4329.672852, avg loss: 2.694258, ppl: 14.794540 +epoch: 1, batch: 14036, sum loss: 3929.290771, avg loss: 2.568164, ppl: 13.041856 +epoch: 1, batch: 14037, sum loss: 4078.123047, avg loss: 2.373762, ppl: 10.737711 +epoch: 1, batch: 14038, sum loss: 4946.613281, avg loss: 2.645248, ppl: 14.086934 +epoch: 1, batch: 14039, sum loss: 4627.106934, avg loss: 2.850959, ppl: 17.304375 +epoch: 1, batch: 14040, sum loss: 
4543.059082, avg loss: 2.723657, ppl: 15.235933 +epoch: 1, batch: 14041, sum loss: 4096.625977, avg loss: 2.418315, ppl: 11.226928 +epoch: 1, batch: 14042, sum loss: 5555.563965, avg loss: 3.156570, ppl: 23.489897 +epoch: 1, batch: 14043, sum loss: 4826.520996, avg loss: 2.811020, ppl: 16.626867 +epoch: 1, batch: 14044, sum loss: 5315.810547, avg loss: 2.821556, ppl: 16.802969 +epoch: 1, batch: 14045, sum loss: 4806.021484, avg loss: 2.916275, ppl: 18.472355 +epoch: 1, batch: 14046, sum loss: 5782.795898, avg loss: 2.965536, ppl: 19.405109 +epoch: 1, batch: 14047, sum loss: 3429.089844, avg loss: 2.409761, ppl: 11.131300 +epoch: 1, batch: 14048, sum loss: 4557.179688, avg loss: 2.732122, ppl: 15.365460 +epoch: 1, batch: 14049, sum loss: 4817.976074, avg loss: 2.925304, ppl: 18.639894 +epoch: 1, batch: 14050, sum loss: 4294.339355, avg loss: 2.678939, ppl: 14.569628 +epoch: 1, batch: 14051, sum loss: 4740.842285, avg loss: 2.777295, ppl: 16.075476 +epoch: 1, batch: 14052, sum loss: 4546.011719, avg loss: 2.689948, ppl: 14.730907 +epoch: 1, batch: 14053, sum loss: 3468.794678, avg loss: 2.552461, ppl: 12.838663 +epoch: 1, batch: 14054, sum loss: 4087.747803, avg loss: 2.666502, ppl: 14.389550 +epoch: 1, batch: 14055, sum loss: 4600.872070, avg loss: 2.527952, ppl: 12.527820 +epoch: 1, batch: 14056, sum loss: 3951.350342, avg loss: 2.436098, ppl: 11.428356 +epoch: 1, batch: 14057, sum loss: 4549.511719, avg loss: 2.614662, ppl: 13.662597 +epoch: 1, batch: 14058, sum loss: 5317.049316, avg loss: 3.129517, ppl: 22.862936 +epoch: 1, batch: 14059, sum loss: 4062.163086, avg loss: 2.448561, ppl: 11.571685 +epoch: 1, batch: 14060, sum loss: 4695.822266, avg loss: 2.844229, ppl: 17.188305 +epoch: 1, batch: 14061, sum loss: 4534.353027, avg loss: 2.543103, ppl: 12.719083 +epoch: 1, batch: 14062, sum loss: 4466.230469, avg loss: 2.602699, ppl: 13.500120 +epoch: 1, batch: 14063, sum loss: 4524.906250, avg loss: 2.660145, ppl: 14.298360 +epoch: 1, batch: 14064, sum loss: 
4803.233398, avg loss: 2.871030, ppl: 17.655195 +epoch: 1, batch: 14065, sum loss: 5459.723633, avg loss: 2.908750, ppl: 18.333868 +epoch: 1, batch: 14066, sum loss: 4716.548828, avg loss: 2.804131, ppl: 16.512724 +epoch: 1, batch: 14067, sum loss: 3779.368652, avg loss: 2.592160, ppl: 13.358588 +epoch: 1, batch: 14068, sum loss: 5415.474609, avg loss: 2.928867, ppl: 18.706417 +epoch: 1, batch: 14069, sum loss: 5206.573242, avg loss: 2.851355, ppl: 17.311216 +epoch: 1, batch: 14070, sum loss: 3531.073975, avg loss: 2.500761, ppl: 12.191763 +epoch: 1, batch: 14071, sum loss: 5337.591797, avg loss: 2.927917, ppl: 18.688652 +epoch: 1, batch: 14072, sum loss: 4264.393555, avg loss: 2.835368, ppl: 17.036667 +epoch: 1, batch: 14073, sum loss: 4146.338379, avg loss: 2.795913, ppl: 16.377571 +epoch: 1, batch: 14074, sum loss: 4071.270508, avg loss: 2.745294, ppl: 15.569188 +epoch: 1, batch: 14075, sum loss: 5268.520020, avg loss: 2.874261, ppl: 17.712328 +epoch: 1, batch: 14076, sum loss: 3796.577881, avg loss: 2.430588, ppl: 11.365561 +epoch: 1, batch: 14077, sum loss: 4450.416992, avg loss: 2.767672, ppl: 15.921531 +epoch: 1, batch: 14078, sum loss: 4611.071289, avg loss: 2.715590, ppl: 15.113521 +epoch: 1, batch: 14079, sum loss: 4021.183594, avg loss: 2.601024, ppl: 13.477537 +epoch: 1, batch: 14080, sum loss: 4791.000977, avg loss: 2.759793, ppl: 15.796577 +epoch: 1, batch: 14081, sum loss: 4460.969238, avg loss: 2.584571, ppl: 13.257596 +epoch: 1, batch: 14082, sum loss: 4016.100098, avg loss: 2.569482, ppl: 13.059056 +epoch: 1, batch: 14083, sum loss: 4800.738770, avg loss: 2.807450, ppl: 16.567610 +epoch: 1, batch: 14084, sum loss: 4924.342285, avg loss: 2.717628, ppl: 15.144361 +epoch: 1, batch: 14085, sum loss: 3878.558594, avg loss: 2.845604, ppl: 17.211954 +epoch: 1, batch: 14086, sum loss: 4751.132324, avg loss: 2.504550, ppl: 12.238056 +epoch: 1, batch: 14087, sum loss: 4562.258301, avg loss: 2.840759, ppl: 17.128754 +epoch: 1, batch: 14088, sum loss: 
4194.344727, avg loss: 2.535880, ppl: 12.627533 +epoch: 1, batch: 14089, sum loss: 5413.544922, avg loss: 2.852237, ppl: 17.326494 +epoch: 1, batch: 14090, sum loss: 4191.328613, avg loss: 2.592040, ppl: 13.356993 +epoch: 1, batch: 14091, sum loss: 4705.958008, avg loss: 2.667777, ppl: 14.407899 +epoch: 1, batch: 14092, sum loss: 4532.681641, avg loss: 2.702851, ppl: 14.922218 +epoch: 1, batch: 14093, sum loss: 4913.716797, avg loss: 2.777681, ppl: 16.081678 +epoch: 1, batch: 14094, sum loss: 5643.250488, avg loss: 3.043824, ppl: 20.985342 +epoch: 1, batch: 14095, sum loss: 4773.118652, avg loss: 2.972054, ppl: 19.531992 +epoch: 1, batch: 14096, sum loss: 5228.828613, avg loss: 2.868255, ppl: 17.606266 +epoch: 1, batch: 14097, sum loss: 5277.981445, avg loss: 3.035067, ppl: 20.802374 +epoch: 1, batch: 14098, sum loss: 4510.244141, avg loss: 2.632950, ppl: 13.914762 +epoch: 1, batch: 14099, sum loss: 4584.467773, avg loss: 2.549759, ppl: 12.804023 +epoch: 1, batch: 14100, sum loss: 5000.094727, avg loss: 2.821724, ppl: 16.805798 +epoch: 1, batch: 14101, sum loss: 5053.150391, avg loss: 2.909125, ppl: 18.340750 +epoch: 1, batch: 14102, sum loss: 4306.359375, avg loss: 2.629035, ppl: 13.860385 +epoch: 1, batch: 14103, sum loss: 6181.537598, avg loss: 3.021279, ppl: 20.517523 +epoch: 1, batch: 14104, sum loss: 5107.683594, avg loss: 2.837602, ppl: 17.074774 +epoch: 1, batch: 14105, sum loss: 3732.162598, avg loss: 2.533715, ppl: 12.600235 +epoch: 1, batch: 14106, sum loss: 4951.139648, avg loss: 2.773748, ppl: 16.018559 +epoch: 1, batch: 14107, sum loss: 5117.200195, avg loss: 2.935858, ppl: 18.837654 +epoch: 1, batch: 14108, sum loss: 4678.041504, avg loss: 2.764800, ppl: 15.875862 +epoch: 1, batch: 14109, sum loss: 4660.717773, avg loss: 2.712874, ppl: 15.072535 +epoch: 1, batch: 14110, sum loss: 4546.694824, avg loss: 2.762269, ppl: 15.835737 +epoch: 1, batch: 14111, sum loss: 5456.066895, avg loss: 2.846149, ppl: 17.221331 +epoch: 1, batch: 14112, sum loss: 
5491.189941, avg loss: 3.013825, ppl: 20.365156 +epoch: 1, batch: 14113, sum loss: 4323.691406, avg loss: 2.719303, ppl: 15.169740 +epoch: 1, batch: 14114, sum loss: 5235.867676, avg loss: 2.724177, ppl: 15.243861 +epoch: 1, batch: 14115, sum loss: 3896.710693, avg loss: 2.384768, ppl: 10.856544 +epoch: 1, batch: 14116, sum loss: 4237.825684, avg loss: 2.663624, ppl: 14.348193 +epoch: 1, batch: 14117, sum loss: 4947.713379, avg loss: 2.838619, ppl: 17.092150 +epoch: 1, batch: 14118, sum loss: 3234.122070, avg loss: 2.374539, ppl: 10.746057 +epoch: 1, batch: 14119, sum loss: 4384.394531, avg loss: 2.658820, ppl: 14.279431 +epoch: 1, batch: 14120, sum loss: 4297.066406, avg loss: 2.505578, ppl: 12.250638 +epoch: 1, batch: 14121, sum loss: 4378.702637, avg loss: 2.660208, ppl: 14.299266 +epoch: 1, batch: 14122, sum loss: 4703.146973, avg loss: 2.948682, ppl: 19.080780 +epoch: 1, batch: 14123, sum loss: 5509.946289, avg loss: 2.816946, ppl: 16.725689 +epoch: 1, batch: 14124, sum loss: 4730.628906, avg loss: 2.781087, ppl: 16.136551 +epoch: 1, batch: 14125, sum loss: 4274.526367, avg loss: 2.390675, ppl: 10.920861 +epoch: 1, batch: 14126, sum loss: 6473.942383, avg loss: 3.142690, ppl: 23.166109 +epoch: 1, batch: 14127, sum loss: 3943.955566, avg loss: 2.549422, ppl: 12.799701 +epoch: 1, batch: 14128, sum loss: 4773.357422, avg loss: 2.660734, ppl: 14.306792 +epoch: 1, batch: 14129, sum loss: 4062.145996, avg loss: 2.693731, ppl: 14.786740 +epoch: 1, batch: 14130, sum loss: 5160.639648, avg loss: 2.696259, ppl: 14.824167 +epoch: 1, batch: 14131, sum loss: 4529.190918, avg loss: 2.527450, ppl: 12.521540 +epoch: 1, batch: 14132, sum loss: 4028.608887, avg loss: 2.639980, ppl: 14.012918 +epoch: 1, batch: 14133, sum loss: 3905.577637, avg loss: 2.596794, ppl: 13.420638 +epoch: 1, batch: 14134, sum loss: 4259.973633, avg loss: 2.592802, ppl: 13.367171 +epoch: 1, batch: 14135, sum loss: 4515.781250, avg loss: 2.761946, ppl: 15.830615 +epoch: 1, batch: 14136, sum loss: 
4766.463867, avg loss: 2.614626, ppl: 13.662111 +epoch: 1, batch: 14137, sum loss: 5073.567871, avg loss: 2.758873, ppl: 15.782054 +epoch: 1, batch: 14138, sum loss: 4525.716309, avg loss: 2.747854, ppl: 15.609106 +epoch: 1, batch: 14139, sum loss: 3758.687012, avg loss: 2.576208, ppl: 13.147184 +epoch: 1, batch: 14140, sum loss: 4775.863770, avg loss: 2.760615, ppl: 15.809561 +epoch: 1, batch: 14141, sum loss: 4196.405762, avg loss: 2.515831, ppl: 12.376886 +epoch: 1, batch: 14142, sum loss: 3652.958740, avg loss: 2.367439, ppl: 10.670034 +epoch: 1, batch: 14143, sum loss: 4809.207520, avg loss: 2.744982, ppl: 15.564326 +epoch: 1, batch: 14144, sum loss: 4984.451660, avg loss: 2.832075, ppl: 16.980654 +epoch: 1, batch: 14145, sum loss: 5721.141602, avg loss: 2.857713, ppl: 17.421638 +epoch: 1, batch: 14146, sum loss: 4622.391113, avg loss: 2.835823, ppl: 17.044418 +epoch: 1, batch: 14147, sum loss: 3057.946777, avg loss: 2.192077, ppl: 8.953788 +epoch: 1, batch: 14148, sum loss: 4757.557129, avg loss: 2.666792, ppl: 14.393723 +epoch: 1, batch: 14149, sum loss: 4667.911621, avg loss: 2.849763, ppl: 17.283680 +epoch: 1, batch: 14150, sum loss: 6493.574219, avg loss: 3.071700, ppl: 21.578558 +epoch: 1, batch: 14151, sum loss: 5414.636719, avg loss: 2.884729, ppl: 17.898720 +epoch: 1, batch: 14152, sum loss: 4534.920410, avg loss: 2.680213, ppl: 14.588200 +epoch: 1, batch: 14153, sum loss: 4400.434570, avg loss: 2.614637, ppl: 13.662258 +epoch: 1, batch: 14154, sum loss: 4053.690186, avg loss: 2.538316, ppl: 12.658334 +epoch: 1, batch: 14155, sum loss: 3636.666016, avg loss: 2.506317, ppl: 12.259693 +epoch: 1, batch: 14156, sum loss: 4210.749023, avg loss: 2.421362, ppl: 11.261191 +epoch: 1, batch: 14157, sum loss: 3219.725098, avg loss: 2.592371, ppl: 13.361417 +epoch: 1, batch: 14158, sum loss: 4864.104492, avg loss: 2.861238, ppl: 17.483158 +epoch: 1, batch: 14159, sum loss: 4919.683105, avg loss: 2.840464, ppl: 17.123703 +epoch: 1, batch: 14160, sum loss: 
4615.047363, avg loss: 2.678495, ppl: 14.563165 +epoch: 1, batch: 14161, sum loss: 3819.540039, avg loss: 2.619712, ppl: 13.731767 +epoch: 1, batch: 14162, sum loss: 4663.481445, avg loss: 2.622880, ppl: 13.775346 +epoch: 1, batch: 14163, sum loss: 4689.072266, avg loss: 2.782832, ppl: 16.164738 +epoch: 1, batch: 14164, sum loss: 3914.534912, avg loss: 2.445056, ppl: 11.531198 +epoch: 1, batch: 14165, sum loss: 5614.534180, avg loss: 2.711026, ppl: 15.044699 +epoch: 1, batch: 14166, sum loss: 3963.970947, avg loss: 2.393702, ppl: 10.953974 +epoch: 1, batch: 14167, sum loss: 5390.257812, avg loss: 2.864111, ppl: 17.533466 +epoch: 1, batch: 14168, sum loss: 3763.604004, avg loss: 2.525909, ppl: 12.502251 +epoch: 1, batch: 14169, sum loss: 5571.265625, avg loss: 2.829490, ppl: 16.936819 +epoch: 1, batch: 14170, sum loss: 3694.812744, avg loss: 2.455025, ppl: 11.646727 +epoch: 1, batch: 14171, sum loss: 4885.019531, avg loss: 2.878621, ppl: 17.789722 +epoch: 1, batch: 14172, sum loss: 5173.441895, avg loss: 2.864586, ppl: 17.541788 +epoch: 1, batch: 14173, sum loss: 4742.759766, avg loss: 2.823071, ppl: 16.828459 +epoch: 1, batch: 14174, sum loss: 4813.393066, avg loss: 2.853227, ppl: 17.343655 +epoch: 1, batch: 14175, sum loss: 3678.162842, avg loss: 2.582979, ppl: 13.236517 +epoch: 1, batch: 14176, sum loss: 4239.373535, avg loss: 2.801965, ppl: 16.476999 +epoch: 1, batch: 14177, sum loss: 5381.540039, avg loss: 2.971585, ppl: 19.522839 +epoch: 1, batch: 14178, sum loss: 4549.948730, avg loss: 2.716388, ppl: 15.125583 +epoch: 1, batch: 14179, sum loss: 4982.117188, avg loss: 2.767843, ppl: 15.924249 +epoch: 1, batch: 14180, sum loss: 4418.338867, avg loss: 2.708975, ppl: 15.013884 +epoch: 1, batch: 14181, sum loss: 4051.611572, avg loss: 2.733881, ppl: 15.392509 +epoch: 1, batch: 14182, sum loss: 3821.880859, avg loss: 2.511091, ppl: 12.318368 +epoch: 1, batch: 14183, sum loss: 4435.811523, avg loss: 2.811034, ppl: 16.627102 +epoch: 1, batch: 14184, sum loss: 
4884.616211, avg loss: 3.064377, ppl: 21.421104 +epoch: 1, batch: 14185, sum loss: 3804.968262, avg loss: 2.503269, ppl: 12.222381 +epoch: 1, batch: 14186, sum loss: 5019.846680, avg loss: 2.796572, ppl: 16.388371 +epoch: 1, batch: 14187, sum loss: 4153.228516, avg loss: 2.876197, ppl: 17.746653 +epoch: 1, batch: 14188, sum loss: 3905.645264, avg loss: 2.727406, ppl: 15.293165 +epoch: 1, batch: 14189, sum loss: 4835.711914, avg loss: 2.515979, ppl: 12.378725 +epoch: 1, batch: 14190, sum loss: 5196.482422, avg loss: 2.921013, ppl: 18.560081 +epoch: 1, batch: 14191, sum loss: 4575.994629, avg loss: 2.407151, ppl: 11.102288 +epoch: 1, batch: 14192, sum loss: 4340.687500, avg loss: 2.559368, ppl: 12.927640 +epoch: 1, batch: 14193, sum loss: 5266.618164, avg loss: 2.995801, ppl: 20.001373 +epoch: 1, batch: 14194, sum loss: 5370.783691, avg loss: 2.800200, ppl: 16.447935 +epoch: 1, batch: 14195, sum loss: 4292.270508, avg loss: 2.725251, ppl: 15.260243 +epoch: 1, batch: 14196, sum loss: 5068.864746, avg loss: 2.725196, ppl: 15.259406 +epoch: 1, batch: 14197, sum loss: 4562.683594, avg loss: 2.735422, ppl: 15.416246 +epoch: 1, batch: 14198, sum loss: 5606.124512, avg loss: 2.863189, ppl: 17.517309 +epoch: 1, batch: 14199, sum loss: 4600.229492, avg loss: 2.622708, ppl: 13.772968 +epoch: 1, batch: 14200, sum loss: 4737.917480, avg loss: 2.854167, ppl: 17.359974 +epoch: 1, batch: 14201, sum loss: 3958.560547, avg loss: 2.630273, ppl: 13.877556 +epoch: 1, batch: 14202, sum loss: 3374.389404, avg loss: 2.479346, ppl: 11.933453 +epoch: 1, batch: 14203, sum loss: 4688.716797, avg loss: 2.834774, ppl: 17.026560 +epoch: 1, batch: 14204, sum loss: 4417.903320, avg loss: 2.659785, ppl: 14.293220 +epoch: 1, batch: 14205, sum loss: 3980.901367, avg loss: 2.632871, ppl: 13.913664 +epoch: 1, batch: 14206, sum loss: 4155.076172, avg loss: 2.555397, ppl: 12.876414 +epoch: 1, batch: 14207, sum loss: 5299.358398, avg loss: 3.086406, ppl: 21.898224 +epoch: 1, batch: 14208, sum loss: 
4315.030273, avg loss: 2.644014, ppl: 14.069561 +epoch: 1, batch: 14209, sum loss: 5066.443359, avg loss: 2.684920, ppl: 14.657022 +epoch: 1, batch: 14210, sum loss: 4436.874512, avg loss: 2.680891, ppl: 14.598095 +epoch: 1, batch: 14211, sum loss: 4732.165527, avg loss: 2.760890, ppl: 15.813911 +epoch: 1, batch: 14212, sum loss: 5303.106445, avg loss: 2.877432, ppl: 17.768578 +epoch: 1, batch: 14213, sum loss: 4454.743164, avg loss: 2.894570, ppl: 18.075726 +epoch: 1, batch: 14214, sum loss: 4408.061035, avg loss: 2.696062, ppl: 14.821248 +epoch: 1, batch: 14215, sum loss: 5080.654297, avg loss: 2.755236, ppl: 15.724747 +epoch: 1, batch: 14216, sum loss: 4876.769043, avg loss: 2.855251, ppl: 17.378799 +epoch: 1, batch: 14217, sum loss: 4178.636719, avg loss: 2.639695, ppl: 14.008925 +epoch: 1, batch: 14218, sum loss: 4568.091309, avg loss: 2.619318, ppl: 13.726362 +epoch: 1, batch: 14219, sum loss: 5065.416992, avg loss: 2.692938, ppl: 14.775026 +epoch: 1, batch: 14220, sum loss: 5002.396484, avg loss: 2.730566, ppl: 15.341564 +epoch: 1, batch: 14221, sum loss: 4622.726074, avg loss: 2.714460, ppl: 15.096461 +epoch: 1, batch: 14222, sum loss: 5450.750000, avg loss: 2.828620, ppl: 16.922087 +epoch: 1, batch: 14223, sum loss: 4310.655273, avg loss: 2.862321, ppl: 17.502100 +epoch: 1, batch: 14224, sum loss: 3923.047852, avg loss: 2.620606, ppl: 13.744056 +epoch: 1, batch: 14225, sum loss: 4580.649414, avg loss: 2.764423, ppl: 15.869883 +epoch: 1, batch: 14226, sum loss: 5392.955566, avg loss: 2.929362, ppl: 18.715687 +epoch: 1, batch: 14227, sum loss: 3535.286133, avg loss: 2.530627, ppl: 12.561379 +epoch: 1, batch: 14228, sum loss: 4989.681641, avg loss: 2.717691, ppl: 15.145318 +epoch: 1, batch: 14229, sum loss: 4720.021484, avg loss: 2.806196, ppl: 16.546850 +epoch: 1, batch: 14230, sum loss: 4797.012207, avg loss: 2.889766, ppl: 17.989107 +epoch: 1, batch: 14231, sum loss: 4330.908203, avg loss: 2.599585, ppl: 13.458146 +epoch: 1, batch: 14232, sum loss: 
3907.818359, avg loss: 2.327468, ppl: 10.251947 +epoch: 1, batch: 14233, sum loss: 3725.583984, avg loss: 2.468909, ppl: 11.809559 +epoch: 1, batch: 14234, sum loss: 6075.135742, avg loss: 2.915133, ppl: 18.451258 +epoch: 1, batch: 14235, sum loss: 5344.125977, avg loss: 2.954188, ppl: 19.186140 +epoch: 1, batch: 14236, sum loss: 5099.376953, avg loss: 2.840878, ppl: 17.130808 +epoch: 1, batch: 14237, sum loss: 4458.166016, avg loss: 2.590451, ppl: 13.335781 +epoch: 1, batch: 14238, sum loss: 3533.343262, avg loss: 2.369781, ppl: 10.695050 +epoch: 1, batch: 14239, sum loss: 5092.583496, avg loss: 3.004474, ppl: 20.175600 +epoch: 1, batch: 14240, sum loss: 5223.328125, avg loss: 2.868385, ppl: 17.608551 +epoch: 1, batch: 14241, sum loss: 4077.956787, avg loss: 2.561531, ppl: 12.955632 +epoch: 1, batch: 14242, sum loss: 4277.221680, avg loss: 2.622453, ppl: 13.769464 +epoch: 1, batch: 14243, sum loss: 3929.773926, avg loss: 2.581980, ppl: 13.223301 +epoch: 1, batch: 14244, sum loss: 3994.157227, avg loss: 2.662771, ppl: 14.335966 +epoch: 1, batch: 14245, sum loss: 3943.605957, avg loss: 2.489650, ppl: 12.057058 +epoch: 1, batch: 14246, sum loss: 3436.425049, avg loss: 2.558768, ppl: 12.919888 +epoch: 1, batch: 14247, sum loss: 3783.329590, avg loss: 2.534045, ppl: 12.604391 +epoch: 1, batch: 14248, sum loss: 4006.300537, avg loss: 2.757261, ppl: 15.756631 +epoch: 1, batch: 14249, sum loss: 5171.637207, avg loss: 2.844685, ppl: 17.196138 +epoch: 1, batch: 14250, sum loss: 3703.112061, avg loss: 2.470388, ppl: 11.827040 +epoch: 1, batch: 14251, sum loss: 5079.862305, avg loss: 2.869979, ppl: 17.636642 +epoch: 1, batch: 14252, sum loss: 4428.077148, avg loss: 2.703344, ppl: 14.929567 +epoch: 1, batch: 14253, sum loss: 4341.099609, avg loss: 2.706421, ppl: 14.975588 +epoch: 1, batch: 14254, sum loss: 4408.206543, avg loss: 2.822155, ppl: 16.813047 +epoch: 1, batch: 14255, sum loss: 4656.260742, avg loss: 2.791523, ppl: 16.305838 +epoch: 1, batch: 14256, sum loss: 
5276.335938, avg loss: 2.919942, ppl: 18.540215 +epoch: 1, batch: 14257, sum loss: 5136.535645, avg loss: 2.651799, ppl: 14.179531 +epoch: 1, batch: 14258, sum loss: 4555.026367, avg loss: 2.636011, ppl: 13.957412 +epoch: 1, batch: 14259, sum loss: 4397.786133, avg loss: 2.673426, ppl: 14.489528 +epoch: 1, batch: 14260, sum loss: 4941.128418, avg loss: 2.861105, ppl: 17.480837 +epoch: 1, batch: 14261, sum loss: 4488.336914, avg loss: 2.864287, ppl: 17.536539 +epoch: 1, batch: 14262, sum loss: 4478.659668, avg loss: 2.581360, ppl: 13.215100 +epoch: 1, batch: 14263, sum loss: 4590.269531, avg loss: 2.633545, ppl: 13.923045 +epoch: 1, batch: 14264, sum loss: 4295.861328, avg loss: 2.793148, ppl: 16.332350 +epoch: 1, batch: 14265, sum loss: 4509.828613, avg loss: 2.577045, ppl: 13.158197 +epoch: 1, batch: 14266, sum loss: 5193.781250, avg loss: 2.736450, ppl: 15.432099 +epoch: 1, batch: 14267, sum loss: 4626.241699, avg loss: 2.807185, ppl: 16.563234 +epoch: 1, batch: 14268, sum loss: 4343.622070, avg loss: 2.573236, ppl: 13.108173 +epoch: 1, batch: 14269, sum loss: 4697.818359, avg loss: 2.862778, ppl: 17.510098 +epoch: 1, batch: 14270, sum loss: 3977.458008, avg loss: 2.426759, ppl: 11.322127 +epoch: 1, batch: 14271, sum loss: 4835.256348, avg loss: 2.596808, ppl: 13.420830 +epoch: 1, batch: 14272, sum loss: 4032.229492, avg loss: 2.731863, ppl: 15.361476 +epoch: 1, batch: 14273, sum loss: 4977.386230, avg loss: 2.823248, ppl: 16.831429 +epoch: 1, batch: 14274, sum loss: 4991.917480, avg loss: 2.919250, ppl: 18.527388 +epoch: 1, batch: 14275, sum loss: 3898.322021, avg loss: 2.648317, ppl: 14.130232 +epoch: 1, batch: 14276, sum loss: 3384.568848, avg loss: 2.619635, ppl: 13.730716 +epoch: 1, batch: 14277, sum loss: 5076.623535, avg loss: 2.863296, ppl: 17.519176 +epoch: 1, batch: 14278, sum loss: 4404.402344, avg loss: 2.832413, ppl: 16.986403 +epoch: 1, batch: 14279, sum loss: 5131.366211, avg loss: 2.784246, ppl: 16.187614 +epoch: 1, batch: 14280, sum loss: 
3614.267578, avg loss: 2.609580, ppl: 13.593334 +epoch: 1, batch: 14281, sum loss: 4623.955566, avg loss: 2.567438, ppl: 13.032392 +epoch: 1, batch: 14282, sum loss: 4896.900391, avg loss: 2.751068, ppl: 15.659342 +epoch: 1, batch: 14283, sum loss: 3712.766113, avg loss: 2.409323, ppl: 11.126420 +epoch: 1, batch: 14284, sum loss: 3688.353760, avg loss: 2.370407, ppl: 10.701751 +epoch: 1, batch: 14285, sum loss: 4567.021484, avg loss: 2.667653, ppl: 14.406113 +epoch: 1, batch: 14286, sum loss: 4579.455566, avg loss: 2.900225, ppl: 18.178238 +epoch: 1, batch: 14287, sum loss: 3683.518555, avg loss: 2.575887, ppl: 13.142973 +epoch: 1, batch: 14288, sum loss: 4935.116699, avg loss: 2.746309, ppl: 15.584998 +epoch: 1, batch: 14289, sum loss: 5783.678223, avg loss: 2.873164, ppl: 17.692905 +epoch: 1, batch: 14290, sum loss: 4371.720215, avg loss: 2.739173, ppl: 15.474182 +epoch: 1, batch: 14291, sum loss: 4174.383301, avg loss: 2.546909, ppl: 12.767573 +epoch: 1, batch: 14292, sum loss: 5201.194336, avg loss: 2.853096, ppl: 17.341393 +epoch: 1, batch: 14293, sum loss: 5200.167480, avg loss: 2.863528, ppl: 17.523245 +epoch: 1, batch: 14294, sum loss: 5210.153809, avg loss: 2.755237, ppl: 15.724772 +epoch: 1, batch: 14295, sum loss: 5145.749023, avg loss: 2.787513, ppl: 16.240580 +epoch: 1, batch: 14296, sum loss: 4676.043457, avg loss: 2.659865, ppl: 14.294365 +epoch: 1, batch: 14297, sum loss: 3971.605469, avg loss: 2.451608, ppl: 11.607000 +epoch: 1, batch: 14298, sum loss: 3989.031494, avg loss: 2.523107, ppl: 12.467267 +epoch: 1, batch: 14299, sum loss: 4172.387695, avg loss: 2.492466, ppl: 12.091056 +epoch: 1, batch: 14300, sum loss: 4052.736328, avg loss: 2.472688, ppl: 11.854274 +epoch: 1, batch: 14301, sum loss: 4538.113770, avg loss: 2.827485, ppl: 16.902899 +epoch: 1, batch: 14302, sum loss: 4333.975586, avg loss: 2.598307, ppl: 13.440959 +epoch: 1, batch: 14303, sum loss: 5095.549805, avg loss: 2.787500, ppl: 16.240366 +epoch: 1, batch: 14304, sum loss: 
4518.927246, avg loss: 2.738744, ppl: 15.467543 +epoch: 1, batch: 14305, sum loss: 3589.288086, avg loss: 2.378587, ppl: 10.789649 +epoch: 1, batch: 14306, sum loss: 4313.599121, avg loss: 2.779381, ppl: 16.109047 +epoch: 1, batch: 14307, sum loss: 4739.489746, avg loss: 2.879399, ppl: 17.803562 +epoch: 1, batch: 14308, sum loss: 4922.676270, avg loss: 2.742438, ppl: 15.524790 +epoch: 1, batch: 14309, sum loss: 5232.502441, avg loss: 2.910179, ppl: 18.360086 +epoch: 1, batch: 14310, sum loss: 4489.188477, avg loss: 2.752415, ppl: 15.680449 +epoch: 1, batch: 14311, sum loss: 5597.432617, avg loss: 3.171350, ppl: 23.839645 +epoch: 1, batch: 14312, sum loss: 5041.303711, avg loss: 2.829015, ppl: 16.928770 +epoch: 1, batch: 14313, sum loss: 4469.528320, avg loss: 2.576097, ppl: 13.145730 +epoch: 1, batch: 14314, sum loss: 3952.559082, avg loss: 2.760167, ppl: 15.802484 +epoch: 1, batch: 14315, sum loss: 5112.267090, avg loss: 2.760403, ppl: 15.806218 +epoch: 1, batch: 14316, sum loss: 3962.576172, avg loss: 2.603533, ppl: 13.511383 +epoch: 1, batch: 14317, sum loss: 3694.345703, avg loss: 2.648277, ppl: 14.129666 +epoch: 1, batch: 14318, sum loss: 4303.685059, avg loss: 2.513835, ppl: 12.352206 +epoch: 1, batch: 14319, sum loss: 4916.141602, avg loss: 2.890148, ppl: 17.995972 +epoch: 1, batch: 14320, sum loss: 4230.193359, avg loss: 2.494218, ppl: 12.112256 +epoch: 1, batch: 14321, sum loss: 4311.760742, avg loss: 2.851694, ppl: 17.317089 +epoch: 1, batch: 14322, sum loss: 4597.553711, avg loss: 2.527517, ppl: 12.522375 +epoch: 1, batch: 14323, sum loss: 4428.058105, avg loss: 2.791966, ppl: 16.313059 +epoch: 1, batch: 14324, sum loss: 4134.509766, avg loss: 2.568018, ppl: 13.039959 +epoch: 1, batch: 14325, sum loss: 4400.530762, avg loss: 2.552512, ppl: 12.839318 +epoch: 1, batch: 14326, sum loss: 3695.291016, avg loss: 2.524106, ppl: 12.479731 +epoch: 1, batch: 14327, sum loss: 4876.596680, avg loss: 2.617604, ppl: 13.702856 +epoch: 1, batch: 14328, sum loss: 
5081.854492, avg loss: 2.693087, ppl: 14.777217 +epoch: 1, batch: 14329, sum loss: 4103.595703, avg loss: 2.673352, ppl: 14.488457 +epoch: 1, batch: 14330, sum loss: 3573.621582, avg loss: 2.337228, ppl: 10.352500 +epoch: 1, batch: 14331, sum loss: 4233.141602, avg loss: 2.509272, ppl: 12.295974 +epoch: 1, batch: 14332, sum loss: 4351.887207, avg loss: 2.768376, ppl: 15.932740 +epoch: 1, batch: 14333, sum loss: 4269.934570, avg loss: 2.629270, ppl: 13.863647 +epoch: 1, batch: 14334, sum loss: 5274.000000, avg loss: 3.050318, ppl: 21.122065 +epoch: 1, batch: 14335, sum loss: 4276.278809, avg loss: 2.723744, ppl: 15.237270 +epoch: 1, batch: 14336, sum loss: 3981.223145, avg loss: 2.740002, ppl: 15.487019 +epoch: 1, batch: 14337, sum loss: 5554.295898, avg loss: 2.737455, ppl: 15.447619 +epoch: 1, batch: 14338, sum loss: 3733.527344, avg loss: 2.298970, ppl: 9.963914 +epoch: 1, batch: 14339, sum loss: 4790.146484, avg loss: 2.695637, ppl: 14.814949 +epoch: 1, batch: 14340, sum loss: 4999.910156, avg loss: 2.646856, ppl: 14.109603 +epoch: 1, batch: 14341, sum loss: 4325.961426, avg loss: 2.464935, ppl: 11.762721 +epoch: 1, batch: 14342, sum loss: 4025.508789, avg loss: 2.438225, ppl: 11.452692 +epoch: 1, batch: 14343, sum loss: 5719.875977, avg loss: 2.909398, ppl: 18.345749 +epoch: 1, batch: 14344, sum loss: 4794.083984, avg loss: 2.928579, ppl: 18.701038 +epoch: 1, batch: 14345, sum loss: 3545.733398, avg loss: 2.405518, ppl: 11.084171 +epoch: 1, batch: 14346, sum loss: 5030.947266, avg loss: 2.973373, ppl: 19.557779 +epoch: 1, batch: 14347, sum loss: 5063.631836, avg loss: 2.891852, ppl: 18.026657 +epoch: 1, batch: 14348, sum loss: 4441.916016, avg loss: 2.854702, ppl: 17.369257 +epoch: 1, batch: 14349, sum loss: 4788.101562, avg loss: 2.891366, ppl: 18.017897 +epoch: 1, batch: 14350, sum loss: 4619.242676, avg loss: 2.693436, ppl: 14.782380 +epoch: 1, batch: 14351, sum loss: 4953.008301, avg loss: 2.652924, ppl: 14.195479 +epoch: 1, batch: 14352, sum loss: 
4374.715820, avg loss: 2.530200, ppl: 12.556017 +epoch: 1, batch: 14353, sum loss: 4233.155762, avg loss: 2.498911, ppl: 12.169236 +epoch: 1, batch: 14354, sum loss: 3764.133789, avg loss: 2.539901, ppl: 12.678422 +epoch: 1, batch: 14355, sum loss: 4493.652832, avg loss: 2.634028, ppl: 13.929772 +epoch: 1, batch: 14356, sum loss: 4131.985352, avg loss: 2.741862, ppl: 15.515842 +epoch: 1, batch: 14357, sum loss: 6041.322266, avg loss: 3.041955, ppl: 20.946148 +epoch: 1, batch: 14358, sum loss: 5509.020996, avg loss: 2.651117, ppl: 14.169855 +epoch: 1, batch: 14359, sum loss: 4510.345215, avg loss: 2.826031, ppl: 16.878332 +epoch: 1, batch: 14360, sum loss: 5147.554199, avg loss: 3.019094, ppl: 20.472725 +epoch: 1, batch: 14361, sum loss: 3457.603760, avg loss: 2.419597, ppl: 11.241327 +epoch: 1, batch: 14362, sum loss: 4640.657715, avg loss: 2.595446, ppl: 13.402565 +epoch: 1, batch: 14363, sum loss: 4960.480957, avg loss: 2.606664, ppl: 13.553756 +epoch: 1, batch: 14364, sum loss: 3945.714111, avg loss: 2.545622, ppl: 12.751159 +epoch: 1, batch: 14365, sum loss: 5216.542480, avg loss: 2.754246, ppl: 15.709199 +epoch: 1, batch: 14366, sum loss: 5372.394043, avg loss: 3.031825, ppl: 20.735041 +epoch: 1, batch: 14367, sum loss: 3892.861572, avg loss: 2.651813, ppl: 14.179724 +epoch: 1, batch: 14368, sum loss: 4380.466797, avg loss: 2.848158, ppl: 17.255964 +epoch: 1, batch: 14369, sum loss: 5146.414551, avg loss: 3.020196, ppl: 20.495317 +epoch: 1, batch: 14370, sum loss: 3440.941895, avg loss: 2.452560, ppl: 11.618053 +epoch: 1, batch: 14371, sum loss: 4592.940430, avg loss: 3.035651, ppl: 20.814533 +epoch: 1, batch: 14372, sum loss: 4185.375977, avg loss: 2.494265, ppl: 12.112823 +epoch: 1, batch: 14373, sum loss: 5346.999023, avg loss: 2.857830, ppl: 17.423670 +epoch: 1, batch: 14374, sum loss: 5813.568848, avg loss: 2.771005, ppl: 15.974683 +epoch: 1, batch: 14375, sum loss: 5202.976074, avg loss: 3.058775, ppl: 21.301441 +epoch: 1, batch: 14376, sum loss: 
5029.666016, avg loss: 2.651379, ppl: 14.173573 +epoch: 1, batch: 14377, sum loss: 5156.288574, avg loss: 2.660624, ppl: 14.305210 +epoch: 1, batch: 14378, sum loss: 5553.386719, avg loss: 3.081791, ppl: 21.797400 +epoch: 1, batch: 14379, sum loss: 4566.615234, avg loss: 2.411096, ppl: 11.146169 +epoch: 1, batch: 14380, sum loss: 5105.059570, avg loss: 3.029709, ppl: 20.691208 +epoch: 1, batch: 14381, sum loss: 3834.788086, avg loss: 2.478855, ppl: 11.927595 +epoch: 1, batch: 14382, sum loss: 4885.592285, avg loss: 2.779063, ppl: 16.103920 +epoch: 1, batch: 14383, sum loss: 4624.734863, avg loss: 2.860071, ppl: 17.462770 +epoch: 1, batch: 14384, sum loss: 4488.103516, avg loss: 2.600292, ppl: 13.467667 +epoch: 1, batch: 14385, sum loss: 4765.611816, avg loss: 2.865672, ppl: 17.560844 +epoch: 1, batch: 14386, sum loss: 5870.075195, avg loss: 3.109150, ppl: 22.401993 +epoch: 1, batch: 14387, sum loss: 3916.452148, avg loss: 2.725436, ppl: 15.263074 +epoch: 1, batch: 14388, sum loss: 4454.003418, avg loss: 2.801260, ppl: 16.465380 +epoch: 1, batch: 14389, sum loss: 3997.647705, avg loss: 2.436105, ppl: 11.428435 +epoch: 1, batch: 14390, sum loss: 3909.392334, avg loss: 2.620236, ppl: 13.738968 +epoch: 1, batch: 14391, sum loss: 4056.664062, avg loss: 2.715304, ppl: 15.109201 +epoch: 1, batch: 14392, sum loss: 4738.301758, avg loss: 2.713804, ppl: 15.086555 +epoch: 1, batch: 14393, sum loss: 4217.502930, avg loss: 2.686308, ppl: 14.677382 +epoch: 1, batch: 14394, sum loss: 4574.672852, avg loss: 2.940021, ppl: 18.916245 +epoch: 1, batch: 14395, sum loss: 4089.571533, avg loss: 2.484551, ppl: 11.995738 +epoch: 1, batch: 14396, sum loss: 4828.042480, avg loss: 2.738538, ppl: 15.464360 +epoch: 1, batch: 14397, sum loss: 4597.010254, avg loss: 2.637413, ppl: 13.976992 +epoch: 1, batch: 14398, sum loss: 5133.601562, avg loss: 2.717629, ppl: 15.144375 +epoch: 1, batch: 14399, sum loss: 4858.928711, avg loss: 2.746709, ppl: 15.591242 +epoch: 1, batch: 14400, sum loss: 
4834.685547, avg loss: 2.799471, ppl: 16.435944 +epoch: 1, batch: 14401, sum loss: 4337.233887, avg loss: 2.685594, ppl: 14.666908 +epoch: 1, batch: 14402, sum loss: 4612.857422, avg loss: 2.667934, ppl: 14.410163 +epoch: 1, batch: 14403, sum loss: 5800.101562, avg loss: 3.030356, ppl: 20.704601 +epoch: 1, batch: 14404, sum loss: 5236.649414, avg loss: 2.938636, ppl: 18.890064 +epoch: 1, batch: 14405, sum loss: 5376.389160, avg loss: 2.870470, ppl: 17.645302 +epoch: 1, batch: 14406, sum loss: 3816.915527, avg loss: 2.537843, ppl: 12.652348 +epoch: 1, batch: 14407, sum loss: 4678.814453, avg loss: 2.690520, ppl: 14.739343 +epoch: 1, batch: 14408, sum loss: 4539.589844, avg loss: 2.798761, ppl: 16.424278 +epoch: 1, batch: 14409, sum loss: 4008.964844, avg loss: 2.460997, ppl: 11.716491 +epoch: 1, batch: 14410, sum loss: 5411.452148, avg loss: 2.678937, ppl: 14.569593 +epoch: 1, batch: 14411, sum loss: 3828.313232, avg loss: 2.535307, ppl: 12.620304 +epoch: 1, batch: 14412, sum loss: 4449.178223, avg loss: 2.721210, ppl: 15.198698 +epoch: 1, batch: 14413, sum loss: 5247.878906, avg loss: 3.108933, ppl: 22.397139 +epoch: 1, batch: 14414, sum loss: 4780.613281, avg loss: 2.697863, ppl: 14.847969 +epoch: 1, batch: 14415, sum loss: 4824.946289, avg loss: 2.884009, ppl: 17.885828 +epoch: 1, batch: 14416, sum loss: 3547.533936, avg loss: 2.535764, ppl: 12.626077 +epoch: 1, batch: 14417, sum loss: 4770.583008, avg loss: 2.807877, ppl: 16.574694 +epoch: 1, batch: 14418, sum loss: 4780.256836, avg loss: 2.923704, ppl: 18.610100 +epoch: 1, batch: 14419, sum loss: 5185.873535, avg loss: 2.756977, ppl: 15.752150 +epoch: 1, batch: 14420, sum loss: 4558.833984, avg loss: 2.810625, ppl: 16.620304 +epoch: 1, batch: 14421, sum loss: 5458.639648, avg loss: 2.958612, ppl: 19.271208 +epoch: 1, batch: 14422, sum loss: 3786.587158, avg loss: 2.499398, ppl: 12.175157 +epoch: 1, batch: 14423, sum loss: 4535.179199, avg loss: 2.772115, ppl: 15.992414 +epoch: 1, batch: 14424, sum loss: 
4211.714844, avg loss: 2.828552, ppl: 16.920950 +epoch: 1, batch: 14425, sum loss: 4212.048340, avg loss: 2.448865, ppl: 11.575206 +epoch: 1, batch: 14426, sum loss: 4429.313477, avg loss: 2.729090, ppl: 15.318944 +epoch: 1, batch: 14427, sum loss: 4934.561035, avg loss: 2.715774, ppl: 15.116302 +epoch: 1, batch: 14428, sum loss: 3626.476562, avg loss: 2.620286, ppl: 13.739659 +epoch: 1, batch: 14429, sum loss: 4327.221191, avg loss: 2.467059, ppl: 11.787724 +epoch: 1, batch: 14430, sum loss: 4058.605469, avg loss: 2.489942, ppl: 12.060577 +epoch: 1, batch: 14431, sum loss: 5163.703125, avg loss: 3.012662, ppl: 20.341486 +epoch: 1, batch: 14432, sum loss: 6232.614746, avg loss: 3.162158, ppl: 23.621529 +epoch: 1, batch: 14433, sum loss: 4220.903809, avg loss: 2.452588, ppl: 11.618375 +epoch: 1, batch: 14434, sum loss: 5005.836914, avg loss: 2.960282, ppl: 19.303415 +epoch: 1, batch: 14435, sum loss: 4960.580566, avg loss: 2.777481, ppl: 16.078466 +epoch: 1, batch: 14436, sum loss: 4690.230469, avg loss: 2.462063, ppl: 11.728987 +epoch: 1, batch: 14437, sum loss: 5029.028809, avg loss: 2.790804, ppl: 16.294117 +epoch: 1, batch: 14438, sum loss: 4650.980957, avg loss: 2.653155, ppl: 14.198770 +epoch: 1, batch: 14439, sum loss: 5599.539062, avg loss: 2.999218, ppl: 20.069826 +epoch: 1, batch: 14440, sum loss: 3996.514648, avg loss: 2.539082, ppl: 12.668031 +epoch: 1, batch: 14441, sum loss: 4403.833984, avg loss: 2.801421, ppl: 16.468035 +epoch: 1, batch: 14442, sum loss: 4692.660645, avg loss: 2.811660, ppl: 16.637518 +epoch: 1, batch: 14443, sum loss: 4631.557617, avg loss: 2.858986, ppl: 17.443836 +epoch: 1, batch: 14444, sum loss: 4988.313965, avg loss: 2.759023, ppl: 15.784417 +epoch: 1, batch: 14445, sum loss: 4903.993652, avg loss: 2.846195, ppl: 17.222122 +epoch: 1, batch: 14446, sum loss: 4564.666992, avg loss: 2.728432, ppl: 15.308867 +epoch: 1, batch: 14447, sum loss: 3740.165527, avg loss: 2.409901, ppl: 11.132855 +epoch: 1, batch: 14448, sum loss: 
4454.894043, avg loss: 2.755037, ppl: 15.721616 +epoch: 1, batch: 14449, sum loss: 4292.860840, avg loss: 2.532661, ppl: 12.586958 +epoch: 1, batch: 14450, sum loss: 3800.232666, avg loss: 2.361860, ppl: 10.610669 +epoch: 1, batch: 14451, sum loss: 4998.663574, avg loss: 2.867851, ppl: 17.599150 +epoch: 1, batch: 14452, sum loss: 5399.924316, avg loss: 2.813926, ppl: 16.675257 +epoch: 1, batch: 14453, sum loss: 3605.640869, avg loss: 2.353552, ppl: 10.522877 +epoch: 1, batch: 14454, sum loss: 4062.725830, avg loss: 2.514063, ppl: 12.355025 +epoch: 1, batch: 14455, sum loss: 5487.125000, avg loss: 2.878869, ppl: 17.794142 +epoch: 1, batch: 14456, sum loss: 5046.465820, avg loss: 2.870572, ppl: 17.647112 +epoch: 1, batch: 14457, sum loss: 4616.857910, avg loss: 2.801491, ppl: 16.469193 +epoch: 1, batch: 14458, sum loss: 5592.409180, avg loss: 3.098288, ppl: 22.159971 +epoch: 1, batch: 14459, sum loss: 4779.039551, avg loss: 2.588862, ppl: 13.314613 +epoch: 1, batch: 14460, sum loss: 4749.838867, avg loss: 2.764749, ppl: 15.875055 +epoch: 1, batch: 14461, sum loss: 3946.589111, avg loss: 2.507363, ppl: 12.272523 +epoch: 1, batch: 14462, sum loss: 5218.816895, avg loss: 3.046595, ppl: 21.043566 +epoch: 1, batch: 14463, sum loss: 4511.600098, avg loss: 2.496735, ppl: 12.142781 +epoch: 1, batch: 14464, sum loss: 4992.518555, avg loss: 2.605699, ppl: 13.540681 +epoch: 1, batch: 14465, sum loss: 4465.479492, avg loss: 2.792670, ppl: 16.324549 +epoch: 1, batch: 14466, sum loss: 4279.227539, avg loss: 2.514235, ppl: 12.357149 +epoch: 1, batch: 14467, sum loss: 4411.752441, avg loss: 2.713255, ppl: 15.078275 +epoch: 1, batch: 14468, sum loss: 4865.812500, avg loss: 3.003588, ppl: 20.157738 +epoch: 1, batch: 14469, sum loss: 4188.418457, avg loss: 2.757352, ppl: 15.758066 +epoch: 1, batch: 14470, sum loss: 4680.134766, avg loss: 2.984780, ppl: 19.782146 +epoch: 1, batch: 14471, sum loss: 3930.850098, avg loss: 2.606664, ppl: 13.553765 +epoch: 1, batch: 14472, sum loss: 
4905.944824, avg loss: 2.736166, ppl: 15.427714 +epoch: 1, batch: 14473, sum loss: 5434.054688, avg loss: 2.790989, ppl: 16.297125 +epoch: 1, batch: 14474, sum loss: 4826.639648, avg loss: 2.881576, ppl: 17.842371 +epoch: 1, batch: 14475, sum loss: 4503.845703, avg loss: 2.751280, ppl: 15.662672 +epoch: 1, batch: 14476, sum loss: 4638.925781, avg loss: 2.779464, ppl: 16.110388 +epoch: 1, batch: 14477, sum loss: 4592.862305, avg loss: 2.692182, ppl: 14.763853 +epoch: 1, batch: 14478, sum loss: 4125.105469, avg loss: 2.479030, ppl: 11.929686 +epoch: 1, batch: 14479, sum loss: 3866.175537, avg loss: 2.483093, ppl: 11.978253 +epoch: 1, batch: 14480, sum loss: 3850.162109, avg loss: 2.622726, ppl: 13.773221 +epoch: 1, batch: 14481, sum loss: 4612.975586, avg loss: 2.538787, ppl: 12.664298 +epoch: 1, batch: 14482, sum loss: 4704.293945, avg loss: 2.577695, ppl: 13.166759 +epoch: 1, batch: 14483, sum loss: 4676.498535, avg loss: 2.901054, ppl: 18.193304 +epoch: 1, batch: 14484, sum loss: 3279.636230, avg loss: 2.562216, ppl: 12.964512 +epoch: 1, batch: 14485, sum loss: 5572.610840, avg loss: 2.936044, ppl: 18.841158 +epoch: 1, batch: 14486, sum loss: 4684.523438, avg loss: 2.786748, ppl: 16.228159 +epoch: 1, batch: 14487, sum loss: 6714.501953, avg loss: 3.188272, ppl: 24.246506 +epoch: 1, batch: 14488, sum loss: 4792.468262, avg loss: 2.760638, ppl: 15.809934 +epoch: 1, batch: 14489, sum loss: 4107.499512, avg loss: 2.487886, ppl: 12.035805 +epoch: 1, batch: 14490, sum loss: 4438.726562, avg loss: 2.659513, ppl: 14.289325 +epoch: 1, batch: 14491, sum loss: 3652.931396, avg loss: 2.496877, ppl: 12.144512 +epoch: 1, batch: 14492, sum loss: 4674.832031, avg loss: 2.574247, ppl: 13.121428 +epoch: 1, batch: 14493, sum loss: 3514.342773, avg loss: 2.358619, ppl: 10.576341 +epoch: 1, batch: 14494, sum loss: 3949.049561, avg loss: 2.572671, ppl: 13.100766 +epoch: 1, batch: 14495, sum loss: 4603.182129, avg loss: 2.584605, ppl: 13.258058 +epoch: 1, batch: 14496, sum loss: 
4276.939453, avg loss: 2.673087, ppl: 14.484616 +epoch: 1, batch: 14497, sum loss: 4387.613770, avg loss: 2.869597, ppl: 17.629915 +epoch: 1, batch: 14498, sum loss: 4236.694336, avg loss: 2.666265, ppl: 14.386129 +epoch: 1, batch: 14499, sum loss: 4229.409180, avg loss: 2.474786, ppl: 11.879166 +epoch: 1, batch: 14500, sum loss: 5235.743164, avg loss: 2.983329, ppl: 19.753473 +epoch: 1, batch: 14501, sum loss: 3898.843750, avg loss: 2.515383, ppl: 12.371349 +epoch: 1, batch: 14502, sum loss: 5247.991211, avg loss: 2.998852, ppl: 20.062492 +epoch: 1, batch: 14503, sum loss: 4231.977539, avg loss: 2.668334, ppl: 14.415929 +epoch: 1, batch: 14504, sum loss: 4300.429199, avg loss: 2.538624, ppl: 12.662236 +epoch: 1, batch: 14505, sum loss: 4755.921875, avg loss: 3.054542, ppl: 21.211470 +epoch: 1, batch: 14506, sum loss: 4079.545898, avg loss: 2.638775, ppl: 13.996046 +epoch: 1, batch: 14507, sum loss: 4247.645508, avg loss: 2.485457, ppl: 12.006602 +epoch: 1, batch: 14508, sum loss: 4797.769531, avg loss: 2.689333, ppl: 14.721849 +epoch: 1, batch: 14509, sum loss: 4226.810059, avg loss: 2.643408, ppl: 14.061047 +epoch: 1, batch: 14510, sum loss: 3983.356934, avg loss: 2.538787, ppl: 12.664302 +epoch: 1, batch: 14511, sum loss: 4639.185059, avg loss: 2.596075, ppl: 13.410991 +epoch: 1, batch: 14512, sum loss: 4881.321289, avg loss: 2.736167, ppl: 15.427732 +epoch: 1, batch: 14513, sum loss: 4645.262695, avg loss: 2.603847, ppl: 13.515634 +epoch: 1, batch: 14514, sum loss: 4349.152344, avg loss: 2.605843, ppl: 13.542641 +epoch: 1, batch: 14515, sum loss: 3782.530518, avg loss: 2.495073, ppl: 12.122620 +epoch: 1, batch: 14516, sum loss: 3785.107910, avg loss: 2.432589, ppl: 11.388324 +epoch: 1, batch: 14517, sum loss: 4379.612793, avg loss: 2.530106, ppl: 12.554832 +epoch: 1, batch: 14518, sum loss: 4848.215332, avg loss: 2.672666, ppl: 14.478512 +epoch: 1, batch: 14519, sum loss: 4764.054688, avg loss: 2.658513, ppl: 14.275043 +epoch: 1, batch: 14520, sum loss: 
4694.149414, avg loss: 2.725987, ppl: 15.271475 +epoch: 1, batch: 14521, sum loss: 4418.410645, avg loss: 2.677825, ppl: 14.553401 +epoch: 1, batch: 14522, sum loss: 5419.246582, avg loss: 2.745312, ppl: 15.569474 +epoch: 1, batch: 14523, sum loss: 4723.372070, avg loss: 2.637282, ppl: 13.975166 +epoch: 1, batch: 14524, sum loss: 4383.023438, avg loss: 2.654769, ppl: 14.221700 +epoch: 1, batch: 14525, sum loss: 4756.090820, avg loss: 2.747597, ppl: 15.605091 +epoch: 1, batch: 14526, sum loss: 5138.503906, avg loss: 2.779072, ppl: 16.104067 +epoch: 1, batch: 14527, sum loss: 4571.725586, avg loss: 2.803020, ppl: 16.494385 +epoch: 1, batch: 14528, sum loss: 4624.244141, avg loss: 2.742731, ppl: 15.529336 +epoch: 1, batch: 14529, sum loss: 4765.295410, avg loss: 2.702947, ppl: 14.923646 +epoch: 1, batch: 14530, sum loss: 4547.807129, avg loss: 2.676755, ppl: 14.537848 +epoch: 1, batch: 14531, sum loss: 3849.918701, avg loss: 2.471065, ppl: 11.835042 +epoch: 1, batch: 14532, sum loss: 4883.021484, avg loss: 2.526136, ppl: 12.505098 +epoch: 1, batch: 14533, sum loss: 3801.322754, avg loss: 2.623411, ppl: 13.782659 +epoch: 1, batch: 14534, sum loss: 5541.878418, avg loss: 2.837623, ppl: 17.075136 +epoch: 1, batch: 14535, sum loss: 4565.175781, avg loss: 2.768451, ppl: 15.933933 +epoch: 1, batch: 14536, sum loss: 5278.317871, avg loss: 2.911372, ppl: 18.382010 +epoch: 1, batch: 14537, sum loss: 6070.614258, avg loss: 2.941189, ppl: 18.938351 +epoch: 1, batch: 14538, sum loss: 4196.791504, avg loss: 2.829934, ppl: 16.944336 +epoch: 1, batch: 14539, sum loss: 4201.030762, avg loss: 2.640497, ppl: 14.020169 +epoch: 1, batch: 14540, sum loss: 3928.217773, avg loss: 2.668626, ppl: 14.420147 +epoch: 1, batch: 14541, sum loss: 3724.387939, avg loss: 2.748626, ppl: 15.621150 +epoch: 1, batch: 14542, sum loss: 3854.928223, avg loss: 2.568240, ppl: 13.042851 +epoch: 1, batch: 14543, sum loss: 4672.780762, avg loss: 2.710430, ppl: 15.035738 +epoch: 1, batch: 14544, sum loss: 
5320.291504, avg loss: 2.674857, ppl: 14.510281 +epoch: 1, batch: 14545, sum loss: 4296.871582, avg loss: 2.685545, ppl: 14.666188 +epoch: 1, batch: 14546, sum loss: 5867.572266, avg loss: 2.919190, ppl: 18.526283 +epoch: 1, batch: 14547, sum loss: 3797.706543, avg loss: 2.511711, ppl: 12.326000 +epoch: 1, batch: 14548, sum loss: 6023.758789, avg loss: 3.065526, ppl: 21.445740 +epoch: 1, batch: 14549, sum loss: 4738.968262, avg loss: 2.657862, ppl: 14.265762 +epoch: 1, batch: 14550, sum loss: 4438.990234, avg loss: 2.696835, ppl: 14.832712 +epoch: 1, batch: 14551, sum loss: 4737.158691, avg loss: 2.644980, ppl: 14.083159 +epoch: 1, batch: 14552, sum loss: 4857.824219, avg loss: 2.819399, ppl: 16.766769 +epoch: 1, batch: 14553, sum loss: 4290.834961, avg loss: 2.557113, ppl: 12.898521 +epoch: 1, batch: 14554, sum loss: 4600.760742, avg loss: 2.564527, ppl: 12.994508 +epoch: 1, batch: 14555, sum loss: 4243.911621, avg loss: 2.514166, ppl: 12.356295 +epoch: 1, batch: 14556, sum loss: 4927.901367, avg loss: 2.762277, ppl: 15.835855 +epoch: 1, batch: 14557, sum loss: 4377.008789, avg loss: 2.831183, ppl: 16.965519 +epoch: 1, batch: 14558, sum loss: 4170.680176, avg loss: 2.560270, ppl: 12.939311 +epoch: 1, batch: 14559, sum loss: 4636.144531, avg loss: 2.623738, ppl: 13.787161 +epoch: 1, batch: 14560, sum loss: 4956.176270, avg loss: 2.782805, ppl: 16.164305 +epoch: 1, batch: 14561, sum loss: 4602.711914, avg loss: 2.646758, ppl: 14.108223 +epoch: 1, batch: 14562, sum loss: 3831.222656, avg loss: 2.588664, ppl: 13.311975 +epoch: 1, batch: 14563, sum loss: 3975.772461, avg loss: 2.574982, ppl: 13.131086 +epoch: 1, batch: 14564, sum loss: 4447.945801, avg loss: 2.621064, ppl: 13.750349 +epoch: 1, batch: 14565, sum loss: 4516.992676, avg loss: 2.597466, ppl: 13.429661 +epoch: 1, batch: 14566, sum loss: 4514.915039, avg loss: 2.631069, ppl: 13.888615 +epoch: 1, batch: 14567, sum loss: 4046.490234, avg loss: 2.593904, ppl: 13.381913 +epoch: 1, batch: 14568, sum loss: 
5001.952148, avg loss: 2.861529, ppl: 17.488243 +epoch: 1, batch: 14569, sum loss: 4373.123047, avg loss: 2.549926, ppl: 12.806153 +epoch: 1, batch: 14570, sum loss: 4557.996582, avg loss: 2.642317, ppl: 14.045711 +epoch: 1, batch: 14571, sum loss: 4680.953125, avg loss: 2.779664, ppl: 16.113598 +epoch: 1, batch: 14572, sum loss: 4048.970215, avg loss: 2.565887, ppl: 13.012197 +epoch: 1, batch: 14573, sum loss: 3894.651855, avg loss: 2.520810, ppl: 12.438672 +epoch: 1, batch: 14574, sum loss: 4786.038086, avg loss: 2.661868, ppl: 14.323014 +epoch: 1, batch: 14575, sum loss: 4730.450684, avg loss: 2.713970, ppl: 15.089067 +epoch: 1, batch: 14576, sum loss: 4676.985840, avg loss: 2.998068, ppl: 20.046766 +epoch: 1, batch: 14577, sum loss: 3810.856445, avg loss: 2.569694, ppl: 13.061831 +epoch: 1, batch: 14578, sum loss: 4405.000000, avg loss: 2.854828, ppl: 17.371456 +epoch: 1, batch: 14579, sum loss: 4924.050293, avg loss: 2.778810, ppl: 16.099844 +epoch: 1, batch: 14580, sum loss: 4927.706055, avg loss: 2.836906, ppl: 17.062895 +epoch: 1, batch: 14581, sum loss: 5028.680664, avg loss: 2.690573, ppl: 14.740116 +epoch: 1, batch: 14582, sum loss: 4295.528320, avg loss: 2.757079, ppl: 15.753754 +epoch: 1, batch: 14583, sum loss: 4903.719238, avg loss: 2.700286, ppl: 14.883986 +epoch: 1, batch: 14584, sum loss: 5146.744141, avg loss: 2.876883, ppl: 17.758837 +epoch: 1, batch: 14585, sum loss: 5446.850098, avg loss: 3.001019, ppl: 20.106024 +epoch: 1, batch: 14586, sum loss: 5078.316895, avg loss: 2.846590, ppl: 17.228935 +epoch: 1, batch: 14587, sum loss: 4374.767090, avg loss: 2.518576, ppl: 12.410915 +epoch: 1, batch: 14588, sum loss: 4309.362793, avg loss: 2.818419, ppl: 16.750351 +epoch: 1, batch: 14589, sum loss: 4837.313477, avg loss: 2.812392, ppl: 16.649689 +epoch: 1, batch: 14590, sum loss: 3506.832031, avg loss: 2.459209, ppl: 11.695557 +epoch: 1, batch: 14591, sum loss: 5077.527344, avg loss: 2.802167, ppl: 16.480328 +epoch: 1, batch: 14592, sum loss: 
3976.221924, avg loss: 2.534240, ppl: 12.606840 +epoch: 1, batch: 14593, sum loss: 4817.615234, avg loss: 2.900431, ppl: 18.181974 +epoch: 1, batch: 14594, sum loss: 4183.450684, avg loss: 2.669720, ppl: 14.435925 +epoch: 1, batch: 14595, sum loss: 4976.256348, avg loss: 2.990539, ppl: 19.896400 +epoch: 1, batch: 14596, sum loss: 5299.295898, avg loss: 3.014389, ppl: 20.376638 +epoch: 1, batch: 14597, sum loss: 5268.727539, avg loss: 2.851043, ppl: 17.305822 +epoch: 1, batch: 14598, sum loss: 4107.286133, avg loss: 2.639644, ppl: 14.008218 +epoch: 1, batch: 14599, sum loss: 5187.413574, avg loss: 2.858079, ppl: 17.428019 +epoch: 1, batch: 14600, sum loss: 3316.929688, avg loss: 2.239655, ppl: 9.390096 +epoch: 1, batch: 14601, sum loss: 4180.499512, avg loss: 2.796321, ppl: 16.384253 +epoch: 1, batch: 14602, sum loss: 4098.819336, avg loss: 2.629134, ppl: 13.861756 +epoch: 1, batch: 14603, sum loss: 4540.470703, avg loss: 2.710729, ppl: 15.040235 +epoch: 1, batch: 14604, sum loss: 4851.274414, avg loss: 2.620894, ppl: 13.748008 +epoch: 1, batch: 14605, sum loss: 4120.468750, avg loss: 2.644717, ppl: 14.079460 +epoch: 1, batch: 14606, sum loss: 4589.923828, avg loss: 2.829793, ppl: 16.941950 +epoch: 1, batch: 14607, sum loss: 5575.926758, avg loss: 2.967497, ppl: 19.443195 +epoch: 1, batch: 14608, sum loss: 3871.378906, avg loss: 2.472145, ppl: 11.847832 +epoch: 1, batch: 14609, sum loss: 5335.267578, avg loss: 2.866882, ppl: 17.582117 +epoch: 1, batch: 14610, sum loss: 5647.320312, avg loss: 2.802640, ppl: 16.488125 +epoch: 1, batch: 14611, sum loss: 4951.125977, avg loss: 2.759825, ppl: 15.797078 +epoch: 1, batch: 14612, sum loss: 4719.780273, avg loss: 2.690867, ppl: 14.744450 +epoch: 1, batch: 14613, sum loss: 4310.990234, avg loss: 2.845538, ppl: 17.210817 +epoch: 1, batch: 14614, sum loss: 4681.374023, avg loss: 2.796520, ppl: 16.387518 +epoch: 1, batch: 14615, sum loss: 4534.418945, avg loss: 2.934899, ppl: 18.819605 +epoch: 1, batch: 14616, sum loss: 
4344.520508, avg loss: 2.647483, ppl: 14.118463 +epoch: 1, batch: 14617, sum loss: 4158.074707, avg loss: 2.794405, ppl: 16.352896 +epoch: 1, batch: 14618, sum loss: 4898.033203, avg loss: 2.737861, ppl: 15.453893 +epoch: 1, batch: 14619, sum loss: 5520.728027, avg loss: 2.803823, ppl: 16.507639 +epoch: 1, batch: 14620, sum loss: 4731.485840, avg loss: 2.744482, ppl: 15.556547 +epoch: 1, batch: 14621, sum loss: 4344.014160, avg loss: 2.791783, ppl: 16.310072 +epoch: 1, batch: 14622, sum loss: 4907.589355, avg loss: 2.841685, ppl: 17.144623 +epoch: 1, batch: 14623, sum loss: 4286.787598, avg loss: 2.599629, ppl: 13.458739 +epoch: 1, batch: 14624, sum loss: 4718.820801, avg loss: 2.572967, ppl: 13.104643 +epoch: 1, batch: 14625, sum loss: 4836.689453, avg loss: 2.709630, ppl: 15.023716 +epoch: 1, batch: 14626, sum loss: 5289.724121, avg loss: 3.057644, ppl: 21.277367 +epoch: 1, batch: 14627, sum loss: 4044.073486, avg loss: 2.597350, ppl: 13.428102 +epoch: 1, batch: 14628, sum loss: 5357.161133, avg loss: 2.804796, ppl: 16.523712 +epoch: 1, batch: 14629, sum loss: 4825.262695, avg loss: 2.633877, ppl: 13.927664 +epoch: 1, batch: 14630, sum loss: 4736.759277, avg loss: 2.720712, ppl: 15.191133 +epoch: 1, batch: 14631, sum loss: 5243.483887, avg loss: 2.790572, ppl: 16.290331 +epoch: 1, batch: 14632, sum loss: 5315.978516, avg loss: 2.702582, ppl: 14.918196 +epoch: 1, batch: 14633, sum loss: 4918.980469, avg loss: 2.664670, ppl: 14.363205 +epoch: 1, batch: 14634, sum loss: 5310.599121, avg loss: 2.993573, ppl: 19.956865 +epoch: 1, batch: 14635, sum loss: 3303.789795, avg loss: 2.358166, ppl: 10.571540 +epoch: 1, batch: 14636, sum loss: 5023.554688, avg loss: 3.028062, ppl: 20.657158 +epoch: 1, batch: 14637, sum loss: 4584.219727, avg loss: 2.699776, ppl: 14.876402 +epoch: 1, batch: 14638, sum loss: 4179.479492, avg loss: 2.427108, ppl: 11.326075 +epoch: 1, batch: 14639, sum loss: 5665.241211, avg loss: 3.040924, ppl: 20.924564 +epoch: 1, batch: 14640, sum loss: 
4066.627197, avg loss: 2.775855, ppl: 16.052340 +epoch: 1, batch: 14641, sum loss: 4622.404297, avg loss: 2.813393, ppl: 16.666365 +epoch: 1, batch: 14642, sum loss: 4492.151855, avg loss: 2.589137, ppl: 13.318268 +epoch: 1, batch: 14643, sum loss: 3807.880127, avg loss: 2.503537, ppl: 12.225665 +epoch: 1, batch: 14644, sum loss: 4961.012695, avg loss: 2.650114, ppl: 14.155650 +epoch: 1, batch: 14645, sum loss: 5481.214844, avg loss: 2.810879, ppl: 16.624529 +epoch: 1, batch: 14646, sum loss: 4318.213867, avg loss: 2.794960, ppl: 16.361982 +epoch: 1, batch: 14647, sum loss: 6166.569336, avg loss: 3.203413, ppl: 24.616392 +epoch: 1, batch: 14648, sum loss: 3867.655273, avg loss: 2.618589, ppl: 13.716352 +epoch: 1, batch: 14649, sum loss: 4572.996094, avg loss: 2.647942, ppl: 14.124944 +epoch: 1, batch: 14650, sum loss: 4912.668945, avg loss: 2.973771, ppl: 19.565556 +epoch: 1, batch: 14651, sum loss: 4582.826172, avg loss: 2.626261, ppl: 13.821996 +epoch: 1, batch: 14652, sum loss: 5038.067871, avg loss: 2.781926, ppl: 16.150099 +epoch: 1, batch: 14653, sum loss: 4483.309570, avg loss: 2.823243, ppl: 16.831345 +epoch: 1, batch: 14654, sum loss: 4494.857422, avg loss: 2.832299, ppl: 16.984455 +epoch: 1, batch: 14655, sum loss: 4648.325684, avg loss: 2.707237, ppl: 14.987803 +epoch: 1, batch: 14656, sum loss: 3806.065674, avg loss: 2.364016, ppl: 10.633569 +epoch: 1, batch: 14657, sum loss: 4170.155273, avg loss: 2.590158, ppl: 13.331884 +epoch: 1, batch: 14658, sum loss: 4838.119629, avg loss: 2.693830, ppl: 14.788214 +epoch: 1, batch: 14659, sum loss: 5204.457031, avg loss: 2.755139, ppl: 15.723221 +epoch: 1, batch: 14660, sum loss: 4299.684570, avg loss: 2.810251, ppl: 16.614096 +epoch: 1, batch: 14661, sum loss: 5199.738770, avg loss: 2.874372, ppl: 17.714296 +epoch: 1, batch: 14662, sum loss: 4691.133301, avg loss: 2.843111, ppl: 17.169096 +epoch: 1, batch: 14663, sum loss: 3499.512695, avg loss: 2.401862, ppl: 11.043717 +epoch: 1, batch: 14664, sum loss: 
4167.906738, avg loss: 2.498745, ppl: 12.167217 +epoch: 1, batch: 14665, sum loss: 4871.307617, avg loss: 2.879023, ppl: 17.796883 +epoch: 1, batch: 14666, sum loss: 4645.110840, avg loss: 2.695944, ppl: 14.819496 +epoch: 1, batch: 14667, sum loss: 4275.256836, avg loss: 2.678732, ppl: 14.566617 +epoch: 1, batch: 14668, sum loss: 4147.362305, avg loss: 2.761226, ppl: 15.819232 +epoch: 1, batch: 14669, sum loss: 3878.881592, avg loss: 2.481690, ppl: 11.961464 +epoch: 1, batch: 14670, sum loss: 4152.284668, avg loss: 2.629693, ppl: 13.869505 +epoch: 1, batch: 14671, sum loss: 4408.942871, avg loss: 2.701558, ppl: 14.902934 +epoch: 1, batch: 14672, sum loss: 5381.269043, avg loss: 2.810062, ppl: 16.610950 +epoch: 1, batch: 14673, sum loss: 4118.203613, avg loss: 2.561072, ppl: 12.949690 +epoch: 1, batch: 14674, sum loss: 4849.825195, avg loss: 2.951811, ppl: 19.140579 +epoch: 1, batch: 14675, sum loss: 4162.080566, avg loss: 2.554991, ppl: 12.871184 +epoch: 1, batch: 14676, sum loss: 5338.193848, avg loss: 2.865375, ppl: 17.555635 +epoch: 1, batch: 14677, sum loss: 5169.071777, avg loss: 2.950384, ppl: 19.113285 +epoch: 1, batch: 14678, sum loss: 3555.712402, avg loss: 2.432088, ppl: 11.382623 +epoch: 1, batch: 14679, sum loss: 3609.587158, avg loss: 2.414439, ppl: 11.183499 +epoch: 1, batch: 14680, sum loss: 4377.295898, avg loss: 2.582475, ppl: 13.229847 +epoch: 1, batch: 14681, sum loss: 4502.209961, avg loss: 2.628260, ppl: 13.849656 +epoch: 1, batch: 14682, sum loss: 5068.705078, avg loss: 2.847587, ppl: 17.246119 +epoch: 1, batch: 14683, sum loss: 5031.919922, avg loss: 2.775466, ppl: 16.046106 +epoch: 1, batch: 14684, sum loss: 4532.576660, avg loss: 2.506956, ppl: 12.267532 +epoch: 1, batch: 14685, sum loss: 4152.814453, avg loss: 2.569811, ppl: 13.063354 +epoch: 1, batch: 14686, sum loss: 3750.890625, avg loss: 2.549892, ppl: 12.805717 +epoch: 1, batch: 14687, sum loss: 4275.047363, avg loss: 2.627564, ppl: 13.840020 +epoch: 1, batch: 14688, sum loss: 
4755.555176, avg loss: 2.714358, ppl: 15.094917 +epoch: 1, batch: 14689, sum loss: 5152.586914, avg loss: 2.669734, ppl: 14.436132 +epoch: 1, batch: 14690, sum loss: 4069.553711, avg loss: 2.588775, ppl: 13.313448 +epoch: 1, batch: 14691, sum loss: 4683.397461, avg loss: 2.784422, ppl: 16.190453 +epoch: 1, batch: 14692, sum loss: 4205.943848, avg loss: 2.596262, ppl: 13.413501 +epoch: 1, batch: 14693, sum loss: 3708.633545, avg loss: 2.613554, ppl: 13.647474 +epoch: 1, batch: 14694, sum loss: 4932.941406, avg loss: 2.785399, ppl: 16.206278 +epoch: 1, batch: 14695, sum loss: 3869.396240, avg loss: 2.709661, ppl: 15.024185 +epoch: 1, batch: 14696, sum loss: 5212.348633, avg loss: 2.721853, ppl: 15.208478 +epoch: 1, batch: 14697, sum loss: 6012.207520, avg loss: 3.107084, ppl: 22.355755 +epoch: 1, batch: 14698, sum loss: 4284.425781, avg loss: 2.610863, ppl: 13.610788 +epoch: 1, batch: 14699, sum loss: 3493.065674, avg loss: 2.796690, ppl: 16.390305 +epoch: 1, batch: 14700, sum loss: 4888.536621, avg loss: 2.568858, ppl: 13.050911 +epoch: 1, batch: 14701, sum loss: 5034.654297, avg loss: 2.915260, ppl: 18.453615 +epoch: 1, batch: 14702, sum loss: 4738.372559, avg loss: 2.910548, ppl: 18.366865 +epoch: 1, batch: 14703, sum loss: 4480.708496, avg loss: 2.606579, ppl: 13.552603 +epoch: 1, batch: 14704, sum loss: 4739.240234, avg loss: 2.777984, ppl: 16.086552 +epoch: 1, batch: 14705, sum loss: 5851.160645, avg loss: 3.218460, ppl: 24.989609 +epoch: 1, batch: 14706, sum loss: 4847.310059, avg loss: 2.760427, ppl: 15.806595 +epoch: 1, batch: 14707, sum loss: 4012.278809, avg loss: 2.596944, ppl: 13.422660 +epoch: 1, batch: 14708, sum loss: 3634.008057, avg loss: 2.483943, ppl: 11.988439 +epoch: 1, batch: 14709, sum loss: 4191.417969, avg loss: 2.629497, ppl: 13.866791 +epoch: 1, batch: 14710, sum loss: 3928.135254, avg loss: 2.495639, ppl: 12.129478 +epoch: 1, batch: 14711, sum loss: 4534.956543, avg loss: 2.751794, ppl: 15.670721 +epoch: 1, batch: 14712, sum loss: 
3975.926758, avg loss: 2.617463, ppl: 13.700925 +epoch: 1, batch: 14713, sum loss: 5198.529297, avg loss: 2.782938, ppl: 16.166456 +epoch: 1, batch: 14714, sum loss: 4333.145020, avg loss: 2.565509, ppl: 13.007281 +epoch: 1, batch: 14715, sum loss: 4140.655762, avg loss: 2.639041, ppl: 13.999774 +epoch: 1, batch: 14716, sum loss: 4729.641113, avg loss: 2.643734, ppl: 14.065633 +epoch: 1, batch: 14717, sum loss: 4480.376953, avg loss: 2.662137, ppl: 14.326877 +epoch: 1, batch: 14718, sum loss: 3960.317627, avg loss: 2.414828, ppl: 11.187844 +epoch: 1, batch: 14719, sum loss: 4931.218750, avg loss: 2.709461, ppl: 15.021176 +epoch: 1, batch: 14720, sum loss: 5136.733887, avg loss: 2.726504, ppl: 15.279382 +epoch: 1, batch: 14721, sum loss: 3785.437012, avg loss: 2.537156, ppl: 12.643665 +epoch: 1, batch: 14722, sum loss: 4254.737305, avg loss: 2.478007, ppl: 11.917484 +epoch: 1, batch: 14723, sum loss: 4664.637695, avg loss: 2.642854, ppl: 14.053257 +epoch: 1, batch: 14724, sum loss: 5130.848633, avg loss: 2.867998, ppl: 17.601746 +epoch: 1, batch: 14725, sum loss: 4440.972168, avg loss: 2.649745, ppl: 14.150426 +epoch: 1, batch: 14726, sum loss: 3479.463135, avg loss: 2.257926, ppl: 9.563230 +epoch: 1, batch: 14727, sum loss: 4515.052734, avg loss: 2.848614, ppl: 17.263834 +epoch: 1, batch: 14728, sum loss: 4249.215820, avg loss: 2.674145, ppl: 14.499944 +epoch: 1, batch: 14729, sum loss: 4545.992188, avg loss: 2.846583, ppl: 17.228804 +epoch: 1, batch: 14730, sum loss: 5196.387207, avg loss: 2.811898, ppl: 16.641470 +epoch: 1, batch: 14731, sum loss: 4517.826172, avg loss: 2.673270, ppl: 14.487265 +epoch: 1, batch: 14732, sum loss: 4561.089844, avg loss: 2.714935, ppl: 15.103622 +epoch: 1, batch: 14733, sum loss: 4512.262695, avg loss: 2.620362, ppl: 13.740691 +epoch: 1, batch: 14734, sum loss: 4568.912109, avg loss: 2.671878, ppl: 14.467118 +epoch: 1, batch: 14735, sum loss: 5884.905762, avg loss: 3.052337, ppl: 21.164753 +epoch: 1, batch: 14736, sum loss: 
4881.259277, avg loss: 2.833000, ppl: 16.996376 +epoch: 1, batch: 14737, sum loss: 3974.315674, avg loss: 2.742799, ppl: 15.530395 +epoch: 1, batch: 14738, sum loss: 4730.928711, avg loss: 2.635615, ppl: 13.951888 +epoch: 1, batch: 14739, sum loss: 5033.243652, avg loss: 2.774666, ppl: 16.033268 +epoch: 1, batch: 14740, sum loss: 4543.286133, avg loss: 2.802768, ppl: 16.490221 +epoch: 1, batch: 14741, sum loss: 5995.683594, avg loss: 2.766813, ppl: 15.907851 +epoch: 1, batch: 14742, sum loss: 4825.953613, avg loss: 2.700590, ppl: 14.888508 +epoch: 1, batch: 14743, sum loss: 3855.701904, avg loss: 2.555137, ppl: 12.873065 +epoch: 1, batch: 14744, sum loss: 4412.614258, avg loss: 2.522936, ppl: 12.465136 +epoch: 1, batch: 14745, sum loss: 3939.242676, avg loss: 2.659853, ppl: 14.294190 +epoch: 1, batch: 14746, sum loss: 4050.654297, avg loss: 2.707657, ppl: 14.994098 +epoch: 1, batch: 14747, sum loss: 3914.466797, avg loss: 2.527093, ppl: 12.517065 +epoch: 1, batch: 14748, sum loss: 3509.904785, avg loss: 2.525111, ppl: 12.492288 +epoch: 1, batch: 14749, sum loss: 4176.348633, avg loss: 2.762136, ppl: 15.833620 +epoch: 1, batch: 14750, sum loss: 5115.035645, avg loss: 3.015941, ppl: 20.408283 +epoch: 1, batch: 14751, sum loss: 4461.278320, avg loss: 2.618121, ppl: 13.709940 +epoch: 1, batch: 14752, sum loss: 4549.894531, avg loss: 2.671694, ppl: 14.464448 +epoch: 1, batch: 14753, sum loss: 4166.997070, avg loss: 2.493715, ppl: 12.106165 +epoch: 1, batch: 14754, sum loss: 4625.135742, avg loss: 3.022965, ppl: 20.552134 +epoch: 1, batch: 14755, sum loss: 3337.116699, avg loss: 2.568989, ppl: 13.052619 +epoch: 1, batch: 14756, sum loss: 5074.564941, avg loss: 2.800533, ppl: 16.453407 +epoch: 1, batch: 14757, sum loss: 4018.755859, avg loss: 2.631798, ppl: 13.898741 +epoch: 1, batch: 14758, sum loss: 4123.223633, avg loss: 2.486866, ppl: 12.023535 +epoch: 1, batch: 14759, sum loss: 4009.193848, avg loss: 2.558516, ppl: 12.916632 +epoch: 1, batch: 14760, sum loss: 
4826.610352, avg loss: 2.678474, ppl: 14.562856 +epoch: 1, batch: 14761, sum loss: 3845.488037, avg loss: 2.577405, ppl: 13.162935 +epoch: 1, batch: 14762, sum loss: 4478.748047, avg loss: 2.650147, ppl: 14.156116 +epoch: 1, batch: 14763, sum loss: 4328.357422, avg loss: 2.441262, ppl: 11.487529 +epoch: 1, batch: 14764, sum loss: 4566.803223, avg loss: 2.840052, ppl: 17.116650 +epoch: 1, batch: 14765, sum loss: 4576.476562, avg loss: 2.737127, ppl: 15.442559 +epoch: 1, batch: 14766, sum loss: 4366.886230, avg loss: 2.760358, ppl: 15.805498 +epoch: 1, batch: 14767, sum loss: 3892.180176, avg loss: 2.364630, ppl: 10.640096 +epoch: 1, batch: 14768, sum loss: 4775.061523, avg loss: 2.708486, ppl: 15.006543 +epoch: 1, batch: 14769, sum loss: 4345.626465, avg loss: 2.752138, ppl: 15.676118 +epoch: 1, batch: 14770, sum loss: 4962.365234, avg loss: 2.806768, ppl: 16.556316 +epoch: 1, batch: 14771, sum loss: 4503.301758, avg loss: 2.557241, ppl: 12.900179 +epoch: 1, batch: 14772, sum loss: 4294.164062, avg loss: 2.799325, ppl: 16.433542 +epoch: 1, batch: 14773, sum loss: 5294.891602, avg loss: 2.813439, ppl: 16.667131 +epoch: 1, batch: 14774, sum loss: 4344.339355, avg loss: 2.716910, ppl: 15.133490 +epoch: 1, batch: 14775, sum loss: 4177.798340, avg loss: 2.746745, ppl: 15.591792 +epoch: 1, batch: 14776, sum loss: 4907.776855, avg loss: 2.763388, ppl: 15.853462 +epoch: 1, batch: 14777, sum loss: 5114.757812, avg loss: 2.836804, ppl: 17.061153 +epoch: 1, batch: 14778, sum loss: 4327.716309, avg loss: 2.607058, ppl: 13.559102 +epoch: 1, batch: 14779, sum loss: 5428.727051, avg loss: 2.968140, ppl: 19.455692 +epoch: 1, batch: 14780, sum loss: 4549.686523, avg loss: 2.628357, ppl: 13.850996 +epoch: 1, batch: 14781, sum loss: 4413.527832, avg loss: 2.532145, ppl: 12.580457 +epoch: 1, batch: 14782, sum loss: 4253.187988, avg loss: 2.551402, ppl: 12.825076 +epoch: 1, batch: 14783, sum loss: 4309.586914, avg loss: 2.663527, ppl: 14.346802 +epoch: 1, batch: 14784, sum loss: 
4229.837402, avg loss: 2.655265, ppl: 14.228758 +epoch: 1, batch: 14785, sum loss: 4701.179688, avg loss: 2.706494, ppl: 14.976673 +epoch: 1, batch: 14786, sum loss: 4591.824219, avg loss: 2.617916, ppl: 13.707123 +epoch: 1, batch: 14787, sum loss: 5051.888184, avg loss: 2.878569, ppl: 17.788794 +epoch: 1, batch: 14788, sum loss: 4438.116211, avg loss: 2.648041, ppl: 14.126335 +epoch: 1, batch: 14789, sum loss: 4321.687012, avg loss: 2.569374, ppl: 13.057646 +epoch: 1, batch: 14790, sum loss: 4495.101562, avg loss: 2.694905, ppl: 14.804113 +epoch: 1, batch: 14791, sum loss: 5030.590820, avg loss: 2.886168, ppl: 17.924492 +epoch: 1, batch: 14792, sum loss: 4701.265625, avg loss: 2.793384, ppl: 16.336210 +epoch: 1, batch: 14793, sum loss: 4637.541504, avg loss: 2.645489, ppl: 14.090330 +epoch: 1, batch: 14794, sum loss: 4201.542480, avg loss: 2.652489, ppl: 14.189311 +epoch: 1, batch: 14795, sum loss: 4492.133789, avg loss: 2.667538, ppl: 14.404461 +epoch: 1, batch: 14796, sum loss: 4358.932129, avg loss: 2.500822, ppl: 12.192507 +epoch: 1, batch: 14797, sum loss: 3951.884766, avg loss: 2.473019, ppl: 11.858197 +epoch: 1, batch: 14798, sum loss: 4627.491699, avg loss: 2.860007, ppl: 17.461655 +epoch: 1, batch: 14799, sum loss: 5205.357422, avg loss: 2.765865, ppl: 15.892776 +epoch: 1, batch: 14800, sum loss: 4420.023438, avg loss: 2.630966, ppl: 13.887184 +epoch: 1, batch: 14801, sum loss: 4355.605469, avg loss: 2.595712, ppl: 13.406135 +epoch: 1, batch: 14802, sum loss: 4424.223145, avg loss: 2.533919, ppl: 12.602804 +epoch: 1, batch: 14803, sum loss: 4370.622559, avg loss: 2.724827, ppl: 15.253776 +epoch: 1, batch: 14804, sum loss: 4614.924316, avg loss: 2.717859, ppl: 15.147857 +epoch: 1, batch: 14805, sum loss: 5571.542480, avg loss: 2.721809, ppl: 15.207804 +epoch: 1, batch: 14806, sum loss: 3202.118896, avg loss: 2.313670, ppl: 10.111463 +epoch: 1, batch: 14807, sum loss: 5221.920898, avg loss: 2.881855, ppl: 17.847345 +epoch: 1, batch: 14808, sum loss: 
3910.864258, avg loss: 2.489411, ppl: 12.054173 +epoch: 1, batch: 14809, sum loss: 5312.791016, avg loss: 2.991436, ppl: 19.914268 +epoch: 1, batch: 14810, sum loss: 5058.161133, avg loss: 2.923793, ppl: 18.611740 +epoch: 1, batch: 14811, sum loss: 4591.619141, avg loss: 2.700953, ppl: 14.893912 +epoch: 1, batch: 14812, sum loss: 3624.429199, avg loss: 2.429242, ppl: 11.350277 +epoch: 1, batch: 14813, sum loss: 4002.539551, avg loss: 2.638457, ppl: 13.991599 +epoch: 1, batch: 14814, sum loss: 5126.174805, avg loss: 2.795079, ppl: 16.363918 +epoch: 1, batch: 14815, sum loss: 4733.052246, avg loss: 2.684658, ppl: 14.653190 +epoch: 1, batch: 14816, sum loss: 3406.448486, avg loss: 2.273998, ppl: 9.718172 +epoch: 1, batch: 14817, sum loss: 4975.572266, avg loss: 2.723356, ppl: 15.231360 +epoch: 1, batch: 14818, sum loss: 3793.805176, avg loss: 2.363741, ppl: 10.630651 +epoch: 1, batch: 14819, sum loss: 5291.980957, avg loss: 2.772122, ppl: 15.992537 +epoch: 1, batch: 14820, sum loss: 4788.499512, avg loss: 2.737850, ppl: 15.453723 +epoch: 1, batch: 14821, sum loss: 5266.870117, avg loss: 2.565451, ppl: 13.006518 +epoch: 1, batch: 14822, sum loss: 4335.658691, avg loss: 2.859933, ppl: 17.460363 +epoch: 1, batch: 14823, sum loss: 4743.893555, avg loss: 2.578203, ppl: 13.173444 +epoch: 1, batch: 14824, sum loss: 4494.369141, avg loss: 2.656246, ppl: 14.242727 +epoch: 1, batch: 14825, sum loss: 5555.379883, avg loss: 2.853303, ppl: 17.344973 +epoch: 1, batch: 14826, sum loss: 4990.301758, avg loss: 2.696003, ppl: 14.820379 +epoch: 1, batch: 14827, sum loss: 5561.800781, avg loss: 2.933439, ppl: 18.792154 +epoch: 1, batch: 14828, sum loss: 4648.941406, avg loss: 2.571317, ppl: 13.083046 +epoch: 1, batch: 14829, sum loss: 4182.911133, avg loss: 2.473632, ppl: 11.865462 +epoch: 1, batch: 14830, sum loss: 4444.334473, avg loss: 2.695169, ppl: 14.808027 +epoch: 1, batch: 14831, sum loss: 4818.820312, avg loss: 2.853061, ppl: 17.340784 +epoch: 1, batch: 14832, sum loss: 
4591.972656, avg loss: 2.777963, ppl: 16.086218 +epoch: 1, batch: 14833, sum loss: 4358.626465, avg loss: 2.627261, ppl: 13.835824 +epoch: 1, batch: 14834, sum loss: 3611.338135, avg loss: 2.583218, ppl: 13.239670 +epoch: 1, batch: 14835, sum loss: 4495.971191, avg loss: 2.706786, ppl: 14.981044 +epoch: 1, batch: 14836, sum loss: 4740.287598, avg loss: 2.736887, ppl: 15.438842 +epoch: 1, batch: 14837, sum loss: 5041.075684, avg loss: 2.780516, ppl: 16.127342 +epoch: 1, batch: 14838, sum loss: 4736.320312, avg loss: 3.014844, ppl: 20.385920 +epoch: 1, batch: 14839, sum loss: 4900.812012, avg loss: 2.742480, ppl: 15.525445 +epoch: 1, batch: 14840, sum loss: 3575.513916, avg loss: 2.493385, ppl: 12.102171 +epoch: 1, batch: 14841, sum loss: 5091.762695, avg loss: 2.839801, ppl: 17.112356 +epoch: 1, batch: 14842, sum loss: 4991.153320, avg loss: 2.748432, ppl: 15.618130 +epoch: 1, batch: 14843, sum loss: 4028.381836, avg loss: 2.603996, ppl: 13.517648 +epoch: 1, batch: 14844, sum loss: 4022.043457, avg loss: 2.467511, ppl: 11.793062 +epoch: 1, batch: 14845, sum loss: 3982.097656, avg loss: 2.579079, ppl: 13.184988 +epoch: 1, batch: 14846, sum loss: 4778.593262, avg loss: 3.045630, ppl: 21.023266 +epoch: 1, batch: 14847, sum loss: 4097.616211, avg loss: 2.750078, ppl: 15.643855 +epoch: 1, batch: 14848, sum loss: 4473.288086, avg loss: 2.648483, ppl: 14.132587 +epoch: 1, batch: 14849, sum loss: 4009.341553, avg loss: 2.535953, ppl: 12.628458 +epoch: 1, batch: 14850, sum loss: 4772.660645, avg loss: 2.704057, ppl: 14.940221 +epoch: 1, batch: 14851, sum loss: 4666.169922, avg loss: 2.535962, ppl: 12.628572 +epoch: 1, batch: 14852, sum loss: 4842.363770, avg loss: 2.660640, ppl: 14.305435 +epoch: 1, batch: 14853, sum loss: 3964.438965, avg loss: 2.673256, ppl: 14.487068 +epoch: 1, batch: 14854, sum loss: 4158.842285, avg loss: 2.562441, ppl: 12.967437 +epoch: 1, batch: 14855, sum loss: 4184.141602, avg loss: 2.531241, ppl: 12.569097 +epoch: 1, batch: 14856, sum loss: 
3647.500977, avg loss: 2.500001, ppl: 12.182503 +epoch: 1, batch: 14857, sum loss: 5457.971680, avg loss: 2.998886, ppl: 20.063168 +epoch: 1, batch: 14858, sum loss: 5302.717285, avg loss: 2.816100, ppl: 16.711554 +epoch: 1, batch: 14859, sum loss: 4787.128418, avg loss: 2.825932, ppl: 16.876663 +epoch: 1, batch: 14860, sum loss: 4834.178711, avg loss: 2.928031, ppl: 18.690792 +epoch: 1, batch: 14861, sum loss: 4046.490234, avg loss: 2.513348, ppl: 12.346194 +epoch: 1, batch: 14862, sum loss: 4002.321777, avg loss: 2.664662, ppl: 14.363089 +epoch: 1, batch: 14863, sum loss: 3541.818848, avg loss: 2.463017, ppl: 11.740181 +epoch: 1, batch: 14864, sum loss: 4801.652344, avg loss: 2.781954, ppl: 16.150545 +epoch: 1, batch: 14865, sum loss: 4577.968262, avg loss: 2.791444, ppl: 16.304548 +epoch: 1, batch: 14866, sum loss: 4663.039551, avg loss: 2.602143, ppl: 13.492620 +epoch: 1, batch: 14867, sum loss: 3861.430908, avg loss: 2.496077, ppl: 12.134793 +epoch: 1, batch: 14868, sum loss: 5627.068359, avg loss: 2.841954, ppl: 17.149235 +epoch: 1, batch: 14869, sum loss: 4646.769531, avg loss: 2.637213, ppl: 13.974206 +epoch: 1, batch: 14870, sum loss: 3891.804199, avg loss: 2.494746, ppl: 12.118660 +epoch: 1, batch: 14871, sum loss: 4496.359375, avg loss: 2.620256, ppl: 13.739243 +epoch: 1, batch: 14872, sum loss: 3896.723145, avg loss: 2.510775, ppl: 12.314474 +epoch: 1, batch: 14873, sum loss: 3799.068359, avg loss: 2.486301, ppl: 12.016749 +epoch: 1, batch: 14874, sum loss: 4360.247559, avg loss: 2.668450, ppl: 14.417606 +epoch: 1, batch: 14875, sum loss: 3813.229004, avg loss: 2.520310, ppl: 12.432452 +epoch: 1, batch: 14876, sum loss: 4156.680176, avg loss: 2.841203, ppl: 17.136372 +epoch: 1, batch: 14877, sum loss: 4370.674316, avg loss: 2.697947, ppl: 14.849219 +epoch: 1, batch: 14878, sum loss: 3774.676758, avg loss: 2.405785, ppl: 11.087132 +epoch: 1, batch: 14879, sum loss: 4876.617188, avg loss: 2.643153, ppl: 14.057456 +epoch: 1, batch: 14880, sum loss: 
4864.628418, avg loss: 2.571157, ppl: 13.080947 +epoch: 1, batch: 14881, sum loss: 5140.966309, avg loss: 2.797044, ppl: 16.396101 +epoch: 1, batch: 14882, sum loss: 4587.628906, avg loss: 2.912780, ppl: 18.407902 +epoch: 1, batch: 14883, sum loss: 3984.885010, avg loss: 2.551143, ppl: 12.821747 +epoch: 1, batch: 14884, sum loss: 4787.241699, avg loss: 2.561392, ppl: 12.953835 +epoch: 1, batch: 14885, sum loss: 4068.998535, avg loss: 2.678735, ppl: 14.566655 +epoch: 1, batch: 14886, sum loss: 5500.401855, avg loss: 3.091850, ppl: 22.017780 +epoch: 1, batch: 14887, sum loss: 4129.853516, avg loss: 2.786676, ppl: 16.226990 +epoch: 1, batch: 14888, sum loss: 4873.157227, avg loss: 2.582489, ppl: 13.230030 +epoch: 1, batch: 14889, sum loss: 4541.299805, avg loss: 2.801542, ppl: 16.470024 +epoch: 1, batch: 14890, sum loss: 3832.749268, avg loss: 2.580976, ppl: 13.210022 +epoch: 1, batch: 14891, sum loss: 4242.095703, avg loss: 2.794529, ppl: 16.354931 +epoch: 1, batch: 14892, sum loss: 4345.304199, avg loss: 2.930077, ppl: 18.729074 +epoch: 1, batch: 14893, sum loss: 4773.421875, avg loss: 2.786586, ppl: 16.225533 +epoch: 1, batch: 14894, sum loss: 4259.090820, avg loss: 2.846986, ppl: 17.235752 +epoch: 1, batch: 14895, sum loss: 4892.465332, avg loss: 2.802099, ppl: 16.479208 +epoch: 1, batch: 14896, sum loss: 3138.606934, avg loss: 2.392231, ppl: 10.937866 +epoch: 1, batch: 14897, sum loss: 3849.743408, avg loss: 2.493357, ppl: 12.101836 +epoch: 1, batch: 14898, sum loss: 4258.836914, avg loss: 2.449015, ppl: 11.576937 +epoch: 1, batch: 14899, sum loss: 5256.769531, avg loss: 2.673840, ppl: 14.495526 +epoch: 1, batch: 14900, sum loss: 4625.676270, avg loss: 2.664560, ppl: 14.361630 +epoch: 1, batch: 14901, sum loss: 4841.338867, avg loss: 2.700133, ppl: 14.881716 +epoch: 1, batch: 14902, sum loss: 4732.539551, avg loss: 2.679807, ppl: 14.582281 +epoch: 1, batch: 14903, sum loss: 5690.956055, avg loss: 2.956341, ppl: 19.227486 +epoch: 1, batch: 14904, sum loss: 
5162.133301, avg loss: 2.891952, ppl: 18.028460 +epoch: 1, batch: 14905, sum loss: 4831.376953, avg loss: 2.693075, ppl: 14.777048 +epoch: 1, batch: 14906, sum loss: 4639.970703, avg loss: 2.732610, ppl: 15.372951 +epoch: 1, batch: 14907, sum loss: 4302.200195, avg loss: 2.536675, ppl: 12.637578 +epoch: 1, batch: 14908, sum loss: 4725.788086, avg loss: 2.760390, ppl: 15.806006 +epoch: 1, batch: 14909, sum loss: 4708.802246, avg loss: 2.696909, ppl: 14.833805 +epoch: 1, batch: 14910, sum loss: 5364.722656, avg loss: 2.870370, ppl: 17.643553 +epoch: 1, batch: 14911, sum loss: 5512.497559, avg loss: 2.798222, ppl: 16.415436 +epoch: 1, batch: 14912, sum loss: 3967.288086, avg loss: 2.764661, ppl: 15.873652 +epoch: 1, batch: 14913, sum loss: 4116.291016, avg loss: 2.526882, ppl: 12.514427 +epoch: 1, batch: 14914, sum loss: 4152.172363, avg loss: 2.599983, ppl: 13.463506 +epoch: 1, batch: 14915, sum loss: 5311.739258, avg loss: 2.979102, ppl: 19.670153 +epoch: 1, batch: 14916, sum loss: 5015.442383, avg loss: 2.713984, ppl: 15.089272 +epoch: 1, batch: 14917, sum loss: 4698.004883, avg loss: 2.761908, ppl: 15.830011 +epoch: 1, batch: 14918, sum loss: 4829.763672, avg loss: 2.728680, ppl: 15.312663 +epoch: 1, batch: 14919, sum loss: 4411.969238, avg loss: 2.501116, ppl: 12.196100 +epoch: 1, batch: 14920, sum loss: 4164.362793, avg loss: 2.594619, ppl: 13.391479 +epoch: 1, batch: 14921, sum loss: 4157.439941, avg loss: 2.422751, ppl: 11.276836 +epoch: 1, batch: 14922, sum loss: 4205.355469, avg loss: 2.482500, ppl: 11.971159 +epoch: 1, batch: 14923, sum loss: 4221.750000, avg loss: 2.630374, ppl: 13.878956 +epoch: 1, batch: 14924, sum loss: 4705.158691, avg loss: 2.797359, ppl: 16.401281 +epoch: 1, batch: 14925, sum loss: 4498.680176, avg loss: 2.861756, ppl: 17.492214 +epoch: 1, batch: 14926, sum loss: 4018.236328, avg loss: 2.441213, ppl: 11.486965 +epoch: 1, batch: 14927, sum loss: 4350.797852, avg loss: 2.630470, ppl: 13.880296 +epoch: 1, batch: 14928, sum loss: 
4017.366699, avg loss: 2.651727, ppl: 14.178507 +epoch: 1, batch: 14929, sum loss: 4574.946289, avg loss: 2.642950, ppl: 14.054601 +epoch: 1, batch: 14930, sum loss: 4853.047852, avg loss: 2.673856, ppl: 14.495750 +epoch: 1, batch: 14931, sum loss: 3610.189209, avg loss: 2.406793, ppl: 11.098310 +epoch: 1, batch: 14932, sum loss: 4539.269531, avg loss: 2.437846, ppl: 11.448357 +epoch: 1, batch: 14933, sum loss: 4921.584961, avg loss: 2.754105, ppl: 15.706971 +epoch: 1, batch: 14934, sum loss: 4940.284180, avg loss: 2.752248, ppl: 15.677830 +epoch: 1, batch: 14935, sum loss: 5104.717773, avg loss: 3.058549, ppl: 21.296631 +epoch: 1, batch: 14936, sum loss: 4271.144043, avg loss: 2.574529, ppl: 13.125136 +epoch: 1, batch: 14937, sum loss: 4472.239258, avg loss: 2.791660, ppl: 16.308065 +epoch: 1, batch: 14938, sum loss: 5629.663086, avg loss: 2.978658, ppl: 19.661407 +epoch: 1, batch: 14939, sum loss: 4959.378906, avg loss: 2.851857, ppl: 17.319914 +epoch: 1, batch: 14940, sum loss: 5018.095703, avg loss: 2.924298, ppl: 18.621155 +epoch: 1, batch: 14941, sum loss: 3858.540283, avg loss: 2.596595, ppl: 13.417973 +epoch: 1, batch: 14942, sum loss: 4306.210938, avg loss: 2.735839, ppl: 15.422680 +epoch: 1, batch: 14943, sum loss: 3324.610596, avg loss: 2.640676, ppl: 14.022676 +epoch: 1, batch: 14944, sum loss: 4269.614258, avg loss: 2.581387, ppl: 13.215459 +epoch: 1, batch: 14945, sum loss: 3175.732910, avg loss: 2.328250, ppl: 10.259970 +epoch: 1, batch: 14946, sum loss: 4368.884766, avg loss: 2.510853, ppl: 12.315434 +epoch: 1, batch: 14947, sum loss: 4411.147949, avg loss: 2.492174, ppl: 12.087527 +epoch: 1, batch: 14948, sum loss: 4457.685059, avg loss: 2.823106, ppl: 16.829046 +epoch: 1, batch: 14949, sum loss: 4740.853027, avg loss: 2.635271, ppl: 13.947096 +epoch: 1, batch: 14950, sum loss: 4661.581055, avg loss: 2.651639, ppl: 14.177253 +epoch: 1, batch: 14951, sum loss: 4253.168945, avg loss: 2.598149, ppl: 13.438834 +epoch: 1, batch: 14952, sum loss: 
4022.312500, avg loss: 2.743733, ppl: 15.544905 +epoch: 1, batch: 14953, sum loss: 4385.388184, avg loss: 2.520338, ppl: 12.432799 +epoch: 1, batch: 14954, sum loss: 4005.109619, avg loss: 2.551025, ppl: 12.820243 +epoch: 1, batch: 14955, sum loss: 4618.230469, avg loss: 2.607697, ppl: 13.567762 +epoch: 1, batch: 14956, sum loss: 4342.139648, avg loss: 2.729189, ppl: 15.320456 +epoch: 1, batch: 14957, sum loss: 5057.704102, avg loss: 2.736853, ppl: 15.438322 +epoch: 1, batch: 14958, sum loss: 4879.242188, avg loss: 2.677959, ppl: 14.555362 +epoch: 1, batch: 14959, sum loss: 3751.990479, avg loss: 2.552375, ppl: 12.837552 +epoch: 1, batch: 14960, sum loss: 5004.081543, avg loss: 2.795576, ppl: 16.372057 +epoch: 1, batch: 14961, sum loss: 3762.393799, avg loss: 2.554239, ppl: 12.861508 +epoch: 1, batch: 14962, sum loss: 5259.317871, avg loss: 2.852125, ppl: 17.324553 +epoch: 1, batch: 14963, sum loss: 3775.332031, avg loss: 2.677541, ppl: 14.549266 +epoch: 1, batch: 14964, sum loss: 3950.747070, avg loss: 2.764693, ppl: 15.874174 +epoch: 1, batch: 14965, sum loss: 5615.746094, avg loss: 3.038824, ppl: 20.880665 +epoch: 1, batch: 14966, sum loss: 3859.164551, avg loss: 2.440964, ppl: 11.484109 +epoch: 1, batch: 14967, sum loss: 5133.043457, avg loss: 2.895118, ppl: 18.085630 +epoch: 1, batch: 14968, sum loss: 5307.742676, avg loss: 2.735950, ppl: 15.424390 +epoch: 1, batch: 14969, sum loss: 4283.554688, avg loss: 2.660593, ppl: 14.304770 +epoch: 1, batch: 14970, sum loss: 5141.119629, avg loss: 2.786515, ppl: 16.224375 +epoch: 1, batch: 14971, sum loss: 4224.693848, avg loss: 2.514699, ppl: 12.362884 +epoch: 1, batch: 14972, sum loss: 4141.866699, avg loss: 2.626421, ppl: 13.824211 +epoch: 1, batch: 14973, sum loss: 4832.874023, avg loss: 2.842867, ppl: 17.164909 +epoch: 1, batch: 14974, sum loss: 4855.441406, avg loss: 2.851111, ppl: 17.306993 +epoch: 1, batch: 14975, sum loss: 5112.619141, avg loss: 2.809132, ppl: 16.595501 +epoch: 1, batch: 14976, sum loss: 
4626.242188, avg loss: 2.739042, ppl: 15.472160 +epoch: 1, batch: 14977, sum loss: 5311.180176, avg loss: 2.997280, ppl: 20.030981 +epoch: 1, batch: 14978, sum loss: 3625.964355, avg loss: 2.383934, ppl: 10.847499 +epoch: 1, batch: 14979, sum loss: 3758.032715, avg loss: 2.344375, ppl: 10.426750 +epoch: 1, batch: 14980, sum loss: 4662.038574, avg loss: 2.823767, ppl: 16.840164 +epoch: 1, batch: 14981, sum loss: 4021.411621, avg loss: 2.640454, ppl: 14.019567 +epoch: 1, batch: 14982, sum loss: 4709.400391, avg loss: 2.638320, ppl: 13.989677 +epoch: 1, batch: 14983, sum loss: 4942.863770, avg loss: 2.882136, ppl: 17.852367 +epoch: 1, batch: 14984, sum loss: 4037.946045, avg loss: 2.605127, ppl: 13.532939 +epoch: 1, batch: 14985, sum loss: 4237.696777, avg loss: 2.549757, ppl: 12.803998 +epoch: 1, batch: 14986, sum loss: 3617.702637, avg loss: 2.366058, ppl: 10.655308 +epoch: 1, batch: 14987, sum loss: 3744.241699, avg loss: 2.311260, ppl: 10.087132 +epoch: 1, batch: 14988, sum loss: 4266.466309, avg loss: 2.593596, ppl: 13.377798 +epoch: 1, batch: 14989, sum loss: 4399.567383, avg loss: 2.736049, ppl: 15.425923 +epoch: 1, batch: 14990, sum loss: 4934.995117, avg loss: 2.749301, ppl: 15.631701 +epoch: 1, batch: 14991, sum loss: 4791.821289, avg loss: 2.822039, ppl: 16.811089 +epoch: 1, batch: 14992, sum loss: 4649.024414, avg loss: 2.800617, ppl: 16.454800 +epoch: 1, batch: 14993, sum loss: 4064.821045, avg loss: 2.324083, ppl: 10.217305 +epoch: 1, batch: 14994, sum loss: 5120.976074, avg loss: 2.664400, ppl: 14.359329 +epoch: 1, batch: 14995, sum loss: 5521.889648, avg loss: 2.959212, ppl: 19.282768 +epoch: 1, batch: 14996, sum loss: 4649.420410, avg loss: 2.822963, ppl: 16.826633 +epoch: 1, batch: 14997, sum loss: 4936.207031, avg loss: 2.798303, ppl: 16.416769 +epoch: 1, batch: 14998, sum loss: 4277.498535, avg loss: 2.688560, ppl: 14.710474 +epoch: 1, batch: 14999, sum loss: 4035.655762, avg loss: 2.674391, ppl: 14.503515 +epoch: 1, batch: 15000, sum loss: 
4632.953613, avg loss: 2.594039, ppl: 13.383718 +epoch: 1, batch: 15001, sum loss: 4147.917969, avg loss: 2.422849, ppl: 11.277946 +epoch: 1, batch: 15002, sum loss: 4189.079102, avg loss: 2.676728, ppl: 14.537445 +epoch: 1, batch: 15003, sum loss: 5090.901367, avg loss: 2.909086, ppl: 18.340036 +epoch: 1, batch: 15004, sum loss: 4040.329346, avg loss: 2.668646, ppl: 14.420425 +epoch: 1, batch: 15005, sum loss: 3522.543945, avg loss: 2.475435, ppl: 11.886877 +epoch: 1, batch: 15006, sum loss: 4027.551025, avg loss: 2.476969, ppl: 11.905120 +epoch: 1, batch: 15007, sum loss: 4652.572754, avg loss: 2.741646, ppl: 15.512495 +epoch: 1, batch: 15008, sum loss: 4180.509277, avg loss: 2.763060, ppl: 15.848262 +epoch: 1, batch: 15009, sum loss: 5470.811523, avg loss: 2.952408, ppl: 19.152008 +epoch: 1, batch: 15010, sum loss: 3422.195801, avg loss: 2.368302, ppl: 10.679239 +epoch: 1, batch: 15011, sum loss: 3684.130615, avg loss: 2.588989, ppl: 13.316296 +epoch: 1, batch: 15012, sum loss: 5715.384277, avg loss: 3.016034, ppl: 20.410181 +epoch: 1, batch: 15013, sum loss: 5066.408203, avg loss: 2.916758, ppl: 18.481270 +epoch: 1, batch: 15014, sum loss: 5071.221680, avg loss: 2.733812, ppl: 15.391453 +epoch: 1, batch: 15015, sum loss: 5440.397949, avg loss: 2.866385, ppl: 17.573372 +epoch: 1, batch: 15016, sum loss: 4368.730469, avg loss: 2.548851, ppl: 12.792397 +epoch: 1, batch: 15017, sum loss: 5119.738281, avg loss: 2.776431, ppl: 16.061592 +epoch: 1, batch: 15018, sum loss: 3606.895508, avg loss: 2.576354, ppl: 13.149106 +epoch: 1, batch: 15019, sum loss: 4520.570312, avg loss: 2.768261, ppl: 15.930905 +epoch: 1, batch: 15020, sum loss: 4178.237793, avg loss: 2.688699, ppl: 14.712519 +epoch: 1, batch: 15021, sum loss: 4888.591797, avg loss: 2.590669, ppl: 13.338688 +epoch: 1, batch: 15022, sum loss: 3780.237549, avg loss: 2.582129, ppl: 13.225271 +epoch: 1, batch: 15023, sum loss: 3778.606445, avg loss: 2.595197, ppl: 13.399223 +epoch: 1, batch: 15024, sum loss: 
3621.696777, avg loss: 2.269233, ppl: 9.671984 +epoch: 1, batch: 15025, sum loss: 4494.829102, avg loss: 2.762648, ppl: 15.841742 +epoch: 1, batch: 15026, sum loss: 5246.895020, avg loss: 2.906867, ppl: 18.299377 +epoch: 1, batch: 15027, sum loss: 3598.309570, avg loss: 2.481593, ppl: 11.960300 +epoch: 1, batch: 15028, sum loss: 4157.740723, avg loss: 2.656703, ppl: 14.249235 +epoch: 1, batch: 15029, sum loss: 4950.812500, avg loss: 2.829036, ppl: 16.929129 +epoch: 1, batch: 15030, sum loss: 4278.928711, avg loss: 2.569927, ppl: 13.064874 +epoch: 1, batch: 15031, sum loss: 4044.582031, avg loss: 2.743950, ppl: 15.548278 +epoch: 1, batch: 15032, sum loss: 5103.589844, avg loss: 2.790372, ppl: 16.287075 +epoch: 1, batch: 15033, sum loss: 4029.143555, avg loss: 2.501020, ppl: 12.194932 +epoch: 1, batch: 15034, sum loss: 5329.256836, avg loss: 2.967292, ppl: 19.439213 +epoch: 1, batch: 15035, sum loss: 4271.780762, avg loss: 2.654929, ppl: 14.223975 +epoch: 1, batch: 15036, sum loss: 4196.321289, avg loss: 2.894015, ppl: 18.065695 +epoch: 1, batch: 15037, sum loss: 4684.664062, avg loss: 2.726813, ppl: 15.284093 +epoch: 1, batch: 15038, sum loss: 5121.625000, avg loss: 2.699855, ppl: 14.877576 +epoch: 1, batch: 15039, sum loss: 4472.609375, avg loss: 2.830765, ppl: 16.958437 +epoch: 1, batch: 15040, sum loss: 4061.911865, avg loss: 2.463258, ppl: 11.743003 +epoch: 1, batch: 15041, sum loss: 4378.667480, avg loss: 2.724746, ppl: 15.252546 +epoch: 1, batch: 15042, sum loss: 5582.000977, avg loss: 2.899741, ppl: 18.169434 +epoch: 1, batch: 15043, sum loss: 3947.058105, avg loss: 2.469999, ppl: 11.822433 +epoch: 1, batch: 15044, sum loss: 3757.381348, avg loss: 2.687683, ppl: 14.697587 +epoch: 1, batch: 15045, sum loss: 4089.733643, avg loss: 2.456296, ppl: 11.661542 +epoch: 1, batch: 15046, sum loss: 4907.444824, avg loss: 2.644097, ppl: 14.070738 +epoch: 1, batch: 15047, sum loss: 5354.298828, avg loss: 2.768510, ppl: 15.934875 +epoch: 1, batch: 15048, sum loss: 
4800.142090, avg loss: 2.615881, ppl: 13.679269 +epoch: 1, batch: 15049, sum loss: 3579.494873, avg loss: 2.376823, ppl: 10.770627 +epoch: 1, batch: 15050, sum loss: 3664.763184, avg loss: 2.532663, ppl: 12.586979 +epoch: 1, batch: 15051, sum loss: 4837.008789, avg loss: 2.685735, ppl: 14.668979 +epoch: 1, batch: 15052, sum loss: 5074.063965, avg loss: 2.970763, ppl: 19.506802 +epoch: 1, batch: 15053, sum loss: 3665.371094, avg loss: 2.433846, ppl: 11.402647 +epoch: 1, batch: 15054, sum loss: 5332.811523, avg loss: 2.835094, ppl: 17.031998 +epoch: 1, batch: 15055, sum loss: 3710.748291, avg loss: 2.527758, ppl: 12.525389 +epoch: 1, batch: 15056, sum loss: 4418.085938, avg loss: 2.803354, ppl: 16.499895 +epoch: 1, batch: 15057, sum loss: 4858.773438, avg loss: 2.693333, ppl: 14.780865 +epoch: 1, batch: 15058, sum loss: 4067.638672, avg loss: 2.492426, ppl: 12.090569 +epoch: 1, batch: 15059, sum loss: 4305.512207, avg loss: 2.707869, ppl: 14.997287 +epoch: 1, batch: 15060, sum loss: 4086.340332, avg loss: 2.748043, ppl: 15.612050 +epoch: 1, batch: 15061, sum loss: 4229.876465, avg loss: 2.486700, ppl: 12.021538 +epoch: 1, batch: 15062, sum loss: 5966.798828, avg loss: 3.018108, ppl: 20.452551 +epoch: 1, batch: 15063, sum loss: 4537.206055, avg loss: 2.404455, ppl: 11.072391 +epoch: 1, batch: 15064, sum loss: 3820.560059, avg loss: 2.495467, ppl: 12.127398 +epoch: 1, batch: 15065, sum loss: 5411.501953, avg loss: 2.923556, ppl: 18.607334 +epoch: 1, batch: 15066, sum loss: 5058.386230, avg loss: 2.880630, ppl: 17.825500 +epoch: 1, batch: 15067, sum loss: 5313.796875, avg loss: 2.926100, ppl: 18.654726 +epoch: 1, batch: 15068, sum loss: 4817.775391, avg loss: 2.772023, ppl: 15.990947 +epoch: 1, batch: 15069, sum loss: 4393.787598, avg loss: 2.853109, ppl: 17.341612 +epoch: 1, batch: 15070, sum loss: 5511.687988, avg loss: 3.129863, ppl: 22.870836 +epoch: 1, batch: 15071, sum loss: 4418.159180, avg loss: 2.647190, ppl: 14.114316 +epoch: 1, batch: 15072, sum loss: 
4910.385742, avg loss: 3.042370, ppl: 20.954855 +epoch: 1, batch: 15073, sum loss: 4275.531250, avg loss: 2.682266, ppl: 14.618174 +epoch: 1, batch: 15074, sum loss: 4881.049805, avg loss: 2.839471, ppl: 17.106707 +epoch: 1, batch: 15075, sum loss: 4547.993164, avg loss: 2.919123, ppl: 18.525028 +epoch: 1, batch: 15076, sum loss: 3916.978516, avg loss: 2.681026, ppl: 14.600061 +epoch: 1, batch: 15077, sum loss: 4380.436035, avg loss: 2.588910, ppl: 13.315251 +epoch: 1, batch: 15078, sum loss: 4869.651367, avg loss: 2.714410, ppl: 15.095702 +epoch: 1, batch: 15079, sum loss: 4650.922852, avg loss: 2.484468, ppl: 11.994732 +epoch: 1, batch: 15080, sum loss: 4510.712402, avg loss: 2.510135, ppl: 12.306590 +epoch: 1, batch: 15081, sum loss: 5066.769531, avg loss: 2.693657, ppl: 14.785654 +epoch: 1, batch: 15082, sum loss: 4735.608887, avg loss: 2.961607, ppl: 19.328999 +epoch: 1, batch: 15083, sum loss: 3705.531982, avg loss: 2.550263, ppl: 12.810472 +epoch: 1, batch: 15084, sum loss: 4763.837891, avg loss: 2.784242, ppl: 16.187546 +epoch: 1, batch: 15085, sum loss: 4287.633301, avg loss: 2.691546, ppl: 14.754476 +epoch: 1, batch: 15086, sum loss: 4548.036133, avg loss: 2.550777, ppl: 12.817064 +epoch: 1, batch: 15087, sum loss: 4652.219727, avg loss: 2.807616, ppl: 16.570368 +epoch: 1, batch: 15088, sum loss: 5098.578613, avg loss: 2.942053, ppl: 18.954727 +epoch: 1, batch: 15089, sum loss: 4596.176758, avg loss: 2.729321, ppl: 15.322483 +epoch: 1, batch: 15090, sum loss: 4285.432617, avg loss: 2.671716, ppl: 14.464772 +epoch: 1, batch: 15091, sum loss: 4517.808594, avg loss: 2.905343, ppl: 18.271515 +epoch: 1, batch: 15092, sum loss: 4103.029297, avg loss: 2.605098, ppl: 13.532548 +epoch: 1, batch: 15093, sum loss: 4188.549316, avg loss: 2.723374, ppl: 15.231629 +epoch: 1, batch: 15094, sum loss: 6701.571289, avg loss: 3.141853, ppl: 23.146709 +epoch: 1, batch: 15095, sum loss: 5189.985352, avg loss: 2.694697, ppl: 14.801028 +epoch: 1, batch: 15096, sum loss: 
4720.845703, avg loss: 2.501773, ppl: 12.204111 +epoch: 1, batch: 15097, sum loss: 4847.295898, avg loss: 2.621577, ppl: 13.757402 +epoch: 1, batch: 15098, sum loss: 4230.093262, avg loss: 2.559040, ppl: 12.923406 +epoch: 1, batch: 15099, sum loss: 4335.123535, avg loss: 2.714542, ppl: 15.097696 +epoch: 1, batch: 15100, sum loss: 4491.967285, avg loss: 2.661118, ppl: 14.312285 +epoch: 1, batch: 15101, sum loss: 4258.816895, avg loss: 2.744083, ppl: 15.550350 +epoch: 1, batch: 15102, sum loss: 4936.691895, avg loss: 2.933269, ppl: 18.788952 +epoch: 1, batch: 15103, sum loss: 4571.880371, avg loss: 2.703655, ppl: 14.934216 +epoch: 1, batch: 15104, sum loss: 4121.777832, avg loss: 2.553766, ppl: 12.855423 +epoch: 1, batch: 15105, sum loss: 5225.001953, avg loss: 2.797110, ppl: 16.397196 +epoch: 1, batch: 15106, sum loss: 4645.624023, avg loss: 2.635067, ppl: 13.944253 +epoch: 1, batch: 15107, sum loss: 5518.065918, avg loss: 2.768723, ppl: 15.938275 +epoch: 1, batch: 15108, sum loss: 3935.238037, avg loss: 2.590677, ppl: 13.338796 +epoch: 1, batch: 15109, sum loss: 4295.091309, avg loss: 2.708128, ppl: 15.001170 +epoch: 1, batch: 15110, sum loss: 5365.733398, avg loss: 2.974353, ppl: 19.576960 +epoch: 1, batch: 15111, sum loss: 5095.322266, avg loss: 2.991968, ppl: 19.924864 +epoch: 1, batch: 15112, sum loss: 4777.831055, avg loss: 2.862691, ppl: 17.508577 +epoch: 1, batch: 15113, sum loss: 5870.416016, avg loss: 3.070301, ppl: 21.548393 +epoch: 1, batch: 15114, sum loss: 4620.199219, avg loss: 2.873258, ppl: 17.694576 +epoch: 1, batch: 15115, sum loss: 4119.614258, avg loss: 2.750076, ppl: 15.643822 +epoch: 1, batch: 15116, sum loss: 4282.889160, avg loss: 2.754270, ppl: 15.709562 +epoch: 1, batch: 15117, sum loss: 5239.130859, avg loss: 3.000648, ppl: 20.098553 +epoch: 1, batch: 15118, sum loss: 4740.650879, avg loss: 2.770690, ppl: 15.969656 +epoch: 1, batch: 15119, sum loss: 4569.874512, avg loss: 2.640020, ppl: 14.013482 +epoch: 1, batch: 15120, sum loss: 
4225.890625, avg loss: 2.562699, ppl: 12.970779 +epoch: 1, batch: 15121, sum loss: 4784.237793, avg loss: 2.929723, ppl: 18.722445 +epoch: 1, batch: 15122, sum loss: 5065.672852, avg loss: 2.836323, ppl: 17.052946 +epoch: 1, batch: 15123, sum loss: 3997.269775, avg loss: 2.422588, ppl: 11.275000 +epoch: 1, batch: 15124, sum loss: 5256.887695, avg loss: 2.910790, ppl: 18.371315 +epoch: 1, batch: 15125, sum loss: 4450.227051, avg loss: 2.603995, ppl: 13.517632 +epoch: 1, batch: 15126, sum loss: 4599.249512, avg loss: 2.763972, ppl: 15.862729 +epoch: 1, batch: 15127, sum loss: 3976.614746, avg loss: 2.532876, ppl: 12.589659 +epoch: 1, batch: 15128, sum loss: 4545.567383, avg loss: 2.826845, ppl: 16.892090 +epoch: 1, batch: 15129, sum loss: 4213.631348, avg loss: 2.573996, ppl: 13.118141 +epoch: 1, batch: 15130, sum loss: 4623.791504, avg loss: 2.855955, ppl: 17.391045 +epoch: 1, batch: 15131, sum loss: 4435.122559, avg loss: 2.807039, ppl: 16.560818 +epoch: 1, batch: 15132, sum loss: 3753.509277, avg loss: 2.427884, ppl: 11.334876 +epoch: 1, batch: 15133, sum loss: 5291.129395, avg loss: 2.670938, ppl: 14.453527 +epoch: 1, batch: 15134, sum loss: 3688.513916, avg loss: 2.542050, ppl: 12.705687 +epoch: 1, batch: 15135, sum loss: 5663.773926, avg loss: 3.017461, ppl: 20.439327 +epoch: 1, batch: 15136, sum loss: 4204.673340, avg loss: 2.523813, ppl: 12.476084 +epoch: 1, batch: 15137, sum loss: 4449.143555, avg loss: 2.665754, ppl: 14.378788 +epoch: 1, batch: 15138, sum loss: 3986.855469, avg loss: 2.423620, ppl: 11.286646 +epoch: 1, batch: 15139, sum loss: 4169.224609, avg loss: 2.481681, ppl: 11.961359 +epoch: 1, batch: 15140, sum loss: 6475.675293, avg loss: 3.010542, ppl: 20.298391 +epoch: 1, batch: 15141, sum loss: 4623.582031, avg loss: 2.778595, ppl: 16.096390 +epoch: 1, batch: 15142, sum loss: 4459.055664, avg loss: 2.860203, ppl: 17.465065 +epoch: 1, batch: 15143, sum loss: 4507.068848, avg loss: 2.644994, ppl: 14.083354 +epoch: 1, batch: 15144, sum loss: 
4943.152832, avg loss: 2.689419, ppl: 14.723124 +epoch: 1, batch: 15145, sum loss: 5081.135742, avg loss: 2.859390, ppl: 17.450874 +epoch: 1, batch: 15146, sum loss: 4651.611816, avg loss: 2.772117, ppl: 15.992449 +epoch: 1, batch: 15147, sum loss: 4027.022461, avg loss: 2.524779, ppl: 12.488136 +epoch: 1, batch: 15148, sum loss: 3825.666992, avg loss: 2.643861, ppl: 14.067414 +epoch: 1, batch: 15149, sum loss: 4418.981445, avg loss: 2.744709, ppl: 15.560085 +epoch: 1, batch: 15150, sum loss: 3718.692871, avg loss: 2.435293, ppl: 11.419167 +epoch: 1, batch: 15151, sum loss: 3853.250000, avg loss: 2.382962, ppl: 10.836960 +epoch: 1, batch: 15152, sum loss: 4194.971680, avg loss: 2.628428, ppl: 13.851984 +epoch: 1, batch: 15153, sum loss: 3231.173828, avg loss: 2.355083, ppl: 10.539003 +epoch: 1, batch: 15154, sum loss: 4153.357422, avg loss: 2.478137, ppl: 11.919039 +epoch: 1, batch: 15155, sum loss: 4522.161621, avg loss: 2.862128, ppl: 17.498716 +epoch: 1, batch: 15156, sum loss: 5872.114258, avg loss: 3.344029, ppl: 28.333042 +epoch: 1, batch: 15157, sum loss: 4314.482422, avg loss: 2.557488, ppl: 12.903366 +epoch: 1, batch: 15158, sum loss: 5152.836426, avg loss: 3.036439, ppl: 20.830925 +epoch: 1, batch: 15159, sum loss: 4988.948730, avg loss: 2.762430, ppl: 15.838286 +epoch: 1, batch: 15160, sum loss: 5915.954590, avg loss: 3.081227, ppl: 21.785107 +epoch: 1, batch: 15161, sum loss: 4567.494629, avg loss: 2.728492, ppl: 15.309775 +epoch: 1, batch: 15162, sum loss: 4369.908691, avg loss: 2.669462, ppl: 14.432199 +epoch: 1, batch: 15163, sum loss: 3923.058594, avg loss: 2.498764, ppl: 12.167440 +epoch: 1, batch: 15164, sum loss: 4443.737305, avg loss: 2.558283, ppl: 12.913624 +epoch: 1, batch: 15165, sum loss: 3679.416260, avg loss: 2.265650, ppl: 9.637389 +epoch: 1, batch: 15166, sum loss: 5912.446777, avg loss: 2.874305, ppl: 17.713118 +epoch: 1, batch: 15167, sum loss: 3885.608643, avg loss: 2.542938, ppl: 12.716972 +epoch: 1, batch: 15168, sum loss: 
4191.047852, avg loss: 2.659294, ppl: 14.286204 +epoch: 1, batch: 15169, sum loss: 4901.445312, avg loss: 2.800826, ppl: 16.458233 +epoch: 1, batch: 15170, sum loss: 5799.069336, avg loss: 2.800130, ppl: 16.446787 +epoch: 1, batch: 15171, sum loss: 3797.801514, avg loss: 2.736168, ppl: 15.427754 +epoch: 1, batch: 15172, sum loss: 5696.350098, avg loss: 2.831188, ppl: 16.965607 +epoch: 1, batch: 15173, sum loss: 3288.419922, avg loss: 2.226418, ppl: 9.266618 +epoch: 1, batch: 15174, sum loss: 4791.700684, avg loss: 2.602771, ppl: 13.501092 +epoch: 1, batch: 15175, sum loss: 3780.240723, avg loss: 2.478846, ppl: 11.927496 +epoch: 1, batch: 15176, sum loss: 3958.210449, avg loss: 2.597251, ppl: 13.426776 +epoch: 1, batch: 15177, sum loss: 4422.143555, avg loss: 2.508306, ppl: 12.284103 +epoch: 1, batch: 15178, sum loss: 4306.168945, avg loss: 2.677966, ppl: 14.555452 +epoch: 1, batch: 15179, sum loss: 4980.581543, avg loss: 2.919450, ppl: 18.531090 +epoch: 1, batch: 15180, sum loss: 4188.125977, avg loss: 2.679543, ppl: 14.578433 +epoch: 1, batch: 15181, sum loss: 3953.460449, avg loss: 2.589038, ppl: 13.316950 +epoch: 1, batch: 15182, sum loss: 4977.606445, avg loss: 2.807449, ppl: 16.567598 +epoch: 1, batch: 15183, sum loss: 3651.133545, avg loss: 2.414771, ppl: 11.187209 +epoch: 1, batch: 15184, sum loss: 5196.836426, avg loss: 2.660951, ppl: 14.309887 +epoch: 1, batch: 15185, sum loss: 4393.950684, avg loss: 3.013684, ppl: 20.362268 +epoch: 1, batch: 15186, sum loss: 3897.619141, avg loss: 2.607103, ppl: 13.559713 +epoch: 1, batch: 15187, sum loss: 4470.963867, avg loss: 2.639294, ppl: 14.003312 +epoch: 1, batch: 15188, sum loss: 5381.744629, avg loss: 2.965149, ppl: 19.397587 +epoch: 1, batch: 15189, sum loss: 5113.709473, avg loss: 2.831511, ppl: 16.971088 +epoch: 1, batch: 15190, sum loss: 4278.666016, avg loss: 2.687604, ppl: 14.696424 +epoch: 1, batch: 15191, sum loss: 3686.817627, avg loss: 2.652387, ppl: 14.187860 +epoch: 1, batch: 15192, sum loss: 
4882.333984, avg loss: 2.695933, ppl: 14.819337 +epoch: 1, batch: 15193, sum loss: 3954.973633, avg loss: 2.683157, ppl: 14.631214 +epoch: 1, batch: 15194, sum loss: 4919.965820, avg loss: 2.517895, ppl: 12.402456 +epoch: 1, batch: 15195, sum loss: 3819.858887, avg loss: 2.493381, ppl: 12.102118 +epoch: 1, batch: 15196, sum loss: 4324.986816, avg loss: 2.398772, ppl: 11.009653 +epoch: 1, batch: 15197, sum loss: 4510.053223, avg loss: 2.750032, ppl: 15.643139 +epoch: 1, batch: 15198, sum loss: 5434.137695, avg loss: 2.719789, ppl: 15.177113 +epoch: 1, batch: 15199, sum loss: 4030.967773, avg loss: 2.415200, ppl: 11.192003 +epoch: 1, batch: 15200, sum loss: 4966.065430, avg loss: 2.863936, ppl: 17.530394 +epoch: 1, batch: 15201, sum loss: 5089.720215, avg loss: 2.746746, ppl: 15.591811 +epoch: 1, batch: 15202, sum loss: 5024.369629, avg loss: 2.717344, ppl: 15.140061 +epoch: 1, batch: 15203, sum loss: 4750.292969, avg loss: 2.569115, ppl: 13.054262 +epoch: 1, batch: 15204, sum loss: 4503.518555, avg loss: 2.726101, ppl: 15.273219 +epoch: 1, batch: 15205, sum loss: 4690.670898, avg loss: 2.638173, ppl: 13.987619 +epoch: 1, batch: 15206, sum loss: 5930.529785, avg loss: 2.867761, ppl: 17.597576 +epoch: 1, batch: 15207, sum loss: 3435.395020, avg loss: 2.319646, ppl: 10.172070 +epoch: 1, batch: 15208, sum loss: 4470.067871, avg loss: 2.632549, ppl: 13.909177 +epoch: 1, batch: 15209, sum loss: 3561.732666, avg loss: 2.513573, ppl: 12.348973 +epoch: 1, batch: 15210, sum loss: 4566.452637, avg loss: 2.731132, ppl: 15.350254 +epoch: 1, batch: 15211, sum loss: 4911.127441, avg loss: 2.685144, ppl: 14.660308 +epoch: 1, batch: 15212, sum loss: 4758.270020, avg loss: 2.658251, ppl: 14.271311 +epoch: 1, batch: 15213, sum loss: 4178.038086, avg loss: 2.555375, ppl: 12.876125 +epoch: 1, batch: 15214, sum loss: 4787.921875, avg loss: 2.703513, ppl: 14.932098 +epoch: 1, batch: 15215, sum loss: 5506.308594, avg loss: 2.847109, ppl: 17.237873 +epoch: 1, batch: 15216, sum loss: 
4500.894531, avg loss: 2.619846, ppl: 13.733603 +epoch: 1, batch: 15217, sum loss: 3648.789062, avg loss: 2.521623, ppl: 12.448790 +epoch: 1, batch: 15218, sum loss: 4408.978516, avg loss: 2.561870, ppl: 12.960032 +epoch: 1, batch: 15219, sum loss: 4748.886230, avg loss: 2.751382, ppl: 15.664270 +epoch: 1, batch: 15220, sum loss: 4938.437988, avg loss: 2.766632, ppl: 15.904977 +epoch: 1, batch: 15221, sum loss: 5048.466797, avg loss: 2.874981, ppl: 17.725090 +epoch: 1, batch: 15222, sum loss: 4494.107910, avg loss: 2.708926, ppl: 15.013139 +epoch: 1, batch: 15223, sum loss: 4542.403320, avg loss: 2.779929, ppl: 16.117872 +epoch: 1, batch: 15224, sum loss: 3708.958252, avg loss: 2.487564, ppl: 12.031935 +epoch: 1, batch: 15225, sum loss: 5343.831543, avg loss: 2.905836, ppl: 18.280514 +epoch: 1, batch: 15226, sum loss: 4951.401855, avg loss: 2.865395, ppl: 17.555979 +epoch: 1, batch: 15227, sum loss: 4096.857910, avg loss: 2.505723, ppl: 12.252420 +epoch: 1, batch: 15228, sum loss: 4042.225098, avg loss: 2.731233, ppl: 15.351810 +epoch: 1, batch: 15229, sum loss: 5122.936523, avg loss: 2.861976, ppl: 17.496059 +epoch: 1, batch: 15230, sum loss: 3274.731934, avg loss: 2.449313, ppl: 11.580393 +epoch: 1, batch: 15231, sum loss: 3797.541992, avg loss: 2.524961, ppl: 12.490411 +epoch: 1, batch: 15232, sum loss: 4994.200684, avg loss: 2.708352, ppl: 15.004525 +epoch: 1, batch: 15233, sum loss: 4213.522949, avg loss: 2.577078, ppl: 13.158634 +epoch: 1, batch: 15234, sum loss: 4393.136719, avg loss: 2.607203, ppl: 13.561065 +epoch: 1, batch: 15235, sum loss: 4445.678711, avg loss: 2.654137, ppl: 14.212710 +epoch: 1, batch: 15236, sum loss: 3702.336426, avg loss: 2.394784, ppl: 10.965832 +epoch: 1, batch: 15237, sum loss: 4298.495117, avg loss: 2.732673, ppl: 15.373929 +epoch: 1, batch: 15238, sum loss: 3856.657715, avg loss: 2.597076, ppl: 13.424423 +epoch: 1, batch: 15239, sum loss: 5350.556641, avg loss: 2.745283, ppl: 15.569021 +epoch: 1, batch: 15240, sum loss: 
4377.942871, avg loss: 2.584382, ppl: 13.255093 +epoch: 1, batch: 15241, sum loss: 4209.348633, avg loss: 2.780283, ppl: 16.123587 +epoch: 1, batch: 15242, sum loss: 4678.647461, avg loss: 2.889838, ppl: 17.990395 +epoch: 1, batch: 15243, sum loss: 4566.271484, avg loss: 2.631857, ppl: 13.899553 +epoch: 1, batch: 15244, sum loss: 4215.432617, avg loss: 2.533313, ppl: 12.595165 +epoch: 1, batch: 15245, sum loss: 4710.130859, avg loss: 2.676211, ppl: 14.529930 +epoch: 1, batch: 15246, sum loss: 4671.061523, avg loss: 2.930403, ppl: 18.735170 +epoch: 1, batch: 15247, sum loss: 3748.900146, avg loss: 2.590809, ppl: 13.340555 +epoch: 1, batch: 15248, sum loss: 3938.157227, avg loss: 2.616716, ppl: 13.690688 +epoch: 1, batch: 15249, sum loss: 4440.612305, avg loss: 2.609055, ppl: 13.586209 +epoch: 1, batch: 15250, sum loss: 5122.228027, avg loss: 2.608059, ppl: 13.572683 +epoch: 1, batch: 15251, sum loss: 4051.534424, avg loss: 2.690262, ppl: 14.735538 +epoch: 1, batch: 15252, sum loss: 3970.110352, avg loss: 2.559710, ppl: 12.932067 +epoch: 1, batch: 15253, sum loss: 4768.211426, avg loss: 2.740351, ppl: 15.492429 +epoch: 1, batch: 15254, sum loss: 4210.900391, avg loss: 2.682102, ppl: 14.615787 +epoch: 1, batch: 15255, sum loss: 4566.139160, avg loss: 2.765681, ppl: 15.889857 +epoch: 1, batch: 15256, sum loss: 4604.707520, avg loss: 2.552499, ppl: 12.839144 +epoch: 1, batch: 15257, sum loss: 6581.628906, avg loss: 2.834466, ppl: 17.021299 +epoch: 1, batch: 15258, sum loss: 4287.076660, avg loss: 2.720226, ppl: 15.183758 +epoch: 1, batch: 15259, sum loss: 4254.709961, avg loss: 2.572376, ppl: 13.096906 +epoch: 1, batch: 15260, sum loss: 4048.179199, avg loss: 2.552446, ppl: 12.838467 +epoch: 1, batch: 15261, sum loss: 5238.787598, avg loss: 2.727115, ppl: 15.288714 +epoch: 1, batch: 15262, sum loss: 3692.431641, avg loss: 2.413354, ppl: 11.171369 +epoch: 1, batch: 15263, sum loss: 4704.665039, avg loss: 2.551337, ppl: 12.824235 +epoch: 1, batch: 15264, sum loss: 
4219.053711, avg loss: 2.571026, ppl: 13.079235 +epoch: 1, batch: 15265, sum loss: 4882.229980, avg loss: 2.805879, ppl: 16.541615 +epoch: 1, batch: 15266, sum loss: 4816.329102, avg loss: 2.666849, ppl: 14.394539 +epoch: 1, batch: 15267, sum loss: 4663.369141, avg loss: 2.770867, ppl: 15.972474 +epoch: 1, batch: 15268, sum loss: 4024.437256, avg loss: 2.436100, ppl: 11.428386 +epoch: 1, batch: 15269, sum loss: 4278.306641, avg loss: 2.485942, ppl: 12.012432 +epoch: 1, batch: 15270, sum loss: 4906.996582, avg loss: 2.714047, ppl: 15.090218 +epoch: 1, batch: 15271, sum loss: 4361.558594, avg loss: 2.646577, ppl: 14.105671 +epoch: 1, batch: 15272, sum loss: 5041.840332, avg loss: 2.753599, ppl: 15.699038 +epoch: 1, batch: 15273, sum loss: 4264.052246, avg loss: 2.443583, ppl: 11.514220 +epoch: 1, batch: 15274, sum loss: 4474.726074, avg loss: 2.745231, ppl: 15.568205 +epoch: 1, batch: 15275, sum loss: 4258.861816, avg loss: 2.578004, ppl: 13.170818 +epoch: 1, batch: 15276, sum loss: 4596.505859, avg loss: 2.632592, ppl: 13.909781 +epoch: 1, batch: 15277, sum loss: 4752.948242, avg loss: 2.820741, ppl: 16.789282 +epoch: 1, batch: 15278, sum loss: 4570.606445, avg loss: 2.909361, ppl: 18.345079 +epoch: 1, batch: 15279, sum loss: 4766.367676, avg loss: 2.742444, ppl: 15.524882 +epoch: 1, batch: 15280, sum loss: 4239.309082, avg loss: 2.647913, ppl: 14.124537 +epoch: 1, batch: 15281, sum loss: 4286.908203, avg loss: 2.636475, ppl: 13.963892 +epoch: 1, batch: 15282, sum loss: 4759.355469, avg loss: 2.710339, ppl: 15.034376 +epoch: 1, batch: 15283, sum loss: 4307.977539, avg loss: 2.664179, ppl: 14.356159 +epoch: 1, batch: 15284, sum loss: 4372.280273, avg loss: 2.756797, ppl: 15.749318 +epoch: 1, batch: 15285, sum loss: 4475.009277, avg loss: 2.791646, ppl: 16.307848 +epoch: 1, batch: 15286, sum loss: 3647.802246, avg loss: 2.542022, ppl: 12.705341 +epoch: 1, batch: 15287, sum loss: 5122.442871, avg loss: 2.964377, ppl: 19.382618 +epoch: 1, batch: 15288, sum loss: 
4088.226318, avg loss: 2.652970, ppl: 14.196136 +epoch: 1, batch: 15289, sum loss: 4611.144043, avg loss: 2.676230, ppl: 14.530210 +epoch: 1, batch: 15290, sum loss: 4635.333984, avg loss: 2.792370, ppl: 16.319649 +epoch: 1, batch: 15291, sum loss: 4275.178223, avg loss: 2.484124, ppl: 11.990617 +epoch: 1, batch: 15292, sum loss: 5566.450684, avg loss: 3.038456, ppl: 20.872984 +epoch: 1, batch: 15293, sum loss: 3759.900391, avg loss: 2.421056, ppl: 11.257744 +epoch: 1, batch: 15294, sum loss: 4794.674316, avg loss: 2.683086, ppl: 14.630171 +epoch: 1, batch: 15295, sum loss: 4748.683594, avg loss: 2.667800, ppl: 14.408232 +epoch: 1, batch: 15296, sum loss: 4359.818848, avg loss: 2.598223, ppl: 13.439840 +epoch: 1, batch: 15297, sum loss: 4211.026855, avg loss: 2.588216, ppl: 13.306007 +epoch: 1, batch: 15298, sum loss: 4009.014648, avg loss: 2.410712, ppl: 11.141896 +epoch: 1, batch: 15299, sum loss: 3956.247314, avg loss: 2.347921, ppl: 10.463795 +epoch: 1, batch: 15300, sum loss: 5127.883301, avg loss: 2.753965, ppl: 15.704784 +epoch: 1, batch: 15301, sum loss: 4328.787598, avg loss: 2.476423, ppl: 11.898630 +epoch: 1, batch: 15302, sum loss: 5357.405273, avg loss: 2.927544, ppl: 18.681690 +epoch: 1, batch: 15303, sum loss: 4067.354736, avg loss: 2.487679, ppl: 12.033312 +epoch: 1, batch: 15304, sum loss: 3909.002441, avg loss: 2.283296, ppl: 9.808954 +epoch: 1, batch: 15305, sum loss: 4335.581543, avg loss: 2.562400, ppl: 12.966905 +epoch: 1, batch: 15306, sum loss: 4401.497559, avg loss: 2.664345, ppl: 14.358539 +epoch: 1, batch: 15307, sum loss: 4245.594727, avg loss: 2.539232, ppl: 12.669930 +epoch: 1, batch: 15308, sum loss: 4502.453125, avg loss: 2.659453, ppl: 14.288466 +epoch: 1, batch: 15309, sum loss: 3917.283203, avg loss: 2.382776, ppl: 10.834934 +epoch: 1, batch: 15310, sum loss: 4712.598633, avg loss: 2.714631, ppl: 15.099031 +epoch: 1, batch: 15311, sum loss: 4008.804199, avg loss: 2.534010, ppl: 12.603949 +epoch: 1, batch: 15312, sum loss: 
4121.729004, avg loss: 2.637063, ppl: 13.972104 +epoch: 1, batch: 15313, sum loss: 4657.063965, avg loss: 2.824175, ppl: 16.847034 +epoch: 1, batch: 15314, sum loss: 4640.425781, avg loss: 2.860928, ppl: 17.477743 +epoch: 1, batch: 15315, sum loss: 4865.504395, avg loss: 2.673354, ppl: 14.488484 +epoch: 1, batch: 15316, sum loss: 4528.395508, avg loss: 2.574415, ppl: 13.123634 +epoch: 1, batch: 15317, sum loss: 4394.019531, avg loss: 2.498021, ppl: 12.158413 +epoch: 1, batch: 15318, sum loss: 5181.628906, avg loss: 3.021358, ppl: 20.519133 +epoch: 1, batch: 15319, sum loss: 4134.162109, avg loss: 2.707375, ppl: 14.989880 +epoch: 1, batch: 15320, sum loss: 4097.912109, avg loss: 2.766990, ppl: 15.910670 +epoch: 1, batch: 15321, sum loss: 3647.571289, avg loss: 2.196009, ppl: 8.989068 +epoch: 1, batch: 15322, sum loss: 4719.226074, avg loss: 2.600125, ppl: 13.465416 +epoch: 1, batch: 15323, sum loss: 5669.594727, avg loss: 2.942187, ppl: 18.957266 +epoch: 1, batch: 15324, sum loss: 3763.395752, avg loss: 2.304590, ppl: 10.020071 +epoch: 1, batch: 15325, sum loss: 6267.885742, avg loss: 3.281615, ppl: 26.618740 +epoch: 1, batch: 15326, sum loss: 3830.770020, avg loss: 2.579643, ppl: 13.192428 +epoch: 1, batch: 15327, sum loss: 4705.425781, avg loss: 2.815934, ppl: 16.708778 +epoch: 1, batch: 15328, sum loss: 4489.416504, avg loss: 2.630004, ppl: 13.873824 +epoch: 1, batch: 15329, sum loss: 3934.548096, avg loss: 2.586817, ppl: 13.287408 +epoch: 1, batch: 15330, sum loss: 4292.799805, avg loss: 2.722131, ppl: 15.212699 +epoch: 1, batch: 15331, sum loss: 4809.987305, avg loss: 2.811214, ppl: 16.630098 +epoch: 1, batch: 15332, sum loss: 4516.076172, avg loss: 2.583568, ppl: 13.244308 +epoch: 1, batch: 15333, sum loss: 4338.037598, avg loss: 2.596073, ppl: 13.410969 +epoch: 1, batch: 15334, sum loss: 4147.806641, avg loss: 2.765204, ppl: 15.882286 +epoch: 1, batch: 15335, sum loss: 4595.939453, avg loss: 2.602457, ppl: 13.496860 +epoch: 1, batch: 15336, sum loss: 
4714.904785, avg loss: 2.885499, ppl: 17.912500 +epoch: 1, batch: 15337, sum loss: 5831.317871, avg loss: 2.966082, ppl: 19.415707 +epoch: 1, batch: 15338, sum loss: 3928.618652, avg loss: 2.692679, ppl: 14.771194 +epoch: 1, batch: 15339, sum loss: 4503.585938, avg loss: 2.555951, ppl: 12.883551 +epoch: 1, batch: 15340, sum loss: 4354.942383, avg loss: 2.699902, ppl: 14.878278 +epoch: 1, batch: 15341, sum loss: 4535.392090, avg loss: 2.702856, ppl: 14.922286 +epoch: 1, batch: 15342, sum loss: 3462.695312, avg loss: 2.402981, ppl: 11.056084 +epoch: 1, batch: 15343, sum loss: 4693.032227, avg loss: 2.681733, ppl: 14.610386 +epoch: 1, batch: 15344, sum loss: 3404.976318, avg loss: 2.232771, ppl: 9.325675 +epoch: 1, batch: 15345, sum loss: 3797.682129, avg loss: 2.364684, ppl: 10.640672 +epoch: 1, batch: 15346, sum loss: 5443.668945, avg loss: 3.014213, ppl: 20.373053 +epoch: 1, batch: 15347, sum loss: 4294.892578, avg loss: 2.542861, ppl: 12.716002 +epoch: 1, batch: 15348, sum loss: 3526.514648, avg loss: 2.306419, ppl: 10.038414 +epoch: 1, batch: 15349, sum loss: 4446.375000, avg loss: 2.879777, ppl: 17.810295 +epoch: 1, batch: 15350, sum loss: 4390.327637, avg loss: 2.708407, ppl: 15.005352 +epoch: 1, batch: 15351, sum loss: 4426.833008, avg loss: 2.544157, ppl: 12.732488 +epoch: 1, batch: 15352, sum loss: 4962.250488, avg loss: 2.991109, ppl: 19.907755 +epoch: 1, batch: 15353, sum loss: 3901.941406, avg loss: 2.461793, ppl: 11.725814 +epoch: 1, batch: 15354, sum loss: 5556.647461, avg loss: 2.955663, ppl: 19.214466 +epoch: 1, batch: 15355, sum loss: 4334.288574, avg loss: 2.600053, ppl: 13.464453 +epoch: 1, batch: 15356, sum loss: 4939.433594, avg loss: 2.903841, ppl: 18.244087 +epoch: 1, batch: 15357, sum loss: 4122.534180, avg loss: 2.619145, ppl: 13.723984 +epoch: 1, batch: 15358, sum loss: 4669.333984, avg loss: 2.612946, ppl: 13.639170 +epoch: 1, batch: 15359, sum loss: 4370.660645, avg loss: 2.618730, ppl: 13.718294 +epoch: 1, batch: 15360, sum loss: 
4918.513184, avg loss: 2.788273, ppl: 16.252924 +epoch: 1, batch: 15361, sum loss: 3953.864502, avg loss: 2.511985, ppl: 12.329380 +epoch: 1, batch: 15362, sum loss: 4876.755371, avg loss: 2.724444, ppl: 15.247936 +epoch: 1, batch: 15363, sum loss: 4816.833984, avg loss: 2.835100, ppl: 17.032099 +epoch: 1, batch: 15364, sum loss: 4194.768066, avg loss: 2.557786, ppl: 12.907203 +epoch: 1, batch: 15365, sum loss: 5374.927734, avg loss: 2.769154, ppl: 15.945136 +epoch: 1, batch: 15366, sum loss: 4384.009766, avg loss: 2.548843, ppl: 12.792294 +epoch: 1, batch: 15367, sum loss: 4245.889648, avg loss: 2.592118, ppl: 13.358034 +epoch: 1, batch: 15368, sum loss: 3964.309814, avg loss: 2.585982, ppl: 13.276315 +epoch: 1, batch: 15369, sum loss: 6159.648926, avg loss: 3.047822, ppl: 21.069416 +epoch: 1, batch: 15370, sum loss: 3767.737305, avg loss: 2.672154, ppl: 14.471109 +epoch: 1, batch: 15371, sum loss: 3987.736816, avg loss: 2.644388, ppl: 14.074824 +epoch: 1, batch: 15372, sum loss: 4966.497070, avg loss: 2.773030, ppl: 16.007063 +epoch: 1, batch: 15373, sum loss: 4314.860352, avg loss: 2.414583, ppl: 11.185110 +epoch: 1, batch: 15374, sum loss: 4658.618164, avg loss: 2.648447, ppl: 14.132075 +epoch: 1, batch: 15375, sum loss: 3896.937500, avg loss: 2.387830, ppl: 10.889832 +epoch: 1, batch: 15376, sum loss: 3923.868408, avg loss: 2.520147, ppl: 12.430422 +epoch: 1, batch: 15377, sum loss: 3816.457031, avg loss: 2.457474, ppl: 11.675282 +epoch: 1, batch: 15378, sum loss: 5087.129883, avg loss: 2.680258, ppl: 14.588861 +epoch: 1, batch: 15379, sum loss: 5525.083008, avg loss: 2.826129, ppl: 16.879999 +epoch: 1, batch: 15380, sum loss: 4785.417969, avg loss: 2.747082, ppl: 15.597060 +epoch: 1, batch: 15381, sum loss: 5544.395020, avg loss: 3.157400, ppl: 23.509401 +epoch: 1, batch: 15382, sum loss: 3824.310303, avg loss: 2.587490, ppl: 13.296357 +epoch: 1, batch: 15383, sum loss: 4449.134766, avg loss: 2.746380, ppl: 15.586102 +epoch: 1, batch: 15384, sum loss: 
4364.958496, avg loss: 2.610621, ppl: 13.607501 +epoch: 1, batch: 15385, sum loss: 4649.539551, avg loss: 2.675224, ppl: 14.515602 +epoch: 1, batch: 15386, sum loss: 4432.176270, avg loss: 2.602570, ppl: 13.498382 +epoch: 1, batch: 15387, sum loss: 4204.091797, avg loss: 2.798996, ppl: 16.428141 +epoch: 1, batch: 15388, sum loss: 5346.688965, avg loss: 2.814047, ppl: 16.677277 +epoch: 1, batch: 15389, sum loss: 4831.044922, avg loss: 2.641359, ppl: 14.032258 +epoch: 1, batch: 15390, sum loss: 4780.144531, avg loss: 2.667491, ppl: 14.403791 +epoch: 1, batch: 15391, sum loss: 5709.287598, avg loss: 2.672888, ppl: 14.481726 +epoch: 1, batch: 15392, sum loss: 5149.880371, avg loss: 2.661437, ppl: 14.316845 +epoch: 1, batch: 15393, sum loss: 3924.133057, avg loss: 2.277500, ppl: 9.752273 +epoch: 1, batch: 15394, sum loss: 4725.888184, avg loss: 2.706694, ppl: 14.979676 +epoch: 1, batch: 15395, sum loss: 4520.338867, avg loss: 2.622006, ppl: 13.763310 +epoch: 1, batch: 15396, sum loss: 4814.380859, avg loss: 2.815427, ppl: 16.700310 +epoch: 1, batch: 15397, sum loss: 4486.172852, avg loss: 2.796866, ppl: 16.393188 +epoch: 1, batch: 15398, sum loss: 4629.707031, avg loss: 2.509326, ppl: 12.296642 +epoch: 1, batch: 15399, sum loss: 4508.035645, avg loss: 2.824584, ppl: 16.853928 +epoch: 1, batch: 15400, sum loss: 4705.573730, avg loss: 2.781072, ppl: 16.136309 +epoch: 1, batch: 15401, sum loss: 5791.374023, avg loss: 2.979102, ppl: 19.670147 +epoch: 1, batch: 15402, sum loss: 4909.348145, avg loss: 2.899792, ppl: 18.170368 +epoch: 1, batch: 15403, sum loss: 5027.096680, avg loss: 2.912570, ppl: 18.404045 +epoch: 1, batch: 15404, sum loss: 4335.138672, avg loss: 2.716252, ppl: 15.123538 +epoch: 1, batch: 15405, sum loss: 5320.561035, avg loss: 2.972380, ppl: 19.538374 +epoch: 1, batch: 15406, sum loss: 4238.816406, avg loss: 2.677711, ppl: 14.551743 +epoch: 1, batch: 15407, sum loss: 4972.419922, avg loss: 2.676222, ppl: 14.530089 +epoch: 1, batch: 15408, sum loss: 
5210.924805, avg loss: 2.709789, ppl: 15.026109 +epoch: 1, batch: 15409, sum loss: 4597.960449, avg loss: 2.953090, ppl: 19.165077 +epoch: 1, batch: 15410, sum loss: 4570.737793, avg loss: 2.720677, ppl: 15.190609 +epoch: 1, batch: 15411, sum loss: 4615.694336, avg loss: 2.895668, ppl: 18.095581 +epoch: 1, batch: 15412, sum loss: 3702.471924, avg loss: 2.679068, ppl: 14.571505 +epoch: 1, batch: 15413, sum loss: 4791.256348, avg loss: 2.756764, ppl: 15.748804 +epoch: 1, batch: 15414, sum loss: 4219.011230, avg loss: 2.716685, ppl: 15.130080 +epoch: 1, batch: 15415, sum loss: 4105.700684, avg loss: 2.528141, ppl: 12.530189 +epoch: 1, batch: 15416, sum loss: 4151.310059, avg loss: 2.810637, ppl: 16.620495 +epoch: 1, batch: 15417, sum loss: 4829.690430, avg loss: 2.683161, ppl: 14.631277 +epoch: 1, batch: 15418, sum loss: 4388.947754, avg loss: 2.762082, ppl: 15.832767 +epoch: 1, batch: 15419, sum loss: 3849.340820, avg loss: 2.540819, ppl: 12.690062 +epoch: 1, batch: 15420, sum loss: 3974.024902, avg loss: 2.696082, ppl: 14.821548 +epoch: 1, batch: 15421, sum loss: 4871.593262, avg loss: 2.755426, ppl: 15.727742 +epoch: 1, batch: 15422, sum loss: 4400.262695, avg loss: 2.738185, ppl: 15.458897 +epoch: 1, batch: 15423, sum loss: 4446.468750, avg loss: 2.870542, ppl: 17.646572 +epoch: 1, batch: 15424, sum loss: 4198.564453, avg loss: 2.782349, ppl: 16.156931 +epoch: 1, batch: 15425, sum loss: 3974.597900, avg loss: 2.332510, ppl: 10.303777 +epoch: 1, batch: 15426, sum loss: 4152.893066, avg loss: 2.561933, ppl: 12.960844 +epoch: 1, batch: 15427, sum loss: 3542.620117, avg loss: 2.503618, ppl: 12.226656 +epoch: 1, batch: 15428, sum loss: 4774.832031, avg loss: 2.842162, ppl: 17.152813 +epoch: 1, batch: 15429, sum loss: 4738.644043, avg loss: 2.600793, ppl: 13.474414 +epoch: 1, batch: 15430, sum loss: 4630.118164, avg loss: 2.718801, ppl: 15.162132 +epoch: 1, batch: 15431, sum loss: 4753.738770, avg loss: 2.676655, ppl: 14.536385 +epoch: 1, batch: 15432, sum loss: 
5399.973633, avg loss: 2.914179, ppl: 18.433670 +epoch: 1, batch: 15433, sum loss: 4851.430664, avg loss: 2.719412, ppl: 15.171396 +epoch: 1, batch: 15434, sum loss: 4573.089355, avg loss: 2.774933, ppl: 16.037550 +epoch: 1, batch: 15435, sum loss: 5385.739258, avg loss: 3.041072, ppl: 20.927673 +epoch: 1, batch: 15436, sum loss: 3962.228027, avg loss: 2.655649, ppl: 14.234221 +epoch: 1, batch: 15437, sum loss: 5600.390137, avg loss: 2.924486, ppl: 18.624645 +epoch: 1, batch: 15438, sum loss: 5078.527344, avg loss: 2.874096, ppl: 17.709406 +epoch: 1, batch: 15439, sum loss: 4927.729492, avg loss: 2.880029, ppl: 17.814793 +epoch: 1, batch: 15440, sum loss: 4332.647461, avg loss: 2.562181, ppl: 12.964058 +epoch: 1, batch: 15441, sum loss: 4335.621094, avg loss: 2.754524, ppl: 15.713559 +epoch: 1, batch: 15442, sum loss: 4817.834473, avg loss: 2.618388, ppl: 13.713602 +epoch: 1, batch: 15443, sum loss: 5216.952148, avg loss: 3.094278, ppl: 22.071295 +epoch: 1, batch: 15444, sum loss: 4377.174805, avg loss: 2.622633, ppl: 13.771940 +epoch: 1, batch: 15445, sum loss: 4483.335449, avg loss: 2.715527, ppl: 15.112576 +epoch: 1, batch: 15446, sum loss: 3944.432373, avg loss: 2.566319, ppl: 13.017820 +epoch: 1, batch: 15447, sum loss: 4675.039062, avg loss: 3.141827, ppl: 23.146124 +epoch: 1, batch: 15448, sum loss: 5670.756348, avg loss: 2.970538, ppl: 19.502403 +epoch: 1, batch: 15449, sum loss: 5369.460449, avg loss: 2.690110, ppl: 14.733303 +epoch: 1, batch: 15450, sum loss: 5826.541992, avg loss: 2.983380, ppl: 19.754480 +epoch: 1, batch: 15451, sum loss: 4212.491211, avg loss: 2.559229, ppl: 12.925853 +epoch: 1, batch: 15452, sum loss: 4760.983887, avg loss: 2.866336, ppl: 17.572512 +epoch: 1, batch: 15453, sum loss: 3599.569580, avg loss: 2.269590, ppl: 9.675432 +epoch: 1, batch: 15454, sum loss: 4404.790039, avg loss: 2.617225, ppl: 13.697662 +epoch: 1, batch: 15455, sum loss: 4601.891602, avg loss: 2.617686, ppl: 13.703973 +epoch: 1, batch: 15456, sum loss: 
4536.472656, avg loss: 2.701890, ppl: 14.907874 +epoch: 1, batch: 15457, sum loss: 4836.755859, avg loss: 2.678159, ppl: 14.558273 +epoch: 1, batch: 15458, sum loss: 4525.155762, avg loss: 2.685552, ppl: 14.666300 +epoch: 1, batch: 15459, sum loss: 4627.354492, avg loss: 2.816406, ppl: 16.716656 +epoch: 1, batch: 15460, sum loss: 5327.524414, avg loss: 2.956451, ppl: 19.229603 +epoch: 1, batch: 15461, sum loss: 4305.968262, avg loss: 2.555471, ppl: 12.877359 +epoch: 1, batch: 15462, sum loss: 3558.682617, avg loss: 2.479918, ppl: 11.940289 +epoch: 1, batch: 15463, sum loss: 4416.345215, avg loss: 2.379496, ppl: 10.799462 +epoch: 1, batch: 15464, sum loss: 4723.056641, avg loss: 2.768497, ppl: 15.934673 +epoch: 1, batch: 15465, sum loss: 4489.519043, avg loss: 2.620852, ppl: 13.747428 +epoch: 1, batch: 15466, sum loss: 5203.290039, avg loss: 2.660169, ppl: 14.298700 +epoch: 1, batch: 15467, sum loss: 4120.431152, avg loss: 2.680827, ppl: 14.597162 +epoch: 1, batch: 15468, sum loss: 4662.365234, avg loss: 2.780182, ppl: 16.121954 +epoch: 1, batch: 15469, sum loss: 4793.120117, avg loss: 2.689742, ppl: 14.727877 +epoch: 1, batch: 15470, sum loss: 5143.711914, avg loss: 2.694454, ppl: 14.797433 +epoch: 1, batch: 15471, sum loss: 4712.047363, avg loss: 2.606221, ppl: 13.547756 +epoch: 1, batch: 15472, sum loss: 3917.762207, avg loss: 2.380172, ppl: 10.806756 +epoch: 1, batch: 15473, sum loss: 4785.991211, avg loss: 2.658884, ppl: 14.280344 +epoch: 1, batch: 15474, sum loss: 4607.535156, avg loss: 2.669487, ppl: 14.432567 +epoch: 1, batch: 15475, sum loss: 4141.668457, avg loss: 2.708743, ppl: 15.010401 +epoch: 1, batch: 15476, sum loss: 4982.725098, avg loss: 2.709476, ppl: 15.021395 +epoch: 1, batch: 15477, sum loss: 4941.271484, avg loss: 2.658026, ppl: 14.268103 +epoch: 1, batch: 15478, sum loss: 3945.075195, avg loss: 2.436736, ppl: 11.435653 +epoch: 1, batch: 15479, sum loss: 5359.416016, avg loss: 3.024501, ppl: 20.583738 +epoch: 1, batch: 15480, sum loss: 
4374.558105, avg loss: 2.651247, ppl: 14.171703 +epoch: 1, batch: 15481, sum loss: 5425.375488, avg loss: 2.861485, ppl: 17.487480 +epoch: 1, batch: 15482, sum loss: 3745.863770, avg loss: 2.549941, ppl: 12.806349 +epoch: 1, batch: 15483, sum loss: 6094.137695, avg loss: 2.834483, ppl: 17.021593 +epoch: 1, batch: 15484, sum loss: 3371.102539, avg loss: 2.623426, ppl: 13.782866 +epoch: 1, batch: 15485, sum loss: 5513.381348, avg loss: 2.844882, ppl: 17.199533 +epoch: 1, batch: 15486, sum loss: 3617.821289, avg loss: 2.395908, ppl: 10.978163 +epoch: 1, batch: 15487, sum loss: 4613.368164, avg loss: 2.792596, ppl: 16.323338 +epoch: 1, batch: 15488, sum loss: 4547.326660, avg loss: 2.547522, ppl: 12.775408 +epoch: 1, batch: 15489, sum loss: 4791.718262, avg loss: 2.498289, ppl: 12.161669 +epoch: 1, batch: 15490, sum loss: 4765.683105, avg loss: 2.788580, ppl: 16.257915 +epoch: 1, batch: 15491, sum loss: 3687.212646, avg loss: 2.335157, ppl: 10.331081 +epoch: 1, batch: 15492, sum loss: 4252.860840, avg loss: 2.517976, ppl: 12.403461 +epoch: 1, batch: 15493, sum loss: 5079.581055, avg loss: 2.747204, ppl: 15.598961 +epoch: 1, batch: 15494, sum loss: 3687.894287, avg loss: 2.463523, ppl: 11.746122 +epoch: 1, batch: 15495, sum loss: 3443.352783, avg loss: 2.290987, ppl: 9.884685 +epoch: 1, batch: 15496, sum loss: 4481.213867, avg loss: 2.878108, ppl: 17.780596 +epoch: 1, batch: 15497, sum loss: 4840.711914, avg loss: 2.931988, ppl: 18.764898 +epoch: 1, batch: 15498, sum loss: 4802.346191, avg loss: 2.697947, ppl: 14.849219 +epoch: 1, batch: 15499, sum loss: 4161.153809, avg loss: 2.608874, ppl: 13.583745 +epoch: 1, batch: 15500, sum loss: 4642.890137, avg loss: 2.635012, ppl: 13.943475 +epoch: 1, batch: 15501, sum loss: 5000.678711, avg loss: 2.870654, ppl: 17.648550 +epoch: 1, batch: 15502, sum loss: 4599.855469, avg loss: 2.694701, ppl: 14.801099 +epoch: 1, batch: 15503, sum loss: 4401.457031, avg loss: 2.623038, ppl: 13.777511 +epoch: 1, batch: 15504, sum loss: 
4716.411133, avg loss: 2.636339, ppl: 13.962001 +epoch: 1, batch: 15505, sum loss: 5713.865723, avg loss: 2.888708, ppl: 17.970083 +epoch: 1, batch: 15506, sum loss: 5623.978516, avg loss: 3.007475, ppl: 20.236242 +epoch: 1, batch: 15507, sum loss: 5060.166016, avg loss: 2.687289, ppl: 14.691799 +epoch: 1, batch: 15508, sum loss: 4414.150391, avg loss: 2.731529, ppl: 15.356345 +epoch: 1, batch: 15509, sum loss: 4369.787109, avg loss: 2.575007, ppl: 13.131412 +epoch: 1, batch: 15510, sum loss: 3985.599365, avg loss: 2.588052, ppl: 13.303824 +epoch: 1, batch: 15511, sum loss: 4404.007324, avg loss: 2.519455, ppl: 12.421827 +epoch: 1, batch: 15512, sum loss: 4161.818848, avg loss: 2.471389, ppl: 11.838881 +epoch: 1, batch: 15513, sum loss: 4548.354492, avg loss: 2.924987, ppl: 18.633986 +epoch: 1, batch: 15514, sum loss: 4199.989746, avg loss: 2.411016, ppl: 11.145279 +epoch: 1, batch: 15515, sum loss: 4827.888672, avg loss: 2.732252, ppl: 15.367450 +epoch: 1, batch: 15516, sum loss: 4853.637695, avg loss: 2.853402, ppl: 17.346701 +epoch: 1, batch: 15517, sum loss: 4786.547852, avg loss: 2.585925, ppl: 13.275568 +epoch: 1, batch: 15518, sum loss: 4172.579102, avg loss: 2.522720, ppl: 12.462450 +epoch: 1, batch: 15519, sum loss: 5320.986328, avg loss: 2.896563, ppl: 18.111794 +epoch: 1, batch: 15520, sum loss: 4287.964355, avg loss: 2.490107, ppl: 12.062568 +epoch: 1, batch: 15521, sum loss: 5783.646484, avg loss: 3.234702, ppl: 25.398792 +epoch: 1, batch: 15522, sum loss: 5170.668457, avg loss: 2.880595, ppl: 17.824879 +epoch: 1, batch: 15523, sum loss: 3945.442627, avg loss: 2.588873, ppl: 13.314759 +epoch: 1, batch: 15524, sum loss: 5008.233398, avg loss: 2.637300, ppl: 13.975426 +epoch: 1, batch: 15525, sum loss: 3942.436279, avg loss: 2.694762, ppl: 14.801991 +epoch: 1, batch: 15526, sum loss: 3581.122803, avg loss: 2.585648, ppl: 13.271891 +epoch: 1, batch: 15527, sum loss: 3968.022949, avg loss: 2.508232, ppl: 12.283193 +epoch: 1, batch: 15528, sum loss: 
4309.987793, avg loss: 2.588581, ppl: 13.310874 +epoch: 1, batch: 15529, sum loss: 4596.008301, avg loss: 2.580577, ppl: 13.204760 +epoch: 1, batch: 15530, sum loss: 4152.851562, avg loss: 2.819315, ppl: 16.765369 +epoch: 1, batch: 15531, sum loss: 4460.136230, avg loss: 2.600662, ppl: 13.472660 +epoch: 1, batch: 15532, sum loss: 4621.492188, avg loss: 2.767361, ppl: 15.916570 +epoch: 1, batch: 15533, sum loss: 5161.170410, avg loss: 2.692317, ppl: 14.765841 +epoch: 1, batch: 15534, sum loss: 4601.509277, avg loss: 2.706770, ppl: 14.980816 +epoch: 1, batch: 15535, sum loss: 4402.127930, avg loss: 2.739345, ppl: 15.476850 +epoch: 1, batch: 15536, sum loss: 3627.798828, avg loss: 2.349611, ppl: 10.481489 +epoch: 1, batch: 15537, sum loss: 4731.672363, avg loss: 2.668738, ppl: 14.421760 +epoch: 1, batch: 15538, sum loss: 4727.812012, avg loss: 2.877548, ppl: 17.770651 +epoch: 1, batch: 15539, sum loss: 4604.024414, avg loss: 2.781888, ppl: 16.149483 +epoch: 1, batch: 15540, sum loss: 4982.748535, avg loss: 2.730273, ppl: 15.337077 +epoch: 1, batch: 15541, sum loss: 3702.723877, avg loss: 2.513730, ppl: 12.350911 +epoch: 1, batch: 15542, sum loss: 4581.128418, avg loss: 2.555007, ppl: 12.871396 +epoch: 1, batch: 15543, sum loss: 4580.331055, avg loss: 2.629352, ppl: 13.864780 +epoch: 1, batch: 15544, sum loss: 4191.504883, avg loss: 2.788759, ppl: 16.260828 +epoch: 1, batch: 15545, sum loss: 4923.487305, avg loss: 2.803808, ppl: 16.507395 +epoch: 1, batch: 15546, sum loss: 4313.593750, avg loss: 2.546395, ppl: 12.761022 +epoch: 1, batch: 15547, sum loss: 5007.081055, avg loss: 2.889256, ppl: 17.979927 +epoch: 1, batch: 15548, sum loss: 4521.516113, avg loss: 2.684986, ppl: 14.657994 +epoch: 1, batch: 15549, sum loss: 5192.372559, avg loss: 2.796108, ppl: 16.380764 +epoch: 1, batch: 15550, sum loss: 4299.771484, avg loss: 2.612255, ppl: 13.629749 +epoch: 1, batch: 15551, sum loss: 4571.766113, avg loss: 2.767413, ppl: 15.917404 +epoch: 1, batch: 15552, sum loss: 
3659.013184, avg loss: 2.569532, ppl: 13.059707 +epoch: 1, batch: 15553, sum loss: 4231.966797, avg loss: 2.552453, ppl: 12.838555 +epoch: 1, batch: 15554, sum loss: 4470.875000, avg loss: 2.551869, ppl: 12.831068 +epoch: 1, batch: 15555, sum loss: 3949.189941, avg loss: 2.699378, ppl: 14.870480 +epoch: 1, batch: 15556, sum loss: 4220.960449, avg loss: 2.599113, ppl: 13.451807 +epoch: 1, batch: 15557, sum loss: 5096.014648, avg loss: 2.920352, ppl: 18.547819 +epoch: 1, batch: 15558, sum loss: 4822.490723, avg loss: 2.882541, ppl: 17.859596 +epoch: 1, batch: 15559, sum loss: 4001.768555, avg loss: 2.529563, ppl: 12.548018 +epoch: 1, batch: 15560, sum loss: 5790.308594, avg loss: 3.003272, ppl: 20.151371 +epoch: 1, batch: 15561, sum loss: 4412.324219, avg loss: 2.727024, ppl: 15.287318 +epoch: 1, batch: 15562, sum loss: 4567.742676, avg loss: 2.569034, ppl: 13.053213 +epoch: 1, batch: 15563, sum loss: 5021.852539, avg loss: 2.647260, ppl: 14.115313 +epoch: 1, batch: 15564, sum loss: 4415.915039, avg loss: 2.559951, ppl: 12.935182 +epoch: 1, batch: 15565, sum loss: 4889.321777, avg loss: 2.787527, ppl: 16.240801 +epoch: 1, batch: 15566, sum loss: 5105.857422, avg loss: 2.704374, ppl: 14.944952 +epoch: 1, batch: 15567, sum loss: 3754.591797, avg loss: 2.511433, ppl: 12.322572 +epoch: 1, batch: 15568, sum loss: 5100.008301, avg loss: 2.731659, ppl: 15.358352 +epoch: 1, batch: 15569, sum loss: 4763.783691, avg loss: 2.630471, ppl: 13.880313 +epoch: 1, batch: 15570, sum loss: 4925.001465, avg loss: 2.666487, ppl: 14.389334 +epoch: 1, batch: 15571, sum loss: 4088.483643, avg loss: 2.586011, ppl: 13.276708 +epoch: 1, batch: 15572, sum loss: 4640.395996, avg loss: 2.750679, ppl: 15.653265 +epoch: 1, batch: 15573, sum loss: 3756.281250, avg loss: 2.548359, ppl: 12.786106 +epoch: 1, batch: 15574, sum loss: 4117.621094, avg loss: 2.453886, ppl: 11.633470 +epoch: 1, batch: 15575, sum loss: 4712.073242, avg loss: 2.654689, ppl: 14.220564 +epoch: 1, batch: 15576, sum loss: 
4243.655762, avg loss: 2.632541, ppl: 13.909064 +epoch: 1, batch: 15577, sum loss: 4341.346680, avg loss: 2.518183, ppl: 12.406032 +epoch: 1, batch: 15578, sum loss: 4570.981934, avg loss: 2.701526, ppl: 14.902454 +epoch: 1, batch: 15579, sum loss: 5037.923340, avg loss: 2.895358, ppl: 18.089983 +epoch: 1, batch: 15580, sum loss: 4282.680176, avg loss: 2.804637, ppl: 16.521072 +epoch: 1, batch: 15581, sum loss: 3679.380371, avg loss: 2.639441, ppl: 14.005369 +epoch: 1, batch: 15582, sum loss: 3774.971436, avg loss: 2.599843, ppl: 13.461618 +epoch: 1, batch: 15583, sum loss: 5194.789551, avg loss: 2.794400, ppl: 16.352814 +epoch: 1, batch: 15584, sum loss: 4191.458008, avg loss: 2.397859, ppl: 10.999604 +epoch: 1, batch: 15585, sum loss: 4407.632812, avg loss: 2.867686, ppl: 17.596251 +epoch: 1, batch: 15586, sum loss: 4856.539062, avg loss: 2.791114, ppl: 16.299171 +epoch: 1, batch: 15587, sum loss: 5093.910645, avg loss: 2.765424, ppl: 15.885775 +epoch: 1, batch: 15588, sum loss: 3862.288574, avg loss: 2.718007, ppl: 15.150103 +epoch: 1, batch: 15589, sum loss: 3506.670410, avg loss: 2.513742, ppl: 12.351067 +epoch: 1, batch: 15590, sum loss: 3830.403809, avg loss: 2.523323, ppl: 12.469960 +epoch: 1, batch: 15591, sum loss: 4501.989746, avg loss: 2.694189, ppl: 14.793517 +epoch: 1, batch: 15592, sum loss: 4693.790527, avg loss: 2.639927, ppl: 14.012186 +epoch: 1, batch: 15593, sum loss: 3981.114502, avg loss: 2.460516, ppl: 11.710850 +epoch: 1, batch: 15594, sum loss: 4463.314453, avg loss: 2.600999, ppl: 13.477197 +epoch: 1, batch: 15595, sum loss: 4291.655762, avg loss: 2.659019, ppl: 14.282265 +epoch: 1, batch: 15596, sum loss: 3729.733154, avg loss: 2.667907, ppl: 14.409771 +epoch: 1, batch: 15597, sum loss: 4332.436035, avg loss: 2.697656, ppl: 14.844896 +epoch: 1, batch: 15598, sum loss: 5056.017578, avg loss: 2.892459, ppl: 18.037603 +epoch: 1, batch: 15599, sum loss: 4998.097656, avg loss: 2.862599, ppl: 17.506971 +epoch: 1, batch: 15600, sum loss: 
4745.145020, avg loss: 2.863696, ppl: 17.526190 +epoch: 1, batch: 15601, sum loss: 4532.389160, avg loss: 2.772103, ppl: 15.992235 +epoch: 1, batch: 15602, sum loss: 4562.164062, avg loss: 2.640141, ppl: 14.015183 +epoch: 1, batch: 15603, sum loss: 5021.440430, avg loss: 2.871035, ppl: 17.655283 +epoch: 1, batch: 15604, sum loss: 4807.731934, avg loss: 2.713167, ppl: 15.076948 +epoch: 1, batch: 15605, sum loss: 4343.844238, avg loss: 2.742326, ppl: 15.523050 +epoch: 1, batch: 15606, sum loss: 4967.553223, avg loss: 2.854916, ppl: 17.372971 +epoch: 1, batch: 15607, sum loss: 4555.620117, avg loss: 2.901669, ppl: 18.204504 +epoch: 1, batch: 15608, sum loss: 4523.118164, avg loss: 2.668506, ppl: 14.418418 +epoch: 1, batch: 15609, sum loss: 4398.325195, avg loss: 2.848656, ppl: 17.264570 +epoch: 1, batch: 15610, sum loss: 4283.636719, avg loss: 2.593001, ppl: 13.369830 +epoch: 1, batch: 15611, sum loss: 4773.519043, avg loss: 2.585872, ppl: 13.274856 +epoch: 1, batch: 15612, sum loss: 5107.710938, avg loss: 2.608637, ppl: 13.580526 +epoch: 1, batch: 15613, sum loss: 3481.723389, avg loss: 2.397881, ppl: 10.999846 +epoch: 1, batch: 15614, sum loss: 4588.621094, avg loss: 2.869682, ppl: 17.631403 +epoch: 1, batch: 15615, sum loss: 4647.195312, avg loss: 2.708156, ppl: 15.001585 +epoch: 1, batch: 15616, sum loss: 4837.908691, avg loss: 2.957157, ppl: 19.243189 +epoch: 1, batch: 15617, sum loss: 4300.875977, avg loss: 2.641816, ppl: 14.038670 +epoch: 1, batch: 15618, sum loss: 4366.390625, avg loss: 2.595952, ppl: 13.409341 +epoch: 1, batch: 15619, sum loss: 3766.147461, avg loss: 2.549863, ppl: 12.805350 +epoch: 1, batch: 15620, sum loss: 4415.592285, avg loss: 2.601999, ppl: 13.490680 +epoch: 1, batch: 15621, sum loss: 4711.587891, avg loss: 2.544054, ppl: 12.731176 +epoch: 1, batch: 15622, sum loss: 3852.572510, avg loss: 2.649637, ppl: 14.148905 +epoch: 1, batch: 15623, sum loss: 3850.029297, avg loss: 2.717028, ppl: 15.135279 +epoch: 1, batch: 15624, sum loss: 
4170.400879, avg loss: 2.556959, ppl: 12.896544 +epoch: 1, batch: 15625, sum loss: 4985.849121, avg loss: 2.880329, ppl: 17.820129 +epoch: 1, batch: 15626, sum loss: 4720.753906, avg loss: 2.515053, ppl: 12.367262 +epoch: 1, batch: 15627, sum loss: 4528.085938, avg loss: 2.779672, ppl: 16.113737 +epoch: 1, batch: 15628, sum loss: 4043.262695, avg loss: 2.519167, ppl: 12.418244 +epoch: 1, batch: 15629, sum loss: 3929.332764, avg loss: 2.468174, ppl: 11.800876 +epoch: 1, batch: 15630, sum loss: 4810.300781, avg loss: 2.720758, ppl: 15.191840 +epoch: 1, batch: 15631, sum loss: 4755.502441, avg loss: 2.520139, ppl: 12.430325 +epoch: 1, batch: 15632, sum loss: 3970.427979, avg loss: 2.511340, ppl: 12.321425 +epoch: 1, batch: 15633, sum loss: 4566.875000, avg loss: 2.588932, ppl: 13.315537 +epoch: 1, batch: 15634, sum loss: 4264.520508, avg loss: 2.616270, ppl: 13.684589 +epoch: 1, batch: 15635, sum loss: 4717.077148, avg loss: 2.698557, ppl: 14.858274 +epoch: 1, batch: 15636, sum loss: 4513.138672, avg loss: 3.024892, ppl: 20.591778 +epoch: 1, batch: 15637, sum loss: 4328.766602, avg loss: 2.621906, ppl: 13.761930 +epoch: 1, batch: 15638, sum loss: 5160.233398, avg loss: 2.801430, ppl: 16.468176 +epoch: 1, batch: 15639, sum loss: 4419.366211, avg loss: 2.717937, ppl: 15.149041 +epoch: 1, batch: 15640, sum loss: 4113.797852, avg loss: 2.379293, ppl: 10.797269 +epoch: 1, batch: 15641, sum loss: 4275.562500, avg loss: 2.618226, ppl: 13.711372 +epoch: 1, batch: 15642, sum loss: 5494.348633, avg loss: 2.772123, ppl: 15.992556 +epoch: 1, batch: 15643, sum loss: 5126.998535, avg loss: 2.691338, ppl: 14.751397 +epoch: 1, batch: 15644, sum loss: 4414.494629, avg loss: 2.698346, ppl: 14.855146 +epoch: 1, batch: 15645, sum loss: 3826.016602, avg loss: 2.355922, ppl: 10.547844 +epoch: 1, batch: 15646, sum loss: 4506.044922, avg loss: 2.698231, ppl: 14.853425 +epoch: 1, batch: 15647, sum loss: 4083.903076, avg loss: 2.482616, ppl: 11.972540 +epoch: 1, batch: 15648, sum loss: 
4441.366211, avg loss: 2.755190, ppl: 15.724027 +epoch: 1, batch: 15649, sum loss: 4913.626953, avg loss: 2.760465, ppl: 15.807186 +epoch: 1, batch: 15650, sum loss: 4547.424805, avg loss: 2.769443, ppl: 15.949744 +epoch: 1, batch: 15651, sum loss: 4560.409180, avg loss: 2.711302, ppl: 15.048850 +epoch: 1, batch: 15652, sum loss: 4179.826172, avg loss: 2.547121, ppl: 12.770288 +epoch: 1, batch: 15653, sum loss: 4237.430664, avg loss: 2.979909, ppl: 19.686024 +epoch: 1, batch: 15654, sum loss: 3857.399414, avg loss: 2.581927, ppl: 13.222597 +epoch: 1, batch: 15655, sum loss: 3487.981201, avg loss: 2.294724, ppl: 9.921701 +epoch: 1, batch: 15656, sum loss: 4819.778320, avg loss: 2.697134, ppl: 14.837148 +epoch: 1, batch: 15657, sum loss: 4683.295410, avg loss: 2.801014, ppl: 16.461332 +epoch: 1, batch: 15658, sum loss: 4665.447266, avg loss: 2.967842, ppl: 19.449900 +epoch: 1, batch: 15659, sum loss: 3543.763184, avg loss: 2.449042, ppl: 11.577246 +epoch: 1, batch: 15660, sum loss: 4590.004883, avg loss: 2.685784, ppl: 14.669700 +epoch: 1, batch: 15661, sum loss: 4419.266602, avg loss: 2.485527, ppl: 12.007444 +epoch: 1, batch: 15662, sum loss: 3393.526611, avg loss: 2.289829, ppl: 9.873252 +epoch: 1, batch: 15663, sum loss: 4650.069824, avg loss: 2.628643, ppl: 13.854959 +epoch: 1, batch: 15664, sum loss: 3888.967773, avg loss: 2.654585, ppl: 14.219090 +epoch: 1, batch: 15665, sum loss: 4028.672852, avg loss: 2.692963, ppl: 14.775393 +epoch: 1, batch: 15666, sum loss: 4145.979980, avg loss: 2.421717, ppl: 11.265187 +epoch: 1, batch: 15667, sum loss: 3870.252441, avg loss: 2.683948, ppl: 14.642786 +epoch: 1, batch: 15668, sum loss: 4834.554688, avg loss: 2.971453, ppl: 19.520269 +epoch: 1, batch: 15669, sum loss: 4698.045898, avg loss: 2.539484, ppl: 12.673133 +epoch: 1, batch: 15670, sum loss: 4525.572266, avg loss: 2.658973, ppl: 14.281614 +epoch: 1, batch: 15671, sum loss: 3445.900391, avg loss: 2.389668, ppl: 10.909866 +epoch: 1, batch: 15672, sum loss: 
4076.940430, avg loss: 2.811683, ppl: 16.637899 +epoch: 1, batch: 15673, sum loss: 4412.346191, avg loss: 2.754273, ppl: 15.709622 +epoch: 1, batch: 15674, sum loss: 4964.465820, avg loss: 2.686399, ppl: 14.678726 +epoch: 1, batch: 15675, sum loss: 4189.682617, avg loss: 2.550020, ppl: 12.807357 +epoch: 1, batch: 15676, sum loss: 4531.131348, avg loss: 2.746140, ppl: 15.582372 +epoch: 1, batch: 15677, sum loss: 4922.097656, avg loss: 2.705936, ppl: 14.968320 +epoch: 1, batch: 15678, sum loss: 5175.539062, avg loss: 2.848398, ppl: 17.260109 +epoch: 1, batch: 15679, sum loss: 4482.591309, avg loss: 2.615281, ppl: 13.671056 +epoch: 1, batch: 15680, sum loss: 4961.113770, avg loss: 2.515778, ppl: 12.376231 +epoch: 1, batch: 15681, sum loss: 5020.792969, avg loss: 2.674903, ppl: 14.510944 +epoch: 1, batch: 15682, sum loss: 3730.210938, avg loss: 2.346045, ppl: 10.444177 +epoch: 1, batch: 15683, sum loss: 4621.147461, avg loss: 2.610818, ppl: 13.610178 +epoch: 1, batch: 15684, sum loss: 4596.071777, avg loss: 2.988343, ppl: 19.852764 +epoch: 1, batch: 15685, sum loss: 5094.613770, avg loss: 2.785464, ppl: 16.207340 +epoch: 1, batch: 15686, sum loss: 4486.418945, avg loss: 2.737290, ppl: 15.445078 +epoch: 1, batch: 15687, sum loss: 4364.112305, avg loss: 2.698895, ppl: 14.863292 +epoch: 1, batch: 15688, sum loss: 4246.706055, avg loss: 2.649224, ppl: 14.143060 +epoch: 1, batch: 15689, sum loss: 4581.648438, avg loss: 2.810827, ppl: 16.623665 +epoch: 1, batch: 15690, sum loss: 5382.336914, avg loss: 2.909371, ppl: 18.345263 +epoch: 1, batch: 15691, sum loss: 3795.755615, avg loss: 2.532192, ppl: 12.581054 +epoch: 1, batch: 15692, sum loss: 3758.253906, avg loss: 2.686386, ppl: 14.678534 +epoch: 1, batch: 15693, sum loss: 5078.382812, avg loss: 2.650513, ppl: 14.161301 +epoch: 1, batch: 15694, sum loss: 3611.177246, avg loss: 2.141861, ppl: 8.515267 +epoch: 1, batch: 15695, sum loss: 4866.789062, avg loss: 2.831175, ppl: 16.965378 +epoch: 1, batch: 15696, sum loss: 
4687.747070, avg loss: 2.655947, ppl: 14.238470 +epoch: 1, batch: 15697, sum loss: 4149.665039, avg loss: 2.541130, ppl: 12.694005 +epoch: 1, batch: 15698, sum loss: 4542.483398, avg loss: 2.781680, ppl: 16.146122 +epoch: 1, batch: 15699, sum loss: 4445.068359, avg loss: 2.695615, ppl: 14.814621 +epoch: 1, batch: 15700, sum loss: 4411.879883, avg loss: 2.726749, ppl: 15.283120 +epoch: 1, batch: 15701, sum loss: 4539.542969, avg loss: 2.744585, ppl: 15.558153 +epoch: 1, batch: 15702, sum loss: 4531.182617, avg loss: 2.500653, ppl: 12.190446 +epoch: 1, batch: 15703, sum loss: 4205.663086, avg loss: 2.494462, ppl: 12.115217 +epoch: 1, batch: 15704, sum loss: 3791.262451, avg loss: 2.561664, ppl: 12.957358 +epoch: 1, batch: 15705, sum loss: 4847.577148, avg loss: 2.647503, ppl: 14.118735 +epoch: 1, batch: 15706, sum loss: 4801.078613, avg loss: 2.680669, ppl: 14.594855 +epoch: 1, batch: 15707, sum loss: 3554.375000, avg loss: 2.522622, ppl: 12.461231 +epoch: 1, batch: 15708, sum loss: 3802.405762, avg loss: 2.533248, ppl: 12.594352 +epoch: 1, batch: 15709, sum loss: 3540.105225, avg loss: 2.385516, ppl: 10.864664 +epoch: 1, batch: 15710, sum loss: 4068.494141, avg loss: 2.534887, ppl: 12.615009 +epoch: 1, batch: 15711, sum loss: 4535.553711, avg loss: 3.041954, ppl: 20.946138 +epoch: 1, batch: 15712, sum loss: 5433.167480, avg loss: 2.829775, ppl: 16.941647 +epoch: 1, batch: 15713, sum loss: 5839.217285, avg loss: 2.907977, ppl: 18.319698 +epoch: 1, batch: 15714, sum loss: 3554.509277, avg loss: 2.396837, ppl: 10.988365 +epoch: 1, batch: 15715, sum loss: 4583.545898, avg loss: 2.602808, ppl: 13.501604 +epoch: 1, batch: 15716, sum loss: 5901.566895, avg loss: 2.853756, ppl: 17.352837 +epoch: 1, batch: 15717, sum loss: 4468.844238, avg loss: 2.599677, ppl: 13.459388 +epoch: 1, batch: 15718, sum loss: 5260.204102, avg loss: 2.808438, ppl: 16.583990 +epoch: 1, batch: 15719, sum loss: 4578.806152, avg loss: 2.788554, ppl: 16.257502 +epoch: 1, batch: 15720, sum loss: 
4837.833008, avg loss: 2.845784, ppl: 17.215054 +epoch: 1, batch: 15721, sum loss: 3846.978516, avg loss: 2.769603, ppl: 15.952296 +epoch: 1, batch: 15722, sum loss: 4411.394531, avg loss: 2.708038, ppl: 14.999822 +epoch: 1, batch: 15723, sum loss: 4490.756836, avg loss: 2.646292, ppl: 14.101649 +epoch: 1, batch: 15724, sum loss: 4540.856934, avg loss: 2.758723, ppl: 15.779673 +epoch: 1, batch: 15725, sum loss: 4776.627930, avg loss: 2.856835, ppl: 17.406351 +epoch: 1, batch: 15726, sum loss: 4495.467285, avg loss: 2.554243, ppl: 12.861555 +epoch: 1, batch: 15727, sum loss: 3974.139648, avg loss: 2.396948, ppl: 10.989583 +epoch: 1, batch: 15728, sum loss: 4454.187988, avg loss: 2.747802, ppl: 15.608294 +epoch: 1, batch: 15729, sum loss: 3701.913086, avg loss: 2.605146, ppl: 13.533207 +epoch: 1, batch: 15730, sum loss: 4606.489258, avg loss: 2.601067, ppl: 13.478109 +epoch: 1, batch: 15731, sum loss: 4806.678711, avg loss: 2.776822, ppl: 16.067873 +epoch: 1, batch: 15732, sum loss: 3307.018555, avg loss: 2.309371, ppl: 10.068085 +epoch: 1, batch: 15733, sum loss: 4637.905273, avg loss: 2.795603, ppl: 16.372499 +epoch: 1, batch: 15734, sum loss: 4032.408936, avg loss: 2.737549, ppl: 15.449066 +epoch: 1, batch: 15735, sum loss: 4552.901855, avg loss: 2.694025, ppl: 14.791087 +epoch: 1, batch: 15736, sum loss: 4511.770508, avg loss: 2.731096, ppl: 15.349702 +epoch: 1, batch: 15737, sum loss: 5470.367188, avg loss: 2.966577, ppl: 19.425304 +epoch: 1, batch: 15738, sum loss: 4364.297852, avg loss: 2.667664, ppl: 14.406274 +epoch: 1, batch: 15739, sum loss: 4759.422363, avg loss: 2.793088, ppl: 16.331377 +epoch: 1, batch: 15740, sum loss: 6156.586914, avg loss: 2.988634, ppl: 19.858543 +epoch: 1, batch: 15741, sum loss: 4124.346680, avg loss: 2.633682, ppl: 13.924951 +epoch: 1, batch: 15742, sum loss: 4675.579102, avg loss: 2.622310, ppl: 13.767495 +epoch: 1, batch: 15743, sum loss: 4625.595703, avg loss: 2.857070, ppl: 17.410435 +epoch: 1, batch: 15744, sum loss: 
4819.211914, avg loss: 2.738189, ppl: 15.458956 +epoch: 1, batch: 15745, sum loss: 4412.314941, avg loss: 2.488616, ppl: 12.044589 +epoch: 1, batch: 15746, sum loss: 3965.399902, avg loss: 2.617426, ppl: 13.700409 +epoch: 1, batch: 15747, sum loss: 4703.036133, avg loss: 2.860728, ppl: 17.474236 +epoch: 1, batch: 15748, sum loss: 5319.880371, avg loss: 3.082202, ppl: 21.806360 +epoch: 1, batch: 15749, sum loss: 4157.521484, avg loss: 2.448482, ppl: 11.570764 +epoch: 1, batch: 15750, sum loss: 4678.587891, avg loss: 2.744040, ppl: 15.549679 +epoch: 1, batch: 15751, sum loss: 4090.776123, avg loss: 2.540855, ppl: 12.690513 +epoch: 1, batch: 15752, sum loss: 4926.664551, avg loss: 2.750790, ppl: 15.654989 +epoch: 1, batch: 15753, sum loss: 3980.374023, avg loss: 2.603253, ppl: 13.507608 +epoch: 1, batch: 15754, sum loss: 4074.699219, avg loss: 2.510597, ppl: 12.312281 +epoch: 1, batch: 15755, sum loss: 4686.775391, avg loss: 2.712254, ppl: 15.063194 +epoch: 1, batch: 15756, sum loss: 5433.253906, avg loss: 2.902379, ppl: 18.217438 +epoch: 1, batch: 15757, sum loss: 4303.272949, avg loss: 2.598595, ppl: 13.444830 +epoch: 1, batch: 15758, sum loss: 3821.521973, avg loss: 2.505916, ppl: 12.254781 +epoch: 1, batch: 15759, sum loss: 3752.025146, avg loss: 2.292013, ppl: 9.894835 +epoch: 1, batch: 15760, sum loss: 3859.407959, avg loss: 2.591946, ppl: 13.355739 +epoch: 1, batch: 15761, sum loss: 4521.351562, avg loss: 2.503517, ppl: 12.225414 +epoch: 1, batch: 15762, sum loss: 4208.127441, avg loss: 2.625157, ppl: 13.806746 +epoch: 1, batch: 15763, sum loss: 4267.950684, avg loss: 2.596077, ppl: 13.411023 +epoch: 1, batch: 15764, sum loss: 4824.711426, avg loss: 2.747558, ppl: 15.604477 +epoch: 1, batch: 15765, sum loss: 3860.223633, avg loss: 2.471334, ppl: 11.838228 +epoch: 1, batch: 15766, sum loss: 4236.621094, avg loss: 2.674634, ppl: 14.507046 +epoch: 1, batch: 15767, sum loss: 4635.892578, avg loss: 2.791025, ppl: 16.297714 +epoch: 1, batch: 15768, sum loss: 
3903.876953, avg loss: 2.573419, ppl: 13.110574 +epoch: 1, batch: 15769, sum loss: 4311.593750, avg loss: 2.542213, ppl: 12.707767 +epoch: 1, batch: 15770, sum loss: 4715.644531, avg loss: 2.795284, ppl: 16.367281 +epoch: 1, batch: 15771, sum loss: 4626.857910, avg loss: 2.900851, ppl: 18.189627 +epoch: 1, batch: 15772, sum loss: 4097.742676, avg loss: 2.565900, ppl: 13.012368 +epoch: 1, batch: 15773, sum loss: 4614.599609, avg loss: 2.650545, ppl: 14.161759 +epoch: 1, batch: 15774, sum loss: 4822.694824, avg loss: 2.821940, ppl: 16.809425 +epoch: 1, batch: 15775, sum loss: 4740.143555, avg loss: 2.553957, ppl: 12.857879 +epoch: 1, batch: 15776, sum loss: 4158.213867, avg loss: 2.794499, ppl: 16.354427 +epoch: 1, batch: 15777, sum loss: 4602.816895, avg loss: 2.915020, ppl: 18.449186 +epoch: 1, batch: 15778, sum loss: 4733.502930, avg loss: 2.844653, ppl: 17.195597 +epoch: 1, batch: 15779, sum loss: 4896.301758, avg loss: 2.938956, ppl: 18.896118 +epoch: 1, batch: 15780, sum loss: 4480.468750, avg loss: 2.692590, ppl: 14.769874 +epoch: 1, batch: 15781, sum loss: 4192.821289, avg loss: 2.463467, ppl: 11.745464 +epoch: 1, batch: 15782, sum loss: 3853.893311, avg loss: 2.344217, ppl: 10.425110 +epoch: 1, batch: 15783, sum loss: 5081.211426, avg loss: 2.821328, ppl: 16.799139 +epoch: 1, batch: 15784, sum loss: 4211.990234, avg loss: 2.545009, ppl: 12.743347 +epoch: 1, batch: 15785, sum loss: 5208.880859, avg loss: 2.882612, ppl: 17.860872 +epoch: 1, batch: 15786, sum loss: 4990.771973, avg loss: 2.925423, ppl: 18.642113 +epoch: 1, batch: 15787, sum loss: 4600.069824, avg loss: 2.593049, ppl: 13.370480 +epoch: 1, batch: 15788, sum loss: 4583.046875, avg loss: 2.592221, ppl: 13.359413 +epoch: 1, batch: 15789, sum loss: 4295.857422, avg loss: 2.773310, ppl: 16.011547 +epoch: 1, batch: 15790, sum loss: 4437.087402, avg loss: 2.617751, ppl: 13.704862 +epoch: 1, batch: 15791, sum loss: 4872.061523, avg loss: 2.793613, ppl: 16.339952 +epoch: 1, batch: 15792, sum loss: 
5017.837402, avg loss: 2.686208, ppl: 14.675926 +epoch: 1, batch: 15793, sum loss: 5680.240234, avg loss: 3.160957, ppl: 23.593172 +epoch: 1, batch: 15794, sum loss: 4155.360352, avg loss: 2.519928, ppl: 12.427695 +epoch: 1, batch: 15795, sum loss: 4343.110840, avg loss: 2.513374, ppl: 12.346521 +epoch: 1, batch: 15796, sum loss: 5497.953125, avg loss: 2.826711, ppl: 16.889822 +epoch: 1, batch: 15797, sum loss: 5164.416992, avg loss: 2.719546, ppl: 15.173426 +epoch: 1, batch: 15798, sum loss: 5140.913574, avg loss: 2.978513, ppl: 19.658564 +epoch: 1, batch: 15799, sum loss: 4202.043945, avg loss: 2.759057, ppl: 15.784951 +epoch: 1, batch: 15800, sum loss: 4632.551758, avg loss: 2.599636, ppl: 13.458842 +epoch: 1, batch: 15801, sum loss: 4995.311523, avg loss: 2.731171, ppl: 15.350854 +epoch: 1, batch: 15802, sum loss: 4423.638672, avg loss: 2.658437, ppl: 14.273958 +epoch: 1, batch: 15803, sum loss: 4924.506348, avg loss: 2.866418, ppl: 17.573957 +epoch: 1, batch: 15804, sum loss: 4490.476562, avg loss: 2.515673, ppl: 12.374936 +epoch: 1, batch: 15805, sum loss: 5144.197754, avg loss: 2.776145, ppl: 16.057009 +epoch: 1, batch: 15806, sum loss: 3620.010986, avg loss: 2.403726, ppl: 11.064323 +epoch: 1, batch: 15807, sum loss: 4078.366211, avg loss: 2.600999, ppl: 13.477194 +epoch: 1, batch: 15808, sum loss: 4694.030762, avg loss: 3.038208, ppl: 20.867809 +epoch: 1, batch: 15809, sum loss: 4186.060547, avg loss: 2.532402, ppl: 12.583696 +epoch: 1, batch: 15810, sum loss: 4559.222656, avg loss: 2.506445, ppl: 12.261260 +epoch: 1, batch: 15811, sum loss: 4928.246582, avg loss: 2.736394, ppl: 15.431246 +epoch: 1, batch: 15812, sum loss: 4120.599609, avg loss: 2.644801, ppl: 14.080642 +epoch: 1, batch: 15813, sum loss: 4188.863770, avg loss: 2.631196, ppl: 13.890369 +epoch: 1, batch: 15814, sum loss: 4501.126465, avg loss: 2.506195, ppl: 12.258203 +epoch: 1, batch: 15815, sum loss: 3691.269287, avg loss: 2.603152, ppl: 13.506240 +epoch: 1, batch: 15816, sum loss: 
3750.358643, avg loss: 2.486975, ppl: 12.024848 +epoch: 1, batch: 15817, sum loss: 4775.906738, avg loss: 2.778305, ppl: 16.091726 +epoch: 1, batch: 15818, sum loss: 5044.498047, avg loss: 2.604284, ppl: 13.521541 +epoch: 1, batch: 15819, sum loss: 4592.952148, avg loss: 2.863437, ppl: 17.521641 +epoch: 1, batch: 15820, sum loss: 4470.936523, avg loss: 2.674005, ppl: 14.497921 +epoch: 1, batch: 15821, sum loss: 4190.194336, avg loss: 2.657067, ppl: 14.254417 +epoch: 1, batch: 15822, sum loss: 4305.425781, avg loss: 2.659312, ppl: 14.286453 +epoch: 1, batch: 15823, sum loss: 4881.545898, avg loss: 2.685119, ppl: 14.659941 +epoch: 1, batch: 15824, sum loss: 4407.580078, avg loss: 2.544792, ppl: 12.740580 +epoch: 1, batch: 15825, sum loss: 4477.370117, avg loss: 2.758700, ppl: 15.779315 +epoch: 1, batch: 15826, sum loss: 4884.761719, avg loss: 2.767570, ppl: 15.919909 +epoch: 1, batch: 15827, sum loss: 4612.071777, avg loss: 2.864641, ppl: 17.542749 +epoch: 1, batch: 15828, sum loss: 4969.609863, avg loss: 2.975814, ppl: 19.605583 +epoch: 1, batch: 15829, sum loss: 4279.505859, avg loss: 2.673021, ppl: 14.483652 +epoch: 1, batch: 15830, sum loss: 4717.800293, avg loss: 2.663919, ppl: 14.352423 +epoch: 1, batch: 15831, sum loss: 5394.172363, avg loss: 2.805082, ppl: 16.528429 +epoch: 1, batch: 15832, sum loss: 4815.517578, avg loss: 2.869796, ppl: 17.633417 +epoch: 1, batch: 15833, sum loss: 4514.152344, avg loss: 2.679022, ppl: 14.570837 +epoch: 1, batch: 15834, sum loss: 5443.003418, avg loss: 2.714715, ppl: 15.100303 +epoch: 1, batch: 15835, sum loss: 4058.032959, avg loss: 2.636799, ppl: 13.968413 +epoch: 1, batch: 15836, sum loss: 5011.769531, avg loss: 2.912126, ppl: 18.395872 +epoch: 1, batch: 15837, sum loss: 5261.638184, avg loss: 2.800233, ppl: 16.448484 +epoch: 1, batch: 15838, sum loss: 4843.651855, avg loss: 2.886563, ppl: 17.931566 +epoch: 1, batch: 15839, sum loss: 3940.626953, avg loss: 2.578944, ppl: 13.183215 +epoch: 1, batch: 15840, sum loss: 
4169.785156, avg loss: 2.697144, ppl: 14.837300 +epoch: 1, batch: 15841, sum loss: 5398.224121, avg loss: 2.758418, ppl: 15.774869 +epoch: 1, batch: 15842, sum loss: 4573.065918, avg loss: 2.691622, ppl: 14.755594 +epoch: 1, batch: 15843, sum loss: 4898.233887, avg loss: 2.756463, ppl: 15.744051 +epoch: 1, batch: 15844, sum loss: 4677.254395, avg loss: 2.752946, ppl: 15.688778 +epoch: 1, batch: 15845, sum loss: 5175.724609, avg loss: 2.924138, ppl: 18.618176 +epoch: 1, batch: 15846, sum loss: 3704.625488, avg loss: 2.544386, ppl: 12.735402 +epoch: 1, batch: 15847, sum loss: 5038.090332, avg loss: 2.946252, ppl: 19.034472 +epoch: 1, batch: 15848, sum loss: 3462.124023, avg loss: 2.494326, ppl: 12.113562 +epoch: 1, batch: 15849, sum loss: 4384.343750, avg loss: 2.774901, ppl: 16.037043 +epoch: 1, batch: 15850, sum loss: 5310.007324, avg loss: 2.964828, ppl: 19.391369 +epoch: 1, batch: 15851, sum loss: 4779.783203, avg loss: 2.732866, ppl: 15.376899 +epoch: 1, batch: 15852, sum loss: 4067.288330, avg loss: 2.574233, ppl: 13.121250 +epoch: 1, batch: 15853, sum loss: 4398.564453, avg loss: 2.830479, ppl: 16.953583 +epoch: 1, batch: 15854, sum loss: 4976.369629, avg loss: 2.723793, ppl: 15.238011 +epoch: 1, batch: 15855, sum loss: 4632.794922, avg loss: 2.910047, ppl: 18.357662 +epoch: 1, batch: 15856, sum loss: 4435.444336, avg loss: 2.817944, ppl: 16.742397 +epoch: 1, batch: 15857, sum loss: 4550.390137, avg loss: 2.715030, ppl: 15.105062 +epoch: 1, batch: 15858, sum loss: 4977.282227, avg loss: 2.629309, ppl: 13.864185 +epoch: 1, batch: 15859, sum loss: 4626.187012, avg loss: 2.904072, ppl: 18.248306 +epoch: 1, batch: 15860, sum loss: 4419.976074, avg loss: 2.665848, ppl: 14.380139 +epoch: 1, batch: 15861, sum loss: 4952.564941, avg loss: 2.908141, ppl: 18.322712 +epoch: 1, batch: 15862, sum loss: 4612.116211, avg loss: 2.458484, ppl: 11.687082 +epoch: 1, batch: 15863, sum loss: 4737.542969, avg loss: 2.693316, ppl: 14.780610 +epoch: 1, batch: 15864, sum loss: 
3872.384766, avg loss: 2.450876, ppl: 11.598508 +epoch: 1, batch: 15865, sum loss: 4102.321777, avg loss: 2.443312, ppl: 11.511107 +epoch: 1, batch: 15866, sum loss: 3797.403809, avg loss: 2.560623, ppl: 12.943878 +epoch: 1, batch: 15867, sum loss: 4348.419434, avg loss: 2.419822, ppl: 11.243855 +epoch: 1, batch: 15868, sum loss: 4999.954590, avg loss: 3.004781, ppl: 20.181786 +epoch: 1, batch: 15869, sum loss: 4084.646973, avg loss: 2.564122, ppl: 12.989255 +epoch: 1, batch: 15870, sum loss: 5171.973145, avg loss: 2.829307, ppl: 16.933723 +epoch: 1, batch: 15871, sum loss: 4336.610840, avg loss: 2.671972, ppl: 14.468473 +epoch: 1, batch: 15872, sum loss: 3510.795898, avg loss: 2.421239, ppl: 11.259798 +epoch: 1, batch: 15873, sum loss: 4594.065430, avg loss: 2.492711, ppl: 12.094013 +epoch: 1, batch: 15874, sum loss: 3766.876953, avg loss: 2.349892, ppl: 10.484436 +epoch: 1, batch: 15875, sum loss: 3537.280029, avg loss: 2.298428, ppl: 9.958511 +epoch: 1, batch: 15876, sum loss: 4596.480469, avg loss: 2.684860, ppl: 14.656149 +epoch: 1, batch: 15877, sum loss: 3764.170410, avg loss: 2.359982, ppl: 10.590755 +epoch: 1, batch: 15878, sum loss: 4259.297363, avg loss: 2.508420, ppl: 12.285506 +epoch: 1, batch: 15879, sum loss: 4695.186035, avg loss: 2.686033, ppl: 14.673355 +epoch: 1, batch: 15880, sum loss: 4556.618652, avg loss: 2.792046, ppl: 16.314362 +epoch: 1, batch: 15881, sum loss: 5217.868164, avg loss: 2.713400, ppl: 15.080461 +epoch: 1, batch: 15882, sum loss: 5066.726562, avg loss: 2.964732, ppl: 19.389500 +epoch: 1, batch: 15883, sum loss: 5120.295410, avg loss: 2.852532, ppl: 17.331617 +epoch: 1, batch: 15884, sum loss: 4214.140625, avg loss: 2.627270, ppl: 13.835942 +epoch: 1, batch: 15885, sum loss: 3415.056641, avg loss: 2.272160, ppl: 9.700332 +epoch: 1, batch: 15886, sum loss: 4266.173340, avg loss: 2.902159, ppl: 18.213421 +epoch: 1, batch: 15887, sum loss: 5252.740723, avg loss: 2.871920, ppl: 17.670908 +epoch: 1, batch: 15888, sum loss: 
4931.651367, avg loss: 2.907813, ppl: 18.316702 +epoch: 1, batch: 15889, sum loss: 5241.980957, avg loss: 2.915451, ppl: 18.457130 +epoch: 1, batch: 15890, sum loss: 3695.664551, avg loss: 2.431358, ppl: 11.374322 +epoch: 1, batch: 15891, sum loss: 4855.404297, avg loss: 2.822909, ppl: 16.825731 +epoch: 1, batch: 15892, sum loss: 4244.527344, avg loss: 2.827800, ppl: 16.908218 +epoch: 1, batch: 15893, sum loss: 4663.746094, avg loss: 2.617141, ppl: 13.696516 +epoch: 1, batch: 15894, sum loss: 4524.254883, avg loss: 2.561866, ppl: 12.959972 +epoch: 1, batch: 15895, sum loss: 4121.662109, avg loss: 2.491936, ppl: 12.084649 +epoch: 1, batch: 15896, sum loss: 3618.325928, avg loss: 2.354148, ppl: 10.529159 +epoch: 1, batch: 15897, sum loss: 3733.224609, avg loss: 2.610647, ppl: 13.607848 +epoch: 1, batch: 15898, sum loss: 4854.145508, avg loss: 2.521634, ppl: 12.448920 +epoch: 1, batch: 15899, sum loss: 4764.029297, avg loss: 2.747422, ppl: 15.602353 +epoch: 1, batch: 15900, sum loss: 4044.720947, avg loss: 2.691098, ppl: 14.747867 +epoch: 1, batch: 15901, sum loss: 5213.073242, avg loss: 2.941915, ppl: 18.952106 +epoch: 1, batch: 15902, sum loss: 4534.333008, avg loss: 2.530320, ppl: 12.557523 +epoch: 1, batch: 15903, sum loss: 4022.082031, avg loss: 2.814613, ppl: 16.686718 +epoch: 1, batch: 15904, sum loss: 4913.231934, avg loss: 2.755599, ppl: 15.730453 +epoch: 1, batch: 15905, sum loss: 5002.443359, avg loss: 2.843913, ppl: 17.182877 +epoch: 1, batch: 15906, sum loss: 3816.640137, avg loss: 2.361782, ppl: 10.609844 +epoch: 1, batch: 15907, sum loss: 4331.455566, avg loss: 2.717350, ppl: 15.140144 +epoch: 1, batch: 15908, sum loss: 5936.152344, avg loss: 2.869092, ppl: 17.621019 +epoch: 1, batch: 15909, sum loss: 4236.129883, avg loss: 2.655881, ppl: 14.237523 +epoch: 1, batch: 15910, sum loss: 5225.068359, avg loss: 2.783734, ppl: 16.179319 +epoch: 1, batch: 15911, sum loss: 4910.005859, avg loss: 2.732335, ppl: 15.368732 +epoch: 1, batch: 15912, sum loss: 
4811.775391, avg loss: 2.525866, ppl: 12.501720 +epoch: 1, batch: 15913, sum loss: 4084.461426, avg loss: 2.659155, ppl: 14.284209 +epoch: 1, batch: 15914, sum loss: 4864.661133, avg loss: 2.717688, ppl: 15.145260 +epoch: 1, batch: 15915, sum loss: 3361.202637, avg loss: 2.462420, ppl: 11.733166 +epoch: 1, batch: 15916, sum loss: 4787.362305, avg loss: 2.839479, ppl: 17.106857 +epoch: 1, batch: 15917, sum loss: 4504.366699, avg loss: 2.582779, ppl: 13.233863 +epoch: 1, batch: 15918, sum loss: 4649.320312, avg loss: 2.715724, ppl: 15.115557 +epoch: 1, batch: 15919, sum loss: 5521.728516, avg loss: 2.869921, ppl: 17.635628 +epoch: 1, batch: 15920, sum loss: 4587.029297, avg loss: 2.699841, ppl: 14.877363 +epoch: 1, batch: 15921, sum loss: 4016.559814, avg loss: 2.504090, ppl: 12.232420 +epoch: 1, batch: 15922, sum loss: 4438.103027, avg loss: 2.791260, ppl: 16.301542 +epoch: 1, batch: 15923, sum loss: 4464.857910, avg loss: 2.654493, ppl: 14.217780 +epoch: 1, batch: 15924, sum loss: 4225.002441, avg loss: 2.675746, ppl: 14.523176 +epoch: 1, batch: 15925, sum loss: 5541.869629, avg loss: 3.050011, ppl: 21.115576 +epoch: 1, batch: 15926, sum loss: 4783.855469, avg loss: 2.762041, ppl: 15.832129 +epoch: 1, batch: 15927, sum loss: 4021.320801, avg loss: 2.446059, ppl: 11.542764 +epoch: 1, batch: 15928, sum loss: 5662.656250, avg loss: 3.111350, ppl: 22.451330 +epoch: 1, batch: 15929, sum loss: 4466.763184, avg loss: 2.581944, ppl: 13.222818 +epoch: 1, batch: 15930, sum loss: 3626.665527, avg loss: 2.379702, ppl: 10.801682 +epoch: 1, batch: 15931, sum loss: 4676.440430, avg loss: 2.725199, ppl: 15.259442 +epoch: 1, batch: 15932, sum loss: 4677.416504, avg loss: 2.584208, ppl: 13.252792 +epoch: 1, batch: 15933, sum loss: 4941.494141, avg loss: 2.646756, ppl: 14.108203 +epoch: 1, batch: 15934, sum loss: 4697.531250, avg loss: 2.652474, ppl: 14.189098 +epoch: 1, batch: 15935, sum loss: 4054.765625, avg loss: 2.687055, ppl: 14.688354 +epoch: 1, batch: 15936, sum loss: 
3963.693359, avg loss: 2.633683, ppl: 13.924968 +epoch: 1, batch: 15937, sum loss: 4872.565430, avg loss: 2.886591, ppl: 17.932074 +epoch: 1, batch: 15938, sum loss: 4505.381836, avg loss: 2.650225, ppl: 14.157219 +epoch: 1, batch: 15939, sum loss: 4789.770996, avg loss: 2.678843, ppl: 14.568225 +epoch: 1, batch: 15940, sum loss: 5439.120117, avg loss: 3.076426, ppl: 21.680767 +epoch: 1, batch: 15941, sum loss: 5288.355469, avg loss: 2.878800, ppl: 17.792912 +epoch: 1, batch: 15942, sum loss: 4833.787598, avg loss: 2.808709, ppl: 16.588484 +epoch: 1, batch: 15943, sum loss: 4915.712402, avg loss: 2.464016, ppl: 11.751915 +epoch: 1, batch: 15944, sum loss: 4015.151611, avg loss: 2.634614, ppl: 13.937931 +epoch: 1, batch: 15945, sum loss: 4511.418945, avg loss: 2.630565, ppl: 13.881610 +epoch: 1, batch: 15946, sum loss: 4737.779297, avg loss: 2.867905, ppl: 17.600115 +epoch: 1, batch: 15947, sum loss: 5117.244629, avg loss: 3.038744, ppl: 20.879002 +epoch: 1, batch: 15948, sum loss: 4649.866211, avg loss: 2.749773, ppl: 15.639086 +epoch: 1, batch: 15949, sum loss: 4856.667969, avg loss: 2.568307, ppl: 13.043718 +epoch: 1, batch: 15950, sum loss: 5553.039062, avg loss: 3.083309, ppl: 21.830513 +epoch: 1, batch: 15951, sum loss: 5457.884766, avg loss: 2.750950, ppl: 15.657497 +epoch: 1, batch: 15952, sum loss: 3969.550537, avg loss: 2.504448, ppl: 12.236805 +epoch: 1, batch: 15953, sum loss: 4667.062012, avg loss: 2.683762, ppl: 14.640067 +epoch: 1, batch: 15954, sum loss: 3701.656250, avg loss: 2.558159, ppl: 12.912025 +epoch: 1, batch: 15955, sum loss: 4805.859863, avg loss: 2.916177, ppl: 18.470541 +epoch: 1, batch: 15956, sum loss: 5818.913574, avg loss: 2.906550, ppl: 18.293579 +epoch: 1, batch: 15957, sum loss: 5645.661133, avg loss: 2.862911, ppl: 17.512436 +epoch: 1, batch: 15958, sum loss: 4182.620605, avg loss: 2.620690, ppl: 13.745199 +epoch: 1, batch: 15959, sum loss: 5114.419434, avg loss: 2.684735, ppl: 14.654311 +epoch: 1, batch: 15960, sum loss: 
4109.298340, avg loss: 2.491994, ppl: 12.085352 +epoch: 1, batch: 15961, sum loss: 4736.249023, avg loss: 2.631249, ppl: 13.891115 +epoch: 1, batch: 15962, sum loss: 4383.166016, avg loss: 2.563255, ppl: 12.977990 +epoch: 1, batch: 15963, sum loss: 4579.022949, avg loss: 2.696716, ppl: 14.830941 +epoch: 1, batch: 15964, sum loss: 4685.585938, avg loss: 2.746533, ppl: 15.588499 +epoch: 1, batch: 15965, sum loss: 3624.237305, avg loss: 2.414549, ppl: 11.184721 +epoch: 1, batch: 15966, sum loss: 4777.995605, avg loss: 2.924110, ppl: 18.617647 +epoch: 1, batch: 15967, sum loss: 4724.084473, avg loss: 2.961808, ppl: 19.332903 +epoch: 1, batch: 15968, sum loss: 4110.469727, avg loss: 2.665674, ppl: 14.377640 +epoch: 1, batch: 15969, sum loss: 5249.543457, avg loss: 2.870172, ppl: 17.640043 +epoch: 1, batch: 15970, sum loss: 4993.971191, avg loss: 2.699444, ppl: 14.871462 +epoch: 1, batch: 15971, sum loss: 4466.796875, avg loss: 2.643075, ppl: 14.056360 +epoch: 1, batch: 15972, sum loss: 4102.484375, avg loss: 2.755194, ppl: 15.724098 +epoch: 1, batch: 15973, sum loss: 5121.458496, avg loss: 2.751993, ppl: 15.673838 +epoch: 1, batch: 15974, sum loss: 4495.358398, avg loss: 2.754509, ppl: 15.713324 +epoch: 1, batch: 15975, sum loss: 3850.936035, avg loss: 2.478080, ppl: 11.918357 +epoch: 1, batch: 15976, sum loss: 4592.960938, avg loss: 2.561607, ppl: 12.956620 +epoch: 1, batch: 15977, sum loss: 4945.943359, avg loss: 2.887299, ppl: 17.944777 +epoch: 1, batch: 15978, sum loss: 4009.418457, avg loss: 2.493419, ppl: 12.102590 +epoch: 1, batch: 15979, sum loss: 5211.375000, avg loss: 2.830731, ppl: 16.957848 +epoch: 1, batch: 15980, sum loss: 4027.318115, avg loss: 2.603308, ppl: 13.508356 +epoch: 1, batch: 15981, sum loss: 5180.094238, avg loss: 2.641558, ppl: 14.035048 +epoch: 1, batch: 15982, sum loss: 4740.368164, avg loss: 2.878183, ppl: 17.781940 +epoch: 1, batch: 15983, sum loss: 4305.474121, avg loss: 2.447683, ppl: 11.561526 +epoch: 1, batch: 15984, sum loss: 
4033.210205, avg loss: 2.683440, ppl: 14.635348 +epoch: 1, batch: 15985, sum loss: 4644.500977, avg loss: 2.899189, ppl: 18.159412 +epoch: 1, batch: 15986, sum loss: 4309.020020, avg loss: 2.608366, ppl: 13.576845 +epoch: 1, batch: 15987, sum loss: 5132.316406, avg loss: 3.114270, ppl: 22.516981 +epoch: 1, batch: 15988, sum loss: 4320.273438, avg loss: 2.690083, ppl: 14.732899 +epoch: 1, batch: 15989, sum loss: 3665.529297, avg loss: 2.383309, ppl: 10.840717 +epoch: 1, batch: 15990, sum loss: 4597.195312, avg loss: 2.867870, ppl: 17.599489 +epoch: 1, batch: 15991, sum loss: 5565.702148, avg loss: 2.879308, ppl: 17.801950 +epoch: 1, batch: 15992, sum loss: 4653.130859, avg loss: 2.686565, ppl: 14.681162 +epoch: 1, batch: 15993, sum loss: 4974.299805, avg loss: 2.624960, ppl: 13.804028 +epoch: 1, batch: 15994, sum loss: 4716.250000, avg loss: 2.676646, ppl: 14.536257 +epoch: 1, batch: 15995, sum loss: 4464.716309, avg loss: 2.470789, ppl: 11.831780 +epoch: 1, batch: 15996, sum loss: 4171.482422, avg loss: 2.945962, ppl: 19.028963 +epoch: 1, batch: 15997, sum loss: 5477.584473, avg loss: 2.879908, ppl: 17.812632 +epoch: 1, batch: 15998, sum loss: 3864.809570, avg loss: 2.464802, ppl: 11.761153 +epoch: 1, batch: 15999, sum loss: 3588.927734, avg loss: 2.604447, ppl: 13.523743 +epoch: 1, batch: 16000, sum loss: 4947.126465, avg loss: 2.798149, ppl: 16.414230 +epoch: 1, batch: 16001, sum loss: 4053.094971, avg loss: 2.801033, ppl: 16.461647 +epoch: 1, batch: 16002, sum loss: 5629.100098, avg loss: 2.855961, ppl: 17.391148 +epoch: 1, batch: 16003, sum loss: 5302.230957, avg loss: 2.958834, ppl: 19.275492 +epoch: 1, batch: 16004, sum loss: 4583.436523, avg loss: 2.576412, ppl: 13.149868 +epoch: 1, batch: 16005, sum loss: 4690.977051, avg loss: 2.855129, ppl: 17.376678 +epoch: 1, batch: 16006, sum loss: 4961.518066, avg loss: 2.887962, ppl: 17.956671 +epoch: 1, batch: 16007, sum loss: 4522.037109, avg loss: 2.570800, ppl: 13.076283 +epoch: 1, batch: 16008, sum loss: 
4521.048340, avg loss: 2.746688, ppl: 15.590911 +epoch: 1, batch: 16009, sum loss: 4096.652344, avg loss: 2.573274, ppl: 13.108674 +epoch: 1, batch: 16010, sum loss: 2875.404785, avg loss: 2.235929, ppl: 9.355169 +epoch: 1, batch: 16011, sum loss: 4901.630859, avg loss: 2.786601, ppl: 16.225771 +epoch: 1, batch: 16012, sum loss: 4610.083496, avg loss: 2.589935, ppl: 13.328899 +epoch: 1, batch: 16013, sum loss: 5032.584473, avg loss: 2.832068, ppl: 16.980536 +epoch: 1, batch: 16014, sum loss: 5195.244629, avg loss: 2.802181, ppl: 16.480560 +epoch: 1, batch: 16015, sum loss: 4229.606445, avg loss: 2.673582, ppl: 14.491780 +epoch: 1, batch: 16016, sum loss: 4595.012695, avg loss: 2.636266, ppl: 13.960982 +epoch: 1, batch: 16017, sum loss: 5981.829102, avg loss: 3.149989, ppl: 23.335812 +epoch: 1, batch: 16018, sum loss: 3867.000977, avg loss: 2.236553, ppl: 9.361012 +epoch: 1, batch: 16019, sum loss: 4528.951660, avg loss: 2.749819, ppl: 15.639798 +epoch: 1, batch: 16020, sum loss: 3716.303467, avg loss: 2.405375, ppl: 11.082581 +epoch: 1, batch: 16021, sum loss: 4587.631836, avg loss: 2.711366, ppl: 15.049826 +epoch: 1, batch: 16022, sum loss: 4209.990723, avg loss: 2.546879, ppl: 12.767193 +epoch: 1, batch: 16023, sum loss: 5057.633789, avg loss: 2.800462, ppl: 16.452242 +epoch: 1, batch: 16024, sum loss: 4790.285645, avg loss: 2.819474, ppl: 16.768024 +epoch: 1, batch: 16025, sum loss: 4440.466797, avg loss: 2.681441, ppl: 14.606130 +epoch: 1, batch: 16026, sum loss: 4140.243164, avg loss: 2.533808, ppl: 12.601407 +epoch: 1, batch: 16027, sum loss: 4547.197754, avg loss: 2.829619, ppl: 16.939009 +epoch: 1, batch: 16028, sum loss: 4110.681152, avg loss: 2.674484, ppl: 14.504857 +epoch: 1, batch: 16029, sum loss: 3513.632080, avg loss: 2.673997, ppl: 14.497800 +epoch: 1, batch: 16030, sum loss: 4564.838867, avg loss: 2.826526, ppl: 16.886688 +epoch: 1, batch: 16031, sum loss: 4125.793457, avg loss: 2.529610, ppl: 12.548607 +epoch: 1, batch: 16032, sum loss: 
4633.312012, avg loss: 2.757924, ppl: 15.767074 +epoch: 1, batch: 16033, sum loss: 5115.413574, avg loss: 2.875443, ppl: 17.733282 +epoch: 1, batch: 16034, sum loss: 5270.951172, avg loss: 2.956226, ppl: 19.225285 +epoch: 1, batch: 16035, sum loss: 4388.661621, avg loss: 2.767126, ppl: 15.912832 +epoch: 1, batch: 16036, sum loss: 4969.470215, avg loss: 2.899341, ppl: 18.162170 +epoch: 1, batch: 16037, sum loss: 3899.547363, avg loss: 2.615391, ppl: 13.672558 +epoch: 1, batch: 16038, sum loss: 4930.248047, avg loss: 2.683859, ppl: 14.641480 +epoch: 1, batch: 16039, sum loss: 4821.663086, avg loss: 2.548448, ppl: 12.787237 +epoch: 1, batch: 16040, sum loss: 5039.526855, avg loss: 2.649594, ppl: 14.148288 +epoch: 1, batch: 16041, sum loss: 4071.316650, avg loss: 2.563802, ppl: 12.985086 +epoch: 1, batch: 16042, sum loss: 4694.272461, avg loss: 2.850196, ppl: 17.291164 +epoch: 1, batch: 16043, sum loss: 4457.183105, avg loss: 2.638948, ppl: 13.998469 +epoch: 1, batch: 16044, sum loss: 4088.151367, avg loss: 2.486710, ppl: 12.021661 +epoch: 1, batch: 16045, sum loss: 4517.563477, avg loss: 2.669955, ppl: 14.439316 +epoch: 1, batch: 16046, sum loss: 4445.842285, avg loss: 2.574315, ppl: 13.122330 +epoch: 1, batch: 16047, sum loss: 5340.901855, avg loss: 3.010655, ppl: 20.300686 +epoch: 1, batch: 16048, sum loss: 4628.340820, avg loss: 2.544442, ppl: 12.736125 +epoch: 1, batch: 16049, sum loss: 4064.436035, avg loss: 2.527634, ppl: 12.523845 +epoch: 1, batch: 16050, sum loss: 4309.251465, avg loss: 2.727374, ppl: 15.292681 +epoch: 1, batch: 16051, sum loss: 4628.108887, avg loss: 2.887155, ppl: 17.942184 +epoch: 1, batch: 16052, sum loss: 4197.936035, avg loss: 2.625351, ppl: 13.809417 +epoch: 1, batch: 16053, sum loss: 4205.324707, avg loss: 2.452084, ppl: 11.612526 +epoch: 1, batch: 16054, sum loss: 5466.812012, avg loss: 3.048975, ppl: 21.093712 +epoch: 1, batch: 16055, sum loss: 3959.338135, avg loss: 2.562679, ppl: 12.970513 +epoch: 1, batch: 16056, sum loss: 
4894.160156, avg loss: 2.976983, ppl: 19.628510 +epoch: 1, batch: 16057, sum loss: 4481.865234, avg loss: 2.561066, ppl: 12.949614 +epoch: 1, batch: 16058, sum loss: 4593.579102, avg loss: 2.740799, ppl: 15.499367 +epoch: 1, batch: 16059, sum loss: 4222.854492, avg loss: 2.715662, ppl: 15.114616 +epoch: 1, batch: 16060, sum loss: 5046.987305, avg loss: 2.757917, ppl: 15.766961 +epoch: 1, batch: 16061, sum loss: 5179.995117, avg loss: 2.852420, ppl: 17.329670 +epoch: 1, batch: 16062, sum loss: 4873.363770, avg loss: 2.695445, ppl: 14.812102 +epoch: 1, batch: 16063, sum loss: 4175.524414, avg loss: 2.729101, ppl: 15.319108 +epoch: 1, batch: 16064, sum loss: 5403.584961, avg loss: 2.975542, ppl: 19.600250 +epoch: 1, batch: 16065, sum loss: 4016.626465, avg loss: 2.351655, ppl: 10.502935 +epoch: 1, batch: 16066, sum loss: 3960.861816, avg loss: 2.725989, ppl: 15.271508 +epoch: 1, batch: 16067, sum loss: 4836.193359, avg loss: 2.740053, ppl: 15.487805 +epoch: 1, batch: 16068, sum loss: 4206.522461, avg loss: 2.515863, ppl: 12.377285 +epoch: 1, batch: 16069, sum loss: 5146.222168, avg loss: 2.905828, ppl: 18.280382 +epoch: 1, batch: 16070, sum loss: 4390.363281, avg loss: 2.721862, ppl: 15.208615 +epoch: 1, batch: 16071, sum loss: 5020.008301, avg loss: 2.977466, ppl: 19.637999 +epoch: 1, batch: 16072, sum loss: 4401.597656, avg loss: 2.759622, ppl: 15.793877 +epoch: 1, batch: 16073, sum loss: 4876.750000, avg loss: 2.650408, ppl: 14.159808 +epoch: 1, batch: 16074, sum loss: 4345.620605, avg loss: 2.538330, ppl: 12.658508 +epoch: 1, batch: 16075, sum loss: 3639.484619, avg loss: 2.449182, ppl: 11.578872 +epoch: 1, batch: 16076, sum loss: 4471.416016, avg loss: 2.679099, ppl: 14.571959 +epoch: 1, batch: 16077, sum loss: 4494.943848, avg loss: 2.647199, ppl: 14.114450 +epoch: 1, batch: 16078, sum loss: 4747.595215, avg loss: 2.795993, ppl: 16.378878 +epoch: 1, batch: 16079, sum loss: 5130.117188, avg loss: 2.811023, ppl: 16.626923 +epoch: 1, batch: 16080, sum loss: 
4385.486816, avg loss: 2.602663, ppl: 13.499637 +epoch: 1, batch: 16081, sum loss: 4161.868164, avg loss: 2.620824, ppl: 13.747045 +epoch: 1, batch: 16082, sum loss: 5251.049316, avg loss: 2.771002, ppl: 15.974637 +epoch: 1, batch: 16083, sum loss: 4184.201660, avg loss: 2.563849, ppl: 12.985706 +epoch: 1, batch: 16084, sum loss: 5018.732422, avg loss: 2.760579, ppl: 15.808991 +epoch: 1, batch: 16085, sum loss: 4975.795898, avg loss: 2.770488, ppl: 15.966420 +epoch: 1, batch: 16086, sum loss: 5125.018066, avg loss: 2.965867, ppl: 19.411526 +epoch: 1, batch: 16087, sum loss: 4924.486816, avg loss: 2.757272, ppl: 15.756792 +epoch: 1, batch: 16088, sum loss: 5041.479004, avg loss: 2.785348, ppl: 16.205452 +epoch: 1, batch: 16089, sum loss: 4935.276855, avg loss: 2.818548, ppl: 16.752504 +epoch: 1, batch: 16090, sum loss: 5798.178711, avg loss: 2.903445, ppl: 18.236855 +epoch: 1, batch: 16091, sum loss: 4259.222168, avg loss: 2.523236, ppl: 12.468882 +epoch: 1, batch: 16092, sum loss: 4200.270996, avg loss: 2.803919, ppl: 16.509220 +epoch: 1, batch: 16093, sum loss: 4180.929688, avg loss: 2.986378, ppl: 19.813795 +epoch: 1, batch: 16094, sum loss: 4276.250000, avg loss: 2.661014, ppl: 14.310798 +epoch: 1, batch: 16095, sum loss: 4431.809082, avg loss: 2.757815, ppl: 15.765364 +epoch: 1, batch: 16096, sum loss: 4543.556152, avg loss: 2.732144, ppl: 15.365802 +epoch: 1, batch: 16097, sum loss: 3839.153564, avg loss: 2.457845, ppl: 11.679612 +epoch: 1, batch: 16098, sum loss: 4239.861816, avg loss: 2.654892, ppl: 14.223446 +epoch: 1, batch: 16099, sum loss: 4815.021973, avg loss: 2.688454, ppl: 14.708924 +epoch: 1, batch: 16100, sum loss: 4343.544434, avg loss: 2.679546, ppl: 14.578475 +epoch: 1, batch: 16101, sum loss: 4861.378906, avg loss: 2.869763, ppl: 17.632841 +epoch: 1, batch: 16102, sum loss: 4442.465820, avg loss: 2.518405, ppl: 12.408786 +epoch: 1, batch: 16103, sum loss: 4328.464355, avg loss: 2.658762, ppl: 14.278598 +epoch: 1, batch: 16104, sum loss: 
5177.301758, avg loss: 2.717744, ppl: 15.146109 +epoch: 1, batch: 16105, sum loss: 4842.812500, avg loss: 2.799314, ppl: 16.433363 +epoch: 1, batch: 16106, sum loss: 5302.458008, avg loss: 2.834024, ppl: 17.013783 +epoch: 1, batch: 16107, sum loss: 4585.123535, avg loss: 2.684499, ppl: 14.650853 +epoch: 1, batch: 16108, sum loss: 4250.745117, avg loss: 2.607819, ppl: 13.569425 +epoch: 1, batch: 16109, sum loss: 4836.654297, avg loss: 2.709610, ppl: 15.023422 +epoch: 1, batch: 16110, sum loss: 4765.443359, avg loss: 2.532116, ppl: 12.580103 +epoch: 1, batch: 16111, sum loss: 4541.268066, avg loss: 2.578801, ppl: 13.181320 +epoch: 1, batch: 16112, sum loss: 4524.993652, avg loss: 2.679096, ppl: 14.571921 +epoch: 1, batch: 16113, sum loss: 5169.455078, avg loss: 2.729385, ppl: 15.323462 +epoch: 1, batch: 16114, sum loss: 4749.801758, avg loss: 2.803897, ppl: 16.508860 +epoch: 1, batch: 16115, sum loss: 4912.832520, avg loss: 2.957756, ppl: 19.254711 +epoch: 1, batch: 16116, sum loss: 5027.155273, avg loss: 2.771310, ppl: 15.979548 +epoch: 1, batch: 16117, sum loss: 4137.652344, avg loss: 2.633770, ppl: 13.926172 +epoch: 1, batch: 16118, sum loss: 4767.765625, avg loss: 2.713583, ppl: 15.083221 +epoch: 1, batch: 16119, sum loss: 4399.630859, avg loss: 2.727607, ppl: 15.296247 +epoch: 1, batch: 16120, sum loss: 4331.177734, avg loss: 2.561312, ppl: 12.952797 +epoch: 1, batch: 16121, sum loss: 4690.833008, avg loss: 3.038105, ppl: 20.865656 +epoch: 1, batch: 16122, sum loss: 5293.322266, avg loss: 2.633494, ppl: 13.922328 +epoch: 1, batch: 16123, sum loss: 3874.185059, avg loss: 2.537122, ppl: 12.643229 +epoch: 1, batch: 16124, sum loss: 4271.239258, avg loss: 2.784380, ppl: 16.189781 +epoch: 1, batch: 16125, sum loss: 4792.429199, avg loss: 2.883532, ppl: 17.877296 +epoch: 1, batch: 16126, sum loss: 4424.857910, avg loss: 2.678486, ppl: 14.563023 +epoch: 1, batch: 16127, sum loss: 4853.970703, avg loss: 2.853598, ppl: 17.350098 +epoch: 1, batch: 16128, sum loss: 
4415.384277, avg loss: 2.640780, ppl: 14.024141 +epoch: 1, batch: 16129, sum loss: 5024.888184, avg loss: 2.717625, ppl: 15.144307 +epoch: 1, batch: 16130, sum loss: 4171.743164, avg loss: 2.398932, ppl: 11.011413 +epoch: 1, batch: 16131, sum loss: 4782.082031, avg loss: 2.682043, ppl: 14.614919 +epoch: 1, batch: 16132, sum loss: 5204.206543, avg loss: 2.868912, ppl: 17.617844 +epoch: 1, batch: 16133, sum loss: 4030.629883, avg loss: 2.294041, ppl: 9.914922 +epoch: 1, batch: 16134, sum loss: 5070.499023, avg loss: 2.872804, ppl: 17.686541 +epoch: 1, batch: 16135, sum loss: 5087.591309, avg loss: 2.858197, ppl: 17.430080 +epoch: 1, batch: 16136, sum loss: 4153.095215, avg loss: 2.670801, ppl: 14.451539 +epoch: 1, batch: 16137, sum loss: 4374.158691, avg loss: 2.715182, ppl: 15.107364 +epoch: 1, batch: 16138, sum loss: 4900.616211, avg loss: 2.685269, ppl: 14.662147 +epoch: 1, batch: 16139, sum loss: 4723.547363, avg loss: 2.611137, ppl: 13.614527 +epoch: 1, batch: 16140, sum loss: 4945.275391, avg loss: 2.723169, ppl: 15.228506 +epoch: 1, batch: 16141, sum loss: 4833.100586, avg loss: 2.643928, ppl: 14.068357 +epoch: 1, batch: 16142, sum loss: 4531.155762, avg loss: 2.701941, ppl: 14.908645 +epoch: 1, batch: 16143, sum loss: 4695.572754, avg loss: 2.847528, ppl: 17.245090 +epoch: 1, batch: 16144, sum loss: 5037.908203, avg loss: 2.723194, ppl: 15.228880 +epoch: 1, batch: 16145, sum loss: 4718.437500, avg loss: 2.716429, ppl: 15.126213 +epoch: 1, batch: 16146, sum loss: 4480.415527, avg loss: 2.660579, ppl: 14.304575 +epoch: 1, batch: 16147, sum loss: 5717.070312, avg loss: 2.875790, ppl: 17.739431 +epoch: 1, batch: 16148, sum loss: 3419.876953, avg loss: 2.516466, ppl: 12.384748 +epoch: 1, batch: 16149, sum loss: 3792.665039, avg loss: 2.652213, ppl: 14.185401 +epoch: 1, batch: 16150, sum loss: 3737.168945, avg loss: 2.533674, ppl: 12.599710 +epoch: 1, batch: 16151, sum loss: 4917.061523, avg loss: 2.682521, ppl: 14.621914 +epoch: 1, batch: 16152, sum loss: 
4110.393555, avg loss: 2.616419, ppl: 13.686619 +epoch: 1, batch: 16153, sum loss: 4008.016113, avg loss: 2.677365, ppl: 14.546717 +epoch: 1, batch: 16154, sum loss: 4955.860352, avg loss: 2.785756, ppl: 16.212070 +epoch: 1, batch: 16155, sum loss: 3946.056641, avg loss: 2.644810, ppl: 14.080772 +epoch: 1, batch: 16156, sum loss: 4345.537109, avg loss: 2.594351, ppl: 13.387890 +epoch: 1, batch: 16157, sum loss: 3962.564209, avg loss: 2.487485, ppl: 12.030985 +epoch: 1, batch: 16158, sum loss: 4039.563232, avg loss: 2.532642, ppl: 12.586712 +epoch: 1, batch: 16159, sum loss: 4224.450195, avg loss: 2.392101, ppl: 10.936445 +epoch: 1, batch: 16160, sum loss: 4021.834961, avg loss: 2.584727, ppl: 13.259666 +epoch: 1, batch: 16161, sum loss: 4779.673340, avg loss: 2.925137, ppl: 18.636776 +epoch: 1, batch: 16162, sum loss: 5178.348145, avg loss: 2.802136, ppl: 16.479816 +epoch: 1, batch: 16163, sum loss: 5270.491211, avg loss: 3.083962, ppl: 21.844784 +epoch: 1, batch: 16164, sum loss: 3652.201660, avg loss: 2.545088, ppl: 12.744353 +epoch: 1, batch: 16165, sum loss: 4950.934082, avg loss: 2.797138, ppl: 16.397650 +epoch: 1, batch: 16166, sum loss: 5308.919922, avg loss: 2.872792, ppl: 17.686335 +epoch: 1, batch: 16167, sum loss: 4851.512695, avg loss: 2.696783, ppl: 14.831942 +epoch: 1, batch: 16168, sum loss: 4480.250488, avg loss: 2.716950, ppl: 15.134092 +epoch: 1, batch: 16169, sum loss: 4628.002441, avg loss: 2.903389, ppl: 18.235846 +epoch: 1, batch: 16170, sum loss: 3766.314941, avg loss: 2.572619, ppl: 13.100095 +epoch: 1, batch: 16171, sum loss: 4096.237305, avg loss: 2.653004, ppl: 14.196617 +epoch: 1, batch: 16172, sum loss: 4533.739258, avg loss: 2.698654, ppl: 14.859723 +epoch: 1, batch: 16173, sum loss: 4939.170898, avg loss: 2.665500, ppl: 14.375131 +epoch: 1, batch: 16174, sum loss: 4977.836914, avg loss: 2.912719, ppl: 18.406784 +epoch: 1, batch: 16175, sum loss: 5354.898438, avg loss: 2.700403, ppl: 14.885726 +epoch: 1, batch: 16176, sum loss: 
4569.916504, avg loss: 2.692939, ppl: 14.775029 +epoch: 1, batch: 16177, sum loss: 4587.933105, avg loss: 2.621676, ppl: 13.758763 +epoch: 1, batch: 16178, sum loss: 4353.928711, avg loss: 2.684296, ppl: 14.647891 +epoch: 1, batch: 16179, sum loss: 4494.435547, avg loss: 2.626789, ppl: 13.829290 +epoch: 1, batch: 16180, sum loss: 4007.642578, avg loss: 2.652311, ppl: 14.186791 +epoch: 1, batch: 16181, sum loss: 4292.107422, avg loss: 2.523285, ppl: 12.469488 +epoch: 1, batch: 16182, sum loss: 4260.253418, avg loss: 2.729182, ppl: 15.320354 +epoch: 1, batch: 16183, sum loss: 5101.860840, avg loss: 2.874288, ppl: 17.712801 +epoch: 1, batch: 16184, sum loss: 5631.506836, avg loss: 2.997077, ppl: 20.026907 +epoch: 1, batch: 16185, sum loss: 4386.340820, avg loss: 2.666468, ppl: 14.389063 +epoch: 1, batch: 16186, sum loss: 4334.923340, avg loss: 2.548456, ppl: 12.787341 +epoch: 1, batch: 16187, sum loss: 4751.928711, avg loss: 2.966248, ppl: 19.418915 +epoch: 1, batch: 16188, sum loss: 4621.862305, avg loss: 2.609747, ppl: 13.595613 +epoch: 1, batch: 16189, sum loss: 5449.812500, avg loss: 2.741354, ppl: 15.507972 +epoch: 1, batch: 16190, sum loss: 4873.702148, avg loss: 2.707612, ppl: 14.993433 +epoch: 1, batch: 16191, sum loss: 4504.300781, avg loss: 2.629481, ppl: 13.866570 +epoch: 1, batch: 16192, sum loss: 4823.876953, avg loss: 3.031978, ppl: 20.738211 +epoch: 1, batch: 16193, sum loss: 5699.829590, avg loss: 2.932011, ppl: 18.765337 +epoch: 1, batch: 16194, sum loss: 6232.819336, avg loss: 2.922091, ppl: 18.580093 +epoch: 1, batch: 16195, sum loss: 5499.303711, avg loss: 2.827406, ppl: 16.901556 +epoch: 1, batch: 16196, sum loss: 5166.424316, avg loss: 2.810894, ppl: 16.624783 +epoch: 1, batch: 16197, sum loss: 4224.417969, avg loss: 2.394795, ppl: 10.965946 +epoch: 1, batch: 16198, sum loss: 4603.910645, avg loss: 2.689200, ppl: 14.719897 +epoch: 1, batch: 16199, sum loss: 4226.625000, avg loss: 2.725097, ppl: 15.257890 +epoch: 1, batch: 16200, sum loss: 
4095.545410, avg loss: 2.623668, ppl: 13.786198 +epoch: 1, batch: 16201, sum loss: 4180.584473, avg loss: 2.590201, ppl: 13.332453 +epoch: 1, batch: 16202, sum loss: 4537.946777, avg loss: 2.868487, ppl: 17.610355 +epoch: 1, batch: 16203, sum loss: 4962.444824, avg loss: 2.822778, ppl: 16.823530 +epoch: 1, batch: 16204, sum loss: 4896.897949, avg loss: 2.848690, ppl: 17.265150 +epoch: 1, batch: 16205, sum loss: 4397.663086, avg loss: 2.628609, ppl: 13.854487 +epoch: 1, batch: 16206, sum loss: 5279.416992, avg loss: 2.743980, ppl: 15.548741 +epoch: 1, batch: 16207, sum loss: 4886.478516, avg loss: 2.886284, ppl: 17.926569 +epoch: 1, batch: 16208, sum loss: 5000.729980, avg loss: 2.675618, ppl: 14.521320 +epoch: 1, batch: 16209, sum loss: 5052.079102, avg loss: 2.673058, ppl: 14.484191 +epoch: 1, batch: 16210, sum loss: 4038.384766, avg loss: 2.562427, ppl: 12.967248 +epoch: 1, batch: 16211, sum loss: 5109.930664, avg loss: 2.549865, ppl: 12.805381 +epoch: 1, batch: 16212, sum loss: 4630.726562, avg loss: 2.576921, ppl: 13.156564 +epoch: 1, batch: 16213, sum loss: 4570.397461, avg loss: 2.559013, ppl: 12.923058 +epoch: 1, batch: 16214, sum loss: 3911.664062, avg loss: 2.531821, ppl: 12.576393 +epoch: 1, batch: 16215, sum loss: 3994.089111, avg loss: 2.678799, ppl: 14.567586 +epoch: 1, batch: 16216, sum loss: 4994.461914, avg loss: 2.754805, ppl: 15.717980 +epoch: 1, batch: 16217, sum loss: 5815.798340, avg loss: 3.121738, ppl: 22.685778 +epoch: 1, batch: 16218, sum loss: 4168.423340, avg loss: 2.628262, ppl: 13.849675 +epoch: 1, batch: 16219, sum loss: 4111.289062, avg loss: 2.722708, ppl: 15.221486 +epoch: 1, batch: 16220, sum loss: 4344.892578, avg loss: 2.561847, ppl: 12.959731 +epoch: 1, batch: 16221, sum loss: 4774.687988, avg loss: 2.560154, ppl: 12.937816 +epoch: 1, batch: 16222, sum loss: 4538.664551, avg loss: 2.595006, ppl: 13.396661 +epoch: 1, batch: 16223, sum loss: 3698.276855, avg loss: 2.409301, ppl: 11.126179 +epoch: 1, batch: 16224, sum loss: 
3747.970215, avg loss: 2.421170, ppl: 11.259019 +epoch: 1, batch: 16225, sum loss: 5745.291016, avg loss: 3.216848, ppl: 24.949366 +epoch: 1, batch: 16226, sum loss: 4631.215332, avg loss: 2.805097, ppl: 16.528685 +epoch: 1, batch: 16227, sum loss: 4400.180176, avg loss: 2.935410, ppl: 18.829231 +epoch: 1, batch: 16228, sum loss: 4464.226074, avg loss: 2.550986, ppl: 12.819741 +epoch: 1, batch: 16229, sum loss: 4487.043457, avg loss: 2.709567, ppl: 15.022774 +epoch: 1, batch: 16230, sum loss: 5544.176270, avg loss: 2.700524, ppl: 14.887536 +epoch: 1, batch: 16231, sum loss: 4611.800293, avg loss: 2.601128, ppl: 13.478938 +epoch: 1, batch: 16232, sum loss: 4904.807617, avg loss: 2.772644, ppl: 16.000889 +epoch: 1, batch: 16233, sum loss: 4635.993652, avg loss: 2.727055, ppl: 15.287803 +epoch: 1, batch: 16234, sum loss: 4250.629883, avg loss: 2.816852, ppl: 16.724125 +epoch: 1, batch: 16235, sum loss: 3969.750732, avg loss: 2.339276, ppl: 10.373719 +epoch: 1, batch: 16236, sum loss: 4104.687988, avg loss: 2.689835, ppl: 14.729243 +epoch: 1, batch: 16237, sum loss: 4773.043945, avg loss: 2.854692, ppl: 17.369078 +epoch: 1, batch: 16238, sum loss: 4729.447754, avg loss: 2.880297, ppl: 17.819569 +epoch: 1, batch: 16239, sum loss: 4666.834961, avg loss: 2.847367, ppl: 17.242323 +epoch: 1, batch: 16240, sum loss: 4709.268555, avg loss: 2.736356, ppl: 15.430650 +epoch: 1, batch: 16241, sum loss: 4801.179199, avg loss: 2.553819, ppl: 12.856104 +epoch: 1, batch: 16242, sum loss: 4972.826172, avg loss: 2.730822, ppl: 15.345490 +epoch: 1, batch: 16243, sum loss: 4914.520508, avg loss: 2.704744, ppl: 14.950494 +epoch: 1, batch: 16244, sum loss: 5056.543457, avg loss: 2.876304, ppl: 17.748560 +epoch: 1, batch: 16245, sum loss: 5102.385254, avg loss: 2.969957, ppl: 19.491074 +epoch: 1, batch: 16246, sum loss: 4593.669434, avg loss: 2.680087, ppl: 14.586367 +epoch: 1, batch: 16247, sum loss: 3586.829102, avg loss: 2.560192, ppl: 12.938303 +epoch: 1, batch: 16248, sum loss: 
4311.834961, avg loss: 2.734201, ppl: 15.397435 +epoch: 1, batch: 16249, sum loss: 4455.490234, avg loss: 2.718420, ppl: 15.156353 +epoch: 1, batch: 16250, sum loss: 4840.517578, avg loss: 2.806097, ppl: 16.545221 +epoch: 1, batch: 16251, sum loss: 4920.830078, avg loss: 3.158428, ppl: 23.533566 +epoch: 1, batch: 16252, sum loss: 4626.097168, avg loss: 2.610664, ppl: 13.608088 +epoch: 1, batch: 16253, sum loss: 4086.564453, avg loss: 2.491808, ppl: 12.083099 +epoch: 1, batch: 16254, sum loss: 3823.342041, avg loss: 2.439912, ppl: 11.472033 +epoch: 1, batch: 16255, sum loss: 4254.502441, avg loss: 2.541519, ppl: 12.698945 +epoch: 1, batch: 16256, sum loss: 4648.393555, avg loss: 2.668423, ppl: 14.417221 +epoch: 1, batch: 16257, sum loss: 4933.214355, avg loss: 2.927724, ppl: 18.685049 +epoch: 1, batch: 16258, sum loss: 4378.117188, avg loss: 2.563300, ppl: 12.978580 +epoch: 1, batch: 16259, sum loss: 4200.669922, avg loss: 2.783744, ppl: 16.179489 +epoch: 1, batch: 16260, sum loss: 5560.079590, avg loss: 2.924818, ppl: 18.630840 +epoch: 1, batch: 16261, sum loss: 4796.116211, avg loss: 2.617967, ppl: 13.707832 +epoch: 1, batch: 16262, sum loss: 4954.158203, avg loss: 2.818065, ppl: 16.744417 +epoch: 1, batch: 16263, sum loss: 4462.145508, avg loss: 2.476218, ppl: 11.896194 +epoch: 1, batch: 16264, sum loss: 4019.929688, avg loss: 2.529849, ppl: 12.551608 +epoch: 1, batch: 16265, sum loss: 3675.364746, avg loss: 2.519099, ppl: 12.417397 +epoch: 1, batch: 16266, sum loss: 4474.866211, avg loss: 2.752070, ppl: 15.675049 +epoch: 1, batch: 16267, sum loss: 4745.412598, avg loss: 2.898847, ppl: 18.153204 +epoch: 1, batch: 16268, sum loss: 5165.090332, avg loss: 2.913193, ppl: 18.415501 +epoch: 1, batch: 16269, sum loss: 4977.867188, avg loss: 2.828334, ppl: 16.917246 +epoch: 1, batch: 16270, sum loss: 4134.563477, avg loss: 2.597088, ppl: 13.424583 +epoch: 1, batch: 16271, sum loss: 5942.728027, avg loss: 3.122821, ppl: 22.710352 +epoch: 1, batch: 16272, sum loss: 
4853.476074, avg loss: 2.828366, ppl: 16.917795 +epoch: 1, batch: 16273, sum loss: 3426.553223, avg loss: 2.394516, ppl: 10.962895 +epoch: 1, batch: 16274, sum loss: 4632.925293, avg loss: 2.690433, ppl: 14.738053 +epoch: 1, batch: 16275, sum loss: 4475.560059, avg loss: 2.582551, ppl: 13.230841 +epoch: 1, batch: 16276, sum loss: 5781.075684, avg loss: 2.866175, ppl: 17.569693 +epoch: 1, batch: 16277, sum loss: 4228.765137, avg loss: 2.615192, ppl: 13.669840 +epoch: 1, batch: 16278, sum loss: 4160.327637, avg loss: 2.660056, ppl: 14.297091 +epoch: 1, batch: 16279, sum loss: 4732.568359, avg loss: 2.486899, ppl: 12.023928 +epoch: 1, batch: 16280, sum loss: 4946.470215, avg loss: 2.689761, ppl: 14.728154 +epoch: 1, batch: 16281, sum loss: 4612.570312, avg loss: 2.718073, ppl: 15.151104 +epoch: 1, batch: 16282, sum loss: 5274.934570, avg loss: 2.753098, ppl: 15.691160 +epoch: 1, batch: 16283, sum loss: 4199.304688, avg loss: 2.581011, ppl: 13.210485 +epoch: 1, batch: 16284, sum loss: 5013.293945, avg loss: 2.544819, ppl: 12.740927 +epoch: 1, batch: 16285, sum loss: 5052.203125, avg loss: 2.789731, ppl: 16.276646 +epoch: 1, batch: 16286, sum loss: 3950.618896, avg loss: 2.534072, ppl: 12.604733 +epoch: 1, batch: 16287, sum loss: 5227.533203, avg loss: 2.935168, ppl: 18.824657 +epoch: 1, batch: 16288, sum loss: 5704.818359, avg loss: 2.918066, ppl: 18.505459 +epoch: 1, batch: 16289, sum loss: 4143.120605, avg loss: 2.418634, ppl: 11.230510 +epoch: 1, batch: 16290, sum loss: 4898.182129, avg loss: 3.029179, ppl: 20.680244 +epoch: 1, batch: 16291, sum loss: 4835.386230, avg loss: 2.764658, ppl: 15.873606 +epoch: 1, batch: 16292, sum loss: 3394.021484, avg loss: 2.540435, ppl: 12.685192 +epoch: 1, batch: 16293, sum loss: 5030.748047, avg loss: 2.940239, ppl: 18.920362 +epoch: 1, batch: 16294, sum loss: 4445.769043, avg loss: 2.676562, ppl: 14.535033 +epoch: 1, batch: 16295, sum loss: 4461.835449, avg loss: 2.643268, ppl: 14.059069 +epoch: 1, batch: 16296, sum loss: 
3404.577148, avg loss: 2.244283, ppl: 9.433646 +epoch: 1, batch: 16297, sum loss: 5617.733398, avg loss: 2.958259, ppl: 19.264400 +epoch: 1, batch: 16298, sum loss: 3836.748047, avg loss: 2.525838, ppl: 12.501366 +epoch: 1, batch: 16299, sum loss: 4819.137207, avg loss: 2.995113, ppl: 19.987621 +epoch: 1, batch: 16300, sum loss: 6084.567383, avg loss: 2.981170, ppl: 19.710859 +epoch: 1, batch: 16301, sum loss: 5061.913574, avg loss: 2.895832, ppl: 18.098551 +epoch: 1, batch: 16302, sum loss: 4618.807129, avg loss: 2.619857, ppl: 13.733754 +epoch: 1, batch: 16303, sum loss: 5184.429688, avg loss: 2.689020, ppl: 14.717241 +epoch: 1, batch: 16304, sum loss: 5171.477539, avg loss: 2.961900, ppl: 19.334677 +epoch: 1, batch: 16305, sum loss: 3768.914795, avg loss: 2.367409, ppl: 10.669708 +epoch: 1, batch: 16306, sum loss: 3833.566650, avg loss: 2.560833, ppl: 12.946594 +epoch: 1, batch: 16307, sum loss: 4161.554199, avg loss: 2.704064, ppl: 14.940325 +epoch: 1, batch: 16308, sum loss: 4448.917969, avg loss: 2.791040, ppl: 16.297964 +epoch: 1, batch: 16309, sum loss: 3593.694824, avg loss: 2.525435, ppl: 12.496336 +epoch: 1, batch: 16310, sum loss: 4374.909668, avg loss: 2.524472, ppl: 12.484299 +epoch: 1, batch: 16311, sum loss: 5029.106934, avg loss: 2.872134, ppl: 17.674700 +epoch: 1, batch: 16312, sum loss: 4756.169922, avg loss: 2.815968, ppl: 16.709343 +epoch: 1, batch: 16313, sum loss: 4938.421875, avg loss: 2.913523, ppl: 18.421587 +epoch: 1, batch: 16314, sum loss: 4897.158203, avg loss: 2.722156, ppl: 15.213083 +epoch: 1, batch: 16315, sum loss: 5749.173828, avg loss: 3.053199, ppl: 21.183002 +epoch: 1, batch: 16316, sum loss: 5763.248535, avg loss: 2.674361, ppl: 14.503082 +epoch: 1, batch: 16317, sum loss: 4703.584473, avg loss: 2.681633, ppl: 14.608931 +epoch: 1, batch: 16318, sum loss: 3616.346191, avg loss: 2.215899, ppl: 9.169644 +epoch: 1, batch: 16319, sum loss: 4286.275391, avg loss: 2.443715, ppl: 11.515738 +epoch: 1, batch: 16320, sum loss: 
4282.878906, avg loss: 2.613105, ppl: 13.641339 +epoch: 1, batch: 16321, sum loss: 4743.674316, avg loss: 2.843930, ppl: 17.183155 +epoch: 1, batch: 16322, sum loss: 4630.375977, avg loss: 2.977734, ppl: 19.643251 +epoch: 1, batch: 16323, sum loss: 5333.577148, avg loss: 2.919309, ppl: 18.528479 +epoch: 1, batch: 16324, sum loss: 4796.993164, avg loss: 2.872451, ppl: 17.680302 +epoch: 1, batch: 16325, sum loss: 3828.994629, avg loss: 2.520734, ppl: 12.437720 +epoch: 1, batch: 16326, sum loss: 5015.680664, avg loss: 2.649594, ppl: 14.148288 +epoch: 1, batch: 16327, sum loss: 5174.495605, avg loss: 2.902129, ppl: 18.212877 +epoch: 1, batch: 16328, sum loss: 4131.417480, avg loss: 2.631476, ppl: 13.894265 +epoch: 1, batch: 16329, sum loss: 4402.007812, avg loss: 2.432049, ppl: 11.382175 +epoch: 1, batch: 16330, sum loss: 4574.235352, avg loss: 2.630383, ppl: 13.879082 +epoch: 1, batch: 16331, sum loss: 4076.647949, avg loss: 2.606552, ppl: 13.552247 +epoch: 1, batch: 16332, sum loss: 3717.811523, avg loss: 2.457245, ppl: 11.672610 +epoch: 1, batch: 16333, sum loss: 5199.486816, avg loss: 2.787929, ppl: 16.247330 +epoch: 1, batch: 16334, sum loss: 4842.652344, avg loss: 2.791154, ppl: 16.299822 +epoch: 1, batch: 16335, sum loss: 5034.892578, avg loss: 2.827003, ppl: 16.894756 +epoch: 1, batch: 16336, sum loss: 4739.416504, avg loss: 2.650680, ppl: 14.163671 +epoch: 1, batch: 16337, sum loss: 5631.259766, avg loss: 3.032450, ppl: 20.748003 +epoch: 1, batch: 16338, sum loss: 4729.844238, avg loss: 2.775730, ppl: 16.050341 +epoch: 1, batch: 16339, sum loss: 5488.451172, avg loss: 3.020612, ppl: 20.503826 +epoch: 1, batch: 16340, sum loss: 5058.236328, avg loss: 2.785372, ppl: 16.205849 +epoch: 1, batch: 16341, sum loss: 4919.935547, avg loss: 2.832433, ppl: 16.986734 +epoch: 1, batch: 16342, sum loss: 3893.859619, avg loss: 2.585564, ppl: 13.270774 +epoch: 1, batch: 16343, sum loss: 4687.610352, avg loss: 2.723771, ppl: 15.237677 +epoch: 1, batch: 16344, sum loss: 
5332.604004, avg loss: 2.902888, ppl: 18.226700 +epoch: 1, batch: 16345, sum loss: 4476.663086, avg loss: 2.888170, ppl: 17.960407 +epoch: 1, batch: 16346, sum loss: 4218.361816, avg loss: 2.730331, ppl: 15.337970 +epoch: 1, batch: 16347, sum loss: 4727.156250, avg loss: 2.832329, ppl: 16.984964 +epoch: 1, batch: 16348, sum loss: 5189.012695, avg loss: 2.885991, ppl: 17.921326 +epoch: 1, batch: 16349, sum loss: 4108.488281, avg loss: 2.628591, ppl: 13.854239 +epoch: 1, batch: 16350, sum loss: 5147.169434, avg loss: 2.828115, ppl: 16.913553 +epoch: 1, batch: 16351, sum loss: 4840.224609, avg loss: 2.884520, ppl: 17.894978 +epoch: 1, batch: 16352, sum loss: 5382.906250, avg loss: 2.821230, ppl: 16.797493 +epoch: 1, batch: 16353, sum loss: 4757.035645, avg loss: 2.530338, ppl: 12.557751 +epoch: 1, batch: 16354, sum loss: 5235.397949, avg loss: 2.664325, ppl: 14.358251 +epoch: 1, batch: 16355, sum loss: 4438.995605, avg loss: 2.579312, ppl: 13.188060 +epoch: 1, batch: 16356, sum loss: 3842.369385, avg loss: 2.386565, ppl: 10.876067 +epoch: 1, batch: 16357, sum loss: 5403.656250, avg loss: 3.148984, ppl: 23.312366 +epoch: 1, batch: 16358, sum loss: 5107.015137, avg loss: 2.926656, ppl: 18.665115 +epoch: 1, batch: 16359, sum loss: 4911.119141, avg loss: 2.807958, ppl: 16.576042 +epoch: 1, batch: 16360, sum loss: 3550.399658, avg loss: 2.365356, ppl: 10.647831 +epoch: 1, batch: 16361, sum loss: 3869.798828, avg loss: 2.430778, ppl: 11.367723 +epoch: 1, batch: 16362, sum loss: 4283.978027, avg loss: 2.888724, ppl: 17.970371 +epoch: 1, batch: 16363, sum loss: 4181.958984, avg loss: 2.467232, ppl: 11.789773 +epoch: 1, batch: 16364, sum loss: 3920.373779, avg loss: 2.550666, ppl: 12.815638 +epoch: 1, batch: 16365, sum loss: 4820.557129, avg loss: 2.483543, ppl: 11.983646 +epoch: 1, batch: 16366, sum loss: 4613.415527, avg loss: 2.686905, ppl: 14.686151 +epoch: 1, batch: 16367, sum loss: 4811.118652, avg loss: 2.739817, ppl: 15.484154 +epoch: 1, batch: 16368, sum loss: 
4257.017090, avg loss: 2.674006, ppl: 14.497928 +epoch: 1, batch: 16369, sum loss: 4241.594727, avg loss: 2.752495, ppl: 15.681710 +epoch: 1, batch: 16370, sum loss: 5327.292969, avg loss: 2.903157, ppl: 18.231611 +epoch: 1, batch: 16371, sum loss: 4317.335449, avg loss: 2.425469, ppl: 11.307536 +epoch: 1, batch: 16372, sum loss: 4728.754883, avg loss: 2.694447, ppl: 14.797334 +epoch: 1, batch: 16373, sum loss: 5081.984863, avg loss: 2.980636, ppl: 19.700350 +epoch: 1, batch: 16374, sum loss: 3748.007568, avg loss: 2.520516, ppl: 12.435014 +epoch: 1, batch: 16375, sum loss: 5210.754883, avg loss: 2.813582, ppl: 16.669529 +epoch: 1, batch: 16376, sum loss: 4815.185547, avg loss: 2.628376, ppl: 13.851264 +epoch: 1, batch: 16377, sum loss: 4394.207031, avg loss: 2.827675, ppl: 16.906111 +epoch: 1, batch: 16378, sum loss: 4355.829102, avg loss: 2.785057, ppl: 16.200739 +epoch: 1, batch: 16379, sum loss: 4710.613770, avg loss: 2.741917, ppl: 15.516705 +epoch: 1, batch: 16380, sum loss: 5063.988281, avg loss: 3.037785, ppl: 20.858995 +epoch: 1, batch: 16381, sum loss: 5349.729004, avg loss: 2.844088, ppl: 17.185871 +epoch: 1, batch: 16382, sum loss: 5561.887695, avg loss: 2.698636, ppl: 14.859444 +epoch: 1, batch: 16383, sum loss: 5307.469727, avg loss: 2.844303, ppl: 17.189575 +epoch: 1, batch: 16384, sum loss: 4746.688477, avg loss: 2.532918, ppl: 12.590191 +epoch: 1, batch: 16385, sum loss: 5652.545898, avg loss: 2.743954, ppl: 15.548345 +epoch: 1, batch: 16386, sum loss: 3741.180176, avg loss: 2.714935, ppl: 15.103625 +epoch: 1, batch: 16387, sum loss: 5513.579102, avg loss: 2.915695, ppl: 18.461637 +epoch: 1, batch: 16388, sum loss: 4440.931641, avg loss: 2.648140, ppl: 14.127743 +epoch: 1, batch: 16389, sum loss: 4050.637207, avg loss: 2.509689, ppl: 12.301099 +epoch: 1, batch: 16390, sum loss: 4478.696777, avg loss: 2.657980, ppl: 14.267443 +epoch: 1, batch: 16391, sum loss: 3435.756592, avg loss: 2.478901, ppl: 11.928147 +epoch: 1, batch: 16392, sum loss: 
4410.048828, avg loss: 2.634438, ppl: 13.935479 +epoch: 1, batch: 16393, sum loss: 3749.082520, avg loss: 2.420324, ppl: 11.249507 +epoch: 1, batch: 16394, sum loss: 4755.436035, avg loss: 2.488454, ppl: 12.042645 +epoch: 1, batch: 16395, sum loss: 3889.244873, avg loss: 2.485141, ppl: 12.002808 +epoch: 1, batch: 16396, sum loss: 4547.438477, avg loss: 2.513786, ppl: 12.351603 +epoch: 1, batch: 16397, sum loss: 4838.849121, avg loss: 2.645626, ppl: 14.092258 +epoch: 1, batch: 16398, sum loss: 4576.548340, avg loss: 2.840812, ppl: 17.129673 +epoch: 1, batch: 16399, sum loss: 5147.007324, avg loss: 2.901357, ppl: 18.198824 +epoch: 1, batch: 16400, sum loss: 5062.564453, avg loss: 2.801640, ppl: 16.471643 +epoch: 1, batch: 16401, sum loss: 4151.460938, avg loss: 2.921507, ppl: 18.569244 +epoch: 1, batch: 16402, sum loss: 3912.381104, avg loss: 2.582430, ppl: 13.229244 +epoch: 1, batch: 16403, sum loss: 3897.652832, avg loss: 2.429958, ppl: 11.358406 +epoch: 1, batch: 16404, sum loss: 5982.348145, avg loss: 2.913954, ppl: 18.429529 +epoch: 1, batch: 16405, sum loss: 4376.233398, avg loss: 2.581849, ppl: 13.221558 +epoch: 1, batch: 16406, sum loss: 5954.417480, avg loss: 2.996687, ppl: 20.019102 +epoch: 1, batch: 16407, sum loss: 5691.641602, avg loss: 2.959772, ppl: 19.293570 +epoch: 1, batch: 16408, sum loss: 5268.770508, avg loss: 2.802537, ppl: 16.486427 +epoch: 1, batch: 16409, sum loss: 3341.328613, avg loss: 2.233509, ppl: 9.332553 +epoch: 1, batch: 16410, sum loss: 5108.061523, avg loss: 2.706975, ppl: 14.983877 +epoch: 1, batch: 16411, sum loss: 4893.116211, avg loss: 2.725970, ppl: 15.271220 +epoch: 1, batch: 16412, sum loss: 4272.083008, avg loss: 2.611298, ppl: 13.616712 +epoch: 1, batch: 16413, sum loss: 4670.288086, avg loss: 2.721613, ppl: 15.204830 +epoch: 1, batch: 16414, sum loss: 3631.099121, avg loss: 2.466779, ppl: 11.784431 +epoch: 1, batch: 16415, sum loss: 4421.085938, avg loss: 2.854155, ppl: 17.359758 +epoch: 1, batch: 16416, sum loss: 
4480.012695, avg loss: 2.756931, ppl: 15.751425 +epoch: 1, batch: 16417, sum loss: 4330.250977, avg loss: 2.747621, ppl: 15.605463 +epoch: 1, batch: 16418, sum loss: 3440.130371, avg loss: 2.427756, ppl: 11.333422 +epoch: 1, batch: 16419, sum loss: 3874.205811, avg loss: 2.537135, ppl: 12.643400 +epoch: 1, batch: 16420, sum loss: 4539.632324, avg loss: 2.700555, ppl: 14.887986 +epoch: 1, batch: 16421, sum loss: 5782.966797, avg loss: 3.148050, ppl: 23.290594 +epoch: 1, batch: 16422, sum loss: 3830.246582, avg loss: 2.618077, ppl: 13.709335 +epoch: 1, batch: 16423, sum loss: 4157.679199, avg loss: 2.600174, ppl: 13.466086 +epoch: 1, batch: 16424, sum loss: 3728.478271, avg loss: 2.600055, ppl: 13.464472 +epoch: 1, batch: 16425, sum loss: 4285.406738, avg loss: 2.736530, ppl: 15.433343 +epoch: 1, batch: 16426, sum loss: 3769.573730, avg loss: 2.541857, ppl: 12.703239 +epoch: 1, batch: 16427, sum loss: 4180.894043, avg loss: 2.787263, ppl: 16.236515 +epoch: 1, batch: 16428, sum loss: 4737.523438, avg loss: 2.708704, ppl: 15.009810 +epoch: 1, batch: 16429, sum loss: 4929.563965, avg loss: 2.740169, ppl: 15.489599 +epoch: 1, batch: 16430, sum loss: 4513.915039, avg loss: 2.576436, ppl: 13.150182 +epoch: 1, batch: 16431, sum loss: 4147.689453, avg loss: 2.665610, ppl: 14.376718 +epoch: 1, batch: 16432, sum loss: 4421.548340, avg loss: 2.492417, ppl: 12.090465 +epoch: 1, batch: 16433, sum loss: 5631.958496, avg loss: 2.978297, ppl: 19.654306 +epoch: 1, batch: 16434, sum loss: 5449.344727, avg loss: 2.795970, ppl: 16.378504 +epoch: 1, batch: 16435, sum loss: 4538.629395, avg loss: 2.587588, ppl: 13.297660 +epoch: 1, batch: 16436, sum loss: 4969.257812, avg loss: 2.810666, ppl: 16.620989 +epoch: 1, batch: 16437, sum loss: 4787.174316, avg loss: 2.839368, ppl: 17.104954 +epoch: 1, batch: 16438, sum loss: 4070.919678, avg loss: 2.365438, ppl: 10.648706 +epoch: 1, batch: 16439, sum loss: 4464.140137, avg loss: 2.764174, ppl: 15.865922 +epoch: 1, batch: 16440, sum loss: 
4379.956543, avg loss: 2.747777, ppl: 15.607897 +epoch: 1, batch: 16441, sum loss: 4380.676270, avg loss: 2.760351, ppl: 15.805385 +epoch: 1, batch: 16442, sum loss: 4701.317383, avg loss: 2.868406, ppl: 17.608925 +epoch: 1, batch: 16443, sum loss: 4911.085449, avg loss: 2.734457, ppl: 15.401386 +epoch: 1, batch: 16444, sum loss: 3769.720703, avg loss: 2.332748, ppl: 10.306224 +epoch: 1, batch: 16445, sum loss: 4167.247559, avg loss: 2.642516, ppl: 14.048504 +epoch: 1, batch: 16446, sum loss: 3672.805420, avg loss: 2.493419, ppl: 12.102580 +epoch: 1, batch: 16447, sum loss: 4501.203613, avg loss: 2.556050, ppl: 12.884816 +epoch: 1, batch: 16448, sum loss: 4591.219727, avg loss: 2.736126, ppl: 15.427108 +epoch: 1, batch: 16449, sum loss: 4188.912598, avg loss: 2.609914, ppl: 13.597885 +epoch: 1, batch: 16450, sum loss: 5010.426758, avg loss: 3.034783, ppl: 20.796467 +epoch: 1, batch: 16451, sum loss: 4206.008301, avg loss: 2.575633, ppl: 13.139629 +epoch: 1, batch: 16452, sum loss: 5227.072754, avg loss: 2.997175, ppl: 20.028866 +epoch: 1, batch: 16453, sum loss: 3291.850586, avg loss: 2.490053, ppl: 12.061920 +epoch: 1, batch: 16454, sum loss: 4153.167969, avg loss: 2.538611, ppl: 12.662076 +epoch: 1, batch: 16455, sum loss: 3777.351074, avg loss: 2.566135, ppl: 13.015428 +epoch: 1, batch: 16456, sum loss: 3699.029785, avg loss: 2.592172, ppl: 13.358761 +epoch: 1, batch: 16457, sum loss: 4186.822754, avg loss: 2.809948, ppl: 16.609058 +epoch: 1, batch: 16458, sum loss: 4492.218750, avg loss: 2.807637, ppl: 16.570711 +epoch: 1, batch: 16459, sum loss: 4651.332520, avg loss: 2.645809, ppl: 14.094846 +epoch: 1, batch: 16460, sum loss: 3037.039551, avg loss: 2.252997, ppl: 9.516211 +epoch: 1, batch: 16461, sum loss: 3879.131592, avg loss: 2.535380, ppl: 12.621227 +epoch: 1, batch: 16462, sum loss: 4973.937988, avg loss: 2.649940, ppl: 14.153193 +epoch: 1, batch: 16463, sum loss: 4728.056152, avg loss: 2.612186, ppl: 13.628810 +epoch: 1, batch: 16464, sum loss: 
4496.345703, avg loss: 2.624837, ppl: 13.802323 +epoch: 1, batch: 16465, sum loss: 5022.325195, avg loss: 2.829479, ppl: 16.936630 +epoch: 1, batch: 16466, sum loss: 4251.505859, avg loss: 2.727072, ppl: 15.288065 +epoch: 1, batch: 16467, sum loss: 4652.120117, avg loss: 2.765826, ppl: 15.892169 +epoch: 1, batch: 16468, sum loss: 5061.631836, avg loss: 2.747900, ppl: 15.609817 +epoch: 1, batch: 16469, sum loss: 4892.931152, avg loss: 2.679590, ppl: 14.579115 +epoch: 1, batch: 16470, sum loss: 4287.775879, avg loss: 2.616093, ppl: 13.682158 +epoch: 1, batch: 16471, sum loss: 3774.467285, avg loss: 2.340029, ppl: 10.381540 +epoch: 1, batch: 16472, sum loss: 4467.492188, avg loss: 2.710857, ppl: 15.042160 +epoch: 1, batch: 16473, sum loss: 4089.684814, avg loss: 2.427113, ppl: 11.326132 +epoch: 1, batch: 16474, sum loss: 4287.147949, avg loss: 2.550356, ppl: 12.811660 +epoch: 1, batch: 16475, sum loss: 4873.233398, avg loss: 2.812021, ppl: 16.643530 +epoch: 1, batch: 16476, sum loss: 4866.444824, avg loss: 2.732423, ppl: 15.370081 +epoch: 1, batch: 16477, sum loss: 4878.266113, avg loss: 2.593443, ppl: 13.375744 +epoch: 1, batch: 16478, sum loss: 4892.236816, avg loss: 2.736150, ppl: 15.427479 +epoch: 1, batch: 16479, sum loss: 5642.631836, avg loss: 3.095245, ppl: 22.092659 +epoch: 1, batch: 16480, sum loss: 5500.439941, avg loss: 2.922657, ppl: 18.590612 +epoch: 1, batch: 16481, sum loss: 4392.400391, avg loss: 2.614524, ppl: 13.660714 +epoch: 1, batch: 16482, sum loss: 4524.221191, avg loss: 2.972550, ppl: 19.541691 +epoch: 1, batch: 16483, sum loss: 4766.325195, avg loss: 2.700468, ppl: 14.886691 +epoch: 1, batch: 16484, sum loss: 4522.622559, avg loss: 2.587313, ppl: 13.294002 +epoch: 1, batch: 16485, sum loss: 5766.323242, avg loss: 3.075372, ppl: 21.657948 +epoch: 1, batch: 16486, sum loss: 6055.987305, avg loss: 2.957025, ppl: 19.240646 +epoch: 1, batch: 16487, sum loss: 3352.229004, avg loss: 2.362388, ppl: 10.616277 +epoch: 1, batch: 16488, sum loss: 
4302.717773, avg loss: 2.723239, ppl: 15.229574 +epoch: 1, batch: 16489, sum loss: 3969.444580, avg loss: 2.479353, ppl: 11.933546 +epoch: 1, batch: 16490, sum loss: 5684.305664, avg loss: 2.747369, ppl: 15.601523 +epoch: 1, batch: 16491, sum loss: 4979.889648, avg loss: 2.821467, ppl: 16.801483 +epoch: 1, batch: 16492, sum loss: 3660.605469, avg loss: 2.552723, ppl: 12.842031 +epoch: 1, batch: 16493, sum loss: 5476.400391, avg loss: 3.007359, ppl: 20.233887 +epoch: 1, batch: 16494, sum loss: 4165.370117, avg loss: 2.703031, ppl: 14.924894 +epoch: 1, batch: 16495, sum loss: 4837.021973, avg loss: 2.857072, ppl: 17.410467 +epoch: 1, batch: 16496, sum loss: 3900.698486, avg loss: 2.640960, ppl: 14.026670 +epoch: 1, batch: 16497, sum loss: 4264.473145, avg loss: 2.687129, ppl: 14.689439 +epoch: 1, batch: 16498, sum loss: 3736.706055, avg loss: 2.479566, ppl: 11.936084 +epoch: 1, batch: 16499, sum loss: 4078.953125, avg loss: 2.660765, ppl: 14.307236 +epoch: 1, batch: 16500, sum loss: 4198.885742, avg loss: 2.689869, ppl: 14.729749 +epoch: 1, batch: 16501, sum loss: 4126.599609, avg loss: 2.652056, ppl: 14.183172 +epoch: 1, batch: 16502, sum loss: 4761.576660, avg loss: 2.731828, ppl: 15.360945 +epoch: 1, batch: 16503, sum loss: 4207.767090, avg loss: 2.435050, ppl: 11.416395 +epoch: 1, batch: 16504, sum loss: 4251.777344, avg loss: 2.495175, ppl: 12.123851 +epoch: 1, batch: 16505, sum loss: 4377.285645, avg loss: 2.802360, ppl: 16.483496 +epoch: 1, batch: 16506, sum loss: 4671.535645, avg loss: 2.795653, ppl: 16.373314 +epoch: 1, batch: 16507, sum loss: 4348.056152, avg loss: 2.738071, ppl: 15.457135 +epoch: 1, batch: 16508, sum loss: 4710.223633, avg loss: 2.713263, ppl: 15.078389 +epoch: 1, batch: 16509, sum loss: 4761.111816, avg loss: 2.607400, ppl: 13.563735 +epoch: 1, batch: 16510, sum loss: 3193.740723, avg loss: 2.334606, ppl: 10.325388 +epoch: 1, batch: 16511, sum loss: 5352.502930, avg loss: 3.285760, ppl: 26.729288 +epoch: 1, batch: 16512, sum loss: 
4650.408691, avg loss: 2.710028, ppl: 15.029702 +epoch: 1, batch: 16513, sum loss: 4220.084473, avg loss: 2.540689, ppl: 12.688410 +epoch: 1, batch: 16514, sum loss: 5294.777832, avg loss: 3.098173, ppl: 22.157436 +epoch: 1, batch: 16515, sum loss: 5750.179199, avg loss: 2.822866, ppl: 16.825010 +epoch: 1, batch: 16516, sum loss: 4927.053223, avg loss: 2.854608, ppl: 17.367624 +epoch: 1, batch: 16517, sum loss: 4802.091797, avg loss: 2.821440, ppl: 16.801035 +epoch: 1, batch: 16518, sum loss: 4637.185547, avg loss: 2.834466, ppl: 17.021299 +epoch: 1, batch: 16519, sum loss: 5600.490234, avg loss: 2.858852, ppl: 17.441486 +epoch: 1, batch: 16520, sum loss: 3943.867188, avg loss: 2.492963, ppl: 12.097062 +epoch: 1, batch: 16521, sum loss: 4857.763184, avg loss: 2.583916, ppl: 13.248925 +epoch: 1, batch: 16522, sum loss: 3805.875000, avg loss: 2.413364, ppl: 11.171478 +epoch: 1, batch: 16523, sum loss: 5269.443359, avg loss: 2.943823, ppl: 18.988302 +epoch: 1, batch: 16524, sum loss: 4815.204102, avg loss: 2.895493, ppl: 18.092411 +epoch: 1, batch: 16525, sum loss: 4840.408203, avg loss: 2.734694, ppl: 15.405029 +epoch: 1, batch: 16526, sum loss: 5415.126953, avg loss: 2.835145, ppl: 17.032869 +epoch: 1, batch: 16527, sum loss: 5426.341797, avg loss: 3.186343, ppl: 24.199760 +epoch: 1, batch: 16528, sum loss: 5027.046875, avg loss: 2.781985, ppl: 16.151047 +epoch: 1, batch: 16529, sum loss: 4930.218750, avg loss: 2.758936, ppl: 15.783044 +epoch: 1, batch: 16530, sum loss: 4098.500000, avg loss: 2.708857, ppl: 15.012100 +epoch: 1, batch: 16531, sum loss: 5009.394531, avg loss: 2.767622, ppl: 15.920722 +epoch: 1, batch: 16532, sum loss: 4878.138184, avg loss: 2.811607, ppl: 16.636633 +epoch: 1, batch: 16533, sum loss: 4943.924316, avg loss: 2.755811, ppl: 15.733791 +epoch: 1, batch: 16534, sum loss: 5241.585449, avg loss: 2.963022, ppl: 19.356373 +epoch: 1, batch: 16535, sum loss: 3531.763672, avg loss: 2.471493, ppl: 11.840114 +epoch: 1, batch: 16536, sum loss: 
5485.464355, avg loss: 2.992616, ppl: 19.937765 +epoch: 1, batch: 16537, sum loss: 4415.969238, avg loss: 2.548165, ppl: 12.783619 +epoch: 1, batch: 16538, sum loss: 4489.733398, avg loss: 2.627111, ppl: 13.833753 +epoch: 1, batch: 16539, sum loss: 4347.051270, avg loss: 2.543623, ppl: 12.725689 +epoch: 1, batch: 16540, sum loss: 4670.012207, avg loss: 2.735801, ppl: 15.422091 +epoch: 1, batch: 16541, sum loss: 5339.572754, avg loss: 2.649912, ppl: 14.152795 +epoch: 1, batch: 16542, sum loss: 5098.439941, avg loss: 2.900136, ppl: 18.176626 +epoch: 1, batch: 16543, sum loss: 4993.619629, avg loss: 2.783512, ppl: 16.175724 +epoch: 1, batch: 16544, sum loss: 4597.472168, avg loss: 2.677619, ppl: 14.550410 +epoch: 1, batch: 16545, sum loss: 5102.500977, avg loss: 2.874648, ppl: 17.719193 +epoch: 1, batch: 16546, sum loss: 4282.799316, avg loss: 2.781039, ppl: 16.135769 +epoch: 1, batch: 16547, sum loss: 4115.701660, avg loss: 2.684737, ppl: 14.654346 +epoch: 1, batch: 16548, sum loss: 5033.143555, avg loss: 2.739872, ppl: 15.484995 +epoch: 1, batch: 16549, sum loss: 5502.422363, avg loss: 2.855435, ppl: 17.381990 +epoch: 1, batch: 16550, sum loss: 4750.148926, avg loss: 2.703557, ppl: 14.932746 +epoch: 1, batch: 16551, sum loss: 3851.237549, avg loss: 2.515505, ppl: 12.372853 +epoch: 1, batch: 16552, sum loss: 4838.611328, avg loss: 2.686625, ppl: 14.682037 +epoch: 1, batch: 16553, sum loss: 5133.333008, avg loss: 2.822063, ppl: 16.811501 +epoch: 1, batch: 16554, sum loss: 4651.201172, avg loss: 2.775180, ppl: 16.041512 +epoch: 1, batch: 16555, sum loss: 4474.527832, avg loss: 2.786132, ppl: 16.218164 +epoch: 1, batch: 16556, sum loss: 4229.672363, avg loss: 2.641894, ppl: 14.039771 +epoch: 1, batch: 16557, sum loss: 3945.648682, avg loss: 2.522793, ppl: 12.463362 +epoch: 1, batch: 16558, sum loss: 4282.278809, avg loss: 2.536895, ppl: 12.640362 +epoch: 1, batch: 16559, sum loss: 4823.257812, avg loss: 2.749862, ppl: 15.640472 +epoch: 1, batch: 16560, sum loss: 
4025.796387, avg loss: 2.855175, ppl: 17.377474 +epoch: 1, batch: 16561, sum loss: 4639.234375, avg loss: 2.815069, ppl: 16.694334 +epoch: 1, batch: 16562, sum loss: 4568.342285, avg loss: 2.567927, ppl: 13.038769 +epoch: 1, batch: 16563, sum loss: 5354.405273, avg loss: 2.874077, ppl: 17.709068 +epoch: 1, batch: 16564, sum loss: 3347.024658, avg loss: 2.452033, ppl: 11.611928 +epoch: 1, batch: 16565, sum loss: 4315.730957, avg loss: 2.775390, ppl: 16.044882 +epoch: 1, batch: 16566, sum loss: 5409.255859, avg loss: 3.215966, ppl: 24.927372 +epoch: 1, batch: 16567, sum loss: 5256.009766, avg loss: 2.876853, ppl: 17.758295 +epoch: 1, batch: 16568, sum loss: 5134.971191, avg loss: 2.802932, ppl: 16.492933 +epoch: 1, batch: 16569, sum loss: 4469.600586, avg loss: 2.772705, ppl: 16.001862 +epoch: 1, batch: 16570, sum loss: 5205.004395, avg loss: 3.135545, ppl: 23.001163 +epoch: 1, batch: 16571, sum loss: 4658.443359, avg loss: 2.850945, ppl: 17.304119 +epoch: 1, batch: 16572, sum loss: 5169.883789, avg loss: 2.889818, ppl: 17.990030 +epoch: 1, batch: 16573, sum loss: 4814.318359, avg loss: 2.792528, ppl: 16.322233 +epoch: 1, batch: 16574, sum loss: 4283.943359, avg loss: 2.669123, ppl: 14.427317 +epoch: 1, batch: 16575, sum loss: 4686.670410, avg loss: 2.823296, ppl: 16.832232 +epoch: 1, batch: 16576, sum loss: 3891.588135, avg loss: 2.291866, ppl: 9.893380 +epoch: 1, batch: 16577, sum loss: 6249.089355, avg loss: 3.207952, ppl: 24.728380 +epoch: 1, batch: 16578, sum loss: 4234.401367, avg loss: 2.569418, ppl: 13.058228 +epoch: 1, batch: 16579, sum loss: 5170.063477, avg loss: 2.880258, ppl: 17.818876 +epoch: 1, batch: 16580, sum loss: 4543.220703, avg loss: 2.641407, ppl: 14.032941 +epoch: 1, batch: 16581, sum loss: 5574.385742, avg loss: 3.001823, ppl: 20.122190 +epoch: 1, batch: 16582, sum loss: 5813.937988, avg loss: 2.948244, ppl: 19.072439 +epoch: 1, batch: 16583, sum loss: 5603.819824, avg loss: 3.135881, ppl: 23.008907 +epoch: 1, batch: 16584, sum loss: 
5794.173828, avg loss: 3.111801, ppl: 22.461470 +epoch: 1, batch: 16585, sum loss: 3628.051270, avg loss: 2.496938, ppl: 12.145251 +epoch: 1, batch: 16586, sum loss: 4283.537598, avg loss: 2.585116, ppl: 13.264830 +epoch: 1, batch: 16587, sum loss: 4542.497559, avg loss: 2.736444, ppl: 15.432018 +epoch: 1, batch: 16588, sum loss: 5563.545410, avg loss: 3.123832, ppl: 22.733337 +epoch: 1, batch: 16589, sum loss: 3863.193115, avg loss: 2.347019, ppl: 10.454358 +epoch: 1, batch: 16590, sum loss: 4939.464844, avg loss: 2.700637, ppl: 14.889215 +epoch: 1, batch: 16591, sum loss: 4767.845215, avg loss: 2.709003, ppl: 15.014298 +epoch: 1, batch: 16592, sum loss: 4252.212402, avg loss: 2.616746, ppl: 13.691099 +epoch: 1, batch: 16593, sum loss: 4488.678711, avg loss: 2.935696, ppl: 18.834606 +epoch: 1, batch: 16594, sum loss: 4239.878906, avg loss: 2.544945, ppl: 12.742530 +epoch: 1, batch: 16595, sum loss: 4341.068359, avg loss: 2.745774, ppl: 15.576662 +epoch: 1, batch: 16596, sum loss: 5046.113770, avg loss: 2.624084, ppl: 13.791935 +epoch: 1, batch: 16597, sum loss: 4465.070312, avg loss: 2.657780, ppl: 14.264585 +epoch: 1, batch: 16598, sum loss: 5908.105957, avg loss: 2.879194, ppl: 17.799921 +epoch: 1, batch: 16599, sum loss: 4535.465332, avg loss: 2.799670, ppl: 16.439220 +epoch: 1, batch: 16600, sum loss: 4939.320312, avg loss: 2.742543, ppl: 15.526423 +epoch: 1, batch: 16601, sum loss: 4223.872070, avg loss: 2.748128, ppl: 15.613372 +epoch: 1, batch: 16602, sum loss: 4558.364746, avg loss: 2.644063, ppl: 14.070258 +epoch: 1, batch: 16603, sum loss: 4108.106934, avg loss: 2.808002, ppl: 16.576769 +epoch: 1, batch: 16604, sum loss: 4687.795898, avg loss: 2.808745, ppl: 16.589092 +epoch: 1, batch: 16605, sum loss: 4684.994629, avg loss: 2.663442, ppl: 14.345584 +epoch: 1, batch: 16606, sum loss: 5762.206543, avg loss: 3.032741, ppl: 20.754032 +epoch: 1, batch: 16607, sum loss: 3990.198730, avg loss: 2.699729, ppl: 14.875696 +epoch: 1, batch: 16608, sum loss: 
4696.637695, avg loss: 2.757861, ppl: 15.766086 +epoch: 1, batch: 16609, sum loss: 4948.437500, avg loss: 2.803647, ppl: 16.504734 +epoch: 1, batch: 16610, sum loss: 6139.366211, avg loss: 3.045321, ppl: 21.016771 +epoch: 1, batch: 16611, sum loss: 4750.412598, avg loss: 2.610117, ppl: 13.600641 +epoch: 1, batch: 16612, sum loss: 5557.206543, avg loss: 2.799600, ppl: 16.438076 +epoch: 1, batch: 16613, sum loss: 4746.894043, avg loss: 2.844155, ppl: 17.187027 +epoch: 1, batch: 16614, sum loss: 5118.781250, avg loss: 2.845348, ppl: 17.207548 +epoch: 1, batch: 16615, sum loss: 5231.378418, avg loss: 2.781169, ppl: 16.137871 +epoch: 1, batch: 16616, sum loss: 4623.308594, avg loss: 2.619438, ppl: 13.728012 +epoch: 1, batch: 16617, sum loss: 4317.134766, avg loss: 2.638835, ppl: 13.996894 +epoch: 1, batch: 16618, sum loss: 5094.128418, avg loss: 2.732902, ppl: 15.377441 +epoch: 1, batch: 16619, sum loss: 5373.038086, avg loss: 2.862567, ppl: 17.506407 +epoch: 1, batch: 16620, sum loss: 4019.602295, avg loss: 2.589950, ppl: 13.329106 +epoch: 1, batch: 16621, sum loss: 5313.755371, avg loss: 2.771912, ppl: 15.989178 +epoch: 1, batch: 16622, sum loss: 4720.252930, avg loss: 2.700374, ppl: 14.885293 +epoch: 1, batch: 16623, sum loss: 5240.032227, avg loss: 2.996016, ppl: 20.005676 +epoch: 1, batch: 16624, sum loss: 4762.031250, avg loss: 2.858362, ppl: 17.432947 +epoch: 1, batch: 16625, sum loss: 4785.997070, avg loss: 2.921854, ppl: 18.575695 +epoch: 1, batch: 16626, sum loss: 4837.961426, avg loss: 2.730227, ppl: 15.336361 +epoch: 1, batch: 16627, sum loss: 4788.382812, avg loss: 2.857030, ppl: 17.409750 +epoch: 1, batch: 16628, sum loss: 4969.648926, avg loss: 2.838178, ppl: 17.084604 +epoch: 1, batch: 16629, sum loss: 4065.543701, avg loss: 2.539378, ppl: 12.671785 +epoch: 1, batch: 16630, sum loss: 4233.063477, avg loss: 2.699658, ppl: 14.874639 +epoch: 1, batch: 16631, sum loss: 4235.380859, avg loss: 2.557597, ppl: 12.904772 +epoch: 1, batch: 16632, sum loss: 
5793.144531, avg loss: 2.990782, ppl: 19.901236 +epoch: 1, batch: 16633, sum loss: 4749.549805, avg loss: 2.915623, ppl: 18.460308 +epoch: 1, batch: 16634, sum loss: 5491.580078, avg loss: 2.899462, ppl: 18.164362 +epoch: 1, batch: 16635, sum loss: 4141.979492, avg loss: 2.660231, ppl: 14.299590 +epoch: 1, batch: 16636, sum loss: 4227.013184, avg loss: 2.650165, ppl: 14.156375 +epoch: 1, batch: 16637, sum loss: 4323.259277, avg loss: 2.657197, ppl: 14.256269 +epoch: 1, batch: 16638, sum loss: 4077.794434, avg loss: 2.722159, ppl: 15.213134 +epoch: 1, batch: 16639, sum loss: 4198.107910, avg loss: 2.390722, ppl: 10.921377 +epoch: 1, batch: 16640, sum loss: 3477.104248, avg loss: 2.311904, ppl: 10.093629 +epoch: 1, batch: 16641, sum loss: 4855.057129, avg loss: 2.571534, ppl: 13.085888 +epoch: 1, batch: 16642, sum loss: 4320.795410, avg loss: 2.645925, ppl: 14.096476 +epoch: 1, batch: 16643, sum loss: 4961.773438, avg loss: 2.949925, ppl: 19.104515 +epoch: 1, batch: 16644, sum loss: 5172.855957, avg loss: 2.904467, ppl: 18.255512 +epoch: 1, batch: 16645, sum loss: 4534.879395, avg loss: 2.615271, ppl: 13.670916 +epoch: 1, batch: 16646, sum loss: 4377.461914, avg loss: 2.735914, ppl: 15.423831 +epoch: 1, batch: 16647, sum loss: 4601.321777, avg loss: 2.835072, ppl: 17.031624 +epoch: 1, batch: 16648, sum loss: 4943.162598, avg loss: 3.138516, ppl: 23.069599 +epoch: 1, batch: 16649, sum loss: 5858.218262, avg loss: 2.967689, ppl: 19.446922 +epoch: 1, batch: 16650, sum loss: 4388.037109, avg loss: 2.730577, ppl: 15.341737 +epoch: 1, batch: 16651, sum loss: 4905.579102, avg loss: 2.845464, ppl: 17.209538 +epoch: 1, batch: 16652, sum loss: 4233.194336, avg loss: 2.494516, ppl: 12.115872 +epoch: 1, batch: 16653, sum loss: 5557.926758, avg loss: 3.005909, ppl: 20.204582 +epoch: 1, batch: 16654, sum loss: 4263.429688, avg loss: 2.844183, ppl: 17.187506 +epoch: 1, batch: 16655, sum loss: 4926.571289, avg loss: 2.991239, ppl: 19.910343 +epoch: 1, batch: 16656, sum loss: 
4078.542969, avg loss: 2.552280, ppl: 12.836333 +epoch: 1, batch: 16657, sum loss: 4783.471680, avg loss: 2.789196, ppl: 16.267939 +epoch: 1, batch: 16658, sum loss: 4071.658936, avg loss: 2.496419, ppl: 12.138943 +epoch: 1, batch: 16659, sum loss: 3639.282471, avg loss: 2.322452, ppl: 10.200659 +epoch: 1, batch: 16660, sum loss: 4084.926270, avg loss: 2.578868, ppl: 13.182203 +epoch: 1, batch: 16661, sum loss: 4815.721191, avg loss: 2.358335, ppl: 10.573338 +epoch: 1, batch: 16662, sum loss: 4537.077637, avg loss: 2.720071, ppl: 15.181394 +epoch: 1, batch: 16663, sum loss: 4287.870117, avg loss: 2.534202, ppl: 12.606368 +epoch: 1, batch: 16664, sum loss: 3989.684570, avg loss: 2.597451, ppl: 13.429462 +epoch: 1, batch: 16665, sum loss: 5045.868652, avg loss: 2.901592, ppl: 18.203106 +epoch: 1, batch: 16666, sum loss: 4701.314941, avg loss: 2.877182, ppl: 17.764139 +epoch: 1, batch: 16667, sum loss: 4190.014648, avg loss: 2.559569, ppl: 12.930244 +epoch: 1, batch: 16668, sum loss: 4228.313965, avg loss: 2.751018, ppl: 15.658558 +epoch: 1, batch: 16669, sum loss: 5252.993164, avg loss: 2.702157, ppl: 14.911866 +epoch: 1, batch: 16670, sum loss: 5569.018555, avg loss: 2.791488, ppl: 16.305264 +epoch: 1, batch: 16671, sum loss: 3879.981934, avg loss: 2.554300, ppl: 12.862294 +epoch: 1, batch: 16672, sum loss: 4256.395508, avg loss: 2.601709, ppl: 13.486766 +epoch: 1, batch: 16673, sum loss: 4964.250977, avg loss: 2.979742, ppl: 19.682743 +epoch: 1, batch: 16674, sum loss: 4499.444336, avg loss: 2.622054, ppl: 13.763964 +epoch: 1, batch: 16675, sum loss: 4888.103027, avg loss: 2.810870, ppl: 16.624378 +epoch: 1, batch: 16676, sum loss: 4496.098633, avg loss: 2.671479, ppl: 14.461334 +epoch: 1, batch: 16677, sum loss: 3863.754639, avg loss: 2.478354, ppl: 11.921631 +epoch: 1, batch: 16678, sum loss: 4997.174805, avg loss: 2.941245, ppl: 18.939407 +epoch: 1, batch: 16679, sum loss: 5370.692383, avg loss: 2.878184, ppl: 17.781958 +epoch: 1, batch: 16680, sum loss: 
5413.431641, avg loss: 2.932520, ppl: 18.774881 +epoch: 1, batch: 16681, sum loss: 5280.013184, avg loss: 2.978011, ppl: 19.648695 +epoch: 1, batch: 16682, sum loss: 5469.697266, avg loss: 2.890960, ppl: 18.010599 +epoch: 1, batch: 16683, sum loss: 4397.040527, avg loss: 2.614174, ppl: 13.655931 +epoch: 1, batch: 16684, sum loss: 4158.892578, avg loss: 2.672810, ppl: 14.480603 +epoch: 1, batch: 16685, sum loss: 5048.602051, avg loss: 2.966276, ppl: 19.419470 +epoch: 1, batch: 16686, sum loss: 5421.303711, avg loss: 2.874498, ppl: 17.716536 +epoch: 1, batch: 16687, sum loss: 4576.899902, avg loss: 2.801040, ppl: 16.461765 +epoch: 1, batch: 16688, sum loss: 4675.668945, avg loss: 2.606282, ppl: 13.548577 +epoch: 1, batch: 16689, sum loss: 4867.510254, avg loss: 2.736093, ppl: 15.426600 +epoch: 1, batch: 16690, sum loss: 4480.420898, avg loss: 2.666917, ppl: 14.395524 +epoch: 1, batch: 16691, sum loss: 5439.215820, avg loss: 2.805166, ppl: 16.529812 +epoch: 1, batch: 16692, sum loss: 4885.318359, avg loss: 2.802822, ppl: 16.491114 +epoch: 1, batch: 16693, sum loss: 5172.944824, avg loss: 2.739907, ppl: 15.485549 +epoch: 1, batch: 16694, sum loss: 5045.050293, avg loss: 2.686395, ppl: 14.678666 +epoch: 1, batch: 16695, sum loss: 5346.103027, avg loss: 2.916587, ppl: 18.478107 +epoch: 1, batch: 16696, sum loss: 4702.945312, avg loss: 2.692012, ppl: 14.761347 +epoch: 1, batch: 16697, sum loss: 4371.286621, avg loss: 2.673570, ppl: 14.491608 +epoch: 1, batch: 16698, sum loss: 4340.592773, avg loss: 2.664575, ppl: 14.361846 +epoch: 1, batch: 16699, sum loss: 5619.356445, avg loss: 2.902560, ppl: 18.220730 +epoch: 1, batch: 16700, sum loss: 4029.358887, avg loss: 2.691622, ppl: 14.755597 +epoch: 1, batch: 16701, sum loss: 4629.251953, avg loss: 2.662019, ppl: 14.325190 +epoch: 1, batch: 16702, sum loss: 5091.917969, avg loss: 2.843059, ppl: 17.168196 +epoch: 1, batch: 16703, sum loss: 4783.239746, avg loss: 2.705452, ppl: 14.961084 +epoch: 1, batch: 16704, sum loss: 
5466.030762, avg loss: 3.001664, ppl: 20.118996 +epoch: 1, batch: 16705, sum loss: 4664.556641, avg loss: 2.591420, ppl: 13.348719 +epoch: 1, batch: 16706, sum loss: 4788.427734, avg loss: 2.818380, ppl: 16.749697 +epoch: 1, batch: 16707, sum loss: 5112.072754, avg loss: 2.901290, ppl: 18.197603 +epoch: 1, batch: 16708, sum loss: 4457.191895, avg loss: 2.525321, ppl: 12.494908 +epoch: 1, batch: 16709, sum loss: 4113.281250, avg loss: 2.672697, ppl: 14.478971 +epoch: 1, batch: 16710, sum loss: 3622.057129, avg loss: 2.827523, ppl: 16.903540 +epoch: 1, batch: 16711, sum loss: 4623.965820, avg loss: 2.688352, ppl: 14.707419 +epoch: 1, batch: 16712, sum loss: 4823.637695, avg loss: 2.802811, ppl: 16.490936 +epoch: 1, batch: 16713, sum loss: 6029.517090, avg loss: 3.163440, ppl: 23.651817 +epoch: 1, batch: 16714, sum loss: 5421.775391, avg loss: 2.954646, ppl: 19.194929 +epoch: 1, batch: 16715, sum loss: 4950.766113, avg loss: 2.750426, ppl: 15.649290 +epoch: 1, batch: 16716, sum loss: 4677.477539, avg loss: 2.848647, ppl: 17.264400 +epoch: 1, batch: 16717, sum loss: 6154.747070, avg loss: 2.970438, ppl: 19.500454 +epoch: 1, batch: 16718, sum loss: 5263.999023, avg loss: 2.757464, ppl: 15.759828 +epoch: 1, batch: 16719, sum loss: 4321.529785, avg loss: 2.651245, ppl: 14.171677 +epoch: 1, batch: 16720, sum loss: 6105.069824, avg loss: 3.145322, ppl: 23.227150 +epoch: 1, batch: 16721, sum loss: 3558.998047, avg loss: 2.355392, ppl: 10.542266 +epoch: 1, batch: 16722, sum loss: 4288.799805, avg loss: 2.719594, ppl: 15.174160 +epoch: 1, batch: 16723, sum loss: 4017.835938, avg loss: 2.509579, ppl: 12.299753 +epoch: 1, batch: 16724, sum loss: 4969.274902, avg loss: 3.080766, ppl: 21.775070 +epoch: 1, batch: 16725, sum loss: 4692.882324, avg loss: 2.531220, ppl: 12.568833 +epoch: 1, batch: 16726, sum loss: 4224.810059, avg loss: 2.711688, ppl: 15.054670 +epoch: 1, batch: 16727, sum loss: 4664.962891, avg loss: 2.650547, ppl: 14.161783 +epoch: 1, batch: 16728, sum loss: 
4750.012207, avg loss: 2.805678, ppl: 16.538279 +epoch: 1, batch: 16729, sum loss: 4517.986816, avg loss: 2.451431, ppl: 11.604939 +epoch: 1, batch: 16730, sum loss: 3722.450195, avg loss: 2.595851, ppl: 13.407992 +epoch: 1, batch: 16731, sum loss: 4972.058594, avg loss: 3.158868, ppl: 23.543930 +epoch: 1, batch: 16732, sum loss: 3775.846191, avg loss: 2.411140, ppl: 11.146666 +epoch: 1, batch: 16733, sum loss: 4193.707520, avg loss: 2.660982, ppl: 14.310333 +epoch: 1, batch: 16734, sum loss: 5037.842285, avg loss: 2.711433, ppl: 15.050827 +epoch: 1, batch: 16735, sum loss: 4566.664062, avg loss: 2.752661, ppl: 15.684308 +epoch: 1, batch: 16736, sum loss: 5140.132812, avg loss: 2.861989, ppl: 17.496302 +epoch: 1, batch: 16737, sum loss: 5032.491211, avg loss: 2.875709, ppl: 17.738001 +epoch: 1, batch: 16738, sum loss: 3306.666504, avg loss: 2.361905, ppl: 10.611142 +epoch: 1, batch: 16739, sum loss: 4210.297363, avg loss: 2.543986, ppl: 12.730317 +epoch: 1, batch: 16740, sum loss: 4917.253906, avg loss: 2.728776, ppl: 15.314127 +epoch: 1, batch: 16741, sum loss: 4655.904785, avg loss: 2.832059, ppl: 16.980387 +epoch: 1, batch: 16742, sum loss: 5031.812500, avg loss: 2.592381, ppl: 13.361554 +epoch: 1, batch: 16743, sum loss: 5102.008301, avg loss: 2.961119, ppl: 19.319582 +epoch: 1, batch: 16744, sum loss: 4657.296875, avg loss: 2.726754, ppl: 15.283204 +epoch: 1, batch: 16745, sum loss: 4239.075684, avg loss: 2.526267, ppl: 12.506729 +epoch: 1, batch: 16746, sum loss: 4269.127441, avg loss: 2.564041, ppl: 12.988193 +epoch: 1, batch: 16747, sum loss: 3928.794434, avg loss: 2.562814, ppl: 12.972276 +epoch: 1, batch: 16748, sum loss: 4259.858887, avg loss: 2.629543, ppl: 13.867425 +epoch: 1, batch: 16749, sum loss: 5783.080078, avg loss: 3.004197, ppl: 20.170021 +epoch: 1, batch: 16750, sum loss: 4181.932129, avg loss: 2.448438, ppl: 11.570259 +epoch: 1, batch: 16751, sum loss: 4383.919922, avg loss: 2.419382, ppl: 11.238910 +epoch: 1, batch: 16752, sum loss: 
5207.248047, avg loss: 2.873757, ppl: 17.703407 +epoch: 1, batch: 16753, sum loss: 4204.559570, avg loss: 2.565320, ppl: 13.004820 +epoch: 1, batch: 16754, sum loss: 4434.954102, avg loss: 2.707542, ppl: 14.992378 +epoch: 1, batch: 16755, sum loss: 4456.541504, avg loss: 2.802856, ppl: 16.491684 +epoch: 1, batch: 16756, sum loss: 3705.193359, avg loss: 2.453770, ppl: 11.632122 +epoch: 1, batch: 16757, sum loss: 3266.548340, avg loss: 2.290707, ppl: 9.881923 +epoch: 1, batch: 16758, sum loss: 4016.047363, avg loss: 2.816303, ppl: 16.714933 +epoch: 1, batch: 16759, sum loss: 4385.529297, avg loss: 2.508884, ppl: 12.291207 +epoch: 1, batch: 16760, sum loss: 4055.803711, avg loss: 2.486698, ppl: 12.021511 +epoch: 1, batch: 16761, sum loss: 3932.895752, avg loss: 2.525945, ppl: 12.502701 +epoch: 1, batch: 16762, sum loss: 4220.815430, avg loss: 2.319129, ppl: 10.166820 +epoch: 1, batch: 16763, sum loss: 5262.320312, avg loss: 2.811069, ppl: 16.627680 +epoch: 1, batch: 16764, sum loss: 4879.671387, avg loss: 2.732179, ppl: 15.366333 +epoch: 1, batch: 16765, sum loss: 4671.367676, avg loss: 2.759225, ppl: 15.787601 +epoch: 1, batch: 16766, sum loss: 4002.467041, avg loss: 2.588918, ppl: 13.315356 +epoch: 1, batch: 16767, sum loss: 4505.987793, avg loss: 2.664688, ppl: 14.363472 +epoch: 1, batch: 16768, sum loss: 5145.387207, avg loss: 2.975933, ppl: 19.607901 +epoch: 1, batch: 16769, sum loss: 4045.015137, avg loss: 2.547239, ppl: 12.771790 +epoch: 1, batch: 16770, sum loss: 4865.223145, avg loss: 2.830264, ppl: 16.949928 +epoch: 1, batch: 16771, sum loss: 4497.323730, avg loss: 2.648601, ppl: 14.134245 +epoch: 1, batch: 16772, sum loss: 4214.630859, avg loss: 2.523731, ppl: 12.475055 +epoch: 1, batch: 16773, sum loss: 4149.144531, avg loss: 2.534603, ppl: 12.611419 +epoch: 1, batch: 16774, sum loss: 4354.959961, avg loss: 2.481459, ppl: 11.958695 +epoch: 1, batch: 16775, sum loss: 3855.569580, avg loss: 2.698089, ppl: 14.851329 +epoch: 1, batch: 16776, sum loss: 
4196.173340, avg loss: 2.428341, ppl: 11.340055 +epoch: 1, batch: 16777, sum loss: 5340.069336, avg loss: 2.809084, ppl: 16.594717 +epoch: 1, batch: 16778, sum loss: 5054.358887, avg loss: 2.831574, ppl: 16.972149 +epoch: 1, batch: 16779, sum loss: 4715.557617, avg loss: 2.689993, ppl: 14.731575 +epoch: 1, batch: 16780, sum loss: 4441.620605, avg loss: 2.635977, ppl: 13.956936 +epoch: 1, batch: 16781, sum loss: 4422.990234, avg loss: 2.728556, ppl: 15.310768 +epoch: 1, batch: 16782, sum loss: 4002.031250, avg loss: 2.585292, ppl: 13.267158 +epoch: 1, batch: 16783, sum loss: 3768.997070, avg loss: 2.461788, ppl: 11.725758 +epoch: 1, batch: 16784, sum loss: 4617.272949, avg loss: 2.716043, ppl: 15.120373 +epoch: 1, batch: 16785, sum loss: 4200.847656, avg loss: 2.532156, ppl: 12.580606 +epoch: 1, batch: 16786, sum loss: 3708.818848, avg loss: 2.494162, ppl: 12.111578 +epoch: 1, batch: 16787, sum loss: 4949.200684, avg loss: 2.642392, ppl: 14.046769 +epoch: 1, batch: 16788, sum loss: 5493.723145, avg loss: 2.852401, ppl: 17.329332 +epoch: 1, batch: 16789, sum loss: 4429.375488, avg loss: 2.642826, ppl: 14.052855 +epoch: 1, batch: 16790, sum loss: 4686.442383, avg loss: 2.535954, ppl: 12.628469 +epoch: 1, batch: 16791, sum loss: 4727.327148, avg loss: 2.820601, ppl: 16.786936 +epoch: 1, batch: 16792, sum loss: 4844.553711, avg loss: 2.760429, ppl: 15.806628 +epoch: 1, batch: 16793, sum loss: 4359.503906, avg loss: 2.701056, ppl: 14.895450 +epoch: 1, batch: 16794, sum loss: 5381.474609, avg loss: 3.028404, ppl: 20.664236 +epoch: 1, batch: 16795, sum loss: 4724.189453, avg loss: 2.748220, ppl: 15.614808 +epoch: 1, batch: 16796, sum loss: 4012.057129, avg loss: 2.487326, ppl: 12.029069 +epoch: 1, batch: 16797, sum loss: 4979.132324, avg loss: 2.549479, ppl: 12.800433 +epoch: 1, batch: 16798, sum loss: 5017.591309, avg loss: 2.759951, ppl: 15.799070 +epoch: 1, batch: 16799, sum loss: 4733.099121, avg loss: 2.677092, ppl: 14.542746 +epoch: 1, batch: 16800, sum loss: 
4478.195801, avg loss: 2.681554, ppl: 14.607780 +epoch: 1, batch: 16801, sum loss: 5167.967773, avg loss: 3.223935, ppl: 25.126797 +epoch: 1, batch: 16802, sum loss: 3675.269287, avg loss: 2.478267, ppl: 11.920585 +epoch: 1, batch: 16803, sum loss: 3949.655518, avg loss: 2.573066, ppl: 13.105940 +epoch: 1, batch: 16804, sum loss: 5204.863770, avg loss: 2.684303, ppl: 14.647992 +epoch: 1, batch: 16805, sum loss: 4644.288574, avg loss: 2.761170, ppl: 15.818346 +epoch: 1, batch: 16806, sum loss: 4651.995117, avg loss: 3.018816, ppl: 20.467039 +epoch: 1, batch: 16807, sum loss: 3883.335449, avg loss: 2.543114, ppl: 12.719219 +epoch: 1, batch: 16808, sum loss: 4665.039551, avg loss: 2.861987, ppl: 17.496264 +epoch: 1, batch: 16809, sum loss: 4856.840820, avg loss: 2.773753, ppl: 16.018635 +epoch: 1, batch: 16810, sum loss: 5098.901855, avg loss: 2.897103, ppl: 18.121576 +epoch: 1, batch: 16811, sum loss: 4611.208008, avg loss: 2.835921, ppl: 17.046097 +epoch: 1, batch: 16812, sum loss: 5402.804199, avg loss: 2.745327, ppl: 15.569712 +epoch: 1, batch: 16813, sum loss: 3994.071289, avg loss: 2.545616, ppl: 12.751080 +epoch: 1, batch: 16814, sum loss: 5097.386719, avg loss: 2.929533, ppl: 18.718878 +epoch: 1, batch: 16815, sum loss: 4095.421875, avg loss: 2.419033, ppl: 11.234985 +epoch: 1, batch: 16816, sum loss: 4062.589844, avg loss: 2.413898, ppl: 11.177444 +epoch: 1, batch: 16817, sum loss: 4285.028320, avg loss: 2.651626, ppl: 14.177076 +epoch: 1, batch: 16818, sum loss: 5248.821777, avg loss: 2.907934, ppl: 18.318920 +epoch: 1, batch: 16819, sum loss: 4299.186523, avg loss: 2.683637, ppl: 14.638234 +epoch: 1, batch: 16820, sum loss: 4240.126465, avg loss: 2.776769, ppl: 16.067026 +epoch: 1, batch: 16821, sum loss: 5007.435059, avg loss: 2.731825, ppl: 15.360893 +epoch: 1, batch: 16822, sum loss: 4808.187500, avg loss: 2.856915, ppl: 17.407738 +epoch: 1, batch: 16823, sum loss: 4775.168457, avg loss: 2.874876, ppl: 17.723219 +epoch: 1, batch: 16824, sum loss: 
4962.002930, avg loss: 2.786077, ppl: 16.217276 +epoch: 1, batch: 16825, sum loss: 4537.352539, avg loss: 2.987066, ppl: 19.827415 +epoch: 1, batch: 16826, sum loss: 5130.455566, avg loss: 2.866176, ppl: 17.569706 +epoch: 1, batch: 16827, sum loss: 4322.968262, avg loss: 2.551929, ppl: 12.831836 +epoch: 1, batch: 16828, sum loss: 4652.629395, avg loss: 2.838700, ppl: 17.093531 +epoch: 1, batch: 16829, sum loss: 5151.591797, avg loss: 2.793705, ppl: 16.341454 +epoch: 1, batch: 16830, sum loss: 3913.445312, avg loss: 2.725240, ppl: 15.260083 +epoch: 1, batch: 16831, sum loss: 4330.803223, avg loss: 2.640734, ppl: 14.023489 +epoch: 1, batch: 16832, sum loss: 3796.977051, avg loss: 2.481684, ppl: 11.961396 +epoch: 1, batch: 16833, sum loss: 4197.627441, avg loss: 2.754349, ppl: 15.710806 +epoch: 1, batch: 16834, sum loss: 4560.468750, avg loss: 2.855647, ppl: 17.385687 +epoch: 1, batch: 16835, sum loss: 5942.505859, avg loss: 3.064727, ppl: 21.428612 +epoch: 1, batch: 16836, sum loss: 4477.993652, avg loss: 2.555933, ppl: 12.883308 +epoch: 1, batch: 16837, sum loss: 4022.169434, avg loss: 2.775825, ppl: 16.051857 +epoch: 1, batch: 16838, sum loss: 3529.702637, avg loss: 2.319121, ppl: 10.166738 +epoch: 1, batch: 16839, sum loss: 4510.452637, avg loss: 2.664178, ppl: 14.356139 +epoch: 1, batch: 16840, sum loss: 5013.850586, avg loss: 2.651428, ppl: 14.174268 +epoch: 1, batch: 16841, sum loss: 5013.082520, avg loss: 2.775793, ppl: 16.051355 +epoch: 1, batch: 16842, sum loss: 3407.298340, avg loss: 2.512757, ppl: 12.338900 +epoch: 1, batch: 16843, sum loss: 5697.596680, avg loss: 2.997158, ppl: 20.028526 +epoch: 1, batch: 16844, sum loss: 4114.841797, avg loss: 2.639411, ppl: 14.004955 +epoch: 1, batch: 16845, sum loss: 4012.763428, avg loss: 2.585544, ppl: 13.270501 +epoch: 1, batch: 16846, sum loss: 4730.189453, avg loss: 2.651452, ppl: 14.174600 +epoch: 1, batch: 16847, sum loss: 3699.817871, avg loss: 2.352077, ppl: 10.507376 +epoch: 1, batch: 16848, sum loss: 
4003.336914, avg loss: 2.481920, ppl: 11.964213 +epoch: 1, batch: 16849, sum loss: 3778.852051, avg loss: 2.505870, ppl: 12.254217 +epoch: 1, batch: 16850, sum loss: 5120.226562, avg loss: 2.697696, ppl: 14.845488 +epoch: 1, batch: 16851, sum loss: 5498.188477, avg loss: 2.729984, ppl: 15.332646 +epoch: 1, batch: 16852, sum loss: 3871.455322, avg loss: 2.600037, ppl: 13.464237 +epoch: 1, batch: 16853, sum loss: 4112.663086, avg loss: 2.513853, ppl: 12.352430 +epoch: 1, batch: 16854, sum loss: 5817.687500, avg loss: 2.870097, ppl: 17.638737 +epoch: 1, batch: 16855, sum loss: 5251.074219, avg loss: 2.796099, ppl: 16.380625 +epoch: 1, batch: 16856, sum loss: 4678.967285, avg loss: 2.658504, ppl: 14.274918 +epoch: 1, batch: 16857, sum loss: 5804.321289, avg loss: 3.023084, ppl: 20.554588 +epoch: 1, batch: 16858, sum loss: 3933.982422, avg loss: 2.454138, ppl: 11.636394 +epoch: 1, batch: 16859, sum loss: 4744.950195, avg loss: 2.886223, ppl: 17.925474 +epoch: 1, batch: 16860, sum loss: 3927.408936, avg loss: 2.712299, ppl: 15.063869 +epoch: 1, batch: 16861, sum loss: 4693.689453, avg loss: 2.457429, ppl: 11.674759 +epoch: 1, batch: 16862, sum loss: 4690.046387, avg loss: 2.811779, ppl: 16.639486 +epoch: 1, batch: 16863, sum loss: 5582.150391, avg loss: 2.833579, ppl: 17.006218 +epoch: 1, batch: 16864, sum loss: 4138.754883, avg loss: 2.599720, ppl: 13.459975 +epoch: 1, batch: 16865, sum loss: 5118.406738, avg loss: 2.809224, ppl: 16.597040 +epoch: 1, batch: 16866, sum loss: 5143.991211, avg loss: 2.665281, ppl: 14.371982 +epoch: 1, batch: 16867, sum loss: 4908.717285, avg loss: 2.825974, ppl: 16.877378 +epoch: 1, batch: 16868, sum loss: 4003.558838, avg loss: 2.391612, ppl: 10.931104 +epoch: 1, batch: 16869, sum loss: 4374.040527, avg loss: 2.641329, ppl: 14.031837 +epoch: 1, batch: 16870, sum loss: 4823.083496, avg loss: 2.912490, ppl: 18.402561 +epoch: 1, batch: 16871, sum loss: 4893.868164, avg loss: 2.841968, ppl: 17.149475 +epoch: 1, batch: 16872, sum loss: 
5136.983398, avg loss: 2.915428, ppl: 18.456705 +epoch: 1, batch: 16873, sum loss: 4299.359375, avg loss: 2.580648, ppl: 13.205688 +epoch: 1, batch: 16874, sum loss: 4566.271484, avg loss: 2.727761, ppl: 15.298595 +epoch: 1, batch: 16875, sum loss: 4900.661621, avg loss: 2.698602, ppl: 14.858948 +epoch: 1, batch: 16876, sum loss: 4763.635742, avg loss: 2.583317, ppl: 13.240980 +epoch: 1, batch: 16877, sum loss: 4024.764160, avg loss: 2.553784, ppl: 12.855662 +epoch: 1, batch: 16878, sum loss: 4370.234375, avg loss: 2.712746, ppl: 15.070608 +epoch: 1, batch: 16879, sum loss: 4048.310303, avg loss: 2.457990, ppl: 11.681314 +epoch: 1, batch: 16880, sum loss: 5308.073730, avg loss: 2.873890, ppl: 17.705753 +epoch: 1, batch: 16881, sum loss: 4987.468262, avg loss: 2.809841, ppl: 16.607281 +epoch: 1, batch: 16882, sum loss: 4081.174561, avg loss: 2.653559, ppl: 14.204509 +epoch: 1, batch: 16883, sum loss: 4808.550293, avg loss: 2.781116, ppl: 16.137028 +epoch: 1, batch: 16884, sum loss: 5760.347656, avg loss: 3.093635, ppl: 22.057102 +epoch: 1, batch: 16885, sum loss: 5303.789551, avg loss: 2.868464, ppl: 17.609943 +epoch: 1, batch: 16886, sum loss: 5126.847168, avg loss: 3.082891, ppl: 21.821392 +epoch: 1, batch: 16887, sum loss: 4314.496582, avg loss: 2.419796, ppl: 11.243568 +epoch: 1, batch: 16888, sum loss: 4065.754150, avg loss: 2.601250, ppl: 13.480580 +epoch: 1, batch: 16889, sum loss: 4734.730469, avg loss: 2.693248, ppl: 14.779606 +epoch: 1, batch: 16890, sum loss: 5266.689941, avg loss: 2.885858, ppl: 17.918928 +epoch: 1, batch: 16891, sum loss: 4606.734863, avg loss: 2.733967, ppl: 15.393838 +epoch: 1, batch: 16892, sum loss: 5202.445801, avg loss: 2.839763, ppl: 17.111708 +epoch: 1, batch: 16893, sum loss: 5199.001953, avg loss: 2.953978, ppl: 19.182114 +epoch: 1, batch: 16894, sum loss: 5701.915039, avg loss: 2.878301, ppl: 17.784035 +epoch: 1, batch: 16895, sum loss: 4303.100586, avg loss: 2.682731, ppl: 14.624982 +epoch: 1, batch: 16896, sum loss: 
4360.499023, avg loss: 2.631562, ppl: 13.895464 +epoch: 1, batch: 16897, sum loss: 4341.835449, avg loss: 2.636209, ppl: 13.960174 +epoch: 1, batch: 16898, sum loss: 5553.069336, avg loss: 2.963217, ppl: 19.360159 +epoch: 1, batch: 16899, sum loss: 6081.591797, avg loss: 3.134841, ppl: 22.984985 +epoch: 1, batch: 16900, sum loss: 4342.492188, avg loss: 2.620695, ppl: 13.745278 +epoch: 1, batch: 16901, sum loss: 4964.286133, avg loss: 2.848127, ppl: 17.255438 +epoch: 1, batch: 16902, sum loss: 4885.945312, avg loss: 2.774529, ppl: 16.031071 +epoch: 1, batch: 16903, sum loss: 4262.334961, avg loss: 2.685781, ppl: 14.669661 +epoch: 1, batch: 16904, sum loss: 4214.803711, avg loss: 2.624411, ppl: 13.796440 +epoch: 1, batch: 16905, sum loss: 5295.229492, avg loss: 2.958229, ppl: 19.263817 +epoch: 1, batch: 16906, sum loss: 4696.727051, avg loss: 2.780774, ppl: 16.131500 +epoch: 1, batch: 16907, sum loss: 4790.879395, avg loss: 2.783777, ppl: 16.180010 +epoch: 1, batch: 16908, sum loss: 5150.812988, avg loss: 2.799355, ppl: 16.434040 +epoch: 1, batch: 16909, sum loss: 4183.091797, avg loss: 2.566314, ppl: 13.017753 +epoch: 1, batch: 16910, sum loss: 4174.708984, avg loss: 2.594599, ppl: 13.391211 +epoch: 1, batch: 16911, sum loss: 5357.362305, avg loss: 3.016533, ppl: 20.420370 +epoch: 1, batch: 16912, sum loss: 5212.637207, avg loss: 2.883096, ppl: 17.869507 +epoch: 1, batch: 16913, sum loss: 3667.643066, avg loss: 2.425690, ppl: 11.310030 +epoch: 1, batch: 16914, sum loss: 5647.085938, avg loss: 2.860732, ppl: 17.474319 +epoch: 1, batch: 16915, sum loss: 4582.000977, avg loss: 2.650087, ppl: 14.155275 +epoch: 1, batch: 16916, sum loss: 5600.023926, avg loss: 2.947381, ppl: 19.055986 +epoch: 1, batch: 16917, sum loss: 4219.988770, avg loss: 2.499993, ppl: 12.182413 +epoch: 1, batch: 16918, sum loss: 4433.488281, avg loss: 2.750303, ppl: 15.647369 +epoch: 1, batch: 16919, sum loss: 4641.899414, avg loss: 2.678534, ppl: 14.563725 +epoch: 1, batch: 16920, sum loss: 
4824.029785, avg loss: 2.636082, ppl: 13.958406 +epoch: 1, batch: 16921, sum loss: 5015.496094, avg loss: 2.780208, ppl: 16.122379 +epoch: 1, batch: 16922, sum loss: 5690.452637, avg loss: 2.853788, ppl: 17.353386 +epoch: 1, batch: 16923, sum loss: 5378.070312, avg loss: 3.059198, ppl: 21.310463 +epoch: 1, batch: 16924, sum loss: 4465.365234, avg loss: 2.588618, ppl: 13.311357 +epoch: 1, batch: 16925, sum loss: 4236.502930, avg loss: 2.703576, ppl: 14.933035 +epoch: 1, batch: 16926, sum loss: 5394.302246, avg loss: 2.762059, ppl: 15.832416 +epoch: 1, batch: 16927, sum loss: 6006.286133, avg loss: 3.033478, ppl: 20.769333 +epoch: 1, batch: 16928, sum loss: 3633.699951, avg loss: 2.333783, ppl: 10.316895 +epoch: 1, batch: 16929, sum loss: 4231.157227, avg loss: 2.656094, ppl: 14.240551 +epoch: 1, batch: 16930, sum loss: 3981.216309, avg loss: 2.485154, ppl: 12.002965 +epoch: 1, batch: 16931, sum loss: 4484.643555, avg loss: 2.722916, ppl: 15.224658 +epoch: 1, batch: 16932, sum loss: 4894.834473, avg loss: 3.010353, ppl: 20.294569 +epoch: 1, batch: 16933, sum loss: 4430.790039, avg loss: 2.612494, ppl: 13.633013 +epoch: 1, batch: 16934, sum loss: 5665.009277, avg loss: 3.063823, ppl: 21.409254 +epoch: 1, batch: 16935, sum loss: 4057.991211, avg loss: 2.747455, ppl: 15.602878 +epoch: 1, batch: 16936, sum loss: 4481.375488, avg loss: 2.605451, ppl: 13.537328 +epoch: 1, batch: 16937, sum loss: 4528.610840, avg loss: 2.749612, ppl: 15.636561 +epoch: 1, batch: 16938, sum loss: 4953.004883, avg loss: 2.617867, ppl: 13.706460 +epoch: 1, batch: 16939, sum loss: 4358.296387, avg loss: 2.765416, ppl: 15.885653 +epoch: 1, batch: 16940, sum loss: 4651.142578, avg loss: 2.668470, ppl: 14.417888 +epoch: 1, batch: 16941, sum loss: 4415.149902, avg loss: 2.672609, ppl: 14.477694 +epoch: 1, batch: 16942, sum loss: 4679.423828, avg loss: 2.752602, ppl: 15.683393 +epoch: 1, batch: 16943, sum loss: 4606.505371, avg loss: 2.708116, ppl: 15.000988 +epoch: 1, batch: 16944, sum loss: 
4329.871094, avg loss: 2.886581, ppl: 17.931890 +epoch: 1, batch: 16945, sum loss: 4383.319336, avg loss: 2.899021, ppl: 18.156361 +epoch: 1, batch: 16946, sum loss: 4102.079102, avg loss: 2.555812, ppl: 12.881760 +epoch: 1, batch: 16947, sum loss: 3984.114990, avg loss: 2.502585, ppl: 12.214023 +epoch: 1, batch: 16948, sum loss: 4196.916016, avg loss: 2.619798, ppl: 13.732945 +epoch: 1, batch: 16949, sum loss: 4027.811523, avg loss: 2.674510, ppl: 14.505238 +epoch: 1, batch: 16950, sum loss: 5612.351074, avg loss: 3.135391, ppl: 22.997637 +epoch: 1, batch: 16951, sum loss: 3814.579102, avg loss: 2.567011, ppl: 13.026834 +epoch: 1, batch: 16952, sum loss: 3579.793457, avg loss: 2.508615, ppl: 12.287900 +epoch: 1, batch: 16953, sum loss: 6027.971191, avg loss: 2.893889, ppl: 18.063425 +epoch: 1, batch: 16954, sum loss: 4655.277832, avg loss: 2.720794, ppl: 15.192376 +epoch: 1, batch: 16955, sum loss: 4011.916992, avg loss: 2.622168, ppl: 13.765535 +epoch: 1, batch: 16956, sum loss: 5045.768066, avg loss: 2.888247, ppl: 17.961800 +epoch: 1, batch: 16957, sum loss: 4626.701172, avg loss: 2.625824, ppl: 13.815950 +epoch: 1, batch: 16958, sum loss: 4774.102539, avg loss: 2.742161, ppl: 15.520493 +epoch: 1, batch: 16959, sum loss: 4610.341797, avg loss: 2.812899, ppl: 16.658142 +epoch: 1, batch: 16960, sum loss: 5497.417969, avg loss: 2.772273, ppl: 15.994954 +epoch: 1, batch: 16961, sum loss: 4507.424805, avg loss: 2.730118, ppl: 15.334697 +epoch: 1, batch: 16962, sum loss: 3759.127441, avg loss: 2.576509, ppl: 13.151154 +epoch: 1, batch: 16963, sum loss: 4488.951660, avg loss: 2.643670, ppl: 14.064728 +epoch: 1, batch: 16964, sum loss: 4331.934570, avg loss: 2.609599, ppl: 13.593600 +epoch: 1, batch: 16965, sum loss: 4682.906250, avg loss: 2.594408, ppl: 13.388657 +epoch: 1, batch: 16966, sum loss: 5305.161621, avg loss: 3.009167, ppl: 20.270512 +epoch: 1, batch: 16967, sum loss: 4269.227539, avg loss: 2.489345, ppl: 12.053382 +epoch: 1, batch: 16968, sum loss: 
5371.867676, avg loss: 2.922670, ppl: 18.590860 +epoch: 1, batch: 16969, sum loss: 4266.094238, avg loss: 2.638277, ppl: 13.989083 +epoch: 1, batch: 16970, sum loss: 4402.719727, avg loss: 2.661862, ppl: 14.322935 +epoch: 1, batch: 16971, sum loss: 4711.281738, avg loss: 2.693700, ppl: 14.786288 +epoch: 1, batch: 16972, sum loss: 4544.395020, avg loss: 2.903767, ppl: 18.242729 +epoch: 1, batch: 16973, sum loss: 4340.050781, avg loss: 2.832931, ppl: 16.995205 +epoch: 1, batch: 16974, sum loss: 4755.645996, avg loss: 2.649385, ppl: 14.145336 +epoch: 1, batch: 16975, sum loss: 4834.167969, avg loss: 2.775068, ppl: 16.039711 +epoch: 1, batch: 16976, sum loss: 4272.586426, avg loss: 2.707596, ppl: 14.993186 +epoch: 1, batch: 16977, sum loss: 5046.145020, avg loss: 2.905092, ppl: 18.266928 +epoch: 1, batch: 16978, sum loss: 4760.222656, avg loss: 2.796840, ppl: 16.392771 +epoch: 1, batch: 16979, sum loss: 4784.950195, avg loss: 2.630539, ppl: 13.881249 +epoch: 1, batch: 16980, sum loss: 5310.415527, avg loss: 2.851995, ppl: 17.322313 +epoch: 1, batch: 16981, sum loss: 4179.574219, avg loss: 2.845183, ppl: 17.204710 +epoch: 1, batch: 16982, sum loss: 5236.304688, avg loss: 3.012834, ppl: 20.344967 +epoch: 1, batch: 16983, sum loss: 5348.963867, avg loss: 2.885094, ppl: 17.905245 +epoch: 1, batch: 16984, sum loss: 4765.805176, avg loss: 2.765993, ppl: 15.894810 +epoch: 1, batch: 16985, sum loss: 5649.432617, avg loss: 3.133351, ppl: 22.950766 +epoch: 1, batch: 16986, sum loss: 4651.567871, avg loss: 2.523911, ppl: 12.477300 +epoch: 1, batch: 16987, sum loss: 4815.062500, avg loss: 2.946795, ppl: 19.044813 +epoch: 1, batch: 16988, sum loss: 4417.345215, avg loss: 2.626246, ppl: 13.821782 +epoch: 1, batch: 16989, sum loss: 4978.529297, avg loss: 2.817504, ppl: 16.735027 +epoch: 1, batch: 16990, sum loss: 3880.898682, avg loss: 2.518429, ppl: 12.409084 +epoch: 1, batch: 16991, sum loss: 4463.759277, avg loss: 2.887296, ppl: 17.944715 +epoch: 1, batch: 16992, sum loss: 
4572.977051, avg loss: 2.761460, ppl: 15.822921 +epoch: 1, batch: 16993, sum loss: 5366.152832, avg loss: 2.958188, ppl: 19.263037 +epoch: 1, batch: 16994, sum loss: 4714.542480, avg loss: 2.823079, ppl: 16.828592 +epoch: 1, batch: 16995, sum loss: 5972.142578, avg loss: 3.113734, ppl: 22.504932 +epoch: 1, batch: 16996, sum loss: 3866.714111, avg loss: 2.415187, ppl: 11.191862 +epoch: 1, batch: 16997, sum loss: 4575.602051, avg loss: 2.579257, ppl: 13.187337 +epoch: 1, batch: 16998, sum loss: 4923.069336, avg loss: 2.907897, ppl: 18.318235 +epoch: 1, batch: 16999, sum loss: 3876.355469, avg loss: 2.635184, ppl: 13.945875 +epoch: 1, batch: 17000, sum loss: 4981.900879, avg loss: 2.760056, ppl: 15.800724 +epoch: 1, batch: 17001, sum loss: 5230.968262, avg loss: 2.816892, ppl: 16.724783 +epoch: 1, batch: 17002, sum loss: 4617.610352, avg loss: 2.758429, ppl: 15.775045 +epoch: 1, batch: 17003, sum loss: 4522.572754, avg loss: 2.739293, ppl: 15.476041 +epoch: 1, batch: 17004, sum loss: 5171.138184, avg loss: 2.839725, ppl: 17.111052 +epoch: 1, batch: 17005, sum loss: 4895.964355, avg loss: 2.753636, ppl: 15.699610 +epoch: 1, batch: 17006, sum loss: 5275.869141, avg loss: 3.028627, ppl: 20.668842 +epoch: 1, batch: 17007, sum loss: 4889.234863, avg loss: 2.620169, ppl: 13.738041 +epoch: 1, batch: 17008, sum loss: 4680.146973, avg loss: 2.897924, ppl: 18.136454 +epoch: 1, batch: 17009, sum loss: 4821.703125, avg loss: 2.814771, ppl: 16.689356 +epoch: 1, batch: 17010, sum loss: 5422.099609, avg loss: 2.682880, ppl: 14.627154 +epoch: 1, batch: 17011, sum loss: 4867.937988, avg loss: 2.549994, ppl: 12.807024 +epoch: 1, batch: 17012, sum loss: 5234.946289, avg loss: 2.786028, ppl: 16.216478 +epoch: 1, batch: 17013, sum loss: 4815.854004, avg loss: 2.755065, ppl: 15.722070 +epoch: 1, batch: 17014, sum loss: 4997.393066, avg loss: 2.865478, ppl: 17.557436 +epoch: 1, batch: 17015, sum loss: 4490.454590, avg loss: 2.526986, ppl: 12.515728 +epoch: 1, batch: 17016, sum loss: 
4497.647949, avg loss: 2.866570, ppl: 17.576622 +epoch: 1, batch: 17017, sum loss: 4963.395508, avg loss: 2.950889, ppl: 19.122948 +epoch: 1, batch: 17018, sum loss: 4931.578125, avg loss: 2.735207, ppl: 15.412931 +epoch: 1, batch: 17019, sum loss: 4381.665039, avg loss: 2.676643, ppl: 14.536219 +epoch: 1, batch: 17020, sum loss: 3691.559570, avg loss: 2.528466, ppl: 12.534258 +epoch: 1, batch: 17021, sum loss: 3681.636719, avg loss: 2.556692, ppl: 12.893098 +epoch: 1, batch: 17022, sum loss: 4665.983398, avg loss: 2.867845, ppl: 17.599045 +epoch: 1, batch: 17023, sum loss: 5281.192383, avg loss: 2.757803, ppl: 15.765164 +epoch: 1, batch: 17024, sum loss: 5081.864258, avg loss: 2.656489, ppl: 14.246188 +epoch: 1, batch: 17025, sum loss: 4311.808594, avg loss: 2.597475, ppl: 13.429786 +epoch: 1, batch: 17026, sum loss: 5087.127930, avg loss: 2.892057, ppl: 18.030359 +epoch: 1, batch: 17027, sum loss: 4678.632324, avg loss: 2.606480, ppl: 13.551271 +epoch: 1, batch: 17028, sum loss: 4701.141113, avg loss: 2.575968, ppl: 13.144032 +epoch: 1, batch: 17029, sum loss: 4274.156250, avg loss: 2.628632, ppl: 13.854808 +epoch: 1, batch: 17030, sum loss: 4730.961914, avg loss: 2.782919, ppl: 16.166140 +epoch: 1, batch: 17031, sum loss: 4258.479492, avg loss: 2.856123, ppl: 17.393963 +epoch: 1, batch: 17032, sum loss: 5326.827637, avg loss: 2.865426, ppl: 17.556536 +epoch: 1, batch: 17033, sum loss: 4575.662598, avg loss: 2.984777, ppl: 19.782085 +epoch: 1, batch: 17034, sum loss: 4852.114258, avg loss: 2.673341, ppl: 14.488297 +epoch: 1, batch: 17035, sum loss: 4549.516113, avg loss: 2.722631, ppl: 15.220314 +epoch: 1, batch: 17036, sum loss: 4098.791016, avg loss: 2.494699, ppl: 12.118089 +epoch: 1, batch: 17037, sum loss: 4273.049805, avg loss: 2.529929, ppl: 12.552614 +epoch: 1, batch: 17038, sum loss: 4973.443359, avg loss: 2.682548, ppl: 14.622297 +epoch: 1, batch: 17039, sum loss: 5139.356445, avg loss: 2.936775, ppl: 18.854940 +epoch: 1, batch: 17040, sum loss: 
5849.640137, avg loss: 2.786870, ppl: 16.230141 +epoch: 1, batch: 17041, sum loss: 4342.682617, avg loss: 2.656075, ppl: 14.240286 +epoch: 1, batch: 17042, sum loss: 3928.591064, avg loss: 2.532941, ppl: 12.590476 +epoch: 1, batch: 17043, sum loss: 3623.777344, avg loss: 2.299351, ppl: 9.967711 +epoch: 1, batch: 17044, sum loss: 6460.328125, avg loss: 3.037296, ppl: 20.848787 +epoch: 1, batch: 17045, sum loss: 4851.334473, avg loss: 2.704200, ppl: 14.942355 +epoch: 1, batch: 17046, sum loss: 4427.374512, avg loss: 2.662282, ppl: 14.328947 +epoch: 1, batch: 17047, sum loss: 4063.333252, avg loss: 2.624892, ppl: 13.803086 +epoch: 1, batch: 17048, sum loss: 5057.906250, avg loss: 2.886933, ppl: 17.938206 +epoch: 1, batch: 17049, sum loss: 4672.616211, avg loss: 2.774713, ppl: 16.034021 +epoch: 1, batch: 17050, sum loss: 4808.215820, avg loss: 2.771306, ppl: 15.979490 +epoch: 1, batch: 17051, sum loss: 4609.653320, avg loss: 2.833223, ppl: 17.000158 +epoch: 1, batch: 17052, sum loss: 5100.304199, avg loss: 2.619571, ppl: 13.729832 +epoch: 1, batch: 17053, sum loss: 4247.942383, avg loss: 2.721296, ppl: 15.200003 +epoch: 1, batch: 17054, sum loss: 4854.866699, avg loss: 2.960284, ppl: 19.303463 +epoch: 1, batch: 17055, sum loss: 4682.315430, avg loss: 2.793744, ppl: 16.342096 +epoch: 1, batch: 17056, sum loss: 5151.656250, avg loss: 2.801336, ppl: 16.466629 +epoch: 1, batch: 17057, sum loss: 3734.995361, avg loss: 2.654581, ppl: 14.219028 +epoch: 1, batch: 17058, sum loss: 4227.484375, avg loss: 2.694381, ppl: 14.796360 +epoch: 1, batch: 17059, sum loss: 4822.459961, avg loss: 2.841756, ppl: 17.145849 +epoch: 1, batch: 17060, sum loss: 4384.937012, avg loss: 2.782320, ppl: 16.156469 +epoch: 1, batch: 17061, sum loss: 4323.599609, avg loss: 2.759158, ppl: 15.786540 +epoch: 1, batch: 17062, sum loss: 4183.994141, avg loss: 2.555892, ppl: 12.882780 +epoch: 1, batch: 17063, sum loss: 4165.498047, avg loss: 2.843343, ppl: 17.173084 +epoch: 1, batch: 17064, sum loss: 
4459.208984, avg loss: 2.785265, ppl: 16.204111 +epoch: 1, batch: 17065, sum loss: 4252.610840, avg loss: 2.424521, ppl: 11.296822 +epoch: 1, batch: 17066, sum loss: 4932.204102, avg loss: 2.832972, ppl: 16.995899 +epoch: 1, batch: 17067, sum loss: 5158.343750, avg loss: 2.770324, ppl: 15.963809 +epoch: 1, batch: 17068, sum loss: 4818.262695, avg loss: 2.781907, ppl: 16.149786 +epoch: 1, batch: 17069, sum loss: 5213.905762, avg loss: 3.013818, ppl: 20.365011 +epoch: 1, batch: 17070, sum loss: 4856.136230, avg loss: 2.687402, ppl: 14.693457 +epoch: 1, batch: 17071, sum loss: 5332.258789, avg loss: 2.664797, ppl: 14.365034 +epoch: 1, batch: 17072, sum loss: 5090.794434, avg loss: 2.763732, ppl: 15.858921 +epoch: 1, batch: 17073, sum loss: 5108.483887, avg loss: 2.635957, ppl: 13.956659 +epoch: 1, batch: 17074, sum loss: 4175.432129, avg loss: 2.637670, ppl: 13.980595 +epoch: 1, batch: 17075, sum loss: 5085.943359, avg loss: 2.606839, ppl: 13.556138 +epoch: 1, batch: 17076, sum loss: 4283.538086, avg loss: 2.799698, ppl: 16.439684 +epoch: 1, batch: 17077, sum loss: 5357.309570, avg loss: 2.933905, ppl: 18.800898 +epoch: 1, batch: 17078, sum loss: 5661.609863, avg loss: 2.801390, ppl: 16.467527 +epoch: 1, batch: 17079, sum loss: 5150.397949, avg loss: 2.977108, ppl: 19.630972 +epoch: 1, batch: 17080, sum loss: 4771.935059, avg loss: 2.775995, ppl: 16.054594 +epoch: 1, batch: 17081, sum loss: 3963.213379, avg loss: 2.529173, ppl: 12.543124 +epoch: 1, batch: 17082, sum loss: 4100.755371, avg loss: 2.638839, ppl: 13.996941 +epoch: 1, batch: 17083, sum loss: 4660.809082, avg loss: 2.622853, ppl: 13.774961 +epoch: 1, batch: 17084, sum loss: 4598.721680, avg loss: 2.874201, ppl: 17.711267 +epoch: 1, batch: 17085, sum loss: 4206.675293, avg loss: 2.649040, ppl: 14.140457 +epoch: 1, batch: 17086, sum loss: 5567.530273, avg loss: 2.966186, ppl: 19.417711 +epoch: 1, batch: 17087, sum loss: 5174.192383, avg loss: 2.784818, ppl: 16.196873 +epoch: 1, batch: 17088, sum loss: 
4018.315918, avg loss: 2.557808, ppl: 12.907492 +epoch: 1, batch: 17089, sum loss: 4419.635254, avg loss: 2.656031, ppl: 14.239658 +epoch: 1, batch: 17090, sum loss: 4681.882812, avg loss: 2.783521, ppl: 16.175879 +epoch: 1, batch: 17091, sum loss: 4878.000977, avg loss: 2.808291, ppl: 16.581551 +epoch: 1, batch: 17092, sum loss: 5151.858398, avg loss: 2.786294, ppl: 16.220802 +epoch: 1, batch: 17093, sum loss: 3699.989990, avg loss: 2.661863, ppl: 14.322952 +epoch: 1, batch: 17094, sum loss: 4624.894043, avg loss: 2.650369, ppl: 14.159262 +epoch: 1, batch: 17095, sum loss: 4484.223145, avg loss: 2.795650, ppl: 16.373276 +epoch: 1, batch: 17096, sum loss: 3853.543213, avg loss: 2.586271, ppl: 13.280155 +epoch: 1, batch: 17097, sum loss: 3866.510986, avg loss: 2.513986, ppl: 12.354079 +epoch: 1, batch: 17098, sum loss: 4350.215332, avg loss: 2.718884, ppl: 15.163398 +epoch: 1, batch: 17099, sum loss: 4544.554199, avg loss: 2.684320, ppl: 14.648244 +epoch: 1, batch: 17100, sum loss: 4310.352539, avg loss: 2.664000, ppl: 14.353593 +epoch: 1, batch: 17101, sum loss: 4810.307617, avg loss: 2.871826, ppl: 17.669247 +epoch: 1, batch: 17102, sum loss: 3632.577148, avg loss: 2.565379, ppl: 13.005591 +epoch: 1, batch: 17103, sum loss: 4404.436523, avg loss: 2.848924, ppl: 17.269188 +epoch: 1, batch: 17104, sum loss: 4235.936523, avg loss: 2.576604, ppl: 13.152392 +epoch: 1, batch: 17105, sum loss: 6211.610352, avg loss: 3.041925, ppl: 20.945518 +epoch: 1, batch: 17106, sum loss: 4832.634766, avg loss: 2.522252, ppl: 12.456618 +epoch: 1, batch: 17107, sum loss: 4230.326172, avg loss: 2.553003, ppl: 12.845622 +epoch: 1, batch: 17108, sum loss: 4655.193848, avg loss: 2.528622, ppl: 12.536224 +epoch: 1, batch: 17109, sum loss: 4283.885742, avg loss: 2.588451, ppl: 13.309135 +epoch: 1, batch: 17110, sum loss: 4404.171875, avg loss: 2.506643, ppl: 12.263692 +epoch: 1, batch: 17111, sum loss: 5720.586914, avg loss: 2.881908, ppl: 17.848289 +epoch: 1, batch: 17112, sum loss: 
4313.549316, avg loss: 2.487629, ppl: 12.032715 +epoch: 1, batch: 17113, sum loss: 3907.659180, avg loss: 2.626115, ppl: 13.819976 +epoch: 1, batch: 17114, sum loss: 4598.009766, avg loss: 2.644054, ppl: 14.070127 +epoch: 1, batch: 17115, sum loss: 5296.541016, avg loss: 2.937627, ppl: 18.871010 +epoch: 1, batch: 17116, sum loss: 4991.910645, avg loss: 2.617677, ppl: 13.703858 +epoch: 1, batch: 17117, sum loss: 4618.514160, avg loss: 2.859761, ppl: 17.457354 +epoch: 1, batch: 17118, sum loss: 4775.971191, avg loss: 2.826018, ppl: 16.878124 +epoch: 1, batch: 17119, sum loss: 4848.662109, avg loss: 2.489046, ppl: 12.049779 +epoch: 1, batch: 17120, sum loss: 5392.354004, avg loss: 2.583783, ppl: 13.247153 +epoch: 1, batch: 17121, sum loss: 4106.354492, avg loss: 2.585866, ppl: 13.274776 +epoch: 1, batch: 17122, sum loss: 4688.267090, avg loss: 2.730499, ppl: 15.340544 +epoch: 1, batch: 17123, sum loss: 3464.176025, avg loss: 2.512093, ppl: 12.330709 +epoch: 1, batch: 17124, sum loss: 4942.511719, avg loss: 2.696406, ppl: 14.826348 +epoch: 1, batch: 17125, sum loss: 4507.774902, avg loss: 2.599639, ppl: 13.458884 +epoch: 1, batch: 17126, sum loss: 5017.399902, avg loss: 2.664578, ppl: 14.361883 +epoch: 1, batch: 17127, sum loss: 4385.312988, avg loss: 2.786094, ppl: 16.217558 +epoch: 1, batch: 17128, sum loss: 4386.210938, avg loss: 2.758623, ppl: 15.778104 +epoch: 1, batch: 17129, sum loss: 5025.682617, avg loss: 2.796707, ppl: 16.390587 +epoch: 1, batch: 17130, sum loss: 4891.113770, avg loss: 2.932323, ppl: 18.771175 +epoch: 1, batch: 17131, sum loss: 4202.392578, avg loss: 2.688671, ppl: 14.712109 +epoch: 1, batch: 17132, sum loss: 4779.160645, avg loss: 2.849827, ppl: 17.284796 +epoch: 1, batch: 17133, sum loss: 4497.850098, avg loss: 2.631861, ppl: 13.899610 +epoch: 1, batch: 17134, sum loss: 5395.221191, avg loss: 2.868273, ppl: 17.606581 +epoch: 1, batch: 17135, sum loss: 4058.435059, avg loss: 2.486786, ppl: 12.022578 +epoch: 1, batch: 17136, sum loss: 
4646.064941, avg loss: 2.644317, ppl: 14.073828 +epoch: 1, batch: 17137, sum loss: 4693.156250, avg loss: 2.781954, ppl: 16.150549 +epoch: 1, batch: 17138, sum loss: 5304.657715, avg loss: 2.774403, ppl: 16.029049 +epoch: 1, batch: 17139, sum loss: 4139.040527, avg loss: 2.547102, ppl: 12.770040 +epoch: 1, batch: 17140, sum loss: 7067.559570, avg loss: 3.112092, ppl: 22.467989 +epoch: 1, batch: 17141, sum loss: 4625.682617, avg loss: 2.545780, ppl: 12.753174 +epoch: 1, batch: 17142, sum loss: 5328.843750, avg loss: 2.923118, ppl: 18.599188 +epoch: 1, batch: 17143, sum loss: 4425.217285, avg loss: 2.704901, ppl: 14.952832 +epoch: 1, batch: 17144, sum loss: 4924.098633, avg loss: 2.684896, ppl: 14.656670 +epoch: 1, batch: 17145, sum loss: 5648.000000, avg loss: 2.833919, ppl: 17.011997 +epoch: 1, batch: 17146, sum loss: 5290.855469, avg loss: 2.797914, ppl: 16.410379 +epoch: 1, batch: 17147, sum loss: 4545.850098, avg loss: 2.548122, ppl: 12.783080 +epoch: 1, batch: 17148, sum loss: 4076.372803, avg loss: 2.660818, ppl: 14.307986 +epoch: 1, batch: 17149, sum loss: 5449.516602, avg loss: 3.027509, ppl: 20.645744 +epoch: 1, batch: 17150, sum loss: 4746.677246, avg loss: 2.751697, ppl: 15.669201 +epoch: 1, batch: 17151, sum loss: 4804.884766, avg loss: 2.755094, ppl: 15.722524 +epoch: 1, batch: 17152, sum loss: 4428.102051, avg loss: 2.932518, ppl: 18.774847 +epoch: 1, batch: 17153, sum loss: 4689.390625, avg loss: 2.747153, ppl: 15.598166 +epoch: 1, batch: 17154, sum loss: 5237.528809, avg loss: 3.010074, ppl: 20.288898 +epoch: 1, batch: 17155, sum loss: 4307.597656, avg loss: 2.678854, ppl: 14.568392 +epoch: 1, batch: 17156, sum loss: 4518.809570, avg loss: 2.598511, ppl: 13.443699 +epoch: 1, batch: 17157, sum loss: 5825.380371, avg loss: 2.905427, ppl: 18.273035 +epoch: 1, batch: 17158, sum loss: 5190.557129, avg loss: 2.856663, ppl: 17.403358 +epoch: 1, batch: 17159, sum loss: 3505.483887, avg loss: 2.332325, ppl: 10.301861 +epoch: 1, batch: 17160, sum loss: 
4287.999512, avg loss: 2.643649, ppl: 14.064436 +epoch: 1, batch: 17161, sum loss: 4396.655273, avg loss: 2.513811, ppl: 12.351912 +epoch: 1, batch: 17162, sum loss: 3506.185303, avg loss: 2.585682, ppl: 13.272343 +epoch: 1, batch: 17163, sum loss: 5106.003906, avg loss: 2.748118, ppl: 15.613226 +epoch: 1, batch: 17164, sum loss: 4156.410645, avg loss: 2.576820, ppl: 13.155240 +epoch: 1, batch: 17165, sum loss: 3722.924805, avg loss: 2.409660, ppl: 11.130177 +epoch: 1, batch: 17166, sum loss: 4504.569824, avg loss: 2.743344, ppl: 15.538854 +epoch: 1, batch: 17167, sum loss: 4154.294434, avg loss: 2.573912, ppl: 13.117043 +epoch: 1, batch: 17168, sum loss: 4937.580078, avg loss: 2.646077, ppl: 14.098623 +epoch: 1, batch: 17169, sum loss: 3206.122559, avg loss: 2.280315, ppl: 9.779757 +epoch: 1, batch: 17170, sum loss: 4810.442383, avg loss: 2.758281, ppl: 15.772706 +epoch: 1, batch: 17171, sum loss: 4888.877441, avg loss: 2.811316, ppl: 16.631784 +epoch: 1, batch: 17172, sum loss: 4674.392090, avg loss: 2.697283, ppl: 14.839362 +epoch: 1, batch: 17173, sum loss: 5218.576172, avg loss: 3.032293, ppl: 20.744743 +epoch: 1, batch: 17174, sum loss: 5613.991699, avg loss: 2.890830, ppl: 18.008251 +epoch: 1, batch: 17175, sum loss: 4946.168457, avg loss: 2.895883, ppl: 18.099478 +epoch: 1, batch: 17176, sum loss: 4683.958008, avg loss: 2.859559, ppl: 17.453829 +epoch: 1, batch: 17177, sum loss: 3923.158936, avg loss: 2.601564, ppl: 13.484814 +epoch: 1, batch: 17178, sum loss: 5900.630371, avg loss: 2.815186, ppl: 16.696285 +epoch: 1, batch: 17179, sum loss: 4135.656738, avg loss: 2.506459, ppl: 12.261433 +epoch: 1, batch: 17180, sum loss: 4744.985352, avg loss: 2.691427, ppl: 14.752709 +epoch: 1, batch: 17181, sum loss: 4002.758301, avg loss: 2.592460, ppl: 13.362605 +epoch: 1, batch: 17182, sum loss: 5236.788574, avg loss: 2.669107, ppl: 14.427083 +epoch: 1, batch: 17183, sum loss: 3336.612061, avg loss: 2.390123, ppl: 10.914841 +epoch: 1, batch: 17184, sum loss: 
3641.286621, avg loss: 2.558880, ppl: 12.921342 +epoch: 1, batch: 17185, sum loss: 3771.687500, avg loss: 2.569269, ppl: 13.056282 +epoch: 1, batch: 17186, sum loss: 4865.428223, avg loss: 2.963111, ppl: 19.358105 +epoch: 1, batch: 17187, sum loss: 4792.224121, avg loss: 2.837315, ppl: 17.069864 +epoch: 1, batch: 17188, sum loss: 5042.714844, avg loss: 2.871706, ppl: 17.667124 +epoch: 1, batch: 17189, sum loss: 4548.040039, avg loss: 2.613816, ppl: 13.651044 +epoch: 1, batch: 17190, sum loss: 4773.899414, avg loss: 2.860335, ppl: 17.467384 +epoch: 1, batch: 17191, sum loss: 3888.452637, avg loss: 2.650615, ppl: 14.162749 +epoch: 1, batch: 17192, sum loss: 4236.250000, avg loss: 2.669345, ppl: 14.430512 +epoch: 1, batch: 17193, sum loss: 4101.720215, avg loss: 2.531926, ppl: 12.577709 +epoch: 1, batch: 17194, sum loss: 5420.797852, avg loss: 2.994916, ppl: 19.983685 +epoch: 1, batch: 17195, sum loss: 4675.275879, avg loss: 2.768073, ppl: 15.927917 +epoch: 1, batch: 17196, sum loss: 4481.932617, avg loss: 2.566972, ppl: 13.026319 +epoch: 1, batch: 17197, sum loss: 5112.845703, avg loss: 2.533620, ppl: 12.599036 +epoch: 1, batch: 17198, sum loss: 4681.792480, avg loss: 2.752377, ppl: 15.679852 +epoch: 1, batch: 17199, sum loss: 3163.098633, avg loss: 2.293762, ppl: 9.912162 +epoch: 1, batch: 17200, sum loss: 5883.598633, avg loss: 2.941799, ppl: 18.949915 +epoch: 1, batch: 17201, sum loss: 4872.996582, avg loss: 2.813509, ppl: 16.668297 +epoch: 1, batch: 17202, sum loss: 3816.144531, avg loss: 2.537330, ppl: 12.645864 +epoch: 1, batch: 17203, sum loss: 4842.003906, avg loss: 2.624392, ppl: 13.796187 +epoch: 1, batch: 17204, sum loss: 3917.406250, avg loss: 2.289542, ppl: 9.870418 +epoch: 1, batch: 17205, sum loss: 3835.705811, avg loss: 2.565689, ppl: 13.009624 +epoch: 1, batch: 17206, sum loss: 4648.849609, avg loss: 2.841595, ppl: 17.143089 +epoch: 1, batch: 17207, sum loss: 5324.545410, avg loss: 2.911179, ppl: 18.378447 +epoch: 1, batch: 17208, sum loss: 
4017.279785, avg loss: 2.598499, ppl: 13.443548 +epoch: 1, batch: 17209, sum loss: 4474.261230, avg loss: 2.728208, ppl: 15.305436 +epoch: 1, batch: 17210, sum loss: 4383.325195, avg loss: 2.746444, ppl: 15.587109 +epoch: 1, batch: 17211, sum loss: 4044.773438, avg loss: 2.640192, ppl: 14.015888 +epoch: 1, batch: 17212, sum loss: 4967.646484, avg loss: 2.772124, ppl: 15.992571 +epoch: 1, batch: 17213, sum loss: 4047.197754, avg loss: 2.558279, ppl: 12.913574 +epoch: 1, batch: 17214, sum loss: 4567.920410, avg loss: 2.561930, ppl: 12.960804 +epoch: 1, batch: 17215, sum loss: 5080.087402, avg loss: 2.779041, ppl: 16.103575 +epoch: 1, batch: 17216, sum loss: 4390.799805, avg loss: 2.570726, ppl: 13.075310 +epoch: 1, batch: 17217, sum loss: 4865.710449, avg loss: 2.680832, ppl: 14.597239 +epoch: 1, batch: 17218, sum loss: 4905.950195, avg loss: 2.688192, ppl: 14.705064 +epoch: 1, batch: 17219, sum loss: 4469.058594, avg loss: 2.851984, ppl: 17.322115 +epoch: 1, batch: 17220, sum loss: 4298.114258, avg loss: 2.783753, ppl: 16.179628 +epoch: 1, batch: 17221, sum loss: 4310.214355, avg loss: 2.458765, ppl: 11.690363 +epoch: 1, batch: 17222, sum loss: 4435.183105, avg loss: 2.687990, ppl: 14.702091 +epoch: 1, batch: 17223, sum loss: 4953.169922, avg loss: 2.770229, ppl: 15.962295 +epoch: 1, batch: 17224, sum loss: 4382.901367, avg loss: 2.912227, ppl: 18.397722 +epoch: 1, batch: 17225, sum loss: 4490.266602, avg loss: 2.633588, ppl: 13.923633 +epoch: 1, batch: 17226, sum loss: 4640.885254, avg loss: 2.682593, ppl: 14.622956 +epoch: 1, batch: 17227, sum loss: 4434.907227, avg loss: 2.614922, ppl: 13.666148 +epoch: 1, batch: 17228, sum loss: 4990.385742, avg loss: 2.707751, ppl: 14.995517 +epoch: 1, batch: 17229, sum loss: 3788.370850, avg loss: 2.527266, ppl: 12.519226 +epoch: 1, batch: 17230, sum loss: 4598.823730, avg loss: 2.675290, ppl: 14.516564 +epoch: 1, batch: 17231, sum loss: 4386.739746, avg loss: 2.629940, ppl: 13.872938 +epoch: 1, batch: 17232, sum loss: 
4361.982422, avg loss: 2.900254, ppl: 18.178766 +epoch: 1, batch: 17233, sum loss: 4261.428223, avg loss: 2.745766, ppl: 15.576536 +epoch: 1, batch: 17234, sum loss: 4142.280273, avg loss: 2.547528, ppl: 12.775481 +epoch: 1, batch: 17235, sum loss: 4697.095215, avg loss: 2.570933, ppl: 13.078025 +epoch: 1, batch: 17236, sum loss: 5057.977051, avg loss: 2.628886, ppl: 13.858326 +epoch: 1, batch: 17237, sum loss: 4103.501953, avg loss: 2.564689, ppl: 12.996612 +epoch: 1, batch: 17238, sum loss: 5293.929688, avg loss: 2.787746, ppl: 16.244364 +epoch: 1, batch: 17239, sum loss: 4033.802979, avg loss: 2.421250, ppl: 11.259927 +epoch: 1, batch: 17240, sum loss: 5060.991211, avg loss: 2.840063, ppl: 17.116837 +epoch: 1, batch: 17241, sum loss: 3883.011475, avg loss: 2.721101, ppl: 15.197049 +epoch: 1, batch: 17242, sum loss: 4501.344727, avg loss: 2.612504, ppl: 13.633149 +epoch: 1, batch: 17243, sum loss: 4774.307617, avg loss: 2.608911, ppl: 13.584253 +epoch: 1, batch: 17244, sum loss: 4906.423340, avg loss: 2.570154, ppl: 13.067833 +epoch: 1, batch: 17245, sum loss: 4838.867188, avg loss: 2.826441, ppl: 16.885260 +epoch: 1, batch: 17246, sum loss: 3893.349854, avg loss: 2.465706, ppl: 11.771791 +epoch: 1, batch: 17247, sum loss: 4089.786865, avg loss: 2.431502, ppl: 11.375959 +epoch: 1, batch: 17248, sum loss: 4714.535156, avg loss: 2.726741, ppl: 15.283000 +epoch: 1, batch: 17249, sum loss: 4220.692383, avg loss: 2.488616, ppl: 12.044592 +epoch: 1, batch: 17250, sum loss: 5202.692871, avg loss: 2.782189, ppl: 16.154339 +epoch: 1, batch: 17251, sum loss: 4648.529297, avg loss: 2.731216, ppl: 15.351539 +epoch: 1, batch: 17252, sum loss: 4908.152344, avg loss: 2.729784, ppl: 15.329579 +epoch: 1, batch: 17253, sum loss: 4169.338867, avg loss: 2.553177, ppl: 12.847861 +epoch: 1, batch: 17254, sum loss: 4442.778320, avg loss: 2.783696, ppl: 16.178701 +epoch: 1, batch: 17255, sum loss: 4475.915527, avg loss: 2.533059, ppl: 12.591968 +epoch: 1, batch: 17256, sum loss: 
5057.782227, avg loss: 2.852669, ppl: 17.333988 +epoch: 1, batch: 17257, sum loss: 4900.620605, avg loss: 2.836007, ppl: 17.047565 +epoch: 1, batch: 17258, sum loss: 4272.394531, avg loss: 2.413782, ppl: 11.176153 +epoch: 1, batch: 17259, sum loss: 4383.352051, avg loss: 2.722579, ppl: 15.219520 +epoch: 1, batch: 17260, sum loss: 4873.324707, avg loss: 2.810452, ppl: 16.617435 +epoch: 1, batch: 17261, sum loss: 4919.221680, avg loss: 2.986777, ppl: 19.821692 +epoch: 1, batch: 17262, sum loss: 4511.243164, avg loss: 2.842623, ppl: 17.160727 +epoch: 1, batch: 17263, sum loss: 4521.894043, avg loss: 2.742204, ppl: 15.521152 +epoch: 1, batch: 17264, sum loss: 5198.581543, avg loss: 2.774056, ppl: 16.023497 +epoch: 1, batch: 17265, sum loss: 5631.832031, avg loss: 2.783901, ppl: 16.182024 +epoch: 1, batch: 17266, sum loss: 4373.209473, avg loss: 2.542564, ppl: 12.712219 +epoch: 1, batch: 17267, sum loss: 5007.985840, avg loss: 2.855180, ppl: 17.377565 +epoch: 1, batch: 17268, sum loss: 4564.009277, avg loss: 2.777851, ppl: 16.084415 +epoch: 1, batch: 17269, sum loss: 4352.363770, avg loss: 2.545242, ppl: 12.746310 +epoch: 1, batch: 17270, sum loss: 5099.219727, avg loss: 2.903884, ppl: 18.244865 +epoch: 1, batch: 17271, sum loss: 5010.816406, avg loss: 2.759260, ppl: 15.788155 +epoch: 1, batch: 17272, sum loss: 5151.114746, avg loss: 2.791932, ppl: 16.312508 +epoch: 1, batch: 17273, sum loss: 4208.659180, avg loss: 2.478598, ppl: 11.924533 +epoch: 1, batch: 17274, sum loss: 4979.880371, avg loss: 2.724223, ppl: 15.244570 +epoch: 1, batch: 17275, sum loss: 4926.380371, avg loss: 2.810257, ppl: 16.614187 +epoch: 1, batch: 17276, sum loss: 5462.848633, avg loss: 2.700370, ppl: 14.885236 +epoch: 1, batch: 17277, sum loss: 6101.195312, avg loss: 2.946014, ppl: 19.029953 +epoch: 1, batch: 17278, sum loss: 5154.986328, avg loss: 3.039497, ppl: 20.894724 +epoch: 1, batch: 17279, sum loss: 5265.977051, avg loss: 3.049205, ppl: 21.098566 +epoch: 1, batch: 17280, sum loss: 
3667.536133, avg loss: 2.400220, ppl: 11.025600 +epoch: 1, batch: 17281, sum loss: 4126.956543, avg loss: 2.360959, ppl: 10.601116 +epoch: 1, batch: 17282, sum loss: 4103.523438, avg loss: 2.649144, ppl: 14.141924 +epoch: 1, batch: 17283, sum loss: 4753.829590, avg loss: 2.682748, ppl: 14.625230 +epoch: 1, batch: 17284, sum loss: 5412.999023, avg loss: 2.828108, ppl: 16.913431 +epoch: 1, batch: 17285, sum loss: 4259.934570, avg loss: 2.888091, ppl: 17.959000 +epoch: 1, batch: 17286, sum loss: 3543.197266, avg loss: 2.482970, ppl: 11.976777 +epoch: 1, batch: 17287, sum loss: 5257.220703, avg loss: 2.805347, ppl: 16.532816 +epoch: 1, batch: 17288, sum loss: 5058.757324, avg loss: 2.884126, ppl: 17.887930 +epoch: 1, batch: 17289, sum loss: 4826.135742, avg loss: 2.560284, ppl: 12.939493 +epoch: 1, batch: 17290, sum loss: 3894.164062, avg loss: 2.545205, ppl: 12.745845 +epoch: 1, batch: 17291, sum loss: 4151.238281, avg loss: 2.625704, ppl: 13.814297 +epoch: 1, batch: 17292, sum loss: 5146.408691, avg loss: 2.749150, ppl: 15.629342 +epoch: 1, batch: 17293, sum loss: 4779.048340, avg loss: 2.748159, ppl: 15.613859 +epoch: 1, batch: 17294, sum loss: 4354.381348, avg loss: 2.784131, ppl: 16.185751 +epoch: 1, batch: 17295, sum loss: 3720.831299, avg loss: 2.505610, ppl: 12.251033 +epoch: 1, batch: 17296, sum loss: 4104.400391, avg loss: 2.747256, ppl: 15.599768 +epoch: 1, batch: 17297, sum loss: 5880.960938, avg loss: 3.184061, ppl: 24.144613 +epoch: 1, batch: 17298, sum loss: 4842.093262, avg loss: 2.759027, ppl: 15.784485 +epoch: 1, batch: 17299, sum loss: 4936.250000, avg loss: 2.827177, ppl: 16.897684 +epoch: 1, batch: 17300, sum loss: 3624.461670, avg loss: 2.463944, ppl: 11.751066 +epoch: 1, batch: 17301, sum loss: 4566.932617, avg loss: 2.822579, ppl: 16.820173 +epoch: 1, batch: 17302, sum loss: 4412.617676, avg loss: 2.547701, ppl: 12.777690 +epoch: 1, batch: 17303, sum loss: 5194.516113, avg loss: 2.708298, ppl: 15.003720 +epoch: 1, batch: 17304, sum loss: 
4905.895996, avg loss: 2.928893, ppl: 18.706917 +epoch: 1, batch: 17305, sum loss: 5190.060547, avg loss: 2.738818, ppl: 15.468693 +epoch: 1, batch: 17306, sum loss: 4114.748047, avg loss: 2.410514, ppl: 11.139689 +epoch: 1, batch: 17307, sum loss: 3765.525879, avg loss: 2.572080, ppl: 13.093035 +epoch: 1, batch: 17308, sum loss: 4992.839355, avg loss: 3.037007, ppl: 20.842764 +epoch: 1, batch: 17309, sum loss: 4562.998047, avg loss: 2.631487, ppl: 13.894411 +epoch: 1, batch: 17310, sum loss: 4381.314941, avg loss: 2.653734, ppl: 14.206991 +epoch: 1, batch: 17311, sum loss: 4080.663086, avg loss: 2.458231, ppl: 11.684121 +epoch: 1, batch: 17312, sum loss: 6037.981934, avg loss: 2.801848, ppl: 16.475060 +epoch: 1, batch: 17313, sum loss: 4451.280273, avg loss: 2.646421, ppl: 14.103471 +epoch: 1, batch: 17314, sum loss: 3740.806641, avg loss: 2.480641, ppl: 11.948920 +epoch: 1, batch: 17315, sum loss: 4794.991211, avg loss: 2.981960, ppl: 19.726435 +epoch: 1, batch: 17316, sum loss: 5069.080566, avg loss: 2.928411, ppl: 18.697905 +epoch: 1, batch: 17317, sum loss: 5396.076172, avg loss: 3.034914, ppl: 20.799179 +epoch: 1, batch: 17318, sum loss: 5563.114258, avg loss: 3.065077, ppl: 21.436110 +epoch: 1, batch: 17319, sum loss: 4336.480957, avg loss: 2.601368, ppl: 13.482172 +epoch: 1, batch: 17320, sum loss: 4940.719727, avg loss: 2.810421, ppl: 16.616909 +epoch: 1, batch: 17321, sum loss: 3806.877197, avg loss: 2.484907, ppl: 12.000003 +epoch: 1, batch: 17322, sum loss: 3638.973633, avg loss: 2.502733, ppl: 12.215835 +epoch: 1, batch: 17323, sum loss: 5397.264648, avg loss: 2.818415, ppl: 16.750278 +epoch: 1, batch: 17324, sum loss: 4734.621582, avg loss: 2.713250, ppl: 15.078202 +epoch: 1, batch: 17325, sum loss: 5182.218262, avg loss: 2.952831, ppl: 19.160116 +epoch: 1, batch: 17326, sum loss: 4386.519531, avg loss: 2.600189, ppl: 13.466289 +epoch: 1, batch: 17327, sum loss: 5421.696289, avg loss: 2.806261, ppl: 16.547932 +epoch: 1, batch: 17328, sum loss: 
4073.027588, avg loss: 2.574606, ppl: 13.126150 +epoch: 1, batch: 17329, sum loss: 4855.738281, avg loss: 2.808408, ppl: 16.583504 +epoch: 1, batch: 17330, sum loss: 5695.936035, avg loss: 3.012129, ppl: 20.330639 +epoch: 1, batch: 17331, sum loss: 4910.737305, avg loss: 2.865074, ppl: 17.550360 +epoch: 1, batch: 17332, sum loss: 4376.368164, avg loss: 2.731815, ppl: 15.360746 +epoch: 1, batch: 17333, sum loss: 4599.426270, avg loss: 2.514722, ppl: 12.363173 +epoch: 1, batch: 17334, sum loss: 5270.348633, avg loss: 2.868998, ppl: 17.619352 +epoch: 1, batch: 17335, sum loss: 6192.166992, avg loss: 3.068467, ppl: 21.508913 +epoch: 1, batch: 17336, sum loss: 5682.730469, avg loss: 2.976810, ppl: 19.625116 +epoch: 1, batch: 17337, sum loss: 4784.025391, avg loss: 2.604260, ppl: 13.521215 +epoch: 1, batch: 17338, sum loss: 4800.522461, avg loss: 2.649295, ppl: 14.144061 +epoch: 1, batch: 17339, sum loss: 5029.519043, avg loss: 2.877299, ppl: 17.766228 +epoch: 1, batch: 17340, sum loss: 4648.758789, avg loss: 2.729747, ppl: 15.329005 +epoch: 1, batch: 17341, sum loss: 4591.881836, avg loss: 2.710674, ppl: 15.039410 +epoch: 1, batch: 17342, sum loss: 4034.766357, avg loss: 2.495217, ppl: 12.124368 +epoch: 1, batch: 17343, sum loss: 4409.179688, avg loss: 2.522414, ppl: 12.458638 +epoch: 1, batch: 17344, sum loss: 5071.222168, avg loss: 2.708986, ppl: 15.014048 +epoch: 1, batch: 17345, sum loss: 5974.373047, avg loss: 2.945943, ppl: 19.028605 +epoch: 1, batch: 17346, sum loss: 4869.978027, avg loss: 2.897072, ppl: 18.121006 +epoch: 1, batch: 17347, sum loss: 4714.334473, avg loss: 2.661962, ppl: 14.324363 +epoch: 1, batch: 17348, sum loss: 4709.757324, avg loss: 2.656378, ppl: 14.244596 +epoch: 1, batch: 17349, sum loss: 4382.148438, avg loss: 2.685140, ppl: 14.660255 +epoch: 1, batch: 17350, sum loss: 4136.434082, avg loss: 2.551779, ppl: 12.829908 +epoch: 1, batch: 17351, sum loss: 4628.583496, avg loss: 2.703612, ppl: 14.933572 +epoch: 1, batch: 17352, sum loss: 
4114.953125, avg loss: 2.649680, ppl: 14.149512 +epoch: 1, batch: 17353, sum loss: 5542.909180, avg loss: 2.676441, ppl: 14.533277 +epoch: 1, batch: 17354, sum loss: 4819.897949, avg loss: 2.868987, ppl: 17.619162 +epoch: 1, batch: 17355, sum loss: 4915.276855, avg loss: 2.778563, ppl: 16.095867 +epoch: 1, batch: 17356, sum loss: 5778.899902, avg loss: 2.823107, ppl: 16.829058 +epoch: 1, batch: 17357, sum loss: 5036.938965, avg loss: 2.812361, ppl: 16.649181 +epoch: 1, batch: 17358, sum loss: 4514.048828, avg loss: 2.551752, ppl: 12.829559 +epoch: 1, batch: 17359, sum loss: 4042.436035, avg loss: 2.591305, ppl: 13.347181 +epoch: 1, batch: 17360, sum loss: 4901.685547, avg loss: 2.808989, ppl: 16.593130 +epoch: 1, batch: 17361, sum loss: 4586.316406, avg loss: 2.499355, ppl: 12.174640 +epoch: 1, batch: 17362, sum loss: 3681.053467, avg loss: 2.579575, ppl: 13.191531 +epoch: 1, batch: 17363, sum loss: 3911.279053, avg loss: 2.605782, ppl: 13.541815 +epoch: 1, batch: 17364, sum loss: 4661.694336, avg loss: 2.729329, ppl: 15.322604 +epoch: 1, batch: 17365, sum loss: 4121.673828, avg loss: 2.585743, ppl: 13.273144 +epoch: 1, batch: 17366, sum loss: 4383.223633, avg loss: 2.627832, ppl: 13.843723 +epoch: 1, batch: 17367, sum loss: 4300.439453, avg loss: 2.696200, ppl: 14.823301 +epoch: 1, batch: 17368, sum loss: 4874.684570, avg loss: 2.597062, ppl: 13.424234 +epoch: 1, batch: 17369, sum loss: 4927.158691, avg loss: 2.676349, ppl: 14.531942 +epoch: 1, batch: 17370, sum loss: 4513.602539, avg loss: 2.589560, ppl: 13.323902 +epoch: 1, batch: 17371, sum loss: 4998.279297, avg loss: 2.882514, ppl: 17.859114 +epoch: 1, batch: 17372, sum loss: 4824.786133, avg loss: 2.739799, ppl: 15.483872 +epoch: 1, batch: 17373, sum loss: 4781.169434, avg loss: 2.750961, ppl: 15.657665 +epoch: 1, batch: 17374, sum loss: 4434.542480, avg loss: 2.750957, ppl: 15.657605 +epoch: 1, batch: 17375, sum loss: 5093.577637, avg loss: 2.774280, ppl: 16.027077 +epoch: 1, batch: 17376, sum loss: 
3839.602051, avg loss: 2.563152, ppl: 12.976656 +epoch: 1, batch: 17377, sum loss: 4972.367188, avg loss: 2.897650, ppl: 18.131487 +epoch: 1, batch: 17378, sum loss: 4771.830078, avg loss: 2.923916, ppl: 18.614031 +epoch: 1, batch: 17379, sum loss: 4046.106934, avg loss: 2.567327, ppl: 13.030943 +epoch: 1, batch: 17380, sum loss: 4796.406250, avg loss: 2.719051, ppl: 15.165925 +epoch: 1, batch: 17381, sum loss: 5081.059570, avg loss: 2.954104, ppl: 19.184534 +epoch: 1, batch: 17382, sum loss: 5045.206055, avg loss: 2.860094, ppl: 17.463169 +epoch: 1, batch: 17383, sum loss: 4433.195312, avg loss: 2.556629, ppl: 12.892286 +epoch: 1, batch: 17384, sum loss: 4758.626465, avg loss: 2.717662, ppl: 15.144877 +epoch: 1, batch: 17385, sum loss: 3906.826660, avg loss: 2.634408, ppl: 13.935057 +epoch: 1, batch: 17386, sum loss: 3928.696045, avg loss: 2.407289, ppl: 11.103821 +epoch: 1, batch: 17387, sum loss: 5158.191406, avg loss: 2.899489, ppl: 18.164864 +epoch: 1, batch: 17388, sum loss: 4392.004883, avg loss: 2.603441, ppl: 13.510146 +epoch: 1, batch: 17389, sum loss: 5048.077148, avg loss: 2.659683, ppl: 14.291751 +epoch: 1, batch: 17390, sum loss: 4748.593262, avg loss: 2.702671, ppl: 14.919529 +epoch: 1, batch: 17391, sum loss: 4838.298340, avg loss: 2.652576, ppl: 14.190546 +epoch: 1, batch: 17392, sum loss: 3490.376953, avg loss: 2.240293, ppl: 9.396087 +epoch: 1, batch: 17393, sum loss: 4450.002930, avg loss: 2.656718, ppl: 14.249449 +epoch: 1, batch: 17394, sum loss: 3655.597412, avg loss: 2.645150, ppl: 14.085557 +epoch: 1, batch: 17395, sum loss: 4610.973145, avg loss: 2.723552, ppl: 15.234339 +epoch: 1, batch: 17396, sum loss: 4470.602051, avg loss: 2.631314, ppl: 13.892009 +epoch: 1, batch: 17397, sum loss: 5798.513672, avg loss: 3.287139, ppl: 26.766188 +epoch: 1, batch: 17398, sum loss: 5399.351074, avg loss: 2.940823, ppl: 18.931417 +epoch: 1, batch: 17399, sum loss: 5644.349121, avg loss: 2.839210, ppl: 17.102245 +epoch: 1, batch: 17400, sum loss: 
4096.867188, avg loss: 2.596240, ppl: 13.413213 +epoch: 1, batch: 17401, sum loss: 3518.538086, avg loss: 2.393563, ppl: 10.952451 +epoch: 1, batch: 17402, sum loss: 4693.013184, avg loss: 2.703349, ppl: 14.929642 +epoch: 1, batch: 17403, sum loss: 4987.040039, avg loss: 2.963185, ppl: 19.359531 +epoch: 1, batch: 17404, sum loss: 4673.507324, avg loss: 2.715576, ppl: 15.113319 +epoch: 1, batch: 17405, sum loss: 4246.530273, avg loss: 2.572096, ppl: 13.093241 +epoch: 1, batch: 17406, sum loss: 4863.148926, avg loss: 2.676472, ppl: 14.533724 +epoch: 1, batch: 17407, sum loss: 5318.122559, avg loss: 2.859206, ppl: 17.447662 +epoch: 1, batch: 17408, sum loss: 4554.864258, avg loss: 2.742242, ppl: 15.521748 +epoch: 1, batch: 17409, sum loss: 4556.525391, avg loss: 2.831899, ppl: 16.977669 +epoch: 1, batch: 17410, sum loss: 4341.028320, avg loss: 2.580873, ppl: 13.208665 +epoch: 1, batch: 17411, sum loss: 4538.025391, avg loss: 2.646079, ppl: 14.098647 +epoch: 1, batch: 17412, sum loss: 4582.286133, avg loss: 2.633498, ppl: 13.922381 +epoch: 1, batch: 17413, sum loss: 5555.810059, avg loss: 2.881644, ppl: 17.843592 +epoch: 1, batch: 17414, sum loss: 5279.455078, avg loss: 2.936293, ppl: 18.845858 +epoch: 1, batch: 17415, sum loss: 4096.386230, avg loss: 2.668655, ppl: 14.420567 +epoch: 1, batch: 17416, sum loss: 3854.056396, avg loss: 2.520639, ppl: 12.436538 +epoch: 1, batch: 17417, sum loss: 4396.304688, avg loss: 2.730624, ppl: 15.342457 +epoch: 1, batch: 17418, sum loss: 4591.110352, avg loss: 2.755769, ppl: 15.733128 +epoch: 1, batch: 17419, sum loss: 4619.837891, avg loss: 2.553808, ppl: 12.855963 +epoch: 1, batch: 17420, sum loss: 4742.097168, avg loss: 2.708222, ppl: 15.002584 +epoch: 1, batch: 17421, sum loss: 3427.323486, avg loss: 2.383396, ppl: 10.841660 +epoch: 1, batch: 17422, sum loss: 4673.121094, avg loss: 2.712200, ppl: 15.062383 +epoch: 1, batch: 17423, sum loss: 4343.803711, avg loss: 2.664910, ppl: 14.366661 +epoch: 1, batch: 17424, sum loss: 
4456.742676, avg loss: 2.579134, ppl: 13.185708 +epoch: 1, batch: 17425, sum loss: 4859.742676, avg loss: 2.750279, ppl: 15.646996 +epoch: 1, batch: 17426, sum loss: 4361.312988, avg loss: 2.553462, ppl: 12.851516 +epoch: 1, batch: 17427, sum loss: 5793.145020, avg loss: 2.896573, ppl: 18.111961 +epoch: 1, batch: 17428, sum loss: 4618.799316, avg loss: 2.550414, ppl: 12.812405 +epoch: 1, batch: 17429, sum loss: 4965.010254, avg loss: 2.864980, ppl: 17.548698 +epoch: 1, batch: 17430, sum loss: 5312.115234, avg loss: 2.967662, ppl: 19.446404 +epoch: 1, batch: 17431, sum loss: 4851.010742, avg loss: 2.853536, ppl: 17.349018 +epoch: 1, batch: 17432, sum loss: 4821.303223, avg loss: 2.665176, ppl: 14.370481 +epoch: 1, batch: 17433, sum loss: 4183.390625, avg loss: 2.676513, ppl: 14.534330 +epoch: 1, batch: 17434, sum loss: 5041.226074, avg loss: 3.022318, ppl: 20.538843 +epoch: 1, batch: 17435, sum loss: 5031.827637, avg loss: 2.766260, ppl: 15.899066 +epoch: 1, batch: 17436, sum loss: 5216.672852, avg loss: 3.056047, ppl: 21.243425 +epoch: 1, batch: 17437, sum loss: 4164.696777, avg loss: 2.716697, ppl: 15.130268 +epoch: 1, batch: 17438, sum loss: 5085.844727, avg loss: 2.897917, ppl: 18.136328 +epoch: 1, batch: 17439, sum loss: 4103.094238, avg loss: 2.642044, ppl: 14.041877 +epoch: 1, batch: 17440, sum loss: 4045.210693, avg loss: 2.478683, ppl: 11.925548 +epoch: 1, batch: 17441, sum loss: 4873.526367, avg loss: 2.710526, ppl: 15.037187 +epoch: 1, batch: 17442, sum loss: 4039.433594, avg loss: 2.569614, ppl: 13.060784 +epoch: 1, batch: 17443, sum loss: 4380.024414, avg loss: 2.641752, ppl: 14.037773 +epoch: 1, batch: 17444, sum loss: 4025.186768, avg loss: 2.636010, ppl: 13.957398 +epoch: 1, batch: 17445, sum loss: 4505.792969, avg loss: 2.588049, ppl: 13.303790 +epoch: 1, batch: 17446, sum loss: 5603.062500, avg loss: 3.014019, ppl: 20.369108 +epoch: 1, batch: 17447, sum loss: 4851.523438, avg loss: 2.812478, ppl: 16.651121 +epoch: 1, batch: 17448, sum loss: 
4805.234375, avg loss: 2.603052, ppl: 13.504894 +epoch: 1, batch: 17449, sum loss: 5535.198242, avg loss: 2.863527, ppl: 17.523228 +epoch: 1, batch: 17450, sum loss: 4822.101074, avg loss: 2.833197, ppl: 16.999720 +epoch: 1, batch: 17451, sum loss: 4580.092285, avg loss: 2.772453, ppl: 15.997829 +epoch: 1, batch: 17452, sum loss: 5624.257812, avg loss: 2.884235, ppl: 17.889870 +epoch: 1, batch: 17453, sum loss: 4057.500732, avg loss: 2.581107, ppl: 13.211761 +epoch: 1, batch: 17454, sum loss: 5193.812012, avg loss: 2.736466, ppl: 15.432349 +epoch: 1, batch: 17455, sum loss: 3654.487061, avg loss: 2.614082, ppl: 13.654680 +epoch: 1, batch: 17456, sum loss: 4136.950195, avg loss: 2.530245, ppl: 12.556577 +epoch: 1, batch: 17457, sum loss: 3915.542725, avg loss: 2.522901, ppl: 12.464708 +epoch: 1, batch: 17458, sum loss: 4970.918945, avg loss: 2.924070, ppl: 18.616907 +epoch: 1, batch: 17459, sum loss: 4797.388672, avg loss: 2.773057, ppl: 16.007494 +epoch: 1, batch: 17460, sum loss: 4306.110352, avg loss: 2.677929, ppl: 14.554921 +epoch: 1, batch: 17461, sum loss: 5350.927734, avg loss: 2.792760, ppl: 16.326015 +epoch: 1, batch: 17462, sum loss: 4374.354492, avg loss: 2.710257, ppl: 15.033135 +epoch: 1, batch: 17463, sum loss: 3318.784424, avg loss: 2.512327, ppl: 12.333602 +epoch: 1, batch: 17464, sum loss: 4508.208008, avg loss: 2.586465, ppl: 13.282729 +epoch: 1, batch: 17465, sum loss: 3709.695312, avg loss: 2.636600, ppl: 13.965633 +epoch: 1, batch: 17466, sum loss: 4705.643066, avg loss: 2.839857, ppl: 17.113316 +epoch: 1, batch: 17467, sum loss: 4406.633789, avg loss: 2.837498, ppl: 17.072990 +epoch: 1, batch: 17468, sum loss: 4734.838379, avg loss: 2.855753, ppl: 17.387524 +epoch: 1, batch: 17469, sum loss: 3966.384277, avg loss: 2.375080, ppl: 10.751877 +epoch: 1, batch: 17470, sum loss: 4579.123535, avg loss: 2.830113, ppl: 16.947382 +epoch: 1, batch: 17471, sum loss: 4113.209961, avg loss: 2.573974, ppl: 13.117846 +epoch: 1, batch: 17472, sum loss: 
4251.215820, avg loss: 2.785856, ppl: 16.213688 +epoch: 1, batch: 17473, sum loss: 4704.869141, avg loss: 2.705503, ppl: 14.961837 +epoch: 1, batch: 17474, sum loss: 4984.991211, avg loss: 2.631991, ppl: 13.901423 +epoch: 1, batch: 17475, sum loss: 4142.181152, avg loss: 2.420913, ppl: 11.256125 +epoch: 1, batch: 17476, sum loss: 5368.419922, avg loss: 2.974194, ppl: 19.573837 +epoch: 1, batch: 17477, sum loss: 4300.623535, avg loss: 2.559895, ppl: 12.934460 +epoch: 1, batch: 17478, sum loss: 4612.056641, avg loss: 2.548098, ppl: 12.782766 +epoch: 1, batch: 17479, sum loss: 4828.322266, avg loss: 2.862076, ppl: 17.497816 +epoch: 1, batch: 17480, sum loss: 4103.633789, avg loss: 2.703316, ppl: 14.929158 +epoch: 1, batch: 17481, sum loss: 3977.485596, avg loss: 2.446178, ppl: 11.544140 +epoch: 1, batch: 17482, sum loss: 4986.332031, avg loss: 2.679383, ppl: 14.576101 +epoch: 1, batch: 17483, sum loss: 4464.208984, avg loss: 2.847072, ppl: 17.237236 +epoch: 1, batch: 17484, sum loss: 3317.407715, avg loss: 2.336203, ppl: 10.341889 +epoch: 1, batch: 17485, sum loss: 3826.119141, avg loss: 2.437019, ppl: 11.438887 +epoch: 1, batch: 17486, sum loss: 5135.967773, avg loss: 2.950010, ppl: 19.106150 +epoch: 1, batch: 17487, sum loss: 5835.920898, avg loss: 3.069921, ppl: 21.540211 +epoch: 1, batch: 17488, sum loss: 4668.865723, avg loss: 2.480800, ppl: 11.950823 +epoch: 1, batch: 17489, sum loss: 4104.161621, avg loss: 2.693019, ppl: 14.776224 +epoch: 1, batch: 17490, sum loss: 4413.138184, avg loss: 2.548001, ppl: 12.781531 +epoch: 1, batch: 17491, sum loss: 4113.735352, avg loss: 2.633633, ppl: 13.924271 +epoch: 1, batch: 17492, sum loss: 4858.174316, avg loss: 2.909086, ppl: 18.340036 +epoch: 1, batch: 17493, sum loss: 4404.791992, avg loss: 2.687487, ppl: 14.694707 +epoch: 1, batch: 17494, sum loss: 4134.292969, avg loss: 2.793441, ppl: 16.337145 +epoch: 1, batch: 17495, sum loss: 4207.268555, avg loss: 2.634483, ppl: 13.936100 +epoch: 1, batch: 17496, sum loss: 
4169.646484, avg loss: 2.690095, ppl: 14.733071 +epoch: 1, batch: 17497, sum loss: 5086.960449, avg loss: 2.931966, ppl: 18.764477 +epoch: 1, batch: 17498, sum loss: 4926.240234, avg loss: 2.694880, ppl: 14.803739 +epoch: 1, batch: 17499, sum loss: 4683.775879, avg loss: 2.947625, ppl: 19.060629 +epoch: 1, batch: 17500, sum loss: 4323.754883, avg loss: 2.644498, ppl: 14.076382 +epoch: 1, batch: 17501, sum loss: 4875.671387, avg loss: 2.689284, ppl: 14.721129 +epoch: 1, batch: 17502, sum loss: 4716.210938, avg loss: 2.658518, ppl: 14.275115 +epoch: 1, batch: 17503, sum loss: 3931.824219, avg loss: 2.355797, ppl: 10.546527 +epoch: 1, batch: 17504, sum loss: 5218.480957, avg loss: 2.777265, ppl: 16.074997 +epoch: 1, batch: 17505, sum loss: 5656.095703, avg loss: 3.060658, ppl: 21.341589 +epoch: 1, batch: 17506, sum loss: 4746.096680, avg loss: 2.785268, ppl: 16.204161 +epoch: 1, batch: 17507, sum loss: 3905.398438, avg loss: 2.498655, ppl: 12.166123 +epoch: 1, batch: 17508, sum loss: 4682.149902, avg loss: 2.780374, ppl: 16.125055 +epoch: 1, batch: 17509, sum loss: 4935.415527, avg loss: 2.717740, ppl: 15.146050 +epoch: 1, batch: 17510, sum loss: 5091.049316, avg loss: 2.839403, ppl: 17.105549 +epoch: 1, batch: 17511, sum loss: 4807.250488, avg loss: 2.748571, ppl: 15.620294 +epoch: 1, batch: 17512, sum loss: 5155.621094, avg loss: 2.859468, ppl: 17.452244 +epoch: 1, batch: 17513, sum loss: 4611.479980, avg loss: 2.907617, ppl: 18.313099 +epoch: 1, batch: 17514, sum loss: 3987.076172, avg loss: 2.675890, ppl: 14.525274 +epoch: 1, batch: 17515, sum loss: 5932.228516, avg loss: 2.900845, ppl: 18.189514 +epoch: 1, batch: 17516, sum loss: 4029.274902, avg loss: 2.577911, ppl: 13.169597 +epoch: 1, batch: 17517, sum loss: 4104.108887, avg loss: 2.544395, ppl: 12.735520 +epoch: 1, batch: 17518, sum loss: 4414.875000, avg loss: 2.678929, ppl: 14.569483 +epoch: 1, batch: 17519, sum loss: 4580.168457, avg loss: 2.538896, ppl: 12.665682 +epoch: 1, batch: 17520, sum loss: 
5223.203125, avg loss: 2.784224, ppl: 16.187244 +epoch: 1, batch: 17521, sum loss: 4430.126953, avg loss: 2.772295, ppl: 15.995297 +epoch: 1, batch: 17522, sum loss: 3666.806152, avg loss: 2.499527, ppl: 12.176733 +epoch: 1, batch: 17523, sum loss: 4805.743164, avg loss: 2.646334, ppl: 14.102247 +epoch: 1, batch: 17524, sum loss: 4963.781250, avg loss: 2.856031, ppl: 17.392355 +epoch: 1, batch: 17525, sum loss: 4686.104980, avg loss: 2.819558, ppl: 16.769436 +epoch: 1, batch: 17526, sum loss: 4218.705078, avg loss: 2.840879, ppl: 17.130812 +epoch: 1, batch: 17527, sum loss: 5400.018555, avg loss: 2.892350, ppl: 18.035652 +epoch: 1, batch: 17528, sum loss: 3875.940186, avg loss: 2.380799, ppl: 10.813535 +epoch: 1, batch: 17529, sum loss: 5519.573730, avg loss: 2.996511, ppl: 20.015591 +epoch: 1, batch: 17530, sum loss: 4994.502930, avg loss: 2.701191, ppl: 14.897470 +epoch: 1, batch: 17531, sum loss: 5264.864258, avg loss: 2.951157, ppl: 19.128078 +epoch: 1, batch: 17532, sum loss: 4155.259766, avg loss: 2.553940, ppl: 12.857658 +epoch: 1, batch: 17533, sum loss: 5404.197266, avg loss: 3.030958, ppl: 20.717064 +epoch: 1, batch: 17534, sum loss: 4124.950684, avg loss: 2.504524, ppl: 12.237729 +epoch: 1, batch: 17535, sum loss: 4709.809570, avg loss: 2.765596, ppl: 15.888501 +epoch: 1, batch: 17536, sum loss: 4105.684570, avg loss: 2.748116, ppl: 15.613182 +epoch: 1, batch: 17537, sum loss: 4534.725098, avg loss: 2.559100, ppl: 12.924179 +epoch: 1, batch: 17538, sum loss: 5129.117676, avg loss: 2.886391, ppl: 17.928495 +epoch: 1, batch: 17539, sum loss: 3873.465088, avg loss: 2.546657, ppl: 12.764362 +epoch: 1, batch: 17540, sum loss: 4231.928223, avg loss: 2.704108, ppl: 14.940976 +epoch: 1, batch: 17541, sum loss: 3937.406738, avg loss: 2.583600, ppl: 13.244738 +epoch: 1, batch: 17542, sum loss: 4746.149414, avg loss: 2.704359, ppl: 14.944728 +epoch: 1, batch: 17543, sum loss: 4614.866699, avg loss: 2.730690, ppl: 15.343474 +epoch: 1, batch: 17544, sum loss: 
3743.676514, avg loss: 2.339798, ppl: 10.379137 +epoch: 1, batch: 17545, sum loss: 5033.193359, avg loss: 2.882700, ppl: 17.862432 +epoch: 1, batch: 17546, sum loss: 3820.131348, avg loss: 2.538293, ppl: 12.658050 +epoch: 1, batch: 17547, sum loss: 4497.114746, avg loss: 2.498397, ppl: 12.162982 +epoch: 1, batch: 17548, sum loss: 4101.834473, avg loss: 2.670465, ppl: 14.446689 +epoch: 1, batch: 17549, sum loss: 4847.862793, avg loss: 2.875364, ppl: 17.731871 +epoch: 1, batch: 17550, sum loss: 5310.521973, avg loss: 3.052024, ppl: 21.158129 +epoch: 1, batch: 17551, sum loss: 4747.384277, avg loss: 2.926871, ppl: 18.669115 +epoch: 1, batch: 17552, sum loss: 3219.953613, avg loss: 2.318181, ppl: 10.157182 +epoch: 1, batch: 17553, sum loss: 4398.729980, avg loss: 2.688710, ppl: 14.712691 +epoch: 1, batch: 17554, sum loss: 5495.641602, avg loss: 2.887883, ppl: 17.955261 +epoch: 1, batch: 17555, sum loss: 4778.494629, avg loss: 2.765332, ppl: 15.884320 +epoch: 1, batch: 17556, sum loss: 5025.657227, avg loss: 3.056969, ppl: 21.263014 +epoch: 1, batch: 17557, sum loss: 3762.930908, avg loss: 2.568553, ppl: 13.046938 +epoch: 1, batch: 17558, sum loss: 4428.831055, avg loss: 2.750827, ppl: 15.655567 +epoch: 1, batch: 17559, sum loss: 4392.422363, avg loss: 2.634926, ppl: 13.942286 +epoch: 1, batch: 17560, sum loss: 4289.523438, avg loss: 2.576290, ppl: 13.148273 +epoch: 1, batch: 17561, sum loss: 5061.049805, avg loss: 2.779269, ppl: 16.107250 +epoch: 1, batch: 17562, sum loss: 5094.775391, avg loss: 2.708546, ppl: 15.007442 +epoch: 1, batch: 17563, sum loss: 4898.255371, avg loss: 2.706218, ppl: 14.972549 +epoch: 1, batch: 17564, sum loss: 4004.412598, avg loss: 2.461225, ppl: 11.719153 +epoch: 1, batch: 17565, sum loss: 5189.477539, avg loss: 2.669485, ppl: 14.432528 +epoch: 1, batch: 17566, sum loss: 4150.384766, avg loss: 2.623505, ppl: 13.783950 +epoch: 1, batch: 17567, sum loss: 5271.143066, avg loss: 2.888298, ppl: 17.962704 +epoch: 1, batch: 17568, sum loss: 
4617.933105, avg loss: 2.833088, ppl: 16.997864 +epoch: 1, batch: 17569, sum loss: 4880.668457, avg loss: 2.646783, ppl: 14.108583 +epoch: 1, batch: 17570, sum loss: 4207.214844, avg loss: 2.495383, ppl: 12.126371 +epoch: 1, batch: 17571, sum loss: 4670.402344, avg loss: 2.601896, ppl: 13.489284 +epoch: 1, batch: 17572, sum loss: 4661.692383, avg loss: 2.801498, ppl: 16.469297 +epoch: 1, batch: 17573, sum loss: 5834.687988, avg loss: 2.872815, ppl: 17.686743 +epoch: 1, batch: 17574, sum loss: 4685.674316, avg loss: 2.838083, ppl: 17.082983 +epoch: 1, batch: 17575, sum loss: 4501.916504, avg loss: 2.614353, ppl: 13.658382 +epoch: 1, batch: 17576, sum loss: 5735.138672, avg loss: 2.853303, ppl: 17.344976 +epoch: 1, batch: 17577, sum loss: 4187.982422, avg loss: 2.705415, ppl: 14.960525 +epoch: 1, batch: 17578, sum loss: 3945.094238, avg loss: 2.573447, ppl: 13.110940 +epoch: 1, batch: 17579, sum loss: 4816.502930, avg loss: 2.838246, ppl: 17.085768 +epoch: 1, batch: 17580, sum loss: 4534.310547, avg loss: 2.761456, ppl: 15.822865 +epoch: 1, batch: 17581, sum loss: 4362.445312, avg loss: 2.873811, ppl: 17.704365 +epoch: 1, batch: 17582, sum loss: 5111.451660, avg loss: 2.759963, ppl: 15.799259 +epoch: 1, batch: 17583, sum loss: 4299.924805, avg loss: 2.607595, ppl: 13.566390 +epoch: 1, batch: 17584, sum loss: 5055.839844, avg loss: 2.801019, ppl: 16.461416 +epoch: 1, batch: 17585, sum loss: 5109.447754, avg loss: 3.104160, ppl: 22.290495 +epoch: 1, batch: 17586, sum loss: 4728.333496, avg loss: 3.046607, ppl: 21.043818 +epoch: 1, batch: 17587, sum loss: 3974.531982, avg loss: 2.658550, ppl: 14.275572 +epoch: 1, batch: 17588, sum loss: 5779.790039, avg loss: 3.077631, ppl: 21.706907 +epoch: 1, batch: 17589, sum loss: 4893.615723, avg loss: 3.011456, ppl: 20.316956 +epoch: 1, batch: 17590, sum loss: 4449.144043, avg loss: 2.642010, ppl: 14.041395 +epoch: 1, batch: 17591, sum loss: 4232.459473, avg loss: 2.447923, ppl: 11.564307 +epoch: 1, batch: 17592, sum loss: 
4727.855957, avg loss: 2.822601, ppl: 16.820541 +epoch: 1, batch: 17593, sum loss: 4377.122070, avg loss: 2.708615, ppl: 15.008475 +epoch: 1, batch: 17594, sum loss: 4531.860352, avg loss: 2.665800, ppl: 14.379454 +epoch: 1, batch: 17595, sum loss: 5631.404785, avg loss: 3.024385, ppl: 20.581343 +epoch: 1, batch: 17596, sum loss: 3547.928223, avg loss: 2.411916, ppl: 11.155312 +epoch: 1, batch: 17597, sum loss: 4143.913574, avg loss: 2.561133, ppl: 12.950483 +epoch: 1, batch: 17598, sum loss: 5113.518555, avg loss: 3.125623, ppl: 22.774073 +epoch: 1, batch: 17599, sum loss: 4493.144531, avg loss: 2.936696, ppl: 18.853449 +epoch: 1, batch: 17600, sum loss: 4493.269531, avg loss: 2.883998, ppl: 17.885645 +epoch: 1, batch: 17601, sum loss: 4154.156250, avg loss: 2.601225, ppl: 13.480243 +epoch: 1, batch: 17602, sum loss: 5352.934570, avg loss: 2.895043, ppl: 18.084278 +epoch: 1, batch: 17603, sum loss: 4305.624023, avg loss: 2.635021, ppl: 13.943604 +epoch: 1, batch: 17604, sum loss: 5075.725586, avg loss: 2.816718, ppl: 16.721876 +epoch: 1, batch: 17605, sum loss: 4349.462402, avg loss: 2.368988, ppl: 10.686575 +epoch: 1, batch: 17606, sum loss: 5879.192871, avg loss: 3.110684, ppl: 22.436390 +epoch: 1, batch: 17607, sum loss: 4093.469238, avg loss: 2.634150, ppl: 13.931466 +epoch: 1, batch: 17608, sum loss: 5003.612305, avg loss: 2.862478, ppl: 17.504858 +epoch: 1, batch: 17609, sum loss: 4432.031250, avg loss: 2.937065, ppl: 18.860413 +epoch: 1, batch: 17610, sum loss: 5022.577148, avg loss: 2.762694, ppl: 15.842460 +epoch: 1, batch: 17611, sum loss: 4846.820312, avg loss: 2.722933, ppl: 15.224909 +epoch: 1, batch: 17612, sum loss: 5339.106934, avg loss: 2.861258, ppl: 17.483500 +epoch: 1, batch: 17613, sum loss: 4060.247070, avg loss: 2.726828, ppl: 15.284329 +epoch: 1, batch: 17614, sum loss: 4443.719727, avg loss: 2.577564, ppl: 13.165026 +epoch: 1, batch: 17615, sum loss: 4084.451416, avg loss: 2.591657, ppl: 13.351876 +epoch: 1, batch: 17616, sum loss: 
4251.241211, avg loss: 2.553298, ppl: 12.849411 +epoch: 1, batch: 17617, sum loss: 5540.668945, avg loss: 2.926925, ppl: 18.670130 +epoch: 1, batch: 17618, sum loss: 4308.992676, avg loss: 2.718607, ppl: 15.159197 +epoch: 1, batch: 17619, sum loss: 4813.461914, avg loss: 2.647669, ppl: 14.121081 +epoch: 1, batch: 17620, sum loss: 4714.139160, avg loss: 2.826223, ppl: 16.881573 +epoch: 1, batch: 17621, sum loss: 4548.719238, avg loss: 2.860830, ppl: 17.476019 +epoch: 1, batch: 17622, sum loss: 4534.144043, avg loss: 2.741321, ppl: 15.507451 +epoch: 1, batch: 17623, sum loss: 4336.870117, avg loss: 2.514128, ppl: 12.355826 +epoch: 1, batch: 17624, sum loss: 5472.978027, avg loss: 2.863934, ppl: 17.530357 +epoch: 1, batch: 17625, sum loss: 5580.304688, avg loss: 2.794344, ppl: 16.351898 +epoch: 1, batch: 17626, sum loss: 5258.255859, avg loss: 2.758791, ppl: 15.780752 +epoch: 1, batch: 17627, sum loss: 4633.188477, avg loss: 2.534567, ppl: 12.610970 +epoch: 1, batch: 17628, sum loss: 4762.624512, avg loss: 2.752962, ppl: 15.689035 +epoch: 1, batch: 17629, sum loss: 4257.354492, avg loss: 2.489681, ppl: 12.057429 +epoch: 1, batch: 17630, sum loss: 4252.672363, avg loss: 2.722582, ppl: 15.219563 +epoch: 1, batch: 17631, sum loss: 4751.628906, avg loss: 2.670955, ppl: 14.453765 +epoch: 1, batch: 17632, sum loss: 4312.714844, avg loss: 2.678705, ppl: 14.566214 +epoch: 1, batch: 17633, sum loss: 4375.427734, avg loss: 2.656605, ppl: 14.247828 +epoch: 1, batch: 17634, sum loss: 5274.759277, avg loss: 2.919070, ppl: 18.524048 +epoch: 1, batch: 17635, sum loss: 4249.071289, avg loss: 2.594060, ppl: 13.384007 +epoch: 1, batch: 17636, sum loss: 3694.666016, avg loss: 2.388278, ppl: 10.894717 +epoch: 1, batch: 17637, sum loss: 5470.277344, avg loss: 2.886690, ppl: 17.933849 +epoch: 1, batch: 17638, sum loss: 3760.000732, avg loss: 2.473685, ppl: 11.866091 +epoch: 1, batch: 17639, sum loss: 5434.738281, avg loss: 3.067008, ppl: 21.477547 +epoch: 1, batch: 17640, sum loss: 
4252.937988, avg loss: 2.707154, ppl: 14.986556 +epoch: 1, batch: 17641, sum loss: 4674.128418, avg loss: 2.841415, ppl: 17.140009 +epoch: 1, batch: 17642, sum loss: 4350.918945, avg loss: 2.545886, ppl: 12.754521 +epoch: 1, batch: 17643, sum loss: 5799.490234, avg loss: 2.918717, ppl: 18.517508 +epoch: 1, batch: 17644, sum loss: 4714.068359, avg loss: 2.745526, ppl: 15.572804 +epoch: 1, batch: 17645, sum loss: 3775.482178, avg loss: 2.462806, ppl: 11.737707 +epoch: 1, batch: 17646, sum loss: 4475.547363, avg loss: 2.838014, ppl: 17.081800 +epoch: 1, batch: 17647, sum loss: 5358.300781, avg loss: 2.890130, ppl: 17.995646 +epoch: 1, batch: 17648, sum loss: 4722.578125, avg loss: 2.547237, ppl: 12.771771 +epoch: 1, batch: 17649, sum loss: 3752.419678, avg loss: 2.486693, ppl: 12.021457 +epoch: 1, batch: 17650, sum loss: 4629.558594, avg loss: 2.614093, ppl: 13.654823 +epoch: 1, batch: 17651, sum loss: 5422.035645, avg loss: 2.819571, ppl: 16.769659 +epoch: 1, batch: 17652, sum loss: 3976.857422, avg loss: 2.667242, ppl: 14.400196 +epoch: 1, batch: 17653, sum loss: 6016.314941, avg loss: 2.829875, ppl: 16.943346 +epoch: 1, batch: 17654, sum loss: 4411.959961, avg loss: 2.592221, ppl: 13.359407 +epoch: 1, batch: 17655, sum loss: 3617.694824, avg loss: 2.522800, ppl: 12.463442 +epoch: 1, batch: 17656, sum loss: 4805.435547, avg loss: 2.665244, ppl: 14.371461 +epoch: 1, batch: 17657, sum loss: 4959.235840, avg loss: 2.644926, ppl: 14.082400 +epoch: 1, batch: 17658, sum loss: 4618.065918, avg loss: 2.760350, ppl: 15.805377 +epoch: 1, batch: 17659, sum loss: 4008.634766, avg loss: 2.601321, ppl: 13.481528 +epoch: 1, batch: 17660, sum loss: 3392.659180, avg loss: 2.439007, ppl: 11.461657 +epoch: 1, batch: 17661, sum loss: 4322.269043, avg loss: 2.630718, ppl: 13.883728 +epoch: 1, batch: 17662, sum loss: 4892.928223, avg loss: 2.759689, ppl: 15.794928 +epoch: 1, batch: 17663, sum loss: 4921.244629, avg loss: 2.864520, ppl: 17.540625 +epoch: 1, batch: 17664, sum loss: 
4630.475098, avg loss: 2.645986, ppl: 14.097336 +epoch: 1, batch: 17665, sum loss: 4281.045898, avg loss: 2.640990, ppl: 14.027090 +epoch: 1, batch: 17666, sum loss: 3576.931152, avg loss: 2.206620, ppl: 9.084957 +epoch: 1, batch: 17667, sum loss: 4945.177734, avg loss: 2.896999, ppl: 18.119694 +epoch: 1, batch: 17668, sum loss: 4212.073730, avg loss: 2.654111, ppl: 14.212344 +epoch: 1, batch: 17669, sum loss: 5096.779785, avg loss: 2.711053, ppl: 15.045108 +epoch: 1, batch: 17670, sum loss: 4639.931152, avg loss: 2.618471, ppl: 13.714743 +epoch: 1, batch: 17671, sum loss: 5681.444336, avg loss: 2.915056, ppl: 18.449850 +epoch: 1, batch: 17672, sum loss: 4691.764648, avg loss: 2.682541, ppl: 14.622203 +epoch: 1, batch: 17673, sum loss: 5070.452637, avg loss: 2.717284, ppl: 15.139152 +epoch: 1, batch: 17674, sum loss: 4428.212891, avg loss: 2.806219, ppl: 16.547228 +epoch: 1, batch: 17675, sum loss: 5593.795410, avg loss: 3.080284, ppl: 21.764574 +epoch: 1, batch: 17676, sum loss: 4467.324707, avg loss: 2.665468, ppl: 14.374679 +epoch: 1, batch: 17677, sum loss: 5120.238281, avg loss: 2.891156, ppl: 18.014130 +epoch: 1, batch: 17678, sum loss: 4738.504883, avg loss: 2.656113, ppl: 14.240823 +epoch: 1, batch: 17679, sum loss: 5283.034668, avg loss: 2.640197, ppl: 14.015968 +epoch: 1, batch: 17680, sum loss: 4470.459473, avg loss: 2.783599, ppl: 16.177132 +epoch: 1, batch: 17681, sum loss: 5057.368652, avg loss: 2.769643, ppl: 15.952943 +epoch: 1, batch: 17682, sum loss: 4676.509277, avg loss: 2.555470, ppl: 12.877347 +epoch: 1, batch: 17683, sum loss: 4756.571777, avg loss: 2.779995, ppl: 16.118948 +epoch: 1, batch: 17684, sum loss: 5544.308594, avg loss: 2.834513, ppl: 17.022116 +epoch: 1, batch: 17685, sum loss: 4523.412598, avg loss: 2.659266, ppl: 14.285807 +epoch: 1, batch: 17686, sum loss: 4658.504883, avg loss: 2.828479, ppl: 16.919703 +epoch: 1, batch: 17687, sum loss: 4272.486328, avg loss: 2.609949, ppl: 13.598355 +epoch: 1, batch: 17688, sum loss: 
4499.096680, avg loss: 2.621851, ppl: 13.761175 +epoch: 1, batch: 17689, sum loss: 5021.208008, avg loss: 2.730402, ppl: 15.339045 +epoch: 1, batch: 17690, sum loss: 4535.642578, avg loss: 2.933792, ppl: 18.798782 +epoch: 1, batch: 17691, sum loss: 5223.062988, avg loss: 3.036664, ppl: 20.835630 +epoch: 1, batch: 17692, sum loss: 4028.942383, avg loss: 2.700364, ppl: 14.885143 +epoch: 1, batch: 17693, sum loss: 4359.963867, avg loss: 2.754241, ppl: 15.709113 +epoch: 1, batch: 17694, sum loss: 4381.597656, avg loss: 2.709708, ppl: 15.024887 +epoch: 1, batch: 17695, sum loss: 4608.348145, avg loss: 2.606532, ppl: 13.551970 +epoch: 1, batch: 17696, sum loss: 4473.574707, avg loss: 2.579916, ppl: 13.196030 +epoch: 1, batch: 17697, sum loss: 5100.329102, avg loss: 2.660579, ppl: 14.304565 +epoch: 1, batch: 17698, sum loss: 5221.560547, avg loss: 2.796765, ppl: 16.391535 +epoch: 1, batch: 17699, sum loss: 5216.993652, avg loss: 2.965886, ppl: 19.411898 +epoch: 1, batch: 17700, sum loss: 4673.183105, avg loss: 2.679577, ppl: 14.578930 +epoch: 1, batch: 17701, sum loss: 4302.831055, avg loss: 2.501646, ppl: 12.202563 +epoch: 1, batch: 17702, sum loss: 4679.080566, avg loss: 2.739508, ppl: 15.479373 +epoch: 1, batch: 17703, sum loss: 5631.159180, avg loss: 2.858457, ppl: 17.434597 +epoch: 1, batch: 17704, sum loss: 4178.210938, avg loss: 2.649468, ppl: 14.146510 +epoch: 1, batch: 17705, sum loss: 3834.068604, avg loss: 2.542486, ppl: 12.711228 +epoch: 1, batch: 17706, sum loss: 4090.187500, avg loss: 2.754335, ppl: 15.710588 +epoch: 1, batch: 17707, sum loss: 4054.372559, avg loss: 2.624189, ppl: 13.793388 +epoch: 1, batch: 17708, sum loss: 4756.273438, avg loss: 2.806061, ppl: 16.544621 +epoch: 1, batch: 17709, sum loss: 3204.403564, avg loss: 2.280714, ppl: 9.783666 +epoch: 1, batch: 17710, sum loss: 4008.931396, avg loss: 2.559982, ppl: 12.935579 +epoch: 1, batch: 17711, sum loss: 4694.391113, avg loss: 2.566644, ppl: 13.022045 +epoch: 1, batch: 17712, sum loss: 
5527.313965, avg loss: 3.027007, ppl: 20.635370 +epoch: 1, batch: 17713, sum loss: 4579.005859, avg loss: 2.746854, ppl: 15.593498 +epoch: 1, batch: 17714, sum loss: 4897.738281, avg loss: 2.964733, ppl: 19.389519 +epoch: 1, batch: 17715, sum loss: 3601.207764, avg loss: 2.364549, ppl: 10.639238 +epoch: 1, batch: 17716, sum loss: 4451.361816, avg loss: 2.764821, ppl: 15.876199 +epoch: 1, batch: 17717, sum loss: 4572.059570, avg loss: 2.544274, ppl: 12.733975 +epoch: 1, batch: 17718, sum loss: 4587.288574, avg loss: 2.648550, ppl: 14.133531 +epoch: 1, batch: 17719, sum loss: 4484.560059, avg loss: 2.816935, ppl: 16.725502 +epoch: 1, batch: 17720, sum loss: 4939.348633, avg loss: 2.709462, ppl: 15.021187 +epoch: 1, batch: 17721, sum loss: 4505.242188, avg loss: 2.772457, ppl: 15.997887 +epoch: 1, batch: 17722, sum loss: 5286.695312, avg loss: 2.804613, ppl: 16.520679 +epoch: 1, batch: 17723, sum loss: 4708.522949, avg loss: 2.639307, ppl: 14.003492 +epoch: 1, batch: 17724, sum loss: 4677.418457, avg loss: 2.537937, ppl: 12.653544 +epoch: 1, batch: 17725, sum loss: 4514.099121, avg loss: 2.807276, ppl: 16.564726 +epoch: 1, batch: 17726, sum loss: 5246.472656, avg loss: 2.826763, ppl: 16.890703 +epoch: 1, batch: 17727, sum loss: 4477.744141, avg loss: 2.853884, ppl: 17.355059 +epoch: 1, batch: 17728, sum loss: 3887.364014, avg loss: 2.487117, ppl: 12.026554 +epoch: 1, batch: 17729, sum loss: 5412.041992, avg loss: 2.923848, ppl: 18.612766 +epoch: 1, batch: 17730, sum loss: 5152.212402, avg loss: 2.673696, ppl: 14.493443 +epoch: 1, batch: 17731, sum loss: 4327.244629, avg loss: 2.627349, ppl: 13.837045 +epoch: 1, batch: 17732, sum loss: 4749.092285, avg loss: 2.726230, ppl: 15.275186 +epoch: 1, batch: 17733, sum loss: 5174.509766, avg loss: 2.824514, ppl: 16.852755 +epoch: 1, batch: 17734, sum loss: 3873.212646, avg loss: 2.556576, ppl: 12.891601 +epoch: 1, batch: 17735, sum loss: 4806.111328, avg loss: 2.760546, ppl: 15.808479 +epoch: 1, batch: 17736, sum loss: 
4744.728027, avg loss: 2.659601, ppl: 14.290586 +epoch: 1, batch: 17737, sum loss: 4923.384277, avg loss: 2.855792, ppl: 17.388195 +epoch: 1, batch: 17738, sum loss: 4029.211914, avg loss: 2.239695, ppl: 9.390470 +epoch: 1, batch: 17739, sum loss: 4344.073242, avg loss: 2.534465, ppl: 12.609684 +epoch: 1, batch: 17740, sum loss: 4051.084961, avg loss: 2.531928, ppl: 12.577733 +epoch: 1, batch: 17741, sum loss: 5681.746582, avg loss: 2.931758, ppl: 18.760582 +epoch: 1, batch: 17742, sum loss: 5819.886719, avg loss: 2.920164, ppl: 18.544327 +epoch: 1, batch: 17743, sum loss: 5638.932617, avg loss: 2.797090, ppl: 16.396860 +epoch: 1, batch: 17744, sum loss: 3804.593262, avg loss: 2.464115, ppl: 11.753075 +epoch: 1, batch: 17745, sum loss: 4301.017578, avg loss: 2.569306, ppl: 13.056755 +epoch: 1, batch: 17746, sum loss: 5355.600586, avg loss: 2.808390, ppl: 16.583204 +epoch: 1, batch: 17747, sum loss: 4635.176758, avg loss: 2.680843, ppl: 14.597388 +epoch: 1, batch: 17748, sum loss: 4167.983398, avg loss: 2.469185, ppl: 11.812811 +epoch: 1, batch: 17749, sum loss: 5052.086914, avg loss: 2.800492, ppl: 16.452732 +epoch: 1, batch: 17750, sum loss: 5069.652344, avg loss: 2.768789, ppl: 15.939321 +epoch: 1, batch: 17751, sum loss: 4639.314453, avg loss: 2.745157, ppl: 15.567050 +epoch: 1, batch: 17752, sum loss: 5045.166504, avg loss: 2.658149, ppl: 14.269851 +epoch: 1, batch: 17753, sum loss: 5095.598145, avg loss: 2.752889, ppl: 15.687895 +epoch: 1, batch: 17754, sum loss: 3997.476074, avg loss: 2.393698, ppl: 10.953929 +epoch: 1, batch: 17755, sum loss: 4744.995605, avg loss: 2.640510, ppl: 14.020347 +epoch: 1, batch: 17756, sum loss: 4029.513672, avg loss: 2.663261, ppl: 14.342984 +epoch: 1, batch: 17757, sum loss: 4000.157471, avg loss: 2.562561, ppl: 12.968986 +epoch: 1, batch: 17758, sum loss: 5343.620117, avg loss: 3.013886, ppl: 20.366394 +epoch: 1, batch: 17759, sum loss: 3647.002197, avg loss: 2.509981, ppl: 12.304695 +epoch: 1, batch: 17760, sum loss: 
4156.629395, avg loss: 2.634112, ppl: 13.930942 +epoch: 1, batch: 17761, sum loss: 4168.774902, avg loss: 2.655271, ppl: 14.228835 +epoch: 1, batch: 17762, sum loss: 4173.018066, avg loss: 2.497318, ppl: 12.149864 +epoch: 1, batch: 17763, sum loss: 4869.730957, avg loss: 2.776357, ppl: 16.060413 +epoch: 1, batch: 17764, sum loss: 4004.461670, avg loss: 2.485699, ppl: 12.009517 +epoch: 1, batch: 17765, sum loss: 3417.216797, avg loss: 2.396365, ppl: 10.983182 +epoch: 1, batch: 17766, sum loss: 4445.037598, avg loss: 2.610122, ppl: 13.600710 +epoch: 1, batch: 17767, sum loss: 4770.868652, avg loss: 2.643141, ppl: 14.057282 +epoch: 1, batch: 17768, sum loss: 3736.040527, avg loss: 2.662894, ppl: 14.337726 +epoch: 1, batch: 17769, sum loss: 5292.587402, avg loss: 2.690690, ppl: 14.741845 +epoch: 1, batch: 17770, sum loss: 4441.805664, avg loss: 2.636086, ppl: 13.958469 +epoch: 1, batch: 17771, sum loss: 3891.500000, avg loss: 2.399199, ppl: 11.014345 +epoch: 1, batch: 17772, sum loss: 5075.421875, avg loss: 2.878855, ppl: 17.793892 +epoch: 1, batch: 17773, sum loss: 5013.166504, avg loss: 2.827505, ppl: 16.903236 +epoch: 1, batch: 17774, sum loss: 4676.270508, avg loss: 2.649445, ppl: 14.146186 +epoch: 1, batch: 17775, sum loss: 3992.906982, avg loss: 2.489344, ppl: 12.053362 +epoch: 1, batch: 17776, sum loss: 5047.642578, avg loss: 2.892632, ppl: 18.040726 +epoch: 1, batch: 17777, sum loss: 3925.759277, avg loss: 2.636507, ppl: 13.964345 +epoch: 1, batch: 17778, sum loss: 5374.156250, avg loss: 2.712850, ppl: 15.072171 +epoch: 1, batch: 17779, sum loss: 4532.084473, avg loss: 2.736766, ppl: 15.436979 +epoch: 1, batch: 17780, sum loss: 4896.579102, avg loss: 2.933840, ppl: 18.799688 +epoch: 1, batch: 17781, sum loss: 4535.117676, avg loss: 2.616917, ppl: 13.693443 +epoch: 1, batch: 17782, sum loss: 4334.181641, avg loss: 2.731054, ppl: 15.349051 +epoch: 1, batch: 17783, sum loss: 4428.391602, avg loss: 2.658098, ppl: 14.269127 +epoch: 1, batch: 17784, sum loss: 
3759.217773, avg loss: 2.528055, ppl: 12.529113 +epoch: 1, batch: 17785, sum loss: 4120.739746, avg loss: 2.660258, ppl: 14.299979 +epoch: 1, batch: 17786, sum loss: 5141.117188, avg loss: 2.718729, ppl: 15.161044 +epoch: 1, batch: 17787, sum loss: 3895.068604, avg loss: 2.501650, ppl: 12.202610 +epoch: 1, batch: 17788, sum loss: 4348.877930, avg loss: 2.736865, ppl: 15.438502 +epoch: 1, batch: 17789, sum loss: 3516.755371, avg loss: 2.338268, ppl: 10.363273 +epoch: 1, batch: 17790, sum loss: 3976.529785, avg loss: 2.471429, ppl: 11.839357 +epoch: 1, batch: 17791, sum loss: 4385.841309, avg loss: 2.677559, ppl: 14.549530 +epoch: 1, batch: 17792, sum loss: 5327.818848, avg loss: 3.040993, ppl: 20.926006 +epoch: 1, batch: 17793, sum loss: 5018.012695, avg loss: 2.751103, ppl: 15.659902 +epoch: 1, batch: 17794, sum loss: 4339.927734, avg loss: 2.683938, ppl: 14.642643 +epoch: 1, batch: 17795, sum loss: 4124.798828, avg loss: 2.713684, ppl: 15.084740 +epoch: 1, batch: 17796, sum loss: 6538.766602, avg loss: 2.969467, ppl: 19.481531 +epoch: 1, batch: 17797, sum loss: 4783.192383, avg loss: 2.859051, ppl: 17.444967 +epoch: 1, batch: 17798, sum loss: 4880.011719, avg loss: 2.964770, ppl: 19.390249 +epoch: 1, batch: 17799, sum loss: 5864.030273, avg loss: 3.022696, ppl: 20.546612 +epoch: 1, batch: 17800, sum loss: 4970.589355, avg loss: 2.841961, ppl: 17.149357 +epoch: 1, batch: 17801, sum loss: 4608.025391, avg loss: 2.662060, ppl: 14.325764 +epoch: 1, batch: 17802, sum loss: 3601.817627, avg loss: 2.438604, ppl: 11.457034 +epoch: 1, batch: 17803, sum loss: 5512.977051, avg loss: 2.785739, ppl: 16.211790 +epoch: 1, batch: 17804, sum loss: 3543.986816, avg loss: 2.586852, ppl: 13.287873 +epoch: 1, batch: 17805, sum loss: 4951.632812, avg loss: 2.656455, ppl: 14.245703 +epoch: 1, batch: 17806, sum loss: 4870.326660, avg loss: 2.799038, ppl: 16.428841 +epoch: 1, batch: 17807, sum loss: 5877.890625, avg loss: 2.891240, ppl: 18.015629 +epoch: 1, batch: 17808, sum loss: 
3701.799316, avg loss: 2.403766, ppl: 11.064764 +epoch: 1, batch: 17809, sum loss: 4384.058594, avg loss: 2.794174, ppl: 16.349113 +epoch: 1, batch: 17810, sum loss: 5161.117188, avg loss: 2.976423, ppl: 19.617516 +epoch: 1, batch: 17811, sum loss: 4314.790527, avg loss: 2.684997, ppl: 14.658162 +epoch: 1, batch: 17812, sum loss: 4453.440918, avg loss: 2.687653, ppl: 14.697139 +epoch: 1, batch: 17813, sum loss: 5179.766602, avg loss: 2.795341, ppl: 16.368206 +epoch: 1, batch: 17814, sum loss: 5495.085938, avg loss: 2.859046, ppl: 17.444876 +epoch: 1, batch: 17815, sum loss: 4095.749023, avg loss: 2.483778, ppl: 11.986458 +epoch: 1, batch: 17816, sum loss: 4016.562500, avg loss: 2.515067, ppl: 12.367441 +epoch: 1, batch: 17817, sum loss: 4742.282715, avg loss: 2.717640, ppl: 15.144545 +epoch: 1, batch: 17818, sum loss: 4946.818848, avg loss: 2.813890, ppl: 16.674656 +epoch: 1, batch: 17819, sum loss: 4107.642090, avg loss: 2.554504, ppl: 12.864913 +epoch: 1, batch: 17820, sum loss: 4249.599609, avg loss: 2.748771, ppl: 15.623414 +epoch: 1, batch: 17821, sum loss: 5388.501953, avg loss: 2.949372, ppl: 19.093950 +epoch: 1, batch: 17822, sum loss: 4879.660156, avg loss: 2.766247, ppl: 15.898858 +epoch: 1, batch: 17823, sum loss: 4885.487305, avg loss: 2.835454, ppl: 17.038134 +epoch: 1, batch: 17824, sum loss: 4141.578125, avg loss: 2.561273, ppl: 12.952293 +epoch: 1, batch: 17825, sum loss: 4645.790039, avg loss: 2.513956, ppl: 12.353700 +epoch: 1, batch: 17826, sum loss: 3856.056152, avg loss: 2.307634, ppl: 10.050615 +epoch: 1, batch: 17827, sum loss: 4566.217773, avg loss: 2.639432, ppl: 14.005249 +epoch: 1, batch: 17828, sum loss: 5438.369629, avg loss: 2.764804, ppl: 15.875926 +epoch: 1, batch: 17829, sum loss: 5515.647461, avg loss: 2.853413, ppl: 17.346888 +epoch: 1, batch: 17830, sum loss: 4539.204102, avg loss: 2.648310, ppl: 14.130144 +epoch: 1, batch: 17831, sum loss: 4828.248535, avg loss: 2.786064, ppl: 16.217058 +epoch: 1, batch: 17832, sum loss: 
4065.467773, avg loss: 2.763744, ppl: 15.859114 +epoch: 1, batch: 17833, sum loss: 4924.106934, avg loss: 2.978891, ppl: 19.665993 +epoch: 1, batch: 17834, sum loss: 5043.043457, avg loss: 2.998242, ppl: 20.050261 +epoch: 1, batch: 17835, sum loss: 4610.710938, avg loss: 2.631685, ppl: 13.897161 +epoch: 1, batch: 17836, sum loss: 4823.542969, avg loss: 2.614386, ppl: 13.658832 +epoch: 1, batch: 17837, sum loss: 4755.314941, avg loss: 2.912012, ppl: 18.393763 +epoch: 1, batch: 17838, sum loss: 4786.442871, avg loss: 2.771536, ppl: 15.983167 +epoch: 1, batch: 17839, sum loss: 4465.240234, avg loss: 2.838678, ppl: 17.093147 +epoch: 1, batch: 17840, sum loss: 5191.242188, avg loss: 2.932905, ppl: 18.782118 +epoch: 1, batch: 17841, sum loss: 4684.274414, avg loss: 2.851049, ppl: 17.305933 +epoch: 1, batch: 17842, sum loss: 4020.703613, avg loss: 2.825512, ppl: 16.869579 +epoch: 1, batch: 17843, sum loss: 5584.931641, avg loss: 2.756630, ppl: 15.746682 +epoch: 1, batch: 17844, sum loss: 4173.029297, avg loss: 2.711520, ppl: 15.052136 +epoch: 1, batch: 17845, sum loss: 4345.843750, avg loss: 2.671078, ppl: 14.455544 +epoch: 1, batch: 17846, sum loss: 4423.870605, avg loss: 2.725737, ppl: 15.267656 +epoch: 1, batch: 17847, sum loss: 4083.417480, avg loss: 2.563351, ppl: 12.979234 +epoch: 1, batch: 17848, sum loss: 4041.590332, avg loss: 2.808610, ppl: 16.586849 +epoch: 1, batch: 17849, sum loss: 4697.324707, avg loss: 2.860734, ppl: 17.474344 +epoch: 1, batch: 17850, sum loss: 4653.522949, avg loss: 2.677516, ppl: 14.548908 +epoch: 1, batch: 17851, sum loss: 5240.584961, avg loss: 2.917920, ppl: 18.502768 +epoch: 1, batch: 17852, sum loss: 3657.961426, avg loss: 2.652619, ppl: 14.191155 +epoch: 1, batch: 17853, sum loss: 4069.539551, avg loss: 2.533960, ppl: 12.603315 +epoch: 1, batch: 17854, sum loss: 5225.302734, avg loss: 2.922429, ppl: 18.586376 +epoch: 1, batch: 17855, sum loss: 3402.409912, avg loss: 2.444260, ppl: 11.522018 +epoch: 1, batch: 17856, sum loss: 
4279.089844, avg loss: 2.753597, ppl: 15.699000 +epoch: 1, batch: 17857, sum loss: 4704.905273, avg loss: 2.749799, ppl: 15.639484 +epoch: 1, batch: 17858, sum loss: 5193.625977, avg loss: 2.699390, ppl: 14.870657 +epoch: 1, batch: 17859, sum loss: 4926.250977, avg loss: 2.811787, ppl: 16.639633 +epoch: 1, batch: 17860, sum loss: 3937.829834, avg loss: 2.628725, ppl: 13.856090 +epoch: 1, batch: 17861, sum loss: 3645.959717, avg loss: 2.409755, ppl: 11.131239 +epoch: 1, batch: 17862, sum loss: 4086.812744, avg loss: 2.812672, ppl: 16.654366 +epoch: 1, batch: 17863, sum loss: 5052.956055, avg loss: 2.693473, ppl: 14.782933 +epoch: 1, batch: 17864, sum loss: 4265.942871, avg loss: 2.952210, ppl: 19.148218 +epoch: 1, batch: 17865, sum loss: 4823.321777, avg loss: 2.427439, ppl: 11.329832 +epoch: 1, batch: 17866, sum loss: 5280.175293, avg loss: 2.776118, ppl: 16.056561 +epoch: 1, batch: 17867, sum loss: 5183.636230, avg loss: 2.792907, ppl: 16.328424 +epoch: 1, batch: 17868, sum loss: 4373.182617, avg loss: 2.814146, ppl: 16.678923 +epoch: 1, batch: 17869, sum loss: 4684.857910, avg loss: 2.697097, ppl: 14.836603 +epoch: 1, batch: 17870, sum loss: 4016.757324, avg loss: 2.491785, ppl: 12.082823 +epoch: 1, batch: 17871, sum loss: 5551.668945, avg loss: 3.000902, ppl: 20.103666 +epoch: 1, batch: 17872, sum loss: 3984.680908, avg loss: 2.546122, ppl: 12.757532 +epoch: 1, batch: 17873, sum loss: 5648.804688, avg loss: 2.925326, ppl: 18.640305 +epoch: 1, batch: 17874, sum loss: 4678.201660, avg loss: 2.768167, ppl: 15.929401 +epoch: 1, batch: 17875, sum loss: 4928.303711, avg loss: 2.830732, ppl: 16.957863 +epoch: 1, batch: 17876, sum loss: 5102.780762, avg loss: 2.847534, ppl: 17.245201 +epoch: 1, batch: 17877, sum loss: 3970.505859, avg loss: 2.388993, ppl: 10.902507 +epoch: 1, batch: 17878, sum loss: 4463.968262, avg loss: 2.740312, ppl: 15.491816 +epoch: 1, batch: 17879, sum loss: 6028.890137, avg loss: 2.969897, ppl: 19.489908 +epoch: 1, batch: 17880, sum loss: 
4071.672607, avg loss: 2.485759, ppl: 12.010227 +epoch: 1, batch: 17881, sum loss: 4787.441406, avg loss: 2.712431, ppl: 15.065863 +epoch: 1, batch: 17882, sum loss: 5145.390137, avg loss: 2.811689, ppl: 16.637991 +epoch: 1, batch: 17883, sum loss: 4882.397461, avg loss: 2.643421, ppl: 14.061220 +epoch: 1, batch: 17884, sum loss: 4943.327148, avg loss: 2.688052, ppl: 14.703006 +epoch: 1, batch: 17885, sum loss: 4584.437988, avg loss: 2.723968, ppl: 15.240674 +epoch: 1, batch: 17886, sum loss: 4539.983398, avg loss: 2.641061, ppl: 14.028077 +epoch: 1, batch: 17887, sum loss: 4157.098145, avg loss: 2.706444, ppl: 14.975930 +epoch: 1, batch: 17888, sum loss: 4364.416016, avg loss: 2.543366, ppl: 12.722422 +epoch: 1, batch: 17889, sum loss: 3838.816895, avg loss: 2.675134, ppl: 14.514291 +epoch: 1, batch: 17890, sum loss: 4406.027832, avg loss: 2.616406, ppl: 13.686449 +epoch: 1, batch: 17891, sum loss: 5058.793457, avg loss: 2.733006, ppl: 15.379044 +epoch: 1, batch: 17892, sum loss: 4836.812012, avg loss: 2.803949, ppl: 16.509718 +epoch: 1, batch: 17893, sum loss: 5294.780273, avg loss: 2.876035, ppl: 17.743778 +epoch: 1, batch: 17894, sum loss: 4927.076172, avg loss: 2.731195, ppl: 15.351224 +epoch: 1, batch: 17895, sum loss: 4495.191406, avg loss: 2.901996, ppl: 18.210451 +epoch: 1, batch: 17896, sum loss: 5710.207520, avg loss: 2.972518, ppl: 19.541071 +epoch: 1, batch: 17897, sum loss: 5436.388672, avg loss: 3.059307, ppl: 21.312784 +epoch: 1, batch: 17898, sum loss: 4314.598633, avg loss: 2.688223, ppl: 14.705526 +epoch: 1, batch: 17899, sum loss: 6058.238281, avg loss: 2.968270, ppl: 19.458220 +epoch: 1, batch: 17900, sum loss: 4354.125977, avg loss: 2.596378, ppl: 13.415062 +epoch: 1, batch: 17901, sum loss: 3953.006592, avg loss: 2.895976, ppl: 18.101152 +epoch: 1, batch: 17902, sum loss: 4077.025635, avg loss: 2.595179, ppl: 13.398983 +epoch: 1, batch: 17903, sum loss: 3736.732910, avg loss: 2.674827, ppl: 14.509834 +epoch: 1, batch: 17904, sum loss: 
5246.074707, avg loss: 2.846487, ppl: 17.227148 +epoch: 1, batch: 17905, sum loss: 5258.216797, avg loss: 2.801394, ppl: 16.467590 +epoch: 1, batch: 17906, sum loss: 3855.813477, avg loss: 2.473261, ppl: 11.861061 +epoch: 1, batch: 17907, sum loss: 4335.922852, avg loss: 2.863886, ppl: 17.529509 +epoch: 1, batch: 17908, sum loss: 4834.658691, avg loss: 2.675517, ppl: 14.519849 +epoch: 1, batch: 17909, sum loss: 5762.580078, avg loss: 2.908925, ppl: 18.337069 +epoch: 1, batch: 17910, sum loss: 4171.558594, avg loss: 2.853323, ppl: 17.345333 +epoch: 1, batch: 17911, sum loss: 4354.532227, avg loss: 2.642313, ppl: 14.045657 +epoch: 1, batch: 17912, sum loss: 3713.893311, avg loss: 2.408491, ppl: 11.117174 +epoch: 1, batch: 17913, sum loss: 4452.864258, avg loss: 2.582868, ppl: 13.235044 +epoch: 1, batch: 17914, sum loss: 4319.494629, avg loss: 2.849271, ppl: 17.275185 +epoch: 1, batch: 17915, sum loss: 4210.425781, avg loss: 2.607075, ppl: 13.559328 +epoch: 1, batch: 17916, sum loss: 4239.388184, avg loss: 2.669640, ppl: 14.434772 +epoch: 1, batch: 17917, sum loss: 4028.529541, avg loss: 2.650348, ppl: 14.158971 +epoch: 1, batch: 17918, sum loss: 4096.024902, avg loss: 2.348638, ppl: 10.471299 +epoch: 1, batch: 17919, sum loss: 4696.595703, avg loss: 2.749763, ppl: 15.638929 +epoch: 1, batch: 17920, sum loss: 5176.833496, avg loss: 2.774294, ppl: 16.027313 +epoch: 1, batch: 17921, sum loss: 4788.121094, avg loss: 2.981395, ppl: 19.715305 +epoch: 1, batch: 17922, sum loss: 4017.213379, avg loss: 2.555479, ppl: 12.877470 +epoch: 1, batch: 17923, sum loss: 5420.872070, avg loss: 2.939735, ppl: 18.910843 +epoch: 1, batch: 17924, sum loss: 4262.405273, avg loss: 2.541685, ppl: 12.701050 +epoch: 1, batch: 17925, sum loss: 4056.304688, avg loss: 2.487005, ppl: 12.025204 +epoch: 1, batch: 17926, sum loss: 5164.008301, avg loss: 2.773366, ppl: 16.012449 +epoch: 1, batch: 17927, sum loss: 4660.473145, avg loss: 2.767502, ppl: 15.918820 +epoch: 1, batch: 17928, sum loss: 
4931.422363, avg loss: 2.787689, ppl: 16.243444 +epoch: 1, batch: 17929, sum loss: 4109.491699, avg loss: 2.463724, ppl: 11.748483 +epoch: 1, batch: 17930, sum loss: 4232.400391, avg loss: 2.651880, ppl: 14.180674 +epoch: 1, batch: 17931, sum loss: 4805.336426, avg loss: 2.537137, ppl: 12.643415 +epoch: 1, batch: 17932, sum loss: 4772.370117, avg loss: 2.667619, ppl: 14.405625 +epoch: 1, batch: 17933, sum loss: 3382.305420, avg loss: 2.403913, ppl: 11.066395 +epoch: 1, batch: 17934, sum loss: 4489.981445, avg loss: 2.580449, ppl: 13.203067 +epoch: 1, batch: 17935, sum loss: 5012.225098, avg loss: 2.872335, ppl: 17.678251 +epoch: 1, batch: 17936, sum loss: 4250.657227, avg loss: 2.506284, ppl: 12.259287 +epoch: 1, batch: 17937, sum loss: 4438.266602, avg loss: 2.656054, ppl: 14.239991 +epoch: 1, batch: 17938, sum loss: 5457.646973, avg loss: 2.916968, ppl: 18.485157 +epoch: 1, batch: 17939, sum loss: 4407.118164, avg loss: 2.635836, ppl: 13.954979 +epoch: 1, batch: 17940, sum loss: 4813.940430, avg loss: 2.753971, ppl: 15.704866 +epoch: 1, batch: 17941, sum loss: 4387.369629, avg loss: 2.836050, ppl: 17.048296 +epoch: 1, batch: 17942, sum loss: 4975.873535, avg loss: 2.914982, ppl: 18.448473 +epoch: 1, batch: 17943, sum loss: 5786.863770, avg loss: 2.869045, ppl: 17.620188 +epoch: 1, batch: 17944, sum loss: 4000.710449, avg loss: 2.538522, ppl: 12.660941 +epoch: 1, batch: 17945, sum loss: 4079.926025, avg loss: 2.642439, ppl: 14.047429 +epoch: 1, batch: 17946, sum loss: 5653.475586, avg loss: 3.039503, ppl: 20.894857 +epoch: 1, batch: 17947, sum loss: 4020.053711, avg loss: 2.660525, ppl: 14.303802 +epoch: 1, batch: 17948, sum loss: 3701.326172, avg loss: 2.686013, ppl: 14.673061 +epoch: 1, batch: 17949, sum loss: 4832.188477, avg loss: 2.935716, ppl: 18.834988 +epoch: 1, batch: 17950, sum loss: 5682.127930, avg loss: 3.033704, ppl: 20.774044 +epoch: 1, batch: 17951, sum loss: 4375.200195, avg loss: 2.576679, ppl: 13.153379 +epoch: 1, batch: 17952, sum loss: 
4622.630371, avg loss: 2.748294, ppl: 15.615967 +epoch: 1, batch: 17953, sum loss: 4578.077148, avg loss: 2.649350, ppl: 14.144844 +epoch: 1, batch: 17954, sum loss: 4754.822754, avg loss: 2.703140, ppl: 14.926524 +epoch: 1, batch: 17955, sum loss: 5002.993164, avg loss: 2.726427, ppl: 15.278198 +epoch: 1, batch: 17956, sum loss: 5600.076172, avg loss: 3.022167, ppl: 20.535753 +epoch: 1, batch: 17957, sum loss: 2756.505859, avg loss: 2.185968, ppl: 8.899261 +epoch: 1, batch: 17958, sum loss: 3927.721680, avg loss: 2.447179, ppl: 11.555697 +epoch: 1, batch: 17959, sum loss: 5200.521484, avg loss: 2.929871, ppl: 18.725216 +epoch: 1, batch: 17960, sum loss: 4149.710449, avg loss: 2.510412, ppl: 12.309995 +epoch: 1, batch: 17961, sum loss: 4358.619629, avg loss: 2.720736, ppl: 15.191504 +epoch: 1, batch: 17962, sum loss: 4649.439453, avg loss: 2.700023, ppl: 14.880073 +epoch: 1, batch: 17963, sum loss: 4080.290527, avg loss: 2.542237, ppl: 12.708068 +epoch: 1, batch: 17964, sum loss: 3929.701904, avg loss: 2.474623, ppl: 11.877234 +epoch: 1, batch: 17965, sum loss: 4155.181152, avg loss: 2.566511, ppl: 13.020316 +epoch: 1, batch: 17966, sum loss: 5942.631348, avg loss: 2.998300, ppl: 20.051426 +epoch: 1, batch: 17967, sum loss: 4601.701660, avg loss: 2.586679, ppl: 13.285573 +epoch: 1, batch: 17968, sum loss: 4253.057617, avg loss: 2.796225, ppl: 16.382683 +epoch: 1, batch: 17969, sum loss: 4686.888672, avg loss: 2.745687, ppl: 15.575318 +epoch: 1, batch: 17970, sum loss: 3820.062256, avg loss: 2.475737, ppl: 11.890469 +epoch: 1, batch: 17971, sum loss: 4279.457031, avg loss: 2.598335, ppl: 13.441334 +epoch: 1, batch: 17972, sum loss: 5703.406738, avg loss: 2.681432, ppl: 14.606001 +epoch: 1, batch: 17973, sum loss: 5273.318359, avg loss: 2.728049, ppl: 15.303002 +epoch: 1, batch: 17974, sum loss: 5214.291504, avg loss: 2.959303, ppl: 19.284523 +epoch: 1, batch: 17975, sum loss: 4799.073730, avg loss: 2.942412, ppl: 18.961525 +epoch: 1, batch: 17976, sum loss: 
4937.477539, avg loss: 2.783245, ppl: 16.171417 +epoch: 1, batch: 17977, sum loss: 4664.640625, avg loss: 2.657915, ppl: 14.266511 +epoch: 1, batch: 17978, sum loss: 5258.126953, avg loss: 2.889081, ppl: 17.976778 +epoch: 1, batch: 17979, sum loss: 4919.717773, avg loss: 2.907635, ppl: 18.313431 +epoch: 1, batch: 17980, sum loss: 4256.134766, avg loss: 2.666751, ppl: 14.393132 +epoch: 1, batch: 17981, sum loss: 4338.023926, avg loss: 2.557797, ppl: 12.907353 +epoch: 1, batch: 17982, sum loss: 4915.464355, avg loss: 2.677268, ppl: 14.545301 +epoch: 1, batch: 17983, sum loss: 4481.334961, avg loss: 2.859818, ppl: 17.458353 +epoch: 1, batch: 17984, sum loss: 4766.370605, avg loss: 2.626100, ppl: 13.819762 +epoch: 1, batch: 17985, sum loss: 4217.475098, avg loss: 2.418277, ppl: 11.226497 +epoch: 1, batch: 17986, sum loss: 5326.879883, avg loss: 2.947913, ppl: 19.066130 +epoch: 1, batch: 17987, sum loss: 3897.388672, avg loss: 2.385183, ppl: 10.861049 +epoch: 1, batch: 17988, sum loss: 4569.406250, avg loss: 2.781136, ppl: 16.137339 +epoch: 1, batch: 17989, sum loss: 4792.071289, avg loss: 2.956244, ppl: 19.225620 +epoch: 1, batch: 17990, sum loss: 3557.961426, avg loss: 2.587608, ppl: 13.297930 +epoch: 1, batch: 17991, sum loss: 4809.725098, avg loss: 2.883528, ppl: 17.877237 +epoch: 1, batch: 17992, sum loss: 5763.585938, avg loss: 2.957202, ppl: 19.244045 +epoch: 1, batch: 17993, sum loss: 3456.609863, avg loss: 2.335547, ppl: 10.335114 +epoch: 1, batch: 17994, sum loss: 4177.760742, avg loss: 2.601345, ppl: 13.481863 +epoch: 1, batch: 17995, sum loss: 4244.951660, avg loss: 2.564926, ppl: 12.999692 +epoch: 1, batch: 17996, sum loss: 4777.270508, avg loss: 2.640835, ppl: 14.024914 +epoch: 1, batch: 17997, sum loss: 3941.303955, avg loss: 2.622291, ppl: 13.767233 +epoch: 1, batch: 17998, sum loss: 5093.728516, avg loss: 2.895809, ppl: 18.098145 +epoch: 1, batch: 17999, sum loss: 4822.611328, avg loss: 2.855306, ppl: 17.379749 +epoch: 1, batch: 18000, sum loss: 
4530.080078, avg loss: 2.615520, ppl: 13.674322 +epoch: 1, batch: 18001, sum loss: 4154.896484, avg loss: 2.561588, ppl: 12.956380 +epoch: 1, batch: 18002, sum loss: 4160.615723, avg loss: 2.705212, ppl: 14.957485 +epoch: 1, batch: 18003, sum loss: 4532.012207, avg loss: 2.678494, ppl: 14.563148 +epoch: 1, batch: 18004, sum loss: 3885.749268, avg loss: 2.511797, ppl: 12.327059 +epoch: 1, batch: 18005, sum loss: 4268.824219, avg loss: 2.713811, ppl: 15.086657 +epoch: 1, batch: 18006, sum loss: 4767.462891, avg loss: 2.682872, ppl: 14.627036 +epoch: 1, batch: 18007, sum loss: 4360.511719, avg loss: 2.362141, ppl: 10.613647 +epoch: 1, batch: 18008, sum loss: 4124.698730, avg loss: 2.582779, ppl: 13.233870 +epoch: 1, batch: 18009, sum loss: 4092.266602, avg loss: 2.740969, ppl: 15.501998 +epoch: 1, batch: 18010, sum loss: 4944.272461, avg loss: 2.722617, ppl: 15.220099 +epoch: 1, batch: 18011, sum loss: 4961.750977, avg loss: 2.811190, ppl: 16.629702 +epoch: 1, batch: 18012, sum loss: 5427.461426, avg loss: 2.672310, ppl: 14.473362 +epoch: 1, batch: 18013, sum loss: 4774.209961, avg loss: 2.841792, ppl: 17.146461 +epoch: 1, batch: 18014, sum loss: 4204.022461, avg loss: 2.693160, ppl: 14.778299 +epoch: 1, batch: 18015, sum loss: 4773.100098, avg loss: 2.514805, ppl: 12.364202 +epoch: 1, batch: 18016, sum loss: 4104.656738, avg loss: 2.555826, ppl: 12.881935 +epoch: 1, batch: 18017, sum loss: 3567.808838, avg loss: 2.516085, ppl: 12.380038 +epoch: 1, batch: 18018, sum loss: 5038.332031, avg loss: 2.707325, ppl: 14.989129 +epoch: 1, batch: 18019, sum loss: 4221.951172, avg loss: 2.684012, ppl: 14.643729 +epoch: 1, batch: 18020, sum loss: 4913.377441, avg loss: 2.654445, ppl: 14.217093 +epoch: 1, batch: 18021, sum loss: 4969.042969, avg loss: 2.600232, ppl: 13.466861 +epoch: 1, batch: 18022, sum loss: 4289.276367, avg loss: 2.623410, ppl: 13.782649 +epoch: 1, batch: 18023, sum loss: 5323.801270, avg loss: 2.869974, ppl: 17.636553 +epoch: 1, batch: 18024, sum loss: 
3622.147461, avg loss: 2.170250, ppl: 8.760475 +epoch: 1, batch: 18025, sum loss: 5269.481934, avg loss: 2.985542, ppl: 19.797235 +epoch: 1, batch: 18026, sum loss: 4458.214355, avg loss: 2.633322, ppl: 13.919938 +epoch: 1, batch: 18027, sum loss: 4082.999512, avg loss: 2.514162, ppl: 12.356254 +epoch: 1, batch: 18028, sum loss: 5394.751953, avg loss: 2.956028, ppl: 19.221481 +epoch: 1, batch: 18029, sum loss: 4846.917969, avg loss: 2.809808, ppl: 16.606726 +epoch: 1, batch: 18030, sum loss: 4169.745605, avg loss: 2.561269, ppl: 12.952241 +epoch: 1, batch: 18031, sum loss: 4545.551270, avg loss: 2.625968, ppl: 13.817946 +epoch: 1, batch: 18032, sum loss: 3244.082031, avg loss: 2.392391, ppl: 10.939619 +epoch: 1, batch: 18033, sum loss: 4645.426270, avg loss: 2.747148, ppl: 15.598076 +epoch: 1, batch: 18034, sum loss: 4284.823242, avg loss: 2.448470, ppl: 11.570634 +epoch: 1, batch: 18035, sum loss: 4101.820801, avg loss: 2.693251, ppl: 14.779641 +epoch: 1, batch: 18036, sum loss: 4274.602539, avg loss: 2.845940, ppl: 17.217741 +epoch: 1, batch: 18037, sum loss: 4050.110107, avg loss: 2.678644, ppl: 14.565335 +epoch: 1, batch: 18038, sum loss: 5001.502930, avg loss: 2.769382, ppl: 15.948767 +epoch: 1, batch: 18039, sum loss: 5338.679688, avg loss: 2.777669, ppl: 16.081490 +epoch: 1, batch: 18040, sum loss: 5263.374512, avg loss: 2.855873, ppl: 17.389618 +epoch: 1, batch: 18041, sum loss: 4176.443848, avg loss: 2.551279, ppl: 12.823496 +epoch: 1, batch: 18042, sum loss: 3894.154053, avg loss: 2.530314, ppl: 12.557454 +epoch: 1, batch: 18043, sum loss: 4698.719727, avg loss: 2.763953, ppl: 15.862423 +epoch: 1, batch: 18044, sum loss: 3560.403809, avg loss: 2.286708, ppl: 9.842480 +epoch: 1, batch: 18045, sum loss: 4870.281250, avg loss: 2.792592, ppl: 16.323280 +epoch: 1, batch: 18046, sum loss: 5002.155762, avg loss: 2.677814, ppl: 14.553238 +epoch: 1, batch: 18047, sum loss: 4344.511719, avg loss: 2.575289, ppl: 13.135106 +epoch: 1, batch: 18048, sum loss: 
3965.931641, avg loss: 2.407973, ppl: 11.111416 +epoch: 1, batch: 18049, sum loss: 5038.361328, avg loss: 2.835319, ppl: 17.035826 +epoch: 1, batch: 18050, sum loss: 3618.719727, avg loss: 2.616573, ppl: 13.688726 +epoch: 1, batch: 18051, sum loss: 5159.922852, avg loss: 2.930110, ppl: 18.729681 +epoch: 1, batch: 18052, sum loss: 4351.261719, avg loss: 2.659696, ppl: 14.291939 +epoch: 1, batch: 18053, sum loss: 4590.204102, avg loss: 2.984528, ppl: 19.777166 +epoch: 1, batch: 18054, sum loss: 3641.580078, avg loss: 2.460527, ppl: 11.710983 +epoch: 1, batch: 18055, sum loss: 4097.624512, avg loss: 2.631743, ppl: 13.897979 +epoch: 1, batch: 18056, sum loss: 4883.257324, avg loss: 2.892925, ppl: 18.046017 +epoch: 1, batch: 18057, sum loss: 3944.386475, avg loss: 2.440833, ppl: 11.482603 +epoch: 1, batch: 18058, sum loss: 3849.651611, avg loss: 2.771527, ppl: 15.983027 +epoch: 1, batch: 18059, sum loss: 4013.002197, avg loss: 2.585697, ppl: 13.272542 +epoch: 1, batch: 18060, sum loss: 4587.944336, avg loss: 2.645873, ppl: 14.095750 +epoch: 1, batch: 18061, sum loss: 4254.479492, avg loss: 2.561396, ppl: 12.953893 +epoch: 1, batch: 18062, sum loss: 4865.604004, avg loss: 2.568957, ppl: 13.052202 +epoch: 1, batch: 18063, sum loss: 3891.869629, avg loss: 2.638556, ppl: 13.992980 +epoch: 1, batch: 18064, sum loss: 5198.982422, avg loss: 2.991359, ppl: 19.912729 +epoch: 1, batch: 18065, sum loss: 4090.937744, avg loss: 2.686105, ppl: 14.674408 +epoch: 1, batch: 18066, sum loss: 4487.306152, avg loss: 2.567109, ppl: 13.028104 +epoch: 1, batch: 18067, sum loss: 5625.264648, avg loss: 2.890681, ppl: 18.005564 +epoch: 1, batch: 18068, sum loss: 3946.552734, avg loss: 2.497818, ppl: 12.155944 +epoch: 1, batch: 18069, sum loss: 5314.410645, avg loss: 2.651901, ppl: 14.180978 +epoch: 1, batch: 18070, sum loss: 4715.415039, avg loss: 2.735160, ppl: 15.412204 +epoch: 1, batch: 18071, sum loss: 4636.461914, avg loss: 2.754879, ppl: 15.719143 +epoch: 1, batch: 18072, sum loss: 
4841.991699, avg loss: 2.615879, ppl: 13.679233 +epoch: 1, batch: 18073, sum loss: 5367.556152, avg loss: 2.944354, ppl: 18.998377 +epoch: 1, batch: 18074, sum loss: 4637.910645, avg loss: 2.668533, ppl: 14.418802 +epoch: 1, batch: 18075, sum loss: 3984.184326, avg loss: 2.610868, ppl: 13.610860 +epoch: 1, batch: 18076, sum loss: 4652.218262, avg loss: 2.600457, ppl: 13.469898 +epoch: 1, batch: 18077, sum loss: 4358.794434, avg loss: 2.549003, ppl: 12.794337 +epoch: 1, batch: 18078, sum loss: 3768.469971, avg loss: 2.401829, ppl: 11.043359 +epoch: 1, batch: 18079, sum loss: 4088.156738, avg loss: 2.794366, ppl: 16.352253 +epoch: 1, batch: 18080, sum loss: 3965.118652, avg loss: 2.723296, ppl: 15.230438 +epoch: 1, batch: 18081, sum loss: 3810.811768, avg loss: 2.292907, ppl: 9.903688 +epoch: 1, batch: 18082, sum loss: 4760.789062, avg loss: 2.639018, ppl: 13.999454 +epoch: 1, batch: 18083, sum loss: 4926.131836, avg loss: 2.769045, ppl: 15.943406 +epoch: 1, batch: 18084, sum loss: 5056.284668, avg loss: 2.725760, ppl: 15.268013 +epoch: 1, batch: 18085, sum loss: 4464.610840, avg loss: 2.689525, ppl: 14.724675 +epoch: 1, batch: 18086, sum loss: 3950.564941, avg loss: 2.495619, ppl: 12.129237 +epoch: 1, batch: 18087, sum loss: 4920.708984, avg loss: 2.969649, ppl: 19.485085 +epoch: 1, batch: 18088, sum loss: 4013.340820, avg loss: 2.391741, ppl: 10.932506 +epoch: 1, batch: 18089, sum loss: 4276.831543, avg loss: 2.573304, ppl: 13.109068 +epoch: 1, batch: 18090, sum loss: 4564.596680, avg loss: 2.677183, ppl: 14.544064 +epoch: 1, batch: 18091, sum loss: 3882.449219, avg loss: 2.628605, ppl: 13.854428 +epoch: 1, batch: 18092, sum loss: 5061.599609, avg loss: 2.885746, ppl: 17.916937 +epoch: 1, batch: 18093, sum loss: 5049.519531, avg loss: 2.650666, ppl: 14.163475 +epoch: 1, batch: 18094, sum loss: 4784.803223, avg loss: 2.627569, ppl: 13.840083 +epoch: 1, batch: 18095, sum loss: 3865.000488, avg loss: 2.400621, ppl: 11.030028 +epoch: 1, batch: 18096, sum loss: 
4867.002441, avg loss: 2.541516, ppl: 12.698903 +epoch: 1, batch: 18097, sum loss: 4034.453369, avg loss: 2.722303, ppl: 15.215329 +epoch: 1, batch: 18098, sum loss: 3841.544922, avg loss: 2.494510, ppl: 12.115791 +epoch: 1, batch: 18099, sum loss: 4269.019531, avg loss: 2.671477, ppl: 14.461307 +epoch: 1, batch: 18100, sum loss: 4551.440430, avg loss: 2.649267, ppl: 14.143667 +epoch: 1, batch: 18101, sum loss: 3931.064697, avg loss: 2.534535, ppl: 12.610571 +epoch: 1, batch: 18102, sum loss: 5355.633301, avg loss: 2.859388, ppl: 17.450842 +epoch: 1, batch: 18103, sum loss: 4492.397461, avg loss: 2.776513, ppl: 16.062906 +epoch: 1, batch: 18104, sum loss: 4721.682129, avg loss: 2.802185, ppl: 16.480623 +epoch: 1, batch: 18105, sum loss: 5518.388672, avg loss: 2.818380, ppl: 16.749701 +epoch: 1, batch: 18106, sum loss: 5520.192871, avg loss: 2.900784, ppl: 18.188408 +epoch: 1, batch: 18107, sum loss: 4733.858887, avg loss: 2.929368, ppl: 18.715799 +epoch: 1, batch: 18108, sum loss: 4343.401367, avg loss: 2.554942, ppl: 12.870555 +epoch: 1, batch: 18109, sum loss: 5458.660156, avg loss: 2.876007, ppl: 17.743275 +epoch: 1, batch: 18110, sum loss: 4019.467529, avg loss: 2.594879, ppl: 13.394965 +epoch: 1, batch: 18111, sum loss: 5033.853516, avg loss: 2.866659, ppl: 17.578197 +epoch: 1, batch: 18112, sum loss: 4539.434570, avg loss: 2.700437, ppl: 14.886233 +epoch: 1, batch: 18113, sum loss: 4439.583984, avg loss: 2.603862, ppl: 13.515830 +epoch: 1, batch: 18114, sum loss: 3898.162109, avg loss: 2.651811, ppl: 14.179693 +epoch: 1, batch: 18115, sum loss: 4746.475098, avg loss: 2.590871, ppl: 13.341382 +epoch: 1, batch: 18116, sum loss: 4692.708984, avg loss: 2.875435, ppl: 17.733130 +epoch: 1, batch: 18117, sum loss: 4436.786621, avg loss: 2.561655, ppl: 12.957244 +epoch: 1, batch: 18118, sum loss: 5171.342285, avg loss: 2.613109, ppl: 13.641395 +epoch: 1, batch: 18119, sum loss: 4572.605957, avg loss: 2.653863, ppl: 14.208820 +epoch: 1, batch: 18120, sum loss: 
4232.950684, avg loss: 2.842814, ppl: 17.164005 +epoch: 1, batch: 18121, sum loss: 4869.849121, avg loss: 2.571198, ppl: 13.081490 +epoch: 1, batch: 18122, sum loss: 4164.288086, avg loss: 2.671128, ppl: 14.456264 +epoch: 1, batch: 18123, sum loss: 3095.778320, avg loss: 2.062477, ppl: 7.865431 +epoch: 1, batch: 18124, sum loss: 4527.699219, avg loss: 2.737424, ppl: 15.447145 +epoch: 1, batch: 18125, sum loss: 5625.313477, avg loss: 2.976356, ppl: 19.616211 +epoch: 1, batch: 18126, sum loss: 4191.735840, avg loss: 2.714855, ppl: 15.102420 +epoch: 1, batch: 18127, sum loss: 4473.594727, avg loss: 2.782086, ppl: 16.152683 +epoch: 1, batch: 18128, sum loss: 4092.187500, avg loss: 2.664185, ppl: 14.356238 +epoch: 1, batch: 18129, sum loss: 4954.976562, avg loss: 2.979541, ppl: 19.678783 +epoch: 1, batch: 18130, sum loss: 5545.106934, avg loss: 2.811920, ppl: 16.641842 +epoch: 1, batch: 18131, sum loss: 3843.960449, avg loss: 2.595517, ppl: 13.403514 +epoch: 1, batch: 18132, sum loss: 4438.678223, avg loss: 2.770710, ppl: 15.969976 +epoch: 1, batch: 18133, sum loss: 3573.710938, avg loss: 2.500847, ppl: 12.192822 +epoch: 1, batch: 18134, sum loss: 4198.434570, avg loss: 2.720956, ppl: 15.194839 +epoch: 1, batch: 18135, sum loss: 4657.666016, avg loss: 2.829688, ppl: 16.940172 +epoch: 1, batch: 18136, sum loss: 5505.901367, avg loss: 2.933352, ppl: 18.790506 +epoch: 1, batch: 18137, sum loss: 4675.214844, avg loss: 2.942237, ppl: 18.958212 +epoch: 1, batch: 18138, sum loss: 3715.342041, avg loss: 2.473597, ppl: 11.865044 +epoch: 1, batch: 18139, sum loss: 3929.018311, avg loss: 2.574717, ppl: 13.127605 +epoch: 1, batch: 18140, sum loss: 5353.705078, avg loss: 2.843179, ppl: 17.170256 +epoch: 1, batch: 18141, sum loss: 3897.595215, avg loss: 2.463714, ppl: 11.748360 +epoch: 1, batch: 18142, sum loss: 4600.780273, avg loss: 2.624518, ppl: 13.797924 +epoch: 1, batch: 18143, sum loss: 3549.464600, avg loss: 2.422843, ppl: 11.277874 +epoch: 1, batch: 18144, sum loss: 
4087.438232, avg loss: 2.741407, ppl: 15.508797 +epoch: 1, batch: 18145, sum loss: 4610.254883, avg loss: 2.691334, ppl: 14.751338 +epoch: 1, batch: 18146, sum loss: 4451.986816, avg loss: 2.549821, ppl: 12.804807 +epoch: 1, batch: 18147, sum loss: 4499.008301, avg loss: 2.676388, ppl: 14.532507 +epoch: 1, batch: 18148, sum loss: 4142.116211, avg loss: 2.656906, ppl: 14.252123 +epoch: 1, batch: 18149, sum loss: 4318.980469, avg loss: 2.854581, ppl: 17.367153 +epoch: 1, batch: 18150, sum loss: 4549.095215, avg loss: 2.732189, ppl: 15.366494 +epoch: 1, batch: 18151, sum loss: 4896.437988, avg loss: 2.825411, ppl: 16.867882 +epoch: 1, batch: 18152, sum loss: 4938.089844, avg loss: 2.913327, ppl: 18.417978 +epoch: 1, batch: 18153, sum loss: 4654.286621, avg loss: 2.653527, ppl: 14.204048 +epoch: 1, batch: 18154, sum loss: 4104.044434, avg loss: 2.397222, ppl: 10.992597 +epoch: 1, batch: 18155, sum loss: 5312.640137, avg loss: 2.804984, ppl: 16.526817 +epoch: 1, batch: 18156, sum loss: 4250.922852, avg loss: 2.603137, ppl: 13.506041 +epoch: 1, batch: 18157, sum loss: 3846.288086, avg loss: 2.371324, ppl: 10.711568 +epoch: 1, batch: 18158, sum loss: 4221.763672, avg loss: 2.729001, ppl: 15.317571 +epoch: 1, batch: 18159, sum loss: 3952.405762, avg loss: 2.533593, ppl: 12.598698 +epoch: 1, batch: 18160, sum loss: 4396.455078, avg loss: 2.607625, ppl: 13.566789 +epoch: 1, batch: 18161, sum loss: 4128.976074, avg loss: 2.580610, ppl: 13.205192 +epoch: 1, batch: 18162, sum loss: 3624.871094, avg loss: 2.171882, ppl: 8.774784 +epoch: 1, batch: 18163, sum loss: 4471.673828, avg loss: 2.552325, ppl: 12.836921 +epoch: 1, batch: 18164, sum loss: 4876.318359, avg loss: 2.810558, ppl: 16.619190 +epoch: 1, batch: 18165, sum loss: 4284.614258, avg loss: 2.470942, ppl: 11.833594 +epoch: 1, batch: 18166, sum loss: 4079.100342, avg loss: 2.528891, ppl: 12.539587 +epoch: 1, batch: 18167, sum loss: 5056.984375, avg loss: 2.790830, ppl: 16.294540 +epoch: 1, batch: 18168, sum loss: 
4457.650391, avg loss: 2.765292, ppl: 15.883673 +epoch: 1, batch: 18169, sum loss: 4506.528809, avg loss: 2.688860, ppl: 14.714887 +epoch: 1, batch: 18170, sum loss: 3622.909180, avg loss: 2.310529, ppl: 10.079753 +epoch: 1, batch: 18171, sum loss: 4758.496094, avg loss: 2.720695, ppl: 15.190880 +epoch: 1, batch: 18172, sum loss: 4641.627441, avg loss: 2.631308, ppl: 13.891930 +epoch: 1, batch: 18173, sum loss: 4753.429688, avg loss: 2.934216, ppl: 18.806753 +epoch: 1, batch: 18174, sum loss: 4253.361816, avg loss: 2.546923, ppl: 12.767762 +epoch: 1, batch: 18175, sum loss: 4153.880859, avg loss: 2.557808, ppl: 12.907498 +epoch: 1, batch: 18176, sum loss: 4088.721924, avg loss: 2.563462, ppl: 12.980679 +epoch: 1, batch: 18177, sum loss: 5707.632812, avg loss: 3.160372, ppl: 23.579378 +epoch: 1, batch: 18178, sum loss: 3987.942139, avg loss: 2.349995, ppl: 10.485521 +epoch: 1, batch: 18179, sum loss: 3944.715576, avg loss: 2.480953, ppl: 11.952653 +epoch: 1, batch: 18180, sum loss: 3813.916992, avg loss: 2.650394, ppl: 14.159616 +epoch: 1, batch: 18181, sum loss: 4431.480957, avg loss: 2.677632, ppl: 14.550598 +epoch: 1, batch: 18182, sum loss: 4133.016602, avg loss: 2.717302, ppl: 15.139426 +epoch: 1, batch: 18183, sum loss: 3397.556885, avg loss: 2.469155, ppl: 11.812456 +epoch: 1, batch: 18184, sum loss: 5031.110352, avg loss: 2.826467, ppl: 16.885691 +epoch: 1, batch: 18185, sum loss: 4139.904297, avg loss: 2.499942, ppl: 12.181788 +epoch: 1, batch: 18186, sum loss: 5899.813477, avg loss: 2.833724, ppl: 17.008684 +epoch: 1, batch: 18187, sum loss: 4073.247070, avg loss: 2.553760, ppl: 12.855350 +epoch: 1, batch: 18188, sum loss: 5023.469727, avg loss: 2.797032, ppl: 16.395914 +epoch: 1, batch: 18189, sum loss: 5231.661133, avg loss: 2.942442, ppl: 18.962090 +epoch: 1, batch: 18190, sum loss: 5451.784668, avg loss: 2.932644, ppl: 18.777205 +epoch: 1, batch: 18191, sum loss: 3034.324463, avg loss: 2.107170, ppl: 8.224931 +epoch: 1, batch: 18192, sum loss: 
4957.972168, avg loss: 2.841245, ppl: 17.137083 +epoch: 1, batch: 18193, sum loss: 4618.632324, avg loss: 2.726465, ppl: 15.278785 +epoch: 1, batch: 18194, sum loss: 4028.006836, avg loss: 2.583712, ppl: 13.246215 +epoch: 1, batch: 18195, sum loss: 3617.863281, avg loss: 2.505445, ppl: 12.249015 +epoch: 1, batch: 18196, sum loss: 3845.780273, avg loss: 2.535122, ppl: 12.617972 +epoch: 1, batch: 18197, sum loss: 4225.148926, avg loss: 2.774228, ppl: 16.026247 +epoch: 1, batch: 18198, sum loss: 4473.990723, avg loss: 2.510657, ppl: 12.313018 +epoch: 1, batch: 18199, sum loss: 4550.343262, avg loss: 2.595746, ppl: 13.406589 +epoch: 1, batch: 18200, sum loss: 4982.126953, avg loss: 2.809999, ppl: 16.609894 +epoch: 1, batch: 18201, sum loss: 3805.043213, avg loss: 2.533318, ppl: 12.595222 +epoch: 1, batch: 18202, sum loss: 4161.131836, avg loss: 2.551276, ppl: 12.823462 +epoch: 1, batch: 18203, sum loss: 4728.085938, avg loss: 2.816013, ppl: 16.710096 +epoch: 1, batch: 18204, sum loss: 4013.750000, avg loss: 2.318746, ppl: 10.162926 +epoch: 1, batch: 18205, sum loss: 4432.537109, avg loss: 2.739516, ppl: 15.479491 +epoch: 1, batch: 18206, sum loss: 4662.358398, avg loss: 2.665728, ppl: 14.378414 +epoch: 1, batch: 18207, sum loss: 3957.151367, avg loss: 2.918253, ppl: 18.508928 +epoch: 1, batch: 18208, sum loss: 4195.733398, avg loss: 2.585171, ppl: 13.265563 +epoch: 1, batch: 18209, sum loss: 4046.342529, avg loss: 2.401390, ppl: 11.038513 +epoch: 1, batch: 18210, sum loss: 5290.492188, avg loss: 2.639966, ppl: 14.012731 +epoch: 1, batch: 18211, sum loss: 4989.321289, avg loss: 2.788888, ppl: 16.262928 +epoch: 1, batch: 18212, sum loss: 4209.746582, avg loss: 2.609886, ppl: 13.597507 +epoch: 1, batch: 18213, sum loss: 3464.212891, avg loss: 2.515768, ppl: 12.376114 +epoch: 1, batch: 18214, sum loss: 3847.600098, avg loss: 2.364843, ppl: 10.642372 +epoch: 1, batch: 18215, sum loss: 3361.937012, avg loss: 2.544994, ppl: 12.743150 +epoch: 1, batch: 18216, sum loss: 
4272.469727, avg loss: 2.666960, ppl: 14.396135 +epoch: 1, batch: 18217, sum loss: 4614.765625, avg loss: 2.637009, ppl: 13.971352 +epoch: 1, batch: 18218, sum loss: 4149.275391, avg loss: 2.606329, ppl: 13.549216 +epoch: 1, batch: 18219, sum loss: 4360.373047, avg loss: 2.730353, ppl: 15.338295 +epoch: 1, batch: 18220, sum loss: 5483.956055, avg loss: 3.048336, ppl: 21.080233 +epoch: 1, batch: 18221, sum loss: 5124.328125, avg loss: 2.851602, ppl: 17.315496 +epoch: 1, batch: 18222, sum loss: 3614.477539, avg loss: 2.429084, ppl: 11.348486 +epoch: 1, batch: 18223, sum loss: 4474.151855, avg loss: 2.527770, ppl: 12.525537 +epoch: 1, batch: 18224, sum loss: 4621.512207, avg loss: 2.597815, ppl: 13.434346 +epoch: 1, batch: 18225, sum loss: 4113.285156, avg loss: 2.645200, ppl: 14.086255 +epoch: 1, batch: 18226, sum loss: 5681.688477, avg loss: 2.979386, ppl: 19.675724 +epoch: 1, batch: 18227, sum loss: 4088.343506, avg loss: 2.716507, ppl: 15.127397 +epoch: 1, batch: 18228, sum loss: 4795.812500, avg loss: 2.822727, ppl: 16.822659 +epoch: 1, batch: 18229, sum loss: 4133.701172, avg loss: 2.698238, ppl: 14.853542 +epoch: 1, batch: 18230, sum loss: 4550.893066, avg loss: 2.713711, ppl: 15.085150 +epoch: 1, batch: 18231, sum loss: 5218.951172, avg loss: 2.821055, ppl: 16.794554 +epoch: 1, batch: 18232, sum loss: 5437.727051, avg loss: 3.032754, ppl: 20.754305 +epoch: 1, batch: 18233, sum loss: 4899.041504, avg loss: 2.715655, ppl: 15.114505 +epoch: 1, batch: 18234, sum loss: 5336.440430, avg loss: 3.001373, ppl: 20.113125 +epoch: 1, batch: 18235, sum loss: 4445.506836, avg loss: 2.790651, ppl: 16.291620 +epoch: 1, batch: 18236, sum loss: 4224.992188, avg loss: 2.627483, ppl: 13.838889 +epoch: 1, batch: 18237, sum loss: 4672.914551, avg loss: 2.661113, ppl: 14.312214 +epoch: 1, batch: 18238, sum loss: 4887.097656, avg loss: 2.645965, ppl: 14.097047 +epoch: 1, batch: 18239, sum loss: 4327.790039, avg loss: 2.706560, ppl: 14.977669 +epoch: 1, batch: 18240, sum loss: 
4533.635742, avg loss: 2.784788, ppl: 16.196390 +epoch: 1, batch: 18241, sum loss: 5193.197754, avg loss: 2.957402, ppl: 19.247900 +epoch: 1, batch: 18242, sum loss: 4641.685059, avg loss: 2.572996, ppl: 13.105030 +epoch: 1, batch: 18243, sum loss: 3341.288330, avg loss: 2.047358, ppl: 7.747406 +epoch: 1, batch: 18244, sum loss: 3931.514160, avg loss: 2.314017, ppl: 10.114971 +epoch: 1, batch: 18245, sum loss: 4137.238281, avg loss: 2.679559, ppl: 14.578659 +epoch: 1, batch: 18246, sum loss: 4332.998535, avg loss: 2.528004, ppl: 12.528474 +epoch: 1, batch: 18247, sum loss: 3820.293945, avg loss: 2.598840, ppl: 13.448123 +epoch: 1, batch: 18248, sum loss: 4711.501465, avg loss: 2.753654, ppl: 15.699895 +epoch: 1, batch: 18249, sum loss: 3846.234863, avg loss: 2.562448, ppl: 12.967526 +epoch: 1, batch: 18250, sum loss: 5100.083496, avg loss: 2.858791, ppl: 17.440435 +epoch: 1, batch: 18251, sum loss: 5490.242188, avg loss: 2.703221, ppl: 14.927741 +epoch: 1, batch: 18252, sum loss: 4481.905273, avg loss: 2.463939, ppl: 11.751010 +epoch: 1, batch: 18253, sum loss: 5135.600098, avg loss: 2.531099, ppl: 12.567311 +epoch: 1, batch: 18254, sum loss: 3332.668457, avg loss: 2.356908, ppl: 10.558258 +epoch: 1, batch: 18255, sum loss: 4382.956055, avg loss: 2.629248, ppl: 13.863337 +epoch: 1, batch: 18256, sum loss: 4209.021973, avg loss: 2.738466, ppl: 15.463246 +epoch: 1, batch: 18257, sum loss: 4879.279785, avg loss: 2.906063, ppl: 18.284668 +epoch: 1, batch: 18258, sum loss: 4601.008789, avg loss: 2.737067, ppl: 15.441621 +epoch: 1, batch: 18259, sum loss: 5200.088867, avg loss: 2.855623, ppl: 17.385262 +epoch: 1, batch: 18260, sum loss: 4644.940918, avg loss: 2.655770, ppl: 14.235938 +epoch: 1, batch: 18261, sum loss: 4655.270996, avg loss: 2.935228, ppl: 18.825788 +epoch: 1, batch: 18262, sum loss: 4410.149902, avg loss: 2.463771, ppl: 11.749032 +epoch: 1, batch: 18263, sum loss: 4724.145508, avg loss: 2.617255, ppl: 13.698073 +epoch: 1, batch: 18264, sum loss: 
5116.220703, avg loss: 2.910251, ppl: 18.361401 +epoch: 1, batch: 18265, sum loss: 3352.137207, avg loss: 2.576585, ppl: 13.152147 +epoch: 1, batch: 18266, sum loss: 3980.849609, avg loss: 2.605268, ppl: 13.534852 +epoch: 1, batch: 18267, sum loss: 4225.855957, avg loss: 2.586203, ppl: 13.279256 +epoch: 1, batch: 18268, sum loss: 4356.449707, avg loss: 2.717685, ppl: 15.145224 +epoch: 1, batch: 18269, sum loss: 4386.804199, avg loss: 2.704565, ppl: 14.947810 +epoch: 1, batch: 18270, sum loss: 4092.293945, avg loss: 2.394555, ppl: 10.963314 +epoch: 1, batch: 18271, sum loss: 5734.745117, avg loss: 2.925890, ppl: 18.650822 +epoch: 1, batch: 18272, sum loss: 4760.204102, avg loss: 2.726348, ppl: 15.276999 +epoch: 1, batch: 18273, sum loss: 4096.317383, avg loss: 2.642786, ppl: 14.052293 +epoch: 1, batch: 18274, sum loss: 4852.874023, avg loss: 2.711103, ppl: 15.045857 +epoch: 1, batch: 18275, sum loss: 4620.348633, avg loss: 2.783343, ppl: 16.172991 +epoch: 1, batch: 18276, sum loss: 4133.603516, avg loss: 2.553183, ppl: 12.847938 +epoch: 1, batch: 18277, sum loss: 4198.708984, avg loss: 2.719371, ppl: 15.170782 +epoch: 1, batch: 18278, sum loss: 4012.644531, avg loss: 2.481537, ppl: 11.959628 +epoch: 1, batch: 18279, sum loss: 4337.653320, avg loss: 2.657876, ppl: 14.265956 +epoch: 1, batch: 18280, sum loss: 4350.738770, avg loss: 2.823322, ppl: 16.832672 +epoch: 1, batch: 18281, sum loss: 3832.009277, avg loss: 2.674117, ppl: 14.499539 +epoch: 1, batch: 18282, sum loss: 4263.806152, avg loss: 2.580997, ppl: 13.210296 +epoch: 1, batch: 18283, sum loss: 4079.103516, avg loss: 2.636783, ppl: 13.968197 +epoch: 1, batch: 18284, sum loss: 5196.095215, avg loss: 2.885117, ppl: 17.905655 +epoch: 1, batch: 18285, sum loss: 4583.583496, avg loss: 2.686743, ppl: 14.683770 +epoch: 1, batch: 18286, sum loss: 4718.349609, avg loss: 2.618396, ppl: 13.713710 +epoch: 1, batch: 18287, sum loss: 4666.312500, avg loss: 2.606879, ppl: 13.556668 +epoch: 1, batch: 18288, sum loss: 
3743.810791, avg loss: 2.736704, ppl: 15.436022 +epoch: 1, batch: 18289, sum loss: 4675.140137, avg loss: 2.843759, ppl: 17.180229 +epoch: 1, batch: 18290, sum loss: 4372.369141, avg loss: 2.769075, ppl: 15.943874 +epoch: 1, batch: 18291, sum loss: 4452.417480, avg loss: 2.936951, ppl: 18.858259 +epoch: 1, batch: 18292, sum loss: 4587.789551, avg loss: 2.624594, ppl: 13.798967 +epoch: 1, batch: 18293, sum loss: 3595.977539, avg loss: 2.527040, ppl: 12.516399 +epoch: 1, batch: 18294, sum loss: 3434.000244, avg loss: 2.532449, ppl: 12.584281 +epoch: 1, batch: 18295, sum loss: 3966.957031, avg loss: 2.669554, ppl: 14.433527 +epoch: 1, batch: 18296, sum loss: 4260.116699, avg loss: 2.699694, ppl: 14.875175 +epoch: 1, batch: 18297, sum loss: 4035.332764, avg loss: 2.733965, ppl: 15.393808 +epoch: 1, batch: 18298, sum loss: 5232.820801, avg loss: 2.839295, ppl: 17.103706 +epoch: 1, batch: 18299, sum loss: 3521.934570, avg loss: 2.368483, ppl: 10.681177 +epoch: 1, batch: 18300, sum loss: 4711.983887, avg loss: 2.799753, ppl: 16.440580 +epoch: 1, batch: 18301, sum loss: 4226.605957, avg loss: 2.495045, ppl: 12.122275 +epoch: 1, batch: 18302, sum loss: 4211.637695, avg loss: 2.460069, ppl: 11.705616 +epoch: 1, batch: 18303, sum loss: 4849.006348, avg loss: 2.857399, ppl: 17.416172 +epoch: 1, batch: 18304, sum loss: 3839.367432, avg loss: 2.499588, ppl: 12.177479 +epoch: 1, batch: 18305, sum loss: 4860.800293, avg loss: 2.544922, ppl: 12.742229 +epoch: 1, batch: 18306, sum loss: 4898.242188, avg loss: 2.818321, ppl: 16.748711 +epoch: 1, batch: 18307, sum loss: 4848.587402, avg loss: 2.830466, ppl: 16.953352 +epoch: 1, batch: 18308, sum loss: 4921.290039, avg loss: 2.654417, ppl: 14.216700 +epoch: 1, batch: 18309, sum loss: 5338.195312, avg loss: 2.842489, ppl: 17.158428 +epoch: 1, batch: 18310, sum loss: 3932.145020, avg loss: 2.407927, ppl: 11.110905 +epoch: 1, batch: 18311, sum loss: 4719.659668, avg loss: 2.642587, ppl: 14.049499 +epoch: 1, batch: 18312, sum loss: 
3930.207764, avg loss: 2.589070, ppl: 13.317375 +epoch: 1, batch: 18313, sum loss: 4153.225098, avg loss: 2.677772, ppl: 14.552638 +epoch: 1, batch: 18314, sum loss: 4800.206055, avg loss: 2.867507, ppl: 17.593100 +epoch: 1, batch: 18315, sum loss: 4817.648438, avg loss: 2.823944, ppl: 16.843147 +epoch: 1, batch: 18316, sum loss: 4495.885742, avg loss: 2.703479, ppl: 14.931589 +epoch: 1, batch: 18317, sum loss: 4368.156250, avg loss: 2.768160, ppl: 15.929295 +epoch: 1, batch: 18318, sum loss: 4582.327637, avg loss: 2.588886, ppl: 13.314928 +epoch: 1, batch: 18319, sum loss: 3746.883057, avg loss: 2.445746, ppl: 11.539157 +epoch: 1, batch: 18320, sum loss: 4244.329590, avg loss: 2.550679, ppl: 12.815803 +epoch: 1, batch: 18321, sum loss: 4632.999512, avg loss: 2.990962, ppl: 19.904812 +epoch: 1, batch: 18322, sum loss: 4015.657715, avg loss: 2.336043, ppl: 10.340238 +epoch: 1, batch: 18323, sum loss: 4322.262695, avg loss: 2.468454, ppl: 11.804182 +epoch: 1, batch: 18324, sum loss: 4219.180176, avg loss: 2.770309, ppl: 15.963562 +epoch: 1, batch: 18325, sum loss: 4699.704590, avg loss: 2.826040, ppl: 16.878490 +epoch: 1, batch: 18326, sum loss: 4387.729004, avg loss: 2.700141, ppl: 14.881829 +epoch: 1, batch: 18327, sum loss: 4717.062012, avg loss: 2.729781, ppl: 15.329535 +epoch: 1, batch: 18328, sum loss: 4724.713867, avg loss: 2.837666, ppl: 17.075865 +epoch: 1, batch: 18329, sum loss: 4348.822754, avg loss: 2.712927, ppl: 15.073336 +epoch: 1, batch: 18330, sum loss: 4521.916016, avg loss: 2.609299, ppl: 13.589527 +epoch: 1, batch: 18331, sum loss: 4174.131348, avg loss: 2.729975, ppl: 15.332500 +epoch: 1, batch: 18332, sum loss: 4081.931152, avg loss: 2.524385, ppl: 12.483221 +epoch: 1, batch: 18333, sum loss: 3467.163818, avg loss: 2.412779, ppl: 11.164949 +epoch: 1, batch: 18334, sum loss: 4803.155273, avg loss: 2.964911, ppl: 19.392973 +epoch: 1, batch: 18335, sum loss: 4721.432129, avg loss: 2.702594, ppl: 14.918384 +epoch: 1, batch: 18336, sum loss: 
4347.538574, avg loss: 2.608001, ppl: 13.571899 +epoch: 1, batch: 18337, sum loss: 4408.862793, avg loss: 2.668803, ppl: 14.422698 +epoch: 1, batch: 18338, sum loss: 3545.581055, avg loss: 2.563689, ppl: 12.983620 +epoch: 1, batch: 18339, sum loss: 4184.514648, avg loss: 2.630116, ppl: 13.875380 +epoch: 1, batch: 18340, sum loss: 5088.944336, avg loss: 2.850949, ppl: 17.304201 +epoch: 1, batch: 18341, sum loss: 4879.799805, avg loss: 2.767895, ppl: 15.925084 +epoch: 1, batch: 18342, sum loss: 4815.654297, avg loss: 2.883625, ppl: 17.878971 +epoch: 1, batch: 18343, sum loss: 4330.845703, avg loss: 2.665136, ppl: 14.369902 +epoch: 1, batch: 18344, sum loss: 4742.391113, avg loss: 2.640530, ppl: 14.020627 +epoch: 1, batch: 18345, sum loss: 3982.838135, avg loss: 2.508085, ppl: 12.281383 +epoch: 1, batch: 18346, sum loss: 4529.494629, avg loss: 2.627317, ppl: 13.836599 +epoch: 1, batch: 18347, sum loss: 4351.496582, avg loss: 2.428291, ppl: 11.339482 +epoch: 1, batch: 18348, sum loss: 3861.416504, avg loss: 2.447032, ppl: 11.554003 +epoch: 1, batch: 18349, sum loss: 4280.703613, avg loss: 2.738774, ppl: 15.468007 +epoch: 1, batch: 18350, sum loss: 5153.097656, avg loss: 2.906429, ppl: 18.291355 +epoch: 1, batch: 18351, sum loss: 4495.433105, avg loss: 2.567352, ppl: 13.031273 +epoch: 1, batch: 18352, sum loss: 4461.307617, avg loss: 2.848855, ppl: 17.268003 +epoch: 1, batch: 18353, sum loss: 3722.370605, avg loss: 2.330852, ppl: 10.286702 +epoch: 1, batch: 18354, sum loss: 3716.760498, avg loss: 2.391738, ppl: 10.932475 +epoch: 1, batch: 18355, sum loss: 3569.241211, avg loss: 2.482087, ppl: 11.966213 +epoch: 1, batch: 18356, sum loss: 3900.590332, avg loss: 2.770306, ppl: 15.963512 +epoch: 1, batch: 18357, sum loss: 3685.524414, avg loss: 2.400993, ppl: 11.034129 +epoch: 1, batch: 18358, sum loss: 4357.964355, avg loss: 2.740858, ppl: 15.500279 +epoch: 1, batch: 18359, sum loss: 4523.568359, avg loss: 2.639188, ppl: 14.001830 +epoch: 1, batch: 18360, sum loss: 
3653.402832, avg loss: 2.277683, ppl: 9.754049 +epoch: 1, batch: 18361, sum loss: 4526.476562, avg loss: 2.585081, ppl: 13.264361 +epoch: 1, batch: 18362, sum loss: 5185.354980, avg loss: 2.880753, ppl: 17.827688 +epoch: 1, batch: 18363, sum loss: 5383.436523, avg loss: 2.880383, ppl: 17.821102 +epoch: 1, batch: 18364, sum loss: 5422.323242, avg loss: 3.032619, ppl: 20.751514 +epoch: 1, batch: 18365, sum loss: 3698.667725, avg loss: 2.369422, ppl: 10.691211 +epoch: 1, batch: 18366, sum loss: 4679.814453, avg loss: 2.973198, ppl: 19.554363 +epoch: 1, batch: 18367, sum loss: 4145.079102, avg loss: 2.563438, ppl: 12.980367 +epoch: 1, batch: 18368, sum loss: 4806.381836, avg loss: 2.817340, ppl: 16.732283 +epoch: 1, batch: 18369, sum loss: 5531.697754, avg loss: 2.893147, ppl: 18.050028 +epoch: 1, batch: 18370, sum loss: 4179.001953, avg loss: 2.268731, ppl: 9.667125 +epoch: 1, batch: 18371, sum loss: 4817.771484, avg loss: 2.854130, ppl: 17.359329 +epoch: 1, batch: 18372, sum loss: 4777.266602, avg loss: 2.825114, ppl: 16.862860 +epoch: 1, batch: 18373, sum loss: 4095.179932, avg loss: 2.611722, ppl: 13.622485 +epoch: 1, batch: 18374, sum loss: 3718.032959, avg loss: 2.486979, ppl: 12.024888 +epoch: 1, batch: 18375, sum loss: 4119.679688, avg loss: 2.373087, ppl: 10.730471 +epoch: 1, batch: 18376, sum loss: 4020.086426, avg loss: 2.487677, ppl: 12.033292 +epoch: 1, batch: 18377, sum loss: 4589.289062, avg loss: 2.906453, ppl: 18.291800 +epoch: 1, batch: 18378, sum loss: 3998.437500, avg loss: 2.522673, ppl: 12.461867 +epoch: 1, batch: 18379, sum loss: 4669.480469, avg loss: 2.796096, ppl: 16.380573 +epoch: 1, batch: 18380, sum loss: 4917.875000, avg loss: 2.941313, ppl: 18.940701 +epoch: 1, batch: 18381, sum loss: 4039.831055, avg loss: 2.542373, ppl: 12.709798 +epoch: 1, batch: 18382, sum loss: 4689.119629, avg loss: 2.764811, ppl: 15.876040 +epoch: 1, batch: 18383, sum loss: 5337.854004, avg loss: 2.720619, ppl: 15.189717 +epoch: 1, batch: 18384, sum loss: 
4868.969727, avg loss: 2.807941, ppl: 16.575752 +epoch: 1, batch: 18385, sum loss: 5597.253418, avg loss: 2.928966, ppl: 18.708267 +epoch: 1, batch: 18386, sum loss: 3569.847412, avg loss: 2.351678, ppl: 10.503181 +epoch: 1, batch: 18387, sum loss: 3513.486084, avg loss: 2.524056, ppl: 12.479112 +epoch: 1, batch: 18388, sum loss: 4505.780273, avg loss: 2.704550, ppl: 14.947586 +epoch: 1, batch: 18389, sum loss: 4754.505859, avg loss: 2.754638, ppl: 15.715358 +epoch: 1, batch: 18390, sum loss: 4725.848633, avg loss: 2.390414, ppl: 10.918013 +epoch: 1, batch: 18391, sum loss: 5648.285156, avg loss: 2.919010, ppl: 18.522947 +epoch: 1, batch: 18392, sum loss: 3641.929688, avg loss: 2.504766, ppl: 12.240694 +epoch: 1, batch: 18393, sum loss: 5418.043945, avg loss: 2.853104, ppl: 17.341520 +epoch: 1, batch: 18394, sum loss: 4757.197754, avg loss: 2.846917, ppl: 17.234564 +epoch: 1, batch: 18395, sum loss: 3998.280518, avg loss: 2.548299, ppl: 12.785333 +epoch: 1, batch: 18396, sum loss: 4197.719727, avg loss: 2.453372, ppl: 11.627491 +epoch: 1, batch: 18397, sum loss: 4774.531738, avg loss: 2.860714, ppl: 17.474003 +epoch: 1, batch: 18398, sum loss: 4512.070312, avg loss: 2.657285, ppl: 14.257534 +epoch: 1, batch: 18399, sum loss: 4734.030762, avg loss: 2.726976, ppl: 15.286592 +epoch: 1, batch: 18400, sum loss: 5414.705566, avg loss: 2.931622, ppl: 18.758032 +epoch: 1, batch: 18401, sum loss: 4403.718750, avg loss: 2.649650, ppl: 14.149091 +epoch: 1, batch: 18402, sum loss: 4800.982422, avg loss: 2.878287, ppl: 17.783781 +epoch: 1, batch: 18403, sum loss: 4668.113281, avg loss: 2.818909, ppl: 16.758556 +epoch: 1, batch: 18404, sum loss: 4867.662109, avg loss: 2.547181, ppl: 12.771047 +epoch: 1, batch: 18405, sum loss: 4700.118652, avg loss: 2.646463, ppl: 14.104067 +epoch: 1, batch: 18406, sum loss: 4980.503418, avg loss: 2.991293, ppl: 19.911421 +epoch: 1, batch: 18407, sum loss: 3788.677734, avg loss: 2.468194, ppl: 11.801115 +epoch: 1, batch: 18408, sum loss: 
4201.345703, avg loss: 2.645684, ppl: 14.093079 +epoch: 1, batch: 18409, sum loss: 5026.005859, avg loss: 2.635556, ppl: 13.951070 +epoch: 1, batch: 18410, sum loss: 4831.473145, avg loss: 2.838703, ppl: 17.093588 +epoch: 1, batch: 18411, sum loss: 4354.496582, avg loss: 2.770036, ppl: 15.959208 +epoch: 1, batch: 18412, sum loss: 5318.217285, avg loss: 2.837896, ppl: 17.079794 +epoch: 1, batch: 18413, sum loss: 3557.372070, avg loss: 2.424930, ppl: 11.301434 +epoch: 1, batch: 18414, sum loss: 4468.506348, avg loss: 2.551974, ppl: 12.832411 +epoch: 1, batch: 18415, sum loss: 3595.859375, avg loss: 2.485044, ppl: 12.001654 +epoch: 1, batch: 18416, sum loss: 5302.850586, avg loss: 2.849463, ppl: 17.278500 +epoch: 1, batch: 18417, sum loss: 4360.960938, avg loss: 2.667254, ppl: 14.400378 +epoch: 1, batch: 18418, sum loss: 4689.436035, avg loss: 2.732772, ppl: 15.375443 +epoch: 1, batch: 18419, sum loss: 4247.672363, avg loss: 2.679919, ppl: 14.583919 +epoch: 1, batch: 18420, sum loss: 5130.889160, avg loss: 2.895536, ppl: 18.093191 +epoch: 1, batch: 18421, sum loss: 5774.142578, avg loss: 3.097716, ppl: 22.147306 +epoch: 1, batch: 18422, sum loss: 5102.367188, avg loss: 2.822106, ppl: 16.812214 +epoch: 1, batch: 18423, sum loss: 4835.849121, avg loss: 2.790449, ppl: 16.288338 +epoch: 1, batch: 18424, sum loss: 5105.531250, avg loss: 2.722950, ppl: 15.225170 +epoch: 1, batch: 18425, sum loss: 4062.213867, avg loss: 2.385328, ppl: 10.862626 +epoch: 1, batch: 18426, sum loss: 4593.708984, avg loss: 2.658396, ppl: 14.273383 +epoch: 1, batch: 18427, sum loss: 4247.240723, avg loss: 2.517629, ppl: 12.399168 +epoch: 1, batch: 18428, sum loss: 5504.797363, avg loss: 3.160044, ppl: 23.571644 +epoch: 1, batch: 18429, sum loss: 4827.785156, avg loss: 2.668759, ppl: 14.422066 +epoch: 1, batch: 18430, sum loss: 4580.194336, avg loss: 2.609797, ppl: 13.596293 +epoch: 1, batch: 18431, sum loss: 3402.888672, avg loss: 2.366404, ppl: 10.658992 +epoch: 1, batch: 18432, sum loss: 
3618.600098, avg loss: 2.433490, ppl: 11.398597 +epoch: 1, batch: 18433, sum loss: 4582.989258, avg loss: 2.669184, ppl: 14.428194 +epoch: 1, batch: 18434, sum loss: 4670.357422, avg loss: 2.515001, ppl: 12.366625 +epoch: 1, batch: 18435, sum loss: 4252.151367, avg loss: 2.890654, ppl: 18.005074 +epoch: 1, batch: 18436, sum loss: 3840.251465, avg loss: 2.850966, ppl: 17.304493 +epoch: 1, batch: 18437, sum loss: 4419.169922, avg loss: 2.428115, ppl: 11.337495 +epoch: 1, batch: 18438, sum loss: 4754.280762, avg loss: 2.869210, ppl: 17.623087 +epoch: 1, batch: 18439, sum loss: 4859.071289, avg loss: 2.781380, ppl: 16.141283 +epoch: 1, batch: 18440, sum loss: 4348.980957, avg loss: 2.686214, ppl: 14.676014 +epoch: 1, batch: 18441, sum loss: 4535.898926, avg loss: 2.779350, ppl: 16.108547 +epoch: 1, batch: 18442, sum loss: 4105.705078, avg loss: 2.378740, ppl: 10.791295 +epoch: 1, batch: 18443, sum loss: 5229.168945, avg loss: 2.967746, ppl: 19.448044 +epoch: 1, batch: 18444, sum loss: 6107.207031, avg loss: 3.079782, ppl: 21.753654 +epoch: 1, batch: 18445, sum loss: 4690.108887, avg loss: 2.684664, ppl: 14.653284 +epoch: 1, batch: 18446, sum loss: 4628.820312, avg loss: 2.894822, ppl: 18.080280 +epoch: 1, batch: 18447, sum loss: 4884.671387, avg loss: 2.838275, ppl: 17.086266 +epoch: 1, batch: 18448, sum loss: 4807.890137, avg loss: 3.035284, ppl: 20.806892 +epoch: 1, batch: 18449, sum loss: 3412.058594, avg loss: 2.341838, ppl: 10.400338 +epoch: 1, batch: 18450, sum loss: 5250.096680, avg loss: 2.886254, ppl: 17.926039 +epoch: 1, batch: 18451, sum loss: 3665.479004, avg loss: 2.529661, ppl: 12.549253 +epoch: 1, batch: 18452, sum loss: 4818.818848, avg loss: 2.788668, ppl: 16.259354 +epoch: 1, batch: 18453, sum loss: 4976.991211, avg loss: 2.740634, ppl: 15.496802 +epoch: 1, batch: 18454, sum loss: 3847.793213, avg loss: 2.482447, ppl: 11.970525 +epoch: 1, batch: 18455, sum loss: 3416.552246, avg loss: 2.517725, ppl: 12.400351 +epoch: 1, batch: 18456, sum loss: 
5061.503906, avg loss: 2.764339, ppl: 15.868544 +epoch: 1, batch: 18457, sum loss: 5058.391113, avg loss: 2.640079, ppl: 14.014311 +epoch: 1, batch: 18458, sum loss: 4079.950195, avg loss: 2.673624, ppl: 14.492395 +epoch: 1, batch: 18459, sum loss: 4739.288574, avg loss: 2.659534, ppl: 14.289621 +epoch: 1, batch: 18460, sum loss: 4195.537109, avg loss: 2.701569, ppl: 14.903101 +epoch: 1, batch: 18461, sum loss: 4684.733887, avg loss: 2.621563, ppl: 13.757215 +epoch: 1, batch: 18462, sum loss: 4987.573242, avg loss: 2.608563, ppl: 13.579529 +epoch: 1, batch: 18463, sum loss: 4946.498535, avg loss: 2.619967, ppl: 13.735276 +epoch: 1, batch: 18464, sum loss: 5015.820801, avg loss: 2.778848, ppl: 16.100466 +epoch: 1, batch: 18465, sum loss: 4791.027832, avg loss: 2.740863, ppl: 15.500354 +epoch: 1, batch: 18466, sum loss: 3286.533936, avg loss: 2.362713, ppl: 10.619725 +epoch: 1, batch: 18467, sum loss: 5107.910156, avg loss: 2.718420, ppl: 15.156349 +epoch: 1, batch: 18468, sum loss: 3227.188721, avg loss: 2.357333, ppl: 10.562742 +epoch: 1, batch: 18469, sum loss: 3956.697754, avg loss: 2.559313, ppl: 12.926931 +epoch: 1, batch: 18470, sum loss: 4014.512939, avg loss: 2.534415, ppl: 12.609050 +epoch: 1, batch: 18471, sum loss: 4389.947754, avg loss: 2.649335, ppl: 14.144625 +epoch: 1, batch: 18472, sum loss: 3626.468262, avg loss: 2.437143, ppl: 11.440305 +epoch: 1, batch: 18473, sum loss: 4811.079102, avg loss: 2.746050, ppl: 15.580963 +epoch: 1, batch: 18474, sum loss: 5331.592285, avg loss: 3.034486, ppl: 20.790291 +epoch: 1, batch: 18475, sum loss: 4150.412109, avg loss: 2.719798, ppl: 15.177261 +epoch: 1, batch: 18476, sum loss: 4101.518066, avg loss: 2.625812, ppl: 13.815785 +epoch: 1, batch: 18477, sum loss: 4470.822754, avg loss: 2.506067, ppl: 12.256627 +epoch: 1, batch: 18478, sum loss: 5707.342773, avg loss: 2.826817, ppl: 16.891602 +epoch: 1, batch: 18479, sum loss: 4804.919434, avg loss: 2.676835, ppl: 14.539009 +epoch: 1, batch: 18480, sum loss: 
5326.784180, avg loss: 2.825880, ppl: 16.875793 +epoch: 1, batch: 18481, sum loss: 4729.643066, avg loss: 2.670606, ppl: 14.448721 +epoch: 1, batch: 18482, sum loss: 4306.495605, avg loss: 2.554268, ppl: 12.861879 +epoch: 1, batch: 18483, sum loss: 3781.515137, avg loss: 2.404015, ppl: 11.067519 +epoch: 1, batch: 18484, sum loss: 4486.871582, avg loss: 2.640890, ppl: 14.025680 +epoch: 1, batch: 18485, sum loss: 3785.023682, avg loss: 2.715225, ppl: 15.108008 +epoch: 1, batch: 18486, sum loss: 4508.096680, avg loss: 2.819322, ppl: 16.765486 +epoch: 1, batch: 18487, sum loss: 4016.024902, avg loss: 2.673785, ppl: 14.494724 +epoch: 1, batch: 18488, sum loss: 5184.725586, avg loss: 2.690569, ppl: 14.740056 +epoch: 1, batch: 18489, sum loss: 5098.916016, avg loss: 2.879117, ppl: 17.798546 +epoch: 1, batch: 18490, sum loss: 4203.534180, avg loss: 2.381606, ppl: 10.822268 +epoch: 1, batch: 18491, sum loss: 4651.678711, avg loss: 3.014698, ppl: 20.382935 +epoch: 1, batch: 18492, sum loss: 4126.574707, avg loss: 2.513139, ppl: 12.343622 +epoch: 1, batch: 18493, sum loss: 4897.819824, avg loss: 2.793965, ppl: 16.345695 +epoch: 1, batch: 18494, sum loss: 3883.782715, avg loss: 2.546743, ppl: 12.765455 +epoch: 1, batch: 18495, sum loss: 3979.305664, avg loss: 2.554111, ppl: 12.859868 +epoch: 1, batch: 18496, sum loss: 5206.543945, avg loss: 2.750419, ppl: 15.649194 +epoch: 1, batch: 18497, sum loss: 4288.113281, avg loss: 2.603590, ppl: 13.512163 +epoch: 1, batch: 18498, sum loss: 4373.496094, avg loss: 2.611042, ppl: 13.613235 +epoch: 1, batch: 18499, sum loss: 4278.929688, avg loss: 2.556111, ppl: 12.885609 +epoch: 1, batch: 18500, sum loss: 4629.163086, avg loss: 2.807255, ppl: 16.564383 +epoch: 1, batch: 18501, sum loss: 4417.542969, avg loss: 2.507119, ppl: 12.269526 +epoch: 1, batch: 18502, sum loss: 4253.239746, avg loss: 2.527177, ppl: 12.518122 +epoch: 1, batch: 18503, sum loss: 4599.806641, avg loss: 2.661925, ppl: 14.323837 +epoch: 1, batch: 18504, sum loss: 
4933.130859, avg loss: 2.815714, ppl: 16.705101 +epoch: 1, batch: 18505, sum loss: 4802.948730, avg loss: 2.710468, ppl: 15.036308 +epoch: 1, batch: 18506, sum loss: 4706.609375, avg loss: 2.610432, ppl: 13.604932 +epoch: 1, batch: 18507, sum loss: 5543.722656, avg loss: 2.884351, ppl: 17.891956 +epoch: 1, batch: 18508, sum loss: 4664.851074, avg loss: 2.479985, ppl: 11.941079 +epoch: 1, batch: 18509, sum loss: 4928.403320, avg loss: 2.672670, ppl: 14.478574 +epoch: 1, batch: 18510, sum loss: 4454.018555, avg loss: 2.623097, ppl: 13.778328 +epoch: 1, batch: 18511, sum loss: 4395.054199, avg loss: 2.837349, ppl: 17.070459 +epoch: 1, batch: 18512, sum loss: 4095.033691, avg loss: 2.503077, ppl: 12.220037 +epoch: 1, batch: 18513, sum loss: 3862.572754, avg loss: 2.346642, ppl: 10.450418 +epoch: 1, batch: 18514, sum loss: 4829.653809, avg loss: 2.814484, ppl: 16.684559 +epoch: 1, batch: 18515, sum loss: 4449.682129, avg loss: 2.670877, ppl: 14.452645 +epoch: 1, batch: 18516, sum loss: 4496.327148, avg loss: 2.820782, ppl: 16.789982 +epoch: 1, batch: 18517, sum loss: 4294.177734, avg loss: 2.660581, ppl: 14.304599 +epoch: 1, batch: 18518, sum loss: 3819.562500, avg loss: 2.541292, ppl: 12.696069 +epoch: 1, batch: 18519, sum loss: 4048.115234, avg loss: 2.763219, ppl: 15.850779 +epoch: 1, batch: 18520, sum loss: 4131.296875, avg loss: 2.719748, ppl: 15.176494 +epoch: 1, batch: 18521, sum loss: 4746.319824, avg loss: 2.796889, ppl: 16.393560 +epoch: 1, batch: 18522, sum loss: 4400.174805, avg loss: 2.748392, ppl: 15.617493 +epoch: 1, batch: 18523, sum loss: 4131.483398, avg loss: 2.681041, ppl: 14.600277 +epoch: 1, batch: 18524, sum loss: 4568.187988, avg loss: 2.442881, ppl: 11.506146 +epoch: 1, batch: 18525, sum loss: 3914.991455, avg loss: 2.677833, ppl: 14.553519 +epoch: 1, batch: 18526, sum loss: 3682.625977, avg loss: 2.433990, ppl: 11.404289 +epoch: 1, batch: 18527, sum loss: 5132.751465, avg loss: 2.645748, ppl: 14.093988 +epoch: 1, batch: 18528, sum loss: 
4423.411621, avg loss: 2.606607, ppl: 13.552983 +epoch: 1, batch: 18529, sum loss: 4141.742676, avg loss: 2.608150, ppl: 13.573922 +epoch: 1, batch: 18530, sum loss: 3720.227783, avg loss: 2.312137, ppl: 10.095973 +epoch: 1, batch: 18531, sum loss: 4399.762207, avg loss: 2.504133, ppl: 12.232951 +epoch: 1, batch: 18532, sum loss: 4365.602539, avg loss: 2.778869, ppl: 16.100796 +epoch: 1, batch: 18533, sum loss: 3847.208008, avg loss: 2.684723, ppl: 14.654143 +epoch: 1, batch: 18534, sum loss: 4439.301758, avg loss: 2.626806, ppl: 13.829525 +epoch: 1, batch: 18535, sum loss: 5591.951660, avg loss: 2.713223, ppl: 15.077785 +epoch: 1, batch: 18536, sum loss: 3301.740723, avg loss: 2.247611, ppl: 9.465097 +epoch: 1, batch: 18537, sum loss: 4811.621094, avg loss: 2.691063, ppl: 14.747347 +epoch: 1, batch: 18538, sum loss: 5257.540039, avg loss: 3.012917, ppl: 20.346661 +epoch: 1, batch: 18539, sum loss: 4794.094238, avg loss: 2.888009, ppl: 17.957514 +epoch: 1, batch: 18540, sum loss: 4008.389404, avg loss: 2.656322, ppl: 14.243800 +epoch: 1, batch: 18541, sum loss: 3627.936035, avg loss: 2.490004, ppl: 12.061325 +epoch: 1, batch: 18542, sum loss: 4925.092285, avg loss: 2.801531, ppl: 16.469849 +epoch: 1, batch: 18543, sum loss: 5118.517578, avg loss: 2.853131, ppl: 17.341999 +epoch: 1, batch: 18544, sum loss: 4213.795898, avg loss: 2.597901, ppl: 13.435512 +epoch: 1, batch: 18545, sum loss: 5614.212891, avg loss: 2.917990, ppl: 18.504057 +epoch: 1, batch: 18546, sum loss: 4726.681152, avg loss: 2.859456, ppl: 17.452032 +epoch: 1, batch: 18547, sum loss: 5964.866211, avg loss: 2.964645, ppl: 19.387827 +epoch: 1, batch: 18548, sum loss: 3811.476807, avg loss: 2.558038, ppl: 12.910465 +epoch: 1, batch: 18549, sum loss: 4273.853027, avg loss: 2.806207, ppl: 16.547035 +epoch: 1, batch: 18550, sum loss: 4213.049316, avg loss: 2.636451, ppl: 13.963566 +epoch: 1, batch: 18551, sum loss: 4756.077637, avg loss: 2.776461, ppl: 16.062075 +epoch: 1, batch: 18552, sum loss: 
4707.947266, avg loss: 2.595340, ppl: 13.401146 +epoch: 1, batch: 18553, sum loss: 4667.461914, avg loss: 2.594476, ppl: 13.389566 +epoch: 1, batch: 18554, sum loss: 3858.786621, avg loss: 2.420820, ppl: 11.255082 +epoch: 1, batch: 18555, sum loss: 4975.701172, avg loss: 2.582097, ppl: 13.224846 +epoch: 1, batch: 18556, sum loss: 4381.455566, avg loss: 2.787185, ppl: 16.235260 +epoch: 1, batch: 18557, sum loss: 4925.553223, avg loss: 2.888888, ppl: 17.973307 +epoch: 1, batch: 18558, sum loss: 4470.791016, avg loss: 2.831407, ppl: 16.969313 +epoch: 1, batch: 18559, sum loss: 5128.954102, avg loss: 2.904278, ppl: 18.252052 +epoch: 1, batch: 18560, sum loss: 3970.732422, avg loss: 2.386258, ppl: 10.872728 +epoch: 1, batch: 18561, sum loss: 4571.854492, avg loss: 2.827368, ppl: 16.900927 +epoch: 1, batch: 18562, sum loss: 3414.614502, avg loss: 2.322867, ppl: 10.204891 +epoch: 1, batch: 18563, sum loss: 4853.245605, avg loss: 2.682834, ppl: 14.626481 +epoch: 1, batch: 18564, sum loss: 4467.930664, avg loss: 2.714417, ppl: 15.095810 +epoch: 1, batch: 18565, sum loss: 4083.466064, avg loss: 2.599278, ppl: 13.454027 +epoch: 1, batch: 18566, sum loss: 4577.530273, avg loss: 2.627744, ppl: 13.842505 +epoch: 1, batch: 18567, sum loss: 4269.579590, avg loss: 2.783298, ppl: 16.172274 +epoch: 1, batch: 18568, sum loss: 4461.978516, avg loss: 2.717405, ppl: 15.140978 +epoch: 1, batch: 18569, sum loss: 3419.187988, avg loss: 2.252429, ppl: 9.510814 +epoch: 1, batch: 18570, sum loss: 4649.842773, avg loss: 2.554859, ppl: 12.869481 +epoch: 1, batch: 18571, sum loss: 4655.241699, avg loss: 2.720773, ppl: 15.192058 +epoch: 1, batch: 18572, sum loss: 4890.395508, avg loss: 2.794512, ppl: 16.354643 +epoch: 1, batch: 18573, sum loss: 3607.152100, avg loss: 2.370008, ppl: 10.697477 +epoch: 1, batch: 18574, sum loss: 5799.919922, avg loss: 2.862744, ppl: 17.509514 +epoch: 1, batch: 18575, sum loss: 4399.462891, avg loss: 2.438727, ppl: 11.458441 +epoch: 1, batch: 18576, sum loss: 
4199.651855, avg loss: 2.709453, ppl: 15.021055 +epoch: 1, batch: 18577, sum loss: 4343.873047, avg loss: 2.759767, ppl: 15.796159 +epoch: 1, batch: 18578, sum loss: 3951.100830, avg loss: 2.466355, ppl: 11.779433 +epoch: 1, batch: 18579, sum loss: 5575.240723, avg loss: 3.038279, ppl: 20.869287 +epoch: 1, batch: 18580, sum loss: 4783.160645, avg loss: 2.645553, ppl: 14.091240 +epoch: 1, batch: 18581, sum loss: 5121.150391, avg loss: 2.656199, ppl: 14.242045 +epoch: 1, batch: 18582, sum loss: 4033.477051, avg loss: 2.572371, ppl: 13.096834 +epoch: 1, batch: 18583, sum loss: 5250.078613, avg loss: 2.902200, ppl: 18.214163 +epoch: 1, batch: 18584, sum loss: 4535.289062, avg loss: 2.689970, ppl: 14.731231 +epoch: 1, batch: 18585, sum loss: 3321.103760, avg loss: 2.315972, ppl: 10.134768 +epoch: 1, batch: 18586, sum loss: 4107.441406, avg loss: 2.607899, ppl: 13.570512 +epoch: 1, batch: 18587, sum loss: 4648.268555, avg loss: 2.679117, ppl: 14.572227 +epoch: 1, batch: 18588, sum loss: 3846.941406, avg loss: 2.276297, ppl: 9.740541 +epoch: 1, batch: 18589, sum loss: 5423.188965, avg loss: 2.858824, ppl: 17.441004 +epoch: 1, batch: 18590, sum loss: 4816.955078, avg loss: 2.802185, ppl: 16.480610 +epoch: 1, batch: 18591, sum loss: 5197.983887, avg loss: 2.680755, ppl: 14.596111 +epoch: 1, batch: 18592, sum loss: 4057.906738, avg loss: 2.764242, ppl: 15.867003 +epoch: 1, batch: 18593, sum loss: 4832.517090, avg loss: 2.839317, ppl: 17.104073 +epoch: 1, batch: 18594, sum loss: 4599.792480, avg loss: 2.813329, ppl: 16.665300 +epoch: 1, batch: 18595, sum loss: 5510.509277, avg loss: 2.937372, ppl: 18.866196 +epoch: 1, batch: 18596, sum loss: 5702.682617, avg loss: 3.039809, ppl: 20.901260 +epoch: 1, batch: 18597, sum loss: 5090.643066, avg loss: 2.884217, ppl: 17.889555 +epoch: 1, batch: 18598, sum loss: 3693.829102, avg loss: 2.400149, ppl: 11.024817 +epoch: 1, batch: 18599, sum loss: 3941.747803, avg loss: 2.569588, ppl: 13.060442 +epoch: 1, batch: 18600, sum loss: 
4459.827637, avg loss: 2.600482, ppl: 13.470236 +epoch: 1, batch: 18601, sum loss: 4610.187500, avg loss: 2.819686, ppl: 16.771591 +epoch: 1, batch: 18602, sum loss: 4336.312988, avg loss: 2.479310, ppl: 11.933025 +epoch: 1, batch: 18603, sum loss: 4526.669922, avg loss: 2.773695, ppl: 16.017710 +epoch: 1, batch: 18604, sum loss: 5597.126953, avg loss: 2.950515, ppl: 19.115797 +epoch: 1, batch: 18605, sum loss: 4537.644531, avg loss: 2.748422, ppl: 15.617970 +epoch: 1, batch: 18606, sum loss: 4304.044922, avg loss: 2.725804, ppl: 15.268690 +epoch: 1, batch: 18607, sum loss: 5059.094238, avg loss: 2.807489, ppl: 16.568258 +epoch: 1, batch: 18608, sum loss: 4972.107422, avg loss: 2.818655, ppl: 16.754301 +epoch: 1, batch: 18609, sum loss: 4438.933105, avg loss: 2.861981, ppl: 17.496159 +epoch: 1, batch: 18610, sum loss: 4703.647949, avg loss: 2.747458, ppl: 15.602915 +epoch: 1, batch: 18611, sum loss: 4515.065430, avg loss: 2.780213, ppl: 16.122448 +epoch: 1, batch: 18612, sum loss: 4405.522949, avg loss: 2.593009, ppl: 13.369947 +epoch: 1, batch: 18613, sum loss: 5053.654297, avg loss: 2.823271, ppl: 16.831810 +epoch: 1, batch: 18614, sum loss: 5516.133789, avg loss: 3.093738, ppl: 22.059374 +epoch: 1, batch: 18615, sum loss: 4114.162598, avg loss: 2.564939, ppl: 12.999868 +epoch: 1, batch: 18616, sum loss: 3286.458496, avg loss: 2.283849, ppl: 9.814381 +epoch: 1, batch: 18617, sum loss: 4325.652344, avg loss: 2.788944, ppl: 16.263836 +epoch: 1, batch: 18618, sum loss: 4518.984375, avg loss: 2.743767, ppl: 15.545435 +epoch: 1, batch: 18619, sum loss: 3967.759033, avg loss: 2.666505, ppl: 14.389585 +epoch: 1, batch: 18620, sum loss: 4311.637207, avg loss: 2.673055, ppl: 14.484146 +epoch: 1, batch: 18621, sum loss: 4737.358398, avg loss: 2.801513, ppl: 16.469549 +epoch: 1, batch: 18622, sum loss: 4505.528320, avg loss: 2.851600, ppl: 17.315468 +epoch: 1, batch: 18623, sum loss: 3953.714355, avg loss: 2.475713, ppl: 11.890187 +epoch: 1, batch: 18624, sum loss: 
4254.687012, avg loss: 2.427089, ppl: 11.325864 +epoch: 1, batch: 18625, sum loss: 4423.125488, avg loss: 2.716908, ppl: 15.133450 +epoch: 1, batch: 18626, sum loss: 4221.766113, avg loss: 2.641906, ppl: 14.039942 +epoch: 1, batch: 18627, sum loss: 4061.955566, avg loss: 2.527664, ppl: 12.524212 +epoch: 1, batch: 18628, sum loss: 3791.411865, avg loss: 2.469975, ppl: 11.822154 +epoch: 1, batch: 18629, sum loss: 5602.440918, avg loss: 2.911872, ppl: 18.391190 +epoch: 1, batch: 18630, sum loss: 4059.587891, avg loss: 2.658538, ppl: 14.275408 +epoch: 1, batch: 18631, sum loss: 4052.928467, avg loss: 2.749612, ppl: 15.636569 +epoch: 1, batch: 18632, sum loss: 4822.970703, avg loss: 2.842057, ppl: 17.151009 +epoch: 1, batch: 18633, sum loss: 4316.837891, avg loss: 2.821463, ppl: 16.801411 +epoch: 1, batch: 18634, sum loss: 5241.170410, avg loss: 2.989829, ppl: 19.882288 +epoch: 1, batch: 18635, sum loss: 3525.743164, avg loss: 2.282035, ppl: 9.796593 +epoch: 1, batch: 18636, sum loss: 3656.267334, avg loss: 2.385041, ppl: 10.859506 +epoch: 1, batch: 18637, sum loss: 5568.845703, avg loss: 2.781641, ppl: 16.145498 +epoch: 1, batch: 18638, sum loss: 4092.084961, avg loss: 2.638353, ppl: 13.990138 +epoch: 1, batch: 18639, sum loss: 4534.506836, avg loss: 2.595596, ppl: 13.404578 +epoch: 1, batch: 18640, sum loss: 4537.229492, avg loss: 2.702340, ppl: 14.914597 +epoch: 1, batch: 18641, sum loss: 4570.755859, avg loss: 2.702990, ppl: 14.924290 +epoch: 1, batch: 18642, sum loss: 5394.403320, avg loss: 2.941332, ppl: 18.941061 +epoch: 1, batch: 18643, sum loss: 4195.276855, avg loss: 2.685837, ppl: 14.670472 +epoch: 1, batch: 18644, sum loss: 4677.965820, avg loss: 2.580235, ppl: 13.200240 +epoch: 1, batch: 18645, sum loss: 4201.348145, avg loss: 2.664140, ppl: 14.355595 +epoch: 1, batch: 18646, sum loss: 5627.235352, avg loss: 2.827756, ppl: 16.907484 +epoch: 1, batch: 18647, sum loss: 4617.878418, avg loss: 2.647866, ppl: 14.123866 +epoch: 1, batch: 18648, sum loss: 
5174.478516, avg loss: 2.672768, ppl: 14.479989 +epoch: 1, batch: 18649, sum loss: 5027.270508, avg loss: 2.960701, ppl: 19.311508 +epoch: 1, batch: 18650, sum loss: 4568.990234, avg loss: 2.563968, ppl: 12.987245 +epoch: 1, batch: 18651, sum loss: 5463.395996, avg loss: 2.915366, ppl: 18.455564 +epoch: 1, batch: 18652, sum loss: 4529.283691, avg loss: 2.558917, ppl: 12.921819 +epoch: 1, batch: 18653, sum loss: 4087.449219, avg loss: 2.524675, ppl: 12.486841 +epoch: 1, batch: 18654, sum loss: 3868.717041, avg loss: 2.535201, ppl: 12.618971 +epoch: 1, batch: 18655, sum loss: 3387.007080, avg loss: 2.408967, ppl: 11.122460 +epoch: 1, batch: 18656, sum loss: 4588.504395, avg loss: 2.672396, ppl: 14.474611 +epoch: 1, batch: 18657, sum loss: 4981.813477, avg loss: 2.832185, ppl: 16.982527 +epoch: 1, batch: 18658, sum loss: 4359.015625, avg loss: 2.558108, ppl: 12.911364 +epoch: 1, batch: 18659, sum loss: 4484.088867, avg loss: 2.588966, ppl: 13.315994 +epoch: 1, batch: 18660, sum loss: 4468.505859, avg loss: 2.647219, ppl: 14.114734 +epoch: 1, batch: 18661, sum loss: 4722.346191, avg loss: 3.042749, ppl: 20.962790 +epoch: 1, batch: 18662, sum loss: 3823.341553, avg loss: 2.576376, ppl: 13.149395 +epoch: 1, batch: 18663, sum loss: 5130.700684, avg loss: 2.856738, ppl: 17.404654 +epoch: 1, batch: 18664, sum loss: 4265.106934, avg loss: 2.599090, ppl: 13.451493 +epoch: 1, batch: 18665, sum loss: 4134.480957, avg loss: 2.723637, ppl: 15.235635 +epoch: 1, batch: 18666, sum loss: 5363.593262, avg loss: 2.737924, ppl: 15.454869 +epoch: 1, batch: 18667, sum loss: 4049.645996, avg loss: 2.681885, ppl: 14.612609 +epoch: 1, batch: 18668, sum loss: 3756.379150, avg loss: 2.558841, ppl: 12.920836 +epoch: 1, batch: 18669, sum loss: 4104.365723, avg loss: 2.525764, ppl: 12.500436 +epoch: 1, batch: 18670, sum loss: 5139.648926, avg loss: 2.706503, ppl: 14.976809 +epoch: 1, batch: 18671, sum loss: 4522.246094, avg loss: 2.760834, ppl: 15.813022 +epoch: 1, batch: 18672, sum loss: 
4150.876953, avg loss: 2.526401, ppl: 12.508408 +epoch: 1, batch: 18673, sum loss: 4430.731934, avg loss: 2.595625, ppl: 13.404965 +epoch: 1, batch: 18674, sum loss: 4430.618164, avg loss: 2.624774, ppl: 13.801455 +epoch: 1, batch: 18675, sum loss: 3444.340576, avg loss: 2.249733, ppl: 9.485201 +epoch: 1, batch: 18676, sum loss: 4649.687500, avg loss: 2.666105, ppl: 14.383836 +epoch: 1, batch: 18677, sum loss: 5106.333984, avg loss: 2.785780, ppl: 16.212454 +epoch: 1, batch: 18678, sum loss: 4533.729980, avg loss: 2.871267, ppl: 17.659372 +epoch: 1, batch: 18679, sum loss: 4158.190918, avg loss: 2.668929, ppl: 14.424510 +epoch: 1, batch: 18680, sum loss: 3174.599365, avg loss: 2.327419, ppl: 10.251447 +epoch: 1, batch: 18681, sum loss: 5348.089355, avg loss: 2.994451, ppl: 19.974392 +epoch: 1, batch: 18682, sum loss: 4660.455078, avg loss: 2.903710, ppl: 18.241699 +epoch: 1, batch: 18683, sum loss: 4290.312500, avg loss: 2.597042, ppl: 13.423965 +epoch: 1, batch: 18684, sum loss: 3761.992432, avg loss: 2.357138, ppl: 10.560685 +epoch: 1, batch: 18685, sum loss: 3366.071777, avg loss: 2.335928, ppl: 10.339047 +epoch: 1, batch: 18686, sum loss: 4142.640625, avg loss: 2.697032, ppl: 14.835630 +epoch: 1, batch: 18687, sum loss: 4829.747559, avg loss: 2.643540, ppl: 14.062897 +epoch: 1, batch: 18688, sum loss: 4201.682617, avg loss: 2.579302, ppl: 13.187927 +epoch: 1, batch: 18689, sum loss: 5927.675781, avg loss: 3.103495, ppl: 22.275671 +epoch: 1, batch: 18690, sum loss: 3848.943848, avg loss: 2.384724, ppl: 10.856062 +epoch: 1, batch: 18691, sum loss: 3715.661621, avg loss: 2.583909, ppl: 13.248831 +epoch: 1, batch: 18692, sum loss: 4639.138184, avg loss: 2.749934, ppl: 15.641595 +epoch: 1, batch: 18693, sum loss: 4035.413574, avg loss: 2.357134, ppl: 10.560642 +epoch: 1, batch: 18694, sum loss: 4072.667969, avg loss: 2.755527, ppl: 15.729321 +epoch: 1, batch: 18695, sum loss: 4145.437012, avg loss: 2.582827, ppl: 13.234494 +epoch: 1, batch: 18696, sum loss: 
4368.620117, avg loss: 2.716804, ppl: 15.131877 +epoch: 1, batch: 18697, sum loss: 4317.845703, avg loss: 2.461714, ppl: 11.724886 +epoch: 1, batch: 18698, sum loss: 4291.669434, avg loss: 2.543965, ppl: 12.730050 +epoch: 1, batch: 18699, sum loss: 4654.246094, avg loss: 2.601591, ppl: 13.485174 +epoch: 1, batch: 18700, sum loss: 4830.707031, avg loss: 2.959992, ppl: 19.297821 +epoch: 1, batch: 18701, sum loss: 3789.495117, avg loss: 2.524647, ppl: 12.486486 +epoch: 1, batch: 18702, sum loss: 4661.857910, avg loss: 2.680769, ppl: 14.596320 +epoch: 1, batch: 18703, sum loss: 3982.231445, avg loss: 2.533226, ppl: 12.594069 +epoch: 1, batch: 18704, sum loss: 3725.279785, avg loss: 2.505232, ppl: 12.246398 +epoch: 1, batch: 18705, sum loss: 4129.119141, avg loss: 2.697008, ppl: 14.835280 +epoch: 1, batch: 18706, sum loss: 4490.722656, avg loss: 2.831477, ppl: 16.970510 +epoch: 1, batch: 18707, sum loss: 5617.667480, avg loss: 2.931977, ppl: 18.764688 +epoch: 1, batch: 18708, sum loss: 3327.228027, avg loss: 2.167575, ppl: 8.737072 +epoch: 1, batch: 18709, sum loss: 3908.145996, avg loss: 2.710226, ppl: 15.032674 +epoch: 1, batch: 18710, sum loss: 4287.855469, avg loss: 2.661611, ppl: 14.319344 +epoch: 1, batch: 18711, sum loss: 4062.539551, avg loss: 2.831038, ppl: 16.963058 +epoch: 1, batch: 18712, sum loss: 5228.043457, avg loss: 2.932161, ppl: 18.768150 +epoch: 1, batch: 18713, sum loss: 4637.953613, avg loss: 2.665491, ppl: 14.375001 +epoch: 1, batch: 18714, sum loss: 4400.514160, avg loss: 2.564402, ppl: 12.992890 +epoch: 1, batch: 18715, sum loss: 4911.266113, avg loss: 2.689631, ppl: 14.726241 +epoch: 1, batch: 18716, sum loss: 4556.910156, avg loss: 2.696397, ppl: 14.826210 +epoch: 1, batch: 18717, sum loss: 4270.260254, avg loss: 2.809382, ppl: 16.599655 +epoch: 1, batch: 18718, sum loss: 4852.084961, avg loss: 2.951390, ppl: 19.132524 +epoch: 1, batch: 18719, sum loss: 4779.919922, avg loss: 2.705105, ppl: 14.955884 +epoch: 1, batch: 18720, sum loss: 
3347.320801, avg loss: 2.294257, ppl: 9.917064 +epoch: 1, batch: 18721, sum loss: 5256.196777, avg loss: 2.662714, ppl: 14.335135 +epoch: 1, batch: 18722, sum loss: 3738.685547, avg loss: 2.504143, ppl: 12.233070 +epoch: 1, batch: 18723, sum loss: 5246.647949, avg loss: 2.721291, ppl: 15.199926 +epoch: 1, batch: 18724, sum loss: 4176.184570, avg loss: 2.892095, ppl: 18.031038 +epoch: 1, batch: 18725, sum loss: 4333.302246, avg loss: 2.500463, ppl: 12.188133 +epoch: 1, batch: 18726, sum loss: 3755.063477, avg loss: 2.475322, ppl: 11.885533 +epoch: 1, batch: 18727, sum loss: 3995.298584, avg loss: 2.508034, ppl: 12.280766 +epoch: 1, batch: 18728, sum loss: 4937.297363, avg loss: 2.599946, ppl: 13.463012 +epoch: 1, batch: 18729, sum loss: 4227.745605, avg loss: 2.539187, ppl: 12.669363 +epoch: 1, batch: 18730, sum loss: 4179.789062, avg loss: 2.427287, ppl: 11.328111 +epoch: 1, batch: 18731, sum loss: 4471.018555, avg loss: 2.686910, ppl: 14.686231 +epoch: 1, batch: 18732, sum loss: 4958.403809, avg loss: 2.874437, ppl: 17.715448 +epoch: 1, batch: 18733, sum loss: 5109.734375, avg loss: 2.772509, ppl: 15.998726 +epoch: 1, batch: 18734, sum loss: 4648.822266, avg loss: 3.018716, ppl: 20.464991 +epoch: 1, batch: 18735, sum loss: 4200.913574, avg loss: 2.861658, ppl: 17.490496 +epoch: 1, batch: 18736, sum loss: 5073.277832, avg loss: 2.675780, ppl: 14.523667 +epoch: 1, batch: 18737, sum loss: 3853.995605, avg loss: 2.586574, ppl: 13.284186 +epoch: 1, batch: 18738, sum loss: 5324.642578, avg loss: 2.773252, ppl: 16.010609 +epoch: 1, batch: 18739, sum loss: 4249.712402, avg loss: 2.642856, ppl: 14.053281 +epoch: 1, batch: 18740, sum loss: 5129.240234, avg loss: 2.781584, ppl: 16.144569 +epoch: 1, batch: 18741, sum loss: 3491.041504, avg loss: 2.504334, ppl: 12.235407 +epoch: 1, batch: 18742, sum loss: 5161.488281, avg loss: 2.988702, ppl: 19.859888 +epoch: 1, batch: 18743, sum loss: 4958.690430, avg loss: 2.500600, ppl: 12.189810 +epoch: 1, batch: 18744, sum loss: 
4377.709961, avg loss: 2.744646, ppl: 15.559102 +epoch: 1, batch: 18745, sum loss: 4586.537109, avg loss: 2.585421, ppl: 13.268875 +epoch: 1, batch: 18746, sum loss: 4594.474121, avg loss: 2.734806, ppl: 15.406755 +epoch: 1, batch: 18747, sum loss: 4270.319336, avg loss: 2.809421, ppl: 16.600302 +epoch: 1, batch: 18748, sum loss: 4344.745605, avg loss: 2.572377, ppl: 13.096925 +epoch: 1, batch: 18749, sum loss: 5205.493164, avg loss: 3.044148, ppl: 20.992138 +epoch: 1, batch: 18750, sum loss: 4608.573242, avg loss: 2.743198, ppl: 15.536598 +epoch: 1, batch: 18751, sum loss: 3808.582764, avg loss: 2.469898, ppl: 11.821240 +epoch: 1, batch: 18752, sum loss: 5074.686523, avg loss: 2.985110, ppl: 19.788675 +epoch: 1, batch: 18753, sum loss: 4321.074707, avg loss: 2.584375, ppl: 13.255001 +epoch: 1, batch: 18754, sum loss: 3820.181396, avg loss: 2.433237, ppl: 11.395706 +epoch: 1, batch: 18755, sum loss: 4053.373047, avg loss: 2.470063, ppl: 11.823189 +epoch: 1, batch: 18756, sum loss: 4668.969238, avg loss: 2.892794, ppl: 18.043652 +epoch: 1, batch: 18757, sum loss: 5856.099121, avg loss: 2.816787, ppl: 16.723024 +epoch: 1, batch: 18758, sum loss: 4622.174805, avg loss: 2.706191, ppl: 14.972142 +epoch: 1, batch: 18759, sum loss: 4929.084473, avg loss: 2.738380, ppl: 15.461924 +epoch: 1, batch: 18760, sum loss: 4571.454590, avg loss: 2.700210, ppl: 14.882851 +epoch: 1, batch: 18761, sum loss: 3694.379883, avg loss: 2.620128, ppl: 13.737477 +epoch: 1, batch: 18762, sum loss: 4244.557129, avg loss: 2.462040, ppl: 11.728716 +epoch: 1, batch: 18763, sum loss: 4696.463867, avg loss: 2.714719, ppl: 15.100364 +epoch: 1, batch: 18764, sum loss: 4256.628906, avg loss: 2.481999, ppl: 11.965160 +epoch: 1, batch: 18765, sum loss: 5159.497559, avg loss: 2.724128, ppl: 15.243109 +epoch: 1, batch: 18766, sum loss: 3790.590576, avg loss: 2.275264, ppl: 9.730490 +epoch: 1, batch: 18767, sum loss: 4068.485840, avg loss: 2.555582, ppl: 12.878787 +epoch: 1, batch: 18768, sum loss: 
4558.041504, avg loss: 2.613556, ppl: 13.647494 +epoch: 1, batch: 18769, sum loss: 4894.146973, avg loss: 2.811113, ppl: 16.628410 +epoch: 1, batch: 18770, sum loss: 4480.722168, avg loss: 2.747224, ppl: 15.599266 +epoch: 1, batch: 18771, sum loss: 4514.856934, avg loss: 2.606730, ppl: 13.554658 +epoch: 1, batch: 18772, sum loss: 4042.101318, avg loss: 2.532645, ppl: 12.586754 +epoch: 1, batch: 18773, sum loss: 4753.251953, avg loss: 2.791105, ppl: 16.299021 +epoch: 1, batch: 18774, sum loss: 4668.094727, avg loss: 2.815497, ppl: 16.701481 +epoch: 1, batch: 18775, sum loss: 4494.758301, avg loss: 2.631591, ppl: 13.895865 +epoch: 1, batch: 18776, sum loss: 4261.069336, avg loss: 2.587170, ppl: 13.292104 +epoch: 1, batch: 18777, sum loss: 5048.482910, avg loss: 2.714238, ppl: 15.093107 +epoch: 1, batch: 18778, sum loss: 4114.191895, avg loss: 2.527145, ppl: 12.517715 +epoch: 1, batch: 18779, sum loss: 4789.071289, avg loss: 2.818759, ppl: 16.756042 +epoch: 1, batch: 18780, sum loss: 4478.855469, avg loss: 2.792304, ppl: 16.318575 +epoch: 1, batch: 18781, sum loss: 4881.107910, avg loss: 2.834557, ppl: 17.022863 +epoch: 1, batch: 18782, sum loss: 4399.781250, avg loss: 2.687710, ppl: 14.697980 +epoch: 1, batch: 18783, sum loss: 4625.372070, avg loss: 2.616161, ppl: 13.683091 +epoch: 1, batch: 18784, sum loss: 4050.456055, avg loss: 2.541064, ppl: 12.693170 +epoch: 1, batch: 18785, sum loss: 5538.422852, avg loss: 3.099285, ppl: 22.182093 +epoch: 1, batch: 18786, sum loss: 4877.812500, avg loss: 2.782552, ppl: 16.160202 +epoch: 1, batch: 18787, sum loss: 3707.896973, avg loss: 2.520664, ppl: 12.436854 +epoch: 1, batch: 18788, sum loss: 4002.618896, avg loss: 2.539733, ppl: 12.676282 +epoch: 1, batch: 18789, sum loss: 4620.910645, avg loss: 2.551580, ppl: 12.827352 +epoch: 1, batch: 18790, sum loss: 4195.529785, avg loss: 2.553579, ppl: 12.853018 +epoch: 1, batch: 18791, sum loss: 4550.141602, avg loss: 2.512502, ppl: 12.335761 +epoch: 1, batch: 18792, sum loss: 
4256.188477, avg loss: 2.742390, ppl: 15.524038 +epoch: 1, batch: 18793, sum loss: 3877.523438, avg loss: 2.711555, ppl: 15.052665 +epoch: 1, batch: 18794, sum loss: 5092.021484, avg loss: 2.846295, ppl: 17.223846 +epoch: 1, batch: 18795, sum loss: 4468.232422, avg loss: 2.742930, ppl: 15.532424 +epoch: 1, batch: 18796, sum loss: 4345.416016, avg loss: 2.753749, ppl: 15.701388 +epoch: 1, batch: 18797, sum loss: 4482.172852, avg loss: 2.797861, ppl: 16.409504 +epoch: 1, batch: 18798, sum loss: 5063.046875, avg loss: 2.814367, ppl: 16.682617 +epoch: 1, batch: 18799, sum loss: 4331.344727, avg loss: 2.834650, ppl: 17.024437 +epoch: 1, batch: 18800, sum loss: 3676.337646, avg loss: 2.502612, ppl: 12.214360 +epoch: 1, batch: 18801, sum loss: 5494.921387, avg loss: 2.764045, ppl: 15.863879 +epoch: 1, batch: 18802, sum loss: 4811.559570, avg loss: 2.779641, ppl: 16.113241 +epoch: 1, batch: 18803, sum loss: 5058.663574, avg loss: 2.804137, ppl: 16.512823 +epoch: 1, batch: 18804, sum loss: 4067.490479, avg loss: 2.545363, ppl: 12.747857 +epoch: 1, batch: 18805, sum loss: 5932.179199, avg loss: 3.021997, ppl: 20.532248 +epoch: 1, batch: 18806, sum loss: 3873.141846, avg loss: 2.594201, ppl: 13.385886 +epoch: 1, batch: 18807, sum loss: 4843.228516, avg loss: 2.914097, ppl: 18.432154 +epoch: 1, batch: 18808, sum loss: 4142.002930, avg loss: 2.548925, ppl: 12.793343 +epoch: 1, batch: 18809, sum loss: 3849.545410, avg loss: 2.537604, ppl: 12.649328 +epoch: 1, batch: 18810, sum loss: 4142.866211, avg loss: 2.425565, ppl: 11.308622 +epoch: 1, batch: 18811, sum loss: 4409.208984, avg loss: 2.361655, ppl: 10.608489 +epoch: 1, batch: 18812, sum loss: 4581.859375, avg loss: 2.615217, ppl: 13.670178 +epoch: 1, batch: 18813, sum loss: 4017.531250, avg loss: 2.646595, ppl: 14.105927 +epoch: 1, batch: 18814, sum loss: 3810.315186, avg loss: 2.451940, ppl: 11.610853 +epoch: 1, batch: 18815, sum loss: 3971.063232, avg loss: 2.583646, ppl: 13.245337 +epoch: 1, batch: 18816, sum loss: 
4985.743164, avg loss: 2.709643, ppl: 15.023910 +epoch: 1, batch: 18817, sum loss: 5314.933105, avg loss: 2.668139, ppl: 14.413124 +epoch: 1, batch: 18818, sum loss: 4409.373535, avg loss: 2.735343, ppl: 15.415037 +epoch: 1, batch: 18819, sum loss: 4308.124512, avg loss: 2.641401, ppl: 14.032847 +epoch: 1, batch: 18820, sum loss: 6493.062500, avg loss: 3.155035, ppl: 23.453863 +epoch: 1, batch: 18821, sum loss: 4063.827148, avg loss: 2.442204, ppl: 11.498355 +epoch: 1, batch: 18822, sum loss: 4931.683594, avg loss: 2.664335, ppl: 14.358395 +epoch: 1, batch: 18823, sum loss: 5094.784180, avg loss: 2.780996, ppl: 16.135080 +epoch: 1, batch: 18824, sum loss: 4806.259277, avg loss: 2.646618, ppl: 14.106256 +epoch: 1, batch: 18825, sum loss: 4908.664551, avg loss: 2.683797, ppl: 14.640580 +epoch: 1, batch: 18826, sum loss: 4018.179688, avg loss: 2.408981, ppl: 11.122617 +epoch: 1, batch: 18827, sum loss: 5100.943359, avg loss: 3.016525, ppl: 20.420204 +epoch: 1, batch: 18828, sum loss: 3753.447266, avg loss: 2.263840, ppl: 9.619964 +epoch: 1, batch: 18829, sum loss: 4476.394043, avg loss: 2.980289, ppl: 19.693502 +epoch: 1, batch: 18830, sum loss: 5064.410156, avg loss: 2.739000, ppl: 15.471499 +epoch: 1, batch: 18831, sum loss: 4485.391602, avg loss: 2.592712, ppl: 13.365970 +epoch: 1, batch: 18832, sum loss: 4768.709961, avg loss: 2.721867, ppl: 15.208684 +epoch: 1, batch: 18833, sum loss: 4985.272949, avg loss: 2.837378, ppl: 17.070944 +epoch: 1, batch: 18834, sum loss: 3623.232910, avg loss: 2.312210, ppl: 10.096715 +epoch: 1, batch: 18835, sum loss: 4486.763672, avg loss: 2.793751, ppl: 16.342197 +epoch: 1, batch: 18836, sum loss: 5103.670898, avg loss: 2.926417, ppl: 18.660643 +epoch: 1, batch: 18837, sum loss: 3741.067383, avg loss: 2.459610, ppl: 11.700253 +epoch: 1, batch: 18838, sum loss: 4279.025391, avg loss: 2.487805, ppl: 12.034835 +epoch: 1, batch: 18839, sum loss: 3684.097412, avg loss: 2.426942, ppl: 11.324196 +epoch: 1, batch: 18840, sum loss: 
5310.822266, avg loss: 2.730500, ppl: 15.340555 +epoch: 1, batch: 18841, sum loss: 3940.200439, avg loss: 2.432223, ppl: 11.384156 +epoch: 1, batch: 18842, sum loss: 4858.652832, avg loss: 2.629141, ppl: 13.861858 +epoch: 1, batch: 18843, sum loss: 3955.250977, avg loss: 2.533793, ppl: 12.601214 +epoch: 1, batch: 18844, sum loss: 4698.663574, avg loss: 2.832226, ppl: 16.983229 +epoch: 1, batch: 18845, sum loss: 4551.403320, avg loss: 2.653879, ppl: 14.209055 +epoch: 1, batch: 18846, sum loss: 4861.992188, avg loss: 2.971878, ppl: 19.528561 +epoch: 1, batch: 18847, sum loss: 4063.035400, avg loss: 2.489605, ppl: 12.056515 +epoch: 1, batch: 18848, sum loss: 3853.668457, avg loss: 2.632287, ppl: 13.905536 +epoch: 1, batch: 18849, sum loss: 4918.950195, avg loss: 2.853220, ppl: 17.343531 +epoch: 1, batch: 18850, sum loss: 4657.768066, avg loss: 2.643455, ppl: 14.061707 +epoch: 1, batch: 18851, sum loss: 4649.776367, avg loss: 2.866693, ppl: 17.578789 +epoch: 1, batch: 18852, sum loss: 4480.198242, avg loss: 2.689194, ppl: 14.719813 +epoch: 1, batch: 18853, sum loss: 4993.242676, avg loss: 2.734525, ppl: 15.402429 +epoch: 1, batch: 18854, sum loss: 4725.424316, avg loss: 2.712643, ppl: 15.069053 +epoch: 1, batch: 18855, sum loss: 4902.624023, avg loss: 2.883897, ppl: 17.883823 +epoch: 1, batch: 18856, sum loss: 4136.152344, avg loss: 2.488660, ppl: 12.045120 +epoch: 1, batch: 18857, sum loss: 4461.184570, avg loss: 2.711966, ppl: 15.058856 +epoch: 1, batch: 18858, sum loss: 4127.774902, avg loss: 2.637556, ppl: 13.978995 +epoch: 1, batch: 18859, sum loss: 5475.218750, avg loss: 3.011672, ppl: 20.321339 +epoch: 1, batch: 18860, sum loss: 4063.553223, avg loss: 2.411604, ppl: 11.151836 +epoch: 1, batch: 18861, sum loss: 4431.287109, avg loss: 2.588369, ppl: 13.308043 +epoch: 1, batch: 18862, sum loss: 4149.166504, avg loss: 2.682073, ppl: 14.615357 +epoch: 1, batch: 18863, sum loss: 4624.453613, avg loss: 2.685513, ppl: 14.665723 +epoch: 1, batch: 18864, sum loss: 
4791.129883, avg loss: 2.678105, ppl: 14.557479 +epoch: 1, batch: 18865, sum loss: 4932.929688, avg loss: 2.546685, ppl: 12.764725 +epoch: 1, batch: 18866, sum loss: 4524.379395, avg loss: 2.586838, ppl: 13.287686 +epoch: 1, batch: 18867, sum loss: 4406.283203, avg loss: 2.665628, ppl: 14.376975 +epoch: 1, batch: 18868, sum loss: 5133.829102, avg loss: 2.655887, ppl: 14.237608 +epoch: 1, batch: 18869, sum loss: 3878.963867, avg loss: 2.568850, ppl: 13.050811 +epoch: 1, batch: 18870, sum loss: 4219.691406, avg loss: 2.830108, ppl: 16.947298 +epoch: 1, batch: 18871, sum loss: 4030.541748, avg loss: 2.649929, ppl: 14.153034 +epoch: 1, batch: 18872, sum loss: 4968.881348, avg loss: 2.805692, ppl: 16.538525 +epoch: 1, batch: 18873, sum loss: 4920.326660, avg loss: 2.745718, ppl: 15.575797 +epoch: 1, batch: 18874, sum loss: 4305.694336, avg loss: 2.770717, ppl: 15.970082 +epoch: 1, batch: 18875, sum loss: 3744.849121, avg loss: 2.374667, ppl: 10.747431 +epoch: 1, batch: 18876, sum loss: 4143.691406, avg loss: 2.647726, ppl: 14.121890 +epoch: 1, batch: 18877, sum loss: 5033.927734, avg loss: 2.697710, ppl: 14.845700 +epoch: 1, batch: 18878, sum loss: 5316.644531, avg loss: 2.841606, ppl: 17.143274 +epoch: 1, batch: 18879, sum loss: 3919.076416, avg loss: 2.702811, ppl: 14.921624 +epoch: 1, batch: 18880, sum loss: 4590.387695, avg loss: 2.641190, ppl: 14.029886 +epoch: 1, batch: 18881, sum loss: 4905.314941, avg loss: 2.743465, ppl: 15.540736 +epoch: 1, batch: 18882, sum loss: 4386.779785, avg loss: 2.813842, ppl: 16.673857 +epoch: 1, batch: 18883, sum loss: 4038.784180, avg loss: 2.620885, ppl: 13.747890 +epoch: 1, batch: 18884, sum loss: 5559.902832, avg loss: 3.068379, ppl: 21.507010 +epoch: 1, batch: 18885, sum loss: 4085.822021, avg loss: 2.523670, ppl: 12.474296 +epoch: 1, batch: 18886, sum loss: 5973.385742, avg loss: 2.935325, ppl: 18.827616 +epoch: 1, batch: 18887, sum loss: 3844.443848, avg loss: 2.527577, ppl: 12.523122 +epoch: 1, batch: 18888, sum loss: 
4736.642578, avg loss: 2.758674, ppl: 15.778901 +epoch: 1, batch: 18889, sum loss: 4384.704590, avg loss: 2.730202, ppl: 15.335984 +epoch: 1, batch: 18890, sum loss: 4242.696777, avg loss: 2.645073, ppl: 14.084473 +epoch: 1, batch: 18891, sum loss: 3515.781494, avg loss: 2.370723, ppl: 10.705124 +epoch: 1, batch: 18892, sum loss: 5462.315430, avg loss: 2.965426, ppl: 19.402975 +epoch: 1, batch: 18893, sum loss: 4448.586426, avg loss: 2.572925, ppl: 13.104093 +epoch: 1, batch: 18894, sum loss: 3334.171143, avg loss: 2.177773, ppl: 8.826632 +epoch: 1, batch: 18895, sum loss: 4184.449707, avg loss: 2.603889, ppl: 13.516204 +epoch: 1, batch: 18896, sum loss: 4604.712891, avg loss: 2.535635, ppl: 12.624442 +epoch: 1, batch: 18897, sum loss: 4094.671875, avg loss: 2.507454, ppl: 12.273638 +epoch: 1, batch: 18898, sum loss: 4579.840820, avg loss: 2.811443, ppl: 16.633905 +epoch: 1, batch: 18899, sum loss: 5370.345215, avg loss: 2.876457, ppl: 17.751268 +epoch: 1, batch: 18900, sum loss: 4587.971680, avg loss: 2.592074, ppl: 13.357451 +epoch: 1, batch: 18901, sum loss: 3878.420898, avg loss: 2.490957, ppl: 12.072830 +epoch: 1, batch: 18902, sum loss: 4262.467773, avg loss: 2.618223, ppl: 13.711343 +epoch: 1, batch: 18903, sum loss: 5055.320801, avg loss: 2.599137, ppl: 13.452122 +epoch: 1, batch: 18904, sum loss: 5028.148926, avg loss: 2.904765, ppl: 18.260958 +epoch: 1, batch: 18905, sum loss: 3851.270996, avg loss: 2.746984, ppl: 15.595521 +epoch: 1, batch: 18906, sum loss: 3584.201416, avg loss: 2.400671, ppl: 11.030573 +epoch: 1, batch: 18907, sum loss: 4433.199707, avg loss: 2.675437, ppl: 14.518696 +epoch: 1, batch: 18908, sum loss: 3742.240967, avg loss: 2.431606, ppl: 11.377134 +epoch: 1, batch: 18909, sum loss: 5054.277344, avg loss: 2.790877, ppl: 16.295298 +epoch: 1, batch: 18910, sum loss: 4689.540039, avg loss: 2.809790, ppl: 16.606436 +epoch: 1, batch: 18911, sum loss: 4297.472656, avg loss: 2.530903, ppl: 12.564842 +epoch: 1, batch: 18912, sum loss: 
4748.447266, avg loss: 2.760725, ppl: 15.811306 +epoch: 1, batch: 18913, sum loss: 5462.046875, avg loss: 2.930283, ppl: 18.732924 +epoch: 1, batch: 18914, sum loss: 4659.612305, avg loss: 2.710653, ppl: 15.039094 +epoch: 1, batch: 18915, sum loss: 4158.306152, avg loss: 2.476656, ppl: 11.901404 +epoch: 1, batch: 18916, sum loss: 4588.807617, avg loss: 2.672573, ppl: 14.477169 +epoch: 1, batch: 18917, sum loss: 4847.543945, avg loss: 2.625972, ppl: 13.817996 +epoch: 1, batch: 18918, sum loss: 4573.001953, avg loss: 3.000657, ppl: 20.098743 +epoch: 1, batch: 18919, sum loss: 3864.414062, avg loss: 2.509360, ppl: 12.297055 +epoch: 1, batch: 18920, sum loss: 3762.628418, avg loss: 2.438515, ppl: 11.456016 +epoch: 1, batch: 18921, sum loss: 5144.038574, avg loss: 2.873765, ppl: 17.703539 +epoch: 1, batch: 18922, sum loss: 4747.018066, avg loss: 2.808887, ppl: 16.591434 +epoch: 1, batch: 18923, sum loss: 5064.034180, avg loss: 2.675137, ppl: 14.514336 +epoch: 1, batch: 18924, sum loss: 4227.229492, avg loss: 2.469176, ppl: 11.812710 +epoch: 1, batch: 18925, sum loss: 4639.198242, avg loss: 2.574472, ppl: 13.124385 +epoch: 1, batch: 18926, sum loss: 4996.332031, avg loss: 2.742224, ppl: 15.521466 +epoch: 1, batch: 18927, sum loss: 3672.969238, avg loss: 2.375789, ppl: 10.759496 +epoch: 1, batch: 18928, sum loss: 4020.591309, avg loss: 2.466620, ppl: 11.782559 +epoch: 1, batch: 18929, sum loss: 3192.808838, avg loss: 2.111646, ppl: 8.261830 +epoch: 1, batch: 18930, sum loss: 5386.960938, avg loss: 3.067745, ppl: 21.493389 +epoch: 1, batch: 18931, sum loss: 3045.066650, avg loss: 2.114630, ppl: 8.286517 +epoch: 1, batch: 18932, sum loss: 4739.058105, avg loss: 2.628429, ppl: 13.851997 +epoch: 1, batch: 18933, sum loss: 4126.068848, avg loss: 2.611436, ppl: 13.618591 +epoch: 1, batch: 18934, sum loss: 3240.223633, avg loss: 2.401945, ppl: 11.044636 +epoch: 1, batch: 18935, sum loss: 4487.979004, avg loss: 2.465923, ppl: 11.774340 +epoch: 1, batch: 18936, sum loss: 
5301.411133, avg loss: 2.995148, ppl: 19.988312 +epoch: 1, batch: 18937, sum loss: 4512.364258, avg loss: 2.703634, ppl: 14.933896 +epoch: 1, batch: 18938, sum loss: 4552.095703, avg loss: 2.634315, ppl: 13.933762 +epoch: 1, batch: 18939, sum loss: 3529.599854, avg loss: 2.326697, ppl: 10.244053 +epoch: 1, batch: 18940, sum loss: 4611.697754, avg loss: 2.825795, ppl: 16.874357 +epoch: 1, batch: 18941, sum loss: 3327.178711, avg loss: 2.563312, ppl: 12.978726 +epoch: 1, batch: 18942, sum loss: 4018.647217, avg loss: 2.513225, ppl: 12.344682 +epoch: 1, batch: 18943, sum loss: 4605.287109, avg loss: 2.693150, ppl: 14.778158 +epoch: 1, batch: 18944, sum loss: 4372.912109, avg loss: 2.930906, ppl: 18.744612 +epoch: 1, batch: 18945, sum loss: 4609.098633, avg loss: 2.629263, ppl: 13.863554 +epoch: 1, batch: 18946, sum loss: 4867.034668, avg loss: 2.662492, ppl: 14.331954 +epoch: 1, batch: 18947, sum loss: 4602.030762, avg loss: 2.634248, ppl: 13.932825 +epoch: 1, batch: 18948, sum loss: 4549.956055, avg loss: 2.817310, ppl: 16.731787 +epoch: 1, batch: 18949, sum loss: 4958.769531, avg loss: 2.636241, ppl: 13.960629 +epoch: 1, batch: 18950, sum loss: 4894.343750, avg loss: 2.673044, ppl: 14.483994 +epoch: 1, batch: 18951, sum loss: 4169.089355, avg loss: 2.516047, ppl: 12.379558 +epoch: 1, batch: 18952, sum loss: 5188.933594, avg loss: 2.985577, ppl: 19.797930 +epoch: 1, batch: 18953, sum loss: 3554.470215, avg loss: 2.581315, ppl: 13.214499 +epoch: 1, batch: 18954, sum loss: 4676.777344, avg loss: 2.819034, ppl: 16.760651 +epoch: 1, batch: 18955, sum loss: 4690.840332, avg loss: 2.521957, ppl: 12.452945 +epoch: 1, batch: 18956, sum loss: 4888.786621, avg loss: 2.757353, ppl: 15.758077 +epoch: 1, batch: 18957, sum loss: 4677.338867, avg loss: 2.617425, ppl: 13.700402 +epoch: 1, batch: 18958, sum loss: 5314.982910, avg loss: 2.862134, ppl: 17.498829 +epoch: 1, batch: 18959, sum loss: 4454.141602, avg loss: 2.796071, ppl: 16.380167 +epoch: 1, batch: 18960, sum loss: 
4539.047363, avg loss: 2.605653, ppl: 13.540062 +epoch: 1, batch: 18961, sum loss: 5469.557129, avg loss: 2.790590, ppl: 16.290632 +epoch: 1, batch: 18962, sum loss: 4337.522461, avg loss: 2.417794, ppl: 11.221078 +epoch: 1, batch: 18963, sum loss: 5339.740234, avg loss: 2.808911, ppl: 16.591845 +epoch: 1, batch: 18964, sum loss: 4678.950195, avg loss: 2.765337, ppl: 15.884392 +epoch: 1, batch: 18965, sum loss: 4313.738770, avg loss: 2.537493, ppl: 12.647929 +epoch: 1, batch: 18966, sum loss: 4559.331055, avg loss: 2.571535, ppl: 13.085895 +epoch: 1, batch: 18967, sum loss: 3916.386230, avg loss: 2.598796, ppl: 13.447542 +epoch: 1, batch: 18968, sum loss: 4972.825195, avg loss: 2.516612, ppl: 12.386560 +epoch: 1, batch: 18969, sum loss: 4446.110352, avg loss: 2.607689, ppl: 13.567665 +epoch: 1, batch: 18970, sum loss: 5618.488770, avg loss: 2.994930, ppl: 19.983961 +epoch: 1, batch: 18971, sum loss: 4720.330566, avg loss: 2.872995, ppl: 17.689915 +epoch: 1, batch: 18972, sum loss: 4231.806641, avg loss: 2.623563, ppl: 13.784749 +epoch: 1, batch: 18973, sum loss: 4678.135254, avg loss: 2.601855, ppl: 13.488733 +epoch: 1, batch: 18974, sum loss: 4870.615723, avg loss: 2.670294, ppl: 14.444212 +epoch: 1, batch: 18975, sum loss: 4764.048828, avg loss: 2.749018, ppl: 15.627281 +epoch: 1, batch: 18976, sum loss: 4141.489746, avg loss: 2.377434, ppl: 10.777210 +epoch: 1, batch: 18977, sum loss: 3577.119629, avg loss: 2.551441, ppl: 12.825568 +epoch: 1, batch: 18978, sum loss: 3616.163574, avg loss: 2.490471, ppl: 12.066957 +epoch: 1, batch: 18979, sum loss: 4398.186035, avg loss: 2.521896, ppl: 12.452179 +epoch: 1, batch: 18980, sum loss: 4477.834961, avg loss: 2.594342, ppl: 13.387778 +epoch: 1, batch: 18981, sum loss: 4229.212402, avg loss: 2.753394, ppl: 15.695807 +epoch: 1, batch: 18982, sum loss: 4163.099121, avg loss: 2.498859, ppl: 12.168601 +epoch: 1, batch: 18983, sum loss: 4230.394531, avg loss: 2.516594, ppl: 12.386336 +epoch: 1, batch: 18984, sum loss: 
4940.383301, avg loss: 2.613960, ppl: 13.653004 +epoch: 1, batch: 18985, sum loss: 5288.050781, avg loss: 2.806821, ppl: 16.557201 +epoch: 1, batch: 18986, sum loss: 4658.544922, avg loss: 2.730683, ppl: 15.343361 +epoch: 1, batch: 18987, sum loss: 4339.302734, avg loss: 2.501039, ppl: 12.195159 +epoch: 1, batch: 18988, sum loss: 5262.979004, avg loss: 2.875945, ppl: 17.742180 +epoch: 1, batch: 18989, sum loss: 4414.784668, avg loss: 2.572718, ppl: 13.101391 +epoch: 1, batch: 18990, sum loss: 4256.657227, avg loss: 2.492188, ppl: 12.087695 +epoch: 1, batch: 18991, sum loss: 4133.160156, avg loss: 2.594576, ppl: 13.390913 +epoch: 1, batch: 18992, sum loss: 4549.665527, avg loss: 2.818876, ppl: 16.758001 +epoch: 1, batch: 18993, sum loss: 4524.784668, avg loss: 2.701364, ppl: 14.900045 +epoch: 1, batch: 18994, sum loss: 4373.506836, avg loss: 2.538309, ppl: 12.658252 +epoch: 1, batch: 18995, sum loss: 4843.127930, avg loss: 2.769084, ppl: 15.944022 +epoch: 1, batch: 18996, sum loss: 4596.107910, avg loss: 2.734151, ppl: 15.396665 +epoch: 1, batch: 18997, sum loss: 3722.648438, avg loss: 2.638305, ppl: 13.989470 +epoch: 1, batch: 18998, sum loss: 3910.943359, avg loss: 2.438244, ppl: 11.452913 +epoch: 1, batch: 18999, sum loss: 4476.270020, avg loss: 2.792433, ppl: 16.320677 +epoch: 1, batch: 19000, sum loss: 4624.336914, avg loss: 2.617055, ppl: 13.695337 +epoch: 1, batch: 19001, sum loss: 3771.747559, avg loss: 2.560589, ppl: 12.943439 +epoch: 1, batch: 19002, sum loss: 3873.947266, avg loss: 2.447219, ppl: 11.556160 +epoch: 1, batch: 19003, sum loss: 5519.235352, avg loss: 2.880603, ppl: 17.825020 +epoch: 1, batch: 19004, sum loss: 4305.888672, avg loss: 2.556941, ppl: 12.896308 +epoch: 1, batch: 19005, sum loss: 3732.230469, avg loss: 2.354719, ppl: 10.535172 +epoch: 1, batch: 19006, sum loss: 4211.757324, avg loss: 2.765435, ppl: 15.885948 +epoch: 1, batch: 19007, sum loss: 4792.430664, avg loss: 2.706059, ppl: 14.970161 +epoch: 1, batch: 19008, sum loss: 
4783.597168, avg loss: 2.900908, ppl: 18.190655 +epoch: 1, batch: 19009, sum loss: 3989.656738, avg loss: 2.517133, ppl: 12.393021 +epoch: 1, batch: 19010, sum loss: 3977.385254, avg loss: 2.465831, ppl: 11.773262 +epoch: 1, batch: 19011, sum loss: 4467.149414, avg loss: 2.733874, ppl: 15.392396 +epoch: 1, batch: 19012, sum loss: 4078.805664, avg loss: 2.455632, ppl: 11.653802 +epoch: 1, batch: 19013, sum loss: 4023.096436, avg loss: 2.320125, ppl: 10.176943 +epoch: 1, batch: 19014, sum loss: 3915.721191, avg loss: 2.593193, ppl: 13.372399 +epoch: 1, batch: 19015, sum loss: 4571.649414, avg loss: 2.634956, ppl: 13.942704 +epoch: 1, batch: 19016, sum loss: 4249.924316, avg loss: 2.610518, ppl: 13.606103 +epoch: 1, batch: 19017, sum loss: 3840.238770, avg loss: 2.361770, ppl: 10.609718 +epoch: 1, batch: 19018, sum loss: 3755.955078, avg loss: 2.512344, ppl: 12.333811 +epoch: 1, batch: 19019, sum loss: 4019.463867, avg loss: 2.569990, ppl: 13.065690 +epoch: 1, batch: 19020, sum loss: 3969.263184, avg loss: 2.542770, ppl: 12.714838 +epoch: 1, batch: 19021, sum loss: 3269.134277, avg loss: 2.735677, ppl: 15.420183 +epoch: 1, batch: 19022, sum loss: 4773.178223, avg loss: 2.668071, ppl: 14.412135 +epoch: 1, batch: 19023, sum loss: 4193.759766, avg loss: 2.564991, ppl: 13.000535 +epoch: 1, batch: 19024, sum loss: 4923.162109, avg loss: 2.627088, ppl: 13.833423 +epoch: 1, batch: 19025, sum loss: 4112.543945, avg loss: 2.359463, ppl: 10.585265 +epoch: 1, batch: 19026, sum loss: 5204.208984, avg loss: 2.719022, ppl: 15.165487 +epoch: 1, batch: 19027, sum loss: 4885.738281, avg loss: 2.774411, ppl: 16.029186 +epoch: 1, batch: 19028, sum loss: 3751.359863, avg loss: 2.519382, ppl: 12.420918 +epoch: 1, batch: 19029, sum loss: 4614.263672, avg loss: 2.738436, ppl: 15.462775 +epoch: 1, batch: 19030, sum loss: 4498.086426, avg loss: 2.586594, ppl: 13.284446 +epoch: 1, batch: 19031, sum loss: 4662.805176, avg loss: 2.704643, ppl: 14.948986 +epoch: 1, batch: 19032, sum loss: 
5367.706055, avg loss: 2.887416, ppl: 17.946869 +epoch: 1, batch: 19033, sum loss: 3932.312012, avg loss: 2.507852, ppl: 12.278528 +epoch: 1, batch: 19034, sum loss: 3856.108398, avg loss: 2.657552, ppl: 14.261338 +epoch: 1, batch: 19035, sum loss: 4797.554688, avg loss: 2.523700, ppl: 12.474673 +epoch: 1, batch: 19036, sum loss: 4442.819336, avg loss: 2.690987, ppl: 14.746225 +epoch: 1, batch: 19037, sum loss: 3860.071777, avg loss: 2.601126, ppl: 13.478912 +epoch: 1, batch: 19038, sum loss: 4322.572754, avg loss: 2.610249, ppl: 13.602441 +epoch: 1, batch: 19039, sum loss: 5385.622070, avg loss: 2.866217, ppl: 17.570431 +epoch: 1, batch: 19040, sum loss: 4738.380371, avg loss: 2.784007, ppl: 16.183744 +epoch: 1, batch: 19041, sum loss: 3705.169434, avg loss: 2.312840, ppl: 10.103074 +epoch: 1, batch: 19042, sum loss: 3943.592773, avg loss: 2.596177, ppl: 13.412363 +epoch: 1, batch: 19043, sum loss: 4378.008789, avg loss: 2.676045, ppl: 14.527519 +epoch: 1, batch: 19044, sum loss: 4441.028320, avg loss: 2.749863, ppl: 15.640484 +epoch: 1, batch: 19045, sum loss: 4514.093750, avg loss: 2.566284, ppl: 13.017365 +epoch: 1, batch: 19046, sum loss: 4150.935547, avg loss: 2.745328, ppl: 15.569719 +epoch: 1, batch: 19047, sum loss: 4263.328613, avg loss: 2.754088, ppl: 15.706717 +epoch: 1, batch: 19048, sum loss: 4328.161621, avg loss: 2.769137, ppl: 15.944874 +epoch: 1, batch: 19049, sum loss: 4357.016602, avg loss: 2.648642, ppl: 14.134835 +epoch: 1, batch: 19050, sum loss: 4384.901367, avg loss: 2.754335, ppl: 15.710588 +epoch: 1, batch: 19051, sum loss: 5899.805664, avg loss: 2.852904, ppl: 17.338060 +epoch: 1, batch: 19052, sum loss: 5087.554688, avg loss: 2.801517, ppl: 16.469608 +epoch: 1, batch: 19053, sum loss: 5128.256348, avg loss: 3.317113, ppl: 27.580608 +epoch: 1, batch: 19054, sum loss: 3697.902344, avg loss: 2.465268, ppl: 11.766637 +epoch: 1, batch: 19055, sum loss: 4093.363770, avg loss: 2.286795, ppl: 9.843343 +epoch: 1, batch: 19056, sum loss: 
4328.474609, avg loss: 2.685158, ppl: 14.660518 +epoch: 1, batch: 19057, sum loss: 4363.292480, avg loss: 2.554621, ppl: 12.866419 +epoch: 1, batch: 19058, sum loss: 3978.536133, avg loss: 2.723160, ppl: 15.228361 +epoch: 1, batch: 19059, sum loss: 4269.370605, avg loss: 2.680082, ppl: 14.586290 +epoch: 1, batch: 19060, sum loss: 5138.199707, avg loss: 2.921092, ppl: 18.561537 +epoch: 1, batch: 19061, sum loss: 5450.987305, avg loss: 2.590773, ppl: 13.340084 +epoch: 1, batch: 19062, sum loss: 4898.465332, avg loss: 2.947332, ppl: 19.055046 +epoch: 1, batch: 19063, sum loss: 4870.403809, avg loss: 2.821787, ppl: 16.806852 +epoch: 1, batch: 19064, sum loss: 5000.574707, avg loss: 2.919191, ppl: 18.526300 +epoch: 1, batch: 19065, sum loss: 4440.974609, avg loss: 2.645012, ppl: 14.083610 +epoch: 1, batch: 19066, sum loss: 4528.543945, avg loss: 2.578897, ppl: 13.182596 +epoch: 1, batch: 19067, sum loss: 4808.048828, avg loss: 2.910441, ppl: 18.364902 +epoch: 1, batch: 19068, sum loss: 4040.360352, avg loss: 2.644215, ppl: 14.072392 +epoch: 1, batch: 19069, sum loss: 3409.393555, avg loss: 2.810712, ppl: 16.621746 +epoch: 1, batch: 19070, sum loss: 4435.831055, avg loss: 2.694916, ppl: 14.804272 +epoch: 1, batch: 19071, sum loss: 4000.643066, avg loss: 2.492612, ppl: 12.092826 +epoch: 1, batch: 19072, sum loss: 3839.308350, avg loss: 2.601157, ppl: 13.479330 +epoch: 1, batch: 19073, sum loss: 5099.234375, avg loss: 2.992508, ppl: 19.935627 +epoch: 1, batch: 19074, sum loss: 4759.633789, avg loss: 2.823033, ppl: 16.827814 +epoch: 1, batch: 19075, sum loss: 4003.297852, avg loss: 2.546627, ppl: 12.763979 +epoch: 1, batch: 19076, sum loss: 3960.134033, avg loss: 2.608784, ppl: 13.582524 +epoch: 1, batch: 19077, sum loss: 5141.900391, avg loss: 2.754098, ppl: 15.706862 +epoch: 1, batch: 19078, sum loss: 3356.784668, avg loss: 2.140806, ppl: 8.506294 +epoch: 1, batch: 19079, sum loss: 4043.649902, avg loss: 2.690386, ppl: 14.737361 +epoch: 1, batch: 19080, sum loss: 
4200.446289, avg loss: 2.618732, ppl: 13.718321 +epoch: 1, batch: 19081, sum loss: 5082.085938, avg loss: 2.674782, ppl: 14.509191 +epoch: 1, batch: 19082, sum loss: 4629.988770, avg loss: 2.586586, ppl: 13.284338 +epoch: 1, batch: 19083, sum loss: 4544.348633, avg loss: 2.476484, ppl: 11.899356 +epoch: 1, batch: 19084, sum loss: 4662.708008, avg loss: 2.690541, ppl: 14.739652 +epoch: 1, batch: 19085, sum loss: 3905.440186, avg loss: 2.811692, ppl: 16.638046 +epoch: 1, batch: 19086, sum loss: 3697.631592, avg loss: 2.439071, ppl: 11.462389 +epoch: 1, batch: 19087, sum loss: 4185.995117, avg loss: 2.538505, ppl: 12.660733 +epoch: 1, batch: 19088, sum loss: 4601.614258, avg loss: 2.513170, ppl: 12.344002 +epoch: 1, batch: 19089, sum loss: 4296.046387, avg loss: 2.815234, ppl: 16.697077 +epoch: 1, batch: 19090, sum loss: 4631.691895, avg loss: 2.752045, ppl: 15.674652 +epoch: 1, batch: 19091, sum loss: 4564.536133, avg loss: 2.704109, ppl: 14.941001 +epoch: 1, batch: 19092, sum loss: 4700.417969, avg loss: 2.723301, ppl: 15.230518 +epoch: 1, batch: 19093, sum loss: 3956.833984, avg loss: 2.630874, ppl: 13.885897 +epoch: 1, batch: 19094, sum loss: 4659.159668, avg loss: 2.770012, ppl: 15.958820 +epoch: 1, batch: 19095, sum loss: 5756.961914, avg loss: 3.073658, ppl: 21.620852 +epoch: 1, batch: 19096, sum loss: 4363.520020, avg loss: 2.542844, ppl: 12.715781 +epoch: 1, batch: 19097, sum loss: 4583.507812, avg loss: 2.664830, ppl: 14.365510 +epoch: 1, batch: 19098, sum loss: 4845.820312, avg loss: 2.917411, ppl: 18.493353 +epoch: 1, batch: 19099, sum loss: 4226.260742, avg loss: 2.610414, ppl: 13.604686 +epoch: 1, batch: 19100, sum loss: 4834.302246, avg loss: 2.809008, ppl: 16.593443 +epoch: 1, batch: 19101, sum loss: 4251.572266, avg loss: 2.573591, ppl: 13.112828 +epoch: 1, batch: 19102, sum loss: 5217.829590, avg loss: 2.961311, ppl: 19.323286 +epoch: 1, batch: 19103, sum loss: 5253.875977, avg loss: 2.705395, ppl: 14.960217 +epoch: 1, batch: 19104, sum loss: 
4938.341309, avg loss: 2.944747, ppl: 19.005857 +epoch: 1, batch: 19105, sum loss: 4159.562012, avg loss: 2.756502, ppl: 15.744678 +epoch: 1, batch: 19106, sum loss: 3722.017822, avg loss: 2.602810, ppl: 13.501620 +epoch: 1, batch: 19107, sum loss: 4320.744629, avg loss: 2.538628, ppl: 12.662285 +epoch: 1, batch: 19108, sum loss: 4992.550781, avg loss: 2.828641, ppl: 16.922443 +epoch: 1, batch: 19109, sum loss: 4738.987793, avg loss: 2.782729, ppl: 16.163073 +epoch: 1, batch: 19110, sum loss: 4937.080078, avg loss: 2.616364, ppl: 13.685878 +epoch: 1, batch: 19111, sum loss: 3530.415527, avg loss: 2.356753, ppl: 10.556615 +epoch: 1, batch: 19112, sum loss: 3832.422363, avg loss: 2.411845, ppl: 11.154528 +epoch: 1, batch: 19113, sum loss: 4015.660645, avg loss: 2.551245, ppl: 12.823065 +epoch: 1, batch: 19114, sum loss: 3824.189453, avg loss: 2.765141, ppl: 15.881271 +epoch: 1, batch: 19115, sum loss: 4816.776367, avg loss: 2.684937, ppl: 14.657274 +epoch: 1, batch: 19116, sum loss: 5101.387207, avg loss: 2.870786, ppl: 17.650890 +epoch: 1, batch: 19117, sum loss: 4455.845215, avg loss: 2.639719, ppl: 14.009263 +epoch: 1, batch: 19118, sum loss: 3818.789062, avg loss: 2.528999, ppl: 12.540951 +epoch: 1, batch: 19119, sum loss: 5311.131836, avg loss: 2.867782, ppl: 17.597937 +epoch: 1, batch: 19120, sum loss: 4783.354004, avg loss: 2.734908, ppl: 15.408323 +epoch: 1, batch: 19121, sum loss: 4236.528809, avg loss: 2.441803, ppl: 11.493751 +epoch: 1, batch: 19122, sum loss: 4711.946777, avg loss: 2.633844, ppl: 13.927202 +epoch: 1, batch: 19123, sum loss: 4265.770996, avg loss: 2.666107, ppl: 14.383860 +epoch: 1, batch: 19124, sum loss: 3993.933105, avg loss: 2.636260, ppl: 13.960886 +epoch: 1, batch: 19125, sum loss: 4827.491211, avg loss: 2.704477, ppl: 14.946499 +epoch: 1, batch: 19126, sum loss: 4182.189453, avg loss: 2.442868, ppl: 11.505987 +epoch: 1, batch: 19127, sum loss: 4434.794922, avg loss: 2.624139, ppl: 13.792694 +epoch: 1, batch: 19128, sum loss: 
4228.578125, avg loss: 2.539687, ppl: 12.675699 +epoch: 1, batch: 19129, sum loss: 4767.566406, avg loss: 2.653070, ppl: 14.197558 +epoch: 1, batch: 19130, sum loss: 4499.979980, avg loss: 2.753966, ppl: 15.704791 +epoch: 1, batch: 19131, sum loss: 5331.860840, avg loss: 2.761192, ppl: 15.818682 +epoch: 1, batch: 19132, sum loss: 4209.449219, avg loss: 2.537341, ppl: 12.646005 +epoch: 1, batch: 19133, sum loss: 4387.092285, avg loss: 2.689817, ppl: 14.728987 +epoch: 1, batch: 19134, sum loss: 3686.864746, avg loss: 2.394068, ppl: 10.957980 +epoch: 1, batch: 19135, sum loss: 5140.172363, avg loss: 2.750226, ppl: 15.646164 +epoch: 1, batch: 19136, sum loss: 4827.205566, avg loss: 2.775851, ppl: 16.052290 +epoch: 1, batch: 19137, sum loss: 4442.676758, avg loss: 2.604148, ppl: 13.519704 +epoch: 1, batch: 19138, sum loss: 4221.999512, avg loss: 2.557238, ppl: 12.900136 +epoch: 1, batch: 19139, sum loss: 3733.836670, avg loss: 2.453244, ppl: 11.625998 +epoch: 1, batch: 19140, sum loss: 3669.306885, avg loss: 2.480938, ppl: 11.952467 +epoch: 1, batch: 19141, sum loss: 3724.836426, avg loss: 2.470051, ppl: 11.823045 +epoch: 1, batch: 19142, sum loss: 5108.811523, avg loss: 2.714565, ppl: 15.098042 +epoch: 1, batch: 19143, sum loss: 4202.030762, avg loss: 2.690161, ppl: 14.734040 +epoch: 1, batch: 19144, sum loss: 4067.740479, avg loss: 2.440156, ppl: 11.474833 +epoch: 1, batch: 19145, sum loss: 5187.047852, avg loss: 2.909169, ppl: 18.341549 +epoch: 1, batch: 19146, sum loss: 4502.223633, avg loss: 2.812132, ppl: 16.645374 +epoch: 1, batch: 19147, sum loss: 4969.251953, avg loss: 2.961414, ppl: 19.325272 +epoch: 1, batch: 19148, sum loss: 4157.987305, avg loss: 2.571421, ppl: 13.084403 +epoch: 1, batch: 19149, sum loss: 3956.027832, avg loss: 2.502231, ppl: 12.209708 +epoch: 1, batch: 19150, sum loss: 4543.194336, avg loss: 2.773623, ppl: 16.016554 +epoch: 1, batch: 19151, sum loss: 3306.564209, avg loss: 2.173941, ppl: 8.792870 +epoch: 1, batch: 19152, sum loss: 
5018.916992, avg loss: 2.788287, ppl: 16.253160 +epoch: 1, batch: 19153, sum loss: 5018.131348, avg loss: 2.817592, ppl: 16.736504 +epoch: 1, batch: 19154, sum loss: 5818.934570, avg loss: 3.007201, ppl: 20.230700 +epoch: 1, batch: 19155, sum loss: 4329.174316, avg loss: 2.515499, ppl: 12.372786 +epoch: 1, batch: 19156, sum loss: 4002.458984, avg loss: 2.469130, ppl: 11.812160 +epoch: 1, batch: 19157, sum loss: 5091.608887, avg loss: 2.852442, ppl: 17.330051 +epoch: 1, batch: 19158, sum loss: 3800.778320, avg loss: 2.540627, ppl: 12.687627 +epoch: 1, batch: 19159, sum loss: 4205.087402, avg loss: 2.498566, ppl: 12.165042 +epoch: 1, batch: 19160, sum loss: 3697.782471, avg loss: 2.543179, ppl: 12.720047 +epoch: 1, batch: 19161, sum loss: 4546.929688, avg loss: 2.888773, ppl: 17.971252 +epoch: 1, batch: 19162, sum loss: 4524.742676, avg loss: 2.598933, ppl: 13.449383 +epoch: 1, batch: 19163, sum loss: 4713.967285, avg loss: 2.844881, ppl: 17.199505 +epoch: 1, batch: 19164, sum loss: 4303.244141, avg loss: 2.704742, ppl: 14.950455 +epoch: 1, batch: 19165, sum loss: 5415.043457, avg loss: 2.752945, ppl: 15.688770 +epoch: 1, batch: 19166, sum loss: 4130.363770, avg loss: 2.489671, ppl: 12.057305 +epoch: 1, batch: 19167, sum loss: 4761.162109, avg loss: 2.711368, ppl: 15.049850 +epoch: 1, batch: 19168, sum loss: 3970.690674, avg loss: 2.478583, ppl: 11.924359 +epoch: 1, batch: 19169, sum loss: 4984.816406, avg loss: 2.923646, ppl: 18.609016 +epoch: 1, batch: 19170, sum loss: 4321.662109, avg loss: 2.503860, ppl: 12.229607 +epoch: 1, batch: 19171, sum loss: 4119.488770, avg loss: 2.839069, ppl: 17.099833 +epoch: 1, batch: 19172, sum loss: 4969.957520, avg loss: 2.909811, ppl: 18.353331 +epoch: 1, batch: 19173, sum loss: 4697.601562, avg loss: 2.628764, ppl: 13.856635 +epoch: 1, batch: 19174, sum loss: 4299.133301, avg loss: 2.368669, ppl: 10.683159 +epoch: 1, batch: 19175, sum loss: 4284.041016, avg loss: 2.533437, ppl: 12.596721 +epoch: 1, batch: 19176, sum loss: 
4235.718750, avg loss: 2.556257, ppl: 12.887495 +epoch: 1, batch: 19177, sum loss: 4994.544434, avg loss: 2.751815, ppl: 15.671054 +epoch: 1, batch: 19178, sum loss: 5154.811035, avg loss: 2.938889, ppl: 18.894838 +epoch: 1, batch: 19179, sum loss: 4891.009766, avg loss: 2.649518, ppl: 14.147215 +epoch: 1, batch: 19180, sum loss: 5003.100098, avg loss: 2.890295, ppl: 17.998610 +epoch: 1, batch: 19181, sum loss: 3503.075684, avg loss: 2.553262, ppl: 12.848951 +epoch: 1, batch: 19182, sum loss: 4879.504883, avg loss: 2.918364, ppl: 18.510984 +epoch: 1, batch: 19183, sum loss: 4620.765625, avg loss: 2.721299, ppl: 15.200057 +epoch: 1, batch: 19184, sum loss: 5059.693359, avg loss: 2.829806, ppl: 16.942175 +epoch: 1, batch: 19185, sum loss: 4982.757324, avg loss: 2.690474, ppl: 14.738654 +epoch: 1, batch: 19186, sum loss: 3699.206055, avg loss: 2.528507, ppl: 12.534781 +epoch: 1, batch: 19187, sum loss: 4390.058594, avg loss: 2.683410, ppl: 14.634912 +epoch: 1, batch: 19188, sum loss: 4868.392090, avg loss: 2.860395, ppl: 17.468420 +epoch: 1, batch: 19189, sum loss: 5143.491699, avg loss: 2.886359, ppl: 17.927919 +epoch: 1, batch: 19190, sum loss: 4069.585938, avg loss: 2.608709, ppl: 13.581507 +epoch: 1, batch: 19191, sum loss: 4679.354492, avg loss: 2.621487, ppl: 13.756166 +epoch: 1, batch: 19192, sum loss: 3851.769531, avg loss: 2.453357, ppl: 11.627309 +epoch: 1, batch: 19193, sum loss: 4559.389160, avg loss: 2.669431, ppl: 14.431762 +epoch: 1, batch: 19194, sum loss: 4379.180176, avg loss: 2.743847, ppl: 15.546680 +epoch: 1, batch: 19195, sum loss: 5485.468262, avg loss: 2.938119, ppl: 18.880299 +epoch: 1, batch: 19196, sum loss: 5509.251465, avg loss: 2.768468, ppl: 15.934206 +epoch: 1, batch: 19197, sum loss: 4717.412109, avg loss: 2.660695, ppl: 14.306230 +epoch: 1, batch: 19198, sum loss: 4937.453125, avg loss: 2.670337, ppl: 14.444836 +epoch: 1, batch: 19199, sum loss: 4967.581055, avg loss: 2.772088, ppl: 15.991987 +epoch: 1, batch: 19200, sum loss: 
4289.041992, avg loss: 2.572911, ppl: 13.103909 +epoch: 1, batch: 19201, sum loss: 5019.047852, avg loss: 2.902862, ppl: 18.226231 +epoch: 1, batch: 19202, sum loss: 3748.860840, avg loss: 2.592573, ppl: 13.364116 +epoch: 1, batch: 19203, sum loss: 4530.924805, avg loss: 2.612990, ppl: 13.639771 +epoch: 1, batch: 19204, sum loss: 5233.054688, avg loss: 2.660424, ppl: 14.302356 +epoch: 1, batch: 19205, sum loss: 4329.073730, avg loss: 2.659136, ppl: 14.283943 +epoch: 1, batch: 19206, sum loss: 3552.680908, avg loss: 2.373200, ppl: 10.731681 +epoch: 1, batch: 19207, sum loss: 5304.723633, avg loss: 2.995327, ppl: 19.991892 +epoch: 1, batch: 19208, sum loss: 3930.360596, avg loss: 2.621989, ppl: 13.763064 +epoch: 1, batch: 19209, sum loss: 4635.444336, avg loss: 2.713960, ppl: 15.088912 +epoch: 1, batch: 19210, sum loss: 3740.382568, avg loss: 2.431978, ppl: 11.381374 +epoch: 1, batch: 19211, sum loss: 5388.511719, avg loss: 2.851064, ppl: 17.306194 +epoch: 1, batch: 19212, sum loss: 5098.213867, avg loss: 2.804298, ppl: 16.515480 +epoch: 1, batch: 19213, sum loss: 4755.614746, avg loss: 2.996607, ppl: 20.017498 +epoch: 1, batch: 19214, sum loss: 3558.998535, avg loss: 2.406355, ppl: 11.093448 +epoch: 1, batch: 19215, sum loss: 4537.471191, avg loss: 2.639599, ppl: 14.007590 +epoch: 1, batch: 19216, sum loss: 4866.828125, avg loss: 2.569603, ppl: 13.060641 +epoch: 1, batch: 19217, sum loss: 4216.410156, avg loss: 2.529340, ppl: 12.545224 +epoch: 1, batch: 19218, sum loss: 5171.614258, avg loss: 2.789436, ppl: 16.271837 +epoch: 1, batch: 19219, sum loss: 4514.079102, avg loss: 2.831919, ppl: 16.978014 +epoch: 1, batch: 19220, sum loss: 4472.961426, avg loss: 2.575107, ppl: 13.132727 +epoch: 1, batch: 19221, sum loss: 4058.847656, avg loss: 2.623690, ppl: 13.786497 +epoch: 1, batch: 19222, sum loss: 6029.920898, avg loss: 3.093854, ppl: 22.061935 +epoch: 1, batch: 19223, sum loss: 4235.770020, avg loss: 2.680867, ppl: 14.597747 +epoch: 1, batch: 19224, sum loss: 
4588.706055, avg loss: 2.710399, ppl: 15.035279 +epoch: 1, batch: 19225, sum loss: 4310.467773, avg loss: 2.634760, ppl: 13.939972 +epoch: 1, batch: 19226, sum loss: 4169.803711, avg loss: 2.609389, ppl: 13.590745 +epoch: 1, batch: 19227, sum loss: 4939.520508, avg loss: 2.860174, ppl: 17.464569 +epoch: 1, batch: 19228, sum loss: 3584.040283, avg loss: 2.518651, ppl: 12.411842 +epoch: 1, batch: 19229, sum loss: 4403.231445, avg loss: 2.876050, ppl: 17.744038 +epoch: 1, batch: 19230, sum loss: 3895.525879, avg loss: 2.428632, ppl: 11.343357 +epoch: 1, batch: 19231, sum loss: 5763.778809, avg loss: 2.735538, ppl: 15.418036 +epoch: 1, batch: 19232, sum loss: 3644.963379, avg loss: 2.371479, ppl: 10.713228 +epoch: 1, batch: 19233, sum loss: 4134.513184, avg loss: 2.623422, ppl: 13.782806 +epoch: 1, batch: 19234, sum loss: 4877.250000, avg loss: 2.676866, ppl: 14.539456 +epoch: 1, batch: 19235, sum loss: 4142.876953, avg loss: 2.533870, ppl: 12.602180 +epoch: 1, batch: 19236, sum loss: 4683.706543, avg loss: 2.596290, ppl: 13.413876 +epoch: 1, batch: 19237, sum loss: 4576.914551, avg loss: 2.745600, ppl: 15.573948 +epoch: 1, batch: 19238, sum loss: 4643.382324, avg loss: 2.824442, ppl: 16.851534 +epoch: 1, batch: 19239, sum loss: 4639.602051, avg loss: 2.607983, ppl: 13.571651 +epoch: 1, batch: 19240, sum loss: 4224.434570, avg loss: 2.612514, ppl: 13.633279 +epoch: 1, batch: 19241, sum loss: 4389.757324, avg loss: 2.652421, ppl: 14.188354 +epoch: 1, batch: 19242, sum loss: 6375.700195, avg loss: 3.053496, ppl: 21.189301 +epoch: 1, batch: 19243, sum loss: 4165.551758, avg loss: 2.587299, ppl: 13.293818 +epoch: 1, batch: 19244, sum loss: 5392.701660, avg loss: 2.946832, ppl: 19.045517 +epoch: 1, batch: 19245, sum loss: 3911.158691, avg loss: 2.478554, ppl: 11.924013 +epoch: 1, batch: 19246, sum loss: 4018.911377, avg loss: 2.584509, ppl: 13.256781 +epoch: 1, batch: 19247, sum loss: 4268.898438, avg loss: 2.563903, ppl: 12.986405 +epoch: 1, batch: 19248, sum loss: 
4080.123047, avg loss: 2.513939, ppl: 12.353494 +epoch: 1, batch: 19249, sum loss: 4594.804199, avg loss: 2.937854, ppl: 18.875301 +epoch: 1, batch: 19250, sum loss: 4493.791504, avg loss: 2.782533, ppl: 16.159908 +epoch: 1, batch: 19251, sum loss: 4579.478516, avg loss: 2.814676, ppl: 16.687773 +epoch: 1, batch: 19252, sum loss: 4702.651367, avg loss: 2.746876, ppl: 15.593837 +epoch: 1, batch: 19253, sum loss: 4536.779297, avg loss: 2.465641, ppl: 11.771023 +epoch: 1, batch: 19254, sum loss: 5296.357910, avg loss: 2.829251, ppl: 16.932774 +epoch: 1, batch: 19255, sum loss: 4401.257812, avg loss: 2.754229, ppl: 15.708926 +epoch: 1, batch: 19256, sum loss: 4930.727539, avg loss: 2.830498, ppl: 16.953901 +epoch: 1, batch: 19257, sum loss: 4176.359863, avg loss: 2.446608, ppl: 11.549107 +epoch: 1, batch: 19258, sum loss: 4774.519531, avg loss: 2.743977, ppl: 15.548697 +epoch: 1, batch: 19259, sum loss: 5325.954590, avg loss: 2.797245, ppl: 16.399408 +epoch: 1, batch: 19260, sum loss: 3289.554688, avg loss: 2.260862, ppl: 9.591356 +epoch: 1, batch: 19261, sum loss: 4314.633789, avg loss: 2.410410, ppl: 11.138526 +epoch: 1, batch: 19262, sum loss: 4385.018066, avg loss: 2.843721, ppl: 17.179575 +epoch: 1, batch: 19263, sum loss: 3441.023193, avg loss: 2.509864, ppl: 12.303255 +epoch: 1, batch: 19264, sum loss: 4320.098633, avg loss: 2.735971, ppl: 15.424717 +epoch: 1, batch: 19265, sum loss: 4536.384766, avg loss: 2.754332, ppl: 15.710540 +epoch: 1, batch: 19266, sum loss: 4371.482422, avg loss: 2.583618, ppl: 13.244977 +epoch: 1, batch: 19267, sum loss: 4128.091309, avg loss: 2.531018, ppl: 12.566298 +epoch: 1, batch: 19268, sum loss: 4578.599121, avg loss: 2.578040, ppl: 13.171299 +epoch: 1, batch: 19269, sum loss: 4212.302734, avg loss: 2.563787, ppl: 12.984901 +epoch: 1, batch: 19270, sum loss: 3969.845947, avg loss: 2.459632, ppl: 11.700504 +epoch: 1, batch: 19271, sum loss: 4114.887695, avg loss: 2.741431, ppl: 15.509163 +epoch: 1, batch: 19272, sum loss: 
4826.253418, avg loss: 2.877909, ppl: 17.777061 +epoch: 1, batch: 19273, sum loss: 4271.093262, avg loss: 2.711805, ppl: 15.056430 +epoch: 1, batch: 19274, sum loss: 4141.930664, avg loss: 2.544183, ppl: 12.732827 +epoch: 1, batch: 19275, sum loss: 4805.338379, avg loss: 2.891299, ppl: 18.016693 +epoch: 1, batch: 19276, sum loss: 4911.177734, avg loss: 2.732987, ppl: 15.378757 +epoch: 1, batch: 19277, sum loss: 3987.270020, avg loss: 2.425347, ppl: 11.306148 +epoch: 1, batch: 19278, sum loss: 5334.753418, avg loss: 2.883651, ppl: 17.879423 +epoch: 1, batch: 19279, sum loss: 3850.855957, avg loss: 2.387388, ppl: 10.885020 +epoch: 1, batch: 19280, sum loss: 5142.127930, avg loss: 2.707808, ppl: 14.996371 +epoch: 1, batch: 19281, sum loss: 5753.598145, avg loss: 2.958148, ppl: 19.262270 +epoch: 1, batch: 19282, sum loss: 4334.763184, avg loss: 2.484105, ppl: 11.990382 +epoch: 1, batch: 19283, sum loss: 4668.009277, avg loss: 2.747504, ppl: 15.603641 +epoch: 1, batch: 19284, sum loss: 3744.369629, avg loss: 2.380400, ppl: 10.809228 +epoch: 1, batch: 19285, sum loss: 3813.937988, avg loss: 2.423086, ppl: 11.280622 +epoch: 1, batch: 19286, sum loss: 4719.920410, avg loss: 2.574970, ppl: 13.130926 +epoch: 1, batch: 19287, sum loss: 5711.973145, avg loss: 2.733001, ppl: 15.378978 +epoch: 1, batch: 19288, sum loss: 3923.072754, avg loss: 2.470449, ppl: 11.827756 +epoch: 1, batch: 19289, sum loss: 3948.536377, avg loss: 2.447946, ppl: 11.564564 +epoch: 1, batch: 19290, sum loss: 4124.344727, avg loss: 2.753234, ppl: 15.693300 +epoch: 1, batch: 19291, sum loss: 4264.591797, avg loss: 2.581472, ppl: 13.216580 +epoch: 1, batch: 19292, sum loss: 4961.776855, avg loss: 2.764221, ppl: 15.866678 +epoch: 1, batch: 19293, sum loss: 3653.633789, avg loss: 2.286379, ppl: 9.839246 +epoch: 1, batch: 19294, sum loss: 3675.292725, avg loss: 2.529451, ppl: 12.546620 +epoch: 1, batch: 19295, sum loss: 5209.697266, avg loss: 2.938352, ppl: 18.884691 +epoch: 1, batch: 19296, sum loss: 
4098.500488, avg loss: 2.635692, ppl: 13.952963 +epoch: 1, batch: 19297, sum loss: 3740.582520, avg loss: 2.563799, ppl: 12.985053 +epoch: 1, batch: 19298, sum loss: 4547.815430, avg loss: 2.719985, ppl: 15.180102 +epoch: 1, batch: 19299, sum loss: 3435.937744, avg loss: 2.587303, ppl: 13.293865 +epoch: 1, batch: 19300, sum loss: 4966.868164, avg loss: 2.906301, ppl: 18.289022 +epoch: 1, batch: 19301, sum loss: 4079.669189, avg loss: 2.699980, ppl: 14.879427 +epoch: 1, batch: 19302, sum loss: 4554.869141, avg loss: 2.850356, ppl: 17.293940 +epoch: 1, batch: 19303, sum loss: 4710.014648, avg loss: 2.700696, ppl: 14.890099 +epoch: 1, batch: 19304, sum loss: 4704.783691, avg loss: 2.787194, ppl: 16.235403 +epoch: 1, batch: 19305, sum loss: 4421.710938, avg loss: 2.938014, ppl: 18.878317 +epoch: 1, batch: 19306, sum loss: 4797.800781, avg loss: 2.681834, ppl: 14.611863 +epoch: 1, batch: 19307, sum loss: 4991.899414, avg loss: 2.791890, ppl: 16.311823 +epoch: 1, batch: 19308, sum loss: 5723.743164, avg loss: 2.834940, ppl: 17.029373 +epoch: 1, batch: 19309, sum loss: 4090.548096, avg loss: 2.603786, ppl: 13.514812 +epoch: 1, batch: 19310, sum loss: 4325.482422, avg loss: 2.796046, ppl: 16.379745 +epoch: 1, batch: 19311, sum loss: 5579.896484, avg loss: 2.977533, ppl: 19.639299 +epoch: 1, batch: 19312, sum loss: 5112.456055, avg loss: 2.826123, ppl: 16.879890 +epoch: 1, batch: 19313, sum loss: 5591.265137, avg loss: 2.816758, ppl: 16.722555 +epoch: 1, batch: 19314, sum loss: 4394.494141, avg loss: 2.588041, ppl: 13.303688 +epoch: 1, batch: 19315, sum loss: 4911.365234, avg loss: 2.673579, ppl: 14.491749 +epoch: 1, batch: 19316, sum loss: 5545.500488, avg loss: 2.838025, ppl: 17.081993 +epoch: 1, batch: 19317, sum loss: 5458.856445, avg loss: 2.766780, ppl: 15.907329 +epoch: 1, batch: 19318, sum loss: 3834.316650, avg loss: 2.549412, ppl: 12.799582 +epoch: 1, batch: 19319, sum loss: 3684.476318, avg loss: 2.496258, ppl: 12.136990 +epoch: 1, batch: 19320, sum loss: 
3956.844727, avg loss: 2.308544, ppl: 10.059769 +epoch: 1, batch: 19321, sum loss: 3629.118896, avg loss: 2.345907, ppl: 10.442744 +epoch: 1, batch: 19322, sum loss: 3502.469727, avg loss: 2.523393, ppl: 12.470840 +epoch: 1, batch: 19323, sum loss: 5478.244141, avg loss: 2.676231, ppl: 14.530221 +epoch: 1, batch: 19324, sum loss: 4402.895508, avg loss: 2.655546, ppl: 14.232758 +epoch: 1, batch: 19325, sum loss: 4433.620605, avg loss: 2.779700, ppl: 16.114180 +epoch: 1, batch: 19326, sum loss: 5192.134766, avg loss: 2.966934, ppl: 19.432253 +epoch: 1, batch: 19327, sum loss: 3526.176514, avg loss: 2.410237, ppl: 11.136601 +epoch: 1, batch: 19328, sum loss: 5361.633789, avg loss: 3.063791, ppl: 21.408560 +epoch: 1, batch: 19329, sum loss: 4214.344238, avg loss: 2.390439, ppl: 10.918286 +epoch: 1, batch: 19330, sum loss: 4172.352539, avg loss: 2.564445, ppl: 12.993448 +epoch: 1, batch: 19331, sum loss: 4753.967773, avg loss: 2.617824, ppl: 13.705862 +epoch: 1, batch: 19332, sum loss: 4510.093750, avg loss: 2.679794, ppl: 14.582093 +epoch: 1, batch: 19333, sum loss: 5191.693848, avg loss: 2.800266, ppl: 16.449026 +epoch: 1, batch: 19334, sum loss: 4318.555176, avg loss: 2.726361, ppl: 15.277185 +epoch: 1, batch: 19335, sum loss: 5438.392578, avg loss: 2.885089, ppl: 17.905159 +epoch: 1, batch: 19336, sum loss: 4045.915039, avg loss: 2.555853, ppl: 12.882279 +epoch: 1, batch: 19337, sum loss: 4337.441406, avg loss: 2.619228, ppl: 13.725122 +epoch: 1, batch: 19338, sum loss: 3928.463623, avg loss: 2.343952, ppl: 10.422346 +epoch: 1, batch: 19339, sum loss: 4683.632812, avg loss: 2.871633, ppl: 17.665836 +epoch: 1, batch: 19340, sum loss: 4802.167969, avg loss: 2.503737, ppl: 12.228107 +epoch: 1, batch: 19341, sum loss: 4286.835938, avg loss: 2.807358, ppl: 16.566097 +epoch: 1, batch: 19342, sum loss: 5324.735840, avg loss: 2.895452, ppl: 18.091677 +epoch: 1, batch: 19343, sum loss: 4130.162598, avg loss: 2.557376, ppl: 12.901920 +epoch: 1, batch: 19344, sum loss: 
4364.283203, avg loss: 2.630671, ppl: 13.883082 +epoch: 1, batch: 19345, sum loss: 4376.020996, avg loss: 2.615673, ppl: 13.676418 +epoch: 1, batch: 19346, sum loss: 5013.301270, avg loss: 2.892845, ppl: 18.044580 +epoch: 1, batch: 19347, sum loss: 5335.777344, avg loss: 2.734894, ppl: 15.408107 +epoch: 1, batch: 19348, sum loss: 4488.713867, avg loss: 2.683033, ppl: 14.629396 +epoch: 1, batch: 19349, sum loss: 4702.578125, avg loss: 2.886788, ppl: 17.935614 +epoch: 1, batch: 19350, sum loss: 4712.312500, avg loss: 2.708225, ppl: 15.002629 +epoch: 1, batch: 19351, sum loss: 4023.734375, avg loss: 2.671803, ppl: 14.466021 +epoch: 1, batch: 19352, sum loss: 3820.483154, avg loss: 2.520108, ppl: 12.429935 +epoch: 1, batch: 19353, sum loss: 4319.860352, avg loss: 2.413330, ppl: 11.171097 +epoch: 1, batch: 19354, sum loss: 4606.926758, avg loss: 2.790386, ppl: 16.287300 +epoch: 1, batch: 19355, sum loss: 4237.014160, avg loss: 2.514549, ppl: 12.361027 +epoch: 1, batch: 19356, sum loss: 4943.545410, avg loss: 2.783528, ppl: 16.175987 +epoch: 1, batch: 19357, sum loss: 5201.010254, avg loss: 3.002893, ppl: 20.143723 +epoch: 1, batch: 19358, sum loss: 4320.631348, avg loss: 2.731120, ppl: 15.350064 +epoch: 1, batch: 19359, sum loss: 4771.507812, avg loss: 2.813389, ppl: 16.666306 +epoch: 1, batch: 19360, sum loss: 4280.481445, avg loss: 2.878602, ppl: 17.789387 +epoch: 1, batch: 19361, sum loss: 4010.326172, avg loss: 2.534972, ppl: 12.616080 +epoch: 1, batch: 19362, sum loss: 4930.271973, avg loss: 2.766707, ppl: 15.906172 +epoch: 1, batch: 19363, sum loss: 4410.985352, avg loss: 2.616243, ppl: 13.684214 +epoch: 1, batch: 19364, sum loss: 4157.665039, avg loss: 2.619827, ppl: 13.733345 +epoch: 1, batch: 19365, sum loss: 4299.108887, avg loss: 2.637490, ppl: 13.978075 +epoch: 1, batch: 19366, sum loss: 4338.278320, avg loss: 2.597771, ppl: 13.433766 +epoch: 1, batch: 19367, sum loss: 5180.096680, avg loss: 2.690959, ppl: 14.745813 +epoch: 1, batch: 19368, sum loss: 
4456.750977, avg loss: 2.657573, ppl: 14.261641 +epoch: 1, batch: 19369, sum loss: 4108.850586, avg loss: 2.553667, ppl: 12.854157 +epoch: 1, batch: 19370, sum loss: 3794.852051, avg loss: 2.695208, ppl: 14.808593 +epoch: 1, batch: 19371, sum loss: 4509.219238, avg loss: 2.764696, ppl: 15.874215 +epoch: 1, batch: 19372, sum loss: 4460.503906, avg loss: 2.548859, ppl: 12.792503 +epoch: 1, batch: 19373, sum loss: 3511.301758, avg loss: 2.377320, ppl: 10.775988 +epoch: 1, batch: 19374, sum loss: 3997.254150, avg loss: 2.554156, ppl: 12.860442 +epoch: 1, batch: 19375, sum loss: 4570.170410, avg loss: 2.454442, ppl: 11.639935 +epoch: 1, batch: 19376, sum loss: 3955.804688, avg loss: 2.821544, ppl: 16.802776 +epoch: 1, batch: 19377, sum loss: 5233.798828, avg loss: 2.824500, ppl: 16.852518 +epoch: 1, batch: 19378, sum loss: 3895.709961, avg loss: 2.556240, ppl: 12.887271 +epoch: 1, batch: 19379, sum loss: 4934.392090, avg loss: 2.628872, ppl: 13.858125 +epoch: 1, batch: 19380, sum loss: 4671.065430, avg loss: 2.642005, ppl: 14.041334 +epoch: 1, batch: 19381, sum loss: 4540.319336, avg loss: 2.725282, ppl: 15.260713 +epoch: 1, batch: 19382, sum loss: 4729.919434, avg loss: 2.730900, ppl: 15.346697 +epoch: 1, batch: 19383, sum loss: 4746.391113, avg loss: 2.699881, ppl: 14.877962 +epoch: 1, batch: 19384, sum loss: 4611.916992, avg loss: 2.572179, ppl: 13.094327 +epoch: 1, batch: 19385, sum loss: 4330.543945, avg loss: 2.698158, ppl: 14.852349 +epoch: 1, batch: 19386, sum loss: 4654.521973, avg loss: 2.800555, ppl: 16.453772 +epoch: 1, batch: 19387, sum loss: 3665.477051, avg loss: 2.445282, ppl: 11.533796 +epoch: 1, batch: 19388, sum loss: 4622.770996, avg loss: 2.972843, ppl: 19.547417 +epoch: 1, batch: 19389, sum loss: 4588.391602, avg loss: 2.809793, ppl: 16.606476 +epoch: 1, batch: 19390, sum loss: 4246.303223, avg loss: 2.702930, ppl: 14.923396 +epoch: 1, batch: 19391, sum loss: 5228.007324, avg loss: 2.557734, ppl: 12.906532 +epoch: 1, batch: 19392, sum loss: 
5482.314453, avg loss: 2.756317, ppl: 15.741757 +epoch: 1, batch: 19393, sum loss: 4731.049805, avg loss: 2.769935, ppl: 15.957603 +epoch: 1, batch: 19394, sum loss: 5514.446289, avg loss: 2.971146, ppl: 19.514263 +epoch: 1, batch: 19395, sum loss: 4480.623047, avg loss: 2.686225, ppl: 14.676167 +epoch: 1, batch: 19396, sum loss: 4503.500000, avg loss: 2.658500, ppl: 14.274867 +epoch: 1, batch: 19397, sum loss: 4132.310547, avg loss: 2.565059, ppl: 13.001431 +epoch: 1, batch: 19398, sum loss: 4649.972168, avg loss: 2.680099, ppl: 14.586541 +epoch: 1, batch: 19399, sum loss: 3794.134277, avg loss: 2.625699, ppl: 13.814221 +epoch: 1, batch: 19400, sum loss: 3967.468750, avg loss: 2.650280, ppl: 14.157995 +epoch: 1, batch: 19401, sum loss: 4358.864258, avg loss: 2.596107, ppl: 13.411428 +epoch: 1, batch: 19402, sum loss: 3699.095947, avg loss: 2.542334, ppl: 12.709298 +epoch: 1, batch: 19403, sum loss: 4857.745605, avg loss: 2.817718, ppl: 16.738609 +epoch: 1, batch: 19404, sum loss: 3991.260986, avg loss: 2.828675, ppl: 16.923027 +epoch: 1, batch: 19405, sum loss: 3879.404785, avg loss: 2.318831, ppl: 10.163791 +epoch: 1, batch: 19406, sum loss: 4293.691895, avg loss: 2.666889, ppl: 14.395123 +epoch: 1, batch: 19407, sum loss: 4199.021484, avg loss: 2.494962, ppl: 12.121276 +epoch: 1, batch: 19408, sum loss: 4010.495117, avg loss: 2.437991, ppl: 11.450013 +epoch: 1, batch: 19409, sum loss: 4047.756348, avg loss: 2.920459, ppl: 18.549801 +epoch: 1, batch: 19410, sum loss: 5113.114258, avg loss: 2.778866, ppl: 16.100758 +epoch: 1, batch: 19411, sum loss: 4229.522461, avg loss: 2.726965, ppl: 15.286418 +epoch: 1, batch: 19412, sum loss: 4584.817871, avg loss: 2.782050, ppl: 16.152094 +epoch: 1, batch: 19413, sum loss: 4799.183105, avg loss: 2.717544, ppl: 15.143086 +epoch: 1, batch: 19414, sum loss: 3351.454590, avg loss: 2.373551, ppl: 10.735450 +epoch: 1, batch: 19415, sum loss: 3859.422607, avg loss: 2.545793, ppl: 12.753342 +epoch: 1, batch: 19416, sum loss: 
5156.128906, avg loss: 2.751403, ppl: 15.664592 +epoch: 1, batch: 19417, sum loss: 4667.495117, avg loss: 2.585870, ppl: 13.274831 +epoch: 1, batch: 19418, sum loss: 5177.015625, avg loss: 2.799900, ppl: 16.443007 +epoch: 1, batch: 19419, sum loss: 4246.994141, avg loss: 2.594377, ppl: 13.388238 +epoch: 1, batch: 19420, sum loss: 5247.559570, avg loss: 3.000320, ppl: 20.091965 +epoch: 1, batch: 19421, sum loss: 4744.711426, avg loss: 2.604123, ppl: 13.519359 +epoch: 1, batch: 19422, sum loss: 4621.648438, avg loss: 2.809513, ppl: 16.601826 +epoch: 1, batch: 19423, sum loss: 4332.955566, avg loss: 2.616519, ppl: 13.687992 +epoch: 1, batch: 19424, sum loss: 4777.246094, avg loss: 2.699009, ppl: 14.864996 +epoch: 1, batch: 19425, sum loss: 5160.567871, avg loss: 2.862212, ppl: 17.500189 +epoch: 1, batch: 19426, sum loss: 3638.867676, avg loss: 2.577102, ppl: 13.158944 +epoch: 1, batch: 19427, sum loss: 4351.273438, avg loss: 2.640336, ppl: 14.017910 +epoch: 1, batch: 19428, sum loss: 5560.169922, avg loss: 2.908039, ppl: 18.320829 +epoch: 1, batch: 19429, sum loss: 4993.525879, avg loss: 2.811670, ppl: 16.637682 +epoch: 1, batch: 19430, sum loss: 3842.853760, avg loss: 2.244658, ppl: 9.437183 +epoch: 1, batch: 19431, sum loss: 3939.327148, avg loss: 2.538226, ppl: 12.657202 +epoch: 1, batch: 19432, sum loss: 3566.603271, avg loss: 2.501124, ppl: 12.196199 +epoch: 1, batch: 19433, sum loss: 4252.115723, avg loss: 2.741532, ppl: 15.510723 +epoch: 1, batch: 19434, sum loss: 3739.932129, avg loss: 2.473500, ppl: 11.863901 +epoch: 1, batch: 19435, sum loss: 4320.963867, avg loss: 2.736519, ppl: 15.433170 +epoch: 1, batch: 19436, sum loss: 4086.479980, avg loss: 2.458773, ppl: 11.690454 +epoch: 1, batch: 19437, sum loss: 4246.411621, avg loss: 2.426521, ppl: 11.319431 +epoch: 1, batch: 19438, sum loss: 4308.384277, avg loss: 2.573706, ppl: 13.114342 +epoch: 1, batch: 19439, sum loss: 4747.041992, avg loss: 2.827303, ppl: 16.899824 +epoch: 1, batch: 19440, sum loss: 
4455.802734, avg loss: 2.774472, ppl: 16.030165 +epoch: 1, batch: 19441, sum loss: 4256.143066, avg loss: 2.550116, ppl: 12.808587 +epoch: 1, batch: 19442, sum loss: 4096.694824, avg loss: 2.544531, ppl: 12.737251 +epoch: 1, batch: 19443, sum loss: 4936.139160, avg loss: 2.608953, ppl: 13.584820 +epoch: 1, batch: 19444, sum loss: 4244.461426, avg loss: 2.786908, ppl: 16.230762 +epoch: 1, batch: 19445, sum loss: 4386.736328, avg loss: 2.767657, ppl: 15.921288 +epoch: 1, batch: 19446, sum loss: 3852.750244, avg loss: 2.633459, ppl: 13.921841 +epoch: 1, batch: 19447, sum loss: 4235.824219, avg loss: 2.881513, ppl: 17.841249 +epoch: 1, batch: 19448, sum loss: 4310.458008, avg loss: 2.625127, ppl: 13.806325 +epoch: 1, batch: 19449, sum loss: 4844.155762, avg loss: 2.657244, ppl: 14.256946 +epoch: 1, batch: 19450, sum loss: 5168.699707, avg loss: 2.838385, ppl: 17.088152 +epoch: 1, batch: 19451, sum loss: 4677.598633, avg loss: 2.669863, ppl: 14.437984 +epoch: 1, batch: 19452, sum loss: 4469.015625, avg loss: 2.534893, ppl: 12.615075 +epoch: 1, batch: 19453, sum loss: 4383.434082, avg loss: 2.705824, ppl: 14.966639 +epoch: 1, batch: 19454, sum loss: 4906.891602, avg loss: 2.709493, ppl: 15.021660 +epoch: 1, batch: 19455, sum loss: 4336.876465, avg loss: 2.741388, ppl: 15.508501 +epoch: 1, batch: 19456, sum loss: 4751.609375, avg loss: 2.777095, ppl: 16.072264 +epoch: 1, batch: 19457, sum loss: 4610.593262, avg loss: 2.682137, ppl: 14.616295 +epoch: 1, batch: 19458, sum loss: 4728.438477, avg loss: 2.760326, ppl: 15.804993 +epoch: 1, batch: 19459, sum loss: 4938.813965, avg loss: 2.675414, ppl: 14.518357 +epoch: 1, batch: 19460, sum loss: 4687.541016, avg loss: 2.773693, ppl: 16.017675 +epoch: 1, batch: 19461, sum loss: 4578.286133, avg loss: 2.766336, ppl: 15.900271 +epoch: 1, batch: 19462, sum loss: 4522.374512, avg loss: 2.452481, ppl: 11.617130 +epoch: 1, batch: 19463, sum loss: 4583.713379, avg loss: 2.652612, ppl: 14.191057 +epoch: 1, batch: 19464, sum loss: 
5670.900391, avg loss: 3.062041, ppl: 21.371138 +epoch: 1, batch: 19465, sum loss: 4568.267578, avg loss: 2.745353, ppl: 15.570112 +epoch: 1, batch: 19466, sum loss: 5305.902832, avg loss: 2.858784, ppl: 17.440306 +epoch: 1, batch: 19467, sum loss: 4887.830078, avg loss: 2.851710, ppl: 17.317362 +epoch: 1, batch: 19468, sum loss: 3947.141357, avg loss: 2.438012, ppl: 11.450256 +epoch: 1, batch: 19469, sum loss: 4346.903320, avg loss: 2.579765, ppl: 13.194032 +epoch: 1, batch: 19470, sum loss: 4822.473145, avg loss: 2.692615, ppl: 14.770247 +epoch: 1, batch: 19471, sum loss: 4778.813965, avg loss: 2.749605, ppl: 15.636457 +epoch: 1, batch: 19472, sum loss: 5096.154297, avg loss: 2.892256, ppl: 18.033945 +epoch: 1, batch: 19473, sum loss: 3850.343018, avg loss: 2.488909, ppl: 12.048130 +epoch: 1, batch: 19474, sum loss: 4856.272461, avg loss: 2.773428, ppl: 16.013437 +epoch: 1, batch: 19475, sum loss: 5097.743164, avg loss: 2.742196, ppl: 15.521037 +epoch: 1, batch: 19476, sum loss: 5515.642090, avg loss: 2.954281, ppl: 19.187914 +epoch: 1, batch: 19477, sum loss: 4793.450195, avg loss: 2.627988, ppl: 13.845885 +epoch: 1, batch: 19478, sum loss: 4482.384277, avg loss: 2.700232, ppl: 14.883178 +epoch: 1, batch: 19479, sum loss: 4824.419922, avg loss: 2.927439, ppl: 18.679735 +epoch: 1, batch: 19480, sum loss: 4530.770508, avg loss: 2.687290, ppl: 14.691803 +epoch: 1, batch: 19481, sum loss: 4239.555176, avg loss: 2.535619, ppl: 12.624246 +epoch: 1, batch: 19482, sum loss: 4922.755859, avg loss: 2.690031, ppl: 14.732126 +epoch: 1, batch: 19483, sum loss: 4591.124023, avg loss: 2.655364, ppl: 14.230162 +epoch: 1, batch: 19484, sum loss: 3173.306152, avg loss: 2.296169, ppl: 9.936048 +epoch: 1, batch: 19485, sum loss: 3838.991455, avg loss: 2.567887, ppl: 13.038250 +epoch: 1, batch: 19486, sum loss: 4984.395020, avg loss: 2.828828, ppl: 16.925613 +epoch: 1, batch: 19487, sum loss: 4146.558105, avg loss: 2.547026, ppl: 12.769071 +epoch: 1, batch: 19488, sum loss: 
4770.971191, avg loss: 2.565038, ppl: 13.001158 +epoch: 1, batch: 19489, sum loss: 3963.943115, avg loss: 2.448390, ppl: 11.569702 +epoch: 1, batch: 19490, sum loss: 3777.951660, avg loss: 2.559588, ppl: 12.930489 +epoch: 1, batch: 19491, sum loss: 5265.620117, avg loss: 2.794915, ppl: 16.361240 +epoch: 1, batch: 19492, sum loss: 5666.301270, avg loss: 2.975999, ppl: 19.609196 +epoch: 1, batch: 19493, sum loss: 4695.115234, avg loss: 2.813131, ppl: 16.662003 +epoch: 1, batch: 19494, sum loss: 4862.728516, avg loss: 2.661592, ppl: 14.319067 +epoch: 1, batch: 19495, sum loss: 4521.442383, avg loss: 2.642573, ppl: 14.049308 +epoch: 1, batch: 19496, sum loss: 5108.083496, avg loss: 2.876173, ppl: 17.746233 +epoch: 1, batch: 19497, sum loss: 4010.624756, avg loss: 2.590843, ppl: 13.341013 +epoch: 1, batch: 19498, sum loss: 4559.911621, avg loss: 2.616128, ppl: 13.682645 +epoch: 1, batch: 19499, sum loss: 4610.517578, avg loss: 2.741092, ppl: 15.503913 +epoch: 1, batch: 19500, sum loss: 4669.009766, avg loss: 2.575295, ppl: 13.135194 +epoch: 1, batch: 19501, sum loss: 4339.867188, avg loss: 2.438128, ppl: 11.451580 +epoch: 1, batch: 19502, sum loss: 3583.038330, avg loss: 2.561142, ppl: 12.950604 +epoch: 1, batch: 19503, sum loss: 4647.611816, avg loss: 2.748440, ppl: 15.618249 +epoch: 1, batch: 19504, sum loss: 4710.967773, avg loss: 2.712129, ppl: 15.061305 +epoch: 1, batch: 19505, sum loss: 5424.787598, avg loss: 2.701587, ppl: 14.903371 +epoch: 1, batch: 19506, sum loss: 4593.166992, avg loss: 2.466792, ppl: 11.784582 +epoch: 1, batch: 19507, sum loss: 4771.992188, avg loss: 2.945674, ppl: 19.023487 +epoch: 1, batch: 19508, sum loss: 4708.902344, avg loss: 2.654398, ppl: 14.216425 +epoch: 1, batch: 19509, sum loss: 5164.365234, avg loss: 2.800632, ppl: 16.455044 +epoch: 1, batch: 19510, sum loss: 4220.540039, avg loss: 2.301276, ppl: 9.986915 +epoch: 1, batch: 19511, sum loss: 3689.546875, avg loss: 2.413046, ppl: 11.167926 +epoch: 1, batch: 19512, sum loss: 
4372.215820, avg loss: 2.811714, ppl: 16.638418 +epoch: 1, batch: 19513, sum loss: 4127.168457, avg loss: 2.546063, ppl: 12.756783 +epoch: 1, batch: 19514, sum loss: 4475.552734, avg loss: 2.785036, ppl: 16.200403 +epoch: 1, batch: 19515, sum loss: 5785.440430, avg loss: 3.082280, ppl: 21.808077 +epoch: 1, batch: 19516, sum loss: 4354.593750, avg loss: 2.505520, ppl: 12.249928 +epoch: 1, batch: 19517, sum loss: 4400.545410, avg loss: 2.650931, ppl: 14.167224 +epoch: 1, batch: 19518, sum loss: 3599.712402, avg loss: 2.383915, ppl: 10.847292 +epoch: 1, batch: 19519, sum loss: 4805.778809, avg loss: 2.910829, ppl: 18.372028 +epoch: 1, batch: 19520, sum loss: 3836.118164, avg loss: 2.658432, ppl: 14.273897 +epoch: 1, batch: 19521, sum loss: 5620.824707, avg loss: 2.859016, ppl: 17.444347 +epoch: 1, batch: 19522, sum loss: 4055.158447, avg loss: 2.525005, ppl: 12.490959 +epoch: 1, batch: 19523, sum loss: 4590.970215, avg loss: 2.415029, ppl: 11.190095 +epoch: 1, batch: 19524, sum loss: 4277.728027, avg loss: 2.516311, ppl: 12.382828 +epoch: 1, batch: 19525, sum loss: 4189.177246, avg loss: 2.716717, ppl: 15.130564 +epoch: 1, batch: 19526, sum loss: 4750.540039, avg loss: 2.773228, ppl: 16.010235 +epoch: 1, batch: 19527, sum loss: 4985.466797, avg loss: 2.868508, ppl: 17.610722 +epoch: 1, batch: 19528, sum loss: 4625.211426, avg loss: 2.766275, ppl: 15.899298 +epoch: 1, batch: 19529, sum loss: 4232.579590, avg loss: 2.728936, ppl: 15.316577 +epoch: 1, batch: 19530, sum loss: 4069.310791, avg loss: 2.698482, ppl: 14.857159 +epoch: 1, batch: 19531, sum loss: 3682.504395, avg loss: 2.546684, ppl: 12.764700 +epoch: 1, batch: 19532, sum loss: 4815.024414, avg loss: 2.664651, ppl: 14.362935 +epoch: 1, batch: 19533, sum loss: 3992.801758, avg loss: 2.665422, ppl: 14.374010 +epoch: 1, batch: 19534, sum loss: 4679.573242, avg loss: 2.664905, ppl: 14.366586 +epoch: 1, batch: 19535, sum loss: 4812.280762, avg loss: 2.651394, ppl: 14.173788 +epoch: 1, batch: 19536, sum loss: 
3993.179688, avg loss: 2.750124, ppl: 15.644567 +epoch: 1, batch: 19537, sum loss: 4796.218262, avg loss: 2.822966, ppl: 16.826679 +epoch: 1, batch: 19538, sum loss: 5293.586914, avg loss: 2.945791, ppl: 19.025715 +epoch: 1, batch: 19539, sum loss: 5102.999023, avg loss: 2.878172, ppl: 17.781742 +epoch: 1, batch: 19540, sum loss: 3989.192383, avg loss: 2.734196, ppl: 15.397366 +epoch: 1, batch: 19541, sum loss: 5294.021973, avg loss: 2.831028, ppl: 16.962885 +epoch: 1, batch: 19542, sum loss: 4927.851074, avg loss: 2.784097, ppl: 16.185192 +epoch: 1, batch: 19543, sum loss: 3554.013184, avg loss: 2.650271, ppl: 14.157870 +epoch: 1, batch: 19544, sum loss: 5843.414062, avg loss: 3.098311, ppl: 22.160490 +epoch: 1, batch: 19545, sum loss: 4506.945312, avg loss: 2.907707, ppl: 18.314751 +epoch: 1, batch: 19546, sum loss: 5115.163086, avg loss: 2.646230, ppl: 14.100785 +epoch: 1, batch: 19547, sum loss: 4609.046387, avg loss: 2.596646, ppl: 13.418651 +epoch: 1, batch: 19548, sum loss: 4979.880371, avg loss: 2.771219, ppl: 15.978100 +epoch: 1, batch: 19549, sum loss: 4370.844238, avg loss: 2.541188, ppl: 12.694750 +epoch: 1, batch: 19550, sum loss: 4852.949707, avg loss: 2.781060, ppl: 16.136116 +epoch: 1, batch: 19551, sum loss: 5714.500000, avg loss: 2.772683, ppl: 16.001511 +epoch: 1, batch: 19552, sum loss: 3707.337402, avg loss: 2.523715, ppl: 12.474855 +epoch: 1, batch: 19553, sum loss: 4440.402344, avg loss: 2.671722, ppl: 14.464862 +epoch: 1, batch: 19554, sum loss: 4500.530762, avg loss: 2.691705, ppl: 14.756818 +epoch: 1, batch: 19555, sum loss: 4131.144043, avg loss: 2.543808, ppl: 12.728047 +epoch: 1, batch: 19556, sum loss: 4241.869141, avg loss: 2.696675, ppl: 14.830333 +epoch: 1, batch: 19557, sum loss: 4629.250977, avg loss: 2.600703, ppl: 13.473203 +epoch: 1, batch: 19558, sum loss: 4453.185059, avg loss: 2.772842, ppl: 16.004059 +epoch: 1, batch: 19559, sum loss: 4762.955078, avg loss: 2.712389, ppl: 15.065224 +epoch: 1, batch: 19560, sum loss: 
4728.102051, avg loss: 2.922189, ppl: 18.581923 +epoch: 1, batch: 19561, sum loss: 4167.522949, avg loss: 2.619436, ppl: 13.727983 +epoch: 1, batch: 19562, sum loss: 4955.687012, avg loss: 2.891299, ppl: 18.016703 +epoch: 1, batch: 19563, sum loss: 3830.556152, avg loss: 2.380706, ppl: 10.812534 +epoch: 1, batch: 19564, sum loss: 3284.607178, avg loss: 2.381876, ppl: 10.825191 +epoch: 1, batch: 19565, sum loss: 4180.325684, avg loss: 2.419170, ppl: 11.236528 +epoch: 1, batch: 19566, sum loss: 4523.812500, avg loss: 2.731771, ppl: 15.360062 +epoch: 1, batch: 19567, sum loss: 4584.767090, avg loss: 2.765239, ppl: 15.882843 +epoch: 1, batch: 19568, sum loss: 4462.660645, avg loss: 2.534163, ppl: 12.605872 +epoch: 1, batch: 19569, sum loss: 5131.245605, avg loss: 2.820916, ppl: 16.792221 +epoch: 1, batch: 19570, sum loss: 5437.809570, avg loss: 2.669519, ppl: 14.433021 +epoch: 1, batch: 19571, sum loss: 3939.121582, avg loss: 2.501029, ppl: 12.195043 +epoch: 1, batch: 19572, sum loss: 4397.963379, avg loss: 2.537775, ppl: 12.651484 +epoch: 1, batch: 19573, sum loss: 3550.666016, avg loss: 2.391021, ppl: 10.924640 +epoch: 1, batch: 19574, sum loss: 4633.345215, avg loss: 2.885022, ppl: 17.903959 +epoch: 1, batch: 19575, sum loss: 4708.103516, avg loss: 2.605480, ppl: 13.537727 +epoch: 1, batch: 19576, sum loss: 3980.929688, avg loss: 2.596823, ppl: 13.421031 +epoch: 1, batch: 19577, sum loss: 5142.666992, avg loss: 2.797969, ppl: 16.411280 +epoch: 1, batch: 19578, sum loss: 4952.958008, avg loss: 2.844893, ppl: 17.199715 +epoch: 1, batch: 19579, sum loss: 4671.713867, avg loss: 2.784096, ppl: 16.185188 +epoch: 1, batch: 19580, sum loss: 4644.295410, avg loss: 2.634314, ppl: 13.933748 +epoch: 1, batch: 19581, sum loss: 4630.650391, avg loss: 2.731947, ppl: 15.362772 +epoch: 1, batch: 19582, sum loss: 3219.711182, avg loss: 2.350154, ppl: 10.487186 +epoch: 1, batch: 19583, sum loss: 4468.429688, avg loss: 2.650314, ppl: 14.158488 +epoch: 1, batch: 19584, sum loss: 
4360.545898, avg loss: 2.727046, ppl: 15.287653 +epoch: 1, batch: 19585, sum loss: 4069.443359, avg loss: 2.554578, ppl: 12.865873 +epoch: 1, batch: 19586, sum loss: 4274.340820, avg loss: 2.418982, ppl: 11.234414 +epoch: 1, batch: 19587, sum loss: 4676.150879, avg loss: 2.744220, ppl: 15.552479 +epoch: 1, batch: 19588, sum loss: 4888.571777, avg loss: 2.666979, ppl: 14.396406 +epoch: 1, batch: 19589, sum loss: 3902.377930, avg loss: 2.527447, ppl: 12.521498 +epoch: 1, batch: 19590, sum loss: 4339.797852, avg loss: 2.680542, ppl: 14.593004 +epoch: 1, batch: 19591, sum loss: 4397.457031, avg loss: 2.658680, ppl: 14.277433 +epoch: 1, batch: 19592, sum loss: 4222.785645, avg loss: 2.624478, ppl: 13.797375 +epoch: 1, batch: 19593, sum loss: 4356.718750, avg loss: 2.591742, ppl: 13.353016 +epoch: 1, batch: 19594, sum loss: 4109.947266, avg loss: 2.385344, ppl: 10.862797 +epoch: 1, batch: 19595, sum loss: 4673.356445, avg loss: 2.775152, ppl: 16.041073 +epoch: 1, batch: 19596, sum loss: 5044.737793, avg loss: 2.869589, ppl: 17.629772 +epoch: 1, batch: 19597, sum loss: 4003.967285, avg loss: 2.639398, ppl: 14.004775 +epoch: 1, batch: 19598, sum loss: 5301.819336, avg loss: 2.737129, ppl: 15.442589 +epoch: 1, batch: 19599, sum loss: 4551.518555, avg loss: 2.760169, ppl: 15.802510 +epoch: 1, batch: 19600, sum loss: 4077.576660, avg loss: 2.487844, ppl: 12.035303 +epoch: 1, batch: 19601, sum loss: 5137.724609, avg loss: 2.881506, ppl: 17.841116 +epoch: 1, batch: 19602, sum loss: 4624.213867, avg loss: 2.651499, ppl: 14.175265 +epoch: 1, batch: 19603, sum loss: 4774.781738, avg loss: 2.693053, ppl: 14.776713 +epoch: 1, batch: 19604, sum loss: 4786.982422, avg loss: 2.863028, ppl: 17.514481 +epoch: 1, batch: 19605, sum loss: 4983.356934, avg loss: 2.837903, ppl: 17.079908 +epoch: 1, batch: 19606, sum loss: 3710.559570, avg loss: 2.548461, ppl: 12.787412 +epoch: 1, batch: 19607, sum loss: 4305.842285, avg loss: 2.756621, ppl: 15.746548 +epoch: 1, batch: 19608, sum loss: 
4025.726807, avg loss: 2.488088, ppl: 12.038239 +epoch: 1, batch: 19609, sum loss: 4242.082520, avg loss: 2.564742, ppl: 12.997299 +epoch: 1, batch: 19610, sum loss: 4004.358398, avg loss: 2.465738, ppl: 11.772164 +epoch: 1, batch: 19611, sum loss: 4645.094238, avg loss: 2.803316, ppl: 16.499262 +epoch: 1, batch: 19612, sum loss: 4001.435547, avg loss: 2.534158, ppl: 12.605812 +epoch: 1, batch: 19613, sum loss: 3863.850342, avg loss: 2.545356, ppl: 12.747766 +epoch: 1, batch: 19614, sum loss: 4812.609375, avg loss: 2.876635, ppl: 17.754421 +epoch: 1, batch: 19615, sum loss: 5174.208984, avg loss: 2.892235, ppl: 18.033575 +epoch: 1, batch: 19616, sum loss: 4521.345703, avg loss: 2.627162, ppl: 13.834452 +epoch: 1, batch: 19617, sum loss: 5161.115723, avg loss: 2.856179, ppl: 17.394934 +epoch: 1, batch: 19618, sum loss: 4375.427246, avg loss: 2.651774, ppl: 14.179173 +epoch: 1, batch: 19619, sum loss: 4614.713867, avg loss: 2.664384, ppl: 14.359107 +epoch: 1, batch: 19620, sum loss: 4265.170410, avg loss: 2.519297, ppl: 12.419867 +epoch: 1, batch: 19621, sum loss: 3695.970215, avg loss: 2.580985, ppl: 13.210141 +epoch: 1, batch: 19622, sum loss: 3728.777588, avg loss: 2.648280, ppl: 14.129710 +epoch: 1, batch: 19623, sum loss: 4474.430664, avg loss: 2.660185, ppl: 14.298929 +epoch: 1, batch: 19624, sum loss: 4212.599121, avg loss: 2.636170, ppl: 13.959631 +epoch: 1, batch: 19625, sum loss: 4165.968750, avg loss: 2.597237, ppl: 13.426594 +epoch: 1, batch: 19626, sum loss: 3643.032471, avg loss: 2.547575, ppl: 12.776087 +epoch: 1, batch: 19627, sum loss: 5625.348633, avg loss: 2.971658, ppl: 19.524263 +epoch: 1, batch: 19628, sum loss: 4795.879883, avg loss: 2.785064, ppl: 16.200851 +epoch: 1, batch: 19629, sum loss: 4724.271484, avg loss: 2.729215, ppl: 15.320858 +epoch: 1, batch: 19630, sum loss: 4823.954590, avg loss: 2.816086, ppl: 16.711308 +epoch: 1, batch: 19631, sum loss: 4562.708496, avg loss: 2.638929, ppl: 13.998205 +epoch: 1, batch: 19632, sum loss: 
4263.339844, avg loss: 2.706882, ppl: 14.982491 +epoch: 1, batch: 19633, sum loss: 4152.554199, avg loss: 2.643255, ppl: 14.058898 +epoch: 1, batch: 19634, sum loss: 3938.375000, avg loss: 2.562378, ppl: 12.966618 +epoch: 1, batch: 19635, sum loss: 4584.579102, avg loss: 2.498408, ppl: 12.163118 +epoch: 1, batch: 19636, sum loss: 4689.195801, avg loss: 2.769755, ppl: 15.954730 +epoch: 1, batch: 19637, sum loss: 4728.817871, avg loss: 2.763775, ppl: 15.859594 +epoch: 1, batch: 19638, sum loss: 4972.377441, avg loss: 2.700911, ppl: 14.893297 +epoch: 1, batch: 19639, sum loss: 4466.639648, avg loss: 2.621267, ppl: 13.753142 +epoch: 1, batch: 19640, sum loss: 4219.876953, avg loss: 2.705050, ppl: 14.955057 +epoch: 1, batch: 19641, sum loss: 4297.315430, avg loss: 2.444434, ppl: 11.524027 +epoch: 1, batch: 19642, sum loss: 4789.812988, avg loss: 2.551845, ppl: 12.830755 +epoch: 1, batch: 19643, sum loss: 4042.992676, avg loss: 2.635588, ppl: 13.951519 +epoch: 1, batch: 19644, sum loss: 4027.715088, avg loss: 2.581869, ppl: 13.221822 +epoch: 1, batch: 19645, sum loss: 4400.682617, avg loss: 2.468134, ppl: 11.800406 +epoch: 1, batch: 19646, sum loss: 3962.132568, avg loss: 2.625668, ppl: 13.813796 +epoch: 1, batch: 19647, sum loss: 4371.438477, avg loss: 2.622339, ppl: 13.767882 +epoch: 1, batch: 19648, sum loss: 4082.904053, avg loss: 2.589032, ppl: 13.316880 +epoch: 1, batch: 19649, sum loss: 4947.199707, avg loss: 2.609283, ppl: 13.589300 +epoch: 1, batch: 19650, sum loss: 4222.744141, avg loss: 2.698239, ppl: 14.853553 +epoch: 1, batch: 19651, sum loss: 4531.455566, avg loss: 2.564491, ppl: 12.994043 +epoch: 1, batch: 19652, sum loss: 5190.814941, avg loss: 2.937643, ppl: 18.871311 +epoch: 1, batch: 19653, sum loss: 4019.100830, avg loss: 2.633749, ppl: 13.925880 +epoch: 1, batch: 19654, sum loss: 4174.073730, avg loss: 2.362238, ppl: 10.614677 +epoch: 1, batch: 19655, sum loss: 4587.752930, avg loss: 2.775410, ppl: 16.045202 +epoch: 1, batch: 19656, sum loss: 
5057.304688, avg loss: 2.800279, ppl: 16.449242 +epoch: 1, batch: 19657, sum loss: 4410.306641, avg loss: 2.655212, ppl: 14.227998 +epoch: 1, batch: 19658, sum loss: 4042.300537, avg loss: 2.463315, ppl: 11.743680 +epoch: 1, batch: 19659, sum loss: 4437.376465, avg loss: 2.663491, ppl: 14.346289 +epoch: 1, batch: 19660, sum loss: 4180.757812, avg loss: 2.741481, ppl: 15.509932 +epoch: 1, batch: 19661, sum loss: 4966.157715, avg loss: 2.954288, ppl: 19.188051 +epoch: 1, batch: 19662, sum loss: 5099.815430, avg loss: 3.008741, ppl: 20.261868 +epoch: 1, batch: 19663, sum loss: 4643.379883, avg loss: 2.777141, ppl: 16.073008 +epoch: 1, batch: 19664, sum loss: 4106.325195, avg loss: 2.845686, ppl: 17.213366 +epoch: 1, batch: 19665, sum loss: 4450.642578, avg loss: 2.584578, ppl: 13.257688 +epoch: 1, batch: 19666, sum loss: 4515.620117, avg loss: 2.822263, ppl: 16.814852 +epoch: 1, batch: 19667, sum loss: 4775.876953, avg loss: 2.666598, ppl: 14.390925 +epoch: 1, batch: 19668, sum loss: 4230.088867, avg loss: 2.660433, ppl: 14.302485 +epoch: 1, batch: 19669, sum loss: 4515.194336, avg loss: 2.711828, ppl: 15.056781 +epoch: 1, batch: 19670, sum loss: 5650.704590, avg loss: 2.999313, ppl: 20.071735 +epoch: 1, batch: 19671, sum loss: 5081.612793, avg loss: 2.809073, ppl: 16.594528 +epoch: 1, batch: 19672, sum loss: 4602.620605, avg loss: 2.622576, ppl: 13.771149 +epoch: 1, batch: 19673, sum loss: 4011.825195, avg loss: 2.450718, ppl: 11.596669 +epoch: 1, batch: 19674, sum loss: 4104.668457, avg loss: 2.508966, ppl: 12.292216 +epoch: 1, batch: 19675, sum loss: 5059.477051, avg loss: 2.704157, ppl: 14.941713 +epoch: 1, batch: 19676, sum loss: 4307.868652, avg loss: 2.768553, ppl: 15.935559 +epoch: 1, batch: 19677, sum loss: 4814.551758, avg loss: 2.789427, ppl: 16.271698 +epoch: 1, batch: 19678, sum loss: 4263.108398, avg loss: 2.516593, ppl: 12.386321 +epoch: 1, batch: 19679, sum loss: 5036.946289, avg loss: 2.919969, ppl: 18.540714 +epoch: 1, batch: 19680, sum loss: 
4194.110840, avg loss: 2.812952, ppl: 16.659019 +epoch: 1, batch: 19681, sum loss: 3956.818848, avg loss: 2.497992, ppl: 12.158053 +epoch: 1, batch: 19682, sum loss: 4981.354980, avg loss: 2.859561, ppl: 17.453857 +epoch: 1, batch: 19683, sum loss: 3484.018311, avg loss: 2.301201, ppl: 9.986170 +epoch: 1, batch: 19684, sum loss: 4414.664062, avg loss: 2.537163, ppl: 12.643753 +epoch: 1, batch: 19685, sum loss: 4206.068848, avg loss: 2.591540, ppl: 13.350310 +epoch: 1, batch: 19686, sum loss: 4140.330566, avg loss: 2.716752, ppl: 15.131101 +epoch: 1, batch: 19687, sum loss: 5032.136719, avg loss: 2.685238, ppl: 14.661695 +epoch: 1, batch: 19688, sum loss: 5162.435547, avg loss: 2.639282, ppl: 14.003145 +epoch: 1, batch: 19689, sum loss: 4006.893311, avg loss: 2.393604, ppl: 10.952900 +epoch: 1, batch: 19690, sum loss: 5260.786133, avg loss: 3.069304, ppl: 21.526905 +epoch: 1, batch: 19691, sum loss: 5172.842285, avg loss: 2.880202, ppl: 17.817869 +epoch: 1, batch: 19692, sum loss: 4076.228027, avg loss: 2.511539, ppl: 12.323882 +epoch: 1, batch: 19693, sum loss: 4773.695312, avg loss: 2.547329, ppl: 12.772947 +epoch: 1, batch: 19694, sum loss: 4411.540039, avg loss: 2.950863, ppl: 19.122448 +epoch: 1, batch: 19695, sum loss: 4686.591797, avg loss: 2.508882, ppl: 12.291182 +epoch: 1, batch: 19696, sum loss: 4343.501465, avg loss: 2.901470, ppl: 18.200890 +epoch: 1, batch: 19697, sum loss: 4688.438477, avg loss: 2.748205, ppl: 15.614585 +epoch: 1, batch: 19698, sum loss: 3691.236572, avg loss: 2.592160, ppl: 13.358602 +epoch: 1, batch: 19699, sum loss: 4994.204102, avg loss: 2.732059, ppl: 15.364493 +epoch: 1, batch: 19700, sum loss: 4490.143555, avg loss: 2.449615, ppl: 11.583880 +epoch: 1, batch: 19701, sum loss: 5872.173828, avg loss: 3.115211, ppl: 22.538197 +epoch: 1, batch: 19702, sum loss: 5995.348145, avg loss: 2.918865, ppl: 18.520250 +epoch: 1, batch: 19703, sum loss: 4788.817871, avg loss: 2.667865, ppl: 14.409177 +epoch: 1, batch: 19704, sum loss: 
5783.771973, avg loss: 3.141647, ppl: 23.141953 +epoch: 1, batch: 19705, sum loss: 4759.615234, avg loss: 2.545249, ppl: 12.746398 +epoch: 1, batch: 19706, sum loss: 3644.833984, avg loss: 2.639271, ppl: 14.002998 +epoch: 1, batch: 19707, sum loss: 4174.308105, avg loss: 2.557787, ppl: 12.907221 +epoch: 1, batch: 19708, sum loss: 4413.504883, avg loss: 2.496326, ppl: 12.137823 +epoch: 1, batch: 19709, sum loss: 4144.297852, avg loss: 2.558209, ppl: 12.912666 +epoch: 1, batch: 19710, sum loss: 4588.093262, avg loss: 2.749007, ppl: 15.627113 +epoch: 1, batch: 19711, sum loss: 4056.696777, avg loss: 2.540198, ppl: 12.682186 +epoch: 1, batch: 19712, sum loss: 4443.343750, avg loss: 2.652743, ppl: 14.192911 +epoch: 1, batch: 19713, sum loss: 5128.212891, avg loss: 2.745296, ppl: 15.569221 +epoch: 1, batch: 19714, sum loss: 4015.552490, avg loss: 2.645292, ppl: 14.087552 +epoch: 1, batch: 19715, sum loss: 4848.562012, avg loss: 2.827150, ppl: 16.897228 +epoch: 1, batch: 19716, sum loss: 4046.519043, avg loss: 2.514928, ppl: 12.365717 +epoch: 1, batch: 19717, sum loss: 4961.781250, avg loss: 2.781268, ppl: 16.139467 +epoch: 1, batch: 19718, sum loss: 4320.236328, avg loss: 2.899487, ppl: 18.164833 +epoch: 1, batch: 19719, sum loss: 3420.362305, avg loss: 2.184139, ppl: 8.882999 +epoch: 1, batch: 19720, sum loss: 4732.037109, avg loss: 2.859237, ppl: 17.448208 +epoch: 1, batch: 19721, sum loss: 4347.067383, avg loss: 2.471329, ppl: 11.838169 +epoch: 1, batch: 19722, sum loss: 4046.241455, avg loss: 2.706516, ppl: 14.977005 +epoch: 1, batch: 19723, sum loss: 4990.483398, avg loss: 2.752611, ppl: 15.683527 +epoch: 1, batch: 19724, sum loss: 3709.999756, avg loss: 2.650000, ppl: 14.154037 +epoch: 1, batch: 19725, sum loss: 4969.508301, avg loss: 2.756244, ppl: 15.740613 +epoch: 1, batch: 19726, sum loss: 4352.884277, avg loss: 2.541088, ppl: 12.693479 +epoch: 1, batch: 19727, sum loss: 4114.973145, avg loss: 2.570252, ppl: 13.069117 +epoch: 1, batch: 19728, sum loss: 
3938.699707, avg loss: 2.601519, ppl: 13.484206 +epoch: 1, batch: 19729, sum loss: 4638.648926, avg loss: 2.700028, ppl: 14.880155 +epoch: 1, batch: 19730, sum loss: 4672.320801, avg loss: 2.745194, ppl: 15.567636 +epoch: 1, batch: 19731, sum loss: 4988.158203, avg loss: 2.878337, ppl: 17.784672 +epoch: 1, batch: 19732, sum loss: 4955.720703, avg loss: 2.853034, ppl: 17.340321 +epoch: 1, batch: 19733, sum loss: 4348.051758, avg loss: 2.354116, ppl: 10.528817 +epoch: 1, batch: 19734, sum loss: 3735.940918, avg loss: 2.502305, ppl: 12.210602 +epoch: 1, batch: 19735, sum loss: 5261.031738, avg loss: 2.825474, ppl: 16.868931 +epoch: 1, batch: 19736, sum loss: 5026.647461, avg loss: 2.778689, ppl: 16.097897 +epoch: 1, batch: 19737, sum loss: 4938.089844, avg loss: 2.891153, ppl: 18.014071 +epoch: 1, batch: 19738, sum loss: 4736.815918, avg loss: 2.960510, ppl: 19.307812 +epoch: 1, batch: 19739, sum loss: 4309.646484, avg loss: 2.771477, ppl: 15.982222 +epoch: 1, batch: 19740, sum loss: 4852.306641, avg loss: 2.772747, ppl: 16.002525 +epoch: 1, batch: 19741, sum loss: 4717.730957, avg loss: 3.116071, ppl: 22.557571 +epoch: 1, batch: 19742, sum loss: 4990.424316, avg loss: 2.778633, ppl: 16.097000 +epoch: 1, batch: 19743, sum loss: 3630.430908, avg loss: 2.589466, ppl: 13.322654 +epoch: 1, batch: 19744, sum loss: 4812.268555, avg loss: 2.697460, ppl: 14.841988 +epoch: 1, batch: 19745, sum loss: 4481.324707, avg loss: 2.867130, ppl: 17.586477 +epoch: 1, batch: 19746, sum loss: 6185.514160, avg loss: 2.921830, ppl: 18.575249 +epoch: 1, batch: 19747, sum loss: 4502.183105, avg loss: 2.736889, ppl: 15.438886 +epoch: 1, batch: 19748, sum loss: 4347.096680, avg loss: 2.645829, ppl: 14.095121 +epoch: 1, batch: 19749, sum loss: 4558.317383, avg loss: 2.616715, ppl: 13.690675 +epoch: 1, batch: 19750, sum loss: 4387.970215, avg loss: 2.773685, ppl: 16.017553 +epoch: 1, batch: 19751, sum loss: 4430.685547, avg loss: 2.875201, ppl: 17.728996 +epoch: 1, batch: 19752, sum loss: 
4419.251465, avg loss: 2.699604, ppl: 14.873837 +epoch: 1, batch: 19753, sum loss: 3934.729248, avg loss: 2.495073, ppl: 12.122614 +epoch: 1, batch: 19754, sum loss: 4390.473145, avg loss: 2.649652, ppl: 14.149111 +epoch: 1, batch: 19755, sum loss: 4670.775879, avg loss: 2.717147, ppl: 15.137076 +epoch: 1, batch: 19756, sum loss: 4048.223877, avg loss: 2.546053, ppl: 12.756650 +epoch: 1, batch: 19757, sum loss: 4858.406250, avg loss: 2.793793, ppl: 16.342894 +epoch: 1, batch: 19758, sum loss: 3492.490234, avg loss: 2.437188, ppl: 11.440823 +epoch: 1, batch: 19759, sum loss: 4544.489746, avg loss: 2.760930, ppl: 15.814537 +epoch: 1, batch: 19760, sum loss: 4164.072266, avg loss: 2.642178, ppl: 14.043756 +epoch: 1, batch: 19761, sum loss: 4473.343262, avg loss: 2.744382, ppl: 15.555004 +epoch: 1, batch: 19762, sum loss: 3817.200928, avg loss: 2.609160, ppl: 13.587632 +epoch: 1, batch: 19763, sum loss: 4811.036133, avg loss: 2.526805, ppl: 12.513461 +epoch: 1, batch: 19764, sum loss: 4362.365234, avg loss: 2.682881, ppl: 14.627179 +epoch: 1, batch: 19765, sum loss: 5289.488281, avg loss: 2.773722, ppl: 16.018145 +epoch: 1, batch: 19766, sum loss: 4424.820312, avg loss: 2.651181, ppl: 14.170761 +epoch: 1, batch: 19767, sum loss: 4608.584473, avg loss: 2.841297, ppl: 17.137985 +epoch: 1, batch: 19768, sum loss: 5870.805176, avg loss: 3.218643, ppl: 24.994184 +epoch: 1, batch: 19769, sum loss: 4676.702637, avg loss: 2.883294, ppl: 17.873047 +epoch: 1, batch: 19770, sum loss: 4881.101562, avg loss: 2.450352, ppl: 11.592431 +epoch: 1, batch: 19771, sum loss: 4451.826172, avg loss: 2.651475, ppl: 14.174927 +epoch: 1, batch: 19772, sum loss: 4182.026855, avg loss: 2.643506, ppl: 14.062421 +epoch: 1, batch: 19773, sum loss: 4092.688232, avg loss: 2.413142, ppl: 11.168996 +epoch: 1, batch: 19774, sum loss: 4786.037598, avg loss: 2.761707, ppl: 15.826830 +epoch: 1, batch: 19775, sum loss: 4464.449219, avg loss: 2.766078, ppl: 15.896163 +epoch: 1, batch: 19776, sum loss: 
4380.086426, avg loss: 2.693780, ppl: 14.787466 +epoch: 1, batch: 19777, sum loss: 4845.589355, avg loss: 2.739169, ppl: 15.474119 +epoch: 1, batch: 19778, sum loss: 4144.580566, avg loss: 2.376480, ppl: 10.766932 +epoch: 1, batch: 19779, sum loss: 6805.451172, avg loss: 2.889788, ppl: 17.989502 +epoch: 1, batch: 19780, sum loss: 4798.832520, avg loss: 2.586972, ppl: 13.289467 +epoch: 1, batch: 19781, sum loss: 4323.083008, avg loss: 2.593331, ppl: 13.374249 +epoch: 1, batch: 19782, sum loss: 3697.375000, avg loss: 2.488139, ppl: 12.038853 +epoch: 1, batch: 19783, sum loss: 4450.916992, avg loss: 2.759403, ppl: 15.790413 +epoch: 1, batch: 19784, sum loss: 4750.939941, avg loss: 2.895149, ppl: 18.086201 +epoch: 1, batch: 19785, sum loss: 4030.000488, avg loss: 2.756498, ppl: 15.744614 +epoch: 1, batch: 19786, sum loss: 4773.387207, avg loss: 2.643072, ppl: 14.056314 +epoch: 1, batch: 19787, sum loss: 4295.453613, avg loss: 2.552260, ppl: 12.836080 +epoch: 1, batch: 19788, sum loss: 5367.965820, avg loss: 2.749983, ppl: 15.642360 +epoch: 1, batch: 19789, sum loss: 4888.488770, avg loss: 2.693382, ppl: 14.781587 +epoch: 1, batch: 19790, sum loss: 4822.907227, avg loss: 2.848734, ppl: 17.265917 +epoch: 1, batch: 19791, sum loss: 4853.406250, avg loss: 2.563870, ppl: 12.985978 +epoch: 1, batch: 19792, sum loss: 5493.481445, avg loss: 2.942411, ppl: 18.961506 +epoch: 1, batch: 19793, sum loss: 4573.125977, avg loss: 2.898052, ppl: 18.138777 +epoch: 1, batch: 19794, sum loss: 4649.649414, avg loss: 2.643348, ppl: 14.060202 +epoch: 1, batch: 19795, sum loss: 5115.624512, avg loss: 2.718185, ppl: 15.152798 +epoch: 1, batch: 19796, sum loss: 4553.120117, avg loss: 2.710191, ppl: 15.032140 +epoch: 1, batch: 19797, sum loss: 4993.380371, avg loss: 2.678852, ppl: 14.568360 +epoch: 1, batch: 19798, sum loss: 4655.775391, avg loss: 2.691200, ppl: 14.749358 +epoch: 1, batch: 19799, sum loss: 4883.887695, avg loss: 2.748389, ppl: 15.617455 +epoch: 1, batch: 19800, sum loss: 
5521.872559, avg loss: 2.788824, ppl: 16.261890 +epoch: 1, batch: 19801, sum loss: 4716.388672, avg loss: 2.839488, ppl: 17.106997 +epoch: 1, batch: 19802, sum loss: 5389.741211, avg loss: 2.977758, ppl: 19.643721 +epoch: 1, batch: 19803, sum loss: 4903.625000, avg loss: 2.864267, ppl: 17.536192 +epoch: 1, batch: 19804, sum loss: 4045.331055, avg loss: 2.360170, ppl: 10.592751 +epoch: 1, batch: 19805, sum loss: 4770.277344, avg loss: 2.688995, ppl: 14.716876 +epoch: 1, batch: 19806, sum loss: 4589.729004, avg loss: 2.503944, ppl: 12.230636 +epoch: 1, batch: 19807, sum loss: 4020.762451, avg loss: 2.367940, ppl: 10.675381 +epoch: 1, batch: 19808, sum loss: 5313.608887, avg loss: 2.950366, ppl: 19.112944 +epoch: 1, batch: 19809, sum loss: 5393.203125, avg loss: 2.904256, ppl: 18.251652 +epoch: 1, batch: 19810, sum loss: 4726.364258, avg loss: 2.785129, ppl: 16.201912 +epoch: 1, batch: 19811, sum loss: 4445.486816, avg loss: 2.768049, ppl: 15.927529 +epoch: 1, batch: 19812, sum loss: 4656.902832, avg loss: 3.014177, ppl: 20.372314 +epoch: 1, batch: 19813, sum loss: 5321.243652, avg loss: 2.836484, ppl: 17.055691 +epoch: 1, batch: 19814, sum loss: 5213.192383, avg loss: 2.897828, ppl: 18.134712 +epoch: 1, batch: 19815, sum loss: 5524.187988, avg loss: 2.875684, ppl: 17.737545 +epoch: 1, batch: 19816, sum loss: 4397.793457, avg loss: 2.646085, ppl: 14.098738 +epoch: 1, batch: 19817, sum loss: 4134.052734, avg loss: 2.595137, ppl: 13.398418 +epoch: 1, batch: 19818, sum loss: 5063.299805, avg loss: 2.955808, ppl: 19.217251 +epoch: 1, batch: 19819, sum loss: 4839.892578, avg loss: 2.805735, ppl: 16.539227 +epoch: 1, batch: 19820, sum loss: 4259.229980, avg loss: 2.660356, ppl: 14.301380 +epoch: 1, batch: 19821, sum loss: 4555.227051, avg loss: 2.600016, ppl: 13.463948 +epoch: 1, batch: 19822, sum loss: 4648.048340, avg loss: 2.692960, ppl: 14.775339 +epoch: 1, batch: 19823, sum loss: 4997.795410, avg loss: 2.658402, ppl: 14.273458 +epoch: 1, batch: 19824, sum loss: 
5277.041504, avg loss: 2.710345, ppl: 15.034462 +epoch: 1, batch: 19825, sum loss: 4006.712158, avg loss: 2.685464, ppl: 14.665003 +epoch: 1, batch: 19826, sum loss: 4970.650879, avg loss: 2.543833, ppl: 12.728372 +epoch: 1, batch: 19827, sum loss: 4487.172852, avg loss: 2.442664, ppl: 11.503642 +epoch: 1, batch: 19828, sum loss: 3511.811279, avg loss: 2.480093, ppl: 11.942372 +epoch: 1, batch: 19829, sum loss: 3976.668457, avg loss: 2.590663, ppl: 13.338618 +epoch: 1, batch: 19830, sum loss: 4289.527344, avg loss: 2.407142, ppl: 11.102187 +epoch: 1, batch: 19831, sum loss: 4888.671875, avg loss: 2.807968, ppl: 16.576199 +epoch: 1, batch: 19832, sum loss: 4704.212891, avg loss: 2.723922, ppl: 15.239973 +epoch: 1, batch: 19833, sum loss: 4358.359375, avg loss: 2.595807, ppl: 13.407397 +epoch: 1, batch: 19834, sum loss: 3809.977295, avg loss: 2.533230, ppl: 12.594114 +epoch: 1, batch: 19835, sum loss: 4884.019043, avg loss: 2.854482, ppl: 17.365446 +epoch: 1, batch: 19836, sum loss: 4410.843750, avg loss: 2.694468, ppl: 14.797641 +epoch: 1, batch: 19837, sum loss: 3969.062012, avg loss: 2.439497, ppl: 11.467274 +epoch: 1, batch: 19838, sum loss: 4090.625000, avg loss: 2.607154, ppl: 13.560405 +epoch: 1, batch: 19839, sum loss: 4413.425781, avg loss: 2.811099, ppl: 16.628187 +epoch: 1, batch: 19840, sum loss: 3481.999023, avg loss: 2.332216, ppl: 10.300745 +epoch: 1, batch: 19841, sum loss: 4462.357910, avg loss: 2.638887, ppl: 13.997618 +epoch: 1, batch: 19842, sum loss: 3473.247070, avg loss: 2.383835, ppl: 10.846415 +epoch: 1, batch: 19843, sum loss: 3658.509033, avg loss: 2.364906, ppl: 10.643037 +epoch: 1, batch: 19844, sum loss: 4704.251465, avg loss: 2.735030, ppl: 15.410205 +epoch: 1, batch: 19845, sum loss: 4254.557129, avg loss: 2.669107, ppl: 14.427087 +epoch: 1, batch: 19846, sum loss: 4074.504639, avg loss: 2.441285, ppl: 11.487795 +epoch: 1, batch: 19847, sum loss: 4681.423340, avg loss: 2.808292, ppl: 16.581575 +epoch: 1, batch: 19848, sum loss: 
4822.675293, avg loss: 2.689724, ppl: 14.727613 +epoch: 1, batch: 19849, sum loss: 3257.027832, avg loss: 2.363590, ppl: 10.629046 +epoch: 1, batch: 19850, sum loss: 4244.181152, avg loss: 2.524795, ppl: 12.488338 +epoch: 1, batch: 19851, sum loss: 5761.753906, avg loss: 2.976112, ppl: 19.611425 +epoch: 1, batch: 19852, sum loss: 4495.312012, avg loss: 2.820146, ppl: 16.779293 +epoch: 1, batch: 19853, sum loss: 4007.242920, avg loss: 2.306991, ppl: 10.044155 +epoch: 1, batch: 19854, sum loss: 5183.632812, avg loss: 2.754321, ppl: 15.710375 +epoch: 1, batch: 19855, sum loss: 4247.124512, avg loss: 2.642890, ppl: 14.053763 +epoch: 1, batch: 19856, sum loss: 3635.007812, avg loss: 2.536642, ppl: 12.637165 +epoch: 1, batch: 19857, sum loss: 4827.990723, avg loss: 2.958328, ppl: 19.265728 +epoch: 1, batch: 19858, sum loss: 4843.598633, avg loss: 2.993572, ppl: 19.956831 +epoch: 1, batch: 19859, sum loss: 5134.202637, avg loss: 2.688064, ppl: 14.703188 +epoch: 1, batch: 19860, sum loss: 5611.828125, avg loss: 2.673572, ppl: 14.491646 +epoch: 1, batch: 19861, sum loss: 4240.776367, avg loss: 2.608104, ppl: 13.573285 +epoch: 1, batch: 19862, sum loss: 3871.816650, avg loss: 2.432046, ppl: 11.382143 +epoch: 1, batch: 19863, sum loss: 4628.125488, avg loss: 2.576907, ppl: 13.156387 +epoch: 1, batch: 19864, sum loss: 4671.842285, avg loss: 2.956862, ppl: 19.237514 +epoch: 1, batch: 19865, sum loss: 4244.351562, avg loss: 2.546102, ppl: 12.757274 +epoch: 1, batch: 19866, sum loss: 3898.395508, avg loss: 2.735716, ppl: 15.420782 +epoch: 1, batch: 19867, sum loss: 4774.702148, avg loss: 2.835334, ppl: 17.036087 +epoch: 1, batch: 19868, sum loss: 5317.238770, avg loss: 2.894523, ppl: 18.074881 +epoch: 1, batch: 19869, sum loss: 4573.471680, avg loss: 2.634488, ppl: 13.936180 +epoch: 1, batch: 19870, sum loss: 4088.720215, avg loss: 2.538002, ppl: 12.654356 +epoch: 1, batch: 19871, sum loss: 4609.729492, avg loss: 2.676962, ppl: 14.540856 +epoch: 1, batch: 19872, sum loss: 
5030.528809, avg loss: 2.690122, ppl: 14.733479 +epoch: 1, batch: 19873, sum loss: 4598.480469, avg loss: 2.927104, ppl: 18.673477 +epoch: 1, batch: 19874, sum loss: 5351.902344, avg loss: 2.730562, ppl: 15.341510 +epoch: 1, batch: 19875, sum loss: 4016.021973, avg loss: 2.514729, ppl: 12.363258 +epoch: 1, batch: 19876, sum loss: 5442.152344, avg loss: 2.924316, ppl: 18.621489 +epoch: 1, batch: 19877, sum loss: 4284.141602, avg loss: 2.523052, ppl: 12.466583 +epoch: 1, batch: 19878, sum loss: 4252.673340, avg loss: 2.626729, ppl: 13.828457 +epoch: 1, batch: 19879, sum loss: 4917.053711, avg loss: 2.855432, ppl: 17.381941 +epoch: 1, batch: 19880, sum loss: 4159.631836, avg loss: 2.725840, ppl: 15.269237 +epoch: 1, batch: 19881, sum loss: 4246.379883, avg loss: 2.955031, ppl: 19.202326 +epoch: 1, batch: 19882, sum loss: 4906.384766, avg loss: 2.682551, ppl: 14.622342 +epoch: 1, batch: 19883, sum loss: 4832.993164, avg loss: 2.655491, ppl: 14.231971 +epoch: 1, batch: 19884, sum loss: 4840.968750, avg loss: 2.683464, ppl: 14.635701 +epoch: 1, batch: 19885, sum loss: 4707.383789, avg loss: 2.686863, ppl: 14.685534 +epoch: 1, batch: 19886, sum loss: 4793.829102, avg loss: 2.616719, ppl: 13.690731 +epoch: 1, batch: 19887, sum loss: 4593.515625, avg loss: 2.901778, ppl: 18.206495 +epoch: 1, batch: 19888, sum loss: 4837.605957, avg loss: 3.004724, ppl: 20.180647 +epoch: 1, batch: 19889, sum loss: 3999.586426, avg loss: 2.471932, ppl: 11.845312 +epoch: 1, batch: 19890, sum loss: 4108.270508, avg loss: 2.690419, ppl: 14.737856 +epoch: 1, batch: 19891, sum loss: 4211.941406, avg loss: 2.555790, ppl: 12.881468 +epoch: 1, batch: 19892, sum loss: 4799.355469, avg loss: 2.648651, ppl: 14.134956 +epoch: 1, batch: 19893, sum loss: 4961.002441, avg loss: 2.919955, ppl: 18.540445 +epoch: 1, batch: 19894, sum loss: 4850.546387, avg loss: 2.693252, ppl: 14.779655 +epoch: 1, batch: 19895, sum loss: 4825.899902, avg loss: 2.984478, ppl: 19.776167 +epoch: 1, batch: 19896, sum loss: 
3680.644775, avg loss: 2.351850, ppl: 10.504981 +epoch: 1, batch: 19897, sum loss: 4951.393555, avg loss: 2.773890, ppl: 16.020834 +epoch: 1, batch: 19898, sum loss: 4114.388184, avg loss: 2.573101, ppl: 13.106401 +epoch: 1, batch: 19899, sum loss: 4082.928467, avg loss: 2.610568, ppl: 13.606778 +epoch: 1, batch: 19900, sum loss: 3803.394043, avg loss: 2.350676, ppl: 10.492661 +epoch: 1, batch: 19901, sum loss: 3598.798096, avg loss: 2.594663, ppl: 13.392078 +epoch: 1, batch: 19902, sum loss: 4633.048828, avg loss: 2.701486, ppl: 14.901861 +epoch: 1, batch: 19903, sum loss: 4437.346680, avg loss: 2.572375, ppl: 13.096893 +epoch: 1, batch: 19904, sum loss: 4027.950195, avg loss: 2.692480, ppl: 14.768257 +epoch: 1, batch: 19905, sum loss: 4955.462891, avg loss: 2.648564, ppl: 14.133726 +epoch: 1, batch: 19906, sum loss: 4340.606934, avg loss: 2.711185, ppl: 15.047095 +epoch: 1, batch: 19907, sum loss: 4744.342773, avg loss: 3.012281, ppl: 20.333727 +epoch: 1, batch: 19908, sum loss: 4540.941406, avg loss: 2.796146, ppl: 16.381393 +epoch: 1, batch: 19909, sum loss: 3275.083740, avg loss: 2.544743, ppl: 12.739948 +epoch: 1, batch: 19910, sum loss: 4882.743164, avg loss: 2.732369, ppl: 15.369253 +epoch: 1, batch: 19911, sum loss: 5345.062988, avg loss: 2.778099, ppl: 16.088413 +epoch: 1, batch: 19912, sum loss: 4188.127930, avg loss: 2.390484, ppl: 10.918778 +epoch: 1, batch: 19913, sum loss: 4792.975098, avg loss: 2.770506, ppl: 15.966706 +epoch: 1, batch: 19914, sum loss: 5065.536133, avg loss: 2.953665, ppl: 19.176111 +epoch: 1, batch: 19915, sum loss: 5308.607910, avg loss: 2.942687, ppl: 18.966747 +epoch: 1, batch: 19916, sum loss: 4110.884766, avg loss: 2.461608, ppl: 11.723644 +epoch: 1, batch: 19917, sum loss: 4047.485352, avg loss: 2.512406, ppl: 12.334567 +epoch: 1, batch: 19918, sum loss: 3859.168213, avg loss: 2.512479, ppl: 12.335476 +epoch: 1, batch: 19919, sum loss: 5123.609375, avg loss: 2.953089, ppl: 19.165064 +epoch: 1, batch: 19920, sum loss: 
3982.362305, avg loss: 2.582596, ppl: 13.231437 +epoch: 1, batch: 19921, sum loss: 3407.294434, avg loss: 2.585201, ppl: 13.265949 +epoch: 1, batch: 19922, sum loss: 4224.018555, avg loss: 2.423419, ppl: 11.284369 +epoch: 1, batch: 19923, sum loss: 5178.296387, avg loss: 2.721123, ppl: 15.197375 +epoch: 1, batch: 19924, sum loss: 4390.283691, avg loss: 2.635224, ppl: 13.946438 +epoch: 1, batch: 19925, sum loss: 5324.572754, avg loss: 2.646408, ppl: 14.103290 +epoch: 1, batch: 19926, sum loss: 4764.084473, avg loss: 2.829029, ppl: 16.929012 +epoch: 1, batch: 19927, sum loss: 5229.788086, avg loss: 3.154275, ppl: 23.436045 +epoch: 1, batch: 19928, sum loss: 5361.271973, avg loss: 3.018734, ppl: 20.465370 +epoch: 1, batch: 19929, sum loss: 3821.508789, avg loss: 2.535839, ppl: 12.627015 +epoch: 1, batch: 19930, sum loss: 4561.432617, avg loss: 2.877875, ppl: 17.776464 +epoch: 1, batch: 19931, sum loss: 4637.580566, avg loss: 2.716802, ppl: 15.131851 +epoch: 1, batch: 19932, sum loss: 5140.251953, avg loss: 3.059674, ppl: 21.320601 +epoch: 1, batch: 19933, sum loss: 5489.258789, avg loss: 3.047895, ppl: 21.070942 +epoch: 1, batch: 19934, sum loss: 4618.931152, avg loss: 2.858249, ppl: 17.430986 +epoch: 1, batch: 19935, sum loss: 4444.350098, avg loss: 2.539629, ppl: 12.674961 +epoch: 1, batch: 19936, sum loss: 4307.303223, avg loss: 2.677006, ppl: 14.541494 +epoch: 1, batch: 19937, sum loss: 4342.146484, avg loss: 2.454577, ppl: 11.641508 +epoch: 1, batch: 19938, sum loss: 5109.025879, avg loss: 2.773630, ppl: 16.016668 +epoch: 1, batch: 19939, sum loss: 3917.736816, avg loss: 2.679711, ppl: 14.580873 +epoch: 1, batch: 19940, sum loss: 4953.584473, avg loss: 2.759657, ppl: 15.794427 +epoch: 1, batch: 19941, sum loss: 3967.414551, avg loss: 2.474993, ppl: 11.881630 +epoch: 1, batch: 19942, sum loss: 3859.726807, avg loss: 2.661881, ppl: 14.323202 +epoch: 1, batch: 19943, sum loss: 4939.437012, avg loss: 2.744132, ppl: 15.551106 +epoch: 1, batch: 19944, sum loss: 
5304.071289, avg loss: 2.695158, ppl: 14.807865 +epoch: 1, batch: 19945, sum loss: 4673.441895, avg loss: 2.656874, ppl: 14.251675 +epoch: 1, batch: 19946, sum loss: 4673.787109, avg loss: 2.670735, ppl: 14.450592 +epoch: 1, batch: 19947, sum loss: 4195.799316, avg loss: 2.610952, ppl: 13.611999 +epoch: 1, batch: 19948, sum loss: 4439.792480, avg loss: 2.860691, ppl: 17.473602 +epoch: 1, batch: 19949, sum loss: 4729.821777, avg loss: 2.718288, ppl: 15.154362 +epoch: 1, batch: 19950, sum loss: 5383.162598, avg loss: 2.969202, ppl: 19.476368 +epoch: 1, batch: 19951, sum loss: 4565.794434, avg loss: 2.770506, ppl: 15.966717 +epoch: 1, batch: 19952, sum loss: 4586.406738, avg loss: 2.579531, ppl: 13.190956 +epoch: 1, batch: 19953, sum loss: 3911.397949, avg loss: 2.444624, ppl: 11.526212 +epoch: 1, batch: 19954, sum loss: 4396.640137, avg loss: 2.730832, ppl: 15.345654 +epoch: 1, batch: 19955, sum loss: 4888.955078, avg loss: 2.698099, ppl: 14.851471 +epoch: 1, batch: 19956, sum loss: 4930.852539, avg loss: 2.870112, ppl: 17.638996 +epoch: 1, batch: 19957, sum loss: 4553.489258, avg loss: 3.019555, ppl: 20.482178 +epoch: 1, batch: 19958, sum loss: 4580.993652, avg loss: 2.492379, ppl: 12.090010 +epoch: 1, batch: 19959, sum loss: 4104.161621, avg loss: 2.507124, ppl: 12.269591 +epoch: 1, batch: 19960, sum loss: 4568.680664, avg loss: 2.563794, ppl: 12.984987 +epoch: 1, batch: 19961, sum loss: 3636.708008, avg loss: 2.485788, ppl: 12.010585 +epoch: 1, batch: 19962, sum loss: 4335.781738, avg loss: 2.691361, ppl: 14.751735 +epoch: 1, batch: 19963, sum loss: 4019.927002, avg loss: 2.583501, ppl: 13.243418 +epoch: 1, batch: 19964, sum loss: 5224.994141, avg loss: 2.776299, ppl: 16.059471 +epoch: 1, batch: 19965, sum loss: 5213.491211, avg loss: 2.749732, ppl: 15.638436 +epoch: 1, batch: 19966, sum loss: 4707.888184, avg loss: 2.560026, ppl: 12.936159 +epoch: 1, batch: 19967, sum loss: 4700.152832, avg loss: 2.489488, ppl: 12.055099 +epoch: 1, batch: 19968, sum loss: 
4359.397949, avg loss: 2.667930, ppl: 14.410111 +epoch: 1, batch: 19969, sum loss: 4136.501465, avg loss: 2.490368, ppl: 12.065717 +epoch: 1, batch: 19970, sum loss: 5865.752930, avg loss: 2.977540, ppl: 19.639435 +epoch: 1, batch: 19971, sum loss: 5045.899414, avg loss: 2.746815, ppl: 15.592893 +epoch: 1, batch: 19972, sum loss: 6143.307617, avg loss: 3.135941, ppl: 23.010273 +epoch: 1, batch: 19973, sum loss: 4580.308594, avg loss: 2.913682, ppl: 18.424517 +epoch: 1, batch: 19974, sum loss: 4199.892578, avg loss: 2.499936, ppl: 12.181716 +epoch: 1, batch: 19975, sum loss: 4243.483887, avg loss: 2.593817, ppl: 13.380745 +epoch: 1, batch: 19976, sum loss: 3353.314209, avg loss: 2.589432, ppl: 13.322199 +epoch: 1, batch: 19977, sum loss: 4332.940430, avg loss: 2.621258, ppl: 13.753021 +epoch: 1, batch: 19978, sum loss: 4655.477051, avg loss: 3.017160, ppl: 20.433172 +epoch: 1, batch: 19979, sum loss: 3616.630859, avg loss: 2.195890, ppl: 8.987996 +epoch: 1, batch: 19980, sum loss: 3529.829834, avg loss: 2.475337, ppl: 11.885707 +epoch: 1, batch: 19981, sum loss: 4080.550293, avg loss: 2.753408, ppl: 15.696033 +epoch: 1, batch: 19982, sum loss: 4571.852539, avg loss: 2.695668, ppl: 14.815409 +epoch: 1, batch: 19983, sum loss: 3851.244141, avg loss: 2.520448, ppl: 12.434162 +epoch: 1, batch: 19984, sum loss: 4274.370117, avg loss: 2.726001, ppl: 15.271697 +epoch: 1, batch: 19985, sum loss: 4592.580078, avg loss: 2.706294, ppl: 14.973674 +epoch: 1, batch: 19986, sum loss: 4836.334961, avg loss: 2.677926, ppl: 14.554879 +epoch: 1, batch: 19987, sum loss: 4181.692871, avg loss: 2.336141, ppl: 10.341253 +epoch: 1, batch: 19988, sum loss: 4324.317383, avg loss: 2.503947, ppl: 12.230680 +epoch: 1, batch: 19989, sum loss: 4205.050781, avg loss: 2.592510, ppl: 13.363268 +epoch: 1, batch: 19990, sum loss: 4960.229980, avg loss: 2.907521, ppl: 18.311340 +epoch: 1, batch: 19991, sum loss: 4827.625000, avg loss: 2.713673, ppl: 15.084585 +epoch: 1, batch: 19992, sum loss: 
4684.317383, avg loss: 2.788284, ppl: 16.253111 +epoch: 1, batch: 19993, sum loss: 4157.150879, avg loss: 2.784428, ppl: 16.190552 +epoch: 1, batch: 19994, sum loss: 4975.223145, avg loss: 2.846238, ppl: 17.222862 +epoch: 1, batch: 19995, sum loss: 5391.475098, avg loss: 2.892422, ppl: 18.036945 +epoch: 1, batch: 19996, sum loss: 4717.572266, avg loss: 2.617965, ppl: 13.707796 +epoch: 1, batch: 19997, sum loss: 5766.982422, avg loss: 3.020944, ppl: 20.510653 +epoch: 1, batch: 19998, sum loss: 3741.173340, avg loss: 2.531240, ppl: 12.569088 +epoch: 1, batch: 19999, sum loss: 4710.567383, avg loss: 2.955187, ppl: 19.205307 +epoch: 1, batch: 20000, sum loss: 4092.168213, avg loss: 2.667645, ppl: 14.406010 +epoch: 1, batch: 20001, sum loss: 5596.520020, avg loss: 2.846653, ppl: 17.230021 +epoch: 1, batch: 20002, sum loss: 4662.195312, avg loss: 2.673277, ppl: 14.487365 +epoch: 1, batch: 20003, sum loss: 4558.871094, avg loss: 2.492548, ppl: 12.092053 +epoch: 1, batch: 20004, sum loss: 5545.967773, avg loss: 2.823813, ppl: 16.840939 +epoch: 1, batch: 20005, sum loss: 4985.576660, avg loss: 3.034435, ppl: 20.789225 +epoch: 1, batch: 20006, sum loss: 3868.403809, avg loss: 2.380556, ppl: 10.810913 +epoch: 1, batch: 20007, sum loss: 4345.985352, avg loss: 2.773443, ppl: 16.013678 +epoch: 1, batch: 20008, sum loss: 4237.540039, avg loss: 2.645156, ppl: 14.085641 +epoch: 1, batch: 20009, sum loss: 5453.314941, avg loss: 2.885352, ppl: 17.909868 +epoch: 1, batch: 20010, sum loss: 4669.625977, avg loss: 2.686781, ppl: 14.684337 +epoch: 1, batch: 20011, sum loss: 5005.673828, avg loss: 3.204657, ppl: 24.647041 +epoch: 1, batch: 20012, sum loss: 5189.483398, avg loss: 2.828057, ppl: 16.912561 +epoch: 1, batch: 20013, sum loss: 4149.444336, avg loss: 2.555077, ppl: 12.872286 +epoch: 1, batch: 20014, sum loss: 4201.254395, avg loss: 2.569575, ppl: 13.060267 +epoch: 1, batch: 20015, sum loss: 4442.753418, avg loss: 2.740748, ppl: 15.498580 +epoch: 1, batch: 20016, sum loss: 
5178.208984, avg loss: 2.803578, ppl: 16.503597 +epoch: 1, batch: 20017, sum loss: 4627.463379, avg loss: 2.612910, ppl: 13.638679 +epoch: 1, batch: 20018, sum loss: 3948.977539, avg loss: 2.486762, ppl: 12.022283 +epoch: 1, batch: 20019, sum loss: 4584.810547, avg loss: 2.773630, ppl: 16.016668 +epoch: 1, batch: 20020, sum loss: 5195.765625, avg loss: 2.813084, ppl: 16.661221 +epoch: 1, batch: 20021, sum loss: 3807.363037, avg loss: 2.277131, ppl: 9.748672 +epoch: 1, batch: 20022, sum loss: 4130.380859, avg loss: 2.468847, ppl: 11.808824 +epoch: 1, batch: 20023, sum loss: 5074.330566, avg loss: 2.642881, ppl: 14.053629 +epoch: 1, batch: 20024, sum loss: 3785.802979, avg loss: 2.575376, ppl: 13.136259 +epoch: 1, batch: 20025, sum loss: 4251.000977, avg loss: 2.570134, ppl: 13.067572 +epoch: 1, batch: 20026, sum loss: 4882.654785, avg loss: 2.809353, ppl: 16.599169 +epoch: 1, batch: 20027, sum loss: 4287.061035, avg loss: 2.585682, ppl: 13.272340 +epoch: 1, batch: 20028, sum loss: 3451.905762, avg loss: 2.329221, ppl: 10.269940 +epoch: 1, batch: 20029, sum loss: 4345.308594, avg loss: 2.609795, ppl: 13.596265 +epoch: 1, batch: 20030, sum loss: 4350.726074, avg loss: 2.575918, ppl: 13.143383 +epoch: 1, batch: 20031, sum loss: 4492.267578, avg loss: 2.793699, ppl: 16.341351 +epoch: 1, batch: 20032, sum loss: 5318.548340, avg loss: 2.907900, ppl: 18.318287 +epoch: 1, batch: 20033, sum loss: 5907.556641, avg loss: 2.832002, ppl: 16.979422 +epoch: 1, batch: 20034, sum loss: 4010.036133, avg loss: 2.641658, ppl: 14.036451 +epoch: 1, batch: 20035, sum loss: 4723.281738, avg loss: 2.632822, ppl: 13.912971 +epoch: 1, batch: 20036, sum loss: 4348.144531, avg loss: 2.556228, ppl: 12.887120 +epoch: 1, batch: 20037, sum loss: 3951.077393, avg loss: 2.508621, ppl: 12.287967 +epoch: 1, batch: 20038, sum loss: 5283.729004, avg loss: 2.817989, ppl: 16.743145 +epoch: 1, batch: 20039, sum loss: 4989.356445, avg loss: 2.714557, ppl: 15.097926 +epoch: 1, batch: 20040, sum loss: 
4574.705078, avg loss: 2.774230, ppl: 16.026278 +epoch: 1, batch: 20041, sum loss: 5163.866211, avg loss: 2.761426, ppl: 15.822386 +epoch: 1, batch: 20042, sum loss: 4595.131348, avg loss: 2.769820, ppl: 15.955761 +epoch: 1, batch: 20043, sum loss: 5219.347168, avg loss: 2.870928, ppl: 17.653395 +epoch: 1, batch: 20044, sum loss: 4259.527832, avg loss: 2.665537, ppl: 14.375666 +epoch: 1, batch: 20045, sum loss: 3732.125977, avg loss: 2.306629, ppl: 10.040523 +epoch: 1, batch: 20046, sum loss: 4078.834473, avg loss: 2.711991, ppl: 15.059226 +epoch: 1, batch: 20047, sum loss: 4531.579102, avg loss: 2.537278, ppl: 12.645209 +epoch: 1, batch: 20048, sum loss: 5251.778320, avg loss: 2.896734, ppl: 18.114885 +epoch: 1, batch: 20049, sum loss: 4772.421875, avg loss: 2.736480, ppl: 15.432570 +epoch: 1, batch: 20050, sum loss: 4248.468750, avg loss: 2.637163, ppl: 13.973500 +epoch: 1, batch: 20051, sum loss: 4870.318359, avg loss: 2.791013, ppl: 16.297525 +epoch: 1, batch: 20052, sum loss: 4862.162109, avg loss: 2.731552, ppl: 15.356700 +epoch: 1, batch: 20053, sum loss: 5587.679199, avg loss: 2.843603, ppl: 17.177540 +epoch: 1, batch: 20054, sum loss: 4495.494141, avg loss: 2.582133, ppl: 13.225322 +epoch: 1, batch: 20055, sum loss: 4518.736816, avg loss: 2.702594, ppl: 14.918381 +epoch: 1, batch: 20056, sum loss: 4084.143066, avg loss: 2.750265, ppl: 15.646772 +epoch: 1, batch: 20057, sum loss: 5639.545410, avg loss: 3.046756, ppl: 21.046957 +epoch: 1, batch: 20058, sum loss: 4737.893555, avg loss: 2.765846, ppl: 15.892472 +epoch: 1, batch: 20059, sum loss: 4193.319824, avg loss: 2.724704, ppl: 15.251903 +epoch: 1, batch: 20060, sum loss: 5439.867188, avg loss: 2.948437, ppl: 19.076122 +epoch: 1, batch: 20061, sum loss: 4233.177246, avg loss: 2.701453, ppl: 14.901375 +epoch: 1, batch: 20062, sum loss: 3496.740479, avg loss: 2.460760, ppl: 11.713715 +epoch: 1, batch: 20063, sum loss: 4328.280273, avg loss: 2.535607, ppl: 12.624086 +epoch: 1, batch: 20064, sum loss: 
5412.812500, avg loss: 2.764460, ppl: 15.870469 +epoch: 1, batch: 20065, sum loss: 4063.634033, avg loss: 2.623392, ppl: 13.782393 +epoch: 1, batch: 20066, sum loss: 4195.653320, avg loss: 2.604378, ppl: 13.522815 +epoch: 1, batch: 20067, sum loss: 3988.165771, avg loss: 2.452746, ppl: 11.620216 +epoch: 1, batch: 20068, sum loss: 4442.964844, avg loss: 2.522978, ppl: 12.465668 +epoch: 1, batch: 20069, sum loss: 4302.260254, avg loss: 2.618539, ppl: 13.715675 +epoch: 1, batch: 20070, sum loss: 4475.966797, avg loss: 2.543163, ppl: 12.719838 +epoch: 1, batch: 20071, sum loss: 4823.217285, avg loss: 2.711196, ppl: 15.047256 +epoch: 1, batch: 20072, sum loss: 5295.668945, avg loss: 2.919333, ppl: 18.528915 +epoch: 1, batch: 20073, sum loss: 4905.795898, avg loss: 2.696974, ppl: 14.834774 +epoch: 1, batch: 20074, sum loss: 4346.129883, avg loss: 2.759447, ppl: 15.791113 +epoch: 1, batch: 20075, sum loss: 4195.482422, avg loss: 2.699796, ppl: 14.876693 +epoch: 1, batch: 20076, sum loss: 4707.428711, avg loss: 2.798709, ppl: 16.423433 +epoch: 1, batch: 20077, sum loss: 4524.152832, avg loss: 2.478988, ppl: 11.929185 +epoch: 1, batch: 20078, sum loss: 4703.696289, avg loss: 2.831846, ppl: 16.976770 +epoch: 1, batch: 20079, sum loss: 4681.077148, avg loss: 2.894915, ppl: 18.081963 +epoch: 1, batch: 20080, sum loss: 4832.795410, avg loss: 2.847847, ppl: 17.250593 +epoch: 1, batch: 20081, sum loss: 5438.291992, avg loss: 3.072481, ppl: 21.595423 +epoch: 1, batch: 20082, sum loss: 5015.171875, avg loss: 2.897269, ppl: 18.124571 +epoch: 1, batch: 20083, sum loss: 4043.430664, avg loss: 2.877887, ppl: 17.776663 +epoch: 1, batch: 20084, sum loss: 5196.408203, avg loss: 3.047747, ppl: 21.067823 +epoch: 1, batch: 20085, sum loss: 4229.612305, avg loss: 2.594854, ppl: 13.394633 +epoch: 1, batch: 20086, sum loss: 4853.050293, avg loss: 2.779525, ppl: 16.111366 +epoch: 1, batch: 20087, sum loss: 5507.543457, avg loss: 2.757909, ppl: 15.766833 +epoch: 1, batch: 20088, sum loss: 
4918.949219, avg loss: 2.653155, ppl: 14.198763 +epoch: 1, batch: 20089, sum loss: 4370.238281, avg loss: 2.765974, ppl: 15.894506 +epoch: 1, batch: 20090, sum loss: 3711.599854, avg loss: 2.321201, ppl: 10.187899 +epoch: 1, batch: 20091, sum loss: 4849.089355, avg loss: 2.715056, ppl: 15.105452 +epoch: 1, batch: 20092, sum loss: 4533.727539, avg loss: 2.655962, ppl: 14.238680 +epoch: 1, batch: 20093, sum loss: 4434.853516, avg loss: 2.919587, ppl: 18.533638 +epoch: 1, batch: 20094, sum loss: 4529.716309, avg loss: 2.789234, ppl: 16.268557 +epoch: 1, batch: 20095, sum loss: 3751.389160, avg loss: 2.521095, ppl: 12.442211 +epoch: 1, batch: 20096, sum loss: 4297.742188, avg loss: 2.666093, ppl: 14.383664 +epoch: 1, batch: 20097, sum loss: 4664.155273, avg loss: 2.786234, ppl: 16.219820 +epoch: 1, batch: 20098, sum loss: 4631.578125, avg loss: 2.843203, ppl: 17.170681 +epoch: 1, batch: 20099, sum loss: 3894.650146, avg loss: 2.605117, ppl: 13.532809 +epoch: 1, batch: 20100, sum loss: 3538.437500, avg loss: 2.522051, ppl: 12.454115 +epoch: 1, batch: 20101, sum loss: 4480.385254, avg loss: 2.638625, ppl: 13.993951 +epoch: 1, batch: 20102, sum loss: 4304.110352, avg loss: 2.521447, ppl: 12.446596 +epoch: 1, batch: 20103, sum loss: 6048.332031, avg loss: 3.059349, ppl: 21.313669 +epoch: 1, batch: 20104, sum loss: 4923.376465, avg loss: 2.923620, ppl: 18.608538 +epoch: 1, batch: 20105, sum loss: 4083.126465, avg loss: 2.403253, ppl: 11.059092 +epoch: 1, batch: 20106, sum loss: 3590.357422, avg loss: 2.592316, ppl: 13.360678 +epoch: 1, batch: 20107, sum loss: 4140.935059, avg loss: 2.394989, ppl: 10.968072 +epoch: 1, batch: 20108, sum loss: 4434.885742, avg loss: 2.796271, ppl: 16.383436 +epoch: 1, batch: 20109, sum loss: 3843.870850, avg loss: 2.530527, ppl: 12.560125 +epoch: 1, batch: 20110, sum loss: 4690.557129, avg loss: 2.708174, ppl: 15.001853 +epoch: 1, batch: 20111, sum loss: 4039.391113, avg loss: 2.496533, ppl: 12.140335 +epoch: 1, batch: 20112, sum loss: 
4279.966797, avg loss: 2.609736, ppl: 13.595461 +epoch: 1, batch: 20113, sum loss: 3842.858643, avg loss: 2.516607, ppl: 12.386496 +epoch: 1, batch: 20114, sum loss: 4899.110352, avg loss: 2.546315, ppl: 12.759999 +epoch: 1, batch: 20115, sum loss: 4457.912598, avg loss: 2.438683, ppl: 11.457941 +epoch: 1, batch: 20116, sum loss: 3561.501953, avg loss: 2.454515, ppl: 11.640792 +epoch: 1, batch: 20117, sum loss: 4944.116211, avg loss: 2.879508, ppl: 17.805519 +epoch: 1, batch: 20118, sum loss: 4198.637207, avg loss: 2.643978, ppl: 14.069061 +epoch: 1, batch: 20119, sum loss: 4259.357422, avg loss: 2.424222, ppl: 11.293434 +epoch: 1, batch: 20120, sum loss: 3721.721924, avg loss: 2.398017, ppl: 11.001338 +epoch: 1, batch: 20121, sum loss: 4308.534668, avg loss: 2.604918, ppl: 13.530119 +epoch: 1, batch: 20122, sum loss: 4379.403320, avg loss: 2.647765, ppl: 14.122442 +epoch: 1, batch: 20123, sum loss: 5299.405762, avg loss: 2.752938, ppl: 15.688658 +epoch: 1, batch: 20124, sum loss: 3975.091309, avg loss: 2.685873, ppl: 14.670997 +epoch: 1, batch: 20125, sum loss: 4233.684570, avg loss: 2.464310, ppl: 11.755367 +epoch: 1, batch: 20126, sum loss: 4036.566406, avg loss: 2.571061, ppl: 13.079700 +epoch: 1, batch: 20127, sum loss: 4917.608398, avg loss: 2.802056, ppl: 16.478493 +epoch: 1, batch: 20128, sum loss: 3902.360840, avg loss: 2.479264, ppl: 11.932473 +epoch: 1, batch: 20129, sum loss: 4458.422852, avg loss: 2.580106, ppl: 13.198534 +epoch: 1, batch: 20130, sum loss: 4932.419922, avg loss: 2.824983, ppl: 16.860657 +epoch: 1, batch: 20131, sum loss: 5248.834961, avg loss: 2.872926, ppl: 17.688692 +epoch: 1, batch: 20132, sum loss: 4147.991211, avg loss: 2.544780, ppl: 12.740422 +epoch: 1, batch: 20133, sum loss: 4249.355469, avg loss: 2.589491, ppl: 13.322993 +epoch: 1, batch: 20134, sum loss: 6082.427734, avg loss: 2.800381, ppl: 16.450912 +epoch: 1, batch: 20135, sum loss: 5260.726562, avg loss: 2.851342, ppl: 17.311001 +epoch: 1, batch: 20136, sum loss: 
3904.539062, avg loss: 2.592655, ppl: 13.365214 +epoch: 1, batch: 20137, sum loss: 4898.641602, avg loss: 2.836504, ppl: 17.056025 +epoch: 1, batch: 20138, sum loss: 4026.371094, avg loss: 2.323353, ppl: 10.209851 +epoch: 1, batch: 20139, sum loss: 3732.298096, avg loss: 2.465190, ppl: 11.765722 +epoch: 1, batch: 20140, sum loss: 4147.866211, avg loss: 2.552533, ppl: 12.839584 +epoch: 1, batch: 20141, sum loss: 4610.987305, avg loss: 2.754473, ppl: 15.712758 +epoch: 1, batch: 20142, sum loss: 5623.849609, avg loss: 2.921480, ppl: 18.568752 +epoch: 1, batch: 20143, sum loss: 4034.353027, avg loss: 2.825177, ppl: 16.863934 +epoch: 1, batch: 20144, sum loss: 5306.847656, avg loss: 2.787210, ppl: 16.235659 +epoch: 1, batch: 20145, sum loss: 3306.921631, avg loss: 2.628714, ppl: 13.855934 +epoch: 1, batch: 20146, sum loss: 4637.408203, avg loss: 2.630407, ppl: 13.879422 +epoch: 1, batch: 20147, sum loss: 4886.708008, avg loss: 2.786036, ppl: 16.216618 +epoch: 1, batch: 20148, sum loss: 5241.112305, avg loss: 2.805735, ppl: 16.539221 +epoch: 1, batch: 20149, sum loss: 5498.253906, avg loss: 2.916846, ppl: 18.482893 +epoch: 1, batch: 20150, sum loss: 4299.292969, avg loss: 2.741896, ppl: 15.516375 +epoch: 1, batch: 20151, sum loss: 6109.059570, avg loss: 2.944125, ppl: 18.994040 +epoch: 1, batch: 20152, sum loss: 4495.021484, avg loss: 2.839559, ppl: 17.108212 +epoch: 1, batch: 20153, sum loss: 4350.617676, avg loss: 2.697221, ppl: 14.838443 +epoch: 1, batch: 20154, sum loss: 3810.275146, avg loss: 2.620547, ppl: 13.743237 +epoch: 1, batch: 20155, sum loss: 4628.787598, avg loss: 2.946396, ppl: 19.037218 +epoch: 1, batch: 20156, sum loss: 4533.501465, avg loss: 2.885743, ppl: 17.916868 +epoch: 1, batch: 20157, sum loss: 4480.315430, avg loss: 2.824915, ppl: 16.859514 +epoch: 1, batch: 20158, sum loss: 4687.455566, avg loss: 2.605589, ppl: 13.539203 +epoch: 1, batch: 20159, sum loss: 4668.024902, avg loss: 2.682773, ppl: 14.625592 +epoch: 1, batch: 20160, sum loss: 
3867.912109, avg loss: 2.662018, ppl: 14.325169 +epoch: 1, batch: 20161, sum loss: 4386.232422, avg loss: 2.566549, ppl: 13.020813 +epoch: 1, batch: 20162, sum loss: 5247.914062, avg loss: 2.976696, ppl: 19.622866 +epoch: 1, batch: 20163, sum loss: 4062.685791, avg loss: 2.360654, ppl: 10.597881 +epoch: 1, batch: 20164, sum loss: 4603.992188, avg loss: 2.690820, ppl: 14.743757 +epoch: 1, batch: 20165, sum loss: 5099.205078, avg loss: 2.840783, ppl: 17.129169 +epoch: 1, batch: 20166, sum loss: 5410.222656, avg loss: 2.822234, ppl: 16.814375 +epoch: 1, batch: 20167, sum loss: 5064.924805, avg loss: 2.772263, ppl: 15.994794 +epoch: 1, batch: 20168, sum loss: 5021.809082, avg loss: 2.802349, ppl: 16.483318 +epoch: 1, batch: 20169, sum loss: 4577.805664, avg loss: 2.692827, ppl: 14.773381 +epoch: 1, batch: 20170, sum loss: 3388.346924, avg loss: 2.195948, ppl: 8.988515 +epoch: 1, batch: 20171, sum loss: 4515.427734, avg loss: 2.830989, ppl: 16.962231 +epoch: 1, batch: 20172, sum loss: 3699.956055, avg loss: 2.379393, ppl: 10.798347 +epoch: 1, batch: 20173, sum loss: 4202.431152, avg loss: 2.739525, ppl: 15.479628 +epoch: 1, batch: 20174, sum loss: 5432.077148, avg loss: 3.046594, ppl: 21.043550 +epoch: 1, batch: 20175, sum loss: 5060.333496, avg loss: 2.825423, ppl: 16.868086 +epoch: 1, batch: 20176, sum loss: 4912.292969, avg loss: 2.841118, ppl: 17.134912 +epoch: 1, batch: 20177, sum loss: 5201.595703, avg loss: 2.958814, ppl: 19.275105 +epoch: 1, batch: 20178, sum loss: 4251.874023, avg loss: 2.622994, ppl: 13.776916 +epoch: 1, batch: 20179, sum loss: 4401.581543, avg loss: 2.669243, ppl: 14.429040 +epoch: 1, batch: 20180, sum loss: 4542.965332, avg loss: 2.648959, ppl: 14.139315 +epoch: 1, batch: 20181, sum loss: 4760.851562, avg loss: 2.820410, ppl: 16.783728 +epoch: 1, batch: 20182, sum loss: 4865.149414, avg loss: 2.762720, ppl: 15.842871 +epoch: 1, batch: 20183, sum loss: 5424.491211, avg loss: 3.050895, ppl: 21.134251 +epoch: 1, batch: 20184, sum loss: 
4297.331543, avg loss: 2.585639, ppl: 13.271764 +epoch: 1, batch: 20185, sum loss: 4429.681152, avg loss: 2.817863, ppl: 16.741041 +epoch: 1, batch: 20186, sum loss: 4642.220703, avg loss: 2.764872, ppl: 15.877012 +epoch: 1, batch: 20187, sum loss: 4893.375000, avg loss: 2.643639, ppl: 14.064285 +epoch: 1, batch: 20188, sum loss: 4579.837891, avg loss: 2.794288, ppl: 16.350981 +epoch: 1, batch: 20189, sum loss: 4283.171875, avg loss: 2.549507, ppl: 12.800794 +epoch: 1, batch: 20190, sum loss: 3694.680664, avg loss: 2.400702, ppl: 11.030920 +epoch: 1, batch: 20191, sum loss: 4952.696777, avg loss: 2.457914, ppl: 11.680422 +epoch: 1, batch: 20192, sum loss: 3618.604492, avg loss: 2.521676, ppl: 12.449439 +epoch: 1, batch: 20193, sum loss: 4484.576172, avg loss: 2.849159, ppl: 17.273245 +epoch: 1, batch: 20194, sum loss: 5021.990234, avg loss: 2.711658, ppl: 15.054212 +epoch: 1, batch: 20195, sum loss: 4527.458984, avg loss: 2.803380, ppl: 16.500328 +epoch: 1, batch: 20196, sum loss: 5292.927734, avg loss: 2.905010, ppl: 18.265421 +epoch: 1, batch: 20197, sum loss: 3915.651123, avg loss: 2.441179, ppl: 11.486576 +epoch: 1, batch: 20198, sum loss: 4885.863281, avg loss: 2.734115, ppl: 15.396111 +epoch: 1, batch: 20199, sum loss: 4643.285156, avg loss: 2.599824, ppl: 13.461365 +epoch: 1, batch: 20200, sum loss: 3909.788086, avg loss: 2.705736, ppl: 14.965322 +epoch: 1, batch: 20201, sum loss: 6160.881836, avg loss: 2.964813, ppl: 19.391081 +epoch: 1, batch: 20202, sum loss: 4316.365234, avg loss: 2.745780, ppl: 15.576752 +epoch: 1, batch: 20203, sum loss: 4226.873535, avg loss: 2.680326, ppl: 14.589845 +epoch: 1, batch: 20204, sum loss: 4187.782715, avg loss: 2.591450, ppl: 13.349111 +epoch: 1, batch: 20205, sum loss: 4762.161133, avg loss: 2.811193, ppl: 16.629745 +epoch: 1, batch: 20206, sum loss: 4819.045898, avg loss: 2.823108, ppl: 16.829082 +epoch: 1, batch: 20207, sum loss: 4593.420410, avg loss: 2.554739, ppl: 12.867938 +epoch: 1, batch: 20208, sum loss: 
4441.889648, avg loss: 2.710122, ppl: 15.031104 +epoch: 1, batch: 20209, sum loss: 4508.201172, avg loss: 2.733900, ppl: 15.392803 +epoch: 1, batch: 20210, sum loss: 4269.449707, avg loss: 2.310308, ppl: 10.077530 +epoch: 1, batch: 20211, sum loss: 5137.282715, avg loss: 2.811868, ppl: 16.640974 +epoch: 1, batch: 20212, sum loss: 3940.672119, avg loss: 2.542369, ppl: 12.709746 +epoch: 1, batch: 20213, sum loss: 4867.792480, avg loss: 2.518258, ppl: 12.406966 +epoch: 1, batch: 20214, sum loss: 4283.864258, avg loss: 2.502257, ppl: 12.210022 +epoch: 1, batch: 20215, sum loss: 4038.773193, avg loss: 2.556185, ppl: 12.886567 +epoch: 1, batch: 20216, sum loss: 5337.414551, avg loss: 3.094154, ppl: 22.068552 +epoch: 1, batch: 20217, sum loss: 4587.391113, avg loss: 2.495860, ppl: 12.132164 +epoch: 1, batch: 20218, sum loss: 4831.640137, avg loss: 2.881121, ppl: 17.834257 +epoch: 1, batch: 20219, sum loss: 4985.196289, avg loss: 2.743641, ppl: 15.543482 +epoch: 1, batch: 20220, sum loss: 4452.682129, avg loss: 2.793401, ppl: 16.336494 +epoch: 1, batch: 20221, sum loss: 4251.153809, avg loss: 2.377603, ppl: 10.779033 +epoch: 1, batch: 20222, sum loss: 4742.876953, avg loss: 2.559566, ppl: 12.930211 +epoch: 1, batch: 20223, sum loss: 5646.584961, avg loss: 3.104225, ppl: 22.291935 +epoch: 1, batch: 20224, sum loss: 4903.569336, avg loss: 2.894669, ppl: 18.077518 +epoch: 1, batch: 20225, sum loss: 4472.223145, avg loss: 2.710438, ppl: 15.035864 +epoch: 1, batch: 20226, sum loss: 4925.488770, avg loss: 2.878720, ppl: 17.791477 +epoch: 1, batch: 20227, sum loss: 4374.906250, avg loss: 2.824342, ppl: 16.849859 +epoch: 1, batch: 20228, sum loss: 4087.831543, avg loss: 2.517138, ppl: 12.393071 +epoch: 1, batch: 20229, sum loss: 4312.273926, avg loss: 2.574492, ppl: 13.124648 +epoch: 1, batch: 20230, sum loss: 3453.884521, avg loss: 2.279792, ppl: 9.774646 +epoch: 1, batch: 20231, sum loss: 4436.079102, avg loss: 2.843641, ppl: 17.178190 +epoch: 1, batch: 20232, sum loss: 
4923.305664, avg loss: 2.762798, ppl: 15.844110 +epoch: 1, batch: 20233, sum loss: 4993.584473, avg loss: 2.983026, ppl: 19.747473 +epoch: 1, batch: 20234, sum loss: 4864.288086, avg loss: 2.705388, ppl: 14.960121 +epoch: 1, batch: 20235, sum loss: 4169.321777, avg loss: 2.637142, ppl: 13.973213 +epoch: 1, batch: 20236, sum loss: 3849.213135, avg loss: 2.512541, ppl: 12.336240 +epoch: 1, batch: 20237, sum loss: 5176.615723, avg loss: 2.771208, ppl: 15.977917 +epoch: 1, batch: 20238, sum loss: 5951.088867, avg loss: 2.972572, ppl: 19.542114 +epoch: 1, batch: 20239, sum loss: 4287.563965, avg loss: 2.717088, ppl: 15.136174 +epoch: 1, batch: 20240, sum loss: 5167.329102, avg loss: 2.906260, ppl: 18.288263 +epoch: 1, batch: 20241, sum loss: 7044.853027, avg loss: 3.039195, ppl: 20.888412 +epoch: 1, batch: 20242, sum loss: 5770.118652, avg loss: 2.908326, ppl: 18.326094 +epoch: 1, batch: 20243, sum loss: 4355.631836, avg loss: 2.700330, ppl: 14.884640 +epoch: 1, batch: 20244, sum loss: 4845.983398, avg loss: 2.758101, ppl: 15.769867 +epoch: 1, batch: 20245, sum loss: 4880.941895, avg loss: 2.797101, ppl: 16.397047 +epoch: 1, batch: 20246, sum loss: 4311.275391, avg loss: 2.653092, ppl: 14.197876 +epoch: 1, batch: 20247, sum loss: 5154.211426, avg loss: 2.838222, ppl: 17.085361 +epoch: 1, batch: 20248, sum loss: 5011.544922, avg loss: 3.013557, ppl: 20.359686 +epoch: 1, batch: 20249, sum loss: 4474.353516, avg loss: 2.728264, ppl: 15.306297 +epoch: 1, batch: 20250, sum loss: 4789.464844, avg loss: 2.784573, ppl: 16.192896 +epoch: 1, batch: 20251, sum loss: 4350.505371, avg loss: 2.722469, ppl: 15.217850 +epoch: 1, batch: 20252, sum loss: 3523.687988, avg loss: 2.522325, ppl: 12.457524 +epoch: 1, batch: 20253, sum loss: 4741.611328, avg loss: 2.820709, ppl: 16.788742 +epoch: 1, batch: 20254, sum loss: 5697.923828, avg loss: 2.919019, ppl: 18.523098 +epoch: 1, batch: 20255, sum loss: 4682.634277, avg loss: 2.539390, ppl: 12.671934 +epoch: 1, batch: 20256, sum loss: 
4431.930664, avg loss: 2.463552, ppl: 11.746464 +epoch: 1, batch: 20257, sum loss: 4446.291016, avg loss: 2.651336, ppl: 14.172964 +epoch: 1, batch: 20258, sum loss: 5013.663574, avg loss: 2.672529, ppl: 14.476527 +epoch: 1, batch: 20259, sum loss: 4456.658203, avg loss: 2.684734, ppl: 14.654301 +epoch: 1, batch: 20260, sum loss: 3817.730713, avg loss: 2.511665, ppl: 12.325437 +epoch: 1, batch: 20261, sum loss: 4653.806641, avg loss: 2.849851, ppl: 17.285204 +epoch: 1, batch: 20262, sum loss: 4496.895508, avg loss: 2.537751, ppl: 12.651192 +epoch: 1, batch: 20263, sum loss: 4538.710449, avg loss: 2.696798, ppl: 14.832157 +epoch: 1, batch: 20264, sum loss: 4788.884277, avg loss: 2.656065, ppl: 14.240137 +epoch: 1, batch: 20265, sum loss: 4568.403809, avg loss: 2.742139, ppl: 15.520149 +epoch: 1, batch: 20266, sum loss: 4490.261230, avg loss: 2.624349, ppl: 13.795595 +epoch: 1, batch: 20267, sum loss: 5006.851074, avg loss: 2.800252, ppl: 16.448795 +epoch: 1, batch: 20268, sum loss: 3670.651855, avg loss: 2.295592, ppl: 9.930314 +epoch: 1, batch: 20269, sum loss: 4364.308594, avg loss: 2.765722, ppl: 15.890502 +epoch: 1, batch: 20270, sum loss: 4552.630859, avg loss: 2.745857, ppl: 15.577958 +epoch: 1, batch: 20271, sum loss: 5149.436035, avg loss: 2.997344, ppl: 20.032251 +epoch: 1, batch: 20272, sum loss: 4865.458496, avg loss: 2.867094, ppl: 17.585840 +epoch: 1, batch: 20273, sum loss: 3715.326172, avg loss: 2.653804, ppl: 14.207988 +epoch: 1, batch: 20274, sum loss: 5374.409668, avg loss: 2.916120, ppl: 18.469492 +epoch: 1, batch: 20275, sum loss: 3862.842041, avg loss: 2.474595, ppl: 11.876891 +epoch: 1, batch: 20276, sum loss: 4097.636719, avg loss: 2.341507, ppl: 10.396890 +epoch: 1, batch: 20277, sum loss: 4720.473633, avg loss: 2.501576, ppl: 12.201705 +epoch: 1, batch: 20278, sum loss: 4398.402344, avg loss: 2.675427, ppl: 14.518551 +epoch: 1, batch: 20279, sum loss: 3033.234375, avg loss: 2.270385, ppl: 9.683128 +epoch: 1, batch: 20280, sum loss: 
3622.303223, avg loss: 2.482730, ppl: 11.973910 +epoch: 1, batch: 20281, sum loss: 3856.354736, avg loss: 2.494408, ppl: 12.114558 +epoch: 1, batch: 20282, sum loss: 4983.966309, avg loss: 2.789013, ppl: 16.264961 +epoch: 1, batch: 20283, sum loss: 4270.553223, avg loss: 2.586647, ppl: 13.285146 +epoch: 1, batch: 20284, sum loss: 5325.407227, avg loss: 2.832663, ppl: 16.990652 +epoch: 1, batch: 20285, sum loss: 4724.040039, avg loss: 2.722790, ppl: 15.222731 +epoch: 1, batch: 20286, sum loss: 4853.652832, avg loss: 3.115310, ppl: 22.540415 +epoch: 1, batch: 20287, sum loss: 4384.399414, avg loss: 2.676678, ppl: 14.536728 +epoch: 1, batch: 20288, sum loss: 5062.167969, avg loss: 2.721596, ppl: 15.204566 +epoch: 1, batch: 20289, sum loss: 4631.024414, avg loss: 3.120636, ppl: 22.660793 +epoch: 1, batch: 20290, sum loss: 5370.524414, avg loss: 2.907701, ppl: 18.314653 +epoch: 1, batch: 20291, sum loss: 3937.590332, avg loss: 2.447228, ppl: 11.556271 +epoch: 1, batch: 20292, sum loss: 4707.793457, avg loss: 2.729156, ppl: 15.319948 +epoch: 1, batch: 20293, sum loss: 4453.049316, avg loss: 2.847218, ppl: 17.239759 +epoch: 1, batch: 20294, sum loss: 4161.755859, avg loss: 2.588156, ppl: 13.305221 +epoch: 1, batch: 20295, sum loss: 4838.996094, avg loss: 2.824866, ppl: 16.858690 +epoch: 1, batch: 20296, sum loss: 4958.307129, avg loss: 2.976175, ppl: 19.612646 +epoch: 1, batch: 20297, sum loss: 4250.996094, avg loss: 2.640370, ppl: 14.018392 +epoch: 1, batch: 20298, sum loss: 5137.482910, avg loss: 3.157642, ppl: 23.515074 +epoch: 1, batch: 20299, sum loss: 3912.051270, avg loss: 2.578808, ppl: 13.181411 +epoch: 1, batch: 20300, sum loss: 5115.220703, avg loss: 2.789106, ppl: 16.266474 +epoch: 1, batch: 20301, sum loss: 4283.180176, avg loss: 2.483003, ppl: 11.977180 +epoch: 1, batch: 20302, sum loss: 3783.939941, avg loss: 2.512576, ppl: 12.336673 +epoch: 1, batch: 20303, sum loss: 5291.674316, avg loss: 2.816218, ppl: 16.713528 +epoch: 1, batch: 20304, sum loss: 
5115.561523, avg loss: 2.786253, ppl: 16.220137 +epoch: 1, batch: 20305, sum loss: 4808.507812, avg loss: 2.792397, ppl: 16.320093 +epoch: 1, batch: 20306, sum loss: 5042.210938, avg loss: 2.858396, ppl: 17.433546 +epoch: 1, batch: 20307, sum loss: 4871.021484, avg loss: 2.809124, ppl: 16.595379 +epoch: 1, batch: 20308, sum loss: 4711.624023, avg loss: 2.840039, ppl: 17.116425 +epoch: 1, batch: 20309, sum loss: 4462.424316, avg loss: 2.665725, ppl: 14.378377 +epoch: 1, batch: 20310, sum loss: 4480.869629, avg loss: 2.678344, ppl: 14.560964 +epoch: 1, batch: 20311, sum loss: 4065.480469, avg loss: 2.485013, ppl: 12.001270 +epoch: 1, batch: 20312, sum loss: 4047.931641, avg loss: 2.593166, ppl: 13.372036 +epoch: 1, batch: 20313, sum loss: 4091.410889, avg loss: 2.493242, ppl: 12.100446 +epoch: 1, batch: 20314, sum loss: 4327.200684, avg loss: 2.467047, ppl: 11.787586 +epoch: 1, batch: 20315, sum loss: 3823.327393, avg loss: 2.634960, ppl: 13.942758 +epoch: 1, batch: 20316, sum loss: 3629.615723, avg loss: 2.503183, ppl: 12.221337 +epoch: 1, batch: 20317, sum loss: 3682.696777, avg loss: 2.608142, ppl: 13.573809 +epoch: 1, batch: 20318, sum loss: 5029.456055, avg loss: 2.768000, ppl: 15.926750 +epoch: 1, batch: 20319, sum loss: 4958.793457, avg loss: 2.932462, ppl: 18.773798 +epoch: 1, batch: 20320, sum loss: 4293.003418, avg loss: 2.760774, ppl: 15.812078 +epoch: 1, batch: 20321, sum loss: 4004.166260, avg loss: 2.674794, ppl: 14.509356 +epoch: 1, batch: 20322, sum loss: 4601.335449, avg loss: 2.689267, ppl: 14.720881 +epoch: 1, batch: 20323, sum loss: 4716.160645, avg loss: 2.795590, ppl: 16.372292 +epoch: 1, batch: 20324, sum loss: 4745.959961, avg loss: 2.667768, ppl: 14.407779 +epoch: 1, batch: 20325, sum loss: 4631.289062, avg loss: 2.753442, ppl: 15.696564 +epoch: 1, batch: 20326, sum loss: 5748.392578, avg loss: 2.736027, ppl: 15.425577 +epoch: 1, batch: 20327, sum loss: 4060.971436, avg loss: 2.517651, ppl: 12.399440 +epoch: 1, batch: 20328, sum loss: 
4042.158203, avg loss: 2.404615, ppl: 11.074167 +epoch: 1, batch: 20329, sum loss: 4569.673828, avg loss: 2.794908, ppl: 16.361116 +epoch: 1, batch: 20330, sum loss: 4001.054688, avg loss: 2.611655, ppl: 13.621570 +epoch: 1, batch: 20331, sum loss: 3921.038086, avg loss: 2.523190, ppl: 12.468313 +epoch: 1, batch: 20332, sum loss: 3408.577148, avg loss: 2.344276, ppl: 10.425721 +epoch: 1, batch: 20333, sum loss: 5830.319824, avg loss: 2.974653, ppl: 19.582823 +epoch: 1, batch: 20334, sum loss: 5223.389648, avg loss: 2.673178, ppl: 14.485931 +epoch: 1, batch: 20335, sum loss: 4685.846680, avg loss: 2.834753, ppl: 17.026192 +epoch: 1, batch: 20336, sum loss: 4093.746582, avg loss: 2.723717, ppl: 15.236852 +epoch: 1, batch: 20337, sum loss: 5554.454102, avg loss: 3.263486, ppl: 26.140514 +epoch: 1, batch: 20338, sum loss: 4882.308594, avg loss: 2.883821, ppl: 17.882471 +epoch: 1, batch: 20339, sum loss: 4943.033691, avg loss: 2.797416, ppl: 16.402205 +epoch: 1, batch: 20340, sum loss: 4874.749023, avg loss: 2.611006, ppl: 13.612745 +epoch: 1, batch: 20341, sum loss: 3870.144531, avg loss: 2.627389, ppl: 13.837599 +epoch: 1, batch: 20342, sum loss: 5044.526367, avg loss: 2.850015, ppl: 17.288040 +epoch: 1, batch: 20343, sum loss: 4628.065918, avg loss: 2.830621, ppl: 16.955992 +epoch: 1, batch: 20344, sum loss: 6002.332031, avg loss: 2.936562, ppl: 18.850918 +epoch: 1, batch: 20345, sum loss: 5700.838379, avg loss: 2.721164, ppl: 15.198002 +epoch: 1, batch: 20346, sum loss: 4562.376465, avg loss: 2.644856, ppl: 14.081417 +epoch: 1, batch: 20347, sum loss: 4999.156250, avg loss: 2.788152, ppl: 16.250959 +epoch: 1, batch: 20348, sum loss: 4899.805664, avg loss: 2.838821, ppl: 17.095600 +epoch: 1, batch: 20349, sum loss: 4992.588867, avg loss: 2.694328, ppl: 14.795567 +epoch: 1, batch: 20350, sum loss: 4555.021484, avg loss: 2.769010, ppl: 15.942840 +epoch: 1, batch: 20351, sum loss: 4351.241699, avg loss: 2.757441, ppl: 15.759459 +epoch: 1, batch: 20352, sum loss: 
4382.680176, avg loss: 2.552522, ppl: 12.839443 +epoch: 1, batch: 20353, sum loss: 4622.597656, avg loss: 2.719175, ppl: 15.167809 +epoch: 1, batch: 20354, sum loss: 4837.889160, avg loss: 2.683244, ppl: 14.632487 +epoch: 1, batch: 20355, sum loss: 4607.245117, avg loss: 2.762137, ppl: 15.833650 +epoch: 1, batch: 20356, sum loss: 4284.391113, avg loss: 2.548716, ppl: 12.790665 +epoch: 1, batch: 20357, sum loss: 4817.056641, avg loss: 2.903590, ppl: 18.239515 +epoch: 1, batch: 20358, sum loss: 4314.319336, avg loss: 2.808802, ppl: 16.590025 +epoch: 1, batch: 20359, sum loss: 4456.976562, avg loss: 2.756325, ppl: 15.741878 +epoch: 1, batch: 20360, sum loss: 4285.512207, avg loss: 2.597280, ppl: 13.427167 +epoch: 1, batch: 20361, sum loss: 3749.472168, avg loss: 2.359643, ppl: 10.587169 +epoch: 1, batch: 20362, sum loss: 3840.973633, avg loss: 2.426389, ppl: 11.317936 +epoch: 1, batch: 20363, sum loss: 4502.448242, avg loss: 2.750427, ppl: 15.649305 +epoch: 1, batch: 20364, sum loss: 4048.354248, avg loss: 2.670419, ppl: 14.446013 +epoch: 1, batch: 20365, sum loss: 4475.850586, avg loss: 2.611348, ppl: 13.617396 +epoch: 1, batch: 20366, sum loss: 4228.162598, avg loss: 2.579721, ppl: 13.193453 +epoch: 1, batch: 20367, sum loss: 5596.446777, avg loss: 2.880312, ppl: 17.819839 +epoch: 1, batch: 20368, sum loss: 4147.415039, avg loss: 2.651800, ppl: 14.179538 +epoch: 1, batch: 20369, sum loss: 4889.902344, avg loss: 2.734845, ppl: 15.407351 +epoch: 1, batch: 20370, sum loss: 4824.768555, avg loss: 2.763327, ppl: 15.852495 +epoch: 1, batch: 20371, sum loss: 4141.176758, avg loss: 2.578566, ppl: 13.178225 +epoch: 1, batch: 20372, sum loss: 5977.658203, avg loss: 3.028195, ppl: 20.659916 +epoch: 1, batch: 20373, sum loss: 4525.744629, avg loss: 2.646634, ppl: 14.106481 +epoch: 1, batch: 20374, sum loss: 4336.363281, avg loss: 2.706843, ppl: 14.981909 +epoch: 1, batch: 20375, sum loss: 3964.440918, avg loss: 2.554408, ppl: 12.863683 +epoch: 1, batch: 20376, sum loss: 
5124.256348, avg loss: 2.780389, ppl: 16.125286 +epoch: 1, batch: 20377, sum loss: 4828.495605, avg loss: 2.689970, ppl: 14.731231 +epoch: 1, batch: 20378, sum loss: 4858.123047, avg loss: 2.754038, ppl: 15.705926 +epoch: 1, batch: 20379, sum loss: 4226.913574, avg loss: 2.665141, ppl: 14.369974 +epoch: 1, batch: 20380, sum loss: 3943.495361, avg loss: 2.505397, ppl: 12.248422 +epoch: 1, batch: 20381, sum loss: 4729.116211, avg loss: 2.776932, ppl: 16.069651 +epoch: 1, batch: 20382, sum loss: 5416.256348, avg loss: 2.884056, ppl: 17.886667 +epoch: 1, batch: 20383, sum loss: 4850.642090, avg loss: 2.899368, ppl: 18.162659 +epoch: 1, batch: 20384, sum loss: 4837.074219, avg loss: 2.805728, ppl: 16.539108 +epoch: 1, batch: 20385, sum loss: 5157.620117, avg loss: 2.807632, ppl: 16.570637 +epoch: 1, batch: 20386, sum loss: 4994.168457, avg loss: 2.720135, ppl: 15.182375 +epoch: 1, batch: 20387, sum loss: 4871.008789, avg loss: 2.739600, ppl: 15.480794 +epoch: 1, batch: 20388, sum loss: 4796.546875, avg loss: 2.872184, ppl: 17.675575 +epoch: 1, batch: 20389, sum loss: 4249.000000, avg loss: 2.536717, ppl: 12.638108 +epoch: 1, batch: 20390, sum loss: 3985.877686, avg loss: 2.579856, ppl: 13.195240 +epoch: 1, batch: 20391, sum loss: 4911.640137, avg loss: 2.643509, ppl: 14.062465 +epoch: 1, batch: 20392, sum loss: 4200.853027, avg loss: 2.787560, ppl: 16.241343 +epoch: 1, batch: 20393, sum loss: 4641.030762, avg loss: 2.581218, ppl: 13.213225 +epoch: 1, batch: 20394, sum loss: 4607.150391, avg loss: 2.643230, ppl: 14.058542 +epoch: 1, batch: 20395, sum loss: 3940.982422, avg loss: 2.644955, ppl: 14.082807 +epoch: 1, batch: 20396, sum loss: 4006.004150, avg loss: 2.553221, ppl: 12.848425 +epoch: 1, batch: 20397, sum loss: 4545.354492, avg loss: 2.651899, ppl: 14.180941 +epoch: 1, batch: 20398, sum loss: 4333.427734, avg loss: 2.621553, ppl: 13.757077 +epoch: 1, batch: 20399, sum loss: 5065.871094, avg loss: 2.857231, ppl: 17.413250 +epoch: 1, batch: 20400, sum loss: 
4236.802734, avg loss: 2.845402, ppl: 17.208467 +epoch: 1, batch: 20401, sum loss: 3429.563965, avg loss: 2.260754, ppl: 9.590319 +epoch: 1, batch: 20402, sum loss: 6816.010254, avg loss: 3.161415, ppl: 23.603970 +epoch: 1, batch: 20403, sum loss: 4271.991211, avg loss: 2.703792, ppl: 14.936260 +epoch: 1, batch: 20404, sum loss: 4964.629883, avg loss: 2.693776, ppl: 14.787414 +epoch: 1, batch: 20405, sum loss: 5210.951172, avg loss: 2.930794, ppl: 18.742502 +epoch: 1, batch: 20406, sum loss: 4377.911133, avg loss: 2.581316, ppl: 13.214511 +epoch: 1, batch: 20407, sum loss: 4277.106934, avg loss: 2.654939, ppl: 14.224121 +epoch: 1, batch: 20408, sum loss: 5343.533691, avg loss: 3.048222, ppl: 21.077841 +epoch: 1, batch: 20409, sum loss: 4246.159668, avg loss: 2.637366, ppl: 13.976345 +epoch: 1, batch: 20410, sum loss: 4841.549805, avg loss: 2.738433, ppl: 15.462742 +epoch: 1, batch: 20411, sum loss: 4395.404297, avg loss: 2.869063, ppl: 17.620499 +epoch: 1, batch: 20412, sum loss: 3835.558105, avg loss: 2.755430, ppl: 15.727798 +epoch: 1, batch: 20413, sum loss: 4342.888672, avg loss: 2.577382, ppl: 13.162635 +epoch: 1, batch: 20414, sum loss: 4598.851562, avg loss: 2.730910, ppl: 15.346840 +epoch: 1, batch: 20415, sum loss: 4618.164062, avg loss: 2.604717, ppl: 13.527403 +epoch: 1, batch: 20416, sum loss: 4905.762207, avg loss: 2.686617, ppl: 14.681921 +epoch: 1, batch: 20417, sum loss: 4837.085938, avg loss: 2.715938, ppl: 15.118790 +epoch: 1, batch: 20418, sum loss: 4772.960938, avg loss: 2.738360, ppl: 15.461602 +epoch: 1, batch: 20419, sum loss: 4722.837891, avg loss: 2.668270, ppl: 14.415011 +epoch: 1, batch: 20420, sum loss: 4610.072754, avg loss: 2.747362, ppl: 15.601416 +epoch: 1, batch: 20421, sum loss: 4569.548828, avg loss: 2.572944, ppl: 13.104349 +epoch: 1, batch: 20422, sum loss: 4788.271484, avg loss: 2.697618, ppl: 14.844323 +epoch: 1, batch: 20423, sum loss: 4044.940430, avg loss: 2.539197, ppl: 12.669490 +epoch: 1, batch: 20424, sum loss: 
4659.124023, avg loss: 2.711946, ppl: 15.058558 +epoch: 1, batch: 20425, sum loss: 4876.676270, avg loss: 2.777151, ppl: 16.073156 +epoch: 1, batch: 20426, sum loss: 4416.240723, avg loss: 2.684645, ppl: 14.652994 +epoch: 1, batch: 20427, sum loss: 3817.766602, avg loss: 2.498538, ppl: 12.164702 +epoch: 1, batch: 20428, sum loss: 4309.577637, avg loss: 2.550046, ppl: 12.807693 +epoch: 1, batch: 20429, sum loss: 3923.746094, avg loss: 2.654768, ppl: 14.221680 +epoch: 1, batch: 20430, sum loss: 4448.969727, avg loss: 2.562771, ppl: 12.971707 +epoch: 1, batch: 20431, sum loss: 4548.817383, avg loss: 2.678927, ppl: 14.569448 +epoch: 1, batch: 20432, sum loss: 4193.987793, avg loss: 2.634414, ppl: 13.935150 +epoch: 1, batch: 20433, sum loss: 3872.068604, avg loss: 2.482095, ppl: 11.966311 +epoch: 1, batch: 20434, sum loss: 4830.208008, avg loss: 2.735112, ppl: 15.411469 +epoch: 1, batch: 20435, sum loss: 4123.788574, avg loss: 2.588693, ppl: 13.312366 +epoch: 1, batch: 20436, sum loss: 5463.951660, avg loss: 3.059324, ppl: 21.313135 +epoch: 1, batch: 20437, sum loss: 4663.647461, avg loss: 2.756293, ppl: 15.741379 +epoch: 1, batch: 20438, sum loss: 4719.407227, avg loss: 2.623350, ppl: 13.781820 +epoch: 1, batch: 20439, sum loss: 3920.154053, avg loss: 2.397648, ppl: 10.997276 +epoch: 1, batch: 20440, sum loss: 4106.731445, avg loss: 2.544443, ppl: 12.736136 +epoch: 1, batch: 20441, sum loss: 4003.277588, avg loss: 2.397172, ppl: 10.992049 +epoch: 1, batch: 20442, sum loss: 4586.881836, avg loss: 2.600273, ppl: 13.467416 +epoch: 1, batch: 20443, sum loss: 5013.989746, avg loss: 2.659941, ppl: 14.295452 +epoch: 1, batch: 20444, sum loss: 5553.000977, avg loss: 2.927254, ppl: 18.676279 +epoch: 1, batch: 20445, sum loss: 5065.450195, avg loss: 2.929699, ppl: 18.722002 +epoch: 1, batch: 20446, sum loss: 4730.640625, avg loss: 2.686337, ppl: 14.677819 +epoch: 1, batch: 20447, sum loss: 3688.585449, avg loss: 2.572235, ppl: 13.095064 +epoch: 1, batch: 20448, sum loss: 
3734.755615, avg loss: 2.496495, ppl: 12.139863 +epoch: 1, batch: 20449, sum loss: 4804.753418, avg loss: 2.736192, ppl: 15.428126 +epoch: 1, batch: 20450, sum loss: 4849.266602, avg loss: 2.710602, ppl: 15.038323 +epoch: 1, batch: 20451, sum loss: 4925.952148, avg loss: 2.539151, ppl: 12.668907 +epoch: 1, batch: 20452, sum loss: 4989.594727, avg loss: 2.714687, ppl: 15.099881 +epoch: 1, batch: 20453, sum loss: 4552.140137, avg loss: 2.640453, ppl: 14.019547 +epoch: 1, batch: 20454, sum loss: 4935.111328, avg loss: 2.781912, ppl: 16.149864 +epoch: 1, batch: 20455, sum loss: 5261.516602, avg loss: 2.795705, ppl: 16.374166 +epoch: 1, batch: 20456, sum loss: 5014.858887, avg loss: 2.882103, ppl: 17.851770 +epoch: 1, batch: 20457, sum loss: 5029.875488, avg loss: 2.618363, ppl: 13.713259 +epoch: 1, batch: 20458, sum loss: 4961.286133, avg loss: 2.646019, ppl: 14.097807 +epoch: 1, batch: 20459, sum loss: 5141.634766, avg loss: 2.737825, ppl: 15.453333 +epoch: 1, batch: 20460, sum loss: 4598.822754, avg loss: 2.874264, ppl: 17.712387 +epoch: 1, batch: 20461, sum loss: 5603.236328, avg loss: 3.032054, ppl: 20.739792 +epoch: 1, batch: 20462, sum loss: 3360.185547, avg loss: 2.295209, ppl: 9.926507 +epoch: 1, batch: 20463, sum loss: 5190.182617, avg loss: 2.836165, ppl: 17.050259 +epoch: 1, batch: 20464, sum loss: 3707.085205, avg loss: 2.509875, ppl: 12.303392 +epoch: 1, batch: 20465, sum loss: 4306.420898, avg loss: 2.492142, ppl: 12.087135 +epoch: 1, batch: 20466, sum loss: 4780.550781, avg loss: 2.885064, ppl: 17.904711 +epoch: 1, batch: 20467, sum loss: 5187.621582, avg loss: 2.882012, ppl: 17.850153 +epoch: 1, batch: 20468, sum loss: 4078.233154, avg loss: 2.614252, ppl: 13.656999 +epoch: 1, batch: 20469, sum loss: 5316.182617, avg loss: 2.773178, ppl: 16.009438 +epoch: 1, batch: 20470, sum loss: 4385.667480, avg loss: 2.592002, ppl: 13.356486 +epoch: 1, batch: 20471, sum loss: 4614.586426, avg loss: 2.725686, ppl: 15.266885 +epoch: 1, batch: 20472, sum loss: 
4243.873047, avg loss: 2.659068, ppl: 14.282976 +epoch: 1, batch: 20473, sum loss: 5263.384766, avg loss: 2.955297, ppl: 19.207436 +epoch: 1, batch: 20474, sum loss: 5649.092285, avg loss: 2.933070, ppl: 18.785212 +epoch: 1, batch: 20475, sum loss: 4003.458252, avg loss: 2.549973, ppl: 12.806765 +epoch: 1, batch: 20476, sum loss: 4718.657715, avg loss: 2.762680, ppl: 15.842244 +epoch: 1, batch: 20477, sum loss: 3367.475342, avg loss: 2.263088, ppl: 9.612730 +epoch: 1, batch: 20478, sum loss: 4933.961426, avg loss: 2.798617, ppl: 16.421917 +epoch: 1, batch: 20479, sum loss: 5196.649414, avg loss: 3.014298, ppl: 20.374783 +epoch: 1, batch: 20480, sum loss: 4517.685547, avg loss: 2.367760, ppl: 10.673453 +epoch: 1, batch: 20481, sum loss: 4693.859375, avg loss: 2.626670, ppl: 13.827649 +epoch: 1, batch: 20482, sum loss: 4619.396484, avg loss: 2.685696, ppl: 14.668402 +epoch: 1, batch: 20483, sum loss: 5068.215332, avg loss: 2.577933, ppl: 13.169883 +epoch: 1, batch: 20484, sum loss: 4422.375000, avg loss: 2.429877, ppl: 11.357480 +epoch: 1, batch: 20485, sum loss: 4610.635254, avg loss: 2.551541, ppl: 12.826859 +epoch: 1, batch: 20486, sum loss: 3633.535645, avg loss: 2.280939, ppl: 9.785864 +epoch: 1, batch: 20487, sum loss: 4747.625488, avg loss: 2.832712, ppl: 16.991482 +epoch: 1, batch: 20488, sum loss: 5341.729492, avg loss: 2.888983, ppl: 17.975019 +epoch: 1, batch: 20489, sum loss: 5345.294922, avg loss: 2.840220, ppl: 17.119539 +epoch: 1, batch: 20490, sum loss: 4619.166992, avg loss: 2.587769, ppl: 13.300063 +epoch: 1, batch: 20491, sum loss: 4774.900391, avg loss: 2.830409, ppl: 16.952398 +epoch: 1, batch: 20492, sum loss: 4911.467773, avg loss: 2.882317, ppl: 17.855593 +epoch: 1, batch: 20493, sum loss: 4908.419922, avg loss: 2.832325, ppl: 16.984909 +epoch: 1, batch: 20494, sum loss: 4133.645508, avg loss: 2.572275, ppl: 13.095582 +epoch: 1, batch: 20495, sum loss: 4880.647461, avg loss: 2.760547, ppl: 15.808495 +epoch: 1, batch: 20496, sum loss: 
3804.610840, avg loss: 2.643927, ppl: 14.068339 +epoch: 1, batch: 20497, sum loss: 4725.711914, avg loss: 2.663874, ppl: 14.351775 +epoch: 1, batch: 20498, sum loss: 3560.624512, avg loss: 2.597100, ppl: 13.424753 +epoch: 1, batch: 20499, sum loss: 5053.463867, avg loss: 2.791969, ppl: 16.313110 +epoch: 1, batch: 20500, sum loss: 4434.812500, avg loss: 2.526959, ppl: 12.515386 +epoch: 1, batch: 20501, sum loss: 4103.154297, avg loss: 2.820037, ppl: 16.777477 +epoch: 1, batch: 20502, sum loss: 3776.188965, avg loss: 2.482702, ppl: 11.973568 +epoch: 1, batch: 20503, sum loss: 3958.575684, avg loss: 2.550629, ppl: 12.815161 +epoch: 1, batch: 20504, sum loss: 4311.098633, avg loss: 2.793972, ppl: 16.345818 +epoch: 1, batch: 20505, sum loss: 3754.762207, avg loss: 2.596654, ppl: 13.418769 +epoch: 1, batch: 20506, sum loss: 4210.963867, avg loss: 2.490221, ppl: 12.063945 +epoch: 1, batch: 20507, sum loss: 5528.288574, avg loss: 2.891364, ppl: 18.017876 +epoch: 1, batch: 20508, sum loss: 4825.390625, avg loss: 2.491167, ppl: 12.075360 +epoch: 1, batch: 20509, sum loss: 4700.958008, avg loss: 2.938099, ppl: 18.879915 +epoch: 1, batch: 20510, sum loss: 4557.075195, avg loss: 2.526095, ppl: 12.504580 +epoch: 1, batch: 20511, sum loss: 4541.562500, avg loss: 2.803434, ppl: 16.501209 +epoch: 1, batch: 20512, sum loss: 4817.379883, avg loss: 3.031705, ppl: 20.732559 +epoch: 1, batch: 20513, sum loss: 4110.982910, avg loss: 2.647124, ppl: 14.113384 +epoch: 1, batch: 20514, sum loss: 4439.449219, avg loss: 2.735335, ppl: 15.414912 +epoch: 1, batch: 20515, sum loss: 3441.325195, avg loss: 2.360305, ppl: 10.594186 +epoch: 1, batch: 20516, sum loss: 4623.067871, avg loss: 2.721052, ppl: 15.196306 +epoch: 1, batch: 20517, sum loss: 3550.151367, avg loss: 2.448380, ppl: 11.569592 +epoch: 1, batch: 20518, sum loss: 4153.263672, avg loss: 2.417499, ppl: 11.217772 +epoch: 1, batch: 20519, sum loss: 4119.517090, avg loss: 2.541343, ppl: 12.696711 +epoch: 1, batch: 20520, sum loss: 
3920.952148, avg loss: 2.656472, ppl: 14.245936 +epoch: 1, batch: 20521, sum loss: 4444.182129, avg loss: 2.867214, ppl: 17.587957 +epoch: 1, batch: 20522, sum loss: 4688.896484, avg loss: 2.694768, ppl: 14.802087 +epoch: 1, batch: 20523, sum loss: 4953.526367, avg loss: 2.856705, ppl: 17.404085 +epoch: 1, batch: 20524, sum loss: 4687.123047, avg loss: 3.073523, ppl: 21.617935 +epoch: 1, batch: 20525, sum loss: 4651.732910, avg loss: 2.518534, ppl: 12.410395 +epoch: 1, batch: 20526, sum loss: 4800.528809, avg loss: 2.695412, ppl: 14.811623 +epoch: 1, batch: 20527, sum loss: 4458.706055, avg loss: 2.725371, ppl: 15.262070 +epoch: 1, batch: 20528, sum loss: 4236.783203, avg loss: 2.529423, ppl: 12.546265 +epoch: 1, batch: 20529, sum loss: 5525.166016, avg loss: 3.173559, ppl: 23.892365 +epoch: 1, batch: 20530, sum loss: 4235.339355, avg loss: 2.596775, ppl: 13.420382 +epoch: 1, batch: 20531, sum loss: 4095.809570, avg loss: 2.429306, ppl: 11.351000 +epoch: 1, batch: 20532, sum loss: 4185.066406, avg loss: 2.735337, ppl: 15.414945 +epoch: 1, batch: 20533, sum loss: 4627.425293, avg loss: 2.752781, ppl: 15.686197 +epoch: 1, batch: 20534, sum loss: 3829.253418, avg loss: 2.519246, ppl: 12.419228 +epoch: 1, batch: 20535, sum loss: 5304.911133, avg loss: 3.054065, ppl: 21.201357 +epoch: 1, batch: 20536, sum loss: 4983.139648, avg loss: 2.724516, ppl: 15.249034 +epoch: 1, batch: 20537, sum loss: 4615.146484, avg loss: 2.653908, ppl: 14.209464 +epoch: 1, batch: 20538, sum loss: 3708.380371, avg loss: 2.386345, ppl: 10.873679 +epoch: 1, batch: 20539, sum loss: 4369.450195, avg loss: 2.713944, ppl: 15.088671 +epoch: 1, batch: 20540, sum loss: 4673.854004, avg loss: 2.689214, ppl: 14.720101 +epoch: 1, batch: 20541, sum loss: 4126.975586, avg loss: 2.542807, ppl: 12.715311 +epoch: 1, batch: 20542, sum loss: 3524.781982, avg loss: 2.309818, ppl: 10.072589 +epoch: 1, batch: 20543, sum loss: 4365.166016, avg loss: 2.755787, ppl: 15.733413 +epoch: 1, batch: 20544, sum loss: 
4398.381348, avg loss: 2.801517, ppl: 16.469608 +epoch: 1, batch: 20545, sum loss: 4850.448242, avg loss: 2.757503, ppl: 15.760445 +epoch: 1, batch: 20546, sum loss: 4907.769043, avg loss: 2.701029, ppl: 14.895044 +epoch: 1, batch: 20547, sum loss: 3713.965820, avg loss: 2.523075, ppl: 12.466868 +epoch: 1, batch: 20548, sum loss: 4826.228516, avg loss: 2.946415, ppl: 19.037584 +epoch: 1, batch: 20549, sum loss: 5591.781738, avg loss: 2.909356, ppl: 18.344978 +epoch: 1, batch: 20550, sum loss: 4879.837891, avg loss: 2.714036, ppl: 15.090063 +epoch: 1, batch: 20551, sum loss: 3967.572754, avg loss: 2.473549, ppl: 11.864481 +epoch: 1, batch: 20552, sum loss: 4906.356445, avg loss: 2.802031, ppl: 16.478085 +epoch: 1, batch: 20553, sum loss: 6105.111328, avg loss: 2.943641, ppl: 18.984844 +epoch: 1, batch: 20554, sum loss: 3810.066895, avg loss: 2.447057, ppl: 11.554287 +epoch: 1, batch: 20555, sum loss: 3770.499756, avg loss: 2.333230, ppl: 10.311193 +epoch: 1, batch: 20556, sum loss: 4689.149902, avg loss: 2.659756, ppl: 14.292801 +epoch: 1, batch: 20557, sum loss: 4033.891602, avg loss: 2.412615, ppl: 11.163113 +epoch: 1, batch: 20558, sum loss: 4248.713867, avg loss: 2.436189, ppl: 11.429399 +epoch: 1, batch: 20559, sum loss: 4346.772461, avg loss: 2.704899, ppl: 14.952804 +epoch: 1, batch: 20560, sum loss: 5262.270020, avg loss: 2.746488, ppl: 15.587786 +epoch: 1, batch: 20561, sum loss: 4073.232910, avg loss: 2.506605, ppl: 12.263225 +epoch: 1, batch: 20562, sum loss: 4511.217773, avg loss: 2.871558, ppl: 17.664522 +epoch: 1, batch: 20563, sum loss: 4612.976074, avg loss: 2.732806, ppl: 15.375967 +epoch: 1, batch: 20564, sum loss: 4211.487793, avg loss: 2.675659, ppl: 14.521919 +epoch: 1, batch: 20565, sum loss: 3771.741455, avg loss: 2.601201, ppl: 13.479918 +epoch: 1, batch: 20566, sum loss: 5310.958496, avg loss: 2.983685, ppl: 19.760490 +epoch: 1, batch: 20567, sum loss: 4251.346680, avg loss: 2.550298, ppl: 12.810921 +epoch: 1, batch: 20568, sum loss: 
4258.463867, avg loss: 2.683342, ppl: 14.633922 +epoch: 1, batch: 20569, sum loss: 4904.251953, avg loss: 2.961505, ppl: 19.327032 +epoch: 1, batch: 20570, sum loss: 3358.139404, avg loss: 2.445841, ppl: 11.540246 +epoch: 1, batch: 20571, sum loss: 5687.546387, avg loss: 3.087702, ppl: 21.926628 +epoch: 1, batch: 20572, sum loss: 5194.630859, avg loss: 2.786819, ppl: 16.229315 +epoch: 1, batch: 20573, sum loss: 4958.260254, avg loss: 2.854496, ppl: 17.365690 +epoch: 1, batch: 20574, sum loss: 4945.115234, avg loss: 2.878414, ppl: 17.786041 +epoch: 1, batch: 20575, sum loss: 4282.639160, avg loss: 2.422307, ppl: 11.271836 +epoch: 1, batch: 20576, sum loss: 4950.605469, avg loss: 2.554492, ppl: 12.864762 +epoch: 1, batch: 20577, sum loss: 3705.442139, avg loss: 2.607630, ppl: 13.566856 +epoch: 1, batch: 20578, sum loss: 4209.567383, avg loss: 2.525235, ppl: 12.493836 +epoch: 1, batch: 20579, sum loss: 5141.240234, avg loss: 2.809421, ppl: 16.600306 +epoch: 1, batch: 20580, sum loss: 3675.192139, avg loss: 2.321663, ppl: 10.192607 +epoch: 1, batch: 20581, sum loss: 5395.442871, avg loss: 2.883722, ppl: 17.880693 +epoch: 1, batch: 20582, sum loss: 3470.697266, avg loss: 2.348239, ppl: 10.467123 +epoch: 1, batch: 20583, sum loss: 5452.756836, avg loss: 2.844422, ppl: 17.191620 +epoch: 1, batch: 20584, sum loss: 4652.240723, avg loss: 2.643318, ppl: 14.059782 +epoch: 1, batch: 20585, sum loss: 4259.058594, avg loss: 2.933236, ppl: 18.788328 +epoch: 1, batch: 20586, sum loss: 5291.801758, avg loss: 2.817786, ppl: 16.739744 +epoch: 1, batch: 20587, sum loss: 5639.910156, avg loss: 2.876038, ppl: 17.743830 +epoch: 1, batch: 20588, sum loss: 4417.799805, avg loss: 2.501585, ppl: 12.201821 +epoch: 1, batch: 20589, sum loss: 4945.705078, avg loss: 2.778486, ppl: 16.094635 +epoch: 1, batch: 20590, sum loss: 3848.936768, avg loss: 2.422238, ppl: 11.271060 +epoch: 1, batch: 20591, sum loss: 4822.873047, avg loss: 2.698866, ppl: 14.862866 +epoch: 1, batch: 20592, sum loss: 
4000.168457, avg loss: 2.614489, ppl: 13.660238 +epoch: 1, batch: 20593, sum loss: 4166.838867, avg loss: 2.438174, ppl: 11.452107 +epoch: 1, batch: 20594, sum loss: 4279.777344, avg loss: 2.524942, ppl: 12.490176 +epoch: 1, batch: 20595, sum loss: 3985.892090, avg loss: 2.257017, ppl: 9.554544 +epoch: 1, batch: 20596, sum loss: 5169.209473, avg loss: 2.818544, ppl: 16.752436 +epoch: 1, batch: 20597, sum loss: 4878.972656, avg loss: 2.816959, ppl: 16.725908 +epoch: 1, batch: 20598, sum loss: 3598.492920, avg loss: 2.617086, ppl: 13.695752 +epoch: 1, batch: 20599, sum loss: 4716.581543, avg loss: 2.975761, ppl: 19.604540 +epoch: 1, batch: 20600, sum loss: 4388.358398, avg loss: 2.654784, ppl: 14.221916 +epoch: 1, batch: 20601, sum loss: 4460.241699, avg loss: 2.768617, ppl: 15.936577 +epoch: 1, batch: 20602, sum loss: 4181.432617, avg loss: 2.621588, ppl: 13.757553 +epoch: 1, batch: 20603, sum loss: 3668.539795, avg loss: 2.368328, ppl: 10.679520 +epoch: 1, batch: 20604, sum loss: 4343.063965, avg loss: 2.616304, ppl: 13.685045 +epoch: 1, batch: 20605, sum loss: 4289.335449, avg loss: 2.613855, ppl: 13.651571 +epoch: 1, batch: 20606, sum loss: 4489.441406, avg loss: 2.631560, ppl: 13.895428 +epoch: 1, batch: 20607, sum loss: 4281.872559, avg loss: 2.458021, ppl: 11.681670 +epoch: 1, batch: 20608, sum loss: 3953.715576, avg loss: 2.430065, ppl: 11.359619 +epoch: 1, batch: 20609, sum loss: 5342.040527, avg loss: 3.107644, ppl: 22.368290 +epoch: 1, batch: 20610, sum loss: 4355.851074, avg loss: 2.712236, ppl: 15.062918 +epoch: 1, batch: 20611, sum loss: 4806.576172, avg loss: 2.837412, ppl: 17.071529 +epoch: 1, batch: 20612, sum loss: 4937.174805, avg loss: 2.775253, ppl: 16.042683 +epoch: 1, batch: 20613, sum loss: 4818.877930, avg loss: 2.713332, ppl: 15.079439 +epoch: 1, batch: 20614, sum loss: 4027.190186, avg loss: 2.609974, ppl: 13.598702 +epoch: 1, batch: 20615, sum loss: 5297.260254, avg loss: 2.722128, ppl: 15.212655 +epoch: 1, batch: 20616, sum loss: 
4787.117188, avg loss: 2.797848, ppl: 16.409292 +epoch: 1, batch: 20617, sum loss: 4241.668457, avg loss: 2.580090, ppl: 13.198330 +epoch: 1, batch: 20618, sum loss: 3968.270508, avg loss: 2.334277, ppl: 10.321994 +epoch: 1, batch: 20619, sum loss: 4830.242676, avg loss: 2.936318, ppl: 18.846321 +epoch: 1, batch: 20620, sum loss: 4255.520508, avg loss: 2.549743, ppl: 12.803809 +epoch: 1, batch: 20621, sum loss: 4628.008789, avg loss: 2.804854, ppl: 16.524662 +epoch: 1, batch: 20622, sum loss: 5286.903320, avg loss: 2.707068, ppl: 14.985271 +epoch: 1, batch: 20623, sum loss: 4707.730469, avg loss: 2.724381, ppl: 15.246973 +epoch: 1, batch: 20624, sum loss: 5555.425293, avg loss: 2.922370, ppl: 18.585281 +epoch: 1, batch: 20625, sum loss: 4456.583984, avg loss: 2.632359, ppl: 13.906541 +epoch: 1, batch: 20626, sum loss: 5076.455078, avg loss: 2.729277, ppl: 15.321804 +epoch: 1, batch: 20627, sum loss: 4943.096680, avg loss: 2.426655, ppl: 11.320951 +epoch: 1, batch: 20628, sum loss: 5014.928711, avg loss: 2.976219, ppl: 19.613512 +epoch: 1, batch: 20629, sum loss: 4507.496094, avg loss: 2.765335, ppl: 15.884361 +epoch: 1, batch: 20630, sum loss: 4707.753906, avg loss: 2.775798, ppl: 16.051435 +epoch: 1, batch: 20631, sum loss: 3646.339600, avg loss: 2.361619, ppl: 10.608112 +epoch: 1, batch: 20632, sum loss: 4341.803223, avg loss: 2.606124, ppl: 13.546449 +epoch: 1, batch: 20633, sum loss: 5582.425293, avg loss: 3.017527, ppl: 20.440687 +epoch: 1, batch: 20634, sum loss: 5700.668945, avg loss: 2.834743, ppl: 17.026033 +epoch: 1, batch: 20635, sum loss: 4247.096191, avg loss: 2.446484, ppl: 11.547675 +epoch: 1, batch: 20636, sum loss: 5215.638184, avg loss: 2.746518, ppl: 15.588264 +epoch: 1, batch: 20637, sum loss: 5025.966797, avg loss: 2.671965, ppl: 14.468377 +epoch: 1, batch: 20638, sum loss: 3764.667480, avg loss: 2.247563, ppl: 9.464642 +epoch: 1, batch: 20639, sum loss: 4145.182129, avg loss: 2.705733, ppl: 14.965276 +epoch: 1, batch: 20640, sum loss: 
4449.518555, avg loss: 2.492728, ppl: 12.094218 +epoch: 1, batch: 20641, sum loss: 4760.209961, avg loss: 2.862423, ppl: 17.503895 +epoch: 1, batch: 20642, sum loss: 4672.586914, avg loss: 2.792939, ppl: 16.328939 +epoch: 1, batch: 20643, sum loss: 5376.269043, avg loss: 3.164373, ppl: 23.673889 +epoch: 1, batch: 20644, sum loss: 4413.532227, avg loss: 2.639673, ppl: 14.008615 +epoch: 1, batch: 20645, sum loss: 5196.832031, avg loss: 2.740945, ppl: 15.501633 +epoch: 1, batch: 20646, sum loss: 4578.125977, avg loss: 2.608619, ppl: 13.580280 +epoch: 1, batch: 20647, sum loss: 3767.733398, avg loss: 2.770392, ppl: 15.964894 +epoch: 1, batch: 20648, sum loss: 4857.084961, avg loss: 2.731769, ppl: 15.360032 +epoch: 1, batch: 20649, sum loss: 4424.287598, avg loss: 2.742894, ppl: 15.531864 +epoch: 1, batch: 20650, sum loss: 4177.243164, avg loss: 2.694996, ppl: 14.805454 +epoch: 1, batch: 20651, sum loss: 4588.976562, avg loss: 2.713765, ppl: 15.085970 +epoch: 1, batch: 20652, sum loss: 4246.583008, avg loss: 2.759313, ppl: 15.788994 +epoch: 1, batch: 20653, sum loss: 4317.500488, avg loss: 2.588430, ppl: 13.308856 +epoch: 1, batch: 20654, sum loss: 4778.130859, avg loss: 2.717936, ppl: 15.149015 +epoch: 1, batch: 20655, sum loss: 4683.339844, avg loss: 2.691575, ppl: 14.754890 +epoch: 1, batch: 20656, sum loss: 4754.136719, avg loss: 2.775328, ppl: 16.043888 +epoch: 1, batch: 20657, sum loss: 4700.460449, avg loss: 2.802898, ppl: 16.492374 +epoch: 1, batch: 20658, sum loss: 4298.768066, avg loss: 2.581843, ppl: 13.221478 +epoch: 1, batch: 20659, sum loss: 4249.242676, avg loss: 2.482034, ppl: 11.965580 +epoch: 1, batch: 20660, sum loss: 4678.044434, avg loss: 2.645953, ppl: 14.096872 +epoch: 1, batch: 20661, sum loss: 4376.597168, avg loss: 2.404724, ppl: 11.075372 +epoch: 1, batch: 20662, sum loss: 3663.439453, avg loss: 2.618613, ppl: 13.716685 +epoch: 1, batch: 20663, sum loss: 4318.092773, avg loss: 2.668784, ppl: 14.422423 +epoch: 1, batch: 20664, sum loss: 
4302.584961, avg loss: 2.488482, ppl: 12.042978 +epoch: 1, batch: 20665, sum loss: 4393.449219, avg loss: 2.697022, ppl: 14.835488 +epoch: 1, batch: 20666, sum loss: 5197.951172, avg loss: 2.970258, ppl: 19.496944 +epoch: 1, batch: 20667, sum loss: 4965.447266, avg loss: 2.834160, ppl: 17.016094 +epoch: 1, batch: 20668, sum loss: 4569.708008, avg loss: 2.946298, ppl: 19.035353 +epoch: 1, batch: 20669, sum loss: 4530.138184, avg loss: 2.555070, ppl: 12.872196 +epoch: 1, batch: 20670, sum loss: 5215.557617, avg loss: 2.765407, ppl: 15.885506 +epoch: 1, batch: 20671, sum loss: 5971.978027, avg loss: 2.871143, ppl: 17.657194 +epoch: 1, batch: 20672, sum loss: 3887.138916, avg loss: 2.752931, ppl: 15.688550 +epoch: 1, batch: 20673, sum loss: 4625.046875, avg loss: 2.647422, ppl: 14.117601 +epoch: 1, batch: 20674, sum loss: 4471.081543, avg loss: 2.829798, ppl: 16.942045 +epoch: 1, batch: 20675, sum loss: 4629.597656, avg loss: 2.724896, ppl: 15.254823 +epoch: 1, batch: 20676, sum loss: 5423.979492, avg loss: 2.729733, ppl: 15.328790 +epoch: 1, batch: 20677, sum loss: 4521.163086, avg loss: 2.806433, ppl: 16.550772 +epoch: 1, batch: 20678, sum loss: 4430.906250, avg loss: 2.562699, ppl: 12.970777 +epoch: 1, batch: 20679, sum loss: 5601.237793, avg loss: 2.940282, ppl: 18.921188 +epoch: 1, batch: 20680, sum loss: 4886.177246, avg loss: 2.693593, ppl: 14.784698 +epoch: 1, batch: 20681, sum loss: 5796.414062, avg loss: 2.813793, ppl: 16.673042 +epoch: 1, batch: 20682, sum loss: 4741.914062, avg loss: 2.659514, ppl: 14.289349 +epoch: 1, batch: 20683, sum loss: 3658.617676, avg loss: 2.427749, ppl: 11.333341 +epoch: 1, batch: 20684, sum loss: 4900.791992, avg loss: 2.790884, ppl: 16.295420 +epoch: 1, batch: 20685, sum loss: 4345.776367, avg loss: 2.429165, ppl: 11.349402 +epoch: 1, batch: 20686, sum loss: 5033.748047, avg loss: 2.782614, ppl: 16.161207 +epoch: 1, batch: 20687, sum loss: 4613.437012, avg loss: 2.742828, ppl: 15.530847 +epoch: 1, batch: 20688, sum loss: 
3913.679932, avg loss: 2.564666, ppl: 12.996314 +epoch: 1, batch: 20689, sum loss: 4915.873047, avg loss: 2.981124, ppl: 19.709951 +epoch: 1, batch: 20690, sum loss: 4239.251465, avg loss: 2.634712, ppl: 13.939297 +epoch: 1, batch: 20691, sum loss: 4790.651367, avg loss: 2.745359, ppl: 15.570202 +epoch: 1, batch: 20692, sum loss: 4941.319336, avg loss: 2.722490, ppl: 15.218169 +epoch: 1, batch: 20693, sum loss: 5231.978027, avg loss: 2.837298, ppl: 17.069588 +epoch: 1, batch: 20694, sum loss: 4636.460449, avg loss: 2.616513, ppl: 13.687907 +epoch: 1, batch: 20695, sum loss: 4228.237305, avg loss: 2.333464, ppl: 10.313610 +epoch: 1, batch: 20696, sum loss: 5155.827637, avg loss: 2.697974, ppl: 14.849612 +epoch: 1, batch: 20697, sum loss: 4727.613281, avg loss: 2.784225, ppl: 16.187263 +epoch: 1, batch: 20698, sum loss: 4258.227051, avg loss: 2.531645, ppl: 12.574174 +epoch: 1, batch: 20699, sum loss: 3608.353027, avg loss: 2.380180, ppl: 10.806849 +epoch: 1, batch: 20700, sum loss: 4959.608887, avg loss: 2.832444, ppl: 16.986921 +epoch: 1, batch: 20701, sum loss: 4009.980469, avg loss: 2.773154, ppl: 16.009047 +epoch: 1, batch: 20702, sum loss: 4195.468262, avg loss: 2.470829, ppl: 11.832257 +epoch: 1, batch: 20703, sum loss: 5375.956055, avg loss: 2.986642, ppl: 19.819025 +epoch: 1, batch: 20704, sum loss: 4934.653809, avg loss: 2.663062, ppl: 14.340130 +epoch: 1, batch: 20705, sum loss: 4257.605469, avg loss: 2.608827, ppl: 13.583107 +epoch: 1, batch: 20706, sum loss: 5751.495117, avg loss: 2.897479, ppl: 18.128378 +epoch: 1, batch: 20707, sum loss: 4245.586914, avg loss: 2.771271, ppl: 15.978930 +epoch: 1, batch: 20708, sum loss: 5149.918457, avg loss: 2.911203, ppl: 18.378902 +epoch: 1, batch: 20709, sum loss: 4309.765625, avg loss: 2.666934, ppl: 14.395764 +epoch: 1, batch: 20710, sum loss: 5306.611328, avg loss: 2.848423, ppl: 17.260532 +epoch: 1, batch: 20711, sum loss: 4675.538086, avg loss: 2.814893, ppl: 16.691399 +epoch: 1, batch: 20712, sum loss: 
4805.485352, avg loss: 2.900112, ppl: 18.176174 +epoch: 1, batch: 20713, sum loss: 4264.285645, avg loss: 2.814710, ppl: 16.688337 +epoch: 1, batch: 20714, sum loss: 5550.869629, avg loss: 2.827748, ppl: 16.907347 +epoch: 1, batch: 20715, sum loss: 5039.799805, avg loss: 2.665151, ppl: 14.370114 +epoch: 1, batch: 20716, sum loss: 4463.768555, avg loss: 2.692261, ppl: 14.765021 +epoch: 1, batch: 20717, sum loss: 4223.892578, avg loss: 2.592936, ppl: 13.368962 +epoch: 1, batch: 20718, sum loss: 4082.735352, avg loss: 2.693097, ppl: 14.777376 +epoch: 1, batch: 20719, sum loss: 4317.893555, avg loss: 2.710542, ppl: 15.037427 +epoch: 1, batch: 20720, sum loss: 5033.323730, avg loss: 2.911119, ppl: 18.377342 +epoch: 1, batch: 20721, sum loss: 4086.635742, avg loss: 2.746395, ppl: 15.586340 +epoch: 1, batch: 20722, sum loss: 3705.171875, avg loss: 2.305645, ppl: 10.030648 +epoch: 1, batch: 20723, sum loss: 4846.484375, avg loss: 2.695486, ppl: 14.812717 +epoch: 1, batch: 20724, sum loss: 4504.457031, avg loss: 2.556446, ppl: 12.889920 +epoch: 1, batch: 20725, sum loss: 4512.631836, avg loss: 2.941742, ppl: 18.948820 +epoch: 1, batch: 20726, sum loss: 3929.188477, avg loss: 2.631740, ppl: 13.897936 +epoch: 1, batch: 20727, sum loss: 4096.113281, avg loss: 2.579417, ppl: 13.189440 +epoch: 1, batch: 20728, sum loss: 4543.160156, avg loss: 2.916021, ppl: 18.467651 +epoch: 1, batch: 20729, sum loss: 5132.914062, avg loss: 2.705806, ppl: 14.966375 +epoch: 1, batch: 20730, sum loss: 4358.053223, avg loss: 2.688497, ppl: 14.709545 +epoch: 1, batch: 20731, sum loss: 5018.158203, avg loss: 2.798750, ppl: 16.424099 +epoch: 1, batch: 20732, sum loss: 5318.118652, avg loss: 2.782898, ppl: 16.165804 +epoch: 1, batch: 20733, sum loss: 5263.447754, avg loss: 2.937192, ppl: 18.862804 +epoch: 1, batch: 20734, sum loss: 4631.086426, avg loss: 2.709822, ppl: 15.026607 +epoch: 1, batch: 20735, sum loss: 6024.736328, avg loss: 2.789230, ppl: 16.268482 +epoch: 1, batch: 20736, sum loss: 
3857.645508, avg loss: 2.658612, ppl: 14.276453 +epoch: 1, batch: 20737, sum loss: 4637.464844, avg loss: 2.957567, ppl: 19.251072 +epoch: 1, batch: 20738, sum loss: 4406.142090, avg loss: 2.576691, ppl: 13.153543 +epoch: 1, batch: 20739, sum loss: 4336.386719, avg loss: 2.509483, ppl: 12.298572 +epoch: 1, batch: 20740, sum loss: 4393.842285, avg loss: 2.564998, ppl: 13.000637 +epoch: 1, batch: 20741, sum loss: 4728.774414, avg loss: 2.857266, ppl: 17.413847 +epoch: 1, batch: 20742, sum loss: 4714.907227, avg loss: 2.587765, ppl: 13.300010 +epoch: 1, batch: 20743, sum loss: 5004.631836, avg loss: 2.813171, ppl: 16.662678 +epoch: 1, batch: 20744, sum loss: 5214.668457, avg loss: 2.817217, ppl: 16.730223 +epoch: 1, batch: 20745, sum loss: 3371.005859, avg loss: 2.506324, ppl: 12.259781 +epoch: 1, batch: 20746, sum loss: 3779.210449, avg loss: 2.538086, ppl: 12.655428 +epoch: 1, batch: 20747, sum loss: 4368.349121, avg loss: 2.606414, ppl: 13.550366 +epoch: 1, batch: 20748, sum loss: 4484.558594, avg loss: 2.611857, ppl: 13.624331 +epoch: 1, batch: 20749, sum loss: 4846.675293, avg loss: 2.758495, ppl: 15.776076 +epoch: 1, batch: 20750, sum loss: 4416.913086, avg loss: 2.670443, ppl: 14.446372 +epoch: 1, batch: 20751, sum loss: 4409.856445, avg loss: 2.614023, ppl: 13.653870 +epoch: 1, batch: 20752, sum loss: 4480.935059, avg loss: 2.766009, ppl: 15.895075 +epoch: 1, batch: 20753, sum loss: 5749.588867, avg loss: 3.061549, ppl: 21.360619 +epoch: 1, batch: 20754, sum loss: 5020.736328, avg loss: 2.767771, ppl: 15.923102 +epoch: 1, batch: 20755, sum loss: 4655.812500, avg loss: 2.695896, ppl: 14.818792 +epoch: 1, batch: 20756, sum loss: 4987.681152, avg loss: 2.786414, ppl: 16.222738 +epoch: 1, batch: 20757, sum loss: 3606.055176, avg loss: 2.511180, ppl: 12.319464 +epoch: 1, batch: 20758, sum loss: 4520.338379, avg loss: 2.693885, ppl: 14.789014 +epoch: 1, batch: 20759, sum loss: 4385.231445, avg loss: 2.672292, ppl: 14.473104 +epoch: 1, batch: 20760, sum loss: 
4469.775391, avg loss: 2.995828, ppl: 20.001917 +epoch: 1, batch: 20761, sum loss: 5994.659668, avg loss: 3.018459, ppl: 20.459740 +epoch: 1, batch: 20762, sum loss: 4215.196777, avg loss: 2.598765, ppl: 13.447119 +epoch: 1, batch: 20763, sum loss: 4895.354004, avg loss: 2.832959, ppl: 16.995684 +epoch: 1, batch: 20764, sum loss: 4479.821777, avg loss: 2.724952, ppl: 15.255689 +epoch: 1, batch: 20765, sum loss: 4773.222168, avg loss: 2.744809, ppl: 15.561636 +epoch: 1, batch: 20766, sum loss: 4994.019531, avg loss: 2.886716, ppl: 17.934322 +epoch: 1, batch: 20767, sum loss: 4100.552734, avg loss: 2.529644, ppl: 12.549035 +epoch: 1, batch: 20768, sum loss: 4159.077637, avg loss: 2.666075, ppl: 14.383410 +epoch: 1, batch: 20769, sum loss: 3576.254150, avg loss: 2.368380, ppl: 10.680079 +epoch: 1, batch: 20770, sum loss: 5288.357422, avg loss: 3.027108, ppl: 20.637457 +epoch: 1, batch: 20771, sum loss: 4577.057129, avg loss: 2.661080, ppl: 14.311732 +epoch: 1, batch: 20772, sum loss: 4261.721191, avg loss: 2.653625, ppl: 14.205434 +epoch: 1, batch: 20773, sum loss: 5079.664062, avg loss: 2.884534, ppl: 17.895220 +epoch: 1, batch: 20774, sum loss: 4235.542969, avg loss: 2.528682, ppl: 12.536978 +epoch: 1, batch: 20775, sum loss: 4478.201660, avg loss: 2.631141, ppl: 13.889605 +epoch: 1, batch: 20776, sum loss: 4637.582031, avg loss: 2.603920, ppl: 13.516625 +epoch: 1, batch: 20777, sum loss: 4507.471191, avg loss: 2.608490, ppl: 13.578535 +epoch: 1, batch: 20778, sum loss: 4505.863770, avg loss: 2.527125, ppl: 12.517468 +epoch: 1, batch: 20779, sum loss: 4803.579102, avg loss: 2.712354, ppl: 15.064695 +epoch: 1, batch: 20780, sum loss: 4475.574219, avg loss: 2.791999, ppl: 16.313597 +epoch: 1, batch: 20781, sum loss: 5179.671875, avg loss: 2.821172, ppl: 16.796524 +epoch: 1, batch: 20782, sum loss: 5814.590820, avg loss: 2.994125, ppl: 19.967882 +epoch: 1, batch: 20783, sum loss: 3589.414062, avg loss: 2.229450, ppl: 9.294750 +epoch: 1, batch: 20784, sum loss: 
4429.159668, avg loss: 2.676229, ppl: 14.530204 +epoch: 1, batch: 20785, sum loss: 5350.593262, avg loss: 3.123522, ppl: 22.726282 +epoch: 1, batch: 20786, sum loss: 4148.809082, avg loss: 2.520540, ppl: 12.435313 +epoch: 1, batch: 20787, sum loss: 4764.644043, avg loss: 2.778218, ppl: 16.090322 +epoch: 1, batch: 20788, sum loss: 3956.933105, avg loss: 2.673604, ppl: 14.492098 +epoch: 1, batch: 20789, sum loss: 4248.520508, avg loss: 2.672025, ppl: 14.469246 +epoch: 1, batch: 20790, sum loss: 3623.423096, avg loss: 2.604905, ppl: 13.529942 +epoch: 1, batch: 20791, sum loss: 4411.439453, avg loss: 2.659096, ppl: 14.283364 +epoch: 1, batch: 20792, sum loss: 4695.953125, avg loss: 2.790228, ppl: 16.284727 +epoch: 1, batch: 20793, sum loss: 3656.724609, avg loss: 2.329124, ppl: 10.268942 +epoch: 1, batch: 20794, sum loss: 5329.856445, avg loss: 2.965975, ppl: 19.413618 +epoch: 1, batch: 20795, sum loss: 4620.489746, avg loss: 2.795214, ppl: 16.366138 +epoch: 1, batch: 20796, sum loss: 4554.728516, avg loss: 2.540284, ppl: 12.683269 +epoch: 1, batch: 20797, sum loss: 4722.204102, avg loss: 2.747065, ppl: 15.596786 +epoch: 1, batch: 20798, sum loss: 5406.242188, avg loss: 2.741502, ppl: 15.510264 +epoch: 1, batch: 20799, sum loss: 5013.222656, avg loss: 2.765153, ppl: 15.881476 +epoch: 1, batch: 20800, sum loss: 4831.750977, avg loss: 3.151827, ppl: 23.378740 +epoch: 1, batch: 20801, sum loss: 4773.687988, avg loss: 2.521758, ppl: 12.450463 +epoch: 1, batch: 20802, sum loss: 3520.794434, avg loss: 2.516651, ppl: 12.387042 +epoch: 1, batch: 20803, sum loss: 4443.687500, avg loss: 2.576051, ppl: 13.145123 +epoch: 1, batch: 20804, sum loss: 5716.355957, avg loss: 2.932969, ppl: 18.783312 +epoch: 1, batch: 20805, sum loss: 4259.994629, avg loss: 2.679242, ppl: 14.574040 +epoch: 1, batch: 20806, sum loss: 4749.178711, avg loss: 2.759546, ppl: 15.792672 +epoch: 1, batch: 20807, sum loss: 3828.587402, avg loss: 2.470057, ppl: 11.823115 +epoch: 1, batch: 20808, sum loss: 
4941.038086, avg loss: 2.782116, ppl: 16.153168 +epoch: 1, batch: 20809, sum loss: 5199.471680, avg loss: 2.838140, ppl: 17.083952 +epoch: 1, batch: 20810, sum loss: 4147.163574, avg loss: 2.600103, ppl: 13.465120 +epoch: 1, batch: 20811, sum loss: 4624.543457, avg loss: 2.670060, ppl: 14.440831 +epoch: 1, batch: 20812, sum loss: 4663.073242, avg loss: 2.787252, ppl: 16.236347 +epoch: 1, batch: 20813, sum loss: 4540.830566, avg loss: 2.655457, ppl: 14.231482 +epoch: 1, batch: 20814, sum loss: 4710.246094, avg loss: 2.762608, ppl: 15.841100 +epoch: 1, batch: 20815, sum loss: 5734.654785, avg loss: 3.024607, ppl: 20.585917 +epoch: 1, batch: 20816, sum loss: 4302.848633, avg loss: 2.593640, ppl: 13.378378 +epoch: 1, batch: 20817, sum loss: 5518.831055, avg loss: 2.907709, ppl: 18.314789 +epoch: 1, batch: 20818, sum loss: 4759.759766, avg loss: 2.796569, ppl: 16.388315 +epoch: 1, batch: 20819, sum loss: 5340.728516, avg loss: 2.975336, ppl: 19.596212 +epoch: 1, batch: 20820, sum loss: 3993.582031, avg loss: 2.623904, ppl: 13.789455 +epoch: 1, batch: 20821, sum loss: 5526.963379, avg loss: 3.152860, ppl: 23.402899 +epoch: 1, batch: 20822, sum loss: 4529.662598, avg loss: 2.681861, ppl: 14.612257 +epoch: 1, batch: 20823, sum loss: 4562.991699, avg loss: 2.733968, ppl: 15.393842 +epoch: 1, batch: 20824, sum loss: 4464.099121, avg loss: 2.673113, ppl: 14.484996 +epoch: 1, batch: 20825, sum loss: 4474.690918, avg loss: 2.883177, ppl: 17.870964 +epoch: 1, batch: 20826, sum loss: 4337.426270, avg loss: 2.578731, ppl: 13.180406 +epoch: 1, batch: 20827, sum loss: 4304.000000, avg loss: 2.661719, ppl: 14.320890 +epoch: 1, batch: 20828, sum loss: 4979.618652, avg loss: 2.845496, ppl: 17.210100 +epoch: 1, batch: 20829, sum loss: 5249.156738, avg loss: 3.051835, ppl: 21.154133 +epoch: 1, batch: 20830, sum loss: 5463.586426, avg loss: 3.052283, ppl: 21.163601 +epoch: 1, batch: 20831, sum loss: 4245.424805, avg loss: 2.614178, ppl: 13.655983 +epoch: 1, batch: 20832, sum loss: 
4397.722656, avg loss: 2.517300, ppl: 12.395081 +epoch: 1, batch: 20833, sum loss: 5155.559570, avg loss: 2.676822, ppl: 14.538821 +epoch: 1, batch: 20834, sum loss: 4210.607910, avg loss: 2.786637, ppl: 16.226353 +epoch: 1, batch: 20835, sum loss: 4021.712891, avg loss: 2.465796, ppl: 11.772847 +epoch: 1, batch: 20836, sum loss: 3807.321777, avg loss: 2.488446, ppl: 12.042544 +epoch: 1, batch: 20837, sum loss: 4443.506836, avg loss: 2.688147, ppl: 14.704401 +epoch: 1, batch: 20838, sum loss: 3905.003174, avg loss: 2.557304, ppl: 12.900989 +epoch: 1, batch: 20839, sum loss: 4272.507812, avg loss: 2.690496, ppl: 14.738988 +epoch: 1, batch: 20840, sum loss: 4973.056641, avg loss: 2.724962, ppl: 15.255841 +epoch: 1, batch: 20841, sum loss: 5050.915039, avg loss: 2.516649, ppl: 12.387021 +epoch: 1, batch: 20842, sum loss: 4685.926270, avg loss: 2.644428, ppl: 14.075392 +epoch: 1, batch: 20843, sum loss: 4343.569824, avg loss: 2.486302, ppl: 12.016758 +epoch: 1, batch: 20844, sum loss: 4638.959473, avg loss: 2.683030, ppl: 14.629358 +epoch: 1, batch: 20845, sum loss: 4939.386719, avg loss: 2.895303, ppl: 18.088978 +epoch: 1, batch: 20846, sum loss: 3751.819092, avg loss: 2.447371, ppl: 11.557916 +epoch: 1, batch: 20847, sum loss: 4625.865234, avg loss: 2.857236, ppl: 17.413332 +epoch: 1, batch: 20848, sum loss: 4494.938965, avg loss: 2.680345, ppl: 14.590131 +epoch: 1, batch: 20849, sum loss: 4752.035645, avg loss: 2.806873, ppl: 16.558058 +epoch: 1, batch: 20850, sum loss: 4617.757324, avg loss: 2.766781, ppl: 15.907348 +epoch: 1, batch: 20851, sum loss: 6155.198242, avg loss: 3.138806, ppl: 23.076294 +epoch: 1, batch: 20852, sum loss: 4253.885254, avg loss: 2.567221, ppl: 13.029564 +epoch: 1, batch: 20853, sum loss: 5069.460938, avg loss: 2.816367, ppl: 16.716013 +epoch: 1, batch: 20854, sum loss: 3786.278320, avg loss: 2.304491, ppl: 10.019073 +epoch: 1, batch: 20855, sum loss: 4774.552246, avg loss: 2.909538, ppl: 18.348320 +epoch: 1, batch: 20856, sum loss: 
4877.607910, avg loss: 2.839120, ppl: 17.100708 +epoch: 1, batch: 20857, sum loss: 4517.407227, avg loss: 2.693743, ppl: 14.786920 +epoch: 1, batch: 20858, sum loss: 4899.911133, avg loss: 2.819281, ppl: 16.764797 +epoch: 1, batch: 20859, sum loss: 5054.279297, avg loss: 2.789337, ppl: 16.270235 +epoch: 1, batch: 20860, sum loss: 4350.988281, avg loss: 2.592961, ppl: 13.369297 +epoch: 1, batch: 20861, sum loss: 4723.877930, avg loss: 2.633154, ppl: 13.917596 +epoch: 1, batch: 20862, sum loss: 4645.663574, avg loss: 2.793544, ppl: 16.338823 +epoch: 1, batch: 20863, sum loss: 4379.560547, avg loss: 2.546256, ppl: 12.759245 +epoch: 1, batch: 20864, sum loss: 5484.481934, avg loss: 3.058830, ppl: 21.302620 +epoch: 1, batch: 20865, sum loss: 3692.918457, avg loss: 2.679912, ppl: 14.583808 +epoch: 1, batch: 20866, sum loss: 4482.167480, avg loss: 2.785685, ppl: 16.210920 +epoch: 1, batch: 20867, sum loss: 5078.292969, avg loss: 2.743540, ppl: 15.541910 +epoch: 1, batch: 20868, sum loss: 4398.852539, avg loss: 2.576949, ppl: 13.156940 +epoch: 1, batch: 20869, sum loss: 4384.098633, avg loss: 2.736641, ppl: 15.435047 +epoch: 1, batch: 20870, sum loss: 4653.000488, avg loss: 2.599442, ppl: 13.456221 +epoch: 1, batch: 20871, sum loss: 4604.284180, avg loss: 2.842151, ppl: 17.152620 +epoch: 1, batch: 20872, sum loss: 4318.023438, avg loss: 2.700452, ppl: 14.886464 +epoch: 1, batch: 20873, sum loss: 5392.169434, avg loss: 3.000651, ppl: 20.098614 +epoch: 1, batch: 20874, sum loss: 5213.094727, avg loss: 2.831665, ppl: 16.973696 +epoch: 1, batch: 20875, sum loss: 4223.224121, avg loss: 2.628018, ppl: 13.846294 +epoch: 1, batch: 20876, sum loss: 5615.599609, avg loss: 3.092290, ppl: 22.027472 +epoch: 1, batch: 20877, sum loss: 4322.123047, avg loss: 2.730337, ppl: 15.338050 +epoch: 1, batch: 20878, sum loss: 4239.133301, avg loss: 2.575415, ppl: 13.136769 +epoch: 1, batch: 20879, sum loss: 4618.075684, avg loss: 2.891719, ppl: 18.024273 +epoch: 1, batch: 20880, sum loss: 
3997.708496, avg loss: 2.463160, ppl: 11.741855 +epoch: 1, batch: 20881, sum loss: 4818.697754, avg loss: 2.683017, ppl: 14.629156 +epoch: 1, batch: 20882, sum loss: 3995.048828, avg loss: 2.649237, ppl: 14.143239 +epoch: 1, batch: 20883, sum loss: 4652.023438, avg loss: 2.775671, ppl: 16.049385 +epoch: 1, batch: 20884, sum loss: 3978.218262, avg loss: 2.553414, ppl: 12.850897 +epoch: 1, batch: 20885, sum loss: 4607.296875, avg loss: 2.506690, ppl: 12.264272 +epoch: 1, batch: 20886, sum loss: 4453.378418, avg loss: 2.816811, ppl: 16.723436 +epoch: 1, batch: 20887, sum loss: 3153.275391, avg loss: 2.159778, ppl: 8.669209 +epoch: 1, batch: 20888, sum loss: 4591.402344, avg loss: 2.772586, ppl: 15.999954 +epoch: 1, batch: 20889, sum loss: 5527.934570, avg loss: 3.015785, ppl: 20.405111 +epoch: 1, batch: 20890, sum loss: 4219.704102, avg loss: 2.763395, ppl: 15.853576 +epoch: 1, batch: 20891, sum loss: 4790.137695, avg loss: 2.759296, ppl: 15.788723 +epoch: 1, batch: 20892, sum loss: 5096.260254, avg loss: 2.858250, ppl: 17.431002 +epoch: 1, batch: 20893, sum loss: 5078.220703, avg loss: 2.761404, ppl: 15.822035 +epoch: 1, batch: 20894, sum loss: 4568.714844, avg loss: 2.656229, ppl: 14.242486 +epoch: 1, batch: 20895, sum loss: 5239.975098, avg loss: 2.702411, ppl: 14.915652 +epoch: 1, batch: 20896, sum loss: 3935.411621, avg loss: 2.537338, ppl: 12.645966 +epoch: 1, batch: 20897, sum loss: 4341.493652, avg loss: 2.462560, ppl: 11.734817 +epoch: 1, batch: 20898, sum loss: 3452.864258, avg loss: 2.306522, ppl: 10.039451 +epoch: 1, batch: 20899, sum loss: 4722.360352, avg loss: 2.484145, ppl: 11.990869 +epoch: 1, batch: 20900, sum loss: 4563.018555, avg loss: 2.887986, ppl: 17.957115 +epoch: 1, batch: 20901, sum loss: 5071.518066, avg loss: 2.842779, ppl: 17.163403 +epoch: 1, batch: 20902, sum loss: 4772.869141, avg loss: 2.810877, ppl: 16.624493 +epoch: 1, batch: 20903, sum loss: 4318.211914, avg loss: 2.590409, ppl: 13.335225 +epoch: 1, batch: 20904, sum loss: 
5074.827637, avg loss: 2.654198, ppl: 14.213588 +epoch: 1, batch: 20905, sum loss: 4660.802734, avg loss: 2.935014, ppl: 18.821775 +epoch: 1, batch: 20906, sum loss: 5081.549805, avg loss: 2.818386, ppl: 16.749792 +epoch: 1, batch: 20907, sum loss: 4405.579590, avg loss: 2.686329, ppl: 14.677697 +epoch: 1, batch: 20908, sum loss: 4268.452148, avg loss: 2.572907, ppl: 13.103859 +epoch: 1, batch: 20909, sum loss: 4116.000488, avg loss: 2.503650, ppl: 12.227040 +epoch: 1, batch: 20910, sum loss: 5079.917480, avg loss: 2.732607, ppl: 15.372917 +epoch: 1, batch: 20911, sum loss: 4865.119629, avg loss: 2.679030, ppl: 14.570959 +epoch: 1, batch: 20912, sum loss: 4093.253662, avg loss: 2.489814, ppl: 12.059028 +epoch: 1, batch: 20913, sum loss: 4360.153809, avg loss: 2.766595, ppl: 15.904386 +epoch: 1, batch: 20914, sum loss: 4725.518066, avg loss: 2.508237, ppl: 12.283255 +epoch: 1, batch: 20915, sum loss: 4918.683594, avg loss: 2.788369, ppl: 16.254494 +epoch: 1, batch: 20916, sum loss: 4276.760742, avg loss: 2.878036, ppl: 17.779312 +epoch: 1, batch: 20917, sum loss: 4626.870117, avg loss: 2.560526, ppl: 12.942619 +epoch: 1, batch: 20918, sum loss: 4020.251465, avg loss: 2.763059, ppl: 15.848254 +epoch: 1, batch: 20919, sum loss: 4069.253418, avg loss: 2.642372, ppl: 14.046488 +epoch: 1, batch: 20920, sum loss: 4288.506836, avg loss: 2.675301, ppl: 14.516713 +epoch: 1, batch: 20921, sum loss: 4666.996582, avg loss: 2.660773, ppl: 14.307348 +epoch: 1, batch: 20922, sum loss: 4656.945801, avg loss: 2.671799, ppl: 14.465969 +epoch: 1, batch: 20923, sum loss: 4363.259766, avg loss: 2.816823, ppl: 16.723642 +epoch: 1, batch: 20924, sum loss: 4377.933594, avg loss: 2.748232, ppl: 15.615002 +epoch: 1, batch: 20925, sum loss: 4268.684570, avg loss: 2.705123, ppl: 14.956159 +epoch: 1, batch: 20926, sum loss: 3143.302490, avg loss: 2.232459, ppl: 9.322765 +epoch: 1, batch: 20927, sum loss: 4625.887207, avg loss: 2.663148, ppl: 14.341360 +epoch: 1, batch: 20928, sum loss: 
4654.627441, avg loss: 2.932973, ppl: 18.783384 +epoch: 1, batch: 20929, sum loss: 4445.627441, avg loss: 2.525925, ppl: 12.502451 +epoch: 1, batch: 20930, sum loss: 4462.236816, avg loss: 2.525318, ppl: 12.494866 +epoch: 1, batch: 20931, sum loss: 4153.819336, avg loss: 2.882595, ppl: 17.860563 +epoch: 1, batch: 20932, sum loss: 4584.356445, avg loss: 2.802174, ppl: 16.480438 +epoch: 1, batch: 20933, sum loss: 4290.480469, avg loss: 2.569150, ppl: 13.054723 +epoch: 1, batch: 20934, sum loss: 4269.801758, avg loss: 2.760053, ppl: 15.800679 +epoch: 1, batch: 20935, sum loss: 3549.292969, avg loss: 2.412844, ppl: 11.165668 +epoch: 1, batch: 20936, sum loss: 4929.284180, avg loss: 2.753790, ppl: 15.702028 +epoch: 1, batch: 20937, sum loss: 3994.654297, avg loss: 2.723009, ppl: 15.226067 +epoch: 1, batch: 20938, sum loss: 4344.731934, avg loss: 2.869704, ppl: 17.631804 +epoch: 1, batch: 20939, sum loss: 3739.103760, avg loss: 2.625775, ppl: 13.815278 +epoch: 1, batch: 20940, sum loss: 3964.423340, avg loss: 2.466972, ppl: 11.786698 +epoch: 1, batch: 20941, sum loss: 4473.197266, avg loss: 2.751044, ppl: 15.658968 +epoch: 1, batch: 20942, sum loss: 3382.981934, avg loss: 2.258332, ppl: 9.567121 +epoch: 1, batch: 20943, sum loss: 3695.841309, avg loss: 2.442724, ppl: 11.504335 +epoch: 1, batch: 20944, sum loss: 4897.423828, avg loss: 2.845685, ppl: 17.213346 +epoch: 1, batch: 20945, sum loss: 4823.118652, avg loss: 2.765550, ppl: 15.887771 +epoch: 1, batch: 20946, sum loss: 3989.328369, avg loss: 2.677402, ppl: 14.547247 +epoch: 1, batch: 20947, sum loss: 4223.200195, avg loss: 2.603699, ppl: 13.513636 +epoch: 1, batch: 20948, sum loss: 3977.395752, avg loss: 2.431171, ppl: 11.372193 +epoch: 1, batch: 20949, sum loss: 4479.024902, avg loss: 2.391364, ppl: 10.928391 +epoch: 1, batch: 20950, sum loss: 4955.122070, avg loss: 2.788476, ppl: 16.256226 +epoch: 1, batch: 20951, sum loss: 4829.254395, avg loss: 2.806075, ppl: 16.544846 +epoch: 1, batch: 20952, sum loss: 
4680.911621, avg loss: 2.688634, ppl: 14.711565 +epoch: 1, batch: 20953, sum loss: 4293.156738, avg loss: 2.546356, ppl: 12.760526 +epoch: 1, batch: 20954, sum loss: 5026.835449, avg loss: 2.706966, ppl: 14.983741 +epoch: 1, batch: 20955, sum loss: 5058.792480, avg loss: 2.779557, ppl: 16.111874 +epoch: 1, batch: 20956, sum loss: 4604.106934, avg loss: 2.646039, ppl: 14.098079 +epoch: 1, batch: 20957, sum loss: 3848.066895, avg loss: 2.655671, ppl: 14.234532 +epoch: 1, batch: 20958, sum loss: 4107.161621, avg loss: 2.771364, ppl: 15.980420 +epoch: 1, batch: 20959, sum loss: 4963.481934, avg loss: 2.885745, ppl: 17.916916 +epoch: 1, batch: 20960, sum loss: 5107.485352, avg loss: 2.820257, ppl: 16.781166 +epoch: 1, batch: 20961, sum loss: 5058.203613, avg loss: 2.917072, ppl: 18.487083 +epoch: 1, batch: 20962, sum loss: 4651.231445, avg loss: 2.844790, ppl: 17.197943 +epoch: 1, batch: 20963, sum loss: 5083.489258, avg loss: 2.660120, ppl: 14.298005 +epoch: 1, batch: 20964, sum loss: 4338.065918, avg loss: 2.650010, ppl: 14.154179 +epoch: 1, batch: 20965, sum loss: 4224.649902, avg loss: 2.534283, ppl: 12.607388 +epoch: 1, batch: 20966, sum loss: 4736.146484, avg loss: 2.909181, ppl: 18.341768 +epoch: 1, batch: 20967, sum loss: 3564.979980, avg loss: 2.553711, ppl: 12.854715 +epoch: 1, batch: 20968, sum loss: 4332.572266, avg loss: 2.622622, ppl: 13.771792 +epoch: 1, batch: 20969, sum loss: 4633.424316, avg loss: 2.757991, ppl: 15.768130 +epoch: 1, batch: 20970, sum loss: 4808.652832, avg loss: 2.435994, ppl: 11.427174 +epoch: 1, batch: 20971, sum loss: 3943.803223, avg loss: 2.610062, ppl: 13.599889 +epoch: 1, batch: 20972, sum loss: 3427.237061, avg loss: 2.355489, ppl: 10.543286 +epoch: 1, batch: 20973, sum loss: 5055.890625, avg loss: 2.934353, ppl: 18.809336 +epoch: 1, batch: 20974, sum loss: 3361.151123, avg loss: 2.400822, ppl: 11.032243 +epoch: 1, batch: 20975, sum loss: 4282.379883, avg loss: 2.559701, ppl: 12.931952 +epoch: 1, batch: 20976, sum loss: 
5002.552734, avg loss: 2.750166, ppl: 15.645235 +epoch: 1, batch: 20977, sum loss: 5126.192383, avg loss: 2.909303, ppl: 18.344017 +epoch: 1, batch: 20978, sum loss: 4928.903320, avg loss: 2.684588, ppl: 14.652160 +epoch: 1, batch: 20979, sum loss: 5010.660645, avg loss: 3.072140, ppl: 21.588057 +epoch: 1, batch: 20980, sum loss: 4286.265625, avg loss: 2.597737, ppl: 13.433302 +epoch: 1, batch: 20981, sum loss: 4771.517090, avg loss: 2.700349, ppl: 14.884931 +epoch: 1, batch: 20982, sum loss: 4370.076660, avg loss: 2.561592, ppl: 12.956432 +epoch: 1, batch: 20983, sum loss: 5181.742188, avg loss: 2.862841, ppl: 17.511208 +epoch: 1, batch: 20984, sum loss: 4583.604980, avg loss: 2.705788, ppl: 14.966104 +epoch: 1, batch: 20985, sum loss: 4691.350586, avg loss: 2.795799, ppl: 16.375708 +epoch: 1, batch: 20986, sum loss: 4183.107910, avg loss: 2.530616, ppl: 12.561239 +epoch: 1, batch: 20987, sum loss: 5110.970703, avg loss: 2.776193, ppl: 16.057768 +epoch: 1, batch: 20988, sum loss: 5244.827637, avg loss: 2.786837, ppl: 16.229607 +epoch: 1, batch: 20989, sum loss: 4689.088867, avg loss: 2.945408, ppl: 19.018414 +epoch: 1, batch: 20990, sum loss: 5275.859863, avg loss: 2.735023, ppl: 15.410102 +epoch: 1, batch: 20991, sum loss: 5433.736816, avg loss: 2.862875, ppl: 17.511805 +epoch: 1, batch: 20992, sum loss: 4625.283203, avg loss: 2.892610, ppl: 18.040331 +epoch: 1, batch: 20993, sum loss: 4465.779785, avg loss: 2.789369, ppl: 16.270752 +epoch: 1, batch: 20994, sum loss: 4686.847168, avg loss: 2.816615, ppl: 16.720160 +epoch: 1, batch: 20995, sum loss: 4315.534180, avg loss: 2.458994, ppl: 11.693039 +epoch: 1, batch: 20996, sum loss: 4633.779297, avg loss: 2.678485, ppl: 14.563016 +epoch: 1, batch: 20997, sum loss: 4147.955078, avg loss: 2.672652, ppl: 14.478308 +epoch: 1, batch: 20998, sum loss: 4902.031738, avg loss: 2.714303, ppl: 15.094090 +epoch: 1, batch: 20999, sum loss: 4121.488281, avg loss: 2.421556, ppl: 11.263371 +epoch: 1, batch: 21000, sum loss: 
4644.886230, avg loss: 2.844388, ppl: 17.191038 +epoch: 1, batch: 21001, sum loss: 5587.998047, avg loss: 2.898339, ppl: 18.143988 +epoch: 1, batch: 21002, sum loss: 4594.246582, avg loss: 2.726556, ppl: 15.280169 +epoch: 1, batch: 21003, sum loss: 4807.930176, avg loss: 2.744252, ppl: 15.552983 +epoch: 1, batch: 21004, sum loss: 3715.649170, avg loss: 2.646474, ppl: 14.104215 +epoch: 1, batch: 21005, sum loss: 4324.956055, avg loss: 2.580523, ppl: 13.204040 +epoch: 1, batch: 21006, sum loss: 4939.555176, avg loss: 2.733567, ppl: 15.387670 +epoch: 1, batch: 21007, sum loss: 5815.767578, avg loss: 2.792015, ppl: 16.313864 +epoch: 1, batch: 21008, sum loss: 4779.231445, avg loss: 2.665494, ppl: 14.375055 +epoch: 1, batch: 21009, sum loss: 4541.600586, avg loss: 2.605623, ppl: 13.539655 +epoch: 1, batch: 21010, sum loss: 3972.902832, avg loss: 2.481514, ppl: 11.959352 +epoch: 1, batch: 21011, sum loss: 5931.976562, avg loss: 2.822063, ppl: 16.811497 +epoch: 1, batch: 21012, sum loss: 4527.535156, avg loss: 2.764063, ppl: 15.864166 +epoch: 1, batch: 21013, sum loss: 5704.814453, avg loss: 3.031251, ppl: 20.723141 +epoch: 1, batch: 21014, sum loss: 4403.935059, avg loss: 2.750740, ppl: 15.654216 +epoch: 1, batch: 21015, sum loss: 5264.069824, avg loss: 2.721856, ppl: 15.208525 +epoch: 1, batch: 21016, sum loss: 4169.570312, avg loss: 2.674516, ppl: 14.505327 +epoch: 1, batch: 21017, sum loss: 4537.057129, avg loss: 2.728236, ppl: 15.305870 +epoch: 1, batch: 21018, sum loss: 4873.357422, avg loss: 2.750202, ppl: 15.645787 +epoch: 1, batch: 21019, sum loss: 4901.573242, avg loss: 2.834918, ppl: 17.029005 +epoch: 1, batch: 21020, sum loss: 4664.381836, avg loss: 2.700858, ppl: 14.892506 +epoch: 1, batch: 21021, sum loss: 5169.352539, avg loss: 2.871863, ppl: 17.669901 +epoch: 1, batch: 21022, sum loss: 4222.160645, avg loss: 2.692704, ppl: 14.771571 +epoch: 1, batch: 21023, sum loss: 4517.095215, avg loss: 2.814389, ppl: 16.682987 +epoch: 1, batch: 21024, sum loss: 
4890.517578, avg loss: 2.831800, ppl: 16.975983 +epoch: 1, batch: 21025, sum loss: 4255.244141, avg loss: 2.588348, ppl: 13.307768 +epoch: 1, batch: 21026, sum loss: 5200.413086, avg loss: 2.807998, ppl: 16.576706 +epoch: 1, batch: 21027, sum loss: 4982.854492, avg loss: 2.922495, ppl: 18.587612 +epoch: 1, batch: 21028, sum loss: 5454.491211, avg loss: 2.854260, ppl: 17.361588 +epoch: 1, batch: 21029, sum loss: 4367.130859, avg loss: 2.714190, ppl: 15.092373 +epoch: 1, batch: 21030, sum loss: 4538.441895, avg loss: 2.685469, ppl: 14.665072 +epoch: 1, batch: 21031, sum loss: 4616.067871, avg loss: 2.701035, ppl: 14.895133 +epoch: 1, batch: 21032, sum loss: 3949.451172, avg loss: 2.645312, ppl: 14.087844 +epoch: 1, batch: 21033, sum loss: 4191.485352, avg loss: 2.546468, ppl: 12.761943 +epoch: 1, batch: 21034, sum loss: 4393.571777, avg loss: 2.847422, ppl: 17.243269 +epoch: 1, batch: 21035, sum loss: 4990.946777, avg loss: 2.930679, ppl: 18.740356 +epoch: 1, batch: 21036, sum loss: 5237.305664, avg loss: 2.920974, ppl: 18.559351 +epoch: 1, batch: 21037, sum loss: 4454.384766, avg loss: 2.783990, ppl: 16.183470 +epoch: 1, batch: 21038, sum loss: 4656.291992, avg loss: 2.487336, ppl: 12.029183 +epoch: 1, batch: 21039, sum loss: 4231.466797, avg loss: 2.639717, ppl: 14.009240 +epoch: 1, batch: 21040, sum loss: 5889.059570, avg loss: 2.934260, ppl: 18.807579 +epoch: 1, batch: 21041, sum loss: 5269.592285, avg loss: 2.945552, ppl: 19.021156 +epoch: 1, batch: 21042, sum loss: 3692.705566, avg loss: 2.393199, ppl: 10.948459 +epoch: 1, batch: 21043, sum loss: 3537.249023, avg loss: 2.296915, ppl: 9.943458 +epoch: 1, batch: 21044, sum loss: 4700.033691, avg loss: 2.670474, ppl: 14.446809 +epoch: 1, batch: 21045, sum loss: 4931.369141, avg loss: 2.662726, ppl: 14.335317 +epoch: 1, batch: 21046, sum loss: 5390.543457, avg loss: 2.729389, ppl: 15.323524 +epoch: 1, batch: 21047, sum loss: 5103.768555, avg loss: 2.848085, ppl: 17.254711 +epoch: 1, batch: 21048, sum loss: 
4166.471191, avg loss: 2.430847, ppl: 11.368504 +epoch: 1, batch: 21049, sum loss: 4550.423828, avg loss: 2.824596, ppl: 16.854134 +epoch: 1, batch: 21050, sum loss: 4761.847168, avg loss: 2.755699, ppl: 15.732029 +epoch: 1, batch: 21051, sum loss: 4130.260254, avg loss: 2.506226, ppl: 12.258576 +epoch: 1, batch: 21052, sum loss: 5680.554688, avg loss: 2.819134, ppl: 16.762329 +epoch: 1, batch: 21053, sum loss: 4186.364746, avg loss: 2.626327, ppl: 13.822899 +epoch: 1, batch: 21054, sum loss: 5166.711914, avg loss: 2.768870, ppl: 15.940613 +epoch: 1, batch: 21055, sum loss: 3288.644531, avg loss: 2.290142, ppl: 9.876343 +epoch: 1, batch: 21056, sum loss: 4824.799805, avg loss: 2.885646, ppl: 17.915138 +epoch: 1, batch: 21057, sum loss: 5367.916016, avg loss: 2.697445, ppl: 14.841764 +epoch: 1, batch: 21058, sum loss: 5216.576172, avg loss: 2.947218, ppl: 19.052877 +epoch: 1, batch: 21059, sum loss: 4722.042480, avg loss: 2.652833, ppl: 14.194194 +epoch: 1, batch: 21060, sum loss: 4895.815918, avg loss: 2.965364, ppl: 19.401768 +epoch: 1, batch: 21061, sum loss: 4483.435547, avg loss: 2.709025, ppl: 15.014625 +epoch: 1, batch: 21062, sum loss: 5192.333984, avg loss: 2.709986, ppl: 15.029072 +epoch: 1, batch: 21063, sum loss: 4246.169922, avg loss: 2.922347, ppl: 18.584852 +epoch: 1, batch: 21064, sum loss: 3972.730713, avg loss: 2.444757, ppl: 11.527751 +epoch: 1, batch: 21065, sum loss: 5192.173340, avg loss: 2.618343, ppl: 13.712977 +epoch: 1, batch: 21066, sum loss: 4858.121094, avg loss: 2.661984, ppl: 14.324684 +epoch: 1, batch: 21067, sum loss: 4670.275391, avg loss: 2.701143, ppl: 14.896742 +epoch: 1, batch: 21068, sum loss: 4662.423828, avg loss: 2.812077, ppl: 16.644453 +epoch: 1, batch: 21069, sum loss: 4067.075684, avg loss: 2.518313, ppl: 12.407649 +epoch: 1, batch: 21070, sum loss: 4001.937012, avg loss: 2.588575, ppl: 13.310792 +epoch: 1, batch: 21071, sum loss: 3514.751709, avg loss: 2.480418, ppl: 11.946254 +epoch: 1, batch: 21072, sum loss: 
4947.932129, avg loss: 2.853479, ppl: 17.348024 +epoch: 1, batch: 21073, sum loss: 4559.181641, avg loss: 2.624745, ppl: 13.801053 +epoch: 1, batch: 21074, sum loss: 5427.828613, avg loss: 2.593325, ppl: 13.374163 +epoch: 1, batch: 21075, sum loss: 5011.697266, avg loss: 2.799831, ppl: 16.441862 +epoch: 1, batch: 21076, sum loss: 4202.246582, avg loss: 2.651260, ppl: 14.171879 +epoch: 1, batch: 21077, sum loss: 4476.948730, avg loss: 2.583352, ppl: 13.241444 +epoch: 1, batch: 21078, sum loss: 4525.666016, avg loss: 2.716486, ppl: 15.127075 +epoch: 1, batch: 21079, sum loss: 4313.291016, avg loss: 2.618877, ppl: 13.720309 +epoch: 1, batch: 21080, sum loss: 4480.090332, avg loss: 2.636899, ppl: 13.969810 +epoch: 1, batch: 21081, sum loss: 4310.613770, avg loss: 2.692451, ppl: 14.767828 +epoch: 1, batch: 21082, sum loss: 4913.575684, avg loss: 2.720695, ppl: 15.190880 +epoch: 1, batch: 21083, sum loss: 4646.089844, avg loss: 2.737826, ppl: 15.453347 +epoch: 1, batch: 21084, sum loss: 4590.286133, avg loss: 2.755274, ppl: 15.725347 +epoch: 1, batch: 21085, sum loss: 4657.840820, avg loss: 2.609435, ppl: 13.591368 +epoch: 1, batch: 21086, sum loss: 4571.636719, avg loss: 2.654841, ppl: 14.222727 +epoch: 1, batch: 21087, sum loss: 4928.912598, avg loss: 2.842510, ppl: 17.158779 +epoch: 1, batch: 21088, sum loss: 4110.149414, avg loss: 2.629654, ppl: 13.868973 +epoch: 1, batch: 21089, sum loss: 4671.832031, avg loss: 2.594021, ppl: 13.383480 +epoch: 1, batch: 21090, sum loss: 4182.106445, avg loss: 2.551621, ppl: 12.827877 +epoch: 1, batch: 21091, sum loss: 4567.049316, avg loss: 2.584635, ppl: 13.258443 +epoch: 1, batch: 21092, sum loss: 4374.623047, avg loss: 2.619535, ppl: 13.729334 +epoch: 1, batch: 21093, sum loss: 5834.256348, avg loss: 3.086908, ppl: 21.909231 +epoch: 1, batch: 21094, sum loss: 4228.117188, avg loss: 2.432749, ppl: 11.390148 +epoch: 1, batch: 21095, sum loss: 3968.590088, avg loss: 2.529376, ppl: 12.545670 +epoch: 1, batch: 21096, sum loss: 
4221.077637, avg loss: 2.636526, ppl: 13.964604 +epoch: 1, batch: 21097, sum loss: 4262.064453, avg loss: 2.579942, ppl: 13.196376 +epoch: 1, batch: 21098, sum loss: 4536.741699, avg loss: 2.607323, ppl: 13.562691 +epoch: 1, batch: 21099, sum loss: 3824.115234, avg loss: 2.448217, ppl: 11.567705 +epoch: 1, batch: 21100, sum loss: 4577.392090, avg loss: 2.750837, ppl: 15.655725 +epoch: 1, batch: 21101, sum loss: 4254.377930, avg loss: 2.629405, ppl: 13.865521 +epoch: 1, batch: 21102, sum loss: 4675.423340, avg loss: 2.549304, ppl: 12.798190 +epoch: 1, batch: 21103, sum loss: 3920.060059, avg loss: 2.737472, ppl: 15.447888 +epoch: 1, batch: 21104, sum loss: 4258.003418, avg loss: 2.755989, ppl: 15.736602 +epoch: 1, batch: 21105, sum loss: 4215.672852, avg loss: 2.406206, ppl: 11.091801 +epoch: 1, batch: 21106, sum loss: 5020.102539, avg loss: 2.968718, ppl: 19.466953 +epoch: 1, batch: 21107, sum loss: 4565.080566, avg loss: 2.817951, ppl: 16.742514 +epoch: 1, batch: 21108, sum loss: 4898.781738, avg loss: 2.895261, ppl: 18.088223 +epoch: 1, batch: 21109, sum loss: 3253.219727, avg loss: 2.292614, ppl: 9.900789 +epoch: 1, batch: 21110, sum loss: 5363.279297, avg loss: 2.848263, ppl: 17.257780 +epoch: 1, batch: 21111, sum loss: 5010.791992, avg loss: 2.811892, ppl: 16.641378 +epoch: 1, batch: 21112, sum loss: 4972.291016, avg loss: 2.736539, ppl: 15.433475 +epoch: 1, batch: 21113, sum loss: 5121.382812, avg loss: 2.639888, ppl: 14.011635 +epoch: 1, batch: 21114, sum loss: 4049.787842, avg loss: 2.572927, ppl: 13.104131 +epoch: 1, batch: 21115, sum loss: 5621.438965, avg loss: 2.897649, ppl: 18.131470 +epoch: 1, batch: 21116, sum loss: 4466.413574, avg loss: 2.434013, ppl: 11.404555 +epoch: 1, batch: 21117, sum loss: 4593.541504, avg loss: 2.691003, ppl: 14.746453 +epoch: 1, batch: 21118, sum loss: 5072.722168, avg loss: 2.861096, ppl: 17.480669 +epoch: 1, batch: 21119, sum loss: 2978.601562, avg loss: 2.371498, ppl: 10.713430 +epoch: 1, batch: 21120, sum loss: 
5133.100098, avg loss: 2.967110, ppl: 19.435667 +epoch: 1, batch: 21121, sum loss: 4092.400879, avg loss: 2.863822, ppl: 17.528383 +epoch: 1, batch: 21122, sum loss: 4152.592285, avg loss: 2.513676, ppl: 12.350245 +epoch: 1, batch: 21123, sum loss: 3811.364258, avg loss: 2.561401, ppl: 12.953949 +epoch: 1, batch: 21124, sum loss: 4789.517090, avg loss: 2.542207, ppl: 12.707683 +epoch: 1, batch: 21125, sum loss: 5101.443848, avg loss: 2.807619, ppl: 16.570419 +epoch: 1, batch: 21126, sum loss: 4702.104980, avg loss: 2.779022, ppl: 16.103264 +epoch: 1, batch: 21127, sum loss: 4230.510742, avg loss: 2.513672, ppl: 12.350202 +epoch: 1, batch: 21128, sum loss: 3935.187256, avg loss: 2.590643, ppl: 13.338351 +epoch: 1, batch: 21129, sum loss: 4904.024414, avg loss: 2.685665, ppl: 14.667954 +epoch: 1, batch: 21130, sum loss: 5175.355469, avg loss: 2.835811, ppl: 17.044220 +epoch: 1, batch: 21131, sum loss: 5175.712891, avg loss: 2.814417, ppl: 16.683449 +epoch: 1, batch: 21132, sum loss: 4899.427246, avg loss: 2.860144, ppl: 17.464048 +epoch: 1, batch: 21133, sum loss: 4601.315918, avg loss: 2.809106, ppl: 16.595078 +epoch: 1, batch: 21134, sum loss: 4184.200684, avg loss: 2.624969, ppl: 13.804147 +epoch: 1, batch: 21135, sum loss: 3967.105469, avg loss: 2.346012, ppl: 10.443832 +epoch: 1, batch: 21136, sum loss: 4488.184082, avg loss: 2.725066, ppl: 15.257423 +epoch: 1, batch: 21137, sum loss: 4498.877930, avg loss: 2.506339, ppl: 12.259962 +epoch: 1, batch: 21138, sum loss: 4083.164062, avg loss: 2.569644, ppl: 13.061171 +epoch: 1, batch: 21139, sum loss: 4533.225098, avg loss: 2.504544, ppl: 12.237984 +epoch: 1, batch: 21140, sum loss: 5537.521973, avg loss: 2.860290, ppl: 17.466593 +epoch: 1, batch: 21141, sum loss: 4748.434570, avg loss: 2.794841, ppl: 16.360027 +epoch: 1, batch: 21142, sum loss: 4773.770996, avg loss: 2.769009, ppl: 15.942825 +epoch: 1, batch: 21143, sum loss: 5219.980957, avg loss: 2.970962, ppl: 19.510685 +epoch: 1, batch: 21144, sum loss: 
4741.713379, avg loss: 2.853017, ppl: 17.340012 +epoch: 1, batch: 21145, sum loss: 5017.240234, avg loss: 2.885130, ppl: 17.905890 +epoch: 1, batch: 21146, sum loss: 5505.265137, avg loss: 2.942419, ppl: 18.961655 +epoch: 1, batch: 21147, sum loss: 4077.665039, avg loss: 2.642686, ppl: 14.050899 +epoch: 1, batch: 21148, sum loss: 4362.258789, avg loss: 2.692752, ppl: 14.772279 +epoch: 1, batch: 21149, sum loss: 4066.326904, avg loss: 2.628524, ppl: 13.853312 +epoch: 1, batch: 21150, sum loss: 4477.318359, avg loss: 2.959232, ppl: 19.283163 +epoch: 1, batch: 21151, sum loss: 4982.885742, avg loss: 2.985552, ppl: 19.797424 +epoch: 1, batch: 21152, sum loss: 4446.446289, avg loss: 2.513537, ppl: 12.348526 +epoch: 1, batch: 21153, sum loss: 3757.338867, avg loss: 2.542178, ppl: 12.707316 +epoch: 1, batch: 21154, sum loss: 3901.920410, avg loss: 2.572129, ppl: 13.093678 +epoch: 1, batch: 21155, sum loss: 6165.934570, avg loss: 3.069156, ppl: 21.523733 +epoch: 1, batch: 21156, sum loss: 5205.484375, avg loss: 2.916238, ppl: 18.471663 +epoch: 1, batch: 21157, sum loss: 4120.359375, avg loss: 2.649749, ppl: 14.150487 +epoch: 1, batch: 21158, sum loss: 5135.453125, avg loss: 2.680299, ppl: 14.589459 +epoch: 1, batch: 21159, sum loss: 4024.198730, avg loss: 2.490222, ppl: 12.063953 +epoch: 1, batch: 21160, sum loss: 4413.943848, avg loss: 2.578238, ppl: 13.173908 +epoch: 1, batch: 21161, sum loss: 4435.896484, avg loss: 2.698234, ppl: 14.853475 +epoch: 1, batch: 21162, sum loss: 3629.539795, avg loss: 2.465720, ppl: 11.771955 +epoch: 1, batch: 21163, sum loss: 4282.045410, avg loss: 2.679628, ppl: 14.579667 +epoch: 1, batch: 21164, sum loss: 4554.969727, avg loss: 2.602840, ppl: 13.502026 +epoch: 1, batch: 21165, sum loss: 4209.516602, avg loss: 2.577781, ppl: 13.167886 +epoch: 1, batch: 21166, sum loss: 3714.750488, avg loss: 2.558368, ppl: 12.914726 +epoch: 1, batch: 21167, sum loss: 4336.168457, avg loss: 2.671700, ppl: 14.464531 +epoch: 1, batch: 21168, sum loss: 
5021.122070, avg loss: 2.816109, ppl: 16.711699 +epoch: 1, batch: 21169, sum loss: 5308.316406, avg loss: 2.982200, ppl: 19.731180 +epoch: 1, batch: 21170, sum loss: 4553.214844, avg loss: 2.786545, ppl: 16.224871 +epoch: 1, batch: 21171, sum loss: 4591.366699, avg loss: 2.782646, ppl: 16.161736 +epoch: 1, batch: 21172, sum loss: 5382.724609, avg loss: 2.908009, ppl: 18.320288 +epoch: 1, batch: 21173, sum loss: 4799.917480, avg loss: 2.493464, ppl: 12.103123 +epoch: 1, batch: 21174, sum loss: 4628.274902, avg loss: 2.613368, ppl: 13.644930 +epoch: 1, batch: 21175, sum loss: 5170.131348, avg loss: 2.754465, ppl: 15.712638 +epoch: 1, batch: 21176, sum loss: 5195.690430, avg loss: 2.744686, ppl: 15.559726 +epoch: 1, batch: 21177, sum loss: 3968.785156, avg loss: 2.535965, ppl: 12.628612 +epoch: 1, batch: 21178, sum loss: 4967.116699, avg loss: 2.712789, ppl: 15.071255 +epoch: 1, batch: 21179, sum loss: 3777.500000, avg loss: 2.483564, ppl: 11.983895 +epoch: 1, batch: 21180, sum loss: 4752.916992, avg loss: 2.840955, ppl: 17.132111 +epoch: 1, batch: 21181, sum loss: 4996.167969, avg loss: 2.800543, ppl: 16.453575 +epoch: 1, batch: 21182, sum loss: 5319.883301, avg loss: 2.987020, ppl: 19.826521 +epoch: 1, batch: 21183, sum loss: 5649.528320, avg loss: 3.067062, ppl: 21.478708 +epoch: 1, batch: 21184, sum loss: 3446.472412, avg loss: 2.433949, ppl: 11.403832 +epoch: 1, batch: 21185, sum loss: 4546.989258, avg loss: 2.643598, ppl: 14.063719 +epoch: 1, batch: 21186, sum loss: 5568.979492, avg loss: 2.885482, ppl: 17.912197 +epoch: 1, batch: 21187, sum loss: 5136.181641, avg loss: 2.720435, ppl: 15.186930 +epoch: 1, batch: 21188, sum loss: 3707.789795, avg loss: 2.505263, ppl: 12.246783 +epoch: 1, batch: 21189, sum loss: 4309.566895, avg loss: 2.717255, ppl: 15.138715 +epoch: 1, batch: 21190, sum loss: 3842.333008, avg loss: 2.351489, ppl: 10.501195 +epoch: 1, batch: 21191, sum loss: 4096.609375, avg loss: 2.653244, ppl: 14.200036 +epoch: 1, batch: 21192, sum loss: 
4222.520996, avg loss: 2.571572, ppl: 13.086381 +epoch: 1, batch: 21193, sum loss: 3859.569092, avg loss: 2.564498, ppl: 12.994133 +epoch: 1, batch: 21194, sum loss: 4112.984375, avg loss: 2.775293, ppl: 16.043333 +epoch: 1, batch: 21195, sum loss: 4231.187500, avg loss: 2.503661, ppl: 12.227178 +epoch: 1, batch: 21196, sum loss: 4813.085938, avg loss: 2.764553, ppl: 15.871937 +epoch: 1, batch: 21197, sum loss: 4536.629395, avg loss: 2.541529, ppl: 12.699076 +epoch: 1, batch: 21198, sum loss: 4360.503906, avg loss: 2.532232, ppl: 12.581560 +epoch: 1, batch: 21199, sum loss: 4812.161133, avg loss: 2.757685, ppl: 15.763315 +epoch: 1, batch: 21200, sum loss: 5130.720215, avg loss: 2.796033, ppl: 16.379538 +epoch: 1, batch: 21201, sum loss: 4357.673340, avg loss: 2.775588, ppl: 16.048065 +epoch: 1, batch: 21202, sum loss: 4061.002930, avg loss: 2.625083, ppl: 13.805717 +epoch: 1, batch: 21203, sum loss: 4401.165527, avg loss: 2.685275, ppl: 14.662230 +epoch: 1, batch: 21204, sum loss: 4267.213379, avg loss: 2.586190, ppl: 13.279081 +epoch: 1, batch: 21205, sum loss: 5277.627930, avg loss: 3.075541, ppl: 21.661592 +epoch: 1, batch: 21206, sum loss: 4231.260254, avg loss: 2.613502, ppl: 13.646762 +epoch: 1, batch: 21207, sum loss: 3602.091797, avg loss: 2.511919, ppl: 12.328567 +epoch: 1, batch: 21208, sum loss: 4266.440430, avg loss: 2.601488, ppl: 13.483788 +epoch: 1, batch: 21209, sum loss: 4492.800781, avg loss: 2.795769, ppl: 16.375216 +epoch: 1, batch: 21210, sum loss: 5095.092773, avg loss: 2.615551, ppl: 13.674746 +epoch: 1, batch: 21211, sum loss: 4459.333008, avg loss: 2.729090, ppl: 15.318940 +epoch: 1, batch: 21212, sum loss: 4322.404785, avg loss: 2.542591, ppl: 12.712567 +epoch: 1, batch: 21213, sum loss: 3579.960449, avg loss: 2.522876, ppl: 12.464390 +epoch: 1, batch: 21214, sum loss: 4401.600098, avg loss: 2.737314, ppl: 15.445436 +epoch: 1, batch: 21215, sum loss: 4184.770508, avg loss: 2.511867, ppl: 12.327926 +epoch: 1, batch: 21216, sum loss: 
4216.782715, avg loss: 2.571209, ppl: 13.081630 +epoch: 1, batch: 21217, sum loss: 4841.728516, avg loss: 2.813323, ppl: 16.665201 +epoch: 1, batch: 21218, sum loss: 5118.820312, avg loss: 2.800230, ppl: 16.448429 +epoch: 1, batch: 21219, sum loss: 5392.048828, avg loss: 2.949698, ppl: 19.100193 +epoch: 1, batch: 21220, sum loss: 4367.411621, avg loss: 2.629387, ppl: 13.865267 +epoch: 1, batch: 21221, sum loss: 5086.043945, avg loss: 2.727101, ppl: 15.288507 +epoch: 1, batch: 21222, sum loss: 4967.474609, avg loss: 2.611711, ppl: 13.622342 +epoch: 1, batch: 21223, sum loss: 4613.280762, avg loss: 2.811262, ppl: 16.630892 +epoch: 1, batch: 21224, sum loss: 4743.816406, avg loss: 2.788840, ppl: 16.262138 +epoch: 1, batch: 21225, sum loss: 5030.054688, avg loss: 2.938116, ppl: 18.880243 +epoch: 1, batch: 21226, sum loss: 4561.434570, avg loss: 2.647379, ppl: 14.116995 +epoch: 1, batch: 21227, sum loss: 5482.055664, avg loss: 2.871690, ppl: 17.666847 +epoch: 1, batch: 21228, sum loss: 5697.967773, avg loss: 2.857557, ppl: 17.418913 +epoch: 1, batch: 21229, sum loss: 4370.807129, avg loss: 2.568042, ppl: 13.040264 +epoch: 1, batch: 21230, sum loss: 6416.806152, avg loss: 3.049813, ppl: 21.111391 +epoch: 1, batch: 21231, sum loss: 4939.378418, avg loss: 2.858437, ppl: 17.434248 +epoch: 1, batch: 21232, sum loss: 5169.099121, avg loss: 2.824644, ppl: 16.854950 +epoch: 1, batch: 21233, sum loss: 4810.025391, avg loss: 2.587426, ppl: 13.295507 +epoch: 1, batch: 21234, sum loss: 4365.278320, avg loss: 2.666633, ppl: 14.391434 +epoch: 1, batch: 21235, sum loss: 4562.573242, avg loss: 2.712588, ppl: 15.068223 +epoch: 1, batch: 21236, sum loss: 4713.696777, avg loss: 2.587100, ppl: 13.291175 +epoch: 1, batch: 21237, sum loss: 3992.928223, avg loss: 2.448147, ppl: 11.566896 +epoch: 1, batch: 21238, sum loss: 5154.781250, avg loss: 2.804560, ppl: 16.519804 +epoch: 1, batch: 21239, sum loss: 4395.013672, avg loss: 2.668496, ppl: 14.418273 +epoch: 1, batch: 21240, sum loss: 
4317.943848, avg loss: 2.528070, ppl: 12.529301 +epoch: 1, batch: 21241, sum loss: 4163.524414, avg loss: 2.594096, ppl: 13.384485 +epoch: 1, batch: 21242, sum loss: 5624.568359, avg loss: 2.879963, ppl: 17.813622 +epoch: 1, batch: 21243, sum loss: 4127.952637, avg loss: 2.497249, ppl: 12.149025 +epoch: 1, batch: 21244, sum loss: 3758.485596, avg loss: 2.615509, ppl: 13.674169 +epoch: 1, batch: 21245, sum loss: 4414.899902, avg loss: 2.716861, ppl: 15.132753 +epoch: 1, batch: 21246, sum loss: 5787.085938, avg loss: 3.003158, ppl: 20.149075 +epoch: 1, batch: 21247, sum loss: 4119.122559, avg loss: 2.813608, ppl: 16.669958 +epoch: 1, batch: 21248, sum loss: 5153.994141, avg loss: 3.003493, ppl: 20.155819 +epoch: 1, batch: 21249, sum loss: 4163.113281, avg loss: 2.742499, ppl: 15.525734 +epoch: 1, batch: 21250, sum loss: 4262.929199, avg loss: 2.729148, ppl: 15.319831 +epoch: 1, batch: 21251, sum loss: 4173.202637, avg loss: 2.616428, ppl: 13.686749 +epoch: 1, batch: 21252, sum loss: 4769.217773, avg loss: 2.439498, ppl: 11.467279 +epoch: 1, batch: 21253, sum loss: 3527.597656, avg loss: 2.465128, ppl: 11.764984 +epoch: 1, batch: 21254, sum loss: 4132.999023, avg loss: 2.676813, ppl: 14.538687 +epoch: 1, batch: 21255, sum loss: 5609.069824, avg loss: 2.828578, ppl: 16.921377 +epoch: 1, batch: 21256, sum loss: 4689.294922, avg loss: 2.731098, ppl: 15.349730 +epoch: 1, batch: 21257, sum loss: 4051.617920, avg loss: 2.721033, ppl: 15.196010 +epoch: 1, batch: 21258, sum loss: 4268.160156, avg loss: 2.571181, ppl: 13.081262 +epoch: 1, batch: 21259, sum loss: 3920.718018, avg loss: 2.415723, ppl: 11.197859 +epoch: 1, batch: 21260, sum loss: 5220.365234, avg loss: 2.844886, ppl: 17.199594 +epoch: 1, batch: 21261, sum loss: 5073.502930, avg loss: 2.956587, ppl: 19.232218 +epoch: 1, batch: 21262, sum loss: 4317.094238, avg loss: 2.532020, ppl: 12.578891 +epoch: 1, batch: 21263, sum loss: 4771.624512, avg loss: 2.677680, ppl: 14.551289 +epoch: 1, batch: 21264, sum loss: 
4676.159180, avg loss: 2.900843, ppl: 18.189476 +epoch: 1, batch: 21265, sum loss: 3823.284912, avg loss: 2.606193, ppl: 13.547376 +epoch: 1, batch: 21266, sum loss: 4711.143066, avg loss: 2.664674, ppl: 14.363263 +epoch: 1, batch: 21267, sum loss: 4344.125977, avg loss: 2.642412, ppl: 14.047050 +epoch: 1, batch: 21268, sum loss: 4321.699219, avg loss: 2.644859, ppl: 14.081457 +epoch: 1, batch: 21269, sum loss: 3904.623291, avg loss: 2.528901, ppl: 12.539721 +epoch: 1, batch: 21270, sum loss: 4048.719238, avg loss: 2.646222, ppl: 14.100664 +epoch: 1, batch: 21271, sum loss: 3872.355957, avg loss: 2.621771, ppl: 13.760076 +epoch: 1, batch: 21272, sum loss: 4355.419434, avg loss: 2.612729, ppl: 13.636214 +epoch: 1, batch: 21273, sum loss: 5705.821289, avg loss: 2.868688, ppl: 17.613899 +epoch: 1, batch: 21274, sum loss: 4299.223145, avg loss: 2.637560, ppl: 13.979054 +epoch: 1, batch: 21275, sum loss: 4978.583008, avg loss: 2.609320, ppl: 13.589808 +epoch: 1, batch: 21276, sum loss: 5536.735352, avg loss: 2.842267, ppl: 17.154608 +epoch: 1, batch: 21277, sum loss: 3909.267090, avg loss: 2.449415, ppl: 11.581574 +epoch: 1, batch: 21278, sum loss: 4454.205566, avg loss: 2.838882, ppl: 17.096636 +epoch: 1, batch: 21279, sum loss: 4441.093750, avg loss: 2.753313, ppl: 15.694543 +epoch: 1, batch: 21280, sum loss: 4134.208496, avg loss: 2.626562, ppl: 13.826152 +epoch: 1, batch: 21281, sum loss: 4412.823242, avg loss: 2.655128, ppl: 14.226813 +epoch: 1, batch: 21282, sum loss: 4826.997070, avg loss: 2.719435, ppl: 15.171743 +epoch: 1, batch: 21283, sum loss: 3725.172363, avg loss: 2.523829, ppl: 12.476283 +epoch: 1, batch: 21284, sum loss: 3991.324707, avg loss: 2.479084, ppl: 11.930326 +epoch: 1, batch: 21285, sum loss: 4865.742188, avg loss: 2.775666, ppl: 16.049313 +epoch: 1, batch: 21286, sum loss: 3982.036621, avg loss: 2.471780, ppl: 11.843505 +epoch: 1, batch: 21287, sum loss: 3905.409912, avg loss: 2.793569, ppl: 16.339235 +epoch: 1, batch: 21288, sum loss: 
4317.527832, avg loss: 2.547214, ppl: 12.771473 +epoch: 1, batch: 21289, sum loss: 4683.688965, avg loss: 2.514057, ppl: 12.354958 +epoch: 1, batch: 21290, sum loss: 3963.016357, avg loss: 2.703285, ppl: 14.928699 +epoch: 1, batch: 21291, sum loss: 4193.583008, avg loss: 2.757123, ppl: 15.754444 +epoch: 1, batch: 21292, sum loss: 4387.160156, avg loss: 2.642868, ppl: 14.053445 +epoch: 1, batch: 21293, sum loss: 4847.548828, avg loss: 2.659105, ppl: 14.283504 +epoch: 1, batch: 21294, sum loss: 4910.980469, avg loss: 2.676284, ppl: 14.530993 +epoch: 1, batch: 21295, sum loss: 5296.901367, avg loss: 2.736003, ppl: 15.425202 +epoch: 1, batch: 21296, sum loss: 4526.062012, avg loss: 2.473258, ppl: 11.861028 +epoch: 1, batch: 21297, sum loss: 4660.486816, avg loss: 2.775752, ppl: 16.050686 +epoch: 1, batch: 21298, sum loss: 3867.491943, avg loss: 2.457110, ppl: 11.671039 +epoch: 1, batch: 21299, sum loss: 5399.887207, avg loss: 2.839058, ppl: 17.099646 +epoch: 1, batch: 21300, sum loss: 4662.707520, avg loss: 2.560520, ppl: 12.942551 +epoch: 1, batch: 21301, sum loss: 6127.833984, avg loss: 3.099562, ppl: 22.188229 +epoch: 1, batch: 21302, sum loss: 4703.367676, avg loss: 2.976815, ppl: 19.625210 +epoch: 1, batch: 21303, sum loss: 4156.356934, avg loss: 2.509877, ppl: 12.303422 +epoch: 1, batch: 21304, sum loss: 3908.339844, avg loss: 2.557814, ppl: 12.907572 +epoch: 1, batch: 21305, sum loss: 3447.953613, avg loss: 2.159019, ppl: 8.662638 +epoch: 1, batch: 21306, sum loss: 3450.756348, avg loss: 2.339496, ppl: 10.376004 +epoch: 1, batch: 21307, sum loss: 4239.515137, avg loss: 2.628342, ppl: 13.850781 +epoch: 1, batch: 21308, sum loss: 4799.437012, avg loss: 2.753550, ppl: 15.698256 +epoch: 1, batch: 21309, sum loss: 4981.451660, avg loss: 2.716168, ppl: 15.122258 +epoch: 1, batch: 21310, sum loss: 4942.329590, avg loss: 2.690435, ppl: 14.738091 +epoch: 1, batch: 21311, sum loss: 5087.093750, avg loss: 2.819897, ppl: 16.775118 +epoch: 1, batch: 21312, sum loss: 
4986.761719, avg loss: 2.856107, ppl: 17.393673 +epoch: 1, batch: 21313, sum loss: 3848.214355, avg loss: 2.500464, ppl: 12.188150 +epoch: 1, batch: 21314, sum loss: 3378.258789, avg loss: 2.296573, ppl: 9.940059 +epoch: 1, batch: 21315, sum loss: 5239.567871, avg loss: 2.855350, ppl: 17.380527 +epoch: 1, batch: 21316, sum loss: 5471.834473, avg loss: 2.842511, ppl: 17.158804 +epoch: 1, batch: 21317, sum loss: 3750.416992, avg loss: 2.355790, ppl: 10.546453 +epoch: 1, batch: 21318, sum loss: 4234.121094, avg loss: 2.444643, ppl: 11.526428 +epoch: 1, batch: 21319, sum loss: 4241.104492, avg loss: 2.630958, ppl: 13.887069 +epoch: 1, batch: 21320, sum loss: 4681.164062, avg loss: 2.671897, ppl: 14.467394 +epoch: 1, batch: 21321, sum loss: 4362.168457, avg loss: 2.757376, ppl: 15.758434 +epoch: 1, batch: 21322, sum loss: 4644.123047, avg loss: 2.842181, ppl: 17.153131 +epoch: 1, batch: 21323, sum loss: 4074.514404, avg loss: 2.613544, ppl: 13.647328 +epoch: 1, batch: 21324, sum loss: 4273.245117, avg loss: 2.657491, ppl: 14.260461 +epoch: 1, batch: 21325, sum loss: 4167.392090, avg loss: 2.850473, ppl: 17.295967 +epoch: 1, batch: 21326, sum loss: 4092.745117, avg loss: 2.611835, ppl: 13.624028 +epoch: 1, batch: 21327, sum loss: 5038.645020, avg loss: 2.729494, ppl: 15.325125 +epoch: 1, batch: 21328, sum loss: 4531.354492, avg loss: 2.853498, ppl: 17.348360 +epoch: 1, batch: 21329, sum loss: 4380.567383, avg loss: 2.685817, ppl: 14.670182 +epoch: 1, batch: 21330, sum loss: 5179.050293, avg loss: 2.697422, ppl: 14.841425 +epoch: 1, batch: 21331, sum loss: 4766.866699, avg loss: 2.842496, ppl: 17.158546 +epoch: 1, batch: 21332, sum loss: 4383.298340, avg loss: 2.853710, ppl: 17.352037 +epoch: 1, batch: 21333, sum loss: 4180.066895, avg loss: 2.518113, ppl: 12.405162 +epoch: 1, batch: 21334, sum loss: 4640.714844, avg loss: 2.744361, ppl: 15.554678 +epoch: 1, batch: 21335, sum loss: 4247.084473, avg loss: 2.835170, ppl: 17.033293 +epoch: 1, batch: 21336, sum loss: 
4295.232910, avg loss: 2.651378, ppl: 14.173562 +epoch: 1, batch: 21337, sum loss: 5038.449707, avg loss: 2.844974, ppl: 17.201117 +epoch: 1, batch: 21338, sum loss: 3645.346924, avg loss: 2.446542, ppl: 11.548338 +epoch: 1, batch: 21339, sum loss: 4592.892090, avg loss: 2.703292, ppl: 14.928791 +epoch: 1, batch: 21340, sum loss: 4011.265137, avg loss: 2.577934, ppl: 13.169898 +epoch: 1, batch: 21341, sum loss: 4609.991699, avg loss: 2.777103, ppl: 16.072399 +epoch: 1, batch: 21342, sum loss: 4349.292969, avg loss: 2.637534, ppl: 13.978685 +epoch: 1, batch: 21343, sum loss: 5520.083984, avg loss: 3.104659, ppl: 22.301620 +epoch: 1, batch: 21344, sum loss: 5082.609863, avg loss: 2.857004, ppl: 17.409285 +epoch: 1, batch: 21345, sum loss: 4430.682617, avg loss: 2.620155, ppl: 13.737858 +epoch: 1, batch: 21346, sum loss: 4319.709961, avg loss: 2.635576, ppl: 13.951353 +epoch: 1, batch: 21347, sum loss: 4680.547363, avg loss: 2.871501, ppl: 17.663519 +epoch: 1, batch: 21348, sum loss: 4004.471191, avg loss: 2.516952, ppl: 12.390776 +epoch: 1, batch: 21349, sum loss: 4744.410645, avg loss: 2.621222, ppl: 13.752513 +epoch: 1, batch: 21350, sum loss: 4444.286133, avg loss: 2.800432, ppl: 16.451759 +epoch: 1, batch: 21351, sum loss: 5005.894531, avg loss: 2.986811, ppl: 19.822361 +epoch: 1, batch: 21352, sum loss: 4109.150391, avg loss: 2.504053, ppl: 12.231965 +epoch: 1, batch: 21353, sum loss: 3542.995361, avg loss: 2.612828, ppl: 13.637570 +epoch: 1, batch: 21354, sum loss: 4717.826172, avg loss: 2.676022, ppl: 14.527183 +epoch: 1, batch: 21355, sum loss: 4277.430176, avg loss: 2.510229, ppl: 12.307750 +epoch: 1, batch: 21356, sum loss: 4488.162598, avg loss: 2.838812, ppl: 17.095449 +epoch: 1, batch: 21357, sum loss: 3972.146729, avg loss: 2.570969, ppl: 13.078489 +epoch: 1, batch: 21358, sum loss: 3709.916748, avg loss: 2.421617, ppl: 11.264056 +epoch: 1, batch: 21359, sum loss: 3984.700195, avg loss: 2.585788, ppl: 13.273751 +epoch: 1, batch: 21360, sum loss: 
4225.066406, avg loss: 2.748905, ppl: 15.625508 +epoch: 1, batch: 21361, sum loss: 4693.792969, avg loss: 2.751344, ppl: 15.663670 +epoch: 1, batch: 21362, sum loss: 4416.367676, avg loss: 2.721114, ppl: 15.197238 +epoch: 1, batch: 21363, sum loss: 5421.770508, avg loss: 3.101700, ppl: 22.235710 +epoch: 1, batch: 21364, sum loss: 5650.817383, avg loss: 2.747116, ppl: 15.597581 +epoch: 1, batch: 21365, sum loss: 3643.409180, avg loss: 2.483578, ppl: 11.984066 +epoch: 1, batch: 21366, sum loss: 4542.844238, avg loss: 2.669121, ppl: 14.427282 +epoch: 1, batch: 21367, sum loss: 4199.399902, avg loss: 2.318830, ppl: 10.163771 +epoch: 1, batch: 21368, sum loss: 5429.804199, avg loss: 2.883592, ppl: 17.878380 +epoch: 1, batch: 21369, sum loss: 4806.537109, avg loss: 2.616515, ppl: 13.687934 +epoch: 1, batch: 21370, sum loss: 5699.721680, avg loss: 2.814677, ppl: 16.687792 +epoch: 1, batch: 21371, sum loss: 4027.311035, avg loss: 2.623655, ppl: 13.786024 +epoch: 1, batch: 21372, sum loss: 4672.922363, avg loss: 2.702673, ppl: 14.919565 +epoch: 1, batch: 21373, sum loss: 4236.236328, avg loss: 2.674392, ppl: 14.503522 +epoch: 1, batch: 21374, sum loss: 4061.516602, avg loss: 2.744268, ppl: 15.553224 +epoch: 1, batch: 21375, sum loss: 3803.940918, avg loss: 2.507542, ppl: 12.274717 +epoch: 1, batch: 21376, sum loss: 4955.795898, avg loss: 2.583835, ppl: 13.247848 +epoch: 1, batch: 21377, sum loss: 5081.179688, avg loss: 2.736230, ppl: 15.428711 +epoch: 1, batch: 21378, sum loss: 3646.092773, avg loss: 2.463576, ppl: 11.746747 +epoch: 1, batch: 21379, sum loss: 4126.123535, avg loss: 2.537591, ppl: 12.649165 +epoch: 1, batch: 21380, sum loss: 4307.978516, avg loss: 2.626816, ppl: 13.829670 +epoch: 1, batch: 21381, sum loss: 4099.187012, avg loss: 2.579727, ppl: 13.193542 +epoch: 1, batch: 21382, sum loss: 4927.614746, avg loss: 2.779253, ppl: 16.106981 +epoch: 1, batch: 21383, sum loss: 4919.720215, avg loss: 2.887160, ppl: 17.942278 +epoch: 1, batch: 21384, sum loss: 
4595.946289, avg loss: 2.543412, ppl: 12.723011 +epoch: 1, batch: 21385, sum loss: 3945.155029, avg loss: 2.790067, ppl: 16.282114 +epoch: 1, batch: 21386, sum loss: 3816.993896, avg loss: 2.340278, ppl: 10.384127 +epoch: 1, batch: 21387, sum loss: 4627.781250, avg loss: 2.642936, ppl: 14.054410 +epoch: 1, batch: 21388, sum loss: 4884.971680, avg loss: 2.724468, ppl: 15.248307 +epoch: 1, batch: 21389, sum loss: 4206.480957, avg loss: 2.729709, ppl: 15.328421 +epoch: 1, batch: 21390, sum loss: 4276.461426, avg loss: 2.771524, ppl: 15.982973 +epoch: 1, batch: 21391, sum loss: 5309.155273, avg loss: 3.126711, ppl: 22.798874 +epoch: 1, batch: 21392, sum loss: 4104.143555, avg loss: 2.620781, ppl: 13.746458 +epoch: 1, batch: 21393, sum loss: 4181.568848, avg loss: 2.673637, ppl: 14.492589 +epoch: 1, batch: 21394, sum loss: 3810.817627, avg loss: 2.407339, ppl: 11.104371 +epoch: 1, batch: 21395, sum loss: 4798.464844, avg loss: 2.659903, ppl: 14.294900 +epoch: 1, batch: 21396, sum loss: 5310.505371, avg loss: 2.983430, ppl: 19.755461 +epoch: 1, batch: 21397, sum loss: 4753.738770, avg loss: 2.855098, ppl: 17.376148 +epoch: 1, batch: 21398, sum loss: 3936.623291, avg loss: 2.578011, ppl: 13.170918 +epoch: 1, batch: 21399, sum loss: 4313.053711, avg loss: 2.779030, ppl: 16.103386 +epoch: 1, batch: 21400, sum loss: 3948.177002, avg loss: 2.466069, ppl: 11.776070 +epoch: 1, batch: 21401, sum loss: 5043.509277, avg loss: 2.747009, ppl: 15.595919 +epoch: 1, batch: 21402, sum loss: 5733.072754, avg loss: 2.835347, ppl: 17.036318 +epoch: 1, batch: 21403, sum loss: 5081.976074, avg loss: 2.799987, ppl: 16.444429 +epoch: 1, batch: 21404, sum loss: 4661.292969, avg loss: 2.588169, ppl: 13.305391 +epoch: 1, batch: 21405, sum loss: 3741.177979, avg loss: 2.546751, ppl: 12.765568 +epoch: 1, batch: 21406, sum loss: 4302.941406, avg loss: 2.864808, ppl: 17.545681 +epoch: 1, batch: 21407, sum loss: 4261.945312, avg loss: 2.582997, ppl: 13.236751 +epoch: 1, batch: 21408, sum loss: 
4438.170898, avg loss: 2.691432, ppl: 14.752783 +epoch: 1, batch: 21409, sum loss: 4745.350586, avg loss: 2.655484, ppl: 14.231879 +epoch: 1, batch: 21410, sum loss: 4118.533203, avg loss: 2.590272, ppl: 13.333404 +epoch: 1, batch: 21411, sum loss: 5545.681641, avg loss: 3.072400, ppl: 21.593662 +epoch: 1, batch: 21412, sum loss: 3391.328613, avg loss: 2.432804, ppl: 11.390776 +epoch: 1, batch: 21413, sum loss: 4521.753906, avg loss: 2.622827, ppl: 13.774613 +epoch: 1, batch: 21414, sum loss: 3975.022705, avg loss: 2.381679, ppl: 10.823063 +epoch: 1, batch: 21415, sum loss: 4918.859863, avg loss: 2.881582, ppl: 17.842474 +epoch: 1, batch: 21416, sum loss: 4243.051758, avg loss: 2.718163, ppl: 15.152454 +epoch: 1, batch: 21417, sum loss: 4465.389160, avg loss: 2.480772, ppl: 11.950484 +epoch: 1, batch: 21418, sum loss: 5932.268555, avg loss: 3.048442, ppl: 21.082479 +epoch: 1, batch: 21419, sum loss: 3859.289795, avg loss: 2.522412, ppl: 12.458609 +epoch: 1, batch: 21420, sum loss: 4067.944092, avg loss: 2.658787, ppl: 14.278958 +epoch: 1, batch: 21421, sum loss: 4567.918945, avg loss: 2.702911, ppl: 14.923104 +epoch: 1, batch: 21422, sum loss: 4219.704102, avg loss: 2.624194, ppl: 13.793454 +epoch: 1, batch: 21423, sum loss: 4616.755371, avg loss: 2.712547, ppl: 15.067605 +epoch: 1, batch: 21424, sum loss: 5242.004395, avg loss: 2.839656, ppl: 17.109873 +epoch: 1, batch: 21425, sum loss: 4806.945312, avg loss: 2.812724, ppl: 16.655228 +epoch: 1, batch: 21426, sum loss: 5442.843750, avg loss: 2.845187, ppl: 17.204782 +epoch: 1, batch: 21427, sum loss: 5359.251953, avg loss: 2.723197, ppl: 15.228938 +epoch: 1, batch: 21428, sum loss: 4797.034180, avg loss: 2.750593, ppl: 15.651910 +epoch: 1, batch: 21429, sum loss: 4171.833496, avg loss: 2.707225, ppl: 14.987625 +epoch: 1, batch: 21430, sum loss: 4198.643066, avg loss: 2.693164, ppl: 14.778366 +epoch: 1, batch: 21431, sum loss: 4315.937500, avg loss: 2.647815, ppl: 14.123139 +epoch: 1, batch: 21432, sum loss: 
4216.998047, avg loss: 2.486438, ppl: 12.018385 +epoch: 1, batch: 21433, sum loss: 4735.637695, avg loss: 2.603429, ppl: 13.509989 +epoch: 1, batch: 21434, sum loss: 4787.817871, avg loss: 2.651062, ppl: 14.169079 +epoch: 1, batch: 21435, sum loss: 4691.446289, avg loss: 2.780940, ppl: 16.134184 +epoch: 1, batch: 21436, sum loss: 5271.364746, avg loss: 2.759877, ppl: 15.797899 +epoch: 1, batch: 21437, sum loss: 5377.802734, avg loss: 2.892847, ppl: 18.044611 +epoch: 1, batch: 21438, sum loss: 5259.024414, avg loss: 2.822879, ppl: 16.825226 +epoch: 1, batch: 21439, sum loss: 4164.782715, avg loss: 2.422794, ppl: 11.277326 +epoch: 1, batch: 21440, sum loss: 4670.543457, avg loss: 2.825495, ppl: 16.869293 +epoch: 1, batch: 21441, sum loss: 4985.367188, avg loss: 2.900156, ppl: 18.176973 +epoch: 1, batch: 21442, sum loss: 4954.193848, avg loss: 2.654980, ppl: 14.224708 +epoch: 1, batch: 21443, sum loss: 3929.450195, avg loss: 2.181816, ppl: 8.862383 +epoch: 1, batch: 21444, sum loss: 4453.447266, avg loss: 2.487959, ppl: 12.036689 +epoch: 1, batch: 21445, sum loss: 4461.372070, avg loss: 2.750538, ppl: 15.651044 +epoch: 1, batch: 21446, sum loss: 5611.070312, avg loss: 2.901277, ppl: 18.197374 +epoch: 1, batch: 21447, sum loss: 4434.442871, avg loss: 2.558824, ppl: 12.920618 +epoch: 1, batch: 21448, sum loss: 4342.698242, avg loss: 2.609795, ppl: 13.596258 +epoch: 1, batch: 21449, sum loss: 3876.557617, avg loss: 2.527091, ppl: 12.517041 +epoch: 1, batch: 21450, sum loss: 4708.979980, avg loss: 2.678601, ppl: 14.564696 +epoch: 1, batch: 21451, sum loss: 4361.343262, avg loss: 2.710593, ppl: 15.038183 +epoch: 1, batch: 21452, sum loss: 4327.381348, avg loss: 2.756294, ppl: 15.741397 +epoch: 1, batch: 21453, sum loss: 4578.999023, avg loss: 2.835294, ppl: 17.035400 +epoch: 1, batch: 21454, sum loss: 4088.580566, avg loss: 2.624249, ppl: 13.794217 +epoch: 1, batch: 21455, sum loss: 4436.080078, avg loss: 2.685279, ppl: 14.662287 +epoch: 1, batch: 21456, sum loss: 
4335.890625, avg loss: 2.699807, ppl: 14.876863 +epoch: 1, batch: 21457, sum loss: 4587.724609, avg loss: 2.714630, ppl: 15.099021 +epoch: 1, batch: 21458, sum loss: 3756.349121, avg loss: 2.463180, ppl: 11.742090 +epoch: 1, batch: 21459, sum loss: 3850.379883, avg loss: 2.382661, ppl: 10.833692 +epoch: 1, batch: 21460, sum loss: 4567.251465, avg loss: 2.779824, ppl: 16.116188 +epoch: 1, batch: 21461, sum loss: 5166.949219, avg loss: 2.993597, ppl: 19.957346 +epoch: 1, batch: 21462, sum loss: 5210.087891, avg loss: 2.922091, ppl: 18.580097 +epoch: 1, batch: 21463, sum loss: 3965.058105, avg loss: 2.622393, ppl: 13.768635 +epoch: 1, batch: 21464, sum loss: 4500.389160, avg loss: 2.717626, ppl: 15.144332 +epoch: 1, batch: 21465, sum loss: 3978.925781, avg loss: 2.570366, ppl: 13.070603 +epoch: 1, batch: 21466, sum loss: 4934.737305, avg loss: 2.937344, ppl: 18.865669 +epoch: 1, batch: 21467, sum loss: 4840.819824, avg loss: 2.713464, ppl: 15.081431 +epoch: 1, batch: 21468, sum loss: 3974.894531, avg loss: 2.375908, ppl: 10.760783 +epoch: 1, batch: 21469, sum loss: 4241.074707, avg loss: 2.513974, ppl: 12.353932 +epoch: 1, batch: 21470, sum loss: 4634.094727, avg loss: 2.764973, ppl: 15.878613 +epoch: 1, batch: 21471, sum loss: 3321.069580, avg loss: 2.295141, ppl: 9.925840 +epoch: 1, batch: 21472, sum loss: 5306.282227, avg loss: 2.804589, ppl: 16.520281 +epoch: 1, batch: 21473, sum loss: 4002.735840, avg loss: 2.597492, ppl: 13.430019 +epoch: 1, batch: 21474, sum loss: 4697.949219, avg loss: 2.673847, ppl: 14.495626 +epoch: 1, batch: 21475, sum loss: 4738.367188, avg loss: 2.578002, ppl: 13.170794 +epoch: 1, batch: 21476, sum loss: 5701.736816, avg loss: 2.792232, ppl: 16.317392 +epoch: 1, batch: 21477, sum loss: 5402.538086, avg loss: 2.901471, ppl: 18.200893 +epoch: 1, batch: 21478, sum loss: 4203.291992, avg loss: 2.618873, ppl: 13.720257 +epoch: 1, batch: 21479, sum loss: 5609.225098, avg loss: 2.986808, ppl: 19.822306 +epoch: 1, batch: 21480, sum loss: 
5263.023926, avg loss: 2.700372, ppl: 14.885261 +epoch: 1, batch: 21481, sum loss: 5875.471680, avg loss: 3.100513, ppl: 22.209335 +epoch: 1, batch: 21482, sum loss: 5067.313965, avg loss: 2.746511, ppl: 15.588157 +epoch: 1, batch: 21483, sum loss: 4110.930664, avg loss: 2.827325, ppl: 16.900194 +epoch: 1, batch: 21484, sum loss: 4620.494629, avg loss: 2.709968, ppl: 15.028789 +epoch: 1, batch: 21485, sum loss: 5920.005859, avg loss: 2.703199, ppl: 14.927407 +epoch: 1, batch: 21486, sum loss: 4846.051758, avg loss: 2.728633, ppl: 15.311940 +epoch: 1, batch: 21487, sum loss: 4305.324707, avg loss: 2.667487, ppl: 14.403732 +epoch: 1, batch: 21488, sum loss: 5146.478516, avg loss: 2.680458, ppl: 14.591769 +epoch: 1, batch: 21489, sum loss: 5008.711914, avg loss: 2.654325, ppl: 14.215391 +epoch: 1, batch: 21490, sum loss: 3844.507812, avg loss: 2.370227, ppl: 10.699817 +epoch: 1, batch: 21491, sum loss: 4198.177734, avg loss: 2.484129, ppl: 11.990668 +epoch: 1, batch: 21492, sum loss: 4557.372559, avg loss: 2.717575, ppl: 15.143548 +epoch: 1, batch: 21493, sum loss: 3771.216064, avg loss: 2.539539, ppl: 12.673831 +epoch: 1, batch: 21494, sum loss: 3963.857422, avg loss: 2.537681, ppl: 12.650299 +epoch: 1, batch: 21495, sum loss: 5170.123047, avg loss: 2.702626, ppl: 14.918854 +epoch: 1, batch: 21496, sum loss: 4425.574219, avg loss: 2.677298, ppl: 14.545742 +epoch: 1, batch: 21497, sum loss: 4979.599121, avg loss: 2.730043, ppl: 15.333552 +epoch: 1, batch: 21498, sum loss: 4309.475098, avg loss: 2.534986, ppl: 12.616248 +epoch: 1, batch: 21499, sum loss: 4411.885254, avg loss: 2.482772, ppl: 11.974407 +epoch: 1, batch: 21500, sum loss: 5322.952637, avg loss: 2.957196, ppl: 19.243937 +epoch: 1, batch: 21501, sum loss: 4060.662842, avg loss: 2.542682, ppl: 12.713722 +epoch: 1, batch: 21502, sum loss: 4885.806152, avg loss: 2.870626, ppl: 17.648071 +epoch: 1, batch: 21503, sum loss: 4204.284668, avg loss: 2.455774, ppl: 11.655447 +epoch: 1, batch: 21504, sum loss: 
5244.807617, avg loss: 2.856649, ppl: 17.403111 +epoch: 1, batch: 21505, sum loss: 5064.000977, avg loss: 2.887116, ppl: 17.941486 +epoch: 1, batch: 21506, sum loss: 2946.662598, avg loss: 2.235708, ppl: 9.353097 +epoch: 1, batch: 21507, sum loss: 4586.120117, avg loss: 2.559219, ppl: 12.925717 +epoch: 1, batch: 21508, sum loss: 4011.861816, avg loss: 2.646347, ppl: 14.102429 +epoch: 1, batch: 21509, sum loss: 4589.917480, avg loss: 2.391828, ppl: 10.933457 +epoch: 1, batch: 21510, sum loss: 4196.914551, avg loss: 2.477517, ppl: 11.911655 +epoch: 1, batch: 21511, sum loss: 4409.221680, avg loss: 2.640253, ppl: 14.016744 +epoch: 1, batch: 21512, sum loss: 4288.064453, avg loss: 2.663394, ppl: 14.344893 +epoch: 1, batch: 21513, sum loss: 4493.000488, avg loss: 2.534123, ppl: 12.605376 +epoch: 1, batch: 21514, sum loss: 3910.727051, avg loss: 2.610632, ppl: 13.607651 +epoch: 1, batch: 21515, sum loss: 4604.737305, avg loss: 2.745818, ppl: 15.577357 +epoch: 1, batch: 21516, sum loss: 4763.458984, avg loss: 2.677605, ppl: 14.550200 +epoch: 1, batch: 21517, sum loss: 4121.344238, avg loss: 2.716773, ppl: 15.131408 +epoch: 1, batch: 21518, sum loss: 3842.230957, avg loss: 2.536126, ppl: 12.630647 +epoch: 1, batch: 21519, sum loss: 4558.770020, avg loss: 2.575576, ppl: 13.138887 +epoch: 1, batch: 21520, sum loss: 5268.521973, avg loss: 2.590227, ppl: 13.332796 +epoch: 1, batch: 21521, sum loss: 4798.866699, avg loss: 2.842931, ppl: 17.165998 +epoch: 1, batch: 21522, sum loss: 4807.340820, avg loss: 2.669262, ppl: 14.429316 +epoch: 1, batch: 21523, sum loss: 5085.733398, avg loss: 2.766993, ppl: 15.910719 +epoch: 1, batch: 21524, sum loss: 4366.828613, avg loss: 2.819128, ppl: 16.762220 +epoch: 1, batch: 21525, sum loss: 4448.714844, avg loss: 2.489488, ppl: 12.055101 +epoch: 1, batch: 21526, sum loss: 4438.486816, avg loss: 2.497741, ppl: 12.155010 +epoch: 1, batch: 21527, sum loss: 4164.882324, avg loss: 2.432758, ppl: 11.390257 +epoch: 1, batch: 21528, sum loss: 
4453.356445, avg loss: 2.695736, ppl: 14.816425 +epoch: 1, batch: 21529, sum loss: 4737.907715, avg loss: 2.633634, ppl: 13.924280 +epoch: 1, batch: 21530, sum loss: 4162.904297, avg loss: 2.485316, ppl: 12.004916 +epoch: 1, batch: 21531, sum loss: 5717.208496, avg loss: 2.747337, ppl: 15.601032 +epoch: 1, batch: 21532, sum loss: 5889.557129, avg loss: 2.891290, ppl: 18.016531 +epoch: 1, batch: 21533, sum loss: 4455.584961, avg loss: 2.548962, ppl: 12.793816 +epoch: 1, batch: 21534, sum loss: 3858.373047, avg loss: 2.445103, ppl: 11.531742 +epoch: 1, batch: 21535, sum loss: 4172.701660, avg loss: 2.577333, ppl: 13.161984 +epoch: 1, batch: 21536, sum loss: 5328.309082, avg loss: 2.794079, ppl: 16.347570 +epoch: 1, batch: 21537, sum loss: 5020.645020, avg loss: 2.795460, ppl: 16.370150 +epoch: 1, batch: 21538, sum loss: 5115.729492, avg loss: 2.670005, ppl: 14.440042 +epoch: 1, batch: 21539, sum loss: 5069.074219, avg loss: 2.995907, ppl: 20.003492 +epoch: 1, batch: 21540, sum loss: 4090.992676, avg loss: 2.711062, ppl: 15.045248 +epoch: 1, batch: 21541, sum loss: 3738.742188, avg loss: 2.445221, ppl: 11.533094 +epoch: 1, batch: 21542, sum loss: 3995.215820, avg loss: 2.412570, ppl: 11.162612 +epoch: 1, batch: 21543, sum loss: 4740.004883, avg loss: 2.562165, ppl: 12.963851 +epoch: 1, batch: 21544, sum loss: 5120.263184, avg loss: 2.772206, ppl: 15.993871 +epoch: 1, batch: 21545, sum loss: 5253.329102, avg loss: 2.741821, ppl: 15.515213 +epoch: 1, batch: 21546, sum loss: 4337.328125, avg loss: 2.478473, ppl: 11.923046 +epoch: 1, batch: 21547, sum loss: 5290.595703, avg loss: 3.134239, ppl: 22.971153 +epoch: 1, batch: 21548, sum loss: 5284.149414, avg loss: 2.660700, ppl: 14.306294 +epoch: 1, batch: 21549, sum loss: 4506.786621, avg loss: 2.600569, ppl: 13.471404 +epoch: 1, batch: 21550, sum loss: 4683.863281, avg loss: 2.796336, ppl: 16.384510 +epoch: 1, batch: 21551, sum loss: 4392.227051, avg loss: 2.484291, ppl: 11.992619 +epoch: 1, batch: 21552, sum loss: 
4585.899902, avg loss: 2.739486, ppl: 15.479033 +epoch: 1, batch: 21553, sum loss: 4242.206543, avg loss: 2.695176, ppl: 14.808120 +epoch: 1, batch: 21554, sum loss: 3935.479004, avg loss: 2.543942, ppl: 12.729758 +epoch: 1, batch: 21555, sum loss: 5308.552246, avg loss: 2.841837, ppl: 17.147243 +epoch: 1, batch: 21556, sum loss: 3518.025879, avg loss: 2.412912, ppl: 11.166432 +epoch: 1, batch: 21557, sum loss: 4639.874512, avg loss: 2.571992, ppl: 13.091883 +epoch: 1, batch: 21558, sum loss: 4650.494141, avg loss: 2.769800, ppl: 15.955441 +epoch: 1, batch: 21559, sum loss: 3517.038086, avg loss: 2.336902, ppl: 10.349129 +epoch: 1, batch: 21560, sum loss: 4679.319336, avg loss: 2.517116, ppl: 12.392808 +epoch: 1, batch: 21561, sum loss: 4553.347168, avg loss: 2.752931, ppl: 15.688542 +epoch: 1, batch: 21562, sum loss: 4427.319824, avg loss: 2.852655, ppl: 17.333736 +epoch: 1, batch: 21563, sum loss: 4337.451660, avg loss: 2.689059, ppl: 14.717816 +epoch: 1, batch: 21564, sum loss: 4495.326660, avg loss: 2.813096, ppl: 16.661415 +epoch: 1, batch: 21565, sum loss: 3484.698975, avg loss: 2.391695, ppl: 10.932003 +epoch: 1, batch: 21566, sum loss: 4849.542969, avg loss: 2.710756, ppl: 15.040647 +epoch: 1, batch: 21567, sum loss: 4596.956055, avg loss: 2.649542, ppl: 14.147562 +epoch: 1, batch: 21568, sum loss: 4009.134033, avg loss: 2.473247, ppl: 11.860900 +epoch: 1, batch: 21569, sum loss: 4212.346680, avg loss: 2.436291, ppl: 11.430563 +epoch: 1, batch: 21570, sum loss: 4459.380859, avg loss: 2.638687, ppl: 13.994815 +epoch: 1, batch: 21571, sum loss: 4765.276367, avg loss: 2.702936, ppl: 14.923486 +epoch: 1, batch: 21572, sum loss: 4412.532715, avg loss: 2.928024, ppl: 18.690666 +epoch: 1, batch: 21573, sum loss: 4000.248535, avg loss: 2.624835, ppl: 13.802297 +epoch: 1, batch: 21574, sum loss: 5670.695312, avg loss: 3.034080, ppl: 20.781845 +epoch: 1, batch: 21575, sum loss: 5109.642090, avg loss: 2.715006, ppl: 15.104706 +epoch: 1, batch: 21576, sum loss: 
4479.326660, avg loss: 2.746368, ppl: 15.585927 +epoch: 1, batch: 21577, sum loss: 3672.347412, avg loss: 2.471297, ppl: 11.837790 +epoch: 1, batch: 21578, sum loss: 4713.870605, avg loss: 2.542541, ppl: 12.711928 +epoch: 1, batch: 21579, sum loss: 4617.154297, avg loss: 2.593907, ppl: 13.381951 +epoch: 1, batch: 21580, sum loss: 4263.398926, avg loss: 2.524215, ppl: 12.481093 +epoch: 1, batch: 21581, sum loss: 4302.106934, avg loss: 2.449947, ppl: 11.587734 +epoch: 1, batch: 21582, sum loss: 4123.607910, avg loss: 2.686389, ppl: 14.678582 +epoch: 1, batch: 21583, sum loss: 4883.534180, avg loss: 2.876051, ppl: 17.744062 +epoch: 1, batch: 21584, sum loss: 5260.226562, avg loss: 2.732585, ppl: 15.372577 +epoch: 1, batch: 21585, sum loss: 4018.991943, avg loss: 2.444642, ppl: 11.526426 +epoch: 1, batch: 21586, sum loss: 4561.085449, avg loss: 2.556662, ppl: 12.892714 +epoch: 1, batch: 21587, sum loss: 5156.563965, avg loss: 2.923222, ppl: 18.601130 +epoch: 1, batch: 21588, sum loss: 5865.457520, avg loss: 3.015660, ppl: 20.402544 +epoch: 1, batch: 21589, sum loss: 4796.962402, avg loss: 2.648792, ppl: 14.136954 +epoch: 1, batch: 21590, sum loss: 4438.833496, avg loss: 2.719874, ppl: 15.178405 +epoch: 1, batch: 21591, sum loss: 5079.279785, avg loss: 2.839173, ppl: 17.101610 +epoch: 1, batch: 21592, sum loss: 4972.460938, avg loss: 2.700957, ppl: 14.893972 +epoch: 1, batch: 21593, sum loss: 4772.881836, avg loss: 2.915627, ppl: 18.460392 +epoch: 1, batch: 21594, sum loss: 5030.551270, avg loss: 2.647659, ppl: 14.120940 +epoch: 1, batch: 21595, sum loss: 4207.109375, avg loss: 2.749745, ppl: 15.638638 +epoch: 1, batch: 21596, sum loss: 3961.661133, avg loss: 2.265101, ppl: 9.632094 +epoch: 1, batch: 21597, sum loss: 4474.606445, avg loss: 2.647696, ppl: 14.121466 +epoch: 1, batch: 21598, sum loss: 3639.166504, avg loss: 2.460559, ppl: 11.711352 +epoch: 1, batch: 21599, sum loss: 5527.874512, avg loss: 2.820344, ppl: 16.782623 +epoch: 1, batch: 21600, sum loss: 
4767.333008, avg loss: 2.701039, ppl: 14.895194 +epoch: 1, batch: 21601, sum loss: 4887.335938, avg loss: 2.834882, ppl: 17.028387 +epoch: 1, batch: 21602, sum loss: 4014.007568, avg loss: 2.822790, ppl: 16.823727 +epoch: 1, batch: 21603, sum loss: 4560.978027, avg loss: 2.506032, ppl: 12.256201 +epoch: 1, batch: 21604, sum loss: 5338.918457, avg loss: 2.989316, ppl: 19.872089 +epoch: 1, batch: 21605, sum loss: 5009.499512, avg loss: 2.890652, ppl: 18.005039 +epoch: 1, batch: 21606, sum loss: 4448.237793, avg loss: 2.718972, ppl: 15.164724 +epoch: 1, batch: 21607, sum loss: 4321.645996, avg loss: 2.502401, ppl: 12.211775 +epoch: 1, batch: 21608, sum loss: 4791.023438, avg loss: 2.761397, ppl: 15.821929 +epoch: 1, batch: 21609, sum loss: 5391.336426, avg loss: 2.718778, ppl: 15.161781 +epoch: 1, batch: 21610, sum loss: 5064.374023, avg loss: 2.718397, ppl: 15.156010 +epoch: 1, batch: 21611, sum loss: 3698.397217, avg loss: 2.487153, ppl: 12.026990 +epoch: 1, batch: 21612, sum loss: 3994.080811, avg loss: 2.349459, ppl: 10.479903 +epoch: 1, batch: 21613, sum loss: 3822.781250, avg loss: 2.598764, ppl: 13.447104 +epoch: 1, batch: 21614, sum loss: 4622.283203, avg loss: 2.733462, ppl: 15.386056 +epoch: 1, batch: 21615, sum loss: 3480.546387, avg loss: 2.340650, ppl: 10.387985 +epoch: 1, batch: 21616, sum loss: 4179.106934, avg loss: 2.529726, ppl: 12.550064 +epoch: 1, batch: 21617, sum loss: 3558.628662, avg loss: 2.404479, ppl: 11.072658 +epoch: 1, batch: 21618, sum loss: 3886.406006, avg loss: 2.663746, ppl: 14.349949 +epoch: 1, batch: 21619, sum loss: 4402.055176, avg loss: 2.661460, ppl: 14.317183 +epoch: 1, batch: 21620, sum loss: 5893.036133, avg loss: 3.003586, ppl: 20.157694 +epoch: 1, batch: 21621, sum loss: 5018.954590, avg loss: 2.765264, ppl: 15.883237 +epoch: 1, batch: 21622, sum loss: 4500.780762, avg loss: 2.669502, ppl: 14.432784 +epoch: 1, batch: 21623, sum loss: 4893.001953, avg loss: 2.776959, ppl: 16.070080 +epoch: 1, batch: 21624, sum loss: 
4648.272949, avg loss: 2.503109, ppl: 12.220425 +epoch: 1, batch: 21625, sum loss: 4303.408691, avg loss: 2.586183, ppl: 13.278993 +epoch: 1, batch: 21626, sum loss: 5117.678223, avg loss: 2.891344, ppl: 18.017502 +epoch: 1, batch: 21627, sum loss: 4313.491211, avg loss: 2.555386, ppl: 12.876266 +epoch: 1, batch: 21628, sum loss: 4871.635742, avg loss: 2.863983, ppl: 17.531221 +epoch: 1, batch: 21629, sum loss: 4509.663574, avg loss: 2.481928, ppl: 11.964314 +epoch: 1, batch: 21630, sum loss: 4696.405273, avg loss: 2.777295, ppl: 16.075476 +epoch: 1, batch: 21631, sum loss: 3510.718018, avg loss: 2.417850, ppl: 11.221707 +epoch: 1, batch: 21632, sum loss: 3842.493652, avg loss: 2.344413, ppl: 10.427154 +epoch: 1, batch: 21633, sum loss: 5304.239258, avg loss: 2.821404, ppl: 16.800417 +epoch: 1, batch: 21634, sum loss: 4065.767822, avg loss: 2.462609, ppl: 11.735393 +epoch: 1, batch: 21635, sum loss: 4201.725586, avg loss: 2.669457, ppl: 14.432130 +epoch: 1, batch: 21636, sum loss: 4019.386963, avg loss: 2.566658, ppl: 13.022235 +epoch: 1, batch: 21637, sum loss: 4054.802979, avg loss: 2.512269, ppl: 12.332888 +epoch: 1, batch: 21638, sum loss: 5266.552246, avg loss: 2.823889, ppl: 16.842215 +epoch: 1, batch: 21639, sum loss: 5259.926758, avg loss: 2.866445, ppl: 17.574436 +epoch: 1, batch: 21640, sum loss: 4904.854004, avg loss: 2.479703, ppl: 11.937723 +epoch: 1, batch: 21641, sum loss: 4226.121094, avg loss: 2.654599, ppl: 14.219279 +epoch: 1, batch: 21642, sum loss: 4232.402344, avg loss: 2.799208, ppl: 16.431631 +epoch: 1, batch: 21643, sum loss: 4559.875977, avg loss: 2.804352, ppl: 16.516367 +epoch: 1, batch: 21644, sum loss: 3831.349121, avg loss: 2.601052, ppl: 13.477907 +epoch: 1, batch: 21645, sum loss: 3288.375244, avg loss: 2.255401, ppl: 9.539122 +epoch: 1, batch: 21646, sum loss: 4138.416016, avg loss: 2.807609, ppl: 16.570257 +epoch: 1, batch: 21647, sum loss: 5169.550293, avg loss: 3.000319, ppl: 20.091955 +epoch: 1, batch: 21648, sum loss: 
4792.933594, avg loss: 2.680612, ppl: 14.594016 +epoch: 1, batch: 21649, sum loss: 4103.352539, avg loss: 2.781934, ppl: 16.150227 +epoch: 1, batch: 21650, sum loss: 4967.712891, avg loss: 2.816164, ppl: 16.712614 +epoch: 1, batch: 21651, sum loss: 4686.582520, avg loss: 2.806337, ppl: 16.549185 +epoch: 1, batch: 21652, sum loss: 4501.289062, avg loss: 2.832781, ppl: 16.992653 +epoch: 1, batch: 21653, sum loss: 4931.996582, avg loss: 2.739998, ppl: 15.486956 +epoch: 1, batch: 21654, sum loss: 5088.740723, avg loss: 2.563597, ppl: 12.982437 +epoch: 1, batch: 21655, sum loss: 4347.834961, avg loss: 2.616026, ppl: 13.681245 +epoch: 1, batch: 21656, sum loss: 4470.094727, avg loss: 2.755915, ppl: 15.735438 +epoch: 1, batch: 21657, sum loss: 5170.915527, avg loss: 2.825637, ppl: 16.871691 +epoch: 1, batch: 21658, sum loss: 4588.608398, avg loss: 2.533743, ppl: 12.600584 +epoch: 1, batch: 21659, sum loss: 5630.548828, avg loss: 2.975977, ppl: 19.608776 +epoch: 1, batch: 21660, sum loss: 3925.578857, avg loss: 2.445843, ppl: 11.540279 +epoch: 1, batch: 21661, sum loss: 5141.124512, avg loss: 2.749264, ppl: 15.631130 +epoch: 1, batch: 21662, sum loss: 4359.586914, avg loss: 2.468622, ppl: 11.806170 +epoch: 1, batch: 21663, sum loss: 4896.510254, avg loss: 2.574401, ppl: 13.123452 +epoch: 1, batch: 21664, sum loss: 5112.574219, avg loss: 2.857783, ppl: 17.422863 +epoch: 1, batch: 21665, sum loss: 4121.260254, avg loss: 2.698926, ppl: 14.863763 +epoch: 1, batch: 21666, sum loss: 4069.306152, avg loss: 2.598535, ppl: 13.444029 +epoch: 1, batch: 21667, sum loss: 4249.345215, avg loss: 2.665838, ppl: 14.379988 +epoch: 1, batch: 21668, sum loss: 4387.175293, avg loss: 2.671849, ppl: 14.466690 +epoch: 1, batch: 21669, sum loss: 4670.306152, avg loss: 2.859955, ppl: 17.460733 +epoch: 1, batch: 21670, sum loss: 4952.429688, avg loss: 2.526750, ppl: 12.512772 +epoch: 1, batch: 21671, sum loss: 4594.603516, avg loss: 2.390533, ppl: 10.919310 +epoch: 1, batch: 21672, sum loss: 
4127.844238, avg loss: 2.806148, ppl: 16.546064 +epoch: 1, batch: 21673, sum loss: 4614.129395, avg loss: 2.776251, ppl: 16.058708 +epoch: 1, batch: 21674, sum loss: 4231.958008, avg loss: 2.641672, ppl: 14.036649 +epoch: 1, batch: 21675, sum loss: 5265.065430, avg loss: 2.836781, ppl: 17.060759 +epoch: 1, batch: 21676, sum loss: 4904.508301, avg loss: 2.681525, ppl: 14.607349 +epoch: 1, batch: 21677, sum loss: 5013.951172, avg loss: 2.644489, ppl: 14.076254 +epoch: 1, batch: 21678, sum loss: 4065.840820, avg loss: 2.503597, ppl: 12.226388 +epoch: 1, batch: 21679, sum loss: 4379.496094, avg loss: 2.810973, ppl: 16.626091 +epoch: 1, batch: 21680, sum loss: 4302.479492, avg loss: 2.588736, ppl: 13.312938 +epoch: 1, batch: 21681, sum loss: 4603.035156, avg loss: 2.761269, ppl: 15.819903 +epoch: 1, batch: 21682, sum loss: 4601.753418, avg loss: 2.694235, ppl: 14.794194 +epoch: 1, batch: 21683, sum loss: 4195.472656, avg loss: 2.663792, ppl: 14.350605 +epoch: 1, batch: 21684, sum loss: 5169.571289, avg loss: 2.875179, ppl: 17.728590 +epoch: 1, batch: 21685, sum loss: 4875.788086, avg loss: 2.639842, ppl: 14.010994 +epoch: 1, batch: 21686, sum loss: 4552.923340, avg loss: 2.556386, ppl: 12.889152 +epoch: 1, batch: 21687, sum loss: 4913.516602, avg loss: 2.787020, ppl: 16.232578 +epoch: 1, batch: 21688, sum loss: 4751.233398, avg loss: 2.793200, ppl: 16.333202 +epoch: 1, batch: 21689, sum loss: 4728.859375, avg loss: 2.682280, ppl: 14.618382 +epoch: 1, batch: 21690, sum loss: 4944.498047, avg loss: 2.790349, ppl: 16.286699 +epoch: 1, batch: 21691, sum loss: 3164.011475, avg loss: 2.226609, ppl: 9.268384 +epoch: 1, batch: 21692, sum loss: 4080.893555, avg loss: 2.439267, ppl: 11.464633 +epoch: 1, batch: 21693, sum loss: 2799.283203, avg loss: 2.119064, ppl: 8.323342 +epoch: 1, batch: 21694, sum loss: 4862.036133, avg loss: 2.713190, ppl: 15.077293 +epoch: 1, batch: 21695, sum loss: 5422.328613, avg loss: 3.042833, ppl: 20.964560 +epoch: 1, batch: 21696, sum loss: 
4936.919922, avg loss: 2.493394, ppl: 12.102278 +epoch: 1, batch: 21697, sum loss: 4207.864258, avg loss: 2.620090, ppl: 13.736957 +epoch: 1, batch: 21698, sum loss: 5254.863281, avg loss: 2.822161, ppl: 16.813141 +epoch: 1, batch: 21699, sum loss: 2913.087402, avg loss: 2.228835, ppl: 9.289039 +epoch: 1, batch: 21700, sum loss: 4125.788086, avg loss: 2.521875, ppl: 12.451927 +epoch: 1, batch: 21701, sum loss: 4788.297852, avg loss: 2.813336, ppl: 16.665421 +epoch: 1, batch: 21702, sum loss: 5326.658691, avg loss: 2.706636, ppl: 14.978798 +epoch: 1, batch: 21703, sum loss: 5562.585449, avg loss: 2.930762, ppl: 18.741903 +epoch: 1, batch: 21704, sum loss: 4085.080566, avg loss: 2.678741, ppl: 14.566749 +epoch: 1, batch: 21705, sum loss: 5547.271484, avg loss: 2.907375, ppl: 18.308672 +epoch: 1, batch: 21706, sum loss: 4378.623535, avg loss: 2.541279, ppl: 12.695896 +epoch: 1, batch: 21707, sum loss: 4479.424316, avg loss: 2.553834, ppl: 12.856297 +epoch: 1, batch: 21708, sum loss: 5225.750000, avg loss: 2.820157, ppl: 16.779478 +epoch: 1, batch: 21709, sum loss: 3439.994629, avg loss: 2.175835, ppl: 8.809535 +epoch: 1, batch: 21710, sum loss: 5756.520508, avg loss: 3.018626, ppl: 20.463165 +epoch: 1, batch: 21711, sum loss: 3167.188965, avg loss: 2.425107, ppl: 11.303433 +epoch: 1, batch: 21712, sum loss: 4257.899414, avg loss: 2.537485, ppl: 12.647818 +epoch: 1, batch: 21713, sum loss: 5240.582031, avg loss: 2.882608, ppl: 17.860802 +epoch: 1, batch: 21714, sum loss: 4917.544434, avg loss: 2.875757, ppl: 17.738848 +epoch: 1, batch: 21715, sum loss: 5053.639648, avg loss: 2.824841, ppl: 16.858257 +epoch: 1, batch: 21716, sum loss: 4621.654297, avg loss: 2.741195, ppl: 15.505499 +epoch: 1, batch: 21717, sum loss: 4577.954590, avg loss: 2.601110, ppl: 13.478697 +epoch: 1, batch: 21718, sum loss: 4454.684570, avg loss: 2.656341, ppl: 14.244079 +epoch: 1, batch: 21719, sum loss: 5200.810059, avg loss: 2.656185, ppl: 14.241852 +epoch: 1, batch: 21720, sum loss: 
4470.588379, avg loss: 2.773318, ppl: 16.011669 +epoch: 1, batch: 21721, sum loss: 3585.386475, avg loss: 2.231106, ppl: 9.310153 +epoch: 1, batch: 21722, sum loss: 4718.349121, avg loss: 2.852690, ppl: 17.334349 +epoch: 1, batch: 21723, sum loss: 3962.633545, avg loss: 2.519157, ppl: 12.418120 +epoch: 1, batch: 21724, sum loss: 3609.080322, avg loss: 2.511538, ppl: 12.323873 +epoch: 1, batch: 21725, sum loss: 4865.702148, avg loss: 2.694187, ppl: 14.793489 +epoch: 1, batch: 21726, sum loss: 3924.685547, avg loss: 2.597409, ppl: 13.428902 +epoch: 1, batch: 21727, sum loss: 4147.024902, avg loss: 2.422328, ppl: 11.272065 +epoch: 1, batch: 21728, sum loss: 4449.096680, avg loss: 2.517882, ppl: 12.402296 +epoch: 1, batch: 21729, sum loss: 4219.651855, avg loss: 2.476321, ppl: 11.897419 +epoch: 1, batch: 21730, sum loss: 5358.653320, avg loss: 2.757928, ppl: 15.767134 +epoch: 1, batch: 21731, sum loss: 4419.738770, avg loss: 2.499852, ppl: 12.180696 +epoch: 1, batch: 21732, sum loss: 4482.110352, avg loss: 2.508176, ppl: 12.282504 +epoch: 1, batch: 21733, sum loss: 5042.307617, avg loss: 2.957365, ppl: 19.247194 +epoch: 1, batch: 21734, sum loss: 4592.386719, avg loss: 2.618236, ppl: 13.711519 +epoch: 1, batch: 21735, sum loss: 4175.850586, avg loss: 2.592086, ppl: 13.357608 +epoch: 1, batch: 21736, sum loss: 4541.654297, avg loss: 2.806956, ppl: 16.559427 +epoch: 1, batch: 21737, sum loss: 5070.376953, avg loss: 2.736307, ppl: 15.429895 +epoch: 1, batch: 21738, sum loss: 3825.068359, avg loss: 2.467786, ppl: 11.796302 +epoch: 1, batch: 21739, sum loss: 4539.877441, avg loss: 2.412262, ppl: 11.159177 +epoch: 1, batch: 21740, sum loss: 4487.656250, avg loss: 2.754853, ppl: 15.718738 +epoch: 1, batch: 21741, sum loss: 4850.452637, avg loss: 2.815120, ppl: 16.695187 +epoch: 1, batch: 21742, sum loss: 4795.485840, avg loss: 2.544024, ppl: 12.730800 +epoch: 1, batch: 21743, sum loss: 4493.736328, avg loss: 2.488226, ppl: 12.039901 +epoch: 1, batch: 21744, sum loss: 
5494.767578, avg loss: 3.035783, ppl: 20.817278 +epoch: 1, batch: 21745, sum loss: 3761.874023, avg loss: 2.428582, ppl: 11.342792 +epoch: 1, batch: 21746, sum loss: 4741.264648, avg loss: 2.631113, ppl: 13.889215 +epoch: 1, batch: 21747, sum loss: 4508.249023, avg loss: 2.722373, ppl: 15.216381 +epoch: 1, batch: 21748, sum loss: 4307.110840, avg loss: 2.596209, ppl: 13.412794 +epoch: 1, batch: 21749, sum loss: 4835.713379, avg loss: 2.755392, ppl: 15.727206 +epoch: 1, batch: 21750, sum loss: 4383.151367, avg loss: 2.754966, ppl: 15.720510 +epoch: 1, batch: 21751, sum loss: 4958.916016, avg loss: 2.633519, ppl: 13.922673 +epoch: 1, batch: 21752, sum loss: 4446.363770, avg loss: 2.652962, ppl: 14.196021 +epoch: 1, batch: 21753, sum loss: 4564.498535, avg loss: 2.731597, ppl: 15.357396 +epoch: 1, batch: 21754, sum loss: 3385.233643, avg loss: 2.437173, ppl: 11.440654 +epoch: 1, batch: 21755, sum loss: 5151.040039, avg loss: 2.721099, ppl: 15.197009 +epoch: 1, batch: 21756, sum loss: 4296.838867, avg loss: 2.498162, ppl: 12.160124 +epoch: 1, batch: 21757, sum loss: 4734.264160, avg loss: 2.762115, ppl: 15.833287 +epoch: 1, batch: 21758, sum loss: 4237.473633, avg loss: 2.640170, ppl: 14.015591 +epoch: 1, batch: 21759, sum loss: 5403.965332, avg loss: 2.868347, ppl: 17.607887 +epoch: 1, batch: 21760, sum loss: 4502.941406, avg loss: 2.567241, ppl: 13.029831 +epoch: 1, batch: 21761, sum loss: 5246.092285, avg loss: 2.937342, ppl: 18.865629 +epoch: 1, batch: 21762, sum loss: 5321.053711, avg loss: 2.994403, ppl: 19.973425 +epoch: 1, batch: 21763, sum loss: 4114.329102, avg loss: 2.456316, ppl: 11.661770 +epoch: 1, batch: 21764, sum loss: 4416.585938, avg loss: 2.541189, ppl: 12.694753 +epoch: 1, batch: 21765, sum loss: 4348.272461, avg loss: 2.745121, ppl: 15.566504 +epoch: 1, batch: 21766, sum loss: 4674.489258, avg loss: 2.740029, ppl: 15.487432 +epoch: 1, batch: 21767, sum loss: 4670.132324, avg loss: 2.705755, ppl: 14.965604 +epoch: 1, batch: 21768, sum loss: 
5058.254883, avg loss: 2.776210, ppl: 16.058046 +epoch: 1, batch: 21769, sum loss: 3929.675537, avg loss: 2.632067, ppl: 13.902472 +epoch: 1, batch: 21770, sum loss: 4922.952637, avg loss: 2.958505, ppl: 19.269150 +epoch: 1, batch: 21771, sum loss: 4857.336426, avg loss: 2.682130, ppl: 14.616187 +epoch: 1, batch: 21772, sum loss: 4469.641113, avg loss: 2.670037, ppl: 14.440500 +epoch: 1, batch: 21773, sum loss: 5319.927246, avg loss: 2.736588, ppl: 15.434237 +epoch: 1, batch: 21774, sum loss: 4742.054199, avg loss: 2.686716, ppl: 14.683381 +epoch: 1, batch: 21775, sum loss: 5057.351562, avg loss: 2.750055, ppl: 15.643497 +epoch: 1, batch: 21776, sum loss: 4390.368652, avg loss: 2.561475, ppl: 12.954915 +epoch: 1, batch: 21777, sum loss: 4890.718262, avg loss: 2.732245, ppl: 15.367344 +epoch: 1, batch: 21778, sum loss: 4922.412598, avg loss: 2.640779, ppl: 14.024128 +epoch: 1, batch: 21779, sum loss: 4590.125488, avg loss: 2.519279, ppl: 12.419633 +epoch: 1, batch: 21780, sum loss: 4764.819336, avg loss: 2.858320, ppl: 17.432213 +epoch: 1, batch: 21781, sum loss: 4096.209961, avg loss: 2.502266, ppl: 12.210135 +epoch: 1, batch: 21782, sum loss: 4033.971680, avg loss: 2.405469, ppl: 11.083627 +epoch: 1, batch: 21783, sum loss: 4713.304199, avg loss: 2.754708, ppl: 15.716444 +epoch: 1, batch: 21784, sum loss: 4035.167969, avg loss: 2.297932, ppl: 9.953574 +epoch: 1, batch: 21785, sum loss: 4864.179688, avg loss: 2.679989, ppl: 14.584930 +epoch: 1, batch: 21786, sum loss: 3929.397217, avg loss: 2.428552, ppl: 11.342445 +epoch: 1, batch: 21787, sum loss: 4816.052734, avg loss: 2.769438, ppl: 15.949668 +epoch: 1, batch: 21788, sum loss: 3826.864990, avg loss: 2.642863, ppl: 14.053374 +epoch: 1, batch: 21789, sum loss: 4252.693848, avg loss: 2.350854, ppl: 10.494525 +epoch: 1, batch: 21790, sum loss: 5528.938965, avg loss: 2.885668, ppl: 17.915524 +epoch: 1, batch: 21791, sum loss: 4341.040527, avg loss: 2.668126, ppl: 14.412929 +epoch: 1, batch: 21792, sum loss: 
3863.897949, avg loss: 2.488022, ppl: 12.037440 +epoch: 1, batch: 21793, sum loss: 5562.875000, avg loss: 2.889805, ppl: 17.989803 +epoch: 1, batch: 21794, sum loss: 4037.004150, avg loss: 2.782222, ppl: 16.154879 +epoch: 1, batch: 21795, sum loss: 4280.408691, avg loss: 2.757995, ppl: 15.768202 +epoch: 1, batch: 21796, sum loss: 4915.844238, avg loss: 2.750892, ppl: 15.656594 +epoch: 1, batch: 21797, sum loss: 4466.624023, avg loss: 2.628973, ppl: 13.859523 +epoch: 1, batch: 21798, sum loss: 3718.597900, avg loss: 2.377620, ppl: 10.779221 +epoch: 1, batch: 21799, sum loss: 4382.928223, avg loss: 2.545254, ppl: 12.746471 +epoch: 1, batch: 21800, sum loss: 4419.537109, avg loss: 2.755322, ppl: 15.726111 +epoch: 1, batch: 21801, sum loss: 4240.063477, avg loss: 2.543529, ppl: 12.724500 +epoch: 1, batch: 21802, sum loss: 3879.854736, avg loss: 2.603930, ppl: 13.516748 +epoch: 1, batch: 21803, sum loss: 4622.493164, avg loss: 2.820313, ppl: 16.782103 +epoch: 1, batch: 21804, sum loss: 4035.609619, avg loss: 2.585272, ppl: 13.266898 +epoch: 1, batch: 21805, sum loss: 4141.320312, avg loss: 2.431779, ppl: 11.379112 +epoch: 1, batch: 21806, sum loss: 4338.252441, avg loss: 2.384966, ppl: 10.858689 +epoch: 1, batch: 21807, sum loss: 4215.313965, avg loss: 2.515104, ppl: 12.367892 +epoch: 1, batch: 21808, sum loss: 4089.352051, avg loss: 2.289671, ppl: 9.871689 +epoch: 1, batch: 21809, sum loss: 3666.212646, avg loss: 2.514549, ppl: 12.361032 +epoch: 1, batch: 21810, sum loss: 4311.005859, avg loss: 2.622266, ppl: 13.766888 +epoch: 1, batch: 21811, sum loss: 3996.243164, avg loss: 2.336984, ppl: 10.349978 +epoch: 1, batch: 21812, sum loss: 5299.687500, avg loss: 2.727580, ppl: 15.295824 +epoch: 1, batch: 21813, sum loss: 4537.097656, avg loss: 2.662616, ppl: 14.333737 +epoch: 1, batch: 21814, sum loss: 4368.510742, avg loss: 2.431002, ppl: 11.370271 +epoch: 1, batch: 21815, sum loss: 3775.818115, avg loss: 2.515535, ppl: 12.373228 +epoch: 1, batch: 21816, sum loss: 
4604.920410, avg loss: 2.542750, ppl: 12.714593 +epoch: 1, batch: 21817, sum loss: 3052.137451, avg loss: 2.340596, ppl: 10.387427 +epoch: 1, batch: 21818, sum loss: 4143.160156, avg loss: 2.704413, ppl: 14.945537 +epoch: 1, batch: 21819, sum loss: 5364.360352, avg loss: 2.929744, ppl: 18.722832 +epoch: 1, batch: 21820, sum loss: 4310.048828, avg loss: 2.562455, ppl: 12.967609 +epoch: 1, batch: 21821, sum loss: 3935.846680, avg loss: 2.547474, ppl: 12.774790 +epoch: 1, batch: 21822, sum loss: 3655.067627, avg loss: 2.191288, ppl: 8.946725 +epoch: 1, batch: 21823, sum loss: 4539.014160, avg loss: 2.692179, ppl: 14.763814 +epoch: 1, batch: 21824, sum loss: 5129.361816, avg loss: 3.181986, ppl: 24.094561 +epoch: 1, batch: 21825, sum loss: 3975.145508, avg loss: 2.404807, ppl: 11.076288 +epoch: 1, batch: 21826, sum loss: 4765.244629, avg loss: 2.793226, ppl: 16.333635 +epoch: 1, batch: 21827, sum loss: 4493.344727, avg loss: 2.547248, ppl: 12.771903 +epoch: 1, batch: 21828, sum loss: 3732.387207, avg loss: 2.285601, ppl: 9.831597 +epoch: 1, batch: 21829, sum loss: 4246.095703, avg loss: 2.447317, ppl: 11.557301 +epoch: 1, batch: 21830, sum loss: 4697.324219, avg loss: 2.864222, ppl: 17.535406 +epoch: 1, batch: 21831, sum loss: 5583.778320, avg loss: 2.905192, ppl: 18.268745 +epoch: 1, batch: 21832, sum loss: 4254.893555, avg loss: 2.634609, ppl: 13.937861 +epoch: 1, batch: 21833, sum loss: 4620.028320, avg loss: 2.774792, ppl: 16.035290 +epoch: 1, batch: 21834, sum loss: 4716.953125, avg loss: 2.821144, ppl: 16.796061 +epoch: 1, batch: 21835, sum loss: 4301.356445, avg loss: 2.484897, ppl: 11.999880 +epoch: 1, batch: 21836, sum loss: 5494.269531, avg loss: 2.916279, ppl: 18.472425 +epoch: 1, batch: 21837, sum loss: 4119.630371, avg loss: 2.717434, ppl: 15.141426 +epoch: 1, batch: 21838, sum loss: 4532.684570, avg loss: 2.785915, ppl: 16.214653 +epoch: 1, batch: 21839, sum loss: 4177.422852, avg loss: 2.493984, ppl: 12.109424 +epoch: 1, batch: 21840, sum loss: 
4645.573730, avg loss: 2.775134, ppl: 16.040773 +epoch: 1, batch: 21841, sum loss: 4512.137207, avg loss: 2.432419, ppl: 11.386394 +epoch: 1, batch: 21842, sum loss: 3943.733398, avg loss: 2.560866, ppl: 12.947023 +epoch: 1, batch: 21843, sum loss: 3621.600098, avg loss: 2.443725, ppl: 11.515856 +epoch: 1, batch: 21844, sum loss: 4126.175293, avg loss: 2.496174, ppl: 12.135968 +epoch: 1, batch: 21845, sum loss: 4378.751953, avg loss: 2.819544, ppl: 16.769207 +epoch: 1, batch: 21846, sum loss: 4310.821289, avg loss: 2.535777, ppl: 12.626242 +epoch: 1, batch: 21847, sum loss: 5167.868652, avg loss: 2.762089, ppl: 15.832888 +epoch: 1, batch: 21848, sum loss: 4673.507324, avg loss: 2.650883, ppl: 14.166549 +epoch: 1, batch: 21849, sum loss: 4214.827148, avg loss: 2.473490, ppl: 11.863782 +epoch: 1, batch: 21850, sum loss: 6634.787109, avg loss: 3.101817, ppl: 22.238329 +epoch: 1, batch: 21851, sum loss: 4558.296875, avg loss: 2.714888, ppl: 15.102917 +epoch: 1, batch: 21852, sum loss: 5079.720703, avg loss: 2.780362, ppl: 16.124851 +epoch: 1, batch: 21853, sum loss: 5179.413574, avg loss: 2.828735, ppl: 16.924040 +epoch: 1, batch: 21854, sum loss: 4535.349609, avg loss: 2.584245, ppl: 13.253276 +epoch: 1, batch: 21855, sum loss: 4674.178223, avg loss: 2.583847, ppl: 13.247999 +epoch: 1, batch: 21856, sum loss: 4162.034180, avg loss: 2.507250, ppl: 12.271133 +epoch: 1, batch: 21857, sum loss: 5098.409180, avg loss: 2.662355, ppl: 14.329992 +epoch: 1, batch: 21858, sum loss: 3937.575439, avg loss: 2.346588, ppl: 10.449858 +epoch: 1, batch: 21859, sum loss: 4534.631836, avg loss: 2.662732, ppl: 14.335395 +epoch: 1, batch: 21860, sum loss: 4125.448242, avg loss: 2.441094, ppl: 11.485596 +epoch: 1, batch: 21861, sum loss: 5943.984863, avg loss: 2.922313, ppl: 18.584223 +epoch: 1, batch: 21862, sum loss: 4965.428711, avg loss: 3.005708, ppl: 20.200502 +epoch: 1, batch: 21863, sum loss: 5325.076660, avg loss: 3.029054, ppl: 20.677656 +epoch: 1, batch: 21864, sum loss: 
4207.809082, avg loss: 2.743031, ppl: 15.533990 +epoch: 1, batch: 21865, sum loss: 3147.852783, avg loss: 2.266273, ppl: 9.643389 +epoch: 1, batch: 21866, sum loss: 4995.417969, avg loss: 2.872581, ppl: 17.682594 +epoch: 1, batch: 21867, sum loss: 3395.394043, avg loss: 2.309792, ppl: 10.072328 +epoch: 1, batch: 21868, sum loss: 4212.673340, avg loss: 2.728415, ppl: 15.308611 +epoch: 1, batch: 21869, sum loss: 4022.973633, avg loss: 2.755461, ppl: 15.728297 +epoch: 1, batch: 21870, sum loss: 4338.195312, avg loss: 2.559407, ppl: 12.928152 +epoch: 1, batch: 21871, sum loss: 4270.502441, avg loss: 2.866109, ppl: 17.568529 +epoch: 1, batch: 21872, sum loss: 3538.053223, avg loss: 2.406839, ppl: 11.098821 +epoch: 1, batch: 21873, sum loss: 4899.375000, avg loss: 2.769573, ppl: 15.951824 +epoch: 1, batch: 21874, sum loss: 4012.264648, avg loss: 2.555583, ppl: 12.878799 +epoch: 1, batch: 21875, sum loss: 4976.375977, avg loss: 2.586474, ppl: 13.282853 +epoch: 1, batch: 21876, sum loss: 4496.830566, avg loss: 2.549224, ppl: 12.797165 +epoch: 1, batch: 21877, sum loss: 4167.526855, avg loss: 2.588526, ppl: 13.310138 +epoch: 1, batch: 21878, sum loss: 4594.267090, avg loss: 2.555210, ppl: 12.873999 +epoch: 1, batch: 21879, sum loss: 4409.140625, avg loss: 2.718336, ppl: 15.155078 +epoch: 1, batch: 21880, sum loss: 5212.078125, avg loss: 2.672860, ppl: 14.481332 +epoch: 1, batch: 21881, sum loss: 4154.991211, avg loss: 2.580740, ppl: 13.206904 +epoch: 1, batch: 21882, sum loss: 3812.208008, avg loss: 2.426612, ppl: 11.320468 +epoch: 1, batch: 21883, sum loss: 5180.703125, avg loss: 2.818663, ppl: 16.754436 +epoch: 1, batch: 21884, sum loss: 4206.879395, avg loss: 2.629300, ppl: 13.864057 +epoch: 1, batch: 21885, sum loss: 4365.670898, avg loss: 2.701529, ppl: 14.902500 +epoch: 1, batch: 21886, sum loss: 5249.878906, avg loss: 2.768924, ppl: 15.941468 +epoch: 1, batch: 21887, sum loss: 4490.859375, avg loss: 2.758513, ppl: 15.776365 +epoch: 1, batch: 21888, sum loss: 
4407.917969, avg loss: 2.748079, ppl: 15.612605 +epoch: 1, batch: 21889, sum loss: 3919.450928, avg loss: 2.416431, ppl: 11.205793 +epoch: 1, batch: 21890, sum loss: 4048.256104, avg loss: 2.385537, ppl: 10.864895 +epoch: 1, batch: 21891, sum loss: 4487.205078, avg loss: 2.815060, ppl: 16.694172 +epoch: 1, batch: 21892, sum loss: 4637.276855, avg loss: 2.611079, ppl: 13.613738 +epoch: 1, batch: 21893, sum loss: 5991.488281, avg loss: 2.884684, ppl: 17.897909 +epoch: 1, batch: 21894, sum loss: 4572.213867, avg loss: 2.626200, ppl: 13.821146 +epoch: 1, batch: 21895, sum loss: 3826.104248, avg loss: 2.502357, ppl: 12.211247 +epoch: 1, batch: 21896, sum loss: 4371.565918, avg loss: 2.617704, ppl: 13.704225 +epoch: 1, batch: 21897, sum loss: 4260.022461, avg loss: 2.636153, ppl: 13.959392 +epoch: 1, batch: 21898, sum loss: 5240.222168, avg loss: 3.032536, ppl: 20.749788 +epoch: 1, batch: 21899, sum loss: 4547.936523, avg loss: 2.823052, ppl: 16.828131 +epoch: 1, batch: 21900, sum loss: 3825.408936, avg loss: 2.553677, ppl: 12.854286 +epoch: 1, batch: 21901, sum loss: 5245.647949, avg loss: 2.879060, ppl: 17.797541 +epoch: 1, batch: 21902, sum loss: 3875.895264, avg loss: 2.636663, ppl: 13.966526 +epoch: 1, batch: 21903, sum loss: 6212.273926, avg loss: 3.070822, ppl: 21.559622 +epoch: 1, batch: 21904, sum loss: 4807.827148, avg loss: 2.861802, ppl: 17.493023 +epoch: 1, batch: 21905, sum loss: 4616.144043, avg loss: 2.821604, ppl: 16.803783 +epoch: 1, batch: 21906, sum loss: 4598.040527, avg loss: 2.728807, ppl: 15.314612 +epoch: 1, batch: 21907, sum loss: 3367.584229, avg loss: 2.378237, ppl: 10.785876 +epoch: 1, batch: 21908, sum loss: 4059.112061, avg loss: 2.524323, ppl: 12.482448 +epoch: 1, batch: 21909, sum loss: 4872.254395, avg loss: 2.766754, ppl: 15.906919 +epoch: 1, batch: 21910, sum loss: 4923.505859, avg loss: 2.720169, ppl: 15.182889 +epoch: 1, batch: 21911, sum loss: 4108.467285, avg loss: 2.451353, ppl: 11.604034 +epoch: 1, batch: 21912, sum loss: 
5134.012695, avg loss: 2.882658, ppl: 17.861677 +epoch: 1, batch: 21913, sum loss: 4608.528320, avg loss: 2.483043, ppl: 11.977659 +epoch: 1, batch: 21914, sum loss: 4508.151855, avg loss: 2.611907, ppl: 13.625012 +epoch: 1, batch: 21915, sum loss: 4483.117188, avg loss: 2.735276, ppl: 15.413993 +epoch: 1, batch: 21916, sum loss: 5455.691406, avg loss: 3.030940, ppl: 20.716694 +epoch: 1, batch: 21917, sum loss: 4509.921875, avg loss: 2.471190, ppl: 11.836524 +epoch: 1, batch: 21918, sum loss: 4870.583984, avg loss: 2.608776, ppl: 13.582411 +epoch: 1, batch: 21919, sum loss: 4528.964844, avg loss: 2.705475, ppl: 14.961423 +epoch: 1, batch: 21920, sum loss: 4859.322266, avg loss: 2.816998, ppl: 16.726570 +epoch: 1, batch: 21921, sum loss: 5136.038574, avg loss: 2.856529, ppl: 17.401014 +epoch: 1, batch: 21922, sum loss: 4649.116699, avg loss: 2.687351, ppl: 14.692699 +epoch: 1, batch: 21923, sum loss: 4887.842285, avg loss: 2.750615, ppl: 15.652249 +epoch: 1, batch: 21924, sum loss: 4936.125977, avg loss: 2.515864, ppl: 12.377303 +epoch: 1, batch: 21925, sum loss: 5217.943359, avg loss: 2.771080, ppl: 15.975875 +epoch: 1, batch: 21926, sum loss: 4206.070312, avg loss: 2.536834, ppl: 12.639587 +epoch: 1, batch: 21927, sum loss: 4964.727539, avg loss: 2.817666, ppl: 16.737740 +epoch: 1, batch: 21928, sum loss: 6068.469238, avg loss: 2.933045, ppl: 18.784737 +epoch: 1, batch: 21929, sum loss: 4781.675293, avg loss: 2.732386, ppl: 15.369513 +epoch: 1, batch: 21930, sum loss: 4809.656250, avg loss: 2.737425, ppl: 15.447163 +epoch: 1, batch: 21931, sum loss: 4473.858398, avg loss: 2.722981, ppl: 15.225646 +epoch: 1, batch: 21932, sum loss: 4115.359375, avg loss: 2.629622, ppl: 13.868533 +epoch: 1, batch: 21933, sum loss: 3878.187988, avg loss: 2.528154, ppl: 12.530353 +epoch: 1, batch: 21934, sum loss: 4776.806641, avg loss: 2.600330, ppl: 13.468186 +epoch: 1, batch: 21935, sum loss: 3753.458496, avg loss: 2.407607, ppl: 11.107345 +epoch: 1, batch: 21936, sum loss: 
4442.341797, avg loss: 2.672889, ppl: 14.481750 +epoch: 1, batch: 21937, sum loss: 4990.699219, avg loss: 2.758817, ppl: 15.781158 +epoch: 1, batch: 21938, sum loss: 4197.165039, avg loss: 2.683609, ppl: 14.637830 +epoch: 1, batch: 21939, sum loss: 5613.085938, avg loss: 2.800941, ppl: 16.460129 +epoch: 1, batch: 21940, sum loss: 3005.053711, avg loss: 2.095574, ppl: 8.130110 +epoch: 1, batch: 21941, sum loss: 4869.194336, avg loss: 2.844156, ppl: 17.187038 +epoch: 1, batch: 21942, sum loss: 4670.397949, avg loss: 2.677980, ppl: 14.555667 +epoch: 1, batch: 21943, sum loss: 4274.600586, avg loss: 2.616035, ppl: 13.681366 +epoch: 1, batch: 21944, sum loss: 5014.463379, avg loss: 3.009882, ppl: 20.285006 +epoch: 1, batch: 21945, sum loss: 3835.865723, avg loss: 2.745788, ppl: 15.576881 +epoch: 1, batch: 21946, sum loss: 4739.499023, avg loss: 2.628674, ppl: 13.855383 +epoch: 1, batch: 21947, sum loss: 4355.939941, avg loss: 2.763921, ppl: 15.861920 +epoch: 1, batch: 21948, sum loss: 4204.952148, avg loss: 2.786582, ppl: 16.225466 +epoch: 1, batch: 21949, sum loss: 4653.138184, avg loss: 2.789651, ppl: 16.275341 +epoch: 1, batch: 21950, sum loss: 4661.573730, avg loss: 2.557089, ppl: 12.898220 +epoch: 1, batch: 21951, sum loss: 4343.291504, avg loss: 2.514934, ppl: 12.365796 +epoch: 1, batch: 21952, sum loss: 4881.489746, avg loss: 2.771999, ppl: 15.990561 +epoch: 1, batch: 21953, sum loss: 4602.719238, avg loss: 2.568482, ppl: 13.046004 +epoch: 1, batch: 21954, sum loss: 4713.765625, avg loss: 2.710618, ppl: 15.038574 +epoch: 1, batch: 21955, sum loss: 4564.154297, avg loss: 2.708697, ppl: 15.009703 +epoch: 1, batch: 21956, sum loss: 3368.396484, avg loss: 2.418088, ppl: 11.224378 +epoch: 1, batch: 21957, sum loss: 4910.186523, avg loss: 2.700873, ppl: 14.892722 +epoch: 1, batch: 21958, sum loss: 4237.843750, avg loss: 2.548313, ppl: 12.785512 +epoch: 1, batch: 21959, sum loss: 3877.263184, avg loss: 2.386008, ppl: 10.870014 +epoch: 1, batch: 21960, sum loss: 
4647.320801, avg loss: 2.667808, ppl: 14.408345 +epoch: 1, batch: 21961, sum loss: 4198.517090, avg loss: 2.526184, ppl: 12.505689 +epoch: 1, batch: 21962, sum loss: 3891.010986, avg loss: 2.320221, ppl: 10.177925 +epoch: 1, batch: 21963, sum loss: 4785.575684, avg loss: 2.954059, ppl: 19.183664 +epoch: 1, batch: 21964, sum loss: 4450.454590, avg loss: 2.752291, ppl: 15.678514 +epoch: 1, batch: 21965, sum loss: 5229.760742, avg loss: 2.846903, ppl: 17.234325 +epoch: 1, batch: 21966, sum loss: 4342.646484, avg loss: 2.685619, ppl: 14.667283 +epoch: 1, batch: 21967, sum loss: 4734.881348, avg loss: 2.542901, ppl: 12.716506 +epoch: 1, batch: 21968, sum loss: 4269.883301, avg loss: 2.719671, ppl: 15.175329 +epoch: 1, batch: 21969, sum loss: 4318.949219, avg loss: 2.864025, ppl: 17.531944 +epoch: 1, batch: 21970, sum loss: 4488.000000, avg loss: 2.822641, ppl: 16.821222 +epoch: 1, batch: 21971, sum loss: 3861.365234, avg loss: 2.562286, ppl: 12.965425 +epoch: 1, batch: 21972, sum loss: 4728.736816, avg loss: 2.708326, ppl: 15.004135 +epoch: 1, batch: 21973, sum loss: 4832.651367, avg loss: 2.744265, ppl: 15.553184 +epoch: 1, batch: 21974, sum loss: 4190.638184, avg loss: 2.674307, ppl: 14.502291 +epoch: 1, batch: 21975, sum loss: 3688.416992, avg loss: 2.577510, ppl: 13.164320 +epoch: 1, batch: 21976, sum loss: 4690.359375, avg loss: 2.657427, ppl: 14.259557 +epoch: 1, batch: 21977, sum loss: 4115.553223, avg loss: 2.480743, ppl: 11.950145 +epoch: 1, batch: 21978, sum loss: 4789.571289, avg loss: 2.918690, ppl: 18.517023 +epoch: 1, batch: 21979, sum loss: 3930.147217, avg loss: 2.597586, ppl: 13.431274 +epoch: 1, batch: 21980, sum loss: 5070.379883, avg loss: 2.970345, ppl: 19.498655 +epoch: 1, batch: 21981, sum loss: 4362.864258, avg loss: 2.723386, ppl: 15.231807 +epoch: 1, batch: 21982, sum loss: 4857.049316, avg loss: 2.625432, ppl: 13.810539 +epoch: 1, batch: 21983, sum loss: 4465.601074, avg loss: 2.759951, ppl: 15.799070 +epoch: 1, batch: 21984, sum loss: 
4545.036621, avg loss: 2.698953, ppl: 14.864160 +epoch: 1, batch: 21985, sum loss: 3831.094727, avg loss: 2.677215, ppl: 14.544532 +epoch: 1, batch: 21986, sum loss: 4648.597656, avg loss: 2.430004, ppl: 11.358926 +epoch: 1, batch: 21987, sum loss: 5114.322754, avg loss: 2.733470, ppl: 15.386191 +epoch: 1, batch: 21988, sum loss: 4136.561035, avg loss: 2.505488, ppl: 12.249540 +epoch: 1, batch: 21989, sum loss: 4809.970703, avg loss: 2.641390, ppl: 14.032693 +epoch: 1, batch: 21990, sum loss: 4111.267578, avg loss: 2.618642, ppl: 13.717081 +epoch: 1, batch: 21991, sum loss: 4983.382324, avg loss: 2.849275, ppl: 17.275255 +epoch: 1, batch: 21992, sum loss: 3502.831055, avg loss: 2.435905, ppl: 11.426151 +epoch: 1, batch: 21993, sum loss: 4336.085449, avg loss: 2.679904, ppl: 14.583700 +epoch: 1, batch: 21994, sum loss: 4264.330078, avg loss: 2.584442, ppl: 13.255896 +epoch: 1, batch: 21995, sum loss: 5212.151855, avg loss: 3.009326, ppl: 20.273722 +epoch: 1, batch: 21996, sum loss: 4750.501465, avg loss: 2.836120, ppl: 17.049492 +epoch: 1, batch: 21997, sum loss: 4024.001953, avg loss: 2.434363, ppl: 11.408548 +epoch: 1, batch: 21998, sum loss: 4258.791992, avg loss: 2.697145, ppl: 14.837311 +epoch: 1, batch: 21999, sum loss: 5388.862793, avg loss: 2.917630, ppl: 18.497404 +epoch: 1, batch: 22000, sum loss: 4104.658691, avg loss: 2.609446, ppl: 13.591519 +epoch: 1, batch: 22001, sum loss: 4732.462402, avg loss: 2.630607, ppl: 13.882195 +epoch: 1, batch: 22002, sum loss: 4716.010742, avg loss: 2.958601, ppl: 19.271002 +epoch: 1, batch: 22003, sum loss: 4839.434082, avg loss: 2.645946, ppl: 14.096768 +epoch: 1, batch: 22004, sum loss: 4561.441895, avg loss: 2.694295, ppl: 14.795091 +epoch: 1, batch: 22005, sum loss: 4831.267578, avg loss: 2.586332, ppl: 13.280962 +epoch: 1, batch: 22006, sum loss: 4171.112793, avg loss: 2.582732, ppl: 13.233248 +epoch: 1, batch: 22007, sum loss: 4354.833008, avg loss: 2.521617, ppl: 12.448715 +epoch: 1, batch: 22008, sum loss: 
4995.204590, avg loss: 2.675525, ppl: 14.519967 +epoch: 1, batch: 22009, sum loss: 3832.174805, avg loss: 2.453377, ppl: 11.627547 +epoch: 1, batch: 22010, sum loss: 4615.483398, avg loss: 2.579924, ppl: 13.196131 +epoch: 1, batch: 22011, sum loss: 4656.504883, avg loss: 2.686962, ppl: 14.686984 +epoch: 1, batch: 22012, sum loss: 4495.516602, avg loss: 2.612154, ppl: 13.628371 +epoch: 1, batch: 22013, sum loss: 4686.456055, avg loss: 2.715212, ppl: 15.107814 +epoch: 1, batch: 22014, sum loss: 4602.373047, avg loss: 2.505375, ppl: 12.248147 +epoch: 1, batch: 22015, sum loss: 4921.103027, avg loss: 2.721849, ppl: 15.208416 +epoch: 1, batch: 22016, sum loss: 4316.561523, avg loss: 2.749402, ppl: 15.633285 +epoch: 1, batch: 22017, sum loss: 4549.333008, avg loss: 2.707937, ppl: 14.998295 +epoch: 1, batch: 22018, sum loss: 5908.793945, avg loss: 3.063138, ppl: 21.394594 +epoch: 1, batch: 22019, sum loss: 3932.484375, avg loss: 2.346351, ppl: 10.447379 +epoch: 1, batch: 22020, sum loss: 4394.078125, avg loss: 2.648631, ppl: 14.134669 +epoch: 1, batch: 22021, sum loss: 3721.790771, avg loss: 2.486166, ppl: 12.015121 +epoch: 1, batch: 22022, sum loss: 4037.125488, avg loss: 2.542271, ppl: 12.708495 +epoch: 1, batch: 22023, sum loss: 3822.908203, avg loss: 2.474374, ppl: 11.874275 +epoch: 1, batch: 22024, sum loss: 3531.235107, avg loss: 2.435335, ppl: 11.419640 +epoch: 1, batch: 22025, sum loss: 3954.568848, avg loss: 2.721658, ppl: 15.205512 +epoch: 1, batch: 22026, sum loss: 4816.740234, avg loss: 2.908659, ppl: 18.332207 +epoch: 1, batch: 22027, sum loss: 3697.064941, avg loss: 2.533972, ppl: 12.603465 +epoch: 1, batch: 22028, sum loss: 5319.020508, avg loss: 2.911341, ppl: 18.381435 +epoch: 1, batch: 22029, sum loss: 4064.948730, avg loss: 2.576013, ppl: 13.144627 +epoch: 1, batch: 22030, sum loss: 5349.677246, avg loss: 2.868460, ppl: 17.609873 +epoch: 1, batch: 22031, sum loss: 5090.497070, avg loss: 2.679209, ppl: 14.573565 +epoch: 1, batch: 22032, sum loss: 
4645.958496, avg loss: 2.838093, ppl: 17.083162 +epoch: 1, batch: 22033, sum loss: 3997.238281, avg loss: 2.537929, ppl: 12.653440 +epoch: 1, batch: 22034, sum loss: 3974.760742, avg loss: 2.443000, ppl: 11.507509 +epoch: 1, batch: 22035, sum loss: 4476.085938, avg loss: 2.768142, ppl: 15.929014 +epoch: 1, batch: 22036, sum loss: 4488.024902, avg loss: 2.654066, ppl: 14.211700 +epoch: 1, batch: 22037, sum loss: 5086.043945, avg loss: 3.062037, ppl: 21.371050 +epoch: 1, batch: 22038, sum loss: 4009.351074, avg loss: 2.740500, ppl: 15.494726 +epoch: 1, batch: 22039, sum loss: 5341.983398, avg loss: 2.704802, ppl: 14.951353 +epoch: 1, batch: 22040, sum loss: 4063.213867, avg loss: 2.758462, ppl: 15.775557 +epoch: 1, batch: 22041, sum loss: 4398.686523, avg loss: 2.627650, ppl: 13.841211 +epoch: 1, batch: 22042, sum loss: 4567.685059, avg loss: 2.769973, ppl: 15.958200 +epoch: 1, batch: 22043, sum loss: 4570.535156, avg loss: 2.720557, ppl: 15.188776 +epoch: 1, batch: 22044, sum loss: 4104.858398, avg loss: 2.723861, ppl: 15.239046 +epoch: 1, batch: 22045, sum loss: 5780.750977, avg loss: 3.174493, ppl: 23.914682 +epoch: 1, batch: 22046, sum loss: 3865.620605, avg loss: 2.225458, ppl: 9.257723 +epoch: 1, batch: 22047, sum loss: 5090.358887, avg loss: 2.839018, ppl: 17.098965 +epoch: 1, batch: 22048, sum loss: 4634.728516, avg loss: 2.494472, ppl: 12.115333 +epoch: 1, batch: 22049, sum loss: 5271.554688, avg loss: 2.901241, ppl: 18.196711 +epoch: 1, batch: 22050, sum loss: 6080.502930, avg loss: 3.084984, ppl: 21.867107 +epoch: 1, batch: 22051, sum loss: 4306.950195, avg loss: 2.498231, ppl: 12.160964 +epoch: 1, batch: 22052, sum loss: 5241.878906, avg loss: 2.801646, ppl: 16.471733 +epoch: 1, batch: 22053, sum loss: 4550.235840, avg loss: 2.793269, ppl: 16.334335 +epoch: 1, batch: 22054, sum loss: 4024.946533, avg loss: 2.738059, ppl: 15.456951 +epoch: 1, batch: 22055, sum loss: 4961.037598, avg loss: 2.968904, ppl: 19.470560 +epoch: 1, batch: 22056, sum loss: 
4399.727539, avg loss: 2.700876, ppl: 14.892776 +epoch: 1, batch: 22057, sum loss: 4079.838135, avg loss: 2.553090, ppl: 12.846744 +epoch: 1, batch: 22058, sum loss: 4091.794189, avg loss: 2.597965, ppl: 13.436360 +epoch: 1, batch: 22059, sum loss: 4219.856445, avg loss: 2.429394, ppl: 11.351995 +epoch: 1, batch: 22060, sum loss: 4733.967285, avg loss: 2.774893, ppl: 16.036907 +epoch: 1, batch: 22061, sum loss: 4097.514160, avg loss: 2.593364, ppl: 13.374682 +epoch: 1, batch: 22062, sum loss: 4006.183105, avg loss: 2.454769, ppl: 11.643745 +epoch: 1, batch: 22063, sum loss: 4233.603027, avg loss: 2.781605, ppl: 16.144917 +epoch: 1, batch: 22064, sum loss: 4341.287109, avg loss: 2.642293, ppl: 14.045369 +epoch: 1, batch: 22065, sum loss: 4468.886230, avg loss: 2.592162, ppl: 13.358617 +epoch: 1, batch: 22066, sum loss: 4573.250488, avg loss: 2.631329, ppl: 13.892224 +epoch: 1, batch: 22067, sum loss: 3132.665283, avg loss: 2.355387, ppl: 10.542212 +epoch: 1, batch: 22068, sum loss: 4406.228027, avg loss: 2.638460, ppl: 13.991639 +epoch: 1, batch: 22069, sum loss: 4763.177734, avg loss: 2.748516, ppl: 15.619430 +epoch: 1, batch: 22070, sum loss: 4487.716309, avg loss: 2.763372, ppl: 15.853209 +epoch: 1, batch: 22071, sum loss: 4728.504395, avg loss: 2.839943, ppl: 17.114784 +epoch: 1, batch: 22072, sum loss: 4371.305664, avg loss: 2.809322, ppl: 16.598667 +epoch: 1, batch: 22073, sum loss: 4480.726562, avg loss: 2.722191, ppl: 15.213624 +epoch: 1, batch: 22074, sum loss: 4249.079102, avg loss: 2.535250, ppl: 12.619588 +epoch: 1, batch: 22075, sum loss: 4068.872070, avg loss: 2.649005, ppl: 14.139968 +epoch: 1, batch: 22076, sum loss: 4999.178711, avg loss: 2.810106, ppl: 16.611679 +epoch: 1, batch: 22077, sum loss: 4557.194336, avg loss: 2.557348, ppl: 12.901560 +epoch: 1, batch: 22078, sum loss: 5330.375977, avg loss: 2.885964, ppl: 17.920841 +epoch: 1, batch: 22079, sum loss: 4167.046875, avg loss: 2.594674, ppl: 13.392222 +epoch: 1, batch: 22080, sum loss: 
5311.331543, avg loss: 2.918314, ppl: 18.510057 +epoch: 1, batch: 22081, sum loss: 4775.062988, avg loss: 2.720834, ppl: 15.192981 +epoch: 1, batch: 22082, sum loss: 3769.567627, avg loss: 2.580128, ppl: 13.198833 +epoch: 1, batch: 22083, sum loss: 4069.388916, avg loss: 2.532290, ppl: 12.582280 +epoch: 1, batch: 22084, sum loss: 4912.247070, avg loss: 2.778420, ppl: 16.093580 +epoch: 1, batch: 22085, sum loss: 4530.201172, avg loss: 2.618613, ppl: 13.716689 +epoch: 1, batch: 22086, sum loss: 4557.358887, avg loss: 2.802804, ppl: 16.490818 +epoch: 1, batch: 22087, sum loss: 4494.621582, avg loss: 2.725665, ppl: 15.266564 +epoch: 1, batch: 22088, sum loss: 3907.430176, avg loss: 2.413484, ppl: 11.172818 +epoch: 1, batch: 22089, sum loss: 3834.311768, avg loss: 2.764464, ppl: 15.870533 +epoch: 1, batch: 22090, sum loss: 4509.235840, avg loss: 2.693689, ppl: 14.786126 +epoch: 1, batch: 22091, sum loss: 4961.555664, avg loss: 2.726130, ppl: 15.273656 +epoch: 1, batch: 22092, sum loss: 4642.191406, avg loss: 2.618269, ppl: 13.711974 +epoch: 1, batch: 22093, sum loss: 3439.923340, avg loss: 2.251259, ppl: 9.499687 +epoch: 1, batch: 22094, sum loss: 4363.591797, avg loss: 2.835342, ppl: 17.036232 +epoch: 1, batch: 22095, sum loss: 3774.460205, avg loss: 2.262866, ppl: 9.610592 +epoch: 1, batch: 22096, sum loss: 4016.094238, avg loss: 2.369377, ppl: 10.690731 +epoch: 1, batch: 22097, sum loss: 5066.153809, avg loss: 2.794349, ppl: 16.351976 +epoch: 1, batch: 22098, sum loss: 4976.351562, avg loss: 2.720805, ppl: 15.192543 +epoch: 1, batch: 22099, sum loss: 4806.786133, avg loss: 2.620930, ppl: 13.748507 +epoch: 1, batch: 22100, sum loss: 4256.500000, avg loss: 2.576574, ppl: 13.152000 +epoch: 1, batch: 22101, sum loss: 4244.697754, avg loss: 2.664594, ppl: 14.362113 +epoch: 1, batch: 22102, sum loss: 4898.068848, avg loss: 2.877831, ppl: 17.775681 +epoch: 1, batch: 22103, sum loss: 4183.340820, avg loss: 2.543064, ppl: 12.718582 +epoch: 1, batch: 22104, sum loss: 
3964.045654, avg loss: 2.477529, ppl: 11.911789 +epoch: 1, batch: 22105, sum loss: 4093.843262, avg loss: 2.528625, ppl: 12.536254 +epoch: 1, batch: 22106, sum loss: 4687.776367, avg loss: 2.610121, ppl: 13.600690 +epoch: 1, batch: 22107, sum loss: 4664.949219, avg loss: 2.811904, ppl: 16.641577 +epoch: 1, batch: 22108, sum loss: 4906.189941, avg loss: 2.784444, ppl: 16.190811 +epoch: 1, batch: 22109, sum loss: 3300.397705, avg loss: 2.515547, ppl: 12.373376 +epoch: 1, batch: 22110, sum loss: 5114.322754, avg loss: 2.853975, ppl: 17.356634 +epoch: 1, batch: 22111, sum loss: 4774.798340, avg loss: 2.769605, ppl: 15.952327 +epoch: 1, batch: 22112, sum loss: 4176.782715, avg loss: 2.489143, ppl: 12.050948 +epoch: 1, batch: 22113, sum loss: 4152.218750, avg loss: 2.547374, ppl: 12.773511 +epoch: 1, batch: 22114, sum loss: 4352.373047, avg loss: 2.587618, ppl: 13.298053 +epoch: 1, batch: 22115, sum loss: 4175.088379, avg loss: 2.620897, ppl: 13.748044 +epoch: 1, batch: 22116, sum loss: 4164.848145, avg loss: 2.736431, ppl: 15.431816 +epoch: 1, batch: 22117, sum loss: 5814.480469, avg loss: 3.203571, ppl: 24.620283 +epoch: 1, batch: 22118, sum loss: 4447.008789, avg loss: 2.426082, ppl: 11.314469 +epoch: 1, batch: 22119, sum loss: 4127.887207, avg loss: 2.944285, ppl: 18.997072 +epoch: 1, batch: 22120, sum loss: 4949.226074, avg loss: 2.850937, ppl: 17.303982 +epoch: 1, batch: 22121, sum loss: 3926.553711, avg loss: 2.385513, ppl: 10.864631 +epoch: 1, batch: 22122, sum loss: 5473.125000, avg loss: 2.953656, ppl: 19.175938 +epoch: 1, batch: 22123, sum loss: 5186.922363, avg loss: 2.796185, ppl: 16.382023 +epoch: 1, batch: 22124, sum loss: 4484.001465, avg loss: 2.664291, ppl: 14.357761 +epoch: 1, batch: 22125, sum loss: 4107.677246, avg loss: 2.379883, ppl: 10.803634 +epoch: 1, batch: 22126, sum loss: 4902.392578, avg loss: 2.819087, ppl: 16.761545 +epoch: 1, batch: 22127, sum loss: 3344.417969, avg loss: 2.435847, ppl: 11.425489 +epoch: 1, batch: 22128, sum loss: 
4573.458008, avg loss: 2.592663, ppl: 13.365320 +epoch: 1, batch: 22129, sum loss: 3923.907715, avg loss: 2.422165, ppl: 11.270237 +epoch: 1, batch: 22130, sum loss: 4760.964844, avg loss: 2.776073, ppl: 16.055841 +epoch: 1, batch: 22131, sum loss: 3129.064209, avg loss: 2.277339, ppl: 9.750704 +epoch: 1, batch: 22132, sum loss: 4621.744141, avg loss: 2.738000, ppl: 15.456044 +epoch: 1, batch: 22133, sum loss: 5182.884766, avg loss: 2.792503, ppl: 16.321815 +epoch: 1, batch: 22134, sum loss: 4265.749023, avg loss: 2.715308, ppl: 15.109266 +epoch: 1, batch: 22135, sum loss: 4393.579102, avg loss: 2.509183, ppl: 12.294880 +epoch: 1, batch: 22136, sum loss: 5067.123535, avg loss: 2.824484, ppl: 16.852242 +epoch: 1, batch: 22137, sum loss: 4158.994141, avg loss: 2.445029, ppl: 11.530882 +epoch: 1, batch: 22138, sum loss: 4479.203613, avg loss: 2.661440, ppl: 14.316889 +epoch: 1, batch: 22139, sum loss: 4051.993164, avg loss: 2.546822, ppl: 12.766462 +epoch: 1, batch: 22140, sum loss: 4541.391602, avg loss: 2.584742, ppl: 13.259866 +epoch: 1, batch: 22141, sum loss: 3897.291992, avg loss: 2.457309, ppl: 11.673356 +epoch: 1, batch: 22142, sum loss: 4933.259766, avg loss: 2.851595, ppl: 17.315380 +epoch: 1, batch: 22143, sum loss: 4187.324707, avg loss: 2.655247, ppl: 14.228503 +epoch: 1, batch: 22144, sum loss: 4367.329102, avg loss: 2.664630, ppl: 14.362637 +epoch: 1, batch: 22145, sum loss: 4628.227539, avg loss: 2.557032, ppl: 12.897479 +epoch: 1, batch: 22146, sum loss: 5211.782715, avg loss: 2.855771, ppl: 17.387844 +epoch: 1, batch: 22147, sum loss: 4816.518066, avg loss: 2.829916, ppl: 16.944046 +epoch: 1, batch: 22148, sum loss: 4133.992188, avg loss: 2.577302, ppl: 13.161580 +epoch: 1, batch: 22149, sum loss: 4794.449219, avg loss: 2.777781, ppl: 16.083284 +epoch: 1, batch: 22150, sum loss: 4658.841309, avg loss: 2.692972, ppl: 14.775519 +epoch: 1, batch: 22151, sum loss: 4967.301758, avg loss: 2.654891, ppl: 14.223443 +epoch: 1, batch: 22152, sum loss: 
3543.186523, avg loss: 2.467400, ppl: 11.791749 +epoch: 1, batch: 22153, sum loss: 4351.160156, avg loss: 2.689221, ppl: 14.720210 +epoch: 1, batch: 22154, sum loss: 4140.333008, avg loss: 2.369967, ppl: 10.697042 +epoch: 1, batch: 22155, sum loss: 4650.042480, avg loss: 2.615322, ppl: 13.671619 +epoch: 1, batch: 22156, sum loss: 3410.850098, avg loss: 2.318729, ppl: 10.162748 +epoch: 1, batch: 22157, sum loss: 4435.080566, avg loss: 2.630534, ppl: 13.881183 +epoch: 1, batch: 22158, sum loss: 4170.461426, avg loss: 2.475051, ppl: 11.882315 +epoch: 1, batch: 22159, sum loss: 4426.651855, avg loss: 2.958992, ppl: 19.278530 +epoch: 1, batch: 22160, sum loss: 5835.897461, avg loss: 3.037948, ppl: 20.862387 +epoch: 1, batch: 22161, sum loss: 5431.081055, avg loss: 3.003917, ppl: 20.164356 +epoch: 1, batch: 22162, sum loss: 5492.132812, avg loss: 2.842719, ppl: 17.162363 +epoch: 1, batch: 22163, sum loss: 3853.378906, avg loss: 2.403855, ppl: 11.065748 +epoch: 1, batch: 22164, sum loss: 5117.070312, avg loss: 2.762997, ppl: 15.847264 +epoch: 1, batch: 22165, sum loss: 3978.020020, avg loss: 2.338636, ppl: 10.367085 +epoch: 1, batch: 22166, sum loss: 5023.445801, avg loss: 2.663545, ppl: 14.347061 +epoch: 1, batch: 22167, sum loss: 3947.015625, avg loss: 2.419997, ppl: 11.245831 +epoch: 1, batch: 22168, sum loss: 4648.502930, avg loss: 2.715247, ppl: 15.108340 +epoch: 1, batch: 22169, sum loss: 4043.223389, avg loss: 2.425449, ppl: 11.307304 +epoch: 1, batch: 22170, sum loss: 4644.552246, avg loss: 2.654030, ppl: 14.211192 +epoch: 1, batch: 22171, sum loss: 4207.215820, avg loss: 2.619686, ppl: 13.731410 +epoch: 1, batch: 22172, sum loss: 5841.228027, avg loss: 2.732099, ppl: 15.365106 +epoch: 1, batch: 22173, sum loss: 4689.518555, avg loss: 2.726464, ppl: 15.278769 +epoch: 1, batch: 22174, sum loss: 4310.271973, avg loss: 2.654108, ppl: 14.212307 +epoch: 1, batch: 22175, sum loss: 4546.509766, avg loss: 2.796131, ppl: 16.381151 +epoch: 1, batch: 22176, sum loss: 
3637.963867, avg loss: 2.558343, ppl: 12.914399 +epoch: 1, batch: 22177, sum loss: 3469.671143, avg loss: 2.255963, ppl: 9.544481 +epoch: 1, batch: 22178, sum loss: 5023.751953, avg loss: 2.675054, ppl: 14.513138 +epoch: 1, batch: 22179, sum loss: 4019.489746, avg loss: 2.386870, ppl: 10.879393 +epoch: 1, batch: 22180, sum loss: 4527.228027, avg loss: 2.595887, ppl: 13.408481 +epoch: 1, batch: 22181, sum loss: 3679.426270, avg loss: 2.436706, ppl: 11.435311 +epoch: 1, batch: 22182, sum loss: 3662.853027, avg loss: 2.583112, ppl: 13.238275 +epoch: 1, batch: 22183, sum loss: 5125.635742, avg loss: 2.766128, ppl: 15.896966 +epoch: 1, batch: 22184, sum loss: 4373.148438, avg loss: 2.778366, ppl: 16.092705 +epoch: 1, batch: 22185, sum loss: 4488.003418, avg loss: 2.753376, ppl: 15.695535 +epoch: 1, batch: 22186, sum loss: 4348.875977, avg loss: 2.666386, ppl: 14.387882 +epoch: 1, batch: 22187, sum loss: 4293.749023, avg loss: 2.327235, ppl: 10.249564 +epoch: 1, batch: 22188, sum loss: 4035.860352, avg loss: 2.519264, ppl: 12.419446 +epoch: 1, batch: 22189, sum loss: 4882.829590, avg loss: 2.993764, ppl: 19.960682 +epoch: 1, batch: 22190, sum loss: 4321.240723, avg loss: 2.752383, ppl: 15.679949 +epoch: 1, batch: 22191, sum loss: 4222.010742, avg loss: 2.720368, ppl: 15.185908 +epoch: 1, batch: 22192, sum loss: 4289.375488, avg loss: 2.463742, ppl: 11.748699 +epoch: 1, batch: 22193, sum loss: 4212.534180, avg loss: 2.742535, ppl: 15.526299 +epoch: 1, batch: 22194, sum loss: 4797.516602, avg loss: 2.818752, ppl: 16.755932 +epoch: 1, batch: 22195, sum loss: 4952.688965, avg loss: 3.148562, ppl: 23.302542 +epoch: 1, batch: 22196, sum loss: 4248.477051, avg loss: 2.458609, ppl: 11.688545 +epoch: 1, batch: 22197, sum loss: 4861.310059, avg loss: 2.514904, ppl: 12.365428 +epoch: 1, batch: 22198, sum loss: 4383.697266, avg loss: 2.709331, ppl: 15.019221 +epoch: 1, batch: 22199, sum loss: 4857.141602, avg loss: 2.860507, ppl: 17.470390 +epoch: 1, batch: 22200, sum loss: 
4721.383301, avg loss: 2.678039, ppl: 14.556524 +epoch: 1, batch: 22201, sum loss: 4050.917725, avg loss: 2.391333, ppl: 10.928047 +epoch: 1, batch: 22202, sum loss: 4218.880859, avg loss: 2.613929, ppl: 13.652584 +epoch: 1, batch: 22203, sum loss: 4189.655762, avg loss: 2.717027, ppl: 15.135261 +epoch: 1, batch: 22204, sum loss: 3746.438477, avg loss: 2.357733, ppl: 10.566974 +epoch: 1, batch: 22205, sum loss: 4165.903809, avg loss: 2.677316, ppl: 14.545999 +epoch: 1, batch: 22206, sum loss: 5037.925293, avg loss: 2.968724, ppl: 19.467073 +epoch: 1, batch: 22207, sum loss: 3578.944336, avg loss: 2.563714, ppl: 12.983948 +epoch: 1, batch: 22208, sum loss: 3778.695557, avg loss: 2.422241, ppl: 11.271087 +epoch: 1, batch: 22209, sum loss: 4810.186035, avg loss: 2.717619, ppl: 15.144223 +epoch: 1, batch: 22210, sum loss: 4851.287598, avg loss: 2.629424, ppl: 13.865782 +epoch: 1, batch: 22211, sum loss: 4292.146484, avg loss: 2.628381, ppl: 13.851327 +epoch: 1, batch: 22212, sum loss: 3691.464355, avg loss: 2.530133, ppl: 12.555179 +epoch: 1, batch: 22213, sum loss: 4405.844727, avg loss: 2.468261, ppl: 11.801900 +epoch: 1, batch: 22214, sum loss: 4244.049805, avg loss: 2.793976, ppl: 16.345882 +epoch: 1, batch: 22215, sum loss: 5612.756348, avg loss: 2.894665, ppl: 18.077454 +epoch: 1, batch: 22216, sum loss: 3774.802734, avg loss: 2.287759, ppl: 9.852836 +epoch: 1, batch: 22217, sum loss: 4778.476074, avg loss: 2.598410, ppl: 13.442349 +epoch: 1, batch: 22218, sum loss: 4361.031738, avg loss: 2.852212, ppl: 17.326063 +epoch: 1, batch: 22219, sum loss: 5245.646484, avg loss: 2.827842, ppl: 16.908928 +epoch: 1, batch: 22220, sum loss: 5351.767090, avg loss: 2.831623, ppl: 16.972982 +epoch: 1, batch: 22221, sum loss: 4922.912598, avg loss: 2.635392, ppl: 13.948782 +epoch: 1, batch: 22222, sum loss: 4617.730957, avg loss: 2.788485, ppl: 16.256371 +epoch: 1, batch: 22223, sum loss: 4508.753906, avg loss: 2.783182, ppl: 16.170387 +epoch: 1, batch: 22224, sum loss: 
3947.755127, avg loss: 2.414529, ppl: 11.184503 +epoch: 1, batch: 22225, sum loss: 5017.581055, avg loss: 2.690392, ppl: 14.737453 +epoch: 1, batch: 22226, sum loss: 4777.863770, avg loss: 2.629534, ppl: 13.867310 +epoch: 1, batch: 22227, sum loss: 5294.090820, avg loss: 2.709361, ppl: 15.019669 +epoch: 1, batch: 22228, sum loss: 4697.350586, avg loss: 2.643416, ppl: 14.061157 +epoch: 1, batch: 22229, sum loss: 4515.089844, avg loss: 2.698799, ppl: 14.861867 +epoch: 1, batch: 22230, sum loss: 4838.229980, avg loss: 2.659830, ppl: 14.293854 +epoch: 1, batch: 22231, sum loss: 4141.040527, avg loss: 2.602791, ppl: 13.501369 +epoch: 1, batch: 22232, sum loss: 4290.810547, avg loss: 2.591069, ppl: 13.344031 +epoch: 1, batch: 22233, sum loss: 4328.322754, avg loss: 2.675107, ppl: 14.513899 +epoch: 1, batch: 22234, sum loss: 4356.989746, avg loss: 2.618383, ppl: 13.713536 +epoch: 1, batch: 22235, sum loss: 4699.904297, avg loss: 2.555685, ppl: 12.880117 +epoch: 1, batch: 22236, sum loss: 4102.047852, avg loss: 2.704053, ppl: 14.940157 +epoch: 1, batch: 22237, sum loss: 4496.922852, avg loss: 2.727061, ppl: 15.287883 +epoch: 1, batch: 22238, sum loss: 3842.062744, avg loss: 2.671810, ppl: 14.466128 +epoch: 1, batch: 22239, sum loss: 3993.187012, avg loss: 2.322971, ppl: 10.205953 +epoch: 1, batch: 22240, sum loss: 3969.381348, avg loss: 2.655104, ppl: 14.226471 +epoch: 1, batch: 22241, sum loss: 5176.022461, avg loss: 2.760545, ppl: 15.808460 +epoch: 1, batch: 22242, sum loss: 4806.527344, avg loss: 2.686712, ppl: 14.683314 +epoch: 1, batch: 22243, sum loss: 5028.547852, avg loss: 2.839383, ppl: 17.105215 +epoch: 1, batch: 22244, sum loss: 4051.721680, avg loss: 2.505703, ppl: 12.252169 +epoch: 1, batch: 22245, sum loss: 4325.562012, avg loss: 2.393781, ppl: 10.954833 +epoch: 1, batch: 22246, sum loss: 4101.637695, avg loss: 2.578025, ppl: 13.171098 +epoch: 1, batch: 22247, sum loss: 5211.815430, avg loss: 2.794539, ppl: 16.355087 +epoch: 1, batch: 22248, sum loss: 
4164.345703, avg loss: 2.754197, ppl: 15.708424 +epoch: 1, batch: 22249, sum loss: 4400.342773, avg loss: 2.638095, ppl: 13.986536 +epoch: 1, batch: 22250, sum loss: 5260.130859, avg loss: 2.930435, ppl: 18.735783 +epoch: 1, batch: 22251, sum loss: 4388.854980, avg loss: 2.565082, ppl: 13.001725 +epoch: 1, batch: 22252, sum loss: 5275.920898, avg loss: 2.645898, ppl: 14.096099 +epoch: 1, batch: 22253, sum loss: 4179.973145, avg loss: 2.638872, ppl: 13.997404 +epoch: 1, batch: 22254, sum loss: 4902.881836, avg loss: 2.995041, ppl: 19.986177 +epoch: 1, batch: 22255, sum loss: 3928.407471, avg loss: 2.481622, ppl: 11.960646 +epoch: 1, batch: 22256, sum loss: 4141.148438, avg loss: 2.514358, ppl: 12.358675 +epoch: 1, batch: 22257, sum loss: 3856.042480, avg loss: 2.361324, ppl: 10.604984 +epoch: 1, batch: 22258, sum loss: 4874.295898, avg loss: 2.707942, ppl: 14.998381 +epoch: 1, batch: 22259, sum loss: 5184.304688, avg loss: 2.708623, ppl: 15.008597 +epoch: 1, batch: 22260, sum loss: 4652.349121, avg loss: 2.826458, ppl: 16.885542 +epoch: 1, batch: 22261, sum loss: 4179.933105, avg loss: 2.652242, ppl: 14.185803 +epoch: 1, batch: 22262, sum loss: 4195.615234, avg loss: 2.488503, ppl: 12.043228 +epoch: 1, batch: 22263, sum loss: 4550.383301, avg loss: 2.526587, ppl: 12.510735 +epoch: 1, batch: 22264, sum loss: 4694.897949, avg loss: 2.614086, ppl: 13.654729 +epoch: 1, batch: 22265, sum loss: 4632.746094, avg loss: 2.539883, ppl: 12.678184 +epoch: 1, batch: 22266, sum loss: 4146.962402, avg loss: 2.575753, ppl: 13.141209 +epoch: 1, batch: 22267, sum loss: 4374.402344, avg loss: 2.583817, ppl: 13.247611 +epoch: 1, batch: 22268, sum loss: 5060.516602, avg loss: 2.918406, ppl: 18.511761 +epoch: 1, batch: 22269, sum loss: 4092.640869, avg loss: 2.431753, ppl: 11.378817 +epoch: 1, batch: 22270, sum loss: 4398.174316, avg loss: 2.686728, ppl: 14.683559 +epoch: 1, batch: 22271, sum loss: 4976.031738, avg loss: 2.800243, ppl: 16.448641 +epoch: 1, batch: 22272, sum loss: 
4427.071777, avg loss: 2.671739, ppl: 14.465103 +epoch: 1, batch: 22273, sum loss: 3695.149414, avg loss: 2.650753, ppl: 14.164698 +epoch: 1, batch: 22274, sum loss: 4020.379395, avg loss: 2.492486, ppl: 12.091295 +epoch: 1, batch: 22275, sum loss: 4522.698242, avg loss: 2.674570, ppl: 14.506116 +epoch: 1, batch: 22276, sum loss: 3469.077393, avg loss: 2.270339, ppl: 9.682678 +epoch: 1, batch: 22277, sum loss: 4694.843262, avg loss: 2.720071, ppl: 15.181405 +epoch: 1, batch: 22278, sum loss: 4360.950195, avg loss: 2.553249, ppl: 12.848786 +epoch: 1, batch: 22279, sum loss: 4657.304688, avg loss: 2.634222, ppl: 13.932472 +epoch: 1, batch: 22280, sum loss: 4673.468750, avg loss: 2.762097, ppl: 15.833015 +epoch: 1, batch: 22281, sum loss: 4290.063965, avg loss: 2.529519, ppl: 12.547467 +epoch: 1, batch: 22282, sum loss: 4890.059570, avg loss: 2.531087, ppl: 12.567155 +epoch: 1, batch: 22283, sum loss: 4215.804688, avg loss: 2.589561, ppl: 13.323915 +epoch: 1, batch: 22284, sum loss: 5018.354492, avg loss: 2.696590, ppl: 14.829084 +epoch: 1, batch: 22285, sum loss: 4395.136719, avg loss: 2.733294, ppl: 15.383477 +epoch: 1, batch: 22286, sum loss: 4400.109375, avg loss: 2.637955, ppl: 13.984582 +epoch: 1, batch: 22287, sum loss: 4407.492676, avg loss: 2.598758, ppl: 13.447020 +epoch: 1, batch: 22288, sum loss: 3744.732910, avg loss: 2.489849, ppl: 12.059453 +epoch: 1, batch: 22289, sum loss: 4363.800781, avg loss: 2.636738, ppl: 13.967565 +epoch: 1, batch: 22290, sum loss: 3883.000244, avg loss: 2.506779, ppl: 12.265356 +epoch: 1, batch: 22291, sum loss: 4031.479492, avg loss: 2.597603, ppl: 13.431502 +epoch: 1, batch: 22292, sum loss: 4520.089355, avg loss: 2.739448, ppl: 15.478439 +epoch: 1, batch: 22293, sum loss: 5498.700684, avg loss: 2.825643, ppl: 16.871788 +epoch: 1, batch: 22294, sum loss: 4292.356934, avg loss: 2.596707, ppl: 13.419477 +epoch: 1, batch: 22295, sum loss: 4633.746094, avg loss: 2.753266, ppl: 15.693798 +epoch: 1, batch: 22296, sum loss: 
5249.853027, avg loss: 2.701932, ppl: 14.908503 +epoch: 1, batch: 22297, sum loss: 4182.667969, avg loss: 2.632264, ppl: 13.905221 +epoch: 1, batch: 22298, sum loss: 4059.032227, avg loss: 2.593631, ppl: 13.378257 +epoch: 1, batch: 22299, sum loss: 4635.118652, avg loss: 2.718545, ppl: 15.158247 +epoch: 1, batch: 22300, sum loss: 5531.800781, avg loss: 3.071516, ppl: 21.574591 +epoch: 1, batch: 22301, sum loss: 4510.276855, avg loss: 2.508497, ppl: 12.286444 +epoch: 1, batch: 22302, sum loss: 4580.351562, avg loss: 2.765913, ppl: 15.893540 +epoch: 1, batch: 22303, sum loss: 3549.431152, avg loss: 2.350617, ppl: 10.492038 +epoch: 1, batch: 22304, sum loss: 5220.363770, avg loss: 2.580506, ppl: 13.203815 +epoch: 1, batch: 22305, sum loss: 4844.817383, avg loss: 2.475635, ppl: 11.889252 +epoch: 1, batch: 22306, sum loss: 5330.610352, avg loss: 2.994725, ppl: 19.979864 +epoch: 1, batch: 22307, sum loss: 3801.579834, avg loss: 2.336558, ppl: 10.345565 +epoch: 1, batch: 22308, sum loss: 4447.255859, avg loss: 2.730053, ppl: 15.333695 +epoch: 1, batch: 22309, sum loss: 3104.419922, avg loss: 2.373410, ppl: 10.733930 +epoch: 1, batch: 22310, sum loss: 4722.512695, avg loss: 2.680200, ppl: 14.588016 +epoch: 1, batch: 22311, sum loss: 4372.029297, avg loss: 2.613287, ppl: 13.643827 +epoch: 1, batch: 22312, sum loss: 5369.702637, avg loss: 2.795264, ppl: 16.366957 +epoch: 1, batch: 22313, sum loss: 4091.234131, avg loss: 2.509960, ppl: 12.304434 +epoch: 1, batch: 22314, sum loss: 4915.713867, avg loss: 2.817028, ppl: 16.727064 +epoch: 1, batch: 22315, sum loss: 4231.795898, avg loss: 2.493692, ppl: 12.105893 +epoch: 1, batch: 22316, sum loss: 5210.595703, avg loss: 2.792388, ppl: 16.319941 +epoch: 1, batch: 22317, sum loss: 4485.016602, avg loss: 2.582048, ppl: 13.224190 +epoch: 1, batch: 22318, sum loss: 4048.968750, avg loss: 2.651584, ppl: 14.176476 +epoch: 1, batch: 22319, sum loss: 4430.046387, avg loss: 2.492992, ppl: 12.097414 +epoch: 1, batch: 22320, sum loss: 
4206.409180, avg loss: 2.470000, ppl: 11.822441 +epoch: 1, batch: 22321, sum loss: 4672.594727, avg loss: 2.584400, ppl: 13.255329 +epoch: 1, batch: 22322, sum loss: 5313.950684, avg loss: 2.921358, ppl: 18.566490 +epoch: 1, batch: 22323, sum loss: 5350.030762, avg loss: 3.036340, ppl: 20.828871 +epoch: 1, batch: 22324, sum loss: 4566.291016, avg loss: 2.522813, ppl: 12.463606 +epoch: 1, batch: 22325, sum loss: 4866.241699, avg loss: 2.752399, ppl: 15.680207 +epoch: 1, batch: 22326, sum loss: 4609.816406, avg loss: 2.745573, ppl: 15.573528 +epoch: 1, batch: 22327, sum loss: 3865.013184, avg loss: 2.436957, ppl: 11.438177 +epoch: 1, batch: 22328, sum loss: 4795.479980, avg loss: 2.741841, ppl: 15.515524 +epoch: 1, batch: 22329, sum loss: 5313.431641, avg loss: 2.651413, ppl: 14.174052 +epoch: 1, batch: 22330, sum loss: 4585.757324, avg loss: 2.684870, ppl: 14.656289 +epoch: 1, batch: 22331, sum loss: 5317.644043, avg loss: 2.580128, ppl: 13.198830 +epoch: 1, batch: 22332, sum loss: 5031.785156, avg loss: 2.709631, ppl: 15.023730 +epoch: 1, batch: 22333, sum loss: 4825.427734, avg loss: 2.726230, ppl: 15.275197 +epoch: 1, batch: 22334, sum loss: 4717.936523, avg loss: 2.892665, ppl: 18.041325 +epoch: 1, batch: 22335, sum loss: 4637.241699, avg loss: 2.898276, ppl: 18.142841 +epoch: 1, batch: 22336, sum loss: 4089.446533, avg loss: 2.513489, ppl: 12.347937 +epoch: 1, batch: 22337, sum loss: 4598.067383, avg loss: 2.498950, ppl: 12.169703 +epoch: 1, batch: 22338, sum loss: 4732.355957, avg loss: 2.800211, ppl: 16.448112 +epoch: 1, batch: 22339, sum loss: 4305.297363, avg loss: 2.596681, ppl: 13.419127 +epoch: 1, batch: 22340, sum loss: 4326.738281, avg loss: 2.572377, ppl: 13.096918 +epoch: 1, batch: 22341, sum loss: 3096.081055, avg loss: 2.165092, ppl: 8.715402 +epoch: 1, batch: 22342, sum loss: 4537.450195, avg loss: 2.804357, ppl: 16.516457 +epoch: 1, batch: 22343, sum loss: 4946.413086, avg loss: 2.564237, ppl: 12.990741 +epoch: 1, batch: 22344, sum loss: 
3898.661133, avg loss: 2.588753, ppl: 13.313153 +epoch: 1, batch: 22345, sum loss: 4340.664551, avg loss: 2.893776, ppl: 18.061388 +epoch: 1, batch: 22346, sum loss: 4577.038086, avg loss: 2.785781, ppl: 16.212473 +epoch: 1, batch: 22347, sum loss: 4724.675781, avg loss: 2.701358, ppl: 14.899957 +epoch: 1, batch: 22348, sum loss: 4253.815918, avg loss: 2.728554, ppl: 15.310736 +epoch: 1, batch: 22349, sum loss: 4298.980469, avg loss: 2.446773, ppl: 11.551012 +epoch: 1, batch: 22350, sum loss: 4816.927246, avg loss: 2.893049, ppl: 18.048264 +epoch: 1, batch: 22351, sum loss: 4542.487305, avg loss: 2.817920, ppl: 16.741995 +epoch: 1, batch: 22352, sum loss: 5672.169434, avg loss: 2.858956, ppl: 17.443312 +epoch: 1, batch: 22353, sum loss: 5073.773926, avg loss: 3.016512, ppl: 20.419950 +epoch: 1, batch: 22354, sum loss: 4345.418457, avg loss: 2.536730, ppl: 12.638277 +epoch: 1, batch: 22355, sum loss: 3953.117920, avg loss: 2.306370, ppl: 10.037919 +epoch: 1, batch: 22356, sum loss: 4462.227539, avg loss: 2.858570, ppl: 17.436573 +epoch: 1, batch: 22357, sum loss: 4743.458008, avg loss: 2.743469, ppl: 15.540807 +epoch: 1, batch: 22358, sum loss: 4637.178223, avg loss: 2.634760, ppl: 13.939972 +epoch: 1, batch: 22359, sum loss: 4572.696289, avg loss: 2.733232, ppl: 15.382516 +epoch: 1, batch: 22360, sum loss: 5172.922852, avg loss: 2.552009, ppl: 12.832864 +epoch: 1, batch: 22361, sum loss: 5720.647461, avg loss: 2.760930, ppl: 15.814548 +epoch: 1, batch: 22362, sum loss: 3572.273438, avg loss: 2.513915, ppl: 12.353199 +epoch: 1, batch: 22363, sum loss: 4949.971191, avg loss: 2.791862, ppl: 16.311363 +epoch: 1, batch: 22364, sum loss: 3539.083496, avg loss: 2.613799, ppl: 13.650806 +epoch: 1, batch: 22365, sum loss: 4846.306152, avg loss: 2.924747, ppl: 18.629517 +epoch: 1, batch: 22366, sum loss: 5001.950684, avg loss: 2.722891, ppl: 15.224274 +epoch: 1, batch: 22367, sum loss: 4094.642090, avg loss: 2.576867, ppl: 13.155861 +epoch: 1, batch: 22368, sum loss: 
4578.366211, avg loss: 2.516969, ppl: 12.390979 +epoch: 1, batch: 22369, sum loss: 4809.876953, avg loss: 2.532847, ppl: 12.589302 +epoch: 1, batch: 22370, sum loss: 5163.285156, avg loss: 3.047984, ppl: 21.072821 +epoch: 1, batch: 22371, sum loss: 3866.885010, avg loss: 2.628746, ppl: 13.856380 +epoch: 1, batch: 22372, sum loss: 3217.414795, avg loss: 2.206732, ppl: 9.085973 +epoch: 1, batch: 22373, sum loss: 3919.838867, avg loss: 2.487207, ppl: 12.027641 +epoch: 1, batch: 22374, sum loss: 4579.001953, avg loss: 2.606148, ppl: 13.546765 +epoch: 1, batch: 22375, sum loss: 5090.462891, avg loss: 2.895599, ppl: 18.094336 +epoch: 1, batch: 22376, sum loss: 4259.877930, avg loss: 2.520638, ppl: 12.436525 +epoch: 1, batch: 22377, sum loss: 5053.130859, avg loss: 2.808855, ppl: 16.590916 +epoch: 1, batch: 22378, sum loss: 4833.401367, avg loss: 2.700224, ppl: 14.883067 +epoch: 1, batch: 22379, sum loss: 4041.085693, avg loss: 2.272827, ppl: 9.706800 +epoch: 1, batch: 22380, sum loss: 4992.258789, avg loss: 2.890712, ppl: 18.006117 +epoch: 1, batch: 22381, sum loss: 4389.195312, avg loss: 2.623548, ppl: 13.784542 +epoch: 1, batch: 22382, sum loss: 4772.187988, avg loss: 2.805519, ppl: 16.535658 +epoch: 1, batch: 22383, sum loss: 4681.894043, avg loss: 2.773634, ppl: 16.016733 +epoch: 1, batch: 22384, sum loss: 4822.508789, avg loss: 2.811958, ppl: 16.642477 +epoch: 1, batch: 22385, sum loss: 4605.768066, avg loss: 2.600660, ppl: 13.472622 +epoch: 1, batch: 22386, sum loss: 4232.312988, avg loss: 2.548051, ppl: 12.782168 +epoch: 1, batch: 22387, sum loss: 3946.101074, avg loss: 2.592708, ppl: 13.365916 +epoch: 1, batch: 22388, sum loss: 4423.636230, avg loss: 2.596031, ppl: 13.410402 +epoch: 1, batch: 22389, sum loss: 3702.168457, avg loss: 2.424472, ppl: 11.296262 +epoch: 1, batch: 22390, sum loss: 4135.502930, avg loss: 2.523186, ppl: 12.468263 +epoch: 1, batch: 22391, sum loss: 3875.579834, avg loss: 2.566609, ppl: 13.021595 +epoch: 1, batch: 22392, sum loss: 
4881.155762, avg loss: 2.728427, ppl: 15.308786 +epoch: 1, batch: 22393, sum loss: 4217.897461, avg loss: 2.568756, ppl: 13.049582 +epoch: 1, batch: 22394, sum loss: 3419.790039, avg loss: 2.523830, ppl: 12.476292 +epoch: 1, batch: 22395, sum loss: 5366.800781, avg loss: 3.080827, ppl: 21.776403 +epoch: 1, batch: 22396, sum loss: 3903.664307, avg loss: 2.397828, ppl: 10.999261 +epoch: 1, batch: 22397, sum loss: 3541.995850, avg loss: 2.264703, ppl: 9.628267 +epoch: 1, batch: 22398, sum loss: 3724.633301, avg loss: 2.404541, ppl: 11.073341 +epoch: 1, batch: 22399, sum loss: 4975.524414, avg loss: 2.864436, ppl: 17.539152 +epoch: 1, batch: 22400, sum loss: 4810.129395, avg loss: 2.690229, ppl: 14.735049 +epoch: 1, batch: 22401, sum loss: 4239.352539, avg loss: 2.584971, ppl: 13.262904 +epoch: 1, batch: 22402, sum loss: 4800.906738, avg loss: 2.673111, ppl: 14.484958 +epoch: 1, batch: 22403, sum loss: 4215.055664, avg loss: 2.754938, ppl: 15.720072 +epoch: 1, batch: 22404, sum loss: 5060.228027, avg loss: 2.778818, ppl: 16.099985 +epoch: 1, batch: 22405, sum loss: 5449.604004, avg loss: 2.981184, ppl: 19.711136 +epoch: 1, batch: 22406, sum loss: 5397.941406, avg loss: 3.034256, ppl: 20.785507 +epoch: 1, batch: 22407, sum loss: 5240.081543, avg loss: 2.665352, ppl: 14.373006 +epoch: 1, batch: 22408, sum loss: 5203.938477, avg loss: 2.791812, ppl: 16.310555 +epoch: 1, batch: 22409, sum loss: 4182.312500, avg loss: 2.613945, ppl: 13.652808 +epoch: 1, batch: 22410, sum loss: 4325.625488, avg loss: 2.706900, ppl: 14.982752 +epoch: 1, batch: 22411, sum loss: 5349.755859, avg loss: 2.929768, ppl: 18.723284 +epoch: 1, batch: 22412, sum loss: 3667.724365, avg loss: 2.629193, ppl: 13.862582 +epoch: 1, batch: 22413, sum loss: 4415.672363, avg loss: 2.516052, ppl: 12.379631 +epoch: 1, batch: 22414, sum loss: 6118.744141, avg loss: 2.824905, ppl: 16.859343 +epoch: 1, batch: 22415, sum loss: 4849.671875, avg loss: 2.543090, ppl: 12.718907 +epoch: 1, batch: 22416, sum loss: 
3821.677734, avg loss: 2.459252, ppl: 11.696061 +epoch: 1, batch: 22417, sum loss: 5238.409180, avg loss: 2.870361, ppl: 17.643393 +epoch: 1, batch: 22418, sum loss: 4679.723633, avg loss: 2.735081, ppl: 15.410995 +epoch: 1, batch: 22419, sum loss: 3675.804199, avg loss: 2.404058, ppl: 11.067996 +epoch: 1, batch: 22420, sum loss: 4722.998535, avg loss: 2.610834, ppl: 13.610399 +epoch: 1, batch: 22421, sum loss: 4368.412109, avg loss: 2.618952, ppl: 13.721340 +epoch: 1, batch: 22422, sum loss: 4784.622070, avg loss: 2.770482, ppl: 15.966325 +epoch: 1, batch: 22423, sum loss: 3961.834961, avg loss: 2.523462, ppl: 12.471697 +epoch: 1, batch: 22424, sum loss: 4688.938477, avg loss: 2.626856, ppl: 13.830224 +epoch: 1, batch: 22425, sum loss: 6182.051758, avg loss: 2.996632, ppl: 20.018005 +epoch: 1, batch: 22426, sum loss: 3302.445312, avg loss: 2.353846, ppl: 10.525970 +epoch: 1, batch: 22427, sum loss: 3283.689941, avg loss: 2.395106, ppl: 10.969359 +epoch: 1, batch: 22428, sum loss: 3823.512207, avg loss: 2.476368, ppl: 11.897975 +epoch: 1, batch: 22429, sum loss: 4749.646973, avg loss: 2.691018, ppl: 14.746682 +epoch: 1, batch: 22430, sum loss: 4893.007324, avg loss: 2.727429, ppl: 15.293515 +epoch: 1, batch: 22431, sum loss: 3554.486084, avg loss: 2.517341, ppl: 12.395597 +epoch: 1, batch: 22432, sum loss: 3557.305664, avg loss: 2.533694, ppl: 12.599959 +epoch: 1, batch: 22433, sum loss: 3966.781738, avg loss: 2.671233, ppl: 14.457790 +epoch: 1, batch: 22434, sum loss: 5171.855469, avg loss: 2.887691, ppl: 17.951817 +epoch: 1, batch: 22435, sum loss: 3107.221191, avg loss: 2.320554, ppl: 10.181309 +epoch: 1, batch: 22436, sum loss: 4238.382324, avg loss: 2.696172, ppl: 14.822881 +epoch: 1, batch: 22437, sum loss: 5010.614746, avg loss: 2.754599, ppl: 15.714732 +epoch: 1, batch: 22438, sum loss: 4618.342773, avg loss: 2.543140, ppl: 12.719550 +epoch: 1, batch: 22439, sum loss: 4144.772461, avg loss: 2.665449, ppl: 14.374397 +epoch: 1, batch: 22440, sum loss: 
5199.761719, avg loss: 2.864883, ppl: 17.546995 +epoch: 1, batch: 22441, sum loss: 3943.319824, avg loss: 2.660810, ppl: 14.307870 +epoch: 1, batch: 22442, sum loss: 4227.526855, avg loss: 2.537531, ppl: 12.648405 +epoch: 1, batch: 22443, sum loss: 4083.206055, avg loss: 2.540888, ppl: 12.690930 +epoch: 1, batch: 22444, sum loss: 4245.664062, avg loss: 2.441440, ppl: 11.489575 +epoch: 1, batch: 22445, sum loss: 4844.355469, avg loss: 3.029616, ppl: 20.689280 +epoch: 1, batch: 22446, sum loss: 4992.560059, avg loss: 2.819063, ppl: 16.761133 +epoch: 1, batch: 22447, sum loss: 4113.480957, avg loss: 2.605118, ppl: 13.532820 +epoch: 1, batch: 22448, sum loss: 4573.244141, avg loss: 2.546350, ppl: 12.760440 +epoch: 1, batch: 22449, sum loss: 3247.471191, avg loss: 2.412683, ppl: 11.163871 +epoch: 1, batch: 22450, sum loss: 5150.565430, avg loss: 2.683984, ppl: 14.643317 +epoch: 1, batch: 22451, sum loss: 4637.295898, avg loss: 2.686730, ppl: 14.683580 +epoch: 1, batch: 22452, sum loss: 4357.384766, avg loss: 2.855429, ppl: 17.381895 +epoch: 1, batch: 22453, sum loss: 4520.236816, avg loss: 2.681042, ppl: 14.600298 +epoch: 1, batch: 22454, sum loss: 4401.927246, avg loss: 2.445515, ppl: 11.536491 +epoch: 1, batch: 22455, sum loss: 4325.241699, avg loss: 2.730582, ppl: 15.341809 +epoch: 1, batch: 22456, sum loss: 4725.240234, avg loss: 2.706323, ppl: 14.974120 +epoch: 1, batch: 22457, sum loss: 4853.364258, avg loss: 2.708351, ppl: 15.004507 +epoch: 1, batch: 22458, sum loss: 4274.029297, avg loss: 2.706795, ppl: 14.981183 +epoch: 1, batch: 22459, sum loss: 3754.335938, avg loss: 2.457026, ppl: 11.670056 +epoch: 1, batch: 22460, sum loss: 5080.007324, avg loss: 2.886368, ppl: 17.928072 +epoch: 1, batch: 22461, sum loss: 4017.510742, avg loss: 2.678340, ppl: 14.560908 +epoch: 1, batch: 22462, sum loss: 3673.484375, avg loss: 2.559919, ppl: 12.934774 +epoch: 1, batch: 22463, sum loss: 3737.380127, avg loss: 2.295688, ppl: 9.931266 +epoch: 1, batch: 22464, sum loss: 
4601.250000, avg loss: 2.553413, ppl: 12.850888 +epoch: 1, batch: 22465, sum loss: 4108.297852, avg loss: 2.511185, ppl: 12.319516 +epoch: 1, batch: 22466, sum loss: 4174.559082, avg loss: 2.809259, ppl: 16.597618 +epoch: 1, batch: 22467, sum loss: 4292.649902, avg loss: 2.706589, ppl: 14.978094 +epoch: 1, batch: 22468, sum loss: 4365.670410, avg loss: 2.556013, ppl: 12.884346 +epoch: 1, batch: 22469, sum loss: 4689.096680, avg loss: 2.708894, ppl: 15.012670 +epoch: 1, batch: 22470, sum loss: 5164.312012, avg loss: 2.794541, ppl: 16.355122 +epoch: 1, batch: 22471, sum loss: 4741.595215, avg loss: 2.866745, ppl: 17.579695 +epoch: 1, batch: 22472, sum loss: 3905.009766, avg loss: 2.320267, ppl: 10.178393 +epoch: 1, batch: 22473, sum loss: 4909.032715, avg loss: 2.530429, ppl: 12.558897 +epoch: 1, batch: 22474, sum loss: 4085.686279, avg loss: 2.452393, ppl: 11.616106 +epoch: 1, batch: 22475, sum loss: 3874.941650, avg loss: 2.362769, ppl: 10.620322 +epoch: 1, batch: 22476, sum loss: 4696.565430, avg loss: 2.716348, ppl: 15.124984 +epoch: 1, batch: 22477, sum loss: 4220.175293, avg loss: 2.681179, ppl: 14.602293 +epoch: 1, batch: 22478, sum loss: 4723.974121, avg loss: 2.733781, ppl: 15.390976 +epoch: 1, batch: 22479, sum loss: 4939.855469, avg loss: 2.931665, ppl: 18.758837 +epoch: 1, batch: 22480, sum loss: 4765.333984, avg loss: 2.729286, ppl: 15.321950 +epoch: 1, batch: 22481, sum loss: 4536.421875, avg loss: 2.671627, ppl: 14.463476 +epoch: 1, batch: 22482, sum loss: 5396.882812, avg loss: 2.837478, ppl: 17.072653 +epoch: 1, batch: 22483, sum loss: 5158.924805, avg loss: 2.906436, ppl: 18.291500 +epoch: 1, batch: 22484, sum loss: 4423.292480, avg loss: 2.573178, ppl: 13.107411 +epoch: 1, batch: 22485, sum loss: 4561.977051, avg loss: 2.759817, ppl: 15.796947 +epoch: 1, batch: 22486, sum loss: 4010.106445, avg loss: 2.678762, ppl: 14.567044 +epoch: 1, batch: 22487, sum loss: 5037.692383, avg loss: 2.648629, ppl: 14.134649 +epoch: 1, batch: 22488, sum loss: 
5030.579102, avg loss: 2.593082, ppl: 13.370920 +epoch: 1, batch: 22489, sum loss: 4399.944824, avg loss: 2.611243, ppl: 13.615969 +epoch: 1, batch: 22490, sum loss: 4672.961426, avg loss: 2.465943, ppl: 11.774576 +epoch: 1, batch: 22491, sum loss: 5070.445312, avg loss: 2.739300, ppl: 15.476155 +epoch: 1, batch: 22492, sum loss: 3946.582520, avg loss: 2.593024, ppl: 13.370145 +epoch: 1, batch: 22493, sum loss: 4785.649414, avg loss: 2.584044, ppl: 13.250615 +epoch: 1, batch: 22494, sum loss: 5375.928711, avg loss: 2.622404, ppl: 13.768788 +epoch: 1, batch: 22495, sum loss: 4349.870117, avg loss: 2.495623, ppl: 12.129283 +epoch: 1, batch: 22496, sum loss: 4193.363770, avg loss: 2.520051, ppl: 12.429225 +epoch: 1, batch: 22497, sum loss: 5744.202148, avg loss: 2.793873, ppl: 16.344193 +epoch: 1, batch: 22498, sum loss: 5546.849609, avg loss: 2.602933, ppl: 13.503281 +epoch: 1, batch: 22499, sum loss: 5223.278809, avg loss: 3.059917, ppl: 21.325792 +epoch: 1, batch: 22500, sum loss: 4374.895996, avg loss: 2.561414, ppl: 12.954127 +epoch: 1, batch: 22501, sum loss: 4721.522461, avg loss: 2.721339, ppl: 15.200655 +epoch: 1, batch: 22502, sum loss: 4878.769043, avg loss: 2.628647, ppl: 13.855013 +epoch: 1, batch: 22503, sum loss: 3807.695801, avg loss: 2.513331, ppl: 12.345983 +epoch: 1, batch: 22504, sum loss: 4327.844238, avg loss: 2.643766, ppl: 14.066072 +epoch: 1, batch: 22505, sum loss: 4034.690674, avg loss: 2.566597, ppl: 13.021440 +epoch: 1, batch: 22506, sum loss: 4151.922363, avg loss: 2.680389, ppl: 14.590767 +epoch: 1, batch: 22507, sum loss: 4399.282227, avg loss: 2.560700, ppl: 12.944872 +epoch: 1, batch: 22508, sum loss: 4548.534668, avg loss: 2.584395, ppl: 13.255263 +epoch: 1, batch: 22509, sum loss: 5216.761719, avg loss: 2.883782, ppl: 17.881777 +epoch: 1, batch: 22510, sum loss: 4322.476562, avg loss: 2.838133, ppl: 17.083841 +epoch: 1, batch: 22511, sum loss: 3851.431885, avg loss: 2.805121, ppl: 16.529083 +epoch: 1, batch: 22512, sum loss: 
3682.731689, avg loss: 2.353183, ppl: 10.518999 +epoch: 1, batch: 22513, sum loss: 3834.970215, avg loss: 2.546461, ppl: 12.761861 +epoch: 1, batch: 22514, sum loss: 4444.942871, avg loss: 2.636384, ppl: 13.962620 +epoch: 1, batch: 22515, sum loss: 4970.252441, avg loss: 2.614546, ppl: 13.661017 +epoch: 1, batch: 22516, sum loss: 4642.838379, avg loss: 2.679076, ppl: 14.571619 +epoch: 1, batch: 22517, sum loss: 5362.782715, avg loss: 2.806270, ppl: 16.548084 +epoch: 1, batch: 22518, sum loss: 4634.867676, avg loss: 2.650010, ppl: 14.154181 +epoch: 1, batch: 22519, sum loss: 5857.301270, avg loss: 2.809257, ppl: 16.597586 +epoch: 1, batch: 22520, sum loss: 5308.190430, avg loss: 2.828018, ppl: 16.911915 +epoch: 1, batch: 22521, sum loss: 4525.559570, avg loss: 2.593444, ppl: 13.375757 +epoch: 1, batch: 22522, sum loss: 5170.872070, avg loss: 2.606286, ppl: 13.548641 +epoch: 1, batch: 22523, sum loss: 4552.550781, avg loss: 2.520792, ppl: 12.438447 +epoch: 1, batch: 22524, sum loss: 4633.501465, avg loss: 2.753120, ppl: 15.691516 +epoch: 1, batch: 22525, sum loss: 4951.860352, avg loss: 2.766402, ppl: 15.901322 +epoch: 1, batch: 22526, sum loss: 3702.395508, avg loss: 2.468264, ppl: 11.801936 +epoch: 1, batch: 22527, sum loss: 4412.531250, avg loss: 2.912562, ppl: 18.403887 +epoch: 1, batch: 22528, sum loss: 4286.798828, avg loss: 2.689334, ppl: 14.721870 +epoch: 1, batch: 22529, sum loss: 4519.347168, avg loss: 2.762437, ppl: 15.838400 +epoch: 1, batch: 22530, sum loss: 3975.671875, avg loss: 2.486349, ppl: 12.017319 +epoch: 1, batch: 22531, sum loss: 3926.825195, avg loss: 2.566552, ppl: 13.020856 +epoch: 1, batch: 22532, sum loss: 4582.410156, avg loss: 2.711485, ppl: 15.051617 +epoch: 1, batch: 22533, sum loss: 4099.497559, avg loss: 2.563788, ppl: 12.984914 +epoch: 1, batch: 22534, sum loss: 4488.029297, avg loss: 2.623045, ppl: 13.777609 +epoch: 1, batch: 22535, sum loss: 5456.585938, avg loss: 2.852371, ppl: 17.328819 +epoch: 1, batch: 22536, sum loss: 
4338.724609, avg loss: 2.526922, ppl: 12.514923 +epoch: 1, batch: 22537, sum loss: 3548.226562, avg loss: 2.331292, ppl: 10.291231 +epoch: 1, batch: 22538, sum loss: 3856.672852, avg loss: 2.577990, ppl: 13.170636 +epoch: 1, batch: 22539, sum loss: 4316.027832, avg loss: 2.672463, ppl: 14.475581 +epoch: 1, batch: 22540, sum loss: 4195.562500, avg loss: 2.430801, ppl: 11.367983 +epoch: 1, batch: 22541, sum loss: 3549.448242, avg loss: 2.427803, ppl: 11.333957 +epoch: 1, batch: 22542, sum loss: 3588.609131, avg loss: 2.545113, ppl: 12.744669 +epoch: 1, batch: 22543, sum loss: 3870.035645, avg loss: 2.426355, ppl: 11.317551 +epoch: 1, batch: 22544, sum loss: 3951.830811, avg loss: 2.639833, ppl: 14.010870 +epoch: 1, batch: 22545, sum loss: 4046.530762, avg loss: 2.683376, ppl: 14.634414 +epoch: 1, batch: 22546, sum loss: 5034.202148, avg loss: 2.807698, ppl: 16.571722 +epoch: 1, batch: 22547, sum loss: 3924.602783, avg loss: 2.585377, ppl: 13.268296 +epoch: 1, batch: 22548, sum loss: 3692.714844, avg loss: 2.407246, ppl: 11.103336 +epoch: 1, batch: 22549, sum loss: 4489.775391, avg loss: 2.683667, ppl: 14.638681 +epoch: 1, batch: 22550, sum loss: 4310.020508, avg loss: 2.422721, ppl: 11.276500 +epoch: 1, batch: 22551, sum loss: 4236.216309, avg loss: 2.541221, ppl: 12.695168 +epoch: 1, batch: 22552, sum loss: 4169.988281, avg loss: 2.374709, ppl: 10.747881 +epoch: 1, batch: 22553, sum loss: 5596.513672, avg loss: 2.791279, ppl: 16.301849 +epoch: 1, batch: 22554, sum loss: 4817.274414, avg loss: 2.713957, ppl: 15.088869 +epoch: 1, batch: 22555, sum loss: 4750.040039, avg loss: 2.646262, ppl: 14.101229 +epoch: 1, batch: 22556, sum loss: 5175.800781, avg loss: 2.889894, ppl: 17.991407 +epoch: 1, batch: 22557, sum loss: 4562.576172, avg loss: 2.530547, ppl: 12.560373 +epoch: 1, batch: 22558, sum loss: 4230.063965, avg loss: 2.611151, ppl: 13.614709 +epoch: 1, batch: 22559, sum loss: 4088.027832, avg loss: 2.593926, ppl: 13.382210 +epoch: 1, batch: 22560, sum loss: 
4599.620117, avg loss: 2.723280, ppl: 15.230198 +epoch: 1, batch: 22561, sum loss: 3975.303955, avg loss: 2.380422, ppl: 10.809460 +epoch: 1, batch: 22562, sum loss: 4303.859375, avg loss: 2.629114, ppl: 13.861482 +epoch: 1, batch: 22563, sum loss: 4415.060059, avg loss: 2.555012, ppl: 12.871448 +epoch: 1, batch: 22564, sum loss: 3764.686768, avg loss: 2.598128, ppl: 13.438552 +epoch: 1, batch: 22565, sum loss: 6286.335449, avg loss: 2.840640, ppl: 17.126717 +epoch: 1, batch: 22566, sum loss: 3825.513916, avg loss: 2.649248, ppl: 14.143397 +epoch: 1, batch: 22567, sum loss: 4826.176270, avg loss: 2.912599, ppl: 18.404568 +epoch: 1, batch: 22568, sum loss: 4597.250488, avg loss: 2.702675, ppl: 14.919590 +epoch: 1, batch: 22569, sum loss: 4611.428711, avg loss: 2.837802, ppl: 17.078190 +epoch: 1, batch: 22570, sum loss: 4103.710449, avg loss: 2.516070, ppl: 12.379849 +epoch: 1, batch: 22571, sum loss: 4049.050293, avg loss: 2.612291, ppl: 13.630237 +epoch: 1, batch: 22572, sum loss: 4648.905762, avg loss: 2.690339, ppl: 14.736672 +epoch: 1, batch: 22573, sum loss: 3890.873047, avg loss: 2.705753, ppl: 14.965583 +epoch: 1, batch: 22574, sum loss: 3663.445801, avg loss: 2.589008, ppl: 13.316550 +epoch: 1, batch: 22575, sum loss: 3746.144287, avg loss: 2.630719, ppl: 13.883755 +epoch: 1, batch: 22576, sum loss: 4919.026855, avg loss: 2.609563, ppl: 13.593114 +epoch: 1, batch: 22577, sum loss: 3464.456787, avg loss: 2.652724, ppl: 14.192640 +epoch: 1, batch: 22578, sum loss: 4225.120117, avg loss: 2.713629, ppl: 15.083916 +epoch: 1, batch: 22579, sum loss: 4379.419434, avg loss: 2.652586, ppl: 14.190692 +epoch: 1, batch: 22580, sum loss: 4775.609375, avg loss: 2.770075, ppl: 15.959836 +epoch: 1, batch: 22581, sum loss: 5354.647461, avg loss: 2.609477, ppl: 13.591945 +epoch: 1, batch: 22582, sum loss: 4617.573242, avg loss: 2.510916, ppl: 12.316200 +epoch: 1, batch: 22583, sum loss: 4986.140625, avg loss: 2.712808, ppl: 15.071532 +epoch: 1, batch: 22584, sum loss: 
4521.364746, avg loss: 2.705784, ppl: 14.966046 +epoch: 1, batch: 22585, sum loss: 4476.961426, avg loss: 2.605915, ppl: 13.543610 +epoch: 1, batch: 22586, sum loss: 4904.689941, avg loss: 2.645464, ppl: 14.089977 +epoch: 1, batch: 22587, sum loss: 3786.252441, avg loss: 2.522487, ppl: 12.459541 +epoch: 1, batch: 22588, sum loss: 5165.049316, avg loss: 2.745906, ppl: 15.578724 +epoch: 1, batch: 22589, sum loss: 5564.890625, avg loss: 2.779665, ppl: 16.113630 +epoch: 1, batch: 22590, sum loss: 4575.439453, avg loss: 2.761279, ppl: 15.820065 +epoch: 1, batch: 22591, sum loss: 3604.771484, avg loss: 2.357601, ppl: 10.565574 +epoch: 1, batch: 22592, sum loss: 3801.418945, avg loss: 2.443071, ppl: 11.508333 +epoch: 1, batch: 22593, sum loss: 4702.375488, avg loss: 2.578057, ppl: 13.171518 +epoch: 1, batch: 22594, sum loss: 4843.320801, avg loss: 2.739435, ppl: 15.478237 +epoch: 1, batch: 22595, sum loss: 4727.427246, avg loss: 2.558132, ppl: 12.911672 +epoch: 1, batch: 22596, sum loss: 3826.409668, avg loss: 2.540777, ppl: 12.689524 +epoch: 1, batch: 22597, sum loss: 6118.296875, avg loss: 2.981626, ppl: 19.719860 +epoch: 1, batch: 22598, sum loss: 5487.470215, avg loss: 3.137490, ppl: 23.045950 +epoch: 1, batch: 22599, sum loss: 4755.673828, avg loss: 2.652356, ppl: 14.187424 +epoch: 1, batch: 22600, sum loss: 4460.355469, avg loss: 2.514293, ppl: 12.357865 +epoch: 1, batch: 22601, sum loss: 4482.559082, avg loss: 2.708495, ppl: 15.006672 +epoch: 1, batch: 22602, sum loss: 4834.728027, avg loss: 2.514159, ppl: 12.356213 +epoch: 1, batch: 22603, sum loss: 5519.619629, avg loss: 2.689873, ppl: 14.729809 +epoch: 1, batch: 22604, sum loss: 4218.298340, avg loss: 2.513885, ppl: 12.352822 +epoch: 1, batch: 22605, sum loss: 4247.607422, avg loss: 2.630097, ppl: 13.875121 +epoch: 1, batch: 22606, sum loss: 3433.770020, avg loss: 2.274020, ppl: 9.718390 +epoch: 1, batch: 22607, sum loss: 4427.040527, avg loss: 2.636713, ppl: 13.967212 +epoch: 1, batch: 22608, sum loss: 
5255.645996, avg loss: 2.786663, ppl: 16.226778 +epoch: 1, batch: 22609, sum loss: 4751.128418, avg loss: 2.693383, ppl: 14.781605 +epoch: 1, batch: 22610, sum loss: 5642.053711, avg loss: 2.859632, ppl: 17.455103 +epoch: 1, batch: 22611, sum loss: 3565.881348, avg loss: 2.439044, ppl: 11.462072 +epoch: 1, batch: 22612, sum loss: 3696.962891, avg loss: 2.461360, ppl: 11.720740 +epoch: 1, batch: 22613, sum loss: 4242.726562, avg loss: 2.614126, ppl: 13.655276 +epoch: 1, batch: 22614, sum loss: 4749.058105, avg loss: 2.582413, ppl: 13.229027 +epoch: 1, batch: 22615, sum loss: 4792.531250, avg loss: 2.783119, ppl: 16.169378 +epoch: 1, batch: 22616, sum loss: 5098.453125, avg loss: 2.724988, ppl: 15.256238 +epoch: 1, batch: 22617, sum loss: 4388.785156, avg loss: 2.411421, ppl: 11.149789 +epoch: 1, batch: 22618, sum loss: 4422.057129, avg loss: 2.567977, ppl: 13.039425 +epoch: 1, batch: 22619, sum loss: 4551.508789, avg loss: 2.581684, ppl: 13.219379 +epoch: 1, batch: 22620, sum loss: 4938.361816, avg loss: 2.643663, ppl: 14.064624 +epoch: 1, batch: 22621, sum loss: 3841.085693, avg loss: 2.564142, ppl: 12.989514 +epoch: 1, batch: 22622, sum loss: 3851.098877, avg loss: 2.368449, ppl: 10.680819 +epoch: 1, batch: 22623, sum loss: 4820.504395, avg loss: 2.843955, ppl: 17.183598 +epoch: 1, batch: 22624, sum loss: 4452.040527, avg loss: 2.483012, ppl: 11.977288 +epoch: 1, batch: 22625, sum loss: 4199.030762, avg loss: 2.344517, ppl: 10.428237 +epoch: 1, batch: 22626, sum loss: 5069.728027, avg loss: 2.800955, ppl: 16.460356 +epoch: 1, batch: 22627, sum loss: 3762.066162, avg loss: 2.366079, ppl: 10.655534 +epoch: 1, batch: 22628, sum loss: 5364.005859, avg loss: 2.735342, ppl: 15.415018 +epoch: 1, batch: 22629, sum loss: 3992.880859, avg loss: 2.656607, ppl: 14.247869 +epoch: 1, batch: 22630, sum loss: 4486.012695, avg loss: 2.755536, ppl: 15.729471 +epoch: 1, batch: 22631, sum loss: 5185.168945, avg loss: 2.947794, ppl: 19.063847 +epoch: 1, batch: 22632, sum loss: 
4737.536133, avg loss: 2.736878, ppl: 15.438712 +epoch: 1, batch: 22633, sum loss: 4729.074219, avg loss: 2.582782, ppl: 13.233908 +epoch: 1, batch: 22634, sum loss: 4645.165527, avg loss: 2.683516, ppl: 14.636458 +epoch: 1, batch: 22635, sum loss: 5427.621094, avg loss: 2.684283, ppl: 14.647699 +epoch: 1, batch: 22636, sum loss: 4460.618652, avg loss: 2.610075, ppl: 13.600074 +epoch: 1, batch: 22637, sum loss: 4862.311523, avg loss: 2.843457, ppl: 17.175037 +epoch: 1, batch: 22638, sum loss: 4044.392822, avg loss: 2.657289, ppl: 14.257578 +epoch: 1, batch: 22639, sum loss: 5040.425293, avg loss: 2.881890, ppl: 17.847971 +epoch: 1, batch: 22640, sum loss: 4481.949219, avg loss: 2.666240, ppl: 14.385773 +epoch: 1, batch: 22641, sum loss: 5070.196777, avg loss: 2.668525, ppl: 14.418682 +epoch: 1, batch: 22642, sum loss: 4385.951172, avg loss: 2.667854, ppl: 14.409008 +epoch: 1, batch: 22643, sum loss: 4504.997559, avg loss: 2.670419, ppl: 14.446028 +epoch: 1, batch: 22644, sum loss: 4183.086426, avg loss: 2.588544, ppl: 13.310373 +epoch: 1, batch: 22645, sum loss: 4499.698242, avg loss: 2.765641, ppl: 15.889225 +epoch: 1, batch: 22646, sum loss: 4629.960449, avg loss: 2.534188, ppl: 12.606185 +epoch: 1, batch: 22647, sum loss: 4110.107422, avg loss: 2.704018, ppl: 14.939640 +epoch: 1, batch: 22648, sum loss: 4360.564941, avg loss: 2.601769, ppl: 13.487579 +epoch: 1, batch: 22649, sum loss: 4931.182617, avg loss: 2.793871, ppl: 16.344168 +epoch: 1, batch: 22650, sum loss: 5511.529297, avg loss: 2.990520, ppl: 19.896036 +epoch: 1, batch: 22651, sum loss: 4676.877930, avg loss: 2.601156, ppl: 13.479304 +epoch: 1, batch: 22652, sum loss: 4430.883789, avg loss: 2.480898, ppl: 11.951994 +epoch: 1, batch: 22653, sum loss: 4018.049561, avg loss: 2.585618, ppl: 13.271482 +epoch: 1, batch: 22654, sum loss: 4399.962891, avg loss: 2.325562, ppl: 10.232427 +epoch: 1, batch: 22655, sum loss: 4422.754883, avg loss: 2.678834, ppl: 14.568100 +epoch: 1, batch: 22656, sum loss: 
5081.982910, avg loss: 2.736663, ppl: 15.435386 +epoch: 1, batch: 22657, sum loss: 3858.100830, avg loss: 2.624558, ppl: 13.798480 +epoch: 1, batch: 22658, sum loss: 3812.123047, avg loss: 2.267771, ppl: 9.657849 +epoch: 1, batch: 22659, sum loss: 5072.305664, avg loss: 2.678092, ppl: 14.557288 +epoch: 1, batch: 22660, sum loss: 4626.475586, avg loss: 2.768687, ppl: 15.937695 +epoch: 1, batch: 22661, sum loss: 4374.691895, avg loss: 2.501253, ppl: 12.197770 +epoch: 1, batch: 22662, sum loss: 3508.408203, avg loss: 2.686377, ppl: 14.678400 +epoch: 1, batch: 22663, sum loss: 5314.828125, avg loss: 2.865136, ppl: 17.551447 +epoch: 1, batch: 22664, sum loss: 4076.634521, avg loss: 2.638598, ppl: 13.993577 +epoch: 1, batch: 22665, sum loss: 4203.362793, avg loss: 2.653638, ppl: 14.205626 +epoch: 1, batch: 22666, sum loss: 4394.580078, avg loss: 2.498340, ppl: 12.162290 +epoch: 1, batch: 22667, sum loss: 4771.661621, avg loss: 2.698904, ppl: 14.863429 +epoch: 1, batch: 22668, sum loss: 4277.894043, avg loss: 2.501692, ppl: 12.203128 +epoch: 1, batch: 22669, sum loss: 4270.086426, avg loss: 2.417943, ppl: 11.222745 +epoch: 1, batch: 22670, sum loss: 3656.158203, avg loss: 2.495671, ppl: 12.129870 +epoch: 1, batch: 22671, sum loss: 4293.108398, avg loss: 2.397045, ppl: 10.990655 +epoch: 1, batch: 22672, sum loss: 3714.538086, avg loss: 2.630693, ppl: 13.883384 +epoch: 1, batch: 22673, sum loss: 4875.417480, avg loss: 2.672926, ppl: 14.482285 +epoch: 1, batch: 22674, sum loss: 4256.083496, avg loss: 2.582575, ppl: 13.231162 +epoch: 1, batch: 22675, sum loss: 4237.921875, avg loss: 2.675456, ppl: 14.518967 +epoch: 1, batch: 22676, sum loss: 5009.353516, avg loss: 3.080783, ppl: 21.775448 +epoch: 1, batch: 22677, sum loss: 4258.926270, avg loss: 2.517096, ppl: 12.392554 +epoch: 1, batch: 22678, sum loss: 4295.395508, avg loss: 2.437796, ppl: 11.447778 +epoch: 1, batch: 22679, sum loss: 4254.370605, avg loss: 2.637552, ppl: 13.978935 +epoch: 1, batch: 22680, sum loss: 
4528.017578, avg loss: 2.729366, ppl: 15.323163 +epoch: 1, batch: 22681, sum loss: 3890.272461, avg loss: 2.581468, ppl: 13.216527 +epoch: 1, batch: 22682, sum loss: 4356.654297, avg loss: 2.646813, ppl: 14.109004 +epoch: 1, batch: 22683, sum loss: 4094.952881, avg loss: 2.472798, ppl: 11.855568 +epoch: 1, batch: 22684, sum loss: 4480.681641, avg loss: 2.492036, ppl: 12.085862 +epoch: 1, batch: 22685, sum loss: 4303.514648, avg loss: 2.490460, ppl: 12.066825 +epoch: 1, batch: 22686, sum loss: 4146.395020, avg loss: 2.562667, ppl: 12.970362 +epoch: 1, batch: 22687, sum loss: 4205.082520, avg loss: 2.439143, ppl: 11.463215 +epoch: 1, batch: 22688, sum loss: 5719.618652, avg loss: 3.075064, ppl: 21.651266 +epoch: 1, batch: 22689, sum loss: 4724.524414, avg loss: 2.807204, ppl: 16.563541 +epoch: 1, batch: 22690, sum loss: 4164.315918, avg loss: 2.679740, ppl: 14.581301 +epoch: 1, batch: 22691, sum loss: 4337.980469, avg loss: 2.466163, ppl: 11.777170 +epoch: 1, batch: 22692, sum loss: 5049.728027, avg loss: 2.817929, ppl: 16.742138 +epoch: 1, batch: 22693, sum loss: 5167.255859, avg loss: 2.748540, ppl: 15.619813 +epoch: 1, batch: 22694, sum loss: 5331.609863, avg loss: 2.970256, ppl: 19.496916 +epoch: 1, batch: 22695, sum loss: 3953.936035, avg loss: 2.443718, ppl: 11.515779 +epoch: 1, batch: 22696, sum loss: 4446.683594, avg loss: 2.718022, ppl: 15.150323 +epoch: 1, batch: 22697, sum loss: 4938.340820, avg loss: 2.852883, ppl: 17.337696 +epoch: 1, batch: 22698, sum loss: 4519.089355, avg loss: 2.730568, ppl: 15.341594 +epoch: 1, batch: 22699, sum loss: 3950.552734, avg loss: 2.329335, ppl: 10.271113 +epoch: 1, batch: 22700, sum loss: 3860.445801, avg loss: 2.536430, ppl: 12.634480 +epoch: 1, batch: 22701, sum loss: 4472.980469, avg loss: 2.357923, ppl: 10.568979 +epoch: 1, batch: 22702, sum loss: 5145.489258, avg loss: 2.898867, ppl: 18.153568 +epoch: 1, batch: 22703, sum loss: 4021.014404, avg loss: 2.514706, ppl: 12.362969 +epoch: 1, batch: 22704, sum loss: 
4976.576172, avg loss: 2.764765, ppl: 15.875301 +epoch: 1, batch: 22705, sum loss: 4048.794189, avg loss: 2.580493, ppl: 13.203651 +epoch: 1, batch: 22706, sum loss: 4339.298340, avg loss: 2.614035, ppl: 13.654036 +epoch: 1, batch: 22707, sum loss: 5058.545898, avg loss: 2.818131, ppl: 16.745531 +epoch: 1, batch: 22708, sum loss: 4626.787109, avg loss: 2.589137, ppl: 13.318268 +epoch: 1, batch: 22709, sum loss: 4393.176758, avg loss: 2.740597, ppl: 15.496230 +epoch: 1, batch: 22710, sum loss: 4169.905762, avg loss: 2.399255, ppl: 11.014970 +epoch: 1, batch: 22711, sum loss: 4473.163574, avg loss: 2.606739, ppl: 13.554774 +epoch: 1, batch: 22712, sum loss: 4958.723633, avg loss: 2.717109, ppl: 15.136498 +epoch: 1, batch: 22713, sum loss: 4013.870605, avg loss: 2.505537, ppl: 12.250139 +epoch: 1, batch: 22714, sum loss: 4544.375488, avg loss: 2.719555, ppl: 15.173563 +epoch: 1, batch: 22715, sum loss: 4393.512207, avg loss: 2.560322, ppl: 12.939981 +epoch: 1, batch: 22716, sum loss: 5022.944336, avg loss: 2.707787, ppl: 14.996050 +epoch: 1, batch: 22717, sum loss: 4659.919434, avg loss: 2.600402, ppl: 13.469147 +epoch: 1, batch: 22718, sum loss: 4611.618652, avg loss: 2.896745, ppl: 18.115093 +epoch: 1, batch: 22719, sum loss: 4344.169922, avg loss: 2.765226, ppl: 15.882631 +epoch: 1, batch: 22720, sum loss: 3403.899170, avg loss: 2.464808, ppl: 11.761218 +epoch: 1, batch: 22721, sum loss: 4443.187988, avg loss: 2.751200, ppl: 15.661414 +epoch: 1, batch: 22722, sum loss: 4218.716309, avg loss: 2.580255, ppl: 13.200498 +epoch: 1, batch: 22723, sum loss: 3842.538086, avg loss: 2.445919, ppl: 11.541149 +epoch: 1, batch: 22724, sum loss: 2862.193115, avg loss: 2.186549, ppl: 8.904434 +epoch: 1, batch: 22725, sum loss: 5291.416992, avg loss: 2.748788, ppl: 15.623683 +epoch: 1, batch: 22726, sum loss: 5086.032227, avg loss: 2.904644, ppl: 18.258751 +epoch: 1, batch: 22727, sum loss: 4909.289551, avg loss: 2.826304, ppl: 16.882942 +epoch: 1, batch: 22728, sum loss: 
4193.583008, avg loss: 2.684752, ppl: 14.654570 +epoch: 1, batch: 22729, sum loss: 4077.844727, avg loss: 2.446218, ppl: 11.544597 +epoch: 1, batch: 22730, sum loss: 4738.437012, avg loss: 2.695357, ppl: 14.810800 +epoch: 1, batch: 22731, sum loss: 4922.275879, avg loss: 2.665011, ppl: 14.368114 +epoch: 1, batch: 22732, sum loss: 3528.430908, avg loss: 2.312209, ppl: 10.096705 +epoch: 1, batch: 22733, sum loss: 4686.162109, avg loss: 2.676278, ppl: 14.530903 +epoch: 1, batch: 22734, sum loss: 4349.079102, avg loss: 2.780741, ppl: 16.130970 +epoch: 1, batch: 22735, sum loss: 3272.170898, avg loss: 2.445569, ppl: 11.537107 +epoch: 1, batch: 22736, sum loss: 4042.959473, avg loss: 2.566958, ppl: 13.026141 +epoch: 1, batch: 22737, sum loss: 4719.461914, avg loss: 2.701466, ppl: 14.901567 +epoch: 1, batch: 22738, sum loss: 4670.868164, avg loss: 2.837709, ppl: 17.076593 +epoch: 1, batch: 22739, sum loss: 3778.592041, avg loss: 2.482649, ppl: 11.972942 +epoch: 1, batch: 22740, sum loss: 3841.395508, avg loss: 2.353796, ppl: 10.525451 +epoch: 1, batch: 22741, sum loss: 3448.547119, avg loss: 2.366882, ppl: 10.664091 +epoch: 1, batch: 22742, sum loss: 3803.588867, avg loss: 2.539111, ppl: 12.668408 +epoch: 1, batch: 22743, sum loss: 4180.213867, avg loss: 2.698653, ppl: 14.859706 +epoch: 1, batch: 22744, sum loss: 4192.853027, avg loss: 2.749412, ppl: 15.633434 +epoch: 1, batch: 22745, sum loss: 5064.896484, avg loss: 2.949852, ppl: 19.103136 +epoch: 1, batch: 22746, sum loss: 4037.754150, avg loss: 2.459047, ppl: 11.693658 +epoch: 1, batch: 22747, sum loss: 5521.325195, avg loss: 2.829998, ppl: 16.945423 +epoch: 1, batch: 22748, sum loss: 4313.374023, avg loss: 2.393659, ppl: 10.953504 +epoch: 1, batch: 22749, sum loss: 5010.961914, avg loss: 2.756305, ppl: 15.741566 +epoch: 1, batch: 22750, sum loss: 3679.114014, avg loss: 2.514774, ppl: 12.363812 +epoch: 1, batch: 22751, sum loss: 5284.898926, avg loss: 2.724175, ppl: 15.243829 +epoch: 1, batch: 22752, sum loss: 
3239.905762, avg loss: 2.229805, ppl: 9.298048 +epoch: 1, batch: 22753, sum loss: 4525.192871, avg loss: 2.560947, ppl: 12.948070 +epoch: 1, batch: 22754, sum loss: 4308.048340, avg loss: 2.679134, ppl: 14.572474 +epoch: 1, batch: 22755, sum loss: 4480.355469, avg loss: 2.532705, ppl: 12.587513 +epoch: 1, batch: 22756, sum loss: 3753.475098, avg loss: 2.541283, ppl: 12.695951 +epoch: 1, batch: 22757, sum loss: 4610.341797, avg loss: 2.691385, ppl: 14.752087 +epoch: 1, batch: 22758, sum loss: 4805.222656, avg loss: 2.787252, ppl: 16.236343 +epoch: 1, batch: 22759, sum loss: 4542.943848, avg loss: 2.804286, ppl: 16.515287 +epoch: 1, batch: 22760, sum loss: 4336.969727, avg loss: 2.626875, ppl: 13.830478 +epoch: 1, batch: 22761, sum loss: 5352.253906, avg loss: 3.117213, ppl: 22.583342 +epoch: 1, batch: 22762, sum loss: 4537.026367, avg loss: 2.823290, ppl: 16.832130 +epoch: 1, batch: 22763, sum loss: 4709.663086, avg loss: 2.623768, ppl: 13.787576 +epoch: 1, batch: 22764, sum loss: 4455.020996, avg loss: 2.354662, ppl: 10.534570 +epoch: 1, batch: 22765, sum loss: 3886.399658, avg loss: 2.512217, ppl: 12.332241 +epoch: 1, batch: 22766, sum loss: 3932.143066, avg loss: 2.427249, ppl: 11.327676 +epoch: 1, batch: 22767, sum loss: 4597.289551, avg loss: 2.508069, ppl: 12.281187 +epoch: 1, batch: 22768, sum loss: 4524.290527, avg loss: 2.639609, ppl: 14.007730 +epoch: 1, batch: 22769, sum loss: 5140.195801, avg loss: 2.652320, ppl: 14.186913 +epoch: 1, batch: 22770, sum loss: 4288.531250, avg loss: 2.531600, ppl: 12.573614 +epoch: 1, batch: 22771, sum loss: 3310.669189, avg loss: 2.363076, ppl: 10.623576 +epoch: 1, batch: 22772, sum loss: 3638.309326, avg loss: 2.584027, ppl: 13.250384 +epoch: 1, batch: 22773, sum loss: 4390.621582, avg loss: 2.569117, ppl: 13.054296 +epoch: 1, batch: 22774, sum loss: 3984.935547, avg loss: 2.565960, ppl: 13.013144 +epoch: 1, batch: 22775, sum loss: 3844.143555, avg loss: 2.649306, ppl: 14.144223 +epoch: 1, batch: 22776, sum loss: 
4052.030029, avg loss: 2.556486, ppl: 12.890439 +epoch: 1, batch: 22777, sum loss: 4717.591797, avg loss: 2.625260, ppl: 13.808162 +epoch: 1, batch: 22778, sum loss: 4216.297852, avg loss: 2.552238, ppl: 12.835804 +epoch: 1, batch: 22779, sum loss: 3925.902832, avg loss: 2.679797, ppl: 14.582135 +epoch: 1, batch: 22780, sum loss: 3929.030762, avg loss: 2.544709, ppl: 12.739523 +epoch: 1, batch: 22781, sum loss: 5357.498047, avg loss: 2.840667, ppl: 17.127190 +epoch: 1, batch: 22782, sum loss: 4454.257812, avg loss: 2.709403, ppl: 15.020299 +epoch: 1, batch: 22783, sum loss: 5175.077637, avg loss: 2.587539, ppl: 13.297007 +epoch: 1, batch: 22784, sum loss: 4946.666992, avg loss: 2.716456, ppl: 15.126625 +epoch: 1, batch: 22785, sum loss: 4072.806396, avg loss: 2.454977, ppl: 11.646163 +epoch: 1, batch: 22786, sum loss: 4411.296875, avg loss: 2.578198, ppl: 13.173381 +epoch: 1, batch: 22787, sum loss: 3452.815430, avg loss: 2.469825, ppl: 11.820378 +epoch: 1, batch: 22788, sum loss: 3877.372803, avg loss: 2.440134, ppl: 11.474576 +epoch: 1, batch: 22789, sum loss: 5325.120117, avg loss: 2.760560, ppl: 15.808690 +epoch: 1, batch: 22790, sum loss: 4157.037598, avg loss: 2.566073, ppl: 13.014611 +epoch: 1, batch: 22791, sum loss: 4424.048828, avg loss: 2.741046, ppl: 15.503200 +epoch: 1, batch: 22792, sum loss: 4589.000977, avg loss: 2.830969, ppl: 16.961891 +epoch: 1, batch: 22793, sum loss: 4517.724609, avg loss: 2.773312, ppl: 16.011570 +epoch: 1, batch: 22794, sum loss: 4566.882324, avg loss: 2.767807, ppl: 15.923683 +epoch: 1, batch: 22795, sum loss: 4656.362305, avg loss: 2.816916, ppl: 16.725191 +epoch: 1, batch: 22796, sum loss: 3681.623047, avg loss: 2.339023, ppl: 10.371102 +epoch: 1, batch: 22797, sum loss: 4758.387695, avg loss: 2.627492, ppl: 13.839018 +epoch: 1, batch: 22798, sum loss: 4474.174805, avg loss: 2.530642, ppl: 12.561568 +epoch: 1, batch: 22799, sum loss: 5578.293945, avg loss: 2.926702, ppl: 18.665964 +epoch: 1, batch: 22800, sum loss: 
4458.458984, avg loss: 2.496338, ppl: 12.137959 +epoch: 1, batch: 22801, sum loss: 4382.025391, avg loss: 2.708297, ppl: 15.003710 +epoch: 1, batch: 22802, sum loss: 4868.985352, avg loss: 2.747735, ppl: 15.607234 +epoch: 1, batch: 22803, sum loss: 5047.826660, avg loss: 2.653957, ppl: 14.210162 +epoch: 1, batch: 22804, sum loss: 4105.222168, avg loss: 2.578657, ppl: 13.179428 +epoch: 1, batch: 22805, sum loss: 3929.999512, avg loss: 2.594059, ppl: 13.383990 +epoch: 1, batch: 22806, sum loss: 3436.210449, avg loss: 2.278654, ppl: 9.763531 +epoch: 1, batch: 22807, sum loss: 4283.101562, avg loss: 2.698867, ppl: 14.862880 +epoch: 1, batch: 22808, sum loss: 4888.864258, avg loss: 2.701030, ppl: 14.895069 +epoch: 1, batch: 22809, sum loss: 5493.035156, avg loss: 2.756164, ppl: 15.739352 +epoch: 1, batch: 22810, sum loss: 4390.223633, avg loss: 2.798103, ppl: 16.413483 +epoch: 1, batch: 22811, sum loss: 4512.869629, avg loss: 2.667181, ppl: 14.399314 +epoch: 1, batch: 22812, sum loss: 5068.004395, avg loss: 2.660370, ppl: 14.301575 +epoch: 1, batch: 22813, sum loss: 4451.389160, avg loss: 2.548019, ppl: 12.781757 +epoch: 1, batch: 22814, sum loss: 3748.657959, avg loss: 2.440533, ppl: 11.479154 +epoch: 1, batch: 22815, sum loss: 4023.815186, avg loss: 2.733570, ppl: 15.387725 +epoch: 1, batch: 22816, sum loss: 4244.010742, avg loss: 2.453185, ppl: 11.625319 +epoch: 1, batch: 22817, sum loss: 4213.269531, avg loss: 2.509392, ppl: 12.297454 +epoch: 1, batch: 22818, sum loss: 5055.681641, avg loss: 2.649728, ppl: 14.150193 +epoch: 1, batch: 22819, sum loss: 4120.535156, avg loss: 2.419574, ppl: 11.241073 +epoch: 1, batch: 22820, sum loss: 5254.381348, avg loss: 2.927232, ppl: 18.675865 +epoch: 1, batch: 22821, sum loss: 4640.353516, avg loss: 2.531562, ppl: 12.573134 +epoch: 1, batch: 22822, sum loss: 4620.496582, avg loss: 2.686335, ppl: 14.677788 +epoch: 1, batch: 22823, sum loss: 3954.705566, avg loss: 2.661309, ppl: 14.315019 +epoch: 1, batch: 22824, sum loss: 
4657.443848, avg loss: 2.831273, ppl: 16.967043 +epoch: 1, batch: 22825, sum loss: 4644.573730, avg loss: 2.621091, ppl: 13.750723 +epoch: 1, batch: 22826, sum loss: 4419.500488, avg loss: 2.416348, ppl: 11.204866 +epoch: 1, batch: 22827, sum loss: 4251.311523, avg loss: 2.584384, ppl: 13.255117 +epoch: 1, batch: 22828, sum loss: 5402.325684, avg loss: 2.818115, ppl: 16.745253 +epoch: 1, batch: 22829, sum loss: 5184.398438, avg loss: 2.708672, ppl: 15.009331 +epoch: 1, batch: 22830, sum loss: 3676.213379, avg loss: 2.402754, ppl: 11.053575 +epoch: 1, batch: 22831, sum loss: 3843.443359, avg loss: 2.448053, ppl: 11.565807 +epoch: 1, batch: 22832, sum loss: 4557.708008, avg loss: 2.596985, ppl: 13.423201 +epoch: 1, batch: 22833, sum loss: 4782.847168, avg loss: 2.755096, ppl: 15.722553 +epoch: 1, batch: 22834, sum loss: 4057.109863, avg loss: 2.414947, ppl: 11.189173 +epoch: 1, batch: 22835, sum loss: 4559.176758, avg loss: 2.718650, ppl: 15.159848 +epoch: 1, batch: 22836, sum loss: 4562.392090, avg loss: 2.625082, ppl: 13.805703 +epoch: 1, batch: 22837, sum loss: 3799.468506, avg loss: 2.548269, ppl: 12.784951 +epoch: 1, batch: 22838, sum loss: 4524.003906, avg loss: 2.712233, ppl: 15.062867 +epoch: 1, batch: 22839, sum loss: 4290.052734, avg loss: 2.811306, ppl: 16.631624 +epoch: 1, batch: 22840, sum loss: 5632.594238, avg loss: 2.904896, ppl: 18.263353 +epoch: 1, batch: 22841, sum loss: 3955.817871, avg loss: 2.364506, ppl: 10.638780 +epoch: 1, batch: 22842, sum loss: 4515.281738, avg loss: 2.697301, ppl: 14.839627 +epoch: 1, batch: 22843, sum loss: 4705.021973, avg loss: 2.775824, ppl: 16.051849 +epoch: 1, batch: 22844, sum loss: 3797.440430, avg loss: 2.521541, ppl: 12.447762 +epoch: 1, batch: 22845, sum loss: 4464.166504, avg loss: 2.700645, ppl: 14.889336 +epoch: 1, batch: 22846, sum loss: 4928.177246, avg loss: 2.666763, ppl: 14.393296 +epoch: 1, batch: 22847, sum loss: 3655.293457, avg loss: 2.664208, ppl: 14.356577 +epoch: 1, batch: 22848, sum loss: 
4180.591309, avg loss: 2.732413, ppl: 15.369927 +epoch: 1, batch: 22849, sum loss: 4634.483398, avg loss: 2.719767, ppl: 15.176790 +epoch: 1, batch: 22850, sum loss: 4462.735840, avg loss: 2.794450, ppl: 16.353624 +epoch: 1, batch: 22851, sum loss: 5282.255371, avg loss: 2.653066, ppl: 14.197507 +epoch: 1, batch: 22852, sum loss: 4563.945312, avg loss: 2.719872, ppl: 15.178379 +epoch: 1, batch: 22853, sum loss: 4045.130615, avg loss: 2.709398, ppl: 15.020224 +epoch: 1, batch: 22854, sum loss: 5065.724121, avg loss: 2.811168, ppl: 16.629326 +epoch: 1, batch: 22855, sum loss: 4333.321289, avg loss: 2.491847, ppl: 12.083571 +epoch: 1, batch: 22856, sum loss: 4869.497070, avg loss: 2.705276, ppl: 14.958448 +epoch: 1, batch: 22857, sum loss: 4285.802734, avg loss: 2.681979, ppl: 14.613988 +epoch: 1, batch: 22858, sum loss: 4899.754395, avg loss: 2.588354, ppl: 13.307850 +epoch: 1, batch: 22859, sum loss: 4356.089844, avg loss: 2.470839, ppl: 11.832374 +epoch: 1, batch: 22860, sum loss: 4501.711914, avg loss: 2.815330, ppl: 16.698677 +epoch: 1, batch: 22861, sum loss: 4718.414062, avg loss: 2.492559, ppl: 12.092177 +epoch: 1, batch: 22862, sum loss: 4115.319824, avg loss: 2.572075, ppl: 13.092962 +epoch: 1, batch: 22863, sum loss: 3131.494629, avg loss: 2.064268, ppl: 7.879529 +epoch: 1, batch: 22864, sum loss: 4122.654785, avg loss: 2.829550, ppl: 16.937841 +epoch: 1, batch: 22865, sum loss: 4086.821777, avg loss: 2.458978, ppl: 11.692858 +epoch: 1, batch: 22866, sum loss: 4602.981934, avg loss: 2.791378, ppl: 16.303467 +epoch: 1, batch: 22867, sum loss: 3541.223145, avg loss: 2.252687, ppl: 9.513259 +epoch: 1, batch: 22868, sum loss: 4099.674805, avg loss: 2.626313, ppl: 13.822715 +epoch: 1, batch: 22869, sum loss: 4502.241211, avg loss: 2.525093, ppl: 12.492061 +epoch: 1, batch: 22870, sum loss: 4487.942383, avg loss: 2.705209, ppl: 14.957446 +epoch: 1, batch: 22871, sum loss: 4281.921875, avg loss: 2.562491, ppl: 12.968077 +epoch: 1, batch: 22872, sum loss: 
4768.138672, avg loss: 2.686275, ppl: 14.676906 +epoch: 1, batch: 22873, sum loss: 3789.725586, avg loss: 2.489964, ppl: 12.060848 +epoch: 1, batch: 22874, sum loss: 4900.356934, avg loss: 2.872425, ppl: 17.679836 +epoch: 1, batch: 22875, sum loss: 5008.578125, avg loss: 2.634707, ppl: 13.939227 +epoch: 1, batch: 22876, sum loss: 5259.351074, avg loss: 2.818516, ppl: 16.751972 +epoch: 1, batch: 22877, sum loss: 5270.628418, avg loss: 2.788692, ppl: 16.259743 +epoch: 1, batch: 22878, sum loss: 4284.879395, avg loss: 2.755550, ppl: 15.729685 +epoch: 1, batch: 22879, sum loss: 4538.139648, avg loss: 2.624719, ppl: 13.800701 +epoch: 1, batch: 22880, sum loss: 3960.177002, avg loss: 2.462797, ppl: 11.737592 +epoch: 1, batch: 22881, sum loss: 5192.783203, avg loss: 2.643983, ppl: 14.069135 +epoch: 1, batch: 22882, sum loss: 4475.370605, avg loss: 2.765989, ppl: 15.894757 +epoch: 1, batch: 22883, sum loss: 3355.849609, avg loss: 2.458498, ppl: 11.687244 +epoch: 1, batch: 22884, sum loss: 4827.711914, avg loss: 2.746139, ppl: 15.582345 +epoch: 1, batch: 22885, sum loss: 4460.910156, avg loss: 2.524567, ppl: 12.485490 +epoch: 1, batch: 22886, sum loss: 4819.595703, avg loss: 2.642322, ppl: 14.045784 +epoch: 1, batch: 22887, sum loss: 4324.186523, avg loss: 2.854249, ppl: 17.361385 +epoch: 1, batch: 22888, sum loss: 4590.483398, avg loss: 2.713052, ppl: 15.075209 +epoch: 1, batch: 22889, sum loss: 4621.792969, avg loss: 2.772521, ppl: 15.998920 +epoch: 1, batch: 22890, sum loss: 4115.058594, avg loss: 2.721600, ppl: 15.204627 +epoch: 1, batch: 22891, sum loss: 5034.126465, avg loss: 2.721149, ppl: 15.197782 +epoch: 1, batch: 22892, sum loss: 4171.336914, avg loss: 2.528083, ppl: 12.529465 +epoch: 1, batch: 22893, sum loss: 4090.932861, avg loss: 2.609013, ppl: 13.585640 +epoch: 1, batch: 22894, sum loss: 4239.236328, avg loss: 2.514375, ppl: 12.358885 +epoch: 1, batch: 22895, sum loss: 4479.992676, avg loss: 2.607679, ppl: 13.567526 +epoch: 1, batch: 22896, sum loss: 
3543.767090, avg loss: 2.362511, ppl: 10.617583 +epoch: 1, batch: 22897, sum loss: 3927.540039, avg loss: 2.560326, ppl: 12.940033 +epoch: 1, batch: 22898, sum loss: 4792.252930, avg loss: 2.650582, ppl: 14.162283 +epoch: 1, batch: 22899, sum loss: 4659.499512, avg loss: 2.647443, ppl: 14.117890 +epoch: 1, batch: 22900, sum loss: 4177.098633, avg loss: 2.376051, ppl: 10.762323 +epoch: 1, batch: 22901, sum loss: 4228.097656, avg loss: 2.706849, ppl: 14.981987 +epoch: 1, batch: 22902, sum loss: 4356.539551, avg loss: 2.447494, ppl: 11.559346 +epoch: 1, batch: 22903, sum loss: 4514.956543, avg loss: 2.697107, ppl: 14.836744 +epoch: 1, batch: 22904, sum loss: 4502.577148, avg loss: 2.705876, ppl: 14.967421 +epoch: 1, batch: 22905, sum loss: 4535.163574, avg loss: 2.715667, ppl: 15.114685 +epoch: 1, batch: 22906, sum loss: 4374.877441, avg loss: 2.555419, ppl: 12.876694 +epoch: 1, batch: 22907, sum loss: 3553.791016, avg loss: 2.490393, ppl: 12.066019 +epoch: 1, batch: 22908, sum loss: 4539.311523, avg loss: 2.520439, ppl: 12.434059 +epoch: 1, batch: 22909, sum loss: 4452.529785, avg loss: 2.476379, ppl: 11.898106 +epoch: 1, batch: 22910, sum loss: 4911.931641, avg loss: 2.889372, ppl: 17.982006 +epoch: 1, batch: 22911, sum loss: 4105.350098, avg loss: 2.442207, ppl: 11.498391 +epoch: 1, batch: 22912, sum loss: 4187.071777, avg loss: 2.605521, ppl: 13.538273 +epoch: 1, batch: 22913, sum loss: 4181.801758, avg loss: 2.653427, ppl: 14.202633 +epoch: 1, batch: 22914, sum loss: 3814.473633, avg loss: 2.375139, ppl: 10.752510 +epoch: 1, batch: 22915, sum loss: 3457.864258, avg loss: 2.311407, ppl: 10.088606 +epoch: 1, batch: 22916, sum loss: 4578.872070, avg loss: 2.836972, ppl: 17.064009 +epoch: 1, batch: 22917, sum loss: 3907.327637, avg loss: 2.485577, ppl: 12.008051 +epoch: 1, batch: 22918, sum loss: 4203.186035, avg loss: 2.643513, ppl: 14.062521 +epoch: 1, batch: 22919, sum loss: 5025.177246, avg loss: 2.891356, ppl: 18.017729 +epoch: 1, batch: 22920, sum loss: 
3544.298340, avg loss: 2.348773, ppl: 10.472712 +epoch: 1, batch: 22921, sum loss: 3828.187012, avg loss: 2.660311, ppl: 14.300729 +epoch: 1, batch: 22922, sum loss: 4838.712402, avg loss: 2.710763, ppl: 15.040754 +epoch: 1, batch: 22923, sum loss: 4156.204590, avg loss: 2.523500, ppl: 12.472173 +epoch: 1, batch: 22924, sum loss: 4991.814453, avg loss: 2.837871, ppl: 17.079363 +epoch: 1, batch: 22925, sum loss: 3826.647705, avg loss: 2.640889, ppl: 14.025662 +epoch: 1, batch: 22926, sum loss: 4478.752930, avg loss: 2.622221, ppl: 13.766258 +epoch: 1, batch: 22927, sum loss: 4580.111328, avg loss: 2.554440, ppl: 12.864097 +epoch: 1, batch: 22928, sum loss: 4355.319336, avg loss: 2.467603, ppl: 11.794142 +epoch: 1, batch: 22929, sum loss: 4295.442383, avg loss: 2.694757, ppl: 14.801918 +epoch: 1, batch: 22930, sum loss: 3778.471191, avg loss: 2.433014, ppl: 11.393174 +epoch: 1, batch: 22931, sum loss: 4890.650391, avg loss: 2.668113, ppl: 14.412744 +epoch: 1, batch: 22932, sum loss: 3735.986084, avg loss: 2.260125, ppl: 9.584284 +epoch: 1, batch: 22933, sum loss: 5061.217285, avg loss: 2.551017, ppl: 12.820133 +epoch: 1, batch: 22934, sum loss: 4238.093262, avg loss: 2.782727, ppl: 16.163038 +epoch: 1, batch: 22935, sum loss: 4400.288086, avg loss: 2.683103, ppl: 14.630415 +epoch: 1, batch: 22936, sum loss: 3748.951660, avg loss: 2.304211, ppl: 10.016273 +epoch: 1, batch: 22937, sum loss: 4360.916016, avg loss: 2.412011, ppl: 11.156376 +epoch: 1, batch: 22938, sum loss: 5473.052246, avg loss: 2.834310, ppl: 17.018650 +epoch: 1, batch: 22939, sum loss: 4494.303223, avg loss: 2.753862, ppl: 15.703167 +epoch: 1, batch: 22940, sum loss: 4101.196289, avg loss: 2.476568, ppl: 11.900350 +epoch: 1, batch: 22941, sum loss: 4943.397461, avg loss: 2.628069, ppl: 13.847001 +epoch: 1, batch: 22942, sum loss: 3211.472900, avg loss: 2.409207, ppl: 11.125136 +epoch: 1, batch: 22943, sum loss: 4206.447754, avg loss: 2.703372, ppl: 14.929998 +epoch: 1, batch: 22944, sum loss: 
4820.595703, avg loss: 2.690065, ppl: 14.732629 +epoch: 1, batch: 22945, sum loss: 3711.806396, avg loss: 2.504593, ppl: 12.238576 +epoch: 1, batch: 22946, sum loss: 4094.250488, avg loss: 2.626203, ppl: 13.821192 +epoch: 1, batch: 22947, sum loss: 4737.604980, avg loss: 2.490855, ppl: 12.071587 +epoch: 1, batch: 22948, sum loss: 4258.254395, avg loss: 2.772301, ppl: 15.995400 +epoch: 1, batch: 22949, sum loss: 3642.999268, avg loss: 2.481607, ppl: 11.960472 +epoch: 1, batch: 22950, sum loss: 3752.304199, avg loss: 2.352542, ppl: 10.512258 +epoch: 1, batch: 22951, sum loss: 4356.364258, avg loss: 2.643425, ppl: 14.061281 +epoch: 1, batch: 22952, sum loss: 4441.382324, avg loss: 2.624930, ppl: 13.803613 +epoch: 1, batch: 22953, sum loss: 4188.297363, avg loss: 2.574245, ppl: 13.121412 +epoch: 1, batch: 22954, sum loss: 5261.820312, avg loss: 2.841156, ppl: 17.135555 +epoch: 1, batch: 22955, sum loss: 6339.300781, avg loss: 3.149181, ppl: 23.316957 +epoch: 1, batch: 22956, sum loss: 4502.138184, avg loss: 2.634370, ppl: 13.934529 +epoch: 1, batch: 22957, sum loss: 4943.019531, avg loss: 2.653258, ppl: 14.200226 +epoch: 1, batch: 22958, sum loss: 4639.993652, avg loss: 2.708694, ppl: 15.009667 +epoch: 1, batch: 22959, sum loss: 3743.687012, avg loss: 2.440474, ppl: 11.478477 +epoch: 1, batch: 22960, sum loss: 4534.928223, avg loss: 2.859349, ppl: 17.450172 +epoch: 1, batch: 22961, sum loss: 4214.389160, avg loss: 2.809593, ppl: 16.603155 +epoch: 1, batch: 22962, sum loss: 3710.152100, avg loss: 2.379828, ppl: 10.803047 +epoch: 1, batch: 22963, sum loss: 4149.646973, avg loss: 2.616423, ppl: 13.686681 +epoch: 1, batch: 22964, sum loss: 5102.296875, avg loss: 2.760983, ppl: 15.815386 +epoch: 1, batch: 22965, sum loss: 4008.663330, avg loss: 2.433918, ppl: 11.403476 +epoch: 1, batch: 22966, sum loss: 4994.003418, avg loss: 2.808776, ppl: 16.589598 +epoch: 1, batch: 22967, sum loss: 4315.000000, avg loss: 2.632703, ppl: 13.911319 +epoch: 1, batch: 22968, sum loss: 
5030.181641, avg loss: 2.794545, ppl: 16.355192 +epoch: 1, batch: 22969, sum loss: 4689.935547, avg loss: 2.768557, ppl: 15.935619 +epoch: 1, batch: 22970, sum loss: 4296.300293, avg loss: 2.490609, ppl: 12.068623 +epoch: 1, batch: 22971, sum loss: 4140.440918, avg loss: 2.584545, ppl: 13.257255 +epoch: 1, batch: 22972, sum loss: 3998.703857, avg loss: 2.550194, ppl: 12.809586 +epoch: 1, batch: 22973, sum loss: 3573.163818, avg loss: 2.498716, ppl: 12.166863 +epoch: 1, batch: 22974, sum loss: 5169.391602, avg loss: 2.815573, ppl: 16.702740 +epoch: 1, batch: 22975, sum loss: 4817.271973, avg loss: 2.805633, ppl: 16.537542 +epoch: 1, batch: 22976, sum loss: 5096.675781, avg loss: 2.750499, ppl: 15.650447 +epoch: 1, batch: 22977, sum loss: 3676.718750, avg loss: 2.485949, ppl: 12.012515 +epoch: 1, batch: 22978, sum loss: 5243.548828, avg loss: 2.805537, ppl: 16.535954 +epoch: 1, batch: 22979, sum loss: 4665.600098, avg loss: 2.853578, ppl: 17.349751 +epoch: 1, batch: 22980, sum loss: 4536.573242, avg loss: 2.974802, ppl: 19.585745 +epoch: 1, batch: 22981, sum loss: 4627.752930, avg loss: 2.633895, ppl: 13.927909 +epoch: 1, batch: 22982, sum loss: 3945.645996, avg loss: 2.511551, ppl: 12.324026 +epoch: 1, batch: 22983, sum loss: 4018.360352, avg loss: 2.494327, ppl: 12.113576 +epoch: 1, batch: 22984, sum loss: 3868.503906, avg loss: 2.476635, ppl: 11.901150 +epoch: 1, batch: 22985, sum loss: 4475.704102, avg loss: 2.839914, ppl: 17.114286 +epoch: 1, batch: 22986, sum loss: 4466.769043, avg loss: 2.781301, ppl: 16.140001 +epoch: 1, batch: 22987, sum loss: 4435.983887, avg loss: 2.496333, ppl: 12.137904 +epoch: 1, batch: 22988, sum loss: 4250.526855, avg loss: 2.546751, ppl: 12.765558 +epoch: 1, batch: 22989, sum loss: 5253.770996, avg loss: 2.973272, ppl: 19.555794 +epoch: 1, batch: 22990, sum loss: 4386.567871, avg loss: 2.513793, ppl: 12.351686 +epoch: 1, batch: 22991, sum loss: 4231.780762, avg loss: 2.719653, ppl: 15.175061 +epoch: 1, batch: 22992, sum loss: 
4727.383301, avg loss: 2.658821, ppl: 14.279442 +epoch: 1, batch: 22993, sum loss: 3184.318604, avg loss: 2.259985, ppl: 9.582943 +epoch: 1, batch: 22994, sum loss: 4337.203125, avg loss: 2.617503, ppl: 13.701474 +epoch: 1, batch: 22995, sum loss: 4945.767090, avg loss: 2.758375, ppl: 15.774196 +epoch: 1, batch: 22996, sum loss: 4851.492188, avg loss: 2.548053, ppl: 12.782190 +epoch: 1, batch: 22997, sum loss: 4523.350098, avg loss: 2.665498, ppl: 14.375107 +epoch: 1, batch: 22998, sum loss: 4190.376953, avg loss: 2.586653, ppl: 13.285225 +epoch: 1, batch: 22999, sum loss: 4155.121094, avg loss: 2.415768, ppl: 11.198369 +epoch: 1, batch: 23000, sum loss: 4291.948730, avg loss: 2.550177, ppl: 12.809375 +epoch: 1, batch: 23001, sum loss: 3897.876953, avg loss: 2.518008, ppl: 12.403870 +epoch: 1, batch: 23002, sum loss: 5024.525879, avg loss: 3.084424, ppl: 21.854864 +epoch: 1, batch: 23003, sum loss: 4861.834961, avg loss: 2.690556, ppl: 14.739870 +epoch: 1, batch: 23004, sum loss: 4289.229492, avg loss: 2.641151, ppl: 14.029345 +epoch: 1, batch: 23005, sum loss: 3677.976807, avg loss: 2.442216, ppl: 11.498489 +epoch: 1, batch: 23006, sum loss: 4313.859375, avg loss: 2.540553, ppl: 12.686689 +epoch: 1, batch: 23007, sum loss: 4685.542969, avg loss: 2.722570, ppl: 15.219385 +epoch: 1, batch: 23008, sum loss: 4819.509277, avg loss: 2.577278, ppl: 13.161260 +epoch: 1, batch: 23009, sum loss: 4935.941406, avg loss: 2.603345, ppl: 13.508845 +epoch: 1, batch: 23010, sum loss: 4744.122070, avg loss: 2.653312, ppl: 14.200997 +epoch: 1, batch: 23011, sum loss: 3385.323242, avg loss: 2.255379, ppl: 9.538905 +epoch: 1, batch: 23012, sum loss: 4979.232910, avg loss: 2.905037, ppl: 18.265913 +epoch: 1, batch: 23013, sum loss: 4807.947266, avg loss: 2.702612, ppl: 14.918651 +epoch: 1, batch: 23014, sum loss: 4385.359863, avg loss: 2.759824, ppl: 15.797059 +epoch: 1, batch: 23015, sum loss: 5799.231934, avg loss: 2.972441, ppl: 19.539557 +epoch: 1, batch: 23016, sum loss: 
3940.432373, avg loss: 2.505043, ppl: 12.244082 +epoch: 1, batch: 23017, sum loss: 4544.513672, avg loss: 2.685883, ppl: 14.671147 +epoch: 1, batch: 23018, sum loss: 5583.268066, avg loss: 2.681685, ppl: 14.609690 +epoch: 1, batch: 23019, sum loss: 4522.617188, avg loss: 2.731049, ppl: 15.348981 +epoch: 1, batch: 23020, sum loss: 4561.375977, avg loss: 2.971581, ppl: 19.522751 +epoch: 1, batch: 23021, sum loss: 4786.416016, avg loss: 2.618390, ppl: 13.713621 +epoch: 1, batch: 23022, sum loss: 4528.729004, avg loss: 2.731441, ppl: 15.354998 +epoch: 1, batch: 23023, sum loss: 4542.467285, avg loss: 2.718413, ppl: 15.156244 +epoch: 1, batch: 23024, sum loss: 5290.563477, avg loss: 2.862859, ppl: 17.511520 +epoch: 1, batch: 23025, sum loss: 5119.858887, avg loss: 2.784045, ppl: 16.184359 +epoch: 1, batch: 23026, sum loss: 4337.804688, avg loss: 2.692616, ppl: 14.770267 +epoch: 1, batch: 23027, sum loss: 3613.741455, avg loss: 2.465035, ppl: 11.763896 +epoch: 1, batch: 23028, sum loss: 3692.474365, avg loss: 2.354894, ppl: 10.537016 +epoch: 1, batch: 23029, sum loss: 4808.253906, avg loss: 2.805282, ppl: 16.531744 +epoch: 1, batch: 23030, sum loss: 4889.052734, avg loss: 2.936368, ppl: 18.847269 +epoch: 1, batch: 23031, sum loss: 4919.602539, avg loss: 2.838778, ppl: 17.094864 +epoch: 1, batch: 23032, sum loss: 2874.909668, avg loss: 2.327862, ppl: 10.255991 +epoch: 1, batch: 23033, sum loss: 3579.384766, avg loss: 2.538571, ppl: 12.661563 +epoch: 1, batch: 23034, sum loss: 4536.901367, avg loss: 2.553124, ppl: 12.847175 +epoch: 1, batch: 23035, sum loss: 4140.749512, avg loss: 2.583125, ppl: 13.238445 +epoch: 1, batch: 23036, sum loss: 4667.154297, avg loss: 2.823445, ppl: 16.834743 +epoch: 1, batch: 23037, sum loss: 3953.790527, avg loss: 2.666076, ppl: 14.383417 +epoch: 1, batch: 23038, sum loss: 5245.059570, avg loss: 2.877158, ppl: 17.763723 +epoch: 1, batch: 23039, sum loss: 4009.789062, avg loss: 2.659011, ppl: 14.282163 +epoch: 1, batch: 23040, sum loss: 
4531.852051, avg loss: 2.621083, ppl: 13.750605 +epoch: 1, batch: 23041, sum loss: 5664.655273, avg loss: 3.024375, ppl: 20.581146 +epoch: 1, batch: 23042, sum loss: 4320.054199, avg loss: 2.557759, ppl: 12.906855 +epoch: 1, batch: 23043, sum loss: 4641.191895, avg loss: 2.617706, ppl: 13.704245 +epoch: 1, batch: 23044, sum loss: 4474.024414, avg loss: 2.543504, ppl: 12.724185 +epoch: 1, batch: 23045, sum loss: 4049.900635, avg loss: 2.730884, ppl: 15.346445 +epoch: 1, batch: 23046, sum loss: 4253.854980, avg loss: 2.655340, ppl: 14.229826 +epoch: 1, batch: 23047, sum loss: 4406.839844, avg loss: 2.595312, ppl: 13.400769 +epoch: 1, batch: 23048, sum loss: 4012.963379, avg loss: 2.662882, ppl: 14.337552 +epoch: 1, batch: 23049, sum loss: 5103.750488, avg loss: 3.150463, ppl: 23.346880 +epoch: 1, batch: 23050, sum loss: 4913.343262, avg loss: 2.794848, ppl: 16.360146 +epoch: 1, batch: 23051, sum loss: 3566.868408, avg loss: 2.468421, ppl: 11.803794 +epoch: 1, batch: 23052, sum loss: 4137.465332, avg loss: 2.461312, ppl: 11.720176 +epoch: 1, batch: 23053, sum loss: 4415.742188, avg loss: 2.543630, ppl: 12.725786 +epoch: 1, batch: 23054, sum loss: 4735.625000, avg loss: 2.756476, ppl: 15.744257 +epoch: 1, batch: 23055, sum loss: 4265.112305, avg loss: 2.599093, ppl: 13.451538 +epoch: 1, batch: 23056, sum loss: 3840.062012, avg loss: 2.466321, ppl: 11.779035 +epoch: 1, batch: 23057, sum loss: 5132.464355, avg loss: 2.525819, ppl: 12.501130 +epoch: 1, batch: 23058, sum loss: 4528.956055, avg loss: 2.882849, ppl: 17.865103 +epoch: 1, batch: 23059, sum loss: 4485.161621, avg loss: 2.829755, ppl: 16.941307 +epoch: 1, batch: 23060, sum loss: 5137.706055, avg loss: 2.688491, ppl: 14.709460 +epoch: 1, batch: 23061, sum loss: 5201.911621, avg loss: 3.059948, ppl: 21.326452 +epoch: 1, batch: 23062, sum loss: 3881.768555, avg loss: 2.617511, ppl: 13.701578 +epoch: 1, batch: 23063, sum loss: 5131.734375, avg loss: 2.814994, ppl: 16.693081 +epoch: 1, batch: 23064, sum loss: 
4916.659180, avg loss: 2.842000, ppl: 17.150023 +epoch: 1, batch: 23065, sum loss: 4051.927246, avg loss: 2.348943, ppl: 10.474497 +epoch: 1, batch: 23066, sum loss: 4681.551758, avg loss: 2.576528, ppl: 13.151402 +epoch: 1, batch: 23067, sum loss: 3731.941895, avg loss: 2.456841, ppl: 11.667897 +epoch: 1, batch: 23068, sum loss: 5088.025391, avg loss: 2.803320, ppl: 16.499332 +epoch: 1, batch: 23069, sum loss: 4648.746094, avg loss: 2.423747, ppl: 11.288072 +epoch: 1, batch: 23070, sum loss: 4053.554688, avg loss: 2.665059, ppl: 14.368798 +epoch: 1, batch: 23071, sum loss: 3652.084229, avg loss: 2.359228, ppl: 10.582775 +epoch: 1, batch: 23072, sum loss: 4989.166504, avg loss: 2.888921, ppl: 17.973906 +epoch: 1, batch: 23073, sum loss: 3581.070801, avg loss: 2.246594, ppl: 9.455475 +epoch: 1, batch: 23074, sum loss: 4142.104492, avg loss: 2.618271, ppl: 13.711990 +epoch: 1, batch: 23075, sum loss: 4028.264648, avg loss: 2.444335, ppl: 11.522890 +epoch: 1, batch: 23076, sum loss: 4188.391602, avg loss: 2.490126, ppl: 12.062792 +epoch: 1, batch: 23077, sum loss: 4041.602539, avg loss: 2.736359, ppl: 15.430705 +epoch: 1, batch: 23078, sum loss: 4282.086426, avg loss: 2.699928, ppl: 14.878665 +epoch: 1, batch: 23079, sum loss: 4270.706055, avg loss: 2.470044, ppl: 11.822968 +epoch: 1, batch: 23080, sum loss: 5150.993164, avg loss: 2.662012, ppl: 14.325080 +epoch: 1, batch: 23081, sum loss: 4975.291992, avg loss: 2.802981, ppl: 16.493744 +epoch: 1, batch: 23082, sum loss: 5036.719727, avg loss: 2.770473, ppl: 15.966185 +epoch: 1, batch: 23083, sum loss: 4528.240234, avg loss: 2.886068, ppl: 17.922691 +epoch: 1, batch: 23084, sum loss: 4544.159180, avg loss: 2.648112, ppl: 14.127336 +epoch: 1, batch: 23085, sum loss: 4694.574219, avg loss: 2.950707, ppl: 19.119461 +epoch: 1, batch: 23086, sum loss: 4915.731445, avg loss: 2.800987, ppl: 16.460878 +epoch: 1, batch: 23087, sum loss: 3668.764893, avg loss: 2.362373, ppl: 10.616113 +epoch: 1, batch: 23088, sum loss: 
4299.327148, avg loss: 2.588397, ppl: 13.308418 +epoch: 1, batch: 23089, sum loss: 5162.297363, avg loss: 2.879140, ppl: 17.798954 +epoch: 1, batch: 23090, sum loss: 3967.550537, avg loss: 2.495315, ppl: 12.125550 +epoch: 1, batch: 23091, sum loss: 5563.307129, avg loss: 2.873609, ppl: 17.700783 +epoch: 1, batch: 23092, sum loss: 5020.747070, avg loss: 2.775427, ppl: 16.045479 +epoch: 1, batch: 23093, sum loss: 4706.796875, avg loss: 2.725418, ppl: 15.262794 +epoch: 1, batch: 23094, sum loss: 5308.564941, avg loss: 2.814722, ppl: 16.688532 +epoch: 1, batch: 23095, sum loss: 4344.557617, avg loss: 2.555622, ppl: 12.879313 +epoch: 1, batch: 23096, sum loss: 3526.096680, avg loss: 2.418448, ppl: 11.228416 +epoch: 1, batch: 23097, sum loss: 4418.668945, avg loss: 2.819827, ppl: 16.773954 +epoch: 1, batch: 23098, sum loss: 5883.903320, avg loss: 2.922953, ppl: 18.596115 +epoch: 1, batch: 23099, sum loss: 3564.954590, avg loss: 2.435078, ppl: 11.416711 +epoch: 1, batch: 23100, sum loss: 4847.287598, avg loss: 2.847995, ppl: 17.253155 +epoch: 1, batch: 23101, sum loss: 4438.742676, avg loss: 2.601842, ppl: 13.488564 +epoch: 1, batch: 23102, sum loss: 4319.427246, avg loss: 2.684541, ppl: 14.651482 +epoch: 1, batch: 23103, sum loss: 4711.960449, avg loss: 2.551143, ppl: 12.821750 +epoch: 1, batch: 23104, sum loss: 4969.527344, avg loss: 2.851135, ppl: 17.307407 +epoch: 1, batch: 23105, sum loss: 4324.739746, avg loss: 2.630620, ppl: 13.882378 +epoch: 1, batch: 23106, sum loss: 4442.244629, avg loss: 2.716969, ppl: 15.134380 +epoch: 1, batch: 23107, sum loss: 3914.805664, avg loss: 2.477725, ppl: 11.914129 +epoch: 1, batch: 23108, sum loss: 5332.867676, avg loss: 2.800876, ppl: 16.459061 +epoch: 1, batch: 23109, sum loss: 3359.737061, avg loss: 2.058662, ppl: 7.835482 +epoch: 1, batch: 23110, sum loss: 4614.971680, avg loss: 2.831271, ppl: 16.967010 +epoch: 1, batch: 23111, sum loss: 4858.984863, avg loss: 2.839851, ppl: 17.113218 +epoch: 1, batch: 23112, sum loss: 
4155.633301, avg loss: 2.631813, ppl: 13.898950 +epoch: 1, batch: 23113, sum loss: 4392.125488, avg loss: 2.623731, ppl: 13.787069 +epoch: 1, batch: 23114, sum loss: 4862.989258, avg loss: 2.688220, ppl: 14.705474 +epoch: 1, batch: 23115, sum loss: 3557.923828, avg loss: 2.273434, ppl: 9.712694 +epoch: 1, batch: 23116, sum loss: 4958.053711, avg loss: 2.977810, ppl: 19.644751 +epoch: 1, batch: 23117, sum loss: 4651.547363, avg loss: 2.704388, ppl: 14.945166 +epoch: 1, batch: 23118, sum loss: 3555.245117, avg loss: 2.455280, ppl: 11.649693 +epoch: 1, batch: 23119, sum loss: 4473.811523, avg loss: 2.642535, ppl: 14.048772 +epoch: 1, batch: 23120, sum loss: 4363.500977, avg loss: 2.574337, ppl: 13.122611 +epoch: 1, batch: 23121, sum loss: 4267.357910, avg loss: 2.514648, ppl: 12.362259 +epoch: 1, batch: 23122, sum loss: 3910.575439, avg loss: 2.498770, ppl: 12.167521 +epoch: 1, batch: 23123, sum loss: 4354.324219, avg loss: 2.538965, ppl: 12.666548 +epoch: 1, batch: 23124, sum loss: 4267.822754, avg loss: 2.572527, ppl: 13.098886 +epoch: 1, batch: 23125, sum loss: 5266.312500, avg loss: 2.899952, ppl: 18.173267 +epoch: 1, batch: 23126, sum loss: 4437.974121, avg loss: 2.619819, ppl: 13.733243 +epoch: 1, batch: 23127, sum loss: 3602.118652, avg loss: 2.377636, ppl: 10.779392 +epoch: 1, batch: 23128, sum loss: 4932.681641, avg loss: 2.755688, ppl: 15.731860 +epoch: 1, batch: 23129, sum loss: 4514.257812, avg loss: 2.649212, ppl: 14.142895 +epoch: 1, batch: 23130, sum loss: 3169.498535, avg loss: 2.470381, ppl: 11.826949 +epoch: 1, batch: 23131, sum loss: 4060.410645, avg loss: 2.660820, ppl: 14.308010 +epoch: 1, batch: 23132, sum loss: 4229.819336, avg loss: 2.689014, ppl: 14.717160 +epoch: 1, batch: 23133, sum loss: 4579.929199, avg loss: 2.667402, ppl: 14.402503 +epoch: 1, batch: 23134, sum loss: 4279.625977, avg loss: 2.585877, ppl: 13.274922 +epoch: 1, batch: 23135, sum loss: 5341.747559, avg loss: 2.823334, ppl: 16.832874 +epoch: 1, batch: 23136, sum loss: 
4050.025146, avg loss: 2.509309, ppl: 12.296433 +epoch: 1, batch: 23137, sum loss: 4901.618164, avg loss: 2.749085, ppl: 15.628325 +epoch: 1, batch: 23138, sum loss: 4460.229492, avg loss: 2.588642, ppl: 13.311677 +epoch: 1, batch: 23139, sum loss: 4067.311523, avg loss: 2.483096, ppl: 11.978293 +epoch: 1, batch: 23140, sum loss: 5366.875977, avg loss: 2.885417, ppl: 17.911039 +epoch: 1, batch: 23141, sum loss: 5139.739258, avg loss: 2.898894, ppl: 18.154062 +epoch: 1, batch: 23142, sum loss: 3514.576416, avg loss: 2.652510, ppl: 14.189615 +epoch: 1, batch: 23143, sum loss: 5073.621094, avg loss: 2.511694, ppl: 12.325788 +epoch: 1, batch: 23144, sum loss: 4198.595215, avg loss: 2.466860, ppl: 11.785378 +epoch: 1, batch: 23145, sum loss: 3832.483887, avg loss: 2.700835, ppl: 14.892155 +epoch: 1, batch: 23146, sum loss: 4127.634766, avg loss: 2.634100, ppl: 13.930772 +epoch: 1, batch: 23147, sum loss: 4469.894043, avg loss: 2.738906, ppl: 15.470046 +epoch: 1, batch: 23148, sum loss: 4030.572510, avg loss: 2.527005, ppl: 12.515961 +epoch: 1, batch: 23149, sum loss: 4068.921143, avg loss: 2.559070, ppl: 12.923791 +epoch: 1, batch: 23150, sum loss: 5780.105469, avg loss: 2.843141, ppl: 17.169607 +epoch: 1, batch: 23151, sum loss: 4138.972656, avg loss: 2.561245, ppl: 12.951939 +epoch: 1, batch: 23152, sum loss: 4783.246094, avg loss: 2.842095, ppl: 17.151663 +epoch: 1, batch: 23153, sum loss: 3996.896484, avg loss: 2.677091, ppl: 14.542722 +epoch: 1, batch: 23154, sum loss: 3425.771484, avg loss: 2.573833, ppl: 13.115998 +epoch: 1, batch: 23155, sum loss: 3856.637695, avg loss: 2.517388, ppl: 12.396171 +epoch: 1, batch: 23156, sum loss: 3787.580566, avg loss: 2.525054, ppl: 12.491567 +epoch: 1, batch: 23157, sum loss: 4337.647461, avg loss: 2.513121, ppl: 12.343398 +epoch: 1, batch: 23158, sum loss: 4056.794189, avg loss: 2.617287, ppl: 13.698504 +epoch: 1, batch: 23159, sum loss: 4725.110840, avg loss: 2.681675, ppl: 14.609543 +epoch: 1, batch: 23160, sum loss: 
5723.531250, avg loss: 2.886299, ppl: 17.926842 +epoch: 1, batch: 23161, sum loss: 3775.007324, avg loss: 2.569780, ppl: 13.062952 +epoch: 1, batch: 23162, sum loss: 4776.700195, avg loss: 2.710954, ppl: 15.043616 +epoch: 1, batch: 23163, sum loss: 4173.549805, avg loss: 2.597106, ppl: 13.424836 +epoch: 1, batch: 23164, sum loss: 4184.799805, avg loss: 2.562645, ppl: 12.970080 +epoch: 1, batch: 23165, sum loss: 4563.109375, avg loss: 2.679454, ppl: 14.577127 +epoch: 1, batch: 23166, sum loss: 5360.715332, avg loss: 2.815502, ppl: 16.701557 +epoch: 1, batch: 23167, sum loss: 4111.588379, avg loss: 2.618846, ppl: 13.719884 +epoch: 1, batch: 23168, sum loss: 4605.694336, avg loss: 2.660713, ppl: 14.306485 +epoch: 1, batch: 23169, sum loss: 5282.331055, avg loss: 2.754083, ppl: 15.706630 +epoch: 1, batch: 23170, sum loss: 4957.114746, avg loss: 2.639571, ppl: 14.007195 +epoch: 1, batch: 23171, sum loss: 4864.844727, avg loss: 2.698195, ppl: 14.852891 +epoch: 1, batch: 23172, sum loss: 4321.448242, avg loss: 2.638247, ppl: 13.988653 +epoch: 1, batch: 23173, sum loss: 4106.680176, avg loss: 2.739613, ppl: 15.480997 +epoch: 1, batch: 23174, sum loss: 3986.551758, avg loss: 2.619285, ppl: 13.725907 +epoch: 1, batch: 23175, sum loss: 4183.210449, avg loss: 2.691899, ppl: 14.759672 +epoch: 1, batch: 23176, sum loss: 5392.602539, avg loss: 2.672251, ppl: 14.472510 +epoch: 1, batch: 23177, sum loss: 5084.486328, avg loss: 2.778408, ppl: 16.093376 +epoch: 1, batch: 23178, sum loss: 5217.689453, avg loss: 2.978133, ppl: 19.651102 +epoch: 1, batch: 23179, sum loss: 4898.928711, avg loss: 2.843255, ppl: 17.171574 +epoch: 1, batch: 23180, sum loss: 4183.452637, avg loss: 2.375612, ppl: 10.757592 +epoch: 1, batch: 23181, sum loss: 5207.226562, avg loss: 3.094014, ppl: 22.065479 +epoch: 1, batch: 23182, sum loss: 3830.113281, avg loss: 2.538180, ppl: 12.656613 +epoch: 1, batch: 23183, sum loss: 3483.203857, avg loss: 2.486227, ppl: 12.015852 +epoch: 1, batch: 23184, sum loss: 
4010.449707, avg loss: 2.477115, ppl: 11.906868 +epoch: 1, batch: 23185, sum loss: 4351.500977, avg loss: 2.626132, ppl: 13.820210 +epoch: 1, batch: 23186, sum loss: 5008.303711, avg loss: 3.027995, ppl: 20.655779 +epoch: 1, batch: 23187, sum loss: 3953.204102, avg loss: 2.360122, ppl: 10.592243 +epoch: 1, batch: 23188, sum loss: 4580.184082, avg loss: 2.564493, ppl: 12.994068 +epoch: 1, batch: 23189, sum loss: 5402.540527, avg loss: 2.745193, ppl: 15.567626 +epoch: 1, batch: 23190, sum loss: 4622.899414, avg loss: 2.755006, ppl: 15.721128 +epoch: 1, batch: 23191, sum loss: 4912.043457, avg loss: 2.679784, ppl: 14.581941 +epoch: 1, batch: 23192, sum loss: 4610.878906, avg loss: 2.697998, ppl: 14.849977 +epoch: 1, batch: 23193, sum loss: 4252.482422, avg loss: 2.429990, ppl: 11.358767 +epoch: 1, batch: 23194, sum loss: 4735.497559, avg loss: 2.723115, ppl: 15.227690 +epoch: 1, batch: 23195, sum loss: 4650.878418, avg loss: 2.432468, ppl: 11.386947 +epoch: 1, batch: 23196, sum loss: 3999.020020, avg loss: 2.556918, ppl: 12.896012 +epoch: 1, batch: 23197, sum loss: 4506.375977, avg loss: 2.739438, ppl: 15.478285 +epoch: 1, batch: 23198, sum loss: 4674.264160, avg loss: 2.888915, ppl: 17.973795 +epoch: 1, batch: 23199, sum loss: 4788.140625, avg loss: 2.664519, ppl: 14.361041 +epoch: 1, batch: 23200, sum loss: 4857.863770, avg loss: 2.827627, ppl: 16.905304 +epoch: 1, batch: 23201, sum loss: 4357.888672, avg loss: 2.603279, ppl: 13.507957 +epoch: 1, batch: 23202, sum loss: 4087.158691, avg loss: 2.489135, ppl: 12.050842 +epoch: 1, batch: 23203, sum loss: 5052.326172, avg loss: 2.717766, ppl: 15.146441 +epoch: 1, batch: 23204, sum loss: 4093.674805, avg loss: 2.558547, ppl: 12.917032 +epoch: 1, batch: 23205, sum loss: 4496.219238, avg loss: 2.426454, ppl: 11.318676 +epoch: 1, batch: 23206, sum loss: 3967.606934, avg loss: 2.620612, ppl: 13.744138 +epoch: 1, batch: 23207, sum loss: 4732.935059, avg loss: 2.670957, ppl: 14.453790 +epoch: 1, batch: 23208, sum loss: 
5315.127441, avg loss: 2.684408, ppl: 14.649522 +epoch: 1, batch: 23209, sum loss: 4776.185059, avg loss: 2.908761, ppl: 18.334064 +epoch: 1, batch: 23210, sum loss: 4909.702148, avg loss: 2.815196, ppl: 16.696449 +epoch: 1, batch: 23211, sum loss: 3167.354736, avg loss: 2.227394, ppl: 9.275665 +epoch: 1, batch: 23212, sum loss: 4096.377930, avg loss: 2.493231, ppl: 12.100304 +epoch: 1, batch: 23213, sum loss: 5385.999023, avg loss: 2.863370, ppl: 17.520470 +epoch: 1, batch: 23214, sum loss: 4163.730469, avg loss: 2.514330, ppl: 12.358325 +epoch: 1, batch: 23215, sum loss: 5695.584473, avg loss: 2.917820, ppl: 18.500912 +epoch: 1, batch: 23216, sum loss: 5210.542969, avg loss: 2.841081, ppl: 17.134279 +epoch: 1, batch: 23217, sum loss: 4579.140137, avg loss: 2.595885, ppl: 13.408442 +epoch: 1, batch: 23218, sum loss: 4633.481445, avg loss: 2.733617, ppl: 15.388452 +epoch: 1, batch: 23219, sum loss: 3847.394531, avg loss: 2.343115, ppl: 10.413623 +epoch: 1, batch: 23220, sum loss: 4531.174805, avg loss: 2.671683, ppl: 14.464296 +epoch: 1, batch: 23221, sum loss: 3887.724609, avg loss: 2.360488, ppl: 10.596126 +epoch: 1, batch: 23222, sum loss: 3781.958496, avg loss: 2.341770, ppl: 10.399627 +epoch: 1, batch: 23223, sum loss: 4043.956787, avg loss: 2.577410, ppl: 13.163005 +epoch: 1, batch: 23224, sum loss: 3760.186523, avg loss: 2.472181, ppl: 11.848255 +epoch: 1, batch: 23225, sum loss: 3973.595703, avg loss: 2.232357, ppl: 9.321815 +epoch: 1, batch: 23226, sum loss: 3891.404541, avg loss: 2.691151, ppl: 14.748644 +epoch: 1, batch: 23227, sum loss: 4631.794434, avg loss: 2.800359, ppl: 16.450560 +epoch: 1, batch: 23228, sum loss: 3794.064941, avg loss: 2.614793, ppl: 13.664389 +epoch: 1, batch: 23229, sum loss: 4222.936035, avg loss: 2.446661, ppl: 11.549712 +epoch: 1, batch: 23230, sum loss: 5069.332031, avg loss: 2.786879, ppl: 16.230280 +epoch: 1, batch: 23231, sum loss: 3680.041260, avg loss: 2.463214, ppl: 11.742488 +epoch: 1, batch: 23232, sum loss: 
4958.827148, avg loss: 2.859762, ppl: 17.457367 +epoch: 1, batch: 23233, sum loss: 4769.291992, avg loss: 2.658468, ppl: 14.274407 +epoch: 1, batch: 23234, sum loss: 5134.822266, avg loss: 2.674387, ppl: 14.503452 +epoch: 1, batch: 23235, sum loss: 4978.002441, avg loss: 2.575273, ppl: 13.134903 +epoch: 1, batch: 23236, sum loss: 5071.666016, avg loss: 2.842862, ppl: 17.164827 +epoch: 1, batch: 23237, sum loss: 4178.098633, avg loss: 2.591873, ppl: 13.354757 +epoch: 1, batch: 23238, sum loss: 3408.459473, avg loss: 2.312388, ppl: 10.098508 +epoch: 1, batch: 23239, sum loss: 3385.722656, avg loss: 2.467728, ppl: 11.795615 +epoch: 1, batch: 23240, sum loss: 4075.226318, avg loss: 2.700614, ppl: 14.888870 +epoch: 1, batch: 23241, sum loss: 4503.815430, avg loss: 2.595859, ppl: 13.408101 +epoch: 1, batch: 23242, sum loss: 5143.320801, avg loss: 2.852646, ppl: 17.333588 +epoch: 1, batch: 23243, sum loss: 5192.018555, avg loss: 2.806497, ppl: 16.551828 +epoch: 1, batch: 23244, sum loss: 5104.125000, avg loss: 2.890218, ppl: 17.997232 +epoch: 1, batch: 23245, sum loss: 4481.279785, avg loss: 2.527513, ppl: 12.522322 +epoch: 1, batch: 23246, sum loss: 4506.113770, avg loss: 2.569050, ppl: 13.053419 +epoch: 1, batch: 23247, sum loss: 3985.778809, avg loss: 2.538713, ppl: 12.663360 +epoch: 1, batch: 23248, sum loss: 4664.271973, avg loss: 2.902472, ppl: 18.219124 +epoch: 1, batch: 23249, sum loss: 4333.064453, avg loss: 2.698047, ppl: 14.850706 +epoch: 1, batch: 23250, sum loss: 5604.472656, avg loss: 2.893378, ppl: 18.054190 +epoch: 1, batch: 23251, sum loss: 5563.442871, avg loss: 3.010521, ppl: 20.297972 +epoch: 1, batch: 23252, sum loss: 4418.067871, avg loss: 2.497495, ppl: 12.152011 +epoch: 1, batch: 23253, sum loss: 4993.961914, avg loss: 2.774423, ppl: 16.029381 +epoch: 1, batch: 23254, sum loss: 5292.451172, avg loss: 2.773821, ppl: 16.019733 +epoch: 1, batch: 23255, sum loss: 3915.735840, avg loss: 2.402292, ppl: 11.048470 +epoch: 1, batch: 23256, sum loss: 
5219.906250, avg loss: 2.791394, ppl: 16.303728 +epoch: 1, batch: 23257, sum loss: 3888.167480, avg loss: 2.486041, ppl: 12.013615 +epoch: 1, batch: 23258, sum loss: 4025.375977, avg loss: 2.650017, ppl: 14.154280 +epoch: 1, batch: 23259, sum loss: 4281.901367, avg loss: 2.691327, ppl: 14.751239 +epoch: 1, batch: 23260, sum loss: 4685.114746, avg loss: 2.582753, ppl: 13.233525 +epoch: 1, batch: 23261, sum loss: 3778.194092, avg loss: 2.571949, ppl: 13.091321 +epoch: 1, batch: 23262, sum loss: 3979.642578, avg loss: 2.525154, ppl: 12.492818 +epoch: 1, batch: 23263, sum loss: 4121.483887, avg loss: 2.553584, ppl: 12.853082 +epoch: 1, batch: 23264, sum loss: 4855.289062, avg loss: 2.688421, ppl: 14.708440 +epoch: 1, batch: 23265, sum loss: 4468.740234, avg loss: 2.815841, ppl: 16.707228 +epoch: 1, batch: 23266, sum loss: 4414.160156, avg loss: 2.786717, ppl: 16.227659 +epoch: 1, batch: 23267, sum loss: 4302.067383, avg loss: 2.612063, ppl: 13.627131 +epoch: 1, batch: 23268, sum loss: 4902.041992, avg loss: 2.863342, ppl: 17.519985 +epoch: 1, batch: 23269, sum loss: 3425.651855, avg loss: 2.591265, ppl: 13.346641 +epoch: 1, batch: 23270, sum loss: 4772.015137, avg loss: 2.601971, ppl: 13.490303 +epoch: 1, batch: 23271, sum loss: 4708.661133, avg loss: 2.602909, ppl: 13.502955 +epoch: 1, batch: 23272, sum loss: 5317.529785, avg loss: 2.812020, ppl: 16.643505 +epoch: 1, batch: 23273, sum loss: 5008.603516, avg loss: 2.863696, ppl: 17.526178 +epoch: 1, batch: 23274, sum loss: 4540.022461, avg loss: 2.758215, ppl: 15.771672 +epoch: 1, batch: 23275, sum loss: 4474.165527, avg loss: 2.760127, ppl: 15.801844 +epoch: 1, batch: 23276, sum loss: 5354.169922, avg loss: 2.611790, ppl: 13.623418 +epoch: 1, batch: 23277, sum loss: 4612.288086, avg loss: 2.681563, ppl: 14.607906 +epoch: 1, batch: 23278, sum loss: 4642.585449, avg loss: 2.686682, ppl: 14.682870 +epoch: 1, batch: 23279, sum loss: 5039.807617, avg loss: 2.820262, ppl: 16.781242 +epoch: 1, batch: 23280, sum loss: 
3748.002930, avg loss: 2.671421, ppl: 14.460497 +epoch: 1, batch: 23281, sum loss: 3933.177734, avg loss: 2.779631, ppl: 16.113073 +epoch: 1, batch: 23282, sum loss: 4141.545410, avg loss: 2.528416, ppl: 12.533636 +epoch: 1, batch: 23283, sum loss: 3821.403809, avg loss: 2.628201, ppl: 13.848830 +epoch: 1, batch: 23284, sum loss: 4165.310547, avg loss: 2.733143, ppl: 15.381159 +epoch: 1, batch: 23285, sum loss: 5083.138672, avg loss: 2.936533, ppl: 18.850378 +epoch: 1, batch: 23286, sum loss: 4151.881836, avg loss: 2.596549, ppl: 13.417352 +epoch: 1, batch: 23287, sum loss: 4333.296387, avg loss: 2.688149, ppl: 14.704436 +epoch: 1, batch: 23288, sum loss: 4814.045898, avg loss: 2.790751, ppl: 16.293259 +epoch: 1, batch: 23289, sum loss: 4667.267090, avg loss: 2.607412, ppl: 13.563897 +epoch: 1, batch: 23290, sum loss: 4970.598633, avg loss: 2.618861, ppl: 13.720094 +epoch: 1, batch: 23291, sum loss: 4815.944336, avg loss: 2.770969, ppl: 15.974108 +epoch: 1, batch: 23292, sum loss: 3750.163086, avg loss: 2.609717, ppl: 13.595201 +epoch: 1, batch: 23293, sum loss: 4746.687500, avg loss: 2.908509, ppl: 18.329458 +epoch: 1, batch: 23294, sum loss: 4159.033691, avg loss: 2.698919, ppl: 14.863653 +epoch: 1, batch: 23295, sum loss: 4381.426758, avg loss: 2.938583, ppl: 18.889059 +epoch: 1, batch: 23296, sum loss: 3706.910400, avg loss: 2.636494, ppl: 13.964158 +epoch: 1, batch: 23297, sum loss: 5308.244141, avg loss: 2.923042, ppl: 18.597773 +epoch: 1, batch: 23298, sum loss: 4182.913574, avg loss: 2.343369, ppl: 10.416270 +epoch: 1, batch: 23299, sum loss: 4133.906250, avg loss: 2.591791, ppl: 13.353662 +epoch: 1, batch: 23300, sum loss: 4195.217285, avg loss: 2.525718, ppl: 12.499864 +epoch: 1, batch: 23301, sum loss: 4516.662109, avg loss: 2.752384, ppl: 15.679968 +epoch: 1, batch: 23302, sum loss: 3932.391846, avg loss: 2.528869, ppl: 12.539321 +epoch: 1, batch: 23303, sum loss: 4358.152832, avg loss: 2.633325, ppl: 13.919978 +epoch: 1, batch: 23304, sum loss: 
3671.233398, avg loss: 2.353355, ppl: 10.520807 +epoch: 1, batch: 23305, sum loss: 4847.888184, avg loss: 2.749795, ppl: 15.639421 +epoch: 1, batch: 23306, sum loss: 3799.888184, avg loss: 2.348509, ppl: 10.469951 +epoch: 1, batch: 23307, sum loss: 4251.181152, avg loss: 2.493361, ppl: 12.101888 +epoch: 1, batch: 23308, sum loss: 4249.261719, avg loss: 2.597348, ppl: 13.428085 +epoch: 1, batch: 23309, sum loss: 4651.318848, avg loss: 2.776907, ppl: 16.069241 +epoch: 1, batch: 23310, sum loss: 4452.605469, avg loss: 2.636238, ppl: 13.960583 +epoch: 1, batch: 23311, sum loss: 4135.674316, avg loss: 2.724423, ppl: 15.247616 +epoch: 1, batch: 23312, sum loss: 4226.245117, avg loss: 2.539811, ppl: 12.677274 +epoch: 1, batch: 23313, sum loss: 4871.116699, avg loss: 2.758277, ppl: 15.772638 +epoch: 1, batch: 23314, sum loss: 4434.874512, avg loss: 2.635101, ppl: 13.944715 +epoch: 1, batch: 23315, sum loss: 4059.795410, avg loss: 2.513805, ppl: 12.351841 +epoch: 1, batch: 23316, sum loss: 4528.406250, avg loss: 2.556977, ppl: 12.896771 +epoch: 1, batch: 23317, sum loss: 4909.072754, avg loss: 2.709201, ppl: 15.017277 +epoch: 1, batch: 23318, sum loss: 3968.348633, avg loss: 2.397794, ppl: 10.998883 +epoch: 1, batch: 23319, sum loss: 4014.427002, avg loss: 2.312458, ppl: 10.099218 +epoch: 1, batch: 23320, sum loss: 5740.370117, avg loss: 2.836151, ppl: 17.050020 +epoch: 1, batch: 23321, sum loss: 4006.902344, avg loss: 2.608660, ppl: 13.580847 +epoch: 1, batch: 23322, sum loss: 4720.736816, avg loss: 2.780175, ppl: 16.121841 +epoch: 1, batch: 23323, sum loss: 3817.851318, avg loss: 2.531732, ppl: 12.575263 +epoch: 1, batch: 23324, sum loss: 5282.243652, avg loss: 3.039266, ppl: 20.889896 +epoch: 1, batch: 23325, sum loss: 4400.387207, avg loss: 2.586941, ppl: 13.289062 +epoch: 1, batch: 23326, sum loss: 4183.870605, avg loss: 2.538756, ppl: 12.663912 +epoch: 1, batch: 23327, sum loss: 4328.140137, avg loss: 2.476053, ppl: 11.894222 +epoch: 1, batch: 23328, sum loss: 
5260.796387, avg loss: 2.754344, ppl: 15.710728 +epoch: 1, batch: 23329, sum loss: 3984.845947, avg loss: 2.431267, ppl: 11.373278 +epoch: 1, batch: 23330, sum loss: 3785.247559, avg loss: 2.438948, ppl: 11.460982 +epoch: 1, batch: 23331, sum loss: 4911.229004, avg loss: 2.728461, ppl: 15.309301 +epoch: 1, batch: 23332, sum loss: 5911.287109, avg loss: 2.837872, ppl: 17.079382 +epoch: 1, batch: 23333, sum loss: 4604.979980, avg loss: 2.422399, ppl: 11.272868 +epoch: 1, batch: 23334, sum loss: 4414.462402, avg loss: 2.522550, ppl: 12.460328 +epoch: 1, batch: 23335, sum loss: 3089.238281, avg loss: 2.114468, ppl: 8.285180 +epoch: 1, batch: 23336, sum loss: 4089.285156, avg loss: 2.657105, ppl: 14.254968 +epoch: 1, batch: 23337, sum loss: 4544.211426, avg loss: 2.794718, ppl: 16.358015 +epoch: 1, batch: 23338, sum loss: 4423.022461, avg loss: 2.866508, ppl: 17.575546 +epoch: 1, batch: 23339, sum loss: 3977.297363, avg loss: 2.215765, ppl: 9.168416 +epoch: 1, batch: 23340, sum loss: 5357.100586, avg loss: 2.838951, ppl: 17.097818 +epoch: 1, batch: 23341, sum loss: 3585.287354, avg loss: 2.437313, ppl: 11.442255 +epoch: 1, batch: 23342, sum loss: 4786.089844, avg loss: 2.731786, ppl: 15.360304 +epoch: 1, batch: 23343, sum loss: 4726.780273, avg loss: 2.690256, ppl: 14.735450 +epoch: 1, batch: 23344, sum loss: 4728.507812, avg loss: 2.629871, ppl: 13.871976 +epoch: 1, batch: 23345, sum loss: 3764.769043, avg loss: 2.441484, ppl: 11.490085 +epoch: 1, batch: 23346, sum loss: 4862.733398, avg loss: 2.642790, ppl: 14.052353 +epoch: 1, batch: 23347, sum loss: 4441.152344, avg loss: 2.585071, ppl: 13.264235 +epoch: 1, batch: 23348, sum loss: 4515.250000, avg loss: 2.523896, ppl: 12.477113 +epoch: 1, batch: 23349, sum loss: 4100.489746, avg loss: 2.464237, ppl: 11.754507 +epoch: 1, batch: 23350, sum loss: 4409.918945, avg loss: 2.651785, ppl: 14.179332 +epoch: 1, batch: 23351, sum loss: 3963.867432, avg loss: 2.602671, ppl: 13.499743 +epoch: 1, batch: 23352, sum loss: 
5721.028809, avg loss: 3.035028, ppl: 20.801569 +epoch: 1, batch: 23353, sum loss: 4662.407715, avg loss: 2.701279, ppl: 14.898774 +epoch: 1, batch: 23354, sum loss: 4406.521484, avg loss: 2.516574, ppl: 12.386094 +epoch: 1, batch: 23355, sum loss: 4754.401855, avg loss: 2.860651, ppl: 17.472898 +epoch: 1, batch: 23356, sum loss: 4169.706543, avg loss: 2.540955, ppl: 12.691781 +epoch: 1, batch: 23357, sum loss: 4703.708496, avg loss: 2.671044, ppl: 14.455050 +epoch: 1, batch: 23358, sum loss: 4209.866211, avg loss: 2.582740, ppl: 13.233349 +epoch: 1, batch: 23359, sum loss: 3748.567871, avg loss: 2.632421, ppl: 13.907403 +epoch: 1, batch: 23360, sum loss: 4755.965332, avg loss: 2.749113, ppl: 15.628760 +epoch: 1, batch: 23361, sum loss: 5364.190918, avg loss: 2.675407, ppl: 14.518256 +epoch: 1, batch: 23362, sum loss: 4905.823242, avg loss: 2.916661, ppl: 18.479473 +epoch: 1, batch: 23363, sum loss: 4717.521973, avg loss: 2.723742, ppl: 15.237241 +epoch: 1, batch: 23364, sum loss: 3019.799805, avg loss: 2.072615, ppl: 7.945571 +epoch: 1, batch: 23365, sum loss: 4087.279541, avg loss: 2.475639, ppl: 11.889300 +epoch: 1, batch: 23366, sum loss: 5547.694824, avg loss: 3.168301, ppl: 23.767073 +epoch: 1, batch: 23367, sum loss: 4418.320312, avg loss: 2.801725, ppl: 16.473042 +epoch: 1, batch: 23368, sum loss: 4298.890137, avg loss: 2.389600, ppl: 10.909127 +epoch: 1, batch: 23369, sum loss: 4652.212891, avg loss: 2.503882, ppl: 12.229877 +epoch: 1, batch: 23370, sum loss: 4708.809570, avg loss: 2.570311, ppl: 13.069886 +epoch: 1, batch: 23371, sum loss: 3781.810059, avg loss: 2.543248, ppl: 12.720923 +epoch: 1, batch: 23372, sum loss: 4188.265625, avg loss: 2.635787, ppl: 13.954290 +epoch: 1, batch: 23373, sum loss: 3933.732910, avg loss: 2.515174, ppl: 12.368766 +epoch: 1, batch: 23374, sum loss: 4222.720215, avg loss: 2.779934, ppl: 16.117960 +epoch: 1, batch: 23375, sum loss: 4563.076660, avg loss: 2.585313, ppl: 13.267436 +epoch: 1, batch: 23376, sum loss: 
5115.832520, avg loss: 2.969142, ppl: 19.475212 +epoch: 1, batch: 23377, sum loss: 4447.298340, avg loss: 2.522574, ppl: 12.460631 +epoch: 1, batch: 23378, sum loss: 5241.820801, avg loss: 2.713158, ppl: 15.076812 +epoch: 1, batch: 23379, sum loss: 5046.146484, avg loss: 2.643346, ppl: 14.060164 +epoch: 1, batch: 23380, sum loss: 4305.669922, avg loss: 2.692727, ppl: 14.771898 +epoch: 1, batch: 23381, sum loss: 4010.303955, avg loss: 2.560858, ppl: 12.946924 +epoch: 1, batch: 23382, sum loss: 4855.291504, avg loss: 2.787194, ppl: 16.235395 +epoch: 1, batch: 23383, sum loss: 4392.756836, avg loss: 2.573378, ppl: 13.110040 +epoch: 1, batch: 23384, sum loss: 4180.879883, avg loss: 2.774307, ppl: 16.027508 +epoch: 1, batch: 23385, sum loss: 4432.665039, avg loss: 2.556324, ppl: 12.888347 +epoch: 1, batch: 23386, sum loss: 3986.602783, avg loss: 2.552243, ppl: 12.835856 +epoch: 1, batch: 23387, sum loss: 5187.116211, avg loss: 2.888149, ppl: 17.960043 +epoch: 1, batch: 23388, sum loss: 4693.520996, avg loss: 2.698977, ppl: 14.864517 +epoch: 1, batch: 23389, sum loss: 4554.664551, avg loss: 2.811522, ppl: 16.635210 +epoch: 1, batch: 23390, sum loss: 3896.855469, avg loss: 2.493190, ppl: 12.099808 +epoch: 1, batch: 23391, sum loss: 4365.132812, avg loss: 2.421039, ppl: 11.257545 +epoch: 1, batch: 23392, sum loss: 4304.869141, avg loss: 2.637788, ppl: 13.982235 +epoch: 1, batch: 23393, sum loss: 3591.022461, avg loss: 2.495498, ppl: 12.127777 +epoch: 1, batch: 23394, sum loss: 3922.861816, avg loss: 2.423015, ppl: 11.279820 +epoch: 1, batch: 23395, sum loss: 4503.529297, avg loss: 2.514533, ppl: 12.360838 +epoch: 1, batch: 23396, sum loss: 5240.437988, avg loss: 2.857382, ppl: 17.415865 +epoch: 1, batch: 23397, sum loss: 4628.514648, avg loss: 2.658538, ppl: 14.275405 +epoch: 1, batch: 23398, sum loss: 4054.109375, avg loss: 2.452577, ppl: 11.618244 +epoch: 1, batch: 23399, sum loss: 5179.643066, avg loss: 2.569268, ppl: 13.056257 +epoch: 1, batch: 23400, sum loss: 
4372.872559, avg loss: 2.563231, ppl: 12.977683 +epoch: 1, batch: 23401, sum loss: 4505.426758, avg loss: 2.605799, ppl: 13.542044 +epoch: 1, batch: 23402, sum loss: 3912.517578, avg loss: 2.640026, ppl: 14.013562 +epoch: 1, batch: 23403, sum loss: 4992.406738, avg loss: 2.864261, ppl: 17.536087 +epoch: 1, batch: 23404, sum loss: 4438.525391, avg loss: 2.546486, ppl: 12.762180 +epoch: 1, batch: 23405, sum loss: 3884.526611, avg loss: 2.429348, ppl: 11.351473 +epoch: 1, batch: 23406, sum loss: 4986.795410, avg loss: 2.771982, ppl: 15.990294 +epoch: 1, batch: 23407, sum loss: 3717.006104, avg loss: 2.606596, ppl: 13.552841 +epoch: 1, batch: 23408, sum loss: 4275.141602, avg loss: 2.648787, ppl: 14.136877 +epoch: 1, batch: 23409, sum loss: 3829.552490, avg loss: 2.459571, ppl: 11.699796 +epoch: 1, batch: 23410, sum loss: 5391.258301, avg loss: 2.873805, ppl: 17.704256 +epoch: 1, batch: 23411, sum loss: 4961.209961, avg loss: 2.725940, ppl: 15.270758 +epoch: 1, batch: 23412, sum loss: 3945.937256, avg loss: 2.503767, ppl: 12.228475 +epoch: 1, batch: 23413, sum loss: 4569.694336, avg loss: 2.632312, ppl: 13.905888 +epoch: 1, batch: 23414, sum loss: 3914.500000, avg loss: 2.515745, ppl: 12.375831 +epoch: 1, batch: 23415, sum loss: 3896.539062, avg loss: 2.543433, ppl: 12.723274 +epoch: 1, batch: 23416, sum loss: 4940.462891, avg loss: 2.800716, ppl: 16.456425 +epoch: 1, batch: 23417, sum loss: 4086.802979, avg loss: 2.530528, ppl: 12.560137 +epoch: 1, batch: 23418, sum loss: 4422.629395, avg loss: 2.726652, ppl: 15.281637 +epoch: 1, batch: 23419, sum loss: 3648.699707, avg loss: 2.519820, ppl: 12.426365 +epoch: 1, batch: 23420, sum loss: 4096.376465, avg loss: 2.533319, ppl: 12.595241 +epoch: 1, batch: 23421, sum loss: 5297.350586, avg loss: 2.901068, ppl: 18.193573 +epoch: 1, batch: 23422, sum loss: 4696.011719, avg loss: 3.012195, ppl: 20.331978 +epoch: 1, batch: 23423, sum loss: 4962.597656, avg loss: 2.953927, ppl: 19.181135 +epoch: 1, batch: 23424, sum loss: 
5644.394043, avg loss: 2.955180, ppl: 19.205183 +epoch: 1, batch: 23425, sum loss: 3673.449707, avg loss: 2.377637, ppl: 10.779406 +epoch: 1, batch: 23426, sum loss: 4892.031250, avg loss: 2.925856, ppl: 18.650187 +epoch: 1, batch: 23427, sum loss: 4238.529297, avg loss: 2.408255, ppl: 11.114551 +epoch: 1, batch: 23428, sum loss: 4683.405762, avg loss: 2.735634, ppl: 15.419518 +epoch: 1, batch: 23429, sum loss: 4083.869629, avg loss: 2.356532, ppl: 10.554281 +epoch: 1, batch: 23430, sum loss: 4855.668945, avg loss: 2.656274, ppl: 14.243121 +epoch: 1, batch: 23431, sum loss: 4223.520996, avg loss: 2.458394, ppl: 11.686029 +epoch: 1, batch: 23432, sum loss: 6010.677246, avg loss: 2.955102, ppl: 19.203676 +epoch: 1, batch: 23433, sum loss: 5693.156738, avg loss: 2.770392, ppl: 15.964898 +epoch: 1, batch: 23434, sum loss: 3931.279541, avg loss: 2.371098, ppl: 10.709140 +epoch: 1, batch: 23435, sum loss: 3697.743652, avg loss: 2.512054, ppl: 12.330233 +epoch: 1, batch: 23436, sum loss: 3171.927246, avg loss: 2.254390, ppl: 9.529483 +epoch: 1, batch: 23437, sum loss: 3361.343994, avg loss: 2.372155, ppl: 10.720472 +epoch: 1, batch: 23438, sum loss: 4883.400879, avg loss: 2.748115, ppl: 15.613178 +epoch: 1, batch: 23439, sum loss: 4386.022949, avg loss: 2.542622, ppl: 12.712962 +epoch: 1, batch: 23440, sum loss: 4557.134766, avg loss: 2.488878, ppl: 12.047749 +epoch: 1, batch: 23441, sum loss: 4046.536865, avg loss: 2.631038, ppl: 13.888181 +epoch: 1, batch: 23442, sum loss: 4213.161133, avg loss: 2.463837, ppl: 11.749808 +epoch: 1, batch: 23443, sum loss: 4897.084961, avg loss: 2.749627, ppl: 15.636792 +epoch: 1, batch: 23444, sum loss: 4637.672852, avg loss: 2.623118, ppl: 13.778621 +epoch: 1, batch: 23445, sum loss: 4236.039062, avg loss: 2.530489, ppl: 12.559652 +epoch: 1, batch: 23446, sum loss: 4319.374512, avg loss: 2.454190, ppl: 11.637004 +epoch: 1, batch: 23447, sum loss: 3875.280273, avg loss: 2.748426, ppl: 15.618030 +epoch: 1, batch: 23448, sum loss: 
4387.335449, avg loss: 2.806996, ppl: 16.560102 +epoch: 1, batch: 23449, sum loss: 4300.872070, avg loss: 2.811028, ppl: 16.626993 +epoch: 1, batch: 23450, sum loss: 3780.144043, avg loss: 2.467457, ppl: 11.792421 +epoch: 1, batch: 23451, sum loss: 4752.970703, avg loss: 2.827466, ppl: 16.902580 +epoch: 1, batch: 23452, sum loss: 4824.310059, avg loss: 2.814650, ppl: 16.687336 +epoch: 1, batch: 23453, sum loss: 4617.124512, avg loss: 2.836071, ppl: 17.048658 +epoch: 1, batch: 23454, sum loss: 4236.167969, avg loss: 2.634433, ppl: 13.935406 +epoch: 1, batch: 23455, sum loss: 3288.158447, avg loss: 2.223231, ppl: 9.237126 +epoch: 1, batch: 23456, sum loss: 4236.851074, avg loss: 2.549249, ppl: 12.797485 +epoch: 1, batch: 23457, sum loss: 5459.260742, avg loss: 2.841885, ppl: 17.148056 +epoch: 1, batch: 23458, sum loss: 6742.169922, avg loss: 3.068807, ppl: 21.516226 +epoch: 1, batch: 23459, sum loss: 3840.843018, avg loss: 2.550361, ppl: 12.811724 +epoch: 1, batch: 23460, sum loss: 5062.733398, avg loss: 2.716059, ppl: 15.120610 +epoch: 1, batch: 23461, sum loss: 5159.436035, avg loss: 2.754637, ppl: 15.715343 +epoch: 1, batch: 23462, sum loss: 5032.802246, avg loss: 2.669922, ppl: 14.438838 +epoch: 1, batch: 23463, sum loss: 4291.563477, avg loss: 2.677207, ppl: 14.544417 +epoch: 1, batch: 23464, sum loss: 4109.695801, avg loss: 2.536849, ppl: 12.639784 +epoch: 1, batch: 23465, sum loss: 3506.155273, avg loss: 2.303650, ppl: 10.010656 +epoch: 1, batch: 23466, sum loss: 4253.494141, avg loss: 2.515372, ppl: 12.371213 +epoch: 1, batch: 23467, sum loss: 4979.743652, avg loss: 2.832619, ppl: 16.989895 +epoch: 1, batch: 23468, sum loss: 3871.281006, avg loss: 2.531904, ppl: 12.577431 +epoch: 1, batch: 23469, sum loss: 4549.860352, avg loss: 2.865151, ppl: 17.551710 +epoch: 1, batch: 23470, sum loss: 4473.601074, avg loss: 2.562200, ppl: 12.964309 +epoch: 1, batch: 23471, sum loss: 4885.370605, avg loss: 2.753873, ppl: 15.703331 +epoch: 1, batch: 23472, sum loss: 
4343.254395, avg loss: 2.807534, ppl: 16.569004 +epoch: 1, batch: 23473, sum loss: 5846.662109, avg loss: 2.958837, ppl: 19.275547 +epoch: 1, batch: 23474, sum loss: 3938.596436, avg loss: 2.582686, ppl: 13.232635 +epoch: 1, batch: 23475, sum loss: 3617.774170, avg loss: 2.489865, ppl: 12.059651 +epoch: 1, batch: 23476, sum loss: 4186.733398, avg loss: 2.462785, ppl: 11.737450 +epoch: 1, batch: 23477, sum loss: 3589.297852, avg loss: 2.388089, ppl: 10.892657 +epoch: 1, batch: 23478, sum loss: 3875.975830, avg loss: 2.636718, ppl: 13.967292 +epoch: 1, batch: 23479, sum loss: 5072.911133, avg loss: 2.575082, ppl: 13.132392 +epoch: 1, batch: 23480, sum loss: 4242.465820, avg loss: 2.730030, ppl: 15.333341 +epoch: 1, batch: 23481, sum loss: 3505.292725, avg loss: 2.600365, ppl: 13.468658 +epoch: 1, batch: 23482, sum loss: 4702.403809, avg loss: 2.502610, ppl: 12.214332 +epoch: 1, batch: 23483, sum loss: 4440.792480, avg loss: 2.533253, ppl: 12.594412 +epoch: 1, batch: 23484, sum loss: 4006.121094, avg loss: 2.708669, ppl: 15.009280 +epoch: 1, batch: 23485, sum loss: 4576.455078, avg loss: 2.790521, ppl: 16.289511 +epoch: 1, batch: 23486, sum loss: 3789.100586, avg loss: 2.395133, ppl: 10.969657 +epoch: 1, batch: 23487, sum loss: 3398.805420, avg loss: 2.407086, ppl: 11.101562 +epoch: 1, batch: 23488, sum loss: 3279.289307, avg loss: 2.439947, ppl: 11.472437 +epoch: 1, batch: 23489, sum loss: 4709.340820, avg loss: 2.639765, ppl: 14.009914 +epoch: 1, batch: 23490, sum loss: 4698.039551, avg loss: 2.624603, ppl: 13.799095 +epoch: 1, batch: 23491, sum loss: 4459.187012, avg loss: 2.827639, ppl: 16.905506 +epoch: 1, batch: 23492, sum loss: 4770.745605, avg loss: 2.726140, ppl: 15.273820 +epoch: 1, batch: 23493, sum loss: 5037.975586, avg loss: 3.070064, ppl: 21.543287 +epoch: 1, batch: 23494, sum loss: 4684.880371, avg loss: 2.791943, ppl: 16.312685 +epoch: 1, batch: 23495, sum loss: 4376.577637, avg loss: 2.747381, ppl: 15.601713 +epoch: 1, batch: 23496, sum loss: 
4802.480469, avg loss: 2.641629, ppl: 14.036043 +epoch: 1, batch: 23497, sum loss: 4794.692383, avg loss: 2.588927, ppl: 13.315474 +epoch: 1, batch: 23498, sum loss: 4798.351074, avg loss: 2.612058, ppl: 13.627072 +epoch: 1, batch: 23499, sum loss: 3783.834229, avg loss: 2.653460, ppl: 14.203100 +epoch: 1, batch: 23500, sum loss: 4464.287598, avg loss: 2.740508, ppl: 15.494856 +epoch: 1, batch: 23501, sum loss: 4519.959473, avg loss: 2.833830, ppl: 17.010492 +epoch: 1, batch: 23502, sum loss: 4003.509521, avg loss: 2.633888, ppl: 13.927816 +epoch: 1, batch: 23503, sum loss: 4285.005859, avg loss: 2.717188, ppl: 15.137701 +epoch: 1, batch: 23504, sum loss: 4224.662598, avg loss: 2.541915, ppl: 12.703975 +epoch: 1, batch: 23505, sum loss: 4574.206055, avg loss: 2.747271, ppl: 15.600002 +epoch: 1, batch: 23506, sum loss: 4450.175293, avg loss: 2.486131, ppl: 12.014707 +epoch: 1, batch: 23507, sum loss: 3912.360840, avg loss: 2.395812, ppl: 10.977105 +epoch: 1, batch: 23508, sum loss: 4120.611328, avg loss: 2.415364, ppl: 11.193845 +epoch: 1, batch: 23509, sum loss: 3919.091553, avg loss: 2.480438, ppl: 11.946493 +epoch: 1, batch: 23510, sum loss: 3810.316650, avg loss: 2.769125, ppl: 15.944684 +epoch: 1, batch: 23511, sum loss: 4918.458008, avg loss: 2.796167, ppl: 16.381737 +epoch: 1, batch: 23512, sum loss: 4097.708008, avg loss: 2.652238, ppl: 14.185756 +epoch: 1, batch: 23513, sum loss: 4568.166504, avg loss: 2.695083, ppl: 14.806753 +epoch: 1, batch: 23514, sum loss: 5067.612305, avg loss: 2.748163, ppl: 15.613922 +epoch: 1, batch: 23515, sum loss: 5217.910156, avg loss: 2.829669, ppl: 16.939857 +epoch: 1, batch: 23516, sum loss: 3695.959717, avg loss: 2.504038, ppl: 12.231785 +epoch: 1, batch: 23517, sum loss: 4749.424805, avg loss: 2.731124, ppl: 15.350133 +epoch: 1, batch: 23518, sum loss: 3982.041992, avg loss: 2.487222, ppl: 12.027813 +epoch: 1, batch: 23519, sum loss: 3899.530029, avg loss: 2.584182, ppl: 13.252441 +epoch: 1, batch: 23520, sum loss: 
4274.759277, avg loss: 2.597059, ppl: 13.424203 +epoch: 1, batch: 23521, sum loss: 4993.587891, avg loss: 2.819643, ppl: 16.770863 +epoch: 1, batch: 23522, sum loss: 4372.281250, avg loss: 2.895550, ppl: 18.093458 +epoch: 1, batch: 23523, sum loss: 4726.006836, avg loss: 2.886993, ppl: 17.939280 +epoch: 1, batch: 23524, sum loss: 4564.597168, avg loss: 2.594996, ppl: 13.396530 +epoch: 1, batch: 23525, sum loss: 4936.551270, avg loss: 2.592727, ppl: 13.366168 +epoch: 1, batch: 23526, sum loss: 4203.157715, avg loss: 2.569167, ppl: 13.054950 +epoch: 1, batch: 23527, sum loss: 4311.848633, avg loss: 2.772893, ppl: 16.004871 +epoch: 1, batch: 23528, sum loss: 4510.977539, avg loss: 2.588054, ppl: 13.303853 +epoch: 1, batch: 23529, sum loss: 4765.976562, avg loss: 2.667027, ppl: 14.397099 +epoch: 1, batch: 23530, sum loss: 4282.208984, avg loss: 2.494006, ppl: 12.109695 +epoch: 1, batch: 23531, sum loss: 5350.455078, avg loss: 2.704982, ppl: 14.954048 +epoch: 1, batch: 23532, sum loss: 5171.109375, avg loss: 2.818043, ppl: 16.744059 +epoch: 1, batch: 23533, sum loss: 4168.993652, avg loss: 2.679302, ppl: 14.574913 +epoch: 1, batch: 23534, sum loss: 4492.798340, avg loss: 2.606032, ppl: 13.545192 +epoch: 1, batch: 23535, sum loss: 4241.174316, avg loss: 2.713483, ppl: 15.081718 +epoch: 1, batch: 23536, sum loss: 3944.537842, avg loss: 2.430399, ppl: 11.363418 +epoch: 1, batch: 23537, sum loss: 3886.093262, avg loss: 2.456443, ppl: 11.663252 +epoch: 1, batch: 23538, sum loss: 4259.047363, avg loss: 2.359583, ppl: 10.586535 +epoch: 1, batch: 23539, sum loss: 4030.924561, avg loss: 2.696270, ppl: 14.824341 +epoch: 1, batch: 23540, sum loss: 3664.166992, avg loss: 2.363979, ppl: 10.633175 +epoch: 1, batch: 23541, sum loss: 4869.617676, avg loss: 2.846066, ppl: 17.219896 +epoch: 1, batch: 23542, sum loss: 4802.872070, avg loss: 3.039793, ppl: 20.900908 +epoch: 1, batch: 23543, sum loss: 4409.438965, avg loss: 2.693610, ppl: 14.784949 +epoch: 1, batch: 23544, sum loss: 
4437.390625, avg loss: 2.735753, ppl: 15.421345 +epoch: 1, batch: 23545, sum loss: 3935.555664, avg loss: 2.570579, ppl: 13.073386 +epoch: 1, batch: 23546, sum loss: 4307.734863, avg loss: 2.702469, ppl: 14.916510 +epoch: 1, batch: 23547, sum loss: 5146.785156, avg loss: 2.883353, ppl: 17.874109 +epoch: 1, batch: 23548, sum loss: 4885.806641, avg loss: 2.715846, ppl: 15.117392 +epoch: 1, batch: 23549, sum loss: 4721.680664, avg loss: 2.732454, ppl: 15.370561 +epoch: 1, batch: 23550, sum loss: 5065.030273, avg loss: 2.681329, ppl: 14.604490 +epoch: 1, batch: 23551, sum loss: 4381.907715, avg loss: 2.472860, ppl: 11.856306 +epoch: 1, batch: 23552, sum loss: 4830.838867, avg loss: 2.721599, ppl: 15.204617 +epoch: 1, batch: 23553, sum loss: 4679.422852, avg loss: 2.884971, ppl: 17.903048 +epoch: 1, batch: 23554, sum loss: 4788.244629, avg loss: 2.667546, ppl: 14.404574 +epoch: 1, batch: 23555, sum loss: 4844.565430, avg loss: 2.821529, ppl: 16.802525 +epoch: 1, batch: 23556, sum loss: 4595.452148, avg loss: 2.803814, ppl: 16.507494 +epoch: 1, batch: 23557, sum loss: 4364.792480, avg loss: 2.759034, ppl: 15.784594 +epoch: 1, batch: 23558, sum loss: 4533.727539, avg loss: 2.833580, ppl: 17.006226 +epoch: 1, batch: 23559, sum loss: 4689.859863, avg loss: 2.721915, ppl: 15.209424 +epoch: 1, batch: 23560, sum loss: 5185.068359, avg loss: 2.898305, ppl: 18.143373 +epoch: 1, batch: 23561, sum loss: 4528.393066, avg loss: 2.705133, ppl: 14.956312 +epoch: 1, batch: 23562, sum loss: 3410.736328, avg loss: 2.284485, ppl: 9.820628 +epoch: 1, batch: 23563, sum loss: 3726.271484, avg loss: 2.454724, ppl: 11.643223 +epoch: 1, batch: 23564, sum loss: 4038.751221, avg loss: 2.486916, ppl: 12.024135 +epoch: 1, batch: 23565, sum loss: 5886.885742, avg loss: 2.940502, ppl: 18.925352 +epoch: 1, batch: 23566, sum loss: 4982.142578, avg loss: 2.681454, ppl: 14.606321 +epoch: 1, batch: 23567, sum loss: 4101.954102, avg loss: 2.682769, ppl: 14.625540 +epoch: 1, batch: 23568, sum loss: 
5143.957031, avg loss: 2.643349, ppl: 14.060211 +epoch: 1, batch: 23569, sum loss: 4369.445801, avg loss: 2.633783, ppl: 13.926352 +epoch: 1, batch: 23570, sum loss: 3086.894775, avg loss: 2.238502, ppl: 9.379273 +epoch: 1, batch: 23571, sum loss: 3988.516602, avg loss: 2.641402, ppl: 14.032861 +epoch: 1, batch: 23572, sum loss: 4234.958008, avg loss: 2.612559, ppl: 13.633893 +epoch: 1, batch: 23573, sum loss: 3995.717529, avg loss: 2.498885, ppl: 12.168920 +epoch: 1, batch: 23574, sum loss: 4517.822754, avg loss: 2.716670, ppl: 15.129860 +epoch: 1, batch: 23575, sum loss: 4645.998535, avg loss: 2.538797, ppl: 12.664429 +epoch: 1, batch: 23576, sum loss: 3954.484131, avg loss: 2.668343, ppl: 14.416063 +epoch: 1, batch: 23577, sum loss: 3908.760742, avg loss: 2.651805, ppl: 14.179612 +epoch: 1, batch: 23578, sum loss: 3677.823242, avg loss: 2.222250, ppl: 9.228068 +epoch: 1, batch: 23579, sum loss: 4641.628906, avg loss: 2.526744, ppl: 12.512700 +epoch: 1, batch: 23580, sum loss: 4841.951172, avg loss: 2.793971, ppl: 16.345793 +epoch: 1, batch: 23581, sum loss: 4161.476562, avg loss: 2.635514, ppl: 13.950481 +epoch: 1, batch: 23582, sum loss: 4353.429199, avg loss: 2.594415, ppl: 13.388759 +epoch: 1, batch: 23583, sum loss: 4200.923340, avg loss: 2.743908, ppl: 15.547629 +epoch: 1, batch: 23584, sum loss: 3974.592041, avg loss: 2.301443, ppl: 9.988586 +epoch: 1, batch: 23585, sum loss: 4260.492676, avg loss: 2.610596, ppl: 13.607161 +epoch: 1, batch: 23586, sum loss: 4423.498535, avg loss: 3.005094, ppl: 20.188114 +epoch: 1, batch: 23587, sum loss: 4309.442871, avg loss: 2.611784, ppl: 13.623326 +epoch: 1, batch: 23588, sum loss: 4391.593750, avg loss: 2.651929, ppl: 14.181363 +epoch: 1, batch: 23589, sum loss: 3701.365234, avg loss: 2.584752, ppl: 13.260004 +epoch: 1, batch: 23590, sum loss: 4649.282715, avg loss: 2.521303, ppl: 12.444801 +epoch: 1, batch: 23591, sum loss: 4614.660645, avg loss: 2.604210, ppl: 13.520545 +epoch: 1, batch: 23592, sum loss: 
4890.722168, avg loss: 2.770947, ppl: 15.973762 +epoch: 1, batch: 23593, sum loss: 4984.630859, avg loss: 2.786267, ppl: 16.220348 +epoch: 1, batch: 23594, sum loss: 4169.028809, avg loss: 2.442313, ppl: 11.499614 +epoch: 1, batch: 23595, sum loss: 4057.411377, avg loss: 2.543832, ppl: 12.728348 +epoch: 1, batch: 23596, sum loss: 4962.407715, avg loss: 2.589983, ppl: 13.329548 +epoch: 1, batch: 23597, sum loss: 4336.343750, avg loss: 2.739320, ppl: 15.476459 +epoch: 1, batch: 23598, sum loss: 2874.990967, avg loss: 2.161647, ppl: 8.685433 +epoch: 1, batch: 23599, sum loss: 4247.048828, avg loss: 2.628124, ppl: 13.847771 +epoch: 1, batch: 23600, sum loss: 4189.124023, avg loss: 2.458406, ppl: 11.686172 +epoch: 1, batch: 23601, sum loss: 3942.614746, avg loss: 2.222443, ppl: 9.229855 +epoch: 1, batch: 23602, sum loss: 4460.692871, avg loss: 2.538812, ppl: 12.664616 +epoch: 1, batch: 23603, sum loss: 4700.416016, avg loss: 2.434187, ppl: 11.406547 +epoch: 1, batch: 23604, sum loss: 5142.809082, avg loss: 2.835066, ppl: 17.031521 +epoch: 1, batch: 23605, sum loss: 4881.888184, avg loss: 2.838307, ppl: 17.086815 +epoch: 1, batch: 23606, sum loss: 4633.339355, avg loss: 2.541601, ppl: 12.699993 +epoch: 1, batch: 23607, sum loss: 4215.616211, avg loss: 2.626552, ppl: 13.826017 +epoch: 1, batch: 23608, sum loss: 4960.797363, avg loss: 2.916400, ppl: 18.474667 +epoch: 1, batch: 23609, sum loss: 5031.118652, avg loss: 2.787323, ppl: 16.237497 +epoch: 1, batch: 23610, sum loss: 3851.597656, avg loss: 2.332888, ppl: 10.307666 +epoch: 1, batch: 23611, sum loss: 4385.519531, avg loss: 2.675729, ppl: 14.522930 +epoch: 1, batch: 23612, sum loss: 4009.250488, avg loss: 2.488672, ppl: 12.045270 +epoch: 1, batch: 23613, sum loss: 4875.314453, avg loss: 2.672870, ppl: 14.481467 +epoch: 1, batch: 23614, sum loss: 4377.285645, avg loss: 2.501306, ppl: 12.198416 +epoch: 1, batch: 23615, sum loss: 4721.867676, avg loss: 2.758100, ppl: 15.769856 +epoch: 1, batch: 23616, sum loss: 
5204.907227, avg loss: 2.805880, ppl: 16.541624 +epoch: 1, batch: 23617, sum loss: 4225.375000, avg loss: 2.622828, ppl: 13.774616 +epoch: 1, batch: 23618, sum loss: 4163.647461, avg loss: 2.693174, ppl: 14.778514 +epoch: 1, batch: 23619, sum loss: 4843.284668, avg loss: 2.692209, ppl: 14.764257 +epoch: 1, batch: 23620, sum loss: 4390.737793, avg loss: 2.581269, ppl: 13.213890 +epoch: 1, batch: 23621, sum loss: 4877.929688, avg loss: 2.568683, ppl: 13.048634 +epoch: 1, batch: 23622, sum loss: 3854.119385, avg loss: 2.509192, ppl: 12.294997 +epoch: 1, batch: 23623, sum loss: 4693.453613, avg loss: 2.865356, ppl: 17.555305 +epoch: 1, batch: 23624, sum loss: 4797.526367, avg loss: 2.596064, ppl: 13.410851 +epoch: 1, batch: 23625, sum loss: 4540.437500, avg loss: 2.678724, ppl: 14.566499 +epoch: 1, batch: 23626, sum loss: 3597.402832, avg loss: 2.420863, ppl: 11.255570 +epoch: 1, batch: 23627, sum loss: 4027.940186, avg loss: 2.531703, ppl: 12.574909 +epoch: 1, batch: 23628, sum loss: 4713.824219, avg loss: 2.740596, ppl: 15.496212 +epoch: 1, batch: 23629, sum loss: 4542.108398, avg loss: 2.627015, ppl: 13.832414 +epoch: 1, batch: 23630, sum loss: 3465.468262, avg loss: 2.393279, ppl: 10.949342 +epoch: 1, batch: 23631, sum loss: 3744.652588, avg loss: 2.452294, ppl: 11.614960 +epoch: 1, batch: 23632, sum loss: 5475.587891, avg loss: 2.857823, ppl: 17.423548 +epoch: 1, batch: 23633, sum loss: 4974.733398, avg loss: 2.727376, ppl: 15.292703 +epoch: 1, batch: 23634, sum loss: 3934.646484, avg loss: 2.506144, ppl: 12.257577 +epoch: 1, batch: 23635, sum loss: 4723.061523, avg loss: 2.672927, ppl: 14.482292 +epoch: 1, batch: 23636, sum loss: 4186.469727, avg loss: 2.388175, ppl: 10.893590 +epoch: 1, batch: 23637, sum loss: 3952.522461, avg loss: 2.490562, ppl: 12.068062 +epoch: 1, batch: 23638, sum loss: 4617.186035, avg loss: 2.860710, ppl: 17.473932 +epoch: 1, batch: 23639, sum loss: 3301.075195, avg loss: 2.091936, ppl: 8.100584 +epoch: 1, batch: 23640, sum loss: 
5583.224609, avg loss: 2.834124, ppl: 17.015493 +epoch: 1, batch: 23641, sum loss: 3787.807617, avg loss: 2.434324, ppl: 11.408099 +epoch: 1, batch: 23642, sum loss: 4670.484863, avg loss: 2.806782, ppl: 16.556549 +epoch: 1, batch: 23643, sum loss: 4820.907227, avg loss: 2.654685, ppl: 14.220499 +epoch: 1, batch: 23644, sum loss: 4592.470703, avg loss: 2.740138, ppl: 15.489120 +epoch: 1, batch: 23645, sum loss: 3838.132080, avg loss: 2.646988, ppl: 14.111466 +epoch: 1, batch: 23646, sum loss: 4856.393555, avg loss: 2.865129, ppl: 17.551317 +epoch: 1, batch: 23647, sum loss: 4154.473633, avg loss: 2.351145, ppl: 10.497585 +epoch: 1, batch: 23648, sum loss: 4737.576660, avg loss: 2.770513, ppl: 15.966816 +epoch: 1, batch: 23649, sum loss: 3359.897949, avg loss: 2.425919, ppl: 11.312621 +epoch: 1, batch: 23650, sum loss: 5331.538086, avg loss: 2.932639, ppl: 18.777121 +epoch: 1, batch: 23651, sum loss: 3976.956299, avg loss: 2.476311, ppl: 11.897300 +epoch: 1, batch: 23652, sum loss: 4345.855469, avg loss: 2.409011, ppl: 11.122953 +epoch: 1, batch: 23653, sum loss: 4374.203125, avg loss: 2.431464, ppl: 11.375521 +epoch: 1, batch: 23654, sum loss: 4166.866211, avg loss: 2.529973, ppl: 12.553170 +epoch: 1, batch: 23655, sum loss: 4360.389648, avg loss: 2.824087, ppl: 16.845552 +epoch: 1, batch: 23656, sum loss: 3763.920898, avg loss: 2.481161, ppl: 11.955132 +epoch: 1, batch: 23657, sum loss: 3696.639160, avg loss: 2.646127, ppl: 14.099322 +epoch: 1, batch: 23658, sum loss: 4341.395996, avg loss: 2.612152, ppl: 13.628343 +epoch: 1, batch: 23659, sum loss: 5317.812012, avg loss: 2.854435, ppl: 17.364618 +epoch: 1, batch: 23660, sum loss: 3911.998291, avg loss: 2.455743, ppl: 11.655088 +epoch: 1, batch: 23661, sum loss: 4398.707520, avg loss: 2.391902, ppl: 10.934271 +epoch: 1, batch: 23662, sum loss: 4786.299316, avg loss: 2.698027, ppl: 14.850398 +epoch: 1, batch: 23663, sum loss: 4436.945312, avg loss: 2.591674, ppl: 13.352099 +epoch: 1, batch: 23664, sum loss: 
4474.887207, avg loss: 2.615364, ppl: 13.672190 +epoch: 1, batch: 23665, sum loss: 4749.111816, avg loss: 2.753109, ppl: 15.691333 +epoch: 1, batch: 23666, sum loss: 4224.750977, avg loss: 2.775789, ppl: 16.051291 +epoch: 1, batch: 23667, sum loss: 5095.956543, avg loss: 2.613311, ppl: 13.644153 +epoch: 1, batch: 23668, sum loss: 5041.086914, avg loss: 2.864254, ppl: 17.535963 +epoch: 1, batch: 23669, sum loss: 4245.641602, avg loss: 2.500378, ppl: 12.187099 +epoch: 1, batch: 23670, sum loss: 5341.115723, avg loss: 2.729236, ppl: 15.321183 +epoch: 1, batch: 23671, sum loss: 3669.890381, avg loss: 2.372263, ppl: 10.721625 +epoch: 1, batch: 23672, sum loss: 3949.592773, avg loss: 2.328769, ppl: 10.265302 +epoch: 1, batch: 23673, sum loss: 4811.237305, avg loss: 2.620500, ppl: 13.742588 +epoch: 1, batch: 23674, sum loss: 3452.792725, avg loss: 2.331393, ppl: 10.292266 +epoch: 1, batch: 23675, sum loss: 3198.224854, avg loss: 2.708065, ppl: 15.000222 +epoch: 1, batch: 23676, sum loss: 4194.130859, avg loss: 2.585777, ppl: 13.273602 +epoch: 1, batch: 23677, sum loss: 4135.274902, avg loss: 2.454169, ppl: 11.636760 +epoch: 1, batch: 23678, sum loss: 3933.127441, avg loss: 2.550666, ppl: 12.815640 +epoch: 1, batch: 23679, sum loss: 3794.720215, avg loss: 2.562269, ppl: 12.965201 +epoch: 1, batch: 23680, sum loss: 4320.431152, avg loss: 2.577823, ppl: 13.168438 +epoch: 1, batch: 23681, sum loss: 4129.195312, avg loss: 2.707669, ppl: 14.994284 +epoch: 1, batch: 23682, sum loss: 4260.395020, avg loss: 2.623396, ppl: 13.782449 +epoch: 1, batch: 23683, sum loss: 4553.916504, avg loss: 2.792101, ppl: 16.315260 +epoch: 1, batch: 23684, sum loss: 3627.946045, avg loss: 2.481495, ppl: 11.959134 +epoch: 1, batch: 23685, sum loss: 4212.527344, avg loss: 2.582788, ppl: 13.233983 +epoch: 1, batch: 23686, sum loss: 5297.562012, avg loss: 2.855829, ppl: 17.388838 +epoch: 1, batch: 23687, sum loss: 4253.980469, avg loss: 2.630786, ppl: 13.884675 +epoch: 1, batch: 23688, sum loss: 
3930.136719, avg loss: 2.484283, ppl: 11.992524 +epoch: 1, batch: 23689, sum loss: 4784.806641, avg loss: 2.710939, ppl: 15.043390 +epoch: 1, batch: 23690, sum loss: 3940.976562, avg loss: 2.510176, ppl: 12.307098 +epoch: 1, batch: 23691, sum loss: 4045.413574, avg loss: 2.515804, ppl: 12.376559 +epoch: 1, batch: 23692, sum loss: 5177.300781, avg loss: 2.592539, ppl: 13.363663 +epoch: 1, batch: 23693, sum loss: 3755.958496, avg loss: 2.661912, ppl: 14.323656 +epoch: 1, batch: 23694, sum loss: 5792.895508, avg loss: 2.867770, ppl: 17.597734 +epoch: 1, batch: 23695, sum loss: 4470.149902, avg loss: 2.464250, ppl: 11.754666 +epoch: 1, batch: 23696, sum loss: 4673.543945, avg loss: 2.548279, ppl: 12.785082 +epoch: 1, batch: 23697, sum loss: 4592.483398, avg loss: 2.462458, ppl: 11.733613 +epoch: 1, batch: 23698, sum loss: 4349.414062, avg loss: 2.549481, ppl: 12.800454 +epoch: 1, batch: 23699, sum loss: 3181.206299, avg loss: 2.083305, ppl: 8.030965 +epoch: 1, batch: 23700, sum loss: 5002.751953, avg loss: 2.730760, ppl: 15.344543 +epoch: 1, batch: 23701, sum loss: 4183.060547, avg loss: 2.467882, ppl: 11.797438 +epoch: 1, batch: 23702, sum loss: 4265.564453, avg loss: 2.708295, ppl: 15.003671 +epoch: 1, batch: 23703, sum loss: 3795.933105, avg loss: 2.517197, ppl: 12.393806 +epoch: 1, batch: 23704, sum loss: 4230.685547, avg loss: 2.402434, ppl: 11.050035 +epoch: 1, batch: 23705, sum loss: 5225.155273, avg loss: 2.780817, ppl: 16.132200 +epoch: 1, batch: 23706, sum loss: 3466.075928, avg loss: 2.295414, ppl: 9.928550 +epoch: 1, batch: 23707, sum loss: 4552.146484, avg loss: 2.640456, ppl: 14.019601 +epoch: 1, batch: 23708, sum loss: 4394.137207, avg loss: 2.498088, ppl: 12.159227 +epoch: 1, batch: 23709, sum loss: 4614.875000, avg loss: 2.737174, ppl: 15.443277 +epoch: 1, batch: 23710, sum loss: 4668.461914, avg loss: 2.792142, ppl: 16.315937 +epoch: 1, batch: 23711, sum loss: 4840.520508, avg loss: 2.753425, ppl: 15.696294 +epoch: 1, batch: 23712, sum loss: 
4438.345215, avg loss: 2.565517, ppl: 13.007387 +epoch: 1, batch: 23713, sum loss: 4625.531738, avg loss: 2.622184, ppl: 13.765749 +epoch: 1, batch: 23714, sum loss: 3878.226807, avg loss: 2.538107, ppl: 12.655687 +epoch: 1, batch: 23715, sum loss: 5435.301758, avg loss: 2.808941, ppl: 16.592344 +epoch: 1, batch: 23716, sum loss: 4522.380859, avg loss: 2.653979, ppl: 14.210474 +epoch: 1, batch: 23717, sum loss: 5443.010254, avg loss: 2.966218, ppl: 19.418344 +epoch: 1, batch: 23718, sum loss: 4077.347656, avg loss: 2.373311, ppl: 10.732865 +epoch: 1, batch: 23719, sum loss: 4302.905273, avg loss: 2.830859, ppl: 16.960022 +epoch: 1, batch: 23720, sum loss: 3902.204102, avg loss: 2.654561, ppl: 14.218737 +epoch: 1, batch: 23721, sum loss: 3651.706299, avg loss: 2.405604, ppl: 11.085120 +epoch: 1, batch: 23722, sum loss: 3943.944824, avg loss: 2.531415, ppl: 12.571284 +epoch: 1, batch: 23723, sum loss: 4166.194824, avg loss: 2.573314, ppl: 13.109193 +epoch: 1, batch: 23724, sum loss: 5355.822266, avg loss: 2.675236, ppl: 14.515772 +epoch: 1, batch: 23725, sum loss: 4457.916992, avg loss: 2.510088, ppl: 12.306019 +epoch: 1, batch: 23726, sum loss: 4400.416504, avg loss: 2.785074, ppl: 16.201014 +epoch: 1, batch: 23727, sum loss: 4707.930176, avg loss: 2.697954, ppl: 14.849321 +epoch: 1, batch: 23728, sum loss: 4475.868652, avg loss: 2.762882, ppl: 15.845444 +epoch: 1, batch: 23729, sum loss: 4757.388184, avg loss: 2.634213, ppl: 13.932340 +epoch: 1, batch: 23730, sum loss: 4619.815430, avg loss: 2.607119, ppl: 13.559933 +epoch: 1, batch: 23731, sum loss: 5246.963867, avg loss: 2.857823, ppl: 17.423561 +epoch: 1, batch: 23732, sum loss: 4122.062012, avg loss: 2.276125, ppl: 9.738869 +epoch: 1, batch: 23733, sum loss: 3723.691650, avg loss: 2.408597, ppl: 11.118357 +epoch: 1, batch: 23734, sum loss: 4609.717285, avg loss: 2.666118, ppl: 14.384017 +epoch: 1, batch: 23735, sum loss: 3912.986084, avg loss: 2.447146, ppl: 11.555317 +epoch: 1, batch: 23736, sum loss: 
4209.835938, avg loss: 2.656048, ppl: 14.239899 +epoch: 1, batch: 23737, sum loss: 4658.226074, avg loss: 2.735306, ppl: 15.414460 +epoch: 1, batch: 23738, sum loss: 3348.192627, avg loss: 2.274587, ppl: 9.723906 +epoch: 1, batch: 23739, sum loss: 4282.320801, avg loss: 2.520495, ppl: 12.434750 +epoch: 1, batch: 23740, sum loss: 4540.710449, avg loss: 2.620144, ppl: 13.737707 +epoch: 1, batch: 23741, sum loss: 4981.347656, avg loss: 2.663822, ppl: 14.351040 +epoch: 1, batch: 23742, sum loss: 4464.778320, avg loss: 2.435776, ppl: 11.424686 +epoch: 1, batch: 23743, sum loss: 3865.794678, avg loss: 2.407095, ppl: 11.101664 +epoch: 1, batch: 23744, sum loss: 4566.521484, avg loss: 2.596090, ppl: 13.411192 +epoch: 1, batch: 23745, sum loss: 4918.368164, avg loss: 2.786611, ppl: 16.225935 +epoch: 1, batch: 23746, sum loss: 5340.594727, avg loss: 2.975262, ppl: 19.594749 +epoch: 1, batch: 23747, sum loss: 4049.533936, avg loss: 2.502802, ppl: 12.216679 +epoch: 1, batch: 23748, sum loss: 4253.545410, avg loss: 2.454440, ppl: 11.639917 +epoch: 1, batch: 23749, sum loss: 3957.585449, avg loss: 2.718122, ppl: 15.151840 +epoch: 1, batch: 23750, sum loss: 3829.338379, avg loss: 2.493059, ppl: 12.098228 +epoch: 1, batch: 23751, sum loss: 5014.499023, avg loss: 2.837860, ppl: 17.079184 +epoch: 1, batch: 23752, sum loss: 4060.954102, avg loss: 2.583304, ppl: 13.240816 +epoch: 1, batch: 23753, sum loss: 4574.118164, avg loss: 2.668680, ppl: 14.420927 +epoch: 1, batch: 23754, sum loss: 5666.586914, avg loss: 2.741455, ppl: 15.509533 +epoch: 1, batch: 23755, sum loss: 3475.732422, avg loss: 2.533333, ppl: 12.595414 +epoch: 1, batch: 23756, sum loss: 4869.963867, avg loss: 2.759186, ppl: 15.786991 +epoch: 1, batch: 23757, sum loss: 3657.257080, avg loss: 2.482863, ppl: 11.975501 +epoch: 1, batch: 23758, sum loss: 4133.023438, avg loss: 2.625809, ppl: 13.815746 +epoch: 1, batch: 23759, sum loss: 4283.268066, avg loss: 2.797693, ppl: 16.406757 +epoch: 1, batch: 23760, sum loss: 
4496.944336, avg loss: 2.550734, ppl: 12.816508 +epoch: 1, batch: 23761, sum loss: 4872.578613, avg loss: 2.849461, ppl: 17.278467 +epoch: 1, batch: 23762, sum loss: 5278.171387, avg loss: 2.868571, ppl: 17.611837 +epoch: 1, batch: 23763, sum loss: 4028.564697, avg loss: 2.662634, ppl: 14.333990 +epoch: 1, batch: 23764, sum loss: 5522.939941, avg loss: 2.819265, ppl: 16.764523 +epoch: 1, batch: 23765, sum loss: 4627.290039, avg loss: 2.400047, ppl: 11.023692 +epoch: 1, batch: 23766, sum loss: 4015.337891, avg loss: 2.542963, ppl: 12.717291 +epoch: 1, batch: 23767, sum loss: 3589.458008, avg loss: 2.460218, ppl: 11.707363 +epoch: 1, batch: 23768, sum loss: 3960.459961, avg loss: 2.524194, ppl: 12.480828 +epoch: 1, batch: 23769, sum loss: 3812.203369, avg loss: 2.343087, ppl: 10.413338 +epoch: 1, batch: 23770, sum loss: 4096.645508, avg loss: 2.391504, ppl: 10.929915 +epoch: 1, batch: 23771, sum loss: 4949.292969, avg loss: 2.731398, ppl: 15.354336 +epoch: 1, batch: 23772, sum loss: 3462.231201, avg loss: 2.302015, ppl: 9.994304 +epoch: 1, batch: 23773, sum loss: 3963.244141, avg loss: 2.735158, ppl: 15.412182 +epoch: 1, batch: 23774, sum loss: 5000.786133, avg loss: 2.785953, ppl: 16.215269 +epoch: 1, batch: 23775, sum loss: 4516.040527, avg loss: 2.767182, ppl: 15.913724 +epoch: 1, batch: 23776, sum loss: 4647.490234, avg loss: 2.646635, ppl: 14.106484 +epoch: 1, batch: 23777, sum loss: 4874.083984, avg loss: 2.806036, ppl: 16.544203 +epoch: 1, batch: 23778, sum loss: 4605.360840, avg loss: 2.734775, ppl: 15.406278 +epoch: 1, batch: 23779, sum loss: 5176.407715, avg loss: 2.917930, ppl: 18.502945 +epoch: 1, batch: 23780, sum loss: 4886.738770, avg loss: 2.698365, ppl: 14.855423 +epoch: 1, batch: 23781, sum loss: 4806.744141, avg loss: 2.776860, ppl: 16.068480 +epoch: 1, batch: 23782, sum loss: 4584.333496, avg loss: 2.375302, ppl: 10.754264 +epoch: 1, batch: 23783, sum loss: 4435.310547, avg loss: 2.540270, ppl: 12.683090 +epoch: 1, batch: 23784, sum loss: 
5437.462402, avg loss: 2.669348, ppl: 14.430561 +epoch: 1, batch: 23785, sum loss: 4745.896973, avg loss: 2.915170, ppl: 18.451952 +epoch: 1, batch: 23786, sum loss: 3696.359131, avg loss: 2.523112, ppl: 12.467336 +epoch: 1, batch: 23787, sum loss: 4847.100586, avg loss: 2.898984, ppl: 18.155685 +epoch: 1, batch: 23788, sum loss: 3759.600586, avg loss: 2.358595, ppl: 10.576083 +epoch: 1, batch: 23789, sum loss: 3885.538574, avg loss: 2.420896, ppl: 11.255943 +epoch: 1, batch: 23790, sum loss: 4182.754883, avg loss: 2.643966, ppl: 14.068893 +epoch: 1, batch: 23791, sum loss: 3901.883789, avg loss: 2.354788, ppl: 10.535895 +epoch: 1, batch: 23792, sum loss: 4014.625244, avg loss: 2.430161, ppl: 11.360708 +epoch: 1, batch: 23793, sum loss: 4113.238281, avg loss: 2.517282, ppl: 12.394858 +epoch: 1, batch: 23794, sum loss: 5089.093750, avg loss: 2.841482, ppl: 17.141144 +epoch: 1, batch: 23795, sum loss: 4924.129395, avg loss: 2.891444, ppl: 18.019314 +epoch: 1, batch: 23796, sum loss: 4529.179688, avg loss: 2.571936, ppl: 13.091146 +epoch: 1, batch: 23797, sum loss: 4555.140137, avg loss: 2.617897, ppl: 13.706861 +epoch: 1, batch: 23798, sum loss: 4189.820312, avg loss: 2.626847, ppl: 13.830089 +epoch: 1, batch: 23799, sum loss: 4589.057617, avg loss: 2.709007, ppl: 15.014356 +epoch: 1, batch: 23800, sum loss: 4149.826660, avg loss: 2.675581, ppl: 14.520787 +epoch: 1, batch: 23801, sum loss: 4351.728027, avg loss: 2.642215, ppl: 14.044274 +epoch: 1, batch: 23802, sum loss: 4268.170898, avg loss: 2.574289, ppl: 13.121982 +epoch: 1, batch: 23803, sum loss: 4297.048828, avg loss: 2.372749, ppl: 10.726844 +epoch: 1, batch: 23804, sum loss: 3739.546143, avg loss: 2.400222, ppl: 11.025627 +epoch: 1, batch: 23805, sum loss: 3838.817383, avg loss: 2.499230, ppl: 12.173119 +epoch: 1, batch: 23806, sum loss: 4718.156738, avg loss: 2.733579, ppl: 15.387856 +epoch: 1, batch: 23807, sum loss: 4845.783691, avg loss: 2.899930, ppl: 18.172882 +epoch: 1, batch: 23808, sum loss: 
5105.039551, avg loss: 2.746121, ppl: 15.582074 +epoch: 1, batch: 23809, sum loss: 4395.841797, avg loss: 2.549792, ppl: 12.804444 +epoch: 1, batch: 23810, sum loss: 5350.007324, avg loss: 2.733780, ppl: 15.390954 +epoch: 1, batch: 23811, sum loss: 5522.373535, avg loss: 3.057793, ppl: 21.280533 +epoch: 1, batch: 23812, sum loss: 4193.202148, avg loss: 2.379797, ppl: 10.802710 +epoch: 1, batch: 23813, sum loss: 3445.812012, avg loss: 2.449049, ppl: 11.577332 +epoch: 1, batch: 23814, sum loss: 4793.093750, avg loss: 2.612040, ppl: 13.626825 +epoch: 1, batch: 23815, sum loss: 4380.732910, avg loss: 2.509011, ppl: 12.292767 +epoch: 1, batch: 23816, sum loss: 3875.427734, avg loss: 2.457469, ppl: 11.675219 +epoch: 1, batch: 23817, sum loss: 3268.251465, avg loss: 2.508251, ppl: 12.283430 +epoch: 1, batch: 23818, sum loss: 3487.072021, avg loss: 2.148535, ppl: 8.572289 +epoch: 1, batch: 23819, sum loss: 5583.159668, avg loss: 2.976098, ppl: 19.611145 +epoch: 1, batch: 23820, sum loss: 4736.046875, avg loss: 2.566963, ppl: 13.026203 +epoch: 1, batch: 23821, sum loss: 4214.926758, avg loss: 2.511875, ppl: 12.328029 +epoch: 1, batch: 23822, sum loss: 5342.859375, avg loss: 2.837419, ppl: 17.071640 +epoch: 1, batch: 23823, sum loss: 4689.958984, avg loss: 2.823575, ppl: 16.836943 +epoch: 1, batch: 23824, sum loss: 4018.954834, avg loss: 2.389390, ppl: 10.906844 +epoch: 1, batch: 23825, sum loss: 4395.676270, avg loss: 2.622719, ppl: 13.773116 +epoch: 1, batch: 23826, sum loss: 4052.326416, avg loss: 2.771769, ppl: 15.986895 +epoch: 1, batch: 23827, sum loss: 5438.813965, avg loss: 2.975281, ppl: 19.595133 +epoch: 1, batch: 23828, sum loss: 4060.818359, avg loss: 2.631768, ppl: 13.898324 +epoch: 1, batch: 23829, sum loss: 3944.355713, avg loss: 2.396328, ppl: 10.982773 +epoch: 1, batch: 23830, sum loss: 5802.350098, avg loss: 2.930480, ppl: 18.736612 +epoch: 1, batch: 23831, sum loss: 5095.400391, avg loss: 2.990258, ppl: 19.890823 +epoch: 1, batch: 23832, sum loss: 
4744.698730, avg loss: 2.787719, ppl: 16.243933 +epoch: 1, batch: 23833, sum loss: 3804.371094, avg loss: 2.526143, ppl: 12.505178 +epoch: 1, batch: 23834, sum loss: 3865.645020, avg loss: 2.445063, ppl: 11.531278 +epoch: 1, batch: 23835, sum loss: 3871.643311, avg loss: 2.679338, ppl: 14.575440 +epoch: 1, batch: 23836, sum loss: 4675.306641, avg loss: 2.622158, ppl: 13.765391 +epoch: 1, batch: 23837, sum loss: 3959.936768, avg loss: 2.540049, ppl: 12.680296 +epoch: 1, batch: 23838, sum loss: 3467.356689, avg loss: 2.168453, ppl: 8.744747 +epoch: 1, batch: 23839, sum loss: 4755.735840, avg loss: 2.752162, ppl: 15.676488 +epoch: 1, batch: 23840, sum loss: 4962.377930, avg loss: 2.737109, ppl: 15.442272 +epoch: 1, batch: 23841, sum loss: 4698.627441, avg loss: 2.839050, ppl: 17.099510 +epoch: 1, batch: 23842, sum loss: 3549.102539, avg loss: 2.452732, ppl: 11.620045 +epoch: 1, batch: 23843, sum loss: 4223.697266, avg loss: 2.395744, ppl: 10.976365 +epoch: 1, batch: 23844, sum loss: 4334.885254, avg loss: 2.419021, ppl: 11.234854 +epoch: 1, batch: 23845, sum loss: 4194.615234, avg loss: 2.386015, ppl: 10.870094 +epoch: 1, batch: 23846, sum loss: 4067.886230, avg loss: 2.621061, ppl: 13.750303 +epoch: 1, batch: 23847, sum loss: 3854.034180, avg loss: 2.486474, ppl: 12.018821 +epoch: 1, batch: 23848, sum loss: 5056.617676, avg loss: 2.965758, ppl: 19.409416 +epoch: 1, batch: 23849, sum loss: 4407.364746, avg loss: 2.618755, ppl: 13.718635 +epoch: 1, batch: 23850, sum loss: 3448.938965, avg loss: 2.449531, ppl: 11.582909 +epoch: 1, batch: 23851, sum loss: 4331.498047, avg loss: 2.312599, ppl: 10.100644 +epoch: 1, batch: 23852, sum loss: 3699.741211, avg loss: 2.528873, ppl: 12.539369 +epoch: 1, batch: 23853, sum loss: 4463.789551, avg loss: 2.601276, ppl: 13.480927 +epoch: 1, batch: 23854, sum loss: 4780.872559, avg loss: 2.804031, ppl: 16.511066 +epoch: 1, batch: 23855, sum loss: 4961.176270, avg loss: 3.004952, ppl: 20.185255 +epoch: 1, batch: 23856, sum loss: 
3559.837402, avg loss: 2.596526, ppl: 13.417048 +epoch: 1, batch: 23857, sum loss: 3040.673828, avg loss: 2.161104, ppl: 8.680719 +epoch: 1, batch: 23858, sum loss: 3525.875977, avg loss: 2.379134, ppl: 10.795547 +epoch: 1, batch: 23859, sum loss: 4932.643066, avg loss: 2.725217, ppl: 15.259730 +epoch: 1, batch: 23860, sum loss: 4377.642578, avg loss: 2.702249, ppl: 14.913227 +epoch: 1, batch: 23861, sum loss: 4318.960449, avg loss: 2.638339, ppl: 13.989944 +epoch: 1, batch: 23862, sum loss: 5041.347168, avg loss: 3.027836, ppl: 20.652498 +epoch: 1, batch: 23863, sum loss: 3645.745605, avg loss: 2.604104, ppl: 13.519107 +epoch: 1, batch: 23864, sum loss: 3808.747070, avg loss: 2.515685, ppl: 12.375084 +epoch: 1, batch: 23865, sum loss: 4876.269531, avg loss: 2.640103, ppl: 14.014646 +epoch: 1, batch: 23866, sum loss: 4512.673828, avg loss: 2.725044, ppl: 15.257092 +epoch: 1, batch: 23867, sum loss: 4199.447754, avg loss: 2.534368, ppl: 12.608460 +epoch: 1, batch: 23868, sum loss: 5152.875977, avg loss: 2.743810, ppl: 15.546109 +epoch: 1, batch: 23869, sum loss: 5351.828125, avg loss: 2.812311, ppl: 16.648352 +epoch: 1, batch: 23870, sum loss: 5352.387695, avg loss: 2.854607, ppl: 17.367609 +epoch: 1, batch: 23871, sum loss: 4418.257812, avg loss: 2.843152, ppl: 17.169796 +epoch: 1, batch: 23872, sum loss: 4944.382324, avg loss: 2.659700, ppl: 14.292000 +epoch: 1, batch: 23873, sum loss: 4403.883789, avg loss: 2.667404, ppl: 14.402531 +epoch: 1, batch: 23874, sum loss: 5012.023926, avg loss: 3.052390, ppl: 21.165867 +epoch: 1, batch: 23875, sum loss: 4356.338379, avg loss: 2.437794, ppl: 11.447764 +epoch: 1, batch: 23876, sum loss: 4316.456055, avg loss: 2.611286, ppl: 13.616552 +epoch: 1, batch: 23877, sum loss: 4639.744629, avg loss: 2.795027, ppl: 16.363071 +epoch: 1, batch: 23878, sum loss: 4399.062988, avg loss: 2.449367, ppl: 11.581014 +epoch: 1, batch: 23879, sum loss: 4834.194336, avg loss: 2.778273, ppl: 16.091202 +epoch: 1, batch: 23880, sum loss: 
3661.762207, avg loss: 2.356346, ppl: 10.552326 +epoch: 1, batch: 23881, sum loss: 3810.995605, avg loss: 2.622846, ppl: 13.774877 +epoch: 1, batch: 23882, sum loss: 4675.402344, avg loss: 2.664047, ppl: 14.354260 +epoch: 1, batch: 23883, sum loss: 5056.822754, avg loss: 2.748273, ppl: 15.615643 +epoch: 1, batch: 23884, sum loss: 4223.559082, avg loss: 2.613589, ppl: 13.647940 +epoch: 1, batch: 23885, sum loss: 3888.140869, avg loss: 2.661288, ppl: 14.314708 +epoch: 1, batch: 23886, sum loss: 4620.227051, avg loss: 2.676841, ppl: 14.539085 +epoch: 1, batch: 23887, sum loss: 4382.812012, avg loss: 2.520306, ppl: 12.432402 +epoch: 1, batch: 23888, sum loss: 3444.721680, avg loss: 2.385541, ppl: 10.864942 +epoch: 1, batch: 23889, sum loss: 4285.250000, avg loss: 2.659994, ppl: 14.296202 +epoch: 1, batch: 23890, sum loss: 4824.945801, avg loss: 2.584331, ppl: 13.254416 +epoch: 1, batch: 23891, sum loss: 4618.248535, avg loss: 2.689720, ppl: 14.727547 +epoch: 1, batch: 23892, sum loss: 3800.236328, avg loss: 2.786097, ppl: 16.217600 +epoch: 1, batch: 23893, sum loss: 5457.545410, avg loss: 2.915356, ppl: 18.455376 +epoch: 1, batch: 23894, sum loss: 6139.741211, avg loss: 2.646440, ppl: 14.103744 +epoch: 1, batch: 23895, sum loss: 4324.524902, avg loss: 2.606706, ppl: 13.554325 +epoch: 1, batch: 23896, sum loss: 5121.037109, avg loss: 2.868928, ppl: 17.618130 +epoch: 1, batch: 23897, sum loss: 4902.496094, avg loss: 2.728156, ppl: 15.304641 +epoch: 1, batch: 23898, sum loss: 3953.666992, avg loss: 2.357583, ppl: 10.565385 +epoch: 1, batch: 23899, sum loss: 4627.771973, avg loss: 2.582462, ppl: 13.229671 +epoch: 1, batch: 23900, sum loss: 5418.146484, avg loss: 2.856166, ppl: 17.394705 +epoch: 1, batch: 23901, sum loss: 4064.868652, avg loss: 2.592391, ppl: 13.361678 +epoch: 1, batch: 23902, sum loss: 4878.408691, avg loss: 2.737603, ppl: 15.449910 +epoch: 1, batch: 23903, sum loss: 4372.612793, avg loss: 2.677656, ppl: 14.550948 +epoch: 1, batch: 23904, sum loss: 
4448.410156, avg loss: 2.587790, ppl: 13.300339 +epoch: 1, batch: 23905, sum loss: 3831.809082, avg loss: 2.360942, ppl: 10.600934 +epoch: 1, batch: 23906, sum loss: 4063.029785, avg loss: 2.717746, ppl: 15.146137 +epoch: 1, batch: 23907, sum loss: 5049.543945, avg loss: 2.927272, ppl: 18.676613 +epoch: 1, batch: 23908, sum loss: 4828.364258, avg loss: 2.765386, ppl: 15.885176 +epoch: 1, batch: 23909, sum loss: 3599.798828, avg loss: 2.427376, ppl: 11.329119 +epoch: 1, batch: 23910, sum loss: 5281.438965, avg loss: 2.806291, ppl: 16.548420 +epoch: 1, batch: 23911, sum loss: 3743.256348, avg loss: 2.604911, ppl: 13.530016 +epoch: 1, batch: 23912, sum loss: 5569.777832, avg loss: 2.894895, ppl: 18.081600 +epoch: 1, batch: 23913, sum loss: 4270.079102, avg loss: 2.475408, ppl: 11.886559 +epoch: 1, batch: 23914, sum loss: 4544.084473, avg loss: 2.719380, ppl: 15.170919 +epoch: 1, batch: 23915, sum loss: 4012.673096, avg loss: 2.443772, ppl: 11.516397 +epoch: 1, batch: 23916, sum loss: 4246.521973, avg loss: 2.434932, ppl: 11.415045 +epoch: 1, batch: 23917, sum loss: 4546.819824, avg loss: 2.607121, ppl: 13.559958 +epoch: 1, batch: 23918, sum loss: 5421.861816, avg loss: 2.847617, ppl: 17.246628 +epoch: 1, batch: 23919, sum loss: 4340.380859, avg loss: 2.697564, ppl: 14.843531 +epoch: 1, batch: 23920, sum loss: 4044.096680, avg loss: 2.378881, ppl: 10.792813 +epoch: 1, batch: 23921, sum loss: 3685.845459, avg loss: 2.368795, ppl: 10.684511 +epoch: 1, batch: 23922, sum loss: 4228.322266, avg loss: 2.696634, ppl: 14.829732 +epoch: 1, batch: 23923, sum loss: 4129.029785, avg loss: 2.496391, ppl: 12.138601 +epoch: 1, batch: 23924, sum loss: 4131.578125, avg loss: 2.420374, ppl: 11.250065 +epoch: 1, batch: 23925, sum loss: 4852.546875, avg loss: 2.625837, ppl: 13.816134 +epoch: 1, batch: 23926, sum loss: 4075.850098, avg loss: 2.494400, ppl: 12.114466 +epoch: 1, batch: 23927, sum loss: 3694.701172, avg loss: 2.486340, ppl: 12.017210 +epoch: 1, batch: 23928, sum loss: 
5100.974609, avg loss: 2.810454, ppl: 16.617468 +epoch: 1, batch: 23929, sum loss: 4949.695312, avg loss: 2.794859, ppl: 16.360321 +epoch: 1, batch: 23930, sum loss: 3641.358887, avg loss: 2.419508, ppl: 11.240325 +epoch: 1, batch: 23931, sum loss: 3308.753418, avg loss: 2.545195, ppl: 12.745711 +epoch: 1, batch: 23932, sum loss: 4626.740234, avg loss: 2.651427, ppl: 14.174252 +epoch: 1, batch: 23933, sum loss: 4758.522949, avg loss: 2.804080, ppl: 16.511869 +epoch: 1, batch: 23934, sum loss: 3478.690918, avg loss: 2.293138, ppl: 9.905976 +epoch: 1, batch: 23935, sum loss: 4775.258301, avg loss: 2.703997, ppl: 14.939320 +epoch: 1, batch: 23936, sum loss: 3945.385742, avg loss: 2.449029, ppl: 11.577103 +epoch: 1, batch: 23937, sum loss: 4421.467285, avg loss: 2.722578, ppl: 15.219512 +epoch: 1, batch: 23938, sum loss: 4419.760254, avg loss: 2.522694, ppl: 12.462129 +epoch: 1, batch: 23939, sum loss: 5067.020996, avg loss: 2.821281, ppl: 16.798359 +epoch: 1, batch: 23940, sum loss: 4882.774902, avg loss: 2.671102, ppl: 14.455894 +epoch: 1, batch: 23941, sum loss: 4580.921875, avg loss: 2.562037, ppl: 12.962192 +epoch: 1, batch: 23942, sum loss: 4119.691406, avg loss: 2.750128, ppl: 15.644628 +epoch: 1, batch: 23943, sum loss: 6218.069336, avg loss: 2.935821, ppl: 18.836967 +epoch: 1, batch: 23944, sum loss: 4010.919189, avg loss: 2.619804, ppl: 13.733027 +epoch: 1, batch: 23945, sum loss: 4901.486816, avg loss: 2.742858, ppl: 15.531309 +epoch: 1, batch: 23946, sum loss: 4395.525391, avg loss: 2.657512, ppl: 14.260770 +epoch: 1, batch: 23947, sum loss: 4787.150391, avg loss: 2.677377, ppl: 14.546890 +epoch: 1, batch: 23948, sum loss: 4522.993164, avg loss: 2.525401, ppl: 12.495904 +epoch: 1, batch: 23949, sum loss: 4317.013184, avg loss: 2.511352, ppl: 12.321572 +epoch: 1, batch: 23950, sum loss: 4006.140137, avg loss: 2.299736, ppl: 9.971550 +epoch: 1, batch: 23951, sum loss: 4186.150879, avg loss: 2.723586, ppl: 15.234851 +epoch: 1, batch: 23952, sum loss: 
4913.274414, avg loss: 2.881686, ppl: 17.844334 +epoch: 1, batch: 23953, sum loss: 3828.720215, avg loss: 2.547385, ppl: 12.773660 +epoch: 1, batch: 23954, sum loss: 3951.740723, avg loss: 2.531544, ppl: 12.572909 +epoch: 1, batch: 23955, sum loss: 4619.702148, avg loss: 2.567928, ppl: 13.038778 +epoch: 1, batch: 23956, sum loss: 5734.875488, avg loss: 2.715377, ppl: 15.110300 +epoch: 1, batch: 23957, sum loss: 4127.223145, avg loss: 2.510476, ppl: 12.310793 +epoch: 1, batch: 23958, sum loss: 3950.178467, avg loss: 2.701900, ppl: 14.908037 +epoch: 1, batch: 23959, sum loss: 4789.668945, avg loss: 2.678786, ppl: 14.567395 +epoch: 1, batch: 23960, sum loss: 4216.870117, avg loss: 2.393229, ppl: 10.948796 +epoch: 1, batch: 23961, sum loss: 4175.311523, avg loss: 2.582135, ppl: 13.225341 +epoch: 1, batch: 23962, sum loss: 5086.981445, avg loss: 2.733467, ppl: 15.386136 +epoch: 1, batch: 23963, sum loss: 4559.324219, avg loss: 2.802289, ppl: 16.482332 +epoch: 1, batch: 23964, sum loss: 3411.444824, avg loss: 2.237013, ppl: 9.365316 +epoch: 1, batch: 23965, sum loss: 4641.531738, avg loss: 2.557318, ppl: 12.901170 +epoch: 1, batch: 23966, sum loss: 5479.531250, avg loss: 2.949156, ppl: 19.089827 +epoch: 1, batch: 23967, sum loss: 4590.304688, avg loss: 2.719375, ppl: 15.170835 +epoch: 1, batch: 23968, sum loss: 4253.566895, avg loss: 2.429222, ppl: 11.350044 +epoch: 1, batch: 23969, sum loss: 4742.420410, avg loss: 2.682365, ppl: 14.619623 +epoch: 1, batch: 23970, sum loss: 4920.098145, avg loss: 2.790753, ppl: 16.293289 +epoch: 1, batch: 23971, sum loss: 5369.311035, avg loss: 2.586373, ppl: 13.281516 +epoch: 1, batch: 23972, sum loss: 4118.369141, avg loss: 2.542203, ppl: 12.707638 +epoch: 1, batch: 23973, sum loss: 4576.781738, avg loss: 2.685905, ppl: 14.671470 +epoch: 1, batch: 23974, sum loss: 4309.545898, avg loss: 2.844585, ppl: 17.194420 +epoch: 1, batch: 23975, sum loss: 5213.479492, avg loss: 2.828801, ppl: 16.925150 +epoch: 1, batch: 23976, sum loss: 
4422.974121, avg loss: 2.501682, ppl: 12.203006 +epoch: 1, batch: 23977, sum loss: 4739.178711, avg loss: 2.656490, ppl: 14.246202 +epoch: 1, batch: 23978, sum loss: 5452.053223, avg loss: 2.788774, ppl: 16.261072 +epoch: 1, batch: 23979, sum loss: 4461.117676, avg loss: 2.543396, ppl: 12.722811 +epoch: 1, batch: 23980, sum loss: 3649.205322, avg loss: 2.504602, ppl: 12.238689 +epoch: 1, batch: 23981, sum loss: 4558.988770, avg loss: 2.731569, ppl: 15.356968 +epoch: 1, batch: 23982, sum loss: 4288.137695, avg loss: 2.656839, ppl: 14.251168 +epoch: 1, batch: 23983, sum loss: 4482.984375, avg loss: 2.633951, ppl: 13.928690 +epoch: 1, batch: 23984, sum loss: 4111.200195, avg loss: 2.575940, ppl: 13.143665 +epoch: 1, batch: 23985, sum loss: 3984.161133, avg loss: 2.813673, ppl: 16.671040 +epoch: 1, batch: 23986, sum loss: 4229.032227, avg loss: 2.402859, ppl: 11.054739 +epoch: 1, batch: 23987, sum loss: 4362.666504, avg loss: 2.655305, ppl: 14.229327 +epoch: 1, batch: 23988, sum loss: 4283.105469, avg loss: 2.391460, ppl: 10.929441 +epoch: 1, batch: 23989, sum loss: 4206.743652, avg loss: 2.707042, ppl: 14.984888 +epoch: 1, batch: 23990, sum loss: 4589.270508, avg loss: 2.669733, ppl: 14.436111 +epoch: 1, batch: 23991, sum loss: 4493.276367, avg loss: 2.693811, ppl: 14.787925 +epoch: 1, batch: 23992, sum loss: 4639.673828, avg loss: 2.547871, ppl: 12.779871 +epoch: 1, batch: 23993, sum loss: 4082.869385, avg loss: 2.579197, ppl: 13.186547 +epoch: 1, batch: 23994, sum loss: 4624.148438, avg loss: 2.597836, ppl: 13.434637 +epoch: 1, batch: 23995, sum loss: 4239.639160, avg loss: 2.760182, ppl: 15.802717 +epoch: 1, batch: 23996, sum loss: 4910.091309, avg loss: 2.775631, ppl: 16.048754 +epoch: 1, batch: 23997, sum loss: 4479.038086, avg loss: 2.833041, ppl: 16.997070 +epoch: 1, batch: 23998, sum loss: 4741.065430, avg loss: 2.495298, ppl: 12.125342 +epoch: 1, batch: 23999, sum loss: 5005.413574, avg loss: 2.723294, ppl: 15.230402 +epoch: 1, batch: 24000, sum loss: 
4081.836182, avg loss: 2.519652, ppl: 12.424273 +epoch: 1, batch: 24001, sum loss: 3907.403320, avg loss: 2.512800, ppl: 12.339429 +epoch: 1, batch: 24002, sum loss: 4773.563965, avg loss: 2.811286, ppl: 16.631296 +epoch: 1, batch: 24003, sum loss: 4051.725586, avg loss: 2.513477, ppl: 12.347793 +epoch: 1, batch: 24004, sum loss: 4061.739258, avg loss: 2.442417, ppl: 11.500804 +epoch: 1, batch: 24005, sum loss: 4437.150879, avg loss: 2.519677, ppl: 12.424579 +epoch: 1, batch: 24006, sum loss: 4866.522949, avg loss: 2.580341, ppl: 13.201640 +epoch: 1, batch: 24007, sum loss: 4599.964844, avg loss: 2.825531, ppl: 16.869905 +epoch: 1, batch: 24008, sum loss: 3662.493164, avg loss: 2.527601, ppl: 12.523420 +epoch: 1, batch: 24009, sum loss: 4202.455078, avg loss: 2.575034, ppl: 13.131763 +epoch: 1, batch: 24010, sum loss: 4963.013184, avg loss: 2.519296, ppl: 12.419852 +epoch: 1, batch: 24011, sum loss: 5021.099121, avg loss: 2.777157, ppl: 16.073254 +epoch: 1, batch: 24012, sum loss: 4086.590332, avg loss: 2.505574, ppl: 12.250583 +epoch: 1, batch: 24013, sum loss: 5088.576660, avg loss: 2.861967, ppl: 17.495905 +epoch: 1, batch: 24014, sum loss: 3953.748291, avg loss: 2.601150, ppl: 13.479234 +epoch: 1, batch: 24015, sum loss: 4065.282715, avg loss: 2.626152, ppl: 13.820483 +epoch: 1, batch: 24016, sum loss: 4620.232422, avg loss: 2.700311, ppl: 14.884366 +epoch: 1, batch: 24017, sum loss: 5026.793945, avg loss: 2.797326, ppl: 16.400726 +epoch: 1, batch: 24018, sum loss: 4184.357422, avg loss: 2.365380, ppl: 10.648088 +epoch: 1, batch: 24019, sum loss: 4415.679199, avg loss: 2.582268, ppl: 13.227110 +epoch: 1, batch: 24020, sum loss: 4387.455078, avg loss: 2.690040, ppl: 14.732264 +epoch: 1, batch: 24021, sum loss: 4828.694824, avg loss: 2.617179, ppl: 13.697025 +epoch: 1, batch: 24022, sum loss: 3432.334229, avg loss: 2.327006, ppl: 10.247219 +epoch: 1, batch: 24023, sum loss: 5157.501953, avg loss: 2.866872, ppl: 17.581928 +epoch: 1, batch: 24024, sum loss: 
4199.342773, avg loss: 2.462958, ppl: 11.739481 +epoch: 1, batch: 24025, sum loss: 3594.369141, avg loss: 2.508283, ppl: 12.283819 +epoch: 1, batch: 24026, sum loss: 4318.074219, avg loss: 2.621781, ppl: 13.760214 +epoch: 1, batch: 24027, sum loss: 3604.415771, avg loss: 2.380724, ppl: 10.812728 +epoch: 1, batch: 24028, sum loss: 4187.620117, avg loss: 2.394294, ppl: 10.960458 +epoch: 1, batch: 24029, sum loss: 4469.229004, avg loss: 2.532141, ppl: 12.580415 +epoch: 1, batch: 24030, sum loss: 4255.101562, avg loss: 2.715445, ppl: 15.111330 +epoch: 1, batch: 24031, sum loss: 3839.060547, avg loss: 2.332358, ppl: 10.302202 +epoch: 1, batch: 24032, sum loss: 3523.536133, avg loss: 2.315070, ppl: 10.125630 +epoch: 1, batch: 24033, sum loss: 4951.181641, avg loss: 2.752185, ppl: 15.676854 +epoch: 1, batch: 24034, sum loss: 3380.197021, avg loss: 2.158491, ppl: 8.658064 +epoch: 1, batch: 24035, sum loss: 4300.582031, avg loss: 2.603258, ppl: 13.507673 +epoch: 1, batch: 24036, sum loss: 4396.906250, avg loss: 2.612541, ppl: 13.633646 +epoch: 1, batch: 24037, sum loss: 4367.948730, avg loss: 2.653675, ppl: 14.206148 +epoch: 1, batch: 24038, sum loss: 4479.174805, avg loss: 2.737882, ppl: 15.454221 +epoch: 1, batch: 24039, sum loss: 4851.857422, avg loss: 2.824131, ppl: 16.846304 +epoch: 1, batch: 24040, sum loss: 3849.444336, avg loss: 2.620452, ppl: 13.741936 +epoch: 1, batch: 24041, sum loss: 4458.246582, avg loss: 2.415085, ppl: 11.190720 +epoch: 1, batch: 24042, sum loss: 3644.806152, avg loss: 2.479460, ppl: 11.934818 +epoch: 1, batch: 24043, sum loss: 5316.366699, avg loss: 2.853659, ppl: 17.351152 +epoch: 1, batch: 24044, sum loss: 4537.232422, avg loss: 2.884445, ppl: 17.893637 +epoch: 1, batch: 24045, sum loss: 4476.397461, avg loss: 2.590508, ppl: 13.336541 +epoch: 1, batch: 24046, sum loss: 4224.673340, avg loss: 2.476362, ppl: 11.897898 +epoch: 1, batch: 24047, sum loss: 4831.113281, avg loss: 2.835160, ppl: 17.033134 +epoch: 1, batch: 24048, sum loss: 
4234.829590, avg loss: 2.780584, ppl: 16.128439 +epoch: 1, batch: 24049, sum loss: 5109.561035, avg loss: 2.827648, ppl: 16.905659 +epoch: 1, batch: 24050, sum loss: 4089.082764, avg loss: 2.607833, ppl: 13.569619 +epoch: 1, batch: 24051, sum loss: 4528.728516, avg loss: 2.587845, ppl: 13.301075 +epoch: 1, batch: 24052, sum loss: 4352.640625, avg loss: 2.761828, ppl: 15.828747 +epoch: 1, batch: 24053, sum loss: 3965.562500, avg loss: 2.560079, ppl: 12.936841 +epoch: 1, batch: 24054, sum loss: 4923.298340, avg loss: 2.822992, ppl: 16.827124 +epoch: 1, batch: 24055, sum loss: 4289.784180, avg loss: 2.691207, ppl: 14.749471 +epoch: 1, batch: 24056, sum loss: 5064.014160, avg loss: 2.838573, ppl: 17.091362 +epoch: 1, batch: 24057, sum loss: 4059.499756, avg loss: 2.524565, ppl: 12.485456 +epoch: 1, batch: 24058, sum loss: 5368.623535, avg loss: 2.773049, ppl: 16.007368 +epoch: 1, batch: 24059, sum loss: 3884.793945, avg loss: 2.466536, ppl: 11.781563 +epoch: 1, batch: 24060, sum loss: 4801.928223, avg loss: 2.777287, ppl: 16.075342 +epoch: 1, batch: 24061, sum loss: 4219.683594, avg loss: 2.818760, ppl: 16.756056 +epoch: 1, batch: 24062, sum loss: 4548.754883, avg loss: 2.596321, ppl: 13.414301 +epoch: 1, batch: 24063, sum loss: 4647.449219, avg loss: 2.589108, ppl: 13.317890 +epoch: 1, batch: 24064, sum loss: 4270.795410, avg loss: 2.674261, ppl: 14.501634 +epoch: 1, batch: 24065, sum loss: 4088.786621, avg loss: 2.768305, ppl: 15.931612 +epoch: 1, batch: 24066, sum loss: 5354.740723, avg loss: 2.916525, ppl: 18.476974 +epoch: 1, batch: 24067, sum loss: 4333.222168, avg loss: 2.679791, ppl: 14.582048 +epoch: 1, batch: 24068, sum loss: 5403.299805, avg loss: 3.013553, ppl: 20.359602 +epoch: 1, batch: 24069, sum loss: 4336.064453, avg loss: 2.544639, ppl: 12.738627 +epoch: 1, batch: 24070, sum loss: 4909.865723, avg loss: 2.682987, ppl: 14.628720 +epoch: 1, batch: 24071, sum loss: 4055.770020, avg loss: 2.772228, ppl: 15.994237 +epoch: 1, batch: 24072, sum loss: 
3782.091309, avg loss: 2.523076, ppl: 12.466889 +epoch: 1, batch: 24073, sum loss: 4303.471680, avg loss: 2.343939, ppl: 10.422207 +epoch: 1, batch: 24074, sum loss: 4668.247559, avg loss: 2.703096, ppl: 14.925877 +epoch: 1, batch: 24075, sum loss: 5135.660156, avg loss: 2.764080, ppl: 15.864431 +epoch: 1, batch: 24076, sum loss: 3991.621582, avg loss: 2.257704, ppl: 9.561116 +epoch: 1, batch: 24077, sum loss: 4270.422363, avg loss: 2.850749, ppl: 17.300735 +epoch: 1, batch: 24078, sum loss: 4895.228027, avg loss: 2.660450, ppl: 14.302724 +epoch: 1, batch: 24079, sum loss: 4111.115723, avg loss: 2.556664, ppl: 12.892735 +epoch: 1, batch: 24080, sum loss: 4107.613281, avg loss: 2.723881, ppl: 15.239359 +epoch: 1, batch: 24081, sum loss: 4618.009277, avg loss: 2.480134, ppl: 11.942864 +epoch: 1, batch: 24082, sum loss: 3995.978271, avg loss: 2.478895, ppl: 11.928073 +epoch: 1, batch: 24083, sum loss: 5837.036621, avg loss: 2.911240, ppl: 18.379578 +epoch: 1, batch: 24084, sum loss: 4320.050293, avg loss: 2.520449, ppl: 12.434183 +epoch: 1, batch: 24085, sum loss: 5275.464355, avg loss: 2.772183, ppl: 15.993509 +epoch: 1, batch: 24086, sum loss: 3780.579590, avg loss: 2.472583, ppl: 11.853027 +epoch: 1, batch: 24087, sum loss: 4322.487793, avg loss: 2.618103, ppl: 13.709692 +epoch: 1, batch: 24088, sum loss: 4136.422852, avg loss: 2.525289, ppl: 12.494500 +epoch: 1, batch: 24089, sum loss: 4515.499512, avg loss: 2.543943, ppl: 12.729768 +epoch: 1, batch: 24090, sum loss: 4999.463379, avg loss: 2.731947, ppl: 15.362772 +epoch: 1, batch: 24091, sum loss: 4319.439453, avg loss: 2.574159, ppl: 13.120283 +epoch: 1, batch: 24092, sum loss: 4751.801270, avg loss: 2.549250, ppl: 12.797498 +epoch: 1, batch: 24093, sum loss: 4085.341797, avg loss: 2.491062, ppl: 12.074094 +epoch: 1, batch: 24094, sum loss: 4829.170898, avg loss: 2.857497, ppl: 17.417883 +epoch: 1, batch: 24095, sum loss: 4556.513184, avg loss: 2.413407, ppl: 11.171963 +epoch: 1, batch: 24096, sum loss: 
4045.952393, avg loss: 2.623834, ppl: 13.788489 +epoch: 1, batch: 24097, sum loss: 4832.006836, avg loss: 2.682958, ppl: 14.628295 +epoch: 1, batch: 24098, sum loss: 3913.193115, avg loss: 2.653012, ppl: 14.196739 +epoch: 1, batch: 24099, sum loss: 4278.714844, avg loss: 2.679220, ppl: 14.573728 +epoch: 1, batch: 24100, sum loss: 4382.539551, avg loss: 2.825622, ppl: 16.871433 +epoch: 1, batch: 24101, sum loss: 5613.630371, avg loss: 2.889156, ppl: 17.978136 +epoch: 1, batch: 24102, sum loss: 5027.152832, avg loss: 2.766732, ppl: 15.906573 +epoch: 1, batch: 24103, sum loss: 4682.982910, avg loss: 2.740189, ppl: 15.489914 +epoch: 1, batch: 24104, sum loss: 4231.663574, avg loss: 2.638194, ppl: 13.987923 +epoch: 1, batch: 24105, sum loss: 3964.052734, avg loss: 2.459090, ppl: 11.694162 +epoch: 1, batch: 24106, sum loss: 3636.224121, avg loss: 2.311649, ppl: 10.091052 +epoch: 1, batch: 24107, sum loss: 5940.098145, avg loss: 2.873778, ppl: 17.703770 +epoch: 1, batch: 24108, sum loss: 5453.199219, avg loss: 3.004518, ppl: 20.176479 +epoch: 1, batch: 24109, sum loss: 4525.581055, avg loss: 2.539608, ppl: 12.674702 +epoch: 1, batch: 24110, sum loss: 4950.861328, avg loss: 2.742859, ppl: 15.531331 +epoch: 1, batch: 24111, sum loss: 4273.823242, avg loss: 2.566861, ppl: 13.024874 +epoch: 1, batch: 24112, sum loss: 5081.059570, avg loss: 2.916796, ppl: 18.481985 +epoch: 1, batch: 24113, sum loss: 5319.678223, avg loss: 2.760601, ppl: 15.809342 +epoch: 1, batch: 24114, sum loss: 4321.471191, avg loss: 2.508109, ppl: 12.281679 +epoch: 1, batch: 24115, sum loss: 3841.483398, avg loss: 2.362536, ppl: 10.617844 +epoch: 1, batch: 24116, sum loss: 5331.715820, avg loss: 2.744064, ppl: 15.550050 +epoch: 1, batch: 24117, sum loss: 4434.697266, avg loss: 2.644423, ppl: 14.075318 +epoch: 1, batch: 24118, sum loss: 5027.393555, avg loss: 2.811741, ppl: 16.638866 +epoch: 1, batch: 24119, sum loss: 4187.686035, avg loss: 2.612406, ppl: 13.631804 +epoch: 1, batch: 24120, sum loss: 
4089.420654, avg loss: 2.401304, ppl: 11.037560 +epoch: 1, batch: 24121, sum loss: 5156.629883, avg loss: 2.610952, ppl: 13.612002 +epoch: 1, batch: 24122, sum loss: 4533.185547, avg loss: 2.777687, ppl: 16.081785 +epoch: 1, batch: 24123, sum loss: 4049.403809, avg loss: 2.519853, ppl: 12.426771 +epoch: 1, batch: 24124, sum loss: 4775.354980, avg loss: 2.600956, ppl: 13.476612 +epoch: 1, batch: 24125, sum loss: 5341.310547, avg loss: 2.918749, ppl: 18.518108 +epoch: 1, batch: 24126, sum loss: 5644.998535, avg loss: 3.111907, ppl: 22.463837 +epoch: 1, batch: 24127, sum loss: 4598.434082, avg loss: 2.650394, ppl: 14.159619 +epoch: 1, batch: 24128, sum loss: 3799.490479, avg loss: 2.546576, ppl: 12.763322 +epoch: 1, batch: 24129, sum loss: 4316.555664, avg loss: 2.567850, ppl: 13.037762 +epoch: 1, batch: 24130, sum loss: 3906.453125, avg loss: 2.544920, ppl: 12.742214 +epoch: 1, batch: 24131, sum loss: 4449.315430, avg loss: 2.637413, ppl: 13.976998 +epoch: 1, batch: 24132, sum loss: 3446.538086, avg loss: 2.308465, ppl: 10.058969 +epoch: 1, batch: 24133, sum loss: 4957.118164, avg loss: 2.553899, ppl: 12.857137 +epoch: 1, batch: 24134, sum loss: 4901.598633, avg loss: 2.910688, ppl: 18.369436 +epoch: 1, batch: 24135, sum loss: 5385.007324, avg loss: 2.906102, ppl: 18.285387 +epoch: 1, batch: 24136, sum loss: 4184.028809, avg loss: 2.529643, ppl: 12.549020 +epoch: 1, batch: 24137, sum loss: 5871.067871, avg loss: 2.953253, ppl: 19.168211 +epoch: 1, batch: 24138, sum loss: 4095.190918, avg loss: 2.567518, ppl: 13.033432 +epoch: 1, batch: 24139, sum loss: 4536.845215, avg loss: 2.853362, ppl: 17.345995 +epoch: 1, batch: 24140, sum loss: 4350.868164, avg loss: 2.627336, ppl: 13.836857 +epoch: 1, batch: 24141, sum loss: 4908.274414, avg loss: 2.606625, ppl: 13.553226 +epoch: 1, batch: 24142, sum loss: 4204.374023, avg loss: 2.483387, ppl: 11.981778 +epoch: 1, batch: 24143, sum loss: 4549.225098, avg loss: 2.743803, ppl: 15.545991 +epoch: 1, batch: 24144, sum loss: 
4256.887695, avg loss: 2.647318, ppl: 14.116130 +epoch: 1, batch: 24145, sum loss: 3405.255371, avg loss: 2.453354, ppl: 11.627278 +epoch: 1, batch: 24146, sum loss: 4075.256592, avg loss: 2.594053, ppl: 13.383904 +epoch: 1, batch: 24147, sum loss: 5070.698242, avg loss: 2.721792, ppl: 15.207546 +epoch: 1, batch: 24148, sum loss: 4577.408203, avg loss: 2.846647, ppl: 17.229910 +epoch: 1, batch: 24149, sum loss: 4858.719727, avg loss: 2.546499, ppl: 12.762341 +epoch: 1, batch: 24150, sum loss: 4295.582031, avg loss: 2.399766, ppl: 11.020602 +epoch: 1, batch: 24151, sum loss: 4794.999023, avg loss: 2.610234, ppl: 13.602230 +epoch: 1, batch: 24152, sum loss: 4804.534180, avg loss: 2.849664, ppl: 17.281982 +epoch: 1, batch: 24153, sum loss: 4820.377930, avg loss: 2.570868, ppl: 13.077174 +epoch: 1, batch: 24154, sum loss: 4525.408203, avg loss: 2.425192, ppl: 11.304399 +epoch: 1, batch: 24155, sum loss: 5973.583008, avg loss: 2.832424, ppl: 16.986593 +epoch: 1, batch: 24156, sum loss: 4416.911133, avg loss: 2.499667, ppl: 12.178434 +epoch: 1, batch: 24157, sum loss: 4167.580078, avg loss: 2.569408, ppl: 13.058094 +epoch: 1, batch: 24158, sum loss: 4214.065918, avg loss: 2.623951, ppl: 13.790104 +epoch: 1, batch: 24159, sum loss: 5093.898438, avg loss: 2.705204, ppl: 14.957360 +epoch: 1, batch: 24160, sum loss: 4871.528809, avg loss: 2.721524, ppl: 15.203482 +epoch: 1, batch: 24161, sum loss: 5755.232910, avg loss: 3.110937, ppl: 22.442060 +epoch: 1, batch: 24162, sum loss: 4749.239746, avg loss: 2.751587, ppl: 15.667479 +epoch: 1, batch: 24163, sum loss: 5498.389648, avg loss: 2.774162, ppl: 16.025196 +epoch: 1, batch: 24164, sum loss: 4337.020020, avg loss: 2.678827, ppl: 14.567988 +epoch: 1, batch: 24165, sum loss: 4254.500000, avg loss: 2.533949, ppl: 12.603173 +epoch: 1, batch: 24166, sum loss: 5440.321289, avg loss: 2.713377, ppl: 15.080115 +epoch: 1, batch: 24167, sum loss: 5018.739746, avg loss: 2.766670, ppl: 15.905583 +epoch: 1, batch: 24168, sum loss: 
5017.195312, avg loss: 2.866969, ppl: 17.583635 +epoch: 1, batch: 24169, sum loss: 4579.775879, avg loss: 2.739100, ppl: 15.473061 +epoch: 1, batch: 24170, sum loss: 3425.995605, avg loss: 2.288574, ppl: 9.860868 +epoch: 1, batch: 24171, sum loss: 4738.838379, avg loss: 2.731319, ppl: 15.353124 +epoch: 1, batch: 24172, sum loss: 4058.659668, avg loss: 2.635493, ppl: 13.950192 +epoch: 1, batch: 24173, sum loss: 4188.552734, avg loss: 2.460959, ppl: 11.716044 +epoch: 1, batch: 24174, sum loss: 5280.132324, avg loss: 2.969703, ppl: 19.486139 +epoch: 1, batch: 24175, sum loss: 4827.728516, avg loss: 2.673161, ppl: 14.485683 +epoch: 1, batch: 24176, sum loss: 4049.668457, avg loss: 2.655520, ppl: 14.232388 +epoch: 1, batch: 24177, sum loss: 4710.515625, avg loss: 2.509598, ppl: 12.299985 +epoch: 1, batch: 24178, sum loss: 4313.687988, avg loss: 2.625495, ppl: 13.811405 +epoch: 1, batch: 24179, sum loss: 5133.341797, avg loss: 3.050114, ppl: 21.117750 +epoch: 1, batch: 24180, sum loss: 5490.554688, avg loss: 2.926735, ppl: 18.666584 +epoch: 1, batch: 24181, sum loss: 5972.693359, avg loss: 2.981874, ppl: 19.724747 +epoch: 1, batch: 24182, sum loss: 4728.984863, avg loss: 2.601202, ppl: 13.479928 +epoch: 1, batch: 24183, sum loss: 4067.323730, avg loss: 2.531004, ppl: 12.566118 +epoch: 1, batch: 24184, sum loss: 5575.201172, avg loss: 2.853225, ppl: 17.343620 +epoch: 1, batch: 24185, sum loss: 4229.122559, avg loss: 2.690282, ppl: 14.735826 +epoch: 1, batch: 24186, sum loss: 4750.333496, avg loss: 2.659761, ppl: 14.292875 +epoch: 1, batch: 24187, sum loss: 3963.716553, avg loss: 2.793317, ppl: 16.335112 +epoch: 1, batch: 24188, sum loss: 4531.732422, avg loss: 2.703898, ppl: 14.937841 +epoch: 1, batch: 24189, sum loss: 4113.736816, avg loss: 2.479648, ppl: 11.937066 +epoch: 1, batch: 24190, sum loss: 4702.274902, avg loss: 2.671747, ppl: 14.465218 +epoch: 1, batch: 24191, sum loss: 4321.932617, avg loss: 2.674463, ppl: 14.504562 +epoch: 1, batch: 24192, sum loss: 
4409.603027, avg loss: 2.677355, ppl: 14.546560 +epoch: 1, batch: 24193, sum loss: 4141.100098, avg loss: 2.411823, ppl: 11.154277 +epoch: 1, batch: 24194, sum loss: 4985.226562, avg loss: 2.588384, ppl: 13.308244 +epoch: 1, batch: 24195, sum loss: 5076.421875, avg loss: 2.812422, ppl: 16.650196 +epoch: 1, batch: 24196, sum loss: 4559.187500, avg loss: 2.705749, ppl: 14.965526 +epoch: 1, batch: 24197, sum loss: 4738.581055, avg loss: 2.632545, ppl: 13.909123 +epoch: 1, batch: 24198, sum loss: 4709.373047, avg loss: 2.708093, ppl: 15.000637 +epoch: 1, batch: 24199, sum loss: 4631.670410, avg loss: 2.692832, ppl: 14.773448 +epoch: 1, batch: 24200, sum loss: 6192.087891, avg loss: 3.068428, ppl: 21.508072 +epoch: 1, batch: 24201, sum loss: 4286.279785, avg loss: 2.508063, ppl: 12.281119 +epoch: 1, batch: 24202, sum loss: 4573.346680, avg loss: 2.608869, ppl: 13.583673 +epoch: 1, batch: 24203, sum loss: 4485.638672, avg loss: 2.753615, ppl: 15.699281 +epoch: 1, batch: 24204, sum loss: 4199.938965, avg loss: 2.458981, ppl: 11.692885 +epoch: 1, batch: 24205, sum loss: 2811.782471, avg loss: 2.374816, ppl: 10.749037 +epoch: 1, batch: 24206, sum loss: 5035.770020, avg loss: 2.697252, ppl: 14.838902 +epoch: 1, batch: 24207, sum loss: 4059.724609, avg loss: 2.631059, ppl: 13.888475 +epoch: 1, batch: 24208, sum loss: 3762.707275, avg loss: 2.318366, ppl: 10.159057 +epoch: 1, batch: 24209, sum loss: 4339.569824, avg loss: 2.683717, ppl: 14.639403 +epoch: 1, batch: 24210, sum loss: 4516.915039, avg loss: 3.003268, ppl: 20.151279 +epoch: 1, batch: 24211, sum loss: 4909.812988, avg loss: 2.636849, ppl: 13.969120 +epoch: 1, batch: 24212, sum loss: 4572.936035, avg loss: 2.453292, ppl: 11.626557 +epoch: 1, batch: 24213, sum loss: 4212.547852, avg loss: 2.528540, ppl: 12.535193 +epoch: 1, batch: 24214, sum loss: 4376.803711, avg loss: 2.789550, ppl: 16.273693 +epoch: 1, batch: 24215, sum loss: 4722.419922, avg loss: 2.576334, ppl: 13.148846 +epoch: 1, batch: 24216, sum loss: 
4031.950195, avg loss: 2.621554, ppl: 13.757088 +epoch: 1, batch: 24217, sum loss: 3576.368164, avg loss: 2.260662, ppl: 9.589438 +epoch: 1, batch: 24218, sum loss: 4154.704590, avg loss: 2.502834, ppl: 12.217069 +epoch: 1, batch: 24219, sum loss: 5439.715332, avg loss: 3.038947, ppl: 20.883244 +epoch: 1, batch: 24220, sum loss: 4516.645020, avg loss: 2.658414, ppl: 14.273631 +epoch: 1, batch: 24221, sum loss: 3495.280273, avg loss: 2.293491, ppl: 9.909471 +epoch: 1, batch: 24222, sum loss: 5049.463867, avg loss: 2.794390, ppl: 16.352657 +epoch: 1, batch: 24223, sum loss: 4478.112305, avg loss: 2.740583, ppl: 15.496016 +epoch: 1, batch: 24224, sum loss: 4397.641113, avg loss: 2.476149, ppl: 11.895370 +epoch: 1, batch: 24225, sum loss: 4046.062988, avg loss: 2.675968, ppl: 14.526401 +epoch: 1, batch: 24226, sum loss: 4396.182129, avg loss: 2.710346, ppl: 15.034484 +epoch: 1, batch: 24227, sum loss: 3803.219727, avg loss: 2.495551, ppl: 12.128416 +epoch: 1, batch: 24228, sum loss: 3912.072754, avg loss: 2.389782, ppl: 10.911115 +epoch: 1, batch: 24229, sum loss: 3707.038574, avg loss: 2.665017, ppl: 14.368196 +epoch: 1, batch: 24230, sum loss: 4966.477539, avg loss: 2.647376, ppl: 14.116948 +epoch: 1, batch: 24231, sum loss: 5436.686523, avg loss: 2.974117, ppl: 19.572338 +epoch: 1, batch: 24232, sum loss: 4288.449219, avg loss: 2.624510, ppl: 13.797812 +epoch: 1, batch: 24233, sum loss: 4864.827148, avg loss: 2.799095, ppl: 16.429773 +epoch: 1, batch: 24234, sum loss: 4101.059570, avg loss: 2.569586, ppl: 13.060420 +epoch: 1, batch: 24235, sum loss: 4809.590820, avg loss: 2.814272, ppl: 16.681026 +epoch: 1, batch: 24236, sum loss: 3965.497559, avg loss: 2.489327, ppl: 12.053158 +epoch: 1, batch: 24237, sum loss: 4217.885742, avg loss: 2.560951, ppl: 12.948119 +epoch: 1, batch: 24238, sum loss: 4188.658203, avg loss: 2.518736, ppl: 12.412898 +epoch: 1, batch: 24239, sum loss: 5050.783203, avg loss: 2.778208, ppl: 16.090170 +epoch: 1, batch: 24240, sum loss: 
5342.758789, avg loss: 3.072317, ppl: 21.591871 +epoch: 1, batch: 24241, sum loss: 3833.466309, avg loss: 2.424710, ppl: 11.298950 +epoch: 1, batch: 24242, sum loss: 5918.901367, avg loss: 2.980313, ppl: 19.693977 +epoch: 1, batch: 24243, sum loss: 5514.683105, avg loss: 2.870736, ppl: 17.649998 +epoch: 1, batch: 24244, sum loss: 4612.289551, avg loss: 2.735640, ppl: 15.419613 +epoch: 1, batch: 24245, sum loss: 4231.838867, avg loss: 2.529491, ppl: 12.547123 +epoch: 1, batch: 24246, sum loss: 4374.993164, avg loss: 2.576557, ppl: 13.151774 +epoch: 1, batch: 24247, sum loss: 4866.443848, avg loss: 2.907075, ppl: 18.303190 +epoch: 1, batch: 24248, sum loss: 4914.230957, avg loss: 2.650610, ppl: 14.162675 +epoch: 1, batch: 24249, sum loss: 5247.908203, avg loss: 2.951580, ppl: 19.136156 +epoch: 1, batch: 24250, sum loss: 4432.540039, avg loss: 2.649456, ppl: 14.146344 +epoch: 1, batch: 24251, sum loss: 4959.034668, avg loss: 2.709855, ppl: 15.027098 +epoch: 1, batch: 24252, sum loss: 4457.819336, avg loss: 2.644021, ppl: 14.069665 +epoch: 1, batch: 24253, sum loss: 4328.490234, avg loss: 2.541685, ppl: 12.701058 +epoch: 1, batch: 24254, sum loss: 4838.375000, avg loss: 2.474872, ppl: 11.880188 +epoch: 1, batch: 24255, sum loss: 4412.998535, avg loss: 2.734200, ppl: 15.397421 +epoch: 1, batch: 24256, sum loss: 4118.431152, avg loss: 2.890127, ppl: 17.995594 +epoch: 1, batch: 24257, sum loss: 4468.463867, avg loss: 2.590414, ppl: 13.335292 +epoch: 1, batch: 24258, sum loss: 4260.829102, avg loss: 2.708728, ppl: 15.010168 +epoch: 1, batch: 24259, sum loss: 5415.030762, avg loss: 2.933386, ppl: 18.791151 +epoch: 1, batch: 24260, sum loss: 4836.943359, avg loss: 2.452811, ppl: 11.620968 +epoch: 1, batch: 24261, sum loss: 4695.977051, avg loss: 2.638189, ppl: 13.987853 +epoch: 1, batch: 24262, sum loss: 4532.075195, avg loss: 2.627290, ppl: 13.836223 +epoch: 1, batch: 24263, sum loss: 4355.046387, avg loss: 2.438436, ppl: 11.455109 +epoch: 1, batch: 24264, sum loss: 
5329.868652, avg loss: 2.859372, ppl: 17.450558 +epoch: 1, batch: 24265, sum loss: 4924.344727, avg loss: 2.644654, ppl: 14.078567 +epoch: 1, batch: 24266, sum loss: 3746.663818, avg loss: 2.415644, ppl: 11.196979 +epoch: 1, batch: 24267, sum loss: 5654.290039, avg loss: 3.038307, ppl: 20.869890 +epoch: 1, batch: 24268, sum loss: 5018.870605, avg loss: 2.742552, ppl: 15.526563 +epoch: 1, batch: 24269, sum loss: 4790.011719, avg loss: 2.788133, ppl: 16.250647 +epoch: 1, batch: 24270, sum loss: 4284.498047, avg loss: 2.699747, ppl: 14.875965 +epoch: 1, batch: 24271, sum loss: 4098.177734, avg loss: 2.526620, ppl: 12.511146 +epoch: 1, batch: 24272, sum loss: 4452.143555, avg loss: 2.634404, ppl: 13.935011 +epoch: 1, batch: 24273, sum loss: 4800.291016, avg loss: 2.517195, ppl: 12.393784 +epoch: 1, batch: 24274, sum loss: 4360.468750, avg loss: 2.521960, ppl: 12.452981 +epoch: 1, batch: 24275, sum loss: 4202.526855, avg loss: 2.688757, ppl: 14.713371 +epoch: 1, batch: 24276, sum loss: 4468.399902, avg loss: 2.718005, ppl: 15.150064 +epoch: 1, batch: 24277, sum loss: 4724.184570, avg loss: 2.571685, ppl: 13.087857 +epoch: 1, batch: 24278, sum loss: 3983.626953, avg loss: 2.659297, ppl: 14.286242 +epoch: 1, batch: 24279, sum loss: 4956.381348, avg loss: 2.605879, ppl: 13.543122 +epoch: 1, batch: 24280, sum loss: 4183.532715, avg loss: 2.673184, ppl: 14.486014 +epoch: 1, batch: 24281, sum loss: 4631.799805, avg loss: 2.596300, ppl: 13.414020 +epoch: 1, batch: 24282, sum loss: 3482.782227, avg loss: 2.285290, ppl: 9.828539 +epoch: 1, batch: 24283, sum loss: 4104.593262, avg loss: 2.826855, ppl: 16.892250 +epoch: 1, batch: 24284, sum loss: 4540.400879, avg loss: 2.702620, ppl: 14.918765 +epoch: 1, batch: 24285, sum loss: 3823.360840, avg loss: 2.419849, ppl: 11.244158 +epoch: 1, batch: 24286, sum loss: 4577.138184, avg loss: 2.627519, ppl: 13.839390 +epoch: 1, batch: 24287, sum loss: 3734.018799, avg loss: 2.472860, ppl: 11.856309 +epoch: 1, batch: 24288, sum loss: 
4465.367188, avg loss: 2.729442, ppl: 15.324335 +epoch: 1, batch: 24289, sum loss: 5040.585449, avg loss: 2.645977, ppl: 14.097205 +epoch: 1, batch: 24290, sum loss: 4353.810547, avg loss: 2.674331, ppl: 14.502640 +epoch: 1, batch: 24291, sum loss: 5176.096680, avg loss: 2.785843, ppl: 16.213482 +epoch: 1, batch: 24292, sum loss: 5168.950684, avg loss: 2.889296, ppl: 17.980648 +epoch: 1, batch: 24293, sum loss: 5556.273438, avg loss: 2.916679, ppl: 18.479813 +epoch: 1, batch: 24294, sum loss: 5169.557129, avg loss: 2.715104, ppl: 15.106175 +epoch: 1, batch: 24295, sum loss: 4432.050781, avg loss: 2.526825, ppl: 12.513708 +epoch: 1, batch: 24296, sum loss: 4627.507812, avg loss: 2.490585, ppl: 12.068338 +epoch: 1, batch: 24297, sum loss: 4881.669922, avg loss: 2.630210, ppl: 13.876686 +epoch: 1, batch: 24298, sum loss: 5026.448242, avg loss: 2.620672, ppl: 13.744954 +epoch: 1, batch: 24299, sum loss: 4770.949219, avg loss: 2.723145, ppl: 15.228136 +epoch: 1, batch: 24300, sum loss: 4439.159668, avg loss: 2.636081, ppl: 13.958389 +epoch: 1, batch: 24301, sum loss: 4425.473633, avg loss: 2.541915, ppl: 12.703972 +epoch: 1, batch: 24302, sum loss: 4336.488281, avg loss: 2.510995, ppl: 12.317179 +epoch: 1, batch: 24303, sum loss: 5472.668945, avg loss: 2.887952, ppl: 17.956495 +epoch: 1, batch: 24304, sum loss: 3983.030762, avg loss: 2.622140, ppl: 13.765148 +epoch: 1, batch: 24305, sum loss: 4044.244141, avg loss: 2.384578, ppl: 10.854481 +epoch: 1, batch: 24306, sum loss: 3761.659424, avg loss: 2.596038, ppl: 13.410505 +epoch: 1, batch: 24307, sum loss: 4277.578613, avg loss: 2.485519, ppl: 12.007353 +epoch: 1, batch: 24308, sum loss: 4410.809570, avg loss: 2.551076, ppl: 12.820888 +epoch: 1, batch: 24309, sum loss: 5007.127441, avg loss: 2.705093, ppl: 14.955709 +epoch: 1, batch: 24310, sum loss: 3842.821289, avg loss: 2.655716, ppl: 14.235177 +epoch: 1, batch: 24311, sum loss: 4579.116699, avg loss: 2.595871, ppl: 13.408264 +epoch: 1, batch: 24312, sum loss: 
4140.564453, avg loss: 2.537111, ppl: 12.643086 +epoch: 1, batch: 24313, sum loss: 3635.431152, avg loss: 2.380767, ppl: 10.813192 +epoch: 1, batch: 24314, sum loss: 3416.150391, avg loss: 2.306651, ppl: 10.040743 +epoch: 1, batch: 24315, sum loss: 4633.421387, avg loss: 2.541646, ppl: 12.700565 +epoch: 1, batch: 24316, sum loss: 4439.706055, avg loss: 2.661694, ppl: 14.320532 +epoch: 1, batch: 24317, sum loss: 3436.321289, avg loss: 2.297006, ppl: 9.944368 +epoch: 1, batch: 24318, sum loss: 4769.601562, avg loss: 2.699265, ppl: 14.868803 +epoch: 1, batch: 24319, sum loss: 5407.658203, avg loss: 2.868784, ppl: 17.615587 +epoch: 1, batch: 24320, sum loss: 5320.522949, avg loss: 2.618368, ppl: 13.713321 +epoch: 1, batch: 24321, sum loss: 4253.567871, avg loss: 2.508000, ppl: 12.280343 +epoch: 1, batch: 24322, sum loss: 4503.175781, avg loss: 2.802225, ppl: 16.481279 +epoch: 1, batch: 24323, sum loss: 4745.546387, avg loss: 2.679586, ppl: 14.579052 +epoch: 1, batch: 24324, sum loss: 4398.114258, avg loss: 2.767850, ppl: 15.924366 +epoch: 1, batch: 24325, sum loss: 4754.307617, avg loss: 2.902508, ppl: 18.219774 +epoch: 1, batch: 24326, sum loss: 4405.675781, avg loss: 2.666874, ppl: 14.394903 +epoch: 1, batch: 24327, sum loss: 4322.685059, avg loss: 2.676585, ppl: 14.535373 +epoch: 1, batch: 24328, sum loss: 4594.467773, avg loss: 2.678990, ppl: 14.570368 +epoch: 1, batch: 24329, sum loss: 4206.356445, avg loss: 2.800503, ppl: 16.452929 +epoch: 1, batch: 24330, sum loss: 3701.942871, avg loss: 2.607002, ppl: 13.558346 +epoch: 1, batch: 24331, sum loss: 4123.438965, avg loss: 2.709224, ppl: 15.017621 +epoch: 1, batch: 24332, sum loss: 4239.734863, avg loss: 2.683377, ppl: 14.634424 +epoch: 1, batch: 24333, sum loss: 5148.466797, avg loss: 2.716869, ppl: 15.132869 +epoch: 1, batch: 24334, sum loss: 5330.226562, avg loss: 3.042367, ppl: 20.954784 +epoch: 1, batch: 24335, sum loss: 4314.497559, avg loss: 2.635612, ppl: 13.951856 +epoch: 1, batch: 24336, sum loss: 
4765.112305, avg loss: 2.548188, ppl: 12.783924 +epoch: 1, batch: 24337, sum loss: 3906.447266, avg loss: 2.478710, ppl: 11.925872 +epoch: 1, batch: 24338, sum loss: 4313.269531, avg loss: 2.355691, ppl: 10.545410 +epoch: 1, batch: 24339, sum loss: 4694.934570, avg loss: 2.758481, ppl: 15.775858 +epoch: 1, batch: 24340, sum loss: 4581.087402, avg loss: 2.585264, ppl: 13.266788 +epoch: 1, batch: 24341, sum loss: 4152.204102, avg loss: 2.563089, ppl: 12.975840 +epoch: 1, batch: 24342, sum loss: 5087.327637, avg loss: 2.890527, ppl: 18.002794 +epoch: 1, batch: 24343, sum loss: 4394.342773, avg loss: 2.485488, ppl: 12.006980 +epoch: 1, batch: 24344, sum loss: 4624.231445, avg loss: 2.567591, ppl: 13.034386 +epoch: 1, batch: 24345, sum loss: 4006.781982, avg loss: 2.605190, ppl: 13.533793 +epoch: 1, batch: 24346, sum loss: 4674.849609, avg loss: 2.621901, ppl: 13.761864 +epoch: 1, batch: 24347, sum loss: 3693.294189, avg loss: 2.375109, ppl: 10.752185 +epoch: 1, batch: 24348, sum loss: 6146.250488, avg loss: 2.962049, ppl: 19.337545 +epoch: 1, batch: 24349, sum loss: 4868.484375, avg loss: 2.814153, ppl: 16.679037 +epoch: 1, batch: 24350, sum loss: 3891.820557, avg loss: 2.430869, ppl: 11.368753 +epoch: 1, batch: 24351, sum loss: 4224.425293, avg loss: 2.638617, ppl: 13.993834 +epoch: 1, batch: 24352, sum loss: 5102.315430, avg loss: 2.722687, ppl: 15.221167 +epoch: 1, batch: 24353, sum loss: 5179.227539, avg loss: 2.710218, ppl: 15.032558 +epoch: 1, batch: 24354, sum loss: 4399.967773, avg loss: 2.571577, ppl: 13.086443 +epoch: 1, batch: 24355, sum loss: 4737.784668, avg loss: 2.586127, ppl: 13.278246 +epoch: 1, batch: 24356, sum loss: 4269.589844, avg loss: 2.573592, ppl: 13.112846 +epoch: 1, batch: 24357, sum loss: 4533.134766, avg loss: 2.863635, ppl: 17.525120 +epoch: 1, batch: 24358, sum loss: 4401.037598, avg loss: 2.572202, ppl: 13.094630 +epoch: 1, batch: 24359, sum loss: 4719.567383, avg loss: 2.807595, ppl: 16.570019 +epoch: 1, batch: 24360, sum loss: 
4459.841797, avg loss: 2.815557, ppl: 16.702473 +epoch: 1, batch: 24361, sum loss: 4293.910156, avg loss: 2.580475, ppl: 13.203406 +epoch: 1, batch: 24362, sum loss: 4028.173828, avg loss: 2.522338, ppl: 12.457690 +epoch: 1, batch: 24363, sum loss: 4020.608398, avg loss: 2.459088, ppl: 11.694137 +epoch: 1, batch: 24364, sum loss: 5168.857422, avg loss: 2.638518, ppl: 13.992456 +epoch: 1, batch: 24365, sum loss: 4838.452148, avg loss: 2.622467, ppl: 13.769655 +epoch: 1, batch: 24366, sum loss: 3945.773438, avg loss: 2.582313, ppl: 13.227693 +epoch: 1, batch: 24367, sum loss: 4034.316406, avg loss: 2.616288, ppl: 13.684834 +epoch: 1, batch: 24368, sum loss: 4294.145996, avg loss: 2.509729, ppl: 12.301595 +epoch: 1, batch: 24369, sum loss: 4887.842773, avg loss: 2.704949, ppl: 14.953552 +epoch: 1, batch: 24370, sum loss: 4538.290039, avg loss: 2.322564, ppl: 10.201797 +epoch: 1, batch: 24371, sum loss: 4951.615723, avg loss: 2.975731, ppl: 19.603941 +epoch: 1, batch: 24372, sum loss: 4886.232422, avg loss: 2.959559, ppl: 19.289473 +epoch: 1, batch: 24373, sum loss: 4284.181641, avg loss: 2.506835, ppl: 12.266052 +epoch: 1, batch: 24374, sum loss: 4440.516602, avg loss: 2.643165, ppl: 14.057620 +epoch: 1, batch: 24375, sum loss: 5032.080078, avg loss: 2.728894, ppl: 15.315934 +epoch: 1, batch: 24376, sum loss: 3832.617676, avg loss: 2.493571, ppl: 12.104419 +epoch: 1, batch: 24377, sum loss: 3996.184814, avg loss: 2.574862, ppl: 13.129498 +epoch: 1, batch: 24378, sum loss: 4417.944824, avg loss: 2.591170, ppl: 13.345378 +epoch: 1, batch: 24379, sum loss: 5077.304199, avg loss: 2.697824, ppl: 14.847385 +epoch: 1, batch: 24380, sum loss: 3337.493896, avg loss: 2.335545, ppl: 10.335092 +epoch: 1, batch: 24381, sum loss: 4622.141113, avg loss: 2.639715, ppl: 14.009213 +epoch: 1, batch: 24382, sum loss: 4206.024414, avg loss: 2.326341, ppl: 10.240402 +epoch: 1, batch: 24383, sum loss: 4388.923828, avg loss: 2.763806, ppl: 15.860093 +epoch: 1, batch: 24384, sum loss: 
5255.047363, avg loss: 2.992624, ppl: 19.937927 +epoch: 1, batch: 24385, sum loss: 3810.597656, avg loss: 2.471205, ppl: 11.836699 +epoch: 1, batch: 24386, sum loss: 4360.339844, avg loss: 2.563398, ppl: 12.979847 +epoch: 1, batch: 24387, sum loss: 3123.363770, avg loss: 2.264948, ppl: 9.630628 +epoch: 1, batch: 24388, sum loss: 3698.556885, avg loss: 2.336422, ppl: 10.344164 +epoch: 1, batch: 24389, sum loss: 3906.873047, avg loss: 2.648727, ppl: 14.136038 +epoch: 1, batch: 24390, sum loss: 3821.357910, avg loss: 2.417051, ppl: 11.212745 +epoch: 1, batch: 24391, sum loss: 4086.644043, avg loss: 2.594695, ppl: 13.392497 +epoch: 1, batch: 24392, sum loss: 4351.498047, avg loss: 2.676198, ppl: 14.529746 +epoch: 1, batch: 24393, sum loss: 5480.201660, avg loss: 2.855759, ppl: 17.387629 +epoch: 1, batch: 24394, sum loss: 4532.561523, avg loss: 2.653724, ppl: 14.206853 +epoch: 1, batch: 24395, sum loss: 4150.513672, avg loss: 2.549456, ppl: 12.800134 +epoch: 1, batch: 24396, sum loss: 4407.850586, avg loss: 2.702545, ppl: 14.917647 +epoch: 1, batch: 24397, sum loss: 4404.225586, avg loss: 2.775190, ppl: 16.041668 +epoch: 1, batch: 24398, sum loss: 4586.356445, avg loss: 2.610334, ppl: 13.603589 +epoch: 1, batch: 24399, sum loss: 4119.300293, avg loss: 2.744371, ppl: 15.554822 +epoch: 1, batch: 24400, sum loss: 4876.767090, avg loss: 2.815685, ppl: 16.704618 +epoch: 1, batch: 24401, sum loss: 3961.420410, avg loss: 2.685709, ppl: 14.668594 +epoch: 1, batch: 24402, sum loss: 5274.380371, avg loss: 2.695136, ppl: 14.807527 +epoch: 1, batch: 24403, sum loss: 4126.464844, avg loss: 2.367450, ppl: 10.670146 +epoch: 1, batch: 24404, sum loss: 4275.066406, avg loss: 2.295954, ppl: 9.933908 +epoch: 1, batch: 24405, sum loss: 4219.041016, avg loss: 2.480330, ppl: 11.945203 +epoch: 1, batch: 24406, sum loss: 4758.147949, avg loss: 2.526897, ppl: 12.514618 +epoch: 1, batch: 24407, sum loss: 4262.950684, avg loss: 2.582042, ppl: 13.224111 +epoch: 1, batch: 24408, sum loss: 
2977.987549, avg loss: 2.310308, ppl: 10.077533 +epoch: 1, batch: 24409, sum loss: 4963.477539, avg loss: 2.562456, ppl: 12.967628 +epoch: 1, batch: 24410, sum loss: 4856.041992, avg loss: 2.828213, ppl: 16.915211 +epoch: 1, batch: 24411, sum loss: 4611.065430, avg loss: 2.874729, ppl: 17.720623 +epoch: 1, batch: 24412, sum loss: 4261.221191, avg loss: 2.379241, ppl: 10.796707 +epoch: 1, batch: 24413, sum loss: 4587.282227, avg loss: 2.800539, ppl: 16.453505 +epoch: 1, batch: 24414, sum loss: 4525.975098, avg loss: 2.442512, ppl: 11.501901 +epoch: 1, batch: 24415, sum loss: 3579.468262, avg loss: 2.453371, ppl: 11.627478 +epoch: 1, batch: 24416, sum loss: 5922.327148, avg loss: 2.785666, ppl: 16.210619 +epoch: 1, batch: 24417, sum loss: 4143.289062, avg loss: 2.676543, ppl: 14.534766 +epoch: 1, batch: 24418, sum loss: 4219.771484, avg loss: 2.616102, ppl: 13.682280 +epoch: 1, batch: 24419, sum loss: 4863.982422, avg loss: 2.663736, ppl: 14.349805 +epoch: 1, batch: 24420, sum loss: 4186.656250, avg loss: 2.592357, ppl: 13.361226 +epoch: 1, batch: 24421, sum loss: 4237.044434, avg loss: 2.463398, ppl: 11.744652 +epoch: 1, batch: 24422, sum loss: 5057.604492, avg loss: 2.852569, ppl: 17.332249 +epoch: 1, batch: 24423, sum loss: 4519.337402, avg loss: 2.627522, ppl: 13.839430 +epoch: 1, batch: 24424, sum loss: 4645.414551, avg loss: 2.662129, ppl: 14.326754 +epoch: 1, batch: 24425, sum loss: 3865.352051, avg loss: 2.580342, ppl: 13.201650 +epoch: 1, batch: 24426, sum loss: 4540.721680, avg loss: 2.593216, ppl: 13.372715 +epoch: 1, batch: 24427, sum loss: 5135.823242, avg loss: 2.746430, ppl: 15.586883 +epoch: 1, batch: 24428, sum loss: 5422.991211, avg loss: 2.598463, ppl: 13.443054 +epoch: 1, batch: 24429, sum loss: 4404.844727, avg loss: 2.634477, ppl: 13.936017 +epoch: 1, batch: 24430, sum loss: 4369.032227, avg loss: 2.722138, ppl: 15.212819 +epoch: 1, batch: 24431, sum loss: 3051.685547, avg loss: 2.389730, ppl: 10.910550 +epoch: 1, batch: 24432, sum loss: 
4610.992188, avg loss: 2.818455, ppl: 16.750950 +epoch: 1, batch: 24433, sum loss: 4636.390625, avg loss: 2.550270, ppl: 12.810560 +epoch: 1, batch: 24434, sum loss: 4249.533691, avg loss: 2.595928, ppl: 13.409021 +epoch: 1, batch: 24435, sum loss: 4706.217773, avg loss: 2.574517, ppl: 13.124983 +epoch: 1, batch: 24436, sum loss: 4563.460938, avg loss: 2.679660, ppl: 14.580136 +epoch: 1, batch: 24437, sum loss: 5238.330566, avg loss: 2.819338, ppl: 16.765753 +epoch: 1, batch: 24438, sum loss: 3988.288818, avg loss: 2.711277, ppl: 15.048484 +epoch: 1, batch: 24439, sum loss: 4849.505371, avg loss: 2.676327, ppl: 14.531628 +epoch: 1, batch: 24440, sum loss: 4369.041992, avg loss: 2.556490, ppl: 12.890497 +epoch: 1, batch: 24441, sum loss: 4825.528320, avg loss: 2.634022, ppl: 13.929683 +epoch: 1, batch: 24442, sum loss: 4049.891113, avg loss: 2.584487, ppl: 13.256490 +epoch: 1, batch: 24443, sum loss: 4926.942383, avg loss: 2.500986, ppl: 12.194513 +epoch: 1, batch: 24444, sum loss: 4024.727295, avg loss: 2.374470, ppl: 10.745321 +epoch: 1, batch: 24445, sum loss: 4790.382812, avg loss: 2.567193, ppl: 13.029203 +epoch: 1, batch: 24446, sum loss: 4033.940674, avg loss: 2.614349, ppl: 13.658323 +epoch: 1, batch: 24447, sum loss: 4546.380371, avg loss: 2.748719, ppl: 15.622599 +epoch: 1, batch: 24448, sum loss: 4536.386230, avg loss: 2.607119, ppl: 13.559923 +epoch: 1, batch: 24449, sum loss: 3812.950439, avg loss: 2.530159, ppl: 12.555509 +epoch: 1, batch: 24450, sum loss: 3794.476074, avg loss: 2.529651, ppl: 12.549122 +epoch: 1, batch: 24451, sum loss: 5101.367188, avg loss: 2.680697, ppl: 14.595269 +epoch: 1, batch: 24452, sum loss: 4864.583008, avg loss: 2.802179, ppl: 16.480520 +epoch: 1, batch: 24453, sum loss: 5010.402344, avg loss: 2.621875, ppl: 13.761497 +epoch: 1, batch: 24454, sum loss: 3287.358154, avg loss: 2.356529, ppl: 10.554256 +epoch: 1, batch: 24455, sum loss: 4370.302734, avg loss: 2.664819, ppl: 14.365346 +epoch: 1, batch: 24456, sum loss: 
4499.680664, avg loss: 2.831769, ppl: 16.975460 +epoch: 1, batch: 24457, sum loss: 3688.333008, avg loss: 2.628890, ppl: 13.858382 +epoch: 1, batch: 24458, sum loss: 4113.616699, avg loss: 2.380565, ppl: 10.811011 +epoch: 1, batch: 24459, sum loss: 4402.644043, avg loss: 2.478966, ppl: 11.928926 +epoch: 1, batch: 24460, sum loss: 5150.850098, avg loss: 2.796336, ppl: 16.384499 +epoch: 1, batch: 24461, sum loss: 5200.295410, avg loss: 2.779420, ppl: 16.109680 +epoch: 1, batch: 24462, sum loss: 4219.900391, avg loss: 2.622685, ppl: 13.772656 +epoch: 1, batch: 24463, sum loss: 4489.686035, avg loss: 2.932519, ppl: 18.774860 +epoch: 1, batch: 24464, sum loss: 3228.602295, avg loss: 2.202321, ppl: 9.045983 +epoch: 1, batch: 24465, sum loss: 4340.548828, avg loss: 2.532409, ppl: 12.583783 +epoch: 1, batch: 24466, sum loss: 3409.594971, avg loss: 2.384332, ppl: 10.851813 +epoch: 1, batch: 24467, sum loss: 5000.891113, avg loss: 2.899067, ppl: 18.157204 +epoch: 1, batch: 24468, sum loss: 4045.793213, avg loss: 2.509797, ppl: 12.302437 +epoch: 1, batch: 24469, sum loss: 4227.485352, avg loss: 2.743339, ppl: 15.538783 +epoch: 1, batch: 24470, sum loss: 3759.126465, avg loss: 2.294949, ppl: 9.923928 +epoch: 1, batch: 24471, sum loss: 5419.934570, avg loss: 2.677833, ppl: 14.553526 +epoch: 1, batch: 24472, sum loss: 4944.562500, avg loss: 2.715301, ppl: 15.109154 +epoch: 1, batch: 24473, sum loss: 4413.024902, avg loss: 2.737608, ppl: 15.449991 +epoch: 1, batch: 24474, sum loss: 3853.246826, avg loss: 2.487571, ppl: 12.032009 +epoch: 1, batch: 24475, sum loss: 5030.998535, avg loss: 2.653480, ppl: 14.203384 +epoch: 1, batch: 24476, sum loss: 4214.958496, avg loss: 2.577956, ppl: 13.170194 +epoch: 1, batch: 24477, sum loss: 3715.336914, avg loss: 2.594509, ppl: 13.390013 +epoch: 1, batch: 24478, sum loss: 3744.839355, avg loss: 2.336144, ppl: 10.341286 +epoch: 1, batch: 24479, sum loss: 3817.423584, avg loss: 2.451782, ppl: 11.609010 +epoch: 1, batch: 24480, sum loss: 
4090.576660, avg loss: 2.375480, ppl: 10.756174 +epoch: 1, batch: 24481, sum loss: 4102.558105, avg loss: 2.594913, ppl: 13.395425 +epoch: 1, batch: 24482, sum loss: 4236.328125, avg loss: 2.594200, ppl: 13.385870 +epoch: 1, batch: 24483, sum loss: 4421.431641, avg loss: 2.483950, ppl: 11.988530 +epoch: 1, batch: 24484, sum loss: 4434.166016, avg loss: 2.754140, ppl: 15.707533 +epoch: 1, batch: 24485, sum loss: 4480.166016, avg loss: 2.568902, ppl: 13.051493 +epoch: 1, batch: 24486, sum loss: 3962.573730, avg loss: 2.496896, ppl: 12.144738 +epoch: 1, batch: 24487, sum loss: 4676.942871, avg loss: 2.615740, ppl: 13.677331 +epoch: 1, batch: 24488, sum loss: 4072.176758, avg loss: 2.484550, ppl: 11.995715 +epoch: 1, batch: 24489, sum loss: 4645.485840, avg loss: 2.743937, ppl: 15.548085 +epoch: 1, batch: 24490, sum loss: 3641.003418, avg loss: 2.311748, ppl: 10.092051 +epoch: 1, batch: 24491, sum loss: 4306.000977, avg loss: 2.388242, ppl: 10.894330 +epoch: 1, batch: 24492, sum loss: 3448.826660, avg loss: 2.109374, ppl: 8.243080 +epoch: 1, batch: 24493, sum loss: 4220.759766, avg loss: 2.633038, ppl: 13.915979 +epoch: 1, batch: 24494, sum loss: 4638.991699, avg loss: 2.840779, ppl: 17.129097 +epoch: 1, batch: 24495, sum loss: 4057.344238, avg loss: 2.451568, ppl: 11.606527 +epoch: 1, batch: 24496, sum loss: 4709.631836, avg loss: 2.626677, ppl: 13.827744 +epoch: 1, batch: 24497, sum loss: 4032.395020, avg loss: 2.609965, ppl: 13.598570 +epoch: 1, batch: 24498, sum loss: 4087.794434, avg loss: 2.657864, ppl: 14.265779 +epoch: 1, batch: 24499, sum loss: 5295.802734, avg loss: 3.043565, ppl: 20.979898 +epoch: 1, batch: 24500, sum loss: 4294.966309, avg loss: 2.732167, ppl: 15.366146 +epoch: 1, batch: 24501, sum loss: 4795.475098, avg loss: 2.866393, ppl: 17.573513 +epoch: 1, batch: 24502, sum loss: 4656.820801, avg loss: 2.574252, ppl: 13.121494 +epoch: 1, batch: 24503, sum loss: 4093.313721, avg loss: 2.390954, ppl: 10.923913 +epoch: 1, batch: 24504, sum loss: 
4966.224121, avg loss: 2.617936, ppl: 13.707401 +epoch: 1, batch: 24505, sum loss: 4621.728516, avg loss: 2.556266, ppl: 12.887603 +epoch: 1, batch: 24506, sum loss: 4570.179688, avg loss: 2.672620, ppl: 14.477845 +epoch: 1, batch: 24507, sum loss: 4323.386719, avg loss: 2.642657, ppl: 14.050487 +epoch: 1, batch: 24508, sum loss: 4389.682129, avg loss: 2.586731, ppl: 13.286267 +epoch: 1, batch: 24509, sum loss: 5536.064941, avg loss: 2.907597, ppl: 18.312746 +epoch: 1, batch: 24510, sum loss: 3927.809570, avg loss: 2.573925, ppl: 13.117208 +epoch: 1, batch: 24511, sum loss: 4147.280762, avg loss: 2.687804, ppl: 14.699353 +epoch: 1, batch: 24512, sum loss: 4291.095703, avg loss: 2.750702, ppl: 15.653623 +epoch: 1, batch: 24513, sum loss: 3567.753662, avg loss: 2.307732, ppl: 10.051602 +epoch: 1, batch: 24514, sum loss: 4963.188477, avg loss: 3.004352, ppl: 20.173132 +epoch: 1, batch: 24515, sum loss: 4266.894043, avg loss: 2.835146, ppl: 17.032883 +epoch: 1, batch: 24516, sum loss: 3900.689697, avg loss: 2.668051, ppl: 14.411846 +epoch: 1, batch: 24517, sum loss: 5234.587402, avg loss: 2.934186, ppl: 18.806189 +epoch: 1, batch: 24518, sum loss: 5242.428711, avg loss: 2.872564, ppl: 17.682291 +epoch: 1, batch: 24519, sum loss: 5460.662109, avg loss: 2.695292, ppl: 14.809850 +epoch: 1, batch: 24520, sum loss: 5474.880859, avg loss: 2.909076, ppl: 18.339844 +epoch: 1, batch: 24521, sum loss: 5352.491211, avg loss: 2.824534, ppl: 16.853085 +epoch: 1, batch: 24522, sum loss: 4561.096680, avg loss: 2.533943, ppl: 12.603099 +epoch: 1, batch: 24523, sum loss: 3648.154785, avg loss: 2.367394, ppl: 10.669556 +epoch: 1, batch: 24524, sum loss: 3194.459229, avg loss: 2.177545, ppl: 8.824618 +epoch: 1, batch: 24525, sum loss: 4135.554199, avg loss: 2.662946, ppl: 14.338461 +epoch: 1, batch: 24526, sum loss: 5266.604004, avg loss: 2.786563, ppl: 16.225157 +epoch: 1, batch: 24527, sum loss: 4378.747070, avg loss: 2.785463, ppl: 16.207314 +epoch: 1, batch: 24528, sum loss: 
4710.823242, avg loss: 2.764568, ppl: 15.872175 +epoch: 1, batch: 24529, sum loss: 4251.704590, avg loss: 2.464756, ppl: 11.760618 +epoch: 1, batch: 24530, sum loss: 4487.827148, avg loss: 2.632157, ppl: 13.903726 +epoch: 1, batch: 24531, sum loss: 3612.554199, avg loss: 2.118800, ppl: 8.321148 +epoch: 1, batch: 24532, sum loss: 5210.676758, avg loss: 2.796928, ppl: 16.394205 +epoch: 1, batch: 24533, sum loss: 5440.145508, avg loss: 2.852725, ppl: 17.334948 +epoch: 1, batch: 24534, sum loss: 5098.618164, avg loss: 2.770988, ppl: 15.974409 +epoch: 1, batch: 24535, sum loss: 4945.525391, avg loss: 2.935030, ppl: 18.822067 +epoch: 1, batch: 24536, sum loss: 5073.473633, avg loss: 2.798386, ppl: 16.418129 +epoch: 1, batch: 24537, sum loss: 4242.342773, avg loss: 2.585218, ppl: 13.266180 +epoch: 1, batch: 24538, sum loss: 5190.710938, avg loss: 2.898219, ppl: 18.141811 +epoch: 1, batch: 24539, sum loss: 4319.209961, avg loss: 2.649822, ppl: 14.151520 +epoch: 1, batch: 24540, sum loss: 4902.321289, avg loss: 2.652771, ppl: 14.193317 +epoch: 1, batch: 24541, sum loss: 3762.703857, avg loss: 2.600348, ppl: 13.468428 +epoch: 1, batch: 24542, sum loss: 5402.469727, avg loss: 2.747950, ppl: 15.610599 +epoch: 1, batch: 24543, sum loss: 4878.337891, avg loss: 2.874684, ppl: 17.719816 +epoch: 1, batch: 24544, sum loss: 5341.687500, avg loss: 2.640478, ppl: 14.019905 +epoch: 1, batch: 24545, sum loss: 4545.903809, avg loss: 2.661536, ppl: 14.318265 +epoch: 1, batch: 24546, sum loss: 4967.066895, avg loss: 2.972512, ppl: 19.540941 +epoch: 1, batch: 24547, sum loss: 4032.119141, avg loss: 2.529560, ppl: 12.547988 +epoch: 1, batch: 24548, sum loss: 5313.736328, avg loss: 3.000416, ppl: 20.093891 +epoch: 1, batch: 24549, sum loss: 4205.925293, avg loss: 2.530641, ppl: 12.561557 +epoch: 1, batch: 24550, sum loss: 3801.540283, avg loss: 2.478188, ppl: 11.919644 +epoch: 1, batch: 24551, sum loss: 3737.694580, avg loss: 2.334600, ppl: 10.325329 +epoch: 1, batch: 24552, sum loss: 
5939.894531, avg loss: 2.972920, ppl: 19.548923 +epoch: 1, batch: 24553, sum loss: 4924.655762, avg loss: 2.649089, ppl: 14.141145 +epoch: 1, batch: 24554, sum loss: 4075.843018, avg loss: 2.617754, ppl: 13.704907 +epoch: 1, batch: 24555, sum loss: 4510.623047, avg loss: 2.791227, ppl: 16.301010 +epoch: 1, batch: 24556, sum loss: 5526.810059, avg loss: 3.013528, ppl: 20.359093 +epoch: 1, batch: 24557, sum loss: 3978.210205, avg loss: 2.770341, ppl: 15.964083 +epoch: 1, batch: 24558, sum loss: 4750.586914, avg loss: 2.746004, ppl: 15.580247 +epoch: 1, batch: 24559, sum loss: 4245.985352, avg loss: 2.624218, ppl: 13.793790 +epoch: 1, batch: 24560, sum loss: 4365.387207, avg loss: 2.683090, ppl: 14.630227 +epoch: 1, batch: 24561, sum loss: 4437.016113, avg loss: 2.766220, ppl: 15.898418 +epoch: 1, batch: 24562, sum loss: 3945.195312, avg loss: 2.580245, ppl: 13.200378 +epoch: 1, batch: 24563, sum loss: 4794.082520, avg loss: 2.672287, ppl: 14.473031 +epoch: 1, batch: 24564, sum loss: 4129.778809, avg loss: 2.443656, ppl: 11.515062 +epoch: 1, batch: 24565, sum loss: 5689.731445, avg loss: 3.054070, ppl: 21.201448 +epoch: 1, batch: 24566, sum loss: 3918.942627, avg loss: 2.441709, ppl: 11.492662 +epoch: 1, batch: 24567, sum loss: 4574.325684, avg loss: 2.706702, ppl: 14.979783 +epoch: 1, batch: 24568, sum loss: 4063.837158, avg loss: 2.570422, ppl: 13.071339 +epoch: 1, batch: 24569, sum loss: 4919.799316, avg loss: 2.475993, ppl: 11.893517 +epoch: 1, batch: 24570, sum loss: 5331.527832, avg loss: 2.702244, ppl: 14.913163 +epoch: 1, batch: 24571, sum loss: 4339.914551, avg loss: 2.700631, ppl: 14.889130 +epoch: 1, batch: 24572, sum loss: 4526.829590, avg loss: 2.688141, ppl: 14.704317 +epoch: 1, batch: 24573, sum loss: 4998.893555, avg loss: 2.694821, ppl: 14.802875 +epoch: 1, batch: 24574, sum loss: 4575.042969, avg loss: 2.746124, ppl: 15.582119 +epoch: 1, batch: 24575, sum loss: 5735.893555, avg loss: 2.941484, ppl: 18.943933 +epoch: 1, batch: 24576, sum loss: 
3500.001465, avg loss: 2.274205, ppl: 9.720188 +epoch: 1, batch: 24577, sum loss: 4087.998047, avg loss: 2.698349, ppl: 14.855179 +epoch: 1, batch: 24578, sum loss: 4894.900391, avg loss: 2.864190, ppl: 17.534842 +epoch: 1, batch: 24579, sum loss: 4770.629395, avg loss: 2.648878, ppl: 14.138168 +epoch: 1, batch: 24580, sum loss: 3475.622314, avg loss: 2.410279, ppl: 11.137069 +epoch: 1, batch: 24581, sum loss: 4122.643066, avg loss: 2.385789, ppl: 10.867633 +epoch: 1, batch: 24582, sum loss: 4227.694824, avg loss: 2.519484, ppl: 12.422189 +epoch: 1, batch: 24583, sum loss: 4022.442871, avg loss: 2.537819, ppl: 12.652045 +epoch: 1, batch: 24584, sum loss: 4440.977539, avg loss: 2.680131, ppl: 14.587007 +epoch: 1, batch: 24585, sum loss: 4324.364258, avg loss: 2.704418, ppl: 14.945615 +epoch: 1, batch: 24586, sum loss: 4066.920410, avg loss: 2.714900, ppl: 15.103100 +epoch: 1, batch: 24587, sum loss: 4550.875000, avg loss: 2.723444, ppl: 15.232697 +epoch: 1, batch: 24588, sum loss: 4515.832031, avg loss: 2.581951, ppl: 13.222910 +epoch: 1, batch: 24589, sum loss: 5082.721191, avg loss: 2.897789, ppl: 18.133999 +epoch: 1, batch: 24590, sum loss: 4140.755371, avg loss: 2.688802, ppl: 14.714042 +epoch: 1, batch: 24591, sum loss: 5259.206543, avg loss: 2.850518, ppl: 17.296747 +epoch: 1, batch: 24592, sum loss: 4846.768555, avg loss: 2.604389, ppl: 13.522966 +epoch: 1, batch: 24593, sum loss: 4421.454102, avg loss: 2.519347, ppl: 12.420483 +epoch: 1, batch: 24594, sum loss: 4802.232910, avg loss: 2.694856, ppl: 14.803386 +epoch: 1, batch: 24595, sum loss: 4982.604980, avg loss: 2.910400, ppl: 18.364141 +epoch: 1, batch: 24596, sum loss: 4816.739258, avg loss: 2.739897, ppl: 15.485391 +epoch: 1, batch: 24597, sum loss: 3933.786133, avg loss: 2.556066, ppl: 12.885032 +epoch: 1, batch: 24598, sum loss: 4329.939453, avg loss: 2.671153, ppl: 14.456629 +epoch: 1, batch: 24599, sum loss: 4011.318359, avg loss: 2.548487, ppl: 12.787738 +epoch: 1, batch: 24600, sum loss: 
3843.706543, avg loss: 2.560764, ppl: 12.945702 +epoch: 1, batch: 24601, sum loss: 3289.116211, avg loss: 2.342675, ppl: 10.409048 +epoch: 1, batch: 24602, sum loss: 4078.527100, avg loss: 2.660487, ppl: 14.303259 +epoch: 1, batch: 24603, sum loss: 4492.437500, avg loss: 2.636407, ppl: 13.962947 +epoch: 1, batch: 24604, sum loss: 4525.209473, avg loss: 2.821203, ppl: 16.797045 +epoch: 1, batch: 24605, sum loss: 3756.417480, avg loss: 2.509297, ppl: 12.296282 +epoch: 1, batch: 24606, sum loss: 4297.753906, avg loss: 2.596830, ppl: 13.421130 +epoch: 1, batch: 24607, sum loss: 4383.136719, avg loss: 2.829656, ppl: 16.939627 +epoch: 1, batch: 24608, sum loss: 4655.620117, avg loss: 2.746679, ppl: 15.590762 +epoch: 1, batch: 24609, sum loss: 4668.172363, avg loss: 2.586245, ppl: 13.279813 +epoch: 1, batch: 24610, sum loss: 4657.271484, avg loss: 2.613508, ppl: 13.646843 +epoch: 1, batch: 24611, sum loss: 6273.208496, avg loss: 2.967459, ppl: 19.442453 +epoch: 1, batch: 24612, sum loss: 4618.468262, avg loss: 2.795683, ppl: 16.373810 +epoch: 1, batch: 24613, sum loss: 4512.660156, avg loss: 2.516821, ppl: 12.389151 +epoch: 1, batch: 24614, sum loss: 4071.591553, avg loss: 2.643891, ppl: 14.067830 +epoch: 1, batch: 24615, sum loss: 5736.375000, avg loss: 3.193973, ppl: 24.385113 +epoch: 1, batch: 24616, sum loss: 3568.039062, avg loss: 2.335104, ppl: 10.330537 +epoch: 1, batch: 24617, sum loss: 3863.969238, avg loss: 2.581142, ppl: 13.212214 +epoch: 1, batch: 24618, sum loss: 3673.349609, avg loss: 2.453807, ppl: 11.632552 +epoch: 1, batch: 24619, sum loss: 4926.712891, avg loss: 2.681934, ppl: 14.613330 +epoch: 1, batch: 24620, sum loss: 4269.461914, avg loss: 2.747401, ppl: 15.602036 +epoch: 1, batch: 24621, sum loss: 4605.290527, avg loss: 2.759311, ppl: 15.788967 +epoch: 1, batch: 24622, sum loss: 3930.513672, avg loss: 2.557263, ppl: 12.900465 +epoch: 1, batch: 24623, sum loss: 5050.323242, avg loss: 2.758232, ppl: 15.771939 +epoch: 1, batch: 24624, sum loss: 
4303.902344, avg loss: 2.581825, ppl: 13.221245 +epoch: 1, batch: 24625, sum loss: 5151.471191, avg loss: 2.866706, ppl: 17.579023 +epoch: 1, batch: 24626, sum loss: 4172.546387, avg loss: 2.819288, ppl: 16.764914 +epoch: 1, batch: 24627, sum loss: 4890.634277, avg loss: 2.848360, ppl: 17.259455 +epoch: 1, batch: 24628, sum loss: 4592.760742, avg loss: 2.597715, ppl: 13.433014 +epoch: 1, batch: 24629, sum loss: 4862.463379, avg loss: 2.591932, ppl: 13.355544 +epoch: 1, batch: 24630, sum loss: 3979.879639, avg loss: 2.346627, ppl: 10.450264 +epoch: 1, batch: 24631, sum loss: 3881.861572, avg loss: 2.442959, ppl: 11.507038 +epoch: 1, batch: 24632, sum loss: 5125.966309, avg loss: 2.791921, ppl: 16.312321 +epoch: 1, batch: 24633, sum loss: 4805.872559, avg loss: 2.830314, ppl: 16.950777 +epoch: 1, batch: 24634, sum loss: 5026.860840, avg loss: 2.809872, ppl: 16.607790 +epoch: 1, batch: 24635, sum loss: 4756.814941, avg loss: 2.518166, ppl: 12.405818 +epoch: 1, batch: 24636, sum loss: 5160.854492, avg loss: 2.713383, ppl: 15.080209 +epoch: 1, batch: 24637, sum loss: 4183.085938, avg loss: 2.501846, ppl: 12.205002 +epoch: 1, batch: 24638, sum loss: 4106.294922, avg loss: 2.569646, ppl: 13.061205 +epoch: 1, batch: 24639, sum loss: 3785.411133, avg loss: 2.306771, ppl: 10.041945 +epoch: 1, batch: 24640, sum loss: 4272.086914, avg loss: 2.638720, ppl: 13.995272 +epoch: 1, batch: 24641, sum loss: 4151.823730, avg loss: 2.624414, ppl: 13.796494 +epoch: 1, batch: 24642, sum loss: 4193.670898, avg loss: 2.509677, ppl: 12.300961 +epoch: 1, batch: 24643, sum loss: 5287.574219, avg loss: 2.817035, ppl: 16.727177 +epoch: 1, batch: 24644, sum loss: 4205.303223, avg loss: 2.643183, ppl: 14.057872 +epoch: 1, batch: 24645, sum loss: 4515.706543, avg loss: 2.340957, ppl: 10.391178 +epoch: 1, batch: 24646, sum loss: 4311.563477, avg loss: 2.684660, ppl: 14.653214 +epoch: 1, batch: 24647, sum loss: 4988.917480, avg loss: 2.765475, ppl: 15.886589 +epoch: 1, batch: 24648, sum loss: 
4953.330078, avg loss: 2.566492, ppl: 13.020074 +epoch: 1, batch: 24649, sum loss: 3780.080566, avg loss: 2.818852, ppl: 16.757601 +epoch: 1, batch: 24650, sum loss: 4516.959961, avg loss: 2.816060, ppl: 16.710876 +epoch: 1, batch: 24651, sum loss: 4219.842285, avg loss: 2.360091, ppl: 10.591912 +epoch: 1, batch: 24652, sum loss: 4486.958984, avg loss: 2.693253, ppl: 14.779670 +epoch: 1, batch: 24653, sum loss: 4462.131836, avg loss: 2.688031, ppl: 14.702701 +epoch: 1, batch: 24654, sum loss: 4365.708496, avg loss: 2.554540, ppl: 12.865379 +epoch: 1, batch: 24655, sum loss: 4354.492188, avg loss: 2.621609, ppl: 13.757838 +epoch: 1, batch: 24656, sum loss: 4720.176270, avg loss: 2.603517, ppl: 13.511174 +epoch: 1, batch: 24657, sum loss: 4361.531738, avg loss: 2.576215, ppl: 13.147282 +epoch: 1, batch: 24658, sum loss: 3857.249512, avg loss: 2.388390, ppl: 10.895935 +epoch: 1, batch: 24659, sum loss: 4329.410156, avg loss: 2.471125, ppl: 11.835751 +epoch: 1, batch: 24660, sum loss: 5401.761719, avg loss: 2.899496, ppl: 18.164993 +epoch: 1, batch: 24661, sum loss: 5042.173340, avg loss: 3.030152, ppl: 20.700386 +epoch: 1, batch: 24662, sum loss: 3988.359863, avg loss: 2.655366, ppl: 14.230192 +epoch: 1, batch: 24663, sum loss: 4346.129883, avg loss: 2.541596, ppl: 12.699929 +epoch: 1, batch: 24664, sum loss: 4631.682617, avg loss: 2.827645, ppl: 16.905598 +epoch: 1, batch: 24665, sum loss: 4809.604004, avg loss: 2.641188, ppl: 14.029866 +epoch: 1, batch: 24666, sum loss: 4157.506836, avg loss: 2.790273, ppl: 16.285469 +epoch: 1, batch: 24667, sum loss: 4977.660156, avg loss: 2.850894, ppl: 17.303240 +epoch: 1, batch: 24668, sum loss: 3627.514648, avg loss: 2.456002, ppl: 11.658109 +epoch: 1, batch: 24669, sum loss: 4091.836670, avg loss: 2.609590, ppl: 13.593471 +epoch: 1, batch: 24670, sum loss: 4760.587891, avg loss: 2.878227, ppl: 17.782724 +epoch: 1, batch: 24671, sum loss: 4773.317871, avg loss: 2.863418, ppl: 17.521311 +epoch: 1, batch: 24672, sum loss: 
3845.563965, avg loss: 2.548419, ppl: 12.786872 +epoch: 1, batch: 24673, sum loss: 3826.639648, avg loss: 2.524169, ppl: 12.480516 +epoch: 1, batch: 24674, sum loss: 3649.774902, avg loss: 2.390160, ppl: 10.915244 +epoch: 1, batch: 24675, sum loss: 3972.430176, avg loss: 2.544798, ppl: 12.740659 +epoch: 1, batch: 24676, sum loss: 5525.281738, avg loss: 2.943677, ppl: 18.985527 +epoch: 1, batch: 24677, sum loss: 4414.401855, avg loss: 2.673775, ppl: 14.494579 +epoch: 1, batch: 24678, sum loss: 4718.635254, avg loss: 2.656889, ppl: 14.251885 +epoch: 1, batch: 24679, sum loss: 5387.707031, avg loss: 2.929694, ppl: 18.721899 +epoch: 1, batch: 24680, sum loss: 4366.230957, avg loss: 2.557839, ppl: 12.907892 +epoch: 1, batch: 24681, sum loss: 4876.695801, avg loss: 2.659049, ppl: 14.282697 +epoch: 1, batch: 24682, sum loss: 5086.497559, avg loss: 2.728808, ppl: 15.314616 +epoch: 1, batch: 24683, sum loss: 5187.379395, avg loss: 2.813112, ppl: 16.661697 +epoch: 1, batch: 24684, sum loss: 4349.621582, avg loss: 2.739057, ppl: 15.472382 +epoch: 1, batch: 24685, sum loss: 5826.737305, avg loss: 2.932429, ppl: 18.773182 +epoch: 1, batch: 24686, sum loss: 4653.647461, avg loss: 2.610010, ppl: 13.599186 +epoch: 1, batch: 24687, sum loss: 3906.351807, avg loss: 2.496071, ppl: 12.134727 +epoch: 1, batch: 24688, sum loss: 4327.744629, avg loss: 2.492940, ppl: 12.096793 +epoch: 1, batch: 24689, sum loss: 4580.254883, avg loss: 2.617289, ppl: 13.698531 +epoch: 1, batch: 24690, sum loss: 5525.914551, avg loss: 2.846942, ppl: 17.234995 +epoch: 1, batch: 24691, sum loss: 3685.288086, avg loss: 2.399276, ppl: 11.015202 +epoch: 1, batch: 24692, sum loss: 4230.550781, avg loss: 2.570201, ppl: 13.068453 +epoch: 1, batch: 24693, sum loss: 3744.564453, avg loss: 2.266685, ppl: 9.647371 +epoch: 1, batch: 24694, sum loss: 5006.059570, avg loss: 2.726612, ppl: 15.281025 +epoch: 1, batch: 24695, sum loss: 4468.360352, avg loss: 2.737966, ppl: 15.455518 +epoch: 1, batch: 24696, sum loss: 
4447.823730, avg loss: 2.590462, ppl: 13.335934 +epoch: 1, batch: 24697, sum loss: 4370.486328, avg loss: 2.626494, ppl: 13.825216 +epoch: 1, batch: 24698, sum loss: 4538.373047, avg loss: 2.696597, ppl: 14.829184 +epoch: 1, batch: 24699, sum loss: 4554.103516, avg loss: 2.669463, ppl: 14.432213 +epoch: 1, batch: 24700, sum loss: 4585.155273, avg loss: 2.709903, ppl: 15.027814 +epoch: 1, batch: 24701, sum loss: 4374.652344, avg loss: 2.561272, ppl: 12.952278 +epoch: 1, batch: 24702, sum loss: 4954.623535, avg loss: 2.549987, ppl: 12.806932 +epoch: 1, batch: 24703, sum loss: 4064.152588, avg loss: 2.578777, ppl: 13.181009 +epoch: 1, batch: 24704, sum loss: 4759.974121, avg loss: 2.960183, ppl: 19.301502 +epoch: 1, batch: 24705, sum loss: 4865.003418, avg loss: 2.887242, ppl: 17.943758 +epoch: 1, batch: 24706, sum loss: 5330.008789, avg loss: 2.706962, ppl: 14.983691 +epoch: 1, batch: 24707, sum loss: 3572.365723, avg loss: 2.410503, ppl: 11.139567 +epoch: 1, batch: 24708, sum loss: 4103.017090, avg loss: 2.655675, ppl: 14.234587 +epoch: 1, batch: 24709, sum loss: 5106.120117, avg loss: 2.651153, ppl: 14.170365 +epoch: 1, batch: 24710, sum loss: 4554.185059, avg loss: 2.712439, ppl: 15.065974 +epoch: 1, batch: 24711, sum loss: 3681.694336, avg loss: 2.369173, ppl: 10.688547 +epoch: 1, batch: 24712, sum loss: 4842.479492, avg loss: 2.882428, ppl: 17.857586 +epoch: 1, batch: 24713, sum loss: 5278.699219, avg loss: 2.747891, ppl: 15.609683 +epoch: 1, batch: 24714, sum loss: 4035.212891, avg loss: 2.391946, ppl: 10.934753 +epoch: 1, batch: 24715, sum loss: 4632.881836, avg loss: 2.473509, ppl: 11.864003 +epoch: 1, batch: 24716, sum loss: 4987.761719, avg loss: 2.869828, ppl: 17.633993 +epoch: 1, batch: 24717, sum loss: 4659.935059, avg loss: 2.800442, ppl: 16.451912 +epoch: 1, batch: 24718, sum loss: 5294.866211, avg loss: 2.954725, ppl: 19.196434 +epoch: 1, batch: 24719, sum loss: 3850.426270, avg loss: 2.672052, ppl: 14.469625 +epoch: 1, batch: 24720, sum loss: 
5079.388672, avg loss: 2.775622, ppl: 16.048613 +epoch: 1, batch: 24721, sum loss: 5178.148438, avg loss: 2.829589, ppl: 16.938503 +epoch: 1, batch: 24722, sum loss: 4383.566406, avg loss: 2.632773, ppl: 13.912291 +epoch: 1, batch: 24723, sum loss: 4795.439453, avg loss: 2.545350, ppl: 12.747690 +epoch: 1, batch: 24724, sum loss: 4727.623047, avg loss: 2.879186, ppl: 17.799776 +epoch: 1, batch: 24725, sum loss: 4147.066895, avg loss: 2.558339, ppl: 12.914344 +epoch: 1, batch: 24726, sum loss: 3874.285400, avg loss: 2.572567, ppl: 13.099404 +epoch: 1, batch: 24727, sum loss: 3368.300293, avg loss: 2.294482, ppl: 9.919301 +epoch: 1, batch: 24728, sum loss: 4153.062012, avg loss: 2.518534, ppl: 12.410386 +epoch: 1, batch: 24729, sum loss: 5055.026367, avg loss: 2.841499, ppl: 17.141439 +epoch: 1, batch: 24730, sum loss: 4352.972168, avg loss: 2.487413, ppl: 12.030110 +epoch: 1, batch: 24731, sum loss: 6270.184082, avg loss: 3.069106, ppl: 21.522655 +epoch: 1, batch: 24732, sum loss: 4134.053223, avg loss: 2.545599, ppl: 12.750866 +epoch: 1, batch: 24733, sum loss: 4638.103027, avg loss: 2.554021, ppl: 12.858709 +epoch: 1, batch: 24734, sum loss: 3972.562500, avg loss: 2.698752, ppl: 14.861169 +epoch: 1, batch: 24735, sum loss: 5087.246582, avg loss: 2.743930, ppl: 15.547970 +epoch: 1, batch: 24736, sum loss: 4557.221191, avg loss: 2.853614, ppl: 17.350370 +epoch: 1, batch: 24737, sum loss: 3778.706055, avg loss: 2.369095, ppl: 10.687714 +epoch: 1, batch: 24738, sum loss: 4390.051758, avg loss: 2.686690, ppl: 14.683000 +epoch: 1, batch: 24739, sum loss: 4242.574707, avg loss: 2.551157, ppl: 12.821937 +epoch: 1, batch: 24740, sum loss: 4391.108887, avg loss: 2.661278, ppl: 14.314575 +epoch: 1, batch: 24741, sum loss: 4112.327148, avg loss: 2.627685, ppl: 13.841690 +epoch: 1, batch: 24742, sum loss: 3329.904297, avg loss: 2.268327, ppl: 9.663223 +epoch: 1, batch: 24743, sum loss: 4916.937500, avg loss: 2.860348, ppl: 17.467600 +epoch: 1, batch: 24744, sum loss: 
4128.248535, avg loss: 2.639545, ppl: 14.006828 +epoch: 1, batch: 24745, sum loss: 3922.240967, avg loss: 2.462173, ppl: 11.730270 +epoch: 1, batch: 24746, sum loss: 4104.322266, avg loss: 2.524183, ppl: 12.480698 +epoch: 1, batch: 24747, sum loss: 4843.238281, avg loss: 2.665514, ppl: 14.375333 +epoch: 1, batch: 24748, sum loss: 3687.326172, avg loss: 2.233390, ppl: 9.331443 +epoch: 1, batch: 24749, sum loss: 4362.082520, avg loss: 2.568953, ppl: 13.052155 +epoch: 1, batch: 24750, sum loss: 4857.838379, avg loss: 2.793467, ppl: 16.337557 +epoch: 1, batch: 24751, sum loss: 4593.394531, avg loss: 2.799143, ppl: 16.430565 +epoch: 1, batch: 24752, sum loss: 5366.217285, avg loss: 2.953339, ppl: 19.169861 +epoch: 1, batch: 24753, sum loss: 4163.438477, avg loss: 2.605406, ppl: 13.536717 +epoch: 1, batch: 24754, sum loss: 4022.690918, avg loss: 2.601999, ppl: 13.490683 +epoch: 1, batch: 24755, sum loss: 4671.314941, avg loss: 2.841432, ppl: 17.140299 +epoch: 1, batch: 24756, sum loss: 4836.105957, avg loss: 2.741557, ppl: 15.511111 +epoch: 1, batch: 24757, sum loss: 4545.735840, avg loss: 2.694568, ppl: 14.799123 +epoch: 1, batch: 24758, sum loss: 3941.334473, avg loss: 2.699544, ppl: 14.872951 +epoch: 1, batch: 24759, sum loss: 4738.388184, avg loss: 2.885742, ppl: 17.916857 +epoch: 1, batch: 24760, sum loss: 5323.648926, avg loss: 2.901171, ppl: 18.195444 +epoch: 1, batch: 24761, sum loss: 4838.381348, avg loss: 2.841093, ppl: 17.134485 +epoch: 1, batch: 24762, sum loss: 4509.479004, avg loss: 2.886991, ppl: 17.939241 +epoch: 1, batch: 24763, sum loss: 5038.365234, avg loss: 3.033333, ppl: 20.766323 +epoch: 1, batch: 24764, sum loss: 4047.310303, avg loss: 2.749532, ppl: 15.635305 +epoch: 1, batch: 24765, sum loss: 4898.326660, avg loss: 2.931375, ppl: 18.753389 +epoch: 1, batch: 24766, sum loss: 4914.550293, avg loss: 2.742495, ppl: 15.525671 +epoch: 1, batch: 24767, sum loss: 3472.408936, avg loss: 2.311857, ppl: 10.093147 +epoch: 1, batch: 24768, sum loss: 
4051.017090, avg loss: 2.434505, ppl: 11.410174 +epoch: 1, batch: 24769, sum loss: 4524.303711, avg loss: 2.612185, ppl: 13.628791 +epoch: 1, batch: 24770, sum loss: 4247.810547, avg loss: 2.604421, ppl: 13.523392 +epoch: 1, batch: 24771, sum loss: 4902.698730, avg loss: 2.620363, ppl: 13.740710 +epoch: 1, batch: 24772, sum loss: 4603.955078, avg loss: 2.819323, ppl: 16.765501 +epoch: 1, batch: 24773, sum loss: 4509.845215, avg loss: 2.690839, ppl: 14.744035 +epoch: 1, batch: 24774, sum loss: 4089.403076, avg loss: 2.418334, ppl: 11.227142 +epoch: 1, batch: 24775, sum loss: 5433.223145, avg loss: 3.050659, ppl: 21.129257 +epoch: 1, batch: 24776, sum loss: 5166.890625, avg loss: 2.873688, ppl: 17.702179 +epoch: 1, batch: 24777, sum loss: 3713.148682, avg loss: 2.525952, ppl: 12.502788 +epoch: 1, batch: 24778, sum loss: 5033.577148, avg loss: 2.639527, ppl: 14.006572 +epoch: 1, batch: 24779, sum loss: 3795.229004, avg loss: 2.364629, ppl: 10.640086 +epoch: 1, batch: 24780, sum loss: 4696.969238, avg loss: 2.707187, ppl: 14.987057 +epoch: 1, batch: 24781, sum loss: 5475.619629, avg loss: 2.869822, ppl: 17.633871 +epoch: 1, batch: 24782, sum loss: 4321.609375, avg loss: 2.664371, ppl: 14.358912 +epoch: 1, batch: 24783, sum loss: 4413.915527, avg loss: 2.901983, ppl: 18.210217 +epoch: 1, batch: 24784, sum loss: 4614.786133, avg loss: 2.644577, ppl: 14.077483 +epoch: 1, batch: 24785, sum loss: 4323.750977, avg loss: 2.695606, ppl: 14.814487 +epoch: 1, batch: 24786, sum loss: 4534.715332, avg loss: 2.715399, ppl: 15.110631 +epoch: 1, batch: 24787, sum loss: 3680.507080, avg loss: 2.405560, ppl: 11.084639 +epoch: 1, batch: 24788, sum loss: 3363.448730, avg loss: 2.418008, ppl: 11.223478 +epoch: 1, batch: 24789, sum loss: 4327.301758, avg loss: 2.591199, ppl: 13.345759 +epoch: 1, batch: 24790, sum loss: 4580.672852, avg loss: 2.675627, ppl: 14.521445 +epoch: 1, batch: 24791, sum loss: 4268.757812, avg loss: 2.746948, ppl: 15.594971 +epoch: 1, batch: 24792, sum loss: 
4391.258301, avg loss: 2.411454, ppl: 11.150167 +epoch: 1, batch: 24793, sum loss: 3808.291260, avg loss: 2.574909, ppl: 13.130128 +epoch: 1, batch: 24794, sum loss: 4258.063965, avg loss: 2.537583, ppl: 12.649059 +epoch: 1, batch: 24795, sum loss: 4512.116211, avg loss: 2.557889, ppl: 12.908539 +epoch: 1, batch: 24796, sum loss: 4733.062500, avg loss: 2.754984, ppl: 15.720792 +epoch: 1, batch: 24797, sum loss: 5371.782715, avg loss: 2.774681, ppl: 16.033512 +epoch: 1, batch: 24798, sum loss: 4583.603516, avg loss: 2.657162, ppl: 14.255770 +epoch: 1, batch: 24799, sum loss: 4172.301270, avg loss: 2.561265, ppl: 12.952194 +epoch: 1, batch: 24800, sum loss: 4390.715820, avg loss: 2.419127, ppl: 11.236049 +epoch: 1, batch: 24801, sum loss: 4771.288574, avg loss: 2.717135, ppl: 15.136892 +epoch: 1, batch: 24802, sum loss: 3897.999023, avg loss: 2.372489, ppl: 10.724049 +epoch: 1, batch: 24803, sum loss: 3870.493896, avg loss: 2.459018, ppl: 11.693317 +epoch: 1, batch: 24804, sum loss: 5066.881348, avg loss: 2.722666, ppl: 15.220847 +epoch: 1, batch: 24805, sum loss: 2650.621094, avg loss: 2.088748, ppl: 8.074799 +epoch: 1, batch: 24806, sum loss: 3401.968018, avg loss: 2.300181, ppl: 9.975989 +epoch: 1, batch: 24807, sum loss: 4900.944336, avg loss: 2.826381, ppl: 16.884241 +epoch: 1, batch: 24808, sum loss: 4206.819336, avg loss: 2.650800, ppl: 14.165366 +epoch: 1, batch: 24809, sum loss: 4226.216309, avg loss: 2.433055, ppl: 11.393636 +epoch: 1, batch: 24810, sum loss: 4950.218750, avg loss: 2.707997, ppl: 14.999204 +epoch: 1, batch: 24811, sum loss: 4636.791504, avg loss: 2.680226, ppl: 14.588395 +epoch: 1, batch: 24812, sum loss: 4543.301758, avg loss: 2.740230, ppl: 15.490553 +epoch: 1, batch: 24813, sum loss: 4069.132812, avg loss: 2.654359, ppl: 14.215876 +epoch: 1, batch: 24814, sum loss: 4794.564453, avg loss: 2.708794, ppl: 15.011156 +epoch: 1, batch: 24815, sum loss: 3562.352539, avg loss: 2.241883, ppl: 9.411038 +epoch: 1, batch: 24816, sum loss: 
4951.365723, avg loss: 2.705664, ppl: 14.964255 +epoch: 1, batch: 24817, sum loss: 3872.672363, avg loss: 2.606105, ppl: 13.546186 +epoch: 1, batch: 24818, sum loss: 3728.582031, avg loss: 2.362853, ppl: 10.621211 +epoch: 1, batch: 24819, sum loss: 4887.529297, avg loss: 2.777005, ppl: 16.070820 +epoch: 1, batch: 24820, sum loss: 4856.054688, avg loss: 2.630582, ppl: 13.881848 +epoch: 1, batch: 24821, sum loss: 4784.276855, avg loss: 2.693850, ppl: 14.788496 +epoch: 1, batch: 24822, sum loss: 4912.891602, avg loss: 3.021458, ppl: 20.521198 +epoch: 1, batch: 24823, sum loss: 4760.819336, avg loss: 2.655226, ppl: 14.228194 +epoch: 1, batch: 24824, sum loss: 4677.024414, avg loss: 2.836279, ppl: 17.052198 +epoch: 1, batch: 24825, sum loss: 4922.850098, avg loss: 2.805043, ppl: 16.527782 +epoch: 1, batch: 24826, sum loss: 3943.051025, avg loss: 2.483030, ppl: 11.977499 +epoch: 1, batch: 24827, sum loss: 3554.059814, avg loss: 2.495829, ppl: 12.131783 +epoch: 1, batch: 24828, sum loss: 4526.840820, avg loss: 2.667555, ppl: 14.404708 +epoch: 1, batch: 24829, sum loss: 3869.693848, avg loss: 2.378423, ppl: 10.787874 +epoch: 1, batch: 24830, sum loss: 5002.002441, avg loss: 2.542960, ppl: 12.717257 +epoch: 1, batch: 24831, sum loss: 3944.089355, avg loss: 2.579522, ppl: 13.190836 +epoch: 1, batch: 24832, sum loss: 3674.806152, avg loss: 2.266999, ppl: 9.650401 +epoch: 1, batch: 24833, sum loss: 4316.343262, avg loss: 2.680959, ppl: 14.599080 +epoch: 1, batch: 24834, sum loss: 4571.947754, avg loss: 2.780990, ppl: 16.134989 +epoch: 1, batch: 24835, sum loss: 4669.036133, avg loss: 2.417937, ppl: 11.222684 +epoch: 1, batch: 24836, sum loss: 4357.724609, avg loss: 2.701627, ppl: 14.903965 +epoch: 1, batch: 24837, sum loss: 4925.711914, avg loss: 2.963726, ppl: 19.370001 +epoch: 1, batch: 24838, sum loss: 4842.762207, avg loss: 2.674082, ppl: 14.499034 +epoch: 1, batch: 24839, sum loss: 4675.489258, avg loss: 2.631114, ppl: 13.889231 +epoch: 1, batch: 24840, sum loss: 
3679.420898, avg loss: 2.556929, ppl: 12.896151 +epoch: 1, batch: 24841, sum loss: 4106.526367, avg loss: 2.613957, ppl: 13.652971 +epoch: 1, batch: 24842, sum loss: 4139.085449, avg loss: 2.719504, ppl: 15.172800 +epoch: 1, batch: 24843, sum loss: 3902.897217, avg loss: 2.448493, ppl: 11.570890 +epoch: 1, batch: 24844, sum loss: 4163.683594, avg loss: 2.546595, ppl: 12.763574 +epoch: 1, batch: 24845, sum loss: 5431.918945, avg loss: 2.857401, ppl: 17.416201 +epoch: 1, batch: 24846, sum loss: 5423.877930, avg loss: 3.120758, ppl: 22.663559 +epoch: 1, batch: 24847, sum loss: 4267.605957, avg loss: 2.469679, ppl: 11.818657 +epoch: 1, batch: 24848, sum loss: 4554.292480, avg loss: 2.723859, ppl: 15.239021 +epoch: 1, batch: 24849, sum loss: 3565.751953, avg loss: 2.374002, ppl: 10.740294 +epoch: 1, batch: 24850, sum loss: 3924.588135, avg loss: 2.512540, ppl: 12.336228 +epoch: 1, batch: 24851, sum loss: 5410.439453, avg loss: 2.752004, ppl: 15.674009 +epoch: 1, batch: 24852, sum loss: 4425.558594, avg loss: 2.586534, ppl: 13.283645 +epoch: 1, batch: 24853, sum loss: 3930.326904, avg loss: 2.451857, ppl: 11.609887 +epoch: 1, batch: 24854, sum loss: 3991.980957, avg loss: 2.498111, ppl: 12.159500 +epoch: 1, batch: 24855, sum loss: 5895.910645, avg loss: 2.936211, ppl: 18.844303 +epoch: 1, batch: 24856, sum loss: 3601.742432, avg loss: 2.489110, ppl: 12.050550 +epoch: 1, batch: 24857, sum loss: 4818.699219, avg loss: 2.785375, ppl: 16.205896 +epoch: 1, batch: 24858, sum loss: 5425.113770, avg loss: 2.916728, ppl: 18.480715 +epoch: 1, batch: 24859, sum loss: 3509.210938, avg loss: 2.425163, ppl: 11.304072 +epoch: 1, batch: 24860, sum loss: 3833.777100, avg loss: 2.547360, ppl: 12.773343 +epoch: 1, batch: 24861, sum loss: 4896.446289, avg loss: 2.853407, ppl: 17.346781 +epoch: 1, batch: 24862, sum loss: 5136.876953, avg loss: 2.781200, ppl: 16.138382 +epoch: 1, batch: 24863, sum loss: 4317.517090, avg loss: 2.698448, ppl: 14.856659 +epoch: 1, batch: 24864, sum loss: 
4058.305176, avg loss: 2.599811, ppl: 13.461195 +epoch: 1, batch: 24865, sum loss: 4878.808594, avg loss: 2.743987, ppl: 15.548853 +epoch: 1, batch: 24866, sum loss: 3612.122070, avg loss: 2.438975, ppl: 11.461288 +epoch: 1, batch: 24867, sum loss: 5249.108398, avg loss: 3.034167, ppl: 20.783649 +epoch: 1, batch: 24868, sum loss: 3818.949219, avg loss: 2.363211, ppl: 10.625015 +epoch: 1, batch: 24869, sum loss: 4582.336426, avg loss: 2.709838, ppl: 15.026847 +epoch: 1, batch: 24870, sum loss: 4553.076660, avg loss: 2.619722, ppl: 13.731904 +epoch: 1, batch: 24871, sum loss: 5036.009277, avg loss: 2.710446, ppl: 15.035985 +epoch: 1, batch: 24872, sum loss: 4333.744141, avg loss: 2.675151, ppl: 14.514540 +epoch: 1, batch: 24873, sum loss: 4194.777344, avg loss: 2.597385, ppl: 13.428582 +epoch: 1, batch: 24874, sum loss: 4718.756348, avg loss: 2.594149, ppl: 13.385187 +epoch: 1, batch: 24875, sum loss: 5113.095215, avg loss: 2.910128, ppl: 18.359150 +epoch: 1, batch: 24876, sum loss: 4052.967529, avg loss: 2.652466, ppl: 14.188980 +epoch: 1, batch: 24877, sum loss: 4997.437500, avg loss: 2.925900, ppl: 18.651005 +epoch: 1, batch: 24878, sum loss: 5140.692383, avg loss: 2.678839, ppl: 14.568172 +epoch: 1, batch: 24879, sum loss: 3761.635498, avg loss: 2.494453, ppl: 12.115108 +epoch: 1, batch: 24880, sum loss: 4301.607422, avg loss: 2.483607, ppl: 11.984415 +epoch: 1, batch: 24881, sum loss: 3854.511475, avg loss: 2.599131, ppl: 13.452047 +epoch: 1, batch: 24882, sum loss: 4212.638184, avg loss: 2.480941, ppl: 11.952510 +epoch: 1, batch: 24883, sum loss: 4056.981934, avg loss: 2.360083, ppl: 10.591826 +epoch: 1, batch: 24884, sum loss: 3568.791992, avg loss: 2.335597, ppl: 10.335627 +epoch: 1, batch: 24885, sum loss: 5148.921875, avg loss: 2.754907, ppl: 15.719584 +epoch: 1, batch: 24886, sum loss: 4474.776855, avg loss: 2.567284, ppl: 13.030391 +epoch: 1, batch: 24887, sum loss: 3923.333984, avg loss: 2.557584, ppl: 12.904603 +epoch: 1, batch: 24888, sum loss: 
4246.065430, avg loss: 2.612963, ppl: 13.639407 +epoch: 1, batch: 24889, sum loss: 4233.375977, avg loss: 2.644207, ppl: 14.072289 +epoch: 1, batch: 24890, sum loss: 4020.357422, avg loss: 2.500222, ppl: 12.185202 +epoch: 1, batch: 24891, sum loss: 4137.032715, avg loss: 2.530295, ppl: 12.557212 +epoch: 1, batch: 24892, sum loss: 4094.924072, avg loss: 2.545012, ppl: 12.743378 +epoch: 1, batch: 24893, sum loss: 3780.199463, avg loss: 2.503443, ppl: 12.224517 +epoch: 1, batch: 24894, sum loss: 4005.646484, avg loss: 2.681156, ppl: 14.601959 +epoch: 1, batch: 24895, sum loss: 4841.455078, avg loss: 2.823006, ppl: 16.827353 +epoch: 1, batch: 24896, sum loss: 3438.205811, avg loss: 2.367910, ppl: 10.675062 +epoch: 1, batch: 24897, sum loss: 3585.223633, avg loss: 2.464071, ppl: 11.752563 +epoch: 1, batch: 24898, sum loss: 4881.550781, avg loss: 2.859725, ppl: 17.456730 +epoch: 1, batch: 24899, sum loss: 4361.915039, avg loss: 2.607242, ppl: 13.561591 +epoch: 1, batch: 24900, sum loss: 5838.167969, avg loss: 3.024958, ppl: 20.593132 +epoch: 1, batch: 24901, sum loss: 3585.529541, avg loss: 2.489951, ppl: 12.060687 +epoch: 1, batch: 24902, sum loss: 4345.277832, avg loss: 2.578800, ppl: 13.181311 +epoch: 1, batch: 24903, sum loss: 4797.620117, avg loss: 2.722826, ppl: 15.223289 +epoch: 1, batch: 24904, sum loss: 4629.013672, avg loss: 2.790243, ppl: 16.284983 +epoch: 1, batch: 24905, sum loss: 3222.347168, avg loss: 2.388693, ppl: 10.899240 +epoch: 1, batch: 24906, sum loss: 4772.097168, avg loss: 2.709879, ppl: 15.027459 +epoch: 1, batch: 24907, sum loss: 3676.330078, avg loss: 2.279188, ppl: 9.768744 +epoch: 1, batch: 24908, sum loss: 4849.052246, avg loss: 2.687945, ppl: 14.701428 +epoch: 1, batch: 24909, sum loss: 4071.468750, avg loss: 2.588346, ppl: 13.307746 +epoch: 1, batch: 24910, sum loss: 3842.557129, avg loss: 2.479069, ppl: 11.930155 +epoch: 1, batch: 24911, sum loss: 4385.260254, avg loss: 2.463629, ppl: 11.747371 +epoch: 1, batch: 24912, sum loss: 
4270.222656, avg loss: 2.570874, ppl: 13.077255 +epoch: 1, batch: 24913, sum loss: 3947.344727, avg loss: 2.409856, ppl: 11.132361 +epoch: 1, batch: 24914, sum loss: 4198.083984, avg loss: 2.577093, ppl: 13.158825 +epoch: 1, batch: 24915, sum loss: 4407.095703, avg loss: 2.404308, ppl: 11.070763 +epoch: 1, batch: 24916, sum loss: 4685.591797, avg loss: 2.494990, ppl: 12.121616 +epoch: 1, batch: 24917, sum loss: 4425.467773, avg loss: 2.601686, ppl: 13.486454 +epoch: 1, batch: 24918, sum loss: 4490.934082, avg loss: 2.477073, ppl: 11.906368 +epoch: 1, batch: 24919, sum loss: 3655.029785, avg loss: 2.635205, ppl: 13.946175 +epoch: 1, batch: 24920, sum loss: 4124.767578, avg loss: 2.384259, ppl: 10.851016 +epoch: 1, batch: 24921, sum loss: 4204.004883, avg loss: 2.458482, ppl: 11.687060 +epoch: 1, batch: 24922, sum loss: 5222.186523, avg loss: 2.807627, ppl: 16.570553 +epoch: 1, batch: 24923, sum loss: 4476.056152, avg loss: 2.707838, ppl: 14.996815 +epoch: 1, batch: 24924, sum loss: 4643.725586, avg loss: 2.674957, ppl: 14.511726 +epoch: 1, batch: 24925, sum loss: 5787.992188, avg loss: 2.770700, ppl: 15.969809 +epoch: 1, batch: 24926, sum loss: 4202.418945, avg loss: 2.616699, ppl: 13.690459 +epoch: 1, batch: 24927, sum loss: 5430.926758, avg loss: 2.884188, ppl: 17.889044 +epoch: 1, batch: 24928, sum loss: 4776.189453, avg loss: 2.662313, ppl: 14.329394 +epoch: 1, batch: 24929, sum loss: 4194.936035, avg loss: 2.537771, ppl: 12.651443 +epoch: 1, batch: 24930, sum loss: 4353.414062, avg loss: 2.591318, ppl: 13.347350 +epoch: 1, batch: 24931, sum loss: 5373.160645, avg loss: 2.879507, ppl: 17.805494 +epoch: 1, batch: 24932, sum loss: 4453.102539, avg loss: 2.708700, ppl: 15.009746 +epoch: 1, batch: 24933, sum loss: 4515.743652, avg loss: 2.626960, ppl: 13.831655 +epoch: 1, batch: 24934, sum loss: 4346.143555, avg loss: 2.568643, ppl: 13.048104 +epoch: 1, batch: 24935, sum loss: 3969.667236, avg loss: 2.572694, ppl: 13.101075 +epoch: 1, batch: 24936, sum loss: 
4308.758301, avg loss: 2.687934, ppl: 14.701270 +epoch: 1, batch: 24937, sum loss: 3646.630859, avg loss: 2.439218, ppl: 11.464070 +epoch: 1, batch: 24938, sum loss: 3943.118652, avg loss: 2.362564, ppl: 10.618137 +epoch: 1, batch: 24939, sum loss: 4901.087891, avg loss: 2.800622, ppl: 16.454870 +epoch: 1, batch: 24940, sum loss: 3626.419922, avg loss: 2.627841, ppl: 13.843843 +epoch: 1, batch: 24941, sum loss: 4324.151367, avg loss: 2.593972, ppl: 13.382822 +epoch: 1, batch: 24942, sum loss: 3725.530273, avg loss: 2.244295, ppl: 9.433765 +epoch: 1, batch: 24943, sum loss: 4133.344727, avg loss: 2.564110, ppl: 12.989087 +epoch: 1, batch: 24944, sum loss: 4446.198730, avg loss: 2.722718, ppl: 15.221639 +epoch: 1, batch: 24945, sum loss: 5322.011230, avg loss: 2.938714, ppl: 18.891541 +epoch: 1, batch: 24946, sum loss: 3836.173096, avg loss: 2.320734, ppl: 10.183144 +epoch: 1, batch: 24947, sum loss: 3812.804443, avg loss: 2.482295, ppl: 11.968696 +epoch: 1, batch: 24948, sum loss: 3856.265869, avg loss: 2.407157, ppl: 11.102354 +epoch: 1, batch: 24949, sum loss: 4718.513672, avg loss: 2.806968, ppl: 16.559637 +epoch: 1, batch: 24950, sum loss: 4289.879883, avg loss: 2.558068, ppl: 12.910847 +epoch: 1, batch: 24951, sum loss: 4723.233887, avg loss: 2.770225, ppl: 15.962230 +epoch: 1, batch: 24952, sum loss: 4426.661621, avg loss: 2.622430, ppl: 13.769143 +epoch: 1, batch: 24953, sum loss: 4773.812500, avg loss: 2.759429, ppl: 15.790827 +epoch: 1, batch: 24954, sum loss: 4110.320312, avg loss: 2.987151, ppl: 19.829117 +epoch: 1, batch: 24955, sum loss: 4646.439941, avg loss: 2.568513, ppl: 13.046412 +epoch: 1, batch: 24956, sum loss: 3055.224609, avg loss: 2.115806, ppl: 8.296272 +epoch: 1, batch: 24957, sum loss: 4661.608887, avg loss: 2.750212, ppl: 15.645944 +epoch: 1, batch: 24958, sum loss: 4756.396973, avg loss: 2.802827, ppl: 16.491199 +epoch: 1, batch: 24959, sum loss: 4968.068848, avg loss: 2.816365, ppl: 16.715986 +epoch: 1, batch: 24960, sum loss: 
4014.432129, avg loss: 2.411070, ppl: 11.145885 +epoch: 1, batch: 24961, sum loss: 4386.467285, avg loss: 2.639270, ppl: 14.002982 +epoch: 1, batch: 24962, sum loss: 4962.812500, avg loss: 2.592901, ppl: 13.368497 +epoch: 1, batch: 24963, sum loss: 3389.313477, avg loss: 2.034402, ppl: 7.647676 +epoch: 1, batch: 24964, sum loss: 4689.429688, avg loss: 2.743961, ppl: 15.548456 +epoch: 1, batch: 24965, sum loss: 4700.892090, avg loss: 2.564589, ppl: 12.995319 +epoch: 1, batch: 24966, sum loss: 5114.182617, avg loss: 2.877987, ppl: 17.778448 +epoch: 1, batch: 24967, sum loss: 5096.652832, avg loss: 2.623085, ppl: 13.778157 +epoch: 1, batch: 24968, sum loss: 3665.604980, avg loss: 2.380263, ppl: 10.807743 +epoch: 1, batch: 24969, sum loss: 5031.743164, avg loss: 2.719861, ppl: 15.178216 +epoch: 1, batch: 24970, sum loss: 5123.018555, avg loss: 2.639371, ppl: 14.004388 +epoch: 1, batch: 24971, sum loss: 4405.611328, avg loss: 2.389160, ppl: 10.904332 +epoch: 1, batch: 24972, sum loss: 4148.531738, avg loss: 2.362490, ppl: 10.617352 +epoch: 1, batch: 24973, sum loss: 4168.084473, avg loss: 2.675279, ppl: 14.516397 +epoch: 1, batch: 24974, sum loss: 5360.120117, avg loss: 2.761525, ppl: 15.823955 +epoch: 1, batch: 24975, sum loss: 4314.792969, avg loss: 2.660168, ppl: 14.298694 +epoch: 1, batch: 24976, sum loss: 4045.687012, avg loss: 2.623662, ppl: 13.786119 +epoch: 1, batch: 24977, sum loss: 4087.419189, avg loss: 2.613439, ppl: 13.645903 +epoch: 1, batch: 24978, sum loss: 4814.706055, avg loss: 2.729425, ppl: 15.324076 +epoch: 1, batch: 24979, sum loss: 4435.946777, avg loss: 2.480955, ppl: 11.952669 +epoch: 1, batch: 24980, sum loss: 5054.263184, avg loss: 2.811047, ppl: 16.627323 +epoch: 1, batch: 24981, sum loss: 4220.505371, avg loss: 2.590857, ppl: 13.341194 +epoch: 1, batch: 24982, sum loss: 4485.163086, avg loss: 2.780634, ppl: 16.129250 +epoch: 1, batch: 24983, sum loss: 3790.626221, avg loss: 2.495475, ppl: 12.127490 +epoch: 1, batch: 24984, sum loss: 
4527.952637, avg loss: 2.536668, ppl: 12.637496 +epoch: 1, batch: 24985, sum loss: 4138.908691, avg loss: 2.627878, ppl: 13.844367 +epoch: 1, batch: 24986, sum loss: 5405.439453, avg loss: 2.895254, ppl: 18.088099 +epoch: 1, batch: 24987, sum loss: 4064.354980, avg loss: 2.331816, ppl: 10.296620 +epoch: 1, batch: 24988, sum loss: 3183.976562, avg loss: 2.334294, ppl: 10.322165 +epoch: 1, batch: 24989, sum loss: 5357.452148, avg loss: 2.746003, ppl: 15.580239 +epoch: 1, batch: 24990, sum loss: 4913.750977, avg loss: 2.726832, ppl: 15.284388 +epoch: 1, batch: 24991, sum loss: 5134.335938, avg loss: 2.922217, ppl: 18.582445 +epoch: 1, batch: 24992, sum loss: 4091.006592, avg loss: 2.649616, ppl: 14.148602 +epoch: 1, batch: 24993, sum loss: 4714.635254, avg loss: 2.798003, ppl: 16.411842 +epoch: 1, batch: 24994, sum loss: 4728.868164, avg loss: 2.752543, ppl: 15.682458 +epoch: 1, batch: 24995, sum loss: 5011.692871, avg loss: 2.734148, ppl: 15.396617 +epoch: 1, batch: 24996, sum loss: 4423.840820, avg loss: 2.569013, ppl: 13.052937 +epoch: 1, batch: 24997, sum loss: 3774.092041, avg loss: 2.445944, ppl: 11.541443 +epoch: 1, batch: 24998, sum loss: 4608.174805, avg loss: 2.501724, ppl: 12.203512 +epoch: 1, batch: 24999, sum loss: 4230.008789, avg loss: 2.455025, ppl: 11.646729 +epoch: 1, batch: 25000, sum loss: 4466.926758, avg loss: 2.854266, ppl: 17.361692 +epoch: 1, batch: 25001, sum loss: 5094.250000, avg loss: 2.833287, ppl: 17.001247 +epoch: 1, batch: 25002, sum loss: 4080.191406, avg loss: 2.561326, ppl: 12.952976 +epoch: 1, batch: 25003, sum loss: 4516.595215, avg loss: 2.663087, ppl: 14.340488 +epoch: 1, batch: 25004, sum loss: 3902.862305, avg loss: 2.384156, ppl: 10.849896 +epoch: 1, batch: 25005, sum loss: 4312.602539, avg loss: 2.565498, ppl: 13.007135 +epoch: 1, batch: 25006, sum loss: 5737.832031, avg loss: 3.037497, ppl: 20.852983 +epoch: 1, batch: 25007, sum loss: 4752.338379, avg loss: 2.579988, ppl: 13.196983 +epoch: 1, batch: 25008, sum loss: 
5260.180176, avg loss: 2.803934, ppl: 16.509470 +epoch: 1, batch: 25009, sum loss: 4773.584473, avg loss: 2.740289, ppl: 15.491468 +epoch: 1, batch: 25010, sum loss: 4491.984863, avg loss: 2.492778, ppl: 12.094824 +epoch: 1, batch: 25011, sum loss: 3551.736816, avg loss: 2.510061, ppl: 12.305684 +epoch: 1, batch: 25012, sum loss: 4785.984863, avg loss: 2.530928, ppl: 12.565163 +epoch: 1, batch: 25013, sum loss: 3808.487549, avg loss: 2.724240, ppl: 15.244824 +epoch: 1, batch: 25014, sum loss: 4808.418945, avg loss: 2.860451, ppl: 17.469412 +epoch: 1, batch: 25015, sum loss: 4814.945312, avg loss: 2.666083, ppl: 14.383513 +epoch: 1, batch: 25016, sum loss: 5088.784668, avg loss: 2.657329, ppl: 14.258152 +epoch: 1, batch: 25017, sum loss: 4212.537109, avg loss: 2.669542, ppl: 14.433354 +epoch: 1, batch: 25018, sum loss: 3921.415527, avg loss: 2.549685, ppl: 12.803070 +epoch: 1, batch: 25019, sum loss: 3778.737793, avg loss: 2.563594, ppl: 12.982393 +epoch: 1, batch: 25020, sum loss: 4303.753906, avg loss: 2.500729, ppl: 12.191374 +epoch: 1, batch: 25021, sum loss: 4766.547363, avg loss: 2.665854, ppl: 14.380228 +epoch: 1, batch: 25022, sum loss: 3322.064453, avg loss: 2.176975, ppl: 8.819591 +epoch: 1, batch: 25023, sum loss: 4917.273438, avg loss: 2.852247, ppl: 17.326672 +epoch: 1, batch: 25024, sum loss: 5166.016113, avg loss: 2.765533, ppl: 15.887509 +epoch: 1, batch: 25025, sum loss: 5142.749023, avg loss: 2.868237, ppl: 17.605955 +epoch: 1, batch: 25026, sum loss: 3853.080078, avg loss: 2.457321, ppl: 11.673501 +epoch: 1, batch: 25027, sum loss: 4270.181152, avg loss: 2.622961, ppl: 13.776460 +epoch: 1, batch: 25028, sum loss: 4801.815918, avg loss: 2.726755, ppl: 15.283215 +epoch: 1, batch: 25029, sum loss: 3763.737549, avg loss: 2.579669, ppl: 13.192777 +epoch: 1, batch: 25030, sum loss: 3440.922363, avg loss: 2.095568, ppl: 8.130057 +epoch: 1, batch: 25031, sum loss: 4271.429199, avg loss: 2.391618, ppl: 10.931164 +epoch: 1, batch: 25032, sum loss: 
4030.092773, avg loss: 2.576786, ppl: 13.154785 +epoch: 1, batch: 25033, sum loss: 3912.920654, avg loss: 2.436439, ppl: 11.432256 +epoch: 1, batch: 25034, sum loss: 4640.646484, avg loss: 2.709075, ppl: 15.015387 +epoch: 1, batch: 25035, sum loss: 4474.372559, avg loss: 2.578889, ppl: 13.182486 +epoch: 1, batch: 25036, sum loss: 5423.623047, avg loss: 2.863581, ppl: 17.524176 +epoch: 1, batch: 25037, sum loss: 4294.697266, avg loss: 2.718163, ppl: 15.152458 +epoch: 1, batch: 25038, sum loss: 3723.939941, avg loss: 2.529850, ppl: 12.551629 +epoch: 1, batch: 25039, sum loss: 3740.545166, avg loss: 2.617596, ppl: 13.702748 +epoch: 1, batch: 25040, sum loss: 4952.321289, avg loss: 2.661108, ppl: 14.312135 +epoch: 1, batch: 25041, sum loss: 4824.956055, avg loss: 2.651075, ppl: 14.169261 +epoch: 1, batch: 25042, sum loss: 4449.640625, avg loss: 2.538301, ppl: 12.658140 +epoch: 1, batch: 25043, sum loss: 3981.525879, avg loss: 2.510420, ppl: 12.310098 +epoch: 1, batch: 25044, sum loss: 4232.603027, avg loss: 2.821735, ppl: 16.805990 +epoch: 1, batch: 25045, sum loss: 5154.735840, avg loss: 2.704478, ppl: 14.946517 +epoch: 1, batch: 25046, sum loss: 4543.908691, avg loss: 2.679192, ppl: 14.573307 +epoch: 1, batch: 25047, sum loss: 4439.641113, avg loss: 2.522523, ppl: 12.459995 +epoch: 1, batch: 25048, sum loss: 4559.366211, avg loss: 2.853170, ppl: 17.342678 +epoch: 1, batch: 25049, sum loss: 4653.922852, avg loss: 2.737602, ppl: 15.449888 +epoch: 1, batch: 25050, sum loss: 4203.334473, avg loss: 2.656975, ppl: 14.253105 +epoch: 1, batch: 25051, sum loss: 4112.793457, avg loss: 2.537195, ppl: 12.644156 +epoch: 1, batch: 25052, sum loss: 3701.995117, avg loss: 2.393016, ppl: 10.946455 +epoch: 1, batch: 25053, sum loss: 4191.540039, avg loss: 2.695524, ppl: 14.813282 +epoch: 1, batch: 25054, sum loss: 4089.952148, avg loss: 2.557819, ppl: 12.907631 +epoch: 1, batch: 25055, sum loss: 4581.089355, avg loss: 2.705900, ppl: 14.967788 +epoch: 1, batch: 25056, sum loss: 
5121.347656, avg loss: 2.962029, ppl: 19.337162 +epoch: 1, batch: 25057, sum loss: 3396.673828, avg loss: 2.345769, ppl: 10.441303 +epoch: 1, batch: 25058, sum loss: 3907.347168, avg loss: 2.455906, ppl: 11.656995 +epoch: 1, batch: 25059, sum loss: 4182.827637, avg loss: 2.578809, ppl: 13.181424 +epoch: 1, batch: 25060, sum loss: 4827.469238, avg loss: 2.782403, ppl: 16.157801 +epoch: 1, batch: 25061, sum loss: 4931.196777, avg loss: 2.814610, ppl: 16.686670 +epoch: 1, batch: 25062, sum loss: 4206.391113, avg loss: 2.590142, ppl: 13.331668 +epoch: 1, batch: 25063, sum loss: 4306.346191, avg loss: 2.552665, ppl: 12.841283 +epoch: 1, batch: 25064, sum loss: 3839.206055, avg loss: 2.666115, ppl: 14.383983 +epoch: 1, batch: 25065, sum loss: 4605.340820, avg loss: 2.792808, ppl: 16.326805 +epoch: 1, batch: 25066, sum loss: 4076.984863, avg loss: 2.502753, ppl: 12.216079 +epoch: 1, batch: 25067, sum loss: 3992.308838, avg loss: 2.412271, ppl: 11.159278 +epoch: 1, batch: 25068, sum loss: 4326.076660, avg loss: 2.741493, ppl: 15.510132 +epoch: 1, batch: 25069, sum loss: 4616.213379, avg loss: 2.652996, ppl: 14.196512 +epoch: 1, batch: 25070, sum loss: 3432.814453, avg loss: 2.246606, ppl: 9.455592 +epoch: 1, batch: 25071, sum loss: 4857.477539, avg loss: 2.967305, ppl: 19.439453 +epoch: 1, batch: 25072, sum loss: 4531.441406, avg loss: 2.440195, ppl: 11.475274 +epoch: 1, batch: 25073, sum loss: 4965.426270, avg loss: 2.716316, ppl: 15.124508 +epoch: 1, batch: 25074, sum loss: 4422.898438, avg loss: 2.510158, ppl: 12.306875 +epoch: 1, batch: 25075, sum loss: 3332.182617, avg loss: 2.484849, ppl: 11.999308 +epoch: 1, batch: 25076, sum loss: 5441.331055, avg loss: 2.984823, ppl: 19.782991 +epoch: 1, batch: 25077, sum loss: 4568.858398, avg loss: 2.665612, ppl: 14.376741 +epoch: 1, batch: 25078, sum loss: 3937.200195, avg loss: 2.522229, ppl: 12.456336 +epoch: 1, batch: 25079, sum loss: 4620.400879, avg loss: 2.681602, ppl: 14.608484 +epoch: 1, batch: 25080, sum loss: 
5257.885254, avg loss: 2.640826, ppl: 14.024786 +epoch: 1, batch: 25081, sum loss: 4488.160156, avg loss: 2.723398, ppl: 15.231996 +epoch: 1, batch: 25082, sum loss: 4883.996094, avg loss: 2.733070, ppl: 15.380033 +epoch: 1, batch: 25083, sum loss: 4168.605469, avg loss: 2.677332, ppl: 14.546227 +epoch: 1, batch: 25084, sum loss: 4295.588867, avg loss: 2.679718, ppl: 14.580988 +epoch: 1, batch: 25085, sum loss: 4342.457520, avg loss: 2.617515, ppl: 13.701633 +epoch: 1, batch: 25086, sum loss: 3848.545410, avg loss: 2.335282, ppl: 10.332376 +epoch: 1, batch: 25087, sum loss: 3852.258789, avg loss: 2.491759, ppl: 12.082505 +epoch: 1, batch: 25088, sum loss: 4964.397949, avg loss: 2.745795, ppl: 15.576997 +epoch: 1, batch: 25089, sum loss: 4447.609863, avg loss: 2.600941, ppl: 13.476419 +epoch: 1, batch: 25090, sum loss: 3128.123535, avg loss: 1.987372, ppl: 7.296333 +epoch: 1, batch: 25091, sum loss: 3842.085449, avg loss: 2.470795, ppl: 11.831845 +epoch: 1, batch: 25092, sum loss: 4221.164062, avg loss: 2.605657, ppl: 13.540116 +epoch: 1, batch: 25093, sum loss: 3800.055664, avg loss: 2.442195, ppl: 11.498254 +epoch: 1, batch: 25094, sum loss: 4714.575684, avg loss: 2.736260, ppl: 15.429171 +epoch: 1, batch: 25095, sum loss: 4268.545898, avg loss: 2.625182, ppl: 13.807086 +epoch: 1, batch: 25096, sum loss: 4094.956299, avg loss: 2.523078, ppl: 12.466916 +epoch: 1, batch: 25097, sum loss: 3904.678955, avg loss: 2.535506, ppl: 12.622813 +epoch: 1, batch: 25098, sum loss: 4423.134766, avg loss: 2.735396, ppl: 15.415842 +epoch: 1, batch: 25099, sum loss: 4836.812988, avg loss: 2.749752, ppl: 15.638746 +epoch: 1, batch: 25100, sum loss: 3787.499756, avg loss: 2.345201, ppl: 10.435370 +epoch: 1, batch: 25101, sum loss: 5534.461914, avg loss: 2.806522, ppl: 16.552252 +epoch: 1, batch: 25102, sum loss: 4043.777832, avg loss: 2.578940, ppl: 13.183155 +epoch: 1, batch: 25103, sum loss: 4280.020508, avg loss: 2.575223, ppl: 13.134245 +epoch: 1, batch: 25104, sum loss: 
4513.895996, avg loss: 2.737354, ppl: 15.446054 +epoch: 1, batch: 25105, sum loss: 3798.614258, avg loss: 2.471447, ppl: 11.839569 +epoch: 1, batch: 25106, sum loss: 3841.154541, avg loss: 2.691769, ppl: 14.757761 +epoch: 1, batch: 25107, sum loss: 5221.278809, avg loss: 2.831496, ppl: 16.970835 +epoch: 1, batch: 25108, sum loss: 3698.756348, avg loss: 2.556155, ppl: 12.886174 +epoch: 1, batch: 25109, sum loss: 5656.299316, avg loss: 2.872676, ppl: 17.684280 +epoch: 1, batch: 25110, sum loss: 5009.171875, avg loss: 2.776703, ppl: 16.065962 +epoch: 1, batch: 25111, sum loss: 4126.334961, avg loss: 2.705793, ppl: 14.966186 +epoch: 1, batch: 25112, sum loss: 5075.428223, avg loss: 2.782581, ppl: 16.160683 +epoch: 1, batch: 25113, sum loss: 5237.498535, avg loss: 2.685897, ppl: 14.671350 +epoch: 1, batch: 25114, sum loss: 5043.125000, avg loss: 2.629367, ppl: 13.864985 +epoch: 1, batch: 25115, sum loss: 3747.839111, avg loss: 2.535751, ppl: 12.625905 +epoch: 1, batch: 25116, sum loss: 4895.619141, avg loss: 2.750348, ppl: 15.648074 +epoch: 1, batch: 25117, sum loss: 3884.957275, avg loss: 2.327716, ppl: 10.254490 +epoch: 1, batch: 25118, sum loss: 4282.393066, avg loss: 2.674824, ppl: 14.509796 +epoch: 1, batch: 25119, sum loss: 4212.549805, avg loss: 2.621375, ppl: 13.754624 +epoch: 1, batch: 25120, sum loss: 4740.455566, avg loss: 2.688857, ppl: 14.714852 +epoch: 1, batch: 25121, sum loss: 4079.725342, avg loss: 2.337951, ppl: 10.359992 +epoch: 1, batch: 25122, sum loss: 4948.961914, avg loss: 2.741807, ppl: 15.514998 +epoch: 1, batch: 25123, sum loss: 3924.626465, avg loss: 2.337478, ppl: 10.355092 +epoch: 1, batch: 25124, sum loss: 4909.309082, avg loss: 2.516304, ppl: 12.382746 +epoch: 1, batch: 25125, sum loss: 5022.797852, avg loss: 2.749205, ppl: 15.630202 +epoch: 1, batch: 25126, sum loss: 4796.954590, avg loss: 2.816767, ppl: 16.722702 +epoch: 1, batch: 25127, sum loss: 4875.956055, avg loss: 2.686477, ppl: 14.679871 +epoch: 1, batch: 25128, sum loss: 
4466.700195, avg loss: 2.730257, ppl: 15.336828 +epoch: 1, batch: 25129, sum loss: 4669.952148, avg loss: 2.677725, ppl: 14.551948 +epoch: 1, batch: 25130, sum loss: 5304.577637, avg loss: 2.679080, ppl: 14.571674 +epoch: 1, batch: 25131, sum loss: 4162.415039, avg loss: 2.611302, ppl: 13.616764 +epoch: 1, batch: 25132, sum loss: 4531.960449, avg loss: 2.634861, ppl: 13.941371 +epoch: 1, batch: 25133, sum loss: 4905.699219, avg loss: 2.623369, ppl: 13.782070 +epoch: 1, batch: 25134, sum loss: 4712.399902, avg loss: 2.593506, ppl: 13.376586 +epoch: 1, batch: 25135, sum loss: 4313.107422, avg loss: 2.625141, ppl: 13.806526 +epoch: 1, batch: 25136, sum loss: 3209.002441, avg loss: 2.236238, ppl: 9.358065 +epoch: 1, batch: 25137, sum loss: 4433.249023, avg loss: 2.581974, ppl: 13.223212 +epoch: 1, batch: 25138, sum loss: 5477.647949, avg loss: 2.941809, ppl: 18.950094 +epoch: 1, batch: 25139, sum loss: 3805.452148, avg loss: 2.495378, ppl: 12.126323 +epoch: 1, batch: 25140, sum loss: 3262.633789, avg loss: 2.464225, ppl: 11.754367 +epoch: 1, batch: 25141, sum loss: 4752.368652, avg loss: 2.677391, ppl: 14.547084 +epoch: 1, batch: 25142, sum loss: 4251.392090, avg loss: 2.590732, ppl: 13.339537 +epoch: 1, batch: 25143, sum loss: 5384.178711, avg loss: 2.801342, ppl: 16.466726 +epoch: 1, batch: 25144, sum loss: 5419.458984, avg loss: 2.676276, ppl: 14.530879 +epoch: 1, batch: 25145, sum loss: 4229.909180, avg loss: 2.395192, ppl: 10.970304 +epoch: 1, batch: 25146, sum loss: 4917.177734, avg loss: 2.681122, ppl: 14.601464 +epoch: 1, batch: 25147, sum loss: 4498.411621, avg loss: 2.618400, ppl: 13.713769 +epoch: 1, batch: 25148, sum loss: 4997.900391, avg loss: 2.826867, ppl: 16.892452 +epoch: 1, batch: 25149, sum loss: 4198.772949, avg loss: 2.677789, ppl: 14.552877 +epoch: 1, batch: 25150, sum loss: 4742.494141, avg loss: 2.559360, ppl: 12.927542 +epoch: 1, batch: 25151, sum loss: 4492.145508, avg loss: 2.639333, ppl: 14.003867 +epoch: 1, batch: 25152, sum loss: 
4397.960449, avg loss: 2.855819, ppl: 17.388664 +epoch: 1, batch: 25153, sum loss: 4798.595703, avg loss: 2.667368, ppl: 14.402020 +epoch: 1, batch: 25154, sum loss: 4055.839600, avg loss: 2.519155, ppl: 12.418099 +epoch: 1, batch: 25155, sum loss: 3897.742676, avg loss: 2.638959, ppl: 13.998630 +epoch: 1, batch: 25156, sum loss: 3731.628662, avg loss: 2.519668, ppl: 12.424476 +epoch: 1, batch: 25157, sum loss: 5009.411621, avg loss: 2.664581, ppl: 14.361925 +epoch: 1, batch: 25158, sum loss: 3918.791748, avg loss: 2.549637, ppl: 12.802454 +epoch: 1, batch: 25159, sum loss: 3914.446289, avg loss: 2.488523, ppl: 12.043472 +epoch: 1, batch: 25160, sum loss: 4728.260742, avg loss: 2.496442, ppl: 12.139224 +epoch: 1, batch: 25161, sum loss: 4257.511230, avg loss: 2.472422, ppl: 11.851120 +epoch: 1, batch: 25162, sum loss: 4196.193848, avg loss: 2.337713, ppl: 10.357517 +epoch: 1, batch: 25163, sum loss: 4263.685059, avg loss: 2.622192, ppl: 13.765871 +epoch: 1, batch: 25164, sum loss: 5439.038574, avg loss: 2.900820, ppl: 18.189064 +epoch: 1, batch: 25165, sum loss: 3790.188477, avg loss: 2.428052, ppl: 11.336773 +epoch: 1, batch: 25166, sum loss: 4619.076660, avg loss: 2.680834, ppl: 14.597260 +epoch: 1, batch: 25167, sum loss: 4060.771484, avg loss: 2.701777, ppl: 14.906203 +epoch: 1, batch: 25168, sum loss: 4887.634766, avg loss: 2.810601, ppl: 16.619900 +epoch: 1, batch: 25169, sum loss: 4611.626465, avg loss: 2.887681, ppl: 17.951632 +epoch: 1, batch: 25170, sum loss: 4193.416016, avg loss: 2.575808, ppl: 13.141935 +epoch: 1, batch: 25171, sum loss: 4528.190918, avg loss: 2.695352, ppl: 14.810729 +epoch: 1, batch: 25172, sum loss: 5055.994141, avg loss: 2.793367, ppl: 16.335934 +epoch: 1, batch: 25173, sum loss: 5162.513672, avg loss: 2.796595, ppl: 16.388742 +epoch: 1, batch: 25174, sum loss: 4059.880371, avg loss: 2.546976, ppl: 12.768437 +epoch: 1, batch: 25175, sum loss: 4472.725098, avg loss: 2.663922, ppl: 14.352470 +epoch: 1, batch: 25176, sum loss: 
4584.897949, avg loss: 2.662542, ppl: 14.332682 +epoch: 1, batch: 25177, sum loss: 4069.342529, avg loss: 2.359039, ppl: 10.580782 +epoch: 1, batch: 25178, sum loss: 5420.642578, avg loss: 2.811537, ppl: 16.635464 +epoch: 1, batch: 25179, sum loss: 4686.733398, avg loss: 2.537484, ppl: 12.647814 +epoch: 1, batch: 25180, sum loss: 5989.196777, avg loss: 2.897531, ppl: 18.129330 +epoch: 1, batch: 25181, sum loss: 5769.605957, avg loss: 3.047863, ppl: 21.070280 +epoch: 1, batch: 25182, sum loss: 3935.922607, avg loss: 2.505361, ppl: 12.247984 +epoch: 1, batch: 25183, sum loss: 4000.916260, avg loss: 2.426268, ppl: 11.316571 +epoch: 1, batch: 25184, sum loss: 4670.062500, avg loss: 2.734229, ppl: 15.397861 +epoch: 1, batch: 25185, sum loss: 4765.919434, avg loss: 2.762852, ppl: 15.844968 +epoch: 1, batch: 25186, sum loss: 3664.962158, avg loss: 2.438431, ppl: 11.455057 +epoch: 1, batch: 25187, sum loss: 4059.857422, avg loss: 2.602473, ppl: 13.497072 +epoch: 1, batch: 25188, sum loss: 4081.893066, avg loss: 2.412466, ppl: 11.161454 +epoch: 1, batch: 25189, sum loss: 4869.581055, avg loss: 2.643638, ppl: 14.064279 +epoch: 1, batch: 25190, sum loss: 4696.772461, avg loss: 2.757940, ppl: 15.767333 +epoch: 1, batch: 25191, sum loss: 4104.545410, avg loss: 2.566945, ppl: 13.025970 +epoch: 1, batch: 25192, sum loss: 5032.389160, avg loss: 2.746937, ppl: 15.594796 +epoch: 1, batch: 25193, sum loss: 4652.403320, avg loss: 2.852485, ppl: 17.330799 +epoch: 1, batch: 25194, sum loss: 3411.768555, avg loss: 2.277549, ppl: 9.752748 +epoch: 1, batch: 25195, sum loss: 4819.910156, avg loss: 2.877558, ppl: 17.770832 +epoch: 1, batch: 25196, sum loss: 4990.005859, avg loss: 2.770686, ppl: 15.969587 +epoch: 1, batch: 25197, sum loss: 3744.048828, avg loss: 2.372654, ppl: 10.725825 +epoch: 1, batch: 25198, sum loss: 4057.826660, avg loss: 2.504831, ppl: 12.241494 +epoch: 1, batch: 25199, sum loss: 4891.596680, avg loss: 2.716045, ppl: 15.120398 +epoch: 1, batch: 25200, sum loss: 
4052.442627, avg loss: 2.433900, ppl: 11.403264 +epoch: 1, batch: 25201, sum loss: 4133.130859, avg loss: 2.479383, ppl: 11.933893 +epoch: 1, batch: 25202, sum loss: 3830.626221, avg loss: 2.479370, ppl: 11.933743 +epoch: 1, batch: 25203, sum loss: 4337.617188, avg loss: 2.566637, ppl: 13.021961 +epoch: 1, batch: 25204, sum loss: 4168.580078, avg loss: 2.440621, ppl: 11.480164 +epoch: 1, batch: 25205, sum loss: 3516.763184, avg loss: 2.321296, ppl: 10.188870 +epoch: 1, batch: 25206, sum loss: 3630.308105, avg loss: 2.397826, ppl: 10.999235 +epoch: 1, batch: 25207, sum loss: 4383.288574, avg loss: 2.612210, ppl: 13.629138 +epoch: 1, batch: 25208, sum loss: 3937.255371, avg loss: 2.816349, ppl: 16.715706 +epoch: 1, batch: 25209, sum loss: 4267.493652, avg loss: 2.380086, ppl: 10.805829 +epoch: 1, batch: 25210, sum loss: 4429.794922, avg loss: 2.343807, ppl: 10.420833 +epoch: 1, batch: 25211, sum loss: 3851.915039, avg loss: 2.530825, ppl: 12.563863 +epoch: 1, batch: 25212, sum loss: 4616.447266, avg loss: 2.651607, ppl: 14.176800 +epoch: 1, batch: 25213, sum loss: 3782.860840, avg loss: 2.380655, ppl: 10.811983 +epoch: 1, batch: 25214, sum loss: 3793.050537, avg loss: 2.740644, ppl: 15.496954 +epoch: 1, batch: 25215, sum loss: 5527.675781, avg loss: 2.834705, ppl: 17.025383 +epoch: 1, batch: 25216, sum loss: 4367.067871, avg loss: 2.474259, ppl: 11.872910 +epoch: 1, batch: 25217, sum loss: 4795.213379, avg loss: 2.824036, ppl: 16.844706 +epoch: 1, batch: 25218, sum loss: 4571.018066, avg loss: 2.563667, ppl: 12.983341 +epoch: 1, batch: 25219, sum loss: 4316.565430, avg loss: 2.661261, ppl: 14.314329 +epoch: 1, batch: 25220, sum loss: 4957.657227, avg loss: 2.748147, ppl: 15.613673 +epoch: 1, batch: 25221, sum loss: 4367.159180, avg loss: 2.349198, ppl: 10.477162 +epoch: 1, batch: 25222, sum loss: 3949.086670, avg loss: 2.668302, ppl: 14.415468 +epoch: 1, batch: 25223, sum loss: 3620.048584, avg loss: 2.580220, ppl: 13.200042 +epoch: 1, batch: 25224, sum loss: 
4443.521484, avg loss: 2.524728, ppl: 12.487499 +epoch: 1, batch: 25225, sum loss: 4065.400879, avg loss: 2.462387, ppl: 11.732785 +epoch: 1, batch: 25226, sum loss: 5143.523926, avg loss: 2.530017, ppl: 12.553716 +epoch: 1, batch: 25227, sum loss: 4889.820801, avg loss: 2.792588, ppl: 16.323206 +epoch: 1, batch: 25228, sum loss: 3827.018799, avg loss: 2.511167, ppl: 12.319302 +epoch: 1, batch: 25229, sum loss: 3541.595703, avg loss: 2.375316, ppl: 10.754410 +epoch: 1, batch: 25230, sum loss: 4186.276367, avg loss: 2.435297, ppl: 11.419215 +epoch: 1, batch: 25231, sum loss: 4665.453613, avg loss: 2.625466, ppl: 13.811013 +epoch: 1, batch: 25232, sum loss: 4834.381348, avg loss: 2.813959, ppl: 16.675806 +epoch: 1, batch: 25233, sum loss: 4632.268555, avg loss: 2.590754, ppl: 13.339829 +epoch: 1, batch: 25234, sum loss: 4680.627930, avg loss: 2.912650, ppl: 18.405502 +epoch: 1, batch: 25235, sum loss: 3596.191406, avg loss: 2.378434, ppl: 10.787992 +epoch: 1, batch: 25236, sum loss: 5146.663086, avg loss: 2.851337, ppl: 17.310911 +epoch: 1, batch: 25237, sum loss: 4497.988770, avg loss: 2.648992, ppl: 14.139783 +epoch: 1, batch: 25238, sum loss: 4622.457031, avg loss: 2.453534, ppl: 11.629369 +epoch: 1, batch: 25239, sum loss: 4823.804688, avg loss: 2.794788, ppl: 16.359165 +epoch: 1, batch: 25240, sum loss: 4536.410645, avg loss: 2.462764, ppl: 11.737206 +epoch: 1, batch: 25241, sum loss: 3964.895508, avg loss: 2.603346, ppl: 13.508858 +epoch: 1, batch: 25242, sum loss: 3856.388184, avg loss: 2.478399, ppl: 11.922156 +epoch: 1, batch: 25243, sum loss: 4441.033203, avg loss: 2.562627, ppl: 12.969849 +epoch: 1, batch: 25244, sum loss: 4131.781738, avg loss: 2.623353, ppl: 13.781863 +epoch: 1, batch: 25245, sum loss: 4419.576172, avg loss: 2.583037, ppl: 13.237281 +epoch: 1, batch: 25246, sum loss: 4375.429688, avg loss: 2.658220, ppl: 14.270862 +epoch: 1, batch: 25247, sum loss: 4131.620117, avg loss: 2.545669, ppl: 12.751751 +epoch: 1, batch: 25248, sum loss: 
4589.570312, avg loss: 2.803647, ppl: 16.504734 +epoch: 1, batch: 25249, sum loss: 5001.674805, avg loss: 2.642195, ppl: 14.043993 +epoch: 1, batch: 25250, sum loss: 4111.560547, avg loss: 2.669844, ppl: 14.437722 +epoch: 1, batch: 25251, sum loss: 5400.742188, avg loss: 2.930408, ppl: 18.735277 +epoch: 1, batch: 25252, sum loss: 3775.904053, avg loss: 2.400448, ppl: 11.028111 +epoch: 1, batch: 25253, sum loss: 4069.587402, avg loss: 2.481456, ppl: 11.958661 +epoch: 1, batch: 25254, sum loss: 4237.836914, avg loss: 2.663631, ppl: 14.348292 +epoch: 1, batch: 25255, sum loss: 3955.054688, avg loss: 2.591779, ppl: 13.353506 +epoch: 1, batch: 25256, sum loss: 4376.730469, avg loss: 2.720156, ppl: 15.182686 +epoch: 1, batch: 25257, sum loss: 3847.010254, avg loss: 2.461299, ppl: 11.720025 +epoch: 1, batch: 25258, sum loss: 4888.232910, avg loss: 2.885616, ppl: 17.914593 +epoch: 1, batch: 25259, sum loss: 3943.552979, avg loss: 2.441828, ppl: 11.494039 +epoch: 1, batch: 25260, sum loss: 3777.465088, avg loss: 2.389288, ppl: 10.905731 +epoch: 1, batch: 25261, sum loss: 4906.712402, avg loss: 2.786322, ppl: 16.221243 +epoch: 1, batch: 25262, sum loss: 4763.648438, avg loss: 2.712784, ppl: 15.071177 +epoch: 1, batch: 25263, sum loss: 5260.077637, avg loss: 2.739624, ppl: 15.481163 +epoch: 1, batch: 25264, sum loss: 4670.700684, avg loss: 2.672026, ppl: 14.469250 +epoch: 1, batch: 25265, sum loss: 4934.358887, avg loss: 2.806803, ppl: 16.556892 +epoch: 1, batch: 25266, sum loss: 4911.201172, avg loss: 2.626311, ppl: 13.822681 +epoch: 1, batch: 25267, sum loss: 4126.522461, avg loss: 2.756528, ppl: 15.745079 +epoch: 1, batch: 25268, sum loss: 4405.403809, avg loss: 2.597526, ppl: 13.430468 +epoch: 1, batch: 25269, sum loss: 3508.135498, avg loss: 2.409434, ppl: 11.127660 +epoch: 1, batch: 25270, sum loss: 4170.169922, avg loss: 2.453041, ppl: 11.623644 +epoch: 1, batch: 25271, sum loss: 4883.409180, avg loss: 2.551415, ppl: 12.825244 +epoch: 1, batch: 25272, sum loss: 
4505.291504, avg loss: 2.701014, ppl: 14.894828 +epoch: 1, batch: 25273, sum loss: 4916.709473, avg loss: 2.935349, ppl: 18.828077 +epoch: 1, batch: 25274, sum loss: 4645.637695, avg loss: 2.766907, ppl: 15.909357 +epoch: 1, batch: 25275, sum loss: 5087.667969, avg loss: 2.925629, ppl: 18.645945 +epoch: 1, batch: 25276, sum loss: 4869.759766, avg loss: 2.800322, ppl: 16.449944 +epoch: 1, batch: 25277, sum loss: 4719.253906, avg loss: 2.707547, ppl: 14.992450 +epoch: 1, batch: 25278, sum loss: 4818.136719, avg loss: 2.611456, ppl: 13.618868 +epoch: 1, batch: 25279, sum loss: 5138.530762, avg loss: 2.700226, ppl: 14.883100 +epoch: 1, batch: 25280, sum loss: 4416.910156, avg loss: 2.784937, ppl: 16.198799 +epoch: 1, batch: 25281, sum loss: 5006.319336, avg loss: 2.868951, ppl: 17.618525 +epoch: 1, batch: 25282, sum loss: 4039.887207, avg loss: 2.555273, ppl: 12.874818 +epoch: 1, batch: 25283, sum loss: 4064.239258, avg loss: 2.562572, ppl: 12.969131 +epoch: 1, batch: 25284, sum loss: 4099.111328, avg loss: 2.644588, ppl: 14.077643 +epoch: 1, batch: 25285, sum loss: 4734.510254, avg loss: 2.730398, ppl: 15.338990 +epoch: 1, batch: 25286, sum loss: 5293.600586, avg loss: 2.995812, ppl: 20.001602 +epoch: 1, batch: 25287, sum loss: 4980.004395, avg loss: 2.875291, ppl: 17.730585 +epoch: 1, batch: 25288, sum loss: 3019.771484, avg loss: 2.048692, ppl: 7.757745 +epoch: 1, batch: 25289, sum loss: 4470.469727, avg loss: 2.689813, ppl: 14.728927 +epoch: 1, batch: 25290, sum loss: 3910.888184, avg loss: 2.551134, ppl: 12.821633 +epoch: 1, batch: 25291, sum loss: 4725.139648, avg loss: 2.795941, ppl: 16.378027 +epoch: 1, batch: 25292, sum loss: 3867.173584, avg loss: 2.432185, ppl: 11.383725 +epoch: 1, batch: 25293, sum loss: 4146.980469, avg loss: 2.591863, ppl: 13.354624 +epoch: 1, batch: 25294, sum loss: 4312.249023, avg loss: 2.708699, ppl: 15.009735 +epoch: 1, batch: 25295, sum loss: 4766.899902, avg loss: 2.547782, ppl: 12.778728 +epoch: 1, batch: 25296, sum loss: 
3915.802246, avg loss: 2.432175, ppl: 11.383616 +epoch: 1, batch: 25297, sum loss: 4765.615234, avg loss: 2.786909, ppl: 16.230778 +epoch: 1, batch: 25298, sum loss: 4459.129395, avg loss: 2.651088, ppl: 14.169440 +epoch: 1, batch: 25299, sum loss: 3920.718262, avg loss: 2.516507, ppl: 12.385264 +epoch: 1, batch: 25300, sum loss: 4588.914062, avg loss: 2.756105, ppl: 15.738418 +epoch: 1, batch: 25301, sum loss: 4524.534180, avg loss: 2.562024, ppl: 12.962025 +epoch: 1, batch: 25302, sum loss: 4548.829102, avg loss: 2.564165, ppl: 12.989809 +epoch: 1, batch: 25303, sum loss: 5388.228516, avg loss: 2.751904, ppl: 15.672448 +epoch: 1, batch: 25304, sum loss: 4840.865234, avg loss: 2.727248, ppl: 15.290749 +epoch: 1, batch: 25305, sum loss: 4449.274414, avg loss: 2.295807, ppl: 9.932452 +epoch: 1, batch: 25306, sum loss: 5072.713379, avg loss: 2.695384, ppl: 14.811209 +epoch: 1, batch: 25307, sum loss: 4903.772461, avg loss: 2.797360, ppl: 16.401297 +epoch: 1, batch: 25308, sum loss: 4420.270508, avg loss: 2.742103, ppl: 15.519594 +epoch: 1, batch: 25309, sum loss: 4103.038086, avg loss: 2.580527, ppl: 13.204096 +epoch: 1, batch: 25310, sum loss: 5114.444336, avg loss: 2.982183, ppl: 19.730846 +epoch: 1, batch: 25311, sum loss: 3712.115234, avg loss: 2.601342, ppl: 13.481821 +epoch: 1, batch: 25312, sum loss: 5008.095215, avg loss: 3.004256, ppl: 20.171207 +epoch: 1, batch: 25313, sum loss: 5045.935547, avg loss: 2.679732, ppl: 14.581186 +epoch: 1, batch: 25314, sum loss: 4464.877930, avg loss: 2.637258, ppl: 13.974836 +epoch: 1, batch: 25315, sum loss: 4271.078125, avg loss: 2.425371, ppl: 11.306422 +epoch: 1, batch: 25316, sum loss: 5596.996582, avg loss: 2.918142, ppl: 18.506870 +epoch: 1, batch: 25317, sum loss: 4571.519531, avg loss: 2.630333, ppl: 13.878397 +epoch: 1, batch: 25318, sum loss: 4234.417969, avg loss: 2.555472, ppl: 12.877381 +epoch: 1, batch: 25319, sum loss: 3174.580078, avg loss: 2.408634, ppl: 11.118767 +epoch: 1, batch: 25320, sum loss: 
4203.056152, avg loss: 2.525875, ppl: 12.501831 +epoch: 1, batch: 25321, sum loss: 3864.647461, avg loss: 2.503010, ppl: 12.219222 +epoch: 1, batch: 25322, sum loss: 4100.271973, avg loss: 2.674672, ppl: 14.507589 +epoch: 1, batch: 25323, sum loss: 4962.246094, avg loss: 2.679399, ppl: 14.576324 +epoch: 1, batch: 25324, sum loss: 5291.652832, avg loss: 2.734704, ppl: 15.405187 +epoch: 1, batch: 25325, sum loss: 4180.330078, avg loss: 2.596478, ppl: 13.416405 +epoch: 1, batch: 25326, sum loss: 4667.749023, avg loss: 2.721720, ppl: 15.206448 +epoch: 1, batch: 25327, sum loss: 4566.239258, avg loss: 2.689187, ppl: 14.719705 +epoch: 1, batch: 25328, sum loss: 4979.993652, avg loss: 2.591048, ppl: 13.343745 +epoch: 1, batch: 25329, sum loss: 5054.870605, avg loss: 2.862328, ppl: 17.502218 +epoch: 1, batch: 25330, sum loss: 4452.035156, avg loss: 2.773854, ppl: 16.020250 +epoch: 1, batch: 25331, sum loss: 4683.084961, avg loss: 2.790873, ppl: 16.295240 +epoch: 1, batch: 25332, sum loss: 4792.580566, avg loss: 2.752775, ppl: 15.686096 +epoch: 1, batch: 25333, sum loss: 4170.260742, avg loss: 2.770937, ppl: 15.973601 +epoch: 1, batch: 25334, sum loss: 5207.183105, avg loss: 2.878487, ppl: 17.787346 +epoch: 1, batch: 25335, sum loss: 5370.415527, avg loss: 2.765405, ppl: 15.885468 +epoch: 1, batch: 25336, sum loss: 3738.040283, avg loss: 2.565573, ppl: 13.008113 +epoch: 1, batch: 25337, sum loss: 5061.336914, avg loss: 2.592898, ppl: 13.368459 +epoch: 1, batch: 25338, sum loss: 4672.728027, avg loss: 2.546446, ppl: 12.761666 +epoch: 1, batch: 25339, sum loss: 3694.151123, avg loss: 2.464410, ppl: 11.756547 +epoch: 1, batch: 25340, sum loss: 3488.849121, avg loss: 2.290774, ppl: 9.882587 +epoch: 1, batch: 25341, sum loss: 4010.885010, avg loss: 2.640477, ppl: 14.019892 +epoch: 1, batch: 25342, sum loss: 4410.910645, avg loss: 2.521961, ppl: 12.452999 +epoch: 1, batch: 25343, sum loss: 3848.901855, avg loss: 2.540529, ppl: 12.686383 +epoch: 1, batch: 25344, sum loss: 
4016.606689, avg loss: 2.589688, ppl: 13.325618 +epoch: 1, batch: 25345, sum loss: 4246.632812, avg loss: 2.589410, ppl: 13.321914 +epoch: 1, batch: 25346, sum loss: 5195.397949, avg loss: 2.747434, ppl: 15.602547 +epoch: 1, batch: 25347, sum loss: 5075.963379, avg loss: 2.872645, ppl: 17.683727 +epoch: 1, batch: 25348, sum loss: 4810.078125, avg loss: 2.809625, ppl: 16.603689 +epoch: 1, batch: 25349, sum loss: 3608.223877, avg loss: 2.478176, ppl: 11.919502 +epoch: 1, batch: 25350, sum loss: 3643.909424, avg loss: 2.290327, ppl: 9.878163 +epoch: 1, batch: 25351, sum loss: 4694.451660, avg loss: 2.833103, ppl: 16.998123 +epoch: 1, batch: 25352, sum loss: 4238.133301, avg loss: 2.678972, ppl: 14.570101 +epoch: 1, batch: 25353, sum loss: 3982.693359, avg loss: 2.707474, ppl: 14.991352 +epoch: 1, batch: 25354, sum loss: 5372.765625, avg loss: 2.983212, ppl: 19.751165 +epoch: 1, batch: 25355, sum loss: 3437.375732, avg loss: 2.425812, ppl: 11.311414 +epoch: 1, batch: 25356, sum loss: 3858.947266, avg loss: 2.480043, ppl: 11.941780 +epoch: 1, batch: 25357, sum loss: 6006.778320, avg loss: 3.080399, ppl: 21.767086 +epoch: 1, batch: 25358, sum loss: 4830.048828, avg loss: 2.813075, ppl: 16.661066 +epoch: 1, batch: 25359, sum loss: 4782.417969, avg loss: 2.694320, ppl: 14.795450 +epoch: 1, batch: 25360, sum loss: 4777.346680, avg loss: 2.648197, ppl: 14.128537 +epoch: 1, batch: 25361, sum loss: 4775.821289, avg loss: 2.640034, ppl: 14.013680 +epoch: 1, batch: 25362, sum loss: 4333.057129, avg loss: 2.732066, ppl: 15.364604 +epoch: 1, batch: 25363, sum loss: 5163.879883, avg loss: 2.700774, ppl: 14.891253 +epoch: 1, batch: 25364, sum loss: 4483.364258, avg loss: 2.764096, ppl: 15.864696 +epoch: 1, batch: 25365, sum loss: 3857.702393, avg loss: 2.385716, ppl: 10.866840 +epoch: 1, batch: 25366, sum loss: 3841.735352, avg loss: 2.512580, ppl: 12.336722 +epoch: 1, batch: 25367, sum loss: 4735.018555, avg loss: 2.738588, ppl: 15.465135 +epoch: 1, batch: 25368, sum loss: 
4296.724121, avg loss: 2.618357, ppl: 13.713177 +epoch: 1, batch: 25369, sum loss: 4206.175781, avg loss: 2.388515, ppl: 10.897305 +epoch: 1, batch: 25370, sum loss: 4776.900391, avg loss: 2.601798, ppl: 13.487962 +epoch: 1, batch: 25371, sum loss: 5515.797363, avg loss: 2.825716, ppl: 16.873022 +epoch: 1, batch: 25372, sum loss: 5148.555176, avg loss: 2.715483, ppl: 15.111907 +epoch: 1, batch: 25373, sum loss: 4176.558594, avg loss: 2.678998, ppl: 14.570493 +epoch: 1, batch: 25374, sum loss: 4341.056641, avg loss: 2.467912, ppl: 11.797784 +epoch: 1, batch: 25375, sum loss: 5131.935059, avg loss: 2.675670, ppl: 14.522075 +epoch: 1, batch: 25376, sum loss: 3918.317383, avg loss: 2.586348, ppl: 13.281184 +epoch: 1, batch: 25377, sum loss: 4522.297852, avg loss: 2.717727, ppl: 15.145860 +epoch: 1, batch: 25378, sum loss: 3512.061768, avg loss: 2.478519, ppl: 11.923594 +epoch: 1, batch: 25379, sum loss: 3497.607910, avg loss: 2.562350, ppl: 12.966256 +epoch: 1, batch: 25380, sum loss: 4054.638184, avg loss: 2.515284, ppl: 12.370125 +epoch: 1, batch: 25381, sum loss: 4240.912109, avg loss: 2.559392, ppl: 12.927951 +epoch: 1, batch: 25382, sum loss: 5069.771484, avg loss: 2.966513, ppl: 19.424078 +epoch: 1, batch: 25383, sum loss: 4222.489746, avg loss: 2.792652, ppl: 16.324257 +epoch: 1, batch: 25384, sum loss: 4242.475586, avg loss: 2.610754, ppl: 13.609312 +epoch: 1, batch: 25385, sum loss: 4971.330078, avg loss: 2.768001, ppl: 15.926769 +epoch: 1, batch: 25386, sum loss: 4113.868652, avg loss: 2.464870, ppl: 11.761958 +epoch: 1, batch: 25387, sum loss: 4763.279785, avg loss: 2.551301, ppl: 12.823783 +epoch: 1, batch: 25388, sum loss: 3328.050781, avg loss: 2.457940, ppl: 11.680723 +epoch: 1, batch: 25389, sum loss: 4633.687500, avg loss: 2.741827, ppl: 15.515306 +epoch: 1, batch: 25390, sum loss: 4837.478516, avg loss: 2.617683, ppl: 13.703938 +epoch: 1, batch: 25391, sum loss: 4032.243896, avg loss: 2.363566, ppl: 10.628788 +epoch: 1, batch: 25392, sum loss: 
4782.819336, avg loss: 2.737733, ppl: 15.451914 +epoch: 1, batch: 25393, sum loss: 4448.644531, avg loss: 2.558162, ppl: 12.912069 +epoch: 1, batch: 25394, sum loss: 4437.410156, avg loss: 2.542928, ppl: 12.716857 +epoch: 1, batch: 25395, sum loss: 3731.399902, avg loss: 2.461346, ppl: 11.720573 +epoch: 1, batch: 25396, sum loss: 5070.770508, avg loss: 2.823369, ppl: 16.833467 +epoch: 1, batch: 25397, sum loss: 3980.990479, avg loss: 2.485013, ppl: 12.001273 +epoch: 1, batch: 25398, sum loss: 4894.796875, avg loss: 2.642979, ppl: 14.055010 +epoch: 1, batch: 25399, sum loss: 4098.295410, avg loss: 2.618719, ppl: 13.718141 +epoch: 1, batch: 25400, sum loss: 4243.877930, avg loss: 2.486162, ppl: 12.015070 +epoch: 1, batch: 25401, sum loss: 4459.099609, avg loss: 2.621458, ppl: 13.755759 +epoch: 1, batch: 25402, sum loss: 4165.597168, avg loss: 2.557150, ppl: 12.899001 +epoch: 1, batch: 25403, sum loss: 4248.413086, avg loss: 2.519818, ppl: 12.426335 +epoch: 1, batch: 25404, sum loss: 4276.415527, avg loss: 2.476211, ppl: 11.896100 +epoch: 1, batch: 25405, sum loss: 4254.696777, avg loss: 2.566162, ppl: 13.015776 +epoch: 1, batch: 25406, sum loss: 4141.194824, avg loss: 2.561036, ppl: 12.949224 +epoch: 1, batch: 25407, sum loss: 4721.909180, avg loss: 2.603037, ppl: 13.504691 +epoch: 1, batch: 25408, sum loss: 5557.077148, avg loss: 2.904902, ppl: 18.263449 +epoch: 1, batch: 25409, sum loss: 4726.498047, avg loss: 2.823476, ppl: 16.835262 +epoch: 1, batch: 25410, sum loss: 4729.521973, avg loss: 2.783709, ppl: 16.178923 +epoch: 1, batch: 25411, sum loss: 4387.095215, avg loss: 2.644421, ppl: 14.075298 +epoch: 1, batch: 25412, sum loss: 4189.973633, avg loss: 2.583214, ppl: 13.239626 +epoch: 1, batch: 25413, sum loss: 4435.069824, avg loss: 2.514212, ppl: 12.356867 +epoch: 1, batch: 25414, sum loss: 3929.453613, avg loss: 2.488571, ppl: 12.044052 +epoch: 1, batch: 25415, sum loss: 4066.046631, avg loss: 2.675031, ppl: 14.512795 +epoch: 1, batch: 25416, sum loss: 
3652.481689, avg loss: 2.317564, ppl: 10.150922 +epoch: 1, batch: 25417, sum loss: 4954.224609, avg loss: 2.732612, ppl: 15.372984 +epoch: 1, batch: 25418, sum loss: 4850.441895, avg loss: 2.820024, ppl: 16.777258 +epoch: 1, batch: 25419, sum loss: 3544.807129, avg loss: 2.141877, ppl: 8.515409 +epoch: 1, batch: 25420, sum loss: 4676.311523, avg loss: 2.790162, ppl: 16.283659 +epoch: 1, batch: 25421, sum loss: 4273.115234, avg loss: 2.391223, ppl: 10.926848 +epoch: 1, batch: 25422, sum loss: 3147.181641, avg loss: 2.540098, ppl: 12.680916 +epoch: 1, batch: 25423, sum loss: 3729.620605, avg loss: 2.429720, ppl: 11.355704 +epoch: 1, batch: 25424, sum loss: 4544.484375, avg loss: 2.619299, ppl: 13.726104 +epoch: 1, batch: 25425, sum loss: 4106.188965, avg loss: 2.488599, ppl: 12.044394 +epoch: 1, batch: 25426, sum loss: 4914.482910, avg loss: 2.731786, ppl: 15.360296 +epoch: 1, batch: 25427, sum loss: 4533.984863, avg loss: 2.604242, ppl: 13.520970 +epoch: 1, batch: 25428, sum loss: 4266.022949, avg loss: 2.743423, ppl: 15.540092 +epoch: 1, batch: 25429, sum loss: 3877.820801, avg loss: 2.398158, ppl: 11.002886 +epoch: 1, batch: 25430, sum loss: 4216.877930, avg loss: 2.619179, ppl: 13.724448 +epoch: 1, batch: 25431, sum loss: 4218.926270, avg loss: 2.575657, ppl: 13.139946 +epoch: 1, batch: 25432, sum loss: 4419.413574, avg loss: 2.853075, ppl: 17.341028 +epoch: 1, batch: 25433, sum loss: 4351.692383, avg loss: 2.696216, ppl: 14.823531 +epoch: 1, batch: 25434, sum loss: 4720.873047, avg loss: 2.747889, ppl: 15.609642 +epoch: 1, batch: 25435, sum loss: 4468.897461, avg loss: 2.779165, ppl: 16.105568 +epoch: 1, batch: 25436, sum loss: 4528.261719, avg loss: 2.599461, ppl: 13.456488 +epoch: 1, batch: 25437, sum loss: 4021.463379, avg loss: 2.502466, ppl: 12.212579 +epoch: 1, batch: 25438, sum loss: 4053.299072, avg loss: 2.288706, ppl: 9.862171 +epoch: 1, batch: 25439, sum loss: 3877.499512, avg loss: 2.593645, ppl: 13.378448 +epoch: 1, batch: 25440, sum loss: 
4206.552246, avg loss: 2.511375, ppl: 12.321857 +epoch: 1, batch: 25441, sum loss: 3482.951904, avg loss: 2.634608, ppl: 13.937852 +epoch: 1, batch: 25442, sum loss: 3865.437500, avg loss: 2.669501, ppl: 14.432766 +epoch: 1, batch: 25443, sum loss: 4616.808105, avg loss: 2.860476, ppl: 17.469841 +epoch: 1, batch: 25444, sum loss: 5050.388184, avg loss: 2.779520, ppl: 16.111290 +epoch: 1, batch: 25445, sum loss: 4787.117188, avg loss: 2.751217, ppl: 15.661675 +epoch: 1, batch: 25446, sum loss: 4880.904297, avg loss: 2.754461, ppl: 15.712566 +epoch: 1, batch: 25447, sum loss: 3592.657227, avg loss: 2.354297, ppl: 10.530725 +epoch: 1, batch: 25448, sum loss: 4896.392578, avg loss: 2.801140, ppl: 16.463406 +epoch: 1, batch: 25449, sum loss: 4664.517578, avg loss: 2.726194, ppl: 15.274639 +epoch: 1, batch: 25450, sum loss: 3679.674072, avg loss: 2.440102, ppl: 11.474213 +epoch: 1, batch: 25451, sum loss: 4838.615723, avg loss: 2.712229, ppl: 15.062810 +epoch: 1, batch: 25452, sum loss: 5232.211914, avg loss: 2.822121, ppl: 16.812466 +epoch: 1, batch: 25453, sum loss: 4191.350098, avg loss: 2.540212, ppl: 12.682362 +epoch: 1, batch: 25454, sum loss: 4502.002930, avg loss: 2.843969, ppl: 17.183830 +epoch: 1, batch: 25455, sum loss: 3582.355469, avg loss: 2.377144, ppl: 10.774084 +epoch: 1, batch: 25456, sum loss: 2956.185059, avg loss: 2.197907, ppl: 9.006145 +epoch: 1, batch: 25457, sum loss: 4336.159668, avg loss: 2.660221, ppl: 14.299443 +epoch: 1, batch: 25458, sum loss: 5076.128418, avg loss: 3.003626, ppl: 20.158506 +epoch: 1, batch: 25459, sum loss: 4864.020020, avg loss: 2.599690, ppl: 13.459568 +epoch: 1, batch: 25460, sum loss: 4058.272705, avg loss: 2.668161, ppl: 14.413437 +epoch: 1, batch: 25461, sum loss: 4964.716797, avg loss: 2.913566, ppl: 18.422382 +epoch: 1, batch: 25462, sum loss: 3558.941650, avg loss: 2.393370, ppl: 10.950336 +epoch: 1, batch: 25463, sum loss: 5239.420898, avg loss: 2.976943, ppl: 19.627733 +epoch: 1, batch: 25464, sum loss: 
4887.629883, avg loss: 2.505192, ppl: 12.245914 +epoch: 1, batch: 25465, sum loss: 4283.888184, avg loss: 2.685823, ppl: 14.670277 +epoch: 1, batch: 25466, sum loss: 5584.736328, avg loss: 2.889155, ppl: 17.978109 +epoch: 1, batch: 25467, sum loss: 3771.158203, avg loss: 2.590081, ppl: 13.330854 +epoch: 1, batch: 25468, sum loss: 4106.344238, avg loss: 2.572897, ppl: 13.103737 +epoch: 1, batch: 25469, sum loss: 5573.514160, avg loss: 3.040652, ppl: 20.918869 +epoch: 1, batch: 25470, sum loss: 4563.955566, avg loss: 2.632039, ppl: 13.902085 +epoch: 1, batch: 25471, sum loss: 3970.029785, avg loss: 2.527072, ppl: 12.516803 +epoch: 1, batch: 25472, sum loss: 6280.622070, avg loss: 3.100011, ppl: 22.198198 +epoch: 1, batch: 25473, sum loss: 5728.300293, avg loss: 2.955780, ppl: 19.216698 +epoch: 1, batch: 25474, sum loss: 4259.597168, avg loss: 2.649003, ppl: 14.139938 +epoch: 1, batch: 25475, sum loss: 4734.768066, avg loss: 2.901206, ppl: 18.196077 +epoch: 1, batch: 25476, sum loss: 4982.467773, avg loss: 2.636226, ppl: 13.960423 +epoch: 1, batch: 25477, sum loss: 3992.210938, avg loss: 2.605882, ppl: 13.543164 +epoch: 1, batch: 25478, sum loss: 4735.694336, avg loss: 2.639740, ppl: 14.009567 +epoch: 1, batch: 25479, sum loss: 5447.296875, avg loss: 2.646889, ppl: 14.110070 +epoch: 1, batch: 25480, sum loss: 4475.495605, avg loss: 2.561818, ppl: 12.959352 +epoch: 1, batch: 25481, sum loss: 4428.004395, avg loss: 2.441017, ppl: 11.484714 +epoch: 1, batch: 25482, sum loss: 3493.685059, avg loss: 2.310638, ppl: 10.080858 +epoch: 1, batch: 25483, sum loss: 3449.255615, avg loss: 2.556898, ppl: 12.895754 +epoch: 1, batch: 25484, sum loss: 4505.668945, avg loss: 2.683543, ppl: 14.636863 +epoch: 1, batch: 25485, sum loss: 4101.490234, avg loss: 2.414062, ppl: 11.179275 +epoch: 1, batch: 25486, sum loss: 4675.587891, avg loss: 2.566184, ppl: 13.016064 +epoch: 1, batch: 25487, sum loss: 4660.478027, avg loss: 2.615308, ppl: 13.671421 +epoch: 1, batch: 25488, sum loss: 
4008.917725, avg loss: 2.534082, ppl: 12.604854 +epoch: 1, batch: 25489, sum loss: 5037.911133, avg loss: 2.751454, ppl: 15.665387 +epoch: 1, batch: 25490, sum loss: 4306.927734, avg loss: 2.791269, ppl: 16.301691 +epoch: 1, batch: 25491, sum loss: 3578.432129, avg loss: 2.511180, ppl: 12.319464 +epoch: 1, batch: 25492, sum loss: 4212.561035, avg loss: 2.559272, ppl: 12.926398 +epoch: 1, batch: 25493, sum loss: 4280.476562, avg loss: 2.816103, ppl: 16.711601 +epoch: 1, batch: 25494, sum loss: 5113.250977, avg loss: 2.686942, ppl: 14.686697 +epoch: 1, batch: 25495, sum loss: 4843.644531, avg loss: 2.807910, ppl: 16.575239 +epoch: 1, batch: 25496, sum loss: 4632.968262, avg loss: 2.767604, ppl: 15.920437 +epoch: 1, batch: 25497, sum loss: 4550.682617, avg loss: 2.651913, ppl: 14.181140 +epoch: 1, batch: 25498, sum loss: 4711.890625, avg loss: 2.662085, ppl: 14.326129 +epoch: 1, batch: 25499, sum loss: 5485.997070, avg loss: 2.904181, ppl: 18.250282 +epoch: 1, batch: 25500, sum loss: 4142.739258, avg loss: 2.605496, ppl: 13.537944 +epoch: 1, batch: 25501, sum loss: 5177.903320, avg loss: 2.943663, ppl: 18.985264 +epoch: 1, batch: 25502, sum loss: 4377.535645, avg loss: 2.581094, ppl: 13.211587 +epoch: 1, batch: 25503, sum loss: 3142.148438, avg loss: 2.180533, ppl: 8.851027 +epoch: 1, batch: 25504, sum loss: 5084.529785, avg loss: 2.809133, ppl: 16.595516 +epoch: 1, batch: 25505, sum loss: 4960.135742, avg loss: 2.904061, ppl: 18.248093 +epoch: 1, batch: 25506, sum loss: 3910.624268, avg loss: 2.361488, ppl: 10.606724 +epoch: 1, batch: 25507, sum loss: 4360.930176, avg loss: 2.795468, ppl: 16.370289 +epoch: 1, batch: 25508, sum loss: 4921.352051, avg loss: 2.687795, ppl: 14.699227 +epoch: 1, batch: 25509, sum loss: 5558.434082, avg loss: 2.961339, ppl: 19.323837 +epoch: 1, batch: 25510, sum loss: 3873.369629, avg loss: 2.449949, ppl: 11.587756 +epoch: 1, batch: 25511, sum loss: 4159.630371, avg loss: 2.443966, ppl: 11.518635 +epoch: 1, batch: 25512, sum loss: 
5034.052246, avg loss: 2.716704, ppl: 15.130368 +epoch: 1, batch: 25513, sum loss: 3731.380615, avg loss: 2.538354, ppl: 12.658819 +epoch: 1, batch: 25514, sum loss: 4749.054199, avg loss: 2.606506, ppl: 13.551620 +epoch: 1, batch: 25515, sum loss: 4104.052734, avg loss: 2.542784, ppl: 12.715017 +epoch: 1, batch: 25516, sum loss: 4307.235840, avg loss: 2.438978, ppl: 11.461327 +epoch: 1, batch: 25517, sum loss: 4744.401367, avg loss: 2.820690, ppl: 16.788437 +epoch: 1, batch: 25518, sum loss: 4084.614746, avg loss: 2.306389, ppl: 10.038110 +epoch: 1, batch: 25519, sum loss: 4534.140625, avg loss: 2.694082, ppl: 14.791938 +epoch: 1, batch: 25520, sum loss: 4751.137695, avg loss: 2.760684, ppl: 15.810658 +epoch: 1, batch: 25521, sum loss: 5822.399414, avg loss: 2.888095, ppl: 17.959068 +epoch: 1, batch: 25522, sum loss: 4662.882324, avg loss: 2.730025, ppl: 15.333263 +epoch: 1, batch: 25523, sum loss: 4040.789307, avg loss: 2.591911, ppl: 13.355270 +epoch: 1, batch: 25524, sum loss: 3744.168457, avg loss: 2.340105, ppl: 10.382330 +epoch: 1, batch: 25525, sum loss: 3747.024414, avg loss: 2.584155, ppl: 13.252085 +epoch: 1, batch: 25526, sum loss: 4350.380371, avg loss: 2.611273, ppl: 13.616367 +epoch: 1, batch: 25527, sum loss: 3585.976562, avg loss: 2.288435, ppl: 9.859490 +epoch: 1, batch: 25528, sum loss: 4891.701172, avg loss: 2.918676, ppl: 18.516758 +epoch: 1, batch: 25529, sum loss: 3624.413574, avg loss: 2.615017, ppl: 13.667448 +epoch: 1, batch: 25530, sum loss: 4128.639160, avg loss: 2.583629, ppl: 13.245119 +epoch: 1, batch: 25531, sum loss: 4840.724121, avg loss: 2.772465, ppl: 15.998024 +epoch: 1, batch: 25532, sum loss: 4199.790039, avg loss: 2.654734, ppl: 14.221208 +epoch: 1, batch: 25533, sum loss: 5655.765137, avg loss: 2.898906, ppl: 18.154274 +epoch: 1, batch: 25534, sum loss: 4026.962891, avg loss: 2.681067, ppl: 14.600664 +epoch: 1, batch: 25535, sum loss: 4609.484863, avg loss: 2.532684, ppl: 12.587246 +epoch: 1, batch: 25536, sum loss: 
4540.744629, avg loss: 2.649209, ppl: 14.142851 +epoch: 1, batch: 25537, sum loss: 4017.389404, avg loss: 2.570307, ppl: 13.069834 +epoch: 1, batch: 25538, sum loss: 3778.388672, avg loss: 2.647785, ppl: 14.122718 +epoch: 1, batch: 25539, sum loss: 4706.466797, avg loss: 2.896287, ppl: 18.106794 +epoch: 1, batch: 25540, sum loss: 5834.754395, avg loss: 2.986057, ppl: 19.807419 +epoch: 1, batch: 25541, sum loss: 4092.507568, avg loss: 2.726521, ppl: 15.279634 +epoch: 1, batch: 25542, sum loss: 4276.129395, avg loss: 2.647758, ppl: 14.122344 +epoch: 1, batch: 25543, sum loss: 4698.172363, avg loss: 2.599985, ppl: 13.463531 +epoch: 1, batch: 25544, sum loss: 4650.527832, avg loss: 2.489576, ppl: 12.056162 +epoch: 1, batch: 25545, sum loss: 5912.065430, avg loss: 2.849188, ppl: 17.273752 +epoch: 1, batch: 25546, sum loss: 4354.689941, avg loss: 2.518618, ppl: 12.411427 +epoch: 1, batch: 25547, sum loss: 4202.768066, avg loss: 2.589506, ppl: 13.323187 +epoch: 1, batch: 25548, sum loss: 5662.849121, avg loss: 2.673677, ppl: 14.493169 +epoch: 1, batch: 25549, sum loss: 3669.392334, avg loss: 2.520187, ppl: 12.430923 +epoch: 1, batch: 25550, sum loss: 3877.019775, avg loss: 2.349709, ppl: 10.482519 +epoch: 1, batch: 25551, sum loss: 4725.817383, avg loss: 2.852032, ppl: 17.322950 +epoch: 1, batch: 25552, sum loss: 3708.112305, avg loss: 2.532864, ppl: 12.589506 +epoch: 1, batch: 25553, sum loss: 4106.909180, avg loss: 2.527329, ppl: 12.520017 +epoch: 1, batch: 25554, sum loss: 4659.750977, avg loss: 2.593072, ppl: 13.370790 +epoch: 1, batch: 25555, sum loss: 4389.220703, avg loss: 2.526897, ppl: 12.514618 +epoch: 1, batch: 25556, sum loss: 4661.343262, avg loss: 2.721158, ppl: 15.197908 +epoch: 1, batch: 25557, sum loss: 4368.592773, avg loss: 2.725260, ppl: 15.260388 +epoch: 1, batch: 25558, sum loss: 4569.919434, avg loss: 2.808801, ppl: 16.590017 +epoch: 1, batch: 25559, sum loss: 4628.158203, avg loss: 2.761431, ppl: 15.822469 +epoch: 1, batch: 25560, sum loss: 
4639.018066, avg loss: 2.787872, ppl: 16.246405 +epoch: 1, batch: 25561, sum loss: 4380.301758, avg loss: 2.648308, ppl: 14.130115 +epoch: 1, batch: 25562, sum loss: 3210.506836, avg loss: 2.456394, ppl: 11.662680 +epoch: 1, batch: 25563, sum loss: 4216.210938, avg loss: 2.517141, ppl: 12.393115 +epoch: 1, batch: 25564, sum loss: 4483.768555, avg loss: 2.600794, ppl: 13.474434 +epoch: 1, batch: 25565, sum loss: 4134.838379, avg loss: 2.613678, ppl: 13.649157 +epoch: 1, batch: 25566, sum loss: 3806.572510, avg loss: 2.558180, ppl: 12.912300 +epoch: 1, batch: 25567, sum loss: 4669.813477, avg loss: 2.663898, ppl: 14.352128 +epoch: 1, batch: 25568, sum loss: 5351.344727, avg loss: 2.938685, ppl: 18.890984 +epoch: 1, batch: 25569, sum loss: 3915.820557, avg loss: 2.598421, ppl: 13.442497 +epoch: 1, batch: 25570, sum loss: 5180.441406, avg loss: 2.740974, ppl: 15.502084 +epoch: 1, batch: 25571, sum loss: 5256.831055, avg loss: 2.818676, ppl: 16.754656 +epoch: 1, batch: 25572, sum loss: 3433.740234, avg loss: 2.368097, ppl: 10.677052 +epoch: 1, batch: 25573, sum loss: 5561.292480, avg loss: 2.814419, ppl: 16.683485 +epoch: 1, batch: 25574, sum loss: 4635.128906, avg loss: 2.636592, ppl: 13.965527 +epoch: 1, batch: 25575, sum loss: 3434.885010, avg loss: 2.370521, ppl: 10.702968 +epoch: 1, batch: 25576, sum loss: 3760.277344, avg loss: 2.451289, ppl: 11.603293 +epoch: 1, batch: 25577, sum loss: 4672.582520, avg loss: 2.766479, ppl: 15.902539 +epoch: 1, batch: 25578, sum loss: 4881.656738, avg loss: 2.706018, ppl: 14.969551 +epoch: 1, batch: 25579, sum loss: 3854.223389, avg loss: 2.504369, ppl: 12.235833 +epoch: 1, batch: 25580, sum loss: 4265.870117, avg loss: 2.531674, ppl: 12.574534 +epoch: 1, batch: 25581, sum loss: 4535.300293, avg loss: 2.459491, ppl: 11.698850 +epoch: 1, batch: 25582, sum loss: 4973.059570, avg loss: 2.721981, ppl: 15.210428 +epoch: 1, batch: 25583, sum loss: 4733.919922, avg loss: 2.702009, ppl: 14.909658 +epoch: 1, batch: 25584, sum loss: 
5601.644043, avg loss: 2.885958, ppl: 17.920723 +epoch: 1, batch: 25585, sum loss: 4112.868164, avg loss: 2.452515, ppl: 11.617533 +epoch: 1, batch: 25586, sum loss: 3594.731445, avg loss: 2.472305, ppl: 11.849730 +epoch: 1, batch: 25587, sum loss: 4288.204102, avg loss: 2.764799, ppl: 15.875854 +epoch: 1, batch: 25588, sum loss: 4389.329102, avg loss: 2.707791, ppl: 14.996110 +epoch: 1, batch: 25589, sum loss: 4292.104492, avg loss: 2.669219, ppl: 14.428699 +epoch: 1, batch: 25590, sum loss: 3628.763916, avg loss: 2.656489, ppl: 14.246181 +epoch: 1, batch: 25591, sum loss: 4015.709473, avg loss: 2.693300, ppl: 14.780364 +epoch: 1, batch: 25592, sum loss: 4233.223633, avg loss: 2.475569, ppl: 11.888473 +epoch: 1, batch: 25593, sum loss: 5392.665527, avg loss: 2.842734, ppl: 17.162617 +epoch: 1, batch: 25594, sum loss: 5376.627441, avg loss: 2.785817, ppl: 16.213064 +epoch: 1, batch: 25595, sum loss: 5163.759277, avg loss: 2.650800, ppl: 14.165373 +epoch: 1, batch: 25596, sum loss: 4007.895508, avg loss: 2.565874, ppl: 13.012031 +epoch: 1, batch: 25597, sum loss: 4412.680176, avg loss: 2.556593, ppl: 12.891826 +epoch: 1, batch: 25598, sum loss: 4873.791016, avg loss: 2.710673, ppl: 15.039398 +epoch: 1, batch: 25599, sum loss: 4398.303711, avg loss: 2.781976, ppl: 16.150900 +epoch: 1, batch: 25600, sum loss: 3933.677246, avg loss: 2.384047, ppl: 10.848717 +epoch: 1, batch: 25601, sum loss: 4007.214355, avg loss: 2.687602, ppl: 14.696392 +epoch: 1, batch: 25602, sum loss: 4227.033691, avg loss: 2.443372, ppl: 11.511793 +epoch: 1, batch: 25603, sum loss: 3887.410645, avg loss: 2.782685, ppl: 16.162355 +epoch: 1, batch: 25604, sum loss: 4220.957520, avg loss: 2.496131, ppl: 12.135453 +epoch: 1, batch: 25605, sum loss: 4754.318359, avg loss: 2.606534, ppl: 13.552001 +epoch: 1, batch: 25606, sum loss: 4487.908203, avg loss: 2.711727, ppl: 15.055256 +epoch: 1, batch: 25607, sum loss: 4763.428711, avg loss: 2.639019, ppl: 13.999460 +epoch: 1, batch: 25608, sum loss: 
4359.493164, avg loss: 2.782063, ppl: 16.152317 +epoch: 1, batch: 25609, sum loss: 3720.037109, avg loss: 2.345547, ppl: 10.438978 +epoch: 1, batch: 25610, sum loss: 4384.114258, avg loss: 2.598764, ppl: 13.447104 +epoch: 1, batch: 25611, sum loss: 3806.631348, avg loss: 2.534375, ppl: 12.608547 +epoch: 1, batch: 25612, sum loss: 4230.417480, avg loss: 2.692818, ppl: 14.773254 +epoch: 1, batch: 25613, sum loss: 5587.336426, avg loss: 3.026726, ppl: 20.629581 +epoch: 1, batch: 25614, sum loss: 4457.356445, avg loss: 2.690016, ppl: 14.731909 +epoch: 1, batch: 25615, sum loss: 4097.013672, avg loss: 2.445978, ppl: 11.541837 +epoch: 1, batch: 25616, sum loss: 3574.514160, avg loss: 2.423399, ppl: 11.284154 +epoch: 1, batch: 25617, sum loss: 3436.054932, avg loss: 2.384494, ppl: 10.853565 +epoch: 1, batch: 25618, sum loss: 4593.825195, avg loss: 2.652324, ppl: 14.186971 +epoch: 1, batch: 25619, sum loss: 4782.963867, avg loss: 2.815164, ppl: 16.695919 +epoch: 1, batch: 25620, sum loss: 3853.721191, avg loss: 2.456164, ppl: 11.659997 +epoch: 1, batch: 25621, sum loss: 4354.856934, avg loss: 2.542240, ppl: 12.708104 +epoch: 1, batch: 25622, sum loss: 4427.297852, avg loss: 2.834378, ppl: 17.019806 +epoch: 1, batch: 25623, sum loss: 5491.910645, avg loss: 2.952640, ppl: 19.156466 +epoch: 1, batch: 25624, sum loss: 4789.781250, avg loss: 2.621665, ppl: 13.758606 +epoch: 1, batch: 25625, sum loss: 4532.583496, avg loss: 2.755370, ppl: 15.726853 +epoch: 1, batch: 25626, sum loss: 5064.831543, avg loss: 2.717184, ppl: 15.137639 +epoch: 1, batch: 25627, sum loss: 5252.900391, avg loss: 2.711874, ppl: 15.057470 +epoch: 1, batch: 25628, sum loss: 4383.746094, avg loss: 2.712714, ppl: 15.070124 +epoch: 1, batch: 25629, sum loss: 4473.420898, avg loss: 2.736037, ppl: 15.425735 +epoch: 1, batch: 25630, sum loss: 4999.306641, avg loss: 2.734850, ppl: 15.407438 +epoch: 1, batch: 25631, sum loss: 4357.828613, avg loss: 2.518976, ppl: 12.415876 +epoch: 1, batch: 25632, sum loss: 
4305.707520, avg loss: 2.431230, ppl: 11.372857 +epoch: 1, batch: 25633, sum loss: 4836.254883, avg loss: 2.660206, ppl: 14.299235 +epoch: 1, batch: 25634, sum loss: 4388.003418, avg loss: 2.552649, ppl: 12.841075 +epoch: 1, batch: 25635, sum loss: 5022.868652, avg loss: 2.768946, ppl: 15.941829 +epoch: 1, batch: 25636, sum loss: 4333.925781, avg loss: 2.528545, ppl: 12.535253 +epoch: 1, batch: 25637, sum loss: 3829.739746, avg loss: 2.565130, ppl: 13.002355 +epoch: 1, batch: 25638, sum loss: 3822.593262, avg loss: 2.498427, ppl: 12.163348 +epoch: 1, batch: 25639, sum loss: 4439.670898, avg loss: 2.707116, ppl: 14.985999 +epoch: 1, batch: 25640, sum loss: 4258.738770, avg loss: 2.671731, ppl: 14.464983 +epoch: 1, batch: 25641, sum loss: 5063.253906, avg loss: 2.914942, ppl: 18.447739 +epoch: 1, batch: 25642, sum loss: 5434.929688, avg loss: 2.682591, ppl: 14.622935 +epoch: 1, batch: 25643, sum loss: 4082.390137, avg loss: 2.521550, ppl: 12.447881 +epoch: 1, batch: 25644, sum loss: 5064.312500, avg loss: 2.650085, ppl: 14.155241 +epoch: 1, batch: 25645, sum loss: 4298.977539, avg loss: 2.619730, ppl: 13.732018 +epoch: 1, batch: 25646, sum loss: 5523.996582, avg loss: 2.927396, ppl: 18.678928 +epoch: 1, batch: 25647, sum loss: 3668.540039, avg loss: 2.601802, ppl: 13.488017 +epoch: 1, batch: 25648, sum loss: 6197.852539, avg loss: 3.196417, ppl: 24.444790 +epoch: 1, batch: 25649, sum loss: 5358.371582, avg loss: 2.671172, ppl: 14.456908 +epoch: 1, batch: 25650, sum loss: 3831.704102, avg loss: 2.571614, ppl: 13.086924 +epoch: 1, batch: 25651, sum loss: 4039.223877, avg loss: 2.501067, ppl: 12.195504 +epoch: 1, batch: 25652, sum loss: 5168.341309, avg loss: 2.924924, ppl: 18.632816 +epoch: 1, batch: 25653, sum loss: 5083.243652, avg loss: 2.525208, ppl: 12.493494 +epoch: 1, batch: 25654, sum loss: 4548.354980, avg loss: 2.661413, ppl: 14.316506 +epoch: 1, batch: 25655, sum loss: 4183.539062, avg loss: 2.678322, ppl: 14.560641 +epoch: 1, batch: 25656, sum loss: 
3967.562500, avg loss: 2.404583, ppl: 11.073816 +epoch: 1, batch: 25657, sum loss: 4607.416504, avg loss: 2.592806, ppl: 13.367229 +epoch: 1, batch: 25658, sum loss: 4598.139160, avg loss: 2.567358, ppl: 13.031357 +epoch: 1, batch: 25659, sum loss: 4259.047852, avg loss: 2.474752, ppl: 11.878757 +epoch: 1, batch: 25660, sum loss: 3627.172119, avg loss: 2.508418, ppl: 12.285477 +epoch: 1, batch: 25661, sum loss: 4148.004883, avg loss: 2.473467, ppl: 11.863510 +epoch: 1, batch: 25662, sum loss: 5324.033203, avg loss: 2.848600, ppl: 17.263590 +epoch: 1, batch: 25663, sum loss: 3912.168457, avg loss: 2.553635, ppl: 12.853741 +epoch: 1, batch: 25664, sum loss: 4377.428223, avg loss: 2.582554, ppl: 13.230882 +epoch: 1, batch: 25665, sum loss: 4726.223633, avg loss: 2.814904, ppl: 16.691568 +epoch: 1, batch: 25666, sum loss: 4071.576660, avg loss: 2.449805, ppl: 11.586093 +epoch: 1, batch: 25667, sum loss: 3959.648926, avg loss: 2.490345, ppl: 12.065441 +epoch: 1, batch: 25668, sum loss: 4519.740723, avg loss: 2.760990, ppl: 15.815495 +epoch: 1, batch: 25669, sum loss: 3981.096191, avg loss: 2.527680, ppl: 12.524415 +epoch: 1, batch: 25670, sum loss: 3931.234619, avg loss: 2.707462, ppl: 14.991178 +epoch: 1, batch: 25671, sum loss: 4433.121094, avg loss: 2.563980, ppl: 12.987403 +epoch: 1, batch: 25672, sum loss: 3382.863037, avg loss: 2.309122, ppl: 10.065579 +epoch: 1, batch: 25673, sum loss: 3563.005859, avg loss: 2.337930, ppl: 10.359775 +epoch: 1, batch: 25674, sum loss: 4801.982422, avg loss: 2.635556, ppl: 13.951063 +epoch: 1, batch: 25675, sum loss: 3965.744629, avg loss: 2.650899, ppl: 14.166768 +epoch: 1, batch: 25676, sum loss: 4640.195312, avg loss: 2.798670, ppl: 16.422794 +epoch: 1, batch: 25677, sum loss: 5959.871094, avg loss: 2.877775, ppl: 17.774675 +epoch: 1, batch: 25678, sum loss: 4622.448730, avg loss: 2.761320, ppl: 15.820706 +epoch: 1, batch: 25679, sum loss: 5104.368652, avg loss: 2.861193, ppl: 17.482378 +epoch: 1, batch: 25680, sum loss: 
3693.466309, avg loss: 2.514272, ppl: 12.357615 +epoch: 1, batch: 25681, sum loss: 4767.528320, avg loss: 2.633994, ppl: 13.929287 +epoch: 1, batch: 25682, sum loss: 4284.658203, avg loss: 2.734307, ppl: 15.399061 +epoch: 1, batch: 25683, sum loss: 4416.005859, avg loss: 2.730987, ppl: 15.348029 +epoch: 1, batch: 25684, sum loss: 4073.739014, avg loss: 2.432083, ppl: 11.382568 +epoch: 1, batch: 25685, sum loss: 3880.727539, avg loss: 2.608016, ppl: 13.572094 +epoch: 1, batch: 25686, sum loss: 4070.331543, avg loss: 2.395722, ppl: 10.976122 +epoch: 1, batch: 25687, sum loss: 3368.176514, avg loss: 2.223219, ppl: 9.237017 +epoch: 1, batch: 25688, sum loss: 4883.989746, avg loss: 2.826383, ppl: 16.884277 +epoch: 1, batch: 25689, sum loss: 4113.864258, avg loss: 2.534728, ppl: 12.613004 +epoch: 1, batch: 25690, sum loss: 5061.017578, avg loss: 2.840077, ppl: 17.117090 +epoch: 1, batch: 25691, sum loss: 4630.812500, avg loss: 2.936470, ppl: 18.849182 +epoch: 1, batch: 25692, sum loss: 4172.365723, avg loss: 2.591532, ppl: 13.350202 +epoch: 1, batch: 25693, sum loss: 4056.331055, avg loss: 2.528885, ppl: 12.539515 +epoch: 1, batch: 25694, sum loss: 4478.861328, avg loss: 2.785362, ppl: 16.205675 +epoch: 1, batch: 25695, sum loss: 4694.110352, avg loss: 2.631228, ppl: 13.890817 +epoch: 1, batch: 25696, sum loss: 4716.484375, avg loss: 2.675261, ppl: 14.516131 +epoch: 1, batch: 25697, sum loss: 4322.020996, avg loss: 2.614653, ppl: 13.662470 +epoch: 1, batch: 25698, sum loss: 4370.049316, avg loss: 2.679368, ppl: 14.575879 +epoch: 1, batch: 25699, sum loss: 4884.107910, avg loss: 2.794112, ppl: 16.348108 +epoch: 1, batch: 25700, sum loss: 3605.381104, avg loss: 2.514213, ppl: 12.356878 +epoch: 1, batch: 25701, sum loss: 5891.764160, avg loss: 2.947356, ppl: 19.055500 +epoch: 1, batch: 25702, sum loss: 4863.117676, avg loss: 2.713794, ppl: 15.086397 +epoch: 1, batch: 25703, sum loss: 4723.879883, avg loss: 2.700903, ppl: 14.893177 +epoch: 1, batch: 25704, sum loss: 
3851.567139, avg loss: 2.318824, ppl: 10.163717 +epoch: 1, batch: 25705, sum loss: 4465.784180, avg loss: 2.562125, ppl: 12.963338 +epoch: 1, batch: 25706, sum loss: 4926.541016, avg loss: 2.780215, ppl: 16.122488 +epoch: 1, batch: 25707, sum loss: 4510.597656, avg loss: 2.592298, ppl: 13.360433 +epoch: 1, batch: 25708, sum loss: 4395.027832, avg loss: 2.349026, ppl: 10.475364 +epoch: 1, batch: 25709, sum loss: 3900.361816, avg loss: 2.351032, ppl: 10.496394 +epoch: 1, batch: 25710, sum loss: 4416.880371, avg loss: 2.834968, ppl: 17.029861 +epoch: 1, batch: 25711, sum loss: 3871.335449, avg loss: 2.367789, ppl: 10.673769 +epoch: 1, batch: 25712, sum loss: 3926.642090, avg loss: 2.322083, ppl: 10.196893 +epoch: 1, batch: 25713, sum loss: 4369.872559, avg loss: 2.679260, ppl: 14.574301 +epoch: 1, batch: 25714, sum loss: 4235.996094, avg loss: 2.622908, ppl: 13.775723 +epoch: 1, batch: 25715, sum loss: 5531.645020, avg loss: 2.659445, ppl: 14.288354 +epoch: 1, batch: 25716, sum loss: 4399.096191, avg loss: 2.409144, ppl: 11.124431 +epoch: 1, batch: 25717, sum loss: 4161.421387, avg loss: 2.571954, ppl: 13.091377 +epoch: 1, batch: 25718, sum loss: 3691.616699, avg loss: 2.436711, ppl: 11.435366 +epoch: 1, batch: 25719, sum loss: 4534.337891, avg loss: 2.734824, ppl: 15.407031 +epoch: 1, batch: 25720, sum loss: 4964.577148, avg loss: 2.686460, ppl: 14.679611 +epoch: 1, batch: 25721, sum loss: 5298.946289, avg loss: 2.922750, ppl: 18.592356 +epoch: 1, batch: 25722, sum loss: 4012.832520, avg loss: 2.664563, ppl: 14.361678 +epoch: 1, batch: 25723, sum loss: 5841.032715, avg loss: 2.776156, ppl: 16.057182 +epoch: 1, batch: 25724, sum loss: 3927.018555, avg loss: 2.509277, ppl: 12.296038 +epoch: 1, batch: 25725, sum loss: 4088.512695, avg loss: 2.760643, ppl: 15.810009 +epoch: 1, batch: 25726, sum loss: 3605.595947, avg loss: 2.279138, ppl: 9.768252 +epoch: 1, batch: 25727, sum loss: 4417.645996, avg loss: 2.801297, ppl: 16.465996 +epoch: 1, batch: 25728, sum loss: 
4134.270996, avg loss: 2.618284, ppl: 13.712176 +epoch: 1, batch: 25729, sum loss: 4377.235840, avg loss: 2.390626, ppl: 10.920327 +epoch: 1, batch: 25730, sum loss: 5526.915039, avg loss: 2.799855, ppl: 16.442270 +epoch: 1, batch: 25731, sum loss: 3851.406738, avg loss: 2.450004, ppl: 11.588397 +epoch: 1, batch: 25732, sum loss: 4905.346191, avg loss: 2.754265, ppl: 15.709495 +epoch: 1, batch: 25733, sum loss: 3473.826416, avg loss: 2.420785, ppl: 11.254690 +epoch: 1, batch: 25734, sum loss: 4625.713867, avg loss: 2.687806, ppl: 14.699389 +epoch: 1, batch: 25735, sum loss: 4456.885254, avg loss: 2.603321, ppl: 13.508523 +epoch: 1, batch: 25736, sum loss: 4705.833008, avg loss: 2.699847, ppl: 14.877452 +epoch: 1, batch: 25737, sum loss: 4352.793945, avg loss: 2.510262, ppl: 12.308151 +epoch: 1, batch: 25738, sum loss: 4599.462891, avg loss: 2.696051, ppl: 14.821086 +epoch: 1, batch: 25739, sum loss: 3698.107666, avg loss: 2.364519, ppl: 10.638919 +epoch: 1, batch: 25740, sum loss: 4492.970215, avg loss: 2.737947, ppl: 15.455215 +epoch: 1, batch: 25741, sum loss: 4043.011230, avg loss: 2.661627, ppl: 14.319566 +epoch: 1, batch: 25742, sum loss: 3853.435059, avg loss: 2.457548, ppl: 11.676146 +epoch: 1, batch: 25743, sum loss: 4229.495117, avg loss: 2.607580, ppl: 13.566183 +epoch: 1, batch: 25744, sum loss: 4352.532715, avg loss: 2.572419, ppl: 13.097468 +epoch: 1, batch: 25745, sum loss: 5101.205566, avg loss: 2.732301, ppl: 15.368205 +epoch: 1, batch: 25746, sum loss: 4641.317871, avg loss: 2.757765, ppl: 15.764563 +epoch: 1, batch: 25747, sum loss: 4966.640625, avg loss: 2.750078, ppl: 15.643851 +epoch: 1, batch: 25748, sum loss: 4452.135254, avg loss: 2.623533, ppl: 13.784338 +epoch: 1, batch: 25749, sum loss: 4821.397949, avg loss: 2.549655, ppl: 12.802689 +epoch: 1, batch: 25750, sum loss: 5180.939453, avg loss: 2.887926, ppl: 17.956032 +epoch: 1, batch: 25751, sum loss: 4205.843262, avg loss: 2.658561, ppl: 14.275728 +epoch: 1, batch: 25752, sum loss: 
5085.154785, avg loss: 2.919147, ppl: 18.525484 +epoch: 1, batch: 25753, sum loss: 3572.773682, avg loss: 2.453828, ppl: 11.632793 +epoch: 1, batch: 25754, sum loss: 4110.239258, avg loss: 2.413529, ppl: 11.173319 +epoch: 1, batch: 25755, sum loss: 4963.577148, avg loss: 2.763685, ppl: 15.858165 +epoch: 1, batch: 25756, sum loss: 4574.168457, avg loss: 2.695444, ppl: 14.812096 +epoch: 1, batch: 25757, sum loss: 4255.635742, avg loss: 2.577611, ppl: 13.165650 +epoch: 1, batch: 25758, sum loss: 3542.714600, avg loss: 2.355528, ppl: 10.543698 +epoch: 1, batch: 25759, sum loss: 4047.789551, avg loss: 2.568394, ppl: 13.044863 +epoch: 1, batch: 25760, sum loss: 5506.321289, avg loss: 2.854495, ppl: 17.365665 +epoch: 1, batch: 25761, sum loss: 3913.350342, avg loss: 2.510167, ppl: 12.306987 +epoch: 1, batch: 25762, sum loss: 5020.682617, avg loss: 2.689171, ppl: 14.719469 +epoch: 1, batch: 25763, sum loss: 5128.587891, avg loss: 2.754344, ppl: 15.710728 +epoch: 1, batch: 25764, sum loss: 4384.945312, avg loss: 2.491446, ppl: 12.078730 +epoch: 1, batch: 25765, sum loss: 5092.368652, avg loss: 3.078820, ppl: 21.732748 +epoch: 1, batch: 25766, sum loss: 4250.388672, avg loss: 2.446971, ppl: 11.553301 +epoch: 1, batch: 25767, sum loss: 4607.061523, avg loss: 2.586784, ppl: 13.286967 +epoch: 1, batch: 25768, sum loss: 4582.214844, avg loss: 2.582985, ppl: 13.236587 +epoch: 1, batch: 25769, sum loss: 4949.019531, avg loss: 2.698484, ppl: 14.857186 +epoch: 1, batch: 25770, sum loss: 5191.590820, avg loss: 2.770326, ppl: 15.963836 +epoch: 1, batch: 25771, sum loss: 3631.543457, avg loss: 2.372008, ppl: 10.718890 +epoch: 1, batch: 25772, sum loss: 4850.923828, avg loss: 2.828527, ppl: 16.920519 +epoch: 1, batch: 25773, sum loss: 4332.350098, avg loss: 2.616153, ppl: 13.682990 +epoch: 1, batch: 25774, sum loss: 5204.065430, avg loss: 2.674237, ppl: 14.501285 +epoch: 1, batch: 25775, sum loss: 4765.958984, avg loss: 2.656610, ppl: 14.247913 +epoch: 1, batch: 25776, sum loss: 
3580.975098, avg loss: 2.457773, ppl: 11.678771 +epoch: 1, batch: 25777, sum loss: 4144.007812, avg loss: 2.675279, ppl: 14.516405 +epoch: 1, batch: 25778, sum loss: 4672.408691, avg loss: 2.751713, ppl: 15.669452 +epoch: 1, batch: 25779, sum loss: 5224.060059, avg loss: 2.826872, ppl: 16.892544 +epoch: 1, batch: 25780, sum loss: 3681.094727, avg loss: 2.475518, ppl: 11.887857 +epoch: 1, batch: 25781, sum loss: 5703.811523, avg loss: 2.995699, ppl: 19.999342 +epoch: 1, batch: 25782, sum loss: 5141.295898, avg loss: 2.679154, ppl: 14.572755 +epoch: 1, batch: 25783, sum loss: 4260.147949, avg loss: 2.759163, ppl: 15.786630 +epoch: 1, batch: 25784, sum loss: 4303.174805, avg loss: 2.519423, ppl: 12.421428 +epoch: 1, batch: 25785, sum loss: 5446.317383, avg loss: 2.896977, ppl: 18.119291 +epoch: 1, batch: 25786, sum loss: 3734.472900, avg loss: 2.431298, ppl: 11.373631 +epoch: 1, batch: 25787, sum loss: 4106.186035, avg loss: 2.637242, ppl: 13.974613 +epoch: 1, batch: 25788, sum loss: 4913.310547, avg loss: 2.822120, ppl: 16.812450 +epoch: 1, batch: 25789, sum loss: 4970.611816, avg loss: 2.723623, ppl: 15.235421 +epoch: 1, batch: 25790, sum loss: 3101.406738, avg loss: 2.297338, ppl: 9.947669 +epoch: 1, batch: 25791, sum loss: 4912.231445, avg loss: 2.821500, ppl: 16.802036 +epoch: 1, batch: 25792, sum loss: 4311.649414, avg loss: 2.583373, ppl: 13.241728 +epoch: 1, batch: 25793, sum loss: 4367.583008, avg loss: 2.632660, ppl: 13.910722 +epoch: 1, batch: 25794, sum loss: 3531.282227, avg loss: 2.323212, ppl: 10.208412 +epoch: 1, batch: 25795, sum loss: 4392.718262, avg loss: 2.643032, ppl: 14.055751 +epoch: 1, batch: 25796, sum loss: 5264.986816, avg loss: 2.969536, ppl: 19.482874 +epoch: 1, batch: 25797, sum loss: 4260.239746, avg loss: 2.672672, ppl: 14.478608 +epoch: 1, batch: 25798, sum loss: 4945.783203, avg loss: 2.670509, ppl: 14.447323 +epoch: 1, batch: 25799, sum loss: 4973.079590, avg loss: 2.803314, ppl: 16.499239 +epoch: 1, batch: 25800, sum loss: 
5032.227539, avg loss: 2.577986, ppl: 13.170580 +epoch: 1, batch: 25801, sum loss: 4225.093262, avg loss: 2.606473, ppl: 13.551174 +epoch: 1, batch: 25802, sum loss: 4183.209961, avg loss: 2.520006, ppl: 12.428671 +epoch: 1, batch: 25803, sum loss: 5065.044922, avg loss: 2.907603, ppl: 18.312855 +epoch: 1, batch: 25804, sum loss: 4052.443115, avg loss: 2.646926, ppl: 14.110592 +epoch: 1, batch: 25805, sum loss: 3995.504883, avg loss: 2.639039, ppl: 13.999744 +epoch: 1, batch: 25806, sum loss: 4262.292969, avg loss: 2.670610, ppl: 14.448776 +epoch: 1, batch: 25807, sum loss: 4564.005371, avg loss: 2.485842, ppl: 12.011227 +epoch: 1, batch: 25808, sum loss: 4697.032715, avg loss: 2.766215, ppl: 15.898342 +epoch: 1, batch: 25809, sum loss: 4262.024414, avg loss: 2.769347, ppl: 15.948212 +epoch: 1, batch: 25810, sum loss: 4042.506836, avg loss: 2.492298, ppl: 12.089021 +epoch: 1, batch: 25811, sum loss: 4311.430176, avg loss: 2.449676, ppl: 11.584596 +epoch: 1, batch: 25812, sum loss: 4012.399658, avg loss: 2.339592, ppl: 10.376997 +epoch: 1, batch: 25813, sum loss: 4477.397949, avg loss: 2.721822, ppl: 15.208010 +epoch: 1, batch: 25814, sum loss: 4675.196777, avg loss: 2.626515, ppl: 13.825506 +epoch: 1, batch: 25815, sum loss: 4415.570312, avg loss: 2.504578, ppl: 12.238389 +epoch: 1, batch: 25816, sum loss: 4554.437988, avg loss: 2.702930, ppl: 14.923400 +epoch: 1, batch: 25817, sum loss: 4764.203125, avg loss: 2.663054, ppl: 14.340014 +epoch: 1, batch: 25818, sum loss: 4900.946777, avg loss: 2.727294, ppl: 15.291449 +epoch: 1, batch: 25819, sum loss: 4405.611816, avg loss: 2.686349, ppl: 14.677984 +epoch: 1, batch: 25820, sum loss: 4562.483887, avg loss: 2.699695, ppl: 14.875189 +epoch: 1, batch: 25821, sum loss: 4216.725098, avg loss: 2.489212, ppl: 12.051773 +epoch: 1, batch: 25822, sum loss: 4725.642578, avg loss: 2.533857, ppl: 12.602014 +epoch: 1, batch: 25823, sum loss: 4597.104492, avg loss: 2.702589, ppl: 14.918309 +epoch: 1, batch: 25824, sum loss: 
4532.147461, avg loss: 2.783874, ppl: 16.181591 +epoch: 1, batch: 25825, sum loss: 5375.384277, avg loss: 2.783731, ppl: 16.179274 +epoch: 1, batch: 25826, sum loss: 3922.570312, avg loss: 2.606359, ppl: 13.549630 +epoch: 1, batch: 25827, sum loss: 4909.125000, avg loss: 2.789275, ppl: 16.269226 +epoch: 1, batch: 25828, sum loss: 4959.765137, avg loss: 2.747792, ppl: 15.608135 +epoch: 1, batch: 25829, sum loss: 3556.511719, avg loss: 2.339810, ppl: 10.379269 +epoch: 1, batch: 25830, sum loss: 4958.920410, avg loss: 2.574725, ppl: 13.127708 +epoch: 1, batch: 25831, sum loss: 5480.188477, avg loss: 2.893447, ppl: 18.055439 +epoch: 1, batch: 25832, sum loss: 4475.593262, avg loss: 2.635803, ppl: 13.954514 +epoch: 1, batch: 25833, sum loss: 4708.577148, avg loss: 2.807738, ppl: 16.572393 +epoch: 1, batch: 25834, sum loss: 3323.684570, avg loss: 2.478512, ppl: 11.923510 +epoch: 1, batch: 25835, sum loss: 4318.416016, avg loss: 2.507791, ppl: 12.277776 +epoch: 1, batch: 25836, sum loss: 4483.171387, avg loss: 2.632514, ppl: 13.908692 +epoch: 1, batch: 25837, sum loss: 3524.743896, avg loss: 2.337363, ppl: 10.353900 +epoch: 1, batch: 25838, sum loss: 4093.447266, avg loss: 2.634136, ppl: 13.931270 +epoch: 1, batch: 25839, sum loss: 3623.379883, avg loss: 2.249150, ppl: 9.479671 +epoch: 1, batch: 25840, sum loss: 4808.568848, avg loss: 2.926700, ppl: 18.665937 +epoch: 1, batch: 25841, sum loss: 4594.311523, avg loss: 2.762665, ppl: 15.842003 +epoch: 1, batch: 25842, sum loss: 4624.547852, avg loss: 2.608318, ppl: 13.576198 +epoch: 1, batch: 25843, sum loss: 4795.816406, avg loss: 2.740467, ppl: 15.494213 +epoch: 1, batch: 25844, sum loss: 5299.922852, avg loss: 2.632848, ppl: 13.913339 +epoch: 1, batch: 25845, sum loss: 3686.607422, avg loss: 2.301253, ppl: 9.986689 +epoch: 1, batch: 25846, sum loss: 4614.724121, avg loss: 2.452032, ppl: 11.611917 +epoch: 1, batch: 25847, sum loss: 4451.408203, avg loss: 2.819131, ppl: 16.762280 +epoch: 1, batch: 25848, sum loss: 
3761.577637, avg loss: 2.447351, ppl: 11.557684 +epoch: 1, batch: 25849, sum loss: 5159.209473, avg loss: 2.794805, ppl: 16.359432 +epoch: 1, batch: 25850, sum loss: 4567.463867, avg loss: 2.779953, ppl: 16.118271 +epoch: 1, batch: 25851, sum loss: 3646.059570, avg loss: 2.389292, ppl: 10.905770 +epoch: 1, batch: 25852, sum loss: 4380.471680, avg loss: 2.563178, ppl: 12.976996 +epoch: 1, batch: 25853, sum loss: 4209.373047, avg loss: 2.551135, ppl: 12.821652 +epoch: 1, batch: 25854, sum loss: 4135.941406, avg loss: 2.703230, ppl: 14.927866 +epoch: 1, batch: 25855, sum loss: 4481.240234, avg loss: 2.563639, ppl: 12.982972 +epoch: 1, batch: 25856, sum loss: 4923.463867, avg loss: 2.744406, ppl: 15.555367 +epoch: 1, batch: 25857, sum loss: 3689.553223, avg loss: 2.301655, ppl: 9.990704 +epoch: 1, batch: 25858, sum loss: 4666.082520, avg loss: 2.743141, ppl: 15.535702 +epoch: 1, batch: 25859, sum loss: 4806.954102, avg loss: 2.618167, ppl: 13.710565 +epoch: 1, batch: 25860, sum loss: 4185.595215, avg loss: 2.688244, ppl: 14.705824 +epoch: 1, batch: 25861, sum loss: 4330.689453, avg loss: 2.651984, ppl: 14.182144 +epoch: 1, batch: 25862, sum loss: 4588.188965, avg loss: 2.683151, ppl: 14.631130 +epoch: 1, batch: 25863, sum loss: 4481.665039, avg loss: 2.566818, ppl: 13.024322 +epoch: 1, batch: 25864, sum loss: 3793.578125, avg loss: 2.277058, ppl: 9.747956 +epoch: 1, batch: 25865, sum loss: 4395.410645, avg loss: 2.636719, ppl: 13.967301 +epoch: 1, batch: 25866, sum loss: 4044.047119, avg loss: 2.502504, ppl: 12.213041 +epoch: 1, batch: 25867, sum loss: 3717.134033, avg loss: 2.604859, ppl: 13.529323 +epoch: 1, batch: 25868, sum loss: 4853.085938, avg loss: 2.646175, ppl: 14.100008 +epoch: 1, batch: 25869, sum loss: 4412.431152, avg loss: 2.651701, ppl: 14.178142 +epoch: 1, batch: 25870, sum loss: 3910.805908, avg loss: 2.549417, ppl: 12.799640 +epoch: 1, batch: 25871, sum loss: 4051.419434, avg loss: 2.401553, ppl: 11.040308 +epoch: 1, batch: 25872, sum loss: 
5413.704590, avg loss: 2.717723, ppl: 15.145802 +epoch: 1, batch: 25873, sum loss: 4993.161621, avg loss: 2.699006, ppl: 14.864953 +epoch: 1, batch: 25874, sum loss: 4454.443359, avg loss: 2.465104, ppl: 11.764707 +epoch: 1, batch: 25875, sum loss: 5131.178711, avg loss: 2.754256, ppl: 15.709345 +epoch: 1, batch: 25876, sum loss: 4344.306152, avg loss: 2.418879, ppl: 11.233257 +epoch: 1, batch: 25877, sum loss: 5255.002930, avg loss: 2.796702, ppl: 16.390505 +epoch: 1, batch: 25878, sum loss: 3989.048096, avg loss: 2.454799, ppl: 11.644090 +epoch: 1, batch: 25879, sum loss: 3864.239746, avg loss: 2.391237, ppl: 10.927008 +epoch: 1, batch: 25880, sum loss: 5310.601074, avg loss: 2.883063, ppl: 17.868916 +epoch: 1, batch: 25881, sum loss: 3896.751709, avg loss: 2.423353, ppl: 11.283629 +epoch: 1, batch: 25882, sum loss: 5301.618164, avg loss: 2.761260, ppl: 15.819757 +epoch: 1, batch: 25883, sum loss: 4173.740723, avg loss: 2.593997, ppl: 13.383154 +epoch: 1, batch: 25884, sum loss: 3557.063477, avg loss: 2.421418, ppl: 11.261820 +epoch: 1, batch: 25885, sum loss: 4146.898438, avg loss: 2.437918, ppl: 11.449178 +epoch: 1, batch: 25886, sum loss: 3784.728760, avg loss: 2.437044, ppl: 11.439173 +epoch: 1, batch: 25887, sum loss: 5351.589844, avg loss: 2.910054, ppl: 18.357798 +epoch: 1, batch: 25888, sum loss: 4026.154297, avg loss: 2.577564, ppl: 13.165023 +epoch: 1, batch: 25889, sum loss: 3878.639160, avg loss: 2.856141, ppl: 17.394270 +epoch: 1, batch: 25890, sum loss: 5095.560547, avg loss: 2.680463, ppl: 14.591852 +epoch: 1, batch: 25891, sum loss: 4002.467285, avg loss: 2.486005, ppl: 12.013183 +epoch: 1, batch: 25892, sum loss: 4329.147949, avg loss: 2.746921, ppl: 15.594547 +epoch: 1, batch: 25893, sum loss: 3689.030762, avg loss: 2.339271, ppl: 10.373674 +epoch: 1, batch: 25894, sum loss: 4666.185059, avg loss: 2.840039, ppl: 17.116442 +epoch: 1, batch: 25895, sum loss: 5234.205078, avg loss: 2.667790, ppl: 14.408098 +epoch: 1, batch: 25896, sum loss: 
5294.494141, avg loss: 2.709567, ppl: 15.022770 +epoch: 1, batch: 25897, sum loss: 4273.522461, avg loss: 2.493304, ppl: 12.101190 +epoch: 1, batch: 25898, sum loss: 5757.149902, avg loss: 2.898867, ppl: 18.153568 +epoch: 1, batch: 25899, sum loss: 4193.259766, avg loss: 2.515453, ppl: 12.372208 +epoch: 1, batch: 25900, sum loss: 4728.212402, avg loss: 2.631170, ppl: 13.890012 +epoch: 1, batch: 25901, sum loss: 4250.217773, avg loss: 2.668059, ppl: 14.411966 +epoch: 1, batch: 25902, sum loss: 4022.375000, avg loss: 2.451173, ppl: 11.601949 +epoch: 1, batch: 25903, sum loss: 3224.072021, avg loss: 2.262506, ppl: 9.607140 +epoch: 1, batch: 25904, sum loss: 5092.132812, avg loss: 2.862357, ppl: 17.502726 +epoch: 1, batch: 25905, sum loss: 4468.008301, avg loss: 2.736073, ppl: 15.426294 +epoch: 1, batch: 25906, sum loss: 3905.584717, avg loss: 2.541044, ppl: 12.692919 +epoch: 1, batch: 25907, sum loss: 4799.030273, avg loss: 2.647011, ppl: 14.111793 +epoch: 1, batch: 25908, sum loss: 3112.210205, avg loss: 2.317357, ppl: 10.148814 +epoch: 1, batch: 25909, sum loss: 4498.247559, avg loss: 2.583715, ppl: 13.246256 +epoch: 1, batch: 25910, sum loss: 4611.203125, avg loss: 2.691887, ppl: 14.759506 +epoch: 1, batch: 25911, sum loss: 3839.643799, avg loss: 2.529410, ppl: 12.546098 +epoch: 1, batch: 25912, sum loss: 4149.520508, avg loss: 2.606483, ppl: 13.551304 +epoch: 1, batch: 25913, sum loss: 3822.743408, avg loss: 2.669514, ppl: 14.432949 +epoch: 1, batch: 25914, sum loss: 4287.577148, avg loss: 2.630415, ppl: 13.879535 +epoch: 1, batch: 25915, sum loss: 4714.043945, avg loss: 2.696822, ppl: 14.832515 +epoch: 1, batch: 25916, sum loss: 4634.349121, avg loss: 2.705399, ppl: 14.960289 +epoch: 1, batch: 25917, sum loss: 5275.432617, avg loss: 2.800124, ppl: 16.446680 +epoch: 1, batch: 25918, sum loss: 4145.786621, avg loss: 2.782407, ppl: 16.157871 +epoch: 1, batch: 25919, sum loss: 3896.916504, avg loss: 2.472663, ppl: 11.853969 +epoch: 1, batch: 25920, sum loss: 
3778.320801, avg loss: 2.451863, ppl: 11.609957 +epoch: 1, batch: 25921, sum loss: 4750.881348, avg loss: 2.593276, ppl: 13.373508 +epoch: 1, batch: 25922, sum loss: 4441.741211, avg loss: 2.525151, ppl: 12.492787 +epoch: 1, batch: 25923, sum loss: 4865.209473, avg loss: 2.853495, ppl: 17.348314 +epoch: 1, batch: 25924, sum loss: 3895.448242, avg loss: 2.653575, ppl: 14.204729 +epoch: 1, batch: 25925, sum loss: 3563.576416, avg loss: 2.422554, ppl: 11.274615 +epoch: 1, batch: 25926, sum loss: 4372.155273, avg loss: 2.621196, ppl: 13.752165 +epoch: 1, batch: 25927, sum loss: 4284.712891, avg loss: 2.666281, ppl: 14.386363 +epoch: 1, batch: 25928, sum loss: 4497.681641, avg loss: 2.627150, ppl: 13.834291 +epoch: 1, batch: 25929, sum loss: 4736.399902, avg loss: 2.599561, ppl: 13.457829 +epoch: 1, batch: 25930, sum loss: 4509.511719, avg loss: 2.721492, ppl: 15.202982 +epoch: 1, batch: 25931, sum loss: 4080.732910, avg loss: 2.520527, ppl: 12.435147 +epoch: 1, batch: 25932, sum loss: 4281.804199, avg loss: 2.667791, ppl: 14.408101 +epoch: 1, batch: 25933, sum loss: 5169.752441, avg loss: 2.712357, ppl: 15.064738 +epoch: 1, batch: 25934, sum loss: 4892.352051, avg loss: 2.707444, ppl: 14.990912 +epoch: 1, batch: 25935, sum loss: 4858.189941, avg loss: 2.526360, ppl: 12.507892 +epoch: 1, batch: 25936, sum loss: 5041.382324, avg loss: 2.824304, ppl: 16.849211 +epoch: 1, batch: 25937, sum loss: 4840.436035, avg loss: 2.551627, ppl: 12.827957 +epoch: 1, batch: 25938, sum loss: 4847.954102, avg loss: 2.765519, ppl: 15.887278 +epoch: 1, batch: 25939, sum loss: 3891.392578, avg loss: 2.459793, ppl: 11.702387 +epoch: 1, batch: 25940, sum loss: 4277.611328, avg loss: 2.735046, ppl: 15.410447 +epoch: 1, batch: 25941, sum loss: 5190.411133, avg loss: 2.757923, ppl: 15.767059 +epoch: 1, batch: 25942, sum loss: 4447.756836, avg loss: 2.453258, ppl: 11.626166 +epoch: 1, batch: 25943, sum loss: 5166.281250, avg loss: 2.813879, ppl: 16.674465 +epoch: 1, batch: 25944, sum loss: 
4200.262695, avg loss: 2.666833, ppl: 14.394316 +epoch: 1, batch: 25945, sum loss: 4153.516602, avg loss: 2.684885, ppl: 14.656512 +epoch: 1, batch: 25946, sum loss: 4512.529785, avg loss: 2.836285, ppl: 17.052305 +epoch: 1, batch: 25947, sum loss: 3671.159180, avg loss: 2.500790, ppl: 12.192118 +epoch: 1, batch: 25948, sum loss: 3893.585205, avg loss: 2.538191, ppl: 12.656755 +epoch: 1, batch: 25949, sum loss: 4562.337891, avg loss: 2.831991, ppl: 16.979240 +epoch: 1, batch: 25950, sum loss: 4299.632324, avg loss: 2.481034, ppl: 11.953619 +epoch: 1, batch: 25951, sum loss: 4526.264648, avg loss: 2.639221, ppl: 14.002295 +epoch: 1, batch: 25952, sum loss: 4129.190918, avg loss: 2.342139, ppl: 10.403466 +epoch: 1, batch: 25953, sum loss: 4756.348145, avg loss: 2.594844, ppl: 13.394493 +epoch: 1, batch: 25954, sum loss: 3828.951660, avg loss: 2.420323, ppl: 11.249496 +epoch: 1, batch: 25955, sum loss: 4246.654785, avg loss: 2.545956, ppl: 12.755418 +epoch: 1, batch: 25956, sum loss: 4855.750977, avg loss: 2.749576, ppl: 15.635999 +epoch: 1, batch: 25957, sum loss: 3898.620605, avg loss: 2.500719, ppl: 12.191255 +epoch: 1, batch: 25958, sum loss: 5077.513672, avg loss: 2.898124, ppl: 18.140091 +epoch: 1, batch: 25959, sum loss: 3941.810059, avg loss: 2.344920, ppl: 10.432435 +epoch: 1, batch: 25960, sum loss: 4632.105469, avg loss: 2.543715, ppl: 12.726867 +epoch: 1, batch: 25961, sum loss: 4536.478516, avg loss: 2.814193, ppl: 16.679703 +epoch: 1, batch: 25962, sum loss: 4799.464355, avg loss: 2.726968, ppl: 15.286472 +epoch: 1, batch: 25963, sum loss: 4679.992676, avg loss: 2.787369, ppl: 16.238241 +epoch: 1, batch: 25964, sum loss: 3852.643066, avg loss: 2.466481, ppl: 11.780916 +epoch: 1, batch: 25965, sum loss: 4467.505859, avg loss: 2.691269, ppl: 14.750378 +epoch: 1, batch: 25966, sum loss: 5137.914062, avg loss: 2.544781, ppl: 12.740443 +epoch: 1, batch: 25967, sum loss: 3863.197754, avg loss: 2.591011, ppl: 13.343259 +epoch: 1, batch: 25968, sum loss: 
3625.307861, avg loss: 2.343444, ppl: 10.417052 +epoch: 1, batch: 25969, sum loss: 5126.247559, avg loss: 2.833747, ppl: 17.009069 +epoch: 1, batch: 25970, sum loss: 4012.381836, avg loss: 2.525099, ppl: 12.492129 +epoch: 1, batch: 25971, sum loss: 3811.383789, avg loss: 2.407696, ppl: 11.108343 +epoch: 1, batch: 25972, sum loss: 5035.647461, avg loss: 2.620004, ppl: 13.735778 +epoch: 1, batch: 25973, sum loss: 3954.773682, avg loss: 2.538366, ppl: 12.658967 +epoch: 1, batch: 25974, sum loss: 5068.695801, avg loss: 2.728039, ppl: 15.302842 +epoch: 1, batch: 25975, sum loss: 5242.728027, avg loss: 2.698265, ppl: 14.853931 +epoch: 1, batch: 25976, sum loss: 3791.784180, avg loss: 2.350765, ppl: 10.493597 +epoch: 1, batch: 25977, sum loss: 5803.912598, avg loss: 2.810611, ppl: 16.620077 +epoch: 1, batch: 25978, sum loss: 4487.738770, avg loss: 2.562958, ppl: 12.974135 +epoch: 1, batch: 25979, sum loss: 4471.583984, avg loss: 2.621093, ppl: 13.750739 +epoch: 1, batch: 25980, sum loss: 4176.024902, avg loss: 2.576203, ppl: 13.147122 +epoch: 1, batch: 25981, sum loss: 4592.802246, avg loss: 2.671787, ppl: 14.465800 +epoch: 1, batch: 25982, sum loss: 4594.764648, avg loss: 2.679163, ppl: 14.572890 +epoch: 1, batch: 25983, sum loss: 4557.099121, avg loss: 2.691730, ppl: 14.757188 +epoch: 1, batch: 25984, sum loss: 5161.085938, avg loss: 2.764374, ppl: 15.869100 +epoch: 1, batch: 25985, sum loss: 5099.283691, avg loss: 2.604333, ppl: 13.522202 +epoch: 1, batch: 25986, sum loss: 4195.992188, avg loss: 2.490203, ppl: 12.063726 +epoch: 1, batch: 25987, sum loss: 4041.702637, avg loss: 2.487202, ppl: 12.027572 +epoch: 1, batch: 25988, sum loss: 4580.690918, avg loss: 2.707264, ppl: 14.988214 +epoch: 1, batch: 25989, sum loss: 4371.304199, avg loss: 2.525306, ppl: 12.494715 +epoch: 1, batch: 25990, sum loss: 4371.995605, avg loss: 2.660983, ppl: 14.310354 +epoch: 1, batch: 25991, sum loss: 4922.636719, avg loss: 2.713692, ppl: 15.084862 +epoch: 1, batch: 25992, sum loss: 
4412.564453, avg loss: 2.639094, ppl: 14.000508 +epoch: 1, batch: 25993, sum loss: 4572.096680, avg loss: 2.609644, ppl: 13.594216 +epoch: 1, batch: 25994, sum loss: 4149.553223, avg loss: 2.545738, ppl: 12.752639 +epoch: 1, batch: 25995, sum loss: 4210.030273, avg loss: 2.405732, ppl: 11.086539 +epoch: 1, batch: 25996, sum loss: 3394.486816, avg loss: 2.583323, ppl: 13.241069 +epoch: 1, batch: 25997, sum loss: 5407.724609, avg loss: 2.640491, ppl: 14.020080 +epoch: 1, batch: 25998, sum loss: 4759.263184, avg loss: 2.603536, ppl: 13.511425 +epoch: 1, batch: 25999, sum loss: 4343.904785, avg loss: 2.599584, ppl: 13.458139 +epoch: 1, batch: 26000, sum loss: 4504.647461, avg loss: 2.695780, ppl: 14.817065 +epoch: 1, batch: 26001, sum loss: 4100.740723, avg loss: 2.511170, ppl: 12.319334 +epoch: 1, batch: 26002, sum loss: 4075.061279, avg loss: 2.566160, ppl: 13.015744 +epoch: 1, batch: 26003, sum loss: 4441.190918, avg loss: 2.347353, ppl: 10.457846 +epoch: 1, batch: 26004, sum loss: 3905.699707, avg loss: 2.470398, ppl: 11.827155 +epoch: 1, batch: 26005, sum loss: 4424.518555, avg loss: 2.614964, ppl: 13.666721 +epoch: 1, batch: 26006, sum loss: 5118.346680, avg loss: 2.938201, ppl: 18.881851 +epoch: 1, batch: 26007, sum loss: 6268.851074, avg loss: 2.962595, ppl: 19.348114 +epoch: 1, batch: 26008, sum loss: 4662.235840, avg loss: 2.514690, ppl: 12.362778 +epoch: 1, batch: 26009, sum loss: 4234.511230, avg loss: 2.656532, ppl: 14.246789 +epoch: 1, batch: 26010, sum loss: 4087.398438, avg loss: 2.441696, ppl: 11.492512 +epoch: 1, batch: 26011, sum loss: 3048.789551, avg loss: 2.194953, ppl: 8.979576 +epoch: 1, batch: 26012, sum loss: 4421.067383, avg loss: 2.499190, ppl: 12.172634 +epoch: 1, batch: 26013, sum loss: 3851.395996, avg loss: 2.460956, ppl: 11.716005 +epoch: 1, batch: 26014, sum loss: 4202.985840, avg loss: 2.562796, ppl: 12.972041 +epoch: 1, batch: 26015, sum loss: 4411.692383, avg loss: 2.575419, ppl: 13.136817 +epoch: 1, batch: 26016, sum loss: 
4908.196777, avg loss: 2.748151, ppl: 15.613729 +epoch: 1, batch: 26017, sum loss: 4316.344727, avg loss: 2.510963, ppl: 12.316782 +epoch: 1, batch: 26018, sum loss: 3557.920410, avg loss: 2.452047, ppl: 11.612094 +epoch: 1, batch: 26019, sum loss: 4877.443848, avg loss: 2.588877, ppl: 13.314810 +epoch: 1, batch: 26020, sum loss: 4235.821777, avg loss: 2.567165, ppl: 13.028831 +epoch: 1, batch: 26021, sum loss: 4986.079590, avg loss: 2.646539, ppl: 14.105139 +epoch: 1, batch: 26022, sum loss: 4894.806152, avg loss: 2.751437, ppl: 15.665122 +epoch: 1, batch: 26023, sum loss: 4409.295898, avg loss: 2.859466, ppl: 17.452198 +epoch: 1, batch: 26024, sum loss: 5132.041504, avg loss: 2.659089, ppl: 14.283273 +epoch: 1, batch: 26025, sum loss: 4160.738281, avg loss: 2.574714, ppl: 13.127564 +epoch: 1, batch: 26026, sum loss: 4019.375000, avg loss: 2.353264, ppl: 10.519849 +epoch: 1, batch: 26027, sum loss: 4166.372559, avg loss: 2.716019, ppl: 15.120005 +epoch: 1, batch: 26028, sum loss: 5097.951660, avg loss: 2.703050, ppl: 14.925182 +epoch: 1, batch: 26029, sum loss: 4044.432373, avg loss: 2.564637, ppl: 12.995939 +epoch: 1, batch: 26030, sum loss: 4811.134766, avg loss: 2.882645, ppl: 17.861462 +epoch: 1, batch: 26031, sum loss: 4248.768066, avg loss: 2.609808, ppl: 13.596446 +epoch: 1, batch: 26032, sum loss: 3898.329834, avg loss: 2.481432, ppl: 11.958379 +epoch: 1, batch: 26033, sum loss: 4086.573730, avg loss: 2.658799, ppl: 14.279129 +epoch: 1, batch: 26034, sum loss: 4452.129883, avg loss: 2.690109, ppl: 14.733278 +epoch: 1, batch: 26035, sum loss: 3893.699463, avg loss: 2.632657, ppl: 13.910679 +epoch: 1, batch: 26036, sum loss: 4600.615234, avg loss: 2.712627, ppl: 15.068809 +epoch: 1, batch: 26037, sum loss: 4600.456543, avg loss: 2.748182, ppl: 15.614221 +epoch: 1, batch: 26038, sum loss: 4648.392578, avg loss: 2.872925, ppl: 17.688679 +epoch: 1, batch: 26039, sum loss: 3751.652832, avg loss: 2.487833, ppl: 12.035173 +epoch: 1, batch: 26040, sum loss: 
3639.092773, avg loss: 2.369201, ppl: 10.688850 +epoch: 1, batch: 26041, sum loss: 3928.115234, avg loss: 2.436796, ppl: 11.436339 +epoch: 1, batch: 26042, sum loss: 4876.276367, avg loss: 2.661723, ppl: 14.320941 +epoch: 1, batch: 26043, sum loss: 3677.447754, avg loss: 2.543187, ppl: 12.720141 +epoch: 1, batch: 26044, sum loss: 4460.532227, avg loss: 2.709923, ppl: 15.028111 +epoch: 1, batch: 26045, sum loss: 4815.724609, avg loss: 2.604502, ppl: 13.524491 +epoch: 1, batch: 26046, sum loss: 5527.564941, avg loss: 2.769321, ppl: 15.947805 +epoch: 1, batch: 26047, sum loss: 4575.351074, avg loss: 2.764563, ppl: 15.872096 +epoch: 1, batch: 26048, sum loss: 5355.184570, avg loss: 2.893131, ppl: 18.049726 +epoch: 1, batch: 26049, sum loss: 4554.844727, avg loss: 2.802981, ppl: 16.493748 +epoch: 1, batch: 26050, sum loss: 4681.712891, avg loss: 2.578036, ppl: 13.171239 +epoch: 1, batch: 26051, sum loss: 4799.087891, avg loss: 2.669126, ppl: 14.427348 +epoch: 1, batch: 26052, sum loss: 5375.495117, avg loss: 3.158340, ppl: 23.531506 +epoch: 1, batch: 26053, sum loss: 5080.715820, avg loss: 2.948761, ppl: 19.082304 +epoch: 1, batch: 26054, sum loss: 4283.050293, avg loss: 2.436320, ppl: 11.430896 +epoch: 1, batch: 26055, sum loss: 4384.500977, avg loss: 2.556560, ppl: 12.891398 +epoch: 1, batch: 26056, sum loss: 3866.893066, avg loss: 2.439680, ppl: 11.469371 +epoch: 1, batch: 26057, sum loss: 4579.283203, avg loss: 2.704834, ppl: 14.951831 +epoch: 1, batch: 26058, sum loss: 4162.915039, avg loss: 2.639769, ppl: 14.009961 +epoch: 1, batch: 26059, sum loss: 4949.762695, avg loss: 2.896292, ppl: 18.106880 +epoch: 1, batch: 26060, sum loss: 4978.076660, avg loss: 2.774848, ppl: 16.036184 +epoch: 1, batch: 26061, sum loss: 4695.429199, avg loss: 2.720411, ppl: 15.186560 +epoch: 1, batch: 26062, sum loss: 4091.947021, avg loss: 2.510397, ppl: 12.309815 +epoch: 1, batch: 26063, sum loss: 4477.070312, avg loss: 2.767040, ppl: 15.911463 +epoch: 1, batch: 26064, sum loss: 
4464.718262, avg loss: 2.533892, ppl: 12.602465 +epoch: 1, batch: 26065, sum loss: 4391.079590, avg loss: 2.453117, ppl: 11.624525 +epoch: 1, batch: 26066, sum loss: 4816.499512, avg loss: 2.763339, ppl: 15.852684 +epoch: 1, batch: 26067, sum loss: 3888.264404, avg loss: 2.657734, ppl: 14.263929 +epoch: 1, batch: 26068, sum loss: 3792.920654, avg loss: 2.603240, ppl: 13.507431 +epoch: 1, batch: 26069, sum loss: 3994.815186, avg loss: 2.458348, ppl: 11.685489 +epoch: 1, batch: 26070, sum loss: 4979.747559, avg loss: 2.863570, ppl: 17.523972 +epoch: 1, batch: 26071, sum loss: 4448.199219, avg loss: 2.575680, ppl: 13.140249 +epoch: 1, batch: 26072, sum loss: 4688.977539, avg loss: 2.632778, ppl: 13.912364 +epoch: 1, batch: 26073, sum loss: 5001.876465, avg loss: 2.808465, ppl: 16.584446 +epoch: 1, batch: 26074, sum loss: 3613.646973, avg loss: 2.412314, ppl: 11.159757 +epoch: 1, batch: 26075, sum loss: 4127.363770, avg loss: 2.659384, ppl: 14.287485 +epoch: 1, batch: 26076, sum loss: 4642.283203, avg loss: 2.622759, ppl: 13.773671 +epoch: 1, batch: 26077, sum loss: 4971.616211, avg loss: 2.807237, ppl: 16.564083 +epoch: 1, batch: 26078, sum loss: 3632.819824, avg loss: 2.184498, ppl: 8.886185 +epoch: 1, batch: 26079, sum loss: 4066.058594, avg loss: 2.598120, ppl: 13.438456 +epoch: 1, batch: 26080, sum loss: 4925.711914, avg loss: 2.535107, ppl: 12.617777 +epoch: 1, batch: 26081, sum loss: 4854.411133, avg loss: 2.940285, ppl: 18.921246 +epoch: 1, batch: 26082, sum loss: 4309.925781, avg loss: 2.809600, ppl: 16.603270 +epoch: 1, batch: 26083, sum loss: 4116.242676, avg loss: 2.561445, ppl: 12.954529 +epoch: 1, batch: 26084, sum loss: 3429.283691, avg loss: 2.352046, ppl: 10.507051 +epoch: 1, batch: 26085, sum loss: 5716.849121, avg loss: 3.005704, ppl: 20.200436 +epoch: 1, batch: 26086, sum loss: 5311.509277, avg loss: 2.808836, ppl: 16.590599 +epoch: 1, batch: 26087, sum loss: 4797.914551, avg loss: 2.656653, ppl: 14.248515 +epoch: 1, batch: 26088, sum loss: 
4443.884766, avg loss: 2.789633, ppl: 16.275038 +epoch: 1, batch: 26089, sum loss: 4561.920898, avg loss: 2.733326, ppl: 15.383968 +epoch: 1, batch: 26090, sum loss: 4955.611816, avg loss: 2.967432, ppl: 19.441933 +epoch: 1, batch: 26091, sum loss: 4052.807861, avg loss: 2.581406, ppl: 13.215711 +epoch: 1, batch: 26092, sum loss: 4535.527832, avg loss: 2.629292, ppl: 13.863944 +epoch: 1, batch: 26093, sum loss: 4451.041992, avg loss: 2.764622, ppl: 15.873042 +epoch: 1, batch: 26094, sum loss: 3719.058105, avg loss: 2.453205, ppl: 11.625543 +epoch: 1, batch: 26095, sum loss: 5180.166992, avg loss: 2.724969, ppl: 15.255947 +epoch: 1, batch: 26096, sum loss: 5192.617676, avg loss: 2.747417, ppl: 15.602275 +epoch: 1, batch: 26097, sum loss: 4191.435059, avg loss: 2.543347, ppl: 12.722177 +epoch: 1, batch: 26098, sum loss: 4207.270996, avg loss: 2.639442, ppl: 14.005389 +epoch: 1, batch: 26099, sum loss: 4047.050293, avg loss: 2.383422, ppl: 10.841940 +epoch: 1, batch: 26100, sum loss: 4861.962402, avg loss: 2.645246, ppl: 14.086910 +epoch: 1, batch: 26101, sum loss: 4922.901367, avg loss: 2.519397, ppl: 12.421102 +epoch: 1, batch: 26102, sum loss: 5053.417480, avg loss: 3.047899, ppl: 21.071037 +epoch: 1, batch: 26103, sum loss: 3605.293213, avg loss: 2.170556, ppl: 8.763153 +epoch: 1, batch: 26104, sum loss: 4576.900391, avg loss: 2.758831, ppl: 15.781381 +epoch: 1, batch: 26105, sum loss: 4080.771729, avg loss: 2.542537, ppl: 12.711877 +epoch: 1, batch: 26106, sum loss: 4191.142578, avg loss: 2.596743, ppl: 13.419953 +epoch: 1, batch: 26107, sum loss: 3571.507080, avg loss: 2.244819, ppl: 9.438708 +epoch: 1, batch: 26108, sum loss: 4227.727539, avg loss: 2.715304, ppl: 15.109198 +epoch: 1, batch: 26109, sum loss: 3844.912109, avg loss: 2.350191, ppl: 10.487572 +epoch: 1, batch: 26110, sum loss: 3997.173584, avg loss: 2.570530, ppl: 13.072747 +epoch: 1, batch: 26111, sum loss: 4196.198242, avg loss: 2.520239, ppl: 12.431572 +epoch: 1, batch: 26112, sum loss: 
4339.631836, avg loss: 2.729328, ppl: 15.322589 +epoch: 1, batch: 26113, sum loss: 4437.554688, avg loss: 2.676450, ppl: 14.533415 +epoch: 1, batch: 26114, sum loss: 4414.824219, avg loss: 2.631004, ppl: 13.887701 +epoch: 1, batch: 26115, sum loss: 4291.804199, avg loss: 2.430240, ppl: 11.361610 +epoch: 1, batch: 26116, sum loss: 4045.520996, avg loss: 2.399479, ppl: 11.017431 +epoch: 1, batch: 26117, sum loss: 4568.311035, avg loss: 2.704743, ppl: 14.950476 +epoch: 1, batch: 26118, sum loss: 4468.054688, avg loss: 2.680297, ppl: 14.589420 +epoch: 1, batch: 26119, sum loss: 5115.812988, avg loss: 3.000477, ppl: 20.095118 +epoch: 1, batch: 26120, sum loss: 3943.433105, avg loss: 2.542510, ppl: 12.711537 +epoch: 1, batch: 26121, sum loss: 4255.555176, avg loss: 2.759763, ppl: 15.796106 +epoch: 1, batch: 26122, sum loss: 4119.232422, avg loss: 2.630417, ppl: 13.879551 +epoch: 1, batch: 26123, sum loss: 5044.430664, avg loss: 2.680356, ppl: 14.590290 +epoch: 1, batch: 26124, sum loss: 4696.978027, avg loss: 2.779277, ppl: 16.107368 +epoch: 1, batch: 26125, sum loss: 5224.307129, avg loss: 2.701296, ppl: 14.899034 +epoch: 1, batch: 26126, sum loss: 5332.447754, avg loss: 2.995757, ppl: 20.000496 +epoch: 1, batch: 26127, sum loss: 4669.307617, avg loss: 2.719457, ppl: 15.172080 +epoch: 1, batch: 26128, sum loss: 5143.355957, avg loss: 3.006053, ppl: 20.207478 +epoch: 1, batch: 26129, sum loss: 3952.396484, avg loss: 2.471793, ppl: 11.843660 +epoch: 1, batch: 26130, sum loss: 3796.669922, avg loss: 2.577508, ppl: 13.164298 +epoch: 1, batch: 26131, sum loss: 4401.410645, avg loss: 2.624574, ppl: 13.798694 +epoch: 1, batch: 26132, sum loss: 4888.909180, avg loss: 2.509707, ppl: 12.301325 +epoch: 1, batch: 26133, sum loss: 5109.221191, avg loss: 2.776751, ppl: 16.066729 +epoch: 1, batch: 26134, sum loss: 4991.542969, avg loss: 2.662156, ppl: 14.327150 +epoch: 1, batch: 26135, sum loss: 5483.643555, avg loss: 2.783576, ppl: 16.176758 +epoch: 1, batch: 26136, sum loss: 
4420.402344, avg loss: 2.654896, ppl: 14.223511 +epoch: 1, batch: 26137, sum loss: 4352.835449, avg loss: 2.659032, ppl: 14.282458 +epoch: 1, batch: 26138, sum loss: 4564.465820, avg loss: 2.689727, ppl: 14.727649 +epoch: 1, batch: 26139, sum loss: 3782.248535, avg loss: 2.457601, ppl: 11.676769 +epoch: 1, batch: 26140, sum loss: 3992.108887, avg loss: 2.783897, ppl: 16.181965 +epoch: 1, batch: 26141, sum loss: 3790.270020, avg loss: 2.518452, ppl: 12.409371 +epoch: 1, batch: 26142, sum loss: 4376.408203, avg loss: 2.431338, ppl: 11.374088 +epoch: 1, batch: 26143, sum loss: 4401.749023, avg loss: 2.618530, ppl: 13.715548 +epoch: 1, batch: 26144, sum loss: 4252.507812, avg loss: 2.710330, ppl: 15.034236 +epoch: 1, batch: 26145, sum loss: 4213.029297, avg loss: 2.587856, ppl: 13.301221 +epoch: 1, batch: 26146, sum loss: 4386.639648, avg loss: 2.554828, ppl: 12.869085 +epoch: 1, batch: 26147, sum loss: 4801.701660, avg loss: 2.507416, ppl: 12.273175 +epoch: 1, batch: 26148, sum loss: 5530.479492, avg loss: 2.750114, ppl: 15.644419 +epoch: 1, batch: 26149, sum loss: 3811.855957, avg loss: 2.418690, ppl: 11.231139 +epoch: 1, batch: 26150, sum loss: 4089.248779, avg loss: 2.575094, ppl: 13.132548 +epoch: 1, batch: 26151, sum loss: 3856.755859, avg loss: 2.407463, ppl: 11.105751 +epoch: 1, batch: 26152, sum loss: 3719.972656, avg loss: 2.406192, ppl: 11.091641 +epoch: 1, batch: 26153, sum loss: 4876.443359, avg loss: 2.589720, ppl: 13.326043 +epoch: 1, batch: 26154, sum loss: 3754.844727, avg loss: 2.603914, ppl: 13.516545 +epoch: 1, batch: 26155, sum loss: 4192.583984, avg loss: 2.548683, ppl: 12.790250 +epoch: 1, batch: 26156, sum loss: 4243.318359, avg loss: 2.500482, ppl: 12.188372 +epoch: 1, batch: 26157, sum loss: 5660.432617, avg loss: 3.066323, ppl: 21.462845 +epoch: 1, batch: 26158, sum loss: 4525.062988, avg loss: 2.739142, ppl: 15.473709 +epoch: 1, batch: 26159, sum loss: 5098.913086, avg loss: 2.813970, ppl: 16.675983 +epoch: 1, batch: 26160, sum loss: 
4415.561523, avg loss: 2.659977, ppl: 14.295959 +epoch: 1, batch: 26161, sum loss: 4582.671875, avg loss: 2.860594, ppl: 17.471903 +epoch: 1, batch: 26162, sum loss: 5439.445312, avg loss: 2.882589, ppl: 17.860451 +epoch: 1, batch: 26163, sum loss: 4760.108398, avg loss: 2.772340, ppl: 15.996026 +epoch: 1, batch: 26164, sum loss: 4553.088867, avg loss: 2.530900, ppl: 12.564806 +epoch: 1, batch: 26165, sum loss: 4638.830078, avg loss: 2.686063, ppl: 14.673785 +epoch: 1, batch: 26166, sum loss: 3863.852295, avg loss: 2.512258, ppl: 12.332741 +epoch: 1, batch: 26167, sum loss: 3666.212402, avg loss: 2.583659, ppl: 13.245521 +epoch: 1, batch: 26168, sum loss: 4095.123047, avg loss: 2.507730, ppl: 12.277030 +epoch: 1, batch: 26169, sum loss: 4325.314453, avg loss: 2.474436, ppl: 11.875011 +epoch: 1, batch: 26170, sum loss: 4462.380371, avg loss: 2.567538, ppl: 13.033690 +epoch: 1, batch: 26171, sum loss: 4236.711914, avg loss: 2.378839, ppl: 10.792363 +epoch: 1, batch: 26172, sum loss: 5201.451660, avg loss: 2.736166, ppl: 15.427722 +epoch: 1, batch: 26173, sum loss: 3835.474121, avg loss: 2.515065, ppl: 12.367412 +epoch: 1, batch: 26174, sum loss: 4168.808105, avg loss: 2.499285, ppl: 12.173792 +epoch: 1, batch: 26175, sum loss: 3738.542480, avg loss: 2.448292, ppl: 11.568577 +epoch: 1, batch: 26176, sum loss: 3860.861328, avg loss: 2.560253, ppl: 12.939089 +epoch: 1, batch: 26177, sum loss: 3356.822266, avg loss: 2.385801, ppl: 10.867767 +epoch: 1, batch: 26178, sum loss: 4305.547852, avg loss: 2.694335, ppl: 14.795683 +epoch: 1, batch: 26179, sum loss: 5252.260742, avg loss: 3.027240, ppl: 20.640179 +epoch: 1, batch: 26180, sum loss: 4391.291016, avg loss: 2.705663, ppl: 14.964234 +epoch: 1, batch: 26181, sum loss: 3502.128906, avg loss: 2.428661, ppl: 11.343681 +epoch: 1, batch: 26182, sum loss: 4118.986328, avg loss: 2.361804, ppl: 10.610075 +epoch: 1, batch: 26183, sum loss: 4830.153320, avg loss: 2.649563, ppl: 14.147856 +epoch: 1, batch: 26184, sum loss: 
4064.131104, avg loss: 2.479640, ppl: 11.936972 +epoch: 1, batch: 26185, sum loss: 4053.852783, avg loss: 2.377626, ppl: 10.779287 +epoch: 1, batch: 26186, sum loss: 4972.083008, avg loss: 2.627951, ppl: 13.845370 +epoch: 1, batch: 26187, sum loss: 4776.685059, avg loss: 2.559853, ppl: 12.933910 +epoch: 1, batch: 26188, sum loss: 4768.146973, avg loss: 2.829761, ppl: 16.941408 +epoch: 1, batch: 26189, sum loss: 4014.050781, avg loss: 2.467149, ppl: 11.788784 +epoch: 1, batch: 26190, sum loss: 5114.317871, avg loss: 2.748156, ppl: 15.613811 +epoch: 1, batch: 26191, sum loss: 3926.823242, avg loss: 2.580042, ppl: 13.197688 +epoch: 1, batch: 26192, sum loss: 4601.748535, avg loss: 2.608701, ppl: 13.581397 +epoch: 1, batch: 26193, sum loss: 3711.157715, avg loss: 2.528037, ppl: 12.528883 +epoch: 1, batch: 26194, sum loss: 3907.064941, avg loss: 2.460368, ppl: 11.709125 +epoch: 1, batch: 26195, sum loss: 5002.016113, avg loss: 2.859929, ppl: 17.460289 +epoch: 1, batch: 26196, sum loss: 3965.557861, avg loss: 2.434351, ppl: 11.408412 +epoch: 1, batch: 26197, sum loss: 4646.954590, avg loss: 2.689210, ppl: 14.720041 +epoch: 1, batch: 26198, sum loss: 4286.751465, avg loss: 2.665890, ppl: 14.380746 +epoch: 1, batch: 26199, sum loss: 4279.002441, avg loss: 2.605970, ppl: 13.544355 +epoch: 1, batch: 26200, sum loss: 4714.517090, avg loss: 2.939225, ppl: 18.901196 +epoch: 1, batch: 26201, sum loss: 3102.466553, avg loss: 2.371916, ppl: 10.717912 +epoch: 1, batch: 26202, sum loss: 3770.382812, avg loss: 2.578921, ppl: 13.182910 +epoch: 1, batch: 26203, sum loss: 4932.098633, avg loss: 2.808712, ppl: 16.588543 +epoch: 1, batch: 26204, sum loss: 4284.852051, avg loss: 2.753761, ppl: 15.701575 +epoch: 1, batch: 26205, sum loss: 3604.160889, avg loss: 2.548912, ppl: 12.793172 +epoch: 1, batch: 26206, sum loss: 4365.878906, avg loss: 2.592565, ppl: 13.364004 +epoch: 1, batch: 26207, sum loss: 3958.885742, avg loss: 2.451323, ppl: 11.603683 +epoch: 1, batch: 26208, sum loss: 
4052.054443, avg loss: 2.417694, ppl: 11.219952 +epoch: 1, batch: 26209, sum loss: 3995.879395, avg loss: 2.477297, ppl: 11.909026 +epoch: 1, batch: 26210, sum loss: 4591.583496, avg loss: 2.832562, ppl: 16.988934 +epoch: 1, batch: 26211, sum loss: 5042.088379, avg loss: 2.749230, ppl: 15.630594 +epoch: 1, batch: 26212, sum loss: 4139.220703, avg loss: 2.526997, ppl: 12.515860 +epoch: 1, batch: 26213, sum loss: 3969.880859, avg loss: 2.350433, ppl: 10.490109 +epoch: 1, batch: 26214, sum loss: 5069.589355, avg loss: 2.541147, ppl: 12.694229 +epoch: 1, batch: 26215, sum loss: 3938.094238, avg loss: 2.501966, ppl: 12.206466 +epoch: 1, batch: 26216, sum loss: 4599.947266, avg loss: 2.741327, ppl: 15.507554 +epoch: 1, batch: 26217, sum loss: 4574.401367, avg loss: 2.661083, ppl: 14.311780 +epoch: 1, batch: 26218, sum loss: 4266.774414, avg loss: 2.632186, ppl: 13.904137 +epoch: 1, batch: 26219, sum loss: 4510.849609, avg loss: 2.507421, ppl: 12.273231 +epoch: 1, batch: 26220, sum loss: 4798.046875, avg loss: 2.567173, ppl: 13.028943 +epoch: 1, batch: 26221, sum loss: 4001.829590, avg loss: 2.416564, ppl: 11.207282 +epoch: 1, batch: 26222, sum loss: 3781.590576, avg loss: 2.449217, ppl: 11.579275 +epoch: 1, batch: 26223, sum loss: 4816.928223, avg loss: 2.741564, ppl: 15.511227 +epoch: 1, batch: 26224, sum loss: 4302.654785, avg loss: 2.621971, ppl: 13.762825 +epoch: 1, batch: 26225, sum loss: 4760.215332, avg loss: 2.599790, ppl: 13.460913 +epoch: 1, batch: 26226, sum loss: 4044.035156, avg loss: 2.636268, ppl: 13.961002 +epoch: 1, batch: 26227, sum loss: 3937.031250, avg loss: 2.361746, ppl: 10.609463 +epoch: 1, batch: 26228, sum loss: 4220.624023, avg loss: 2.618253, ppl: 13.711748 +epoch: 1, batch: 26229, sum loss: 5488.312988, avg loss: 2.906945, ppl: 18.300812 +epoch: 1, batch: 26230, sum loss: 4044.576172, avg loss: 2.602687, ppl: 13.499969 +epoch: 1, batch: 26231, sum loss: 4576.760254, avg loss: 2.455343, ppl: 11.650434 +epoch: 1, batch: 26232, sum loss: 
4572.538574, avg loss: 2.515148, ppl: 12.368435 +epoch: 1, batch: 26233, sum loss: 4696.684082, avg loss: 2.574936, ppl: 13.130482 +epoch: 1, batch: 26234, sum loss: 4518.229004, avg loss: 2.736662, ppl: 15.435378 +epoch: 1, batch: 26235, sum loss: 4700.852539, avg loss: 2.600029, ppl: 13.464128 +epoch: 1, batch: 26236, sum loss: 4731.130371, avg loss: 2.653466, ppl: 14.203188 +epoch: 1, batch: 26237, sum loss: 3994.570068, avg loss: 2.617674, ppl: 13.703810 +epoch: 1, batch: 26238, sum loss: 3635.923340, avg loss: 2.544383, ppl: 12.735369 +epoch: 1, batch: 26239, sum loss: 4373.525391, avg loss: 2.452903, ppl: 11.622034 +epoch: 1, batch: 26240, sum loss: 4051.807617, avg loss: 2.445267, ppl: 11.533628 +epoch: 1, batch: 26241, sum loss: 5100.670410, avg loss: 2.781173, ppl: 16.137932 +epoch: 1, batch: 26242, sum loss: 4086.283936, avg loss: 2.522398, ppl: 12.458430 +epoch: 1, batch: 26243, sum loss: 4812.763672, avg loss: 2.498839, ppl: 12.168357 +epoch: 1, batch: 26244, sum loss: 5258.148438, avg loss: 2.893863, ppl: 18.062946 +epoch: 1, batch: 26245, sum loss: 3236.635498, avg loss: 2.210817, ppl: 9.123163 +epoch: 1, batch: 26246, sum loss: 4122.229492, avg loss: 2.549307, ppl: 12.798233 +epoch: 1, batch: 26247, sum loss: 5186.026367, avg loss: 2.719469, ppl: 15.172257 +epoch: 1, batch: 26248, sum loss: 3903.079834, avg loss: 2.413779, ppl: 11.176111 +epoch: 1, batch: 26249, sum loss: 4322.413086, avg loss: 2.580545, ppl: 13.204335 +epoch: 1, batch: 26250, sum loss: 4850.396973, avg loss: 2.610547, ppl: 13.606496 +epoch: 1, batch: 26251, sum loss: 5153.726562, avg loss: 2.778289, ppl: 16.091471 +epoch: 1, batch: 26252, sum loss: 3674.055176, avg loss: 2.371888, ppl: 10.717612 +epoch: 1, batch: 26253, sum loss: 3514.862793, avg loss: 2.466570, ppl: 11.781970 +epoch: 1, batch: 26254, sum loss: 3718.155273, avg loss: 2.425411, ppl: 11.306878 +epoch: 1, batch: 26255, sum loss: 5017.887695, avg loss: 2.703603, ppl: 14.933444 +epoch: 1, batch: 26256, sum loss: 
4267.736328, avg loss: 2.303150, ppl: 10.005648 +epoch: 1, batch: 26257, sum loss: 4345.597656, avg loss: 2.469090, ppl: 11.811687 +epoch: 1, batch: 26258, sum loss: 4086.153076, avg loss: 2.751618, ppl: 15.667964 +epoch: 1, batch: 26259, sum loss: 4273.262695, avg loss: 2.764077, ppl: 15.864386 +epoch: 1, batch: 26260, sum loss: 4237.910645, avg loss: 2.475415, ppl: 11.886639 +epoch: 1, batch: 26261, sum loss: 5067.960938, avg loss: 2.751336, ppl: 15.663546 +epoch: 1, batch: 26262, sum loss: 4156.778320, avg loss: 2.436564, ppl: 11.433686 +epoch: 1, batch: 26263, sum loss: 3997.287598, avg loss: 2.529929, ppl: 12.552614 +epoch: 1, batch: 26264, sum loss: 4218.636719, avg loss: 2.661600, ppl: 14.319186 +epoch: 1, batch: 26265, sum loss: 3569.779785, avg loss: 2.512160, ppl: 12.331541 +epoch: 1, batch: 26266, sum loss: 3401.605469, avg loss: 2.198840, ppl: 9.014552 +epoch: 1, batch: 26267, sum loss: 4570.785156, avg loss: 2.745216, ppl: 15.567982 +epoch: 1, batch: 26268, sum loss: 5188.174805, avg loss: 2.756735, ppl: 15.748335 +epoch: 1, batch: 26269, sum loss: 3719.528076, avg loss: 2.402796, ppl: 11.054041 +epoch: 1, batch: 26270, sum loss: 4233.520996, avg loss: 2.544183, ppl: 12.732827 +epoch: 1, batch: 26271, sum loss: 4850.517578, avg loss: 2.665120, ppl: 14.369669 +epoch: 1, batch: 26272, sum loss: 4092.041992, avg loss: 2.601425, ppl: 13.482940 +epoch: 1, batch: 26273, sum loss: 3991.479492, avg loss: 2.646870, ppl: 14.109801 +epoch: 1, batch: 26274, sum loss: 4600.865723, avg loss: 2.412620, ppl: 11.163166 +epoch: 1, batch: 26275, sum loss: 4645.774414, avg loss: 2.707328, ppl: 14.989168 +epoch: 1, batch: 26276, sum loss: 4719.922363, avg loss: 2.629483, ppl: 13.866602 +epoch: 1, batch: 26277, sum loss: 3854.355469, avg loss: 2.433305, ppl: 11.396488 +epoch: 1, batch: 26278, sum loss: 4596.957031, avg loss: 2.598619, ppl: 13.445161 +epoch: 1, batch: 26279, sum loss: 5355.088867, avg loss: 3.084729, ppl: 21.861536 +epoch: 1, batch: 26280, sum loss: 
4859.810547, avg loss: 2.773865, ppl: 16.020430 +epoch: 1, batch: 26281, sum loss: 3859.232666, avg loss: 2.542314, ppl: 12.709046 +epoch: 1, batch: 26282, sum loss: 4695.913574, avg loss: 2.641121, ppl: 14.028927 +epoch: 1, batch: 26283, sum loss: 4543.981445, avg loss: 2.520234, ppl: 12.431503 +epoch: 1, batch: 26284, sum loss: 4370.062500, avg loss: 2.504334, ppl: 12.235404 +epoch: 1, batch: 26285, sum loss: 4220.273926, avg loss: 2.717498, ppl: 15.142385 +epoch: 1, batch: 26286, sum loss: 3720.783447, avg loss: 2.573156, ppl: 13.107123 +epoch: 1, batch: 26287, sum loss: 4767.810547, avg loss: 2.741697, ppl: 15.513287 +epoch: 1, batch: 26288, sum loss: 4119.099121, avg loss: 2.485878, ppl: 12.011656 +epoch: 1, batch: 26289, sum loss: 4081.899658, avg loss: 2.525928, ppl: 12.502493 +epoch: 1, batch: 26290, sum loss: 5268.469727, avg loss: 2.922058, ppl: 18.579477 +epoch: 1, batch: 26291, sum loss: 3562.873779, avg loss: 2.430337, ppl: 11.362710 +epoch: 1, batch: 26292, sum loss: 5119.834961, avg loss: 2.831767, ppl: 16.975431 +epoch: 1, batch: 26293, sum loss: 4847.850586, avg loss: 2.966861, ppl: 19.430826 +epoch: 1, batch: 26294, sum loss: 4957.232910, avg loss: 2.800697, ppl: 16.456106 +epoch: 1, batch: 26295, sum loss: 4374.093750, avg loss: 2.763167, ppl: 15.849962 +epoch: 1, batch: 26296, sum loss: 6106.493164, avg loss: 2.945728, ppl: 19.024500 +epoch: 1, batch: 26297, sum loss: 6037.530273, avg loss: 2.869549, ppl: 17.629059 +epoch: 1, batch: 26298, sum loss: 5465.617188, avg loss: 2.896458, ppl: 18.109894 +epoch: 1, batch: 26299, sum loss: 4547.872070, avg loss: 2.530814, ppl: 12.563725 +epoch: 1, batch: 26300, sum loss: 4133.656738, avg loss: 2.622879, ppl: 13.775319 +epoch: 1, batch: 26301, sum loss: 5203.945801, avg loss: 2.842133, ppl: 17.152317 +epoch: 1, batch: 26302, sum loss: 4248.085449, avg loss: 2.746015, ppl: 15.580425 +epoch: 1, batch: 26303, sum loss: 3441.366455, avg loss: 2.488334, ppl: 12.041203 +epoch: 1, batch: 26304, sum loss: 
4117.616699, avg loss: 2.530803, ppl: 12.563590 +epoch: 1, batch: 26305, sum loss: 4829.197754, avg loss: 2.608967, ppl: 13.585011 +epoch: 1, batch: 26306, sum loss: 4554.243652, avg loss: 2.884258, ppl: 17.890289 +epoch: 1, batch: 26307, sum loss: 4453.108398, avg loss: 2.638098, ppl: 13.986569 +epoch: 1, batch: 26308, sum loss: 3710.866455, avg loss: 2.515842, ppl: 12.377023 +epoch: 1, batch: 26309, sum loss: 5401.044434, avg loss: 2.844152, ppl: 17.186977 +epoch: 1, batch: 26310, sum loss: 4317.383789, avg loss: 2.462854, ppl: 11.738269 +epoch: 1, batch: 26311, sum loss: 3898.765869, avg loss: 2.478554, ppl: 11.924013 +epoch: 1, batch: 26312, sum loss: 4787.681641, avg loss: 2.552069, ppl: 12.833632 +epoch: 1, batch: 26313, sum loss: 3695.882324, avg loss: 2.369155, ppl: 10.688361 +epoch: 1, batch: 26314, sum loss: 4260.143066, avg loss: 2.552512, ppl: 12.839321 +epoch: 1, batch: 26315, sum loss: 3723.915527, avg loss: 2.480957, ppl: 11.952692 +epoch: 1, batch: 26316, sum loss: 4412.335938, avg loss: 2.624828, ppl: 13.802198 +epoch: 1, batch: 26317, sum loss: 4901.282227, avg loss: 2.642201, ppl: 14.044077 +epoch: 1, batch: 26318, sum loss: 3406.986328, avg loss: 2.412880, ppl: 11.166070 +epoch: 1, batch: 26319, sum loss: 3846.103760, avg loss: 2.627120, ppl: 13.833871 +epoch: 1, batch: 26320, sum loss: 3999.137207, avg loss: 2.451954, ppl: 11.611014 +epoch: 1, batch: 26321, sum loss: 3745.126465, avg loss: 2.179934, ppl: 8.845722 +epoch: 1, batch: 26322, sum loss: 4943.566406, avg loss: 2.885911, ppl: 17.919893 +epoch: 1, batch: 26323, sum loss: 3642.500732, avg loss: 2.656820, ppl: 14.250903 +epoch: 1, batch: 26324, sum loss: 4039.119141, avg loss: 2.597504, ppl: 13.430180 +epoch: 1, batch: 26325, sum loss: 4325.068359, avg loss: 2.489964, ppl: 12.060848 +epoch: 1, batch: 26326, sum loss: 4589.111816, avg loss: 2.664989, ppl: 14.367798 +epoch: 1, batch: 26327, sum loss: 4744.979980, avg loss: 2.765140, ppl: 15.881260 +epoch: 1, batch: 26328, sum loss: 
5344.561523, avg loss: 2.598231, ppl: 13.439942 +epoch: 1, batch: 26329, sum loss: 5414.097656, avg loss: 3.038214, ppl: 20.867945 +epoch: 1, batch: 26330, sum loss: 5001.976074, avg loss: 2.690681, ppl: 14.741712 +epoch: 1, batch: 26331, sum loss: 4578.936035, avg loss: 2.717469, ppl: 15.141956 +epoch: 1, batch: 26332, sum loss: 4240.968750, avg loss: 2.480099, ppl: 11.942443 +epoch: 1, batch: 26333, sum loss: 3917.690674, avg loss: 2.585934, ppl: 13.275688 +epoch: 1, batch: 26334, sum loss: 4468.443359, avg loss: 2.440439, ppl: 11.478078 +epoch: 1, batch: 26335, sum loss: 5115.502441, avg loss: 2.864223, ppl: 17.535427 +epoch: 1, batch: 26336, sum loss: 4303.251465, avg loss: 2.504803, ppl: 12.241146 +epoch: 1, batch: 26337, sum loss: 3674.365479, avg loss: 2.252830, ppl: 9.514622 +epoch: 1, batch: 26338, sum loss: 4936.329590, avg loss: 2.864962, ppl: 17.548388 +epoch: 1, batch: 26339, sum loss: 4535.119141, avg loss: 2.549252, ppl: 12.797525 +epoch: 1, batch: 26340, sum loss: 4573.735840, avg loss: 2.569515, ppl: 13.059483 +epoch: 1, batch: 26341, sum loss: 5577.014648, avg loss: 2.969656, ppl: 19.485220 +epoch: 1, batch: 26342, sum loss: 4712.786133, avg loss: 2.713176, ppl: 15.077081 +epoch: 1, batch: 26343, sum loss: 4062.668457, avg loss: 2.478748, ppl: 11.926324 +epoch: 1, batch: 26344, sum loss: 4014.012207, avg loss: 2.368149, ppl: 10.677608 +epoch: 1, batch: 26345, sum loss: 4550.203613, avg loss: 2.682903, ppl: 14.627496 +epoch: 1, batch: 26346, sum loss: 4995.893555, avg loss: 2.848286, ppl: 17.258175 +epoch: 1, batch: 26347, sum loss: 4616.773438, avg loss: 2.792966, ppl: 16.329388 +epoch: 1, batch: 26348, sum loss: 4362.478516, avg loss: 2.635939, ppl: 13.956409 +epoch: 1, batch: 26349, sum loss: 5116.182129, avg loss: 2.779023, ppl: 16.103287 +epoch: 1, batch: 26350, sum loss: 4441.310059, avg loss: 2.442965, ppl: 11.507107 +epoch: 1, batch: 26351, sum loss: 3770.718994, avg loss: 2.443758, ppl: 11.516243 +epoch: 1, batch: 26352, sum loss: 
4660.287109, avg loss: 2.496137, ppl: 12.135520 +epoch: 1, batch: 26353, sum loss: 4072.608643, avg loss: 2.480273, ppl: 11.944528 +epoch: 1, batch: 26354, sum loss: 3512.338867, avg loss: 2.492789, ppl: 12.094956 +epoch: 1, batch: 26355, sum loss: 4028.279053, avg loss: 2.474373, ppl: 11.874258 +epoch: 1, batch: 26356, sum loss: 4884.700195, avg loss: 2.572249, ppl: 13.095239 +epoch: 1, batch: 26357, sum loss: 5176.191895, avg loss: 2.687535, ppl: 14.695404 +epoch: 1, batch: 26358, sum loss: 4662.333008, avg loss: 2.716977, ppl: 15.134507 +epoch: 1, batch: 26359, sum loss: 4618.630371, avg loss: 2.665107, ppl: 14.369484 +epoch: 1, batch: 26360, sum loss: 4718.842285, avg loss: 2.886142, ppl: 17.924025 +epoch: 1, batch: 26361, sum loss: 3796.341797, avg loss: 2.413440, ppl: 11.172333 +epoch: 1, batch: 26362, sum loss: 4276.291992, avg loss: 2.415984, ppl: 11.200788 +epoch: 1, batch: 26363, sum loss: 4924.868652, avg loss: 2.752861, ppl: 15.687453 +epoch: 1, batch: 26364, sum loss: 4596.454590, avg loss: 2.804426, ppl: 16.517595 +epoch: 1, batch: 26365, sum loss: 4410.659668, avg loss: 2.674748, ppl: 14.508696 +epoch: 1, batch: 26366, sum loss: 3991.120117, avg loss: 2.711359, ppl: 15.049711 +epoch: 1, batch: 26367, sum loss: 4426.349609, avg loss: 2.703940, ppl: 14.938472 +epoch: 1, batch: 26368, sum loss: 4041.316650, avg loss: 2.494640, ppl: 12.117372 +epoch: 1, batch: 26369, sum loss: 3800.023438, avg loss: 2.661081, ppl: 14.311749 +epoch: 1, batch: 26370, sum loss: 3965.371582, avg loss: 2.538650, ppl: 12.662568 +epoch: 1, batch: 26371, sum loss: 4383.614746, avg loss: 2.714313, ppl: 15.094230 +epoch: 1, batch: 26372, sum loss: 5393.594727, avg loss: 2.735089, ppl: 15.411109 +epoch: 1, batch: 26373, sum loss: 4750.955078, avg loss: 2.649724, ppl: 14.150133 +epoch: 1, batch: 26374, sum loss: 4900.657227, avg loss: 2.579293, ppl: 13.187818 +epoch: 1, batch: 26375, sum loss: 3845.213867, avg loss: 2.472806, ppl: 11.855673 +epoch: 1, batch: 26376, sum loss: 
3868.215820, avg loss: 2.561732, ppl: 12.958245 +epoch: 1, batch: 26377, sum loss: 4044.642578, avg loss: 2.666211, ppl: 14.385365 +epoch: 1, batch: 26378, sum loss: 4406.652344, avg loss: 2.720156, ppl: 15.182690 +epoch: 1, batch: 26379, sum loss: 4145.523926, avg loss: 2.655685, ppl: 14.234733 +epoch: 1, batch: 26380, sum loss: 4669.199707, avg loss: 2.771038, ppl: 15.975212 +epoch: 1, batch: 26381, sum loss: 4678.966797, avg loss: 2.631590, ppl: 13.895845 +epoch: 1, batch: 26382, sum loss: 4479.084961, avg loss: 2.399081, ppl: 11.013056 +epoch: 1, batch: 26383, sum loss: 4438.529297, avg loss: 2.504813, ppl: 12.241275 +epoch: 1, batch: 26384, sum loss: 3641.686035, avg loss: 2.445726, ppl: 11.538923 +epoch: 1, batch: 26385, sum loss: 5223.904785, avg loss: 2.944704, ppl: 19.005033 +epoch: 1, batch: 26386, sum loss: 3621.709717, avg loss: 2.365585, ppl: 10.650263 +epoch: 1, batch: 26387, sum loss: 4644.714844, avg loss: 2.594813, ppl: 13.394077 +epoch: 1, batch: 26388, sum loss: 4453.387207, avg loss: 2.624271, ppl: 13.794510 +epoch: 1, batch: 26389, sum loss: 5672.154785, avg loss: 3.015500, ppl: 20.399281 +epoch: 1, batch: 26390, sum loss: 4606.165039, avg loss: 2.647221, ppl: 14.114764 +epoch: 1, batch: 26391, sum loss: 4563.196777, avg loss: 2.550697, ppl: 12.816031 +epoch: 1, batch: 26392, sum loss: 4304.722168, avg loss: 2.786228, ppl: 16.219727 +epoch: 1, batch: 26393, sum loss: 4618.748535, avg loss: 2.842307, ppl: 17.155294 +epoch: 1, batch: 26394, sum loss: 4664.530273, avg loss: 2.726201, ppl: 15.274753 +epoch: 1, batch: 26395, sum loss: 5298.739746, avg loss: 2.870390, ppl: 17.643896 +epoch: 1, batch: 26396, sum loss: 4436.122559, avg loss: 2.492204, ppl: 12.087885 +epoch: 1, batch: 26397, sum loss: 3668.587891, avg loss: 2.440844, ppl: 11.482723 +epoch: 1, batch: 26398, sum loss: 4566.589355, avg loss: 2.591708, ppl: 13.352557 +epoch: 1, batch: 26399, sum loss: 4939.123535, avg loss: 2.681392, ppl: 14.605406 +epoch: 1, batch: 26400, sum loss: 
5432.886719, avg loss: 2.900634, ppl: 18.185663 +epoch: 1, batch: 26401, sum loss: 5157.188965, avg loss: 2.908736, ppl: 18.333614 +epoch: 1, batch: 26402, sum loss: 5736.342773, avg loss: 2.926705, ppl: 18.666031 +epoch: 1, batch: 26403, sum loss: 3541.744141, avg loss: 2.528012, ppl: 12.528569 +epoch: 1, batch: 26404, sum loss: 4623.804199, avg loss: 2.677362, ppl: 14.546668 +epoch: 1, batch: 26405, sum loss: 4497.692383, avg loss: 2.693229, ppl: 14.779321 +epoch: 1, batch: 26406, sum loss: 4087.428467, avg loss: 2.523104, ppl: 12.467237 +epoch: 1, batch: 26407, sum loss: 4769.421875, avg loss: 2.760082, ppl: 15.801139 +epoch: 1, batch: 26408, sum loss: 4620.386230, avg loss: 2.300989, ppl: 9.984054 +epoch: 1, batch: 26409, sum loss: 4338.853516, avg loss: 2.521123, ppl: 12.442567 +epoch: 1, batch: 26410, sum loss: 4237.476074, avg loss: 2.560409, ppl: 12.941104 +epoch: 1, batch: 26411, sum loss: 4458.502930, avg loss: 2.516085, ppl: 12.380035 +epoch: 1, batch: 26412, sum loss: 5107.569824, avg loss: 3.086145, ppl: 21.892517 +epoch: 1, batch: 26413, sum loss: 4211.042480, avg loss: 2.713301, ppl: 15.078964 +epoch: 1, batch: 26414, sum loss: 5155.735840, avg loss: 2.963067, ppl: 19.357241 +epoch: 1, batch: 26415, sum loss: 4388.433594, avg loss: 2.544020, ppl: 12.730742 +epoch: 1, batch: 26416, sum loss: 4439.832520, avg loss: 2.628675, ppl: 13.855402 +epoch: 1, batch: 26417, sum loss: 4163.867676, avg loss: 2.525086, ppl: 12.491975 +epoch: 1, batch: 26418, sum loss: 4612.851562, avg loss: 2.721446, ppl: 15.202293 +epoch: 1, batch: 26419, sum loss: 4877.144043, avg loss: 2.712538, ppl: 15.067472 +epoch: 1, batch: 26420, sum loss: 4805.888184, avg loss: 2.681857, ppl: 14.612208 +epoch: 1, batch: 26421, sum loss: 4941.906250, avg loss: 2.795196, ppl: 16.365837 +epoch: 1, batch: 26422, sum loss: 5157.612305, avg loss: 2.677888, ppl: 14.554324 +epoch: 1, batch: 26423, sum loss: 4498.899902, avg loss: 2.509147, ppl: 12.294435 +epoch: 1, batch: 26424, sum loss: 
3856.135498, avg loss: 2.408579, ppl: 11.118155 +epoch: 1, batch: 26425, sum loss: 5159.895508, avg loss: 2.750477, ppl: 15.650100 +epoch: 1, batch: 26426, sum loss: 4736.520508, avg loss: 2.755393, ppl: 15.727221 +epoch: 1, batch: 26427, sum loss: 3735.004395, avg loss: 2.542549, ppl: 12.712031 +epoch: 1, batch: 26428, sum loss: 4493.685547, avg loss: 2.695672, ppl: 14.815472 +epoch: 1, batch: 26429, sum loss: 4465.160645, avg loss: 2.581018, ppl: 13.210576 +epoch: 1, batch: 26430, sum loss: 4780.625488, avg loss: 2.736477, ppl: 15.432523 +epoch: 1, batch: 26431, sum loss: 3970.806885, avg loss: 2.452629, ppl: 11.618856 +epoch: 1, batch: 26432, sum loss: 4623.071777, avg loss: 2.547147, ppl: 12.770617 +epoch: 1, batch: 26433, sum loss: 3766.593994, avg loss: 2.588724, ppl: 13.312778 +epoch: 1, batch: 26434, sum loss: 3653.490723, avg loss: 2.487060, ppl: 12.025863 +epoch: 1, batch: 26435, sum loss: 4015.451904, avg loss: 2.445464, ppl: 11.535903 +epoch: 1, batch: 26436, sum loss: 4308.251465, avg loss: 2.582885, ppl: 13.235261 +epoch: 1, batch: 26437, sum loss: 5739.199219, avg loss: 2.803712, ppl: 16.505808 +epoch: 1, batch: 26438, sum loss: 4129.385254, avg loss: 2.459431, ppl: 11.698158 +epoch: 1, batch: 26439, sum loss: 4664.478516, avg loss: 2.747043, ppl: 15.596443 +epoch: 1, batch: 26440, sum loss: 5098.404297, avg loss: 2.626689, ppl: 13.827916 +epoch: 1, batch: 26441, sum loss: 4255.859375, avg loss: 2.676641, ppl: 14.536184 +epoch: 1, batch: 26442, sum loss: 4660.509277, avg loss: 2.876858, ppl: 17.758385 +epoch: 1, batch: 26443, sum loss: 4066.176758, avg loss: 2.350391, ppl: 10.489672 +epoch: 1, batch: 26444, sum loss: 4699.364258, avg loss: 2.640092, ppl: 14.014498 +epoch: 1, batch: 26445, sum loss: 4496.400391, avg loss: 2.485573, ppl: 12.007994 +epoch: 1, batch: 26446, sum loss: 4404.734863, avg loss: 2.798434, ppl: 16.418911 +epoch: 1, batch: 26447, sum loss: 4107.880859, avg loss: 2.611495, ppl: 13.619390 +epoch: 1, batch: 26448, sum loss: 
3228.245605, avg loss: 2.320809, ppl: 10.183913 +epoch: 1, batch: 26449, sum loss: 5767.893555, avg loss: 3.077851, ppl: 21.711700 +epoch: 1, batch: 26450, sum loss: 4261.998535, avg loss: 2.735558, ppl: 15.418338 +epoch: 1, batch: 26451, sum loss: 4707.948242, avg loss: 2.615527, ppl: 13.674419 +epoch: 1, batch: 26452, sum loss: 3297.107422, avg loss: 2.325182, ppl: 10.228539 +epoch: 1, batch: 26453, sum loss: 3475.273926, avg loss: 2.507413, ppl: 12.273134 +epoch: 1, batch: 26454, sum loss: 5737.321289, avg loss: 3.038836, ppl: 20.880915 +epoch: 1, batch: 26455, sum loss: 3478.244141, avg loss: 2.514999, ppl: 12.366601 +epoch: 1, batch: 26456, sum loss: 4702.340820, avg loss: 2.576625, ppl: 13.152675 +epoch: 1, batch: 26457, sum loss: 5282.319824, avg loss: 2.734120, ppl: 15.396191 +epoch: 1, batch: 26458, sum loss: 5037.510742, avg loss: 2.797063, ppl: 16.396421 +epoch: 1, batch: 26459, sum loss: 4226.503906, avg loss: 2.631696, ppl: 13.897320 +epoch: 1, batch: 26460, sum loss: 4062.511963, avg loss: 2.492339, ppl: 12.089517 +epoch: 1, batch: 26461, sum loss: 4371.854492, avg loss: 2.404760, ppl: 11.075776 +epoch: 1, batch: 26462, sum loss: 4122.198242, avg loss: 2.605688, ppl: 13.540536 +epoch: 1, batch: 26463, sum loss: 4184.849609, avg loss: 2.694688, ppl: 14.800895 +epoch: 1, batch: 26464, sum loss: 4307.646484, avg loss: 2.461512, ppl: 11.722527 +epoch: 1, batch: 26465, sum loss: 4053.020508, avg loss: 2.346856, ppl: 10.452656 +epoch: 1, batch: 26466, sum loss: 3216.258545, avg loss: 2.253860, ppl: 9.524431 +epoch: 1, batch: 26467, sum loss: 3520.696045, avg loss: 2.428066, ppl: 11.336938 +epoch: 1, batch: 26468, sum loss: 5140.083984, avg loss: 2.935513, ppl: 18.831171 +epoch: 1, batch: 26469, sum loss: 4936.503906, avg loss: 2.645500, ppl: 14.090494 +epoch: 1, batch: 26470, sum loss: 4095.567383, avg loss: 2.449502, ppl: 11.582580 +epoch: 1, batch: 26471, sum loss: 4464.341309, avg loss: 2.658929, ppl: 14.280981 +epoch: 1, batch: 26472, sum loss: 
3760.203369, avg loss: 2.460866, ppl: 11.714954 +epoch: 1, batch: 26473, sum loss: 3861.702393, avg loss: 2.643191, ppl: 14.057996 +epoch: 1, batch: 26474, sum loss: 4587.946777, avg loss: 2.609754, ppl: 13.595700 +epoch: 1, batch: 26475, sum loss: 4137.003906, avg loss: 2.575968, ppl: 13.144029 +epoch: 1, batch: 26476, sum loss: 4593.617188, avg loss: 2.489765, ppl: 12.058447 +epoch: 1, batch: 26477, sum loss: 3911.094238, avg loss: 2.534734, ppl: 12.613075 +epoch: 1, batch: 26478, sum loss: 5890.319336, avg loss: 2.720702, ppl: 15.190978 +epoch: 1, batch: 26479, sum loss: 4824.395996, avg loss: 2.847931, ppl: 17.252056 +epoch: 1, batch: 26480, sum loss: 4827.277344, avg loss: 2.811460, ppl: 16.634192 +epoch: 1, batch: 26481, sum loss: 5412.406738, avg loss: 2.902095, ppl: 18.212257 +epoch: 1, batch: 26482, sum loss: 5243.328613, avg loss: 2.789005, ppl: 16.264822 +epoch: 1, batch: 26483, sum loss: 4833.421387, avg loss: 2.736932, ppl: 15.439537 +epoch: 1, batch: 26484, sum loss: 4037.511719, avg loss: 2.588149, ppl: 13.305116 +epoch: 1, batch: 26485, sum loss: 3791.616211, avg loss: 2.462088, ppl: 11.729280 +epoch: 1, batch: 26486, sum loss: 4675.535645, avg loss: 2.643039, ppl: 14.055855 +epoch: 1, batch: 26487, sum loss: 5042.886230, avg loss: 2.817255, ppl: 16.730858 +epoch: 1, batch: 26488, sum loss: 3423.764648, avg loss: 2.519326, ppl: 12.420228 +epoch: 1, batch: 26489, sum loss: 5027.076172, avg loss: 2.659829, ppl: 14.293839 +epoch: 1, batch: 26490, sum loss: 3778.602539, avg loss: 2.319584, ppl: 10.171443 +epoch: 1, batch: 26491, sum loss: 4597.210449, avg loss: 2.601704, ppl: 13.486695 +epoch: 1, batch: 26492, sum loss: 4067.604004, avg loss: 2.550222, ppl: 12.809946 +epoch: 1, batch: 26493, sum loss: 3056.686035, avg loss: 2.294809, ppl: 9.922544 +epoch: 1, batch: 26494, sum loss: 3498.217285, avg loss: 2.437782, ppl: 11.447622 +epoch: 1, batch: 26495, sum loss: 3887.611328, avg loss: 2.493657, ppl: 12.105466 +epoch: 1, batch: 26496, sum loss: 
4541.008789, avg loss: 2.733901, ppl: 15.392811 +epoch: 1, batch: 26497, sum loss: 4913.745117, avg loss: 2.892140, ppl: 18.031851 +epoch: 1, batch: 26498, sum loss: 4950.337891, avg loss: 2.915393, ppl: 18.456070 +epoch: 1, batch: 26499, sum loss: 5287.351562, avg loss: 2.868883, ppl: 17.617331 +epoch: 1, batch: 26500, sum loss: 4720.821289, avg loss: 2.778588, ppl: 16.096281 +epoch: 1, batch: 26501, sum loss: 5555.322266, avg loss: 3.006127, ppl: 20.208977 +epoch: 1, batch: 26502, sum loss: 3792.918213, avg loss: 2.592562, ppl: 13.363969 +epoch: 1, batch: 26503, sum loss: 4277.549805, avg loss: 2.529598, ppl: 12.548461 +epoch: 1, batch: 26504, sum loss: 5167.930176, avg loss: 2.819384, ppl: 16.766514 +epoch: 1, batch: 26505, sum loss: 4102.702637, avg loss: 2.664093, ppl: 14.354918 +epoch: 1, batch: 26506, sum loss: 4645.151855, avg loss: 2.603785, ppl: 13.514792 +epoch: 1, batch: 26507, sum loss: 4880.499023, avg loss: 2.832559, ppl: 16.988878 +epoch: 1, batch: 26508, sum loss: 4238.696289, avg loss: 2.502182, ppl: 12.209105 +epoch: 1, batch: 26509, sum loss: 4981.024902, avg loss: 2.586202, ppl: 13.279243 +epoch: 1, batch: 26510, sum loss: 3692.735107, avg loss: 2.532740, ppl: 12.587952 +epoch: 1, batch: 26511, sum loss: 4736.071777, avg loss: 2.825819, ppl: 16.874752 +epoch: 1, batch: 26512, sum loss: 4006.544922, avg loss: 2.453487, ppl: 11.628830 +epoch: 1, batch: 26513, sum loss: 3233.640381, avg loss: 2.424018, ppl: 11.291141 +epoch: 1, batch: 26514, sum loss: 3839.677490, avg loss: 2.244113, ppl: 9.432048 +epoch: 1, batch: 26515, sum loss: 4386.150391, avg loss: 2.685946, ppl: 14.672078 +epoch: 1, batch: 26516, sum loss: 4051.687256, avg loss: 2.585633, ppl: 13.271691 +epoch: 1, batch: 26517, sum loss: 4264.815430, avg loss: 2.841316, ppl: 17.138309 +epoch: 1, batch: 26518, sum loss: 4352.336914, avg loss: 2.564724, ppl: 12.997073 +epoch: 1, batch: 26519, sum loss: 4831.850586, avg loss: 2.914264, ppl: 18.435247 +epoch: 1, batch: 26520, sum loss: 
3978.607178, avg loss: 2.556946, ppl: 12.896366 +epoch: 1, batch: 26521, sum loss: 4246.623535, avg loss: 2.689439, ppl: 14.723408 +epoch: 1, batch: 26522, sum loss: 4056.365967, avg loss: 2.551173, ppl: 12.822141 +epoch: 1, batch: 26523, sum loss: 4418.693359, avg loss: 2.663468, ppl: 14.345953 +epoch: 1, batch: 26524, sum loss: 4769.971191, avg loss: 2.616550, ppl: 13.688423 +epoch: 1, batch: 26525, sum loss: 4800.947266, avg loss: 2.537498, ppl: 12.647992 +epoch: 1, batch: 26526, sum loss: 5260.912598, avg loss: 2.848356, ppl: 17.259380 +epoch: 1, batch: 26527, sum loss: 4628.464844, avg loss: 2.963166, ppl: 19.359161 +epoch: 1, batch: 26528, sum loss: 4880.215820, avg loss: 2.848929, ppl: 17.269279 +epoch: 1, batch: 26529, sum loss: 4382.763672, avg loss: 2.640219, ppl: 14.016275 +epoch: 1, batch: 26530, sum loss: 3788.007080, avg loss: 2.527023, ppl: 12.516188 +epoch: 1, batch: 26531, sum loss: 4689.192383, avg loss: 2.626999, ppl: 13.832192 +epoch: 1, batch: 26532, sum loss: 3988.644043, avg loss: 2.581647, ppl: 13.218887 +epoch: 1, batch: 26533, sum loss: 4614.649414, avg loss: 2.748451, ppl: 15.618420 +epoch: 1, batch: 26534, sum loss: 4650.748535, avg loss: 2.669775, ppl: 14.436724 +epoch: 1, batch: 26535, sum loss: 4253.179688, avg loss: 2.461331, ppl: 11.720400 +epoch: 1, batch: 26536, sum loss: 4070.549561, avg loss: 2.514237, ppl: 12.357176 +epoch: 1, batch: 26537, sum loss: 5239.347168, avg loss: 2.807796, ppl: 16.573347 +epoch: 1, batch: 26538, sum loss: 4481.844238, avg loss: 2.622495, ppl: 13.770040 +epoch: 1, batch: 26539, sum loss: 4397.062012, avg loss: 2.617299, ppl: 13.698671 +epoch: 1, batch: 26540, sum loss: 3548.585938, avg loss: 2.337672, ppl: 10.357097 +epoch: 1, batch: 26541, sum loss: 5309.802246, avg loss: 2.901531, ppl: 18.202000 +epoch: 1, batch: 26542, sum loss: 4327.021484, avg loss: 2.605070, ppl: 13.532174 +epoch: 1, batch: 26543, sum loss: 3493.197266, avg loss: 2.185981, ppl: 8.899373 +epoch: 1, batch: 26544, sum loss: 
4530.782227, avg loss: 2.453050, ppl: 11.623741 +epoch: 1, batch: 26545, sum loss: 4280.290527, avg loss: 2.738510, ppl: 15.463922 +epoch: 1, batch: 26546, sum loss: 5003.753418, avg loss: 2.515713, ppl: 12.375429 +epoch: 1, batch: 26547, sum loss: 4321.756836, avg loss: 2.555740, ppl: 12.880833 +epoch: 1, batch: 26548, sum loss: 4097.798828, avg loss: 2.618402, ppl: 13.713788 +epoch: 1, batch: 26549, sum loss: 5003.186523, avg loss: 2.855700, ppl: 17.386608 +epoch: 1, batch: 26550, sum loss: 4442.357422, avg loss: 2.677732, ppl: 14.552052 +epoch: 1, batch: 26551, sum loss: 4037.412598, avg loss: 2.377746, ppl: 10.780577 +epoch: 1, batch: 26552, sum loss: 4246.143555, avg loss: 2.573420, ppl: 13.110590 +epoch: 1, batch: 26553, sum loss: 4672.510742, avg loss: 2.870093, ppl: 17.638653 +epoch: 1, batch: 26554, sum loss: 4480.795898, avg loss: 2.832361, ppl: 16.985523 +epoch: 1, batch: 26555, sum loss: 5332.966797, avg loss: 2.803873, ppl: 16.508465 +epoch: 1, batch: 26556, sum loss: 4736.363770, avg loss: 2.564355, ppl: 12.992277 +epoch: 1, batch: 26557, sum loss: 5459.133789, avg loss: 2.865687, ppl: 17.561115 +epoch: 1, batch: 26558, sum loss: 4296.845215, avg loss: 2.318859, ppl: 10.164066 +epoch: 1, batch: 26559, sum loss: 4138.094727, avg loss: 2.645841, ppl: 14.095289 +epoch: 1, batch: 26560, sum loss: 4775.518066, avg loss: 2.701085, ppl: 14.895886 +epoch: 1, batch: 26561, sum loss: 3676.243652, avg loss: 2.346040, ppl: 10.444124 +epoch: 1, batch: 26562, sum loss: 4706.194336, avg loss: 2.608755, ppl: 13.582132 +epoch: 1, batch: 26563, sum loss: 4582.209961, avg loss: 2.748776, ppl: 15.623500 +epoch: 1, batch: 26564, sum loss: 4049.660889, avg loss: 2.728882, ppl: 15.315752 +epoch: 1, batch: 26565, sum loss: 5367.068359, avg loss: 2.952183, ppl: 19.147703 +epoch: 1, batch: 26566, sum loss: 4780.155762, avg loss: 2.620699, ppl: 13.745334 +epoch: 1, batch: 26567, sum loss: 4340.326172, avg loss: 2.559155, ppl: 12.924885 +epoch: 1, batch: 26568, sum loss: 
5486.176758, avg loss: 2.780627, ppl: 16.129131 +epoch: 1, batch: 26569, sum loss: 3982.550293, avg loss: 2.443282, ppl: 11.510761 +epoch: 1, batch: 26570, sum loss: 4959.941406, avg loss: 2.622920, ppl: 13.775887 +epoch: 1, batch: 26571, sum loss: 5405.041016, avg loss: 2.966543, ppl: 19.424652 +epoch: 1, batch: 26572, sum loss: 3937.928711, avg loss: 2.519468, ppl: 12.421988 +epoch: 1, batch: 26573, sum loss: 4134.186035, avg loss: 2.725238, ppl: 15.260043 +epoch: 1, batch: 26574, sum loss: 4298.237793, avg loss: 2.559999, ppl: 12.935802 +epoch: 1, batch: 26575, sum loss: 4540.229004, avg loss: 2.560761, ppl: 12.945665 +epoch: 1, batch: 26576, sum loss: 4040.717041, avg loss: 2.615351, ppl: 13.672014 +epoch: 1, batch: 26577, sum loss: 4271.379395, avg loss: 2.689786, ppl: 14.728519 +epoch: 1, batch: 26578, sum loss: 3796.390137, avg loss: 2.688662, ppl: 14.711972 +epoch: 1, batch: 26579, sum loss: 4363.962402, avg loss: 2.451664, ppl: 11.607648 +epoch: 1, batch: 26580, sum loss: 4605.424805, avg loss: 2.523520, ppl: 12.472428 +epoch: 1, batch: 26581, sum loss: 4458.491211, avg loss: 2.618022, ppl: 13.708577 +epoch: 1, batch: 26582, sum loss: 5153.141602, avg loss: 2.949709, ppl: 19.100393 +epoch: 1, batch: 26583, sum loss: 3916.392578, avg loss: 2.474032, ppl: 11.870211 +epoch: 1, batch: 26584, sum loss: 4740.633301, avg loss: 2.648398, ppl: 14.131388 +epoch: 1, batch: 26585, sum loss: 3927.066895, avg loss: 2.496546, ppl: 12.140489 +epoch: 1, batch: 26586, sum loss: 4376.077148, avg loss: 2.592463, ppl: 13.362640 +epoch: 1, batch: 26587, sum loss: 4875.101074, avg loss: 2.743445, ppl: 15.540424 +epoch: 1, batch: 26588, sum loss: 4965.090820, avg loss: 2.843695, ppl: 17.179119 +epoch: 1, batch: 26589, sum loss: 3871.703125, avg loss: 2.600204, ppl: 13.466478 +epoch: 1, batch: 26590, sum loss: 4191.345215, avg loss: 2.374700, ppl: 10.747787 +epoch: 1, batch: 26591, sum loss: 4502.078613, avg loss: 2.609901, ppl: 13.597701 +epoch: 1, batch: 26592, sum loss: 
4438.156738, avg loss: 2.644909, ppl: 14.082159 +epoch: 1, batch: 26593, sum loss: 4408.938965, avg loss: 2.425159, ppl: 11.304027 +epoch: 1, batch: 26594, sum loss: 4083.049316, avg loss: 2.346580, ppl: 10.449771 +epoch: 1, batch: 26595, sum loss: 4028.022461, avg loss: 2.498773, ppl: 12.167560 +epoch: 1, batch: 26596, sum loss: 4549.696289, avg loss: 2.617777, ppl: 13.705221 +epoch: 1, batch: 26597, sum loss: 3470.784668, avg loss: 2.271456, ppl: 9.693504 +epoch: 1, batch: 26598, sum loss: 4480.358398, avg loss: 2.579366, ppl: 13.188773 +epoch: 1, batch: 26599, sum loss: 4646.102051, avg loss: 2.578303, ppl: 13.174763 +epoch: 1, batch: 26600, sum loss: 4189.390137, avg loss: 2.597266, ppl: 13.426981 +epoch: 1, batch: 26601, sum loss: 4210.464844, avg loss: 2.669921, ppl: 14.438824 +epoch: 1, batch: 26602, sum loss: 4055.389160, avg loss: 2.566702, ppl: 13.022806 +epoch: 1, batch: 26603, sum loss: 4839.499023, avg loss: 2.659065, ppl: 14.282935 +epoch: 1, batch: 26604, sum loss: 4892.469727, avg loss: 2.742416, ppl: 15.524446 +epoch: 1, batch: 26605, sum loss: 4605.791016, avg loss: 2.659233, ppl: 14.285322 +epoch: 1, batch: 26606, sum loss: 4107.993164, avg loss: 2.699076, ppl: 14.865989 +epoch: 1, batch: 26607, sum loss: 3748.253418, avg loss: 2.308038, ppl: 10.054675 +epoch: 1, batch: 26608, sum loss: 4048.554199, avg loss: 2.577056, ppl: 13.158339 +epoch: 1, batch: 26609, sum loss: 4969.884277, avg loss: 2.785810, ppl: 16.212940 +epoch: 1, batch: 26610, sum loss: 3492.413818, avg loss: 2.359739, ppl: 10.588188 +epoch: 1, batch: 26611, sum loss: 5259.237305, avg loss: 2.804927, ppl: 16.525864 +epoch: 1, batch: 26612, sum loss: 4348.228027, avg loss: 2.664356, ppl: 14.358692 +epoch: 1, batch: 26613, sum loss: 4077.197754, avg loss: 2.804125, ppl: 16.512623 +epoch: 1, batch: 26614, sum loss: 4827.544434, avg loss: 2.712104, ppl: 15.060925 +epoch: 1, batch: 26615, sum loss: 4626.154297, avg loss: 2.616603, ppl: 13.689144 +epoch: 1, batch: 26616, sum loss: 
3651.135498, avg loss: 2.302103, ppl: 9.995181 +epoch: 1, batch: 26617, sum loss: 4467.944824, avg loss: 2.790721, ppl: 16.292770 +epoch: 1, batch: 26618, sum loss: 4506.416016, avg loss: 2.521777, ppl: 12.450706 +epoch: 1, batch: 26619, sum loss: 4553.669434, avg loss: 2.405530, ppl: 11.084309 +epoch: 1, batch: 26620, sum loss: 4652.426270, avg loss: 2.535382, ppl: 12.621255 +epoch: 1, batch: 26621, sum loss: 4574.106934, avg loss: 2.579869, ppl: 13.195404 +epoch: 1, batch: 26622, sum loss: 4654.583984, avg loss: 2.652184, ppl: 14.184992 +epoch: 1, batch: 26623, sum loss: 5098.053711, avg loss: 2.761676, ppl: 15.826344 +epoch: 1, batch: 26624, sum loss: 4027.031738, avg loss: 2.608181, ppl: 13.574340 +epoch: 1, batch: 26625, sum loss: 4593.855469, avg loss: 2.564967, ppl: 13.000225 +epoch: 1, batch: 26626, sum loss: 3643.903809, avg loss: 2.458775, ppl: 11.690476 +epoch: 1, batch: 26627, sum loss: 4404.441895, avg loss: 2.661294, ppl: 14.314804 +epoch: 1, batch: 26628, sum loss: 4265.458008, avg loss: 2.561837, ppl: 12.959599 +epoch: 1, batch: 26629, sum loss: 4689.203613, avg loss: 2.544332, ppl: 12.734715 +epoch: 1, batch: 26630, sum loss: 4088.013672, avg loss: 2.447912, ppl: 11.564180 +epoch: 1, batch: 26631, sum loss: 3782.553711, avg loss: 2.374485, ppl: 10.745473 +epoch: 1, batch: 26632, sum loss: 4852.532227, avg loss: 2.761828, ppl: 15.828754 +epoch: 1, batch: 26633, sum loss: 4734.299316, avg loss: 2.771838, ppl: 15.987992 +epoch: 1, batch: 26634, sum loss: 4466.515625, avg loss: 2.568439, ppl: 13.045447 +epoch: 1, batch: 26635, sum loss: 4956.155762, avg loss: 2.574626, ppl: 13.126410 +epoch: 1, batch: 26636, sum loss: 4999.277832, avg loss: 2.926978, ppl: 18.671114 +epoch: 1, batch: 26637, sum loss: 4251.110352, avg loss: 2.796783, ppl: 16.391832 +epoch: 1, batch: 26638, sum loss: 4830.651855, avg loss: 2.719962, ppl: 15.179740 +epoch: 1, batch: 26639, sum loss: 4850.690430, avg loss: 2.700830, ppl: 14.892087 +epoch: 1, batch: 26640, sum loss: 
4764.258301, avg loss: 2.629282, ppl: 13.863806 +epoch: 1, batch: 26641, sum loss: 3704.268799, avg loss: 2.453158, ppl: 11.625002 +epoch: 1, batch: 26642, sum loss: 4366.042969, avg loss: 2.473679, ppl: 11.866019 +epoch: 1, batch: 26643, sum loss: 4571.339844, avg loss: 2.542458, ppl: 12.710876 +epoch: 1, batch: 26644, sum loss: 4078.002441, avg loss: 2.600767, ppl: 13.474068 +epoch: 1, batch: 26645, sum loss: 2821.454346, avg loss: 2.126190, ppl: 8.382869 +epoch: 1, batch: 26646, sum loss: 3925.388184, avg loss: 2.540704, ppl: 12.688607 +epoch: 1, batch: 26647, sum loss: 3455.415771, avg loss: 2.355430, ppl: 10.542660 +epoch: 1, batch: 26648, sum loss: 4343.628418, avg loss: 2.541620, ppl: 12.700229 +epoch: 1, batch: 26649, sum loss: 4625.924316, avg loss: 2.640368, ppl: 14.018361 +epoch: 1, batch: 26650, sum loss: 3830.443359, avg loss: 2.567321, ppl: 13.030871 +epoch: 1, batch: 26651, sum loss: 4257.739258, avg loss: 2.544973, ppl: 12.742880 +epoch: 1, batch: 26652, sum loss: 4333.973633, avg loss: 2.502294, ppl: 12.210476 +epoch: 1, batch: 26653, sum loss: 5115.400391, avg loss: 2.702272, ppl: 14.913572 +epoch: 1, batch: 26654, sum loss: 3599.255371, avg loss: 2.466933, ppl: 11.786243 +epoch: 1, batch: 26655, sum loss: 4216.008301, avg loss: 2.581756, ppl: 13.220337 +epoch: 1, batch: 26656, sum loss: 3851.014160, avg loss: 2.530233, ppl: 12.556430 +epoch: 1, batch: 26657, sum loss: 4966.577148, avg loss: 2.686088, ppl: 14.674159 +epoch: 1, batch: 26658, sum loss: 4489.428223, avg loss: 2.694735, ppl: 14.801593 +epoch: 1, batch: 26659, sum loss: 3671.513672, avg loss: 2.457506, ppl: 11.675653 +epoch: 1, batch: 26660, sum loss: 5031.623535, avg loss: 2.839517, ppl: 17.107494 +epoch: 1, batch: 26661, sum loss: 4471.846680, avg loss: 2.735074, ppl: 15.410888 +epoch: 1, batch: 26662, sum loss: 4614.836426, avg loss: 2.709828, ppl: 15.026686 +epoch: 1, batch: 26663, sum loss: 4411.284180, avg loss: 2.633603, ppl: 13.923841 +epoch: 1, batch: 26664, sum loss: 
4071.317383, avg loss: 2.508513, ppl: 12.286652 +epoch: 1, batch: 26665, sum loss: 5300.767090, avg loss: 2.848344, ppl: 17.259171 +epoch: 1, batch: 26666, sum loss: 4937.983398, avg loss: 2.879291, ppl: 17.801640 +epoch: 1, batch: 26667, sum loss: 4944.765137, avg loss: 2.642846, ppl: 14.053147 +epoch: 1, batch: 26668, sum loss: 4583.622559, avg loss: 2.678915, ppl: 14.569270 +epoch: 1, batch: 26669, sum loss: 3470.407227, avg loss: 2.360821, ppl: 10.599653 +epoch: 1, batch: 26670, sum loss: 3746.672363, avg loss: 2.460061, ppl: 11.705521 +epoch: 1, batch: 26671, sum loss: 4265.135742, avg loss: 2.546350, ppl: 12.760440 +epoch: 1, batch: 26672, sum loss: 3438.052734, avg loss: 2.362923, ppl: 10.621953 +epoch: 1, batch: 26673, sum loss: 4799.717773, avg loss: 2.565322, ppl: 13.004850 +epoch: 1, batch: 26674, sum loss: 4404.518066, avg loss: 2.643768, ppl: 14.066110 +epoch: 1, batch: 26675, sum loss: 4291.021973, avg loss: 2.622874, ppl: 13.775260 +epoch: 1, batch: 26676, sum loss: 4332.853516, avg loss: 2.468862, ppl: 11.809004 +epoch: 1, batch: 26677, sum loss: 5327.265625, avg loss: 2.967836, ppl: 19.449787 +epoch: 1, batch: 26678, sum loss: 4527.943848, avg loss: 2.698417, ppl: 14.856195 +epoch: 1, batch: 26679, sum loss: 3480.026367, avg loss: 2.206738, ppl: 9.086034 +epoch: 1, batch: 26680, sum loss: 3445.669434, avg loss: 2.280390, ppl: 9.780495 +epoch: 1, batch: 26681, sum loss: 4440.847656, avg loss: 2.883667, ppl: 17.879723 +epoch: 1, batch: 26682, sum loss: 4410.609863, avg loss: 2.579304, ppl: 13.187956 +epoch: 1, batch: 26683, sum loss: 4152.887695, avg loss: 2.726781, ppl: 15.283612 +epoch: 1, batch: 26684, sum loss: 4272.687012, avg loss: 2.635834, ppl: 13.954946 +epoch: 1, batch: 26685, sum loss: 4121.930176, avg loss: 2.704679, ppl: 14.949511 +epoch: 1, batch: 26686, sum loss: 4804.597168, avg loss: 2.647161, ppl: 14.113913 +epoch: 1, batch: 26687, sum loss: 4813.593750, avg loss: 2.910275, ppl: 18.361837 +epoch: 1, batch: 26688, sum loss: 
5195.831055, avg loss: 2.769633, ppl: 15.952775 +epoch: 1, batch: 26689, sum loss: 4186.278809, avg loss: 2.478555, ppl: 11.924019 +epoch: 1, batch: 26690, sum loss: 4850.107422, avg loss: 2.763594, ppl: 15.856729 +epoch: 1, batch: 26691, sum loss: 5083.218750, avg loss: 3.025726, ppl: 20.608953 +epoch: 1, batch: 26692, sum loss: 4894.535156, avg loss: 2.852293, ppl: 17.327473 +epoch: 1, batch: 26693, sum loss: 3520.160400, avg loss: 2.436097, ppl: 11.428350 +epoch: 1, batch: 26694, sum loss: 4078.367188, avg loss: 2.641430, ppl: 14.033252 +epoch: 1, batch: 26695, sum loss: 3602.940918, avg loss: 2.483074, ppl: 11.978033 +epoch: 1, batch: 26696, sum loss: 5672.244141, avg loss: 2.830461, ppl: 16.953276 +epoch: 1, batch: 26697, sum loss: 4759.774414, avg loss: 2.606667, ppl: 13.553804 +epoch: 1, batch: 26698, sum loss: 4413.739746, avg loss: 2.610136, ppl: 13.600901 +epoch: 1, batch: 26699, sum loss: 5291.427734, avg loss: 3.030600, ppl: 20.709661 +epoch: 1, batch: 26700, sum loss: 4103.561035, avg loss: 2.458694, ppl: 11.689541 +epoch: 1, batch: 26701, sum loss: 4544.823242, avg loss: 2.426494, ppl: 11.319129 +epoch: 1, batch: 26702, sum loss: 4267.393066, avg loss: 2.653851, ppl: 14.208654 +epoch: 1, batch: 26703, sum loss: 4541.980469, avg loss: 2.732840, ppl: 15.376499 +epoch: 1, batch: 26704, sum loss: 5676.478516, avg loss: 2.909523, ppl: 18.348040 +epoch: 1, batch: 26705, sum loss: 4719.516602, avg loss: 2.758338, ppl: 15.773613 +epoch: 1, batch: 26706, sum loss: 4840.781250, avg loss: 2.619470, ppl: 13.728451 +epoch: 1, batch: 26707, sum loss: 4185.370117, avg loss: 2.779130, ppl: 16.105007 +epoch: 1, batch: 26708, sum loss: 4442.034180, avg loss: 2.512463, ppl: 12.335273 +epoch: 1, batch: 26709, sum loss: 4678.319824, avg loss: 2.842236, ppl: 17.154076 +epoch: 1, batch: 26710, sum loss: 4590.384766, avg loss: 2.837073, ppl: 17.065746 +epoch: 1, batch: 26711, sum loss: 4116.099121, avg loss: 2.600189, ppl: 13.466279 +epoch: 1, batch: 26712, sum loss: 
4005.961670, avg loss: 2.505292, ppl: 12.247131 +epoch: 1, batch: 26713, sum loss: 4028.583496, avg loss: 2.447499, ppl: 11.559403 +epoch: 1, batch: 26714, sum loss: 4934.653809, avg loss: 2.872325, ppl: 17.678066 +epoch: 1, batch: 26715, sum loss: 4930.165039, avg loss: 2.957507, ppl: 19.249929 +epoch: 1, batch: 26716, sum loss: 5057.081543, avg loss: 2.881528, ppl: 17.841513 +epoch: 1, batch: 26717, sum loss: 3745.442871, avg loss: 2.425805, ppl: 11.311333 +epoch: 1, batch: 26718, sum loss: 4067.577148, avg loss: 2.501585, ppl: 12.201818 +epoch: 1, batch: 26719, sum loss: 4516.134766, avg loss: 2.630247, ppl: 13.877202 +epoch: 1, batch: 26720, sum loss: 4010.811768, avg loss: 2.345504, ppl: 10.438533 +epoch: 1, batch: 26721, sum loss: 4538.746582, avg loss: 2.574445, ppl: 13.124031 +epoch: 1, batch: 26722, sum loss: 4570.371582, avg loss: 2.774967, ppl: 16.038105 +epoch: 1, batch: 26723, sum loss: 4161.314941, avg loss: 2.540485, ppl: 12.685824 +epoch: 1, batch: 26724, sum loss: 3687.273193, avg loss: 2.446764, ppl: 11.550907 +epoch: 1, batch: 26725, sum loss: 3982.682129, avg loss: 2.246296, ppl: 9.452656 +epoch: 1, batch: 26726, sum loss: 4786.403320, avg loss: 2.666520, ppl: 14.389807 +epoch: 1, batch: 26727, sum loss: 4739.081055, avg loss: 2.344919, ppl: 10.432427 +epoch: 1, batch: 26728, sum loss: 3701.643555, avg loss: 2.405226, ppl: 11.080939 +epoch: 1, batch: 26729, sum loss: 4167.971680, avg loss: 2.856732, ppl: 17.404554 +epoch: 1, batch: 26730, sum loss: 4315.389160, avg loss: 2.497332, ppl: 12.150030 +epoch: 1, batch: 26731, sum loss: 4238.983398, avg loss: 2.545936, ppl: 12.755162 +epoch: 1, batch: 26732, sum loss: 4547.020508, avg loss: 2.599783, ppl: 13.460816 +epoch: 1, batch: 26733, sum loss: 5137.097168, avg loss: 2.805624, ppl: 16.537392 +epoch: 1, batch: 26734, sum loss: 4352.108887, avg loss: 2.438156, ppl: 11.451908 +epoch: 1, batch: 26735, sum loss: 4363.818359, avg loss: 2.569976, ppl: 13.065506 +epoch: 1, batch: 26736, sum loss: 
3907.989258, avg loss: 2.497118, ppl: 12.147431 +epoch: 1, batch: 26737, sum loss: 3946.150879, avg loss: 2.298282, ppl: 9.957066 +epoch: 1, batch: 26738, sum loss: 4998.762695, avg loss: 2.769397, ppl: 15.949021 +epoch: 1, batch: 26739, sum loss: 4377.220215, avg loss: 2.648046, ppl: 14.126409 +epoch: 1, batch: 26740, sum loss: 4455.845215, avg loss: 2.553493, ppl: 12.851914 +epoch: 1, batch: 26741, sum loss: 4374.037109, avg loss: 2.595868, ppl: 13.408216 +epoch: 1, batch: 26742, sum loss: 5447.813965, avg loss: 2.825630, ppl: 16.871567 +epoch: 1, batch: 26743, sum loss: 5594.849609, avg loss: 2.827109, ppl: 16.896547 +epoch: 1, batch: 26744, sum loss: 4508.500977, avg loss: 2.722525, ppl: 15.218696 +epoch: 1, batch: 26745, sum loss: 3999.434326, avg loss: 2.627749, ppl: 13.842582 +epoch: 1, batch: 26746, sum loss: 4423.007324, avg loss: 2.623373, ppl: 13.782136 +epoch: 1, batch: 26747, sum loss: 4628.706055, avg loss: 2.834480, ppl: 17.021547 +epoch: 1, batch: 26748, sum loss: 4652.987305, avg loss: 2.748368, ppl: 15.617128 +epoch: 1, batch: 26749, sum loss: 4669.287109, avg loss: 2.515780, ppl: 12.376255 +epoch: 1, batch: 26750, sum loss: 3468.286133, avg loss: 2.359378, ppl: 10.584370 +epoch: 1, batch: 26751, sum loss: 4331.709961, avg loss: 2.499544, ppl: 12.176939 +epoch: 1, batch: 26752, sum loss: 3785.604736, avg loss: 2.518699, ppl: 12.412436 +epoch: 1, batch: 26753, sum loss: 3921.176758, avg loss: 2.581420, ppl: 13.215887 +epoch: 1, batch: 26754, sum loss: 4086.420410, avg loss: 2.290595, ppl: 9.880811 +epoch: 1, batch: 26755, sum loss: 5115.799316, avg loss: 2.783351, ppl: 16.173126 +epoch: 1, batch: 26756, sum loss: 4718.230469, avg loss: 2.561471, ppl: 12.954865 +epoch: 1, batch: 26757, sum loss: 4051.039062, avg loss: 2.580280, ppl: 13.200829 +epoch: 1, batch: 26758, sum loss: 3226.731201, avg loss: 2.189099, ppl: 8.927162 +epoch: 1, batch: 26759, sum loss: 4437.441895, avg loss: 2.585922, ppl: 13.275523 +epoch: 1, batch: 26760, sum loss: 
4716.403809, avg loss: 2.748487, ppl: 15.618986 +epoch: 1, batch: 26761, sum loss: 4833.271973, avg loss: 2.639690, ppl: 14.008859 +epoch: 1, batch: 26762, sum loss: 4443.573730, avg loss: 2.672023, ppl: 14.469208 +epoch: 1, batch: 26763, sum loss: 4536.666992, avg loss: 2.617811, ppl: 13.705691 +epoch: 1, batch: 26764, sum loss: 4183.043457, avg loss: 2.726886, ppl: 15.285219 +epoch: 1, batch: 26765, sum loss: 4112.261230, avg loss: 2.453617, ppl: 11.630333 +epoch: 1, batch: 26766, sum loss: 4406.188965, avg loss: 2.599521, ppl: 13.457295 +epoch: 1, batch: 26767, sum loss: 5180.980957, avg loss: 2.838894, ppl: 17.096840 +epoch: 1, batch: 26768, sum loss: 4561.473145, avg loss: 2.833213, ppl: 16.999996 +epoch: 1, batch: 26769, sum loss: 3776.494629, avg loss: 2.481271, ppl: 11.956454 +epoch: 1, batch: 26770, sum loss: 4314.179199, avg loss: 2.671319, ppl: 14.459022 +epoch: 1, batch: 26771, sum loss: 5012.081055, avg loss: 2.817358, ppl: 16.732594 +epoch: 1, batch: 26772, sum loss: 4046.449219, avg loss: 2.415791, ppl: 11.198623 +epoch: 1, batch: 26773, sum loss: 4440.416992, avg loss: 2.540284, ppl: 12.683277 +epoch: 1, batch: 26774, sum loss: 3555.967773, avg loss: 2.454084, ppl: 11.635772 +epoch: 1, batch: 26775, sum loss: 4806.971680, avg loss: 2.565086, ppl: 13.001778 +epoch: 1, batch: 26776, sum loss: 4112.875000, avg loss: 2.556168, ppl: 12.886346 +epoch: 1, batch: 26777, sum loss: 4743.306641, avg loss: 2.563950, ppl: 12.987009 +epoch: 1, batch: 26778, sum loss: 5130.424805, avg loss: 2.950216, ppl: 19.110073 +epoch: 1, batch: 26779, sum loss: 4379.974121, avg loss: 2.660981, ppl: 14.310316 +epoch: 1, batch: 26780, sum loss: 4470.438965, avg loss: 2.408642, ppl: 11.118847 +epoch: 1, batch: 26781, sum loss: 5436.935059, avg loss: 2.890449, ppl: 18.001396 +epoch: 1, batch: 26782, sum loss: 4500.132812, avg loss: 2.637827, ppl: 13.982785 +epoch: 1, batch: 26783, sum loss: 4829.739258, avg loss: 2.804727, ppl: 16.522558 +epoch: 1, batch: 26784, sum loss: 
4287.211914, avg loss: 2.679507, ppl: 14.577911 +epoch: 1, batch: 26785, sum loss: 4778.043457, avg loss: 2.887035, ppl: 17.940044 +epoch: 1, batch: 26786, sum loss: 5652.038086, avg loss: 2.886639, ppl: 17.932930 +epoch: 1, batch: 26787, sum loss: 4277.496094, avg loss: 2.814142, ppl: 16.678864 +epoch: 1, batch: 26788, sum loss: 3647.713623, avg loss: 2.368645, ppl: 10.682909 +epoch: 1, batch: 26789, sum loss: 3785.962646, avg loss: 2.423792, ppl: 11.288581 +epoch: 1, batch: 26790, sum loss: 4242.277344, avg loss: 2.569520, ppl: 13.059554 +epoch: 1, batch: 26791, sum loss: 4797.709473, avg loss: 2.597569, ppl: 13.431047 +epoch: 1, batch: 26792, sum loss: 4813.070312, avg loss: 2.777305, ppl: 16.075645 +epoch: 1, batch: 26793, sum loss: 3763.786865, avg loss: 2.368651, ppl: 10.682973 +epoch: 1, batch: 26794, sum loss: 4176.726562, avg loss: 2.584608, ppl: 13.258092 +epoch: 1, batch: 26795, sum loss: 4017.772461, avg loss: 2.394382, ppl: 10.961419 +epoch: 1, batch: 26796, sum loss: 4505.715820, avg loss: 2.545602, ppl: 12.750906 +epoch: 1, batch: 26797, sum loss: 4816.343750, avg loss: 2.913698, ppl: 18.424816 +epoch: 1, batch: 26798, sum loss: 3286.561279, avg loss: 2.385023, ppl: 10.859308 +epoch: 1, batch: 26799, sum loss: 4503.863770, avg loss: 2.852352, ppl: 17.328489 +epoch: 1, batch: 26800, sum loss: 4788.230469, avg loss: 2.875814, ppl: 17.739862 +epoch: 1, batch: 26801, sum loss: 4149.805664, avg loss: 2.603391, ppl: 13.509474 +epoch: 1, batch: 26802, sum loss: 4467.894531, avg loss: 2.736004, ppl: 15.425220 +epoch: 1, batch: 26803, sum loss: 3319.305664, avg loss: 2.270387, ppl: 9.683146 +epoch: 1, batch: 26804, sum loss: 3417.366699, avg loss: 2.329493, ppl: 10.272735 +epoch: 1, batch: 26805, sum loss: 3888.319336, avg loss: 2.328335, ppl: 10.260843 +epoch: 1, batch: 26806, sum loss: 5337.184082, avg loss: 2.888087, ppl: 17.958914 +epoch: 1, batch: 26807, sum loss: 4028.310791, avg loss: 2.228048, ppl: 9.281732 +epoch: 1, batch: 26808, sum loss: 
3680.673340, avg loss: 2.524467, ppl: 12.484242 +epoch: 1, batch: 26809, sum loss: 5249.175781, avg loss: 2.784709, ppl: 16.195101 +epoch: 1, batch: 26810, sum loss: 4682.116211, avg loss: 2.738080, ppl: 15.457272 +epoch: 1, batch: 26811, sum loss: 4876.604004, avg loss: 2.698729, ppl: 14.860836 +epoch: 1, batch: 26812, sum loss: 4043.527832, avg loss: 2.577137, ppl: 13.159409 +epoch: 1, batch: 26813, sum loss: 4231.363770, avg loss: 2.511195, ppl: 12.319646 +epoch: 1, batch: 26814, sum loss: 5180.143555, avg loss: 2.660577, ppl: 14.304544 +epoch: 1, batch: 26815, sum loss: 4682.428711, avg loss: 2.793812, ppl: 16.343203 +epoch: 1, batch: 26816, sum loss: 4165.134277, avg loss: 2.470424, ppl: 11.827457 +epoch: 1, batch: 26817, sum loss: 3734.027832, avg loss: 2.476146, ppl: 11.895329 +epoch: 1, batch: 26818, sum loss: 4475.106934, avg loss: 2.684527, ppl: 14.651272 +epoch: 1, batch: 26819, sum loss: 4300.199707, avg loss: 2.689306, ppl: 14.721449 +epoch: 1, batch: 26820, sum loss: 4171.114258, avg loss: 2.481329, ppl: 11.957145 +epoch: 1, batch: 26821, sum loss: 4565.125000, avg loss: 2.743465, ppl: 15.540736 +epoch: 1, batch: 26822, sum loss: 4697.140137, avg loss: 2.876387, ppl: 17.750029 +epoch: 1, batch: 26823, sum loss: 4261.635254, avg loss: 2.683650, ppl: 14.638419 +epoch: 1, batch: 26824, sum loss: 4967.070312, avg loss: 2.857923, ppl: 17.425297 +epoch: 1, batch: 26825, sum loss: 4466.398438, avg loss: 2.569849, ppl: 13.063858 +epoch: 1, batch: 26826, sum loss: 3982.695801, avg loss: 2.511158, ppl: 12.319181 +epoch: 1, batch: 26827, sum loss: 4275.220703, avg loss: 2.744044, ppl: 15.549743 +epoch: 1, batch: 26828, sum loss: 4475.720703, avg loss: 2.830943, ppl: 16.961445 +epoch: 1, batch: 26829, sum loss: 4740.257812, avg loss: 2.814880, ppl: 16.691175 +epoch: 1, batch: 26830, sum loss: 4882.140625, avg loss: 2.899133, ppl: 18.158403 +epoch: 1, batch: 26831, sum loss: 4051.131348, avg loss: 2.450775, ppl: 11.597333 +epoch: 1, batch: 26832, sum loss: 
4169.718262, avg loss: 2.477551, ppl: 11.912056 +epoch: 1, batch: 26833, sum loss: 4911.765137, avg loss: 2.745537, ppl: 15.572968 +epoch: 1, batch: 26834, sum loss: 4272.365234, avg loss: 2.546106, ppl: 12.757325 +epoch: 1, batch: 26835, sum loss: 4773.151367, avg loss: 2.566211, ppl: 13.016405 +epoch: 1, batch: 26836, sum loss: 4807.442383, avg loss: 2.758142, ppl: 15.770521 +epoch: 1, batch: 26837, sum loss: 4608.058105, avg loss: 2.752723, ppl: 15.685285 +epoch: 1, batch: 26838, sum loss: 4769.488281, avg loss: 2.589299, ppl: 13.320427 +epoch: 1, batch: 26839, sum loss: 5133.299805, avg loss: 2.970660, ppl: 19.504784 +epoch: 1, batch: 26840, sum loss: 4324.541016, avg loss: 2.721549, ppl: 15.203852 +epoch: 1, batch: 26841, sum loss: 4637.154297, avg loss: 2.563380, ppl: 12.979614 +epoch: 1, batch: 26842, sum loss: 4184.690430, avg loss: 2.495343, ppl: 12.125892 +epoch: 1, batch: 26843, sum loss: 3946.716797, avg loss: 2.512232, ppl: 12.332429 +epoch: 1, batch: 26844, sum loss: 4515.967773, avg loss: 2.542775, ppl: 12.714902 +epoch: 1, batch: 26845, sum loss: 4854.228516, avg loss: 2.780200, ppl: 16.122242 +epoch: 1, batch: 26846, sum loss: 4615.060547, avg loss: 2.501388, ppl: 12.199413 +epoch: 1, batch: 26847, sum loss: 5262.724609, avg loss: 2.861732, ppl: 17.491793 +epoch: 1, batch: 26848, sum loss: 4601.163086, avg loss: 2.704975, ppl: 14.953948 +epoch: 1, batch: 26849, sum loss: 4282.070312, avg loss: 2.496834, ppl: 12.143983 +epoch: 1, batch: 26850, sum loss: 4486.492188, avg loss: 2.301946, ppl: 9.993608 +epoch: 1, batch: 26851, sum loss: 4579.454590, avg loss: 2.442376, ppl: 11.500332 +epoch: 1, batch: 26852, sum loss: 3415.416016, avg loss: 2.417138, ppl: 11.213718 +epoch: 1, batch: 26853, sum loss: 4720.657715, avg loss: 2.605220, ppl: 13.534197 +epoch: 1, batch: 26854, sum loss: 4459.232422, avg loss: 2.773154, ppl: 16.009054 +epoch: 1, batch: 26855, sum loss: 4231.117188, avg loss: 2.526040, ppl: 12.503897 +epoch: 1, batch: 26856, sum loss: 
5003.729492, avg loss: 2.767550, ppl: 15.919576 +epoch: 1, batch: 26857, sum loss: 4484.365234, avg loss: 2.787051, ppl: 16.233082 +epoch: 1, batch: 26858, sum loss: 5105.775879, avg loss: 3.062853, ppl: 21.388489 +epoch: 1, batch: 26859, sum loss: 3980.952148, avg loss: 2.514815, ppl: 12.364320 +epoch: 1, batch: 26860, sum loss: 3603.244873, avg loss: 2.446195, ppl: 11.544335 +epoch: 1, batch: 26861, sum loss: 4462.174805, avg loss: 2.615577, ppl: 13.675108 +epoch: 1, batch: 26862, sum loss: 5235.703125, avg loss: 2.824004, ppl: 16.844154 +epoch: 1, batch: 26863, sum loss: 3935.013184, avg loss: 2.383412, ppl: 10.841833 +epoch: 1, batch: 26864, sum loss: 4517.249512, avg loss: 2.721235, ppl: 15.199079 +epoch: 1, batch: 26865, sum loss: 3927.956543, avg loss: 2.591001, ppl: 13.343116 +epoch: 1, batch: 26866, sum loss: 4181.480469, avg loss: 2.574803, ppl: 13.128732 +epoch: 1, batch: 26867, sum loss: 4516.813477, avg loss: 2.707922, ppl: 14.998074 +epoch: 1, batch: 26868, sum loss: 4587.923828, avg loss: 2.554523, ppl: 12.865168 +epoch: 1, batch: 26869, sum loss: 4763.944824, avg loss: 2.468365, ppl: 11.803135 +epoch: 1, batch: 26870, sum loss: 3874.198975, avg loss: 2.406335, ppl: 11.093225 +epoch: 1, batch: 26871, sum loss: 3350.334229, avg loss: 2.426020, ppl: 11.313768 +epoch: 1, batch: 26872, sum loss: 5187.590820, avg loss: 3.010790, ppl: 20.303431 +epoch: 1, batch: 26873, sum loss: 5088.284180, avg loss: 2.702222, ppl: 14.912833 +epoch: 1, batch: 26874, sum loss: 4693.815430, avg loss: 2.741714, ppl: 15.513560 +epoch: 1, batch: 26875, sum loss: 3696.351807, avg loss: 2.504303, ppl: 12.235034 +epoch: 1, batch: 26876, sum loss: 5002.780273, avg loss: 2.780867, ppl: 16.133007 +epoch: 1, batch: 26877, sum loss: 5688.746094, avg loss: 2.837280, ppl: 17.069271 +epoch: 1, batch: 26878, sum loss: 4236.076172, avg loss: 2.458547, ppl: 11.687816 +epoch: 1, batch: 26879, sum loss: 4175.256836, avg loss: 2.656016, ppl: 14.239444 +epoch: 1, batch: 26880, sum loss: 
4159.783691, avg loss: 2.659708, ppl: 14.292119 +epoch: 1, batch: 26881, sum loss: 4944.930664, avg loss: 2.634486, ppl: 13.936151 +epoch: 1, batch: 26882, sum loss: 5738.758301, avg loss: 2.862223, ppl: 17.500393 +epoch: 1, batch: 26883, sum loss: 4954.767578, avg loss: 2.813610, ppl: 16.669991 +epoch: 1, batch: 26884, sum loss: 4565.165039, avg loss: 2.617640, ppl: 13.703352 +epoch: 1, batch: 26885, sum loss: 5450.374023, avg loss: 2.617855, ppl: 13.706293 +epoch: 1, batch: 26886, sum loss: 4374.934570, avg loss: 2.546528, ppl: 12.762710 +epoch: 1, batch: 26887, sum loss: 4062.486084, avg loss: 2.612531, ppl: 13.633519 +epoch: 1, batch: 26888, sum loss: 4779.500000, avg loss: 2.863691, ppl: 17.526094 +epoch: 1, batch: 26889, sum loss: 4336.231445, avg loss: 2.489226, ppl: 12.051943 +epoch: 1, batch: 26890, sum loss: 3036.919189, avg loss: 2.252908, ppl: 9.515362 +epoch: 1, batch: 26891, sum loss: 4892.325684, avg loss: 2.754688, ppl: 15.716137 +epoch: 1, batch: 26892, sum loss: 4641.615723, avg loss: 2.603262, ppl: 13.507728 +epoch: 1, batch: 26893, sum loss: 3499.712402, avg loss: 2.359887, ppl: 10.589756 +epoch: 1, batch: 26894, sum loss: 4713.517578, avg loss: 2.704256, ppl: 14.943189 +epoch: 1, batch: 26895, sum loss: 3800.504639, avg loss: 2.417624, ppl: 11.219168 +epoch: 1, batch: 26896, sum loss: 5082.860352, avg loss: 2.678009, ppl: 14.556080 +epoch: 1, batch: 26897, sum loss: 3976.785645, avg loss: 2.482388, ppl: 11.969814 +epoch: 1, batch: 26898, sum loss: 5265.104492, avg loss: 2.809554, ppl: 16.602514 +epoch: 1, batch: 26899, sum loss: 4737.372070, avg loss: 2.741535, ppl: 15.510772 +epoch: 1, batch: 26900, sum loss: 4232.248535, avg loss: 2.563446, ppl: 12.980466 +epoch: 1, batch: 26901, sum loss: 4852.348633, avg loss: 2.707784, ppl: 14.996007 +epoch: 1, batch: 26902, sum loss: 4538.171387, avg loss: 2.706125, ppl: 14.971147 +epoch: 1, batch: 26903, sum loss: 4454.889648, avg loss: 2.736419, ppl: 15.431622 +epoch: 1, batch: 26904, sum loss: 
4165.037109, avg loss: 2.500022, ppl: 12.182764 +epoch: 1, batch: 26905, sum loss: 4122.079590, avg loss: 2.590873, ppl: 13.341420 +epoch: 1, batch: 26906, sum loss: 3994.077393, avg loss: 2.507268, ppl: 12.271356 +epoch: 1, batch: 26907, sum loss: 4186.972656, avg loss: 2.435703, ppl: 11.423841 +epoch: 1, batch: 26908, sum loss: 4152.475098, avg loss: 2.424095, ppl: 11.292007 +epoch: 1, batch: 26909, sum loss: 4763.561035, avg loss: 2.655274, ppl: 14.228880 +epoch: 1, batch: 26910, sum loss: 4684.408203, avg loss: 2.744235, ppl: 15.552705 +epoch: 1, batch: 26911, sum loss: 5712.082520, avg loss: 2.996895, ppl: 20.023270 +epoch: 1, batch: 26912, sum loss: 4829.035156, avg loss: 2.533597, ppl: 12.598737 +epoch: 1, batch: 26913, sum loss: 4246.725098, avg loss: 2.640998, ppl: 14.027198 +epoch: 1, batch: 26914, sum loss: 3757.616211, avg loss: 2.552728, ppl: 12.842094 +epoch: 1, batch: 26915, sum loss: 4131.637207, avg loss: 2.616616, ppl: 13.689323 +epoch: 1, batch: 26916, sum loss: 4984.188477, avg loss: 2.593230, ppl: 13.372900 +epoch: 1, batch: 26917, sum loss: 3988.216797, avg loss: 2.362688, ppl: 10.619456 +epoch: 1, batch: 26918, sum loss: 4672.687012, avg loss: 2.734164, ppl: 15.396873 +epoch: 1, batch: 26919, sum loss: 4595.249023, avg loss: 2.698326, ppl: 14.854842 +epoch: 1, batch: 26920, sum loss: 4166.852539, avg loss: 2.562640, ppl: 12.970013 +epoch: 1, batch: 26921, sum loss: 4231.813477, avg loss: 2.596205, ppl: 13.412733 +epoch: 1, batch: 26922, sum loss: 4349.769043, avg loss: 2.631439, ppl: 13.893751 +epoch: 1, batch: 26923, sum loss: 4279.910645, avg loss: 2.633791, ppl: 13.926468 +epoch: 1, batch: 26924, sum loss: 4425.930664, avg loss: 2.695451, ppl: 14.812201 +epoch: 1, batch: 26925, sum loss: 5018.864746, avg loss: 2.701219, ppl: 14.897879 +epoch: 1, batch: 26926, sum loss: 4134.349609, avg loss: 2.593695, ppl: 13.379115 +epoch: 1, batch: 26927, sum loss: 4233.976074, avg loss: 2.547519, ppl: 12.775365 +epoch: 1, batch: 26928, sum loss: 
5360.520996, avg loss: 2.721077, ppl: 15.196676 +epoch: 1, batch: 26929, sum loss: 4853.755859, avg loss: 2.684599, ppl: 14.652330 +epoch: 1, batch: 26930, sum loss: 4880.917969, avg loss: 2.849339, ppl: 17.276358 +epoch: 1, batch: 26931, sum loss: 5080.939453, avg loss: 2.784076, ppl: 16.184864 +epoch: 1, batch: 26932, sum loss: 4043.528809, avg loss: 2.625668, ppl: 13.813800 +epoch: 1, batch: 26933, sum loss: 4506.435547, avg loss: 2.650845, ppl: 14.165998 +epoch: 1, batch: 26934, sum loss: 4439.036133, avg loss: 2.435017, ppl: 11.416014 +epoch: 1, batch: 26935, sum loss: 4263.562500, avg loss: 2.427997, ppl: 11.336151 +epoch: 1, batch: 26936, sum loss: 4045.381104, avg loss: 2.498691, ppl: 12.166561 +epoch: 1, batch: 26937, sum loss: 5049.071777, avg loss: 2.980562, ppl: 19.698879 +epoch: 1, batch: 26938, sum loss: 4373.587402, avg loss: 2.551685, ppl: 12.828697 +epoch: 1, batch: 26939, sum loss: 4934.122070, avg loss: 2.801886, ppl: 16.475697 +epoch: 1, batch: 26940, sum loss: 4367.998047, avg loss: 2.707996, ppl: 14.999192 +epoch: 1, batch: 26941, sum loss: 4614.527344, avg loss: 2.529894, ppl: 12.552180 +epoch: 1, batch: 26942, sum loss: 4601.961426, avg loss: 2.737633, ppl: 15.450375 +epoch: 1, batch: 26943, sum loss: 5147.602539, avg loss: 2.833023, ppl: 16.996761 +epoch: 1, batch: 26944, sum loss: 4102.361816, avg loss: 2.651818, ppl: 14.179788 +epoch: 1, batch: 26945, sum loss: 4325.663086, avg loss: 2.588667, ppl: 13.312020 +epoch: 1, batch: 26946, sum loss: 5026.174805, avg loss: 2.809488, ppl: 16.601421 +epoch: 1, batch: 26947, sum loss: 4311.280762, avg loss: 2.600290, ppl: 13.467644 +epoch: 1, batch: 26948, sum loss: 4199.904785, avg loss: 2.764914, ppl: 15.877679 +epoch: 1, batch: 26949, sum loss: 4601.236816, avg loss: 2.632287, ppl: 13.905529 +epoch: 1, batch: 26950, sum loss: 4249.762207, avg loss: 2.693132, ppl: 14.777886 +epoch: 1, batch: 26951, sum loss: 4183.200195, avg loss: 2.574277, ppl: 13.121825 +epoch: 1, batch: 26952, sum loss: 
3240.059814, avg loss: 2.278523, ppl: 9.762251 +epoch: 1, batch: 26953, sum loss: 4496.060547, avg loss: 2.567710, ppl: 13.035943 +epoch: 1, batch: 26954, sum loss: 3975.890137, avg loss: 2.498988, ppl: 12.170174 +epoch: 1, batch: 26955, sum loss: 5198.254883, avg loss: 2.886316, ppl: 17.927141 +epoch: 1, batch: 26956, sum loss: 5289.877930, avg loss: 2.667614, ppl: 14.405553 +epoch: 1, batch: 26957, sum loss: 4592.512695, avg loss: 2.814040, ppl: 16.677153 +epoch: 1, batch: 26958, sum loss: 4227.003906, avg loss: 2.519073, ppl: 12.417075 +epoch: 1, batch: 26959, sum loss: 3693.531738, avg loss: 2.505788, ppl: 12.253212 +epoch: 1, batch: 26960, sum loss: 4859.340332, avg loss: 2.740745, ppl: 15.498525 +epoch: 1, batch: 26961, sum loss: 5096.179688, avg loss: 2.882455, ppl: 17.858055 +epoch: 1, batch: 26962, sum loss: 4499.898438, avg loss: 2.839053, ppl: 17.099560 +epoch: 1, batch: 26963, sum loss: 3565.041992, avg loss: 2.357832, ppl: 10.568017 +epoch: 1, batch: 26964, sum loss: 3967.239990, avg loss: 2.439877, ppl: 11.471630 +epoch: 1, batch: 26965, sum loss: 4885.664551, avg loss: 2.709742, ppl: 15.025396 +epoch: 1, batch: 26966, sum loss: 4436.267090, avg loss: 2.626564, ppl: 13.826185 +epoch: 1, batch: 26967, sum loss: 4545.020508, avg loss: 2.468778, ppl: 11.808010 +epoch: 1, batch: 26968, sum loss: 4945.770996, avg loss: 2.679183, ppl: 14.573175 +epoch: 1, batch: 26969, sum loss: 5562.208008, avg loss: 2.936752, ppl: 18.854504 +epoch: 1, batch: 26970, sum loss: 5238.080078, avg loss: 2.900377, ppl: 18.180990 +epoch: 1, batch: 26971, sum loss: 4359.579102, avg loss: 2.637374, ppl: 13.976452 +epoch: 1, batch: 26972, sum loss: 3920.970947, avg loss: 2.579586, ppl: 13.191679 +epoch: 1, batch: 26973, sum loss: 5627.903809, avg loss: 3.019262, ppl: 20.476166 +epoch: 1, batch: 26974, sum loss: 4922.911621, avg loss: 2.832515, ppl: 16.988136 +epoch: 1, batch: 26975, sum loss: 4944.693359, avg loss: 2.838515, ppl: 17.090368 +epoch: 1, batch: 26976, sum loss: 
4987.610352, avg loss: 2.718044, ppl: 15.150660 +epoch: 1, batch: 26977, sum loss: 4625.164551, avg loss: 2.493350, ppl: 12.101752 +epoch: 1, batch: 26978, sum loss: 3878.230957, avg loss: 2.582044, ppl: 13.224146 +epoch: 1, batch: 26979, sum loss: 4445.054688, avg loss: 2.486048, ppl: 12.013710 +epoch: 1, batch: 26980, sum loss: 4492.698730, avg loss: 2.565790, ppl: 13.010939 +epoch: 1, batch: 26981, sum loss: 4498.177246, avg loss: 2.633593, ppl: 13.923709 +epoch: 1, batch: 26982, sum loss: 4784.666992, avg loss: 2.746651, ppl: 15.590339 +epoch: 1, batch: 26983, sum loss: 4207.987305, avg loss: 2.544128, ppl: 12.732121 +epoch: 1, batch: 26984, sum loss: 4835.168457, avg loss: 2.730191, ppl: 15.335815 +epoch: 1, batch: 26985, sum loss: 3825.630371, avg loss: 2.541947, ppl: 12.704384 +epoch: 1, batch: 26986, sum loss: 4413.542969, avg loss: 2.617760, ppl: 13.704989 +epoch: 1, batch: 26987, sum loss: 4417.820801, avg loss: 2.591097, ppl: 13.344407 +epoch: 1, batch: 26988, sum loss: 5081.672852, avg loss: 2.820018, ppl: 16.777157 +epoch: 1, batch: 26989, sum loss: 4205.963379, avg loss: 2.389752, ppl: 10.910787 +epoch: 1, batch: 26990, sum loss: 4635.765625, avg loss: 2.662703, ppl: 14.334981 +epoch: 1, batch: 26991, sum loss: 4534.899414, avg loss: 2.546266, ppl: 12.759369 +epoch: 1, batch: 26992, sum loss: 4250.064941, avg loss: 2.662948, ppl: 14.338495 +epoch: 1, batch: 26993, sum loss: 4250.342285, avg loss: 2.736859, ppl: 15.438422 +epoch: 1, batch: 26994, sum loss: 4133.216309, avg loss: 2.675221, ppl: 14.515557 +epoch: 1, batch: 26995, sum loss: 3689.083008, avg loss: 2.436647, ppl: 11.434632 +epoch: 1, batch: 26996, sum loss: 4619.839355, avg loss: 2.682833, ppl: 14.626478 +epoch: 1, batch: 26997, sum loss: 4402.119141, avg loss: 2.595589, ppl: 13.404483 +epoch: 1, batch: 26998, sum loss: 3768.798340, avg loss: 2.620861, ppl: 13.747560 +epoch: 1, batch: 26999, sum loss: 4505.756836, avg loss: 2.515777, ppl: 12.376223 +epoch: 1, batch: 27000, sum loss: 
4674.415527, avg loss: 2.525346, ppl: 12.495218 +epoch: 1, batch: 27001, sum loss: 4244.070801, avg loss: 2.602128, ppl: 13.492420 +epoch: 1, batch: 27002, sum loss: 3874.913330, avg loss: 2.545935, ppl: 12.755154 +epoch: 1, batch: 27003, sum loss: 4197.033691, avg loss: 2.624786, ppl: 13.801625 +epoch: 1, batch: 27004, sum loss: 4972.530762, avg loss: 2.696600, ppl: 14.829229 +epoch: 1, batch: 27005, sum loss: 4488.078613, avg loss: 2.579355, ppl: 13.188635 +epoch: 1, batch: 27006, sum loss: 4947.131836, avg loss: 2.791835, ppl: 16.310925 +epoch: 1, batch: 27007, sum loss: 4244.672363, avg loss: 2.534133, ppl: 12.605496 +epoch: 1, batch: 27008, sum loss: 4928.895508, avg loss: 2.834327, ppl: 17.018951 +epoch: 1, batch: 27009, sum loss: 5109.998047, avg loss: 3.011195, ppl: 20.311661 +epoch: 1, batch: 27010, sum loss: 5619.863770, avg loss: 2.800132, ppl: 16.446810 +epoch: 1, batch: 27011, sum loss: 5101.399902, avg loss: 2.651455, ppl: 14.174650 +epoch: 1, batch: 27012, sum loss: 3909.215576, avg loss: 2.611366, ppl: 13.617643 +epoch: 1, batch: 27013, sum loss: 4526.774414, avg loss: 2.615121, ppl: 13.668869 +epoch: 1, batch: 27014, sum loss: 3753.412354, avg loss: 2.450008, ppl: 11.588441 +epoch: 1, batch: 27015, sum loss: 4169.919434, avg loss: 2.654309, ppl: 14.215160 +epoch: 1, batch: 27016, sum loss: 4117.700684, avg loss: 2.523101, ppl: 12.467198 +epoch: 1, batch: 27017, sum loss: 5308.716797, avg loss: 2.852615, ppl: 17.333055 +epoch: 1, batch: 27018, sum loss: 4270.252930, avg loss: 2.534275, ppl: 12.607285 +epoch: 1, batch: 27019, sum loss: 5572.061035, avg loss: 2.848702, ppl: 17.265356 +epoch: 1, batch: 27020, sum loss: 4608.143066, avg loss: 2.628718, ppl: 13.856000 +epoch: 1, batch: 27021, sum loss: 4863.095703, avg loss: 2.822458, ppl: 16.818144 +epoch: 1, batch: 27022, sum loss: 5410.158691, avg loss: 2.596045, ppl: 13.410601 +epoch: 1, batch: 27023, sum loss: 5046.430664, avg loss: 2.835073, ppl: 17.031652 +epoch: 1, batch: 27024, sum loss: 
4184.842773, avg loss: 2.710391, ppl: 15.035147 +epoch: 1, batch: 27025, sum loss: 4304.134766, avg loss: 2.670059, ppl: 14.440817 +epoch: 1, batch: 27026, sum loss: 3932.592285, avg loss: 2.269239, ppl: 9.672042 +epoch: 1, batch: 27027, sum loss: 4178.781250, avg loss: 2.692514, ppl: 14.768757 +epoch: 1, batch: 27028, sum loss: 3573.039551, avg loss: 2.318650, ppl: 10.161946 +epoch: 1, batch: 27029, sum loss: 5635.645508, avg loss: 2.733097, ppl: 15.380445 +epoch: 1, batch: 27030, sum loss: 4968.890625, avg loss: 2.882187, ppl: 17.853283 +epoch: 1, batch: 27031, sum loss: 4139.298340, avg loss: 2.501087, ppl: 12.195740 +epoch: 1, batch: 27032, sum loss: 5223.042969, avg loss: 2.793071, ppl: 16.331100 +epoch: 1, batch: 27033, sum loss: 5419.403320, avg loss: 3.109239, ppl: 22.403986 +epoch: 1, batch: 27034, sum loss: 3928.541016, avg loss: 2.654420, ppl: 14.216733 +epoch: 1, batch: 27035, sum loss: 4081.115234, avg loss: 2.579719, ppl: 13.193428 +epoch: 1, batch: 27036, sum loss: 4623.206055, avg loss: 2.713149, ppl: 15.076678 +epoch: 1, batch: 27037, sum loss: 3971.896240, avg loss: 2.614810, ppl: 13.664617 +epoch: 1, batch: 27038, sum loss: 4159.854004, avg loss: 2.563065, ppl: 12.975521 +epoch: 1, batch: 27039, sum loss: 4487.073242, avg loss: 2.561115, ppl: 12.950249 +epoch: 1, batch: 27040, sum loss: 4698.398438, avg loss: 2.639550, ppl: 14.006895 +epoch: 1, batch: 27041, sum loss: 4823.036133, avg loss: 2.715673, ppl: 15.114785 +epoch: 1, batch: 27042, sum loss: 5230.239746, avg loss: 2.850267, ppl: 17.292397 +epoch: 1, batch: 27043, sum loss: 5405.851562, avg loss: 2.681474, ppl: 14.606610 +epoch: 1, batch: 27044, sum loss: 3850.090332, avg loss: 2.532954, ppl: 12.590647 +epoch: 1, batch: 27045, sum loss: 4553.027344, avg loss: 2.461096, ppl: 11.717645 +epoch: 1, batch: 27046, sum loss: 4482.117188, avg loss: 2.552459, ppl: 12.838629 +epoch: 1, batch: 27047, sum loss: 4750.052734, avg loss: 2.608486, ppl: 13.578477 +epoch: 1, batch: 27048, sum loss: 
4054.900879, avg loss: 2.399350, ppl: 11.016010 +epoch: 1, batch: 27049, sum loss: 4163.309082, avg loss: 2.466415, ppl: 11.780144 +epoch: 1, batch: 27050, sum loss: 4788.089355, avg loss: 2.826499, ppl: 16.886238 +epoch: 1, batch: 27051, sum loss: 4064.142090, avg loss: 2.647650, ppl: 14.120809 +epoch: 1, batch: 27052, sum loss: 4629.366699, avg loss: 2.579034, ppl: 13.184401 +epoch: 1, batch: 27053, sum loss: 3695.596924, avg loss: 2.410696, ppl: 11.141713 +epoch: 1, batch: 27054, sum loss: 3787.780273, avg loss: 2.684465, ppl: 14.650364 +epoch: 1, batch: 27055, sum loss: 3832.645508, avg loss: 2.415026, ppl: 11.190058 +epoch: 1, batch: 27056, sum loss: 4464.028320, avg loss: 2.584846, ppl: 13.261241 +epoch: 1, batch: 27057, sum loss: 3508.492920, avg loss: 2.370603, ppl: 10.703848 +epoch: 1, batch: 27058, sum loss: 4169.354004, avg loss: 2.586448, ppl: 13.282508 +epoch: 1, batch: 27059, sum loss: 4888.421387, avg loss: 3.049545, ppl: 21.105745 +epoch: 1, batch: 27060, sum loss: 4372.003418, avg loss: 2.946094, ppl: 19.031467 +epoch: 1, batch: 27061, sum loss: 4335.440430, avg loss: 2.594519, ppl: 13.390141 +epoch: 1, batch: 27062, sum loss: 4534.038086, avg loss: 2.551513, ppl: 12.826492 +epoch: 1, batch: 27063, sum loss: 4009.235596, avg loss: 2.356987, ppl: 10.559092 +epoch: 1, batch: 27064, sum loss: 4463.751465, avg loss: 2.708587, ppl: 15.008057 +epoch: 1, batch: 27065, sum loss: 4386.753418, avg loss: 2.479793, ppl: 11.938791 +epoch: 1, batch: 27066, sum loss: 4135.102539, avg loss: 2.489526, ppl: 12.055558 +epoch: 1, batch: 27067, sum loss: 4511.688477, avg loss: 2.658626, ppl: 14.276661 +epoch: 1, batch: 27068, sum loss: 4613.261230, avg loss: 2.618196, ppl: 13.710967 +epoch: 1, batch: 27069, sum loss: 3691.401123, avg loss: 2.552836, ppl: 12.843478 +epoch: 1, batch: 27070, sum loss: 4106.090820, avg loss: 2.447015, ppl: 11.553804 +epoch: 1, batch: 27071, sum loss: 4834.708496, avg loss: 2.570286, ppl: 13.069566 +epoch: 1, batch: 27072, sum loss: 
3957.084961, avg loss: 2.450207, ppl: 11.590752 +epoch: 1, batch: 27073, sum loss: 4155.315430, avg loss: 2.682579, ppl: 14.622761 +epoch: 1, batch: 27074, sum loss: 4250.225586, avg loss: 2.633349, ppl: 13.920314 +epoch: 1, batch: 27075, sum loss: 4731.629883, avg loss: 2.719327, ppl: 15.170116 +epoch: 1, batch: 27076, sum loss: 4226.717285, avg loss: 2.537045, ppl: 12.642258 +epoch: 1, batch: 27077, sum loss: 4579.803711, avg loss: 2.825295, ppl: 16.865923 +epoch: 1, batch: 27078, sum loss: 5842.343750, avg loss: 3.054022, ppl: 21.200438 +epoch: 1, batch: 27079, sum loss: 4517.273926, avg loss: 2.950538, ppl: 19.116243 +epoch: 1, batch: 27080, sum loss: 4234.413574, avg loss: 2.487905, ppl: 12.036029 +epoch: 1, batch: 27081, sum loss: 4227.537109, avg loss: 2.333078, ppl: 10.309625 +epoch: 1, batch: 27082, sum loss: 4030.826172, avg loss: 2.734617, ppl: 15.403850 +epoch: 1, batch: 27083, sum loss: 4330.067871, avg loss: 2.658114, ppl: 14.269351 +epoch: 1, batch: 27084, sum loss: 5611.612793, avg loss: 2.722762, ppl: 15.222310 +epoch: 1, batch: 27085, sum loss: 4481.583496, avg loss: 2.858153, ppl: 17.429298 +epoch: 1, batch: 27086, sum loss: 4563.080078, avg loss: 2.498949, ppl: 12.169692 +epoch: 1, batch: 27087, sum loss: 5320.200195, avg loss: 2.738137, ppl: 15.458163 +epoch: 1, batch: 27088, sum loss: 4548.320801, avg loss: 2.656729, ppl: 14.249609 +epoch: 1, batch: 27089, sum loss: 4776.250488, avg loss: 2.793129, ppl: 16.332043 +epoch: 1, batch: 27090, sum loss: 4190.395020, avg loss: 2.782467, ppl: 16.158834 +epoch: 1, batch: 27091, sum loss: 4800.800293, avg loss: 2.781460, ppl: 16.142572 +epoch: 1, batch: 27092, sum loss: 3919.493164, avg loss: 2.391393, ppl: 10.928706 +epoch: 1, batch: 27093, sum loss: 4432.497559, avg loss: 2.691255, ppl: 14.750180 +epoch: 1, batch: 27094, sum loss: 4938.367188, avg loss: 2.679526, ppl: 14.578186 +epoch: 1, batch: 27095, sum loss: 4900.117188, avg loss: 2.635889, ppl: 13.955708 +epoch: 1, batch: 27096, sum loss: 
4186.074707, avg loss: 2.569721, ppl: 13.062173 +epoch: 1, batch: 27097, sum loss: 4547.795410, avg loss: 2.683065, ppl: 14.629868 +epoch: 1, batch: 27098, sum loss: 5031.466797, avg loss: 2.876768, ppl: 17.756784 +epoch: 1, batch: 27099, sum loss: 4948.819336, avg loss: 2.807045, ppl: 16.560900 +epoch: 1, batch: 27100, sum loss: 4677.763672, avg loss: 2.774474, ppl: 16.030199 +epoch: 1, batch: 27101, sum loss: 4858.166016, avg loss: 2.856065, ppl: 17.392942 +epoch: 1, batch: 27102, sum loss: 3970.246338, avg loss: 2.680788, ppl: 14.596584 +epoch: 1, batch: 27103, sum loss: 4369.965820, avg loss: 2.601170, ppl: 13.479504 +epoch: 1, batch: 27104, sum loss: 4033.435059, avg loss: 2.595518, ppl: 13.403530 +epoch: 1, batch: 27105, sum loss: 5003.416016, avg loss: 2.752154, ppl: 15.676364 +epoch: 1, batch: 27106, sum loss: 3395.502441, avg loss: 2.441051, ppl: 11.485111 +epoch: 1, batch: 27107, sum loss: 4514.231445, avg loss: 2.460072, ppl: 11.705652 +epoch: 1, batch: 27108, sum loss: 4146.692871, avg loss: 2.815135, ppl: 16.695421 +epoch: 1, batch: 27109, sum loss: 4421.741211, avg loss: 2.619515, ppl: 13.729063 +epoch: 1, batch: 27110, sum loss: 4473.204102, avg loss: 2.646866, ppl: 14.109754 +epoch: 1, batch: 27111, sum loss: 3933.028076, avg loss: 2.465848, ppl: 11.773467 +epoch: 1, batch: 27112, sum loss: 3914.001465, avg loss: 2.573308, ppl: 13.109118 +epoch: 1, batch: 27113, sum loss: 4366.342773, avg loss: 2.620854, ppl: 13.747458 +epoch: 1, batch: 27114, sum loss: 4662.192383, avg loss: 2.604577, ppl: 13.525497 +epoch: 1, batch: 27115, sum loss: 3502.247559, avg loss: 2.352080, ppl: 10.507406 +epoch: 1, batch: 27116, sum loss: 5269.154297, avg loss: 2.825284, ppl: 16.865730 +epoch: 1, batch: 27117, sum loss: 4133.812500, avg loss: 2.590108, ppl: 13.331213 +epoch: 1, batch: 27118, sum loss: 3732.378906, avg loss: 2.518474, ppl: 12.409649 +epoch: 1, batch: 27119, sum loss: 4163.881836, avg loss: 2.443593, ppl: 11.514333 +epoch: 1, batch: 27120, sum loss: 
4680.385742, avg loss: 2.568818, ppl: 13.050385 +epoch: 1, batch: 27121, sum loss: 5089.188477, avg loss: 2.677111, ppl: 14.543020 +epoch: 1, batch: 27122, sum loss: 4207.067871, avg loss: 2.466042, ppl: 11.775746 +epoch: 1, batch: 27123, sum loss: 4276.605469, avg loss: 2.701583, ppl: 14.903300 +epoch: 1, batch: 27124, sum loss: 4890.855957, avg loss: 2.780475, ppl: 16.126686 +epoch: 1, batch: 27125, sum loss: 4198.847656, avg loss: 2.353614, ppl: 10.523537 +epoch: 1, batch: 27126, sum loss: 3803.299316, avg loss: 2.578508, ppl: 13.177464 +epoch: 1, batch: 27127, sum loss: 5031.030762, avg loss: 2.597331, ppl: 13.427855 +epoch: 1, batch: 27128, sum loss: 4820.436523, avg loss: 2.717270, ppl: 15.138931 +epoch: 1, batch: 27129, sum loss: 4217.851562, avg loss: 2.507641, ppl: 12.275932 +epoch: 1, batch: 27130, sum loss: 5551.395996, avg loss: 2.753669, ppl: 15.700127 +epoch: 1, batch: 27131, sum loss: 4910.150879, avg loss: 2.525798, ppl: 12.500868 +epoch: 1, batch: 27132, sum loss: 4821.550781, avg loss: 2.613307, ppl: 13.644091 +epoch: 1, batch: 27133, sum loss: 4356.571289, avg loss: 2.635554, ppl: 13.951043 +epoch: 1, batch: 27134, sum loss: 4578.929688, avg loss: 2.668374, ppl: 14.416510 +epoch: 1, batch: 27135, sum loss: 4385.062988, avg loss: 2.744095, ppl: 15.550529 +epoch: 1, batch: 27136, sum loss: 4129.507324, avg loss: 2.366480, ppl: 10.659802 +epoch: 1, batch: 27137, sum loss: 3997.077148, avg loss: 2.380630, ppl: 10.811707 +epoch: 1, batch: 27138, sum loss: 3568.981445, avg loss: 2.204436, ppl: 9.065136 +epoch: 1, batch: 27139, sum loss: 4241.963867, avg loss: 2.572446, ppl: 13.097827 +epoch: 1, batch: 27140, sum loss: 4724.328613, avg loss: 2.611569, ppl: 13.620410 +epoch: 1, batch: 27141, sum loss: 4706.487305, avg loss: 2.726818, ppl: 15.284169 +epoch: 1, batch: 27142, sum loss: 3488.739014, avg loss: 2.392825, ppl: 10.944369 +epoch: 1, batch: 27143, sum loss: 3757.770996, avg loss: 2.441697, ppl: 11.492523 +epoch: 1, batch: 27144, sum loss: 
3893.145020, avg loss: 2.422617, ppl: 11.275325 +epoch: 1, batch: 27145, sum loss: 4713.458984, avg loss: 2.673544, ppl: 14.491241 +epoch: 1, batch: 27146, sum loss: 4075.106201, avg loss: 2.546941, ppl: 12.767990 +epoch: 1, batch: 27147, sum loss: 4511.923340, avg loss: 2.512207, ppl: 12.332114 +epoch: 1, batch: 27148, sum loss: 4316.009766, avg loss: 2.558393, ppl: 12.915052 +epoch: 1, batch: 27149, sum loss: 4667.437988, avg loss: 2.535273, ppl: 12.619880 +epoch: 1, batch: 27150, sum loss: 4599.926270, avg loss: 2.858873, ppl: 17.441853 +epoch: 1, batch: 27151, sum loss: 6266.191895, avg loss: 2.939114, ppl: 18.899101 +epoch: 1, batch: 27152, sum loss: 4454.724121, avg loss: 2.685186, ppl: 14.660933 +epoch: 1, batch: 27153, sum loss: 5337.902344, avg loss: 2.701368, ppl: 14.900095 +epoch: 1, batch: 27154, sum loss: 5025.846680, avg loss: 2.785946, ppl: 16.215153 +epoch: 1, batch: 27155, sum loss: 3952.161621, avg loss: 2.402530, ppl: 11.051097 +epoch: 1, batch: 27156, sum loss: 4873.389648, avg loss: 2.776860, ppl: 16.068491 +epoch: 1, batch: 27157, sum loss: 4204.858887, avg loss: 2.492507, ppl: 12.091549 +epoch: 1, batch: 27158, sum loss: 4454.178711, avg loss: 2.640296, ppl: 14.017348 +epoch: 1, batch: 27159, sum loss: 3950.770508, avg loss: 2.628590, ppl: 13.854217 +epoch: 1, batch: 27160, sum loss: 4211.675293, avg loss: 2.779984, ppl: 16.118759 +epoch: 1, batch: 27161, sum loss: 3915.574707, avg loss: 2.492409, ppl: 12.090369 +epoch: 1, batch: 27162, sum loss: 4545.063477, avg loss: 2.747922, ppl: 15.610167 +epoch: 1, batch: 27163, sum loss: 3663.394043, avg loss: 2.526479, ppl: 12.509380 +epoch: 1, batch: 27164, sum loss: 4016.707520, avg loss: 2.684965, ppl: 14.657687 +epoch: 1, batch: 27165, sum loss: 4163.238281, avg loss: 2.447524, ppl: 11.559688 +epoch: 1, batch: 27166, sum loss: 4966.687500, avg loss: 2.593570, ppl: 13.377450 +epoch: 1, batch: 27167, sum loss: 4170.624023, avg loss: 2.536876, ppl: 12.640121 +epoch: 1, batch: 27168, sum loss: 
4411.271973, avg loss: 2.516413, ppl: 12.384095 +epoch: 1, batch: 27169, sum loss: 4799.040039, avg loss: 2.714389, ppl: 15.095388 +epoch: 1, batch: 27170, sum loss: 4370.794434, avg loss: 2.514842, ppl: 12.364650 +epoch: 1, batch: 27171, sum loss: 5379.894531, avg loss: 2.872341, ppl: 17.678354 +epoch: 1, batch: 27172, sum loss: 4231.584961, avg loss: 2.668086, ppl: 14.412362 +epoch: 1, batch: 27173, sum loss: 4213.122070, avg loss: 2.521318, ppl: 12.444988 +epoch: 1, batch: 27174, sum loss: 4894.736816, avg loss: 2.739080, ppl: 15.472751 +epoch: 1, batch: 27175, sum loss: 5474.788086, avg loss: 2.793259, ppl: 16.334169 +epoch: 1, batch: 27176, sum loss: 5459.827637, avg loss: 2.894925, ppl: 18.082138 +epoch: 1, batch: 27177, sum loss: 3778.171631, avg loss: 2.428131, ppl: 11.337670 +epoch: 1, batch: 27178, sum loss: 4780.449707, avg loss: 2.871141, ppl: 17.657152 +epoch: 1, batch: 27179, sum loss: 4865.142578, avg loss: 2.713409, ppl: 15.080601 +epoch: 1, batch: 27180, sum loss: 4171.462402, avg loss: 2.651915, ppl: 14.181170 +epoch: 1, batch: 27181, sum loss: 3768.107422, avg loss: 2.384878, ppl: 10.857739 +epoch: 1, batch: 27182, sum loss: 5094.393066, avg loss: 2.943035, ppl: 18.973337 +epoch: 1, batch: 27183, sum loss: 3637.952148, avg loss: 2.583773, ppl: 13.247024 +epoch: 1, batch: 27184, sum loss: 3559.882568, avg loss: 2.411844, ppl: 11.154516 +epoch: 1, batch: 27185, sum loss: 5096.221191, avg loss: 2.997777, ppl: 20.040945 +epoch: 1, batch: 27186, sum loss: 4039.430420, avg loss: 2.436327, ppl: 11.430980 +epoch: 1, batch: 27187, sum loss: 3517.450439, avg loss: 2.370250, ppl: 10.700062 +epoch: 1, batch: 27188, sum loss: 3690.565430, avg loss: 2.235352, ppl: 9.349771 +epoch: 1, batch: 27189, sum loss: 3912.703613, avg loss: 2.514591, ppl: 12.361551 +epoch: 1, batch: 27190, sum loss: 5091.842773, avg loss: 2.679917, ppl: 14.583888 +epoch: 1, batch: 27191, sum loss: 3938.596191, avg loss: 2.608342, ppl: 13.576521 +epoch: 1, batch: 27192, sum loss: 
4234.346191, avg loss: 2.634938, ppl: 13.942454 +epoch: 1, batch: 27193, sum loss: 4170.797852, avg loss: 2.462100, ppl: 11.729421 +epoch: 1, batch: 27194, sum loss: 3772.014160, avg loss: 2.351630, ppl: 10.502675 +epoch: 1, batch: 27195, sum loss: 4811.129883, avg loss: 2.693802, ppl: 14.787790 +epoch: 1, batch: 27196, sum loss: 4787.862305, avg loss: 2.831379, ppl: 16.968851 +epoch: 1, batch: 27197, sum loss: 4925.659180, avg loss: 2.727386, ppl: 15.292859 +epoch: 1, batch: 27198, sum loss: 3361.961670, avg loss: 2.488498, ppl: 12.043180 +epoch: 1, batch: 27199, sum loss: 4309.651855, avg loss: 2.753771, ppl: 15.701733 +epoch: 1, batch: 27200, sum loss: 4115.269531, avg loss: 2.643076, ppl: 14.056377 +epoch: 1, batch: 27201, sum loss: 5188.386230, avg loss: 2.636375, ppl: 13.962503 +epoch: 1, batch: 27202, sum loss: 4856.494141, avg loss: 2.722250, ppl: 15.214520 +epoch: 1, batch: 27203, sum loss: 4396.244141, avg loss: 2.422173, ppl: 11.270326 +epoch: 1, batch: 27204, sum loss: 4550.410156, avg loss: 2.521003, ppl: 12.441066 +epoch: 1, batch: 27205, sum loss: 4531.799316, avg loss: 2.773439, ppl: 16.013609 +epoch: 1, batch: 27206, sum loss: 4179.087402, avg loss: 2.592486, ppl: 13.362949 +epoch: 1, batch: 27207, sum loss: 4248.896484, avg loss: 2.687474, ppl: 14.694510 +epoch: 1, batch: 27208, sum loss: 5050.178223, avg loss: 2.660789, ppl: 14.307580 +epoch: 1, batch: 27209, sum loss: 4435.957031, avg loss: 2.523298, ppl: 12.469648 +epoch: 1, batch: 27210, sum loss: 4795.594238, avg loss: 2.646575, ppl: 14.105647 +epoch: 1, batch: 27211, sum loss: 4390.685059, avg loss: 2.452897, ppl: 11.621963 +epoch: 1, batch: 27212, sum loss: 4792.611328, avg loss: 2.778326, ppl: 16.092052 +epoch: 1, batch: 27213, sum loss: 6352.516113, avg loss: 3.206722, ppl: 24.697989 +epoch: 1, batch: 27214, sum loss: 4540.867676, avg loss: 2.556795, ppl: 12.894423 +epoch: 1, batch: 27215, sum loss: 4286.164551, avg loss: 2.886306, ppl: 17.926966 +epoch: 1, batch: 27216, sum loss: 
3613.334961, avg loss: 2.539237, ppl: 12.670007 +epoch: 1, batch: 27217, sum loss: 4725.203125, avg loss: 2.768133, ppl: 15.928869 +epoch: 1, batch: 27218, sum loss: 4166.082031, avg loss: 2.771844, ppl: 15.988091 +epoch: 1, batch: 27219, sum loss: 5085.679688, avg loss: 2.823809, ppl: 16.840870 +epoch: 1, batch: 27220, sum loss: 4759.544922, avg loss: 2.664919, ppl: 14.366784 +epoch: 1, batch: 27221, sum loss: 4295.274414, avg loss: 2.345863, ppl: 10.442277 +epoch: 1, batch: 27222, sum loss: 3958.288330, avg loss: 2.685406, ppl: 14.664153 +epoch: 1, batch: 27223, sum loss: 4907.666504, avg loss: 2.659982, ppl: 14.296028 +epoch: 1, batch: 27224, sum loss: 5080.235840, avg loss: 3.013189, ppl: 20.352192 +epoch: 1, batch: 27225, sum loss: 4961.224121, avg loss: 2.718479, ppl: 15.157249 +epoch: 1, batch: 27226, sum loss: 4566.094727, avg loss: 2.568107, ppl: 13.041119 +epoch: 1, batch: 27227, sum loss: 4241.541992, avg loss: 2.639416, ppl: 14.005029 +epoch: 1, batch: 27228, sum loss: 4802.421875, avg loss: 2.607178, ppl: 13.560731 +epoch: 1, batch: 27229, sum loss: 5269.183594, avg loss: 2.688359, ppl: 14.707518 +epoch: 1, batch: 27230, sum loss: 4703.222656, avg loss: 2.687556, ppl: 14.695713 +epoch: 1, batch: 27231, sum loss: 4582.863281, avg loss: 2.615790, ppl: 13.678013 +epoch: 1, batch: 27232, sum loss: 5019.477539, avg loss: 2.847123, ppl: 17.238115 +epoch: 1, batch: 27233, sum loss: 4687.061523, avg loss: 2.643577, ppl: 14.063416 +epoch: 1, batch: 27234, sum loss: 4162.912598, avg loss: 2.549242, ppl: 12.797403 +epoch: 1, batch: 27235, sum loss: 3848.259766, avg loss: 2.670548, ppl: 14.447887 +epoch: 1, batch: 27236, sum loss: 5331.171875, avg loss: 2.863143, ppl: 17.516495 +epoch: 1, batch: 27237, sum loss: 4829.205078, avg loss: 2.656328, ppl: 14.243896 +epoch: 1, batch: 27238, sum loss: 4358.473633, avg loss: 2.722345, ppl: 15.215957 +epoch: 1, batch: 27239, sum loss: 4497.468750, avg loss: 2.479310, ppl: 11.933031 +epoch: 1, batch: 27240, sum loss: 
4272.193848, avg loss: 2.483834, ppl: 11.987130 +epoch: 1, batch: 27241, sum loss: 4166.140625, avg loss: 2.565357, ppl: 13.005306 +epoch: 1, batch: 27242, sum loss: 5139.105957, avg loss: 3.005325, ppl: 20.192780 +epoch: 1, batch: 27243, sum loss: 4581.662598, avg loss: 2.783513, ppl: 16.175751 +epoch: 1, batch: 27244, sum loss: 5517.114258, avg loss: 2.877994, ppl: 17.778574 +epoch: 1, batch: 27245, sum loss: 4481.993164, avg loss: 2.701623, ppl: 14.903908 +epoch: 1, batch: 27246, sum loss: 4075.178955, avg loss: 2.515543, ppl: 12.373322 +epoch: 1, batch: 27247, sum loss: 4782.672852, avg loss: 2.874202, ppl: 17.711294 +epoch: 1, batch: 27248, sum loss: 3702.082275, avg loss: 2.361022, ppl: 10.601778 +epoch: 1, batch: 27249, sum loss: 4696.228516, avg loss: 2.645762, ppl: 14.094187 +epoch: 1, batch: 27250, sum loss: 4899.431641, avg loss: 2.955025, ppl: 19.202206 +epoch: 1, batch: 27251, sum loss: 5278.525391, avg loss: 2.792871, ppl: 16.327822 +epoch: 1, batch: 27252, sum loss: 3696.952393, avg loss: 2.377461, ppl: 10.777509 +epoch: 1, batch: 27253, sum loss: 4117.929688, avg loss: 2.709164, ppl: 15.016722 +epoch: 1, batch: 27254, sum loss: 3776.137207, avg loss: 2.565311, ppl: 13.004699 +epoch: 1, batch: 27255, sum loss: 4679.889160, avg loss: 2.810744, ppl: 16.622286 +epoch: 1, batch: 27256, sum loss: 3981.928711, avg loss: 2.595781, ppl: 13.407059 +epoch: 1, batch: 27257, sum loss: 5214.506348, avg loss: 2.884130, ppl: 17.887989 +epoch: 1, batch: 27258, sum loss: 4687.270508, avg loss: 2.889809, ppl: 17.989876 +epoch: 1, batch: 27259, sum loss: 4938.254883, avg loss: 2.810617, ppl: 16.620176 +epoch: 1, batch: 27260, sum loss: 4248.245605, avg loss: 2.458476, ppl: 11.686982 +epoch: 1, batch: 27261, sum loss: 4264.305664, avg loss: 2.598602, ppl: 13.444926 +epoch: 1, batch: 27262, sum loss: 3555.394531, avg loss: 2.457080, ppl: 11.670682 +epoch: 1, batch: 27263, sum loss: 4238.824219, avg loss: 2.586226, ppl: 13.279556 +epoch: 1, batch: 27264, sum loss: 
3996.263184, avg loss: 2.583234, ppl: 13.239891 +epoch: 1, batch: 27265, sum loss: 3795.965088, avg loss: 2.449010, ppl: 11.576879 +epoch: 1, batch: 27266, sum loss: 3982.500488, avg loss: 2.530178, ppl: 12.555741 +epoch: 1, batch: 27267, sum loss: 5427.576172, avg loss: 2.855116, ppl: 17.376459 +epoch: 1, batch: 27268, sum loss: 4733.647461, avg loss: 2.695699, ppl: 14.815875 +epoch: 1, batch: 27269, sum loss: 5036.153320, avg loss: 2.921203, ppl: 18.563604 +epoch: 1, batch: 27270, sum loss: 4585.974121, avg loss: 2.598286, ppl: 13.440677 +epoch: 1, batch: 27271, sum loss: 4232.096680, avg loss: 2.495340, ppl: 12.125857 +epoch: 1, batch: 27272, sum loss: 3848.381348, avg loss: 2.694945, ppl: 14.804706 +epoch: 1, batch: 27273, sum loss: 4418.816895, avg loss: 2.472757, ppl: 11.855087 +epoch: 1, batch: 27274, sum loss: 5342.655273, avg loss: 2.887922, ppl: 17.955956 +epoch: 1, batch: 27275, sum loss: 4216.922852, avg loss: 2.470371, ppl: 11.826831 +epoch: 1, batch: 27276, sum loss: 4369.714844, avg loss: 2.664460, ppl: 14.360195 +epoch: 1, batch: 27277, sum loss: 4308.060059, avg loss: 2.576591, ppl: 13.152225 +epoch: 1, batch: 27278, sum loss: 4249.298828, avg loss: 2.552131, ppl: 12.834430 +epoch: 1, batch: 27279, sum loss: 5099.615234, avg loss: 2.494919, ppl: 12.120755 +epoch: 1, batch: 27280, sum loss: 5030.423828, avg loss: 2.762451, ppl: 15.838622 +epoch: 1, batch: 27281, sum loss: 3930.723633, avg loss: 2.547456, ppl: 12.774558 +epoch: 1, batch: 27282, sum loss: 4268.441895, avg loss: 2.458780, ppl: 11.690538 +epoch: 1, batch: 27283, sum loss: 4286.062988, avg loss: 2.758084, ppl: 15.769604 +epoch: 1, batch: 27284, sum loss: 5764.228027, avg loss: 3.008470, ppl: 20.256380 +epoch: 1, batch: 27285, sum loss: 5231.807617, avg loss: 2.869889, ppl: 17.635061 +epoch: 1, batch: 27286, sum loss: 5265.076172, avg loss: 2.669917, ppl: 14.438768 +epoch: 1, batch: 27287, sum loss: 4969.771973, avg loss: 2.748768, ppl: 15.623366 +epoch: 1, batch: 27288, sum loss: 
5306.687012, avg loss: 2.682855, ppl: 14.626788 +epoch: 1, batch: 27289, sum loss: 4840.352539, avg loss: 2.558326, ppl: 12.914178 +epoch: 1, batch: 27290, sum loss: 3867.874512, avg loss: 2.588939, ppl: 13.315633 +epoch: 1, batch: 27291, sum loss: 3981.520020, avg loss: 2.477611, ppl: 11.912766 +epoch: 1, batch: 27292, sum loss: 4791.566406, avg loss: 2.708630, ppl: 15.008700 +epoch: 1, batch: 27293, sum loss: 4900.376465, avg loss: 2.725460, ppl: 15.263427 +epoch: 1, batch: 27294, sum loss: 4963.647461, avg loss: 2.783874, ppl: 16.181591 +epoch: 1, batch: 27295, sum loss: 4467.875977, avg loss: 2.505820, ppl: 12.253597 +epoch: 1, batch: 27296, sum loss: 4439.611816, avg loss: 2.518214, ppl: 12.406422 +epoch: 1, batch: 27297, sum loss: 4757.777344, avg loss: 2.649096, ppl: 14.141256 +epoch: 1, batch: 27298, sum loss: 4945.998535, avg loss: 2.758505, ppl: 15.776234 +epoch: 1, batch: 27299, sum loss: 3546.705322, avg loss: 2.356615, ppl: 10.555160 +epoch: 1, batch: 27300, sum loss: 4577.519043, avg loss: 2.573085, ppl: 13.106198 +epoch: 1, batch: 27301, sum loss: 3917.812744, avg loss: 2.439485, ppl: 11.467132 +epoch: 1, batch: 27302, sum loss: 4369.012695, avg loss: 2.761702, ppl: 15.826755 +epoch: 1, batch: 27303, sum loss: 4161.597168, avg loss: 2.637260, ppl: 13.974866 +epoch: 1, batch: 27304, sum loss: 4991.779297, avg loss: 2.756366, ppl: 15.742538 +epoch: 1, batch: 27305, sum loss: 4491.382324, avg loss: 2.560651, ppl: 12.944245 +epoch: 1, batch: 27306, sum loss: 3686.137207, avg loss: 2.538662, ppl: 12.662716 +epoch: 1, batch: 27307, sum loss: 5412.747559, avg loss: 2.930562, ppl: 18.738159 +epoch: 1, batch: 27308, sum loss: 5073.809570, avg loss: 2.684555, ppl: 14.651684 +epoch: 1, batch: 27309, sum loss: 4582.122070, avg loss: 2.733963, ppl: 15.393776 +epoch: 1, batch: 27310, sum loss: 4995.286133, avg loss: 2.849564, ppl: 17.280251 +epoch: 1, batch: 27311, sum loss: 4952.375000, avg loss: 2.953116, ppl: 19.165575 +epoch: 1, batch: 27312, sum loss: 
4908.017578, avg loss: 2.729709, ppl: 15.328432 +epoch: 1, batch: 27313, sum loss: 4773.062500, avg loss: 2.919304, ppl: 18.528389 +epoch: 1, batch: 27314, sum loss: 4030.301758, avg loss: 2.655008, ppl: 14.225098 +epoch: 1, batch: 27315, sum loss: 5033.204102, avg loss: 2.827643, ppl: 16.905561 +epoch: 1, batch: 27316, sum loss: 5338.848633, avg loss: 2.836795, ppl: 17.060999 +epoch: 1, batch: 27317, sum loss: 4289.038574, avg loss: 2.543914, ppl: 12.729394 +epoch: 1, batch: 27318, sum loss: 4643.041992, avg loss: 2.644101, ppl: 14.070795 +epoch: 1, batch: 27319, sum loss: 4411.240234, avg loss: 2.755303, ppl: 15.725807 +epoch: 1, batch: 27320, sum loss: 4150.268066, avg loss: 2.743072, ppl: 15.534638 +epoch: 1, batch: 27321, sum loss: 3629.668945, avg loss: 2.472527, ppl: 11.852355 +epoch: 1, batch: 27322, sum loss: 4638.547852, avg loss: 2.711016, ppl: 15.044552 +epoch: 1, batch: 27323, sum loss: 4378.739746, avg loss: 2.745291, ppl: 15.569151 +epoch: 1, batch: 27324, sum loss: 4804.952148, avg loss: 2.593066, ppl: 13.370709 +epoch: 1, batch: 27325, sum loss: 4201.432617, avg loss: 2.639091, ppl: 14.000468 +epoch: 1, batch: 27326, sum loss: 4216.786133, avg loss: 2.514482, ppl: 12.360202 +epoch: 1, batch: 27327, sum loss: 4671.696777, avg loss: 2.703528, ppl: 14.932323 +epoch: 1, batch: 27328, sum loss: 4216.450195, avg loss: 2.512783, ppl: 12.339226 +epoch: 1, batch: 27329, sum loss: 4244.695801, avg loss: 2.552433, ppl: 12.838299 +epoch: 1, batch: 27330, sum loss: 4455.566406, avg loss: 2.652123, ppl: 14.184119 +epoch: 1, batch: 27331, sum loss: 4294.754883, avg loss: 2.595018, ppl: 13.396831 +epoch: 1, batch: 27332, sum loss: 5052.998535, avg loss: 2.744703, ppl: 15.559997 +epoch: 1, batch: 27333, sum loss: 4344.383301, avg loss: 2.320718, ppl: 10.182979 +epoch: 1, batch: 27334, sum loss: 4904.631836, avg loss: 2.766290, ppl: 15.899536 +epoch: 1, batch: 27335, sum loss: 4518.608887, avg loss: 2.531434, ppl: 12.571515 +epoch: 1, batch: 27336, sum loss: 
4288.526855, avg loss: 2.897653, ppl: 18.131546 +epoch: 1, batch: 27337, sum loss: 4235.231934, avg loss: 2.441056, ppl: 11.485163 +epoch: 1, batch: 27338, sum loss: 5321.128906, avg loss: 2.695607, ppl: 14.814511 +epoch: 1, batch: 27339, sum loss: 4821.326172, avg loss: 2.654915, ppl: 14.223779 +epoch: 1, batch: 27340, sum loss: 5327.696289, avg loss: 2.732152, ppl: 15.365915 +epoch: 1, batch: 27341, sum loss: 3900.119385, avg loss: 2.535838, ppl: 12.627012 +epoch: 1, batch: 27342, sum loss: 4316.062012, avg loss: 2.747334, ppl: 15.600988 +epoch: 1, batch: 27343, sum loss: 4592.380859, avg loss: 2.731934, ppl: 15.362567 +epoch: 1, batch: 27344, sum loss: 3480.275146, avg loss: 2.307875, ppl: 10.053036 +epoch: 1, batch: 27345, sum loss: 4967.970703, avg loss: 2.775403, ppl: 16.045084 +epoch: 1, batch: 27346, sum loss: 4356.971680, avg loss: 2.591893, ppl: 13.355025 +epoch: 1, batch: 27347, sum loss: 4388.621094, avg loss: 2.775851, ppl: 16.052286 +epoch: 1, batch: 27348, sum loss: 4853.558105, avg loss: 2.839999, ppl: 17.115747 +epoch: 1, batch: 27349, sum loss: 3864.897461, avg loss: 2.391644, ppl: 10.931456 +epoch: 1, batch: 27350, sum loss: 3826.692871, avg loss: 2.585603, ppl: 13.271293 +epoch: 1, batch: 27351, sum loss: 4172.288086, avg loss: 2.580265, ppl: 13.200633 +epoch: 1, batch: 27352, sum loss: 4138.876953, avg loss: 2.456307, ppl: 11.661662 +epoch: 1, batch: 27353, sum loss: 4919.792480, avg loss: 2.733218, ppl: 15.382311 +epoch: 1, batch: 27354, sum loss: 4697.079590, avg loss: 2.737226, ppl: 15.444084 +epoch: 1, batch: 27355, sum loss: 3303.857666, avg loss: 2.275384, ppl: 9.731657 +epoch: 1, batch: 27356, sum loss: 4766.723145, avg loss: 2.758520, ppl: 15.776483 +epoch: 1, batch: 27357, sum loss: 5188.608887, avg loss: 2.858738, ppl: 17.439499 +epoch: 1, batch: 27358, sum loss: 3975.147461, avg loss: 2.423870, ppl: 11.289469 +epoch: 1, batch: 27359, sum loss: 4763.688965, avg loss: 2.719001, ppl: 15.165162 +epoch: 1, batch: 27360, sum loss: 
5055.995605, avg loss: 2.866211, ppl: 17.570312 +epoch: 1, batch: 27361, sum loss: 4933.549805, avg loss: 2.833745, ppl: 17.009041 +epoch: 1, batch: 27362, sum loss: 4605.691406, avg loss: 2.764521, ppl: 15.871430 +epoch: 1, batch: 27363, sum loss: 4676.969727, avg loss: 2.738273, ppl: 15.460257 +epoch: 1, batch: 27364, sum loss: 4355.546387, avg loss: 2.411709, ppl: 11.153007 +epoch: 1, batch: 27365, sum loss: 4979.138184, avg loss: 2.992271, ppl: 19.930889 +epoch: 1, batch: 27366, sum loss: 4586.566895, avg loss: 2.439663, ppl: 11.469177 +epoch: 1, batch: 27367, sum loss: 4831.764648, avg loss: 2.801023, ppl: 16.461483 +epoch: 1, batch: 27368, sum loss: 4420.985352, avg loss: 2.639395, ppl: 14.004722 +epoch: 1, batch: 27369, sum loss: 4425.863770, avg loss: 2.586712, ppl: 13.286014 +epoch: 1, batch: 27370, sum loss: 5172.174805, avg loss: 2.671578, ppl: 14.462769 +epoch: 1, batch: 27371, sum loss: 4700.333984, avg loss: 2.819636, ppl: 16.770750 +epoch: 1, batch: 27372, sum loss: 4056.635498, avg loss: 2.805419, ppl: 16.533998 +epoch: 1, batch: 27373, sum loss: 3738.547363, avg loss: 2.464435, ppl: 11.756833 +epoch: 1, batch: 27374, sum loss: 3474.223877, avg loss: 2.391069, ppl: 10.925171 +epoch: 1, batch: 27375, sum loss: 4259.165527, avg loss: 2.437988, ppl: 11.449984 +epoch: 1, batch: 27376, sum loss: 5736.395508, avg loss: 2.781957, ppl: 16.150599 +epoch: 1, batch: 27377, sum loss: 3887.204834, avg loss: 2.471205, ppl: 11.836696 +epoch: 1, batch: 27378, sum loss: 4739.398926, avg loss: 2.604065, ppl: 13.518585 +epoch: 1, batch: 27379, sum loss: 4500.568359, avg loss: 2.631911, ppl: 13.900311 +epoch: 1, batch: 27380, sum loss: 5250.315918, avg loss: 2.797185, ppl: 16.398418 +epoch: 1, batch: 27381, sum loss: 3931.014893, avg loss: 2.624175, ppl: 13.793195 +epoch: 1, batch: 27382, sum loss: 5088.908691, avg loss: 2.671343, ppl: 14.459376 +epoch: 1, batch: 27383, sum loss: 4563.713867, avg loss: 2.716496, ppl: 15.127231 +epoch: 1, batch: 27384, sum loss: 
3841.395508, avg loss: 2.604336, ppl: 13.522243 +epoch: 1, batch: 27385, sum loss: 5167.037598, avg loss: 2.899572, ppl: 18.166376 +epoch: 1, batch: 27386, sum loss: 3790.244629, avg loss: 2.538677, ppl: 12.662907 +epoch: 1, batch: 27387, sum loss: 4151.900879, avg loss: 2.480228, ppl: 11.943984 +epoch: 1, batch: 27388, sum loss: 5017.034668, avg loss: 2.682906, ppl: 14.627545 +epoch: 1, batch: 27389, sum loss: 4336.750000, avg loss: 2.696984, ppl: 14.834919 +epoch: 1, batch: 27390, sum loss: 4185.054688, avg loss: 2.573834, ppl: 13.116020 +epoch: 1, batch: 27391, sum loss: 4118.135742, avg loss: 2.570621, ppl: 13.073947 +epoch: 1, batch: 27392, sum loss: 5512.136719, avg loss: 2.864936, ppl: 17.547928 +epoch: 1, batch: 27393, sum loss: 4333.154297, avg loss: 2.772332, ppl: 15.995889 +epoch: 1, batch: 27394, sum loss: 4315.851562, avg loss: 2.537244, ppl: 12.644769 +epoch: 1, batch: 27395, sum loss: 4139.907715, avg loss: 2.623516, ppl: 13.784098 +epoch: 1, batch: 27396, sum loss: 3251.115967, avg loss: 2.267166, ppl: 9.652007 +epoch: 1, batch: 27397, sum loss: 5015.548828, avg loss: 2.702343, ppl: 14.914639 +epoch: 1, batch: 27398, sum loss: 4288.085938, avg loss: 2.491625, ppl: 12.080887 +epoch: 1, batch: 27399, sum loss: 3991.702881, avg loss: 2.483947, ppl: 11.988490 +epoch: 1, batch: 27400, sum loss: 4456.885742, avg loss: 2.586701, ppl: 13.285868 +epoch: 1, batch: 27401, sum loss: 4493.074219, avg loss: 2.606192, ppl: 13.547359 +epoch: 1, batch: 27402, sum loss: 4472.710938, avg loss: 2.702544, ppl: 14.917641 +epoch: 1, batch: 27403, sum loss: 5857.976562, avg loss: 3.002551, ppl: 20.136837 +epoch: 1, batch: 27404, sum loss: 4123.196289, avg loss: 2.486849, ppl: 12.023335 +epoch: 1, batch: 27405, sum loss: 3307.369385, avg loss: 2.245329, ppl: 9.443521 +epoch: 1, batch: 27406, sum loss: 4540.267090, avg loss: 2.613856, ppl: 13.651585 +epoch: 1, batch: 27407, sum loss: 4751.878906, avg loss: 2.790299, ppl: 16.285883 +epoch: 1, batch: 27408, sum loss: 
4176.127441, avg loss: 2.474009, ppl: 11.869942 +epoch: 1, batch: 27409, sum loss: 4950.880371, avg loss: 2.868413, ppl: 17.609045 +epoch: 1, batch: 27410, sum loss: 3489.517822, avg loss: 2.346683, ppl: 10.450847 +epoch: 1, batch: 27411, sum loss: 3854.347900, avg loss: 2.419553, ppl: 11.240834 +epoch: 1, batch: 27412, sum loss: 5173.689453, avg loss: 2.922988, ppl: 18.596779 +epoch: 1, batch: 27413, sum loss: 4113.391602, avg loss: 2.596838, ppl: 13.421236 +epoch: 1, batch: 27414, sum loss: 4517.497070, avg loss: 2.555146, ppl: 12.873173 +epoch: 1, batch: 27415, sum loss: 3793.304688, avg loss: 2.522144, ppl: 12.455273 +epoch: 1, batch: 27416, sum loss: 4693.413086, avg loss: 2.697364, ppl: 14.840558 +epoch: 1, batch: 27417, sum loss: 4668.914551, avg loss: 2.683284, ppl: 14.633074 +epoch: 1, batch: 27418, sum loss: 4597.298828, avg loss: 2.637578, ppl: 13.979308 +epoch: 1, batch: 27419, sum loss: 4624.584473, avg loss: 2.816434, ppl: 16.717133 +epoch: 1, batch: 27420, sum loss: 4845.074219, avg loss: 2.774957, ppl: 16.037933 +epoch: 1, batch: 27421, sum loss: 4522.904785, avg loss: 2.895586, ppl: 18.094097 +epoch: 1, batch: 27422, sum loss: 4135.289062, avg loss: 2.373874, ppl: 10.738916 +epoch: 1, batch: 27423, sum loss: 5005.973633, avg loss: 2.802897, ppl: 16.492355 +epoch: 1, batch: 27424, sum loss: 5094.263184, avg loss: 2.627263, ppl: 13.835851 +epoch: 1, batch: 27425, sum loss: 4452.126465, avg loss: 2.557224, ppl: 12.899955 +epoch: 1, batch: 27426, sum loss: 4262.358887, avg loss: 2.727037, ppl: 15.287522 +epoch: 1, batch: 27427, sum loss: 4392.766113, avg loss: 2.810471, ppl: 16.617744 +epoch: 1, batch: 27428, sum loss: 3247.824707, avg loss: 2.329860, ppl: 10.276503 +epoch: 1, batch: 27429, sum loss: 5145.537598, avg loss: 2.813307, ppl: 16.664930 +epoch: 1, batch: 27430, sum loss: 5063.749512, avg loss: 2.548440, ppl: 12.787134 +epoch: 1, batch: 27431, sum loss: 4013.207031, avg loss: 2.444097, ppl: 11.520143 +epoch: 1, batch: 27432, sum loss: 
3756.680176, avg loss: 2.616072, ppl: 13.681881 +epoch: 1, batch: 27433, sum loss: 4756.439453, avg loss: 2.623519, ppl: 13.784144 +epoch: 1, batch: 27434, sum loss: 4056.570801, avg loss: 2.580516, ppl: 13.203948 +epoch: 1, batch: 27435, sum loss: 3907.269531, avg loss: 2.509486, ppl: 12.298607 +epoch: 1, batch: 27436, sum loss: 5078.347656, avg loss: 2.616356, ppl: 13.685767 +epoch: 1, batch: 27437, sum loss: 5173.368164, avg loss: 2.886924, ppl: 17.938051 +epoch: 1, batch: 27438, sum loss: 6094.563477, avg loss: 3.082733, ppl: 21.817953 +epoch: 1, batch: 27439, sum loss: 4786.637207, avg loss: 2.602848, ppl: 13.502138 +epoch: 1, batch: 27440, sum loss: 4397.185547, avg loss: 2.543196, ppl: 12.720260 +epoch: 1, batch: 27441, sum loss: 3965.623047, avg loss: 2.638472, ppl: 13.991802 +epoch: 1, batch: 27442, sum loss: 5266.685547, avg loss: 2.989038, ppl: 19.866571 +epoch: 1, batch: 27443, sum loss: 4382.964355, avg loss: 2.637163, ppl: 13.973500 +epoch: 1, batch: 27444, sum loss: 3679.230225, avg loss: 2.526944, ppl: 12.515201 +epoch: 1, batch: 27445, sum loss: 4644.215820, avg loss: 2.709578, ppl: 15.022928 +epoch: 1, batch: 27446, sum loss: 4807.413086, avg loss: 2.922439, ppl: 18.586571 +epoch: 1, batch: 27447, sum loss: 4051.043457, avg loss: 2.369031, ppl: 10.687034 +epoch: 1, batch: 27448, sum loss: 3865.816162, avg loss: 2.299712, ppl: 9.971313 +epoch: 1, batch: 27449, sum loss: 3914.956299, avg loss: 2.410687, ppl: 11.141618 +epoch: 1, batch: 27450, sum loss: 4510.078613, avg loss: 2.765223, ppl: 15.882582 +epoch: 1, batch: 27451, sum loss: 5031.058594, avg loss: 2.762800, ppl: 15.844144 +epoch: 1, batch: 27452, sum loss: 4876.925293, avg loss: 2.809289, ppl: 16.598108 +epoch: 1, batch: 27453, sum loss: 5014.829102, avg loss: 2.700500, ppl: 14.887177 +epoch: 1, batch: 27454, sum loss: 4671.592773, avg loss: 2.684824, ppl: 14.655615 +epoch: 1, batch: 27455, sum loss: 4149.571777, avg loss: 2.738991, ppl: 15.471371 +epoch: 1, batch: 27456, sum loss: 
3902.113525, avg loss: 2.408712, ppl: 11.119632 +epoch: 1, batch: 27457, sum loss: 4213.114746, avg loss: 2.704182, ppl: 14.942084 +epoch: 1, batch: 27458, sum loss: 4807.685059, avg loss: 2.726991, ppl: 15.286819 +epoch: 1, batch: 27459, sum loss: 3827.979736, avg loss: 2.393984, ppl: 10.957056 +epoch: 1, batch: 27460, sum loss: 4765.695801, avg loss: 2.845192, ppl: 17.204857 +epoch: 1, batch: 27461, sum loss: 5158.734863, avg loss: 2.761635, ppl: 15.825702 +epoch: 1, batch: 27462, sum loss: 5213.742188, avg loss: 2.752768, ppl: 15.685987 +epoch: 1, batch: 27463, sum loss: 4622.979492, avg loss: 2.701917, ppl: 14.908282 +epoch: 1, batch: 27464, sum loss: 3466.498291, avg loss: 2.366210, ppl: 10.656931 +epoch: 1, batch: 27465, sum loss: 4592.925781, avg loss: 2.696962, ppl: 14.834590 +epoch: 1, batch: 27466, sum loss: 4465.401367, avg loss: 2.628253, ppl: 13.849550 +epoch: 1, batch: 27467, sum loss: 4728.949219, avg loss: 2.633045, ppl: 13.916082 +epoch: 1, batch: 27468, sum loss: 4088.014160, avg loss: 2.386465, ppl: 10.874981 +epoch: 1, batch: 27469, sum loss: 4999.222168, avg loss: 2.646491, ppl: 14.104464 +epoch: 1, batch: 27470, sum loss: 4767.051270, avg loss: 2.839221, ppl: 17.102430 +epoch: 1, batch: 27471, sum loss: 4580.840332, avg loss: 2.622118, ppl: 13.764847 +epoch: 1, batch: 27472, sum loss: 4565.143066, avg loss: 2.632724, ppl: 13.911611 +epoch: 1, batch: 27473, sum loss: 5147.470703, avg loss: 2.686571, ppl: 14.681252 +epoch: 1, batch: 27474, sum loss: 3981.458252, avg loss: 2.417400, ppl: 11.216660 +epoch: 1, batch: 27475, sum loss: 4204.584961, avg loss: 2.499753, ppl: 12.179488 +epoch: 1, batch: 27476, sum loss: 3655.428223, avg loss: 2.196772, ppl: 8.995927 +epoch: 1, batch: 27477, sum loss: 4504.599609, avg loss: 2.534946, ppl: 12.615752 +epoch: 1, batch: 27478, sum loss: 4298.255371, avg loss: 2.350058, ppl: 10.486176 +epoch: 1, batch: 27479, sum loss: 3595.957031, avg loss: 2.347230, ppl: 10.456570 +epoch: 1, batch: 27480, sum loss: 
4523.495117, avg loss: 2.537014, ppl: 12.641863 +epoch: 1, batch: 27481, sum loss: 4937.654785, avg loss: 2.732515, ppl: 15.371495 +epoch: 1, batch: 27482, sum loss: 4051.976562, avg loss: 2.501220, ppl: 12.197369 +epoch: 1, batch: 27483, sum loss: 4540.109863, avg loss: 2.771740, ppl: 15.986422 +epoch: 1, batch: 27484, sum loss: 5323.833496, avg loss: 2.942971, ppl: 18.972120 +epoch: 1, batch: 27485, sum loss: 3694.090088, avg loss: 2.392545, ppl: 10.941309 +epoch: 1, batch: 27486, sum loss: 4495.364746, avg loss: 2.814881, ppl: 16.691187 +epoch: 1, batch: 27487, sum loss: 4261.756348, avg loss: 2.640493, ppl: 14.020120 +epoch: 1, batch: 27488, sum loss: 3793.156738, avg loss: 2.376665, ppl: 10.768925 +epoch: 1, batch: 27489, sum loss: 5464.473633, avg loss: 3.009071, ppl: 20.268568 +epoch: 1, batch: 27490, sum loss: 4251.217285, avg loss: 2.739187, ppl: 15.474392 +epoch: 1, batch: 27491, sum loss: 5236.090820, avg loss: 2.812079, ppl: 16.644485 +epoch: 1, batch: 27492, sum loss: 4726.958008, avg loss: 2.952504, ppl: 19.153849 +epoch: 1, batch: 27493, sum loss: 4355.204102, avg loss: 2.500117, ppl: 12.183920 +epoch: 1, batch: 27494, sum loss: 4486.243652, avg loss: 2.660880, ppl: 14.308877 +epoch: 1, batch: 27495, sum loss: 4401.699219, avg loss: 2.739079, ppl: 15.472721 +epoch: 1, batch: 27496, sum loss: 3862.911377, avg loss: 2.558219, ppl: 12.912805 +epoch: 1, batch: 27497, sum loss: 3739.372559, avg loss: 2.348852, ppl: 10.473541 +epoch: 1, batch: 27498, sum loss: 5152.602051, avg loss: 2.732027, ppl: 15.363992 +epoch: 1, batch: 27499, sum loss: 3677.405762, avg loss: 2.277031, ppl: 9.747701 +epoch: 1, batch: 27500, sum loss: 4535.959961, avg loss: 2.801705, ppl: 16.472708 +epoch: 1, batch: 27501, sum loss: 4280.967773, avg loss: 2.596099, ppl: 13.411324 +epoch: 1, batch: 27502, sum loss: 4593.961914, avg loss: 2.737760, ppl: 15.452338 +epoch: 1, batch: 27503, sum loss: 5039.838379, avg loss: 2.703776, ppl: 14.936022 +epoch: 1, batch: 27504, sum loss: 
5202.787109, avg loss: 2.871295, ppl: 17.659876 +epoch: 1, batch: 27505, sum loss: 4740.592285, avg loss: 2.883572, ppl: 17.878017 +epoch: 1, batch: 27506, sum loss: 3762.020020, avg loss: 2.416198, ppl: 11.203181 +epoch: 1, batch: 27507, sum loss: 4139.529297, avg loss: 2.440760, ppl: 11.481765 +epoch: 1, batch: 27508, sum loss: 4154.344727, avg loss: 2.619385, ppl: 13.727279 +epoch: 1, batch: 27509, sum loss: 4043.081787, avg loss: 2.803802, ppl: 16.507280 +epoch: 1, batch: 27510, sum loss: 4533.007812, avg loss: 2.649333, ppl: 14.144594 +epoch: 1, batch: 27511, sum loss: 4163.564941, avg loss: 2.534123, ppl: 12.605376 +epoch: 1, batch: 27512, sum loss: 3672.730225, avg loss: 2.383342, ppl: 10.841076 +epoch: 1, batch: 27513, sum loss: 5121.025391, avg loss: 2.760661, ppl: 15.810285 +epoch: 1, batch: 27514, sum loss: 4944.450195, avg loss: 2.627232, ppl: 13.835418 +epoch: 1, batch: 27515, sum loss: 4801.450195, avg loss: 2.788299, ppl: 16.253344 +epoch: 1, batch: 27516, sum loss: 4655.067871, avg loss: 2.814430, ppl: 16.683672 +epoch: 1, batch: 27517, sum loss: 4355.985352, avg loss: 2.675667, ppl: 14.522027 +epoch: 1, batch: 27518, sum loss: 4370.993652, avg loss: 2.764702, ppl: 15.874307 +epoch: 1, batch: 27519, sum loss: 4649.038086, avg loss: 2.679561, ppl: 14.578691 +epoch: 1, batch: 27520, sum loss: 3784.541992, avg loss: 2.682170, ppl: 14.616779 +epoch: 1, batch: 27521, sum loss: 4712.351074, avg loss: 2.698941, ppl: 14.863986 +epoch: 1, batch: 27522, sum loss: 4064.738281, avg loss: 2.559659, ppl: 12.931407 +epoch: 1, batch: 27523, sum loss: 4577.768066, avg loss: 2.548869, ppl: 12.792623 +epoch: 1, batch: 27524, sum loss: 4438.802734, avg loss: 2.703290, ppl: 14.928774 +epoch: 1, batch: 27525, sum loss: 4527.406250, avg loss: 2.714272, ppl: 15.093625 +epoch: 1, batch: 27526, sum loss: 5434.766602, avg loss: 2.907847, ppl: 18.317322 +epoch: 1, batch: 27527, sum loss: 3342.049805, avg loss: 2.253574, ppl: 9.521705 +epoch: 1, batch: 27528, sum loss: 
4584.410156, avg loss: 2.773388, ppl: 16.012793 +epoch: 1, batch: 27529, sum loss: 4929.108398, avg loss: 2.894368, ppl: 18.072071 +epoch: 1, batch: 27530, sum loss: 4404.115723, avg loss: 2.706893, ppl: 14.982658 +epoch: 1, batch: 27531, sum loss: 4939.914062, avg loss: 2.711259, ppl: 15.048211 +epoch: 1, batch: 27532, sum loss: 4822.658691, avg loss: 2.752659, ppl: 15.684286 +epoch: 1, batch: 27533, sum loss: 3911.487305, avg loss: 2.549861, ppl: 12.805326 +epoch: 1, batch: 27534, sum loss: 3950.905029, avg loss: 2.429831, ppl: 11.356960 +epoch: 1, batch: 27535, sum loss: 5265.287109, avg loss: 2.797708, ppl: 16.407003 +epoch: 1, batch: 27536, sum loss: 4659.005371, avg loss: 2.699308, ppl: 14.869433 +epoch: 1, batch: 27537, sum loss: 4679.558594, avg loss: 2.521314, ppl: 12.444938 +epoch: 1, batch: 27538, sum loss: 4111.651367, avg loss: 2.476898, ppl: 11.904285 +epoch: 1, batch: 27539, sum loss: 4131.785156, avg loss: 2.655389, ppl: 14.230518 +epoch: 1, batch: 27540, sum loss: 4133.265625, avg loss: 2.532638, ppl: 12.586670 +epoch: 1, batch: 27541, sum loss: 3753.776611, avg loss: 2.197761, ppl: 9.004831 +epoch: 1, batch: 27542, sum loss: 5274.432617, avg loss: 2.755712, ppl: 15.732235 +epoch: 1, batch: 27543, sum loss: 4159.838379, avg loss: 2.764012, ppl: 15.863364 +epoch: 1, batch: 27544, sum loss: 4036.597656, avg loss: 2.540338, ppl: 12.683961 +epoch: 1, batch: 27545, sum loss: 4425.096191, avg loss: 2.745097, ppl: 15.566123 +epoch: 1, batch: 27546, sum loss: 4555.548828, avg loss: 2.845440, ppl: 17.209124 +epoch: 1, batch: 27547, sum loss: 4900.825195, avg loss: 2.805280, ppl: 16.531712 +epoch: 1, batch: 27548, sum loss: 4929.264160, avg loss: 2.665908, ppl: 14.381003 +epoch: 1, batch: 27549, sum loss: 4865.668945, avg loss: 2.634363, ppl: 13.934439 +epoch: 1, batch: 27550, sum loss: 3758.446777, avg loss: 2.456501, ppl: 11.663931 +epoch: 1, batch: 27551, sum loss: 5081.488281, avg loss: 2.885570, ppl: 17.913769 +epoch: 1, batch: 27552, sum loss: 
3213.777344, avg loss: 2.337293, ppl: 10.353169 +epoch: 1, batch: 27553, sum loss: 4270.477051, avg loss: 2.600778, ppl: 13.474216 +epoch: 1, batch: 27554, sum loss: 4100.898438, avg loss: 2.664651, ppl: 14.362942 +epoch: 1, batch: 27555, sum loss: 4755.258301, avg loss: 2.888978, ppl: 17.974939 +epoch: 1, batch: 27556, sum loss: 4918.984863, avg loss: 2.820519, ppl: 16.785555 +epoch: 1, batch: 27557, sum loss: 4133.670898, avg loss: 2.617904, ppl: 13.706966 +epoch: 1, batch: 27558, sum loss: 4264.790039, avg loss: 2.787445, ppl: 16.239468 +epoch: 1, batch: 27559, sum loss: 5531.610352, avg loss: 2.982000, ppl: 19.727234 +epoch: 1, batch: 27560, sum loss: 4393.156738, avg loss: 2.452907, ppl: 11.622084 +epoch: 1, batch: 27561, sum loss: 5031.675781, avg loss: 2.842755, ppl: 17.162981 +epoch: 1, batch: 27562, sum loss: 5090.423828, avg loss: 2.762031, ppl: 15.831970 +epoch: 1, batch: 27563, sum loss: 4420.977051, avg loss: 2.559917, ppl: 12.934747 +epoch: 1, batch: 27564, sum loss: 5367.835938, avg loss: 2.909396, ppl: 18.345718 +epoch: 1, batch: 27565, sum loss: 3646.210693, avg loss: 2.675136, ppl: 14.514328 +epoch: 1, batch: 27566, sum loss: 4390.020508, avg loss: 2.594575, ppl: 13.390891 +epoch: 1, batch: 27567, sum loss: 4549.352539, avg loss: 2.581926, ppl: 13.222575 +epoch: 1, batch: 27568, sum loss: 4098.402344, avg loss: 2.417937, ppl: 11.222678 +epoch: 1, batch: 27569, sum loss: 4699.776367, avg loss: 2.522693, ppl: 12.462108 +epoch: 1, batch: 27570, sum loss: 3866.046875, avg loss: 2.515320, ppl: 12.370567 +epoch: 1, batch: 27571, sum loss: 5158.669434, avg loss: 2.846948, ppl: 17.235098 +epoch: 1, batch: 27572, sum loss: 4799.534668, avg loss: 2.552944, ppl: 12.844863 +epoch: 1, batch: 27573, sum loss: 4226.974609, avg loss: 2.560251, ppl: 12.939068 +epoch: 1, batch: 27574, sum loss: 4292.707031, avg loss: 2.558228, ppl: 12.912919 +epoch: 1, batch: 27575, sum loss: 5173.985840, avg loss: 2.793729, ppl: 16.341843 +epoch: 1, batch: 27576, sum loss: 
4247.859375, avg loss: 2.623755, ppl: 13.787398 +epoch: 1, batch: 27577, sum loss: 4002.978027, avg loss: 2.528729, ppl: 12.537560 +epoch: 1, batch: 27578, sum loss: 4435.610840, avg loss: 2.393746, ppl: 10.954449 +epoch: 1, batch: 27579, sum loss: 4772.043945, avg loss: 2.862654, ppl: 17.507927 +epoch: 1, batch: 27580, sum loss: 4308.746582, avg loss: 2.453728, ppl: 11.631631 +epoch: 1, batch: 27581, sum loss: 4503.444824, avg loss: 2.714554, ppl: 15.097873 +epoch: 1, batch: 27582, sum loss: 4385.282227, avg loss: 2.621209, ppl: 13.752338 +epoch: 1, batch: 27583, sum loss: 5037.262207, avg loss: 2.498642, ppl: 12.165964 +epoch: 1, batch: 27584, sum loss: 4767.232910, avg loss: 2.602201, ppl: 13.493411 +epoch: 1, batch: 27585, sum loss: 5350.004395, avg loss: 2.884099, ppl: 17.887453 +epoch: 1, batch: 27586, sum loss: 4261.729492, avg loss: 2.514295, ppl: 12.357889 +epoch: 1, batch: 27587, sum loss: 4692.897461, avg loss: 2.464757, ppl: 11.760627 +epoch: 1, batch: 27588, sum loss: 5112.595703, avg loss: 2.642168, ppl: 14.043621 +epoch: 1, batch: 27589, sum loss: 3846.158691, avg loss: 2.676520, ppl: 14.534420 +epoch: 1, batch: 27590, sum loss: 4536.018555, avg loss: 2.661983, ppl: 14.324663 +epoch: 1, batch: 27591, sum loss: 3754.596680, avg loss: 2.528348, ppl: 12.532782 +epoch: 1, batch: 27592, sum loss: 3918.693359, avg loss: 2.475485, ppl: 11.887475 +epoch: 1, batch: 27593, sum loss: 3865.992432, avg loss: 2.330315, ppl: 10.281178 +epoch: 1, batch: 27594, sum loss: 4794.151367, avg loss: 2.622621, ppl: 13.771773 +epoch: 1, batch: 27595, sum loss: 4119.871582, avg loss: 2.459625, ppl: 11.700423 +epoch: 1, batch: 27596, sum loss: 4857.222656, avg loss: 2.739550, ppl: 15.480023 +epoch: 1, batch: 27597, sum loss: 3963.859131, avg loss: 2.486737, ppl: 12.021987 +epoch: 1, batch: 27598, sum loss: 3570.491211, avg loss: 2.570548, ppl: 13.072988 +epoch: 1, batch: 27599, sum loss: 3968.147705, avg loss: 2.656056, ppl: 14.240014 +epoch: 1, batch: 27600, sum loss: 
5227.694336, avg loss: 2.794065, ppl: 16.347328 +epoch: 1, batch: 27601, sum loss: 4781.506348, avg loss: 2.784803, ppl: 16.196621 +epoch: 1, batch: 27602, sum loss: 4393.979980, avg loss: 2.471305, ppl: 11.837884 +epoch: 1, batch: 27603, sum loss: 5320.391113, avg loss: 2.818004, ppl: 16.743391 +epoch: 1, batch: 27604, sum loss: 4317.140137, avg loss: 2.659975, ppl: 14.295936 +epoch: 1, batch: 27605, sum loss: 4041.450684, avg loss: 2.708747, ppl: 15.010458 +epoch: 1, batch: 27606, sum loss: 3460.504639, avg loss: 2.393157, ppl: 10.948000 +epoch: 1, batch: 27607, sum loss: 4533.920898, avg loss: 2.721441, ppl: 15.202213 +epoch: 1, batch: 27608, sum loss: 4811.379883, avg loss: 2.755659, ppl: 15.731399 +epoch: 1, batch: 27609, sum loss: 5611.196777, avg loss: 3.083075, ppl: 21.825418 +epoch: 1, batch: 27610, sum loss: 3513.922852, avg loss: 2.382321, ppl: 10.830007 +epoch: 1, batch: 27611, sum loss: 3982.194824, avg loss: 2.587521, ppl: 13.296769 +epoch: 1, batch: 27612, sum loss: 3616.034912, avg loss: 2.544711, ppl: 12.739551 +epoch: 1, batch: 27613, sum loss: 4250.062012, avg loss: 2.504456, ppl: 12.236904 +epoch: 1, batch: 27614, sum loss: 3673.466797, avg loss: 2.402529, ppl: 11.051089 +epoch: 1, batch: 27615, sum loss: 4552.370117, avg loss: 2.622333, ppl: 13.767807 +epoch: 1, batch: 27616, sum loss: 4790.535156, avg loss: 2.549513, ppl: 12.800873 +epoch: 1, batch: 27617, sum loss: 5188.420898, avg loss: 2.827477, ppl: 16.902769 +epoch: 1, batch: 27618, sum loss: 4743.952148, avg loss: 2.838990, ppl: 17.098486 +epoch: 1, batch: 27619, sum loss: 4125.453613, avg loss: 2.483717, ppl: 11.985729 +epoch: 1, batch: 27620, sum loss: 4126.805176, avg loss: 2.505650, ppl: 12.251514 +epoch: 1, batch: 27621, sum loss: 4084.003906, avg loss: 2.336387, ppl: 10.343796 +epoch: 1, batch: 27622, sum loss: 5276.867188, avg loss: 2.888269, ppl: 17.962189 +epoch: 1, batch: 27623, sum loss: 3737.741211, avg loss: 2.272183, ppl: 9.700554 +epoch: 1, batch: 27624, sum loss: 
5130.221191, avg loss: 2.618796, ppl: 13.719197 +epoch: 1, batch: 27625, sum loss: 4555.089355, avg loss: 2.832767, ppl: 16.992414 +epoch: 1, batch: 27626, sum loss: 4034.884521, avg loss: 2.573268, ppl: 13.108596 +epoch: 1, batch: 27627, sum loss: 5118.631348, avg loss: 2.801659, ppl: 16.471952 +epoch: 1, batch: 27628, sum loss: 5390.641113, avg loss: 2.764431, ppl: 15.870011 +epoch: 1, batch: 27629, sum loss: 4322.164062, avg loss: 2.488292, ppl: 12.040699 +epoch: 1, batch: 27630, sum loss: 4956.441406, avg loss: 2.702531, ppl: 14.917435 +epoch: 1, batch: 27631, sum loss: 3943.347412, avg loss: 2.695385, ppl: 14.811213 +epoch: 1, batch: 27632, sum loss: 4395.443359, avg loss: 2.624146, ppl: 13.792784 +epoch: 1, batch: 27633, sum loss: 3995.583008, avg loss: 2.505068, ppl: 12.244390 +epoch: 1, batch: 27634, sum loss: 4246.477539, avg loss: 2.732611, ppl: 15.372972 +epoch: 1, batch: 27635, sum loss: 3360.298584, avg loss: 2.165141, ppl: 8.715830 +epoch: 1, batch: 27636, sum loss: 3982.291504, avg loss: 2.626842, ppl: 13.830019 +epoch: 1, batch: 27637, sum loss: 3943.763672, avg loss: 2.575940, ppl: 13.143665 +epoch: 1, batch: 27638, sum loss: 4549.156250, avg loss: 2.775568, ppl: 16.047739 +epoch: 1, batch: 27639, sum loss: 4700.705566, avg loss: 2.557511, ppl: 12.903662 +epoch: 1, batch: 27640, sum loss: 4771.127441, avg loss: 2.591596, ppl: 13.351058 +epoch: 1, batch: 27641, sum loss: 4880.461914, avg loss: 2.891269, ppl: 18.016157 +epoch: 1, batch: 27642, sum loss: 3808.606934, avg loss: 2.590889, ppl: 13.341630 +epoch: 1, batch: 27643, sum loss: 5861.217285, avg loss: 2.784426, ppl: 16.190525 +epoch: 1, batch: 27644, sum loss: 4584.653320, avg loss: 2.552702, ppl: 12.841761 +epoch: 1, batch: 27645, sum loss: 4148.602539, avg loss: 2.624037, ppl: 13.791287 +epoch: 1, batch: 27646, sum loss: 4298.336426, avg loss: 2.531411, ppl: 12.571237 +epoch: 1, batch: 27647, sum loss: 4282.620117, avg loss: 2.648497, ppl: 14.132786 +epoch: 1, batch: 27648, sum loss: 
4704.660156, avg loss: 2.625369, ppl: 13.809664 +epoch: 1, batch: 27649, sum loss: 4572.188477, avg loss: 2.680063, ppl: 14.586020 +epoch: 1, batch: 27650, sum loss: 3378.110596, avg loss: 2.380628, ppl: 10.811687 +epoch: 1, batch: 27651, sum loss: 4859.746094, avg loss: 2.853638, ppl: 17.350796 +epoch: 1, batch: 27652, sum loss: 4716.989746, avg loss: 2.755251, ppl: 15.724986 +epoch: 1, batch: 27653, sum loss: 4924.571777, avg loss: 2.823722, ppl: 16.839413 +epoch: 1, batch: 27654, sum loss: 5008.722168, avg loss: 2.868684, ppl: 17.613825 +epoch: 1, batch: 27655, sum loss: 4934.951172, avg loss: 2.658918, ppl: 14.280824 +epoch: 1, batch: 27656, sum loss: 5242.248535, avg loss: 2.867751, ppl: 17.597395 +epoch: 1, batch: 27657, sum loss: 4366.263184, avg loss: 2.591254, ppl: 13.346498 +epoch: 1, batch: 27658, sum loss: 4687.761230, avg loss: 2.687936, ppl: 14.701305 +epoch: 1, batch: 27659, sum loss: 4751.773438, avg loss: 2.702943, ppl: 14.923581 +epoch: 1, batch: 27660, sum loss: 4823.133301, avg loss: 2.752930, ppl: 15.688535 +epoch: 1, batch: 27661, sum loss: 4812.003906, avg loss: 2.598274, ppl: 13.440523 +epoch: 1, batch: 27662, sum loss: 4771.500000, avg loss: 2.685143, ppl: 14.660305 +epoch: 1, batch: 27663, sum loss: 5054.565430, avg loss: 2.885026, ppl: 17.904037 +epoch: 1, batch: 27664, sum loss: 4844.195801, avg loss: 2.663109, ppl: 14.340810 +epoch: 1, batch: 27665, sum loss: 5042.772949, avg loss: 2.746608, ppl: 15.589666 +epoch: 1, batch: 27666, sum loss: 4724.711914, avg loss: 2.456948, ppl: 11.669147 +epoch: 1, batch: 27667, sum loss: 4979.652832, avg loss: 2.749671, ppl: 15.637479 +epoch: 1, batch: 27668, sum loss: 4384.244629, avg loss: 2.505283, ppl: 12.247020 +epoch: 1, batch: 27669, sum loss: 4208.378906, avg loss: 2.743402, ppl: 15.539762 +epoch: 1, batch: 27670, sum loss: 4523.081055, avg loss: 2.593510, ppl: 13.376637 +epoch: 1, batch: 27671, sum loss: 4494.721680, avg loss: 2.564017, ppl: 12.987885 +epoch: 1, batch: 27672, sum loss: 
4726.214844, avg loss: 2.743015, ppl: 15.533750 +epoch: 1, batch: 27673, sum loss: 4289.236328, avg loss: 2.380265, ppl: 10.807761 +epoch: 1, batch: 27674, sum loss: 4623.984863, avg loss: 2.682126, ppl: 14.616135 +epoch: 1, batch: 27675, sum loss: 5383.681641, avg loss: 2.806925, ppl: 16.558918 +epoch: 1, batch: 27676, sum loss: 4843.057617, avg loss: 2.669822, ppl: 14.437406 +epoch: 1, batch: 27677, sum loss: 4167.703613, avg loss: 2.839035, ppl: 17.099257 +epoch: 1, batch: 27678, sum loss: 4884.254395, avg loss: 2.635863, ppl: 13.955352 +epoch: 1, batch: 27679, sum loss: 4878.093750, avg loss: 2.651138, ppl: 14.170153 +epoch: 1, batch: 27680, sum loss: 4330.906250, avg loss: 2.602708, ppl: 13.500249 +epoch: 1, batch: 27681, sum loss: 4503.939453, avg loss: 2.787091, ppl: 16.233732 +epoch: 1, batch: 27682, sum loss: 4612.272949, avg loss: 2.795317, ppl: 16.367815 +epoch: 1, batch: 27683, sum loss: 4256.653809, avg loss: 2.668749, ppl: 14.421910 +epoch: 1, batch: 27684, sum loss: 3840.200195, avg loss: 2.598241, ppl: 13.440078 +epoch: 1, batch: 27685, sum loss: 4473.797852, avg loss: 2.533294, ppl: 12.594928 +epoch: 1, batch: 27686, sum loss: 4696.908203, avg loss: 2.687019, ppl: 14.687820 +epoch: 1, batch: 27687, sum loss: 4074.595703, avg loss: 2.384199, ppl: 10.850367 +epoch: 1, batch: 27688, sum loss: 4208.591797, avg loss: 2.690915, ppl: 14.745167 +epoch: 1, batch: 27689, sum loss: 4306.900879, avg loss: 2.610243, ppl: 13.602357 +epoch: 1, batch: 27690, sum loss: 4545.008789, avg loss: 2.863900, ppl: 17.529758 +epoch: 1, batch: 27691, sum loss: 4721.116699, avg loss: 2.683978, ppl: 14.643226 +epoch: 1, batch: 27692, sum loss: 4238.852539, avg loss: 2.465883, ppl: 11.773874 +epoch: 1, batch: 27693, sum loss: 3783.769531, avg loss: 2.268447, ppl: 9.664380 +epoch: 1, batch: 27694, sum loss: 4719.072754, avg loss: 2.701244, ppl: 14.898249 +epoch: 1, batch: 27695, sum loss: 4797.903809, avg loss: 2.630430, ppl: 13.879733 +epoch: 1, batch: 27696, sum loss: 
4500.711914, avg loss: 2.881378, ppl: 17.838833 +epoch: 1, batch: 27697, sum loss: 3942.921387, avg loss: 2.563668, ppl: 12.983354 +epoch: 1, batch: 27698, sum loss: 3815.626465, avg loss: 2.488993, ppl: 12.049139 +epoch: 1, batch: 27699, sum loss: 4521.750000, avg loss: 2.543167, ppl: 12.719887 +epoch: 1, batch: 27700, sum loss: 5175.882324, avg loss: 2.672113, ppl: 14.470509 +epoch: 1, batch: 27701, sum loss: 5397.335449, avg loss: 2.785003, ppl: 16.199865 +epoch: 1, batch: 27702, sum loss: 4703.114746, avg loss: 2.701387, ppl: 14.900383 +epoch: 1, batch: 27703, sum loss: 4503.446777, avg loss: 2.545759, ppl: 12.752900 +epoch: 1, batch: 27704, sum loss: 3569.609375, avg loss: 2.162089, ppl: 8.689274 +epoch: 1, batch: 27705, sum loss: 4004.347656, avg loss: 2.518458, ppl: 12.409442 +epoch: 1, batch: 27706, sum loss: 4734.915039, avg loss: 2.537468, ppl: 12.647603 +epoch: 1, batch: 27707, sum loss: 4918.368164, avg loss: 2.637195, ppl: 13.973947 +epoch: 1, batch: 27708, sum loss: 3861.234375, avg loss: 2.531957, ppl: 12.578096 +epoch: 1, batch: 27709, sum loss: 4041.962402, avg loss: 2.356829, ppl: 10.557425 +epoch: 1, batch: 27710, sum loss: 4964.890137, avg loss: 2.840326, ppl: 17.121351 +epoch: 1, batch: 27711, sum loss: 4579.495605, avg loss: 2.609399, ppl: 13.590881 +epoch: 1, batch: 27712, sum loss: 4161.564941, avg loss: 2.671094, ppl: 14.455781 +epoch: 1, batch: 27713, sum loss: 4731.753906, avg loss: 2.676332, ppl: 14.531686 +epoch: 1, batch: 27714, sum loss: 4406.203125, avg loss: 2.616510, ppl: 13.687875 +epoch: 1, batch: 27715, sum loss: 5368.637695, avg loss: 2.751736, ppl: 15.669817 +epoch: 1, batch: 27716, sum loss: 4242.488281, avg loss: 2.502943, ppl: 12.218400 +epoch: 1, batch: 27717, sum loss: 5619.300293, avg loss: 2.988989, ppl: 19.865595 +epoch: 1, batch: 27718, sum loss: 6029.537598, avg loss: 2.961462, ppl: 19.326216 +epoch: 1, batch: 27719, sum loss: 3643.000000, avg loss: 2.418991, ppl: 11.234516 +epoch: 1, batch: 27720, sum loss: 
5063.153809, avg loss: 2.771294, ppl: 15.979296 +epoch: 1, batch: 27721, sum loss: 4842.507324, avg loss: 2.566247, ppl: 13.016874 +epoch: 1, batch: 27722, sum loss: 3646.584717, avg loss: 2.632913, ppl: 13.914245 +epoch: 1, batch: 27723, sum loss: 4124.509766, avg loss: 2.711709, ppl: 15.054986 +epoch: 1, batch: 27724, sum loss: 4047.321289, avg loss: 2.433747, ppl: 11.401525 +epoch: 1, batch: 27725, sum loss: 4375.069824, avg loss: 2.624517, ppl: 13.797908 +epoch: 1, batch: 27726, sum loss: 4031.308105, avg loss: 2.570987, ppl: 13.078730 +epoch: 1, batch: 27727, sum loss: 4509.707031, avg loss: 2.530700, ppl: 12.562296 +epoch: 1, batch: 27728, sum loss: 4192.455566, avg loss: 2.630148, ppl: 13.875819 +epoch: 1, batch: 27729, sum loss: 4768.689453, avg loss: 2.884870, ppl: 17.901232 +epoch: 1, batch: 27730, sum loss: 4571.288574, avg loss: 2.747169, ppl: 15.598407 +epoch: 1, batch: 27731, sum loss: 3904.878662, avg loss: 2.439025, ppl: 11.461859 +epoch: 1, batch: 27732, sum loss: 3843.805176, avg loss: 2.462399, ppl: 11.732928 +epoch: 1, batch: 27733, sum loss: 4162.040527, avg loss: 2.681727, ppl: 14.610310 +epoch: 1, batch: 27734, sum loss: 4272.029785, avg loss: 2.650142, ppl: 14.156054 +epoch: 1, batch: 27735, sum loss: 4530.848145, avg loss: 2.590536, ppl: 13.336923 +epoch: 1, batch: 27736, sum loss: 4653.637207, avg loss: 2.525034, ppl: 12.491317 +epoch: 1, batch: 27737, sum loss: 4204.132812, avg loss: 2.360546, ppl: 10.596740 +epoch: 1, batch: 27738, sum loss: 4467.531738, avg loss: 2.624872, ppl: 13.802804 +epoch: 1, batch: 27739, sum loss: 4900.311035, avg loss: 2.811423, ppl: 16.633575 +epoch: 1, batch: 27740, sum loss: 3774.259766, avg loss: 2.487976, ppl: 12.036890 +epoch: 1, batch: 27741, sum loss: 5744.283691, avg loss: 2.917361, ppl: 18.492422 +epoch: 1, batch: 27742, sum loss: 4397.984863, avg loss: 2.614735, ppl: 13.663600 +epoch: 1, batch: 27743, sum loss: 4017.039062, avg loss: 2.472024, ppl: 11.846399 +epoch: 1, batch: 27744, sum loss: 
3981.785889, avg loss: 2.310961, ppl: 10.084111 +epoch: 1, batch: 27745, sum loss: 3811.048584, avg loss: 2.547493, ppl: 12.775030 +epoch: 1, batch: 27746, sum loss: 4182.654297, avg loss: 2.674331, ppl: 14.502650 +epoch: 1, batch: 27747, sum loss: 4755.505859, avg loss: 2.779372, ppl: 16.108910 +epoch: 1, batch: 27748, sum loss: 4779.395996, avg loss: 2.686563, ppl: 14.681134 +epoch: 1, batch: 27749, sum loss: 5332.082031, avg loss: 3.009076, ppl: 20.268656 +epoch: 1, batch: 27750, sum loss: 5311.923340, avg loss: 2.840601, ppl: 17.126051 +epoch: 1, batch: 27751, sum loss: 4868.717285, avg loss: 2.715403, ppl: 15.110696 +epoch: 1, batch: 27752, sum loss: 4738.990723, avg loss: 2.691079, ppl: 14.747585 +epoch: 1, batch: 27753, sum loss: 3467.906982, avg loss: 2.396619, ppl: 10.985966 +epoch: 1, batch: 27754, sum loss: 4551.436523, avg loss: 2.532797, ppl: 12.588672 +epoch: 1, batch: 27755, sum loss: 5285.683105, avg loss: 2.781939, ppl: 16.150299 +epoch: 1, batch: 27756, sum loss: 4172.396973, avg loss: 2.441426, ppl: 11.489413 +epoch: 1, batch: 27757, sum loss: 3932.953125, avg loss: 2.465801, ppl: 11.772914 +epoch: 1, batch: 27758, sum loss: 4747.877930, avg loss: 2.768442, ppl: 15.933788 +epoch: 1, batch: 27759, sum loss: 4202.717285, avg loss: 2.685442, ppl: 14.664685 +epoch: 1, batch: 27760, sum loss: 4936.122559, avg loss: 2.773103, ppl: 16.008223 +epoch: 1, batch: 27761, sum loss: 4303.638672, avg loss: 2.778334, ppl: 16.092184 +epoch: 1, batch: 27762, sum loss: 4393.374023, avg loss: 2.629189, ppl: 13.862519 +epoch: 1, batch: 27763, sum loss: 3833.785156, avg loss: 2.315088, ppl: 10.125810 +epoch: 1, batch: 27764, sum loss: 4247.326172, avg loss: 2.538749, ppl: 12.663813 +epoch: 1, batch: 27765, sum loss: 5178.855469, avg loss: 2.730024, ppl: 15.333257 +epoch: 1, batch: 27766, sum loss: 5484.540039, avg loss: 2.667578, ppl: 14.405034 +epoch: 1, batch: 27767, sum loss: 3331.306152, avg loss: 2.344339, ppl: 10.426380 +epoch: 1, batch: 27768, sum loss: 
4783.604004, avg loss: 2.792530, ppl: 16.322264 +epoch: 1, batch: 27769, sum loss: 3506.545898, avg loss: 2.457285, ppl: 11.673078 +epoch: 1, batch: 27770, sum loss: 4551.647949, avg loss: 2.614387, ppl: 13.658841 +epoch: 1, batch: 27771, sum loss: 5357.999023, avg loss: 2.888409, ppl: 17.964708 +epoch: 1, batch: 27772, sum loss: 4517.602051, avg loss: 2.547999, ppl: 12.781501 +epoch: 1, batch: 27773, sum loss: 5047.257324, avg loss: 2.970722, ppl: 19.506006 +epoch: 1, batch: 27774, sum loss: 4384.523926, avg loss: 2.863830, ppl: 17.528538 +epoch: 1, batch: 27775, sum loss: 3603.584717, avg loss: 2.266405, ppl: 9.644670 +epoch: 1, batch: 27776, sum loss: 5610.590820, avg loss: 3.000316, ppl: 20.091883 +epoch: 1, batch: 27777, sum loss: 4784.608887, avg loss: 2.768871, ppl: 15.940624 +epoch: 1, batch: 27778, sum loss: 4814.290039, avg loss: 2.779613, ppl: 16.112789 +epoch: 1, batch: 27779, sum loss: 4836.964844, avg loss: 2.613163, ppl: 13.642136 +epoch: 1, batch: 27780, sum loss: 5204.762695, avg loss: 2.845688, ppl: 17.213394 +epoch: 1, batch: 27781, sum loss: 4474.322754, avg loss: 2.489885, ppl: 12.059887 +epoch: 1, batch: 27782, sum loss: 5042.503906, avg loss: 2.660952, ppl: 14.309903 +epoch: 1, batch: 27783, sum loss: 5259.151855, avg loss: 2.788522, ppl: 16.256971 +epoch: 1, batch: 27784, sum loss: 4588.728027, avg loss: 2.557819, ppl: 12.907640 +epoch: 1, batch: 27785, sum loss: 3653.783203, avg loss: 2.287905, ppl: 9.854266 +epoch: 1, batch: 27786, sum loss: 4418.924805, avg loss: 2.597839, ppl: 13.434679 +epoch: 1, batch: 27787, sum loss: 4849.159180, avg loss: 2.702987, ppl: 14.924251 +epoch: 1, batch: 27788, sum loss: 4792.535645, avg loss: 2.665481, ppl: 14.374866 +epoch: 1, batch: 27789, sum loss: 4828.636719, avg loss: 2.556187, ppl: 12.886582 +epoch: 1, batch: 27790, sum loss: 6057.421875, avg loss: 2.872177, ppl: 17.675459 +epoch: 1, batch: 27791, sum loss: 4497.467773, avg loss: 2.696324, ppl: 14.825129 +epoch: 1, batch: 27792, sum loss: 
4669.137695, avg loss: 2.757908, ppl: 15.766822 +epoch: 1, batch: 27793, sum loss: 4249.105469, avg loss: 2.355380, ppl: 10.542134 +epoch: 1, batch: 27794, sum loss: 3884.284668, avg loss: 2.369911, ppl: 10.696443 +epoch: 1, batch: 27795, sum loss: 4089.651367, avg loss: 2.393008, ppl: 10.946376 +epoch: 1, batch: 27796, sum loss: 3304.399658, avg loss: 2.387572, ppl: 10.887029 +epoch: 1, batch: 27797, sum loss: 3483.706299, avg loss: 2.355447, ppl: 10.542841 +epoch: 1, batch: 27798, sum loss: 4753.475586, avg loss: 2.903773, ppl: 18.242838 +epoch: 1, batch: 27799, sum loss: 4172.149414, avg loss: 2.468727, ppl: 11.807410 +epoch: 1, batch: 27800, sum loss: 4903.224121, avg loss: 2.939583, ppl: 18.907957 +epoch: 1, batch: 27801, sum loss: 4473.551758, avg loss: 2.619175, ppl: 13.724402 +epoch: 1, batch: 27802, sum loss: 4440.966797, avg loss: 2.651324, ppl: 14.172785 +epoch: 1, batch: 27803, sum loss: 3773.565674, avg loss: 2.484243, ppl: 11.992043 +epoch: 1, batch: 27804, sum loss: 4332.093750, avg loss: 2.623921, ppl: 13.789689 +epoch: 1, batch: 27805, sum loss: 4400.915039, avg loss: 2.750572, ppl: 15.651578 +epoch: 1, batch: 27806, sum loss: 2907.200928, avg loss: 2.182583, ppl: 8.869188 +epoch: 1, batch: 27807, sum loss: 4852.137207, avg loss: 2.777411, ppl: 16.077343 +epoch: 1, batch: 27808, sum loss: 3772.081543, avg loss: 2.381365, ppl: 10.819657 +epoch: 1, batch: 27809, sum loss: 4610.977539, avg loss: 2.584629, ppl: 13.258367 +epoch: 1, batch: 27810, sum loss: 5495.566406, avg loss: 3.026193, ppl: 20.618586 +epoch: 1, batch: 27811, sum loss: 5081.859375, avg loss: 2.721939, ppl: 15.209780 +epoch: 1, batch: 27812, sum loss: 4492.556152, avg loss: 2.688544, ppl: 14.710236 +epoch: 1, batch: 27813, sum loss: 3892.693848, avg loss: 2.514660, ppl: 12.362407 +epoch: 1, batch: 27814, sum loss: 4823.843750, avg loss: 2.659230, ppl: 14.285292 +epoch: 1, batch: 27815, sum loss: 4544.385254, avg loss: 2.703382, ppl: 14.930140 +epoch: 1, batch: 27816, sum loss: 
3527.685303, avg loss: 2.421198, ppl: 11.259338 +epoch: 1, batch: 27817, sum loss: 4250.277344, avg loss: 2.714098, ppl: 15.090988 +epoch: 1, batch: 27818, sum loss: 3661.862305, avg loss: 2.434749, ppl: 11.412952 +epoch: 1, batch: 27819, sum loss: 3463.871094, avg loss: 2.468903, ppl: 11.809488 +epoch: 1, batch: 27820, sum loss: 4630.328613, avg loss: 2.563859, ppl: 12.985826 +epoch: 1, batch: 27821, sum loss: 4543.142578, avg loss: 2.475827, ppl: 11.891540 +epoch: 1, batch: 27822, sum loss: 4945.082520, avg loss: 2.816106, ppl: 16.711655 +epoch: 1, batch: 27823, sum loss: 4791.266602, avg loss: 2.714599, ppl: 15.098549 +epoch: 1, batch: 27824, sum loss: 3754.556641, avg loss: 2.468479, ppl: 11.804481 +epoch: 1, batch: 27825, sum loss: 4791.137207, avg loss: 2.432050, ppl: 11.382186 +epoch: 1, batch: 27826, sum loss: 4334.715332, avg loss: 2.694043, ppl: 14.791359 +epoch: 1, batch: 27827, sum loss: 4806.816406, avg loss: 2.784946, ppl: 16.198938 +epoch: 1, batch: 27828, sum loss: 3061.481934, avg loss: 2.189901, ppl: 8.934332 +epoch: 1, batch: 27829, sum loss: 4395.624023, avg loss: 2.646372, ppl: 14.102782 +epoch: 1, batch: 27830, sum loss: 3725.835449, avg loss: 2.482235, ppl: 11.967988 +epoch: 1, batch: 27831, sum loss: 3935.938477, avg loss: 2.630975, ppl: 13.887303 +epoch: 1, batch: 27832, sum loss: 5581.496094, avg loss: 2.934541, ppl: 18.812860 +epoch: 1, batch: 27833, sum loss: 4414.081055, avg loss: 2.678447, ppl: 14.562464 +epoch: 1, batch: 27834, sum loss: 3446.192627, avg loss: 2.342755, ppl: 10.409877 +epoch: 1, batch: 27835, sum loss: 3978.612305, avg loss: 2.774486, ppl: 16.030378 +epoch: 1, batch: 27836, sum loss: 4405.151367, avg loss: 2.536069, ppl: 12.629924 +epoch: 1, batch: 27837, sum loss: 4560.334961, avg loss: 2.796036, ppl: 16.379593 +epoch: 1, batch: 27838, sum loss: 3662.017334, avg loss: 2.527272, ppl: 12.519310 +epoch: 1, batch: 27839, sum loss: 4853.221191, avg loss: 2.768523, ppl: 15.935084 +epoch: 1, batch: 27840, sum loss: 
4379.815918, avg loss: 2.739097, ppl: 15.473004 +epoch: 1, batch: 27841, sum loss: 4365.776855, avg loss: 2.706619, ppl: 14.978551 +epoch: 1, batch: 27842, sum loss: 4580.106445, avg loss: 2.754123, ppl: 15.707259 +epoch: 1, batch: 27843, sum loss: 3603.576660, avg loss: 2.564823, ppl: 12.998360 +epoch: 1, batch: 27844, sum loss: 3594.844238, avg loss: 2.453819, ppl: 11.632682 +epoch: 1, batch: 27845, sum loss: 4686.166504, avg loss: 2.696298, ppl: 14.824754 +epoch: 1, batch: 27846, sum loss: 4700.410156, avg loss: 2.699834, ppl: 14.877256 +epoch: 1, batch: 27847, sum loss: 3516.117676, avg loss: 2.290630, ppl: 9.881165 +epoch: 1, batch: 27848, sum loss: 5126.981934, avg loss: 2.781868, ppl: 16.149151 +epoch: 1, batch: 27849, sum loss: 3981.779541, avg loss: 2.499548, ppl: 12.176986 +epoch: 1, batch: 27850, sum loss: 4196.603516, avg loss: 2.565161, ppl: 13.002754 +epoch: 1, batch: 27851, sum loss: 4135.395020, avg loss: 2.544858, ppl: 12.741425 +epoch: 1, batch: 27852, sum loss: 4693.522949, avg loss: 2.911615, ppl: 18.386463 +epoch: 1, batch: 27853, sum loss: 4540.696777, avg loss: 2.542383, ppl: 12.709929 +epoch: 1, batch: 27854, sum loss: 4633.678711, avg loss: 2.839264, ppl: 17.103172 +epoch: 1, batch: 27855, sum loss: 4615.530273, avg loss: 2.551427, ppl: 12.825388 +epoch: 1, batch: 27856, sum loss: 4260.306641, avg loss: 2.475483, ppl: 11.887449 +epoch: 1, batch: 27857, sum loss: 4088.497314, avg loss: 2.571382, ppl: 13.083891 +epoch: 1, batch: 27858, sum loss: 4865.850586, avg loss: 2.657483, ppl: 14.260345 +epoch: 1, batch: 27859, sum loss: 3691.191162, avg loss: 2.649814, ppl: 14.151408 +epoch: 1, batch: 27860, sum loss: 3781.021484, avg loss: 2.298493, ppl: 9.959165 +epoch: 1, batch: 27861, sum loss: 5126.880371, avg loss: 2.792418, ppl: 16.320442 +epoch: 1, batch: 27862, sum loss: 4179.350586, avg loss: 2.655242, ppl: 14.228425 +epoch: 1, batch: 27863, sum loss: 3558.650391, avg loss: 2.442451, ppl: 11.501190 +epoch: 1, batch: 27864, sum loss: 
3237.203857, avg loss: 2.278117, ppl: 9.758286 +epoch: 1, batch: 27865, sum loss: 5113.830078, avg loss: 2.750850, ppl: 15.655933 +epoch: 1, batch: 27866, sum loss: 4531.898438, avg loss: 2.881054, ppl: 17.833063 +epoch: 1, batch: 27867, sum loss: 4792.036133, avg loss: 2.543544, ppl: 12.724685 +epoch: 1, batch: 27868, sum loss: 4166.075195, avg loss: 2.470982, ppl: 11.834061 +epoch: 1, batch: 27869, sum loss: 4724.921875, avg loss: 2.785921, ppl: 16.214743 +epoch: 1, batch: 27870, sum loss: 4623.921387, avg loss: 2.616820, ppl: 13.692118 +epoch: 1, batch: 27871, sum loss: 4804.938477, avg loss: 2.885849, ppl: 17.918774 +epoch: 1, batch: 27872, sum loss: 3399.313721, avg loss: 2.565520, ppl: 13.007418 +epoch: 1, batch: 27873, sum loss: 4147.135254, avg loss: 2.533375, ppl: 12.595949 +epoch: 1, batch: 27874, sum loss: 4341.180176, avg loss: 2.618323, ppl: 13.712712 +epoch: 1, batch: 27875, sum loss: 4371.789062, avg loss: 2.614707, ppl: 13.663206 +epoch: 1, batch: 27876, sum loss: 4134.906738, avg loss: 2.384606, ppl: 10.854784 +epoch: 1, batch: 27877, sum loss: 4196.573242, avg loss: 2.656059, ppl: 14.240059 +epoch: 1, batch: 27878, sum loss: 5326.517090, avg loss: 2.874537, ppl: 17.717218 +epoch: 1, batch: 27879, sum loss: 5036.241699, avg loss: 2.904407, ppl: 18.254412 +epoch: 1, batch: 27880, sum loss: 5010.258789, avg loss: 2.864642, ppl: 17.542770 +epoch: 1, batch: 27881, sum loss: 4068.424072, avg loss: 2.398835, ppl: 11.010342 +epoch: 1, batch: 27882, sum loss: 4559.101562, avg loss: 2.527218, ppl: 12.518632 +epoch: 1, batch: 27883, sum loss: 4365.382324, avg loss: 2.536538, ppl: 12.635852 +epoch: 1, batch: 27884, sum loss: 4669.527832, avg loss: 2.602858, ppl: 13.502276 +epoch: 1, batch: 27885, sum loss: 4879.038574, avg loss: 2.893855, ppl: 18.062801 +epoch: 1, batch: 27886, sum loss: 5264.830566, avg loss: 2.842781, ppl: 17.163431 +epoch: 1, batch: 27887, sum loss: 4865.546875, avg loss: 2.759811, ppl: 15.796856 +epoch: 1, batch: 27888, sum loss: 
5209.985352, avg loss: 2.747883, ppl: 15.609549 +epoch: 1, batch: 27889, sum loss: 4797.513672, avg loss: 2.923531, ppl: 18.606865 +epoch: 1, batch: 27890, sum loss: 3406.146973, avg loss: 2.267741, ppl: 9.657559 +epoch: 1, batch: 27891, sum loss: 4511.392578, avg loss: 2.682160, ppl: 14.616626 +epoch: 1, batch: 27892, sum loss: 4395.033203, avg loss: 2.654006, ppl: 14.210847 +epoch: 1, batch: 27893, sum loss: 4086.712646, avg loss: 2.546238, ppl: 12.759019 +epoch: 1, batch: 27894, sum loss: 4613.762207, avg loss: 2.794526, ppl: 16.354876 +epoch: 1, batch: 27895, sum loss: 4312.494141, avg loss: 2.729427, ppl: 15.324098 +epoch: 1, batch: 27896, sum loss: 4451.694336, avg loss: 2.464947, ppl: 11.762858 +epoch: 1, batch: 27897, sum loss: 4739.439453, avg loss: 2.570195, ppl: 13.068372 +epoch: 1, batch: 27898, sum loss: 4041.730957, avg loss: 2.616007, ppl: 13.680990 +epoch: 1, batch: 27899, sum loss: 4132.004395, avg loss: 2.468342, ppl: 11.802862 +epoch: 1, batch: 27900, sum loss: 3957.969727, avg loss: 2.585219, ppl: 13.266190 +epoch: 1, batch: 27901, sum loss: 3838.438477, avg loss: 2.515359, ppl: 12.371054 +epoch: 1, batch: 27902, sum loss: 3909.707764, avg loss: 2.341142, ppl: 10.393103 +epoch: 1, batch: 27903, sum loss: 4998.264648, avg loss: 2.744791, ppl: 15.561365 +epoch: 1, batch: 27904, sum loss: 4371.724121, avg loss: 2.588351, ppl: 13.307803 +epoch: 1, batch: 27905, sum loss: 5183.893555, avg loss: 2.820399, ppl: 16.783546 +epoch: 1, batch: 27906, sum loss: 3724.078613, avg loss: 2.584371, ppl: 13.254951 +epoch: 1, batch: 27907, sum loss: 3018.337402, avg loss: 2.143706, ppl: 8.530992 +epoch: 1, batch: 27908, sum loss: 4854.831055, avg loss: 2.744393, ppl: 15.555171 +epoch: 1, batch: 27909, sum loss: 4106.478516, avg loss: 2.617259, ppl: 13.698119 +epoch: 1, batch: 27910, sum loss: 4385.986816, avg loss: 2.692441, ppl: 14.767683 +epoch: 1, batch: 27911, sum loss: 5283.801758, avg loss: 2.870072, ppl: 17.638283 +epoch: 1, batch: 27912, sum loss: 
4490.276855, avg loss: 2.565872, ppl: 13.012006 +epoch: 1, batch: 27913, sum loss: 3553.796143, avg loss: 2.283931, ppl: 9.815186 +epoch: 1, batch: 27914, sum loss: 4466.114258, avg loss: 2.504832, ppl: 12.241497 +epoch: 1, batch: 27915, sum loss: 4170.240723, avg loss: 2.798819, ppl: 16.425241 +epoch: 1, batch: 27916, sum loss: 4953.241699, avg loss: 2.718574, ppl: 15.158691 +epoch: 1, batch: 27917, sum loss: 5000.784180, avg loss: 2.804703, ppl: 16.522160 +epoch: 1, batch: 27918, sum loss: 4380.063965, avg loss: 2.621223, ppl: 13.752539 +epoch: 1, batch: 27919, sum loss: 4849.982422, avg loss: 2.712518, ppl: 15.067170 +epoch: 1, batch: 27920, sum loss: 4307.379395, avg loss: 2.809771, ppl: 16.606119 +epoch: 1, batch: 27921, sum loss: 3900.781738, avg loss: 2.358393, ppl: 10.573945 +epoch: 1, batch: 27922, sum loss: 5082.871582, avg loss: 2.668174, ppl: 14.413627 +epoch: 1, batch: 27923, sum loss: 4969.473633, avg loss: 2.618269, ppl: 13.711964 +epoch: 1, batch: 27924, sum loss: 5162.794922, avg loss: 2.941763, ppl: 18.949232 +epoch: 1, batch: 27925, sum loss: 4679.428711, avg loss: 2.627417, ppl: 13.837975 +epoch: 1, batch: 27926, sum loss: 3651.941895, avg loss: 2.541365, ppl: 12.696996 +epoch: 1, batch: 27927, sum loss: 4338.970703, avg loss: 2.877301, ppl: 17.766266 +epoch: 1, batch: 27928, sum loss: 4308.769531, avg loss: 2.508015, ppl: 12.280528 +epoch: 1, batch: 27929, sum loss: 4422.395508, avg loss: 2.534324, ppl: 12.607904 +epoch: 1, batch: 27930, sum loss: 4673.166016, avg loss: 2.576167, ppl: 13.146646 +epoch: 1, batch: 27931, sum loss: 3707.020020, avg loss: 2.476299, ppl: 11.897152 +epoch: 1, batch: 27932, sum loss: 4413.439453, avg loss: 2.587010, ppl: 13.289977 +epoch: 1, batch: 27933, sum loss: 4583.585449, avg loss: 2.550688, ppl: 12.815912 +epoch: 1, batch: 27934, sum loss: 4193.980469, avg loss: 2.908447, ppl: 18.328314 +epoch: 1, batch: 27935, sum loss: 4483.458008, avg loss: 2.657652, ppl: 14.262756 +epoch: 1, batch: 27936, sum loss: 
4541.845215, avg loss: 2.563118, ppl: 12.976217 +epoch: 1, batch: 27937, sum loss: 4876.182129, avg loss: 2.691050, ppl: 14.747149 +epoch: 1, batch: 27938, sum loss: 4280.625977, avg loss: 2.731733, ppl: 15.359487 +epoch: 1, batch: 27939, sum loss: 4035.394043, avg loss: 2.552431, ppl: 12.838280 +epoch: 1, batch: 27940, sum loss: 5247.879395, avg loss: 2.685711, ppl: 14.668626 +epoch: 1, batch: 27941, sum loss: 5236.010742, avg loss: 2.848754, ppl: 17.266262 +epoch: 1, batch: 27942, sum loss: 4075.100342, avg loss: 2.517048, ppl: 12.391960 +epoch: 1, batch: 27943, sum loss: 4104.270508, avg loss: 2.485930, ppl: 12.012289 +epoch: 1, batch: 27944, sum loss: 3649.267578, avg loss: 2.298028, ppl: 9.954528 +epoch: 1, batch: 27945, sum loss: 3483.140625, avg loss: 2.502256, ppl: 12.210011 +epoch: 1, batch: 27946, sum loss: 3880.217773, avg loss: 2.410073, ppl: 11.134774 +epoch: 1, batch: 27947, sum loss: 4081.867432, avg loss: 2.388454, ppl: 10.896631 +epoch: 1, batch: 27948, sum loss: 3667.449219, avg loss: 2.343418, ppl: 10.416780 +epoch: 1, batch: 27949, sum loss: 4593.984375, avg loss: 2.814941, ppl: 16.692198 +epoch: 1, batch: 27950, sum loss: 5053.007812, avg loss: 2.700699, ppl: 14.890138 +epoch: 1, batch: 27951, sum loss: 5206.749512, avg loss: 2.714677, ppl: 15.099727 +epoch: 1, batch: 27952, sum loss: 4766.322266, avg loss: 2.869550, ppl: 17.629080 +epoch: 1, batch: 27953, sum loss: 4539.421875, avg loss: 2.507968, ppl: 12.279951 +epoch: 1, batch: 27954, sum loss: 5037.097656, avg loss: 2.739042, ppl: 15.472153 +epoch: 1, batch: 27955, sum loss: 5094.924316, avg loss: 3.029087, ppl: 20.678347 +epoch: 1, batch: 27956, sum loss: 3651.752441, avg loss: 2.445916, ppl: 11.541116 +epoch: 1, batch: 27957, sum loss: 4000.896729, avg loss: 2.398619, ppl: 11.007966 +epoch: 1, batch: 27958, sum loss: 4049.971680, avg loss: 2.566522, ppl: 13.020459 +epoch: 1, batch: 27959, sum loss: 4003.682373, avg loss: 2.460776, ppl: 11.713896 +epoch: 1, batch: 27960, sum loss: 
5717.297363, avg loss: 2.979311, ppl: 19.674252 +epoch: 1, batch: 27961, sum loss: 4347.401855, avg loss: 2.686898, ppl: 14.686056 +epoch: 1, batch: 27962, sum loss: 4716.430664, avg loss: 2.754924, ppl: 15.719851 +epoch: 1, batch: 27963, sum loss: 4735.376465, avg loss: 2.643985, ppl: 14.069151 +epoch: 1, batch: 27964, sum loss: 4913.733398, avg loss: 2.850194, ppl: 17.291128 +epoch: 1, batch: 27965, sum loss: 4499.002441, avg loss: 2.695628, ppl: 14.814815 +epoch: 1, batch: 27966, sum loss: 4303.908691, avg loss: 2.638816, ppl: 13.996620 +epoch: 1, batch: 27967, sum loss: 4634.555664, avg loss: 2.807121, ppl: 16.562160 +epoch: 1, batch: 27968, sum loss: 5816.586426, avg loss: 2.957085, ppl: 19.241798 +epoch: 1, batch: 27969, sum loss: 4739.363770, avg loss: 2.822730, ppl: 16.822716 +epoch: 1, batch: 27970, sum loss: 4164.144531, avg loss: 2.703990, ppl: 14.939220 +epoch: 1, batch: 27971, sum loss: 4060.313965, avg loss: 2.653800, ppl: 14.207927 +epoch: 1, batch: 27972, sum loss: 4795.429199, avg loss: 2.730882, ppl: 15.346423 +epoch: 1, batch: 27973, sum loss: 4624.565430, avg loss: 2.731580, ppl: 15.357140 +epoch: 1, batch: 27974, sum loss: 3971.776123, avg loss: 2.504272, ppl: 12.234654 +epoch: 1, batch: 27975, sum loss: 2913.658691, avg loss: 2.140822, ppl: 8.506427 +epoch: 1, batch: 27976, sum loss: 3763.741211, avg loss: 2.346472, ppl: 10.448645 +epoch: 1, batch: 27977, sum loss: 4133.372559, avg loss: 2.844716, ppl: 17.196680 +epoch: 1, batch: 27978, sum loss: 3788.189941, avg loss: 2.272460, ppl: 9.703237 +epoch: 1, batch: 27979, sum loss: 3677.077637, avg loss: 2.321387, ppl: 10.189800 +epoch: 1, batch: 27980, sum loss: 4277.766113, avg loss: 2.774167, ppl: 16.025278 +epoch: 1, batch: 27981, sum loss: 4475.804688, avg loss: 2.797378, ppl: 16.401583 +epoch: 1, batch: 27982, sum loss: 5049.288574, avg loss: 2.753156, ppl: 15.692080 +epoch: 1, batch: 27983, sum loss: 3704.198730, avg loss: 2.242251, ppl: 9.414501 +epoch: 1, batch: 27984, sum loss: 
3794.738770, avg loss: 2.351139, ppl: 10.497522 +epoch: 1, batch: 27985, sum loss: 4876.865234, avg loss: 2.640425, ppl: 14.019167 +epoch: 1, batch: 27986, sum loss: 5198.493652, avg loss: 2.671374, ppl: 14.459825 +epoch: 1, batch: 27987, sum loss: 3798.357422, avg loss: 2.458484, ppl: 11.687079 +epoch: 1, batch: 27988, sum loss: 4328.944336, avg loss: 2.844247, ppl: 17.188616 +epoch: 1, batch: 27989, sum loss: 4179.892090, avg loss: 2.786595, ppl: 16.225672 +epoch: 1, batch: 27990, sum loss: 4184.650879, avg loss: 2.572004, ppl: 13.092035 +epoch: 1, batch: 27991, sum loss: 4219.838867, avg loss: 2.531397, ppl: 12.571054 +epoch: 1, batch: 27992, sum loss: 3602.498535, avg loss: 2.643066, ppl: 14.056230 +epoch: 1, batch: 27993, sum loss: 4789.998535, avg loss: 2.841043, ppl: 17.133627 +epoch: 1, batch: 27994, sum loss: 3421.930908, avg loss: 2.488677, ppl: 12.045330 +epoch: 1, batch: 27995, sum loss: 4124.774414, avg loss: 2.382885, ppl: 10.836123 +epoch: 1, batch: 27996, sum loss: 5211.695312, avg loss: 2.900220, ppl: 18.178146 +epoch: 1, batch: 27997, sum loss: 4627.834961, avg loss: 3.062763, ppl: 21.386566 +epoch: 1, batch: 27998, sum loss: 4676.229980, avg loss: 2.726665, ppl: 15.281830 +epoch: 1, batch: 27999, sum loss: 4530.937500, avg loss: 2.519987, ppl: 12.428439 +epoch: 1, batch: 28000, sum loss: 5135.231934, avg loss: 3.089791, ppl: 21.972477 +epoch: 1, batch: 28001, sum loss: 5310.388672, avg loss: 2.859660, ppl: 17.455589 +epoch: 1, batch: 28002, sum loss: 4923.672363, avg loss: 2.739940, ppl: 15.486058 +epoch: 1, batch: 28003, sum loss: 4508.660156, avg loss: 2.740827, ppl: 15.499792 +epoch: 1, batch: 28004, sum loss: 3829.380127, avg loss: 2.381455, ppl: 10.820638 +epoch: 1, batch: 28005, sum loss: 4719.357910, avg loss: 2.441468, ppl: 11.489899 +epoch: 1, batch: 28006, sum loss: 4346.921875, avg loss: 2.588995, ppl: 13.316376 +epoch: 1, batch: 28007, sum loss: 4201.780273, avg loss: 2.649294, ppl: 14.144048 +epoch: 1, batch: 28008, sum loss: 
5640.745605, avg loss: 2.870609, ppl: 17.647755 +epoch: 1, batch: 28009, sum loss: 4650.791016, avg loss: 2.639496, ppl: 14.006137 +epoch: 1, batch: 28010, sum loss: 4296.973145, avg loss: 2.754470, ppl: 15.712712 +epoch: 1, batch: 28011, sum loss: 4435.168945, avg loss: 2.586104, ppl: 13.277942 +epoch: 1, batch: 28012, sum loss: 4289.357910, avg loss: 2.665853, ppl: 14.380215 +epoch: 1, batch: 28013, sum loss: 5775.161133, avg loss: 2.958587, ppl: 19.270721 +epoch: 1, batch: 28014, sum loss: 4874.307617, avg loss: 2.720038, ppl: 15.180898 +epoch: 1, batch: 28015, sum loss: 3875.052979, avg loss: 2.539353, ppl: 12.671474 +epoch: 1, batch: 28016, sum loss: 3852.180420, avg loss: 2.441179, ppl: 11.486576 +epoch: 1, batch: 28017, sum loss: 4722.695801, avg loss: 2.558340, ppl: 12.914363 +epoch: 1, batch: 28018, sum loss: 3072.132080, avg loss: 2.235904, ppl: 9.354935 +epoch: 1, batch: 28019, sum loss: 4313.892578, avg loss: 2.752963, ppl: 15.689047 +epoch: 1, batch: 28020, sum loss: 3928.404541, avg loss: 2.363661, ppl: 10.629797 +epoch: 1, batch: 28021, sum loss: 4730.381836, avg loss: 2.504173, ppl: 12.233433 +epoch: 1, batch: 28022, sum loss: 4361.598633, avg loss: 2.653040, ppl: 14.197138 +epoch: 1, batch: 28023, sum loss: 4742.608398, avg loss: 2.964130, ppl: 19.377840 +epoch: 1, batch: 28024, sum loss: 5214.719238, avg loss: 2.513118, ppl: 12.343354 +epoch: 1, batch: 28025, sum loss: 4552.778809, avg loss: 2.578017, ppl: 13.171000 +epoch: 1, batch: 28026, sum loss: 4825.075684, avg loss: 2.710717, ppl: 15.040051 +epoch: 1, batch: 28027, sum loss: 5189.790039, avg loss: 2.932085, ppl: 18.766714 +epoch: 1, batch: 28028, sum loss: 4885.114746, avg loss: 2.676775, ppl: 14.538136 +epoch: 1, batch: 28029, sum loss: 4520.557617, avg loss: 2.697230, ppl: 14.838573 +epoch: 1, batch: 28030, sum loss: 4738.725586, avg loss: 2.761495, ppl: 15.823483 +epoch: 1, batch: 28031, sum loss: 3529.636719, avg loss: 2.365708, ppl: 10.651581 +epoch: 1, batch: 28032, sum loss: 
4759.194336, avg loss: 2.889614, ppl: 17.986362 +epoch: 1, batch: 28033, sum loss: 3920.217529, avg loss: 2.415414, ppl: 11.194407 +epoch: 1, batch: 28034, sum loss: 4067.902344, avg loss: 2.558429, ppl: 12.915511 +epoch: 1, batch: 28035, sum loss: 4416.949707, avg loss: 2.538477, ppl: 12.660374 +epoch: 1, batch: 28036, sum loss: 4945.364258, avg loss: 2.714251, ppl: 15.093294 +epoch: 1, batch: 28037, sum loss: 4177.442871, avg loss: 2.413312, ppl: 11.170895 +epoch: 1, batch: 28038, sum loss: 4777.912598, avg loss: 2.883472, ppl: 17.876226 +epoch: 1, batch: 28039, sum loss: 4616.783691, avg loss: 2.601005, ppl: 13.477274 +epoch: 1, batch: 28040, sum loss: 4382.232422, avg loss: 2.576268, ppl: 13.147981 +epoch: 1, batch: 28041, sum loss: 4051.474365, avg loss: 2.593774, ppl: 13.380168 +epoch: 1, batch: 28042, sum loss: 4568.892090, avg loss: 2.597437, ppl: 13.429279 +epoch: 1, batch: 28043, sum loss: 3232.837891, avg loss: 2.337554, ppl: 10.355880 +epoch: 1, batch: 28044, sum loss: 3453.328125, avg loss: 2.248260, ppl: 9.471247 +epoch: 1, batch: 28045, sum loss: 6000.100586, avg loss: 3.198348, ppl: 24.492037 +epoch: 1, batch: 28046, sum loss: 4981.293457, avg loss: 2.637000, ppl: 13.971231 +epoch: 1, batch: 28047, sum loss: 4414.721680, avg loss: 2.799443, ppl: 16.435490 +epoch: 1, batch: 28048, sum loss: 4029.185791, avg loss: 2.502600, ppl: 12.214206 +epoch: 1, batch: 28049, sum loss: 4185.107910, avg loss: 2.545686, ppl: 12.751973 +epoch: 1, batch: 28050, sum loss: 3964.197266, avg loss: 2.554251, ppl: 12.861662 +epoch: 1, batch: 28051, sum loss: 4272.441895, avg loss: 2.431669, ppl: 11.377851 +epoch: 1, batch: 28052, sum loss: 4320.435547, avg loss: 2.377785, ppl: 10.780999 +epoch: 1, batch: 28053, sum loss: 3327.553711, avg loss: 2.302805, ppl: 10.002204 +epoch: 1, batch: 28054, sum loss: 4597.368652, avg loss: 2.565496, ppl: 13.007108 +epoch: 1, batch: 28055, sum loss: 3953.431152, avg loss: 2.502172, ppl: 12.208977 +epoch: 1, batch: 28056, sum loss: 
4766.115234, avg loss: 2.775839, ppl: 16.052086 +epoch: 1, batch: 28057, sum loss: 5133.298828, avg loss: 2.870972, ppl: 17.654177 +epoch: 1, batch: 28058, sum loss: 5101.836426, avg loss: 2.908687, ppl: 18.332705 +epoch: 1, batch: 28059, sum loss: 4223.183105, avg loss: 2.545620, ppl: 12.751128 +epoch: 1, batch: 28060, sum loss: 4200.283691, avg loss: 2.709861, ppl: 15.027180 +epoch: 1, batch: 28061, sum loss: 5108.539062, avg loss: 2.806890, ppl: 16.558338 +epoch: 1, batch: 28062, sum loss: 4363.270508, avg loss: 2.532368, ppl: 12.583270 +epoch: 1, batch: 28063, sum loss: 4730.936035, avg loss: 2.657829, ppl: 14.265289 +epoch: 1, batch: 28064, sum loss: 3994.446777, avg loss: 2.555628, ppl: 12.879386 +epoch: 1, batch: 28065, sum loss: 4628.649902, avg loss: 2.640416, ppl: 14.019040 +epoch: 1, batch: 28066, sum loss: 5040.660645, avg loss: 2.865640, ppl: 17.560287 +epoch: 1, batch: 28067, sum loss: 3651.108398, avg loss: 2.377024, ppl: 10.772792 +epoch: 1, batch: 28068, sum loss: 4583.977051, avg loss: 2.733439, ppl: 15.385707 +epoch: 1, batch: 28069, sum loss: 4958.414062, avg loss: 2.641670, ppl: 14.036622 +epoch: 1, batch: 28070, sum loss: 4581.102051, avg loss: 2.497874, ppl: 12.156628 +epoch: 1, batch: 28071, sum loss: 4639.590332, avg loss: 2.577550, ppl: 13.164847 +epoch: 1, batch: 28072, sum loss: 5850.468262, avg loss: 2.909234, ppl: 18.342735 +epoch: 1, batch: 28073, sum loss: 4723.908203, avg loss: 2.649416, ppl: 14.145771 +epoch: 1, batch: 28074, sum loss: 4343.233887, avg loss: 2.613257, ppl: 13.643421 +epoch: 1, batch: 28075, sum loss: 3347.084961, avg loss: 2.207840, ppl: 9.096045 +epoch: 1, batch: 28076, sum loss: 4718.936035, avg loss: 2.640703, ppl: 14.023058 +epoch: 1, batch: 28077, sum loss: 4092.053711, avg loss: 2.653731, ppl: 14.206951 +epoch: 1, batch: 28078, sum loss: 4633.511719, avg loss: 2.678330, ppl: 14.560762 +epoch: 1, batch: 28079, sum loss: 4283.614258, avg loss: 2.554332, ppl: 12.862699 +epoch: 1, batch: 28080, sum loss: 
5317.282715, avg loss: 3.087853, ppl: 21.929943 +epoch: 1, batch: 28081, sum loss: 4992.109375, avg loss: 2.758072, ppl: 15.769404 +epoch: 1, batch: 28082, sum loss: 5062.533203, avg loss: 2.627158, ppl: 13.834395 +epoch: 1, batch: 28083, sum loss: 3914.521729, avg loss: 2.683017, ppl: 14.629163 +epoch: 1, batch: 28084, sum loss: 5448.209961, avg loss: 2.884177, ppl: 17.888834 +epoch: 1, batch: 28085, sum loss: 4106.146973, avg loss: 2.494622, ppl: 12.117146 +epoch: 1, batch: 28086, sum loss: 3744.195068, avg loss: 2.395518, ppl: 10.973885 +epoch: 1, batch: 28087, sum loss: 4922.981445, avg loss: 2.793974, ppl: 16.345844 +epoch: 1, batch: 28088, sum loss: 5040.183594, avg loss: 2.718545, ppl: 15.158257 +epoch: 1, batch: 28089, sum loss: 3913.543457, avg loss: 2.631838, ppl: 13.899295 +epoch: 1, batch: 28090, sum loss: 4781.612305, avg loss: 2.643235, ppl: 14.058613 +epoch: 1, batch: 28091, sum loss: 4663.959961, avg loss: 2.699051, ppl: 14.865616 +epoch: 1, batch: 28092, sum loss: 4739.759277, avg loss: 2.977236, ppl: 19.633471 +epoch: 1, batch: 28093, sum loss: 4329.732910, avg loss: 2.588005, ppl: 13.303209 +epoch: 1, batch: 28094, sum loss: 4567.544922, avg loss: 2.563157, ppl: 12.976715 +epoch: 1, batch: 28095, sum loss: 4946.386230, avg loss: 2.746466, ppl: 15.587455 +epoch: 1, batch: 28096, sum loss: 4155.634766, avg loss: 2.500382, ppl: 12.187148 +epoch: 1, batch: 28097, sum loss: 4518.796387, avg loss: 2.702630, ppl: 14.918910 +epoch: 1, batch: 28098, sum loss: 5170.536133, avg loss: 2.760564, ppl: 15.808754 +epoch: 1, batch: 28099, sum loss: 4747.908203, avg loss: 2.662876, ppl: 14.337470 +epoch: 1, batch: 28100, sum loss: 3757.030029, avg loss: 2.355505, ppl: 10.543449 +epoch: 1, batch: 28101, sum loss: 3705.299805, avg loss: 2.501890, ppl: 12.205545 +epoch: 1, batch: 28102, sum loss: 4037.564697, avg loss: 2.413368, ppl: 11.171523 +epoch: 1, batch: 28103, sum loss: 4790.151367, avg loss: 2.541194, ppl: 12.694825 +epoch: 1, batch: 28104, sum loss: 
4463.116699, avg loss: 2.647163, ppl: 14.113939 +epoch: 1, batch: 28105, sum loss: 4546.019043, avg loss: 2.825369, ppl: 16.867170 +epoch: 1, batch: 28106, sum loss: 4000.473877, avg loss: 2.686685, ppl: 14.682919 +epoch: 1, batch: 28107, sum loss: 4371.770996, avg loss: 2.619395, ppl: 13.727423 +epoch: 1, batch: 28108, sum loss: 3929.458496, avg loss: 2.469804, ppl: 11.820133 +epoch: 1, batch: 28109, sum loss: 5185.756836, avg loss: 2.908445, ppl: 18.328274 +epoch: 1, batch: 28110, sum loss: 5119.447266, avg loss: 2.789890, ppl: 16.279222 +epoch: 1, batch: 28111, sum loss: 4623.434082, avg loss: 2.311717, ppl: 10.091738 +epoch: 1, batch: 28112, sum loss: 4955.692871, avg loss: 2.675860, ppl: 14.524835 +epoch: 1, batch: 28113, sum loss: 4357.786133, avg loss: 2.526253, ppl: 12.506556 +epoch: 1, batch: 28114, sum loss: 5181.321777, avg loss: 2.788655, ppl: 16.259142 +epoch: 1, batch: 28115, sum loss: 4165.577148, avg loss: 2.433164, ppl: 11.394880 +epoch: 1, batch: 28116, sum loss: 4281.107910, avg loss: 2.507972, ppl: 12.279998 +epoch: 1, batch: 28117, sum loss: 4047.538086, avg loss: 2.503116, ppl: 12.220512 +epoch: 1, batch: 28118, sum loss: 4518.738770, avg loss: 2.636370, ppl: 13.962434 +epoch: 1, batch: 28119, sum loss: 3481.586670, avg loss: 2.573235, ppl: 13.108158 +epoch: 1, batch: 28120, sum loss: 4274.495605, avg loss: 2.840197, ppl: 17.119131 +epoch: 1, batch: 28121, sum loss: 3904.714355, avg loss: 2.691051, ppl: 14.747160 +epoch: 1, batch: 28122, sum loss: 4531.904297, avg loss: 2.743284, ppl: 15.537920 +epoch: 1, batch: 28123, sum loss: 3719.103516, avg loss: 2.577341, ppl: 13.162098 +epoch: 1, batch: 28124, sum loss: 5105.080078, avg loss: 2.595363, ppl: 13.401456 +epoch: 1, batch: 28125, sum loss: 3843.783936, avg loss: 2.330979, ppl: 10.288005 +epoch: 1, batch: 28126, sum loss: 5239.187012, avg loss: 2.800207, ppl: 16.448050 +epoch: 1, batch: 28127, sum loss: 4762.448730, avg loss: 2.896867, ppl: 18.117287 +epoch: 1, batch: 28128, sum loss: 
4481.564453, avg loss: 2.719396, ppl: 15.171154 +epoch: 1, batch: 28129, sum loss: 4555.262207, avg loss: 2.620979, ppl: 13.749182 +epoch: 1, batch: 28130, sum loss: 5235.445312, avg loss: 2.849998, ppl: 17.287739 +epoch: 1, batch: 28131, sum loss: 4695.522461, avg loss: 2.753972, ppl: 15.704892 +epoch: 1, batch: 28132, sum loss: 5242.770508, avg loss: 2.975466, ppl: 19.598749 +epoch: 1, batch: 28133, sum loss: 4650.399414, avg loss: 2.555165, ppl: 12.873418 +epoch: 1, batch: 28134, sum loss: 5503.865723, avg loss: 3.007577, ppl: 20.238302 +epoch: 1, batch: 28135, sum loss: 4735.452148, avg loss: 2.669364, ppl: 14.430792 +epoch: 1, batch: 28136, sum loss: 4426.444336, avg loss: 2.676206, ppl: 14.529860 +epoch: 1, batch: 28137, sum loss: 4022.440430, avg loss: 2.487595, ppl: 12.032299 +epoch: 1, batch: 28138, sum loss: 5720.812012, avg loss: 2.880570, ppl: 17.824429 +epoch: 1, batch: 28139, sum loss: 3741.599609, avg loss: 2.678310, ppl: 14.560471 +epoch: 1, batch: 28140, sum loss: 4521.207520, avg loss: 2.804719, ppl: 16.522436 +epoch: 1, batch: 28141, sum loss: 4336.469727, avg loss: 2.593583, ppl: 13.377613 +epoch: 1, batch: 28142, sum loss: 4231.514160, avg loss: 2.390686, ppl: 10.920983 +epoch: 1, batch: 28143, sum loss: 4184.682129, avg loss: 2.432955, ppl: 11.392495 +epoch: 1, batch: 28144, sum loss: 3856.458740, avg loss: 2.459476, ppl: 11.698683 +epoch: 1, batch: 28145, sum loss: 4083.841797, avg loss: 2.449815, ppl: 11.586204 +epoch: 1, batch: 28146, sum loss: 3627.347900, avg loss: 2.487893, ppl: 12.035888 +epoch: 1, batch: 28147, sum loss: 3952.426270, avg loss: 2.563182, ppl: 12.977040 +epoch: 1, batch: 28148, sum loss: 5050.372559, avg loss: 2.892539, ppl: 18.039049 +epoch: 1, batch: 28149, sum loss: 3674.316895, avg loss: 2.452815, ppl: 11.621012 +epoch: 1, batch: 28150, sum loss: 4482.507324, avg loss: 2.633671, ppl: 13.924788 +epoch: 1, batch: 28151, sum loss: 4932.320801, avg loss: 2.734102, ppl: 15.395919 +epoch: 1, batch: 28152, sum loss: 
3723.538086, avg loss: 2.377738, ppl: 10.780493 +epoch: 1, batch: 28153, sum loss: 5242.032715, avg loss: 2.958258, ppl: 19.264381 +epoch: 1, batch: 28154, sum loss: 3756.889160, avg loss: 2.649428, ppl: 14.145950 +epoch: 1, batch: 28155, sum loss: 5163.228027, avg loss: 2.829166, ppl: 16.931337 +epoch: 1, batch: 28156, sum loss: 4081.781494, avg loss: 2.594902, ppl: 13.395278 +epoch: 1, batch: 28157, sum loss: 4401.654785, avg loss: 2.577081, ppl: 13.158674 +epoch: 1, batch: 28158, sum loss: 4957.446777, avg loss: 2.836068, ppl: 17.048601 +epoch: 1, batch: 28159, sum loss: 5062.056641, avg loss: 2.706982, ppl: 14.983988 +epoch: 1, batch: 28160, sum loss: 3261.342285, avg loss: 2.272712, ppl: 9.705689 +epoch: 1, batch: 28161, sum loss: 3658.347412, avg loss: 2.507435, ppl: 12.273406 +epoch: 1, batch: 28162, sum loss: 4291.665527, avg loss: 2.562189, ppl: 12.964160 +epoch: 1, batch: 28163, sum loss: 3793.294189, avg loss: 2.422282, ppl: 11.271554 +epoch: 1, batch: 28164, sum loss: 3668.760742, avg loss: 2.473878, ppl: 11.868382 +epoch: 1, batch: 28165, sum loss: 4054.180664, avg loss: 2.548197, ppl: 12.784028 +epoch: 1, batch: 28166, sum loss: 4366.014160, avg loss: 2.519339, ppl: 12.420380 +epoch: 1, batch: 28167, sum loss: 4994.813477, avg loss: 2.585307, ppl: 13.267364 +epoch: 1, batch: 28168, sum loss: 4946.104980, avg loss: 2.657768, ppl: 14.264409 +epoch: 1, batch: 28169, sum loss: 3388.163818, avg loss: 2.320660, ppl: 10.182394 +epoch: 1, batch: 28170, sum loss: 4982.474609, avg loss: 2.674436, ppl: 14.504169 +epoch: 1, batch: 28171, sum loss: 5619.829102, avg loss: 2.877537, ppl: 17.770447 +epoch: 1, batch: 28172, sum loss: 4553.166992, avg loss: 2.817554, ppl: 16.735861 +epoch: 1, batch: 28173, sum loss: 3863.042725, avg loss: 2.647733, ppl: 14.121991 +epoch: 1, batch: 28174, sum loss: 4597.307617, avg loss: 2.701121, ppl: 14.896420 +epoch: 1, batch: 28175, sum loss: 4906.230957, avg loss: 2.629277, ppl: 13.863743 +epoch: 1, batch: 28176, sum loss: 
4503.914062, avg loss: 2.606432, ppl: 13.550612 +epoch: 1, batch: 28177, sum loss: 4160.701172, avg loss: 2.620089, ppl: 13.736947 +epoch: 1, batch: 28178, sum loss: 3504.607422, avg loss: 2.425334, ppl: 11.306005 +epoch: 1, batch: 28179, sum loss: 3178.267578, avg loss: 2.217912, ppl: 9.188126 +epoch: 1, batch: 28180, sum loss: 4725.851562, avg loss: 2.968500, ppl: 19.462696 +epoch: 1, batch: 28181, sum loss: 4528.887695, avg loss: 2.646924, ppl: 14.110575 +epoch: 1, batch: 28182, sum loss: 4714.750977, avg loss: 2.793099, ppl: 16.331553 +epoch: 1, batch: 28183, sum loss: 4185.877441, avg loss: 2.679819, ppl: 14.582456 +epoch: 1, batch: 28184, sum loss: 4284.497559, avg loss: 2.667807, ppl: 14.408332 +epoch: 1, batch: 28185, sum loss: 4276.895508, avg loss: 2.306848, ppl: 10.042716 +epoch: 1, batch: 28186, sum loss: 4932.645020, avg loss: 2.669180, ppl: 14.428136 +epoch: 1, batch: 28187, sum loss: 4676.273438, avg loss: 2.720345, ppl: 15.185564 +epoch: 1, batch: 28188, sum loss: 5041.206055, avg loss: 2.681493, ppl: 14.606879 +epoch: 1, batch: 28189, sum loss: 5082.992188, avg loss: 2.789787, ppl: 16.277557 +epoch: 1, batch: 28190, sum loss: 3942.772705, avg loss: 2.504938, ppl: 12.242802 +epoch: 1, batch: 28191, sum loss: 4450.357422, avg loss: 2.753934, ppl: 15.704289 +epoch: 1, batch: 28192, sum loss: 3850.979004, avg loss: 2.525232, ppl: 12.493794 +epoch: 1, batch: 28193, sum loss: 5398.948730, avg loss: 2.924674, ppl: 18.628157 +epoch: 1, batch: 28194, sum loss: 4101.803711, avg loss: 2.460590, ppl: 11.711720 +epoch: 1, batch: 28195, sum loss: 4809.916992, avg loss: 2.751669, ppl: 15.668760 +epoch: 1, batch: 28196, sum loss: 4649.648438, avg loss: 2.653909, ppl: 14.209475 +epoch: 1, batch: 28197, sum loss: 4331.712402, avg loss: 2.753790, ppl: 15.702036 +epoch: 1, batch: 28198, sum loss: 4625.596680, avg loss: 2.617769, ppl: 13.705107 +epoch: 1, batch: 28199, sum loss: 4348.460938, avg loss: 2.810899, ppl: 16.624861 +epoch: 1, batch: 28200, sum loss: 
4128.412109, avg loss: 2.376749, ppl: 10.769829 +epoch: 1, batch: 28201, sum loss: 4646.591309, avg loss: 2.491470, ppl: 12.079018 +epoch: 1, batch: 28202, sum loss: 4190.163574, avg loss: 2.760319, ppl: 15.804876 +epoch: 1, batch: 28203, sum loss: 4605.192383, avg loss: 2.665042, ppl: 14.368552 +epoch: 1, batch: 28204, sum loss: 4892.329590, avg loss: 2.871085, ppl: 17.656172 +epoch: 1, batch: 28205, sum loss: 4425.335938, avg loss: 2.609278, ppl: 13.589238 +epoch: 1, batch: 28206, sum loss: 4322.393555, avg loss: 2.559144, ppl: 12.924747 +epoch: 1, batch: 28207, sum loss: 3860.367188, avg loss: 2.461969, ppl: 11.727880 +epoch: 1, batch: 28208, sum loss: 3691.793945, avg loss: 2.716552, ppl: 15.128067 +epoch: 1, batch: 28209, sum loss: 4389.702148, avg loss: 2.614474, ppl: 13.660030 +epoch: 1, batch: 28210, sum loss: 4869.799805, avg loss: 2.700943, ppl: 14.893766 +epoch: 1, batch: 28211, sum loss: 3947.455566, avg loss: 2.561620, ppl: 12.956784 +epoch: 1, batch: 28212, sum loss: 4550.094727, avg loss: 2.682839, ppl: 14.626558 +epoch: 1, batch: 28213, sum loss: 4091.668213, avg loss: 2.574996, ppl: 13.131262 +epoch: 1, batch: 28214, sum loss: 3334.941650, avg loss: 2.406163, ppl: 11.091319 +epoch: 1, batch: 28215, sum loss: 4712.614258, avg loss: 2.659489, ppl: 14.288984 +epoch: 1, batch: 28216, sum loss: 5039.978516, avg loss: 2.649831, ppl: 14.151648 +epoch: 1, batch: 28217, sum loss: 4161.686035, avg loss: 2.489047, ppl: 12.049785 +epoch: 1, batch: 28218, sum loss: 3773.207764, avg loss: 2.346522, ppl: 10.449165 +epoch: 1, batch: 28219, sum loss: 3435.130371, avg loss: 2.414006, ppl: 11.178651 +epoch: 1, batch: 28220, sum loss: 5216.272461, avg loss: 3.001307, ppl: 20.111816 +epoch: 1, batch: 28221, sum loss: 3929.453613, avg loss: 2.559905, ppl: 12.934583 +epoch: 1, batch: 28222, sum loss: 3732.187500, avg loss: 2.496446, ppl: 12.139278 +epoch: 1, batch: 28223, sum loss: 5804.249023, avg loss: 2.907940, ppl: 18.319029 +epoch: 1, batch: 28224, sum loss: 
3861.572021, avg loss: 2.610934, ppl: 13.611762 +epoch: 1, batch: 28225, sum loss: 4781.696777, avg loss: 2.754434, ppl: 15.712139 +epoch: 1, batch: 28226, sum loss: 5362.874023, avg loss: 2.733371, ppl: 15.384662 +epoch: 1, batch: 28227, sum loss: 4114.060547, avg loss: 2.652521, ppl: 14.189768 +epoch: 1, batch: 28228, sum loss: 3971.646973, avg loss: 2.468395, ppl: 11.803481 +epoch: 1, batch: 28229, sum loss: 4237.935059, avg loss: 2.552973, ppl: 12.845237 +epoch: 1, batch: 28230, sum loss: 4064.833984, avg loss: 2.410934, ppl: 11.144362 +epoch: 1, batch: 28231, sum loss: 4890.832031, avg loss: 2.780462, ppl: 16.126467 +epoch: 1, batch: 28232, sum loss: 3686.277832, avg loss: 2.249102, ppl: 9.479217 +epoch: 1, batch: 28233, sum loss: 4046.010742, avg loss: 2.547866, ppl: 12.779800 +epoch: 1, batch: 28234, sum loss: 4438.845703, avg loss: 2.499350, ppl: 12.174579 +epoch: 1, batch: 28235, sum loss: 4245.263672, avg loss: 2.509021, ppl: 12.292890 +epoch: 1, batch: 28236, sum loss: 4232.787109, avg loss: 2.470979, ppl: 11.834026 +epoch: 1, batch: 28237, sum loss: 4124.750488, avg loss: 2.759030, ppl: 15.784530 +epoch: 1, batch: 28238, sum loss: 4721.584473, avg loss: 2.554970, ppl: 12.870914 +epoch: 1, batch: 28239, sum loss: 3877.365723, avg loss: 2.446287, ppl: 11.545403 +epoch: 1, batch: 28240, sum loss: 3959.246582, avg loss: 2.537979, ppl: 12.654067 +epoch: 1, batch: 28241, sum loss: 4686.336914, avg loss: 2.764801, ppl: 15.875874 +epoch: 1, batch: 28242, sum loss: 4408.645996, avg loss: 2.519226, ppl: 12.418984 +epoch: 1, batch: 28243, sum loss: 4711.689941, avg loss: 2.791286, ppl: 16.301962 +epoch: 1, batch: 28244, sum loss: 3768.776855, avg loss: 2.348147, ppl: 10.466162 +epoch: 1, batch: 28245, sum loss: 4942.435059, avg loss: 2.933196, ppl: 18.787577 +epoch: 1, batch: 28246, sum loss: 4320.221680, avg loss: 2.741257, ppl: 15.506471 +epoch: 1, batch: 28247, sum loss: 4108.836914, avg loss: 2.405642, ppl: 11.085546 +epoch: 1, batch: 28248, sum loss: 
4763.170410, avg loss: 2.833534, ppl: 17.005447 +epoch: 1, batch: 28249, sum loss: 4341.820312, avg loss: 2.521382, ppl: 12.445789 +epoch: 1, batch: 28250, sum loss: 3720.976318, avg loss: 2.435194, ppl: 11.418034 +epoch: 1, batch: 28251, sum loss: 4836.675781, avg loss: 2.795766, ppl: 16.375174 +epoch: 1, batch: 28252, sum loss: 3518.757568, avg loss: 2.334942, ppl: 10.328862 +epoch: 1, batch: 28253, sum loss: 4584.134766, avg loss: 2.685492, ppl: 14.665416 +epoch: 1, batch: 28254, sum loss: 4277.510742, avg loss: 2.517664, ppl: 12.399597 +epoch: 1, batch: 28255, sum loss: 3815.073486, avg loss: 2.347738, ppl: 10.461873 +epoch: 1, batch: 28256, sum loss: 4270.085938, avg loss: 2.728489, ppl: 15.309743 +epoch: 1, batch: 28257, sum loss: 4048.970947, avg loss: 2.597159, ppl: 13.425544 +epoch: 1, batch: 28258, sum loss: 5860.400879, avg loss: 2.931666, ppl: 18.758863 +epoch: 1, batch: 28259, sum loss: 4848.400391, avg loss: 2.843637, ppl: 17.178125 +epoch: 1, batch: 28260, sum loss: 4465.800781, avg loss: 2.389406, ppl: 10.907018 +epoch: 1, batch: 28261, sum loss: 4831.183594, avg loss: 2.679525, ppl: 14.578169 +epoch: 1, batch: 28262, sum loss: 3942.273682, avg loss: 2.468550, ppl: 11.805313 +epoch: 1, batch: 28263, sum loss: 4059.794678, avg loss: 2.470964, ppl: 11.833855 +epoch: 1, batch: 28264, sum loss: 4772.310059, avg loss: 2.521030, ppl: 12.441404 +epoch: 1, batch: 28265, sum loss: 4838.985352, avg loss: 2.851494, ppl: 17.313631 +epoch: 1, batch: 28266, sum loss: 6071.982910, avg loss: 2.939004, ppl: 18.897024 +epoch: 1, batch: 28267, sum loss: 4783.041992, avg loss: 2.706872, ppl: 14.982330 +epoch: 1, batch: 28268, sum loss: 4779.356934, avg loss: 2.564033, ppl: 12.988091 +epoch: 1, batch: 28269, sum loss: 4359.765625, avg loss: 2.572133, ppl: 13.093724 +epoch: 1, batch: 28270, sum loss: 4469.972656, avg loss: 2.555731, ppl: 12.880707 +epoch: 1, batch: 28271, sum loss: 5085.723145, avg loss: 2.672477, ppl: 14.475778 +epoch: 1, batch: 28272, sum loss: 
4978.837402, avg loss: 2.684009, ppl: 14.643687 +epoch: 1, batch: 28273, sum loss: 4777.793457, avg loss: 2.607966, ppl: 13.571418 +epoch: 1, batch: 28274, sum loss: 4974.918945, avg loss: 2.765380, ppl: 15.885077 +epoch: 1, batch: 28275, sum loss: 4216.490234, avg loss: 2.575742, ppl: 13.141070 +epoch: 1, batch: 28276, sum loss: 4052.565918, avg loss: 2.612873, ppl: 13.638175 +epoch: 1, batch: 28277, sum loss: 4793.320312, avg loss: 2.679329, ppl: 14.575315 +epoch: 1, batch: 28278, sum loss: 3998.240234, avg loss: 2.511457, ppl: 12.322877 +epoch: 1, batch: 28279, sum loss: 4874.840820, avg loss: 2.834210, ppl: 17.016947 +epoch: 1, batch: 28280, sum loss: 4996.471680, avg loss: 2.728821, ppl: 15.314824 +epoch: 1, batch: 28281, sum loss: 4345.525879, avg loss: 2.680768, ppl: 14.596306 +epoch: 1, batch: 28282, sum loss: 4217.659668, avg loss: 2.710578, ppl: 15.037968 +epoch: 1, batch: 28283, sum loss: 4480.780762, avg loss: 2.843135, ppl: 17.169506 +epoch: 1, batch: 28284, sum loss: 3827.436279, avg loss: 2.539772, ppl: 12.676781 +epoch: 1, batch: 28285, sum loss: 5053.461426, avg loss: 2.578296, ppl: 13.174675 +epoch: 1, batch: 28286, sum loss: 4247.200684, avg loss: 2.492489, ppl: 12.091330 +epoch: 1, batch: 28287, sum loss: 4043.873779, avg loss: 2.431674, ppl: 11.377913 +epoch: 1, batch: 28288, sum loss: 4982.713379, avg loss: 2.808745, ppl: 16.589081 +epoch: 1, batch: 28289, sum loss: 4527.867676, avg loss: 2.584399, ppl: 13.255326 +epoch: 1, batch: 28290, sum loss: 3943.495361, avg loss: 2.569052, ppl: 13.053447 +epoch: 1, batch: 28291, sum loss: 4547.128906, avg loss: 2.531809, ppl: 12.576237 +epoch: 1, batch: 28292, sum loss: 4265.955078, avg loss: 2.488889, ppl: 12.047880 +epoch: 1, batch: 28293, sum loss: 4217.036133, avg loss: 2.471885, ppl: 11.844755 +epoch: 1, batch: 28294, sum loss: 4780.195312, avg loss: 2.850444, ppl: 17.295465 +epoch: 1, batch: 28295, sum loss: 4610.173340, avg loss: 2.497385, ppl: 12.150681 +epoch: 1, batch: 28296, sum loss: 
4404.264648, avg loss: 2.680624, ppl: 14.594191 +epoch: 1, batch: 28297, sum loss: 4655.954590, avg loss: 2.643926, ppl: 14.068333 +epoch: 1, batch: 28298, sum loss: 3959.019043, avg loss: 2.689551, ppl: 14.725064 +epoch: 1, batch: 28299, sum loss: 3877.205322, avg loss: 2.550793, ppl: 12.817266 +epoch: 1, batch: 28300, sum loss: 4745.272461, avg loss: 2.590214, ppl: 13.332628 +epoch: 1, batch: 28301, sum loss: 4624.473145, avg loss: 2.517405, ppl: 12.396390 +epoch: 1, batch: 28302, sum loss: 4255.176758, avg loss: 2.772102, ppl: 15.992216 +epoch: 1, batch: 28303, sum loss: 3738.570068, avg loss: 2.470965, ppl: 11.833862 +epoch: 1, batch: 28304, sum loss: 4154.267090, avg loss: 2.725897, ppl: 15.270106 +epoch: 1, batch: 28305, sum loss: 4683.473633, avg loss: 2.835033, ppl: 17.030958 +epoch: 1, batch: 28306, sum loss: 4646.145508, avg loss: 2.780458, ppl: 16.126408 +epoch: 1, batch: 28307, sum loss: 4170.116699, avg loss: 2.652746, ppl: 14.192959 +epoch: 1, batch: 28308, sum loss: 4827.642090, avg loss: 2.697007, ppl: 14.835258 +epoch: 1, batch: 28309, sum loss: 5289.314941, avg loss: 2.845247, ppl: 17.205812 +epoch: 1, batch: 28310, sum loss: 3974.908447, avg loss: 2.559503, ppl: 12.929394 +epoch: 1, batch: 28311, sum loss: 4151.016602, avg loss: 2.613991, ppl: 13.653427 +epoch: 1, batch: 28312, sum loss: 4497.066895, avg loss: 2.755556, ppl: 15.729782 +epoch: 1, batch: 28313, sum loss: 4609.121094, avg loss: 2.740262, ppl: 15.491048 +epoch: 1, batch: 28314, sum loss: 5589.868164, avg loss: 2.979674, ppl: 19.681396 +epoch: 1, batch: 28315, sum loss: 3991.757568, avg loss: 2.464048, ppl: 11.752288 +epoch: 1, batch: 28316, sum loss: 4267.624512, avg loss: 2.673950, ppl: 14.497123 +epoch: 1, batch: 28317, sum loss: 4548.564453, avg loss: 2.667780, ppl: 14.407947 +epoch: 1, batch: 28318, sum loss: 5263.725586, avg loss: 2.837588, ppl: 17.074530 +epoch: 1, batch: 28319, sum loss: 4744.145508, avg loss: 2.766265, ppl: 15.899146 +epoch: 1, batch: 28320, sum loss: 
4964.237305, avg loss: 2.653254, ppl: 14.200165 +epoch: 1, batch: 28321, sum loss: 4698.096191, avg loss: 2.624635, ppl: 13.799533 +epoch: 1, batch: 28322, sum loss: 4907.635254, avg loss: 2.708408, ppl: 15.005366 +epoch: 1, batch: 28323, sum loss: 4493.175781, avg loss: 2.483790, ppl: 11.986607 +epoch: 1, batch: 28324, sum loss: 4247.692383, avg loss: 2.513427, ppl: 12.347178 +epoch: 1, batch: 28325, sum loss: 5007.694336, avg loss: 2.824419, ppl: 16.851149 +epoch: 1, batch: 28326, sum loss: 5572.185547, avg loss: 2.747626, ppl: 15.605541 +epoch: 1, batch: 28327, sum loss: 3945.889404, avg loss: 2.440253, ppl: 11.475946 +epoch: 1, batch: 28328, sum loss: 5006.100586, avg loss: 2.732588, ppl: 15.372617 +epoch: 1, batch: 28329, sum loss: 5165.892578, avg loss: 2.694780, ppl: 14.802260 +epoch: 1, batch: 28330, sum loss: 3936.433105, avg loss: 2.457199, ppl: 11.672073 +epoch: 1, batch: 28331, sum loss: 3488.377930, avg loss: 2.436018, ppl: 11.427448 +epoch: 1, batch: 28332, sum loss: 5525.251465, avg loss: 3.019263, ppl: 20.476200 +epoch: 1, batch: 28333, sum loss: 5537.962402, avg loss: 2.882854, ppl: 17.865187 +epoch: 1, batch: 28334, sum loss: 5095.044922, avg loss: 2.876931, ppl: 17.759684 +epoch: 1, batch: 28335, sum loss: 4489.400391, avg loss: 2.462644, ppl: 11.735804 +epoch: 1, batch: 28336, sum loss: 4054.661133, avg loss: 2.530999, ppl: 12.566059 +epoch: 1, batch: 28337, sum loss: 3706.998535, avg loss: 2.640312, ppl: 14.017583 +epoch: 1, batch: 28338, sum loss: 4668.890625, avg loss: 2.731943, ppl: 15.362710 +epoch: 1, batch: 28339, sum loss: 4103.291992, avg loss: 2.685400, ppl: 14.664073 +epoch: 1, batch: 28340, sum loss: 4576.636719, avg loss: 2.660835, ppl: 14.308235 +epoch: 1, batch: 28341, sum loss: 3396.323242, avg loss: 2.345527, ppl: 10.438774 +epoch: 1, batch: 28342, sum loss: 4401.796875, avg loss: 2.499601, ppl: 12.177629 +epoch: 1, batch: 28343, sum loss: 4116.548340, avg loss: 2.410157, ppl: 11.135709 +epoch: 1, batch: 28344, sum loss: 
4581.682129, avg loss: 2.616609, ppl: 13.689226 +epoch: 1, batch: 28345, sum loss: 3959.740723, avg loss: 2.571260, ppl: 13.082300 +epoch: 1, batch: 28346, sum loss: 5132.724121, avg loss: 2.865843, ppl: 17.563847 +epoch: 1, batch: 28347, sum loss: 3778.820801, avg loss: 2.396209, ppl: 10.981462 +epoch: 1, batch: 28348, sum loss: 4402.790527, avg loss: 2.636402, ppl: 13.962870 +epoch: 1, batch: 28349, sum loss: 4048.240967, avg loss: 2.419749, ppl: 11.243043 +epoch: 1, batch: 28350, sum loss: 4668.570801, avg loss: 2.590772, ppl: 13.340065 +epoch: 1, batch: 28351, sum loss: 3387.461670, avg loss: 2.337793, ppl: 10.358349 +epoch: 1, batch: 28352, sum loss: 4263.530762, avg loss: 2.590238, ppl: 13.332939 +epoch: 1, batch: 28353, sum loss: 4209.178711, avg loss: 2.460070, ppl: 11.705626 +epoch: 1, batch: 28354, sum loss: 4986.967773, avg loss: 2.823878, ppl: 16.842030 +epoch: 1, batch: 28355, sum loss: 4413.389160, avg loss: 2.622334, ppl: 13.767827 +epoch: 1, batch: 28356, sum loss: 4006.578857, avg loss: 2.569967, ppl: 13.065397 +epoch: 1, batch: 28357, sum loss: 3913.366699, avg loss: 2.405265, ppl: 11.081370 +epoch: 1, batch: 28358, sum loss: 3871.842773, avg loss: 2.488331, ppl: 12.041161 +epoch: 1, batch: 28359, sum loss: 3827.304199, avg loss: 2.558358, ppl: 12.914599 +epoch: 1, batch: 28360, sum loss: 4888.708984, avg loss: 2.705428, ppl: 14.960720 +epoch: 1, batch: 28361, sum loss: 4142.800781, avg loss: 2.424108, ppl: 11.292155 +epoch: 1, batch: 28362, sum loss: 4194.054199, avg loss: 2.744800, ppl: 15.561499 +epoch: 1, batch: 28363, sum loss: 4185.635254, avg loss: 2.352802, ppl: 10.514995 +epoch: 1, batch: 28364, sum loss: 4595.123047, avg loss: 2.640875, ppl: 14.025476 +epoch: 1, batch: 28365, sum loss: 4942.737793, avg loss: 2.619363, ppl: 13.726974 +epoch: 1, batch: 28366, sum loss: 4516.929688, avg loss: 2.668004, ppl: 14.411169 +epoch: 1, batch: 28367, sum loss: 4129.377930, avg loss: 2.574425, ppl: 13.123772 +epoch: 1, batch: 28368, sum loss: 
4628.794922, avg loss: 2.533550, ppl: 12.598145 +epoch: 1, batch: 28369, sum loss: 5226.401855, avg loss: 2.719252, ppl: 15.168970 +epoch: 1, batch: 28370, sum loss: 3537.531982, avg loss: 2.377374, ppl: 10.776563 +epoch: 1, batch: 28371, sum loss: 4332.051758, avg loss: 2.672456, ppl: 14.475481 +epoch: 1, batch: 28372, sum loss: 4023.641846, avg loss: 2.390756, ppl: 10.921744 +epoch: 1, batch: 28373, sum loss: 4243.969727, avg loss: 2.657464, ppl: 14.260077 +epoch: 1, batch: 28374, sum loss: 4665.076172, avg loss: 2.409647, ppl: 11.130029 +epoch: 1, batch: 28375, sum loss: 5088.751465, avg loss: 2.941475, ppl: 18.943762 +epoch: 1, batch: 28376, sum loss: 3935.995361, avg loss: 2.596303, ppl: 13.414058 +epoch: 1, batch: 28377, sum loss: 4346.207520, avg loss: 2.865002, ppl: 17.549082 +epoch: 1, batch: 28378, sum loss: 4057.574463, avg loss: 2.406628, ppl: 11.096479 +epoch: 1, batch: 28379, sum loss: 4325.040039, avg loss: 2.481377, ppl: 11.957718 +epoch: 1, batch: 28380, sum loss: 5435.521484, avg loss: 2.933363, ppl: 18.790716 +epoch: 1, batch: 28381, sum loss: 3894.333008, avg loss: 2.418840, ppl: 11.232826 +epoch: 1, batch: 28382, sum loss: 4585.349609, avg loss: 2.338271, ppl: 10.363304 +epoch: 1, batch: 28383, sum loss: 4431.375977, avg loss: 2.829742, ppl: 16.941090 +epoch: 1, batch: 28384, sum loss: 5030.295898, avg loss: 3.126349, ppl: 22.790625 +epoch: 1, batch: 28385, sum loss: 4897.434570, avg loss: 2.598109, ppl: 13.438296 +epoch: 1, batch: 28386, sum loss: 4349.086914, avg loss: 2.678009, ppl: 14.556084 +epoch: 1, batch: 28387, sum loss: 4772.413086, avg loss: 2.765013, ppl: 15.879250 +epoch: 1, batch: 28388, sum loss: 4381.005859, avg loss: 2.705995, ppl: 14.969205 +epoch: 1, batch: 28389, sum loss: 3741.972900, avg loss: 2.387985, ppl: 10.891530 +epoch: 1, batch: 28390, sum loss: 5088.915039, avg loss: 2.881605, ppl: 17.842896 +epoch: 1, batch: 28391, sum loss: 4337.893066, avg loss: 2.770046, ppl: 15.959375 +epoch: 1, batch: 28392, sum loss: 
4494.170898, avg loss: 2.529078, ppl: 12.541931 +epoch: 1, batch: 28393, sum loss: 4008.942139, avg loss: 2.589756, ppl: 13.326520 +epoch: 1, batch: 28394, sum loss: 5468.652344, avg loss: 2.516637, ppl: 12.386870 +epoch: 1, batch: 28395, sum loss: 4371.043457, avg loss: 2.754281, ppl: 15.709739 +epoch: 1, batch: 28396, sum loss: 4312.359863, avg loss: 2.652128, ppl: 14.184187 +epoch: 1, batch: 28397, sum loss: 4225.985352, avg loss: 2.444179, ppl: 11.521088 +epoch: 1, batch: 28398, sum loss: 3757.988281, avg loss: 2.410512, ppl: 11.139666 +epoch: 1, batch: 28399, sum loss: 3858.324707, avg loss: 2.351203, ppl: 10.498194 +epoch: 1, batch: 28400, sum loss: 5269.929199, avg loss: 2.710869, ppl: 15.042343 +epoch: 1, batch: 28401, sum loss: 4885.806152, avg loss: 2.772875, ppl: 16.004585 +epoch: 1, batch: 28402, sum loss: 3776.885742, avg loss: 2.460512, ppl: 11.710805 +epoch: 1, batch: 28403, sum loss: 4789.220703, avg loss: 2.719603, ppl: 15.174294 +epoch: 1, batch: 28404, sum loss: 4070.281982, avg loss: 2.637902, ppl: 13.983829 +epoch: 1, batch: 28405, sum loss: 4682.216309, avg loss: 2.612844, ppl: 13.637781 +epoch: 1, batch: 28406, sum loss: 4798.652832, avg loss: 2.612223, ppl: 13.629311 +epoch: 1, batch: 28407, sum loss: 4409.032227, avg loss: 2.716594, ppl: 15.128706 +epoch: 1, batch: 28408, sum loss: 4940.356934, avg loss: 2.781733, ppl: 16.146973 +epoch: 1, batch: 28409, sum loss: 4657.603516, avg loss: 2.795680, ppl: 16.373764 +epoch: 1, batch: 28410, sum loss: 6349.467773, avg loss: 3.067376, ppl: 21.485449 +epoch: 1, batch: 28411, sum loss: 3954.742188, avg loss: 2.625991, ppl: 13.818259 +epoch: 1, batch: 28412, sum loss: 4390.521484, avg loss: 2.503148, ppl: 12.220903 +epoch: 1, batch: 28413, sum loss: 4209.262207, avg loss: 2.669158, ppl: 14.427815 +epoch: 1, batch: 28414, sum loss: 3616.535156, avg loss: 2.412632, ppl: 11.163304 +epoch: 1, batch: 28415, sum loss: 4527.115234, avg loss: 2.569305, ppl: 13.056746 +epoch: 1, batch: 28416, sum loss: 
4762.682617, avg loss: 2.712234, ppl: 15.062889 +epoch: 1, batch: 28417, sum loss: 4469.843262, avg loss: 2.621609, ppl: 13.757842 +epoch: 1, batch: 28418, sum loss: 4590.808105, avg loss: 2.632344, ppl: 13.906328 +epoch: 1, batch: 28419, sum loss: 3183.889404, avg loss: 2.377811, ppl: 10.781281 +epoch: 1, batch: 28420, sum loss: 3754.251709, avg loss: 2.546982, ppl: 12.768514 +epoch: 1, batch: 28421, sum loss: 4086.585449, avg loss: 2.634807, ppl: 13.940619 +epoch: 1, batch: 28422, sum loss: 4280.955078, avg loss: 2.503483, ppl: 12.224995 +epoch: 1, batch: 28423, sum loss: 4057.172852, avg loss: 2.627703, ppl: 13.841934 +epoch: 1, batch: 28424, sum loss: 4553.100586, avg loss: 2.568021, ppl: 13.039990 +epoch: 1, batch: 28425, sum loss: 4379.116699, avg loss: 2.748975, ppl: 15.626603 +epoch: 1, batch: 28426, sum loss: 3964.518555, avg loss: 2.463964, ppl: 11.751304 +epoch: 1, batch: 28427, sum loss: 4421.037598, avg loss: 2.771811, ppl: 15.987554 +epoch: 1, batch: 28428, sum loss: 3824.284424, avg loss: 2.437402, ppl: 11.443278 +epoch: 1, batch: 28429, sum loss: 4377.164062, avg loss: 2.564244, ppl: 12.990831 +epoch: 1, batch: 28430, sum loss: 4654.466309, avg loss: 2.665788, ppl: 14.379282 +epoch: 1, batch: 28431, sum loss: 4182.354492, avg loss: 2.638709, ppl: 13.995129 +epoch: 1, batch: 28432, sum loss: 4852.778809, avg loss: 2.606219, ppl: 13.547724 +epoch: 1, batch: 28433, sum loss: 3911.283691, avg loss: 2.376236, ppl: 10.764307 +epoch: 1, batch: 28434, sum loss: 5285.649414, avg loss: 3.030762, ppl: 20.713015 +epoch: 1, batch: 28435, sum loss: 5317.827148, avg loss: 2.748231, ppl: 15.614984 +epoch: 1, batch: 28436, sum loss: 4735.508789, avg loss: 2.660398, ppl: 14.301984 +epoch: 1, batch: 28437, sum loss: 5083.478027, avg loss: 2.886699, ppl: 17.934019 +epoch: 1, batch: 28438, sum loss: 4373.459961, avg loss: 2.416276, ppl: 11.204062 +epoch: 1, batch: 28439, sum loss: 4543.270508, avg loss: 2.388681, ppl: 10.899105 +epoch: 1, batch: 28440, sum loss: 
3912.002686, avg loss: 2.438905, ppl: 11.460479 +epoch: 1, batch: 28441, sum loss: 3842.366699, avg loss: 2.539568, ppl: 12.674200 +epoch: 1, batch: 28442, sum loss: 5005.497070, avg loss: 2.901738, ppl: 18.205753 +epoch: 1, batch: 28443, sum loss: 3770.867676, avg loss: 2.620478, ppl: 13.742287 +epoch: 1, batch: 28444, sum loss: 4451.640137, avg loss: 2.492520, ppl: 12.091707 +epoch: 1, batch: 28445, sum loss: 3497.758057, avg loss: 2.540129, ppl: 12.681313 +epoch: 1, batch: 28446, sum loss: 4689.414062, avg loss: 2.419719, ppl: 11.242694 +epoch: 1, batch: 28447, sum loss: 3521.039551, avg loss: 2.375870, ppl: 10.760373 +epoch: 1, batch: 28448, sum loss: 4031.621338, avg loss: 2.327726, ppl: 10.254595 +epoch: 1, batch: 28449, sum loss: 3998.704590, avg loss: 2.767270, ppl: 15.915120 +epoch: 1, batch: 28450, sum loss: 4924.823242, avg loss: 2.647754, ppl: 14.122291 +epoch: 1, batch: 28451, sum loss: 4107.687500, avg loss: 2.504688, ppl: 12.239734 +epoch: 1, batch: 28452, sum loss: 4386.854492, avg loss: 2.496787, ppl: 12.143413 +epoch: 1, batch: 28453, sum loss: 4950.922852, avg loss: 2.661787, ppl: 14.321853 +epoch: 1, batch: 28454, sum loss: 3427.034912, avg loss: 2.476181, ppl: 11.895751 +epoch: 1, batch: 28455, sum loss: 4262.140625, avg loss: 2.541527, ppl: 12.699045 +epoch: 1, batch: 28456, sum loss: 4467.395508, avg loss: 2.745787, ppl: 15.576867 +epoch: 1, batch: 28457, sum loss: 4132.395996, avg loss: 2.468576, ppl: 11.805623 +epoch: 1, batch: 28458, sum loss: 4027.332275, avg loss: 2.578318, ppl: 13.174954 +epoch: 1, batch: 28459, sum loss: 5014.351074, avg loss: 2.694439, ppl: 14.797217 +epoch: 1, batch: 28460, sum loss: 5112.812988, avg loss: 2.827883, ppl: 16.909630 +epoch: 1, batch: 28461, sum loss: 4076.370605, avg loss: 2.635017, ppl: 13.943545 +epoch: 1, batch: 28462, sum loss: 3907.128174, avg loss: 2.496567, ppl: 12.140749 +epoch: 1, batch: 28463, sum loss: 4419.821289, avg loss: 2.740125, ppl: 15.488920 +epoch: 1, batch: 28464, sum loss: 
5321.695801, avg loss: 2.832196, ppl: 16.982710 +epoch: 1, batch: 28465, sum loss: 3610.675537, avg loss: 2.392761, ppl: 10.943663 +epoch: 1, batch: 28466, sum loss: 4370.603027, avg loss: 2.647246, ppl: 14.115114 +epoch: 1, batch: 28467, sum loss: 5876.246582, avg loss: 3.004216, ppl: 20.170395 +epoch: 1, batch: 28468, sum loss: 4859.925781, avg loss: 2.601673, ppl: 13.486286 +epoch: 1, batch: 28469, sum loss: 4001.772461, avg loss: 2.379175, ppl: 10.795992 +epoch: 1, batch: 28470, sum loss: 4183.577148, avg loss: 2.462376, ppl: 11.732659 +epoch: 1, batch: 28471, sum loss: 4091.981201, avg loss: 2.374917, ppl: 10.750116 +epoch: 1, batch: 28472, sum loss: 3390.982910, avg loss: 2.543873, ppl: 12.728875 +epoch: 1, batch: 28473, sum loss: 5077.639648, avg loss: 2.738748, ppl: 15.467612 +epoch: 1, batch: 28474, sum loss: 4263.350098, avg loss: 2.283530, ppl: 9.811251 +epoch: 1, batch: 28475, sum loss: 4660.329590, avg loss: 2.907255, ppl: 18.306477 +epoch: 1, batch: 28476, sum loss: 4142.595215, avg loss: 2.425407, ppl: 11.306829 +epoch: 1, batch: 28477, sum loss: 4384.947266, avg loss: 2.680286, ppl: 14.589261 +epoch: 1, batch: 28478, sum loss: 4038.150879, avg loss: 2.690307, ppl: 14.736201 +epoch: 1, batch: 28479, sum loss: 5186.744629, avg loss: 2.606404, ppl: 13.550241 +epoch: 1, batch: 28480, sum loss: 3905.671387, avg loss: 2.276032, ppl: 9.737966 +epoch: 1, batch: 28481, sum loss: 4465.781250, avg loss: 2.515933, ppl: 12.378153 +epoch: 1, batch: 28482, sum loss: 4765.841797, avg loss: 2.694088, ppl: 14.792026 +epoch: 1, batch: 28483, sum loss: 4601.721191, avg loss: 2.672312, ppl: 14.473390 +epoch: 1, batch: 28484, sum loss: 3648.142578, avg loss: 2.456661, ppl: 11.665800 +epoch: 1, batch: 28485, sum loss: 3693.573242, avg loss: 2.398424, ppl: 11.005819 +epoch: 1, batch: 28486, sum loss: 4489.370117, avg loss: 2.622296, ppl: 13.767292 +epoch: 1, batch: 28487, sum loss: 3571.673828, avg loss: 2.403549, ppl: 11.062366 +epoch: 1, batch: 28488, sum loss: 
4086.259033, avg loss: 2.457161, ppl: 11.671631 +epoch: 1, batch: 28489, sum loss: 4414.918945, avg loss: 2.424448, ppl: 11.295987 +epoch: 1, batch: 28490, sum loss: 3660.293945, avg loss: 2.531324, ppl: 12.570134 +epoch: 1, batch: 28491, sum loss: 3707.203857, avg loss: 2.528789, ppl: 12.538307 +epoch: 1, batch: 28492, sum loss: 4200.562012, avg loss: 2.819169, ppl: 16.762920 +epoch: 1, batch: 28493, sum loss: 4942.419922, avg loss: 2.832332, ppl: 16.985029 +epoch: 1, batch: 28494, sum loss: 5134.925781, avg loss: 2.610537, ppl: 13.606350 +epoch: 1, batch: 28495, sum loss: 3673.465820, avg loss: 2.383820, ppl: 10.846252 +epoch: 1, batch: 28496, sum loss: 3828.104736, avg loss: 2.546976, ppl: 12.768432 +epoch: 1, batch: 28497, sum loss: 4960.354004, avg loss: 2.763428, ppl: 15.854105 +epoch: 1, batch: 28498, sum loss: 4269.437500, avg loss: 2.954628, ppl: 19.194580 +epoch: 1, batch: 28499, sum loss: 4349.308594, avg loss: 2.730263, ppl: 15.336916 +epoch: 1, batch: 28500, sum loss: 4392.273438, avg loss: 2.583690, ppl: 13.245931 +epoch: 1, batch: 28501, sum loss: 5131.311035, avg loss: 2.504300, ppl: 12.234993 +epoch: 1, batch: 28502, sum loss: 4552.173828, avg loss: 2.748897, ppl: 15.625393 +epoch: 1, batch: 28503, sum loss: 3774.006104, avg loss: 2.484533, ppl: 11.995521 +epoch: 1, batch: 28504, sum loss: 5598.662109, avg loss: 2.785404, ppl: 16.206367 +epoch: 1, batch: 28505, sum loss: 4845.044922, avg loss: 2.595096, ppl: 13.397879 +epoch: 1, batch: 28506, sum loss: 4573.850098, avg loss: 2.732288, ppl: 15.368011 +epoch: 1, batch: 28507, sum loss: 4139.604492, avg loss: 2.610091, ppl: 13.600291 +epoch: 1, batch: 28508, sum loss: 3319.030273, avg loss: 2.112686, ppl: 8.270429 +epoch: 1, batch: 28509, sum loss: 4579.222168, avg loss: 2.563954, ppl: 12.987071 +epoch: 1, batch: 28510, sum loss: 4492.475586, avg loss: 2.934341, ppl: 18.809099 +epoch: 1, batch: 28511, sum loss: 3946.944336, avg loss: 2.501232, ppl: 12.197514 +epoch: 1, batch: 28512, sum loss: 
4048.142822, avg loss: 2.393934, ppl: 10.956517 +epoch: 1, batch: 28513, sum loss: 3718.627441, avg loss: 2.451303, ppl: 11.603462 +epoch: 1, batch: 28514, sum loss: 5581.458984, avg loss: 2.759001, ppl: 15.784063 +epoch: 1, batch: 28515, sum loss: 3556.409180, avg loss: 2.601616, ppl: 13.485515 +epoch: 1, batch: 28516, sum loss: 4893.185547, avg loss: 2.599992, ppl: 13.463634 +epoch: 1, batch: 28517, sum loss: 4861.075195, avg loss: 2.834446, ppl: 17.020967 +epoch: 1, batch: 28518, sum loss: 4929.880371, avg loss: 2.590584, ppl: 13.337553 +epoch: 1, batch: 28519, sum loss: 4574.791504, avg loss: 3.007753, ppl: 20.241858 +epoch: 1, batch: 28520, sum loss: 4849.631348, avg loss: 2.743004, ppl: 15.533583 +epoch: 1, batch: 28521, sum loss: 4555.441406, avg loss: 2.559237, ppl: 12.925948 +epoch: 1, batch: 28522, sum loss: 4796.194336, avg loss: 2.460849, ppl: 11.714751 +epoch: 1, batch: 28523, sum loss: 3830.673340, avg loss: 2.686307, ppl: 14.677368 +epoch: 1, batch: 28524, sum loss: 4613.317871, avg loss: 2.655911, ppl: 14.237957 +epoch: 1, batch: 28525, sum loss: 4486.437012, avg loss: 2.475959, ppl: 11.893103 +epoch: 1, batch: 28526, sum loss: 2936.933105, avg loss: 2.185218, ppl: 8.892588 +epoch: 1, batch: 28527, sum loss: 3823.154541, avg loss: 2.615017, ppl: 13.667448 +epoch: 1, batch: 28528, sum loss: 4409.268066, avg loss: 2.550184, ppl: 12.809461 +epoch: 1, batch: 28529, sum loss: 4624.735352, avg loss: 2.673257, ppl: 14.487082 +epoch: 1, batch: 28530, sum loss: 3992.862793, avg loss: 2.497100, ppl: 12.147214 +epoch: 1, batch: 28531, sum loss: 4050.061035, avg loss: 2.571467, ppl: 13.085009 +epoch: 1, batch: 28532, sum loss: 4922.932617, avg loss: 2.606105, ppl: 13.546186 +epoch: 1, batch: 28533, sum loss: 5568.775391, avg loss: 2.954257, ppl: 19.187471 +epoch: 1, batch: 28534, sum loss: 4885.149414, avg loss: 2.646343, ppl: 14.102375 +epoch: 1, batch: 28535, sum loss: 4846.979004, avg loss: 2.902383, ppl: 18.217499 +epoch: 1, batch: 28536, sum loss: 
4382.955078, avg loss: 2.584290, ppl: 13.253870 +epoch: 1, batch: 28537, sum loss: 5132.765137, avg loss: 2.820201, ppl: 16.780218 +epoch: 1, batch: 28538, sum loss: 5235.127441, avg loss: 2.745216, ppl: 15.567982 +epoch: 1, batch: 28539, sum loss: 4026.804688, avg loss: 2.478034, ppl: 11.917806 +epoch: 1, batch: 28540, sum loss: 4473.715820, avg loss: 2.482639, ppl: 11.972823 +epoch: 1, batch: 28541, sum loss: 3899.611084, avg loss: 2.550432, ppl: 12.812643 +epoch: 1, batch: 28542, sum loss: 4358.736328, avg loss: 2.871368, ppl: 17.661161 +epoch: 1, batch: 28543, sum loss: 4551.783691, avg loss: 2.625019, ppl: 13.804841 +epoch: 1, batch: 28544, sum loss: 4418.013184, avg loss: 2.608036, ppl: 13.572369 +epoch: 1, batch: 28545, sum loss: 4295.700684, avg loss: 2.816853, ppl: 16.724134 +epoch: 1, batch: 28546, sum loss: 4905.092773, avg loss: 2.737217, ppl: 15.443948 +epoch: 1, batch: 28547, sum loss: 4279.178711, avg loss: 2.554734, ppl: 12.867873 +epoch: 1, batch: 28548, sum loss: 4383.434082, avg loss: 2.514879, ppl: 12.365112 +epoch: 1, batch: 28549, sum loss: 4536.137207, avg loss: 2.727683, ppl: 15.297406 +epoch: 1, batch: 28550, sum loss: 3677.028564, avg loss: 2.441586, ppl: 11.491252 +epoch: 1, batch: 28551, sum loss: 5266.643555, avg loss: 2.700843, ppl: 14.892279 +epoch: 1, batch: 28552, sum loss: 4065.814697, avg loss: 2.712351, ppl: 15.064655 +epoch: 1, batch: 28553, sum loss: 4665.705566, avg loss: 2.707896, ppl: 14.997694 +epoch: 1, batch: 28554, sum loss: 5177.638672, avg loss: 2.956961, ppl: 19.239418 +epoch: 1, batch: 28555, sum loss: 4896.391602, avg loss: 2.604464, ppl: 13.523969 +epoch: 1, batch: 28556, sum loss: 4538.210938, avg loss: 2.542415, ppl: 12.710331 +epoch: 1, batch: 28557, sum loss: 4681.204102, avg loss: 2.727974, ppl: 15.301861 +epoch: 1, batch: 28558, sum loss: 4379.438965, avg loss: 2.521266, ppl: 12.444341 +epoch: 1, batch: 28559, sum loss: 4953.295410, avg loss: 2.683259, ppl: 14.632697 +epoch: 1, batch: 28560, sum loss: 
4115.182129, avg loss: 2.634560, ppl: 13.937174 +epoch: 1, batch: 28561, sum loss: 3856.622559, avg loss: 2.497813, ppl: 12.155877 +epoch: 1, batch: 28562, sum loss: 3988.250488, avg loss: 2.434829, ppl: 11.413870 +epoch: 1, batch: 28563, sum loss: 4409.817383, avg loss: 2.666153, ppl: 14.384528 +epoch: 1, batch: 28564, sum loss: 5146.941895, avg loss: 2.804873, ppl: 16.524977 +epoch: 1, batch: 28565, sum loss: 4306.758789, avg loss: 2.698470, ppl: 14.856988 +epoch: 1, batch: 28566, sum loss: 3611.869385, avg loss: 2.337780, ppl: 10.358213 +epoch: 1, batch: 28567, sum loss: 3894.832520, avg loss: 2.581069, ppl: 13.211250 +epoch: 1, batch: 28568, sum loss: 4442.926270, avg loss: 2.737478, ppl: 15.447969 +epoch: 1, batch: 28569, sum loss: 5249.396973, avg loss: 2.708667, ppl: 15.009259 +epoch: 1, batch: 28570, sum loss: 4149.310547, avg loss: 2.631142, ppl: 13.889622 +epoch: 1, batch: 28571, sum loss: 4417.692383, avg loss: 2.785430, ppl: 16.206789 +epoch: 1, batch: 28572, sum loss: 4487.915039, avg loss: 2.588186, ppl: 13.305616 +epoch: 1, batch: 28573, sum loss: 3706.152588, avg loss: 2.347152, ppl: 10.455747 +epoch: 1, batch: 28574, sum loss: 4900.210938, avg loss: 2.752928, ppl: 15.688494 +epoch: 1, batch: 28575, sum loss: 4860.433594, avg loss: 2.867512, ppl: 17.593201 +epoch: 1, batch: 28576, sum loss: 3813.664551, avg loss: 2.342546, ppl: 10.407699 +epoch: 1, batch: 28577, sum loss: 3931.437012, avg loss: 2.516925, ppl: 12.390439 +epoch: 1, batch: 28578, sum loss: 4551.902344, avg loss: 2.632679, ppl: 13.910991 +epoch: 1, batch: 28579, sum loss: 4359.529785, avg loss: 2.533138, ppl: 12.592956 +epoch: 1, batch: 28580, sum loss: 3900.987061, avg loss: 2.435073, ppl: 11.416654 +epoch: 1, batch: 28581, sum loss: 5349.830078, avg loss: 2.851722, ppl: 17.317574 +epoch: 1, batch: 28582, sum loss: 3133.833008, avg loss: 2.197639, ppl: 9.003728 +epoch: 1, batch: 28583, sum loss: 4913.603027, avg loss: 2.595670, ppl: 13.405562 +epoch: 1, batch: 28584, sum loss: 
3406.298584, avg loss: 2.375383, ppl: 10.755128 +epoch: 1, batch: 28585, sum loss: 2978.604492, avg loss: 2.327035, ppl: 10.247510 +epoch: 1, batch: 28586, sum loss: 3789.384277, avg loss: 2.436903, ppl: 11.437567 +epoch: 1, batch: 28587, sum loss: 5317.118652, avg loss: 2.834285, ppl: 17.018229 +epoch: 1, batch: 28588, sum loss: 4741.517090, avg loss: 2.681854, ppl: 14.612156 +epoch: 1, batch: 28589, sum loss: 4013.059814, avg loss: 2.508162, ppl: 12.282337 +epoch: 1, batch: 28590, sum loss: 4065.297363, avg loss: 2.662277, ppl: 14.328882 +epoch: 1, batch: 28591, sum loss: 3794.657471, avg loss: 2.416979, ppl: 11.211941 +epoch: 1, batch: 28592, sum loss: 4594.039062, avg loss: 2.739439, ppl: 15.478296 +epoch: 1, batch: 28593, sum loss: 4571.642578, avg loss: 2.399812, ppl: 11.021110 +epoch: 1, batch: 28594, sum loss: 3990.784668, avg loss: 2.524215, ppl: 12.481099 +epoch: 1, batch: 28595, sum loss: 4414.770020, avg loss: 2.659500, ppl: 14.289145 +epoch: 1, batch: 28596, sum loss: 4270.434570, avg loss: 2.566367, ppl: 13.018441 +epoch: 1, batch: 28597, sum loss: 4804.583496, avg loss: 2.963963, ppl: 19.374592 +epoch: 1, batch: 28598, sum loss: 4636.968262, avg loss: 2.583269, ppl: 13.240355 +epoch: 1, batch: 28599, sum loss: 3855.211426, avg loss: 2.421615, ppl: 11.264037 +epoch: 1, batch: 28600, sum loss: 4351.167480, avg loss: 2.767918, ppl: 15.925448 +epoch: 1, batch: 28601, sum loss: 4050.626465, avg loss: 2.748050, ppl: 15.612165 +epoch: 1, batch: 28602, sum loss: 4585.329590, avg loss: 2.726117, ppl: 15.273471 +epoch: 1, batch: 28603, sum loss: 3546.018066, avg loss: 2.395958, ppl: 10.978713 +epoch: 1, batch: 28604, sum loss: 4698.768555, avg loss: 2.627947, ppl: 13.845311 +epoch: 1, batch: 28605, sum loss: 4009.254395, avg loss: 2.667501, ppl: 14.403932 +epoch: 1, batch: 28606, sum loss: 3672.021973, avg loss: 2.585931, ppl: 13.275643 +epoch: 1, batch: 28607, sum loss: 3885.507324, avg loss: 2.609474, ppl: 13.591906 +epoch: 1, batch: 28608, sum loss: 
5212.345215, avg loss: 2.966616, ppl: 19.426079 +epoch: 1, batch: 28609, sum loss: 5000.676758, avg loss: 2.703068, ppl: 14.925460 +epoch: 1, batch: 28610, sum loss: 4972.394043, avg loss: 2.914651, ppl: 18.442369 +epoch: 1, batch: 28611, sum loss: 4649.853516, avg loss: 2.786012, ppl: 16.216215 +epoch: 1, batch: 28612, sum loss: 4861.208984, avg loss: 2.787390, ppl: 16.238590 +epoch: 1, batch: 28613, sum loss: 3807.780762, avg loss: 2.296611, ppl: 9.940435 +epoch: 1, batch: 28614, sum loss: 4433.668945, avg loss: 2.645387, ppl: 14.088902 +epoch: 1, batch: 28615, sum loss: 4934.801758, avg loss: 2.683416, ppl: 14.634999 +epoch: 1, batch: 28616, sum loss: 4241.606934, avg loss: 2.536846, ppl: 12.639748 +epoch: 1, batch: 28617, sum loss: 4358.443848, avg loss: 2.713850, ppl: 15.087254 +epoch: 1, batch: 28618, sum loss: 4775.964844, avg loss: 2.716703, ppl: 15.130362 +epoch: 1, batch: 28619, sum loss: 4373.586914, avg loss: 2.566659, ppl: 13.022244 +epoch: 1, batch: 28620, sum loss: 4115.866211, avg loss: 2.650268, ppl: 14.157833 +epoch: 1, batch: 28621, sum loss: 4267.262207, avg loss: 2.650473, ppl: 14.160740 +epoch: 1, batch: 28622, sum loss: 3991.970215, avg loss: 2.364911, ppl: 10.643095 +epoch: 1, batch: 28623, sum loss: 3651.106201, avg loss: 2.392599, ppl: 10.941896 +epoch: 1, batch: 28624, sum loss: 4668.976562, avg loss: 2.651321, ppl: 14.172750 +epoch: 1, batch: 28625, sum loss: 4885.555176, avg loss: 2.666788, ppl: 14.393661 +epoch: 1, batch: 28626, sum loss: 3675.777832, avg loss: 2.453790, ppl: 11.632352 +epoch: 1, batch: 28627, sum loss: 4514.544922, avg loss: 2.582692, ppl: 13.232708 +epoch: 1, batch: 28628, sum loss: 4061.986328, avg loss: 2.602170, ppl: 13.492979 +epoch: 1, batch: 28629, sum loss: 3914.760498, avg loss: 2.562016, ppl: 12.961923 +epoch: 1, batch: 28630, sum loss: 3621.539307, avg loss: 2.270558, ppl: 9.684800 +epoch: 1, batch: 28631, sum loss: 5208.126465, avg loss: 3.024464, ppl: 20.582962 +epoch: 1, batch: 28632, sum loss: 
3978.750244, avg loss: 2.496079, ppl: 12.134823 +epoch: 1, batch: 28633, sum loss: 4047.634277, avg loss: 2.565041, ppl: 13.001189 +epoch: 1, batch: 28634, sum loss: 4763.568848, avg loss: 2.658242, ppl: 14.271174 +epoch: 1, batch: 28635, sum loss: 5651.545898, avg loss: 2.991819, ppl: 19.921886 +epoch: 1, batch: 28636, sum loss: 4944.153809, avg loss: 2.793307, ppl: 16.334955 +epoch: 1, batch: 28637, sum loss: 4893.804199, avg loss: 2.469124, ppl: 11.812096 +epoch: 1, batch: 28638, sum loss: 4064.763916, avg loss: 2.646331, ppl: 14.102201 +epoch: 1, batch: 28639, sum loss: 4873.529785, avg loss: 2.608956, ppl: 13.584859 +epoch: 1, batch: 28640, sum loss: 3664.104004, avg loss: 2.470738, ppl: 11.831171 +epoch: 1, batch: 28641, sum loss: 4551.819824, avg loss: 2.785691, ppl: 16.211023 +epoch: 1, batch: 28642, sum loss: 5139.676758, avg loss: 2.610298, ppl: 13.603106 +epoch: 1, batch: 28643, sum loss: 4304.483398, avg loss: 2.542518, ppl: 12.711643 +epoch: 1, batch: 28644, sum loss: 4753.674805, avg loss: 2.763764, ppl: 15.859431 +epoch: 1, batch: 28645, sum loss: 3986.200439, avg loss: 2.365698, ppl: 10.651467 +epoch: 1, batch: 28646, sum loss: 4432.703613, avg loss: 2.630685, ppl: 13.883271 +epoch: 1, batch: 28647, sum loss: 3975.079102, avg loss: 2.464401, ppl: 11.756441 +epoch: 1, batch: 28648, sum loss: 4431.020508, avg loss: 2.574678, ppl: 13.127086 +epoch: 1, batch: 28649, sum loss: 3793.497559, avg loss: 2.359140, ppl: 10.581849 +epoch: 1, batch: 28650, sum loss: 4387.991699, avg loss: 2.445926, ppl: 11.541237 +epoch: 1, batch: 28651, sum loss: 4397.639648, avg loss: 2.769295, ppl: 15.947383 +epoch: 1, batch: 28652, sum loss: 5816.050781, avg loss: 2.974962, ppl: 19.588879 +epoch: 1, batch: 28653, sum loss: 4214.637207, avg loss: 2.590435, ppl: 13.335566 +epoch: 1, batch: 28654, sum loss: 4478.891113, avg loss: 2.636193, ppl: 13.959950 +epoch: 1, batch: 28655, sum loss: 3712.388184, avg loss: 2.556741, ppl: 12.893731 +epoch: 1, batch: 28656, sum loss: 
4433.969238, avg loss: 2.606684, ppl: 13.554028 +epoch: 1, batch: 28657, sum loss: 5223.036621, avg loss: 2.892047, ppl: 18.030178 +epoch: 1, batch: 28658, sum loss: 4734.175781, avg loss: 2.619909, ppl: 13.734474 +epoch: 1, batch: 28659, sum loss: 3730.602783, avg loss: 2.368637, ppl: 10.682817 +epoch: 1, batch: 28660, sum loss: 4256.625000, avg loss: 2.562688, ppl: 12.970637 +epoch: 1, batch: 28661, sum loss: 4729.850098, avg loss: 2.458342, ppl: 11.685422 +epoch: 1, batch: 28662, sum loss: 4887.997559, avg loss: 2.575341, ppl: 13.135801 +epoch: 1, batch: 28663, sum loss: 4012.711426, avg loss: 2.525306, ppl: 12.494720 +epoch: 1, batch: 28664, sum loss: 4899.165039, avg loss: 2.835165, ppl: 17.033211 +epoch: 1, batch: 28665, sum loss: 4545.227539, avg loss: 2.788483, ppl: 16.256342 +epoch: 1, batch: 28666, sum loss: 4225.089355, avg loss: 2.622650, ppl: 13.772174 +epoch: 1, batch: 28667, sum loss: 4437.152832, avg loss: 2.591795, ppl: 13.353717 +epoch: 1, batch: 28668, sum loss: 3757.832031, avg loss: 2.360447, ppl: 10.595689 +epoch: 1, batch: 28669, sum loss: 3699.077881, avg loss: 2.531881, ppl: 12.577140 +epoch: 1, batch: 28670, sum loss: 4409.648926, avg loss: 2.631056, ppl: 13.888423 +epoch: 1, batch: 28671, sum loss: 4055.971924, avg loss: 2.539745, ppl: 12.676434 +epoch: 1, batch: 28672, sum loss: 4638.926270, avg loss: 2.799593, ppl: 16.437963 +epoch: 1, batch: 28673, sum loss: 5404.737305, avg loss: 2.746310, ppl: 15.585013 +epoch: 1, batch: 28674, sum loss: 3543.559326, avg loss: 2.562227, ppl: 12.964651 +epoch: 1, batch: 28675, sum loss: 4163.094727, avg loss: 2.942116, ppl: 18.955919 +epoch: 1, batch: 28676, sum loss: 4461.902344, avg loss: 2.673399, ppl: 14.489130 +epoch: 1, batch: 28677, sum loss: 5039.265625, avg loss: 2.779518, ppl: 16.111252 +epoch: 1, batch: 28678, sum loss: 4621.374512, avg loss: 2.682168, ppl: 14.616741 +epoch: 1, batch: 28679, sum loss: 4554.036133, avg loss: 2.465640, ppl: 11.771008 +epoch: 1, batch: 28680, sum loss: 
4686.060547, avg loss: 2.523457, ppl: 12.471641 +epoch: 1, batch: 28681, sum loss: 3731.449707, avg loss: 2.582318, ppl: 13.227765 +epoch: 1, batch: 28682, sum loss: 4204.501465, avg loss: 2.489344, ppl: 12.053362 +epoch: 1, batch: 28683, sum loss: 4844.765137, avg loss: 2.820003, ppl: 16.776901 +epoch: 1, batch: 28684, sum loss: 3671.006104, avg loss: 2.148043, ppl: 8.568078 +epoch: 1, batch: 28685, sum loss: 3896.575195, avg loss: 2.551785, ppl: 12.829982 +epoch: 1, batch: 28686, sum loss: 4818.350586, avg loss: 2.437203, ppl: 11.440997 +epoch: 1, batch: 28687, sum loss: 3979.526367, avg loss: 2.495001, ppl: 12.121744 +epoch: 1, batch: 28688, sum loss: 3928.707031, avg loss: 2.382478, ppl: 10.831716 +epoch: 1, batch: 28689, sum loss: 4141.350098, avg loss: 2.441834, ppl: 11.494099 +epoch: 1, batch: 28690, sum loss: 4492.697266, avg loss: 2.722847, ppl: 15.223598 +epoch: 1, batch: 28691, sum loss: 3737.386719, avg loss: 2.521853, ppl: 12.451653 +epoch: 1, batch: 28692, sum loss: 3832.108398, avg loss: 2.507924, ppl: 12.279415 +epoch: 1, batch: 28693, sum loss: 3948.466797, avg loss: 2.642883, ppl: 14.053659 +epoch: 1, batch: 28694, sum loss: 4442.030273, avg loss: 2.551425, ppl: 12.825363 +epoch: 1, batch: 28695, sum loss: 4756.957520, avg loss: 2.666456, ppl: 14.388887 +epoch: 1, batch: 28696, sum loss: 4334.253906, avg loss: 2.448731, ppl: 11.573652 +epoch: 1, batch: 28697, sum loss: 4239.578125, avg loss: 2.684977, ppl: 14.657858 +epoch: 1, batch: 28698, sum loss: 4152.348633, avg loss: 2.442558, ppl: 11.502427 +epoch: 1, batch: 28699, sum loss: 4522.025391, avg loss: 2.595881, ppl: 13.408398 +epoch: 1, batch: 28700, sum loss: 3659.132812, avg loss: 2.513141, ppl: 12.343637 +epoch: 1, batch: 28701, sum loss: 4766.025391, avg loss: 2.782268, ppl: 16.155621 +epoch: 1, batch: 28702, sum loss: 3770.075684, avg loss: 2.451285, ppl: 11.603243 +epoch: 1, batch: 28703, sum loss: 3952.272949, avg loss: 2.442690, ppl: 11.503949 +epoch: 1, batch: 28704, sum loss: 
3772.377197, avg loss: 2.667876, ppl: 14.409338 +epoch: 1, batch: 28705, sum loss: 4454.997559, avg loss: 2.731452, ppl: 15.355163 +epoch: 1, batch: 28706, sum loss: 4736.628906, avg loss: 2.712846, ppl: 15.072110 +epoch: 1, batch: 28707, sum loss: 4463.199707, avg loss: 2.563584, ppl: 12.982264 +epoch: 1, batch: 28708, sum loss: 3827.303955, avg loss: 2.637701, ppl: 13.981022 +epoch: 1, batch: 28709, sum loss: 4816.837891, avg loss: 2.609338, ppl: 13.590052 +epoch: 1, batch: 28710, sum loss: 5735.361328, avg loss: 2.883540, ppl: 17.877445 +epoch: 1, batch: 28711, sum loss: 4560.498535, avg loss: 2.652995, ppl: 14.196495 +epoch: 1, batch: 28712, sum loss: 4734.930176, avg loss: 2.615984, ppl: 13.680668 +epoch: 1, batch: 28713, sum loss: 3136.068359, avg loss: 2.225740, ppl: 9.260337 +epoch: 1, batch: 28714, sum loss: 5451.542969, avg loss: 2.682846, ppl: 14.626659 +epoch: 1, batch: 28715, sum loss: 4101.205078, avg loss: 2.603940, ppl: 13.516884 +epoch: 1, batch: 28716, sum loss: 4125.428711, avg loss: 2.458539, ppl: 11.687726 +epoch: 1, batch: 28717, sum loss: 4500.879883, avg loss: 2.597161, ppl: 13.425567 +epoch: 1, batch: 28718, sum loss: 4408.773926, avg loss: 2.527966, ppl: 12.528005 +epoch: 1, batch: 28719, sum loss: 4362.386230, avg loss: 2.676311, ppl: 14.531382 +epoch: 1, batch: 28720, sum loss: 3577.172852, avg loss: 2.718216, ppl: 15.153271 +epoch: 1, batch: 28721, sum loss: 4431.272461, avg loss: 2.642381, ppl: 14.046605 +epoch: 1, batch: 28722, sum loss: 3919.216309, avg loss: 2.576737, ppl: 13.154142 +epoch: 1, batch: 28723, sum loss: 4648.170898, avg loss: 2.660659, ppl: 14.305708 +epoch: 1, batch: 28724, sum loss: 3769.646973, avg loss: 2.524881, ppl: 12.489405 +epoch: 1, batch: 28725, sum loss: 4040.310547, avg loss: 2.506396, ppl: 12.260664 +epoch: 1, batch: 28726, sum loss: 4594.941895, avg loss: 2.665280, ppl: 14.371967 +epoch: 1, batch: 28727, sum loss: 4615.095215, avg loss: 2.864740, ppl: 17.544485 +epoch: 1, batch: 28728, sum loss: 
4289.649414, avg loss: 2.691123, ppl: 14.748222 +epoch: 1, batch: 28729, sum loss: 4052.740234, avg loss: 2.469677, ppl: 11.818628 +epoch: 1, batch: 28730, sum loss: 5520.358887, avg loss: 2.842615, ppl: 17.160587 +epoch: 1, batch: 28731, sum loss: 4714.705566, avg loss: 2.548490, ppl: 12.787774 +epoch: 1, batch: 28732, sum loss: 4290.080078, avg loss: 2.595330, ppl: 13.401006 +epoch: 1, batch: 28733, sum loss: 3819.042969, avg loss: 2.661354, ppl: 14.315657 +epoch: 1, batch: 28734, sum loss: 4706.181641, avg loss: 2.723485, ppl: 15.233315 +epoch: 1, batch: 28735, sum loss: 3874.897461, avg loss: 2.444730, ppl: 11.527440 +epoch: 1, batch: 28736, sum loss: 5286.682129, avg loss: 2.973387, ppl: 19.558056 +epoch: 1, batch: 28737, sum loss: 4889.751465, avg loss: 2.881409, ppl: 17.839399 +epoch: 1, batch: 28738, sum loss: 3615.529785, avg loss: 2.531884, ppl: 12.577176 +epoch: 1, batch: 28739, sum loss: 5632.657227, avg loss: 2.747638, ppl: 15.605723 +epoch: 1, batch: 28740, sum loss: 5230.455078, avg loss: 2.809052, ppl: 16.594183 +epoch: 1, batch: 28741, sum loss: 4009.261963, avg loss: 2.490225, ppl: 12.063989 +epoch: 1, batch: 28742, sum loss: 4616.679688, avg loss: 2.754582, ppl: 15.714474 +epoch: 1, batch: 28743, sum loss: 4858.996582, avg loss: 2.613769, ppl: 13.650400 +epoch: 1, batch: 28744, sum loss: 4772.855469, avg loss: 2.702636, ppl: 14.919006 +epoch: 1, batch: 28745, sum loss: 4412.850586, avg loss: 2.739200, ppl: 15.474595 +epoch: 1, batch: 28746, sum loss: 4823.460938, avg loss: 2.799455, ppl: 16.435686 +epoch: 1, batch: 28747, sum loss: 4199.831055, avg loss: 2.563999, ppl: 12.987654 +epoch: 1, batch: 28748, sum loss: 3755.753418, avg loss: 2.517261, ppl: 12.394602 +epoch: 1, batch: 28749, sum loss: 4574.847656, avg loss: 2.596395, ppl: 13.415289 +epoch: 1, batch: 28750, sum loss: 5109.586914, avg loss: 2.709219, ppl: 15.017542 +epoch: 1, batch: 28751, sum loss: 5027.402344, avg loss: 2.701452, ppl: 14.901357 +epoch: 1, batch: 28752, sum loss: 
4650.762695, avg loss: 2.699224, ppl: 14.868186 +epoch: 1, batch: 28753, sum loss: 3324.988281, avg loss: 2.197613, ppl: 9.003496 +epoch: 1, batch: 28754, sum loss: 3433.399170, avg loss: 2.369496, ppl: 10.691998 +epoch: 1, batch: 28755, sum loss: 4105.247559, avg loss: 2.474531, ppl: 11.876141 +epoch: 1, batch: 28756, sum loss: 4226.087402, avg loss: 2.544303, ppl: 12.734351 +epoch: 1, batch: 28757, sum loss: 4226.859375, avg loss: 2.588401, ppl: 13.308478 +epoch: 1, batch: 28758, sum loss: 3983.921875, avg loss: 2.368562, ppl: 10.682023 +epoch: 1, batch: 28759, sum loss: 4454.147949, avg loss: 2.796075, ppl: 16.380234 +epoch: 1, batch: 28760, sum loss: 4761.191895, avg loss: 2.596070, ppl: 13.410924 +epoch: 1, batch: 28761, sum loss: 4860.399414, avg loss: 2.844002, ppl: 17.184401 +epoch: 1, batch: 28762, sum loss: 4424.695801, avg loss: 2.424491, ppl: 11.296477 +epoch: 1, batch: 28763, sum loss: 4840.779785, avg loss: 2.763002, ppl: 15.847352 +epoch: 1, batch: 28764, sum loss: 4628.853027, avg loss: 2.785110, ppl: 16.201603 +epoch: 1, batch: 28765, sum loss: 4577.695801, avg loss: 2.545993, ppl: 12.755890 +epoch: 1, batch: 28766, sum loss: 5130.622070, avg loss: 2.905222, ppl: 18.269299 +epoch: 1, batch: 28767, sum loss: 3754.026611, avg loss: 2.616046, ppl: 13.681525 +epoch: 1, batch: 28768, sum loss: 4732.468750, avg loss: 2.806921, ppl: 16.558855 +epoch: 1, batch: 28769, sum loss: 4222.793457, avg loss: 2.445161, ppl: 11.532410 +epoch: 1, batch: 28770, sum loss: 3665.287354, avg loss: 2.403467, ppl: 11.061462 +epoch: 1, batch: 28771, sum loss: 4771.988770, avg loss: 2.676382, ppl: 14.532421 +epoch: 1, batch: 28772, sum loss: 4159.078125, avg loss: 2.580073, ppl: 13.198107 +epoch: 1, batch: 28773, sum loss: 4898.722168, avg loss: 2.812125, ppl: 16.645256 +epoch: 1, batch: 28774, sum loss: 4705.292969, avg loss: 2.718251, ppl: 15.153798 +epoch: 1, batch: 28775, sum loss: 4144.353516, avg loss: 2.420767, ppl: 11.254489 +epoch: 1, batch: 28776, sum loss: 
3864.413574, avg loss: 2.406235, ppl: 11.092120 +epoch: 1, batch: 28777, sum loss: 3711.000000, avg loss: 2.505739, ppl: 12.252616 +epoch: 1, batch: 28778, sum loss: 3585.999512, avg loss: 2.424611, ppl: 11.297832 +epoch: 1, batch: 28779, sum loss: 4155.183594, avg loss: 2.544509, ppl: 12.736975 +epoch: 1, batch: 28780, sum loss: 4650.583984, avg loss: 2.789792, ppl: 16.277639 +epoch: 1, batch: 28781, sum loss: 4377.218750, avg loss: 2.582430, ppl: 13.229244 +epoch: 1, batch: 28782, sum loss: 5272.862305, avg loss: 2.600031, ppl: 13.464150 +epoch: 1, batch: 28783, sum loss: 4818.952637, avg loss: 2.818101, ppl: 16.745020 +epoch: 1, batch: 28784, sum loss: 4691.651855, avg loss: 2.533289, ppl: 12.594865 +epoch: 1, batch: 28785, sum loss: 3735.346680, avg loss: 2.364143, ppl: 10.634925 +epoch: 1, batch: 28786, sum loss: 4175.836914, avg loss: 2.568165, ppl: 13.041875 +epoch: 1, batch: 28787, sum loss: 3335.292969, avg loss: 2.423905, ppl: 11.289856 +epoch: 1, batch: 28788, sum loss: 4232.428223, avg loss: 2.700975, ppl: 14.894253 +epoch: 1, batch: 28789, sum loss: 4467.890625, avg loss: 2.717695, ppl: 15.145371 +epoch: 1, batch: 28790, sum loss: 4188.256836, avg loss: 2.567907, ppl: 13.038510 +epoch: 1, batch: 28791, sum loss: 4519.060059, avg loss: 2.507802, ppl: 12.277920 +epoch: 1, batch: 28792, sum loss: 4063.022217, avg loss: 2.427134, ppl: 11.326375 +epoch: 1, batch: 28793, sum loss: 4745.095703, avg loss: 2.986215, ppl: 19.810560 +epoch: 1, batch: 28794, sum loss: 4358.560547, avg loss: 2.911530, ppl: 18.384907 +epoch: 1, batch: 28795, sum loss: 3673.270508, avg loss: 2.488666, ppl: 12.045195 +epoch: 1, batch: 28796, sum loss: 3880.608887, avg loss: 2.654315, ppl: 14.215252 +epoch: 1, batch: 28797, sum loss: 4287.678223, avg loss: 2.691575, ppl: 14.754890 +epoch: 1, batch: 28798, sum loss: 4436.733398, avg loss: 2.611379, ppl: 13.617822 +epoch: 1, batch: 28799, sum loss: 4685.564941, avg loss: 2.650207, ppl: 14.156962 +epoch: 1, batch: 28800, sum loss: 
4205.775391, avg loss: 2.625328, ppl: 13.809100 +epoch: 1, batch: 28801, sum loss: 4729.152344, avg loss: 2.567401, ppl: 13.031910 +epoch: 1, batch: 28802, sum loss: 3664.494629, avg loss: 2.718468, ppl: 15.157080 +epoch: 1, batch: 28803, sum loss: 3543.058350, avg loss: 2.324841, ppl: 10.225060 +epoch: 1, batch: 28804, sum loss: 3897.609863, avg loss: 2.495269, ppl: 12.124992 +epoch: 1, batch: 28805, sum loss: 5378.879883, avg loss: 3.075403, ppl: 21.658607 +epoch: 1, batch: 28806, sum loss: 4895.774414, avg loss: 2.694427, ppl: 14.797041 +epoch: 1, batch: 28807, sum loss: 4348.071289, avg loss: 2.656122, ppl: 14.240952 +epoch: 1, batch: 28808, sum loss: 3643.990967, avg loss: 2.528793, ppl: 12.538367 +epoch: 1, batch: 28809, sum loss: 3731.678955, avg loss: 2.395173, ppl: 10.970091 +epoch: 1, batch: 28810, sum loss: 4361.579102, avg loss: 2.509539, ppl: 12.299260 +epoch: 1, batch: 28811, sum loss: 6304.949707, avg loss: 2.971230, ppl: 19.515905 +epoch: 1, batch: 28812, sum loss: 4510.516602, avg loss: 2.697678, ppl: 14.845215 +epoch: 1, batch: 28813, sum loss: 5210.610352, avg loss: 2.796892, ppl: 16.393620 +epoch: 1, batch: 28814, sum loss: 3709.423584, avg loss: 2.558223, ppl: 12.912854 +epoch: 1, batch: 28815, sum loss: 4144.934570, avg loss: 2.477546, ppl: 11.911999 +epoch: 1, batch: 28816, sum loss: 5495.606445, avg loss: 2.761611, ppl: 15.825320 +epoch: 1, batch: 28817, sum loss: 3930.531494, avg loss: 2.634405, ppl: 13.935014 +epoch: 1, batch: 28818, sum loss: 4704.891113, avg loss: 2.741778, ppl: 15.514547 +epoch: 1, batch: 28819, sum loss: 4432.640625, avg loss: 2.559261, ppl: 12.926266 +epoch: 1, batch: 28820, sum loss: 4410.728516, avg loss: 2.531991, ppl: 12.578526 +epoch: 1, batch: 28821, sum loss: 5006.999023, avg loss: 2.590274, ppl: 13.333423 +epoch: 1, batch: 28822, sum loss: 4347.701172, avg loss: 2.783419, ppl: 16.174232 +epoch: 1, batch: 28823, sum loss: 3943.117188, avg loss: 2.565464, ppl: 13.006686 +epoch: 1, batch: 28824, sum loss: 
3800.636475, avg loss: 2.419247, ppl: 11.237391 +epoch: 1, batch: 28825, sum loss: 4308.707031, avg loss: 2.584707, ppl: 13.259407 +epoch: 1, batch: 28826, sum loss: 3651.469971, avg loss: 2.665307, ppl: 14.372355 +epoch: 1, batch: 28827, sum loss: 4594.934570, avg loss: 2.656031, ppl: 14.239665 +epoch: 1, batch: 28828, sum loss: 5238.645996, avg loss: 2.834765, ppl: 17.026402 +epoch: 1, batch: 28829, sum loss: 5216.591797, avg loss: 2.646672, ppl: 14.107019 +epoch: 1, batch: 28830, sum loss: 4732.660645, avg loss: 2.812038, ppl: 16.643810 +epoch: 1, batch: 28831, sum loss: 3733.476074, avg loss: 2.202641, ppl: 9.048878 +epoch: 1, batch: 28832, sum loss: 4410.382812, avg loss: 2.444780, ppl: 11.528011 +epoch: 1, batch: 28833, sum loss: 3964.614014, avg loss: 2.479433, ppl: 11.934499 +epoch: 1, batch: 28834, sum loss: 4824.758789, avg loss: 2.859964, ppl: 17.460896 +epoch: 1, batch: 28835, sum loss: 5073.025391, avg loss: 2.890613, ppl: 18.004335 +epoch: 1, batch: 28836, sum loss: 4445.569336, avg loss: 2.584633, ppl: 13.258427 +epoch: 1, batch: 28837, sum loss: 3799.175293, avg loss: 2.509363, ppl: 12.297093 +epoch: 1, batch: 28838, sum loss: 4215.327637, avg loss: 2.723080, ppl: 15.227148 +epoch: 1, batch: 28839, sum loss: 4774.821777, avg loss: 2.899102, ppl: 18.157841 +epoch: 1, batch: 28840, sum loss: 4824.954590, avg loss: 2.800322, ppl: 16.449940 +epoch: 1, batch: 28841, sum loss: 5006.552734, avg loss: 2.610299, ppl: 13.603112 +epoch: 1, batch: 28842, sum loss: 4756.405273, avg loss: 2.622054, ppl: 13.763961 +epoch: 1, batch: 28843, sum loss: 3729.666992, avg loss: 2.710514, ppl: 15.037000 +epoch: 1, batch: 28844, sum loss: 4656.343262, avg loss: 2.888550, ppl: 17.967243 +epoch: 1, batch: 28845, sum loss: 5119.208984, avg loss: 2.724433, ppl: 15.247762 +epoch: 1, batch: 28846, sum loss: 3498.634766, avg loss: 2.396325, ppl: 10.982744 +epoch: 1, batch: 28847, sum loss: 3602.251465, avg loss: 2.438898, ppl: 11.460400 +epoch: 1, batch: 28848, sum loss: 
4754.647949, avg loss: 2.754721, ppl: 15.716650 +epoch: 1, batch: 28849, sum loss: 4387.406250, avg loss: 2.539008, ppl: 12.667104 +epoch: 1, batch: 28850, sum loss: 4256.864258, avg loss: 2.432494, ppl: 11.387245 +epoch: 1, batch: 28851, sum loss: 3425.545410, avg loss: 2.295942, ppl: 9.933790 +epoch: 1, batch: 28852, sum loss: 3274.190918, avg loss: 2.269017, ppl: 9.669886 +epoch: 1, batch: 28853, sum loss: 4502.996582, avg loss: 2.749082, ppl: 15.628280 +epoch: 1, batch: 28854, sum loss: 3968.636719, avg loss: 2.663515, ppl: 14.346623 +epoch: 1, batch: 28855, sum loss: 5787.962891, avg loss: 2.882452, ppl: 17.858004 +epoch: 1, batch: 28856, sum loss: 4363.373047, avg loss: 2.444467, ppl: 11.524404 +epoch: 1, batch: 28857, sum loss: 4519.365723, avg loss: 2.683709, ppl: 14.639289 +epoch: 1, batch: 28858, sum loss: 4487.702637, avg loss: 2.551281, ppl: 12.823517 +epoch: 1, batch: 28859, sum loss: 4754.613770, avg loss: 2.695359, ppl: 14.810839 +epoch: 1, batch: 28860, sum loss: 4830.745605, avg loss: 2.547862, ppl: 12.779749 +epoch: 1, batch: 28861, sum loss: 3830.661377, avg loss: 2.444583, ppl: 11.525742 +epoch: 1, batch: 28862, sum loss: 4354.549316, avg loss: 2.550996, ppl: 12.819860 +epoch: 1, batch: 28863, sum loss: 4851.544434, avg loss: 2.788244, ppl: 16.252455 +epoch: 1, batch: 28864, sum loss: 4507.356445, avg loss: 2.841965, ppl: 17.149431 +epoch: 1, batch: 28865, sum loss: 5034.680664, avg loss: 2.801714, ppl: 16.472864 +epoch: 1, batch: 28866, sum loss: 4369.761719, avg loss: 2.552431, ppl: 12.838274 +epoch: 1, batch: 28867, sum loss: 5055.543945, avg loss: 2.811759, ppl: 16.639168 +epoch: 1, batch: 28868, sum loss: 4299.075195, avg loss: 2.505289, ppl: 12.247093 +epoch: 1, batch: 28869, sum loss: 4673.159668, avg loss: 2.673432, ppl: 14.489617 +epoch: 1, batch: 28870, sum loss: 4364.444336, avg loss: 2.424691, ppl: 11.298742 +epoch: 1, batch: 28871, sum loss: 4236.938477, avg loss: 2.489388, ppl: 12.053897 +epoch: 1, batch: 28872, sum loss: 
3819.207520, avg loss: 2.494584, ppl: 12.116690 +epoch: 1, batch: 28873, sum loss: 4417.107422, avg loss: 2.459414, ppl: 11.697954 +epoch: 1, batch: 28874, sum loss: 4416.960938, avg loss: 2.464822, ppl: 11.761389 +epoch: 1, batch: 28875, sum loss: 4516.743652, avg loss: 2.533227, ppl: 12.594081 +epoch: 1, batch: 28876, sum loss: 4833.834473, avg loss: 2.807105, ppl: 16.561899 +epoch: 1, batch: 28877, sum loss: 4163.142090, avg loss: 2.448907, ppl: 11.575692 +epoch: 1, batch: 28878, sum loss: 5655.839355, avg loss: 2.787501, ppl: 16.240385 +epoch: 1, batch: 28879, sum loss: 4268.117676, avg loss: 2.675936, ppl: 14.525940 +epoch: 1, batch: 28880, sum loss: 5572.977051, avg loss: 2.818906, ppl: 16.758505 +epoch: 1, batch: 28881, sum loss: 3986.867432, avg loss: 2.710311, ppl: 15.033950 +epoch: 1, batch: 28882, sum loss: 5224.836914, avg loss: 2.754263, ppl: 15.709457 +epoch: 1, batch: 28883, sum loss: 4439.119141, avg loss: 2.486902, ppl: 12.023962 +epoch: 1, batch: 28884, sum loss: 4053.075195, avg loss: 2.618266, ppl: 13.711922 +epoch: 1, batch: 28885, sum loss: 4489.140625, avg loss: 2.495353, ppl: 12.126016 +epoch: 1, batch: 28886, sum loss: 4648.837402, avg loss: 2.805575, ppl: 16.536575 +epoch: 1, batch: 28887, sum loss: 4449.093750, avg loss: 2.537988, ppl: 12.654190 +epoch: 1, batch: 28888, sum loss: 4237.222168, avg loss: 2.376457, ppl: 10.766686 +epoch: 1, batch: 28889, sum loss: 5020.699219, avg loss: 2.760143, ppl: 15.802095 +epoch: 1, batch: 28890, sum loss: 6028.863770, avg loss: 3.114082, ppl: 22.512762 +epoch: 1, batch: 28891, sum loss: 4518.777344, avg loss: 2.451860, ppl: 11.609918 +epoch: 1, batch: 28892, sum loss: 4408.617676, avg loss: 2.743384, ppl: 15.539480 +epoch: 1, batch: 28893, sum loss: 4324.381836, avg loss: 2.554272, ppl: 12.861929 +epoch: 1, batch: 28894, sum loss: 4704.193848, avg loss: 2.504895, ppl: 12.242279 +epoch: 1, batch: 28895, sum loss: 4607.887695, avg loss: 2.675893, ppl: 14.525312 +epoch: 1, batch: 28896, sum loss: 
4097.041992, avg loss: 2.366864, ppl: 10.663900 +epoch: 1, batch: 28897, sum loss: 4462.256348, avg loss: 2.603417, ppl: 13.509818 +epoch: 1, batch: 28898, sum loss: 4689.041992, avg loss: 2.723021, ppl: 15.226248 +epoch: 1, batch: 28899, sum loss: 5156.024902, avg loss: 2.932892, ppl: 18.781876 +epoch: 1, batch: 28900, sum loss: 4636.822266, avg loss: 2.646588, ppl: 14.105832 +epoch: 1, batch: 28901, sum loss: 4542.488281, avg loss: 2.588312, ppl: 13.307295 +epoch: 1, batch: 28902, sum loss: 4369.799805, avg loss: 2.568959, ppl: 13.052233 +epoch: 1, batch: 28903, sum loss: 4999.222656, avg loss: 2.696452, ppl: 14.827034 +epoch: 1, batch: 28904, sum loss: 5669.322754, avg loss: 2.837499, ppl: 17.073011 +epoch: 1, batch: 28905, sum loss: 4381.909668, avg loss: 2.619193, ppl: 13.724641 +epoch: 1, batch: 28906, sum loss: 3619.227295, avg loss: 2.419270, ppl: 11.237648 +epoch: 1, batch: 28907, sum loss: 4449.198242, avg loss: 2.516515, ppl: 12.385359 +epoch: 1, batch: 28908, sum loss: 3727.936523, avg loss: 2.537737, ppl: 12.651014 +epoch: 1, batch: 28909, sum loss: 4494.863770, avg loss: 2.812806, ppl: 16.656590 +epoch: 1, batch: 28910, sum loss: 4656.040039, avg loss: 2.699154, ppl: 14.867147 +epoch: 1, batch: 28911, sum loss: 3818.469727, avg loss: 2.477917, ppl: 11.916413 +epoch: 1, batch: 28912, sum loss: 4595.681152, avg loss: 2.612667, ppl: 13.635369 +epoch: 1, batch: 28913, sum loss: 3953.456055, avg loss: 2.388795, ppl: 10.900355 +epoch: 1, batch: 28914, sum loss: 4321.564941, avg loss: 2.702667, ppl: 14.919473 +epoch: 1, batch: 28915, sum loss: 4862.960938, avg loss: 2.523592, ppl: 12.473317 +epoch: 1, batch: 28916, sum loss: 3846.693848, avg loss: 2.595610, ppl: 13.404763 +epoch: 1, batch: 28917, sum loss: 3983.926270, avg loss: 2.488399, ppl: 12.041979 +epoch: 1, batch: 28918, sum loss: 4568.645020, avg loss: 2.489725, ppl: 12.057959 +epoch: 1, batch: 28919, sum loss: 3543.408203, avg loss: 2.474447, ppl: 11.875141 +epoch: 1, batch: 28920, sum loss: 
4528.675293, avg loss: 2.522939, ppl: 12.465178 +epoch: 1, batch: 28921, sum loss: 3603.623047, avg loss: 2.528858, ppl: 12.539181 +epoch: 1, batch: 28922, sum loss: 4681.347168, avg loss: 2.512800, ppl: 12.339437 +epoch: 1, batch: 28923, sum loss: 3338.227539, avg loss: 2.445588, ppl: 11.537330 +epoch: 1, batch: 28924, sum loss: 4341.453125, avg loss: 2.693209, ppl: 14.779028 +epoch: 1, batch: 28925, sum loss: 4959.666992, avg loss: 2.506148, ppl: 12.257624 +epoch: 1, batch: 28926, sum loss: 3992.426514, avg loss: 2.467507, ppl: 11.793012 +epoch: 1, batch: 28927, sum loss: 4155.131836, avg loss: 2.232742, ppl: 9.325397 +epoch: 1, batch: 28928, sum loss: 4207.013672, avg loss: 2.821605, ppl: 16.803806 +epoch: 1, batch: 28929, sum loss: 3809.210693, avg loss: 2.517654, ppl: 12.399476 +epoch: 1, batch: 28930, sum loss: 3624.949707, avg loss: 2.477751, ppl: 11.914439 +epoch: 1, batch: 28931, sum loss: 4289.155762, avg loss: 2.667385, ppl: 14.402263 +epoch: 1, batch: 28932, sum loss: 5405.432617, avg loss: 2.850967, ppl: 17.304502 +epoch: 1, batch: 28933, sum loss: 4932.478516, avg loss: 2.815342, ppl: 16.698881 +epoch: 1, batch: 28934, sum loss: 5428.967773, avg loss: 2.917232, ppl: 18.490028 +epoch: 1, batch: 28935, sum loss: 4038.099609, avg loss: 2.365612, ppl: 10.650556 +epoch: 1, batch: 28936, sum loss: 4983.988770, avg loss: 2.874273, ppl: 17.712536 +epoch: 1, batch: 28937, sum loss: 5136.928711, avg loss: 2.808600, ppl: 16.586676 +epoch: 1, batch: 28938, sum loss: 4594.623535, avg loss: 2.702720, ppl: 14.920259 +epoch: 1, batch: 28939, sum loss: 5057.550781, avg loss: 2.609675, ppl: 13.594638 +epoch: 1, batch: 28940, sum loss: 4906.229492, avg loss: 2.821294, ppl: 16.798567 +epoch: 1, batch: 28941, sum loss: 4847.147461, avg loss: 2.664732, ppl: 14.364100 +epoch: 1, batch: 28942, sum loss: 3833.843750, avg loss: 2.265865, ppl: 9.639460 +epoch: 1, batch: 28943, sum loss: 5267.533691, avg loss: 2.652333, ppl: 14.187102 +epoch: 1, batch: 28944, sum loss: 
4209.945801, avg loss: 2.573317, ppl: 13.109230 +epoch: 1, batch: 28945, sum loss: 4872.501953, avg loss: 2.866178, ppl: 17.569735 +epoch: 1, batch: 28946, sum loss: 4481.556641, avg loss: 2.540565, ppl: 12.686837 +epoch: 1, batch: 28947, sum loss: 4040.213379, avg loss: 2.487816, ppl: 12.034965 +epoch: 1, batch: 28948, sum loss: 4128.181641, avg loss: 2.391762, ppl: 10.932743 +epoch: 1, batch: 28949, sum loss: 4060.541748, avg loss: 2.452018, ppl: 11.611756 +epoch: 1, batch: 28950, sum loss: 4235.145996, avg loss: 2.612675, ppl: 13.635473 +epoch: 1, batch: 28951, sum loss: 5263.203125, avg loss: 2.880790, ppl: 17.828352 +epoch: 1, batch: 28952, sum loss: 5705.904785, avg loss: 2.858670, ppl: 17.438314 +epoch: 1, batch: 28953, sum loss: 4032.165039, avg loss: 2.515387, ppl: 12.371393 +epoch: 1, batch: 28954, sum loss: 4174.307129, avg loss: 2.700069, ppl: 14.880765 +epoch: 1, batch: 28955, sum loss: 4953.837402, avg loss: 2.686463, ppl: 14.679660 +epoch: 1, batch: 28956, sum loss: 4311.522461, avg loss: 2.404642, ppl: 11.074461 +epoch: 1, batch: 28957, sum loss: 4843.491211, avg loss: 2.748860, ppl: 15.624811 +epoch: 1, batch: 28958, sum loss: 5001.203613, avg loss: 2.779991, ppl: 16.118874 +epoch: 1, batch: 28959, sum loss: 5076.206543, avg loss: 2.647995, ppl: 14.125691 +epoch: 1, batch: 28960, sum loss: 5495.088379, avg loss: 2.952761, ppl: 19.158781 +epoch: 1, batch: 28961, sum loss: 4157.889648, avg loss: 2.421602, ppl: 11.263885 +epoch: 1, batch: 28962, sum loss: 4663.233398, avg loss: 2.720673, ppl: 15.190539 +epoch: 1, batch: 28963, sum loss: 4306.870117, avg loss: 2.627743, ppl: 13.842485 +epoch: 1, batch: 28964, sum loss: 5296.382324, avg loss: 2.970490, ppl: 19.501463 +epoch: 1, batch: 28965, sum loss: 5593.754395, avg loss: 2.823702, ppl: 16.839075 +epoch: 1, batch: 28966, sum loss: 3231.493896, avg loss: 2.202790, ppl: 9.050233 +epoch: 1, batch: 28967, sum loss: 3642.831787, avg loss: 2.448140, ppl: 11.566809 +epoch: 1, batch: 28968, sum loss: 
4318.516113, avg loss: 2.695703, ppl: 14.815928 +epoch: 1, batch: 28969, sum loss: 4743.958008, avg loss: 2.802102, ppl: 16.479244 +epoch: 1, batch: 28970, sum loss: 3812.458496, avg loss: 2.409898, ppl: 11.132823 +epoch: 1, batch: 28971, sum loss: 4436.437500, avg loss: 2.542371, ppl: 12.709770 +epoch: 1, batch: 28972, sum loss: 4550.920898, avg loss: 2.556697, ppl: 12.893163 +epoch: 1, batch: 28973, sum loss: 4832.425293, avg loss: 2.879872, ppl: 17.811995 +epoch: 1, batch: 28974, sum loss: 3673.250488, avg loss: 2.389883, ppl: 10.912220 +epoch: 1, batch: 28975, sum loss: 3562.093994, avg loss: 2.234689, ppl: 9.343574 +epoch: 1, batch: 28976, sum loss: 4034.604492, avg loss: 2.463128, ppl: 11.741486 +epoch: 1, batch: 28977, sum loss: 4880.969727, avg loss: 2.924488, ppl: 18.624680 +epoch: 1, batch: 28978, sum loss: 5102.258789, avg loss: 2.806523, ppl: 16.552267 +epoch: 1, batch: 28979, sum loss: 4641.822266, avg loss: 2.502330, ppl: 12.210913 +epoch: 1, batch: 28980, sum loss: 4259.422363, avg loss: 2.650543, ppl: 14.161726 +epoch: 1, batch: 28981, sum loss: 4907.329590, avg loss: 2.756927, ppl: 15.751361 +epoch: 1, batch: 28982, sum loss: 4931.626953, avg loss: 2.999773, ppl: 20.080978 +epoch: 1, batch: 28983, sum loss: 5055.527344, avg loss: 2.662205, ppl: 14.327847 +epoch: 1, batch: 28984, sum loss: 5202.456055, avg loss: 2.583146, ppl: 13.238723 +epoch: 1, batch: 28985, sum loss: 4013.251221, avg loss: 2.541641, ppl: 12.700496 +epoch: 1, batch: 28986, sum loss: 5077.183105, avg loss: 2.926330, ppl: 18.659033 +epoch: 1, batch: 28987, sum loss: 3356.457275, avg loss: 2.419940, ppl: 11.245187 +epoch: 1, batch: 28988, sum loss: 5069.517090, avg loss: 2.450226, ppl: 11.590964 +epoch: 1, batch: 28989, sum loss: 3983.251221, avg loss: 2.533875, ppl: 12.602242 +epoch: 1, batch: 28990, sum loss: 4481.430176, avg loss: 2.664346, ppl: 14.358556 +epoch: 1, batch: 28991, sum loss: 4185.133789, avg loss: 2.660606, ppl: 14.304961 +epoch: 1, batch: 28992, sum loss: 
3962.018799, avg loss: 2.351346, ppl: 10.499698 +epoch: 1, batch: 28993, sum loss: 4371.093262, avg loss: 2.447421, ppl: 11.558497 +epoch: 1, batch: 28994, sum loss: 4466.401855, avg loss: 2.700364, ppl: 14.885147 +epoch: 1, batch: 28995, sum loss: 4257.281250, avg loss: 2.405244, ppl: 11.081130 +epoch: 1, batch: 28996, sum loss: 4370.558594, avg loss: 2.595344, ppl: 13.401191 +epoch: 1, batch: 28997, sum loss: 4935.546387, avg loss: 2.531049, ppl: 12.566685 +epoch: 1, batch: 28998, sum loss: 3967.563477, avg loss: 2.661009, ppl: 14.310716 +epoch: 1, batch: 28999, sum loss: 4282.424805, avg loss: 2.572027, ppl: 13.092336 +epoch: 1, batch: 29000, sum loss: 3601.286377, avg loss: 2.128420, ppl: 8.401581 +epoch: 1, batch: 29001, sum loss: 3967.093506, avg loss: 2.343233, ppl: 10.414855 +epoch: 1, batch: 29002, sum loss: 4685.350098, avg loss: 2.581460, ppl: 13.216423 +epoch: 1, batch: 29003, sum loss: 3561.002441, avg loss: 2.266711, ppl: 9.647615 +epoch: 1, batch: 29004, sum loss: 3834.493652, avg loss: 2.551227, ppl: 12.822823 +epoch: 1, batch: 29005, sum loss: 3649.214355, avg loss: 2.485841, ppl: 12.011215 +epoch: 1, batch: 29006, sum loss: 4126.214844, avg loss: 2.567651, ppl: 13.035166 +epoch: 1, batch: 29007, sum loss: 4047.189941, avg loss: 2.795021, ppl: 16.362970 +epoch: 1, batch: 29008, sum loss: 3983.334961, avg loss: 2.532317, ppl: 12.582628 +epoch: 1, batch: 29009, sum loss: 5325.582031, avg loss: 2.640348, ppl: 14.018084 +epoch: 1, batch: 29010, sum loss: 5093.678711, avg loss: 2.602799, ppl: 13.501481 +epoch: 1, batch: 29011, sum loss: 5126.955566, avg loss: 2.699819, ppl: 14.877033 +epoch: 1, batch: 29012, sum loss: 4400.777832, avg loss: 2.546746, ppl: 12.765504 +epoch: 1, batch: 29013, sum loss: 4317.628906, avg loss: 2.576151, ppl: 13.146439 +epoch: 1, batch: 29014, sum loss: 4171.231934, avg loss: 2.670443, ppl: 14.446368 +epoch: 1, batch: 29015, sum loss: 4625.900391, avg loss: 2.825840, ppl: 16.875118 +epoch: 1, batch: 29016, sum loss: 
4785.241699, avg loss: 2.731302, ppl: 15.352871 +epoch: 1, batch: 29017, sum loss: 4770.814453, avg loss: 2.690815, ppl: 14.743683 +epoch: 1, batch: 29018, sum loss: 3674.691895, avg loss: 2.420746, ppl: 11.254248 +epoch: 1, batch: 29019, sum loss: 4619.118652, avg loss: 2.605256, ppl: 13.534691 +epoch: 1, batch: 29020, sum loss: 4060.395996, avg loss: 2.475851, ppl: 11.891827 +epoch: 1, batch: 29021, sum loss: 4529.436035, avg loss: 2.726933, ppl: 15.285936 +epoch: 1, batch: 29022, sum loss: 4925.858398, avg loss: 2.775131, ppl: 16.040731 +epoch: 1, batch: 29023, sum loss: 4677.808594, avg loss: 2.801083, ppl: 16.462467 +epoch: 1, batch: 29024, sum loss: 4115.750488, avg loss: 2.817078, ppl: 16.727898 +epoch: 1, batch: 29025, sum loss: 4586.528320, avg loss: 2.685321, ppl: 14.662906 +epoch: 1, batch: 29026, sum loss: 4320.918945, avg loss: 2.500532, ppl: 12.188975 +epoch: 1, batch: 29027, sum loss: 4112.819336, avg loss: 2.488094, ppl: 12.038307 +epoch: 1, batch: 29028, sum loss: 4071.286133, avg loss: 2.591525, ppl: 13.350120 +epoch: 1, batch: 29029, sum loss: 5202.376953, avg loss: 2.748218, ppl: 15.614782 +epoch: 1, batch: 29030, sum loss: 5260.108887, avg loss: 2.724034, ppl: 15.241677 +epoch: 1, batch: 29031, sum loss: 5175.497070, avg loss: 2.859391, ppl: 17.450891 +epoch: 1, batch: 29032, sum loss: 3857.617676, avg loss: 2.390098, ppl: 10.914562 +epoch: 1, batch: 29033, sum loss: 4091.036133, avg loss: 2.556898, ppl: 12.895748 +epoch: 1, batch: 29034, sum loss: 3924.538086, avg loss: 2.501299, ppl: 12.198328 +epoch: 1, batch: 29035, sum loss: 5046.864746, avg loss: 2.870799, ppl: 17.651117 +epoch: 1, batch: 29036, sum loss: 4463.035156, avg loss: 2.573838, ppl: 13.116067 +epoch: 1, batch: 29037, sum loss: 4474.585938, avg loss: 2.515225, ppl: 12.369396 +epoch: 1, batch: 29038, sum loss: 5295.607422, avg loss: 2.768221, ppl: 15.930271 +epoch: 1, batch: 29039, sum loss: 4689.611816, avg loss: 2.588086, ppl: 13.304281 +epoch: 1, batch: 29040, sum loss: 
3407.490479, avg loss: 2.238824, ppl: 9.382295 +epoch: 1, batch: 29041, sum loss: 4874.701660, avg loss: 2.740136, ppl: 15.489086 +epoch: 1, batch: 29042, sum loss: 4651.651367, avg loss: 2.636991, ppl: 13.971095 +epoch: 1, batch: 29043, sum loss: 3483.950439, avg loss: 2.177469, ppl: 8.823945 +epoch: 1, batch: 29044, sum loss: 4809.641113, avg loss: 2.435261, ppl: 11.418801 +epoch: 1, batch: 29045, sum loss: 4673.815430, avg loss: 2.807096, ppl: 16.561762 +epoch: 1, batch: 29046, sum loss: 4028.815186, avg loss: 2.462601, ppl: 11.735297 +epoch: 1, batch: 29047, sum loss: 4356.522461, avg loss: 2.808847, ppl: 16.590786 +epoch: 1, batch: 29048, sum loss: 3852.850586, avg loss: 2.482507, ppl: 11.971238 +epoch: 1, batch: 29049, sum loss: 4930.617676, avg loss: 2.806271, ppl: 16.548088 +epoch: 1, batch: 29050, sum loss: 5078.238281, avg loss: 2.715636, ppl: 15.114213 +epoch: 1, batch: 29051, sum loss: 3962.881348, avg loss: 2.395938, ppl: 10.978490 +epoch: 1, batch: 29052, sum loss: 4260.299805, avg loss: 2.615285, ppl: 13.671118 +epoch: 1, batch: 29053, sum loss: 5363.702148, avg loss: 3.084360, ppl: 21.853479 +epoch: 1, batch: 29054, sum loss: 4501.774414, avg loss: 2.707020, ppl: 14.984555 +epoch: 1, batch: 29055, sum loss: 4719.821777, avg loss: 2.613412, ppl: 13.645529 +epoch: 1, batch: 29056, sum loss: 4493.159668, avg loss: 2.449923, ppl: 11.587458 +epoch: 1, batch: 29057, sum loss: 3754.519531, avg loss: 2.642167, ppl: 14.043605 +epoch: 1, batch: 29058, sum loss: 4463.141113, avg loss: 2.548910, ppl: 12.793150 +epoch: 1, batch: 29059, sum loss: 4567.442871, avg loss: 2.615947, ppl: 13.680162 +epoch: 1, batch: 29060, sum loss: 4495.000488, avg loss: 2.448257, ppl: 11.568169 +epoch: 1, batch: 29061, sum loss: 4786.271484, avg loss: 2.864316, ppl: 17.537050 +epoch: 1, batch: 29062, sum loss: 4634.915039, avg loss: 2.657635, ppl: 14.262514 +epoch: 1, batch: 29063, sum loss: 5408.108398, avg loss: 2.685257, ppl: 14.661976 +epoch: 1, batch: 29064, sum loss: 
4394.338867, avg loss: 2.700884, ppl: 14.892897 +epoch: 1, batch: 29065, sum loss: 3994.751465, avg loss: 2.521939, ppl: 12.452720 +epoch: 1, batch: 29066, sum loss: 5376.775391, avg loss: 2.651270, ppl: 14.172025 +epoch: 1, batch: 29067, sum loss: 4315.473145, avg loss: 2.652411, ppl: 14.188209 +epoch: 1, batch: 29068, sum loss: 3894.079102, avg loss: 2.439899, ppl: 11.471885 +epoch: 1, batch: 29069, sum loss: 4732.422852, avg loss: 2.737087, ppl: 15.441934 +epoch: 1, batch: 29070, sum loss: 3953.624512, avg loss: 2.521444, ppl: 12.446558 +epoch: 1, batch: 29071, sum loss: 3900.704590, avg loss: 2.413802, ppl: 11.176377 +epoch: 1, batch: 29072, sum loss: 3562.645752, avg loss: 2.379857, ppl: 10.803356 +epoch: 1, batch: 29073, sum loss: 4794.598145, avg loss: 2.800583, ppl: 16.454235 +epoch: 1, batch: 29074, sum loss: 4760.767090, avg loss: 2.409295, ppl: 11.126116 +epoch: 1, batch: 29075, sum loss: 4497.834961, avg loss: 2.580513, ppl: 13.203917 +epoch: 1, batch: 29076, sum loss: 3730.210205, avg loss: 2.449252, ppl: 11.579678 +epoch: 1, batch: 29077, sum loss: 4112.421387, avg loss: 2.456644, ppl: 11.665594 +epoch: 1, batch: 29078, sum loss: 5049.159668, avg loss: 2.638014, ppl: 13.985405 +epoch: 1, batch: 29079, sum loss: 4184.274902, avg loss: 2.368011, ppl: 10.676134 +epoch: 1, batch: 29080, sum loss: 4214.503418, avg loss: 2.458870, ppl: 11.691594 +epoch: 1, batch: 29081, sum loss: 4298.713867, avg loss: 2.695118, ppl: 14.807272 +epoch: 1, batch: 29082, sum loss: 5325.437012, avg loss: 2.804338, ppl: 16.516130 +epoch: 1, batch: 29083, sum loss: 4170.489746, avg loss: 2.441739, ppl: 11.493005 +epoch: 1, batch: 29084, sum loss: 3804.788330, avg loss: 2.622184, ppl: 13.765749 +epoch: 1, batch: 29085, sum loss: 4363.381836, avg loss: 2.548704, ppl: 12.790519 +epoch: 1, batch: 29086, sum loss: 5947.265625, avg loss: 3.235727, ppl: 25.424839 +epoch: 1, batch: 29087, sum loss: 4597.988770, avg loss: 2.633442, ppl: 13.921601 +epoch: 1, batch: 29088, sum loss: 
3970.513672, avg loss: 2.458522, ppl: 11.687529 +epoch: 1, batch: 29089, sum loss: 4738.754883, avg loss: 2.561489, ppl: 12.955094 +epoch: 1, batch: 29090, sum loss: 4574.505859, avg loss: 2.642695, ppl: 14.051026 +epoch: 1, batch: 29091, sum loss: 4141.178223, avg loss: 2.433125, ppl: 11.394429 +epoch: 1, batch: 29092, sum loss: 4959.943848, avg loss: 2.675266, ppl: 14.516214 +epoch: 1, batch: 29093, sum loss: 4442.689453, avg loss: 2.465421, ppl: 11.768441 +epoch: 1, batch: 29094, sum loss: 3993.283691, avg loss: 2.584650, ppl: 13.258645 +epoch: 1, batch: 29095, sum loss: 4771.329102, avg loss: 2.581888, ppl: 13.222077 +epoch: 1, batch: 29096, sum loss: 3857.977051, avg loss: 2.633431, ppl: 13.921458 +epoch: 1, batch: 29097, sum loss: 4363.473633, avg loss: 2.631769, ppl: 13.898340 +epoch: 1, batch: 29098, sum loss: 4183.668457, avg loss: 2.535557, ppl: 12.623454 +epoch: 1, batch: 29099, sum loss: 3577.849854, avg loss: 2.360059, ppl: 10.591579 +epoch: 1, batch: 29100, sum loss: 4492.775391, avg loss: 2.615120, ppl: 13.668853 +epoch: 1, batch: 29101, sum loss: 5401.664062, avg loss: 2.891683, ppl: 18.023621 +epoch: 1, batch: 29102, sum loss: 3907.182861, avg loss: 2.428330, ppl: 11.339928 +epoch: 1, batch: 29103, sum loss: 4308.451172, avg loss: 2.583004, ppl: 13.236848 +epoch: 1, batch: 29104, sum loss: 3498.033203, avg loss: 2.125172, ppl: 8.374339 +epoch: 1, batch: 29105, sum loss: 4277.367676, avg loss: 2.632226, ppl: 13.904691 +epoch: 1, batch: 29106, sum loss: 5136.621094, avg loss: 2.716352, ppl: 15.125042 +epoch: 1, batch: 29107, sum loss: 4443.772949, avg loss: 2.706317, ppl: 14.974030 +epoch: 1, batch: 29108, sum loss: 4562.864746, avg loss: 2.880596, ppl: 17.824902 +epoch: 1, batch: 29109, sum loss: 4454.135254, avg loss: 2.606282, ppl: 13.548580 +epoch: 1, batch: 29110, sum loss: 5184.929688, avg loss: 2.741898, ppl: 15.516412 +epoch: 1, batch: 29111, sum loss: 4413.040039, avg loss: 2.633079, ppl: 13.916551 +epoch: 1, batch: 29112, sum loss: 
5420.738281, avg loss: 2.627600, ppl: 13.840508 +epoch: 1, batch: 29113, sum loss: 5423.887695, avg loss: 2.944565, ppl: 19.002386 +epoch: 1, batch: 29114, sum loss: 3520.502441, avg loss: 2.427933, ppl: 11.335424 +epoch: 1, batch: 29115, sum loss: 4560.852051, avg loss: 2.518417, ppl: 12.408934 +epoch: 1, batch: 29116, sum loss: 5491.876465, avg loss: 3.247710, ppl: 25.731335 +epoch: 1, batch: 29117, sum loss: 4166.303223, avg loss: 2.578158, ppl: 13.172850 +epoch: 1, batch: 29118, sum loss: 3646.715820, avg loss: 2.357282, ppl: 10.562208 +epoch: 1, batch: 29119, sum loss: 3868.789551, avg loss: 2.401483, ppl: 11.039539 +epoch: 1, batch: 29120, sum loss: 4963.244629, avg loss: 2.579649, ppl: 13.192507 +epoch: 1, batch: 29121, sum loss: 3780.929199, avg loss: 2.224076, ppl: 9.244937 +epoch: 1, batch: 29122, sum loss: 4959.564941, avg loss: 2.800432, ppl: 16.451752 +epoch: 1, batch: 29123, sum loss: 4191.516602, avg loss: 2.515916, ppl: 12.377946 +epoch: 1, batch: 29124, sum loss: 4253.666016, avg loss: 2.673580, ppl: 14.491759 +epoch: 1, batch: 29125, sum loss: 3583.691895, avg loss: 2.514871, ppl: 12.365018 +epoch: 1, batch: 29126, sum loss: 4349.394043, avg loss: 2.486789, ppl: 12.022610 +epoch: 1, batch: 29127, sum loss: 4328.627930, avg loss: 2.739638, ppl: 15.481378 +epoch: 1, batch: 29128, sum loss: 3916.463135, avg loss: 2.414589, ppl: 11.185169 +epoch: 1, batch: 29129, sum loss: 3586.940430, avg loss: 2.227913, ppl: 9.280481 +epoch: 1, batch: 29130, sum loss: 4384.537109, avg loss: 2.699838, ppl: 14.877324 +epoch: 1, batch: 29131, sum loss: 4570.208008, avg loss: 2.543243, ppl: 12.720863 +epoch: 1, batch: 29132, sum loss: 3764.538574, avg loss: 2.614263, ppl: 13.657148 +epoch: 1, batch: 29133, sum loss: 4792.434570, avg loss: 2.759030, ppl: 15.784522 +epoch: 1, batch: 29134, sum loss: 4362.968750, avg loss: 2.394604, ppl: 10.963858 +epoch: 1, batch: 29135, sum loss: 4999.561523, avg loss: 2.782171, ppl: 16.154057 +epoch: 1, batch: 29136, sum loss: 
3784.894043, avg loss: 2.594170, ppl: 13.385474 +epoch: 1, batch: 29137, sum loss: 3960.603516, avg loss: 2.329767, ppl: 10.275547 +epoch: 1, batch: 29138, sum loss: 4394.894043, avg loss: 2.753693, ppl: 15.700508 +epoch: 1, batch: 29139, sum loss: 4331.109863, avg loss: 2.769252, ppl: 15.946698 +epoch: 1, batch: 29140, sum loss: 4304.308105, avg loss: 2.700319, ppl: 14.884477 +epoch: 1, batch: 29141, sum loss: 4050.128418, avg loss: 2.521873, ppl: 12.451900 +epoch: 1, batch: 29142, sum loss: 4166.604004, avg loss: 2.642108, ppl: 14.042774 +epoch: 1, batch: 29143, sum loss: 4008.151611, avg loss: 2.368884, ppl: 10.685459 +epoch: 1, batch: 29144, sum loss: 3922.052734, avg loss: 2.558417, ppl: 12.915351 +epoch: 1, batch: 29145, sum loss: 4511.220703, avg loss: 2.757470, ppl: 15.759918 +epoch: 1, batch: 29146, sum loss: 4850.164062, avg loss: 2.823146, ppl: 16.829708 +epoch: 1, batch: 29147, sum loss: 4948.429688, avg loss: 2.775339, ppl: 16.044071 +epoch: 1, batch: 29148, sum loss: 4178.326172, avg loss: 2.549314, ppl: 12.798325 +epoch: 1, batch: 29149, sum loss: 5100.643555, avg loss: 2.852709, ppl: 17.334679 +epoch: 1, batch: 29150, sum loss: 6299.301270, avg loss: 2.775023, ppl: 16.038992 +epoch: 1, batch: 29151, sum loss: 4893.152344, avg loss: 2.640665, ppl: 14.022526 +epoch: 1, batch: 29152, sum loss: 3800.150391, avg loss: 2.428211, ppl: 11.338579 +epoch: 1, batch: 29153, sum loss: 5165.027832, avg loss: 2.692924, ppl: 14.774811 +epoch: 1, batch: 29154, sum loss: 3858.247070, avg loss: 2.478001, ppl: 11.917414 +epoch: 1, batch: 29155, sum loss: 3752.727051, avg loss: 2.443182, ppl: 11.509604 +epoch: 1, batch: 29156, sum loss: 3629.551270, avg loss: 2.559627, ppl: 12.930997 +epoch: 1, batch: 29157, sum loss: 3981.021484, avg loss: 2.436366, ppl: 11.431418 +epoch: 1, batch: 29158, sum loss: 4498.403809, avg loss: 2.758065, ppl: 15.769300 +epoch: 1, batch: 29159, sum loss: 4039.533447, avg loss: 2.424690, ppl: 11.298723 +epoch: 1, batch: 29160, sum loss: 
3483.167725, avg loss: 2.465087, ppl: 11.764503 +epoch: 1, batch: 29161, sum loss: 4159.924316, avg loss: 2.639546, ppl: 14.006842 +epoch: 1, batch: 29162, sum loss: 4360.003418, avg loss: 2.566218, ppl: 13.016499 +epoch: 1, batch: 29163, sum loss: 4587.453613, avg loss: 2.491827, ppl: 12.083332 +epoch: 1, batch: 29164, sum loss: 4262.542969, avg loss: 2.485448, ppl: 12.006494 +epoch: 1, batch: 29165, sum loss: 4253.782227, avg loss: 2.512571, ppl: 12.336605 +epoch: 1, batch: 29166, sum loss: 5621.612305, avg loss: 2.980707, ppl: 19.701735 +epoch: 1, batch: 29167, sum loss: 5250.880859, avg loss: 2.870903, ppl: 17.652948 +epoch: 1, batch: 29168, sum loss: 4114.492188, avg loss: 2.547673, ppl: 12.777339 +epoch: 1, batch: 29169, sum loss: 4172.151367, avg loss: 2.657421, ppl: 14.259472 +epoch: 1, batch: 29170, sum loss: 4997.468750, avg loss: 2.600140, ppl: 13.465621 +epoch: 1, batch: 29171, sum loss: 5139.979004, avg loss: 2.835068, ppl: 17.031567 +epoch: 1, batch: 29172, sum loss: 4577.613770, avg loss: 2.611303, ppl: 13.616780 +epoch: 1, batch: 29173, sum loss: 4383.689453, avg loss: 2.658393, ppl: 14.273329 +epoch: 1, batch: 29174, sum loss: 4211.632324, avg loss: 2.502455, ppl: 12.212441 +epoch: 1, batch: 29175, sum loss: 4950.414551, avg loss: 2.574318, ppl: 13.122370 +epoch: 1, batch: 29176, sum loss: 3913.177246, avg loss: 2.405149, ppl: 11.080078 +epoch: 1, batch: 29177, sum loss: 3771.798828, avg loss: 2.489636, ppl: 12.056889 +epoch: 1, batch: 29178, sum loss: 4357.245117, avg loss: 2.699656, ppl: 14.874618 +epoch: 1, batch: 29179, sum loss: 4208.836914, avg loss: 2.619065, ppl: 13.722884 +epoch: 1, batch: 29180, sum loss: 4523.040039, avg loss: 2.447532, ppl: 11.559787 +epoch: 1, batch: 29181, sum loss: 4718.311035, avg loss: 2.644793, ppl: 14.080534 +epoch: 1, batch: 29182, sum loss: 3457.995117, avg loss: 2.320802, ppl: 10.183841 +epoch: 1, batch: 29183, sum loss: 4791.741211, avg loss: 2.687460, ppl: 14.694307 +epoch: 1, batch: 29184, sum loss: 
5185.038574, avg loss: 2.883781, ppl: 17.881756 +epoch: 1, batch: 29185, sum loss: 4312.282227, avg loss: 2.627838, ppl: 13.843805 +epoch: 1, batch: 29186, sum loss: 4359.642090, avg loss: 2.896772, ppl: 18.115582 +epoch: 1, batch: 29187, sum loss: 4336.534180, avg loss: 2.688490, ppl: 14.709447 +epoch: 1, batch: 29188, sum loss: 4818.559082, avg loss: 2.688928, ppl: 14.715894 +epoch: 1, batch: 29189, sum loss: 4228.723633, avg loss: 2.681499, ppl: 14.606969 +epoch: 1, batch: 29190, sum loss: 4809.362305, avg loss: 2.649786, ppl: 14.151016 +epoch: 1, batch: 29191, sum loss: 3947.958984, avg loss: 2.595634, ppl: 13.405084 +epoch: 1, batch: 29192, sum loss: 3304.003418, avg loss: 2.297638, ppl: 9.950650 +epoch: 1, batch: 29193, sum loss: 4200.234375, avg loss: 2.616968, ppl: 13.694145 +epoch: 1, batch: 29194, sum loss: 4467.493164, avg loss: 2.725743, ppl: 15.267755 +epoch: 1, batch: 29195, sum loss: 4065.868408, avg loss: 2.570081, ppl: 13.066883 +epoch: 1, batch: 29196, sum loss: 5233.505371, avg loss: 2.883474, ppl: 17.876270 +epoch: 1, batch: 29197, sum loss: 5026.107422, avg loss: 3.074072, ppl: 21.629793 +epoch: 1, batch: 29198, sum loss: 3678.525146, avg loss: 2.460552, ppl: 11.711274 +epoch: 1, batch: 29199, sum loss: 5835.982910, avg loss: 3.092731, ppl: 22.037174 +epoch: 1, batch: 29200, sum loss: 4324.987305, avg loss: 2.616447, ppl: 13.687010 +epoch: 1, batch: 29201, sum loss: 4490.380859, avg loss: 2.594096, ppl: 13.384488 +epoch: 1, batch: 29202, sum loss: 6049.142090, avg loss: 2.972551, ppl: 19.541714 +epoch: 1, batch: 29203, sum loss: 4304.406250, avg loss: 2.527543, ppl: 12.522704 +epoch: 1, batch: 29204, sum loss: 4992.423828, avg loss: 2.860988, ppl: 17.478786 +epoch: 1, batch: 29205, sum loss: 4003.560059, avg loss: 2.377411, ppl: 10.776966 +epoch: 1, batch: 29206, sum loss: 3902.607666, avg loss: 2.519437, ppl: 12.421599 +epoch: 1, batch: 29207, sum loss: 4876.631836, avg loss: 2.673592, ppl: 14.491932 +epoch: 1, batch: 29208, sum loss: 
4641.078125, avg loss: 2.854291, ppl: 17.362131 +epoch: 1, batch: 29209, sum loss: 4311.224609, avg loss: 2.679444, ppl: 14.576981 +epoch: 1, batch: 29210, sum loss: 4030.806885, avg loss: 2.486617, ppl: 12.020546 +epoch: 1, batch: 29211, sum loss: 4516.418945, avg loss: 2.613668, ppl: 13.649030 +epoch: 1, batch: 29212, sum loss: 3871.859375, avg loss: 2.387089, ppl: 10.881776 +epoch: 1, batch: 29213, sum loss: 4601.907715, avg loss: 2.699066, ppl: 14.865843 +epoch: 1, batch: 29214, sum loss: 4614.637207, avg loss: 2.714493, ppl: 15.096948 +epoch: 1, batch: 29215, sum loss: 4621.770996, avg loss: 2.556289, ppl: 12.887904 +epoch: 1, batch: 29216, sum loss: 4568.531250, avg loss: 2.676351, ppl: 14.531970 +epoch: 1, batch: 29217, sum loss: 4570.023926, avg loss: 2.650826, ppl: 14.165738 +epoch: 1, batch: 29218, sum loss: 5018.535645, avg loss: 2.612460, ppl: 13.632547 +epoch: 1, batch: 29219, sum loss: 3484.960449, avg loss: 2.393517, ppl: 10.951941 +epoch: 1, batch: 29220, sum loss: 3912.529297, avg loss: 2.525842, ppl: 12.501416 +epoch: 1, batch: 29221, sum loss: 4351.951172, avg loss: 2.613785, ppl: 13.650615 +epoch: 1, batch: 29222, sum loss: 5059.950195, avg loss: 2.874972, ppl: 17.724920 +epoch: 1, batch: 29223, sum loss: 4283.082031, avg loss: 2.515022, ppl: 12.366879 +epoch: 1, batch: 29224, sum loss: 3903.423828, avg loss: 2.377238, ppl: 10.775096 +epoch: 1, batch: 29225, sum loss: 4787.070312, avg loss: 2.692391, ppl: 14.766937 +epoch: 1, batch: 29226, sum loss: 5284.791016, avg loss: 3.040731, ppl: 20.920540 +epoch: 1, batch: 29227, sum loss: 4596.135254, avg loss: 2.523962, ppl: 12.477940 +epoch: 1, batch: 29228, sum loss: 3779.151367, avg loss: 2.489560, ppl: 12.055966 +epoch: 1, batch: 29229, sum loss: 4724.475586, avg loss: 2.779104, ppl: 16.104576 +epoch: 1, batch: 29230, sum loss: 4616.462891, avg loss: 2.642509, ppl: 14.048404 +epoch: 1, batch: 29231, sum loss: 3731.727051, avg loss: 2.481201, ppl: 11.955620 +epoch: 1, batch: 29232, sum loss: 
4210.053711, avg loss: 2.525527, ppl: 12.497479 +epoch: 1, batch: 29233, sum loss: 3801.998779, avg loss: 2.481723, ppl: 11.961852 +epoch: 1, batch: 29234, sum loss: 4544.700684, avg loss: 2.595489, ppl: 13.403137 +epoch: 1, batch: 29235, sum loss: 4823.699707, avg loss: 2.907595, ppl: 18.312698 +epoch: 1, batch: 29236, sum loss: 3923.730957, avg loss: 2.511992, ppl: 12.329463 +epoch: 1, batch: 29237, sum loss: 4996.695801, avg loss: 2.786780, ppl: 16.228674 +epoch: 1, batch: 29238, sum loss: 5043.250488, avg loss: 2.812744, ppl: 16.655565 +epoch: 1, batch: 29239, sum loss: 3652.577393, avg loss: 2.290017, ppl: 9.875107 +epoch: 1, batch: 29240, sum loss: 6027.875000, avg loss: 2.956290, ppl: 19.226509 +epoch: 1, batch: 29241, sum loss: 3845.081299, avg loss: 2.401675, ppl: 11.041656 +epoch: 1, batch: 29242, sum loss: 4214.148926, avg loss: 2.632198, ppl: 13.904300 +epoch: 1, batch: 29243, sum loss: 4781.579102, avg loss: 2.686280, ppl: 14.676983 +epoch: 1, batch: 29244, sum loss: 4898.400391, avg loss: 2.805499, ppl: 16.535318 +epoch: 1, batch: 29245, sum loss: 4822.099121, avg loss: 2.678944, ppl: 14.569701 +epoch: 1, batch: 29246, sum loss: 3878.761719, avg loss: 2.359344, ppl: 10.584006 +epoch: 1, batch: 29247, sum loss: 3170.145020, avg loss: 2.213789, ppl: 9.150317 +epoch: 1, batch: 29248, sum loss: 4552.775391, avg loss: 2.578015, ppl: 13.170972 +epoch: 1, batch: 29249, sum loss: 3692.870117, avg loss: 2.364193, ppl: 10.635457 +epoch: 1, batch: 29250, sum loss: 3380.175781, avg loss: 2.388817, ppl: 10.900589 +epoch: 1, batch: 29251, sum loss: 4187.219727, avg loss: 2.751130, ppl: 15.660319 +epoch: 1, batch: 29252, sum loss: 4605.170898, avg loss: 2.804611, ppl: 16.520647 +epoch: 1, batch: 29253, sum loss: 5102.972656, avg loss: 2.889565, ppl: 17.985493 +epoch: 1, batch: 29254, sum loss: 4289.484375, avg loss: 2.307415, ppl: 10.048413 +epoch: 1, batch: 29255, sum loss: 5254.714844, avg loss: 2.852723, ppl: 17.334915 +epoch: 1, batch: 29256, sum loss: 
3780.487793, avg loss: 2.317896, ppl: 10.154284 +epoch: 1, batch: 29257, sum loss: 4673.312012, avg loss: 2.618102, ppl: 13.709679 +epoch: 1, batch: 29258, sum loss: 3486.339600, avg loss: 2.341397, ppl: 10.395745 +epoch: 1, batch: 29259, sum loss: 4146.563965, avg loss: 2.542345, ppl: 12.709435 +epoch: 1, batch: 29260, sum loss: 5260.837891, avg loss: 2.953867, ppl: 19.179987 +epoch: 1, batch: 29261, sum loss: 4527.481445, avg loss: 2.738948, ppl: 15.470703 +epoch: 1, batch: 29262, sum loss: 4539.587891, avg loss: 2.602975, ppl: 13.503848 +epoch: 1, batch: 29263, sum loss: 4509.621094, avg loss: 2.758178, ppl: 15.771082 +epoch: 1, batch: 29264, sum loss: 4693.074219, avg loss: 2.633600, ppl: 13.923799 +epoch: 1, batch: 29265, sum loss: 4222.928711, avg loss: 2.524165, ppl: 12.480474 +epoch: 1, batch: 29266, sum loss: 4828.974609, avg loss: 2.694741, ppl: 14.801678 +epoch: 1, batch: 29267, sum loss: 4605.000977, avg loss: 2.595829, ppl: 13.407698 +epoch: 1, batch: 29268, sum loss: 4921.685547, avg loss: 2.817221, ppl: 16.730295 +epoch: 1, batch: 29269, sum loss: 6309.485840, avg loss: 3.042182, ppl: 20.950912 +epoch: 1, batch: 29270, sum loss: 3772.933594, avg loss: 2.456337, ppl: 11.662015 +epoch: 1, batch: 29271, sum loss: 3317.589111, avg loss: 2.294322, ppl: 9.917707 +epoch: 1, batch: 29272, sum loss: 4460.444824, avg loss: 2.532904, ppl: 12.590019 +epoch: 1, batch: 29273, sum loss: 4228.569824, avg loss: 2.496204, ppl: 12.136338 +epoch: 1, batch: 29274, sum loss: 4298.652344, avg loss: 2.816941, ppl: 16.725613 +epoch: 1, batch: 29275, sum loss: 4371.907715, avg loss: 2.522739, ppl: 12.462690 +epoch: 1, batch: 29276, sum loss: 3636.086670, avg loss: 2.203689, ppl: 9.058367 +epoch: 1, batch: 29277, sum loss: 4626.855469, avg loss: 2.699449, ppl: 14.871536 +epoch: 1, batch: 29278, sum loss: 4555.920898, avg loss: 2.808829, ppl: 16.590481 +epoch: 1, batch: 29279, sum loss: 5234.357422, avg loss: 2.779797, ppl: 16.115747 +epoch: 1, batch: 29280, sum loss: 
5090.638184, avg loss: 2.947677, ppl: 19.061625 +epoch: 1, batch: 29281, sum loss: 3941.000977, avg loss: 2.550810, ppl: 12.817480 +epoch: 1, batch: 29282, sum loss: 4884.985352, avg loss: 2.469659, ppl: 11.818412 +epoch: 1, batch: 29283, sum loss: 4766.747070, avg loss: 2.717644, ppl: 15.144595 +epoch: 1, batch: 29284, sum loss: 3761.916748, avg loss: 2.468449, ppl: 11.804129 +epoch: 1, batch: 29285, sum loss: 4474.546387, avg loss: 2.607545, ppl: 13.565701 +epoch: 1, batch: 29286, sum loss: 4276.093750, avg loss: 2.716705, ppl: 15.130383 +epoch: 1, batch: 29287, sum loss: 3855.322021, avg loss: 2.430846, ppl: 11.368498 +epoch: 1, batch: 29288, sum loss: 4759.937500, avg loss: 2.434751, ppl: 11.412971 +epoch: 1, batch: 29289, sum loss: 4224.210938, avg loss: 2.591540, ppl: 13.350320 +epoch: 1, batch: 29290, sum loss: 4169.582031, avg loss: 2.493769, ppl: 12.106826 +epoch: 1, batch: 29291, sum loss: 4658.741211, avg loss: 2.547152, ppl: 12.770684 +epoch: 1, batch: 29292, sum loss: 3811.411621, avg loss: 2.476551, ppl: 11.900147 +epoch: 1, batch: 29293, sum loss: 4328.064941, avg loss: 2.450773, ppl: 11.597305 +epoch: 1, batch: 29294, sum loss: 4047.826660, avg loss: 2.441391, ppl: 11.489014 +epoch: 1, batch: 29295, sum loss: 5143.498535, avg loss: 2.699999, ppl: 14.879722 +epoch: 1, batch: 29296, sum loss: 3959.548828, avg loss: 2.541431, ppl: 12.697824 +epoch: 1, batch: 29297, sum loss: 4080.956299, avg loss: 2.643107, ppl: 14.056807 +epoch: 1, batch: 29298, sum loss: 5585.676758, avg loss: 2.944479, ppl: 19.000761 +epoch: 1, batch: 29299, sum loss: 5059.218262, avg loss: 2.717088, ppl: 15.136185 +epoch: 1, batch: 29300, sum loss: 4289.622559, avg loss: 2.533741, ppl: 12.600551 +epoch: 1, batch: 29301, sum loss: 5127.893066, avg loss: 2.711736, ppl: 15.055392 +epoch: 1, batch: 29302, sum loss: 4833.764648, avg loss: 2.991191, ppl: 19.909378 +epoch: 1, batch: 29303, sum loss: 4455.491211, avg loss: 2.658408, ppl: 14.273543 +epoch: 1, batch: 29304, sum loss: 
4461.520508, avg loss: 2.607552, ppl: 13.565795 +epoch: 1, batch: 29305, sum loss: 4530.959961, avg loss: 2.706667, ppl: 14.979262 +epoch: 1, batch: 29306, sum loss: 4534.693848, avg loss: 2.501210, ppl: 12.197247 +epoch: 1, batch: 29307, sum loss: 4060.038330, avg loss: 2.537524, ppl: 12.648314 +epoch: 1, batch: 29308, sum loss: 4559.196289, avg loss: 2.647617, ppl: 14.120348 +epoch: 1, batch: 29309, sum loss: 4460.723633, avg loss: 2.508843, ppl: 12.290707 +epoch: 1, batch: 29310, sum loss: 5369.772461, avg loss: 2.702452, ppl: 14.916261 +epoch: 1, batch: 29311, sum loss: 4185.523438, avg loss: 2.596478, ppl: 13.416409 +epoch: 1, batch: 29312, sum loss: 4093.114502, avg loss: 2.525055, ppl: 12.491585 +epoch: 1, batch: 29313, sum loss: 4656.105469, avg loss: 2.875915, ppl: 17.741642 +epoch: 1, batch: 29314, sum loss: 5055.937500, avg loss: 2.656825, ppl: 14.250968 +epoch: 1, batch: 29315, sum loss: 4000.090820, avg loss: 2.433145, ppl: 11.394666 +epoch: 1, batch: 29316, sum loss: 3878.605469, avg loss: 2.472024, ppl: 11.846399 +epoch: 1, batch: 29317, sum loss: 3945.415283, avg loss: 2.450568, ppl: 11.594935 +epoch: 1, batch: 29318, sum loss: 4033.178223, avg loss: 2.393578, ppl: 10.952608 +epoch: 1, batch: 29319, sum loss: 4998.769531, avg loss: 2.615787, ppl: 13.677980 +epoch: 1, batch: 29320, sum loss: 3762.183105, avg loss: 2.223512, ppl: 9.239728 +epoch: 1, batch: 29321, sum loss: 4219.468750, avg loss: 2.468970, ppl: 11.810271 +epoch: 1, batch: 29322, sum loss: 4575.145508, avg loss: 2.567422, ppl: 13.032183 +epoch: 1, batch: 29323, sum loss: 3950.253662, avg loss: 2.433921, ppl: 11.403506 +epoch: 1, batch: 29324, sum loss: 4772.441895, avg loss: 2.802373, ppl: 16.483723 +epoch: 1, batch: 29325, sum loss: 4761.251465, avg loss: 2.624725, ppl: 13.800783 +epoch: 1, batch: 29326, sum loss: 4009.774414, avg loss: 2.353154, ppl: 10.518693 +epoch: 1, batch: 29327, sum loss: 3969.689941, avg loss: 2.206609, ppl: 9.084860 +epoch: 1, batch: 29328, sum loss: 
3670.438232, avg loss: 2.442075, ppl: 11.496866 +epoch: 1, batch: 29329, sum loss: 4760.397461, avg loss: 2.533474, ppl: 12.597193 +epoch: 1, batch: 29330, sum loss: 4714.225586, avg loss: 2.714005, ppl: 15.089581 +epoch: 1, batch: 29331, sum loss: 3721.907959, avg loss: 2.314619, ppl: 10.121069 +epoch: 1, batch: 29332, sum loss: 3769.597412, avg loss: 2.369326, ppl: 10.690184 +epoch: 1, batch: 29333, sum loss: 4491.656250, avg loss: 2.686398, ppl: 14.678701 +epoch: 1, batch: 29334, sum loss: 4128.365234, avg loss: 2.448615, ppl: 11.572311 +epoch: 1, batch: 29335, sum loss: 4332.390137, avg loss: 2.494180, ppl: 12.111794 +epoch: 1, batch: 29336, sum loss: 4066.760010, avg loss: 2.475204, ppl: 11.884129 +epoch: 1, batch: 29337, sum loss: 4437.835449, avg loss: 2.629050, ppl: 13.860590 +epoch: 1, batch: 29338, sum loss: 3800.056396, avg loss: 2.446913, ppl: 11.552634 +epoch: 1, batch: 29339, sum loss: 4596.760742, avg loss: 2.640299, ppl: 14.017395 +epoch: 1, batch: 29340, sum loss: 4004.658447, avg loss: 2.312158, ppl: 10.096192 +epoch: 1, batch: 29341, sum loss: 4631.288574, avg loss: 2.547463, ppl: 12.774658 +epoch: 1, batch: 29342, sum loss: 4472.963379, avg loss: 2.843588, ppl: 17.177280 +epoch: 1, batch: 29343, sum loss: 4143.743164, avg loss: 2.726147, ppl: 15.273922 +epoch: 1, batch: 29344, sum loss: 5252.199707, avg loss: 2.796698, ppl: 16.390442 +epoch: 1, batch: 29345, sum loss: 4872.356934, avg loss: 2.734207, ppl: 15.397531 +epoch: 1, batch: 29346, sum loss: 3734.529053, avg loss: 2.499685, ppl: 12.178655 +epoch: 1, batch: 29347, sum loss: 4874.046875, avg loss: 2.855329, ppl: 17.380159 +epoch: 1, batch: 29348, sum loss: 3980.965576, avg loss: 2.497469, ppl: 12.151698 +epoch: 1, batch: 29349, sum loss: 4721.914062, avg loss: 2.807321, ppl: 16.565481 +epoch: 1, batch: 29350, sum loss: 4635.981934, avg loss: 2.646109, ppl: 14.099067 +epoch: 1, batch: 29351, sum loss: 2946.935303, avg loss: 2.237612, ppl: 9.370929 +epoch: 1, batch: 29352, sum loss: 
5181.488770, avg loss: 2.767889, ppl: 15.924989 +epoch: 1, batch: 29353, sum loss: 4752.828125, avg loss: 2.607147, ppl: 13.560305 +epoch: 1, batch: 29354, sum loss: 4159.916992, avg loss: 2.736788, ppl: 15.437314 +epoch: 1, batch: 29355, sum loss: 5157.494629, avg loss: 2.703089, ppl: 14.925770 +epoch: 1, batch: 29356, sum loss: 4777.519531, avg loss: 2.874561, ppl: 17.717638 +epoch: 1, batch: 29357, sum loss: 4056.151367, avg loss: 2.485387, ppl: 12.005764 +epoch: 1, batch: 29358, sum loss: 5290.442383, avg loss: 2.839744, ppl: 17.111378 +epoch: 1, batch: 29359, sum loss: 4681.796875, avg loss: 2.710942, ppl: 15.043440 +epoch: 1, batch: 29360, sum loss: 4309.928223, avg loss: 2.478395, ppl: 11.922112 +epoch: 1, batch: 29361, sum loss: 3967.875488, avg loss: 2.546775, ppl: 12.765869 +epoch: 1, batch: 29362, sum loss: 5140.328613, avg loss: 2.897592, ppl: 18.130436 +epoch: 1, batch: 29363, sum loss: 3912.788574, avg loss: 2.582699, ppl: 13.232803 +epoch: 1, batch: 29364, sum loss: 4524.910156, avg loss: 2.685407, ppl: 14.664164 +epoch: 1, batch: 29365, sum loss: 3608.316895, avg loss: 2.528603, ppl: 12.535985 +epoch: 1, batch: 29366, sum loss: 4612.167480, avg loss: 2.626519, ppl: 13.825562 +epoch: 1, batch: 29367, sum loss: 4795.193359, avg loss: 2.840755, ppl: 17.128685 +epoch: 1, batch: 29368, sum loss: 4242.439453, avg loss: 2.772836, ppl: 16.003960 +epoch: 1, batch: 29369, sum loss: 4505.705078, avg loss: 2.590975, ppl: 13.342772 +epoch: 1, batch: 29370, sum loss: 4161.723633, avg loss: 2.540735, ppl: 12.688991 +epoch: 1, batch: 29371, sum loss: 3960.116943, avg loss: 2.590005, ppl: 13.329834 +epoch: 1, batch: 29372, sum loss: 4337.044922, avg loss: 2.724274, ppl: 15.245348 +epoch: 1, batch: 29373, sum loss: 4420.453125, avg loss: 2.706952, ppl: 14.983541 +epoch: 1, batch: 29374, sum loss: 4233.065430, avg loss: 2.592202, ppl: 13.359152 +epoch: 1, batch: 29375, sum loss: 4518.356445, avg loss: 2.570168, ppl: 13.068027 +epoch: 1, batch: 29376, sum loss: 
4410.606934, avg loss: 2.802164, ppl: 16.480276 +epoch: 1, batch: 29377, sum loss: 4265.072754, avg loss: 2.615005, ppl: 13.667282 +epoch: 1, batch: 29378, sum loss: 4549.832520, avg loss: 2.579270, ppl: 13.187510 +epoch: 1, batch: 29379, sum loss: 4501.312988, avg loss: 2.857976, ppl: 17.426228 +epoch: 1, batch: 29380, sum loss: 4297.737305, avg loss: 2.643135, ppl: 14.057201 +epoch: 1, batch: 29381, sum loss: 3774.156250, avg loss: 2.478106, ppl: 11.918675 +epoch: 1, batch: 29382, sum loss: 5093.780273, avg loss: 2.797243, ppl: 16.399378 +epoch: 1, batch: 29383, sum loss: 3916.113281, avg loss: 2.556210, ppl: 12.886881 +epoch: 1, batch: 29384, sum loss: 4160.761230, avg loss: 2.677453, ppl: 14.547986 +epoch: 1, batch: 29385, sum loss: 4029.552002, avg loss: 2.548736, ppl: 12.790927 +epoch: 1, batch: 29386, sum loss: 4139.597656, avg loss: 2.763416, ppl: 15.853912 +epoch: 1, batch: 29387, sum loss: 3885.293457, avg loss: 2.496975, ppl: 12.145700 +epoch: 1, batch: 29388, sum loss: 4773.339844, avg loss: 2.569074, ppl: 13.053733 +epoch: 1, batch: 29389, sum loss: 4022.196289, avg loss: 2.599998, ppl: 13.463708 +epoch: 1, batch: 29390, sum loss: 4242.385742, avg loss: 2.772801, ppl: 16.003399 +epoch: 1, batch: 29391, sum loss: 4658.569336, avg loss: 2.811448, ppl: 16.633984 +epoch: 1, batch: 29392, sum loss: 4014.759277, avg loss: 2.481310, ppl: 11.956917 +epoch: 1, batch: 29393, sum loss: 5111.342285, avg loss: 2.806888, ppl: 16.558302 +epoch: 1, batch: 29394, sum loss: 5008.382324, avg loss: 2.702851, ppl: 14.922208 +epoch: 1, batch: 29395, sum loss: 4354.803711, avg loss: 2.585988, ppl: 13.276401 +epoch: 1, batch: 29396, sum loss: 4678.470703, avg loss: 2.776541, ppl: 16.063354 +epoch: 1, batch: 29397, sum loss: 4447.391602, avg loss: 2.884171, ppl: 17.888731 +epoch: 1, batch: 29398, sum loss: 5302.018555, avg loss: 2.771573, ppl: 15.983750 +epoch: 1, batch: 29399, sum loss: 4195.619141, avg loss: 2.530530, ppl: 12.560163 +epoch: 1, batch: 29400, sum loss: 
3786.835205, avg loss: 2.360870, ppl: 10.600168 +epoch: 1, batch: 29401, sum loss: 4112.046875, avg loss: 2.401896, ppl: 11.044102 +epoch: 1, batch: 29402, sum loss: 3986.957520, avg loss: 2.528191, ppl: 12.530822 +epoch: 1, batch: 29403, sum loss: 4173.731445, avg loss: 2.624988, ppl: 13.804413 +epoch: 1, batch: 29404, sum loss: 4549.478516, avg loss: 2.691999, ppl: 14.761157 +epoch: 1, batch: 29405, sum loss: 5120.997070, avg loss: 2.592910, ppl: 13.368615 +epoch: 1, batch: 29406, sum loss: 3635.787109, avg loss: 2.379442, ppl: 10.798873 +epoch: 1, batch: 29407, sum loss: 5195.242188, avg loss: 2.740107, ppl: 15.488640 +epoch: 1, batch: 29408, sum loss: 3704.325439, avg loss: 2.651629, ppl: 14.177111 +epoch: 1, batch: 29409, sum loss: 4049.267334, avg loss: 2.540318, ppl: 12.683707 +epoch: 1, batch: 29410, sum loss: 5032.450195, avg loss: 2.979544, ppl: 19.678848 +epoch: 1, batch: 29411, sum loss: 4304.889160, avg loss: 2.547272, ppl: 12.772210 +epoch: 1, batch: 29412, sum loss: 3882.775635, avg loss: 2.366103, ppl: 10.655787 +epoch: 1, batch: 29413, sum loss: 3572.877686, avg loss: 2.423933, ppl: 11.290179 +epoch: 1, batch: 29414, sum loss: 3635.526855, avg loss: 2.493503, ppl: 12.103597 +epoch: 1, batch: 29415, sum loss: 5386.458008, avg loss: 2.849978, ppl: 17.287397 +epoch: 1, batch: 29416, sum loss: 4381.115234, avg loss: 2.509230, ppl: 12.295458 +epoch: 1, batch: 29417, sum loss: 4386.386230, avg loss: 2.495100, ppl: 12.122949 +epoch: 1, batch: 29418, sum loss: 4900.525391, avg loss: 2.762416, ppl: 15.838056 +epoch: 1, batch: 29419, sum loss: 5448.418457, avg loss: 2.814266, ppl: 16.680923 +epoch: 1, batch: 29420, sum loss: 4105.031250, avg loss: 2.436220, ppl: 11.429759 +epoch: 1, batch: 29421, sum loss: 4921.632324, avg loss: 2.790041, ppl: 16.281687 +epoch: 1, batch: 29422, sum loss: 3187.023682, avg loss: 2.299440, ppl: 9.968597 +epoch: 1, batch: 29423, sum loss: 3929.380859, avg loss: 2.451267, ppl: 11.603036 +epoch: 1, batch: 29424, sum loss: 
4214.070312, avg loss: 2.571123, ppl: 13.080501 +epoch: 1, batch: 29425, sum loss: 4554.872070, avg loss: 2.659003, ppl: 14.282043 +epoch: 1, batch: 29426, sum loss: 4065.453125, avg loss: 2.604390, ppl: 13.522979 +epoch: 1, batch: 29427, sum loss: 3979.211426, avg loss: 2.594010, ppl: 13.383333 +epoch: 1, batch: 29428, sum loss: 4739.985840, avg loss: 2.925917, ppl: 18.651329 +epoch: 1, batch: 29429, sum loss: 3536.990723, avg loss: 2.310249, ppl: 10.076932 +epoch: 1, batch: 29430, sum loss: 3801.627686, avg loss: 2.476630, ppl: 11.901095 +epoch: 1, batch: 29431, sum loss: 4331.036621, avg loss: 2.504937, ppl: 12.242793 +epoch: 1, batch: 29432, sum loss: 4318.445312, avg loss: 2.733193, ppl: 15.381926 +epoch: 1, batch: 29433, sum loss: 5597.618652, avg loss: 2.731878, ppl: 15.361713 +epoch: 1, batch: 29434, sum loss: 4777.576172, avg loss: 2.649793, ppl: 14.151104 +epoch: 1, batch: 29435, sum loss: 4816.818359, avg loss: 2.676010, ppl: 14.527020 +epoch: 1, batch: 29436, sum loss: 4675.754395, avg loss: 2.740770, ppl: 15.498919 +epoch: 1, batch: 29437, sum loss: 4207.446289, avg loss: 2.619829, ppl: 13.733380 +epoch: 1, batch: 29438, sum loss: 4004.670166, avg loss: 2.479672, ppl: 11.937347 +epoch: 1, batch: 29439, sum loss: 4449.718262, avg loss: 2.767238, ppl: 15.914612 +epoch: 1, batch: 29440, sum loss: 3567.019287, avg loss: 2.293903, ppl: 9.913556 +epoch: 1, batch: 29441, sum loss: 3889.483398, avg loss: 2.447755, ppl: 11.562364 +epoch: 1, batch: 29442, sum loss: 4663.352539, avg loss: 2.666296, ppl: 14.386589 +epoch: 1, batch: 29443, sum loss: 4911.447754, avg loss: 2.527765, ppl: 12.525484 +epoch: 1, batch: 29444, sum loss: 4478.966797, avg loss: 2.520521, ppl: 12.435079 +epoch: 1, batch: 29445, sum loss: 4920.244141, avg loss: 2.548029, ppl: 12.781888 +epoch: 1, batch: 29446, sum loss: 4673.243164, avg loss: 2.628371, ppl: 13.851185 +epoch: 1, batch: 29447, sum loss: 4533.206543, avg loss: 2.687141, ppl: 14.689617 +epoch: 1, batch: 29448, sum loss: 
5096.619141, avg loss: 2.817369, ppl: 16.732761 +epoch: 1, batch: 29449, sum loss: 3780.084717, avg loss: 2.438764, ppl: 11.458873 +epoch: 1, batch: 29450, sum loss: 4002.565186, avg loss: 2.496922, ppl: 12.145048 +epoch: 1, batch: 29451, sum loss: 5131.813965, avg loss: 2.762010, ppl: 15.831627 +epoch: 1, batch: 29452, sum loss: 3420.177734, avg loss: 2.198058, ppl: 9.007501 +epoch: 1, batch: 29453, sum loss: 4651.181152, avg loss: 2.642716, ppl: 14.051321 +epoch: 1, batch: 29454, sum loss: 4290.988770, avg loss: 2.764812, ppl: 15.876062 +epoch: 1, batch: 29455, sum loss: 4096.365234, avg loss: 2.516195, ppl: 12.381393 +epoch: 1, batch: 29456, sum loss: 4607.549805, avg loss: 2.669496, ppl: 14.432691 +epoch: 1, batch: 29457, sum loss: 4626.130371, avg loss: 2.783472, ppl: 16.175085 +epoch: 1, batch: 29458, sum loss: 4035.410889, avg loss: 2.487923, ppl: 12.036250 +epoch: 1, batch: 29459, sum loss: 4624.952148, avg loss: 2.467957, ppl: 11.798322 +epoch: 1, batch: 29460, sum loss: 4547.028320, avg loss: 2.671579, ppl: 14.462793 +epoch: 1, batch: 29461, sum loss: 4611.815430, avg loss: 2.796735, ppl: 16.391035 +epoch: 1, batch: 29462, sum loss: 4643.348145, avg loss: 2.648801, ppl: 14.137079 +epoch: 1, batch: 29463, sum loss: 3694.421875, avg loss: 2.618300, ppl: 13.712399 +epoch: 1, batch: 29464, sum loss: 4876.071289, avg loss: 2.733224, ppl: 15.382399 +epoch: 1, batch: 29465, sum loss: 4704.413086, avg loss: 2.720887, ppl: 15.193789 +epoch: 1, batch: 29466, sum loss: 4456.982910, avg loss: 2.519493, ppl: 12.422298 +epoch: 1, batch: 29467, sum loss: 4428.885742, avg loss: 2.523582, ppl: 12.473189 +epoch: 1, batch: 29468, sum loss: 4851.964355, avg loss: 2.704551, ppl: 14.947603 +epoch: 1, batch: 29469, sum loss: 3728.313477, avg loss: 2.432038, ppl: 11.382051 +epoch: 1, batch: 29470, sum loss: 4375.004883, avg loss: 2.594902, ppl: 13.395275 +epoch: 1, batch: 29471, sum loss: 3875.789062, avg loss: 2.443751, ppl: 11.516158 +epoch: 1, batch: 29472, sum loss: 
4605.454102, avg loss: 2.846387, ppl: 17.225431 +epoch: 1, batch: 29473, sum loss: 4286.583008, avg loss: 2.585394, ppl: 13.268515 +epoch: 1, batch: 29474, sum loss: 4755.431641, avg loss: 2.892598, ppl: 18.040125 +epoch: 1, batch: 29475, sum loss: 4630.355469, avg loss: 2.575281, ppl: 13.135006 +epoch: 1, batch: 29476, sum loss: 3605.823730, avg loss: 2.456283, ppl: 11.661386 +epoch: 1, batch: 29477, sum loss: 4464.787109, avg loss: 2.902982, ppl: 18.228430 +epoch: 1, batch: 29478, sum loss: 4626.616699, avg loss: 2.634748, ppl: 13.939795 +epoch: 1, batch: 29479, sum loss: 4096.433594, avg loss: 2.594321, ppl: 13.387501 +epoch: 1, batch: 29480, sum loss: 2912.783936, avg loss: 2.268523, ppl: 9.665119 +epoch: 1, batch: 29481, sum loss: 4586.430176, avg loss: 2.592668, ppl: 13.365387 +epoch: 1, batch: 29482, sum loss: 5403.510254, avg loss: 2.796848, ppl: 16.392897 +epoch: 1, batch: 29483, sum loss: 5108.901855, avg loss: 2.838279, ppl: 17.086330 +epoch: 1, batch: 29484, sum loss: 3788.131348, avg loss: 2.250821, ppl: 9.495525 +epoch: 1, batch: 29485, sum loss: 3298.270752, avg loss: 2.405741, ppl: 11.086642 +epoch: 1, batch: 29486, sum loss: 4635.161621, avg loss: 2.732996, ppl: 15.378897 +epoch: 1, batch: 29487, sum loss: 4657.914551, avg loss: 2.896713, ppl: 18.114506 +epoch: 1, batch: 29488, sum loss: 4067.157471, avg loss: 2.408027, ppl: 11.112015 +epoch: 1, batch: 29489, sum loss: 5748.998535, avg loss: 2.737618, ppl: 15.450142 +epoch: 1, batch: 29490, sum loss: 4647.215820, avg loss: 2.632983, ppl: 13.915223 +epoch: 1, batch: 29491, sum loss: 5022.219727, avg loss: 2.657259, ppl: 14.257160 +epoch: 1, batch: 29492, sum loss: 4380.972656, avg loss: 2.621767, ppl: 13.760016 +epoch: 1, batch: 29493, sum loss: 4812.485840, avg loss: 2.652969, ppl: 14.196126 +epoch: 1, batch: 29494, sum loss: 3809.314209, avg loss: 2.734612, ppl: 15.403762 +epoch: 1, batch: 29495, sum loss: 5152.115234, avg loss: 2.747795, ppl: 15.608176 +epoch: 1, batch: 29496, sum loss: 
4341.815918, avg loss: 2.655545, ppl: 14.232738 +epoch: 1, batch: 29497, sum loss: 4732.366699, avg loss: 2.658633, ppl: 14.276759 +epoch: 1, batch: 29498, sum loss: 4341.791016, avg loss: 2.518440, ppl: 12.409229 +epoch: 1, batch: 29499, sum loss: 4419.236328, avg loss: 2.660588, ppl: 14.304695 +epoch: 1, batch: 29500, sum loss: 4729.733398, avg loss: 2.681255, ppl: 14.603407 +epoch: 1, batch: 29501, sum loss: 4256.923340, avg loss: 2.542965, ppl: 12.717324 +epoch: 1, batch: 29502, sum loss: 4201.211426, avg loss: 2.514190, ppl: 12.356598 +epoch: 1, batch: 29503, sum loss: 4101.168457, avg loss: 2.670032, ppl: 14.440425 +epoch: 1, batch: 29504, sum loss: 3504.141602, avg loss: 2.208029, ppl: 9.097765 +epoch: 1, batch: 29505, sum loss: 4541.050781, avg loss: 2.722453, ppl: 15.217600 +epoch: 1, batch: 29506, sum loss: 4414.242676, avg loss: 2.655982, ppl: 14.238969 +epoch: 1, batch: 29507, sum loss: 4481.835449, avg loss: 2.815223, ppl: 16.696903 +epoch: 1, batch: 29508, sum loss: 4698.466797, avg loss: 2.689449, ppl: 14.723555 +epoch: 1, batch: 29509, sum loss: 4135.833984, avg loss: 2.515714, ppl: 12.375443 +epoch: 1, batch: 29510, sum loss: 4414.649414, avg loss: 2.592278, ppl: 13.360171 +epoch: 1, batch: 29511, sum loss: 5099.209473, avg loss: 2.994251, ppl: 19.970396 +epoch: 1, batch: 29512, sum loss: 4542.554199, avg loss: 2.647176, ppl: 14.114128 +epoch: 1, batch: 29513, sum loss: 5019.409668, avg loss: 2.889700, ppl: 17.987921 +epoch: 1, batch: 29514, sum loss: 3925.256592, avg loss: 2.530791, ppl: 12.563437 +epoch: 1, batch: 29515, sum loss: 4786.462891, avg loss: 2.907936, ppl: 18.318954 +epoch: 1, batch: 29516, sum loss: 3946.039551, avg loss: 2.503832, ppl: 12.229268 +epoch: 1, batch: 29517, sum loss: 3694.472412, avg loss: 2.456431, ppl: 11.663114 +epoch: 1, batch: 29518, sum loss: 3912.822510, avg loss: 2.415323, ppl: 11.193380 +epoch: 1, batch: 29519, sum loss: 4361.743652, avg loss: 2.496705, ppl: 12.142420 +epoch: 1, batch: 29520, sum loss: 
3945.045410, avg loss: 2.543550, ppl: 12.724758 +epoch: 1, batch: 29521, sum loss: 4565.114258, avg loss: 2.460978, ppl: 11.716265 +epoch: 1, batch: 29522, sum loss: 4202.523438, avg loss: 2.443328, ppl: 11.511283 +epoch: 1, batch: 29523, sum loss: 5101.383789, avg loss: 2.778531, ppl: 16.095364 +epoch: 1, batch: 29524, sum loss: 4276.939453, avg loss: 2.628727, ppl: 13.856126 +epoch: 1, batch: 29525, sum loss: 4604.024414, avg loss: 2.580731, ppl: 13.206794 +epoch: 1, batch: 29526, sum loss: 3564.783447, avg loss: 2.280732, ppl: 9.783836 +epoch: 1, batch: 29527, sum loss: 3861.554199, avg loss: 2.424077, ppl: 11.291800 +epoch: 1, batch: 29528, sum loss: 4591.484375, avg loss: 2.661730, ppl: 14.321047 +epoch: 1, batch: 29529, sum loss: 4027.075195, avg loss: 2.543951, ppl: 12.729871 +epoch: 1, batch: 29530, sum loss: 4505.762207, avg loss: 2.789946, ppl: 16.280134 +epoch: 1, batch: 29531, sum loss: 5486.276855, avg loss: 2.889035, ppl: 17.975950 +epoch: 1, batch: 29532, sum loss: 4630.585938, avg loss: 2.826975, ppl: 16.894283 +epoch: 1, batch: 29533, sum loss: 3940.832520, avg loss: 2.630729, ppl: 13.883890 +epoch: 1, batch: 29534, sum loss: 4025.485840, avg loss: 2.588737, ppl: 13.312947 +epoch: 1, batch: 29535, sum loss: 4229.591309, avg loss: 2.638547, ppl: 13.992860 +epoch: 1, batch: 29536, sum loss: 4244.253418, avg loss: 2.757800, ppl: 15.765116 +epoch: 1, batch: 29537, sum loss: 4084.594238, avg loss: 2.645463, ppl: 14.089964 +epoch: 1, batch: 29538, sum loss: 3944.328369, avg loss: 2.521949, ppl: 12.452845 +epoch: 1, batch: 29539, sum loss: 5337.078125, avg loss: 2.869397, ppl: 17.626385 +epoch: 1, batch: 29540, sum loss: 4072.901367, avg loss: 2.550345, ppl: 12.811528 +epoch: 1, batch: 29541, sum loss: 4101.380859, avg loss: 2.600749, ppl: 13.473824 +epoch: 1, batch: 29542, sum loss: 4939.772949, avg loss: 2.615020, ppl: 13.667490 +epoch: 1, batch: 29543, sum loss: 3282.027588, avg loss: 2.217586, ppl: 9.185134 +epoch: 1, batch: 29544, sum loss: 
4609.755371, avg loss: 2.680090, ppl: 14.586412 +epoch: 1, batch: 29545, sum loss: 4815.535645, avg loss: 2.656115, ppl: 14.240849 +epoch: 1, batch: 29546, sum loss: 4697.357422, avg loss: 2.729435, ppl: 15.324223 +epoch: 1, batch: 29547, sum loss: 5788.197266, avg loss: 2.989771, ppl: 19.881132 +epoch: 1, batch: 29548, sum loss: 4880.337891, avg loss: 2.898063, ppl: 18.138975 +epoch: 1, batch: 29549, sum loss: 3884.550781, avg loss: 2.514272, ppl: 12.357615 +epoch: 1, batch: 29550, sum loss: 5252.029297, avg loss: 2.857470, ppl: 17.417397 +epoch: 1, batch: 29551, sum loss: 4737.742676, avg loss: 2.498810, ppl: 12.168000 +epoch: 1, batch: 29552, sum loss: 3910.880371, avg loss: 2.384683, ppl: 10.855622 +epoch: 1, batch: 29553, sum loss: 4138.303223, avg loss: 2.571972, ppl: 13.091618 +epoch: 1, batch: 29554, sum loss: 5269.032227, avg loss: 2.760101, ppl: 15.801436 +epoch: 1, batch: 29555, sum loss: 4937.334961, avg loss: 2.787880, ppl: 16.246536 +epoch: 1, batch: 29556, sum loss: 3801.857666, avg loss: 2.567088, ppl: 13.027837 +epoch: 1, batch: 29557, sum loss: 4314.192383, avg loss: 2.503884, ppl: 12.229904 +epoch: 1, batch: 29558, sum loss: 3686.754150, avg loss: 2.501190, ppl: 12.197002 +epoch: 1, batch: 29559, sum loss: 3499.243164, avg loss: 2.462522, ppl: 11.734364 +epoch: 1, batch: 29560, sum loss: 4758.879395, avg loss: 2.657107, ppl: 14.254994 +epoch: 1, batch: 29561, sum loss: 4390.379883, avg loss: 2.417610, ppl: 11.219013 +epoch: 1, batch: 29562, sum loss: 4881.710449, avg loss: 2.895439, ppl: 18.091444 +epoch: 1, batch: 29563, sum loss: 4167.282715, avg loss: 2.442721, ppl: 11.504306 +epoch: 1, batch: 29564, sum loss: 5332.175781, avg loss: 3.083965, ppl: 21.844852 +epoch: 1, batch: 29565, sum loss: 4365.818359, avg loss: 2.689968, ppl: 14.731206 +epoch: 1, batch: 29566, sum loss: 3687.469238, avg loss: 2.618941, ppl: 13.721189 +epoch: 1, batch: 29567, sum loss: 4898.012207, avg loss: 2.754788, ppl: 15.717704 +epoch: 1, batch: 29568, sum loss: 
5255.651367, avg loss: 2.673271, ppl: 14.487286 +epoch: 1, batch: 29569, sum loss: 3787.685059, avg loss: 2.473994, ppl: 11.869763 +epoch: 1, batch: 29570, sum loss: 4783.011230, avg loss: 2.572895, ppl: 13.103700 +epoch: 1, batch: 29571, sum loss: 4401.417969, avg loss: 2.775169, ppl: 16.041336 +epoch: 1, batch: 29572, sum loss: 5288.276367, avg loss: 2.605062, ppl: 13.532067 +epoch: 1, batch: 29573, sum loss: 4146.980957, avg loss: 2.487691, ppl: 12.033458 +epoch: 1, batch: 29574, sum loss: 3789.337891, avg loss: 2.620566, ppl: 13.743495 +epoch: 1, batch: 29575, sum loss: 4488.838867, avg loss: 2.810795, ppl: 16.623121 +epoch: 1, batch: 29576, sum loss: 3925.110596, avg loss: 2.428905, ppl: 11.346451 +epoch: 1, batch: 29577, sum loss: 5410.154785, avg loss: 3.047974, ppl: 21.072615 +epoch: 1, batch: 29578, sum loss: 4036.840820, avg loss: 2.784028, ppl: 16.184084 +epoch: 1, batch: 29579, sum loss: 5861.323242, avg loss: 2.897342, ppl: 18.125902 +epoch: 1, batch: 29580, sum loss: 4519.797852, avg loss: 2.696777, ppl: 14.831850 +epoch: 1, batch: 29581, sum loss: 4582.004395, avg loss: 2.688970, ppl: 14.716508 +epoch: 1, batch: 29582, sum loss: 4581.447754, avg loss: 2.783383, ppl: 16.173639 +epoch: 1, batch: 29583, sum loss: 4293.789551, avg loss: 2.710726, ppl: 15.040188 +epoch: 1, batch: 29584, sum loss: 4857.859375, avg loss: 2.661841, ppl: 14.322628 +epoch: 1, batch: 29585, sum loss: 4427.811523, avg loss: 2.731531, ppl: 15.356375 +epoch: 1, batch: 29586, sum loss: 4644.449707, avg loss: 2.690875, ppl: 14.744566 +epoch: 1, batch: 29587, sum loss: 4074.802002, avg loss: 2.524661, ppl: 12.486656 +epoch: 1, batch: 29588, sum loss: 4864.073242, avg loss: 2.556003, ppl: 12.884214 +epoch: 1, batch: 29589, sum loss: 3570.878418, avg loss: 2.315745, ppl: 10.132467 +epoch: 1, batch: 29590, sum loss: 4042.870117, avg loss: 2.522065, ppl: 12.454288 +epoch: 1, batch: 29591, sum loss: 4199.383789, avg loss: 2.450049, ppl: 11.588914 +epoch: 1, batch: 29592, sum loss: 
3854.065674, avg loss: 2.545618, ppl: 12.751106 +epoch: 1, batch: 29593, sum loss: 3995.601074, avg loss: 2.584477, ppl: 13.256351 +epoch: 1, batch: 29594, sum loss: 4762.145508, avg loss: 2.498502, ppl: 12.164262 +epoch: 1, batch: 29595, sum loss: 4954.684570, avg loss: 2.847520, ppl: 17.244959 +epoch: 1, batch: 29596, sum loss: 4550.895508, avg loss: 2.624507, ppl: 13.797772 +epoch: 1, batch: 29597, sum loss: 4277.740723, avg loss: 2.316048, ppl: 10.135541 +epoch: 1, batch: 29598, sum loss: 3964.478516, avg loss: 2.494952, ppl: 12.121151 +epoch: 1, batch: 29599, sum loss: 4113.506836, avg loss: 2.543913, ppl: 12.729383 +epoch: 1, batch: 29600, sum loss: 4355.603027, avg loss: 2.627022, ppl: 13.832519 +epoch: 1, batch: 29601, sum loss: 4525.406738, avg loss: 2.647985, ppl: 14.125550 +epoch: 1, batch: 29602, sum loss: 4752.605469, avg loss: 2.849284, ppl: 17.275408 +epoch: 1, batch: 29603, sum loss: 5154.880371, avg loss: 2.673693, ppl: 14.493397 +epoch: 1, batch: 29604, sum loss: 4454.848633, avg loss: 2.796515, ppl: 16.387440 +epoch: 1, batch: 29605, sum loss: 4126.998047, avg loss: 2.638746, ppl: 13.995636 +epoch: 1, batch: 29606, sum loss: 4760.251465, avg loss: 2.740502, ppl: 15.494760 +epoch: 1, batch: 29607, sum loss: 4415.765625, avg loss: 2.798331, ppl: 16.417215 +epoch: 1, batch: 29608, sum loss: 4299.715820, avg loss: 2.488261, ppl: 12.040325 +epoch: 1, batch: 29609, sum loss: 3907.289551, avg loss: 2.378143, ppl: 10.784860 +epoch: 1, batch: 29610, sum loss: 3924.764648, avg loss: 2.571930, ppl: 13.091062 +epoch: 1, batch: 29611, sum loss: 4712.062500, avg loss: 2.659178, ppl: 14.284535 +epoch: 1, batch: 29612, sum loss: 4090.061035, avg loss: 2.394649, ppl: 10.964352 +epoch: 1, batch: 29613, sum loss: 3665.805664, avg loss: 2.411714, ppl: 11.153065 +epoch: 1, batch: 29614, sum loss: 4988.926270, avg loss: 2.762418, ppl: 15.838090 +epoch: 1, batch: 29615, sum loss: 4270.272949, avg loss: 2.580225, ppl: 13.200114 +epoch: 1, batch: 29616, sum loss: 
4771.777832, avg loss: 2.816870, ppl: 16.724421 +epoch: 1, batch: 29617, sum loss: 4731.507812, avg loss: 2.878046, ppl: 17.779499 +epoch: 1, batch: 29618, sum loss: 4534.245605, avg loss: 2.571892, ppl: 13.090569 +epoch: 1, batch: 29619, sum loss: 4720.137695, avg loss: 2.592058, ppl: 13.357235 +epoch: 1, batch: 29620, sum loss: 4191.087891, avg loss: 2.683155, ppl: 14.631179 +epoch: 1, batch: 29621, sum loss: 3614.858154, avg loss: 2.450751, ppl: 11.597056 +epoch: 1, batch: 29622, sum loss: 4726.522949, avg loss: 2.713274, ppl: 15.078558 +epoch: 1, batch: 29623, sum loss: 3573.022949, avg loss: 2.479544, ppl: 11.935822 +epoch: 1, batch: 29624, sum loss: 3594.495117, avg loss: 2.361692, ppl: 10.608889 +epoch: 1, batch: 29625, sum loss: 3861.013672, avg loss: 2.345695, ppl: 10.440527 +epoch: 1, batch: 29626, sum loss: 4415.314453, avg loss: 2.643901, ppl: 14.067974 +epoch: 1, batch: 29627, sum loss: 4593.337891, avg loss: 2.684593, ppl: 14.652233 +epoch: 1, batch: 29628, sum loss: 4398.849121, avg loss: 2.761362, ppl: 15.821371 +epoch: 1, batch: 29629, sum loss: 4821.798828, avg loss: 2.599352, ppl: 13.455021 +epoch: 1, batch: 29630, sum loss: 4205.061035, avg loss: 2.671576, ppl: 14.462749 +epoch: 1, batch: 29631, sum loss: 3474.371582, avg loss: 2.434738, ppl: 11.412832 +epoch: 1, batch: 29632, sum loss: 4205.891602, avg loss: 2.532144, ppl: 12.580454 +epoch: 1, batch: 29633, sum loss: 4632.129883, avg loss: 2.697804, ppl: 14.847095 +epoch: 1, batch: 29634, sum loss: 4767.632324, avg loss: 2.754265, ppl: 15.709484 +epoch: 1, batch: 29635, sum loss: 4994.789062, avg loss: 2.626072, ppl: 13.819383 +epoch: 1, batch: 29636, sum loss: 5043.331055, avg loss: 2.734995, ppl: 15.409668 +epoch: 1, batch: 29637, sum loss: 4207.421387, avg loss: 2.618184, ppl: 13.710800 +epoch: 1, batch: 29638, sum loss: 4417.608398, avg loss: 2.559448, ppl: 12.928685 +epoch: 1, batch: 29639, sum loss: 4761.697754, avg loss: 2.627868, ppl: 13.844228 +epoch: 1, batch: 29640, sum loss: 
4300.514648, avg loss: 2.646471, ppl: 14.104171 +epoch: 1, batch: 29641, sum loss: 3832.309570, avg loss: 2.365623, ppl: 10.650675 +epoch: 1, batch: 29642, sum loss: 5484.567383, avg loss: 2.806841, ppl: 16.557529 +epoch: 1, batch: 29643, sum loss: 4844.895996, avg loss: 2.688622, ppl: 14.711383 +epoch: 1, batch: 29644, sum loss: 4444.335938, avg loss: 2.532385, ppl: 12.583484 +epoch: 1, batch: 29645, sum loss: 4670.859375, avg loss: 2.582012, ppl: 13.223717 +epoch: 1, batch: 29646, sum loss: 3928.822266, avg loss: 2.576277, ppl: 13.148093 +epoch: 1, batch: 29647, sum loss: 4619.127441, avg loss: 2.747845, ppl: 15.608957 +epoch: 1, batch: 29648, sum loss: 4021.183594, avg loss: 2.631665, ppl: 13.896886 +epoch: 1, batch: 29649, sum loss: 5275.701172, avg loss: 2.807718, ppl: 16.572050 +epoch: 1, batch: 29650, sum loss: 3312.963867, avg loss: 2.221975, ppl: 9.225530 +epoch: 1, batch: 29651, sum loss: 3455.200684, avg loss: 2.300400, ppl: 9.978170 +epoch: 1, batch: 29652, sum loss: 5767.577148, avg loss: 2.947152, ppl: 19.051624 +epoch: 1, batch: 29653, sum loss: 4815.312988, avg loss: 2.847613, ppl: 17.246563 +epoch: 1, batch: 29654, sum loss: 4291.804688, avg loss: 2.655820, ppl: 14.236650 +epoch: 1, batch: 29655, sum loss: 4427.987793, avg loss: 2.646735, ppl: 14.107903 +epoch: 1, batch: 29656, sum loss: 4143.069336, avg loss: 2.517053, ppl: 12.392025 +epoch: 1, batch: 29657, sum loss: 4660.422852, avg loss: 2.695444, ppl: 14.812096 +epoch: 1, batch: 29658, sum loss: 4927.448730, avg loss: 2.478596, ppl: 11.924510 +epoch: 1, batch: 29659, sum loss: 4853.653320, avg loss: 2.612300, ppl: 13.630363 +epoch: 1, batch: 29660, sum loss: 4205.357910, avg loss: 2.727210, ppl: 15.290169 +epoch: 1, batch: 29661, sum loss: 4125.387695, avg loss: 2.812125, ppl: 16.645252 +epoch: 1, batch: 29662, sum loss: 4571.131836, avg loss: 2.633141, ppl: 13.917410 +epoch: 1, batch: 29663, sum loss: 3640.438477, avg loss: 2.299708, ppl: 9.971274 +epoch: 1, batch: 29664, sum loss: 
4141.223633, avg loss: 2.506794, ppl: 12.265543 +epoch: 1, batch: 29665, sum loss: 3671.696533, avg loss: 2.585702, ppl: 13.272602 +epoch: 1, batch: 29666, sum loss: 4096.229980, avg loss: 2.574626, ppl: 13.126407 +epoch: 1, batch: 29667, sum loss: 3252.928711, avg loss: 2.316901, ppl: 10.144186 +epoch: 1, batch: 29668, sum loss: 3695.749023, avg loss: 2.353980, ppl: 10.527389 +epoch: 1, batch: 29669, sum loss: 4340.789062, avg loss: 2.586883, ppl: 13.288282 +epoch: 1, batch: 29670, sum loss: 4853.237793, avg loss: 2.770113, ppl: 15.960438 +epoch: 1, batch: 29671, sum loss: 3938.001221, avg loss: 2.376585, ppl: 10.768064 +epoch: 1, batch: 29672, sum loss: 4619.851074, avg loss: 2.808420, ppl: 16.583694 +epoch: 1, batch: 29673, sum loss: 4006.317383, avg loss: 2.486851, ppl: 12.023360 +epoch: 1, batch: 29674, sum loss: 4270.823730, avg loss: 2.610528, ppl: 13.606233 +epoch: 1, batch: 29675, sum loss: 3704.335938, avg loss: 2.489473, ppl: 12.054923 +epoch: 1, batch: 29676, sum loss: 4933.736816, avg loss: 2.832225, ppl: 16.983212 +epoch: 1, batch: 29677, sum loss: 4436.314941, avg loss: 2.434860, ppl: 11.414221 +epoch: 1, batch: 29678, sum loss: 4881.481934, avg loss: 2.676251, ppl: 14.530519 +epoch: 1, batch: 29679, sum loss: 5085.996582, avg loss: 2.889771, ppl: 17.989185 +epoch: 1, batch: 29680, sum loss: 4452.073730, avg loss: 2.733010, ppl: 15.379113 +epoch: 1, batch: 29681, sum loss: 4272.291016, avg loss: 2.427438, ppl: 11.329818 +epoch: 1, batch: 29682, sum loss: 3930.321777, avg loss: 2.285071, ppl: 9.826383 +epoch: 1, batch: 29683, sum loss: 4199.005859, avg loss: 2.477290, ppl: 11.908943 +epoch: 1, batch: 29684, sum loss: 4412.417969, avg loss: 2.723715, ppl: 15.236819 +epoch: 1, batch: 29685, sum loss: 4438.489258, avg loss: 2.770592, ppl: 15.968092 +epoch: 1, batch: 29686, sum loss: 4322.263184, avg loss: 2.624325, ppl: 13.795257 +epoch: 1, batch: 29687, sum loss: 4461.184570, avg loss: 2.700475, ppl: 14.886801 +epoch: 1, batch: 29688, sum loss: 
4304.030762, avg loss: 2.463670, ppl: 11.747842 +epoch: 1, batch: 29689, sum loss: 5062.902344, avg loss: 2.814287, ppl: 16.681276 +epoch: 1, batch: 29690, sum loss: 5013.139160, avg loss: 2.825896, ppl: 16.876055 +epoch: 1, batch: 29691, sum loss: 5142.061035, avg loss: 2.797639, ppl: 16.405869 +epoch: 1, batch: 29692, sum loss: 4014.307617, avg loss: 2.425564, ppl: 11.308600 +epoch: 1, batch: 29693, sum loss: 4287.667969, avg loss: 2.601740, ppl: 13.487190 +epoch: 1, batch: 29694, sum loss: 4732.888672, avg loss: 2.552799, ppl: 12.842995 +epoch: 1, batch: 29695, sum loss: 3625.409180, avg loss: 2.304774, ppl: 10.021911 +epoch: 1, batch: 29696, sum loss: 3885.008789, avg loss: 2.495189, ppl: 12.124022 +epoch: 1, batch: 29697, sum loss: 5170.311523, avg loss: 2.828398, ppl: 16.918335 +epoch: 1, batch: 29698, sum loss: 4523.880859, avg loss: 2.850587, ppl: 17.297926 +epoch: 1, batch: 29699, sum loss: 4910.646973, avg loss: 2.956440, ppl: 19.229393 +epoch: 1, batch: 29700, sum loss: 4975.570801, avg loss: 2.859524, ppl: 17.453209 +epoch: 1, batch: 29701, sum loss: 5344.266602, avg loss: 2.925160, ppl: 18.637201 +epoch: 1, batch: 29702, sum loss: 3097.672852, avg loss: 2.186078, ppl: 8.900241 +epoch: 1, batch: 29703, sum loss: 2971.506592, avg loss: 2.154827, ppl: 8.626399 +epoch: 1, batch: 29704, sum loss: 4512.929199, avg loss: 2.428918, ppl: 11.346595 +epoch: 1, batch: 29705, sum loss: 3760.203857, avg loss: 2.587890, ppl: 13.301674 +epoch: 1, batch: 29706, sum loss: 3545.920410, avg loss: 2.360799, ppl: 10.599418 +epoch: 1, batch: 29707, sum loss: 4092.445312, avg loss: 2.652265, ppl: 14.186138 +epoch: 1, batch: 29708, sum loss: 4565.723633, avg loss: 2.628511, ppl: 13.853130 +epoch: 1, batch: 29709, sum loss: 4810.052246, avg loss: 2.723699, ppl: 15.236576 +epoch: 1, batch: 29710, sum loss: 4949.002441, avg loss: 2.531459, ppl: 12.571836 +epoch: 1, batch: 29711, sum loss: 5007.047852, avg loss: 2.642242, ppl: 14.044653 +epoch: 1, batch: 29712, sum loss: 
4520.480957, avg loss: 2.603964, ppl: 13.517209 +epoch: 1, batch: 29713, sum loss: 4439.067383, avg loss: 2.735100, ppl: 15.411285 +epoch: 1, batch: 29714, sum loss: 4860.421875, avg loss: 2.771050, ppl: 15.975399 +epoch: 1, batch: 29715, sum loss: 4801.522461, avg loss: 2.663074, ppl: 14.340303 +epoch: 1, batch: 29716, sum loss: 3578.207520, avg loss: 2.429197, ppl: 11.349768 +epoch: 1, batch: 29717, sum loss: 4151.091797, avg loss: 2.552947, ppl: 12.844903 +epoch: 1, batch: 29718, sum loss: 3683.130127, avg loss: 2.444015, ppl: 11.519196 +epoch: 1, batch: 29719, sum loss: 4856.449707, avg loss: 2.904575, ppl: 18.257488 +epoch: 1, batch: 29720, sum loss: 5145.011719, avg loss: 2.732348, ppl: 15.368934 +epoch: 1, batch: 29721, sum loss: 4238.094238, avg loss: 2.479868, ppl: 11.939688 +epoch: 1, batch: 29722, sum loss: 5185.698730, avg loss: 2.770138, ppl: 15.960840 +epoch: 1, batch: 29723, sum loss: 4709.564453, avg loss: 2.663781, ppl: 14.350445 +epoch: 1, batch: 29724, sum loss: 5096.456055, avg loss: 2.777360, ppl: 16.076530 +epoch: 1, batch: 29725, sum loss: 4781.149902, avg loss: 2.422062, ppl: 11.269069 +epoch: 1, batch: 29726, sum loss: 3217.982178, avg loss: 2.529860, ppl: 12.551749 +epoch: 1, batch: 29727, sum loss: 5316.774414, avg loss: 2.905341, ppl: 18.271481 +epoch: 1, batch: 29728, sum loss: 3611.559570, avg loss: 2.391761, ppl: 10.932733 +epoch: 1, batch: 29729, sum loss: 4032.769287, avg loss: 2.620383, ppl: 13.740986 +epoch: 1, batch: 29730, sum loss: 4474.297852, avg loss: 2.682433, ppl: 14.620621 +epoch: 1, batch: 29731, sum loss: 3892.632080, avg loss: 2.498480, ppl: 12.163995 +epoch: 1, batch: 29732, sum loss: 5497.062500, avg loss: 2.819006, ppl: 16.760187 +epoch: 1, batch: 29733, sum loss: 3986.805664, avg loss: 2.693788, ppl: 14.787579 +epoch: 1, batch: 29734, sum loss: 3936.349365, avg loss: 2.791737, ppl: 16.309330 +epoch: 1, batch: 29735, sum loss: 4257.875000, avg loss: 2.416501, ppl: 11.206582 +epoch: 1, batch: 29736, sum loss: 
4208.231445, avg loss: 2.535079, ppl: 12.617431 +epoch: 1, batch: 29737, sum loss: 4423.652344, avg loss: 2.617546, ppl: 13.702055 +epoch: 1, batch: 29738, sum loss: 4323.140137, avg loss: 2.720667, ppl: 15.190453 +epoch: 1, batch: 29739, sum loss: 4354.165527, avg loss: 2.524154, ppl: 12.480335 +epoch: 1, batch: 29740, sum loss: 4900.554199, avg loss: 2.901453, ppl: 18.200571 +epoch: 1, batch: 29741, sum loss: 4659.238281, avg loss: 2.641292, ppl: 14.031315 +epoch: 1, batch: 29742, sum loss: 4368.281250, avg loss: 2.801976, ppl: 16.477180 +epoch: 1, batch: 29743, sum loss: 4547.968750, avg loss: 2.592913, ppl: 13.368653 +epoch: 1, batch: 29744, sum loss: 3736.977539, avg loss: 2.516483, ppl: 12.384963 +epoch: 1, batch: 29745, sum loss: 4311.441406, avg loss: 2.677914, ppl: 14.554699 +epoch: 1, batch: 29746, sum loss: 3660.885254, avg loss: 2.538755, ppl: 12.663899 +epoch: 1, batch: 29747, sum loss: 3994.375000, avg loss: 2.600505, ppl: 13.470534 +epoch: 1, batch: 29748, sum loss: 4624.459473, avg loss: 2.589283, ppl: 13.320217 +epoch: 1, batch: 29749, sum loss: 4527.275879, avg loss: 2.506797, ppl: 12.265584 +epoch: 1, batch: 29750, sum loss: 4316.068359, avg loss: 2.705999, ppl: 14.969262 +epoch: 1, batch: 29751, sum loss: 4854.765137, avg loss: 2.855744, ppl: 17.387375 +epoch: 1, batch: 29752, sum loss: 4457.320312, avg loss: 2.688372, ppl: 14.707707 +epoch: 1, batch: 29753, sum loss: 5147.676758, avg loss: 2.549617, ppl: 12.802200 +epoch: 1, batch: 29754, sum loss: 3988.892578, avg loss: 2.407298, ppl: 11.103916 +epoch: 1, batch: 29755, sum loss: 4404.183594, avg loss: 2.587652, ppl: 13.298510 +epoch: 1, batch: 29756, sum loss: 4950.746582, avg loss: 2.908782, ppl: 18.334450 +epoch: 1, batch: 29757, sum loss: 5434.703613, avg loss: 2.900055, ppl: 18.175148 +epoch: 1, batch: 29758, sum loss: 4379.631836, avg loss: 2.675401, ppl: 14.518173 +epoch: 1, batch: 29759, sum loss: 4839.324219, avg loss: 2.521795, ppl: 12.450924 +epoch: 1, batch: 29760, sum loss: 
3237.900146, avg loss: 2.464155, ppl: 11.753551 +epoch: 1, batch: 29761, sum loss: 4503.226074, avg loss: 2.861007, ppl: 17.479128 +epoch: 1, batch: 29762, sum loss: 4441.457031, avg loss: 2.665941, ppl: 14.381470 +epoch: 1, batch: 29763, sum loss: 5217.977051, avg loss: 2.638006, ppl: 13.985295 +epoch: 1, batch: 29764, sum loss: 4566.816895, avg loss: 2.721583, ppl: 15.204377 +epoch: 1, batch: 29765, sum loss: 4028.373535, avg loss: 2.447372, ppl: 11.557929 +epoch: 1, batch: 29766, sum loss: 3897.814697, avg loss: 2.574514, ppl: 13.124942 +epoch: 1, batch: 29767, sum loss: 4945.062500, avg loss: 2.690458, ppl: 14.738429 +epoch: 1, batch: 29768, sum loss: 4351.388184, avg loss: 2.532822, ppl: 12.588984 +epoch: 1, batch: 29769, sum loss: 3856.307861, avg loss: 2.676133, ppl: 14.528808 +epoch: 1, batch: 29770, sum loss: 4248.782715, avg loss: 2.538102, ppl: 12.655630 +epoch: 1, batch: 29771, sum loss: 5378.253418, avg loss: 2.679748, ppl: 14.581412 +epoch: 1, batch: 29772, sum loss: 5035.310547, avg loss: 2.756054, ppl: 15.737618 +epoch: 1, batch: 29773, sum loss: 4383.591797, avg loss: 2.580101, ppl: 13.198475 +epoch: 1, batch: 29774, sum loss: 4857.103516, avg loss: 2.788234, ppl: 16.252289 +epoch: 1, batch: 29775, sum loss: 4080.752930, avg loss: 2.515877, ppl: 12.377462 +epoch: 1, batch: 29776, sum loss: 3469.596191, avg loss: 2.439941, ppl: 11.472363 +epoch: 1, batch: 29777, sum loss: 4922.087891, avg loss: 2.711894, ppl: 15.057772 +epoch: 1, batch: 29778, sum loss: 3884.724121, avg loss: 2.443223, ppl: 11.510076 +epoch: 1, batch: 29779, sum loss: 4758.230469, avg loss: 2.734615, ppl: 15.403813 +epoch: 1, batch: 29780, sum loss: 4819.499512, avg loss: 2.800406, ppl: 16.451332 +epoch: 1, batch: 29781, sum loss: 4361.518066, avg loss: 2.672499, ppl: 14.476099 +epoch: 1, batch: 29782, sum loss: 4930.458008, avg loss: 2.873227, ppl: 17.694031 +epoch: 1, batch: 29783, sum loss: 3285.401855, avg loss: 2.431829, ppl: 11.379682 +epoch: 1, batch: 29784, sum loss: 
5815.989258, avg loss: 3.115152, ppl: 22.536858 +epoch: 1, batch: 29785, sum loss: 5177.373535, avg loss: 2.989246, ppl: 19.870687 +epoch: 1, batch: 29786, sum loss: 3872.502441, avg loss: 2.522803, ppl: 12.463481 +epoch: 1, batch: 29787, sum loss: 4107.574219, avg loss: 2.429080, ppl: 11.348436 +epoch: 1, batch: 29788, sum loss: 3914.935303, avg loss: 2.599559, ppl: 13.457800 +epoch: 1, batch: 29789, sum loss: 3380.551270, avg loss: 2.237294, ppl: 9.367947 +epoch: 1, batch: 29790, sum loss: 4357.057129, avg loss: 2.515622, ppl: 12.374302 +epoch: 1, batch: 29791, sum loss: 3965.215820, avg loss: 2.391566, ppl: 10.930593 +epoch: 1, batch: 29792, sum loss: 5031.334473, avg loss: 2.670560, ppl: 14.448056 +epoch: 1, batch: 29793, sum loss: 4516.457520, avg loss: 2.817503, ppl: 16.735010 +epoch: 1, batch: 29794, sum loss: 4351.514160, avg loss: 2.630903, ppl: 13.886311 +epoch: 1, batch: 29795, sum loss: 4566.193848, avg loss: 2.603303, ppl: 13.508285 +epoch: 1, batch: 29796, sum loss: 5151.580566, avg loss: 2.910498, ppl: 18.365936 +epoch: 1, batch: 29797, sum loss: 3616.569336, avg loss: 2.425600, ppl: 11.309013 +epoch: 1, batch: 29798, sum loss: 4270.869141, avg loss: 2.588406, ppl: 13.308536 +epoch: 1, batch: 29799, sum loss: 4018.818359, avg loss: 2.642221, ppl: 14.044365 +epoch: 1, batch: 29800, sum loss: 4194.109375, avg loss: 2.644457, ppl: 14.075805 +epoch: 1, batch: 29801, sum loss: 4148.558105, avg loss: 2.679947, ppl: 14.584322 +epoch: 1, batch: 29802, sum loss: 4349.641113, avg loss: 2.698288, ppl: 14.854286 +epoch: 1, batch: 29803, sum loss: 3908.130127, avg loss: 2.503607, ppl: 12.226513 +epoch: 1, batch: 29804, sum loss: 3362.372070, avg loss: 2.243077, ppl: 9.422277 +epoch: 1, batch: 29805, sum loss: 4553.462891, avg loss: 2.694357, ppl: 14.795997 +epoch: 1, batch: 29806, sum loss: 3111.379395, avg loss: 2.271080, ppl: 9.689860 +epoch: 1, batch: 29807, sum loss: 4812.082520, avg loss: 2.751334, ppl: 15.663508 +epoch: 1, batch: 29808, sum loss: 
4233.795898, avg loss: 2.740321, ppl: 15.491960 +epoch: 1, batch: 29809, sum loss: 4102.257812, avg loss: 2.487725, ppl: 12.033862 +epoch: 1, batch: 29810, sum loss: 4503.000488, avg loss: 2.559978, ppl: 12.935527 +epoch: 1, batch: 29811, sum loss: 3998.326416, avg loss: 2.357504, ppl: 10.564548 +epoch: 1, batch: 29812, sum loss: 3616.111328, avg loss: 2.233546, ppl: 9.332905 +epoch: 1, batch: 29813, sum loss: 4686.952148, avg loss: 2.731324, ppl: 15.353204 +epoch: 1, batch: 29814, sum loss: 4547.797852, avg loss: 2.613677, ppl: 13.649147 +epoch: 1, batch: 29815, sum loss: 4614.999023, avg loss: 2.663012, ppl: 14.339408 +epoch: 1, batch: 29816, sum loss: 4815.243652, avg loss: 2.778559, ppl: 16.095818 +epoch: 1, batch: 29817, sum loss: 4113.654785, avg loss: 2.545578, ppl: 12.750602 +epoch: 1, batch: 29818, sum loss: 3978.526367, avg loss: 2.480378, ppl: 11.945781 +epoch: 1, batch: 29819, sum loss: 4151.363281, avg loss: 2.702711, ppl: 14.920120 +epoch: 1, batch: 29820, sum loss: 3613.275391, avg loss: 2.720840, ppl: 15.193079 +epoch: 1, batch: 29821, sum loss: 4334.352051, avg loss: 2.489576, ppl: 12.056165 +epoch: 1, batch: 29822, sum loss: 4839.020020, avg loss: 2.873528, ppl: 17.699343 +epoch: 1, batch: 29823, sum loss: 4180.740723, avg loss: 2.532248, ppl: 12.581756 +epoch: 1, batch: 29824, sum loss: 4146.982422, avg loss: 2.574167, ppl: 13.120380 +epoch: 1, batch: 29825, sum loss: 3524.245361, avg loss: 2.344807, ppl: 10.431261 +epoch: 1, batch: 29826, sum loss: 4984.050293, avg loss: 2.701382, ppl: 14.900312 +epoch: 1, batch: 29827, sum loss: 4110.159180, avg loss: 2.486485, ppl: 12.018950 +epoch: 1, batch: 29828, sum loss: 5110.015625, avg loss: 2.778693, ppl: 16.097963 +epoch: 1, batch: 29829, sum loss: 4259.858887, avg loss: 2.753626, ppl: 15.699450 +epoch: 1, batch: 29830, sum loss: 4336.222656, avg loss: 2.576484, ppl: 13.150818 +epoch: 1, batch: 29831, sum loss: 4529.619629, avg loss: 2.578042, ppl: 13.171321 +epoch: 1, batch: 29832, sum loss: 
4731.899414, avg loss: 2.511624, ppl: 12.324931 +epoch: 1, batch: 29833, sum loss: 4725.116699, avg loss: 2.625065, ppl: 13.805470 +epoch: 1, batch: 29834, sum loss: 4611.200195, avg loss: 2.603727, ppl: 13.514009 +epoch: 1, batch: 29835, sum loss: 3605.146484, avg loss: 2.522846, ppl: 12.464016 +epoch: 1, batch: 29836, sum loss: 4701.922363, avg loss: 2.545708, ppl: 12.752253 +epoch: 1, batch: 29837, sum loss: 5182.587402, avg loss: 2.847576, ppl: 17.245922 +epoch: 1, batch: 29838, sum loss: 4408.481934, avg loss: 2.471122, ppl: 11.835722 +epoch: 1, batch: 29839, sum loss: 4927.013672, avg loss: 2.591801, ppl: 13.353799 +epoch: 1, batch: 29840, sum loss: 4293.759766, avg loss: 2.477645, ppl: 11.913180 +epoch: 1, batch: 29841, sum loss: 5278.239746, avg loss: 2.661745, ppl: 14.321252 +epoch: 1, batch: 29842, sum loss: 3431.265625, avg loss: 2.273867, ppl: 9.716908 +epoch: 1, batch: 29843, sum loss: 4134.933594, avg loss: 2.573076, ppl: 13.106083 +epoch: 1, batch: 29844, sum loss: 4921.153809, avg loss: 2.841313, ppl: 17.138252 +epoch: 1, batch: 29845, sum loss: 4148.830078, avg loss: 2.810861, ppl: 16.624220 +epoch: 1, batch: 29846, sum loss: 4299.183594, avg loss: 2.660386, ppl: 14.301806 +epoch: 1, batch: 29847, sum loss: 4625.813965, avg loss: 2.499089, ppl: 12.171404 +epoch: 1, batch: 29848, sum loss: 4129.066895, avg loss: 2.789910, ppl: 16.279556 +epoch: 1, batch: 29849, sum loss: 4705.645020, avg loss: 2.707506, ppl: 14.991835 +epoch: 1, batch: 29850, sum loss: 4375.971191, avg loss: 2.454274, ppl: 11.637986 +epoch: 1, batch: 29851, sum loss: 4563.166016, avg loss: 2.897248, ppl: 18.124199 +epoch: 1, batch: 29852, sum loss: 4762.638672, avg loss: 2.874254, ppl: 17.712202 +epoch: 1, batch: 29853, sum loss: 3947.892578, avg loss: 2.472068, ppl: 11.846922 +epoch: 1, batch: 29854, sum loss: 4127.305664, avg loss: 2.536758, ppl: 12.638633 +epoch: 1, batch: 29855, sum loss: 4913.360352, avg loss: 2.849977, ppl: 17.287388 +epoch: 1, batch: 29856, sum loss: 
4422.152344, avg loss: 2.704680, ppl: 14.949535 +epoch: 1, batch: 29857, sum loss: 3886.105713, avg loss: 2.689347, ppl: 14.722053 +epoch: 1, batch: 29858, sum loss: 3190.775879, avg loss: 2.335854, ppl: 10.338281 +epoch: 1, batch: 29859, sum loss: 3574.189453, avg loss: 2.314890, ppl: 10.123806 +epoch: 1, batch: 29860, sum loss: 5934.833008, avg loss: 2.979334, ppl: 19.674711 +epoch: 1, batch: 29861, sum loss: 3667.348389, avg loss: 2.453076, ppl: 11.624046 +epoch: 1, batch: 29862, sum loss: 4567.791992, avg loss: 2.922452, ppl: 18.586802 +epoch: 1, batch: 29863, sum loss: 3881.274414, avg loss: 2.425797, ppl: 11.311235 +epoch: 1, batch: 29864, sum loss: 4014.430664, avg loss: 2.550464, ppl: 12.813049 +epoch: 1, batch: 29865, sum loss: 4626.965332, avg loss: 2.902739, ppl: 18.223986 +epoch: 1, batch: 29866, sum loss: 3815.688965, avg loss: 2.537027, ppl: 12.642032 +epoch: 1, batch: 29867, sum loss: 3670.864258, avg loss: 2.466979, ppl: 11.786779 +epoch: 1, batch: 29868, sum loss: 4930.308594, avg loss: 2.647856, ppl: 14.123732 +epoch: 1, batch: 29869, sum loss: 4959.422852, avg loss: 2.630994, ppl: 13.887562 +epoch: 1, batch: 29870, sum loss: 3946.459961, avg loss: 2.383128, ppl: 10.838753 +epoch: 1, batch: 29871, sum loss: 4241.658203, avg loss: 2.410033, ppl: 11.134329 +epoch: 1, batch: 29872, sum loss: 4444.052246, avg loss: 2.509346, ppl: 12.296888 +epoch: 1, batch: 29873, sum loss: 4911.738770, avg loss: 2.874043, ppl: 17.708460 +epoch: 1, batch: 29874, sum loss: 4661.552734, avg loss: 2.748557, ppl: 15.620077 +epoch: 1, batch: 29875, sum loss: 4585.341797, avg loss: 2.534739, ppl: 12.613132 +epoch: 1, batch: 29876, sum loss: 3142.547119, avg loss: 2.327813, ppl: 10.255485 +epoch: 1, batch: 29877, sum loss: 3832.719238, avg loss: 2.521526, ppl: 12.447576 +epoch: 1, batch: 29878, sum loss: 4860.260742, avg loss: 2.670473, ppl: 14.446802 +epoch: 1, batch: 29879, sum loss: 5237.862793, avg loss: 2.921284, ppl: 18.565119 +epoch: 1, batch: 29880, sum loss: 
5164.622559, avg loss: 2.695523, ppl: 14.813268 +epoch: 1, batch: 29881, sum loss: 5202.279785, avg loss: 2.739484, ppl: 15.478993 +epoch: 1, batch: 29882, sum loss: 4971.115234, avg loss: 2.662622, ppl: 14.333823 +epoch: 1, batch: 29883, sum loss: 4942.085449, avg loss: 2.843547, ppl: 17.176588 +epoch: 1, batch: 29884, sum loss: 3370.227051, avg loss: 2.240842, ppl: 9.401247 +epoch: 1, batch: 29885, sum loss: 3036.228516, avg loss: 2.176508, ppl: 8.815469 +epoch: 1, batch: 29886, sum loss: 3847.532471, avg loss: 2.503274, ppl: 12.222447 +epoch: 1, batch: 29887, sum loss: 4342.703613, avg loss: 2.638338, ppl: 13.989927 +epoch: 1, batch: 29888, sum loss: 3765.663574, avg loss: 2.264380, ppl: 9.625153 +epoch: 1, batch: 29889, sum loss: 5526.208008, avg loss: 2.925467, ppl: 18.642939 +epoch: 1, batch: 29890, sum loss: 3591.608887, avg loss: 2.292029, ppl: 9.894994 +epoch: 1, batch: 29891, sum loss: 3954.506104, avg loss: 2.541456, ppl: 12.698152 +epoch: 1, batch: 29892, sum loss: 4866.700195, avg loss: 2.640640, ppl: 14.022179 +epoch: 1, batch: 29893, sum loss: 3935.391113, avg loss: 2.414350, ppl: 11.182503 +epoch: 1, batch: 29894, sum loss: 4082.862305, avg loss: 2.618898, ppl: 13.720600 +epoch: 1, batch: 29895, sum loss: 4439.949219, avg loss: 2.645977, ppl: 14.097208 +epoch: 1, batch: 29896, sum loss: 4274.694336, avg loss: 2.505682, ppl: 12.251918 +epoch: 1, batch: 29897, sum loss: 5373.693848, avg loss: 2.834227, ppl: 17.017239 +epoch: 1, batch: 29898, sum loss: 4190.979492, avg loss: 2.784704, ppl: 16.195026 +epoch: 1, batch: 29899, sum loss: 4573.109375, avg loss: 2.807311, ppl: 16.565311 +epoch: 1, batch: 29900, sum loss: 4139.762207, avg loss: 2.539732, ppl: 12.676267 +epoch: 1, batch: 29901, sum loss: 5168.044434, avg loss: 2.847408, ppl: 17.243023 +epoch: 1, batch: 29902, sum loss: 4278.182617, avg loss: 2.468657, ppl: 11.806581 +epoch: 1, batch: 29903, sum loss: 3822.049072, avg loss: 2.373944, ppl: 10.739661 +epoch: 1, batch: 29904, sum loss: 
3992.866211, avg loss: 2.379539, ppl: 10.799920 +epoch: 1, batch: 29905, sum loss: 4637.273926, avg loss: 2.584880, ppl: 13.261693 +epoch: 1, batch: 29906, sum loss: 5059.804688, avg loss: 2.638063, ppl: 13.986086 +epoch: 1, batch: 29907, sum loss: 4723.030273, avg loss: 2.725349, ppl: 15.261743 +epoch: 1, batch: 29908, sum loss: 4960.748535, avg loss: 2.511771, ppl: 12.326747 +epoch: 1, batch: 29909, sum loss: 4476.235352, avg loss: 2.780270, ppl: 16.123379 +epoch: 1, batch: 29910, sum loss: 4798.348145, avg loss: 2.567334, ppl: 13.031043 +epoch: 1, batch: 29911, sum loss: 4730.700684, avg loss: 2.695556, ppl: 14.813752 +epoch: 1, batch: 29912, sum loss: 4293.682129, avg loss: 2.685230, ppl: 14.661567 +epoch: 1, batch: 29913, sum loss: 3900.936768, avg loss: 2.367073, ppl: 10.666130 +epoch: 1, batch: 29914, sum loss: 4123.095703, avg loss: 2.532614, ppl: 12.586364 +epoch: 1, batch: 29915, sum loss: 5207.532715, avg loss: 2.893074, ppl: 18.048702 +epoch: 1, batch: 29916, sum loss: 4596.792480, avg loss: 2.573792, ppl: 13.115464 +epoch: 1, batch: 29917, sum loss: 4528.437500, avg loss: 2.555552, ppl: 12.878404 +epoch: 1, batch: 29918, sum loss: 3932.800781, avg loss: 2.511367, ppl: 12.321763 +epoch: 1, batch: 29919, sum loss: 3903.019043, avg loss: 2.425742, ppl: 11.310620 +epoch: 1, batch: 29920, sum loss: 4433.437500, avg loss: 2.770898, ppl: 15.972977 +epoch: 1, batch: 29921, sum loss: 4167.906250, avg loss: 2.556998, ppl: 12.897039 +epoch: 1, batch: 29922, sum loss: 4402.580566, avg loss: 2.580645, ppl: 13.205654 +epoch: 1, batch: 29923, sum loss: 3961.708252, avg loss: 2.651746, ppl: 14.178770 +epoch: 1, batch: 29924, sum loss: 4965.032227, avg loss: 2.759884, ppl: 15.798016 +epoch: 1, batch: 29925, sum loss: 4083.615479, avg loss: 2.499153, ppl: 12.172175 +epoch: 1, batch: 29926, sum loss: 3798.947998, avg loss: 2.384776, ppl: 10.856629 +epoch: 1, batch: 29927, sum loss: 4382.952148, avg loss: 2.760046, ppl: 15.800562 +epoch: 1, batch: 29928, sum loss: 
4265.816895, avg loss: 2.666136, ppl: 14.384274 +epoch: 1, batch: 29929, sum loss: 4971.414551, avg loss: 2.786668, ppl: 16.226854 +epoch: 1, batch: 29930, sum loss: 4551.291016, avg loss: 2.624735, ppl: 13.800918 +epoch: 1, batch: 29931, sum loss: 4291.561035, avg loss: 2.660608, ppl: 14.304988 +epoch: 1, batch: 29932, sum loss: 4266.695801, avg loss: 2.702151, ppl: 14.911766 +epoch: 1, batch: 29933, sum loss: 5476.953613, avg loss: 2.808694, ppl: 16.588242 +epoch: 1, batch: 29934, sum loss: 4067.016113, avg loss: 2.704133, ppl: 14.941358 +epoch: 1, batch: 29935, sum loss: 4835.424805, avg loss: 2.692330, ppl: 14.766043 +epoch: 1, batch: 29936, sum loss: 4230.546387, avg loss: 2.677561, ppl: 14.549564 +epoch: 1, batch: 29937, sum loss: 5241.125977, avg loss: 2.924736, ppl: 18.629299 +epoch: 1, batch: 29938, sum loss: 3389.885010, avg loss: 2.201224, ppl: 9.036068 +epoch: 1, batch: 29939, sum loss: 5496.992676, avg loss: 2.810323, ppl: 16.615292 +epoch: 1, batch: 29940, sum loss: 4080.122803, avg loss: 2.379080, ppl: 10.794971 +epoch: 1, batch: 29941, sum loss: 4614.286621, avg loss: 2.525609, ppl: 12.498499 +epoch: 1, batch: 29942, sum loss: 4652.367188, avg loss: 2.596187, ppl: 13.412500 +epoch: 1, batch: 29943, sum loss: 4388.013184, avg loss: 2.687087, ppl: 14.688826 +epoch: 1, batch: 29944, sum loss: 3905.938721, avg loss: 2.519961, ppl: 12.428107 +epoch: 1, batch: 29945, sum loss: 3722.453857, avg loss: 2.537460, ppl: 12.647504 +epoch: 1, batch: 29946, sum loss: 4030.092529, avg loss: 2.414675, ppl: 11.186134 +epoch: 1, batch: 29947, sum loss: 3876.595703, avg loss: 2.328286, ppl: 10.260337 +epoch: 1, batch: 29948, sum loss: 4870.982910, avg loss: 2.822122, ppl: 16.812490 +epoch: 1, batch: 29949, sum loss: 3821.347412, avg loss: 2.559509, ppl: 12.929471 +epoch: 1, batch: 29950, sum loss: 4017.216309, avg loss: 2.606889, ppl: 13.556813 +epoch: 1, batch: 29951, sum loss: 4180.960938, avg loss: 2.490149, ppl: 12.063080 +epoch: 1, batch: 29952, sum loss: 
5161.040527, avg loss: 2.932409, ppl: 18.772804 +epoch: 1, batch: 29953, sum loss: 4888.465820, avg loss: 2.625385, ppl: 13.809884 +epoch: 1, batch: 29954, sum loss: 4094.375000, avg loss: 2.651798, ppl: 14.179503 +epoch: 1, batch: 29955, sum loss: 4461.211426, avg loss: 2.423254, ppl: 11.282518 +epoch: 1, batch: 29956, sum loss: 4963.403809, avg loss: 2.664200, ppl: 14.356454 +epoch: 1, batch: 29957, sum loss: 3731.970947, avg loss: 2.399981, ppl: 11.022972 +epoch: 1, batch: 29958, sum loss: 4255.013184, avg loss: 2.594520, ppl: 13.390163 +epoch: 1, batch: 29959, sum loss: 4487.979492, avg loss: 2.615373, ppl: 13.672311 +epoch: 1, batch: 29960, sum loss: 4624.967285, avg loss: 2.781099, ppl: 16.136744 +epoch: 1, batch: 29961, sum loss: 4278.993652, avg loss: 2.543991, ppl: 12.730381 +epoch: 1, batch: 29962, sum loss: 4170.410645, avg loss: 2.340298, ppl: 10.384330 +epoch: 1, batch: 29963, sum loss: 4996.776367, avg loss: 2.743974, ppl: 15.548653 +epoch: 1, batch: 29964, sum loss: 4780.580078, avg loss: 2.600968, ppl: 13.476782 +epoch: 1, batch: 29965, sum loss: 4210.806641, avg loss: 2.417225, ppl: 11.214700 +epoch: 1, batch: 29966, sum loss: 4094.742188, avg loss: 2.446083, ppl: 11.543039 +epoch: 1, batch: 29967, sum loss: 4714.481445, avg loss: 2.758620, ppl: 15.778054 +epoch: 1, batch: 29968, sum loss: 4699.324219, avg loss: 2.660999, ppl: 14.310575 +epoch: 1, batch: 29969, sum loss: 3373.706543, avg loss: 2.164020, ppl: 8.706062 +epoch: 1, batch: 29970, sum loss: 4476.048828, avg loss: 2.396172, ppl: 10.981058 +epoch: 1, batch: 29971, sum loss: 4081.466553, avg loss: 2.496310, ppl: 12.137620 +epoch: 1, batch: 29972, sum loss: 4102.574219, avg loss: 2.410443, ppl: 11.138895 +epoch: 1, batch: 29973, sum loss: 3751.260254, avg loss: 2.504179, ppl: 12.233511 +epoch: 1, batch: 29974, sum loss: 3962.937988, avg loss: 2.537092, ppl: 12.642855 +epoch: 1, batch: 29975, sum loss: 3712.153076, avg loss: 2.294285, ppl: 9.917343 +epoch: 1, batch: 29976, sum loss: 
4722.047852, avg loss: 2.522462, ppl: 12.459229 +epoch: 1, batch: 29977, sum loss: 5003.499512, avg loss: 2.737144, ppl: 15.442821 +epoch: 1, batch: 29978, sum loss: 4458.822266, avg loss: 2.793748, ppl: 16.342161 +epoch: 1, batch: 29979, sum loss: 3674.101807, avg loss: 2.490916, ppl: 12.072335 +epoch: 1, batch: 29980, sum loss: 4507.665527, avg loss: 2.607094, ppl: 13.559590 +epoch: 1, batch: 29981, sum loss: 4338.983887, avg loss: 2.734079, ppl: 15.395563 +epoch: 1, batch: 29982, sum loss: 4654.757812, avg loss: 2.684405, ppl: 14.649477 +epoch: 1, batch: 29983, sum loss: 4243.080078, avg loss: 2.702599, ppl: 14.918451 +epoch: 1, batch: 29984, sum loss: 4459.019531, avg loss: 2.858346, ppl: 17.432669 +epoch: 1, batch: 29985, sum loss: 4913.463867, avg loss: 2.765033, ppl: 15.879564 +epoch: 1, batch: 29986, sum loss: 4270.967285, avg loss: 2.533195, ppl: 12.593682 +epoch: 1, batch: 29987, sum loss: 3927.262451, avg loss: 2.297989, ppl: 9.954141 +epoch: 1, batch: 29988, sum loss: 4389.247070, avg loss: 2.540074, ppl: 12.680605 +epoch: 1, batch: 29989, sum loss: 3960.720215, avg loss: 2.475450, ppl: 11.887055 +epoch: 1, batch: 29990, sum loss: 5084.162109, avg loss: 2.821400, ppl: 16.800350 +epoch: 1, batch: 29991, sum loss: 4160.619629, avg loss: 2.565117, ppl: 13.002178 +epoch: 1, batch: 29992, sum loss: 4752.179199, avg loss: 2.672767, ppl: 14.479979 +epoch: 1, batch: 29993, sum loss: 4650.559082, avg loss: 2.627434, ppl: 13.838223 +epoch: 1, batch: 29994, sum loss: 4061.026611, avg loss: 2.439055, ppl: 11.462203 +epoch: 1, batch: 29995, sum loss: 4677.035156, avg loss: 2.562759, ppl: 12.971556 +epoch: 1, batch: 29996, sum loss: 3887.177734, avg loss: 2.722113, ppl: 15.212438 +epoch: 1, batch: 29997, sum loss: 4927.651367, avg loss: 2.762136, ppl: 15.833634 +epoch: 1, batch: 29998, sum loss: 4642.105469, avg loss: 2.547808, ppl: 12.779058 +epoch: 1, batch: 29999, sum loss: 4180.181152, avg loss: 2.564528, ppl: 12.994527 +epoch: 1, batch: 30000, sum loss: 
3570.780762, avg loss: 2.382109, ppl: 10.827711 +epoch: 1, batch: 30001, sum loss: 3791.642578, avg loss: 2.512686, ppl: 12.338023 +epoch: 1, batch: 30002, sum loss: 4795.604492, avg loss: 2.709381, ppl: 15.019977 +epoch: 1, batch: 30003, sum loss: 4581.090820, avg loss: 2.819133, ppl: 16.762308 +epoch: 1, batch: 30004, sum loss: 3855.478516, avg loss: 2.769740, ppl: 15.954491 +epoch: 1, batch: 30005, sum loss: 4396.637695, avg loss: 2.437160, ppl: 11.440509 +epoch: 1, batch: 30006, sum loss: 5238.211914, avg loss: 2.889251, ppl: 17.979837 +epoch: 1, batch: 30007, sum loss: 4229.334961, avg loss: 2.534053, ppl: 12.604493 +epoch: 1, batch: 30008, sum loss: 4176.356445, avg loss: 2.532660, ppl: 12.586943 +epoch: 1, batch: 30009, sum loss: 3304.105957, avg loss: 2.297709, ppl: 9.951360 +epoch: 1, batch: 30010, sum loss: 4339.650879, avg loss: 2.755334, ppl: 15.726287 +epoch: 1, batch: 30011, sum loss: 4251.129395, avg loss: 2.719852, ppl: 15.178082 +epoch: 1, batch: 30012, sum loss: 4938.385254, avg loss: 2.675182, ppl: 14.514986 +epoch: 1, batch: 30013, sum loss: 4714.159180, avg loss: 2.643948, ppl: 14.068638 +epoch: 1, batch: 30014, sum loss: 5610.169434, avg loss: 2.866719, ppl: 17.579250 +epoch: 1, batch: 30015, sum loss: 5000.145996, avg loss: 2.785597, ppl: 16.209486 +epoch: 1, batch: 30016, sum loss: 3604.043457, avg loss: 2.433520, ppl: 11.398939 +epoch: 1, batch: 30017, sum loss: 3668.831299, avg loss: 2.760596, ppl: 15.809256 +epoch: 1, batch: 30018, sum loss: 4326.211914, avg loss: 2.507949, ppl: 12.279720 +epoch: 1, batch: 30019, sum loss: 4118.005859, avg loss: 2.465872, ppl: 11.773742 +epoch: 1, batch: 30020, sum loss: 4741.453125, avg loss: 2.771159, ppl: 15.977143 +epoch: 1, batch: 30021, sum loss: 4151.060547, avg loss: 2.537323, ppl: 12.645776 +epoch: 1, batch: 30022, sum loss: 4878.996094, avg loss: 2.541144, ppl: 12.694183 +epoch: 1, batch: 30023, sum loss: 5168.111328, avg loss: 2.926450, ppl: 18.661270 +epoch: 1, batch: 30024, sum loss: 
4063.279297, avg loss: 2.486707, ppl: 12.021624 +epoch: 1, batch: 30025, sum loss: 4657.243164, avg loss: 2.808953, ppl: 16.592529 +epoch: 1, batch: 30026, sum loss: 4216.671875, avg loss: 2.540164, ppl: 12.681747 +epoch: 1, batch: 30027, sum loss: 4671.512695, avg loss: 2.649752, ppl: 14.150528 +epoch: 1, batch: 30028, sum loss: 3988.324219, avg loss: 2.460410, ppl: 11.709607 +epoch: 1, batch: 30029, sum loss: 5274.008301, avg loss: 2.706007, ppl: 14.969391 +epoch: 1, batch: 30030, sum loss: 3788.315186, avg loss: 2.321272, ppl: 10.188622 +epoch: 1, batch: 30031, sum loss: 4538.822754, avg loss: 2.520168, ppl: 12.430686 +epoch: 1, batch: 30032, sum loss: 3808.094238, avg loss: 2.362341, ppl: 10.615778 +epoch: 1, batch: 30033, sum loss: 3741.294678, avg loss: 2.469501, ppl: 11.816555 +epoch: 1, batch: 30034, sum loss: 4129.148438, avg loss: 2.356820, ppl: 10.557327 +epoch: 1, batch: 30035, sum loss: 4737.715820, avg loss: 2.719699, ppl: 15.175756 +epoch: 1, batch: 30036, sum loss: 4382.073242, avg loss: 2.491230, ppl: 12.076120 +epoch: 1, batch: 30037, sum loss: 4341.723633, avg loss: 2.606076, ppl: 13.545799 +epoch: 1, batch: 30038, sum loss: 4629.790039, avg loss: 2.630563, ppl: 13.881577 +epoch: 1, batch: 30039, sum loss: 5086.283203, avg loss: 2.674176, ppl: 14.500403 +epoch: 1, batch: 30040, sum loss: 3565.784912, avg loss: 2.511116, ppl: 12.318673 +epoch: 1, batch: 30041, sum loss: 3470.108643, avg loss: 2.527391, ppl: 12.520802 +epoch: 1, batch: 30042, sum loss: 4419.539062, avg loss: 2.745055, ppl: 15.565474 +epoch: 1, batch: 30043, sum loss: 5223.539062, avg loss: 2.677365, ppl: 14.546713 +epoch: 1, batch: 30044, sum loss: 3974.597656, avg loss: 2.485677, ppl: 12.009248 +epoch: 1, batch: 30045, sum loss: 4203.036621, avg loss: 2.513778, ppl: 12.351505 +epoch: 1, batch: 30046, sum loss: 5165.357422, avg loss: 3.042025, ppl: 20.947611 +epoch: 1, batch: 30047, sum loss: 4698.495605, avg loss: 2.734864, ppl: 15.407640 +epoch: 1, batch: 30048, sum loss: 
4032.936523, avg loss: 2.422184, ppl: 11.270450 +epoch: 1, batch: 30049, sum loss: 3906.511230, avg loss: 2.500968, ppl: 12.194286 +epoch: 1, batch: 30050, sum loss: 5144.368652, avg loss: 2.649006, ppl: 14.139972 +epoch: 1, batch: 30051, sum loss: 4107.351074, avg loss: 2.235902, ppl: 9.354913 +epoch: 1, batch: 30052, sum loss: 4632.944336, avg loss: 2.775881, ppl: 16.052757 +epoch: 1, batch: 30053, sum loss: 4063.587402, avg loss: 2.647288, ppl: 14.115706 +epoch: 1, batch: 30054, sum loss: 4243.976562, avg loss: 2.739817, ppl: 15.484150 +epoch: 1, batch: 30055, sum loss: 4100.596191, avg loss: 2.423520, ppl: 11.285519 +epoch: 1, batch: 30056, sum loss: 4364.491699, avg loss: 2.633972, ppl: 13.928986 +epoch: 1, batch: 30057, sum loss: 4498.375000, avg loss: 2.507456, ppl: 12.273661 +epoch: 1, batch: 30058, sum loss: 5620.734375, avg loss: 2.711401, ppl: 15.050346 +epoch: 1, batch: 30059, sum loss: 4439.386719, avg loss: 2.582540, ppl: 13.230705 +epoch: 1, batch: 30060, sum loss: 4893.369629, avg loss: 2.836736, ppl: 17.059994 +epoch: 1, batch: 30061, sum loss: 3526.490723, avg loss: 2.581618, ppl: 13.218513 +epoch: 1, batch: 30062, sum loss: 4805.213867, avg loss: 2.851759, ppl: 17.318216 +epoch: 1, batch: 30063, sum loss: 5320.329102, avg loss: 2.794291, ppl: 16.351025 +epoch: 1, batch: 30064, sum loss: 4883.464355, avg loss: 2.844184, ppl: 17.187531 +epoch: 1, batch: 30065, sum loss: 4516.636719, avg loss: 2.595768, ppl: 13.406882 +epoch: 1, batch: 30066, sum loss: 3661.029297, avg loss: 2.578190, ppl: 13.173271 +epoch: 1, batch: 30067, sum loss: 5570.808105, avg loss: 2.797995, ppl: 16.411709 +epoch: 1, batch: 30068, sum loss: 3472.613281, avg loss: 2.438633, ppl: 11.457367 +epoch: 1, batch: 30069, sum loss: 4404.489746, avg loss: 2.548895, ppl: 12.792955 +epoch: 1, batch: 30070, sum loss: 4093.657471, avg loss: 2.464574, ppl: 11.758473 +epoch: 1, batch: 30071, sum loss: 3965.753418, avg loss: 2.506797, ppl: 12.265584 +epoch: 1, batch: 30072, sum loss: 
3603.949951, avg loss: 2.463397, ppl: 11.744643 +epoch: 1, batch: 30073, sum loss: 3687.043945, avg loss: 2.494617, ppl: 12.117094 +epoch: 1, batch: 30074, sum loss: 4713.550293, avg loss: 2.649550, ppl: 14.147677 +epoch: 1, batch: 30075, sum loss: 4134.153320, avg loss: 2.482975, ppl: 11.976843 +epoch: 1, batch: 30076, sum loss: 5160.390625, avg loss: 2.717425, ppl: 15.141289 +epoch: 1, batch: 30077, sum loss: 4747.200195, avg loss: 2.739296, ppl: 15.476086 +epoch: 1, batch: 30078, sum loss: 4633.502441, avg loss: 2.897750, ppl: 18.133303 +epoch: 1, batch: 30079, sum loss: 4390.818359, avg loss: 2.526363, ppl: 12.507928 +epoch: 1, batch: 30080, sum loss: 4973.208008, avg loss: 2.723553, ppl: 15.234357 +epoch: 1, batch: 30081, sum loss: 4935.363281, avg loss: 2.782053, ppl: 16.152155 +epoch: 1, batch: 30082, sum loss: 4733.040039, avg loss: 2.894826, ppl: 18.080349 +epoch: 1, batch: 30083, sum loss: 4858.132812, avg loss: 2.687020, ppl: 14.687845 +epoch: 1, batch: 30084, sum loss: 5250.898438, avg loss: 2.874055, ppl: 17.708679 +epoch: 1, batch: 30085, sum loss: 4090.612305, avg loss: 2.470176, ppl: 11.824533 +epoch: 1, batch: 30086, sum loss: 4734.431152, avg loss: 2.767055, ppl: 15.911710 +epoch: 1, batch: 30087, sum loss: 4443.581055, avg loss: 2.523328, ppl: 12.470029 +epoch: 1, batch: 30088, sum loss: 3803.429199, avg loss: 2.433416, ppl: 11.397749 +epoch: 1, batch: 30089, sum loss: 6070.161133, avg loss: 3.016979, ppl: 20.429480 +epoch: 1, batch: 30090, sum loss: 4240.268066, avg loss: 2.434138, ppl: 11.405980 +epoch: 1, batch: 30091, sum loss: 4532.497559, avg loss: 2.664608, ppl: 14.362311 +epoch: 1, batch: 30092, sum loss: 4011.869629, avg loss: 2.566775, ppl: 13.023756 +epoch: 1, batch: 30093, sum loss: 4491.248535, avg loss: 2.858847, ppl: 17.441408 +epoch: 1, batch: 30094, sum loss: 3756.642334, avg loss: 2.460146, ppl: 11.706517 +epoch: 1, batch: 30095, sum loss: 4169.724121, avg loss: 2.589891, ppl: 13.328315 +epoch: 1, batch: 30096, sum loss: 
3726.806885, avg loss: 2.412173, ppl: 11.158179 +epoch: 1, batch: 30097, sum loss: 4381.606445, avg loss: 2.639522, ppl: 14.006508 +epoch: 1, batch: 30098, sum loss: 4858.481445, avg loss: 2.785826, ppl: 16.213211 +epoch: 1, batch: 30099, sum loss: 3577.898682, avg loss: 2.572177, ppl: 13.094305 +epoch: 1, batch: 30100, sum loss: 4736.801270, avg loss: 2.792925, ppl: 16.328718 +epoch: 1, batch: 30101, sum loss: 4769.224121, avg loss: 2.929499, ppl: 18.718245 +epoch: 1, batch: 30102, sum loss: 4149.426270, avg loss: 2.530138, ppl: 12.555239 +epoch: 1, batch: 30103, sum loss: 5061.986328, avg loss: 2.691114, ppl: 14.748102 +epoch: 1, batch: 30104, sum loss: 4993.283203, avg loss: 2.565922, ppl: 13.012645 +epoch: 1, batch: 30105, sum loss: 4186.778320, avg loss: 2.352123, ppl: 10.507852 +epoch: 1, batch: 30106, sum loss: 5016.929688, avg loss: 2.804321, ppl: 16.515850 +epoch: 1, batch: 30107, sum loss: 4460.439453, avg loss: 2.424152, ppl: 11.292645 +epoch: 1, batch: 30108, sum loss: 4754.297363, avg loss: 2.609384, ppl: 13.590674 +epoch: 1, batch: 30109, sum loss: 3739.883301, avg loss: 2.453992, ppl: 11.634696 +epoch: 1, batch: 30110, sum loss: 4778.256836, avg loss: 2.625416, ppl: 13.810319 +epoch: 1, batch: 30111, sum loss: 5278.403809, avg loss: 2.854734, ppl: 17.369820 +epoch: 1, batch: 30112, sum loss: 4686.140137, avg loss: 2.562133, ppl: 12.963433 +epoch: 1, batch: 30113, sum loss: 3697.602783, avg loss: 2.415155, ppl: 11.191509 +epoch: 1, batch: 30114, sum loss: 4707.183594, avg loss: 2.778739, ppl: 16.098703 +epoch: 1, batch: 30115, sum loss: 4161.832520, avg loss: 2.602772, ppl: 13.501111 +epoch: 1, batch: 30116, sum loss: 4493.029297, avg loss: 2.708276, ppl: 15.003381 +epoch: 1, batch: 30117, sum loss: 4174.508301, avg loss: 2.505707, ppl: 12.252221 +epoch: 1, batch: 30118, sum loss: 4977.936035, avg loss: 2.552788, ppl: 12.842854 +epoch: 1, batch: 30119, sum loss: 4814.947266, avg loss: 2.857535, ppl: 17.418543 +epoch: 1, batch: 30120, sum loss: 
5893.420410, avg loss: 2.979484, ppl: 19.677666 +epoch: 1, batch: 30121, sum loss: 3474.082764, avg loss: 2.543252, ppl: 12.720979 +epoch: 1, batch: 30122, sum loss: 4673.749023, avg loss: 2.728400, ppl: 15.308371 +epoch: 1, batch: 30123, sum loss: 3676.034180, avg loss: 2.353415, ppl: 10.521440 +epoch: 1, batch: 30124, sum loss: 4104.296875, avg loss: 2.341299, ppl: 10.394729 +epoch: 1, batch: 30125, sum loss: 4034.922852, avg loss: 2.576579, ppl: 13.152069 +epoch: 1, batch: 30126, sum loss: 4380.006836, avg loss: 2.555430, ppl: 12.876835 +epoch: 1, batch: 30127, sum loss: 3858.823730, avg loss: 2.456285, ppl: 11.661408 +epoch: 1, batch: 30128, sum loss: 4926.963379, avg loss: 2.629116, ppl: 13.861508 +epoch: 1, batch: 30129, sum loss: 4210.123047, avg loss: 2.697068, ppl: 14.836168 +epoch: 1, batch: 30130, sum loss: 4746.257324, avg loss: 2.748267, ppl: 15.615549 +epoch: 1, batch: 30131, sum loss: 4485.360840, avg loss: 2.542722, ppl: 12.714226 +epoch: 1, batch: 30132, sum loss: 5166.326660, avg loss: 2.792609, ppl: 16.323551 +epoch: 1, batch: 30133, sum loss: 4410.499512, avg loss: 2.552372, ppl: 12.837524 +epoch: 1, batch: 30134, sum loss: 4512.252930, avg loss: 2.830773, ppl: 16.958570 +epoch: 1, batch: 30135, sum loss: 4286.259766, avg loss: 2.642577, ppl: 14.049361 +epoch: 1, batch: 30136, sum loss: 4310.385254, avg loss: 2.719486, ppl: 15.172522 +epoch: 1, batch: 30137, sum loss: 5333.225586, avg loss: 2.972813, ppl: 19.546822 +epoch: 1, batch: 30138, sum loss: 4082.868652, avg loss: 2.500226, ppl: 12.185245 +epoch: 1, batch: 30139, sum loss: 4403.565918, avg loss: 2.730047, ppl: 15.333611 +epoch: 1, batch: 30140, sum loss: 4235.047852, avg loss: 2.543572, ppl: 12.725049 +epoch: 1, batch: 30141, sum loss: 4417.337891, avg loss: 2.606099, ppl: 13.546106 +epoch: 1, batch: 30142, sum loss: 4755.830566, avg loss: 2.587503, ppl: 13.296528 +epoch: 1, batch: 30143, sum loss: 4679.669922, avg loss: 2.555800, ppl: 12.881603 +epoch: 1, batch: 30144, sum loss: 
4548.047363, avg loss: 2.521091, ppl: 12.442158 +epoch: 1, batch: 30145, sum loss: 4605.197754, avg loss: 2.471926, ppl: 11.845236 +epoch: 1, batch: 30146, sum loss: 4530.330078, avg loss: 2.460799, ppl: 11.714162 +epoch: 1, batch: 30147, sum loss: 4709.299805, avg loss: 2.516996, ppl: 12.391322 +epoch: 1, batch: 30148, sum loss: 4372.729492, avg loss: 2.692567, ppl: 14.769546 +epoch: 1, batch: 30149, sum loss: 4537.144043, avg loss: 2.764865, ppl: 15.876899 +epoch: 1, batch: 30150, sum loss: 3925.077148, avg loss: 2.673758, ppl: 14.494341 +epoch: 1, batch: 30151, sum loss: 4613.444336, avg loss: 2.725012, ppl: 15.256591 +epoch: 1, batch: 30152, sum loss: 4080.790283, avg loss: 2.604206, ppl: 13.520484 +epoch: 1, batch: 30153, sum loss: 4368.333008, avg loss: 2.754308, ppl: 15.710169 +epoch: 1, batch: 30154, sum loss: 3757.668457, avg loss: 2.460818, ppl: 11.714385 +epoch: 1, batch: 30155, sum loss: 4162.652344, avg loss: 2.656447, ppl: 14.245587 +epoch: 1, batch: 30156, sum loss: 4376.690918, avg loss: 2.540157, ppl: 12.681665 +epoch: 1, batch: 30157, sum loss: 4652.291016, avg loss: 2.635859, ppl: 13.955296 +epoch: 1, batch: 30158, sum loss: 3911.694336, avg loss: 2.653795, ppl: 14.207862 +epoch: 1, batch: 30159, sum loss: 3720.731445, avg loss: 2.343030, ppl: 10.412739 +epoch: 1, batch: 30160, sum loss: 3499.561279, avg loss: 2.601904, ppl: 13.489403 +epoch: 1, batch: 30161, sum loss: 3643.628662, avg loss: 2.282975, ppl: 9.805814 +epoch: 1, batch: 30162, sum loss: 3867.130127, avg loss: 2.532502, ppl: 12.584950 +epoch: 1, batch: 30163, sum loss: 4207.585938, avg loss: 2.473595, ppl: 11.865029 +epoch: 1, batch: 30164, sum loss: 4180.317383, avg loss: 2.635761, ppl: 13.953931 +epoch: 1, batch: 30165, sum loss: 4167.138672, avg loss: 2.593117, ppl: 13.371382 +epoch: 1, batch: 30166, sum loss: 3844.818115, avg loss: 2.345832, ppl: 10.441953 +epoch: 1, batch: 30167, sum loss: 4367.376953, avg loss: 2.663035, ppl: 14.339740 +epoch: 1, batch: 30168, sum loss: 
4390.948730, avg loss: 2.602815, ppl: 13.501691 +epoch: 1, batch: 30169, sum loss: 3927.670898, avg loss: 2.448673, ppl: 11.572976 +epoch: 1, batch: 30170, sum loss: 4335.654297, avg loss: 2.506159, ppl: 12.257752 +epoch: 1, batch: 30171, sum loss: 4478.774414, avg loss: 2.636124, ppl: 13.958992 +epoch: 1, batch: 30172, sum loss: 4908.502441, avg loss: 2.800059, ppl: 16.445610 +epoch: 1, batch: 30173, sum loss: 4365.620605, avg loss: 2.653872, ppl: 14.208957 +epoch: 1, batch: 30174, sum loss: 4688.415039, avg loss: 2.754650, ppl: 15.715545 +epoch: 1, batch: 30175, sum loss: 3761.720459, avg loss: 2.278450, ppl: 9.761536 +epoch: 1, batch: 30176, sum loss: 5104.444336, avg loss: 2.778685, ppl: 16.097839 +epoch: 1, batch: 30177, sum loss: 4928.788574, avg loss: 2.518543, ppl: 12.410501 +epoch: 1, batch: 30178, sum loss: 5529.994629, avg loss: 2.748507, ppl: 15.619287 +epoch: 1, batch: 30179, sum loss: 4707.781738, avg loss: 2.576782, ppl: 13.154744 +epoch: 1, batch: 30180, sum loss: 4227.841797, avg loss: 2.703224, ppl: 14.927777 +epoch: 1, batch: 30181, sum loss: 4229.582031, avg loss: 2.513121, ppl: 12.343390 +epoch: 1, batch: 30182, sum loss: 3780.148438, avg loss: 2.235452, ppl: 9.350704 +epoch: 1, batch: 30183, sum loss: 4711.896484, avg loss: 2.641198, ppl: 14.029997 +epoch: 1, batch: 30184, sum loss: 5448.901855, avg loss: 3.081958, ppl: 21.801054 +epoch: 1, batch: 30185, sum loss: 4100.645508, avg loss: 2.667954, ppl: 14.410459 +epoch: 1, batch: 30186, sum loss: 4491.565430, avg loss: 2.606828, ppl: 13.555989 +epoch: 1, batch: 30187, sum loss: 4574.576660, avg loss: 2.503873, ppl: 12.229773 +epoch: 1, batch: 30188, sum loss: 3654.237549, avg loss: 2.442672, ppl: 11.503740 +epoch: 1, batch: 30189, sum loss: 4618.767090, avg loss: 2.628780, ppl: 13.856859 +epoch: 1, batch: 30190, sum loss: 5484.125000, avg loss: 2.932687, ppl: 18.778025 +epoch: 1, batch: 30191, sum loss: 4816.884277, avg loss: 2.559450, ppl: 12.928700 +epoch: 1, batch: 30192, sum loss: 
4067.501465, avg loss: 2.442944, ppl: 11.506865 +epoch: 1, batch: 30193, sum loss: 5179.458008, avg loss: 2.711758, ppl: 15.055722 +epoch: 1, batch: 30194, sum loss: 4296.369629, avg loss: 2.702119, ppl: 14.911297 +epoch: 1, batch: 30195, sum loss: 4733.339844, avg loss: 2.789240, ppl: 16.268648 +epoch: 1, batch: 30196, sum loss: 3315.900879, avg loss: 2.312344, ppl: 10.098063 +epoch: 1, batch: 30197, sum loss: 4109.110352, avg loss: 2.859506, ppl: 17.452906 +epoch: 1, batch: 30198, sum loss: 4765.023438, avg loss: 2.879168, ppl: 17.799463 +epoch: 1, batch: 30199, sum loss: 5141.617676, avg loss: 2.643505, ppl: 14.062410 +epoch: 1, batch: 30200, sum loss: 3780.171387, avg loss: 2.576804, ppl: 13.155026 +epoch: 1, batch: 30201, sum loss: 4738.674805, avg loss: 2.748652, ppl: 15.621552 +epoch: 1, batch: 30202, sum loss: 5181.280762, avg loss: 2.731302, ppl: 15.352871 +epoch: 1, batch: 30203, sum loss: 3930.134766, avg loss: 2.482713, ppl: 11.973705 +epoch: 1, batch: 30204, sum loss: 5517.076172, avg loss: 2.871981, ppl: 17.671999 +epoch: 1, batch: 30205, sum loss: 4099.447754, avg loss: 2.670650, ppl: 14.449358 +epoch: 1, batch: 30206, sum loss: 4130.186523, avg loss: 2.644166, ppl: 14.071697 +epoch: 1, batch: 30207, sum loss: 4181.482422, avg loss: 2.552797, ppl: 12.842979 +epoch: 1, batch: 30208, sum loss: 3436.206543, avg loss: 2.424987, ppl: 11.302083 +epoch: 1, batch: 30209, sum loss: 4610.854004, avg loss: 2.636280, ppl: 13.961172 +epoch: 1, batch: 30210, sum loss: 3845.137695, avg loss: 2.464832, ppl: 11.761504 +epoch: 1, batch: 30211, sum loss: 4295.884766, avg loss: 2.566240, ppl: 13.016784 +epoch: 1, batch: 30212, sum loss: 5296.607422, avg loss: 2.728803, ppl: 15.314550 +epoch: 1, batch: 30213, sum loss: 4987.188477, avg loss: 2.949254, ppl: 19.091707 +epoch: 1, batch: 30214, sum loss: 4734.547852, avg loss: 2.735152, ppl: 15.412083 +epoch: 1, batch: 30215, sum loss: 4294.789551, avg loss: 2.674215, ppl: 14.500963 +epoch: 1, batch: 30216, sum loss: 
3400.171387, avg loss: 2.325699, ppl: 10.233827 +epoch: 1, batch: 30217, sum loss: 4536.988281, avg loss: 2.628614, ppl: 13.854557 +epoch: 1, batch: 30218, sum loss: 4159.564453, avg loss: 2.409945, ppl: 11.133344 +epoch: 1, batch: 30219, sum loss: 3996.432373, avg loss: 2.660740, ppl: 14.306878 +epoch: 1, batch: 30220, sum loss: 3510.776611, avg loss: 2.375356, ppl: 10.754846 +epoch: 1, batch: 30221, sum loss: 4155.035645, avg loss: 2.708628, ppl: 15.008672 +epoch: 1, batch: 30222, sum loss: 4607.014160, avg loss: 2.663014, ppl: 14.339442 +epoch: 1, batch: 30223, sum loss: 4619.125977, avg loss: 2.557656, ppl: 12.905526 +epoch: 1, batch: 30224, sum loss: 3331.676514, avg loss: 2.203490, ppl: 9.056563 +epoch: 1, batch: 30225, sum loss: 4774.789062, avg loss: 2.556097, ppl: 12.885425 +epoch: 1, batch: 30226, sum loss: 4501.928711, avg loss: 2.632707, ppl: 13.911375 +epoch: 1, batch: 30227, sum loss: 4677.067871, avg loss: 2.661962, ppl: 14.324369 +epoch: 1, batch: 30228, sum loss: 5280.492188, avg loss: 2.851238, ppl: 17.309189 +epoch: 1, batch: 30229, sum loss: 5076.427734, avg loss: 2.763434, ppl: 15.854192 +epoch: 1, batch: 30230, sum loss: 4514.802734, avg loss: 2.506831, ppl: 12.265997 +epoch: 1, batch: 30231, sum loss: 3520.744629, avg loss: 2.456905, ppl: 11.668640 +epoch: 1, batch: 30232, sum loss: 4924.273926, avg loss: 2.702675, ppl: 14.919590 +epoch: 1, batch: 30233, sum loss: 4587.258789, avg loss: 2.572776, ppl: 13.102140 +epoch: 1, batch: 30234, sum loss: 4347.062012, avg loss: 2.522961, ppl: 12.465454 +epoch: 1, batch: 30235, sum loss: 4602.586426, avg loss: 2.625549, ppl: 13.812149 +epoch: 1, batch: 30236, sum loss: 5018.987793, avg loss: 2.612695, ppl: 13.635756 +epoch: 1, batch: 30237, sum loss: 4429.657227, avg loss: 2.681391, ppl: 14.605392 +epoch: 1, batch: 30238, sum loss: 4693.838867, avg loss: 2.645907, ppl: 14.096220 +epoch: 1, batch: 30239, sum loss: 4834.867676, avg loss: 2.634805, ppl: 13.940600 +epoch: 1, batch: 30240, sum loss: 
4612.103516, avg loss: 2.790141, ppl: 16.283318 +epoch: 1, batch: 30241, sum loss: 4671.206055, avg loss: 2.715817, ppl: 15.116962 +epoch: 1, batch: 30242, sum loss: 4322.194824, avg loss: 2.524647, ppl: 12.486481 +epoch: 1, batch: 30243, sum loss: 3540.317383, avg loss: 2.468841, ppl: 11.808748 +epoch: 1, batch: 30244, sum loss: 3571.139893, avg loss: 2.516660, ppl: 12.387151 +epoch: 1, batch: 30245, sum loss: 4142.828125, avg loss: 2.625366, ppl: 13.809634 +epoch: 1, batch: 30246, sum loss: 4793.229004, avg loss: 2.714173, ppl: 15.092117 +epoch: 1, batch: 30247, sum loss: 4988.896973, avg loss: 2.741152, ppl: 15.504841 +epoch: 1, batch: 30248, sum loss: 4338.103516, avg loss: 2.553328, ppl: 12.849794 +epoch: 1, batch: 30249, sum loss: 3803.430176, avg loss: 2.562958, ppl: 12.974141 +epoch: 1, batch: 30250, sum loss: 4704.284180, avg loss: 2.631032, ppl: 13.888088 +epoch: 1, batch: 30251, sum loss: 4716.203125, avg loss: 2.759627, ppl: 15.793956 +epoch: 1, batch: 30252, sum loss: 5030.963379, avg loss: 2.957650, ppl: 19.252684 +epoch: 1, batch: 30253, sum loss: 4620.253906, avg loss: 2.573958, ppl: 13.117638 +epoch: 1, batch: 30254, sum loss: 4563.514648, avg loss: 2.664048, ppl: 14.354280 +epoch: 1, batch: 30255, sum loss: 4503.556641, avg loss: 2.548702, ppl: 12.790494 +epoch: 1, batch: 30256, sum loss: 5366.282715, avg loss: 2.827336, ppl: 16.900372 +epoch: 1, batch: 30257, sum loss: 4521.473633, avg loss: 2.689752, ppl: 14.728028 +epoch: 1, batch: 30258, sum loss: 3415.400391, avg loss: 2.328153, ppl: 10.258975 +epoch: 1, batch: 30259, sum loss: 4108.603516, avg loss: 2.558284, ppl: 12.913633 +epoch: 1, batch: 30260, sum loss: 4222.796387, avg loss: 2.546922, ppl: 12.767744 +epoch: 1, batch: 30261, sum loss: 5113.191406, avg loss: 2.555318, ppl: 12.875395 +epoch: 1, batch: 30262, sum loss: 3944.409424, avg loss: 2.434821, ppl: 11.413772 +epoch: 1, batch: 30263, sum loss: 4427.690430, avg loss: 2.753539, ppl: 15.698087 +epoch: 1, batch: 30264, sum loss: 
4674.533691, avg loss: 2.743271, ppl: 15.537724 +epoch: 1, batch: 30265, sum loss: 4139.674316, avg loss: 2.390112, ppl: 10.914718 +epoch: 1, batch: 30266, sum loss: 3714.585449, avg loss: 2.595797, ppl: 13.407267 +epoch: 1, batch: 30267, sum loss: 3309.427979, avg loss: 2.263631, ppl: 9.617945 +epoch: 1, batch: 30268, sum loss: 4316.127930, avg loss: 2.579873, ppl: 13.195466 +epoch: 1, batch: 30269, sum loss: 3868.011963, avg loss: 2.606477, ppl: 13.551226 +epoch: 1, batch: 30270, sum loss: 4754.674316, avg loss: 2.765954, ppl: 15.894193 +epoch: 1, batch: 30271, sum loss: 3816.212891, avg loss: 2.409225, ppl: 11.125339 +epoch: 1, batch: 30272, sum loss: 4913.664551, avg loss: 2.666123, ppl: 14.384092 +epoch: 1, batch: 30273, sum loss: 4541.382812, avg loss: 2.581798, ppl: 13.220889 +epoch: 1, batch: 30274, sum loss: 3168.803223, avg loss: 2.409736, ppl: 11.131024 +epoch: 1, batch: 30275, sum loss: 3561.338623, avg loss: 2.335304, ppl: 10.332601 +epoch: 1, batch: 30276, sum loss: 4294.950684, avg loss: 2.614090, ppl: 13.654791 +epoch: 1, batch: 30277, sum loss: 4895.125488, avg loss: 2.829552, ppl: 16.937874 +epoch: 1, batch: 30278, sum loss: 4375.852539, avg loss: 2.523560, ppl: 12.472916 +epoch: 1, batch: 30279, sum loss: 4230.040527, avg loss: 2.591937, ppl: 13.355611 +epoch: 1, batch: 30280, sum loss: 4755.807129, avg loss: 2.747433, ppl: 15.602531 +epoch: 1, batch: 30281, sum loss: 4410.935547, avg loss: 2.563007, ppl: 12.974775 +epoch: 1, batch: 30282, sum loss: 4689.429688, avg loss: 2.493051, ppl: 12.098135 +epoch: 1, batch: 30283, sum loss: 3689.554932, avg loss: 2.665863, ppl: 14.380359 +epoch: 1, batch: 30284, sum loss: 4323.826660, avg loss: 2.601580, ppl: 13.485033 +epoch: 1, batch: 30285, sum loss: 3250.963379, avg loss: 2.343881, ppl: 10.421608 +epoch: 1, batch: 30286, sum loss: 5029.524414, avg loss: 2.811361, ppl: 16.632538 +epoch: 1, batch: 30287, sum loss: 5083.927734, avg loss: 2.717225, ppl: 15.138256 +epoch: 1, batch: 30288, sum loss: 
4578.142578, avg loss: 2.504454, ppl: 12.236880 +epoch: 1, batch: 30289, sum loss: 4583.206543, avg loss: 2.530760, ppl: 12.563054 +epoch: 1, batch: 30290, sum loss: 4092.547363, avg loss: 2.553055, ppl: 12.846290 +epoch: 1, batch: 30291, sum loss: 4138.656738, avg loss: 2.529741, ppl: 12.550259 +epoch: 1, batch: 30292, sum loss: 4240.715332, avg loss: 2.612887, ppl: 13.638364 +epoch: 1, batch: 30293, sum loss: 4712.179688, avg loss: 2.593385, ppl: 13.374963 +epoch: 1, batch: 30294, sum loss: 4626.094238, avg loss: 2.616569, ppl: 13.688681 +epoch: 1, batch: 30295, sum loss: 4180.273438, avg loss: 2.494197, ppl: 12.112000 +epoch: 1, batch: 30296, sum loss: 4029.228516, avg loss: 2.537298, ppl: 12.645453 +epoch: 1, batch: 30297, sum loss: 4049.789551, avg loss: 2.664335, ppl: 14.358401 +epoch: 1, batch: 30298, sum loss: 3895.088135, avg loss: 2.462129, ppl: 11.729756 +epoch: 1, batch: 30299, sum loss: 4367.907715, avg loss: 2.587623, ppl: 13.298126 +epoch: 1, batch: 30300, sum loss: 4349.921875, avg loss: 2.482832, ppl: 11.975132 +epoch: 1, batch: 30301, sum loss: 5939.713867, avg loss: 2.884757, ppl: 17.899210 +epoch: 1, batch: 30302, sum loss: 4272.859375, avg loss: 2.531315, ppl: 12.570023 +epoch: 1, batch: 30303, sum loss: 3949.522461, avg loss: 2.488672, ppl: 12.045270 +epoch: 1, batch: 30304, sum loss: 4761.695801, avg loss: 2.586473, ppl: 13.282834 +epoch: 1, batch: 30305, sum loss: 3511.515625, avg loss: 2.383921, ppl: 10.847354 +epoch: 1, batch: 30306, sum loss: 4431.855469, avg loss: 2.649047, ppl: 14.140554 +epoch: 1, batch: 30307, sum loss: 4434.270508, avg loss: 2.771419, ppl: 15.981297 +epoch: 1, batch: 30308, sum loss: 3984.696289, avg loss: 2.642371, ppl: 14.046474 +epoch: 1, batch: 30309, sum loss: 3505.169922, avg loss: 2.606074, ppl: 13.545770 +epoch: 1, batch: 30310, sum loss: 3780.341553, avg loss: 2.575165, ppl: 13.133478 +epoch: 1, batch: 30311, sum loss: 4345.005859, avg loss: 2.636533, ppl: 13.964701 +epoch: 1, batch: 30312, sum loss: 
3869.739014, avg loss: 2.486979, ppl: 12.024891 +epoch: 1, batch: 30313, sum loss: 4650.188477, avg loss: 2.627225, ppl: 13.835326 +epoch: 1, batch: 30314, sum loss: 4319.610352, avg loss: 2.611614, ppl: 13.621024 +epoch: 1, batch: 30315, sum loss: 5017.048828, avg loss: 2.707528, ppl: 14.992164 +epoch: 1, batch: 30316, sum loss: 4319.317871, avg loss: 2.517085, ppl: 12.392421 +epoch: 1, batch: 30317, sum loss: 4417.749023, avg loss: 2.474930, ppl: 11.880870 +epoch: 1, batch: 30318, sum loss: 5960.237305, avg loss: 2.868257, ppl: 17.606297 +epoch: 1, batch: 30319, sum loss: 4005.100098, avg loss: 2.518931, ppl: 12.415317 +epoch: 1, batch: 30320, sum loss: 3208.122314, avg loss: 2.345119, ppl: 10.434512 +epoch: 1, batch: 30321, sum loss: 3879.330322, avg loss: 2.445984, ppl: 11.541900 +epoch: 1, batch: 30322, sum loss: 4357.927734, avg loss: 2.584773, ppl: 13.260283 +epoch: 1, batch: 30323, sum loss: 3364.538086, avg loss: 2.376086, ppl: 10.762697 +epoch: 1, batch: 30324, sum loss: 4225.187988, avg loss: 2.630877, ppl: 13.885936 +epoch: 1, batch: 30325, sum loss: 3832.977295, avg loss: 2.215594, ppl: 9.166851 +epoch: 1, batch: 30326, sum loss: 4545.892090, avg loss: 2.621622, ppl: 13.758016 +epoch: 1, batch: 30327, sum loss: 5131.378906, avg loss: 2.680971, ppl: 14.599264 +epoch: 1, batch: 30328, sum loss: 4741.619629, avg loss: 2.748765, ppl: 15.623329 +epoch: 1, batch: 30329, sum loss: 3901.056641, avg loss: 2.320676, ppl: 10.182556 +epoch: 1, batch: 30330, sum loss: 4160.481445, avg loss: 2.500289, ppl: 12.186021 +epoch: 1, batch: 30331, sum loss: 4466.787598, avg loss: 2.624434, ppl: 13.796769 +epoch: 1, batch: 30332, sum loss: 4825.458984, avg loss: 2.758982, ppl: 15.783762 +epoch: 1, batch: 30333, sum loss: 3921.534180, avg loss: 2.586764, ppl: 13.286708 +epoch: 1, batch: 30334, sum loss: 4488.301270, avg loss: 2.662101, ppl: 14.326351 +epoch: 1, batch: 30335, sum loss: 4391.291992, avg loss: 2.842260, ppl: 17.154497 +epoch: 1, batch: 30336, sum loss: 
4794.360352, avg loss: 2.449852, ppl: 11.586632 +epoch: 1, batch: 30337, sum loss: 5589.789551, avg loss: 2.945095, ppl: 19.012465 +epoch: 1, batch: 30338, sum loss: 5186.851562, avg loss: 2.744366, ppl: 15.554748 +epoch: 1, batch: 30339, sum loss: 4656.151367, avg loss: 2.618758, ppl: 13.718674 +epoch: 1, batch: 30340, sum loss: 4331.053223, avg loss: 2.701842, ppl: 14.907170 +epoch: 1, batch: 30341, sum loss: 4413.807129, avg loss: 2.622583, ppl: 13.771248 +epoch: 1, batch: 30342, sum loss: 4023.183105, avg loss: 2.469726, ppl: 11.819203 +epoch: 1, batch: 30343, sum loss: 3563.993652, avg loss: 2.357139, ppl: 10.560693 +epoch: 1, batch: 30344, sum loss: 5581.981934, avg loss: 2.603536, ppl: 13.511436 +epoch: 1, batch: 30345, sum loss: 3593.167725, avg loss: 2.409905, ppl: 11.132900 +epoch: 1, batch: 30346, sum loss: 4941.555664, avg loss: 2.531535, ppl: 12.572789 +epoch: 1, batch: 30347, sum loss: 3870.374023, avg loss: 2.519775, ppl: 12.425799 +epoch: 1, batch: 30348, sum loss: 3927.175293, avg loss: 2.548459, ppl: 12.787384 +epoch: 1, batch: 30349, sum loss: 4732.363770, avg loss: 2.708852, ppl: 15.012026 +epoch: 1, batch: 30350, sum loss: 4495.132324, avg loss: 2.435066, ppl: 11.416574 +epoch: 1, batch: 30351, sum loss: 3835.894775, avg loss: 2.462064, ppl: 11.728990 +epoch: 1, batch: 30352, sum loss: 3899.168945, avg loss: 2.602916, ppl: 13.503062 +epoch: 1, batch: 30353, sum loss: 4920.566895, avg loss: 2.702124, ppl: 14.911365 +epoch: 1, batch: 30354, sum loss: 4604.394043, avg loss: 2.838714, ppl: 17.093767 +epoch: 1, batch: 30355, sum loss: 4156.968262, avg loss: 2.504198, ppl: 12.233742 +epoch: 1, batch: 30356, sum loss: 4254.292969, avg loss: 2.353038, ppl: 10.517474 +epoch: 1, batch: 30357, sum loss: 4812.325195, avg loss: 2.775274, ppl: 16.043022 +epoch: 1, batch: 30358, sum loss: 4471.219727, avg loss: 2.719720, ppl: 15.176074 +epoch: 1, batch: 30359, sum loss: 4747.065430, avg loss: 2.723503, ppl: 15.233590 +epoch: 1, batch: 30360, sum loss: 
4561.420410, avg loss: 2.600582, ppl: 13.471571 +epoch: 1, batch: 30361, sum loss: 3720.300781, avg loss: 2.442745, ppl: 11.504580 +epoch: 1, batch: 30362, sum loss: 4972.856934, avg loss: 2.778132, ppl: 16.088942 +epoch: 1, batch: 30363, sum loss: 4950.019531, avg loss: 2.678582, ppl: 14.564425 +epoch: 1, batch: 30364, sum loss: 5305.880371, avg loss: 2.683804, ppl: 14.640677 +epoch: 1, batch: 30365, sum loss: 4318.980469, avg loss: 2.590870, ppl: 13.341372 +epoch: 1, batch: 30366, sum loss: 3819.089111, avg loss: 2.288250, ppl: 9.857672 +epoch: 1, batch: 30367, sum loss: 3123.582275, avg loss: 2.278324, ppl: 9.760309 +epoch: 1, batch: 30368, sum loss: 3824.085938, avg loss: 2.214294, ppl: 9.154945 +epoch: 1, batch: 30369, sum loss: 4612.220703, avg loss: 2.662945, ppl: 14.338454 +epoch: 1, batch: 30370, sum loss: 4858.884277, avg loss: 2.708408, ppl: 15.005373 +epoch: 1, batch: 30371, sum loss: 4270.497559, avg loss: 2.603962, ppl: 13.517186 +epoch: 1, batch: 30372, sum loss: 4389.098145, avg loss: 2.569730, ppl: 13.062291 +epoch: 1, batch: 30373, sum loss: 4621.962891, avg loss: 2.663956, ppl: 14.352953 +epoch: 1, batch: 30374, sum loss: 4217.426758, avg loss: 2.614648, ppl: 13.662404 +epoch: 1, batch: 30375, sum loss: 4345.465332, avg loss: 2.721018, ppl: 15.195781 +epoch: 1, batch: 30376, sum loss: 4712.836426, avg loss: 2.548857, ppl: 12.792473 +epoch: 1, batch: 30377, sum loss: 3869.716797, avg loss: 2.311659, ppl: 10.091150 +epoch: 1, batch: 30378, sum loss: 3411.478271, avg loss: 2.239973, ppl: 9.393074 +epoch: 1, batch: 30379, sum loss: 4194.483398, avg loss: 2.517697, ppl: 12.400008 +epoch: 1, batch: 30380, sum loss: 4846.340332, avg loss: 2.490411, ppl: 12.066237 +epoch: 1, batch: 30381, sum loss: 3785.569824, avg loss: 2.388372, ppl: 10.895743 +epoch: 1, batch: 30382, sum loss: 5217.582031, avg loss: 2.871537, ppl: 17.664143 +epoch: 1, batch: 30383, sum loss: 3946.022949, avg loss: 2.693531, ppl: 14.783786 +epoch: 1, batch: 30384, sum loss: 
3899.274658, avg loss: 2.455463, ppl: 11.651824 +epoch: 1, batch: 30385, sum loss: 3894.647461, avg loss: 2.395232, ppl: 10.970745 +epoch: 1, batch: 30386, sum loss: 4390.321289, avg loss: 2.718465, ppl: 15.157043 +epoch: 1, batch: 30387, sum loss: 4744.653320, avg loss: 2.685146, ppl: 14.660346 +epoch: 1, batch: 30388, sum loss: 4751.393555, avg loss: 2.521971, ppl: 12.453120 +epoch: 1, batch: 30389, sum loss: 4844.541016, avg loss: 2.621505, ppl: 13.756409 +epoch: 1, batch: 30390, sum loss: 4086.337646, avg loss: 2.733336, ppl: 15.384126 +epoch: 1, batch: 30391, sum loss: 3713.736084, avg loss: 2.405270, ppl: 11.081418 +epoch: 1, batch: 30392, sum loss: 4858.039551, avg loss: 2.803254, ppl: 16.498247 +epoch: 1, batch: 30393, sum loss: 4322.901855, avg loss: 2.590115, ppl: 13.331306 +epoch: 1, batch: 30394, sum loss: 4247.248047, avg loss: 2.535671, ppl: 12.624896 +epoch: 1, batch: 30395, sum loss: 5578.765137, avg loss: 2.714727, ppl: 15.100493 +epoch: 1, batch: 30396, sum loss: 3784.431641, avg loss: 2.369713, ppl: 10.694324 +epoch: 1, batch: 30397, sum loss: 4112.910645, avg loss: 2.420783, ppl: 11.254671 +epoch: 1, batch: 30398, sum loss: 4433.799316, avg loss: 2.825876, ppl: 16.875721 +epoch: 1, batch: 30399, sum loss: 5564.647949, avg loss: 2.955203, ppl: 19.205627 +epoch: 1, batch: 30400, sum loss: 4343.129883, avg loss: 2.533915, ppl: 12.602747 +epoch: 1, batch: 30401, sum loss: 4006.511475, avg loss: 2.507204, ppl: 12.270572 +epoch: 1, batch: 30402, sum loss: 5146.494141, avg loss: 2.909268, ppl: 18.343361 +epoch: 1, batch: 30403, sum loss: 3939.386719, avg loss: 2.465198, ppl: 11.765815 +epoch: 1, batch: 30404, sum loss: 5102.959473, avg loss: 2.684355, ppl: 14.648754 +epoch: 1, batch: 30405, sum loss: 5436.471191, avg loss: 2.806645, ppl: 16.554283 +epoch: 1, batch: 30406, sum loss: 5157.524902, avg loss: 2.677843, ppl: 14.553665 +epoch: 1, batch: 30407, sum loss: 5266.376953, avg loss: 2.750066, ppl: 15.643669 +epoch: 1, batch: 30408, sum loss: 
4034.320557, avg loss: 2.647192, ppl: 14.114350 +epoch: 1, batch: 30409, sum loss: 4045.754639, avg loss: 2.776770, ppl: 16.067047 +epoch: 1, batch: 30410, sum loss: 3266.968994, avg loss: 2.465637, ppl: 11.770978 +epoch: 1, batch: 30411, sum loss: 4689.325684, avg loss: 2.644854, ppl: 14.081387 +epoch: 1, batch: 30412, sum loss: 4524.925293, avg loss: 2.593080, ppl: 13.370894 +epoch: 1, batch: 30413, sum loss: 4343.326172, avg loss: 2.585313, ppl: 13.267446 +epoch: 1, batch: 30414, sum loss: 5078.894531, avg loss: 2.999938, ppl: 20.084288 +epoch: 1, batch: 30415, sum loss: 4144.031738, avg loss: 2.604671, ppl: 13.526777 +epoch: 1, batch: 30416, sum loss: 3923.915039, avg loss: 2.533192, ppl: 12.593646 +epoch: 1, batch: 30417, sum loss: 4313.370117, avg loss: 2.652749, ppl: 14.193003 +epoch: 1, batch: 30418, sum loss: 4893.381348, avg loss: 2.717036, ppl: 15.135387 +epoch: 1, batch: 30419, sum loss: 3275.486816, avg loss: 2.466481, ppl: 11.780919 +epoch: 1, batch: 30420, sum loss: 4673.500000, avg loss: 2.687464, ppl: 14.694367 +epoch: 1, batch: 30421, sum loss: 5151.948242, avg loss: 2.922262, ppl: 18.583279 +epoch: 1, batch: 30422, sum loss: 4730.740723, avg loss: 2.532516, ppl: 12.585136 +epoch: 1, batch: 30423, sum loss: 4635.351074, avg loss: 2.570910, ppl: 13.077723 +epoch: 1, batch: 30424, sum loss: 4007.020020, avg loss: 2.482664, ppl: 11.973123 +epoch: 1, batch: 30425, sum loss: 4076.136475, avg loss: 2.342607, ppl: 10.408339 +epoch: 1, batch: 30426, sum loss: 4967.566406, avg loss: 2.606278, ppl: 13.548532 +epoch: 1, batch: 30427, sum loss: 4072.544922, avg loss: 2.700627, ppl: 14.889058 +epoch: 1, batch: 30428, sum loss: 3351.103516, avg loss: 2.433626, ppl: 11.400148 +epoch: 1, batch: 30429, sum loss: 5671.577637, avg loss: 2.918980, ppl: 18.522383 +epoch: 1, batch: 30430, sum loss: 4674.198242, avg loss: 2.642283, ppl: 14.045232 +epoch: 1, batch: 30431, sum loss: 4973.116211, avg loss: 2.678038, ppl: 14.556500 +epoch: 1, batch: 30432, sum loss: 
4202.393066, avg loss: 2.629783, ppl: 13.870758 +epoch: 1, batch: 30433, sum loss: 3946.506104, avg loss: 2.529812, ppl: 12.551142 +epoch: 1, batch: 30434, sum loss: 5031.125488, avg loss: 2.950807, ppl: 19.121376 +epoch: 1, batch: 30435, sum loss: 3966.790527, avg loss: 2.404115, ppl: 11.068635 +epoch: 1, batch: 30436, sum loss: 3838.157715, avg loss: 2.536786, ppl: 12.638988 +epoch: 1, batch: 30437, sum loss: 4457.090820, avg loss: 2.729388, ppl: 15.323506 +epoch: 1, batch: 30438, sum loss: 4202.651367, avg loss: 2.519575, ppl: 12.423317 +epoch: 1, batch: 30439, sum loss: 4141.197266, avg loss: 2.393756, ppl: 10.954558 +epoch: 1, batch: 30440, sum loss: 4193.689453, avg loss: 2.478540, ppl: 11.923842 +epoch: 1, batch: 30441, sum loss: 4736.260742, avg loss: 2.678881, ppl: 14.568777 +epoch: 1, batch: 30442, sum loss: 4401.193359, avg loss: 2.638605, ppl: 13.993670 +epoch: 1, batch: 30443, sum loss: 4713.274902, avg loss: 2.656863, ppl: 14.251508 +epoch: 1, batch: 30444, sum loss: 4422.235352, avg loss: 2.325045, ppl: 10.227141 +epoch: 1, batch: 30445, sum loss: 4852.613281, avg loss: 2.821287, ppl: 16.798450 +epoch: 1, batch: 30446, sum loss: 4769.616211, avg loss: 2.475151, ppl: 11.883505 +epoch: 1, batch: 30447, sum loss: 4191.805176, avg loss: 2.454218, ppl: 11.637334 +epoch: 1, batch: 30448, sum loss: 3866.961914, avg loss: 2.483598, ppl: 11.984306 +epoch: 1, batch: 30449, sum loss: 4468.661621, avg loss: 2.669451, ppl: 14.432051 +epoch: 1, batch: 30450, sum loss: 3936.733398, avg loss: 2.661753, ppl: 14.321379 +epoch: 1, batch: 30451, sum loss: 3878.498047, avg loss: 2.465669, ppl: 11.771359 +epoch: 1, batch: 30452, sum loss: 4491.332031, avg loss: 2.675004, ppl: 14.512411 +epoch: 1, batch: 30453, sum loss: 3839.107666, avg loss: 2.365439, ppl: 10.648714 +epoch: 1, batch: 30454, sum loss: 5579.079102, avg loss: 2.677101, ppl: 14.542878 +epoch: 1, batch: 30455, sum loss: 5638.573242, avg loss: 2.997647, ppl: 20.038322 +epoch: 1, batch: 30456, sum loss: 
4326.729492, avg loss: 2.674122, ppl: 14.499615 +epoch: 1, batch: 30457, sum loss: 5515.897949, avg loss: 2.837396, ppl: 17.071260 +epoch: 1, batch: 30458, sum loss: 4693.523438, avg loss: 2.777233, ppl: 16.074480 +epoch: 1, batch: 30459, sum loss: 4449.456543, avg loss: 2.585390, ppl: 13.268464 +epoch: 1, batch: 30460, sum loss: 4425.431152, avg loss: 2.799134, ppl: 16.430412 +epoch: 1, batch: 30461, sum loss: 3941.455811, avg loss: 2.496172, ppl: 12.135951 +epoch: 1, batch: 30462, sum loss: 3543.574707, avg loss: 2.384640, ppl: 10.855151 +epoch: 1, batch: 30463, sum loss: 3639.110107, avg loss: 2.366131, ppl: 10.656087 +epoch: 1, batch: 30464, sum loss: 5764.824707, avg loss: 2.700152, ppl: 14.881996 +epoch: 1, batch: 30465, sum loss: 4038.288330, avg loss: 2.489697, ppl: 12.057619 +epoch: 1, batch: 30466, sum loss: 4886.154785, avg loss: 2.707011, ppl: 14.984420 +epoch: 1, batch: 30467, sum loss: 4496.525879, avg loss: 2.526138, ppl: 12.505118 +epoch: 1, batch: 30468, sum loss: 5049.720215, avg loss: 2.794532, ppl: 16.354979 +epoch: 1, batch: 30469, sum loss: 4182.540039, avg loss: 2.470490, ppl: 11.828247 +epoch: 1, batch: 30470, sum loss: 5383.174805, avg loss: 2.770548, ppl: 15.967383 +epoch: 1, batch: 30471, sum loss: 4131.077637, avg loss: 2.627912, ppl: 13.844832 +epoch: 1, batch: 30472, sum loss: 3647.297363, avg loss: 2.327567, ppl: 10.252967 +epoch: 1, batch: 30473, sum loss: 4822.226074, avg loss: 2.826627, ppl: 16.888405 +epoch: 1, batch: 30474, sum loss: 3804.572998, avg loss: 2.563728, ppl: 12.984137 +epoch: 1, batch: 30475, sum loss: 4419.596680, avg loss: 2.630713, ppl: 13.883658 +epoch: 1, batch: 30476, sum loss: 3832.824707, avg loss: 2.562049, ppl: 12.962346 +epoch: 1, batch: 30477, sum loss: 4211.342773, avg loss: 2.650310, ppl: 14.158427 +epoch: 1, batch: 30478, sum loss: 4037.161621, avg loss: 2.609671, ppl: 13.594583 +epoch: 1, batch: 30479, sum loss: 3972.563965, avg loss: 2.417872, ppl: 11.221953 +epoch: 1, batch: 30480, sum loss: 
4572.220703, avg loss: 2.742784, ppl: 15.530158 +epoch: 1, batch: 30481, sum loss: 4462.799805, avg loss: 2.612880, ppl: 13.638276 +epoch: 1, batch: 30482, sum loss: 5592.827637, avg loss: 2.699241, ppl: 14.868444 +epoch: 1, batch: 30483, sum loss: 4547.478516, avg loss: 2.585263, ppl: 13.266785 +epoch: 1, batch: 30484, sum loss: 5039.236328, avg loss: 2.917913, ppl: 18.502640 +epoch: 1, batch: 30485, sum loss: 4136.348633, avg loss: 2.479825, ppl: 11.939178 +epoch: 1, batch: 30486, sum loss: 4119.467285, avg loss: 2.644074, ppl: 14.070410 +epoch: 1, batch: 30487, sum loss: 3663.262695, avg loss: 2.343738, ppl: 10.420115 +epoch: 1, batch: 30488, sum loss: 4978.125000, avg loss: 2.567367, ppl: 13.031471 +epoch: 1, batch: 30489, sum loss: 5147.821777, avg loss: 2.733840, ppl: 15.391886 +epoch: 1, batch: 30490, sum loss: 3286.993896, avg loss: 2.265330, ppl: 9.634304 +epoch: 1, batch: 30491, sum loss: 5508.399414, avg loss: 2.837918, ppl: 17.080172 +epoch: 1, batch: 30492, sum loss: 4838.151855, avg loss: 2.750513, ppl: 15.650656 +epoch: 1, batch: 30493, sum loss: 4011.279785, avg loss: 2.476099, ppl: 11.894770 +epoch: 1, batch: 30494, sum loss: 3626.019531, avg loss: 2.453329, ppl: 11.626984 +epoch: 1, batch: 30495, sum loss: 4842.109863, avg loss: 2.537793, ppl: 12.651723 +epoch: 1, batch: 30496, sum loss: 3520.523926, avg loss: 2.443112, ppl: 11.508800 +epoch: 1, batch: 30497, sum loss: 4933.202637, avg loss: 2.848269, ppl: 17.257891 +epoch: 1, batch: 30498, sum loss: 5090.790039, avg loss: 2.848791, ppl: 17.266899 +epoch: 1, batch: 30499, sum loss: 4028.567383, avg loss: 2.549726, ppl: 12.803598 +epoch: 1, batch: 30500, sum loss: 3747.467529, avg loss: 2.633498, ppl: 13.922384 +epoch: 1, batch: 30501, sum loss: 3920.878906, avg loss: 2.539429, ppl: 12.672439 +epoch: 1, batch: 30502, sum loss: 5009.697266, avg loss: 2.914309, ppl: 18.436073 +epoch: 1, batch: 30503, sum loss: 4970.432129, avg loss: 2.828931, ppl: 16.927357 +epoch: 1, batch: 30504, sum loss: 
3948.935791, avg loss: 2.652072, ppl: 14.183402 +epoch: 1, batch: 30505, sum loss: 5028.805176, avg loss: 2.710946, ppl: 15.043501 +epoch: 1, batch: 30506, sum loss: 4359.060547, avg loss: 2.537288, ppl: 12.645329 +epoch: 1, batch: 30507, sum loss: 3995.260742, avg loss: 2.422838, ppl: 11.277825 +epoch: 1, batch: 30508, sum loss: 5627.083008, avg loss: 3.093504, ppl: 22.054214 +epoch: 1, batch: 30509, sum loss: 4097.322754, avg loss: 2.513695, ppl: 12.350481 +epoch: 1, batch: 30510, sum loss: 3941.438965, avg loss: 2.598180, ppl: 13.439254 +epoch: 1, batch: 30511, sum loss: 4983.732422, avg loss: 2.820448, ppl: 16.784374 +epoch: 1, batch: 30512, sum loss: 4382.457031, avg loss: 2.812874, ppl: 16.657717 +epoch: 1, batch: 30513, sum loss: 4157.041504, avg loss: 2.511808, ppl: 12.327193 +epoch: 1, batch: 30514, sum loss: 4896.297852, avg loss: 2.764708, ppl: 15.874405 +epoch: 1, batch: 30515, sum loss: 5222.980469, avg loss: 2.743162, ppl: 15.536035 +epoch: 1, batch: 30516, sum loss: 3451.058838, avg loss: 2.198127, ppl: 9.008123 +epoch: 1, batch: 30517, sum loss: 4210.738281, avg loss: 2.694010, ppl: 14.790875 +epoch: 1, batch: 30518, sum loss: 4534.051758, avg loss: 2.518918, ppl: 12.415154 +epoch: 1, batch: 30519, sum loss: 3962.249023, avg loss: 2.304973, ppl: 10.023911 +epoch: 1, batch: 30520, sum loss: 4577.583496, avg loss: 2.873562, ppl: 17.699947 +epoch: 1, batch: 30521, sum loss: 4651.953613, avg loss: 2.553213, ppl: 12.848318 +epoch: 1, batch: 30522, sum loss: 4607.283691, avg loss: 2.708574, ppl: 15.007853 +epoch: 1, batch: 30523, sum loss: 3871.614258, avg loss: 2.451941, ppl: 11.610856 +epoch: 1, batch: 30524, sum loss: 4629.860840, avg loss: 2.632098, ppl: 13.902914 +epoch: 1, batch: 30525, sum loss: 4706.743652, avg loss: 2.603287, ppl: 13.508072 +epoch: 1, batch: 30526, sum loss: 5146.468262, avg loss: 2.969687, ppl: 19.485825 +epoch: 1, batch: 30527, sum loss: 3515.900879, avg loss: 2.475987, ppl: 11.893436 +epoch: 1, batch: 30528, sum loss: 
4700.467773, avg loss: 2.582675, ppl: 13.232485 +epoch: 1, batch: 30529, sum loss: 4707.754883, avg loss: 2.682481, ppl: 14.621328 +epoch: 1, batch: 30530, sum loss: 3826.126465, avg loss: 2.486112, ppl: 12.014474 +epoch: 1, batch: 30531, sum loss: 3854.077148, avg loss: 2.414835, ppl: 11.187926 +epoch: 1, batch: 30532, sum loss: 4263.362305, avg loss: 2.582291, ppl: 13.227409 +epoch: 1, batch: 30533, sum loss: 4190.977051, avg loss: 2.466732, ppl: 11.783872 +epoch: 1, batch: 30534, sum loss: 3650.355469, avg loss: 2.199009, ppl: 9.016078 +epoch: 1, batch: 30535, sum loss: 3251.905518, avg loss: 2.435884, ppl: 11.425918 +epoch: 1, batch: 30536, sum loss: 5161.419434, avg loss: 2.719399, ppl: 15.171205 +epoch: 1, batch: 30537, sum loss: 4237.862305, avg loss: 2.488469, ppl: 12.042820 +epoch: 1, batch: 30538, sum loss: 5097.865723, avg loss: 2.876899, ppl: 17.759125 +epoch: 1, batch: 30539, sum loss: 4287.946289, avg loss: 2.624202, ppl: 13.793566 +epoch: 1, batch: 30540, sum loss: 5276.680176, avg loss: 2.802273, ppl: 16.482069 +epoch: 1, batch: 30541, sum loss: 3890.287109, avg loss: 2.304672, ppl: 10.020896 +epoch: 1, batch: 30542, sum loss: 4410.666016, avg loss: 2.526155, ppl: 12.505327 +epoch: 1, batch: 30543, sum loss: 4294.011230, avg loss: 2.618299, ppl: 13.712385 +epoch: 1, batch: 30544, sum loss: 5364.772461, avg loss: 2.779675, ppl: 16.113783 +epoch: 1, batch: 30545, sum loss: 4150.330566, avg loss: 2.454365, ppl: 11.639038 +epoch: 1, batch: 30546, sum loss: 4395.989746, avg loss: 2.523530, ppl: 12.472550 +epoch: 1, batch: 30547, sum loss: 3394.944824, avg loss: 2.201650, ppl: 9.039921 +epoch: 1, batch: 30548, sum loss: 4428.578125, avg loss: 2.653432, ppl: 14.202701 +epoch: 1, batch: 30549, sum loss: 4493.083984, avg loss: 2.482367, ppl: 11.969563 +epoch: 1, batch: 30550, sum loss: 4228.035156, avg loss: 2.406394, ppl: 11.093890 +epoch: 1, batch: 30551, sum loss: 3822.387207, avg loss: 2.519701, ppl: 12.424887 +epoch: 1, batch: 30552, sum loss: 
4815.747559, avg loss: 2.694878, ppl: 14.803718 +epoch: 1, batch: 30553, sum loss: 4273.335938, avg loss: 2.471565, ppl: 11.840963 +epoch: 1, batch: 30554, sum loss: 4054.334229, avg loss: 2.561171, ppl: 12.950978 +epoch: 1, batch: 30555, sum loss: 3658.961426, avg loss: 2.383688, ppl: 10.844828 +epoch: 1, batch: 30556, sum loss: 3746.261719, avg loss: 2.435801, ppl: 11.424964 +epoch: 1, batch: 30557, sum loss: 5110.061035, avg loss: 2.644959, ppl: 14.082870 +epoch: 1, batch: 30558, sum loss: 4976.006836, avg loss: 2.756790, ppl: 15.749213 +epoch: 1, batch: 30559, sum loss: 5280.945312, avg loss: 2.769243, ppl: 15.946550 +epoch: 1, batch: 30560, sum loss: 5676.213867, avg loss: 2.985909, ppl: 19.804506 +epoch: 1, batch: 30561, sum loss: 3962.464355, avg loss: 2.435442, ppl: 11.420865 +epoch: 1, batch: 30562, sum loss: 4052.328125, avg loss: 2.346455, ppl: 10.448467 +epoch: 1, batch: 30563, sum loss: 4680.492188, avg loss: 2.669990, ppl: 14.439822 +epoch: 1, batch: 30564, sum loss: 4158.022949, avg loss: 2.610184, ppl: 13.601553 +epoch: 1, batch: 30565, sum loss: 4969.956055, avg loss: 2.556562, ppl: 12.891419 +epoch: 1, batch: 30566, sum loss: 4483.999512, avg loss: 2.409457, ppl: 11.127919 +epoch: 1, batch: 30567, sum loss: 4249.291016, avg loss: 2.619785, ppl: 13.732768 +epoch: 1, batch: 30568, sum loss: 4848.641602, avg loss: 2.772236, ppl: 15.994363 +epoch: 1, batch: 30569, sum loss: 4865.492188, avg loss: 2.812423, ppl: 16.650217 +epoch: 1, batch: 30570, sum loss: 3772.004883, avg loss: 2.367862, ppl: 10.674551 +epoch: 1, batch: 30571, sum loss: 5625.908203, avg loss: 2.992504, ppl: 19.935545 +epoch: 1, batch: 30572, sum loss: 3474.421387, avg loss: 2.399462, ppl: 11.017253 +epoch: 1, batch: 30573, sum loss: 4144.966309, avg loss: 2.725159, ppl: 15.258835 +epoch: 1, batch: 30574, sum loss: 4971.998535, avg loss: 2.721400, ppl: 15.201597 +epoch: 1, batch: 30575, sum loss: 4833.549316, avg loss: 2.602880, ppl: 13.502576 +epoch: 1, batch: 30576, sum loss: 
4089.613037, avg loss: 2.590002, ppl: 13.329796 +epoch: 1, batch: 30577, sum loss: 4744.161133, avg loss: 2.571361, ppl: 13.083620 +epoch: 1, batch: 30578, sum loss: 4912.318359, avg loss: 2.748919, ppl: 15.625731 +epoch: 1, batch: 30579, sum loss: 5241.989258, avg loss: 2.656862, ppl: 14.251501 +epoch: 1, batch: 30580, sum loss: 4825.927734, avg loss: 2.562893, ppl: 12.973297 +epoch: 1, batch: 30581, sum loss: 5245.558594, avg loss: 2.680408, ppl: 14.591045 +epoch: 1, batch: 30582, sum loss: 4596.343750, avg loss: 2.580766, ppl: 13.207248 +epoch: 1, batch: 30583, sum loss: 3862.473145, avg loss: 2.578420, ppl: 13.176302 +epoch: 1, batch: 30584, sum loss: 4349.206543, avg loss: 2.524206, ppl: 12.480977 +epoch: 1, batch: 30585, sum loss: 4077.947754, avg loss: 2.670562, ppl: 14.448084 +epoch: 1, batch: 30586, sum loss: 4910.698242, avg loss: 2.988861, ppl: 19.863037 +epoch: 1, batch: 30587, sum loss: 5026.689453, avg loss: 2.892226, ppl: 18.033415 +epoch: 1, batch: 30588, sum loss: 4827.660645, avg loss: 2.863381, ppl: 17.520666 +epoch: 1, batch: 30589, sum loss: 3521.711914, avg loss: 2.376324, ppl: 10.765256 +epoch: 1, batch: 30590, sum loss: 4573.335938, avg loss: 2.666668, ppl: 14.391938 +epoch: 1, batch: 30591, sum loss: 4366.373535, avg loss: 2.432520, ppl: 11.387545 +epoch: 1, batch: 30592, sum loss: 5824.003906, avg loss: 2.991271, ppl: 19.910969 +epoch: 1, batch: 30593, sum loss: 4385.542480, avg loss: 2.476309, ppl: 11.897265 +epoch: 1, batch: 30594, sum loss: 5108.273438, avg loss: 2.894206, ppl: 18.069149 +epoch: 1, batch: 30595, sum loss: 3910.318115, avg loss: 2.539168, ppl: 12.669122 +epoch: 1, batch: 30596, sum loss: 4363.086426, avg loss: 2.725226, ppl: 15.259861 +epoch: 1, batch: 30597, sum loss: 4350.265625, avg loss: 2.403462, ppl: 11.061404 +epoch: 1, batch: 30598, sum loss: 4608.187988, avg loss: 2.728353, ppl: 15.307655 +epoch: 1, batch: 30599, sum loss: 3287.074707, avg loss: 2.424096, ppl: 11.292021 +epoch: 1, batch: 30600, sum loss: 
4052.917969, avg loss: 2.676960, ppl: 14.540829 +epoch: 1, batch: 30601, sum loss: 4485.798340, avg loss: 2.364680, ppl: 10.640636 +epoch: 1, batch: 30602, sum loss: 5038.487793, avg loss: 2.711780, ppl: 15.056056 +epoch: 1, batch: 30603, sum loss: 4933.386230, avg loss: 2.776244, ppl: 16.058598 +epoch: 1, batch: 30604, sum loss: 4881.700684, avg loss: 2.654541, ppl: 14.218458 +epoch: 1, batch: 30605, sum loss: 3305.921143, avg loss: 2.383505, ppl: 10.842836 +epoch: 1, batch: 30606, sum loss: 3862.043213, avg loss: 2.480439, ppl: 11.946507 +epoch: 1, batch: 30607, sum loss: 4487.823730, avg loss: 2.508565, ppl: 12.287291 +epoch: 1, batch: 30608, sum loss: 3583.248047, avg loss: 2.440905, ppl: 11.483424 +epoch: 1, batch: 30609, sum loss: 4008.140137, avg loss: 2.468066, ppl: 11.799610 +epoch: 1, batch: 30610, sum loss: 4644.289062, avg loss: 2.766104, ppl: 15.896584 +epoch: 1, batch: 30611, sum loss: 3755.185303, avg loss: 2.737016, ppl: 15.440837 +epoch: 1, batch: 30612, sum loss: 4553.149414, avg loss: 2.592910, ppl: 13.368615 +epoch: 1, batch: 30613, sum loss: 5199.339355, avg loss: 2.669065, ppl: 14.426481 +epoch: 1, batch: 30614, sum loss: 3826.567383, avg loss: 2.061728, ppl: 7.859540 +epoch: 1, batch: 30615, sum loss: 3420.230469, avg loss: 2.415417, ppl: 11.194437 +epoch: 1, batch: 30616, sum loss: 4234.451172, avg loss: 2.372242, ppl: 10.721400 +epoch: 1, batch: 30617, sum loss: 3454.450439, avg loss: 2.260766, ppl: 9.590433 +epoch: 1, batch: 30618, sum loss: 4734.975098, avg loss: 2.584594, ppl: 13.257899 +epoch: 1, batch: 30619, sum loss: 4297.373047, avg loss: 2.502838, ppl: 12.217118 +epoch: 1, batch: 30620, sum loss: 5274.453125, avg loss: 2.804069, ppl: 16.511688 +epoch: 1, batch: 30621, sum loss: 3831.211426, avg loss: 2.484573, ppl: 11.995996 +epoch: 1, batch: 30622, sum loss: 3925.324707, avg loss: 2.648667, ppl: 14.135189 +epoch: 1, batch: 30623, sum loss: 4821.618164, avg loss: 2.714875, ppl: 15.102725 +epoch: 1, batch: 30624, sum loss: 
4704.059082, avg loss: 2.845771, ppl: 17.214819 +epoch: 1, batch: 30625, sum loss: 5136.907227, avg loss: 2.824028, ppl: 16.844568 +epoch: 1, batch: 30626, sum loss: 4825.684082, avg loss: 2.828654, ppl: 16.922668 +epoch: 1, batch: 30627, sum loss: 4908.235352, avg loss: 2.717738, ppl: 15.146029 +epoch: 1, batch: 30628, sum loss: 4637.316406, avg loss: 2.643852, ppl: 14.067286 +epoch: 1, batch: 30629, sum loss: 4738.637695, avg loss: 2.802270, ppl: 16.482014 +epoch: 1, batch: 30630, sum loss: 5160.986328, avg loss: 2.739377, ppl: 15.477344 +epoch: 1, batch: 30631, sum loss: 4272.270020, avg loss: 2.555186, ppl: 12.873689 +epoch: 1, batch: 30632, sum loss: 3216.697021, avg loss: 2.307530, ppl: 10.049573 +epoch: 1, batch: 30633, sum loss: 3877.529541, avg loss: 2.463488, ppl: 11.745705 +epoch: 1, batch: 30634, sum loss: 4101.380371, avg loss: 2.528594, ppl: 12.535874 +epoch: 1, batch: 30635, sum loss: 4317.456055, avg loss: 2.511609, ppl: 12.324749 +epoch: 1, batch: 30636, sum loss: 4674.387695, avg loss: 2.703521, ppl: 14.932216 +epoch: 1, batch: 30637, sum loss: 4170.573730, avg loss: 2.582399, ppl: 13.228831 +epoch: 1, batch: 30638, sum loss: 4011.435059, avg loss: 2.447489, ppl: 11.559288 +epoch: 1, batch: 30639, sum loss: 4114.006348, avg loss: 2.600510, ppl: 13.470601 +epoch: 1, batch: 30640, sum loss: 4016.900391, avg loss: 2.520013, ppl: 12.428757 +epoch: 1, batch: 30641, sum loss: 4445.121094, avg loss: 2.730418, ppl: 15.339301 +epoch: 1, batch: 30642, sum loss: 4190.555176, avg loss: 2.670845, ppl: 14.452169 +epoch: 1, batch: 30643, sum loss: 4263.546875, avg loss: 2.607674, ppl: 13.567454 +epoch: 1, batch: 30644, sum loss: 4223.744141, avg loss: 2.552111, ppl: 12.834173 +epoch: 1, batch: 30645, sum loss: 4501.964355, avg loss: 2.705508, ppl: 14.961908 +epoch: 1, batch: 30646, sum loss: 4592.215820, avg loss: 2.688651, ppl: 14.711814 +epoch: 1, batch: 30647, sum loss: 5637.765137, avg loss: 2.886720, ppl: 17.934395 +epoch: 1, batch: 30648, sum loss: 
3813.392822, avg loss: 2.492414, ppl: 12.090425 +epoch: 1, batch: 30649, sum loss: 4326.467773, avg loss: 2.527142, ppl: 12.517683 +epoch: 1, batch: 30650, sum loss: 5030.397949, avg loss: 2.988947, ppl: 19.864752 +epoch: 1, batch: 30651, sum loss: 4195.336914, avg loss: 2.379658, ppl: 10.801208 +epoch: 1, batch: 30652, sum loss: 3431.100586, avg loss: 2.603263, ppl: 13.507741 +epoch: 1, batch: 30653, sum loss: 4490.188477, avg loss: 2.570228, ppl: 13.068805 +epoch: 1, batch: 30654, sum loss: 3989.618164, avg loss: 2.487293, ppl: 12.028673 +epoch: 1, batch: 30655, sum loss: 5446.738281, avg loss: 3.059965, ppl: 21.326818 +epoch: 1, batch: 30656, sum loss: 5344.714844, avg loss: 2.660386, ppl: 14.301803 +epoch: 1, batch: 30657, sum loss: 4282.031738, avg loss: 2.606227, ppl: 13.547844 +epoch: 1, batch: 30658, sum loss: 3760.259766, avg loss: 2.416619, ppl: 11.207905 +epoch: 1, batch: 30659, sum loss: 4218.563477, avg loss: 2.688696, ppl: 14.712474 +epoch: 1, batch: 30660, sum loss: 3599.947021, avg loss: 2.282782, ppl: 9.803915 +epoch: 1, batch: 30661, sum loss: 4741.011719, avg loss: 2.720030, ppl: 15.180772 +epoch: 1, batch: 30662, sum loss: 3759.490479, avg loss: 2.354095, ppl: 10.528601 +epoch: 1, batch: 30663, sum loss: 4648.799805, avg loss: 2.446737, ppl: 11.550593 +epoch: 1, batch: 30664, sum loss: 4200.038086, avg loss: 2.732621, ppl: 15.373127 +epoch: 1, batch: 30665, sum loss: 4179.287598, avg loss: 2.553016, ppl: 12.845794 +epoch: 1, batch: 30666, sum loss: 4888.842773, avg loss: 2.743459, ppl: 15.540640 +epoch: 1, batch: 30667, sum loss: 3481.429199, avg loss: 2.598082, ppl: 13.437934 +epoch: 1, batch: 30668, sum loss: 5046.282227, avg loss: 2.827049, ppl: 16.895529 +epoch: 1, batch: 30669, sum loss: 4674.289062, avg loss: 2.617183, ppl: 13.697087 +epoch: 1, batch: 30670, sum loss: 4411.598145, avg loss: 2.675317, ppl: 14.516955 +epoch: 1, batch: 30671, sum loss: 4276.469238, avg loss: 2.506723, ppl: 12.264672 +epoch: 1, batch: 30672, sum loss: 
4527.338379, avg loss: 2.674152, ppl: 14.500044 +epoch: 1, batch: 30673, sum loss: 3258.409912, avg loss: 2.142282, ppl: 8.518851 +epoch: 1, batch: 30674, sum loss: 4485.361328, avg loss: 2.552852, ppl: 12.843684 +epoch: 1, batch: 30675, sum loss: 3825.772949, avg loss: 2.418314, ppl: 11.226914 +epoch: 1, batch: 30676, sum loss: 3528.975830, avg loss: 2.386055, ppl: 10.870527 +epoch: 1, batch: 30677, sum loss: 4715.535156, avg loss: 2.650666, ppl: 14.163472 +epoch: 1, batch: 30678, sum loss: 3822.484863, avg loss: 2.419294, ppl: 11.237924 +epoch: 1, batch: 30679, sum loss: 5598.121582, avg loss: 2.882658, ppl: 17.861687 +epoch: 1, batch: 30680, sum loss: 3798.595215, avg loss: 2.612514, ppl: 13.633282 +epoch: 1, batch: 30681, sum loss: 4402.587402, avg loss: 2.655360, ppl: 14.230115 +epoch: 1, batch: 30682, sum loss: 4770.378906, avg loss: 2.741597, ppl: 15.511741 +epoch: 1, batch: 30683, sum loss: 3799.118652, avg loss: 2.627330, ppl: 13.836771 +epoch: 1, batch: 30684, sum loss: 3897.416992, avg loss: 2.548997, ppl: 12.794270 +epoch: 1, batch: 30685, sum loss: 4202.352051, avg loss: 2.537652, ppl: 12.649938 +epoch: 1, batch: 30686, sum loss: 4331.940918, avg loss: 2.773330, ppl: 16.011860 +epoch: 1, batch: 30687, sum loss: 4605.850586, avg loss: 2.440832, ppl: 11.482594 +epoch: 1, batch: 30688, sum loss: 3772.973633, avg loss: 2.493704, ppl: 12.106032 +epoch: 1, batch: 30689, sum loss: 4321.454102, avg loss: 2.561621, ppl: 12.956800 +epoch: 1, batch: 30690, sum loss: 4582.805664, avg loss: 2.722998, ppl: 15.225900 +epoch: 1, batch: 30691, sum loss: 5273.374023, avg loss: 2.753720, ppl: 15.700932 +epoch: 1, batch: 30692, sum loss: 3737.918457, avg loss: 2.508670, ppl: 12.288576 +epoch: 1, batch: 30693, sum loss: 4980.901855, avg loss: 2.552999, ppl: 12.845576 +epoch: 1, batch: 30694, sum loss: 4572.071777, avg loss: 2.600723, ppl: 13.473479 +epoch: 1, batch: 30695, sum loss: 5297.254395, avg loss: 2.828219, ppl: 16.915310 +epoch: 1, batch: 30696, sum loss: 
3386.852295, avg loss: 2.297729, ppl: 9.951554 +epoch: 1, batch: 30697, sum loss: 4457.020020, avg loss: 2.523794, ppl: 12.475840 +epoch: 1, batch: 30698, sum loss: 4249.501465, avg loss: 2.573896, ppl: 13.116824 +epoch: 1, batch: 30699, sum loss: 5078.982910, avg loss: 2.923997, ppl: 18.615549 +epoch: 1, batch: 30700, sum loss: 4260.150391, avg loss: 2.430206, ppl: 11.361217 +epoch: 1, batch: 30701, sum loss: 4231.743164, avg loss: 2.377384, ppl: 10.776671 +epoch: 1, batch: 30702, sum loss: 4626.938965, avg loss: 2.752492, ppl: 15.681662 +epoch: 1, batch: 30703, sum loss: 4739.407715, avg loss: 2.844782, ppl: 17.197815 +epoch: 1, batch: 30704, sum loss: 5826.206055, avg loss: 2.923335, ppl: 18.603218 +epoch: 1, batch: 30705, sum loss: 3942.178223, avg loss: 2.536794, ppl: 12.639088 +epoch: 1, batch: 30706, sum loss: 4683.090820, avg loss: 2.632429, ppl: 13.907505 +epoch: 1, batch: 30707, sum loss: 5107.464844, avg loss: 2.826488, ppl: 16.886061 +epoch: 1, batch: 30708, sum loss: 4545.319336, avg loss: 2.681604, ppl: 14.608512 +epoch: 1, batch: 30709, sum loss: 4126.631836, avg loss: 2.457791, ppl: 11.678988 +epoch: 1, batch: 30710, sum loss: 4107.941895, avg loss: 2.468715, ppl: 11.807267 +epoch: 1, batch: 30711, sum loss: 4128.537598, avg loss: 2.448718, ppl: 11.573495 +epoch: 1, batch: 30712, sum loss: 5800.567383, avg loss: 2.755614, ppl: 15.730698 +epoch: 1, batch: 30713, sum loss: 4681.621582, avg loss: 2.682878, ppl: 14.627123 +epoch: 1, batch: 30714, sum loss: 4699.579590, avg loss: 2.729140, ppl: 15.319711 +epoch: 1, batch: 30715, sum loss: 4970.641602, avg loss: 2.723639, ppl: 15.235668 +epoch: 1, batch: 30716, sum loss: 4250.201172, avg loss: 2.439840, ppl: 11.471204 +epoch: 1, batch: 30717, sum loss: 4452.193359, avg loss: 2.675597, ppl: 14.521015 +epoch: 1, batch: 30718, sum loss: 4159.463379, avg loss: 2.747334, ppl: 15.600985 +epoch: 1, batch: 30719, sum loss: 4558.995605, avg loss: 2.510460, ppl: 12.310594 +epoch: 1, batch: 30720, sum loss: 
4564.674805, avg loss: 2.554379, ppl: 12.863306 +epoch: 1, batch: 30721, sum loss: 4999.006836, avg loss: 2.949267, ppl: 19.091948 +epoch: 1, batch: 30722, sum loss: 4366.116699, avg loss: 2.470921, ppl: 11.833335 +epoch: 1, batch: 30723, sum loss: 3517.637207, avg loss: 2.368779, ppl: 10.684340 +epoch: 1, batch: 30724, sum loss: 4479.027344, avg loss: 2.669265, ppl: 14.429363 +epoch: 1, batch: 30725, sum loss: 5592.295898, avg loss: 2.896062, ppl: 18.102718 +epoch: 1, batch: 30726, sum loss: 4646.957031, avg loss: 2.719109, ppl: 15.166800 +epoch: 1, batch: 30727, sum loss: 5559.640137, avg loss: 2.792386, ppl: 16.319910 +epoch: 1, batch: 30728, sum loss: 5184.031250, avg loss: 2.742874, ppl: 15.531553 +epoch: 1, batch: 30729, sum loss: 3899.097168, avg loss: 2.453806, ppl: 11.632532 +epoch: 1, batch: 30730, sum loss: 4993.765137, avg loss: 2.789813, ppl: 16.277973 +epoch: 1, batch: 30731, sum loss: 4520.501953, avg loss: 2.623623, ppl: 13.785574 +epoch: 1, batch: 30732, sum loss: 5664.570312, avg loss: 3.003484, ppl: 20.155632 +epoch: 1, batch: 30733, sum loss: 4242.803711, avg loss: 2.769454, ppl: 15.949926 +epoch: 1, batch: 30734, sum loss: 3627.357910, avg loss: 2.423085, ppl: 11.280603 +epoch: 1, batch: 30735, sum loss: 5067.453125, avg loss: 2.662876, ppl: 14.337466 +epoch: 1, batch: 30736, sum loss: 4701.785156, avg loss: 2.622301, ppl: 13.767364 +epoch: 1, batch: 30737, sum loss: 4675.848145, avg loss: 2.859846, ppl: 17.458836 +epoch: 1, batch: 30738, sum loss: 3870.169922, avg loss: 2.311930, ppl: 10.093884 +epoch: 1, batch: 30739, sum loss: 3742.515869, avg loss: 2.501682, ppl: 12.203000 +epoch: 1, batch: 30740, sum loss: 4254.900879, avg loss: 2.654336, ppl: 14.215547 +epoch: 1, batch: 30741, sum loss: 3850.947998, avg loss: 2.623261, ppl: 13.780595 +epoch: 1, batch: 30742, sum loss: 4154.256836, avg loss: 2.430812, ppl: 11.368105 +epoch: 1, batch: 30743, sum loss: 3605.703125, avg loss: 2.490127, ppl: 12.062803 +epoch: 1, batch: 30744, sum loss: 
4528.713867, avg loss: 2.749674, ppl: 15.637538 +epoch: 1, batch: 30745, sum loss: 3900.108643, avg loss: 2.562490, ppl: 12.968061 +epoch: 1, batch: 30746, sum loss: 4940.276855, avg loss: 2.657491, ppl: 14.260471 +epoch: 1, batch: 30747, sum loss: 4824.057129, avg loss: 2.782040, ppl: 16.151936 +epoch: 1, batch: 30748, sum loss: 4175.294434, avg loss: 2.802211, ppl: 16.481047 +epoch: 1, batch: 30749, sum loss: 4929.314453, avg loss: 2.686275, ppl: 14.676903 +epoch: 1, batch: 30750, sum loss: 3863.093994, avg loss: 2.375827, ppl: 10.759904 +epoch: 1, batch: 30751, sum loss: 4366.925781, avg loss: 2.643418, ppl: 14.061177 +epoch: 1, batch: 30752, sum loss: 4319.752930, avg loss: 2.618032, ppl: 13.708718 +epoch: 1, batch: 30753, sum loss: 4018.569336, avg loss: 2.559598, ppl: 12.930624 +epoch: 1, batch: 30754, sum loss: 5039.167480, avg loss: 2.805773, ppl: 16.539848 +epoch: 1, batch: 30755, sum loss: 3897.710693, avg loss: 2.617670, ppl: 13.703757 +epoch: 1, batch: 30756, sum loss: 3783.171143, avg loss: 2.458201, ppl: 11.683773 +epoch: 1, batch: 30757, sum loss: 4342.979492, avg loss: 2.592824, ppl: 13.367465 +epoch: 1, batch: 30758, sum loss: 4553.090820, avg loss: 2.744479, ppl: 15.556510 +epoch: 1, batch: 30759, sum loss: 4689.827148, avg loss: 2.572588, ppl: 13.099679 +epoch: 1, batch: 30760, sum loss: 4228.641113, avg loss: 2.657851, ppl: 14.265602 +epoch: 1, batch: 30761, sum loss: 3635.585449, avg loss: 2.347053, ppl: 10.454718 +epoch: 1, batch: 30762, sum loss: 3707.644775, avg loss: 2.457021, ppl: 11.669997 +epoch: 1, batch: 30763, sum loss: 5077.892578, avg loss: 2.675391, ppl: 14.518032 +epoch: 1, batch: 30764, sum loss: 4108.713867, avg loss: 2.603748, ppl: 13.514290 +epoch: 1, batch: 30765, sum loss: 4050.902832, avg loss: 2.527076, ppl: 12.516853 +epoch: 1, batch: 30766, sum loss: 4761.150391, avg loss: 2.733152, ppl: 15.381288 +epoch: 1, batch: 30767, sum loss: 4498.265625, avg loss: 2.418422, ppl: 11.228132 +epoch: 1, batch: 30768, sum loss: 
3983.542969, avg loss: 2.299967, ppl: 9.973854 +epoch: 1, batch: 30769, sum loss: 5159.627441, avg loss: 2.660974, ppl: 14.310214 +epoch: 1, batch: 30770, sum loss: 5110.631836, avg loss: 2.701180, ppl: 14.897293 +epoch: 1, batch: 30771, sum loss: 4780.851562, avg loss: 2.619645, ppl: 13.730844 +epoch: 1, batch: 30772, sum loss: 3966.805176, avg loss: 2.674852, ppl: 14.510201 +epoch: 1, batch: 30773, sum loss: 5062.652344, avg loss: 2.936574, ppl: 18.851143 +epoch: 1, batch: 30774, sum loss: 5075.383789, avg loss: 2.810290, ppl: 16.614737 +epoch: 1, batch: 30775, sum loss: 3811.716797, avg loss: 2.519311, ppl: 12.420033 +epoch: 1, batch: 30776, sum loss: 3802.683105, avg loss: 2.429829, ppl: 11.356944 +epoch: 1, batch: 30777, sum loss: 4040.385254, avg loss: 2.557206, ppl: 12.899724 +epoch: 1, batch: 30778, sum loss: 4147.593262, avg loss: 2.479135, ppl: 11.930943 +epoch: 1, batch: 30779, sum loss: 4698.792480, avg loss: 2.515414, ppl: 12.371724 +epoch: 1, batch: 30780, sum loss: 4448.088867, avg loss: 2.951618, ppl: 19.136900 +epoch: 1, batch: 30781, sum loss: 3857.266846, avg loss: 2.467861, ppl: 11.797188 +epoch: 1, batch: 30782, sum loss: 4176.372070, avg loss: 2.668608, ppl: 14.419886 +epoch: 1, batch: 30783, sum loss: 4477.381836, avg loss: 2.730111, ppl: 15.334587 +epoch: 1, batch: 30784, sum loss: 4183.138184, avg loss: 2.593390, ppl: 13.375039 +epoch: 1, batch: 30785, sum loss: 4665.802246, avg loss: 2.676880, ppl: 14.539660 +epoch: 1, batch: 30786, sum loss: 4298.571289, avg loss: 2.746691, ppl: 15.590956 +epoch: 1, batch: 30787, sum loss: 3260.495117, avg loss: 2.215010, ppl: 9.161505 +epoch: 1, batch: 30788, sum loss: 3847.266113, avg loss: 2.546172, ppl: 12.758174 +epoch: 1, batch: 30789, sum loss: 4494.053223, avg loss: 2.943060, ppl: 18.973825 +epoch: 1, batch: 30790, sum loss: 5131.306152, avg loss: 2.908904, ppl: 18.336687 +epoch: 1, batch: 30791, sum loss: 4116.000977, avg loss: 2.718627, ppl: 15.159493 +epoch: 1, batch: 30792, sum loss: 
4501.366211, avg loss: 2.543145, ppl: 12.719607 +epoch: 1, batch: 30793, sum loss: 3710.540527, avg loss: 2.681026, ppl: 14.600068 +epoch: 1, batch: 30794, sum loss: 4939.647461, avg loss: 2.614954, ppl: 13.666584 +epoch: 1, batch: 30795, sum loss: 4397.114746, avg loss: 2.639324, ppl: 14.003740 +epoch: 1, batch: 30796, sum loss: 2929.897705, avg loss: 2.154337, ppl: 8.622169 +epoch: 1, batch: 30797, sum loss: 4646.530273, avg loss: 2.859403, ppl: 17.451107 +epoch: 1, batch: 30798, sum loss: 4185.392090, avg loss: 2.457658, ppl: 11.677434 +epoch: 1, batch: 30799, sum loss: 4868.514160, avg loss: 2.616074, ppl: 13.681908 +epoch: 1, batch: 30800, sum loss: 4455.195801, avg loss: 2.623790, ppl: 13.787885 +epoch: 1, batch: 30801, sum loss: 4348.577637, avg loss: 2.716164, ppl: 15.122196 +epoch: 1, batch: 30802, sum loss: 4912.276855, avg loss: 2.955642, ppl: 19.214054 +epoch: 1, batch: 30803, sum loss: 4536.559570, avg loss: 2.663863, ppl: 14.351628 +epoch: 1, batch: 30804, sum loss: 3335.072021, avg loss: 2.292146, ppl: 9.896150 +epoch: 1, batch: 30805, sum loss: 4140.743164, avg loss: 2.704601, ppl: 14.948345 +epoch: 1, batch: 30806, sum loss: 4320.913574, avg loss: 2.588924, ppl: 13.315433 +epoch: 1, batch: 30807, sum loss: 5596.595215, avg loss: 2.904305, ppl: 18.252548 +epoch: 1, batch: 30808, sum loss: 5531.444336, avg loss: 2.627764, ppl: 13.842790 +epoch: 1, batch: 30809, sum loss: 4289.653320, avg loss: 2.709825, ppl: 15.026647 +epoch: 1, batch: 30810, sum loss: 5124.111816, avg loss: 2.760836, ppl: 15.813059 +epoch: 1, batch: 30811, sum loss: 4128.917480, avg loss: 2.604995, ppl: 13.531161 +epoch: 1, batch: 30812, sum loss: 4587.645020, avg loss: 2.567233, ppl: 13.029719 +epoch: 1, batch: 30813, sum loss: 4588.498047, avg loss: 2.742677, ppl: 15.528495 +epoch: 1, batch: 30814, sum loss: 4666.925293, avg loss: 2.792894, ppl: 16.328203 +epoch: 1, batch: 30815, sum loss: 3901.060547, avg loss: 2.486336, ppl: 12.017159 +epoch: 1, batch: 30816, sum loss: 
4270.466797, avg loss: 2.474199, ppl: 11.872189 +epoch: 1, batch: 30817, sum loss: 5246.741211, avg loss: 2.754195, ppl: 15.708386 +epoch: 1, batch: 30818, sum loss: 4837.182129, avg loss: 2.539203, ppl: 12.669572 +epoch: 1, batch: 30819, sum loss: 4583.091797, avg loss: 2.732911, ppl: 15.377584 +epoch: 1, batch: 30820, sum loss: 4589.736328, avg loss: 2.796914, ppl: 16.393978 +epoch: 1, batch: 30821, sum loss: 4532.745117, avg loss: 2.562321, ppl: 12.965873 +epoch: 1, batch: 30822, sum loss: 4439.901855, avg loss: 2.587355, ppl: 13.294566 +epoch: 1, batch: 30823, sum loss: 3908.991211, avg loss: 2.469356, ppl: 11.814839 +epoch: 1, batch: 30824, sum loss: 4933.688477, avg loss: 2.787395, ppl: 16.238659 +epoch: 1, batch: 30825, sum loss: 3879.745117, avg loss: 2.475906, ppl: 11.892481 +epoch: 1, batch: 30826, sum loss: 4551.047852, avg loss: 2.511616, ppl: 12.324828 +epoch: 1, batch: 30827, sum loss: 5174.479492, avg loss: 2.714837, ppl: 15.102146 +epoch: 1, batch: 30828, sum loss: 5592.355469, avg loss: 2.783651, ppl: 16.177984 +epoch: 1, batch: 30829, sum loss: 4954.183594, avg loss: 2.561625, ppl: 12.956861 +epoch: 1, batch: 30830, sum loss: 4608.896973, avg loss: 2.741759, ppl: 15.514252 +epoch: 1, batch: 30831, sum loss: 3766.842773, avg loss: 2.221016, ppl: 9.216690 +epoch: 1, batch: 30832, sum loss: 4371.216797, avg loss: 2.469614, ppl: 11.817884 +epoch: 1, batch: 30833, sum loss: 4194.618652, avg loss: 2.376554, ppl: 10.767738 +epoch: 1, batch: 30834, sum loss: 4370.185547, avg loss: 2.555664, ppl: 12.879849 +epoch: 1, batch: 30835, sum loss: 5191.071289, avg loss: 2.880728, ppl: 17.827242 +epoch: 1, batch: 30836, sum loss: 3963.253174, avg loss: 2.590362, ppl: 13.334593 +epoch: 1, batch: 30837, sum loss: 4371.447754, avg loss: 2.676943, ppl: 14.540572 +epoch: 1, batch: 30838, sum loss: 4860.185547, avg loss: 2.671900, ppl: 14.467428 +epoch: 1, batch: 30839, sum loss: 4079.719727, avg loss: 2.475558, ppl: 11.888339 +epoch: 1, batch: 30840, sum loss: 
4526.920898, avg loss: 2.523367, ppl: 12.470519 +epoch: 1, batch: 30841, sum loss: 4826.083496, avg loss: 2.673730, ppl: 14.493937 +epoch: 1, batch: 30842, sum loss: 4635.055664, avg loss: 2.734546, ppl: 15.402752 +epoch: 1, batch: 30843, sum loss: 5242.482422, avg loss: 2.765023, ppl: 15.879397 +epoch: 1, batch: 30844, sum loss: 3734.825439, avg loss: 2.340116, ppl: 10.382443 +epoch: 1, batch: 30845, sum loss: 4648.454590, avg loss: 2.530460, ppl: 12.559280 +epoch: 1, batch: 30846, sum loss: 4833.863770, avg loss: 2.692961, ppl: 14.775353 +epoch: 1, batch: 30847, sum loss: 5834.606445, avg loss: 2.889850, ppl: 17.990604 +epoch: 1, batch: 30848, sum loss: 3697.099609, avg loss: 2.339936, ppl: 10.380577 +epoch: 1, batch: 30849, sum loss: 3890.428223, avg loss: 2.389698, ppl: 10.910196 +epoch: 1, batch: 30850, sum loss: 4169.273438, avg loss: 2.748367, ppl: 15.617113 +epoch: 1, batch: 30851, sum loss: 4195.033203, avg loss: 2.645040, ppl: 14.084005 +epoch: 1, batch: 30852, sum loss: 3319.056396, avg loss: 2.475061, ppl: 11.882428 +epoch: 1, batch: 30853, sum loss: 4916.715820, avg loss: 2.611108, ppl: 13.614121 +epoch: 1, batch: 30854, sum loss: 4100.789551, avg loss: 2.536048, ppl: 12.629662 +epoch: 1, batch: 30855, sum loss: 4551.140625, avg loss: 2.548231, ppl: 12.784470 +epoch: 1, batch: 30856, sum loss: 4415.118652, avg loss: 2.839305, ppl: 17.103868 +epoch: 1, batch: 30857, sum loss: 3905.051514, avg loss: 2.448308, ppl: 11.568758 +epoch: 1, batch: 30858, sum loss: 5290.723633, avg loss: 2.870713, ppl: 17.649593 +epoch: 1, batch: 30859, sum loss: 4381.922363, avg loss: 2.589789, ppl: 13.326955 +epoch: 1, batch: 30860, sum loss: 4130.640625, avg loss: 2.728297, ppl: 15.306790 +epoch: 1, batch: 30861, sum loss: 3889.277832, avg loss: 2.497931, ppl: 12.157309 +epoch: 1, batch: 30862, sum loss: 4528.081543, avg loss: 2.603842, ppl: 13.515569 +epoch: 1, batch: 30863, sum loss: 3601.558838, avg loss: 2.445050, ppl: 11.531129 +epoch: 1, batch: 30864, sum loss: 
3853.739014, avg loss: 2.464027, ppl: 11.752047 +epoch: 1, batch: 30865, sum loss: 4310.559570, avg loss: 2.711044, ppl: 15.044969 +epoch: 1, batch: 30866, sum loss: 4518.777344, avg loss: 2.573336, ppl: 13.109480 +epoch: 1, batch: 30867, sum loss: 3951.708740, avg loss: 2.408110, ppl: 11.112940 +epoch: 1, batch: 30868, sum loss: 4406.636719, avg loss: 2.605936, ppl: 13.543891 +epoch: 1, batch: 30869, sum loss: 3931.610596, avg loss: 2.341638, ppl: 10.398256 +epoch: 1, batch: 30870, sum loss: 4329.227051, avg loss: 2.733098, ppl: 15.380459 +epoch: 1, batch: 30871, sum loss: 4556.064453, avg loss: 2.609430, ppl: 13.591299 +epoch: 1, batch: 30872, sum loss: 3661.701660, avg loss: 2.454224, ppl: 11.637395 +epoch: 1, batch: 30873, sum loss: 4266.853516, avg loss: 2.498158, ppl: 12.160071 +epoch: 1, batch: 30874, sum loss: 3751.566650, avg loss: 2.509409, ppl: 12.297659 +epoch: 1, batch: 30875, sum loss: 4213.281738, avg loss: 2.565945, ppl: 13.012952 +epoch: 1, batch: 30876, sum loss: 4715.705078, avg loss: 2.757722, ppl: 15.763894 +epoch: 1, batch: 30877, sum loss: 3911.469971, avg loss: 2.333813, ppl: 10.317204 +epoch: 1, batch: 30878, sum loss: 3800.566406, avg loss: 2.402381, ppl: 11.049451 +epoch: 1, batch: 30879, sum loss: 4671.785645, avg loss: 2.621653, ppl: 13.758449 +epoch: 1, batch: 30880, sum loss: 4029.950439, avg loss: 2.398780, ppl: 11.009737 +epoch: 1, batch: 30881, sum loss: 4567.774414, avg loss: 2.748360, ppl: 15.617002 +epoch: 1, batch: 30882, sum loss: 4743.318848, avg loss: 2.577891, ppl: 13.169330 +epoch: 1, batch: 30883, sum loss: 4429.579102, avg loss: 2.761583, ppl: 15.824875 +epoch: 1, batch: 30884, sum loss: 4907.839844, avg loss: 2.640043, ppl: 14.013803 +epoch: 1, batch: 30885, sum loss: 3677.427734, avg loss: 2.261641, ppl: 9.598824 +epoch: 1, batch: 30886, sum loss: 5466.648438, avg loss: 2.769325, ppl: 15.947869 +epoch: 1, batch: 30887, sum loss: 4740.062988, avg loss: 2.684067, ppl: 14.644535 +epoch: 1, batch: 30888, sum loss: 
5214.936523, avg loss: 2.791722, ppl: 16.309076 +epoch: 1, batch: 30889, sum loss: 4541.086426, avg loss: 2.730659, ppl: 15.342999 +epoch: 1, batch: 30890, sum loss: 3567.660889, avg loss: 2.221457, ppl: 9.220760 +epoch: 1, batch: 30891, sum loss: 4419.007324, avg loss: 2.528036, ppl: 12.528880 +epoch: 1, batch: 30892, sum loss: 4457.260742, avg loss: 2.688336, ppl: 14.707181 +epoch: 1, batch: 30893, sum loss: 4279.443359, avg loss: 2.672982, ppl: 14.483087 +epoch: 1, batch: 30894, sum loss: 4476.512207, avg loss: 2.546366, ppl: 12.760653 +epoch: 1, batch: 30895, sum loss: 4260.166992, avg loss: 2.571012, ppl: 13.079054 +epoch: 1, batch: 30896, sum loss: 4032.461182, avg loss: 2.599910, ppl: 13.462533 +epoch: 1, batch: 30897, sum loss: 3672.333008, avg loss: 2.420786, ppl: 11.254706 +epoch: 1, batch: 30898, sum loss: 4166.114258, avg loss: 2.677451, ppl: 14.547969 +epoch: 1, batch: 30899, sum loss: 4505.767090, avg loss: 2.574724, ppl: 13.127693 +epoch: 1, batch: 30900, sum loss: 4173.635254, avg loss: 2.462322, ppl: 11.732018 +epoch: 1, batch: 30901, sum loss: 3902.463867, avg loss: 2.219831, ppl: 9.205779 +epoch: 1, batch: 30902, sum loss: 4157.213379, avg loss: 2.478958, ppl: 11.928832 +epoch: 1, batch: 30903, sum loss: 4587.668945, avg loss: 2.600719, ppl: 13.473428 +epoch: 1, batch: 30904, sum loss: 4207.495117, avg loss: 2.373094, ppl: 10.730539 +epoch: 1, batch: 30905, sum loss: 4448.846191, avg loss: 2.921107, ppl: 18.561829 +epoch: 1, batch: 30906, sum loss: 4474.110352, avg loss: 2.766921, ppl: 15.909566 +epoch: 1, batch: 30907, sum loss: 4586.884766, avg loss: 2.893934, ppl: 18.064226 +epoch: 1, batch: 30908, sum loss: 4583.655273, avg loss: 2.570755, ppl: 13.075686 +epoch: 1, batch: 30909, sum loss: 3954.322998, avg loss: 2.596404, ppl: 13.415407 +epoch: 1, batch: 30910, sum loss: 3604.266113, avg loss: 2.433671, ppl: 11.400652 +epoch: 1, batch: 30911, sum loss: 4285.982910, avg loss: 2.542101, ppl: 12.706344 +epoch: 1, batch: 30912, sum loss: 
3808.906250, avg loss: 2.616007, ppl: 13.680987 +epoch: 1, batch: 30913, sum loss: 4463.863770, avg loss: 2.660229, ppl: 14.299560 +epoch: 1, batch: 30914, sum loss: 4791.547852, avg loss: 2.663451, ppl: 14.345707 +epoch: 1, batch: 30915, sum loss: 3782.194336, avg loss: 2.459164, ppl: 11.695032 +epoch: 1, batch: 30916, sum loss: 4274.458984, avg loss: 2.565701, ppl: 13.009781 +epoch: 1, batch: 30917, sum loss: 4155.051758, avg loss: 2.603416, ppl: 13.509809 +epoch: 1, batch: 30918, sum loss: 5004.530762, avg loss: 2.719854, ppl: 15.178101 +epoch: 1, batch: 30919, sum loss: 5987.155273, avg loss: 2.971293, ppl: 19.517138 +epoch: 1, batch: 30920, sum loss: 3899.454102, avg loss: 2.474273, ppl: 11.873072 +epoch: 1, batch: 30921, sum loss: 4171.055664, avg loss: 2.577908, ppl: 13.169562 +epoch: 1, batch: 30922, sum loss: 4420.766113, avg loss: 2.499020, ppl: 12.170559 +epoch: 1, batch: 30923, sum loss: 4190.278809, avg loss: 2.722728, ppl: 15.221794 +epoch: 1, batch: 30924, sum loss: 4336.685547, avg loss: 2.618771, ppl: 13.718857 +epoch: 1, batch: 30925, sum loss: 3697.556152, avg loss: 2.358135, ppl: 10.571220 +epoch: 1, batch: 30926, sum loss: 4185.550781, avg loss: 2.536697, ppl: 12.637864 +epoch: 1, batch: 30927, sum loss: 4544.466797, avg loss: 2.798317, ppl: 16.416992 +epoch: 1, batch: 30928, sum loss: 4366.325195, avg loss: 2.497898, ppl: 12.156912 +epoch: 1, batch: 30929, sum loss: 4109.702148, avg loss: 2.530605, ppl: 12.561101 +epoch: 1, batch: 30930, sum loss: 3889.065674, avg loss: 2.314920, ppl: 10.124115 +epoch: 1, batch: 30931, sum loss: 4825.385254, avg loss: 2.744815, ppl: 15.561736 +epoch: 1, batch: 30932, sum loss: 5167.801270, avg loss: 2.890269, ppl: 17.998156 +epoch: 1, batch: 30933, sum loss: 4460.188965, avg loss: 2.636045, ppl: 13.957897 +epoch: 1, batch: 30934, sum loss: 3359.458252, avg loss: 2.299424, ppl: 9.968438 +epoch: 1, batch: 30935, sum loss: 3947.738281, avg loss: 2.543646, ppl: 12.725984 +epoch: 1, batch: 30936, sum loss: 
4168.607422, avg loss: 2.608640, ppl: 13.580575 +epoch: 1, batch: 30937, sum loss: 5081.889648, avg loss: 2.694533, ppl: 14.798612 +epoch: 1, batch: 30938, sum loss: 4854.718262, avg loss: 2.735052, ppl: 15.410550 +epoch: 1, batch: 30939, sum loss: 4899.776367, avg loss: 2.644240, ppl: 14.072741 +epoch: 1, batch: 30940, sum loss: 3991.560547, avg loss: 2.295320, ppl: 9.927608 +epoch: 1, batch: 30941, sum loss: 4590.316895, avg loss: 2.600746, ppl: 13.473788 +epoch: 1, batch: 30942, sum loss: 4421.655273, avg loss: 2.663648, ppl: 14.348536 +epoch: 1, batch: 30943, sum loss: 4504.372070, avg loss: 2.626456, ppl: 13.824689 +epoch: 1, batch: 30944, sum loss: 4108.366699, avg loss: 2.681702, ppl: 14.609933 +epoch: 1, batch: 30945, sum loss: 4344.737793, avg loss: 2.767349, ppl: 15.916384 +epoch: 1, batch: 30946, sum loss: 4523.023926, avg loss: 2.590506, ppl: 13.336522 +epoch: 1, batch: 30947, sum loss: 3238.253662, avg loss: 2.414805, ppl: 11.187591 +epoch: 1, batch: 30948, sum loss: 4728.628906, avg loss: 2.644647, ppl: 14.078476 +epoch: 1, batch: 30949, sum loss: 4867.794434, avg loss: 2.598929, ppl: 13.449331 +epoch: 1, batch: 30950, sum loss: 5571.774414, avg loss: 2.895933, ppl: 18.100376 +epoch: 1, batch: 30951, sum loss: 3515.786377, avg loss: 2.564396, ppl: 12.992804 +epoch: 1, batch: 30952, sum loss: 3814.468994, avg loss: 2.529489, ppl: 12.547088 +epoch: 1, batch: 30953, sum loss: 3645.370117, avg loss: 2.570783, ppl: 13.076057 +epoch: 1, batch: 30954, sum loss: 4585.934570, avg loss: 2.556262, ppl: 12.887557 +epoch: 1, batch: 30955, sum loss: 4587.246582, avg loss: 2.656194, ppl: 14.241977 +epoch: 1, batch: 30956, sum loss: 4997.504883, avg loss: 2.711614, ppl: 15.053555 +epoch: 1, batch: 30957, sum loss: 3789.768799, avg loss: 2.451338, ppl: 11.603866 +epoch: 1, batch: 30958, sum loss: 4042.719482, avg loss: 2.389314, ppl: 10.906012 +epoch: 1, batch: 30959, sum loss: 4471.791504, avg loss: 2.520739, ppl: 12.437786 +epoch: 1, batch: 30960, sum loss: 
4346.840332, avg loss: 2.501059, ppl: 12.195400 +epoch: 1, batch: 30961, sum loss: 4783.847168, avg loss: 2.856028, ppl: 17.392313 +epoch: 1, batch: 30962, sum loss: 3892.396973, avg loss: 2.463542, ppl: 11.746349 +epoch: 1, batch: 30963, sum loss: 3800.259277, avg loss: 2.385599, ppl: 10.865571 +epoch: 1, batch: 30964, sum loss: 5185.797363, avg loss: 2.692522, ppl: 14.768877 +epoch: 1, batch: 30965, sum loss: 5461.762207, avg loss: 2.790885, ppl: 16.295439 +epoch: 1, batch: 30966, sum loss: 4364.934570, avg loss: 2.694404, ppl: 14.796700 +epoch: 1, batch: 30967, sum loss: 5774.982910, avg loss: 3.003111, ppl: 20.148123 +epoch: 1, batch: 30968, sum loss: 3863.775879, avg loss: 2.632000, ppl: 13.901545 +epoch: 1, batch: 30969, sum loss: 4489.808105, avg loss: 2.515299, ppl: 12.370305 +epoch: 1, batch: 30970, sum loss: 4788.753906, avg loss: 2.684279, ppl: 14.647639 +epoch: 1, batch: 30971, sum loss: 4675.632324, avg loss: 2.821745, ppl: 16.806158 +epoch: 1, batch: 30972, sum loss: 4734.828613, avg loss: 3.031260, ppl: 20.723333 +epoch: 1, batch: 30973, sum loss: 4492.021973, avg loss: 2.665888, ppl: 14.380718 +epoch: 1, batch: 30974, sum loss: 4221.649902, avg loss: 2.250346, ppl: 9.491023 +epoch: 1, batch: 30975, sum loss: 4016.704590, avg loss: 2.674237, ppl: 14.501285 +epoch: 1, batch: 30976, sum loss: 4854.145996, avg loss: 2.690768, ppl: 14.742998 +epoch: 1, batch: 30977, sum loss: 4879.304688, avg loss: 2.820407, ppl: 16.783682 +epoch: 1, batch: 30978, sum loss: 4253.058594, avg loss: 2.504746, ppl: 12.240449 +epoch: 1, batch: 30979, sum loss: 3811.223633, avg loss: 2.551020, ppl: 12.820172 +epoch: 1, batch: 30980, sum loss: 4945.780273, avg loss: 2.816504, ppl: 16.718298 +epoch: 1, batch: 30981, sum loss: 5037.683105, avg loss: 2.806509, ppl: 16.552031 +epoch: 1, batch: 30982, sum loss: 4495.376465, avg loss: 2.531181, ppl: 12.568336 +epoch: 1, batch: 30983, sum loss: 4437.405762, avg loss: 2.560534, ppl: 12.942727 +epoch: 1, batch: 30984, sum loss: 
3271.118652, avg loss: 2.273189, ppl: 9.710314 +epoch: 1, batch: 30985, sum loss: 3848.505371, avg loss: 2.328194, ppl: 10.259400 +epoch: 1, batch: 30986, sum loss: 4442.058594, avg loss: 2.728537, ppl: 15.310472 +epoch: 1, batch: 30987, sum loss: 5012.836426, avg loss: 3.008905, ppl: 20.265205 +epoch: 1, batch: 30988, sum loss: 3992.125488, avg loss: 2.677482, ppl: 14.548416 +epoch: 1, batch: 30989, sum loss: 4466.173340, avg loss: 2.663192, ppl: 14.342000 +epoch: 1, batch: 30990, sum loss: 4183.096191, avg loss: 2.635852, ppl: 13.955192 +epoch: 1, batch: 30991, sum loss: 5056.152344, avg loss: 2.781162, ppl: 16.137758 +epoch: 1, batch: 30992, sum loss: 4372.086914, avg loss: 2.695491, ppl: 14.812795 +epoch: 1, batch: 30993, sum loss: 4429.334961, avg loss: 2.763153, ppl: 15.849744 +epoch: 1, batch: 30994, sum loss: 3788.292725, avg loss: 2.498874, ppl: 12.168783 +epoch: 1, batch: 30995, sum loss: 4374.145020, avg loss: 2.528408, ppl: 12.533531 +epoch: 1, batch: 30996, sum loss: 5028.853516, avg loss: 2.627405, ppl: 13.837816 +epoch: 1, batch: 30997, sum loss: 4002.744141, avg loss: 2.619597, ppl: 13.730188 +epoch: 1, batch: 30998, sum loss: 4745.432129, avg loss: 2.798014, ppl: 16.412024 +epoch: 1, batch: 30999, sum loss: 4359.181641, avg loss: 2.483864, ppl: 11.987495 +epoch: 1, batch: 31000, sum loss: 5390.433594, avg loss: 2.628198, ppl: 13.848787 +epoch: 1, batch: 31001, sum loss: 4112.187988, avg loss: 2.658170, ppl: 14.270144 +epoch: 1, batch: 31002, sum loss: 4562.277832, avg loss: 2.889346, ppl: 17.981552 +epoch: 1, batch: 31003, sum loss: 4396.608398, avg loss: 2.406463, ppl: 11.094654 +epoch: 1, batch: 31004, sum loss: 4101.892090, avg loss: 2.591214, ppl: 13.345966 +epoch: 1, batch: 31005, sum loss: 4672.395996, avg loss: 2.768007, ppl: 15.926865 +epoch: 1, batch: 31006, sum loss: 4169.266602, avg loss: 2.449628, ppl: 11.584035 +epoch: 1, batch: 31007, sum loss: 4941.783691, avg loss: 2.655445, ppl: 14.231322 +epoch: 1, batch: 31008, sum loss: 
4514.479492, avg loss: 2.906941, ppl: 18.300734 +epoch: 1, batch: 31009, sum loss: 4397.722168, avg loss: 2.721363, ppl: 15.201021 +epoch: 1, batch: 31010, sum loss: 4738.555176, avg loss: 2.559998, ppl: 12.935785 +epoch: 1, batch: 31011, sum loss: 4463.060059, avg loss: 2.799912, ppl: 16.443203 +epoch: 1, batch: 31012, sum loss: 4340.586426, avg loss: 2.716262, ppl: 15.123682 +epoch: 1, batch: 31013, sum loss: 4354.725586, avg loss: 2.855558, ppl: 17.384129 +epoch: 1, batch: 31014, sum loss: 4876.779297, avg loss: 2.700321, ppl: 14.884504 +epoch: 1, batch: 31015, sum loss: 5065.027344, avg loss: 2.784512, ppl: 16.191916 +epoch: 1, batch: 31016, sum loss: 5058.026367, avg loss: 2.820985, ppl: 16.793390 +epoch: 1, batch: 31017, sum loss: 3075.266846, avg loss: 2.057035, ppl: 7.822739 +epoch: 1, batch: 31018, sum loss: 5011.471191, avg loss: 2.703059, ppl: 14.925314 +epoch: 1, batch: 31019, sum loss: 3784.082031, avg loss: 2.373954, ppl: 10.739769 +epoch: 1, batch: 31020, sum loss: 4303.572754, avg loss: 2.449387, ppl: 11.581244 +epoch: 1, batch: 31021, sum loss: 4917.451660, avg loss: 2.706357, ppl: 14.974630 +epoch: 1, batch: 31022, sum loss: 5044.389648, avg loss: 2.543817, ppl: 12.728166 +epoch: 1, batch: 31023, sum loss: 4814.634277, avg loss: 2.830473, ppl: 16.953472 +epoch: 1, batch: 31024, sum loss: 5888.794922, avg loss: 2.886664, ppl: 17.933382 +epoch: 1, batch: 31025, sum loss: 5622.368164, avg loss: 3.085822, ppl: 21.885456 +epoch: 1, batch: 31026, sum loss: 3792.144775, avg loss: 2.373057, ppl: 10.730143 +epoch: 1, batch: 31027, sum loss: 4395.889648, avg loss: 2.479351, ppl: 11.933520 +epoch: 1, batch: 31028, sum loss: 4537.772461, avg loss: 2.507057, ppl: 12.268766 +epoch: 1, batch: 31029, sum loss: 4242.873047, avg loss: 2.512062, ppl: 12.330333 +epoch: 1, batch: 31030, sum loss: 4462.565918, avg loss: 2.712806, ppl: 15.071507 +epoch: 1, batch: 31031, sum loss: 4476.094727, avg loss: 2.662757, ppl: 14.335761 +epoch: 1, batch: 31032, sum loss: 
4369.320312, avg loss: 2.692126, ppl: 14.763026 +epoch: 1, batch: 31033, sum loss: 4964.438477, avg loss: 2.920258, ppl: 18.546072 +epoch: 1, batch: 31034, sum loss: 4709.922363, avg loss: 2.773806, ppl: 16.019489 +epoch: 1, batch: 31035, sum loss: 4563.308105, avg loss: 2.685879, ppl: 14.671091 +epoch: 1, batch: 31036, sum loss: 3715.079590, avg loss: 2.399922, ppl: 11.022321 +epoch: 1, batch: 31037, sum loss: 3767.539551, avg loss: 2.410454, ppl: 11.139017 +epoch: 1, batch: 31038, sum loss: 5207.791504, avg loss: 2.796880, ppl: 16.393427 +epoch: 1, batch: 31039, sum loss: 4231.492676, avg loss: 2.592827, ppl: 13.367503 +epoch: 1, batch: 31040, sum loss: 4217.440430, avg loss: 2.613036, ppl: 13.640403 +epoch: 1, batch: 31041, sum loss: 5486.633301, avg loss: 2.659541, ppl: 14.289730 +epoch: 1, batch: 31042, sum loss: 3160.287598, avg loss: 2.139667, ppl: 8.496606 +epoch: 1, batch: 31043, sum loss: 5056.857910, avg loss: 2.845727, ppl: 17.214077 +epoch: 1, batch: 31044, sum loss: 4849.196777, avg loss: 2.891590, ppl: 18.021944 +epoch: 1, batch: 31045, sum loss: 4576.982422, avg loss: 2.569895, ppl: 13.064450 +epoch: 1, batch: 31046, sum loss: 5161.984375, avg loss: 2.903253, ppl: 18.233368 +epoch: 1, batch: 31047, sum loss: 3857.776855, avg loss: 2.463459, ppl: 11.745369 +epoch: 1, batch: 31048, sum loss: 5357.907715, avg loss: 2.792031, ppl: 16.314121 +epoch: 1, batch: 31049, sum loss: 4300.049316, avg loss: 2.628392, ppl: 13.851481 +epoch: 1, batch: 31050, sum loss: 5069.109375, avg loss: 2.893327, ppl: 18.053282 +epoch: 1, batch: 31051, sum loss: 4485.536133, avg loss: 2.510093, ppl: 12.306074 +epoch: 1, batch: 31052, sum loss: 3767.269775, avg loss: 2.509840, ppl: 12.302961 +epoch: 1, batch: 31053, sum loss: 4561.919922, avg loss: 2.491491, ppl: 12.079274 +epoch: 1, batch: 31054, sum loss: 4708.260742, avg loss: 2.718395, ppl: 15.155981 +epoch: 1, batch: 31055, sum loss: 5073.886230, avg loss: 2.825104, ppl: 16.862694 +epoch: 1, batch: 31056, sum loss: 
4136.491211, avg loss: 2.463664, ppl: 11.747771 +epoch: 1, batch: 31057, sum loss: 5684.456543, avg loss: 2.966835, ppl: 19.430334 +epoch: 1, batch: 31058, sum loss: 4868.632812, avg loss: 2.669207, ppl: 14.428517 +epoch: 1, batch: 31059, sum loss: 4065.491699, avg loss: 2.724860, ppl: 15.254285 +epoch: 1, batch: 31060, sum loss: 4643.406250, avg loss: 2.679403, ppl: 14.576393 +epoch: 1, batch: 31061, sum loss: 4432.897949, avg loss: 2.657613, ppl: 14.262201 +epoch: 1, batch: 31062, sum loss: 3966.792480, avg loss: 2.602882, ppl: 13.502599 +epoch: 1, batch: 31063, sum loss: 3646.951416, avg loss: 2.424835, ppl: 11.300362 +epoch: 1, batch: 31064, sum loss: 4816.810059, avg loss: 2.610737, ppl: 13.609078 +epoch: 1, batch: 31065, sum loss: 4427.975098, avg loss: 2.675514, ppl: 14.519808 +epoch: 1, batch: 31066, sum loss: 4448.939453, avg loss: 2.509272, ppl: 12.295979 +epoch: 1, batch: 31067, sum loss: 3763.943848, avg loss: 2.316273, ppl: 10.137822 +epoch: 1, batch: 31068, sum loss: 3869.771729, avg loss: 2.423151, ppl: 11.281348 +epoch: 1, batch: 31069, sum loss: 3873.364014, avg loss: 2.442222, ppl: 11.498561 +epoch: 1, batch: 31070, sum loss: 3737.742676, avg loss: 2.525502, ppl: 12.497167 +epoch: 1, batch: 31071, sum loss: 4326.830566, avg loss: 2.437651, ppl: 11.446121 +epoch: 1, batch: 31072, sum loss: 5395.250977, avg loss: 2.765377, ppl: 15.885032 +epoch: 1, batch: 31073, sum loss: 4245.796875, avg loss: 2.458481, ppl: 11.687046 +epoch: 1, batch: 31074, sum loss: 4277.379395, avg loss: 2.469619, ppl: 11.817938 +epoch: 1, batch: 31075, sum loss: 4180.039551, avg loss: 2.474861, ppl: 11.880052 +epoch: 1, batch: 31076, sum loss: 4679.045898, avg loss: 2.742700, ppl: 15.528854 +epoch: 1, batch: 31077, sum loss: 4410.571289, avg loss: 2.459884, ppl: 11.703450 +epoch: 1, batch: 31078, sum loss: 3835.243408, avg loss: 2.427369, ppl: 11.329041 +epoch: 1, batch: 31079, sum loss: 4415.664551, avg loss: 2.568740, ppl: 13.049376 +epoch: 1, batch: 31080, sum loss: 
3281.942139, avg loss: 2.081130, ppl: 8.013519 +epoch: 1, batch: 31081, sum loss: 4213.245605, avg loss: 2.533521, ppl: 12.597787 +epoch: 1, batch: 31082, sum loss: 4077.223145, avg loss: 2.371858, ppl: 10.717282 +epoch: 1, batch: 31083, sum loss: 4499.495117, avg loss: 2.563815, ppl: 12.985260 +epoch: 1, batch: 31084, sum loss: 4734.426758, avg loss: 2.630237, ppl: 13.877060 +epoch: 1, batch: 31085, sum loss: 4688.260742, avg loss: 2.660761, ppl: 14.307175 +epoch: 1, batch: 31086, sum loss: 3933.166992, avg loss: 2.461306, ppl: 11.720109 +epoch: 1, batch: 31087, sum loss: 4521.957031, avg loss: 2.472366, ppl: 11.850451 +epoch: 1, batch: 31088, sum loss: 3782.485107, avg loss: 2.468985, ppl: 11.810454 +epoch: 1, batch: 31089, sum loss: 4096.623047, avg loss: 2.488836, ppl: 12.047240 +epoch: 1, batch: 31090, sum loss: 4529.737793, avg loss: 2.780686, ppl: 16.130085 +epoch: 1, batch: 31091, sum loss: 4435.270508, avg loss: 2.575651, ppl: 13.139864 +epoch: 1, batch: 31092, sum loss: 4564.774414, avg loss: 2.437146, ppl: 11.440343 +epoch: 1, batch: 31093, sum loss: 4802.972168, avg loss: 2.739859, ppl: 15.484796 +epoch: 1, batch: 31094, sum loss: 4697.446289, avg loss: 2.633098, ppl: 13.916816 +epoch: 1, batch: 31095, sum loss: 4735.911621, avg loss: 2.569675, ppl: 13.061582 +epoch: 1, batch: 31096, sum loss: 5272.360352, avg loss: 2.641463, ppl: 14.033721 +epoch: 1, batch: 31097, sum loss: 4043.794434, avg loss: 2.525793, ppl: 12.500806 +epoch: 1, batch: 31098, sum loss: 4109.952148, avg loss: 2.599590, ppl: 13.458220 +epoch: 1, batch: 31099, sum loss: 4414.143066, avg loss: 2.719743, ppl: 15.176421 +epoch: 1, batch: 31100, sum loss: 4151.433594, avg loss: 2.711583, ppl: 15.053088 +epoch: 1, batch: 31101, sum loss: 4725.017578, avg loss: 2.897007, ppl: 18.119823 +epoch: 1, batch: 31102, sum loss: 5488.841309, avg loss: 2.756826, ppl: 15.749780 +epoch: 1, batch: 31103, sum loss: 3820.312256, avg loss: 2.461542, ppl: 11.722870 +epoch: 1, batch: 31104, sum loss: 
3898.212646, avg loss: 2.335658, ppl: 10.336257 +epoch: 1, batch: 31105, sum loss: 4985.466797, avg loss: 2.748328, ppl: 15.616499 +epoch: 1, batch: 31106, sum loss: 4453.616211, avg loss: 2.720596, ppl: 15.189377 +epoch: 1, batch: 31107, sum loss: 4882.964844, avg loss: 2.715776, ppl: 15.116332 +epoch: 1, batch: 31108, sum loss: 4220.683105, avg loss: 2.662891, ppl: 14.337685 +epoch: 1, batch: 31109, sum loss: 4687.986816, avg loss: 2.656083, ppl: 14.240401 +epoch: 1, batch: 31110, sum loss: 4660.569824, avg loss: 2.493617, ppl: 12.104979 +epoch: 1, batch: 31111, sum loss: 5020.788086, avg loss: 2.602793, ppl: 13.501398 +epoch: 1, batch: 31112, sum loss: 4539.358398, avg loss: 2.589480, ppl: 13.322845 +epoch: 1, batch: 31113, sum loss: 5144.565430, avg loss: 2.549339, ppl: 12.798638 +epoch: 1, batch: 31114, sum loss: 3767.687988, avg loss: 2.467379, ppl: 11.791502 +epoch: 1, batch: 31115, sum loss: 4471.859863, avg loss: 2.698769, ppl: 14.861421 +epoch: 1, batch: 31116, sum loss: 5048.467285, avg loss: 2.814084, ppl: 16.677896 +epoch: 1, batch: 31117, sum loss: 4003.986572, avg loss: 2.374844, ppl: 10.749334 +epoch: 1, batch: 31118, sum loss: 5419.704102, avg loss: 2.746936, ppl: 15.594774 +epoch: 1, batch: 31119, sum loss: 4306.248535, avg loss: 2.586335, ppl: 13.281013 +epoch: 1, batch: 31120, sum loss: 3858.110352, avg loss: 2.556733, ppl: 12.893630 +epoch: 1, batch: 31121, sum loss: 4831.016602, avg loss: 2.558801, ppl: 12.920319 +epoch: 1, batch: 31122, sum loss: 4140.107422, avg loss: 2.500065, ppl: 12.183284 +epoch: 1, batch: 31123, sum loss: 4211.091797, avg loss: 2.559934, ppl: 12.934965 +epoch: 1, batch: 31124, sum loss: 4003.112061, avg loss: 2.592689, ppl: 13.365667 +epoch: 1, batch: 31125, sum loss: 4956.265137, avg loss: 2.853348, ppl: 17.345755 +epoch: 1, batch: 31126, sum loss: 4031.707520, avg loss: 2.504166, ppl: 12.233354 +epoch: 1, batch: 31127, sum loss: 4552.516113, avg loss: 2.629992, ppl: 13.873659 +epoch: 1, batch: 31128, sum loss: 
3990.832275, avg loss: 2.445363, ppl: 11.534737 +epoch: 1, batch: 31129, sum loss: 3776.166992, avg loss: 2.640676, ppl: 14.022683 +epoch: 1, batch: 31130, sum loss: 4626.764160, avg loss: 2.584784, ppl: 13.260428 +epoch: 1, batch: 31131, sum loss: 4912.955078, avg loss: 2.774113, ppl: 16.024414 +epoch: 1, batch: 31132, sum loss: 4254.018555, avg loss: 2.601846, ppl: 13.488618 +epoch: 1, batch: 31133, sum loss: 4271.012207, avg loss: 2.557492, ppl: 12.903421 +epoch: 1, batch: 31134, sum loss: 3873.195557, avg loss: 2.229819, ppl: 9.298183 +epoch: 1, batch: 31135, sum loss: 4753.566406, avg loss: 2.532534, ppl: 12.585359 +epoch: 1, batch: 31136, sum loss: 4421.493652, avg loss: 2.872965, ppl: 17.689396 +epoch: 1, batch: 31137, sum loss: 4181.826660, avg loss: 2.771257, ppl: 15.978705 +epoch: 1, batch: 31138, sum loss: 4609.015625, avg loss: 2.756588, ppl: 15.746033 +epoch: 1, batch: 31139, sum loss: 5477.405762, avg loss: 2.806048, ppl: 16.544409 +epoch: 1, batch: 31140, sum loss: 5115.542969, avg loss: 2.906559, ppl: 18.293732 +epoch: 1, batch: 31141, sum loss: 4709.282227, avg loss: 3.044139, ppl: 20.991943 +epoch: 1, batch: 31142, sum loss: 4073.230469, avg loss: 2.511240, ppl: 12.320191 +epoch: 1, batch: 31143, sum loss: 4544.154785, avg loss: 2.704854, ppl: 14.952137 +epoch: 1, batch: 31144, sum loss: 4408.434082, avg loss: 2.581050, ppl: 13.211008 +epoch: 1, batch: 31145, sum loss: 4722.756836, avg loss: 2.722050, ppl: 15.211476 +epoch: 1, batch: 31146, sum loss: 3957.521973, avg loss: 2.444424, ppl: 11.523906 +epoch: 1, batch: 31147, sum loss: 3951.369873, avg loss: 2.346419, ppl: 10.448092 +epoch: 1, batch: 31148, sum loss: 3864.558350, avg loss: 2.356438, ppl: 10.553293 +epoch: 1, batch: 31149, sum loss: 5398.922363, avg loss: 2.894865, ppl: 18.081053 +epoch: 1, batch: 31150, sum loss: 4674.576660, avg loss: 2.871361, ppl: 17.661047 +epoch: 1, batch: 31151, sum loss: 3730.661377, avg loss: 2.507165, ppl: 12.270095 +epoch: 1, batch: 31152, sum loss: 
3773.651367, avg loss: 2.581157, ppl: 13.212416 +epoch: 1, batch: 31153, sum loss: 3652.015381, avg loss: 2.472590, ppl: 11.853109 +epoch: 1, batch: 31154, sum loss: 4615.617188, avg loss: 2.826465, ppl: 16.885662 +epoch: 1, batch: 31155, sum loss: 3273.867188, avg loss: 2.478325, ppl: 11.921278 +epoch: 1, batch: 31156, sum loss: 4282.542969, avg loss: 2.595481, ppl: 13.403028 +epoch: 1, batch: 31157, sum loss: 4868.949219, avg loss: 2.684096, ppl: 14.644951 +epoch: 1, batch: 31158, sum loss: 3890.766113, avg loss: 2.500492, ppl: 12.188494 +epoch: 1, batch: 31159, sum loss: 4445.960449, avg loss: 2.704355, ppl: 14.944681 +epoch: 1, batch: 31160, sum loss: 4211.709473, avg loss: 2.546378, ppl: 12.760802 +epoch: 1, batch: 31161, sum loss: 4573.298340, avg loss: 2.704494, ppl: 14.946745 +epoch: 1, batch: 31162, sum loss: 5418.543945, avg loss: 2.995326, ppl: 19.991873 +epoch: 1, batch: 31163, sum loss: 5146.548340, avg loss: 2.672144, ppl: 14.470958 +epoch: 1, batch: 31164, sum loss: 4582.920410, avg loss: 2.595085, ppl: 13.397728 +epoch: 1, batch: 31165, sum loss: 4704.875000, avg loss: 2.751389, ppl: 15.664371 +epoch: 1, batch: 31166, sum loss: 4035.583008, avg loss: 2.690389, ppl: 14.737403 +epoch: 1, batch: 31167, sum loss: 4403.577637, avg loss: 2.690029, ppl: 14.732105 +epoch: 1, batch: 31168, sum loss: 4474.579590, avg loss: 2.728402, ppl: 15.308407 +epoch: 1, batch: 31169, sum loss: 4852.094238, avg loss: 2.579529, ppl: 13.190925 +epoch: 1, batch: 31170, sum loss: 4290.315430, avg loss: 2.579865, ppl: 13.195356 +epoch: 1, batch: 31171, sum loss: 4640.090820, avg loss: 2.798607, ppl: 16.421761 +epoch: 1, batch: 31172, sum loss: 4178.886719, avg loss: 2.422543, ppl: 11.274494 +epoch: 1, batch: 31173, sum loss: 4169.778320, avg loss: 2.804155, ppl: 16.513115 +epoch: 1, batch: 31174, sum loss: 4350.679199, avg loss: 2.709016, ppl: 15.014488 +epoch: 1, batch: 31175, sum loss: 4656.550293, avg loss: 2.635286, ppl: 13.947302 +epoch: 1, batch: 31176, sum loss: 
4846.125000, avg loss: 2.748795, ppl: 15.623787 +epoch: 1, batch: 31177, sum loss: 4886.849609, avg loss: 2.743880, ppl: 15.547188 +epoch: 1, batch: 31178, sum loss: 4827.079590, avg loss: 2.710320, ppl: 15.034085 +epoch: 1, batch: 31179, sum loss: 4159.197754, avg loss: 2.617494, ppl: 13.701344 +epoch: 1, batch: 31180, sum loss: 4292.720215, avg loss: 2.384845, ppl: 10.857374 +epoch: 1, batch: 31181, sum loss: 4902.363281, avg loss: 2.796556, ppl: 16.388117 +epoch: 1, batch: 31182, sum loss: 3834.250000, avg loss: 2.330851, ppl: 10.286690 +epoch: 1, batch: 31183, sum loss: 4962.445801, avg loss: 2.824386, ppl: 16.850590 +epoch: 1, batch: 31184, sum loss: 3704.591797, avg loss: 2.405579, ppl: 11.084847 +epoch: 1, batch: 31185, sum loss: 4711.141602, avg loss: 2.883196, ppl: 17.871292 +epoch: 1, batch: 31186, sum loss: 3716.469727, avg loss: 2.314116, ppl: 10.115972 +epoch: 1, batch: 31187, sum loss: 3966.560303, avg loss: 2.681920, ppl: 14.613128 +epoch: 1, batch: 31188, sum loss: 4603.161133, avg loss: 2.599187, ppl: 13.452802 +epoch: 1, batch: 31189, sum loss: 4308.369141, avg loss: 2.454911, ppl: 11.645400 +epoch: 1, batch: 31190, sum loss: 4123.589844, avg loss: 2.531363, ppl: 12.570625 +epoch: 1, batch: 31191, sum loss: 4630.617676, avg loss: 2.637026, ppl: 13.971591 +epoch: 1, batch: 31192, sum loss: 4649.776855, avg loss: 2.666156, ppl: 14.384573 +epoch: 1, batch: 31193, sum loss: 4675.545410, avg loss: 2.737439, ppl: 15.447369 +epoch: 1, batch: 31194, sum loss: 4301.555664, avg loss: 2.755641, ppl: 15.731125 +epoch: 1, batch: 31195, sum loss: 4112.915527, avg loss: 2.573790, ppl: 13.115433 +epoch: 1, batch: 31196, sum loss: 3773.179688, avg loss: 2.448527, ppl: 11.571288 +epoch: 1, batch: 31197, sum loss: 3999.197510, avg loss: 2.443004, ppl: 11.507559 +epoch: 1, batch: 31198, sum loss: 4346.752930, avg loss: 2.596627, ppl: 13.418398 +epoch: 1, batch: 31199, sum loss: 4610.004395, avg loss: 2.729428, ppl: 15.324123 +epoch: 1, batch: 31200, sum loss: 
3859.274902, avg loss: 2.431805, ppl: 11.379408 +epoch: 1, batch: 31201, sum loss: 4001.180176, avg loss: 2.594799, ppl: 13.393895 +epoch: 1, batch: 31202, sum loss: 4092.152588, avg loss: 2.586695, ppl: 13.285795 +epoch: 1, batch: 31203, sum loss: 3953.190430, avg loss: 2.410482, ppl: 11.139328 +epoch: 1, batch: 31204, sum loss: 3761.733154, avg loss: 2.285379, ppl: 9.829408 +epoch: 1, batch: 31205, sum loss: 4841.308594, avg loss: 2.682165, ppl: 14.616710 +epoch: 1, batch: 31206, sum loss: 5077.086914, avg loss: 2.720840, ppl: 15.193071 +epoch: 1, batch: 31207, sum loss: 3998.120361, avg loss: 2.411412, ppl: 11.149688 +epoch: 1, batch: 31208, sum loss: 4355.208984, avg loss: 2.662108, ppl: 14.326460 +epoch: 1, batch: 31209, sum loss: 4880.519531, avg loss: 2.852437, ppl: 17.329964 +epoch: 1, batch: 31210, sum loss: 3727.193848, avg loss: 2.380073, ppl: 10.805687 +epoch: 1, batch: 31211, sum loss: 4138.178223, avg loss: 2.817003, ppl: 16.726654 +epoch: 1, batch: 31212, sum loss: 5038.138184, avg loss: 3.044192, ppl: 20.993069 +epoch: 1, batch: 31213, sum loss: 3557.946289, avg loss: 2.550499, ppl: 12.813498 +epoch: 1, batch: 31214, sum loss: 3865.706543, avg loss: 2.529913, ppl: 12.552410 +epoch: 1, batch: 31215, sum loss: 4344.762695, avg loss: 2.393809, ppl: 10.955141 +epoch: 1, batch: 31216, sum loss: 4637.971680, avg loss: 2.428257, ppl: 11.339106 +epoch: 1, batch: 31217, sum loss: 4038.557373, avg loss: 2.462535, ppl: 11.734520 +epoch: 1, batch: 31218, sum loss: 5434.136719, avg loss: 2.809791, ppl: 16.606453 +epoch: 1, batch: 31219, sum loss: 3545.012939, avg loss: 2.333781, ppl: 10.316874 +epoch: 1, batch: 31220, sum loss: 4302.264160, avg loss: 2.485421, ppl: 12.006176 +epoch: 1, batch: 31221, sum loss: 5051.709473, avg loss: 2.744003, ppl: 15.549105 +epoch: 1, batch: 31222, sum loss: 4369.338379, avg loss: 2.720634, ppl: 15.189949 +epoch: 1, batch: 31223, sum loss: 4717.043457, avg loss: 2.809436, ppl: 16.600554 +epoch: 1, batch: 31224, sum loss: 
4395.700195, avg loss: 2.394172, ppl: 10.959123 +epoch: 1, batch: 31225, sum loss: 4332.106934, avg loss: 2.580171, ppl: 13.199393 +epoch: 1, batch: 31226, sum loss: 4506.677734, avg loss: 2.701845, ppl: 14.907212 +epoch: 1, batch: 31227, sum loss: 4531.381836, avg loss: 2.593807, ppl: 13.380621 +epoch: 1, batch: 31228, sum loss: 4403.246582, avg loss: 2.582549, ppl: 13.230825 +epoch: 1, batch: 31229, sum loss: 4635.090332, avg loss: 2.737797, ppl: 15.452905 +epoch: 1, batch: 31230, sum loss: 3395.377197, avg loss: 2.253071, ppl: 9.516913 +epoch: 1, batch: 31231, sum loss: 4582.082520, avg loss: 2.542776, ppl: 12.714920 +epoch: 1, batch: 31232, sum loss: 5388.565430, avg loss: 2.666287, ppl: 14.386453 +epoch: 1, batch: 31233, sum loss: 5176.180176, avg loss: 2.823884, ppl: 16.842146 +epoch: 1, batch: 31234, sum loss: 4278.333008, avg loss: 2.637690, ppl: 13.980868 +epoch: 1, batch: 31235, sum loss: 3906.643066, avg loss: 2.558378, ppl: 12.914852 +epoch: 1, batch: 31236, sum loss: 4981.614258, avg loss: 2.781471, ppl: 16.142746 +epoch: 1, batch: 31237, sum loss: 5440.199707, avg loss: 2.792710, ppl: 16.325207 +epoch: 1, batch: 31238, sum loss: 4726.281250, avg loss: 2.519340, ppl: 12.420394 +epoch: 1, batch: 31239, sum loss: 5652.894043, avg loss: 2.930479, ppl: 18.736605 +epoch: 1, batch: 31240, sum loss: 4537.237305, avg loss: 2.628758, ppl: 13.856555 +epoch: 1, batch: 31241, sum loss: 4978.583008, avg loss: 2.796957, ppl: 16.394678 +epoch: 1, batch: 31242, sum loss: 4104.312988, avg loss: 2.641128, ppl: 14.029020 +epoch: 1, batch: 31243, sum loss: 4533.915039, avg loss: 2.714919, ppl: 15.103392 +epoch: 1, batch: 31244, sum loss: 3195.784668, avg loss: 1.955805, ppl: 7.069605 +epoch: 1, batch: 31245, sum loss: 5087.653809, avg loss: 2.780139, ppl: 16.121258 +epoch: 1, batch: 31246, sum loss: 4627.910156, avg loss: 2.659719, ppl: 14.292266 +epoch: 1, batch: 31247, sum loss: 3598.306885, avg loss: 2.330510, ppl: 10.283184 +epoch: 1, batch: 31248, sum loss: 
4502.652832, avg loss: 2.665869, ppl: 14.380441 +epoch: 1, batch: 31249, sum loss: 3975.511719, avg loss: 2.741732, ppl: 15.513838 +epoch: 1, batch: 31250, sum loss: 3992.434082, avg loss: 2.341604, ppl: 10.397899 +epoch: 1, batch: 31251, sum loss: 5432.920410, avg loss: 2.900652, ppl: 18.185993 +epoch: 1, batch: 31252, sum loss: 5116.586426, avg loss: 2.714370, ppl: 15.095090 +epoch: 1, batch: 31253, sum loss: 5303.497070, avg loss: 2.863659, ppl: 17.525537 +epoch: 1, batch: 31254, sum loss: 4168.253418, avg loss: 2.473741, ppl: 11.866755 +epoch: 1, batch: 31255, sum loss: 4161.978516, avg loss: 2.510240, ppl: 12.307887 +epoch: 1, batch: 31256, sum loss: 4394.468750, avg loss: 2.902555, ppl: 18.220648 +epoch: 1, batch: 31257, sum loss: 4493.277344, avg loss: 2.540010, ppl: 12.679797 +epoch: 1, batch: 31258, sum loss: 4270.473145, avg loss: 2.395106, ppl: 10.969357 +epoch: 1, batch: 31259, sum loss: 4636.001465, avg loss: 2.772728, ppl: 16.002232 +epoch: 1, batch: 31260, sum loss: 4704.959473, avg loss: 2.622608, ppl: 13.771599 +epoch: 1, batch: 31261, sum loss: 4503.095215, avg loss: 2.419718, ppl: 11.242689 +epoch: 1, batch: 31262, sum loss: 4593.808594, avg loss: 2.570682, ppl: 13.074739 +epoch: 1, batch: 31263, sum loss: 3720.369629, avg loss: 2.354664, ppl: 10.534592 +epoch: 1, batch: 31264, sum loss: 4452.669434, avg loss: 2.505723, ppl: 12.252411 +epoch: 1, batch: 31265, sum loss: 3500.572266, avg loss: 2.296963, ppl: 9.943941 +epoch: 1, batch: 31266, sum loss: 4357.389648, avg loss: 2.809407, ppl: 16.600063 +epoch: 1, batch: 31267, sum loss: 4951.019531, avg loss: 2.633521, ppl: 13.922704 +epoch: 1, batch: 31268, sum loss: 5601.754883, avg loss: 3.127724, ppl: 22.821987 +epoch: 1, batch: 31269, sum loss: 4280.052734, avg loss: 2.495657, ppl: 12.129705 +epoch: 1, batch: 31270, sum loss: 3191.172852, avg loss: 2.264849, ppl: 9.629675 +epoch: 1, batch: 31271, sum loss: 5433.696777, avg loss: 2.948289, ppl: 19.073294 +epoch: 1, batch: 31272, sum loss: 
4879.215820, avg loss: 2.688273, ppl: 14.706259 +epoch: 1, batch: 31273, sum loss: 3827.871094, avg loss: 2.535014, ppl: 12.616607 +epoch: 1, batch: 31274, sum loss: 4451.901855, avg loss: 2.789412, ppl: 16.271454 +epoch: 1, batch: 31275, sum loss: 3173.557861, avg loss: 2.324951, ppl: 10.226178 +epoch: 1, batch: 31276, sum loss: 4351.814453, avg loss: 2.564416, ppl: 12.993073 +epoch: 1, batch: 31277, sum loss: 4977.017578, avg loss: 2.631950, ppl: 13.900852 +epoch: 1, batch: 31278, sum loss: 3748.114746, avg loss: 2.502079, ppl: 12.207850 +epoch: 1, batch: 31279, sum loss: 4430.424805, avg loss: 2.673763, ppl: 14.494403 +epoch: 1, batch: 31280, sum loss: 5085.789062, avg loss: 2.665508, ppl: 14.375247 +epoch: 1, batch: 31281, sum loss: 4167.684082, avg loss: 2.629454, ppl: 13.866192 +epoch: 1, batch: 31282, sum loss: 4479.071777, avg loss: 2.568275, ppl: 13.043305 +epoch: 1, batch: 31283, sum loss: 4206.945312, avg loss: 2.703692, ppl: 14.934775 +epoch: 1, batch: 31284, sum loss: 4454.979004, avg loss: 2.798354, ppl: 16.417595 +epoch: 1, batch: 31285, sum loss: 5565.689453, avg loss: 2.920089, ppl: 18.542933 +epoch: 1, batch: 31286, sum loss: 3759.711670, avg loss: 2.388635, ppl: 10.898606 +epoch: 1, batch: 31287, sum loss: 3514.314697, avg loss: 2.461005, ppl: 11.716578 +epoch: 1, batch: 31288, sum loss: 4119.087402, avg loss: 2.547364, ppl: 12.773389 +epoch: 1, batch: 31289, sum loss: 5123.795898, avg loss: 2.660330, ppl: 14.301012 +epoch: 1, batch: 31290, sum loss: 5322.713867, avg loss: 2.672045, ppl: 14.469532 +epoch: 1, batch: 31291, sum loss: 4736.982910, avg loss: 2.656749, ppl: 14.249884 +epoch: 1, batch: 31292, sum loss: 4039.520508, avg loss: 2.519975, ppl: 12.428288 +epoch: 1, batch: 31293, sum loss: 4481.404785, avg loss: 2.614588, ppl: 13.661593 +epoch: 1, batch: 31294, sum loss: 4737.575684, avg loss: 2.838572, ppl: 17.091333 +epoch: 1, batch: 31295, sum loss: 5146.812500, avg loss: 2.782061, ppl: 16.152275 +epoch: 1, batch: 31296, sum loss: 
4666.211426, avg loss: 2.769265, ppl: 15.946908 +epoch: 1, batch: 31297, sum loss: 4165.336426, avg loss: 2.654771, ppl: 14.221734 +epoch: 1, batch: 31298, sum loss: 4021.532715, avg loss: 2.452154, ppl: 11.613337 +epoch: 1, batch: 31299, sum loss: 4078.546387, avg loss: 2.541150, ppl: 12.694265 +epoch: 1, batch: 31300, sum loss: 3983.710449, avg loss: 2.526132, ppl: 12.505047 +epoch: 1, batch: 31301, sum loss: 5112.421875, avg loss: 2.977532, ppl: 19.639280 +epoch: 1, batch: 31302, sum loss: 4482.794434, avg loss: 2.570410, ppl: 13.071177 +epoch: 1, batch: 31303, sum loss: 4202.886719, avg loss: 2.584801, ppl: 13.260653 +epoch: 1, batch: 31304, sum loss: 5158.347168, avg loss: 2.737976, ppl: 15.455676 +epoch: 1, batch: 31305, sum loss: 4581.150391, avg loss: 2.599972, ppl: 13.463361 +epoch: 1, batch: 31306, sum loss: 3942.393066, avg loss: 2.411250, ppl: 11.147883 +epoch: 1, batch: 31307, sum loss: 5030.534668, avg loss: 2.978410, ppl: 19.656542 +epoch: 1, batch: 31308, sum loss: 4629.168945, avg loss: 2.437688, ppl: 11.446542 +epoch: 1, batch: 31309, sum loss: 4977.622559, avg loss: 2.584435, ppl: 13.255803 +epoch: 1, batch: 31310, sum loss: 5738.786133, avg loss: 2.981188, ppl: 19.711212 +epoch: 1, batch: 31311, sum loss: 3977.152832, avg loss: 2.470281, ppl: 11.825771 +epoch: 1, batch: 31312, sum loss: 4382.808105, avg loss: 2.610368, ppl: 13.604056 +epoch: 1, batch: 31313, sum loss: 4098.776855, avg loss: 2.513045, ppl: 12.342460 +epoch: 1, batch: 31314, sum loss: 3804.910400, avg loss: 2.606103, ppl: 13.546161 +epoch: 1, batch: 31315, sum loss: 5309.462891, avg loss: 2.597584, ppl: 13.431255 +epoch: 1, batch: 31316, sum loss: 4684.454590, avg loss: 2.603921, ppl: 13.516639 +epoch: 1, batch: 31317, sum loss: 4235.895020, avg loss: 2.544081, ppl: 12.731525 +epoch: 1, batch: 31318, sum loss: 4030.955811, avg loss: 2.517774, ppl: 12.400960 +epoch: 1, batch: 31319, sum loss: 4675.353516, avg loss: 2.799613, ppl: 16.438284 +epoch: 1, batch: 31320, sum loss: 
4768.020996, avg loss: 2.715274, ppl: 15.108750 +epoch: 1, batch: 31321, sum loss: 4303.076172, avg loss: 2.288870, ppl: 9.863789 +epoch: 1, batch: 31322, sum loss: 4405.535156, avg loss: 2.724512, ppl: 15.248965 +epoch: 1, batch: 31323, sum loss: 4361.100098, avg loss: 2.611437, ppl: 13.618611 +epoch: 1, batch: 31324, sum loss: 3034.841553, avg loss: 2.221699, ppl: 9.222992 +epoch: 1, batch: 31325, sum loss: 5254.812012, avg loss: 2.866782, ppl: 17.580360 +epoch: 1, batch: 31326, sum loss: 4532.806152, avg loss: 2.724042, ppl: 15.241808 +epoch: 1, batch: 31327, sum loss: 4257.062500, avg loss: 2.576915, ppl: 13.156482 +epoch: 1, batch: 31328, sum loss: 3036.501465, avg loss: 2.366720, ppl: 10.662359 +epoch: 1, batch: 31329, sum loss: 4762.697266, avg loss: 2.783576, ppl: 16.176758 +epoch: 1, batch: 31330, sum loss: 4786.688477, avg loss: 2.692176, ppl: 14.763765 +epoch: 1, batch: 31331, sum loss: 4982.945801, avg loss: 2.534560, ppl: 12.610887 +epoch: 1, batch: 31332, sum loss: 4287.147461, avg loss: 2.715103, ppl: 15.106165 +epoch: 1, batch: 31333, sum loss: 4635.066895, avg loss: 2.884298, ppl: 17.891005 +epoch: 1, batch: 31334, sum loss: 5612.115234, avg loss: 3.007564, ppl: 20.238047 +epoch: 1, batch: 31335, sum loss: 4110.660156, avg loss: 2.395490, ppl: 10.973571 +epoch: 1, batch: 31336, sum loss: 4357.758789, avg loss: 2.583141, ppl: 13.238657 +epoch: 1, batch: 31337, sum loss: 4656.734375, avg loss: 2.776824, ppl: 16.067911 +epoch: 1, batch: 31338, sum loss: 5829.633789, avg loss: 2.829919, ppl: 16.944090 +epoch: 1, batch: 31339, sum loss: 4803.089844, avg loss: 2.725931, ppl: 15.270619 +epoch: 1, batch: 31340, sum loss: 4529.406738, avg loss: 2.462973, ppl: 11.739660 +epoch: 1, batch: 31341, sum loss: 3855.979980, avg loss: 2.512039, ppl: 12.330045 +epoch: 1, batch: 31342, sum loss: 3748.447754, avg loss: 2.520812, ppl: 12.438693 +epoch: 1, batch: 31343, sum loss: 4744.716797, avg loss: 2.776312, ppl: 16.059681 +epoch: 1, batch: 31344, sum loss: 
4741.562988, avg loss: 2.734465, ppl: 15.401507 +epoch: 1, batch: 31345, sum loss: 5378.460938, avg loss: 2.996357, ppl: 20.012503 +epoch: 1, batch: 31346, sum loss: 4909.559570, avg loss: 2.693121, ppl: 14.777728 +epoch: 1, batch: 31347, sum loss: 4694.755371, avg loss: 2.734278, ppl: 15.398621 +epoch: 1, batch: 31348, sum loss: 4860.032227, avg loss: 2.706031, ppl: 14.969748 +epoch: 1, batch: 31349, sum loss: 4146.332520, avg loss: 2.537535, ppl: 12.648457 +epoch: 1, batch: 31350, sum loss: 5109.002441, avg loss: 2.770609, ppl: 15.968350 +epoch: 1, batch: 31351, sum loss: 4941.709961, avg loss: 2.833549, ppl: 17.005711 +epoch: 1, batch: 31352, sum loss: 4182.679688, avg loss: 2.731992, ppl: 15.363461 +epoch: 1, batch: 31353, sum loss: 4324.393555, avg loss: 2.536301, ppl: 12.632860 +epoch: 1, batch: 31354, sum loss: 4960.501953, avg loss: 2.864031, ppl: 17.532057 +epoch: 1, batch: 31355, sum loss: 3830.724609, avg loss: 2.794110, ppl: 16.348070 +epoch: 1, batch: 31356, sum loss: 4438.483887, avg loss: 2.464455, ppl: 11.757074 +epoch: 1, batch: 31357, sum loss: 4072.325928, avg loss: 2.545204, ppl: 12.745824 +epoch: 1, batch: 31358, sum loss: 4180.663086, avg loss: 2.426386, ppl: 11.317906 +epoch: 1, batch: 31359, sum loss: 4579.254883, avg loss: 2.894598, ppl: 18.076242 +epoch: 1, batch: 31360, sum loss: 4503.078125, avg loss: 2.712698, ppl: 15.069876 +epoch: 1, batch: 31361, sum loss: 4524.607422, avg loss: 2.860055, ppl: 17.462490 +epoch: 1, batch: 31362, sum loss: 4809.295898, avg loss: 2.799357, ppl: 16.434084 +epoch: 1, batch: 31363, sum loss: 4593.626465, avg loss: 2.802701, ppl: 16.489117 +epoch: 1, batch: 31364, sum loss: 4906.067383, avg loss: 2.839159, ppl: 17.101385 +epoch: 1, batch: 31365, sum loss: 5674.648438, avg loss: 2.781690, ppl: 16.146292 +epoch: 1, batch: 31366, sum loss: 4462.800781, avg loss: 2.714599, ppl: 15.098553 +epoch: 1, batch: 31367, sum loss: 4064.874512, avg loss: 2.651582, ppl: 14.176441 +epoch: 1, batch: 31368, sum loss: 
3888.320312, avg loss: 2.568243, ppl: 13.042892 +epoch: 1, batch: 31369, sum loss: 4064.133789, avg loss: 2.497931, ppl: 12.157314 +epoch: 1, batch: 31370, sum loss: 4819.569824, avg loss: 2.703068, ppl: 14.925453 +epoch: 1, batch: 31371, sum loss: 4775.825684, avg loss: 2.618325, ppl: 13.712742 +epoch: 1, batch: 31372, sum loss: 3791.414307, avg loss: 2.446074, ppl: 11.542937 +epoch: 1, batch: 31373, sum loss: 4892.027344, avg loss: 2.870908, ppl: 17.653040 +epoch: 1, batch: 31374, sum loss: 3978.908203, avg loss: 2.508769, ppl: 12.289795 +epoch: 1, batch: 31375, sum loss: 4306.476562, avg loss: 2.614740, ppl: 13.663659 +epoch: 1, batch: 31376, sum loss: 4138.494629, avg loss: 2.602827, ppl: 13.501852 +epoch: 1, batch: 31377, sum loss: 3859.810547, avg loss: 2.526054, ppl: 12.504069 +epoch: 1, batch: 31378, sum loss: 5337.223633, avg loss: 2.714763, ppl: 15.101029 +epoch: 1, batch: 31379, sum loss: 4164.864258, avg loss: 2.586872, ppl: 13.288142 +epoch: 1, batch: 31380, sum loss: 4600.524414, avg loss: 2.634894, ppl: 13.941833 +epoch: 1, batch: 31381, sum loss: 4955.872559, avg loss: 2.630506, ppl: 13.880789 +epoch: 1, batch: 31382, sum loss: 4130.193359, avg loss: 2.602516, ppl: 13.497661 +epoch: 1, batch: 31383, sum loss: 4267.340332, avg loss: 2.657123, ppl: 14.255222 +epoch: 1, batch: 31384, sum loss: 4451.006348, avg loss: 2.624414, ppl: 13.796490 +epoch: 1, batch: 31385, sum loss: 3995.076172, avg loss: 2.324070, ppl: 10.217174 +epoch: 1, batch: 31386, sum loss: 4019.394287, avg loss: 2.777743, ppl: 16.082684 +epoch: 1, batch: 31387, sum loss: 5761.466797, avg loss: 2.909832, ppl: 18.353706 +epoch: 1, batch: 31388, sum loss: 3656.834473, avg loss: 2.608299, ppl: 13.575933 +epoch: 1, batch: 31389, sum loss: 5509.204102, avg loss: 2.657600, ppl: 14.262014 +epoch: 1, batch: 31390, sum loss: 3977.488525, avg loss: 2.422344, ppl: 11.272250 +epoch: 1, batch: 31391, sum loss: 3996.220947, avg loss: 2.538895, ppl: 12.665669 +epoch: 1, batch: 31392, sum loss: 
4958.625977, avg loss: 2.835121, ppl: 17.032455 +epoch: 1, batch: 31393, sum loss: 3873.292969, avg loss: 2.502127, ppl: 12.208439 +epoch: 1, batch: 31394, sum loss: 4311.977539, avg loss: 2.605425, ppl: 13.536972 +epoch: 1, batch: 31395, sum loss: 4976.250488, avg loss: 2.648350, ppl: 14.130711 +epoch: 1, batch: 31396, sum loss: 4693.635254, avg loss: 2.812244, ppl: 16.647236 +epoch: 1, batch: 31397, sum loss: 3596.813965, avg loss: 2.252232, ppl: 9.508935 +epoch: 1, batch: 31398, sum loss: 4022.842285, avg loss: 2.600415, ppl: 13.469330 +epoch: 1, batch: 31399, sum loss: 4275.645508, avg loss: 2.477199, ppl: 11.907862 +epoch: 1, batch: 31400, sum loss: 4054.236084, avg loss: 2.602206, ppl: 13.493465 +epoch: 1, batch: 31401, sum loss: 4068.864014, avg loss: 2.514749, ppl: 12.363505 +epoch: 1, batch: 31402, sum loss: 4905.567383, avg loss: 2.734430, ppl: 15.400964 +epoch: 1, batch: 31403, sum loss: 4241.197266, avg loss: 2.592419, ppl: 13.362054 +epoch: 1, batch: 31404, sum loss: 5302.122070, avg loss: 2.891015, ppl: 18.011587 +epoch: 1, batch: 31405, sum loss: 3843.067627, avg loss: 2.315101, ppl: 10.125947 +epoch: 1, batch: 31406, sum loss: 5239.583496, avg loss: 2.760581, ppl: 15.809033 +epoch: 1, batch: 31407, sum loss: 3735.521729, avg loss: 2.443114, ppl: 11.508826 +epoch: 1, batch: 31408, sum loss: 4095.821777, avg loss: 2.652735, ppl: 14.192796 +epoch: 1, batch: 31409, sum loss: 4930.309082, avg loss: 2.682431, ppl: 14.620600 +epoch: 1, batch: 31410, sum loss: 4816.238770, avg loss: 2.563193, ppl: 12.977182 +epoch: 1, batch: 31411, sum loss: 5507.992676, avg loss: 2.934466, ppl: 18.811457 +epoch: 1, batch: 31412, sum loss: 4300.535156, avg loss: 2.677793, ppl: 14.552933 +epoch: 1, batch: 31413, sum loss: 4175.868164, avg loss: 2.434908, ppl: 11.414773 +epoch: 1, batch: 31414, sum loss: 5211.158203, avg loss: 2.798689, ppl: 16.423096 +epoch: 1, batch: 31415, sum loss: 5668.396973, avg loss: 2.917343, ppl: 18.492086 +epoch: 1, batch: 31416, sum loss: 
3933.119873, avg loss: 2.632610, ppl: 13.910032 +epoch: 1, batch: 31417, sum loss: 4320.101562, avg loss: 2.640649, ppl: 14.022303 +epoch: 1, batch: 31418, sum loss: 4987.155762, avg loss: 2.541873, ppl: 12.703448 +epoch: 1, batch: 31419, sum loss: 4623.996094, avg loss: 2.590474, ppl: 13.336093 +epoch: 1, batch: 31420, sum loss: 4273.573242, avg loss: 2.641269, ppl: 14.030997 +epoch: 1, batch: 31421, sum loss: 4346.750000, avg loss: 2.393585, ppl: 10.952686 +epoch: 1, batch: 31422, sum loss: 4342.728027, avg loss: 2.741621, ppl: 15.512114 +epoch: 1, batch: 31423, sum loss: 4464.798340, avg loss: 2.618650, ppl: 13.717196 +epoch: 1, batch: 31424, sum loss: 4432.415527, avg loss: 2.714278, ppl: 15.093704 +epoch: 1, batch: 31425, sum loss: 4276.470215, avg loss: 2.703205, ppl: 14.927496 +epoch: 1, batch: 31426, sum loss: 4636.036621, avg loss: 2.622193, ppl: 13.765877 +epoch: 1, batch: 31427, sum loss: 4806.770996, avg loss: 2.759340, ppl: 15.789423 +epoch: 1, batch: 31428, sum loss: 4371.354492, avg loss: 2.583543, ppl: 13.243977 +epoch: 1, batch: 31429, sum loss: 5517.615234, avg loss: 3.073880, ppl: 21.625656 +epoch: 1, batch: 31430, sum loss: 4148.578613, avg loss: 2.444654, ppl: 11.526567 +epoch: 1, batch: 31431, sum loss: 3801.809570, avg loss: 2.356981, ppl: 10.559021 +epoch: 1, batch: 31432, sum loss: 4774.296387, avg loss: 2.848626, ppl: 17.264038 +epoch: 1, batch: 31433, sum loss: 3803.713379, avg loss: 2.430488, ppl: 11.364423 +epoch: 1, batch: 31434, sum loss: 5280.945312, avg loss: 2.819512, ppl: 16.768660 +epoch: 1, batch: 31435, sum loss: 4598.404785, avg loss: 2.682850, ppl: 14.626719 +epoch: 1, batch: 31436, sum loss: 4305.975586, avg loss: 2.494771, ppl: 12.118964 +epoch: 1, batch: 31437, sum loss: 4117.837402, avg loss: 2.498688, ppl: 12.166518 +epoch: 1, batch: 31438, sum loss: 4935.921875, avg loss: 2.763674, ppl: 15.858003 +epoch: 1, batch: 31439, sum loss: 3831.921387, avg loss: 2.414569, ppl: 11.184953 +epoch: 1, batch: 31440, sum loss: 
3822.096924, avg loss: 2.612507, ppl: 13.633182 +epoch: 1, batch: 31441, sum loss: 3338.079346, avg loss: 2.364079, ppl: 10.634237 +epoch: 1, batch: 31442, sum loss: 4279.813965, avg loss: 2.456839, ppl: 11.667872 +epoch: 1, batch: 31443, sum loss: 3844.035400, avg loss: 2.671324, ppl: 14.459101 +epoch: 1, batch: 31444, sum loss: 4247.033203, avg loss: 2.541612, ppl: 12.700126 +epoch: 1, batch: 31445, sum loss: 5023.573730, avg loss: 2.809605, ppl: 16.603361 +epoch: 1, batch: 31446, sum loss: 3962.711914, avg loss: 2.495411, ppl: 12.126713 +epoch: 1, batch: 31447, sum loss: 4404.810547, avg loss: 2.530046, ppl: 12.554087 +epoch: 1, batch: 31448, sum loss: 4114.840820, avg loss: 2.463977, ppl: 11.751450 +epoch: 1, batch: 31449, sum loss: 3740.416504, avg loss: 2.460800, ppl: 11.714184 +epoch: 1, batch: 31450, sum loss: 3902.068604, avg loss: 2.370637, ppl: 10.704210 +epoch: 1, batch: 31451, sum loss: 5471.454590, avg loss: 2.691320, ppl: 14.751141 +epoch: 1, batch: 31452, sum loss: 4305.398438, avg loss: 2.751053, ppl: 15.659118 +epoch: 1, batch: 31453, sum loss: 4811.526367, avg loss: 2.743173, ppl: 15.536209 +epoch: 1, batch: 31454, sum loss: 4111.805664, avg loss: 2.726661, ppl: 15.281783 +epoch: 1, batch: 31455, sum loss: 4071.359131, avg loss: 2.496235, ppl: 12.136712 +epoch: 1, batch: 31456, sum loss: 4792.893066, avg loss: 2.760883, ppl: 15.813802 +epoch: 1, batch: 31457, sum loss: 5542.927734, avg loss: 2.800873, ppl: 16.459009 +epoch: 1, batch: 31458, sum loss: 5147.783203, avg loss: 2.892013, ppl: 18.029568 +epoch: 1, batch: 31459, sum loss: 4793.061523, avg loss: 2.613447, ppl: 13.646004 +epoch: 1, batch: 31460, sum loss: 4235.736328, avg loss: 2.455499, ppl: 11.652251 +epoch: 1, batch: 31461, sum loss: 4431.675781, avg loss: 2.515140, ppl: 12.368335 +epoch: 1, batch: 31462, sum loss: 4301.600586, avg loss: 2.586651, ppl: 13.285206 +epoch: 1, batch: 31463, sum loss: 4739.731445, avg loss: 2.471184, ppl: 11.836456 +epoch: 1, batch: 31464, sum loss: 
3771.935547, avg loss: 2.367819, ppl: 10.674088 +epoch: 1, batch: 31465, sum loss: 5308.165039, avg loss: 3.033237, ppl: 20.764343 +epoch: 1, batch: 31466, sum loss: 4112.229492, avg loss: 2.416116, ppl: 11.202265 +epoch: 1, batch: 31467, sum loss: 4120.940918, avg loss: 2.469108, ppl: 11.811904 +epoch: 1, batch: 31468, sum loss: 4481.386719, avg loss: 2.401600, ppl: 11.040832 +epoch: 1, batch: 31469, sum loss: 4987.731445, avg loss: 2.685908, ppl: 14.671518 +epoch: 1, batch: 31470, sum loss: 4629.298340, avg loss: 2.640786, ppl: 14.024228 +epoch: 1, batch: 31471, sum loss: 5137.433105, avg loss: 2.685537, ppl: 14.666080 +epoch: 1, batch: 31472, sum loss: 4621.739258, avg loss: 2.630472, ppl: 13.880319 +epoch: 1, batch: 31473, sum loss: 4618.853516, avg loss: 2.593405, ppl: 13.375231 +epoch: 1, batch: 31474, sum loss: 4330.740234, avg loss: 2.480378, ppl: 11.945781 +epoch: 1, batch: 31475, sum loss: 3788.212646, avg loss: 2.326912, ppl: 10.246251 +epoch: 1, batch: 31476, sum loss: 4290.662109, avg loss: 2.563120, ppl: 12.976235 +epoch: 1, batch: 31477, sum loss: 2986.034912, avg loss: 2.093994, ppl: 8.117269 +epoch: 1, batch: 31478, sum loss: 4889.926758, avg loss: 2.854598, ppl: 17.367458 +epoch: 1, batch: 31479, sum loss: 4329.529785, avg loss: 2.633534, ppl: 13.922886 +epoch: 1, batch: 31480, sum loss: 5378.086914, avg loss: 2.838041, ppl: 17.082262 +epoch: 1, batch: 31481, sum loss: 4676.006836, avg loss: 2.744136, ppl: 15.551166 +epoch: 1, batch: 31482, sum loss: 4671.983398, avg loss: 2.674289, ppl: 14.502039 +epoch: 1, batch: 31483, sum loss: 4359.045898, avg loss: 2.651488, ppl: 14.175113 +epoch: 1, batch: 31484, sum loss: 4645.643066, avg loss: 2.665314, ppl: 14.372468 +epoch: 1, batch: 31485, sum loss: 4432.695312, avg loss: 2.671908, ppl: 14.467546 +epoch: 1, batch: 31486, sum loss: 5496.491211, avg loss: 2.843503, ppl: 17.175827 +epoch: 1, batch: 31487, sum loss: 3407.945068, avg loss: 2.386516, ppl: 10.875539 +epoch: 1, batch: 31488, sum loss: 
5240.353516, avg loss: 2.823466, ppl: 16.835106 +epoch: 1, batch: 31489, sum loss: 4903.251953, avg loss: 2.624867, ppl: 13.802741 +epoch: 1, batch: 31490, sum loss: 4654.455566, avg loss: 2.775465, ppl: 16.046095 +epoch: 1, batch: 31491, sum loss: 4542.287598, avg loss: 2.906134, ppl: 18.285971 +epoch: 1, batch: 31492, sum loss: 4315.822754, avg loss: 2.654258, ppl: 14.214429 +epoch: 1, batch: 31493, sum loss: 4828.871094, avg loss: 2.604569, ppl: 13.525394 +epoch: 1, batch: 31494, sum loss: 4191.311523, avg loss: 2.443913, ppl: 11.518025 +epoch: 1, batch: 31495, sum loss: 4162.885254, avg loss: 2.654901, ppl: 14.223581 +epoch: 1, batch: 31496, sum loss: 3439.988281, avg loss: 2.367507, ppl: 10.670762 +epoch: 1, batch: 31497, sum loss: 5388.906250, avg loss: 2.640326, ppl: 14.017776 +epoch: 1, batch: 31498, sum loss: 4716.445312, avg loss: 2.685903, ppl: 14.671441 +epoch: 1, batch: 31499, sum loss: 4946.123535, avg loss: 2.675026, ppl: 14.512730 +epoch: 1, batch: 31500, sum loss: 3174.135742, avg loss: 2.163692, ppl: 8.703206 +epoch: 1, batch: 31501, sum loss: 4065.789795, avg loss: 2.384628, ppl: 10.855020 +epoch: 1, batch: 31502, sum loss: 4616.398926, avg loss: 2.808028, ppl: 16.577204 +epoch: 1, batch: 31503, sum loss: 3919.809082, avg loss: 2.648520, ppl: 14.133102 +epoch: 1, batch: 31504, sum loss: 3257.765869, avg loss: 2.390144, ppl: 10.915062 +epoch: 1, batch: 31505, sum loss: 3661.292236, avg loss: 2.340980, ppl: 10.391411 +epoch: 1, batch: 31506, sum loss: 4252.548340, avg loss: 2.705184, ppl: 14.957061 +epoch: 1, batch: 31507, sum loss: 3619.482910, avg loss: 2.308344, ppl: 10.057752 +epoch: 1, batch: 31508, sum loss: 4591.619629, avg loss: 2.601484, ppl: 13.483737 +epoch: 1, batch: 31509, sum loss: 3892.185791, avg loss: 2.457188, ppl: 11.671943 +epoch: 1, batch: 31510, sum loss: 5582.940430, avg loss: 2.845535, ppl: 17.210770 +epoch: 1, batch: 31511, sum loss: 4170.353027, avg loss: 2.636127, ppl: 13.959035 +epoch: 1, batch: 31512, sum loss: 
3987.631348, avg loss: 2.466068, ppl: 11.776050 +epoch: 1, batch: 31513, sum loss: 4466.526367, avg loss: 2.668176, ppl: 14.413653 +epoch: 1, batch: 31514, sum loss: 4095.904297, avg loss: 2.470389, ppl: 11.827043 +epoch: 1, batch: 31515, sum loss: 3824.607178, avg loss: 2.737729, ppl: 15.451852 +epoch: 1, batch: 31516, sum loss: 3783.548340, avg loss: 2.423798, ppl: 11.288651 +epoch: 1, batch: 31517, sum loss: 5121.467773, avg loss: 2.572309, ppl: 13.096031 +epoch: 1, batch: 31518, sum loss: 4330.457031, avg loss: 2.556350, ppl: 12.888687 +epoch: 1, batch: 31519, sum loss: 4025.542236, avg loss: 2.444166, ppl: 11.520942 +epoch: 1, batch: 31520, sum loss: 4296.877441, avg loss: 2.626453, ppl: 13.824653 +epoch: 1, batch: 31521, sum loss: 4092.683105, avg loss: 2.269930, ppl: 9.678720 +epoch: 1, batch: 31522, sum loss: 4797.560547, avg loss: 2.805591, ppl: 16.536844 +epoch: 1, batch: 31523, sum loss: 4738.432129, avg loss: 2.626625, ppl: 13.827029 +epoch: 1, batch: 31524, sum loss: 5081.602539, avg loss: 2.692953, ppl: 14.775245 +epoch: 1, batch: 31525, sum loss: 4194.060547, avg loss: 2.585734, ppl: 13.273026 +epoch: 1, batch: 31526, sum loss: 4327.274902, avg loss: 2.555980, ppl: 12.883925 +epoch: 1, batch: 31527, sum loss: 5299.403809, avg loss: 2.847611, ppl: 17.246531 +epoch: 1, batch: 31528, sum loss: 4085.997314, avg loss: 2.592638, ppl: 13.364979 +epoch: 1, batch: 31529, sum loss: 4566.690430, avg loss: 2.476513, ppl: 11.899702 +epoch: 1, batch: 31530, sum loss: 3838.209229, avg loss: 2.269787, ppl: 9.677338 +epoch: 1, batch: 31531, sum loss: 4584.954102, avg loss: 2.654866, ppl: 14.223086 +epoch: 1, batch: 31532, sum loss: 5191.699707, avg loss: 2.647476, ppl: 14.118355 +epoch: 1, batch: 31533, sum loss: 4473.118164, avg loss: 2.563391, ppl: 12.979763 +epoch: 1, batch: 31534, sum loss: 4244.431152, avg loss: 2.871740, ppl: 17.667730 +epoch: 1, batch: 31535, sum loss: 5240.813965, avg loss: 2.837474, ppl: 17.072584 +epoch: 1, batch: 31536, sum loss: 
4167.899902, avg loss: 2.577551, ppl: 13.164860 +epoch: 1, batch: 31537, sum loss: 4796.089844, avg loss: 2.826217, ppl: 16.881475 +epoch: 1, batch: 31538, sum loss: 4114.454102, avg loss: 2.748466, ppl: 15.618658 +epoch: 1, batch: 31539, sum loss: 4786.197754, avg loss: 2.731848, ppl: 15.361252 +epoch: 1, batch: 31540, sum loss: 4210.058594, avg loss: 2.651170, ppl: 14.170615 +epoch: 1, batch: 31541, sum loss: 4262.915039, avg loss: 2.722168, ppl: 15.213268 +epoch: 1, batch: 31542, sum loss: 4313.331055, avg loss: 2.646215, ppl: 14.100573 +epoch: 1, batch: 31543, sum loss: 5146.132324, avg loss: 2.632293, ppl: 13.905616 +epoch: 1, batch: 31544, sum loss: 4047.158447, avg loss: 2.496705, ppl: 12.142413 +epoch: 1, batch: 31545, sum loss: 3757.519043, avg loss: 2.381191, ppl: 10.817777 +epoch: 1, batch: 31546, sum loss: 3938.678467, avg loss: 2.388525, ppl: 10.897413 +epoch: 1, batch: 31547, sum loss: 5034.202637, avg loss: 2.792126, ppl: 16.315664 +epoch: 1, batch: 31548, sum loss: 4471.288086, avg loss: 2.577111, ppl: 13.159070 +epoch: 1, batch: 31549, sum loss: 3656.406250, avg loss: 2.494138, ppl: 12.111289 +epoch: 1, batch: 31550, sum loss: 4265.770508, avg loss: 2.677822, ppl: 14.553363 +epoch: 1, batch: 31551, sum loss: 4351.438965, avg loss: 2.398809, ppl: 11.010053 +epoch: 1, batch: 31552, sum loss: 5081.259766, avg loss: 2.923625, ppl: 18.608618 +epoch: 1, batch: 31553, sum loss: 4057.321045, avg loss: 2.450073, ppl: 11.589192 +epoch: 1, batch: 31554, sum loss: 5102.423828, avg loss: 2.755088, ppl: 15.722429 +epoch: 1, batch: 31555, sum loss: 3927.412354, avg loss: 2.695547, ppl: 14.813622 +epoch: 1, batch: 31556, sum loss: 4955.449219, avg loss: 2.704940, ppl: 14.953414 +epoch: 1, batch: 31557, sum loss: 3191.015381, avg loss: 2.325813, ppl: 10.234999 +epoch: 1, batch: 31558, sum loss: 4449.179688, avg loss: 2.839298, ppl: 17.103754 +epoch: 1, batch: 31559, sum loss: 3741.263428, avg loss: 2.519369, ppl: 12.420758 +epoch: 1, batch: 31560, sum loss: 
4275.353027, avg loss: 2.487117, ppl: 12.026548 +epoch: 1, batch: 31561, sum loss: 4567.050293, avg loss: 2.544318, ppl: 12.734536 +epoch: 1, batch: 31562, sum loss: 4801.412109, avg loss: 2.878545, ppl: 17.788364 +epoch: 1, batch: 31563, sum loss: 4472.814453, avg loss: 2.710797, ppl: 15.041252 +epoch: 1, batch: 31564, sum loss: 4216.432617, avg loss: 2.493455, ppl: 12.103022 +epoch: 1, batch: 31565, sum loss: 4268.605957, avg loss: 2.634942, ppl: 13.942505 +epoch: 1, batch: 31566, sum loss: 4370.179688, avg loss: 2.750270, ppl: 15.646862 +epoch: 1, batch: 31567, sum loss: 4404.995117, avg loss: 2.739425, ppl: 15.478078 +epoch: 1, batch: 31568, sum loss: 4714.431641, avg loss: 2.449055, ppl: 11.577403 +epoch: 1, batch: 31569, sum loss: 3402.561035, avg loss: 2.328926, ppl: 10.266910 +epoch: 1, batch: 31570, sum loss: 3352.852295, avg loss: 2.288636, ppl: 9.861482 +epoch: 1, batch: 31571, sum loss: 3858.904541, avg loss: 2.451655, ppl: 11.607537 +epoch: 1, batch: 31572, sum loss: 3977.489746, avg loss: 2.452213, ppl: 11.614020 +epoch: 1, batch: 31573, sum loss: 5192.083008, avg loss: 2.902226, ppl: 18.214655 +epoch: 1, batch: 31574, sum loss: 5081.898926, avg loss: 2.801488, ppl: 16.469133 +epoch: 1, batch: 31575, sum loss: 4201.698242, avg loss: 2.601671, ppl: 13.486251 +epoch: 1, batch: 31576, sum loss: 4185.161133, avg loss: 2.527271, ppl: 12.519298 +epoch: 1, batch: 31577, sum loss: 4530.747070, avg loss: 2.643376, ppl: 14.060597 +epoch: 1, batch: 31578, sum loss: 5016.895020, avg loss: 2.608890, ppl: 13.583959 +epoch: 1, batch: 31579, sum loss: 3979.138672, avg loss: 2.600744, ppl: 13.473763 +epoch: 1, batch: 31580, sum loss: 4340.688965, avg loss: 2.487501, ppl: 12.031169 +epoch: 1, batch: 31581, sum loss: 4783.497559, avg loss: 2.728750, ppl: 15.313725 +epoch: 1, batch: 31582, sum loss: 3798.104980, avg loss: 2.680385, ppl: 14.590704 +epoch: 1, batch: 31583, sum loss: 3913.604980, avg loss: 2.595229, ppl: 13.399652 +epoch: 1, batch: 31584, sum loss: 
4685.850586, avg loss: 2.665444, ppl: 14.374329 +epoch: 1, batch: 31585, sum loss: 3505.859375, avg loss: 2.308005, ppl: 10.054345 +epoch: 1, batch: 31586, sum loss: 3897.157471, avg loss: 2.321118, ppl: 10.187058 +epoch: 1, batch: 31587, sum loss: 4042.102051, avg loss: 2.628155, ppl: 13.848193 +epoch: 1, batch: 31588, sum loss: 4956.855957, avg loss: 2.764560, ppl: 15.872054 +epoch: 1, batch: 31589, sum loss: 4564.702148, avg loss: 2.590637, ppl: 13.338265 +epoch: 1, batch: 31590, sum loss: 3980.103760, avg loss: 2.574453, ppl: 13.124131 +epoch: 1, batch: 31591, sum loss: 5470.406250, avg loss: 2.947417, ppl: 19.056671 +epoch: 1, batch: 31592, sum loss: 4875.046387, avg loss: 2.877831, ppl: 17.775681 +epoch: 1, batch: 31593, sum loss: 3729.588379, avg loss: 2.249450, ppl: 9.482521 +epoch: 1, batch: 31594, sum loss: 4210.081055, avg loss: 2.690148, ppl: 14.733851 +epoch: 1, batch: 31595, sum loss: 4014.578613, avg loss: 2.714387, ppl: 15.095357 +epoch: 1, batch: 31596, sum loss: 5367.673828, avg loss: 2.817677, ppl: 16.737915 +epoch: 1, batch: 31597, sum loss: 4499.504395, avg loss: 2.686271, ppl: 14.676850 +epoch: 1, batch: 31598, sum loss: 3984.921875, avg loss: 2.733143, ppl: 15.381148 +epoch: 1, batch: 31599, sum loss: 5412.001465, avg loss: 2.843931, ppl: 17.183187 +epoch: 1, batch: 31600, sum loss: 4735.412109, avg loss: 2.800362, ppl: 16.450603 +epoch: 1, batch: 31601, sum loss: 4447.669434, avg loss: 2.731984, ppl: 15.363333 +epoch: 1, batch: 31602, sum loss: 4946.530273, avg loss: 2.485694, ppl: 12.009448 +epoch: 1, batch: 31603, sum loss: 4565.121094, avg loss: 2.693287, ppl: 14.780173 +epoch: 1, batch: 31604, sum loss: 4288.623047, avg loss: 2.650570, ppl: 14.162114 +epoch: 1, batch: 31605, sum loss: 4004.233398, avg loss: 2.428280, ppl: 11.339360 +epoch: 1, batch: 31606, sum loss: 4500.561035, avg loss: 2.536957, ppl: 12.641140 +epoch: 1, batch: 31607, sum loss: 4604.604980, avg loss: 2.479593, ppl: 11.936409 +epoch: 1, batch: 31608, sum loss: 
4267.413086, avg loss: 2.753170, ppl: 15.692294 +epoch: 1, batch: 31609, sum loss: 3708.060791, avg loss: 2.339470, ppl: 10.375740 +epoch: 1, batch: 31610, sum loss: 4844.728027, avg loss: 2.861623, ppl: 17.489895 +epoch: 1, batch: 31611, sum loss: 4666.296387, avg loss: 2.570962, ppl: 13.078403 +epoch: 1, batch: 31612, sum loss: 4564.564453, avg loss: 2.660003, ppl: 14.296328 +epoch: 1, batch: 31613, sum loss: 4071.411621, avg loss: 2.365724, ppl: 10.651752 +epoch: 1, batch: 31614, sum loss: 3665.488770, avg loss: 2.640842, ppl: 14.025007 +epoch: 1, batch: 31615, sum loss: 3982.187012, avg loss: 2.640707, ppl: 14.023121 +epoch: 1, batch: 31616, sum loss: 4146.974609, avg loss: 2.377852, ppl: 10.781721 +epoch: 1, batch: 31617, sum loss: 3734.963379, avg loss: 2.357931, ppl: 10.569065 +epoch: 1, batch: 31618, sum loss: 4191.430176, avg loss: 2.571429, ppl: 13.084516 +epoch: 1, batch: 31619, sum loss: 5413.933594, avg loss: 2.942355, ppl: 18.960449 +epoch: 1, batch: 31620, sum loss: 4136.085449, avg loss: 2.691012, ppl: 14.746594 +epoch: 1, batch: 31621, sum loss: 4280.053711, avg loss: 2.546135, ppl: 12.757706 +epoch: 1, batch: 31622, sum loss: 4100.787598, avg loss: 2.413648, ppl: 11.174651 +epoch: 1, batch: 31623, sum loss: 4093.432129, avg loss: 2.602309, ppl: 13.494862 +epoch: 1, batch: 31624, sum loss: 3882.144531, avg loss: 2.338641, ppl: 10.367143 +epoch: 1, batch: 31625, sum loss: 4346.544434, avg loss: 2.768500, ppl: 15.934708 +epoch: 1, batch: 31626, sum loss: 4599.503906, avg loss: 2.769117, ppl: 15.944554 +epoch: 1, batch: 31627, sum loss: 5745.240723, avg loss: 2.783547, ppl: 16.176296 +epoch: 1, batch: 31628, sum loss: 4872.398926, avg loss: 2.831144, ppl: 16.964859 +epoch: 1, batch: 31629, sum loss: 4021.258545, avg loss: 2.750519, ppl: 15.650753 +epoch: 1, batch: 31630, sum loss: 4164.092285, avg loss: 2.446588, ppl: 11.548869 +epoch: 1, batch: 31631, sum loss: 4181.566895, avg loss: 2.431144, ppl: 11.371879 +epoch: 1, batch: 31632, sum loss: 
4247.361328, avg loss: 2.647981, ppl: 14.125489 +epoch: 1, batch: 31633, sum loss: 3490.114502, avg loss: 2.356593, ppl: 10.554931 +epoch: 1, batch: 31634, sum loss: 4031.818604, avg loss: 2.292109, ppl: 9.895782 +epoch: 1, batch: 31635, sum loss: 4905.507324, avg loss: 2.605155, ppl: 13.533326 +epoch: 1, batch: 31636, sum loss: 3806.075684, avg loss: 2.471478, ppl: 11.839931 +epoch: 1, batch: 31637, sum loss: 4243.076660, avg loss: 2.757035, ppl: 15.753062 +epoch: 1, batch: 31638, sum loss: 3852.540039, avg loss: 2.217927, ppl: 9.188268 +epoch: 1, batch: 31639, sum loss: 4263.539551, avg loss: 2.534803, ppl: 12.613951 +epoch: 1, batch: 31640, sum loss: 4356.614258, avg loss: 2.640372, ppl: 14.018421 +epoch: 1, batch: 31641, sum loss: 4469.698242, avg loss: 2.594137, ppl: 13.385034 +epoch: 1, batch: 31642, sum loss: 4009.460938, avg loss: 2.373867, ppl: 10.738837 +epoch: 1, batch: 31643, sum loss: 5124.810547, avg loss: 2.618707, ppl: 13.717980 +epoch: 1, batch: 31644, sum loss: 3718.568848, avg loss: 2.446427, ppl: 11.547014 +epoch: 1, batch: 31645, sum loss: 4803.716797, avg loss: 2.781538, ppl: 16.143839 +epoch: 1, batch: 31646, sum loss: 4948.090820, avg loss: 2.708315, ppl: 15.003967 +epoch: 1, batch: 31647, sum loss: 3855.521973, avg loss: 2.592819, ppl: 13.367401 +epoch: 1, batch: 31648, sum loss: 5900.219238, avg loss: 3.024203, ppl: 20.577595 +epoch: 1, batch: 31649, sum loss: 4924.999023, avg loss: 2.790368, ppl: 16.287010 +epoch: 1, batch: 31650, sum loss: 5431.561523, avg loss: 2.892205, ppl: 18.033033 +epoch: 1, batch: 31651, sum loss: 4364.901367, avg loss: 2.719565, ppl: 15.173716 +epoch: 1, batch: 31652, sum loss: 3635.901123, avg loss: 2.249939, ppl: 9.487155 +epoch: 1, batch: 31653, sum loss: 4688.094238, avg loss: 2.632282, ppl: 13.905466 +epoch: 1, batch: 31654, sum loss: 4147.511230, avg loss: 2.375436, ppl: 10.755702 +epoch: 1, batch: 31655, sum loss: 4054.000244, avg loss: 2.620556, ppl: 13.743364 +epoch: 1, batch: 31656, sum loss: 
5113.516602, avg loss: 2.786658, ppl: 16.226692 +epoch: 1, batch: 31657, sum loss: 4360.210449, avg loss: 2.510196, ppl: 12.307342 +epoch: 1, batch: 31658, sum loss: 4594.420410, avg loss: 2.556717, ppl: 12.893420 +epoch: 1, batch: 31659, sum loss: 4757.076172, avg loss: 2.809850, ppl: 16.607431 +epoch: 1, batch: 31660, sum loss: 6535.906738, avg loss: 2.769452, ppl: 15.949889 +epoch: 1, batch: 31661, sum loss: 4421.680664, avg loss: 2.768742, ppl: 15.938568 +epoch: 1, batch: 31662, sum loss: 4540.617188, avg loss: 2.507243, ppl: 12.271054 +epoch: 1, batch: 31663, sum loss: 5386.996094, avg loss: 2.678765, ppl: 14.567093 +epoch: 1, batch: 31664, sum loss: 4491.004883, avg loss: 2.601973, ppl: 13.490322 +epoch: 1, batch: 31665, sum loss: 3822.480713, avg loss: 2.536484, ppl: 12.635161 +epoch: 1, batch: 31666, sum loss: 4913.219238, avg loss: 2.945575, ppl: 19.021601 +epoch: 1, batch: 31667, sum loss: 4296.225098, avg loss: 2.622848, ppl: 13.774896 +epoch: 1, batch: 31668, sum loss: 4109.290039, avg loss: 2.476968, ppl: 11.905114 +epoch: 1, batch: 31669, sum loss: 4019.541016, avg loss: 2.679694, ppl: 14.580630 +epoch: 1, batch: 31670, sum loss: 4895.983398, avg loss: 2.586362, ppl: 13.281364 +epoch: 1, batch: 31671, sum loss: 3639.280273, avg loss: 2.369323, ppl: 10.690155 +epoch: 1, batch: 31672, sum loss: 3962.631836, avg loss: 2.435545, ppl: 11.422042 +epoch: 1, batch: 31673, sum loss: 4458.298828, avg loss: 2.636487, ppl: 13.964058 +epoch: 1, batch: 31674, sum loss: 4381.474121, avg loss: 2.689671, ppl: 14.726830 +epoch: 1, batch: 31675, sum loss: 3850.830811, avg loss: 2.423430, ppl: 11.284504 +epoch: 1, batch: 31676, sum loss: 4542.869629, avg loss: 2.508487, ppl: 12.286326 +epoch: 1, batch: 31677, sum loss: 3555.501709, avg loss: 2.436944, ppl: 11.438035 +epoch: 1, batch: 31678, sum loss: 5376.583496, avg loss: 2.980368, ppl: 19.695057 +epoch: 1, batch: 31679, sum loss: 5312.040039, avg loss: 2.904341, ppl: 18.253220 +epoch: 1, batch: 31680, sum loss: 
4804.323242, avg loss: 2.824411, ppl: 16.851015 +epoch: 1, batch: 31681, sum loss: 5354.870117, avg loss: 2.837769, ppl: 17.077623 +epoch: 1, batch: 31682, sum loss: 4795.885742, avg loss: 2.701907, ppl: 14.908140 +epoch: 1, batch: 31683, sum loss: 4919.850098, avg loss: 2.782721, ppl: 16.162937 +epoch: 1, batch: 31684, sum loss: 4281.818359, avg loss: 2.774996, ppl: 16.038559 +epoch: 1, batch: 31685, sum loss: 4165.825684, avg loss: 2.611803, ppl: 13.623592 +epoch: 1, batch: 31686, sum loss: 4692.097168, avg loss: 2.634530, ppl: 13.936755 +epoch: 1, batch: 31687, sum loss: 4901.492188, avg loss: 2.816950, ppl: 16.725752 +epoch: 1, batch: 31688, sum loss: 3575.223145, avg loss: 2.438761, ppl: 11.458832 +epoch: 1, batch: 31689, sum loss: 4652.076172, avg loss: 2.664419, ppl: 14.359611 +epoch: 1, batch: 31690, sum loss: 4009.502930, avg loss: 2.468906, ppl: 11.809517 +epoch: 1, batch: 31691, sum loss: 3717.994385, avg loss: 2.392532, ppl: 10.941160 +epoch: 1, batch: 31692, sum loss: 4563.349609, avg loss: 2.503209, ppl: 12.221649 +epoch: 1, batch: 31693, sum loss: 4512.658691, avg loss: 2.483577, ppl: 11.984049 +epoch: 1, batch: 31694, sum loss: 4232.035156, avg loss: 2.636782, ppl: 13.968181 +epoch: 1, batch: 31695, sum loss: 4471.420410, avg loss: 2.685538, ppl: 14.666087 +epoch: 1, batch: 31696, sum loss: 4684.912109, avg loss: 2.637901, ppl: 13.983822 +epoch: 1, batch: 31697, sum loss: 5149.789062, avg loss: 2.843616, ppl: 17.177776 +epoch: 1, batch: 31698, sum loss: 4387.624512, avg loss: 2.681922, ppl: 14.613155 +epoch: 1, batch: 31699, sum loss: 5019.433594, avg loss: 2.808861, ppl: 16.591003 +epoch: 1, batch: 31700, sum loss: 4645.053223, avg loss: 2.657353, ppl: 14.258499 +epoch: 1, batch: 31701, sum loss: 5202.698242, avg loss: 2.898439, ppl: 18.145802 +epoch: 1, batch: 31702, sum loss: 3824.925049, avg loss: 2.488565, ppl: 12.043986 +epoch: 1, batch: 31703, sum loss: 4180.664551, avg loss: 2.575887, ppl: 13.142969 +epoch: 1, batch: 31704, sum loss: 
4800.916016, avg loss: 2.557760, ppl: 12.906877 +epoch: 1, batch: 31705, sum loss: 3591.845947, avg loss: 2.487428, ppl: 12.030293 +epoch: 1, batch: 31706, sum loss: 5057.203613, avg loss: 2.790951, ppl: 16.296515 +epoch: 1, batch: 31707, sum loss: 4194.819824, avg loss: 2.454546, ppl: 11.641152 +epoch: 1, batch: 31708, sum loss: 3770.174805, avg loss: 2.518487, ppl: 12.409803 +epoch: 1, batch: 31709, sum loss: 4596.684082, avg loss: 2.514597, ppl: 12.361631 +epoch: 1, batch: 31710, sum loss: 4251.040039, avg loss: 2.458670, ppl: 11.689253 +epoch: 1, batch: 31711, sum loss: 4867.284180, avg loss: 2.790874, ppl: 16.295252 +epoch: 1, batch: 31712, sum loss: 3973.942627, avg loss: 2.344509, ppl: 10.428150 +epoch: 1, batch: 31713, sum loss: 5235.674805, avg loss: 2.819426, ppl: 16.767229 +epoch: 1, batch: 31714, sum loss: 4388.314453, avg loss: 2.677434, ppl: 14.547715 +epoch: 1, batch: 31715, sum loss: 4922.525879, avg loss: 2.774817, ppl: 16.035696 +epoch: 1, batch: 31716, sum loss: 3684.430176, avg loss: 2.398718, ppl: 11.009049 +epoch: 1, batch: 31717, sum loss: 4334.366699, avg loss: 2.567753, ppl: 13.036496 +epoch: 1, batch: 31718, sum loss: 4419.833008, avg loss: 2.488645, ppl: 12.044942 +epoch: 1, batch: 31719, sum loss: 4007.878418, avg loss: 2.773618, ppl: 16.016481 +epoch: 1, batch: 31720, sum loss: 4618.438477, avg loss: 2.610762, ppl: 13.609422 +epoch: 1, batch: 31721, sum loss: 4608.983398, avg loss: 2.580618, ppl: 13.205296 +epoch: 1, batch: 31722, sum loss: 4771.026367, avg loss: 2.515038, ppl: 12.367073 +epoch: 1, batch: 31723, sum loss: 3959.738281, avg loss: 2.402754, ppl: 11.053575 +epoch: 1, batch: 31724, sum loss: 5083.110840, avg loss: 2.738745, ppl: 15.467561 +epoch: 1, batch: 31725, sum loss: 4460.231445, avg loss: 2.849988, ppl: 17.287575 +epoch: 1, batch: 31726, sum loss: 4407.598145, avg loss: 2.515753, ppl: 12.375918 +epoch: 1, batch: 31727, sum loss: 4499.477539, avg loss: 2.654559, ppl: 14.218713 +epoch: 1, batch: 31728, sum loss: 
3877.131836, avg loss: 2.356919, ppl: 10.558372 +epoch: 1, batch: 31729, sum loss: 5139.533203, avg loss: 2.730889, ppl: 15.346525 +epoch: 1, batch: 31730, sum loss: 3778.662109, avg loss: 2.485962, ppl: 12.012670 +epoch: 1, batch: 31731, sum loss: 4854.651855, avg loss: 2.608625, ppl: 13.580371 +epoch: 1, batch: 31732, sum loss: 4851.967285, avg loss: 2.658612, ppl: 14.276464 +epoch: 1, batch: 31733, sum loss: 3758.465820, avg loss: 2.696173, ppl: 14.822902 +epoch: 1, batch: 31734, sum loss: 4441.727051, avg loss: 2.599021, ppl: 13.450566 +epoch: 1, batch: 31735, sum loss: 4499.565430, avg loss: 2.646803, ppl: 14.108866 +epoch: 1, batch: 31736, sum loss: 4235.173828, avg loss: 2.593493, ppl: 13.376410 +epoch: 1, batch: 31737, sum loss: 3992.071289, avg loss: 2.588892, ppl: 13.315010 +epoch: 1, batch: 31738, sum loss: 5127.113770, avg loss: 2.913133, ppl: 18.414394 +epoch: 1, batch: 31739, sum loss: 3947.133301, avg loss: 2.444045, ppl: 11.519547 +epoch: 1, batch: 31740, sum loss: 4904.203613, avg loss: 2.718516, ppl: 15.157817 +epoch: 1, batch: 31741, sum loss: 4824.321289, avg loss: 2.696658, ppl: 14.830089 +epoch: 1, batch: 31742, sum loss: 4008.006104, avg loss: 2.656068, ppl: 14.240184 +epoch: 1, batch: 31743, sum loss: 4115.242676, avg loss: 2.632913, ppl: 13.914242 +epoch: 1, batch: 31744, sum loss: 4247.880859, avg loss: 2.566695, ppl: 13.022719 +epoch: 1, batch: 31745, sum loss: 4906.000977, avg loss: 2.704521, ppl: 14.947154 +epoch: 1, batch: 31746, sum loss: 3474.772461, avg loss: 2.272578, ppl: 9.704391 +epoch: 1, batch: 31747, sum loss: 4386.521973, avg loss: 2.637716, ppl: 13.981238 +epoch: 1, batch: 31748, sum loss: 4425.752441, avg loss: 2.629681, ppl: 13.869340 +epoch: 1, batch: 31749, sum loss: 3562.481445, avg loss: 2.343738, ppl: 10.420113 +epoch: 1, batch: 31750, sum loss: 3979.930664, avg loss: 2.396105, ppl: 10.980325 +epoch: 1, batch: 31751, sum loss: 4143.821289, avg loss: 2.637697, ppl: 13.980965 +epoch: 1, batch: 31752, sum loss: 
4749.083984, avg loss: 2.561534, ppl: 12.955675 +epoch: 1, batch: 31753, sum loss: 4021.631348, avg loss: 2.446248, ppl: 11.544947 +epoch: 1, batch: 31754, sum loss: 4591.379883, avg loss: 2.666306, ppl: 14.386733 +epoch: 1, batch: 31755, sum loss: 3598.149414, avg loss: 2.318395, ppl: 10.159359 +epoch: 1, batch: 31756, sum loss: 4162.873047, avg loss: 2.595308, ppl: 13.400708 +epoch: 1, batch: 31757, sum loss: 4354.000977, avg loss: 2.604068, ppl: 13.518618 +epoch: 1, batch: 31758, sum loss: 4671.420898, avg loss: 2.640713, ppl: 14.023198 +epoch: 1, batch: 31759, sum loss: 5266.901855, avg loss: 2.932574, ppl: 18.775888 +epoch: 1, batch: 31760, sum loss: 4774.679688, avg loss: 2.609115, ppl: 13.587016 +epoch: 1, batch: 31761, sum loss: 4817.452148, avg loss: 2.828803, ppl: 16.925194 +epoch: 1, batch: 31762, sum loss: 4275.705566, avg loss: 2.412926, ppl: 11.166592 +epoch: 1, batch: 31763, sum loss: 5552.800293, avg loss: 2.773626, ppl: 16.016611 +epoch: 1, batch: 31764, sum loss: 4270.091309, avg loss: 2.629367, ppl: 13.864985 +epoch: 1, batch: 31765, sum loss: 4004.626465, avg loss: 2.523394, ppl: 12.470856 +epoch: 1, batch: 31766, sum loss: 3539.529541, avg loss: 2.391574, ppl: 10.930687 +epoch: 1, batch: 31767, sum loss: 4732.806152, avg loss: 2.616256, ppl: 13.684390 +epoch: 1, batch: 31768, sum loss: 4291.970703, avg loss: 2.502607, ppl: 12.214291 +epoch: 1, batch: 31769, sum loss: 3686.750732, avg loss: 2.535592, ppl: 12.623903 +epoch: 1, batch: 31770, sum loss: 4346.291016, avg loss: 2.711348, ppl: 15.049549 +epoch: 1, batch: 31771, sum loss: 3968.796631, avg loss: 2.568800, ppl: 13.050160 +epoch: 1, batch: 31772, sum loss: 3066.628174, avg loss: 2.386481, ppl: 10.875157 +epoch: 1, batch: 31773, sum loss: 4444.283203, avg loss: 2.551253, ppl: 12.823165 +epoch: 1, batch: 31774, sum loss: 4921.175293, avg loss: 2.655788, ppl: 14.236199 +epoch: 1, batch: 31775, sum loss: 5028.318359, avg loss: 2.828076, ppl: 16.912884 +epoch: 1, batch: 31776, sum loss: 
4784.222656, avg loss: 2.641757, ppl: 14.037853 +epoch: 1, batch: 31777, sum loss: 3565.789551, avg loss: 2.402823, ppl: 11.054341 +epoch: 1, batch: 31778, sum loss: 3897.248291, avg loss: 2.454187, ppl: 11.636965 +epoch: 1, batch: 31779, sum loss: 3867.237549, avg loss: 2.567887, ppl: 13.038243 +epoch: 1, batch: 31780, sum loss: 5762.650391, avg loss: 2.757249, ppl: 15.756435 +epoch: 1, batch: 31781, sum loss: 4074.552979, avg loss: 2.585376, ppl: 13.268281 +epoch: 1, batch: 31782, sum loss: 4747.130859, avg loss: 2.797367, ppl: 16.401400 +epoch: 1, batch: 31783, sum loss: 4782.740234, avg loss: 2.692984, ppl: 14.775706 +epoch: 1, batch: 31784, sum loss: 4982.316406, avg loss: 2.614017, ppl: 13.653785 +epoch: 1, batch: 31785, sum loss: 3563.086426, avg loss: 2.479531, ppl: 11.935669 +epoch: 1, batch: 31786, sum loss: 4334.655762, avg loss: 2.707468, ppl: 14.991266 +epoch: 1, batch: 31787, sum loss: 5010.535156, avg loss: 2.641294, ppl: 14.031352 +epoch: 1, batch: 31788, sum loss: 4934.279297, avg loss: 2.816370, ppl: 16.716053 +epoch: 1, batch: 31789, sum loss: 4717.387207, avg loss: 2.669715, ppl: 14.435861 +epoch: 1, batch: 31790, sum loss: 3858.778809, avg loss: 2.456257, ppl: 11.661078 +epoch: 1, batch: 31791, sum loss: 5329.312988, avg loss: 2.825723, ppl: 16.873138 +epoch: 1, batch: 31792, sum loss: 4677.656250, avg loss: 2.539444, ppl: 12.672626 +epoch: 1, batch: 31793, sum loss: 4539.457031, avg loss: 2.623964, ppl: 13.790275 +epoch: 1, batch: 31794, sum loss: 4284.807129, avg loss: 2.617476, ppl: 13.701092 +epoch: 1, batch: 31795, sum loss: 4612.042969, avg loss: 2.633948, ppl: 13.928653 +epoch: 1, batch: 31796, sum loss: 5291.958008, avg loss: 2.715217, ppl: 15.107890 +epoch: 1, batch: 31797, sum loss: 4690.697754, avg loss: 2.598725, ppl: 13.446577 +epoch: 1, batch: 31798, sum loss: 3215.337402, avg loss: 2.338427, ppl: 10.364921 +epoch: 1, batch: 31799, sum loss: 4240.938965, avg loss: 2.592261, ppl: 13.359945 +epoch: 1, batch: 31800, sum loss: 
3671.937012, avg loss: 2.443072, ppl: 11.508338 +epoch: 1, batch: 31801, sum loss: 4445.958008, avg loss: 2.686380, ppl: 14.678439 +epoch: 1, batch: 31802, sum loss: 4929.361328, avg loss: 2.624793, ppl: 13.801718 +epoch: 1, batch: 31803, sum loss: 4059.822998, avg loss: 2.704746, ppl: 14.950512 +epoch: 1, batch: 31804, sum loss: 4801.498535, avg loss: 2.595405, ppl: 13.402009 +epoch: 1, batch: 31805, sum loss: 3921.530029, avg loss: 2.286606, ppl: 9.841482 +epoch: 1, batch: 31806, sum loss: 4731.169922, avg loss: 2.869115, ppl: 17.621410 +epoch: 1, batch: 31807, sum loss: 4425.810059, avg loss: 2.693737, ppl: 14.786832 +epoch: 1, batch: 31808, sum loss: 4988.416016, avg loss: 2.584672, ppl: 13.258936 +epoch: 1, batch: 31809, sum loss: 4354.721191, avg loss: 2.479910, ppl: 11.940186 +epoch: 1, batch: 31810, sum loss: 4377.852051, avg loss: 2.459467, ppl: 11.698580 +epoch: 1, batch: 31811, sum loss: 4392.173828, avg loss: 2.530054, ppl: 12.554185 +epoch: 1, batch: 31812, sum loss: 3945.858643, avg loss: 2.560583, ppl: 12.943362 +epoch: 1, batch: 31813, sum loss: 3805.358643, avg loss: 2.204727, ppl: 9.067775 +epoch: 1, batch: 31814, sum loss: 4605.717285, avg loss: 2.710840, ppl: 15.041909 +epoch: 1, batch: 31815, sum loss: 3865.162354, avg loss: 2.521306, ppl: 12.444842 +epoch: 1, batch: 31816, sum loss: 3751.706787, avg loss: 2.422019, ppl: 11.268582 +epoch: 1, batch: 31817, sum loss: 4169.438477, avg loss: 2.551676, ppl: 12.828587 +epoch: 1, batch: 31818, sum loss: 5366.163574, avg loss: 2.703357, ppl: 14.929767 +epoch: 1, batch: 31819, sum loss: 3913.864746, avg loss: 2.455373, ppl: 11.650779 +epoch: 1, batch: 31820, sum loss: 4397.629395, avg loss: 2.596003, ppl: 13.410031 +epoch: 1, batch: 31821, sum loss: 4573.019531, avg loss: 2.595357, ppl: 13.401377 +epoch: 1, batch: 31822, sum loss: 4057.460693, avg loss: 2.503060, ppl: 12.219831 +epoch: 1, batch: 31823, sum loss: 4471.614258, avg loss: 2.641237, ppl: 14.030549 +epoch: 1, batch: 31824, sum loss: 
4674.396973, avg loss: 2.364389, ppl: 10.637537 +epoch: 1, batch: 31825, sum loss: 4122.574707, avg loss: 2.480490, ppl: 11.947120 +epoch: 1, batch: 31826, sum loss: 3744.009766, avg loss: 2.591010, ppl: 13.343246 +epoch: 1, batch: 31827, sum loss: 3666.224609, avg loss: 2.410404, ppl: 11.138462 +epoch: 1, batch: 31828, sum loss: 4411.923828, avg loss: 2.721730, ppl: 15.206599 +epoch: 1, batch: 31829, sum loss: 4566.623535, avg loss: 2.582932, ppl: 13.235888 +epoch: 1, batch: 31830, sum loss: 3345.511719, avg loss: 2.360982, ppl: 10.601358 +epoch: 1, batch: 31831, sum loss: 4202.173340, avg loss: 2.666354, ppl: 14.387413 +epoch: 1, batch: 31832, sum loss: 4289.801758, avg loss: 2.765830, ppl: 15.892219 +epoch: 1, batch: 31833, sum loss: 5742.349609, avg loss: 2.984589, ppl: 19.778374 +epoch: 1, batch: 31834, sum loss: 4919.445312, avg loss: 2.832150, ppl: 16.981941 +epoch: 1, batch: 31835, sum loss: 4768.082031, avg loss: 2.591349, ppl: 13.347764 +epoch: 1, batch: 31836, sum loss: 5283.401367, avg loss: 2.971542, ppl: 19.521997 +epoch: 1, batch: 31837, sum loss: 4106.914551, avg loss: 2.752624, ppl: 15.683729 +epoch: 1, batch: 31838, sum loss: 4195.008789, avg loss: 2.648364, ppl: 14.130902 +epoch: 1, batch: 31839, sum loss: 4523.380371, avg loss: 2.706990, ppl: 14.984109 +epoch: 1, batch: 31840, sum loss: 3678.481445, avg loss: 2.412119, ppl: 11.157578 +epoch: 1, batch: 31841, sum loss: 4416.638184, avg loss: 2.560370, ppl: 12.940607 +epoch: 1, batch: 31842, sum loss: 4894.863770, avg loss: 2.668955, ppl: 14.424889 +epoch: 1, batch: 31843, sum loss: 3450.236572, avg loss: 2.416132, ppl: 11.202447 +epoch: 1, batch: 31844, sum loss: 4920.895508, avg loss: 2.925622, ppl: 18.645811 +epoch: 1, batch: 31845, sum loss: 4333.988770, avg loss: 2.493665, ppl: 12.105556 +epoch: 1, batch: 31846, sum loss: 4233.276855, avg loss: 2.347907, ppl: 10.463650 +epoch: 1, batch: 31847, sum loss: 3941.807129, avg loss: 2.439237, ppl: 11.464292 +epoch: 1, batch: 31848, sum loss: 
4855.341797, avg loss: 2.624509, ppl: 13.797799 +epoch: 1, batch: 31849, sum loss: 4756.026367, avg loss: 2.601765, ppl: 13.487521 +epoch: 1, batch: 31850, sum loss: 5032.481445, avg loss: 2.763581, ppl: 15.856528 +epoch: 1, batch: 31851, sum loss: 4202.721191, avg loss: 2.586290, ppl: 13.280408 +epoch: 1, batch: 31852, sum loss: 4302.682617, avg loss: 2.477077, ppl: 11.906411 +epoch: 1, batch: 31853, sum loss: 4188.533203, avg loss: 2.553984, ppl: 12.858225 +epoch: 1, batch: 31854, sum loss: 4500.675781, avg loss: 2.819972, ppl: 16.776386 +epoch: 1, batch: 31855, sum loss: 3877.811768, avg loss: 2.503429, ppl: 12.224341 +epoch: 1, batch: 31856, sum loss: 4721.046387, avg loss: 2.730507, ppl: 15.340661 +epoch: 1, batch: 31857, sum loss: 4532.650391, avg loss: 2.566620, ppl: 13.021731 +epoch: 1, batch: 31858, sum loss: 4143.004395, avg loss: 2.608945, ppl: 13.584710 +epoch: 1, batch: 31859, sum loss: 4639.320312, avg loss: 2.713053, ppl: 15.075226 +epoch: 1, batch: 31860, sum loss: 5146.327148, avg loss: 2.952569, ppl: 19.155090 +epoch: 1, batch: 31861, sum loss: 3860.631836, avg loss: 2.421977, ppl: 11.268118 +epoch: 1, batch: 31862, sum loss: 4283.618652, avg loss: 2.522744, ppl: 12.462744 +epoch: 1, batch: 31863, sum loss: 4662.443848, avg loss: 2.551967, ppl: 12.832322 +epoch: 1, batch: 31864, sum loss: 4891.779297, avg loss: 2.757485, ppl: 15.760162 +epoch: 1, batch: 31865, sum loss: 4998.944336, avg loss: 2.697757, ppl: 14.846397 +epoch: 1, batch: 31866, sum loss: 3826.285645, avg loss: 2.547460, ppl: 12.774619 +epoch: 1, batch: 31867, sum loss: 4997.352051, avg loss: 2.852370, ppl: 17.328802 +epoch: 1, batch: 31868, sum loss: 3780.957764, avg loss: 2.600384, ppl: 13.468903 +epoch: 1, batch: 31869, sum loss: 4722.979980, avg loss: 2.714356, ppl: 15.094889 +epoch: 1, batch: 31870, sum loss: 4244.217773, avg loss: 2.578504, ppl: 13.177414 +epoch: 1, batch: 31871, sum loss: 5066.360352, avg loss: 2.539529, ppl: 12.673698 +epoch: 1, batch: 31872, sum loss: 
4678.210449, avg loss: 2.688627, ppl: 14.711460 +epoch: 1, batch: 31873, sum loss: 5130.362793, avg loss: 2.672064, ppl: 14.469805 +epoch: 1, batch: 31874, sum loss: 5049.354004, avg loss: 2.833532, ppl: 17.005419 +epoch: 1, batch: 31875, sum loss: 4291.145508, avg loss: 2.558823, ppl: 12.920596 +epoch: 1, batch: 31876, sum loss: 4686.025879, avg loss: 2.883708, ppl: 17.880455 +epoch: 1, batch: 31877, sum loss: 4726.541016, avg loss: 2.582809, ppl: 13.234267 +epoch: 1, batch: 31878, sum loss: 3979.846680, avg loss: 2.724057, ppl: 15.242029 +epoch: 1, batch: 31879, sum loss: 4314.961426, avg loss: 2.681766, ppl: 14.610874 +epoch: 1, batch: 31880, sum loss: 4444.151855, avg loss: 2.455333, ppl: 11.650310 +epoch: 1, batch: 31881, sum loss: 4757.800781, avg loss: 2.678942, ppl: 14.569670 +epoch: 1, batch: 31882, sum loss: 3832.324219, avg loss: 2.434767, ppl: 11.413165 +epoch: 1, batch: 31883, sum loss: 4226.825684, avg loss: 2.553973, ppl: 12.858093 +epoch: 1, batch: 31884, sum loss: 4916.452148, avg loss: 2.836960, ppl: 17.063814 +epoch: 1, batch: 31885, sum loss: 5112.952148, avg loss: 2.695283, ppl: 14.809711 +epoch: 1, batch: 31886, sum loss: 3739.481201, avg loss: 2.368259, ppl: 10.678786 +epoch: 1, batch: 31887, sum loss: 4258.142090, avg loss: 2.465630, ppl: 11.770890 +epoch: 1, batch: 31888, sum loss: 4483.028809, avg loss: 2.678034, ppl: 14.556448 +epoch: 1, batch: 31889, sum loss: 4092.052979, avg loss: 2.393013, ppl: 10.946431 +epoch: 1, batch: 31890, sum loss: 3833.986328, avg loss: 2.446705, ppl: 11.550224 +epoch: 1, batch: 31891, sum loss: 4597.179688, avg loss: 2.619476, ppl: 13.728522 +epoch: 1, batch: 31892, sum loss: 5723.948242, avg loss: 2.632911, ppl: 13.914211 +epoch: 1, batch: 31893, sum loss: 5260.073242, avg loss: 2.963421, ppl: 19.364109 +epoch: 1, batch: 31894, sum loss: 5127.229980, avg loss: 2.761028, ppl: 15.816098 +epoch: 1, batch: 31895, sum loss: 4610.874023, avg loss: 2.705912, ppl: 14.967963 +epoch: 1, batch: 31896, sum loss: 
5110.992676, avg loss: 2.502935, ppl: 12.218298 +epoch: 1, batch: 31897, sum loss: 3855.958252, avg loss: 2.486111, ppl: 12.014460 +epoch: 1, batch: 31898, sum loss: 4848.682129, avg loss: 2.996713, ppl: 20.019627 +epoch: 1, batch: 31899, sum loss: 4078.564941, avg loss: 2.674469, ppl: 14.504642 +epoch: 1, batch: 31900, sum loss: 4755.623047, avg loss: 2.673200, ppl: 14.486253 +epoch: 1, batch: 31901, sum loss: 4325.555664, avg loss: 2.631117, ppl: 13.889271 +epoch: 1, batch: 31902, sum loss: 3867.957520, avg loss: 2.524776, ppl: 12.488103 +epoch: 1, batch: 31903, sum loss: 5304.562500, avg loss: 2.976747, ppl: 19.623873 +epoch: 1, batch: 31904, sum loss: 4271.936523, avg loss: 2.717517, ppl: 15.142674 +epoch: 1, batch: 31905, sum loss: 5123.423828, avg loss: 2.789017, ppl: 16.265022 +epoch: 1, batch: 31906, sum loss: 3768.101562, avg loss: 2.488839, ppl: 12.047277 +epoch: 1, batch: 31907, sum loss: 4944.542480, avg loss: 2.763858, ppl: 15.860922 +epoch: 1, batch: 31908, sum loss: 4537.930664, avg loss: 2.745270, ppl: 15.568810 +epoch: 1, batch: 31909, sum loss: 4642.622559, avg loss: 2.655963, ppl: 14.238688 +epoch: 1, batch: 31910, sum loss: 4910.377930, avg loss: 2.741696, ppl: 15.513275 +epoch: 1, batch: 31911, sum loss: 3469.976562, avg loss: 2.217237, ppl: 9.181931 +epoch: 1, batch: 31912, sum loss: 4275.035156, avg loss: 2.492732, ppl: 12.094271 +epoch: 1, batch: 31913, sum loss: 4459.500488, avg loss: 2.651308, ppl: 14.172568 +epoch: 1, batch: 31914, sum loss: 4709.654785, avg loss: 2.847434, ppl: 17.243471 +epoch: 1, batch: 31915, sum loss: 3467.995117, avg loss: 2.421784, ppl: 11.265944 +epoch: 1, batch: 31916, sum loss: 4195.611816, avg loss: 2.668964, ppl: 14.425023 +epoch: 1, batch: 31917, sum loss: 4953.043457, avg loss: 2.899908, ppl: 18.172480 +epoch: 1, batch: 31918, sum loss: 4203.360840, avg loss: 2.640302, ppl: 14.017435 +epoch: 1, batch: 31919, sum loss: 4638.419922, avg loss: 2.789188, ppl: 16.267807 +epoch: 1, batch: 31920, sum loss: 
5246.585449, avg loss: 2.804161, ppl: 16.513220 +epoch: 1, batch: 31921, sum loss: 4227.335449, avg loss: 2.554281, ppl: 12.862052 +epoch: 1, batch: 31922, sum loss: 4599.809570, avg loss: 2.765971, ppl: 15.894465 +epoch: 1, batch: 31923, sum loss: 4168.464844, avg loss: 2.509612, ppl: 12.300152 +epoch: 1, batch: 31924, sum loss: 4937.364258, avg loss: 2.668846, ppl: 14.423310 +epoch: 1, batch: 31925, sum loss: 4249.364258, avg loss: 2.558317, ppl: 12.914064 +epoch: 1, batch: 31926, sum loss: 5099.654297, avg loss: 2.470763, ppl: 11.831468 +epoch: 1, batch: 31927, sum loss: 4833.135254, avg loss: 2.642502, ppl: 14.048303 +epoch: 1, batch: 31928, sum loss: 3740.455566, avg loss: 2.438367, ppl: 11.454325 +epoch: 1, batch: 31929, sum loss: 4064.533936, avg loss: 2.700687, ppl: 14.889960 +epoch: 1, batch: 31930, sum loss: 3930.718506, avg loss: 2.282647, ppl: 9.802595 +epoch: 1, batch: 31931, sum loss: 5078.750488, avg loss: 2.621967, ppl: 13.762770 +epoch: 1, batch: 31932, sum loss: 3647.849854, avg loss: 2.641455, ppl: 14.033613 +epoch: 1, batch: 31933, sum loss: 4642.753906, avg loss: 2.520496, ppl: 12.434765 +epoch: 1, batch: 31934, sum loss: 5133.268066, avg loss: 2.866146, ppl: 17.569178 +epoch: 1, batch: 31935, sum loss: 3938.130859, avg loss: 2.755865, ppl: 15.734643 +epoch: 1, batch: 31936, sum loss: 5212.411133, avg loss: 2.645894, ppl: 14.096042 +epoch: 1, batch: 31937, sum loss: 3709.575684, avg loss: 2.359781, ppl: 10.588633 +epoch: 1, batch: 31938, sum loss: 4343.352539, avg loss: 2.723105, ppl: 15.227533 +epoch: 1, batch: 31939, sum loss: 4686.010742, avg loss: 2.746782, ppl: 15.592380 +epoch: 1, batch: 31940, sum loss: 4184.381836, avg loss: 2.718897, ppl: 15.163582 +epoch: 1, batch: 31941, sum loss: 3748.320312, avg loss: 2.381398, ppl: 10.820016 +epoch: 1, batch: 31942, sum loss: 3842.449707, avg loss: 2.546355, ppl: 12.760507 +epoch: 1, batch: 31943, sum loss: 4990.319336, avg loss: 2.797264, ppl: 16.399721 +epoch: 1, batch: 31944, sum loss: 
4274.609375, avg loss: 2.462332, ppl: 11.732144 +epoch: 1, batch: 31945, sum loss: 4621.230957, avg loss: 2.745829, ppl: 15.577528 +epoch: 1, batch: 31946, sum loss: 4709.210449, avg loss: 3.166920, ppl: 23.734274 +epoch: 1, batch: 31947, sum loss: 4552.226562, avg loss: 2.595340, ppl: 13.401143 +epoch: 1, batch: 31948, sum loss: 4062.099121, avg loss: 2.636015, ppl: 13.957471 +epoch: 1, batch: 31949, sum loss: 4459.334473, avg loss: 2.469177, ppl: 11.812727 +epoch: 1, batch: 31950, sum loss: 4526.806152, avg loss: 2.713913, ppl: 15.088196 +epoch: 1, batch: 31951, sum loss: 4545.122070, avg loss: 2.582456, ppl: 13.229586 +epoch: 1, batch: 31952, sum loss: 4414.897949, avg loss: 2.781914, ppl: 16.149910 +epoch: 1, batch: 31953, sum loss: 4089.422363, avg loss: 2.374810, ppl: 10.748968 +epoch: 1, batch: 31954, sum loss: 4251.585938, avg loss: 2.536746, ppl: 12.638475 +epoch: 1, batch: 31955, sum loss: 4359.301758, avg loss: 2.602568, ppl: 13.498363 +epoch: 1, batch: 31956, sum loss: 3139.324707, avg loss: 2.456436, ppl: 11.663166 +epoch: 1, batch: 31957, sum loss: 5346.703613, avg loss: 2.696270, ppl: 14.824337 +epoch: 1, batch: 31958, sum loss: 5074.147949, avg loss: 2.797215, ppl: 16.398912 +epoch: 1, batch: 31959, sum loss: 4914.613281, avg loss: 2.733378, ppl: 15.384765 +epoch: 1, batch: 31960, sum loss: 5290.351074, avg loss: 3.056240, ppl: 21.247507 +epoch: 1, batch: 31961, sum loss: 4329.807129, avg loss: 2.757839, ppl: 15.765736 +epoch: 1, batch: 31962, sum loss: 4630.250488, avg loss: 2.696710, ppl: 14.830853 +epoch: 1, batch: 31963, sum loss: 4038.367188, avg loss: 2.541452, ppl: 12.698094 +epoch: 1, batch: 31964, sum loss: 3748.974609, avg loss: 2.574845, ppl: 13.129286 +epoch: 1, batch: 31965, sum loss: 4314.156250, avg loss: 2.776162, ppl: 16.057281 +epoch: 1, batch: 31966, sum loss: 5080.339844, avg loss: 2.808369, ppl: 16.582855 +epoch: 1, batch: 31967, sum loss: 4171.567871, avg loss: 2.347534, ppl: 10.459743 +epoch: 1, batch: 31968, sum loss: 
5414.969238, avg loss: 2.872663, ppl: 17.684044 +epoch: 1, batch: 31969, sum loss: 4610.683594, avg loss: 2.846101, ppl: 17.220509 +epoch: 1, batch: 31970, sum loss: 4522.431641, avg loss: 2.591651, ppl: 13.351800 +epoch: 1, batch: 31971, sum loss: 4231.941406, avg loss: 2.422405, ppl: 11.272936 +epoch: 1, batch: 31972, sum loss: 4843.473633, avg loss: 2.716474, ppl: 15.126895 +epoch: 1, batch: 31973, sum loss: 4297.956543, avg loss: 2.737552, ppl: 15.449119 +epoch: 1, batch: 31974, sum loss: 4227.513672, avg loss: 2.750497, ppl: 15.650406 +epoch: 1, batch: 31975, sum loss: 4528.893066, avg loss: 2.626968, ppl: 13.831774 +epoch: 1, batch: 31976, sum loss: 4502.626953, avg loss: 2.396289, ppl: 10.982347 +epoch: 1, batch: 31977, sum loss: 4056.471680, avg loss: 2.370819, ppl: 10.706161 +epoch: 1, batch: 31978, sum loss: 5142.619141, avg loss: 2.893989, ppl: 18.065233 +epoch: 1, batch: 31979, sum loss: 4558.486328, avg loss: 2.752709, ppl: 15.685067 +epoch: 1, batch: 31980, sum loss: 3810.047607, avg loss: 2.577840, ppl: 13.168664 +epoch: 1, batch: 31981, sum loss: 4606.267578, avg loss: 2.419258, ppl: 11.237522 +epoch: 1, batch: 31982, sum loss: 3916.517334, avg loss: 2.632068, ppl: 13.902493 +epoch: 1, batch: 31983, sum loss: 4954.938965, avg loss: 2.584736, ppl: 13.259789 +epoch: 1, batch: 31984, sum loss: 4910.074219, avg loss: 2.797763, ppl: 16.407904 +epoch: 1, batch: 31985, sum loss: 4265.155273, avg loss: 2.602291, ppl: 13.494620 +epoch: 1, batch: 31986, sum loss: 3435.337402, avg loss: 2.340148, ppl: 10.382772 +epoch: 1, batch: 31987, sum loss: 4174.430176, avg loss: 2.517751, ppl: 12.400670 +epoch: 1, batch: 31988, sum loss: 4118.523438, avg loss: 2.506709, ppl: 12.264505 +epoch: 1, batch: 31989, sum loss: 4407.973633, avg loss: 2.628488, ppl: 13.852803 +epoch: 1, batch: 31990, sum loss: 4486.324219, avg loss: 2.507727, ppl: 12.276998 +epoch: 1, batch: 31991, sum loss: 4322.731445, avg loss: 2.471545, ppl: 11.840721 +epoch: 1, batch: 31992, sum loss: 
4455.030762, avg loss: 2.538479, ppl: 12.660401 +epoch: 1, batch: 31993, sum loss: 4308.955078, avg loss: 2.518384, ppl: 12.408531 +epoch: 1, batch: 31994, sum loss: 4630.758789, avg loss: 2.579810, ppl: 13.194630 +epoch: 1, batch: 31995, sum loss: 5557.873047, avg loss: 2.634063, ppl: 13.930254 +epoch: 1, batch: 31996, sum loss: 5159.187012, avg loss: 2.678706, ppl: 14.566228 +epoch: 1, batch: 31997, sum loss: 5552.506348, avg loss: 2.798642, ppl: 16.422333 +epoch: 1, batch: 31998, sum loss: 5252.753906, avg loss: 2.685457, ppl: 14.664902 +epoch: 1, batch: 31999, sum loss: 4062.088623, avg loss: 2.441159, ppl: 11.486346 +epoch: 1, batch: 32000, sum loss: 4211.106934, avg loss: 2.683943, ppl: 14.642720 +epoch: 1, batch: 32001, sum loss: 3844.583252, avg loss: 2.546082, ppl: 12.757021 +epoch: 1, batch: 32002, sum loss: 4150.564941, avg loss: 2.657212, ppl: 14.256487 +epoch: 1, batch: 32003, sum loss: 3573.263916, avg loss: 2.379004, ppl: 10.794144 +epoch: 1, batch: 32004, sum loss: 4209.825195, avg loss: 2.552956, ppl: 12.845022 +epoch: 1, batch: 32005, sum loss: 5041.198730, avg loss: 2.802223, ppl: 16.481239 +epoch: 1, batch: 32006, sum loss: 5361.954102, avg loss: 2.838515, ppl: 17.090361 +epoch: 1, batch: 32007, sum loss: 4894.709961, avg loss: 2.826045, ppl: 16.878574 +epoch: 1, batch: 32008, sum loss: 4187.618652, avg loss: 2.429013, ppl: 11.347679 +epoch: 1, batch: 32009, sum loss: 4984.807617, avg loss: 2.918505, ppl: 18.513597 +epoch: 1, batch: 32010, sum loss: 4206.827148, avg loss: 2.619444, ppl: 13.728087 +epoch: 1, batch: 32011, sum loss: 3653.262451, avg loss: 2.383080, ppl: 10.838239 +epoch: 1, batch: 32012, sum loss: 4918.541016, avg loss: 2.715926, ppl: 15.118599 +epoch: 1, batch: 32013, sum loss: 5018.118164, avg loss: 2.749654, ppl: 15.637218 +epoch: 1, batch: 32014, sum loss: 5087.034668, avg loss: 2.849880, ppl: 17.285698 +epoch: 1, batch: 32015, sum loss: 3653.165527, avg loss: 2.234352, ppl: 9.340426 +epoch: 1, batch: 32016, sum loss: 
4643.798828, avg loss: 2.665786, ppl: 14.379244 +epoch: 1, batch: 32017, sum loss: 4498.276367, avg loss: 2.550043, ppl: 12.807659 +epoch: 1, batch: 32018, sum loss: 3868.845215, avg loss: 2.607038, ppl: 13.558837 +epoch: 1, batch: 32019, sum loss: 4012.651123, avg loss: 2.528451, ppl: 12.534072 +epoch: 1, batch: 32020, sum loss: 3988.558838, avg loss: 2.560051, ppl: 12.936474 +epoch: 1, batch: 32021, sum loss: 3589.883545, avg loss: 2.435471, ppl: 11.421192 +epoch: 1, batch: 32022, sum loss: 4225.352539, avg loss: 2.449480, ppl: 11.582320 +epoch: 1, batch: 32023, sum loss: 5224.688477, avg loss: 2.698703, ppl: 14.860439 +epoch: 1, batch: 32024, sum loss: 4748.103027, avg loss: 2.574893, ppl: 13.129915 +epoch: 1, batch: 32025, sum loss: 4377.413086, avg loss: 2.591719, ppl: 13.352704 +epoch: 1, batch: 32026, sum loss: 4242.913086, avg loss: 2.522540, ppl: 12.460210 +epoch: 1, batch: 32027, sum loss: 4289.111328, avg loss: 2.498026, ppl: 12.158474 +epoch: 1, batch: 32028, sum loss: 4613.921387, avg loss: 2.804815, ppl: 16.524023 +epoch: 1, batch: 32029, sum loss: 4278.144531, avg loss: 2.543487, ppl: 12.723957 +epoch: 1, batch: 32030, sum loss: 4111.667969, avg loss: 2.440159, ppl: 11.474866 +epoch: 1, batch: 32031, sum loss: 5244.194824, avg loss: 2.819460, ppl: 16.767788 +epoch: 1, batch: 32032, sum loss: 5027.663574, avg loss: 2.802488, ppl: 16.485613 +epoch: 1, batch: 32033, sum loss: 4377.977539, avg loss: 2.690828, ppl: 14.743880 +epoch: 1, batch: 32034, sum loss: 4102.382812, avg loss: 2.644992, ppl: 14.083334 +epoch: 1, batch: 32035, sum loss: 4665.095215, avg loss: 2.646112, ppl: 14.099114 +epoch: 1, batch: 32036, sum loss: 3068.355225, avg loss: 2.217020, ppl: 9.179930 +epoch: 1, batch: 32037, sum loss: 4333.640625, avg loss: 2.493464, ppl: 12.103131 +epoch: 1, batch: 32038, sum loss: 3960.945068, avg loss: 2.418159, ppl: 11.225178 +epoch: 1, batch: 32039, sum loss: 3805.178223, avg loss: 2.592083, ppl: 13.357570 +epoch: 1, batch: 32040, sum loss: 
4148.959473, avg loss: 2.564252, ppl: 12.990936 +epoch: 1, batch: 32041, sum loss: 4510.105469, avg loss: 2.745043, ppl: 15.565280 +epoch: 1, batch: 32042, sum loss: 4759.640137, avg loss: 2.702805, ppl: 14.921532 +epoch: 1, batch: 32043, sum loss: 3219.489502, avg loss: 2.585935, ppl: 13.275698 +epoch: 1, batch: 32044, sum loss: 4949.416504, avg loss: 2.530376, ppl: 12.558232 +epoch: 1, batch: 32045, sum loss: 3970.516846, avg loss: 2.400554, ppl: 11.029289 +epoch: 1, batch: 32046, sum loss: 4871.801758, avg loss: 2.884430, ppl: 17.893360 +epoch: 1, batch: 32047, sum loss: 5247.451172, avg loss: 2.725949, ppl: 15.270892 +epoch: 1, batch: 32048, sum loss: 4887.686035, avg loss: 2.786594, ppl: 16.225660 +epoch: 1, batch: 32049, sum loss: 4236.626953, avg loss: 2.550648, ppl: 12.815412 +epoch: 1, batch: 32050, sum loss: 4264.993652, avg loss: 2.655662, ppl: 14.234410 +epoch: 1, batch: 32051, sum loss: 4044.483643, avg loss: 2.524647, ppl: 12.486481 +epoch: 1, batch: 32052, sum loss: 4015.412842, avg loss: 2.370373, ppl: 10.701388 +epoch: 1, batch: 32053, sum loss: 4931.688477, avg loss: 2.495794, ppl: 12.131360 +epoch: 1, batch: 32054, sum loss: 4737.315430, avg loss: 2.871100, ppl: 17.656433 +epoch: 1, batch: 32055, sum loss: 4966.112793, avg loss: 2.730134, ppl: 15.334934 +epoch: 1, batch: 32056, sum loss: 4701.539551, avg loss: 2.702034, ppl: 14.910031 +epoch: 1, batch: 32057, sum loss: 5499.868164, avg loss: 2.748560, ppl: 15.620122 +epoch: 1, batch: 32058, sum loss: 4745.890137, avg loss: 2.639538, ppl: 14.006735 +epoch: 1, batch: 32059, sum loss: 3979.428955, avg loss: 2.487143, ppl: 12.026867 +epoch: 1, batch: 32060, sum loss: 3988.498291, avg loss: 2.418738, ppl: 11.231672 +epoch: 1, batch: 32061, sum loss: 4078.270752, avg loss: 2.605924, ppl: 13.543729 +epoch: 1, batch: 32062, sum loss: 4985.997559, avg loss: 2.732054, ppl: 15.364406 +epoch: 1, batch: 32063, sum loss: 4369.562500, avg loss: 2.423496, ppl: 11.285238 +epoch: 1, batch: 32064, sum loss: 
4217.782227, avg loss: 2.482509, ppl: 11.971261 +epoch: 1, batch: 32065, sum loss: 4632.842285, avg loss: 2.983157, ppl: 19.750063 +epoch: 1, batch: 32066, sum loss: 4372.261719, avg loss: 2.581028, ppl: 13.210711 +epoch: 1, batch: 32067, sum loss: 4540.229492, avg loss: 2.681766, ppl: 14.610870 +epoch: 1, batch: 32068, sum loss: 4586.548340, avg loss: 2.749729, ppl: 15.638399 +epoch: 1, batch: 32069, sum loss: 4492.233887, avg loss: 2.505429, ppl: 12.248813 +epoch: 1, batch: 32070, sum loss: 3738.622070, avg loss: 2.469367, ppl: 11.814969 +epoch: 1, batch: 32071, sum loss: 4141.853027, avg loss: 2.648244, ppl: 14.129201 +epoch: 1, batch: 32072, sum loss: 4256.080566, avg loss: 2.516902, ppl: 12.390149 +epoch: 1, batch: 32073, sum loss: 4738.655273, avg loss: 2.554531, ppl: 12.865266 +epoch: 1, batch: 32074, sum loss: 5043.419434, avg loss: 2.672718, ppl: 14.479275 +epoch: 1, batch: 32075, sum loss: 5058.391113, avg loss: 2.878993, ppl: 17.796345 +epoch: 1, batch: 32076, sum loss: 4099.152344, avg loss: 2.524109, ppl: 12.479767 +epoch: 1, batch: 32077, sum loss: 4124.423828, avg loss: 2.510300, ppl: 12.308627 +epoch: 1, batch: 32078, sum loss: 4223.521484, avg loss: 2.634761, ppl: 13.939975 +epoch: 1, batch: 32079, sum loss: 4476.126465, avg loss: 2.593353, ppl: 13.374536 +epoch: 1, batch: 32080, sum loss: 4955.621582, avg loss: 3.003407, ppl: 20.154085 +epoch: 1, batch: 32081, sum loss: 4776.273438, avg loss: 2.839639, ppl: 17.109594 +epoch: 1, batch: 32082, sum loss: 4959.250488, avg loss: 2.572226, ppl: 13.094935 +epoch: 1, batch: 32083, sum loss: 4774.654785, avg loss: 2.682390, ppl: 14.620000 +epoch: 1, batch: 32084, sum loss: 4241.381836, avg loss: 2.664185, ppl: 14.356238 +epoch: 1, batch: 32085, sum loss: 4486.495605, avg loss: 2.531883, ppl: 12.577161 +epoch: 1, batch: 32086, sum loss: 5008.526367, avg loss: 2.580384, ppl: 13.202213 +epoch: 1, batch: 32087, sum loss: 4537.168945, avg loss: 2.667354, ppl: 14.401810 +epoch: 1, batch: 32088, sum loss: 
4122.449707, avg loss: 2.499969, ppl: 12.182122 +epoch: 1, batch: 32089, sum loss: 4591.591797, avg loss: 2.739613, ppl: 15.480997 +epoch: 1, batch: 32090, sum loss: 3441.119141, avg loss: 2.339306, ppl: 10.374036 +epoch: 1, batch: 32091, sum loss: 4946.277344, avg loss: 2.662151, ppl: 14.327078 +epoch: 1, batch: 32092, sum loss: 4757.649902, avg loss: 2.575880, ppl: 13.142879 +epoch: 1, batch: 32093, sum loss: 3845.742920, avg loss: 2.659573, ppl: 14.290191 +epoch: 1, batch: 32094, sum loss: 4477.139160, avg loss: 2.613625, ppl: 13.648431 +epoch: 1, batch: 32095, sum loss: 4139.075684, avg loss: 2.728461, ppl: 15.309312 +epoch: 1, batch: 32096, sum loss: 4744.891602, avg loss: 2.631665, ppl: 13.896886 +epoch: 1, batch: 32097, sum loss: 4914.685547, avg loss: 2.801987, ppl: 16.477358 +epoch: 1, batch: 32098, sum loss: 5201.924316, avg loss: 2.750885, ppl: 15.656489 +epoch: 1, batch: 32099, sum loss: 3258.313721, avg loss: 2.381808, ppl: 10.824459 +epoch: 1, batch: 32100, sum loss: 3984.212891, avg loss: 2.272797, ppl: 9.706511 +epoch: 1, batch: 32101, sum loss: 4207.873535, avg loss: 2.759261, ppl: 15.788177 +epoch: 1, batch: 32102, sum loss: 5505.886719, avg loss: 2.997217, ppl: 20.029711 +epoch: 1, batch: 32103, sum loss: 4786.476074, avg loss: 2.859305, ppl: 17.449394 +epoch: 1, batch: 32104, sum loss: 4071.678711, avg loss: 2.673460, ppl: 14.490011 +epoch: 1, batch: 32105, sum loss: 5306.858398, avg loss: 2.774103, ppl: 16.024242 +epoch: 1, batch: 32106, sum loss: 4592.479492, avg loss: 2.861358, ppl: 17.485254 +epoch: 1, batch: 32107, sum loss: 3647.669678, avg loss: 2.599907, ppl: 13.462488 +epoch: 1, batch: 32108, sum loss: 4095.247070, avg loss: 2.605119, ppl: 13.532835 +epoch: 1, batch: 32109, sum loss: 4239.425781, avg loss: 2.577158, ppl: 13.159691 +epoch: 1, batch: 32110, sum loss: 4573.803223, avg loss: 2.688891, ppl: 14.715343 +epoch: 1, batch: 32111, sum loss: 4127.061523, avg loss: 2.510378, ppl: 12.309584 +epoch: 1, batch: 32112, sum loss: 
4664.500488, avg loss: 2.699364, ppl: 14.870267 +epoch: 1, batch: 32113, sum loss: 5427.778320, avg loss: 2.658070, ppl: 14.268719 +epoch: 1, batch: 32114, sum loss: 4610.678223, avg loss: 2.867337, ppl: 17.590117 +epoch: 1, batch: 32115, sum loss: 4492.525391, avg loss: 2.529575, ppl: 12.548173 +epoch: 1, batch: 32116, sum loss: 3739.476074, avg loss: 2.368256, ppl: 10.678751 +epoch: 1, batch: 32117, sum loss: 4202.171875, avg loss: 2.582773, ppl: 13.233785 +epoch: 1, batch: 32118, sum loss: 4897.247559, avg loss: 2.644302, ppl: 14.073620 +epoch: 1, batch: 32119, sum loss: 4267.311523, avg loss: 2.767387, ppl: 15.916995 +epoch: 1, batch: 32120, sum loss: 4020.040283, avg loss: 2.558905, ppl: 12.921665 +epoch: 1, batch: 32121, sum loss: 4374.199707, avg loss: 2.728758, ppl: 15.313861 +epoch: 1, batch: 32122, sum loss: 3761.263184, avg loss: 2.479409, ppl: 11.934206 +epoch: 1, batch: 32123, sum loss: 4942.636230, avg loss: 2.838964, ppl: 17.098043 +epoch: 1, batch: 32124, sum loss: 3596.744873, avg loss: 2.344684, ppl: 10.429973 +epoch: 1, batch: 32125, sum loss: 3849.250977, avg loss: 2.311862, ppl: 10.093205 +epoch: 1, batch: 32126, sum loss: 3807.639648, avg loss: 2.431443, ppl: 11.375284 +epoch: 1, batch: 32127, sum loss: 4418.956055, avg loss: 2.674913, ppl: 14.511086 +epoch: 1, batch: 32128, sum loss: 4252.327148, avg loss: 2.482386, ppl: 11.969789 +epoch: 1, batch: 32129, sum loss: 4215.011719, avg loss: 2.565436, ppl: 13.006330 +epoch: 1, batch: 32130, sum loss: 4453.360352, avg loss: 2.490694, ppl: 12.069647 +epoch: 1, batch: 32131, sum loss: 4185.216797, avg loss: 2.739016, ppl: 15.471758 +epoch: 1, batch: 32132, sum loss: 4498.063965, avg loss: 2.573263, ppl: 13.108530 +epoch: 1, batch: 32133, sum loss: 4325.313477, avg loss: 2.566952, ppl: 13.026054 +epoch: 1, batch: 32134, sum loss: 4001.033691, avg loss: 2.392963, ppl: 10.945878 +epoch: 1, batch: 32135, sum loss: 4713.589844, avg loss: 2.751658, ppl: 15.668585 +epoch: 1, batch: 32136, sum loss: 
4362.322266, avg loss: 2.430263, ppl: 11.361871 +epoch: 1, batch: 32137, sum loss: 4783.697266, avg loss: 2.464553, ppl: 11.758224 +epoch: 1, batch: 32138, sum loss: 4620.295898, avg loss: 2.820693, ppl: 16.788485 +epoch: 1, batch: 32139, sum loss: 5469.472168, avg loss: 2.809180, ppl: 16.596296 +epoch: 1, batch: 32140, sum loss: 4849.513672, avg loss: 2.803187, ppl: 16.497141 +epoch: 1, batch: 32141, sum loss: 5041.351074, avg loss: 2.848221, ppl: 17.257055 +epoch: 1, batch: 32142, sum loss: 4495.238770, avg loss: 2.526834, ppl: 12.513831 +epoch: 1, batch: 32143, sum loss: 4639.349609, avg loss: 2.663232, ppl: 14.342564 +epoch: 1, batch: 32144, sum loss: 4085.126465, avg loss: 2.333025, ppl: 10.309079 +epoch: 1, batch: 32145, sum loss: 3756.367920, avg loss: 2.307351, ppl: 10.047776 +epoch: 1, batch: 32146, sum loss: 4082.381104, avg loss: 2.425657, ppl: 11.309661 +epoch: 1, batch: 32147, sum loss: 4695.400391, avg loss: 2.585573, ppl: 13.270887 +epoch: 1, batch: 32148, sum loss: 4280.143066, avg loss: 2.581510, ppl: 13.217078 +epoch: 1, batch: 32149, sum loss: 4366.992676, avg loss: 2.659557, ppl: 14.289959 +epoch: 1, batch: 32150, sum loss: 4135.195312, avg loss: 2.373821, ppl: 10.738340 +epoch: 1, batch: 32151, sum loss: 5186.601562, avg loss: 2.719770, ppl: 15.176834 +epoch: 1, batch: 32152, sum loss: 4581.446777, avg loss: 2.586926, ppl: 13.288865 +epoch: 1, batch: 32153, sum loss: 4318.687012, avg loss: 2.576782, ppl: 13.154741 +epoch: 1, batch: 32154, sum loss: 4319.792480, avg loss: 2.783372, ppl: 16.173462 +epoch: 1, batch: 32155, sum loss: 4459.665039, avg loss: 2.465266, ppl: 11.766606 +epoch: 1, batch: 32156, sum loss: 4881.384277, avg loss: 2.792554, ppl: 16.322657 +epoch: 1, batch: 32157, sum loss: 4047.282227, avg loss: 2.419177, ppl: 11.236603 +epoch: 1, batch: 32158, sum loss: 4659.176270, avg loss: 2.638265, ppl: 13.988914 +epoch: 1, batch: 32159, sum loss: 5089.635742, avg loss: 2.592784, ppl: 13.366939 +epoch: 1, batch: 32160, sum loss: 
4787.170898, avg loss: 2.797879, ppl: 16.409809 +epoch: 1, batch: 32161, sum loss: 4522.862793, avg loss: 2.538083, ppl: 12.655382 +epoch: 1, batch: 32162, sum loss: 4557.453125, avg loss: 2.566133, ppl: 13.015403 +epoch: 1, batch: 32163, sum loss: 4683.909180, avg loss: 2.769905, ppl: 15.957119 +epoch: 1, batch: 32164, sum loss: 4631.546875, avg loss: 2.837958, ppl: 17.080849 +epoch: 1, batch: 32165, sum loss: 4226.916504, avg loss: 2.546335, ppl: 12.760255 +epoch: 1, batch: 32166, sum loss: 4717.659180, avg loss: 2.650370, ppl: 14.159282 +epoch: 1, batch: 32167, sum loss: 4087.217285, avg loss: 2.655762, ppl: 14.235826 +epoch: 1, batch: 32168, sum loss: 4151.679199, avg loss: 2.596422, ppl: 13.415653 +epoch: 1, batch: 32169, sum loss: 4189.192383, avg loss: 2.592322, ppl: 13.360758 +epoch: 1, batch: 32170, sum loss: 4418.414062, avg loss: 2.580849, ppl: 13.208349 +epoch: 1, batch: 32171, sum loss: 5442.884277, avg loss: 2.782661, ppl: 16.161966 +epoch: 1, batch: 32172, sum loss: 4345.696777, avg loss: 2.605334, ppl: 13.535743 +epoch: 1, batch: 32173, sum loss: 4977.515625, avg loss: 2.626657, ppl: 13.827471 +epoch: 1, batch: 32174, sum loss: 3292.641602, avg loss: 2.277069, ppl: 9.748066 +epoch: 1, batch: 32175, sum loss: 3997.729736, avg loss: 2.452595, ppl: 11.618458 +epoch: 1, batch: 32176, sum loss: 3898.847168, avg loss: 2.528435, ppl: 12.533878 +epoch: 1, batch: 32177, sum loss: 4446.807617, avg loss: 2.623485, ppl: 13.783681 +epoch: 1, batch: 32178, sum loss: 5038.094727, avg loss: 2.669896, ppl: 14.438473 +epoch: 1, batch: 32179, sum loss: 4096.211914, avg loss: 2.569769, ppl: 13.062808 +epoch: 1, batch: 32180, sum loss: 4037.676025, avg loss: 2.640730, ppl: 14.023439 +epoch: 1, batch: 32181, sum loss: 4517.755371, avg loss: 2.790460, ppl: 16.288517 +epoch: 1, batch: 32182, sum loss: 3360.402588, avg loss: 2.203543, ppl: 9.057043 +epoch: 1, batch: 32183, sum loss: 4964.559570, avg loss: 2.656265, ppl: 14.242992 +epoch: 1, batch: 32184, sum loss: 
4180.518555, avg loss: 2.818961, ppl: 16.759422 +epoch: 1, batch: 32185, sum loss: 3863.084717, avg loss: 2.414428, ppl: 11.183372 +epoch: 1, batch: 32186, sum loss: 4275.575684, avg loss: 2.531424, ppl: 12.571399 +epoch: 1, batch: 32187, sum loss: 4869.519043, avg loss: 2.565606, ppl: 13.008534 +epoch: 1, batch: 32188, sum loss: 4623.831055, avg loss: 2.768761, ppl: 15.938876 +epoch: 1, batch: 32189, sum loss: 4624.711426, avg loss: 2.667077, ppl: 14.397820 +epoch: 1, batch: 32190, sum loss: 5057.795898, avg loss: 2.794363, ppl: 16.352201 +epoch: 1, batch: 32191, sum loss: 4727.939453, avg loss: 2.692449, ppl: 14.767793 +epoch: 1, batch: 32192, sum loss: 4212.791016, avg loss: 2.656236, ppl: 14.242585 +epoch: 1, batch: 32193, sum loss: 4742.060547, avg loss: 2.737910, ppl: 15.454656 +epoch: 1, batch: 32194, sum loss: 4333.342773, avg loss: 2.374434, ppl: 10.744935 +epoch: 1, batch: 32195, sum loss: 3803.677490, avg loss: 2.571790, ppl: 13.089233 +epoch: 1, batch: 32196, sum loss: 5006.820312, avg loss: 2.582166, ppl: 13.225757 +epoch: 1, batch: 32197, sum loss: 4838.575684, avg loss: 2.803346, ppl: 16.499765 +epoch: 1, batch: 32198, sum loss: 4553.138184, avg loss: 2.659543, ppl: 14.289762 +epoch: 1, batch: 32199, sum loss: 4768.237305, avg loss: 2.804846, ppl: 16.524525 +epoch: 1, batch: 32200, sum loss: 4327.067383, avg loss: 2.627242, ppl: 13.835557 +epoch: 1, batch: 32201, sum loss: 5638.254395, avg loss: 2.916842, ppl: 18.482817 +epoch: 1, batch: 32202, sum loss: 4358.460449, avg loss: 2.692070, ppl: 14.762196 +epoch: 1, batch: 32203, sum loss: 4894.672363, avg loss: 2.684955, ppl: 14.657540 +epoch: 1, batch: 32204, sum loss: 4972.904297, avg loss: 2.779712, ppl: 16.114374 +epoch: 1, batch: 32205, sum loss: 4657.658203, avg loss: 2.841768, ppl: 17.146057 +epoch: 1, batch: 32206, sum loss: 4265.468262, avg loss: 2.903654, ppl: 18.240681 +epoch: 1, batch: 32207, sum loss: 3839.856689, avg loss: 2.254760, ppl: 9.533008 +epoch: 1, batch: 32208, sum loss: 
5428.395508, avg loss: 2.698010, ppl: 14.850150 +epoch: 1, batch: 32209, sum loss: 4762.387695, avg loss: 2.672496, ppl: 14.476058 +epoch: 1, batch: 32210, sum loss: 3798.183838, avg loss: 2.464753, ppl: 11.760573 +epoch: 1, batch: 32211, sum loss: 5053.172852, avg loss: 2.894143, ppl: 18.068005 +epoch: 1, batch: 32212, sum loss: 4871.788574, avg loss: 2.613621, ppl: 13.648376 +epoch: 1, batch: 32213, sum loss: 4376.838867, avg loss: 2.583730, ppl: 13.246455 +epoch: 1, batch: 32214, sum loss: 4312.458008, avg loss: 2.724231, ppl: 15.244686 +epoch: 1, batch: 32215, sum loss: 4877.466797, avg loss: 2.774441, ppl: 16.029655 +epoch: 1, batch: 32216, sum loss: 3943.311035, avg loss: 2.537523, ppl: 12.648306 +epoch: 1, batch: 32217, sum loss: 4609.001953, avg loss: 2.817239, ppl: 16.730587 +epoch: 1, batch: 32218, sum loss: 3992.652588, avg loss: 2.599383, ppl: 13.455439 +epoch: 1, batch: 32219, sum loss: 3192.171631, avg loss: 2.405555, ppl: 11.084580 +epoch: 1, batch: 32220, sum loss: 4625.673340, avg loss: 2.705072, ppl: 14.955396 +epoch: 1, batch: 32221, sum loss: 5555.264648, avg loss: 2.906993, ppl: 18.301689 +epoch: 1, batch: 32222, sum loss: 3736.353516, avg loss: 2.678390, ppl: 14.561627 +epoch: 1, batch: 32223, sum loss: 4633.173340, avg loss: 2.556939, ppl: 12.896280 +epoch: 1, batch: 32224, sum loss: 4701.431152, avg loss: 2.601788, ppl: 13.487833 +epoch: 1, batch: 32225, sum loss: 4620.193359, avg loss: 2.534390, ppl: 12.608740 +epoch: 1, batch: 32226, sum loss: 4335.335938, avg loss: 2.646725, ppl: 14.107762 +epoch: 1, batch: 32227, sum loss: 3408.882324, avg loss: 2.473790, ppl: 11.867335 +epoch: 1, batch: 32228, sum loss: 4717.605469, avg loss: 2.651830, ppl: 14.179964 +epoch: 1, batch: 32229, sum loss: 4606.815918, avg loss: 2.932410, ppl: 18.772818 +epoch: 1, batch: 32230, sum loss: 4274.906738, avg loss: 2.546103, ppl: 12.757289 +epoch: 1, batch: 32231, sum loss: 5474.068359, avg loss: 2.693931, ppl: 14.789705 +epoch: 1, batch: 32232, sum loss: 
4238.982422, avg loss: 2.603797, ppl: 13.514963 +epoch: 1, batch: 32233, sum loss: 5331.931152, avg loss: 2.747002, ppl: 15.595807 +epoch: 1, batch: 32234, sum loss: 4465.214355, avg loss: 2.905149, ppl: 18.267969 +epoch: 1, batch: 32235, sum loss: 5111.980469, avg loss: 2.748377, ppl: 15.617258 +epoch: 1, batch: 32236, sum loss: 4105.731445, avg loss: 2.633567, ppl: 13.923350 +epoch: 1, batch: 32237, sum loss: 4571.173828, avg loss: 2.663854, ppl: 14.351499 +epoch: 1, batch: 32238, sum loss: 4281.580078, avg loss: 2.398644, ppl: 11.008244 +epoch: 1, batch: 32239, sum loss: 3876.296387, avg loss: 2.512182, ppl: 12.331806 +epoch: 1, batch: 32240, sum loss: 3941.493652, avg loss: 2.374394, ppl: 10.744497 +epoch: 1, batch: 32241, sum loss: 4613.436523, avg loss: 2.616810, ppl: 13.691981 +epoch: 1, batch: 32242, sum loss: 4652.886230, avg loss: 2.809714, ppl: 16.605165 +epoch: 1, batch: 32243, sum loss: 4607.351074, avg loss: 2.695934, ppl: 14.819354 +epoch: 1, batch: 32244, sum loss: 4404.803223, avg loss: 2.834494, ppl: 17.021778 +epoch: 1, batch: 32245, sum loss: 4891.070312, avg loss: 2.785348, ppl: 16.205452 +epoch: 1, batch: 32246, sum loss: 3355.445312, avg loss: 2.302982, ppl: 10.003973 +epoch: 1, batch: 32247, sum loss: 4245.204590, avg loss: 2.599635, ppl: 13.458830 +epoch: 1, batch: 32248, sum loss: 4200.065430, avg loss: 2.550131, ppl: 12.808777 +epoch: 1, batch: 32249, sum loss: 4114.954102, avg loss: 2.551119, ppl: 12.821438 +epoch: 1, batch: 32250, sum loss: 3548.485352, avg loss: 2.575098, ppl: 13.132607 +epoch: 1, batch: 32251, sum loss: 4469.108398, avg loss: 2.420969, ppl: 11.256760 +epoch: 1, batch: 32252, sum loss: 4875.273438, avg loss: 2.662629, ppl: 14.333925 +epoch: 1, batch: 32253, sum loss: 4056.015625, avg loss: 2.610049, ppl: 13.599714 +epoch: 1, batch: 32254, sum loss: 4476.260742, avg loss: 2.862059, ppl: 17.497524 +epoch: 1, batch: 32255, sum loss: 4571.793945, avg loss: 2.554075, ppl: 12.859396 +epoch: 1, batch: 32256, sum loss: 
4614.928711, avg loss: 2.770065, ppl: 15.959673 +epoch: 1, batch: 32257, sum loss: 4005.039062, avg loss: 2.592258, ppl: 13.359910 +epoch: 1, batch: 32258, sum loss: 4862.570801, avg loss: 2.718038, ppl: 15.150573 +epoch: 1, batch: 32259, sum loss: 4797.418457, avg loss: 2.546401, ppl: 12.761088 +epoch: 1, batch: 32260, sum loss: 4669.993164, avg loss: 2.696301, ppl: 14.824793 +epoch: 1, batch: 32261, sum loss: 4469.103027, avg loss: 2.558158, ppl: 12.912017 +epoch: 1, batch: 32262, sum loss: 4939.160156, avg loss: 2.532902, ppl: 12.589995 +epoch: 1, batch: 32263, sum loss: 4660.594727, avg loss: 2.770865, ppl: 15.972440 +epoch: 1, batch: 32264, sum loss: 4735.189453, avg loss: 2.610358, ppl: 13.603920 +epoch: 1, batch: 32265, sum loss: 4048.164551, avg loss: 2.435719, ppl: 11.424027 +epoch: 1, batch: 32266, sum loss: 5276.982422, avg loss: 2.941462, ppl: 18.943518 +epoch: 1, batch: 32267, sum loss: 4273.186035, avg loss: 2.721775, ppl: 15.207285 +epoch: 1, batch: 32268, sum loss: 4563.587891, avg loss: 2.665647, ppl: 14.377249 +epoch: 1, batch: 32269, sum loss: 4500.691895, avg loss: 2.680579, ppl: 14.593536 +epoch: 1, batch: 32270, sum loss: 5694.683594, avg loss: 2.772485, ppl: 15.998337 +epoch: 1, batch: 32271, sum loss: 4748.739746, avg loss: 2.516555, ppl: 12.385858 +epoch: 1, batch: 32272, sum loss: 5672.766602, avg loss: 2.903156, ppl: 18.231590 +epoch: 1, batch: 32273, sum loss: 4436.334961, avg loss: 2.710040, ppl: 15.029875 +epoch: 1, batch: 32274, sum loss: 4428.985840, avg loss: 2.775054, ppl: 16.039490 +epoch: 1, batch: 32275, sum loss: 4929.622070, avg loss: 2.849493, ppl: 17.279011 +epoch: 1, batch: 32276, sum loss: 4417.509766, avg loss: 2.721817, ppl: 15.207937 +epoch: 1, batch: 32277, sum loss: 4808.693359, avg loss: 2.658206, ppl: 14.270658 +epoch: 1, batch: 32278, sum loss: 4748.039551, avg loss: 2.590311, ppl: 13.333916 +epoch: 1, batch: 32279, sum loss: 4296.451660, avg loss: 2.499390, ppl: 12.175070 +epoch: 1, batch: 32280, sum loss: 
3949.436035, avg loss: 2.624210, ppl: 13.793674 +epoch: 1, batch: 32281, sum loss: 5016.931641, avg loss: 2.810606, ppl: 16.619987 +epoch: 1, batch: 32282, sum loss: 4855.324219, avg loss: 2.733854, ppl: 15.392091 +epoch: 1, batch: 32283, sum loss: 4943.117188, avg loss: 2.740087, ppl: 15.488333 +epoch: 1, batch: 32284, sum loss: 4997.552734, avg loss: 2.826670, ppl: 16.889130 +epoch: 1, batch: 32285, sum loss: 5091.392578, avg loss: 2.701004, ppl: 14.894679 +epoch: 1, batch: 32286, sum loss: 5062.153320, avg loss: 2.605329, ppl: 13.535672 +epoch: 1, batch: 32287, sum loss: 4481.898926, avg loss: 2.485801, ppl: 12.010734 +epoch: 1, batch: 32288, sum loss: 4625.579102, avg loss: 2.759892, ppl: 15.798141 +epoch: 1, batch: 32289, sum loss: 3924.017334, avg loss: 2.355352, ppl: 10.541843 +epoch: 1, batch: 32290, sum loss: 4856.787109, avg loss: 2.549495, ppl: 12.800632 +epoch: 1, batch: 32291, sum loss: 4509.290039, avg loss: 2.581162, ppl: 13.212481 +epoch: 1, batch: 32292, sum loss: 4349.966797, avg loss: 2.431507, ppl: 11.376017 +epoch: 1, batch: 32293, sum loss: 4093.945068, avg loss: 2.531815, ppl: 12.576315 +epoch: 1, batch: 32294, sum loss: 5397.855469, avg loss: 2.846970, ppl: 17.235485 +epoch: 1, batch: 32295, sum loss: 3976.474609, avg loss: 2.635172, ppl: 13.945713 +epoch: 1, batch: 32296, sum loss: 4198.398926, avg loss: 2.484260, ppl: 11.992238 +epoch: 1, batch: 32297, sum loss: 5187.774414, avg loss: 2.536809, ppl: 12.639275 +epoch: 1, batch: 32298, sum loss: 3989.561035, avg loss: 2.456626, ppl: 11.665388 +epoch: 1, batch: 32299, sum loss: 5357.388672, avg loss: 2.843625, ppl: 17.177916 +epoch: 1, batch: 32300, sum loss: 4152.479492, avg loss: 2.666975, ppl: 14.396351 +epoch: 1, batch: 32301, sum loss: 4777.784180, avg loss: 2.552235, ppl: 12.835764 +epoch: 1, batch: 32302, sum loss: 4269.944336, avg loss: 2.578469, ppl: 13.176946 +epoch: 1, batch: 32303, sum loss: 4708.376953, avg loss: 2.702857, ppl: 14.922304 +epoch: 1, batch: 32304, sum loss: 
4853.212402, avg loss: 2.748138, ppl: 15.613539 +epoch: 1, batch: 32305, sum loss: 4225.279785, avg loss: 2.682717, ppl: 14.624780 +epoch: 1, batch: 32306, sum loss: 4612.480957, avg loss: 2.629693, ppl: 13.869505 +epoch: 1, batch: 32307, sum loss: 4071.555908, avg loss: 2.452744, ppl: 11.620194 +epoch: 1, batch: 32308, sum loss: 4073.283691, avg loss: 2.587855, ppl: 13.301208 +epoch: 1, batch: 32309, sum loss: 3360.470703, avg loss: 2.496635, ppl: 12.141568 +epoch: 1, batch: 32310, sum loss: 4378.832031, avg loss: 2.528194, ppl: 12.530854 +epoch: 1, batch: 32311, sum loss: 5751.994629, avg loss: 2.786819, ppl: 16.229311 +epoch: 1, batch: 32312, sum loss: 4966.437500, avg loss: 2.722828, ppl: 15.223308 +epoch: 1, batch: 32313, sum loss: 3888.262207, avg loss: 2.568205, ppl: 13.042391 +epoch: 1, batch: 32314, sum loss: 5424.142578, avg loss: 3.020124, ppl: 20.493832 +epoch: 1, batch: 32315, sum loss: 4971.368652, avg loss: 2.788205, ppl: 16.251816 +epoch: 1, batch: 32316, sum loss: 4437.840820, avg loss: 2.801667, ppl: 16.472086 +epoch: 1, batch: 32317, sum loss: 3846.619629, avg loss: 2.627472, ppl: 13.838747 +epoch: 1, batch: 32318, sum loss: 4981.427246, avg loss: 2.732544, ppl: 15.371943 +epoch: 1, batch: 32319, sum loss: 4946.338867, avg loss: 2.716276, ppl: 15.123899 +epoch: 1, batch: 32320, sum loss: 4201.252441, avg loss: 2.438336, ppl: 11.453961 +epoch: 1, batch: 32321, sum loss: 4508.849121, avg loss: 2.754337, ppl: 15.710619 +epoch: 1, batch: 32322, sum loss: 3662.627930, avg loss: 2.367568, ppl: 10.671411 +epoch: 1, batch: 32323, sum loss: 5933.220703, avg loss: 2.972555, ppl: 19.541794 +epoch: 1, batch: 32324, sum loss: 3903.666504, avg loss: 2.569892, ppl: 13.064419 +epoch: 1, batch: 32325, sum loss: 3295.534668, avg loss: 2.131652, ppl: 8.428782 +epoch: 1, batch: 32326, sum loss: 4359.812012, avg loss: 2.653568, ppl: 14.204627 +epoch: 1, batch: 32327, sum loss: 4670.498535, avg loss: 2.760342, ppl: 15.805245 +epoch: 1, batch: 32328, sum loss: 
4797.206543, avg loss: 2.782603, ppl: 16.161026 +epoch: 1, batch: 32329, sum loss: 3721.800293, avg loss: 2.361548, ppl: 10.607364 +epoch: 1, batch: 32330, sum loss: 3780.665527, avg loss: 2.545902, ppl: 12.754734 +epoch: 1, batch: 32331, sum loss: 5281.094238, avg loss: 2.884268, ppl: 17.890463 +epoch: 1, batch: 32332, sum loss: 5395.715332, avg loss: 2.994293, ppl: 19.971230 +epoch: 1, batch: 32333, sum loss: 4505.306152, avg loss: 2.717314, ppl: 15.139599 +epoch: 1, batch: 32334, sum loss: 4257.484375, avg loss: 2.507353, ppl: 12.272397 +epoch: 1, batch: 32335, sum loss: 5289.694824, avg loss: 2.916039, ppl: 18.467995 +epoch: 1, batch: 32336, sum loss: 5238.946289, avg loss: 2.580762, ppl: 13.207197 +epoch: 1, batch: 32337, sum loss: 4801.013672, avg loss: 2.804330, ppl: 16.516012 +epoch: 1, batch: 32338, sum loss: 3214.391846, avg loss: 2.310850, ppl: 10.082988 +epoch: 1, batch: 32339, sum loss: 5266.554688, avg loss: 2.935649, ppl: 18.833729 +epoch: 1, batch: 32340, sum loss: 3789.761230, avg loss: 2.379009, ppl: 10.794201 +epoch: 1, batch: 32341, sum loss: 4855.736816, avg loss: 2.814920, ppl: 16.691839 +epoch: 1, batch: 32342, sum loss: 5135.017090, avg loss: 2.796850, ppl: 16.392931 +epoch: 1, batch: 32343, sum loss: 5265.796387, avg loss: 2.668929, ppl: 14.424510 +epoch: 1, batch: 32344, sum loss: 4203.873535, avg loss: 2.356431, ppl: 10.553225 +epoch: 1, batch: 32345, sum loss: 4782.358887, avg loss: 2.674697, ppl: 14.507959 +epoch: 1, batch: 32346, sum loss: 4410.183594, avg loss: 2.628238, ppl: 13.849349 +epoch: 1, batch: 32347, sum loss: 5054.439453, avg loss: 2.903182, ppl: 18.232063 +epoch: 1, batch: 32348, sum loss: 3596.289062, avg loss: 2.253314, ppl: 9.519231 +epoch: 1, batch: 32349, sum loss: 4628.617676, avg loss: 2.537619, ppl: 12.649521 +epoch: 1, batch: 32350, sum loss: 3401.780518, avg loss: 2.435061, ppl: 11.416517 +epoch: 1, batch: 32351, sum loss: 4990.657715, avg loss: 2.640560, ppl: 14.021049 +epoch: 1, batch: 32352, sum loss: 
4562.679688, avg loss: 2.604270, ppl: 13.521344 +epoch: 1, batch: 32353, sum loss: 6054.544922, avg loss: 3.225650, ppl: 25.169931 +epoch: 1, batch: 32354, sum loss: 4970.454590, avg loss: 2.854942, ppl: 17.373436 +epoch: 1, batch: 32355, sum loss: 4909.808594, avg loss: 2.717105, ppl: 15.136434 +epoch: 1, batch: 32356, sum loss: 5161.946289, avg loss: 2.794774, ppl: 16.358925 +epoch: 1, batch: 32357, sum loss: 4510.083496, avg loss: 2.512581, ppl: 12.336735 +epoch: 1, batch: 32358, sum loss: 3445.428711, avg loss: 2.471613, ppl: 11.841534 +epoch: 1, batch: 32359, sum loss: 4265.968750, avg loss: 2.565225, ppl: 13.003579 +epoch: 1, batch: 32360, sum loss: 4281.930176, avg loss: 2.725608, ppl: 15.265695 +epoch: 1, batch: 32361, sum loss: 4989.857422, avg loss: 2.703065, ppl: 14.925404 +epoch: 1, batch: 32362, sum loss: 4587.332520, avg loss: 2.711190, ppl: 15.047164 +epoch: 1, batch: 32363, sum loss: 5394.302246, avg loss: 2.860182, ppl: 17.464697 +epoch: 1, batch: 32364, sum loss: 4739.135254, avg loss: 2.729917, ppl: 15.331608 +epoch: 1, batch: 32365, sum loss: 3721.915527, avg loss: 2.473034, ppl: 11.858367 +epoch: 1, batch: 32366, sum loss: 3928.719482, avg loss: 2.533023, ppl: 12.591517 +epoch: 1, batch: 32367, sum loss: 5816.731934, avg loss: 3.002959, ppl: 20.145063 +epoch: 1, batch: 32368, sum loss: 4277.237305, avg loss: 2.507173, ppl: 12.270194 +epoch: 1, batch: 32369, sum loss: 4532.434570, avg loss: 2.715659, ppl: 15.114566 +epoch: 1, batch: 32370, sum loss: 5032.182617, avg loss: 2.587240, ppl: 13.293038 +epoch: 1, batch: 32371, sum loss: 3801.718506, avg loss: 2.481539, ppl: 11.959662 +epoch: 1, batch: 32372, sum loss: 6637.439941, avg loss: 3.222058, ppl: 25.079683 +epoch: 1, batch: 32373, sum loss: 4484.615723, avg loss: 2.583304, ppl: 13.240812 +epoch: 1, batch: 32374, sum loss: 4052.103516, avg loss: 2.510597, ppl: 12.312278 +epoch: 1, batch: 32375, sum loss: 3758.293457, avg loss: 2.387734, ppl: 10.888791 +epoch: 1, batch: 32376, sum loss: 
6108.982422, avg loss: 2.964087, ppl: 19.376995 +epoch: 1, batch: 32377, sum loss: 5209.352539, avg loss: 2.806763, ppl: 16.556242 +epoch: 1, batch: 32378, sum loss: 4322.338867, avg loss: 2.790406, ppl: 16.287632 +epoch: 1, batch: 32379, sum loss: 4564.736328, avg loss: 2.574584, ppl: 13.125850 +epoch: 1, batch: 32380, sum loss: 4531.531250, avg loss: 2.483031, ppl: 11.977510 +epoch: 1, batch: 32381, sum loss: 4426.574707, avg loss: 2.381159, ppl: 10.817433 +epoch: 1, batch: 32382, sum loss: 4186.750977, avg loss: 2.628218, ppl: 13.849068 +epoch: 1, batch: 32383, sum loss: 4048.349609, avg loss: 2.606793, ppl: 13.555511 +epoch: 1, batch: 32384, sum loss: 5071.930664, avg loss: 2.950512, ppl: 19.115746 +epoch: 1, batch: 32385, sum loss: 4777.218750, avg loss: 2.801888, ppl: 16.475723 +epoch: 1, batch: 32386, sum loss: 3094.888184, avg loss: 2.412228, ppl: 11.158792 +epoch: 1, batch: 32387, sum loss: 4465.704102, avg loss: 2.629979, ppl: 13.873477 +epoch: 1, batch: 32388, sum loss: 5093.040527, avg loss: 2.932090, ppl: 18.766817 +epoch: 1, batch: 32389, sum loss: 4046.672363, avg loss: 2.665792, ppl: 14.379333 +epoch: 1, batch: 32390, sum loss: 4834.816406, avg loss: 2.737722, ppl: 15.451741 +epoch: 1, batch: 32391, sum loss: 4496.665527, avg loss: 2.422772, ppl: 11.277081 +epoch: 1, batch: 32392, sum loss: 4046.246826, avg loss: 2.672554, ppl: 14.476896 +epoch: 1, batch: 32393, sum loss: 4274.816895, avg loss: 2.561304, ppl: 12.952701 +epoch: 1, batch: 32394, sum loss: 4397.324219, avg loss: 2.769096, ppl: 15.944212 +epoch: 1, batch: 32395, sum loss: 4349.525879, avg loss: 2.737273, ppl: 15.444802 +epoch: 1, batch: 32396, sum loss: 5397.597656, avg loss: 2.788015, ppl: 16.248735 +epoch: 1, batch: 32397, sum loss: 4835.774414, avg loss: 2.728992, ppl: 15.317447 +epoch: 1, batch: 32398, sum loss: 4127.843262, avg loss: 2.503240, ppl: 12.222034 +epoch: 1, batch: 32399, sum loss: 3549.659180, avg loss: 2.585331, ppl: 13.267676 +epoch: 1, batch: 32400, sum loss: 
5017.706543, avg loss: 2.817353, ppl: 16.732510 +epoch: 1, batch: 32401, sum loss: 3917.427734, avg loss: 2.406282, ppl: 11.092647 +epoch: 1, batch: 32402, sum loss: 4412.181152, avg loss: 2.629429, ppl: 13.865845 +epoch: 1, batch: 32403, sum loss: 4573.308105, avg loss: 2.679149, ppl: 14.572692 +epoch: 1, batch: 32404, sum loss: 4180.212402, avg loss: 2.620823, ppl: 13.747031 +epoch: 1, batch: 32405, sum loss: 4785.713379, avg loss: 2.475796, ppl: 11.891169 +epoch: 1, batch: 32406, sum loss: 5612.306152, avg loss: 2.955401, ppl: 19.209423 +epoch: 1, batch: 32407, sum loss: 3791.879883, avg loss: 2.418291, ppl: 11.226652 +epoch: 1, batch: 32408, sum loss: 5285.543945, avg loss: 3.015142, ppl: 20.391991 +epoch: 1, batch: 32409, sum loss: 5803.864258, avg loss: 2.876048, ppl: 17.744007 +epoch: 1, batch: 32410, sum loss: 3813.905518, avg loss: 2.407769, ppl: 11.109146 +epoch: 1, batch: 32411, sum loss: 4381.268555, avg loss: 2.642502, ppl: 14.048309 +epoch: 1, batch: 32412, sum loss: 4291.569824, avg loss: 2.607272, ppl: 13.562005 +epoch: 1, batch: 32413, sum loss: 3929.346191, avg loss: 2.395943, ppl: 10.978542 +epoch: 1, batch: 32414, sum loss: 4774.101562, avg loss: 2.600273, ppl: 13.467416 +epoch: 1, batch: 32415, sum loss: 4979.741211, avg loss: 2.797607, ppl: 16.405348 +epoch: 1, batch: 32416, sum loss: 4285.219727, avg loss: 2.421028, ppl: 11.257427 +epoch: 1, batch: 32417, sum loss: 3635.282715, avg loss: 2.396363, ppl: 10.983158 +epoch: 1, batch: 32418, sum loss: 5176.967773, avg loss: 2.658946, ppl: 14.281229 +epoch: 1, batch: 32419, sum loss: 4412.749023, avg loss: 2.702235, ppl: 14.913018 +epoch: 1, batch: 32420, sum loss: 3718.420898, avg loss: 2.431930, ppl: 11.380827 +epoch: 1, batch: 32421, sum loss: 5567.146484, avg loss: 2.889023, ppl: 17.975731 +epoch: 1, batch: 32422, sum loss: 3667.467529, avg loss: 2.325598, ppl: 10.232795 +epoch: 1, batch: 32423, sum loss: 4373.736816, avg loss: 2.580376, ppl: 13.202097 +epoch: 1, batch: 32424, sum loss: 
3905.210693, avg loss: 2.567528, ppl: 13.033572 +epoch: 1, batch: 32425, sum loss: 4624.342773, avg loss: 2.596487, ppl: 13.416520 +epoch: 1, batch: 32426, sum loss: 5210.330566, avg loss: 2.710890, ppl: 15.042659 +epoch: 1, batch: 32427, sum loss: 4429.613281, avg loss: 2.649290, ppl: 14.143997 +epoch: 1, batch: 32428, sum loss: 5201.083984, avg loss: 2.629466, ppl: 13.866361 +epoch: 1, batch: 32429, sum loss: 5491.740234, avg loss: 2.957318, ppl: 19.246290 +epoch: 1, batch: 32430, sum loss: 3671.558594, avg loss: 2.270599, ppl: 9.685202 +epoch: 1, batch: 32431, sum loss: 4961.641113, avg loss: 2.730678, ppl: 15.343280 +epoch: 1, batch: 32432, sum loss: 3776.774170, avg loss: 2.381320, ppl: 10.819180 +epoch: 1, batch: 32433, sum loss: 4672.194824, avg loss: 2.620412, ppl: 13.741388 +epoch: 1, batch: 32434, sum loss: 4820.084961, avg loss: 2.590051, ppl: 13.330454 +epoch: 1, batch: 32435, sum loss: 4526.684570, avg loss: 2.720363, ppl: 15.185840 +epoch: 1, batch: 32436, sum loss: 4424.721680, avg loss: 2.604310, ppl: 13.521886 +epoch: 1, batch: 32437, sum loss: 5430.959473, avg loss: 2.795141, ppl: 16.364944 +epoch: 1, batch: 32438, sum loss: 4152.480957, avg loss: 2.706963, ppl: 14.983698 +epoch: 1, batch: 32439, sum loss: 4202.274902, avg loss: 2.685160, ppl: 14.660542 +epoch: 1, batch: 32440, sum loss: 4048.855225, avg loss: 2.496212, ppl: 12.136428 +epoch: 1, batch: 32441, sum loss: 4908.444824, avg loss: 2.870435, ppl: 17.644701 +epoch: 1, batch: 32442, sum loss: 4030.688965, avg loss: 2.760746, ppl: 15.811634 +epoch: 1, batch: 32443, sum loss: 3593.436523, avg loss: 2.436228, ppl: 11.429849 +epoch: 1, batch: 32444, sum loss: 5027.257812, avg loss: 2.814814, ppl: 16.690073 +epoch: 1, batch: 32445, sum loss: 4350.752930, avg loss: 2.656137, ppl: 14.241173 +epoch: 1, batch: 32446, sum loss: 4947.793945, avg loss: 2.727560, ppl: 15.295521 +epoch: 1, batch: 32447, sum loss: 5235.708008, avg loss: 2.729775, ppl: 15.329433 +epoch: 1, batch: 32448, sum loss: 
4491.792969, avg loss: 2.702643, ppl: 14.919113 +epoch: 1, batch: 32449, sum loss: 4738.072754, avg loss: 2.849112, ppl: 17.272434 +epoch: 1, batch: 32450, sum loss: 4861.736816, avg loss: 2.720614, ppl: 15.189642 +epoch: 1, batch: 32451, sum loss: 4370.635254, avg loss: 2.661776, ppl: 14.321696 +epoch: 1, batch: 32452, sum loss: 4947.030273, avg loss: 2.603700, ppl: 13.513652 +epoch: 1, batch: 32453, sum loss: 4397.887695, avg loss: 2.673488, ppl: 14.490422 +epoch: 1, batch: 32454, sum loss: 4702.862793, avg loss: 2.712147, ppl: 15.061574 +epoch: 1, batch: 32455, sum loss: 3867.823975, avg loss: 2.463582, ppl: 11.746817 +epoch: 1, batch: 32456, sum loss: 5272.629883, avg loss: 2.859344, ppl: 17.450071 +epoch: 1, batch: 32457, sum loss: 4158.328125, avg loss: 2.558971, ppl: 12.922515 +epoch: 1, batch: 32458, sum loss: 4066.326172, avg loss: 2.573624, ppl: 13.113262 +epoch: 1, batch: 32459, sum loss: 4830.353027, avg loss: 2.766525, ppl: 15.903278 +epoch: 1, batch: 32460, sum loss: 4929.001953, avg loss: 2.640065, ppl: 14.014117 +epoch: 1, batch: 32461, sum loss: 3914.435059, avg loss: 2.594059, ppl: 13.383990 +epoch: 1, batch: 32462, sum loss: 4479.869141, avg loss: 2.718367, ppl: 15.155559 +epoch: 1, batch: 32463, sum loss: 3995.671875, avg loss: 2.497295, ppl: 12.149584 +epoch: 1, batch: 32464, sum loss: 3965.069580, avg loss: 2.564728, ppl: 12.997123 +epoch: 1, batch: 32465, sum loss: 5200.174316, avg loss: 2.820051, ppl: 16.777710 +epoch: 1, batch: 32466, sum loss: 3997.112793, avg loss: 2.521838, ppl: 12.451458 +epoch: 1, batch: 32467, sum loss: 5595.878906, avg loss: 2.911488, ppl: 18.384127 +epoch: 1, batch: 32468, sum loss: 4691.457031, avg loss: 2.766189, ppl: 15.897937 +epoch: 1, batch: 32469, sum loss: 5610.648438, avg loss: 2.707842, ppl: 14.996876 +epoch: 1, batch: 32470, sum loss: 5094.880371, avg loss: 2.705725, ppl: 14.965162 +epoch: 1, batch: 32471, sum loss: 4492.847656, avg loss: 2.671134, ppl: 14.456357 +epoch: 1, batch: 32472, sum loss: 
4616.429199, avg loss: 2.759372, ppl: 15.789924 +epoch: 1, batch: 32473, sum loss: 4597.575195, avg loss: 2.820598, ppl: 16.786892 +epoch: 1, batch: 32474, sum loss: 3903.367676, avg loss: 2.476756, ppl: 11.902591 +epoch: 1, batch: 32475, sum loss: 4068.418457, avg loss: 2.462723, ppl: 11.736731 +epoch: 1, batch: 32476, sum loss: 5488.375000, avg loss: 2.794488, ppl: 16.354259 +epoch: 1, batch: 32477, sum loss: 4848.568848, avg loss: 2.678767, ppl: 14.567127 +epoch: 1, batch: 32478, sum loss: 4687.259766, avg loss: 2.630337, ppl: 13.878443 +epoch: 1, batch: 32479, sum loss: 4428.967773, avg loss: 2.567518, ppl: 13.033432 +epoch: 1, batch: 32480, sum loss: 4703.052734, avg loss: 2.640681, ppl: 14.022750 +epoch: 1, batch: 32481, sum loss: 4957.042969, avg loss: 2.799008, ppl: 16.428341 +epoch: 1, batch: 32482, sum loss: 5098.261230, avg loss: 2.726343, ppl: 15.276916 +epoch: 1, batch: 32483, sum loss: 4573.476074, avg loss: 2.679248, ppl: 14.574127 +epoch: 1, batch: 32484, sum loss: 5727.220703, avg loss: 2.668789, ppl: 14.422492 +epoch: 1, batch: 32485, sum loss: 4911.367188, avg loss: 2.636268, ppl: 13.961002 +epoch: 1, batch: 32486, sum loss: 5297.791016, avg loss: 2.732229, ppl: 15.367095 +epoch: 1, batch: 32487, sum loss: 4447.419922, avg loss: 2.677556, ppl: 14.549484 +epoch: 1, batch: 32488, sum loss: 4373.695801, avg loss: 2.622120, ppl: 13.764873 +epoch: 1, batch: 32489, sum loss: 3787.388672, avg loss: 2.462541, ppl: 11.734595 +epoch: 1, batch: 32490, sum loss: 3887.354736, avg loss: 2.577821, ppl: 13.168420 +epoch: 1, batch: 32491, sum loss: 5626.960938, avg loss: 2.967807, ppl: 19.449213 +epoch: 1, batch: 32492, sum loss: 5298.586914, avg loss: 2.644006, ppl: 14.069447 +epoch: 1, batch: 32493, sum loss: 4641.703613, avg loss: 2.634338, ppl: 13.934087 +epoch: 1, batch: 32494, sum loss: 4454.554688, avg loss: 2.459721, ppl: 11.701548 +epoch: 1, batch: 32495, sum loss: 4057.060059, avg loss: 2.458824, ppl: 11.691059 +epoch: 1, batch: 32496, sum loss: 
4054.021973, avg loss: 2.347436, ppl: 10.458718 +epoch: 1, batch: 32497, sum loss: 5049.355469, avg loss: 2.751693, ppl: 15.669130 +epoch: 1, batch: 32498, sum loss: 3249.085693, avg loss: 2.373328, ppl: 10.733049 +epoch: 1, batch: 32499, sum loss: 4821.067383, avg loss: 2.679860, ppl: 14.583046 +epoch: 1, batch: 32500, sum loss: 4558.428711, avg loss: 2.713351, ppl: 15.079716 +epoch: 1, batch: 32501, sum loss: 3749.487549, avg loss: 2.479820, ppl: 11.939116 +epoch: 1, batch: 32502, sum loss: 3617.530762, avg loss: 2.336906, ppl: 10.349169 +epoch: 1, batch: 32503, sum loss: 5084.833984, avg loss: 2.861471, ppl: 17.487230 +epoch: 1, batch: 32504, sum loss: 5329.429688, avg loss: 2.842363, ppl: 17.156252 +epoch: 1, batch: 32505, sum loss: 3406.515381, avg loss: 2.448969, ppl: 11.576401 +epoch: 1, batch: 32506, sum loss: 3918.882324, avg loss: 2.612588, ppl: 13.634293 +epoch: 1, batch: 32507, sum loss: 5107.047363, avg loss: 2.709309, ppl: 15.018891 +epoch: 1, batch: 32508, sum loss: 4790.295898, avg loss: 2.568523, ppl: 13.046543 +epoch: 1, batch: 32509, sum loss: 4384.394531, avg loss: 2.688164, ppl: 14.704646 +epoch: 1, batch: 32510, sum loss: 4091.385986, avg loss: 2.433900, ppl: 11.403267 +epoch: 1, batch: 32511, sum loss: 5722.296875, avg loss: 3.220201, ppl: 25.033146 +epoch: 1, batch: 32512, sum loss: 4464.823242, avg loss: 2.646605, ppl: 14.106074 +epoch: 1, batch: 32513, sum loss: 4569.892578, avg loss: 2.497209, ppl: 12.148540 +epoch: 1, batch: 32514, sum loss: 3944.826660, avg loss: 2.366423, ppl: 10.659192 +epoch: 1, batch: 32515, sum loss: 4936.819336, avg loss: 2.723011, ppl: 15.226103 +epoch: 1, batch: 32516, sum loss: 5042.228027, avg loss: 2.705058, ppl: 14.955182 +epoch: 1, batch: 32517, sum loss: 3543.165527, avg loss: 2.326438, ppl: 10.241401 +epoch: 1, batch: 32518, sum loss: 5511.590820, avg loss: 2.939515, ppl: 18.906677 +epoch: 1, batch: 32519, sum loss: 4647.218262, avg loss: 2.706592, ppl: 14.978141 +epoch: 1, batch: 32520, sum loss: 
3384.316162, avg loss: 2.477537, ppl: 11.911894 +epoch: 1, batch: 32521, sum loss: 5077.454590, avg loss: 2.825517, ppl: 16.869671 +epoch: 1, batch: 32522, sum loss: 4609.662109, avg loss: 2.856048, ppl: 17.392662 +epoch: 1, batch: 32523, sum loss: 4332.504883, avg loss: 2.701063, ppl: 14.895556 +epoch: 1, batch: 32524, sum loss: 4616.010742, avg loss: 2.772379, ppl: 15.996643 +epoch: 1, batch: 32525, sum loss: 4065.051270, avg loss: 2.474164, ppl: 11.871776 +epoch: 1, batch: 32526, sum loss: 4967.217773, avg loss: 2.465121, ppl: 11.764900 +epoch: 1, batch: 32527, sum loss: 4781.146973, avg loss: 2.707331, ppl: 14.989219 +epoch: 1, batch: 32528, sum loss: 4156.724609, avg loss: 2.328697, ppl: 10.264563 +epoch: 1, batch: 32529, sum loss: 3724.900391, avg loss: 2.426645, ppl: 11.320840 +epoch: 1, batch: 32530, sum loss: 3198.023682, avg loss: 2.367153, ppl: 10.666980 +epoch: 1, batch: 32531, sum loss: 3810.719238, avg loss: 2.322193, ppl: 10.198015 +epoch: 1, batch: 32532, sum loss: 4549.049316, avg loss: 2.617405, ppl: 13.700122 +epoch: 1, batch: 32533, sum loss: 4508.902344, avg loss: 2.691882, ppl: 14.759429 +epoch: 1, batch: 32534, sum loss: 3929.625488, avg loss: 2.295342, ppl: 9.927831 +epoch: 1, batch: 32535, sum loss: 5036.340332, avg loss: 2.771789, ppl: 15.987206 +epoch: 1, batch: 32536, sum loss: 4861.316406, avg loss: 2.951619, ppl: 19.136909 +epoch: 1, batch: 32537, sum loss: 4902.612305, avg loss: 2.644343, ppl: 14.074197 +epoch: 1, batch: 32538, sum loss: 5101.466797, avg loss: 2.827864, ppl: 16.909307 +epoch: 1, batch: 32539, sum loss: 5365.550781, avg loss: 2.797472, ppl: 16.403124 +epoch: 1, batch: 32540, sum loss: 4329.223633, avg loss: 2.614265, ppl: 13.657181 +epoch: 1, batch: 32541, sum loss: 5704.964844, avg loss: 3.001034, ppl: 20.106306 +epoch: 1, batch: 32542, sum loss: 6140.856934, avg loss: 2.995540, ppl: 19.996153 +epoch: 1, batch: 32543, sum loss: 4877.609863, avg loss: 2.747949, ppl: 15.610583 +epoch: 1, batch: 32544, sum loss: 
4194.726074, avg loss: 2.426100, ppl: 11.314664 +epoch: 1, batch: 32545, sum loss: 3952.229492, avg loss: 2.417266, ppl: 11.215151 +epoch: 1, batch: 32546, sum loss: 4033.336182, avg loss: 2.406525, ppl: 11.095342 +epoch: 1, batch: 32547, sum loss: 5094.468262, avg loss: 2.917794, ppl: 18.500431 +epoch: 1, batch: 32548, sum loss: 3486.123291, avg loss: 2.399259, ppl: 11.015012 +epoch: 1, batch: 32549, sum loss: 4418.233398, avg loss: 2.518947, ppl: 12.415518 +epoch: 1, batch: 32550, sum loss: 4156.583496, avg loss: 2.843081, ppl: 17.168571 +epoch: 1, batch: 32551, sum loss: 4105.112305, avg loss: 2.393651, ppl: 10.953417 +epoch: 1, batch: 32552, sum loss: 3924.175537, avg loss: 2.324749, ppl: 10.224109 +epoch: 1, batch: 32553, sum loss: 5256.554688, avg loss: 2.746371, ppl: 15.585972 +epoch: 1, batch: 32554, sum loss: 4740.048828, avg loss: 2.574714, ppl: 13.127564 +epoch: 1, batch: 32555, sum loss: 3716.860596, avg loss: 2.617508, ppl: 13.701532 +epoch: 1, batch: 32556, sum loss: 5140.600586, avg loss: 2.783216, ppl: 16.170950 +epoch: 1, batch: 32557, sum loss: 4002.535645, avg loss: 2.517318, ppl: 12.395308 +epoch: 1, batch: 32558, sum loss: 5531.468262, avg loss: 2.970713, ppl: 19.505829 +epoch: 1, batch: 32559, sum loss: 4111.782715, avg loss: 2.451868, ppl: 11.610015 +epoch: 1, batch: 32560, sum loss: 4156.485352, avg loss: 2.542193, ppl: 12.707504 +epoch: 1, batch: 32561, sum loss: 5004.258789, avg loss: 2.639377, ppl: 14.004478 +epoch: 1, batch: 32562, sum loss: 5082.039551, avg loss: 2.899053, ppl: 18.156940 +epoch: 1, batch: 32563, sum loss: 5047.854492, avg loss: 2.917835, ppl: 18.501188 +epoch: 1, batch: 32564, sum loss: 4899.821777, avg loss: 2.585658, ppl: 13.272017 +epoch: 1, batch: 32565, sum loss: 5085.465820, avg loss: 2.780463, ppl: 16.126478 +epoch: 1, batch: 32566, sum loss: 5176.075684, avg loss: 2.826912, ppl: 16.893217 +epoch: 1, batch: 32567, sum loss: 5123.465820, avg loss: 2.793602, ppl: 16.339766 +epoch: 1, batch: 32568, sum loss: 
4236.851562, avg loss: 2.436373, ppl: 11.431498 +epoch: 1, batch: 32569, sum loss: 4625.229492, avg loss: 2.538545, ppl: 12.661241 +epoch: 1, batch: 32570, sum loss: 4492.706543, avg loss: 2.610521, ppl: 13.606135 +epoch: 1, batch: 32571, sum loss: 3822.208496, avg loss: 2.654311, ppl: 14.215195 +epoch: 1, batch: 32572, sum loss: 4442.542480, avg loss: 2.689190, ppl: 14.719754 +epoch: 1, batch: 32573, sum loss: 4359.069336, avg loss: 2.823232, ppl: 16.831156 +epoch: 1, batch: 32574, sum loss: 3977.602783, avg loss: 2.519064, ppl: 12.416974 +epoch: 1, batch: 32575, sum loss: 4716.072754, avg loss: 2.636150, ppl: 13.959358 +epoch: 1, batch: 32576, sum loss: 4157.531250, avg loss: 2.692702, ppl: 14.771532 +epoch: 1, batch: 32577, sum loss: 3670.731445, avg loss: 2.588668, ppl: 13.312033 +epoch: 1, batch: 32578, sum loss: 5254.821777, avg loss: 2.795118, ppl: 16.364557 +epoch: 1, batch: 32579, sum loss: 4063.968262, avg loss: 2.546346, ppl: 12.760391 +epoch: 1, batch: 32580, sum loss: 5578.859375, avg loss: 2.936242, ppl: 18.844891 +epoch: 1, batch: 32581, sum loss: 4053.214355, avg loss: 2.506626, ppl: 12.263485 +epoch: 1, batch: 32582, sum loss: 4486.153320, avg loss: 2.535983, ppl: 12.628837 +epoch: 1, batch: 32583, sum loss: 4729.267578, avg loss: 2.676439, ppl: 14.533249 +epoch: 1, batch: 32584, sum loss: 3558.319336, avg loss: 2.275140, ppl: 9.729283 +epoch: 1, batch: 32585, sum loss: 5083.767090, avg loss: 2.926751, ppl: 18.666891 +epoch: 1, batch: 32586, sum loss: 3731.929443, avg loss: 2.286721, ppl: 9.842615 +epoch: 1, batch: 32587, sum loss: 4143.285156, avg loss: 2.751185, ppl: 15.661186 +epoch: 1, batch: 32588, sum loss: 4463.378418, avg loss: 2.661525, ppl: 14.318114 +epoch: 1, batch: 32589, sum loss: 5209.223145, avg loss: 2.781219, ppl: 16.138681 +epoch: 1, batch: 32590, sum loss: 4931.580078, avg loss: 2.552578, ppl: 12.840159 +epoch: 1, batch: 32591, sum loss: 4635.322266, avg loss: 2.772322, ppl: 15.995736 +epoch: 1, batch: 32592, sum loss: 
4834.702148, avg loss: 2.616181, ppl: 13.683366 +epoch: 1, batch: 32593, sum loss: 3707.969971, avg loss: 2.513878, ppl: 12.352739 +epoch: 1, batch: 32594, sum loss: 4463.900391, avg loss: 2.599826, ppl: 13.461390 +epoch: 1, batch: 32595, sum loss: 4586.098633, avg loss: 2.596885, ppl: 13.421860 +epoch: 1, batch: 32596, sum loss: 3679.665527, avg loss: 2.479559, ppl: 11.935999 +epoch: 1, batch: 32597, sum loss: 4360.915039, avg loss: 2.619168, ppl: 13.724304 +epoch: 1, batch: 32598, sum loss: 4283.211914, avg loss: 2.608534, ppl: 13.579124 +epoch: 1, batch: 32599, sum loss: 3829.961914, avg loss: 2.416380, ppl: 11.205219 +epoch: 1, batch: 32600, sum loss: 4551.012207, avg loss: 2.489613, ppl: 12.056607 +epoch: 1, batch: 32601, sum loss: 4134.259277, avg loss: 2.579076, ppl: 13.184954 +epoch: 1, batch: 32602, sum loss: 4284.650391, avg loss: 2.762508, ppl: 15.839521 +epoch: 1, batch: 32603, sum loss: 5019.150391, avg loss: 2.750219, ppl: 15.646064 +epoch: 1, batch: 32604, sum loss: 5329.332031, avg loss: 2.978945, ppl: 19.667048 +epoch: 1, batch: 32605, sum loss: 4543.377441, avg loss: 2.686799, ppl: 14.684600 +epoch: 1, batch: 32606, sum loss: 4608.671875, avg loss: 2.887639, ppl: 17.950878 +epoch: 1, batch: 32607, sum loss: 4175.542969, avg loss: 2.475129, ppl: 11.883245 +epoch: 1, batch: 32608, sum loss: 4553.521973, avg loss: 2.507446, ppl: 12.273541 +epoch: 1, batch: 32609, sum loss: 4466.488770, avg loss: 2.569901, ppl: 13.064537 +epoch: 1, batch: 32610, sum loss: 4271.185059, avg loss: 2.539349, ppl: 12.671420 +epoch: 1, batch: 32611, sum loss: 4691.554199, avg loss: 2.622445, ppl: 13.769350 +epoch: 1, batch: 32612, sum loss: 4611.665527, avg loss: 2.761477, ppl: 15.823189 +epoch: 1, batch: 32613, sum loss: 4886.942383, avg loss: 2.670460, ppl: 14.446620 +epoch: 1, batch: 32614, sum loss: 4848.471680, avg loss: 2.929590, ppl: 18.719957 +epoch: 1, batch: 32615, sum loss: 4174.462402, avg loss: 2.400496, ppl: 11.028646 +epoch: 1, batch: 32616, sum loss: 
4467.657715, avg loss: 2.689740, ppl: 14.727841 +epoch: 1, batch: 32617, sum loss: 4438.360352, avg loss: 2.675323, ppl: 14.517035 +epoch: 1, batch: 32618, sum loss: 4971.828125, avg loss: 2.928050, ppl: 18.691139 +epoch: 1, batch: 32619, sum loss: 4367.372070, avg loss: 2.799598, ppl: 16.438028 +epoch: 1, batch: 32620, sum loss: 4104.468750, avg loss: 2.609325, ppl: 13.589877 +epoch: 1, batch: 32621, sum loss: 5593.545898, avg loss: 2.805188, ppl: 16.530190 +epoch: 1, batch: 32622, sum loss: 4355.741211, avg loss: 2.530936, ppl: 12.565265 +epoch: 1, batch: 32623, sum loss: 4104.526855, avg loss: 2.617683, ppl: 13.703934 +epoch: 1, batch: 32624, sum loss: 4781.083496, avg loss: 2.657634, ppl: 14.262505 +epoch: 1, batch: 32625, sum loss: 3889.752441, avg loss: 2.686293, ppl: 14.677169 +epoch: 1, batch: 32626, sum loss: 5590.062500, avg loss: 2.843369, ppl: 17.173517 +epoch: 1, batch: 32627, sum loss: 4009.585449, avg loss: 2.700058, ppl: 14.880588 +epoch: 1, batch: 32628, sum loss: 5130.699707, avg loss: 2.828390, ppl: 16.918203 +epoch: 1, batch: 32629, sum loss: 6253.341309, avg loss: 3.129800, ppl: 22.869413 +epoch: 1, batch: 32630, sum loss: 5512.053711, avg loss: 2.884382, ppl: 17.892502 +epoch: 1, batch: 32631, sum loss: 4169.027344, avg loss: 2.687961, ppl: 14.701667 +epoch: 1, batch: 32632, sum loss: 4963.577637, avg loss: 2.825030, ppl: 16.861448 +epoch: 1, batch: 32633, sum loss: 4727.691406, avg loss: 2.555509, ppl: 12.877851 +epoch: 1, batch: 32634, sum loss: 5051.820801, avg loss: 2.716033, ppl: 15.120217 +epoch: 1, batch: 32635, sum loss: 3935.918945, avg loss: 2.613492, ppl: 13.646622 +epoch: 1, batch: 32636, sum loss: 4211.625977, avg loss: 2.416309, ppl: 11.204423 +epoch: 1, batch: 32637, sum loss: 3585.562988, avg loss: 2.270781, ppl: 9.686961 +epoch: 1, batch: 32638, sum loss: 4421.471680, avg loss: 2.720906, ppl: 15.194075 +epoch: 1, batch: 32639, sum loss: 4258.818848, avg loss: 2.831661, ppl: 16.973639 +epoch: 1, batch: 32640, sum loss: 
4736.859375, avg loss: 2.778217, ppl: 16.090303 +epoch: 1, batch: 32641, sum loss: 4951.071289, avg loss: 2.783064, ppl: 16.168488 +epoch: 1, batch: 32642, sum loss: 5047.006836, avg loss: 2.678879, ppl: 14.568746 +epoch: 1, batch: 32643, sum loss: 4236.254883, avg loss: 2.652633, ppl: 14.191355 +epoch: 1, batch: 32644, sum loss: 4271.382812, avg loss: 2.585583, ppl: 13.271024 +epoch: 1, batch: 32645, sum loss: 4722.220215, avg loss: 2.713920, ppl: 15.088301 +epoch: 1, batch: 32646, sum loss: 5083.737793, avg loss: 2.960826, ppl: 19.313911 +epoch: 1, batch: 32647, sum loss: 5859.916016, avg loss: 2.873917, ppl: 17.706232 +epoch: 1, batch: 32648, sum loss: 4542.167969, avg loss: 2.659349, ppl: 14.286982 +epoch: 1, batch: 32649, sum loss: 4219.245117, avg loss: 2.635381, ppl: 13.948629 +epoch: 1, batch: 32650, sum loss: 4484.256836, avg loss: 2.569775, ppl: 13.062881 +epoch: 1, batch: 32651, sum loss: 5521.945801, avg loss: 2.820197, ppl: 16.780157 +epoch: 1, batch: 32652, sum loss: 4334.490723, avg loss: 2.522986, ppl: 12.465769 +epoch: 1, batch: 32653, sum loss: 3640.818115, avg loss: 2.495420, ppl: 12.126828 +epoch: 1, batch: 32654, sum loss: 3977.171387, avg loss: 2.618283, ppl: 13.712153 +epoch: 1, batch: 32655, sum loss: 3861.923340, avg loss: 2.605886, ppl: 13.543222 +epoch: 1, batch: 32656, sum loss: 4032.613281, avg loss: 2.486198, ppl: 12.015506 +epoch: 1, batch: 32657, sum loss: 4574.322754, avg loss: 2.551212, ppl: 12.822634 +epoch: 1, batch: 32658, sum loss: 4244.520508, avg loss: 2.684706, ppl: 14.653895 +epoch: 1, batch: 32659, sum loss: 4908.762695, avg loss: 2.822750, ppl: 16.823055 +epoch: 1, batch: 32660, sum loss: 5101.575684, avg loss: 2.713604, ppl: 15.083538 +epoch: 1, batch: 32661, sum loss: 4662.269043, avg loss: 2.665677, ppl: 14.377681 +epoch: 1, batch: 32662, sum loss: 3815.234131, avg loss: 2.634830, ppl: 13.940946 +epoch: 1, batch: 32663, sum loss: 4286.636719, avg loss: 2.508272, ppl: 12.283685 +epoch: 1, batch: 32664, sum loss: 
4902.496582, avg loss: 2.689247, ppl: 14.720582 +epoch: 1, batch: 32665, sum loss: 5128.122559, avg loss: 2.696174, ppl: 14.822909 +epoch: 1, batch: 32666, sum loss: 3915.512695, avg loss: 2.514780, ppl: 12.363892 +epoch: 1, batch: 32667, sum loss: 4486.993164, avg loss: 2.648756, ppl: 14.136446 +epoch: 1, batch: 32668, sum loss: 3642.491211, avg loss: 2.417048, ppl: 11.212710 +epoch: 1, batch: 32669, sum loss: 4094.177002, avg loss: 2.775713, ppl: 16.050070 +epoch: 1, batch: 32670, sum loss: 4505.481934, avg loss: 2.448631, ppl: 11.572496 +epoch: 1, batch: 32671, sum loss: 3518.234863, avg loss: 2.280126, ppl: 9.777916 +epoch: 1, batch: 32672, sum loss: 3817.604736, avg loss: 2.654802, ppl: 14.222164 +epoch: 1, batch: 32673, sum loss: 3900.527588, avg loss: 2.443940, ppl: 11.518330 +epoch: 1, batch: 32674, sum loss: 4736.715332, avg loss: 2.695911, ppl: 14.819008 +epoch: 1, batch: 32675, sum loss: 4440.604004, avg loss: 2.550606, ppl: 12.814861 +epoch: 1, batch: 32676, sum loss: 4070.504883, avg loss: 2.571386, ppl: 13.083951 +epoch: 1, batch: 32677, sum loss: 4992.747070, avg loss: 2.551225, ppl: 12.822802 +epoch: 1, batch: 32678, sum loss: 4547.989258, avg loss: 2.726612, ppl: 15.281033 +epoch: 1, batch: 32679, sum loss: 4692.611816, avg loss: 2.560072, ppl: 12.936748 +epoch: 1, batch: 32680, sum loss: 4696.876953, avg loss: 2.641663, ppl: 14.036528 +epoch: 1, batch: 32681, sum loss: 4265.875977, avg loss: 2.722321, ppl: 15.215590 +epoch: 1, batch: 32682, sum loss: 4979.513672, avg loss: 2.775649, ppl: 16.049036 +epoch: 1, batch: 32683, sum loss: 4664.746582, avg loss: 2.546259, ppl: 12.759284 +epoch: 1, batch: 32684, sum loss: 4319.499023, avg loss: 2.713253, ppl: 15.078246 +epoch: 1, batch: 32685, sum loss: 4042.278320, avg loss: 2.577984, ppl: 13.170554 +epoch: 1, batch: 32686, sum loss: 4710.664551, avg loss: 2.659890, ppl: 14.294712 +epoch: 1, batch: 32687, sum loss: 5271.963867, avg loss: 2.761636, ppl: 15.825721 +epoch: 1, batch: 32688, sum loss: 
4210.233887, avg loss: 2.725071, ppl: 15.257493 +epoch: 1, batch: 32689, sum loss: 4426.115723, avg loss: 2.542284, ppl: 12.708658 +epoch: 1, batch: 32690, sum loss: 4804.917969, avg loss: 2.705472, ppl: 14.961377 +epoch: 1, batch: 32691, sum loss: 5308.349609, avg loss: 2.720835, ppl: 15.193010 +epoch: 1, batch: 32692, sum loss: 4218.680176, avg loss: 2.457006, ppl: 11.669825 +epoch: 1, batch: 32693, sum loss: 4528.859863, avg loss: 2.628474, ppl: 13.852608 +epoch: 1, batch: 32694, sum loss: 5604.980469, avg loss: 2.886190, ppl: 17.924885 +epoch: 1, batch: 32695, sum loss: 3599.997070, avg loss: 2.360654, ppl: 10.597878 +epoch: 1, batch: 32696, sum loss: 4983.473145, avg loss: 2.708409, ppl: 15.005387 +epoch: 1, batch: 32697, sum loss: 3788.957275, avg loss: 2.519253, ppl: 12.419322 +epoch: 1, batch: 32698, sum loss: 3739.499756, avg loss: 2.523279, ppl: 12.469422 +epoch: 1, batch: 32699, sum loss: 4429.110352, avg loss: 2.560179, ppl: 12.938136 +epoch: 1, batch: 32700, sum loss: 4555.630371, avg loss: 2.920276, ppl: 18.546404 +epoch: 1, batch: 32701, sum loss: 4757.342773, avg loss: 2.560464, ppl: 12.941826 +epoch: 1, batch: 32702, sum loss: 4576.995605, avg loss: 2.618419, ppl: 13.714021 +epoch: 1, batch: 32703, sum loss: 5057.920898, avg loss: 2.657867, ppl: 14.265827 +epoch: 1, batch: 32704, sum loss: 4476.659180, avg loss: 2.658349, ppl: 14.272702 +epoch: 1, batch: 32705, sum loss: 4633.027344, avg loss: 2.785945, ppl: 16.215141 +epoch: 1, batch: 32706, sum loss: 4827.335938, avg loss: 2.837940, ppl: 17.080544 +epoch: 1, batch: 32707, sum loss: 3611.448486, avg loss: 2.406028, ppl: 11.089828 +epoch: 1, batch: 32708, sum loss: 5254.570801, avg loss: 2.679537, ppl: 14.578336 +epoch: 1, batch: 32709, sum loss: 5159.932129, avg loss: 2.674926, ppl: 14.511273 +epoch: 1, batch: 32710, sum loss: 3205.124512, avg loss: 2.317516, ppl: 10.150428 +epoch: 1, batch: 32711, sum loss: 3553.620605, avg loss: 2.630363, ppl: 13.878807 +epoch: 1, batch: 32712, sum loss: 
4206.759766, avg loss: 2.512999, ppl: 12.341886 +epoch: 1, batch: 32713, sum loss: 4303.501953, avg loss: 2.373691, ppl: 10.736953 +epoch: 1, batch: 32714, sum loss: 3770.908203, avg loss: 2.451826, ppl: 11.609525 +epoch: 1, batch: 32715, sum loss: 4352.765625, avg loss: 2.756660, ppl: 15.747155 +epoch: 1, batch: 32716, sum loss: 4042.243408, avg loss: 2.427774, ppl: 11.333625 +epoch: 1, batch: 32717, sum loss: 3952.340576, avg loss: 2.482626, ppl: 11.972663 +epoch: 1, batch: 32718, sum loss: 5162.110840, avg loss: 2.879036, ppl: 17.797104 +epoch: 1, batch: 32719, sum loss: 5250.982422, avg loss: 2.809514, ppl: 16.601852 +epoch: 1, batch: 32720, sum loss: 4179.341797, avg loss: 2.276330, ppl: 9.740866 +epoch: 1, batch: 32721, sum loss: 4424.219238, avg loss: 2.548513, ppl: 12.788079 +epoch: 1, batch: 32722, sum loss: 5001.298340, avg loss: 2.670208, ppl: 14.442966 +epoch: 1, batch: 32723, sum loss: 4175.108398, avg loss: 2.671215, ppl: 14.457519 +epoch: 1, batch: 32724, sum loss: 4608.571289, avg loss: 2.630463, ppl: 13.880197 +epoch: 1, batch: 32725, sum loss: 5213.357910, avg loss: 2.778975, ppl: 16.102516 +epoch: 1, batch: 32726, sum loss: 4674.034180, avg loss: 2.624388, ppl: 13.796122 +epoch: 1, batch: 32727, sum loss: 4847.818359, avg loss: 2.779712, ppl: 16.114382 +epoch: 1, batch: 32728, sum loss: 4100.932129, avg loss: 2.600464, ppl: 13.469992 +epoch: 1, batch: 32729, sum loss: 3983.889893, avg loss: 2.494609, ppl: 12.116990 +epoch: 1, batch: 32730, sum loss: 4793.173828, avg loss: 2.897929, ppl: 18.136541 +epoch: 1, batch: 32731, sum loss: 4004.740723, avg loss: 2.709568, ppl: 15.022778 +epoch: 1, batch: 32732, sum loss: 4322.405762, avg loss: 2.727070, ppl: 15.288025 +epoch: 1, batch: 32733, sum loss: 4107.876953, avg loss: 2.524817, ppl: 12.488606 +epoch: 1, batch: 32734, sum loss: 4268.617188, avg loss: 2.505057, ppl: 12.244258 +epoch: 1, batch: 32735, sum loss: 3690.214844, avg loss: 2.559095, ppl: 12.924114 +epoch: 1, batch: 32736, sum loss: 
4648.489258, avg loss: 2.653247, ppl: 14.200076 +epoch: 1, batch: 32737, sum loss: 5228.927734, avg loss: 2.680127, ppl: 14.586947 +epoch: 1, batch: 32738, sum loss: 5031.375488, avg loss: 2.690575, ppl: 14.740150 +epoch: 1, batch: 32739, sum loss: 3483.241455, avg loss: 2.498739, ppl: 12.167136 +epoch: 1, batch: 32740, sum loss: 4313.611816, avg loss: 2.758064, ppl: 15.769280 +epoch: 1, batch: 32741, sum loss: 4929.329102, avg loss: 2.686283, ppl: 14.677022 +epoch: 1, batch: 32742, sum loss: 4267.502930, avg loss: 2.718155, ppl: 15.152335 +epoch: 1, batch: 32743, sum loss: 5345.576172, avg loss: 2.829844, ppl: 16.942825 +epoch: 1, batch: 32744, sum loss: 4160.312012, avg loss: 2.504703, ppl: 12.239926 +epoch: 1, batch: 32745, sum loss: 3842.477295, avg loss: 2.374831, ppl: 10.749201 +epoch: 1, batch: 32746, sum loss: 4748.355957, avg loss: 2.639442, ppl: 14.005386 +epoch: 1, batch: 32747, sum loss: 4659.258301, avg loss: 2.611692, ppl: 13.622079 +epoch: 1, batch: 32748, sum loss: 4305.184082, avg loss: 2.664099, ppl: 14.355009 +epoch: 1, batch: 32749, sum loss: 4121.261230, avg loss: 2.613355, ppl: 13.644757 +epoch: 1, batch: 32750, sum loss: 4303.690430, avg loss: 2.510905, ppl: 12.316066 +epoch: 1, batch: 32751, sum loss: 5046.648438, avg loss: 2.847996, ppl: 17.253168 +epoch: 1, batch: 32752, sum loss: 4766.000488, avg loss: 2.689617, ppl: 14.726030 +epoch: 1, batch: 32753, sum loss: 5658.691895, avg loss: 2.725767, ppl: 15.268115 +epoch: 1, batch: 32754, sum loss: 5768.509766, avg loss: 2.841630, ppl: 17.143694 +epoch: 1, batch: 32755, sum loss: 4958.233398, avg loss: 2.590508, ppl: 13.336551 +epoch: 1, batch: 32756, sum loss: 3819.921143, avg loss: 2.459705, ppl: 11.701355 +epoch: 1, batch: 32757, sum loss: 4889.789551, avg loss: 2.605109, ppl: 13.532700 +epoch: 1, batch: 32758, sum loss: 3444.385254, avg loss: 2.430759, ppl: 11.367506 +epoch: 1, batch: 32759, sum loss: 4202.310059, avg loss: 2.567080, ppl: 13.027731 +epoch: 1, batch: 32760, sum loss: 
5320.116211, avg loss: 2.916730, ppl: 18.480764 +epoch: 1, batch: 32761, sum loss: 5539.519531, avg loss: 3.067286, ppl: 21.483528 +epoch: 1, batch: 32762, sum loss: 3970.582764, avg loss: 2.427007, ppl: 11.324933 +epoch: 1, batch: 32763, sum loss: 4257.998047, avg loss: 2.549700, ppl: 12.803256 +epoch: 1, batch: 32764, sum loss: 3693.184082, avg loss: 2.431326, ppl: 11.373953 +epoch: 1, batch: 32765, sum loss: 4173.099121, avg loss: 2.483988, ppl: 11.988976 +epoch: 1, batch: 32766, sum loss: 4309.038086, avg loss: 2.616295, ppl: 13.684928 +epoch: 1, batch: 32767, sum loss: 4225.877930, avg loss: 2.679694, ppl: 14.580637 +epoch: 1, batch: 32768, sum loss: 4222.951172, avg loss: 2.574970, ppl: 13.130926 +epoch: 1, batch: 32769, sum loss: 4623.547852, avg loss: 2.577229, ppl: 13.160613 +epoch: 1, batch: 32770, sum loss: 3980.021240, avg loss: 2.586109, ppl: 13.278002 +epoch: 1, batch: 32771, sum loss: 4203.093750, avg loss: 2.452214, ppl: 11.614026 +epoch: 1, batch: 32772, sum loss: 4190.112793, avg loss: 2.607413, ppl: 13.563916 +epoch: 1, batch: 32773, sum loss: 6577.126953, avg loss: 3.140939, ppl: 23.125578 +epoch: 1, batch: 32774, sum loss: 4664.503906, avg loss: 2.719827, ppl: 15.177699 +epoch: 1, batch: 32775, sum loss: 3960.883545, avg loss: 2.633566, ppl: 13.923334 +epoch: 1, batch: 32776, sum loss: 4163.688477, avg loss: 2.491735, ppl: 12.082217 +epoch: 1, batch: 32777, sum loss: 3952.870117, avg loss: 2.543674, ppl: 12.726348 +epoch: 1, batch: 32778, sum loss: 4860.063965, avg loss: 2.755138, ppl: 15.723217 +epoch: 1, batch: 32779, sum loss: 4420.464844, avg loss: 2.540497, ppl: 12.685975 +epoch: 1, batch: 32780, sum loss: 4053.640869, avg loss: 2.939551, ppl: 18.907352 +epoch: 1, batch: 32781, sum loss: 4830.921875, avg loss: 2.615551, ppl: 13.674746 +epoch: 1, batch: 32782, sum loss: 4728.052734, avg loss: 2.510915, ppl: 12.316192 +epoch: 1, batch: 32783, sum loss: 3852.968506, avg loss: 2.587621, ppl: 13.298103 +epoch: 1, batch: 32784, sum loss: 
4209.838867, avg loss: 2.664455, ppl: 14.360120 +epoch: 1, batch: 32785, sum loss: 4930.436035, avg loss: 2.787132, ppl: 16.234388 +epoch: 1, batch: 32786, sum loss: 3968.874023, avg loss: 2.398111, ppl: 11.002378 +epoch: 1, batch: 32787, sum loss: 3767.966064, avg loss: 2.321606, ppl: 10.192026 +epoch: 1, batch: 32788, sum loss: 5036.857910, avg loss: 2.730004, ppl: 15.332953 +epoch: 1, batch: 32789, sum loss: 5512.121094, avg loss: 2.941367, ppl: 18.941715 +epoch: 1, batch: 32790, sum loss: 4169.849121, avg loss: 2.644166, ppl: 14.071701 +epoch: 1, batch: 32791, sum loss: 4401.546875, avg loss: 2.426432, ppl: 11.318422 +epoch: 1, batch: 32792, sum loss: 4678.269531, avg loss: 2.688661, ppl: 14.711958 +epoch: 1, batch: 32793, sum loss: 5043.793945, avg loss: 2.802108, ppl: 16.479345 +epoch: 1, batch: 32794, sum loss: 4184.130859, avg loss: 2.522080, ppl: 12.454477 +epoch: 1, batch: 32795, sum loss: 4143.044434, avg loss: 2.700811, ppl: 14.891807 +epoch: 1, batch: 32796, sum loss: 5061.136230, avg loss: 2.524257, ppl: 12.481624 +epoch: 1, batch: 32797, sum loss: 4634.788086, avg loss: 2.535442, ppl: 12.622010 +epoch: 1, batch: 32798, sum loss: 4887.544434, avg loss: 2.569687, ppl: 13.061737 +epoch: 1, batch: 32799, sum loss: 4756.666016, avg loss: 2.793110, ppl: 16.331732 +epoch: 1, batch: 32800, sum loss: 4929.052246, avg loss: 3.012868, ppl: 20.345671 +epoch: 1, batch: 32801, sum loss: 4684.055664, avg loss: 2.900344, ppl: 18.180401 +epoch: 1, batch: 32802, sum loss: 5151.976562, avg loss: 2.843254, ppl: 17.171553 +epoch: 1, batch: 32803, sum loss: 4469.050293, avg loss: 2.655407, ppl: 14.230780 +epoch: 1, batch: 32804, sum loss: 4991.145508, avg loss: 2.564823, ppl: 12.998360 +epoch: 1, batch: 32805, sum loss: 3760.451416, avg loss: 2.462640, ppl: 11.735754 +epoch: 1, batch: 32806, sum loss: 4364.312500, avg loss: 2.461541, ppl: 11.722864 +epoch: 1, batch: 32807, sum loss: 3700.919922, avg loss: 2.462355, ppl: 11.732410 +epoch: 1, batch: 32808, sum loss: 
4285.779785, avg loss: 2.525504, ppl: 12.497190 +epoch: 1, batch: 32809, sum loss: 4593.335449, avg loss: 2.590714, ppl: 13.339289 +epoch: 1, batch: 32810, sum loss: 5001.398926, avg loss: 2.728532, ppl: 15.310392 +epoch: 1, batch: 32811, sum loss: 5491.691406, avg loss: 2.832229, ppl: 16.983269 +epoch: 1, batch: 32812, sum loss: 4296.423828, avg loss: 2.866194, ppl: 17.570011 +epoch: 1, batch: 32813, sum loss: 4571.229492, avg loss: 2.802716, ppl: 16.489368 +epoch: 1, batch: 32814, sum loss: 4248.979004, avg loss: 2.405990, ppl: 11.089407 +epoch: 1, batch: 32815, sum loss: 4304.310547, avg loss: 2.798641, ppl: 16.422321 +epoch: 1, batch: 32816, sum loss: 4855.331543, avg loss: 2.446011, ppl: 11.542211 +epoch: 1, batch: 32817, sum loss: 4270.973633, avg loss: 2.737804, ppl: 15.453009 +epoch: 1, batch: 32818, sum loss: 3749.779053, avg loss: 2.584272, ppl: 13.253639 +epoch: 1, batch: 32819, sum loss: 3577.117676, avg loss: 2.440053, ppl: 11.473649 +epoch: 1, batch: 32820, sum loss: 3941.153809, avg loss: 2.528001, ppl: 12.528440 +epoch: 1, batch: 32821, sum loss: 4228.852051, avg loss: 2.605577, ppl: 13.539039 +epoch: 1, batch: 32822, sum loss: 4169.929199, avg loss: 2.598087, ppl: 13.438001 +epoch: 1, batch: 32823, sum loss: 4227.919922, avg loss: 2.509151, ppl: 12.294490 +epoch: 1, batch: 32824, sum loss: 4412.839355, avg loss: 2.558168, ppl: 12.912140 +epoch: 1, batch: 32825, sum loss: 4467.317383, avg loss: 2.696027, ppl: 14.820736 +epoch: 1, batch: 32826, sum loss: 3992.175537, avg loss: 2.577260, ppl: 13.161025 +epoch: 1, batch: 32827, sum loss: 4390.913574, avg loss: 2.720516, ppl: 15.188164 +epoch: 1, batch: 32828, sum loss: 4562.299805, avg loss: 2.693211, ppl: 14.779057 +epoch: 1, batch: 32829, sum loss: 4400.717773, avg loss: 2.540830, ppl: 12.690201 +epoch: 1, batch: 32830, sum loss: 4434.734863, avg loss: 2.668312, ppl: 14.415620 +epoch: 1, batch: 32831, sum loss: 3989.881836, avg loss: 2.403543, ppl: 11.062304 +epoch: 1, batch: 32832, sum loss: 
5291.998047, avg loss: 2.524808, ppl: 12.488500 +epoch: 1, batch: 32833, sum loss: 4338.722168, avg loss: 2.727041, ppl: 15.287584 +epoch: 1, batch: 32834, sum loss: 3633.495361, avg loss: 2.535587, ppl: 12.623834 +epoch: 1, batch: 32835, sum loss: 4096.607422, avg loss: 2.432665, ppl: 11.389193 +epoch: 1, batch: 32836, sum loss: 5173.613281, avg loss: 2.733023, ppl: 15.379311 +epoch: 1, batch: 32837, sum loss: 3416.985107, avg loss: 2.207355, ppl: 9.091635 +epoch: 1, batch: 32838, sum loss: 4033.657227, avg loss: 2.641557, ppl: 14.035035 +epoch: 1, batch: 32839, sum loss: 4806.001953, avg loss: 2.560470, ppl: 12.941896 +epoch: 1, batch: 32840, sum loss: 3971.017090, avg loss: 2.229656, ppl: 9.296667 +epoch: 1, batch: 32841, sum loss: 4274.736328, avg loss: 2.640356, ppl: 14.018194 +epoch: 1, batch: 32842, sum loss: 4013.319824, avg loss: 2.439708, ppl: 11.469691 +epoch: 1, batch: 32843, sum loss: 4084.145020, avg loss: 2.418085, ppl: 11.224339 +epoch: 1, batch: 32844, sum loss: 3957.615723, avg loss: 2.417603, ppl: 11.218933 +epoch: 1, batch: 32845, sum loss: 4866.076172, avg loss: 2.414926, ppl: 11.188946 +epoch: 1, batch: 32846, sum loss: 3576.009033, avg loss: 2.103535, ppl: 8.195087 +epoch: 1, batch: 32847, sum loss: 4223.751465, avg loss: 2.459960, ppl: 11.704346 +epoch: 1, batch: 32848, sum loss: 4817.180176, avg loss: 2.707802, ppl: 14.996282 +epoch: 1, batch: 32849, sum loss: 4652.274902, avg loss: 2.596136, ppl: 13.411809 +epoch: 1, batch: 32850, sum loss: 4256.946777, avg loss: 2.586237, ppl: 13.279712 +epoch: 1, batch: 32851, sum loss: 4399.938477, avg loss: 2.568557, ppl: 13.046988 +epoch: 1, batch: 32852, sum loss: 4453.055664, avg loss: 2.521549, ppl: 12.447864 +epoch: 1, batch: 32853, sum loss: 4248.066406, avg loss: 2.599796, ppl: 13.460989 +epoch: 1, batch: 32854, sum loss: 5263.809570, avg loss: 2.828485, ppl: 16.919800 +epoch: 1, batch: 32855, sum loss: 4664.380859, avg loss: 2.478417, ppl: 11.922376 +epoch: 1, batch: 32856, sum loss: 
4910.176758, avg loss: 2.912323, ppl: 18.399496 +epoch: 1, batch: 32857, sum loss: 4977.746582, avg loss: 2.760813, ppl: 15.812701 +epoch: 1, batch: 32858, sum loss: 4238.687988, avg loss: 2.618090, ppl: 13.709516 +epoch: 1, batch: 32859, sum loss: 5224.256836, avg loss: 2.732352, ppl: 15.368989 +epoch: 1, batch: 32860, sum loss: 4177.457520, avg loss: 2.559717, ppl: 12.932153 +epoch: 1, batch: 32861, sum loss: 5374.613281, avg loss: 2.895805, ppl: 18.098059 +epoch: 1, batch: 32862, sum loss: 4116.336914, avg loss: 2.631929, ppl: 13.900558 +epoch: 1, batch: 32863, sum loss: 4853.854004, avg loss: 2.587342, ppl: 13.294392 +epoch: 1, batch: 32864, sum loss: 4181.109863, avg loss: 2.368901, ppl: 10.685640 +epoch: 1, batch: 32865, sum loss: 5111.770020, avg loss: 2.707505, ppl: 14.991828 +epoch: 1, batch: 32866, sum loss: 4012.071045, avg loss: 2.405318, ppl: 11.081960 +epoch: 1, batch: 32867, sum loss: 4989.515625, avg loss: 2.488536, ppl: 12.043635 +epoch: 1, batch: 32868, sum loss: 4483.030273, avg loss: 2.637077, ppl: 13.972301 +epoch: 1, batch: 32869, sum loss: 4636.960449, avg loss: 2.652724, ppl: 14.192640 +epoch: 1, batch: 32870, sum loss: 4331.672363, avg loss: 2.560090, ppl: 12.936982 +epoch: 1, batch: 32871, sum loss: 4636.743652, avg loss: 2.558909, ppl: 12.921718 +epoch: 1, batch: 32872, sum loss: 4047.951904, avg loss: 2.451818, ppl: 11.609439 +epoch: 1, batch: 32873, sum loss: 4124.019531, avg loss: 2.466519, ppl: 11.781365 +epoch: 1, batch: 32874, sum loss: 3388.077393, avg loss: 2.543602, ppl: 12.725422 +epoch: 1, batch: 32875, sum loss: 4179.020996, avg loss: 2.573289, ppl: 13.108865 +epoch: 1, batch: 32876, sum loss: 3928.243164, avg loss: 2.562455, ppl: 12.967613 +epoch: 1, batch: 32877, sum loss: 4214.124023, avg loss: 2.675634, ppl: 14.521556 +epoch: 1, batch: 32878, sum loss: 4915.758301, avg loss: 2.834924, ppl: 17.029106 +epoch: 1, batch: 32879, sum loss: 5341.255859, avg loss: 2.910766, ppl: 18.370872 +epoch: 1, batch: 32880, sum loss: 
5135.653809, avg loss: 2.842088, ppl: 17.151545 +epoch: 1, batch: 32881, sum loss: 4207.989258, avg loss: 2.528840, ppl: 12.538950 +epoch: 1, batch: 32882, sum loss: 4325.603027, avg loss: 2.438333, ppl: 11.453932 +epoch: 1, batch: 32883, sum loss: 3988.450439, avg loss: 2.527535, ppl: 12.522602 +epoch: 1, batch: 32884, sum loss: 4397.943359, avg loss: 2.548055, ppl: 12.782221 +epoch: 1, batch: 32885, sum loss: 5023.513184, avg loss: 2.806432, ppl: 16.550756 +epoch: 1, batch: 32886, sum loss: 4717.202148, avg loss: 2.745752, ppl: 15.576324 +epoch: 1, batch: 32887, sum loss: 4042.565918, avg loss: 2.529766, ppl: 12.550567 +epoch: 1, batch: 32888, sum loss: 4604.258789, avg loss: 2.599807, ppl: 13.461143 +epoch: 1, batch: 32889, sum loss: 4978.956055, avg loss: 2.526107, ppl: 12.504725 +epoch: 1, batch: 32890, sum loss: 4497.017090, avg loss: 2.672024, ppl: 14.469229 +epoch: 1, batch: 32891, sum loss: 3776.256592, avg loss: 2.584707, ppl: 13.259400 +epoch: 1, batch: 32892, sum loss: 3566.023926, avg loss: 2.344526, ppl: 10.428329 +epoch: 1, batch: 32893, sum loss: 4498.714355, avg loss: 2.538778, ppl: 12.664184 +epoch: 1, batch: 32894, sum loss: 4062.753418, avg loss: 2.250833, ppl: 9.495641 +epoch: 1, batch: 32895, sum loss: 4387.428711, avg loss: 2.725111, ppl: 15.258107 +epoch: 1, batch: 32896, sum loss: 4777.950684, avg loss: 2.586871, ppl: 13.288130 +epoch: 1, batch: 32897, sum loss: 4590.891602, avg loss: 2.942879, ppl: 18.970392 +epoch: 1, batch: 32898, sum loss: 5621.729004, avg loss: 2.982350, ppl: 19.734131 +epoch: 1, batch: 32899, sum loss: 5024.048340, avg loss: 2.822499, ppl: 16.818829 +epoch: 1, batch: 32900, sum loss: 5179.368652, avg loss: 3.011261, ppl: 20.312994 +epoch: 1, batch: 32901, sum loss: 4117.460938, avg loss: 2.619250, ppl: 13.725427 +epoch: 1, batch: 32902, sum loss: 4424.152344, avg loss: 2.696010, ppl: 14.820478 +epoch: 1, batch: 32903, sum loss: 5041.175293, avg loss: 2.687194, ppl: 14.690392 +epoch: 1, batch: 32904, sum loss: 
3799.498291, avg loss: 2.575931, ppl: 13.143549 +epoch: 1, batch: 32905, sum loss: 4312.114746, avg loss: 2.583652, ppl: 13.245420 +epoch: 1, batch: 32906, sum loss: 4904.116211, avg loss: 2.663833, ppl: 14.351191 +epoch: 1, batch: 32907, sum loss: 4954.427734, avg loss: 2.598022, ppl: 13.437133 +epoch: 1, batch: 32908, sum loss: 3988.766113, avg loss: 2.711602, ppl: 15.053368 +epoch: 1, batch: 32909, sum loss: 5596.962402, avg loss: 3.095665, ppl: 22.101931 +epoch: 1, batch: 32910, sum loss: 3828.487549, avg loss: 2.611520, ppl: 13.619731 +epoch: 1, batch: 32911, sum loss: 4053.520996, avg loss: 2.500630, ppl: 12.190167 +epoch: 1, batch: 32912, sum loss: 4008.286865, avg loss: 2.527293, ppl: 12.519572 +epoch: 1, batch: 32913, sum loss: 4658.273926, avg loss: 2.433790, ppl: 11.402014 +epoch: 1, batch: 32914, sum loss: 5699.677246, avg loss: 2.720610, ppl: 15.189580 +epoch: 1, batch: 32915, sum loss: 4403.874512, avg loss: 2.665784, ppl: 14.379213 +epoch: 1, batch: 32916, sum loss: 5023.712402, avg loss: 2.699469, ppl: 14.871838 +epoch: 1, batch: 32917, sum loss: 4055.473633, avg loss: 2.619815, ppl: 13.733184 +epoch: 1, batch: 32918, sum loss: 4614.489746, avg loss: 2.520202, ppl: 12.431109 +epoch: 1, batch: 32919, sum loss: 5943.894531, avg loss: 2.907972, ppl: 18.319607 +epoch: 1, batch: 32920, sum loss: 4965.131836, avg loss: 2.728095, ppl: 15.303699 +epoch: 1, batch: 32921, sum loss: 4220.380859, avg loss: 2.551621, ppl: 12.827881 +epoch: 1, batch: 32922, sum loss: 4252.354980, avg loss: 2.445287, ppl: 11.533865 +epoch: 1, batch: 32923, sum loss: 3505.180664, avg loss: 2.412375, ppl: 11.160436 +epoch: 1, batch: 32924, sum loss: 4808.572266, avg loss: 2.722861, ppl: 15.223812 +epoch: 1, batch: 32925, sum loss: 4017.657715, avg loss: 2.397171, ppl: 10.992031 +epoch: 1, batch: 32926, sum loss: 3981.514160, avg loss: 2.628062, ppl: 13.846912 +epoch: 1, batch: 32927, sum loss: 4350.910156, avg loss: 2.610024, ppl: 13.599377 +epoch: 1, batch: 32928, sum loss: 
4231.092285, avg loss: 2.631276, ppl: 13.891486 +epoch: 1, batch: 32929, sum loss: 4352.888184, avg loss: 2.804696, ppl: 16.522055 +epoch: 1, batch: 32930, sum loss: 4613.378418, avg loss: 2.562988, ppl: 12.974528 +epoch: 1, batch: 32931, sum loss: 4845.601562, avg loss: 2.694995, ppl: 14.805448 +epoch: 1, batch: 32932, sum loss: 4702.656250, avg loss: 2.867473, ppl: 17.592512 +epoch: 1, batch: 32933, sum loss: 3656.143555, avg loss: 2.389636, ppl: 10.909525 +epoch: 1, batch: 32934, sum loss: 4338.430664, avg loss: 2.728573, ppl: 15.311017 +epoch: 1, batch: 32935, sum loss: 4430.673340, avg loss: 2.510297, ppl: 12.308580 +epoch: 1, batch: 32936, sum loss: 3772.991699, avg loss: 2.295007, ppl: 9.924505 +epoch: 1, batch: 32937, sum loss: 4462.971680, avg loss: 2.594751, ppl: 13.393250 +epoch: 1, batch: 32938, sum loss: 4732.766113, avg loss: 2.561021, ppl: 12.949026 +epoch: 1, batch: 32939, sum loss: 5423.338867, avg loss: 2.775506, ppl: 16.046745 +epoch: 1, batch: 32940, sum loss: 4517.985840, avg loss: 2.632859, ppl: 13.913491 +epoch: 1, batch: 32941, sum loss: 4410.258301, avg loss: 2.601922, ppl: 13.489644 +epoch: 1, batch: 32942, sum loss: 4785.572266, avg loss: 2.737742, ppl: 15.452050 +epoch: 1, batch: 32943, sum loss: 5173.724609, avg loss: 2.768178, ppl: 15.929580 +epoch: 1, batch: 32944, sum loss: 4061.789795, avg loss: 2.748166, ppl: 15.613975 +epoch: 1, batch: 32945, sum loss: 5013.034180, avg loss: 2.906107, ppl: 18.285473 +epoch: 1, batch: 32946, sum loss: 3731.905273, avg loss: 2.424890, ppl: 11.300984 +epoch: 1, batch: 32947, sum loss: 4669.907715, avg loss: 2.668519, ppl: 14.418596 +epoch: 1, batch: 32948, sum loss: 4662.992188, avg loss: 2.866006, ppl: 17.566719 +epoch: 1, batch: 32949, sum loss: 5276.746582, avg loss: 2.717171, ppl: 15.137444 +epoch: 1, batch: 32950, sum loss: 4801.080566, avg loss: 2.479897, ppl: 11.940032 +epoch: 1, batch: 32951, sum loss: 4726.048828, avg loss: 2.747703, ppl: 15.606739 +epoch: 1, batch: 32952, sum loss: 
4314.518066, avg loss: 2.788958, ppl: 16.264065 +epoch: 1, batch: 32953, sum loss: 5046.854004, avg loss: 2.639568, ppl: 14.007149 +epoch: 1, batch: 32954, sum loss: 4802.696777, avg loss: 3.086566, ppl: 21.901737 +epoch: 1, batch: 32955, sum loss: 4915.651855, avg loss: 2.680290, ppl: 14.589323 +epoch: 1, batch: 32956, sum loss: 3992.619385, avg loss: 2.362497, ppl: 10.617426 +epoch: 1, batch: 32957, sum loss: 5560.624023, avg loss: 2.957779, ppl: 19.255152 +epoch: 1, batch: 32958, sum loss: 5003.926758, avg loss: 2.693179, ppl: 14.778584 +epoch: 1, batch: 32959, sum loss: 4598.229492, avg loss: 2.708027, ppl: 14.999650 +epoch: 1, batch: 32960, sum loss: 4433.800781, avg loss: 2.880962, ppl: 17.831421 +epoch: 1, batch: 32961, sum loss: 4339.928711, avg loss: 2.622313, ppl: 13.767538 +epoch: 1, batch: 32962, sum loss: 4125.344238, avg loss: 2.489646, ppl: 12.057013 +epoch: 1, batch: 32963, sum loss: 4554.798828, avg loss: 2.551708, ppl: 12.829000 +epoch: 1, batch: 32964, sum loss: 4334.305176, avg loss: 2.593839, ppl: 13.381045 +epoch: 1, batch: 32965, sum loss: 3378.907471, avg loss: 2.275358, ppl: 9.731406 +epoch: 1, batch: 32966, sum loss: 4995.441895, avg loss: 2.722312, ppl: 15.215456 +epoch: 1, batch: 32967, sum loss: 4596.899414, avg loss: 2.713636, ppl: 15.084021 +epoch: 1, batch: 32968, sum loss: 4506.593262, avg loss: 2.943562, ppl: 18.983345 +epoch: 1, batch: 32969, sum loss: 4343.542480, avg loss: 2.465121, ppl: 11.764903 +epoch: 1, batch: 32970, sum loss: 5847.481445, avg loss: 3.071156, ppl: 21.566826 +epoch: 1, batch: 32971, sum loss: 5044.519043, avg loss: 2.936274, ppl: 18.845503 +epoch: 1, batch: 32972, sum loss: 5772.283203, avg loss: 3.075271, ppl: 21.655748 +epoch: 1, batch: 32973, sum loss: 4720.988281, avg loss: 2.714772, ppl: 15.101167 +epoch: 1, batch: 32974, sum loss: 5065.486816, avg loss: 2.613770, ppl: 13.650423 +epoch: 1, batch: 32975, sum loss: 5185.544434, avg loss: 2.759736, ppl: 15.795677 +epoch: 1, batch: 32976, sum loss: 
4973.371582, avg loss: 2.812993, ppl: 16.659706 +epoch: 1, batch: 32977, sum loss: 5387.245117, avg loss: 2.963281, ppl: 19.361395 +epoch: 1, batch: 32978, sum loss: 3987.434326, avg loss: 2.422500, ppl: 11.274005 +epoch: 1, batch: 32979, sum loss: 5010.080078, avg loss: 2.820991, ppl: 16.793486 +epoch: 1, batch: 32980, sum loss: 4702.271484, avg loss: 2.563943, ppl: 12.986923 +epoch: 1, batch: 32981, sum loss: 4515.994629, avg loss: 2.676938, ppl: 14.540506 +epoch: 1, batch: 32982, sum loss: 5020.369141, avg loss: 2.650670, ppl: 14.163529 +epoch: 1, batch: 32983, sum loss: 5253.177734, avg loss: 2.864328, ppl: 17.537262 +epoch: 1, batch: 32984, sum loss: 3981.537109, avg loss: 2.527960, ppl: 12.527924 +epoch: 1, batch: 32985, sum loss: 4483.440430, avg loss: 2.621895, ppl: 13.761775 +epoch: 1, batch: 32986, sum loss: 5289.753418, avg loss: 2.884271, ppl: 17.890524 +epoch: 1, batch: 32987, sum loss: 3830.877197, avg loss: 2.397295, ppl: 10.993399 +epoch: 1, batch: 32988, sum loss: 5940.497070, avg loss: 2.878148, ppl: 17.781309 +epoch: 1, batch: 32989, sum loss: 4403.077637, avg loss: 2.588523, ppl: 13.310097 +epoch: 1, batch: 32990, sum loss: 3838.825684, avg loss: 2.640183, ppl: 14.015764 +epoch: 1, batch: 32991, sum loss: 4963.605957, avg loss: 2.696147, ppl: 14.822506 +epoch: 1, batch: 32992, sum loss: 3691.101074, avg loss: 2.680538, ppl: 14.592945 +epoch: 1, batch: 32993, sum loss: 4099.854980, avg loss: 2.506024, ppl: 12.256102 +epoch: 1, batch: 32994, sum loss: 4792.180664, avg loss: 2.615819, ppl: 13.678417 +epoch: 1, batch: 32995, sum loss: 3998.524658, avg loss: 2.548454, ppl: 12.787323 +epoch: 1, batch: 32996, sum loss: 4286.349121, avg loss: 2.425778, ppl: 11.311023 +epoch: 1, batch: 32997, sum loss: 4876.835449, avg loss: 2.692897, ppl: 14.774409 +epoch: 1, batch: 32998, sum loss: 4816.452637, avg loss: 3.073678, ppl: 21.621275 +epoch: 1, batch: 32999, sum loss: 3975.875732, avg loss: 2.527575, ppl: 12.523101 +epoch: 1, batch: 33000, sum loss: 
4776.390137, avg loss: 2.597276, ppl: 13.427113 +epoch: 1, batch: 33001, sum loss: 4390.577637, avg loss: 2.664186, ppl: 14.356252 +epoch: 1, batch: 33002, sum loss: 3798.764648, avg loss: 2.750735, ppl: 15.654130 +epoch: 1, batch: 33003, sum loss: 5346.326172, avg loss: 2.900882, ppl: 18.190186 +epoch: 1, batch: 33004, sum loss: 5361.924316, avg loss: 2.918849, ppl: 18.519949 +epoch: 1, batch: 33005, sum loss: 4367.456055, avg loss: 2.674498, ppl: 14.505071 +epoch: 1, batch: 33006, sum loss: 4461.121582, avg loss: 2.587658, ppl: 13.298586 +epoch: 1, batch: 33007, sum loss: 5478.168457, avg loss: 2.985378, ppl: 19.793983 +epoch: 1, batch: 33008, sum loss: 4257.110352, avg loss: 2.766154, ppl: 15.897368 +epoch: 1, batch: 33009, sum loss: 3817.000000, avg loss: 2.467356, ppl: 11.791232 +epoch: 1, batch: 33010, sum loss: 5322.843262, avg loss: 2.763678, ppl: 15.858059 +epoch: 1, batch: 33011, sum loss: 5146.400391, avg loss: 2.950917, ppl: 19.123491 +epoch: 1, batch: 33012, sum loss: 4316.255371, avg loss: 2.510911, ppl: 12.316142 +epoch: 1, batch: 33013, sum loss: 3197.222656, avg loss: 2.407547, ppl: 11.106686 +epoch: 1, batch: 33014, sum loss: 4702.764648, avg loss: 2.784348, ppl: 16.189266 +epoch: 1, batch: 33015, sum loss: 4729.329102, avg loss: 2.671937, ppl: 14.467974 +epoch: 1, batch: 33016, sum loss: 4502.345703, avg loss: 2.667267, ppl: 14.400553 +epoch: 1, batch: 33017, sum loss: 5357.078125, avg loss: 2.765657, ppl: 15.889479 +epoch: 1, batch: 33018, sum loss: 4828.479004, avg loss: 2.740340, ppl: 15.492251 +epoch: 1, batch: 33019, sum loss: 4984.034180, avg loss: 2.717576, ppl: 15.143566 +epoch: 1, batch: 33020, sum loss: 4442.787109, avg loss: 2.611868, ppl: 13.624473 +epoch: 1, batch: 33021, sum loss: 4953.236328, avg loss: 2.830421, ppl: 16.952591 +epoch: 1, batch: 33022, sum loss: 4228.028320, avg loss: 2.613120, ppl: 13.641547 +epoch: 1, batch: 33023, sum loss: 4652.935547, avg loss: 2.769605, ppl: 15.952327 +epoch: 1, batch: 33024, sum loss: 
4722.680176, avg loss: 2.501420, ppl: 12.199800 +epoch: 1, batch: 33025, sum loss: 4489.681641, avg loss: 2.716081, ppl: 15.120942 +epoch: 1, batch: 33026, sum loss: 4327.321777, avg loss: 2.565099, ppl: 13.001945 +epoch: 1, batch: 33027, sum loss: 5113.857422, avg loss: 2.913879, ppl: 18.428137 +epoch: 1, batch: 33028, sum loss: 4926.744629, avg loss: 2.650212, ppl: 14.157043 +epoch: 1, batch: 33029, sum loss: 4096.318359, avg loss: 2.573064, ppl: 13.105924 +epoch: 1, batch: 33030, sum loss: 4893.558105, avg loss: 2.650898, ppl: 14.166758 +epoch: 1, batch: 33031, sum loss: 4268.088379, avg loss: 2.596161, ppl: 13.412148 +epoch: 1, batch: 33032, sum loss: 3143.340332, avg loss: 2.277783, ppl: 9.755029 +epoch: 1, batch: 33033, sum loss: 4510.290039, avg loss: 2.413210, ppl: 11.169761 +epoch: 1, batch: 33034, sum loss: 4310.048828, avg loss: 2.559412, ppl: 12.928207 +epoch: 1, batch: 33035, sum loss: 4911.840820, avg loss: 2.771919, ppl: 15.989292 +epoch: 1, batch: 33036, sum loss: 4778.938965, avg loss: 2.593022, ppl: 13.370111 +epoch: 1, batch: 33037, sum loss: 4795.891602, avg loss: 2.798070, ppl: 16.412935 +epoch: 1, batch: 33038, sum loss: 3721.000977, avg loss: 2.341725, ppl: 10.399158 +epoch: 1, batch: 33039, sum loss: 4334.028809, avg loss: 2.584394, ppl: 13.255254 +epoch: 1, batch: 33040, sum loss: 5071.016113, avg loss: 2.639780, ppl: 14.010115 +epoch: 1, batch: 33041, sum loss: 4443.199219, avg loss: 2.892708, ppl: 18.042099 +epoch: 1, batch: 33042, sum loss: 4349.322266, avg loss: 2.694747, ppl: 14.801780 +epoch: 1, batch: 33043, sum loss: 4424.338379, avg loss: 2.460700, ppl: 11.713005 +epoch: 1, batch: 33044, sum loss: 4571.789062, avg loss: 2.692455, ppl: 14.767891 +epoch: 1, batch: 33045, sum loss: 4613.167969, avg loss: 2.648202, ppl: 14.128612 +epoch: 1, batch: 33046, sum loss: 4059.874268, avg loss: 2.448658, ppl: 11.572800 +epoch: 1, batch: 33047, sum loss: 4930.419922, avg loss: 2.823838, ppl: 16.841356 +epoch: 1, batch: 33048, sum loss: 
4801.916016, avg loss: 2.712947, ppl: 15.073630 +epoch: 1, batch: 33049, sum loss: 4350.749023, avg loss: 2.774712, ppl: 16.034014 +epoch: 1, batch: 33050, sum loss: 4684.724609, avg loss: 2.649731, ppl: 14.150234 +epoch: 1, batch: 33051, sum loss: 3540.538086, avg loss: 2.666068, ppl: 14.383301 +epoch: 1, batch: 33052, sum loss: 3847.891357, avg loss: 2.360670, ppl: 10.598045 +epoch: 1, batch: 33053, sum loss: 4782.251953, avg loss: 2.555987, ppl: 12.884014 +epoch: 1, batch: 33054, sum loss: 3412.302246, avg loss: 2.333996, ppl: 10.319095 +epoch: 1, batch: 33055, sum loss: 4247.669434, avg loss: 2.339025, ppl: 10.371120 +epoch: 1, batch: 33056, sum loss: 3429.857910, avg loss: 2.446404, ppl: 11.546747 +epoch: 1, batch: 33057, sum loss: 3894.365723, avg loss: 2.443140, ppl: 11.509126 +epoch: 1, batch: 33058, sum loss: 4743.763184, avg loss: 2.787170, ppl: 16.235008 +epoch: 1, batch: 33059, sum loss: 3836.156250, avg loss: 2.528778, ppl: 12.538173 +epoch: 1, batch: 33060, sum loss: 4613.116699, avg loss: 2.633058, ppl: 13.916255 +epoch: 1, batch: 33061, sum loss: 4192.558105, avg loss: 2.689261, ppl: 14.720797 +epoch: 1, batch: 33062, sum loss: 5098.789062, avg loss: 2.851672, ppl: 17.316706 +epoch: 1, batch: 33063, sum loss: 3830.073486, avg loss: 2.511523, ppl: 12.323690 +epoch: 1, batch: 33064, sum loss: 4437.118164, avg loss: 2.634868, ppl: 13.941478 +epoch: 1, batch: 33065, sum loss: 4862.393066, avg loss: 2.649806, ppl: 14.151286 +epoch: 1, batch: 33066, sum loss: 4987.007812, avg loss: 2.839982, ppl: 17.115454 +epoch: 1, batch: 33067, sum loss: 5083.413574, avg loss: 2.660080, ppl: 14.297439 +epoch: 1, batch: 33068, sum loss: 5717.393555, avg loss: 2.819228, ppl: 16.763899 +epoch: 1, batch: 33069, sum loss: 4269.510254, avg loss: 2.645298, ppl: 14.087636 +epoch: 1, batch: 33070, sum loss: 4625.343262, avg loss: 2.730427, ppl: 15.339433 +epoch: 1, batch: 33071, sum loss: 4562.525391, avg loss: 2.785424, ppl: 16.206696 +epoch: 1, batch: 33072, sum loss: 
3392.989258, avg loss: 2.197532, ppl: 9.002767 +epoch: 1, batch: 33073, sum loss: 4654.107422, avg loss: 2.865830, ppl: 17.563620 +epoch: 1, batch: 33074, sum loss: 4901.697266, avg loss: 2.706625, ppl: 14.978634 +epoch: 1, batch: 33075, sum loss: 4984.089844, avg loss: 2.705803, ppl: 14.966335 +epoch: 1, batch: 33076, sum loss: 4129.935059, avg loss: 2.390009, ppl: 10.913589 +epoch: 1, batch: 33077, sum loss: 5499.583008, avg loss: 2.905221, ppl: 18.269276 +epoch: 1, batch: 33078, sum loss: 3287.472900, avg loss: 2.318387, ppl: 10.159277 +epoch: 1, batch: 33079, sum loss: 4226.309570, avg loss: 2.674880, ppl: 14.510602 +epoch: 1, batch: 33080, sum loss: 4823.075195, avg loss: 2.583329, ppl: 13.241144 +epoch: 1, batch: 33081, sum loss: 3398.825439, avg loss: 2.388493, ppl: 10.897058 +epoch: 1, batch: 33082, sum loss: 4361.759766, avg loss: 2.830474, ppl: 16.953489 +epoch: 1, batch: 33083, sum loss: 4888.869141, avg loss: 2.683243, ppl: 14.632474 +epoch: 1, batch: 33084, sum loss: 4558.957031, avg loss: 2.669178, ppl: 14.428111 +epoch: 1, batch: 33085, sum loss: 3937.141602, avg loss: 2.425842, ppl: 11.311750 +epoch: 1, batch: 33086, sum loss: 3952.834229, avg loss: 2.434011, ppl: 11.404536 +epoch: 1, batch: 33087, sum loss: 4749.922852, avg loss: 2.887491, ppl: 17.948221 +epoch: 1, batch: 33088, sum loss: 5162.704102, avg loss: 2.636723, ppl: 13.967361 +epoch: 1, batch: 33089, sum loss: 4269.646484, avg loss: 2.542970, ppl: 12.717385 +epoch: 1, batch: 33090, sum loss: 4921.030273, avg loss: 2.654277, ppl: 14.214709 +epoch: 1, batch: 33091, sum loss: 4385.498535, avg loss: 2.616646, ppl: 13.689732 +epoch: 1, batch: 33092, sum loss: 5133.292480, avg loss: 2.806612, ppl: 16.553736 +epoch: 1, batch: 33093, sum loss: 4183.395996, avg loss: 2.490117, ppl: 12.062685 +epoch: 1, batch: 33094, sum loss: 4859.363770, avg loss: 2.597202, ppl: 13.426113 +epoch: 1, batch: 33095, sum loss: 4203.148438, avg loss: 2.559774, ppl: 12.932893 +epoch: 1, batch: 33096, sum loss: 
4425.864258, avg loss: 2.573177, ppl: 13.107399 +epoch: 1, batch: 33097, sum loss: 5202.221191, avg loss: 2.808975, ppl: 16.592897 +epoch: 1, batch: 33098, sum loss: 3728.598145, avg loss: 2.410212, ppl: 11.136325 +epoch: 1, batch: 33099, sum loss: 4762.913574, avg loss: 2.707740, ppl: 14.995341 +epoch: 1, batch: 33100, sum loss: 4282.764648, avg loss: 2.583091, ppl: 13.237990 +epoch: 1, batch: 33101, sum loss: 5466.582031, avg loss: 2.943770, ppl: 18.987301 +epoch: 1, batch: 33102, sum loss: 4558.902344, avg loss: 2.656703, ppl: 14.249231 +epoch: 1, batch: 33103, sum loss: 4301.200684, avg loss: 2.686571, ppl: 14.681252 +epoch: 1, batch: 33104, sum loss: 4780.420410, avg loss: 2.798841, ppl: 16.425594 +epoch: 1, batch: 33105, sum loss: 3369.451904, avg loss: 2.306264, ppl: 10.036859 +epoch: 1, batch: 33106, sum loss: 5249.194824, avg loss: 2.655131, ppl: 14.226854 +epoch: 1, batch: 33107, sum loss: 4656.186523, avg loss: 2.671363, ppl: 14.459669 +epoch: 1, batch: 33108, sum loss: 3593.917236, avg loss: 2.226715, ppl: 9.269362 +epoch: 1, batch: 33109, sum loss: 4369.951172, avg loss: 2.569048, ppl: 13.053394 +epoch: 1, batch: 33110, sum loss: 4171.273438, avg loss: 2.590853, ppl: 13.341146 +epoch: 1, batch: 33111, sum loss: 4757.465820, avg loss: 2.767578, ppl: 15.920023 +epoch: 1, batch: 33112, sum loss: 6311.797852, avg loss: 2.997055, ppl: 20.026468 +epoch: 1, batch: 33113, sum loss: 4654.414062, avg loss: 2.578623, ppl: 13.178976 +epoch: 1, batch: 33114, sum loss: 4002.707275, avg loss: 2.360087, ppl: 10.591872 +epoch: 1, batch: 33115, sum loss: 5509.274414, avg loss: 2.771265, ppl: 15.978831 +epoch: 1, batch: 33116, sum loss: 4097.831543, avg loss: 2.494115, ppl: 12.111014 +epoch: 1, batch: 33117, sum loss: 5633.532227, avg loss: 2.768321, ppl: 15.931855 +epoch: 1, batch: 33118, sum loss: 3933.635010, avg loss: 2.503905, ppl: 12.230164 +epoch: 1, batch: 33119, sum loss: 4075.507080, avg loss: 2.525097, ppl: 12.492111 +epoch: 1, batch: 33120, sum loss: 
4777.516113, avg loss: 2.669003, ppl: 14.425583 +epoch: 1, batch: 33121, sum loss: 4317.512695, avg loss: 2.632630, ppl: 13.910301 +epoch: 1, batch: 33122, sum loss: 4978.828613, avg loss: 2.697090, ppl: 14.836497 +epoch: 1, batch: 33123, sum loss: 5795.636719, avg loss: 2.967556, ppl: 19.444340 +epoch: 1, batch: 33124, sum loss: 4971.878906, avg loss: 2.597638, ppl: 13.431973 +epoch: 1, batch: 33125, sum loss: 3812.795410, avg loss: 2.349227, ppl: 10.477467 +epoch: 1, batch: 33126, sum loss: 4668.286133, avg loss: 2.462176, ppl: 11.730312 +epoch: 1, batch: 33127, sum loss: 4394.229980, avg loss: 2.379118, ppl: 10.795374 +epoch: 1, batch: 33128, sum loss: 4969.587402, avg loss: 2.993727, ppl: 19.959944 +epoch: 1, batch: 33129, sum loss: 5035.394043, avg loss: 2.744084, ppl: 15.550365 +epoch: 1, batch: 33130, sum loss: 3853.123535, avg loss: 2.444875, ppl: 11.529111 +epoch: 1, batch: 33131, sum loss: 4964.024902, avg loss: 2.728986, ppl: 15.317344 +epoch: 1, batch: 33132, sum loss: 5157.332520, avg loss: 2.598152, ppl: 13.438886 +epoch: 1, batch: 33133, sum loss: 3579.622559, avg loss: 2.299051, ppl: 9.964722 +epoch: 1, batch: 33134, sum loss: 3583.568604, avg loss: 2.310489, ppl: 10.079351 +epoch: 1, batch: 33135, sum loss: 4547.698242, avg loss: 2.795143, ppl: 16.364975 +epoch: 1, batch: 33136, sum loss: 4638.996582, avg loss: 2.767898, ppl: 15.925121 +epoch: 1, batch: 33137, sum loss: 4012.511719, avg loss: 2.482990, ppl: 11.977019 +epoch: 1, batch: 33138, sum loss: 4891.535156, avg loss: 2.746511, ppl: 15.588142 +epoch: 1, batch: 33139, sum loss: 3851.658447, avg loss: 2.381978, ppl: 10.826297 +epoch: 1, batch: 33140, sum loss: 4493.820312, avg loss: 2.762028, ppl: 15.831924 +epoch: 1, batch: 33141, sum loss: 5076.211426, avg loss: 2.691523, ppl: 14.754124 +epoch: 1, batch: 33142, sum loss: 5206.373047, avg loss: 2.855937, ppl: 17.390724 +epoch: 1, batch: 33143, sum loss: 3973.784180, avg loss: 2.385225, ppl: 10.861502 +epoch: 1, batch: 33144, sum loss: 
4210.086914, avg loss: 2.652859, ppl: 14.194563 +epoch: 1, batch: 33145, sum loss: 4248.560547, avg loss: 2.617721, ppl: 13.704450 +epoch: 1, batch: 33146, sum loss: 4899.698242, avg loss: 2.717525, ppl: 15.142805 +epoch: 1, batch: 33147, sum loss: 4495.567383, avg loss: 2.769912, ppl: 15.957230 +epoch: 1, batch: 33148, sum loss: 4473.051758, avg loss: 2.689749, ppl: 14.727972 +epoch: 1, batch: 33149, sum loss: 4897.427734, avg loss: 2.609178, ppl: 13.587881 +epoch: 1, batch: 33150, sum loss: 4924.758789, avg loss: 2.828696, ppl: 16.923370 +epoch: 1, batch: 33151, sum loss: 4272.018555, avg loss: 2.686804, ppl: 14.684669 +epoch: 1, batch: 33152, sum loss: 4090.092285, avg loss: 2.468372, ppl: 11.803214 +epoch: 1, batch: 33153, sum loss: 4936.710449, avg loss: 2.863521, ppl: 17.523123 +epoch: 1, batch: 33154, sum loss: 4612.751953, avg loss: 2.610499, ppl: 13.605841 +epoch: 1, batch: 33155, sum loss: 4862.399414, avg loss: 2.631169, ppl: 13.889993 +epoch: 1, batch: 33156, sum loss: 4513.440430, avg loss: 2.714035, ppl: 15.090042 +epoch: 1, batch: 33157, sum loss: 4315.381836, avg loss: 2.562578, ppl: 12.969214 +epoch: 1, batch: 33158, sum loss: 3980.447510, avg loss: 2.380651, ppl: 10.811934 +epoch: 1, batch: 33159, sum loss: 4055.930664, avg loss: 2.401380, ppl: 11.038400 +epoch: 1, batch: 33160, sum loss: 5764.676270, avg loss: 2.797029, ppl: 16.395859 +epoch: 1, batch: 33161, sum loss: 4982.776367, avg loss: 2.731785, ppl: 15.360285 +epoch: 1, batch: 33162, sum loss: 4453.666016, avg loss: 2.767971, ppl: 15.926291 +epoch: 1, batch: 33163, sum loss: 4433.092285, avg loss: 2.724703, ppl: 15.251888 +epoch: 1, batch: 33164, sum loss: 3386.892822, avg loss: 2.536999, ppl: 12.641670 +epoch: 1, batch: 33165, sum loss: 4372.090332, avg loss: 2.525760, ppl: 12.500388 +epoch: 1, batch: 33166, sum loss: 4535.841797, avg loss: 2.682343, ppl: 14.619307 +epoch: 1, batch: 33167, sum loss: 3910.701416, avg loss: 2.464210, ppl: 11.754196 +epoch: 1, batch: 33168, sum loss: 
4252.207520, avg loss: 2.607117, ppl: 13.559900 +epoch: 1, batch: 33169, sum loss: 4624.388184, avg loss: 2.640999, ppl: 14.027205 +epoch: 1, batch: 33170, sum loss: 3369.443359, avg loss: 2.507026, ppl: 12.268394 +epoch: 1, batch: 33171, sum loss: 3934.700195, avg loss: 2.421354, ppl: 11.261094 +epoch: 1, batch: 33172, sum loss: 4947.036621, avg loss: 2.584659, ppl: 13.258762 +epoch: 1, batch: 33173, sum loss: 3944.477295, avg loss: 2.491773, ppl: 12.082685 +epoch: 1, batch: 33174, sum loss: 4776.533203, avg loss: 2.867067, ppl: 17.585358 +epoch: 1, batch: 33175, sum loss: 4871.083984, avg loss: 2.755138, ppl: 15.723213 +epoch: 1, batch: 33176, sum loss: 4227.388184, avg loss: 2.568280, ppl: 13.043367 +epoch: 1, batch: 33177, sum loss: 4724.500000, avg loss: 2.651235, ppl: 14.171524 +epoch: 1, batch: 33178, sum loss: 4200.969727, avg loss: 2.724364, ppl: 15.246718 +epoch: 1, batch: 33179, sum loss: 4321.035645, avg loss: 2.484782, ppl: 11.998504 +epoch: 1, batch: 33180, sum loss: 4215.661621, avg loss: 2.726819, ppl: 15.284184 +epoch: 1, batch: 33181, sum loss: 4943.904297, avg loss: 2.775915, ppl: 16.053308 +epoch: 1, batch: 33182, sum loss: 4257.328125, avg loss: 2.455206, ppl: 11.648837 +epoch: 1, batch: 33183, sum loss: 4309.286133, avg loss: 2.569640, ppl: 13.061121 +epoch: 1, batch: 33184, sum loss: 4474.101074, avg loss: 2.556629, ppl: 12.892286 +epoch: 1, batch: 33185, sum loss: 4559.297363, avg loss: 2.798832, ppl: 16.425449 +epoch: 1, batch: 33186, sum loss: 3486.590088, avg loss: 2.379925, ppl: 10.804092 +epoch: 1, batch: 33187, sum loss: 4156.462891, avg loss: 2.505403, ppl: 12.248488 +epoch: 1, batch: 33188, sum loss: 4235.785645, avg loss: 2.491639, ppl: 12.081057 +epoch: 1, batch: 33189, sum loss: 5250.107910, avg loss: 2.743003, ppl: 15.533565 +epoch: 1, batch: 33190, sum loss: 4844.652832, avg loss: 2.758914, ppl: 15.782694 +epoch: 1, batch: 33191, sum loss: 4432.665039, avg loss: 2.716094, ppl: 15.121140 +epoch: 1, batch: 33192, sum loss: 
4935.857422, avg loss: 2.751314, ppl: 15.663202 +epoch: 1, batch: 33193, sum loss: 5921.839355, avg loss: 3.024433, ppl: 20.582329 +epoch: 1, batch: 33194, sum loss: 4481.664551, avg loss: 2.581604, ppl: 13.218324 +epoch: 1, batch: 33195, sum loss: 5092.649414, avg loss: 2.736512, ppl: 15.433067 +epoch: 1, batch: 33196, sum loss: 4840.914062, avg loss: 2.780537, ppl: 16.127674 +epoch: 1, batch: 33197, sum loss: 4681.563965, avg loss: 2.820219, ppl: 16.780529 +epoch: 1, batch: 33198, sum loss: 4893.440430, avg loss: 2.539409, ppl: 12.672175 +epoch: 1, batch: 33199, sum loss: 4996.812500, avg loss: 2.956694, ppl: 19.234272 +epoch: 1, batch: 33200, sum loss: 5403.512695, avg loss: 2.781015, ppl: 16.135397 +epoch: 1, batch: 33201, sum loss: 4101.764648, avg loss: 2.482908, ppl: 11.976046 +epoch: 1, batch: 33202, sum loss: 5027.725586, avg loss: 2.796288, ppl: 16.383713 +epoch: 1, batch: 33203, sum loss: 4930.910156, avg loss: 2.497928, ppl: 12.157277 +epoch: 1, batch: 33204, sum loss: 5043.753906, avg loss: 2.864142, ppl: 17.534002 +epoch: 1, batch: 33205, sum loss: 4556.875000, avg loss: 2.677365, ppl: 14.546710 +epoch: 1, batch: 33206, sum loss: 4881.458008, avg loss: 2.575967, ppl: 13.144026 +epoch: 1, batch: 33207, sum loss: 4388.896973, avg loss: 2.587793, ppl: 13.300386 +epoch: 1, batch: 33208, sum loss: 4296.833496, avg loss: 2.712647, ppl: 15.069118 +epoch: 1, batch: 33209, sum loss: 3929.832031, avg loss: 2.384607, ppl: 10.854795 +epoch: 1, batch: 33210, sum loss: 6039.296875, avg loss: 3.033298, ppl: 20.765610 +epoch: 1, batch: 33211, sum loss: 4886.982422, avg loss: 2.686631, ppl: 14.682135 +epoch: 1, batch: 33212, sum loss: 4594.429199, avg loss: 2.548214, ppl: 12.784247 +epoch: 1, batch: 33213, sum loss: 4290.928223, avg loss: 2.643825, ppl: 14.066908 +epoch: 1, batch: 33214, sum loss: 4793.505371, avg loss: 2.642506, ppl: 14.048363 +epoch: 1, batch: 33215, sum loss: 3843.874512, avg loss: 2.417531, ppl: 11.218127 +epoch: 1, batch: 33216, sum loss: 
4313.543457, avg loss: 2.619030, ppl: 13.722413 +epoch: 1, batch: 33217, sum loss: 4402.293945, avg loss: 2.614189, ppl: 13.656136 +epoch: 1, batch: 33218, sum loss: 5502.739258, avg loss: 2.984132, ppl: 19.769337 +epoch: 1, batch: 33219, sum loss: 3742.607910, avg loss: 2.422400, ppl: 11.272882 +epoch: 1, batch: 33220, sum loss: 5117.245605, avg loss: 2.746777, ppl: 15.592298 +epoch: 1, batch: 33221, sum loss: 4183.459473, avg loss: 2.505066, ppl: 12.244363 +epoch: 1, batch: 33222, sum loss: 4588.260742, avg loss: 2.632393, ppl: 13.907008 +epoch: 1, batch: 33223, sum loss: 4545.700195, avg loss: 2.721976, ppl: 15.210353 +epoch: 1, batch: 33224, sum loss: 3883.787109, avg loss: 2.510528, ppl: 12.311433 +epoch: 1, batch: 33225, sum loss: 4217.118164, avg loss: 2.516180, ppl: 12.381213 +epoch: 1, batch: 33226, sum loss: 3507.099121, avg loss: 2.545065, ppl: 12.744053 +epoch: 1, batch: 33227, sum loss: 4435.638184, avg loss: 2.832464, ppl: 16.987261 +epoch: 1, batch: 33228, sum loss: 5302.135254, avg loss: 2.696915, ppl: 14.833900 +epoch: 1, batch: 33229, sum loss: 4296.193848, avg loss: 2.645440, ppl: 14.089638 +epoch: 1, batch: 33230, sum loss: 4638.766602, avg loss: 2.649210, ppl: 14.142861 +epoch: 1, batch: 33231, sum loss: 4526.558105, avg loss: 2.704037, ppl: 14.939925 +epoch: 1, batch: 33232, sum loss: 3829.888672, avg loss: 2.524646, ppl: 12.486478 +epoch: 1, batch: 33233, sum loss: 3656.789062, avg loss: 2.379173, ppl: 10.795974 +epoch: 1, batch: 33234, sum loss: 5540.797852, avg loss: 2.859029, ppl: 17.444580 +epoch: 1, batch: 33235, sum loss: 4464.012695, avg loss: 2.818190, ppl: 16.746510 +epoch: 1, batch: 33236, sum loss: 5328.185059, avg loss: 2.776542, ppl: 16.063385 +epoch: 1, batch: 33237, sum loss: 4668.273438, avg loss: 2.602159, ppl: 13.492841 +epoch: 1, batch: 33238, sum loss: 4404.996582, avg loss: 2.437740, ppl: 11.447142 +epoch: 1, batch: 33239, sum loss: 3575.215332, avg loss: 2.448778, ppl: 11.574191 +epoch: 1, batch: 33240, sum loss: 
3563.561035, avg loss: 2.467840, ppl: 11.796937 +epoch: 1, batch: 33241, sum loss: 5540.796387, avg loss: 2.956668, ppl: 19.233780 +epoch: 1, batch: 33242, sum loss: 3818.106445, avg loss: 2.525203, ppl: 12.493428 +epoch: 1, batch: 33243, sum loss: 5563.707031, avg loss: 2.926727, ppl: 18.666428 +epoch: 1, batch: 33244, sum loss: 4546.698730, avg loss: 2.834600, ppl: 17.023594 +epoch: 1, batch: 33245, sum loss: 3908.123047, avg loss: 2.278789, ppl: 9.764846 +epoch: 1, batch: 33246, sum loss: 4786.990723, avg loss: 2.819194, ppl: 16.763327 +epoch: 1, batch: 33247, sum loss: 4893.962891, avg loss: 2.727962, ppl: 15.301663 +epoch: 1, batch: 33248, sum loss: 3455.956055, avg loss: 2.260272, ppl: 9.585699 +epoch: 1, batch: 33249, sum loss: 5174.518555, avg loss: 2.629329, ppl: 13.864460 +epoch: 1, batch: 33250, sum loss: 3925.888672, avg loss: 2.550935, ppl: 12.819081 +epoch: 1, batch: 33251, sum loss: 4109.538086, avg loss: 2.387878, ppl: 10.890360 +epoch: 1, batch: 33252, sum loss: 4663.379883, avg loss: 2.680103, ppl: 14.586600 +epoch: 1, batch: 33253, sum loss: 4869.921875, avg loss: 2.565818, ppl: 13.011295 +epoch: 1, batch: 33254, sum loss: 3706.991943, avg loss: 2.623490, ppl: 13.783750 +epoch: 1, batch: 33255, sum loss: 4454.588379, avg loss: 2.754848, ppl: 15.718644 +epoch: 1, batch: 33256, sum loss: 4297.439453, avg loss: 2.670876, ppl: 14.452624 +epoch: 1, batch: 33257, sum loss: 4541.420410, avg loss: 2.643435, ppl: 14.061415 +epoch: 1, batch: 33258, sum loss: 4446.174805, avg loss: 2.606199, ppl: 13.547453 +epoch: 1, batch: 33259, sum loss: 4221.385742, avg loss: 2.780887, ppl: 16.133320 +epoch: 1, batch: 33260, sum loss: 3567.006104, avg loss: 2.360692, ppl: 10.598286 +epoch: 1, batch: 33261, sum loss: 4106.540039, avg loss: 2.666584, ppl: 14.390734 +epoch: 1, batch: 33262, sum loss: 3725.370605, avg loss: 2.522255, ppl: 12.456657 +epoch: 1, batch: 33263, sum loss: 4872.017090, avg loss: 2.811320, ppl: 16.631851 +epoch: 1, batch: 33264, sum loss: 
5092.792480, avg loss: 2.663594, ppl: 14.347766 +epoch: 1, batch: 33265, sum loss: 3897.382812, avg loss: 2.477675, ppl: 11.913533 +epoch: 1, batch: 33266, sum loss: 4364.544922, avg loss: 2.522858, ppl: 12.464173 +epoch: 1, batch: 33267, sum loss: 4633.811523, avg loss: 2.649406, ppl: 14.145637 +epoch: 1, batch: 33268, sum loss: 3997.495117, avg loss: 2.611036, ppl: 13.613141 +epoch: 1, batch: 33269, sum loss: 4847.712402, avg loss: 2.878689, ppl: 17.790939 +epoch: 1, batch: 33270, sum loss: 5427.803711, avg loss: 2.822571, ppl: 16.820036 +epoch: 1, batch: 33271, sum loss: 3902.260986, avg loss: 2.552166, ppl: 12.834867 +epoch: 1, batch: 33272, sum loss: 4577.742188, avg loss: 2.672354, ppl: 14.473997 +epoch: 1, batch: 33273, sum loss: 4840.759766, avg loss: 2.812760, ppl: 16.655823 +epoch: 1, batch: 33274, sum loss: 4208.979004, avg loss: 2.452785, ppl: 11.620666 +epoch: 1, batch: 33275, sum loss: 5072.313965, avg loss: 2.942178, ppl: 18.957087 +epoch: 1, batch: 33276, sum loss: 4739.299805, avg loss: 2.683635, ppl: 14.638210 +epoch: 1, batch: 33277, sum loss: 4689.065430, avg loss: 2.701075, ppl: 14.895730 +epoch: 1, batch: 33278, sum loss: 3821.884033, avg loss: 2.324747, ppl: 10.224092 +epoch: 1, batch: 33279, sum loss: 6022.770996, avg loss: 2.817012, ppl: 16.726789 +epoch: 1, batch: 33280, sum loss: 4272.900879, avg loss: 2.692439, ppl: 14.767655 +epoch: 1, batch: 33281, sum loss: 4547.686523, avg loss: 2.849428, ppl: 17.277891 +epoch: 1, batch: 33282, sum loss: 5280.716309, avg loss: 2.761881, ppl: 15.829589 +epoch: 1, batch: 33283, sum loss: 4837.222168, avg loss: 2.575731, ppl: 13.140914 +epoch: 1, batch: 33284, sum loss: 4733.175781, avg loss: 2.636867, ppl: 13.969366 +epoch: 1, batch: 33285, sum loss: 4402.354980, avg loss: 2.658427, ppl: 14.273819 +epoch: 1, batch: 33286, sum loss: 4255.400391, avg loss: 2.427496, ppl: 11.330475 +epoch: 1, batch: 33287, sum loss: 4529.865234, avg loss: 2.469937, ppl: 11.821706 +epoch: 1, batch: 33288, sum loss: 
4141.201660, avg loss: 2.722684, ppl: 15.221116 +epoch: 1, batch: 33289, sum loss: 5130.622070, avg loss: 2.782333, ppl: 16.156673 +epoch: 1, batch: 33290, sum loss: 3758.345459, avg loss: 2.273651, ppl: 9.714807 +epoch: 1, batch: 33291, sum loss: 3915.974854, avg loss: 2.621135, ppl: 13.751316 +epoch: 1, batch: 33292, sum loss: 4066.106689, avg loss: 2.546091, ppl: 12.757133 +epoch: 1, batch: 33293, sum loss: 4151.979004, avg loss: 2.598235, ppl: 13.439991 +epoch: 1, batch: 33294, sum loss: 4494.526367, avg loss: 2.752312, ppl: 15.678846 +epoch: 1, batch: 33295, sum loss: 4512.984863, avg loss: 2.687900, ppl: 14.700776 +epoch: 1, batch: 33296, sum loss: 3695.538818, avg loss: 2.337469, ppl: 10.354996 +epoch: 1, batch: 33297, sum loss: 5211.513184, avg loss: 2.909834, ppl: 18.353754 +epoch: 1, batch: 33298, sum loss: 4562.631836, avg loss: 2.634314, ppl: 13.933751 +epoch: 1, batch: 33299, sum loss: 4117.230469, avg loss: 2.570056, ppl: 13.066562 +epoch: 1, batch: 33300, sum loss: 4297.244629, avg loss: 2.448572, ppl: 11.571815 +epoch: 1, batch: 33301, sum loss: 4587.234375, avg loss: 2.685734, ppl: 14.668968 +epoch: 1, batch: 33302, sum loss: 4240.875000, avg loss: 2.419210, ppl: 11.236979 +epoch: 1, batch: 33303, sum loss: 3938.524902, avg loss: 2.309985, ppl: 10.074278 +epoch: 1, batch: 33304, sum loss: 5561.086914, avg loss: 3.129481, ppl: 22.862103 +epoch: 1, batch: 33305, sum loss: 5180.556152, avg loss: 2.495451, ppl: 12.127198 +epoch: 1, batch: 33306, sum loss: 4817.019043, avg loss: 2.570448, ppl: 13.071675 +epoch: 1, batch: 33307, sum loss: 4787.622559, avg loss: 2.629117, ppl: 13.861528 +epoch: 1, batch: 33308, sum loss: 4000.641846, avg loss: 2.286081, ppl: 9.836314 +epoch: 1, batch: 33309, sum loss: 3597.276855, avg loss: 2.398185, ppl: 11.003182 +epoch: 1, batch: 33310, sum loss: 4685.763672, avg loss: 2.894233, ppl: 18.069641 +epoch: 1, batch: 33311, sum loss: 3781.003906, avg loss: 2.494066, ppl: 12.110417 +epoch: 1, batch: 33312, sum loss: 
3637.947754, avg loss: 2.502027, ppl: 12.207216 +epoch: 1, batch: 33313, sum loss: 3326.908447, avg loss: 2.376363, ppl: 10.765677 +epoch: 1, batch: 33314, sum loss: 3887.734375, avg loss: 2.401318, ppl: 11.037718 +epoch: 1, batch: 33315, sum loss: 5362.953125, avg loss: 2.897327, ppl: 18.125639 +epoch: 1, batch: 33316, sum loss: 4212.961914, avg loss: 2.704083, ppl: 14.940617 +epoch: 1, batch: 33317, sum loss: 5959.263672, avg loss: 3.181668, ppl: 24.086893 +epoch: 1, batch: 33318, sum loss: 4672.550781, avg loss: 2.842184, ppl: 17.153189 +epoch: 1, batch: 33319, sum loss: 3704.565186, avg loss: 2.399330, ppl: 11.015792 +epoch: 1, batch: 33320, sum loss: 2841.063232, avg loss: 2.247677, ppl: 9.465717 +epoch: 1, batch: 33321, sum loss: 3728.985352, avg loss: 2.346750, ppl: 10.451545 +epoch: 1, batch: 33322, sum loss: 4602.282715, avg loss: 2.539891, ppl: 12.678289 +epoch: 1, batch: 33323, sum loss: 4948.708008, avg loss: 2.758477, ppl: 15.775802 +epoch: 1, batch: 33324, sum loss: 4493.052246, avg loss: 2.544197, ppl: 12.733001 +epoch: 1, batch: 33325, sum loss: 3611.530762, avg loss: 2.425474, ppl: 11.307587 +epoch: 1, batch: 33326, sum loss: 4914.260742, avg loss: 2.840613, ppl: 17.126263 +epoch: 1, batch: 33327, sum loss: 4646.270996, avg loss: 2.859244, ppl: 17.448324 +epoch: 1, batch: 33328, sum loss: 3970.935059, avg loss: 2.634993, ppl: 13.943219 +epoch: 1, batch: 33329, sum loss: 5015.163086, avg loss: 2.811190, ppl: 16.629698 +epoch: 1, batch: 33330, sum loss: 4919.486816, avg loss: 2.626528, ppl: 13.825684 +epoch: 1, batch: 33331, sum loss: 4626.898926, avg loss: 2.660666, ppl: 14.305820 +epoch: 1, batch: 33332, sum loss: 4134.809082, avg loss: 2.490849, ppl: 12.071518 +epoch: 1, batch: 33333, sum loss: 4763.716797, avg loss: 2.861091, ppl: 17.480591 +epoch: 1, batch: 33334, sum loss: 3034.037598, avg loss: 2.390889, ppl: 10.923197 +epoch: 1, batch: 33335, sum loss: 4155.002441, avg loss: 2.522770, ppl: 12.463074 +epoch: 1, batch: 33336, sum loss: 
4013.751465, avg loss: 2.312069, ppl: 10.095287 +epoch: 1, batch: 33337, sum loss: 3993.943115, avg loss: 2.407440, ppl: 11.105496 +epoch: 1, batch: 33338, sum loss: 4389.702148, avg loss: 2.367692, ppl: 10.672736 +epoch: 1, batch: 33339, sum loss: 3778.931152, avg loss: 2.529405, ppl: 12.546041 +epoch: 1, batch: 33340, sum loss: 4666.959961, avg loss: 2.644170, ppl: 14.071761 +epoch: 1, batch: 33341, sum loss: 5807.390625, avg loss: 2.874946, ppl: 17.724464 +epoch: 1, batch: 33342, sum loss: 4310.853516, avg loss: 2.556853, ppl: 12.895166 +epoch: 1, batch: 33343, sum loss: 4232.430664, avg loss: 2.445078, ppl: 11.531453 +epoch: 1, batch: 33344, sum loss: 4492.994141, avg loss: 2.783763, ppl: 16.179798 +epoch: 1, batch: 33345, sum loss: 4377.484375, avg loss: 2.675724, ppl: 14.522861 +epoch: 1, batch: 33346, sum loss: 5348.955078, avg loss: 2.768610, ppl: 15.936471 +epoch: 1, batch: 33347, sum loss: 4220.341309, avg loss: 2.619703, ppl: 13.731646 +epoch: 1, batch: 33348, sum loss: 5246.359863, avg loss: 2.743912, ppl: 15.547688 +epoch: 1, batch: 33349, sum loss: 3980.683105, avg loss: 2.584859, ppl: 13.261421 +epoch: 1, batch: 33350, sum loss: 4987.578125, avg loss: 2.913305, ppl: 18.417564 +epoch: 1, batch: 33351, sum loss: 5013.686523, avg loss: 2.843838, ppl: 17.181581 +epoch: 1, batch: 33352, sum loss: 5287.270020, avg loss: 2.876643, ppl: 17.754570 +epoch: 1, batch: 33353, sum loss: 3777.325928, avg loss: 2.701950, ppl: 14.908773 +epoch: 1, batch: 33354, sum loss: 4774.339844, avg loss: 2.803488, ppl: 16.502102 +epoch: 1, batch: 33355, sum loss: 4438.716797, avg loss: 2.891672, ppl: 18.023422 +epoch: 1, batch: 33356, sum loss: 4419.250977, avg loss: 2.721214, ppl: 15.198756 +epoch: 1, batch: 33357, sum loss: 4228.109375, avg loss: 2.613170, ppl: 13.642230 +epoch: 1, batch: 33358, sum loss: 4407.745605, avg loss: 2.807481, ppl: 16.568136 +epoch: 1, batch: 33359, sum loss: 4378.980957, avg loss: 2.563806, ppl: 12.985146 +epoch: 1, batch: 33360, sum loss: 
4587.218262, avg loss: 2.715937, ppl: 15.118775 +epoch: 1, batch: 33361, sum loss: 4051.520020, avg loss: 2.436272, ppl: 11.430347 +epoch: 1, batch: 33362, sum loss: 4129.512207, avg loss: 2.679761, ppl: 14.581614 +epoch: 1, batch: 33363, sum loss: 3933.033691, avg loss: 2.579038, ppl: 13.184454 +epoch: 1, batch: 33364, sum loss: 4413.094727, avg loss: 2.511721, ppl: 12.326130 +epoch: 1, batch: 33365, sum loss: 3580.909424, avg loss: 2.310264, ppl: 10.077088 +epoch: 1, batch: 33366, sum loss: 4557.376465, avg loss: 2.631280, ppl: 13.891536 +epoch: 1, batch: 33367, sum loss: 4765.604492, avg loss: 2.826575, ppl: 16.887514 +epoch: 1, batch: 33368, sum loss: 4684.547852, avg loss: 2.672303, ppl: 14.473269 +epoch: 1, batch: 33369, sum loss: 4405.899902, avg loss: 2.462772, ppl: 11.737307 +epoch: 1, batch: 33370, sum loss: 4549.237793, avg loss: 2.541474, ppl: 12.698370 +epoch: 1, batch: 33371, sum loss: 6030.267578, avg loss: 2.960367, ppl: 19.305056 +epoch: 1, batch: 33372, sum loss: 4178.082031, avg loss: 2.823029, ppl: 16.827738 +epoch: 1, batch: 33373, sum loss: 4159.449707, avg loss: 2.414074, ppl: 11.179416 +epoch: 1, batch: 33374, sum loss: 4473.887207, avg loss: 2.754857, ppl: 15.718786 +epoch: 1, batch: 33375, sum loss: 4569.775879, avg loss: 2.721725, ppl: 15.206527 +epoch: 1, batch: 33376, sum loss: 4464.756348, avg loss: 2.495672, ppl: 12.129876 +epoch: 1, batch: 33377, sum loss: 4224.034180, avg loss: 2.638372, ppl: 13.990415 +epoch: 1, batch: 33378, sum loss: 4166.129395, avg loss: 2.691298, ppl: 14.750813 +epoch: 1, batch: 33379, sum loss: 4604.799805, avg loss: 2.689719, ppl: 14.727544 +epoch: 1, batch: 33380, sum loss: 4596.358887, avg loss: 2.508929, ppl: 12.291764 +epoch: 1, batch: 33381, sum loss: 4138.813965, avg loss: 2.642921, ppl: 14.054193 +epoch: 1, batch: 33382, sum loss: 4604.790527, avg loss: 2.749129, ppl: 15.629010 +epoch: 1, batch: 33383, sum loss: 3844.220215, avg loss: 2.532424, ppl: 12.583979 +epoch: 1, batch: 33384, sum loss: 
5084.138184, avg loss: 2.681508, ppl: 14.607099 +epoch: 1, batch: 33385, sum loss: 4780.605469, avg loss: 2.725545, ppl: 15.264727 +epoch: 1, batch: 33386, sum loss: 4814.923340, avg loss: 2.566590, ppl: 13.021350 +epoch: 1, batch: 33387, sum loss: 3959.411133, avg loss: 2.751502, ppl: 15.666142 +epoch: 1, batch: 33388, sum loss: 4131.914062, avg loss: 2.510276, ppl: 12.308325 +epoch: 1, batch: 33389, sum loss: 4042.491211, avg loss: 2.463432, ppl: 11.745047 +epoch: 1, batch: 33390, sum loss: 4751.995117, avg loss: 2.546621, ppl: 12.763903 +epoch: 1, batch: 33391, sum loss: 4577.032227, avg loss: 2.593220, ppl: 13.372756 +epoch: 1, batch: 33392, sum loss: 4519.002930, avg loss: 2.567615, ppl: 13.034703 +epoch: 1, batch: 33393, sum loss: 5436.491699, avg loss: 2.730533, ppl: 15.341063 +epoch: 1, batch: 33394, sum loss: 4583.588379, avg loss: 2.729951, ppl: 15.332142 +epoch: 1, batch: 33395, sum loss: 5707.797363, avg loss: 2.815884, ppl: 16.707941 +epoch: 1, batch: 33396, sum loss: 3380.606689, avg loss: 2.271913, ppl: 9.697936 +epoch: 1, batch: 33397, sum loss: 5715.394531, avg loss: 2.793448, ppl: 16.337254 +epoch: 1, batch: 33398, sum loss: 4798.262695, avg loss: 2.796191, ppl: 16.382120 +epoch: 1, batch: 33399, sum loss: 4564.064453, avg loss: 2.485874, ppl: 12.011613 +epoch: 1, batch: 33400, sum loss: 3779.712891, avg loss: 2.595957, ppl: 13.409411 +epoch: 1, batch: 33401, sum loss: 3181.626953, avg loss: 2.456855, ppl: 11.668056 +epoch: 1, batch: 33402, sum loss: 4625.268066, avg loss: 2.611670, ppl: 13.621783 +epoch: 1, batch: 33403, sum loss: 4250.161133, avg loss: 2.505991, ppl: 12.255701 +epoch: 1, batch: 33404, sum loss: 4234.322266, avg loss: 2.502554, ppl: 12.213653 +epoch: 1, batch: 33405, sum loss: 4848.053223, avg loss: 2.714476, ppl: 15.096692 +epoch: 1, batch: 33406, sum loss: 4012.932373, avg loss: 2.655812, ppl: 14.236546 +epoch: 1, batch: 33407, sum loss: 4249.144531, avg loss: 2.716844, ppl: 15.132494 +epoch: 1, batch: 33408, sum loss: 
4401.964355, avg loss: 2.362836, ppl: 10.621034 +epoch: 1, batch: 33409, sum loss: 3802.260498, avg loss: 2.543318, ppl: 12.721812 +epoch: 1, batch: 33410, sum loss: 5193.277832, avg loss: 2.740516, ppl: 15.494978 +epoch: 1, batch: 33411, sum loss: 3575.778076, avg loss: 2.374355, ppl: 10.744077 +epoch: 1, batch: 33412, sum loss: 4438.077148, avg loss: 2.547691, ppl: 12.777561 +epoch: 1, batch: 33413, sum loss: 4378.802246, avg loss: 2.372049, ppl: 10.719332 +epoch: 1, batch: 33414, sum loss: 3652.431152, avg loss: 2.336808, ppl: 10.348155 +epoch: 1, batch: 33415, sum loss: 4619.204590, avg loss: 2.596517, ppl: 13.416930 +epoch: 1, batch: 33416, sum loss: 5524.466309, avg loss: 2.907614, ppl: 18.313051 +epoch: 1, batch: 33417, sum loss: 4001.482178, avg loss: 2.636023, ppl: 13.957578 +epoch: 1, batch: 33418, sum loss: 4241.350098, avg loss: 2.772124, ppl: 15.992571 +epoch: 1, batch: 33419, sum loss: 4346.821777, avg loss: 2.511162, ppl: 12.319237 +epoch: 1, batch: 33420, sum loss: 4107.532715, avg loss: 2.382560, ppl: 10.832597 +epoch: 1, batch: 33421, sum loss: 4502.364258, avg loss: 2.614613, ppl: 13.661932 +epoch: 1, batch: 33422, sum loss: 3848.562256, avg loss: 2.598624, ppl: 13.445228 +epoch: 1, batch: 33423, sum loss: 5522.311035, avg loss: 2.985033, ppl: 19.787155 +epoch: 1, batch: 33424, sum loss: 3909.967773, avg loss: 2.371114, ppl: 10.709321 +epoch: 1, batch: 33425, sum loss: 4369.220703, avg loss: 2.461533, ppl: 11.722764 +epoch: 1, batch: 33426, sum loss: 4356.226562, avg loss: 2.606958, ppl: 13.557744 +epoch: 1, batch: 33427, sum loss: 4184.363281, avg loss: 2.442711, ppl: 11.504182 +epoch: 1, batch: 33428, sum loss: 4408.793945, avg loss: 2.622721, ppl: 13.773149 +epoch: 1, batch: 33429, sum loss: 4891.205566, avg loss: 2.662605, ppl: 14.333584 +epoch: 1, batch: 33430, sum loss: 3995.375732, avg loss: 2.635472, ppl: 13.949900 +epoch: 1, batch: 33431, sum loss: 4445.794434, avg loss: 2.756227, ppl: 15.740347 +epoch: 1, batch: 33432, sum loss: 
4306.590820, avg loss: 2.743052, ppl: 15.534316 +epoch: 1, batch: 33433, sum loss: 4778.520508, avg loss: 2.810894, ppl: 16.624783 +epoch: 1, batch: 33434, sum loss: 5738.037598, avg loss: 2.903865, ppl: 18.244530 +epoch: 1, batch: 33435, sum loss: 4361.123535, avg loss: 2.942729, ppl: 18.967529 +epoch: 1, batch: 33436, sum loss: 5085.185059, avg loss: 2.881125, ppl: 17.834320 +epoch: 1, batch: 33437, sum loss: 4296.775391, avg loss: 2.449701, ppl: 11.584881 +epoch: 1, batch: 33438, sum loss: 4533.075195, avg loss: 2.496187, ppl: 12.136127 +epoch: 1, batch: 33439, sum loss: 4378.938965, avg loss: 2.995170, ppl: 19.988766 +epoch: 1, batch: 33440, sum loss: 4959.680664, avg loss: 2.577797, ppl: 13.168093 +epoch: 1, batch: 33441, sum loss: 4157.872559, avg loss: 2.753558, ppl: 15.698386 +epoch: 1, batch: 33442, sum loss: 4598.464844, avg loss: 2.593607, ppl: 13.377938 +epoch: 1, batch: 33443, sum loss: 3868.066895, avg loss: 2.438882, ppl: 11.460222 +epoch: 1, batch: 33444, sum loss: 4162.164062, avg loss: 2.547224, ppl: 12.771601 +epoch: 1, batch: 33445, sum loss: 4628.436523, avg loss: 2.880172, ppl: 17.817343 +epoch: 1, batch: 33446, sum loss: 4139.373535, avg loss: 2.357274, ppl: 10.562123 +epoch: 1, batch: 33447, sum loss: 5765.775879, avg loss: 2.796206, ppl: 16.382366 +epoch: 1, batch: 33448, sum loss: 3787.193359, avg loss: 2.333452, ppl: 10.313487 +epoch: 1, batch: 33449, sum loss: 4924.553223, avg loss: 2.774396, ppl: 16.028942 +epoch: 1, batch: 33450, sum loss: 4041.597656, avg loss: 2.614229, ppl: 13.656679 +epoch: 1, batch: 33451, sum loss: 4630.084961, avg loss: 2.524583, ppl: 12.485686 +epoch: 1, batch: 33452, sum loss: 4217.684570, avg loss: 2.574899, ppl: 13.129991 +epoch: 1, batch: 33453, sum loss: 4997.257324, avg loss: 2.577234, ppl: 13.160688 +epoch: 1, batch: 33454, sum loss: 4503.598633, avg loss: 2.693540, ppl: 14.783920 +epoch: 1, batch: 33455, sum loss: 4030.284668, avg loss: 2.466515, ppl: 11.781313 +epoch: 1, batch: 33456, sum loss: 
4193.206055, avg loss: 2.606094, ppl: 13.546041 +epoch: 1, batch: 33457, sum loss: 4378.098145, avg loss: 2.529231, ppl: 12.543851 +epoch: 1, batch: 33458, sum loss: 3786.546631, avg loss: 2.560207, ppl: 12.938500 +epoch: 1, batch: 33459, sum loss: 3656.936035, avg loss: 2.527254, ppl: 12.519077 +epoch: 1, batch: 33460, sum loss: 4834.006836, avg loss: 2.693040, ppl: 14.776530 +epoch: 1, batch: 33461, sum loss: 3930.475586, avg loss: 2.423228, ppl: 11.282217 +epoch: 1, batch: 33462, sum loss: 4666.802734, avg loss: 2.802885, ppl: 16.492151 +epoch: 1, batch: 33463, sum loss: 4206.101562, avg loss: 2.575690, ppl: 13.140378 +epoch: 1, batch: 33464, sum loss: 4636.880371, avg loss: 2.721174, ppl: 15.198154 +epoch: 1, batch: 33465, sum loss: 4146.441406, avg loss: 2.567456, ppl: 13.032627 +epoch: 1, batch: 33466, sum loss: 4405.009277, avg loss: 2.522915, ppl: 12.464877 +epoch: 1, batch: 33467, sum loss: 4263.956543, avg loss: 2.690193, ppl: 14.734526 +epoch: 1, batch: 33468, sum loss: 4080.611572, avg loss: 2.755308, ppl: 15.725890 +epoch: 1, batch: 33469, sum loss: 4525.735352, avg loss: 2.723066, ppl: 15.226934 +epoch: 1, batch: 33470, sum loss: 4302.115723, avg loss: 2.628049, ppl: 13.846727 +epoch: 1, batch: 33471, sum loss: 5466.011719, avg loss: 3.103925, ppl: 22.285244 +epoch: 1, batch: 33472, sum loss: 4599.150391, avg loss: 2.540967, ppl: 12.691941 +epoch: 1, batch: 33473, sum loss: 4163.493652, avg loss: 2.388694, ppl: 10.899251 +epoch: 1, batch: 33474, sum loss: 4816.212891, avg loss: 2.517623, ppl: 12.399088 +epoch: 1, batch: 33475, sum loss: 5147.195801, avg loss: 2.803483, ppl: 16.502031 +epoch: 1, batch: 33476, sum loss: 4187.458496, avg loss: 2.522565, ppl: 12.460522 +epoch: 1, batch: 33477, sum loss: 3554.679688, avg loss: 2.300764, ppl: 9.981802 +epoch: 1, batch: 33478, sum loss: 3951.269775, avg loss: 2.302605, ppl: 10.000198 +epoch: 1, batch: 33479, sum loss: 4222.611328, avg loss: 2.652394, ppl: 14.187964 +epoch: 1, batch: 33480, sum loss: 
4257.870605, avg loss: 2.572731, ppl: 13.101562 +epoch: 1, batch: 33481, sum loss: 4181.437500, avg loss: 2.518938, ppl: 12.415408 +epoch: 1, batch: 33482, sum loss: 3916.265625, avg loss: 2.536442, ppl: 12.634631 +epoch: 1, batch: 33483, sum loss: 4463.656738, avg loss: 2.633426, ppl: 13.921379 +epoch: 1, batch: 33484, sum loss: 4216.328613, avg loss: 2.462809, ppl: 11.737732 +epoch: 1, batch: 33485, sum loss: 3520.580566, avg loss: 2.339256, ppl: 10.373519 +epoch: 1, batch: 33486, sum loss: 4480.312500, avg loss: 2.716988, ppl: 15.134662 +epoch: 1, batch: 33487, sum loss: 5098.317383, avg loss: 2.898418, ppl: 18.145420 +epoch: 1, batch: 33488, sum loss: 4986.531250, avg loss: 2.581020, ppl: 13.210610 +epoch: 1, batch: 33489, sum loss: 4434.356934, avg loss: 2.708832, ppl: 15.011724 +epoch: 1, batch: 33490, sum loss: 5222.904297, avg loss: 2.769303, ppl: 15.947509 +epoch: 1, batch: 33491, sum loss: 5321.711914, avg loss: 2.724891, ppl: 15.254750 +epoch: 1, batch: 33492, sum loss: 3979.245605, avg loss: 2.410203, ppl: 11.136227 +epoch: 1, batch: 33493, sum loss: 4524.904297, avg loss: 2.719294, ppl: 15.169602 +epoch: 1, batch: 33494, sum loss: 4265.592773, avg loss: 2.494499, ppl: 12.115659 +epoch: 1, batch: 33495, sum loss: 4515.834473, avg loss: 2.710585, ppl: 15.038069 +epoch: 1, batch: 33496, sum loss: 4735.282227, avg loss: 2.675301, ppl: 14.516716 +epoch: 1, batch: 33497, sum loss: 4575.524902, avg loss: 2.801913, ppl: 16.476143 +epoch: 1, batch: 33498, sum loss: 3603.436035, avg loss: 2.308415, ppl: 10.058471 +epoch: 1, batch: 33499, sum loss: 4145.975586, avg loss: 2.610816, ppl: 13.610152 +epoch: 1, batch: 33500, sum loss: 4992.737793, avg loss: 2.735747, ppl: 15.421257 +epoch: 1, batch: 33501, sum loss: 4047.556641, avg loss: 2.640285, ppl: 14.017195 +epoch: 1, batch: 33502, sum loss: 4482.565918, avg loss: 2.571753, ppl: 13.088753 +epoch: 1, batch: 33503, sum loss: 3670.236084, avg loss: 2.398847, ppl: 11.010475 +epoch: 1, batch: 33504, sum loss: 
4865.548828, avg loss: 2.830453, ppl: 16.953142 +epoch: 1, batch: 33505, sum loss: 4913.234375, avg loss: 2.919331, ppl: 18.528889 +epoch: 1, batch: 33506, sum loss: 4154.262207, avg loss: 2.580287, ppl: 13.200926 +epoch: 1, batch: 33507, sum loss: 4296.659668, avg loss: 2.566702, ppl: 13.022809 +epoch: 1, batch: 33508, sum loss: 5269.775391, avg loss: 2.653462, ppl: 14.203124 +epoch: 1, batch: 33509, sum loss: 3841.656250, avg loss: 2.399536, ppl: 11.018059 +epoch: 1, batch: 33510, sum loss: 5387.750977, avg loss: 2.810512, ppl: 16.618422 +epoch: 1, batch: 33511, sum loss: 4760.576172, avg loss: 2.591495, ppl: 13.349715 +epoch: 1, batch: 33512, sum loss: 4941.400391, avg loss: 2.672472, ppl: 14.475705 +epoch: 1, batch: 33513, sum loss: 4934.461914, avg loss: 2.690546, ppl: 14.739726 +epoch: 1, batch: 33514, sum loss: 4486.069336, avg loss: 2.597608, ppl: 13.431576 +epoch: 1, batch: 33515, sum loss: 3630.985840, avg loss: 2.535605, ppl: 12.624063 +epoch: 1, batch: 33516, sum loss: 4281.575195, avg loss: 2.869689, ppl: 17.631525 +epoch: 1, batch: 33517, sum loss: 4996.782227, avg loss: 2.680677, ppl: 14.594974 +epoch: 1, batch: 33518, sum loss: 4973.841797, avg loss: 2.682762, ppl: 14.625439 +epoch: 1, batch: 33519, sum loss: 4857.721680, avg loss: 2.603281, ppl: 13.507979 +epoch: 1, batch: 33520, sum loss: 4665.838867, avg loss: 2.783914, ppl: 16.182228 +epoch: 1, batch: 33521, sum loss: 3554.318359, avg loss: 2.553390, ppl: 12.850588 +epoch: 1, batch: 33522, sum loss: 4988.187500, avg loss: 2.821373, ppl: 16.799906 +epoch: 1, batch: 33523, sum loss: 4592.541504, avg loss: 2.783359, ppl: 16.173248 +epoch: 1, batch: 33524, sum loss: 5054.509277, avg loss: 2.733645, ppl: 15.388873 +epoch: 1, batch: 33525, sum loss: 4262.100586, avg loss: 2.504172, ppl: 12.233423 +epoch: 1, batch: 33526, sum loss: 4411.966797, avg loss: 2.440247, ppl: 11.475876 +epoch: 1, batch: 33527, sum loss: 4195.321777, avg loss: 2.636909, ppl: 13.969953 +epoch: 1, batch: 33528, sum loss: 
3909.488281, avg loss: 2.514141, ppl: 12.355986 +epoch: 1, batch: 33529, sum loss: 4587.576172, avg loss: 2.833586, ppl: 17.006340 +epoch: 1, batch: 33530, sum loss: 5243.828125, avg loss: 2.612769, ppl: 13.636764 +epoch: 1, batch: 33531, sum loss: 4708.495605, avg loss: 2.591357, ppl: 13.347873 +epoch: 1, batch: 33532, sum loss: 4763.072754, avg loss: 2.604195, ppl: 13.520339 +epoch: 1, batch: 33533, sum loss: 3975.378906, avg loss: 2.643204, ppl: 14.058173 +epoch: 1, batch: 33534, sum loss: 4098.131348, avg loss: 2.697914, ppl: 14.848723 +epoch: 1, batch: 33535, sum loss: 4425.264160, avg loss: 2.610775, ppl: 13.609597 +epoch: 1, batch: 33536, sum loss: 4419.435547, avg loss: 2.538447, ppl: 12.659990 +epoch: 1, batch: 33537, sum loss: 5544.119141, avg loss: 2.744613, ppl: 15.558598 +epoch: 1, batch: 33538, sum loss: 4142.102051, avg loss: 2.439401, ppl: 11.466166 +epoch: 1, batch: 33539, sum loss: 4944.419922, avg loss: 2.719703, ppl: 15.175814 +epoch: 1, batch: 33540, sum loss: 5325.964355, avg loss: 2.874239, ppl: 17.711935 +epoch: 1, batch: 33541, sum loss: 5202.833008, avg loss: 2.686026, ppl: 14.673253 +epoch: 1, batch: 33542, sum loss: 4598.071289, avg loss: 2.543181, ppl: 12.720069 +epoch: 1, batch: 33543, sum loss: 5226.411133, avg loss: 3.043920, ppl: 20.987358 +epoch: 1, batch: 33544, sum loss: 3496.846924, avg loss: 2.214596, ppl: 9.157707 +epoch: 1, batch: 33545, sum loss: 4252.426758, avg loss: 2.644544, ppl: 14.077023 +epoch: 1, batch: 33546, sum loss: 4292.087402, avg loss: 2.713077, ppl: 15.075585 +epoch: 1, batch: 33547, sum loss: 4166.017578, avg loss: 2.482728, ppl: 11.973885 +epoch: 1, batch: 33548, sum loss: 4641.327148, avg loss: 2.628158, ppl: 13.848239 +epoch: 1, batch: 33549, sum loss: 3771.253906, avg loss: 2.222306, ppl: 9.228592 +epoch: 1, batch: 33550, sum loss: 3595.004883, avg loss: 2.335936, ppl: 10.339128 +epoch: 1, batch: 33551, sum loss: 4791.921875, avg loss: 2.749238, ppl: 15.630717 +epoch: 1, batch: 33552, sum loss: 
4411.240723, avg loss: 2.783117, ppl: 16.169344 +epoch: 1, batch: 33553, sum loss: 4197.789551, avg loss: 2.498684, ppl: 12.166477 +epoch: 1, batch: 33554, sum loss: 5568.943848, avg loss: 2.836956, ppl: 17.063740 +epoch: 1, batch: 33555, sum loss: 3946.606445, avg loss: 2.354777, ppl: 10.535780 +epoch: 1, batch: 33556, sum loss: 4376.472656, avg loss: 2.609703, ppl: 13.595016 +epoch: 1, batch: 33557, sum loss: 3867.791504, avg loss: 2.387526, ppl: 10.886525 +epoch: 1, batch: 33558, sum loss: 4124.132812, avg loss: 2.664169, ppl: 14.356009 +epoch: 1, batch: 33559, sum loss: 3879.108887, avg loss: 2.522177, ppl: 12.455688 +epoch: 1, batch: 33560, sum loss: 4171.216309, avg loss: 2.729854, ppl: 15.330643 +epoch: 1, batch: 33561, sum loss: 4768.371094, avg loss: 2.749926, ppl: 15.641468 +epoch: 1, batch: 33562, sum loss: 5615.962402, avg loss: 2.645296, ppl: 14.087609 +epoch: 1, batch: 33563, sum loss: 4809.037598, avg loss: 2.686613, ppl: 14.681866 +epoch: 1, batch: 33564, sum loss: 4449.627441, avg loss: 2.623601, ppl: 13.785277 +epoch: 1, batch: 33565, sum loss: 3736.863770, avg loss: 2.447193, ppl: 11.555865 +epoch: 1, batch: 33566, sum loss: 4356.656250, avg loss: 2.619757, ppl: 13.732388 +epoch: 1, batch: 33567, sum loss: 3522.695557, avg loss: 2.422762, ppl: 11.276959 +epoch: 1, batch: 33568, sum loss: 5802.672363, avg loss: 3.065331, ppl: 21.441563 +epoch: 1, batch: 33569, sum loss: 5365.490723, avg loss: 2.635310, ppl: 13.947635 +epoch: 1, batch: 33570, sum loss: 3961.843506, avg loss: 2.662529, ppl: 14.332494 +epoch: 1, batch: 33571, sum loss: 4712.569824, avg loss: 2.676076, ppl: 14.527972 +epoch: 1, batch: 33572, sum loss: 3769.064453, avg loss: 2.558768, ppl: 12.919885 +epoch: 1, batch: 33573, sum loss: 5324.395508, avg loss: 2.699998, ppl: 14.879697 +epoch: 1, batch: 33574, sum loss: 4016.213379, avg loss: 2.690029, ppl: 14.732101 +epoch: 1, batch: 33575, sum loss: 5062.533691, avg loss: 2.680007, ppl: 14.585198 +epoch: 1, batch: 33576, sum loss: 
4405.557617, avg loss: 2.494653, ppl: 12.117531 +epoch: 1, batch: 33577, sum loss: 4041.644531, avg loss: 2.462915, ppl: 11.738986 +epoch: 1, batch: 33578, sum loss: 4036.339355, avg loss: 2.467200, ppl: 11.789393 +epoch: 1, batch: 33579, sum loss: 5265.544922, avg loss: 2.624898, ppl: 13.803163 +epoch: 1, batch: 33580, sum loss: 5773.129883, avg loss: 3.180788, ppl: 24.065710 +epoch: 1, batch: 33581, sum loss: 4753.537598, avg loss: 2.837933, ppl: 17.080425 +epoch: 1, batch: 33582, sum loss: 3937.116699, avg loss: 2.473063, ppl: 11.858717 +epoch: 1, batch: 33583, sum loss: 4388.343750, avg loss: 2.539551, ppl: 12.673976 +epoch: 1, batch: 33584, sum loss: 3496.172852, avg loss: 2.285080, ppl: 9.826476 +epoch: 1, batch: 33585, sum loss: 4214.722168, avg loss: 2.479249, ppl: 11.932294 +epoch: 1, batch: 33586, sum loss: 3561.650146, avg loss: 2.330923, ppl: 10.287431 +epoch: 1, batch: 33587, sum loss: 3878.867920, avg loss: 2.369498, ppl: 10.692021 +epoch: 1, batch: 33588, sum loss: 5186.517578, avg loss: 2.692896, ppl: 14.774403 +epoch: 1, batch: 33589, sum loss: 4502.429688, avg loss: 2.678423, ppl: 14.562117 +epoch: 1, batch: 33590, sum loss: 5144.242188, avg loss: 2.729041, ppl: 15.318188 +epoch: 1, batch: 33591, sum loss: 5341.993164, avg loss: 2.841486, ppl: 17.141214 +epoch: 1, batch: 33592, sum loss: 5226.857422, avg loss: 2.814678, ppl: 16.687801 +epoch: 1, batch: 33593, sum loss: 4588.463379, avg loss: 2.631000, ppl: 13.887645 +epoch: 1, batch: 33594, sum loss: 4754.834961, avg loss: 2.805212, ppl: 16.530584 +epoch: 1, batch: 33595, sum loss: 4522.380859, avg loss: 2.674383, ppl: 14.503394 +epoch: 1, batch: 33596, sum loss: 3751.299316, avg loss: 2.502535, ppl: 12.213411 +epoch: 1, batch: 33597, sum loss: 4108.418457, avg loss: 2.574197, ppl: 13.120778 +epoch: 1, batch: 33598, sum loss: 5412.627930, avg loss: 3.108919, ppl: 22.396818 +epoch: 1, batch: 33599, sum loss: 5169.267578, avg loss: 2.767274, ppl: 15.915189 +epoch: 1, batch: 33600, sum loss: 
4047.850586, avg loss: 2.591454, ppl: 13.349164 +epoch: 1, batch: 33601, sum loss: 3327.343506, avg loss: 2.190483, ppl: 8.939528 +epoch: 1, batch: 33602, sum loss: 4395.283203, avg loss: 2.537692, ppl: 12.650444 +epoch: 1, batch: 33603, sum loss: 5015.497070, avg loss: 3.025028, ppl: 20.594591 +epoch: 1, batch: 33604, sum loss: 5151.751953, avg loss: 2.762333, ppl: 15.836753 +epoch: 1, batch: 33605, sum loss: 4198.989258, avg loss: 2.549477, ppl: 12.800412 +epoch: 1, batch: 33606, sum loss: 4487.478516, avg loss: 2.816998, ppl: 16.726570 +epoch: 1, batch: 33607, sum loss: 5145.308594, avg loss: 2.663203, ppl: 14.342157 +epoch: 1, batch: 33608, sum loss: 4661.607422, avg loss: 2.821796, ppl: 16.807016 +epoch: 1, batch: 33609, sum loss: 3710.582275, avg loss: 2.473722, ppl: 11.866526 +epoch: 1, batch: 33610, sum loss: 4383.704102, avg loss: 2.592374, ppl: 13.361452 +epoch: 1, batch: 33611, sum loss: 5410.073730, avg loss: 2.774397, ppl: 16.028954 +epoch: 1, batch: 33612, sum loss: 4218.217285, avg loss: 2.623269, ppl: 13.780704 +epoch: 1, batch: 33613, sum loss: 3867.630859, avg loss: 2.593985, ppl: 13.382992 +epoch: 1, batch: 33614, sum loss: 4733.595703, avg loss: 2.647425, ppl: 14.117638 +epoch: 1, batch: 33615, sum loss: 4521.188965, avg loss: 2.474652, ppl: 11.877571 +epoch: 1, batch: 33616, sum loss: 4474.237305, avg loss: 2.849833, ppl: 17.284891 +epoch: 1, batch: 33617, sum loss: 5635.536621, avg loss: 2.816360, ppl: 16.715899 +epoch: 1, batch: 33618, sum loss: 5326.962402, avg loss: 2.979285, ppl: 19.673754 +epoch: 1, batch: 33619, sum loss: 4428.673340, avg loss: 2.804733, ppl: 16.522665 +epoch: 1, batch: 33620, sum loss: 4660.522949, avg loss: 2.693944, ppl: 14.789888 +epoch: 1, batch: 33621, sum loss: 5009.118164, avg loss: 2.684415, ppl: 14.649623 +epoch: 1, batch: 33622, sum loss: 4007.110352, avg loss: 2.648454, ppl: 14.132170 +epoch: 1, batch: 33623, sum loss: 4940.958496, avg loss: 3.055633, ppl: 21.234625 +epoch: 1, batch: 33624, sum loss: 
4138.352539, avg loss: 2.467712, ppl: 11.795424 +epoch: 1, batch: 33625, sum loss: 4973.802246, avg loss: 2.880024, ppl: 17.814709 +epoch: 1, batch: 33626, sum loss: 3712.969727, avg loss: 2.327881, ppl: 10.256182 +epoch: 1, batch: 33627, sum loss: 5188.913574, avg loss: 2.691345, ppl: 14.751510 +epoch: 1, batch: 33628, sum loss: 4573.076660, avg loss: 2.589511, ppl: 13.323257 +epoch: 1, batch: 33629, sum loss: 5348.089355, avg loss: 2.829677, ppl: 16.939985 +epoch: 1, batch: 33630, sum loss: 4466.771973, avg loss: 2.506606, ppl: 12.263239 +epoch: 1, batch: 33631, sum loss: 3848.813721, avg loss: 2.356898, ppl: 10.558145 +epoch: 1, batch: 33632, sum loss: 4627.700195, avg loss: 2.586752, ppl: 13.286552 +epoch: 1, batch: 33633, sum loss: 5069.738281, avg loss: 2.727132, ppl: 15.288973 +epoch: 1, batch: 33634, sum loss: 4279.009766, avg loss: 2.661076, ppl: 14.311674 +epoch: 1, batch: 33635, sum loss: 4050.880371, avg loss: 2.492849, ppl: 12.095692 +epoch: 1, batch: 33636, sum loss: 4475.466309, avg loss: 2.662383, ppl: 14.330402 +epoch: 1, batch: 33637, sum loss: 5280.388184, avg loss: 2.828274, ppl: 16.916243 +epoch: 1, batch: 33638, sum loss: 3500.623535, avg loss: 2.243989, ppl: 9.430881 +epoch: 1, batch: 33639, sum loss: 4164.100586, avg loss: 2.802221, ppl: 16.481213 +epoch: 1, batch: 33640, sum loss: 5366.967773, avg loss: 2.581514, ppl: 13.217135 +epoch: 1, batch: 33641, sum loss: 4842.709473, avg loss: 2.681456, ppl: 14.606346 +epoch: 1, batch: 33642, sum loss: 4848.632812, avg loss: 2.529282, ppl: 12.544494 +epoch: 1, batch: 33643, sum loss: 4825.019531, avg loss: 2.744607, ppl: 15.558501 +epoch: 1, batch: 33644, sum loss: 4297.252930, avg loss: 2.567057, ppl: 13.027427 +epoch: 1, batch: 33645, sum loss: 4405.348145, avg loss: 2.704327, ppl: 14.944250 +epoch: 1, batch: 33646, sum loss: 4302.433105, avg loss: 2.682315, ppl: 14.618898 +epoch: 1, batch: 33647, sum loss: 4053.285400, avg loss: 2.473023, ppl: 11.858245 +epoch: 1, batch: 33648, sum loss: 
4233.047852, avg loss: 2.459644, ppl: 11.700649 +epoch: 1, batch: 33649, sum loss: 4621.200684, avg loss: 2.881048, ppl: 17.832952 +epoch: 1, batch: 33650, sum loss: 4980.750977, avg loss: 2.718750, ppl: 15.161366 +epoch: 1, batch: 33651, sum loss: 4151.917480, avg loss: 2.425185, ppl: 11.304325 +epoch: 1, batch: 33652, sum loss: 4107.316895, avg loss: 2.842434, ppl: 17.157475 +epoch: 1, batch: 33653, sum loss: 4425.789551, avg loss: 2.621913, ppl: 13.762032 +epoch: 1, batch: 33654, sum loss: 4531.129395, avg loss: 2.580370, ppl: 13.202022 +epoch: 1, batch: 33655, sum loss: 4683.819336, avg loss: 2.803004, ppl: 16.494122 +epoch: 1, batch: 33656, sum loss: 4645.637207, avg loss: 2.669906, ppl: 14.438618 +epoch: 1, batch: 33657, sum loss: 4957.994141, avg loss: 2.782264, ppl: 16.155556 +epoch: 1, batch: 33658, sum loss: 4543.857910, avg loss: 2.735616, ppl: 15.419234 +epoch: 1, batch: 33659, sum loss: 4824.948730, avg loss: 2.547492, ppl: 12.775018 +epoch: 1, batch: 33660, sum loss: 4762.696777, avg loss: 2.829885, ppl: 16.943512 +epoch: 1, batch: 33661, sum loss: 5208.878906, avg loss: 2.843275, ppl: 17.171906 +epoch: 1, batch: 33662, sum loss: 3914.040527, avg loss: 2.464761, ppl: 11.760674 +epoch: 1, batch: 33663, sum loss: 4641.190430, avg loss: 2.445306, ppl: 11.534080 +epoch: 1, batch: 33664, sum loss: 4938.267578, avg loss: 2.663575, ppl: 14.347486 +epoch: 1, batch: 33665, sum loss: 4784.204102, avg loss: 2.525979, ppl: 12.503130 +epoch: 1, batch: 33666, sum loss: 4503.184570, avg loss: 2.760996, ppl: 15.815589 +epoch: 1, batch: 33667, sum loss: 4374.462402, avg loss: 2.568680, ppl: 13.048593 +epoch: 1, batch: 33668, sum loss: 4009.944824, avg loss: 2.466141, ppl: 11.776909 +epoch: 1, batch: 33669, sum loss: 4715.657227, avg loss: 2.625644, ppl: 13.813474 +epoch: 1, batch: 33670, sum loss: 4909.361816, avg loss: 2.770520, ppl: 15.966938 +epoch: 1, batch: 33671, sum loss: 3665.442871, avg loss: 2.345133, ppl: 10.434661 +epoch: 1, batch: 33672, sum loss: 
3924.069824, avg loss: 2.576540, ppl: 13.151552 +epoch: 1, batch: 33673, sum loss: 4179.598145, avg loss: 2.447071, ppl: 11.554458 +epoch: 1, batch: 33674, sum loss: 5077.392090, avg loss: 2.672312, ppl: 14.473390 +epoch: 1, batch: 33675, sum loss: 4239.676758, avg loss: 2.532662, ppl: 12.586973 +epoch: 1, batch: 33676, sum loss: 4874.532227, avg loss: 2.726249, ppl: 15.275474 +epoch: 1, batch: 33677, sum loss: 4195.262695, avg loss: 2.512133, ppl: 12.331209 +epoch: 1, batch: 33678, sum loss: 4761.428223, avg loss: 2.674960, ppl: 14.511765 +epoch: 1, batch: 33679, sum loss: 3840.739502, avg loss: 2.415559, ppl: 11.196031 +epoch: 1, batch: 33680, sum loss: 4088.131836, avg loss: 2.620597, ppl: 13.743931 +epoch: 1, batch: 33681, sum loss: 5237.293457, avg loss: 3.025588, ppl: 20.606123 +epoch: 1, batch: 33682, sum loss: 4108.268066, avg loss: 2.508100, ppl: 12.281573 +epoch: 1, batch: 33683, sum loss: 4227.096680, avg loss: 2.419632, ppl: 11.241718 +epoch: 1, batch: 33684, sum loss: 4538.159668, avg loss: 2.585846, ppl: 13.274514 +epoch: 1, batch: 33685, sum loss: 4741.949219, avg loss: 2.581355, ppl: 13.215034 +epoch: 1, batch: 33686, sum loss: 4323.221680, avg loss: 2.587206, ppl: 13.292585 +epoch: 1, batch: 33687, sum loss: 4371.678223, avg loss: 2.670543, ppl: 14.447808 +epoch: 1, batch: 33688, sum loss: 3477.697754, avg loss: 2.445638, ppl: 11.537913 +epoch: 1, batch: 33689, sum loss: 4398.957031, avg loss: 2.559021, ppl: 12.923160 +epoch: 1, batch: 33690, sum loss: 4370.625000, avg loss: 2.607772, ppl: 13.568781 +epoch: 1, batch: 33691, sum loss: 3926.390625, avg loss: 2.346916, ppl: 10.453284 +epoch: 1, batch: 33692, sum loss: 5276.716797, avg loss: 2.974474, ppl: 19.579317 +epoch: 1, batch: 33693, sum loss: 4384.106445, avg loss: 2.741780, ppl: 15.514577 +epoch: 1, batch: 33694, sum loss: 4260.394531, avg loss: 2.469794, ppl: 11.820012 +epoch: 1, batch: 33695, sum loss: 3526.354248, avg loss: 2.310848, ppl: 10.082973 +epoch: 1, batch: 33696, sum loss: 
3942.444824, avg loss: 2.762751, ppl: 15.843362 +epoch: 1, batch: 33697, sum loss: 4992.293945, avg loss: 2.642824, ppl: 14.052829 +epoch: 1, batch: 33698, sum loss: 5490.770020, avg loss: 2.723596, ppl: 15.235014 +epoch: 1, batch: 33699, sum loss: 4455.961914, avg loss: 2.592183, ppl: 13.358897 +epoch: 1, batch: 33700, sum loss: 4121.715332, avg loss: 2.648917, ppl: 14.138721 +epoch: 1, batch: 33701, sum loss: 3801.446289, avg loss: 2.352380, ppl: 10.510555 +epoch: 1, batch: 33702, sum loss: 5440.584961, avg loss: 2.966513, ppl: 19.424068 +epoch: 1, batch: 33703, sum loss: 3745.279785, avg loss: 2.345197, ppl: 10.435330 +epoch: 1, batch: 33704, sum loss: 3895.518311, avg loss: 2.416575, ppl: 11.207405 +epoch: 1, batch: 33705, sum loss: 4146.020508, avg loss: 2.629056, ppl: 13.860672 +epoch: 1, batch: 33706, sum loss: 4653.813965, avg loss: 2.545850, ppl: 12.754065 +epoch: 1, batch: 33707, sum loss: 3895.867676, avg loss: 2.397457, ppl: 10.995179 +epoch: 1, batch: 33708, sum loss: 4143.670898, avg loss: 2.496187, ppl: 12.136136 +epoch: 1, batch: 33709, sum loss: 5839.495117, avg loss: 2.735127, ppl: 15.411696 +epoch: 1, batch: 33710, sum loss: 4762.910645, avg loss: 2.745193, ppl: 15.567626 +epoch: 1, batch: 33711, sum loss: 4894.391602, avg loss: 2.787239, ppl: 16.236132 +epoch: 1, batch: 33712, sum loss: 5046.264160, avg loss: 2.877003, ppl: 17.760971 +epoch: 1, batch: 33713, sum loss: 4095.480957, avg loss: 2.462707, ppl: 11.736534 +epoch: 1, batch: 33714, sum loss: 4622.366699, avg loss: 2.731895, ppl: 15.361974 +epoch: 1, batch: 33715, sum loss: 4795.803711, avg loss: 2.692759, ppl: 14.772377 +epoch: 1, batch: 33716, sum loss: 5005.637695, avg loss: 2.816904, ppl: 16.724983 +epoch: 1, batch: 33717, sum loss: 3446.720215, avg loss: 2.456679, ppl: 11.666003 +epoch: 1, batch: 33718, sum loss: 4464.498047, avg loss: 2.589616, ppl: 13.324658 +epoch: 1, batch: 33719, sum loss: 4190.118164, avg loss: 2.573783, ppl: 13.115342 +epoch: 1, batch: 33720, sum loss: 
5126.846680, avg loss: 2.830948, ppl: 16.961535 +epoch: 1, batch: 33721, sum loss: 4503.038086, avg loss: 2.554191, ppl: 12.860887 +epoch: 1, batch: 33722, sum loss: 4927.891602, avg loss: 2.639471, ppl: 14.005787 +epoch: 1, batch: 33723, sum loss: 4111.312500, avg loss: 2.622010, ppl: 13.763367 +epoch: 1, batch: 33724, sum loss: 4433.699707, avg loss: 2.558396, ppl: 12.915080 +epoch: 1, batch: 33725, sum loss: 5359.519531, avg loss: 2.855365, ppl: 17.380777 +epoch: 1, batch: 33726, sum loss: 4234.067871, avg loss: 2.450271, ppl: 11.591486 +epoch: 1, batch: 33727, sum loss: 5305.092285, avg loss: 2.843029, ppl: 17.167688 +epoch: 1, batch: 33728, sum loss: 4134.946289, avg loss: 2.718571, ppl: 15.158645 +epoch: 1, batch: 33729, sum loss: 5806.920410, avg loss: 2.964227, ppl: 19.379717 +epoch: 1, batch: 33730, sum loss: 4156.827148, avg loss: 2.586700, ppl: 13.285858 +epoch: 1, batch: 33731, sum loss: 4636.489746, avg loss: 2.643381, ppl: 14.060658 +epoch: 1, batch: 33732, sum loss: 4536.657227, avg loss: 2.558746, ppl: 12.919610 +epoch: 1, batch: 33733, sum loss: 5092.637695, avg loss: 2.864251, ppl: 17.535912 +epoch: 1, batch: 33734, sum loss: 4032.446289, avg loss: 2.418984, ppl: 11.234439 +epoch: 1, batch: 33735, sum loss: 5576.916504, avg loss: 2.918324, ppl: 18.510229 +epoch: 1, batch: 33736, sum loss: 5350.423340, avg loss: 2.876572, ppl: 17.753304 +epoch: 1, batch: 33737, sum loss: 4997.011719, avg loss: 2.597199, ppl: 13.426085 +epoch: 1, batch: 33738, sum loss: 4905.835449, avg loss: 2.790577, ppl: 16.290422 +epoch: 1, batch: 33739, sum loss: 4169.990234, avg loss: 2.524207, ppl: 12.480998 +epoch: 1, batch: 33740, sum loss: 5068.591309, avg loss: 2.720661, ppl: 15.190358 +epoch: 1, batch: 33741, sum loss: 4949.431641, avg loss: 2.716483, ppl: 15.127025 +epoch: 1, batch: 33742, sum loss: 4710.147461, avg loss: 2.652110, ppl: 14.183937 +epoch: 1, batch: 33743, sum loss: 4833.922363, avg loss: 2.812055, ppl: 16.644089 +epoch: 1, batch: 33744, sum loss: 
4618.966797, avg loss: 2.749385, ppl: 15.633017 +epoch: 1, batch: 33745, sum loss: 4739.125977, avg loss: 2.609651, ppl: 13.594303 +epoch: 1, batch: 33746, sum loss: 3991.035156, avg loss: 2.578188, ppl: 13.173249 +epoch: 1, batch: 33747, sum loss: 4795.629883, avg loss: 2.534688, ppl: 12.612495 +epoch: 1, batch: 33748, sum loss: 3872.535645, avg loss: 2.402317, ppl: 11.048750 +epoch: 1, batch: 33749, sum loss: 4071.535889, avg loss: 2.682171, ppl: 14.616796 +epoch: 1, batch: 33750, sum loss: 5302.090820, avg loss: 2.702391, ppl: 14.915347 +epoch: 1, batch: 33751, sum loss: 4552.900879, avg loss: 2.673459, ppl: 14.490005 +epoch: 1, batch: 33752, sum loss: 4567.051270, avg loss: 2.774636, ppl: 16.032795 +epoch: 1, batch: 33753, sum loss: 3894.299316, avg loss: 2.251040, ppl: 9.497608 +epoch: 1, batch: 33754, sum loss: 3792.609131, avg loss: 2.525039, ppl: 12.491385 +epoch: 1, batch: 33755, sum loss: 4156.218262, avg loss: 2.672809, ppl: 14.480594 +epoch: 1, batch: 33756, sum loss: 5028.544922, avg loss: 2.815535, ppl: 16.702106 +epoch: 1, batch: 33757, sum loss: 3587.173828, avg loss: 2.171413, ppl: 8.770666 +epoch: 1, batch: 33758, sum loss: 4283.357910, avg loss: 2.618190, ppl: 13.710878 +epoch: 1, batch: 33759, sum loss: 4238.994629, avg loss: 2.392209, ppl: 10.937632 +epoch: 1, batch: 33760, sum loss: 4872.393555, avg loss: 2.587569, ppl: 13.297413 +epoch: 1, batch: 33761, sum loss: 4130.892090, avg loss: 2.466204, ppl: 11.777658 +epoch: 1, batch: 33762, sum loss: 4563.585449, avg loss: 2.615235, ppl: 13.670430 +epoch: 1, batch: 33763, sum loss: 4534.821777, avg loss: 2.766822, ppl: 15.908000 +epoch: 1, batch: 33764, sum loss: 4438.329102, avg loss: 2.587947, ppl: 13.302432 +epoch: 1, batch: 33765, sum loss: 4996.888672, avg loss: 2.837529, ppl: 17.073524 +epoch: 1, batch: 33766, sum loss: 4257.222656, avg loss: 2.367754, ppl: 10.673398 +epoch: 1, batch: 33767, sum loss: 4478.005859, avg loss: 2.695970, ppl: 14.819884 +epoch: 1, batch: 33768, sum loss: 
5118.774414, avg loss: 2.936761, ppl: 18.854675 +epoch: 1, batch: 33769, sum loss: 5170.061035, avg loss: 3.002358, ppl: 20.132959 +epoch: 1, batch: 33770, sum loss: 3841.725830, avg loss: 2.433012, ppl: 11.393147 +epoch: 1, batch: 33771, sum loss: 4364.366699, avg loss: 2.579413, ppl: 13.189393 +epoch: 1, batch: 33772, sum loss: 4828.357910, avg loss: 2.642779, ppl: 14.052205 +epoch: 1, batch: 33773, sum loss: 5509.479980, avg loss: 3.040552, ppl: 20.916784 +epoch: 1, batch: 33774, sum loss: 4022.062012, avg loss: 2.349335, ppl: 10.478601 +epoch: 1, batch: 33775, sum loss: 4577.713867, avg loss: 2.747727, ppl: 15.607122 +epoch: 1, batch: 33776, sum loss: 4529.110352, avg loss: 2.670466, ppl: 14.446699 +epoch: 1, batch: 33777, sum loss: 4754.903320, avg loss: 2.855798, ppl: 17.388304 +epoch: 1, batch: 33778, sum loss: 4460.177246, avg loss: 2.727937, ppl: 15.301288 +epoch: 1, batch: 33779, sum loss: 3772.010498, avg loss: 2.686617, ppl: 14.681925 +epoch: 1, batch: 33780, sum loss: 4604.738281, avg loss: 2.683414, ppl: 14.634972 +epoch: 1, batch: 33781, sum loss: 3860.723877, avg loss: 2.575533, ppl: 13.138317 +epoch: 1, batch: 33782, sum loss: 4177.526367, avg loss: 2.525711, ppl: 12.499784 +epoch: 1, batch: 33783, sum loss: 5327.503418, avg loss: 2.817294, ppl: 16.731520 +epoch: 1, batch: 33784, sum loss: 4628.540039, avg loss: 2.827453, ppl: 16.902349 +epoch: 1, batch: 33785, sum loss: 3780.548096, avg loss: 2.630862, ppl: 13.885731 +epoch: 1, batch: 33786, sum loss: 4100.580078, avg loss: 2.588750, ppl: 13.313122 +epoch: 1, batch: 33787, sum loss: 4785.220215, avg loss: 2.652561, ppl: 14.190336 +epoch: 1, batch: 33788, sum loss: 3892.067871, avg loss: 2.725538, ppl: 15.264621 +epoch: 1, batch: 33789, sum loss: 4453.612793, avg loss: 2.513325, ppl: 12.345918 +epoch: 1, batch: 33790, sum loss: 4440.755859, avg loss: 2.709430, ppl: 15.020711 +epoch: 1, batch: 33791, sum loss: 4209.826172, avg loss: 2.451850, ppl: 11.609804 +epoch: 1, batch: 33792, sum loss: 
4059.169189, avg loss: 2.445283, ppl: 11.533810 +epoch: 1, batch: 33793, sum loss: 5343.766602, avg loss: 2.988684, ppl: 19.859524 +epoch: 1, batch: 33794, sum loss: 4226.654297, avg loss: 2.668342, ppl: 14.416053 +epoch: 1, batch: 33795, sum loss: 3974.108398, avg loss: 2.621444, ppl: 13.755569 +epoch: 1, batch: 33796, sum loss: 4373.000977, avg loss: 2.684469, ppl: 14.650427 +epoch: 1, batch: 33797, sum loss: 4022.251953, avg loss: 2.460093, ppl: 11.705897 +epoch: 1, batch: 33798, sum loss: 3748.362061, avg loss: 2.685073, ppl: 14.659273 +epoch: 1, batch: 33799, sum loss: 3867.250488, avg loss: 2.550957, ppl: 12.819366 +epoch: 1, batch: 33800, sum loss: 4573.310059, avg loss: 2.623815, ppl: 13.788230 +epoch: 1, batch: 33801, sum loss: 4264.440430, avg loss: 2.587646, ppl: 13.298430 +epoch: 1, batch: 33802, sum loss: 4603.119629, avg loss: 2.594769, ppl: 13.393487 +epoch: 1, batch: 33803, sum loss: 4159.898438, avg loss: 2.647930, ppl: 14.124776 +epoch: 1, batch: 33804, sum loss: 4161.220215, avg loss: 2.484311, ppl: 11.992850 +epoch: 1, batch: 33805, sum loss: 3887.630615, avg loss: 2.747442, ppl: 15.602670 +epoch: 1, batch: 33806, sum loss: 3033.366699, avg loss: 2.248604, ppl: 9.474499 +epoch: 1, batch: 33807, sum loss: 4965.995117, avg loss: 2.707740, ppl: 14.995345 +epoch: 1, batch: 33808, sum loss: 4303.000000, avg loss: 2.513434, ppl: 12.347263 +epoch: 1, batch: 33809, sum loss: 3845.343262, avg loss: 2.510015, ppl: 12.305118 +epoch: 1, batch: 33810, sum loss: 5242.193359, avg loss: 2.824458, ppl: 16.851803 +epoch: 1, batch: 33811, sum loss: 5139.102051, avg loss: 2.687815, ppl: 14.699518 +epoch: 1, batch: 33812, sum loss: 3940.937012, avg loss: 2.596138, ppl: 13.411839 +epoch: 1, batch: 33813, sum loss: 4186.330078, avg loss: 2.422645, ppl: 11.275642 +epoch: 1, batch: 33814, sum loss: 4444.245605, avg loss: 2.654866, ppl: 14.223083 +epoch: 1, batch: 33815, sum loss: 4752.485840, avg loss: 2.871593, ppl: 17.665133 +epoch: 1, batch: 33816, sum loss: 
4794.820801, avg loss: 2.668236, ppl: 14.414527 +epoch: 1, batch: 33817, sum loss: 4350.139160, avg loss: 2.695254, ppl: 14.809274 +epoch: 1, batch: 33818, sum loss: 5324.881348, avg loss: 2.649195, ppl: 14.142645 +epoch: 1, batch: 33819, sum loss: 4619.993652, avg loss: 2.664356, ppl: 14.358700 +epoch: 1, batch: 33820, sum loss: 4165.678223, avg loss: 2.361496, ppl: 10.606805 +epoch: 1, batch: 33821, sum loss: 4076.678711, avg loss: 2.525823, ppl: 12.501184 +epoch: 1, batch: 33822, sum loss: 5082.017090, avg loss: 2.710409, ppl: 15.035426 +epoch: 1, batch: 33823, sum loss: 4788.822754, avg loss: 2.685823, ppl: 14.670277 +epoch: 1, batch: 33824, sum loss: 4713.757324, avg loss: 2.591400, ppl: 13.348452 +epoch: 1, batch: 33825, sum loss: 4134.201172, avg loss: 2.712730, ppl: 15.070368 +epoch: 1, batch: 33826, sum loss: 4798.594727, avg loss: 2.680779, ppl: 14.596459 +epoch: 1, batch: 33827, sum loss: 4314.376953, avg loss: 2.673096, ppl: 14.484744 +epoch: 1, batch: 33828, sum loss: 3676.037598, avg loss: 2.445800, ppl: 11.539779 +epoch: 1, batch: 33829, sum loss: 4224.516602, avg loss: 2.448995, ppl: 11.576711 +epoch: 1, batch: 33830, sum loss: 4716.097168, avg loss: 2.687235, ppl: 14.690997 +epoch: 1, batch: 33831, sum loss: 4859.087891, avg loss: 2.890594, ppl: 18.003992 +epoch: 1, batch: 33832, sum loss: 4179.699219, avg loss: 2.670734, ppl: 14.450575 +epoch: 1, batch: 33833, sum loss: 5165.765625, avg loss: 3.020916, ppl: 20.510059 +epoch: 1, batch: 33834, sum loss: 3849.659180, avg loss: 2.374867, ppl: 10.749580 +epoch: 1, batch: 33835, sum loss: 4770.074219, avg loss: 2.774912, ppl: 16.037222 +epoch: 1, batch: 33836, sum loss: 4847.692871, avg loss: 2.623210, ppl: 13.779892 +epoch: 1, batch: 33837, sum loss: 4704.757812, avg loss: 2.650568, ppl: 14.162074 +epoch: 1, batch: 33838, sum loss: 3981.023682, avg loss: 2.306503, ppl: 10.039251 +epoch: 1, batch: 33839, sum loss: 4164.426270, avg loss: 2.368843, ppl: 10.685023 +epoch: 1, batch: 33840, sum loss: 
5047.500488, avg loss: 3.018840, ppl: 20.467537 +epoch: 1, batch: 33841, sum loss: 3631.095215, avg loss: 2.551718, ppl: 12.829128 +epoch: 1, batch: 33842, sum loss: 3795.626953, avg loss: 2.439349, ppl: 11.465573 +epoch: 1, batch: 33843, sum loss: 4927.968750, avg loss: 2.799982, ppl: 16.444351 +epoch: 1, batch: 33844, sum loss: 4736.641602, avg loss: 2.606847, ppl: 13.556245 +epoch: 1, batch: 33845, sum loss: 4406.925293, avg loss: 2.455112, ppl: 11.647735 +epoch: 1, batch: 33846, sum loss: 4390.477051, avg loss: 2.652856, ppl: 14.194526 +epoch: 1, batch: 33847, sum loss: 3869.304932, avg loss: 2.353592, ppl: 10.523298 +epoch: 1, batch: 33848, sum loss: 4949.798340, avg loss: 2.972852, ppl: 19.547590 +epoch: 1, batch: 33849, sum loss: 4053.328613, avg loss: 2.599954, ppl: 13.463123 +epoch: 1, batch: 33850, sum loss: 3684.882324, avg loss: 2.290169, ppl: 9.876609 +epoch: 1, batch: 33851, sum loss: 4565.793457, avg loss: 2.897077, ppl: 18.121101 +epoch: 1, batch: 33852, sum loss: 4652.749023, avg loss: 2.613904, ppl: 13.652246 +epoch: 1, batch: 33853, sum loss: 3700.795898, avg loss: 2.597050, ppl: 13.424075 +epoch: 1, batch: 33854, sum loss: 3971.533203, avg loss: 2.725829, ppl: 15.269069 +epoch: 1, batch: 33855, sum loss: 4250.581055, avg loss: 2.509198, ppl: 12.295062 +epoch: 1, batch: 33856, sum loss: 4472.273926, avg loss: 2.479088, ppl: 11.930374 +epoch: 1, batch: 33857, sum loss: 4106.759766, avg loss: 2.459138, ppl: 11.694722 +epoch: 1, batch: 33858, sum loss: 4351.923340, avg loss: 2.544984, ppl: 12.743029 +epoch: 1, batch: 33859, sum loss: 4275.942871, avg loss: 2.615256, ppl: 13.670710 +epoch: 1, batch: 33860, sum loss: 4835.899414, avg loss: 2.800174, ppl: 16.447500 +epoch: 1, batch: 33861, sum loss: 5126.754883, avg loss: 2.714005, ppl: 15.089584 +epoch: 1, batch: 33862, sum loss: 4652.592773, avg loss: 2.663190, ppl: 14.341966 +epoch: 1, batch: 33863, sum loss: 5187.534668, avg loss: 2.902929, ppl: 18.227461 +epoch: 1, batch: 33864, sum loss: 
4372.642090, avg loss: 2.481636, ppl: 11.960814 +epoch: 1, batch: 33865, sum loss: 4196.455566, avg loss: 2.544849, ppl: 12.741300 +epoch: 1, batch: 33866, sum loss: 3458.236816, avg loss: 2.371905, ppl: 10.717786 +epoch: 1, batch: 33867, sum loss: 4031.320801, avg loss: 2.529059, ppl: 12.541704 +epoch: 1, batch: 33868, sum loss: 4098.606934, avg loss: 2.765592, ppl: 15.888441 +epoch: 1, batch: 33869, sum loss: 5037.742676, avg loss: 2.671126, ppl: 14.456233 +epoch: 1, batch: 33870, sum loss: 5385.426758, avg loss: 2.692713, ppl: 14.771705 +epoch: 1, batch: 33871, sum loss: 4058.186523, avg loss: 2.680440, ppl: 14.591518 +epoch: 1, batch: 33872, sum loss: 5060.809570, avg loss: 2.768495, ppl: 15.934639 +epoch: 1, batch: 33873, sum loss: 3976.477539, avg loss: 2.609237, ppl: 13.588681 +epoch: 1, batch: 33874, sum loss: 5155.767578, avg loss: 2.646698, ppl: 14.107379 +epoch: 1, batch: 33875, sum loss: 4769.680664, avg loss: 2.664626, ppl: 14.362576 +epoch: 1, batch: 33876, sum loss: 5501.888672, avg loss: 2.888130, ppl: 17.959702 +epoch: 1, batch: 33877, sum loss: 4227.260254, avg loss: 2.627259, ppl: 13.835797 +epoch: 1, batch: 33878, sum loss: 4435.487305, avg loss: 2.530227, ppl: 12.556353 +epoch: 1, batch: 33879, sum loss: 4480.375000, avg loss: 2.591310, ppl: 13.347245 +epoch: 1, batch: 33880, sum loss: 4247.474609, avg loss: 2.634910, ppl: 13.942056 +epoch: 1, batch: 33881, sum loss: 4580.949219, avg loss: 2.635759, ppl: 13.953901 +epoch: 1, batch: 33882, sum loss: 4748.175781, avg loss: 2.875939, ppl: 17.742083 +epoch: 1, batch: 33883, sum loss: 4610.420898, avg loss: 2.666525, ppl: 14.389873 +epoch: 1, batch: 33884, sum loss: 5465.912109, avg loss: 3.036618, ppl: 20.834661 +epoch: 1, batch: 33885, sum loss: 4912.592773, avg loss: 2.632686, ppl: 13.911087 +epoch: 1, batch: 33886, sum loss: 4190.041016, avg loss: 2.638565, ppl: 13.993110 +epoch: 1, batch: 33887, sum loss: 5015.803223, avg loss: 2.669400, ppl: 14.431314 +epoch: 1, batch: 33888, sum loss: 
4708.781250, avg loss: 2.560512, ppl: 12.942440 +epoch: 1, batch: 33889, sum loss: 5292.527832, avg loss: 2.652896, ppl: 14.195090 +epoch: 1, batch: 33890, sum loss: 4251.993652, avg loss: 2.440869, ppl: 11.483013 +epoch: 1, batch: 33891, sum loss: 4804.167969, avg loss: 2.556769, ppl: 12.894085 +epoch: 1, batch: 33892, sum loss: 3438.103760, avg loss: 2.466358, ppl: 11.779472 +epoch: 1, batch: 33893, sum loss: 4969.785645, avg loss: 2.754870, ppl: 15.719000 +epoch: 1, batch: 33894, sum loss: 4450.357910, avg loss: 2.769358, ppl: 15.948391 +epoch: 1, batch: 33895, sum loss: 4844.563965, avg loss: 2.729332, ppl: 15.322644 +epoch: 1, batch: 33896, sum loss: 4315.959961, avg loss: 2.457836, ppl: 11.679511 +epoch: 1, batch: 33897, sum loss: 5252.123535, avg loss: 2.783319, ppl: 16.172613 +epoch: 1, batch: 33898, sum loss: 4996.412598, avg loss: 2.745282, ppl: 15.568999 +epoch: 1, batch: 33899, sum loss: 5599.479004, avg loss: 3.016961, ppl: 20.429106 +epoch: 1, batch: 33900, sum loss: 4264.113770, avg loss: 2.568743, ppl: 13.049414 +epoch: 1, batch: 33901, sum loss: 4159.544922, avg loss: 2.542509, ppl: 12.711528 +epoch: 1, batch: 33902, sum loss: 3633.419678, avg loss: 2.276579, ppl: 9.743288 +epoch: 1, batch: 33903, sum loss: 4074.695068, avg loss: 2.548277, ppl: 12.785055 +epoch: 1, batch: 33904, sum loss: 4651.162109, avg loss: 2.701023, ppl: 14.894966 +epoch: 1, batch: 33905, sum loss: 4879.335449, avg loss: 2.549287, ppl: 12.797976 +epoch: 1, batch: 33906, sum loss: 4306.096680, avg loss: 2.578501, ppl: 13.177370 +epoch: 1, batch: 33907, sum loss: 4400.535156, avg loss: 2.728168, ppl: 15.304827 +epoch: 1, batch: 33908, sum loss: 4473.185059, avg loss: 2.465924, ppl: 11.774351 +epoch: 1, batch: 33909, sum loss: 5585.682617, avg loss: 2.791446, ppl: 16.304575 +epoch: 1, batch: 33910, sum loss: 3594.928223, avg loss: 2.380747, ppl: 10.812981 +epoch: 1, batch: 33911, sum loss: 3574.774414, avg loss: 2.325813, ppl: 10.234999 +epoch: 1, batch: 33912, sum loss: 
3846.210449, avg loss: 2.647082, ppl: 14.112802 +epoch: 1, batch: 33913, sum loss: 5174.924805, avg loss: 2.758489, ppl: 15.775982 +epoch: 1, batch: 33914, sum loss: 4726.211914, avg loss: 2.759026, ppl: 15.784462 +epoch: 1, batch: 33915, sum loss: 5623.319336, avg loss: 3.018421, ppl: 20.458969 +epoch: 1, batch: 33916, sum loss: 4205.466309, avg loss: 2.649947, ppl: 14.153295 +epoch: 1, batch: 33917, sum loss: 4800.657715, avg loss: 2.833918, ppl: 17.011990 +epoch: 1, batch: 33918, sum loss: 4319.995117, avg loss: 2.557724, ppl: 12.906403 +epoch: 1, batch: 33919, sum loss: 4151.527344, avg loss: 2.519131, ppl: 12.417797 +epoch: 1, batch: 33920, sum loss: 5102.216309, avg loss: 2.957807, ppl: 19.255693 +epoch: 1, batch: 33921, sum loss: 5336.022949, avg loss: 2.711394, ppl: 15.050242 +epoch: 1, batch: 33922, sum loss: 4444.286621, avg loss: 2.512316, ppl: 12.333461 +epoch: 1, batch: 33923, sum loss: 4909.738281, avg loss: 2.712563, ppl: 15.067842 +epoch: 1, batch: 33924, sum loss: 4144.072754, avg loss: 2.813356, ppl: 16.665749 +epoch: 1, batch: 33925, sum loss: 4164.372070, avg loss: 2.622401, ppl: 13.768740 +epoch: 1, batch: 33926, sum loss: 4481.342285, avg loss: 2.499354, ppl: 12.174631 +epoch: 1, batch: 33927, sum loss: 4430.932617, avg loss: 2.772799, ppl: 16.003365 +epoch: 1, batch: 33928, sum loss: 4471.194824, avg loss: 2.682180, ppl: 14.616929 +epoch: 1, batch: 33929, sum loss: 4305.233398, avg loss: 2.471431, ppl: 11.839380 +epoch: 1, batch: 33930, sum loss: 4433.469727, avg loss: 2.637400, ppl: 13.976819 +epoch: 1, batch: 33931, sum loss: 3733.785156, avg loss: 2.638717, ppl: 13.995242 +epoch: 1, batch: 33932, sum loss: 5031.229492, avg loss: 2.657807, ppl: 14.264977 +epoch: 1, batch: 33933, sum loss: 4980.525879, avg loss: 2.720113, ppl: 15.182031 +epoch: 1, batch: 33934, sum loss: 4503.443359, avg loss: 2.525768, ppl: 12.500486 +epoch: 1, batch: 33935, sum loss: 4851.343750, avg loss: 2.801007, ppl: 16.461212 +epoch: 1, batch: 33936, sum loss: 
4368.931152, avg loss: 2.659118, ppl: 14.283688 +epoch: 1, batch: 33937, sum loss: 3745.131348, avg loss: 2.430325, ppl: 11.362577 +epoch: 1, batch: 33938, sum loss: 4215.902344, avg loss: 2.573811, ppl: 13.115711 +epoch: 1, batch: 33939, sum loss: 4534.676270, avg loss: 2.633378, ppl: 13.920709 +epoch: 1, batch: 33940, sum loss: 4764.822266, avg loss: 2.856608, ppl: 17.402401 +epoch: 1, batch: 33941, sum loss: 5352.463867, avg loss: 2.973591, ppl: 19.562042 +epoch: 1, batch: 33942, sum loss: 4269.062012, avg loss: 2.556325, ppl: 12.888362 +epoch: 1, batch: 33943, sum loss: 4265.467285, avg loss: 2.732522, ppl: 15.371609 +epoch: 1, batch: 33944, sum loss: 4504.935547, avg loss: 2.499964, ppl: 12.182058 +epoch: 1, batch: 33945, sum loss: 4779.196289, avg loss: 2.926636, ppl: 18.664732 +epoch: 1, batch: 33946, sum loss: 4051.779541, avg loss: 2.433501, ppl: 11.398722 +epoch: 1, batch: 33947, sum loss: 5538.621582, avg loss: 2.804365, ppl: 16.516592 +epoch: 1, batch: 33948, sum loss: 6059.557129, avg loss: 2.935832, ppl: 18.837170 +epoch: 1, batch: 33949, sum loss: 4540.179688, avg loss: 2.594388, ppl: 13.388394 +epoch: 1, batch: 33950, sum loss: 4417.334473, avg loss: 2.563746, ppl: 12.984365 +epoch: 1, batch: 33951, sum loss: 4787.137207, avg loss: 2.665444, ppl: 14.374329 +epoch: 1, batch: 33952, sum loss: 3997.694824, avg loss: 2.562625, ppl: 12.969817 +epoch: 1, batch: 33953, sum loss: 3828.265381, avg loss: 2.386699, ppl: 10.877530 +epoch: 1, batch: 33954, sum loss: 3849.608398, avg loss: 2.388094, ppl: 10.892717 +epoch: 1, batch: 33955, sum loss: 4621.763672, avg loss: 2.739635, ppl: 15.481330 +epoch: 1, batch: 33956, sum loss: 4180.107910, avg loss: 2.463234, ppl: 11.742725 +epoch: 1, batch: 33957, sum loss: 4067.573975, avg loss: 2.674276, ppl: 14.501848 +epoch: 1, batch: 33958, sum loss: 3400.994141, avg loss: 2.471653, ppl: 11.842003 +epoch: 1, batch: 33959, sum loss: 5017.137695, avg loss: 2.774966, ppl: 16.038074 +epoch: 1, batch: 33960, sum loss: 
3963.816895, avg loss: 2.494535, ppl: 12.116103 +epoch: 1, batch: 33961, sum loss: 5068.470703, avg loss: 2.630239, ppl: 13.877090 +epoch: 1, batch: 33962, sum loss: 5884.331055, avg loss: 2.844046, ppl: 17.185154 +epoch: 1, batch: 33963, sum loss: 3214.467285, avg loss: 2.389939, ppl: 10.912824 +epoch: 1, batch: 33964, sum loss: 4647.971680, avg loss: 2.742166, ppl: 15.520571 +epoch: 1, batch: 33965, sum loss: 3918.197510, avg loss: 2.351859, ppl: 10.505081 +epoch: 1, batch: 33966, sum loss: 4641.670410, avg loss: 2.683046, ppl: 14.629592 +epoch: 1, batch: 33967, sum loss: 4659.631836, avg loss: 2.613366, ppl: 13.644907 +epoch: 1, batch: 33968, sum loss: 4507.393066, avg loss: 2.463056, ppl: 11.740640 +epoch: 1, batch: 33969, sum loss: 4693.070312, avg loss: 2.536795, ppl: 12.639096 +epoch: 1, batch: 33970, sum loss: 4623.228027, avg loss: 2.541632, ppl: 12.700377 +epoch: 1, batch: 33971, sum loss: 4362.623047, avg loss: 2.490082, ppl: 12.062263 +epoch: 1, batch: 33972, sum loss: 4560.337891, avg loss: 2.701622, ppl: 14.903887 +epoch: 1, batch: 33973, sum loss: 4445.382812, avg loss: 2.553350, ppl: 12.850082 +epoch: 1, batch: 33974, sum loss: 4134.845703, avg loss: 2.731074, ppl: 15.349361 +epoch: 1, batch: 33975, sum loss: 4741.789062, avg loss: 2.637257, ppl: 13.974823 +epoch: 1, batch: 33976, sum loss: 3343.814941, avg loss: 2.280911, ppl: 9.785588 +epoch: 1, batch: 33977, sum loss: 3920.352539, avg loss: 2.603156, ppl: 13.506294 +epoch: 1, batch: 33978, sum loss: 4261.791016, avg loss: 2.605007, ppl: 13.531319 +epoch: 1, batch: 33979, sum loss: 4721.336914, avg loss: 2.674978, ppl: 14.512037 +epoch: 1, batch: 33980, sum loss: 3971.494873, avg loss: 2.466767, ppl: 11.784287 +epoch: 1, batch: 33981, sum loss: 4318.449219, avg loss: 2.677278, ppl: 14.545444 +epoch: 1, batch: 33982, sum loss: 3631.785156, avg loss: 2.381499, ppl: 10.821107 +epoch: 1, batch: 33983, sum loss: 4426.200195, avg loss: 2.506342, ppl: 12.260003 +epoch: 1, batch: 33984, sum loss: 
5186.144043, avg loss: 2.725246, ppl: 15.260174 +epoch: 1, batch: 33985, sum loss: 4791.980957, avg loss: 2.891962, ppl: 18.028645 +epoch: 1, batch: 33986, sum loss: 4051.431152, avg loss: 2.430372, ppl: 11.363113 +epoch: 1, batch: 33987, sum loss: 5009.197266, avg loss: 2.823674, ppl: 16.838598 +epoch: 1, batch: 33988, sum loss: 4606.793945, avg loss: 2.649105, ppl: 14.141381 +epoch: 1, batch: 33989, sum loss: 4270.524902, avg loss: 2.603979, ppl: 13.517412 +epoch: 1, batch: 33990, sum loss: 4207.352051, avg loss: 2.661197, ppl: 14.313408 +epoch: 1, batch: 33991, sum loss: 4427.375488, avg loss: 2.593659, ppl: 13.378631 +epoch: 1, batch: 33992, sum loss: 4068.505127, avg loss: 2.489905, ppl: 12.060135 +epoch: 1, batch: 33993, sum loss: 5784.087402, avg loss: 2.820130, ppl: 16.779037 +epoch: 1, batch: 33994, sum loss: 4853.694824, avg loss: 2.704008, ppl: 14.939494 +epoch: 1, batch: 33995, sum loss: 3699.580078, avg loss: 2.486277, ppl: 12.016454 +epoch: 1, batch: 33996, sum loss: 3592.782471, avg loss: 2.494988, ppl: 12.121588 +epoch: 1, batch: 33997, sum loss: 4151.256836, avg loss: 2.720352, ppl: 15.185666 +epoch: 1, batch: 33998, sum loss: 3768.560547, avg loss: 2.450300, ppl: 11.591818 +epoch: 1, batch: 33999, sum loss: 4381.489746, avg loss: 2.415375, ppl: 11.193965 +epoch: 1, batch: 34000, sum loss: 5003.843262, avg loss: 2.598050, ppl: 13.437505 +epoch: 1, batch: 34001, sum loss: 4067.757324, avg loss: 2.542348, ppl: 12.709483 +epoch: 1, batch: 34002, sum loss: 4968.055664, avg loss: 2.719242, ppl: 15.168825 +epoch: 1, batch: 34003, sum loss: 3856.357422, avg loss: 2.362964, ppl: 10.622392 +epoch: 1, batch: 34004, sum loss: 4049.456543, avg loss: 2.411826, ppl: 11.154314 +epoch: 1, batch: 34005, sum loss: 4410.303711, avg loss: 2.640900, ppl: 14.025826 +epoch: 1, batch: 34006, sum loss: 5121.843262, avg loss: 2.737490, ppl: 15.448160 +epoch: 1, batch: 34007, sum loss: 4802.207520, avg loss: 2.595788, ppl: 13.407148 +epoch: 1, batch: 34008, sum loss: 
3825.104248, avg loss: 2.451990, ppl: 11.611429 +epoch: 1, batch: 34009, sum loss: 4427.837891, avg loss: 2.520113, ppl: 12.429996 +epoch: 1, batch: 34010, sum loss: 4912.948242, avg loss: 2.920897, ppl: 18.557922 +epoch: 1, batch: 34011, sum loss: 3892.453613, avg loss: 2.454258, ppl: 11.637797 +epoch: 1, batch: 34012, sum loss: 4410.279297, avg loss: 2.717362, ppl: 15.140335 +epoch: 1, batch: 34013, sum loss: 5012.187988, avg loss: 2.740398, ppl: 15.493153 +epoch: 1, batch: 34014, sum loss: 4270.067871, avg loss: 2.505908, ppl: 12.254687 +epoch: 1, batch: 34015, sum loss: 3904.041504, avg loss: 2.472477, ppl: 11.851769 +epoch: 1, batch: 34016, sum loss: 4418.624023, avg loss: 2.512009, ppl: 12.329678 +epoch: 1, batch: 34017, sum loss: 4520.095703, avg loss: 2.605242, ppl: 13.534507 +epoch: 1, batch: 34018, sum loss: 5085.261230, avg loss: 2.661047, ppl: 14.311268 +epoch: 1, batch: 34019, sum loss: 4655.011719, avg loss: 2.520310, ppl: 12.432446 +epoch: 1, batch: 34020, sum loss: 5582.605469, avg loss: 2.835249, ppl: 17.034645 +epoch: 1, batch: 34021, sum loss: 3638.238281, avg loss: 2.596887, ppl: 13.421885 +epoch: 1, batch: 34022, sum loss: 3735.894531, avg loss: 2.243781, ppl: 9.428911 +epoch: 1, batch: 34023, sum loss: 4720.208984, avg loss: 2.809648, ppl: 16.604076 +epoch: 1, batch: 34024, sum loss: 5126.225098, avg loss: 2.925928, ppl: 18.651520 +epoch: 1, batch: 34025, sum loss: 4823.276855, avg loss: 2.594554, ppl: 13.390619 +epoch: 1, batch: 34026, sum loss: 4711.927734, avg loss: 2.653112, ppl: 14.198160 +epoch: 1, batch: 34027, sum loss: 5037.700684, avg loss: 2.634781, ppl: 13.940254 +epoch: 1, batch: 34028, sum loss: 3413.734863, avg loss: 2.342989, ppl: 10.412313 +epoch: 1, batch: 34029, sum loss: 3916.752441, avg loss: 2.614654, ppl: 13.662493 +epoch: 1, batch: 34030, sum loss: 3576.823242, avg loss: 2.466775, ppl: 11.784377 +epoch: 1, batch: 34031, sum loss: 4159.393066, avg loss: 2.562781, ppl: 12.971837 +epoch: 1, batch: 34032, sum loss: 
3647.020752, avg loss: 2.486040, ppl: 12.013606 +epoch: 1, batch: 34033, sum loss: 4795.014648, avg loss: 2.773288, ppl: 16.011192 +epoch: 1, batch: 34034, sum loss: 4171.613770, avg loss: 2.354184, ppl: 10.529532 +epoch: 1, batch: 34035, sum loss: 4095.238037, avg loss: 2.529486, ppl: 12.547057 +epoch: 1, batch: 34036, sum loss: 3638.153564, avg loss: 2.446640, ppl: 11.549473 +epoch: 1, batch: 34037, sum loss: 5868.243164, avg loss: 3.083680, ppl: 21.838625 +epoch: 1, batch: 34038, sum loss: 6079.678711, avg loss: 3.026221, ppl: 20.619171 +epoch: 1, batch: 34039, sum loss: 4462.856445, avg loss: 2.643872, ppl: 14.067575 +epoch: 1, batch: 34040, sum loss: 4492.000000, avg loss: 2.855690, ppl: 17.386425 +epoch: 1, batch: 34041, sum loss: 4988.950684, avg loss: 2.791802, ppl: 16.310390 +epoch: 1, batch: 34042, sum loss: 4083.707031, avg loss: 2.399358, ppl: 11.016100 +epoch: 1, batch: 34043, sum loss: 4366.742676, avg loss: 2.614816, ppl: 13.664701 +epoch: 1, batch: 34044, sum loss: 4799.264648, avg loss: 2.599818, ppl: 13.461291 +epoch: 1, batch: 34045, sum loss: 4286.172852, avg loss: 2.443656, ppl: 11.515062 +epoch: 1, batch: 34046, sum loss: 4558.571777, avg loss: 2.820898, ppl: 16.791927 +epoch: 1, batch: 34047, sum loss: 3736.756836, avg loss: 2.418613, ppl: 11.230271 +epoch: 1, batch: 34048, sum loss: 5431.251465, avg loss: 2.904413, ppl: 18.254517 +epoch: 1, batch: 34049, sum loss: 3846.824219, avg loss: 2.386367, ppl: 10.873920 +epoch: 1, batch: 34050, sum loss: 5357.145508, avg loss: 2.809201, ppl: 16.596643 +epoch: 1, batch: 34051, sum loss: 4165.278320, avg loss: 2.697719, ppl: 14.845835 +epoch: 1, batch: 34052, sum loss: 3957.602295, avg loss: 2.645456, ppl: 14.089869 +epoch: 1, batch: 34053, sum loss: 3515.496094, avg loss: 2.482695, ppl: 11.973491 +epoch: 1, batch: 34054, sum loss: 3952.061523, avg loss: 2.681181, ppl: 14.602335 +epoch: 1, batch: 34055, sum loss: 4177.452637, avg loss: 2.350846, ppl: 10.494440 +epoch: 1, batch: 34056, sum loss: 
5051.119141, avg loss: 2.529354, ppl: 12.545394 +epoch: 1, batch: 34057, sum loss: 4972.580078, avg loss: 2.653458, ppl: 14.203066 +epoch: 1, batch: 34058, sum loss: 3606.727295, avg loss: 2.379108, ppl: 10.795266 +epoch: 1, batch: 34059, sum loss: 4538.280762, avg loss: 2.581502, ppl: 13.216974 +epoch: 1, batch: 34060, sum loss: 5707.650391, avg loss: 3.008777, ppl: 20.262606 +epoch: 1, batch: 34061, sum loss: 4413.302734, avg loss: 2.564383, ppl: 12.992637 +epoch: 1, batch: 34062, sum loss: 5046.036133, avg loss: 2.871961, ppl: 17.671644 +epoch: 1, batch: 34063, sum loss: 4287.228027, avg loss: 2.529338, ppl: 12.545200 +epoch: 1, batch: 34064, sum loss: 3907.884766, avg loss: 2.341453, ppl: 10.396330 +epoch: 1, batch: 34065, sum loss: 4863.913086, avg loss: 2.715753, ppl: 15.115982 +epoch: 1, batch: 34066, sum loss: 4296.939453, avg loss: 2.726484, ppl: 15.279076 +epoch: 1, batch: 34067, sum loss: 5152.231934, avg loss: 2.914158, ppl: 18.433290 +epoch: 1, batch: 34068, sum loss: 3816.756104, avg loss: 2.263794, ppl: 9.619514 +epoch: 1, batch: 34069, sum loss: 3632.657227, avg loss: 2.343650, ppl: 10.419196 +epoch: 1, batch: 34070, sum loss: 4770.347656, avg loss: 2.721248, ppl: 15.199282 +epoch: 1, batch: 34071, sum loss: 3562.362061, avg loss: 2.438304, ppl: 11.453598 +epoch: 1, batch: 34072, sum loss: 4048.596191, avg loss: 2.630667, ppl: 13.883023 +epoch: 1, batch: 34073, sum loss: 5145.853027, avg loss: 2.825839, ppl: 16.875097 +epoch: 1, batch: 34074, sum loss: 4423.094238, avg loss: 2.792357, ppl: 16.319447 +epoch: 1, batch: 34075, sum loss: 3842.100098, avg loss: 2.308954, ppl: 10.063897 +epoch: 1, batch: 34076, sum loss: 3335.182129, avg loss: 2.380573, ppl: 10.811091 +epoch: 1, batch: 34077, sum loss: 5125.534668, avg loss: 2.701916, ppl: 14.908268 +epoch: 1, batch: 34078, sum loss: 3511.700195, avg loss: 2.299738, ppl: 9.971571 +epoch: 1, batch: 34079, sum loss: 4802.432617, avg loss: 2.690439, ppl: 14.738137 +epoch: 1, batch: 34080, sum loss: 
4199.846680, avg loss: 2.461809, ppl: 11.726007 +epoch: 1, batch: 34081, sum loss: 5423.248047, avg loss: 2.933071, ppl: 18.785229 +epoch: 1, batch: 34082, sum loss: 5006.221680, avg loss: 2.773530, ppl: 16.015068 +epoch: 1, batch: 34083, sum loss: 4678.304688, avg loss: 2.620899, ppl: 13.748080 +epoch: 1, batch: 34084, sum loss: 3592.470947, avg loss: 2.298446, ppl: 9.958694 +epoch: 1, batch: 34085, sum loss: 5483.211914, avg loss: 2.803278, ppl: 16.498640 +epoch: 1, batch: 34086, sum loss: 3464.777588, avg loss: 2.478382, ppl: 11.921955 +epoch: 1, batch: 34087, sum loss: 4641.354492, avg loss: 2.797682, ppl: 16.406572 +epoch: 1, batch: 34088, sum loss: 4702.130859, avg loss: 2.602175, ppl: 13.493056 +epoch: 1, batch: 34089, sum loss: 4285.431152, avg loss: 2.429383, ppl: 11.351873 +epoch: 1, batch: 34090, sum loss: 4047.592041, avg loss: 2.664643, ppl: 14.362815 +epoch: 1, batch: 34091, sum loss: 3754.935547, avg loss: 2.430379, ppl: 11.363190 +epoch: 1, batch: 34092, sum loss: 3967.138184, avg loss: 2.477913, ppl: 11.916365 +epoch: 1, batch: 34093, sum loss: 3669.326172, avg loss: 2.272029, ppl: 9.699058 +epoch: 1, batch: 34094, sum loss: 3276.295410, avg loss: 2.465234, ppl: 11.766232 +epoch: 1, batch: 34095, sum loss: 5017.848145, avg loss: 2.683341, ppl: 14.633907 +epoch: 1, batch: 34096, sum loss: 4424.567871, avg loss: 2.342281, ppl: 10.404940 +epoch: 1, batch: 34097, sum loss: 5241.457520, avg loss: 2.664696, ppl: 14.363585 +epoch: 1, batch: 34098, sum loss: 3644.465332, avg loss: 2.441035, ppl: 11.484921 +epoch: 1, batch: 34099, sum loss: 4217.943359, avg loss: 2.703810, ppl: 14.936531 +epoch: 1, batch: 34100, sum loss: 4097.043945, avg loss: 2.431480, ppl: 11.375708 +epoch: 1, batch: 34101, sum loss: 4774.440430, avg loss: 2.512864, ppl: 12.340218 +epoch: 1, batch: 34102, sum loss: 4115.620117, avg loss: 2.651817, ppl: 14.179785 +epoch: 1, batch: 34103, sum loss: 4007.444824, avg loss: 2.269221, ppl: 9.671866 +epoch: 1, batch: 34104, sum loss: 
4303.862305, avg loss: 2.552706, ppl: 12.841806 +epoch: 1, batch: 34105, sum loss: 4922.968262, avg loss: 2.832548, ppl: 16.988691 +epoch: 1, batch: 34106, sum loss: 4255.089355, avg loss: 2.469582, ppl: 11.817504 +epoch: 1, batch: 34107, sum loss: 5371.260742, avg loss: 2.844947, ppl: 17.200653 +epoch: 1, batch: 34108, sum loss: 4511.985352, avg loss: 2.669814, ppl: 14.437282 +epoch: 1, batch: 34109, sum loss: 5292.547852, avg loss: 2.762290, ppl: 15.836070 +epoch: 1, batch: 34110, sum loss: 5274.823242, avg loss: 2.828323, ppl: 16.917074 +epoch: 1, batch: 34111, sum loss: 4204.644531, avg loss: 2.493858, ppl: 12.107900 +epoch: 1, batch: 34112, sum loss: 4671.747559, avg loss: 2.620161, ppl: 13.737939 +epoch: 1, batch: 34113, sum loss: 3584.518555, avg loss: 2.544016, ppl: 12.730697 +epoch: 1, batch: 34114, sum loss: 4606.055664, avg loss: 2.740069, ppl: 15.488049 +epoch: 1, batch: 34115, sum loss: 4391.595215, avg loss: 2.529721, ppl: 12.550001 +epoch: 1, batch: 34116, sum loss: 4095.974854, avg loss: 2.461524, ppl: 11.722658 +epoch: 1, batch: 34117, sum loss: 3910.095215, avg loss: 2.457634, ppl: 11.677148 +epoch: 1, batch: 34118, sum loss: 5108.317383, avg loss: 2.978611, ppl: 19.660480 +epoch: 1, batch: 34119, sum loss: 4495.190430, avg loss: 2.737631, ppl: 15.450345 +epoch: 1, batch: 34120, sum loss: 4695.255859, avg loss: 2.837013, ppl: 17.064713 +epoch: 1, batch: 34121, sum loss: 3915.431641, avg loss: 2.410980, ppl: 11.144877 +epoch: 1, batch: 34122, sum loss: 5050.065430, avg loss: 2.843505, ppl: 17.175869 +epoch: 1, batch: 34123, sum loss: 4461.088379, avg loss: 2.906246, ppl: 18.288025 +epoch: 1, batch: 34124, sum loss: 5634.209961, avg loss: 2.623003, ppl: 13.777031 +epoch: 1, batch: 34125, sum loss: 4645.277344, avg loss: 2.660526, ppl: 14.303804 +epoch: 1, batch: 34126, sum loss: 4234.208008, avg loss: 2.546126, ppl: 12.757589 +epoch: 1, batch: 34127, sum loss: 4830.574219, avg loss: 2.860020, ppl: 17.461884 +epoch: 1, batch: 34128, sum loss: 
3874.075195, avg loss: 2.182577, ppl: 8.869136 +epoch: 1, batch: 34129, sum loss: 4638.157227, avg loss: 2.691908, ppl: 14.759809 +epoch: 1, batch: 34130, sum loss: 4652.017578, avg loss: 2.590210, ppl: 13.332574 +epoch: 1, batch: 34131, sum loss: 4092.319336, avg loss: 2.619923, ppl: 13.734664 +epoch: 1, batch: 34132, sum loss: 4357.844238, avg loss: 2.425066, ppl: 11.302980 +epoch: 1, batch: 34133, sum loss: 2817.849365, avg loss: 1.977438, ppl: 7.224212 +epoch: 1, batch: 34134, sum loss: 4297.684570, avg loss: 2.581192, ppl: 13.212879 +epoch: 1, batch: 34135, sum loss: 4073.802002, avg loss: 2.338578, ppl: 10.366480 +epoch: 1, batch: 34136, sum loss: 3733.446289, avg loss: 2.631041, ppl: 13.888214 +epoch: 1, batch: 34137, sum loss: 4915.282715, avg loss: 2.628494, ppl: 13.852885 +epoch: 1, batch: 34138, sum loss: 4977.065430, avg loss: 2.780483, ppl: 16.126816 +epoch: 1, batch: 34139, sum loss: 4852.230957, avg loss: 2.869445, ppl: 17.627230 +epoch: 1, batch: 34140, sum loss: 4850.847168, avg loss: 2.506898, ppl: 12.266815 +epoch: 1, batch: 34141, sum loss: 4728.026855, avg loss: 2.844782, ppl: 17.197803 +epoch: 1, batch: 34142, sum loss: 4151.991699, avg loss: 2.486223, ppl: 12.015800 +epoch: 1, batch: 34143, sum loss: 4140.081543, avg loss: 2.318075, ppl: 10.156104 +epoch: 1, batch: 34144, sum loss: 3851.485596, avg loss: 2.507478, ppl: 12.273933 +epoch: 1, batch: 34145, sum loss: 4307.988770, avg loss: 2.579634, ppl: 13.192308 +epoch: 1, batch: 34146, sum loss: 4569.904785, avg loss: 2.696109, ppl: 14.821948 +epoch: 1, batch: 34147, sum loss: 4339.507324, avg loss: 2.687001, ppl: 14.687569 +epoch: 1, batch: 34148, sum loss: 3634.820557, avg loss: 2.301976, ppl: 9.993913 +epoch: 1, batch: 34149, sum loss: 4849.583496, avg loss: 2.553756, ppl: 12.855304 +epoch: 1, batch: 34150, sum loss: 3555.680420, avg loss: 2.420477, ppl: 11.251222 +epoch: 1, batch: 34151, sum loss: 4312.801758, avg loss: 2.404014, ppl: 11.067516 +epoch: 1, batch: 34152, sum loss: 
5143.947754, avg loss: 2.736142, ppl: 15.427358 +epoch: 1, batch: 34153, sum loss: 4623.599121, avg loss: 2.781949, ppl: 16.150469 +epoch: 1, batch: 34154, sum loss: 4229.934570, avg loss: 2.665365, ppl: 14.373201 +epoch: 1, batch: 34155, sum loss: 4607.187988, avg loss: 2.541196, ppl: 12.694843 +epoch: 1, batch: 34156, sum loss: 3892.970703, avg loss: 2.633945, ppl: 13.928613 +epoch: 1, batch: 34157, sum loss: 4264.909668, avg loss: 2.531104, ppl: 12.567368 +epoch: 1, batch: 34158, sum loss: 5290.062500, avg loss: 2.629256, ppl: 13.863452 +epoch: 1, batch: 34159, sum loss: 4474.146973, avg loss: 2.653705, ppl: 14.206582 +epoch: 1, batch: 34160, sum loss: 3863.736328, avg loss: 2.411820, ppl: 11.154248 +epoch: 1, batch: 34161, sum loss: 4591.187988, avg loss: 2.649272, ppl: 14.143734 +epoch: 1, batch: 34162, sum loss: 5061.243164, avg loss: 2.816496, ppl: 16.718170 +epoch: 1, batch: 34163, sum loss: 4634.354980, avg loss: 2.776726, ppl: 16.066330 +epoch: 1, batch: 34164, sum loss: 4353.334961, avg loss: 2.632004, ppl: 13.901605 +epoch: 1, batch: 34165, sum loss: 4757.264648, avg loss: 2.741939, ppl: 15.517048 +epoch: 1, batch: 34166, sum loss: 3797.575928, avg loss: 2.540184, ppl: 12.682011 +epoch: 1, batch: 34167, sum loss: 5537.163086, avg loss: 2.946867, ppl: 19.046192 +epoch: 1, batch: 34168, sum loss: 3846.852051, avg loss: 2.348505, ppl: 10.469911 +epoch: 1, batch: 34169, sum loss: 4745.616211, avg loss: 2.953090, ppl: 19.165091 +epoch: 1, batch: 34170, sum loss: 3968.687012, avg loss: 2.496030, ppl: 12.134221 +epoch: 1, batch: 34171, sum loss: 4886.246582, avg loss: 2.525192, ppl: 12.493294 +epoch: 1, batch: 34172, sum loss: 5504.630859, avg loss: 2.814228, ppl: 16.680298 +epoch: 1, batch: 34173, sum loss: 4965.558105, avg loss: 2.802234, ppl: 16.481421 +epoch: 1, batch: 34174, sum loss: 3853.470703, avg loss: 2.596678, ppl: 13.419089 +epoch: 1, batch: 34175, sum loss: 5013.907715, avg loss: 2.684105, ppl: 14.645083 +epoch: 1, batch: 34176, sum loss: 
3950.812744, avg loss: 2.367174, ppl: 10.667201 +epoch: 1, batch: 34177, sum loss: 4595.664551, avg loss: 2.698570, ppl: 14.858469 +epoch: 1, batch: 34178, sum loss: 4202.036621, avg loss: 2.543606, ppl: 12.725474 +epoch: 1, batch: 34179, sum loss: 4492.808594, avg loss: 2.633534, ppl: 13.922883 +epoch: 1, batch: 34180, sum loss: 4165.657227, avg loss: 2.413475, ppl: 11.172714 +epoch: 1, batch: 34181, sum loss: 5361.343262, avg loss: 2.747998, ppl: 15.611343 +epoch: 1, batch: 34182, sum loss: 4493.982422, avg loss: 2.691007, ppl: 14.746524 +epoch: 1, batch: 34183, sum loss: 5066.371094, avg loss: 2.697748, ppl: 14.846263 +epoch: 1, batch: 34184, sum loss: 4576.943359, avg loss: 2.458079, ppl: 11.682349 +epoch: 1, batch: 34185, sum loss: 4830.737305, avg loss: 2.860117, ppl: 17.463570 +epoch: 1, batch: 34186, sum loss: 4402.859375, avg loss: 2.630143, ppl: 13.875756 +epoch: 1, batch: 34187, sum loss: 4226.162598, avg loss: 2.580075, ppl: 13.198125 +epoch: 1, batch: 34188, sum loss: 4025.189209, avg loss: 2.615458, ppl: 13.673471 +epoch: 1, batch: 34189, sum loss: 4938.074219, avg loss: 2.805724, ppl: 16.539043 +epoch: 1, batch: 34190, sum loss: 4453.735352, avg loss: 2.644736, ppl: 14.079728 +epoch: 1, batch: 34191, sum loss: 4399.407715, avg loss: 2.747912, ppl: 15.610010 +epoch: 1, batch: 34192, sum loss: 5100.231445, avg loss: 2.674479, ppl: 14.504791 +epoch: 1, batch: 34193, sum loss: 4306.012207, avg loss: 2.467629, ppl: 11.794446 +epoch: 1, batch: 34194, sum loss: 4607.404785, avg loss: 2.527375, ppl: 12.520599 +epoch: 1, batch: 34195, sum loss: 4407.841797, avg loss: 2.595902, ppl: 13.408679 +epoch: 1, batch: 34196, sum loss: 4523.611816, avg loss: 2.725067, ppl: 15.257442 +epoch: 1, batch: 34197, sum loss: 4555.744141, avg loss: 2.547955, ppl: 12.780944 +epoch: 1, batch: 34198, sum loss: 4308.742188, avg loss: 2.515319, ppl: 12.370559 +epoch: 1, batch: 34199, sum loss: 4285.622070, avg loss: 2.573947, ppl: 13.117499 +epoch: 1, batch: 34200, sum loss: 
5756.705078, avg loss: 2.965845, ppl: 19.411097 +epoch: 1, batch: 34201, sum loss: 4974.406250, avg loss: 2.782106, ppl: 16.153009 +epoch: 1, batch: 34202, sum loss: 3655.711914, avg loss: 2.473418, ppl: 11.862927 +epoch: 1, batch: 34203, sum loss: 4086.736084, avg loss: 2.453023, ppl: 11.623428 +epoch: 1, batch: 34204, sum loss: 4183.895020, avg loss: 2.592252, ppl: 13.359827 +epoch: 1, batch: 34205, sum loss: 3964.618408, avg loss: 2.610019, ppl: 13.599305 +epoch: 1, batch: 34206, sum loss: 4165.739258, avg loss: 2.590634, ppl: 13.338223 +epoch: 1, batch: 34207, sum loss: 4091.697510, avg loss: 2.490382, ppl: 12.065881 +epoch: 1, batch: 34208, sum loss: 4486.878418, avg loss: 2.451846, ppl: 11.609760 +epoch: 1, batch: 34209, sum loss: 4555.281738, avg loss: 2.701828, ppl: 14.906957 +epoch: 1, batch: 34210, sum loss: 3468.554932, avg loss: 2.304688, ppl: 10.021049 +epoch: 1, batch: 34211, sum loss: 4269.848145, avg loss: 2.479587, ppl: 11.936329 +epoch: 1, batch: 34212, sum loss: 4492.915039, avg loss: 2.577691, ppl: 13.166699 +epoch: 1, batch: 34213, sum loss: 4474.337891, avg loss: 2.688905, ppl: 14.715553 +epoch: 1, batch: 34214, sum loss: 4560.194336, avg loss: 2.866244, ppl: 17.570900 +epoch: 1, batch: 34215, sum loss: 4687.457031, avg loss: 2.620155, ppl: 13.737851 +epoch: 1, batch: 34216, sum loss: 4636.444336, avg loss: 2.459652, ppl: 11.700741 +epoch: 1, batch: 34217, sum loss: 3811.405762, avg loss: 2.289133, ppl: 9.866376 +epoch: 1, batch: 34218, sum loss: 5230.471680, avg loss: 2.729891, ppl: 15.331221 +epoch: 1, batch: 34219, sum loss: 4964.055176, avg loss: 2.921751, ppl: 18.573786 +epoch: 1, batch: 34220, sum loss: 4508.145020, avg loss: 2.592378, ppl: 13.361506 +epoch: 1, batch: 34221, sum loss: 3796.922607, avg loss: 2.532970, ppl: 12.590851 +epoch: 1, batch: 34222, sum loss: 4696.868164, avg loss: 2.708690, ppl: 15.009596 +epoch: 1, batch: 34223, sum loss: 4758.726562, avg loss: 2.753893, ppl: 15.703642 +epoch: 1, batch: 34224, sum loss: 
4431.376953, avg loss: 2.407049, ppl: 11.101152 +epoch: 1, batch: 34225, sum loss: 4624.644531, avg loss: 2.769248, ppl: 15.946642 +epoch: 1, batch: 34226, sum loss: 4049.274414, avg loss: 2.521341, ppl: 12.445278 +epoch: 1, batch: 34227, sum loss: 4823.317383, avg loss: 2.778409, ppl: 16.093388 +epoch: 1, batch: 34228, sum loss: 4015.780518, avg loss: 2.567635, ppl: 13.034955 +epoch: 1, batch: 34229, sum loss: 4095.838135, avg loss: 2.515871, ppl: 12.377385 +epoch: 1, batch: 34230, sum loss: 4101.592773, avg loss: 2.405626, ppl: 11.085371 +epoch: 1, batch: 34231, sum loss: 4929.751953, avg loss: 2.722116, ppl: 15.212481 +epoch: 1, batch: 34232, sum loss: 4151.347656, avg loss: 2.702700, ppl: 14.919967 +epoch: 1, batch: 34233, sum loss: 4665.142578, avg loss: 2.758807, ppl: 15.781008 +epoch: 1, batch: 34234, sum loss: 4973.417969, avg loss: 2.672444, ppl: 14.475302 +epoch: 1, batch: 34235, sum loss: 4474.645996, avg loss: 2.660313, ppl: 14.300759 +epoch: 1, batch: 34236, sum loss: 4498.654297, avg loss: 2.579504, ppl: 13.190588 +epoch: 1, batch: 34237, sum loss: 4334.510742, avg loss: 2.778533, ppl: 16.095385 +epoch: 1, batch: 34238, sum loss: 4151.513672, avg loss: 2.348141, ppl: 10.466100 +epoch: 1, batch: 34239, sum loss: 4769.504883, avg loss: 2.714573, ppl: 15.098161 +epoch: 1, batch: 34240, sum loss: 4149.440918, avg loss: 2.580498, ppl: 13.203712 +epoch: 1, batch: 34241, sum loss: 3283.570801, avg loss: 2.258302, ppl: 9.566829 +epoch: 1, batch: 34242, sum loss: 3650.679199, avg loss: 2.367496, ppl: 10.670642 +epoch: 1, batch: 34243, sum loss: 4260.409668, avg loss: 2.485653, ppl: 12.008962 +epoch: 1, batch: 34244, sum loss: 4813.375977, avg loss: 2.669648, ppl: 14.434893 +epoch: 1, batch: 34245, sum loss: 4213.404785, avg loss: 2.542791, ppl: 12.715108 +epoch: 1, batch: 34246, sum loss: 4312.394531, avg loss: 2.741509, ppl: 15.510380 +epoch: 1, batch: 34247, sum loss: 4797.583984, avg loss: 2.710500, ppl: 15.036785 +epoch: 1, batch: 34248, sum loss: 
4579.663086, avg loss: 2.552767, ppl: 12.842585 +epoch: 1, batch: 34249, sum loss: 5605.248535, avg loss: 2.861281, ppl: 17.483904 +epoch: 1, batch: 34250, sum loss: 3898.742676, avg loss: 2.481695, ppl: 11.961521 +epoch: 1, batch: 34251, sum loss: 5292.215820, avg loss: 2.622505, ppl: 13.770181 +epoch: 1, batch: 34252, sum loss: 5747.340820, avg loss: 2.771138, ppl: 15.976809 +epoch: 1, batch: 34253, sum loss: 3822.290283, avg loss: 2.781871, ppl: 16.149202 +epoch: 1, batch: 34254, sum loss: 4589.897461, avg loss: 2.706307, ppl: 14.973881 +epoch: 1, batch: 34255, sum loss: 4311.750977, avg loss: 2.755112, ppl: 15.722808 +epoch: 1, batch: 34256, sum loss: 4753.553711, avg loss: 2.952518, ppl: 19.154118 +epoch: 1, batch: 34257, sum loss: 3616.765137, avg loss: 2.473848, ppl: 11.868023 +epoch: 1, batch: 34258, sum loss: 4512.776367, avg loss: 2.684578, ppl: 14.652023 +epoch: 1, batch: 34259, sum loss: 4131.679688, avg loss: 2.610031, ppl: 13.599478 +epoch: 1, batch: 34260, sum loss: 4161.454590, avg loss: 2.394393, ppl: 10.961539 +epoch: 1, batch: 34261, sum loss: 3708.335938, avg loss: 2.404887, ppl: 11.077181 +epoch: 1, batch: 34262, sum loss: 4593.515625, avg loss: 2.679997, ppl: 14.585056 +epoch: 1, batch: 34263, sum loss: 3192.778320, avg loss: 2.103280, ppl: 8.192996 +epoch: 1, batch: 34264, sum loss: 4178.899902, avg loss: 2.661720, ppl: 14.320897 +epoch: 1, batch: 34265, sum loss: 4429.275879, avg loss: 2.495367, ppl: 12.126178 +epoch: 1, batch: 34266, sum loss: 4560.351562, avg loss: 2.763849, ppl: 15.860782 +epoch: 1, batch: 34267, sum loss: 5213.079590, avg loss: 2.756785, ppl: 15.749123 +epoch: 1, batch: 34268, sum loss: 4786.505371, avg loss: 2.668063, ppl: 14.412028 +epoch: 1, batch: 34269, sum loss: 4557.291992, avg loss: 2.711060, ppl: 15.045216 +epoch: 1, batch: 34270, sum loss: 3944.906982, avg loss: 2.441155, ppl: 11.486302 +epoch: 1, batch: 34271, sum loss: 3885.101074, avg loss: 2.379119, ppl: 10.795384 +epoch: 1, batch: 34272, sum loss: 
4003.229248, avg loss: 2.511436, ppl: 12.322616 +epoch: 1, batch: 34273, sum loss: 4245.499023, avg loss: 2.296105, ppl: 9.935412 +epoch: 1, batch: 34274, sum loss: 3586.185547, avg loss: 2.463040, ppl: 11.740444 +epoch: 1, batch: 34275, sum loss: 4200.717285, avg loss: 2.586648, ppl: 13.285172 +epoch: 1, batch: 34276, sum loss: 4640.820312, avg loss: 2.674824, ppl: 14.509803 +epoch: 1, batch: 34277, sum loss: 4392.580078, avg loss: 2.830271, ppl: 16.950050 +epoch: 1, batch: 34278, sum loss: 4309.749023, avg loss: 2.455697, ppl: 11.654557 +epoch: 1, batch: 34279, sum loss: 4243.352539, avg loss: 2.491693, ppl: 12.081708 +epoch: 1, batch: 34280, sum loss: 4105.257324, avg loss: 2.535675, ppl: 12.624948 +epoch: 1, batch: 34281, sum loss: 4261.320312, avg loss: 2.527473, ppl: 12.521829 +epoch: 1, batch: 34282, sum loss: 4424.404297, avg loss: 2.699453, ppl: 14.871600 +epoch: 1, batch: 34283, sum loss: 4525.201172, avg loss: 2.640141, ppl: 14.015177 +epoch: 1, batch: 34284, sum loss: 4466.900879, avg loss: 2.646268, ppl: 14.101319 +epoch: 1, batch: 34285, sum loss: 4711.189453, avg loss: 2.819383, ppl: 16.766508 +epoch: 1, batch: 34286, sum loss: 4320.569824, avg loss: 2.637710, ppl: 13.981154 +epoch: 1, batch: 34287, sum loss: 4549.291016, avg loss: 2.632692, ppl: 13.911163 +epoch: 1, batch: 34288, sum loss: 4149.578125, avg loss: 2.738996, ppl: 15.471437 +epoch: 1, batch: 34289, sum loss: 4136.041016, avg loss: 2.431535, ppl: 11.376331 +epoch: 1, batch: 34290, sum loss: 4702.818848, avg loss: 2.618496, ppl: 13.715083 +epoch: 1, batch: 34291, sum loss: 5472.416992, avg loss: 2.909312, ppl: 18.344183 +epoch: 1, batch: 34292, sum loss: 4424.362305, avg loss: 2.753181, ppl: 15.692477 +epoch: 1, batch: 34293, sum loss: 4271.049805, avg loss: 2.571373, ppl: 13.083770 +epoch: 1, batch: 34294, sum loss: 4686.524414, avg loss: 2.704284, ppl: 14.943612 +epoch: 1, batch: 34295, sum loss: 4161.706543, avg loss: 2.400061, ppl: 11.023853 +epoch: 1, batch: 34296, sum loss: 
4904.210449, avg loss: 2.703534, ppl: 14.932408 +epoch: 1, batch: 34297, sum loss: 3980.508057, avg loss: 2.390696, ppl: 10.921087 +epoch: 1, batch: 34298, sum loss: 4024.630859, avg loss: 2.521698, ppl: 12.449724 +epoch: 1, batch: 34299, sum loss: 4325.625488, avg loss: 2.514899, ppl: 12.365355 +epoch: 1, batch: 34300, sum loss: 3967.917969, avg loss: 2.533792, ppl: 12.601196 +epoch: 1, batch: 34301, sum loss: 4195.660156, avg loss: 2.550553, ppl: 12.814189 +epoch: 1, batch: 34302, sum loss: 4197.116699, avg loss: 2.543707, ppl: 12.726764 +epoch: 1, batch: 34303, sum loss: 4328.041504, avg loss: 2.532500, ppl: 12.584924 +epoch: 1, batch: 34304, sum loss: 4059.039062, avg loss: 2.384864, ppl: 10.857590 +epoch: 1, batch: 34305, sum loss: 4302.468262, avg loss: 2.539828, ppl: 12.677488 +epoch: 1, batch: 34306, sum loss: 4557.684082, avg loss: 2.732424, ppl: 15.370107 +epoch: 1, batch: 34307, sum loss: 5351.624023, avg loss: 2.852678, ppl: 17.334141 +epoch: 1, batch: 34308, sum loss: 4018.023926, avg loss: 2.439602, ppl: 11.468472 +epoch: 1, batch: 34309, sum loss: 5511.772949, avg loss: 2.911660, ppl: 18.387300 +epoch: 1, batch: 34310, sum loss: 4119.780273, avg loss: 2.557281, ppl: 12.900699 +epoch: 1, batch: 34311, sum loss: 5461.835938, avg loss: 3.066724, ppl: 21.471453 +epoch: 1, batch: 34312, sum loss: 5095.370117, avg loss: 2.857751, ppl: 17.422302 +epoch: 1, batch: 34313, sum loss: 4454.094727, avg loss: 2.493894, ppl: 12.108335 +epoch: 1, batch: 34314, sum loss: 4298.296875, avg loss: 2.679736, ppl: 14.581249 +epoch: 1, batch: 34315, sum loss: 3972.089111, avg loss: 2.582633, ppl: 13.231929 +epoch: 1, batch: 34316, sum loss: 4368.612305, avg loss: 2.575833, ppl: 13.142258 +epoch: 1, batch: 34317, sum loss: 4275.183594, avg loss: 2.491366, ppl: 12.077759 +epoch: 1, batch: 34318, sum loss: 5348.831055, avg loss: 2.730389, ppl: 15.338847 +epoch: 1, batch: 34319, sum loss: 5256.111328, avg loss: 2.838073, ppl: 17.082815 +epoch: 1, batch: 34320, sum loss: 
4345.937012, avg loss: 2.489082, ppl: 12.050210 +epoch: 1, batch: 34321, sum loss: 4467.754395, avg loss: 2.480708, ppl: 11.949718 +epoch: 1, batch: 34322, sum loss: 3334.629395, avg loss: 2.507240, ppl: 12.271016 +epoch: 1, batch: 34323, sum loss: 4759.381836, avg loss: 2.768692, ppl: 15.937778 +epoch: 1, batch: 34324, sum loss: 4882.711426, avg loss: 2.669607, ppl: 14.434298 +epoch: 1, batch: 34325, sum loss: 4114.249023, avg loss: 2.544372, ppl: 12.735226 +epoch: 1, batch: 34326, sum loss: 4051.737061, avg loss: 2.493376, ppl: 12.102070 +epoch: 1, batch: 34327, sum loss: 4136.041992, avg loss: 2.724665, ppl: 15.251310 +epoch: 1, batch: 34328, sum loss: 4564.619629, avg loss: 2.551492, ppl: 12.826229 +epoch: 1, batch: 34329, sum loss: 4765.989258, avg loss: 2.459231, ppl: 11.695813 +epoch: 1, batch: 34330, sum loss: 4578.002441, avg loss: 2.479958, ppl: 11.940763 +epoch: 1, batch: 34331, sum loss: 4239.230957, avg loss: 2.503976, ppl: 12.231027 +epoch: 1, batch: 34332, sum loss: 4329.734375, avg loss: 2.537945, ppl: 12.653642 +epoch: 1, batch: 34333, sum loss: 4572.425781, avg loss: 2.432141, ppl: 11.383231 +epoch: 1, batch: 34334, sum loss: 4495.674316, avg loss: 2.746289, ppl: 15.584682 +epoch: 1, batch: 34335, sum loss: 4129.160156, avg loss: 2.725518, ppl: 15.264322 +epoch: 1, batch: 34336, sum loss: 5802.206543, avg loss: 2.878079, ppl: 17.780079 +epoch: 1, batch: 34337, sum loss: 4367.027344, avg loss: 2.607181, ppl: 13.560763 +epoch: 1, batch: 34338, sum loss: 4268.685059, avg loss: 2.424012, ppl: 11.291068 +epoch: 1, batch: 34339, sum loss: 4422.270996, avg loss: 2.637013, ppl: 13.971408 +epoch: 1, batch: 34340, sum loss: 4794.750000, avg loss: 2.732051, ppl: 15.364369 +epoch: 1, batch: 34341, sum loss: 4833.782227, avg loss: 2.685435, ppl: 14.664573 +epoch: 1, batch: 34342, sum loss: 5020.254883, avg loss: 2.815623, ppl: 16.703571 +epoch: 1, batch: 34343, sum loss: 3657.067871, avg loss: 2.305844, ppl: 10.032639 +epoch: 1, batch: 34344, sum loss: 
4843.756348, avg loss: 2.775792, ppl: 16.051329 +epoch: 1, batch: 34345, sum loss: 4188.661133, avg loss: 2.631068, ppl: 13.888601 +epoch: 1, batch: 34346, sum loss: 4440.665527, avg loss: 2.601444, ppl: 13.483200 +epoch: 1, batch: 34347, sum loss: 4509.600098, avg loss: 2.605199, ppl: 13.533923 +epoch: 1, batch: 34348, sum loss: 4189.221191, avg loss: 2.435594, ppl: 11.422597 +epoch: 1, batch: 34349, sum loss: 4401.424805, avg loss: 2.661079, ppl: 14.311726 +epoch: 1, batch: 34350, sum loss: 4735.368652, avg loss: 2.764372, ppl: 15.869065 +epoch: 1, batch: 34351, sum loss: 3880.056641, avg loss: 2.223528, ppl: 9.239873 +epoch: 1, batch: 34352, sum loss: 3772.928467, avg loss: 2.268748, ppl: 9.667293 +epoch: 1, batch: 34353, sum loss: 4569.939941, avg loss: 2.627913, ppl: 13.844839 +epoch: 1, batch: 34354, sum loss: 3700.208252, avg loss: 2.480032, ppl: 11.941652 +epoch: 1, batch: 34355, sum loss: 3790.602783, avg loss: 2.592752, ppl: 13.366502 +epoch: 1, batch: 34356, sum loss: 4000.174805, avg loss: 2.449586, ppl: 11.583554 +epoch: 1, batch: 34357, sum loss: 4731.872070, avg loss: 2.504961, ppl: 12.243088 +epoch: 1, batch: 34358, sum loss: 4949.821289, avg loss: 2.621727, ppl: 13.759472 +epoch: 1, batch: 34359, sum loss: 3827.398682, avg loss: 2.427012, ppl: 11.324997 +epoch: 1, batch: 34360, sum loss: 5035.257812, avg loss: 2.806721, ppl: 16.555548 +epoch: 1, batch: 34361, sum loss: 3192.381836, avg loss: 2.246574, ppl: 9.455288 +epoch: 1, batch: 34362, sum loss: 4479.215820, avg loss: 2.713032, ppl: 15.074917 +epoch: 1, batch: 34363, sum loss: 4486.617676, avg loss: 2.812927, ppl: 16.658598 +epoch: 1, batch: 34364, sum loss: 4767.367188, avg loss: 2.661846, ppl: 14.322710 +epoch: 1, batch: 34365, sum loss: 3924.906738, avg loss: 2.484118, ppl: 11.990542 +epoch: 1, batch: 34366, sum loss: 4409.454590, avg loss: 2.537086, ppl: 12.642770 +epoch: 1, batch: 34367, sum loss: 5052.178711, avg loss: 2.698813, ppl: 14.862086 +epoch: 1, batch: 34368, sum loss: 
4495.723145, avg loss: 2.773426, ppl: 16.013395 +epoch: 1, batch: 34369, sum loss: 4485.978027, avg loss: 2.687824, ppl: 14.699655 +epoch: 1, batch: 34370, sum loss: 4707.526855, avg loss: 2.606604, ppl: 13.552948 +epoch: 1, batch: 34371, sum loss: 5283.970215, avg loss: 2.927407, ppl: 18.679138 +epoch: 1, batch: 34372, sum loss: 3957.941406, avg loss: 2.659907, ppl: 14.294958 +epoch: 1, batch: 34373, sum loss: 3590.987061, avg loss: 2.536008, ppl: 12.629153 +epoch: 1, batch: 34374, sum loss: 4671.234863, avg loss: 2.704826, ppl: 14.951720 +epoch: 1, batch: 34375, sum loss: 4334.657715, avg loss: 2.638258, ppl: 13.988811 +epoch: 1, batch: 34376, sum loss: 4765.191406, avg loss: 2.596835, ppl: 13.421188 +epoch: 1, batch: 34377, sum loss: 5491.453613, avg loss: 2.669642, ppl: 14.434800 +epoch: 1, batch: 34378, sum loss: 4539.840820, avg loss: 2.736492, ppl: 15.432758 +epoch: 1, batch: 34379, sum loss: 4954.585938, avg loss: 2.653769, ppl: 14.207479 +epoch: 1, batch: 34380, sum loss: 4949.642090, avg loss: 2.718090, ppl: 15.151357 +epoch: 1, batch: 34381, sum loss: 4598.173340, avg loss: 2.484156, ppl: 11.991000 +epoch: 1, batch: 34382, sum loss: 4683.976562, avg loss: 2.424418, ppl: 11.295658 +epoch: 1, batch: 34383, sum loss: 4041.266602, avg loss: 2.528953, ppl: 12.540367 +epoch: 1, batch: 34384, sum loss: 4976.659180, avg loss: 2.563967, ppl: 12.987229 +epoch: 1, batch: 34385, sum loss: 5003.027344, avg loss: 2.839403, ppl: 17.105545 +epoch: 1, batch: 34386, sum loss: 4241.030273, avg loss: 2.357438, ppl: 10.563848 +epoch: 1, batch: 34387, sum loss: 4567.296387, avg loss: 2.741474, ppl: 15.509836 +epoch: 1, batch: 34388, sum loss: 4953.270020, avg loss: 2.881484, ppl: 17.840721 +epoch: 1, batch: 34389, sum loss: 3771.090332, avg loss: 2.322100, ppl: 10.197065 +epoch: 1, batch: 34390, sum loss: 4388.918457, avg loss: 2.746507, ppl: 15.588090 +epoch: 1, batch: 34391, sum loss: 4614.760254, avg loss: 2.605737, ppl: 13.541201 +epoch: 1, batch: 34392, sum loss: 
4990.726074, avg loss: 2.567246, ppl: 13.029893 +epoch: 1, batch: 34393, sum loss: 4851.958496, avg loss: 2.766225, ppl: 15.898501 +epoch: 1, batch: 34394, sum loss: 3432.652344, avg loss: 2.243564, ppl: 9.426866 +epoch: 1, batch: 34395, sum loss: 3509.401855, avg loss: 2.488938, ppl: 12.048470 +epoch: 1, batch: 34396, sum loss: 4621.302734, avg loss: 2.724825, ppl: 15.253739 +epoch: 1, batch: 34397, sum loss: 5120.069336, avg loss: 2.530929, ppl: 12.565172 +epoch: 1, batch: 34398, sum loss: 3990.315918, avg loss: 2.701636, ppl: 14.904093 +epoch: 1, batch: 34399, sum loss: 4454.095215, avg loss: 2.609312, ppl: 13.589696 +epoch: 1, batch: 34400, sum loss: 4353.144043, avg loss: 2.770938, ppl: 15.973617 +epoch: 1, batch: 34401, sum loss: 3502.915039, avg loss: 2.312155, ppl: 10.096161 +epoch: 1, batch: 34402, sum loss: 4648.526367, avg loss: 2.624803, ppl: 13.801856 +epoch: 1, batch: 34403, sum loss: 4148.924805, avg loss: 2.496345, ppl: 12.138046 +epoch: 1, batch: 34404, sum loss: 4288.634766, avg loss: 2.645672, ppl: 14.092914 +epoch: 1, batch: 34405, sum loss: 5393.936035, avg loss: 2.640204, ppl: 14.016059 +epoch: 1, batch: 34406, sum loss: 3643.168945, avg loss: 2.624761, ppl: 13.801280 +epoch: 1, batch: 34407, sum loss: 4228.141602, avg loss: 2.493008, ppl: 12.097612 +epoch: 1, batch: 34408, sum loss: 4001.630127, avg loss: 2.532677, ppl: 12.587162 +epoch: 1, batch: 34409, sum loss: 4177.645996, avg loss: 2.448796, ppl: 11.574403 +epoch: 1, batch: 34410, sum loss: 4800.186523, avg loss: 2.649110, ppl: 14.141441 +epoch: 1, batch: 34411, sum loss: 5094.443359, avg loss: 2.780810, ppl: 16.132076 +epoch: 1, batch: 34412, sum loss: 4297.005371, avg loss: 2.403247, ppl: 11.059026 +epoch: 1, batch: 34413, sum loss: 4103.895996, avg loss: 2.610621, ppl: 13.607498 +epoch: 1, batch: 34414, sum loss: 3747.319580, avg loss: 2.249291, ppl: 9.481015 +epoch: 1, batch: 34415, sum loss: 3827.500977, avg loss: 2.453526, ppl: 11.629282 +epoch: 1, batch: 34416, sum loss: 
4645.018066, avg loss: 2.630248, ppl: 13.877212 +epoch: 1, batch: 34417, sum loss: 5515.145508, avg loss: 2.646423, ppl: 14.103501 +epoch: 1, batch: 34418, sum loss: 5397.408691, avg loss: 2.714994, ppl: 15.104523 +epoch: 1, batch: 34419, sum loss: 5573.336426, avg loss: 2.884750, ppl: 17.899086 +epoch: 1, batch: 34420, sum loss: 5684.943359, avg loss: 2.965542, ppl: 19.405214 +epoch: 1, batch: 34421, sum loss: 4678.395020, avg loss: 2.803113, ppl: 16.495914 +epoch: 1, batch: 34422, sum loss: 4445.908691, avg loss: 2.763150, ppl: 15.849694 +epoch: 1, batch: 34423, sum loss: 3841.062500, avg loss: 2.481307, ppl: 11.956877 +epoch: 1, batch: 34424, sum loss: 4118.343750, avg loss: 2.554804, ppl: 12.868775 +epoch: 1, batch: 34425, sum loss: 4080.914307, avg loss: 2.449528, ppl: 11.582881 +epoch: 1, batch: 34426, sum loss: 4556.999512, avg loss: 2.714115, ppl: 15.091250 +epoch: 1, batch: 34427, sum loss: 3535.453125, avg loss: 2.282410, ppl: 9.800270 +epoch: 1, batch: 34428, sum loss: 4362.658203, avg loss: 2.593733, ppl: 13.379620 +epoch: 1, batch: 34429, sum loss: 4503.032227, avg loss: 2.613484, ppl: 13.646508 +epoch: 1, batch: 34430, sum loss: 3802.851318, avg loss: 2.496948, ppl: 12.145367 +epoch: 1, batch: 34431, sum loss: 4293.275391, avg loss: 2.668288, ppl: 14.415269 +epoch: 1, batch: 34432, sum loss: 4296.482422, avg loss: 2.652150, ppl: 14.184498 +epoch: 1, batch: 34433, sum loss: 5165.376465, avg loss: 2.718619, ppl: 15.159378 +epoch: 1, batch: 34434, sum loss: 5056.242188, avg loss: 2.702428, ppl: 14.915901 +epoch: 1, batch: 34435, sum loss: 4184.203125, avg loss: 2.456960, ppl: 11.669285 +epoch: 1, batch: 34436, sum loss: 3959.218262, avg loss: 2.577616, ppl: 13.165716 +epoch: 1, batch: 34437, sum loss: 3930.747559, avg loss: 2.559081, ppl: 12.923929 +epoch: 1, batch: 34438, sum loss: 3844.608643, avg loss: 2.241754, ppl: 9.409824 +epoch: 1, batch: 34439, sum loss: 3945.908936, avg loss: 2.375623, ppl: 10.757708 +epoch: 1, batch: 34440, sum loss: 
5151.573242, avg loss: 2.798247, ppl: 16.415850 +epoch: 1, batch: 34441, sum loss: 4916.758789, avg loss: 2.880351, ppl: 17.820524 +epoch: 1, batch: 34442, sum loss: 4145.750488, avg loss: 2.543405, ppl: 12.722922 +epoch: 1, batch: 34443, sum loss: 3939.647461, avg loss: 2.509330, ppl: 12.296683 +epoch: 1, batch: 34444, sum loss: 5471.066406, avg loss: 2.846549, ppl: 17.228222 +epoch: 1, batch: 34445, sum loss: 4153.068359, avg loss: 2.421614, ppl: 11.264027 +epoch: 1, batch: 34446, sum loss: 4745.873047, avg loss: 2.754424, ppl: 15.711993 +epoch: 1, batch: 34447, sum loss: 4104.615723, avg loss: 2.581519, ppl: 13.217205 +epoch: 1, batch: 34448, sum loss: 4222.277832, avg loss: 2.426596, ppl: 11.320287 +epoch: 1, batch: 34449, sum loss: 5000.802734, avg loss: 2.726719, ppl: 15.282660 +epoch: 1, batch: 34450, sum loss: 5380.475586, avg loss: 2.984179, ppl: 19.770273 +epoch: 1, batch: 34451, sum loss: 4930.247070, avg loss: 2.742073, ppl: 15.519120 +epoch: 1, batch: 34452, sum loss: 3786.692139, avg loss: 2.457295, ppl: 11.673198 +epoch: 1, batch: 34453, sum loss: 3896.803223, avg loss: 2.374651, ppl: 10.747267 +epoch: 1, batch: 34454, sum loss: 5450.453125, avg loss: 3.007976, ppl: 20.246386 +epoch: 1, batch: 34455, sum loss: 4303.342773, avg loss: 2.760323, ppl: 15.804940 +epoch: 1, batch: 34456, sum loss: 4260.197266, avg loss: 2.647730, ppl: 14.121943 +epoch: 1, batch: 34457, sum loss: 3378.637207, avg loss: 2.306237, ppl: 10.036586 +epoch: 1, batch: 34458, sum loss: 4395.276855, avg loss: 2.628754, ppl: 13.856499 +epoch: 1, batch: 34459, sum loss: 4625.671387, avg loss: 2.569818, ppl: 13.063440 +epoch: 1, batch: 34460, sum loss: 4167.052734, avg loss: 2.385262, ppl: 10.861908 +epoch: 1, batch: 34461, sum loss: 4468.160645, avg loss: 2.578281, ppl: 13.174468 +epoch: 1, batch: 34462, sum loss: 4173.225098, avg loss: 2.504937, ppl: 12.242786 +epoch: 1, batch: 34463, sum loss: 4159.309570, avg loss: 2.676518, ppl: 14.534403 +epoch: 1, batch: 34464, sum loss: 
3972.386719, avg loss: 2.669615, ppl: 14.434408 +epoch: 1, batch: 34465, sum loss: 4574.030762, avg loss: 2.434290, ppl: 11.407716 +epoch: 1, batch: 34466, sum loss: 4461.609375, avg loss: 2.785024, ppl: 16.200212 +epoch: 1, batch: 34467, sum loss: 5065.594238, avg loss: 2.643838, ppl: 14.067096 +epoch: 1, batch: 34468, sum loss: 4261.994141, avg loss: 2.532379, ppl: 12.583408 +epoch: 1, batch: 34469, sum loss: 4697.516602, avg loss: 2.789499, ppl: 16.272869 +epoch: 1, batch: 34470, sum loss: 5857.197266, avg loss: 2.925673, ppl: 18.646767 +epoch: 1, batch: 34471, sum loss: 4553.015625, avg loss: 2.726357, ppl: 15.277127 +epoch: 1, batch: 34472, sum loss: 4795.633301, avg loss: 2.686630, ppl: 14.682107 +epoch: 1, batch: 34473, sum loss: 4024.523438, avg loss: 2.626974, ppl: 13.831846 +epoch: 1, batch: 34474, sum loss: 3823.601807, avg loss: 2.348650, ppl: 10.471421 +epoch: 1, batch: 34475, sum loss: 4189.944336, avg loss: 2.477791, ppl: 11.914916 +epoch: 1, batch: 34476, sum loss: 3866.368408, avg loss: 2.605370, ppl: 13.536227 +epoch: 1, batch: 34477, sum loss: 4908.856445, avg loss: 2.814711, ppl: 16.688354 +epoch: 1, batch: 34478, sum loss: 3379.538574, avg loss: 2.371606, ppl: 10.714584 +epoch: 1, batch: 34479, sum loss: 4264.043457, avg loss: 2.768859, ppl: 15.940442 +epoch: 1, batch: 34480, sum loss: 4204.262695, avg loss: 2.369934, ppl: 10.696685 +epoch: 1, batch: 34481, sum loss: 5211.645020, avg loss: 2.761868, ppl: 15.829385 +epoch: 1, batch: 34482, sum loss: 5043.678711, avg loss: 2.657365, ppl: 14.258669 +epoch: 1, batch: 34483, sum loss: 4087.910645, avg loss: 2.587285, ppl: 13.293634 +epoch: 1, batch: 34484, sum loss: 4753.551758, avg loss: 2.742961, ppl: 15.532912 +epoch: 1, batch: 34485, sum loss: 4050.729492, avg loss: 2.664954, ppl: 14.367285 +epoch: 1, batch: 34486, sum loss: 4612.036133, avg loss: 2.614533, ppl: 13.660834 +epoch: 1, batch: 34487, sum loss: 4798.740234, avg loss: 2.579968, ppl: 13.196715 +epoch: 1, batch: 34488, sum loss: 
3953.935547, avg loss: 2.560839, ppl: 12.946677 +epoch: 1, batch: 34489, sum loss: 3630.186768, avg loss: 2.484728, ppl: 11.997851 +epoch: 1, batch: 34490, sum loss: 3182.936279, avg loss: 2.149180, ppl: 8.577826 +epoch: 1, batch: 34491, sum loss: 5619.532227, avg loss: 2.797179, ppl: 16.398317 +epoch: 1, batch: 34492, sum loss: 3261.658203, avg loss: 2.448692, ppl: 11.573203 +epoch: 1, batch: 34493, sum loss: 3925.280518, avg loss: 2.545578, ppl: 12.750589 +epoch: 1, batch: 34494, sum loss: 4209.247070, avg loss: 2.794985, ppl: 16.362381 +epoch: 1, batch: 34495, sum loss: 4480.688965, avg loss: 2.730462, ppl: 15.339977 +epoch: 1, batch: 34496, sum loss: 4267.055176, avg loss: 2.673594, ppl: 14.491953 +epoch: 1, batch: 34497, sum loss: 4716.057129, avg loss: 2.749887, ppl: 15.640872 +epoch: 1, batch: 34498, sum loss: 5015.805664, avg loss: 2.877685, ppl: 17.773087 +epoch: 1, batch: 34499, sum loss: 3899.608643, avg loss: 2.480667, ppl: 11.949233 +epoch: 1, batch: 34500, sum loss: 4919.366699, avg loss: 2.791922, ppl: 16.312347 +epoch: 1, batch: 34501, sum loss: 3980.120605, avg loss: 2.387595, ppl: 10.887276 +epoch: 1, batch: 34502, sum loss: 5744.259766, avg loss: 2.951829, ppl: 19.140940 +epoch: 1, batch: 34503, sum loss: 4681.824707, avg loss: 2.642113, ppl: 14.042851 +epoch: 1, batch: 34504, sum loss: 4675.573730, avg loss: 2.539692, ppl: 12.675772 +epoch: 1, batch: 34505, sum loss: 4650.695801, avg loss: 2.624546, ppl: 13.798312 +epoch: 1, batch: 34506, sum loss: 5001.005859, avg loss: 2.964437, ppl: 19.383797 +epoch: 1, batch: 34507, sum loss: 3676.894531, avg loss: 2.398496, ppl: 11.006612 +epoch: 1, batch: 34508, sum loss: 5003.329590, avg loss: 2.713302, ppl: 15.078990 +epoch: 1, batch: 34509, sum loss: 4428.353516, avg loss: 2.659672, ppl: 14.291597 +epoch: 1, batch: 34510, sum loss: 5102.445801, avg loss: 2.864933, ppl: 17.547882 +epoch: 1, batch: 34511, sum loss: 4168.573730, avg loss: 2.658529, ppl: 14.275275 +epoch: 1, batch: 34512, sum loss: 
4344.208008, avg loss: 2.562955, ppl: 12.974094 +epoch: 1, batch: 34513, sum loss: 4742.838867, avg loss: 2.422287, ppl: 11.271613 +epoch: 1, batch: 34514, sum loss: 3259.878906, avg loss: 2.259098, ppl: 9.574451 +epoch: 1, batch: 34515, sum loss: 5220.007812, avg loss: 2.824680, ppl: 16.855543 +epoch: 1, batch: 34516, sum loss: 4261.697266, avg loss: 2.545817, ppl: 12.753642 +epoch: 1, batch: 34517, sum loss: 4152.321289, avg loss: 2.461364, ppl: 11.720791 +epoch: 1, batch: 34518, sum loss: 4488.290039, avg loss: 2.792962, ppl: 16.329317 +epoch: 1, batch: 34519, sum loss: 3852.094727, avg loss: 2.361799, ppl: 10.610026 +epoch: 1, batch: 34520, sum loss: 5829.740234, avg loss: 2.957757, ppl: 19.254744 +epoch: 1, batch: 34521, sum loss: 3922.588623, avg loss: 2.479512, ppl: 11.935441 +epoch: 1, batch: 34522, sum loss: 4885.064941, avg loss: 2.704909, ppl: 14.952950 +epoch: 1, batch: 34523, sum loss: 3729.859375, avg loss: 2.544242, ppl: 12.733577 +epoch: 1, batch: 34524, sum loss: 4443.343750, avg loss: 2.683179, ppl: 14.631528 +epoch: 1, batch: 34525, sum loss: 3737.324951, avg loss: 2.362405, ppl: 10.616454 +epoch: 1, batch: 34526, sum loss: 4312.498047, avg loss: 2.418675, ppl: 11.230973 +epoch: 1, batch: 34527, sum loss: 4614.383789, avg loss: 2.801690, ppl: 16.472464 +epoch: 1, batch: 34528, sum loss: 4270.313477, avg loss: 2.491431, ppl: 12.078554 +epoch: 1, batch: 34529, sum loss: 5223.226562, avg loss: 2.855783, ppl: 17.388042 +epoch: 1, batch: 34530, sum loss: 4423.470703, avg loss: 2.679268, ppl: 14.574416 +epoch: 1, batch: 34531, sum loss: 3852.668945, avg loss: 2.391477, ppl: 10.929624 +epoch: 1, batch: 34532, sum loss: 3970.081543, avg loss: 2.262155, ppl: 9.603759 +epoch: 1, batch: 34533, sum loss: 3941.162598, avg loss: 2.428319, ppl: 11.339809 +epoch: 1, batch: 34534, sum loss: 4694.050781, avg loss: 2.619448, ppl: 13.728147 +epoch: 1, batch: 34535, sum loss: 4075.300293, avg loss: 2.615726, ppl: 13.677135 +epoch: 1, batch: 34536, sum loss: 
4323.878418, avg loss: 2.714299, ppl: 15.094028 +epoch: 1, batch: 34537, sum loss: 3879.705811, avg loss: 2.565943, ppl: 13.012927 +epoch: 1, batch: 34538, sum loss: 5380.638184, avg loss: 2.903744, ppl: 18.242321 +epoch: 1, batch: 34539, sum loss: 5027.872070, avg loss: 3.032492, ppl: 20.748878 +epoch: 1, batch: 34540, sum loss: 3846.733398, avg loss: 2.420852, ppl: 11.255442 +epoch: 1, batch: 34541, sum loss: 4571.735352, avg loss: 2.439560, ppl: 11.467993 +epoch: 1, batch: 34542, sum loss: 4530.869629, avg loss: 2.838891, ppl: 17.096790 +epoch: 1, batch: 34543, sum loss: 5088.950684, avg loss: 2.682631, ppl: 14.623514 +epoch: 1, batch: 34544, sum loss: 4724.797852, avg loss: 2.761425, ppl: 15.822374 +epoch: 1, batch: 34545, sum loss: 4853.883789, avg loss: 2.637980, ppl: 13.984929 +epoch: 1, batch: 34546, sum loss: 5069.309570, avg loss: 2.789934, ppl: 16.279940 +epoch: 1, batch: 34547, sum loss: 4379.125977, avg loss: 2.500929, ppl: 12.193812 +epoch: 1, batch: 34548, sum loss: 3298.091797, avg loss: 2.212000, ppl: 9.133965 +epoch: 1, batch: 34549, sum loss: 5141.189453, avg loss: 2.821729, ppl: 16.805878 +epoch: 1, batch: 34550, sum loss: 4537.343750, avg loss: 2.512372, ppl: 12.334152 +epoch: 1, batch: 34551, sum loss: 4444.551758, avg loss: 2.710093, ppl: 15.030666 +epoch: 1, batch: 34552, sum loss: 3353.810547, avg loss: 2.279953, ppl: 9.776218 +epoch: 1, batch: 34553, sum loss: 4918.939941, avg loss: 2.651720, ppl: 14.178398 +epoch: 1, batch: 34554, sum loss: 4380.923828, avg loss: 2.491993, ppl: 12.085338 +epoch: 1, batch: 34555, sum loss: 4119.244629, avg loss: 2.744334, ppl: 15.554244 +epoch: 1, batch: 34556, sum loss: 4547.415039, avg loss: 2.718120, ppl: 15.151815 +epoch: 1, batch: 34557, sum loss: 4766.479004, avg loss: 2.433119, ppl: 11.394361 +epoch: 1, batch: 34558, sum loss: 4980.475586, avg loss: 2.751644, ppl: 15.668371 +epoch: 1, batch: 34559, sum loss: 4064.769043, avg loss: 2.477007, ppl: 11.905580 +epoch: 1, batch: 34560, sum loss: 
4225.726562, avg loss: 2.772786, ppl: 16.003162 +epoch: 1, batch: 34561, sum loss: 4737.407715, avg loss: 2.751108, ppl: 15.659968 +epoch: 1, batch: 34562, sum loss: 4487.048828, avg loss: 2.455966, ppl: 11.657683 +epoch: 1, batch: 34563, sum loss: 5158.666016, avg loss: 2.515196, ppl: 12.369028 +epoch: 1, batch: 34564, sum loss: 4463.550781, avg loss: 2.526062, ppl: 12.504162 +epoch: 1, batch: 34565, sum loss: 4412.423828, avg loss: 2.607816, ppl: 13.569376 +epoch: 1, batch: 34566, sum loss: 5522.561523, avg loss: 2.758522, ppl: 15.776512 +epoch: 1, batch: 34567, sum loss: 4058.093506, avg loss: 2.322893, ppl: 10.205152 +epoch: 1, batch: 34568, sum loss: 4480.395020, avg loss: 2.635527, ppl: 13.950658 +epoch: 1, batch: 34569, sum loss: 4461.312500, avg loss: 2.546412, ppl: 12.761228 +epoch: 1, batch: 34570, sum loss: 3748.719482, avg loss: 2.519301, ppl: 12.419909 +epoch: 1, batch: 34571, sum loss: 4609.950684, avg loss: 2.620779, ppl: 13.746431 +epoch: 1, batch: 34572, sum loss: 3962.893555, avg loss: 2.441709, ppl: 11.492662 +epoch: 1, batch: 34573, sum loss: 4698.719727, avg loss: 2.844261, ppl: 17.188858 +epoch: 1, batch: 34574, sum loss: 4413.621094, avg loss: 2.614705, ppl: 13.663179 +epoch: 1, batch: 34575, sum loss: 4023.222168, avg loss: 2.426551, ppl: 11.319777 +epoch: 1, batch: 34576, sum loss: 4868.825195, avg loss: 2.758541, ppl: 15.776810 +epoch: 1, batch: 34577, sum loss: 5536.867676, avg loss: 3.001012, ppl: 20.105881 +epoch: 1, batch: 34578, sum loss: 3564.057129, avg loss: 2.317332, ppl: 10.148564 +epoch: 1, batch: 34579, sum loss: 3935.406738, avg loss: 2.445871, ppl: 11.540599 +epoch: 1, batch: 34580, sum loss: 4731.821289, avg loss: 2.573040, ppl: 13.105612 +epoch: 1, batch: 34581, sum loss: 4632.094238, avg loss: 2.727971, ppl: 15.301805 +epoch: 1, batch: 34582, sum loss: 4058.855713, avg loss: 2.528882, ppl: 12.539479 +epoch: 1, batch: 34583, sum loss: 4711.018066, avg loss: 2.625986, ppl: 13.818187 +epoch: 1, batch: 34584, sum loss: 
4776.421875, avg loss: 2.663928, ppl: 14.352549 +epoch: 1, batch: 34585, sum loss: 4036.211182, avg loss: 2.405370, ppl: 11.082533 +epoch: 1, batch: 34586, sum loss: 4259.292969, avg loss: 2.544381, ppl: 12.735338 +epoch: 1, batch: 34587, sum loss: 3536.051025, avg loss: 2.247966, ppl: 9.468460 +epoch: 1, batch: 34588, sum loss: 4526.766113, avg loss: 2.872313, ppl: 17.677868 +epoch: 1, batch: 34589, sum loss: 4714.501465, avg loss: 2.672620, ppl: 14.477849 +epoch: 1, batch: 34590, sum loss: 5309.750977, avg loss: 2.812368, ppl: 16.649300 +epoch: 1, batch: 34591, sum loss: 4789.618164, avg loss: 2.686270, ppl: 14.676822 +epoch: 1, batch: 34592, sum loss: 5535.335449, avg loss: 2.745702, ppl: 15.575548 +epoch: 1, batch: 34593, sum loss: 4549.103516, avg loss: 2.742076, ppl: 15.519164 +epoch: 1, batch: 34594, sum loss: 4400.197266, avg loss: 2.523049, ppl: 12.466548 +epoch: 1, batch: 34595, sum loss: 5676.317383, avg loss: 2.853855, ppl: 17.354549 +epoch: 1, batch: 34596, sum loss: 4190.181641, avg loss: 2.498617, ppl: 12.165662 +epoch: 1, batch: 34597, sum loss: 5331.019531, avg loss: 2.765052, ppl: 15.879863 +epoch: 1, batch: 34598, sum loss: 3725.281250, avg loss: 2.549816, ppl: 12.804749 +epoch: 1, batch: 34599, sum loss: 5254.028320, avg loss: 2.838481, ppl: 17.089785 +epoch: 1, batch: 34600, sum loss: 5339.219727, avg loss: 2.976154, ppl: 19.612240 +epoch: 1, batch: 34601, sum loss: 3300.798096, avg loss: 2.207892, ppl: 9.096518 +epoch: 1, batch: 34602, sum loss: 4594.099121, avg loss: 2.604365, ppl: 13.522631 +epoch: 1, batch: 34603, sum loss: 5082.931152, avg loss: 2.822283, ppl: 16.815189 +epoch: 1, batch: 34604, sum loss: 4968.697266, avg loss: 2.768077, ppl: 15.927970 +epoch: 1, batch: 34605, sum loss: 4817.439453, avg loss: 2.670421, ppl: 14.446048 +epoch: 1, batch: 34606, sum loss: 4225.143555, avg loss: 2.422674, ppl: 11.275970 +epoch: 1, batch: 34607, sum loss: 3977.544434, avg loss: 2.334240, ppl: 10.321609 +epoch: 1, batch: 34608, sum loss: 
3520.302490, avg loss: 2.205703, ppl: 9.076632 +epoch: 1, batch: 34609, sum loss: 4522.192383, avg loss: 2.529190, ppl: 12.543345 +epoch: 1, batch: 34610, sum loss: 3888.225586, avg loss: 2.494051, ppl: 12.110238 +epoch: 1, batch: 34611, sum loss: 4510.784180, avg loss: 2.732153, ppl: 15.365933 +epoch: 1, batch: 34612, sum loss: 5031.275391, avg loss: 2.561749, ppl: 12.958465 +epoch: 1, batch: 34613, sum loss: 4316.032227, avg loss: 2.507863, ppl: 12.278663 +epoch: 1, batch: 34614, sum loss: 4555.812500, avg loss: 2.458614, ppl: 11.688604 +epoch: 1, batch: 34615, sum loss: 4717.786621, avg loss: 2.702054, ppl: 14.910330 +epoch: 1, batch: 34616, sum loss: 4240.830078, avg loss: 2.481469, ppl: 11.958816 +epoch: 1, batch: 34617, sum loss: 4799.918945, avg loss: 2.637318, ppl: 13.975673 +epoch: 1, batch: 34618, sum loss: 4385.968262, avg loss: 2.558908, ppl: 12.921699 +epoch: 1, batch: 34619, sum loss: 3514.369141, avg loss: 2.374574, ppl: 10.746431 +epoch: 1, batch: 34620, sum loss: 4805.220215, avg loss: 2.569637, ppl: 13.061077 +epoch: 1, batch: 34621, sum loss: 4526.686523, avg loss: 2.583725, ppl: 13.246395 +epoch: 1, batch: 34622, sum loss: 3960.855469, avg loss: 2.545537, ppl: 12.750073 +epoch: 1, batch: 34623, sum loss: 4760.121094, avg loss: 2.727863, ppl: 15.300157 +epoch: 1, batch: 34624, sum loss: 4425.921387, avg loss: 2.553907, ppl: 12.857241 +epoch: 1, batch: 34625, sum loss: 4210.315918, avg loss: 2.592559, ppl: 13.363928 +epoch: 1, batch: 34626, sum loss: 4046.620361, avg loss: 2.532303, ppl: 12.582452 +epoch: 1, batch: 34627, sum loss: 3740.063477, avg loss: 2.493376, ppl: 12.102058 +epoch: 1, batch: 34628, sum loss: 4168.524902, avg loss: 2.573164, ppl: 13.107227 +epoch: 1, batch: 34629, sum loss: 4201.880371, avg loss: 2.492218, ppl: 12.088063 +epoch: 1, batch: 34630, sum loss: 4056.911621, avg loss: 2.549913, ppl: 12.805992 +epoch: 1, batch: 34631, sum loss: 4357.655762, avg loss: 2.461952, ppl: 11.727687 +epoch: 1, batch: 34632, sum loss: 
4812.349609, avg loss: 2.825807, ppl: 16.874559 +epoch: 1, batch: 34633, sum loss: 4023.594238, avg loss: 2.605955, ppl: 13.544152 +epoch: 1, batch: 34634, sum loss: 4341.648438, avg loss: 2.718628, ppl: 15.159505 +epoch: 1, batch: 34635, sum loss: 3919.466797, avg loss: 2.420918, ppl: 11.256193 +epoch: 1, batch: 34636, sum loss: 3820.530518, avg loss: 2.219948, ppl: 9.206853 +epoch: 1, batch: 34637, sum loss: 4206.212402, avg loss: 2.657115, ppl: 14.255096 +epoch: 1, batch: 34638, sum loss: 4377.190430, avg loss: 2.823994, ppl: 16.843990 +epoch: 1, batch: 34639, sum loss: 4910.823730, avg loss: 2.576508, ppl: 13.151129 +epoch: 1, batch: 34640, sum loss: 4899.589844, avg loss: 2.878725, ppl: 17.791571 +epoch: 1, batch: 34641, sum loss: 4299.171387, avg loss: 2.657090, ppl: 14.254744 +epoch: 1, batch: 34642, sum loss: 4619.524414, avg loss: 2.905361, ppl: 18.271841 +epoch: 1, batch: 34643, sum loss: 4873.320312, avg loss: 2.899060, ppl: 18.157074 +epoch: 1, batch: 34644, sum loss: 4943.740723, avg loss: 2.904665, ppl: 18.259134 +epoch: 1, batch: 34645, sum loss: 3962.351562, avg loss: 2.530237, ppl: 12.556484 +epoch: 1, batch: 34646, sum loss: 3818.078369, avg loss: 2.410403, ppl: 11.138449 +epoch: 1, batch: 34647, sum loss: 4655.566406, avg loss: 2.459359, ppl: 11.697308 +epoch: 1, batch: 34648, sum loss: 4227.583984, avg loss: 2.356513, ppl: 10.554083 +epoch: 1, batch: 34649, sum loss: 4529.794434, avg loss: 2.570826, ppl: 13.076615 +epoch: 1, batch: 34650, sum loss: 5196.405273, avg loss: 2.825669, ppl: 16.872231 +epoch: 1, batch: 34651, sum loss: 3966.512207, avg loss: 2.547535, ppl: 12.775576 +epoch: 1, batch: 34652, sum loss: 4274.455078, avg loss: 2.545834, ppl: 12.753859 +epoch: 1, batch: 34653, sum loss: 3670.926270, avg loss: 2.497229, ppl: 12.148782 +epoch: 1, batch: 34654, sum loss: 3684.798340, avg loss: 2.461455, ppl: 11.721856 +epoch: 1, batch: 34655, sum loss: 5075.325195, avg loss: 2.617496, ppl: 13.701376 +epoch: 1, batch: 34656, sum loss: 
4598.321289, avg loss: 2.617144, ppl: 13.696546 +epoch: 1, batch: 34657, sum loss: 4354.030273, avg loss: 2.613463, ppl: 13.646232 +epoch: 1, batch: 34658, sum loss: 5071.290527, avg loss: 2.719191, ppl: 15.168040 +epoch: 1, batch: 34659, sum loss: 4389.629883, avg loss: 2.787066, ppl: 16.233328 +epoch: 1, batch: 34660, sum loss: 3962.536133, avg loss: 2.530355, ppl: 12.557963 +epoch: 1, batch: 34661, sum loss: 4808.745605, avg loss: 2.709152, ppl: 15.016540 +epoch: 1, batch: 34662, sum loss: 4106.224121, avg loss: 2.567995, ppl: 13.039655 +epoch: 1, batch: 34663, sum loss: 3376.548340, avg loss: 2.298535, ppl: 9.959582 +epoch: 1, batch: 34664, sum loss: 4384.621582, avg loss: 2.307696, ppl: 10.051236 +epoch: 1, batch: 34665, sum loss: 5385.633301, avg loss: 2.851050, ppl: 17.305943 +epoch: 1, batch: 34666, sum loss: 4128.266113, avg loss: 2.593132, ppl: 13.371586 +epoch: 1, batch: 34667, sum loss: 4550.394531, avg loss: 2.610668, ppl: 13.608141 +epoch: 1, batch: 34668, sum loss: 4328.601562, avg loss: 2.658846, ppl: 14.279802 +epoch: 1, batch: 34669, sum loss: 4175.165039, avg loss: 2.527340, ppl: 12.520154 +epoch: 1, batch: 34670, sum loss: 3388.095947, avg loss: 2.425266, ppl: 11.305231 +epoch: 1, batch: 34671, sum loss: 4226.543457, avg loss: 2.492066, ppl: 12.086217 +epoch: 1, batch: 34672, sum loss: 4822.855957, avg loss: 2.670463, ppl: 14.446654 +epoch: 1, batch: 34673, sum loss: 5467.162109, avg loss: 2.929883, ppl: 18.725439 +epoch: 1, batch: 34674, sum loss: 3701.085938, avg loss: 2.431726, ppl: 11.378499 +epoch: 1, batch: 34675, sum loss: 3582.762207, avg loss: 2.472576, ppl: 11.852940 +epoch: 1, batch: 34676, sum loss: 5051.293945, avg loss: 2.694023, ppl: 14.791066 +epoch: 1, batch: 34677, sum loss: 4128.534668, avg loss: 2.540637, ppl: 12.687748 +epoch: 1, batch: 34678, sum loss: 4002.828125, avg loss: 2.590827, ppl: 13.340806 +epoch: 1, batch: 34679, sum loss: 4422.532227, avg loss: 2.402245, ppl: 11.047946 +epoch: 1, batch: 34680, sum loss: 
4721.581055, avg loss: 2.631874, ppl: 13.899788 +epoch: 1, batch: 34681, sum loss: 4850.880859, avg loss: 2.512108, ppl: 12.330897 +epoch: 1, batch: 34682, sum loss: 3986.748535, avg loss: 2.540949, ppl: 12.691705 +epoch: 1, batch: 34683, sum loss: 5129.483887, avg loss: 2.799937, ppl: 16.443607 +epoch: 1, batch: 34684, sum loss: 4923.445312, avg loss: 2.852518, ppl: 17.331360 +epoch: 1, batch: 34685, sum loss: 3677.150879, avg loss: 2.467887, ppl: 11.797489 +epoch: 1, batch: 34686, sum loss: 4373.692383, avg loss: 2.623691, ppl: 13.786510 +epoch: 1, batch: 34687, sum loss: 3970.164062, avg loss: 2.564706, ppl: 12.996832 +epoch: 1, batch: 34688, sum loss: 4681.904785, avg loss: 2.718876, ppl: 15.163271 +epoch: 1, batch: 34689, sum loss: 5962.629883, avg loss: 2.852933, ppl: 17.338560 +epoch: 1, batch: 34690, sum loss: 4374.925293, avg loss: 2.539133, ppl: 12.668677 +epoch: 1, batch: 34691, sum loss: 4936.722656, avg loss: 2.543391, ppl: 12.722744 +epoch: 1, batch: 34692, sum loss: 3862.033203, avg loss: 2.486821, ppl: 12.022996 +epoch: 1, batch: 34693, sum loss: 4091.032227, avg loss: 2.617423, ppl: 13.700373 +epoch: 1, batch: 34694, sum loss: 4176.424805, avg loss: 2.597279, ppl: 13.427154 +epoch: 1, batch: 34695, sum loss: 4310.216309, avg loss: 2.435151, ppl: 11.417539 +epoch: 1, batch: 34696, sum loss: 4201.697266, avg loss: 2.492110, ppl: 12.086753 +epoch: 1, batch: 34697, sum loss: 4994.156738, avg loss: 2.567690, ppl: 13.035676 +epoch: 1, batch: 34698, sum loss: 4529.778320, avg loss: 2.803081, ppl: 16.495384 +epoch: 1, batch: 34699, sum loss: 5041.394531, avg loss: 2.739888, ppl: 15.485253 +epoch: 1, batch: 34700, sum loss: 3829.738037, avg loss: 2.433125, ppl: 11.394429 +epoch: 1, batch: 34701, sum loss: 4199.064453, avg loss: 2.531082, ppl: 12.567092 +epoch: 1, batch: 34702, sum loss: 4133.622559, avg loss: 2.478191, ppl: 11.919683 +epoch: 1, batch: 34703, sum loss: 4890.014648, avg loss: 2.631870, ppl: 13.899738 +epoch: 1, batch: 34704, sum loss: 
4961.419922, avg loss: 2.903113, ppl: 18.230808 +epoch: 1, batch: 34705, sum loss: 4042.760254, avg loss: 2.426627, ppl: 11.320629 +epoch: 1, batch: 34706, sum loss: 4378.750000, avg loss: 2.542828, ppl: 12.715581 +epoch: 1, batch: 34707, sum loss: 4208.406250, avg loss: 2.555195, ppl: 12.873808 +epoch: 1, batch: 34708, sum loss: 5171.258789, avg loss: 2.747746, ppl: 15.607420 +epoch: 1, batch: 34709, sum loss: 4250.214844, avg loss: 2.722751, ppl: 15.222147 +epoch: 1, batch: 34710, sum loss: 4012.403076, avg loss: 2.475264, ppl: 11.884845 +epoch: 1, batch: 34711, sum loss: 3538.313477, avg loss: 2.448660, ppl: 11.572827 +epoch: 1, batch: 34712, sum loss: 5296.433105, avg loss: 2.741425, ppl: 15.509070 +epoch: 1, batch: 34713, sum loss: 5326.189453, avg loss: 2.818090, ppl: 16.744833 +epoch: 1, batch: 34714, sum loss: 4152.208984, avg loss: 2.685776, ppl: 14.669574 +epoch: 1, batch: 34715, sum loss: 4726.431152, avg loss: 2.967000, ppl: 19.433537 +epoch: 1, batch: 34716, sum loss: 4290.013672, avg loss: 2.615862, ppl: 13.679001 +epoch: 1, batch: 34717, sum loss: 4785.348145, avg loss: 3.030619, ppl: 20.710056 +epoch: 1, batch: 34718, sum loss: 5106.866699, avg loss: 2.680770, ppl: 14.596327 +epoch: 1, batch: 34719, sum loss: 5094.223633, avg loss: 2.721273, ppl: 15.199665 +epoch: 1, batch: 34720, sum loss: 4354.806641, avg loss: 2.644084, ppl: 14.070550 +epoch: 1, batch: 34721, sum loss: 4670.496582, avg loss: 2.775102, ppl: 16.040262 +epoch: 1, batch: 34722, sum loss: 3956.972900, avg loss: 2.552886, ppl: 12.844115 +epoch: 1, batch: 34723, sum loss: 4088.771484, avg loss: 2.445438, ppl: 11.535597 +epoch: 1, batch: 34724, sum loss: 3910.247070, avg loss: 2.446963, ppl: 11.553207 +epoch: 1, batch: 34725, sum loss: 3554.733398, avg loss: 2.317297, ppl: 10.148204 +epoch: 1, batch: 34726, sum loss: 3685.456299, avg loss: 2.458610, ppl: 11.688554 +epoch: 1, batch: 34727, sum loss: 5446.747559, avg loss: 2.829479, ppl: 16.936638 +epoch: 1, batch: 34728, sum loss: 
4625.084961, avg loss: 2.892486, ppl: 18.038094 +epoch: 1, batch: 34729, sum loss: 4412.001953, avg loss: 2.525473, ppl: 12.496809 +epoch: 1, batch: 34730, sum loss: 3790.523926, avg loss: 2.448659, ppl: 11.572817 +epoch: 1, batch: 34731, sum loss: 3872.900146, avg loss: 2.580213, ppl: 13.199953 +epoch: 1, batch: 34732, sum loss: 4149.728027, avg loss: 2.452558, ppl: 11.618028 +epoch: 1, batch: 34733, sum loss: 4111.474609, avg loss: 2.787441, ppl: 16.239403 +epoch: 1, batch: 34734, sum loss: 3891.361328, avg loss: 2.442788, ppl: 11.505074 +epoch: 1, batch: 34735, sum loss: 5550.521973, avg loss: 2.681412, ppl: 14.605699 +epoch: 1, batch: 34736, sum loss: 4469.205566, avg loss: 2.702059, ppl: 14.910401 +epoch: 1, batch: 34737, sum loss: 5165.382812, avg loss: 2.717192, ppl: 15.137762 +epoch: 1, batch: 34738, sum loss: 5157.085449, avg loss: 2.747515, ppl: 15.603807 +epoch: 1, batch: 34739, sum loss: 4248.909180, avg loss: 2.471733, ppl: 11.842954 +epoch: 1, batch: 34740, sum loss: 3993.647949, avg loss: 2.489805, ppl: 12.058930 +epoch: 1, batch: 34741, sum loss: 3483.209961, avg loss: 2.563068, ppl: 12.975571 +epoch: 1, batch: 34742, sum loss: 3741.338867, avg loss: 2.533067, ppl: 12.592061 +epoch: 1, batch: 34743, sum loss: 4338.060547, avg loss: 2.473238, ppl: 11.860796 +epoch: 1, batch: 34744, sum loss: 3087.674316, avg loss: 2.096181, ppl: 8.135042 +epoch: 1, batch: 34745, sum loss: 4061.007812, avg loss: 2.444917, ppl: 11.529597 +epoch: 1, batch: 34746, sum loss: 5084.651367, avg loss: 2.749947, ppl: 15.641796 +epoch: 1, batch: 34747, sum loss: 3843.942871, avg loss: 2.422144, ppl: 11.270001 +epoch: 1, batch: 34748, sum loss: 4076.371338, avg loss: 2.631615, ppl: 13.896190 +epoch: 1, batch: 34749, sum loss: 4799.268555, avg loss: 2.725309, ppl: 15.261124 +epoch: 1, batch: 34750, sum loss: 3579.624023, avg loss: 2.307946, ppl: 10.053750 +epoch: 1, batch: 34751, sum loss: 5363.525391, avg loss: 2.691182, ppl: 14.749098 +epoch: 1, batch: 34752, sum loss: 
3716.384033, avg loss: 2.547213, ppl: 12.771464 +epoch: 1, batch: 34753, sum loss: 4473.275879, avg loss: 2.735949, ppl: 15.424368 +epoch: 1, batch: 34754, sum loss: 4155.018555, avg loss: 2.616511, ppl: 13.687878 +epoch: 1, batch: 34755, sum loss: 4987.909668, avg loss: 2.592469, ppl: 13.362720 +epoch: 1, batch: 34756, sum loss: 4055.203613, avg loss: 2.483284, ppl: 11.980550 +epoch: 1, batch: 34757, sum loss: 4375.975098, avg loss: 2.801521, ppl: 16.469671 +epoch: 1, batch: 34758, sum loss: 3957.206055, avg loss: 2.818523, ppl: 16.752089 +epoch: 1, batch: 34759, sum loss: 3617.343994, avg loss: 2.422869, ppl: 11.278175 +epoch: 1, batch: 34760, sum loss: 5365.328613, avg loss: 2.832803, ppl: 16.993025 +epoch: 1, batch: 34761, sum loss: 4574.177246, avg loss: 2.891389, ppl: 18.018314 +epoch: 1, batch: 34762, sum loss: 4240.984375, avg loss: 2.630884, ppl: 13.886036 +epoch: 1, batch: 34763, sum loss: 4008.188965, avg loss: 2.441041, ppl: 11.484990 +epoch: 1, batch: 34764, sum loss: 4503.787109, avg loss: 2.646173, ppl: 14.099978 +epoch: 1, batch: 34765, sum loss: 4372.317383, avg loss: 2.530276, ppl: 12.556975 +epoch: 1, batch: 34766, sum loss: 3980.507324, avg loss: 2.611881, ppl: 13.624661 +epoch: 1, batch: 34767, sum loss: 4132.046875, avg loss: 2.406550, ppl: 11.095620 +epoch: 1, batch: 34768, sum loss: 3334.589844, avg loss: 2.288668, ppl: 9.861797 +epoch: 1, batch: 34769, sum loss: 4029.714111, avg loss: 2.677551, ppl: 14.549418 +epoch: 1, batch: 34770, sum loss: 3907.249756, avg loss: 2.334080, ppl: 10.319961 +epoch: 1, batch: 34771, sum loss: 4802.198730, avg loss: 2.656083, ppl: 14.240405 +epoch: 1, batch: 34772, sum loss: 4258.396973, avg loss: 2.467206, ppl: 11.789455 +epoch: 1, batch: 34773, sum loss: 4678.008301, avg loss: 2.797852, ppl: 16.409363 +epoch: 1, batch: 34774, sum loss: 4896.790039, avg loss: 2.779109, ppl: 16.104670 +epoch: 1, batch: 34775, sum loss: 4626.998535, avg loss: 2.794081, ppl: 16.347601 +epoch: 1, batch: 34776, sum loss: 
4939.731445, avg loss: 2.576803, ppl: 13.155017 +epoch: 1, batch: 34777, sum loss: 5087.532715, avg loss: 2.809240, ppl: 16.597294 +epoch: 1, batch: 34778, sum loss: 4406.506348, avg loss: 2.665763, ppl: 14.378915 +epoch: 1, batch: 34779, sum loss: 4757.192871, avg loss: 2.621043, ppl: 13.750057 +epoch: 1, batch: 34780, sum loss: 4067.772949, avg loss: 2.556740, ppl: 12.893713 +epoch: 1, batch: 34781, sum loss: 5147.474609, avg loss: 2.782419, ppl: 16.158056 +epoch: 1, batch: 34782, sum loss: 3486.308350, avg loss: 2.338235, ppl: 10.362931 +epoch: 1, batch: 34783, sum loss: 4535.285645, avg loss: 2.763733, ppl: 15.858929 +epoch: 1, batch: 34784, sum loss: 3952.013672, avg loss: 2.485543, ppl: 12.007642 +epoch: 1, batch: 34785, sum loss: 4075.791016, avg loss: 2.517474, ppl: 12.397246 +epoch: 1, batch: 34786, sum loss: 4254.690918, avg loss: 2.641025, ppl: 14.027572 +epoch: 1, batch: 34787, sum loss: 3635.617920, avg loss: 2.280814, ppl: 9.784643 +epoch: 1, batch: 34788, sum loss: 4277.405762, avg loss: 2.600246, ppl: 13.467056 +epoch: 1, batch: 34789, sum loss: 3598.927490, avg loss: 2.516733, ppl: 12.388055 +epoch: 1, batch: 34790, sum loss: 4107.160645, avg loss: 2.586373, ppl: 13.281516 +epoch: 1, batch: 34791, sum loss: 3496.186523, avg loss: 2.294086, ppl: 9.915367 +epoch: 1, batch: 34792, sum loss: 3896.767578, avg loss: 2.624086, ppl: 13.791962 +epoch: 1, batch: 34793, sum loss: 5341.312500, avg loss: 2.763224, ppl: 15.850869 +epoch: 1, batch: 34794, sum loss: 3905.675293, avg loss: 2.407938, ppl: 11.111024 +epoch: 1, batch: 34795, sum loss: 3761.830811, avg loss: 2.540061, ppl: 12.680450 +epoch: 1, batch: 34796, sum loss: 4050.618164, avg loss: 2.488095, ppl: 12.038316 +epoch: 1, batch: 34797, sum loss: 4455.352051, avg loss: 2.736703, ppl: 15.436004 +epoch: 1, batch: 34798, sum loss: 4792.681152, avg loss: 2.578096, ppl: 13.172040 +epoch: 1, batch: 34799, sum loss: 4478.767578, avg loss: 2.661181, ppl: 14.313183 +epoch: 1, batch: 34800, sum loss: 
5060.915527, avg loss: 2.764017, ppl: 15.863444 +epoch: 1, batch: 34801, sum loss: 3957.464600, avg loss: 2.214586, ppl: 9.157615 +epoch: 1, batch: 34802, sum loss: 4218.877930, avg loss: 2.470069, ppl: 11.823262 +epoch: 1, batch: 34803, sum loss: 4455.472656, avg loss: 2.803947, ppl: 16.509689 +epoch: 1, batch: 34804, sum loss: 5242.293945, avg loss: 2.693882, ppl: 14.788976 +epoch: 1, batch: 34805, sum loss: 4321.168945, avg loss: 2.599981, ppl: 13.463487 +epoch: 1, batch: 34806, sum loss: 3565.298584, avg loss: 2.298710, ppl: 9.961320 +epoch: 1, batch: 34807, sum loss: 3560.251221, avg loss: 2.323924, ppl: 10.215680 +epoch: 1, batch: 34808, sum loss: 4864.491211, avg loss: 2.583373, ppl: 13.241725 +epoch: 1, batch: 34809, sum loss: 4297.531250, avg loss: 2.436242, ppl: 11.430010 +epoch: 1, batch: 34810, sum loss: 5657.300293, avg loss: 2.914632, ppl: 18.442017 +epoch: 1, batch: 34811, sum loss: 3513.635010, avg loss: 2.289013, ppl: 9.865195 +epoch: 1, batch: 34812, sum loss: 4330.420898, avg loss: 2.517687, ppl: 12.399878 +epoch: 1, batch: 34813, sum loss: 5101.513672, avg loss: 2.757575, ppl: 15.761576 +epoch: 1, batch: 34814, sum loss: 3868.543213, avg loss: 2.414821, ppl: 11.187767 +epoch: 1, batch: 34815, sum loss: 4103.178711, avg loss: 2.542242, ppl: 12.708132 +epoch: 1, batch: 34816, sum loss: 4192.737305, avg loss: 2.643592, ppl: 14.063635 +epoch: 1, batch: 34817, sum loss: 4992.317383, avg loss: 2.602877, ppl: 13.502524 +epoch: 1, batch: 34818, sum loss: 4272.666016, avg loss: 2.516293, ppl: 12.382612 +epoch: 1, batch: 34819, sum loss: 4264.392090, avg loss: 2.552000, ppl: 12.832747 +epoch: 1, batch: 34820, sum loss: 4168.179199, avg loss: 2.430425, ppl: 11.363712 +epoch: 1, batch: 34821, sum loss: 5278.081543, avg loss: 2.779401, ppl: 16.109362 +epoch: 1, batch: 34822, sum loss: 4302.169922, avg loss: 2.585439, ppl: 13.269110 +epoch: 1, batch: 34823, sum loss: 4208.448242, avg loss: 2.599412, ppl: 13.455827 +epoch: 1, batch: 34824, sum loss: 
4221.475586, avg loss: 2.427531, ppl: 11.330866 +epoch: 1, batch: 34825, sum loss: 5174.927734, avg loss: 2.904000, ppl: 18.246988 +epoch: 1, batch: 34826, sum loss: 4590.533203, avg loss: 2.709878, ppl: 15.027438 +epoch: 1, batch: 34827, sum loss: 5343.173828, avg loss: 2.712271, ppl: 15.063445 +epoch: 1, batch: 34828, sum loss: 4632.188477, avg loss: 2.757255, ppl: 15.756533 +epoch: 1, batch: 34829, sum loss: 4304.058105, avg loss: 2.475019, ppl: 11.881935 +epoch: 1, batch: 34830, sum loss: 4286.882812, avg loss: 2.631604, ppl: 13.896041 +epoch: 1, batch: 34831, sum loss: 4424.208008, avg loss: 2.635026, ppl: 13.943668 +epoch: 1, batch: 34832, sum loss: 4076.849365, avg loss: 2.692767, ppl: 14.772497 +epoch: 1, batch: 34833, sum loss: 4981.619141, avg loss: 2.624668, ppl: 13.799991 +epoch: 1, batch: 34834, sum loss: 4948.580078, avg loss: 2.561377, ppl: 12.953640 +epoch: 1, batch: 34835, sum loss: 3279.776367, avg loss: 2.299984, ppl: 9.974018 +epoch: 1, batch: 34836, sum loss: 5018.472168, avg loss: 2.775704, ppl: 16.049917 +epoch: 1, batch: 34837, sum loss: 4501.071289, avg loss: 2.603280, ppl: 13.507973 +epoch: 1, batch: 34838, sum loss: 4671.520508, avg loss: 2.573841, ppl: 13.116102 +epoch: 1, batch: 34839, sum loss: 4041.048340, avg loss: 2.524078, ppl: 12.479380 +epoch: 1, batch: 34840, sum loss: 4233.430176, avg loss: 2.458438, ppl: 11.686542 +epoch: 1, batch: 34841, sum loss: 4424.655273, avg loss: 2.562047, ppl: 12.962324 +epoch: 1, batch: 34842, sum loss: 4339.246582, avg loss: 2.756827, ppl: 15.749795 +epoch: 1, batch: 34843, sum loss: 4296.815918, avg loss: 2.693928, ppl: 14.789663 +epoch: 1, batch: 34844, sum loss: 4879.997559, avg loss: 2.673971, ppl: 14.497427 +epoch: 1, batch: 34845, sum loss: 4162.203125, avg loss: 2.541027, ppl: 12.692703 +epoch: 1, batch: 34846, sum loss: 4141.688965, avg loss: 2.550301, ppl: 12.810960 +epoch: 1, batch: 34847, sum loss: 4062.012207, avg loss: 2.598856, ppl: 13.448347 +epoch: 1, batch: 34848, sum loss: 
5026.039551, avg loss: 2.766120, ppl: 15.896830 +epoch: 1, batch: 34849, sum loss: 4917.133789, avg loss: 2.646466, ppl: 14.104107 +epoch: 1, batch: 34850, sum loss: 4620.083496, avg loss: 2.743518, ppl: 15.541558 +epoch: 1, batch: 34851, sum loss: 4190.872559, avg loss: 2.607886, ppl: 13.570331 +epoch: 1, batch: 34852, sum loss: 4460.699219, avg loss: 2.723259, ppl: 15.229882 +epoch: 1, batch: 34853, sum loss: 3719.564453, avg loss: 2.532038, ppl: 12.579119 +epoch: 1, batch: 34854, sum loss: 5118.497070, avg loss: 2.756326, ppl: 15.741897 +epoch: 1, batch: 34855, sum loss: 4945.330078, avg loss: 2.775157, ppl: 16.041149 +epoch: 1, batch: 34856, sum loss: 4687.139648, avg loss: 2.959053, ppl: 19.279701 +epoch: 1, batch: 34857, sum loss: 4241.929688, avg loss: 2.325619, ppl: 10.233017 +epoch: 1, batch: 34858, sum loss: 3975.212158, avg loss: 2.574620, ppl: 13.126322 +epoch: 1, batch: 34859, sum loss: 4707.259766, avg loss: 2.537606, ppl: 12.649355 +epoch: 1, batch: 34860, sum loss: 4217.622070, avg loss: 2.488273, ppl: 12.040461 +epoch: 1, batch: 34861, sum loss: 5338.871094, avg loss: 2.735078, ppl: 15.410939 +epoch: 1, batch: 34862, sum loss: 4603.050781, avg loss: 2.545935, ppl: 12.755151 +epoch: 1, batch: 34863, sum loss: 4491.658203, avg loss: 2.715634, ppl: 15.114187 +epoch: 1, batch: 34864, sum loss: 4212.216797, avg loss: 2.474863, ppl: 11.880080 +epoch: 1, batch: 34865, sum loss: 4571.407227, avg loss: 2.995680, ppl: 19.998951 +epoch: 1, batch: 34866, sum loss: 3375.361328, avg loss: 2.198932, ppl: 9.015384 +epoch: 1, batch: 34867, sum loss: 4236.743652, avg loss: 2.546120, ppl: 12.757510 +epoch: 1, batch: 34868, sum loss: 4728.740723, avg loss: 2.789818, ppl: 16.278049 +epoch: 1, batch: 34869, sum loss: 4878.569336, avg loss: 2.717866, ppl: 15.147961 +epoch: 1, batch: 34870, sum loss: 4435.491211, avg loss: 2.577276, ppl: 13.161232 +epoch: 1, batch: 34871, sum loss: 4791.074219, avg loss: 2.511045, ppl: 12.317798 +epoch: 1, batch: 34872, sum loss: 
5012.588379, avg loss: 2.857804, ppl: 17.423225 +epoch: 1, batch: 34873, sum loss: 4359.049805, avg loss: 2.556628, ppl: 12.892265 +epoch: 1, batch: 34874, sum loss: 4042.311035, avg loss: 2.523290, ppl: 12.469556 +epoch: 1, batch: 34875, sum loss: 3339.785156, avg loss: 2.325756, ppl: 10.234410 +epoch: 1, batch: 34876, sum loss: 4996.860352, avg loss: 2.896731, ppl: 18.114830 +epoch: 1, batch: 34877, sum loss: 4526.837891, avg loss: 2.609128, ppl: 13.587204 +epoch: 1, batch: 34878, sum loss: 4308.383301, avg loss: 2.601681, ppl: 13.486386 +epoch: 1, batch: 34879, sum loss: 3509.834473, avg loss: 2.384398, ppl: 10.852532 +epoch: 1, batch: 34880, sum loss: 4495.469727, avg loss: 2.573251, ppl: 13.108371 +epoch: 1, batch: 34881, sum loss: 4879.975098, avg loss: 2.758607, ppl: 15.777843 +epoch: 1, batch: 34882, sum loss: 3997.192139, avg loss: 2.441779, ppl: 11.493471 +epoch: 1, batch: 34883, sum loss: 3787.679932, avg loss: 2.351136, ppl: 10.497488 +epoch: 1, batch: 34884, sum loss: 4106.669434, avg loss: 2.472408, ppl: 11.850947 +epoch: 1, batch: 34885, sum loss: 4876.541016, avg loss: 2.634544, ppl: 13.936957 +epoch: 1, batch: 34886, sum loss: 3788.679932, avg loss: 2.529159, ppl: 12.542951 +epoch: 1, batch: 34887, sum loss: 4486.039062, avg loss: 2.462151, ppl: 11.730017 +epoch: 1, batch: 34888, sum loss: 4616.167480, avg loss: 2.550369, ppl: 12.811831 +epoch: 1, batch: 34889, sum loss: 4957.975098, avg loss: 2.567569, ppl: 13.034097 +epoch: 1, batch: 34890, sum loss: 3762.885254, avg loss: 2.324203, ppl: 10.218537 +epoch: 1, batch: 34891, sum loss: 3757.758789, avg loss: 2.443276, ppl: 11.510690 +epoch: 1, batch: 34892, sum loss: 5134.409668, avg loss: 2.719497, ppl: 15.172685 +epoch: 1, batch: 34893, sum loss: 4947.754395, avg loss: 2.782764, ppl: 16.163639 +epoch: 1, batch: 34894, sum loss: 5173.625488, avg loss: 2.768125, ppl: 15.928737 +epoch: 1, batch: 34895, sum loss: 4204.045898, avg loss: 2.629172, ppl: 13.862285 +epoch: 1, batch: 34896, sum loss: 
4286.949219, avg loss: 2.449685, ppl: 11.584701 +epoch: 1, batch: 34897, sum loss: 5868.001953, avg loss: 2.925225, ppl: 18.638424 +epoch: 1, batch: 34898, sum loss: 5141.813477, avg loss: 2.680820, ppl: 14.597065 +epoch: 1, batch: 34899, sum loss: 5005.001465, avg loss: 2.669334, ppl: 14.430358 +epoch: 1, batch: 34900, sum loss: 4201.277344, avg loss: 2.624158, ppl: 13.792961 +epoch: 1, batch: 34901, sum loss: 3516.488525, avg loss: 2.156032, ppl: 8.636802 +epoch: 1, batch: 34902, sum loss: 5054.356445, avg loss: 2.720321, ppl: 15.185195 +epoch: 1, batch: 34903, sum loss: 4443.849609, avg loss: 2.489552, ppl: 12.055871 +epoch: 1, batch: 34904, sum loss: 4698.663086, avg loss: 2.580265, ppl: 13.200640 +epoch: 1, batch: 34905, sum loss: 4530.129883, avg loss: 2.649199, ppl: 14.142702 +epoch: 1, batch: 34906, sum loss: 3870.011230, avg loss: 2.566320, ppl: 13.017836 +epoch: 1, batch: 34907, sum loss: 4229.400879, avg loss: 2.597912, ppl: 13.435656 +epoch: 1, batch: 34908, sum loss: 3681.011963, avg loss: 2.266633, ppl: 9.646866 +epoch: 1, batch: 34909, sum loss: 4597.359375, avg loss: 2.741419, ppl: 15.508974 +epoch: 1, batch: 34910, sum loss: 4753.687500, avg loss: 2.673615, ppl: 14.492268 +epoch: 1, batch: 34911, sum loss: 3880.875732, avg loss: 2.445416, ppl: 11.535353 +epoch: 1, batch: 34912, sum loss: 4305.237793, avg loss: 2.488577, ppl: 12.044121 +epoch: 1, batch: 34913, sum loss: 4994.660156, avg loss: 2.683858, ppl: 14.641477 +epoch: 1, batch: 34914, sum loss: 4918.302734, avg loss: 2.396834, ppl: 10.988328 +epoch: 1, batch: 34915, sum loss: 4717.917480, avg loss: 2.608025, ppl: 13.572224 +epoch: 1, batch: 34916, sum loss: 4351.460449, avg loss: 2.303579, ppl: 10.009943 +epoch: 1, batch: 34917, sum loss: 3600.230469, avg loss: 2.357715, ppl: 10.566777 +epoch: 1, batch: 34918, sum loss: 4509.410156, avg loss: 2.495523, ppl: 12.128075 +epoch: 1, batch: 34919, sum loss: 4308.025391, avg loss: 2.422961, ppl: 11.279213 +epoch: 1, batch: 34920, sum loss: 
4490.935547, avg loss: 2.430160, ppl: 11.360698 +epoch: 1, batch: 34921, sum loss: 3585.464600, avg loss: 2.285191, ppl: 9.827561 +epoch: 1, batch: 34922, sum loss: 3837.510254, avg loss: 2.307583, ppl: 10.050103 +epoch: 1, batch: 34923, sum loss: 4131.432617, avg loss: 2.375752, ppl: 10.759101 +epoch: 1, batch: 34924, sum loss: 5575.900879, avg loss: 2.836165, ppl: 17.050255 +epoch: 1, batch: 34925, sum loss: 4351.062012, avg loss: 2.648242, ppl: 14.129181 +epoch: 1, batch: 34926, sum loss: 4903.394531, avg loss: 2.939685, ppl: 18.909891 +epoch: 1, batch: 34927, sum loss: 4053.570801, avg loss: 2.418598, ppl: 11.230108 +epoch: 1, batch: 34928, sum loss: 3796.854980, avg loss: 2.440138, ppl: 11.474626 +epoch: 1, batch: 34929, sum loss: 4882.990723, avg loss: 2.646607, ppl: 14.106101 +epoch: 1, batch: 34930, sum loss: 5028.859863, avg loss: 2.546258, ppl: 12.759272 +epoch: 1, batch: 34931, sum loss: 5381.452637, avg loss: 2.709694, ppl: 15.024679 +epoch: 1, batch: 34932, sum loss: 4905.942383, avg loss: 2.705981, ppl: 14.968987 +epoch: 1, batch: 34933, sum loss: 4849.543457, avg loss: 2.621375, ppl: 13.754622 +epoch: 1, batch: 34934, sum loss: 4909.684570, avg loss: 2.772267, ppl: 15.994851 +epoch: 1, batch: 34935, sum loss: 4384.109863, avg loss: 2.380081, ppl: 10.805782 +epoch: 1, batch: 34936, sum loss: 4250.684570, avg loss: 2.522661, ppl: 12.461719 +epoch: 1, batch: 34937, sum loss: 3626.054932, avg loss: 2.359177, ppl: 10.582240 +epoch: 1, batch: 34938, sum loss: 4214.091797, avg loss: 2.524920, ppl: 12.489899 +epoch: 1, batch: 34939, sum loss: 4845.248047, avg loss: 2.634719, ppl: 13.939393 +epoch: 1, batch: 34940, sum loss: 5156.222168, avg loss: 2.708100, ppl: 15.000749 +epoch: 1, batch: 34941, sum loss: 3260.908936, avg loss: 2.403028, ppl: 11.056605 +epoch: 1, batch: 34942, sum loss: 4244.708008, avg loss: 2.578802, ppl: 13.181339 +epoch: 1, batch: 34943, sum loss: 4401.497070, avg loss: 2.621499, ppl: 13.756330 +epoch: 1, batch: 34944, sum loss: 
5520.878418, avg loss: 2.714296, ppl: 15.093978 +epoch: 1, batch: 34945, sum loss: 4315.589844, avg loss: 2.526692, ppl: 12.512047 +epoch: 1, batch: 34946, sum loss: 4092.356445, avg loss: 2.532399, ppl: 12.583654 +epoch: 1, batch: 34947, sum loss: 5683.290527, avg loss: 3.007032, ppl: 20.227280 +epoch: 1, batch: 34948, sum loss: 5137.270508, avg loss: 2.933907, ppl: 18.800938 +epoch: 1, batch: 34949, sum loss: 4487.065918, avg loss: 2.672463, ppl: 14.475581 +epoch: 1, batch: 34950, sum loss: 3726.345215, avg loss: 2.346565, ppl: 10.449614 +epoch: 1, batch: 34951, sum loss: 3986.507568, avg loss: 2.581935, ppl: 13.222702 +epoch: 1, batch: 34952, sum loss: 5647.957520, avg loss: 2.715364, ppl: 15.110112 +epoch: 1, batch: 34953, sum loss: 4518.735352, avg loss: 2.383299, ppl: 10.840611 +epoch: 1, batch: 34954, sum loss: 5000.416504, avg loss: 2.810802, ppl: 16.623240 +epoch: 1, batch: 34955, sum loss: 4598.864258, avg loss: 2.692543, ppl: 14.769190 +epoch: 1, batch: 34956, sum loss: 4855.472168, avg loss: 2.966080, ppl: 19.415655 +epoch: 1, batch: 34957, sum loss: 4515.745605, avg loss: 2.542650, ppl: 12.713310 +epoch: 1, batch: 34958, sum loss: 5279.241699, avg loss: 2.842887, ppl: 17.165253 +epoch: 1, batch: 34959, sum loss: 3626.983887, avg loss: 2.292657, ppl: 9.901212 +epoch: 1, batch: 34960, sum loss: 4057.887207, avg loss: 2.378597, ppl: 10.789757 +epoch: 1, batch: 34961, sum loss: 4238.546875, avg loss: 2.675850, ppl: 14.524696 +epoch: 1, batch: 34962, sum loss: 4047.491943, avg loss: 2.523374, ppl: 12.470603 +epoch: 1, batch: 34963, sum loss: 4817.954102, avg loss: 2.660383, ppl: 14.301772 +epoch: 1, batch: 34964, sum loss: 4459.691895, avg loss: 2.637311, ppl: 13.975566 +epoch: 1, batch: 34965, sum loss: 4095.860840, avg loss: 2.449678, ppl: 11.584613 +epoch: 1, batch: 34966, sum loss: 4017.960205, avg loss: 2.454466, ppl: 11.640212 +epoch: 1, batch: 34967, sum loss: 5277.498047, avg loss: 2.894953, ppl: 18.082644 +epoch: 1, batch: 34968, sum loss: 
4244.708984, avg loss: 2.574111, ppl: 13.119648 +epoch: 1, batch: 34969, sum loss: 4740.163086, avg loss: 2.888582, ppl: 17.967813 +epoch: 1, batch: 34970, sum loss: 4679.127441, avg loss: 2.642082, ppl: 14.042409 +epoch: 1, batch: 34971, sum loss: 3765.653809, avg loss: 2.527285, ppl: 12.519464 +epoch: 1, batch: 34972, sum loss: 3978.716309, avg loss: 2.588625, ppl: 13.311455 +epoch: 1, batch: 34973, sum loss: 4857.860352, avg loss: 2.552738, ppl: 12.842217 +epoch: 1, batch: 34974, sum loss: 4528.314453, avg loss: 2.663715, ppl: 14.349494 +epoch: 1, batch: 34975, sum loss: 4158.973145, avg loss: 2.568853, ppl: 13.050848 +epoch: 1, batch: 34976, sum loss: 4811.919922, avg loss: 2.635225, ppl: 13.946445 +epoch: 1, batch: 34977, sum loss: 4410.668457, avg loss: 2.530504, ppl: 12.559834 +epoch: 1, batch: 34978, sum loss: 3674.060791, avg loss: 2.293421, ppl: 9.908780 +epoch: 1, batch: 34979, sum loss: 4075.662109, avg loss: 2.431779, ppl: 11.379112 +epoch: 1, batch: 34980, sum loss: 3265.114014, avg loss: 2.284894, ppl: 9.824650 +epoch: 1, batch: 34981, sum loss: 4257.033691, avg loss: 2.662310, ppl: 14.329350 +epoch: 1, batch: 34982, sum loss: 3731.734375, avg loss: 2.406018, ppl: 11.089717 +epoch: 1, batch: 34983, sum loss: 4945.428223, avg loss: 2.663128, ppl: 14.341073 +epoch: 1, batch: 34984, sum loss: 4422.465820, avg loss: 2.664136, ppl: 14.355543 +epoch: 1, batch: 34985, sum loss: 4704.636230, avg loss: 2.498479, ppl: 12.163980 +epoch: 1, batch: 34986, sum loss: 4359.556152, avg loss: 2.819894, ppl: 16.775074 +epoch: 1, batch: 34987, sum loss: 4036.376709, avg loss: 2.477825, ppl: 11.915319 +epoch: 1, batch: 34988, sum loss: 4166.024902, avg loss: 2.660297, ppl: 14.300534 +epoch: 1, batch: 34989, sum loss: 4700.584473, avg loss: 2.802972, ppl: 16.493599 +epoch: 1, batch: 34990, sum loss: 4279.291992, avg loss: 2.542657, ppl: 12.713408 +epoch: 1, batch: 34991, sum loss: 3968.507812, avg loss: 2.425738, ppl: 11.310577 +epoch: 1, batch: 34992, sum loss: 
3725.881348, avg loss: 2.529451, ppl: 12.546618 +epoch: 1, batch: 34993, sum loss: 4095.311523, avg loss: 2.491066, ppl: 12.074134 +epoch: 1, batch: 34994, sum loss: 3737.505859, avg loss: 2.263783, ppl: 9.619412 +epoch: 1, batch: 34995, sum loss: 4610.963867, avg loss: 2.663757, ppl: 14.350102 +epoch: 1, batch: 34996, sum loss: 5289.272949, avg loss: 2.794122, ppl: 16.348269 +epoch: 1, batch: 34997, sum loss: 4399.101562, avg loss: 2.586185, ppl: 13.279021 +epoch: 1, batch: 34998, sum loss: 4203.702637, avg loss: 2.555442, ppl: 12.876994 +epoch: 1, batch: 34999, sum loss: 4544.982422, avg loss: 2.739592, ppl: 15.480661 +epoch: 1, batch: 35000, sum loss: 3970.628662, avg loss: 2.449493, ppl: 11.582475 +epoch: 1, batch: 35001, sum loss: 4463.664551, avg loss: 2.601203, ppl: 13.479947 +epoch: 1, batch: 35002, sum loss: 4031.110352, avg loss: 2.457994, ppl: 11.681357 +epoch: 1, batch: 35003, sum loss: 4450.385254, avg loss: 2.682571, ppl: 14.622639 +epoch: 1, batch: 35004, sum loss: 5668.761719, avg loss: 2.889277, ppl: 17.980305 +epoch: 1, batch: 35005, sum loss: 4485.200195, avg loss: 2.625995, ppl: 13.818322 +epoch: 1, batch: 35006, sum loss: 4212.667480, avg loss: 2.712600, ppl: 15.068402 +epoch: 1, batch: 35007, sum loss: 4790.350098, avg loss: 2.735780, ppl: 15.421764 +epoch: 1, batch: 35008, sum loss: 4855.765625, avg loss: 2.679782, ppl: 14.581920 +epoch: 1, batch: 35009, sum loss: 3780.764160, avg loss: 2.339582, ppl: 10.376895 +epoch: 1, batch: 35010, sum loss: 3575.476074, avg loss: 2.311232, ppl: 10.086844 +epoch: 1, batch: 35011, sum loss: 4708.883789, avg loss: 2.673983, ppl: 14.497596 +epoch: 1, batch: 35012, sum loss: 4292.850586, avg loss: 2.649908, ppl: 14.152734 +epoch: 1, batch: 35013, sum loss: 3702.318848, avg loss: 2.549806, ppl: 12.804624 +epoch: 1, batch: 35014, sum loss: 4401.320312, avg loss: 2.716865, ppl: 15.132800 +epoch: 1, batch: 35015, sum loss: 4599.740234, avg loss: 2.724965, ppl: 15.255874 +epoch: 1, batch: 35016, sum loss: 
4356.401367, avg loss: 2.641844, ppl: 14.039072 +epoch: 1, batch: 35017, sum loss: 4954.643555, avg loss: 2.858998, ppl: 17.444036 +epoch: 1, batch: 35018, sum loss: 4063.961426, avg loss: 2.570500, ppl: 13.072364 +epoch: 1, batch: 35019, sum loss: 4595.337891, avg loss: 2.597704, ppl: 13.432859 +epoch: 1, batch: 35020, sum loss: 4811.617676, avg loss: 2.639395, ppl: 14.004735 +epoch: 1, batch: 35021, sum loss: 5055.325195, avg loss: 2.915412, ppl: 18.456423 +epoch: 1, batch: 35022, sum loss: 4827.833496, avg loss: 2.797122, ppl: 16.397394 +epoch: 1, batch: 35023, sum loss: 4369.365234, avg loss: 2.611695, ppl: 13.622118 +epoch: 1, batch: 35024, sum loss: 4754.814941, avg loss: 2.470034, ppl: 11.822845 +epoch: 1, batch: 35025, sum loss: 5223.964844, avg loss: 2.598988, ppl: 13.450114 +epoch: 1, batch: 35026, sum loss: 5172.296387, avg loss: 2.780805, ppl: 16.131996 +epoch: 1, batch: 35027, sum loss: 4107.203125, avg loss: 2.369996, ppl: 10.697348 +epoch: 1, batch: 35028, sum loss: 5179.692871, avg loss: 2.841302, ppl: 17.138060 +epoch: 1, batch: 35029, sum loss: 4503.939453, avg loss: 2.696970, ppl: 14.834711 +epoch: 1, batch: 35030, sum loss: 4350.190918, avg loss: 2.569516, ppl: 13.059505 +epoch: 1, batch: 35031, sum loss: 5341.947754, avg loss: 2.775038, ppl: 16.039232 +epoch: 1, batch: 35032, sum loss: 3976.867920, avg loss: 2.447303, ppl: 11.557138 +epoch: 1, batch: 35033, sum loss: 5680.220215, avg loss: 2.798138, ppl: 16.414057 +epoch: 1, batch: 35034, sum loss: 4748.159180, avg loss: 2.672009, ppl: 14.469001 +epoch: 1, batch: 35035, sum loss: 5290.725586, avg loss: 2.662670, ppl: 14.334510 +epoch: 1, batch: 35036, sum loss: 3789.701904, avg loss: 2.470471, ppl: 11.828012 +epoch: 1, batch: 35037, sum loss: 3923.001953, avg loss: 2.336511, ppl: 10.345078 +epoch: 1, batch: 35038, sum loss: 5530.299316, avg loss: 3.117418, ppl: 22.587978 +epoch: 1, batch: 35039, sum loss: 4001.914307, avg loss: 2.386353, ppl: 10.873768 +epoch: 1, batch: 35040, sum loss: 
5067.316406, avg loss: 2.788837, ppl: 16.262091 +epoch: 1, batch: 35041, sum loss: 4779.207031, avg loss: 2.741943, ppl: 15.517107 +epoch: 1, batch: 35042, sum loss: 4505.270020, avg loss: 2.752150, ppl: 15.676305 +epoch: 1, batch: 35043, sum loss: 5793.351562, avg loss: 2.998629, ppl: 20.058025 +epoch: 1, batch: 35044, sum loss: 4507.694336, avg loss: 2.508456, ppl: 12.285940 +epoch: 1, batch: 35045, sum loss: 4767.775391, avg loss: 2.592592, ppl: 13.364361 +epoch: 1, batch: 35046, sum loss: 4493.159668, avg loss: 2.655532, ppl: 14.232551 +epoch: 1, batch: 35047, sum loss: 5490.067871, avg loss: 2.903262, ppl: 18.233521 +epoch: 1, batch: 35048, sum loss: 4986.666504, avg loss: 2.695495, ppl: 14.812855 +epoch: 1, batch: 35049, sum loss: 4187.521484, avg loss: 2.436022, ppl: 11.427489 +epoch: 1, batch: 35050, sum loss: 5630.480469, avg loss: 2.671006, ppl: 14.454503 +epoch: 1, batch: 35051, sum loss: 5561.478516, avg loss: 2.828829, ppl: 16.925634 +epoch: 1, batch: 35052, sum loss: 5394.104980, avg loss: 2.582147, ppl: 13.225501 +epoch: 1, batch: 35053, sum loss: 3780.675293, avg loss: 2.466194, ppl: 11.777535 +epoch: 1, batch: 35054, sum loss: 4100.525391, avg loss: 2.504903, ppl: 12.242367 +epoch: 1, batch: 35055, sum loss: 4425.613281, avg loss: 2.503175, ppl: 12.221235 +epoch: 1, batch: 35056, sum loss: 5100.519531, avg loss: 2.830477, ppl: 16.953547 +epoch: 1, batch: 35057, sum loss: 4506.215820, avg loss: 2.616850, ppl: 13.692523 +epoch: 1, batch: 35058, sum loss: 4619.908691, avg loss: 2.776387, ppl: 16.060896 +epoch: 1, batch: 35059, sum loss: 2980.491699, avg loss: 2.101898, ppl: 8.181685 +epoch: 1, batch: 35060, sum loss: 4795.160645, avg loss: 2.696941, ppl: 14.834283 +epoch: 1, batch: 35061, sum loss: 4058.924805, avg loss: 2.437793, ppl: 11.447745 +epoch: 1, batch: 35062, sum loss: 5633.619629, avg loss: 2.855357, ppl: 17.380644 +epoch: 1, batch: 35063, sum loss: 5030.928223, avg loss: 2.812145, ppl: 16.645592 +epoch: 1, batch: 35064, sum loss: 
4328.631348, avg loss: 2.595103, ppl: 13.397964 +epoch: 1, batch: 35065, sum loss: 4579.422852, avg loss: 2.414034, ppl: 11.178967 +epoch: 1, batch: 35066, sum loss: 4140.032227, avg loss: 2.409798, ppl: 11.131708 +epoch: 1, batch: 35067, sum loss: 3822.150879, avg loss: 2.132897, ppl: 8.439280 +epoch: 1, batch: 35068, sum loss: 4809.593750, avg loss: 2.791407, ppl: 16.303938 +epoch: 1, batch: 35069, sum loss: 4365.353516, avg loss: 2.701333, ppl: 14.899573 +epoch: 1, batch: 35070, sum loss: 3319.520508, avg loss: 2.426550, ppl: 11.319763 +epoch: 1, batch: 35071, sum loss: 4799.391602, avg loss: 2.664848, ppl: 14.365767 +epoch: 1, batch: 35072, sum loss: 4233.234375, avg loss: 2.637529, ppl: 13.978621 +epoch: 1, batch: 35073, sum loss: 4764.389648, avg loss: 2.553263, ppl: 12.848968 +epoch: 1, batch: 35074, sum loss: 4299.030762, avg loss: 2.658646, ppl: 14.276947 +epoch: 1, batch: 35075, sum loss: 4108.730469, avg loss: 2.676697, ppl: 14.537002 +epoch: 1, batch: 35076, sum loss: 4487.320312, avg loss: 2.768242, ppl: 15.930601 +epoch: 1, batch: 35077, sum loss: 4267.061035, avg loss: 2.633988, ppl: 13.929214 +epoch: 1, batch: 35078, sum loss: 4148.305664, avg loss: 2.546535, ppl: 12.762804 +epoch: 1, batch: 35079, sum loss: 3877.770020, avg loss: 2.343064, ppl: 10.413089 +epoch: 1, batch: 35080, sum loss: 4362.266602, avg loss: 2.640597, ppl: 14.021577 +epoch: 1, batch: 35081, sum loss: 3982.080811, avg loss: 2.707057, ppl: 14.985109 +epoch: 1, batch: 35082, sum loss: 4193.190918, avg loss: 2.373057, ppl: 10.730141 +epoch: 1, batch: 35083, sum loss: 3874.294189, avg loss: 2.373955, ppl: 10.739781 +epoch: 1, batch: 35084, sum loss: 4729.777344, avg loss: 2.762720, ppl: 15.842882 +epoch: 1, batch: 35085, sum loss: 4252.567383, avg loss: 2.740057, ppl: 15.487861 +epoch: 1, batch: 35086, sum loss: 5061.909180, avg loss: 2.867937, ppl: 17.600676 +epoch: 1, batch: 35087, sum loss: 4031.902100, avg loss: 2.467505, ppl: 11.792981 +epoch: 1, batch: 35088, sum loss: 
4284.558594, avg loss: 2.545786, ppl: 12.753253 +epoch: 1, batch: 35089, sum loss: 4087.177002, avg loss: 2.537044, ppl: 12.642240 +epoch: 1, batch: 35090, sum loss: 4400.971680, avg loss: 2.662415, ppl: 14.330853 +epoch: 1, batch: 35091, sum loss: 4188.631836, avg loss: 2.577620, ppl: 13.165760 +epoch: 1, batch: 35092, sum loss: 3856.687012, avg loss: 2.663458, ppl: 14.345809 +epoch: 1, batch: 35093, sum loss: 4676.775879, avg loss: 2.660282, ppl: 14.300320 +epoch: 1, batch: 35094, sum loss: 5194.494629, avg loss: 2.749865, ppl: 15.640517 +epoch: 1, batch: 35095, sum loss: 4489.163574, avg loss: 2.437114, ppl: 11.439975 +epoch: 1, batch: 35096, sum loss: 5085.249023, avg loss: 2.576114, ppl: 13.145953 +epoch: 1, batch: 35097, sum loss: 2720.886475, avg loss: 2.109214, ppl: 8.241763 +epoch: 1, batch: 35098, sum loss: 4824.209961, avg loss: 2.646303, ppl: 14.101807 +epoch: 1, batch: 35099, sum loss: 4837.531738, avg loss: 2.647801, ppl: 14.122944 +epoch: 1, batch: 35100, sum loss: 4240.305176, avg loss: 2.449627, ppl: 11.584030 +epoch: 1, batch: 35101, sum loss: 5799.007812, avg loss: 2.867956, ppl: 17.601007 +epoch: 1, batch: 35102, sum loss: 3554.279541, avg loss: 2.318512, ppl: 10.160548 +epoch: 1, batch: 35103, sum loss: 4571.953125, avg loss: 2.635132, ppl: 13.945148 +epoch: 1, batch: 35104, sum loss: 3749.493652, avg loss: 2.526613, ppl: 12.511060 +epoch: 1, batch: 35105, sum loss: 4844.750977, avg loss: 2.663414, ppl: 14.345187 +epoch: 1, batch: 35106, sum loss: 4101.265137, avg loss: 2.680566, ppl: 14.593345 +epoch: 1, batch: 35107, sum loss: 4611.925293, avg loss: 2.655110, ppl: 14.226546 +epoch: 1, batch: 35108, sum loss: 4144.580078, avg loss: 2.436555, ppl: 11.433583 +epoch: 1, batch: 35109, sum loss: 5625.875488, avg loss: 2.745669, ppl: 15.575028 +epoch: 1, batch: 35110, sum loss: 4544.870605, avg loss: 2.616506, ppl: 13.687813 +epoch: 1, batch: 35111, sum loss: 4407.439453, avg loss: 2.553557, ppl: 12.852738 +epoch: 1, batch: 35112, sum loss: 
5125.430664, avg loss: 2.788591, ppl: 16.258099 +epoch: 1, batch: 35113, sum loss: 4191.713379, avg loss: 2.548154, ppl: 12.783483 +epoch: 1, batch: 35114, sum loss: 3636.141357, avg loss: 2.505955, ppl: 12.255262 +epoch: 1, batch: 35115, sum loss: 4796.907227, avg loss: 2.840087, ppl: 17.117258 +epoch: 1, batch: 35116, sum loss: 5214.729492, avg loss: 2.762039, ppl: 15.832090 +epoch: 1, batch: 35117, sum loss: 5521.666016, avg loss: 2.838903, ppl: 17.096998 +epoch: 1, batch: 35118, sum loss: 4700.491211, avg loss: 2.561576, ppl: 12.956216 +epoch: 1, batch: 35119, sum loss: 4637.635742, avg loss: 2.713655, ppl: 15.084305 +epoch: 1, batch: 35120, sum loss: 3978.135742, avg loss: 2.344217, ppl: 10.425105 +epoch: 1, batch: 35121, sum loss: 3572.124512, avg loss: 2.340842, ppl: 10.389978 +epoch: 1, batch: 35122, sum loss: 4336.329102, avg loss: 2.647331, ppl: 14.116319 +epoch: 1, batch: 35123, sum loss: 3936.145020, avg loss: 2.351341, ppl: 10.499643 +epoch: 1, batch: 35124, sum loss: 4769.667969, avg loss: 2.774676, ppl: 16.033432 +epoch: 1, batch: 35125, sum loss: 4499.377441, avg loss: 2.710468, ppl: 15.036315 +epoch: 1, batch: 35126, sum loss: 4177.528320, avg loss: 2.583506, ppl: 13.243484 +epoch: 1, batch: 35127, sum loss: 4584.454102, avg loss: 2.684107, ppl: 14.645111 +epoch: 1, batch: 35128, sum loss: 4559.125977, avg loss: 2.726750, ppl: 15.283138 +epoch: 1, batch: 35129, sum loss: 3815.098145, avg loss: 2.366686, ppl: 10.662001 +epoch: 1, batch: 35130, sum loss: 4861.631836, avg loss: 2.544025, ppl: 12.730809 +epoch: 1, batch: 35131, sum loss: 4374.346680, avg loss: 2.561093, ppl: 12.949962 +epoch: 1, batch: 35132, sum loss: 4434.137207, avg loss: 2.597620, ppl: 13.431732 +epoch: 1, batch: 35133, sum loss: 3819.239746, avg loss: 2.486484, ppl: 12.018947 +epoch: 1, batch: 35134, sum loss: 4410.026855, avg loss: 2.771858, ppl: 15.988320 +epoch: 1, batch: 35135, sum loss: 3952.031738, avg loss: 2.465397, ppl: 11.768154 +epoch: 1, batch: 35136, sum loss: 
4465.033203, avg loss: 2.893735, ppl: 18.060644 +epoch: 1, batch: 35137, sum loss: 5492.858887, avg loss: 3.024702, ppl: 20.587866 +epoch: 1, batch: 35138, sum loss: 3742.332764, avg loss: 2.449171, ppl: 11.578739 +epoch: 1, batch: 35139, sum loss: 4923.865723, avg loss: 2.681844, ppl: 14.612013 +epoch: 1, batch: 35140, sum loss: 4754.778320, avg loss: 2.750016, ppl: 15.642889 +epoch: 1, batch: 35141, sum loss: 3357.298828, avg loss: 2.262331, ppl: 9.605452 +epoch: 1, batch: 35142, sum loss: 4365.073730, avg loss: 2.511550, ppl: 12.324017 +epoch: 1, batch: 35143, sum loss: 4537.558105, avg loss: 2.504171, ppl: 12.233415 +epoch: 1, batch: 35144, sum loss: 3996.610352, avg loss: 2.601960, ppl: 13.490152 +epoch: 1, batch: 35145, sum loss: 4456.210449, avg loss: 2.633694, ppl: 13.925117 +epoch: 1, batch: 35146, sum loss: 4766.014160, avg loss: 2.577617, ppl: 13.165729 +epoch: 1, batch: 35147, sum loss: 4676.234863, avg loss: 2.696790, ppl: 14.832051 +epoch: 1, batch: 35148, sum loss: 3880.040527, avg loss: 2.521144, ppl: 12.442822 +epoch: 1, batch: 35149, sum loss: 5159.123535, avg loss: 2.811512, ppl: 16.635044 +epoch: 1, batch: 35150, sum loss: 4159.778320, avg loss: 2.387932, ppl: 10.890951 +epoch: 1, batch: 35151, sum loss: 3830.637451, avg loss: 2.201516, ppl: 9.038703 +epoch: 1, batch: 35152, sum loss: 4386.075195, avg loss: 2.601468, ppl: 13.483519 +epoch: 1, batch: 35153, sum loss: 4545.050781, avg loss: 2.705387, ppl: 14.960111 +epoch: 1, batch: 35154, sum loss: 4218.603516, avg loss: 2.558280, ppl: 12.913584 +epoch: 1, batch: 35155, sum loss: 4373.201660, avg loss: 2.575502, ppl: 13.137906 +epoch: 1, batch: 35156, sum loss: 4077.836426, avg loss: 2.495616, ppl: 12.129202 +epoch: 1, batch: 35157, sum loss: 4556.329590, avg loss: 2.661407, ppl: 14.316425 +epoch: 1, batch: 35158, sum loss: 4730.066406, avg loss: 2.734142, ppl: 15.396532 +epoch: 1, batch: 35159, sum loss: 4284.277344, avg loss: 2.579336, ppl: 13.188380 +epoch: 1, batch: 35160, sum loss: 
3528.087891, avg loss: 2.444967, ppl: 11.530172 +epoch: 1, batch: 35161, sum loss: 5653.423340, avg loss: 2.975486, ppl: 19.599152 +epoch: 1, batch: 35162, sum loss: 5100.052734, avg loss: 2.805310, ppl: 16.532192 +epoch: 1, batch: 35163, sum loss: 4483.553711, avg loss: 2.481214, ppl: 11.955768 +epoch: 1, batch: 35164, sum loss: 4634.630859, avg loss: 2.550705, ppl: 12.816135 +epoch: 1, batch: 35165, sum loss: 3964.103027, avg loss: 2.337325, ppl: 10.353502 +epoch: 1, batch: 35166, sum loss: 4488.385254, avg loss: 2.482514, ppl: 11.971321 +epoch: 1, batch: 35167, sum loss: 4155.692871, avg loss: 2.577973, ppl: 13.170416 +epoch: 1, batch: 35168, sum loss: 4155.907227, avg loss: 2.472283, ppl: 11.849464 +epoch: 1, batch: 35169, sum loss: 5237.850586, avg loss: 2.792031, ppl: 16.314125 +epoch: 1, batch: 35170, sum loss: 3548.624023, avg loss: 2.370490, ppl: 10.702639 +epoch: 1, batch: 35171, sum loss: 5503.300293, avg loss: 2.786481, ppl: 16.223829 +epoch: 1, batch: 35172, sum loss: 4186.229980, avg loss: 2.484410, ppl: 11.994037 +epoch: 1, batch: 35173, sum loss: 5093.181641, avg loss: 2.594591, ppl: 13.391108 +epoch: 1, batch: 35174, sum loss: 4461.133789, avg loss: 2.520415, ppl: 12.433750 +epoch: 1, batch: 35175, sum loss: 4121.349121, avg loss: 2.487235, ppl: 12.027976 +epoch: 1, batch: 35176, sum loss: 4222.970215, avg loss: 2.491428, ppl: 12.078510 +epoch: 1, batch: 35177, sum loss: 3793.667480, avg loss: 2.420975, ppl: 11.256829 +epoch: 1, batch: 35178, sum loss: 5286.653320, avg loss: 2.783914, ppl: 16.182240 +epoch: 1, batch: 35179, sum loss: 3606.187988, avg loss: 2.459883, ppl: 11.703439 +epoch: 1, batch: 35180, sum loss: 3967.741943, avg loss: 2.533679, ppl: 12.599779 +epoch: 1, batch: 35181, sum loss: 4238.757324, avg loss: 2.573623, ppl: 13.113247 +epoch: 1, batch: 35182, sum loss: 3622.619141, avg loss: 2.451028, ppl: 11.600265 +epoch: 1, batch: 35183, sum loss: 4480.991699, avg loss: 2.696144, ppl: 14.822471 +epoch: 1, batch: 35184, sum loss: 
4172.847656, avg loss: 2.544419, ppl: 12.735830 +epoch: 1, batch: 35185, sum loss: 5351.253906, avg loss: 2.647825, ppl: 14.123287 +epoch: 1, batch: 35186, sum loss: 4139.863281, avg loss: 2.324460, ppl: 10.221160 +epoch: 1, batch: 35187, sum loss: 4032.153809, avg loss: 2.289696, ppl: 9.871931 +epoch: 1, batch: 35188, sum loss: 4088.664551, avg loss: 2.463051, ppl: 11.740579 +epoch: 1, batch: 35189, sum loss: 3842.875732, avg loss: 2.447692, ppl: 11.561628 +epoch: 1, batch: 35190, sum loss: 3872.177734, avg loss: 2.571167, ppl: 13.081084 +epoch: 1, batch: 35191, sum loss: 4789.884766, avg loss: 2.534331, ppl: 12.607988 +epoch: 1, batch: 35192, sum loss: 4876.771973, avg loss: 2.830396, ppl: 16.952171 +epoch: 1, batch: 35193, sum loss: 5817.474609, avg loss: 2.825388, ppl: 16.867491 +epoch: 1, batch: 35194, sum loss: 4330.312500, avg loss: 2.533828, ppl: 12.601656 +epoch: 1, batch: 35195, sum loss: 4327.613281, avg loss: 2.602293, ppl: 13.494646 +epoch: 1, batch: 35196, sum loss: 4281.665039, avg loss: 2.844960, ppl: 17.200874 +epoch: 1, batch: 35197, sum loss: 3772.417236, avg loss: 2.418216, ppl: 11.225817 +epoch: 1, batch: 35198, sum loss: 3868.578125, avg loss: 2.369001, ppl: 10.686707 +epoch: 1, batch: 35199, sum loss: 4086.214844, avg loss: 2.476494, ppl: 11.899469 +epoch: 1, batch: 35200, sum loss: 4674.025879, avg loss: 2.663263, ppl: 14.343008 +epoch: 1, batch: 35201, sum loss: 4709.690430, avg loss: 2.526658, ppl: 12.511623 +epoch: 1, batch: 35202, sum loss: 3890.504395, avg loss: 2.328249, ppl: 10.259963 +epoch: 1, batch: 35203, sum loss: 4046.040039, avg loss: 2.409791, ppl: 11.131639 +epoch: 1, batch: 35204, sum loss: 3752.374512, avg loss: 2.653730, ppl: 14.206934 +epoch: 1, batch: 35205, sum loss: 4411.368164, avg loss: 2.646291, ppl: 14.101645 +epoch: 1, batch: 35206, sum loss: 5861.606934, avg loss: 2.961904, ppl: 19.334742 +epoch: 1, batch: 35207, sum loss: 4388.489258, avg loss: 2.781045, ppl: 16.135878 +epoch: 1, batch: 35208, sum loss: 
3513.546143, avg loss: 2.124272, ppl: 8.366805 +epoch: 1, batch: 35209, sum loss: 3780.589111, avg loss: 2.317958, ppl: 10.154916 +epoch: 1, batch: 35210, sum loss: 4100.477539, avg loss: 2.666110, ppl: 14.383911 +epoch: 1, batch: 35211, sum loss: 4406.228027, avg loss: 2.655954, ppl: 14.238564 +epoch: 1, batch: 35212, sum loss: 4540.363770, avg loss: 2.593012, ppl: 13.369983 +epoch: 1, batch: 35213, sum loss: 3316.653809, avg loss: 2.170585, ppl: 8.763409 +epoch: 1, batch: 35214, sum loss: 3773.610840, avg loss: 2.542864, ppl: 12.716042 +epoch: 1, batch: 35215, sum loss: 4537.913086, avg loss: 2.402283, ppl: 11.048373 +epoch: 1, batch: 35216, sum loss: 4299.070801, avg loss: 2.670230, ppl: 14.443293 +epoch: 1, batch: 35217, sum loss: 5038.480957, avg loss: 2.776023, ppl: 16.055038 +epoch: 1, batch: 35218, sum loss: 4139.072266, avg loss: 2.598288, ppl: 13.440705 +epoch: 1, batch: 35219, sum loss: 4535.518555, avg loss: 2.638463, ppl: 13.991689 +epoch: 1, batch: 35220, sum loss: 4581.804199, avg loss: 2.483363, ppl: 11.981486 +epoch: 1, batch: 35221, sum loss: 5219.702637, avg loss: 2.767605, ppl: 15.920456 +epoch: 1, batch: 35222, sum loss: 4119.781738, avg loss: 2.581317, ppl: 13.214530 +epoch: 1, batch: 35223, sum loss: 3524.867676, avg loss: 2.612948, ppl: 13.639206 +epoch: 1, batch: 35224, sum loss: 3888.990479, avg loss: 2.408044, ppl: 11.112200 +epoch: 1, batch: 35225, sum loss: 3850.648438, avg loss: 2.623057, ppl: 13.777783 +epoch: 1, batch: 35226, sum loss: 4073.777100, avg loss: 2.566967, ppl: 13.026262 +epoch: 1, batch: 35227, sum loss: 3649.117188, avg loss: 2.340678, ppl: 10.388280 +epoch: 1, batch: 35228, sum loss: 4326.054688, avg loss: 2.647524, ppl: 14.119041 +epoch: 1, batch: 35229, sum loss: 5415.134277, avg loss: 3.011754, ppl: 20.323021 +epoch: 1, batch: 35230, sum loss: 3198.320801, avg loss: 2.469746, ppl: 11.819440 +epoch: 1, batch: 35231, sum loss: 3665.852295, avg loss: 2.403838, ppl: 11.065561 +epoch: 1, batch: 35232, sum loss: 
4154.447266, avg loss: 2.578800, ppl: 13.181317 +epoch: 1, batch: 35233, sum loss: 4185.903320, avg loss: 2.608039, ppl: 13.572414 +epoch: 1, batch: 35234, sum loss: 4407.594727, avg loss: 2.656778, ppl: 14.250302 +epoch: 1, batch: 35235, sum loss: 4588.239258, avg loss: 2.611405, ppl: 13.618176 +epoch: 1, batch: 35236, sum loss: 5039.144043, avg loss: 2.758152, ppl: 15.770676 +epoch: 1, batch: 35237, sum loss: 4605.991211, avg loss: 2.696716, ppl: 14.830948 +epoch: 1, batch: 35238, sum loss: 4640.943359, avg loss: 2.508618, ppl: 12.287938 +epoch: 1, batch: 35239, sum loss: 3188.966064, avg loss: 2.216099, ppl: 9.171479 +epoch: 1, batch: 35240, sum loss: 5304.318848, avg loss: 2.683014, ppl: 14.629122 +epoch: 1, batch: 35241, sum loss: 4490.459961, avg loss: 2.514255, ppl: 12.357403 +epoch: 1, batch: 35242, sum loss: 5555.283691, avg loss: 2.962818, ppl: 19.352428 +epoch: 1, batch: 35243, sum loss: 4280.152344, avg loss: 2.715833, ppl: 15.117193 +epoch: 1, batch: 35244, sum loss: 4256.282227, avg loss: 2.576442, ppl: 13.150270 +epoch: 1, batch: 35245, sum loss: 4952.562500, avg loss: 2.621791, ppl: 13.760342 +epoch: 1, batch: 35246, sum loss: 5467.244629, avg loss: 2.885089, ppl: 17.905169 +epoch: 1, batch: 35247, sum loss: 4069.278320, avg loss: 2.600178, ppl: 13.466131 +epoch: 1, batch: 35248, sum loss: 4253.246094, avg loss: 2.628706, ppl: 13.855825 +epoch: 1, batch: 35249, sum loss: 5155.673340, avg loss: 2.738010, ppl: 15.456200 +epoch: 1, batch: 35250, sum loss: 4290.812012, avg loss: 2.458918, ppl: 11.692155 +epoch: 1, batch: 35251, sum loss: 5409.935547, avg loss: 2.857863, ppl: 17.424259 +epoch: 1, batch: 35252, sum loss: 4836.179688, avg loss: 2.552074, ppl: 12.833690 +epoch: 1, batch: 35253, sum loss: 4702.778809, avg loss: 2.621393, ppl: 13.754870 +epoch: 1, batch: 35254, sum loss: 4275.939941, avg loss: 2.451800, ppl: 11.609228 +epoch: 1, batch: 35255, sum loss: 4854.259277, avg loss: 2.747176, ppl: 15.598515 +epoch: 1, batch: 35256, sum loss: 
3944.375488, avg loss: 2.523593, ppl: 12.473330 +epoch: 1, batch: 35257, sum loss: 3739.557129, avg loss: 2.468355, ppl: 11.803011 +epoch: 1, batch: 35258, sum loss: 4953.225586, avg loss: 2.721553, ppl: 15.203910 +epoch: 1, batch: 35259, sum loss: 4380.821777, avg loss: 2.767417, ppl: 15.917469 +epoch: 1, batch: 35260, sum loss: 4506.733398, avg loss: 2.519135, ppl: 12.417856 +epoch: 1, batch: 35261, sum loss: 5044.579102, avg loss: 2.777852, ppl: 16.084431 +epoch: 1, batch: 35262, sum loss: 4362.371582, avg loss: 2.697818, ppl: 14.847301 +epoch: 1, batch: 35263, sum loss: 4587.407227, avg loss: 2.483707, ppl: 11.985618 +epoch: 1, batch: 35264, sum loss: 5343.839844, avg loss: 2.853091, ppl: 17.341305 +epoch: 1, batch: 35265, sum loss: 4388.819336, avg loss: 2.634345, ppl: 13.934187 +epoch: 1, batch: 35266, sum loss: 4106.720215, avg loss: 2.530327, ppl: 12.557607 +epoch: 1, batch: 35267, sum loss: 4684.458984, avg loss: 2.642109, ppl: 14.042788 +epoch: 1, batch: 35268, sum loss: 4895.621094, avg loss: 2.564495, ppl: 12.994096 +epoch: 1, batch: 35269, sum loss: 3958.165527, avg loss: 2.592119, ppl: 13.358047 +epoch: 1, batch: 35270, sum loss: 4371.005371, avg loss: 2.660381, ppl: 14.301731 +epoch: 1, batch: 35271, sum loss: 4376.357910, avg loss: 2.505070, ppl: 12.244418 +epoch: 1, batch: 35272, sum loss: 4696.437988, avg loss: 2.700655, ppl: 14.889474 +epoch: 1, batch: 35273, sum loss: 4806.571289, avg loss: 2.568985, ppl: 13.052572 +epoch: 1, batch: 35274, sum loss: 4419.502930, avg loss: 2.647995, ppl: 14.125685 +epoch: 1, batch: 35275, sum loss: 3714.530762, avg loss: 2.443770, ppl: 11.516380 +epoch: 1, batch: 35276, sum loss: 4584.395020, avg loss: 2.566851, ppl: 13.024740 +epoch: 1, batch: 35277, sum loss: 4654.981934, avg loss: 2.626965, ppl: 13.831727 +epoch: 1, batch: 35278, sum loss: 5259.425781, avg loss: 2.729334, ppl: 15.322673 +epoch: 1, batch: 35279, sum loss: 4524.606934, avg loss: 2.553390, ppl: 12.850594 +epoch: 1, batch: 35280, sum loss: 
5215.652832, avg loss: 2.735004, ppl: 15.409808 +epoch: 1, batch: 35281, sum loss: 5354.048340, avg loss: 2.599052, ppl: 13.450986 +epoch: 1, batch: 35282, sum loss: 4115.583496, avg loss: 2.609755, ppl: 13.595720 +epoch: 1, batch: 35283, sum loss: 4715.005859, avg loss: 2.819980, ppl: 16.776510 +epoch: 1, batch: 35284, sum loss: 4214.074219, avg loss: 2.520380, ppl: 12.433314 +epoch: 1, batch: 35285, sum loss: 4145.964355, avg loss: 2.678272, ppl: 14.559905 +epoch: 1, batch: 35286, sum loss: 4491.362305, avg loss: 2.534629, ppl: 12.611750 +epoch: 1, batch: 35287, sum loss: 4676.268066, avg loss: 2.795139, ppl: 16.364908 +epoch: 1, batch: 35288, sum loss: 4101.395020, avg loss: 2.614018, ppl: 13.653808 +epoch: 1, batch: 35289, sum loss: 4434.208008, avg loss: 2.462081, ppl: 11.729194 +epoch: 1, batch: 35290, sum loss: 4813.727051, avg loss: 2.575563, ppl: 13.138708 +epoch: 1, batch: 35291, sum loss: 4008.766357, avg loss: 2.554982, ppl: 12.871064 +epoch: 1, batch: 35292, sum loss: 3869.977539, avg loss: 2.409699, ppl: 11.130615 +epoch: 1, batch: 35293, sum loss: 4402.579590, avg loss: 2.594331, ppl: 13.387629 +epoch: 1, batch: 35294, sum loss: 3857.472656, avg loss: 2.775160, ppl: 16.041195 +epoch: 1, batch: 35295, sum loss: 3926.440918, avg loss: 2.381104, ppl: 10.816840 +epoch: 1, batch: 35296, sum loss: 6045.957520, avg loss: 2.795172, ppl: 16.365446 +epoch: 1, batch: 35297, sum loss: 3927.049561, avg loss: 2.345908, ppl: 10.442750 +epoch: 1, batch: 35298, sum loss: 4955.161621, avg loss: 2.655499, ppl: 14.232089 +epoch: 1, batch: 35299, sum loss: 4675.954102, avg loss: 2.796623, ppl: 16.389215 +epoch: 1, batch: 35300, sum loss: 4884.385742, avg loss: 2.589812, ppl: 13.327270 +epoch: 1, batch: 35301, sum loss: 3843.881592, avg loss: 2.572879, ppl: 13.103499 +epoch: 1, batch: 35302, sum loss: 4909.070312, avg loss: 2.713693, ppl: 15.084880 +epoch: 1, batch: 35303, sum loss: 4011.550537, avg loss: 2.583098, ppl: 13.238082 +epoch: 1, batch: 35304, sum loss: 
5144.058105, avg loss: 2.978609, ppl: 19.660456 +epoch: 1, batch: 35305, sum loss: 4843.905762, avg loss: 2.771113, ppl: 15.976412 +epoch: 1, batch: 35306, sum loss: 4609.194824, avg loss: 2.769949, ppl: 15.957820 +epoch: 1, batch: 35307, sum loss: 3627.212891, avg loss: 2.641815, ppl: 14.038667 +epoch: 1, batch: 35308, sum loss: 4288.416992, avg loss: 2.813922, ppl: 16.675190 +epoch: 1, batch: 35309, sum loss: 3941.407471, avg loss: 2.432968, ppl: 11.392641 +epoch: 1, batch: 35310, sum loss: 4816.371582, avg loss: 2.777607, ppl: 16.080502 +epoch: 1, batch: 35311, sum loss: 4074.696777, avg loss: 2.600317, ppl: 13.468010 +epoch: 1, batch: 35312, sum loss: 3930.096680, avg loss: 2.500062, ppl: 12.183244 +epoch: 1, batch: 35313, sum loss: 4719.129883, avg loss: 2.610138, ppl: 13.600930 +epoch: 1, batch: 35314, sum loss: 4535.245605, avg loss: 2.666223, ppl: 14.385533 +epoch: 1, batch: 35315, sum loss: 5625.926270, avg loss: 3.045981, ppl: 21.030651 +epoch: 1, batch: 35316, sum loss: 4445.383301, avg loss: 2.560705, ppl: 12.944937 +epoch: 1, batch: 35317, sum loss: 4558.174316, avg loss: 2.654732, ppl: 14.221170 +epoch: 1, batch: 35318, sum loss: 5385.708008, avg loss: 2.706386, ppl: 14.975056 +epoch: 1, batch: 35319, sum loss: 3768.492188, avg loss: 2.476013, ppl: 11.893754 +epoch: 1, batch: 35320, sum loss: 3951.508545, avg loss: 2.541163, ppl: 12.694428 +epoch: 1, batch: 35321, sum loss: 4481.325195, avg loss: 2.823771, ppl: 16.840244 +epoch: 1, batch: 35322, sum loss: 3515.458008, avg loss: 2.444686, ppl: 11.526926 +epoch: 1, batch: 35323, sum loss: 4473.720703, avg loss: 2.500682, ppl: 12.190809 +epoch: 1, batch: 35324, sum loss: 5002.496094, avg loss: 2.690961, ppl: 14.745834 +epoch: 1, batch: 35325, sum loss: 4097.804688, avg loss: 2.730050, ppl: 15.333652 +epoch: 1, batch: 35326, sum loss: 3915.474121, avg loss: 2.433483, ppl: 11.398513 +epoch: 1, batch: 35327, sum loss: 4043.393311, avg loss: 2.512985, ppl: 12.341718 +epoch: 1, batch: 35328, sum loss: 
3570.715332, avg loss: 2.353801, ppl: 10.525496 +epoch: 1, batch: 35329, sum loss: 4481.119629, avg loss: 2.509026, ppl: 12.292946 +epoch: 1, batch: 35330, sum loss: 3923.202637, avg loss: 2.448941, ppl: 11.576076 +epoch: 1, batch: 35331, sum loss: 5438.435547, avg loss: 2.561675, ppl: 12.957500 +epoch: 1, batch: 35332, sum loss: 4858.453125, avg loss: 2.599493, ppl: 13.456917 +epoch: 1, batch: 35333, sum loss: 4991.570312, avg loss: 2.621623, ppl: 13.758039 +epoch: 1, batch: 35334, sum loss: 5514.189941, avg loss: 2.912937, ppl: 18.410789 +epoch: 1, batch: 35335, sum loss: 4318.793457, avg loss: 2.537481, ppl: 12.647775 +epoch: 1, batch: 35336, sum loss: 4629.000977, avg loss: 2.721341, ppl: 15.200691 +epoch: 1, batch: 35337, sum loss: 4067.371338, avg loss: 2.510723, ppl: 12.313831 +epoch: 1, batch: 35338, sum loss: 4708.663574, avg loss: 2.732829, ppl: 15.376319 +epoch: 1, batch: 35339, sum loss: 4497.871094, avg loss: 2.730948, ppl: 15.347425 +epoch: 1, batch: 35340, sum loss: 4250.762695, avg loss: 2.516733, ppl: 12.388063 +epoch: 1, batch: 35341, sum loss: 4033.869629, avg loss: 2.459677, ppl: 11.701026 +epoch: 1, batch: 35342, sum loss: 4698.156250, avg loss: 2.723569, ppl: 15.234596 +epoch: 1, batch: 35343, sum loss: 4430.458984, avg loss: 2.428980, ppl: 11.347298 +epoch: 1, batch: 35344, sum loss: 4960.734863, avg loss: 2.479128, ppl: 11.930855 +epoch: 1, batch: 35345, sum loss: 4892.906738, avg loss: 2.775330, ppl: 16.043922 +epoch: 1, batch: 35346, sum loss: 4665.401855, avg loss: 2.699885, ppl: 14.878026 +epoch: 1, batch: 35347, sum loss: 4573.235352, avg loss: 2.797086, ppl: 16.396793 +epoch: 1, batch: 35348, sum loss: 4620.107422, avg loss: 2.565301, ppl: 13.004575 +epoch: 1, batch: 35349, sum loss: 4321.280273, avg loss: 2.622136, ppl: 13.765096 +epoch: 1, batch: 35350, sum loss: 5149.459961, avg loss: 2.848153, ppl: 17.255875 +epoch: 1, batch: 35351, sum loss: 4999.359375, avg loss: 2.916779, ppl: 18.481663 +epoch: 1, batch: 35352, sum loss: 
4286.905762, avg loss: 2.560876, ppl: 12.947149 +epoch: 1, batch: 35353, sum loss: 4579.018555, avg loss: 2.421480, ppl: 11.262515 +epoch: 1, batch: 35354, sum loss: 4300.492676, avg loss: 2.709826, ppl: 15.026653 +epoch: 1, batch: 35355, sum loss: 5772.972656, avg loss: 2.998947, ppl: 20.064392 +epoch: 1, batch: 35356, sum loss: 4914.533203, avg loss: 2.742485, ppl: 15.525522 +epoch: 1, batch: 35357, sum loss: 4927.819336, avg loss: 2.578660, ppl: 13.179466 +epoch: 1, batch: 35358, sum loss: 4868.833496, avg loss: 2.801400, ppl: 16.467688 +epoch: 1, batch: 35359, sum loss: 4959.234375, avg loss: 2.592386, ppl: 13.361614 +epoch: 1, batch: 35360, sum loss: 4367.593262, avg loss: 2.659923, ppl: 14.295186 +epoch: 1, batch: 35361, sum loss: 4435.901855, avg loss: 2.534801, ppl: 12.613920 +epoch: 1, batch: 35362, sum loss: 5203.834473, avg loss: 3.184721, ppl: 24.160553 +epoch: 1, batch: 35363, sum loss: 3926.096680, avg loss: 2.528073, ppl: 12.529334 +epoch: 1, batch: 35364, sum loss: 4742.394043, avg loss: 2.637594, ppl: 13.979528 +epoch: 1, batch: 35365, sum loss: 4486.010742, avg loss: 2.548870, ppl: 12.792635 +epoch: 1, batch: 35366, sum loss: 4765.649902, avg loss: 2.769117, ppl: 15.944543 +epoch: 1, batch: 35367, sum loss: 4559.412598, avg loss: 2.447350, ppl: 11.557675 +epoch: 1, batch: 35368, sum loss: 3856.293701, avg loss: 2.364374, ppl: 10.637377 +epoch: 1, batch: 35369, sum loss: 5064.365723, avg loss: 2.550033, ppl: 12.807528 +epoch: 1, batch: 35370, sum loss: 4468.094238, avg loss: 2.766622, ppl: 15.904814 +epoch: 1, batch: 35371, sum loss: 3971.117188, avg loss: 2.369402, ppl: 10.690994 +epoch: 1, batch: 35372, sum loss: 5128.082520, avg loss: 2.717585, ppl: 15.143703 +epoch: 1, batch: 35373, sum loss: 5655.603516, avg loss: 3.052134, ppl: 21.160444 +epoch: 1, batch: 35374, sum loss: 3756.809082, avg loss: 2.412851, ppl: 11.165751 +epoch: 1, batch: 35375, sum loss: 4336.257812, avg loss: 2.613778, ppl: 13.650527 +epoch: 1, batch: 35376, sum loss: 
4492.810547, avg loss: 2.822117, ppl: 16.812407 +epoch: 1, batch: 35377, sum loss: 4375.140625, avg loss: 2.615147, ppl: 13.669224 +epoch: 1, batch: 35378, sum loss: 4840.284668, avg loss: 2.860688, ppl: 17.473553 +epoch: 1, batch: 35379, sum loss: 5229.381348, avg loss: 2.722219, ppl: 15.214037 +epoch: 1, batch: 35380, sum loss: 4222.086914, avg loss: 2.617537, ppl: 13.701931 +epoch: 1, batch: 35381, sum loss: 4617.286621, avg loss: 2.662795, ppl: 14.336304 +epoch: 1, batch: 35382, sum loss: 3324.082031, avg loss: 2.294053, ppl: 9.915038 +epoch: 1, batch: 35383, sum loss: 4381.745117, avg loss: 2.660440, ppl: 14.302584 +epoch: 1, batch: 35384, sum loss: 4105.719727, avg loss: 2.618444, ppl: 13.714364 +epoch: 1, batch: 35385, sum loss: 4098.604492, avg loss: 2.618917, ppl: 13.720849 +epoch: 1, batch: 35386, sum loss: 4832.926758, avg loss: 2.674558, ppl: 14.505939 +epoch: 1, batch: 35387, sum loss: 4058.424316, avg loss: 2.345910, ppl: 10.442772 +epoch: 1, batch: 35388, sum loss: 3710.360840, avg loss: 2.696483, ppl: 14.827494 +epoch: 1, batch: 35389, sum loss: 5241.749512, avg loss: 2.721573, ppl: 15.204222 +epoch: 1, batch: 35390, sum loss: 4350.289551, avg loss: 2.574136, ppl: 13.119974 +epoch: 1, batch: 35391, sum loss: 4158.298340, avg loss: 2.576393, ppl: 13.149624 +epoch: 1, batch: 35392, sum loss: 4251.699707, avg loss: 2.579915, ppl: 13.196013 +epoch: 1, batch: 35393, sum loss: 5005.487793, avg loss: 2.615197, ppl: 13.669915 +epoch: 1, batch: 35394, sum loss: 5458.462891, avg loss: 2.711606, ppl: 15.053432 +epoch: 1, batch: 35395, sum loss: 3920.572266, avg loss: 2.620703, ppl: 13.745390 +epoch: 1, batch: 35396, sum loss: 4046.989258, avg loss: 2.390425, ppl: 10.918133 +epoch: 1, batch: 35397, sum loss: 4670.888672, avg loss: 2.596380, ppl: 13.415093 +epoch: 1, batch: 35398, sum loss: 4556.712402, avg loss: 2.707494, ppl: 14.991659 +epoch: 1, batch: 35399, sum loss: 5240.538086, avg loss: 2.688834, ppl: 14.714515 +epoch: 1, batch: 35400, sum loss: 
4934.465820, avg loss: 2.520156, ppl: 12.430540 +epoch: 1, batch: 35401, sum loss: 4597.211426, avg loss: 2.613537, ppl: 13.647237 +epoch: 1, batch: 35402, sum loss: 4582.949707, avg loss: 2.541847, ppl: 12.703109 +epoch: 1, batch: 35403, sum loss: 4452.314453, avg loss: 2.632948, ppl: 13.914725 +epoch: 1, batch: 35404, sum loss: 4577.931641, avg loss: 2.505710, ppl: 12.252251 +epoch: 1, batch: 35405, sum loss: 3848.505615, avg loss: 2.385930, ppl: 10.869169 +epoch: 1, batch: 35406, sum loss: 4677.115234, avg loss: 2.705099, ppl: 14.955791 +epoch: 1, batch: 35407, sum loss: 4085.537598, avg loss: 2.590702, ppl: 13.339136 +epoch: 1, batch: 35408, sum loss: 3687.391113, avg loss: 2.395966, ppl: 10.978794 +epoch: 1, batch: 35409, sum loss: 3838.552734, avg loss: 2.353497, ppl: 10.522297 +epoch: 1, batch: 35410, sum loss: 4603.777832, avg loss: 2.642812, ppl: 14.052658 +epoch: 1, batch: 35411, sum loss: 3528.786377, avg loss: 2.306396, ppl: 10.038187 +epoch: 1, batch: 35412, sum loss: 4615.885742, avg loss: 2.789055, ppl: 16.265640 +epoch: 1, batch: 35413, sum loss: 4362.825684, avg loss: 2.566368, ppl: 13.018456 +epoch: 1, batch: 35414, sum loss: 4946.384277, avg loss: 2.751048, ppl: 15.659032 +epoch: 1, batch: 35415, sum loss: 5022.277832, avg loss: 2.889688, ppl: 17.987698 +epoch: 1, batch: 35416, sum loss: 4314.849609, avg loss: 2.605586, ppl: 13.539151 +epoch: 1, batch: 35417, sum loss: 4858.719727, avg loss: 2.787561, ppl: 16.241365 +epoch: 1, batch: 35418, sum loss: 4606.759277, avg loss: 2.761846, ppl: 15.829038 +epoch: 1, batch: 35419, sum loss: 4427.181641, avg loss: 2.521174, ppl: 12.443199 +epoch: 1, batch: 35420, sum loss: 4039.579102, avg loss: 2.377622, ppl: 10.779236 +epoch: 1, batch: 35421, sum loss: 4669.290527, avg loss: 2.738587, ppl: 15.465116 +epoch: 1, batch: 35422, sum loss: 3910.871826, avg loss: 2.541177, ppl: 12.694607 +epoch: 1, batch: 35423, sum loss: 4701.354980, avg loss: 2.600307, ppl: 13.467872 +epoch: 1, batch: 35424, sum loss: 
4309.224121, avg loss: 2.588123, ppl: 13.304770 +epoch: 1, batch: 35425, sum loss: 5161.093262, avg loss: 2.697906, ppl: 14.848599 +epoch: 1, batch: 35426, sum loss: 3926.198242, avg loss: 2.798431, ppl: 16.418859 +epoch: 1, batch: 35427, sum loss: 4261.228027, avg loss: 2.556226, ppl: 12.887084 +epoch: 1, batch: 35428, sum loss: 4284.892578, avg loss: 2.529453, ppl: 12.546636 +epoch: 1, batch: 35429, sum loss: 4599.660645, avg loss: 2.694587, ppl: 14.799413 +epoch: 1, batch: 35430, sum loss: 5051.704590, avg loss: 2.778715, ppl: 16.098328 +epoch: 1, batch: 35431, sum loss: 4349.208496, avg loss: 2.607439, ppl: 13.564272 +epoch: 1, batch: 35432, sum loss: 4035.700195, avg loss: 2.522313, ppl: 12.457373 +epoch: 1, batch: 35433, sum loss: 3749.692871, avg loss: 2.414484, ppl: 11.183992 +epoch: 1, batch: 35434, sum loss: 4581.201172, avg loss: 2.640462, ppl: 14.019674 +epoch: 1, batch: 35435, sum loss: 4498.981934, avg loss: 2.526099, ppl: 12.504630 +epoch: 1, batch: 35436, sum loss: 4292.286621, avg loss: 2.602963, ppl: 13.503696 +epoch: 1, batch: 35437, sum loss: 4237.536621, avg loss: 2.513367, ppl: 12.346430 +epoch: 1, batch: 35438, sum loss: 5071.665527, avg loss: 2.700567, ppl: 14.888175 +epoch: 1, batch: 35439, sum loss: 4600.152344, avg loss: 2.733305, ppl: 15.383646 +epoch: 1, batch: 35440, sum loss: 5758.553711, avg loss: 2.877838, ppl: 17.775799 +epoch: 1, batch: 35441, sum loss: 4530.430176, avg loss: 2.584387, ppl: 13.255162 +epoch: 1, batch: 35442, sum loss: 5339.300781, avg loss: 2.622447, ppl: 13.769370 +epoch: 1, batch: 35443, sum loss: 4152.423340, avg loss: 2.621479, ppl: 13.756058 +epoch: 1, batch: 35444, sum loss: 4190.942383, avg loss: 2.792100, ppl: 16.315248 +epoch: 1, batch: 35445, sum loss: 4529.553223, avg loss: 2.775462, ppl: 16.046034 +epoch: 1, batch: 35446, sum loss: 3981.581787, avg loss: 2.424837, ppl: 11.300385 +epoch: 1, batch: 35447, sum loss: 4992.770996, avg loss: 2.488919, ppl: 12.048243 +epoch: 1, batch: 35448, sum loss: 
4387.839844, avg loss: 2.501619, ppl: 12.202234 +epoch: 1, batch: 35449, sum loss: 5588.237793, avg loss: 2.964582, ppl: 19.386606 +epoch: 1, batch: 35450, sum loss: 4452.764160, avg loss: 2.619273, ppl: 13.725744 +epoch: 1, batch: 35451, sum loss: 4880.867676, avg loss: 2.818053, ppl: 16.744219 +epoch: 1, batch: 35452, sum loss: 3985.216797, avg loss: 2.452441, ppl: 11.616668 +epoch: 1, batch: 35453, sum loss: 4884.584961, avg loss: 2.685313, ppl: 14.662797 +epoch: 1, batch: 35454, sum loss: 4223.872559, avg loss: 2.391774, ppl: 10.932868 +epoch: 1, batch: 35455, sum loss: 4045.498779, avg loss: 2.565313, ppl: 13.004729 +epoch: 1, batch: 35456, sum loss: 4130.638184, avg loss: 2.560842, ppl: 12.946714 +epoch: 1, batch: 35457, sum loss: 5585.065918, avg loss: 3.130642, ppl: 22.888680 +epoch: 1, batch: 35458, sum loss: 4984.512695, avg loss: 2.627577, ppl: 13.840189 +epoch: 1, batch: 35459, sum loss: 4308.489258, avg loss: 2.600174, ppl: 13.466086 +epoch: 1, batch: 35460, sum loss: 4871.946289, avg loss: 2.699139, ppl: 14.866927 +epoch: 1, batch: 35461, sum loss: 4401.109375, avg loss: 2.549890, ppl: 12.805690 +epoch: 1, batch: 35462, sum loss: 3615.801270, avg loss: 2.486796, ppl: 12.022693 +epoch: 1, batch: 35463, sum loss: 4301.122070, avg loss: 2.617846, ppl: 13.706176 +epoch: 1, batch: 35464, sum loss: 5426.572266, avg loss: 2.856091, ppl: 17.393400 +epoch: 1, batch: 35465, sum loss: 4035.016602, avg loss: 2.348671, ppl: 10.471643 +epoch: 1, batch: 35466, sum loss: 4069.705566, avg loss: 2.513716, ppl: 12.350737 +epoch: 1, batch: 35467, sum loss: 4322.250000, avg loss: 2.518794, ppl: 12.413614 +epoch: 1, batch: 35468, sum loss: 4869.032227, avg loss: 2.747761, ppl: 15.607647 +epoch: 1, batch: 35469, sum loss: 4136.093262, avg loss: 2.622761, ppl: 13.773694 +epoch: 1, batch: 35470, sum loss: 5162.573242, avg loss: 2.702918, ppl: 14.923215 +epoch: 1, batch: 35471, sum loss: 4288.076660, avg loss: 2.515001, ppl: 12.366622 +epoch: 1, batch: 35472, sum loss: 
5092.520508, avg loss: 2.708787, ppl: 15.011063 +epoch: 1, batch: 35473, sum loss: 4525.352539, avg loss: 2.629490, ppl: 13.866698 +epoch: 1, batch: 35474, sum loss: 5400.804688, avg loss: 2.832095, ppl: 16.980993 +epoch: 1, batch: 35475, sum loss: 5073.363281, avg loss: 2.537951, ppl: 12.653714 +epoch: 1, batch: 35476, sum loss: 4241.790527, avg loss: 2.456161, ppl: 11.659966 +epoch: 1, batch: 35477, sum loss: 5034.945312, avg loss: 2.811248, ppl: 16.630661 +epoch: 1, batch: 35478, sum loss: 4141.104492, avg loss: 2.457629, ppl: 11.677089 +epoch: 1, batch: 35479, sum loss: 4828.243164, avg loss: 2.715548, ppl: 15.112883 +epoch: 1, batch: 35480, sum loss: 3743.471436, avg loss: 2.495648, ppl: 12.129587 +epoch: 1, batch: 35481, sum loss: 3742.780518, avg loss: 2.326153, ppl: 10.238482 +epoch: 1, batch: 35482, sum loss: 4054.840820, avg loss: 2.576138, ppl: 13.146263 +epoch: 1, batch: 35483, sum loss: 5120.554199, avg loss: 2.723699, ppl: 15.236576 +epoch: 1, batch: 35484, sum loss: 4617.446289, avg loss: 2.559560, ppl: 12.930127 +epoch: 1, batch: 35485, sum loss: 4758.987793, avg loss: 2.454352, ppl: 11.638885 +epoch: 1, batch: 35486, sum loss: 5945.475098, avg loss: 2.796555, ppl: 16.388084 +epoch: 1, batch: 35487, sum loss: 5028.815430, avg loss: 2.891786, ppl: 18.025473 +epoch: 1, batch: 35488, sum loss: 4185.872070, avg loss: 2.473920, ppl: 11.868877 +epoch: 1, batch: 35489, sum loss: 4594.721191, avg loss: 2.825782, ppl: 16.874132 +epoch: 1, batch: 35490, sum loss: 3998.543457, avg loss: 2.554980, ppl: 12.871039 +epoch: 1, batch: 35491, sum loss: 3980.418945, avg loss: 2.427085, ppl: 11.325815 +epoch: 1, batch: 35492, sum loss: 4074.016846, avg loss: 2.669736, ppl: 14.436156 +epoch: 1, batch: 35493, sum loss: 3403.320068, avg loss: 2.539791, ppl: 12.677023 +epoch: 1, batch: 35494, sum loss: 4431.252930, avg loss: 2.754042, ppl: 15.705982 +epoch: 1, batch: 35495, sum loss: 3888.851562, avg loss: 2.394613, ppl: 10.963955 +epoch: 1, batch: 35496, sum loss: 
4400.992188, avg loss: 2.640067, ppl: 14.014144 +epoch: 1, batch: 35497, sum loss: 3514.916504, avg loss: 2.456266, ppl: 11.661186 +epoch: 1, batch: 35498, sum loss: 4589.536133, avg loss: 2.521723, ppl: 12.450033 +epoch: 1, batch: 35499, sum loss: 3832.221680, avg loss: 2.459706, ppl: 11.701369 +epoch: 1, batch: 35500, sum loss: 4981.614746, avg loss: 2.615021, ppl: 13.667500 +epoch: 1, batch: 35501, sum loss: 3737.387451, avg loss: 2.449140, ppl: 11.578386 +epoch: 1, batch: 35502, sum loss: 4420.202148, avg loss: 2.705142, ppl: 14.956441 +epoch: 1, batch: 35503, sum loss: 3864.114258, avg loss: 2.441007, ppl: 11.484599 +epoch: 1, batch: 35504, sum loss: 4333.509766, avg loss: 2.496261, ppl: 12.137033 +epoch: 1, batch: 35505, sum loss: 4949.319336, avg loss: 2.816915, ppl: 16.725170 +epoch: 1, batch: 35506, sum loss: 4864.832520, avg loss: 2.453269, ppl: 11.626291 +epoch: 1, batch: 35507, sum loss: 4240.574219, avg loss: 2.401231, ppl: 11.036755 +epoch: 1, batch: 35508, sum loss: 4734.249512, avg loss: 2.637465, ppl: 13.977725 +epoch: 1, batch: 35509, sum loss: 3881.052979, avg loss: 2.353580, ppl: 10.523172 +epoch: 1, batch: 35510, sum loss: 4432.593750, avg loss: 2.495830, ppl: 12.131797 +epoch: 1, batch: 35511, sum loss: 4614.480469, avg loss: 2.808570, ppl: 16.586182 +epoch: 1, batch: 35512, sum loss: 4006.015137, avg loss: 2.556487, ppl: 12.890454 +epoch: 1, batch: 35513, sum loss: 5934.083496, avg loss: 2.629191, ppl: 13.862550 +epoch: 1, batch: 35514, sum loss: 4414.533691, avg loss: 2.474514, ppl: 11.875939 +epoch: 1, batch: 35515, sum loss: 4598.022461, avg loss: 2.761575, ppl: 15.824751 +epoch: 1, batch: 35516, sum loss: 4010.236816, avg loss: 2.464804, ppl: 11.761182 +epoch: 1, batch: 35517, sum loss: 4559.018066, avg loss: 2.627676, ppl: 13.841568 +epoch: 1, batch: 35518, sum loss: 4607.545898, avg loss: 2.931009, ppl: 18.746532 +epoch: 1, batch: 35519, sum loss: 4459.995605, avg loss: 2.376130, ppl: 10.763167 +epoch: 1, batch: 35520, sum loss: 
4250.150391, avg loss: 2.591555, ppl: 13.350517 +epoch: 1, batch: 35521, sum loss: 4391.596191, avg loss: 2.528265, ppl: 12.531745 +epoch: 1, batch: 35522, sum loss: 4396.078125, avg loss: 2.716983, ppl: 15.134590 +epoch: 1, batch: 35523, sum loss: 3996.413086, avg loss: 2.427955, ppl: 11.335673 +epoch: 1, batch: 35524, sum loss: 4725.597168, avg loss: 2.709631, ppl: 15.023734 +epoch: 1, batch: 35525, sum loss: 4599.747070, avg loss: 2.374676, ppl: 10.747528 +epoch: 1, batch: 35526, sum loss: 4070.399658, avg loss: 2.446154, ppl: 11.543859 +epoch: 1, batch: 35527, sum loss: 3781.142578, avg loss: 2.553101, ppl: 12.846881 +epoch: 1, batch: 35528, sum loss: 3870.554932, avg loss: 2.497132, ppl: 12.147609 +epoch: 1, batch: 35529, sum loss: 3835.024170, avg loss: 2.687473, ppl: 14.694500 +epoch: 1, batch: 35530, sum loss: 3848.610596, avg loss: 2.431213, ppl: 11.372670 +epoch: 1, batch: 35531, sum loss: 4670.740234, avg loss: 2.676642, ppl: 14.536198 +epoch: 1, batch: 35532, sum loss: 4561.452148, avg loss: 2.558302, ppl: 12.913870 +epoch: 1, batch: 35533, sum loss: 4812.882812, avg loss: 2.844493, ppl: 17.192846 +epoch: 1, batch: 35534, sum loss: 5082.789062, avg loss: 2.729747, ppl: 15.329009 +epoch: 1, batch: 35535, sum loss: 4384.741211, avg loss: 2.466109, ppl: 11.776533 +epoch: 1, batch: 35536, sum loss: 3812.981201, avg loss: 2.353692, ppl: 10.524357 +epoch: 1, batch: 35537, sum loss: 3773.467773, avg loss: 2.448714, ppl: 11.573451 +epoch: 1, batch: 35538, sum loss: 4694.256348, avg loss: 2.743575, ppl: 15.542448 +epoch: 1, batch: 35539, sum loss: 3637.538574, avg loss: 2.169075, ppl: 8.750184 +epoch: 1, batch: 35540, sum loss: 4956.288574, avg loss: 2.946664, ppl: 19.042320 +epoch: 1, batch: 35541, sum loss: 3970.836914, avg loss: 2.641941, ppl: 14.040424 +epoch: 1, batch: 35542, sum loss: 4447.768555, avg loss: 2.690725, ppl: 14.742362 +epoch: 1, batch: 35543, sum loss: 4714.918945, avg loss: 2.622313, ppl: 13.767531 +epoch: 1, batch: 35544, sum loss: 
3864.160645, avg loss: 2.430290, ppl: 11.362174 +epoch: 1, batch: 35545, sum loss: 4494.393555, avg loss: 2.639104, ppl: 14.000648 +epoch: 1, batch: 35546, sum loss: 5017.924316, avg loss: 2.795501, ppl: 16.370829 +epoch: 1, batch: 35547, sum loss: 5409.503906, avg loss: 2.872811, ppl: 17.686670 +epoch: 1, batch: 35548, sum loss: 4744.367188, avg loss: 2.815648, ppl: 16.703999 +epoch: 1, batch: 35549, sum loss: 4470.868164, avg loss: 2.763206, ppl: 15.850586 +epoch: 1, batch: 35550, sum loss: 4785.950684, avg loss: 2.563444, ppl: 12.980450 +epoch: 1, batch: 35551, sum loss: 3828.354004, avg loss: 2.485944, ppl: 12.012455 +epoch: 1, batch: 35552, sum loss: 5170.610352, avg loss: 2.543340, ppl: 12.722092 +epoch: 1, batch: 35553, sum loss: 4522.343750, avg loss: 2.523629, ppl: 12.473787 +epoch: 1, batch: 35554, sum loss: 3586.900635, avg loss: 2.314130, ppl: 10.116114 +epoch: 1, batch: 35555, sum loss: 4195.639160, avg loss: 2.495919, ppl: 12.132873 +epoch: 1, batch: 35556, sum loss: 3981.925049, avg loss: 2.451924, ppl: 11.610668 +epoch: 1, batch: 35557, sum loss: 3381.310059, avg loss: 2.308061, ppl: 10.054913 +epoch: 1, batch: 35558, sum loss: 4801.660645, avg loss: 2.611017, ppl: 13.612891 +epoch: 1, batch: 35559, sum loss: 4540.271484, avg loss: 2.607853, ppl: 13.569880 +epoch: 1, batch: 35560, sum loss: 3555.821533, avg loss: 2.272090, ppl: 9.699656 +epoch: 1, batch: 35561, sum loss: 3640.160889, avg loss: 2.372986, ppl: 10.729383 +epoch: 1, batch: 35562, sum loss: 3785.588867, avg loss: 2.584020, ppl: 13.250293 +epoch: 1, batch: 35563, sum loss: 4528.599121, avg loss: 2.520089, ppl: 12.429699 +epoch: 1, batch: 35564, sum loss: 4634.456055, avg loss: 2.755325, ppl: 15.726145 +epoch: 1, batch: 35565, sum loss: 4032.112305, avg loss: 2.488958, ppl: 12.048719 +epoch: 1, batch: 35566, sum loss: 4656.024902, avg loss: 2.586681, ppl: 13.285599 +epoch: 1, batch: 35567, sum loss: 4246.358398, avg loss: 2.553433, ppl: 12.851142 +epoch: 1, batch: 35568, sum loss: 
4015.803711, avg loss: 2.696980, ppl: 14.834867 +epoch: 1, batch: 35569, sum loss: 4836.253906, avg loss: 2.629828, ppl: 13.871387 +epoch: 1, batch: 35570, sum loss: 4356.512695, avg loss: 2.528446, ppl: 12.534016 +epoch: 1, batch: 35571, sum loss: 4334.583496, avg loss: 2.643039, ppl: 14.055851 +epoch: 1, batch: 35572, sum loss: 4705.727539, avg loss: 3.003017, ppl: 20.146231 +epoch: 1, batch: 35573, sum loss: 4663.527832, avg loss: 2.658796, ppl: 14.279084 +epoch: 1, batch: 35574, sum loss: 3811.118896, avg loss: 2.236572, ppl: 9.361189 +epoch: 1, batch: 35575, sum loss: 4199.329102, avg loss: 2.683277, ppl: 14.632973 +epoch: 1, batch: 35576, sum loss: 4769.709961, avg loss: 2.586611, ppl: 13.284668 +epoch: 1, batch: 35577, sum loss: 3128.145020, avg loss: 2.320582, ppl: 10.181602 +epoch: 1, batch: 35578, sum loss: 4372.126465, avg loss: 2.730872, ppl: 15.346270 +epoch: 1, batch: 35579, sum loss: 4117.970703, avg loss: 2.531021, ppl: 12.566325 +epoch: 1, batch: 35580, sum loss: 4938.301270, avg loss: 2.867771, ppl: 17.597744 +epoch: 1, batch: 35581, sum loss: 4975.620605, avg loss: 2.708558, ppl: 15.007617 +epoch: 1, batch: 35582, sum loss: 5086.155762, avg loss: 2.698226, ppl: 14.853358 +epoch: 1, batch: 35583, sum loss: 3927.544189, avg loss: 2.560329, ppl: 12.940070 +epoch: 1, batch: 35584, sum loss: 4522.933594, avg loss: 2.819784, ppl: 16.773230 +epoch: 1, batch: 35585, sum loss: 4316.850098, avg loss: 2.475258, ppl: 11.884771 +epoch: 1, batch: 35586, sum loss: 3931.465576, avg loss: 2.324935, ppl: 10.226020 +epoch: 1, batch: 35587, sum loss: 3575.754883, avg loss: 2.566945, ppl: 13.025973 +epoch: 1, batch: 35588, sum loss: 3599.605469, avg loss: 2.242745, ppl: 9.419148 +epoch: 1, batch: 35589, sum loss: 4214.681152, avg loss: 2.540495, ppl: 12.685948 +epoch: 1, batch: 35590, sum loss: 4806.153320, avg loss: 2.579792, ppl: 13.194397 +epoch: 1, batch: 35591, sum loss: 4846.352539, avg loss: 2.782062, ppl: 16.152298 +epoch: 1, batch: 35592, sum loss: 
4177.402344, avg loss: 2.542546, ppl: 12.711988 +epoch: 1, batch: 35593, sum loss: 3784.553711, avg loss: 2.180043, ppl: 8.846683 +epoch: 1, batch: 35594, sum loss: 4435.848633, avg loss: 2.555212, ppl: 12.874032 +epoch: 1, batch: 35595, sum loss: 3242.727051, avg loss: 2.336259, ppl: 10.342469 +epoch: 1, batch: 35596, sum loss: 4280.110840, avg loss: 2.504453, ppl: 12.236869 +epoch: 1, batch: 35597, sum loss: 4484.512695, avg loss: 2.788876, ppl: 16.262732 +epoch: 1, batch: 35598, sum loss: 5101.511719, avg loss: 2.666760, ppl: 14.393255 +epoch: 1, batch: 35599, sum loss: 4018.458496, avg loss: 2.488210, ppl: 12.039702 +epoch: 1, batch: 35600, sum loss: 4333.720215, avg loss: 2.541771, ppl: 12.702152 +epoch: 1, batch: 35601, sum loss: 4473.408203, avg loss: 2.688346, ppl: 14.707335 +epoch: 1, batch: 35602, sum loss: 3802.016602, avg loss: 2.391205, ppl: 10.926656 +epoch: 1, batch: 35603, sum loss: 4065.777100, avg loss: 2.604598, ppl: 13.525784 +epoch: 1, batch: 35604, sum loss: 4682.663574, avg loss: 2.777381, ppl: 16.076857 +epoch: 1, batch: 35605, sum loss: 3476.183594, avg loss: 2.179425, ppl: 8.841226 +epoch: 1, batch: 35606, sum loss: 4672.458984, avg loss: 2.759870, ppl: 15.797782 +epoch: 1, batch: 35607, sum loss: 4491.064453, avg loss: 2.615646, ppl: 13.676050 +epoch: 1, batch: 35608, sum loss: 4235.921387, avg loss: 2.576595, ppl: 13.152273 +epoch: 1, batch: 35609, sum loss: 4220.835449, avg loss: 2.533514, ppl: 12.597703 +epoch: 1, batch: 35610, sum loss: 3725.307129, avg loss: 2.411202, ppl: 11.147354 +epoch: 1, batch: 35611, sum loss: 4838.239746, avg loss: 2.702927, ppl: 14.923350 +epoch: 1, batch: 35612, sum loss: 4527.261719, avg loss: 2.815461, ppl: 16.700876 +epoch: 1, batch: 35613, sum loss: 3919.066895, avg loss: 2.441786, ppl: 11.493550 +epoch: 1, batch: 35614, sum loss: 3935.531250, avg loss: 2.490843, ppl: 12.071443 +epoch: 1, batch: 35615, sum loss: 4076.725098, avg loss: 2.710588, ppl: 15.038122 +epoch: 1, batch: 35616, sum loss: 
4549.997559, avg loss: 2.645347, ppl: 14.088338 +epoch: 1, batch: 35617, sum loss: 4355.325195, avg loss: 2.561956, ppl: 12.961146 +epoch: 1, batch: 35618, sum loss: 4022.165771, avg loss: 2.601660, ppl: 13.486100 +epoch: 1, batch: 35619, sum loss: 4857.518555, avg loss: 2.485936, ppl: 12.012355 +epoch: 1, batch: 35620, sum loss: 4602.655762, avg loss: 2.834148, ppl: 17.015892 +epoch: 1, batch: 35621, sum loss: 4924.610840, avg loss: 2.854847, ppl: 17.371778 +epoch: 1, batch: 35622, sum loss: 4255.354004, avg loss: 2.571211, ppl: 13.081655 +epoch: 1, batch: 35623, sum loss: 5128.676758, avg loss: 2.696465, ppl: 14.827229 +epoch: 1, batch: 35624, sum loss: 4085.212158, avg loss: 2.577421, ppl: 13.163146 +epoch: 1, batch: 35625, sum loss: 5171.565918, avg loss: 2.680957, ppl: 14.599055 +epoch: 1, batch: 35626, sum loss: 4722.516602, avg loss: 2.672618, ppl: 14.477828 +epoch: 1, batch: 35627, sum loss: 4252.450195, avg loss: 2.460909, ppl: 11.715452 +epoch: 1, batch: 35628, sum loss: 5525.215332, avg loss: 2.879216, ppl: 17.800306 +epoch: 1, batch: 35629, sum loss: 4526.987793, avg loss: 2.641183, ppl: 14.029793 +epoch: 1, batch: 35630, sum loss: 4945.713867, avg loss: 2.626508, ppl: 13.825400 +epoch: 1, batch: 35631, sum loss: 4227.465820, avg loss: 2.421229, ppl: 11.259690 +epoch: 1, batch: 35632, sum loss: 4754.020020, avg loss: 2.401020, ppl: 11.034427 +epoch: 1, batch: 35633, sum loss: 4809.583984, avg loss: 2.749905, ppl: 15.641148 +epoch: 1, batch: 35634, sum loss: 3900.162842, avg loss: 2.594919, ppl: 13.395498 +epoch: 1, batch: 35635, sum loss: 5279.033203, avg loss: 2.658123, ppl: 14.269487 +epoch: 1, batch: 35636, sum loss: 3996.831543, avg loss: 2.450541, ppl: 11.594612 +epoch: 1, batch: 35637, sum loss: 4150.711914, avg loss: 2.467724, ppl: 11.795570 +epoch: 1, batch: 35638, sum loss: 4683.534668, avg loss: 2.787818, ppl: 16.245541 +epoch: 1, batch: 35639, sum loss: 3567.272949, avg loss: 2.289649, ppl: 9.871472 +epoch: 1, batch: 35640, sum loss: 
4898.044434, avg loss: 2.701624, ppl: 14.903918 +epoch: 1, batch: 35641, sum loss: 4670.663574, avg loss: 2.616618, ppl: 13.689353 +epoch: 1, batch: 35642, sum loss: 4411.414062, avg loss: 2.703073, ppl: 14.925521 +epoch: 1, batch: 35643, sum loss: 4576.028320, avg loss: 2.530989, ppl: 12.565929 +epoch: 1, batch: 35644, sum loss: 5025.250977, avg loss: 2.664502, ppl: 14.360798 +epoch: 1, batch: 35645, sum loss: 3766.824951, avg loss: 2.565957, ppl: 13.013107 +epoch: 1, batch: 35646, sum loss: 4038.229492, avg loss: 2.683209, ppl: 14.631974 +epoch: 1, batch: 35647, sum loss: 4155.952637, avg loss: 2.645419, ppl: 14.089342 +epoch: 1, batch: 35648, sum loss: 4190.525879, avg loss: 2.715830, ppl: 15.117157 +epoch: 1, batch: 35649, sum loss: 4005.297363, avg loss: 2.503311, ppl: 12.222893 +epoch: 1, batch: 35650, sum loss: 4653.370117, avg loss: 2.697606, ppl: 14.844153 +epoch: 1, batch: 35651, sum loss: 3756.769287, avg loss: 2.433141, ppl: 11.394616 +epoch: 1, batch: 35652, sum loss: 4511.734375, avg loss: 2.650843, ppl: 14.165971 +epoch: 1, batch: 35653, sum loss: 4470.600098, avg loss: 2.483667, ppl: 11.985132 +epoch: 1, batch: 35654, sum loss: 4045.226318, avg loss: 2.489370, ppl: 12.053681 +epoch: 1, batch: 35655, sum loss: 4723.018066, avg loss: 2.613734, ppl: 13.649928 +epoch: 1, batch: 35656, sum loss: 3544.992676, avg loss: 2.332232, ppl: 10.300910 +epoch: 1, batch: 35657, sum loss: 5283.370605, avg loss: 2.774880, ppl: 16.036697 +epoch: 1, batch: 35658, sum loss: 4179.893555, avg loss: 2.609172, ppl: 13.587793 +epoch: 1, batch: 35659, sum loss: 4463.364746, avg loss: 2.622423, ppl: 13.769052 +epoch: 1, batch: 35660, sum loss: 4103.651367, avg loss: 2.447019, ppl: 11.553857 +epoch: 1, batch: 35661, sum loss: 3574.107910, avg loss: 2.663270, ppl: 14.343111 +epoch: 1, batch: 35662, sum loss: 5355.705566, avg loss: 2.783631, ppl: 16.177652 +epoch: 1, batch: 35663, sum loss: 4038.171875, avg loss: 2.563919, ppl: 12.986607 +epoch: 1, batch: 35664, sum loss: 
4451.295898, avg loss: 2.589468, ppl: 13.322685 +epoch: 1, batch: 35665, sum loss: 4501.022949, avg loss: 2.759671, ppl: 15.794641 +epoch: 1, batch: 35666, sum loss: 5233.986816, avg loss: 2.919123, ppl: 18.525028 +epoch: 1, batch: 35667, sum loss: 4153.426758, avg loss: 2.540322, ppl: 12.683755 +epoch: 1, batch: 35668, sum loss: 4313.631348, avg loss: 2.625460, ppl: 13.810927 +epoch: 1, batch: 35669, sum loss: 4103.337891, avg loss: 2.356886, ppl: 10.558019 +epoch: 1, batch: 35670, sum loss: 3602.064209, avg loss: 2.328419, ppl: 10.261704 +epoch: 1, batch: 35671, sum loss: 4548.425293, avg loss: 2.563937, ppl: 12.986852 +epoch: 1, batch: 35672, sum loss: 4431.268066, avg loss: 2.582324, ppl: 13.227844 +epoch: 1, batch: 35673, sum loss: 4920.237305, avg loss: 2.715363, ppl: 15.110091 +epoch: 1, batch: 35674, sum loss: 5405.865723, avg loss: 2.730235, ppl: 15.336493 +epoch: 1, batch: 35675, sum loss: 4413.116699, avg loss: 2.827109, ppl: 16.896540 +epoch: 1, batch: 35676, sum loss: 4402.598633, avg loss: 2.729447, ppl: 15.324416 +epoch: 1, batch: 35677, sum loss: 4331.271484, avg loss: 2.607629, ppl: 13.566843 +epoch: 1, batch: 35678, sum loss: 4161.058594, avg loss: 2.416410, ppl: 11.205562 +epoch: 1, batch: 35679, sum loss: 5839.406250, avg loss: 2.899407, ppl: 18.163374 +epoch: 1, batch: 35680, sum loss: 3412.754883, avg loss: 2.350382, ppl: 10.489577 +epoch: 1, batch: 35681, sum loss: 4205.506348, avg loss: 2.675258, ppl: 14.516100 +epoch: 1, batch: 35682, sum loss: 4096.172363, avg loss: 2.512989, ppl: 12.341768 +epoch: 1, batch: 35683, sum loss: 4836.651367, avg loss: 2.538925, ppl: 12.666044 +epoch: 1, batch: 35684, sum loss: 4851.242188, avg loss: 2.528005, ppl: 12.528488 +epoch: 1, batch: 35685, sum loss: 4513.409668, avg loss: 2.640965, ppl: 14.026736 +epoch: 1, batch: 35686, sum loss: 4288.076660, avg loss: 2.779052, ppl: 16.103745 +epoch: 1, batch: 35687, sum loss: 5301.215332, avg loss: 2.761050, ppl: 15.816438 +epoch: 1, batch: 35688, sum loss: 
4789.939453, avg loss: 2.620317, ppl: 13.740078 +epoch: 1, batch: 35689, sum loss: 4584.882812, avg loss: 2.510889, ppl: 12.315872 +epoch: 1, batch: 35690, sum loss: 4713.589844, avg loss: 2.532826, ppl: 12.589038 +epoch: 1, batch: 35691, sum loss: 4743.407715, avg loss: 2.769065, ppl: 15.943714 +epoch: 1, batch: 35692, sum loss: 4769.849121, avg loss: 2.747609, ppl: 15.605273 +epoch: 1, batch: 35693, sum loss: 4152.719238, avg loss: 2.513753, ppl: 12.351193 +epoch: 1, batch: 35694, sum loss: 4145.527832, avg loss: 2.653987, ppl: 14.210586 +epoch: 1, batch: 35695, sum loss: 4459.374512, avg loss: 2.620079, ppl: 13.736810 +epoch: 1, batch: 35696, sum loss: 4082.317871, avg loss: 2.501420, ppl: 12.199808 +epoch: 1, batch: 35697, sum loss: 3474.238281, avg loss: 2.250154, ppl: 9.489202 +epoch: 1, batch: 35698, sum loss: 4231.329102, avg loss: 2.389231, ppl: 10.905109 +epoch: 1, batch: 35699, sum loss: 4184.690430, avg loss: 2.388522, ppl: 10.897377 +epoch: 1, batch: 35700, sum loss: 4833.728027, avg loss: 2.773223, ppl: 16.010155 +epoch: 1, batch: 35701, sum loss: 4737.476562, avg loss: 2.732109, ppl: 15.365252 +epoch: 1, batch: 35702, sum loss: 5355.694824, avg loss: 2.733892, ppl: 15.392682 +epoch: 1, batch: 35703, sum loss: 4570.248535, avg loss: 2.751504, ppl: 15.666179 +epoch: 1, batch: 35704, sum loss: 2885.972656, avg loss: 2.219979, ppl: 9.207138 +epoch: 1, batch: 35705, sum loss: 5336.368164, avg loss: 2.722637, ppl: 15.220401 +epoch: 1, batch: 35706, sum loss: 4238.843750, avg loss: 2.558143, ppl: 12.911822 +epoch: 1, batch: 35707, sum loss: 4493.166016, avg loss: 2.753165, ppl: 15.692226 +epoch: 1, batch: 35708, sum loss: 4571.088379, avg loss: 2.431430, ppl: 11.375135 +epoch: 1, batch: 35709, sum loss: 4855.578613, avg loss: 2.694550, ppl: 14.798855 +epoch: 1, batch: 35710, sum loss: 3740.112793, avg loss: 2.258522, ppl: 9.568938 +epoch: 1, batch: 35711, sum loss: 3965.380371, avg loss: 2.449278, ppl: 11.579979 +epoch: 1, batch: 35712, sum loss: 
4002.613281, avg loss: 2.434680, ppl: 11.412161 +epoch: 1, batch: 35713, sum loss: 4778.011719, avg loss: 2.742831, ppl: 15.530890 +epoch: 1, batch: 35714, sum loss: 4293.059082, avg loss: 2.617719, ppl: 13.704427 +epoch: 1, batch: 35715, sum loss: 4402.724609, avg loss: 2.650647, ppl: 14.163198 +epoch: 1, batch: 35716, sum loss: 4923.366699, avg loss: 2.562919, ppl: 12.973628 +epoch: 1, batch: 35717, sum loss: 4764.159180, avg loss: 2.741173, ppl: 15.505166 +epoch: 1, batch: 35718, sum loss: 5412.258301, avg loss: 2.802827, ppl: 16.491196 +epoch: 1, batch: 35719, sum loss: 4406.144043, avg loss: 2.704815, ppl: 14.951552 +epoch: 1, batch: 35720, sum loss: 4508.455078, avg loss: 2.884488, ppl: 17.894405 +epoch: 1, batch: 35721, sum loss: 4644.685547, avg loss: 2.700398, ppl: 14.885662 +epoch: 1, batch: 35722, sum loss: 4023.761475, avg loss: 2.390827, ppl: 10.922520 +epoch: 1, batch: 35723, sum loss: 4666.375977, avg loss: 2.774302, ppl: 16.027435 +epoch: 1, batch: 35724, sum loss: 3690.504883, avg loss: 2.432765, ppl: 11.390336 +epoch: 1, batch: 35725, sum loss: 4056.373047, avg loss: 2.613643, ppl: 13.648679 +epoch: 1, batch: 35726, sum loss: 4753.850098, avg loss: 2.554460, ppl: 12.864351 +epoch: 1, batch: 35727, sum loss: 4458.230469, avg loss: 2.708524, ppl: 15.007109 +epoch: 1, batch: 35728, sum loss: 4605.766113, avg loss: 2.609499, ppl: 13.592242 +epoch: 1, batch: 35729, sum loss: 4127.711426, avg loss: 2.591156, ppl: 13.345190 +epoch: 1, batch: 35730, sum loss: 4316.262207, avg loss: 2.537485, ppl: 12.647823 +epoch: 1, batch: 35731, sum loss: 5143.107910, avg loss: 2.606745, ppl: 13.554858 +epoch: 1, batch: 35732, sum loss: 4577.938965, avg loss: 2.615965, ppl: 13.680413 +epoch: 1, batch: 35733, sum loss: 5030.717773, avg loss: 2.804191, ppl: 16.513704 +epoch: 1, batch: 35734, sum loss: 4831.869629, avg loss: 2.850661, ppl: 17.299206 +epoch: 1, batch: 35735, sum loss: 3790.875977, avg loss: 2.502228, ppl: 12.209673 +epoch: 1, batch: 35736, sum loss: 
3923.285645, avg loss: 2.562564, ppl: 12.969029 +epoch: 1, batch: 35737, sum loss: 3605.998535, avg loss: 2.288070, ppl: 9.855900 +epoch: 1, batch: 35738, sum loss: 3964.327148, avg loss: 2.526659, ppl: 12.511629 +epoch: 1, batch: 35739, sum loss: 3705.783447, avg loss: 2.372461, ppl: 10.723746 +epoch: 1, batch: 35740, sum loss: 4383.868164, avg loss: 2.711112, ppl: 15.046001 +epoch: 1, batch: 35741, sum loss: 4788.319336, avg loss: 2.620864, ppl: 13.747602 +epoch: 1, batch: 35742, sum loss: 4842.488770, avg loss: 2.631787, ppl: 13.898589 +epoch: 1, batch: 35743, sum loss: 4604.037598, avg loss: 2.761870, ppl: 15.829418 +epoch: 1, batch: 35744, sum loss: 5771.475586, avg loss: 2.932661, ppl: 18.777523 +epoch: 1, batch: 35745, sum loss: 4485.028320, avg loss: 2.597005, ppl: 13.423479 +epoch: 1, batch: 35746, sum loss: 4594.912109, avg loss: 2.649892, ppl: 14.152505 +epoch: 1, batch: 35747, sum loss: 4876.430664, avg loss: 2.800937, ppl: 16.460058 +epoch: 1, batch: 35748, sum loss: 4742.165039, avg loss: 2.661148, ppl: 14.312708 +epoch: 1, batch: 35749, sum loss: 4151.903320, avg loss: 2.567658, ppl: 13.035266 +epoch: 1, batch: 35750, sum loss: 4599.310059, avg loss: 2.582431, ppl: 13.229263 +epoch: 1, batch: 35751, sum loss: 5270.859375, avg loss: 2.886561, ppl: 17.931532 +epoch: 1, batch: 35752, sum loss: 4213.688965, avg loss: 2.503677, ppl: 12.227376 +epoch: 1, batch: 35753, sum loss: 4795.532715, avg loss: 3.002838, ppl: 20.142628 +epoch: 1, batch: 35754, sum loss: 4696.326660, avg loss: 2.580399, ppl: 13.202409 +epoch: 1, batch: 35755, sum loss: 4677.919434, avg loss: 2.553450, ppl: 12.851360 +epoch: 1, batch: 35756, sum loss: 4148.570312, avg loss: 2.506689, ppl: 12.264256 +epoch: 1, batch: 35757, sum loss: 4628.042480, avg loss: 2.616191, ppl: 13.683509 +epoch: 1, batch: 35758, sum loss: 4103.101562, avg loss: 2.704747, ppl: 14.950537 +epoch: 1, batch: 35759, sum loss: 3838.179199, avg loss: 2.412432, ppl: 11.161072 +epoch: 1, batch: 35760, sum loss: 
4461.471680, avg loss: 2.841702, ppl: 17.144917 +epoch: 1, batch: 35761, sum loss: 4092.127686, avg loss: 2.576907, ppl: 13.156379 +epoch: 1, batch: 35762, sum loss: 3768.783691, avg loss: 2.562056, ppl: 12.962436 +epoch: 1, batch: 35763, sum loss: 5046.175781, avg loss: 2.857404, ppl: 17.416256 +epoch: 1, batch: 35764, sum loss: 4339.099121, avg loss: 2.471013, ppl: 11.834433 +epoch: 1, batch: 35765, sum loss: 3825.621582, avg loss: 2.370274, ppl: 10.700319 +epoch: 1, batch: 35766, sum loss: 3220.547363, avg loss: 2.267991, ppl: 9.659977 +epoch: 1, batch: 35767, sum loss: 4449.068848, avg loss: 2.336696, ppl: 10.346992 +epoch: 1, batch: 35768, sum loss: 4027.165283, avg loss: 2.394272, ppl: 10.960215 +epoch: 1, batch: 35769, sum loss: 4154.866211, avg loss: 2.512011, ppl: 12.329700 +epoch: 1, batch: 35770, sum loss: 4708.122070, avg loss: 2.565734, ppl: 13.010206 +epoch: 1, batch: 35771, sum loss: 3889.960693, avg loss: 2.517774, ppl: 12.400963 +epoch: 1, batch: 35772, sum loss: 4684.495605, avg loss: 2.565441, ppl: 13.006398 +epoch: 1, batch: 35773, sum loss: 4053.171875, avg loss: 2.553984, ppl: 12.858225 +epoch: 1, batch: 35774, sum loss: 3049.980225, avg loss: 2.236056, ppl: 9.356356 +epoch: 1, batch: 35775, sum loss: 4778.985840, avg loss: 2.504709, ppl: 12.240003 +epoch: 1, batch: 35776, sum loss: 4454.900391, avg loss: 2.551489, ppl: 12.826192 +epoch: 1, batch: 35777, sum loss: 4841.338379, avg loss: 2.757026, ppl: 15.752931 +epoch: 1, batch: 35778, sum loss: 4730.167969, avg loss: 2.812228, ppl: 16.646969 +epoch: 1, batch: 35779, sum loss: 4950.937988, avg loss: 2.917465, ppl: 18.494345 +epoch: 1, batch: 35780, sum loss: 4486.409668, avg loss: 2.405581, ppl: 11.084874 +epoch: 1, batch: 35781, sum loss: 4459.067871, avg loss: 2.727259, ppl: 15.290913 +epoch: 1, batch: 35782, sum loss: 4805.227051, avg loss: 2.758454, ppl: 15.775436 +epoch: 1, batch: 35783, sum loss: 4213.615723, avg loss: 2.646744, ppl: 14.108022 +epoch: 1, batch: 35784, sum loss: 
4650.241699, avg loss: 2.784576, ppl: 16.192949 +epoch: 1, batch: 35785, sum loss: 4811.198730, avg loss: 2.853617, ppl: 17.350433 +epoch: 1, batch: 35786, sum loss: 3606.890625, avg loss: 2.393425, ppl: 10.950932 +epoch: 1, batch: 35787, sum loss: 4392.500000, avg loss: 2.649276, ppl: 14.143799 +epoch: 1, batch: 35788, sum loss: 3302.273926, avg loss: 2.145727, ppl: 8.548255 +epoch: 1, batch: 35789, sum loss: 4337.506348, avg loss: 2.535071, ppl: 12.617329 +epoch: 1, batch: 35790, sum loss: 4223.155762, avg loss: 2.733434, ppl: 15.385633 +epoch: 1, batch: 35791, sum loss: 4708.610352, avg loss: 2.610094, ppl: 13.600333 +epoch: 1, batch: 35792, sum loss: 3785.617676, avg loss: 2.594666, ppl: 13.392114 +epoch: 1, batch: 35793, sum loss: 4490.695312, avg loss: 2.640033, ppl: 14.013659 +epoch: 1, batch: 35794, sum loss: 3828.280518, avg loss: 2.500510, ppl: 12.188708 +epoch: 1, batch: 35795, sum loss: 3960.506104, avg loss: 2.470684, ppl: 11.830534 +epoch: 1, batch: 35796, sum loss: 4601.743652, avg loss: 2.408029, ppl: 11.112039 +epoch: 1, batch: 35797, sum loss: 5038.007324, avg loss: 2.961792, ppl: 19.332575 +epoch: 1, batch: 35798, sum loss: 4419.345215, avg loss: 2.727991, ppl: 15.302115 +epoch: 1, batch: 35799, sum loss: 4750.205078, avg loss: 2.832561, ppl: 16.988914 +epoch: 1, batch: 35800, sum loss: 4903.748535, avg loss: 2.887956, ppl: 17.956562 +epoch: 1, batch: 35801, sum loss: 3764.555908, avg loss: 2.259637, ppl: 9.579615 +epoch: 1, batch: 35802, sum loss: 5252.969238, avg loss: 2.788200, ppl: 16.251747 +epoch: 1, batch: 35803, sum loss: 5177.859863, avg loss: 2.824801, ppl: 16.857586 +epoch: 1, batch: 35804, sum loss: 4327.210449, avg loss: 2.770301, ppl: 15.963444 +epoch: 1, batch: 35805, sum loss: 4777.759277, avg loss: 2.678116, ppl: 14.557646 +epoch: 1, batch: 35806, sum loss: 4183.729492, avg loss: 2.422542, ppl: 11.274481 +epoch: 1, batch: 35807, sum loss: 4434.744141, avg loss: 2.679604, ppl: 14.579316 +epoch: 1, batch: 35808, sum loss: 
5376.928223, avg loss: 2.647429, ppl: 14.117695 +epoch: 1, batch: 35809, sum loss: 3754.033691, avg loss: 2.545108, ppl: 12.744599 +epoch: 1, batch: 35810, sum loss: 4374.071289, avg loss: 2.627070, ppl: 13.833179 +epoch: 1, batch: 35811, sum loss: 4416.391113, avg loss: 2.716108, ppl: 15.121349 +epoch: 1, batch: 35812, sum loss: 3473.598633, avg loss: 2.506204, ppl: 12.258307 +epoch: 1, batch: 35813, sum loss: 4786.237793, avg loss: 2.749131, ppl: 15.629051 +epoch: 1, batch: 35814, sum loss: 4011.616699, avg loss: 2.279328, ppl: 9.770109 +epoch: 1, batch: 35815, sum loss: 3464.913818, avg loss: 2.445246, ppl: 11.533389 +epoch: 1, batch: 35816, sum loss: 4468.071777, avg loss: 2.731095, ppl: 15.349690 +epoch: 1, batch: 35817, sum loss: 4442.441895, avg loss: 2.572346, ppl: 13.096516 +epoch: 1, batch: 35818, sum loss: 3702.204834, avg loss: 2.443700, ppl: 11.515565 +epoch: 1, batch: 35819, sum loss: 4086.435059, avg loss: 2.565245, ppl: 13.003842 +epoch: 1, batch: 35820, sum loss: 3819.985352, avg loss: 2.508198, ppl: 12.282777 +epoch: 1, batch: 35821, sum loss: 4148.626465, avg loss: 2.559301, ppl: 12.926774 +epoch: 1, batch: 35822, sum loss: 4368.866699, avg loss: 2.545960, ppl: 12.755464 +epoch: 1, batch: 35823, sum loss: 3921.484863, avg loss: 2.291926, ppl: 9.893974 +epoch: 1, batch: 35824, sum loss: 4592.888672, avg loss: 2.722519, ppl: 15.218605 +epoch: 1, batch: 35825, sum loss: 5060.298828, avg loss: 2.643834, ppl: 14.067035 +epoch: 1, batch: 35826, sum loss: 3824.565674, avg loss: 2.349242, ppl: 10.477622 +epoch: 1, batch: 35827, sum loss: 4281.406738, avg loss: 2.528888, ppl: 12.539551 +epoch: 1, batch: 35828, sum loss: 4182.117188, avg loss: 2.539233, ppl: 12.669952 +epoch: 1, batch: 35829, sum loss: 4415.427246, avg loss: 2.715515, ppl: 15.112390 +epoch: 1, batch: 35830, sum loss: 4438.018555, avg loss: 2.610599, ppl: 13.607203 +epoch: 1, batch: 35831, sum loss: 4242.462891, avg loss: 2.558784, ppl: 12.920091 +epoch: 1, batch: 35832, sum loss: 
4527.182129, avg loss: 2.627500, ppl: 13.839127 +epoch: 1, batch: 35833, sum loss: 3987.717529, avg loss: 2.375055, ppl: 10.751605 +epoch: 1, batch: 35834, sum loss: 4509.028320, avg loss: 2.493932, ppl: 12.108788 +epoch: 1, batch: 35835, sum loss: 4620.114258, avg loss: 2.862524, ppl: 17.505661 +epoch: 1, batch: 35836, sum loss: 5599.842285, avg loss: 2.812578, ppl: 16.652790 +epoch: 1, batch: 35837, sum loss: 4064.323486, avg loss: 2.427912, ppl: 11.335184 +epoch: 1, batch: 35838, sum loss: 3920.706055, avg loss: 2.432200, ppl: 11.383896 +epoch: 1, batch: 35839, sum loss: 5057.762695, avg loss: 2.570002, ppl: 13.065845 +epoch: 1, batch: 35840, sum loss: 4027.670410, avg loss: 2.620475, ppl: 13.742253 +epoch: 1, batch: 35841, sum loss: 3932.205078, avg loss: 2.419818, ppl: 11.243817 +epoch: 1, batch: 35842, sum loss: 4652.798828, avg loss: 2.470950, ppl: 11.833682 +epoch: 1, batch: 35843, sum loss: 3962.728027, avg loss: 2.528863, ppl: 12.539240 +epoch: 1, batch: 35844, sum loss: 5351.116699, avg loss: 2.974495, ppl: 19.579737 +epoch: 1, batch: 35845, sum loss: 4532.438477, avg loss: 2.773830, ppl: 16.019876 +epoch: 1, batch: 35846, sum loss: 4456.274414, avg loss: 2.577371, ppl: 13.162490 +epoch: 1, batch: 35847, sum loss: 3938.951660, avg loss: 2.534718, ppl: 12.612874 +epoch: 1, batch: 35848, sum loss: 4504.736816, avg loss: 2.784139, ppl: 16.185875 +epoch: 1, batch: 35849, sum loss: 4497.787109, avg loss: 2.675662, ppl: 14.521954 +epoch: 1, batch: 35850, sum loss: 4747.583496, avg loss: 2.779615, ppl: 16.112823 +epoch: 1, batch: 35851, sum loss: 4167.813965, avg loss: 2.370770, ppl: 10.705635 +epoch: 1, batch: 35852, sum loss: 3839.977539, avg loss: 2.644613, ppl: 14.077990 +epoch: 1, batch: 35853, sum loss: 5410.903809, avg loss: 2.639465, ppl: 14.005713 +epoch: 1, batch: 35854, sum loss: 5034.750000, avg loss: 2.712689, ppl: 15.069739 +epoch: 1, batch: 35855, sum loss: 3969.388184, avg loss: 2.565862, ppl: 13.011869 +epoch: 1, batch: 35856, sum loss: 
4122.605469, avg loss: 2.290336, ppl: 9.878260 +epoch: 1, batch: 35857, sum loss: 4436.624512, avg loss: 2.685608, ppl: 14.667118 +epoch: 1, batch: 35858, sum loss: 5173.427734, avg loss: 3.007807, ppl: 20.242954 +epoch: 1, batch: 35859, sum loss: 4716.237305, avg loss: 2.726149, ppl: 15.273948 +epoch: 1, batch: 35860, sum loss: 4205.531250, avg loss: 2.460814, ppl: 11.714343 +epoch: 1, batch: 35861, sum loss: 4498.201172, avg loss: 2.492078, ppl: 12.086370 +epoch: 1, batch: 35862, sum loss: 3576.236328, avg loss: 2.334358, ppl: 10.322830 +epoch: 1, batch: 35863, sum loss: 4253.602539, avg loss: 2.500648, ppl: 12.190391 +epoch: 1, batch: 35864, sum loss: 4093.896484, avg loss: 2.604260, ppl: 13.521215 +epoch: 1, batch: 35865, sum loss: 3524.421387, avg loss: 2.476754, ppl: 11.902568 +epoch: 1, batch: 35866, sum loss: 4247.966797, avg loss: 2.478394, ppl: 11.922100 +epoch: 1, batch: 35867, sum loss: 4995.540527, avg loss: 2.672841, ppl: 14.481056 +epoch: 1, batch: 35868, sum loss: 5036.096680, avg loss: 2.897639, ppl: 18.131287 +epoch: 1, batch: 35869, sum loss: 4329.483887, avg loss: 2.506939, ppl: 12.267324 +epoch: 1, batch: 35870, sum loss: 5106.663086, avg loss: 2.706234, ppl: 14.972777 +epoch: 1, batch: 35871, sum loss: 4763.602539, avg loss: 2.812044, ppl: 16.643902 +epoch: 1, batch: 35872, sum loss: 4598.873047, avg loss: 2.683123, ppl: 14.630715 +epoch: 1, batch: 35873, sum loss: 3929.598877, avg loss: 2.747971, ppl: 15.610929 +epoch: 1, batch: 35874, sum loss: 4861.890137, avg loss: 2.675779, ppl: 14.523658 +epoch: 1, batch: 35875, sum loss: 3132.378906, avg loss: 2.123647, ppl: 8.361574 +epoch: 1, batch: 35876, sum loss: 4458.702148, avg loss: 2.587755, ppl: 13.299883 +epoch: 1, batch: 35877, sum loss: 4416.925781, avg loss: 2.438943, ppl: 11.460922 +epoch: 1, batch: 35878, sum loss: 4169.295410, avg loss: 2.448206, ppl: 11.567581 +epoch: 1, batch: 35879, sum loss: 3528.642334, avg loss: 2.326066, ppl: 10.237588 +epoch: 1, batch: 35880, sum loss: 
3587.504639, avg loss: 2.409338, ppl: 11.126595 +epoch: 1, batch: 35881, sum loss: 4468.790039, avg loss: 2.589102, ppl: 13.317807 +epoch: 1, batch: 35882, sum loss: 4466.989746, avg loss: 2.518033, ppl: 12.404168 +epoch: 1, batch: 35883, sum loss: 5358.260742, avg loss: 2.773427, ppl: 16.013418 +epoch: 1, batch: 35884, sum loss: 4784.805176, avg loss: 2.756224, ppl: 15.740297 +epoch: 1, batch: 35885, sum loss: 4516.630371, avg loss: 2.579458, ppl: 13.189984 +epoch: 1, batch: 35886, sum loss: 4163.175293, avg loss: 2.492919, ppl: 12.096539 +epoch: 1, batch: 35887, sum loss: 4039.453369, avg loss: 2.433406, ppl: 11.397633 +epoch: 1, batch: 35888, sum loss: 4012.870117, avg loss: 2.514330, ppl: 12.358322 +epoch: 1, batch: 35889, sum loss: 4428.851074, avg loss: 2.733859, ppl: 15.392168 +epoch: 1, batch: 35890, sum loss: 3541.272949, avg loss: 2.317587, ppl: 10.151152 +epoch: 1, batch: 35891, sum loss: 4561.139160, avg loss: 2.628899, ppl: 13.858498 +epoch: 1, batch: 35892, sum loss: 4352.583984, avg loss: 2.366821, ppl: 10.663443 +epoch: 1, batch: 35893, sum loss: 5458.731934, avg loss: 2.741704, ppl: 15.513390 +epoch: 1, batch: 35894, sum loss: 4924.290527, avg loss: 2.817100, ppl: 16.728270 +epoch: 1, batch: 35895, sum loss: 4419.983887, avg loss: 2.525705, ppl: 12.499705 +epoch: 1, batch: 35896, sum loss: 5179.828613, avg loss: 2.714795, ppl: 15.101512 +epoch: 1, batch: 35897, sum loss: 4121.717285, avg loss: 2.397741, ppl: 10.998306 +epoch: 1, batch: 35898, sum loss: 3671.991699, avg loss: 2.555318, ppl: 12.875392 +epoch: 1, batch: 35899, sum loss: 4940.383789, avg loss: 2.766172, ppl: 15.897668 +epoch: 1, batch: 35900, sum loss: 4488.009277, avg loss: 2.743282, ppl: 15.537898 +epoch: 1, batch: 35901, sum loss: 4816.436523, avg loss: 2.756976, ppl: 15.752131 +epoch: 1, batch: 35902, sum loss: 4512.604980, avg loss: 2.433983, ppl: 11.404216 +epoch: 1, batch: 35903, sum loss: 4559.539062, avg loss: 2.631009, ppl: 13.887777 +epoch: 1, batch: 35904, sum loss: 
3924.452393, avg loss: 2.442099, ppl: 11.497144 +epoch: 1, batch: 35905, sum loss: 5176.263184, avg loss: 2.601137, ppl: 13.479057 +epoch: 1, batch: 35906, sum loss: 3967.914551, avg loss: 2.345103, ppl: 10.434350 +epoch: 1, batch: 35907, sum loss: 5020.752441, avg loss: 2.676307, ppl: 14.531336 +epoch: 1, batch: 35908, sum loss: 4911.587891, avg loss: 2.685395, ppl: 14.664000 +epoch: 1, batch: 35909, sum loss: 4413.837891, avg loss: 2.529420, ppl: 12.546226 +epoch: 1, batch: 35910, sum loss: 5135.272461, avg loss: 2.829351, ppl: 16.934473 +epoch: 1, batch: 35911, sum loss: 5277.872070, avg loss: 2.670988, ppl: 14.454241 +epoch: 1, batch: 35912, sum loss: 5512.956543, avg loss: 2.976758, ppl: 19.624096 +epoch: 1, batch: 35913, sum loss: 4141.755859, avg loss: 2.556639, ppl: 12.892419 +epoch: 1, batch: 35914, sum loss: 4624.326660, avg loss: 2.540839, ppl: 12.690313 +epoch: 1, batch: 35915, sum loss: 4839.256836, avg loss: 2.790805, ppl: 16.294136 +epoch: 1, batch: 35916, sum loss: 4027.564697, avg loss: 2.414607, ppl: 11.185377 +epoch: 1, batch: 35917, sum loss: 4657.531250, avg loss: 2.612188, ppl: 13.628839 +epoch: 1, batch: 35918, sum loss: 4753.563965, avg loss: 2.822782, ppl: 16.823582 +epoch: 1, batch: 35919, sum loss: 3687.638428, avg loss: 2.269316, ppl: 9.672782 +epoch: 1, batch: 35920, sum loss: 4702.718750, avg loss: 2.643462, ppl: 14.061801 +epoch: 1, batch: 35921, sum loss: 4724.303711, avg loss: 2.623156, ppl: 13.779140 +epoch: 1, batch: 35922, sum loss: 4346.571777, avg loss: 2.550805, ppl: 12.817419 +epoch: 1, batch: 35923, sum loss: 4554.581055, avg loss: 2.574664, ppl: 13.126910 +epoch: 1, batch: 35924, sum loss: 3989.760254, avg loss: 2.419503, ppl: 11.240272 +epoch: 1, batch: 35925, sum loss: 4149.568848, avg loss: 2.675415, ppl: 14.518374 +epoch: 1, batch: 35926, sum loss: 5184.692383, avg loss: 2.772563, ppl: 15.999584 +epoch: 1, batch: 35927, sum loss: 4053.663086, avg loss: 2.628835, ppl: 13.857613 +epoch: 1, batch: 35928, sum loss: 
4711.758789, avg loss: 2.745780, ppl: 15.576762 +epoch: 1, batch: 35929, sum loss: 4677.629883, avg loss: 2.771108, ppl: 15.976325 +epoch: 1, batch: 35930, sum loss: 2740.543457, avg loss: 1.958930, ppl: 7.091737 +epoch: 1, batch: 35931, sum loss: 4953.739258, avg loss: 2.570700, ppl: 13.074976 +epoch: 1, batch: 35932, sum loss: 4489.579102, avg loss: 2.604164, ppl: 13.519923 +epoch: 1, batch: 35933, sum loss: 3622.284912, avg loss: 2.324958, ppl: 10.226254 +epoch: 1, batch: 35934, sum loss: 4884.175781, avg loss: 2.438430, ppl: 11.455046 +epoch: 1, batch: 35935, sum loss: 4929.259766, avg loss: 2.756857, ppl: 15.750257 +epoch: 1, batch: 35936, sum loss: 3287.650635, avg loss: 2.310366, ppl: 10.078112 +epoch: 1, batch: 35937, sum loss: 4609.771484, avg loss: 2.658461, ppl: 14.274302 +epoch: 1, batch: 35938, sum loss: 4235.425293, avg loss: 2.536183, ppl: 12.631364 +epoch: 1, batch: 35939, sum loss: 4384.803711, avg loss: 2.657457, ppl: 14.259978 +epoch: 1, batch: 35940, sum loss: 3600.743652, avg loss: 2.381444, ppl: 10.820521 +epoch: 1, batch: 35941, sum loss: 4446.215820, avg loss: 2.509151, ppl: 12.294490 +epoch: 1, batch: 35942, sum loss: 4506.331055, avg loss: 2.768017, ppl: 15.927012 +epoch: 1, batch: 35943, sum loss: 3589.919434, avg loss: 2.580819, ppl: 13.207952 +epoch: 1, batch: 35944, sum loss: 4993.577637, avg loss: 2.727241, ppl: 15.290639 +epoch: 1, batch: 35945, sum loss: 4275.493652, avg loss: 2.472813, ppl: 11.855752 +epoch: 1, batch: 35946, sum loss: 4191.660645, avg loss: 2.569994, ppl: 13.065749 +epoch: 1, batch: 35947, sum loss: 3834.099121, avg loss: 2.564615, ppl: 12.995651 +epoch: 1, batch: 35948, sum loss: 4995.792969, avg loss: 2.683025, ppl: 14.629285 +epoch: 1, batch: 35949, sum loss: 5098.484863, avg loss: 2.680592, ppl: 14.593724 +epoch: 1, batch: 35950, sum loss: 3538.264648, avg loss: 2.440183, ppl: 11.475137 +epoch: 1, batch: 35951, sum loss: 5506.056152, avg loss: 2.695084, ppl: 14.806757 +epoch: 1, batch: 35952, sum loss: 
5390.231445, avg loss: 2.823589, ppl: 16.837172 +epoch: 1, batch: 35953, sum loss: 4775.990234, avg loss: 2.715174, ppl: 15.107234 +epoch: 1, batch: 35954, sum loss: 4281.930176, avg loss: 2.408285, ppl: 11.114882 +epoch: 1, batch: 35955, sum loss: 4931.226562, avg loss: 2.564340, ppl: 12.992085 +epoch: 1, batch: 35956, sum loss: 4818.608398, avg loss: 2.615966, ppl: 13.680420 +epoch: 1, batch: 35957, sum loss: 4298.171875, avg loss: 2.592384, ppl: 13.361583 +epoch: 1, batch: 35958, sum loss: 3873.020264, avg loss: 2.487489, ppl: 12.031028 +epoch: 1, batch: 35959, sum loss: 4700.308594, avg loss: 2.777960, ppl: 16.086172 +epoch: 1, batch: 35960, sum loss: 4622.903320, avg loss: 2.750091, ppl: 15.644056 +epoch: 1, batch: 35961, sum loss: 3857.376953, avg loss: 2.426023, ppl: 11.313800 +epoch: 1, batch: 35962, sum loss: 3578.362305, avg loss: 2.354186, ppl: 10.529552 +epoch: 1, batch: 35963, sum loss: 5002.103516, avg loss: 2.605262, ppl: 13.534775 +epoch: 1, batch: 35964, sum loss: 3962.912598, avg loss: 2.497109, ppl: 12.147330 +epoch: 1, batch: 35965, sum loss: 3459.235840, avg loss: 2.298496, ppl: 9.959190 +epoch: 1, batch: 35966, sum loss: 3942.922363, avg loss: 2.328956, ppl: 10.267216 +epoch: 1, batch: 35967, sum loss: 4432.435547, avg loss: 2.595103, ppl: 13.397964 +epoch: 1, batch: 35968, sum loss: 4324.237793, avg loss: 2.505352, ppl: 12.247869 +epoch: 1, batch: 35969, sum loss: 4605.534668, avg loss: 2.723557, ppl: 15.234415 +epoch: 1, batch: 35970, sum loss: 4777.961914, avg loss: 2.488522, ppl: 12.043464 +epoch: 1, batch: 35971, sum loss: 4357.249023, avg loss: 2.742133, ppl: 15.520053 +epoch: 1, batch: 35972, sum loss: 3809.589844, avg loss: 2.456215, ppl: 11.660594 +epoch: 1, batch: 35973, sum loss: 4419.531738, avg loss: 2.686645, ppl: 14.682338 +epoch: 1, batch: 35974, sum loss: 4179.071777, avg loss: 2.587661, ppl: 13.298624 +epoch: 1, batch: 35975, sum loss: 4537.763672, avg loss: 2.463498, ppl: 11.745831 +epoch: 1, batch: 35976, sum loss: 
4675.039551, avg loss: 2.686804, ppl: 14.684673 +epoch: 1, batch: 35977, sum loss: 4500.178711, avg loss: 2.702810, ppl: 14.921603 +epoch: 1, batch: 35978, sum loss: 4426.594727, avg loss: 2.584118, ppl: 13.251601 +epoch: 1, batch: 35979, sum loss: 4283.806641, avg loss: 2.333228, ppl: 10.311171 +epoch: 1, batch: 35980, sum loss: 5169.458984, avg loss: 2.710781, ppl: 15.041016 +epoch: 1, batch: 35981, sum loss: 5329.948730, avg loss: 2.784717, ppl: 16.195236 +epoch: 1, batch: 35982, sum loss: 4550.880859, avg loss: 2.556675, ppl: 12.892874 +epoch: 1, batch: 35983, sum loss: 4023.267578, avg loss: 2.441303, ppl: 11.488003 +epoch: 1, batch: 35984, sum loss: 4078.126953, avg loss: 2.600846, ppl: 13.475137 +epoch: 1, batch: 35985, sum loss: 4983.204102, avg loss: 2.898897, ppl: 18.154114 +epoch: 1, batch: 35986, sum loss: 5471.498535, avg loss: 2.810220, ppl: 16.613577 +epoch: 1, batch: 35987, sum loss: 4350.475098, avg loss: 2.722450, ppl: 15.217560 +epoch: 1, batch: 35988, sum loss: 3867.137207, avg loss: 2.593653, ppl: 13.378560 +epoch: 1, batch: 35989, sum loss: 4760.903809, avg loss: 2.807137, ppl: 16.562428 +epoch: 1, batch: 35990, sum loss: 5829.796875, avg loss: 2.842417, ppl: 17.157181 +epoch: 1, batch: 35991, sum loss: 5118.255859, avg loss: 2.787721, ppl: 16.243956 +epoch: 1, batch: 35992, sum loss: 5306.113281, avg loss: 2.736521, ppl: 15.433192 +epoch: 1, batch: 35993, sum loss: 4807.175293, avg loss: 2.604104, ppl: 13.519101 +epoch: 1, batch: 35994, sum loss: 4633.705566, avg loss: 2.661520, ppl: 14.318029 +epoch: 1, batch: 35995, sum loss: 3816.243652, avg loss: 2.462093, ppl: 11.729334 +epoch: 1, batch: 35996, sum loss: 4086.328125, avg loss: 2.204060, ppl: 9.061732 +epoch: 1, batch: 35997, sum loss: 3686.412598, avg loss: 2.369160, ppl: 10.688407 +epoch: 1, batch: 35998, sum loss: 4413.891602, avg loss: 2.762135, ppl: 15.833612 +epoch: 1, batch: 35999, sum loss: 3957.145752, avg loss: 2.272915, ppl: 9.707661 +epoch: 1, batch: 36000, sum loss: 
4008.471680, avg loss: 2.654617, ppl: 14.219540 +epoch: 1, batch: 36001, sum loss: 4209.024902, avg loss: 2.650520, ppl: 14.161395 +epoch: 1, batch: 36002, sum loss: 4499.121582, avg loss: 2.756815, ppl: 15.749596 +epoch: 1, batch: 36003, sum loss: 3948.340576, avg loss: 2.489496, ppl: 12.055199 +epoch: 1, batch: 36004, sum loss: 4338.089844, avg loss: 2.602333, ppl: 13.495190 +epoch: 1, batch: 36005, sum loss: 4251.708984, avg loss: 2.521773, ppl: 12.450650 +epoch: 1, batch: 36006, sum loss: 4021.377441, avg loss: 2.496200, ppl: 12.136284 +epoch: 1, batch: 36007, sum loss: 3478.722656, avg loss: 2.336281, ppl: 10.342701 +epoch: 1, batch: 36008, sum loss: 3926.732910, avg loss: 2.366928, ppl: 10.664577 +epoch: 1, batch: 36009, sum loss: 4662.295898, avg loss: 2.698088, ppl: 14.851308 +epoch: 1, batch: 36010, sum loss: 3861.912598, avg loss: 2.668910, ppl: 14.424235 +epoch: 1, batch: 36011, sum loss: 4334.378906, avg loss: 2.522921, ppl: 12.464957 +epoch: 1, batch: 36012, sum loss: 4394.038574, avg loss: 2.596950, ppl: 13.422730 +epoch: 1, batch: 36013, sum loss: 3901.908203, avg loss: 2.443274, ppl: 11.510662 +epoch: 1, batch: 36014, sum loss: 3533.714355, avg loss: 2.338659, ppl: 10.367328 +epoch: 1, batch: 36015, sum loss: 3697.996094, avg loss: 2.471923, ppl: 11.845199 +epoch: 1, batch: 36016, sum loss: 5219.203125, avg loss: 2.818144, ppl: 16.745743 +epoch: 1, batch: 36017, sum loss: 3452.220215, avg loss: 2.443185, ppl: 11.509639 +epoch: 1, batch: 36018, sum loss: 3792.830811, avg loss: 2.485472, ppl: 12.006791 +epoch: 1, batch: 36019, sum loss: 3306.835205, avg loss: 2.490087, ppl: 12.062323 +epoch: 1, batch: 36020, sum loss: 3890.883301, avg loss: 2.595653, ppl: 13.405335 +epoch: 1, batch: 36021, sum loss: 4193.266113, avg loss: 2.635617, ppl: 13.951915 +epoch: 1, batch: 36022, sum loss: 5034.377930, avg loss: 2.773762, ppl: 16.018784 +epoch: 1, batch: 36023, sum loss: 3977.613525, avg loss: 2.431304, ppl: 11.373706 +epoch: 1, batch: 36024, sum loss: 
4998.273926, avg loss: 2.629287, ppl: 13.863878 +epoch: 1, batch: 36025, sum loss: 4058.727783, avg loss: 2.343376, ppl: 10.416347 +epoch: 1, batch: 36026, sum loss: 4099.615723, avg loss: 2.431563, ppl: 11.376655 +epoch: 1, batch: 36027, sum loss: 4354.159180, avg loss: 2.684438, ppl: 14.649973 +epoch: 1, batch: 36028, sum loss: 4318.423828, avg loss: 2.629978, ppl: 13.873467 +epoch: 1, batch: 36029, sum loss: 5434.070801, avg loss: 2.818502, ppl: 16.751734 +epoch: 1, batch: 36030, sum loss: 3271.436523, avg loss: 2.377497, ppl: 10.777897 +epoch: 1, batch: 36031, sum loss: 4980.350586, avg loss: 2.808996, ppl: 16.593258 +epoch: 1, batch: 36032, sum loss: 4397.580078, avg loss: 2.636439, ppl: 13.963389 +epoch: 1, batch: 36033, sum loss: 3360.143311, avg loss: 2.348109, ppl: 10.465755 +epoch: 1, batch: 36034, sum loss: 3738.610352, avg loss: 2.352807, ppl: 10.515044 +epoch: 1, batch: 36035, sum loss: 4504.201172, avg loss: 2.749817, ppl: 15.639775 +epoch: 1, batch: 36036, sum loss: 3989.483643, avg loss: 2.533005, ppl: 12.591292 +epoch: 1, batch: 36037, sum loss: 4823.480469, avg loss: 2.726671, ppl: 15.281925 +epoch: 1, batch: 36038, sum loss: 5047.296875, avg loss: 2.879234, ppl: 17.800631 +epoch: 1, batch: 36039, sum loss: 4195.783691, avg loss: 2.570946, ppl: 13.078190 +epoch: 1, batch: 36040, sum loss: 4340.984375, avg loss: 2.637293, ppl: 13.975323 +epoch: 1, batch: 36041, sum loss: 3876.023926, avg loss: 2.507131, ppl: 12.269673 +epoch: 1, batch: 36042, sum loss: 4247.657227, avg loss: 2.700354, ppl: 14.885001 +epoch: 1, batch: 36043, sum loss: 4360.167969, avg loss: 2.543855, ppl: 12.728651 +epoch: 1, batch: 36044, sum loss: 4408.300781, avg loss: 2.564457, ppl: 12.993597 +epoch: 1, batch: 36045, sum loss: 4243.890137, avg loss: 2.453116, ppl: 11.624509 +epoch: 1, batch: 36046, sum loss: 4201.839355, avg loss: 2.649331, ppl: 14.144577 +epoch: 1, batch: 36047, sum loss: 4751.807617, avg loss: 2.582504, ppl: 13.230226 +epoch: 1, batch: 36048, sum loss: 
5244.664551, avg loss: 2.717443, ppl: 15.141556 +epoch: 1, batch: 36049, sum loss: 3467.384033, avg loss: 2.344411, ppl: 10.427131 +epoch: 1, batch: 36050, sum loss: 4533.896484, avg loss: 2.406527, ppl: 11.095358 +epoch: 1, batch: 36051, sum loss: 4200.604980, avg loss: 2.583398, ppl: 13.242057 +epoch: 1, batch: 36052, sum loss: 5159.779785, avg loss: 2.989444, ppl: 19.874620 +epoch: 1, batch: 36053, sum loss: 2758.144287, avg loss: 1.938260, ppl: 6.946655 +epoch: 1, batch: 36054, sum loss: 4247.430664, avg loss: 2.756282, ppl: 15.741210 +epoch: 1, batch: 36055, sum loss: 3992.618164, avg loss: 2.311881, ppl: 10.093390 +epoch: 1, batch: 36056, sum loss: 4327.129883, avg loss: 2.850547, ppl: 17.297237 +epoch: 1, batch: 36057, sum loss: 4940.470215, avg loss: 2.747759, ppl: 15.607610 +epoch: 1, batch: 36058, sum loss: 4247.729004, avg loss: 2.625296, ppl: 13.808662 +epoch: 1, batch: 36059, sum loss: 3116.942871, avg loss: 2.352410, ppl: 10.510866 +epoch: 1, batch: 36060, sum loss: 4900.047852, avg loss: 2.584414, ppl: 13.255513 +epoch: 1, batch: 36061, sum loss: 3781.700684, avg loss: 2.473316, ppl: 11.861720 +epoch: 1, batch: 36062, sum loss: 4180.811523, avg loss: 2.527698, ppl: 12.524635 +epoch: 1, batch: 36063, sum loss: 4045.855713, avg loss: 2.422668, ppl: 11.275906 +epoch: 1, batch: 36064, sum loss: 4037.929199, avg loss: 2.423727, ppl: 11.287851 +epoch: 1, batch: 36065, sum loss: 4244.179688, avg loss: 2.645997, ppl: 14.097497 +epoch: 1, batch: 36066, sum loss: 4119.983398, avg loss: 2.535374, ppl: 12.621156 +epoch: 1, batch: 36067, sum loss: 4830.611328, avg loss: 2.611141, ppl: 13.614579 +epoch: 1, batch: 36068, sum loss: 4183.880371, avg loss: 2.466911, ppl: 11.785979 +epoch: 1, batch: 36069, sum loss: 4397.639648, avg loss: 2.641225, ppl: 14.030382 +epoch: 1, batch: 36070, sum loss: 4621.486328, avg loss: 2.633326, ppl: 13.919985 +epoch: 1, batch: 36071, sum loss: 4092.099121, avg loss: 2.463636, ppl: 11.747447 +epoch: 1, batch: 36072, sum loss: 
4603.260254, avg loss: 2.738406, ppl: 15.462314 +epoch: 1, batch: 36073, sum loss: 4843.572266, avg loss: 2.572263, ppl: 13.095432 +epoch: 1, batch: 36074, sum loss: 5269.388184, avg loss: 2.804358, ppl: 16.516464 +epoch: 1, batch: 36075, sum loss: 4644.730469, avg loss: 2.550648, ppl: 12.815408 +epoch: 1, batch: 36076, sum loss: 3192.952881, avg loss: 2.231274, ppl: 9.311721 +epoch: 1, batch: 36077, sum loss: 4779.719238, avg loss: 2.721936, ppl: 15.209740 +epoch: 1, batch: 36078, sum loss: 4422.048828, avg loss: 2.581465, ppl: 13.216480 +epoch: 1, batch: 36079, sum loss: 5299.976562, avg loss: 2.855591, ppl: 17.384705 +epoch: 1, batch: 36080, sum loss: 4169.516602, avg loss: 2.597830, ppl: 13.434547 +epoch: 1, batch: 36081, sum loss: 4750.361328, avg loss: 2.758630, ppl: 15.778216 +epoch: 1, batch: 36082, sum loss: 4309.014160, avg loss: 2.566417, ppl: 13.019094 +epoch: 1, batch: 36083, sum loss: 3707.631104, avg loss: 2.616536, ppl: 13.688221 +epoch: 1, batch: 36084, sum loss: 3850.618408, avg loss: 2.384284, ppl: 10.851288 +epoch: 1, batch: 36085, sum loss: 4708.586914, avg loss: 2.848510, ppl: 17.262039 +epoch: 1, batch: 36086, sum loss: 4528.410156, avg loss: 2.671628, ppl: 14.463504 +epoch: 1, batch: 36087, sum loss: 4431.392578, avg loss: 2.582397, ppl: 13.228806 +epoch: 1, batch: 36088, sum loss: 4320.713867, avg loss: 2.636189, ppl: 13.959901 +epoch: 1, batch: 36089, sum loss: 5047.672363, avg loss: 2.591208, ppl: 13.345881 +epoch: 1, batch: 36090, sum loss: 4175.181152, avg loss: 2.598121, ppl: 13.438469 +epoch: 1, batch: 36091, sum loss: 4705.805664, avg loss: 2.933794, ppl: 18.798822 +epoch: 1, batch: 36092, sum loss: 3453.575684, avg loss: 2.437245, ppl: 11.441472 +epoch: 1, batch: 36093, sum loss: 4367.334473, avg loss: 2.613605, ppl: 13.648170 +epoch: 1, batch: 36094, sum loss: 3545.365479, avg loss: 2.175071, ppl: 8.802808 +epoch: 1, batch: 36095, sum loss: 4180.143555, avg loss: 2.545763, ppl: 12.752961 +epoch: 1, batch: 36096, sum loss: 
5068.163086, avg loss: 2.690108, ppl: 14.733264 +epoch: 1, batch: 36097, sum loss: 4037.010498, avg loss: 2.475175, ppl: 11.883788 +epoch: 1, batch: 36098, sum loss: 5275.524902, avg loss: 2.628563, ppl: 13.853844 +epoch: 1, batch: 36099, sum loss: 5050.876465, avg loss: 2.764574, ppl: 15.872274 +epoch: 1, batch: 36100, sum loss: 3345.003906, avg loss: 2.362291, ppl: 10.615242 +epoch: 1, batch: 36101, sum loss: 3676.013672, avg loss: 2.519543, ppl: 12.422923 +epoch: 1, batch: 36102, sum loss: 3799.415527, avg loss: 2.429294, ppl: 11.350864 +epoch: 1, batch: 36103, sum loss: 4509.292480, avg loss: 2.560643, ppl: 12.944138 +epoch: 1, batch: 36104, sum loss: 4003.574463, avg loss: 2.586288, ppl: 13.280389 +epoch: 1, batch: 36105, sum loss: 3813.900391, avg loss: 2.500918, ppl: 12.193687 +epoch: 1, batch: 36106, sum loss: 3941.335938, avg loss: 2.643418, ppl: 14.061180 +epoch: 1, batch: 36107, sum loss: 4942.452637, avg loss: 2.717126, ppl: 15.136763 +epoch: 1, batch: 36108, sum loss: 4380.614258, avg loss: 2.529223, ppl: 12.543756 +epoch: 1, batch: 36109, sum loss: 5583.148438, avg loss: 3.047570, ppl: 21.064102 +epoch: 1, batch: 36110, sum loss: 4857.636230, avg loss: 2.799790, ppl: 16.441200 +epoch: 1, batch: 36111, sum loss: 4543.501465, avg loss: 2.599257, ppl: 13.453741 +epoch: 1, batch: 36112, sum loss: 3362.885010, avg loss: 2.324039, ppl: 10.216862 +epoch: 1, batch: 36113, sum loss: 4580.064941, avg loss: 2.722987, ppl: 15.225739 +epoch: 1, batch: 36114, sum loss: 4215.610352, avg loss: 2.503332, ppl: 12.223149 +epoch: 1, batch: 36115, sum loss: 5053.675293, avg loss: 2.789004, ppl: 16.264812 +epoch: 1, batch: 36116, sum loss: 4735.160156, avg loss: 2.584694, ppl: 13.259236 +epoch: 1, batch: 36117, sum loss: 3975.638184, avg loss: 2.576564, ppl: 13.151872 +epoch: 1, batch: 36118, sum loss: 4295.501953, avg loss: 2.450372, ppl: 11.592658 +epoch: 1, batch: 36119, sum loss: 4378.367188, avg loss: 2.776390, ppl: 16.060938 +epoch: 1, batch: 36120, sum loss: 
4627.390137, avg loss: 2.819860, ppl: 16.774498 +epoch: 1, batch: 36121, sum loss: 4994.961914, avg loss: 2.562833, ppl: 12.972521 +epoch: 1, batch: 36122, sum loss: 3869.724609, avg loss: 2.176448, ppl: 8.814940 +epoch: 1, batch: 36123, sum loss: 3409.790283, avg loss: 2.283851, ppl: 9.814406 +epoch: 1, batch: 36124, sum loss: 4475.000977, avg loss: 2.615430, ppl: 13.673100 +epoch: 1, batch: 36125, sum loss: 4093.797852, avg loss: 2.436785, ppl: 11.436209 +epoch: 1, batch: 36126, sum loss: 3924.846680, avg loss: 2.708659, ppl: 15.009133 +epoch: 1, batch: 36127, sum loss: 4571.704102, avg loss: 2.363859, ppl: 10.631906 +epoch: 1, batch: 36128, sum loss: 4055.504883, avg loss: 2.428446, ppl: 11.341245 +epoch: 1, batch: 36129, sum loss: 4496.697266, avg loss: 2.375434, ppl: 10.755682 +epoch: 1, batch: 36130, sum loss: 4837.338379, avg loss: 2.737600, ppl: 15.449855 +epoch: 1, batch: 36131, sum loss: 3796.045410, avg loss: 2.379966, ppl: 10.804533 +epoch: 1, batch: 36132, sum loss: 4679.244629, avg loss: 2.617027, ppl: 13.694952 +epoch: 1, batch: 36133, sum loss: 4802.370605, avg loss: 2.796954, ppl: 16.394640 +epoch: 1, batch: 36134, sum loss: 5691.248535, avg loss: 3.088035, ppl: 21.933937 +epoch: 1, batch: 36135, sum loss: 3800.216797, avg loss: 2.432917, ppl: 11.392068 +epoch: 1, batch: 36136, sum loss: 3918.187744, avg loss: 2.570989, ppl: 13.078758 +epoch: 1, batch: 36137, sum loss: 3903.179688, avg loss: 2.421327, ppl: 11.260797 +epoch: 1, batch: 36138, sum loss: 3774.785156, avg loss: 2.547089, ppl: 12.769872 +epoch: 1, batch: 36139, sum loss: 4643.697266, avg loss: 2.687325, ppl: 14.692318 +epoch: 1, batch: 36140, sum loss: 4921.093262, avg loss: 2.696490, ppl: 14.827589 +epoch: 1, batch: 36141, sum loss: 3875.205566, avg loss: 2.630825, ppl: 13.885224 +epoch: 1, batch: 36142, sum loss: 4531.281250, avg loss: 2.671746, ppl: 14.465203 +epoch: 1, batch: 36143, sum loss: 4479.244141, avg loss: 2.659884, ppl: 14.294627 +epoch: 1, batch: 36144, sum loss: 
5260.721191, avg loss: 2.765889, ppl: 15.893169 +epoch: 1, batch: 36145, sum loss: 5142.733887, avg loss: 2.721023, ppl: 15.195865 +epoch: 1, batch: 36146, sum loss: 5804.838867, avg loss: 2.667665, ppl: 14.406291 +epoch: 1, batch: 36147, sum loss: 5592.012695, avg loss: 2.882481, ppl: 17.858522 +epoch: 1, batch: 36148, sum loss: 4434.471191, avg loss: 2.594775, ppl: 13.393579 +epoch: 1, batch: 36149, sum loss: 5120.153809, avg loss: 2.919130, ppl: 18.525160 +epoch: 1, batch: 36150, sum loss: 4457.084961, avg loss: 2.828099, ppl: 16.913286 +epoch: 1, batch: 36151, sum loss: 4240.709473, avg loss: 2.420496, ppl: 11.251444 +epoch: 1, batch: 36152, sum loss: 4054.364258, avg loss: 2.733894, ppl: 15.392704 +epoch: 1, batch: 36153, sum loss: 4218.618652, avg loss: 2.374012, ppl: 10.740391 +epoch: 1, batch: 36154, sum loss: 4624.112305, avg loss: 2.736161, ppl: 15.427645 +epoch: 1, batch: 36155, sum loss: 4995.813965, avg loss: 2.661595, ppl: 14.319112 +epoch: 1, batch: 36156, sum loss: 5588.263184, avg loss: 2.733984, ppl: 15.394095 +epoch: 1, batch: 36157, sum loss: 4594.250000, avg loss: 2.723325, ppl: 15.230888 +epoch: 1, batch: 36158, sum loss: 4136.061035, avg loss: 2.487108, ppl: 12.026448 +epoch: 1, batch: 36159, sum loss: 4484.251465, avg loss: 2.552220, ppl: 12.835571 +epoch: 1, batch: 36160, sum loss: 3554.252686, avg loss: 2.478558, ppl: 11.924061 +epoch: 1, batch: 36161, sum loss: 4098.635742, avg loss: 2.442572, ppl: 11.502586 +epoch: 1, batch: 36162, sum loss: 4765.810547, avg loss: 2.782143, ppl: 16.153595 +epoch: 1, batch: 36163, sum loss: 4592.425293, avg loss: 2.639325, ppl: 14.003747 +epoch: 1, batch: 36164, sum loss: 4968.141602, avg loss: 2.579513, ppl: 13.190710 +epoch: 1, batch: 36165, sum loss: 4074.612305, avg loss: 2.456065, ppl: 11.658846 +epoch: 1, batch: 36166, sum loss: 4120.783691, avg loss: 2.545265, ppl: 12.746605 +epoch: 1, batch: 36167, sum loss: 3821.618408, avg loss: 2.568292, ppl: 13.043526 +epoch: 1, batch: 36168, sum loss: 
4797.375000, avg loss: 2.779476, ppl: 16.110571 +epoch: 1, batch: 36169, sum loss: 4355.614746, avg loss: 2.481832, ppl: 11.963159 +epoch: 1, batch: 36170, sum loss: 4581.075195, avg loss: 2.496499, ppl: 12.139915 +epoch: 1, batch: 36171, sum loss: 4705.851562, avg loss: 2.828036, ppl: 16.912210 +epoch: 1, batch: 36172, sum loss: 3913.082275, avg loss: 2.572704, ppl: 13.101200 +epoch: 1, batch: 36173, sum loss: 4272.876953, avg loss: 2.532826, ppl: 12.589029 +epoch: 1, batch: 36174, sum loss: 5053.662598, avg loss: 2.488263, ppl: 12.040348 +epoch: 1, batch: 36175, sum loss: 4093.318848, avg loss: 2.352482, ppl: 10.511628 +epoch: 1, batch: 36176, sum loss: 4557.515137, avg loss: 2.411384, ppl: 11.149377 +epoch: 1, batch: 36177, sum loss: 5116.407227, avg loss: 2.704232, ppl: 14.942840 +epoch: 1, batch: 36178, sum loss: 4427.929688, avg loss: 2.670645, ppl: 14.449289 +epoch: 1, batch: 36179, sum loss: 4481.741211, avg loss: 2.627046, ppl: 13.832852 +epoch: 1, batch: 36180, sum loss: 4465.579590, avg loss: 2.617573, ppl: 13.702428 +epoch: 1, batch: 36181, sum loss: 5319.191406, avg loss: 2.719423, ppl: 15.171567 +epoch: 1, batch: 36182, sum loss: 3583.745117, avg loss: 2.244048, ppl: 9.431436 +epoch: 1, batch: 36183, sum loss: 5343.499023, avg loss: 2.638765, ppl: 13.995906 +epoch: 1, batch: 36184, sum loss: 4564.233398, avg loss: 2.527261, ppl: 12.519170 +epoch: 1, batch: 36185, sum loss: 4420.369141, avg loss: 2.621809, ppl: 13.760588 +epoch: 1, batch: 36186, sum loss: 3250.274170, avg loss: 2.131327, ppl: 8.426044 +epoch: 1, batch: 36187, sum loss: 4163.574707, avg loss: 2.415067, ppl: 11.190517 +epoch: 1, batch: 36188, sum loss: 4128.893555, avg loss: 2.665522, ppl: 14.375453 +epoch: 1, batch: 36189, sum loss: 4447.161621, avg loss: 2.564684, ppl: 12.996547 +epoch: 1, batch: 36190, sum loss: 3788.423096, avg loss: 2.324186, ppl: 10.218357 +epoch: 1, batch: 36191, sum loss: 5034.327148, avg loss: 2.764595, ppl: 15.872607 +epoch: 1, batch: 36192, sum loss: 
4124.551270, avg loss: 2.460949, ppl: 11.715930 +epoch: 1, batch: 36193, sum loss: 4111.903320, avg loss: 2.519549, ppl: 12.422988 +epoch: 1, batch: 36194, sum loss: 4767.541016, avg loss: 2.721199, ppl: 15.198539 +epoch: 1, batch: 36195, sum loss: 4850.809082, avg loss: 2.650715, ppl: 14.164167 +epoch: 1, batch: 36196, sum loss: 4604.261719, avg loss: 2.593950, ppl: 13.382528 +epoch: 1, batch: 36197, sum loss: 4113.676758, avg loss: 2.407067, ppl: 11.101349 +epoch: 1, batch: 36198, sum loss: 4215.486328, avg loss: 2.438107, ppl: 11.451341 +epoch: 1, batch: 36199, sum loss: 4717.126465, avg loss: 2.668058, ppl: 14.411956 +epoch: 1, batch: 36200, sum loss: 4587.475586, avg loss: 2.676474, ppl: 14.533751 +epoch: 1, batch: 36201, sum loss: 3247.738770, avg loss: 2.179691, ppl: 8.843570 +epoch: 1, batch: 36202, sum loss: 3782.245117, avg loss: 2.369828, ppl: 10.695550 +epoch: 1, batch: 36203, sum loss: 4653.661133, avg loss: 2.615886, ppl: 13.679331 +epoch: 1, batch: 36204, sum loss: 4219.915527, avg loss: 2.592086, ppl: 13.357601 +epoch: 1, batch: 36205, sum loss: 3816.134277, avg loss: 2.407656, ppl: 11.107890 +epoch: 1, batch: 36206, sum loss: 4686.886719, avg loss: 2.866597, ppl: 17.577108 +epoch: 1, batch: 36207, sum loss: 3441.530762, avg loss: 2.331661, ppl: 10.295022 +epoch: 1, batch: 36208, sum loss: 4257.542969, avg loss: 2.513308, ppl: 12.345696 +epoch: 1, batch: 36209, sum loss: 4415.618164, avg loss: 2.513158, ppl: 12.343845 +epoch: 1, batch: 36210, sum loss: 3755.654053, avg loss: 2.628170, ppl: 13.848397 +epoch: 1, batch: 36211, sum loss: 4851.306152, avg loss: 2.539951, ppl: 12.679048 +epoch: 1, batch: 36212, sum loss: 4333.920898, avg loss: 2.663750, ppl: 14.349997 +epoch: 1, batch: 36213, sum loss: 4535.668457, avg loss: 2.643163, ppl: 14.057604 +epoch: 1, batch: 36214, sum loss: 5311.411133, avg loss: 2.612598, ppl: 13.634423 +epoch: 1, batch: 36215, sum loss: 4663.754395, avg loss: 2.595300, ppl: 13.400610 +epoch: 1, batch: 36216, sum loss: 
4394.308594, avg loss: 2.626604, ppl: 13.826735 +epoch: 1, batch: 36217, sum loss: 4166.830566, avg loss: 2.523822, ppl: 12.476193 +epoch: 1, batch: 36218, sum loss: 5580.545898, avg loss: 2.912602, ppl: 18.404633 +epoch: 1, batch: 36219, sum loss: 5016.574219, avg loss: 2.791639, ppl: 16.307720 +epoch: 1, batch: 36220, sum loss: 5129.601074, avg loss: 2.820012, ppl: 16.777046 +epoch: 1, batch: 36221, sum loss: 5987.139648, avg loss: 3.002578, ppl: 20.137375 +epoch: 1, batch: 36222, sum loss: 5329.288086, avg loss: 2.784372, ppl: 16.189650 +epoch: 1, batch: 36223, sum loss: 3509.119385, avg loss: 2.236532, ppl: 9.360816 +epoch: 1, batch: 36224, sum loss: 4460.203125, avg loss: 2.736321, ppl: 15.430113 +epoch: 1, batch: 36225, sum loss: 4879.190918, avg loss: 2.645982, ppl: 14.097282 +epoch: 1, batch: 36226, sum loss: 4164.778320, avg loss: 2.451312, ppl: 11.603556 +epoch: 1, batch: 36227, sum loss: 4888.492188, avg loss: 2.763421, ppl: 15.853992 +epoch: 1, batch: 36228, sum loss: 4576.505859, avg loss: 2.645379, ppl: 14.088784 +epoch: 1, batch: 36229, sum loss: 4174.101074, avg loss: 2.492001, ppl: 12.085433 +epoch: 1, batch: 36230, sum loss: 3743.244873, avg loss: 2.307796, ppl: 10.052242 +epoch: 1, batch: 36231, sum loss: 3752.111816, avg loss: 2.471747, ppl: 11.843118 +epoch: 1, batch: 36232, sum loss: 4963.004883, avg loss: 2.751111, ppl: 15.660025 +epoch: 1, batch: 36233, sum loss: 5050.101074, avg loss: 2.853164, ppl: 17.342575 +epoch: 1, batch: 36234, sum loss: 4480.842773, avg loss: 2.839571, ppl: 17.108419 +epoch: 1, batch: 36235, sum loss: 3615.614746, avg loss: 2.374008, ppl: 10.740358 +epoch: 1, batch: 36236, sum loss: 4039.337158, avg loss: 2.617847, ppl: 13.706179 +epoch: 1, batch: 36237, sum loss: 4415.895508, avg loss: 2.407795, ppl: 11.109435 +epoch: 1, batch: 36238, sum loss: 4405.017578, avg loss: 2.695849, ppl: 14.818097 +epoch: 1, batch: 36239, sum loss: 4561.545898, avg loss: 2.788231, ppl: 16.252247 +epoch: 1, batch: 36240, sum loss: 
4064.975586, avg loss: 2.599089, ppl: 13.451480 +epoch: 1, batch: 36241, sum loss: 4165.688477, avg loss: 2.611717, ppl: 13.622420 +epoch: 1, batch: 36242, sum loss: 3618.255615, avg loss: 2.404157, ppl: 11.069092 +epoch: 1, batch: 36243, sum loss: 5107.896484, avg loss: 2.804995, ppl: 16.526999 +epoch: 1, batch: 36244, sum loss: 4477.260742, avg loss: 2.787833, ppl: 16.245785 +epoch: 1, batch: 36245, sum loss: 4061.859619, avg loss: 2.378138, ppl: 10.784801 +epoch: 1, batch: 36246, sum loss: 4968.552734, avg loss: 2.774178, ppl: 16.025446 +epoch: 1, batch: 36247, sum loss: 4252.717285, avg loss: 2.543491, ppl: 12.724018 +epoch: 1, batch: 36248, sum loss: 4064.316650, avg loss: 2.510387, ppl: 12.309695 +epoch: 1, batch: 36249, sum loss: 4556.185059, avg loss: 2.593162, ppl: 13.371982 +epoch: 1, batch: 36250, sum loss: 4504.352539, avg loss: 2.883708, ppl: 17.880459 +epoch: 1, batch: 36251, sum loss: 3491.573242, avg loss: 2.129008, ppl: 8.406524 +epoch: 1, batch: 36252, sum loss: 3344.476074, avg loss: 2.468248, ppl: 11.801751 +epoch: 1, batch: 36253, sum loss: 3646.082031, avg loss: 2.553279, ppl: 12.849163 +epoch: 1, batch: 36254, sum loss: 3874.504150, avg loss: 2.446025, ppl: 11.542378 +epoch: 1, batch: 36255, sum loss: 3910.921143, avg loss: 2.783574, ppl: 16.176731 +epoch: 1, batch: 36256, sum loss: 3987.139160, avg loss: 2.485748, ppl: 12.010096 +epoch: 1, batch: 36257, sum loss: 6085.089844, avg loss: 2.875751, ppl: 17.738750 +epoch: 1, batch: 36258, sum loss: 4632.015625, avg loss: 2.688343, ppl: 14.707290 +epoch: 1, batch: 36259, sum loss: 3717.286377, avg loss: 2.327668, ppl: 10.254005 +epoch: 1, batch: 36260, sum loss: 2813.810059, avg loss: 2.109303, ppl: 8.242495 +epoch: 1, batch: 36261, sum loss: 4409.524902, avg loss: 2.485640, ppl: 12.008799 +epoch: 1, batch: 36262, sum loss: 4536.689453, avg loss: 2.545841, ppl: 12.753956 +epoch: 1, batch: 36263, sum loss: 4031.785645, avg loss: 2.412798, ppl: 11.165159 +epoch: 1, batch: 36264, sum loss: 
2597.771484, avg loss: 1.903129, ppl: 6.706850 +epoch: 1, batch: 36265, sum loss: 4500.213867, avg loss: 2.610333, ppl: 13.603580 +epoch: 1, batch: 36266, sum loss: 3250.095703, avg loss: 2.274385, ppl: 9.721935 +epoch: 1, batch: 36267, sum loss: 4408.238281, avg loss: 2.576411, ppl: 13.149853 +epoch: 1, batch: 36268, sum loss: 3945.013672, avg loss: 2.389469, ppl: 10.907704 +epoch: 1, batch: 36269, sum loss: 4592.362305, avg loss: 2.559845, ppl: 12.933815 +epoch: 1, batch: 36270, sum loss: 5177.900391, avg loss: 3.047617, ppl: 21.065081 +epoch: 1, batch: 36271, sum loss: 4202.736816, avg loss: 2.832033, ppl: 16.979944 +epoch: 1, batch: 36272, sum loss: 5038.988281, avg loss: 2.811936, ppl: 16.642101 +epoch: 1, batch: 36273, sum loss: 3165.009033, avg loss: 2.063239, ppl: 7.871427 +epoch: 1, batch: 36274, sum loss: 3331.710938, avg loss: 2.286692, ppl: 9.842329 +epoch: 1, batch: 36275, sum loss: 4180.153809, avg loss: 2.594757, ppl: 13.393336 +epoch: 1, batch: 36276, sum loss: 4106.672363, avg loss: 2.463511, ppl: 11.745976 +epoch: 1, batch: 36277, sum loss: 4474.789062, avg loss: 2.618367, ppl: 13.713311 +epoch: 1, batch: 36278, sum loss: 4443.018066, avg loss: 2.625897, ppl: 13.816965 +epoch: 1, batch: 36279, sum loss: 4446.979492, avg loss: 2.573483, ppl: 13.111418 +epoch: 1, batch: 36280, sum loss: 4064.664062, avg loss: 2.524636, ppl: 12.486350 +epoch: 1, batch: 36281, sum loss: 4027.695557, avg loss: 2.370627, ppl: 10.704106 +epoch: 1, batch: 36282, sum loss: 4600.634766, avg loss: 2.600698, ppl: 13.473142 +epoch: 1, batch: 36283, sum loss: 3332.368408, avg loss: 2.239495, ppl: 9.388587 +epoch: 1, batch: 36284, sum loss: 4307.210938, avg loss: 2.568403, ppl: 13.044969 +epoch: 1, batch: 36285, sum loss: 5134.679688, avg loss: 2.798191, ppl: 16.414919 +epoch: 1, batch: 36286, sum loss: 4045.502441, avg loss: 2.566943, ppl: 13.025946 +epoch: 1, batch: 36287, sum loss: 4412.455078, avg loss: 2.854111, ppl: 17.358994 +epoch: 1, batch: 36288, sum loss: 5419.791992, 
avg loss: 2.971377, ppl: 19.518782 +epoch: 1, batch: 36289, sum loss: 4997.947754, avg loss: 2.710384, ppl: 15.035047 +epoch: 1, batch: 36290, sum loss: 5314.017090, avg loss: 2.894345, ppl: 18.071657 +epoch: 1, batch: 36291, sum loss: 4839.216797, avg loss: 2.587816, ppl: 13.300697 +epoch: 1, batch: 36292, sum loss: 4718.798340, avg loss: 2.687243, ppl: 14.691116 +epoch: 1, batch: 36293, sum loss: 4332.545898, avg loss: 2.572771, ppl: 13.102077 +epoch: 1, batch: 36294, sum loss: 4789.268555, avg loss: 2.561106, ppl: 12.950135 +epoch: 1, batch: 36295, sum loss: 4408.540039, avg loss: 2.625694, ppl: 13.814155 +epoch: 1, batch: 36296, sum loss: 4593.866699, avg loss: 2.772400, ppl: 15.996979 +epoch: 1, batch: 36297, sum loss: 4561.762207, avg loss: 2.451242, ppl: 11.602754 +epoch: 1, batch: 36298, sum loss: 4453.200684, avg loss: 2.533106, ppl: 12.592559 +epoch: 1, batch: 36299, sum loss: 4973.677734, avg loss: 2.503109, ppl: 12.220428 +epoch: 1, batch: 36300, sum loss: 5608.654297, avg loss: 3.041570, ppl: 20.938084 +epoch: 1, batch: 36301, sum loss: 3898.612305, avg loss: 2.541468, ppl: 12.698300 +epoch: 1, batch: 36302, sum loss: 4091.918945, avg loss: 2.447320, ppl: 11.557331 +epoch: 1, batch: 36303, sum loss: 4527.394531, avg loss: 2.612461, ppl: 13.632554 +epoch: 1, batch: 36304, sum loss: 4774.409180, avg loss: 2.643637, ppl: 14.064268 +epoch: 1, batch: 36305, sum loss: 3139.906250, avg loss: 2.186564, ppl: 8.904566 +epoch: 1, batch: 36306, sum loss: 4526.022949, avg loss: 2.522867, ppl: 12.464277 +epoch: 1, batch: 36307, sum loss: 5171.888672, avg loss: 2.826169, ppl: 16.880663 +epoch: 1, batch: 36308, sum loss: 4715.219727, avg loss: 2.418061, ppl: 11.224077 +epoch: 1, batch: 36309, sum loss: 4104.172363, avg loss: 2.561905, ppl: 12.960488 +epoch: 1, batch: 36310, sum loss: 4168.275879, avg loss: 2.538536, ppl: 12.661122 +epoch: 1, batch: 36311, sum loss: 3566.332520, avg loss: 2.497432, ppl: 12.151246 +epoch: 1, batch: 36312, sum loss: 4202.318848, avg 
loss: 2.616637, ppl: 13.689605 +epoch: 1, batch: 36313, sum loss: 4070.980469, avg loss: 2.486854, ppl: 12.023395 +epoch: 1, batch: 36314, sum loss: 4145.114258, avg loss: 2.500069, ppl: 12.183333 +epoch: 1, batch: 36315, sum loss: 4036.068359, avg loss: 2.311609, ppl: 10.090643 +epoch: 1, batch: 36316, sum loss: 4305.734375, avg loss: 2.674369, ppl: 14.503197 +epoch: 1, batch: 36317, sum loss: 4516.191895, avg loss: 2.709173, ppl: 15.016854 +epoch: 1, batch: 36318, sum loss: 4245.758301, avg loss: 2.604760, ppl: 13.527974 +epoch: 1, batch: 36319, sum loss: 4525.482910, avg loss: 2.408453, ppl: 11.116750 +epoch: 1, batch: 36320, sum loss: 4601.478027, avg loss: 2.678392, ppl: 14.561666 +epoch: 1, batch: 36321, sum loss: 5092.494141, avg loss: 2.676035, ppl: 14.527373 +epoch: 1, batch: 36322, sum loss: 4447.379883, avg loss: 2.542813, ppl: 12.715390 +epoch: 1, batch: 36323, sum loss: 4167.402344, avg loss: 2.550430, ppl: 12.812610 +epoch: 1, batch: 36324, sum loss: 4833.912109, avg loss: 2.651625, ppl: 14.177056 +epoch: 1, batch: 36325, sum loss: 4150.366699, avg loss: 2.500221, ppl: 12.185186 +epoch: 1, batch: 36326, sum loss: 4677.801758, avg loss: 2.642826, ppl: 14.052858 +epoch: 1, batch: 36327, sum loss: 4130.807617, avg loss: 2.890698, ppl: 18.005877 +epoch: 1, batch: 36328, sum loss: 4263.114746, avg loss: 2.368397, ppl: 10.680261 +epoch: 1, batch: 36329, sum loss: 4897.627441, avg loss: 2.603736, ppl: 13.514132 +epoch: 1, batch: 36330, sum loss: 4230.350586, avg loss: 2.512085, ppl: 12.330609 +epoch: 1, batch: 36331, sum loss: 3776.844727, avg loss: 2.307175, ppl: 10.046001 +epoch: 1, batch: 36332, sum loss: 3926.181396, avg loss: 2.577926, ppl: 13.169798 +epoch: 1, batch: 36333, sum loss: 4953.919922, avg loss: 2.670577, ppl: 14.448300 +epoch: 1, batch: 36334, sum loss: 3884.708252, avg loss: 2.598467, ppl: 13.443112 +epoch: 1, batch: 36335, sum loss: 3930.095215, avg loss: 2.538821, ppl: 12.664733 +epoch: 1, batch: 36336, sum loss: 3752.887939, avg loss: 
2.332435, ppl: 10.302998 +epoch: 1, batch: 36337, sum loss: 3783.474609, avg loss: 2.393090, ppl: 10.947264 +epoch: 1, batch: 36338, sum loss: 4912.485840, avg loss: 2.745939, ppl: 15.579244 +epoch: 1, batch: 36339, sum loss: 4936.849121, avg loss: 2.699207, ppl: 14.867934 +epoch: 1, batch: 36340, sum loss: 4637.814453, avg loss: 2.777134, ppl: 16.072897 +epoch: 1, batch: 36341, sum loss: 4883.077637, avg loss: 2.837349, ppl: 17.070450 +epoch: 1, batch: 36342, sum loss: 4261.235840, avg loss: 2.421157, ppl: 11.258874 +epoch: 1, batch: 36343, sum loss: 4086.237793, avg loss: 2.634583, ppl: 13.937492 +epoch: 1, batch: 36344, sum loss: 3288.206299, avg loss: 2.093066, ppl: 8.109739 +epoch: 1, batch: 36345, sum loss: 4659.798340, avg loss: 2.687311, ppl: 14.692122 +epoch: 1, batch: 36346, sum loss: 4566.235352, avg loss: 2.594452, ppl: 13.389247 +epoch: 1, batch: 36347, sum loss: 4342.819824, avg loss: 2.692387, ppl: 14.766880 +epoch: 1, batch: 36348, sum loss: 4517.476562, avg loss: 2.627968, ppl: 13.845605 +epoch: 1, batch: 36349, sum loss: 4845.488770, avg loss: 2.710005, ppl: 15.029347 +epoch: 1, batch: 36350, sum loss: 4466.915527, avg loss: 2.629144, ppl: 13.861898 +epoch: 1, batch: 36351, sum loss: 4581.013672, avg loss: 2.784811, ppl: 16.196749 +epoch: 1, batch: 36352, sum loss: 4205.391602, avg loss: 2.394870, ppl: 10.966773 +epoch: 1, batch: 36353, sum loss: 4361.154297, avg loss: 2.616169, ppl: 13.683206 +epoch: 1, batch: 36354, sum loss: 5104.840820, avg loss: 2.713897, ppl: 15.087962 +epoch: 1, batch: 36355, sum loss: 3701.926758, avg loss: 2.402289, ppl: 11.048433 +epoch: 1, batch: 36356, sum loss: 4152.146973, avg loss: 2.556741, ppl: 12.893725 +epoch: 1, batch: 36357, sum loss: 3596.452148, avg loss: 2.522056, ppl: 12.454178 +epoch: 1, batch: 36358, sum loss: 4481.263672, avg loss: 2.499310, ppl: 12.174097 +epoch: 1, batch: 36359, sum loss: 4239.361328, avg loss: 2.387028, ppl: 10.881104 +epoch: 1, batch: 36360, sum loss: 4566.950684, avg loss: 
2.731430, ppl: 15.354830 +epoch: 1, batch: 36361, sum loss: 4667.754395, avg loss: 2.823808, ppl: 16.840855 +epoch: 1, batch: 36362, sum loss: 3828.927002, avg loss: 2.448163, ppl: 11.567079 +epoch: 1, batch: 36363, sum loss: 3827.344971, avg loss: 2.565245, ppl: 13.003839 +epoch: 1, batch: 36364, sum loss: 4195.670898, avg loss: 2.470949, ppl: 11.833668 +epoch: 1, batch: 36365, sum loss: 5377.708008, avg loss: 2.672817, ppl: 14.480707 +epoch: 1, batch: 36366, sum loss: 5114.033203, avg loss: 2.720230, ppl: 15.183820 +epoch: 1, batch: 36367, sum loss: 3522.183838, avg loss: 2.303587, ppl: 10.010021 +epoch: 1, batch: 36368, sum loss: 4478.567871, avg loss: 2.657904, ppl: 14.266351 +epoch: 1, batch: 36369, sum loss: 3691.126465, avg loss: 2.487282, ppl: 12.028539 +epoch: 1, batch: 36370, sum loss: 4205.860352, avg loss: 2.530602, ppl: 12.561066 +epoch: 1, batch: 36371, sum loss: 3686.884521, avg loss: 2.273048, ppl: 9.708953 +epoch: 1, batch: 36372, sum loss: 4300.321777, avg loss: 2.590555, ppl: 13.337177 +epoch: 1, batch: 36373, sum loss: 5367.078125, avg loss: 2.841227, ppl: 17.136784 +epoch: 1, batch: 36374, sum loss: 4627.315430, avg loss: 2.501252, ppl: 12.197752 +epoch: 1, batch: 36375, sum loss: 5333.517578, avg loss: 2.949955, ppl: 19.105085 +epoch: 1, batch: 36376, sum loss: 4815.464355, avg loss: 2.464414, ppl: 11.756587 +epoch: 1, batch: 36377, sum loss: 4898.031250, avg loss: 2.977526, ppl: 19.639177 +epoch: 1, batch: 36378, sum loss: 4044.424805, avg loss: 2.416024, ppl: 11.201240 +epoch: 1, batch: 36379, sum loss: 4461.650879, avg loss: 2.545152, ppl: 12.745161 +epoch: 1, batch: 36380, sum loss: 4114.091797, avg loss: 2.444499, ppl: 11.524775 +epoch: 1, batch: 36381, sum loss: 4870.801758, avg loss: 2.838463, ppl: 17.089472 +epoch: 1, batch: 36382, sum loss: 3781.279785, avg loss: 2.548032, ppl: 12.781925 +epoch: 1, batch: 36383, sum loss: 5314.691895, avg loss: 2.697813, ppl: 14.847229 +epoch: 1, batch: 36384, sum loss: 3908.192383, avg loss: 
2.846462, ppl: 17.226727 +epoch: 1, batch: 36385, sum loss: 4406.465820, avg loss: 2.525195, ppl: 12.493336 +epoch: 1, batch: 36386, sum loss: 4668.568359, avg loss: 2.765740, ppl: 15.890790 +epoch: 1, batch: 36387, sum loss: 5180.745117, avg loss: 2.928629, ppl: 18.701981 +epoch: 1, batch: 36388, sum loss: 3950.822266, avg loss: 2.455452, ppl: 11.651698 +epoch: 1, batch: 36389, sum loss: 5249.847656, avg loss: 2.819467, ppl: 16.767912 +epoch: 1, batch: 36390, sum loss: 4722.628418, avg loss: 2.603434, ppl: 13.510047 +epoch: 1, batch: 36391, sum loss: 4726.536621, avg loss: 2.722659, ppl: 15.220746 +epoch: 1, batch: 36392, sum loss: 4821.879883, avg loss: 2.854873, ppl: 17.372225 +epoch: 1, batch: 36393, sum loss: 3925.942627, avg loss: 2.405602, ppl: 11.085104 +epoch: 1, batch: 36394, sum loss: 4047.937012, avg loss: 2.451809, ppl: 11.609334 +epoch: 1, batch: 36395, sum loss: 3799.817871, avg loss: 2.442042, ppl: 11.496496 +epoch: 1, batch: 36396, sum loss: 4181.699707, avg loss: 2.638296, ppl: 13.989350 +epoch: 1, batch: 36397, sum loss: 4166.444336, avg loss: 2.545171, ppl: 12.745404 +epoch: 1, batch: 36398, sum loss: 5148.583008, avg loss: 2.805768, ppl: 16.539766 +epoch: 1, batch: 36399, sum loss: 4341.747070, avg loss: 2.528682, ppl: 12.536971 +epoch: 1, batch: 36400, sum loss: 4381.917969, avg loss: 2.658931, ppl: 14.281012 +epoch: 1, batch: 36401, sum loss: 3749.691650, avg loss: 2.346490, ppl: 10.448833 +epoch: 1, batch: 36402, sum loss: 4642.813477, avg loss: 2.595200, ppl: 13.399271 +epoch: 1, batch: 36403, sum loss: 4555.122559, avg loss: 2.631498, ppl: 13.894563 +epoch: 1, batch: 36404, sum loss: 4033.617676, avg loss: 2.494507, ppl: 12.115760 +epoch: 1, batch: 36405, sum loss: 4601.795410, avg loss: 2.353860, ppl: 10.526118 +epoch: 1, batch: 36406, sum loss: 4059.517090, avg loss: 2.720856, ppl: 15.193321 +epoch: 1, batch: 36407, sum loss: 4419.750977, avg loss: 2.535715, ppl: 12.625453 +epoch: 1, batch: 36408, sum loss: 4406.512207, avg loss: 
2.596648, ppl: 13.418689 +epoch: 1, batch: 36409, sum loss: 4661.543945, avg loss: 2.496810, ppl: 12.143690 +epoch: 1, batch: 36410, sum loss: 4853.291992, avg loss: 2.977480, ppl: 19.638260 +epoch: 1, batch: 36411, sum loss: 5107.318359, avg loss: 2.654531, ppl: 14.218323 +epoch: 1, batch: 36412, sum loss: 4296.459961, avg loss: 2.631022, ppl: 13.887962 +epoch: 1, batch: 36413, sum loss: 5018.638672, avg loss: 2.702552, ppl: 14.917747 +epoch: 1, batch: 36414, sum loss: 4393.379883, avg loss: 2.703618, ppl: 14.933668 +epoch: 1, batch: 36415, sum loss: 4442.661621, avg loss: 2.501499, ppl: 12.200766 +epoch: 1, batch: 36416, sum loss: 5045.001465, avg loss: 2.600516, ppl: 13.470692 +epoch: 1, batch: 36417, sum loss: 4644.321777, avg loss: 2.509088, ppl: 12.293714 +epoch: 1, batch: 36418, sum loss: 3776.475342, avg loss: 2.394721, ppl: 10.965141 +epoch: 1, batch: 36419, sum loss: 4269.039062, avg loss: 2.533554, ppl: 12.598205 +epoch: 1, batch: 36420, sum loss: 4679.034668, avg loss: 2.636076, ppl: 13.958320 +epoch: 1, batch: 36421, sum loss: 4176.894531, avg loss: 2.643604, ppl: 14.063799 +epoch: 1, batch: 36422, sum loss: 3448.165039, avg loss: 2.442043, ppl: 11.496508 +epoch: 1, batch: 36423, sum loss: 4676.445801, avg loss: 2.750851, ppl: 15.655945 +epoch: 1, batch: 36424, sum loss: 4471.185547, avg loss: 2.481235, ppl: 11.956021 +epoch: 1, batch: 36425, sum loss: 3878.983398, avg loss: 2.445765, ppl: 11.539374 +epoch: 1, batch: 36426, sum loss: 4665.806152, avg loss: 2.831193, ppl: 16.965689 +epoch: 1, batch: 36427, sum loss: 4227.232910, avg loss: 2.697660, ppl: 14.844954 +epoch: 1, batch: 36428, sum loss: 3827.674805, avg loss: 2.529858, ppl: 12.551723 +epoch: 1, batch: 36429, sum loss: 4721.722168, avg loss: 2.875592, ppl: 17.735926 +epoch: 1, batch: 36430, sum loss: 4070.060059, avg loss: 2.637758, ppl: 13.981818 +epoch: 1, batch: 36431, sum loss: 4474.594727, avg loss: 2.592465, ppl: 13.362669 +epoch: 1, batch: 36432, sum loss: 3537.516113, avg loss: 
2.409752, ppl: 11.131202 +epoch: 1, batch: 36433, sum loss: 3694.071533, avg loss: 2.464357, ppl: 11.755925 +epoch: 1, batch: 36434, sum loss: 3624.135254, avg loss: 2.622384, ppl: 13.768513 +epoch: 1, batch: 36435, sum loss: 4407.834473, avg loss: 2.668181, ppl: 14.413722 +epoch: 1, batch: 36436, sum loss: 4473.776855, avg loss: 2.590490, ppl: 13.336309 +epoch: 1, batch: 36437, sum loss: 4635.024414, avg loss: 2.712127, ppl: 15.061273 +epoch: 1, batch: 36438, sum loss: 4901.576172, avg loss: 2.593426, ppl: 13.375524 +epoch: 1, batch: 36439, sum loss: 4600.743652, avg loss: 2.776550, ppl: 16.063507 +epoch: 1, batch: 36440, sum loss: 3968.412109, avg loss: 2.578565, ppl: 13.178219 +epoch: 1, batch: 36441, sum loss: 4534.321289, avg loss: 2.495499, ppl: 12.127783 +epoch: 1, batch: 36442, sum loss: 4155.365234, avg loss: 2.634981, ppl: 13.943049 +epoch: 1, batch: 36443, sum loss: 4964.337891, avg loss: 2.503448, ppl: 12.224575 +epoch: 1, batch: 36444, sum loss: 3657.886963, avg loss: 2.486667, ppl: 12.021142 +epoch: 1, batch: 36445, sum loss: 3379.327148, avg loss: 2.406928, ppl: 11.099811 +epoch: 1, batch: 36446, sum loss: 4959.626465, avg loss: 2.880155, ppl: 17.817028 +epoch: 1, batch: 36447, sum loss: 5134.331543, avg loss: 2.706553, ppl: 14.977562 +epoch: 1, batch: 36448, sum loss: 3337.087646, avg loss: 2.399057, ppl: 11.012783 +epoch: 1, batch: 36449, sum loss: 4276.899414, avg loss: 2.622256, ppl: 13.766747 +epoch: 1, batch: 36450, sum loss: 3745.698242, avg loss: 2.470777, ppl: 11.831639 +epoch: 1, batch: 36451, sum loss: 4978.175781, avg loss: 2.781104, ppl: 16.136820 +epoch: 1, batch: 36452, sum loss: 4225.665527, avg loss: 2.418813, ppl: 11.232512 +epoch: 1, batch: 36453, sum loss: 4352.087891, avg loss: 2.613867, ppl: 13.651734 +epoch: 1, batch: 36454, sum loss: 4231.506836, avg loss: 2.518754, ppl: 12.413123 +epoch: 1, batch: 36455, sum loss: 4782.111328, avg loss: 2.865256, ppl: 17.553539 +epoch: 1, batch: 36456, sum loss: 3892.836182, avg loss: 
2.386779, ppl: 10.878396 +epoch: 1, batch: 36457, sum loss: 3695.030029, avg loss: 2.397813, ppl: 10.999096 +epoch: 1, batch: 36458, sum loss: 5330.782715, avg loss: 2.754926, ppl: 15.719885 +epoch: 1, batch: 36459, sum loss: 3880.803223, avg loss: 2.620394, ppl: 13.741133 +epoch: 1, batch: 36460, sum loss: 4392.053223, avg loss: 2.727983, ppl: 15.301995 +epoch: 1, batch: 36461, sum loss: 4508.270996, avg loss: 2.454149, ppl: 11.636524 +epoch: 1, batch: 36462, sum loss: 4402.974609, avg loss: 2.689661, ppl: 14.726680 +epoch: 1, batch: 36463, sum loss: 3999.365479, avg loss: 2.669803, ppl: 14.437130 +epoch: 1, batch: 36464, sum loss: 4686.636230, avg loss: 2.710605, ppl: 15.038373 +epoch: 1, batch: 36465, sum loss: 5433.605469, avg loss: 2.822652, ppl: 16.821404 +epoch: 1, batch: 36466, sum loss: 4350.026367, avg loss: 2.633188, ppl: 13.918070 +epoch: 1, batch: 36467, sum loss: 4599.126465, avg loss: 2.740838, ppl: 15.499969 +epoch: 1, batch: 36468, sum loss: 4714.706543, avg loss: 2.477513, ppl: 11.911598 +epoch: 1, batch: 36469, sum loss: 3427.121094, avg loss: 2.195465, ppl: 8.984179 +epoch: 1, batch: 36470, sum loss: 4534.500977, avg loss: 2.694296, ppl: 14.795105 +epoch: 1, batch: 36471, sum loss: 5781.817383, avg loss: 3.183820, ppl: 24.138788 +epoch: 1, batch: 36472, sum loss: 4229.926270, avg loss: 2.604634, ppl: 13.526278 +epoch: 1, batch: 36473, sum loss: 4024.015625, avg loss: 2.763747, ppl: 15.859156 +epoch: 1, batch: 36474, sum loss: 3786.754639, avg loss: 2.356412, ppl: 10.553023 +epoch: 1, batch: 36475, sum loss: 4459.163086, avg loss: 2.643250, ppl: 14.058824 +epoch: 1, batch: 36476, sum loss: 4597.348145, avg loss: 2.702733, ppl: 14.920447 +epoch: 1, batch: 36477, sum loss: 4376.488770, avg loss: 2.419286, ppl: 11.237838 +epoch: 1, batch: 36478, sum loss: 3643.118896, avg loss: 2.395213, ppl: 10.970536 +epoch: 1, batch: 36479, sum loss: 4753.166992, avg loss: 2.784515, ppl: 16.191961 +epoch: 1, batch: 36480, sum loss: 3840.500244, avg loss: 
2.424558, ppl: 11.297236 +epoch: 1, batch: 36481, sum loss: 4757.188965, avg loss: 2.680106, ppl: 14.586645 +epoch: 1, batch: 36482, sum loss: 4088.525146, avg loss: 2.445290, ppl: 11.533897 +epoch: 1, batch: 36483, sum loss: 4354.023438, avg loss: 2.610326, ppl: 13.603482 +epoch: 1, batch: 36484, sum loss: 3966.091064, avg loss: 2.471085, ppl: 11.835277 +epoch: 1, batch: 36485, sum loss: 4403.072266, avg loss: 2.419271, ppl: 11.237659 +epoch: 1, batch: 36486, sum loss: 3234.940186, avg loss: 2.295912, ppl: 9.933494 +epoch: 1, batch: 36487, sum loss: 3625.838867, avg loss: 2.336236, ppl: 10.342240 +epoch: 1, batch: 36488, sum loss: 3626.599854, avg loss: 2.347314, ppl: 10.457442 +epoch: 1, batch: 36489, sum loss: 4284.181641, avg loss: 2.687692, ppl: 14.697721 +epoch: 1, batch: 36490, sum loss: 4507.173340, avg loss: 2.715165, ppl: 15.107098 +epoch: 1, batch: 36491, sum loss: 5346.038086, avg loss: 2.604013, ppl: 13.517873 +epoch: 1, batch: 36492, sum loss: 4310.037598, avg loss: 2.710716, ppl: 15.040033 +epoch: 1, batch: 36493, sum loss: 4390.858887, avg loss: 2.640324, ppl: 14.017746 +epoch: 1, batch: 36494, sum loss: 5419.442871, avg loss: 2.784914, ppl: 16.198429 +epoch: 1, batch: 36495, sum loss: 4784.068359, avg loss: 2.699813, ppl: 14.876947 +epoch: 1, batch: 36496, sum loss: 4269.006348, avg loss: 2.407787, ppl: 11.109350 +epoch: 1, batch: 36497, sum loss: 4978.121094, avg loss: 2.711395, ppl: 15.050253 +epoch: 1, batch: 36498, sum loss: 4554.069336, avg loss: 2.653887, ppl: 14.209156 +epoch: 1, batch: 36499, sum loss: 3204.810059, avg loss: 2.264883, ppl: 9.630000 +epoch: 1, batch: 36500, sum loss: 4741.595703, avg loss: 2.898286, ppl: 18.143023 +epoch: 1, batch: 36501, sum loss: 3934.138184, avg loss: 2.776386, ppl: 16.060865 +epoch: 1, batch: 36502, sum loss: 5247.379395, avg loss: 2.690964, ppl: 14.745880 +epoch: 1, batch: 36503, sum loss: 4072.084473, avg loss: 2.487529, ppl: 12.031507 +epoch: 1, batch: 36504, sum loss: 4337.391602, avg loss: 2.665883, 
ppl: 14.380639 +epoch: 1, batch: 36505, sum loss: 4507.728516, avg loss: 2.791163, ppl: 16.299969 +epoch: 1, batch: 36506, sum loss: 5369.300293, avg loss: 2.922864, ppl: 18.594461 +epoch: 1, batch: 36507, sum loss: 4727.744141, avg loss: 2.576427, ppl: 13.150075 +epoch: 1, batch: 36508, sum loss: 4636.026855, avg loss: 2.555693, ppl: 12.880221 +epoch: 1, batch: 36509, sum loss: 4758.021484, avg loss: 2.737642, ppl: 15.450507 +epoch: 1, batch: 36510, sum loss: 4651.305176, avg loss: 2.519667, ppl: 12.424458 +epoch: 1, batch: 36511, sum loss: 4080.616455, avg loss: 2.544025, ppl: 12.730815 +epoch: 1, batch: 36512, sum loss: 5736.750488, avg loss: 2.931400, ppl: 18.753874 +epoch: 1, batch: 36513, sum loss: 4611.570312, avg loss: 2.744982, ppl: 15.564342 +epoch: 1, batch: 36514, sum loss: 4160.159180, avg loss: 2.626363, ppl: 13.823403 +epoch: 1, batch: 36515, sum loss: 4913.529785, avg loss: 2.627556, ppl: 13.839905 +epoch: 1, batch: 36516, sum loss: 3583.698242, avg loss: 2.259583, ppl: 9.579091 +epoch: 1, batch: 36517, sum loss: 3553.082764, avg loss: 2.289358, ppl: 9.868597 +epoch: 1, batch: 36518, sum loss: 4491.502930, avg loss: 2.683096, ppl: 14.630324 +epoch: 1, batch: 36519, sum loss: 4713.421387, avg loss: 2.716669, ppl: 15.129846 +epoch: 1, batch: 36520, sum loss: 4224.967773, avg loss: 2.526895, ppl: 12.514585 +epoch: 1, batch: 36521, sum loss: 4156.062988, avg loss: 2.496134, ppl: 12.135488 +epoch: 1, batch: 36522, sum loss: 5156.903809, avg loss: 2.687287, ppl: 14.691761 +epoch: 1, batch: 36523, sum loss: 5914.574707, avg loss: 2.954333, ppl: 19.188915 +epoch: 1, batch: 36524, sum loss: 4728.843262, avg loss: 2.584067, ppl: 13.250925 +epoch: 1, batch: 36525, sum loss: 4038.974121, avg loss: 2.532272, ppl: 12.582062 +epoch: 1, batch: 36526, sum loss: 3743.264404, avg loss: 2.343935, ppl: 10.422170 +epoch: 1, batch: 36527, sum loss: 4763.036621, avg loss: 2.303209, ppl: 10.006244 +epoch: 1, batch: 36528, sum loss: 4565.973145, avg loss: 2.663928, ppl: 
14.352559 +epoch: 1, batch: 36529, sum loss: 4002.767334, avg loss: 2.487736, ppl: 12.034000 +epoch: 1, batch: 36530, sum loss: 4254.854492, avg loss: 2.615153, ppl: 13.669312 +epoch: 1, batch: 36531, sum loss: 4617.152344, avg loss: 2.614469, ppl: 13.659962 +epoch: 1, batch: 36532, sum loss: 4405.768555, avg loss: 2.571961, ppl: 13.091468 +epoch: 1, batch: 36533, sum loss: 5452.112305, avg loss: 2.850033, ppl: 17.288345 +epoch: 1, batch: 36534, sum loss: 3901.088867, avg loss: 2.626996, ppl: 13.832153 +epoch: 1, batch: 36535, sum loss: 5412.580078, avg loss: 2.839759, ppl: 17.111635 +epoch: 1, batch: 36536, sum loss: 5080.074707, avg loss: 2.745986, ppl: 15.579975 +epoch: 1, batch: 36537, sum loss: 4401.064941, avg loss: 2.622804, ppl: 13.774292 +epoch: 1, batch: 36538, sum loss: 4841.915039, avg loss: 2.715600, ppl: 15.113679 +epoch: 1, batch: 36539, sum loss: 5606.021484, avg loss: 3.015611, ppl: 20.401556 +epoch: 1, batch: 36540, sum loss: 4308.071289, avg loss: 2.786592, ppl: 16.225628 +epoch: 1, batch: 36541, sum loss: 4360.198730, avg loss: 2.590730, ppl: 13.339505 +epoch: 1, batch: 36542, sum loss: 4920.306152, avg loss: 2.727443, ppl: 15.293723 +epoch: 1, batch: 36543, sum loss: 4070.461426, avg loss: 2.520410, ppl: 12.433688 +epoch: 1, batch: 36544, sum loss: 4193.091309, avg loss: 2.515352, ppl: 12.370956 +epoch: 1, batch: 36545, sum loss: 5731.344727, avg loss: 2.842929, ppl: 17.165974 +epoch: 1, batch: 36546, sum loss: 3937.267822, avg loss: 2.488791, ppl: 12.046705 +epoch: 1, batch: 36547, sum loss: 3802.356934, avg loss: 2.533216, ppl: 12.593940 +epoch: 1, batch: 36548, sum loss: 4300.423828, avg loss: 2.742617, ppl: 15.527570 +epoch: 1, batch: 36549, sum loss: 4107.789551, avg loss: 2.363515, ppl: 10.628248 +epoch: 1, batch: 36550, sum loss: 4654.868164, avg loss: 2.577446, ppl: 13.163482 +epoch: 1, batch: 36551, sum loss: 4111.890137, avg loss: 2.486028, ppl: 12.013463 +epoch: 1, batch: 36552, sum loss: 4171.734863, avg loss: 2.869144, ppl: 
17.621923 +epoch: 1, batch: 36553, sum loss: 4312.024414, avg loss: 2.678276, ppl: 14.559971 +epoch: 1, batch: 36554, sum loss: 4419.966797, avg loss: 2.509919, ppl: 12.303929 +epoch: 1, batch: 36555, sum loss: 4498.936035, avg loss: 2.673165, ppl: 14.485739 +epoch: 1, batch: 36556, sum loss: 3931.230957, avg loss: 2.508763, ppl: 12.289717 +epoch: 1, batch: 36557, sum loss: 5251.543945, avg loss: 2.823411, ppl: 16.834169 +epoch: 1, batch: 36558, sum loss: 3411.140137, avg loss: 2.392104, ppl: 10.936480 +epoch: 1, batch: 36559, sum loss: 4888.325684, avg loss: 2.481384, ppl: 11.957797 +epoch: 1, batch: 36560, sum loss: 4573.488281, avg loss: 3.018804, ppl: 20.466805 +epoch: 1, batch: 36561, sum loss: 4445.750977, avg loss: 2.655765, ppl: 14.235876 +epoch: 1, batch: 36562, sum loss: 3608.972656, avg loss: 2.334394, ppl: 10.323199 +epoch: 1, batch: 36563, sum loss: 3963.569092, avg loss: 2.277913, ppl: 9.756301 +epoch: 1, batch: 36564, sum loss: 4515.488281, avg loss: 2.504431, ppl: 12.236588 +epoch: 1, batch: 36565, sum loss: 3820.298828, avg loss: 2.387687, ppl: 10.888277 +epoch: 1, batch: 36566, sum loss: 4395.346680, avg loss: 2.735126, ppl: 15.411678 +epoch: 1, batch: 36567, sum loss: 4357.849121, avg loss: 2.574040, ppl: 13.118713 +epoch: 1, batch: 36568, sum loss: 3740.726807, avg loss: 2.200428, ppl: 9.028875 +epoch: 1, batch: 36569, sum loss: 4156.512695, avg loss: 2.629040, ppl: 13.860461 +epoch: 1, batch: 36570, sum loss: 4569.053711, avg loss: 2.395938, ppl: 10.978490 +epoch: 1, batch: 36571, sum loss: 4591.767578, avg loss: 2.705815, ppl: 14.966507 +epoch: 1, batch: 36572, sum loss: 4010.645264, avg loss: 2.548059, ppl: 12.782272 +epoch: 1, batch: 36573, sum loss: 5121.563477, avg loss: 2.872442, ppl: 17.680136 +epoch: 1, batch: 36574, sum loss: 4554.836914, avg loss: 2.712827, ppl: 15.071827 +epoch: 1, batch: 36575, sum loss: 3929.790771, avg loss: 2.507844, ppl: 12.278426 +epoch: 1, batch: 36576, sum loss: 4728.361816, avg loss: 2.495178, ppl: 12.123888 
+epoch: 1, batch: 36577, sum loss: 4754.923828, avg loss: 2.852384, ppl: 17.329039 +epoch: 1, batch: 36578, sum loss: 4281.061523, avg loss: 2.697582, ppl: 14.843789 +epoch: 1, batch: 36579, sum loss: 4022.798828, avg loss: 2.495533, ppl: 12.128193 +epoch: 1, batch: 36580, sum loss: 5042.060547, avg loss: 2.743232, ppl: 15.537120 +epoch: 1, batch: 36581, sum loss: 4296.131348, avg loss: 2.539085, ppl: 12.668070 +epoch: 1, batch: 36582, sum loss: 4132.420410, avg loss: 2.427979, ppl: 11.335949 +epoch: 1, batch: 36583, sum loss: 3952.153076, avg loss: 2.654233, ppl: 14.214079 +epoch: 1, batch: 36584, sum loss: 5139.867188, avg loss: 2.804074, ppl: 16.511776 +epoch: 1, batch: 36585, sum loss: 5240.781738, avg loss: 2.702827, ppl: 14.921860 +epoch: 1, batch: 36586, sum loss: 4455.675293, avg loss: 2.649034, ppl: 14.140373 +epoch: 1, batch: 36587, sum loss: 4303.615234, avg loss: 2.459209, ppl: 11.695554 +epoch: 1, batch: 36588, sum loss: 4223.012695, avg loss: 2.481206, ppl: 11.955673 +epoch: 1, batch: 36589, sum loss: 4056.049316, avg loss: 2.705837, ppl: 14.966835 +epoch: 1, batch: 36590, sum loss: 5707.023438, avg loss: 2.978614, ppl: 19.660542 +epoch: 1, batch: 36591, sum loss: 4505.790527, avg loss: 2.537044, ppl: 12.642249 +epoch: 1, batch: 36592, sum loss: 5628.524414, avg loss: 2.855669, ppl: 17.386068 +epoch: 1, batch: 36593, sum loss: 3800.512695, avg loss: 2.325895, ppl: 10.235840 +epoch: 1, batch: 36594, sum loss: 4700.728516, avg loss: 2.599961, ppl: 13.463207 +epoch: 1, batch: 36595, sum loss: 3795.976074, avg loss: 2.561388, ppl: 12.953778 +epoch: 1, batch: 36596, sum loss: 4192.713867, avg loss: 2.668819, ppl: 14.422921 +epoch: 1, batch: 36597, sum loss: 4145.333008, avg loss: 2.527642, ppl: 12.523940 +epoch: 1, batch: 36598, sum loss: 4304.863770, avg loss: 2.530784, ppl: 12.563354 +epoch: 1, batch: 36599, sum loss: 4125.358398, avg loss: 2.527793, ppl: 12.525833 +epoch: 1, batch: 36600, sum loss: 4324.555176, avg loss: 2.592659, ppl: 13.365263 +epoch: 
1, batch: 36601, sum loss: 3858.049316, avg loss: 2.460491, ppl: 11.710557 +epoch: 1, batch: 36602, sum loss: 4324.859863, avg loss: 2.648414, ppl: 14.131607 +epoch: 1, batch: 36603, sum loss: 4730.969238, avg loss: 2.877718, ppl: 17.773676 +epoch: 1, batch: 36604, sum loss: 4079.207520, avg loss: 2.560708, ppl: 12.944977 +epoch: 1, batch: 36605, sum loss: 4973.343262, avg loss: 2.699969, ppl: 14.879274 +epoch: 1, batch: 36606, sum loss: 4768.914062, avg loss: 2.587582, ppl: 13.297581 +epoch: 1, batch: 36607, sum loss: 4589.413574, avg loss: 2.763042, ppl: 15.847986 +epoch: 1, batch: 36608, sum loss: 4454.922852, avg loss: 2.551502, ppl: 12.826358 +epoch: 1, batch: 36609, sum loss: 3858.929932, avg loss: 2.407317, ppl: 11.104134 +epoch: 1, batch: 36610, sum loss: 4186.022949, avg loss: 2.488718, ppl: 12.045818 +epoch: 1, batch: 36611, sum loss: 4126.616699, avg loss: 2.441785, ppl: 11.493540 +epoch: 1, batch: 36612, sum loss: 3685.646240, avg loss: 2.344559, ppl: 10.428670 +epoch: 1, batch: 36613, sum loss: 5254.115723, avg loss: 2.778485, ppl: 16.094624 +epoch: 1, batch: 36614, sum loss: 4349.971680, avg loss: 2.617311, ppl: 13.698844 +epoch: 1, batch: 36615, sum loss: 4643.514160, avg loss: 2.702860, ppl: 14.922354 +epoch: 1, batch: 36616, sum loss: 4240.984863, avg loss: 2.708164, ppl: 15.001707 +epoch: 1, batch: 36617, sum loss: 4733.301758, avg loss: 2.909220, ppl: 18.342495 +epoch: 1, batch: 36618, sum loss: 4134.922363, avg loss: 2.483437, ppl: 11.982375 +epoch: 1, batch: 36619, sum loss: 4391.179199, avg loss: 2.640517, ppl: 14.020447 +epoch: 1, batch: 36620, sum loss: 4828.764648, avg loss: 2.603108, ppl: 13.505644 +epoch: 1, batch: 36621, sum loss: 4087.663086, avg loss: 2.625346, ppl: 13.809347 +epoch: 1, batch: 36622, sum loss: 4521.437500, avg loss: 2.787569, ppl: 16.241493 +epoch: 1, batch: 36623, sum loss: 4421.224609, avg loss: 2.500693, ppl: 12.190937 +epoch: 1, batch: 36624, sum loss: 4636.352051, avg loss: 2.650859, ppl: 14.166201 +epoch: 1, 
batch: 36625, sum loss: 4713.022949, avg loss: 2.818794, ppl: 16.756626 +epoch: 1, batch: 36626, sum loss: 4264.505859, avg loss: 2.600308, ppl: 13.467891 +epoch: 1, batch: 36627, sum loss: 4277.097168, avg loss: 2.564207, ppl: 12.990354 +epoch: 1, batch: 36628, sum loss: 4316.637207, avg loss: 2.810311, ppl: 16.615082 +epoch: 1, batch: 36629, sum loss: 3975.568359, avg loss: 2.606930, ppl: 13.557366 +epoch: 1, batch: 36630, sum loss: 4506.348145, avg loss: 2.848513, ppl: 17.262096 +epoch: 1, batch: 36631, sum loss: 4364.728516, avg loss: 2.463165, ppl: 11.741917 +epoch: 1, batch: 36632, sum loss: 4782.901855, avg loss: 2.593765, ppl: 13.380047 +epoch: 1, batch: 36633, sum loss: 5559.463867, avg loss: 2.779732, ppl: 16.114702 +epoch: 1, batch: 36634, sum loss: 4302.485352, avg loss: 2.571719, ppl: 13.088300 +epoch: 1, batch: 36635, sum loss: 5313.874512, avg loss: 2.947240, ppl: 19.053301 +epoch: 1, batch: 36636, sum loss: 4798.438477, avg loss: 2.553719, ppl: 12.854826 +epoch: 1, batch: 36637, sum loss: 4194.871094, avg loss: 2.413620, ppl: 11.174336 +epoch: 1, batch: 36638, sum loss: 4920.914062, avg loss: 2.684623, ppl: 14.652677 +epoch: 1, batch: 36639, sum loss: 4222.052246, avg loss: 2.540344, ppl: 12.684039 +epoch: 1, batch: 36640, sum loss: 5056.165039, avg loss: 2.801199, ppl: 16.464382 +epoch: 1, batch: 36641, sum loss: 4236.533691, avg loss: 2.433391, ppl: 11.397467 +epoch: 1, batch: 36642, sum loss: 4150.527344, avg loss: 2.540102, ppl: 12.680970 +epoch: 1, batch: 36643, sum loss: 5025.182129, avg loss: 2.858465, ppl: 17.434748 +epoch: 1, batch: 36644, sum loss: 3876.030273, avg loss: 2.383782, ppl: 10.845849 +epoch: 1, batch: 36645, sum loss: 5003.256348, avg loss: 2.719161, ppl: 15.167592 +epoch: 1, batch: 36646, sum loss: 4435.832031, avg loss: 2.487848, ppl: 12.035343 +epoch: 1, batch: 36647, sum loss: 3319.752441, avg loss: 2.181178, ppl: 8.856732 +epoch: 1, batch: 36648, sum loss: 4807.520996, avg loss: 2.703893, ppl: 14.937767 +epoch: 1, batch: 
36649, sum loss: 4152.591309, avg loss: 2.568084, ppl: 13.040812 +epoch: 1, batch: 36650, sum loss: 3933.492188, avg loss: 2.267143, ppl: 9.651782 +epoch: 1, batch: 36651, sum loss: 5234.899414, avg loss: 2.897011, ppl: 18.119905 +epoch: 1, batch: 36652, sum loss: 4892.492676, avg loss: 2.739358, ppl: 15.477041 +epoch: 1, batch: 36653, sum loss: 5632.097656, avg loss: 3.000585, ppl: 20.097288 +epoch: 1, batch: 36654, sum loss: 4714.957031, avg loss: 2.852364, ppl: 17.328691 +epoch: 1, batch: 36655, sum loss: 3364.495605, avg loss: 2.325152, ppl: 10.228239 +epoch: 1, batch: 36656, sum loss: 4272.921875, avg loss: 2.584950, ppl: 13.262623 +epoch: 1, batch: 36657, sum loss: 4957.893555, avg loss: 2.595756, ppl: 13.406716 +epoch: 1, batch: 36658, sum loss: 4634.438965, avg loss: 2.550599, ppl: 12.814782 +epoch: 1, batch: 36659, sum loss: 4707.687500, avg loss: 2.851416, ppl: 17.312277 +epoch: 1, batch: 36660, sum loss: 4130.129883, avg loss: 2.562115, ppl: 12.963208 +epoch: 1, batch: 36661, sum loss: 3654.147705, avg loss: 2.415167, ppl: 11.191640 +epoch: 1, batch: 36662, sum loss: 4296.936523, avg loss: 2.660642, ppl: 14.305469 +epoch: 1, batch: 36663, sum loss: 4420.558105, avg loss: 2.466829, ppl: 11.785021 +epoch: 1, batch: 36664, sum loss: 5067.499023, avg loss: 2.862994, ppl: 17.513884 +epoch: 1, batch: 36665, sum loss: 4316.578125, avg loss: 2.554188, ppl: 12.860856 +epoch: 1, batch: 36666, sum loss: 4079.911865, avg loss: 2.454821, ppl: 11.644344 +epoch: 1, batch: 36667, sum loss: 5353.102539, avg loss: 2.759331, ppl: 15.789280 +epoch: 1, batch: 36668, sum loss: 4570.540039, avg loss: 2.492115, ppl: 12.086819 +epoch: 1, batch: 36669, sum loss: 5416.318359, avg loss: 2.743829, ppl: 15.546395 +epoch: 1, batch: 36670, sum loss: 4134.451172, avg loss: 2.475719, ppl: 11.890256 +epoch: 1, batch: 36671, sum loss: 4248.109863, avg loss: 2.503306, ppl: 12.222835 +epoch: 1, batch: 36672, sum loss: 4205.994141, avg loss: 2.523092, ppl: 12.467083 +epoch: 1, batch: 36673, 
sum loss: 5514.572754, avg loss: 2.887211, ppl: 17.943193 +epoch: 1, batch: 36674, sum loss: 5203.874023, avg loss: 2.803811, ppl: 16.507442 +epoch: 1, batch: 36675, sum loss: 4152.919434, avg loss: 2.560369, ppl: 12.940598 +epoch: 1, batch: 36676, sum loss: 4098.142578, avg loss: 2.523487, ppl: 12.472009 +epoch: 1, batch: 36677, sum loss: 5792.606445, avg loss: 2.790273, ppl: 16.285460 +epoch: 1, batch: 36678, sum loss: 4117.160645, avg loss: 2.645990, ppl: 14.097397 +epoch: 1, batch: 36679, sum loss: 4422.464844, avg loss: 2.654541, ppl: 14.218451 +epoch: 1, batch: 36680, sum loss: 3935.330566, avg loss: 2.395210, ppl: 10.970505 +epoch: 1, batch: 36681, sum loss: 5579.026367, avg loss: 2.875787, ppl: 17.739376 +epoch: 1, batch: 36682, sum loss: 4421.000488, avg loss: 2.734076, ppl: 15.395508 +epoch: 1, batch: 36683, sum loss: 4547.137207, avg loss: 2.731013, ppl: 15.348432 +epoch: 1, batch: 36684, sum loss: 3962.052490, avg loss: 2.533282, ppl: 12.594769 +epoch: 1, batch: 36685, sum loss: 4347.930664, avg loss: 2.503127, ppl: 12.220644 +epoch: 1, batch: 36686, sum loss: 4252.798340, avg loss: 2.649719, ppl: 14.150055 +epoch: 1, batch: 36687, sum loss: 4503.420898, avg loss: 2.677420, ppl: 14.547514 +epoch: 1, batch: 36688, sum loss: 4174.025879, avg loss: 2.528181, ppl: 12.530687 +epoch: 1, batch: 36689, sum loss: 4978.445312, avg loss: 2.744457, ppl: 15.556169 +epoch: 1, batch: 36690, sum loss: 4397.460449, avg loss: 2.706130, ppl: 14.971218 +epoch: 1, batch: 36691, sum loss: 4591.382324, avg loss: 2.598405, ppl: 13.442286 +epoch: 1, batch: 36692, sum loss: 3811.467041, avg loss: 2.559750, ppl: 12.932579 +epoch: 1, batch: 36693, sum loss: 5326.414551, avg loss: 2.705137, ppl: 14.956366 +epoch: 1, batch: 36694, sum loss: 4930.542969, avg loss: 2.920938, ppl: 18.558683 +epoch: 1, batch: 36695, sum loss: 3814.736328, avg loss: 2.488413, ppl: 12.042146 +epoch: 1, batch: 36696, sum loss: 4756.068848, avg loss: 2.742831, ppl: 15.530887 +epoch: 1, batch: 36697, sum 
loss: 4295.952148, avg loss: 2.569350, ppl: 13.057328 +epoch: 1, batch: 36698, sum loss: 4725.063965, avg loss: 2.660509, ppl: 14.303570 +epoch: 1, batch: 36699, sum loss: 4542.401367, avg loss: 2.504080, ppl: 12.232304 +epoch: 1, batch: 36700, sum loss: 3585.024414, avg loss: 2.467326, ppl: 11.790875 +epoch: 1, batch: 36701, sum loss: 4884.440918, avg loss: 2.849732, ppl: 17.283152 +epoch: 1, batch: 36702, sum loss: 4381.238770, avg loss: 2.496432, ppl: 12.139108 +epoch: 1, batch: 36703, sum loss: 4299.313477, avg loss: 2.458155, ppl: 11.683238 +epoch: 1, batch: 36704, sum loss: 5159.077637, avg loss: 2.637565, ppl: 13.979125 +epoch: 1, batch: 36705, sum loss: 3398.723145, avg loss: 2.400228, ppl: 11.025693 +epoch: 1, batch: 36706, sum loss: 4074.160645, avg loss: 2.466199, ppl: 11.777596 +epoch: 1, batch: 36707, sum loss: 4958.597168, avg loss: 2.587994, ppl: 13.303063 +epoch: 1, batch: 36708, sum loss: 3782.542725, avg loss: 2.496728, ppl: 12.142697 +epoch: 1, batch: 36709, sum loss: 3813.304688, avg loss: 2.430404, ppl: 11.363477 +epoch: 1, batch: 36710, sum loss: 4479.303711, avg loss: 2.517877, ppl: 12.402243 +epoch: 1, batch: 36711, sum loss: 4638.751953, avg loss: 2.557195, ppl: 12.899586 +epoch: 1, batch: 36712, sum loss: 4771.587891, avg loss: 2.643539, ppl: 14.062883 +epoch: 1, batch: 36713, sum loss: 3536.500732, avg loss: 2.418947, ppl: 11.234026 +epoch: 1, batch: 36714, sum loss: 4287.159668, avg loss: 2.722006, ppl: 15.210806 +epoch: 1, batch: 36715, sum loss: 4616.271973, avg loss: 2.830332, ppl: 16.951092 +epoch: 1, batch: 36716, sum loss: 4790.124023, avg loss: 2.640642, ppl: 14.022199 +epoch: 1, batch: 36717, sum loss: 3996.099854, avg loss: 2.516436, ppl: 12.384378 +epoch: 1, batch: 36718, sum loss: 3824.290527, avg loss: 2.265575, ppl: 9.636663 +epoch: 1, batch: 36719, sum loss: 4702.212402, avg loss: 2.679323, ppl: 14.575226 +epoch: 1, batch: 36720, sum loss: 3572.439941, avg loss: 2.359604, ppl: 10.586755 +epoch: 1, batch: 36721, sum loss: 
4499.935059, avg loss: 2.548095, ppl: 12.782723 +epoch: 1, batch: 36722, sum loss: 4004.431152, avg loss: 2.510615, ppl: 12.312501 +epoch: 1, batch: 36723, sum loss: 4048.459717, avg loss: 2.520834, ppl: 12.438969 +epoch: 1, batch: 36724, sum loss: 4449.071777, avg loss: 2.643536, ppl: 14.062846 +epoch: 1, batch: 36725, sum loss: 4906.764648, avg loss: 2.908574, ppl: 18.330647 +epoch: 1, batch: 36726, sum loss: 4463.896973, avg loss: 2.692338, ppl: 14.766166 +epoch: 1, batch: 36727, sum loss: 4099.087402, avg loss: 2.456014, ppl: 11.658251 +epoch: 1, batch: 36728, sum loss: 5480.512207, avg loss: 2.893618, ppl: 18.058525 +epoch: 1, batch: 36729, sum loss: 5020.855957, avg loss: 2.838245, ppl: 17.085760 +epoch: 1, batch: 36730, sum loss: 5258.616699, avg loss: 2.900506, ppl: 18.183340 +epoch: 1, batch: 36731, sum loss: 4893.974121, avg loss: 2.675765, ppl: 14.523457 +epoch: 1, batch: 36732, sum loss: 4211.321777, avg loss: 2.338324, ppl: 10.363853 +epoch: 1, batch: 36733, sum loss: 5283.368164, avg loss: 2.785118, ppl: 16.201723 +epoch: 1, batch: 36734, sum loss: 4651.319336, avg loss: 2.492668, ppl: 12.093503 +epoch: 1, batch: 36735, sum loss: 4232.343262, avg loss: 2.694044, ppl: 14.791373 +epoch: 1, batch: 36736, sum loss: 4536.587891, avg loss: 2.793465, ppl: 16.337538 +epoch: 1, batch: 36737, sum loss: 4507.029785, avg loss: 2.581346, ppl: 13.214914 +epoch: 1, batch: 36738, sum loss: 4251.401855, avg loss: 2.584439, ppl: 13.255848 +epoch: 1, batch: 36739, sum loss: 4136.664062, avg loss: 2.385619, ppl: 10.865791 +epoch: 1, batch: 36740, sum loss: 3993.131104, avg loss: 2.451277, ppl: 11.603160 +epoch: 1, batch: 36741, sum loss: 4962.988281, avg loss: 2.691425, ppl: 14.752688 +epoch: 1, batch: 36742, sum loss: 5237.914551, avg loss: 2.748119, ppl: 15.613230 +epoch: 1, batch: 36743, sum loss: 4460.872559, avg loss: 2.700286, ppl: 14.883990 +epoch: 1, batch: 36744, sum loss: 3312.240967, avg loss: 2.206690, ppl: 9.085589 +epoch: 1, batch: 36745, sum loss: 
3955.652100, avg loss: 2.423806, ppl: 11.288748 +epoch: 1, batch: 36746, sum loss: 3358.150146, avg loss: 2.196305, ppl: 8.991728 +epoch: 1, batch: 36747, sum loss: 4792.808105, avg loss: 2.691077, ppl: 14.747550 +epoch: 1, batch: 36748, sum loss: 5061.176758, avg loss: 2.667990, ppl: 14.410974 +epoch: 1, batch: 36749, sum loss: 4381.297852, avg loss: 2.523789, ppl: 12.475780 +epoch: 1, batch: 36750, sum loss: 4506.099121, avg loss: 2.798819, ppl: 16.425241 +epoch: 1, batch: 36751, sum loss: 5291.789062, avg loss: 2.661866, ppl: 14.322987 +epoch: 1, batch: 36752, sum loss: 4558.999512, avg loss: 2.534185, ppl: 12.606158 +epoch: 1, batch: 36753, sum loss: 4941.352539, avg loss: 2.692835, ppl: 14.773504 +epoch: 1, batch: 36754, sum loss: 4307.332520, avg loss: 2.688722, ppl: 14.712859 +epoch: 1, batch: 36755, sum loss: 4725.196289, avg loss: 2.689355, ppl: 14.722172 +epoch: 1, batch: 36756, sum loss: 4665.742676, avg loss: 2.836318, ppl: 17.052853 +epoch: 1, batch: 36757, sum loss: 3810.015137, avg loss: 2.423674, ppl: 11.287251 +epoch: 1, batch: 36758, sum loss: 4184.782715, avg loss: 2.442955, ppl: 11.506999 +epoch: 1, batch: 36759, sum loss: 3650.806152, avg loss: 2.425785, ppl: 11.311103 +epoch: 1, batch: 36760, sum loss: 4572.189941, avg loss: 2.638309, ppl: 13.989531 +epoch: 1, batch: 36761, sum loss: 4286.354980, avg loss: 2.610448, ppl: 13.605140 +epoch: 1, batch: 36762, sum loss: 4584.838867, avg loss: 2.782063, ppl: 16.152302 +epoch: 1, batch: 36763, sum loss: 3574.296387, avg loss: 2.578857, ppl: 13.182068 +epoch: 1, batch: 36764, sum loss: 4056.456543, avg loss: 2.471942, ppl: 11.845425 +epoch: 1, batch: 36765, sum loss: 3440.122070, avg loss: 2.462507, ppl: 11.734190 +epoch: 1, batch: 36766, sum loss: 5275.153809, avg loss: 2.734657, ppl: 15.404463 +epoch: 1, batch: 36767, sum loss: 4305.667969, avg loss: 2.615837, ppl: 13.678665 +epoch: 1, batch: 36768, sum loss: 3593.437012, avg loss: 2.474819, ppl: 11.879556 +epoch: 1, batch: 36769, sum loss: 
4928.007812, avg loss: 2.814396, ppl: 16.683102 +epoch: 1, batch: 36770, sum loss: 4267.323242, avg loss: 2.712856, ppl: 15.072265 +epoch: 1, batch: 36771, sum loss: 4384.399414, avg loss: 2.597393, ppl: 13.428684 +epoch: 1, batch: 36772, sum loss: 4589.866699, avg loss: 2.706289, ppl: 14.973610 +epoch: 1, batch: 36773, sum loss: 4818.909180, avg loss: 2.834653, ppl: 17.024487 +epoch: 1, batch: 36774, sum loss: 4167.148926, avg loss: 2.759701, ppl: 15.795123 +epoch: 1, batch: 36775, sum loss: 4384.629395, avg loss: 2.502642, ppl: 12.214727 +epoch: 1, batch: 36776, sum loss: 3529.342773, avg loss: 2.211368, ppl: 9.128192 +epoch: 1, batch: 36777, sum loss: 3924.563965, avg loss: 2.550074, ppl: 12.808053 +epoch: 1, batch: 36778, sum loss: 4290.337891, avg loss: 2.300449, ppl: 9.978663 +epoch: 1, batch: 36779, sum loss: 4342.446289, avg loss: 2.725955, ppl: 15.270991 +epoch: 1, batch: 36780, sum loss: 4480.696777, avg loss: 2.451147, ppl: 11.601647 +epoch: 1, batch: 36781, sum loss: 5106.689453, avg loss: 2.727932, ppl: 15.301218 +epoch: 1, batch: 36782, sum loss: 4799.451660, avg loss: 2.605566, ppl: 13.538880 +epoch: 1, batch: 36783, sum loss: 4700.976074, avg loss: 2.500519, ppl: 12.188819 +epoch: 1, batch: 36784, sum loss: 3979.525391, avg loss: 2.397305, ppl: 10.993504 +epoch: 1, batch: 36785, sum loss: 3858.235840, avg loss: 2.539984, ppl: 12.679468 +epoch: 1, batch: 36786, sum loss: 3184.796387, avg loss: 2.364363, ppl: 10.637258 +epoch: 1, batch: 36787, sum loss: 4084.912842, avg loss: 2.682149, ppl: 14.616469 +epoch: 1, batch: 36788, sum loss: 4293.484863, avg loss: 2.398595, ppl: 11.007698 +epoch: 1, batch: 36789, sum loss: 4405.425293, avg loss: 2.564276, ppl: 12.991242 +epoch: 1, batch: 36790, sum loss: 3401.442627, avg loss: 2.422680, ppl: 11.276037 +epoch: 1, batch: 36791, sum loss: 4785.437500, avg loss: 2.583930, ppl: 13.249099 +epoch: 1, batch: 36792, sum loss: 4330.624512, avg loss: 2.597855, ppl: 13.434891 +epoch: 1, batch: 36793, sum loss: 
4313.705078, avg loss: 2.614367, ppl: 13.658565 +epoch: 1, batch: 36794, sum loss: 5006.746582, avg loss: 2.846360, ppl: 17.224964 +epoch: 1, batch: 36795, sum loss: 4544.831055, avg loss: 2.865593, ppl: 17.559471 +epoch: 1, batch: 36796, sum loss: 4215.265625, avg loss: 2.548528, ppl: 12.788268 +epoch: 1, batch: 36797, sum loss: 3955.656982, avg loss: 2.335099, ppl: 10.330478 +epoch: 1, batch: 36798, sum loss: 5028.858887, avg loss: 2.489534, ppl: 12.055658 +epoch: 1, batch: 36799, sum loss: 4209.739258, avg loss: 2.424965, ppl: 11.301833 +epoch: 1, batch: 36800, sum loss: 4887.866699, avg loss: 2.682693, ppl: 14.624424 +epoch: 1, batch: 36801, sum loss: 5425.307129, avg loss: 2.688457, ppl: 14.708970 +epoch: 1, batch: 36802, sum loss: 3460.347412, avg loss: 2.252831, ppl: 9.514629 +epoch: 1, batch: 36803, sum loss: 4303.975098, avg loss: 2.653499, ppl: 14.203646 +epoch: 1, batch: 36804, sum loss: 4801.856445, avg loss: 2.685602, ppl: 14.667027 +epoch: 1, batch: 36805, sum loss: 4992.173340, avg loss: 2.578602, ppl: 13.178699 +epoch: 1, batch: 36806, sum loss: 4397.456543, avg loss: 2.722883, ppl: 15.224154 +epoch: 1, batch: 36807, sum loss: 5085.656738, avg loss: 2.819100, ppl: 16.761761 +epoch: 1, batch: 36808, sum loss: 4526.878906, avg loss: 2.683390, ppl: 14.634623 +epoch: 1, batch: 36809, sum loss: 3723.555908, avg loss: 2.492340, ppl: 12.089531 +epoch: 1, batch: 36810, sum loss: 4479.979004, avg loss: 2.564384, ppl: 12.992652 +epoch: 1, batch: 36811, sum loss: 4551.675781, avg loss: 2.707719, ppl: 14.995034 +epoch: 1, batch: 36812, sum loss: 4541.680176, avg loss: 2.509216, ppl: 12.295284 +epoch: 1, batch: 36813, sum loss: 3806.823242, avg loss: 2.537882, ppl: 12.652845 +epoch: 1, batch: 36814, sum loss: 5300.011719, avg loss: 2.880441, ppl: 17.822130 +epoch: 1, batch: 36815, sum loss: 3583.083496, avg loss: 2.263477, ppl: 9.616464 +epoch: 1, batch: 36816, sum loss: 4538.245605, avg loss: 2.539589, ppl: 12.674463 +epoch: 1, batch: 36817, sum loss: 
3938.865234, avg loss: 2.587954, ppl: 13.302521 +epoch: 1, batch: 36818, sum loss: 4444.826660, avg loss: 2.778017, ppl: 16.087082 +epoch: 1, batch: 36819, sum loss: 4108.011719, avg loss: 2.633341, ppl: 13.920197 +epoch: 1, batch: 36820, sum loss: 4852.924316, avg loss: 2.602104, ppl: 13.492098 +epoch: 1, batch: 36821, sum loss: 4149.062012, avg loss: 2.652853, ppl: 14.194478 +epoch: 1, batch: 36822, sum loss: 4419.889648, avg loss: 2.610685, ppl: 13.608371 +epoch: 1, batch: 36823, sum loss: 3500.409668, avg loss: 2.415742, ppl: 11.198072 +epoch: 1, batch: 36824, sum loss: 3928.375488, avg loss: 2.427921, ppl: 11.335287 +epoch: 1, batch: 36825, sum loss: 4225.495605, avg loss: 2.608331, ppl: 13.576369 +epoch: 1, batch: 36826, sum loss: 4236.722168, avg loss: 2.487799, ppl: 12.034760 +epoch: 1, batch: 36827, sum loss: 3536.602539, avg loss: 2.508229, ppl: 12.283154 +epoch: 1, batch: 36828, sum loss: 4086.007324, avg loss: 2.503681, ppl: 12.227420 +epoch: 1, batch: 36829, sum loss: 5470.343750, avg loss: 2.889775, ppl: 17.989258 +epoch: 1, batch: 36830, sum loss: 4997.583496, avg loss: 2.620652, ppl: 13.744685 +epoch: 1, batch: 36831, sum loss: 4157.593750, avg loss: 2.639742, ppl: 14.009587 +epoch: 1, batch: 36832, sum loss: 4287.698242, avg loss: 2.654922, ppl: 14.223870 +epoch: 1, batch: 36833, sum loss: 5405.685059, avg loss: 2.970157, ppl: 19.494974 +epoch: 1, batch: 36834, sum loss: 4244.626953, avg loss: 2.618524, ppl: 13.715459 +epoch: 1, batch: 36835, sum loss: 4669.265625, avg loss: 2.522564, ppl: 12.460504 +epoch: 1, batch: 36836, sum loss: 4067.161377, avg loss: 2.686368, ppl: 14.678271 +epoch: 1, batch: 36837, sum loss: 4216.893555, avg loss: 2.532669, ppl: 12.587054 +epoch: 1, batch: 36838, sum loss: 3572.503174, avg loss: 2.410596, ppl: 11.140600 +epoch: 1, batch: 36839, sum loss: 4801.035156, avg loss: 2.767167, ppl: 15.913492 +epoch: 1, batch: 36840, sum loss: 4703.793945, avg loss: 2.580249, ppl: 13.200425 +epoch: 1, batch: 36841, sum loss: 
4677.616211, avg loss: 2.840083, ppl: 17.117180 +epoch: 1, batch: 36842, sum loss: 4115.001953, avg loss: 2.447949, ppl: 11.564600 +epoch: 1, batch: 36843, sum loss: 4177.811523, avg loss: 2.388686, ppl: 10.899159 +epoch: 1, batch: 36844, sum loss: 4849.995117, avg loss: 2.520787, ppl: 12.438388 +epoch: 1, batch: 36845, sum loss: 5323.235840, avg loss: 2.643116, ppl: 14.056937 +epoch: 1, batch: 36846, sum loss: 4580.697266, avg loss: 2.515485, ppl: 12.372602 +epoch: 1, batch: 36847, sum loss: 4915.190918, avg loss: 2.748988, ppl: 15.626812 +epoch: 1, batch: 36848, sum loss: 3746.063721, avg loss: 2.418375, ppl: 11.227605 +epoch: 1, batch: 36849, sum loss: 4447.881348, avg loss: 2.608728, ppl: 13.581766 +epoch: 1, batch: 36850, sum loss: 3791.857422, avg loss: 2.741762, ppl: 15.514303 +epoch: 1, batch: 36851, sum loss: 3920.259521, avg loss: 2.584219, ppl: 13.252928 +epoch: 1, batch: 36852, sum loss: 4327.997559, avg loss: 2.676560, ppl: 14.535009 +epoch: 1, batch: 36853, sum loss: 3869.525879, avg loss: 2.509420, ppl: 12.297797 +epoch: 1, batch: 36854, sum loss: 4010.806885, avg loss: 2.468189, ppl: 11.801053 +epoch: 1, batch: 36855, sum loss: 4738.028809, avg loss: 2.708993, ppl: 15.014148 +epoch: 1, batch: 36856, sum loss: 4560.829102, avg loss: 2.642427, ppl: 14.047255 +epoch: 1, batch: 36857, sum loss: 5280.568359, avg loss: 2.709373, ppl: 15.019858 +epoch: 1, batch: 36858, sum loss: 4293.504883, avg loss: 2.581783, ppl: 13.220687 +epoch: 1, batch: 36859, sum loss: 4484.890137, avg loss: 2.432153, ppl: 11.383364 +epoch: 1, batch: 36860, sum loss: 3706.275635, avg loss: 2.557816, ppl: 12.907599 +epoch: 1, batch: 36861, sum loss: 3615.372559, avg loss: 2.168790, ppl: 8.747689 +epoch: 1, batch: 36862, sum loss: 3611.089600, avg loss: 2.352501, ppl: 10.511831 +epoch: 1, batch: 36863, sum loss: 4076.319092, avg loss: 2.583219, ppl: 13.239682 +epoch: 1, batch: 36864, sum loss: 4080.614990, avg loss: 2.668813, ppl: 14.422839 +epoch: 1, batch: 36865, sum loss: 
4387.156250, avg loss: 2.647650, ppl: 14.120816 +epoch: 1, batch: 36866, sum loss: 3783.262695, avg loss: 2.530610, ppl: 12.561171 +epoch: 1, batch: 36867, sum loss: 4338.290527, avg loss: 2.593121, ppl: 13.371433 +epoch: 1, batch: 36868, sum loss: 4187.905762, avg loss: 2.580348, ppl: 13.201738 +epoch: 1, batch: 36869, sum loss: 4772.283691, avg loss: 2.630807, ppl: 13.884970 +epoch: 1, batch: 36870, sum loss: 3600.144043, avg loss: 2.414584, ppl: 11.185113 +epoch: 1, batch: 36871, sum loss: 4276.906250, avg loss: 2.640066, ppl: 14.014124 +epoch: 1, batch: 36872, sum loss: 3982.969971, avg loss: 2.520867, ppl: 12.439379 +epoch: 1, batch: 36873, sum loss: 4575.302734, avg loss: 2.841803, ppl: 17.146650 +epoch: 1, batch: 36874, sum loss: 4992.915039, avg loss: 2.783119, ppl: 16.169371 +epoch: 1, batch: 36875, sum loss: 3824.733887, avg loss: 2.425323, ppl: 11.305878 +epoch: 1, batch: 36876, sum loss: 4636.443848, avg loss: 2.761432, ppl: 15.822479 +epoch: 1, batch: 36877, sum loss: 4125.646973, avg loss: 2.531072, ppl: 12.566966 +epoch: 1, batch: 36878, sum loss: 4168.443848, avg loss: 2.629933, ppl: 13.872839 +epoch: 1, batch: 36879, sum loss: 4375.478516, avg loss: 2.685991, ppl: 14.672729 +epoch: 1, batch: 36880, sum loss: 4751.481445, avg loss: 2.582327, ppl: 13.227879 +epoch: 1, batch: 36881, sum loss: 4088.206299, avg loss: 2.760437, ppl: 15.806742 +epoch: 1, batch: 36882, sum loss: 3988.588135, avg loss: 2.629260, ppl: 13.863511 +epoch: 1, batch: 36883, sum loss: 3662.072754, avg loss: 2.393512, ppl: 10.951887 +epoch: 1, batch: 36884, sum loss: 5410.776855, avg loss: 2.812254, ppl: 16.647402 +epoch: 1, batch: 36885, sum loss: 4676.297363, avg loss: 2.653971, ppl: 14.210362 +epoch: 1, batch: 36886, sum loss: 4800.267090, avg loss: 2.869257, ppl: 17.623922 +epoch: 1, batch: 36887, sum loss: 5841.942383, avg loss: 2.920971, ppl: 18.559307 +epoch: 1, batch: 36888, sum loss: 4542.750000, avg loss: 2.610776, ppl: 13.609607 +epoch: 1, batch: 36889, sum loss: 
3951.709717, avg loss: 2.643284, ppl: 14.059299 +epoch: 1, batch: 36890, sum loss: 4346.602539, avg loss: 2.565881, ppl: 13.012117 +epoch: 1, batch: 36891, sum loss: 3826.601562, avg loss: 2.694790, ppl: 14.802408 +epoch: 1, batch: 36892, sum loss: 3802.435791, avg loss: 2.440588, ppl: 11.479786 +epoch: 1, batch: 36893, sum loss: 2863.067871, avg loss: 2.007761, ppl: 7.446629 +epoch: 1, batch: 36894, sum loss: 4533.588867, avg loss: 2.681011, ppl: 14.599842 +epoch: 1, batch: 36895, sum loss: 4416.563965, avg loss: 2.777713, ppl: 16.082199 +epoch: 1, batch: 36896, sum loss: 4038.947021, avg loss: 2.615899, ppl: 13.679503 +epoch: 1, batch: 36897, sum loss: 5141.132812, avg loss: 2.667947, ppl: 14.410348 +epoch: 1, batch: 36898, sum loss: 4242.096191, avg loss: 2.557020, ppl: 12.897326 +epoch: 1, batch: 36899, sum loss: 4233.000977, avg loss: 2.401021, ppl: 11.034442 +epoch: 1, batch: 36900, sum loss: 3865.338867, avg loss: 2.518136, ppl: 12.405452 +epoch: 1, batch: 36901, sum loss: 3556.393311, avg loss: 2.361483, ppl: 10.606670 +epoch: 1, batch: 36902, sum loss: 4795.463867, avg loss: 2.624775, ppl: 13.801468 +epoch: 1, batch: 36903, sum loss: 4567.540039, avg loss: 2.547429, ppl: 12.774220 +epoch: 1, batch: 36904, sum loss: 4830.211426, avg loss: 2.821385, ppl: 16.800106 +epoch: 1, batch: 36905, sum loss: 4763.688965, avg loss: 2.640626, ppl: 14.021975 +epoch: 1, batch: 36906, sum loss: 5090.538086, avg loss: 2.848650, ppl: 17.264467 +epoch: 1, batch: 36907, sum loss: 4238.526367, avg loss: 2.509489, ppl: 12.298641 +epoch: 1, batch: 36908, sum loss: 4471.211914, avg loss: 2.602568, ppl: 13.498360 +epoch: 1, batch: 36909, sum loss: 5358.262695, avg loss: 2.950585, ppl: 19.117136 +epoch: 1, batch: 36910, sum loss: 4711.624512, avg loss: 2.846903, ppl: 17.234325 +epoch: 1, batch: 36911, sum loss: 4432.278320, avg loss: 2.588948, ppl: 13.315750 +epoch: 1, batch: 36912, sum loss: 4288.007324, avg loss: 2.648553, ppl: 14.133574 +epoch: 1, batch: 36913, sum loss: 
4500.965820, avg loss: 2.553015, ppl: 12.845778 +epoch: 1, batch: 36914, sum loss: 4630.830566, avg loss: 2.565557, ppl: 13.007902 +epoch: 1, batch: 36915, sum loss: 4577.216797, avg loss: 2.672047, ppl: 14.469560 +epoch: 1, batch: 36916, sum loss: 3551.139160, avg loss: 2.298472, ppl: 9.958955 +epoch: 1, batch: 36917, sum loss: 5417.905273, avg loss: 2.583646, ppl: 13.245340 +epoch: 1, batch: 36918, sum loss: 4299.310547, avg loss: 2.505426, ppl: 12.248772 +epoch: 1, batch: 36919, sum loss: 5172.624023, avg loss: 2.875277, ppl: 17.730339 +epoch: 1, batch: 36920, sum loss: 4427.890137, avg loss: 2.428903, ppl: 11.346430 +epoch: 1, batch: 36921, sum loss: 3705.795654, avg loss: 2.454169, ppl: 11.636763 +epoch: 1, batch: 36922, sum loss: 4413.631348, avg loss: 2.732899, ppl: 15.377398 +epoch: 1, batch: 36923, sum loss: 4239.218750, avg loss: 2.393687, ppl: 10.953801 +epoch: 1, batch: 36924, sum loss: 4301.719727, avg loss: 2.680199, ppl: 14.587998 +epoch: 1, batch: 36925, sum loss: 4619.494629, avg loss: 2.878190, ppl: 17.782055 +epoch: 1, batch: 36926, sum loss: 3781.962158, avg loss: 2.447872, ppl: 11.563715 +epoch: 1, batch: 36927, sum loss: 5342.631836, avg loss: 2.727224, ppl: 15.290383 +epoch: 1, batch: 36928, sum loss: 4360.183594, avg loss: 2.481607, ppl: 11.960469 +epoch: 1, batch: 36929, sum loss: 4619.210449, avg loss: 2.796132, ppl: 16.381166 +epoch: 1, batch: 36930, sum loss: 4500.182617, avg loss: 2.903344, ppl: 18.235016 +epoch: 1, batch: 36931, sum loss: 5259.998047, avg loss: 2.652546, ppl: 14.190116 +epoch: 1, batch: 36932, sum loss: 4604.976562, avg loss: 2.534384, ppl: 12.608667 +epoch: 1, batch: 36933, sum loss: 4246.409180, avg loss: 2.632616, ppl: 13.910109 +epoch: 1, batch: 36934, sum loss: 4739.880371, avg loss: 2.688531, ppl: 14.710053 +epoch: 1, batch: 36935, sum loss: 3784.853271, avg loss: 2.652315, ppl: 14.186842 +epoch: 1, batch: 36936, sum loss: 4913.539551, avg loss: 2.595636, ppl: 13.405112 +epoch: 1, batch: 36937, sum loss: 
4032.354980, avg loss: 2.509244, ppl: 12.295630 +epoch: 1, batch: 36938, sum loss: 3964.197021, avg loss: 2.648094, ppl: 14.127090 +epoch: 1, batch: 36939, sum loss: 3290.307861, avg loss: 2.325306, ppl: 10.229810 +epoch: 1, batch: 36940, sum loss: 4392.050293, avg loss: 2.366406, ppl: 10.659019 +epoch: 1, batch: 36941, sum loss: 3893.438721, avg loss: 2.571624, ppl: 13.087061 +epoch: 1, batch: 36942, sum loss: 4981.399902, avg loss: 2.704343, ppl: 14.944496 +epoch: 1, batch: 36943, sum loss: 4472.666016, avg loss: 2.660717, ppl: 14.306550 +epoch: 1, batch: 36944, sum loss: 4752.160645, avg loss: 2.795389, ppl: 16.368990 +epoch: 1, batch: 36945, sum loss: 4457.861328, avg loss: 2.726521, ppl: 15.279630 +epoch: 1, batch: 36946, sum loss: 4133.022461, avg loss: 2.627478, ppl: 13.838820 +epoch: 1, batch: 36947, sum loss: 5014.462891, avg loss: 2.685840, ppl: 14.670514 +epoch: 1, batch: 36948, sum loss: 3204.995850, avg loss: 2.132399, ppl: 8.435079 +epoch: 1, batch: 36949, sum loss: 3602.949951, avg loss: 2.437720, ppl: 11.446913 +epoch: 1, batch: 36950, sum loss: 3579.175537, avg loss: 2.316619, ppl: 10.141325 +epoch: 1, batch: 36951, sum loss: 4310.411621, avg loss: 2.646048, ppl: 14.098207 +epoch: 1, batch: 36952, sum loss: 4239.034180, avg loss: 2.529257, ppl: 12.544180 +epoch: 1, batch: 36953, sum loss: 3621.632568, avg loss: 2.250859, ppl: 9.495892 +epoch: 1, batch: 36954, sum loss: 4464.898926, avg loss: 2.635714, ppl: 13.953265 +epoch: 1, batch: 36955, sum loss: 4272.602539, avg loss: 2.468286, ppl: 11.802196 +epoch: 1, batch: 36956, sum loss: 3934.155029, avg loss: 2.489972, ppl: 12.060934 +epoch: 1, batch: 36957, sum loss: 4011.355469, avg loss: 2.417936, ppl: 11.222671 +epoch: 1, batch: 36958, sum loss: 4563.992188, avg loss: 2.520150, ppl: 12.430467 +epoch: 1, batch: 36959, sum loss: 5526.386230, avg loss: 2.672334, ppl: 14.473711 +epoch: 1, batch: 36960, sum loss: 4238.326660, avg loss: 2.715136, ppl: 15.106658 +epoch: 1, batch: 36961, sum loss: 
5031.621094, avg loss: 2.772243, ppl: 15.994470 +epoch: 1, batch: 36962, sum loss: 4578.352051, avg loss: 2.512817, ppl: 12.339638 +epoch: 1, batch: 36963, sum loss: 4088.119873, avg loss: 2.698429, ppl: 14.856376 +epoch: 1, batch: 36964, sum loss: 4531.449219, avg loss: 2.550056, ppl: 12.807817 +epoch: 1, batch: 36965, sum loss: 3371.197266, avg loss: 2.262549, ppl: 9.607545 +epoch: 1, batch: 36966, sum loss: 4905.894531, avg loss: 2.804971, ppl: 16.526600 +epoch: 1, batch: 36967, sum loss: 4599.302734, avg loss: 2.777357, ppl: 16.076469 +epoch: 1, batch: 36968, sum loss: 4224.732422, avg loss: 2.663766, ppl: 14.350225 +epoch: 1, batch: 36969, sum loss: 4401.615234, avg loss: 2.613786, ppl: 13.650634 +epoch: 1, batch: 36970, sum loss: 4586.886230, avg loss: 2.595861, ppl: 13.408126 +epoch: 1, batch: 36971, sum loss: 4517.735352, avg loss: 2.666904, ppl: 14.395331 +epoch: 1, batch: 36972, sum loss: 4751.545410, avg loss: 2.695148, ppl: 14.807707 +epoch: 1, batch: 36973, sum loss: 4552.985352, avg loss: 2.585454, ppl: 13.269319 +epoch: 1, batch: 36974, sum loss: 5135.861328, avg loss: 2.832797, ppl: 16.992929 +epoch: 1, batch: 36975, sum loss: 4280.980469, avg loss: 2.774453, ppl: 16.029852 +epoch: 1, batch: 36976, sum loss: 4297.357910, avg loss: 2.551875, ppl: 12.831144 +epoch: 1, batch: 36977, sum loss: 3753.568604, avg loss: 2.381706, ppl: 10.823352 +epoch: 1, batch: 36978, sum loss: 3886.063965, avg loss: 2.484696, ppl: 11.997469 +epoch: 1, batch: 36979, sum loss: 4566.104004, avg loss: 2.678067, ppl: 14.556927 +epoch: 1, batch: 36980, sum loss: 4135.497070, avg loss: 2.441262, ppl: 11.487523 +epoch: 1, batch: 36981, sum loss: 4417.874023, avg loss: 2.700412, ppl: 14.885864 +epoch: 1, batch: 36982, sum loss: 4611.445801, avg loss: 2.515791, ppl: 12.376400 +epoch: 1, batch: 36983, sum loss: 5082.831055, avg loss: 2.693604, ppl: 14.784864 +epoch: 1, batch: 36984, sum loss: 3940.832275, avg loss: 2.438634, ppl: 11.457376 +epoch: 1, batch: 36985, sum loss: 
4485.032715, avg loss: 2.705086, ppl: 14.955603 +epoch: 1, batch: 36986, sum loss: 4992.878418, avg loss: 2.608609, ppl: 13.580153 +epoch: 1, batch: 36987, sum loss: 4102.213867, avg loss: 2.655155, ppl: 14.227187 +epoch: 1, batch: 36988, sum loss: 3752.770264, avg loss: 2.490226, ppl: 12.064000 +epoch: 1, batch: 36989, sum loss: 4813.595703, avg loss: 2.613244, ppl: 13.643242 +epoch: 1, batch: 36990, sum loss: 4155.316895, avg loss: 2.615052, ppl: 13.667920 +epoch: 1, batch: 36991, sum loss: 4463.785156, avg loss: 2.572787, ppl: 13.102287 +epoch: 1, batch: 36992, sum loss: 4281.651855, avg loss: 2.669359, ppl: 14.430719 +epoch: 1, batch: 36993, sum loss: 4361.362305, avg loss: 2.700534, ppl: 14.887677 +epoch: 1, batch: 36994, sum loss: 4252.764648, avg loss: 2.528398, ppl: 12.533406 +epoch: 1, batch: 36995, sum loss: 4376.411621, avg loss: 2.598819, ppl: 13.447850 +epoch: 1, batch: 36996, sum loss: 3868.850098, avg loss: 2.413506, ppl: 11.173066 +epoch: 1, batch: 36997, sum loss: 3669.873291, avg loss: 2.330078, ppl: 10.278745 +epoch: 1, batch: 36998, sum loss: 4065.425781, avg loss: 2.653672, ppl: 14.206111 +epoch: 1, batch: 36999, sum loss: 4117.071289, avg loss: 2.706819, ppl: 14.981541 +epoch: 1, batch: 37000, sum loss: 4648.612793, avg loss: 2.726459, ppl: 15.278693 +epoch: 1, batch: 37001, sum loss: 4914.936523, avg loss: 2.643860, ppl: 14.067404 +epoch: 1, batch: 37002, sum loss: 4987.607422, avg loss: 2.729944, ppl: 15.332028 +epoch: 1, batch: 37003, sum loss: 3821.907227, avg loss: 2.369440, ppl: 10.691407 +epoch: 1, batch: 37004, sum loss: 4173.207520, avg loss: 2.527685, ppl: 12.524477 +epoch: 1, batch: 37005, sum loss: 4658.700195, avg loss: 2.531902, ppl: 12.577407 +epoch: 1, batch: 37006, sum loss: 4437.987793, avg loss: 2.468291, ppl: 11.802263 +epoch: 1, batch: 37007, sum loss: 3898.385498, avg loss: 2.554643, ppl: 12.866708 +epoch: 1, batch: 37008, sum loss: 4409.285156, avg loss: 2.489715, ppl: 12.057837 +epoch: 1, batch: 37009, sum loss: 
3770.748291, avg loss: 2.364105, ppl: 10.634521 +epoch: 1, batch: 37010, sum loss: 4436.003906, avg loss: 2.445427, ppl: 11.535471 +epoch: 1, batch: 37011, sum loss: 4184.537109, avg loss: 2.369500, ppl: 10.692044 +epoch: 1, batch: 37012, sum loss: 4984.562500, avg loss: 2.747829, ppl: 15.608715 +epoch: 1, batch: 37013, sum loss: 3393.912598, avg loss: 2.385041, ppl: 10.859503 +epoch: 1, batch: 37014, sum loss: 4433.677246, avg loss: 2.600397, ppl: 13.469089 +epoch: 1, batch: 37015, sum loss: 4611.085449, avg loss: 2.523856, ppl: 12.476619 +epoch: 1, batch: 37016, sum loss: 3650.847168, avg loss: 2.361480, ppl: 10.606632 +epoch: 1, batch: 37017, sum loss: 3924.624023, avg loss: 2.482368, ppl: 11.969575 +epoch: 1, batch: 37018, sum loss: 4505.468262, avg loss: 2.732243, ppl: 15.367314 +epoch: 1, batch: 37019, sum loss: 4395.212891, avg loss: 2.718128, ppl: 15.151931 +epoch: 1, batch: 37020, sum loss: 4416.626953, avg loss: 2.545606, ppl: 12.750957 +epoch: 1, batch: 37021, sum loss: 3146.491211, avg loss: 2.075522, ppl: 7.968705 +epoch: 1, batch: 37022, sum loss: 4330.524902, avg loss: 2.618213, ppl: 13.711205 +epoch: 1, batch: 37023, sum loss: 4913.732422, avg loss: 2.717772, ppl: 15.146542 +epoch: 1, batch: 37024, sum loss: 4574.898438, avg loss: 2.644450, ppl: 14.075701 +epoch: 1, batch: 37025, sum loss: 4623.752930, avg loss: 2.530790, ppl: 12.563425 +epoch: 1, batch: 37026, sum loss: 3957.780762, avg loss: 2.661587, ppl: 14.319002 +epoch: 1, batch: 37027, sum loss: 4789.457520, avg loss: 2.570831, ppl: 13.076681 +epoch: 1, batch: 37028, sum loss: 5923.737793, avg loss: 3.098189, ppl: 22.157789 +epoch: 1, batch: 37029, sum loss: 4074.411377, avg loss: 2.532263, ppl: 12.581947 +epoch: 1, batch: 37030, sum loss: 3809.521973, avg loss: 2.442001, ppl: 11.496025 +epoch: 1, batch: 37031, sum loss: 6038.118652, avg loss: 2.755873, ppl: 15.734778 +epoch: 1, batch: 37032, sum loss: 4276.078613, avg loss: 2.409058, ppl: 11.123482 +epoch: 1, batch: 37033, sum loss: 
5763.392578, avg loss: 2.790989, ppl: 16.297132 +epoch: 1, batch: 37034, sum loss: 4558.078613, avg loss: 2.767503, ppl: 15.918843 +epoch: 1, batch: 37035, sum loss: 4661.471680, avg loss: 2.750131, ppl: 15.644679 +epoch: 1, batch: 37036, sum loss: 4588.724121, avg loss: 2.691334, ppl: 14.751341 +epoch: 1, batch: 37037, sum loss: 4607.907227, avg loss: 2.825204, ppl: 16.864380 +epoch: 1, batch: 37038, sum loss: 4171.145996, avg loss: 2.736972, ppl: 15.440166 +epoch: 1, batch: 37039, sum loss: 4288.214844, avg loss: 2.710629, ppl: 15.038728 +epoch: 1, batch: 37040, sum loss: 4074.174805, avg loss: 2.393757, ppl: 10.954575 +epoch: 1, batch: 37041, sum loss: 4786.211426, avg loss: 2.654582, ppl: 14.219042 +epoch: 1, batch: 37042, sum loss: 4005.835449, avg loss: 2.520979, ppl: 12.440769 +epoch: 1, batch: 37043, sum loss: 4317.610352, avg loss: 2.666838, ppl: 14.394382 +epoch: 1, batch: 37044, sum loss: 4853.840820, avg loss: 2.639391, ppl: 14.004678 +epoch: 1, batch: 37045, sum loss: 4349.079590, avg loss: 2.585660, ppl: 13.272042 +epoch: 1, batch: 37046, sum loss: 4616.647949, avg loss: 2.693494, ppl: 14.783233 +epoch: 1, batch: 37047, sum loss: 5150.942871, avg loss: 2.758941, ppl: 15.783119 +epoch: 1, batch: 37048, sum loss: 4184.623535, avg loss: 2.542299, ppl: 12.708853 +epoch: 1, batch: 37049, sum loss: 5002.978516, avg loss: 2.885224, ppl: 17.907576 +epoch: 1, batch: 37050, sum loss: 4400.239746, avg loss: 2.634874, ppl: 13.941557 +epoch: 1, batch: 37051, sum loss: 4510.655762, avg loss: 2.492075, ppl: 12.086331 +epoch: 1, batch: 37052, sum loss: 3348.882080, avg loss: 2.311168, ppl: 10.086198 +epoch: 1, batch: 37053, sum loss: 5304.860352, avg loss: 2.980259, ppl: 19.692911 +epoch: 1, batch: 37054, sum loss: 4838.519531, avg loss: 2.842843, ppl: 17.164499 +epoch: 1, batch: 37055, sum loss: 4299.902832, avg loss: 2.452883, ppl: 11.621799 +epoch: 1, batch: 37056, sum loss: 4065.661133, avg loss: 2.491214, ppl: 12.075928 +epoch: 1, batch: 37057, sum loss: 
4924.263184, avg loss: 2.710106, ppl: 15.030874 +epoch: 1, batch: 37058, sum loss: 4633.936523, avg loss: 2.818696, ppl: 16.754992 +epoch: 1, batch: 37059, sum loss: 5455.479492, avg loss: 2.817913, ppl: 16.741871 +epoch: 1, batch: 37060, sum loss: 4349.640625, avg loss: 2.533279, ppl: 12.594742 +epoch: 1, batch: 37061, sum loss: 3304.601562, avg loss: 2.269644, ppl: 9.675956 +epoch: 1, batch: 37062, sum loss: 3737.329590, avg loss: 2.341685, ppl: 10.398746 +epoch: 1, batch: 37063, sum loss: 3646.478271, avg loss: 2.410098, ppl: 11.135054 +epoch: 1, batch: 37064, sum loss: 5244.592773, avg loss: 2.968077, ppl: 19.454481 +epoch: 1, batch: 37065, sum loss: 4224.728516, avg loss: 2.763067, ppl: 15.848368 +epoch: 1, batch: 37066, sum loss: 4363.569824, avg loss: 2.605116, ppl: 13.532800 +epoch: 1, batch: 37067, sum loss: 4608.845703, avg loss: 2.529553, ppl: 12.547898 +epoch: 1, batch: 37068, sum loss: 4490.362793, avg loss: 2.491877, ppl: 12.083941 +epoch: 1, batch: 37069, sum loss: 4822.420410, avg loss: 2.701636, ppl: 14.904099 +epoch: 1, batch: 37070, sum loss: 4472.744141, avg loss: 2.490392, ppl: 12.066008 +epoch: 1, batch: 37071, sum loss: 4100.264648, avg loss: 2.526349, ppl: 12.507758 +epoch: 1, batch: 37072, sum loss: 4238.978516, avg loss: 2.726031, ppl: 15.272156 +epoch: 1, batch: 37073, sum loss: 4513.252930, avg loss: 2.421273, ppl: 11.260184 +epoch: 1, batch: 37074, sum loss: 4242.841797, avg loss: 2.519502, ppl: 12.422414 +epoch: 1, batch: 37075, sum loss: 4778.701660, avg loss: 2.635798, ppl: 13.954443 +epoch: 1, batch: 37076, sum loss: 4832.625977, avg loss: 2.798278, ppl: 16.416355 +epoch: 1, batch: 37077, sum loss: 4374.735840, avg loss: 2.420994, ppl: 11.257038 +epoch: 1, batch: 37078, sum loss: 4321.012695, avg loss: 2.693898, ppl: 14.789215 +epoch: 1, batch: 37079, sum loss: 4475.620117, avg loss: 2.459132, ppl: 11.694655 +epoch: 1, batch: 37080, sum loss: 4937.305176, avg loss: 2.889003, ppl: 17.975370 +epoch: 1, batch: 37081, sum loss: 
3381.332031, avg loss: 2.343265, ppl: 10.415190 +epoch: 1, batch: 37082, sum loss: 4248.858398, avg loss: 2.587612, ppl: 13.297976 +epoch: 1, batch: 37083, sum loss: 4579.467773, avg loss: 2.734011, ppl: 15.394506 +epoch: 1, batch: 37084, sum loss: 4301.793945, avg loss: 2.673582, ppl: 14.491791 +epoch: 1, batch: 37085, sum loss: 4755.982422, avg loss: 2.523068, ppl: 12.466783 +epoch: 1, batch: 37086, sum loss: 5405.359863, avg loss: 2.607506, ppl: 13.565178 +epoch: 1, batch: 37087, sum loss: 3338.396484, avg loss: 2.341091, ppl: 10.392572 +epoch: 1, batch: 37088, sum loss: 5125.155273, avg loss: 2.762887, ppl: 15.845523 +epoch: 1, batch: 37089, sum loss: 4470.755859, avg loss: 2.681917, ppl: 14.613079 +epoch: 1, batch: 37090, sum loss: 4377.673340, avg loss: 2.608864, ppl: 13.583609 +epoch: 1, batch: 37091, sum loss: 4878.053711, avg loss: 2.947465, ppl: 19.057575 +epoch: 1, batch: 37092, sum loss: 3616.440186, avg loss: 2.514910, ppl: 12.365493 +epoch: 1, batch: 37093, sum loss: 4334.188965, avg loss: 2.576807, ppl: 13.155064 +epoch: 1, batch: 37094, sum loss: 4783.668945, avg loss: 2.676927, ppl: 14.540347 +epoch: 1, batch: 37095, sum loss: 3922.175293, avg loss: 2.499793, ppl: 12.179973 +epoch: 1, batch: 37096, sum loss: 4403.608887, avg loss: 2.513476, ppl: 12.347773 +epoch: 1, batch: 37097, sum loss: 3709.142578, avg loss: 2.451515, ppl: 11.605922 +epoch: 1, batch: 37098, sum loss: 4570.031738, avg loss: 2.565992, ppl: 13.013563 +epoch: 1, batch: 37099, sum loss: 5687.262695, avg loss: 2.894281, ppl: 18.070511 +epoch: 1, batch: 37100, sum loss: 3343.562988, avg loss: 2.474880, ppl: 11.880281 +epoch: 1, batch: 37101, sum loss: 4944.098145, avg loss: 2.648151, ppl: 14.127894 +epoch: 1, batch: 37102, sum loss: 4169.083984, avg loss: 2.587886, ppl: 13.301620 +epoch: 1, batch: 37103, sum loss: 3772.093506, avg loss: 2.518086, ppl: 12.404837 +epoch: 1, batch: 37104, sum loss: 4513.034180, avg loss: 2.825945, ppl: 16.876888 +epoch: 1, batch: 37105, sum loss: 
3392.266113, avg loss: 2.339494, ppl: 10.375985 +epoch: 1, batch: 37106, sum loss: 4887.091797, avg loss: 2.613418, ppl: 13.645614 +epoch: 1, batch: 37107, sum loss: 4233.126953, avg loss: 2.581175, ppl: 13.212655 +epoch: 1, batch: 37108, sum loss: 4267.186035, avg loss: 2.456641, ppl: 11.665566 +epoch: 1, batch: 37109, sum loss: 4060.045898, avg loss: 2.268182, ppl: 9.661820 +epoch: 1, batch: 37110, sum loss: 4479.021484, avg loss: 2.599548, ppl: 13.457655 +epoch: 1, batch: 37111, sum loss: 4095.369141, avg loss: 2.396354, ppl: 10.983061 +epoch: 1, batch: 37112, sum loss: 3919.735596, avg loss: 2.504623, ppl: 12.238949 +epoch: 1, batch: 37113, sum loss: 4085.585693, avg loss: 2.555088, ppl: 12.872430 +epoch: 1, batch: 37114, sum loss: 4615.766602, avg loss: 2.677359, ppl: 14.546626 +epoch: 1, batch: 37115, sum loss: 5167.556641, avg loss: 2.753094, ppl: 15.691101 +epoch: 1, batch: 37116, sum loss: 4760.237305, avg loss: 2.756362, ppl: 15.742471 +epoch: 1, batch: 37117, sum loss: 3761.574707, avg loss: 2.400495, ppl: 11.028629 +epoch: 1, batch: 37118, sum loss: 4346.131348, avg loss: 2.748976, ppl: 15.626622 +epoch: 1, batch: 37119, sum loss: 5755.786133, avg loss: 2.843768, ppl: 17.180378 +epoch: 1, batch: 37120, sum loss: 3800.836426, avg loss: 2.508803, ppl: 12.290209 +epoch: 1, batch: 37121, sum loss: 3581.424805, avg loss: 2.306133, ppl: 10.035544 +epoch: 1, batch: 37122, sum loss: 4362.845703, avg loss: 2.708160, ppl: 15.001650 +epoch: 1, batch: 37123, sum loss: 3589.016602, avg loss: 2.446501, ppl: 11.547865 +epoch: 1, batch: 37124, sum loss: 5612.073242, avg loss: 2.787915, ppl: 16.247112 +epoch: 1, batch: 37125, sum loss: 4512.609375, avg loss: 2.526657, ppl: 12.511611 +epoch: 1, batch: 37126, sum loss: 4209.434570, avg loss: 2.726318, ppl: 15.276533 +epoch: 1, batch: 37127, sum loss: 4269.236328, avg loss: 2.623993, ppl: 13.790675 +epoch: 1, batch: 37128, sum loss: 4037.596191, avg loss: 2.698928, ppl: 14.863788 +epoch: 1, batch: 37129, sum loss: 
3872.795166, avg loss: 2.449586, ppl: 11.583547 +epoch: 1, batch: 37130, sum loss: 4327.866699, avg loss: 2.739156, ppl: 15.473924 +epoch: 1, batch: 37131, sum loss: 4219.795898, avg loss: 2.442012, ppl: 11.496143 +epoch: 1, batch: 37132, sum loss: 4061.101074, avg loss: 2.546145, ppl: 12.757827 +epoch: 1, batch: 37133, sum loss: 4934.397461, avg loss: 2.916311, ppl: 18.473007 +epoch: 1, batch: 37134, sum loss: 4511.635742, avg loss: 2.589917, ppl: 13.328668 +epoch: 1, batch: 37135, sum loss: 4839.982422, avg loss: 2.603541, ppl: 13.511493 +epoch: 1, batch: 37136, sum loss: 4982.681152, avg loss: 2.950078, ppl: 19.107439 +epoch: 1, batch: 37137, sum loss: 4743.901367, avg loss: 2.603678, ppl: 13.513349 +epoch: 1, batch: 37138, sum loss: 4358.788574, avg loss: 2.682331, ppl: 14.619136 +epoch: 1, batch: 37139, sum loss: 4229.845215, avg loss: 2.426761, ppl: 11.322155 +epoch: 1, batch: 37140, sum loss: 4418.089844, avg loss: 2.579153, ppl: 13.185968 +epoch: 1, batch: 37141, sum loss: 5080.713379, avg loss: 2.911583, ppl: 18.385889 +epoch: 1, batch: 37142, sum loss: 4494.938477, avg loss: 2.359548, ppl: 10.586164 +epoch: 1, batch: 37143, sum loss: 3971.740234, avg loss: 2.359917, ppl: 10.590072 +epoch: 1, batch: 37144, sum loss: 4305.306641, avg loss: 2.549027, ppl: 12.794648 +epoch: 1, batch: 37145, sum loss: 5684.496094, avg loss: 2.690249, ppl: 14.735344 +epoch: 1, batch: 37146, sum loss: 5454.704102, avg loss: 2.734187, ppl: 15.397226 +epoch: 1, batch: 37147, sum loss: 4184.385254, avg loss: 2.587746, ppl: 13.299762 +epoch: 1, batch: 37148, sum loss: 4814.721680, avg loss: 2.700349, ppl: 14.884924 +epoch: 1, batch: 37149, sum loss: 4213.777344, avg loss: 2.372622, ppl: 10.725483 +epoch: 1, batch: 37150, sum loss: 3335.116943, avg loss: 2.515171, ppl: 12.368728 +epoch: 1, batch: 37151, sum loss: 4268.505859, avg loss: 2.499125, ppl: 12.171839 +epoch: 1, batch: 37152, sum loss: 3819.141357, avg loss: 2.568353, ppl: 13.044325 +epoch: 1, batch: 37153, sum loss: 
4759.671875, avg loss: 2.570017, ppl: 13.066048 +epoch: 1, batch: 37154, sum loss: 4985.095215, avg loss: 2.648829, ppl: 14.137467 +epoch: 1, batch: 37155, sum loss: 3235.736572, avg loss: 2.373981, ppl: 10.740068 +epoch: 1, batch: 37156, sum loss: 4255.148926, avg loss: 2.641309, ppl: 14.031563 +epoch: 1, batch: 37157, sum loss: 4450.053711, avg loss: 2.869151, ppl: 17.622057 +epoch: 1, batch: 37158, sum loss: 3609.363037, avg loss: 2.485787, ppl: 12.010571 +epoch: 1, batch: 37159, sum loss: 5083.392578, avg loss: 2.530310, ppl: 12.557397 +epoch: 1, batch: 37160, sum loss: 3740.118652, avg loss: 2.425498, ppl: 11.307865 +epoch: 1, batch: 37161, sum loss: 4999.902832, avg loss: 2.852198, ppl: 17.325821 +epoch: 1, batch: 37162, sum loss: 4661.038574, avg loss: 2.618561, ppl: 13.715973 +epoch: 1, batch: 37163, sum loss: 3704.174561, avg loss: 2.504513, ppl: 12.237596 +epoch: 1, batch: 37164, sum loss: 4627.584473, avg loss: 2.559505, ppl: 12.929413 +epoch: 1, batch: 37165, sum loss: 4989.964844, avg loss: 2.943932, ppl: 18.990372 +epoch: 1, batch: 37166, sum loss: 4099.646484, avg loss: 2.658655, ppl: 14.277080 +epoch: 1, batch: 37167, sum loss: 4148.916992, avg loss: 2.704639, ppl: 14.948926 +epoch: 1, batch: 37168, sum loss: 5196.027832, avg loss: 2.907682, ppl: 18.314301 +epoch: 1, batch: 37169, sum loss: 4500.220703, avg loss: 2.691520, ppl: 14.754081 +epoch: 1, batch: 37170, sum loss: 4180.900391, avg loss: 2.446402, ppl: 11.546722 +epoch: 1, batch: 37171, sum loss: 4667.078125, avg loss: 2.564329, ppl: 12.991934 +epoch: 1, batch: 37172, sum loss: 5131.871094, avg loss: 2.847875, ppl: 17.251091 +epoch: 1, batch: 37173, sum loss: 5022.823242, avg loss: 2.734253, ppl: 15.398243 +epoch: 1, batch: 37174, sum loss: 3659.972900, avg loss: 2.390577, ppl: 10.919791 +epoch: 1, batch: 37175, sum loss: 4092.792969, avg loss: 2.497128, ppl: 12.147556 +epoch: 1, batch: 37176, sum loss: 3855.744873, avg loss: 2.457454, ppl: 11.675046 +epoch: 1, batch: 37177, sum loss: 
5667.224121, avg loss: 2.835030, ppl: 17.030905 +epoch: 1, batch: 37178, sum loss: 5092.986816, avg loss: 2.832584, ppl: 16.989311 +epoch: 1, batch: 37179, sum loss: 3772.680664, avg loss: 2.381743, ppl: 10.823751 +epoch: 1, batch: 37180, sum loss: 4959.278809, avg loss: 2.770547, ppl: 15.967360 +epoch: 1, batch: 37181, sum loss: 3799.209717, avg loss: 2.447945, ppl: 11.564553 +epoch: 1, batch: 37182, sum loss: 4715.830566, avg loss: 2.673373, ppl: 14.488764 +epoch: 1, batch: 37183, sum loss: 4957.811035, avg loss: 2.867444, ppl: 17.592001 +epoch: 1, batch: 37184, sum loss: 4006.293213, avg loss: 2.366387, ppl: 10.658813 +epoch: 1, batch: 37185, sum loss: 3924.237549, avg loss: 2.289520, ppl: 9.870201 +epoch: 1, batch: 37186, sum loss: 3699.378174, avg loss: 2.410018, ppl: 11.134164 +epoch: 1, batch: 37187, sum loss: 4964.237305, avg loss: 2.908165, ppl: 18.323139 +epoch: 1, batch: 37188, sum loss: 4267.973145, avg loss: 2.623216, ppl: 13.779974 +epoch: 1, batch: 37189, sum loss: 4453.406738, avg loss: 2.520321, ppl: 12.432586 +epoch: 1, batch: 37190, sum loss: 3689.387451, avg loss: 2.486110, ppl: 12.014448 +epoch: 1, batch: 37191, sum loss: 5525.733887, avg loss: 2.732806, ppl: 15.375971 +epoch: 1, batch: 37192, sum loss: 4294.617188, avg loss: 2.580900, ppl: 13.209021 +epoch: 1, batch: 37193, sum loss: 4823.021484, avg loss: 2.779840, ppl: 16.116434 +epoch: 1, batch: 37194, sum loss: 4068.784180, avg loss: 2.459966, ppl: 11.704418 +epoch: 1, batch: 37195, sum loss: 4101.906738, avg loss: 2.358773, ppl: 10.577967 +epoch: 1, batch: 37196, sum loss: 4238.888672, avg loss: 2.632850, ppl: 13.913369 +epoch: 1, batch: 37197, sum loss: 4270.023926, avg loss: 2.525147, ppl: 12.492737 +epoch: 1, batch: 37198, sum loss: 4086.968750, avg loss: 2.611482, ppl: 13.619215 +epoch: 1, batch: 37199, sum loss: 4184.377930, avg loss: 2.510125, ppl: 12.306464 +epoch: 1, batch: 37200, sum loss: 3555.185303, avg loss: 2.418494, ppl: 11.228930 +epoch: 1, batch: 37201, sum loss: 
4021.675049, avg loss: 2.518269, ppl: 12.407099 +epoch: 1, batch: 37202, sum loss: 4918.009766, avg loss: 2.726170, ppl: 15.274268 +epoch: 1, batch: 37203, sum loss: 4729.094727, avg loss: 2.634593, ppl: 13.937642 +epoch: 1, batch: 37204, sum loss: 3496.270508, avg loss: 2.251301, ppl: 9.500088 +epoch: 1, batch: 37205, sum loss: 5071.803711, avg loss: 2.908144, ppl: 18.322763 +epoch: 1, batch: 37206, sum loss: 3010.696777, avg loss: 2.319489, ppl: 10.170476 +epoch: 1, batch: 37207, sum loss: 4295.044434, avg loss: 2.492771, ppl: 12.094743 +epoch: 1, batch: 37208, sum loss: 4157.175293, avg loss: 2.446837, ppl: 11.551747 +epoch: 1, batch: 37209, sum loss: 3737.006592, avg loss: 2.389390, ppl: 10.906844 +epoch: 1, batch: 37210, sum loss: 5167.468750, avg loss: 2.826843, ppl: 16.892044 +epoch: 1, batch: 37211, sum loss: 4794.349121, avg loss: 2.573456, ppl: 13.111062 +epoch: 1, batch: 37212, sum loss: 4708.344727, avg loss: 2.640687, ppl: 14.022834 +epoch: 1, batch: 37213, sum loss: 3629.272949, avg loss: 2.400313, ppl: 11.026626 +epoch: 1, batch: 37214, sum loss: 6012.144043, avg loss: 3.013606, ppl: 20.360685 +epoch: 1, batch: 37215, sum loss: 4374.378418, avg loss: 2.519803, ppl: 12.426152 +epoch: 1, batch: 37216, sum loss: 5022.496582, avg loss: 2.648996, ppl: 14.139840 +epoch: 1, batch: 37217, sum loss: 4136.256836, avg loss: 2.454752, ppl: 11.643542 +epoch: 1, batch: 37218, sum loss: 4635.685547, avg loss: 2.569670, ppl: 13.061520 +epoch: 1, batch: 37219, sum loss: 4510.636230, avg loss: 2.486569, ppl: 12.019967 +epoch: 1, batch: 37220, sum loss: 3718.907471, avg loss: 2.538503, ppl: 12.660708 +epoch: 1, batch: 37221, sum loss: 5070.510254, avg loss: 2.764727, ppl: 15.874711 +epoch: 1, batch: 37222, sum loss: 4679.921875, avg loss: 2.746433, ppl: 15.586934 +epoch: 1, batch: 37223, sum loss: 3959.116455, avg loss: 2.255907, ppl: 9.543944 +epoch: 1, batch: 37224, sum loss: 4958.239258, avg loss: 2.793374, ppl: 16.336046 +epoch: 1, batch: 37225, sum loss: 
5340.659180, avg loss: 2.907272, ppl: 18.306795 +epoch: 1, batch: 37226, sum loss: 4660.624023, avg loss: 2.573509, ppl: 13.111750 +epoch: 1, batch: 37227, sum loss: 4608.125977, avg loss: 2.577252, ppl: 13.160918 +epoch: 1, batch: 37228, sum loss: 4846.260742, avg loss: 2.745757, ppl: 15.576399 +epoch: 1, batch: 37229, sum loss: 3817.735596, avg loss: 2.455136, ppl: 11.648013 +epoch: 1, batch: 37230, sum loss: 4519.401367, avg loss: 2.750701, ppl: 15.653597 +epoch: 1, batch: 37231, sum loss: 5371.892090, avg loss: 2.661988, ppl: 14.324742 +epoch: 1, batch: 37232, sum loss: 4277.696777, avg loss: 2.508913, ppl: 12.291565 +epoch: 1, batch: 37233, sum loss: 4239.989258, avg loss: 2.636809, ppl: 13.968560 +epoch: 1, batch: 37234, sum loss: 4676.354492, avg loss: 2.785202, ppl: 16.203091 +epoch: 1, batch: 37235, sum loss: 4858.429199, avg loss: 2.726391, ppl: 15.277656 +epoch: 1, batch: 37236, sum loss: 4482.518555, avg loss: 2.677729, ppl: 14.552013 +epoch: 1, batch: 37237, sum loss: 4098.551758, avg loss: 2.423744, ppl: 11.288048 +epoch: 1, batch: 37238, sum loss: 3811.079590, avg loss: 2.410550, ppl: 11.140085 +epoch: 1, batch: 37239, sum loss: 4790.616211, avg loss: 2.626434, ppl: 13.824389 +epoch: 1, batch: 37240, sum loss: 4443.420410, avg loss: 2.627688, ppl: 13.841733 +epoch: 1, batch: 37241, sum loss: 3750.070312, avg loss: 2.697892, ppl: 14.848401 +epoch: 1, batch: 37242, sum loss: 4725.312988, avg loss: 2.862092, ppl: 17.498087 +epoch: 1, batch: 37243, sum loss: 4371.718262, avg loss: 2.544656, ppl: 12.738839 +epoch: 1, batch: 37244, sum loss: 4146.189453, avg loss: 2.484236, ppl: 11.991952 +epoch: 1, batch: 37245, sum loss: 4762.349609, avg loss: 2.678487, ppl: 14.563044 +epoch: 1, batch: 37246, sum loss: 5753.997070, avg loss: 2.976719, ppl: 19.623320 +epoch: 1, batch: 37247, sum loss: 3787.845947, avg loss: 2.746806, ppl: 15.592755 +epoch: 1, batch: 37248, sum loss: 3921.952637, avg loss: 2.401685, ppl: 11.041771 +epoch: 1, batch: 37249, sum loss: 
4249.639648, avg loss: 2.414568, ppl: 11.184937 +epoch: 1, batch: 37250, sum loss: 4654.673828, avg loss: 2.614985, ppl: 13.667014 +epoch: 1, batch: 37251, sum loss: 4875.975098, avg loss: 2.660106, ppl: 14.297811 +epoch: 1, batch: 37252, sum loss: 4638.208008, avg loss: 2.676404, ppl: 14.532736 +epoch: 1, batch: 37253, sum loss: 3182.189697, avg loss: 2.211390, ppl: 9.128392 +epoch: 1, batch: 37254, sum loss: 3732.152100, avg loss: 2.692750, ppl: 14.772250 +epoch: 1, batch: 37255, sum loss: 5593.988770, avg loss: 2.829534, ppl: 16.937567 +epoch: 1, batch: 37256, sum loss: 4474.844727, avg loss: 2.552678, ppl: 12.841449 +epoch: 1, batch: 37257, sum loss: 4782.414551, avg loss: 2.916106, ppl: 18.469236 +epoch: 1, batch: 37258, sum loss: 4841.218750, avg loss: 2.628240, ppl: 13.849379 +epoch: 1, batch: 37259, sum loss: 4297.052734, avg loss: 2.565405, ppl: 13.005923 +epoch: 1, batch: 37260, sum loss: 4725.862793, avg loss: 2.676026, ppl: 14.527252 +epoch: 1, batch: 37261, sum loss: 4396.294922, avg loss: 2.564933, ppl: 12.999785 +epoch: 1, batch: 37262, sum loss: 4937.733887, avg loss: 2.720515, ppl: 15.188135 +epoch: 1, batch: 37263, sum loss: 4123.830078, avg loss: 2.472320, ppl: 11.849911 +epoch: 1, batch: 37264, sum loss: 4648.553223, avg loss: 2.594059, ppl: 13.383984 +epoch: 1, batch: 37265, sum loss: 4214.257324, avg loss: 2.697988, ppl: 14.849824 +epoch: 1, batch: 37266, sum loss: 4013.249512, avg loss: 2.262260, ppl: 9.604771 +epoch: 1, batch: 37267, sum loss: 3936.024414, avg loss: 2.354082, ppl: 10.528456 +epoch: 1, batch: 37268, sum loss: 4826.798340, avg loss: 2.690523, ppl: 14.739385 +epoch: 1, batch: 37269, sum loss: 4601.702637, avg loss: 2.583775, ppl: 13.247049 +epoch: 1, batch: 37270, sum loss: 4924.355469, avg loss: 2.696799, ppl: 14.832182 +epoch: 1, batch: 37271, sum loss: 4546.515625, avg loss: 2.389131, ppl: 10.904010 +epoch: 1, batch: 37272, sum loss: 6471.294922, avg loss: 3.077173, ppl: 21.696983 +epoch: 1, batch: 37273, sum loss: 
3749.833496, avg loss: 2.620429, ppl: 13.741614 +epoch: 1, batch: 37274, sum loss: 4136.858398, avg loss: 2.499612, ppl: 12.177773 +epoch: 1, batch: 37275, sum loss: 4308.473633, avg loss: 2.400264, ppl: 11.026087 +epoch: 1, batch: 37276, sum loss: 3203.620361, avg loss: 2.489216, ppl: 12.051819 +epoch: 1, batch: 37277, sum loss: 5824.501953, avg loss: 2.869213, ppl: 17.623140 +epoch: 1, batch: 37278, sum loss: 3751.635010, avg loss: 2.533177, ppl: 12.593451 +epoch: 1, batch: 37279, sum loss: 4478.853027, avg loss: 2.716102, ppl: 15.121270 +epoch: 1, batch: 37280, sum loss: 5405.553711, avg loss: 2.656292, ppl: 14.243373 +epoch: 1, batch: 37281, sum loss: 4571.180176, avg loss: 2.816500, ppl: 16.718241 +epoch: 1, batch: 37282, sum loss: 3781.622314, avg loss: 2.311505, ppl: 10.089599 +epoch: 1, batch: 37283, sum loss: 3892.959717, avg loss: 2.442258, ppl: 11.498981 +epoch: 1, batch: 37284, sum loss: 4418.322266, avg loss: 2.626827, ppl: 13.829812 +epoch: 1, batch: 37285, sum loss: 4188.014160, avg loss: 2.525944, ppl: 12.502687 +epoch: 1, batch: 37286, sum loss: 4614.093750, avg loss: 2.736711, ppl: 15.436125 +epoch: 1, batch: 37287, sum loss: 4863.925781, avg loss: 2.792150, ppl: 16.316067 +epoch: 1, batch: 37288, sum loss: 5552.630859, avg loss: 2.762503, ppl: 15.839442 +epoch: 1, batch: 37289, sum loss: 5077.905762, avg loss: 2.737416, ppl: 15.447011 +epoch: 1, batch: 37290, sum loss: 3869.299805, avg loss: 2.321115, ppl: 10.187032 +epoch: 1, batch: 37291, sum loss: 3969.879150, avg loss: 2.556265, ppl: 12.887590 +epoch: 1, batch: 37292, sum loss: 4644.188965, avg loss: 2.749668, ppl: 15.637438 +epoch: 1, batch: 37293, sum loss: 4063.953125, avg loss: 2.379364, ppl: 10.798028 +epoch: 1, batch: 37294, sum loss: 4076.630615, avg loss: 2.514886, ppl: 12.365201 +epoch: 1, batch: 37295, sum loss: 4770.016113, avg loss: 2.875236, ppl: 17.729605 +epoch: 1, batch: 37296, sum loss: 4815.023438, avg loss: 2.648528, ppl: 14.133214 +epoch: 1, batch: 37297, sum loss: 
4550.942383, avg loss: 2.562468, ppl: 12.967777 +epoch: 1, batch: 37298, sum loss: 3982.373779, avg loss: 2.632104, ppl: 13.902997 +epoch: 1, batch: 37299, sum loss: 5368.677734, avg loss: 2.894166, ppl: 18.068426 +epoch: 1, batch: 37300, sum loss: 4499.453125, avg loss: 2.923621, ppl: 18.608551 +epoch: 1, batch: 37301, sum loss: 4208.578613, avg loss: 2.428493, ppl: 11.341778 +epoch: 1, batch: 37302, sum loss: 4189.280762, avg loss: 2.514574, ppl: 12.361345 +epoch: 1, batch: 37303, sum loss: 3928.589355, avg loss: 2.414622, ppl: 11.185536 +epoch: 1, batch: 37304, sum loss: 4254.638672, avg loss: 2.499788, ppl: 12.179906 +epoch: 1, batch: 37305, sum loss: 4285.818359, avg loss: 2.543512, ppl: 12.724284 +epoch: 1, batch: 37306, sum loss: 4469.976562, avg loss: 2.488851, ppl: 12.047426 +epoch: 1, batch: 37307, sum loss: 4441.767090, avg loss: 2.564531, ppl: 12.994557 +epoch: 1, batch: 37308, sum loss: 3756.881836, avg loss: 2.261819, ppl: 9.600540 +epoch: 1, batch: 37309, sum loss: 4465.135742, avg loss: 2.794203, ppl: 16.349586 +epoch: 1, batch: 37310, sum loss: 5813.927734, avg loss: 3.004614, ppl: 20.178419 +epoch: 1, batch: 37311, sum loss: 3841.997314, avg loss: 2.512752, ppl: 12.338838 +epoch: 1, batch: 37312, sum loss: 4576.778809, avg loss: 2.700165, ppl: 14.882180 +epoch: 1, batch: 37313, sum loss: 5454.065918, avg loss: 2.735239, ppl: 15.413424 +epoch: 1, batch: 37314, sum loss: 4927.459961, avg loss: 2.790181, ppl: 16.283970 +epoch: 1, batch: 37315, sum loss: 5048.805664, avg loss: 2.794026, ppl: 16.346701 +epoch: 1, batch: 37316, sum loss: 4628.906738, avg loss: 2.639057, ppl: 14.000001 +epoch: 1, batch: 37317, sum loss: 4621.551270, avg loss: 2.767396, ppl: 15.917131 +epoch: 1, batch: 37318, sum loss: 5775.650391, avg loss: 3.020738, ppl: 20.506413 +epoch: 1, batch: 37319, sum loss: 4067.575684, avg loss: 2.428404, ppl: 11.340763 +epoch: 1, batch: 37320, sum loss: 4308.077148, avg loss: 2.457546, ppl: 11.676118 +epoch: 1, batch: 37321, sum loss: 
3780.472900, avg loss: 2.342300, ppl: 10.405146 +epoch: 1, batch: 37322, sum loss: 5573.991211, avg loss: 2.670815, ppl: 14.451746 +epoch: 1, batch: 37323, sum loss: 4387.718262, avg loss: 2.553969, ppl: 12.858032 +epoch: 1, batch: 37324, sum loss: 4339.167480, avg loss: 2.711980, ppl: 15.059057 +epoch: 1, batch: 37325, sum loss: 3455.699219, avg loss: 2.309959, ppl: 10.074016 +epoch: 1, batch: 37326, sum loss: 4496.984863, avg loss: 2.623678, ppl: 13.786343 +epoch: 1, batch: 37327, sum loss: 3780.565674, avg loss: 2.414154, ppl: 11.180311 +epoch: 1, batch: 37328, sum loss: 4825.696777, avg loss: 2.791034, ppl: 16.297855 +epoch: 1, batch: 37329, sum loss: 5109.709473, avg loss: 2.886842, ppl: 17.936567 +epoch: 1, batch: 37330, sum loss: 5285.315918, avg loss: 2.741347, ppl: 15.507854 +epoch: 1, batch: 37331, sum loss: 4620.041992, avg loss: 2.649107, ppl: 14.141397 +epoch: 1, batch: 37332, sum loss: 5190.869629, avg loss: 3.076983, ppl: 21.692850 +epoch: 1, batch: 37333, sum loss: 4880.341797, avg loss: 2.860693, ppl: 17.473627 +epoch: 1, batch: 37334, sum loss: 4006.025879, avg loss: 2.463731, ppl: 11.748559 +epoch: 1, batch: 37335, sum loss: 4883.338867, avg loss: 2.600287, ppl: 13.467602 +epoch: 1, batch: 37336, sum loss: 4627.758789, avg loss: 2.678101, ppl: 14.557424 +epoch: 1, batch: 37337, sum loss: 4608.556641, avg loss: 2.846545, ppl: 17.228159 +epoch: 1, batch: 37338, sum loss: 5963.160645, avg loss: 2.737907, ppl: 15.454600 +epoch: 1, batch: 37339, sum loss: 4853.668457, avg loss: 2.732922, ppl: 15.377749 +epoch: 1, batch: 37340, sum loss: 3958.433594, avg loss: 2.651329, ppl: 14.172855 +epoch: 1, batch: 37341, sum loss: 3577.618896, avg loss: 2.519450, ppl: 12.421762 +epoch: 1, batch: 37342, sum loss: 4611.280273, avg loss: 2.855282, ppl: 17.379335 +epoch: 1, batch: 37343, sum loss: 4672.813477, avg loss: 2.532690, ppl: 12.587324 +epoch: 1, batch: 37344, sum loss: 4035.802734, avg loss: 2.448910, ppl: 11.575717 +epoch: 1, batch: 37345, sum loss: 
3833.476318, avg loss: 2.482822, ppl: 11.975007 +epoch: 1, batch: 37346, sum loss: 4315.215820, avg loss: 2.488590, ppl: 12.044285 +epoch: 1, batch: 37347, sum loss: 4245.482910, avg loss: 2.579273, ppl: 13.187544 +epoch: 1, batch: 37348, sum loss: 4095.398926, avg loss: 2.381046, ppl: 10.816209 +epoch: 1, batch: 37349, sum loss: 3875.697266, avg loss: 2.519959, ppl: 12.428090 +epoch: 1, batch: 37350, sum loss: 4164.548828, avg loss: 2.632458, ppl: 13.907913 +epoch: 1, batch: 37351, sum loss: 3820.602783, avg loss: 2.183202, ppl: 8.874674 +epoch: 1, batch: 37352, sum loss: 4346.733398, avg loss: 2.721812, ppl: 15.207850 +epoch: 1, batch: 37353, sum loss: 4618.114258, avg loss: 2.706984, ppl: 14.984009 +epoch: 1, batch: 37354, sum loss: 4086.595703, avg loss: 2.554122, ppl: 12.860006 +epoch: 1, batch: 37355, sum loss: 4036.208984, avg loss: 2.485350, ppl: 12.005326 +epoch: 1, batch: 37356, sum loss: 4268.313477, avg loss: 2.723876, ppl: 15.239275 +epoch: 1, batch: 37357, sum loss: 4304.684082, avg loss: 2.423809, ppl: 11.288772 +epoch: 1, batch: 37358, sum loss: 4872.322754, avg loss: 2.732655, ppl: 15.373643 +epoch: 1, batch: 37359, sum loss: 4025.341309, avg loss: 2.618960, ppl: 13.721444 +epoch: 1, batch: 37360, sum loss: 5304.439453, avg loss: 2.767053, ppl: 15.911668 +epoch: 1, batch: 37361, sum loss: 4536.633301, avg loss: 2.739513, ppl: 15.479444 +epoch: 1, batch: 37362, sum loss: 4406.784180, avg loss: 2.580084, ppl: 13.198251 +epoch: 1, batch: 37363, sum loss: 3827.316406, avg loss: 2.243444, ppl: 9.425742 +epoch: 1, batch: 37364, sum loss: 4279.997559, avg loss: 2.633845, ppl: 13.927212 +epoch: 1, batch: 37365, sum loss: 4105.577148, avg loss: 2.688656, ppl: 14.711884 +epoch: 1, batch: 37366, sum loss: 4308.398438, avg loss: 2.735491, ppl: 15.417312 +epoch: 1, batch: 37367, sum loss: 5058.369141, avg loss: 2.807086, ppl: 16.561592 +epoch: 1, batch: 37368, sum loss: 3607.562012, avg loss: 2.540537, ppl: 12.686478 +epoch: 1, batch: 37369, sum loss: 
4096.875488, avg loss: 2.550981, ppl: 12.819672 +epoch: 1, batch: 37370, sum loss: 5378.477051, avg loss: 2.795466, ppl: 16.370258 +epoch: 1, batch: 37371, sum loss: 4357.325195, avg loss: 2.508535, ppl: 12.286916 +epoch: 1, batch: 37372, sum loss: 4899.541992, avg loss: 2.614483, ppl: 13.660157 +epoch: 1, batch: 37373, sum loss: 3863.827148, avg loss: 2.698204, ppl: 14.853025 +epoch: 1, batch: 37374, sum loss: 4407.275879, avg loss: 2.843404, ppl: 17.174124 +epoch: 1, batch: 37375, sum loss: 4707.209473, avg loss: 2.631196, ppl: 13.890369 +epoch: 1, batch: 37376, sum loss: 5020.383301, avg loss: 2.873717, ppl: 17.702694 +epoch: 1, batch: 37377, sum loss: 5358.162109, avg loss: 2.853122, ppl: 17.341831 +epoch: 1, batch: 37378, sum loss: 3829.847412, avg loss: 2.431649, ppl: 11.377628 +epoch: 1, batch: 37379, sum loss: 3875.716553, avg loss: 2.478080, ppl: 11.918354 +epoch: 1, batch: 37380, sum loss: 4291.678223, avg loss: 2.599442, ppl: 13.456224 +epoch: 1, batch: 37381, sum loss: 4114.746582, avg loss: 2.624201, ppl: 13.793543 +epoch: 1, batch: 37382, sum loss: 3970.117676, avg loss: 2.446160, ppl: 11.543931 +epoch: 1, batch: 37383, sum loss: 3741.784424, avg loss: 2.423436, ppl: 11.284563 +epoch: 1, batch: 37384, sum loss: 4534.474609, avg loss: 2.432658, ppl: 11.389113 +epoch: 1, batch: 37385, sum loss: 4504.659668, avg loss: 2.662328, ppl: 14.329617 +epoch: 1, batch: 37386, sum loss: 4439.872070, avg loss: 2.463858, ppl: 11.750057 +epoch: 1, batch: 37387, sum loss: 4808.323730, avg loss: 2.753908, ppl: 15.703889 +epoch: 1, batch: 37388, sum loss: 3917.668701, avg loss: 2.326407, ppl: 10.241076 +epoch: 1, batch: 37389, sum loss: 4201.200195, avg loss: 2.703475, ppl: 14.931529 +epoch: 1, batch: 37390, sum loss: 4386.742188, avg loss: 2.589576, ppl: 13.324124 +epoch: 1, batch: 37391, sum loss: 4081.696045, avg loss: 2.543113, ppl: 12.719201 +epoch: 1, batch: 37392, sum loss: 5107.860840, avg loss: 2.675674, ppl: 14.522130 +epoch: 1, batch: 37393, sum loss: 
5656.399414, avg loss: 2.747159, ppl: 15.598247 +epoch: 1, batch: 37394, sum loss: 3327.652100, avg loss: 2.328658, ppl: 10.264156 +epoch: 1, batch: 37395, sum loss: 3853.924072, avg loss: 2.528821, ppl: 12.538720 +epoch: 1, batch: 37396, sum loss: 4757.449219, avg loss: 2.599699, ppl: 13.459686 +epoch: 1, batch: 37397, sum loss: 4026.582520, avg loss: 2.543640, ppl: 12.725911 +epoch: 1, batch: 37398, sum loss: 5687.071289, avg loss: 2.793257, ppl: 16.334137 +epoch: 1, batch: 37399, sum loss: 4452.152832, avg loss: 2.665960, ppl: 14.381747 +epoch: 1, batch: 37400, sum loss: 4227.808594, avg loss: 2.582657, ppl: 13.232244 +epoch: 1, batch: 37401, sum loss: 4825.041504, avg loss: 2.668718, ppl: 14.421464 +epoch: 1, batch: 37402, sum loss: 3847.643066, avg loss: 2.698207, ppl: 14.853075 +epoch: 1, batch: 37403, sum loss: 4889.274414, avg loss: 2.870977, ppl: 17.654264 +epoch: 1, batch: 37404, sum loss: 4036.041016, avg loss: 2.444604, ppl: 11.525984 +epoch: 1, batch: 37405, sum loss: 3554.092285, avg loss: 2.327500, ppl: 10.252277 +epoch: 1, batch: 37406, sum loss: 4392.260742, avg loss: 2.626950, ppl: 13.831523 +epoch: 1, batch: 37407, sum loss: 4214.125000, avg loss: 2.590120, ppl: 13.331369 +epoch: 1, batch: 37408, sum loss: 4087.934570, avg loss: 2.532797, ppl: 12.588669 +epoch: 1, batch: 37409, sum loss: 4171.665039, avg loss: 2.553039, ppl: 12.846079 +epoch: 1, batch: 37410, sum loss: 4348.066895, avg loss: 2.439993, ppl: 11.472957 +epoch: 1, batch: 37411, sum loss: 4451.240234, avg loss: 2.601543, ppl: 13.484531 +epoch: 1, batch: 37412, sum loss: 4169.095703, avg loss: 2.443784, ppl: 11.516537 +epoch: 1, batch: 37413, sum loss: 4547.081055, avg loss: 2.484744, ppl: 11.998046 +epoch: 1, batch: 37414, sum loss: 4728.252441, avg loss: 2.596514, ppl: 13.416888 +epoch: 1, batch: 37415, sum loss: 4121.124023, avg loss: 2.519025, ppl: 12.416480 +epoch: 1, batch: 37416, sum loss: 3867.286865, avg loss: 2.602481, ppl: 13.497185 +epoch: 1, batch: 37417, sum loss: 
4307.175293, avg loss: 2.683598, ppl: 14.637669 +epoch: 1, batch: 37418, sum loss: 4308.715820, avg loss: 2.432928, ppl: 11.392191 +epoch: 1, batch: 37419, sum loss: 4507.906738, avg loss: 2.509970, ppl: 12.304566 +epoch: 1, batch: 37420, sum loss: 3327.344727, avg loss: 2.484947, ppl: 12.000489 +epoch: 1, batch: 37421, sum loss: 3708.041992, avg loss: 2.476982, ppl: 11.905278 +epoch: 1, batch: 37422, sum loss: 4720.723633, avg loss: 2.711501, ppl: 15.051857 +epoch: 1, batch: 37423, sum loss: 5281.254883, avg loss: 2.529337, ppl: 12.545182 +epoch: 1, batch: 37424, sum loss: 4539.570312, avg loss: 2.505282, ppl: 12.247008 +epoch: 1, batch: 37425, sum loss: 4912.480957, avg loss: 2.775413, ppl: 16.045254 +epoch: 1, batch: 37426, sum loss: 5097.037598, avg loss: 2.894399, ppl: 18.072643 +epoch: 1, batch: 37427, sum loss: 5190.399902, avg loss: 3.155258, ppl: 23.459093 +epoch: 1, batch: 37428, sum loss: 4635.642578, avg loss: 2.524860, ppl: 12.489142 +epoch: 1, batch: 37429, sum loss: 5210.927246, avg loss: 2.890143, ppl: 17.995876 +epoch: 1, batch: 37430, sum loss: 3327.315430, avg loss: 2.428698, ppl: 11.344098 +epoch: 1, batch: 37431, sum loss: 4535.360840, avg loss: 2.629195, ppl: 13.862602 +epoch: 1, batch: 37432, sum loss: 3528.405273, avg loss: 2.374432, ppl: 10.744905 +epoch: 1, batch: 37433, sum loss: 4576.050781, avg loss: 2.637493, ppl: 13.978122 +epoch: 1, batch: 37434, sum loss: 4645.916992, avg loss: 2.563972, ppl: 12.987297 +epoch: 1, batch: 37435, sum loss: 4391.752930, avg loss: 2.544469, ppl: 12.736459 +epoch: 1, batch: 37436, sum loss: 4695.577148, avg loss: 2.599987, ppl: 13.463567 +epoch: 1, batch: 37437, sum loss: 4804.948730, avg loss: 2.710067, ppl: 15.030283 +epoch: 1, batch: 37438, sum loss: 4109.430664, avg loss: 2.597617, ppl: 13.431694 +epoch: 1, batch: 37439, sum loss: 3970.360352, avg loss: 2.596704, ppl: 13.419435 +epoch: 1, batch: 37440, sum loss: 3838.688232, avg loss: 2.471789, ppl: 11.843617 +epoch: 1, batch: 37441, sum loss: 
4583.589844, avg loss: 2.370005, ppl: 10.697447 +epoch: 1, batch: 37442, sum loss: 4540.195801, avg loss: 2.686506, ppl: 14.680297 +epoch: 1, batch: 37443, sum loss: 3468.406982, avg loss: 2.268415, ppl: 9.664076 +epoch: 1, batch: 37444, sum loss: 4563.064453, avg loss: 2.620944, ppl: 13.748703 +epoch: 1, batch: 37445, sum loss: 4905.416016, avg loss: 2.832226, ppl: 16.983229 +epoch: 1, batch: 37446, sum loss: 4166.143555, avg loss: 2.581254, ppl: 13.213695 +epoch: 1, batch: 37447, sum loss: 3466.293457, avg loss: 2.435905, ppl: 11.426160 +epoch: 1, batch: 37448, sum loss: 4247.156738, avg loss: 2.534103, ppl: 12.605121 +epoch: 1, batch: 37449, sum loss: 4312.966309, avg loss: 2.490165, ppl: 12.063270 +epoch: 1, batch: 37450, sum loss: 4191.554688, avg loss: 2.393806, ppl: 10.955112 +epoch: 1, batch: 37451, sum loss: 4312.799805, avg loss: 2.688778, ppl: 14.713684 +epoch: 1, batch: 37452, sum loss: 3923.287598, avg loss: 2.363426, ppl: 10.627300 +epoch: 1, batch: 37453, sum loss: 3221.945801, avg loss: 2.321286, ppl: 10.188773 +epoch: 1, batch: 37454, sum loss: 4782.325195, avg loss: 2.510407, ppl: 12.309938 +epoch: 1, batch: 37455, sum loss: 3876.376953, avg loss: 2.339395, ppl: 10.374953 +epoch: 1, batch: 37456, sum loss: 4321.730469, avg loss: 2.709549, ppl: 15.022498 +epoch: 1, batch: 37457, sum loss: 5719.339355, avg loss: 3.128742, ppl: 22.845211 +epoch: 1, batch: 37458, sum loss: 3998.145752, avg loss: 2.710607, ppl: 15.038405 +epoch: 1, batch: 37459, sum loss: 4026.790771, avg loss: 2.437525, ppl: 11.444677 +epoch: 1, batch: 37460, sum loss: 4861.202148, avg loss: 2.837830, ppl: 17.078657 +epoch: 1, batch: 37461, sum loss: 3943.039551, avg loss: 2.287146, ppl: 9.846796 +epoch: 1, batch: 37462, sum loss: 5094.699219, avg loss: 2.831962, ppl: 16.978735 +epoch: 1, batch: 37463, sum loss: 5093.098633, avg loss: 2.733816, ppl: 15.391504 +epoch: 1, batch: 37464, sum loss: 4032.961670, avg loss: 2.501837, ppl: 12.204897 +epoch: 1, batch: 37465, sum loss: 
4560.713867, avg loss: 2.625627, ppl: 13.813233 +epoch: 1, batch: 37466, sum loss: 5605.366699, avg loss: 2.634101, ppl: 13.930782 +epoch: 1, batch: 37467, sum loss: 4167.795898, avg loss: 2.239547, ppl: 9.389072 +epoch: 1, batch: 37468, sum loss: 4020.987793, avg loss: 2.387760, ppl: 10.889077 +epoch: 1, batch: 37469, sum loss: 4070.822266, avg loss: 2.330179, ppl: 10.279778 +epoch: 1, batch: 37470, sum loss: 4793.355957, avg loss: 2.635160, ppl: 13.945543 +epoch: 1, batch: 37471, sum loss: 4558.802246, avg loss: 2.566893, ppl: 13.025293 +epoch: 1, batch: 37472, sum loss: 4140.993652, avg loss: 2.466345, ppl: 11.779316 +epoch: 1, batch: 37473, sum loss: 5240.957520, avg loss: 2.664442, ppl: 14.359936 +epoch: 1, batch: 37474, sum loss: 4874.784668, avg loss: 2.690279, ppl: 14.735780 +epoch: 1, batch: 37475, sum loss: 4118.165527, avg loss: 2.596574, ppl: 13.417685 +epoch: 1, batch: 37476, sum loss: 5794.365723, avg loss: 2.747447, ppl: 15.602747 +epoch: 1, batch: 37477, sum loss: 5255.214844, avg loss: 2.782009, ppl: 16.151434 +epoch: 1, batch: 37478, sum loss: 4034.752441, avg loss: 2.515432, ppl: 12.371951 +epoch: 1, batch: 37479, sum loss: 4069.670898, avg loss: 2.498263, ppl: 12.161356 +epoch: 1, batch: 37480, sum loss: 4283.871582, avg loss: 2.519925, ppl: 12.427660 +epoch: 1, batch: 37481, sum loss: 4215.840332, avg loss: 2.357852, ppl: 10.568232 +epoch: 1, batch: 37482, sum loss: 4822.159180, avg loss: 2.849976, ppl: 17.287363 +epoch: 1, batch: 37483, sum loss: 4640.602539, avg loss: 2.797229, ppl: 16.399139 +epoch: 1, batch: 37484, sum loss: 4025.177734, avg loss: 2.477032, ppl: 11.905880 +epoch: 1, batch: 37485, sum loss: 5246.230469, avg loss: 2.789065, ppl: 16.265799 +epoch: 1, batch: 37486, sum loss: 4794.959473, avg loss: 2.969015, ppl: 19.472733 +epoch: 1, batch: 37487, sum loss: 4036.219238, avg loss: 2.627747, ppl: 13.842548 +epoch: 1, batch: 37488, sum loss: 3955.807129, avg loss: 2.303906, ppl: 10.013222 +epoch: 1, batch: 37489, sum loss: 
3395.346924, avg loss: 2.537628, ppl: 12.649635 +epoch: 1, batch: 37490, sum loss: 5467.643555, avg loss: 2.774045, ppl: 16.023325 +epoch: 1, batch: 37491, sum loss: 4993.879395, avg loss: 2.815039, ppl: 16.693825 +epoch: 1, batch: 37492, sum loss: 3827.836182, avg loss: 2.519971, ppl: 12.428238 +epoch: 1, batch: 37493, sum loss: 4888.472168, avg loss: 2.897731, ppl: 18.132956 +epoch: 1, batch: 37494, sum loss: 3530.135742, avg loss: 2.237095, ppl: 9.366082 +epoch: 1, batch: 37495, sum loss: 5076.600098, avg loss: 2.606058, ppl: 13.545544 +epoch: 1, batch: 37496, sum loss: 4089.782959, avg loss: 2.383323, ppl: 10.840872 +epoch: 1, batch: 37497, sum loss: 4459.451172, avg loss: 2.506718, ppl: 12.264610 +epoch: 1, batch: 37498, sum loss: 4778.742188, avg loss: 2.735399, ppl: 15.415894 +epoch: 1, batch: 37499, sum loss: 5141.979492, avg loss: 2.697786, ppl: 14.846819 +epoch: 1, batch: 37500, sum loss: 5767.841797, avg loss: 3.045323, ppl: 21.016821 +epoch: 1, batch: 37501, sum loss: 5294.974609, avg loss: 2.566638, ppl: 13.021974 +epoch: 1, batch: 37502, sum loss: 4436.671387, avg loss: 2.619050, ppl: 13.722685 +epoch: 1, batch: 37503, sum loss: 4014.733643, avg loss: 2.496725, ppl: 12.142659 +epoch: 1, batch: 37504, sum loss: 4158.612305, avg loss: 2.462174, ppl: 11.730290 +epoch: 1, batch: 37505, sum loss: 4086.854736, avg loss: 2.488949, ppl: 12.048610 +epoch: 1, batch: 37506, sum loss: 5218.311523, avg loss: 2.887831, ppl: 17.954332 +epoch: 1, batch: 37507, sum loss: 5320.510254, avg loss: 2.875952, ppl: 17.742298 +epoch: 1, batch: 37508, sum loss: 4262.616211, avg loss: 2.409619, ppl: 11.129724 +epoch: 1, batch: 37509, sum loss: 4823.032227, avg loss: 2.530447, ppl: 12.559119 +epoch: 1, batch: 37510, sum loss: 6503.401367, avg loss: 2.984581, ppl: 19.778208 +epoch: 1, batch: 37511, sum loss: 3900.307373, avg loss: 2.401667, ppl: 11.041569 +epoch: 1, batch: 37512, sum loss: 3710.007324, avg loss: 2.627484, ppl: 13.838905 +epoch: 1, batch: 37513, sum loss: 
4360.984375, avg loss: 2.556263, ppl: 12.887563 +epoch: 1, batch: 37514, sum loss: 4680.794434, avg loss: 2.898325, ppl: 18.143724 +epoch: 1, batch: 37515, sum loss: 3779.809570, avg loss: 2.470464, ppl: 11.827930 +epoch: 1, batch: 37516, sum loss: 4587.581055, avg loss: 2.461149, ppl: 11.718265 +epoch: 1, batch: 37517, sum loss: 5469.472656, avg loss: 2.970925, ppl: 19.509954 +epoch: 1, batch: 37518, sum loss: 4620.853516, avg loss: 2.519549, ppl: 12.422995 +epoch: 1, batch: 37519, sum loss: 4834.379883, avg loss: 2.638854, ppl: 13.997150 +epoch: 1, batch: 37520, sum loss: 4560.623047, avg loss: 2.420713, ppl: 11.253880 +epoch: 1, batch: 37521, sum loss: 4925.241211, avg loss: 2.951013, ppl: 19.125324 +epoch: 1, batch: 37522, sum loss: 4483.274414, avg loss: 2.735372, ppl: 15.415474 +epoch: 1, batch: 37523, sum loss: 4740.057617, avg loss: 2.670455, ppl: 14.446537 +epoch: 1, batch: 37524, sum loss: 5229.521973, avg loss: 2.793548, ppl: 16.338890 +epoch: 1, batch: 37525, sum loss: 3668.365967, avg loss: 2.314426, ppl: 10.119117 +epoch: 1, batch: 37526, sum loss: 5192.005371, avg loss: 2.655757, ppl: 14.235761 +epoch: 1, batch: 37527, sum loss: 4076.844238, avg loss: 2.792359, ppl: 16.319473 +epoch: 1, batch: 37528, sum loss: 3060.423828, avg loss: 2.247007, ppl: 9.459383 +epoch: 1, batch: 37529, sum loss: 4259.705078, avg loss: 2.672337, ppl: 14.473752 +epoch: 1, batch: 37530, sum loss: 5571.793945, avg loss: 2.785897, ppl: 16.214355 +epoch: 1, batch: 37531, sum loss: 4177.539062, avg loss: 2.473380, ppl: 11.862475 +epoch: 1, batch: 37532, sum loss: 5096.878906, avg loss: 2.580698, ppl: 13.206356 +epoch: 1, batch: 37533, sum loss: 4367.143555, avg loss: 2.674307, ppl: 14.502297 +epoch: 1, batch: 37534, sum loss: 4050.761963, avg loss: 2.398320, ppl: 11.004670 +epoch: 1, batch: 37535, sum loss: 4565.517090, avg loss: 2.625369, ppl: 13.809673 +epoch: 1, batch: 37536, sum loss: 4871.628418, avg loss: 2.730734, ppl: 15.344140 +epoch: 1, batch: 37537, sum loss: 
3697.513184, avg loss: 2.293743, ppl: 9.911965 +epoch: 1, batch: 37538, sum loss: 4290.657715, avg loss: 2.516515, ppl: 12.385356 +epoch: 1, batch: 37539, sum loss: 4443.861328, avg loss: 2.694883, ppl: 14.803782 +epoch: 1, batch: 37540, sum loss: 5055.449707, avg loss: 2.607246, ppl: 13.561649 +epoch: 1, batch: 37541, sum loss: 4838.541992, avg loss: 2.695567, ppl: 14.813910 +epoch: 1, batch: 37542, sum loss: 3934.747559, avg loss: 2.553373, ppl: 12.850373 +epoch: 1, batch: 37543, sum loss: 4573.416504, avg loss: 2.545029, ppl: 12.743593 +epoch: 1, batch: 37544, sum loss: 4038.899658, avg loss: 2.402677, ppl: 11.052723 +epoch: 1, batch: 37545, sum loss: 4124.095215, avg loss: 2.363378, ppl: 10.626791 +epoch: 1, batch: 37546, sum loss: 5130.765625, avg loss: 2.719007, ppl: 15.165249 +epoch: 1, batch: 37547, sum loss: 3928.163086, avg loss: 2.503609, ppl: 12.226545 +epoch: 1, batch: 37548, sum loss: 4180.975586, avg loss: 2.308656, ppl: 10.060893 +epoch: 1, batch: 37549, sum loss: 4012.310059, avg loss: 2.533024, ppl: 12.591526 +epoch: 1, batch: 37550, sum loss: 3917.453125, avg loss: 2.525759, ppl: 12.500385 +epoch: 1, batch: 37551, sum loss: 4207.799805, avg loss: 2.509123, ppl: 12.294147 +epoch: 1, batch: 37552, sum loss: 4424.683105, avg loss: 2.573987, ppl: 13.118019 +epoch: 1, batch: 37553, sum loss: 4000.743652, avg loss: 2.414450, ppl: 11.183617 +epoch: 1, batch: 37554, sum loss: 3797.328613, avg loss: 2.431068, ppl: 11.371022 +epoch: 1, batch: 37555, sum loss: 4220.031738, avg loss: 2.650774, ppl: 14.164995 +epoch: 1, batch: 37556, sum loss: 3660.868896, avg loss: 2.437329, ppl: 11.442440 +epoch: 1, batch: 37557, sum loss: 4668.145020, avg loss: 2.739522, ppl: 15.479580 +epoch: 1, batch: 37558, sum loss: 3847.354248, avg loss: 2.253869, ppl: 9.524515 +epoch: 1, batch: 37559, sum loss: 4139.822754, avg loss: 2.456868, ppl: 11.668211 +epoch: 1, batch: 37560, sum loss: 4057.318848, avg loss: 2.495276, ppl: 12.125079 +epoch: 1, batch: 37561, sum loss: 
5083.936523, avg loss: 2.665934, ppl: 14.381380 +epoch: 1, batch: 37562, sum loss: 3671.700928, avg loss: 2.215873, ppl: 9.169406 +epoch: 1, batch: 37563, sum loss: 4373.479980, avg loss: 2.510608, ppl: 12.312419 +epoch: 1, batch: 37564, sum loss: 3723.394043, avg loss: 2.583896, ppl: 13.248656 +epoch: 1, batch: 37565, sum loss: 4520.485352, avg loss: 2.580186, ppl: 13.199592 +epoch: 1, batch: 37566, sum loss: 4421.303223, avg loss: 2.529350, ppl: 12.545346 +epoch: 1, batch: 37567, sum loss: 4723.363281, avg loss: 2.750940, ppl: 15.657337 +epoch: 1, batch: 37568, sum loss: 4437.277344, avg loss: 2.718920, ppl: 15.163937 +epoch: 1, batch: 37569, sum loss: 4735.507324, avg loss: 2.496314, ppl: 12.137670 +epoch: 1, batch: 37570, sum loss: 4361.408203, avg loss: 2.580715, ppl: 13.206576 +epoch: 1, batch: 37571, sum loss: 5223.044434, avg loss: 2.703439, ppl: 14.930995 +epoch: 1, batch: 37572, sum loss: 4592.888184, avg loss: 2.591923, ppl: 13.355435 +epoch: 1, batch: 37573, sum loss: 4103.727051, avg loss: 2.640751, ppl: 14.023733 +epoch: 1, batch: 37574, sum loss: 3802.512451, avg loss: 2.360343, ppl: 10.594584 +epoch: 1, batch: 37575, sum loss: 4765.866699, avg loss: 2.688024, ppl: 14.702599 +epoch: 1, batch: 37576, sum loss: 4808.092773, avg loss: 2.701176, ppl: 14.897236 +epoch: 1, batch: 37577, sum loss: 4817.375000, avg loss: 2.673349, ppl: 14.488412 +epoch: 1, batch: 37578, sum loss: 4562.051758, avg loss: 2.726869, ppl: 15.284956 +epoch: 1, batch: 37579, sum loss: 4427.096680, avg loss: 2.319066, ppl: 10.166173 +epoch: 1, batch: 37580, sum loss: 4846.288086, avg loss: 2.613963, ppl: 13.653056 +epoch: 1, batch: 37581, sum loss: 4160.233398, avg loss: 2.309958, ppl: 10.073997 +epoch: 1, batch: 37582, sum loss: 3758.612061, avg loss: 2.590360, ppl: 13.334567 +epoch: 1, batch: 37583, sum loss: 4888.713867, avg loss: 2.570302, ppl: 13.069768 +epoch: 1, batch: 37584, sum loss: 4377.613770, avg loss: 2.632360, ppl: 13.906544 +epoch: 1, batch: 37585, sum loss: 
4192.573242, avg loss: 2.643489, ppl: 14.062180 +epoch: 1, batch: 37586, sum loss: 3620.630371, avg loss: 2.385132, ppl: 10.860497 +epoch: 1, batch: 37587, sum loss: 4171.425781, avg loss: 2.532742, ppl: 12.587973 +epoch: 1, batch: 37588, sum loss: 3960.956543, avg loss: 2.660146, ppl: 14.298369 +epoch: 1, batch: 37589, sum loss: 3845.616699, avg loss: 2.458834, ppl: 11.691174 +epoch: 1, batch: 37590, sum loss: 4070.995605, avg loss: 2.450931, ppl: 11.599135 +epoch: 1, batch: 37591, sum loss: 4458.685547, avg loss: 2.649249, ppl: 14.143407 +epoch: 1, batch: 37592, sum loss: 4152.150391, avg loss: 2.436708, ppl: 11.435333 +epoch: 1, batch: 37593, sum loss: 3960.865967, avg loss: 2.553750, ppl: 12.855218 +epoch: 1, batch: 37594, sum loss: 3987.771729, avg loss: 2.372262, ppl: 10.721612 +epoch: 1, batch: 37595, sum loss: 4261.252441, avg loss: 2.328553, ppl: 10.263084 +epoch: 1, batch: 37596, sum loss: 3719.612305, avg loss: 2.447114, ppl: 11.554945 +epoch: 1, batch: 37597, sum loss: 4267.907715, avg loss: 2.523896, ppl: 12.477113 +epoch: 1, batch: 37598, sum loss: 4712.637207, avg loss: 2.747893, ppl: 15.609713 +epoch: 1, batch: 37599, sum loss: 4626.087402, avg loss: 2.710069, ppl: 15.030308 +epoch: 1, batch: 37600, sum loss: 5222.347168, avg loss: 2.842867, ppl: 17.164913 +epoch: 1, batch: 37601, sum loss: 4341.339355, avg loss: 2.775792, ppl: 16.051340 +epoch: 1, batch: 37602, sum loss: 4784.164551, avg loss: 2.794489, ppl: 16.354263 +epoch: 1, batch: 37603, sum loss: 5137.678223, avg loss: 2.790700, ppl: 16.292416 +epoch: 1, batch: 37604, sum loss: 4734.379883, avg loss: 2.803067, ppl: 16.495159 +epoch: 1, batch: 37605, sum loss: 4328.042480, avg loss: 2.495987, ppl: 12.133709 +epoch: 1, batch: 37606, sum loss: 4483.851562, avg loss: 2.456905, ppl: 11.668640 +epoch: 1, batch: 37607, sum loss: 4963.127930, avg loss: 2.641367, ppl: 14.032369 +epoch: 1, batch: 37608, sum loss: 4015.073486, avg loss: 2.449709, ppl: 11.584977 +epoch: 1, batch: 37609, sum loss: 
4309.110352, avg loss: 2.849941, ppl: 17.286758 +epoch: 1, batch: 37610, sum loss: 4486.562500, avg loss: 2.603925, ppl: 13.516684 +epoch: 1, batch: 37611, sum loss: 4406.367676, avg loss: 2.613504, ppl: 13.646785 +epoch: 1, batch: 37612, sum loss: 5247.077148, avg loss: 2.776231, ppl: 16.058388 +epoch: 1, batch: 37613, sum loss: 5229.250000, avg loss: 2.765336, ppl: 15.884373 +epoch: 1, batch: 37614, sum loss: 4253.259766, avg loss: 2.573055, ppl: 13.105799 +epoch: 1, batch: 37615, sum loss: 4331.081055, avg loss: 2.573429, ppl: 13.110703 +epoch: 1, batch: 37616, sum loss: 4711.394531, avg loss: 2.543949, ppl: 12.729847 +epoch: 1, batch: 37617, sum loss: 3940.997559, avg loss: 2.432715, ppl: 11.389760 +epoch: 1, batch: 37618, sum loss: 4469.006836, avg loss: 2.633475, ppl: 13.922063 +epoch: 1, batch: 37619, sum loss: 3985.715820, avg loss: 2.541911, ppl: 12.703920 +epoch: 1, batch: 37620, sum loss: 4302.142578, avg loss: 2.677127, ppl: 14.543249 +epoch: 1, batch: 37621, sum loss: 4199.353516, avg loss: 2.458638, ppl: 11.688877 +epoch: 1, batch: 37622, sum loss: 4090.776123, avg loss: 2.465808, ppl: 11.772996 +epoch: 1, batch: 37623, sum loss: 4846.240234, avg loss: 2.643884, ppl: 14.067742 +epoch: 1, batch: 37624, sum loss: 3216.403809, avg loss: 2.218210, ppl: 9.190860 +epoch: 1, batch: 37625, sum loss: 4872.599609, avg loss: 2.800345, ppl: 16.450315 +epoch: 1, batch: 37626, sum loss: 5980.733398, avg loss: 2.797349, ppl: 16.401117 +epoch: 1, batch: 37627, sum loss: 5455.936523, avg loss: 2.793618, ppl: 16.340034 +epoch: 1, batch: 37628, sum loss: 4806.021973, avg loss: 2.704571, ppl: 14.947896 +epoch: 1, batch: 37629, sum loss: 4223.355469, avg loss: 2.513902, ppl: 12.353040 +epoch: 1, batch: 37630, sum loss: 4638.976074, avg loss: 2.634285, ppl: 13.933346 +epoch: 1, batch: 37631, sum loss: 4490.788086, avg loss: 2.617009, ppl: 13.694707 +epoch: 1, batch: 37632, sum loss: 3822.253662, avg loss: 2.524606, ppl: 12.485978 +epoch: 1, batch: 37633, sum loss: 
4345.118652, avg loss: 2.551450, ppl: 12.825682 +epoch: 1, batch: 37634, sum loss: 4973.577148, avg loss: 2.741774, ppl: 15.514478 +epoch: 1, batch: 37635, sum loss: 4661.414062, avg loss: 2.689795, ppl: 14.728649 +epoch: 1, batch: 37636, sum loss: 4060.572510, avg loss: 2.576505, ppl: 13.151100 +epoch: 1, batch: 37637, sum loss: 4377.610840, avg loss: 2.552543, ppl: 12.839709 +epoch: 1, batch: 37638, sum loss: 4919.602539, avg loss: 2.602964, ppl: 13.503709 +epoch: 1, batch: 37639, sum loss: 4564.282715, avg loss: 3.010741, ppl: 20.302433 +epoch: 1, batch: 37640, sum loss: 4453.125000, avg loss: 2.262767, ppl: 9.609641 +epoch: 1, batch: 37641, sum loss: 3391.484619, avg loss: 2.310276, ppl: 10.077201 +epoch: 1, batch: 37642, sum loss: 3958.782471, avg loss: 2.524734, ppl: 12.487571 +epoch: 1, batch: 37643, sum loss: 4768.903809, avg loss: 2.610237, ppl: 13.602279 +epoch: 1, batch: 37644, sum loss: 4585.635254, avg loss: 2.715000, ppl: 15.104612 +epoch: 1, batch: 37645, sum loss: 3414.328125, avg loss: 2.340184, ppl: 10.383144 +epoch: 1, batch: 37646, sum loss: 5405.998047, avg loss: 2.756756, ppl: 15.748669 +epoch: 1, batch: 37647, sum loss: 4696.508789, avg loss: 2.479677, ppl: 11.937413 +epoch: 1, batch: 37648, sum loss: 3885.332275, avg loss: 2.413250, ppl: 11.170202 +epoch: 1, batch: 37649, sum loss: 3350.187500, avg loss: 2.315264, ppl: 10.127601 +epoch: 1, batch: 37650, sum loss: 3419.510986, avg loss: 2.399657, ppl: 11.019394 +epoch: 1, batch: 37651, sum loss: 3956.417969, avg loss: 2.582518, ppl: 13.230415 +epoch: 1, batch: 37652, sum loss: 4940.885742, avg loss: 2.542916, ppl: 12.716700 +epoch: 1, batch: 37653, sum loss: 5290.457031, avg loss: 2.940777, ppl: 18.930542 +epoch: 1, batch: 37654, sum loss: 4521.836914, avg loss: 2.911679, ppl: 18.387642 +epoch: 1, batch: 37655, sum loss: 3982.515625, avg loss: 2.393339, ppl: 10.949994 +epoch: 1, batch: 37656, sum loss: 5118.420410, avg loss: 2.747408, ppl: 15.602134 +epoch: 1, batch: 37657, sum loss: 
4371.226562, avg loss: 2.594200, ppl: 13.385870 +epoch: 1, batch: 37658, sum loss: 4292.706055, avg loss: 2.441812, ppl: 11.493852 +epoch: 1, batch: 37659, sum loss: 4854.000000, avg loss: 2.698166, ppl: 14.852462 +epoch: 1, batch: 37660, sum loss: 3302.540527, avg loss: 2.206106, ppl: 9.080286 +epoch: 1, batch: 37661, sum loss: 4498.957520, avg loss: 2.761791, ppl: 15.828166 +epoch: 1, batch: 37662, sum loss: 4455.210938, avg loss: 2.490336, ppl: 12.065329 +epoch: 1, batch: 37663, sum loss: 3239.664062, avg loss: 2.403311, ppl: 11.059740 +epoch: 1, batch: 37664, sum loss: 4588.630371, avg loss: 2.605696, ppl: 13.540643 +epoch: 1, batch: 37665, sum loss: 4332.454102, avg loss: 2.442195, ppl: 11.498251 +epoch: 1, batch: 37666, sum loss: 3615.846924, avg loss: 2.491969, ppl: 12.085047 +epoch: 1, batch: 37667, sum loss: 4935.678711, avg loss: 2.779099, ppl: 16.104496 +epoch: 1, batch: 37668, sum loss: 3166.150635, avg loss: 2.329765, ppl: 10.275527 +epoch: 1, batch: 37669, sum loss: 3746.004150, avg loss: 2.244460, ppl: 9.435323 +epoch: 1, batch: 37670, sum loss: 5395.064453, avg loss: 2.938488, ppl: 18.887272 +epoch: 1, batch: 37671, sum loss: 4360.504395, avg loss: 2.718519, ppl: 15.157856 +epoch: 1, batch: 37672, sum loss: 3400.231934, avg loss: 2.381115, ppl: 10.816957 +epoch: 1, batch: 37673, sum loss: 4120.229492, avg loss: 2.485060, ppl: 12.001840 +epoch: 1, batch: 37674, sum loss: 3805.683594, avg loss: 2.441106, ppl: 11.485732 +epoch: 1, batch: 37675, sum loss: 4294.827637, avg loss: 2.434710, ppl: 11.412503 +epoch: 1, batch: 37676, sum loss: 3827.022705, avg loss: 2.527756, ppl: 12.525371 +epoch: 1, batch: 37677, sum loss: 4493.201172, avg loss: 2.603245, ppl: 13.507499 +epoch: 1, batch: 37678, sum loss: 4270.356445, avg loss: 2.713060, ppl: 15.075334 +epoch: 1, batch: 37679, sum loss: 3392.484375, avg loss: 2.254143, ppl: 9.527121 +epoch: 1, batch: 37680, sum loss: 5115.476562, avg loss: 2.634128, ppl: 13.931161 +epoch: 1, batch: 37681, sum loss: 
5339.908691, avg loss: 2.797228, ppl: 16.399132 +epoch: 1, batch: 37682, sum loss: 3422.567627, avg loss: 2.368559, ppl: 10.681987 +epoch: 1, batch: 37683, sum loss: 4560.541992, avg loss: 2.734138, ppl: 15.396466 +epoch: 1, batch: 37684, sum loss: 3482.576172, avg loss: 2.286655, ppl: 9.841966 +epoch: 1, batch: 37685, sum loss: 4246.565918, avg loss: 2.848133, ppl: 17.255533 +epoch: 1, batch: 37686, sum loss: 3651.711426, avg loss: 2.464043, ppl: 11.752229 +epoch: 1, batch: 37687, sum loss: 3740.843750, avg loss: 2.702922, ppl: 14.923268 +epoch: 1, batch: 37688, sum loss: 3870.772949, avg loss: 2.401224, ppl: 11.036675 +epoch: 1, batch: 37689, sum loss: 4236.175293, avg loss: 2.570495, ppl: 13.072289 +epoch: 1, batch: 37690, sum loss: 4090.071777, avg loss: 2.565917, ppl: 13.012586 +epoch: 1, batch: 37691, sum loss: 5112.809082, avg loss: 2.679669, ppl: 14.580269 +epoch: 1, batch: 37692, sum loss: 4408.778320, avg loss: 2.641569, ppl: 14.035206 +epoch: 1, batch: 37693, sum loss: 4421.642578, avg loss: 2.633498, ppl: 13.922381 +epoch: 1, batch: 37694, sum loss: 4435.549805, avg loss: 2.694745, ppl: 14.801742 +epoch: 1, batch: 37695, sum loss: 4828.848633, avg loss: 2.799333, ppl: 16.433676 +epoch: 1, batch: 37696, sum loss: 4112.445312, avg loss: 2.493902, ppl: 12.108437 +epoch: 1, batch: 37697, sum loss: 4407.929688, avg loss: 2.674715, ppl: 14.508208 +epoch: 1, batch: 37698, sum loss: 3659.516113, avg loss: 2.420315, ppl: 11.249403 +epoch: 1, batch: 37699, sum loss: 5495.613281, avg loss: 2.782589, ppl: 16.160807 +epoch: 1, batch: 37700, sum loss: 3642.136475, avg loss: 2.693888, ppl: 14.789063 +epoch: 1, batch: 37701, sum loss: 4713.689941, avg loss: 2.860249, ppl: 17.465872 +epoch: 1, batch: 37702, sum loss: 4418.766602, avg loss: 2.682918, ppl: 14.627719 +epoch: 1, batch: 37703, sum loss: 5019.137695, avg loss: 2.563400, ppl: 12.979877 +epoch: 1, batch: 37704, sum loss: 3506.231201, avg loss: 2.320471, ppl: 10.180464 +epoch: 1, batch: 37705, sum loss: 
5045.617676, avg loss: 2.706877, ppl: 14.982405 +epoch: 1, batch: 37706, sum loss: 4770.408203, avg loss: 2.789712, ppl: 16.276339 +epoch: 1, batch: 37707, sum loss: 3875.943848, avg loss: 2.437701, ppl: 11.446689 +epoch: 1, batch: 37708, sum loss: 4920.995605, avg loss: 2.583200, ppl: 13.239433 +epoch: 1, batch: 37709, sum loss: 5339.631836, avg loss: 2.589540, ppl: 13.323645 +epoch: 1, batch: 37710, sum loss: 5366.645020, avg loss: 2.889954, ppl: 17.992483 +epoch: 1, batch: 37711, sum loss: 5022.047852, avg loss: 2.886234, ppl: 17.925680 +epoch: 1, batch: 37712, sum loss: 4916.331055, avg loss: 2.731295, ppl: 15.352757 +epoch: 1, batch: 37713, sum loss: 3891.679688, avg loss: 2.480357, ppl: 11.945524 +epoch: 1, batch: 37714, sum loss: 4963.418457, avg loss: 2.847630, ppl: 17.246851 +epoch: 1, batch: 37715, sum loss: 4800.788086, avg loss: 2.712310, ppl: 15.064027 +epoch: 1, batch: 37716, sum loss: 4151.912598, avg loss: 2.511744, ppl: 12.326406 +epoch: 1, batch: 37717, sum loss: 4308.302734, avg loss: 2.488910, ppl: 12.048133 +epoch: 1, batch: 37718, sum loss: 4511.985840, avg loss: 2.503877, ppl: 12.229814 +epoch: 1, batch: 37719, sum loss: 3656.632080, avg loss: 2.474041, ppl: 11.870315 +epoch: 1, batch: 37720, sum loss: 4807.342773, avg loss: 2.635605, ppl: 13.951745 +epoch: 1, batch: 37721, sum loss: 5723.459473, avg loss: 3.163881, ppl: 23.662241 +epoch: 1, batch: 37722, sum loss: 4466.146973, avg loss: 2.741649, ppl: 15.512550 +epoch: 1, batch: 37723, sum loss: 4732.032227, avg loss: 2.754384, ppl: 15.711364 +epoch: 1, batch: 37724, sum loss: 5097.826660, avg loss: 2.827414, ppl: 16.901690 +epoch: 1, batch: 37725, sum loss: 4080.564453, avg loss: 2.431803, ppl: 11.379375 +epoch: 1, batch: 37726, sum loss: 5155.818359, avg loss: 2.659009, ppl: 14.282128 +epoch: 1, batch: 37727, sum loss: 5217.369629, avg loss: 2.818676, ppl: 16.754656 +epoch: 1, batch: 37728, sum loss: 4868.053711, avg loss: 2.716548, ppl: 15.128010 +epoch: 1, batch: 37729, sum loss: 
3806.662598, avg loss: 2.561684, ppl: 12.957622 +epoch: 1, batch: 37730, sum loss: 4174.922363, avg loss: 2.486553, ppl: 12.019772 +epoch: 1, batch: 37731, sum loss: 4797.454590, avg loss: 2.649064, ppl: 14.140798 +epoch: 1, batch: 37732, sum loss: 3635.280762, avg loss: 2.363642, ppl: 10.629589 +epoch: 1, batch: 37733, sum loss: 5380.949707, avg loss: 2.865255, ppl: 17.553535 +epoch: 1, batch: 37734, sum loss: 4690.569336, avg loss: 2.754298, ppl: 15.710009 +epoch: 1, batch: 37735, sum loss: 4332.917480, avg loss: 2.498799, ppl: 12.167870 +epoch: 1, batch: 37736, sum loss: 4294.086426, avg loss: 2.369805, ppl: 10.695305 +epoch: 1, batch: 37737, sum loss: 4124.683594, avg loss: 2.569896, ppl: 13.064468 +epoch: 1, batch: 37738, sum loss: 4687.262695, avg loss: 2.698482, ppl: 14.857159 +epoch: 1, batch: 37739, sum loss: 4727.688965, avg loss: 2.763115, ppl: 15.849132 +epoch: 1, batch: 37740, sum loss: 4572.640625, avg loss: 2.635528, ppl: 13.950674 +epoch: 1, batch: 37741, sum loss: 3831.713379, avg loss: 2.454653, ppl: 11.642393 +epoch: 1, batch: 37742, sum loss: 3981.785156, avg loss: 2.419068, ppl: 11.235379 +epoch: 1, batch: 37743, sum loss: 4774.422363, avg loss: 2.667275, ppl: 14.400673 +epoch: 1, batch: 37744, sum loss: 3823.830811, avg loss: 2.340166, ppl: 10.382958 +epoch: 1, batch: 37745, sum loss: 4622.761230, avg loss: 2.946311, ppl: 19.035593 +epoch: 1, batch: 37746, sum loss: 5128.123047, avg loss: 2.817650, ppl: 16.737473 +epoch: 1, batch: 37747, sum loss: 3697.872803, avg loss: 2.359842, ppl: 10.589281 +epoch: 1, batch: 37748, sum loss: 4801.756836, avg loss: 2.631100, ppl: 13.889035 +epoch: 1, batch: 37749, sum loss: 3734.946289, avg loss: 2.568739, ppl: 13.049358 +epoch: 1, batch: 37750, sum loss: 4312.350098, avg loss: 2.520368, ppl: 12.433175 +epoch: 1, batch: 37751, sum loss: 3880.343018, avg loss: 2.237799, ppl: 9.372677 +epoch: 1, batch: 37752, sum loss: 4319.391113, avg loss: 2.758232, ppl: 15.771932 +epoch: 1, batch: 37753, sum loss: 
3180.877441, avg loss: 2.330313, ppl: 10.281161 +epoch: 1, batch: 37754, sum loss: 4623.247070, avg loss: 2.533286, ppl: 12.594826 +epoch: 1, batch: 37755, sum loss: 4116.345215, avg loss: 2.608584, ppl: 13.579804 +epoch: 1, batch: 37756, sum loss: 5395.672363, avg loss: 2.929247, ppl: 18.713528 +epoch: 1, batch: 37757, sum loss: 4436.557129, avg loss: 2.824034, ppl: 16.844666 +epoch: 1, batch: 37758, sum loss: 5142.675293, avg loss: 2.739838, ppl: 15.484471 +epoch: 1, batch: 37759, sum loss: 4374.213867, avg loss: 2.395517, ppl: 10.973871 +epoch: 1, batch: 37760, sum loss: 4516.077148, avg loss: 2.544269, ppl: 12.733912 +epoch: 1, batch: 37761, sum loss: 5134.471191, avg loss: 2.767909, ppl: 15.925301 +epoch: 1, batch: 37762, sum loss: 4636.774414, avg loss: 2.561754, ppl: 12.958527 +epoch: 1, batch: 37763, sum loss: 4534.888672, avg loss: 2.895842, ppl: 18.098736 +epoch: 1, batch: 37764, sum loss: 3991.087402, avg loss: 2.617106, ppl: 13.696035 +epoch: 1, batch: 37765, sum loss: 4350.618164, avg loss: 2.476163, ppl: 11.895533 +epoch: 1, batch: 37766, sum loss: 4055.244629, avg loss: 2.368718, ppl: 10.683683 +epoch: 1, batch: 37767, sum loss: 3743.262695, avg loss: 2.205812, ppl: 9.077620 +epoch: 1, batch: 37768, sum loss: 4580.822266, avg loss: 2.741366, ppl: 15.508153 +epoch: 1, batch: 37769, sum loss: 3920.167969, avg loss: 2.311420, ppl: 10.088737 +epoch: 1, batch: 37770, sum loss: 4952.638184, avg loss: 2.550277, ppl: 12.810655 +epoch: 1, batch: 37771, sum loss: 4704.254883, avg loss: 2.678961, ppl: 14.569944 +epoch: 1, batch: 37772, sum loss: 4036.127441, avg loss: 2.517859, ppl: 12.402009 +epoch: 1, batch: 37773, sum loss: 4522.581055, avg loss: 2.655655, ppl: 14.234312 +epoch: 1, batch: 37774, sum loss: 4458.512695, avg loss: 2.537571, ppl: 12.648911 +epoch: 1, batch: 37775, sum loss: 4227.388184, avg loss: 2.630609, ppl: 13.882218 +epoch: 1, batch: 37776, sum loss: 4104.393555, avg loss: 2.478498, ppl: 11.923347 +epoch: 1, batch: 37777, sum loss: 
3529.581543, avg loss: 2.533799, ppl: 12.601284 +epoch: 1, batch: 37778, sum loss: 3987.430664, avg loss: 2.440288, ppl: 11.476346 +epoch: 1, batch: 37779, sum loss: 3730.975098, avg loss: 2.293162, ppl: 9.906215 +epoch: 1, batch: 37780, sum loss: 4418.036133, avg loss: 2.473705, ppl: 11.866325 +epoch: 1, batch: 37781, sum loss: 3840.290527, avg loss: 2.610667, ppl: 13.608118 +epoch: 1, batch: 37782, sum loss: 4323.213867, avg loss: 2.942964, ppl: 18.971989 +epoch: 1, batch: 37783, sum loss: 4290.074707, avg loss: 2.526546, ppl: 12.510219 +epoch: 1, batch: 37784, sum loss: 5677.695312, avg loss: 2.777737, ppl: 16.082590 +epoch: 1, batch: 37785, sum loss: 4747.187500, avg loss: 2.777757, ppl: 16.082912 +epoch: 1, batch: 37786, sum loss: 3691.177246, avg loss: 2.462426, ppl: 11.733247 +epoch: 1, batch: 37787, sum loss: 3814.663818, avg loss: 2.575735, ppl: 13.140977 +epoch: 1, batch: 37788, sum loss: 4226.057617, avg loss: 2.544285, ppl: 12.734120 +epoch: 1, batch: 37789, sum loss: 4155.214844, avg loss: 2.329156, ppl: 10.269275 +epoch: 1, batch: 37790, sum loss: 4414.014160, avg loss: 2.536790, ppl: 12.639030 +epoch: 1, batch: 37791, sum loss: 4221.411621, avg loss: 2.544552, ppl: 12.737518 +epoch: 1, batch: 37792, sum loss: 4315.528320, avg loss: 2.559625, ppl: 12.930972 +epoch: 1, batch: 37793, sum loss: 5637.965820, avg loss: 2.897208, ppl: 18.123468 +epoch: 1, batch: 37794, sum loss: 4927.060547, avg loss: 2.871248, ppl: 17.659035 +epoch: 1, batch: 37795, sum loss: 4897.668457, avg loss: 2.705894, ppl: 14.967695 +epoch: 1, batch: 37796, sum loss: 4637.485352, avg loss: 2.464126, ppl: 11.753206 +epoch: 1, batch: 37797, sum loss: 4395.897461, avg loss: 2.455809, ppl: 11.655855 +epoch: 1, batch: 37798, sum loss: 3866.517090, avg loss: 2.305615, ppl: 10.030350 +epoch: 1, batch: 37799, sum loss: 3927.915527, avg loss: 2.482879, ppl: 11.975698 +epoch: 1, batch: 37800, sum loss: 4367.733398, avg loss: 2.548269, ppl: 12.784957 +epoch: 1, batch: 37801, sum loss: 
3680.959473, avg loss: 2.143832, ppl: 8.532070 +epoch: 1, batch: 37802, sum loss: 3799.601318, avg loss: 2.370306, ppl: 10.700671 +epoch: 1, batch: 37803, sum loss: 4336.617188, avg loss: 2.480902, ppl: 11.952045 +epoch: 1, batch: 37804, sum loss: 4296.857422, avg loss: 2.486607, ppl: 12.020426 +epoch: 1, batch: 37805, sum loss: 5261.715332, avg loss: 2.744766, ppl: 15.560965 +epoch: 1, batch: 37806, sum loss: 3966.412109, avg loss: 2.341447, ppl: 10.396273 +epoch: 1, batch: 37807, sum loss: 3440.984131, avg loss: 2.456091, ppl: 11.659151 +epoch: 1, batch: 37808, sum loss: 4242.079590, avg loss: 2.352790, ppl: 10.514861 +epoch: 1, batch: 37809, sum loss: 4840.908691, avg loss: 2.847594, ppl: 17.246229 +epoch: 1, batch: 37810, sum loss: 4960.775391, avg loss: 2.836349, ppl: 17.053398 +epoch: 1, batch: 37811, sum loss: 4960.468262, avg loss: 2.855768, ppl: 17.387781 +epoch: 1, batch: 37812, sum loss: 4383.405762, avg loss: 2.461205, ppl: 11.718922 +epoch: 1, batch: 37813, sum loss: 4603.452637, avg loss: 2.638082, ppl: 13.986349 +epoch: 1, batch: 37814, sum loss: 4147.642578, avg loss: 2.561855, ppl: 12.959830 +epoch: 1, batch: 37815, sum loss: 4393.889160, avg loss: 2.705597, ppl: 14.963243 +epoch: 1, batch: 37816, sum loss: 4950.671387, avg loss: 2.750373, ppl: 15.648470 +epoch: 1, batch: 37817, sum loss: 3552.132324, avg loss: 2.353965, ppl: 10.527223 +epoch: 1, batch: 37818, sum loss: 4193.801270, avg loss: 2.650949, ppl: 14.167474 +epoch: 1, batch: 37819, sum loss: 4282.487793, avg loss: 2.514673, ppl: 12.362562 +epoch: 1, batch: 37820, sum loss: 4354.862305, avg loss: 2.670057, ppl: 14.440786 +epoch: 1, batch: 37821, sum loss: 4207.625000, avg loss: 2.485307, ppl: 12.004807 +epoch: 1, batch: 37822, sum loss: 4868.436523, avg loss: 2.683813, ppl: 14.640810 +epoch: 1, batch: 37823, sum loss: 3781.596191, avg loss: 2.549964, ppl: 12.806639 +epoch: 1, batch: 37824, sum loss: 3683.058105, avg loss: 2.341423, ppl: 10.396017 +epoch: 1, batch: 37825, sum loss: 
4083.932861, avg loss: 2.579869, ppl: 13.195410 +epoch: 1, batch: 37826, sum loss: 4912.824707, avg loss: 2.610428, ppl: 13.604867 +epoch: 1, batch: 37827, sum loss: 4350.501465, avg loss: 2.541181, ppl: 12.694650 +epoch: 1, batch: 37828, sum loss: 4188.228516, avg loss: 2.732047, ppl: 15.364310 +epoch: 1, batch: 37829, sum loss: 4619.874023, avg loss: 2.558070, ppl: 12.910872 +epoch: 1, batch: 37830, sum loss: 4268.396484, avg loss: 2.454512, ppl: 11.640753 +epoch: 1, batch: 37831, sum loss: 4875.360840, avg loss: 2.598806, ppl: 13.447677 +epoch: 1, batch: 37832, sum loss: 4748.651367, avg loss: 2.718175, ppl: 15.152638 +epoch: 1, batch: 37833, sum loss: 4028.528320, avg loss: 2.355865, ppl: 10.547243 +epoch: 1, batch: 37834, sum loss: 5096.875977, avg loss: 2.678337, ppl: 14.560863 +epoch: 1, batch: 37835, sum loss: 5112.856445, avg loss: 2.696654, ppl: 14.830032 +epoch: 1, batch: 37836, sum loss: 4489.846680, avg loss: 2.595287, ppl: 13.400434 +epoch: 1, batch: 37837, sum loss: 4107.310059, avg loss: 2.332374, ppl: 10.302366 +epoch: 1, batch: 37838, sum loss: 4641.465332, avg loss: 2.696958, ppl: 14.834541 +epoch: 1, batch: 37839, sum loss: 5205.773926, avg loss: 2.947777, ppl: 19.063524 +epoch: 1, batch: 37840, sum loss: 4217.074707, avg loss: 2.689461, ppl: 14.723734 +epoch: 1, batch: 37841, sum loss: 4375.074707, avg loss: 2.578123, ppl: 13.172392 +epoch: 1, batch: 37842, sum loss: 3832.277832, avg loss: 2.425492, ppl: 11.307795 +epoch: 1, batch: 37843, sum loss: 5212.919922, avg loss: 2.884848, ppl: 17.900841 +epoch: 1, batch: 37844, sum loss: 3692.614258, avg loss: 2.569669, ppl: 13.061501 +epoch: 1, batch: 37845, sum loss: 3995.938965, avg loss: 2.538716, ppl: 12.663399 +epoch: 1, batch: 37846, sum loss: 4327.553223, avg loss: 2.460235, ppl: 11.707564 +epoch: 1, batch: 37847, sum loss: 3830.958008, avg loss: 2.291243, ppl: 9.887220 +epoch: 1, batch: 37848, sum loss: 3996.387207, avg loss: 2.641366, ppl: 14.032366 +epoch: 1, batch: 37849, sum loss: 
4153.111328, avg loss: 2.497361, ppl: 12.150386 +epoch: 1, batch: 37850, sum loss: 3793.082520, avg loss: 2.431463, ppl: 11.375515 +epoch: 1, batch: 37851, sum loss: 4229.146973, avg loss: 2.751559, ppl: 15.667046 +epoch: 1, batch: 37852, sum loss: 4014.237305, avg loss: 2.476396, ppl: 11.898301 +epoch: 1, batch: 37853, sum loss: 4123.804688, avg loss: 2.441566, ppl: 11.491021 +epoch: 1, batch: 37854, sum loss: 4854.372559, avg loss: 2.661388, ppl: 14.316152 +epoch: 1, batch: 37855, sum loss: 5285.920898, avg loss: 2.915566, ppl: 18.459257 +epoch: 1, batch: 37856, sum loss: 4837.682129, avg loss: 2.820806, ppl: 16.790375 +epoch: 1, batch: 37857, sum loss: 5278.138184, avg loss: 2.760532, ppl: 15.808257 +epoch: 1, batch: 37858, sum loss: 4444.443848, avg loss: 2.670940, ppl: 14.453548 +epoch: 1, batch: 37859, sum loss: 5135.447754, avg loss: 2.780427, ppl: 16.125900 +epoch: 1, batch: 37860, sum loss: 4376.630859, avg loss: 2.685050, ppl: 14.658928 +epoch: 1, batch: 37861, sum loss: 4973.259766, avg loss: 2.960274, ppl: 19.303255 +epoch: 1, batch: 37862, sum loss: 4561.667969, avg loss: 2.537079, ppl: 12.642686 +epoch: 1, batch: 37863, sum loss: 4028.704834, avg loss: 2.577546, ppl: 13.164797 +epoch: 1, batch: 37864, sum loss: 4445.595215, avg loss: 2.739122, ppl: 15.473396 +epoch: 1, batch: 37865, sum loss: 5412.555176, avg loss: 2.851715, ppl: 17.317461 +epoch: 1, batch: 37866, sum loss: 4143.064941, avg loss: 2.527800, ppl: 12.525923 +epoch: 1, batch: 37867, sum loss: 5015.997559, avg loss: 2.887736, ppl: 17.952620 +epoch: 1, batch: 37868, sum loss: 3279.725830, avg loss: 2.471534, ppl: 11.840596 +epoch: 1, batch: 37869, sum loss: 4007.673340, avg loss: 2.525314, ppl: 12.494820 +epoch: 1, batch: 37870, sum loss: 3610.650391, avg loss: 2.418386, ppl: 11.227723 +epoch: 1, batch: 37871, sum loss: 5635.964355, avg loss: 2.743897, ppl: 15.547455 +epoch: 1, batch: 37872, sum loss: 4582.383301, avg loss: 2.639622, ppl: 14.007904 +epoch: 1, batch: 37873, sum loss: 
4511.249512, avg loss: 2.772741, ppl: 16.002434 +epoch: 1, batch: 37874, sum loss: 3713.101562, avg loss: 2.401747, ppl: 11.042456 +epoch: 1, batch: 37875, sum loss: 3430.988037, avg loss: 2.473675, ppl: 11.865980 +epoch: 1, batch: 37876, sum loss: 4911.320312, avg loss: 2.657641, ppl: 14.262603 +epoch: 1, batch: 37877, sum loss: 4809.773438, avg loss: 2.563845, ppl: 12.985653 +epoch: 1, batch: 37878, sum loss: 5589.807617, avg loss: 3.165236, ppl: 23.694347 +epoch: 1, batch: 37879, sum loss: 4433.500000, avg loss: 2.719939, ppl: 15.179393 +epoch: 1, batch: 37880, sum loss: 4919.423828, avg loss: 2.625093, ppl: 13.805855 +epoch: 1, batch: 37881, sum loss: 4646.508789, avg loss: 2.565715, ppl: 13.009952 +epoch: 1, batch: 37882, sum loss: 3420.764648, avg loss: 2.181610, ppl: 8.860561 +epoch: 1, batch: 37883, sum loss: 4645.169434, avg loss: 2.730846, ppl: 15.345867 +epoch: 1, batch: 37884, sum loss: 4991.582520, avg loss: 2.796405, ppl: 16.385632 +epoch: 1, batch: 37885, sum loss: 4168.227539, avg loss: 2.407988, ppl: 11.111583 +epoch: 1, batch: 37886, sum loss: 4770.314941, avg loss: 2.696617, ppl: 14.829477 +epoch: 1, batch: 37887, sum loss: 3896.141846, avg loss: 2.593969, ppl: 13.382784 +epoch: 1, batch: 37888, sum loss: 4188.324707, avg loss: 2.412629, ppl: 11.163275 +epoch: 1, batch: 37889, sum loss: 4548.569336, avg loss: 2.756709, ppl: 15.747925 +epoch: 1, batch: 37890, sum loss: 4399.095703, avg loss: 2.577092, ppl: 13.158815 +epoch: 1, batch: 37891, sum loss: 5092.103516, avg loss: 2.746550, ppl: 15.588756 +epoch: 1, batch: 37892, sum loss: 4333.410156, avg loss: 2.483330, ppl: 11.981090 +epoch: 1, batch: 37893, sum loss: 4163.964844, avg loss: 2.382131, ppl: 10.827953 +epoch: 1, batch: 37894, sum loss: 4421.237793, avg loss: 2.619217, ppl: 13.724968 +epoch: 1, batch: 37895, sum loss: 4071.268311, avg loss: 2.403346, ppl: 11.060122 +epoch: 1, batch: 37896, sum loss: 4665.151855, avg loss: 2.590312, ppl: 13.333931 +epoch: 1, batch: 37897, sum loss: 
4526.837891, avg loss: 2.806471, ppl: 16.551407 +epoch: 1, batch: 37898, sum loss: 3425.562500, avg loss: 2.365720, ppl: 10.651706 +epoch: 1, batch: 37899, sum loss: 4496.830078, avg loss: 2.855130, ppl: 17.376699 +epoch: 1, batch: 37900, sum loss: 4550.206055, avg loss: 2.573646, ppl: 13.113550 +epoch: 1, batch: 37901, sum loss: 4082.932373, avg loss: 2.492633, ppl: 12.093071 +epoch: 1, batch: 37902, sum loss: 4548.960938, avg loss: 2.797639, ppl: 16.405865 +epoch: 1, batch: 37903, sum loss: 3428.105713, avg loss: 1.983858, ppl: 7.270736 +epoch: 1, batch: 37904, sum loss: 4302.729492, avg loss: 2.519162, ppl: 12.418191 +epoch: 1, batch: 37905, sum loss: 4134.583984, avg loss: 2.630142, ppl: 13.875747 +epoch: 1, batch: 37906, sum loss: 4783.204102, avg loss: 2.444151, ppl: 11.520766 +epoch: 1, batch: 37907, sum loss: 3842.534668, avg loss: 2.523004, ppl: 12.465986 +epoch: 1, batch: 37908, sum loss: 3988.382812, avg loss: 2.593227, ppl: 13.372852 +epoch: 1, batch: 37909, sum loss: 4045.070801, avg loss: 2.471027, ppl: 11.834594 +epoch: 1, batch: 37910, sum loss: 4382.840820, avg loss: 2.693817, ppl: 14.788020 +epoch: 1, batch: 37911, sum loss: 4743.096191, avg loss: 2.540491, ppl: 12.685893 +epoch: 1, batch: 37912, sum loss: 4934.002441, avg loss: 3.040051, ppl: 20.906303 +epoch: 1, batch: 37913, sum loss: 4124.090820, avg loss: 2.497935, ppl: 12.157367 +epoch: 1, batch: 37914, sum loss: 5248.947266, avg loss: 2.663088, ppl: 14.340508 +epoch: 1, batch: 37915, sum loss: 4122.361328, avg loss: 2.573259, ppl: 13.108477 +epoch: 1, batch: 37916, sum loss: 4237.726074, avg loss: 2.520955, ppl: 12.440475 +epoch: 1, batch: 37917, sum loss: 4236.943359, avg loss: 2.575649, ppl: 13.139846 +epoch: 1, batch: 37918, sum loss: 5013.979004, avg loss: 2.549049, ppl: 12.794926 +epoch: 1, batch: 37919, sum loss: 4373.503418, avg loss: 2.566610, ppl: 13.021607 +epoch: 1, batch: 37920, sum loss: 4104.893555, avg loss: 2.281764, ppl: 9.793942 +epoch: 1, batch: 37921, sum loss: 
4330.902344, avg loss: 2.523836, ppl: 12.476363 +epoch: 1, batch: 37922, sum loss: 4965.515137, avg loss: 2.722322, ppl: 15.215611 +epoch: 1, batch: 37923, sum loss: 4279.168945, avg loss: 2.614031, ppl: 13.653983 +epoch: 1, batch: 37924, sum loss: 4545.535645, avg loss: 2.539405, ppl: 12.672133 +epoch: 1, batch: 37925, sum loss: 5082.857910, avg loss: 2.731251, ppl: 15.352080 +epoch: 1, batch: 37926, sum loss: 4409.721680, avg loss: 2.688855, ppl: 14.714813 +epoch: 1, batch: 37927, sum loss: 4346.297852, avg loss: 2.502187, ppl: 12.209161 +epoch: 1, batch: 37928, sum loss: 4370.563477, avg loss: 2.501754, ppl: 12.203876 +epoch: 1, batch: 37929, sum loss: 4494.130371, avg loss: 2.585806, ppl: 13.273979 +epoch: 1, batch: 37930, sum loss: 4628.684082, avg loss: 2.425935, ppl: 11.312802 +epoch: 1, batch: 37931, sum loss: 5935.549805, avg loss: 3.039196, ppl: 20.888443 +epoch: 1, batch: 37932, sum loss: 4359.308594, avg loss: 2.750352, ppl: 15.648145 +epoch: 1, batch: 37933, sum loss: 4079.232666, avg loss: 2.429561, ppl: 11.353895 +epoch: 1, batch: 37934, sum loss: 3782.742432, avg loss: 2.465934, ppl: 11.774472 +epoch: 1, batch: 37935, sum loss: 4941.857422, avg loss: 2.494627, ppl: 12.117219 +epoch: 1, batch: 37936, sum loss: 5462.559082, avg loss: 2.645307, ppl: 14.087770 +epoch: 1, batch: 37937, sum loss: 4196.990234, avg loss: 2.470271, ppl: 11.825652 +epoch: 1, batch: 37938, sum loss: 4104.375488, avg loss: 2.538266, ppl: 12.657700 +epoch: 1, batch: 37939, sum loss: 4851.932617, avg loss: 2.860809, ppl: 17.475664 +epoch: 1, batch: 37940, sum loss: 4641.705078, avg loss: 2.801270, ppl: 16.465549 +epoch: 1, batch: 37941, sum loss: 5181.674805, avg loss: 2.939124, ppl: 18.899277 +epoch: 1, batch: 37942, sum loss: 3616.893066, avg loss: 2.384241, ppl: 10.850820 +epoch: 1, batch: 37943, sum loss: 3614.001709, avg loss: 2.391795, ppl: 10.933098 +epoch: 1, batch: 37944, sum loss: 3702.063477, avg loss: 2.337161, ppl: 10.351810 +epoch: 1, batch: 37945, sum loss: 
3965.361816, avg loss: 2.627808, ppl: 13.843390 +epoch: 1, batch: 37946, sum loss: 3912.774902, avg loss: 2.517873, ppl: 12.402193 +epoch: 1, batch: 37947, sum loss: 4586.051758, avg loss: 2.683471, ppl: 14.635805 +epoch: 1, batch: 37948, sum loss: 5162.406738, avg loss: 2.647388, ppl: 14.117116 +epoch: 1, batch: 37949, sum loss: 3707.191162, avg loss: 2.245422, ppl: 9.444399 +epoch: 1, batch: 37950, sum loss: 4179.401367, avg loss: 2.640178, ppl: 14.015695 +epoch: 1, batch: 37951, sum loss: 4412.479980, avg loss: 2.577383, ppl: 13.162647 +epoch: 1, batch: 37952, sum loss: 4834.306641, avg loss: 2.757734, ppl: 15.764074 +epoch: 1, batch: 37953, sum loss: 3736.481689, avg loss: 2.427863, ppl: 11.334639 +epoch: 1, batch: 37954, sum loss: 3968.382324, avg loss: 2.578546, ppl: 13.177964 +epoch: 1, batch: 37955, sum loss: 4575.968750, avg loss: 2.748330, ppl: 15.616525 +epoch: 1, batch: 37956, sum loss: 3721.281006, avg loss: 2.187702, ppl: 8.914703 +epoch: 1, batch: 37957, sum loss: 3995.814453, avg loss: 2.514673, ppl: 12.362559 +epoch: 1, batch: 37958, sum loss: 5207.455078, avg loss: 2.626049, ppl: 13.819063 +epoch: 1, batch: 37959, sum loss: 4853.333984, avg loss: 2.841530, ppl: 17.141977 +epoch: 1, batch: 37960, sum loss: 5670.341309, avg loss: 2.823875, ppl: 16.841990 +epoch: 1, batch: 37961, sum loss: 4095.432617, avg loss: 2.479075, ppl: 11.930229 +epoch: 1, batch: 37962, sum loss: 4138.763184, avg loss: 2.588345, ppl: 13.307723 +epoch: 1, batch: 37963, sum loss: 3799.479492, avg loss: 2.460803, ppl: 11.714214 +epoch: 1, batch: 37964, sum loss: 4825.146484, avg loss: 2.732246, ppl: 15.367362 +epoch: 1, batch: 37965, sum loss: 4480.303711, avg loss: 2.469848, ppl: 11.820646 +epoch: 1, batch: 37966, sum loss: 5440.344727, avg loss: 2.985919, ppl: 19.804699 +epoch: 1, batch: 37967, sum loss: 4149.940918, avg loss: 2.590475, ppl: 13.336102 +epoch: 1, batch: 37968, sum loss: 5652.995605, avg loss: 2.822264, ppl: 16.814884 +epoch: 1, batch: 37969, sum loss: 
4975.483887, avg loss: 2.753450, ppl: 15.696687 +epoch: 1, batch: 37970, sum loss: 4603.457031, avg loss: 2.659420, ppl: 14.288007 +epoch: 1, batch: 37971, sum loss: 5542.260742, avg loss: 2.699591, ppl: 14.873650 +epoch: 1, batch: 37972, sum loss: 4658.746582, avg loss: 2.601198, ppl: 13.479883 +epoch: 1, batch: 37973, sum loss: 3826.927979, avg loss: 2.491490, ppl: 12.079257 +epoch: 1, batch: 37974, sum loss: 4461.978516, avg loss: 2.702592, ppl: 14.918344 +epoch: 1, batch: 37975, sum loss: 4795.464844, avg loss: 2.642129, ppl: 14.043076 +epoch: 1, batch: 37976, sum loss: 3489.835938, avg loss: 2.258794, ppl: 9.571535 +epoch: 1, batch: 37977, sum loss: 4876.611816, avg loss: 2.828661, ppl: 16.922794 +epoch: 1, batch: 37978, sum loss: 4118.074707, avg loss: 2.442512, ppl: 11.501895 +epoch: 1, batch: 37979, sum loss: 4436.773926, avg loss: 2.633100, ppl: 13.916849 +epoch: 1, batch: 37980, sum loss: 4538.125977, avg loss: 2.545219, ppl: 12.746024 +epoch: 1, batch: 37981, sum loss: 4125.654785, avg loss: 2.768896, ppl: 15.941023 +epoch: 1, batch: 37982, sum loss: 4185.641602, avg loss: 2.575779, ppl: 13.141556 +epoch: 1, batch: 37983, sum loss: 4815.565918, avg loss: 2.654667, ppl: 14.220252 +epoch: 1, batch: 37984, sum loss: 3626.992188, avg loss: 2.350611, ppl: 10.491976 +epoch: 1, batch: 37985, sum loss: 3672.623535, avg loss: 2.525876, ppl: 12.501840 +epoch: 1, batch: 37986, sum loss: 4075.908203, avg loss: 2.602751, ppl: 13.500828 +epoch: 1, batch: 37987, sum loss: 3503.588867, avg loss: 2.331064, ppl: 10.288880 +epoch: 1, batch: 37988, sum loss: 4256.025391, avg loss: 2.820428, ppl: 16.784031 +epoch: 1, batch: 37989, sum loss: 4221.343262, avg loss: 2.617076, ppl: 13.695618 +epoch: 1, batch: 37990, sum loss: 4200.932617, avg loss: 2.393694, ppl: 10.953879 +epoch: 1, batch: 37991, sum loss: 4073.499023, avg loss: 2.477798, ppl: 11.914993 +epoch: 1, batch: 37992, sum loss: 4403.962402, avg loss: 2.575417, ppl: 13.136788 +epoch: 1, batch: 37993, sum loss: 
4473.223633, avg loss: 2.640628, ppl: 14.022005 +epoch: 1, batch: 37994, sum loss: 4349.344238, avg loss: 2.730285, ppl: 15.337260 +epoch: 1, batch: 37995, sum loss: 4107.372559, avg loss: 2.567108, ppl: 13.028089 +epoch: 1, batch: 37996, sum loss: 3934.782715, avg loss: 2.566721, ppl: 13.023048 +epoch: 1, batch: 37997, sum loss: 4035.961914, avg loss: 2.431302, ppl: 11.373685 +epoch: 1, batch: 37998, sum loss: 4319.424316, avg loss: 2.686209, ppl: 14.675937 +epoch: 1, batch: 37999, sum loss: 4642.459473, avg loss: 2.817026, ppl: 16.727037 +epoch: 1, batch: 38000, sum loss: 3765.806152, avg loss: 2.355101, ppl: 10.539189 +epoch: 1, batch: 38001, sum loss: 4647.172852, avg loss: 2.568918, ppl: 13.051698 +epoch: 1, batch: 38002, sum loss: 3841.159424, avg loss: 2.440381, ppl: 11.477410 +epoch: 1, batch: 38003, sum loss: 3966.507568, avg loss: 2.544264, ppl: 12.733853 +epoch: 1, batch: 38004, sum loss: 4654.361328, avg loss: 2.343586, ppl: 10.418528 +epoch: 1, batch: 38005, sum loss: 4211.387695, avg loss: 2.696151, ppl: 14.822570 +epoch: 1, batch: 38006, sum loss: 5044.461426, avg loss: 2.636937, ppl: 13.970352 +epoch: 1, batch: 38007, sum loss: 4012.433594, avg loss: 2.353333, ppl: 10.520581 +epoch: 1, batch: 38008, sum loss: 3410.811035, avg loss: 2.141124, ppl: 8.508999 +epoch: 1, batch: 38009, sum loss: 4039.076172, avg loss: 2.634753, ppl: 13.939869 +epoch: 1, batch: 38010, sum loss: 4365.842285, avg loss: 2.476371, ppl: 11.898008 +epoch: 1, batch: 38011, sum loss: 4819.826172, avg loss: 2.740095, ppl: 15.488451 +epoch: 1, batch: 38012, sum loss: 4835.375977, avg loss: 2.852729, ppl: 17.335026 +epoch: 1, batch: 38013, sum loss: 5171.223145, avg loss: 2.835100, ppl: 17.032110 +epoch: 1, batch: 38014, sum loss: 5431.664062, avg loss: 2.778345, ppl: 16.092363 +epoch: 1, batch: 38015, sum loss: 5655.620605, avg loss: 2.631745, ppl: 13.898005 +epoch: 1, batch: 38016, sum loss: 3922.181396, avg loss: 2.427092, ppl: 11.325902 +epoch: 1, batch: 38017, sum loss: 
4136.977051, avg loss: 2.445022, ppl: 11.530802 +epoch: 1, batch: 38018, sum loss: 5247.844727, avg loss: 2.627864, ppl: 13.844169 +epoch: 1, batch: 38019, sum loss: 3955.589355, avg loss: 2.556942, ppl: 12.896323 +epoch: 1, batch: 38020, sum loss: 3519.571777, avg loss: 2.179302, ppl: 8.840129 +epoch: 1, batch: 38021, sum loss: 5069.126465, avg loss: 2.660959, ppl: 14.310002 +epoch: 1, batch: 38022, sum loss: 4407.592285, avg loss: 2.588134, ppl: 13.304922 +epoch: 1, batch: 38023, sum loss: 4950.907715, avg loss: 2.733798, ppl: 15.391232 +epoch: 1, batch: 38024, sum loss: 5245.815430, avg loss: 2.820331, ppl: 16.782402 +epoch: 1, batch: 38025, sum loss: 4301.751953, avg loss: 2.412648, ppl: 11.163487 +epoch: 1, batch: 38026, sum loss: 4342.517578, avg loss: 2.566500, ppl: 13.020170 +epoch: 1, batch: 38027, sum loss: 3764.134766, avg loss: 2.452205, ppl: 11.613927 +epoch: 1, batch: 38028, sum loss: 3836.707520, avg loss: 2.527475, ppl: 12.521853 +epoch: 1, batch: 38029, sum loss: 5199.240234, avg loss: 2.520233, ppl: 12.431492 +epoch: 1, batch: 38030, sum loss: 4967.819336, avg loss: 2.668002, ppl: 14.411145 +epoch: 1, batch: 38031, sum loss: 4488.404785, avg loss: 2.660584, ppl: 14.304637 +epoch: 1, batch: 38032, sum loss: 4771.190918, avg loss: 3.096165, ppl: 22.112995 +epoch: 1, batch: 38033, sum loss: 4365.240723, avg loss: 2.539407, ppl: 12.672157 +epoch: 1, batch: 38034, sum loss: 4167.026367, avg loss: 2.508746, ppl: 12.289505 +epoch: 1, batch: 38035, sum loss: 5120.718262, avg loss: 2.754555, ppl: 15.714050 +epoch: 1, batch: 38036, sum loss: 3090.940674, avg loss: 2.225299, ppl: 9.256251 +epoch: 1, batch: 38037, sum loss: 3726.063721, avg loss: 2.427403, ppl: 11.329421 +epoch: 1, batch: 38038, sum loss: 4810.166504, avg loss: 2.577796, ppl: 13.168077 +epoch: 1, batch: 38039, sum loss: 5159.738770, avg loss: 2.849111, ppl: 17.272413 +epoch: 1, batch: 38040, sum loss: 5373.759277, avg loss: 2.822353, ppl: 16.816368 +epoch: 1, batch: 38041, sum loss: 
3904.298828, avg loss: 2.379219, ppl: 10.796471 +epoch: 1, batch: 38042, sum loss: 4345.970703, avg loss: 2.676090, ppl: 14.528180 +epoch: 1, batch: 38043, sum loss: 4204.679199, avg loss: 2.636163, ppl: 13.959531 +epoch: 1, batch: 38044, sum loss: 3946.450195, avg loss: 2.464991, ppl: 11.763374 +epoch: 1, batch: 38045, sum loss: 3884.376709, avg loss: 2.409663, ppl: 11.130209 +epoch: 1, batch: 38046, sum loss: 5234.690918, avg loss: 2.787375, ppl: 16.238342 +epoch: 1, batch: 38047, sum loss: 3611.133789, avg loss: 2.641649, ppl: 14.036327 +epoch: 1, batch: 38048, sum loss: 4744.605957, avg loss: 2.662518, ppl: 14.332326 +epoch: 1, batch: 38049, sum loss: 4119.557129, avg loss: 2.505813, ppl: 12.253521 +epoch: 1, batch: 38050, sum loss: 4615.354492, avg loss: 2.667835, ppl: 14.408740 +epoch: 1, batch: 38051, sum loss: 4686.055664, avg loss: 2.638545, ppl: 13.992829 +epoch: 1, batch: 38052, sum loss: 4922.666016, avg loss: 2.662340, ppl: 14.329777 +epoch: 1, batch: 38053, sum loss: 4757.594238, avg loss: 2.720180, ppl: 15.183048 +epoch: 1, batch: 38054, sum loss: 4239.149414, avg loss: 2.451793, ppl: 11.609140 +epoch: 1, batch: 38055, sum loss: 4680.514648, avg loss: 2.801026, ppl: 16.461533 +epoch: 1, batch: 38056, sum loss: 5098.899902, avg loss: 2.716516, ppl: 15.127522 +epoch: 1, batch: 38057, sum loss: 3901.936768, avg loss: 2.431113, ppl: 11.371534 +epoch: 1, batch: 38058, sum loss: 4065.656738, avg loss: 2.381756, ppl: 10.823889 +epoch: 1, batch: 38059, sum loss: 3905.513916, avg loss: 2.337232, ppl: 10.352537 +epoch: 1, batch: 38060, sum loss: 3550.977783, avg loss: 2.345428, ppl: 10.437739 +epoch: 1, batch: 38061, sum loss: 4652.606934, avg loss: 2.589097, ppl: 13.317738 +epoch: 1, batch: 38062, sum loss: 4858.070801, avg loss: 2.724661, ppl: 15.251248 +epoch: 1, batch: 38063, sum loss: 4364.556641, avg loss: 2.573442, ppl: 13.110868 +epoch: 1, batch: 38064, sum loss: 4250.049316, avg loss: 2.601009, ppl: 13.477335 +epoch: 1, batch: 38065, sum loss: 
4679.865234, avg loss: 2.716115, ppl: 15.121454 +epoch: 1, batch: 38066, sum loss: 4622.123047, avg loss: 2.635190, ppl: 13.945959 +epoch: 1, batch: 38067, sum loss: 4673.794922, avg loss: 2.503372, ppl: 12.223639 +epoch: 1, batch: 38068, sum loss: 4597.691895, avg loss: 2.759719, ppl: 15.795403 +epoch: 1, batch: 38069, sum loss: 4796.885742, avg loss: 2.635652, ppl: 13.952401 +epoch: 1, batch: 38070, sum loss: 4316.932617, avg loss: 2.545361, ppl: 12.747833 +epoch: 1, batch: 38071, sum loss: 4536.430664, avg loss: 2.481636, ppl: 11.960817 +epoch: 1, batch: 38072, sum loss: 3698.989014, avg loss: 2.595782, ppl: 13.407062 +epoch: 1, batch: 38073, sum loss: 4342.154297, avg loss: 2.557217, ppl: 12.899869 +epoch: 1, batch: 38074, sum loss: 4776.187012, avg loss: 2.459417, ppl: 11.697986 +epoch: 1, batch: 38075, sum loss: 4535.906250, avg loss: 2.638689, ppl: 13.994844 +epoch: 1, batch: 38076, sum loss: 4111.525879, avg loss: 2.642369, ppl: 14.046437 +epoch: 1, batch: 38077, sum loss: 3508.848145, avg loss: 2.313018, ppl: 10.104874 +epoch: 1, batch: 38078, sum loss: 4566.666016, avg loss: 2.653496, ppl: 14.203602 +epoch: 1, batch: 38079, sum loss: 4949.484863, avg loss: 2.887681, ppl: 17.951628 +epoch: 1, batch: 38080, sum loss: 4068.726562, avg loss: 2.418981, ppl: 11.234409 +epoch: 1, batch: 38081, sum loss: 3782.148438, avg loss: 2.608378, ppl: 13.577017 +epoch: 1, batch: 38082, sum loss: 4351.176758, avg loss: 2.643485, ppl: 14.062129 +epoch: 1, batch: 38083, sum loss: 4653.991211, avg loss: 2.478163, ppl: 11.919354 +epoch: 1, batch: 38084, sum loss: 4850.238281, avg loss: 2.741797, ppl: 15.514836 +epoch: 1, batch: 38085, sum loss: 3707.525146, avg loss: 2.271768, ppl: 9.696527 +epoch: 1, batch: 38086, sum loss: 4288.161621, avg loss: 2.497473, ppl: 12.151751 +epoch: 1, batch: 38087, sum loss: 3786.182129, avg loss: 2.317125, ppl: 10.146462 +epoch: 1, batch: 38088, sum loss: 3652.085693, avg loss: 2.374568, ppl: 10.746369 +epoch: 1, batch: 38089, sum loss: 
4429.159668, avg loss: 2.613074, ppl: 13.640913 +epoch: 1, batch: 38090, sum loss: 4083.378418, avg loss: 2.508218, ppl: 12.283017 +epoch: 1, batch: 38091, sum loss: 4894.380371, avg loss: 2.634220, ppl: 13.932436 +epoch: 1, batch: 38092, sum loss: 4771.894531, avg loss: 2.551815, ppl: 12.830373 +epoch: 1, batch: 38093, sum loss: 4973.683105, avg loss: 2.672586, ppl: 14.477365 +epoch: 1, batch: 38094, sum loss: 4159.035645, avg loss: 2.560982, ppl: 12.948532 +epoch: 1, batch: 38095, sum loss: 3573.172607, avg loss: 2.398103, ppl: 11.002280 +epoch: 1, batch: 38096, sum loss: 4532.782227, avg loss: 2.620105, ppl: 13.737169 +epoch: 1, batch: 38097, sum loss: 3576.980957, avg loss: 2.375154, ppl: 10.752664 +epoch: 1, batch: 38098, sum loss: 3862.878174, avg loss: 2.519816, ppl: 12.426312 +epoch: 1, batch: 38099, sum loss: 4926.395508, avg loss: 2.571188, ppl: 13.081352 +epoch: 1, batch: 38100, sum loss: 4804.530273, avg loss: 2.691614, ppl: 14.755467 +epoch: 1, batch: 38101, sum loss: 5319.027832, avg loss: 2.670195, ppl: 14.442783 +epoch: 1, batch: 38102, sum loss: 5006.024414, avg loss: 2.793540, ppl: 16.338764 +epoch: 1, batch: 38103, sum loss: 4249.649902, avg loss: 2.629734, ppl: 13.870077 +epoch: 1, batch: 38104, sum loss: 4726.671387, avg loss: 2.624470, ppl: 13.797266 +epoch: 1, batch: 38105, sum loss: 4660.964844, avg loss: 2.606804, ppl: 13.555653 +epoch: 1, batch: 38106, sum loss: 4025.924805, avg loss: 2.519352, ppl: 12.420548 +epoch: 1, batch: 38107, sum loss: 4643.568359, avg loss: 2.632408, ppl: 13.907224 +epoch: 1, batch: 38108, sum loss: 4775.321777, avg loss: 2.635387, ppl: 13.948712 +epoch: 1, batch: 38109, sum loss: 5350.579590, avg loss: 2.853642, ppl: 17.350866 +epoch: 1, batch: 38110, sum loss: 3961.304199, avg loss: 2.737598, ppl: 15.449829 +epoch: 1, batch: 38111, sum loss: 3966.349854, avg loss: 2.583941, ppl: 13.249253 +epoch: 1, batch: 38112, sum loss: 4546.510742, avg loss: 2.488512, ppl: 12.043337 +epoch: 1, batch: 38113, sum loss: 
4244.517090, avg loss: 2.561567, ppl: 12.956107 +epoch: 1, batch: 38114, sum loss: 5153.287598, avg loss: 2.839277, ppl: 17.103395 +epoch: 1, batch: 38115, sum loss: 4154.202148, avg loss: 2.318193, ppl: 10.157306 +epoch: 1, batch: 38116, sum loss: 3695.613281, avg loss: 2.303998, ppl: 10.014144 +epoch: 1, batch: 38117, sum loss: 3566.039551, avg loss: 2.430838, ppl: 11.368406 +epoch: 1, batch: 38118, sum loss: 3522.243164, avg loss: 2.346598, ppl: 10.449955 +epoch: 1, batch: 38119, sum loss: 4575.077148, avg loss: 2.605397, ppl: 13.536601 +epoch: 1, batch: 38120, sum loss: 4181.773926, avg loss: 2.590938, ppl: 13.342282 +epoch: 1, batch: 38121, sum loss: 4419.056641, avg loss: 2.361869, ppl: 10.610763 +epoch: 1, batch: 38122, sum loss: 4126.828125, avg loss: 2.788398, ppl: 16.254951 +epoch: 1, batch: 38123, sum loss: 4290.408203, avg loss: 2.768005, ppl: 15.926834 +epoch: 1, batch: 38124, sum loss: 4343.168945, avg loss: 2.487497, ppl: 12.031120 +epoch: 1, batch: 38125, sum loss: 4880.908203, avg loss: 2.872813, ppl: 17.686693 +epoch: 1, batch: 38126, sum loss: 4933.861816, avg loss: 2.638429, ppl: 13.991205 +epoch: 1, batch: 38127, sum loss: 4945.051270, avg loss: 2.861720, ppl: 17.491579 +epoch: 1, batch: 38128, sum loss: 5061.578125, avg loss: 2.785679, ppl: 16.210815 +epoch: 1, batch: 38129, sum loss: 5135.697266, avg loss: 2.614917, ppl: 13.666086 +epoch: 1, batch: 38130, sum loss: 4615.072754, avg loss: 2.692575, ppl: 14.769651 +epoch: 1, batch: 38131, sum loss: 4586.227539, avg loss: 2.560708, ppl: 12.944974 +epoch: 1, batch: 38132, sum loss: 5501.733887, avg loss: 2.735820, ppl: 15.422385 +epoch: 1, batch: 38133, sum loss: 4549.122070, avg loss: 2.577406, ppl: 13.162954 +epoch: 1, batch: 38134, sum loss: 5208.253418, avg loss: 2.774776, ppl: 16.035027 +epoch: 1, batch: 38135, sum loss: 4163.786621, avg loss: 2.506795, ppl: 12.265558 +epoch: 1, batch: 38136, sum loss: 3720.846924, avg loss: 2.514086, ppl: 12.355308 +epoch: 1, batch: 38137, sum loss: 
4398.102539, avg loss: 2.504614, ppl: 12.238838 +epoch: 1, batch: 38138, sum loss: 4291.453125, avg loss: 2.568195, ppl: 13.042257 +epoch: 1, batch: 38139, sum loss: 5136.661621, avg loss: 2.986431, ppl: 19.814840 +epoch: 1, batch: 38140, sum loss: 4587.395020, avg loss: 2.659360, ppl: 14.287138 +epoch: 1, batch: 38141, sum loss: 5027.960449, avg loss: 2.570532, ppl: 13.072775 +epoch: 1, batch: 38142, sum loss: 5394.394531, avg loss: 2.860230, ppl: 17.465551 +epoch: 1, batch: 38143, sum loss: 5408.355469, avg loss: 2.715038, ppl: 15.105185 +epoch: 1, batch: 38144, sum loss: 3781.996338, avg loss: 2.378614, ppl: 10.789937 +epoch: 1, batch: 38145, sum loss: 4550.424805, avg loss: 2.332355, ppl: 10.302177 +epoch: 1, batch: 38146, sum loss: 4809.541992, avg loss: 2.788141, ppl: 16.250774 +epoch: 1, batch: 38147, sum loss: 4898.643555, avg loss: 2.598750, ppl: 13.446918 +epoch: 1, batch: 38148, sum loss: 4765.276367, avg loss: 2.506721, ppl: 12.264646 +epoch: 1, batch: 38149, sum loss: 3957.459961, avg loss: 2.622571, ppl: 13.771087 +epoch: 1, batch: 38150, sum loss: 4232.866211, avg loss: 2.432682, ppl: 11.389385 +epoch: 1, batch: 38151, sum loss: 4148.303711, avg loss: 2.450268, ppl: 11.591454 +epoch: 1, batch: 38152, sum loss: 5154.370117, avg loss: 2.940314, ppl: 18.921783 +epoch: 1, batch: 38153, sum loss: 4124.041992, avg loss: 2.693692, ppl: 14.786161 +epoch: 1, batch: 38154, sum loss: 4187.453125, avg loss: 2.570567, ppl: 13.073231 +epoch: 1, batch: 38155, sum loss: 5113.425781, avg loss: 2.730072, ppl: 15.333999 +epoch: 1, batch: 38156, sum loss: 4580.891113, avg loss: 2.891977, ppl: 18.028910 +epoch: 1, batch: 38157, sum loss: 4315.373047, avg loss: 2.450524, ppl: 11.594421 +epoch: 1, batch: 38158, sum loss: 3078.907715, avg loss: 2.467073, ppl: 11.787898 +epoch: 1, batch: 38159, sum loss: 3724.683838, avg loss: 2.504831, ppl: 12.241491 +epoch: 1, batch: 38160, sum loss: 4271.287598, avg loss: 2.604444, ppl: 13.523698 +epoch: 1, batch: 38161, sum loss: 
4023.599121, avg loss: 2.296575, ppl: 9.940080 +epoch: 1, batch: 38162, sum loss: 4187.994141, avg loss: 2.492854, ppl: 12.095744 +epoch: 1, batch: 38163, sum loss: 4092.150146, avg loss: 2.546453, ppl: 12.761761 +epoch: 1, batch: 38164, sum loss: 4277.654297, avg loss: 2.710808, ppl: 15.041417 +epoch: 1, batch: 38165, sum loss: 3792.281494, avg loss: 2.501505, ppl: 12.200844 +epoch: 1, batch: 38166, sum loss: 4776.982910, avg loss: 2.630497, ppl: 13.880667 +epoch: 1, batch: 38167, sum loss: 4380.977539, avg loss: 2.808319, ppl: 16.582022 +epoch: 1, batch: 38168, sum loss: 4155.841797, avg loss: 2.493006, ppl: 12.097592 +epoch: 1, batch: 38169, sum loss: 4786.781738, avg loss: 2.680169, ppl: 14.587560 +epoch: 1, batch: 38170, sum loss: 3443.182617, avg loss: 2.379532, ppl: 10.799846 +epoch: 1, batch: 38171, sum loss: 3802.106201, avg loss: 2.273987, ppl: 9.718070 +epoch: 1, batch: 38172, sum loss: 5010.366211, avg loss: 2.667927, ppl: 14.410060 +epoch: 1, batch: 38173, sum loss: 4697.131836, avg loss: 2.799244, ppl: 16.432222 +epoch: 1, batch: 38174, sum loss: 4735.371094, avg loss: 2.689024, ppl: 14.717300 +epoch: 1, batch: 38175, sum loss: 4583.395996, avg loss: 2.650894, ppl: 14.166700 +epoch: 1, batch: 38176, sum loss: 4329.947266, avg loss: 2.777388, ppl: 16.076967 +epoch: 1, batch: 38177, sum loss: 4595.158691, avg loss: 2.535959, ppl: 12.628542 +epoch: 1, batch: 38178, sum loss: 5123.416016, avg loss: 2.836886, ppl: 17.062550 +epoch: 1, batch: 38179, sum loss: 3968.383789, avg loss: 2.267648, ppl: 9.656659 +epoch: 1, batch: 38180, sum loss: 3847.305176, avg loss: 2.407575, ppl: 11.106996 +epoch: 1, batch: 38181, sum loss: 4820.097168, avg loss: 2.657165, ppl: 14.255817 +epoch: 1, batch: 38182, sum loss: 4460.655762, avg loss: 2.521569, ppl: 12.448115 +epoch: 1, batch: 38183, sum loss: 4232.775879, avg loss: 2.343730, ppl: 10.420028 +epoch: 1, batch: 38184, sum loss: 4744.836914, avg loss: 2.689817, ppl: 14.728979 +epoch: 1, batch: 38185, sum loss: 
4603.192383, avg loss: 2.618426, ppl: 13.714115 +epoch: 1, batch: 38186, sum loss: 4845.096191, avg loss: 2.872019, ppl: 17.672668 +epoch: 1, batch: 38187, sum loss: 4691.460938, avg loss: 2.458837, ppl: 11.691204 +epoch: 1, batch: 38188, sum loss: 4141.620605, avg loss: 2.439117, ppl: 11.462914 +epoch: 1, batch: 38189, sum loss: 5008.174805, avg loss: 2.594909, ppl: 13.395374 +epoch: 1, batch: 38190, sum loss: 3731.093994, avg loss: 2.404056, ppl: 11.067973 +epoch: 1, batch: 38191, sum loss: 4346.412598, avg loss: 2.546229, ppl: 12.758898 +epoch: 1, batch: 38192, sum loss: 4301.177246, avg loss: 2.500684, ppl: 12.190836 +epoch: 1, batch: 38193, sum loss: 4867.018555, avg loss: 2.907419, ppl: 18.309475 +epoch: 1, batch: 38194, sum loss: 4457.128418, avg loss: 2.612619, ppl: 13.634716 +epoch: 1, batch: 38195, sum loss: 4470.637207, avg loss: 2.594682, ppl: 13.392327 +epoch: 1, batch: 38196, sum loss: 3281.809570, avg loss: 2.340806, ppl: 10.389605 +epoch: 1, batch: 38197, sum loss: 4658.374023, avg loss: 2.728983, ppl: 15.317304 +epoch: 1, batch: 38198, sum loss: 4391.498047, avg loss: 2.712476, ppl: 15.066531 +epoch: 1, batch: 38199, sum loss: 4554.704102, avg loss: 2.560261, ppl: 12.939191 +epoch: 1, batch: 38200, sum loss: 3864.176758, avg loss: 2.538881, ppl: 12.665491 +epoch: 1, batch: 38201, sum loss: 4571.154785, avg loss: 2.842758, ppl: 17.163034 +epoch: 1, batch: 38202, sum loss: 3952.004150, avg loss: 2.534961, ppl: 12.615938 +epoch: 1, batch: 38203, sum loss: 4797.253906, avg loss: 2.757042, ppl: 15.753182 +epoch: 1, batch: 38204, sum loss: 4337.625977, avg loss: 2.551545, ppl: 12.826905 +epoch: 1, batch: 38205, sum loss: 4777.987305, avg loss: 2.648552, ppl: 14.133554 +epoch: 1, batch: 38206, sum loss: 3654.773193, avg loss: 2.503269, ppl: 12.222389 +epoch: 1, batch: 38207, sum loss: 3953.856445, avg loss: 2.288111, ppl: 9.856306 +epoch: 1, batch: 38208, sum loss: 4830.676758, avg loss: 2.833242, ppl: 17.000483 +epoch: 1, batch: 38209, sum loss: 
4389.475586, avg loss: 2.628428, ppl: 13.851984 +epoch: 1, batch: 38210, sum loss: 4666.456543, avg loss: 2.626031, ppl: 13.818810 +epoch: 1, batch: 38211, sum loss: 4725.766113, avg loss: 2.804609, ppl: 16.520617 +epoch: 1, batch: 38212, sum loss: 4273.776367, avg loss: 2.560681, ppl: 12.944628 +epoch: 1, batch: 38213, sum loss: 3235.716309, avg loss: 2.361837, ppl: 10.610424 +epoch: 1, batch: 38214, sum loss: 4126.526367, avg loss: 2.749185, ppl: 15.629886 +epoch: 1, batch: 38215, sum loss: 4874.238770, avg loss: 2.735263, ppl: 15.413798 +epoch: 1, batch: 38216, sum loss: 5197.382812, avg loss: 2.760161, ppl: 15.802382 +epoch: 1, batch: 38217, sum loss: 4371.626953, avg loss: 2.646263, ppl: 14.101249 +epoch: 1, batch: 38218, sum loss: 4476.861816, avg loss: 2.625726, ppl: 13.814593 +epoch: 1, batch: 38219, sum loss: 3462.099365, avg loss: 2.359986, ppl: 10.590801 +epoch: 1, batch: 38220, sum loss: 4904.618652, avg loss: 2.684520, ppl: 14.651171 +epoch: 1, batch: 38221, sum loss: 4766.931152, avg loss: 2.702342, ppl: 14.914621 +epoch: 1, batch: 38222, sum loss: 3555.878174, avg loss: 2.342476, ppl: 10.406969 +epoch: 1, batch: 38223, sum loss: 3844.359863, avg loss: 2.556090, ppl: 12.885342 +epoch: 1, batch: 38224, sum loss: 4492.703613, avg loss: 2.601450, ppl: 13.483271 +epoch: 1, batch: 38225, sum loss: 3913.213867, avg loss: 2.544352, ppl: 12.734977 +epoch: 1, batch: 38226, sum loss: 4099.839844, avg loss: 2.613027, ppl: 13.640282 +epoch: 1, batch: 38227, sum loss: 4171.461426, avg loss: 2.462492, ppl: 11.734016 +epoch: 1, batch: 38228, sum loss: 4028.285645, avg loss: 2.273299, ppl: 9.711386 +epoch: 1, batch: 38229, sum loss: 4378.043945, avg loss: 2.620014, ppl: 13.735922 +epoch: 1, batch: 38230, sum loss: 4960.134766, avg loss: 2.641179, ppl: 14.029739 +epoch: 1, batch: 38231, sum loss: 3964.425781, avg loss: 2.241055, ppl: 9.403244 +epoch: 1, batch: 38232, sum loss: 4651.243164, avg loss: 2.519633, ppl: 12.424040 +epoch: 1, batch: 38233, sum loss: 
4712.453613, avg loss: 2.741393, ppl: 15.508568 +epoch: 1, batch: 38234, sum loss: 4321.298828, avg loss: 2.752420, ppl: 15.680529 +epoch: 1, batch: 38235, sum loss: 3791.249512, avg loss: 2.587883, ppl: 13.301589 +epoch: 1, batch: 38236, sum loss: 4759.828125, avg loss: 2.639949, ppl: 14.012490 +epoch: 1, batch: 38237, sum loss: 5140.301758, avg loss: 2.876498, ppl: 17.751997 +epoch: 1, batch: 38238, sum loss: 4339.490723, avg loss: 2.509827, ppl: 12.302800 +epoch: 1, batch: 38239, sum loss: 4616.549805, avg loss: 2.600873, ppl: 13.475497 +epoch: 1, batch: 38240, sum loss: 5189.685547, avg loss: 2.843663, ppl: 17.178579 +epoch: 1, batch: 38241, sum loss: 3899.669189, avg loss: 2.359146, ppl: 10.581914 +epoch: 1, batch: 38242, sum loss: 4715.203613, avg loss: 2.713006, ppl: 15.074514 +epoch: 1, batch: 38243, sum loss: 4010.792480, avg loss: 2.503616, ppl: 12.226621 +epoch: 1, batch: 38244, sum loss: 4886.218262, avg loss: 2.854099, ppl: 17.358795 +epoch: 1, batch: 38245, sum loss: 4652.233887, avg loss: 2.342515, ppl: 10.407373 +epoch: 1, batch: 38246, sum loss: 4025.647949, avg loss: 2.291205, ppl: 9.886848 +epoch: 1, batch: 38247, sum loss: 4504.235840, avg loss: 2.378160, ppl: 10.785046 +epoch: 1, batch: 38248, sum loss: 4772.386719, avg loss: 2.530428, ppl: 12.558879 +epoch: 1, batch: 38249, sum loss: 4554.767578, avg loss: 2.551691, ppl: 12.828773 +epoch: 1, batch: 38250, sum loss: 4448.018555, avg loss: 2.467010, ppl: 11.787148 +epoch: 1, batch: 38251, sum loss: 4450.184082, avg loss: 2.596374, ppl: 13.415001 +epoch: 1, batch: 38252, sum loss: 4355.677734, avg loss: 2.670557, ppl: 14.448008 +epoch: 1, batch: 38253, sum loss: 4094.551758, avg loss: 2.692013, ppl: 14.761361 +epoch: 1, batch: 38254, sum loss: 4232.308105, avg loss: 2.723493, ppl: 15.233441 +epoch: 1, batch: 38255, sum loss: 4789.562500, avg loss: 2.786249, ppl: 16.220070 +epoch: 1, batch: 38256, sum loss: 4697.488281, avg loss: 2.809503, ppl: 16.601662 +epoch: 1, batch: 38257, sum loss: 
5446.989258, avg loss: 2.616229, ppl: 13.684028 +epoch: 1, batch: 38258, sum loss: 3850.714111, avg loss: 2.471575, ppl: 11.841085 +epoch: 1, batch: 38259, sum loss: 5073.865723, avg loss: 2.825092, ppl: 16.862501 +epoch: 1, batch: 38260, sum loss: 3425.133545, avg loss: 2.487388, ppl: 12.029815 +epoch: 1, batch: 38261, sum loss: 4927.960938, avg loss: 2.660886, ppl: 14.308962 +epoch: 1, batch: 38262, sum loss: 4134.784668, avg loss: 2.669325, ppl: 14.430227 +epoch: 1, batch: 38263, sum loss: 4613.452148, avg loss: 2.693200, ppl: 14.778898 +epoch: 1, batch: 38264, sum loss: 4783.705078, avg loss: 2.704186, ppl: 14.942152 +epoch: 1, batch: 38265, sum loss: 4562.609375, avg loss: 2.773623, ppl: 16.016550 +epoch: 1, batch: 38266, sum loss: 4700.310547, avg loss: 2.601168, ppl: 13.479472 +epoch: 1, batch: 38267, sum loss: 4591.658691, avg loss: 2.629816, ppl: 13.871218 +epoch: 1, batch: 38268, sum loss: 4239.004883, avg loss: 2.433413, ppl: 11.397711 +epoch: 1, batch: 38269, sum loss: 3925.007324, avg loss: 2.548706, ppl: 12.790543 +epoch: 1, batch: 38270, sum loss: 4418.546875, avg loss: 2.568923, ppl: 13.051754 +epoch: 1, batch: 38271, sum loss: 3764.351318, avg loss: 2.501230, ppl: 12.197491 +epoch: 1, batch: 38272, sum loss: 4628.976562, avg loss: 2.666461, ppl: 14.388960 +epoch: 1, batch: 38273, sum loss: 4829.788086, avg loss: 2.653730, ppl: 14.206930 +epoch: 1, batch: 38274, sum loss: 4695.285156, avg loss: 2.818298, ppl: 16.748327 +epoch: 1, batch: 38275, sum loss: 4719.831055, avg loss: 2.587627, ppl: 13.298174 +epoch: 1, batch: 38276, sum loss: 4032.678711, avg loss: 2.692042, ppl: 14.761784 +epoch: 1, batch: 38277, sum loss: 4372.474609, avg loss: 2.808269, ppl: 16.581192 +epoch: 1, batch: 38278, sum loss: 4921.347656, avg loss: 2.747821, ppl: 15.608585 +epoch: 1, batch: 38279, sum loss: 4413.527344, avg loss: 2.411764, ppl: 11.153615 +epoch: 1, batch: 38280, sum loss: 4549.764160, avg loss: 2.709806, ppl: 15.026360 +epoch: 1, batch: 38281, sum loss: 
3918.976562, avg loss: 2.396928, ppl: 10.989361 +epoch: 1, batch: 38282, sum loss: 4083.282715, avg loss: 2.491326, ppl: 12.077275 +epoch: 1, batch: 38283, sum loss: 4633.588867, avg loss: 2.468614, ppl: 11.806076 +epoch: 1, batch: 38284, sum loss: 3473.242432, avg loss: 2.625278, ppl: 13.808409 +epoch: 1, batch: 38285, sum loss: 5024.765137, avg loss: 2.641833, ppl: 14.038908 +epoch: 1, batch: 38286, sum loss: 3419.741455, avg loss: 2.193548, ppl: 8.966973 +epoch: 1, batch: 38287, sum loss: 5318.678711, avg loss: 2.762950, ppl: 15.846520 +epoch: 1, batch: 38288, sum loss: 4750.100098, avg loss: 2.794177, ppl: 16.349161 +epoch: 1, batch: 38289, sum loss: 4614.576172, avg loss: 2.509286, ppl: 12.296144 +epoch: 1, batch: 38290, sum loss: 4665.692871, avg loss: 2.860633, ppl: 17.472589 +epoch: 1, batch: 38291, sum loss: 5284.102539, avg loss: 2.601725, ppl: 13.486978 +epoch: 1, batch: 38292, sum loss: 5789.565430, avg loss: 2.856224, ppl: 17.395710 +epoch: 1, batch: 38293, sum loss: 3927.690430, avg loss: 2.482737, ppl: 11.973993 +epoch: 1, batch: 38294, sum loss: 4082.236328, avg loss: 2.537126, ppl: 12.643286 +epoch: 1, batch: 38295, sum loss: 4365.028809, avg loss: 2.643870, ppl: 14.067538 +epoch: 1, batch: 38296, sum loss: 4684.668945, avg loss: 2.815306, ppl: 16.698284 +epoch: 1, batch: 38297, sum loss: 3910.488281, avg loss: 2.608732, ppl: 13.581812 +epoch: 1, batch: 38298, sum loss: 4579.044434, avg loss: 2.711098, ppl: 15.045786 +epoch: 1, batch: 38299, sum loss: 4558.988770, avg loss: 2.726668, ppl: 15.281885 +epoch: 1, batch: 38300, sum loss: 3891.801758, avg loss: 2.421781, ppl: 11.265904 +epoch: 1, batch: 38301, sum loss: 4419.657227, avg loss: 2.701502, ppl: 14.902100 +epoch: 1, batch: 38302, sum loss: 3963.477539, avg loss: 2.410874, ppl: 11.143701 +epoch: 1, batch: 38303, sum loss: 3782.447998, avg loss: 2.467350, ppl: 11.791162 +epoch: 1, batch: 38304, sum loss: 4603.993652, avg loss: 2.652070, ppl: 14.183369 +epoch: 1, batch: 38305, sum loss: 
4659.084473, avg loss: 2.558531, ppl: 12.916826 +epoch: 1, batch: 38306, sum loss: 4407.325684, avg loss: 2.494242, ppl: 12.112551 +epoch: 1, batch: 38307, sum loss: 4831.213379, avg loss: 2.686993, ppl: 14.687443 +epoch: 1, batch: 38308, sum loss: 3712.544678, avg loss: 2.389025, ppl: 10.902858 +epoch: 1, batch: 38309, sum loss: 4498.023926, avg loss: 2.639685, ppl: 14.008796 +epoch: 1, batch: 38310, sum loss: 4211.956055, avg loss: 2.329622, ppl: 10.274055 +epoch: 1, batch: 38311, sum loss: 4454.380859, avg loss: 2.607951, ppl: 13.571218 +epoch: 1, batch: 38312, sum loss: 5810.963379, avg loss: 2.800464, ppl: 16.452282 +epoch: 1, batch: 38313, sum loss: 4614.651367, avg loss: 2.595417, ppl: 13.402175 +epoch: 1, batch: 38314, sum loss: 5871.112305, avg loss: 2.907931, ppl: 18.318855 +epoch: 1, batch: 38315, sum loss: 4435.096680, avg loss: 2.761580, ppl: 15.824819 +epoch: 1, batch: 38316, sum loss: 4821.263184, avg loss: 2.578216, ppl: 13.173610 +epoch: 1, batch: 38317, sum loss: 4265.227051, avg loss: 2.718436, ppl: 15.156606 +epoch: 1, batch: 38318, sum loss: 3886.782715, avg loss: 2.432280, ppl: 11.384805 +epoch: 1, batch: 38319, sum loss: 4796.251953, avg loss: 2.623770, ppl: 13.787608 +epoch: 1, batch: 38320, sum loss: 3910.521484, avg loss: 2.629806, ppl: 13.871076 +epoch: 1, batch: 38321, sum loss: 5036.779785, avg loss: 2.657931, ppl: 14.266746 +epoch: 1, batch: 38322, sum loss: 4617.132812, avg loss: 2.751569, ppl: 15.667195 +epoch: 1, batch: 38323, sum loss: 3344.520752, avg loss: 2.298640, ppl: 9.960622 +epoch: 1, batch: 38324, sum loss: 4474.380371, avg loss: 2.571483, ppl: 13.085214 +epoch: 1, batch: 38325, sum loss: 4464.012695, avg loss: 2.661904, ppl: 14.323533 +epoch: 1, batch: 38326, sum loss: 4090.933594, avg loss: 2.491433, ppl: 12.078577 +epoch: 1, batch: 38327, sum loss: 4971.698730, avg loss: 2.647337, ppl: 14.116396 +epoch: 1, batch: 38328, sum loss: 4497.377930, avg loss: 2.490242, ppl: 12.064201 +epoch: 1, batch: 38329, sum loss: 
4674.071289, avg loss: 2.583787, ppl: 13.247216 +epoch: 1, batch: 38330, sum loss: 4732.169922, avg loss: 2.556548, ppl: 12.891238 +epoch: 1, batch: 38331, sum loss: 4198.220215, avg loss: 2.612458, ppl: 13.632522 +epoch: 1, batch: 38332, sum loss: 4609.429688, avg loss: 2.492931, ppl: 12.096682 +epoch: 1, batch: 38333, sum loss: 3980.207031, avg loss: 2.540017, ppl: 12.679892 +epoch: 1, batch: 38334, sum loss: 4292.490234, avg loss: 2.510228, ppl: 12.307738 +epoch: 1, batch: 38335, sum loss: 4187.378906, avg loss: 2.677352, ppl: 14.546526 +epoch: 1, batch: 38336, sum loss: 4116.855469, avg loss: 2.323282, ppl: 10.209126 +epoch: 1, batch: 38337, sum loss: 5339.576172, avg loss: 2.829664, ppl: 16.939768 +epoch: 1, batch: 38338, sum loss: 5378.345703, avg loss: 2.804143, ppl: 16.512913 +epoch: 1, batch: 38339, sum loss: 3620.200928, avg loss: 2.501867, ppl: 12.205255 +epoch: 1, batch: 38340, sum loss: 4587.757324, avg loss: 2.621576, ppl: 13.757382 +epoch: 1, batch: 38341, sum loss: 3974.725830, avg loss: 2.604670, ppl: 13.526758 +epoch: 1, batch: 38342, sum loss: 3993.646240, avg loss: 2.418926, ppl: 11.233785 +epoch: 1, batch: 38343, sum loss: 4247.089355, avg loss: 2.881336, ppl: 17.838093 +epoch: 1, batch: 38344, sum loss: 5794.858398, avg loss: 2.955053, ppl: 19.202738 +epoch: 1, batch: 38345, sum loss: 3776.161377, avg loss: 2.423724, ppl: 11.287813 +epoch: 1, batch: 38346, sum loss: 4033.131348, avg loss: 2.523862, ppl: 12.476687 +epoch: 1, batch: 38347, sum loss: 4377.082520, avg loss: 2.686975, ppl: 14.687180 +epoch: 1, batch: 38348, sum loss: 5246.318848, avg loss: 2.929268, ppl: 18.713924 +epoch: 1, batch: 38349, sum loss: 3797.772461, avg loss: 2.361799, ppl: 10.610020 +epoch: 1, batch: 38350, sum loss: 4860.665039, avg loss: 2.695876, ppl: 14.818500 +epoch: 1, batch: 38351, sum loss: 5848.541992, avg loss: 2.982429, ppl: 19.735687 +epoch: 1, batch: 38352, sum loss: 5066.459473, avg loss: 3.081788, ppl: 21.797342 +epoch: 1, batch: 38353, sum loss: 
5387.248047, avg loss: 2.942244, ppl: 18.958338 +epoch: 1, batch: 38354, sum loss: 5317.112793, avg loss: 2.829757, ppl: 16.941339 +epoch: 1, batch: 38355, sum loss: 4814.872559, avg loss: 2.752929, ppl: 15.688512 +epoch: 1, batch: 38356, sum loss: 4295.249023, avg loss: 2.417135, ppl: 11.213686 +epoch: 1, batch: 38357, sum loss: 4589.589844, avg loss: 2.579870, ppl: 13.195429 +epoch: 1, batch: 38358, sum loss: 4556.073730, avg loss: 2.810656, ppl: 16.620819 +epoch: 1, batch: 38359, sum loss: 4684.941406, avg loss: 2.609995, ppl: 13.598988 +epoch: 1, batch: 38360, sum loss: 4066.936035, avg loss: 2.548206, ppl: 12.784143 +epoch: 1, batch: 38361, sum loss: 4127.675293, avg loss: 2.383184, ppl: 10.839365 +epoch: 1, batch: 38362, sum loss: 3989.413330, avg loss: 2.355026, ppl: 10.538398 +epoch: 1, batch: 38363, sum loss: 4186.982910, avg loss: 2.492252, ppl: 12.088468 +epoch: 1, batch: 38364, sum loss: 4948.958984, avg loss: 2.781877, ppl: 16.149302 +epoch: 1, batch: 38365, sum loss: 4633.465332, avg loss: 2.777857, ppl: 16.084517 +epoch: 1, batch: 38366, sum loss: 4772.528809, avg loss: 2.936941, ppl: 18.858065 +epoch: 1, batch: 38367, sum loss: 4131.755859, avg loss: 2.468194, ppl: 11.801109 +epoch: 1, batch: 38368, sum loss: 4739.100586, avg loss: 2.781162, ppl: 16.137766 +epoch: 1, batch: 38369, sum loss: 4370.848633, avg loss: 2.454154, ppl: 11.636588 +epoch: 1, batch: 38370, sum loss: 4773.304688, avg loss: 2.722935, ppl: 15.224941 +epoch: 1, batch: 38371, sum loss: 5066.580566, avg loss: 2.716665, ppl: 15.129785 +epoch: 1, batch: 38372, sum loss: 4156.317871, avg loss: 2.396954, ppl: 10.989649 +epoch: 1, batch: 38373, sum loss: 3921.026855, avg loss: 2.453709, ppl: 11.631407 +epoch: 1, batch: 38374, sum loss: 5738.141113, avg loss: 2.871943, ppl: 17.671312 +epoch: 1, batch: 38375, sum loss: 3885.947266, avg loss: 2.576888, ppl: 13.156134 +epoch: 1, batch: 38376, sum loss: 3883.833984, avg loss: 2.572076, ppl: 13.092972 +epoch: 1, batch: 38377, sum loss: 
4773.341309, avg loss: 2.598444, ppl: 13.442804 +epoch: 1, batch: 38378, sum loss: 4914.564453, avg loss: 2.603053, ppl: 13.504907 +epoch: 1, batch: 38379, sum loss: 3799.027344, avg loss: 2.809932, ppl: 16.608780 +epoch: 1, batch: 38380, sum loss: 4467.541016, avg loss: 2.771427, ppl: 15.981429 +epoch: 1, batch: 38381, sum loss: 3424.683105, avg loss: 2.303082, ppl: 10.004971 +epoch: 1, batch: 38382, sum loss: 4390.625977, avg loss: 2.419078, ppl: 11.235494 +epoch: 1, batch: 38383, sum loss: 4692.705566, avg loss: 2.384505, ppl: 10.853689 +epoch: 1, batch: 38384, sum loss: 3914.689453, avg loss: 2.442102, ppl: 11.497182 +epoch: 1, batch: 38385, sum loss: 4389.361328, avg loss: 2.459026, ppl: 11.693418 +epoch: 1, batch: 38386, sum loss: 3977.254150, avg loss: 2.640939, ppl: 14.026368 +epoch: 1, batch: 38387, sum loss: 4859.092773, avg loss: 2.633655, ppl: 13.924565 +epoch: 1, batch: 38388, sum loss: 4490.217773, avg loss: 2.549811, ppl: 12.804688 +epoch: 1, batch: 38389, sum loss: 4628.945312, avg loss: 2.526717, ppl: 12.512360 +epoch: 1, batch: 38390, sum loss: 4030.289062, avg loss: 2.413347, ppl: 11.171287 +epoch: 1, batch: 38391, sum loss: 4911.795898, avg loss: 2.653590, ppl: 14.204949 +epoch: 1, batch: 38392, sum loss: 4544.548828, avg loss: 2.687492, ppl: 14.694777 +epoch: 1, batch: 38393, sum loss: 3727.369873, avg loss: 2.445781, ppl: 11.539556 +epoch: 1, batch: 38394, sum loss: 3974.816650, avg loss: 2.453591, ppl: 11.630031 +epoch: 1, batch: 38395, sum loss: 4427.499023, avg loss: 2.512769, ppl: 12.339052 +epoch: 1, batch: 38396, sum loss: 3779.160889, avg loss: 2.220423, ppl: 9.211230 +epoch: 1, batch: 38397, sum loss: 3458.771484, avg loss: 2.260635, ppl: 9.589178 +epoch: 1, batch: 38398, sum loss: 5004.132812, avg loss: 2.683181, ppl: 14.631563 +epoch: 1, batch: 38399, sum loss: 4670.315918, avg loss: 2.670278, ppl: 14.443982 +epoch: 1, batch: 38400, sum loss: 5185.987305, avg loss: 2.835423, ppl: 17.037598 +epoch: 1, batch: 38401, sum loss: 
3722.858643, avg loss: 2.369738, ppl: 10.694593 +epoch: 1, batch: 38402, sum loss: 4317.737793, avg loss: 2.683491, ppl: 14.636106 +epoch: 1, batch: 38403, sum loss: 5432.474121, avg loss: 2.632013, ppl: 13.901720 +epoch: 1, batch: 38404, sum loss: 3589.611084, avg loss: 2.167639, ppl: 8.737635 +epoch: 1, batch: 38405, sum loss: 4320.498047, avg loss: 2.663686, ppl: 14.349076 +epoch: 1, batch: 38406, sum loss: 5291.624023, avg loss: 2.840378, ppl: 17.122232 +epoch: 1, batch: 38407, sum loss: 5505.579590, avg loss: 2.813275, ppl: 16.664406 +epoch: 1, batch: 38408, sum loss: 4824.165039, avg loss: 2.537699, ppl: 12.650522 +epoch: 1, batch: 38409, sum loss: 4086.623535, avg loss: 2.491844, ppl: 12.083534 +epoch: 1, batch: 38410, sum loss: 4586.065918, avg loss: 2.607201, ppl: 13.561035 +epoch: 1, batch: 38411, sum loss: 4088.330566, avg loss: 2.265003, ppl: 9.631153 +epoch: 1, batch: 38412, sum loss: 5122.148926, avg loss: 2.618685, ppl: 13.717680 +epoch: 1, batch: 38413, sum loss: 4235.837891, avg loss: 2.559419, ppl: 12.928300 +epoch: 1, batch: 38414, sum loss: 3981.093994, avg loss: 2.652295, ppl: 14.186554 +epoch: 1, batch: 38415, sum loss: 4160.032227, avg loss: 2.314987, ppl: 10.124796 +epoch: 1, batch: 38416, sum loss: 4507.288086, avg loss: 2.803040, ppl: 16.494711 +epoch: 1, batch: 38417, sum loss: 4266.462402, avg loss: 2.481945, ppl: 11.964507 +epoch: 1, batch: 38418, sum loss: 4176.426758, avg loss: 2.586023, ppl: 13.276862 +epoch: 1, batch: 38419, sum loss: 4574.150391, avg loss: 2.593056, ppl: 13.370566 +epoch: 1, batch: 38420, sum loss: 3441.590088, avg loss: 2.396651, ppl: 10.986317 +epoch: 1, batch: 38421, sum loss: 4216.959473, avg loss: 2.405567, ppl: 11.084718 +epoch: 1, batch: 38422, sum loss: 3953.857178, avg loss: 2.433143, ppl: 11.394638 +epoch: 1, batch: 38423, sum loss: 4029.651367, avg loss: 2.359281, ppl: 10.583335 +epoch: 1, batch: 38424, sum loss: 4415.857910, avg loss: 2.570348, ppl: 13.070373 +epoch: 1, batch: 38425, sum loss: 
3937.339355, avg loss: 2.298505, ppl: 9.959283 +epoch: 1, batch: 38426, sum loss: 5194.666992, avg loss: 2.635549, ppl: 13.950967 +epoch: 1, batch: 38427, sum loss: 4509.328125, avg loss: 2.761377, ppl: 15.821609 +epoch: 1, batch: 38428, sum loss: 5240.102539, avg loss: 2.757949, ppl: 15.767468 +epoch: 1, batch: 38429, sum loss: 4749.268066, avg loss: 2.800276, ppl: 16.449186 +epoch: 1, batch: 38430, sum loss: 6006.399414, avg loss: 3.004702, ppl: 20.180202 +epoch: 1, batch: 38431, sum loss: 4202.720703, avg loss: 2.581524, ppl: 13.217261 +epoch: 1, batch: 38432, sum loss: 5225.799805, avg loss: 2.791560, ppl: 16.306437 +epoch: 1, batch: 38433, sum loss: 5472.665527, avg loss: 2.685312, ppl: 14.662772 +epoch: 1, batch: 38434, sum loss: 4550.403320, avg loss: 2.496107, ppl: 12.135164 +epoch: 1, batch: 38435, sum loss: 4526.572266, avg loss: 2.707280, ppl: 14.988454 +epoch: 1, batch: 38436, sum loss: 4903.136719, avg loss: 2.906424, ppl: 18.291269 +epoch: 1, batch: 38437, sum loss: 5018.800781, avg loss: 2.695382, ppl: 14.811173 +epoch: 1, batch: 38438, sum loss: 4787.225098, avg loss: 2.997636, ppl: 20.038116 +epoch: 1, batch: 38439, sum loss: 4119.338379, avg loss: 2.370160, ppl: 10.699105 +epoch: 1, batch: 38440, sum loss: 4539.553711, avg loss: 2.488791, ppl: 12.046697 +epoch: 1, batch: 38441, sum loss: 5001.665039, avg loss: 2.871220, ppl: 17.658546 +epoch: 1, batch: 38442, sum loss: 4553.668457, avg loss: 2.545371, ppl: 12.747954 +epoch: 1, batch: 38443, sum loss: 4536.223633, avg loss: 2.522927, ppl: 12.465032 +epoch: 1, batch: 38444, sum loss: 3967.307861, avg loss: 2.572833, ppl: 13.102887 +epoch: 1, batch: 38445, sum loss: 4693.555664, avg loss: 2.555012, ppl: 12.871448 +epoch: 1, batch: 38446, sum loss: 4918.594238, avg loss: 2.692170, ppl: 14.763677 +epoch: 1, batch: 38447, sum loss: 3685.129883, avg loss: 2.658824, ppl: 14.279486 +epoch: 1, batch: 38448, sum loss: 4986.931641, avg loss: 2.749135, ppl: 15.629114 +epoch: 1, batch: 38449, sum loss: 
3310.989502, avg loss: 2.349886, ppl: 10.484376 +epoch: 1, batch: 38450, sum loss: 3692.644531, avg loss: 2.604122, ppl: 13.519346 +epoch: 1, batch: 38451, sum loss: 4653.121094, avg loss: 2.556660, ppl: 12.892686 +epoch: 1, batch: 38452, sum loss: 3955.437988, avg loss: 2.698116, ppl: 14.851726 +epoch: 1, batch: 38453, sum loss: 4060.415771, avg loss: 2.659081, ppl: 14.283150 +epoch: 1, batch: 38454, sum loss: 4080.401855, avg loss: 2.612293, ppl: 13.630273 +epoch: 1, batch: 38455, sum loss: 3825.768066, avg loss: 2.406143, ppl: 11.091105 +epoch: 1, batch: 38456, sum loss: 3610.160889, avg loss: 2.544159, ppl: 12.732512 +epoch: 1, batch: 38457, sum loss: 3603.958008, avg loss: 2.619155, ppl: 13.724128 +epoch: 1, batch: 38458, sum loss: 4565.255859, avg loss: 2.607228, ppl: 13.561404 +epoch: 1, batch: 38459, sum loss: 4567.925293, avg loss: 2.824939, ppl: 16.859909 +epoch: 1, batch: 38460, sum loss: 4357.457031, avg loss: 2.524598, ppl: 12.485880 +epoch: 1, batch: 38461, sum loss: 4552.229492, avg loss: 2.690443, ppl: 14.738204 +epoch: 1, batch: 38462, sum loss: 4039.714844, avg loss: 2.698540, ppl: 14.858027 +epoch: 1, batch: 38463, sum loss: 3958.071045, avg loss: 2.519460, ppl: 12.421884 +epoch: 1, batch: 38464, sum loss: 5940.910645, avg loss: 2.838467, ppl: 17.089540 +epoch: 1, batch: 38465, sum loss: 4647.142578, avg loss: 2.597620, ppl: 13.431736 +epoch: 1, batch: 38466, sum loss: 4999.858398, avg loss: 2.819999, ppl: 16.776838 +epoch: 1, batch: 38467, sum loss: 4862.649902, avg loss: 2.823838, ppl: 16.841372 +epoch: 1, batch: 38468, sum loss: 4126.289062, avg loss: 2.575711, ppl: 13.140657 +epoch: 1, batch: 38469, sum loss: 4506.058594, avg loss: 2.489535, ppl: 12.055673 +epoch: 1, batch: 38470, sum loss: 4196.462402, avg loss: 2.484584, ppl: 11.996130 +epoch: 1, batch: 38471, sum loss: 3361.763184, avg loss: 2.213142, ppl: 9.144404 +epoch: 1, batch: 38472, sum loss: 3406.153809, avg loss: 2.164011, ppl: 8.705990 +epoch: 1, batch: 38473, sum loss: 
3471.344482, avg loss: 2.410656, ppl: 11.141267 +epoch: 1, batch: 38474, sum loss: 4646.785645, avg loss: 2.553179, ppl: 12.847882 +epoch: 1, batch: 38475, sum loss: 5036.468750, avg loss: 2.810529, ppl: 16.618715 +epoch: 1, batch: 38476, sum loss: 4693.016113, avg loss: 2.640977, ppl: 14.026900 +epoch: 1, batch: 38477, sum loss: 3939.420166, avg loss: 2.425751, ppl: 11.310723 +epoch: 1, batch: 38478, sum loss: 5512.527832, avg loss: 2.872604, ppl: 17.683012 +epoch: 1, batch: 38479, sum loss: 4903.725586, avg loss: 2.650663, ppl: 14.163421 +epoch: 1, batch: 38480, sum loss: 2876.276367, avg loss: 2.055952, ppl: 7.814272 +epoch: 1, batch: 38481, sum loss: 4797.269043, avg loss: 2.739731, ppl: 15.482821 +epoch: 1, batch: 38482, sum loss: 4749.310547, avg loss: 2.561656, ppl: 12.957256 +epoch: 1, batch: 38483, sum loss: 3902.230713, avg loss: 2.426760, ppl: 11.322144 +epoch: 1, batch: 38484, sum loss: 3802.382324, avg loss: 2.348599, ppl: 10.470894 +epoch: 1, batch: 38485, sum loss: 5037.623047, avg loss: 2.561069, ppl: 12.949654 +epoch: 1, batch: 38486, sum loss: 3821.858643, avg loss: 2.654068, ppl: 14.211741 +epoch: 1, batch: 38487, sum loss: 3888.134521, avg loss: 2.476519, ppl: 11.899768 +epoch: 1, batch: 38488, sum loss: 4166.477539, avg loss: 2.538987, ppl: 12.666832 +epoch: 1, batch: 38489, sum loss: 4618.450195, avg loss: 2.585918, ppl: 13.275476 +epoch: 1, batch: 38490, sum loss: 4355.080078, avg loss: 2.485776, ppl: 12.010442 +epoch: 1, batch: 38491, sum loss: 4195.399414, avg loss: 2.401488, ppl: 11.039589 +epoch: 1, batch: 38492, sum loss: 3584.880615, avg loss: 2.279009, ppl: 9.766993 +epoch: 1, batch: 38493, sum loss: 4767.286621, avg loss: 2.449788, ppl: 11.585886 +epoch: 1, batch: 38494, sum loss: 3467.810547, avg loss: 2.201784, ppl: 9.041132 +epoch: 1, batch: 38495, sum loss: 3959.559082, avg loss: 2.517202, ppl: 12.393871 +epoch: 1, batch: 38496, sum loss: 4596.484863, avg loss: 2.700637, ppl: 14.889218 +epoch: 1, batch: 38497, sum loss: 
3964.902832, avg loss: 2.458092, ppl: 11.682503 +epoch: 1, batch: 38498, sum loss: 4289.548340, avg loss: 2.517341, ppl: 12.395589 +epoch: 1, batch: 38499, sum loss: 5663.687012, avg loss: 2.826191, ppl: 16.881041 +epoch: 1, batch: 38500, sum loss: 4740.088379, avg loss: 2.522666, ppl: 12.461769 +epoch: 1, batch: 38501, sum loss: 5988.056152, avg loss: 2.923856, ppl: 18.612913 +epoch: 1, batch: 38502, sum loss: 4177.338867, avg loss: 2.536332, ppl: 12.633245 +epoch: 1, batch: 38503, sum loss: 3725.926025, avg loss: 2.322897, ppl: 10.205194 +epoch: 1, batch: 38504, sum loss: 4621.645020, avg loss: 2.785802, ppl: 16.212811 +epoch: 1, batch: 38505, sum loss: 4578.696777, avg loss: 2.565096, ppl: 13.001911 +epoch: 1, batch: 38506, sum loss: 5104.862793, avg loss: 2.589986, ppl: 13.329586 +epoch: 1, batch: 38507, sum loss: 4004.764404, avg loss: 2.526665, ppl: 12.511713 +epoch: 1, batch: 38508, sum loss: 3790.852295, avg loss: 2.474447, ppl: 11.875135 +epoch: 1, batch: 38509, sum loss: 4077.619141, avg loss: 2.593905, ppl: 13.381929 +epoch: 1, batch: 38510, sum loss: 4296.716797, avg loss: 2.819368, ppl: 16.766249 +epoch: 1, batch: 38511, sum loss: 5378.427734, avg loss: 2.829262, ppl: 16.932964 +epoch: 1, batch: 38512, sum loss: 4340.918457, avg loss: 2.605593, ppl: 13.539254 +epoch: 1, batch: 38513, sum loss: 4925.824707, avg loss: 2.532558, ppl: 12.585655 +epoch: 1, batch: 38514, sum loss: 5935.690430, avg loss: 2.834618, ppl: 17.023897 +epoch: 1, batch: 38515, sum loss: 4871.808105, avg loss: 2.533442, ppl: 12.596784 +epoch: 1, batch: 38516, sum loss: 3977.596436, avg loss: 2.557940, ppl: 12.909197 +epoch: 1, batch: 38517, sum loss: 3385.303711, avg loss: 2.372322, ppl: 10.722264 +epoch: 1, batch: 38518, sum loss: 4378.645996, avg loss: 2.589383, ppl: 13.321545 +epoch: 1, batch: 38519, sum loss: 4254.801758, avg loss: 2.626421, ppl: 13.824204 +epoch: 1, batch: 38520, sum loss: 4954.486328, avg loss: 2.957902, ppl: 19.257536 +epoch: 1, batch: 38521, sum loss: 
4642.898438, avg loss: 2.552446, ppl: 12.838464 +epoch: 1, batch: 38522, sum loss: 4839.426270, avg loss: 2.657565, ppl: 14.261525 +epoch: 1, batch: 38523, sum loss: 4109.279297, avg loss: 2.689319, ppl: 14.721645 +epoch: 1, batch: 38524, sum loss: 4957.610352, avg loss: 2.845930, ppl: 17.217566 +epoch: 1, batch: 38525, sum loss: 4962.988281, avg loss: 2.472839, ppl: 11.856063 +epoch: 1, batch: 38526, sum loss: 4010.090088, avg loss: 2.372834, ppl: 10.727757 +epoch: 1, batch: 38527, sum loss: 3709.276611, avg loss: 2.544086, ppl: 12.731580 +epoch: 1, batch: 38528, sum loss: 4808.726562, avg loss: 2.668550, ppl: 14.419044 +epoch: 1, batch: 38529, sum loss: 4534.058105, avg loss: 2.570328, ppl: 13.070107 +epoch: 1, batch: 38530, sum loss: 4117.041504, avg loss: 2.444799, ppl: 11.528234 +epoch: 1, batch: 38531, sum loss: 3970.980469, avg loss: 2.333126, ppl: 10.310122 +epoch: 1, batch: 38532, sum loss: 3547.081055, avg loss: 2.512097, ppl: 12.330762 +epoch: 1, batch: 38533, sum loss: 4511.194824, avg loss: 2.369325, ppl: 10.690176 +epoch: 1, batch: 38534, sum loss: 4838.108398, avg loss: 2.699837, ppl: 14.877314 +epoch: 1, batch: 38535, sum loss: 4569.907715, avg loss: 2.529002, ppl: 12.540989 +epoch: 1, batch: 38536, sum loss: 4659.501465, avg loss: 2.837699, ppl: 17.076426 +epoch: 1, batch: 38537, sum loss: 4506.384277, avg loss: 2.613912, ppl: 13.652356 +epoch: 1, batch: 38538, sum loss: 4184.728027, avg loss: 2.604062, ppl: 13.518543 +epoch: 1, batch: 38539, sum loss: 4468.216797, avg loss: 2.535878, ppl: 12.627512 +epoch: 1, batch: 38540, sum loss: 4133.638672, avg loss: 2.514379, ppl: 12.358929 +epoch: 1, batch: 38541, sum loss: 4635.751953, avg loss: 2.780895, ppl: 16.133453 +epoch: 1, batch: 38542, sum loss: 4648.653320, avg loss: 2.785293, ppl: 16.204559 +epoch: 1, batch: 38543, sum loss: 4664.520508, avg loss: 2.479809, ppl: 11.938982 +epoch: 1, batch: 38544, sum loss: 4485.546875, avg loss: 2.736758, ppl: 15.436861 +epoch: 1, batch: 38545, sum loss: 
5695.653320, avg loss: 2.744893, ppl: 15.562950 +epoch: 1, batch: 38546, sum loss: 4207.272461, avg loss: 2.598686, ppl: 13.446058 +epoch: 1, batch: 38547, sum loss: 4759.391113, avg loss: 2.714998, ppl: 15.104576 +epoch: 1, batch: 38548, sum loss: 4275.781250, avg loss: 2.472979, ppl: 11.857722 +epoch: 1, batch: 38549, sum loss: 5093.947754, avg loss: 2.712432, ppl: 15.065873 +epoch: 1, batch: 38550, sum loss: 4246.117188, avg loss: 2.524445, ppl: 12.483969 +epoch: 1, batch: 38551, sum loss: 4736.045410, avg loss: 2.841059, ppl: 17.133900 +epoch: 1, batch: 38552, sum loss: 4973.648926, avg loss: 2.842085, ppl: 17.151491 +epoch: 1, batch: 38553, sum loss: 3524.710449, avg loss: 2.325007, ppl: 10.226751 +epoch: 1, batch: 38554, sum loss: 3836.967529, avg loss: 2.348205, ppl: 10.466768 +epoch: 1, batch: 38555, sum loss: 4533.988770, avg loss: 2.660792, ppl: 14.307611 +epoch: 1, batch: 38556, sum loss: 4606.816406, avg loss: 2.418276, ppl: 11.226492 +epoch: 1, batch: 38557, sum loss: 4043.825195, avg loss: 2.333425, ppl: 10.313202 +epoch: 1, batch: 38558, sum loss: 3682.407227, avg loss: 2.311618, ppl: 10.090736 +epoch: 1, batch: 38559, sum loss: 4867.137207, avg loss: 2.816630, ppl: 16.720413 +epoch: 1, batch: 38560, sum loss: 5088.994629, avg loss: 2.768767, ppl: 15.938974 +epoch: 1, batch: 38561, sum loss: 4637.349121, avg loss: 2.760327, ppl: 15.805008 +epoch: 1, batch: 38562, sum loss: 4014.355469, avg loss: 2.692391, ppl: 14.766948 +epoch: 1, batch: 38563, sum loss: 5069.625488, avg loss: 2.708133, ppl: 15.001249 +epoch: 1, batch: 38564, sum loss: 4206.541016, avg loss: 2.486135, ppl: 12.014752 +epoch: 1, batch: 38565, sum loss: 3598.007080, avg loss: 2.233400, ppl: 9.331538 +epoch: 1, batch: 38566, sum loss: 4205.617188, avg loss: 2.618690, ppl: 13.717749 +epoch: 1, batch: 38567, sum loss: 4414.692383, avg loss: 2.776536, ppl: 16.063282 +epoch: 1, batch: 38568, sum loss: 4606.990723, avg loss: 2.732497, ppl: 15.371228 +epoch: 1, batch: 38569, sum loss: 
4775.576660, avg loss: 2.718029, ppl: 15.150428 +epoch: 1, batch: 38570, sum loss: 4220.637695, avg loss: 2.300075, ppl: 9.974931 +epoch: 1, batch: 38571, sum loss: 4121.427734, avg loss: 2.450314, ppl: 11.591984 +epoch: 1, batch: 38572, sum loss: 4320.465332, avg loss: 2.379111, ppl: 10.795300 +epoch: 1, batch: 38573, sum loss: 5374.132324, avg loss: 2.925494, ppl: 18.643436 +epoch: 1, batch: 38574, sum loss: 3888.200928, avg loss: 2.382476, ppl: 10.831690 +epoch: 1, batch: 38575, sum loss: 5257.558594, avg loss: 2.765681, ppl: 15.889851 +epoch: 1, batch: 38576, sum loss: 4588.369141, avg loss: 2.678558, ppl: 14.564072 +epoch: 1, batch: 38577, sum loss: 3932.931885, avg loss: 2.618463, ppl: 13.714632 +epoch: 1, batch: 38578, sum loss: 4290.488281, avg loss: 2.497374, ppl: 12.150542 +epoch: 1, batch: 38579, sum loss: 3937.303223, avg loss: 2.380474, ppl: 10.810021 +epoch: 1, batch: 38580, sum loss: 3742.245850, avg loss: 2.444315, ppl: 11.522654 +epoch: 1, batch: 38581, sum loss: 4239.010742, avg loss: 2.603815, ppl: 13.515198 +epoch: 1, batch: 38582, sum loss: 4700.874512, avg loss: 2.604362, ppl: 13.522602 +epoch: 1, batch: 38583, sum loss: 4507.220703, avg loss: 2.645083, ppl: 14.084610 +epoch: 1, batch: 38584, sum loss: 4290.770020, avg loss: 2.544941, ppl: 12.742473 +epoch: 1, batch: 38585, sum loss: 4088.692139, avg loss: 2.634467, ppl: 13.935878 +epoch: 1, batch: 38586, sum loss: 3436.609863, avg loss: 2.200134, ppl: 9.026225 +epoch: 1, batch: 38587, sum loss: 4386.647461, avg loss: 2.547414, ppl: 12.774032 +epoch: 1, batch: 38588, sum loss: 3832.997559, avg loss: 2.340047, ppl: 10.381725 +epoch: 1, batch: 38589, sum loss: 4585.208984, avg loss: 2.624619, ppl: 13.799313 +epoch: 1, batch: 38590, sum loss: 4541.849121, avg loss: 2.651401, ppl: 14.173876 +epoch: 1, batch: 38591, sum loss: 4572.954590, avg loss: 2.637229, ppl: 13.974423 +epoch: 1, batch: 38592, sum loss: 4731.682617, avg loss: 2.664236, ppl: 14.356975 +epoch: 1, batch: 38593, sum loss: 
4246.897461, avg loss: 2.547629, ppl: 12.776772 +epoch: 1, batch: 38594, sum loss: 4752.647461, avg loss: 2.720462, ppl: 15.187339 +epoch: 1, batch: 38595, sum loss: 4698.181152, avg loss: 2.677026, ppl: 14.541785 +epoch: 1, batch: 38596, sum loss: 4193.435059, avg loss: 2.669278, ppl: 14.429543 +epoch: 1, batch: 38597, sum loss: 4874.641113, avg loss: 2.706630, ppl: 14.978716 +epoch: 1, batch: 38598, sum loss: 4804.596191, avg loss: 2.794995, ppl: 16.362549 +epoch: 1, batch: 38599, sum loss: 4724.014160, avg loss: 2.756134, ppl: 15.738883 +epoch: 1, batch: 38600, sum loss: 4943.715820, avg loss: 2.554892, ppl: 12.869907 +epoch: 1, batch: 38601, sum loss: 4036.661621, avg loss: 2.602619, ppl: 13.499042 +epoch: 1, batch: 38602, sum loss: 4059.259766, avg loss: 2.417665, ppl: 11.219631 +epoch: 1, batch: 38603, sum loss: 4504.150879, avg loss: 2.701950, ppl: 14.908776 +epoch: 1, batch: 38604, sum loss: 4572.268555, avg loss: 2.459531, ppl: 11.699324 +epoch: 1, batch: 38605, sum loss: 4246.273926, avg loss: 2.690921, ppl: 14.745255 +epoch: 1, batch: 38606, sum loss: 4071.275391, avg loss: 2.571873, ppl: 13.090322 +epoch: 1, batch: 38607, sum loss: 4156.449219, avg loss: 2.457983, ppl: 11.681227 +epoch: 1, batch: 38608, sum loss: 3979.351807, avg loss: 2.507468, ppl: 12.273816 +epoch: 1, batch: 38609, sum loss: 5197.650391, avg loss: 2.887584, ppl: 17.949886 +epoch: 1, batch: 38610, sum loss: 4341.374512, avg loss: 2.660156, ppl: 14.298520 +epoch: 1, batch: 38611, sum loss: 4908.452637, avg loss: 2.573913, ppl: 13.117056 +epoch: 1, batch: 38612, sum loss: 4311.742188, avg loss: 2.592749, ppl: 13.366470 +epoch: 1, batch: 38613, sum loss: 4465.836914, avg loss: 2.625418, ppl: 13.810351 +epoch: 1, batch: 38614, sum loss: 4365.695312, avg loss: 2.738830, ppl: 15.468877 +epoch: 1, batch: 38615, sum loss: 4749.675781, avg loss: 2.728131, ppl: 15.304254 +epoch: 1, batch: 38616, sum loss: 4161.158691, avg loss: 2.660587, ppl: 14.304688 +epoch: 1, batch: 38617, sum loss: 
5494.465820, avg loss: 3.105973, ppl: 22.330936 +epoch: 1, batch: 38618, sum loss: 3883.914307, avg loss: 2.647521, ppl: 14.119001 +epoch: 1, batch: 38619, sum loss: 4091.701660, avg loss: 2.906038, ppl: 18.284218 +epoch: 1, batch: 38620, sum loss: 4606.714844, avg loss: 2.670559, ppl: 14.448050 +epoch: 1, batch: 38621, sum loss: 4468.858398, avg loss: 2.628740, ppl: 13.856304 +epoch: 1, batch: 38622, sum loss: 3941.907959, avg loss: 2.515576, ppl: 12.373738 +epoch: 1, batch: 38623, sum loss: 3869.979980, avg loss: 2.435481, ppl: 11.421314 +epoch: 1, batch: 38624, sum loss: 4182.262207, avg loss: 2.430135, ppl: 11.360413 +epoch: 1, batch: 38625, sum loss: 4621.359375, avg loss: 2.649862, ppl: 14.152082 +epoch: 1, batch: 38626, sum loss: 5487.393066, avg loss: 2.687264, ppl: 14.691424 +epoch: 1, batch: 38627, sum loss: 4730.080078, avg loss: 2.572094, ppl: 13.093209 +epoch: 1, batch: 38628, sum loss: 5341.114258, avg loss: 2.718124, ppl: 15.151876 +epoch: 1, batch: 38629, sum loss: 4233.443359, avg loss: 2.547198, ppl: 12.771273 +epoch: 1, batch: 38630, sum loss: 4840.544922, avg loss: 2.534317, ppl: 12.607814 +epoch: 1, batch: 38631, sum loss: 4529.188477, avg loss: 2.647100, ppl: 14.113058 +epoch: 1, batch: 38632, sum loss: 5324.627441, avg loss: 2.954843, ppl: 19.198713 +epoch: 1, batch: 38633, sum loss: 4340.027344, avg loss: 2.644745, ppl: 14.079859 +epoch: 1, batch: 38634, sum loss: 3875.356201, avg loss: 2.357273, ppl: 10.562105 +epoch: 1, batch: 38635, sum loss: 3825.203125, avg loss: 2.459938, ppl: 11.704083 +epoch: 1, batch: 38636, sum loss: 4891.604980, avg loss: 2.909937, ppl: 18.355650 +epoch: 1, batch: 38637, sum loss: 4158.088867, avg loss: 2.573075, ppl: 13.106061 +epoch: 1, batch: 38638, sum loss: 3651.929443, avg loss: 2.252887, ppl: 9.515164 +epoch: 1, batch: 38639, sum loss: 4951.208008, avg loss: 2.710021, ppl: 15.029588 +epoch: 1, batch: 38640, sum loss: 3948.439697, avg loss: 2.346072, ppl: 10.444466 +epoch: 1, batch: 38641, sum loss: 
3089.767090, avg loss: 2.200689, ppl: 9.031233 +epoch: 1, batch: 38642, sum loss: 4364.935547, avg loss: 2.629479, ppl: 13.866549 +epoch: 1, batch: 38643, sum loss: 5024.830566, avg loss: 2.722010, ppl: 15.210868 +epoch: 1, batch: 38644, sum loss: 5159.510742, avg loss: 2.767978, ppl: 15.926397 +epoch: 1, batch: 38645, sum loss: 3981.432373, avg loss: 2.450112, ppl: 11.589646 +epoch: 1, batch: 38646, sum loss: 4804.019531, avg loss: 2.658561, ppl: 14.275728 +epoch: 1, batch: 38647, sum loss: 5402.780762, avg loss: 2.803726, ppl: 16.506041 +epoch: 1, batch: 38648, sum loss: 3539.375000, avg loss: 2.579720, ppl: 13.193438 +epoch: 1, batch: 38649, sum loss: 4331.125000, avg loss: 2.544727, ppl: 12.739748 +epoch: 1, batch: 38650, sum loss: 4132.155762, avg loss: 2.582597, ppl: 13.231459 +epoch: 1, batch: 38651, sum loss: 4337.910156, avg loss: 2.622679, ppl: 13.772564 +epoch: 1, batch: 38652, sum loss: 4498.473145, avg loss: 2.639949, ppl: 14.012490 +epoch: 1, batch: 38653, sum loss: 4260.934082, avg loss: 2.628584, ppl: 13.854134 +epoch: 1, batch: 38654, sum loss: 4567.019531, avg loss: 2.552834, ppl: 12.843448 +epoch: 1, batch: 38655, sum loss: 4183.606445, avg loss: 2.537057, ppl: 12.642406 +epoch: 1, batch: 38656, sum loss: 4034.095703, avg loss: 2.447874, ppl: 11.563731 +epoch: 1, batch: 38657, sum loss: 4817.228027, avg loss: 2.692693, ppl: 14.771401 +epoch: 1, batch: 38658, sum loss: 4334.199219, avg loss: 2.662284, ppl: 14.328984 +epoch: 1, batch: 38659, sum loss: 4063.606201, avg loss: 2.437676, ppl: 11.446410 +epoch: 1, batch: 38660, sum loss: 4240.613281, avg loss: 2.554586, ppl: 12.865977 +epoch: 1, batch: 38661, sum loss: 4229.593262, avg loss: 2.513127, ppl: 12.343472 +epoch: 1, batch: 38662, sum loss: 4978.410156, avg loss: 2.730889, ppl: 15.346521 +epoch: 1, batch: 38663, sum loss: 4368.303223, avg loss: 2.626761, ppl: 13.828901 +epoch: 1, batch: 38664, sum loss: 3630.569336, avg loss: 2.430100, ppl: 11.360018 +epoch: 1, batch: 38665, sum loss: 
4921.930176, avg loss: 2.723813, ppl: 15.238313 +epoch: 1, batch: 38666, sum loss: 4056.162354, avg loss: 2.447895, ppl: 11.563979 +epoch: 1, batch: 38667, sum loss: 4270.194824, avg loss: 2.440111, ppl: 11.474319 +epoch: 1, batch: 38668, sum loss: 4092.970703, avg loss: 2.531213, ppl: 12.568737 +epoch: 1, batch: 38669, sum loss: 3911.009766, avg loss: 2.511888, ppl: 12.328184 +epoch: 1, batch: 38670, sum loss: 5195.020020, avg loss: 2.688934, ppl: 14.715978 +epoch: 1, batch: 38671, sum loss: 4610.915527, avg loss: 2.473667, ppl: 11.865881 +epoch: 1, batch: 38672, sum loss: 3826.978027, avg loss: 2.385897, ppl: 10.868804 +epoch: 1, batch: 38673, sum loss: 3774.636719, avg loss: 2.455847, ppl: 11.656302 +epoch: 1, batch: 38674, sum loss: 4278.547852, avg loss: 2.548271, ppl: 12.784985 +epoch: 1, batch: 38675, sum loss: 5216.536621, avg loss: 2.932286, ppl: 18.770487 +epoch: 1, batch: 38676, sum loss: 4352.834473, avg loss: 2.563507, ppl: 12.981261 +epoch: 1, batch: 38677, sum loss: 3529.727051, avg loss: 2.166806, ppl: 8.730354 +epoch: 1, batch: 38678, sum loss: 5037.790527, avg loss: 2.868901, ppl: 17.617651 +epoch: 1, batch: 38679, sum loss: 4567.014648, avg loss: 2.747903, ppl: 15.609861 +epoch: 1, batch: 38680, sum loss: 4254.358398, avg loss: 2.595704, ppl: 13.406016 +epoch: 1, batch: 38681, sum loss: 5647.207031, avg loss: 2.827845, ppl: 16.908989 +epoch: 1, batch: 38682, sum loss: 3957.071289, avg loss: 2.673697, ppl: 14.493452 +epoch: 1, batch: 38683, sum loss: 4670.994629, avg loss: 2.589243, ppl: 13.319687 +epoch: 1, batch: 38684, sum loss: 4065.319580, avg loss: 2.422717, ppl: 11.276460 +epoch: 1, batch: 38685, sum loss: 3698.262451, avg loss: 2.482055, ppl: 11.965834 +epoch: 1, batch: 38686, sum loss: 4512.331543, avg loss: 2.425985, ppl: 11.313366 +epoch: 1, batch: 38687, sum loss: 4245.306641, avg loss: 2.849199, ppl: 17.273945 +epoch: 1, batch: 38688, sum loss: 4722.421875, avg loss: 2.736050, ppl: 15.425927 +epoch: 1, batch: 38689, sum loss: 
4052.838379, avg loss: 2.648914, ppl: 14.138677 +epoch: 1, batch: 38690, sum loss: 3730.566895, avg loss: 2.367111, ppl: 10.666532 +epoch: 1, batch: 38691, sum loss: 4407.919922, avg loss: 2.605154, ppl: 13.533303 +epoch: 1, batch: 38692, sum loss: 3968.297119, avg loss: 2.692196, ppl: 14.764064 +epoch: 1, batch: 38693, sum loss: 3872.250488, avg loss: 2.430791, ppl: 11.367875 +epoch: 1, batch: 38694, sum loss: 4622.316895, avg loss: 2.489131, ppl: 12.050802 +epoch: 1, batch: 38695, sum loss: 4707.675293, avg loss: 2.858333, ppl: 17.432449 +epoch: 1, batch: 38696, sum loss: 4397.256836, avg loss: 2.455196, ppl: 11.648721 +epoch: 1, batch: 38697, sum loss: 3221.568848, avg loss: 2.111120, ppl: 8.257484 +epoch: 1, batch: 38698, sum loss: 4116.001953, avg loss: 2.606714, ppl: 13.554441 +epoch: 1, batch: 38699, sum loss: 4480.591797, avg loss: 2.594437, ppl: 13.389043 +epoch: 1, batch: 38700, sum loss: 4702.500000, avg loss: 2.789146, ppl: 16.267120 +epoch: 1, batch: 38701, sum loss: 4738.620117, avg loss: 2.599353, ppl: 13.455028 +epoch: 1, batch: 38702, sum loss: 5300.655762, avg loss: 2.763637, ppl: 15.857412 +epoch: 1, batch: 38703, sum loss: 3947.857666, avg loss: 2.481369, ppl: 11.957621 +epoch: 1, batch: 38704, sum loss: 3835.696777, avg loss: 2.455632, ppl: 11.653793 +epoch: 1, batch: 38705, sum loss: 4322.193848, avg loss: 2.518761, ppl: 12.413209 +epoch: 1, batch: 38706, sum loss: 4713.799316, avg loss: 2.745369, ppl: 15.570365 +epoch: 1, batch: 38707, sum loss: 5701.645508, avg loss: 2.852249, ppl: 17.326704 +epoch: 1, batch: 38708, sum loss: 4393.700195, avg loss: 2.599823, ppl: 13.461349 +epoch: 1, batch: 38709, sum loss: 5142.251953, avg loss: 2.674078, ppl: 14.498976 +epoch: 1, batch: 38710, sum loss: 4073.923096, avg loss: 2.406334, ppl: 11.093218 +epoch: 1, batch: 38711, sum loss: 4480.511230, avg loss: 2.750467, ppl: 15.649943 +epoch: 1, batch: 38712, sum loss: 5610.152832, avg loss: 2.834842, ppl: 17.027714 +epoch: 1, batch: 38713, sum loss: 
4130.637695, avg loss: 2.444164, ppl: 11.520917 +epoch: 1, batch: 38714, sum loss: 5205.864746, avg loss: 3.164659, ppl: 23.680674 +epoch: 1, batch: 38715, sum loss: 4505.765625, avg loss: 2.734081, ppl: 15.395589 +epoch: 1, batch: 38716, sum loss: 4362.395020, avg loss: 2.511454, ppl: 12.322830 +epoch: 1, batch: 38717, sum loss: 3941.592041, avg loss: 2.544604, ppl: 12.738187 +epoch: 1, batch: 38718, sum loss: 4482.828125, avg loss: 2.694007, ppl: 14.790830 +epoch: 1, batch: 38719, sum loss: 4314.083984, avg loss: 2.374290, ppl: 10.743378 +epoch: 1, batch: 38720, sum loss: 4982.576172, avg loss: 2.920619, ppl: 18.552769 +epoch: 1, batch: 38721, sum loss: 4314.928711, avg loss: 2.629451, ppl: 13.866149 +epoch: 1, batch: 38722, sum loss: 3258.975098, avg loss: 2.295053, ppl: 9.924962 +epoch: 1, batch: 38723, sum loss: 4491.344727, avg loss: 2.702374, ppl: 14.915091 +epoch: 1, batch: 38724, sum loss: 4424.040527, avg loss: 2.507960, ppl: 12.279849 +epoch: 1, batch: 38725, sum loss: 4451.110840, avg loss: 2.524737, ppl: 12.487606 +epoch: 1, batch: 38726, sum loss: 5457.675781, avg loss: 2.860417, ppl: 17.468807 +epoch: 1, batch: 38727, sum loss: 4607.069336, avg loss: 2.637132, ppl: 13.973067 +epoch: 1, batch: 38728, sum loss: 5105.262207, avg loss: 2.607386, ppl: 13.563554 +epoch: 1, batch: 38729, sum loss: 4284.352539, avg loss: 2.591865, ppl: 13.354652 +epoch: 1, batch: 38730, sum loss: 5647.135742, avg loss: 2.980019, ppl: 19.688189 +epoch: 1, batch: 38731, sum loss: 4582.730469, avg loss: 2.670589, ppl: 14.448477 +epoch: 1, batch: 38732, sum loss: 4159.706055, avg loss: 2.468668, ppl: 11.806712 +epoch: 1, batch: 38733, sum loss: 4057.350830, avg loss: 2.504538, ppl: 12.237899 +epoch: 1, batch: 38734, sum loss: 4112.499023, avg loss: 2.544863, ppl: 12.741486 +epoch: 1, batch: 38735, sum loss: 4645.734375, avg loss: 2.511208, ppl: 12.319801 +epoch: 1, batch: 38736, sum loss: 3507.677002, avg loss: 2.257192, ppl: 9.556221 +epoch: 1, batch: 38737, sum loss: 
4489.424316, avg loss: 2.667513, ppl: 14.404100 +epoch: 1, batch: 38738, sum loss: 4613.352051, avg loss: 2.767458, ppl: 15.918115 +epoch: 1, batch: 38739, sum loss: 3622.989746, avg loss: 2.397743, ppl: 10.998325 +epoch: 1, batch: 38740, sum loss: 4336.234863, avg loss: 2.648891, ppl: 14.138353 +epoch: 1, batch: 38741, sum loss: 3709.579102, avg loss: 2.361285, ppl: 10.604574 +epoch: 1, batch: 38742, sum loss: 4112.733887, avg loss: 2.670606, ppl: 14.448728 +epoch: 1, batch: 38743, sum loss: 3763.084961, avg loss: 2.424668, ppl: 11.298481 +epoch: 1, batch: 38744, sum loss: 4715.301270, avg loss: 2.705279, ppl: 14.958488 +epoch: 1, batch: 38745, sum loss: 4340.708496, avg loss: 2.469117, ppl: 11.812017 +epoch: 1, batch: 38746, sum loss: 5486.795898, avg loss: 2.837020, ppl: 17.064831 +epoch: 1, batch: 38747, sum loss: 5411.294922, avg loss: 2.748245, ppl: 15.615207 +epoch: 1, batch: 38748, sum loss: 4114.341309, avg loss: 2.327116, ppl: 10.248345 +epoch: 1, batch: 38749, sum loss: 4503.652344, avg loss: 2.461012, ppl: 11.716667 +epoch: 1, batch: 38750, sum loss: 3925.218506, avg loss: 2.370301, ppl: 10.700613 +epoch: 1, batch: 38751, sum loss: 4392.007324, avg loss: 2.586577, ppl: 13.284221 +epoch: 1, batch: 38752, sum loss: 5243.234375, avg loss: 2.805369, ppl: 16.533171 +epoch: 1, batch: 38753, sum loss: 4387.158691, avg loss: 2.567091, ppl: 13.027874 +epoch: 1, batch: 38754, sum loss: 4431.254883, avg loss: 2.690501, ppl: 14.739055 +epoch: 1, batch: 38755, sum loss: 4390.604004, avg loss: 2.747562, ppl: 15.604541 +epoch: 1, batch: 38756, sum loss: 3657.345215, avg loss: 2.541588, ppl: 12.699821 +epoch: 1, batch: 38757, sum loss: 4489.316895, avg loss: 2.477548, ppl: 11.912019 +epoch: 1, batch: 38758, sum loss: 5369.845703, avg loss: 2.948844, ppl: 19.083889 +epoch: 1, batch: 38759, sum loss: 4509.920898, avg loss: 2.832865, ppl: 16.994076 +epoch: 1, batch: 38760, sum loss: 3891.453369, avg loss: 2.538456, ppl: 12.660111 +epoch: 1, batch: 38761, sum loss: 
4919.092285, avg loss: 2.713234, ppl: 15.077954 +epoch: 1, batch: 38762, sum loss: 4980.145508, avg loss: 2.760613, ppl: 15.809526 +epoch: 1, batch: 38763, sum loss: 4863.308105, avg loss: 2.641667, ppl: 14.036578 +epoch: 1, batch: 38764, sum loss: 3980.991211, avg loss: 2.568382, ppl: 13.044695 +epoch: 1, batch: 38765, sum loss: 4824.858398, avg loss: 2.701488, ppl: 14.901896 +epoch: 1, batch: 38766, sum loss: 4213.034180, avg loss: 2.453718, ppl: 11.631515 +epoch: 1, batch: 38767, sum loss: 5005.336426, avg loss: 2.871679, ppl: 17.666653 +epoch: 1, batch: 38768, sum loss: 4149.668457, avg loss: 2.388986, ppl: 10.902431 +epoch: 1, batch: 38769, sum loss: 4133.068848, avg loss: 2.510978, ppl: 12.316964 +epoch: 1, batch: 38770, sum loss: 4287.288086, avg loss: 2.331315, ppl: 10.291467 +epoch: 1, batch: 38771, sum loss: 4275.823242, avg loss: 2.428065, ppl: 11.336927 +epoch: 1, batch: 38772, sum loss: 3554.358887, avg loss: 2.339933, ppl: 10.380546 +epoch: 1, batch: 38773, sum loss: 4550.848633, avg loss: 2.645842, ppl: 14.095313 +epoch: 1, batch: 38774, sum loss: 3864.948730, avg loss: 2.199743, ppl: 9.022695 +epoch: 1, batch: 38775, sum loss: 4830.567383, avg loss: 2.639654, ppl: 14.008362 +epoch: 1, batch: 38776, sum loss: 4949.591309, avg loss: 2.760508, ppl: 15.807876 +epoch: 1, batch: 38777, sum loss: 3948.204102, avg loss: 2.273002, ppl: 9.708501 +epoch: 1, batch: 38778, sum loss: 3595.522461, avg loss: 2.403424, ppl: 11.060987 +epoch: 1, batch: 38779, sum loss: 4377.132812, avg loss: 2.682067, ppl: 14.615267 +epoch: 1, batch: 38780, sum loss: 4713.551758, avg loss: 2.708938, ppl: 15.013321 +epoch: 1, batch: 38781, sum loss: 3585.641113, avg loss: 2.401635, ppl: 11.041213 +epoch: 1, batch: 38782, sum loss: 5281.627441, avg loss: 2.827424, ppl: 16.901857 +epoch: 1, batch: 38783, sum loss: 5127.763672, avg loss: 2.596336, ppl: 13.414499 +epoch: 1, batch: 38784, sum loss: 4879.259766, avg loss: 2.628911, ppl: 13.858677 +epoch: 1, batch: 38785, sum loss: 
4187.626953, avg loss: 2.567521, ppl: 13.033476 +epoch: 1, batch: 38786, sum loss: 4281.195312, avg loss: 2.256824, ppl: 9.552702 +epoch: 1, batch: 38787, sum loss: 4108.380859, avg loss: 2.415274, ppl: 11.192836 +epoch: 1, batch: 38788, sum loss: 4849.414551, avg loss: 2.861012, ppl: 17.479198 +epoch: 1, batch: 38789, sum loss: 3897.699707, avg loss: 2.200847, ppl: 9.032658 +epoch: 1, batch: 38790, sum loss: 4106.104492, avg loss: 2.664571, ppl: 14.361791 +epoch: 1, batch: 38791, sum loss: 5188.976074, avg loss: 2.833958, ppl: 17.012659 +epoch: 1, batch: 38792, sum loss: 4456.595703, avg loss: 2.603152, ppl: 13.506237 +epoch: 1, batch: 38793, sum loss: 4257.369629, avg loss: 2.564681, ppl: 12.996507 +epoch: 1, batch: 38794, sum loss: 4556.767090, avg loss: 2.557108, ppl: 12.898466 +epoch: 1, batch: 38795, sum loss: 4472.435547, avg loss: 2.755660, ppl: 15.731414 +epoch: 1, batch: 38796, sum loss: 4522.551758, avg loss: 2.682415, ppl: 14.620359 +epoch: 1, batch: 38797, sum loss: 4762.565918, avg loss: 2.808117, ppl: 16.578667 +epoch: 1, batch: 38798, sum loss: 4055.207031, avg loss: 2.337295, ppl: 10.353194 +epoch: 1, batch: 38799, sum loss: 4998.352539, avg loss: 2.911096, ppl: 18.376934 +epoch: 1, batch: 38800, sum loss: 4152.457031, avg loss: 2.458530, ppl: 11.687617 +epoch: 1, batch: 38801, sum loss: 4256.312500, avg loss: 2.584282, ppl: 13.253768 +epoch: 1, batch: 38802, sum loss: 3569.681152, avg loss: 2.487583, ppl: 12.032155 +epoch: 1, batch: 38803, sum loss: 3965.854736, avg loss: 2.393394, ppl: 10.950603 +epoch: 1, batch: 38804, sum loss: 4575.372070, avg loss: 2.781381, ppl: 16.141298 +epoch: 1, batch: 38805, sum loss: 4286.417969, avg loss: 2.740676, ppl: 15.497464 +epoch: 1, batch: 38806, sum loss: 4165.321289, avg loss: 2.634612, ppl: 13.937898 +epoch: 1, batch: 38807, sum loss: 4450.198242, avg loss: 2.653666, ppl: 14.206026 +epoch: 1, batch: 38808, sum loss: 5509.031738, avg loss: 2.961845, ppl: 19.333612 +epoch: 1, batch: 38809, sum loss: 
4298.899414, avg loss: 2.460732, ppl: 11.713386 +epoch: 1, batch: 38810, sum loss: 3952.717773, avg loss: 2.500138, ppl: 12.184170 +epoch: 1, batch: 38811, sum loss: 4572.226562, avg loss: 2.766017, ppl: 15.895201 +epoch: 1, batch: 38812, sum loss: 4402.656738, avg loss: 2.644238, ppl: 14.072721 +epoch: 1, batch: 38813, sum loss: 4802.338379, avg loss: 2.910508, ppl: 18.366129 +epoch: 1, batch: 38814, sum loss: 3966.319336, avg loss: 2.549048, ppl: 12.794923 +epoch: 1, batch: 38815, sum loss: 5438.692383, avg loss: 2.712565, ppl: 15.067871 +epoch: 1, batch: 38816, sum loss: 4511.935547, avg loss: 2.644745, ppl: 14.079859 +epoch: 1, batch: 38817, sum loss: 3981.188965, avg loss: 2.540644, ppl: 12.687841 +epoch: 1, batch: 38818, sum loss: 3810.831543, avg loss: 2.402794, ppl: 11.054020 +epoch: 1, batch: 38819, sum loss: 2976.140869, avg loss: 2.164466, ppl: 8.709950 +epoch: 1, batch: 38820, sum loss: 4682.239258, avg loss: 2.627519, ppl: 13.839396 +epoch: 1, batch: 38821, sum loss: 3489.814209, avg loss: 2.350043, ppl: 10.486021 +epoch: 1, batch: 38822, sum loss: 4752.321289, avg loss: 2.488126, ppl: 12.038698 +epoch: 1, batch: 38823, sum loss: 4662.298828, avg loss: 2.483910, ppl: 11.988045 +epoch: 1, batch: 38824, sum loss: 4599.664062, avg loss: 2.478267, ppl: 11.920590 +epoch: 1, batch: 38825, sum loss: 4409.660156, avg loss: 2.693745, ppl: 14.786948 +epoch: 1, batch: 38826, sum loss: 4706.481934, avg loss: 2.598831, ppl: 13.448004 +epoch: 1, batch: 38827, sum loss: 4058.168701, avg loss: 2.282435, ppl: 9.800512 +epoch: 1, batch: 38828, sum loss: 4867.489258, avg loss: 2.767191, ppl: 15.913872 +epoch: 1, batch: 38829, sum loss: 4526.482422, avg loss: 2.574791, ppl: 13.128570 +epoch: 1, batch: 38830, sum loss: 4122.313965, avg loss: 2.644204, ppl: 14.072241 +epoch: 1, batch: 38831, sum loss: 4606.993164, avg loss: 2.661463, ppl: 14.317224 +epoch: 1, batch: 38832, sum loss: 4787.192383, avg loss: 2.870019, ppl: 17.637362 +epoch: 1, batch: 38833, sum loss: 
3894.245605, avg loss: 2.417285, ppl: 11.215365 +epoch: 1, batch: 38834, sum loss: 3918.666992, avg loss: 2.607230, ppl: 13.561436 +epoch: 1, batch: 38835, sum loss: 4317.471191, avg loss: 2.492766, ppl: 12.094688 +epoch: 1, batch: 38836, sum loss: 4409.334961, avg loss: 2.637162, ppl: 13.973494 +epoch: 1, batch: 38837, sum loss: 3803.736572, avg loss: 2.422762, ppl: 11.276965 +epoch: 1, batch: 38838, sum loss: 4011.457031, avg loss: 2.616737, ppl: 13.690972 +epoch: 1, batch: 38839, sum loss: 3652.885498, avg loss: 2.536726, ppl: 12.638226 +epoch: 1, batch: 38840, sum loss: 4504.378906, avg loss: 2.697233, ppl: 14.838615 +epoch: 1, batch: 38841, sum loss: 3505.785645, avg loss: 2.360798, ppl: 10.599410 +epoch: 1, batch: 38842, sum loss: 4688.412109, avg loss: 2.779142, ppl: 16.105196 +epoch: 1, batch: 38843, sum loss: 3723.218018, avg loss: 2.515688, ppl: 12.375119 +epoch: 1, batch: 38844, sum loss: 3811.094727, avg loss: 2.393904, ppl: 10.956181 +epoch: 1, batch: 38845, sum loss: 4932.839355, avg loss: 2.823606, ppl: 16.837454 +epoch: 1, batch: 38846, sum loss: 5135.669434, avg loss: 2.752234, ppl: 15.677620 +epoch: 1, batch: 38847, sum loss: 4593.125977, avg loss: 2.761952, ppl: 15.830713 +epoch: 1, batch: 38848, sum loss: 4224.560059, avg loss: 2.519117, ppl: 12.417631 +epoch: 1, batch: 38849, sum loss: 4250.421387, avg loss: 2.301257, ppl: 9.986730 +epoch: 1, batch: 38850, sum loss: 4050.392334, avg loss: 2.383986, ppl: 10.848060 +epoch: 1, batch: 38851, sum loss: 4912.163086, avg loss: 2.776802, ppl: 16.067560 +epoch: 1, batch: 38852, sum loss: 4197.408203, avg loss: 2.774229, ppl: 16.026266 +epoch: 1, batch: 38853, sum loss: 4186.871582, avg loss: 2.608643, ppl: 13.580604 +epoch: 1, batch: 38854, sum loss: 4769.752441, avg loss: 2.651335, ppl: 14.172954 +epoch: 1, batch: 38855, sum loss: 5503.279785, avg loss: 2.890378, ppl: 18.000116 +epoch: 1, batch: 38856, sum loss: 4058.433838, avg loss: 2.522333, ppl: 12.457625 +epoch: 1, batch: 38857, sum loss: 
3970.528564, avg loss: 2.576592, ppl: 13.152242 +epoch: 1, batch: 38858, sum loss: 3731.384766, avg loss: 2.194932, ppl: 8.979395 +epoch: 1, batch: 38859, sum loss: 4938.717773, avg loss: 2.642438, ppl: 14.047416 +epoch: 1, batch: 38860, sum loss: 4444.505371, avg loss: 2.518133, ppl: 12.405419 +epoch: 1, batch: 38861, sum loss: 4363.828125, avg loss: 2.855908, ppl: 17.390226 +epoch: 1, batch: 38862, sum loss: 4029.185547, avg loss: 2.421386, ppl: 11.261452 +epoch: 1, batch: 38863, sum loss: 3912.789062, avg loss: 2.516263, ppl: 12.382240 +epoch: 1, batch: 38864, sum loss: 4370.105469, avg loss: 2.474578, ppl: 11.876698 +epoch: 1, batch: 38865, sum loss: 4188.105957, avg loss: 2.602925, ppl: 13.503175 +epoch: 1, batch: 38866, sum loss: 4617.028809, avg loss: 2.579346, ppl: 13.188506 +epoch: 1, batch: 38867, sum loss: 3527.118164, avg loss: 2.401033, ppl: 11.034574 +epoch: 1, batch: 38868, sum loss: 4657.647461, avg loss: 2.587582, ppl: 13.297578 +epoch: 1, batch: 38869, sum loss: 5889.040039, avg loss: 2.806978, ppl: 16.559803 +epoch: 1, batch: 38870, sum loss: 5195.308594, avg loss: 2.904029, ppl: 18.247524 +epoch: 1, batch: 38871, sum loss: 2922.471436, avg loss: 2.136310, ppl: 8.468129 +epoch: 1, batch: 38872, sum loss: 4479.656738, avg loss: 2.515248, ppl: 12.369677 +epoch: 1, batch: 38873, sum loss: 5092.705566, avg loss: 2.630530, ppl: 13.881120 +epoch: 1, batch: 38874, sum loss: 4224.791016, avg loss: 2.685817, ppl: 14.670189 +epoch: 1, batch: 38875, sum loss: 3601.241211, avg loss: 2.267784, ppl: 9.657978 +epoch: 1, batch: 38876, sum loss: 4097.490234, avg loss: 2.583537, ppl: 13.243903 +epoch: 1, batch: 38877, sum loss: 3916.829834, avg loss: 2.429795, ppl: 11.356557 +epoch: 1, batch: 38878, sum loss: 4245.020508, avg loss: 2.554164, ppl: 12.860543 +epoch: 1, batch: 38879, sum loss: 3670.445801, avg loss: 2.429150, ppl: 11.349233 +epoch: 1, batch: 38880, sum loss: 4591.791016, avg loss: 2.668095, ppl: 14.412482 +epoch: 1, batch: 38881, sum loss: 
4783.134277, avg loss: 2.780892, ppl: 16.133404 +epoch: 1, batch: 38882, sum loss: 4882.728027, avg loss: 2.922040, ppl: 18.579145 +epoch: 1, batch: 38883, sum loss: 5194.451172, avg loss: 2.801754, ppl: 16.473509 +epoch: 1, batch: 38884, sum loss: 4617.027344, avg loss: 2.583675, ppl: 13.245728 +epoch: 1, batch: 38885, sum loss: 4618.943848, avg loss: 2.682313, ppl: 14.618875 +epoch: 1, batch: 38886, sum loss: 4656.978027, avg loss: 2.567243, ppl: 13.029846 +epoch: 1, batch: 38887, sum loss: 5008.262695, avg loss: 2.627630, ppl: 13.840924 +epoch: 1, batch: 38888, sum loss: 5251.388672, avg loss: 2.849370, ppl: 17.276890 +epoch: 1, batch: 38889, sum loss: 4279.078125, avg loss: 2.787673, ppl: 16.243177 +epoch: 1, batch: 38890, sum loss: 4233.436035, avg loss: 2.394478, ppl: 10.962470 +epoch: 1, batch: 38891, sum loss: 4535.145020, avg loss: 2.748573, ppl: 15.620323 +epoch: 1, batch: 38892, sum loss: 4160.317871, avg loss: 2.590484, ppl: 13.336230 +epoch: 1, batch: 38893, sum loss: 4283.463867, avg loss: 2.528609, ppl: 12.536057 +epoch: 1, batch: 38894, sum loss: 5005.900391, avg loss: 2.607240, ppl: 13.561568 +epoch: 1, batch: 38895, sum loss: 5316.066406, avg loss: 2.893885, ppl: 18.063347 +epoch: 1, batch: 38896, sum loss: 4739.129395, avg loss: 2.627012, ppl: 13.832374 +epoch: 1, batch: 38897, sum loss: 4368.546387, avg loss: 2.798556, ppl: 16.420923 +epoch: 1, batch: 38898, sum loss: 4679.135742, avg loss: 2.549938, ppl: 12.806310 +epoch: 1, batch: 38899, sum loss: 4742.718750, avg loss: 2.801370, ppl: 16.467186 +epoch: 1, batch: 38900, sum loss: 4026.755859, avg loss: 2.621586, ppl: 13.757524 +epoch: 1, batch: 38901, sum loss: 4743.216309, avg loss: 2.883414, ppl: 17.875196 +epoch: 1, batch: 38902, sum loss: 4526.561035, avg loss: 2.554493, ppl: 12.864772 +epoch: 1, batch: 38903, sum loss: 5179.330078, avg loss: 2.717382, ppl: 15.140632 +epoch: 1, batch: 38904, sum loss: 4179.943848, avg loss: 2.614099, ppl: 13.654902 +epoch: 1, batch: 38905, sum loss: 
3912.859375, avg loss: 2.549094, ppl: 12.795506 +epoch: 1, batch: 38906, sum loss: 5087.673340, avg loss: 2.790825, ppl: 16.294455 +epoch: 1, batch: 38907, sum loss: 3844.197266, avg loss: 2.341168, ppl: 10.393366 +epoch: 1, batch: 38908, sum loss: 4997.978516, avg loss: 2.790608, ppl: 16.290916 +epoch: 1, batch: 38909, sum loss: 4666.927734, avg loss: 2.738807, ppl: 15.468527 +epoch: 1, batch: 38910, sum loss: 4385.235352, avg loss: 2.616489, ppl: 13.687581 +epoch: 1, batch: 38911, sum loss: 4303.480957, avg loss: 2.628883, ppl: 13.858280 +epoch: 1, batch: 38912, sum loss: 4720.981934, avg loss: 2.649261, ppl: 14.143576 +epoch: 1, batch: 38913, sum loss: 4547.699707, avg loss: 2.756182, ppl: 15.739630 +epoch: 1, batch: 38914, sum loss: 4720.344238, avg loss: 2.583659, ppl: 13.245511 +epoch: 1, batch: 38915, sum loss: 4539.392578, avg loss: 2.509338, ppl: 12.296788 +epoch: 1, batch: 38916, sum loss: 4716.402344, avg loss: 2.575862, ppl: 13.142637 +epoch: 1, batch: 38917, sum loss: 4361.442383, avg loss: 2.531307, ppl: 12.569927 +epoch: 1, batch: 38918, sum loss: 4270.294922, avg loss: 2.785581, ppl: 16.209227 +epoch: 1, batch: 38919, sum loss: 4586.206055, avg loss: 2.691436, ppl: 14.752839 +epoch: 1, batch: 38920, sum loss: 3461.753418, avg loss: 2.392366, ppl: 10.939345 +epoch: 1, batch: 38921, sum loss: 4471.233398, avg loss: 2.578566, ppl: 13.178225 +epoch: 1, batch: 38922, sum loss: 4796.422852, avg loss: 2.666161, ppl: 14.384634 +epoch: 1, batch: 38923, sum loss: 4539.373047, avg loss: 2.727989, ppl: 15.302079 +epoch: 1, batch: 38924, sum loss: 3967.026611, avg loss: 2.552784, ppl: 12.842811 +epoch: 1, batch: 38925, sum loss: 3739.904541, avg loss: 2.301480, ppl: 9.988951 +epoch: 1, batch: 38926, sum loss: 2724.349121, avg loss: 1.998789, ppl: 7.380112 +epoch: 1, batch: 38927, sum loss: 4329.110840, avg loss: 2.685553, ppl: 14.666304 +epoch: 1, batch: 38928, sum loss: 4254.689941, avg loss: 2.445224, ppl: 11.533134 +epoch: 1, batch: 38929, sum loss: 
4260.949219, avg loss: 2.524259, ppl: 12.481644 +epoch: 1, batch: 38930, sum loss: 4585.223633, avg loss: 2.673600, ppl: 14.492043 +epoch: 1, batch: 38931, sum loss: 5245.083496, avg loss: 2.853691, ppl: 17.351702 +epoch: 1, batch: 38932, sum loss: 6062.958496, avg loss: 3.117203, ppl: 22.583115 +epoch: 1, batch: 38933, sum loss: 4933.340332, avg loss: 2.779346, ppl: 16.108490 +epoch: 1, batch: 38934, sum loss: 5114.829102, avg loss: 2.775274, ppl: 16.043015 +epoch: 1, batch: 38935, sum loss: 3883.471436, avg loss: 2.632862, ppl: 13.913535 +epoch: 1, batch: 38936, sum loss: 5400.745117, avg loss: 2.626822, ppl: 13.829742 +epoch: 1, batch: 38937, sum loss: 4923.651367, avg loss: 2.767651, ppl: 15.921192 +epoch: 1, batch: 38938, sum loss: 4424.433594, avg loss: 2.657318, ppl: 14.257993 +epoch: 1, batch: 38939, sum loss: 4269.929688, avg loss: 2.552259, ppl: 12.836070 +epoch: 1, batch: 38940, sum loss: 4611.822754, avg loss: 2.795044, ppl: 16.363352 +epoch: 1, batch: 38941, sum loss: 4360.052734, avg loss: 2.617078, ppl: 13.695650 +epoch: 1, batch: 38942, sum loss: 3782.737061, avg loss: 2.406321, ppl: 11.093078 +epoch: 1, batch: 38943, sum loss: 4212.423340, avg loss: 2.439157, ppl: 11.463367 +epoch: 1, batch: 38944, sum loss: 4152.335938, avg loss: 2.619770, ppl: 13.732569 +epoch: 1, batch: 38945, sum loss: 4502.752930, avg loss: 2.669089, ppl: 14.426822 +epoch: 1, batch: 38946, sum loss: 4481.161621, avg loss: 2.626707, ppl: 13.828154 +epoch: 1, batch: 38947, sum loss: 4921.680664, avg loss: 2.955964, ppl: 19.220249 +epoch: 1, batch: 38948, sum loss: 3479.357910, avg loss: 2.532284, ppl: 12.582209 +epoch: 1, batch: 38949, sum loss: 5304.303223, avg loss: 2.746920, ppl: 15.594532 +epoch: 1, batch: 38950, sum loss: 4565.000488, avg loss: 2.674283, ppl: 14.501945 +epoch: 1, batch: 38951, sum loss: 4500.632324, avg loss: 2.739277, ppl: 15.475794 +epoch: 1, batch: 38952, sum loss: 3855.038574, avg loss: 2.413925, ppl: 11.177750 +epoch: 1, batch: 38953, sum loss: 
4468.786621, avg loss: 2.620989, ppl: 13.749319 +epoch: 1, batch: 38954, sum loss: 4917.860352, avg loss: 2.620064, ppl: 13.736607 +epoch: 1, batch: 38955, sum loss: 5360.183594, avg loss: 2.812268, ppl: 16.647636 +epoch: 1, batch: 38956, sum loss: 4183.698242, avg loss: 2.521819, ppl: 12.451229 +epoch: 1, batch: 38957, sum loss: 3972.947998, avg loss: 2.648632, ppl: 14.134690 +epoch: 1, batch: 38958, sum loss: 4210.505859, avg loss: 2.768249, ppl: 15.930708 +epoch: 1, batch: 38959, sum loss: 4776.438965, avg loss: 2.625860, ppl: 13.816447 +epoch: 1, batch: 38960, sum loss: 4876.119141, avg loss: 2.689531, ppl: 14.724766 +epoch: 1, batch: 38961, sum loss: 3728.584961, avg loss: 2.539908, ppl: 12.678504 +epoch: 1, batch: 38962, sum loss: 5095.828125, avg loss: 2.813820, ppl: 16.673496 +epoch: 1, batch: 38963, sum loss: 4490.583008, avg loss: 2.589725, ppl: 13.326103 +epoch: 1, batch: 38964, sum loss: 4980.509277, avg loss: 2.902395, ppl: 18.217720 +epoch: 1, batch: 38965, sum loss: 4670.147461, avg loss: 2.745530, ppl: 15.572871 +epoch: 1, batch: 38966, sum loss: 4164.671875, avg loss: 2.730932, ppl: 15.347191 +epoch: 1, batch: 38967, sum loss: 4170.717773, avg loss: 2.401104, ppl: 11.035355 +epoch: 1, batch: 38968, sum loss: 4605.628906, avg loss: 2.431694, ppl: 11.378143 +epoch: 1, batch: 38969, sum loss: 4440.474121, avg loss: 2.633733, ppl: 13.925661 +epoch: 1, batch: 38970, sum loss: 4575.252441, avg loss: 2.547468, ppl: 12.774716 +epoch: 1, batch: 38971, sum loss: 4803.351074, avg loss: 2.782938, ppl: 16.166449 +epoch: 1, batch: 38972, sum loss: 3947.439697, avg loss: 2.511094, ppl: 12.318398 +epoch: 1, batch: 38973, sum loss: 3898.278320, avg loss: 2.569729, ppl: 13.062279 +epoch: 1, batch: 38974, sum loss: 4397.857422, avg loss: 2.857607, ppl: 17.419794 +epoch: 1, batch: 38975, sum loss: 4182.647949, avg loss: 2.363078, ppl: 10.623599 +epoch: 1, batch: 38976, sum loss: 5019.275391, avg loss: 2.777684, ppl: 16.081732 +epoch: 1, batch: 38977, sum loss: 
3874.281494, avg loss: 2.359489, ppl: 10.585546 +epoch: 1, batch: 38978, sum loss: 3945.464355, avg loss: 2.611161, ppl: 13.614848 +epoch: 1, batch: 38979, sum loss: 4464.324219, avg loss: 2.747276, ppl: 15.600084 +epoch: 1, batch: 38980, sum loss: 3908.471191, avg loss: 2.327857, ppl: 10.255935 +epoch: 1, batch: 38981, sum loss: 4632.351074, avg loss: 2.799004, ppl: 16.428274 +epoch: 1, batch: 38982, sum loss: 3754.829346, avg loss: 2.342376, ppl: 10.405934 +epoch: 1, batch: 38983, sum loss: 4599.471191, avg loss: 2.632782, ppl: 13.912427 +epoch: 1, batch: 38984, sum loss: 5293.395996, avg loss: 2.761292, ppl: 15.820266 +epoch: 1, batch: 38985, sum loss: 3830.429688, avg loss: 2.490526, ppl: 12.067627 +epoch: 1, batch: 38986, sum loss: 4854.545410, avg loss: 2.858979, ppl: 17.443699 +epoch: 1, batch: 38987, sum loss: 4520.609375, avg loss: 2.807832, ppl: 16.573942 +epoch: 1, batch: 38988, sum loss: 4953.215820, avg loss: 2.667321, ppl: 14.401340 +epoch: 1, batch: 38989, sum loss: 3698.129150, avg loss: 2.348018, ppl: 10.464812 +epoch: 1, batch: 38990, sum loss: 4343.357422, avg loss: 2.557925, ppl: 12.909009 +epoch: 1, batch: 38991, sum loss: 3385.589111, avg loss: 2.553235, ppl: 12.848597 +epoch: 1, batch: 38992, sum loss: 4352.997070, avg loss: 2.514729, ppl: 12.363264 +epoch: 1, batch: 38993, sum loss: 3834.436035, avg loss: 2.332382, ppl: 10.302452 +epoch: 1, batch: 38994, sum loss: 4441.359863, avg loss: 2.693366, ppl: 14.781340 +epoch: 1, batch: 38995, sum loss: 4580.102539, avg loss: 2.399216, ppl: 11.014534 +epoch: 1, batch: 38996, sum loss: 3676.538330, avg loss: 2.415597, ppl: 11.196452 +epoch: 1, batch: 38997, sum loss: 4829.925293, avg loss: 2.695271, ppl: 14.809532 +epoch: 1, batch: 38998, sum loss: 4102.500977, avg loss: 2.428953, ppl: 11.346992 +epoch: 1, batch: 38999, sum loss: 5097.672852, avg loss: 2.641281, ppl: 14.031171 +epoch: 1, batch: 39000, sum loss: 4457.931152, avg loss: 2.498841, ppl: 12.168378 +epoch: 1, batch: 39001, sum loss: 
4248.861328, avg loss: 2.746517, ppl: 15.588239 +epoch: 1, batch: 39002, sum loss: 4093.846924, avg loss: 2.265549, ppl: 9.636411 +epoch: 1, batch: 39003, sum loss: 4727.767578, avg loss: 2.851488, ppl: 17.313532 +epoch: 1, batch: 39004, sum loss: 4874.017090, avg loss: 2.995708, ppl: 19.999514 +epoch: 1, batch: 39005, sum loss: 4229.912109, avg loss: 2.614284, ppl: 13.657438 +epoch: 1, batch: 39006, sum loss: 5293.022461, avg loss: 2.587010, ppl: 13.289974 +epoch: 1, batch: 39007, sum loss: 4243.464355, avg loss: 2.574918, ppl: 13.130238 +epoch: 1, batch: 39008, sum loss: 4504.549316, avg loss: 2.697335, ppl: 14.840130 +epoch: 1, batch: 39009, sum loss: 4603.045898, avg loss: 2.771250, ppl: 15.978591 +epoch: 1, batch: 39010, sum loss: 4163.328613, avg loss: 2.560473, ppl: 12.941934 +epoch: 1, batch: 39011, sum loss: 4307.974609, avg loss: 2.298812, ppl: 9.962344 +epoch: 1, batch: 39012, sum loss: 3937.519775, avg loss: 2.408269, ppl: 11.114704 +epoch: 1, batch: 39013, sum loss: 5025.767090, avg loss: 2.812405, ppl: 16.649906 +epoch: 1, batch: 39014, sum loss: 3804.434082, avg loss: 2.577530, ppl: 13.164580 +epoch: 1, batch: 39015, sum loss: 4344.936035, avg loss: 2.589354, ppl: 13.321164 +epoch: 1, batch: 39016, sum loss: 3935.887207, avg loss: 2.584299, ppl: 13.253993 +epoch: 1, batch: 39017, sum loss: 4079.754883, avg loss: 2.610208, ppl: 13.601877 +epoch: 1, batch: 39018, sum loss: 4461.793945, avg loss: 2.613822, ppl: 13.651126 +epoch: 1, batch: 39019, sum loss: 4228.400879, avg loss: 2.789183, ppl: 16.267717 +epoch: 1, batch: 39020, sum loss: 4534.204590, avg loss: 2.614882, ppl: 13.665597 +epoch: 1, batch: 39021, sum loss: 5859.104980, avg loss: 2.802059, ppl: 16.478540 +epoch: 1, batch: 39022, sum loss: 5035.942383, avg loss: 2.767001, ppl: 15.910852 +epoch: 1, batch: 39023, sum loss: 4723.558594, avg loss: 2.615481, ppl: 13.673791 +epoch: 1, batch: 39024, sum loss: 4346.715332, avg loss: 2.562922, ppl: 12.973668 +epoch: 1, batch: 39025, sum loss: 
5082.259766, avg loss: 2.490083, ppl: 12.062277 +epoch: 1, batch: 39026, sum loss: 4370.682617, avg loss: 2.681400, ppl: 14.605531 +epoch: 1, batch: 39027, sum loss: 3796.998779, avg loss: 2.346724, ppl: 10.451270 +epoch: 1, batch: 39028, sum loss: 4672.938477, avg loss: 2.690235, ppl: 14.735140 +epoch: 1, batch: 39029, sum loss: 4263.870605, avg loss: 2.627154, ppl: 13.834336 +epoch: 1, batch: 39030, sum loss: 4030.569336, avg loss: 2.444251, ppl: 11.521912 +epoch: 1, batch: 39031, sum loss: 4114.566406, avg loss: 2.802838, ppl: 16.491385 +epoch: 1, batch: 39032, sum loss: 5174.510742, avg loss: 2.614710, ppl: 13.663252 +epoch: 1, batch: 39033, sum loss: 3864.987793, avg loss: 2.458644, ppl: 11.688947 +epoch: 1, batch: 39034, sum loss: 3976.358398, avg loss: 2.636842, ppl: 13.969027 +epoch: 1, batch: 39035, sum loss: 4385.437012, avg loss: 2.561587, ppl: 12.956361 +epoch: 1, batch: 39036, sum loss: 3683.511475, avg loss: 2.299320, ppl: 9.967407 +epoch: 1, batch: 39037, sum loss: 4820.409180, avg loss: 2.860777, ppl: 17.475098 +epoch: 1, batch: 39038, sum loss: 4802.383301, avg loss: 2.748931, ppl: 15.625925 +epoch: 1, batch: 39039, sum loss: 4943.001953, avg loss: 2.911073, ppl: 18.376511 +epoch: 1, batch: 39040, sum loss: 4650.079590, avg loss: 2.455163, ppl: 11.648337 +epoch: 1, batch: 39041, sum loss: 4035.011230, avg loss: 2.529788, ppl: 12.550839 +epoch: 1, batch: 39042, sum loss: 4050.524414, avg loss: 2.638778, ppl: 13.996090 +epoch: 1, batch: 39043, sum loss: 5061.471680, avg loss: 2.802587, ppl: 16.487240 +epoch: 1, batch: 39044, sum loss: 4509.828613, avg loss: 2.427249, ppl: 11.327676 +epoch: 1, batch: 39045, sum loss: 5309.120605, avg loss: 2.763728, ppl: 15.858850 +epoch: 1, batch: 39046, sum loss: 3925.471680, avg loss: 2.615238, ppl: 13.670465 +epoch: 1, batch: 39047, sum loss: 4783.522949, avg loss: 2.697983, ppl: 14.849743 +epoch: 1, batch: 39048, sum loss: 5200.842285, avg loss: 2.921822, ppl: 18.575092 +epoch: 1, batch: 39049, sum loss: 
4277.885742, avg loss: 2.504617, ppl: 12.238868 +epoch: 1, batch: 39050, sum loss: 4322.473633, avg loss: 2.704927, ppl: 14.953228 +epoch: 1, batch: 39051, sum loss: 4762.755371, avg loss: 2.671203, ppl: 14.457356 +epoch: 1, batch: 39052, sum loss: 5594.115723, avg loss: 2.880595, ppl: 17.824879 +epoch: 1, batch: 39053, sum loss: 4553.840332, avg loss: 2.644506, ppl: 14.076496 +epoch: 1, batch: 39054, sum loss: 3700.773438, avg loss: 2.552258, ppl: 12.836049 +epoch: 1, batch: 39055, sum loss: 4536.231934, avg loss: 2.692126, ppl: 14.763026 +epoch: 1, batch: 39056, sum loss: 3692.166504, avg loss: 2.317744, ppl: 10.152746 +epoch: 1, batch: 39057, sum loss: 4066.945312, avg loss: 2.407901, ppl: 11.110619 +epoch: 1, batch: 39058, sum loss: 3918.502197, avg loss: 2.519937, ppl: 12.427817 +epoch: 1, batch: 39059, sum loss: 4594.428711, avg loss: 2.616417, ppl: 13.686603 +epoch: 1, batch: 39060, sum loss: 4612.349609, avg loss: 2.623635, ppl: 13.785738 +epoch: 1, batch: 39061, sum loss: 3966.139160, avg loss: 2.630066, ppl: 13.874681 +epoch: 1, batch: 39062, sum loss: 4309.896973, avg loss: 2.698746, ppl: 14.861080 +epoch: 1, batch: 39063, sum loss: 4438.529785, avg loss: 2.410934, ppl: 11.144367 +epoch: 1, batch: 39064, sum loss: 5416.693359, avg loss: 2.846397, ppl: 17.225605 +epoch: 1, batch: 39065, sum loss: 4642.119629, avg loss: 2.514691, ppl: 12.362789 +epoch: 1, batch: 39066, sum loss: 3659.455322, avg loss: 2.492817, ppl: 12.095300 +epoch: 1, batch: 39067, sum loss: 3978.001465, avg loss: 2.405080, ppl: 11.079312 +epoch: 1, batch: 39068, sum loss: 5343.600586, avg loss: 2.678496, ppl: 14.563179 +epoch: 1, batch: 39069, sum loss: 4723.234375, avg loss: 2.593759, ppl: 13.379967 +epoch: 1, batch: 39070, sum loss: 4494.795898, avg loss: 2.550963, ppl: 12.819439 +epoch: 1, batch: 39071, sum loss: 4376.189453, avg loss: 2.575744, ppl: 13.141095 +epoch: 1, batch: 39072, sum loss: 3963.101074, avg loss: 2.282892, ppl: 9.804997 +epoch: 1, batch: 39073, sum loss: 
3810.490234, avg loss: 2.475952, ppl: 11.893025 +epoch: 1, batch: 39074, sum loss: 4672.378906, avg loss: 2.718080, ppl: 15.151201 +epoch: 1, batch: 39075, sum loss: 4907.230469, avg loss: 2.726239, ppl: 15.275331 +epoch: 1, batch: 39076, sum loss: 2883.713867, avg loss: 2.252902, ppl: 9.515305 +epoch: 1, batch: 39077, sum loss: 4097.179199, avg loss: 2.427239, ppl: 11.327562 +epoch: 1, batch: 39078, sum loss: 4982.833496, avg loss: 2.871950, ppl: 17.671446 +epoch: 1, batch: 39079, sum loss: 4194.174805, avg loss: 2.777599, ppl: 16.080372 +epoch: 1, batch: 39080, sum loss: 4798.754883, avg loss: 2.553888, ppl: 12.856993 +epoch: 1, batch: 39081, sum loss: 3295.491211, avg loss: 2.096369, ppl: 8.136569 +epoch: 1, batch: 39082, sum loss: 4707.799316, avg loss: 2.559978, ppl: 12.935533 +epoch: 1, batch: 39083, sum loss: 5468.955566, avg loss: 3.043381, ppl: 20.976044 +epoch: 1, batch: 39084, sum loss: 4429.083984, avg loss: 2.740770, ppl: 15.498913 +epoch: 1, batch: 39085, sum loss: 4935.496582, avg loss: 2.586738, ppl: 13.286363 +epoch: 1, batch: 39086, sum loss: 4549.779297, avg loss: 2.569045, ppl: 13.053356 +epoch: 1, batch: 39087, sum loss: 3930.475098, avg loss: 2.306617, ppl: 10.040399 +epoch: 1, batch: 39088, sum loss: 5146.570312, avg loss: 2.690314, ppl: 14.736300 +epoch: 1, batch: 39089, sum loss: 5240.934082, avg loss: 2.795165, ppl: 16.365326 +epoch: 1, batch: 39090, sum loss: 4517.238281, avg loss: 2.407910, ppl: 11.110711 +epoch: 1, batch: 39091, sum loss: 3561.752930, avg loss: 2.542293, ppl: 12.708783 +epoch: 1, batch: 39092, sum loss: 5319.789062, avg loss: 2.779409, ppl: 16.109501 +epoch: 1, batch: 39093, sum loss: 5192.618652, avg loss: 2.887997, ppl: 17.957304 +epoch: 1, batch: 39094, sum loss: 5375.031250, avg loss: 2.821539, ppl: 16.802685 +epoch: 1, batch: 39095, sum loss: 4250.265625, avg loss: 2.545069, ppl: 12.744110 +epoch: 1, batch: 39096, sum loss: 4558.345215, avg loss: 2.885029, ppl: 17.904079 +epoch: 1, batch: 39097, sum loss: 
4304.993652, avg loss: 2.486998, ppl: 12.025124 +epoch: 1, batch: 39098, sum loss: 4480.411621, avg loss: 2.586843, ppl: 13.287753 +epoch: 1, batch: 39099, sum loss: 3960.223145, avg loss: 2.326806, ppl: 10.245162 +epoch: 1, batch: 39100, sum loss: 5199.172852, avg loss: 2.636497, ppl: 13.964205 +epoch: 1, batch: 39101, sum loss: 3728.422363, avg loss: 2.449686, ppl: 11.584712 +epoch: 1, batch: 39102, sum loss: 4178.544434, avg loss: 2.546340, ppl: 12.760319 +epoch: 1, batch: 39103, sum loss: 5044.870117, avg loss: 2.718141, ppl: 15.152133 +epoch: 1, batch: 39104, sum loss: 4835.353516, avg loss: 2.642270, ppl: 14.045048 +epoch: 1, batch: 39105, sum loss: 5140.227051, avg loss: 2.762078, ppl: 15.832710 +epoch: 1, batch: 39106, sum loss: 4786.067383, avg loss: 2.820311, ppl: 16.782066 +epoch: 1, batch: 39107, sum loss: 4990.312500, avg loss: 2.816204, ppl: 16.713280 +epoch: 1, batch: 39108, sum loss: 5762.424316, avg loss: 2.841432, ppl: 17.140295 +epoch: 1, batch: 39109, sum loss: 3707.711426, avg loss: 2.302926, ppl: 10.003412 +epoch: 1, batch: 39110, sum loss: 4559.712402, avg loss: 2.626562, ppl: 13.826159 +epoch: 1, batch: 39111, sum loss: 3859.415039, avg loss: 2.498003, ppl: 12.158195 +epoch: 1, batch: 39112, sum loss: 4590.769531, avg loss: 2.790741, ppl: 16.293093 +epoch: 1, batch: 39113, sum loss: 4219.558594, avg loss: 2.469022, ppl: 11.810890 +epoch: 1, batch: 39114, sum loss: 4714.495605, avg loss: 2.671102, ppl: 14.455894 +epoch: 1, batch: 39115, sum loss: 4729.444824, avg loss: 2.670494, ppl: 14.447102 +epoch: 1, batch: 39116, sum loss: 4205.304688, avg loss: 2.621761, ppl: 13.759934 +epoch: 1, batch: 39117, sum loss: 4067.870361, avg loss: 2.521928, ppl: 12.452586 +epoch: 1, batch: 39118, sum loss: 5326.475098, avg loss: 2.921819, ppl: 18.575039 +epoch: 1, batch: 39119, sum loss: 4037.571289, avg loss: 2.259413, ppl: 9.577468 +epoch: 1, batch: 39120, sum loss: 5353.802246, avg loss: 2.659614, ppl: 14.290770 +epoch: 1, batch: 39121, sum loss: 
4829.088379, avg loss: 2.640289, ppl: 14.017255 +epoch: 1, batch: 39122, sum loss: 5144.433105, avg loss: 2.825059, ppl: 16.861946 +epoch: 1, batch: 39123, sum loss: 5797.418945, avg loss: 2.748895, ppl: 15.625352 +epoch: 1, batch: 39124, sum loss: 4261.327637, avg loss: 2.466046, ppl: 11.775794 +epoch: 1, batch: 39125, sum loss: 3349.223877, avg loss: 2.216561, ppl: 9.175722 +epoch: 1, batch: 39126, sum loss: 3587.897949, avg loss: 2.220234, ppl: 9.209485 +epoch: 1, batch: 39127, sum loss: 4871.230469, avg loss: 2.670631, ppl: 14.449080 +epoch: 1, batch: 39128, sum loss: 4752.059570, avg loss: 2.660728, ppl: 14.306697 +epoch: 1, batch: 39129, sum loss: 3415.222656, avg loss: 2.388268, ppl: 10.894606 +epoch: 1, batch: 39130, sum loss: 4027.737305, avg loss: 2.388931, ppl: 10.901832 +epoch: 1, batch: 39131, sum loss: 4126.951660, avg loss: 2.582573, ppl: 13.231137 +epoch: 1, batch: 39132, sum loss: 5121.286133, avg loss: 2.741588, ppl: 15.511596 +epoch: 1, batch: 39133, sum loss: 3889.430420, avg loss: 2.580909, ppl: 13.209146 +epoch: 1, batch: 39134, sum loss: 4859.745117, avg loss: 2.375242, ppl: 10.753615 +epoch: 1, batch: 39135, sum loss: 5292.734375, avg loss: 2.613696, ppl: 13.649407 +epoch: 1, batch: 39136, sum loss: 4007.200928, avg loss: 2.588631, ppl: 13.311538 +epoch: 1, batch: 39137, sum loss: 4561.039551, avg loss: 2.569600, ppl: 13.060595 +epoch: 1, batch: 39138, sum loss: 4566.745117, avg loss: 2.459206, ppl: 11.695518 +epoch: 1, batch: 39139, sum loss: 4327.465332, avg loss: 2.475667, ppl: 11.889632 +epoch: 1, batch: 39140, sum loss: 4572.161133, avg loss: 2.488928, ppl: 12.048357 +epoch: 1, batch: 39141, sum loss: 5664.828613, avg loss: 2.981489, ppl: 19.717152 +epoch: 1, batch: 39142, sum loss: 4344.327148, avg loss: 2.584371, ppl: 13.254944 +epoch: 1, batch: 39143, sum loss: 4700.915039, avg loss: 2.722012, ppl: 15.210900 +epoch: 1, batch: 39144, sum loss: 4110.344238, avg loss: 2.626418, ppl: 13.824164 +epoch: 1, batch: 39145, sum loss: 
4444.827148, avg loss: 2.623865, ppl: 13.788914 +epoch: 1, batch: 39146, sum loss: 4781.518555, avg loss: 2.729177, ppl: 15.320280 +epoch: 1, batch: 39147, sum loss: 4499.762695, avg loss: 2.555231, ppl: 12.874277 +epoch: 1, batch: 39148, sum loss: 4309.717285, avg loss: 2.458481, ppl: 11.687046 +epoch: 1, batch: 39149, sum loss: 4075.616699, avg loss: 2.441951, ppl: 11.495450 +epoch: 1, batch: 39150, sum loss: 3854.759277, avg loss: 2.506346, ppl: 12.260044 +epoch: 1, batch: 39151, sum loss: 3658.578613, avg loss: 2.419695, ppl: 11.242429 +epoch: 1, batch: 39152, sum loss: 3532.253418, avg loss: 2.416042, ppl: 11.201437 +epoch: 1, batch: 39153, sum loss: 5432.661133, avg loss: 2.646206, ppl: 14.100442 +epoch: 1, batch: 39154, sum loss: 3558.088135, avg loss: 2.376812, ppl: 10.770514 +epoch: 1, batch: 39155, sum loss: 4960.641113, avg loss: 2.580979, ppl: 13.210063 +epoch: 1, batch: 39156, sum loss: 4989.707031, avg loss: 2.671149, ppl: 14.456573 +epoch: 1, batch: 39157, sum loss: 3489.088867, avg loss: 2.330721, ppl: 10.285351 +epoch: 1, batch: 39158, sum loss: 4554.355957, avg loss: 2.486002, ppl: 12.013154 +epoch: 1, batch: 39159, sum loss: 4747.110352, avg loss: 2.412150, ppl: 11.157921 +epoch: 1, batch: 39160, sum loss: 3669.174805, avg loss: 2.434754, ppl: 11.413015 +epoch: 1, batch: 39161, sum loss: 5260.769531, avg loss: 2.887360, ppl: 17.945868 +epoch: 1, batch: 39162, sum loss: 3879.333740, avg loss: 2.424583, ppl: 11.297523 +epoch: 1, batch: 39163, sum loss: 5492.386230, avg loss: 2.652045, ppl: 14.183021 +epoch: 1, batch: 39164, sum loss: 4853.617676, avg loss: 2.478865, ppl: 11.927720 +epoch: 1, batch: 39165, sum loss: 4936.376953, avg loss: 2.648271, ppl: 14.129585 +epoch: 1, batch: 39166, sum loss: 4434.690918, avg loss: 2.573819, ppl: 13.115824 +epoch: 1, batch: 39167, sum loss: 4394.600098, avg loss: 2.421267, ppl: 11.260119 +epoch: 1, batch: 39168, sum loss: 3692.255371, avg loss: 2.389809, ppl: 10.911414 +epoch: 1, batch: 39169, sum loss: 
5382.271973, avg loss: 2.679080, ppl: 14.571681 +epoch: 1, batch: 39170, sum loss: 4183.476562, avg loss: 2.585585, ppl: 13.271049 +epoch: 1, batch: 39171, sum loss: 3375.271484, avg loss: 2.412632, ppl: 11.163299 +epoch: 1, batch: 39172, sum loss: 4824.187500, avg loss: 2.640497, ppl: 14.020166 +epoch: 1, batch: 39173, sum loss: 4477.312500, avg loss: 2.685850, ppl: 14.670669 +epoch: 1, batch: 39174, sum loss: 5147.027832, avg loss: 2.857872, ppl: 17.424408 +epoch: 1, batch: 39175, sum loss: 5171.219727, avg loss: 2.921593, ppl: 18.570850 +epoch: 1, batch: 39176, sum loss: 3377.857178, avg loss: 2.469194, ppl: 11.812921 +epoch: 1, batch: 39177, sum loss: 3841.917969, avg loss: 2.496373, ppl: 12.138388 +epoch: 1, batch: 39178, sum loss: 4089.142090, avg loss: 2.464823, ppl: 11.761406 +epoch: 1, batch: 39179, sum loss: 4878.324219, avg loss: 2.800416, ppl: 16.451485 +epoch: 1, batch: 39180, sum loss: 4928.227539, avg loss: 2.691550, ppl: 14.754524 +epoch: 1, batch: 39181, sum loss: 4439.287598, avg loss: 2.533840, ppl: 12.601807 +epoch: 1, batch: 39182, sum loss: 4378.387207, avg loss: 2.462535, ppl: 11.734523 +epoch: 1, batch: 39183, sum loss: 4681.925781, avg loss: 2.678447, ppl: 14.562468 +epoch: 1, batch: 39184, sum loss: 3533.553955, avg loss: 2.335462, ppl: 10.334234 +epoch: 1, batch: 39185, sum loss: 4945.067871, avg loss: 2.661500, ppl: 14.317756 +epoch: 1, batch: 39186, sum loss: 5136.458984, avg loss: 2.791554, ppl: 16.306335 +epoch: 1, batch: 39187, sum loss: 4245.376465, avg loss: 2.405313, ppl: 11.081893 +epoch: 1, batch: 39188, sum loss: 4428.571289, avg loss: 2.571760, ppl: 13.088842 +epoch: 1, batch: 39189, sum loss: 4864.375977, avg loss: 2.524326, ppl: 12.482480 +epoch: 1, batch: 39190, sum loss: 4845.938477, avg loss: 2.963877, ppl: 19.372931 +epoch: 1, batch: 39191, sum loss: 5182.145508, avg loss: 2.650714, ppl: 14.164147 +epoch: 1, batch: 39192, sum loss: 4406.321777, avg loss: 2.549955, ppl: 12.806523 +epoch: 1, batch: 39193, sum loss: 
4598.196289, avg loss: 2.671817, ppl: 14.466225 +epoch: 1, batch: 39194, sum loss: 4222.373047, avg loss: 2.543598, ppl: 12.725377 +epoch: 1, batch: 39195, sum loss: 4633.724609, avg loss: 2.722517, ppl: 15.218583 +epoch: 1, batch: 39196, sum loss: 4019.911621, avg loss: 2.520321, ppl: 12.432586 +epoch: 1, batch: 39197, sum loss: 4622.523926, avg loss: 2.774624, ppl: 16.032598 +epoch: 1, batch: 39198, sum loss: 4514.304199, avg loss: 2.472237, ppl: 11.848922 +epoch: 1, batch: 39199, sum loss: 3565.343994, avg loss: 2.370574, ppl: 10.703540 +epoch: 1, batch: 39200, sum loss: 3723.949707, avg loss: 2.308710, ppl: 10.061440 +epoch: 1, batch: 39201, sum loss: 4643.443848, avg loss: 2.592654, ppl: 13.365199 +epoch: 1, batch: 39202, sum loss: 5295.008789, avg loss: 2.831556, ppl: 16.971842 +epoch: 1, batch: 39203, sum loss: 4009.741455, avg loss: 2.568701, ppl: 13.048857 +epoch: 1, batch: 39204, sum loss: 3747.061035, avg loss: 2.362586, ppl: 10.618373 +epoch: 1, batch: 39205, sum loss: 4209.844727, avg loss: 2.632798, ppl: 13.912649 +epoch: 1, batch: 39206, sum loss: 3617.968750, avg loss: 2.308851, ppl: 10.062854 +epoch: 1, batch: 39207, sum loss: 3463.296387, avg loss: 2.222912, ppl: 9.234180 +epoch: 1, batch: 39208, sum loss: 4870.487793, avg loss: 2.673155, ppl: 14.485594 +epoch: 1, batch: 39209, sum loss: 4171.083496, avg loss: 2.478362, ppl: 11.921721 +epoch: 1, batch: 39210, sum loss: 3849.129395, avg loss: 2.411735, ppl: 11.153298 +epoch: 1, batch: 39211, sum loss: 4968.093750, avg loss: 2.800504, ppl: 16.452932 +epoch: 1, batch: 39212, sum loss: 5121.245117, avg loss: 2.708220, ppl: 15.002555 +epoch: 1, batch: 39213, sum loss: 3857.121338, avg loss: 2.517703, ppl: 12.400084 +epoch: 1, batch: 39214, sum loss: 5224.619141, avg loss: 2.792421, ppl: 16.320482 +epoch: 1, batch: 39215, sum loss: 4032.459229, avg loss: 2.421897, ppl: 11.267218 +epoch: 1, batch: 39216, sum loss: 4897.959961, avg loss: 2.818159, ppl: 16.745991 +epoch: 1, batch: 39217, sum loss: 
4648.650391, avg loss: 2.665510, ppl: 14.375285 +epoch: 1, batch: 39218, sum loss: 4030.103027, avg loss: 2.410349, ppl: 11.137846 +epoch: 1, batch: 39219, sum loss: 4300.812500, avg loss: 2.595541, ppl: 13.403843 +epoch: 1, batch: 39220, sum loss: 4061.331543, avg loss: 2.546289, ppl: 12.759670 +epoch: 1, batch: 39221, sum loss: 4429.587891, avg loss: 2.474630, ppl: 11.877313 +epoch: 1, batch: 39222, sum loss: 4579.268555, avg loss: 2.900107, ppl: 18.176085 +epoch: 1, batch: 39223, sum loss: 4487.255859, avg loss: 2.375466, ppl: 10.756028 +epoch: 1, batch: 39224, sum loss: 3356.080078, avg loss: 2.554095, ppl: 12.859651 +epoch: 1, batch: 39225, sum loss: 4861.092773, avg loss: 2.732486, ppl: 15.371052 +epoch: 1, batch: 39226, sum loss: 4546.694336, avg loss: 2.738973, ppl: 15.471083 +epoch: 1, batch: 39227, sum loss: 4752.703125, avg loss: 2.771255, ppl: 15.978679 +epoch: 1, batch: 39228, sum loss: 5137.513184, avg loss: 2.679976, ppl: 14.584740 +epoch: 1, batch: 39229, sum loss: 4380.581543, avg loss: 2.666209, ppl: 14.385330 +epoch: 1, batch: 39230, sum loss: 3814.656250, avg loss: 2.507992, ppl: 12.280250 +epoch: 1, batch: 39231, sum loss: 4659.678711, avg loss: 2.499828, ppl: 12.180394 +epoch: 1, batch: 39232, sum loss: 5385.074219, avg loss: 2.903005, ppl: 18.228838 +epoch: 1, batch: 39233, sum loss: 4930.508789, avg loss: 2.814218, ppl: 16.680120 +epoch: 1, batch: 39234, sum loss: 3442.394043, avg loss: 2.570869, ppl: 13.077189 +epoch: 1, batch: 39235, sum loss: 4184.192383, avg loss: 2.537412, ppl: 12.646897 +epoch: 1, batch: 39236, sum loss: 4647.575684, avg loss: 2.581987, ppl: 13.223383 +epoch: 1, batch: 39237, sum loss: 3840.498047, avg loss: 2.375076, ppl: 10.751834 +epoch: 1, batch: 39238, sum loss: 3801.800293, avg loss: 2.526113, ppl: 12.504808 +epoch: 1, batch: 39239, sum loss: 3912.632080, avg loss: 2.249932, ppl: 9.487093 +epoch: 1, batch: 39240, sum loss: 3979.041016, avg loss: 2.296042, ppl: 9.934782 +epoch: 1, batch: 39241, sum loss: 
4468.872070, avg loss: 2.666392, ppl: 14.387959 +epoch: 1, batch: 39242, sum loss: 4849.309570, avg loss: 2.632633, ppl: 13.910347 +epoch: 1, batch: 39243, sum loss: 4181.815918, avg loss: 2.582962, ppl: 13.236290 +epoch: 1, batch: 39244, sum loss: 4678.959961, avg loss: 2.689057, ppl: 14.717795 +epoch: 1, batch: 39245, sum loss: 4668.820801, avg loss: 2.661813, ppl: 14.322239 +epoch: 1, batch: 39246, sum loss: 5882.737305, avg loss: 2.944313, ppl: 18.997602 +epoch: 1, batch: 39247, sum loss: 4888.526855, avg loss: 2.668410, ppl: 14.417026 +epoch: 1, batch: 39248, sum loss: 4316.128906, avg loss: 2.736924, ppl: 15.439420 +epoch: 1, batch: 39249, sum loss: 3929.222412, avg loss: 2.630002, ppl: 13.873792 +epoch: 1, batch: 39250, sum loss: 4809.450195, avg loss: 2.866180, ppl: 17.569777 +epoch: 1, batch: 39251, sum loss: 4446.809082, avg loss: 2.664356, ppl: 14.358692 +epoch: 1, batch: 39252, sum loss: 4744.514648, avg loss: 2.430592, ppl: 11.365603 +epoch: 1, batch: 39253, sum loss: 3374.945557, avg loss: 2.251465, ppl: 9.501644 +epoch: 1, batch: 39254, sum loss: 3841.339111, avg loss: 2.540568, ppl: 12.686879 +epoch: 1, batch: 39255, sum loss: 4431.472168, avg loss: 2.829803, ppl: 16.942131 +epoch: 1, batch: 39256, sum loss: 5064.492676, avg loss: 2.643263, ppl: 14.059009 +epoch: 1, batch: 39257, sum loss: 4465.829590, avg loss: 2.585889, ppl: 13.275081 +epoch: 1, batch: 39258, sum loss: 4571.954102, avg loss: 2.724645, ppl: 15.250998 +epoch: 1, batch: 39259, sum loss: 3878.085693, avg loss: 2.252082, ppl: 9.507513 +epoch: 1, batch: 39260, sum loss: 4784.671875, avg loss: 2.740362, ppl: 15.492591 +epoch: 1, batch: 39261, sum loss: 4493.198730, avg loss: 2.632220, ppl: 13.904597 +epoch: 1, batch: 39262, sum loss: 5022.878906, avg loss: 2.832983, ppl: 16.996088 +epoch: 1, batch: 39263, sum loss: 5522.470215, avg loss: 2.957938, ppl: 19.258219 +epoch: 1, batch: 39264, sum loss: 4300.915039, avg loss: 2.664755, ppl: 14.364435 +epoch: 1, batch: 39265, sum loss: 
3067.217041, avg loss: 2.277073, ppl: 9.748105 +epoch: 1, batch: 39266, sum loss: 3791.747559, avg loss: 2.633158, ppl: 13.917655 +epoch: 1, batch: 39267, sum loss: 2954.669922, avg loss: 2.206624, ppl: 9.084996 +epoch: 1, batch: 39268, sum loss: 4241.234375, avg loss: 2.533593, ppl: 12.598691 +epoch: 1, batch: 39269, sum loss: 4662.348633, avg loss: 2.653585, ppl: 14.204868 +epoch: 1, batch: 39270, sum loss: 4798.230957, avg loss: 2.682074, ppl: 14.615378 +epoch: 1, batch: 39271, sum loss: 5691.746582, avg loss: 3.066674, ppl: 21.470367 +epoch: 1, batch: 39272, sum loss: 4470.468262, avg loss: 2.774965, ppl: 16.038063 +epoch: 1, batch: 39273, sum loss: 4658.156250, avg loss: 2.660284, ppl: 14.300344 +epoch: 1, batch: 39274, sum loss: 4841.596191, avg loss: 2.887058, ppl: 17.940451 +epoch: 1, batch: 39275, sum loss: 4639.727051, avg loss: 2.602203, ppl: 13.493426 +epoch: 1, batch: 39276, sum loss: 4423.241211, avg loss: 2.794214, ppl: 16.349777 +epoch: 1, batch: 39277, sum loss: 4448.026367, avg loss: 2.571113, ppl: 13.080379 +epoch: 1, batch: 39278, sum loss: 4837.418945, avg loss: 2.484550, ppl: 11.995724 +epoch: 1, batch: 39279, sum loss: 3255.691406, avg loss: 2.313924, ppl: 10.114038 +epoch: 1, batch: 39280, sum loss: 3827.812988, avg loss: 2.452154, ppl: 11.613339 +epoch: 1, batch: 39281, sum loss: 4098.508301, avg loss: 2.483944, ppl: 11.988459 +epoch: 1, batch: 39282, sum loss: 4223.135254, avg loss: 2.469670, ppl: 11.818544 +epoch: 1, batch: 39283, sum loss: 3686.809570, avg loss: 2.504626, ppl: 12.238981 +epoch: 1, batch: 39284, sum loss: 4085.333984, avg loss: 2.437550, ppl: 11.444970 +epoch: 1, batch: 39285, sum loss: 2789.990723, avg loss: 2.244562, ppl: 9.436280 +epoch: 1, batch: 39286, sum loss: 4914.723633, avg loss: 2.899542, ppl: 18.165821 +epoch: 1, batch: 39287, sum loss: 4368.174805, avg loss: 2.437598, ppl: 11.445513 +epoch: 1, batch: 39288, sum loss: 4521.107910, avg loss: 2.567353, ppl: 13.031279 +epoch: 1, batch: 39289, sum loss: 
4235.418945, avg loss: 2.588887, ppl: 13.314943 +epoch: 1, batch: 39290, sum loss: 4689.583496, avg loss: 2.631641, ppl: 13.896551 +epoch: 1, batch: 39291, sum loss: 3388.145020, avg loss: 2.441027, ppl: 11.484826 +epoch: 1, batch: 39292, sum loss: 4988.201660, avg loss: 2.903493, ppl: 18.237745 +epoch: 1, batch: 39293, sum loss: 4359.776855, avg loss: 2.588941, ppl: 13.315667 +epoch: 1, batch: 39294, sum loss: 4059.624512, avg loss: 2.663796, ppl: 14.350657 +epoch: 1, batch: 39295, sum loss: 4786.063965, avg loss: 2.719354, ppl: 15.170525 +epoch: 1, batch: 39296, sum loss: 4751.830078, avg loss: 2.613768, ppl: 13.650387 +epoch: 1, batch: 39297, sum loss: 5536.772949, avg loss: 2.711446, ppl: 15.051024 +epoch: 1, batch: 39298, sum loss: 3844.050781, avg loss: 2.419163, ppl: 11.236456 +epoch: 1, batch: 39299, sum loss: 5017.769531, avg loss: 2.635383, ppl: 13.948659 +epoch: 1, batch: 39300, sum loss: 4085.926758, avg loss: 2.440817, ppl: 11.482414 +epoch: 1, batch: 39301, sum loss: 5130.423828, avg loss: 2.673488, ppl: 14.490426 +epoch: 1, batch: 39302, sum loss: 4680.207031, avg loss: 2.372127, ppl: 10.720173 +epoch: 1, batch: 39303, sum loss: 4571.078613, avg loss: 2.531051, ppl: 12.566711 +epoch: 1, batch: 39304, sum loss: 4559.209961, avg loss: 2.652246, ppl: 14.185858 +epoch: 1, batch: 39305, sum loss: 5238.776367, avg loss: 2.995298, ppl: 19.991320 +epoch: 1, batch: 39306, sum loss: 4412.971680, avg loss: 2.615870, ppl: 13.679106 +epoch: 1, batch: 39307, sum loss: 4014.326660, avg loss: 2.501138, ppl: 12.196365 +epoch: 1, batch: 39308, sum loss: 3660.191895, avg loss: 2.340276, ppl: 10.384105 +epoch: 1, batch: 39309, sum loss: 4877.517090, avg loss: 2.720311, ppl: 15.185040 +epoch: 1, batch: 39310, sum loss: 4431.860840, avg loss: 2.596286, ppl: 13.413831 +epoch: 1, batch: 39311, sum loss: 4243.819336, avg loss: 2.426426, ppl: 11.318359 +epoch: 1, batch: 39312, sum loss: 4368.274414, avg loss: 2.383128, ppl: 10.838758 +epoch: 1, batch: 39313, sum loss: 
3100.960693, avg loss: 2.270103, ppl: 9.680398 +epoch: 1, batch: 39314, sum loss: 4908.612305, avg loss: 2.788984, ppl: 16.264488 +epoch: 1, batch: 39315, sum loss: 3816.686768, avg loss: 2.556388, ppl: 12.889174 +epoch: 1, batch: 39316, sum loss: 4357.640625, avg loss: 2.683276, ppl: 14.632955 +epoch: 1, batch: 39317, sum loss: 4363.195801, avg loss: 2.560561, ppl: 12.943075 +epoch: 1, batch: 39318, sum loss: 5150.644531, avg loss: 2.872641, ppl: 17.683653 +epoch: 1, batch: 39319, sum loss: 4047.567871, avg loss: 2.495418, ppl: 12.126800 +epoch: 1, batch: 39320, sum loss: 3221.455811, avg loss: 2.296120, ppl: 9.935555 +epoch: 1, batch: 39321, sum loss: 3628.570068, avg loss: 2.248185, ppl: 9.470529 +epoch: 1, batch: 39322, sum loss: 4368.738770, avg loss: 2.747634, ppl: 15.605672 +epoch: 1, batch: 39323, sum loss: 4001.015137, avg loss: 2.576314, ppl: 13.148580 +epoch: 1, batch: 39324, sum loss: 3810.493896, avg loss: 2.441060, ppl: 11.485204 +epoch: 1, batch: 39325, sum loss: 4300.946777, avg loss: 2.281669, ppl: 9.793015 +epoch: 1, batch: 39326, sum loss: 6681.561523, avg loss: 2.957752, ppl: 19.254639 +epoch: 1, batch: 39327, sum loss: 4029.467529, avg loss: 2.317118, ppl: 10.146387 +epoch: 1, batch: 39328, sum loss: 3630.618408, avg loss: 2.540671, ppl: 12.688177 +epoch: 1, batch: 39329, sum loss: 5004.098633, avg loss: 2.706381, ppl: 14.974984 +epoch: 1, batch: 39330, sum loss: 3866.194336, avg loss: 2.367541, ppl: 10.671118 +epoch: 1, batch: 39331, sum loss: 4031.587402, avg loss: 2.367344, ppl: 10.669022 +epoch: 1, batch: 39332, sum loss: 5490.892578, avg loss: 2.780199, ppl: 16.122227 +epoch: 1, batch: 39333, sum loss: 5324.141113, avg loss: 2.747235, ppl: 15.599441 +epoch: 1, batch: 39334, sum loss: 4051.337402, avg loss: 2.674150, ppl: 14.500023 +epoch: 1, batch: 39335, sum loss: 3703.301270, avg loss: 2.240352, ppl: 9.396635 +epoch: 1, batch: 39336, sum loss: 4652.328613, avg loss: 2.600519, ppl: 13.470727 +epoch: 1, batch: 39337, sum loss: 3956.838867, 
avg loss: 2.305850, ppl: 10.032705 +epoch: 1, batch: 39338, sum loss: 4841.408203, avg loss: 2.504609, ppl: 12.238769 +epoch: 1, batch: 39339, sum loss: 3950.307373, avg loss: 2.497034, ppl: 12.146409 +epoch: 1, batch: 39340, sum loss: 3819.654541, avg loss: 2.529572, ppl: 12.548141 +epoch: 1, batch: 39341, sum loss: 4710.495605, avg loss: 2.619853, ppl: 13.733702 +epoch: 1, batch: 39342, sum loss: 5002.734375, avg loss: 2.748755, ppl: 15.623173 +epoch: 1, batch: 39343, sum loss: 4029.478027, avg loss: 2.355043, ppl: 10.538581 +epoch: 1, batch: 39344, sum loss: 4458.391602, avg loss: 2.537502, ppl: 12.648041 +epoch: 1, batch: 39345, sum loss: 3825.964111, avg loss: 2.500630, ppl: 12.190173 +epoch: 1, batch: 39346, sum loss: 3755.160645, avg loss: 2.418004, ppl: 11.223438 +epoch: 1, batch: 39347, sum loss: 4449.588867, avg loss: 2.526740, ppl: 12.512647 +epoch: 1, batch: 39348, sum loss: 4998.921387, avg loss: 2.886213, ppl: 17.925303 +epoch: 1, batch: 39349, sum loss: 3593.583496, avg loss: 2.291826, ppl: 9.892988 +epoch: 1, batch: 39350, sum loss: 4335.593262, avg loss: 2.716537, ppl: 15.127844 +epoch: 1, batch: 39351, sum loss: 3982.558594, avg loss: 2.475176, ppl: 11.883802 +epoch: 1, batch: 39352, sum loss: 3866.936523, avg loss: 2.477218, ppl: 11.908086 +epoch: 1, batch: 39353, sum loss: 4892.983398, avg loss: 2.886716, ppl: 17.934315 +epoch: 1, batch: 39354, sum loss: 4411.731445, avg loss: 2.685168, ppl: 14.660668 +epoch: 1, batch: 39355, sum loss: 3310.137207, avg loss: 2.457414, ppl: 11.674586 +epoch: 1, batch: 39356, sum loss: 4750.767090, avg loss: 2.702370, ppl: 14.915041 +epoch: 1, batch: 39357, sum loss: 5208.717285, avg loss: 2.750115, ppl: 15.644429 +epoch: 1, batch: 39358, sum loss: 3839.856201, avg loss: 2.339949, ppl: 10.380706 +epoch: 1, batch: 39359, sum loss: 4402.369141, avg loss: 2.597268, ppl: 13.427004 +epoch: 1, batch: 39360, sum loss: 4179.631348, avg loss: 2.712285, ppl: 15.063658 +epoch: 1, batch: 39361, sum loss: 3896.315430, avg 
loss: 2.573524, ppl: 13.111953 +epoch: 1, batch: 39362, sum loss: 4748.072266, avg loss: 2.634890, ppl: 13.941783 +epoch: 1, batch: 39363, sum loss: 4654.518555, avg loss: 2.577253, ppl: 13.160933 +epoch: 1, batch: 39364, sum loss: 4792.861328, avg loss: 2.784928, ppl: 16.198652 +epoch: 1, batch: 39365, sum loss: 3928.212158, avg loss: 2.737430, ppl: 15.447236 +epoch: 1, batch: 39366, sum loss: 4523.559082, avg loss: 2.532788, ppl: 12.588554 +epoch: 1, batch: 39367, sum loss: 4285.958984, avg loss: 2.742136, ppl: 15.520104 +epoch: 1, batch: 39368, sum loss: 5321.285156, avg loss: 2.848654, ppl: 17.264524 +epoch: 1, batch: 39369, sum loss: 3505.673584, avg loss: 2.335559, ppl: 10.335232 +epoch: 1, batch: 39370, sum loss: 4924.945801, avg loss: 2.683894, ppl: 14.642004 +epoch: 1, batch: 39371, sum loss: 5831.865723, avg loss: 2.943900, ppl: 18.989756 +epoch: 1, batch: 39372, sum loss: 4076.950439, avg loss: 2.453039, ppl: 11.623616 +epoch: 1, batch: 39373, sum loss: 5355.149902, avg loss: 2.899378, ppl: 18.162836 +epoch: 1, batch: 39374, sum loss: 4713.649414, avg loss: 2.708994, ppl: 15.014162 +epoch: 1, batch: 39375, sum loss: 4042.039062, avg loss: 2.498170, ppl: 12.160219 +epoch: 1, batch: 39376, sum loss: 3955.274902, avg loss: 2.408816, ppl: 11.120782 +epoch: 1, batch: 39377, sum loss: 4727.825195, avg loss: 2.644197, ppl: 14.072147 +epoch: 1, batch: 39378, sum loss: 5627.658203, avg loss: 2.937191, ppl: 18.862791 +epoch: 1, batch: 39379, sum loss: 3371.499023, avg loss: 2.325172, ppl: 10.228436 +epoch: 1, batch: 39380, sum loss: 4044.939941, avg loss: 2.613010, ppl: 13.640051 +epoch: 1, batch: 39381, sum loss: 3749.039551, avg loss: 2.420297, ppl: 11.249196 +epoch: 1, batch: 39382, sum loss: 4092.486328, avg loss: 2.443276, ppl: 11.510684 +epoch: 1, batch: 39383, sum loss: 5312.260254, avg loss: 2.930094, ppl: 18.729391 +epoch: 1, batch: 39384, sum loss: 3953.558350, avg loss: 2.580652, ppl: 13.205742 +epoch: 1, batch: 39385, sum loss: 4659.465820, avg loss: 
2.688670, ppl: 14.712102 +epoch: 1, batch: 39386, sum loss: 3915.380859, avg loss: 2.606778, ppl: 13.555307 +epoch: 1, batch: 39387, sum loss: 4901.207031, avg loss: 2.688539, ppl: 14.710173 +epoch: 1, batch: 39388, sum loss: 3791.801025, avg loss: 2.410554, ppl: 11.140127 +epoch: 1, batch: 39389, sum loss: 3730.958252, avg loss: 2.272204, ppl: 9.700755 +epoch: 1, batch: 39390, sum loss: 4710.622070, avg loss: 2.694864, ppl: 14.803506 +epoch: 1, batch: 39391, sum loss: 3890.760010, avg loss: 2.276630, ppl: 9.743786 +epoch: 1, batch: 39392, sum loss: 4253.328125, avg loss: 2.531743, ppl: 12.575407 +epoch: 1, batch: 39393, sum loss: 3389.045654, avg loss: 2.228169, ppl: 9.282858 +epoch: 1, batch: 39394, sum loss: 4248.197754, avg loss: 2.536238, ppl: 12.632056 +epoch: 1, batch: 39395, sum loss: 4564.757324, avg loss: 2.574595, ppl: 13.126006 +epoch: 1, batch: 39396, sum loss: 4549.733398, avg loss: 2.593919, ppl: 13.382108 +epoch: 1, batch: 39397, sum loss: 4458.157227, avg loss: 2.726701, ppl: 15.282394 +epoch: 1, batch: 39398, sum loss: 3706.423340, avg loss: 2.557918, ppl: 12.908917 +epoch: 1, batch: 39399, sum loss: 4816.340332, avg loss: 2.616154, ppl: 13.683003 +epoch: 1, batch: 39400, sum loss: 4011.416016, avg loss: 2.440034, ppl: 11.473433 +epoch: 1, batch: 39401, sum loss: 4786.201660, avg loss: 2.543146, ppl: 12.719628 +epoch: 1, batch: 39402, sum loss: 4063.050049, avg loss: 2.553771, ppl: 12.855494 +epoch: 1, batch: 39403, sum loss: 3700.304932, avg loss: 2.368953, ppl: 10.686200 +epoch: 1, batch: 39404, sum loss: 4303.460938, avg loss: 2.449323, ppl: 11.580506 +epoch: 1, batch: 39405, sum loss: 3099.498535, avg loss: 1.895718, ppl: 6.657325 +epoch: 1, batch: 39406, sum loss: 4376.649414, avg loss: 2.408723, ppl: 11.119751 +epoch: 1, batch: 39407, sum loss: 3855.746826, avg loss: 2.380091, ppl: 10.805883 +epoch: 1, batch: 39408, sum loss: 4677.455566, avg loss: 2.668258, ppl: 14.414832 +epoch: 1, batch: 39409, sum loss: 3624.240234, avg loss: 2.395400, 
ppl: 10.972587 +epoch: 1, batch: 39410, sum loss: 4690.637695, avg loss: 2.895456, ppl: 18.091742 +epoch: 1, batch: 39411, sum loss: 5331.140625, avg loss: 2.969995, ppl: 19.491817 +epoch: 1, batch: 39412, sum loss: 3735.764160, avg loss: 2.436898, ppl: 11.437504 +epoch: 1, batch: 39413, sum loss: 4276.229004, avg loss: 2.266152, ppl: 9.642227 +epoch: 1, batch: 39414, sum loss: 4134.420898, avg loss: 2.595368, ppl: 13.401517 +epoch: 1, batch: 39415, sum loss: 4617.954102, avg loss: 2.714847, ppl: 15.102293 +epoch: 1, batch: 39416, sum loss: 4177.240234, avg loss: 2.462996, ppl: 11.739926 +epoch: 1, batch: 39417, sum loss: 3911.078125, avg loss: 2.243877, ppl: 9.429822 +epoch: 1, batch: 39418, sum loss: 4007.910400, avg loss: 2.506510, ppl: 12.262067 +epoch: 1, batch: 39419, sum loss: 3548.997803, avg loss: 2.234885, ppl: 9.345411 +epoch: 1, batch: 39420, sum loss: 5429.239258, avg loss: 2.732380, ppl: 15.369421 +epoch: 1, batch: 39421, sum loss: 3354.859863, avg loss: 2.293137, ppl: 9.905967 +epoch: 1, batch: 39422, sum loss: 4869.794922, avg loss: 2.625226, ppl: 13.807698 +epoch: 1, batch: 39423, sum loss: 3692.070068, avg loss: 2.291788, ppl: 9.892609 +epoch: 1, batch: 39424, sum loss: 4443.479980, avg loss: 2.850212, ppl: 17.291445 +epoch: 1, batch: 39425, sum loss: 4739.259766, avg loss: 2.812617, ppl: 16.653444 +epoch: 1, batch: 39426, sum loss: 4152.414551, avg loss: 2.587174, ppl: 13.292157 +epoch: 1, batch: 39427, sum loss: 5239.216797, avg loss: 2.702020, ppl: 14.909822 +epoch: 1, batch: 39428, sum loss: 4077.584717, avg loss: 2.500052, ppl: 12.183127 +epoch: 1, batch: 39429, sum loss: 3853.954102, avg loss: 2.518924, ppl: 12.415234 +epoch: 1, batch: 39430, sum loss: 3738.230469, avg loss: 2.101310, ppl: 8.176875 +epoch: 1, batch: 39431, sum loss: 4209.687012, avg loss: 2.519262, ppl: 12.419432 +epoch: 1, batch: 39432, sum loss: 4006.649902, avg loss: 2.434174, ppl: 11.406391 +epoch: 1, batch: 39433, sum loss: 3448.302246, avg loss: 2.450819, ppl: 
11.597841 +epoch: 1, batch: 39434, sum loss: 4046.355469, avg loss: 2.440504, ppl: 11.478823 +epoch: 1, batch: 39435, sum loss: 4953.495117, avg loss: 2.626456, ppl: 13.824682 +epoch: 1, batch: 39436, sum loss: 4030.805176, avg loss: 2.557617, ppl: 12.905033 +epoch: 1, batch: 39437, sum loss: 4180.084961, avg loss: 2.388620, ppl: 10.898442 +epoch: 1, batch: 39438, sum loss: 4216.726074, avg loss: 2.529530, ppl: 12.547602 +epoch: 1, batch: 39439, sum loss: 3763.997803, avg loss: 2.626656, ppl: 13.827455 +epoch: 1, batch: 39440, sum loss: 3738.815430, avg loss: 2.246884, ppl: 9.458221 +epoch: 1, batch: 39441, sum loss: 4583.352051, avg loss: 2.560532, ppl: 12.942699 +epoch: 1, batch: 39442, sum loss: 4896.754883, avg loss: 2.639760, ppl: 14.009841 +epoch: 1, batch: 39443, sum loss: 3916.571533, avg loss: 2.232937, ppl: 9.327219 +epoch: 1, batch: 39444, sum loss: 4679.848633, avg loss: 2.749617, ppl: 15.636647 +epoch: 1, batch: 39445, sum loss: 4594.992188, avg loss: 2.717323, ppl: 15.139732 +epoch: 1, batch: 39446, sum loss: 4309.172852, avg loss: 2.635580, ppl: 13.951396 +epoch: 1, batch: 39447, sum loss: 4156.690918, avg loss: 2.516157, ppl: 12.380924 +epoch: 1, batch: 39448, sum loss: 5088.820801, avg loss: 2.780777, ppl: 16.131542 +epoch: 1, batch: 39449, sum loss: 4260.478027, avg loss: 2.442935, ppl: 11.506758 +epoch: 1, batch: 39450, sum loss: 4736.016602, avg loss: 2.583752, ppl: 13.246742 +epoch: 1, batch: 39451, sum loss: 3572.125244, avg loss: 2.407092, ppl: 11.101634 +epoch: 1, batch: 39452, sum loss: 3577.436035, avg loss: 2.277171, ppl: 9.749064 +epoch: 1, batch: 39453, sum loss: 4877.572266, avg loss: 2.768202, ppl: 15.929971 +epoch: 1, batch: 39454, sum loss: 4810.056641, avg loss: 2.788439, ppl: 16.255621 +epoch: 1, batch: 39455, sum loss: 4287.743164, avg loss: 2.578318, ppl: 13.174961 +epoch: 1, batch: 39456, sum loss: 4128.255859, avg loss: 2.576939, ppl: 13.156798 +epoch: 1, batch: 39457, sum loss: 4272.796875, avg loss: 2.632654, ppl: 13.910636 
+epoch: 1, batch: 39458, sum loss: 4441.782715, avg loss: 2.781329, ppl: 16.140459 +epoch: 1, batch: 39459, sum loss: 4202.614258, avg loss: 2.530171, ppl: 12.555655 +epoch: 1, batch: 39460, sum loss: 3833.867432, avg loss: 2.431114, ppl: 11.371548 +epoch: 1, batch: 39461, sum loss: 4231.075684, avg loss: 2.618240, ppl: 13.711568 +epoch: 1, batch: 39462, sum loss: 3676.999756, avg loss: 2.521948, ppl: 12.452826 +epoch: 1, batch: 39463, sum loss: 4437.316895, avg loss: 2.661858, ppl: 14.322870 +epoch: 1, batch: 39464, sum loss: 4641.485840, avg loss: 2.474140, ppl: 11.871490 +epoch: 1, batch: 39465, sum loss: 4087.230469, avg loss: 2.568969, ppl: 13.052367 +epoch: 1, batch: 39466, sum loss: 4644.305176, avg loss: 2.543431, ppl: 12.723253 +epoch: 1, batch: 39467, sum loss: 4541.397949, avg loss: 2.602520, ppl: 13.497712 +epoch: 1, batch: 39468, sum loss: 3456.085449, avg loss: 2.285771, ppl: 9.833263 +epoch: 1, batch: 39469, sum loss: 4357.413086, avg loss: 2.420785, ppl: 11.254693 +epoch: 1, batch: 39470, sum loss: 3321.330566, avg loss: 2.410254, ppl: 11.136795 +epoch: 1, batch: 39471, sum loss: 3828.812744, avg loss: 2.518956, ppl: 12.415627 +epoch: 1, batch: 39472, sum loss: 4451.868164, avg loss: 2.470515, ppl: 11.828540 +epoch: 1, batch: 39473, sum loss: 5296.875977, avg loss: 2.784898, ppl: 16.198166 +epoch: 1, batch: 39474, sum loss: 3508.096924, avg loss: 2.230195, ppl: 9.301681 +epoch: 1, batch: 39475, sum loss: 4716.011719, avg loss: 2.682600, ppl: 14.623071 +epoch: 1, batch: 39476, sum loss: 3652.145020, avg loss: 2.374607, ppl: 10.746784 +epoch: 1, batch: 39477, sum loss: 4765.122070, avg loss: 2.807968, ppl: 16.576208 +epoch: 1, batch: 39478, sum loss: 4239.495605, avg loss: 2.504132, ppl: 12.232940 +epoch: 1, batch: 39479, sum loss: 5011.166016, avg loss: 2.735353, ppl: 15.415180 +epoch: 1, batch: 39480, sum loss: 3834.603516, avg loss: 2.293423, ppl: 9.908799 +epoch: 1, batch: 39481, sum loss: 3906.777588, avg loss: 2.510782, ppl: 12.314562 +epoch: 1, 
batch: 39482, sum loss: 4488.756836, avg loss: 2.578264, ppl: 13.174241 +epoch: 1, batch: 39483, sum loss: 3953.566406, avg loss: 2.505429, ppl: 12.248807 +epoch: 1, batch: 39484, sum loss: 4769.038086, avg loss: 2.909724, ppl: 18.351738 +epoch: 1, batch: 39485, sum loss: 3970.033203, avg loss: 2.536762, ppl: 12.638686 +epoch: 1, batch: 39486, sum loss: 4190.474609, avg loss: 2.452004, ppl: 11.611592 +epoch: 1, batch: 39487, sum loss: 3567.447021, avg loss: 2.501716, ppl: 12.203416 +epoch: 1, batch: 39488, sum loss: 4318.495117, avg loss: 2.712623, ppl: 15.068744 +epoch: 1, batch: 39489, sum loss: 4243.890625, avg loss: 2.539731, ppl: 12.676264 +epoch: 1, batch: 39490, sum loss: 4338.939453, avg loss: 2.567420, ppl: 13.032155 +epoch: 1, batch: 39491, sum loss: 4526.997070, avg loss: 2.647367, ppl: 14.116817 +epoch: 1, batch: 39492, sum loss: 4584.555664, avg loss: 2.703158, ppl: 14.926795 +epoch: 1, batch: 39493, sum loss: 4893.389648, avg loss: 2.766190, ppl: 15.897944 +epoch: 1, batch: 39494, sum loss: 3523.848877, avg loss: 2.452226, ppl: 11.614173 +epoch: 1, batch: 39495, sum loss: 4479.205566, avg loss: 2.527768, ppl: 12.525522 +epoch: 1, batch: 39496, sum loss: 3730.198730, avg loss: 2.219035, ppl: 9.198454 +epoch: 1, batch: 39497, sum loss: 3535.076416, avg loss: 2.372535, ppl: 10.724540 +epoch: 1, batch: 39498, sum loss: 5949.765625, avg loss: 2.882638, ppl: 17.861338 +epoch: 1, batch: 39499, sum loss: 4703.976074, avg loss: 2.709664, ppl: 15.024221 +epoch: 1, batch: 39500, sum loss: 4113.479980, avg loss: 2.605117, ppl: 13.532809 +epoch: 1, batch: 39501, sum loss: 3661.598633, avg loss: 2.377661, ppl: 10.779665 +epoch: 1, batch: 39502, sum loss: 3465.930908, avg loss: 2.349784, ppl: 10.483301 +epoch: 1, batch: 39503, sum loss: 4400.240234, avg loss: 2.558279, ppl: 12.913577 +epoch: 1, batch: 39504, sum loss: 4097.745117, avg loss: 2.461108, ppl: 11.717790 +epoch: 1, batch: 39505, sum loss: 5020.369629, avg loss: 2.696224, ppl: 14.823659 +epoch: 1, batch: 
39506, sum loss: 5445.022949, avg loss: 2.671748, ppl: 14.465235 +epoch: 1, batch: 39507, sum loss: 4287.875000, avg loss: 2.366377, ppl: 10.658705 +epoch: 1, batch: 39508, sum loss: 4262.795898, avg loss: 2.618425, ppl: 13.714106 +epoch: 1, batch: 39509, sum loss: 4321.116699, avg loss: 2.431692, ppl: 11.378117 +epoch: 1, batch: 39510, sum loss: 4256.272461, avg loss: 2.554785, ppl: 12.868536 +epoch: 1, batch: 39511, sum loss: 4154.335938, avg loss: 2.612790, ppl: 13.637043 +epoch: 1, batch: 39512, sum loss: 4164.779785, avg loss: 2.632604, ppl: 13.909946 +epoch: 1, batch: 39513, sum loss: 4232.483398, avg loss: 2.456462, ppl: 11.663469 +epoch: 1, batch: 39514, sum loss: 4263.842285, avg loss: 2.623903, ppl: 13.789436 +epoch: 1, batch: 39515, sum loss: 3926.253418, avg loss: 2.492859, ppl: 12.095810 +epoch: 1, batch: 39516, sum loss: 3672.900879, avg loss: 2.366560, ppl: 10.660656 +epoch: 1, batch: 39517, sum loss: 4858.932617, avg loss: 2.652256, ppl: 14.186003 +epoch: 1, batch: 39518, sum loss: 4970.841797, avg loss: 2.669625, ppl: 14.434556 +epoch: 1, batch: 39519, sum loss: 3819.969971, avg loss: 2.667577, ppl: 14.405024 +epoch: 1, batch: 39520, sum loss: 3870.166016, avg loss: 2.522924, ppl: 12.464996 +epoch: 1, batch: 39521, sum loss: 4410.922852, avg loss: 2.714414, ppl: 15.095763 +epoch: 1, batch: 39522, sum loss: 5119.380371, avg loss: 2.743505, ppl: 15.541359 +epoch: 1, batch: 39523, sum loss: 4111.481445, avg loss: 2.357501, ppl: 10.564515 +epoch: 1, batch: 39524, sum loss: 4877.584961, avg loss: 2.572566, ppl: 13.099395 +epoch: 1, batch: 39525, sum loss: 2682.258789, avg loss: 2.102084, ppl: 8.183204 +epoch: 1, batch: 39526, sum loss: 3942.536133, avg loss: 2.488975, ppl: 12.048918 +epoch: 1, batch: 39527, sum loss: 4659.874023, avg loss: 2.583079, ppl: 13.237830 +epoch: 1, batch: 39528, sum loss: 4027.916504, avg loss: 2.429383, ppl: 11.351873 +epoch: 1, batch: 39529, sum loss: 4198.399902, avg loss: 2.517026, ppl: 12.391694 +epoch: 1, batch: 39530, 
sum loss: 4062.811768, avg loss: 2.544028, ppl: 12.730843 +epoch: 1, batch: 39531, sum loss: 4618.667480, avg loss: 2.508782, ppl: 12.289950 +epoch: 1, batch: 39532, sum loss: 4995.899902, avg loss: 2.505466, ppl: 12.249271 +epoch: 1, batch: 39533, sum loss: 5059.528320, avg loss: 2.807729, ppl: 16.572248 +epoch: 1, batch: 39534, sum loss: 4547.614258, avg loss: 2.668788, ppl: 14.422475 +epoch: 1, batch: 39535, sum loss: 5093.184570, avg loss: 2.840594, ppl: 17.125933 +epoch: 1, batch: 39536, sum loss: 4607.608398, avg loss: 2.692933, ppl: 14.774952 +epoch: 1, batch: 39537, sum loss: 4630.534180, avg loss: 2.544250, ppl: 12.733671 +epoch: 1, batch: 39538, sum loss: 4006.058838, avg loss: 2.541915, ppl: 12.703981 +epoch: 1, batch: 39539, sum loss: 4203.523926, avg loss: 2.546047, ppl: 12.756583 +epoch: 1, batch: 39540, sum loss: 4888.799316, avg loss: 2.925673, ppl: 18.646770 +epoch: 1, batch: 39541, sum loss: 4478.116699, avg loss: 2.691176, ppl: 14.749010 +epoch: 1, batch: 39542, sum loss: 4229.172852, avg loss: 2.546161, ppl: 12.758028 +epoch: 1, batch: 39543, sum loss: 3882.088867, avg loss: 2.460132, ppl: 11.706361 +epoch: 1, batch: 39544, sum loss: 4357.574707, avg loss: 2.586098, ppl: 13.277857 +epoch: 1, batch: 39545, sum loss: 4646.338379, avg loss: 2.695092, ppl: 14.806880 +epoch: 1, batch: 39546, sum loss: 5363.866699, avg loss: 2.760611, ppl: 15.809497 +epoch: 1, batch: 39547, sum loss: 3535.267578, avg loss: 2.309123, ppl: 10.065596 +epoch: 1, batch: 39548, sum loss: 3229.010254, avg loss: 2.550561, ppl: 12.814290 +epoch: 1, batch: 39549, sum loss: 3416.156006, avg loss: 2.218283, ppl: 9.191537 +epoch: 1, batch: 39550, sum loss: 4392.863281, avg loss: 2.625740, ppl: 13.814797 +epoch: 1, batch: 39551, sum loss: 4126.408203, avg loss: 2.381078, ppl: 10.816557 +epoch: 1, batch: 39552, sum loss: 4771.352539, avg loss: 2.664072, ppl: 14.354616 +epoch: 1, batch: 39553, sum loss: 4974.875000, avg loss: 2.829849, ppl: 16.942905 +epoch: 1, batch: 39554, sum 
loss: 4584.097656, avg loss: 2.601645, ppl: 13.485901 +epoch: 1, batch: 39555, sum loss: 4257.167480, avg loss: 2.457949, ppl: 11.680829 +epoch: 1, batch: 39556, sum loss: 4702.645020, avg loss: 2.627176, ppl: 13.834643 +epoch: 1, batch: 39557, sum loss: 3785.058350, avg loss: 2.473894, ppl: 11.868577 +epoch: 1, batch: 39558, sum loss: 4098.001953, avg loss: 2.476134, ppl: 11.895192 +epoch: 1, batch: 39559, sum loss: 3840.237061, avg loss: 2.652097, ppl: 14.183758 +epoch: 1, batch: 39560, sum loss: 3889.093506, avg loss: 2.580686, ppl: 13.206193 +epoch: 1, batch: 39561, sum loss: 4264.598145, avg loss: 2.733717, ppl: 15.389981 +epoch: 1, batch: 39562, sum loss: 5092.704102, avg loss: 3.084618, ppl: 21.859118 +epoch: 1, batch: 39563, sum loss: 3704.109863, avg loss: 2.608528, ppl: 13.579050 +epoch: 1, batch: 39564, sum loss: 4234.181152, avg loss: 2.678166, ppl: 14.558374 +epoch: 1, batch: 39565, sum loss: 3880.681641, avg loss: 2.339169, ppl: 10.372616 +epoch: 1, batch: 39566, sum loss: 3392.486084, avg loss: 2.367401, ppl: 10.669630 +epoch: 1, batch: 39567, sum loss: 3685.403564, avg loss: 2.591704, ppl: 13.352509 +epoch: 1, batch: 39568, sum loss: 3494.347656, avg loss: 2.459076, ppl: 11.694006 +epoch: 1, batch: 39569, sum loss: 4821.021484, avg loss: 2.590554, ppl: 13.337165 +epoch: 1, batch: 39570, sum loss: 3694.153076, avg loss: 2.321906, ppl: 10.195091 +epoch: 1, batch: 39571, sum loss: 4925.961426, avg loss: 2.819669, ppl: 16.771294 +epoch: 1, batch: 39572, sum loss: 3856.315918, avg loss: 2.448454, ppl: 11.570450 +epoch: 1, batch: 39573, sum loss: 4065.733154, avg loss: 2.706879, ppl: 14.982448 +epoch: 1, batch: 39574, sum loss: 3771.857178, avg loss: 2.458838, ppl: 11.691216 +epoch: 1, batch: 39575, sum loss: 4743.568848, avg loss: 2.675448, ppl: 14.518852 +epoch: 1, batch: 39576, sum loss: 3338.356201, avg loss: 2.359262, ppl: 10.583140 +epoch: 1, batch: 39577, sum loss: 4471.781738, avg loss: 2.745108, ppl: 15.566300 +epoch: 1, batch: 39578, sum loss: 
3318.377441, avg loss: 2.285384, ppl: 9.829459 +epoch: 1, batch: 39579, sum loss: 4093.678711, avg loss: 2.663421, ppl: 14.345286 +epoch: 1, batch: 39580, sum loss: 4640.670410, avg loss: 2.682468, ppl: 14.621140 +epoch: 1, batch: 39581, sum loss: 4118.567383, avg loss: 2.388960, ppl: 10.902154 +epoch: 1, batch: 39582, sum loss: 4460.058105, avg loss: 2.481947, ppl: 11.964533 +epoch: 1, batch: 39583, sum loss: 4442.870117, avg loss: 2.380959, ppl: 10.815272 +epoch: 1, batch: 39584, sum loss: 4138.937012, avg loss: 2.547038, ppl: 12.769226 +epoch: 1, batch: 39585, sum loss: 4385.770996, avg loss: 2.615248, ppl: 13.670606 +epoch: 1, batch: 39586, sum loss: 4266.386230, avg loss: 2.574765, ppl: 13.128234 +epoch: 1, batch: 39587, sum loss: 4030.687988, avg loss: 2.560793, ppl: 12.946075 +epoch: 1, batch: 39588, sum loss: 3596.576904, avg loss: 2.311425, ppl: 10.088788 +epoch: 1, batch: 39589, sum loss: 4482.302246, avg loss: 2.509688, ppl: 12.301090 +epoch: 1, batch: 39590, sum loss: 4578.458984, avg loss: 2.660348, ppl: 14.301264 +epoch: 1, batch: 39591, sum loss: 4025.556641, avg loss: 2.725496, ppl: 15.263976 +epoch: 1, batch: 39592, sum loss: 4657.137695, avg loss: 2.763880, ppl: 15.861265 +epoch: 1, batch: 39593, sum loss: 4640.682617, avg loss: 2.653335, ppl: 14.201319 +epoch: 1, batch: 39594, sum loss: 3347.297363, avg loss: 2.115864, ppl: 8.296753 +epoch: 1, batch: 39595, sum loss: 4073.454102, avg loss: 2.341066, ppl: 10.392305 +epoch: 1, batch: 39596, sum loss: 4488.847656, avg loss: 2.378828, ppl: 10.792242 +epoch: 1, batch: 39597, sum loss: 4244.509766, avg loss: 2.935346, ppl: 18.828011 +epoch: 1, batch: 39598, sum loss: 4633.220215, avg loss: 2.611736, ppl: 13.622684 +epoch: 1, batch: 39599, sum loss: 5131.124023, avg loss: 2.761638, ppl: 15.825747 +epoch: 1, batch: 39600, sum loss: 4276.353027, avg loss: 2.373115, ppl: 10.730767 +epoch: 1, batch: 39601, sum loss: 3710.288086, avg loss: 2.352751, ppl: 10.514453 +epoch: 1, batch: 39602, sum loss: 
4594.761719, avg loss: 2.682289, ppl: 14.618522 +epoch: 1, batch: 39603, sum loss: 4410.311523, avg loss: 2.690855, ppl: 14.744277 +epoch: 1, batch: 39604, sum loss: 4208.375977, avg loss: 2.799984, ppl: 16.444384 +epoch: 1, batch: 39605, sum loss: 4410.433594, avg loss: 2.577694, ppl: 13.166737 +epoch: 1, batch: 39606, sum loss: 4056.225098, avg loss: 2.410116, ppl: 11.135252 +epoch: 1, batch: 39607, sum loss: 5136.791992, avg loss: 2.769160, ppl: 15.945235 +epoch: 1, batch: 39608, sum loss: 3676.456787, avg loss: 2.362761, ppl: 10.620238 +epoch: 1, batch: 39609, sum loss: 4680.219727, avg loss: 2.677471, ppl: 14.548260 +epoch: 1, batch: 39610, sum loss: 4283.174805, avg loss: 2.676984, ppl: 14.541176 +epoch: 1, batch: 39611, sum loss: 4101.389160, avg loss: 2.488707, ppl: 12.045691 +epoch: 1, batch: 39612, sum loss: 4738.832031, avg loss: 2.763167, ppl: 15.849962 +epoch: 1, batch: 39613, sum loss: 4129.199219, avg loss: 2.566314, ppl: 13.017753 +epoch: 1, batch: 39614, sum loss: 3161.957520, avg loss: 2.119275, ppl: 8.325096 +epoch: 1, batch: 39615, sum loss: 4558.101074, avg loss: 2.497590, ppl: 12.153165 +epoch: 1, batch: 39616, sum loss: 3750.739746, avg loss: 2.469216, ppl: 11.813186 +epoch: 1, batch: 39617, sum loss: 4137.025879, avg loss: 2.531840, ppl: 12.576621 +epoch: 1, batch: 39618, sum loss: 3981.311523, avg loss: 2.347471, ppl: 10.459090 +epoch: 1, batch: 39619, sum loss: 4250.737793, avg loss: 2.527192, ppl: 12.518310 +epoch: 1, batch: 39620, sum loss: 4204.966797, avg loss: 2.755548, ppl: 15.729666 +epoch: 1, batch: 39621, sum loss: 5024.143066, avg loss: 2.861130, ppl: 17.481262 +epoch: 1, batch: 39622, sum loss: 3707.810059, avg loss: 2.508667, ppl: 12.288542 +epoch: 1, batch: 39623, sum loss: 3967.840332, avg loss: 2.620766, ppl: 13.746255 +epoch: 1, batch: 39624, sum loss: 4865.555176, avg loss: 2.777143, ppl: 16.073042 +epoch: 1, batch: 39625, sum loss: 4044.734619, avg loss: 2.433655, ppl: 11.400476 +epoch: 1, batch: 39626, sum loss: 
4487.818848, avg loss: 2.682498, ppl: 14.621572 +epoch: 1, batch: 39627, sum loss: 4197.199219, avg loss: 2.403894, ppl: 11.066186 +epoch: 1, batch: 39628, sum loss: 3300.519043, avg loss: 2.239158, ppl: 9.385427 +epoch: 1, batch: 39629, sum loss: 4299.801758, avg loss: 2.482564, ppl: 11.971927 +epoch: 1, batch: 39630, sum loss: 3908.484375, avg loss: 2.470597, ppl: 11.829504 +epoch: 1, batch: 39631, sum loss: 4483.600098, avg loss: 2.640518, ppl: 14.020471 +epoch: 1, batch: 39632, sum loss: 4512.892090, avg loss: 2.494689, ppl: 12.117964 +epoch: 1, batch: 39633, sum loss: 3345.995605, avg loss: 2.379798, ppl: 10.802717 +epoch: 1, batch: 39634, sum loss: 4532.915039, avg loss: 2.605124, ppl: 13.532897 +epoch: 1, batch: 39635, sum loss: 3895.615234, avg loss: 2.410653, ppl: 11.141232 +epoch: 1, batch: 39636, sum loss: 4542.856445, avg loss: 2.650441, ppl: 14.160288 +epoch: 1, batch: 39637, sum loss: 4358.534668, avg loss: 2.472226, ppl: 11.848795 +epoch: 1, batch: 39638, sum loss: 4354.469238, avg loss: 2.664914, ppl: 14.366713 +epoch: 1, batch: 39639, sum loss: 4139.436523, avg loss: 2.559948, ppl: 12.935150 +epoch: 1, batch: 39640, sum loss: 4106.092285, avg loss: 2.584073, ppl: 13.251000 +epoch: 1, batch: 39641, sum loss: 4290.546387, avg loss: 2.543300, ppl: 12.721579 +epoch: 1, batch: 39642, sum loss: 4300.977539, avg loss: 2.598778, ppl: 13.447295 +epoch: 1, batch: 39643, sum loss: 4897.161621, avg loss: 2.662948, ppl: 14.338502 +epoch: 1, batch: 39644, sum loss: 3929.929443, avg loss: 2.528912, ppl: 12.539856 +epoch: 1, batch: 39645, sum loss: 4706.340820, avg loss: 2.771697, ppl: 15.985736 +epoch: 1, batch: 39646, sum loss: 4205.575684, avg loss: 2.638379, ppl: 13.990501 +epoch: 1, batch: 39647, sum loss: 4927.725098, avg loss: 2.678111, ppl: 14.557572 +epoch: 1, batch: 39648, sum loss: 3783.124023, avg loss: 2.561357, ppl: 12.953383 +epoch: 1, batch: 39649, sum loss: 5506.399414, avg loss: 2.970010, ppl: 19.492121 +epoch: 1, batch: 39650, sum loss: 
4097.058105, avg loss: 2.614587, ppl: 13.661577 +epoch: 1, batch: 39651, sum loss: 4636.738770, avg loss: 2.565987, ppl: 13.013497 +epoch: 1, batch: 39652, sum loss: 3362.577148, avg loss: 2.258279, ppl: 9.566609 +epoch: 1, batch: 39653, sum loss: 4306.353516, avg loss: 2.698216, ppl: 14.853216 +epoch: 1, batch: 39654, sum loss: 4614.321777, avg loss: 2.853631, ppl: 17.350676 +epoch: 1, batch: 39655, sum loss: 4149.811523, avg loss: 2.750041, ppl: 15.643273 +epoch: 1, batch: 39656, sum loss: 5180.945312, avg loss: 3.008679, ppl: 20.260616 +epoch: 1, batch: 39657, sum loss: 3652.760742, avg loss: 2.428697, ppl: 11.344092 +epoch: 1, batch: 39658, sum loss: 3818.104248, avg loss: 2.482512, ppl: 11.971304 +epoch: 1, batch: 39659, sum loss: 4595.188965, avg loss: 2.352887, ppl: 10.515889 +epoch: 1, batch: 39660, sum loss: 4834.018555, avg loss: 2.711172, ppl: 15.046894 +epoch: 1, batch: 39661, sum loss: 4523.110840, avg loss: 2.802423, ppl: 16.484545 +epoch: 1, batch: 39662, sum loss: 4418.189453, avg loss: 2.669601, ppl: 14.434208 +epoch: 1, batch: 39663, sum loss: 4406.108398, avg loss: 2.671988, ppl: 14.468704 +epoch: 1, batch: 39664, sum loss: 4768.730469, avg loss: 2.604441, ppl: 13.523660 +epoch: 1, batch: 39665, sum loss: 3199.233398, avg loss: 2.259346, ppl: 9.576819 +epoch: 1, batch: 39666, sum loss: 3725.895996, avg loss: 2.446419, ppl: 11.546923 +epoch: 1, batch: 39667, sum loss: 4063.802734, avg loss: 2.555851, ppl: 12.882255 +epoch: 1, batch: 39668, sum loss: 3830.544922, avg loss: 2.436733, ppl: 11.435625 +epoch: 1, batch: 39669, sum loss: 3776.934082, avg loss: 2.436732, ppl: 11.435606 +epoch: 1, batch: 39670, sum loss: 3913.967285, avg loss: 2.483482, ppl: 11.982912 +epoch: 1, batch: 39671, sum loss: 4319.061035, avg loss: 2.492245, ppl: 12.088387 +epoch: 1, batch: 39672, sum loss: 3319.775391, avg loss: 2.328033, ppl: 10.257747 +epoch: 1, batch: 39673, sum loss: 3892.959473, avg loss: 2.406032, ppl: 11.089868 +epoch: 1, batch: 39674, sum loss: 
4392.364258, avg loss: 2.467620, ppl: 11.794347 +epoch: 1, batch: 39675, sum loss: 4052.241699, avg loss: 2.545378, ppl: 12.748046 +epoch: 1, batch: 39676, sum loss: 4368.115234, avg loss: 2.587746, ppl: 13.299762 +epoch: 1, batch: 39677, sum loss: 3230.957031, avg loss: 2.332821, ppl: 10.306978 +epoch: 1, batch: 39678, sum loss: 4894.159668, avg loss: 2.801465, ppl: 16.468756 +epoch: 1, batch: 39679, sum loss: 4626.812012, avg loss: 2.580487, ppl: 13.203561 +epoch: 1, batch: 39680, sum loss: 4296.361816, avg loss: 2.533232, ppl: 12.594148 +epoch: 1, batch: 39681, sum loss: 3782.433105, avg loss: 2.437135, ppl: 11.440217 +epoch: 1, batch: 39682, sum loss: 4790.038574, avg loss: 2.703182, ppl: 14.927157 +epoch: 1, batch: 39683, sum loss: 4777.225098, avg loss: 2.515653, ppl: 12.374688 +epoch: 1, batch: 39684, sum loss: 4799.321289, avg loss: 2.700800, ppl: 14.891636 +epoch: 1, batch: 39685, sum loss: 4330.881836, avg loss: 2.710189, ppl: 15.032114 +epoch: 1, batch: 39686, sum loss: 4516.221191, avg loss: 2.659730, ppl: 14.292429 +epoch: 1, batch: 39687, sum loss: 3048.831055, avg loss: 2.187110, ppl: 8.909425 +epoch: 1, batch: 39688, sum loss: 4900.186035, avg loss: 2.496274, ppl: 12.137189 +epoch: 1, batch: 39689, sum loss: 4156.625000, avg loss: 2.619172, ppl: 13.724350 +epoch: 1, batch: 39690, sum loss: 4305.183594, avg loss: 2.308409, ppl: 10.058414 +epoch: 1, batch: 39691, sum loss: 4401.847656, avg loss: 2.531252, ppl: 12.569238 +epoch: 1, batch: 39692, sum loss: 2815.579102, avg loss: 2.144386, ppl: 8.536800 +epoch: 1, batch: 39693, sum loss: 5109.399414, avg loss: 3.111693, ppl: 22.459034 +epoch: 1, batch: 39694, sum loss: 4406.912598, avg loss: 2.725363, ppl: 15.261961 +epoch: 1, batch: 39695, sum loss: 3890.417480, avg loss: 2.626886, ppl: 13.830629 +epoch: 1, batch: 39696, sum loss: 5571.169922, avg loss: 2.832318, ppl: 16.984787 +epoch: 1, batch: 39697, sum loss: 4959.833008, avg loss: 2.961095, ppl: 19.319107 +epoch: 1, batch: 39698, sum loss: 
3786.727295, avg loss: 2.241994, ppl: 9.412077 +epoch: 1, batch: 39699, sum loss: 4899.051270, avg loss: 2.778815, ppl: 16.099936 +epoch: 1, batch: 39700, sum loss: 4198.270996, avg loss: 2.348026, ppl: 10.464894 +epoch: 1, batch: 39701, sum loss: 4755.882812, avg loss: 2.511026, ppl: 12.317560 +epoch: 1, batch: 39702, sum loss: 4882.394043, avg loss: 2.538946, ppl: 12.666319 +epoch: 1, batch: 39703, sum loss: 4003.859619, avg loss: 2.694387, ppl: 14.796453 +epoch: 1, batch: 39704, sum loss: 4363.542480, avg loss: 2.479285, ppl: 11.932735 +epoch: 1, batch: 39705, sum loss: 4365.805664, avg loss: 2.480571, ppl: 11.948088 +epoch: 1, batch: 39706, sum loss: 4551.369141, avg loss: 2.323313, ppl: 10.209437 +epoch: 1, batch: 39707, sum loss: 4941.269531, avg loss: 2.679647, ppl: 14.579948 +epoch: 1, batch: 39708, sum loss: 4299.383301, avg loss: 2.461009, ppl: 11.716631 +epoch: 1, batch: 39709, sum loss: 4120.286133, avg loss: 2.446726, ppl: 11.550467 +epoch: 1, batch: 39710, sum loss: 4459.536621, avg loss: 2.458400, ppl: 11.686093 +epoch: 1, batch: 39711, sum loss: 4695.937500, avg loss: 2.811939, ppl: 16.642153 +epoch: 1, batch: 39712, sum loss: 4600.126465, avg loss: 2.844853, ppl: 17.199024 +epoch: 1, batch: 39713, sum loss: 3055.826660, avg loss: 2.357891, ppl: 10.568637 +epoch: 1, batch: 39714, sum loss: 3964.494141, avg loss: 2.751210, ppl: 15.661578 +epoch: 1, batch: 39715, sum loss: 3392.523438, avg loss: 2.470884, ppl: 11.832898 +epoch: 1, batch: 39716, sum loss: 4446.798828, avg loss: 2.643757, ppl: 14.065945 +epoch: 1, batch: 39717, sum loss: 3985.237061, avg loss: 2.351172, ppl: 10.497869 +epoch: 1, batch: 39718, sum loss: 4047.609375, avg loss: 2.415041, ppl: 11.190234 +epoch: 1, batch: 39719, sum loss: 4411.441406, avg loss: 2.647924, ppl: 14.124684 +epoch: 1, batch: 39720, sum loss: 5206.711914, avg loss: 2.582695, ppl: 13.232746 +epoch: 1, batch: 39721, sum loss: 3559.995361, avg loss: 2.398919, ppl: 11.011263 +epoch: 1, batch: 39722, sum loss: 
4282.079102, avg loss: 2.669626, ppl: 14.434563 +epoch: 1, batch: 39723, sum loss: 3934.100098, avg loss: 2.297956, ppl: 9.953812 +epoch: 1, batch: 39724, sum loss: 4537.708496, avg loss: 2.471519, ppl: 11.840416 +epoch: 1, batch: 39725, sum loss: 4403.755371, avg loss: 2.472631, ppl: 11.853590 +epoch: 1, batch: 39726, sum loss: 4755.788086, avg loss: 2.645043, ppl: 14.084056 +epoch: 1, batch: 39727, sum loss: 4589.891602, avg loss: 2.786819, ppl: 16.229315 +epoch: 1, batch: 39728, sum loss: 3782.606445, avg loss: 2.486921, ppl: 12.024195 +epoch: 1, batch: 39729, sum loss: 4099.936035, avg loss: 2.657120, ppl: 14.255175 +epoch: 1, batch: 39730, sum loss: 4557.184570, avg loss: 2.478078, ppl: 11.918331 +epoch: 1, batch: 39731, sum loss: 4559.750000, avg loss: 2.799110, ppl: 16.430016 +epoch: 1, batch: 39732, sum loss: 4874.101074, avg loss: 2.616265, ppl: 13.684510 +epoch: 1, batch: 39733, sum loss: 4628.069336, avg loss: 2.655232, ppl: 14.228286 +epoch: 1, batch: 39734, sum loss: 4838.194336, avg loss: 2.636618, ppl: 13.965897 +epoch: 1, batch: 39735, sum loss: 3726.663574, avg loss: 2.469625, ppl: 11.818011 +epoch: 1, batch: 39736, sum loss: 4646.400391, avg loss: 2.628055, ppl: 13.846807 +epoch: 1, batch: 39737, sum loss: 4211.641602, avg loss: 2.541727, ppl: 12.701586 +epoch: 1, batch: 39738, sum loss: 4082.799316, avg loss: 2.634064, ppl: 13.930270 +epoch: 1, batch: 39739, sum loss: 3451.545410, avg loss: 2.307183, ppl: 10.046082 +epoch: 1, batch: 39740, sum loss: 4582.875000, avg loss: 2.558836, ppl: 12.920766 +epoch: 1, batch: 39741, sum loss: 3740.989014, avg loss: 2.381279, ppl: 10.818731 +epoch: 1, batch: 39742, sum loss: 4704.362305, avg loss: 2.597660, ppl: 13.432274 +epoch: 1, batch: 39743, sum loss: 4494.940430, avg loss: 2.530935, ppl: 12.565249 +epoch: 1, batch: 39744, sum loss: 4045.104492, avg loss: 2.390724, ppl: 10.921395 +epoch: 1, batch: 39745, sum loss: 5038.350098, avg loss: 2.825771, ppl: 16.873955 +epoch: 1, batch: 39746, sum loss: 
4752.349609, avg loss: 2.694076, ppl: 14.791842 +epoch: 1, batch: 39747, sum loss: 4093.591064, avg loss: 2.336525, ppl: 10.345222 +epoch: 1, batch: 39748, sum loss: 4940.509766, avg loss: 2.752373, ppl: 15.679799 +epoch: 1, batch: 39749, sum loss: 4446.254883, avg loss: 2.598629, ppl: 13.445298 +epoch: 1, batch: 39750, sum loss: 5000.604004, avg loss: 2.645822, ppl: 14.095031 +epoch: 1, batch: 39751, sum loss: 4564.938477, avg loss: 2.814389, ppl: 16.682976 +epoch: 1, batch: 39752, sum loss: 4336.216797, avg loss: 2.623240, ppl: 13.780306 +epoch: 1, batch: 39753, sum loss: 4141.990234, avg loss: 2.618198, ppl: 13.710999 +epoch: 1, batch: 39754, sum loss: 4233.510742, avg loss: 2.455633, ppl: 11.653808 +epoch: 1, batch: 39755, sum loss: 5554.682617, avg loss: 2.806813, ppl: 16.557062 +epoch: 1, batch: 39756, sum loss: 4413.155273, avg loss: 2.641027, ppl: 14.027598 +epoch: 1, batch: 39757, sum loss: 4053.626953, avg loss: 2.525624, ppl: 12.498695 +epoch: 1, batch: 39758, sum loss: 4597.546875, avg loss: 2.512321, ppl: 12.333520 +epoch: 1, batch: 39759, sum loss: 4864.396484, avg loss: 2.693464, ppl: 14.782799 +epoch: 1, batch: 39760, sum loss: 5537.911133, avg loss: 2.794102, ppl: 16.347948 +epoch: 1, batch: 39761, sum loss: 4170.089355, avg loss: 2.535009, ppl: 12.616540 +epoch: 1, batch: 39762, sum loss: 4812.891602, avg loss: 2.799821, ppl: 16.441698 +epoch: 1, batch: 39763, sum loss: 4248.779785, avg loss: 2.585989, ppl: 13.276410 +epoch: 1, batch: 39764, sum loss: 4354.350098, avg loss: 2.730000, ppl: 15.332888 +epoch: 1, batch: 39765, sum loss: 4069.910889, avg loss: 2.550070, ppl: 12.807995 +epoch: 1, batch: 39766, sum loss: 4367.026855, avg loss: 2.580985, ppl: 13.210145 +epoch: 1, batch: 39767, sum loss: 4473.525391, avg loss: 2.467471, ppl: 11.792590 +epoch: 1, batch: 39768, sum loss: 3889.143799, avg loss: 2.414118, ppl: 11.179903 +epoch: 1, batch: 39769, sum loss: 3660.978271, avg loss: 2.483703, ppl: 11.985566 +epoch: 1, batch: 39770, sum loss: 
3966.154297, avg loss: 2.592258, ppl: 13.359900 +epoch: 1, batch: 39771, sum loss: 4615.684570, avg loss: 2.691361, ppl: 14.751742 +epoch: 1, batch: 39772, sum loss: 4833.557617, avg loss: 2.685310, ppl: 14.662745 +epoch: 1, batch: 39773, sum loss: 4496.796387, avg loss: 2.704027, ppl: 14.939769 +epoch: 1, batch: 39774, sum loss: 4388.147461, avg loss: 2.653052, ppl: 14.197301 +epoch: 1, batch: 39775, sum loss: 3704.244629, avg loss: 2.893941, ppl: 18.064363 +epoch: 1, batch: 39776, sum loss: 4056.250488, avg loss: 2.680932, ppl: 14.598697 +epoch: 1, batch: 39777, sum loss: 4208.972656, avg loss: 2.630608, ppl: 13.882206 +epoch: 1, batch: 39778, sum loss: 3972.631836, avg loss: 2.566300, ppl: 13.017566 +epoch: 1, batch: 39779, sum loss: 5001.548828, avg loss: 2.722672, ppl: 15.220945 +epoch: 1, batch: 39780, sum loss: 4228.456055, avg loss: 2.568928, ppl: 13.051831 +epoch: 1, batch: 39781, sum loss: 4950.536133, avg loss: 2.896744, ppl: 18.115076 +epoch: 1, batch: 39782, sum loss: 4185.280762, avg loss: 2.558240, ppl: 12.913073 +epoch: 1, batch: 39783, sum loss: 5419.982910, avg loss: 2.812653, ppl: 16.654047 +epoch: 1, batch: 39784, sum loss: 5027.373047, avg loss: 2.615699, ppl: 13.676770 +epoch: 1, batch: 39785, sum loss: 5538.386719, avg loss: 3.000210, ppl: 20.089746 +epoch: 1, batch: 39786, sum loss: 4544.375000, avg loss: 2.623773, ppl: 13.787647 +epoch: 1, batch: 39787, sum loss: 4460.949219, avg loss: 2.741825, ppl: 15.515272 +epoch: 1, batch: 39788, sum loss: 3881.130371, avg loss: 2.489500, ppl: 12.055247 +epoch: 1, batch: 39789, sum loss: 4902.848145, avg loss: 2.728352, ppl: 15.307636 +epoch: 1, batch: 39790, sum loss: 3811.376465, avg loss: 2.512443, ppl: 12.335032 +epoch: 1, batch: 39791, sum loss: 4378.061523, avg loss: 2.551318, ppl: 12.823994 +epoch: 1, batch: 39792, sum loss: 5003.431641, avg loss: 2.759753, ppl: 15.795937 +epoch: 1, batch: 39793, sum loss: 4987.825684, avg loss: 2.616907, ppl: 13.693310 +epoch: 1, batch: 39794, sum loss: 
4222.599121, avg loss: 2.682719, ppl: 14.624797 +epoch: 1, batch: 39795, sum loss: 4601.174316, avg loss: 2.886559, ppl: 17.931492 +epoch: 1, batch: 39796, sum loss: 3696.277588, avg loss: 2.511058, ppl: 12.317957 +epoch: 1, batch: 39797, sum loss: 4845.530762, avg loss: 2.675611, ppl: 14.521216 +epoch: 1, batch: 39798, sum loss: 3407.813232, avg loss: 2.355089, ppl: 10.539063 +epoch: 1, batch: 39799, sum loss: 4310.016602, avg loss: 2.649057, ppl: 14.140703 +epoch: 1, batch: 39800, sum loss: 4259.291504, avg loss: 2.587662, ppl: 13.298643 +epoch: 1, batch: 39801, sum loss: 4104.333496, avg loss: 2.519542, ppl: 12.422902 +epoch: 1, batch: 39802, sum loss: 4088.458984, avg loss: 2.536265, ppl: 12.632400 +epoch: 1, batch: 39803, sum loss: 4813.514160, avg loss: 2.627464, ppl: 13.838632 +epoch: 1, batch: 39804, sum loss: 3167.713867, avg loss: 2.259425, ppl: 9.577582 +epoch: 1, batch: 39805, sum loss: 4196.406250, avg loss: 2.480145, ppl: 11.943002 +epoch: 1, batch: 39806, sum loss: 3809.919678, avg loss: 2.490144, ppl: 12.063007 +epoch: 1, batch: 39807, sum loss: 3799.849365, avg loss: 2.342694, ppl: 10.409240 +epoch: 1, batch: 39808, sum loss: 4851.944336, avg loss: 2.758354, ppl: 15.773857 +epoch: 1, batch: 39809, sum loss: 4067.401855, avg loss: 2.475595, ppl: 11.888773 +epoch: 1, batch: 39810, sum loss: 4684.937988, avg loss: 2.728560, ppl: 15.310826 +epoch: 1, batch: 39811, sum loss: 3981.881348, avg loss: 2.539465, ppl: 12.672892 +epoch: 1, batch: 39812, sum loss: 3142.373047, avg loss: 2.241350, ppl: 9.406025 +epoch: 1, batch: 39813, sum loss: 5226.417969, avg loss: 2.685724, ppl: 14.668811 +epoch: 1, batch: 39814, sum loss: 5072.608887, avg loss: 2.736035, ppl: 15.425699 +epoch: 1, batch: 39815, sum loss: 4017.019043, avg loss: 2.421350, ppl: 11.261046 +epoch: 1, batch: 39816, sum loss: 4656.220703, avg loss: 2.693014, ppl: 14.776139 +epoch: 1, batch: 39817, sum loss: 5673.393555, avg loss: 2.855256, ppl: 17.378883 +epoch: 1, batch: 39818, sum loss: 
3439.956299, avg loss: 2.316469, ppl: 10.139805 +epoch: 1, batch: 39819, sum loss: 4562.880859, avg loss: 2.518146, ppl: 12.405579 +epoch: 1, batch: 39820, sum loss: 4072.748535, avg loss: 2.438772, ppl: 11.458958 +epoch: 1, batch: 39821, sum loss: 4569.551270, avg loss: 2.499755, ppl: 12.179505 +epoch: 1, batch: 39822, sum loss: 3615.199463, avg loss: 2.188377, ppl: 8.920727 +epoch: 1, batch: 39823, sum loss: 4029.984863, avg loss: 2.411721, ppl: 11.153133 +epoch: 1, batch: 39824, sum loss: 3948.353516, avg loss: 2.360044, ppl: 10.591417 +epoch: 1, batch: 39825, sum loss: 5124.870605, avg loss: 2.776203, ppl: 16.057932 +epoch: 1, batch: 39826, sum loss: 3890.974854, avg loss: 2.625489, ppl: 13.811330 +epoch: 1, batch: 39827, sum loss: 4166.400391, avg loss: 2.435068, ppl: 11.416591 +epoch: 1, batch: 39828, sum loss: 3880.919678, avg loss: 2.582115, ppl: 13.225085 +epoch: 1, batch: 39829, sum loss: 4736.034180, avg loss: 2.710953, ppl: 15.043598 +epoch: 1, batch: 39830, sum loss: 4806.672363, avg loss: 2.582844, ppl: 13.234724 +epoch: 1, batch: 39831, sum loss: 4329.313477, avg loss: 2.575439, ppl: 13.137086 +epoch: 1, batch: 39832, sum loss: 3780.890137, avg loss: 2.304016, ppl: 10.014318 +epoch: 1, batch: 39833, sum loss: 3911.413574, avg loss: 2.357693, ppl: 10.566551 +epoch: 1, batch: 39834, sum loss: 4626.867676, avg loss: 2.760661, ppl: 15.810292 +epoch: 1, batch: 39835, sum loss: 4563.651367, avg loss: 2.695601, ppl: 14.814412 +epoch: 1, batch: 39836, sum loss: 3810.811279, avg loss: 2.336488, ppl: 10.344837 +epoch: 1, batch: 39837, sum loss: 3948.272217, avg loss: 2.432700, ppl: 11.389591 +epoch: 1, batch: 39838, sum loss: 3762.921875, avg loss: 2.353297, ppl: 10.520198 +epoch: 1, batch: 39839, sum loss: 4074.340820, avg loss: 2.223985, ppl: 9.244097 +epoch: 1, batch: 39840, sum loss: 4532.156738, avg loss: 2.543298, ppl: 12.721558 +epoch: 1, batch: 39841, sum loss: 4403.021973, avg loss: 2.673359, ppl: 14.488550 +epoch: 1, batch: 39842, sum loss: 
4065.866699, avg loss: 2.565216, ppl: 13.003462 +epoch: 1, batch: 39843, sum loss: 3588.193359, avg loss: 2.518030, ppl: 12.404141 +epoch: 1, batch: 39844, sum loss: 3666.083740, avg loss: 2.272835, ppl: 9.706885 +epoch: 1, batch: 39845, sum loss: 4877.447266, avg loss: 2.834077, ppl: 17.014694 +epoch: 1, batch: 39846, sum loss: 4730.739258, avg loss: 2.711025, ppl: 15.044692 +epoch: 1, batch: 39847, sum loss: 5225.857910, avg loss: 2.699307, ppl: 14.869420 +epoch: 1, batch: 39848, sum loss: 4033.723145, avg loss: 2.648538, ppl: 14.133359 +epoch: 1, batch: 39849, sum loss: 4143.636230, avg loss: 2.476770, ppl: 11.902759 +epoch: 1, batch: 39850, sum loss: 4365.101074, avg loss: 2.473145, ppl: 11.859687 +epoch: 1, batch: 39851, sum loss: 4580.403809, avg loss: 2.522249, ppl: 12.456577 +epoch: 1, batch: 39852, sum loss: 4457.510254, avg loss: 2.815862, ppl: 16.707579 +epoch: 1, batch: 39853, sum loss: 4325.828125, avg loss: 2.601220, ppl: 13.480169 +epoch: 1, batch: 39854, sum loss: 4153.683594, avg loss: 2.650724, ppl: 14.164286 +epoch: 1, batch: 39855, sum loss: 3924.300049, avg loss: 2.597154, ppl: 13.425477 +epoch: 1, batch: 39856, sum loss: 4821.699219, avg loss: 2.922242, ppl: 18.582903 +epoch: 1, batch: 39857, sum loss: 4346.479004, avg loss: 2.616785, ppl: 13.691628 +epoch: 1, batch: 39858, sum loss: 3941.031982, avg loss: 2.507018, ppl: 12.268290 +epoch: 1, batch: 39859, sum loss: 4366.831055, avg loss: 2.550719, ppl: 12.816316 +epoch: 1, batch: 39860, sum loss: 3437.213867, avg loss: 2.322442, ppl: 10.200552 +epoch: 1, batch: 39861, sum loss: 4063.267090, avg loss: 2.626546, ppl: 13.825938 +epoch: 1, batch: 39862, sum loss: 4575.094238, avg loss: 2.707156, ppl: 14.986599 +epoch: 1, batch: 39863, sum loss: 4139.685059, avg loss: 2.558520, ppl: 12.916684 +epoch: 1, batch: 39864, sum loss: 3241.866699, avg loss: 2.309022, ppl: 10.064576 +epoch: 1, batch: 39865, sum loss: 4916.483398, avg loss: 2.616543, ppl: 13.688315 +epoch: 1, batch: 39866, sum loss: 
4140.888184, avg loss: 2.482547, ppl: 11.971718 +epoch: 1, batch: 39867, sum loss: 4346.296387, avg loss: 2.275548, ppl: 9.733252 +epoch: 1, batch: 39868, sum loss: 3865.629639, avg loss: 2.649506, ppl: 14.147053 +epoch: 1, batch: 39869, sum loss: 5252.971680, avg loss: 2.864216, ppl: 17.535294 +epoch: 1, batch: 39870, sum loss: 3121.472168, avg loss: 2.410403, ppl: 11.138452 +epoch: 1, batch: 39871, sum loss: 4063.833008, avg loss: 2.511640, ppl: 12.325122 +epoch: 1, batch: 39872, sum loss: 4698.413086, avg loss: 2.575884, ppl: 13.142935 +epoch: 1, batch: 39873, sum loss: 4047.395020, avg loss: 2.589504, ppl: 13.323165 +epoch: 1, batch: 39874, sum loss: 4212.930664, avg loss: 2.624879, ppl: 13.802902 +epoch: 1, batch: 39875, sum loss: 3953.082520, avg loss: 2.672808, ppl: 14.480569 +epoch: 1, batch: 39876, sum loss: 4305.775391, avg loss: 2.547796, ppl: 12.778908 +epoch: 1, batch: 39877, sum loss: 4940.180664, avg loss: 2.758336, ppl: 15.773579 +epoch: 1, batch: 39878, sum loss: 4665.154785, avg loss: 2.793506, ppl: 16.338200 +epoch: 1, batch: 39879, sum loss: 4278.078613, avg loss: 2.446014, ppl: 11.542250 +epoch: 1, batch: 39880, sum loss: 4516.899414, avg loss: 2.530476, ppl: 12.559484 +epoch: 1, batch: 39881, sum loss: 3496.739014, avg loss: 2.359473, ppl: 10.585374 +epoch: 1, batch: 39882, sum loss: 4813.638184, avg loss: 2.693698, ppl: 14.786254 +epoch: 1, batch: 39883, sum loss: 4738.841797, avg loss: 2.703275, ppl: 14.928550 +epoch: 1, batch: 39884, sum loss: 4076.459473, avg loss: 2.653945, ppl: 14.209986 +epoch: 1, batch: 39885, sum loss: 4192.123047, avg loss: 2.592531, ppl: 13.363558 +epoch: 1, batch: 39886, sum loss: 3623.272217, avg loss: 2.544433, ppl: 12.736003 +epoch: 1, batch: 39887, sum loss: 4829.764648, avg loss: 2.671330, ppl: 14.459187 +epoch: 1, batch: 39888, sum loss: 4073.395752, avg loss: 2.594520, ppl: 13.390153 +epoch: 1, batch: 39889, sum loss: 3658.086426, avg loss: 2.443611, ppl: 11.514549 +epoch: 1, batch: 39890, sum loss: 
4256.798340, avg loss: 2.489356, ppl: 12.053509 +epoch: 1, batch: 39891, sum loss: 3370.166016, avg loss: 2.056233, ppl: 7.816469 +epoch: 1, batch: 39892, sum loss: 3184.745361, avg loss: 2.247527, ppl: 9.464300 +epoch: 1, batch: 39893, sum loss: 3873.296387, avg loss: 2.354587, ppl: 10.533781 +epoch: 1, batch: 39894, sum loss: 4097.107910, avg loss: 2.655287, ppl: 14.229073 +epoch: 1, batch: 39895, sum loss: 5366.172852, avg loss: 2.808044, ppl: 16.577469 +epoch: 1, batch: 39896, sum loss: 3601.772705, avg loss: 2.328231, ppl: 10.259772 +epoch: 1, batch: 39897, sum loss: 4695.769531, avg loss: 2.773638, ppl: 16.016802 +epoch: 1, batch: 39898, sum loss: 5536.116699, avg loss: 2.626241, ppl: 13.821719 +epoch: 1, batch: 39899, sum loss: 4106.421875, avg loss: 2.458935, ppl: 11.692355 +epoch: 1, batch: 39900, sum loss: 4186.201172, avg loss: 2.405863, ppl: 11.087993 +epoch: 1, batch: 39901, sum loss: 5242.733887, avg loss: 2.609624, ppl: 13.593934 +epoch: 1, batch: 39902, sum loss: 4262.638184, avg loss: 2.572503, ppl: 13.098574 +epoch: 1, batch: 39903, sum loss: 4967.717773, avg loss: 2.805035, ppl: 16.527660 +epoch: 1, batch: 39904, sum loss: 3819.539062, avg loss: 2.448423, ppl: 11.570083 +epoch: 1, batch: 39905, sum loss: 3723.555176, avg loss: 2.555631, ppl: 12.879429 +epoch: 1, batch: 39906, sum loss: 4847.603027, avg loss: 2.882047, ppl: 17.850775 +epoch: 1, batch: 39907, sum loss: 4951.267578, avg loss: 2.561442, ppl: 12.954486 +epoch: 1, batch: 39908, sum loss: 4353.946289, avg loss: 2.508034, ppl: 12.280756 +epoch: 1, batch: 39909, sum loss: 4218.826172, avg loss: 2.486050, ppl: 12.013724 +epoch: 1, batch: 39910, sum loss: 4238.236328, avg loss: 2.509317, ppl: 12.296528 +epoch: 1, batch: 39911, sum loss: 4471.159668, avg loss: 2.509068, ppl: 12.293473 +epoch: 1, batch: 39912, sum loss: 3576.560303, avg loss: 2.319429, ppl: 10.169869 +epoch: 1, batch: 39913, sum loss: 4066.223877, avg loss: 2.452487, ppl: 11.617206 +epoch: 1, batch: 39914, sum loss: 
4066.021973, avg loss: 2.408781, ppl: 11.120395 +epoch: 1, batch: 39915, sum loss: 4692.185547, avg loss: 2.489223, ppl: 12.051908 +epoch: 1, batch: 39916, sum loss: 4454.189941, avg loss: 2.567256, ppl: 13.030026 +epoch: 1, batch: 39917, sum loss: 4600.435059, avg loss: 2.796617, ppl: 16.389109 +epoch: 1, batch: 39918, sum loss: 4672.126465, avg loss: 2.608669, ppl: 13.580963 +epoch: 1, batch: 39919, sum loss: 5477.664062, avg loss: 2.929232, ppl: 18.713255 +epoch: 1, batch: 39920, sum loss: 4752.790039, avg loss: 2.630210, ppl: 13.876686 +epoch: 1, batch: 39921, sum loss: 3678.867188, avg loss: 2.305055, ppl: 10.024726 +epoch: 1, batch: 39922, sum loss: 3836.206543, avg loss: 2.479772, ppl: 11.938538 +epoch: 1, batch: 39923, sum loss: 4623.308594, avg loss: 2.519514, ppl: 12.422562 +epoch: 1, batch: 39924, sum loss: 4280.720703, avg loss: 2.599102, ppl: 13.451650 +epoch: 1, batch: 39925, sum loss: 4335.212402, avg loss: 2.591281, ppl: 13.346854 +epoch: 1, batch: 39926, sum loss: 4026.425781, avg loss: 2.393832, ppl: 10.955397 +epoch: 1, batch: 39927, sum loss: 3866.904053, avg loss: 2.419840, ppl: 11.244059 +epoch: 1, batch: 39928, sum loss: 4138.229492, avg loss: 2.435686, ppl: 11.423649 +epoch: 1, batch: 39929, sum loss: 4815.635254, avg loss: 2.851176, ppl: 17.308117 +epoch: 1, batch: 39930, sum loss: 3610.017090, avg loss: 2.225658, ppl: 9.259573 +epoch: 1, batch: 39931, sum loss: 3898.727051, avg loss: 2.409596, ppl: 11.129469 +epoch: 1, batch: 39932, sum loss: 4301.248047, avg loss: 2.758979, ppl: 15.783717 +epoch: 1, batch: 39933, sum loss: 3580.720459, avg loss: 2.323634, ppl: 10.212724 +epoch: 1, batch: 39934, sum loss: 4593.001465, avg loss: 2.654914, ppl: 14.223764 +epoch: 1, batch: 39935, sum loss: 4009.457520, avg loss: 2.332436, ppl: 10.303010 +epoch: 1, batch: 39936, sum loss: 4383.234375, avg loss: 2.576857, ppl: 13.155729 +epoch: 1, batch: 39937, sum loss: 4337.392090, avg loss: 2.697383, ppl: 14.840845 +epoch: 1, batch: 39938, sum loss: 
4743.060547, avg loss: 2.545926, ppl: 12.755035 +epoch: 1, batch: 39939, sum loss: 4386.197754, avg loss: 2.906692, ppl: 18.296171 +epoch: 1, batch: 39940, sum loss: 4487.980957, avg loss: 2.489174, ppl: 12.051319 +epoch: 1, batch: 39941, sum loss: 4454.653320, avg loss: 2.502614, ppl: 12.214384 +epoch: 1, batch: 39942, sum loss: 4367.743164, avg loss: 2.701140, ppl: 14.896703 +epoch: 1, batch: 39943, sum loss: 3940.230713, avg loss: 2.365084, ppl: 10.644938 +epoch: 1, batch: 39944, sum loss: 3627.280518, avg loss: 2.136208, ppl: 8.467265 +epoch: 1, batch: 39945, sum loss: 3106.662109, avg loss: 2.144004, ppl: 8.533541 +epoch: 1, batch: 39946, sum loss: 3925.110107, avg loss: 2.391901, ppl: 10.934263 +epoch: 1, batch: 39947, sum loss: 4572.219238, avg loss: 2.508074, ppl: 12.281257 +epoch: 1, batch: 39948, sum loss: 5492.957031, avg loss: 2.738264, ppl: 15.460120 +epoch: 1, batch: 39949, sum loss: 4754.068359, avg loss: 2.788310, ppl: 16.253529 +epoch: 1, batch: 39950, sum loss: 5216.311035, avg loss: 2.552011, ppl: 12.832888 +epoch: 1, batch: 39951, sum loss: 3768.762695, avg loss: 2.458423, ppl: 11.686369 +epoch: 1, batch: 39952, sum loss: 5626.954102, avg loss: 2.819115, ppl: 16.762016 +epoch: 1, batch: 39953, sum loss: 5312.756836, avg loss: 2.790314, ppl: 16.286129 +epoch: 1, batch: 39954, sum loss: 3595.109863, avg loss: 2.482811, ppl: 11.974875 +epoch: 1, batch: 39955, sum loss: 4486.300293, avg loss: 2.409399, ppl: 11.127270 +epoch: 1, batch: 39956, sum loss: 4985.578613, avg loss: 2.684749, ppl: 14.654517 +epoch: 1, batch: 39957, sum loss: 3789.958984, avg loss: 2.533395, ppl: 12.596198 +epoch: 1, batch: 39958, sum loss: 5573.602051, avg loss: 2.796589, ppl: 16.388653 +epoch: 1, batch: 39959, sum loss: 3535.151123, avg loss: 2.517914, ppl: 12.402696 +epoch: 1, batch: 39960, sum loss: 4685.682129, avg loss: 2.472656, ppl: 11.853884 +epoch: 1, batch: 39961, sum loss: 4662.000000, avg loss: 2.707317, ppl: 14.989008 +epoch: 1, batch: 39962, sum loss: 
4726.624512, avg loss: 2.653916, ppl: 14.209577 +epoch: 1, batch: 39963, sum loss: 5977.503906, avg loss: 2.860050, ppl: 17.462395 +epoch: 1, batch: 39964, sum loss: 4976.071777, avg loss: 2.812929, ppl: 16.658646 +epoch: 1, batch: 39965, sum loss: 5016.448730, avg loss: 2.766933, ppl: 15.909760 +epoch: 1, batch: 39966, sum loss: 4494.080566, avg loss: 2.691066, ppl: 14.747392 +epoch: 1, batch: 39967, sum loss: 4238.184570, avg loss: 2.685795, ppl: 14.669860 +epoch: 1, batch: 39968, sum loss: 4049.832031, avg loss: 2.572955, ppl: 13.104496 +epoch: 1, batch: 39969, sum loss: 3969.557617, avg loss: 2.441302, ppl: 11.487992 +epoch: 1, batch: 39970, sum loss: 4637.693359, avg loss: 2.624614, ppl: 13.799250 +epoch: 1, batch: 39971, sum loss: 4564.893066, avg loss: 2.803988, ppl: 16.510363 +epoch: 1, batch: 39972, sum loss: 4285.150391, avg loss: 2.578310, ppl: 13.174848 +epoch: 1, batch: 39973, sum loss: 3929.619385, avg loss: 2.303411, ppl: 10.008263 +epoch: 1, batch: 39974, sum loss: 4545.276855, avg loss: 2.532188, ppl: 12.580999 +epoch: 1, batch: 39975, sum loss: 4642.485840, avg loss: 2.672704, ppl: 14.479060 +epoch: 1, batch: 39976, sum loss: 4360.845703, avg loss: 2.617554, ppl: 13.702173 +epoch: 1, batch: 39977, sum loss: 4885.490234, avg loss: 2.772696, ppl: 16.001717 +epoch: 1, batch: 39978, sum loss: 4713.127930, avg loss: 2.787184, ppl: 16.235237 +epoch: 1, batch: 39979, sum loss: 4649.165527, avg loss: 2.629619, ppl: 13.868480 +epoch: 1, batch: 39980, sum loss: 4204.315918, avg loss: 2.596860, ppl: 13.421524 +epoch: 1, batch: 39981, sum loss: 4549.349121, avg loss: 2.586327, ppl: 13.280902 +epoch: 1, batch: 39982, sum loss: 4240.015137, avg loss: 2.538931, ppl: 12.666128 +epoch: 1, batch: 39983, sum loss: 4129.869141, avg loss: 2.519749, ppl: 12.425480 +epoch: 1, batch: 39984, sum loss: 3840.520020, avg loss: 2.528321, ppl: 12.532450 +epoch: 1, batch: 39985, sum loss: 4931.798340, avg loss: 2.742936, ppl: 15.532516 +epoch: 1, batch: 39986, sum loss: 
4827.189453, avg loss: 2.738054, ppl: 15.456878 +epoch: 1, batch: 39987, sum loss: 4707.825195, avg loss: 2.841174, ppl: 17.135870 +epoch: 1, batch: 39988, sum loss: 4384.520508, avg loss: 2.603635, ppl: 13.512762 +epoch: 1, batch: 39989, sum loss: 4804.918457, avg loss: 2.622772, ppl: 13.773855 +epoch: 1, batch: 39990, sum loss: 4156.745117, avg loss: 2.459612, ppl: 11.700275 +epoch: 1, batch: 39991, sum loss: 4042.802246, avg loss: 2.429569, ppl: 11.353985 +epoch: 1, batch: 39992, sum loss: 5069.465820, avg loss: 2.783891, ppl: 16.181866 +epoch: 1, batch: 39993, sum loss: 4066.207275, avg loss: 2.384872, ppl: 10.857677 +epoch: 1, batch: 39994, sum loss: 4831.650879, avg loss: 2.820578, ppl: 16.786556 +epoch: 1, batch: 39995, sum loss: 3822.083252, avg loss: 2.235136, ppl: 9.347755 +epoch: 1, batch: 39996, sum loss: 4460.652344, avg loss: 2.636319, ppl: 13.961721 +epoch: 1, batch: 39997, sum loss: 3868.903809, avg loss: 2.450224, ppl: 11.590941 +epoch: 1, batch: 39998, sum loss: 3288.801270, avg loss: 2.210216, ppl: 9.117683 +epoch: 1, batch: 39999, sum loss: 4273.362793, avg loss: 2.589917, ppl: 13.328665 +epoch: 1, batch: 40000, sum loss: 4361.418945, avg loss: 2.630530, ppl: 13.881126 +epoch: 1, batch: 40001, sum loss: 4360.512207, avg loss: 2.376301, ppl: 10.765010 +epoch: 1, batch: 40002, sum loss: 5496.458984, avg loss: 2.840547, ppl: 17.125132 +epoch: 1, batch: 40003, sum loss: 5035.682617, avg loss: 2.789852, ppl: 16.278608 +epoch: 1, batch: 40004, sum loss: 4263.002930, avg loss: 2.615339, ppl: 13.671854 +epoch: 1, batch: 40005, sum loss: 4501.360352, avg loss: 2.650978, ppl: 14.167886 +epoch: 1, batch: 40006, sum loss: 4294.332031, avg loss: 2.580729, ppl: 13.206757 +epoch: 1, batch: 40007, sum loss: 4783.374023, avg loss: 2.613866, ppl: 13.651721 +epoch: 1, batch: 40008, sum loss: 4491.884277, avg loss: 2.629909, ppl: 13.872504 +epoch: 1, batch: 40009, sum loss: 5074.601074, avg loss: 2.599693, ppl: 13.459609 +epoch: 1, batch: 40010, sum loss: 
4181.323730, avg loss: 2.582658, ppl: 13.232266 +epoch: 1, batch: 40011, sum loss: 4292.794922, avg loss: 2.570536, ppl: 13.072828 +epoch: 1, batch: 40012, sum loss: 3692.425781, avg loss: 2.359377, ppl: 10.584359 +epoch: 1, batch: 40013, sum loss: 4151.294434, avg loss: 2.597806, ppl: 13.434234 +epoch: 1, batch: 40014, sum loss: 3315.703369, avg loss: 2.269475, ppl: 9.674325 +epoch: 1, batch: 40015, sum loss: 3416.058594, avg loss: 2.506279, ppl: 12.259232 +epoch: 1, batch: 40016, sum loss: 3123.190918, avg loss: 2.369644, ppl: 10.693586 +epoch: 1, batch: 40017, sum loss: 3745.711182, avg loss: 2.475685, ppl: 11.889848 +epoch: 1, batch: 40018, sum loss: 3935.811523, avg loss: 2.591054, ppl: 13.343831 +epoch: 1, batch: 40019, sum loss: 4107.905273, avg loss: 2.500247, ppl: 12.185497 +epoch: 1, batch: 40020, sum loss: 4417.660156, avg loss: 2.444748, ppl: 11.527646 +epoch: 1, batch: 40021, sum loss: 4727.308594, avg loss: 2.610331, ppl: 13.603551 +epoch: 1, batch: 40022, sum loss: 3628.883301, avg loss: 2.192679, ppl: 8.959182 +epoch: 1, batch: 40023, sum loss: 3641.395752, avg loss: 2.287309, ppl: 9.848399 +epoch: 1, batch: 40024, sum loss: 3986.714111, avg loss: 2.650741, ppl: 14.164525 +epoch: 1, batch: 40025, sum loss: 4159.432617, avg loss: 2.439550, ppl: 11.467878 +epoch: 1, batch: 40026, sum loss: 4389.966309, avg loss: 2.641376, ppl: 14.032495 +epoch: 1, batch: 40027, sum loss: 3470.331055, avg loss: 2.498438, ppl: 12.163485 +epoch: 1, batch: 40028, sum loss: 3574.901367, avg loss: 2.513995, ppl: 12.354189 +epoch: 1, batch: 40029, sum loss: 4720.128906, avg loss: 2.691066, ppl: 14.747381 +epoch: 1, batch: 40030, sum loss: 3621.053711, avg loss: 2.393294, ppl: 10.949503 +epoch: 1, batch: 40031, sum loss: 3790.660156, avg loss: 2.487310, ppl: 12.028872 +epoch: 1, batch: 40032, sum loss: 4981.132812, avg loss: 2.720444, ppl: 15.187063 +epoch: 1, batch: 40033, sum loss: 3574.184082, avg loss: 2.269323, ppl: 9.672852 +epoch: 1, batch: 40034, sum loss: 
4341.062500, avg loss: 2.607245, ppl: 13.561633 +epoch: 1, batch: 40035, sum loss: 4269.773438, avg loss: 2.496943, ppl: 12.145315 +epoch: 1, batch: 40036, sum loss: 3984.115723, avg loss: 2.499445, ppl: 12.175737 +epoch: 1, batch: 40037, sum loss: 4945.075195, avg loss: 2.501302, ppl: 12.198372 +epoch: 1, batch: 40038, sum loss: 4451.145996, avg loss: 2.553727, ppl: 12.854924 +epoch: 1, batch: 40039, sum loss: 4291.618164, avg loss: 2.469285, ppl: 11.814003 +epoch: 1, batch: 40040, sum loss: 4027.541748, avg loss: 2.775701, ppl: 16.049871 +epoch: 1, batch: 40041, sum loss: 4540.510254, avg loss: 2.600522, ppl: 13.470762 +epoch: 1, batch: 40042, sum loss: 4605.532715, avg loss: 2.640787, ppl: 14.024235 +epoch: 1, batch: 40043, sum loss: 3974.123291, avg loss: 2.663622, ppl: 14.348160 +epoch: 1, batch: 40044, sum loss: 4047.183105, avg loss: 2.387719, ppl: 10.888625 +epoch: 1, batch: 40045, sum loss: 4570.883789, avg loss: 2.707870, ppl: 14.997291 +epoch: 1, batch: 40046, sum loss: 4244.968750, avg loss: 2.409177, ppl: 11.124797 +epoch: 1, batch: 40047, sum loss: 4708.566406, avg loss: 2.487357, ppl: 12.029436 +epoch: 1, batch: 40048, sum loss: 4946.648438, avg loss: 2.686936, ppl: 14.686603 +epoch: 1, batch: 40049, sum loss: 4414.338867, avg loss: 2.566476, ppl: 13.019863 +epoch: 1, batch: 40050, sum loss: 3769.520020, avg loss: 2.576569, ppl: 13.151935 +epoch: 1, batch: 40051, sum loss: 5006.020508, avg loss: 2.837880, ppl: 17.079521 +epoch: 1, batch: 40052, sum loss: 4148.226562, avg loss: 2.774733, ppl: 16.034349 +epoch: 1, batch: 40053, sum loss: 5165.703125, avg loss: 2.798322, ppl: 16.417084 +epoch: 1, batch: 40054, sum loss: 4160.411621, avg loss: 2.388296, ppl: 10.894912 +epoch: 1, batch: 40055, sum loss: 4309.044434, avg loss: 2.803542, ppl: 16.503004 +epoch: 1, batch: 40056, sum loss: 5474.157715, avg loss: 2.818825, ppl: 16.757149 +epoch: 1, batch: 40057, sum loss: 4345.121582, avg loss: 2.487190, ppl: 12.027434 +epoch: 1, batch: 40058, sum loss: 
3914.583740, avg loss: 2.371038, ppl: 10.708504 +epoch: 1, batch: 40059, sum loss: 3719.743408, avg loss: 2.407601, ppl: 11.107285 +epoch: 1, batch: 40060, sum loss: 4809.536621, avg loss: 2.499759, ppl: 12.179561 +epoch: 1, batch: 40061, sum loss: 3319.320312, avg loss: 2.236739, ppl: 9.362746 +epoch: 1, batch: 40062, sum loss: 5449.485352, avg loss: 2.809013, ppl: 16.593534 +epoch: 1, batch: 40063, sum loss: 4133.408691, avg loss: 2.540509, ppl: 12.686130 +epoch: 1, batch: 40064, sum loss: 4697.410156, avg loss: 2.695014, ppl: 14.805733 +epoch: 1, batch: 40065, sum loss: 5143.138672, avg loss: 2.606761, ppl: 13.555071 +epoch: 1, batch: 40066, sum loss: 4086.525146, avg loss: 2.697376, ppl: 14.840746 +epoch: 1, batch: 40067, sum loss: 3512.266602, avg loss: 2.469948, ppl: 11.821836 +epoch: 1, batch: 40068, sum loss: 3559.913574, avg loss: 2.494684, ppl: 12.117901 +epoch: 1, batch: 40069, sum loss: 3403.859131, avg loss: 2.378658, ppl: 10.790408 +epoch: 1, batch: 40070, sum loss: 4152.695801, avg loss: 2.334286, ppl: 10.322092 +epoch: 1, batch: 40071, sum loss: 4824.399902, avg loss: 2.536488, ppl: 12.635219 +epoch: 1, batch: 40072, sum loss: 3889.464111, avg loss: 2.256070, ppl: 9.545501 +epoch: 1, batch: 40073, sum loss: 3862.832764, avg loss: 2.374206, ppl: 10.742476 +epoch: 1, batch: 40074, sum loss: 4024.020020, avg loss: 2.613000, ppl: 13.639908 +epoch: 1, batch: 40075, sum loss: 4535.626953, avg loss: 2.663316, ppl: 14.343771 +epoch: 1, batch: 40076, sum loss: 3993.697754, avg loss: 2.475944, ppl: 11.892929 +epoch: 1, batch: 40077, sum loss: 3368.521729, avg loss: 2.451617, ppl: 11.607100 +epoch: 1, batch: 40078, sum loss: 4155.208984, avg loss: 2.451451, ppl: 11.605171 +epoch: 1, batch: 40079, sum loss: 3350.907471, avg loss: 2.402084, ppl: 11.046177 +epoch: 1, batch: 40080, sum loss: 4899.071289, avg loss: 2.608664, ppl: 13.580895 +epoch: 1, batch: 40081, sum loss: 3706.988770, avg loss: 2.469679, ppl: 11.818657 +epoch: 1, batch: 40082, sum loss: 
4786.216797, avg loss: 2.832081, ppl: 16.980759 +epoch: 1, batch: 40083, sum loss: 3451.822021, avg loss: 2.434289, ppl: 11.407707 +epoch: 1, batch: 40084, sum loss: 4960.709961, avg loss: 2.791621, ppl: 16.307428 +epoch: 1, batch: 40085, sum loss: 4898.183594, avg loss: 2.630604, ppl: 13.882146 +epoch: 1, batch: 40086, sum loss: 4269.820312, avg loss: 2.546106, ppl: 12.757334 +epoch: 1, batch: 40087, sum loss: 5354.983398, avg loss: 2.889899, ppl: 17.991497 +epoch: 1, batch: 40088, sum loss: 4659.203125, avg loss: 2.818635, ppl: 16.753958 +epoch: 1, batch: 40089, sum loss: 4769.660156, avg loss: 2.800740, ppl: 16.456820 +epoch: 1, batch: 40090, sum loss: 4945.716797, avg loss: 2.750677, ppl: 15.653220 +epoch: 1, batch: 40091, sum loss: 3576.506348, avg loss: 2.463159, ppl: 11.741841 +epoch: 1, batch: 40092, sum loss: 4450.871094, avg loss: 2.584710, ppl: 13.259445 +epoch: 1, batch: 40093, sum loss: 3939.527832, avg loss: 2.538356, ppl: 12.658840 +epoch: 1, batch: 40094, sum loss: 4504.903320, avg loss: 2.578651, ppl: 13.179347 +epoch: 1, batch: 40095, sum loss: 5077.646973, avg loss: 2.740230, ppl: 15.490553 +epoch: 1, batch: 40096, sum loss: 4492.917480, avg loss: 2.836438, ppl: 17.054903 +epoch: 1, batch: 40097, sum loss: 3708.208008, avg loss: 2.397032, ppl: 10.990506 +epoch: 1, batch: 40098, sum loss: 4350.748047, avg loss: 2.446990, ppl: 11.553518 +epoch: 1, batch: 40099, sum loss: 4474.194824, avg loss: 2.599765, ppl: 13.460568 +epoch: 1, batch: 40100, sum loss: 3907.325195, avg loss: 2.375273, ppl: 10.753954 +epoch: 1, batch: 40101, sum loss: 4029.854736, avg loss: 2.667012, ppl: 14.396883 +epoch: 1, batch: 40102, sum loss: 4395.964355, avg loss: 2.458593, ppl: 11.688356 +epoch: 1, batch: 40103, sum loss: 5018.298340, avg loss: 2.617788, ppl: 13.705368 +epoch: 1, batch: 40104, sum loss: 3760.903320, avg loss: 2.490664, ppl: 12.069293 +epoch: 1, batch: 40105, sum loss: 4305.569336, avg loss: 2.599982, ppl: 13.463490 +epoch: 1, batch: 40106, sum loss: 
3857.107422, avg loss: 2.530910, ppl: 12.564939 +epoch: 1, batch: 40107, sum loss: 4632.665527, avg loss: 2.608483, ppl: 13.578435 +epoch: 1, batch: 40108, sum loss: 5059.371094, avg loss: 2.718631, ppl: 15.159548 +epoch: 1, batch: 40109, sum loss: 4898.854980, avg loss: 3.124270, ppl: 22.743280 +epoch: 1, batch: 40110, sum loss: 4490.862305, avg loss: 2.647914, ppl: 14.124546 +epoch: 1, batch: 40111, sum loss: 4524.363770, avg loss: 2.685082, ppl: 14.659410 +epoch: 1, batch: 40112, sum loss: 3096.532715, avg loss: 2.352988, ppl: 10.516953 +epoch: 1, batch: 40113, sum loss: 5186.131348, avg loss: 2.810911, ppl: 16.625059 +epoch: 1, batch: 40114, sum loss: 5188.560547, avg loss: 2.621809, ppl: 13.760597 +epoch: 1, batch: 40115, sum loss: 4613.957031, avg loss: 2.533749, ppl: 12.600658 +epoch: 1, batch: 40116, sum loss: 4827.681641, avg loss: 2.753954, ppl: 15.704608 +epoch: 1, batch: 40117, sum loss: 4713.063477, avg loss: 2.510956, ppl: 12.316694 +epoch: 1, batch: 40118, sum loss: 4419.233887, avg loss: 2.724558, ppl: 15.249678 +epoch: 1, batch: 40119, sum loss: 4990.743652, avg loss: 2.733156, ppl: 15.381361 +epoch: 1, batch: 40120, sum loss: 3519.941895, avg loss: 2.217985, ppl: 9.188796 +epoch: 1, batch: 40121, sum loss: 4046.318115, avg loss: 2.562583, ppl: 12.969271 +epoch: 1, batch: 40122, sum loss: 4748.114258, avg loss: 2.793009, ppl: 16.330076 +epoch: 1, batch: 40123, sum loss: 4449.065918, avg loss: 2.729488, ppl: 15.325045 +epoch: 1, batch: 40124, sum loss: 4445.406250, avg loss: 2.596616, ppl: 13.418251 +epoch: 1, batch: 40125, sum loss: 3912.234863, avg loss: 2.589169, ppl: 13.318703 +epoch: 1, batch: 40126, sum loss: 4072.489258, avg loss: 2.853882, ppl: 17.355021 +epoch: 1, batch: 40127, sum loss: 4116.790039, avg loss: 2.564978, ppl: 13.000374 +epoch: 1, batch: 40128, sum loss: 4227.672363, avg loss: 2.523984, ppl: 12.478208 +epoch: 1, batch: 40129, sum loss: 4175.645508, avg loss: 2.748944, ppl: 15.626115 +epoch: 1, batch: 40130, sum loss: 
3731.825684, avg loss: 2.492869, ppl: 12.095934 +epoch: 1, batch: 40131, sum loss: 4340.957031, avg loss: 2.702962, ppl: 14.923870 +epoch: 1, batch: 40132, sum loss: 4302.912598, avg loss: 2.526666, ppl: 12.511724 +epoch: 1, batch: 40133, sum loss: 4891.190430, avg loss: 2.638182, ppl: 13.987757 +epoch: 1, batch: 40134, sum loss: 4325.716309, avg loss: 2.758748, ppl: 15.780067 +epoch: 1, batch: 40135, sum loss: 3924.344727, avg loss: 2.392893, ppl: 10.945113 +epoch: 1, batch: 40136, sum loss: 3290.577637, avg loss: 2.375868, ppl: 10.760352 +epoch: 1, batch: 40137, sum loss: 4514.755371, avg loss: 2.599168, ppl: 13.452545 +epoch: 1, batch: 40138, sum loss: 3864.935059, avg loss: 2.427723, ppl: 11.333047 +epoch: 1, batch: 40139, sum loss: 4518.270996, avg loss: 2.574513, ppl: 13.124929 +epoch: 1, batch: 40140, sum loss: 4886.619141, avg loss: 2.586882, ppl: 13.288269 +epoch: 1, batch: 40141, sum loss: 4535.423340, avg loss: 2.770570, ppl: 15.967737 +epoch: 1, batch: 40142, sum loss: 4334.165039, avg loss: 2.356806, ppl: 10.557174 +epoch: 1, batch: 40143, sum loss: 3682.733398, avg loss: 2.505261, ppl: 12.246755 +epoch: 1, batch: 40144, sum loss: 4441.611328, avg loss: 2.382839, ppl: 10.835619 +epoch: 1, batch: 40145, sum loss: 3340.683594, avg loss: 2.274121, ppl: 9.719370 +epoch: 1, batch: 40146, sum loss: 4617.898926, avg loss: 2.607509, ppl: 13.565220 +epoch: 1, batch: 40147, sum loss: 4882.023926, avg loss: 2.796119, ppl: 16.380953 +epoch: 1, batch: 40148, sum loss: 4238.719238, avg loss: 2.545777, ppl: 12.753138 +epoch: 1, batch: 40149, sum loss: 3727.517090, avg loss: 2.528845, ppl: 12.539013 +epoch: 1, batch: 40150, sum loss: 4648.281250, avg loss: 2.685315, ppl: 14.662825 +epoch: 1, batch: 40151, sum loss: 4758.477539, avg loss: 2.501828, ppl: 12.204789 +epoch: 1, batch: 40152, sum loss: 4466.697266, avg loss: 2.549485, ppl: 12.800509 +epoch: 1, batch: 40153, sum loss: 4616.460449, avg loss: 2.781000, ppl: 16.135155 +epoch: 1, batch: 40154, sum loss: 
4522.413086, avg loss: 2.596104, ppl: 13.411385 +epoch: 1, batch: 40155, sum loss: 4314.170410, avg loss: 2.606750, ppl: 13.554919 +epoch: 1, batch: 40156, sum loss: 4760.298828, avg loss: 2.490999, ppl: 12.073328 +epoch: 1, batch: 40157, sum loss: 5071.187500, avg loss: 2.901137, ppl: 18.194822 +epoch: 1, batch: 40158, sum loss: 3970.783203, avg loss: 2.452615, ppl: 11.618688 +epoch: 1, batch: 40159, sum loss: 4777.179199, avg loss: 2.665837, ppl: 14.379978 +epoch: 1, batch: 40160, sum loss: 4099.700195, avg loss: 2.490705, ppl: 12.069782 +epoch: 1, batch: 40161, sum loss: 4986.976562, avg loss: 2.649828, ppl: 14.151608 +epoch: 1, batch: 40162, sum loss: 3554.088379, avg loss: 2.430977, ppl: 11.369987 +epoch: 1, batch: 40163, sum loss: 4932.012695, avg loss: 2.572777, ppl: 13.102156 +epoch: 1, batch: 40164, sum loss: 3907.206055, avg loss: 2.570530, ppl: 13.072757 +epoch: 1, batch: 40165, sum loss: 3716.441895, avg loss: 2.255122, ppl: 9.536461 +epoch: 1, batch: 40166, sum loss: 5037.925293, avg loss: 2.814483, ppl: 16.684555 +epoch: 1, batch: 40167, sum loss: 4309.776367, avg loss: 2.469786, ppl: 11.819917 +epoch: 1, batch: 40168, sum loss: 3531.605713, avg loss: 2.478320, ppl: 11.921216 +epoch: 1, batch: 40169, sum loss: 4746.125000, avg loss: 2.603470, ppl: 13.510533 +epoch: 1, batch: 40170, sum loss: 5017.475586, avg loss: 2.970678, ppl: 19.505146 +epoch: 1, batch: 40171, sum loss: 5190.677246, avg loss: 2.663252, ppl: 14.342851 +epoch: 1, batch: 40172, sum loss: 3849.297119, avg loss: 2.494684, ppl: 12.117904 +epoch: 1, batch: 40173, sum loss: 5114.191895, avg loss: 2.756977, ppl: 15.752150 +epoch: 1, batch: 40174, sum loss: 4467.195801, avg loss: 2.745664, ppl: 15.574954 +epoch: 1, batch: 40175, sum loss: 4084.497070, avg loss: 2.797601, ppl: 16.405239 +epoch: 1, batch: 40176, sum loss: 4676.807129, avg loss: 2.621529, ppl: 13.756740 +epoch: 1, batch: 40177, sum loss: 3863.542236, avg loss: 2.377564, ppl: 10.778619 +epoch: 1, batch: 40178, sum loss: 
3692.871582, avg loss: 2.463557, ppl: 11.746517 +epoch: 1, batch: 40179, sum loss: 4735.381348, avg loss: 2.718359, ppl: 15.155432 +epoch: 1, batch: 40180, sum loss: 4044.428955, avg loss: 2.592583, ppl: 13.364243 +epoch: 1, batch: 40181, sum loss: 3493.356445, avg loss: 2.344535, ppl: 10.428418 +epoch: 1, batch: 40182, sum loss: 4164.295898, avg loss: 2.408500, ppl: 11.117270 +epoch: 1, batch: 40183, sum loss: 5252.664551, avg loss: 2.848517, ppl: 17.262157 +epoch: 1, batch: 40184, sum loss: 3871.768799, avg loss: 2.507622, ppl: 12.275709 +epoch: 1, batch: 40185, sum loss: 3871.908447, avg loss: 2.410902, ppl: 11.144006 +epoch: 1, batch: 40186, sum loss: 4023.705078, avg loss: 2.529042, ppl: 12.541480 +epoch: 1, batch: 40187, sum loss: 5138.298340, avg loss: 2.733137, ppl: 15.381067 +epoch: 1, batch: 40188, sum loss: 4571.257812, avg loss: 2.478990, ppl: 11.929211 +epoch: 1, batch: 40189, sum loss: 4930.875000, avg loss: 2.639655, ppl: 14.008365 +epoch: 1, batch: 40190, sum loss: 4622.250977, avg loss: 2.490437, ppl: 12.066548 +epoch: 1, batch: 40191, sum loss: 4527.404785, avg loss: 2.733940, ppl: 15.393419 +epoch: 1, batch: 40192, sum loss: 4243.676758, avg loss: 2.561060, ppl: 12.949539 +epoch: 1, batch: 40193, sum loss: 4523.033203, avg loss: 2.668456, ppl: 14.417692 +epoch: 1, batch: 40194, sum loss: 3820.387695, avg loss: 2.283555, ppl: 9.811501 +epoch: 1, batch: 40195, sum loss: 4110.492188, avg loss: 2.704271, ppl: 14.943423 +epoch: 1, batch: 40196, sum loss: 4473.837402, avg loss: 2.360864, ppl: 10.600108 +epoch: 1, batch: 40197, sum loss: 4961.404785, avg loss: 2.676054, ppl: 14.527658 +epoch: 1, batch: 40198, sum loss: 3908.777100, avg loss: 2.390689, ppl: 10.921020 +epoch: 1, batch: 40199, sum loss: 4837.219727, avg loss: 2.725194, ppl: 15.259374 +epoch: 1, batch: 40200, sum loss: 3841.885742, avg loss: 2.519269, ppl: 12.419518 +epoch: 1, batch: 40201, sum loss: 4058.703125, avg loss: 2.533523, ppl: 12.597805 +epoch: 1, batch: 40202, sum loss: 
4534.709473, avg loss: 2.390464, ppl: 10.918554 +epoch: 1, batch: 40203, sum loss: 4015.161621, avg loss: 2.374430, ppl: 10.744891 +epoch: 1, batch: 40204, sum loss: 3581.100342, avg loss: 2.352891, ppl: 10.515929 +epoch: 1, batch: 40205, sum loss: 4651.410645, avg loss: 2.644350, ppl: 14.074287 +epoch: 1, batch: 40206, sum loss: 4359.966797, avg loss: 2.527517, ppl: 12.522375 +epoch: 1, batch: 40207, sum loss: 4472.221680, avg loss: 2.541035, ppl: 12.692801 +epoch: 1, batch: 40208, sum loss: 4527.362305, avg loss: 2.656903, ppl: 14.252079 +epoch: 1, batch: 40209, sum loss: 3220.950195, avg loss: 2.067362, ppl: 7.903945 +epoch: 1, batch: 40210, sum loss: 4052.597656, avg loss: 2.701732, ppl: 14.905521 +epoch: 1, batch: 40211, sum loss: 3971.194824, avg loss: 2.329147, ppl: 10.269175 +epoch: 1, batch: 40212, sum loss: 5213.478516, avg loss: 2.709708, ppl: 15.024891 +epoch: 1, batch: 40213, sum loss: 4020.138184, avg loss: 2.598667, ppl: 13.445808 +epoch: 1, batch: 40214, sum loss: 4863.020508, avg loss: 2.625821, ppl: 13.815910 +epoch: 1, batch: 40215, sum loss: 4451.875977, avg loss: 2.586796, ppl: 13.287132 +epoch: 1, batch: 40216, sum loss: 3499.003174, avg loss: 2.436632, ppl: 11.434461 +epoch: 1, batch: 40217, sum loss: 4488.175293, avg loss: 2.658872, ppl: 14.280167 +epoch: 1, batch: 40218, sum loss: 3973.499023, avg loss: 2.516465, ppl: 12.384745 +epoch: 1, batch: 40219, sum loss: 3797.951172, avg loss: 2.505245, ppl: 12.246558 +epoch: 1, batch: 40220, sum loss: 4849.152832, avg loss: 2.667301, ppl: 14.401044 +epoch: 1, batch: 40221, sum loss: 4429.275391, avg loss: 2.789216, ppl: 16.268265 +epoch: 1, batch: 40222, sum loss: 3442.190430, avg loss: 2.413878, ppl: 11.177225 +epoch: 1, batch: 40223, sum loss: 4495.897461, avg loss: 2.766706, ppl: 15.906153 +epoch: 1, batch: 40224, sum loss: 4537.683594, avg loss: 2.648969, ppl: 14.139449 +epoch: 1, batch: 40225, sum loss: 3724.178467, avg loss: 2.530013, ppl: 12.553664 +epoch: 1, batch: 40226, sum loss: 
3738.983887, avg loss: 2.200697, ppl: 9.031305 +epoch: 1, batch: 40227, sum loss: 4612.082520, avg loss: 2.521642, ppl: 12.449018 +epoch: 1, batch: 40228, sum loss: 3856.078369, avg loss: 2.432857, ppl: 11.391381 +epoch: 1, batch: 40229, sum loss: 4646.420898, avg loss: 2.650554, ppl: 14.161881 +epoch: 1, batch: 40230, sum loss: 3815.889404, avg loss: 2.508803, ppl: 12.290212 +epoch: 1, batch: 40231, sum loss: 4079.027100, avg loss: 2.364654, ppl: 10.640352 +epoch: 1, batch: 40232, sum loss: 4253.456055, avg loss: 2.665073, ppl: 14.368994 +epoch: 1, batch: 40233, sum loss: 4803.427734, avg loss: 2.507008, ppl: 12.268172 +epoch: 1, batch: 40234, sum loss: 4472.882324, avg loss: 2.809599, ppl: 16.603266 +epoch: 1, batch: 40235, sum loss: 3688.159668, avg loss: 2.382532, ppl: 10.832296 +epoch: 1, batch: 40236, sum loss: 3663.181885, avg loss: 2.486885, ppl: 12.023767 +epoch: 1, batch: 40237, sum loss: 4244.137695, avg loss: 2.506874, ppl: 12.266526 +epoch: 1, batch: 40238, sum loss: 4227.472168, avg loss: 2.467876, ppl: 11.797365 +epoch: 1, batch: 40239, sum loss: 4075.517822, avg loss: 2.410123, ppl: 11.135332 +epoch: 1, batch: 40240, sum loss: 4629.770020, avg loss: 2.415112, ppl: 11.191027 +epoch: 1, batch: 40241, sum loss: 3001.413818, avg loss: 2.159291, ppl: 8.664989 +epoch: 1, batch: 40242, sum loss: 4123.586914, avg loss: 2.311428, ppl: 10.088820 +epoch: 1, batch: 40243, sum loss: 4544.626465, avg loss: 2.536064, ppl: 12.629861 +epoch: 1, batch: 40244, sum loss: 4492.913574, avg loss: 2.836435, ppl: 17.054861 +epoch: 1, batch: 40245, sum loss: 3585.831787, avg loss: 2.289803, ppl: 9.872993 +epoch: 1, batch: 40246, sum loss: 4018.545654, avg loss: 2.373624, ppl: 10.736234 +epoch: 1, batch: 40247, sum loss: 4048.221680, avg loss: 2.539662, ppl: 12.675390 +epoch: 1, batch: 40248, sum loss: 4568.631836, avg loss: 2.651556, ppl: 14.176086 +epoch: 1, batch: 40249, sum loss: 4101.789551, avg loss: 2.578120, ppl: 13.172357 +epoch: 1, batch: 40250, sum loss: 
4056.623047, avg loss: 2.513397, ppl: 12.346804 +epoch: 1, batch: 40251, sum loss: 4392.588867, avg loss: 2.567264, ppl: 13.030129 +epoch: 1, batch: 40252, sum loss: 3956.867432, avg loss: 2.436495, ppl: 11.432894 +epoch: 1, batch: 40253, sum loss: 4426.861816, avg loss: 2.576753, ppl: 13.154358 +epoch: 1, batch: 40254, sum loss: 3742.772949, avg loss: 2.345096, ppl: 10.434273 +epoch: 1, batch: 40255, sum loss: 4459.291992, avg loss: 2.451507, ppl: 11.605827 +epoch: 1, batch: 40256, sum loss: 4832.198242, avg loss: 2.617659, ppl: 13.703604 +epoch: 1, batch: 40257, sum loss: 4108.870117, avg loss: 2.615449, ppl: 13.673354 +epoch: 1, batch: 40258, sum loss: 4981.869629, avg loss: 2.574610, ppl: 13.126190 +epoch: 1, batch: 40259, sum loss: 3993.901855, avg loss: 2.354895, ppl: 10.537024 +epoch: 1, batch: 40260, sum loss: 4269.792969, avg loss: 2.576821, ppl: 13.155255 +epoch: 1, batch: 40261, sum loss: 4732.655762, avg loss: 2.574894, ppl: 13.129928 +epoch: 1, batch: 40262, sum loss: 5218.580566, avg loss: 2.894387, ppl: 18.072428 +epoch: 1, batch: 40263, sum loss: 4874.579590, avg loss: 2.642049, ppl: 14.041941 +epoch: 1, batch: 40264, sum loss: 3550.149902, avg loss: 2.170018, ppl: 8.758446 +epoch: 1, batch: 40265, sum loss: 3637.634521, avg loss: 2.576228, ppl: 13.147457 +epoch: 1, batch: 40266, sum loss: 4141.757812, avg loss: 2.513203, ppl: 12.344401 +epoch: 1, batch: 40267, sum loss: 3581.188721, avg loss: 2.342177, ppl: 10.403863 +epoch: 1, batch: 40268, sum loss: 4320.396973, avg loss: 2.596392, ppl: 13.415254 +epoch: 1, batch: 40269, sum loss: 4833.575195, avg loss: 2.605701, ppl: 13.540713 +epoch: 1, batch: 40270, sum loss: 4157.674805, avg loss: 2.542920, ppl: 12.716754 +epoch: 1, batch: 40271, sum loss: 3862.078125, avg loss: 2.378127, ppl: 10.784682 +epoch: 1, batch: 40272, sum loss: 4179.060547, avg loss: 2.449625, ppl: 11.584002 +epoch: 1, batch: 40273, sum loss: 4324.977051, avg loss: 2.563709, ppl: 12.983886 +epoch: 1, batch: 40274, sum loss: 
4053.093994, avg loss: 2.402545, ppl: 11.051271 +epoch: 1, batch: 40275, sum loss: 3435.564941, avg loss: 2.387467, ppl: 10.885884 +epoch: 1, batch: 40276, sum loss: 3586.822754, avg loss: 2.534857, ppl: 12.614627 +epoch: 1, batch: 40277, sum loss: 4850.355469, avg loss: 2.582724, ppl: 13.233134 +epoch: 1, batch: 40278, sum loss: 4845.827148, avg loss: 2.646547, ppl: 14.105247 +epoch: 1, batch: 40279, sum loss: 3664.805176, avg loss: 2.246968, ppl: 9.459015 +epoch: 1, batch: 40280, sum loss: 4805.129395, avg loss: 2.767932, ppl: 15.925661 +epoch: 1, batch: 40281, sum loss: 5161.831055, avg loss: 2.562975, ppl: 12.974355 +epoch: 1, batch: 40282, sum loss: 4921.777832, avg loss: 2.609638, ppl: 13.594135 +epoch: 1, batch: 40283, sum loss: 3701.831543, avg loss: 2.631010, ppl: 13.887794 +epoch: 1, batch: 40284, sum loss: 4530.344727, avg loss: 2.760722, ppl: 15.811254 +epoch: 1, batch: 40285, sum loss: 4060.861328, avg loss: 2.618221, ppl: 13.711313 +epoch: 1, batch: 40286, sum loss: 2863.967285, avg loss: 2.101223, ppl: 8.176166 +epoch: 1, batch: 40287, sum loss: 3998.166992, avg loss: 2.464961, ppl: 11.763024 +epoch: 1, batch: 40288, sum loss: 4042.227051, avg loss: 2.439485, ppl: 11.467134 +epoch: 1, batch: 40289, sum loss: 4168.921875, avg loss: 2.749949, ppl: 15.641827 +epoch: 1, batch: 40290, sum loss: 4478.628418, avg loss: 2.654789, ppl: 14.221981 +epoch: 1, batch: 40291, sum loss: 4779.010742, avg loss: 2.598701, ppl: 13.446260 +epoch: 1, batch: 40292, sum loss: 4879.686035, avg loss: 2.697450, ppl: 14.841832 +epoch: 1, batch: 40293, sum loss: 3370.435547, avg loss: 2.414352, ppl: 11.182524 +epoch: 1, batch: 40294, sum loss: 3239.904785, avg loss: 2.159936, ppl: 8.670587 +epoch: 1, batch: 40295, sum loss: 3996.986328, avg loss: 2.502809, ppl: 12.216766 +epoch: 1, batch: 40296, sum loss: 4541.715820, avg loss: 2.634406, ppl: 13.935034 +epoch: 1, batch: 40297, sum loss: 3813.389893, avg loss: 2.442915, ppl: 11.506530 +epoch: 1, batch: 40298, sum loss: 
3947.899902, avg loss: 2.493935, ppl: 12.108835 +epoch: 1, batch: 40299, sum loss: 4106.228516, avg loss: 2.571214, ppl: 13.081696 +epoch: 1, batch: 40300, sum loss: 4353.078125, avg loss: 2.530859, ppl: 12.564301 +epoch: 1, batch: 40301, sum loss: 4095.711670, avg loss: 2.495863, ppl: 12.132201 +epoch: 1, batch: 40302, sum loss: 5699.867676, avg loss: 2.824513, ppl: 16.852739 +epoch: 1, batch: 40303, sum loss: 4917.043457, avg loss: 2.549012, ppl: 12.794453 +epoch: 1, batch: 40304, sum loss: 4240.375000, avg loss: 2.482655, ppl: 11.973011 +epoch: 1, batch: 40305, sum loss: 4211.946289, avg loss: 2.368924, ppl: 10.685884 +epoch: 1, batch: 40306, sum loss: 3632.000488, avg loss: 2.370758, ppl: 10.705500 +epoch: 1, batch: 40307, sum loss: 4710.006836, avg loss: 2.699144, ppl: 14.867005 +epoch: 1, batch: 40308, sum loss: 5829.641113, avg loss: 2.900319, ppl: 18.179945 +epoch: 1, batch: 40309, sum loss: 3883.306641, avg loss: 2.490896, ppl: 12.072087 +epoch: 1, batch: 40310, sum loss: 4071.467773, avg loss: 2.452692, ppl: 11.619579 +epoch: 1, batch: 40311, sum loss: 5114.443848, avg loss: 2.764564, ppl: 15.872123 +epoch: 1, batch: 40312, sum loss: 4402.842773, avg loss: 2.750058, ppl: 15.643542 +epoch: 1, batch: 40313, sum loss: 4955.717285, avg loss: 2.788811, ppl: 16.261673 +epoch: 1, batch: 40314, sum loss: 4595.870117, avg loss: 2.563229, ppl: 12.977659 +epoch: 1, batch: 40315, sum loss: 4342.188965, avg loss: 2.540778, ppl: 12.689535 +epoch: 1, batch: 40316, sum loss: 4854.927246, avg loss: 2.704695, ppl: 14.949756 +epoch: 1, batch: 40317, sum loss: 3747.802002, avg loss: 2.408613, ppl: 11.118531 +epoch: 1, batch: 40318, sum loss: 3444.084473, avg loss: 2.313018, ppl: 10.104878 +epoch: 1, batch: 40319, sum loss: 3939.949463, avg loss: 2.665730, ppl: 14.378448 +epoch: 1, batch: 40320, sum loss: 4087.742188, avg loss: 2.506280, ppl: 12.259237 +epoch: 1, batch: 40321, sum loss: 3911.429688, avg loss: 2.526763, ppl: 12.512941 +epoch: 1, batch: 40322, sum loss: 
3603.661133, avg loss: 2.239690, ppl: 9.390418 +epoch: 1, batch: 40323, sum loss: 4041.037598, avg loss: 2.565738, ppl: 13.010256 +epoch: 1, batch: 40324, sum loss: 4399.399902, avg loss: 2.548899, ppl: 12.793014 +epoch: 1, batch: 40325, sum loss: 4023.140137, avg loss: 2.426502, ppl: 11.319218 +epoch: 1, batch: 40326, sum loss: 4395.717773, avg loss: 2.463968, ppl: 11.751344 +epoch: 1, batch: 40327, sum loss: 4378.193848, avg loss: 2.601422, ppl: 13.482902 +epoch: 1, batch: 40328, sum loss: 4334.879883, avg loss: 2.560472, ppl: 12.941931 +epoch: 1, batch: 40329, sum loss: 4470.554688, avg loss: 2.520042, ppl: 12.429118 +epoch: 1, batch: 40330, sum loss: 3978.145996, avg loss: 2.506708, ppl: 12.264494 +epoch: 1, batch: 40331, sum loss: 4974.094727, avg loss: 2.589326, ppl: 13.320789 +epoch: 1, batch: 40332, sum loss: 3865.653809, avg loss: 2.307853, ppl: 10.052820 +epoch: 1, batch: 40333, sum loss: 3691.141357, avg loss: 2.260344, ppl: 9.586385 +epoch: 1, batch: 40334, sum loss: 4578.823242, avg loss: 2.704562, ppl: 14.947767 +epoch: 1, batch: 40335, sum loss: 4225.545898, avg loss: 2.578124, ppl: 13.172407 +epoch: 1, batch: 40336, sum loss: 4744.644531, avg loss: 2.560520, ppl: 12.942551 +epoch: 1, batch: 40337, sum loss: 4657.436523, avg loss: 2.805685, ppl: 16.538397 +epoch: 1, batch: 40338, sum loss: 4168.222656, avg loss: 2.683981, ppl: 14.643275 +epoch: 1, batch: 40339, sum loss: 4196.934570, avg loss: 2.481925, ppl: 11.964271 +epoch: 1, batch: 40340, sum loss: 5202.029785, avg loss: 2.818001, ppl: 16.743347 +epoch: 1, batch: 40341, sum loss: 3890.439209, avg loss: 2.321265, ppl: 10.188550 +epoch: 1, batch: 40342, sum loss: 4164.987305, avg loss: 2.669864, ppl: 14.438002 +epoch: 1, batch: 40343, sum loss: 4285.342285, avg loss: 2.637134, ppl: 13.973094 +epoch: 1, batch: 40344, sum loss: 3771.776855, avg loss: 2.416257, ppl: 11.203844 +epoch: 1, batch: 40345, sum loss: 5427.985352, avg loss: 2.693789, ppl: 14.787603 +epoch: 1, batch: 40346, sum loss: 
5196.051758, avg loss: 2.741980, ppl: 15.517677 +epoch: 1, batch: 40347, sum loss: 3507.960449, avg loss: 2.282343, ppl: 9.799611 +epoch: 1, batch: 40348, sum loss: 5340.227051, avg loss: 2.816576, ppl: 16.719498 +epoch: 1, batch: 40349, sum loss: 3848.593262, avg loss: 2.460737, ppl: 11.713447 +epoch: 1, batch: 40350, sum loss: 3879.284668, avg loss: 2.356795, ppl: 10.557062 +epoch: 1, batch: 40351, sum loss: 5647.791992, avg loss: 2.989832, ppl: 19.882336 +epoch: 1, batch: 40352, sum loss: 3858.490479, avg loss: 2.438995, ppl: 11.461517 +epoch: 1, batch: 40353, sum loss: 4091.951416, avg loss: 2.554277, ppl: 12.861993 +epoch: 1, batch: 40354, sum loss: 4380.034180, avg loss: 2.517261, ppl: 12.394602 +epoch: 1, batch: 40355, sum loss: 4248.016602, avg loss: 2.349567, ppl: 10.481028 +epoch: 1, batch: 40356, sum loss: 3613.744141, avg loss: 2.402755, ppl: 11.053590 +epoch: 1, batch: 40357, sum loss: 5242.981445, avg loss: 2.855654, ppl: 17.385807 +epoch: 1, batch: 40358, sum loss: 4484.747070, avg loss: 2.573005, ppl: 13.105143 +epoch: 1, batch: 40359, sum loss: 4708.917969, avg loss: 2.755365, ppl: 15.726775 +epoch: 1, batch: 40360, sum loss: 4353.055664, avg loss: 2.724065, ppl: 15.242153 +epoch: 1, batch: 40361, sum loss: 4280.903320, avg loss: 2.554238, ppl: 12.861500 +epoch: 1, batch: 40362, sum loss: 4291.714355, avg loss: 2.579155, ppl: 13.185997 +epoch: 1, batch: 40363, sum loss: 5637.015625, avg loss: 2.721881, ppl: 15.208905 +epoch: 1, batch: 40364, sum loss: 3586.246826, avg loss: 2.198802, ppl: 9.014213 +epoch: 1, batch: 40365, sum loss: 3715.439209, avg loss: 2.583755, ppl: 13.246783 +epoch: 1, batch: 40366, sum loss: 4302.970703, avg loss: 2.555208, ppl: 12.873982 +epoch: 1, batch: 40367, sum loss: 4562.166016, avg loss: 2.731836, ppl: 15.361065 +epoch: 1, batch: 40368, sum loss: 3883.540039, avg loss: 2.500670, ppl: 12.190656 +epoch: 1, batch: 40369, sum loss: 3443.621582, avg loss: 2.244864, ppl: 9.439132 +epoch: 1, batch: 40370, sum loss: 
4498.056641, avg loss: 2.549919, ppl: 12.806062 +epoch: 1, batch: 40371, sum loss: 4448.582031, avg loss: 2.598471, ppl: 13.443164 +epoch: 1, batch: 40372, sum loss: 4452.191895, avg loss: 2.505454, ppl: 12.249120 +epoch: 1, batch: 40373, sum loss: 4033.665283, avg loss: 2.514754, ppl: 12.363567 +epoch: 1, batch: 40374, sum loss: 4604.233887, avg loss: 2.872261, ppl: 17.676933 +epoch: 1, batch: 40375, sum loss: 4733.792480, avg loss: 2.675971, ppl: 14.526448 +epoch: 1, batch: 40376, sum loss: 4775.904297, avg loss: 2.597012, ppl: 13.423566 +epoch: 1, batch: 40377, sum loss: 4093.826416, avg loss: 2.528614, ppl: 12.536122 +epoch: 1, batch: 40378, sum loss: 4178.018555, avg loss: 2.515363, ppl: 12.371104 +epoch: 1, batch: 40379, sum loss: 4977.813477, avg loss: 2.622663, ppl: 13.772347 +epoch: 1, batch: 40380, sum loss: 3902.677246, avg loss: 2.457605, ppl: 11.676816 +epoch: 1, batch: 40381, sum loss: 4101.284180, avg loss: 2.460278, ppl: 11.708069 +epoch: 1, batch: 40382, sum loss: 4532.260742, avg loss: 2.675478, ppl: 14.519295 +epoch: 1, batch: 40383, sum loss: 4620.070312, avg loss: 2.815399, ppl: 16.699841 +epoch: 1, batch: 40384, sum loss: 4376.470703, avg loss: 2.515213, ppl: 12.369244 +epoch: 1, batch: 40385, sum loss: 4236.677734, avg loss: 2.348491, ppl: 10.469758 +epoch: 1, batch: 40386, sum loss: 5298.681641, avg loss: 2.908168, ppl: 18.323196 +epoch: 1, batch: 40387, sum loss: 3458.843994, avg loss: 2.406990, ppl: 11.100493 +epoch: 1, batch: 40388, sum loss: 5030.572266, avg loss: 2.751954, ppl: 15.673232 +epoch: 1, batch: 40389, sum loss: 5277.279785, avg loss: 2.672040, ppl: 14.469463 +epoch: 1, batch: 40390, sum loss: 3907.013184, avg loss: 2.468107, ppl: 11.800085 +epoch: 1, batch: 40391, sum loss: 3722.982666, avg loss: 2.475387, ppl: 11.886311 +epoch: 1, batch: 40392, sum loss: 4923.031738, avg loss: 2.792417, ppl: 16.320423 +epoch: 1, batch: 40393, sum loss: 3117.247070, avg loss: 2.321107, ppl: 10.186949 +epoch: 1, batch: 40394, sum loss: 
3930.700928, avg loss: 2.645155, ppl: 14.085634 +epoch: 1, batch: 40395, sum loss: 3774.895020, avg loss: 2.438563, ppl: 11.456562 +epoch: 1, batch: 40396, sum loss: 5139.905273, avg loss: 2.742746, ppl: 15.529565 +epoch: 1, batch: 40397, sum loss: 4963.022949, avg loss: 2.754175, ppl: 15.708076 +epoch: 1, batch: 40398, sum loss: 3737.423828, avg loss: 2.347628, ppl: 10.460729 +epoch: 1, batch: 40399, sum loss: 5427.446289, avg loss: 2.767693, ppl: 15.921865 +epoch: 1, batch: 40400, sum loss: 3700.872803, avg loss: 2.308717, ppl: 10.061502 +epoch: 1, batch: 40401, sum loss: 4633.416504, avg loss: 2.572691, ppl: 13.101031 +epoch: 1, batch: 40402, sum loss: 4978.488281, avg loss: 2.735433, ppl: 15.416423 +epoch: 1, batch: 40403, sum loss: 3744.677246, avg loss: 2.600470, ppl: 13.470072 +epoch: 1, batch: 40404, sum loss: 3357.801514, avg loss: 2.410482, ppl: 11.139328 +epoch: 1, batch: 40405, sum loss: 4514.405273, avg loss: 2.708102, ppl: 15.000770 +epoch: 1, batch: 40406, sum loss: 4321.625977, avg loss: 2.438841, ppl: 11.459749 +epoch: 1, batch: 40407, sum loss: 4323.747070, avg loss: 2.589070, ppl: 13.317382 +epoch: 1, batch: 40408, sum loss: 4447.350586, avg loss: 2.566272, ppl: 13.017212 +epoch: 1, batch: 40409, sum loss: 4221.817383, avg loss: 2.563338, ppl: 12.979067 +epoch: 1, batch: 40410, sum loss: 3332.854736, avg loss: 2.411617, ppl: 11.151979 +epoch: 1, batch: 40411, sum loss: 4911.088867, avg loss: 2.638952, ppl: 13.998523 +epoch: 1, batch: 40412, sum loss: 4999.575195, avg loss: 2.703935, ppl: 14.938394 +epoch: 1, batch: 40413, sum loss: 4599.387695, avg loss: 2.814803, ppl: 16.689886 +epoch: 1, batch: 40414, sum loss: 4424.818359, avg loss: 2.451423, ppl: 11.604848 +epoch: 1, batch: 40415, sum loss: 4239.470703, avg loss: 2.391129, ppl: 10.925817 +epoch: 1, batch: 40416, sum loss: 3697.303467, avg loss: 2.313707, ppl: 10.111838 +epoch: 1, batch: 40417, sum loss: 3769.610107, avg loss: 2.299945, ppl: 9.973635 +epoch: 1, batch: 40418, sum loss: 
4378.880371, avg loss: 2.689730, ppl: 14.727698 +epoch: 1, batch: 40419, sum loss: 4877.237305, avg loss: 2.815957, ppl: 16.709156 +epoch: 1, batch: 40420, sum loss: 4222.093262, avg loss: 2.464736, ppl: 11.760380 +epoch: 1, batch: 40421, sum loss: 4074.205566, avg loss: 2.250942, ppl: 9.496682 +epoch: 1, batch: 40422, sum loss: 4028.784668, avg loss: 2.346409, ppl: 10.447987 +epoch: 1, batch: 40423, sum loss: 4191.095215, avg loss: 2.557105, ppl: 12.898423 +epoch: 1, batch: 40424, sum loss: 4550.371582, avg loss: 2.645565, ppl: 14.091402 +epoch: 1, batch: 40425, sum loss: 4850.641602, avg loss: 2.649176, ppl: 14.142385 +epoch: 1, batch: 40426, sum loss: 4281.617676, avg loss: 2.563843, ppl: 12.985625 +epoch: 1, batch: 40427, sum loss: 4440.487305, avg loss: 2.627507, ppl: 13.839229 +epoch: 1, batch: 40428, sum loss: 4163.329102, avg loss: 2.584314, ppl: 13.254189 +epoch: 1, batch: 40429, sum loss: 4067.448242, avg loss: 2.521667, ppl: 12.449329 +epoch: 1, batch: 40430, sum loss: 3557.027832, avg loss: 2.413180, ppl: 11.169428 +epoch: 1, batch: 40431, sum loss: 5001.503418, avg loss: 2.674601, ppl: 14.506558 +epoch: 1, batch: 40432, sum loss: 3473.829590, avg loss: 2.183425, ppl: 8.876658 +epoch: 1, batch: 40433, sum loss: 5755.504883, avg loss: 2.665820, ppl: 14.379731 +epoch: 1, batch: 40434, sum loss: 3443.943115, avg loss: 2.258323, ppl: 9.567036 +epoch: 1, batch: 40435, sum loss: 4027.323486, avg loss: 2.647813, ppl: 14.123118 +epoch: 1, batch: 40436, sum loss: 4184.564941, avg loss: 2.576703, ppl: 13.153693 +epoch: 1, batch: 40437, sum loss: 4672.829102, avg loss: 2.583101, ppl: 13.238123 +epoch: 1, batch: 40438, sum loss: 4860.262695, avg loss: 2.750573, ppl: 15.651600 +epoch: 1, batch: 40439, sum loss: 4830.435059, avg loss: 2.585886, ppl: 13.275045 +epoch: 1, batch: 40440, sum loss: 4745.353027, avg loss: 2.722520, ppl: 15.218630 +epoch: 1, batch: 40441, sum loss: 3921.608398, avg loss: 2.392684, ppl: 10.942820 +epoch: 1, batch: 40442, sum loss: 
3953.731201, avg loss: 2.477275, ppl: 11.908771 +epoch: 1, batch: 40443, sum loss: 3932.842773, avg loss: 2.433690, ppl: 11.400872 +epoch: 1, batch: 40444, sum loss: 4764.517090, avg loss: 2.515585, ppl: 12.373841 +epoch: 1, batch: 40445, sum loss: 3212.402832, avg loss: 2.302798, ppl: 10.002128 +epoch: 1, batch: 40446, sum loss: 4152.275879, avg loss: 2.547409, ppl: 12.773958 +epoch: 1, batch: 40447, sum loss: 3360.740234, avg loss: 2.225656, ppl: 9.259553 +epoch: 1, batch: 40448, sum loss: 4237.614258, avg loss: 2.617427, ppl: 13.700429 +epoch: 1, batch: 40449, sum loss: 4518.762695, avg loss: 2.699380, ppl: 14.870515 +epoch: 1, batch: 40450, sum loss: 4667.015137, avg loss: 2.557269, ppl: 12.900533 +epoch: 1, batch: 40451, sum loss: 5262.269043, avg loss: 2.612845, ppl: 13.637792 +epoch: 1, batch: 40452, sum loss: 3947.961914, avg loss: 2.558627, ppl: 12.918074 +epoch: 1, batch: 40453, sum loss: 3804.909912, avg loss: 2.565685, ppl: 13.009562 +epoch: 1, batch: 40454, sum loss: 4837.362305, avg loss: 2.765787, ppl: 15.891547 +epoch: 1, batch: 40455, sum loss: 5148.007812, avg loss: 2.718061, ppl: 15.150919 +epoch: 1, batch: 40456, sum loss: 4970.544434, avg loss: 2.766024, ppl: 15.895303 +epoch: 1, batch: 40457, sum loss: 4191.181641, avg loss: 2.639283, ppl: 14.003162 +epoch: 1, batch: 40458, sum loss: 4132.208984, avg loss: 2.751138, ppl: 15.660439 +epoch: 1, batch: 40459, sum loss: 4022.038330, avg loss: 2.717593, ppl: 15.143834 +epoch: 1, batch: 40460, sum loss: 4083.834961, avg loss: 2.517777, ppl: 12.401004 +epoch: 1, batch: 40461, sum loss: 4087.663086, avg loss: 2.860506, ppl: 17.470366 +epoch: 1, batch: 40462, sum loss: 3773.735352, avg loss: 2.510802, ppl: 12.314800 +epoch: 1, batch: 40463, sum loss: 4479.649414, avg loss: 2.635088, ppl: 13.944539 +epoch: 1, batch: 40464, sum loss: 4378.554688, avg loss: 2.600092, ppl: 13.464976 +epoch: 1, batch: 40465, sum loss: 3919.321289, avg loss: 2.497974, ppl: 12.157836 +epoch: 1, batch: 40466, sum loss: 
4754.541016, avg loss: 2.876310, ppl: 17.748667 +epoch: 1, batch: 40467, sum loss: 4192.437988, avg loss: 2.663556, ppl: 14.347222 +epoch: 1, batch: 40468, sum loss: 4698.743652, avg loss: 2.616227, ppl: 13.683998 +epoch: 1, batch: 40469, sum loss: 4544.361328, avg loss: 2.749160, ppl: 15.629495 +epoch: 1, batch: 40470, sum loss: 5211.130859, avg loss: 2.702869, ppl: 14.922482 +epoch: 1, batch: 40471, sum loss: 4924.887695, avg loss: 2.654926, ppl: 14.223934 +epoch: 1, batch: 40472, sum loss: 4332.376953, avg loss: 2.667720, ppl: 14.407082 +epoch: 1, batch: 40473, sum loss: 4037.889893, avg loss: 2.428076, ppl: 11.337047 +epoch: 1, batch: 40474, sum loss: 4407.915527, avg loss: 2.577728, ppl: 13.167192 +epoch: 1, batch: 40475, sum loss: 4111.747070, avg loss: 2.536550, ppl: 12.635996 +epoch: 1, batch: 40476, sum loss: 4687.886230, avg loss: 2.597167, ppl: 13.425646 +epoch: 1, batch: 40477, sum loss: 4841.860840, avg loss: 2.583704, ppl: 13.246108 +epoch: 1, batch: 40478, sum loss: 3794.621094, avg loss: 2.358372, ppl: 10.573726 +epoch: 1, batch: 40479, sum loss: 4565.677246, avg loss: 2.773802, ppl: 16.019417 +epoch: 1, batch: 40480, sum loss: 5099.928223, avg loss: 2.734546, ppl: 15.402748 +epoch: 1, batch: 40481, sum loss: 3696.533691, avg loss: 2.552855, ppl: 12.843718 +epoch: 1, batch: 40482, sum loss: 4999.798828, avg loss: 2.632859, ppl: 13.913488 +epoch: 1, batch: 40483, sum loss: 4464.974609, avg loss: 2.525438, ppl: 12.496371 +epoch: 1, batch: 40484, sum loss: 4330.565430, avg loss: 2.339581, ppl: 10.376893 +epoch: 1, batch: 40485, sum loss: 4060.599854, avg loss: 2.537875, ppl: 12.652755 +epoch: 1, batch: 40486, sum loss: 5106.348145, avg loss: 2.665109, ppl: 14.369512 +epoch: 1, batch: 40487, sum loss: 4120.108398, avg loss: 2.445168, ppl: 11.532490 +epoch: 1, batch: 40488, sum loss: 4010.674561, avg loss: 2.454513, ppl: 11.640767 +epoch: 1, batch: 40489, sum loss: 4796.291504, avg loss: 2.756489, ppl: 15.744472 +epoch: 1, batch: 40490, sum loss: 
3242.820312, avg loss: 2.283676, ppl: 9.812690 +epoch: 1, batch: 40491, sum loss: 3832.723389, avg loss: 2.325682, ppl: 10.233654 +epoch: 1, batch: 40492, sum loss: 5883.230469, avg loss: 3.126052, ppl: 22.783859 +epoch: 1, batch: 40493, sum loss: 5259.585938, avg loss: 2.888295, ppl: 17.962664 +epoch: 1, batch: 40494, sum loss: 4514.048828, avg loss: 2.385861, ppl: 10.868416 +epoch: 1, batch: 40495, sum loss: 4468.959473, avg loss: 2.613426, ppl: 13.645727 +epoch: 1, batch: 40496, sum loss: 4765.185547, avg loss: 2.716753, ppl: 15.131119 +epoch: 1, batch: 40497, sum loss: 4490.627441, avg loss: 2.663480, ppl: 14.346128 +epoch: 1, batch: 40498, sum loss: 4558.172852, avg loss: 2.516937, ppl: 12.390590 +epoch: 1, batch: 40499, sum loss: 4432.769531, avg loss: 2.624494, ppl: 13.797585 +epoch: 1, batch: 40500, sum loss: 4297.260254, avg loss: 2.605979, ppl: 13.544485 +epoch: 1, batch: 40501, sum loss: 5059.698242, avg loss: 2.638007, ppl: 13.985309 +epoch: 1, batch: 40502, sum loss: 4207.232422, avg loss: 2.717850, ppl: 15.147726 +epoch: 1, batch: 40503, sum loss: 4095.000244, avg loss: 2.606620, ppl: 13.553168 +epoch: 1, batch: 40504, sum loss: 4310.457031, avg loss: 2.747264, ppl: 15.599891 +epoch: 1, batch: 40505, sum loss: 4651.927246, avg loss: 2.587279, ppl: 13.293546 +epoch: 1, batch: 40506, sum loss: 4539.882812, avg loss: 2.619667, ppl: 13.731148 +epoch: 1, batch: 40507, sum loss: 5142.680664, avg loss: 2.657716, ppl: 14.263674 +epoch: 1, batch: 40508, sum loss: 4436.727051, avg loss: 2.752312, ppl: 15.678839 +epoch: 1, batch: 40509, sum loss: 3260.812744, avg loss: 2.251943, ppl: 9.506185 +epoch: 1, batch: 40510, sum loss: 4876.931641, avg loss: 2.778878, ppl: 16.100952 +epoch: 1, batch: 40511, sum loss: 3863.894531, avg loss: 2.373399, ppl: 10.733820 +epoch: 1, batch: 40512, sum loss: 3753.151855, avg loss: 2.336956, ppl: 10.349687 +epoch: 1, batch: 40513, sum loss: 3806.935547, avg loss: 2.315654, ppl: 10.131550 +epoch: 1, batch: 40514, sum loss: 
4243.918457, avg loss: 2.694551, ppl: 14.798876 +epoch: 1, batch: 40515, sum loss: 4008.138672, avg loss: 2.418913, ppl: 11.233641 +epoch: 1, batch: 40516, sum loss: 4487.941406, avg loss: 2.541303, ppl: 12.696205 +epoch: 1, batch: 40517, sum loss: 4056.637695, avg loss: 2.688295, ppl: 14.706585 +epoch: 1, batch: 40518, sum loss: 4908.441895, avg loss: 2.771565, ppl: 15.983632 +epoch: 1, batch: 40519, sum loss: 4553.749023, avg loss: 2.873028, ppl: 17.690500 +epoch: 1, batch: 40520, sum loss: 4379.427734, avg loss: 2.393130, ppl: 10.947707 +epoch: 1, batch: 40521, sum loss: 3690.166260, avg loss: 2.390004, ppl: 10.913539 +epoch: 1, batch: 40522, sum loss: 4181.186523, avg loss: 2.312603, ppl: 10.100685 +epoch: 1, batch: 40523, sum loss: 4043.877197, avg loss: 2.543319, ppl: 12.721825 +epoch: 1, batch: 40524, sum loss: 4346.509766, avg loss: 2.788012, ppl: 16.248678 +epoch: 1, batch: 40525, sum loss: 4489.293457, avg loss: 2.457194, ppl: 11.672015 +epoch: 1, batch: 40526, sum loss: 3772.856934, avg loss: 2.535522, ppl: 12.623021 +epoch: 1, batch: 40527, sum loss: 4486.563965, avg loss: 2.588900, ppl: 13.315118 +epoch: 1, batch: 40528, sum loss: 4088.546387, avg loss: 2.531608, ppl: 12.573704 +epoch: 1, batch: 40529, sum loss: 4319.311035, avg loss: 2.466768, ppl: 11.784302 +epoch: 1, batch: 40530, sum loss: 5049.544434, avg loss: 2.822551, ppl: 16.819708 +epoch: 1, batch: 40531, sum loss: 5206.087402, avg loss: 2.910054, ppl: 18.357798 +epoch: 1, batch: 40532, sum loss: 4926.183594, avg loss: 2.773752, ppl: 16.018623 +epoch: 1, batch: 40533, sum loss: 3737.275879, avg loss: 2.688688, ppl: 14.712355 +epoch: 1, batch: 40534, sum loss: 5136.701660, avg loss: 2.760184, ppl: 15.802748 +epoch: 1, batch: 40535, sum loss: 3977.318604, avg loss: 2.423716, ppl: 11.287730 +epoch: 1, batch: 40536, sum loss: 4465.330078, avg loss: 2.528499, ppl: 12.534682 +epoch: 1, batch: 40537, sum loss: 4986.228516, avg loss: 2.484419, ppl: 11.994148 +epoch: 1, batch: 40538, sum loss: 
3771.267822, avg loss: 2.310826, ppl: 10.082750 +epoch: 1, batch: 40539, sum loss: 4044.306641, avg loss: 2.516681, ppl: 12.387417 +epoch: 1, batch: 40540, sum loss: 3871.338867, avg loss: 2.416566, ppl: 11.207309 +epoch: 1, batch: 40541, sum loss: 5160.475098, avg loss: 2.543359, ppl: 12.722331 +epoch: 1, batch: 40542, sum loss: 4591.786133, avg loss: 2.613424, ppl: 13.645695 +epoch: 1, batch: 40543, sum loss: 3408.673584, avg loss: 2.614013, ppl: 13.653739 +epoch: 1, batch: 40544, sum loss: 4672.041016, avg loss: 2.784292, ppl: 16.188345 +epoch: 1, batch: 40545, sum loss: 3947.153564, avg loss: 2.629683, ppl: 13.869367 +epoch: 1, batch: 40546, sum loss: 3897.559082, avg loss: 2.463691, ppl: 11.748091 +epoch: 1, batch: 40547, sum loss: 3777.705566, avg loss: 2.449874, ppl: 11.586885 +epoch: 1, batch: 40548, sum loss: 3478.705322, avg loss: 2.333136, ppl: 10.310222 +epoch: 1, batch: 40549, sum loss: 3746.836670, avg loss: 2.585809, ppl: 13.274020 +epoch: 1, batch: 40550, sum loss: 4270.891602, avg loss: 2.593134, ppl: 13.371612 +epoch: 1, batch: 40551, sum loss: 5048.647461, avg loss: 2.918293, ppl: 18.509668 +epoch: 1, batch: 40552, sum loss: 4562.487793, avg loss: 2.493163, ppl: 12.099484 +epoch: 1, batch: 40553, sum loss: 4904.092773, avg loss: 2.525280, ppl: 12.494388 +epoch: 1, batch: 40554, sum loss: 3935.236816, avg loss: 2.299963, ppl: 9.973816 +epoch: 1, batch: 40555, sum loss: 4060.798584, avg loss: 2.313845, ppl: 10.113239 +epoch: 1, batch: 40556, sum loss: 4948.801758, avg loss: 2.752392, ppl: 15.680099 +epoch: 1, batch: 40557, sum loss: 4189.990723, avg loss: 2.437458, ppl: 11.443916 +epoch: 1, batch: 40558, sum loss: 4965.538574, avg loss: 2.634238, ppl: 13.932692 +epoch: 1, batch: 40559, sum loss: 4452.398438, avg loss: 2.573641, ppl: 13.113479 +epoch: 1, batch: 40560, sum loss: 4962.474121, avg loss: 2.945088, ppl: 19.012342 +epoch: 1, batch: 40561, sum loss: 4129.007812, avg loss: 2.749007, ppl: 15.627099 +epoch: 1, batch: 40562, sum loss: 
3730.524414, avg loss: 2.502028, ppl: 12.207231 +epoch: 1, batch: 40563, sum loss: 3620.811523, avg loss: 2.321033, ppl: 10.186192 +epoch: 1, batch: 40564, sum loss: 4056.642090, avg loss: 2.473562, ppl: 11.864636 +epoch: 1, batch: 40565, sum loss: 4299.346680, avg loss: 2.655557, ppl: 14.232910 +epoch: 1, batch: 40566, sum loss: 3718.662598, avg loss: 2.325618, ppl: 10.232998 +epoch: 1, batch: 40567, sum loss: 4533.012695, avg loss: 2.719264, ppl: 15.169150 +epoch: 1, batch: 40568, sum loss: 4562.683594, avg loss: 2.748605, ppl: 15.620818 +epoch: 1, batch: 40569, sum loss: 3555.358398, avg loss: 2.360796, ppl: 10.599382 +epoch: 1, batch: 40570, sum loss: 4506.062012, avg loss: 2.577839, ppl: 13.168649 +epoch: 1, batch: 40571, sum loss: 5072.153809, avg loss: 2.960977, ppl: 19.316837 +epoch: 1, batch: 40572, sum loss: 3641.106934, avg loss: 2.365891, ppl: 10.653532 +epoch: 1, batch: 40573, sum loss: 4546.834473, avg loss: 2.496889, ppl: 12.144651 +epoch: 1, batch: 40574, sum loss: 3537.766113, avg loss: 2.545156, ppl: 12.745210 +epoch: 1, batch: 40575, sum loss: 4616.165039, avg loss: 2.606530, ppl: 13.551947 +epoch: 1, batch: 40576, sum loss: 3872.071289, avg loss: 2.446033, ppl: 11.542472 +epoch: 1, batch: 40577, sum loss: 4149.036133, avg loss: 2.575442, ppl: 13.137117 +epoch: 1, batch: 40578, sum loss: 4399.867188, avg loss: 2.562532, ppl: 12.968612 +epoch: 1, batch: 40579, sum loss: 3644.036377, avg loss: 2.511396, ppl: 12.322124 +epoch: 1, batch: 40580, sum loss: 3652.806641, avg loss: 2.483213, ppl: 11.979695 +epoch: 1, batch: 40581, sum loss: 5095.443359, avg loss: 2.916682, ppl: 18.479866 +epoch: 1, batch: 40582, sum loss: 5496.073242, avg loss: 2.806983, ppl: 16.559889 +epoch: 1, batch: 40583, sum loss: 4107.076660, avg loss: 2.554152, ppl: 12.860390 +epoch: 1, batch: 40584, sum loss: 3621.982910, avg loss: 2.458916, ppl: 11.692130 +epoch: 1, batch: 40585, sum loss: 3681.297852, avg loss: 2.376564, ppl: 10.767841 +epoch: 1, batch: 40586, sum loss: 
4170.353027, avg loss: 2.549116, ppl: 12.795783 +epoch: 1, batch: 40587, sum loss: 4746.613770, avg loss: 2.802015, ppl: 16.477818 +epoch: 1, batch: 40588, sum loss: 4452.235352, avg loss: 2.579510, ppl: 13.190680 +epoch: 1, batch: 40589, sum loss: 5379.568359, avg loss: 2.911022, ppl: 18.375568 +epoch: 1, batch: 40590, sum loss: 4096.020508, avg loss: 2.577735, ppl: 13.167276 +epoch: 1, batch: 40591, sum loss: 5181.078613, avg loss: 2.875182, ppl: 17.728657 +epoch: 1, batch: 40592, sum loss: 4093.459717, avg loss: 2.668487, ppl: 14.418142 +epoch: 1, batch: 40593, sum loss: 4120.903809, avg loss: 2.398664, ppl: 11.008454 +epoch: 1, batch: 40594, sum loss: 4320.253906, avg loss: 2.551833, ppl: 12.830606 +epoch: 1, batch: 40595, sum loss: 4619.157227, avg loss: 2.687119, ppl: 14.689295 +epoch: 1, batch: 40596, sum loss: 4501.679199, avg loss: 2.573859, ppl: 13.116343 +epoch: 1, batch: 40597, sum loss: 4703.931152, avg loss: 2.546796, ppl: 12.766130 +epoch: 1, batch: 40598, sum loss: 4124.185059, avg loss: 2.764199, ppl: 15.866330 +epoch: 1, batch: 40599, sum loss: 4267.539062, avg loss: 2.446983, ppl: 11.553441 +epoch: 1, batch: 40600, sum loss: 5365.622070, avg loss: 2.632788, ppl: 13.912506 +epoch: 1, batch: 40601, sum loss: 4860.652344, avg loss: 2.865951, ppl: 17.565746 +epoch: 1, batch: 40602, sum loss: 4863.265625, avg loss: 2.839034, ppl: 17.099241 +epoch: 1, batch: 40603, sum loss: 4291.578125, avg loss: 2.749249, ppl: 15.630896 +epoch: 1, batch: 40604, sum loss: 4421.282715, avg loss: 2.564549, ppl: 12.994799 +epoch: 1, batch: 40605, sum loss: 3785.173828, avg loss: 2.478830, ppl: 11.927305 +epoch: 1, batch: 40606, sum loss: 4199.758789, avg loss: 2.496884, ppl: 12.144590 +epoch: 1, batch: 40607, sum loss: 3714.854004, avg loss: 2.390511, ppl: 10.919072 +epoch: 1, batch: 40608, sum loss: 4396.914062, avg loss: 2.434615, ppl: 11.411421 +epoch: 1, batch: 40609, sum loss: 4060.410889, avg loss: 2.576403, ppl: 13.149752 +epoch: 1, batch: 40610, sum loss: 
4365.062500, avg loss: 2.627973, ppl: 13.845671 +epoch: 1, batch: 40611, sum loss: 5206.808594, avg loss: 2.856176, ppl: 17.394880 +epoch: 1, batch: 40612, sum loss: 4068.096924, avg loss: 2.660626, ppl: 14.305241 +epoch: 1, batch: 40613, sum loss: 4587.300781, avg loss: 2.706372, ppl: 14.974849 +epoch: 1, batch: 40614, sum loss: 3901.826660, avg loss: 2.408535, ppl: 11.117661 +epoch: 1, batch: 40615, sum loss: 5038.045898, avg loss: 2.607684, ppl: 13.567597 +epoch: 1, batch: 40616, sum loss: 4429.858887, avg loss: 2.590561, ppl: 13.337247 +epoch: 1, batch: 40617, sum loss: 4360.812012, avg loss: 2.588019, ppl: 13.303391 +epoch: 1, batch: 40618, sum loss: 4684.184570, avg loss: 2.734492, ppl: 15.401915 +epoch: 1, batch: 40619, sum loss: 3967.903320, avg loss: 2.447812, ppl: 11.563017 +epoch: 1, batch: 40620, sum loss: 4765.916016, avg loss: 2.698707, ppl: 14.860499 +epoch: 1, batch: 40621, sum loss: 4601.393555, avg loss: 2.721108, ppl: 15.197154 +epoch: 1, batch: 40622, sum loss: 5448.165039, avg loss: 2.697112, ppl: 14.836815 +epoch: 1, batch: 40623, sum loss: 3869.363281, avg loss: 2.433562, ppl: 11.399412 +epoch: 1, batch: 40624, sum loss: 4700.383301, avg loss: 2.768188, ppl: 15.929747 +epoch: 1, batch: 40625, sum loss: 4046.278564, avg loss: 2.394248, ppl: 10.959950 +epoch: 1, batch: 40626, sum loss: 5194.561523, avg loss: 2.661149, ppl: 14.312718 +epoch: 1, batch: 40627, sum loss: 4478.660645, avg loss: 2.596325, ppl: 13.414351 +epoch: 1, batch: 40628, sum loss: 4034.635010, avg loss: 2.581340, ppl: 13.214838 +epoch: 1, batch: 40629, sum loss: 4980.663086, avg loss: 2.802849, ppl: 16.491570 +epoch: 1, batch: 40630, sum loss: 3877.415527, avg loss: 2.490312, ppl: 12.065038 +epoch: 1, batch: 40631, sum loss: 5932.532227, avg loss: 3.053285, ppl: 21.184820 +epoch: 1, batch: 40632, sum loss: 4607.592773, avg loss: 2.490591, ppl: 12.068404 +epoch: 1, batch: 40633, sum loss: 4068.103027, avg loss: 2.638199, ppl: 13.987989 +epoch: 1, batch: 40634, sum loss: 
3987.860840, avg loss: 2.311804, ppl: 10.092611 +epoch: 1, batch: 40635, sum loss: 4256.551758, avg loss: 2.648757, ppl: 14.136449 +epoch: 1, batch: 40636, sum loss: 3874.791748, avg loss: 2.581474, ppl: 13.216599 +epoch: 1, batch: 40637, sum loss: 4296.494141, avg loss: 2.514040, ppl: 12.354742 +epoch: 1, batch: 40638, sum loss: 4919.307617, avg loss: 2.455970, ppl: 11.657734 +epoch: 1, batch: 40639, sum loss: 4342.341309, avg loss: 2.634916, ppl: 13.942139 +epoch: 1, batch: 40640, sum loss: 4309.281738, avg loss: 2.664986, ppl: 14.367743 +epoch: 1, batch: 40641, sum loss: 4023.135498, avg loss: 2.417750, ppl: 11.220583 +epoch: 1, batch: 40642, sum loss: 5068.912598, avg loss: 2.756342, ppl: 15.742151 +epoch: 1, batch: 40643, sum loss: 4309.900879, avg loss: 2.652247, ppl: 14.185875 +epoch: 1, batch: 40644, sum loss: 4881.389648, avg loss: 2.640016, ppl: 14.013429 +epoch: 1, batch: 40645, sum loss: 4873.905273, avg loss: 2.827091, ppl: 16.896246 +epoch: 1, batch: 40646, sum loss: 5232.433594, avg loss: 2.702703, ppl: 14.920010 +epoch: 1, batch: 40647, sum loss: 3408.578613, avg loss: 2.363786, ppl: 10.631120 +epoch: 1, batch: 40648, sum loss: 4007.199219, avg loss: 2.610553, ppl: 13.606577 +epoch: 1, batch: 40649, sum loss: 4149.397461, avg loss: 2.501144, ppl: 12.196435 +epoch: 1, batch: 40650, sum loss: 4838.496094, avg loss: 2.798436, ppl: 16.418951 +epoch: 1, batch: 40651, sum loss: 3910.817383, avg loss: 2.389015, ppl: 10.902749 +epoch: 1, batch: 40652, sum loss: 3576.029785, avg loss: 2.445985, ppl: 11.541911 +epoch: 1, batch: 40653, sum loss: 3655.970703, avg loss: 2.325681, ppl: 10.233649 +epoch: 1, batch: 40654, sum loss: 4596.113281, avg loss: 2.722816, ppl: 15.223130 +epoch: 1, batch: 40655, sum loss: 3614.058838, avg loss: 2.275856, ppl: 9.736247 +epoch: 1, batch: 40656, sum loss: 4494.440430, avg loss: 2.532079, ppl: 12.579632 +epoch: 1, batch: 40657, sum loss: 4595.308594, avg loss: 2.668588, ppl: 14.419593 +epoch: 1, batch: 40658, sum loss: 
4705.419922, avg loss: 2.746888, ppl: 15.594034 +epoch: 1, batch: 40659, sum loss: 3901.169678, avg loss: 2.486405, ppl: 12.017995 +epoch: 1, batch: 40660, sum loss: 4821.982910, avg loss: 2.613541, ppl: 13.647289 +epoch: 1, batch: 40661, sum loss: 4779.282715, avg loss: 2.751458, ppl: 15.665458 +epoch: 1, batch: 40662, sum loss: 4495.292969, avg loss: 2.604457, ppl: 13.523882 +epoch: 1, batch: 40663, sum loss: 5418.186523, avg loss: 2.795762, ppl: 16.375103 +epoch: 1, batch: 40664, sum loss: 3940.371094, avg loss: 2.687838, ppl: 14.699865 +epoch: 1, batch: 40665, sum loss: 3835.020752, avg loss: 2.598253, ppl: 13.440231 +epoch: 1, batch: 40666, sum loss: 4630.331055, avg loss: 2.653485, ppl: 14.203449 +epoch: 1, batch: 40667, sum loss: 4855.020020, avg loss: 2.855894, ppl: 17.389982 +epoch: 1, batch: 40668, sum loss: 4350.699219, avg loss: 2.452480, ppl: 11.617117 +epoch: 1, batch: 40669, sum loss: 3906.806885, avg loss: 2.354917, ppl: 10.537252 +epoch: 1, batch: 40670, sum loss: 5180.526855, avg loss: 2.710898, ppl: 15.042784 +epoch: 1, batch: 40671, sum loss: 3644.920898, avg loss: 2.368370, ppl: 10.679965 +epoch: 1, batch: 40672, sum loss: 3661.353027, avg loss: 2.551466, ppl: 12.825887 +epoch: 1, batch: 40673, sum loss: 4846.967285, avg loss: 2.648616, ppl: 14.134464 +epoch: 1, batch: 40674, sum loss: 4563.269531, avg loss: 2.681122, ppl: 14.601464 +epoch: 1, batch: 40675, sum loss: 3542.741699, avg loss: 2.379276, ppl: 10.797081 +epoch: 1, batch: 40676, sum loss: 4247.810547, avg loss: 2.513497, ppl: 12.348040 +epoch: 1, batch: 40677, sum loss: 5116.364258, avg loss: 2.747779, ppl: 15.607926 +epoch: 1, batch: 40678, sum loss: 4683.213867, avg loss: 2.528733, ppl: 12.537611 +epoch: 1, batch: 40679, sum loss: 4743.718262, avg loss: 2.712246, ppl: 15.063068 +epoch: 1, batch: 40680, sum loss: 3953.886963, avg loss: 2.466554, ppl: 11.781781 +epoch: 1, batch: 40681, sum loss: 4477.236816, avg loss: 2.748457, ppl: 15.618517 +epoch: 1, batch: 40682, sum loss: 
4276.656250, avg loss: 2.672910, ppl: 14.482054 +epoch: 1, batch: 40683, sum loss: 3976.998535, avg loss: 2.490294, ppl: 12.064816 +epoch: 1, batch: 40684, sum loss: 4393.450195, avg loss: 2.592006, ppl: 13.356538 +epoch: 1, batch: 40685, sum loss: 4759.972168, avg loss: 2.582731, ppl: 13.233223 +epoch: 1, batch: 40686, sum loss: 4864.288086, avg loss: 2.773254, ppl: 16.010651 +epoch: 1, batch: 40687, sum loss: 5031.571777, avg loss: 2.673524, ppl: 14.490940 +epoch: 1, batch: 40688, sum loss: 4094.517578, avg loss: 2.515060, ppl: 12.367351 +epoch: 1, batch: 40689, sum loss: 4016.422852, avg loss: 2.507130, ppl: 12.269670 +epoch: 1, batch: 40690, sum loss: 3218.913818, avg loss: 2.265245, ppl: 9.633489 +epoch: 1, batch: 40691, sum loss: 3434.210205, avg loss: 2.202829, ppl: 9.050580 +epoch: 1, batch: 40692, sum loss: 5496.385742, avg loss: 2.868677, ppl: 17.613707 +epoch: 1, batch: 40693, sum loss: 4401.157227, avg loss: 2.588916, ppl: 13.315331 +epoch: 1, batch: 40694, sum loss: 4088.651611, avg loss: 2.430827, ppl: 11.368284 +epoch: 1, batch: 40695, sum loss: 4050.116699, avg loss: 2.629946, ppl: 13.873017 +epoch: 1, batch: 40696, sum loss: 4832.423340, avg loss: 2.655178, ppl: 14.227516 +epoch: 1, batch: 40697, sum loss: 3645.283203, avg loss: 2.362465, ppl: 10.617089 +epoch: 1, batch: 40698, sum loss: 4392.634766, avg loss: 2.533238, ppl: 12.594219 +epoch: 1, batch: 40699, sum loss: 3813.982178, avg loss: 2.391212, ppl: 10.926723 +epoch: 1, batch: 40700, sum loss: 4208.253418, avg loss: 2.790619, ppl: 16.291100 +epoch: 1, batch: 40701, sum loss: 4373.872070, avg loss: 2.827325, ppl: 16.900198 +epoch: 1, batch: 40702, sum loss: 4509.328613, avg loss: 2.590080, ppl: 13.330832 +epoch: 1, batch: 40703, sum loss: 4643.734375, avg loss: 2.489938, ppl: 12.060529 +epoch: 1, batch: 40704, sum loss: 4284.403809, avg loss: 2.588764, ppl: 13.313302 +epoch: 1, batch: 40705, sum loss: 5069.799805, avg loss: 2.573503, ppl: 13.111669 +epoch: 1, batch: 40706, sum loss: 
4142.038086, avg loss: 2.600149, ppl: 13.465750 +epoch: 1, batch: 40707, sum loss: 3722.119873, avg loss: 2.392108, ppl: 10.936523 +epoch: 1, batch: 40708, sum loss: 4022.580566, avg loss: 2.809065, ppl: 16.594393 +epoch: 1, batch: 40709, sum loss: 4291.193848, avg loss: 2.487649, ppl: 12.032950 +epoch: 1, batch: 40710, sum loss: 4166.009766, avg loss: 2.481245, ppl: 11.956136 +epoch: 1, batch: 40711, sum loss: 6283.041504, avg loss: 3.116588, ppl: 22.569244 +epoch: 1, batch: 40712, sum loss: 4827.182129, avg loss: 2.714951, ppl: 15.103867 +epoch: 1, batch: 40713, sum loss: 4099.569824, avg loss: 2.513531, ppl: 12.348461 +epoch: 1, batch: 40714, sum loss: 4509.642090, avg loss: 2.571062, ppl: 13.079702 +epoch: 1, batch: 40715, sum loss: 3971.804199, avg loss: 2.497990, ppl: 12.158033 +epoch: 1, batch: 40716, sum loss: 4907.647461, avg loss: 2.805973, ppl: 16.543169 +epoch: 1, batch: 40717, sum loss: 3952.497070, avg loss: 2.492117, ppl: 12.086833 +epoch: 1, batch: 40718, sum loss: 4155.455566, avg loss: 2.428671, ppl: 11.343795 +epoch: 1, batch: 40719, sum loss: 3716.891602, avg loss: 2.595595, ppl: 13.404559 +epoch: 1, batch: 40720, sum loss: 5000.261719, avg loss: 2.671080, ppl: 14.455574 +epoch: 1, batch: 40721, sum loss: 3185.133057, avg loss: 2.218059, ppl: 9.189480 +epoch: 1, batch: 40722, sum loss: 4536.070312, avg loss: 2.628082, ppl: 13.847189 +epoch: 1, batch: 40723, sum loss: 4672.059570, avg loss: 2.633630, ppl: 13.924220 +epoch: 1, batch: 40724, sum loss: 5080.106445, avg loss: 2.756433, ppl: 15.743589 +epoch: 1, batch: 40725, sum loss: 3710.870850, avg loss: 2.353120, ppl: 10.518339 +epoch: 1, batch: 40726, sum loss: 5167.041016, avg loss: 2.725233, ppl: 15.259967 +epoch: 1, batch: 40727, sum loss: 4401.042480, avg loss: 2.675406, ppl: 14.518239 +epoch: 1, batch: 40728, sum loss: 4855.531250, avg loss: 2.653296, ppl: 14.200764 +epoch: 1, batch: 40729, sum loss: 4049.418945, avg loss: 2.466151, ppl: 11.777024 +epoch: 1, batch: 40730, sum loss: 
3976.164551, avg loss: 2.322526, ppl: 10.201410 +epoch: 1, batch: 40731, sum loss: 3679.200195, avg loss: 2.474243, ppl: 11.872721 +epoch: 1, batch: 40732, sum loss: 3407.379150, avg loss: 2.635251, ppl: 13.946807 +epoch: 1, batch: 40733, sum loss: 4277.318848, avg loss: 2.768491, ppl: 15.934575 +epoch: 1, batch: 40734, sum loss: 4703.653809, avg loss: 2.603018, ppl: 13.504434 +epoch: 1, batch: 40735, sum loss: 3885.609863, avg loss: 2.411924, ppl: 11.155408 +epoch: 1, batch: 40736, sum loss: 3286.634033, avg loss: 2.134178, ppl: 8.450097 +epoch: 1, batch: 40737, sum loss: 5249.633301, avg loss: 2.876511, ppl: 17.752234 +epoch: 1, batch: 40738, sum loss: 4686.202148, avg loss: 2.612153, ppl: 13.628359 +epoch: 1, batch: 40739, sum loss: 5432.694336, avg loss: 2.749339, ppl: 15.632301 +epoch: 1, batch: 40740, sum loss: 4007.963867, avg loss: 2.378613, ppl: 10.789932 +epoch: 1, batch: 40741, sum loss: 3604.548584, avg loss: 2.585759, ppl: 13.273365 +epoch: 1, batch: 40742, sum loss: 4400.315430, avg loss: 2.602197, ppl: 13.493356 +epoch: 1, batch: 40743, sum loss: 4222.162109, avg loss: 2.615962, ppl: 13.680367 +epoch: 1, batch: 40744, sum loss: 4500.375488, avg loss: 2.525463, ppl: 12.496684 +epoch: 1, batch: 40745, sum loss: 4273.752930, avg loss: 2.531844, ppl: 12.576678 +epoch: 1, batch: 40746, sum loss: 3640.894775, avg loss: 2.365754, ppl: 10.652063 +epoch: 1, batch: 40747, sum loss: 4571.728516, avg loss: 2.742489, ppl: 15.525575 +epoch: 1, batch: 40748, sum loss: 3800.788818, avg loss: 2.540634, ppl: 12.687717 +epoch: 1, batch: 40749, sum loss: 3798.446045, avg loss: 2.495694, ppl: 12.130148 +epoch: 1, batch: 40750, sum loss: 4702.617676, avg loss: 2.704208, ppl: 14.942479 +epoch: 1, batch: 40751, sum loss: 5075.893066, avg loss: 2.681401, ppl: 14.605549 +epoch: 1, batch: 40752, sum loss: 4782.673828, avg loss: 2.740787, ppl: 15.499183 +epoch: 1, batch: 40753, sum loss: 4763.664062, avg loss: 2.554243, ppl: 12.861564 +epoch: 1, batch: 40754, sum loss: 
3943.148682, avg loss: 2.595885, ppl: 13.408442 +epoch: 1, batch: 40755, sum loss: 4170.233887, avg loss: 2.571044, ppl: 13.079475 +epoch: 1, batch: 40756, sum loss: 4191.088379, avg loss: 2.491729, ppl: 12.082149 +epoch: 1, batch: 40757, sum loss: 4288.207031, avg loss: 2.519511, ppl: 12.422515 +epoch: 1, batch: 40758, sum loss: 4944.347656, avg loss: 2.746860, ppl: 15.593588 +epoch: 1, batch: 40759, sum loss: 4576.851562, avg loss: 2.624341, ppl: 13.795486 +epoch: 1, batch: 40760, sum loss: 4375.198242, avg loss: 2.684171, ppl: 14.646050 +epoch: 1, batch: 40761, sum loss: 4943.975586, avg loss: 2.532774, ppl: 12.588384 +epoch: 1, batch: 40762, sum loss: 4186.355469, avg loss: 2.475669, ppl: 11.889655 +epoch: 1, batch: 40763, sum loss: 3727.514160, avg loss: 2.466919, ppl: 11.786075 +epoch: 1, batch: 40764, sum loss: 6102.992676, avg loss: 2.886941, ppl: 17.938347 +epoch: 1, batch: 40765, sum loss: 4044.243652, avg loss: 2.513514, ppl: 12.348243 +epoch: 1, batch: 40766, sum loss: 5048.601074, avg loss: 2.950673, ppl: 19.118814 +epoch: 1, batch: 40767, sum loss: 4094.718262, avg loss: 2.447531, ppl: 11.559765 +epoch: 1, batch: 40768, sum loss: 3662.659668, avg loss: 2.369120, ppl: 10.687984 +epoch: 1, batch: 40769, sum loss: 2850.363770, avg loss: 2.053576, ppl: 7.795731 +epoch: 1, batch: 40770, sum loss: 4307.025391, avg loss: 2.731151, ppl: 15.350551 +epoch: 1, batch: 40771, sum loss: 4693.381348, avg loss: 2.868815, ppl: 17.616135 +epoch: 1, batch: 40772, sum loss: 4591.890625, avg loss: 2.559582, ppl: 12.930417 +epoch: 1, batch: 40773, sum loss: 4817.358887, avg loss: 2.585807, ppl: 13.273998 +epoch: 1, batch: 40774, sum loss: 3546.140381, avg loss: 2.599810, ppl: 13.461175 +epoch: 1, batch: 40775, sum loss: 5353.413086, avg loss: 2.607605, ppl: 13.566520 +epoch: 1, batch: 40776, sum loss: 4663.890625, avg loss: 2.556957, ppl: 12.896520 +epoch: 1, batch: 40777, sum loss: 4154.874023, avg loss: 2.425496, ppl: 11.307833 +epoch: 1, batch: 40778, sum loss: 
4688.681641, avg loss: 2.507316, ppl: 12.271953 +epoch: 1, batch: 40779, sum loss: 4035.103027, avg loss: 2.578341, ppl: 13.175256 +epoch: 1, batch: 40780, sum loss: 4330.341309, avg loss: 2.562332, ppl: 12.966021 +epoch: 1, batch: 40781, sum loss: 3825.671875, avg loss: 2.424380, ppl: 11.295225 +epoch: 1, batch: 40782, sum loss: 3608.297852, avg loss: 2.500553, ppl: 12.189232 +epoch: 1, batch: 40783, sum loss: 4283.325195, avg loss: 2.634271, ppl: 13.933157 +epoch: 1, batch: 40784, sum loss: 4022.888184, avg loss: 2.671241, ppl: 14.457894 +epoch: 1, batch: 40785, sum loss: 3525.879395, avg loss: 2.274761, ppl: 9.725594 +epoch: 1, batch: 40786, sum loss: 3356.971680, avg loss: 2.369070, ppl: 10.687444 +epoch: 1, batch: 40787, sum loss: 2940.695557, avg loss: 2.060754, ppl: 7.851886 +epoch: 1, batch: 40788, sum loss: 3818.926758, avg loss: 2.417042, ppl: 11.212646 +epoch: 1, batch: 40789, sum loss: 5407.277344, avg loss: 2.621075, ppl: 13.750496 +epoch: 1, batch: 40790, sum loss: 3702.750488, avg loss: 2.471796, ppl: 11.843699 +epoch: 1, batch: 40791, sum loss: 4224.792480, avg loss: 2.425254, ppl: 11.305099 +epoch: 1, batch: 40792, sum loss: 3255.799561, avg loss: 2.217847, ppl: 9.187530 +epoch: 1, batch: 40793, sum loss: 3710.615234, avg loss: 2.358942, ppl: 10.579747 +epoch: 1, batch: 40794, sum loss: 4128.353516, avg loss: 2.596449, ppl: 13.416009 +epoch: 1, batch: 40795, sum loss: 3351.361328, avg loss: 2.295453, ppl: 9.928933 +epoch: 1, batch: 40796, sum loss: 4358.565430, avg loss: 2.605239, ppl: 13.534465 +epoch: 1, batch: 40797, sum loss: 6061.749023, avg loss: 2.852588, ppl: 17.332579 +epoch: 1, batch: 40798, sum loss: 4603.002441, avg loss: 2.788009, ppl: 16.248636 +epoch: 1, batch: 40799, sum loss: 5877.539062, avg loss: 2.918341, ppl: 18.510555 +epoch: 1, batch: 40800, sum loss: 3750.903320, avg loss: 2.350190, ppl: 10.487561 +epoch: 1, batch: 40801, sum loss: 4313.650391, avg loss: 2.694348, ppl: 14.795863 +epoch: 1, batch: 40802, sum loss: 
4903.892578, avg loss: 2.725899, ppl: 15.270139 +epoch: 1, batch: 40803, sum loss: 4771.731934, avg loss: 2.577921, ppl: 13.169732 +epoch: 1, batch: 40804, sum loss: 4943.721680, avg loss: 2.854343, ppl: 17.363020 +epoch: 1, batch: 40805, sum loss: 3721.351074, avg loss: 2.200681, ppl: 9.031159 +epoch: 1, batch: 40806, sum loss: 4013.465576, avg loss: 2.432403, ppl: 11.386214 +epoch: 1, batch: 40807, sum loss: 3737.786377, avg loss: 2.321606, ppl: 10.192034 +epoch: 1, batch: 40808, sum loss: 4155.722656, avg loss: 2.440236, ppl: 11.475753 +epoch: 1, batch: 40809, sum loss: 5234.243652, avg loss: 2.702242, ppl: 14.913136 +epoch: 1, batch: 40810, sum loss: 4532.052734, avg loss: 2.785527, ppl: 16.208361 +epoch: 1, batch: 40811, sum loss: 4376.545410, avg loss: 2.569903, ppl: 13.064562 +epoch: 1, batch: 40812, sum loss: 4484.504883, avg loss: 2.584729, ppl: 13.259695 +epoch: 1, batch: 40813, sum loss: 3898.669434, avg loss: 2.650353, ppl: 14.159039 +epoch: 1, batch: 40814, sum loss: 3897.639648, avg loss: 2.442130, ppl: 11.497505 +epoch: 1, batch: 40815, sum loss: 3766.630371, avg loss: 2.260883, ppl: 9.591551 +epoch: 1, batch: 40816, sum loss: 4341.011230, avg loss: 2.642125, ppl: 14.043012 +epoch: 1, batch: 40817, sum loss: 4279.831543, avg loss: 2.823108, ppl: 16.829073 +epoch: 1, batch: 40818, sum loss: 3313.910156, avg loss: 2.314183, ppl: 10.116656 +epoch: 1, batch: 40819, sum loss: 4556.166992, avg loss: 2.551045, ppl: 12.820499 +epoch: 1, batch: 40820, sum loss: 4332.840820, avg loss: 2.423289, ppl: 11.282908 +epoch: 1, batch: 40821, sum loss: 3867.018066, avg loss: 2.748414, ppl: 15.617839 +epoch: 1, batch: 40822, sum loss: 5020.966309, avg loss: 2.693651, ppl: 14.785566 +epoch: 1, batch: 40823, sum loss: 3861.377930, avg loss: 2.447008, ppl: 11.553722 +epoch: 1, batch: 40824, sum loss: 4489.777832, avg loss: 2.734335, ppl: 15.399499 +epoch: 1, batch: 40825, sum loss: 4655.726074, avg loss: 2.692728, ppl: 14.771915 +epoch: 1, batch: 40826, sum loss: 
4103.432129, avg loss: 2.442519, ppl: 11.501980 +epoch: 1, batch: 40827, sum loss: 4557.301758, avg loss: 2.574747, ppl: 13.127994 +epoch: 1, batch: 40828, sum loss: 3839.349121, avg loss: 2.456397, ppl: 11.662719 +epoch: 1, batch: 40829, sum loss: 3777.866943, avg loss: 2.527001, ppl: 12.515917 +epoch: 1, batch: 40830, sum loss: 4005.085449, avg loss: 2.546144, ppl: 12.757821 +epoch: 1, batch: 40831, sum loss: 4719.888672, avg loss: 2.517274, ppl: 12.394761 +epoch: 1, batch: 40832, sum loss: 4348.482910, avg loss: 2.694227, ppl: 14.794086 +epoch: 1, batch: 40833, sum loss: 3304.020264, avg loss: 2.429427, ppl: 11.352371 +epoch: 1, batch: 40834, sum loss: 4576.238770, avg loss: 2.652892, ppl: 14.195033 +epoch: 1, batch: 40835, sum loss: 4929.275391, avg loss: 2.554029, ppl: 12.858805 +epoch: 1, batch: 40836, sum loss: 4479.879883, avg loss: 2.416332, ppl: 11.204685 +epoch: 1, batch: 40837, sum loss: 4320.815918, avg loss: 2.567330, ppl: 13.030981 +epoch: 1, batch: 40838, sum loss: 3997.930176, avg loss: 2.497146, ppl: 12.147771 +epoch: 1, batch: 40839, sum loss: 4157.130371, avg loss: 2.582068, ppl: 13.224464 +epoch: 1, batch: 40840, sum loss: 3439.218506, avg loss: 2.197584, ppl: 9.003232 +epoch: 1, batch: 40841, sum loss: 4116.774902, avg loss: 2.407471, ppl: 11.105836 +epoch: 1, batch: 40842, sum loss: 4509.478027, avg loss: 2.546289, ppl: 12.759665 +epoch: 1, batch: 40843, sum loss: 4353.298828, avg loss: 2.499023, ppl: 12.170603 +epoch: 1, batch: 40844, sum loss: 3614.099121, avg loss: 2.307854, ppl: 10.052825 +epoch: 1, batch: 40845, sum loss: 3021.995361, avg loss: 2.017353, ppl: 7.518400 +epoch: 1, batch: 40846, sum loss: 4739.954102, avg loss: 2.680970, ppl: 14.599243 +epoch: 1, batch: 40847, sum loss: 4127.034180, avg loss: 2.595619, ppl: 13.404881 +epoch: 1, batch: 40848, sum loss: 4879.890625, avg loss: 2.518003, ppl: 12.403808 +epoch: 1, batch: 40849, sum loss: 4509.993164, avg loss: 2.522367, ppl: 12.458055 +epoch: 1, batch: 40850, sum loss: 
3883.295410, avg loss: 2.484514, ppl: 11.995289 +epoch: 1, batch: 40851, sum loss: 4136.303223, avg loss: 2.531397, ppl: 12.571060 +epoch: 1, batch: 40852, sum loss: 3769.193359, avg loss: 2.486275, ppl: 12.016437 +epoch: 1, batch: 40853, sum loss: 4983.758301, avg loss: 2.857659, ppl: 17.420704 +epoch: 1, batch: 40854, sum loss: 4544.518066, avg loss: 2.538837, ppl: 12.664929 +epoch: 1, batch: 40855, sum loss: 4343.196777, avg loss: 2.410209, ppl: 11.136291 +epoch: 1, batch: 40856, sum loss: 4816.423828, avg loss: 2.637691, ppl: 13.980885 +epoch: 1, batch: 40857, sum loss: 4058.926270, avg loss: 2.391825, ppl: 10.933426 +epoch: 1, batch: 40858, sum loss: 4005.613281, avg loss: 2.498823, ppl: 12.168162 +epoch: 1, batch: 40859, sum loss: 5463.583984, avg loss: 2.860515, ppl: 17.470524 +epoch: 1, batch: 40860, sum loss: 5468.414551, avg loss: 2.750711, ppl: 15.653765 +epoch: 1, batch: 40861, sum loss: 4478.198242, avg loss: 2.611194, ppl: 13.615299 +epoch: 1, batch: 40862, sum loss: 3274.349365, avg loss: 2.384814, ppl: 10.857040 +epoch: 1, batch: 40863, sum loss: 4074.987793, avg loss: 2.480211, ppl: 11.943790 +epoch: 1, batch: 40864, sum loss: 5572.010254, avg loss: 2.854514, ppl: 17.365990 +epoch: 1, batch: 40865, sum loss: 5040.641602, avg loss: 2.777213, ppl: 16.074162 +epoch: 1, batch: 40866, sum loss: 4270.316406, avg loss: 2.396362, ppl: 10.983145 +epoch: 1, batch: 40867, sum loss: 4150.537109, avg loss: 2.566813, ppl: 13.024256 +epoch: 1, batch: 40868, sum loss: 3822.940918, avg loss: 2.480818, ppl: 11.951039 +epoch: 1, batch: 40869, sum loss: 4737.673340, avg loss: 2.699529, ppl: 14.872724 +epoch: 1, batch: 40870, sum loss: 3598.475830, avg loss: 2.270332, ppl: 9.682611 +epoch: 1, batch: 40871, sum loss: 4307.263672, avg loss: 2.719232, ppl: 15.168670 +epoch: 1, batch: 40872, sum loss: 3787.182129, avg loss: 2.541733, ppl: 12.701664 +epoch: 1, batch: 40873, sum loss: 4811.757812, avg loss: 2.494431, ppl: 12.114841 +epoch: 1, batch: 40874, sum loss: 
3351.412354, avg loss: 2.172011, ppl: 8.775911 +epoch: 1, batch: 40875, sum loss: 3519.611572, avg loss: 2.343283, ppl: 10.415376 +epoch: 1, batch: 40876, sum loss: 4821.965820, avg loss: 2.680359, ppl: 14.590328 +epoch: 1, batch: 40877, sum loss: 4764.294922, avg loss: 2.700847, ppl: 14.892346 +epoch: 1, batch: 40878, sum loss: 4528.752930, avg loss: 2.620806, ppl: 13.746802 +epoch: 1, batch: 40879, sum loss: 4232.380371, avg loss: 2.591782, ppl: 13.353548 +epoch: 1, batch: 40880, sum loss: 4379.910645, avg loss: 2.621132, ppl: 13.751277 +epoch: 1, batch: 40881, sum loss: 5233.505371, avg loss: 2.713067, ppl: 15.075435 +epoch: 1, batch: 40882, sum loss: 4342.451172, avg loss: 2.583255, ppl: 13.240159 +epoch: 1, batch: 40883, sum loss: 3967.149414, avg loss: 2.678696, ppl: 14.566092 +epoch: 1, batch: 40884, sum loss: 4537.472168, avg loss: 2.520818, ppl: 12.438767 +epoch: 1, batch: 40885, sum loss: 5134.306152, avg loss: 2.657508, ppl: 14.260713 +epoch: 1, batch: 40886, sum loss: 4987.248535, avg loss: 2.576058, ppl: 13.145216 +epoch: 1, batch: 40887, sum loss: 3872.546143, avg loss: 2.443247, ppl: 11.510352 +epoch: 1, batch: 40888, sum loss: 4277.358398, avg loss: 2.489731, ppl: 12.058036 +epoch: 1, batch: 40889, sum loss: 5680.205078, avg loss: 2.802272, ppl: 16.482048 +epoch: 1, batch: 40890, sum loss: 4159.596680, avg loss: 2.575602, ppl: 13.139219 +epoch: 1, batch: 40891, sum loss: 5180.234375, avg loss: 2.913518, ppl: 18.421486 +epoch: 1, batch: 40892, sum loss: 3050.945068, avg loss: 2.171491, ppl: 8.771354 +epoch: 1, batch: 40893, sum loss: 3814.662598, avg loss: 2.363484, ppl: 10.627912 +epoch: 1, batch: 40894, sum loss: 4234.669922, avg loss: 2.591597, ppl: 13.351081 +epoch: 1, batch: 40895, sum loss: 4001.623535, avg loss: 2.655357, ppl: 14.230070 +epoch: 1, batch: 40896, sum loss: 3666.507568, avg loss: 2.418541, ppl: 11.229461 +epoch: 1, batch: 40897, sum loss: 4238.368652, avg loss: 2.533394, ppl: 12.596189 +epoch: 1, batch: 40898, sum loss: 
3888.160889, avg loss: 2.428583, ppl: 11.342797 +epoch: 1, batch: 40899, sum loss: 4320.723145, avg loss: 2.567275, ppl: 13.030263 +epoch: 1, batch: 40900, sum loss: 4000.653320, avg loss: 2.618229, ppl: 13.711414 +epoch: 1, batch: 40901, sum loss: 5702.143066, avg loss: 2.851072, ppl: 17.306316 +epoch: 1, batch: 40902, sum loss: 4256.511230, avg loss: 2.632351, ppl: 13.906425 +epoch: 1, batch: 40903, sum loss: 5022.513672, avg loss: 2.586258, ppl: 13.279990 +epoch: 1, batch: 40904, sum loss: 3966.188965, avg loss: 2.532688, ppl: 12.587291 +epoch: 1, batch: 40905, sum loss: 4878.754883, avg loss: 2.682108, ppl: 14.615873 +epoch: 1, batch: 40906, sum loss: 3744.683350, avg loss: 2.674774, ppl: 14.509066 +epoch: 1, batch: 40907, sum loss: 4325.460449, avg loss: 2.681625, ppl: 14.608809 +epoch: 1, batch: 40908, sum loss: 3856.554688, avg loss: 2.376189, ppl: 10.763803 +epoch: 1, batch: 40909, sum loss: 3697.853027, avg loss: 2.352324, ppl: 10.509964 +epoch: 1, batch: 40910, sum loss: 4513.838867, avg loss: 2.815870, ppl: 16.707699 +epoch: 1, batch: 40911, sum loss: 4049.989014, avg loss: 2.753222, ppl: 15.693110 +epoch: 1, batch: 40912, sum loss: 4041.714844, avg loss: 2.352570, ppl: 10.512550 +epoch: 1, batch: 40913, sum loss: 5059.757812, avg loss: 2.681377, ppl: 14.605186 +epoch: 1, batch: 40914, sum loss: 3682.851807, avg loss: 2.413402, ppl: 11.171906 +epoch: 1, batch: 40915, sum loss: 4301.216797, avg loss: 2.522708, ppl: 12.462298 +epoch: 1, batch: 40916, sum loss: 4124.961426, avg loss: 2.717366, ppl: 15.140389 +epoch: 1, batch: 40917, sum loss: 4449.812500, avg loss: 2.581098, ppl: 13.211635 +epoch: 1, batch: 40918, sum loss: 4221.630371, avg loss: 2.361091, ppl: 10.602511 +epoch: 1, batch: 40919, sum loss: 4196.951172, avg loss: 2.668119, ppl: 14.412832 +epoch: 1, batch: 40920, sum loss: 4591.934570, avg loss: 2.584094, ppl: 13.251275 +epoch: 1, batch: 40921, sum loss: 3924.245850, avg loss: 2.382663, ppl: 10.833713 +epoch: 1, batch: 40922, sum loss: 
3602.438721, avg loss: 2.453977, ppl: 11.634529 +epoch: 1, batch: 40923, sum loss: 5160.891602, avg loss: 2.779155, ppl: 16.105410 +epoch: 1, batch: 40924, sum loss: 4096.517578, avg loss: 2.658350, ppl: 14.272723 +epoch: 1, batch: 40925, sum loss: 4553.186523, avg loss: 2.656468, ppl: 14.245886 +epoch: 1, batch: 40926, sum loss: 4930.244141, avg loss: 2.762042, ppl: 15.832132 +epoch: 1, batch: 40927, sum loss: 3908.983398, avg loss: 2.494565, ppl: 12.116465 +epoch: 1, batch: 40928, sum loss: 3522.462891, avg loss: 2.508877, ppl: 12.291117 +epoch: 1, batch: 40929, sum loss: 4832.170898, avg loss: 3.001348, ppl: 20.112637 +epoch: 1, batch: 40930, sum loss: 3705.188477, avg loss: 2.401289, ppl: 11.037392 +epoch: 1, batch: 40931, sum loss: 3531.539062, avg loss: 2.348098, ppl: 10.465643 +epoch: 1, batch: 40932, sum loss: 4418.537598, avg loss: 2.591518, ppl: 13.350018 +epoch: 1, batch: 40933, sum loss: 5049.169922, avg loss: 2.570861, ppl: 13.077074 +epoch: 1, batch: 40934, sum loss: 4208.613281, avg loss: 2.521638, ppl: 12.448967 +epoch: 1, batch: 40935, sum loss: 4005.092041, avg loss: 2.575622, ppl: 13.139488 +epoch: 1, batch: 40936, sum loss: 4588.708008, avg loss: 2.617632, ppl: 13.703232 +epoch: 1, batch: 40937, sum loss: 3536.415527, avg loss: 2.409002, ppl: 11.122858 +epoch: 1, batch: 40938, sum loss: 4029.543457, avg loss: 2.509056, ppl: 12.293315 +epoch: 1, batch: 40939, sum loss: 4796.213379, avg loss: 2.501937, ppl: 12.206116 +epoch: 1, batch: 40940, sum loss: 3611.294434, avg loss: 2.366510, ppl: 10.660125 +epoch: 1, batch: 40941, sum loss: 4324.412598, avg loss: 2.616100, ppl: 13.682253 +epoch: 1, batch: 40942, sum loss: 4561.392578, avg loss: 2.907197, ppl: 18.305420 +epoch: 1, batch: 40943, sum loss: 4824.833496, avg loss: 2.361641, ppl: 10.608350 +epoch: 1, batch: 40944, sum loss: 4476.896484, avg loss: 2.812121, ppl: 16.645184 +epoch: 1, batch: 40945, sum loss: 5576.118652, avg loss: 2.986673, ppl: 19.819635 +epoch: 1, batch: 40946, sum loss: 
4626.476562, avg loss: 2.795454, ppl: 16.370064 +epoch: 1, batch: 40947, sum loss: 3977.207520, avg loss: 2.485755, ppl: 12.010181 +epoch: 1, batch: 40948, sum loss: 4482.429688, avg loss: 2.493009, ppl: 12.097618 +epoch: 1, batch: 40949, sum loss: 3892.784668, avg loss: 2.481061, ppl: 11.953940 +epoch: 1, batch: 40950, sum loss: 5525.629395, avg loss: 2.826409, ppl: 16.884716 +epoch: 1, batch: 40951, sum loss: 3721.242188, avg loss: 2.390008, ppl: 10.913581 +epoch: 1, batch: 40952, sum loss: 4122.734375, avg loss: 2.467226, ppl: 11.789697 +epoch: 1, batch: 40953, sum loss: 4569.657227, avg loss: 2.614221, ppl: 13.656569 +epoch: 1, batch: 40954, sum loss: 4537.721191, avg loss: 2.622960, ppl: 13.776443 +epoch: 1, batch: 40955, sum loss: 6161.080078, avg loss: 2.980687, ppl: 19.701349 +epoch: 1, batch: 40956, sum loss: 4161.392578, avg loss: 2.478495, ppl: 11.923302 +epoch: 1, batch: 40957, sum loss: 3945.833984, avg loss: 2.431198, ppl: 11.372494 +epoch: 1, batch: 40958, sum loss: 4010.655029, avg loss: 2.562719, ppl: 12.971036 +epoch: 1, batch: 40959, sum loss: 4475.170898, avg loss: 2.501493, ppl: 12.200696 +epoch: 1, batch: 40960, sum loss: 4135.722656, avg loss: 2.389210, ppl: 10.904876 +epoch: 1, batch: 40961, sum loss: 4797.216797, avg loss: 2.574996, ppl: 13.131258 +epoch: 1, batch: 40962, sum loss: 4269.136719, avg loss: 2.599962, ppl: 13.463220 +epoch: 1, batch: 40963, sum loss: 5130.220703, avg loss: 2.906641, ppl: 18.295233 +epoch: 1, batch: 40964, sum loss: 5700.384766, avg loss: 2.911330, ppl: 18.381237 +epoch: 1, batch: 40965, sum loss: 3372.065430, avg loss: 2.336844, ppl: 10.348522 +epoch: 1, batch: 40966, sum loss: 4734.554688, avg loss: 2.639105, ppl: 14.000672 +epoch: 1, batch: 40967, sum loss: 3638.715820, avg loss: 2.488862, ppl: 12.047556 +epoch: 1, batch: 40968, sum loss: 3954.644043, avg loss: 2.164556, ppl: 8.710733 +epoch: 1, batch: 40969, sum loss: 4630.056641, avg loss: 2.462796, ppl: 11.737584 +epoch: 1, batch: 40970, sum loss: 
3362.195557, avg loss: 2.317158, ppl: 10.146791 +epoch: 1, batch: 40971, sum loss: 3618.930420, avg loss: 2.353011, ppl: 10.517186 +epoch: 1, batch: 40972, sum loss: 5073.305176, avg loss: 2.842188, ppl: 17.153254 +epoch: 1, batch: 40973, sum loss: 4974.067871, avg loss: 2.743557, ppl: 15.542167 +epoch: 1, batch: 40974, sum loss: 4462.465332, avg loss: 2.555822, ppl: 12.881886 +epoch: 1, batch: 40975, sum loss: 4078.776611, avg loss: 2.322766, ppl: 10.203857 +epoch: 1, batch: 40976, sum loss: 4375.261230, avg loss: 2.717554, ppl: 15.143230 +epoch: 1, batch: 40977, sum loss: 5109.087891, avg loss: 2.785762, ppl: 16.212168 +epoch: 1, batch: 40978, sum loss: 3921.566650, avg loss: 2.419227, ppl: 11.237165 +epoch: 1, batch: 40979, sum loss: 4241.792969, avg loss: 2.628124, ppl: 13.847774 +epoch: 1, batch: 40980, sum loss: 3580.950195, avg loss: 2.320771, ppl: 10.183527 +epoch: 1, batch: 40981, sum loss: 4786.113281, avg loss: 2.684304, ppl: 14.648000 +epoch: 1, batch: 40982, sum loss: 3704.404053, avg loss: 2.486177, ppl: 12.015256 +epoch: 1, batch: 40983, sum loss: 3968.590332, avg loss: 2.624729, ppl: 13.800836 +epoch: 1, batch: 40984, sum loss: 4634.166016, avg loss: 2.672529, ppl: 14.476538 +epoch: 1, batch: 40985, sum loss: 4133.583008, avg loss: 2.534386, ppl: 12.608683 +epoch: 1, batch: 40986, sum loss: 3689.457031, avg loss: 2.618493, ppl: 13.715044 +epoch: 1, batch: 40987, sum loss: 3747.994141, avg loss: 2.501999, ppl: 12.206867 +epoch: 1, batch: 40988, sum loss: 4221.794922, avg loss: 2.566441, ppl: 13.019401 +epoch: 1, batch: 40989, sum loss: 3924.000488, avg loss: 2.538163, ppl: 12.656405 +epoch: 1, batch: 40990, sum loss: 4944.616699, avg loss: 2.918900, ppl: 18.520903 +epoch: 1, batch: 40991, sum loss: 3118.862305, avg loss: 2.366360, ppl: 10.658524 +epoch: 1, batch: 40992, sum loss: 4120.203613, avg loss: 2.527732, ppl: 12.525072 +epoch: 1, batch: 40993, sum loss: 3169.753174, avg loss: 2.082624, ppl: 8.025498 +epoch: 1, batch: 40994, sum loss: 
3935.232178, avg loss: 2.383545, ppl: 10.843271 +epoch: 1, batch: 40995, sum loss: 4940.582031, avg loss: 2.862446, ppl: 17.504292 +epoch: 1, batch: 40996, sum loss: 4672.486816, avg loss: 2.673048, ppl: 14.484043 +epoch: 1, batch: 40997, sum loss: 4058.246582, avg loss: 2.480591, ppl: 11.948321 +epoch: 1, batch: 40998, sum loss: 4475.352539, avg loss: 2.598927, ppl: 13.449300 +epoch: 1, batch: 40999, sum loss: 3966.399902, avg loss: 2.524761, ppl: 12.487916 +epoch: 1, batch: 41000, sum loss: 3531.627197, avg loss: 2.371811, ppl: 10.716787 +epoch: 1, batch: 41001, sum loss: 4451.284180, avg loss: 2.615325, ppl: 13.671665 +epoch: 1, batch: 41002, sum loss: 4044.961914, avg loss: 2.350355, ppl: 10.489297 +epoch: 1, batch: 41003, sum loss: 4217.519043, avg loss: 2.715724, ppl: 15.115550 +epoch: 1, batch: 41004, sum loss: 4827.795898, avg loss: 2.805227, ppl: 16.530828 +epoch: 1, batch: 41005, sum loss: 4347.982910, avg loss: 2.778264, ppl: 16.091059 +epoch: 1, batch: 41006, sum loss: 3278.857422, avg loss: 2.151481, ppl: 8.597584 +epoch: 1, batch: 41007, sum loss: 3687.046387, avg loss: 2.419322, ppl: 11.238235 +epoch: 1, batch: 41008, sum loss: 4848.982422, avg loss: 2.597205, ppl: 13.426165 +epoch: 1, batch: 41009, sum loss: 4231.377441, avg loss: 2.458674, ppl: 11.689298 +epoch: 1, batch: 41010, sum loss: 3931.638184, avg loss: 2.683712, ppl: 14.639334 +epoch: 1, batch: 41011, sum loss: 4084.042480, avg loss: 2.517905, ppl: 12.402589 +epoch: 1, batch: 41012, sum loss: 4019.918457, avg loss: 2.506184, ppl: 12.258060 +epoch: 1, batch: 41013, sum loss: 4202.062988, avg loss: 2.363365, ppl: 10.626652 +epoch: 1, batch: 41014, sum loss: 3480.804199, avg loss: 2.561298, ppl: 12.952621 +epoch: 1, batch: 41015, sum loss: 4076.613770, avg loss: 2.541530, ppl: 12.699084 +epoch: 1, batch: 41016, sum loss: 3873.809326, avg loss: 2.464255, ppl: 11.754725 +epoch: 1, batch: 41017, sum loss: 4938.916016, avg loss: 2.754555, ppl: 15.714039 +epoch: 1, batch: 41018, sum loss: 
4194.233398, avg loss: 2.598658, ppl: 13.445677 +epoch: 1, batch: 41019, sum loss: 4346.246094, avg loss: 2.627718, ppl: 13.842153 +epoch: 1, batch: 41020, sum loss: 4051.988770, avg loss: 2.678116, ppl: 14.557635 +epoch: 1, batch: 41021, sum loss: 4185.914062, avg loss: 2.676416, ppl: 14.532909 +epoch: 1, batch: 41022, sum loss: 3611.002197, avg loss: 2.542959, ppl: 12.717252 +epoch: 1, batch: 41023, sum loss: 4384.478027, avg loss: 2.652437, ppl: 14.188570 +epoch: 1, batch: 41024, sum loss: 4571.044922, avg loss: 2.679393, ppl: 14.576247 +epoch: 1, batch: 41025, sum loss: 4556.377441, avg loss: 2.441788, ppl: 11.493578 +epoch: 1, batch: 41026, sum loss: 4407.418945, avg loss: 2.543231, ppl: 12.720702 +epoch: 1, batch: 41027, sum loss: 3855.312988, avg loss: 2.730392, ppl: 15.338891 +epoch: 1, batch: 41028, sum loss: 3909.811279, avg loss: 2.599608, ppl: 13.458467 +epoch: 1, batch: 41029, sum loss: 4579.954102, avg loss: 2.451796, ppl: 11.609173 +epoch: 1, batch: 41030, sum loss: 4401.892578, avg loss: 2.642192, ppl: 14.043960 +epoch: 1, batch: 41031, sum loss: 3965.712891, avg loss: 2.508357, ppl: 12.284733 +epoch: 1, batch: 41032, sum loss: 5209.820801, avg loss: 2.958444, ppl: 19.267975 +epoch: 1, batch: 41033, sum loss: 4937.550293, avg loss: 2.581051, ppl: 13.211014 +epoch: 1, batch: 41034, sum loss: 3614.921875, avg loss: 2.424495, ppl: 11.296523 +epoch: 1, batch: 41035, sum loss: 3532.645508, avg loss: 2.225990, ppl: 9.262647 +epoch: 1, batch: 41036, sum loss: 4009.677734, avg loss: 2.482773, ppl: 11.974419 +epoch: 1, batch: 41037, sum loss: 4105.674316, avg loss: 2.518819, ppl: 12.413922 +epoch: 1, batch: 41038, sum loss: 4791.048828, avg loss: 2.745587, ppl: 15.573748 +epoch: 1, batch: 41039, sum loss: 5196.711914, avg loss: 2.919501, ppl: 18.532040 +epoch: 1, batch: 41040, sum loss: 4565.467285, avg loss: 2.643583, ppl: 14.063500 +epoch: 1, batch: 41041, sum loss: 4705.673340, avg loss: 2.792685, ppl: 16.324785 +epoch: 1, batch: 41042, sum loss: 
4647.784668, avg loss: 2.621424, ppl: 13.755300 +epoch: 1, batch: 41043, sum loss: 3242.598877, avg loss: 2.158854, ppl: 8.661206 +epoch: 1, batch: 41044, sum loss: 4330.109375, avg loss: 2.500063, ppl: 12.183264 +epoch: 1, batch: 41045, sum loss: 4123.416016, avg loss: 2.452954, ppl: 11.622630 +epoch: 1, batch: 41046, sum loss: 3831.004150, avg loss: 2.331713, ppl: 10.295560 +epoch: 1, batch: 41047, sum loss: 4398.689941, avg loss: 2.576854, ppl: 13.155688 +epoch: 1, batch: 41048, sum loss: 4952.657227, avg loss: 2.623230, ppl: 13.780155 +epoch: 1, batch: 41049, sum loss: 4029.132568, avg loss: 2.601119, ppl: 13.478806 +epoch: 1, batch: 41050, sum loss: 4058.763672, avg loss: 2.488513, ppl: 12.043348 +epoch: 1, batch: 41051, sum loss: 4051.656250, avg loss: 2.670835, ppl: 14.452025 +epoch: 1, batch: 41052, sum loss: 4282.083496, avg loss: 2.662987, ppl: 14.339059 +epoch: 1, batch: 41053, sum loss: 5040.250000, avg loss: 2.784669, ppl: 16.194452 +epoch: 1, batch: 41054, sum loss: 4681.095215, avg loss: 2.619527, ppl: 13.729233 +epoch: 1, batch: 41055, sum loss: 4684.701172, avg loss: 2.801855, ppl: 16.475178 +epoch: 1, batch: 41056, sum loss: 4755.555664, avg loss: 2.725247, ppl: 15.260178 +epoch: 1, batch: 41057, sum loss: 4859.545898, avg loss: 2.641057, ppl: 14.028030 +epoch: 1, batch: 41058, sum loss: 4332.405273, avg loss: 2.554484, ppl: 12.864664 +epoch: 1, batch: 41059, sum loss: 4127.071289, avg loss: 2.530393, ppl: 12.558442 +epoch: 1, batch: 41060, sum loss: 4367.555176, avg loss: 2.704369, ppl: 14.944878 +epoch: 1, batch: 41061, sum loss: 4907.830566, avg loss: 2.674567, ppl: 14.506070 +epoch: 1, batch: 41062, sum loss: 4511.231445, avg loss: 2.628923, ppl: 13.858831 +epoch: 1, batch: 41063, sum loss: 4225.416504, avg loss: 2.530190, ppl: 12.555885 +epoch: 1, batch: 41064, sum loss: 4037.627930, avg loss: 2.575018, ppl: 13.131550 +epoch: 1, batch: 41065, sum loss: 4514.722168, avg loss: 2.866490, ppl: 17.575224 +epoch: 1, batch: 41066, sum loss: 
4370.537109, avg loss: 2.634441, ppl: 13.935515 +epoch: 1, batch: 41067, sum loss: 3990.262939, avg loss: 2.528684, ppl: 12.536992 +epoch: 1, batch: 41068, sum loss: 3923.056152, avg loss: 2.468884, ppl: 11.809258 +epoch: 1, batch: 41069, sum loss: 4449.035156, avg loss: 2.559859, ppl: 12.933994 +epoch: 1, batch: 41070, sum loss: 3539.841553, avg loss: 2.369372, ppl: 10.690676 +epoch: 1, batch: 41071, sum loss: 3504.899414, avg loss: 2.209899, ppl: 9.114793 +epoch: 1, batch: 41072, sum loss: 5197.245605, avg loss: 2.824590, ppl: 16.854033 +epoch: 1, batch: 41073, sum loss: 5239.056641, avg loss: 2.594877, ppl: 13.394939 +epoch: 1, batch: 41074, sum loss: 5364.625977, avg loss: 2.870319, ppl: 17.642639 +epoch: 1, batch: 41075, sum loss: 4154.032227, avg loss: 2.731119, ppl: 15.350057 +epoch: 1, batch: 41076, sum loss: 4707.103516, avg loss: 2.622342, ppl: 13.767928 +epoch: 1, batch: 41077, sum loss: 4016.387451, avg loss: 2.271713, ppl: 9.695992 +epoch: 1, batch: 41078, sum loss: 4370.648438, avg loss: 2.566441, ppl: 13.019401 +epoch: 1, batch: 41079, sum loss: 4687.805176, avg loss: 2.715994, ppl: 15.119626 +epoch: 1, batch: 41080, sum loss: 3769.486328, avg loss: 2.417887, ppl: 11.222127 +epoch: 1, batch: 41081, sum loss: 4796.101074, avg loss: 2.499271, ppl: 12.173615 +epoch: 1, batch: 41082, sum loss: 4668.838867, avg loss: 2.698751, ppl: 14.861155 +epoch: 1, batch: 41083, sum loss: 4315.295410, avg loss: 2.766215, ppl: 15.898346 +epoch: 1, batch: 41084, sum loss: 4440.781738, avg loss: 2.775489, ppl: 16.046465 +epoch: 1, batch: 41085, sum loss: 4133.328613, avg loss: 2.314294, ppl: 10.117776 +epoch: 1, batch: 41086, sum loss: 3488.125977, avg loss: 2.305437, ppl: 10.028559 +epoch: 1, batch: 41087, sum loss: 4592.478027, avg loss: 2.634812, ppl: 13.940697 +epoch: 1, batch: 41088, sum loss: 4222.486328, avg loss: 2.519383, ppl: 12.420936 +epoch: 1, batch: 41089, sum loss: 4633.520996, avg loss: 2.784568, ppl: 16.192822 +epoch: 1, batch: 41090, sum loss: 
5009.063477, avg loss: 2.655919, ppl: 14.238070 +epoch: 1, batch: 41091, sum loss: 4326.655273, avg loss: 2.539117, ppl: 12.668481 +epoch: 1, batch: 41092, sum loss: 5660.556152, avg loss: 2.741189, ppl: 15.505418 +epoch: 1, batch: 41093, sum loss: 4452.854492, avg loss: 2.658421, ppl: 14.273730 +epoch: 1, batch: 41094, sum loss: 4706.317383, avg loss: 2.698576, ppl: 14.858564 +epoch: 1, batch: 41095, sum loss: 5453.339844, avg loss: 2.598066, ppl: 13.437719 +epoch: 1, batch: 41096, sum loss: 4877.351074, avg loss: 2.735475, ppl: 15.417058 +epoch: 1, batch: 41097, sum loss: 4316.352051, avg loss: 2.561633, ppl: 12.956964 +epoch: 1, batch: 41098, sum loss: 3410.357422, avg loss: 2.444701, ppl: 11.527100 +epoch: 1, batch: 41099, sum loss: 4449.706543, avg loss: 2.511121, ppl: 12.318732 +epoch: 1, batch: 41100, sum loss: 4597.203613, avg loss: 2.907782, ppl: 18.316130 +epoch: 1, batch: 41101, sum loss: 3896.283203, avg loss: 2.448952, ppl: 11.576214 +epoch: 1, batch: 41102, sum loss: 3827.310547, avg loss: 2.380168, ppl: 10.806721 +epoch: 1, batch: 41103, sum loss: 5281.621094, avg loss: 2.788607, ppl: 16.258354 +epoch: 1, batch: 41104, sum loss: 3874.027832, avg loss: 2.260226, ppl: 9.585258 +epoch: 1, batch: 41105, sum loss: 4479.690430, avg loss: 2.680844, ppl: 14.597413 +epoch: 1, batch: 41106, sum loss: 3804.220703, avg loss: 2.446444, ppl: 11.547215 +epoch: 1, batch: 41107, sum loss: 3559.689941, avg loss: 2.215115, ppl: 9.162463 +epoch: 1, batch: 41108, sum loss: 5010.169434, avg loss: 2.610823, ppl: 13.610247 +epoch: 1, batch: 41109, sum loss: 4063.254395, avg loss: 2.443328, ppl: 11.511286 +epoch: 1, batch: 41110, sum loss: 4958.891113, avg loss: 2.609943, ppl: 13.598274 +epoch: 1, batch: 41111, sum loss: 5760.439453, avg loss: 2.946516, ppl: 19.039509 +epoch: 1, batch: 41112, sum loss: 3432.268799, avg loss: 2.250668, ppl: 9.494076 +epoch: 1, batch: 41113, sum loss: 5231.988281, avg loss: 2.762402, ppl: 15.837833 +epoch: 1, batch: 41114, sum loss: 
4227.533203, avg loss: 2.574625, ppl: 13.126388 +epoch: 1, batch: 41115, sum loss: 4699.722168, avg loss: 2.892137, ppl: 18.031796 +epoch: 1, batch: 41116, sum loss: 4497.468750, avg loss: 2.678659, ppl: 14.565551 +epoch: 1, batch: 41117, sum loss: 5180.122070, avg loss: 2.871464, ppl: 17.662867 +epoch: 1, batch: 41118, sum loss: 4739.416504, avg loss: 2.662594, ppl: 14.333416 +epoch: 1, batch: 41119, sum loss: 4581.130371, avg loss: 2.632833, ppl: 13.913136 +epoch: 1, batch: 41120, sum loss: 4782.920410, avg loss: 2.747226, ppl: 15.599299 +epoch: 1, batch: 41121, sum loss: 5435.269043, avg loss: 2.841228, ppl: 17.136793 +epoch: 1, batch: 41122, sum loss: 4438.329102, avg loss: 2.530404, ppl: 12.558579 +epoch: 1, batch: 41123, sum loss: 4029.094482, avg loss: 2.571216, ppl: 13.081717 +epoch: 1, batch: 41124, sum loss: 4139.633789, avg loss: 2.350729, ppl: 10.493217 +epoch: 1, batch: 41125, sum loss: 3975.454590, avg loss: 2.530525, ppl: 12.560098 +epoch: 1, batch: 41126, sum loss: 3824.037109, avg loss: 2.298099, ppl: 9.955242 +epoch: 1, batch: 41127, sum loss: 3864.218018, avg loss: 2.419673, ppl: 11.242185 +epoch: 1, batch: 41128, sum loss: 4747.670898, avg loss: 2.757068, ppl: 15.753584 +epoch: 1, batch: 41129, sum loss: 5243.298828, avg loss: 2.722378, ppl: 15.216457 +epoch: 1, batch: 41130, sum loss: 4528.087402, avg loss: 2.660451, ppl: 14.302737 +epoch: 1, batch: 41131, sum loss: 4093.531494, avg loss: 2.366203, ppl: 10.656852 +epoch: 1, batch: 41132, sum loss: 4551.517578, avg loss: 2.586089, ppl: 13.277745 +epoch: 1, batch: 41133, sum loss: 4206.938477, avg loss: 2.715906, ppl: 15.118303 +epoch: 1, batch: 41134, sum loss: 4979.600586, avg loss: 2.829319, ppl: 16.933916 +epoch: 1, batch: 41135, sum loss: 4448.229492, avg loss: 2.506045, ppl: 12.256355 +epoch: 1, batch: 41136, sum loss: 4688.275391, avg loss: 2.662280, ppl: 14.328923 +epoch: 1, batch: 41137, sum loss: 4206.966309, avg loss: 2.531267, ppl: 12.569427 +epoch: 1, batch: 41138, sum loss: 
4255.151855, avg loss: 2.572643, ppl: 13.100407 +epoch: 1, batch: 41139, sum loss: 4421.101074, avg loss: 2.631608, ppl: 13.896093 +epoch: 1, batch: 41140, sum loss: 4751.029297, avg loss: 2.462949, ppl: 11.739383 +epoch: 1, batch: 41141, sum loss: 5300.251465, avg loss: 2.788139, ppl: 16.250744 +epoch: 1, batch: 41142, sum loss: 5761.487305, avg loss: 2.887964, ppl: 17.956703 +epoch: 1, batch: 41143, sum loss: 3371.129150, avg loss: 2.315336, ppl: 10.128325 +epoch: 1, batch: 41144, sum loss: 5177.143066, avg loss: 2.668631, ppl: 14.420209 +epoch: 1, batch: 41145, sum loss: 4788.060547, avg loss: 2.635146, ppl: 13.945351 +epoch: 1, batch: 41146, sum loss: 3992.300293, avg loss: 2.670435, ppl: 14.446251 +epoch: 1, batch: 41147, sum loss: 3685.702881, avg loss: 2.341615, ppl: 10.398021 +epoch: 1, batch: 41148, sum loss: 4006.507568, avg loss: 2.336156, ppl: 10.341407 +epoch: 1, batch: 41149, sum loss: 4468.248047, avg loss: 2.459135, ppl: 11.694689 +epoch: 1, batch: 41150, sum loss: 4949.473145, avg loss: 2.661007, ppl: 14.310695 +epoch: 1, batch: 41151, sum loss: 3540.662109, avg loss: 2.278418, ppl: 9.761229 +epoch: 1, batch: 41152, sum loss: 5037.646484, avg loss: 2.790940, ppl: 16.296328 +epoch: 1, batch: 41153, sum loss: 5215.222656, avg loss: 2.643296, ppl: 14.059467 +epoch: 1, batch: 41154, sum loss: 4419.794922, avg loss: 2.502715, ppl: 12.215615 +epoch: 1, batch: 41155, sum loss: 3668.127441, avg loss: 2.416421, ppl: 11.205685 +epoch: 1, batch: 41156, sum loss: 4442.720703, avg loss: 2.572508, ppl: 13.098630 +epoch: 1, batch: 41157, sum loss: 4415.211914, avg loss: 2.572967, ppl: 13.104652 +epoch: 1, batch: 41158, sum loss: 3787.506348, avg loss: 2.491781, ppl: 12.082770 +epoch: 1, batch: 41159, sum loss: 3858.251221, avg loss: 2.677482, ppl: 14.548412 +epoch: 1, batch: 41160, sum loss: 4206.321777, avg loss: 2.657184, ppl: 14.256083 +epoch: 1, batch: 41161, sum loss: 3906.031250, avg loss: 2.451997, ppl: 11.611512 +epoch: 1, batch: 41162, sum loss: 
4517.172852, avg loss: 2.397650, ppl: 10.997305 +epoch: 1, batch: 41163, sum loss: 4217.821777, avg loss: 2.762162, ppl: 15.834043 +epoch: 1, batch: 41164, sum loss: 4813.987793, avg loss: 2.713634, ppl: 15.083999 +epoch: 1, batch: 41165, sum loss: 4180.658203, avg loss: 2.521507, ppl: 12.447338 +epoch: 1, batch: 41166, sum loss: 4225.403809, avg loss: 2.603453, ppl: 13.510304 +epoch: 1, batch: 41167, sum loss: 4778.412598, avg loss: 2.578744, ppl: 13.180573 +epoch: 1, batch: 41168, sum loss: 4779.710938, avg loss: 2.729704, ppl: 15.328344 +epoch: 1, batch: 41169, sum loss: 4788.972168, avg loss: 2.592838, ppl: 13.367659 +epoch: 1, batch: 41170, sum loss: 4273.888672, avg loss: 2.542468, ppl: 12.711000 +epoch: 1, batch: 41171, sum loss: 4729.980469, avg loss: 2.521312, ppl: 12.444908 +epoch: 1, batch: 41172, sum loss: 4420.783691, avg loss: 2.844777, ppl: 17.197725 +epoch: 1, batch: 41173, sum loss: 5218.545410, avg loss: 2.762597, ppl: 15.840926 +epoch: 1, batch: 41174, sum loss: 4580.397949, avg loss: 2.518086, ppl: 12.404827 +epoch: 1, batch: 41175, sum loss: 5180.590820, avg loss: 2.705269, ppl: 14.958345 +epoch: 1, batch: 41176, sum loss: 3944.019043, avg loss: 2.499378, ppl: 12.174924 +epoch: 1, batch: 41177, sum loss: 3738.098145, avg loss: 2.391618, ppl: 10.931161 +epoch: 1, batch: 41178, sum loss: 3849.541748, avg loss: 2.374794, ppl: 10.748801 +epoch: 1, batch: 41179, sum loss: 3784.381348, avg loss: 2.255293, ppl: 9.538087 +epoch: 1, batch: 41180, sum loss: 4227.564453, avg loss: 2.585666, ppl: 13.272128 +epoch: 1, batch: 41181, sum loss: 4203.710449, avg loss: 2.436934, ppl: 11.437916 +epoch: 1, batch: 41182, sum loss: 4648.588379, avg loss: 2.620399, ppl: 13.741208 +epoch: 1, batch: 41183, sum loss: 4443.104004, avg loss: 2.408186, ppl: 11.113788 +epoch: 1, batch: 41184, sum loss: 5383.675293, avg loss: 2.951576, ppl: 19.136097 +epoch: 1, batch: 41185, sum loss: 4569.390137, avg loss: 2.679994, ppl: 14.585011 +epoch: 1, batch: 41186, sum loss: 
4716.924316, avg loss: 2.619058, ppl: 13.722795 +epoch: 1, batch: 41187, sum loss: 3888.859863, avg loss: 2.558460, ppl: 12.915917 +epoch: 1, batch: 41188, sum loss: 4012.112305, avg loss: 2.385322, ppl: 10.862563 +epoch: 1, batch: 41189, sum loss: 5028.027344, avg loss: 2.750562, ppl: 15.651424 +epoch: 1, batch: 41190, sum loss: 4705.079102, avg loss: 2.583789, ppl: 13.247232 +epoch: 1, batch: 41191, sum loss: 4261.739746, avg loss: 2.556532, ppl: 12.891039 +epoch: 1, batch: 41192, sum loss: 4272.326172, avg loss: 2.683622, ppl: 14.638015 +epoch: 1, batch: 41193, sum loss: 4049.333984, avg loss: 2.458612, ppl: 11.688576 +epoch: 1, batch: 41194, sum loss: 4027.708740, avg loss: 2.376229, ppl: 10.764237 +epoch: 1, batch: 41195, sum loss: 4438.496582, avg loss: 2.486553, ppl: 12.019769 +epoch: 1, batch: 41196, sum loss: 4718.317871, avg loss: 2.777115, ppl: 16.072582 +epoch: 1, batch: 41197, sum loss: 4752.464844, avg loss: 2.525220, ppl: 12.493649 +epoch: 1, batch: 41198, sum loss: 4370.549316, avg loss: 2.692883, ppl: 14.774209 +epoch: 1, batch: 41199, sum loss: 4231.720703, avg loss: 2.396218, ppl: 10.981564 +epoch: 1, batch: 41200, sum loss: 3981.231689, avg loss: 2.566880, ppl: 13.025128 +epoch: 1, batch: 41201, sum loss: 4327.221191, avg loss: 2.554440, ppl: 12.864091 +epoch: 1, batch: 41202, sum loss: 4054.123291, avg loss: 2.632548, ppl: 13.909160 +epoch: 1, batch: 41203, sum loss: 4330.498535, avg loss: 2.708254, ppl: 15.003058 +epoch: 1, batch: 41204, sum loss: 4281.973145, avg loss: 2.725636, ppl: 15.266113 +epoch: 1, batch: 41205, sum loss: 4076.329102, avg loss: 2.614708, ppl: 13.663222 +epoch: 1, batch: 41206, sum loss: 4439.167480, avg loss: 2.528000, ppl: 12.528422 +epoch: 1, batch: 41207, sum loss: 5506.540527, avg loss: 2.669191, ppl: 14.428290 +epoch: 1, batch: 41208, sum loss: 3933.142822, avg loss: 2.514797, ppl: 12.364101 +epoch: 1, batch: 41209, sum loss: 4498.588379, avg loss: 2.733043, ppl: 15.379616 +epoch: 1, batch: 41210, sum loss: 
3208.690918, avg loss: 2.342110, ppl: 10.403166 +epoch: 1, batch: 41211, sum loss: 4011.571045, avg loss: 2.413701, ppl: 11.175245 +epoch: 1, batch: 41212, sum loss: 4407.485352, avg loss: 2.659919, ppl: 14.295128 +epoch: 1, batch: 41213, sum loss: 4486.074219, avg loss: 2.590112, ppl: 13.331267 +epoch: 1, batch: 41214, sum loss: 4566.658691, avg loss: 2.670561, ppl: 14.448067 +epoch: 1, batch: 41215, sum loss: 5157.308105, avg loss: 2.733073, ppl: 15.380070 +epoch: 1, batch: 41216, sum loss: 4924.280273, avg loss: 2.637536, ppl: 13.978722 +epoch: 1, batch: 41217, sum loss: 3686.782715, avg loss: 2.280014, ppl: 9.776817 +epoch: 1, batch: 41218, sum loss: 4068.709229, avg loss: 2.557328, ppl: 12.901301 +epoch: 1, batch: 41219, sum loss: 4517.134766, avg loss: 2.971799, ppl: 19.527025 +epoch: 1, batch: 41220, sum loss: 5127.242188, avg loss: 2.727256, ppl: 15.290875 +epoch: 1, batch: 41221, sum loss: 4100.357422, avg loss: 2.436338, ppl: 11.431106 +epoch: 1, batch: 41222, sum loss: 4202.743164, avg loss: 2.459183, ppl: 11.695250 +epoch: 1, batch: 41223, sum loss: 4497.303711, avg loss: 2.536551, ppl: 12.636008 +epoch: 1, batch: 41224, sum loss: 4712.172852, avg loss: 2.715950, ppl: 15.118963 +epoch: 1, batch: 41225, sum loss: 4155.327637, avg loss: 2.420109, ppl: 11.247088 +epoch: 1, batch: 41226, sum loss: 4952.149414, avg loss: 2.638332, ppl: 13.989851 +epoch: 1, batch: 41227, sum loss: 4229.101074, avg loss: 2.383935, ppl: 10.847507 +epoch: 1, batch: 41228, sum loss: 4353.926758, avg loss: 2.571723, ppl: 13.088356 +epoch: 1, batch: 41229, sum loss: 5202.198242, avg loss: 2.628700, ppl: 13.855753 +epoch: 1, batch: 41230, sum loss: 4520.697266, avg loss: 2.266014, ppl: 9.640892 +epoch: 1, batch: 41231, sum loss: 3504.190430, avg loss: 2.447061, ppl: 11.554334 +epoch: 1, batch: 41232, sum loss: 3897.549805, avg loss: 2.407381, ppl: 11.104840 +epoch: 1, batch: 41233, sum loss: 3376.558105, avg loss: 2.276843, ppl: 9.745865 +epoch: 1, batch: 41234, sum loss: 
4382.642090, avg loss: 2.463542, ppl: 11.746349 +epoch: 1, batch: 41235, sum loss: 3843.000732, avg loss: 2.386957, ppl: 10.880334 +epoch: 1, batch: 41236, sum loss: 4665.204102, avg loss: 2.658236, ppl: 14.271090 +epoch: 1, batch: 41237, sum loss: 3807.160889, avg loss: 2.334249, ppl: 10.321711 +epoch: 1, batch: 41238, sum loss: 4185.762695, avg loss: 2.539905, ppl: 12.678461 +epoch: 1, batch: 41239, sum loss: 4164.357422, avg loss: 2.489156, ppl: 12.051098 +epoch: 1, batch: 41240, sum loss: 4311.652832, avg loss: 2.669754, ppl: 14.436418 +epoch: 1, batch: 41241, sum loss: 5488.559570, avg loss: 2.878112, ppl: 17.780672 +epoch: 1, batch: 41242, sum loss: 4292.377441, avg loss: 2.674378, ppl: 14.503332 +epoch: 1, batch: 41243, sum loss: 4650.957520, avg loss: 2.760212, ppl: 15.803196 +epoch: 1, batch: 41244, sum loss: 4010.492188, avg loss: 2.570828, ppl: 13.076653 +epoch: 1, batch: 41245, sum loss: 4999.234375, avg loss: 2.757438, ppl: 15.759411 +epoch: 1, batch: 41246, sum loss: 4850.187012, avg loss: 2.813334, ppl: 16.665384 +epoch: 1, batch: 41247, sum loss: 4813.894043, avg loss: 2.674386, ppl: 14.503435 +epoch: 1, batch: 41248, sum loss: 3792.943359, avg loss: 2.469364, ppl: 11.814932 +epoch: 1, batch: 41249, sum loss: 4342.849609, avg loss: 2.636824, ppl: 13.968770 +epoch: 1, batch: 41250, sum loss: 3826.632324, avg loss: 2.575123, ppl: 13.132927 +epoch: 1, batch: 41251, sum loss: 4674.737305, avg loss: 2.685087, ppl: 14.659483 +epoch: 1, batch: 41252, sum loss: 5174.062988, avg loss: 2.948184, ppl: 19.071289 +epoch: 1, batch: 41253, sum loss: 5508.221680, avg loss: 2.740409, ppl: 15.493319 +epoch: 1, batch: 41254, sum loss: 4387.308105, avg loss: 2.507033, ppl: 12.268476 +epoch: 1, batch: 41255, sum loss: 3496.266602, avg loss: 2.360747, ppl: 10.598869 +epoch: 1, batch: 41256, sum loss: 4897.177734, avg loss: 2.683385, ppl: 14.634549 +epoch: 1, batch: 41257, sum loss: 4076.671143, avg loss: 2.524255, ppl: 12.481587 +epoch: 1, batch: 41258, sum loss: 
4446.239258, avg loss: 2.580522, ppl: 13.204026 +epoch: 1, batch: 41259, sum loss: 5071.165039, avg loss: 2.762072, ppl: 15.832620 +epoch: 1, batch: 41260, sum loss: 3704.420898, avg loss: 2.646015, ppl: 14.097746 +epoch: 1, batch: 41261, sum loss: 4117.119141, avg loss: 2.839393, ppl: 17.105373 +epoch: 1, batch: 41262, sum loss: 4357.436035, avg loss: 2.623381, ppl: 13.782245 +epoch: 1, batch: 41263, sum loss: 5407.865234, avg loss: 2.703933, ppl: 14.938365 +epoch: 1, batch: 41264, sum loss: 3081.204834, avg loss: 2.258948, ppl: 9.573011 +epoch: 1, batch: 41265, sum loss: 4986.655273, avg loss: 2.594514, ppl: 13.390077 +epoch: 1, batch: 41266, sum loss: 4375.709473, avg loss: 2.637558, ppl: 13.979032 +epoch: 1, batch: 41267, sum loss: 4431.731445, avg loss: 2.761203, ppl: 15.818866 +epoch: 1, batch: 41268, sum loss: 4191.212891, avg loss: 2.466871, ppl: 11.785507 +epoch: 1, batch: 41269, sum loss: 3732.268066, avg loss: 2.354743, ppl: 10.535423 +epoch: 1, batch: 41270, sum loss: 4481.155762, avg loss: 2.620559, ppl: 13.743403 +epoch: 1, batch: 41271, sum loss: 3862.223633, avg loss: 2.507937, ppl: 12.279576 +epoch: 1, batch: 41272, sum loss: 4142.614258, avg loss: 2.467310, ppl: 11.790692 +epoch: 1, batch: 41273, sum loss: 4553.812012, avg loss: 2.644490, ppl: 14.076264 +epoch: 1, batch: 41274, sum loss: 4665.316895, avg loss: 2.596170, ppl: 13.412267 +epoch: 1, batch: 41275, sum loss: 3696.272217, avg loss: 2.314510, ppl: 10.119962 +epoch: 1, batch: 41276, sum loss: 4482.862305, avg loss: 2.402391, ppl: 11.049566 +epoch: 1, batch: 41277, sum loss: 3715.041260, avg loss: 2.453792, ppl: 11.632374 +epoch: 1, batch: 41278, sum loss: 4877.422363, avg loss: 2.847298, ppl: 17.241140 +epoch: 1, batch: 41279, sum loss: 4312.773926, avg loss: 2.601191, ppl: 13.479777 +epoch: 1, batch: 41280, sum loss: 4415.650391, avg loss: 2.594389, ppl: 13.388407 +epoch: 1, batch: 41281, sum loss: 4207.774414, avg loss: 2.478077, ppl: 11.918323 +epoch: 1, batch: 41282, sum loss: 
5513.831543, avg loss: 2.818932, ppl: 16.758947 +epoch: 1, batch: 41283, sum loss: 4173.185547, avg loss: 2.825447, ppl: 16.868488 +epoch: 1, batch: 41284, sum loss: 4682.183594, avg loss: 2.675534, ppl: 14.520095 +epoch: 1, batch: 41285, sum loss: 4398.714844, avg loss: 2.814277, ppl: 16.681110 +epoch: 1, batch: 41286, sum loss: 4402.837891, avg loss: 2.555332, ppl: 12.875579 +epoch: 1, batch: 41287, sum loss: 4523.672852, avg loss: 2.670409, ppl: 14.445876 +epoch: 1, batch: 41288, sum loss: 3535.965576, avg loss: 2.398891, ppl: 11.010961 +epoch: 1, batch: 41289, sum loss: 3545.349365, avg loss: 2.498485, ppl: 12.164046 +epoch: 1, batch: 41290, sum loss: 4655.998535, avg loss: 2.472649, ppl: 11.853807 +epoch: 1, batch: 41291, sum loss: 3924.598633, avg loss: 2.355701, ppl: 10.545524 +epoch: 1, batch: 41292, sum loss: 4316.033203, avg loss: 2.601587, ppl: 13.485126 +epoch: 1, batch: 41293, sum loss: 4788.656250, avg loss: 2.772818, ppl: 16.003666 +epoch: 1, batch: 41294, sum loss: 4192.068848, avg loss: 2.629905, ppl: 13.872455 +epoch: 1, batch: 41295, sum loss: 4105.819824, avg loss: 2.514280, ppl: 12.357712 +epoch: 1, batch: 41296, sum loss: 4406.397461, avg loss: 2.536786, ppl: 12.638985 +epoch: 1, batch: 41297, sum loss: 4151.545898, avg loss: 2.599591, ppl: 13.458226 +epoch: 1, batch: 41298, sum loss: 4749.059082, avg loss: 2.765905, ppl: 15.893419 +epoch: 1, batch: 41299, sum loss: 4777.648438, avg loss: 2.670569, ppl: 14.448191 +epoch: 1, batch: 41300, sum loss: 4595.758789, avg loss: 2.664208, ppl: 14.356577 +epoch: 1, batch: 41301, sum loss: 3908.745605, avg loss: 2.353248, ppl: 10.519686 +epoch: 1, batch: 41302, sum loss: 4346.306152, avg loss: 2.531337, ppl: 12.570305 +epoch: 1, batch: 41303, sum loss: 5439.497070, avg loss: 2.749998, ppl: 15.642606 +epoch: 1, batch: 41304, sum loss: 4975.356445, avg loss: 2.628292, ppl: 13.850092 +epoch: 1, batch: 41305, sum loss: 4676.410645, avg loss: 2.590809, ppl: 13.340561 +epoch: 1, batch: 41306, sum loss: 
4168.851562, avg loss: 2.391768, ppl: 10.932806 +epoch: 1, batch: 41307, sum loss: 3836.946533, avg loss: 2.434611, ppl: 11.411374 +epoch: 1, batch: 41308, sum loss: 4635.711914, avg loss: 2.681152, ppl: 14.601907 +epoch: 1, batch: 41309, sum loss: 4750.261719, avg loss: 2.760175, ppl: 15.802611 +epoch: 1, batch: 41310, sum loss: 4471.265625, avg loss: 2.708217, ppl: 15.002498 +epoch: 1, batch: 41311, sum loss: 3500.816650, avg loss: 2.522202, ppl: 12.455995 +epoch: 1, batch: 41312, sum loss: 3578.885254, avg loss: 2.490526, ppl: 12.067618 +epoch: 1, batch: 41313, sum loss: 4700.152832, avg loss: 2.878232, ppl: 17.782804 +epoch: 1, batch: 41314, sum loss: 4431.545898, avg loss: 2.663189, ppl: 14.341949 +epoch: 1, batch: 41315, sum loss: 4273.944336, avg loss: 2.793428, ppl: 16.336922 +epoch: 1, batch: 41316, sum loss: 5006.402344, avg loss: 2.781335, ppl: 16.140549 +epoch: 1, batch: 41317, sum loss: 4664.684570, avg loss: 2.823659, ppl: 16.838352 +epoch: 1, batch: 41318, sum loss: 4630.157715, avg loss: 2.670218, ppl: 14.443114 +epoch: 1, batch: 41319, sum loss: 5288.770508, avg loss: 2.655005, ppl: 14.225063 +epoch: 1, batch: 41320, sum loss: 4627.396484, avg loss: 2.787588, ppl: 16.241802 +epoch: 1, batch: 41321, sum loss: 4225.451172, avg loss: 2.731384, ppl: 15.354127 +epoch: 1, batch: 41322, sum loss: 4997.288574, avg loss: 2.624626, ppl: 13.799417 +epoch: 1, batch: 41323, sum loss: 3866.823486, avg loss: 2.542290, ppl: 12.708746 +epoch: 1, batch: 41324, sum loss: 4066.997070, avg loss: 2.374196, ppl: 10.742369 +epoch: 1, batch: 41325, sum loss: 4616.336914, avg loss: 2.775909, ppl: 16.053215 +epoch: 1, batch: 41326, sum loss: 4614.434570, avg loss: 2.698500, ppl: 14.857424 +epoch: 1, batch: 41327, sum loss: 3962.375732, avg loss: 2.691831, ppl: 14.758679 +epoch: 1, batch: 41328, sum loss: 3819.897949, avg loss: 2.483679, ppl: 11.985275 +epoch: 1, batch: 41329, sum loss: 4034.920166, avg loss: 2.369301, ppl: 10.689920 +epoch: 1, batch: 41330, sum loss: 
4085.538818, avg loss: 2.523495, ppl: 12.472113 +epoch: 1, batch: 41331, sum loss: 5252.893555, avg loss: 2.630392, ppl: 13.879214 +epoch: 1, batch: 41332, sum loss: 4493.570801, avg loss: 2.692373, ppl: 14.766680 +epoch: 1, batch: 41333, sum loss: 4752.704102, avg loss: 2.643328, ppl: 14.059920 +epoch: 1, batch: 41334, sum loss: 3446.797363, avg loss: 2.305550, ppl: 10.029694 +epoch: 1, batch: 41335, sum loss: 4652.959961, avg loss: 2.828547, ppl: 16.920856 +epoch: 1, batch: 41336, sum loss: 4118.212891, avg loss: 2.488346, ppl: 12.041348 +epoch: 1, batch: 41337, sum loss: 4689.566406, avg loss: 2.608212, ppl: 13.574764 +epoch: 1, batch: 41338, sum loss: 4488.065918, avg loss: 2.734958, ppl: 15.409092 +epoch: 1, batch: 41339, sum loss: 4113.321289, avg loss: 2.532833, ppl: 12.589125 +epoch: 1, batch: 41340, sum loss: 3810.184814, avg loss: 2.459771, ppl: 11.702127 +epoch: 1, batch: 41341, sum loss: 3593.712891, avg loss: 2.397407, ppl: 10.994628 +epoch: 1, batch: 41342, sum loss: 3827.995605, avg loss: 2.398494, ppl: 11.006583 +epoch: 1, batch: 41343, sum loss: 3613.614746, avg loss: 2.391539, ppl: 10.930299 +epoch: 1, batch: 41344, sum loss: 4599.199219, avg loss: 2.689590, ppl: 14.725640 +epoch: 1, batch: 41345, sum loss: 3643.942871, avg loss: 2.625319, ppl: 13.808979 +epoch: 1, batch: 41346, sum loss: 4247.221191, avg loss: 2.684716, ppl: 14.654042 +epoch: 1, batch: 41347, sum loss: 3634.622559, avg loss: 2.418245, ppl: 11.226141 +epoch: 1, batch: 41348, sum loss: 4739.402344, avg loss: 2.677629, ppl: 14.550546 +epoch: 1, batch: 41349, sum loss: 4441.512207, avg loss: 2.637478, ppl: 13.977901 +epoch: 1, batch: 41350, sum loss: 4131.949707, avg loss: 2.610202, ppl: 13.601796 +epoch: 1, batch: 41351, sum loss: 4225.835449, avg loss: 2.539565, ppl: 12.674151 +epoch: 1, batch: 41352, sum loss: 4378.141602, avg loss: 2.677762, ppl: 14.552492 +epoch: 1, batch: 41353, sum loss: 4492.141113, avg loss: 2.637781, ppl: 13.982144 +epoch: 1, batch: 41354, sum loss: 
4781.589355, avg loss: 2.675764, ppl: 14.523436 +epoch: 1, batch: 41355, sum loss: 4567.174805, avg loss: 2.700872, ppl: 14.892715 +epoch: 1, batch: 41356, sum loss: 4592.403320, avg loss: 2.698239, ppl: 14.853557 +epoch: 1, batch: 41357, sum loss: 4284.860352, avg loss: 2.414006, ppl: 11.178651 +epoch: 1, batch: 41358, sum loss: 4618.820312, avg loss: 2.769077, ppl: 15.943912 +epoch: 1, batch: 41359, sum loss: 5152.672852, avg loss: 2.733513, ppl: 15.386851 +epoch: 1, batch: 41360, sum loss: 4450.616211, avg loss: 2.725423, ppl: 15.262870 +epoch: 1, batch: 41361, sum loss: 4444.239258, avg loss: 2.542471, ppl: 12.711043 +epoch: 1, batch: 41362, sum loss: 4353.399902, avg loss: 2.483400, ppl: 11.981933 +epoch: 1, batch: 41363, sum loss: 4402.150391, avg loss: 2.543125, ppl: 12.719362 +epoch: 1, batch: 41364, sum loss: 4069.986328, avg loss: 2.846144, ppl: 17.221256 +epoch: 1, batch: 41365, sum loss: 5511.886230, avg loss: 3.040202, ppl: 20.909470 +epoch: 1, batch: 41366, sum loss: 5190.919922, avg loss: 2.613756, ppl: 13.650228 +epoch: 1, batch: 41367, sum loss: 3518.047363, avg loss: 2.421230, ppl: 11.259701 +epoch: 1, batch: 41368, sum loss: 5607.894043, avg loss: 2.901135, ppl: 18.194788 +epoch: 1, batch: 41369, sum loss: 3472.040771, avg loss: 2.254572, ppl: 9.531212 +epoch: 1, batch: 41370, sum loss: 4842.479980, avg loss: 2.697761, ppl: 14.846447 +epoch: 1, batch: 41371, sum loss: 3673.937012, avg loss: 2.294776, ppl: 9.922217 +epoch: 1, batch: 41372, sum loss: 4817.494141, avg loss: 2.827168, ppl: 16.897539 +epoch: 1, batch: 41373, sum loss: 4369.322754, avg loss: 2.632122, ppl: 13.903245 +epoch: 1, batch: 41374, sum loss: 4009.727051, avg loss: 2.369815, ppl: 10.695415 +epoch: 1, batch: 41375, sum loss: 4754.992188, avg loss: 2.654937, ppl: 14.224087 +epoch: 1, batch: 41376, sum loss: 4277.867676, avg loss: 2.819952, ppl: 16.776049 +epoch: 1, batch: 41377, sum loss: 3700.535400, avg loss: 2.364559, ppl: 10.639350 +epoch: 1, batch: 41378, sum loss: 
4364.450195, avg loss: 2.591716, ppl: 13.352669 +epoch: 1, batch: 41379, sum loss: 4489.689453, avg loss: 2.659769, ppl: 14.292984 +epoch: 1, batch: 41380, sum loss: 4428.147949, avg loss: 2.757253, ppl: 15.756495 +epoch: 1, batch: 41381, sum loss: 4179.691895, avg loss: 2.407657, ppl: 11.107902 +epoch: 1, batch: 41382, sum loss: 4420.225098, avg loss: 2.568405, ppl: 13.045000 +epoch: 1, batch: 41383, sum loss: 3669.702148, avg loss: 2.537830, ppl: 12.652187 +epoch: 1, batch: 41384, sum loss: 3924.952148, avg loss: 2.378759, ppl: 10.791501 +epoch: 1, batch: 41385, sum loss: 4446.270508, avg loss: 2.615453, ppl: 13.673415 +epoch: 1, batch: 41386, sum loss: 4515.755859, avg loss: 2.584863, ppl: 13.261472 +epoch: 1, batch: 41387, sum loss: 5046.408203, avg loss: 2.644868, ppl: 14.081585 +epoch: 1, batch: 41388, sum loss: 5021.388672, avg loss: 2.740933, ppl: 15.501436 +epoch: 1, batch: 41389, sum loss: 5191.170898, avg loss: 2.559749, ppl: 12.932570 +epoch: 1, batch: 41390, sum loss: 4261.489746, avg loss: 2.728226, ppl: 15.305717 +epoch: 1, batch: 41391, sum loss: 3839.772705, avg loss: 2.514586, ppl: 12.361489 +epoch: 1, batch: 41392, sum loss: 4503.681641, avg loss: 2.521658, ppl: 12.449225 +epoch: 1, batch: 41393, sum loss: 4462.000488, avg loss: 2.705883, ppl: 14.967521 +epoch: 1, batch: 41394, sum loss: 4042.302002, avg loss: 2.376427, ppl: 10.766362 +epoch: 1, batch: 41395, sum loss: 5362.967285, avg loss: 2.810779, ppl: 16.622868 +epoch: 1, batch: 41396, sum loss: 4058.310059, avg loss: 2.503584, ppl: 12.226236 +epoch: 1, batch: 41397, sum loss: 3867.151123, avg loss: 2.482125, ppl: 11.966670 +epoch: 1, batch: 41398, sum loss: 5063.297363, avg loss: 2.777453, ppl: 16.078024 +epoch: 1, batch: 41399, sum loss: 3223.907227, avg loss: 2.356657, ppl: 10.555608 +epoch: 1, batch: 41400, sum loss: 4842.459473, avg loss: 2.653403, ppl: 14.202281 +epoch: 1, batch: 41401, sum loss: 3352.775391, avg loss: 2.271528, ppl: 9.694202 +epoch: 1, batch: 41402, sum loss: 
5840.441406, avg loss: 3.103316, ppl: 22.271688 +epoch: 1, batch: 41403, sum loss: 3206.983154, avg loss: 2.318860, ppl: 10.164079 +epoch: 1, batch: 41404, sum loss: 3029.922363, avg loss: 2.391415, ppl: 10.928944 +epoch: 1, batch: 41405, sum loss: 4755.285645, avg loss: 2.680544, ppl: 14.593032 +epoch: 1, batch: 41406, sum loss: 4216.891602, avg loss: 2.711828, ppl: 15.056767 +epoch: 1, batch: 41407, sum loss: 4289.465820, avg loss: 2.588694, ppl: 13.312372 +epoch: 1, batch: 41408, sum loss: 3552.394531, avg loss: 2.357262, ppl: 10.561997 +epoch: 1, batch: 41409, sum loss: 4345.993164, avg loss: 2.380062, ppl: 10.805573 +epoch: 1, batch: 41410, sum loss: 3965.202637, avg loss: 2.598429, ppl: 13.442602 +epoch: 1, batch: 41411, sum loss: 4258.632812, avg loss: 2.478832, ppl: 11.927322 +epoch: 1, batch: 41412, sum loss: 4854.991211, avg loss: 2.691237, ppl: 14.749907 +epoch: 1, batch: 41413, sum loss: 5050.724121, avg loss: 2.623753, ppl: 13.787365 +epoch: 1, batch: 41414, sum loss: 4643.780273, avg loss: 2.861233, ppl: 17.483063 +epoch: 1, batch: 41415, sum loss: 5002.003418, avg loss: 2.692144, ppl: 14.763293 +epoch: 1, batch: 41416, sum loss: 3752.742188, avg loss: 2.650242, ppl: 14.157459 +epoch: 1, batch: 41417, sum loss: 4466.753418, avg loss: 2.578957, ppl: 13.183378 +epoch: 1, batch: 41418, sum loss: 4260.083496, avg loss: 2.522252, ppl: 12.456618 +epoch: 1, batch: 41419, sum loss: 4367.421875, avg loss: 2.621502, ppl: 13.756366 +epoch: 1, batch: 41420, sum loss: 5165.794922, avg loss: 2.655936, ppl: 14.238303 +epoch: 1, batch: 41421, sum loss: 4613.919434, avg loss: 2.717267, ppl: 15.138891 +epoch: 1, batch: 41422, sum loss: 3831.370117, avg loss: 2.532300, ppl: 12.582416 +epoch: 1, batch: 41423, sum loss: 4747.710938, avg loss: 2.801009, ppl: 16.461254 +epoch: 1, batch: 41424, sum loss: 5095.257812, avg loss: 2.816616, ppl: 16.720171 +epoch: 1, batch: 41425, sum loss: 3766.650391, avg loss: 2.445877, ppl: 11.540665 +epoch: 1, batch: 41426, sum loss: 
5841.637695, avg loss: 2.762004, ppl: 15.831532 +epoch: 1, batch: 41427, sum loss: 5081.637695, avg loss: 2.755769, ppl: 15.733131 +epoch: 1, batch: 41428, sum loss: 4135.498535, avg loss: 2.552777, ppl: 12.842719 +epoch: 1, batch: 41429, sum loss: 4288.764160, avg loss: 2.543751, ppl: 12.727322 +epoch: 1, batch: 41430, sum loss: 4118.836914, avg loss: 2.401654, ppl: 11.041424 +epoch: 1, batch: 41431, sum loss: 3731.928223, avg loss: 2.631825, ppl: 13.899118 +epoch: 1, batch: 41432, sum loss: 3165.485840, avg loss: 2.319037, ppl: 10.165882 +epoch: 1, batch: 41433, sum loss: 3372.575928, avg loss: 2.348590, ppl: 10.470799 +epoch: 1, batch: 41434, sum loss: 5165.645996, avg loss: 2.930032, ppl: 18.728226 +epoch: 1, batch: 41435, sum loss: 4296.581055, avg loss: 2.346576, ppl: 10.449733 +epoch: 1, batch: 41436, sum loss: 4317.362793, avg loss: 2.588347, ppl: 13.307755 +epoch: 1, batch: 41437, sum loss: 4010.444580, avg loss: 2.737505, ppl: 15.448389 +epoch: 1, batch: 41438, sum loss: 4135.309082, avg loss: 2.470316, ppl: 11.826185 +epoch: 1, batch: 41439, sum loss: 4882.632812, avg loss: 2.717102, ppl: 15.136397 +epoch: 1, batch: 41440, sum loss: 4721.747559, avg loss: 2.518265, ppl: 12.407058 +epoch: 1, batch: 41441, sum loss: 4663.138672, avg loss: 2.567807, ppl: 13.037209 +epoch: 1, batch: 41442, sum loss: 4402.108887, avg loss: 2.543102, ppl: 12.719059 +epoch: 1, batch: 41443, sum loss: 3333.994873, avg loss: 2.163527, ppl: 8.701773 +epoch: 1, batch: 41444, sum loss: 4287.950195, avg loss: 2.522324, ppl: 12.457513 +epoch: 1, batch: 41445, sum loss: 4696.829102, avg loss: 2.735486, ppl: 15.417232 +epoch: 1, batch: 41446, sum loss: 4236.201660, avg loss: 2.576765, ppl: 13.154515 +epoch: 1, batch: 41447, sum loss: 4178.458984, avg loss: 2.529334, ppl: 12.545143 +epoch: 1, batch: 41448, sum loss: 4557.074707, avg loss: 2.576074, ppl: 13.145426 +epoch: 1, batch: 41449, sum loss: 4509.866211, avg loss: 2.508268, ppl: 12.283638 +epoch: 1, batch: 41450, sum loss: 
4310.589844, avg loss: 2.609316, ppl: 13.589754 +epoch: 1, batch: 41451, sum loss: 4187.671875, avg loss: 2.249018, ppl: 9.478425 +epoch: 1, batch: 41452, sum loss: 4209.478027, avg loss: 2.819476, ppl: 16.768064 +epoch: 1, batch: 41453, sum loss: 3830.419434, avg loss: 2.569027, ppl: 13.053120 +epoch: 1, batch: 41454, sum loss: 5077.570801, avg loss: 2.663993, ppl: 14.353487 +epoch: 1, batch: 41455, sum loss: 4780.050781, avg loss: 3.004432, ppl: 20.174747 +epoch: 1, batch: 41456, sum loss: 3961.230469, avg loss: 2.531138, ppl: 12.567797 +epoch: 1, batch: 41457, sum loss: 4666.585449, avg loss: 2.776077, ppl: 16.055910 +epoch: 1, batch: 41458, sum loss: 4408.581543, avg loss: 2.670249, ppl: 14.443572 +epoch: 1, batch: 41459, sum loss: 4693.981934, avg loss: 2.716425, ppl: 15.126145 +epoch: 1, batch: 41460, sum loss: 3809.226074, avg loss: 2.429353, ppl: 11.351538 +epoch: 1, batch: 41461, sum loss: 4098.851074, avg loss: 2.745379, ppl: 15.570518 +epoch: 1, batch: 41462, sum loss: 3658.659668, avg loss: 2.495675, ppl: 12.129919 +epoch: 1, batch: 41463, sum loss: 4643.601562, avg loss: 2.454335, ppl: 11.638688 +epoch: 1, batch: 41464, sum loss: 4364.771973, avg loss: 2.485633, ppl: 12.008724 +epoch: 1, batch: 41465, sum loss: 5982.058594, avg loss: 2.982083, ppl: 19.728870 +epoch: 1, batch: 41466, sum loss: 5235.741699, avg loss: 2.836263, ppl: 17.051926 +epoch: 1, batch: 41467, sum loss: 4321.282227, avg loss: 2.470716, ppl: 11.830914 +epoch: 1, batch: 41468, sum loss: 3573.449219, avg loss: 2.548823, ppl: 12.792034 +epoch: 1, batch: 41469, sum loss: 4824.175293, avg loss: 2.824459, ppl: 16.851818 +epoch: 1, batch: 41470, sum loss: 4132.769531, avg loss: 2.554246, ppl: 12.861594 +epoch: 1, batch: 41471, sum loss: 4050.877197, avg loss: 2.464037, ppl: 11.752161 +epoch: 1, batch: 41472, sum loss: 4527.102051, avg loss: 2.578076, ppl: 13.171776 +epoch: 1, batch: 41473, sum loss: 4265.023438, avg loss: 2.619793, ppl: 13.732883 +epoch: 1, batch: 41474, sum loss: 
4753.240723, avg loss: 2.690006, ppl: 14.731765 +epoch: 1, batch: 41475, sum loss: 4300.565918, avg loss: 2.531234, ppl: 12.569004 +epoch: 1, batch: 41476, sum loss: 4911.781250, avg loss: 2.844112, ppl: 17.186289 +epoch: 1, batch: 41477, sum loss: 3852.111816, avg loss: 2.539296, ppl: 12.670747 +epoch: 1, batch: 41478, sum loss: 4848.095703, avg loss: 2.630546, ppl: 13.881342 +epoch: 1, batch: 41479, sum loss: 5012.251953, avg loss: 2.620100, ppl: 13.737101 +epoch: 1, batch: 41480, sum loss: 3601.385010, avg loss: 2.298268, ppl: 9.956918 +epoch: 1, batch: 41481, sum loss: 3456.832275, avg loss: 2.384022, ppl: 10.848451 +epoch: 1, batch: 41482, sum loss: 3708.755859, avg loss: 2.500847, ppl: 12.192815 +epoch: 1, batch: 41483, sum loss: 4404.873047, avg loss: 2.704035, ppl: 14.939894 +epoch: 1, batch: 41484, sum loss: 5047.001953, avg loss: 2.592194, ppl: 13.359054 +epoch: 1, batch: 41485, sum loss: 3989.174316, avg loss: 2.419147, ppl: 11.236277 +epoch: 1, batch: 41486, sum loss: 5689.827148, avg loss: 2.941999, ppl: 18.953705 +epoch: 1, batch: 41487, sum loss: 4712.869141, avg loss: 2.596622, ppl: 13.418337 +epoch: 1, batch: 41488, sum loss: 4675.083008, avg loss: 2.745204, ppl: 15.567797 +epoch: 1, batch: 41489, sum loss: 4136.151367, avg loss: 2.593198, ppl: 13.372473 +epoch: 1, batch: 41490, sum loss: 3779.571289, avg loss: 2.436861, ppl: 11.437081 +epoch: 1, batch: 41491, sum loss: 4417.666992, avg loss: 2.650070, ppl: 14.155032 +epoch: 1, batch: 41492, sum loss: 5419.741211, avg loss: 2.890529, ppl: 18.002825 +epoch: 1, batch: 41493, sum loss: 2688.017090, avg loss: 2.194300, ppl: 8.973715 +epoch: 1, batch: 41494, sum loss: 4330.548828, avg loss: 2.562455, ppl: 12.967613 +epoch: 1, batch: 41495, sum loss: 4636.155273, avg loss: 2.569931, ppl: 13.064920 +epoch: 1, batch: 41496, sum loss: 4591.267090, avg loss: 2.806398, ppl: 16.550200 +epoch: 1, batch: 41497, sum loss: 4408.922363, avg loss: 2.610374, ppl: 13.604144 +epoch: 1, batch: 41498, sum loss: 
4957.123535, avg loss: 2.717721, ppl: 15.145769 +epoch: 1, batch: 41499, sum loss: 3701.257568, avg loss: 2.229673, ppl: 9.296829 +epoch: 1, batch: 41500, sum loss: 3757.645020, avg loss: 2.424287, ppl: 11.294177 +epoch: 1, batch: 41501, sum loss: 4817.962402, avg loss: 2.519855, ppl: 12.426792 +epoch: 1, batch: 41502, sum loss: 4183.996094, avg loss: 2.699353, ppl: 14.870100 +epoch: 1, batch: 41503, sum loss: 4147.260742, avg loss: 2.496846, ppl: 12.144128 +epoch: 1, batch: 41504, sum loss: 5159.832031, avg loss: 2.948475, ppl: 19.076847 +epoch: 1, batch: 41505, sum loss: 4331.047363, avg loss: 2.414185, ppl: 11.180653 +epoch: 1, batch: 41506, sum loss: 4770.015625, avg loss: 2.744543, ppl: 15.557500 +epoch: 1, batch: 41507, sum loss: 4353.541016, avg loss: 2.405272, ppl: 11.081439 +epoch: 1, batch: 41508, sum loss: 4332.184570, avg loss: 2.667601, ppl: 14.405375 +epoch: 1, batch: 41509, sum loss: 5445.043945, avg loss: 2.818346, ppl: 16.749121 +epoch: 1, batch: 41510, sum loss: 4065.481934, avg loss: 2.552092, ppl: 12.833920 +epoch: 1, batch: 41511, sum loss: 3574.089844, avg loss: 2.439652, ppl: 11.469046 +epoch: 1, batch: 41512, sum loss: 4145.842285, avg loss: 2.579865, ppl: 13.195350 +epoch: 1, batch: 41513, sum loss: 4762.436523, avg loss: 2.523814, ppl: 12.476087 +epoch: 1, batch: 41514, sum loss: 5125.662598, avg loss: 2.782662, ppl: 16.161982 +epoch: 1, batch: 41515, sum loss: 3570.792725, avg loss: 2.349206, ppl: 10.477244 +epoch: 1, batch: 41516, sum loss: 4549.150879, avg loss: 2.709441, ppl: 15.020872 +epoch: 1, batch: 41517, sum loss: 4163.529785, avg loss: 2.621870, ppl: 13.761437 +epoch: 1, batch: 41518, sum loss: 4254.952637, avg loss: 2.481022, ppl: 11.953473 +epoch: 1, batch: 41519, sum loss: 5650.652344, avg loss: 2.914210, ppl: 18.434237 +epoch: 1, batch: 41520, sum loss: 5107.180664, avg loss: 2.702212, ppl: 14.912683 +epoch: 1, batch: 41521, sum loss: 5741.176758, avg loss: 2.792401, ppl: 16.320158 +epoch: 1, batch: 41522, sum loss: 
3617.866211, avg loss: 2.355382, ppl: 10.542152 +epoch: 1, batch: 41523, sum loss: 3763.765869, avg loss: 2.429804, ppl: 11.356651 +epoch: 1, batch: 41524, sum loss: 5849.509766, avg loss: 2.918917, ppl: 18.521217 +epoch: 1, batch: 41525, sum loss: 3484.318848, avg loss: 2.394721, ppl: 10.965137 +epoch: 1, batch: 41526, sum loss: 4714.997070, avg loss: 2.602096, ppl: 13.491982 +epoch: 1, batch: 41527, sum loss: 4671.711914, avg loss: 2.932650, ppl: 18.777330 +epoch: 1, batch: 41528, sum loss: 4781.278320, avg loss: 2.789544, ppl: 16.273596 +epoch: 1, batch: 41529, sum loss: 3577.167236, avg loss: 2.208128, ppl: 9.098667 +epoch: 1, batch: 41530, sum loss: 4305.330566, avg loss: 3.000230, ppl: 20.090164 +epoch: 1, batch: 41531, sum loss: 4477.210449, avg loss: 2.499838, ppl: 12.180522 +epoch: 1, batch: 41532, sum loss: 4178.628906, avg loss: 2.409821, ppl: 11.131964 +epoch: 1, batch: 41533, sum loss: 4885.900391, avg loss: 2.755725, ppl: 15.732445 +epoch: 1, batch: 41534, sum loss: 5207.435547, avg loss: 2.589476, ppl: 13.322787 +epoch: 1, batch: 41535, sum loss: 3907.199219, avg loss: 2.332656, ppl: 10.305280 +epoch: 1, batch: 41536, sum loss: 4289.514648, avg loss: 2.482358, ppl: 11.969455 +epoch: 1, batch: 41537, sum loss: 6310.729980, avg loss: 2.867210, ppl: 17.587887 +epoch: 1, batch: 41538, sum loss: 4243.186035, avg loss: 2.570071, ppl: 13.066746 +epoch: 1, batch: 41539, sum loss: 3617.718750, avg loss: 2.310165, ppl: 10.076089 +epoch: 1, batch: 41540, sum loss: 4464.277832, avg loss: 2.735465, ppl: 15.416904 +epoch: 1, batch: 41541, sum loss: 4225.600586, avg loss: 2.436909, ppl: 11.437634 +epoch: 1, batch: 41542, sum loss: 5970.568359, avg loss: 2.901151, ppl: 18.195070 +epoch: 1, batch: 41543, sum loss: 4702.872070, avg loss: 2.281840, ppl: 9.794684 +epoch: 1, batch: 41544, sum loss: 4425.500000, avg loss: 2.693548, ppl: 14.784039 +epoch: 1, batch: 41545, sum loss: 4882.346680, avg loss: 2.432659, ppl: 11.389127 +epoch: 1, batch: 41546, sum loss: 
3768.751221, avg loss: 2.340839, ppl: 10.389952 +epoch: 1, batch: 41547, sum loss: 4204.252930, avg loss: 2.617841, ppl: 13.706100 +epoch: 1, batch: 41548, sum loss: 4242.025391, avg loss: 2.540135, ppl: 12.681381 +epoch: 1, batch: 41549, sum loss: 4090.493164, avg loss: 2.409007, ppl: 11.122906 +epoch: 1, batch: 41550, sum loss: 3927.948486, avg loss: 2.720186, ppl: 15.183143 +epoch: 1, batch: 41551, sum loss: 3420.666504, avg loss: 2.531951, ppl: 12.578028 +epoch: 1, batch: 41552, sum loss: 3817.240723, avg loss: 2.363616, ppl: 10.629323 +epoch: 1, batch: 41553, sum loss: 3789.855225, avg loss: 2.496611, ppl: 12.141276 +epoch: 1, batch: 41554, sum loss: 6111.305664, avg loss: 2.994270, ppl: 19.970778 +epoch: 1, batch: 41555, sum loss: 5172.744629, avg loss: 2.880147, ppl: 17.816900 +epoch: 1, batch: 41556, sum loss: 4746.736328, avg loss: 2.751731, ppl: 15.669739 +epoch: 1, batch: 41557, sum loss: 4249.763672, avg loss: 2.477996, ppl: 11.917359 +epoch: 1, batch: 41558, sum loss: 4504.601562, avg loss: 2.553629, ppl: 12.853664 +epoch: 1, batch: 41559, sum loss: 4767.958984, avg loss: 2.844844, ppl: 17.198881 +epoch: 1, batch: 41560, sum loss: 4103.786621, avg loss: 2.534766, ppl: 12.613484 +epoch: 1, batch: 41561, sum loss: 4331.879883, avg loss: 2.771516, ppl: 15.982851 +epoch: 1, batch: 41562, sum loss: 3730.033936, avg loss: 2.498348, ppl: 12.162388 +epoch: 1, batch: 41563, sum loss: 4481.311523, avg loss: 2.669036, ppl: 14.426055 +epoch: 1, batch: 41564, sum loss: 5104.703613, avg loss: 2.806324, ppl: 16.548977 +epoch: 1, batch: 41565, sum loss: 4917.774414, avg loss: 2.593763, ppl: 13.380028 +epoch: 1, batch: 41566, sum loss: 4087.906494, avg loss: 2.577495, ppl: 13.164116 +epoch: 1, batch: 41567, sum loss: 4365.075195, avg loss: 2.552675, ppl: 12.841414 +epoch: 1, batch: 41568, sum loss: 4342.395996, avg loss: 2.695466, ppl: 14.812424 +epoch: 1, batch: 41569, sum loss: 4264.289062, avg loss: 2.463483, ppl: 11.745648 +epoch: 1, batch: 41570, sum loss: 
5239.781738, avg loss: 2.729053, ppl: 15.318374 +epoch: 1, batch: 41571, sum loss: 4222.366211, avg loss: 2.366797, ppl: 10.663185 +epoch: 1, batch: 41572, sum loss: 4775.168945, avg loss: 2.785980, ppl: 16.215694 +epoch: 1, batch: 41573, sum loss: 4763.301758, avg loss: 2.488663, ppl: 12.045166 +epoch: 1, batch: 41574, sum loss: 4674.930664, avg loss: 2.700711, ppl: 14.890311 +epoch: 1, batch: 41575, sum loss: 4023.261719, avg loss: 2.516111, ppl: 12.380358 +epoch: 1, batch: 41576, sum loss: 4533.197266, avg loss: 2.745728, ppl: 15.575953 +epoch: 1, batch: 41577, sum loss: 3961.976074, avg loss: 2.181705, ppl: 8.861400 +epoch: 1, batch: 41578, sum loss: 4584.501953, avg loss: 2.668511, ppl: 14.418487 +epoch: 1, batch: 41579, sum loss: 3664.338867, avg loss: 2.325088, ppl: 10.227580 +epoch: 1, batch: 41580, sum loss: 4622.952637, avg loss: 2.562612, ppl: 12.969654 +epoch: 1, batch: 41581, sum loss: 4691.098633, avg loss: 2.669948, ppl: 14.439217 +epoch: 1, batch: 41582, sum loss: 4347.449707, avg loss: 2.683611, ppl: 14.637857 +epoch: 1, batch: 41583, sum loss: 4015.294434, avg loss: 2.636438, ppl: 13.963372 +epoch: 1, batch: 41584, sum loss: 4070.140137, avg loss: 2.556621, ppl: 12.892176 +epoch: 1, batch: 41585, sum loss: 4623.171875, avg loss: 2.703609, ppl: 14.933533 +epoch: 1, batch: 41586, sum loss: 3979.904785, avg loss: 2.495238, ppl: 12.124619 +epoch: 1, batch: 41587, sum loss: 4738.802734, avg loss: 2.626831, ppl: 13.829871 +epoch: 1, batch: 41588, sum loss: 3749.727051, avg loss: 2.321812, ppl: 10.194134 +epoch: 1, batch: 41589, sum loss: 4115.458984, avg loss: 2.322494, ppl: 10.201082 +epoch: 1, batch: 41590, sum loss: 4447.377930, avg loss: 2.515485, ppl: 12.372612 +epoch: 1, batch: 41591, sum loss: 5331.214355, avg loss: 2.724177, ppl: 15.243861 +epoch: 1, batch: 41592, sum loss: 3576.355225, avg loss: 2.427940, ppl: 11.335505 +epoch: 1, batch: 41593, sum loss: 4284.238281, avg loss: 2.462206, ppl: 11.730659 +epoch: 1, batch: 41594, sum loss: 
4284.333496, avg loss: 2.535109, ppl: 12.617801 +epoch: 1, batch: 41595, sum loss: 4514.046387, avg loss: 2.455956, ppl: 11.657567 +epoch: 1, batch: 41596, sum loss: 5276.186035, avg loss: 2.807976, ppl: 16.576326 +epoch: 1, batch: 41597, sum loss: 3716.658203, avg loss: 2.384001, ppl: 10.848226 +epoch: 1, batch: 41598, sum loss: 4171.030273, avg loss: 2.697950, ppl: 14.849255 +epoch: 1, batch: 41599, sum loss: 4864.565918, avg loss: 2.597206, ppl: 13.426168 +epoch: 1, batch: 41600, sum loss: 4011.361084, avg loss: 2.568093, ppl: 13.040930 +epoch: 1, batch: 41601, sum loss: 5103.513184, avg loss: 2.589301, ppl: 13.320462 +epoch: 1, batch: 41602, sum loss: 4863.443848, avg loss: 2.796690, ppl: 16.390305 +epoch: 1, batch: 41603, sum loss: 4236.875488, avg loss: 2.418308, ppl: 11.226848 +epoch: 1, batch: 41604, sum loss: 5003.686523, avg loss: 2.838166, ppl: 17.084404 +epoch: 1, batch: 41605, sum loss: 4659.994141, avg loss: 2.598993, ppl: 13.450185 +epoch: 1, batch: 41606, sum loss: 4921.139648, avg loss: 2.677443, ppl: 14.547840 +epoch: 1, batch: 41607, sum loss: 4437.634277, avg loss: 2.787459, ppl: 16.239697 +epoch: 1, batch: 41608, sum loss: 4225.375977, avg loss: 2.560834, ppl: 12.946609 +epoch: 1, batch: 41609, sum loss: 3581.439453, avg loss: 2.365548, ppl: 10.649872 +epoch: 1, batch: 41610, sum loss: 4426.062988, avg loss: 2.492153, ppl: 12.087268 +epoch: 1, batch: 41611, sum loss: 5354.989258, avg loss: 2.815452, ppl: 16.700720 +epoch: 1, batch: 41612, sum loss: 4218.751953, avg loss: 2.665036, ppl: 14.368466 +epoch: 1, batch: 41613, sum loss: 4981.995117, avg loss: 2.671311, ppl: 14.458915 +epoch: 1, batch: 41614, sum loss: 4302.130371, avg loss: 2.658919, ppl: 14.280838 +epoch: 1, batch: 41615, sum loss: 5467.620605, avg loss: 2.922299, ppl: 18.583958 +epoch: 1, batch: 41616, sum loss: 5203.081055, avg loss: 2.918161, ppl: 18.507225 +epoch: 1, batch: 41617, sum loss: 4883.500977, avg loss: 2.740461, ppl: 15.494124 +epoch: 1, batch: 41618, sum loss: 
5054.734375, avg loss: 2.720524, ppl: 15.188284 +epoch: 1, batch: 41619, sum loss: 3654.909668, avg loss: 2.367170, ppl: 10.667157 +epoch: 1, batch: 41620, sum loss: 3457.039307, avg loss: 2.425992, ppl: 11.313453 +epoch: 1, batch: 41621, sum loss: 4676.102539, avg loss: 2.737765, ppl: 15.452408 +epoch: 1, batch: 41622, sum loss: 3392.792969, avg loss: 2.404531, ppl: 11.073233 +epoch: 1, batch: 41623, sum loss: 5159.550781, avg loss: 2.828701, ppl: 16.923464 +epoch: 1, batch: 41624, sum loss: 4187.694824, avg loss: 2.541077, ppl: 12.693333 +epoch: 1, batch: 41625, sum loss: 3265.453613, avg loss: 2.293156, ppl: 9.906149 +epoch: 1, batch: 41626, sum loss: 4370.332031, avg loss: 2.533526, ppl: 12.597847 +epoch: 1, batch: 41627, sum loss: 4410.479980, avg loss: 2.573209, ppl: 13.107818 +epoch: 1, batch: 41628, sum loss: 4057.687012, avg loss: 2.692559, ppl: 14.769429 +epoch: 1, batch: 41629, sum loss: 4386.402832, avg loss: 2.505085, ppl: 12.244594 +epoch: 1, batch: 41630, sum loss: 3993.173584, avg loss: 2.581237, ppl: 13.213474 +epoch: 1, batch: 41631, sum loss: 3962.342285, avg loss: 2.679068, ppl: 14.571511 +epoch: 1, batch: 41632, sum loss: 4431.320312, avg loss: 2.740458, ppl: 15.494080 +epoch: 1, batch: 41633, sum loss: 3916.506104, avg loss: 2.593713, ppl: 13.379354 +epoch: 1, batch: 41634, sum loss: 4032.068359, avg loss: 2.423118, ppl: 11.280980 +epoch: 1, batch: 41635, sum loss: 4646.474609, avg loss: 2.875294, ppl: 17.730631 +epoch: 1, batch: 41636, sum loss: 4952.135254, avg loss: 2.746608, ppl: 15.589669 +epoch: 1, batch: 41637, sum loss: 4429.130371, avg loss: 2.647418, ppl: 14.117540 +epoch: 1, batch: 41638, sum loss: 4168.379883, avg loss: 2.512586, ppl: 12.336790 +epoch: 1, batch: 41639, sum loss: 4152.891602, avg loss: 2.608600, ppl: 13.580028 +epoch: 1, batch: 41640, sum loss: 4274.401855, avg loss: 2.636892, ppl: 13.969716 +epoch: 1, batch: 41641, sum loss: 5160.210938, avg loss: 2.777293, ppl: 16.075449 +epoch: 1, batch: 41642, sum loss: 
4519.768555, avg loss: 2.764384, ppl: 15.869266 +epoch: 1, batch: 41643, sum loss: 4135.794434, avg loss: 2.838568, ppl: 17.091282 +epoch: 1, batch: 41644, sum loss: 4002.116211, avg loss: 2.377966, ppl: 10.782945 +epoch: 1, batch: 41645, sum loss: 4599.621094, avg loss: 2.666447, ppl: 14.388758 +epoch: 1, batch: 41646, sum loss: 3933.258057, avg loss: 2.408609, ppl: 11.118481 +epoch: 1, batch: 41647, sum loss: 3508.940918, avg loss: 2.436764, ppl: 11.435980 +epoch: 1, batch: 41648, sum loss: 5007.372070, avg loss: 2.575809, ppl: 13.141945 +epoch: 1, batch: 41649, sum loss: 4938.508789, avg loss: 2.628264, ppl: 13.849711 +epoch: 1, batch: 41650, sum loss: 3843.816162, avg loss: 2.443621, ppl: 11.514662 +epoch: 1, batch: 41651, sum loss: 5054.154785, avg loss: 2.844206, ppl: 17.187912 +epoch: 1, batch: 41652, sum loss: 4296.034668, avg loss: 2.561738, ppl: 12.958320 +epoch: 1, batch: 41653, sum loss: 4757.490234, avg loss: 2.466299, ppl: 11.778768 +epoch: 1, batch: 41654, sum loss: 4134.534180, avg loss: 2.333259, ppl: 10.311488 +epoch: 1, batch: 41655, sum loss: 4478.842773, avg loss: 2.469042, ppl: 11.811130 +epoch: 1, batch: 41656, sum loss: 3811.649658, avg loss: 2.452799, ppl: 11.620829 +epoch: 1, batch: 41657, sum loss: 4428.371094, avg loss: 2.606457, ppl: 13.550961 +epoch: 1, batch: 41658, sum loss: 3956.600586, avg loss: 2.414033, ppl: 11.178957 +epoch: 1, batch: 41659, sum loss: 4445.385254, avg loss: 2.727230, ppl: 15.290479 +epoch: 1, batch: 41660, sum loss: 3642.573730, avg loss: 2.398008, ppl: 11.001236 +epoch: 1, batch: 41661, sum loss: 5097.195801, avg loss: 2.797583, ppl: 16.404945 +epoch: 1, batch: 41662, sum loss: 5076.762695, avg loss: 2.774187, ppl: 16.025599 +epoch: 1, batch: 41663, sum loss: 4260.372559, avg loss: 2.698146, ppl: 14.852168 +epoch: 1, batch: 41664, sum loss: 4494.168457, avg loss: 2.543389, ppl: 12.722716 +epoch: 1, batch: 41665, sum loss: 4389.083008, avg loss: 2.686097, ppl: 14.674295 +epoch: 1, batch: 41666, sum loss: 
4919.831543, avg loss: 2.724159, ppl: 15.243592 +epoch: 1, batch: 41667, sum loss: 5397.867676, avg loss: 2.848479, ppl: 17.261507 +epoch: 1, batch: 41668, sum loss: 4074.105469, avg loss: 2.443974, ppl: 11.518731 +epoch: 1, batch: 41669, sum loss: 4203.267578, avg loss: 2.432446, ppl: 11.386705 +epoch: 1, batch: 41670, sum loss: 4286.687500, avg loss: 2.339895, ppl: 10.380147 +epoch: 1, batch: 41671, sum loss: 5404.277832, avg loss: 2.810337, ppl: 16.615515 +epoch: 1, batch: 41672, sum loss: 5194.122070, avg loss: 2.705272, ppl: 14.958385 +epoch: 1, batch: 41673, sum loss: 4329.576660, avg loss: 2.474044, ppl: 11.870352 +epoch: 1, batch: 41674, sum loss: 4502.619141, avg loss: 2.296083, ppl: 9.935192 +epoch: 1, batch: 41675, sum loss: 4330.125000, avg loss: 2.544139, ppl: 12.732263 +epoch: 1, batch: 41676, sum loss: 3474.786133, avg loss: 2.277055, ppl: 9.747933 +epoch: 1, batch: 41677, sum loss: 4564.906250, avg loss: 2.632587, ppl: 13.909711 +epoch: 1, batch: 41678, sum loss: 3996.382812, avg loss: 2.526158, ppl: 12.505372 +epoch: 1, batch: 41679, sum loss: 3356.466309, avg loss: 2.258726, ppl: 9.570885 +epoch: 1, batch: 41680, sum loss: 5798.045898, avg loss: 2.815952, ppl: 16.709080 +epoch: 1, batch: 41681, sum loss: 4079.839111, avg loss: 2.598624, ppl: 13.445221 +epoch: 1, batch: 41682, sum loss: 4310.959961, avg loss: 2.590721, ppl: 13.339388 +epoch: 1, batch: 41683, sum loss: 4393.475098, avg loss: 2.593551, ppl: 13.377186 +epoch: 1, batch: 41684, sum loss: 3171.893555, avg loss: 2.267258, ppl: 9.652893 +epoch: 1, batch: 41685, sum loss: 4030.143555, avg loss: 2.458904, ppl: 11.691988 +epoch: 1, batch: 41686, sum loss: 5015.171875, avg loss: 2.820682, ppl: 16.788294 +epoch: 1, batch: 41687, sum loss: 4911.141602, avg loss: 2.773089, ppl: 16.008013 +epoch: 1, batch: 41688, sum loss: 3831.687256, avg loss: 2.499470, ppl: 12.176039 +epoch: 1, batch: 41689, sum loss: 5135.626953, avg loss: 2.711524, ppl: 15.052201 +epoch: 1, batch: 41690, sum loss: 
5325.585938, avg loss: 2.669466, ppl: 14.432267 +epoch: 1, batch: 41691, sum loss: 4951.168945, avg loss: 2.698185, ppl: 14.852745 +epoch: 1, batch: 41692, sum loss: 4307.468750, avg loss: 2.424012, ppl: 11.291065 +epoch: 1, batch: 41693, sum loss: 3696.691650, avg loss: 2.347106, ppl: 10.455266 +epoch: 1, batch: 41694, sum loss: 4725.708008, avg loss: 2.749103, ppl: 15.628608 +epoch: 1, batch: 41695, sum loss: 5280.853027, avg loss: 2.801514, ppl: 16.469557 +epoch: 1, batch: 41696, sum loss: 4647.346191, avg loss: 2.648060, ppl: 14.126611 +epoch: 1, batch: 41697, sum loss: 4585.020020, avg loss: 2.661068, ppl: 14.311565 +epoch: 1, batch: 41698, sum loss: 3917.188477, avg loss: 2.519092, ppl: 12.417320 +epoch: 1, batch: 41699, sum loss: 4280.447754, avg loss: 2.575480, ppl: 13.137622 +epoch: 1, batch: 41700, sum loss: 5119.591309, avg loss: 2.730449, ppl: 15.339768 +epoch: 1, batch: 41701, sum loss: 4354.049805, avg loss: 2.650061, ppl: 14.154897 +epoch: 1, batch: 41702, sum loss: 5072.504883, avg loss: 2.773376, ppl: 16.012606 +epoch: 1, batch: 41703, sum loss: 3534.050537, avg loss: 2.221276, ppl: 9.219090 +epoch: 1, batch: 41704, sum loss: 3829.838135, avg loss: 2.514667, ppl: 12.362494 +epoch: 1, batch: 41705, sum loss: 5349.180664, avg loss: 2.893013, ppl: 18.047602 +epoch: 1, batch: 41706, sum loss: 4064.777344, avg loss: 2.475504, ppl: 11.887696 +epoch: 1, batch: 41707, sum loss: 4567.689941, avg loss: 2.509720, ppl: 12.301483 +epoch: 1, batch: 41708, sum loss: 4616.631836, avg loss: 2.580565, ppl: 13.204602 +epoch: 1, batch: 41709, sum loss: 4629.213379, avg loss: 2.820971, ppl: 16.793144 +epoch: 1, batch: 41710, sum loss: 4632.274902, avg loss: 2.683821, ppl: 14.640929 +epoch: 1, batch: 41711, sum loss: 4617.423828, avg loss: 2.637021, ppl: 13.971521 +epoch: 1, batch: 41712, sum loss: 4630.166992, avg loss: 2.510936, ppl: 12.316459 +epoch: 1, batch: 41713, sum loss: 2933.355713, avg loss: 2.054171, ppl: 7.800365 +epoch: 1, batch: 41714, sum loss: 
4449.437500, avg loss: 2.661147, ppl: 14.312695 +epoch: 1, batch: 41715, sum loss: 4126.038086, avg loss: 2.509756, ppl: 12.301923 +epoch: 1, batch: 41716, sum loss: 4027.968262, avg loss: 2.550961, ppl: 12.819424 +epoch: 1, batch: 41717, sum loss: 4528.807617, avg loss: 2.646878, ppl: 14.109916 +epoch: 1, batch: 41718, sum loss: 4351.661133, avg loss: 2.461347, ppl: 11.720587 +epoch: 1, batch: 41719, sum loss: 4018.952881, avg loss: 2.437206, ppl: 11.441031 +epoch: 1, batch: 41720, sum loss: 3436.621338, avg loss: 2.240301, ppl: 9.396156 +epoch: 1, batch: 41721, sum loss: 4605.138672, avg loss: 2.564108, ppl: 12.989072 +epoch: 1, batch: 41722, sum loss: 4023.652832, avg loss: 2.645400, ppl: 14.089077 +epoch: 1, batch: 41723, sum loss: 4322.818848, avg loss: 2.439514, ppl: 11.467468 +epoch: 1, batch: 41724, sum loss: 4139.858398, avg loss: 2.582569, ppl: 13.231087 +epoch: 1, batch: 41725, sum loss: 5071.128906, avg loss: 2.697409, ppl: 14.841227 +epoch: 1, batch: 41726, sum loss: 4519.291016, avg loss: 2.576563, ppl: 13.151853 +epoch: 1, batch: 41727, sum loss: 4661.972656, avg loss: 2.493034, ppl: 12.097921 +epoch: 1, batch: 41728, sum loss: 4328.346680, avg loss: 2.658689, ppl: 14.277566 +epoch: 1, batch: 41729, sum loss: 4899.363281, avg loss: 2.790070, ppl: 16.282164 +epoch: 1, batch: 41730, sum loss: 4475.710938, avg loss: 2.688115, ppl: 14.703928 +epoch: 1, batch: 41731, sum loss: 3666.521973, avg loss: 2.454165, ppl: 11.636707 +epoch: 1, batch: 41732, sum loss: 4444.185059, avg loss: 2.515102, ppl: 12.367869 +epoch: 1, batch: 41733, sum loss: 4991.520020, avg loss: 2.886940, ppl: 17.938343 +epoch: 1, batch: 41734, sum loss: 4652.469727, avg loss: 2.660074, ppl: 14.297351 +epoch: 1, batch: 41735, sum loss: 3934.036133, avg loss: 2.627947, ppl: 13.845311 +epoch: 1, batch: 41736, sum loss: 4503.971191, avg loss: 2.578117, ppl: 13.172317 +epoch: 1, batch: 41737, sum loss: 4885.944336, avg loss: 2.761981, ppl: 15.831174 +epoch: 1, batch: 41738, sum loss: 
4304.683105, avg loss: 2.618420, ppl: 13.714044 +epoch: 1, batch: 41739, sum loss: 5296.837402, avg loss: 2.771762, ppl: 15.986780 +epoch: 1, batch: 41740, sum loss: 3484.859131, avg loss: 2.251201, ppl: 9.499139 +epoch: 1, batch: 41741, sum loss: 4127.994141, avg loss: 2.534066, ppl: 12.604658 +epoch: 1, batch: 41742, sum loss: 5474.705078, avg loss: 2.755262, ppl: 15.725155 +epoch: 1, batch: 41743, sum loss: 4098.485352, avg loss: 2.553573, ppl: 12.852950 +epoch: 1, batch: 41744, sum loss: 4021.076172, avg loss: 2.426721, ppl: 11.321693 +epoch: 1, batch: 41745, sum loss: 4519.418457, avg loss: 2.712736, ppl: 15.070454 +epoch: 1, batch: 41746, sum loss: 3994.006348, avg loss: 2.504079, ppl: 12.232292 +epoch: 1, batch: 41747, sum loss: 3789.603516, avg loss: 2.266510, ppl: 9.645674 +epoch: 1, batch: 41748, sum loss: 4913.833984, avg loss: 2.707347, ppl: 14.989450 +epoch: 1, batch: 41749, sum loss: 4678.054199, avg loss: 2.449243, ppl: 11.579578 +epoch: 1, batch: 41750, sum loss: 4787.495117, avg loss: 2.889255, ppl: 17.979902 +epoch: 1, batch: 41751, sum loss: 4815.042480, avg loss: 2.724982, ppl: 15.256132 +epoch: 1, batch: 41752, sum loss: 3464.802490, avg loss: 2.508909, ppl: 12.291507 +epoch: 1, batch: 41753, sum loss: 5579.879395, avg loss: 2.799739, ppl: 16.440353 +epoch: 1, batch: 41754, sum loss: 3351.829346, avg loss: 2.348864, ppl: 10.473668 +epoch: 1, batch: 41755, sum loss: 4584.307617, avg loss: 2.740172, ppl: 15.489648 +epoch: 1, batch: 41756, sum loss: 4113.737305, avg loss: 2.526866, ppl: 12.514221 +epoch: 1, batch: 41757, sum loss: 3881.585938, avg loss: 2.275255, ppl: 9.730404 +epoch: 1, batch: 41758, sum loss: 4138.407715, avg loss: 2.493017, ppl: 12.097716 +epoch: 1, batch: 41759, sum loss: 3307.867432, avg loss: 2.170517, ppl: 8.762811 +epoch: 1, batch: 41760, sum loss: 4157.689453, avg loss: 2.742539, ppl: 15.526362 +epoch: 1, batch: 41761, sum loss: 4628.828613, avg loss: 2.613681, ppl: 13.649196 +epoch: 1, batch: 41762, sum loss: 
6388.489258, avg loss: 3.061087, ppl: 21.350761 +epoch: 1, batch: 41763, sum loss: 3629.877930, avg loss: 2.381810, ppl: 10.824474 +epoch: 1, batch: 41764, sum loss: 5071.020996, avg loss: 2.745545, ppl: 15.573097 +epoch: 1, batch: 41765, sum loss: 4084.349121, avg loss: 2.541599, ppl: 12.699960 +epoch: 1, batch: 41766, sum loss: 4369.198242, avg loss: 2.690393, ppl: 14.737466 +epoch: 1, batch: 41767, sum loss: 4144.760742, avg loss: 2.596968, ppl: 13.422976 +epoch: 1, batch: 41768, sum loss: 4320.174316, avg loss: 2.454644, ppl: 11.642293 +epoch: 1, batch: 41769, sum loss: 4566.271484, avg loss: 2.559569, ppl: 12.930248 +epoch: 1, batch: 41770, sum loss: 4776.638672, avg loss: 2.512698, ppl: 12.338173 +epoch: 1, batch: 41771, sum loss: 4585.278320, avg loss: 2.482555, ppl: 11.971809 +epoch: 1, batch: 41772, sum loss: 4254.326660, avg loss: 2.606818, ppl: 13.555847 +epoch: 1, batch: 41773, sum loss: 4435.862305, avg loss: 2.503308, ppl: 12.222864 +epoch: 1, batch: 41774, sum loss: 4331.468262, avg loss: 2.524166, ppl: 12.480477 +epoch: 1, batch: 41775, sum loss: 4344.088867, avg loss: 2.357075, ppl: 10.560016 +epoch: 1, batch: 41776, sum loss: 4899.170410, avg loss: 2.744633, ppl: 15.558909 +epoch: 1, batch: 41777, sum loss: 3870.559570, avg loss: 2.583818, ppl: 13.247621 +epoch: 1, batch: 41778, sum loss: 4549.096680, avg loss: 2.417161, ppl: 11.213975 +epoch: 1, batch: 41779, sum loss: 4249.955078, avg loss: 2.585131, ppl: 13.265022 +epoch: 1, batch: 41780, sum loss: 4509.273926, avg loss: 2.711530, ppl: 15.052284 +epoch: 1, batch: 41781, sum loss: 4109.683594, avg loss: 2.453543, ppl: 11.629474 +epoch: 1, batch: 41782, sum loss: 3294.543945, avg loss: 2.458615, ppl: 11.688609 +epoch: 1, batch: 41783, sum loss: 4684.120605, avg loss: 2.492880, ppl: 12.096058 +epoch: 1, batch: 41784, sum loss: 3766.391113, avg loss: 2.484427, ppl: 11.994246 +epoch: 1, batch: 41785, sum loss: 3486.177246, avg loss: 2.316397, ppl: 10.139077 +epoch: 1, batch: 41786, sum loss: 
4068.956055, avg loss: 2.517918, ppl: 12.402752 +epoch: 1, batch: 41787, sum loss: 4421.535156, avg loss: 2.414820, ppl: 11.187756 +epoch: 1, batch: 41788, sum loss: 5263.664062, avg loss: 2.839085, ppl: 17.100113 +epoch: 1, batch: 41789, sum loss: 3885.770020, avg loss: 2.385371, ppl: 10.863097 +epoch: 1, batch: 41790, sum loss: 4903.331055, avg loss: 2.751589, ppl: 15.667505 +epoch: 1, batch: 41791, sum loss: 3644.354248, avg loss: 2.306553, ppl: 10.039762 +epoch: 1, batch: 41792, sum loss: 5381.098633, avg loss: 2.815855, ppl: 16.707451 +epoch: 1, batch: 41793, sum loss: 4023.949219, avg loss: 2.582766, ppl: 13.233693 +epoch: 1, batch: 41794, sum loss: 4243.122070, avg loss: 2.393188, ppl: 10.948341 +epoch: 1, batch: 41795, sum loss: 4554.020020, avg loss: 2.599327, ppl: 13.454675 +epoch: 1, batch: 41796, sum loss: 5002.168457, avg loss: 2.814951, ppl: 16.692360 +epoch: 1, batch: 41797, sum loss: 4124.590820, avg loss: 2.514994, ppl: 12.366539 +epoch: 1, batch: 41798, sum loss: 5416.098145, avg loss: 2.882437, ppl: 17.857731 +epoch: 1, batch: 41799, sum loss: 4020.816162, avg loss: 2.404794, ppl: 11.076154 +epoch: 1, batch: 41800, sum loss: 4312.528320, avg loss: 2.526379, ppl: 12.508127 +epoch: 1, batch: 41801, sum loss: 4376.250488, avg loss: 2.476656, ppl: 11.901396 +epoch: 1, batch: 41802, sum loss: 4124.676270, avg loss: 2.439194, ppl: 11.463794 +epoch: 1, batch: 41803, sum loss: 6231.278320, avg loss: 2.903671, ppl: 18.240990 +epoch: 1, batch: 41804, sum loss: 4084.983398, avg loss: 2.390277, ppl: 10.916519 +epoch: 1, batch: 41805, sum loss: 4244.535156, avg loss: 2.547740, ppl: 12.778195 +epoch: 1, batch: 41806, sum loss: 4416.989258, avg loss: 2.719821, ppl: 15.177605 +epoch: 1, batch: 41807, sum loss: 4322.362793, avg loss: 2.574367, ppl: 13.123012 +epoch: 1, batch: 41808, sum loss: 3430.934082, avg loss: 2.387567, ppl: 10.886977 +epoch: 1, batch: 41809, sum loss: 3814.717041, avg loss: 2.257229, ppl: 9.556572 +epoch: 1, batch: 41810, sum loss: 
4703.960938, avg loss: 2.516833, ppl: 12.389296 +epoch: 1, batch: 41811, sum loss: 4369.931641, avg loss: 2.671108, ppl: 14.455971 +epoch: 1, batch: 41812, sum loss: 3578.026611, avg loss: 2.393329, ppl: 10.949882 +epoch: 1, batch: 41813, sum loss: 4724.956543, avg loss: 2.698433, ppl: 14.856440 +epoch: 1, batch: 41814, sum loss: 4852.604492, avg loss: 2.641592, ppl: 14.035531 +epoch: 1, batch: 41815, sum loss: 3833.814941, avg loss: 2.647662, ppl: 14.120991 +epoch: 1, batch: 41816, sum loss: 3663.114258, avg loss: 2.430733, ppl: 11.367208 +epoch: 1, batch: 41817, sum loss: 4995.256836, avg loss: 2.809481, ppl: 16.601299 +epoch: 1, batch: 41818, sum loss: 3687.666992, avg loss: 2.351828, ppl: 10.504759 +epoch: 1, batch: 41819, sum loss: 4450.754883, avg loss: 2.641397, ppl: 14.032801 +epoch: 1, batch: 41820, sum loss: 5300.090332, avg loss: 2.947770, ppl: 19.063393 +epoch: 1, batch: 41821, sum loss: 4523.248047, avg loss: 2.500414, ppl: 12.187534 +epoch: 1, batch: 41822, sum loss: 4415.141113, avg loss: 2.502914, ppl: 12.218051 +epoch: 1, batch: 41823, sum loss: 4297.198242, avg loss: 2.659157, ppl: 14.284246 +epoch: 1, batch: 41824, sum loss: 4514.687012, avg loss: 2.471093, ppl: 11.835378 +epoch: 1, batch: 41825, sum loss: 5303.986328, avg loss: 2.812294, ppl: 16.648066 +epoch: 1, batch: 41826, sum loss: 5343.127930, avg loss: 2.801850, ppl: 16.475100 +epoch: 1, batch: 41827, sum loss: 4948.236816, avg loss: 2.525899, ppl: 12.502134 +epoch: 1, batch: 41828, sum loss: 4272.305664, avg loss: 2.513121, ppl: 12.343395 +epoch: 1, batch: 41829, sum loss: 4344.210449, avg loss: 2.431008, ppl: 11.370334 +epoch: 1, batch: 41830, sum loss: 4903.271973, avg loss: 2.844126, ppl: 17.186523 +epoch: 1, batch: 41831, sum loss: 4631.280273, avg loss: 2.689477, ppl: 14.723980 +epoch: 1, batch: 41832, sum loss: 4502.277832, avg loss: 2.700826, ppl: 14.892034 +epoch: 1, batch: 41833, sum loss: 5083.925293, avg loss: 2.740660, ppl: 15.497216 +epoch: 1, batch: 41834, sum loss: 
5299.487793, avg loss: 2.669767, ppl: 14.436607 +epoch: 1, batch: 41835, sum loss: 3793.537598, avg loss: 2.525657, ppl: 12.499109 +epoch: 1, batch: 41836, sum loss: 3639.045166, avg loss: 2.472178, ppl: 11.848218 +epoch: 1, batch: 41837, sum loss: 4644.242188, avg loss: 2.784318, ppl: 16.188772 +epoch: 1, batch: 41838, sum loss: 4357.123047, avg loss: 2.610619, ppl: 13.607472 +epoch: 1, batch: 41839, sum loss: 4225.094727, avg loss: 2.500056, ppl: 12.183177 +epoch: 1, batch: 41840, sum loss: 4583.476074, avg loss: 2.629648, ppl: 13.868883 +epoch: 1, batch: 41841, sum loss: 3967.314941, avg loss: 2.623886, ppl: 13.789200 +epoch: 1, batch: 41842, sum loss: 4688.962891, avg loss: 2.934270, ppl: 18.807762 +epoch: 1, batch: 41843, sum loss: 4684.442871, avg loss: 2.715619, ppl: 15.113964 +epoch: 1, batch: 41844, sum loss: 4840.000488, avg loss: 2.581334, ppl: 13.214750 +epoch: 1, batch: 41845, sum loss: 3776.848633, avg loss: 2.754813, ppl: 15.718101 +epoch: 1, batch: 41846, sum loss: 4492.319336, avg loss: 2.624018, ppl: 13.791027 +epoch: 1, batch: 41847, sum loss: 4284.897461, avg loss: 2.542966, ppl: 12.717333 +epoch: 1, batch: 41848, sum loss: 3354.456543, avg loss: 2.344135, ppl: 10.424248 +epoch: 1, batch: 41849, sum loss: 5331.203125, avg loss: 2.798532, ppl: 16.420521 +epoch: 1, batch: 41850, sum loss: 3576.652344, avg loss: 2.456492, ppl: 11.663822 +epoch: 1, batch: 41851, sum loss: 4237.093262, avg loss: 2.538702, ppl: 12.663220 +epoch: 1, batch: 41852, sum loss: 3988.203613, avg loss: 2.361281, ppl: 10.604528 +epoch: 1, batch: 41853, sum loss: 3839.247559, avg loss: 2.242551, ppl: 9.417325 +epoch: 1, batch: 41854, sum loss: 4811.175781, avg loss: 2.875777, ppl: 17.739210 +epoch: 1, batch: 41855, sum loss: 4509.395020, avg loss: 2.582700, ppl: 13.232825 +epoch: 1, batch: 41856, sum loss: 3671.142822, avg loss: 2.159496, ppl: 8.666767 +epoch: 1, batch: 41857, sum loss: 5055.140137, avg loss: 2.588398, ppl: 13.308428 +epoch: 1, batch: 41858, sum loss: 
5220.095703, avg loss: 2.645766, ppl: 14.094234 +epoch: 1, batch: 41859, sum loss: 3662.384766, avg loss: 2.539795, ppl: 12.677077 +epoch: 1, batch: 41860, sum loss: 4498.140137, avg loss: 2.570366, ppl: 13.070603 +epoch: 1, batch: 41861, sum loss: 4520.646484, avg loss: 2.599567, ppl: 13.457906 +epoch: 1, batch: 41862, sum loss: 4671.625000, avg loss: 2.566827, ppl: 13.024433 +epoch: 1, batch: 41863, sum loss: 4735.829102, avg loss: 2.650156, ppl: 14.156251 +epoch: 1, batch: 41864, sum loss: 4660.169434, avg loss: 2.755866, ppl: 15.734662 +epoch: 1, batch: 41865, sum loss: 3044.880371, avg loss: 2.089829, ppl: 8.083530 +epoch: 1, batch: 41866, sum loss: 5176.763672, avg loss: 2.642554, ppl: 14.049043 +epoch: 1, batch: 41867, sum loss: 3937.981445, avg loss: 2.319188, ppl: 10.167417 +epoch: 1, batch: 41868, sum loss: 4947.468262, avg loss: 2.562128, ppl: 12.963368 +epoch: 1, batch: 41869, sum loss: 4436.362305, avg loss: 2.617323, ppl: 13.699001 +epoch: 1, batch: 41870, sum loss: 4416.036621, avg loss: 2.504842, ppl: 12.241625 +epoch: 1, batch: 41871, sum loss: 4275.987305, avg loss: 2.660851, ppl: 14.308460 +epoch: 1, batch: 41872, sum loss: 4157.966797, avg loss: 2.582588, ppl: 13.231336 +epoch: 1, batch: 41873, sum loss: 4515.798340, avg loss: 2.608780, ppl: 13.582469 +epoch: 1, batch: 41874, sum loss: 3968.008545, avg loss: 2.480005, ppl: 11.941327 +epoch: 1, batch: 41875, sum loss: 4141.919922, avg loss: 2.643216, ppl: 14.058348 +epoch: 1, batch: 41876, sum loss: 4330.162598, avg loss: 2.566783, ppl: 13.023859 +epoch: 1, batch: 41877, sum loss: 4389.069824, avg loss: 2.322259, ppl: 10.198689 +epoch: 1, batch: 41878, sum loss: 4489.906250, avg loss: 2.695022, ppl: 14.805839 +epoch: 1, batch: 41879, sum loss: 4514.546387, avg loss: 2.927722, ppl: 18.685009 +epoch: 1, batch: 41880, sum loss: 4215.852539, avg loss: 2.629977, ppl: 13.873444 +epoch: 1, batch: 41881, sum loss: 3729.177246, avg loss: 2.434189, ppl: 11.406563 +epoch: 1, batch: 41882, sum loss: 
4272.005859, avg loss: 2.562691, ppl: 12.970675 +epoch: 1, batch: 41883, sum loss: 4050.370361, avg loss: 2.673512, ppl: 14.490771 +epoch: 1, batch: 41884, sum loss: 4171.040039, avg loss: 2.532507, ppl: 12.585023 +epoch: 1, batch: 41885, sum loss: 4063.829102, avg loss: 2.514746, ppl: 12.363464 +epoch: 1, batch: 41886, sum loss: 4152.282227, avg loss: 2.461341, ppl: 11.720520 +epoch: 1, batch: 41887, sum loss: 4073.668945, avg loss: 2.611326, ppl: 13.617098 +epoch: 1, batch: 41888, sum loss: 4722.686035, avg loss: 2.850142, ppl: 17.290245 +epoch: 1, batch: 41889, sum loss: 3581.868164, avg loss: 2.208303, ppl: 9.100265 +epoch: 1, batch: 41890, sum loss: 4152.043945, avg loss: 2.608068, ppl: 13.572800 +epoch: 1, batch: 41891, sum loss: 5026.933105, avg loss: 2.819368, ppl: 16.766253 +epoch: 1, batch: 41892, sum loss: 4993.645508, avg loss: 2.733249, ppl: 15.382780 +epoch: 1, batch: 41893, sum loss: 4125.965820, avg loss: 2.528165, ppl: 12.530496 +epoch: 1, batch: 41894, sum loss: 3789.375000, avg loss: 2.424424, ppl: 11.295723 +epoch: 1, batch: 41895, sum loss: 4159.872559, avg loss: 2.528798, ppl: 12.538424 +epoch: 1, batch: 41896, sum loss: 4380.327148, avg loss: 2.607338, ppl: 13.562895 +epoch: 1, batch: 41897, sum loss: 3175.737793, avg loss: 2.302928, ppl: 10.003429 +epoch: 1, batch: 41898, sum loss: 4133.643555, avg loss: 2.644686, ppl: 14.079017 +epoch: 1, batch: 41899, sum loss: 2595.982422, avg loss: 2.091847, ppl: 8.099863 +epoch: 1, batch: 41900, sum loss: 3319.148682, avg loss: 2.476977, ppl: 11.905216 +epoch: 1, batch: 41901, sum loss: 4016.313721, avg loss: 2.499262, ppl: 12.173505 +epoch: 1, batch: 41902, sum loss: 4280.298828, avg loss: 2.793929, ppl: 16.345112 +epoch: 1, batch: 41903, sum loss: 4623.796875, avg loss: 2.660413, ppl: 14.302188 +epoch: 1, batch: 41904, sum loss: 3540.768555, avg loss: 2.329453, ppl: 10.272321 +epoch: 1, batch: 41905, sum loss: 5210.256836, avg loss: 2.778804, ppl: 16.099747 +epoch: 1, batch: 41906, sum loss: 
4165.768555, avg loss: 2.481101, ppl: 11.954419 +epoch: 1, batch: 41907, sum loss: 3917.093506, avg loss: 2.381212, ppl: 10.818004 +epoch: 1, batch: 41908, sum loss: 3935.860352, avg loss: 2.469172, ppl: 11.812662 +epoch: 1, batch: 41909, sum loss: 5184.854492, avg loss: 2.686453, ppl: 14.679517 +epoch: 1, batch: 41910, sum loss: 4458.887207, avg loss: 2.652521, ppl: 14.189761 +epoch: 1, batch: 41911, sum loss: 4218.354980, avg loss: 2.576882, ppl: 13.156049 +epoch: 1, batch: 41912, sum loss: 4143.123047, avg loss: 2.455912, ppl: 11.657059 +epoch: 1, batch: 41913, sum loss: 4090.115723, avg loss: 2.694411, ppl: 14.796798 +epoch: 1, batch: 41914, sum loss: 5193.835938, avg loss: 2.670353, ppl: 14.445063 +epoch: 1, batch: 41915, sum loss: 4563.727051, avg loss: 2.777679, ppl: 16.081656 +epoch: 1, batch: 41916, sum loss: 4080.280273, avg loss: 2.463937, ppl: 11.750988 +epoch: 1, batch: 41917, sum loss: 4760.695801, avg loss: 2.600052, ppl: 13.464443 +epoch: 1, batch: 41918, sum loss: 3841.070068, avg loss: 2.695488, ppl: 14.812741 +epoch: 1, batch: 41919, sum loss: 5468.586914, avg loss: 2.627865, ppl: 13.844179 +epoch: 1, batch: 41920, sum loss: 4238.475586, avg loss: 2.561013, ppl: 12.948921 +epoch: 1, batch: 41921, sum loss: 4365.254883, avg loss: 2.648820, ppl: 14.137342 +epoch: 1, batch: 41922, sum loss: 3850.026123, avg loss: 2.501642, ppl: 12.202508 +epoch: 1, batch: 41923, sum loss: 3909.507324, avg loss: 2.346643, ppl: 10.450428 +epoch: 1, batch: 41924, sum loss: 4936.385254, avg loss: 2.690128, ppl: 14.733566 +epoch: 1, batch: 41925, sum loss: 3921.824219, avg loss: 2.276160, ppl: 9.739212 +epoch: 1, batch: 41926, sum loss: 5242.406250, avg loss: 2.759161, ppl: 15.786600 +epoch: 1, batch: 41927, sum loss: 4888.580078, avg loss: 2.724961, ppl: 15.255819 +epoch: 1, batch: 41928, sum loss: 4313.660156, avg loss: 2.561556, ppl: 12.955962 +epoch: 1, batch: 41929, sum loss: 4758.370117, avg loss: 2.680772, ppl: 14.596354 +epoch: 1, batch: 41930, sum loss: 
5261.737793, avg loss: 2.872128, ppl: 17.674585 +epoch: 1, batch: 41931, sum loss: 3507.077148, avg loss: 2.243811, ppl: 9.429201 +epoch: 1, batch: 41932, sum loss: 4063.117920, avg loss: 2.597901, ppl: 13.435512 +epoch: 1, batch: 41933, sum loss: 4310.916504, avg loss: 2.519531, ppl: 12.422763 +epoch: 1, batch: 41934, sum loss: 4685.581543, avg loss: 2.735307, ppl: 15.414478 +epoch: 1, batch: 41935, sum loss: 3720.930664, avg loss: 2.536422, ppl: 12.634378 +epoch: 1, batch: 41936, sum loss: 3802.392090, avg loss: 2.219727, ppl: 9.204816 +epoch: 1, batch: 41937, sum loss: 4228.900879, avg loss: 2.566081, ppl: 13.014714 +epoch: 1, batch: 41938, sum loss: 5095.652832, avg loss: 2.736656, ppl: 15.435279 +epoch: 1, batch: 41939, sum loss: 4332.204102, avg loss: 2.412140, ppl: 11.157818 +epoch: 1, batch: 41940, sum loss: 3545.448486, avg loss: 2.475872, ppl: 11.892073 +epoch: 1, batch: 41941, sum loss: 4174.813965, avg loss: 2.556530, ppl: 12.891011 +epoch: 1, batch: 41942, sum loss: 4799.315430, avg loss: 2.619714, ppl: 13.731792 +epoch: 1, batch: 41943, sum loss: 4473.652344, avg loss: 2.661304, ppl: 14.314947 +epoch: 1, batch: 41944, sum loss: 3951.500977, avg loss: 2.418299, ppl: 11.226748 +epoch: 1, batch: 41945, sum loss: 4599.096191, avg loss: 2.847738, ppl: 17.248713 +epoch: 1, batch: 41946, sum loss: 4322.207031, avg loss: 2.494061, ppl: 12.110351 +epoch: 1, batch: 41947, sum loss: 4085.479248, avg loss: 2.446395, ppl: 11.546645 +epoch: 1, batch: 41948, sum loss: 4172.925293, avg loss: 2.603197, ppl: 13.506855 +epoch: 1, batch: 41949, sum loss: 3628.139648, avg loss: 2.471485, ppl: 11.840013 +epoch: 1, batch: 41950, sum loss: 3559.691162, avg loss: 2.424858, ppl: 11.300620 +epoch: 1, batch: 41951, sum loss: 3501.237549, avg loss: 2.272056, ppl: 9.699319 +epoch: 1, batch: 41952, sum loss: 3781.829346, avg loss: 2.280959, ppl: 9.786057 +epoch: 1, batch: 41953, sum loss: 5063.810059, avg loss: 2.816357, ppl: 16.715841 +epoch: 1, batch: 41954, sum loss: 
4434.232422, avg loss: 2.634719, ppl: 13.939397 +epoch: 1, batch: 41955, sum loss: 4055.795898, avg loss: 2.435914, ppl: 11.426252 +epoch: 1, batch: 41956, sum loss: 5599.310547, avg loss: 2.726052, ppl: 15.272472 +epoch: 1, batch: 41957, sum loss: 4378.862305, avg loss: 2.713050, ppl: 15.075183 +epoch: 1, batch: 41958, sum loss: 4553.247070, avg loss: 2.746229, ppl: 15.583750 +epoch: 1, batch: 41959, sum loss: 4528.062500, avg loss: 2.545285, ppl: 12.746863 +epoch: 1, batch: 41960, sum loss: 4175.649414, avg loss: 2.550794, ppl: 12.817275 +epoch: 1, batch: 41961, sum loss: 3975.581055, avg loss: 2.412367, ppl: 11.160348 +epoch: 1, batch: 41962, sum loss: 4107.890625, avg loss: 2.527933, ppl: 12.527580 +epoch: 1, batch: 41963, sum loss: 4083.627930, avg loss: 2.528562, ppl: 12.535471 +epoch: 1, batch: 41964, sum loss: 3715.054932, avg loss: 2.398357, ppl: 11.005079 +epoch: 1, batch: 41965, sum loss: 3491.919189, avg loss: 2.492448, ppl: 12.090834 +epoch: 1, batch: 41966, sum loss: 3593.326172, avg loss: 2.124971, ppl: 8.372656 +epoch: 1, batch: 41967, sum loss: 4656.078613, avg loss: 2.659097, ppl: 14.283384 +epoch: 1, batch: 41968, sum loss: 3792.817627, avg loss: 2.493634, ppl: 12.105189 +epoch: 1, batch: 41969, sum loss: 4710.338379, avg loss: 2.730631, ppl: 15.342566 +epoch: 1, batch: 41970, sum loss: 4326.645020, avg loss: 2.579991, ppl: 13.197021 +epoch: 1, batch: 41971, sum loss: 3991.020020, avg loss: 2.563276, ppl: 12.978259 +epoch: 1, batch: 41972, sum loss: 4277.080078, avg loss: 2.671505, ppl: 14.461724 +epoch: 1, batch: 41973, sum loss: 4016.578369, avg loss: 2.814701, ppl: 16.688187 +epoch: 1, batch: 41974, sum loss: 4703.280762, avg loss: 2.869604, ppl: 17.630033 +epoch: 1, batch: 41975, sum loss: 3337.689453, avg loss: 2.503893, ppl: 12.230015 +epoch: 1, batch: 41976, sum loss: 4216.719238, avg loss: 2.594904, ppl: 13.395304 +epoch: 1, batch: 41977, sum loss: 4628.932129, avg loss: 2.663367, ppl: 14.344510 +epoch: 1, batch: 41978, sum loss: 
4072.281250, avg loss: 2.498332, ppl: 12.162190 +epoch: 1, batch: 41979, sum loss: 4192.437988, avg loss: 2.443146, ppl: 11.509189 +epoch: 1, batch: 41980, sum loss: 4231.556152, avg loss: 2.500920, ppl: 12.193705 +epoch: 1, batch: 41981, sum loss: 3764.298584, avg loss: 2.354158, ppl: 10.529259 +epoch: 1, batch: 41982, sum loss: 4647.313965, avg loss: 2.809742, ppl: 16.605640 +epoch: 1, batch: 41983, sum loss: 3678.729980, avg loss: 2.436245, ppl: 11.430039 +epoch: 1, batch: 41984, sum loss: 4169.026855, avg loss: 2.613810, ppl: 13.650963 +epoch: 1, batch: 41985, sum loss: 4275.757324, avg loss: 2.758553, ppl: 15.777001 +epoch: 1, batch: 41986, sum loss: 4548.582031, avg loss: 2.797406, ppl: 16.402044 +epoch: 1, batch: 41987, sum loss: 3706.931152, avg loss: 2.380816, ppl: 10.813728 +epoch: 1, batch: 41988, sum loss: 3680.917480, avg loss: 2.385559, ppl: 10.865135 +epoch: 1, batch: 41989, sum loss: 3987.324707, avg loss: 2.616355, ppl: 13.685744 +epoch: 1, batch: 41990, sum loss: 4186.387207, avg loss: 2.554233, ppl: 12.861426 +epoch: 1, batch: 41991, sum loss: 4127.327148, avg loss: 2.530550, ppl: 12.560412 +epoch: 1, batch: 41992, sum loss: 4051.265137, avg loss: 2.423006, ppl: 11.279711 +epoch: 1, batch: 41993, sum loss: 5045.359375, avg loss: 2.629161, ppl: 13.862130 +epoch: 1, batch: 41994, sum loss: 4630.453613, avg loss: 2.752945, ppl: 15.688766 +epoch: 1, batch: 41995, sum loss: 4308.854004, avg loss: 2.439895, ppl: 11.471832 +epoch: 1, batch: 41996, sum loss: 4733.482422, avg loss: 2.623882, ppl: 13.789144 +epoch: 1, batch: 41997, sum loss: 4403.670410, avg loss: 2.615006, ppl: 13.667301 +epoch: 1, batch: 41998, sum loss: 3977.525635, avg loss: 2.608213, ppl: 13.574777 +epoch: 1, batch: 41999, sum loss: 4428.911133, avg loss: 2.492353, ppl: 12.089689 +epoch: 1, batch: 42000, sum loss: 3107.792480, avg loss: 2.268462, ppl: 9.664522 +epoch: 1, batch: 42001, sum loss: 4483.490723, avg loss: 2.521648, ppl: 12.449100 +epoch: 1, batch: 42002, sum loss: 
3929.164551, avg loss: 2.318091, ppl: 10.156269 +epoch: 1, batch: 42003, sum loss: 4710.477051, avg loss: 2.518972, ppl: 12.415823 +epoch: 1, batch: 42004, sum loss: 3861.223633, avg loss: 2.453128, ppl: 11.624653 +epoch: 1, batch: 42005, sum loss: 3248.571777, avg loss: 2.112205, ppl: 8.266451 +epoch: 1, batch: 42006, sum loss: 4798.971680, avg loss: 2.778791, ppl: 16.099545 +epoch: 1, batch: 42007, sum loss: 4160.304688, avg loss: 2.471958, ppl: 11.845614 +epoch: 1, batch: 42008, sum loss: 3031.630127, avg loss: 2.108227, ppl: 8.233628 +epoch: 1, batch: 42009, sum loss: 4767.924316, avg loss: 2.594083, ppl: 13.384306 +epoch: 1, batch: 42010, sum loss: 4516.873535, avg loss: 2.619996, ppl: 13.735673 +epoch: 1, batch: 42011, sum loss: 5049.162109, avg loss: 2.643540, ppl: 14.062903 +epoch: 1, batch: 42012, sum loss: 3948.789795, avg loss: 2.838814, ppl: 17.095474 +epoch: 1, batch: 42013, sum loss: 5091.159180, avg loss: 3.005407, ppl: 20.194431 +epoch: 1, batch: 42014, sum loss: 3587.693604, avg loss: 2.236717, ppl: 9.362541 +epoch: 1, batch: 42015, sum loss: 4544.851562, avg loss: 2.408506, ppl: 11.117344 +epoch: 1, batch: 42016, sum loss: 4496.844238, avg loss: 2.782701, ppl: 16.162609 +epoch: 1, batch: 42017, sum loss: 4449.013672, avg loss: 2.698007, ppl: 14.850107 +epoch: 1, batch: 42018, sum loss: 3954.637939, avg loss: 2.605163, ppl: 13.533436 +epoch: 1, batch: 42019, sum loss: 4996.611328, avg loss: 2.737869, ppl: 15.454021 +epoch: 1, batch: 42020, sum loss: 4367.988770, avg loss: 2.433420, ppl: 11.397796 +epoch: 1, batch: 42021, sum loss: 3664.900635, avg loss: 2.379806, ppl: 10.802802 +epoch: 1, batch: 42022, sum loss: 3600.623779, avg loss: 2.336550, ppl: 10.345486 +epoch: 1, batch: 42023, sum loss: 4237.734863, avg loss: 2.579266, ppl: 13.187459 +epoch: 1, batch: 42024, sum loss: 5897.192871, avg loss: 3.270767, ppl: 26.331528 +epoch: 1, batch: 42025, sum loss: 4419.945312, avg loss: 2.626230, ppl: 13.821565 +epoch: 1, batch: 42026, sum loss: 
3690.594238, avg loss: 2.350697, ppl: 10.492881 +epoch: 1, batch: 42027, sum loss: 5123.977051, avg loss: 2.792358, ppl: 16.319458 +epoch: 1, batch: 42028, sum loss: 3558.670410, avg loss: 2.217240, ppl: 9.181954 +epoch: 1, batch: 42029, sum loss: 5627.330078, avg loss: 2.998045, ppl: 20.046303 +epoch: 1, batch: 42030, sum loss: 4425.146973, avg loss: 2.776127, ppl: 16.056719 +epoch: 1, batch: 42031, sum loss: 3837.811035, avg loss: 2.444466, ppl: 11.524390 +epoch: 1, batch: 42032, sum loss: 4119.028320, avg loss: 2.444527, ppl: 11.525099 +epoch: 1, batch: 42033, sum loss: 4075.952148, avg loss: 2.380813, ppl: 10.813692 +epoch: 1, batch: 42034, sum loss: 4205.565918, avg loss: 2.521322, ppl: 12.445045 +epoch: 1, batch: 42035, sum loss: 3994.459473, avg loss: 2.470290, ppl: 11.825881 +epoch: 1, batch: 42036, sum loss: 3982.233887, avg loss: 2.487342, ppl: 12.029256 +epoch: 1, batch: 42037, sum loss: 4551.156250, avg loss: 2.678727, ppl: 14.566533 +epoch: 1, batch: 42038, sum loss: 5623.527344, avg loss: 2.778423, ppl: 16.093615 +epoch: 1, batch: 42039, sum loss: 3620.482910, avg loss: 2.308981, ppl: 10.064168 +epoch: 1, batch: 42040, sum loss: 5199.929688, avg loss: 2.850839, ppl: 17.302286 +epoch: 1, batch: 42041, sum loss: 4559.826660, avg loss: 2.807775, ppl: 16.573002 +epoch: 1, batch: 42042, sum loss: 4260.107422, avg loss: 2.737858, ppl: 15.453852 +epoch: 1, batch: 42043, sum loss: 4329.880371, avg loss: 2.602092, ppl: 13.491927 +epoch: 1, batch: 42044, sum loss: 3746.652100, avg loss: 2.246194, ppl: 9.451697 +epoch: 1, batch: 42045, sum loss: 5313.420898, avg loss: 2.684902, ppl: 14.656764 +epoch: 1, batch: 42046, sum loss: 5379.244629, avg loss: 2.699069, ppl: 14.865886 +epoch: 1, batch: 42047, sum loss: 3545.287598, avg loss: 2.470584, ppl: 11.829350 +epoch: 1, batch: 42048, sum loss: 4231.324707, avg loss: 2.434594, ppl: 11.411186 +epoch: 1, batch: 42049, sum loss: 3829.444336, avg loss: 2.363855, ppl: 10.631855 +epoch: 1, batch: 42050, sum loss: 
4903.466797, avg loss: 2.810009, ppl: 16.610075 +epoch: 1, batch: 42051, sum loss: 4381.923340, avg loss: 2.522696, ppl: 12.462152 +epoch: 1, batch: 42052, sum loss: 3716.009033, avg loss: 2.331248, ppl: 10.290774 +epoch: 1, batch: 42053, sum loss: 3969.743652, avg loss: 2.462620, ppl: 11.735518 +epoch: 1, batch: 42054, sum loss: 4216.442383, avg loss: 2.428826, ppl: 11.345558 +epoch: 1, batch: 42055, sum loss: 5079.201172, avg loss: 2.927494, ppl: 18.680750 +epoch: 1, batch: 42056, sum loss: 3738.888428, avg loss: 2.699558, ppl: 14.873164 +epoch: 1, batch: 42057, sum loss: 4980.330566, avg loss: 2.546181, ppl: 12.758289 +epoch: 1, batch: 42058, sum loss: 4291.717285, avg loss: 2.631341, ppl: 13.892390 +epoch: 1, batch: 42059, sum loss: 3543.140381, avg loss: 2.351122, ppl: 10.497337 +epoch: 1, batch: 42060, sum loss: 5585.054688, avg loss: 2.837934, ppl: 17.080450 +epoch: 1, batch: 42061, sum loss: 3862.499756, avg loss: 2.403547, ppl: 11.062343 +epoch: 1, batch: 42062, sum loss: 3820.243896, avg loss: 2.591753, ppl: 13.353159 +epoch: 1, batch: 42063, sum loss: 3594.309570, avg loss: 2.378762, ppl: 10.791534 +epoch: 1, batch: 42064, sum loss: 5484.895996, avg loss: 3.023647, ppl: 20.566166 +epoch: 1, batch: 42065, sum loss: 5305.271484, avg loss: 2.852297, ppl: 17.327530 +epoch: 1, batch: 42066, sum loss: 4931.896484, avg loss: 2.715802, ppl: 15.116728 +epoch: 1, batch: 42067, sum loss: 4384.911621, avg loss: 2.565776, ppl: 13.010756 +epoch: 1, batch: 42068, sum loss: 4493.283203, avg loss: 2.342692, ppl: 10.409220 +epoch: 1, batch: 42069, sum loss: 4707.780762, avg loss: 2.575372, ppl: 13.136209 +epoch: 1, batch: 42070, sum loss: 4887.270996, avg loss: 2.579035, ppl: 13.184406 +epoch: 1, batch: 42071, sum loss: 4535.468262, avg loss: 2.412483, ppl: 11.161641 +epoch: 1, batch: 42072, sum loss: 3561.581787, avg loss: 2.271417, ppl: 9.693126 +epoch: 1, batch: 42073, sum loss: 4291.485840, avg loss: 2.610393, ppl: 13.604394 +epoch: 1, batch: 42074, sum loss: 
4970.008301, avg loss: 2.487492, ppl: 12.031059 +epoch: 1, batch: 42075, sum loss: 4244.034180, avg loss: 2.511263, ppl: 12.320479 +epoch: 1, batch: 42076, sum loss: 4734.250000, avg loss: 2.804651, ppl: 16.521301 +epoch: 1, batch: 42077, sum loss: 5279.895508, avg loss: 2.901042, ppl: 18.193083 +epoch: 1, batch: 42078, sum loss: 4819.634766, avg loss: 2.795612, ppl: 16.372648 +epoch: 1, batch: 42079, sum loss: 4805.527344, avg loss: 2.761797, ppl: 15.828267 +epoch: 1, batch: 42080, sum loss: 3914.735352, avg loss: 2.503028, ppl: 12.219435 +epoch: 1, batch: 42081, sum loss: 4343.621094, avg loss: 2.295783, ppl: 9.932208 +epoch: 1, batch: 42082, sum loss: 3950.027588, avg loss: 2.444324, ppl: 11.522758 +epoch: 1, batch: 42083, sum loss: 3988.414795, avg loss: 2.537159, ppl: 12.643704 +epoch: 1, batch: 42084, sum loss: 4925.119629, avg loss: 2.737699, ppl: 15.451384 +epoch: 1, batch: 42085, sum loss: 4079.773193, avg loss: 2.718037, ppl: 15.150551 +epoch: 1, batch: 42086, sum loss: 5485.827148, avg loss: 2.705043, ppl: 14.954961 +epoch: 1, batch: 42087, sum loss: 3828.930664, avg loss: 2.452870, ppl: 11.621657 +epoch: 1, batch: 42088, sum loss: 4181.853027, avg loss: 2.516157, ppl: 12.380927 +epoch: 1, batch: 42089, sum loss: 3954.892822, avg loss: 2.598484, ppl: 13.443346 +epoch: 1, batch: 42090, sum loss: 4677.923828, avg loss: 2.850654, ppl: 17.299099 +epoch: 1, batch: 42091, sum loss: 4731.503906, avg loss: 2.557570, ppl: 12.904418 +epoch: 1, batch: 42092, sum loss: 4289.927734, avg loss: 2.567282, ppl: 13.030359 +epoch: 1, batch: 42093, sum loss: 4902.789062, avg loss: 2.985864, ppl: 19.803614 +epoch: 1, batch: 42094, sum loss: 4118.471680, avg loss: 2.526670, ppl: 12.511769 +epoch: 1, batch: 42095, sum loss: 4789.538086, avg loss: 2.590340, ppl: 13.334300 +epoch: 1, batch: 42096, sum loss: 4446.998535, avg loss: 2.452840, ppl: 11.621303 +epoch: 1, batch: 42097, sum loss: 3982.680664, avg loss: 2.435890, ppl: 11.425985 +epoch: 1, batch: 42098, sum loss: 
4312.822266, avg loss: 2.390700, ppl: 10.921132 +epoch: 1, batch: 42099, sum loss: 3397.175293, avg loss: 2.204526, ppl: 9.065957 +epoch: 1, batch: 42100, sum loss: 4231.338379, avg loss: 2.729896, ppl: 15.331290 +epoch: 1, batch: 42101, sum loss: 3997.107422, avg loss: 2.392045, ppl: 10.935838 +epoch: 1, batch: 42102, sum loss: 4262.390625, avg loss: 2.640887, ppl: 14.025633 +epoch: 1, batch: 42103, sum loss: 3610.174072, avg loss: 2.195970, ppl: 8.988712 +epoch: 1, batch: 42104, sum loss: 5227.666992, avg loss: 2.763037, ppl: 15.847907 +epoch: 1, batch: 42105, sum loss: 4174.124512, avg loss: 2.543647, ppl: 12.725996 +epoch: 1, batch: 42106, sum loss: 4058.121094, avg loss: 2.515884, ppl: 12.377547 +epoch: 1, batch: 42107, sum loss: 4188.142090, avg loss: 2.630743, ppl: 13.884076 +epoch: 1, batch: 42108, sum loss: 3508.577637, avg loss: 2.285718, ppl: 9.832746 +epoch: 1, batch: 42109, sum loss: 5224.905762, avg loss: 2.771833, ppl: 15.987920 +epoch: 1, batch: 42110, sum loss: 3866.361084, avg loss: 2.380764, ppl: 10.813164 +epoch: 1, batch: 42111, sum loss: 3959.655029, avg loss: 2.331952, ppl: 10.298027 +epoch: 1, batch: 42112, sum loss: 3647.184082, avg loss: 2.482766, ppl: 11.974344 +epoch: 1, batch: 42113, sum loss: 4890.584473, avg loss: 2.742897, ppl: 15.531909 +epoch: 1, batch: 42114, sum loss: 4262.508301, avg loss: 2.616641, ppl: 13.689663 +epoch: 1, batch: 42115, sum loss: 3606.901855, avg loss: 2.450341, ppl: 11.592299 +epoch: 1, batch: 42116, sum loss: 4862.782227, avg loss: 2.716638, ppl: 15.129373 +epoch: 1, batch: 42117, sum loss: 4662.873047, avg loss: 2.678273, ppl: 14.559926 +epoch: 1, batch: 42118, sum loss: 3408.732178, avg loss: 2.345996, ppl: 10.443671 +epoch: 1, batch: 42119, sum loss: 3849.133301, avg loss: 2.257556, ppl: 9.559699 +epoch: 1, batch: 42120, sum loss: 3923.966797, avg loss: 2.362412, ppl: 10.616529 +epoch: 1, batch: 42121, sum loss: 5010.841309, avg loss: 2.771483, ppl: 15.982317 +epoch: 1, batch: 42122, sum loss: 
3636.227539, avg loss: 2.658061, ppl: 14.268596 +epoch: 1, batch: 42123, sum loss: 3187.455322, avg loss: 2.416570, ppl: 11.207349 +epoch: 1, batch: 42124, sum loss: 4945.256836, avg loss: 2.762713, ppl: 15.842770 +epoch: 1, batch: 42125, sum loss: 3515.762451, avg loss: 2.354831, ppl: 10.536345 +epoch: 1, batch: 42126, sum loss: 5189.806641, avg loss: 2.748838, ppl: 15.624469 +epoch: 1, batch: 42127, sum loss: 4455.582031, avg loss: 2.543141, ppl: 12.719556 +epoch: 1, batch: 42128, sum loss: 4252.105469, avg loss: 2.463560, ppl: 11.746559 +epoch: 1, batch: 42129, sum loss: 3819.796143, avg loss: 2.610934, ppl: 13.611758 +epoch: 1, batch: 42130, sum loss: 4423.203125, avg loss: 2.422346, ppl: 11.272269 +epoch: 1, batch: 42131, sum loss: 4246.789551, avg loss: 2.659230, ppl: 14.285278 +epoch: 1, batch: 42132, sum loss: 4333.755859, avg loss: 2.701843, ppl: 14.907177 +epoch: 1, batch: 42133, sum loss: 4437.900391, avg loss: 2.544667, ppl: 12.738992 +epoch: 1, batch: 42134, sum loss: 5534.015137, avg loss: 2.894359, ppl: 18.071920 +epoch: 1, batch: 42135, sum loss: 5308.051758, avg loss: 2.774726, ppl: 16.034239 +epoch: 1, batch: 42136, sum loss: 4985.516113, avg loss: 2.538450, ppl: 12.660036 +epoch: 1, batch: 42137, sum loss: 4051.287598, avg loss: 2.455326, ppl: 11.650229 +epoch: 1, batch: 42138, sum loss: 3275.685791, avg loss: 2.380586, ppl: 10.811233 +epoch: 1, batch: 42139, sum loss: 3858.305664, avg loss: 2.454393, ppl: 11.639365 +epoch: 1, batch: 42140, sum loss: 4503.190918, avg loss: 2.675693, ppl: 14.522408 +epoch: 1, batch: 42141, sum loss: 4777.268555, avg loss: 2.732991, ppl: 15.378819 +epoch: 1, batch: 42142, sum loss: 4421.439453, avg loss: 2.566128, ppl: 13.015338 +epoch: 1, batch: 42143, sum loss: 4200.112793, avg loss: 2.572022, ppl: 13.092276 +epoch: 1, batch: 42144, sum loss: 4463.840820, avg loss: 2.607383, ppl: 13.563516 +epoch: 1, batch: 42145, sum loss: 4436.915039, avg loss: 2.637881, ppl: 13.983535 +epoch: 1, batch: 42146, sum loss: 
3158.318848, avg loss: 2.233606, ppl: 9.333463 +epoch: 1, batch: 42147, sum loss: 4081.189941, avg loss: 2.621188, ppl: 13.752053 +epoch: 1, batch: 42148, sum loss: 4859.158203, avg loss: 2.705545, ppl: 14.962464 +epoch: 1, batch: 42149, sum loss: 4780.921387, avg loss: 2.739783, ppl: 15.483625 +epoch: 1, batch: 42150, sum loss: 3507.199219, avg loss: 2.216940, ppl: 9.179199 +epoch: 1, batch: 42151, sum loss: 4422.263672, avg loss: 2.696502, ppl: 14.827777 +epoch: 1, batch: 42152, sum loss: 4548.090820, avg loss: 2.739814, ppl: 15.484101 +epoch: 1, batch: 42153, sum loss: 3763.116211, avg loss: 2.566928, ppl: 13.025746 +epoch: 1, batch: 42154, sum loss: 4602.025391, avg loss: 2.765640, ppl: 15.889210 +epoch: 1, batch: 42155, sum loss: 4273.091797, avg loss: 2.498884, ppl: 12.168905 +epoch: 1, batch: 42156, sum loss: 4608.149414, avg loss: 2.691676, ppl: 14.756386 +epoch: 1, batch: 42157, sum loss: 4831.058594, avg loss: 2.670569, ppl: 14.448184 +epoch: 1, batch: 42158, sum loss: 4820.021484, avg loss: 2.596994, ppl: 13.423332 +epoch: 1, batch: 42159, sum loss: 5135.980469, avg loss: 2.767231, ppl: 15.914502 +epoch: 1, batch: 42160, sum loss: 4854.625000, avg loss: 2.766168, ppl: 15.897599 +epoch: 1, batch: 42161, sum loss: 4701.587402, avg loss: 2.682024, ppl: 14.614640 +epoch: 1, batch: 42162, sum loss: 5772.885742, avg loss: 2.829846, ppl: 16.942846 +epoch: 1, batch: 42163, sum loss: 4811.165039, avg loss: 2.675843, ppl: 14.524582 +epoch: 1, batch: 42164, sum loss: 4449.803223, avg loss: 2.535500, ppl: 12.622745 +epoch: 1, batch: 42165, sum loss: 4468.921875, avg loss: 2.463573, ppl: 11.746710 +epoch: 1, batch: 42166, sum loss: 4113.197754, avg loss: 2.498905, ppl: 12.169164 +epoch: 1, batch: 42167, sum loss: 4040.095215, avg loss: 2.521907, ppl: 12.452322 +epoch: 1, batch: 42168, sum loss: 4160.899414, avg loss: 2.665534, ppl: 14.375631 +epoch: 1, batch: 42169, sum loss: 4398.887695, avg loss: 2.657938, ppl: 14.266844 +epoch: 1, batch: 42170, sum loss: 
3872.869141, avg loss: 2.458964, ppl: 11.692696 +epoch: 1, batch: 42171, sum loss: 5697.325195, avg loss: 2.816275, ppl: 16.714479 +epoch: 1, batch: 42172, sum loss: 4340.197754, avg loss: 2.520440, ppl: 12.434068 +epoch: 1, batch: 42173, sum loss: 4917.897461, avg loss: 2.748965, ppl: 15.626443 +epoch: 1, batch: 42174, sum loss: 4152.942383, avg loss: 2.573075, ppl: 13.106058 +epoch: 1, batch: 42175, sum loss: 4608.277832, avg loss: 2.725179, ppl: 15.259148 +epoch: 1, batch: 42176, sum loss: 3254.582764, avg loss: 2.283918, ppl: 9.815058 +epoch: 1, batch: 42177, sum loss: 4916.701172, avg loss: 2.840382, ppl: 17.122303 +epoch: 1, batch: 42178, sum loss: 4411.579590, avg loss: 2.675306, ppl: 14.516792 +epoch: 1, batch: 42179, sum loss: 4297.844238, avg loss: 2.520730, ppl: 12.437670 +epoch: 1, batch: 42180, sum loss: 4384.016602, avg loss: 2.611088, ppl: 13.613852 +epoch: 1, batch: 42181, sum loss: 4145.016113, avg loss: 2.357802, ppl: 10.567699 +epoch: 1, batch: 42182, sum loss: 4614.434570, avg loss: 2.701659, ppl: 14.904445 +epoch: 1, batch: 42183, sum loss: 4779.318359, avg loss: 2.683503, ppl: 14.636273 +epoch: 1, batch: 42184, sum loss: 4278.231445, avg loss: 2.415715, ppl: 11.197774 +epoch: 1, batch: 42185, sum loss: 4840.167480, avg loss: 2.719195, ppl: 15.168109 +epoch: 1, batch: 42186, sum loss: 3816.329346, avg loss: 2.418460, ppl: 11.228550 +epoch: 1, batch: 42187, sum loss: 5148.650391, avg loss: 2.838286, ppl: 17.086452 +epoch: 1, batch: 42188, sum loss: 4315.935547, avg loss: 2.384495, ppl: 10.853580 +epoch: 1, batch: 42189, sum loss: 4322.333008, avg loss: 2.517375, ppl: 12.396014 +epoch: 1, batch: 42190, sum loss: 4460.510742, avg loss: 2.868496, ppl: 17.610506 +epoch: 1, batch: 42191, sum loss: 4176.500488, avg loss: 2.537364, ppl: 12.646289 +epoch: 1, batch: 42192, sum loss: 4734.723633, avg loss: 2.656972, ppl: 14.253064 +epoch: 1, batch: 42193, sum loss: 4683.966797, avg loss: 2.609452, ppl: 13.591604 +epoch: 1, batch: 42194, sum loss: 
3659.511963, avg loss: 2.446198, ppl: 11.544372 +epoch: 1, batch: 42195, sum loss: 4738.107422, avg loss: 2.488502, ppl: 12.043222 +epoch: 1, batch: 42196, sum loss: 5433.593750, avg loss: 3.033832, ppl: 20.776703 +epoch: 1, batch: 42197, sum loss: 3651.984375, avg loss: 2.492822, ppl: 12.095364 +epoch: 1, batch: 42198, sum loss: 4168.195312, avg loss: 2.638098, ppl: 13.986579 +epoch: 1, batch: 42199, sum loss: 4727.203125, avg loss: 2.488002, ppl: 12.037200 +epoch: 1, batch: 42200, sum loss: 4508.286621, avg loss: 2.535594, ppl: 12.623933 +epoch: 1, batch: 42201, sum loss: 4971.158203, avg loss: 2.835801, ppl: 17.044041 +epoch: 1, batch: 42202, sum loss: 4760.220215, avg loss: 2.650457, ppl: 14.160503 +epoch: 1, batch: 42203, sum loss: 4103.169434, avg loss: 2.719132, ppl: 15.167147 +epoch: 1, batch: 42204, sum loss: 4678.862793, avg loss: 2.709243, ppl: 15.017903 +epoch: 1, batch: 42205, sum loss: 3969.763184, avg loss: 2.525295, ppl: 12.494575 +epoch: 1, batch: 42206, sum loss: 4633.608398, avg loss: 2.526504, ppl: 12.509696 +epoch: 1, batch: 42207, sum loss: 4307.884277, avg loss: 2.638018, ppl: 13.985462 +epoch: 1, batch: 42208, sum loss: 4084.597412, avg loss: 2.481529, ppl: 11.959542 +epoch: 1, batch: 42209, sum loss: 3737.783447, avg loss: 2.413030, ppl: 11.167747 +epoch: 1, batch: 42210, sum loss: 4414.237305, avg loss: 2.657578, ppl: 14.261708 +epoch: 1, batch: 42211, sum loss: 4229.390137, avg loss: 2.416794, ppl: 11.209867 +epoch: 1, batch: 42212, sum loss: 3991.376465, avg loss: 2.236065, ppl: 9.356444 +epoch: 1, batch: 42213, sum loss: 4157.911621, avg loss: 2.560290, ppl: 12.939573 +epoch: 1, batch: 42214, sum loss: 4134.381836, avg loss: 2.517894, ppl: 12.402450 +epoch: 1, batch: 42215, sum loss: 3878.913086, avg loss: 2.381162, ppl: 10.817468 +epoch: 1, batch: 42216, sum loss: 4390.863281, avg loss: 2.667596, ppl: 14.405302 +epoch: 1, batch: 42217, sum loss: 4666.614746, avg loss: 2.727420, ppl: 15.293373 +epoch: 1, batch: 42218, sum loss: 
3894.113770, avg loss: 2.541850, ppl: 12.703148 +epoch: 1, batch: 42219, sum loss: 4498.083984, avg loss: 2.601552, ppl: 13.484656 +epoch: 1, batch: 42220, sum loss: 3799.861572, avg loss: 2.298767, ppl: 9.961890 +epoch: 1, batch: 42221, sum loss: 3988.536865, avg loss: 2.696779, ppl: 14.831888 +epoch: 1, batch: 42222, sum loss: 4125.430176, avg loss: 2.537165, ppl: 12.643774 +epoch: 1, batch: 42223, sum loss: 4229.182129, avg loss: 2.589824, ppl: 13.327422 +epoch: 1, batch: 42224, sum loss: 5141.190430, avg loss: 2.697371, ppl: 14.840672 +epoch: 1, batch: 42225, sum loss: 4033.226318, avg loss: 2.383703, ppl: 10.844993 +epoch: 1, batch: 42226, sum loss: 5100.340820, avg loss: 2.610205, ppl: 13.601841 +epoch: 1, batch: 42227, sum loss: 3798.627930, avg loss: 2.377114, ppl: 10.773763 +epoch: 1, batch: 42228, sum loss: 4916.411133, avg loss: 2.677784, ppl: 14.552805 +epoch: 1, batch: 42229, sum loss: 4022.526123, avg loss: 2.596853, ppl: 13.421441 +epoch: 1, batch: 42230, sum loss: 4191.195312, avg loss: 2.601611, ppl: 13.485448 +epoch: 1, batch: 42231, sum loss: 4628.489746, avg loss: 2.779874, ppl: 16.116987 +epoch: 1, batch: 42232, sum loss: 4137.712402, avg loss: 2.485113, ppl: 12.002473 +epoch: 1, batch: 42233, sum loss: 3405.954102, avg loss: 2.361965, ppl: 10.611788 +epoch: 1, batch: 42234, sum loss: 4090.975586, avg loss: 2.568095, ppl: 13.040960 +epoch: 1, batch: 42235, sum loss: 4078.424072, avg loss: 2.292537, ppl: 9.900026 +epoch: 1, batch: 42236, sum loss: 5180.196289, avg loss: 2.710725, ppl: 15.040180 +epoch: 1, batch: 42237, sum loss: 4182.627930, avg loss: 2.555057, ppl: 12.872031 +epoch: 1, batch: 42238, sum loss: 3843.570557, avg loss: 2.408252, ppl: 11.114518 +epoch: 1, batch: 42239, sum loss: 4902.506836, avg loss: 2.717576, ppl: 15.143566 +epoch: 1, batch: 42240, sum loss: 4871.127441, avg loss: 2.610465, ppl: 13.605373 +epoch: 1, batch: 42241, sum loss: 4188.856934, avg loss: 2.487445, ppl: 12.030498 +epoch: 1, batch: 42242, sum loss: 
4753.789551, avg loss: 2.498050, ppl: 12.158764 +epoch: 1, batch: 42243, sum loss: 3885.027100, avg loss: 2.344615, ppl: 10.429256 +epoch: 1, batch: 42244, sum loss: 3521.762695, avg loss: 2.466220, ppl: 11.777847 +epoch: 1, batch: 42245, sum loss: 5050.753906, avg loss: 2.739021, ppl: 15.471825 +epoch: 1, batch: 42246, sum loss: 3479.494141, avg loss: 2.481807, ppl: 11.962868 +epoch: 1, batch: 42247, sum loss: 4410.531250, avg loss: 2.482010, ppl: 11.965286 +epoch: 1, batch: 42248, sum loss: 4958.951172, avg loss: 2.703899, ppl: 14.937863 +epoch: 1, batch: 42249, sum loss: 4716.862793, avg loss: 2.712400, ppl: 15.065385 +epoch: 1, batch: 42250, sum loss: 4235.204102, avg loss: 2.584017, ppl: 13.250258 +epoch: 1, batch: 42251, sum loss: 4360.323242, avg loss: 2.558875, ppl: 12.921274 +epoch: 1, batch: 42252, sum loss: 4727.385742, avg loss: 2.679924, ppl: 14.583985 +epoch: 1, batch: 42253, sum loss: 4272.084961, avg loss: 2.401397, ppl: 11.038584 +epoch: 1, batch: 42254, sum loss: 4033.004639, avg loss: 2.534887, ppl: 12.615000 +epoch: 1, batch: 42255, sum loss: 4045.184570, avg loss: 2.445698, ppl: 11.538601 +epoch: 1, batch: 42256, sum loss: 4650.748047, avg loss: 2.491027, ppl: 12.073673 +epoch: 1, batch: 42257, sum loss: 3973.343018, avg loss: 2.510008, ppl: 12.305030 +epoch: 1, batch: 42258, sum loss: 3750.405762, avg loss: 2.427447, ppl: 11.329924 +epoch: 1, batch: 42259, sum loss: 4162.620117, avg loss: 2.553755, ppl: 12.855279 +epoch: 1, batch: 42260, sum loss: 3933.807373, avg loss: 2.718595, ppl: 15.159013 +epoch: 1, batch: 42261, sum loss: 5328.304199, avg loss: 2.711605, ppl: 15.053422 +epoch: 1, batch: 42262, sum loss: 4133.044922, avg loss: 2.518614, ppl: 12.411377 +epoch: 1, batch: 42263, sum loss: 3817.662598, avg loss: 2.463008, ppl: 11.740074 +epoch: 1, batch: 42264, sum loss: 4481.078613, avg loss: 2.609830, ppl: 13.596745 +epoch: 1, batch: 42265, sum loss: 3736.588623, avg loss: 2.559307, ppl: 12.926860 +epoch: 1, batch: 42266, sum loss: 
4118.263184, avg loss: 2.483874, ppl: 11.987616 +epoch: 1, batch: 42267, sum loss: 4655.530762, avg loss: 2.619882, ppl: 13.734104 +epoch: 1, batch: 42268, sum loss: 4328.453125, avg loss: 2.570340, ppl: 13.070272 +epoch: 1, batch: 42269, sum loss: 4257.549805, avg loss: 2.466715, ppl: 11.783672 +epoch: 1, batch: 42270, sum loss: 4751.688965, avg loss: 2.738726, ppl: 15.467262 +epoch: 1, batch: 42271, sum loss: 3724.355713, avg loss: 2.372201, ppl: 10.720965 +epoch: 1, batch: 42272, sum loss: 4546.403320, avg loss: 2.552725, ppl: 12.842052 +epoch: 1, batch: 42273, sum loss: 4294.919434, avg loss: 2.382096, ppl: 10.827577 +epoch: 1, batch: 42274, sum loss: 4530.474121, avg loss: 2.742418, ppl: 15.524475 +epoch: 1, batch: 42275, sum loss: 4887.158691, avg loss: 2.645998, ppl: 14.097514 +epoch: 1, batch: 42276, sum loss: 4253.910156, avg loss: 2.707772, ppl: 14.995831 +epoch: 1, batch: 42277, sum loss: 4666.163086, avg loss: 2.677087, ppl: 14.542673 +epoch: 1, batch: 42278, sum loss: 4831.071777, avg loss: 2.915553, ppl: 18.459019 +epoch: 1, batch: 42279, sum loss: 4617.674316, avg loss: 2.848658, ppl: 17.264591 +epoch: 1, batch: 42280, sum loss: 4308.010254, avg loss: 2.622039, ppl: 13.763761 +epoch: 1, batch: 42281, sum loss: 6031.025391, avg loss: 2.930527, ppl: 18.737511 +epoch: 1, batch: 42282, sum loss: 4692.774414, avg loss: 2.547652, ppl: 12.777065 +epoch: 1, batch: 42283, sum loss: 3277.741699, avg loss: 2.507836, ppl: 12.278333 +epoch: 1, batch: 42284, sum loss: 3736.739258, avg loss: 2.296705, ppl: 9.941372 +epoch: 1, batch: 42285, sum loss: 4429.341797, avg loss: 2.567734, ppl: 13.036257 +epoch: 1, batch: 42286, sum loss: 4282.593750, avg loss: 2.559829, ppl: 12.933605 +epoch: 1, batch: 42287, sum loss: 4156.070312, avg loss: 2.431873, ppl: 11.380173 +epoch: 1, batch: 42288, sum loss: 3817.220215, avg loss: 2.630751, ppl: 13.884198 +epoch: 1, batch: 42289, sum loss: 5178.170898, avg loss: 2.670537, ppl: 14.447725 +epoch: 1, batch: 42290, sum loss: 
4268.300293, avg loss: 2.569717, ppl: 13.062130 +epoch: 1, batch: 42291, sum loss: 4052.357666, avg loss: 2.687240, ppl: 14.691071 +epoch: 1, batch: 42292, sum loss: 4206.559570, avg loss: 2.582296, ppl: 13.227469 +epoch: 1, batch: 42293, sum loss: 4155.698242, avg loss: 2.504942, ppl: 12.242846 +epoch: 1, batch: 42294, sum loss: 4618.452637, avg loss: 2.758932, ppl: 15.782983 +epoch: 1, batch: 42295, sum loss: 3763.372070, avg loss: 2.642817, ppl: 14.052741 +epoch: 1, batch: 42296, sum loss: 4563.241211, avg loss: 2.727580, ppl: 15.295827 +epoch: 1, batch: 42297, sum loss: 4507.981445, avg loss: 2.508615, ppl: 12.287903 +epoch: 1, batch: 42298, sum loss: 4775.631348, avg loss: 2.786249, ppl: 16.220070 +epoch: 1, batch: 42299, sum loss: 4330.155273, avg loss: 2.475789, ppl: 11.891087 +epoch: 1, batch: 42300, sum loss: 4663.841309, avg loss: 2.669629, ppl: 14.434608 +epoch: 1, batch: 42301, sum loss: 3912.673828, avg loss: 2.522678, ppl: 12.461926 +epoch: 1, batch: 42302, sum loss: 4397.753418, avg loss: 2.588437, ppl: 13.308949 +epoch: 1, batch: 42303, sum loss: 3894.794434, avg loss: 2.567432, ppl: 13.032313 +epoch: 1, batch: 42304, sum loss: 4448.907715, avg loss: 2.712749, ppl: 15.070641 +epoch: 1, batch: 42305, sum loss: 3781.316406, avg loss: 2.373708, ppl: 10.737129 +epoch: 1, batch: 42306, sum loss: 3531.290039, avg loss: 2.275316, ppl: 9.730993 +epoch: 1, batch: 42307, sum loss: 4359.901367, avg loss: 2.427562, ppl: 11.331223 +epoch: 1, batch: 42308, sum loss: 4089.858398, avg loss: 2.311961, ppl: 10.094197 +epoch: 1, batch: 42309, sum loss: 5026.754395, avg loss: 2.666713, ppl: 14.392587 +epoch: 1, batch: 42310, sum loss: 4798.785645, avg loss: 2.645417, ppl: 14.089315 +epoch: 1, batch: 42311, sum loss: 3133.904297, avg loss: 2.029731, ppl: 7.612037 +epoch: 1, batch: 42312, sum loss: 4556.564453, avg loss: 2.548414, ppl: 12.786811 +epoch: 1, batch: 42313, sum loss: 4140.690918, avg loss: 2.535634, ppl: 12.624435 +epoch: 1, batch: 42314, sum loss: 
5314.480469, avg loss: 2.967326, ppl: 19.439861 +epoch: 1, batch: 42315, sum loss: 5812.016113, avg loss: 2.771586, ppl: 15.983968 +epoch: 1, batch: 42316, sum loss: 4048.412842, avg loss: 2.593474, ppl: 13.376159 +epoch: 1, batch: 42317, sum loss: 3464.223389, avg loss: 2.355012, ppl: 10.538260 +epoch: 1, batch: 42318, sum loss: 4667.204590, avg loss: 2.483877, ppl: 11.987650 +epoch: 1, batch: 42319, sum loss: 5346.812012, avg loss: 2.972102, ppl: 19.532938 +epoch: 1, batch: 42320, sum loss: 3743.084717, avg loss: 2.465800, ppl: 11.772900 +epoch: 1, batch: 42321, sum loss: 3940.191650, avg loss: 2.498536, ppl: 12.164676 +epoch: 1, batch: 42322, sum loss: 3992.766113, avg loss: 2.559466, ppl: 12.928906 +epoch: 1, batch: 42323, sum loss: 5473.472656, avg loss: 2.786901, ppl: 16.230639 +epoch: 1, batch: 42324, sum loss: 4492.086426, avg loss: 2.706076, ppl: 14.970418 +epoch: 1, batch: 42325, sum loss: 4908.587891, avg loss: 2.925261, ppl: 18.639091 +epoch: 1, batch: 42326, sum loss: 3847.571777, avg loss: 2.546374, ppl: 12.760754 +epoch: 1, batch: 42327, sum loss: 3427.252930, avg loss: 2.463877, ppl: 11.750278 +epoch: 1, batch: 42328, sum loss: 3679.547119, avg loss: 2.274133, ppl: 9.719488 +epoch: 1, batch: 42329, sum loss: 4333.808594, avg loss: 2.445716, ppl: 11.538807 +epoch: 1, batch: 42330, sum loss: 3870.051514, avg loss: 2.663491, ppl: 14.346278 +epoch: 1, batch: 42331, sum loss: 3955.240723, avg loss: 2.481330, ppl: 11.957162 +epoch: 1, batch: 42332, sum loss: 4315.244629, avg loss: 2.675291, ppl: 14.516578 +epoch: 1, batch: 42333, sum loss: 5294.473633, avg loss: 2.761854, ppl: 15.829162 +epoch: 1, batch: 42334, sum loss: 5274.353027, avg loss: 2.549228, ppl: 12.797223 +epoch: 1, batch: 42335, sum loss: 4008.717285, avg loss: 2.406193, ppl: 11.091652 +epoch: 1, batch: 42336, sum loss: 3888.984863, avg loss: 2.367002, ppl: 10.665372 +epoch: 1, batch: 42337, sum loss: 3782.625244, avg loss: 2.654474, ppl: 14.217503 +epoch: 1, batch: 42338, sum loss: 
4525.161133, avg loss: 2.668138, ppl: 14.413100 +epoch: 1, batch: 42339, sum loss: 4278.957031, avg loss: 2.620304, ppl: 13.739904 +epoch: 1, batch: 42340, sum loss: 4447.128418, avg loss: 2.655002, ppl: 14.225017 +epoch: 1, batch: 42341, sum loss: 4609.154785, avg loss: 2.494131, ppl: 12.111208 +epoch: 1, batch: 42342, sum loss: 4172.692383, avg loss: 2.381674, ppl: 10.823004 +epoch: 1, batch: 42343, sum loss: 4311.116211, avg loss: 2.448107, ppl: 11.566428 +epoch: 1, batch: 42344, sum loss: 4938.168457, avg loss: 2.951685, ppl: 19.138168 +epoch: 1, batch: 42345, sum loss: 4537.094238, avg loss: 2.503915, ppl: 12.230283 +epoch: 1, batch: 42346, sum loss: 3695.108398, avg loss: 2.408806, ppl: 11.120673 +epoch: 1, batch: 42347, sum loss: 4544.325195, avg loss: 2.652846, ppl: 14.194376 +epoch: 1, batch: 42348, sum loss: 4317.703125, avg loss: 2.516144, ppl: 12.380765 +epoch: 1, batch: 42349, sum loss: 4211.241699, avg loss: 2.731026, ppl: 15.348622 +epoch: 1, batch: 42350, sum loss: 4492.963867, avg loss: 2.580680, ppl: 13.206117 +epoch: 1, batch: 42351, sum loss: 3973.432129, avg loss: 2.581827, ppl: 13.221273 +epoch: 1, batch: 42352, sum loss: 4447.927734, avg loss: 2.349671, ppl: 10.482121 +epoch: 1, batch: 42353, sum loss: 4285.855469, avg loss: 2.693812, ppl: 14.787946 +epoch: 1, batch: 42354, sum loss: 5637.938965, avg loss: 3.179887, ppl: 24.044031 +epoch: 1, batch: 42355, sum loss: 3812.551025, avg loss: 2.518198, ppl: 12.406215 +epoch: 1, batch: 42356, sum loss: 4541.030273, avg loss: 2.602309, ppl: 13.494868 +epoch: 1, batch: 42357, sum loss: 3997.761719, avg loss: 2.305514, ppl: 10.029333 +epoch: 1, batch: 42358, sum loss: 3870.270508, avg loss: 2.474597, ppl: 11.876925 +epoch: 1, batch: 42359, sum loss: 4507.775391, avg loss: 2.796387, ppl: 16.385334 +epoch: 1, batch: 42360, sum loss: 5234.181152, avg loss: 2.829287, ppl: 16.933388 +epoch: 1, batch: 42361, sum loss: 4468.203613, avg loss: 2.667584, ppl: 14.405130 +epoch: 1, batch: 42362, sum loss: 
5388.720215, avg loss: 2.848161, ppl: 17.256014 +epoch: 1, batch: 42363, sum loss: 3998.240234, avg loss: 2.696049, ppl: 14.821054 +epoch: 1, batch: 42364, sum loss: 5566.375000, avg loss: 2.890122, ppl: 17.995508 +epoch: 1, batch: 42365, sum loss: 6222.545898, avg loss: 3.106613, ppl: 22.345230 +epoch: 1, batch: 42366, sum loss: 3620.564453, avg loss: 2.375699, ppl: 10.758526 +epoch: 1, batch: 42367, sum loss: 5905.310547, avg loss: 3.047116, ppl: 21.054537 +epoch: 1, batch: 42368, sum loss: 4049.286133, avg loss: 2.504197, ppl: 12.233730 +epoch: 1, batch: 42369, sum loss: 4310.922852, avg loss: 2.493304, ppl: 12.101195 +epoch: 1, batch: 42370, sum loss: 4664.465332, avg loss: 2.647256, ppl: 14.115258 +epoch: 1, batch: 42371, sum loss: 3651.528320, avg loss: 2.467249, ppl: 11.789967 +epoch: 1, batch: 42372, sum loss: 4317.633301, avg loss: 2.783774, ppl: 16.179968 +epoch: 1, batch: 42373, sum loss: 5580.091309, avg loss: 2.735339, ppl: 15.414964 +epoch: 1, batch: 42374, sum loss: 5130.049805, avg loss: 2.691526, ppl: 14.754180 +epoch: 1, batch: 42375, sum loss: 5209.323730, avg loss: 2.811292, ppl: 16.631388 +epoch: 1, batch: 42376, sum loss: 3409.474121, avg loss: 2.277538, ppl: 9.752638 +epoch: 1, batch: 42377, sum loss: 4728.161621, avg loss: 2.527077, ppl: 12.516871 +epoch: 1, batch: 42378, sum loss: 3914.833740, avg loss: 2.355496, ppl: 10.543354 +epoch: 1, batch: 42379, sum loss: 4161.980957, avg loss: 2.379635, ppl: 10.800956 +epoch: 1, batch: 42380, sum loss: 4228.833008, avg loss: 2.514169, ppl: 12.356342 +epoch: 1, batch: 42381, sum loss: 3612.197998, avg loss: 2.533098, ppl: 12.592460 +epoch: 1, batch: 42382, sum loss: 4924.640625, avg loss: 2.746593, ppl: 15.589424 +epoch: 1, batch: 42383, sum loss: 4168.197266, avg loss: 2.614929, ppl: 13.666248 +epoch: 1, batch: 42384, sum loss: 3341.379395, avg loss: 2.366416, ppl: 10.659122 +epoch: 1, batch: 42385, sum loss: 4719.543945, avg loss: 2.624885, ppl: 13.802991 +epoch: 1, batch: 42386, sum loss: 
3943.473145, avg loss: 2.417825, ppl: 11.221432 +epoch: 1, batch: 42387, sum loss: 5180.072754, avg loss: 2.783489, ppl: 16.175358 +epoch: 1, batch: 42388, sum loss: 4253.760254, avg loss: 2.512558, ppl: 12.336443 +epoch: 1, batch: 42389, sum loss: 3837.979248, avg loss: 2.563780, ppl: 12.984811 +epoch: 1, batch: 42390, sum loss: 4401.824219, avg loss: 2.444100, ppl: 11.520176 +epoch: 1, batch: 42391, sum loss: 4645.149414, avg loss: 2.592159, ppl: 13.358585 +epoch: 1, batch: 42392, sum loss: 4433.944824, avg loss: 2.792157, ppl: 16.316175 +epoch: 1, batch: 42393, sum loss: 5364.580078, avg loss: 2.881085, ppl: 17.833611 +epoch: 1, batch: 42394, sum loss: 4829.424805, avg loss: 2.842510, ppl: 17.158783 +epoch: 1, batch: 42395, sum loss: 4010.238037, avg loss: 2.414352, ppl: 11.182518 +epoch: 1, batch: 42396, sum loss: 4007.323730, avg loss: 2.568797, ppl: 13.050120 +epoch: 1, batch: 42397, sum loss: 4674.730957, avg loss: 2.604307, ppl: 13.521851 +epoch: 1, batch: 42398, sum loss: 4441.376953, avg loss: 2.635832, ppl: 13.954919 +epoch: 1, batch: 42399, sum loss: 4850.088379, avg loss: 2.856354, ppl: 17.397974 +epoch: 1, batch: 42400, sum loss: 5034.591797, avg loss: 2.806350, ppl: 16.549402 +epoch: 1, batch: 42401, sum loss: 4220.033691, avg loss: 2.439326, ppl: 11.465308 +epoch: 1, batch: 42402, sum loss: 4322.181641, avg loss: 2.637084, ppl: 13.972407 +epoch: 1, batch: 42403, sum loss: 4623.429688, avg loss: 2.721265, ppl: 15.199542 +epoch: 1, batch: 42404, sum loss: 3979.483887, avg loss: 2.498107, ppl: 12.159451 +epoch: 1, batch: 42405, sum loss: 4106.066895, avg loss: 2.401209, ppl: 11.036508 +epoch: 1, batch: 42406, sum loss: 3526.838379, avg loss: 2.303618, ppl: 10.010331 +epoch: 1, batch: 42407, sum loss: 4218.079590, avg loss: 2.466713, ppl: 11.783652 +epoch: 1, batch: 42408, sum loss: 5341.864746, avg loss: 2.900035, ppl: 18.174784 +epoch: 1, batch: 42409, sum loss: 4120.078613, avg loss: 2.521468, ppl: 12.446855 +epoch: 1, batch: 42410, sum loss: 
4917.373535, avg loss: 2.890872, ppl: 18.009007 +epoch: 1, batch: 42411, sum loss: 4513.002441, avg loss: 2.599656, ppl: 13.459105 +epoch: 1, batch: 42412, sum loss: 4938.447266, avg loss: 2.650804, ppl: 14.165417 +epoch: 1, batch: 42413, sum loss: 4520.077637, avg loss: 2.774756, ppl: 16.034714 +epoch: 1, batch: 42414, sum loss: 3125.655518, avg loss: 2.362551, ppl: 10.618009 +epoch: 1, batch: 42415, sum loss: 5043.355469, avg loss: 2.736492, ppl: 15.432758 +epoch: 1, batch: 42416, sum loss: 4662.623047, avg loss: 2.786983, ppl: 16.231981 +epoch: 1, batch: 42417, sum loss: 3956.181396, avg loss: 2.658724, ppl: 14.278060 +epoch: 1, batch: 42418, sum loss: 4431.588867, avg loss: 2.778426, ppl: 16.093664 +epoch: 1, batch: 42419, sum loss: 5455.989258, avg loss: 2.729359, ppl: 15.323068 +epoch: 1, batch: 42420, sum loss: 4497.858887, avg loss: 2.607455, ppl: 13.564479 +epoch: 1, batch: 42421, sum loss: 3926.930176, avg loss: 2.277802, ppl: 9.755215 +epoch: 1, batch: 42422, sum loss: 4258.920898, avg loss: 2.411620, ppl: 11.152012 +epoch: 1, batch: 42423, sum loss: 5066.572266, avg loss: 2.588948, ppl: 13.315763 +epoch: 1, batch: 42424, sum loss: 5124.380371, avg loss: 2.789538, ppl: 16.273495 +epoch: 1, batch: 42425, sum loss: 4562.886719, avg loss: 2.646686, ppl: 14.107214 +epoch: 1, batch: 42426, sum loss: 5772.937988, avg loss: 2.898062, ppl: 18.138948 +epoch: 1, batch: 42427, sum loss: 4096.996094, avg loss: 2.435788, ppl: 11.424823 +epoch: 1, batch: 42428, sum loss: 4774.013672, avg loss: 3.013897, ppl: 20.366623 +epoch: 1, batch: 42429, sum loss: 5156.954102, avg loss: 2.551685, ppl: 12.828697 +epoch: 1, batch: 42430, sum loss: 4429.146484, avg loss: 2.687589, ppl: 14.696199 +epoch: 1, batch: 42431, sum loss: 3898.022705, avg loss: 2.572952, ppl: 13.104455 +epoch: 1, batch: 42432, sum loss: 4309.653809, avg loss: 2.722460, ppl: 15.217705 +epoch: 1, batch: 42433, sum loss: 3835.138428, avg loss: 2.321512, ppl: 10.191076 +epoch: 1, batch: 42434, sum loss: 
3691.454102, avg loss: 2.419039, ppl: 11.235063 +epoch: 1, batch: 42435, sum loss: 4183.742676, avg loss: 2.752462, ppl: 15.681197 +epoch: 1, batch: 42436, sum loss: 4998.445312, avg loss: 2.841640, ppl: 17.143866 +epoch: 1, batch: 42437, sum loss: 4677.953125, avg loss: 2.641419, ppl: 14.033101 +epoch: 1, batch: 42438, sum loss: 4387.665039, avg loss: 2.466366, ppl: 11.779560 +epoch: 1, batch: 42439, sum loss: 3511.143555, avg loss: 2.320650, ppl: 10.182292 +epoch: 1, batch: 42440, sum loss: 4614.503418, avg loss: 2.741832, ppl: 15.515384 +epoch: 1, batch: 42441, sum loss: 3461.135010, avg loss: 2.284578, ppl: 9.821537 +epoch: 1, batch: 42442, sum loss: 4071.296631, avg loss: 2.608134, ppl: 13.573696 +epoch: 1, batch: 42443, sum loss: 5041.188477, avg loss: 2.742758, ppl: 15.529750 +epoch: 1, batch: 42444, sum loss: 3706.970703, avg loss: 2.315410, ppl: 10.129071 +epoch: 1, batch: 42445, sum loss: 4565.849609, avg loss: 2.643804, ppl: 14.066612 +epoch: 1, batch: 42446, sum loss: 4647.819824, avg loss: 2.642308, ppl: 14.045587 +epoch: 1, batch: 42447, sum loss: 4321.184570, avg loss: 2.527009, ppl: 12.516009 +epoch: 1, batch: 42448, sum loss: 5691.150391, avg loss: 3.071317, ppl: 21.570292 +epoch: 1, batch: 42449, sum loss: 3860.299316, avg loss: 2.239153, ppl: 9.385378 +epoch: 1, batch: 42450, sum loss: 5440.902344, avg loss: 2.763282, ppl: 15.851784 +epoch: 1, batch: 42451, sum loss: 4166.036133, avg loss: 2.541816, ppl: 12.702715 +epoch: 1, batch: 42452, sum loss: 4011.424561, avg loss: 2.394881, ppl: 10.966887 +epoch: 1, batch: 42453, sum loss: 4269.802734, avg loss: 2.721353, ppl: 15.200872 +epoch: 1, batch: 42454, sum loss: 3493.093018, avg loss: 2.164246, ppl: 8.708035 +epoch: 1, batch: 42455, sum loss: 4609.734863, avg loss: 2.836760, ppl: 17.060396 +epoch: 1, batch: 42456, sum loss: 3520.921143, avg loss: 2.364621, ppl: 10.640010 +epoch: 1, batch: 42457, sum loss: 4792.409180, avg loss: 2.453871, ppl: 11.633286 +epoch: 1, batch: 42458, sum loss: 
4090.596191, avg loss: 2.563030, ppl: 12.975076 +epoch: 1, batch: 42459, sum loss: 3981.009521, avg loss: 2.537291, ppl: 12.645369 +epoch: 1, batch: 42460, sum loss: 3512.764404, avg loss: 2.323257, ppl: 10.208870 +epoch: 1, batch: 42461, sum loss: 3804.274902, avg loss: 2.596775, ppl: 13.420382 +epoch: 1, batch: 42462, sum loss: 4974.248535, avg loss: 2.835946, ppl: 17.046513 +epoch: 1, batch: 42463, sum loss: 4848.828613, avg loss: 2.695291, ppl: 14.809829 +epoch: 1, batch: 42464, sum loss: 3519.634521, avg loss: 2.248968, ppl: 9.477946 +epoch: 1, batch: 42465, sum loss: 4116.473633, avg loss: 2.302278, ppl: 9.996932 +epoch: 1, batch: 42466, sum loss: 4103.767090, avg loss: 2.525395, ppl: 12.495832 +epoch: 1, batch: 42467, sum loss: 4567.019531, avg loss: 2.689646, ppl: 14.726469 +epoch: 1, batch: 42468, sum loss: 4845.296387, avg loss: 2.505324, ppl: 12.247525 +epoch: 1, batch: 42469, sum loss: 4030.014160, avg loss: 2.560365, ppl: 12.940536 +epoch: 1, batch: 42470, sum loss: 4805.393555, avg loss: 2.578001, ppl: 13.170780 +epoch: 1, batch: 42471, sum loss: 4467.384277, avg loss: 2.686341, ppl: 14.677865 +epoch: 1, batch: 42472, sum loss: 4822.615234, avg loss: 2.580318, ppl: 13.201342 +epoch: 1, batch: 42473, sum loss: 4084.533203, avg loss: 2.565661, ppl: 13.009260 +epoch: 1, batch: 42474, sum loss: 4549.909180, avg loss: 2.463405, ppl: 11.744739 +epoch: 1, batch: 42475, sum loss: 4720.160156, avg loss: 2.672797, ppl: 14.480417 +epoch: 1, batch: 42476, sum loss: 5084.783203, avg loss: 2.925652, ppl: 18.646379 +epoch: 1, batch: 42477, sum loss: 4487.945312, avg loss: 2.813759, ppl: 16.672470 +epoch: 1, batch: 42478, sum loss: 4132.448242, avg loss: 2.592502, ppl: 13.363166 +epoch: 1, batch: 42479, sum loss: 4057.319824, avg loss: 2.342563, ppl: 10.407882 +epoch: 1, batch: 42480, sum loss: 4028.143066, avg loss: 2.358397, ppl: 10.573993 +epoch: 1, batch: 42481, sum loss: 4686.125488, avg loss: 2.685459, ppl: 14.664927 +epoch: 1, batch: 42482, sum loss: 
4520.143066, avg loss: 2.657345, ppl: 14.258377 +epoch: 1, batch: 42483, sum loss: 3662.680176, avg loss: 2.484858, ppl: 11.999413 +epoch: 1, batch: 42484, sum loss: 3975.638672, avg loss: 2.464748, ppl: 11.760520 +epoch: 1, batch: 42485, sum loss: 4407.361328, avg loss: 2.592566, ppl: 13.364014 +epoch: 1, batch: 42486, sum loss: 4657.255371, avg loss: 2.685845, ppl: 14.670591 +epoch: 1, batch: 42487, sum loss: 4404.945801, avg loss: 2.598788, ppl: 13.447431 +epoch: 1, batch: 42488, sum loss: 3834.741211, avg loss: 2.354046, ppl: 10.528081 +epoch: 1, batch: 42489, sum loss: 5503.809570, avg loss: 2.751905, ppl: 15.672459 +epoch: 1, batch: 42490, sum loss: 5051.430664, avg loss: 2.792389, ppl: 16.319956 +epoch: 1, batch: 42491, sum loss: 4208.652832, avg loss: 2.572526, ppl: 13.098873 +epoch: 1, batch: 42492, sum loss: 4174.854980, avg loss: 2.564407, ppl: 12.992952 +epoch: 1, batch: 42493, sum loss: 4607.952148, avg loss: 2.755952, ppl: 15.736020 +epoch: 1, batch: 42494, sum loss: 5147.506836, avg loss: 2.671254, ppl: 14.458094 +epoch: 1, batch: 42495, sum loss: 4086.619873, avg loss: 2.324585, ppl: 10.222434 +epoch: 1, batch: 42496, sum loss: 4603.401855, avg loss: 2.621527, ppl: 13.756720 +epoch: 1, batch: 42497, sum loss: 5217.737305, avg loss: 2.914937, ppl: 18.447651 +epoch: 1, batch: 42498, sum loss: 4492.163574, avg loss: 2.502598, ppl: 12.214186 +epoch: 1, batch: 42499, sum loss: 3708.916016, avg loss: 2.456236, ppl: 11.660836 +epoch: 1, batch: 42500, sum loss: 4375.114258, avg loss: 2.434677, ppl: 11.412130 +epoch: 1, batch: 42501, sum loss: 5283.787109, avg loss: 2.925685, ppl: 18.646994 +epoch: 1, batch: 42502, sum loss: 3179.187744, avg loss: 2.295443, ppl: 9.928829 +epoch: 1, batch: 42503, sum loss: 4136.069336, avg loss: 2.675336, ppl: 14.517225 +epoch: 1, batch: 42504, sum loss: 3526.196533, avg loss: 2.273499, ppl: 9.713326 +epoch: 1, batch: 42505, sum loss: 3989.272949, avg loss: 2.480891, ppl: 11.951909 +epoch: 1, batch: 42506, sum loss: 
5042.372559, avg loss: 2.793558, ppl: 16.339052 +epoch: 1, batch: 42507, sum loss: 4436.174805, avg loss: 2.598814, ppl: 13.447773 +epoch: 1, batch: 42508, sum loss: 4844.827148, avg loss: 2.711151, ppl: 15.046586 +epoch: 1, batch: 42509, sum loss: 4359.815430, avg loss: 2.789389, ppl: 16.271082 +epoch: 1, batch: 42510, sum loss: 5251.938477, avg loss: 2.784697, ppl: 16.194910 +epoch: 1, batch: 42511, sum loss: 4245.266113, avg loss: 2.341570, ppl: 10.397547 +epoch: 1, batch: 42512, sum loss: 4545.699219, avg loss: 2.720347, ppl: 15.185586 +epoch: 1, batch: 42513, sum loss: 5970.043457, avg loss: 2.708731, ppl: 15.010218 +epoch: 1, batch: 42514, sum loss: 4050.062012, avg loss: 2.561709, ppl: 12.957943 +epoch: 1, batch: 42515, sum loss: 4015.660889, avg loss: 2.448574, ppl: 11.571832 +epoch: 1, batch: 42516, sum loss: 4509.057617, avg loss: 2.581029, ppl: 13.210724 +epoch: 1, batch: 42517, sum loss: 4437.312988, avg loss: 2.686025, ppl: 14.673232 +epoch: 1, batch: 42518, sum loss: 4269.504395, avg loss: 2.556590, ppl: 12.891776 +epoch: 1, batch: 42519, sum loss: 4336.608398, avg loss: 2.613989, ppl: 13.653411 +epoch: 1, batch: 42520, sum loss: 4712.708984, avg loss: 2.597965, ppl: 13.436371 +epoch: 1, batch: 42521, sum loss: 3919.950928, avg loss: 2.557046, ppl: 12.897657 +epoch: 1, batch: 42522, sum loss: 4041.161377, avg loss: 2.430043, ppl: 11.359370 +epoch: 1, batch: 42523, sum loss: 4196.718262, avg loss: 2.619674, ppl: 13.731249 +epoch: 1, batch: 42524, sum loss: 4015.400146, avg loss: 2.716780, ppl: 15.131516 +epoch: 1, batch: 42525, sum loss: 3930.180664, avg loss: 2.548755, ppl: 12.791174 +epoch: 1, batch: 42526, sum loss: 4717.487305, avg loss: 2.626663, ppl: 13.827557 +epoch: 1, batch: 42527, sum loss: 5811.789551, avg loss: 2.796819, ppl: 16.392416 +epoch: 1, batch: 42528, sum loss: 5014.818359, avg loss: 2.769088, ppl: 15.944091 +epoch: 1, batch: 42529, sum loss: 3799.925293, avg loss: 2.355812, ppl: 10.546693 +epoch: 1, batch: 42530, sum loss: 
4292.696289, avg loss: 2.384831, ppl: 10.857232 +epoch: 1, batch: 42531, sum loss: 3764.166504, avg loss: 2.436354, ppl: 11.431285 +epoch: 1, batch: 42532, sum loss: 4134.675293, avg loss: 2.499804, ppl: 12.180103 +epoch: 1, batch: 42533, sum loss: 4758.934570, avg loss: 2.487681, ppl: 12.033343 +epoch: 1, batch: 42534, sum loss: 5264.492676, avg loss: 2.809228, ppl: 16.597095 +epoch: 1, batch: 42535, sum loss: 3292.690186, avg loss: 2.163397, ppl: 8.700644 +epoch: 1, batch: 42536, sum loss: 4916.951172, avg loss: 2.656376, ppl: 14.244568 +epoch: 1, batch: 42537, sum loss: 3785.906006, avg loss: 2.304264, ppl: 10.016804 +epoch: 1, batch: 42538, sum loss: 3813.424072, avg loss: 2.368586, ppl: 10.682281 +epoch: 1, batch: 42539, sum loss: 3952.960205, avg loss: 2.464439, ppl: 11.756886 +epoch: 1, batch: 42540, sum loss: 3516.172119, avg loss: 2.570301, ppl: 13.069761 +epoch: 1, batch: 42541, sum loss: 4441.108887, avg loss: 2.635673, ppl: 13.952697 +epoch: 1, batch: 42542, sum loss: 3779.160889, avg loss: 2.536350, ppl: 12.633471 +epoch: 1, batch: 42543, sum loss: 4658.806641, avg loss: 2.866958, ppl: 17.583447 +epoch: 1, batch: 42544, sum loss: 4446.447266, avg loss: 2.696451, ppl: 14.827013 +epoch: 1, batch: 42545, sum loss: 4932.869629, avg loss: 2.839879, ppl: 17.113695 +epoch: 1, batch: 42546, sum loss: 4539.268555, avg loss: 2.503733, ppl: 12.228061 +epoch: 1, batch: 42547, sum loss: 4434.317871, avg loss: 2.417840, ppl: 11.221589 +epoch: 1, batch: 42548, sum loss: 4895.776855, avg loss: 2.748892, ppl: 15.625315 +epoch: 1, batch: 42549, sum loss: 4351.641602, avg loss: 2.574936, ppl: 13.130476 +epoch: 1, batch: 42550, sum loss: 2962.556152, avg loss: 2.160872, ppl: 8.678706 +epoch: 1, batch: 42551, sum loss: 3931.368164, avg loss: 2.510452, ppl: 12.310496 +epoch: 1, batch: 42552, sum loss: 4874.287598, avg loss: 2.709443, ppl: 15.020905 +epoch: 1, batch: 42553, sum loss: 4862.793457, avg loss: 2.855428, ppl: 17.381870 +epoch: 1, batch: 42554, sum loss: 
4235.530273, avg loss: 2.542335, ppl: 12.709313 +epoch: 1, batch: 42555, sum loss: 3660.078857, avg loss: 2.297601, ppl: 9.950285 +epoch: 1, batch: 42556, sum loss: 4578.548340, avg loss: 2.602927, ppl: 13.503203 +epoch: 1, batch: 42557, sum loss: 4445.476562, avg loss: 2.589095, ppl: 13.317719 +epoch: 1, batch: 42558, sum loss: 4151.084473, avg loss: 2.515809, ppl: 12.376616 +epoch: 1, batch: 42559, sum loss: 4595.535645, avg loss: 2.665624, ppl: 14.376920 +epoch: 1, batch: 42560, sum loss: 5001.561523, avg loss: 2.683241, ppl: 14.632442 +epoch: 1, batch: 42561, sum loss: 4050.446045, avg loss: 2.379816, ppl: 10.802911 +epoch: 1, batch: 42562, sum loss: 4482.482910, avg loss: 2.594029, ppl: 13.383592 +epoch: 1, batch: 42563, sum loss: 4763.679688, avg loss: 2.725217, ppl: 15.259730 +epoch: 1, batch: 42564, sum loss: 4563.423340, avg loss: 2.654697, ppl: 14.220673 +epoch: 1, batch: 42565, sum loss: 4679.126953, avg loss: 2.754048, ppl: 15.706079 +epoch: 1, batch: 42566, sum loss: 4110.038574, avg loss: 2.353974, ppl: 10.527324 +epoch: 1, batch: 42567, sum loss: 3501.781982, avg loss: 2.387036, ppl: 10.881195 +epoch: 1, batch: 42568, sum loss: 4583.910156, avg loss: 2.611915, ppl: 13.625113 +epoch: 1, batch: 42569, sum loss: 4165.396484, avg loss: 2.503243, ppl: 12.222069 +epoch: 1, batch: 42570, sum loss: 4698.875488, avg loss: 2.866916, ppl: 17.582708 +epoch: 1, batch: 42571, sum loss: 4397.326172, avg loss: 2.564039, ppl: 12.988165 +epoch: 1, batch: 42572, sum loss: 3583.322266, avg loss: 2.561346, ppl: 12.953236 +epoch: 1, batch: 42573, sum loss: 5122.407227, avg loss: 2.714577, ppl: 15.098226 +epoch: 1, batch: 42574, sum loss: 4434.175293, avg loss: 2.677642, ppl: 14.550744 +epoch: 1, batch: 42575, sum loss: 6273.893066, avg loss: 2.957988, ppl: 19.259188 +epoch: 1, batch: 42576, sum loss: 4753.913574, avg loss: 2.573857, ppl: 13.116318 +epoch: 1, batch: 42577, sum loss: 3701.324707, avg loss: 2.239156, ppl: 9.385405 +epoch: 1, batch: 42578, sum loss: 
4464.210938, avg loss: 2.604557, ppl: 13.525236 +epoch: 1, batch: 42579, sum loss: 4141.842773, avg loss: 2.408048, ppl: 11.112250 +epoch: 1, batch: 42580, sum loss: 5055.169922, avg loss: 2.852805, ppl: 17.336336 +epoch: 1, batch: 42581, sum loss: 4029.686035, avg loss: 2.277946, ppl: 9.756618 +epoch: 1, batch: 42582, sum loss: 4777.040039, avg loss: 2.833357, ppl: 17.002443 +epoch: 1, batch: 42583, sum loss: 3530.392334, avg loss: 2.479208, ppl: 11.931813 +epoch: 1, batch: 42584, sum loss: 3560.599609, avg loss: 2.412330, ppl: 11.159938 +epoch: 1, batch: 42585, sum loss: 3733.934082, avg loss: 2.404336, ppl: 11.071079 +epoch: 1, batch: 42586, sum loss: 4544.221680, avg loss: 2.623685, ppl: 13.786428 +epoch: 1, batch: 42587, sum loss: 4197.305664, avg loss: 2.523936, ppl: 12.477612 +epoch: 1, batch: 42588, sum loss: 4953.826660, avg loss: 2.964588, ppl: 19.386717 +epoch: 1, batch: 42589, sum loss: 3912.442383, avg loss: 2.419569, ppl: 11.241011 +epoch: 1, batch: 42590, sum loss: 4935.327148, avg loss: 2.802571, ppl: 16.486977 +epoch: 1, batch: 42591, sum loss: 4303.748047, avg loss: 2.548104, ppl: 12.782848 +epoch: 1, batch: 42592, sum loss: 4263.445801, avg loss: 2.681412, ppl: 14.605709 +epoch: 1, batch: 42593, sum loss: 4983.182129, avg loss: 2.738012, ppl: 15.456232 +epoch: 1, batch: 42594, sum loss: 3669.028809, avg loss: 2.412248, ppl: 11.159018 +epoch: 1, batch: 42595, sum loss: 4716.154297, avg loss: 2.684208, ppl: 14.646603 +epoch: 1, batch: 42596, sum loss: 4577.176270, avg loss: 2.534428, ppl: 12.609212 +epoch: 1, batch: 42597, sum loss: 4562.100586, avg loss: 2.577458, ppl: 13.163632 +epoch: 1, batch: 42598, sum loss: 4454.953613, avg loss: 2.614409, ppl: 13.659147 +epoch: 1, batch: 42599, sum loss: 4180.252930, avg loss: 2.647405, ppl: 14.117358 +epoch: 1, batch: 42600, sum loss: 3864.659668, avg loss: 2.524272, ppl: 12.481802 +epoch: 1, batch: 42601, sum loss: 4517.708008, avg loss: 2.723151, ppl: 15.228234 +epoch: 1, batch: 42602, sum loss: 
4363.516602, avg loss: 2.529575, ppl: 12.548170 +epoch: 1, batch: 42603, sum loss: 5241.372070, avg loss: 2.660595, ppl: 14.304797 +epoch: 1, batch: 42604, sum loss: 4226.297852, avg loss: 2.488986, ppl: 12.049049 +epoch: 1, batch: 42605, sum loss: 4898.205566, avg loss: 2.707687, ppl: 14.994555 +epoch: 1, batch: 42606, sum loss: 3484.089844, avg loss: 2.609805, ppl: 13.596401 +epoch: 1, batch: 42607, sum loss: 3924.896484, avg loss: 2.362972, ppl: 10.622475 +epoch: 1, batch: 42608, sum loss: 4713.645996, avg loss: 2.666090, ppl: 14.383613 +epoch: 1, batch: 42609, sum loss: 3935.354980, avg loss: 2.587347, ppl: 13.294458 +epoch: 1, batch: 42610, sum loss: 4699.792969, avg loss: 2.764584, ppl: 15.872440 +epoch: 1, batch: 42611, sum loss: 3843.236816, avg loss: 2.563867, ppl: 12.985938 +epoch: 1, batch: 42612, sum loss: 3822.851562, avg loss: 2.408854, ppl: 11.121212 +epoch: 1, batch: 42613, sum loss: 4790.408691, avg loss: 2.564459, ppl: 12.993622 +epoch: 1, batch: 42614, sum loss: 3706.348389, avg loss: 2.253099, ppl: 9.517186 +epoch: 1, batch: 42615, sum loss: 4542.026855, avg loss: 3.000018, ppl: 20.085896 +epoch: 1, batch: 42616, sum loss: 3339.674072, avg loss: 2.144942, ppl: 8.541542 +epoch: 1, batch: 42617, sum loss: 4853.974609, avg loss: 2.553380, ppl: 12.850459 +epoch: 1, batch: 42618, sum loss: 3784.981201, avg loss: 2.538552, ppl: 12.661327 +epoch: 1, batch: 42619, sum loss: 4516.444336, avg loss: 2.636570, ppl: 13.965220 +epoch: 1, batch: 42620, sum loss: 5216.189453, avg loss: 2.784938, ppl: 16.198818 +epoch: 1, batch: 42621, sum loss: 5322.175781, avg loss: 2.740564, ppl: 15.495728 +epoch: 1, batch: 42622, sum loss: 3762.218262, avg loss: 2.384169, ppl: 10.850039 +epoch: 1, batch: 42623, sum loss: 4568.276855, avg loss: 2.572228, ppl: 13.094967 +epoch: 1, batch: 42624, sum loss: 4997.963379, avg loss: 2.755217, ppl: 15.724450 +epoch: 1, batch: 42625, sum loss: 3948.042480, avg loss: 2.374048, ppl: 10.740788 +epoch: 1, batch: 42626, sum loss: 
3948.443359, avg loss: 2.407588, ppl: 11.107133 +epoch: 1, batch: 42627, sum loss: 4397.764648, avg loss: 2.577822, ppl: 13.168428 +epoch: 1, batch: 42628, sum loss: 5236.773438, avg loss: 2.767851, ppl: 15.924370 +epoch: 1, batch: 42629, sum loss: 4589.670898, avg loss: 2.429683, ppl: 11.355282 +epoch: 1, batch: 42630, sum loss: 4317.814453, avg loss: 2.644099, ppl: 14.070768 +epoch: 1, batch: 42631, sum loss: 3893.350342, avg loss: 2.447109, ppl: 11.554893 +epoch: 1, batch: 42632, sum loss: 4106.998047, avg loss: 2.605963, ppl: 13.544265 +epoch: 1, batch: 42633, sum loss: 3763.659424, avg loss: 2.206131, ppl: 9.080516 +epoch: 1, batch: 42634, sum loss: 3824.712402, avg loss: 2.385972, ppl: 10.869617 +epoch: 1, batch: 42635, sum loss: 4675.567383, avg loss: 2.635607, ppl: 13.951782 +epoch: 1, batch: 42636, sum loss: 5043.299805, avg loss: 2.952752, ppl: 19.158598 +epoch: 1, batch: 42637, sum loss: 4216.672363, avg loss: 2.560214, ppl: 12.938583 +epoch: 1, batch: 42638, sum loss: 3456.652344, avg loss: 2.262207, ppl: 9.604262 +epoch: 1, batch: 42639, sum loss: 4281.922852, avg loss: 2.495293, ppl: 12.125287 +epoch: 1, batch: 42640, sum loss: 3407.005371, avg loss: 2.392560, ppl: 10.941468 +epoch: 1, batch: 42641, sum loss: 4628.065918, avg loss: 2.662869, ppl: 14.337360 +epoch: 1, batch: 42642, sum loss: 4566.742188, avg loss: 2.822461, ppl: 16.818192 +epoch: 1, batch: 42643, sum loss: 3697.521240, avg loss: 2.358113, ppl: 10.570986 +epoch: 1, batch: 42644, sum loss: 4049.826172, avg loss: 2.472421, ppl: 11.851106 +epoch: 1, batch: 42645, sum loss: 4375.083008, avg loss: 2.578128, ppl: 13.172455 +epoch: 1, batch: 42646, sum loss: 5207.483398, avg loss: 2.923910, ppl: 18.613924 +epoch: 1, batch: 42647, sum loss: 3630.518555, avg loss: 2.402726, ppl: 11.053264 +epoch: 1, batch: 42648, sum loss: 4646.387695, avg loss: 2.670338, ppl: 14.444846 +epoch: 1, batch: 42649, sum loss: 3483.953369, avg loss: 2.199466, ppl: 9.020191 +epoch: 1, batch: 42650, sum loss: 
4839.413086, avg loss: 2.641601, ppl: 14.035658 +epoch: 1, batch: 42651, sum loss: 4874.448730, avg loss: 2.918832, ppl: 18.519636 +epoch: 1, batch: 42652, sum loss: 4057.538574, avg loss: 2.308042, ppl: 10.054721 +epoch: 1, batch: 42653, sum loss: 4739.390137, avg loss: 2.702047, ppl: 14.910220 +epoch: 1, batch: 42654, sum loss: 4497.550781, avg loss: 2.744082, ppl: 15.550336 +epoch: 1, batch: 42655, sum loss: 3961.710449, avg loss: 2.591047, ppl: 13.343733 +epoch: 1, batch: 42656, sum loss: 4957.127930, avg loss: 2.808571, ppl: 16.586201 +epoch: 1, batch: 42657, sum loss: 3545.084473, avg loss: 2.453346, ppl: 11.627184 +epoch: 1, batch: 42658, sum loss: 3882.841064, avg loss: 2.471573, ppl: 11.841060 +epoch: 1, batch: 42659, sum loss: 3833.219482, avg loss: 2.489104, ppl: 12.050468 +epoch: 1, batch: 42660, sum loss: 4549.254883, avg loss: 2.568749, ppl: 13.049492 +epoch: 1, batch: 42661, sum loss: 5209.910156, avg loss: 2.737735, ppl: 15.451951 +epoch: 1, batch: 42662, sum loss: 4553.351074, avg loss: 2.856556, ppl: 17.401501 +epoch: 1, batch: 42663, sum loss: 4675.204590, avg loss: 2.633918, ppl: 13.928235 +epoch: 1, batch: 42664, sum loss: 3847.349121, avg loss: 2.575200, ppl: 13.133945 +epoch: 1, batch: 42665, sum loss: 5280.910156, avg loss: 2.892065, ppl: 18.030500 +epoch: 1, batch: 42666, sum loss: 5025.245117, avg loss: 2.552181, ppl: 12.835073 +epoch: 1, batch: 42667, sum loss: 4718.029297, avg loss: 2.533850, ppl: 12.601935 +epoch: 1, batch: 42668, sum loss: 4745.286621, avg loss: 2.806201, ppl: 16.546944 +epoch: 1, batch: 42669, sum loss: 3609.800537, avg loss: 2.482669, ppl: 11.973177 +epoch: 1, batch: 42670, sum loss: 4593.303711, avg loss: 2.574722, ppl: 13.127667 +epoch: 1, batch: 42671, sum loss: 4363.513672, avg loss: 2.781080, ppl: 16.136431 +epoch: 1, batch: 42672, sum loss: 4338.115723, avg loss: 2.621218, ppl: 13.752463 +epoch: 1, batch: 42673, sum loss: 4188.206543, avg loss: 2.590109, ppl: 13.331229 +epoch: 1, batch: 42674, sum loss: 
5505.867676, avg loss: 2.845410, ppl: 17.208607 +epoch: 1, batch: 42675, sum loss: 5737.141602, avg loss: 2.787727, ppl: 16.244049 +epoch: 1, batch: 42676, sum loss: 5138.594727, avg loss: 2.652863, ppl: 14.194613 +epoch: 1, batch: 42677, sum loss: 4504.487793, avg loss: 2.605256, ppl: 13.534691 +epoch: 1, batch: 42678, sum loss: 4302.599121, avg loss: 2.752783, ppl: 15.686219 +epoch: 1, batch: 42679, sum loss: 4508.610840, avg loss: 2.499230, ppl: 12.173116 +epoch: 1, batch: 42680, sum loss: 5230.524902, avg loss: 2.708713, ppl: 15.009946 +epoch: 1, batch: 42681, sum loss: 3701.405518, avg loss: 2.417639, ppl: 11.219342 +epoch: 1, batch: 42682, sum loss: 4365.821777, avg loss: 2.800399, ppl: 16.451206 +epoch: 1, batch: 42683, sum loss: 4707.315918, avg loss: 2.548628, ppl: 12.789546 +epoch: 1, batch: 42684, sum loss: 4789.010254, avg loss: 2.558232, ppl: 12.912968 +epoch: 1, batch: 42685, sum loss: 4573.065430, avg loss: 2.731819, ppl: 15.360809 +epoch: 1, batch: 42686, sum loss: 4861.348633, avg loss: 2.823083, ppl: 16.828648 +epoch: 1, batch: 42687, sum loss: 3910.174072, avg loss: 2.508130, ppl: 12.281937 +epoch: 1, batch: 42688, sum loss: 4406.849609, avg loss: 2.568094, ppl: 13.040948 +epoch: 1, batch: 42689, sum loss: 5067.841797, avg loss: 2.845504, ppl: 17.210222 +epoch: 1, batch: 42690, sum loss: 4211.678223, avg loss: 2.813412, ppl: 16.666691 +epoch: 1, batch: 42691, sum loss: 3884.319092, avg loss: 2.491545, ppl: 12.079930 +epoch: 1, batch: 42692, sum loss: 4490.687500, avg loss: 2.485162, ppl: 12.003062 +epoch: 1, batch: 42693, sum loss: 4514.518555, avg loss: 2.713052, ppl: 15.075215 +epoch: 1, batch: 42694, sum loss: 4060.633057, avg loss: 2.514324, ppl: 12.358252 +epoch: 1, batch: 42695, sum loss: 3829.437500, avg loss: 2.439132, ppl: 11.463089 +epoch: 1, batch: 42696, sum loss: 4936.543457, avg loss: 2.668402, ppl: 14.416912 +epoch: 1, batch: 42697, sum loss: 4922.984375, avg loss: 2.892470, ppl: 18.037809 +epoch: 1, batch: 42698, sum loss: 
5459.879395, avg loss: 2.775739, ppl: 16.050488 +epoch: 1, batch: 42699, sum loss: 4587.962402, avg loss: 2.667420, ppl: 14.402761 +epoch: 1, batch: 42700, sum loss: 3777.013184, avg loss: 2.371006, ppl: 10.708164 +epoch: 1, batch: 42701, sum loss: 5195.470703, avg loss: 2.704566, ppl: 14.947824 +epoch: 1, batch: 42702, sum loss: 4494.092773, avg loss: 2.670287, ppl: 14.444113 +epoch: 1, batch: 42703, sum loss: 4508.769531, avg loss: 2.544452, ppl: 12.736252 +epoch: 1, batch: 42704, sum loss: 4683.523926, avg loss: 2.634153, ppl: 13.931509 +epoch: 1, batch: 42705, sum loss: 4031.919434, avg loss: 2.415770, ppl: 11.198385 +epoch: 1, batch: 42706, sum loss: 4066.478027, avg loss: 2.448211, ppl: 11.567631 +epoch: 1, batch: 42707, sum loss: 4760.988281, avg loss: 2.517709, ppl: 12.400159 +epoch: 1, batch: 42708, sum loss: 4147.489258, avg loss: 2.507551, ppl: 12.274832 +epoch: 1, batch: 42709, sum loss: 4510.384277, avg loss: 2.726955, ppl: 15.286276 +epoch: 1, batch: 42710, sum loss: 4227.194824, avg loss: 2.771931, ppl: 15.989479 +epoch: 1, batch: 42711, sum loss: 4765.841309, avg loss: 2.565038, ppl: 13.001155 +epoch: 1, batch: 42712, sum loss: 4430.570312, avg loss: 2.533202, ppl: 12.593766 +epoch: 1, batch: 42713, sum loss: 4088.643066, avg loss: 2.660145, ppl: 14.298367 +epoch: 1, batch: 42714, sum loss: 4428.707031, avg loss: 2.712007, ppl: 15.059464 +epoch: 1, batch: 42715, sum loss: 5306.815430, avg loss: 2.956443, ppl: 19.229456 +epoch: 1, batch: 42716, sum loss: 4040.758789, avg loss: 2.595221, ppl: 13.399546 +epoch: 1, batch: 42717, sum loss: 4252.437500, avg loss: 2.446742, ppl: 11.550654 +epoch: 1, batch: 42718, sum loss: 4116.671875, avg loss: 2.462125, ppl: 11.729706 +epoch: 1, batch: 42719, sum loss: 4623.142578, avg loss: 2.793440, ppl: 16.337116 +epoch: 1, batch: 42720, sum loss: 3762.537354, avg loss: 2.384371, ppl: 10.852235 +epoch: 1, batch: 42721, sum loss: 4210.759277, avg loss: 2.676897, ppl: 14.539907 +epoch: 1, batch: 42722, sum loss: 
4223.632812, avg loss: 2.461324, ppl: 11.720325 +epoch: 1, batch: 42723, sum loss: 5314.860352, avg loss: 2.877564, ppl: 17.770929 +epoch: 1, batch: 42724, sum loss: 4511.556641, avg loss: 2.594340, ppl: 13.387743 +epoch: 1, batch: 42725, sum loss: 4089.024902, avg loss: 2.641489, ppl: 14.034085 +epoch: 1, batch: 42726, sum loss: 5526.660645, avg loss: 2.878469, ppl: 17.787024 +epoch: 1, batch: 42727, sum loss: 4203.638672, avg loss: 2.599653, ppl: 13.459067 +epoch: 1, batch: 42728, sum loss: 4239.112305, avg loss: 2.694922, ppl: 14.804363 +epoch: 1, batch: 42729, sum loss: 3873.894531, avg loss: 2.507375, ppl: 12.272675 +epoch: 1, batch: 42730, sum loss: 5260.928711, avg loss: 2.583953, ppl: 13.249414 +epoch: 1, batch: 42731, sum loss: 4345.437012, avg loss: 2.569744, ppl: 13.062482 +epoch: 1, batch: 42732, sum loss: 4142.126953, avg loss: 2.646726, ppl: 14.107780 +epoch: 1, batch: 42733, sum loss: 4924.310059, avg loss: 2.815500, ppl: 16.701529 +epoch: 1, batch: 42734, sum loss: 3934.293457, avg loss: 2.499551, ppl: 12.177026 +epoch: 1, batch: 42735, sum loss: 4896.354004, avg loss: 2.753855, ppl: 15.703051 +epoch: 1, batch: 42736, sum loss: 3600.114746, avg loss: 2.424320, ppl: 11.294541 +epoch: 1, batch: 42737, sum loss: 3689.538574, avg loss: 2.435339, ppl: 11.419689 +epoch: 1, batch: 42738, sum loss: 5058.333496, avg loss: 2.783893, ppl: 16.181896 +epoch: 1, batch: 42739, sum loss: 4298.458496, avg loss: 2.693270, ppl: 14.779923 +epoch: 1, batch: 42740, sum loss: 5193.762207, avg loss: 2.841227, ppl: 17.136772 +epoch: 1, batch: 42741, sum loss: 3997.154785, avg loss: 2.389214, ppl: 10.904920 +epoch: 1, batch: 42742, sum loss: 4358.801270, avg loss: 2.597617, ppl: 13.431691 +epoch: 1, batch: 42743, sum loss: 5781.604980, avg loss: 2.842480, ppl: 17.158268 +epoch: 1, batch: 42744, sum loss: 4150.850098, avg loss: 2.627120, ppl: 13.833875 +epoch: 1, batch: 42745, sum loss: 3698.383301, avg loss: 2.444404, ppl: 11.523681 +epoch: 1, batch: 42746, sum loss: 
4485.970703, avg loss: 2.745392, ppl: 15.570718 +epoch: 1, batch: 42747, sum loss: 4356.314941, avg loss: 2.674227, ppl: 14.501129 +epoch: 1, batch: 42748, sum loss: 4817.157227, avg loss: 2.820349, ppl: 16.782715 +epoch: 1, batch: 42749, sum loss: 4011.422119, avg loss: 2.347234, ppl: 10.456602 +epoch: 1, batch: 42750, sum loss: 5110.823730, avg loss: 2.773100, ppl: 16.008184 +epoch: 1, batch: 42751, sum loss: 4389.583984, avg loss: 2.440013, ppl: 11.473194 +epoch: 1, batch: 42752, sum loss: 5592.792480, avg loss: 2.960716, ppl: 19.311794 +epoch: 1, batch: 42753, sum loss: 3109.827393, avg loss: 2.283280, ppl: 9.808800 +epoch: 1, batch: 42754, sum loss: 4474.596191, avg loss: 2.565709, ppl: 13.009874 +epoch: 1, batch: 42755, sum loss: 4204.541992, avg loss: 2.649365, ppl: 14.145053 +epoch: 1, batch: 42756, sum loss: 5066.983398, avg loss: 2.716881, ppl: 15.133049 +epoch: 1, batch: 42757, sum loss: 4289.676270, avg loss: 2.576382, ppl: 13.149479 +epoch: 1, batch: 42758, sum loss: 5121.875000, avg loss: 2.877458, ppl: 17.769045 +epoch: 1, batch: 42759, sum loss: 3743.370117, avg loss: 2.445049, ppl: 11.531117 +epoch: 1, batch: 42760, sum loss: 3984.257568, avg loss: 2.529687, ppl: 12.549580 +epoch: 1, batch: 42761, sum loss: 3851.907227, avg loss: 2.424108, ppl: 11.292150 +epoch: 1, batch: 42762, sum loss: 4076.591064, avg loss: 2.728642, ppl: 15.312078 +epoch: 1, batch: 42763, sum loss: 3964.778809, avg loss: 2.613565, ppl: 13.647624 +epoch: 1, batch: 42764, sum loss: 5221.711426, avg loss: 2.745379, ppl: 15.570521 +epoch: 1, batch: 42765, sum loss: 4085.439697, avg loss: 2.362892, ppl: 10.621621 +epoch: 1, batch: 42766, sum loss: 4158.032227, avg loss: 2.319036, ppl: 10.165874 +epoch: 1, batch: 42767, sum loss: 3330.546387, avg loss: 2.265678, ppl: 9.637656 +epoch: 1, batch: 42768, sum loss: 3488.106934, avg loss: 2.285784, ppl: 9.833397 +epoch: 1, batch: 42769, sum loss: 5078.344727, avg loss: 2.629904, ppl: 13.872438 +epoch: 1, batch: 42770, sum loss: 
4719.326172, avg loss: 2.766311, ppl: 15.899866 +epoch: 1, batch: 42771, sum loss: 4549.029785, avg loss: 2.841368, ppl: 17.139194 +epoch: 1, batch: 42772, sum loss: 5469.798828, avg loss: 2.813683, ppl: 16.671202 +epoch: 1, batch: 42773, sum loss: 4873.843750, avg loss: 2.707691, ppl: 14.994613 +epoch: 1, batch: 42774, sum loss: 5040.571777, avg loss: 2.817536, ppl: 16.735561 +epoch: 1, batch: 42775, sum loss: 4012.351074, avg loss: 2.539463, ppl: 12.672861 +epoch: 1, batch: 42776, sum loss: 3607.199951, avg loss: 2.492882, ppl: 12.096085 +epoch: 1, batch: 42777, sum loss: 5116.452148, avg loss: 2.605118, ppl: 13.532825 +epoch: 1, batch: 42778, sum loss: 4423.645508, avg loss: 2.606745, ppl: 13.554852 +epoch: 1, batch: 42779, sum loss: 4469.250000, avg loss: 2.605977, ppl: 13.544446 +epoch: 1, batch: 42780, sum loss: 3384.023682, avg loss: 2.495593, ppl: 12.128919 +epoch: 1, batch: 42781, sum loss: 4830.365234, avg loss: 2.507978, ppl: 12.280074 +epoch: 1, batch: 42782, sum loss: 4861.916016, avg loss: 2.805491, ppl: 16.535192 +epoch: 1, batch: 42783, sum loss: 3924.504639, avg loss: 2.449753, ppl: 11.585485 +epoch: 1, batch: 42784, sum loss: 5015.301270, avg loss: 2.967634, ppl: 19.445856 +epoch: 1, batch: 42785, sum loss: 4482.545898, avg loss: 2.695457, ppl: 14.812293 +epoch: 1, batch: 42786, sum loss: 4853.272461, avg loss: 2.551668, ppl: 12.828486 +epoch: 1, batch: 42787, sum loss: 4038.502441, avg loss: 2.637820, ppl: 13.982692 +epoch: 1, batch: 42788, sum loss: 3999.227295, avg loss: 2.319738, ppl: 10.173005 +epoch: 1, batch: 42789, sum loss: 4626.118652, avg loss: 2.734113, ppl: 15.396077 +epoch: 1, batch: 42790, sum loss: 3505.553711, avg loss: 2.289715, ppl: 9.872125 +epoch: 1, batch: 42791, sum loss: 4072.611084, avg loss: 2.698881, ppl: 14.863090 +epoch: 1, batch: 42792, sum loss: 3905.912354, avg loss: 2.571371, ppl: 13.083748 +epoch: 1, batch: 42793, sum loss: 3985.344971, avg loss: 2.584530, ppl: 13.257055 +epoch: 1, batch: 42794, sum loss: 
3927.269043, avg loss: 2.560149, ppl: 12.937747 +epoch: 1, batch: 42795, sum loss: 5005.281738, avg loss: 2.818289, ppl: 16.748175 +epoch: 1, batch: 42796, sum loss: 4499.537598, avg loss: 2.683087, ppl: 14.630192 +epoch: 1, batch: 42797, sum loss: 4491.933594, avg loss: 2.779662, ppl: 16.113571 +epoch: 1, batch: 42798, sum loss: 6227.600586, avg loss: 3.284600, ppl: 26.698298 +epoch: 1, batch: 42799, sum loss: 4368.114258, avg loss: 2.631394, ppl: 13.893126 +epoch: 1, batch: 42800, sum loss: 4217.391113, avg loss: 2.611388, ppl: 13.617936 +epoch: 1, batch: 42801, sum loss: 3777.568115, avg loss: 2.605219, ppl: 13.534194 +epoch: 1, batch: 42802, sum loss: 4005.265625, avg loss: 2.530174, ppl: 12.555691 +epoch: 1, batch: 42803, sum loss: 3560.772949, avg loss: 2.355009, ppl: 10.538219 +epoch: 1, batch: 42804, sum loss: 4800.282715, avg loss: 2.569744, ppl: 13.062485 +epoch: 1, batch: 42805, sum loss: 4493.188965, avg loss: 2.672926, ppl: 14.482285 +epoch: 1, batch: 42806, sum loss: 4194.777344, avg loss: 2.597385, ppl: 13.428582 +epoch: 1, batch: 42807, sum loss: 4953.964355, avg loss: 2.789394, ppl: 16.271162 +epoch: 1, batch: 42808, sum loss: 4629.630371, avg loss: 2.559221, ppl: 12.925745 +epoch: 1, batch: 42809, sum loss: 3549.515869, avg loss: 2.250803, ppl: 9.495356 +epoch: 1, batch: 42810, sum loss: 4531.418457, avg loss: 2.561571, ppl: 12.956151 +epoch: 1, batch: 42811, sum loss: 4662.681152, avg loss: 2.762252, ppl: 15.835462 +epoch: 1, batch: 42812, sum loss: 4169.259766, avg loss: 2.564120, ppl: 12.989226 +epoch: 1, batch: 42813, sum loss: 4757.683594, avg loss: 2.393201, ppl: 10.948483 +epoch: 1, batch: 42814, sum loss: 4414.306641, avg loss: 2.478555, ppl: 11.924025 +epoch: 1, batch: 42815, sum loss: 4910.348633, avg loss: 2.693554, ppl: 14.784124 +epoch: 1, batch: 42816, sum loss: 5899.479980, avg loss: 3.006871, ppl: 20.224010 +epoch: 1, batch: 42817, sum loss: 3596.541260, avg loss: 2.311402, ppl: 10.088557 +epoch: 1, batch: 42818, sum loss: 
4433.339844, avg loss: 2.693402, ppl: 14.781882 +epoch: 1, batch: 42819, sum loss: 4238.499512, avg loss: 2.367877, ppl: 10.674704 +epoch: 1, batch: 42820, sum loss: 4462.431152, avg loss: 2.563143, ppl: 12.976533 +epoch: 1, batch: 42821, sum loss: 4339.720215, avg loss: 2.561818, ppl: 12.959357 +epoch: 1, batch: 42822, sum loss: 4309.390625, avg loss: 2.626076, ppl: 13.819432 +epoch: 1, batch: 42823, sum loss: 3952.268066, avg loss: 2.447225, ppl: 11.556231 +epoch: 1, batch: 42824, sum loss: 4159.648438, avg loss: 2.676737, ppl: 14.537574 +epoch: 1, batch: 42825, sum loss: 4397.883301, avg loss: 2.530428, ppl: 12.558876 +epoch: 1, batch: 42826, sum loss: 3828.928467, avg loss: 2.702137, ppl: 14.911567 +epoch: 1, batch: 42827, sum loss: 3398.476562, avg loss: 2.285458, ppl: 9.830191 +epoch: 1, batch: 42828, sum loss: 4379.614258, avg loss: 2.517020, ppl: 12.391611 +epoch: 1, batch: 42829, sum loss: 5119.951172, avg loss: 2.782582, ppl: 16.160694 +epoch: 1, batch: 42830, sum loss: 4255.591309, avg loss: 2.543689, ppl: 12.726530 +epoch: 1, batch: 42831, sum loss: 4633.544922, avg loss: 2.650769, ppl: 14.164934 +epoch: 1, batch: 42832, sum loss: 4177.582031, avg loss: 2.588341, ppl: 13.307673 +epoch: 1, batch: 42833, sum loss: 4622.537598, avg loss: 2.632425, ppl: 13.907449 +epoch: 1, batch: 42834, sum loss: 4507.751953, avg loss: 2.765492, ppl: 15.886854 +epoch: 1, batch: 42835, sum loss: 4635.315918, avg loss: 2.799104, ppl: 16.429914 +epoch: 1, batch: 42836, sum loss: 4178.181641, avg loss: 2.538385, ppl: 12.659212 +epoch: 1, batch: 42837, sum loss: 4238.411621, avg loss: 2.696191, ppl: 14.823156 +epoch: 1, batch: 42838, sum loss: 5046.402344, avg loss: 2.805115, ppl: 16.528980 +epoch: 1, batch: 42839, sum loss: 4509.044922, avg loss: 2.581022, ppl: 13.210627 +epoch: 1, batch: 42840, sum loss: 4386.571289, avg loss: 2.704421, ppl: 14.945664 +epoch: 1, batch: 42841, sum loss: 3885.862305, avg loss: 2.398680, ppl: 11.008640 +epoch: 1, batch: 42842, sum loss: 
3772.792725, avg loss: 2.500194, ppl: 12.184858 +epoch: 1, batch: 42843, sum loss: 4958.788086, avg loss: 2.623698, ppl: 13.786606 +epoch: 1, batch: 42844, sum loss: 6056.108398, avg loss: 2.852618, ppl: 17.333105 +epoch: 1, batch: 42845, sum loss: 4527.953125, avg loss: 2.666639, ppl: 14.391520 +epoch: 1, batch: 42846, sum loss: 4861.481445, avg loss: 2.702324, ppl: 14.914358 +epoch: 1, batch: 42847, sum loss: 4768.613281, avg loss: 2.602955, ppl: 13.503580 +epoch: 1, batch: 42848, sum loss: 4308.808594, avg loss: 2.496413, ppl: 12.138870 +epoch: 1, batch: 42849, sum loss: 4346.991211, avg loss: 2.478330, ppl: 11.921341 +epoch: 1, batch: 42850, sum loss: 3956.261230, avg loss: 2.434622, ppl: 11.411508 +epoch: 1, batch: 42851, sum loss: 4459.003418, avg loss: 2.527780, ppl: 12.525666 +epoch: 1, batch: 42852, sum loss: 3606.660889, avg loss: 2.338950, ppl: 10.370341 +epoch: 1, batch: 42853, sum loss: 4080.945801, avg loss: 2.597674, ppl: 13.432460 +epoch: 1, batch: 42854, sum loss: 4114.395508, avg loss: 2.546037, ppl: 12.756446 +epoch: 1, batch: 42855, sum loss: 5011.951172, avg loss: 2.806244, ppl: 16.547644 +epoch: 1, batch: 42856, sum loss: 3917.931396, avg loss: 2.357359, ppl: 10.563022 +epoch: 1, batch: 42857, sum loss: 4411.099609, avg loss: 2.635066, ppl: 13.944226 +epoch: 1, batch: 42858, sum loss: 4435.746582, avg loss: 2.575927, ppl: 13.143496 +epoch: 1, batch: 42859, sum loss: 4091.382080, avg loss: 2.528666, ppl: 12.536774 +epoch: 1, batch: 42860, sum loss: 4474.532715, avg loss: 2.551045, ppl: 12.820491 +epoch: 1, batch: 42861, sum loss: 4038.613770, avg loss: 2.517839, ppl: 12.401770 +epoch: 1, batch: 42862, sum loss: 4438.857422, avg loss: 2.651647, ppl: 14.177374 +epoch: 1, batch: 42863, sum loss: 3547.747559, avg loss: 2.321824, ppl: 10.194255 +epoch: 1, batch: 42864, sum loss: 4734.580078, avg loss: 2.732014, ppl: 15.363794 +epoch: 1, batch: 42865, sum loss: 5514.924805, avg loss: 2.706048, ppl: 14.969990 +epoch: 1, batch: 42866, sum loss: 
5297.049805, avg loss: 2.724820, ppl: 15.253670 +epoch: 1, batch: 42867, sum loss: 5665.937988, avg loss: 3.017006, ppl: 20.430040 +epoch: 1, batch: 42868, sum loss: 4920.940918, avg loss: 2.718752, ppl: 15.161391 +epoch: 1, batch: 42869, sum loss: 3994.887939, avg loss: 2.396453, ppl: 10.984151 +epoch: 1, batch: 42870, sum loss: 4702.531250, avg loss: 2.477625, ppl: 11.912933 +epoch: 1, batch: 42871, sum loss: 3929.346191, avg loss: 2.526911, ppl: 12.514786 +epoch: 1, batch: 42872, sum loss: 4819.799316, avg loss: 2.652614, ppl: 14.191084 +epoch: 1, batch: 42873, sum loss: 5304.595703, avg loss: 2.677736, ppl: 14.552111 +epoch: 1, batch: 42874, sum loss: 3655.324707, avg loss: 2.309112, ppl: 10.065483 +epoch: 1, batch: 42875, sum loss: 4636.292480, avg loss: 2.908590, ppl: 18.330935 +epoch: 1, batch: 42876, sum loss: 4183.683594, avg loss: 2.423918, ppl: 11.290012 +epoch: 1, batch: 42877, sum loss: 3618.967285, avg loss: 2.480444, ppl: 11.946564 +epoch: 1, batch: 42878, sum loss: 4493.285156, avg loss: 2.577903, ppl: 13.169493 +epoch: 1, batch: 42879, sum loss: 4920.669922, avg loss: 2.639844, ppl: 14.011024 +epoch: 1, batch: 42880, sum loss: 5169.174316, avg loss: 2.786617, ppl: 16.226030 +epoch: 1, batch: 42881, sum loss: 3949.657715, avg loss: 2.444095, ppl: 11.520121 +epoch: 1, batch: 42882, sum loss: 3847.222168, avg loss: 2.383657, ppl: 10.844488 +epoch: 1, batch: 42883, sum loss: 3943.366455, avg loss: 2.660841, ppl: 14.308321 +epoch: 1, batch: 42884, sum loss: 4583.660156, avg loss: 2.726746, ppl: 15.283076 +epoch: 1, batch: 42885, sum loss: 5593.076660, avg loss: 2.951492, ppl: 19.134474 +epoch: 1, batch: 42886, sum loss: 4066.085693, avg loss: 2.308964, ppl: 10.063993 +epoch: 1, batch: 42887, sum loss: 4081.514648, avg loss: 2.535102, ppl: 12.617720 +epoch: 1, batch: 42888, sum loss: 3910.807373, avg loss: 2.547757, ppl: 12.778412 +epoch: 1, batch: 42889, sum loss: 4269.923828, avg loss: 2.396142, ppl: 10.980725 +epoch: 1, batch: 42890, sum loss: 
3753.832764, avg loss: 2.311473, ppl: 10.089279 +epoch: 1, batch: 42891, sum loss: 4905.075684, avg loss: 2.648529, ppl: 14.133231 +epoch: 1, batch: 42892, sum loss: 4545.934570, avg loss: 2.685136, ppl: 14.660190 +epoch: 1, batch: 42893, sum loss: 4297.960449, avg loss: 2.659629, ppl: 14.290984 +epoch: 1, batch: 42894, sum loss: 3932.149414, avg loss: 2.533602, ppl: 12.598800 +epoch: 1, batch: 42895, sum loss: 4430.151367, avg loss: 2.602909, ppl: 13.502962 +epoch: 1, batch: 42896, sum loss: 4124.601562, avg loss: 2.632164, ppl: 13.903831 +epoch: 1, batch: 42897, sum loss: 4276.408691, avg loss: 2.583933, ppl: 13.249143 +epoch: 1, batch: 42898, sum loss: 3929.492432, avg loss: 2.446757, ppl: 11.550830 +epoch: 1, batch: 42899, sum loss: 3996.601562, avg loss: 2.767730, ppl: 15.922441 +epoch: 1, batch: 42900, sum loss: 4091.814697, avg loss: 2.259423, ppl: 9.577561 +epoch: 1, batch: 42901, sum loss: 3251.744873, avg loss: 2.278728, ppl: 9.764253 +epoch: 1, batch: 42902, sum loss: 4707.020508, avg loss: 2.685123, ppl: 14.660004 +epoch: 1, batch: 42903, sum loss: 4890.158203, avg loss: 2.669300, ppl: 14.429870 +epoch: 1, batch: 42904, sum loss: 4145.869629, avg loss: 2.463381, ppl: 11.744448 +epoch: 1, batch: 42905, sum loss: 4577.884277, avg loss: 2.438937, ppl: 11.460848 +epoch: 1, batch: 42906, sum loss: 4863.040527, avg loss: 2.622999, ppl: 13.776981 +epoch: 1, batch: 42907, sum loss: 4126.343750, avg loss: 2.585428, ppl: 13.268973 +epoch: 1, batch: 42908, sum loss: 5586.237793, avg loss: 2.761363, ppl: 15.821393 +epoch: 1, batch: 42909, sum loss: 4301.673340, avg loss: 2.726029, ppl: 15.272116 +epoch: 1, batch: 42910, sum loss: 4911.724609, avg loss: 2.572931, ppl: 13.104174 +epoch: 1, batch: 42911, sum loss: 4115.714844, avg loss: 2.451289, ppl: 11.603298 +epoch: 1, batch: 42912, sum loss: 4228.145508, avg loss: 2.490074, ppl: 12.062168 +epoch: 1, batch: 42913, sum loss: 4688.380371, avg loss: 2.635402, ppl: 13.948918 +epoch: 1, batch: 42914, sum loss: 
4562.806641, avg loss: 2.511176, ppl: 12.319407 +epoch: 1, batch: 42915, sum loss: 4735.731934, avg loss: 2.533832, ppl: 12.601702 +epoch: 1, batch: 42916, sum loss: 5817.167480, avg loss: 2.807513, ppl: 16.568665 +epoch: 1, batch: 42917, sum loss: 3379.258057, avg loss: 2.621612, ppl: 13.757884 +epoch: 1, batch: 42918, sum loss: 3833.960938, avg loss: 2.362268, ppl: 10.614999 +epoch: 1, batch: 42919, sum loss: 4718.760742, avg loss: 2.696435, ppl: 14.826776 +epoch: 1, batch: 42920, sum loss: 4742.713867, avg loss: 2.693193, ppl: 14.778796 +epoch: 1, batch: 42921, sum loss: 5234.118164, avg loss: 2.648845, ppl: 14.137703 +epoch: 1, batch: 42922, sum loss: 4641.025879, avg loss: 2.736454, ppl: 15.432166 +epoch: 1, batch: 42923, sum loss: 4930.450684, avg loss: 2.780852, ppl: 16.132761 +epoch: 1, batch: 42924, sum loss: 4557.803223, avg loss: 2.832693, ppl: 16.991158 +epoch: 1, batch: 42925, sum loss: 4091.070312, avg loss: 2.293201, ppl: 9.906598 +epoch: 1, batch: 42926, sum loss: 4816.351562, avg loss: 2.571464, ppl: 13.084965 +epoch: 1, batch: 42927, sum loss: 4264.252441, avg loss: 2.568827, ppl: 13.050506 +epoch: 1, batch: 42928, sum loss: 5331.282715, avg loss: 3.008625, ppl: 20.259516 +epoch: 1, batch: 42929, sum loss: 4754.429688, avg loss: 2.619521, ppl: 13.729141 +epoch: 1, batch: 42930, sum loss: 3907.377930, avg loss: 2.496727, ppl: 12.142686 +epoch: 1, batch: 42931, sum loss: 4822.398438, avg loss: 2.830046, ppl: 16.946243 +epoch: 1, batch: 42932, sum loss: 4600.553711, avg loss: 2.503021, ppl: 12.219358 +epoch: 1, batch: 42933, sum loss: 3373.122070, avg loss: 2.291523, ppl: 9.889991 +epoch: 1, batch: 42934, sum loss: 3875.045410, avg loss: 2.330154, ppl: 10.279521 +epoch: 1, batch: 42935, sum loss: 3906.396973, avg loss: 2.285779, ppl: 9.833348 +epoch: 1, batch: 42936, sum loss: 3734.593018, avg loss: 2.501402, ppl: 12.199585 +epoch: 1, batch: 42937, sum loss: 3556.879639, avg loss: 2.508378, ppl: 12.284986 +epoch: 1, batch: 42938, sum loss: 
4504.764160, avg loss: 2.689412, ppl: 14.723011 +epoch: 1, batch: 42939, sum loss: 4454.392090, avg loss: 2.603385, ppl: 13.509396 +epoch: 1, batch: 42940, sum loss: 3934.122314, avg loss: 2.262290, ppl: 9.605060 +epoch: 1, batch: 42941, sum loss: 5226.052734, avg loss: 2.757812, ppl: 15.765304 +epoch: 1, batch: 42942, sum loss: 4191.622070, avg loss: 2.569971, ppl: 13.065440 +epoch: 1, batch: 42943, sum loss: 3948.906494, avg loss: 2.317433, ppl: 10.149590 +epoch: 1, batch: 42944, sum loss: 3886.672852, avg loss: 2.483497, ppl: 11.983095 +epoch: 1, batch: 42945, sum loss: 4072.139893, avg loss: 2.422451, ppl: 11.273455 +epoch: 1, batch: 42946, sum loss: 4541.772461, avg loss: 2.861861, ppl: 17.494045 +epoch: 1, batch: 42947, sum loss: 3387.470459, avg loss: 2.336187, ppl: 10.341724 +epoch: 1, batch: 42948, sum loss: 5432.872070, avg loss: 2.776123, ppl: 16.056643 +epoch: 1, batch: 42949, sum loss: 3735.270752, avg loss: 2.675695, ppl: 14.522446 +epoch: 1, batch: 42950, sum loss: 3981.524658, avg loss: 2.420380, ppl: 11.250130 +epoch: 1, batch: 42951, sum loss: 3524.722900, avg loss: 2.565300, ppl: 13.004565 +epoch: 1, batch: 42952, sum loss: 5034.768555, avg loss: 2.563528, ppl: 12.981533 +epoch: 1, batch: 42953, sum loss: 4317.134766, avg loss: 2.907161, ppl: 18.304762 +epoch: 1, batch: 42954, sum loss: 4556.416992, avg loss: 2.995672, ppl: 19.998795 +epoch: 1, batch: 42955, sum loss: 4769.054688, avg loss: 2.683767, ppl: 14.640143 +epoch: 1, batch: 42956, sum loss: 3341.062500, avg loss: 2.248360, ppl: 9.472186 +epoch: 1, batch: 42957, sum loss: 4451.733887, avg loss: 2.662520, ppl: 14.332367 +epoch: 1, batch: 42958, sum loss: 4136.764160, avg loss: 2.550409, ppl: 12.812347 +epoch: 1, batch: 42959, sum loss: 3461.672119, avg loss: 2.351680, ppl: 10.503196 +epoch: 1, batch: 42960, sum loss: 4489.432129, avg loss: 2.516498, ppl: 12.385146 +epoch: 1, batch: 42961, sum loss: 4689.251465, avg loss: 2.750294, ppl: 15.647235 +epoch: 1, batch: 42962, sum loss: 
4193.808594, avg loss: 2.535555, ppl: 12.623440 +epoch: 1, batch: 42963, sum loss: 3901.373047, avg loss: 2.504091, ppl: 12.232430 +epoch: 1, batch: 42964, sum loss: 4341.118652, avg loss: 2.503529, ppl: 12.225557 +epoch: 1, batch: 42965, sum loss: 4818.822266, avg loss: 2.689075, ppl: 14.718056 +epoch: 1, batch: 42966, sum loss: 4662.124023, avg loss: 2.763559, ppl: 15.856176 +epoch: 1, batch: 42967, sum loss: 4514.858398, avg loss: 2.751285, ppl: 15.662743 +epoch: 1, batch: 42968, sum loss: 3854.622070, avg loss: 2.532603, ppl: 12.586228 +epoch: 1, batch: 42969, sum loss: 4088.769775, avg loss: 2.558680, ppl: 12.918748 +epoch: 1, batch: 42970, sum loss: 4322.215820, avg loss: 2.539492, ppl: 12.673233 +epoch: 1, batch: 42971, sum loss: 4998.600098, avg loss: 2.797202, ppl: 16.398701 +epoch: 1, batch: 42972, sum loss: 4261.242676, avg loss: 2.548590, ppl: 12.789061 +epoch: 1, batch: 42973, sum loss: 4021.933105, avg loss: 2.668834, ppl: 14.423145 +epoch: 1, batch: 42974, sum loss: 4820.060547, avg loss: 2.718590, ppl: 15.158940 +epoch: 1, batch: 42975, sum loss: 5359.486328, avg loss: 2.728863, ppl: 15.315460 +epoch: 1, batch: 42976, sum loss: 4211.951172, avg loss: 2.507114, ppl: 12.269468 +epoch: 1, batch: 42977, sum loss: 3660.148193, avg loss: 2.304879, ppl: 10.022969 +epoch: 1, batch: 42978, sum loss: 4965.519043, avg loss: 2.713398, ppl: 15.080439 +epoch: 1, batch: 42979, sum loss: 4476.693848, avg loss: 2.527777, ppl: 12.525636 +epoch: 1, batch: 42980, sum loss: 5154.645020, avg loss: 2.804486, ppl: 16.518579 +epoch: 1, batch: 42981, sum loss: 4086.128662, avg loss: 2.677673, ppl: 14.551191 +epoch: 1, batch: 42982, sum loss: 4577.170410, avg loss: 2.565679, ppl: 13.009483 +epoch: 1, batch: 42983, sum loss: 4396.757812, avg loss: 2.575722, ppl: 13.140804 +epoch: 1, batch: 42984, sum loss: 4604.399414, avg loss: 2.714858, ppl: 15.102469 +epoch: 1, batch: 42985, sum loss: 4578.557617, avg loss: 2.631355, ppl: 13.892582 +epoch: 1, batch: 42986, sum loss: 
4469.240234, avg loss: 2.676192, ppl: 14.529656 +epoch: 1, batch: 42987, sum loss: 3990.330566, avg loss: 2.390851, ppl: 10.922788 +epoch: 1, batch: 42988, sum loss: 3888.048340, avg loss: 2.404483, ppl: 11.072700 +epoch: 1, batch: 42989, sum loss: 4185.373535, avg loss: 2.408155, ppl: 11.113441 +epoch: 1, batch: 42990, sum loss: 4841.067383, avg loss: 2.785424, ppl: 16.206692 +epoch: 1, batch: 42991, sum loss: 4782.009766, avg loss: 2.695609, ppl: 14.814532 +epoch: 1, batch: 42992, sum loss: 5153.613770, avg loss: 2.916590, ppl: 18.478165 +epoch: 1, batch: 42993, sum loss: 4872.089844, avg loss: 2.604003, ppl: 13.517744 +epoch: 1, batch: 42994, sum loss: 4348.237305, avg loss: 2.456631, ppl: 11.665446 +epoch: 1, batch: 42995, sum loss: 4043.298828, avg loss: 2.361740, ppl: 10.609394 +epoch: 1, batch: 42996, sum loss: 3694.201660, avg loss: 2.263604, ppl: 9.617688 +epoch: 1, batch: 42997, sum loss: 5257.947754, avg loss: 2.814747, ppl: 16.688955 +epoch: 1, batch: 42998, sum loss: 5137.101562, avg loss: 2.764855, ppl: 15.876744 +epoch: 1, batch: 42999, sum loss: 4113.803711, avg loss: 2.353435, ppl: 10.521647 +epoch: 1, batch: 43000, sum loss: 4218.949707, avg loss: 2.460029, ppl: 11.705150 +epoch: 1, batch: 43001, sum loss: 3731.806641, avg loss: 2.615141, ppl: 13.669149 +epoch: 1, batch: 43002, sum loss: 4451.238770, avg loss: 2.671812, ppl: 14.466155 +epoch: 1, batch: 43003, sum loss: 4097.166992, avg loss: 2.543245, ppl: 12.720879 +epoch: 1, batch: 43004, sum loss: 4497.643555, avg loss: 2.734130, ppl: 15.396338 +epoch: 1, batch: 43005, sum loss: 4537.302734, avg loss: 2.697564, ppl: 14.843531 +epoch: 1, batch: 43006, sum loss: 4155.013672, avg loss: 2.444126, ppl: 11.520475 +epoch: 1, batch: 43007, sum loss: 4011.018555, avg loss: 2.549916, ppl: 12.806031 +epoch: 1, batch: 43008, sum loss: 4185.846191, avg loss: 2.500506, ppl: 12.188656 +epoch: 1, batch: 43009, sum loss: 4098.373535, avg loss: 2.590628, ppl: 13.338144 +epoch: 1, batch: 43010, sum loss: 
4195.047852, avg loss: 2.364739, ppl: 10.641266 +epoch: 1, batch: 43011, sum loss: 4799.454102, avg loss: 3.001535, ppl: 20.116386 +epoch: 1, batch: 43012, sum loss: 4915.252441, avg loss: 2.635524, ppl: 13.950622 +epoch: 1, batch: 43013, sum loss: 4741.070312, avg loss: 2.689206, ppl: 14.719985 +epoch: 1, batch: 43014, sum loss: 4280.790527, avg loss: 2.610238, ppl: 13.602289 +epoch: 1, batch: 43015, sum loss: 4623.854980, avg loss: 2.683607, ppl: 14.637798 +epoch: 1, batch: 43016, sum loss: 4823.049805, avg loss: 2.666142, ppl: 14.384360 +epoch: 1, batch: 43017, sum loss: 3855.124756, avg loss: 2.397466, ppl: 10.995275 +epoch: 1, batch: 43018, sum loss: 4065.536133, avg loss: 2.553729, ppl: 12.854945 +epoch: 1, batch: 43019, sum loss: 5562.345703, avg loss: 3.042859, ppl: 20.965094 +epoch: 1, batch: 43020, sum loss: 4278.977539, avg loss: 2.347218, ppl: 10.456435 +epoch: 1, batch: 43021, sum loss: 4150.454102, avg loss: 2.710943, ppl: 15.043462 +epoch: 1, batch: 43022, sum loss: 3706.640625, avg loss: 2.417900, ppl: 11.222269 +epoch: 1, batch: 43023, sum loss: 4455.951172, avg loss: 2.513227, ppl: 12.344702 +epoch: 1, batch: 43024, sum loss: 4360.100586, avg loss: 2.410227, ppl: 11.136490 +epoch: 1, batch: 43025, sum loss: 5238.152344, avg loss: 2.644196, ppl: 14.072124 +epoch: 1, batch: 43026, sum loss: 5197.719727, avg loss: 2.800496, ppl: 16.452799 +epoch: 1, batch: 43027, sum loss: 4395.295410, avg loss: 2.466496, ppl: 11.781091 +epoch: 1, batch: 43028, sum loss: 4331.192871, avg loss: 2.444240, ppl: 11.521788 +epoch: 1, batch: 43029, sum loss: 4390.601562, avg loss: 2.659359, ppl: 14.287127 +epoch: 1, batch: 43030, sum loss: 4375.289062, avg loss: 2.599696, ppl: 13.459651 +epoch: 1, batch: 43031, sum loss: 3704.531250, avg loss: 2.353578, ppl: 10.523150 +epoch: 1, batch: 43032, sum loss: 4044.803223, avg loss: 2.680453, ppl: 14.591700 +epoch: 1, batch: 43033, sum loss: 4238.104980, avg loss: 2.598470, ppl: 13.443157 +epoch: 1, batch: 43034, sum loss: 
4714.826660, avg loss: 2.896085, ppl: 18.103132 +epoch: 1, batch: 43035, sum loss: 4342.089844, avg loss: 2.514239, ppl: 12.357197 +epoch: 1, batch: 43036, sum loss: 4883.974121, avg loss: 2.671758, ppl: 14.465383 +epoch: 1, batch: 43037, sum loss: 4831.616211, avg loss: 2.640228, ppl: 14.016393 +epoch: 1, batch: 43038, sum loss: 4103.801758, avg loss: 2.566480, ppl: 13.019916 +epoch: 1, batch: 43039, sum loss: 4026.811035, avg loss: 2.551845, ppl: 12.830752 +epoch: 1, batch: 43040, sum loss: 5006.966309, avg loss: 2.629709, ppl: 13.869737 +epoch: 1, batch: 43041, sum loss: 4539.698242, avg loss: 2.768109, ppl: 15.928478 +epoch: 1, batch: 43042, sum loss: 3976.469727, avg loss: 2.442549, ppl: 11.502323 +epoch: 1, batch: 43043, sum loss: 4609.164551, avg loss: 2.700155, ppl: 14.882038 +epoch: 1, batch: 43044, sum loss: 4082.474365, avg loss: 2.543598, ppl: 12.725370 +epoch: 1, batch: 43045, sum loss: 3850.999756, avg loss: 2.455995, ppl: 11.658023 +epoch: 1, batch: 43046, sum loss: 4599.492188, avg loss: 2.728050, ppl: 15.303017 +epoch: 1, batch: 43047, sum loss: 4869.007812, avg loss: 2.682649, ppl: 14.623782 +epoch: 1, batch: 43048, sum loss: 3624.936523, avg loss: 2.415015, ppl: 11.189933 +epoch: 1, batch: 43049, sum loss: 4468.117188, avg loss: 2.545936, ppl: 12.755157 +epoch: 1, batch: 43050, sum loss: 3946.895020, avg loss: 2.482324, ppl: 11.969047 +epoch: 1, batch: 43051, sum loss: 5365.436035, avg loss: 2.987437, ppl: 19.834772 +epoch: 1, batch: 43052, sum loss: 3723.554932, avg loss: 2.512520, ppl: 12.335981 +epoch: 1, batch: 43053, sum loss: 3718.161133, avg loss: 2.618423, ppl: 13.714086 +epoch: 1, batch: 43054, sum loss: 5462.083984, avg loss: 2.673560, ppl: 14.491472 +epoch: 1, batch: 43055, sum loss: 3626.939453, avg loss: 2.355155, ppl: 10.539767 +epoch: 1, batch: 43056, sum loss: 4245.727051, avg loss: 2.479981, ppl: 11.941034 +epoch: 1, batch: 43057, sum loss: 4237.254395, avg loss: 2.479377, ppl: 11.933822 +epoch: 1, batch: 43058, sum loss: 
4297.924316, avg loss: 2.534153, ppl: 12.605755 +epoch: 1, batch: 43059, sum loss: 5642.584473, avg loss: 2.787838, ppl: 16.245861 +epoch: 1, batch: 43060, sum loss: 4429.166016, avg loss: 2.587130, ppl: 13.291565 +epoch: 1, batch: 43061, sum loss: 4822.385254, avg loss: 2.535429, ppl: 12.621841 +epoch: 1, batch: 43062, sum loss: 4357.954590, avg loss: 2.737409, ppl: 15.446905 +epoch: 1, batch: 43063, sum loss: 4259.389648, avg loss: 2.589294, ppl: 13.320370 +epoch: 1, batch: 43064, sum loss: 4431.378418, avg loss: 2.589935, ppl: 13.328906 +epoch: 1, batch: 43065, sum loss: 4245.280762, avg loss: 2.685187, ppl: 14.660941 +epoch: 1, batch: 43066, sum loss: 4439.265625, avg loss: 2.475887, ppl: 11.892255 +epoch: 1, batch: 43067, sum loss: 4847.017578, avg loss: 2.571362, ppl: 13.083632 +epoch: 1, batch: 43068, sum loss: 5288.764160, avg loss: 2.745984, ppl: 15.579931 +epoch: 1, batch: 43069, sum loss: 5134.348633, avg loss: 2.757437, ppl: 15.759407 +epoch: 1, batch: 43070, sum loss: 3637.998047, avg loss: 2.321633, ppl: 10.192301 +epoch: 1, batch: 43071, sum loss: 5082.410645, avg loss: 2.772728, ppl: 16.002232 +epoch: 1, batch: 43072, sum loss: 4722.704102, avg loss: 2.541821, ppl: 12.702785 +epoch: 1, batch: 43073, sum loss: 4736.446289, avg loss: 2.538288, ppl: 12.657987 +epoch: 1, batch: 43074, sum loss: 4658.065918, avg loss: 2.624262, ppl: 13.794394 +epoch: 1, batch: 43075, sum loss: 4915.025879, avg loss: 2.718488, ppl: 15.157383 +epoch: 1, batch: 43076, sum loss: 4261.255371, avg loss: 2.499270, ppl: 12.173607 +epoch: 1, batch: 43077, sum loss: 4975.709961, avg loss: 2.686668, ppl: 14.682677 +epoch: 1, batch: 43078, sum loss: 3646.028320, avg loss: 2.564014, ppl: 12.987848 +epoch: 1, batch: 43079, sum loss: 4137.141602, avg loss: 2.484770, ppl: 11.998358 +epoch: 1, batch: 43080, sum loss: 4124.665039, avg loss: 2.524275, ppl: 12.481840 +epoch: 1, batch: 43081, sum loss: 4479.572266, avg loss: 2.733113, ppl: 15.380690 +epoch: 1, batch: 43082, sum loss: 
4705.555176, avg loss: 2.759856, ppl: 15.797575 +epoch: 1, batch: 43083, sum loss: 4262.860352, avg loss: 2.669293, ppl: 14.429759 +epoch: 1, batch: 43084, sum loss: 3569.003174, avg loss: 2.409860, ppl: 11.132406 +epoch: 1, batch: 43085, sum loss: 5847.369629, avg loss: 2.989453, ppl: 19.874800 +epoch: 1, batch: 43086, sum loss: 4502.783691, avg loss: 2.577438, ppl: 13.163365 +epoch: 1, batch: 43087, sum loss: 4335.407715, avg loss: 2.488753, ppl: 12.046243 +epoch: 1, batch: 43088, sum loss: 4790.329590, avg loss: 2.756231, ppl: 15.740407 +epoch: 1, batch: 43089, sum loss: 4462.628418, avg loss: 2.651591, ppl: 14.176579 +epoch: 1, batch: 43090, sum loss: 4543.710938, avg loss: 2.722415, ppl: 15.217031 +epoch: 1, batch: 43091, sum loss: 5586.072266, avg loss: 3.008116, ppl: 20.249220 +epoch: 1, batch: 43092, sum loss: 4470.781250, avg loss: 2.643868, ppl: 14.067518 +epoch: 1, batch: 43093, sum loss: 4411.814941, avg loss: 2.673827, ppl: 14.495339 +epoch: 1, batch: 43094, sum loss: 4414.104980, avg loss: 2.607268, ppl: 13.561954 +epoch: 1, batch: 43095, sum loss: 4086.083496, avg loss: 2.595987, ppl: 13.409814 +epoch: 1, batch: 43096, sum loss: 4565.075195, avg loss: 2.619091, ppl: 13.723241 +epoch: 1, batch: 43097, sum loss: 5095.162598, avg loss: 2.784242, ppl: 16.187542 +epoch: 1, batch: 43098, sum loss: 3916.021240, avg loss: 2.411343, ppl: 11.148925 +epoch: 1, batch: 43099, sum loss: 5207.866211, avg loss: 2.551625, ppl: 12.827929 +epoch: 1, batch: 43100, sum loss: 4146.598145, avg loss: 2.480023, ppl: 11.941538 +epoch: 1, batch: 43101, sum loss: 4758.168457, avg loss: 2.650790, ppl: 14.165228 +epoch: 1, batch: 43102, sum loss: 3998.221436, avg loss: 2.346374, ppl: 10.447618 +epoch: 1, batch: 43103, sum loss: 4104.238281, avg loss: 2.627553, ppl: 13.839869 +epoch: 1, batch: 43104, sum loss: 4208.882812, avg loss: 2.455591, ppl: 11.653318 +epoch: 1, batch: 43105, sum loss: 4876.265625, avg loss: 2.700036, ppl: 14.880272 +epoch: 1, batch: 43106, sum loss: 
4860.225586, avg loss: 2.685208, ppl: 14.661244 +epoch: 1, batch: 43107, sum loss: 4748.173828, avg loss: 2.714793, ppl: 15.101490 +epoch: 1, batch: 43108, sum loss: 4713.253906, avg loss: 2.489833, ppl: 12.059260 +epoch: 1, batch: 43109, sum loss: 4000.851562, avg loss: 2.551564, ppl: 12.827144 +epoch: 1, batch: 43110, sum loss: 4429.907227, avg loss: 2.662204, ppl: 14.327833 +epoch: 1, batch: 43111, sum loss: 3720.095215, avg loss: 2.317816, ppl: 10.153478 +epoch: 1, batch: 43112, sum loss: 4598.056641, avg loss: 2.644081, ppl: 14.070507 +epoch: 1, batch: 43113, sum loss: 4627.131348, avg loss: 2.765769, ppl: 15.891255 +epoch: 1, batch: 43114, sum loss: 5091.988770, avg loss: 2.720080, ppl: 15.181532 +epoch: 1, batch: 43115, sum loss: 4001.346680, avg loss: 2.571560, ppl: 13.086219 +epoch: 1, batch: 43116, sum loss: 3798.770752, avg loss: 2.476382, ppl: 11.898145 +epoch: 1, batch: 43117, sum loss: 3854.712402, avg loss: 2.541010, ppl: 12.692486 +epoch: 1, batch: 43118, sum loss: 3486.006592, avg loss: 2.299477, ppl: 9.968964 +epoch: 1, batch: 43119, sum loss: 3755.217529, avg loss: 2.298175, ppl: 9.955995 +epoch: 1, batch: 43120, sum loss: 4087.479004, avg loss: 2.394540, ppl: 10.963149 +epoch: 1, batch: 43121, sum loss: 3494.506104, avg loss: 2.146502, ppl: 8.554885 +epoch: 1, batch: 43122, sum loss: 4610.646973, avg loss: 2.689992, ppl: 14.731565 +epoch: 1, batch: 43123, sum loss: 4816.235352, avg loss: 2.814866, ppl: 16.690941 +epoch: 1, batch: 43124, sum loss: 4446.989258, avg loss: 2.519541, ppl: 12.422891 +epoch: 1, batch: 43125, sum loss: 4279.903320, avg loss: 2.598605, ppl: 13.444975 +epoch: 1, batch: 43126, sum loss: 3836.992920, avg loss: 2.331101, ppl: 10.289268 +epoch: 1, batch: 43127, sum loss: 4980.732422, avg loss: 2.679254, ppl: 14.574211 +epoch: 1, batch: 43128, sum loss: 4761.024902, avg loss: 2.548728, ppl: 12.790830 +epoch: 1, batch: 43129, sum loss: 4953.141113, avg loss: 2.520683, ppl: 12.437083 +epoch: 1, batch: 43130, sum loss: 
4904.914062, avg loss: 2.684682, ppl: 14.653543 +epoch: 1, batch: 43131, sum loss: 4606.960449, avg loss: 2.528518, ppl: 12.534921 +epoch: 1, batch: 43132, sum loss: 4726.217285, avg loss: 2.724045, ppl: 15.241844 +epoch: 1, batch: 43133, sum loss: 4076.521973, avg loss: 2.348227, ppl: 10.466995 +epoch: 1, batch: 43134, sum loss: 5106.325684, avg loss: 2.721922, ppl: 15.209529 +epoch: 1, batch: 43135, sum loss: 4758.460449, avg loss: 2.899732, ppl: 18.169277 +epoch: 1, batch: 43136, sum loss: 4045.350586, avg loss: 2.584888, ppl: 13.261809 +epoch: 1, batch: 43137, sum loss: 4451.573242, avg loss: 2.535065, ppl: 12.617248 +epoch: 1, batch: 43138, sum loss: 4633.189453, avg loss: 2.424484, ppl: 11.296402 +epoch: 1, batch: 43139, sum loss: 3592.202393, avg loss: 2.489399, ppl: 12.054026 +epoch: 1, batch: 43140, sum loss: 4420.312012, avg loss: 2.638992, ppl: 13.999090 +epoch: 1, batch: 43141, sum loss: 3365.187012, avg loss: 2.322420, ppl: 10.200333 +epoch: 1, batch: 43142, sum loss: 3494.027588, avg loss: 2.303248, ppl: 10.006633 +epoch: 1, batch: 43143, sum loss: 4409.318848, avg loss: 2.718446, ppl: 15.156743 +epoch: 1, batch: 43144, sum loss: 5073.227539, avg loss: 2.848528, ppl: 17.262348 +epoch: 1, batch: 43145, sum loss: 4787.315918, avg loss: 2.562803, ppl: 12.972128 +epoch: 1, batch: 43146, sum loss: 4647.793945, avg loss: 2.651337, ppl: 14.172977 +epoch: 1, batch: 43147, sum loss: 4128.391113, avg loss: 2.553118, ppl: 12.847095 +epoch: 1, batch: 43148, sum loss: 3217.176025, avg loss: 2.286550, ppl: 9.840931 +epoch: 1, batch: 43149, sum loss: 5134.754395, avg loss: 2.705350, ppl: 14.959554 +epoch: 1, batch: 43150, sum loss: 4014.543457, avg loss: 2.627319, ppl: 13.836625 +epoch: 1, batch: 43151, sum loss: 5079.482422, avg loss: 2.642811, ppl: 14.052651 +epoch: 1, batch: 43152, sum loss: 4038.094727, avg loss: 2.509692, ppl: 12.301143 +epoch: 1, batch: 43153, sum loss: 4562.189453, avg loss: 2.571696, ppl: 13.088007 +epoch: 1, batch: 43154, sum loss: 
4170.423828, avg loss: 2.583906, ppl: 13.248783 +epoch: 1, batch: 43155, sum loss: 3552.721436, avg loss: 2.322040, ppl: 10.196457 +epoch: 1, batch: 43156, sum loss: 5341.019531, avg loss: 2.629749, ppl: 13.870286 +epoch: 1, batch: 43157, sum loss: 4112.751465, avg loss: 2.327533, ppl: 10.252622 +epoch: 1, batch: 43158, sum loss: 4051.785156, avg loss: 2.686860, ppl: 14.685493 +epoch: 1, batch: 43159, sum loss: 4527.251953, avg loss: 2.653723, ppl: 14.206836 +epoch: 1, batch: 43160, sum loss: 3990.581787, avg loss: 2.421469, ppl: 11.262397 +epoch: 1, batch: 43161, sum loss: 3740.313965, avg loss: 2.311690, ppl: 10.091461 +epoch: 1, batch: 43162, sum loss: 4113.161133, avg loss: 2.343682, ppl: 10.419526 +epoch: 1, batch: 43163, sum loss: 4372.792969, avg loss: 2.510214, ppl: 12.307565 +epoch: 1, batch: 43164, sum loss: 5193.481445, avg loss: 2.765432, ppl: 15.885903 +epoch: 1, batch: 43165, sum loss: 4111.907227, avg loss: 2.496604, ppl: 12.141195 +epoch: 1, batch: 43166, sum loss: 3560.215088, avg loss: 2.572410, ppl: 13.097346 +epoch: 1, batch: 43167, sum loss: 3654.895264, avg loss: 2.356477, ppl: 10.553700 +epoch: 1, batch: 43168, sum loss: 5040.855469, avg loss: 2.714515, ppl: 15.097293 +epoch: 1, batch: 43169, sum loss: 4513.833008, avg loss: 2.411236, ppl: 11.147729 +epoch: 1, batch: 43170, sum loss: 4460.888672, avg loss: 2.793293, ppl: 16.334721 +epoch: 1, batch: 43171, sum loss: 3857.785889, avg loss: 2.323967, ppl: 10.216126 +epoch: 1, batch: 43172, sum loss: 4783.790039, avg loss: 3.225752, ppl: 25.172501 +epoch: 1, batch: 43173, sum loss: 4885.948242, avg loss: 2.689019, ppl: 14.717237 +epoch: 1, batch: 43174, sum loss: 5129.435059, avg loss: 2.931106, ppl: 18.748348 +epoch: 1, batch: 43175, sum loss: 3204.766846, avg loss: 2.317257, ppl: 10.147805 +epoch: 1, batch: 43176, sum loss: 4146.128906, avg loss: 2.666321, ppl: 14.386943 +epoch: 1, batch: 43177, sum loss: 4213.206055, avg loss: 2.495975, ppl: 12.133561 +epoch: 1, batch: 43178, sum loss: 
4503.021484, avg loss: 2.613477, ppl: 13.646423 +epoch: 1, batch: 43179, sum loss: 4229.279785, avg loss: 2.583555, ppl: 13.244141 +epoch: 1, batch: 43180, sum loss: 4009.322510, avg loss: 2.595031, ppl: 13.396999 +epoch: 1, batch: 43181, sum loss: 4121.764648, avg loss: 2.507156, ppl: 12.269986 +epoch: 1, batch: 43182, sum loss: 4833.281250, avg loss: 2.890719, ppl: 18.006247 +epoch: 1, batch: 43183, sum loss: 3368.020752, avg loss: 2.348690, ppl: 10.471838 +epoch: 1, batch: 43184, sum loss: 5661.283691, avg loss: 3.050261, ppl: 21.120846 +epoch: 1, batch: 43185, sum loss: 4170.591309, avg loss: 2.478070, ppl: 11.918235 +epoch: 1, batch: 43186, sum loss: 5817.038086, avg loss: 2.807451, ppl: 16.567629 +epoch: 1, batch: 43187, sum loss: 4231.493652, avg loss: 2.360008, ppl: 10.591034 +epoch: 1, batch: 43188, sum loss: 4567.737305, avg loss: 2.805735, ppl: 16.539234 +epoch: 1, batch: 43189, sum loss: 4531.366211, avg loss: 2.667078, ppl: 14.397844 +epoch: 1, batch: 43190, sum loss: 3575.121338, avg loss: 2.386596, ppl: 10.876410 +epoch: 1, batch: 43191, sum loss: 5471.428711, avg loss: 2.680759, ppl: 14.596164 +epoch: 1, batch: 43192, sum loss: 4308.581055, avg loss: 2.557021, ppl: 12.897344 +epoch: 1, batch: 43193, sum loss: 4532.359375, avg loss: 2.562103, ppl: 12.963044 +epoch: 1, batch: 43194, sum loss: 4116.319824, avg loss: 2.464862, ppl: 11.761860 +epoch: 1, batch: 43195, sum loss: 5023.059082, avg loss: 2.867043, ppl: 17.584944 +epoch: 1, batch: 43196, sum loss: 5100.028809, avg loss: 2.667379, ppl: 14.402170 +epoch: 1, batch: 43197, sum loss: 4067.088379, avg loss: 2.627318, ppl: 13.836613 +epoch: 1, batch: 43198, sum loss: 4724.592285, avg loss: 2.584569, ppl: 13.257577 +epoch: 1, batch: 43199, sum loss: 4337.768555, avg loss: 2.777061, ppl: 16.071709 +epoch: 1, batch: 43200, sum loss: 4471.284180, avg loss: 2.920499, ppl: 18.550549 +epoch: 1, batch: 43201, sum loss: 4638.122559, avg loss: 2.448850, ppl: 11.575032 +epoch: 1, batch: 43202, sum loss: 
4324.403809, avg loss: 2.555794, ppl: 12.881527 +epoch: 1, batch: 43203, sum loss: 4444.927734, avg loss: 2.578264, ppl: 13.174254 +epoch: 1, batch: 43204, sum loss: 4363.987305, avg loss: 2.553533, ppl: 12.852427 +epoch: 1, batch: 43205, sum loss: 4582.574707, avg loss: 2.600780, ppl: 13.474247 +epoch: 1, batch: 43206, sum loss: 4191.836914, avg loss: 2.713163, ppl: 15.076890 +epoch: 1, batch: 43207, sum loss: 4380.832031, avg loss: 2.548477, ppl: 12.787616 +epoch: 1, batch: 43208, sum loss: 5073.977539, avg loss: 2.771151, ppl: 15.977017 +epoch: 1, batch: 43209, sum loss: 4317.378418, avg loss: 2.673299, ppl: 14.487690 +epoch: 1, batch: 43210, sum loss: 4623.366699, avg loss: 2.580004, ppl: 13.197187 +epoch: 1, batch: 43211, sum loss: 3833.475342, avg loss: 2.468432, ppl: 11.803928 +epoch: 1, batch: 43212, sum loss: 4635.702637, avg loss: 2.682698, ppl: 14.624500 +epoch: 1, batch: 43213, sum loss: 3629.960938, avg loss: 2.268726, ppl: 9.667073 +epoch: 1, batch: 43214, sum loss: 3856.465088, avg loss: 2.505825, ppl: 12.253668 +epoch: 1, batch: 43215, sum loss: 5282.769043, avg loss: 2.831066, ppl: 16.963533 +epoch: 1, batch: 43216, sum loss: 4215.607910, avg loss: 2.732086, ppl: 15.364901 +epoch: 1, batch: 43217, sum loss: 4211.460449, avg loss: 2.540085, ppl: 12.680747 +epoch: 1, batch: 43218, sum loss: 4171.904785, avg loss: 2.564170, ppl: 12.989874 +epoch: 1, batch: 43219, sum loss: 5178.065918, avg loss: 2.735375, ppl: 15.415529 +epoch: 1, batch: 43220, sum loss: 3732.882324, avg loss: 2.267851, ppl: 9.658621 +epoch: 1, batch: 43221, sum loss: 4137.490234, avg loss: 2.415347, ppl: 11.193658 +epoch: 1, batch: 43222, sum loss: 5130.062012, avg loss: 2.865956, ppl: 17.565844 +epoch: 1, batch: 43223, sum loss: 5087.414551, avg loss: 2.738113, ppl: 15.457791 +epoch: 1, batch: 43224, sum loss: 3974.908936, avg loss: 2.621972, ppl: 13.762832 +epoch: 1, batch: 43225, sum loss: 3808.525879, avg loss: 2.416577, ppl: 11.207435 +epoch: 1, batch: 43226, sum loss: 
4012.869629, avg loss: 2.483211, ppl: 11.979672 +epoch: 1, batch: 43227, sum loss: 4311.541016, avg loss: 2.527281, ppl: 12.519417 +epoch: 1, batch: 43228, sum loss: 4118.111328, avg loss: 2.646601, ppl: 14.106013 +epoch: 1, batch: 43229, sum loss: 3882.746094, avg loss: 2.664891, ppl: 14.366380 +epoch: 1, batch: 43230, sum loss: 4009.176270, avg loss: 2.531046, ppl: 12.566640 +epoch: 1, batch: 43231, sum loss: 4348.762207, avg loss: 2.571711, ppl: 13.088194 +epoch: 1, batch: 43232, sum loss: 3410.926025, avg loss: 2.403754, ppl: 11.064630 +epoch: 1, batch: 43233, sum loss: 3953.898926, avg loss: 2.465024, ppl: 11.763767 +epoch: 1, batch: 43234, sum loss: 5071.966797, avg loss: 2.827183, ppl: 16.897797 +epoch: 1, batch: 43235, sum loss: 4947.749023, avg loss: 2.645855, ppl: 14.095494 +epoch: 1, batch: 43236, sum loss: 4514.367188, avg loss: 2.677561, ppl: 14.549558 +epoch: 1, batch: 43237, sum loss: 4284.026367, avg loss: 2.450816, ppl: 11.597809 +epoch: 1, batch: 43238, sum loss: 5315.664062, avg loss: 2.723189, ppl: 15.228804 +epoch: 1, batch: 43239, sum loss: 4167.738281, avg loss: 2.595105, ppl: 13.397990 +epoch: 1, batch: 43240, sum loss: 4324.796875, avg loss: 2.661413, ppl: 14.316510 +epoch: 1, batch: 43241, sum loss: 4508.687500, avg loss: 2.576393, ppl: 13.149620 +epoch: 1, batch: 43242, sum loss: 3386.932129, avg loss: 2.365176, ppl: 10.645914 +epoch: 1, batch: 43243, sum loss: 4306.594238, avg loss: 2.531801, ppl: 12.576141 +epoch: 1, batch: 43244, sum loss: 4009.678467, avg loss: 2.315057, ppl: 10.125498 +epoch: 1, batch: 43245, sum loss: 4938.706543, avg loss: 2.692861, ppl: 14.773878 +epoch: 1, batch: 43246, sum loss: 3894.428223, avg loss: 2.405453, ppl: 11.083449 +epoch: 1, batch: 43247, sum loss: 4509.690918, avg loss: 2.563781, ppl: 12.984823 +epoch: 1, batch: 43248, sum loss: 4696.181152, avg loss: 2.645736, ppl: 14.093811 +epoch: 1, batch: 43249, sum loss: 3980.138916, avg loss: 2.696571, ppl: 14.828798 +epoch: 1, batch: 43250, sum loss: 
4728.558594, avg loss: 2.771722, ppl: 15.986143 +epoch: 1, batch: 43251, sum loss: 2734.041504, avg loss: 2.071244, ppl: 7.934686 +epoch: 1, batch: 43252, sum loss: 3646.536133, avg loss: 2.262119, ppl: 9.603417 +epoch: 1, batch: 43253, sum loss: 4257.710449, avg loss: 2.574190, ppl: 13.120687 +epoch: 1, batch: 43254, sum loss: 4712.236328, avg loss: 2.683506, ppl: 14.636318 +epoch: 1, batch: 43255, sum loss: 4676.600586, avg loss: 2.541631, ppl: 12.700365 +epoch: 1, batch: 43256, sum loss: 3633.091309, avg loss: 2.451479, ppl: 11.605495 +epoch: 1, batch: 43257, sum loss: 4337.916016, avg loss: 2.545725, ppl: 12.752475 +epoch: 1, batch: 43258, sum loss: 4255.299316, avg loss: 2.601039, ppl: 13.477734 +epoch: 1, batch: 43259, sum loss: 4330.506348, avg loss: 2.559401, ppl: 12.928072 +epoch: 1, batch: 43260, sum loss: 4054.119141, avg loss: 2.372217, ppl: 10.721137 +epoch: 1, batch: 43261, sum loss: 4644.021973, avg loss: 2.617825, ppl: 13.705881 +epoch: 1, batch: 43262, sum loss: 4336.159180, avg loss: 2.703341, ppl: 14.929532 +epoch: 1, batch: 43263, sum loss: 4551.895508, avg loss: 2.655715, ppl: 14.235161 +epoch: 1, batch: 43264, sum loss: 3425.785645, avg loss: 2.255290, ppl: 9.538059 +epoch: 1, batch: 43265, sum loss: 3864.593262, avg loss: 2.407846, ppl: 11.110006 +epoch: 1, batch: 43266, sum loss: 3789.008301, avg loss: 2.271588, ppl: 9.694782 +epoch: 1, batch: 43267, sum loss: 4230.490234, avg loss: 2.584295, ppl: 13.253936 +epoch: 1, batch: 43268, sum loss: 4393.954590, avg loss: 2.493731, ppl: 12.106367 +epoch: 1, batch: 43269, sum loss: 4892.082031, avg loss: 2.901591, ppl: 18.203081 +epoch: 1, batch: 43270, sum loss: 3923.353516, avg loss: 2.310573, ppl: 10.080202 +epoch: 1, batch: 43271, sum loss: 4613.736328, avg loss: 2.472527, ppl: 11.852363 +epoch: 1, batch: 43272, sum loss: 4774.491211, avg loss: 2.683806, ppl: 14.640713 +epoch: 1, batch: 43273, sum loss: 5592.437988, avg loss: 2.814513, ppl: 16.685051 +epoch: 1, batch: 43274, sum loss: 
3908.248291, avg loss: 2.387446, ppl: 10.885653 +epoch: 1, batch: 43275, sum loss: 4390.738281, avg loss: 2.465322, ppl: 11.767271 +epoch: 1, batch: 43276, sum loss: 4387.257812, avg loss: 2.528679, ppl: 12.536933 +epoch: 1, batch: 43277, sum loss: 4506.041504, avg loss: 2.618269, ppl: 13.711970 +epoch: 1, batch: 43278, sum loss: 4951.158691, avg loss: 2.802014, ppl: 16.477802 +epoch: 1, batch: 43279, sum loss: 4449.770020, avg loss: 2.446273, ppl: 11.545233 +epoch: 1, batch: 43280, sum loss: 5493.780273, avg loss: 2.944148, ppl: 18.994469 +epoch: 1, batch: 43281, sum loss: 4266.419922, avg loss: 2.671522, ppl: 14.461959 +epoch: 1, batch: 43282, sum loss: 3993.935547, avg loss: 2.390147, ppl: 10.915098 +epoch: 1, batch: 43283, sum loss: 3817.210205, avg loss: 2.464306, ppl: 11.755322 +epoch: 1, batch: 43284, sum loss: 5079.149414, avg loss: 3.093270, ppl: 22.049067 +epoch: 1, batch: 43285, sum loss: 5129.009766, avg loss: 2.832143, ppl: 16.981808 +epoch: 1, batch: 43286, sum loss: 5158.032715, avg loss: 2.739263, ppl: 15.475580 +epoch: 1, batch: 43287, sum loss: 5391.799805, avg loss: 2.773560, ppl: 16.015545 +epoch: 1, batch: 43288, sum loss: 3838.575684, avg loss: 2.532042, ppl: 12.579167 +epoch: 1, batch: 43289, sum loss: 4213.066406, avg loss: 2.472457, ppl: 11.851529 +epoch: 1, batch: 43290, sum loss: 4412.657715, avg loss: 2.507192, ppl: 12.270425 +epoch: 1, batch: 43291, sum loss: 3059.186279, avg loss: 2.351411, ppl: 10.500379 +epoch: 1, batch: 43292, sum loss: 4850.191895, avg loss: 2.551390, ppl: 12.824914 +epoch: 1, batch: 43293, sum loss: 5228.601074, avg loss: 2.707717, ppl: 14.994998 +epoch: 1, batch: 43294, sum loss: 4960.871094, avg loss: 2.585133, ppl: 13.265058 +epoch: 1, batch: 43295, sum loss: 4075.348633, avg loss: 2.520315, ppl: 12.432508 +epoch: 1, batch: 43296, sum loss: 4146.940918, avg loss: 2.561421, ppl: 12.954214 +epoch: 1, batch: 43297, sum loss: 3635.718750, avg loss: 2.272324, ppl: 9.701922 +epoch: 1, batch: 43298, sum loss: 
4067.739990, avg loss: 2.539164, ppl: 12.669070 +epoch: 1, batch: 43299, sum loss: 4972.027344, avg loss: 2.693406, ppl: 14.781936 +epoch: 1, batch: 43300, sum loss: 4421.651855, avg loss: 2.451026, ppl: 11.600247 +epoch: 1, batch: 43301, sum loss: 4244.199219, avg loss: 2.403284, ppl: 11.059434 +epoch: 1, batch: 43302, sum loss: 3989.699219, avg loss: 2.594083, ppl: 13.384303 +epoch: 1, batch: 43303, sum loss: 5165.011719, avg loss: 2.877444, ppl: 17.768799 +epoch: 1, batch: 43304, sum loss: 4061.301758, avg loss: 2.437756, ppl: 11.447325 +epoch: 1, batch: 43305, sum loss: 4657.513672, avg loss: 2.907312, ppl: 18.307520 +epoch: 1, batch: 43306, sum loss: 4698.587891, avg loss: 2.642626, ppl: 14.050045 +epoch: 1, batch: 43307, sum loss: 4723.708984, avg loss: 2.667255, ppl: 14.400388 +epoch: 1, batch: 43308, sum loss: 3509.326660, avg loss: 2.330230, ppl: 10.280308 +epoch: 1, batch: 43309, sum loss: 4813.188965, avg loss: 2.730113, ppl: 15.334617 +epoch: 1, batch: 43310, sum loss: 3278.642090, avg loss: 2.514296, ppl: 12.357906 +epoch: 1, batch: 43311, sum loss: 4834.287598, avg loss: 2.621631, ppl: 13.758147 +epoch: 1, batch: 43312, sum loss: 4448.496582, avg loss: 2.829833, ppl: 16.942623 +epoch: 1, batch: 43313, sum loss: 4368.814941, avg loss: 2.361522, ppl: 10.607080 +epoch: 1, batch: 43314, sum loss: 4689.539062, avg loss: 2.649457, ppl: 14.146358 +epoch: 1, batch: 43315, sum loss: 5126.181641, avg loss: 2.767917, ppl: 15.925422 +epoch: 1, batch: 43316, sum loss: 4455.170898, avg loss: 2.480608, ppl: 11.948521 +epoch: 1, batch: 43317, sum loss: 4148.395020, avg loss: 2.578244, ppl: 13.173987 +epoch: 1, batch: 43318, sum loss: 3784.429688, avg loss: 2.586760, ppl: 13.286654 +epoch: 1, batch: 43319, sum loss: 4852.189941, avg loss: 2.521928, ppl: 12.452586 +epoch: 1, batch: 43320, sum loss: 4413.949707, avg loss: 2.444047, ppl: 11.519571 +epoch: 1, batch: 43321, sum loss: 3900.284668, avg loss: 2.400175, ppl: 11.025106 +epoch: 1, batch: 43322, sum loss: 
3698.835449, avg loss: 2.330709, ppl: 10.285233 +epoch: 1, batch: 43323, sum loss: 4759.673828, avg loss: 2.754441, ppl: 15.712252 +epoch: 1, batch: 43324, sum loss: 4457.106445, avg loss: 2.756405, ppl: 15.743142 +epoch: 1, batch: 43325, sum loss: 4402.255859, avg loss: 2.466250, ppl: 11.778195 +epoch: 1, batch: 43326, sum loss: 5481.550781, avg loss: 2.921936, ppl: 18.577209 +epoch: 1, batch: 43327, sum loss: 4767.389160, avg loss: 2.736733, ppl: 15.436475 +epoch: 1, batch: 43328, sum loss: 4220.564453, avg loss: 2.595673, ppl: 13.405607 +epoch: 1, batch: 43329, sum loss: 4483.013184, avg loss: 2.609437, ppl: 13.591400 +epoch: 1, batch: 43330, sum loss: 4700.493652, avg loss: 2.672253, ppl: 14.472545 +epoch: 1, batch: 43331, sum loss: 3883.750000, avg loss: 2.339608, ppl: 10.377172 +epoch: 1, batch: 43332, sum loss: 5403.580078, avg loss: 2.826140, ppl: 16.880180 +epoch: 1, batch: 43333, sum loss: 4695.435059, avg loss: 2.564410, ppl: 12.992996 +epoch: 1, batch: 43334, sum loss: 3653.844238, avg loss: 2.333234, ppl: 10.311233 +epoch: 1, batch: 43335, sum loss: 5352.162598, avg loss: 2.674744, ppl: 14.508634 +epoch: 1, batch: 43336, sum loss: 4759.248047, avg loss: 2.555987, ppl: 12.884011 +epoch: 1, batch: 43337, sum loss: 5419.694824, avg loss: 2.863019, ppl: 17.514318 +epoch: 1, batch: 43338, sum loss: 3998.068359, avg loss: 2.519262, ppl: 12.419426 +epoch: 1, batch: 43339, sum loss: 4631.314453, avg loss: 2.641937, ppl: 14.040367 +epoch: 1, batch: 43340, sum loss: 5192.861328, avg loss: 2.909166, ppl: 18.341497 +epoch: 1, batch: 43341, sum loss: 4884.538574, avg loss: 2.404992, ppl: 11.078341 +epoch: 1, batch: 43342, sum loss: 4080.088623, avg loss: 2.348928, ppl: 10.474339 +epoch: 1, batch: 43343, sum loss: 5005.930664, avg loss: 2.600483, ppl: 13.470248 +epoch: 1, batch: 43344, sum loss: 4548.088379, avg loss: 2.565194, ppl: 13.003180 +epoch: 1, batch: 43345, sum loss: 4707.475586, avg loss: 2.733726, ppl: 15.390117 +epoch: 1, batch: 43346, sum loss: 
4057.841797, avg loss: 2.415382, ppl: 11.194047 +epoch: 1, batch: 43347, sum loss: 4595.737793, avg loss: 2.661111, ppl: 14.312176 +epoch: 1, batch: 43348, sum loss: 4629.680664, avg loss: 2.696378, ppl: 14.825932 +epoch: 1, batch: 43349, sum loss: 4710.820801, avg loss: 2.636162, ppl: 13.959517 +epoch: 1, batch: 43350, sum loss: 4139.081543, avg loss: 2.519222, ppl: 12.418928 +epoch: 1, batch: 43351, sum loss: 4282.679688, avg loss: 2.583040, ppl: 13.237315 +epoch: 1, batch: 43352, sum loss: 3664.408203, avg loss: 2.410795, ppl: 11.142816 +epoch: 1, batch: 43353, sum loss: 4592.021484, avg loss: 2.610586, ppl: 13.607028 +epoch: 1, batch: 43354, sum loss: 4150.064453, avg loss: 2.636636, ppl: 13.966136 +epoch: 1, batch: 43355, sum loss: 3612.806396, avg loss: 2.521149, ppl: 12.442887 +epoch: 1, batch: 43356, sum loss: 4411.065430, avg loss: 2.520609, ppl: 12.436167 +epoch: 1, batch: 43357, sum loss: 4142.822266, avg loss: 2.515375, ppl: 12.371243 +epoch: 1, batch: 43358, sum loss: 4404.073242, avg loss: 2.589108, ppl: 13.317890 +epoch: 1, batch: 43359, sum loss: 4205.291992, avg loss: 2.480998, ppl: 11.953191 +epoch: 1, batch: 43360, sum loss: 3870.196289, avg loss: 2.509855, ppl: 12.303146 +epoch: 1, batch: 43361, sum loss: 4687.910645, avg loss: 2.521738, ppl: 12.450214 +epoch: 1, batch: 43362, sum loss: 4103.799805, avg loss: 2.566479, ppl: 13.019900 +epoch: 1, batch: 43363, sum loss: 3936.747070, avg loss: 2.528418, ppl: 12.533666 +epoch: 1, batch: 43364, sum loss: 4119.199219, avg loss: 2.355174, ppl: 10.539961 +epoch: 1, batch: 43365, sum loss: 3375.565430, avg loss: 2.279248, ppl: 9.769326 +epoch: 1, batch: 43366, sum loss: 3880.068359, avg loss: 2.389205, ppl: 10.904816 +epoch: 1, batch: 43367, sum loss: 4551.399902, avg loss: 2.580159, ppl: 13.199233 +epoch: 1, batch: 43368, sum loss: 4131.315430, avg loss: 2.712617, ppl: 15.068658 +epoch: 1, batch: 43369, sum loss: 3394.792725, avg loss: 2.468940, ppl: 11.809922 +epoch: 1, batch: 43370, sum loss: 
4418.396484, avg loss: 2.660082, ppl: 14.297462 +epoch: 1, batch: 43371, sum loss: 5180.770508, avg loss: 2.795883, ppl: 16.377079 +epoch: 1, batch: 43372, sum loss: 4305.534668, avg loss: 2.451899, ppl: 11.610374 +epoch: 1, batch: 43373, sum loss: 4978.614258, avg loss: 2.704299, ppl: 14.943837 +epoch: 1, batch: 43374, sum loss: 4673.712402, avg loss: 2.658539, ppl: 14.275422 +epoch: 1, batch: 43375, sum loss: 3761.133789, avg loss: 2.415629, ppl: 11.196807 +epoch: 1, batch: 43376, sum loss: 3995.817139, avg loss: 2.660331, ppl: 14.301019 +epoch: 1, batch: 43377, sum loss: 3945.718994, avg loss: 2.667829, ppl: 14.408651 +epoch: 1, batch: 43378, sum loss: 3755.036133, avg loss: 2.391743, ppl: 10.932529 +epoch: 1, batch: 43379, sum loss: 4847.381836, avg loss: 2.615964, ppl: 13.680400 +epoch: 1, batch: 43380, sum loss: 6281.037109, avg loss: 2.897157, ppl: 18.122557 +epoch: 1, batch: 43381, sum loss: 4026.026367, avg loss: 2.460896, ppl: 11.715309 +epoch: 1, batch: 43382, sum loss: 4685.268066, avg loss: 2.581415, ppl: 13.215824 +epoch: 1, batch: 43383, sum loss: 4268.637207, avg loss: 2.502132, ppl: 12.208494 +epoch: 1, batch: 43384, sum loss: 4386.553711, avg loss: 2.601752, ppl: 13.487348 +epoch: 1, batch: 43385, sum loss: 4965.193359, avg loss: 2.737152, ppl: 15.442939 +epoch: 1, batch: 43386, sum loss: 4124.956543, avg loss: 2.446594, ppl: 11.548939 +epoch: 1, batch: 43387, sum loss: 5818.051758, avg loss: 2.762608, ppl: 15.841096 +epoch: 1, batch: 43388, sum loss: 3832.482422, avg loss: 2.516403, ppl: 12.383977 +epoch: 1, batch: 43389, sum loss: 4885.386719, avg loss: 2.605540, ppl: 13.538528 +epoch: 1, batch: 43390, sum loss: 4606.676758, avg loss: 2.645995, ppl: 14.097461 +epoch: 1, batch: 43391, sum loss: 4297.634277, avg loss: 2.511768, ppl: 12.326700 +epoch: 1, batch: 43392, sum loss: 4014.331543, avg loss: 2.413909, ppl: 11.177574 +epoch: 1, batch: 43393, sum loss: 4601.584473, avg loss: 2.598297, ppl: 13.440830 +epoch: 1, batch: 43394, sum loss: 
4546.437012, avg loss: 2.704602, ppl: 14.948373 +epoch: 1, batch: 43395, sum loss: 4333.155273, avg loss: 2.511974, ppl: 12.329246 +epoch: 1, batch: 43396, sum loss: 4575.951660, avg loss: 2.573651, ppl: 13.113619 +epoch: 1, batch: 43397, sum loss: 4235.341797, avg loss: 2.620880, ppl: 13.747815 +epoch: 1, batch: 43398, sum loss: 4893.041016, avg loss: 2.707825, ppl: 14.996629 +epoch: 1, batch: 43399, sum loss: 3913.650635, avg loss: 2.453700, ppl: 11.631298 +epoch: 1, batch: 43400, sum loss: 4914.117188, avg loss: 2.601439, ppl: 13.483120 +epoch: 1, batch: 43401, sum loss: 4446.098145, avg loss: 2.683222, ppl: 14.632156 +epoch: 1, batch: 43402, sum loss: 4369.640625, avg loss: 2.621260, ppl: 13.753040 +epoch: 1, batch: 43403, sum loss: 4246.561523, avg loss: 2.520215, ppl: 12.431264 +epoch: 1, batch: 43404, sum loss: 4032.704346, avg loss: 2.555579, ppl: 12.878759 +epoch: 1, batch: 43405, sum loss: 5317.633789, avg loss: 2.720017, ppl: 15.180587 +epoch: 1, batch: 43406, sum loss: 4139.576172, avg loss: 2.525672, ppl: 12.499289 +epoch: 1, batch: 43407, sum loss: 4663.355957, avg loss: 2.667824, ppl: 14.408583 +epoch: 1, batch: 43408, sum loss: 5087.543945, avg loss: 2.871074, ppl: 17.655979 +epoch: 1, batch: 43409, sum loss: 4990.990723, avg loss: 2.599474, ppl: 13.456664 +epoch: 1, batch: 43410, sum loss: 4200.396973, avg loss: 2.375790, ppl: 10.759514 +epoch: 1, batch: 43411, sum loss: 4126.540527, avg loss: 2.537848, ppl: 12.652411 +epoch: 1, batch: 43412, sum loss: 2993.007812, avg loss: 2.371639, ppl: 10.714934 +epoch: 1, batch: 43413, sum loss: 3902.576904, avg loss: 2.353786, ppl: 10.525341 +epoch: 1, batch: 43414, sum loss: 5254.307129, avg loss: 2.700055, ppl: 14.880552 +epoch: 1, batch: 43415, sum loss: 3674.979492, avg loss: 2.314219, ppl: 10.117019 +epoch: 1, batch: 43416, sum loss: 4469.155273, avg loss: 2.751943, ppl: 15.673053 +epoch: 1, batch: 43417, sum loss: 4436.849609, avg loss: 2.441854, ppl: 11.494337 +epoch: 1, batch: 43418, sum loss: 
3928.912598, avg loss: 2.349828, ppl: 10.483768 +epoch: 1, batch: 43419, sum loss: 3610.902100, avg loss: 2.413705, ppl: 11.175285 +epoch: 1, batch: 43420, sum loss: 4004.006836, avg loss: 2.476195, ppl: 11.895912 +epoch: 1, batch: 43421, sum loss: 4020.624268, avg loss: 2.394654, ppl: 10.964404 +epoch: 1, batch: 43422, sum loss: 3407.982422, avg loss: 2.312064, ppl: 10.095242 +epoch: 1, batch: 43423, sum loss: 4110.951660, avg loss: 2.325199, ppl: 10.228714 +epoch: 1, batch: 43424, sum loss: 3894.215332, avg loss: 2.429330, ppl: 11.351270 +epoch: 1, batch: 43425, sum loss: 4147.023926, avg loss: 2.489210, ppl: 12.051750 +epoch: 1, batch: 43426, sum loss: 4985.024902, avg loss: 2.670072, ppl: 14.441013 +epoch: 1, batch: 43427, sum loss: 4236.824219, avg loss: 2.490784, ppl: 12.070741 +epoch: 1, batch: 43428, sum loss: 4072.833496, avg loss: 2.425750, ppl: 11.310704 +epoch: 1, batch: 43429, sum loss: 4521.983887, avg loss: 2.829777, ppl: 16.941687 +epoch: 1, batch: 43430, sum loss: 4036.715088, avg loss: 2.522947, ppl: 12.465276 +epoch: 1, batch: 43431, sum loss: 5086.025879, avg loss: 2.712547, ppl: 15.067605 +epoch: 1, batch: 43432, sum loss: 5039.325195, avg loss: 2.704952, ppl: 14.953595 +epoch: 1, batch: 43433, sum loss: 4440.853027, avg loss: 2.472635, ppl: 11.853643 +epoch: 1, batch: 43434, sum loss: 4899.617188, avg loss: 2.738746, ppl: 15.467583 +epoch: 1, batch: 43435, sum loss: 3320.364502, avg loss: 2.353199, ppl: 10.519172 +epoch: 1, batch: 43436, sum loss: 3708.657227, avg loss: 2.283656, ppl: 9.812489 +epoch: 1, batch: 43437, sum loss: 5073.362305, avg loss: 2.697162, ppl: 14.837568 +epoch: 1, batch: 43438, sum loss: 3950.664062, avg loss: 2.481573, ppl: 11.960061 +epoch: 1, batch: 43439, sum loss: 4873.327148, avg loss: 2.551480, ppl: 12.826077 +epoch: 1, batch: 43440, sum loss: 4060.271484, avg loss: 2.697855, ppl: 14.847849 +epoch: 1, batch: 43441, sum loss: 3901.336914, avg loss: 2.445979, ppl: 11.541847 +epoch: 1, batch: 43442, sum loss: 
4833.142578, avg loss: 2.864933, ppl: 17.547886 +epoch: 1, batch: 43443, sum loss: 3946.075928, avg loss: 2.484935, ppl: 12.000335 +epoch: 1, batch: 43444, sum loss: 4477.193359, avg loss: 2.523784, ppl: 12.475718 +epoch: 1, batch: 43445, sum loss: 5318.192383, avg loss: 2.868496, ppl: 17.610519 +epoch: 1, batch: 43446, sum loss: 4160.971191, avg loss: 2.578049, ppl: 13.171418 +epoch: 1, batch: 43447, sum loss: 4002.345703, avg loss: 2.550889, ppl: 12.818501 +epoch: 1, batch: 43448, sum loss: 4155.401367, avg loss: 2.400578, ppl: 11.029555 +epoch: 1, batch: 43449, sum loss: 4756.366211, avg loss: 2.841318, ppl: 17.138342 +epoch: 1, batch: 43450, sum loss: 4259.511230, avg loss: 2.644017, ppl: 14.069608 +epoch: 1, batch: 43451, sum loss: 4611.402832, avg loss: 2.544924, ppl: 12.742263 +epoch: 1, batch: 43452, sum loss: 4389.812012, avg loss: 2.681620, ppl: 14.608742 +epoch: 1, batch: 43453, sum loss: 3760.052246, avg loss: 2.520142, ppl: 12.430366 +epoch: 1, batch: 43454, sum loss: 3989.493652, avg loss: 2.510695, ppl: 12.313478 +epoch: 1, batch: 43455, sum loss: 4834.321289, avg loss: 2.504830, ppl: 12.241476 +epoch: 1, batch: 43456, sum loss: 3666.094727, avg loss: 2.318846, ppl: 10.163934 +epoch: 1, batch: 43457, sum loss: 4589.709961, avg loss: 2.579938, ppl: 13.196322 +epoch: 1, batch: 43458, sum loss: 5144.582520, avg loss: 2.735025, ppl: 15.410131 +epoch: 1, batch: 43459, sum loss: 4263.630859, avg loss: 2.599775, ppl: 13.460707 +epoch: 1, batch: 43460, sum loss: 5401.212891, avg loss: 2.782696, ppl: 16.162533 +epoch: 1, batch: 43461, sum loss: 4623.654297, avg loss: 2.795438, ppl: 16.369799 +epoch: 1, batch: 43462, sum loss: 4423.106934, avg loss: 2.453193, ppl: 11.625407 +epoch: 1, batch: 43463, sum loss: 4441.299805, avg loss: 2.654692, ppl: 14.220608 +epoch: 1, batch: 43464, sum loss: 4904.310059, avg loss: 2.465716, ppl: 11.771912 +epoch: 1, batch: 43465, sum loss: 4214.384277, avg loss: 2.525095, ppl: 12.492087 +epoch: 1, batch: 43466, sum loss: 
5019.453613, avg loss: 2.636268, ppl: 13.961002 +epoch: 1, batch: 43467, sum loss: 4759.935059, avg loss: 2.898864, ppl: 18.153517 +epoch: 1, batch: 43468, sum loss: 4170.839355, avg loss: 2.571418, ppl: 13.084359 +epoch: 1, batch: 43469, sum loss: 3844.614746, avg loss: 2.428689, ppl: 11.344001 +epoch: 1, batch: 43470, sum loss: 5016.880371, avg loss: 2.748976, ppl: 15.626615 +epoch: 1, batch: 43471, sum loss: 4949.204102, avg loss: 2.613096, ppl: 13.641222 +epoch: 1, batch: 43472, sum loss: 4275.843750, avg loss: 2.555794, ppl: 12.881527 +epoch: 1, batch: 43473, sum loss: 3549.257812, avg loss: 2.417751, ppl: 11.220592 +epoch: 1, batch: 43474, sum loss: 4756.145020, avg loss: 2.546116, ppl: 12.757459 +epoch: 1, batch: 43475, sum loss: 4415.545410, avg loss: 2.676088, ppl: 14.528150 +epoch: 1, batch: 43476, sum loss: 4761.232422, avg loss: 2.769769, ppl: 15.954947 +epoch: 1, batch: 43477, sum loss: 3749.603271, avg loss: 2.504745, ppl: 12.240438 +epoch: 1, batch: 43478, sum loss: 5233.839355, avg loss: 2.756103, ppl: 15.738388 +epoch: 1, batch: 43479, sum loss: 3458.361816, avg loss: 2.281241, ppl: 9.788825 +epoch: 1, batch: 43480, sum loss: 4468.098633, avg loss: 2.706299, ppl: 14.973749 +epoch: 1, batch: 43481, sum loss: 4286.314941, avg loss: 2.675602, ppl: 14.521091 +epoch: 1, batch: 43482, sum loss: 3315.176270, avg loss: 2.329709, ppl: 10.274954 +epoch: 1, batch: 43483, sum loss: 4155.458008, avg loss: 2.374547, ppl: 10.746149 +epoch: 1, batch: 43484, sum loss: 5222.024414, avg loss: 2.806031, ppl: 16.544132 +epoch: 1, batch: 43485, sum loss: 5290.594727, avg loss: 2.894198, ppl: 18.069012 +epoch: 1, batch: 43486, sum loss: 4053.383789, avg loss: 2.283596, ppl: 9.811903 +epoch: 1, batch: 43487, sum loss: 4185.135742, avg loss: 2.436051, ppl: 11.427825 +epoch: 1, batch: 43488, sum loss: 4600.292969, avg loss: 2.657593, ppl: 14.261912 +epoch: 1, batch: 43489, sum loss: 4585.655273, avg loss: 2.762443, ppl: 15.838490 +epoch: 1, batch: 43490, sum loss: 
4698.081543, avg loss: 2.643827, ppl: 14.066941 +epoch: 1, batch: 43491, sum loss: 4118.687500, avg loss: 2.460387, ppl: 11.709342 +epoch: 1, batch: 43492, sum loss: 4886.800293, avg loss: 2.704372, ppl: 14.944927 +epoch: 1, batch: 43493, sum loss: 5177.862305, avg loss: 2.572212, ppl: 13.094758 +epoch: 1, batch: 43494, sum loss: 3811.833008, avg loss: 2.722738, ppl: 15.221940 +epoch: 1, batch: 43495, sum loss: 4297.451172, avg loss: 2.564112, ppl: 12.989115 +epoch: 1, batch: 43496, sum loss: 5341.816895, avg loss: 2.802632, ppl: 16.487988 +epoch: 1, batch: 43497, sum loss: 3870.896240, avg loss: 2.526695, ppl: 12.512082 +epoch: 1, batch: 43498, sum loss: 4640.505859, avg loss: 2.660841, ppl: 14.308311 +epoch: 1, batch: 43499, sum loss: 4279.964355, avg loss: 2.688420, ppl: 14.708416 +epoch: 1, batch: 43500, sum loss: 3946.453125, avg loss: 2.603201, ppl: 13.506909 +epoch: 1, batch: 43501, sum loss: 3758.107910, avg loss: 2.353230, ppl: 10.519490 +epoch: 1, batch: 43502, sum loss: 5363.363281, avg loss: 2.878885, ppl: 17.794426 +epoch: 1, batch: 43503, sum loss: 4636.519043, avg loss: 2.634386, ppl: 13.934752 +epoch: 1, batch: 43504, sum loss: 4303.180176, avg loss: 2.507681, ppl: 12.276426 +epoch: 1, batch: 43505, sum loss: 3699.302490, avg loss: 2.289172, ppl: 9.866769 +epoch: 1, batch: 43506, sum loss: 4479.501465, avg loss: 2.540840, ppl: 12.690331 +epoch: 1, batch: 43507, sum loss: 4381.206543, avg loss: 2.439425, ppl: 11.466443 +epoch: 1, batch: 43508, sum loss: 4333.692871, avg loss: 2.537291, ppl: 12.645366 +epoch: 1, batch: 43509, sum loss: 4521.113770, avg loss: 2.530002, ppl: 12.553533 +epoch: 1, batch: 43510, sum loss: 4839.559570, avg loss: 2.818614, ppl: 16.753607 +epoch: 1, batch: 43511, sum loss: 4175.398926, avg loss: 2.596641, ppl: 13.418590 +epoch: 1, batch: 43512, sum loss: 4664.724121, avg loss: 2.414454, ppl: 11.183657 +epoch: 1, batch: 43513, sum loss: 4248.358887, avg loss: 2.599975, ppl: 13.463400 +epoch: 1, batch: 43514, sum loss: 
5259.895996, avg loss: 2.704317, ppl: 14.944104 +epoch: 1, batch: 43515, sum loss: 4243.841797, avg loss: 2.579843, ppl: 13.195064 +epoch: 1, batch: 43516, sum loss: 4444.486816, avg loss: 2.520979, ppl: 12.440775 +epoch: 1, batch: 43517, sum loss: 5015.459473, avg loss: 2.515275, ppl: 12.370016 +epoch: 1, batch: 43518, sum loss: 4281.778320, avg loss: 2.577832, ppl: 13.168551 +epoch: 1, batch: 43519, sum loss: 3674.688965, avg loss: 2.471210, ppl: 11.836758 +epoch: 1, batch: 43520, sum loss: 5459.460938, avg loss: 2.710755, ppl: 15.040628 +epoch: 1, batch: 43521, sum loss: 4559.335449, avg loss: 2.608316, ppl: 13.576165 +epoch: 1, batch: 43522, sum loss: 3479.534912, avg loss: 2.373489, ppl: 10.734782 +epoch: 1, batch: 43523, sum loss: 4259.176270, avg loss: 2.614596, ppl: 13.661691 +epoch: 1, batch: 43524, sum loss: 4243.564941, avg loss: 2.468624, ppl: 11.806194 +epoch: 1, batch: 43525, sum loss: 4477.676270, avg loss: 2.586757, ppl: 13.286613 +epoch: 1, batch: 43526, sum loss: 4111.084961, avg loss: 2.648895, ppl: 14.138408 +epoch: 1, batch: 43527, sum loss: 4993.596680, avg loss: 2.694871, ppl: 14.803616 +epoch: 1, batch: 43528, sum loss: 5446.138672, avg loss: 2.835054, ppl: 17.031322 +epoch: 1, batch: 43529, sum loss: 3685.134521, avg loss: 2.539721, ppl: 12.676128 +epoch: 1, batch: 43530, sum loss: 4400.481445, avg loss: 2.679952, ppl: 14.584399 +epoch: 1, batch: 43531, sum loss: 3653.186523, avg loss: 2.704061, ppl: 14.940282 +epoch: 1, batch: 43532, sum loss: 4614.058105, avg loss: 2.782906, ppl: 16.165932 +epoch: 1, batch: 43533, sum loss: 4036.292236, avg loss: 2.465664, ppl: 11.771297 +epoch: 1, batch: 43534, sum loss: 4694.794922, avg loss: 2.655427, ppl: 14.231065 +epoch: 1, batch: 43535, sum loss: 4177.885254, avg loss: 2.506230, ppl: 12.258626 +epoch: 1, batch: 43536, sum loss: 4695.073242, avg loss: 2.740848, ppl: 15.500129 +epoch: 1, batch: 43537, sum loss: 3360.010986, avg loss: 2.242998, ppl: 9.421534 +epoch: 1, batch: 43538, sum loss: 
4866.835449, avg loss: 2.748072, ppl: 15.612500 +epoch: 1, batch: 43539, sum loss: 4243.386719, avg loss: 2.539430, ppl: 12.672441 +epoch: 1, batch: 43540, sum loss: 4603.997559, avg loss: 2.706642, ppl: 14.978887 +epoch: 1, batch: 43541, sum loss: 3951.143066, avg loss: 2.431473, ppl: 11.375621 +epoch: 1, batch: 43542, sum loss: 4290.211426, avg loss: 2.562850, ppl: 12.972743 +epoch: 1, batch: 43543, sum loss: 4482.708008, avg loss: 2.603199, ppl: 13.506871 +epoch: 1, batch: 43544, sum loss: 3955.911865, avg loss: 2.390279, ppl: 10.916540 +epoch: 1, batch: 43545, sum loss: 4551.291504, avg loss: 2.615685, ppl: 13.676579 +epoch: 1, batch: 43546, sum loss: 4443.005859, avg loss: 2.530186, ppl: 12.555838 +epoch: 1, batch: 43547, sum loss: 3677.265381, avg loss: 2.348190, ppl: 10.466606 +epoch: 1, batch: 43548, sum loss: 5637.473633, avg loss: 2.939246, ppl: 18.901588 +epoch: 1, batch: 43549, sum loss: 3939.716064, avg loss: 2.462322, ppl: 11.732027 +epoch: 1, batch: 43550, sum loss: 4269.087891, avg loss: 2.584194, ppl: 13.252600 +epoch: 1, batch: 43551, sum loss: 5099.048340, avg loss: 2.628376, ppl: 13.851251 +epoch: 1, batch: 43552, sum loss: 4771.179199, avg loss: 2.764298, ppl: 15.867904 +epoch: 1, batch: 43553, sum loss: 5143.559570, avg loss: 2.891264, ppl: 18.016075 +epoch: 1, batch: 43554, sum loss: 3798.632324, avg loss: 2.414896, ppl: 11.188612 +epoch: 1, batch: 43555, sum loss: 5353.956055, avg loss: 2.650473, ppl: 14.160740 +epoch: 1, batch: 43556, sum loss: 4915.418457, avg loss: 2.783363, ppl: 16.173315 +epoch: 1, batch: 43557, sum loss: 3749.970459, avg loss: 2.672823, ppl: 14.480790 +epoch: 1, batch: 43558, sum loss: 3715.071777, avg loss: 2.421820, ppl: 11.266345 +epoch: 1, batch: 43559, sum loss: 4292.601562, avg loss: 2.582793, ppl: 13.234046 +epoch: 1, batch: 43560, sum loss: 5202.554688, avg loss: 2.681729, ppl: 14.610337 +epoch: 1, batch: 43561, sum loss: 3993.880859, avg loss: 2.237468, ppl: 9.369582 +epoch: 1, batch: 43562, sum loss: 
4105.480469, avg loss: 2.455431, ppl: 11.651454 +epoch: 1, batch: 43563, sum loss: 4451.493652, avg loss: 2.756343, ppl: 15.742167 +epoch: 1, batch: 43564, sum loss: 4201.537598, avg loss: 2.490538, ppl: 12.067768 +epoch: 1, batch: 43565, sum loss: 4079.443359, avg loss: 2.438400, ppl: 11.454701 +epoch: 1, batch: 43566, sum loss: 5061.910645, avg loss: 2.444187, ppl: 11.521175 +epoch: 1, batch: 43567, sum loss: 4174.899902, avg loss: 2.583478, ppl: 13.243114 +epoch: 1, batch: 43568, sum loss: 3780.539795, avg loss: 2.440633, ppl: 11.480301 +epoch: 1, batch: 43569, sum loss: 4393.645508, avg loss: 2.533821, ppl: 12.601563 +epoch: 1, batch: 43570, sum loss: 4715.221680, avg loss: 2.679103, ppl: 14.572019 +epoch: 1, batch: 43571, sum loss: 4357.229492, avg loss: 2.673147, ppl: 14.485483 +epoch: 1, batch: 43572, sum loss: 4244.441895, avg loss: 2.676193, ppl: 14.529670 +epoch: 1, batch: 43573, sum loss: 3170.406006, avg loss: 2.172999, ppl: 8.784593 +epoch: 1, batch: 43574, sum loss: 4483.980957, avg loss: 2.366217, ppl: 10.657000 +epoch: 1, batch: 43575, sum loss: 3876.834961, avg loss: 2.478795, ppl: 11.926882 +epoch: 1, batch: 43576, sum loss: 4833.047363, avg loss: 2.768069, ppl: 15.927840 +epoch: 1, batch: 43577, sum loss: 4909.625488, avg loss: 2.538586, ppl: 12.661757 +epoch: 1, batch: 43578, sum loss: 4012.988770, avg loss: 2.410203, ppl: 11.136227 +epoch: 1, batch: 43579, sum loss: 4622.836426, avg loss: 2.469464, ppl: 11.816113 +epoch: 1, batch: 43580, sum loss: 4462.913086, avg loss: 2.576740, ppl: 13.154180 +epoch: 1, batch: 43581, sum loss: 6392.211914, avg loss: 2.980052, ppl: 19.688845 +epoch: 1, batch: 43582, sum loss: 5422.121094, avg loss: 2.716494, ppl: 15.127188 +epoch: 1, batch: 43583, sum loss: 3914.260254, avg loss: 2.134275, ppl: 8.450917 +epoch: 1, batch: 43584, sum loss: 3398.169678, avg loss: 2.491327, ppl: 12.077290 +epoch: 1, batch: 43585, sum loss: 4437.632324, avg loss: 2.678112, ppl: 14.557587 +epoch: 1, batch: 43586, sum loss: 
4097.877930, avg loss: 2.647208, ppl: 14.114575 +epoch: 1, batch: 43587, sum loss: 4317.810059, avg loss: 2.591723, ppl: 13.352755 +epoch: 1, batch: 43588, sum loss: 3867.191895, avg loss: 2.228929, ppl: 9.289911 +epoch: 1, batch: 43589, sum loss: 5166.841797, avg loss: 2.755649, ppl: 15.731249 +epoch: 1, batch: 43590, sum loss: 4023.450928, avg loss: 2.318992, ppl: 10.165421 +epoch: 1, batch: 43591, sum loss: 4286.316406, avg loss: 2.636111, ppl: 13.958809 +epoch: 1, batch: 43592, sum loss: 3893.935791, avg loss: 2.497714, ppl: 12.154675 +epoch: 1, batch: 43593, sum loss: 4489.994629, avg loss: 2.716270, ppl: 15.123808 +epoch: 1, batch: 43594, sum loss: 5057.338867, avg loss: 2.619026, ppl: 13.722351 +epoch: 1, batch: 43595, sum loss: 3920.911621, avg loss: 2.505375, ppl: 12.248150 +epoch: 1, batch: 43596, sum loss: 4853.998047, avg loss: 2.734647, ppl: 15.404298 +epoch: 1, batch: 43597, sum loss: 3897.258789, avg loss: 2.434265, ppl: 11.407435 +epoch: 1, batch: 43598, sum loss: 4792.690918, avg loss: 2.675986, ppl: 14.526664 +epoch: 1, batch: 43599, sum loss: 4271.661621, avg loss: 2.571741, ppl: 13.088590 +epoch: 1, batch: 43600, sum loss: 4740.188477, avg loss: 2.743165, ppl: 15.536072 +epoch: 1, batch: 43601, sum loss: 4424.269531, avg loss: 2.594880, ppl: 13.394975 +epoch: 1, batch: 43602, sum loss: 5367.118164, avg loss: 3.089878, ppl: 21.974400 +epoch: 1, batch: 43603, sum loss: 5231.480957, avg loss: 2.814137, ppl: 16.678776 +epoch: 1, batch: 43604, sum loss: 3990.784668, avg loss: 2.564772, ppl: 12.997690 +epoch: 1, batch: 43605, sum loss: 3623.004150, avg loss: 2.564051, ppl: 12.988325 +epoch: 1, batch: 43606, sum loss: 4315.641113, avg loss: 2.564255, ppl: 12.990976 +epoch: 1, batch: 43607, sum loss: 4490.316406, avg loss: 2.573247, ppl: 13.108320 +epoch: 1, batch: 43608, sum loss: 4057.676758, avg loss: 2.582862, ppl: 13.234967 +epoch: 1, batch: 43609, sum loss: 3958.742676, avg loss: 2.422731, ppl: 11.276615 +epoch: 1, batch: 43610, sum loss: 
3837.536133, avg loss: 2.518068, ppl: 12.404612 +epoch: 1, batch: 43611, sum loss: 4561.642090, avg loss: 2.480501, ppl: 11.947253 +epoch: 1, batch: 43612, sum loss: 4703.763672, avg loss: 2.698660, ppl: 14.859801 +epoch: 1, batch: 43613, sum loss: 4096.551758, avg loss: 2.541285, ppl: 12.695978 +epoch: 1, batch: 43614, sum loss: 4210.666504, avg loss: 2.556567, ppl: 12.891487 +epoch: 1, batch: 43615, sum loss: 4073.714844, avg loss: 2.514639, ppl: 12.362144 +epoch: 1, batch: 43616, sum loss: 4476.359375, avg loss: 2.475863, ppl: 11.891959 +epoch: 1, batch: 43617, sum loss: 5040.419922, avg loss: 2.607563, ppl: 13.565957 +epoch: 1, batch: 43618, sum loss: 4319.342285, avg loss: 2.479531, ppl: 11.935660 +epoch: 1, batch: 43619, sum loss: 3704.069092, avg loss: 2.417800, ppl: 11.221143 +epoch: 1, batch: 43620, sum loss: 3855.206787, avg loss: 2.447750, ppl: 11.562306 +epoch: 1, batch: 43621, sum loss: 4746.191406, avg loss: 2.825114, ppl: 16.862867 +epoch: 1, batch: 43622, sum loss: 4303.480957, avg loss: 2.564649, ppl: 12.996094 +epoch: 1, batch: 43623, sum loss: 4396.917969, avg loss: 2.737807, ppl: 15.453056 +epoch: 1, batch: 43624, sum loss: 4337.795898, avg loss: 2.706048, ppl: 14.970004 +epoch: 1, batch: 43625, sum loss: 3905.408447, avg loss: 2.552555, ppl: 12.839863 +epoch: 1, batch: 43626, sum loss: 5252.367676, avg loss: 2.697672, ppl: 14.845138 +epoch: 1, batch: 43627, sum loss: 4694.601074, avg loss: 2.565356, ppl: 13.005284 +epoch: 1, batch: 43628, sum loss: 4128.025391, avg loss: 2.524786, ppl: 12.488222 +epoch: 1, batch: 43629, sum loss: 4181.574219, avg loss: 2.629921, ppl: 13.872673 +epoch: 1, batch: 43630, sum loss: 5044.251465, avg loss: 2.754916, ppl: 15.719724 +epoch: 1, batch: 43631, sum loss: 4618.623047, avg loss: 2.604976, ppl: 13.530906 +epoch: 1, batch: 43632, sum loss: 4067.686768, avg loss: 2.374598, ppl: 10.746695 +epoch: 1, batch: 43633, sum loss: 3841.177246, avg loss: 2.487809, ppl: 12.034881 +epoch: 1, batch: 43634, sum loss: 
3807.219238, avg loss: 2.534766, ppl: 12.613484 +epoch: 1, batch: 43635, sum loss: 4723.419434, avg loss: 2.738214, ppl: 15.459354 +epoch: 1, batch: 43636, sum loss: 3224.197021, avg loss: 2.286665, ppl: 9.842057 +epoch: 1, batch: 43637, sum loss: 4132.637695, avg loss: 2.443902, ppl: 11.517894 +epoch: 1, batch: 43638, sum loss: 3795.802734, avg loss: 2.203020, ppl: 9.052307 +epoch: 1, batch: 43639, sum loss: 4748.166504, avg loss: 2.620401, ppl: 13.741231 +epoch: 1, batch: 43640, sum loss: 4177.582031, avg loss: 2.341694, ppl: 10.398838 +epoch: 1, batch: 43641, sum loss: 4114.219238, avg loss: 2.417285, ppl: 11.215368 +epoch: 1, batch: 43642, sum loss: 4729.415039, avg loss: 2.684118, ppl: 14.645272 +epoch: 1, batch: 43643, sum loss: 5677.083008, avg loss: 2.857113, ppl: 17.411182 +epoch: 1, batch: 43644, sum loss: 4923.915039, avg loss: 2.764691, ppl: 15.874140 +epoch: 1, batch: 43645, sum loss: 3828.004639, avg loss: 2.354246, ppl: 10.530190 +epoch: 1, batch: 43646, sum loss: 4828.062500, avg loss: 2.754172, ppl: 15.708023 +epoch: 1, batch: 43647, sum loss: 4160.578613, avg loss: 2.655124, ppl: 14.226746 +epoch: 1, batch: 43648, sum loss: 4353.859375, avg loss: 2.624388, ppl: 13.796124 +epoch: 1, batch: 43649, sum loss: 4099.129395, avg loss: 2.636096, ppl: 13.958606 +epoch: 1, batch: 43650, sum loss: 4967.949219, avg loss: 2.670940, ppl: 14.453555 +epoch: 1, batch: 43651, sum loss: 4376.269531, avg loss: 2.661964, ppl: 14.324401 +epoch: 1, batch: 43652, sum loss: 3986.654053, avg loss: 2.404496, ppl: 11.072847 +epoch: 1, batch: 43653, sum loss: 4175.057129, avg loss: 2.622523, ppl: 13.770427 +epoch: 1, batch: 43654, sum loss: 4608.985352, avg loss: 2.413081, ppl: 11.168323 +epoch: 1, batch: 43655, sum loss: 4574.592285, avg loss: 2.815134, ppl: 16.695406 +epoch: 1, batch: 43656, sum loss: 3954.089600, avg loss: 2.608239, ppl: 13.575120 +epoch: 1, batch: 43657, sum loss: 3945.953613, avg loss: 2.469308, ppl: 11.814264 +epoch: 1, batch: 43658, sum loss: 
4172.258789, avg loss: 2.427143, ppl: 11.326477 +epoch: 1, batch: 43659, sum loss: 3847.945801, avg loss: 2.455613, ppl: 11.653580 +epoch: 1, batch: 43660, sum loss: 3656.019531, avg loss: 2.273644, ppl: 9.714737 +epoch: 1, batch: 43661, sum loss: 5153.003906, avg loss: 2.447983, ppl: 11.564994 +epoch: 1, batch: 43662, sum loss: 5153.838867, avg loss: 2.671767, ppl: 14.465508 +epoch: 1, batch: 43663, sum loss: 4252.184570, avg loss: 2.433992, ppl: 11.404319 +epoch: 1, batch: 43664, sum loss: 3754.433838, avg loss: 2.612689, ppl: 13.635665 +epoch: 1, batch: 43665, sum loss: 5231.392090, avg loss: 2.730372, ppl: 15.338588 +epoch: 1, batch: 43666, sum loss: 4120.571289, avg loss: 2.622897, ppl: 13.775576 +epoch: 1, batch: 43667, sum loss: 5197.951172, avg loss: 2.796101, ppl: 16.380648 +epoch: 1, batch: 43668, sum loss: 3992.840820, avg loss: 2.418438, ppl: 11.228307 +epoch: 1, batch: 43669, sum loss: 3148.235596, avg loss: 2.258419, ppl: 9.567946 +epoch: 1, batch: 43670, sum loss: 4317.656738, avg loss: 2.596306, ppl: 13.414093 +epoch: 1, batch: 43671, sum loss: 4581.467773, avg loss: 2.688655, ppl: 14.711874 +epoch: 1, batch: 43672, sum loss: 4160.720215, avg loss: 2.555725, ppl: 12.880633 +epoch: 1, batch: 43673, sum loss: 5425.702637, avg loss: 2.763985, ppl: 15.862933 +epoch: 1, batch: 43674, sum loss: 4075.078125, avg loss: 2.456346, ppl: 11.662121 +epoch: 1, batch: 43675, sum loss: 6110.512207, avg loss: 3.014560, ppl: 20.380112 +epoch: 1, batch: 43676, sum loss: 3860.496094, avg loss: 2.500321, ppl: 12.186410 +epoch: 1, batch: 43677, sum loss: 3336.363281, avg loss: 2.455014, ppl: 11.646591 +epoch: 1, batch: 43678, sum loss: 4455.468262, avg loss: 2.857902, ppl: 17.424923 +epoch: 1, batch: 43679, sum loss: 4336.422852, avg loss: 2.570494, ppl: 13.072277 +epoch: 1, batch: 43680, sum loss: 4371.107422, avg loss: 2.637964, ppl: 13.984709 +epoch: 1, batch: 43681, sum loss: 3912.535156, avg loss: 2.594519, ppl: 13.390150 +epoch: 1, batch: 43682, sum loss: 
3678.923828, avg loss: 2.367390, ppl: 10.669508 +epoch: 1, batch: 43683, sum loss: 4559.307129, avg loss: 2.513400, ppl: 12.346836 +epoch: 1, batch: 43684, sum loss: 3710.055176, avg loss: 2.346651, ppl: 10.450511 +epoch: 1, batch: 43685, sum loss: 3570.281982, avg loss: 2.542936, ppl: 12.716951 +epoch: 1, batch: 43686, sum loss: 4053.945068, avg loss: 2.499350, ppl: 12.174573 +epoch: 1, batch: 43687, sum loss: 4250.217773, avg loss: 2.540477, ppl: 12.685721 +epoch: 1, batch: 43688, sum loss: 4563.346680, avg loss: 2.545090, ppl: 12.744377 +epoch: 1, batch: 43689, sum loss: 4429.510742, avg loss: 2.531149, ppl: 12.567937 +epoch: 1, batch: 43690, sum loss: 4213.693848, avg loss: 2.682173, ppl: 14.616825 +epoch: 1, batch: 43691, sum loss: 4642.579102, avg loss: 2.668149, ppl: 14.413265 +epoch: 1, batch: 43692, sum loss: 4219.377441, avg loss: 2.502596, ppl: 12.214166 +epoch: 1, batch: 43693, sum loss: 3761.082031, avg loss: 2.428071, ppl: 11.336992 +epoch: 1, batch: 43694, sum loss: 4063.670166, avg loss: 2.465819, ppl: 11.773125 +epoch: 1, batch: 43695, sum loss: 4101.319336, avg loss: 2.612305, ppl: 13.630438 +epoch: 1, batch: 43696, sum loss: 4084.498535, avg loss: 2.460541, ppl: 11.711148 +epoch: 1, batch: 43697, sum loss: 4483.327148, avg loss: 2.769195, ppl: 15.945798 +epoch: 1, batch: 43698, sum loss: 3050.958252, avg loss: 2.256626, ppl: 9.550809 +epoch: 1, batch: 43699, sum loss: 3569.222656, avg loss: 2.401900, ppl: 11.044136 +epoch: 1, batch: 43700, sum loss: 4617.252441, avg loss: 2.586696, ppl: 13.285805 +epoch: 1, batch: 43701, sum loss: 4837.492188, avg loss: 2.511678, ppl: 12.325598 +epoch: 1, batch: 43702, sum loss: 3777.066650, avg loss: 2.449460, ppl: 11.582086 +epoch: 1, batch: 43703, sum loss: 4802.379395, avg loss: 2.375064, ppl: 10.751700 +epoch: 1, batch: 43704, sum loss: 4370.443848, avg loss: 2.555815, ppl: 12.881794 +epoch: 1, batch: 43705, sum loss: 4990.738770, avg loss: 2.533370, ppl: 12.595883 +epoch: 1, batch: 43706, sum loss: 
4469.070801, avg loss: 2.784468, ppl: 16.191196 +epoch: 1, batch: 43707, sum loss: 4523.426758, avg loss: 2.484035, ppl: 11.989539 +epoch: 1, batch: 43708, sum loss: 4931.062500, avg loss: 2.679925, ppl: 14.584002 +epoch: 1, batch: 43709, sum loss: 5209.563477, avg loss: 2.615243, ppl: 13.670537 +epoch: 1, batch: 43710, sum loss: 4643.287109, avg loss: 2.582473, ppl: 13.229818 +epoch: 1, batch: 43711, sum loss: 4248.062500, avg loss: 2.531622, ppl: 12.573890 +epoch: 1, batch: 43712, sum loss: 4800.414062, avg loss: 2.608921, ppl: 13.584379 +epoch: 1, batch: 43713, sum loss: 4629.788086, avg loss: 2.650136, ppl: 14.155967 +epoch: 1, batch: 43714, sum loss: 4537.990234, avg loss: 2.519706, ppl: 12.424940 +epoch: 1, batch: 43715, sum loss: 3982.369873, avg loss: 2.470453, ppl: 11.827801 +epoch: 1, batch: 43716, sum loss: 4514.937988, avg loss: 2.588840, ppl: 13.314324 +epoch: 1, batch: 43717, sum loss: 3319.348145, avg loss: 2.306705, ppl: 10.041282 +epoch: 1, batch: 43718, sum loss: 3663.207520, avg loss: 2.411592, ppl: 11.151695 +epoch: 1, batch: 43719, sum loss: 3786.156738, avg loss: 2.425469, ppl: 11.307530 +epoch: 1, batch: 43720, sum loss: 3985.621582, avg loss: 2.519356, ppl: 12.420595 +epoch: 1, batch: 43721, sum loss: 4124.333496, avg loss: 2.608686, ppl: 13.581200 +epoch: 1, batch: 43722, sum loss: 5595.596680, avg loss: 2.748328, ppl: 15.616507 +epoch: 1, batch: 43723, sum loss: 3954.429199, avg loss: 2.599888, ppl: 13.462228 +epoch: 1, batch: 43724, sum loss: 4822.397461, avg loss: 2.793973, ppl: 16.345833 +epoch: 1, batch: 43725, sum loss: 4613.679688, avg loss: 2.680813, ppl: 14.596960 +epoch: 1, batch: 43726, sum loss: 4179.963379, avg loss: 2.695012, ppl: 14.805695 +epoch: 1, batch: 43727, sum loss: 3528.073486, avg loss: 2.488063, ppl: 12.037937 +epoch: 1, batch: 43728, sum loss: 5027.812012, avg loss: 2.796336, ppl: 16.384499 +epoch: 1, batch: 43729, sum loss: 4553.874512, avg loss: 2.426145, ppl: 11.315182 +epoch: 1, batch: 43730, sum loss: 
4744.692383, avg loss: 2.789355, ppl: 16.270515 +epoch: 1, batch: 43731, sum loss: 4313.680176, avg loss: 2.595475, ppl: 13.402958 +epoch: 1, batch: 43732, sum loss: 4317.849121, avg loss: 2.650613, ppl: 14.162722 +epoch: 1, batch: 43733, sum loss: 5007.689453, avg loss: 2.660834, ppl: 14.308215 +epoch: 1, batch: 43734, sum loss: 3927.894531, avg loss: 2.426124, ppl: 11.314939 +epoch: 1, batch: 43735, sum loss: 3670.767578, avg loss: 2.397628, ppl: 10.997056 +epoch: 1, batch: 43736, sum loss: 4729.921875, avg loss: 2.507912, ppl: 12.279266 +epoch: 1, batch: 43737, sum loss: 4138.934082, avg loss: 2.593317, ppl: 13.374061 +epoch: 1, batch: 43738, sum loss: 4472.438965, avg loss: 2.528230, ppl: 12.531308 +epoch: 1, batch: 43739, sum loss: 3635.487793, avg loss: 2.402834, ppl: 11.054462 +epoch: 1, batch: 43740, sum loss: 4975.760254, avg loss: 2.851439, ppl: 17.312668 +epoch: 1, batch: 43741, sum loss: 3972.852539, avg loss: 2.608570, ppl: 13.579622 +epoch: 1, batch: 43742, sum loss: 4961.583496, avg loss: 2.778042, ppl: 16.087496 +epoch: 1, batch: 43743, sum loss: 4190.723633, avg loss: 2.816346, ppl: 16.715666 +epoch: 1, batch: 43744, sum loss: 4107.723145, avg loss: 2.608078, ppl: 13.572939 +epoch: 1, batch: 43745, sum loss: 4044.875000, avg loss: 2.696583, ppl: 14.828979 +epoch: 1, batch: 43746, sum loss: 4449.270020, avg loss: 2.724599, ppl: 15.250296 +epoch: 1, batch: 43747, sum loss: 5568.599609, avg loss: 2.755369, ppl: 15.726838 +epoch: 1, batch: 43748, sum loss: 3182.177734, avg loss: 2.351942, ppl: 10.505953 +epoch: 1, batch: 43749, sum loss: 4372.322266, avg loss: 2.477237, ppl: 11.908310 +epoch: 1, batch: 43750, sum loss: 4229.551270, avg loss: 2.508631, ppl: 12.288094 +epoch: 1, batch: 43751, sum loss: 4621.909180, avg loss: 2.679368, ppl: 14.575875 +epoch: 1, batch: 43752, sum loss: 3556.720703, avg loss: 2.411336, ppl: 11.148849 +epoch: 1, batch: 43753, sum loss: 5180.645508, avg loss: 2.708126, ppl: 15.001138 +epoch: 1, batch: 43754, sum loss: 
4192.629883, avg loss: 2.461908, ppl: 11.727170 +epoch: 1, batch: 43755, sum loss: 5138.234375, avg loss: 2.907886, ppl: 18.318033 +epoch: 1, batch: 43756, sum loss: 4240.919434, avg loss: 2.614624, ppl: 13.662072 +epoch: 1, batch: 43757, sum loss: 4037.203125, avg loss: 2.398813, ppl: 11.010105 +epoch: 1, batch: 43758, sum loss: 4670.024414, avg loss: 2.536678, ppl: 12.637620 +epoch: 1, batch: 43759, sum loss: 4059.677246, avg loss: 2.609047, ppl: 13.586100 +epoch: 1, batch: 43760, sum loss: 4628.854492, avg loss: 2.695897, ppl: 14.818800 +epoch: 1, batch: 43761, sum loss: 4853.602539, avg loss: 2.592736, ppl: 13.366299 +epoch: 1, batch: 43762, sum loss: 3421.900879, avg loss: 2.122767, ppl: 8.354223 +epoch: 1, batch: 43763, sum loss: 5520.717285, avg loss: 2.989019, ppl: 19.866179 +epoch: 1, batch: 43764, sum loss: 4471.289551, avg loss: 2.634820, ppl: 13.940803 +epoch: 1, batch: 43765, sum loss: 3798.402100, avg loss: 2.344693, ppl: 10.430067 +epoch: 1, batch: 43766, sum loss: 4944.873535, avg loss: 2.886674, ppl: 17.933571 +epoch: 1, batch: 43767, sum loss: 4614.300781, avg loss: 2.642784, ppl: 14.052273 +epoch: 1, batch: 43768, sum loss: 3766.366455, avg loss: 2.610095, ppl: 13.600337 +epoch: 1, batch: 43769, sum loss: 4103.545410, avg loss: 2.449878, ppl: 11.586933 +epoch: 1, batch: 43770, sum loss: 4086.878174, avg loss: 2.493519, ppl: 12.103798 +epoch: 1, batch: 43771, sum loss: 5006.827148, avg loss: 2.852893, ppl: 17.337866 +epoch: 1, batch: 43772, sum loss: 5212.695801, avg loss: 2.743524, ppl: 15.541662 +epoch: 1, batch: 43773, sum loss: 4050.512207, avg loss: 2.591499, ppl: 13.349763 +epoch: 1, batch: 43774, sum loss: 4593.160645, avg loss: 2.615695, ppl: 13.676722 +epoch: 1, batch: 43775, sum loss: 4522.161133, avg loss: 2.767540, ppl: 15.919432 +epoch: 1, batch: 43776, sum loss: 3951.340576, avg loss: 2.336689, ppl: 10.346919 +epoch: 1, batch: 43777, sum loss: 4622.102051, avg loss: 2.489016, ppl: 12.049409 +epoch: 1, batch: 43778, sum loss: 
4257.812500, avg loss: 2.320334, ppl: 10.179073 +epoch: 1, batch: 43779, sum loss: 4355.500977, avg loss: 2.536692, ppl: 12.637801 +epoch: 1, batch: 43780, sum loss: 4478.034180, avg loss: 2.611099, ppl: 13.613998 +epoch: 1, batch: 43781, sum loss: 4023.802490, avg loss: 2.380948, ppl: 10.815154 +epoch: 1, batch: 43782, sum loss: 3833.032227, avg loss: 2.433671, ppl: 11.400660 +epoch: 1, batch: 43783, sum loss: 3882.485840, avg loss: 2.506447, ppl: 12.261283 +epoch: 1, batch: 43784, sum loss: 4847.226562, avg loss: 2.648758, ppl: 14.136466 +epoch: 1, batch: 43785, sum loss: 4209.788086, avg loss: 2.448975, ppl: 11.576476 +epoch: 1, batch: 43786, sum loss: 5201.038574, avg loss: 2.769456, ppl: 15.949957 +epoch: 1, batch: 43787, sum loss: 4569.879883, avg loss: 2.538822, ppl: 12.664745 +epoch: 1, batch: 43788, sum loss: 4389.207520, avg loss: 2.687818, ppl: 14.699571 +epoch: 1, batch: 43789, sum loss: 4414.581055, avg loss: 2.563636, ppl: 12.982935 +epoch: 1, batch: 43790, sum loss: 5240.194824, avg loss: 2.827952, ppl: 16.910791 +epoch: 1, batch: 43791, sum loss: 3406.359131, avg loss: 2.417572, ppl: 11.218591 +epoch: 1, batch: 43792, sum loss: 3854.110352, avg loss: 2.560871, ppl: 12.947088 +epoch: 1, batch: 43793, sum loss: 4445.173340, avg loss: 2.613271, ppl: 13.643600 +epoch: 1, batch: 43794, sum loss: 4727.259277, avg loss: 2.815521, ppl: 16.701872 +epoch: 1, batch: 43795, sum loss: 3847.775146, avg loss: 2.359151, ppl: 10.581963 +epoch: 1, batch: 43796, sum loss: 5111.622070, avg loss: 2.696003, ppl: 14.820383 +epoch: 1, batch: 43797, sum loss: 4365.538086, avg loss: 2.516160, ppl: 12.380965 +epoch: 1, batch: 43798, sum loss: 4304.674316, avg loss: 2.482511, ppl: 11.971287 +epoch: 1, batch: 43799, sum loss: 3177.819336, avg loss: 2.188581, ppl: 8.922541 +epoch: 1, batch: 43800, sum loss: 4824.533203, avg loss: 2.605039, ppl: 13.531758 +epoch: 1, batch: 43801, sum loss: 4031.845215, avg loss: 2.671866, ppl: 14.466935 +epoch: 1, batch: 43802, sum loss: 
5781.495605, avg loss: 2.972492, ppl: 19.540545 +epoch: 1, batch: 43803, sum loss: 4434.901367, avg loss: 2.579931, ppl: 13.196228 +epoch: 1, batch: 43804, sum loss: 4402.232910, avg loss: 2.611052, ppl: 13.613358 +epoch: 1, batch: 43805, sum loss: 3805.905029, avg loss: 2.416448, ppl: 11.205981 +epoch: 1, batch: 43806, sum loss: 4459.777832, avg loss: 2.704535, ppl: 14.947361 +epoch: 1, batch: 43807, sum loss: 3796.866699, avg loss: 2.506183, ppl: 12.258048 +epoch: 1, batch: 43808, sum loss: 3846.574707, avg loss: 2.765331, ppl: 15.884290 +epoch: 1, batch: 43809, sum loss: 4168.928711, avg loss: 2.436545, ppl: 11.433474 +epoch: 1, batch: 43810, sum loss: 4678.412109, avg loss: 2.684115, ppl: 14.645230 +epoch: 1, batch: 43811, sum loss: 4775.314453, avg loss: 2.823959, ppl: 16.843403 +epoch: 1, batch: 43812, sum loss: 4802.897461, avg loss: 2.744513, ppl: 15.557033 +epoch: 1, batch: 43813, sum loss: 5066.404785, avg loss: 2.747508, ppl: 15.603700 +epoch: 1, batch: 43814, sum loss: 3848.953125, avg loss: 2.359873, ppl: 10.589609 +epoch: 1, batch: 43815, sum loss: 4488.078125, avg loss: 2.558767, ppl: 12.919882 +epoch: 1, batch: 43816, sum loss: 4156.764648, avg loss: 2.502568, ppl: 12.213813 +epoch: 1, batch: 43817, sum loss: 4185.811523, avg loss: 2.506474, ppl: 12.261620 +epoch: 1, batch: 43818, sum loss: 3646.232422, avg loss: 2.600736, ppl: 13.473656 +epoch: 1, batch: 43819, sum loss: 3727.392578, avg loss: 2.431437, ppl: 11.375214 +epoch: 1, batch: 43820, sum loss: 3968.705078, avg loss: 2.427342, ppl: 11.328735 +epoch: 1, batch: 43821, sum loss: 5084.227051, avg loss: 2.772207, ppl: 15.993886 +epoch: 1, batch: 43822, sum loss: 5194.843262, avg loss: 2.889234, ppl: 17.979532 +epoch: 1, batch: 43823, sum loss: 3045.399658, avg loss: 2.247527, ppl: 9.464305 +epoch: 1, batch: 43824, sum loss: 4924.705078, avg loss: 2.822181, ppl: 16.813473 +epoch: 1, batch: 43825, sum loss: 4666.591797, avg loss: 2.612874, ppl: 13.638185 +epoch: 1, batch: 43826, sum loss: 
3913.288086, avg loss: 2.539447, ppl: 12.672662 +epoch: 1, batch: 43827, sum loss: 4173.581055, avg loss: 2.497655, ppl: 12.153958 +epoch: 1, batch: 43828, sum loss: 4691.460938, avg loss: 2.655043, ppl: 14.225599 +epoch: 1, batch: 43829, sum loss: 5827.425293, avg loss: 3.006927, ppl: 20.225163 +epoch: 1, batch: 43830, sum loss: 4369.987305, avg loss: 2.455049, ppl: 11.647005 +epoch: 1, batch: 43831, sum loss: 4364.044434, avg loss: 2.657762, ppl: 14.264323 +epoch: 1, batch: 43832, sum loss: 3562.791748, avg loss: 2.310500, ppl: 10.079467 +epoch: 1, batch: 43833, sum loss: 4669.290039, avg loss: 2.945924, ppl: 19.028242 +epoch: 1, batch: 43834, sum loss: 4945.199707, avg loss: 2.966527, ppl: 19.424332 +epoch: 1, batch: 43835, sum loss: 4512.532715, avg loss: 2.733212, ppl: 15.382215 +epoch: 1, batch: 43836, sum loss: 5779.713867, avg loss: 2.876911, ppl: 17.759329 +epoch: 1, batch: 43837, sum loss: 4405.897461, avg loss: 2.533581, ppl: 12.598541 +epoch: 1, batch: 43838, sum loss: 3705.957520, avg loss: 2.360482, ppl: 10.596063 +epoch: 1, batch: 43839, sum loss: 4258.820801, avg loss: 2.577979, ppl: 13.170491 +epoch: 1, batch: 43840, sum loss: 4933.403809, avg loss: 2.757632, ppl: 15.762473 +epoch: 1, batch: 43841, sum loss: 3594.822021, avg loss: 2.332785, ppl: 10.306607 +epoch: 1, batch: 43842, sum loss: 4674.997070, avg loss: 2.619046, ppl: 13.722629 +epoch: 1, batch: 43843, sum loss: 4430.562012, avg loss: 2.592488, ppl: 13.362978 +epoch: 1, batch: 43844, sum loss: 5035.074219, avg loss: 2.590059, ppl: 13.330559 +epoch: 1, batch: 43845, sum loss: 4341.499023, avg loss: 2.534442, ppl: 12.609392 +epoch: 1, batch: 43846, sum loss: 4962.832031, avg loss: 2.501427, ppl: 12.199896 +epoch: 1, batch: 43847, sum loss: 4499.593750, avg loss: 2.609973, ppl: 13.598689 +epoch: 1, batch: 43848, sum loss: 3542.284424, avg loss: 2.395054, ppl: 10.968786 +epoch: 1, batch: 43849, sum loss: 4239.481445, avg loss: 2.510054, ppl: 12.305596 +epoch: 1, batch: 43850, sum loss: 
4010.458740, avg loss: 2.592410, ppl: 13.361939 +epoch: 1, batch: 43851, sum loss: 4042.094482, avg loss: 2.490508, ppl: 12.067403 +epoch: 1, batch: 43852, sum loss: 5157.790527, avg loss: 2.764089, ppl: 15.864582 +epoch: 1, batch: 43853, sum loss: 5062.014648, avg loss: 2.517163, ppl: 12.393387 +epoch: 1, batch: 43854, sum loss: 5096.431152, avg loss: 2.662712, ppl: 14.335114 +epoch: 1, batch: 43855, sum loss: 3630.645508, avg loss: 2.683404, ppl: 14.634825 +epoch: 1, batch: 43856, sum loss: 4416.205078, avg loss: 2.622450, ppl: 13.769412 +epoch: 1, batch: 43857, sum loss: 4161.048340, avg loss: 2.485692, ppl: 12.009428 +epoch: 1, batch: 43858, sum loss: 5899.744629, avg loss: 2.862564, ppl: 17.506357 +epoch: 1, batch: 43859, sum loss: 4479.432129, avg loss: 2.567010, ppl: 13.026812 +epoch: 1, batch: 43860, sum loss: 4126.219727, avg loss: 2.628165, ppl: 13.848342 +epoch: 1, batch: 43861, sum loss: 4192.955078, avg loss: 2.675785, ppl: 14.523747 +epoch: 1, batch: 43862, sum loss: 3939.668945, avg loss: 2.360497, ppl: 10.596214 +epoch: 1, batch: 43863, sum loss: 3914.774658, avg loss: 2.517540, ppl: 12.398063 +epoch: 1, batch: 43864, sum loss: 3896.210693, avg loss: 2.361340, ppl: 10.605151 +epoch: 1, batch: 43865, sum loss: 4259.383301, avg loss: 2.523331, ppl: 12.470071 +epoch: 1, batch: 43866, sum loss: 4753.790039, avg loss: 2.816226, ppl: 16.713663 +epoch: 1, batch: 43867, sum loss: 4934.049805, avg loss: 2.730520, ppl: 15.340862 +epoch: 1, batch: 43868, sum loss: 4681.437500, avg loss: 2.533245, ppl: 12.594313 +epoch: 1, batch: 43869, sum loss: 5108.295410, avg loss: 2.890943, ppl: 18.010277 +epoch: 1, batch: 43870, sum loss: 4589.719238, avg loss: 2.600408, ppl: 13.469227 +epoch: 1, batch: 43871, sum loss: 3829.135742, avg loss: 2.315076, ppl: 10.125693 +epoch: 1, batch: 43872, sum loss: 3542.584717, avg loss: 2.466981, ppl: 11.786808 +epoch: 1, batch: 43873, sum loss: 4586.521973, avg loss: 2.620870, ppl: 13.747674 +epoch: 1, batch: 43874, sum loss: 
4638.302734, avg loss: 3.000196, ppl: 20.089468 +epoch: 1, batch: 43875, sum loss: 4207.822754, avg loss: 2.295594, ppl: 9.930328 +epoch: 1, batch: 43876, sum loss: 4527.971191, avg loss: 2.467559, ppl: 11.793628 +epoch: 1, batch: 43877, sum loss: 3047.397461, avg loss: 2.015475, ppl: 7.504288 +epoch: 1, batch: 43878, sum loss: 4363.015137, avg loss: 2.500295, ppl: 12.186090 +epoch: 1, batch: 43879, sum loss: 3969.768066, avg loss: 2.523692, ppl: 12.474572 +epoch: 1, batch: 43880, sum loss: 4368.770508, avg loss: 2.672031, ppl: 14.469325 +epoch: 1, batch: 43881, sum loss: 4772.597168, avg loss: 2.742872, ppl: 15.531528 +epoch: 1, batch: 43882, sum loss: 4442.510254, avg loss: 2.720459, ppl: 15.187299 +epoch: 1, batch: 43883, sum loss: 5106.163574, avg loss: 2.607847, ppl: 13.569800 +epoch: 1, batch: 43884, sum loss: 3317.967773, avg loss: 2.466891, ppl: 11.785743 +epoch: 1, batch: 43885, sum loss: 4041.418701, avg loss: 2.667603, ppl: 14.405401 +epoch: 1, batch: 43886, sum loss: 3853.719482, avg loss: 2.449917, ppl: 11.587383 +epoch: 1, batch: 43887, sum loss: 4248.153809, avg loss: 2.582464, ppl: 13.229699 +epoch: 1, batch: 43888, sum loss: 4692.285156, avg loss: 2.835218, ppl: 17.034109 +epoch: 1, batch: 43889, sum loss: 3699.816650, avg loss: 2.471487, ppl: 11.840043 +epoch: 1, batch: 43890, sum loss: 3856.002441, avg loss: 2.354092, ppl: 10.528561 +epoch: 1, batch: 43891, sum loss: 4651.664062, avg loss: 2.706029, ppl: 14.969715 +epoch: 1, batch: 43892, sum loss: 4438.334961, avg loss: 2.664067, ppl: 14.354544 +epoch: 1, batch: 43893, sum loss: 4588.970703, avg loss: 2.613309, ppl: 13.644127 +epoch: 1, batch: 43894, sum loss: 3620.013428, avg loss: 2.148376, ppl: 8.570928 +epoch: 1, batch: 43895, sum loss: 4285.730469, avg loss: 2.409067, ppl: 11.123580 +epoch: 1, batch: 43896, sum loss: 4110.716309, avg loss: 2.361124, ppl: 10.602860 +epoch: 1, batch: 43897, sum loss: 5485.064453, avg loss: 2.814297, ppl: 16.681444 +epoch: 1, batch: 43898, sum loss: 
4615.531250, avg loss: 2.742443, ppl: 15.524863 +epoch: 1, batch: 43899, sum loss: 5390.253906, avg loss: 2.918384, ppl: 18.511341 +epoch: 1, batch: 43900, sum loss: 5434.602539, avg loss: 2.765701, ppl: 15.890176 +epoch: 1, batch: 43901, sum loss: 4126.080078, avg loss: 2.473669, ppl: 11.865904 +epoch: 1, batch: 43902, sum loss: 3686.737305, avg loss: 2.574537, ppl: 13.125242 +epoch: 1, batch: 43903, sum loss: 6041.056152, avg loss: 2.889075, ppl: 17.976679 +epoch: 1, batch: 43904, sum loss: 5091.411133, avg loss: 2.860343, ppl: 17.467525 +epoch: 1, batch: 43905, sum loss: 4445.320312, avg loss: 2.514322, ppl: 12.358222 +epoch: 1, batch: 43906, sum loss: 4553.509766, avg loss: 2.647389, ppl: 14.117136 +epoch: 1, batch: 43907, sum loss: 4037.255859, avg loss: 2.566596, ppl: 13.021427 +epoch: 1, batch: 43908, sum loss: 3438.024902, avg loss: 2.202450, ppl: 9.047154 +epoch: 1, batch: 43909, sum loss: 4711.936035, avg loss: 2.597539, ppl: 13.430647 +epoch: 1, batch: 43910, sum loss: 4087.450195, avg loss: 2.447575, ppl: 11.560280 +epoch: 1, batch: 43911, sum loss: 4588.420898, avg loss: 2.452390, ppl: 11.616073 +epoch: 1, batch: 43912, sum loss: 4319.729980, avg loss: 2.494071, ppl: 12.110472 +epoch: 1, batch: 43913, sum loss: 3972.033447, avg loss: 2.685621, ppl: 14.667307 +epoch: 1, batch: 43914, sum loss: 3658.182129, avg loss: 2.481806, ppl: 11.962851 +epoch: 1, batch: 43915, sum loss: 5219.508301, avg loss: 2.861573, ppl: 17.489006 +epoch: 1, batch: 43916, sum loss: 4062.609863, avg loss: 2.503148, ppl: 12.220909 +epoch: 1, batch: 43917, sum loss: 3904.506348, avg loss: 2.589195, ppl: 13.319049 +epoch: 1, batch: 43918, sum loss: 4808.933594, avg loss: 2.613551, ppl: 13.647423 +epoch: 1, batch: 43919, sum loss: 3816.262207, avg loss: 2.426104, ppl: 11.314717 +epoch: 1, batch: 43920, sum loss: 4060.098145, avg loss: 2.653659, ppl: 14.205925 +epoch: 1, batch: 43921, sum loss: 3787.308105, avg loss: 2.412298, ppl: 11.159578 +epoch: 1, batch: 43922, sum loss: 
3766.579102, avg loss: 2.656262, ppl: 14.242945 +epoch: 1, batch: 43923, sum loss: 5196.234375, avg loss: 2.729115, ppl: 15.319320 +epoch: 1, batch: 43924, sum loss: 4496.747070, avg loss: 2.533379, ppl: 12.595991 +epoch: 1, batch: 43925, sum loss: 4538.327148, avg loss: 2.779135, ppl: 16.105080 +epoch: 1, batch: 43926, sum loss: 4745.957520, avg loss: 2.767322, ppl: 15.915955 +epoch: 1, batch: 43927, sum loss: 4022.079102, avg loss: 2.406989, ppl: 11.100491 +epoch: 1, batch: 43928, sum loss: 4725.268555, avg loss: 2.725068, ppl: 15.257457 +epoch: 1, batch: 43929, sum loss: 4096.645020, avg loss: 2.507127, ppl: 12.269627 +epoch: 1, batch: 43930, sum loss: 4087.244141, avg loss: 2.596724, ppl: 13.419706 +epoch: 1, batch: 43931, sum loss: 4440.517578, avg loss: 2.604409, ppl: 13.523233 +epoch: 1, batch: 43932, sum loss: 3719.467285, avg loss: 2.508070, ppl: 12.281201 +epoch: 1, batch: 43933, sum loss: 4311.837891, avg loss: 2.411542, ppl: 11.151148 +epoch: 1, batch: 43934, sum loss: 4464.821777, avg loss: 2.557172, ppl: 12.899284 +epoch: 1, batch: 43935, sum loss: 5089.782227, avg loss: 2.703017, ppl: 14.924699 +epoch: 1, batch: 43936, sum loss: 3871.626465, avg loss: 2.424312, ppl: 11.294457 +epoch: 1, batch: 43937, sum loss: 4011.481689, avg loss: 2.441559, ppl: 11.490942 +epoch: 1, batch: 43938, sum loss: 4462.687988, avg loss: 2.620486, ppl: 13.742404 +epoch: 1, batch: 43939, sum loss: 4270.910156, avg loss: 2.384651, ppl: 10.855273 +epoch: 1, batch: 43940, sum loss: 4045.316650, avg loss: 2.425250, ppl: 11.305054 +epoch: 1, batch: 43941, sum loss: 3935.868652, avg loss: 2.703207, ppl: 14.927521 +epoch: 1, batch: 43942, sum loss: 3981.922363, avg loss: 2.285834, ppl: 9.833880 +epoch: 1, batch: 43943, sum loss: 4561.699219, avg loss: 2.700829, ppl: 14.892065 +epoch: 1, batch: 43944, sum loss: 3990.815674, avg loss: 2.480308, ppl: 11.944943 +epoch: 1, batch: 43945, sum loss: 4062.772461, avg loss: 2.507884, ppl: 12.278924 +epoch: 1, batch: 43946, sum loss: 
3632.722168, avg loss: 2.352799, ppl: 10.514964 +epoch: 1, batch: 43947, sum loss: 4574.594727, avg loss: 2.611070, ppl: 13.613612 +epoch: 1, batch: 43948, sum loss: 3696.899902, avg loss: 2.532123, ppl: 12.580190 +epoch: 1, batch: 43949, sum loss: 4947.075195, avg loss: 2.598254, ppl: 13.440250 +epoch: 1, batch: 43950, sum loss: 3315.601074, avg loss: 2.304101, ppl: 10.015168 +epoch: 1, batch: 43951, sum loss: 3961.768311, avg loss: 2.338706, ppl: 10.367815 +epoch: 1, batch: 43952, sum loss: 3559.837402, avg loss: 2.398812, ppl: 11.010092 +epoch: 1, batch: 43953, sum loss: 4740.594727, avg loss: 2.820104, ppl: 16.778593 +epoch: 1, batch: 43954, sum loss: 4125.211426, avg loss: 2.533914, ppl: 12.602732 +epoch: 1, batch: 43955, sum loss: 4531.514648, avg loss: 2.574724, ppl: 13.127696 +epoch: 1, batch: 43956, sum loss: 4358.193359, avg loss: 2.560631, ppl: 12.943976 +epoch: 1, batch: 43957, sum loss: 3659.253418, avg loss: 2.399510, ppl: 11.017780 +epoch: 1, batch: 43958, sum loss: 3803.908691, avg loss: 2.482969, ppl: 11.976771 +epoch: 1, batch: 43959, sum loss: 4013.091797, avg loss: 2.471116, ppl: 11.835643 +epoch: 1, batch: 43960, sum loss: 4380.782715, avg loss: 2.694208, ppl: 14.793799 +epoch: 1, batch: 43961, sum loss: 4214.374023, avg loss: 2.606292, ppl: 13.548719 +epoch: 1, batch: 43962, sum loss: 4947.968750, avg loss: 2.789159, ppl: 16.267338 +epoch: 1, batch: 43963, sum loss: 4570.378906, avg loss: 2.607176, ppl: 13.560699 +epoch: 1, batch: 43964, sum loss: 3753.218994, avg loss: 2.561924, ppl: 12.960732 +epoch: 1, batch: 43965, sum loss: 4774.785156, avg loss: 2.467589, ppl: 11.793979 +epoch: 1, batch: 43966, sum loss: 3723.221680, avg loss: 2.433478, ppl: 11.398461 +epoch: 1, batch: 43967, sum loss: 4322.006348, avg loss: 2.625763, ppl: 13.815117 +epoch: 1, batch: 43968, sum loss: 3381.811523, avg loss: 2.302118, ppl: 9.995331 +epoch: 1, batch: 43969, sum loss: 4592.675293, avg loss: 2.526224, ppl: 12.506195 +epoch: 1, batch: 43970, sum loss: 
3949.264648, avg loss: 2.529958, ppl: 12.552982 +epoch: 1, batch: 43971, sum loss: 4507.299316, avg loss: 2.648237, ppl: 14.129107 +epoch: 1, batch: 43972, sum loss: 5312.633789, avg loss: 3.166051, ppl: 23.713657 +epoch: 1, batch: 43973, sum loss: 5431.498047, avg loss: 2.982701, ppl: 19.741062 +epoch: 1, batch: 43974, sum loss: 3928.654541, avg loss: 2.594884, ppl: 13.395036 +epoch: 1, batch: 43975, sum loss: 5092.593750, avg loss: 2.708826, ppl: 15.011646 +epoch: 1, batch: 43976, sum loss: 3770.379639, avg loss: 2.435646, ppl: 11.423194 +epoch: 1, batch: 43977, sum loss: 4340.905273, avg loss: 2.480517, ppl: 11.947444 +epoch: 1, batch: 43978, sum loss: 4332.217773, avg loss: 2.546865, ppl: 12.767019 +epoch: 1, batch: 43979, sum loss: 4176.620605, avg loss: 2.554508, ppl: 12.864968 +epoch: 1, batch: 43980, sum loss: 4548.390625, avg loss: 2.826843, ppl: 16.892048 +epoch: 1, batch: 43981, sum loss: 4601.245605, avg loss: 2.599574, ppl: 13.458002 +epoch: 1, batch: 43982, sum loss: 4325.673828, avg loss: 2.607398, ppl: 13.563716 +epoch: 1, batch: 43983, sum loss: 4457.427734, avg loss: 2.650076, ppl: 14.155113 +epoch: 1, batch: 43984, sum loss: 4122.737793, avg loss: 2.586410, ppl: 13.282004 +epoch: 1, batch: 43985, sum loss: 4693.708984, avg loss: 2.702193, ppl: 14.912399 +epoch: 1, batch: 43986, sum loss: 4578.291016, avg loss: 2.841894, ppl: 17.148212 +epoch: 1, batch: 43987, sum loss: 2946.840576, avg loss: 2.214005, ppl: 9.152298 +epoch: 1, batch: 43988, sum loss: 4794.209473, avg loss: 2.941233, ppl: 18.939178 +epoch: 1, batch: 43989, sum loss: 4228.990723, avg loss: 2.608878, ppl: 13.583797 +epoch: 1, batch: 43990, sum loss: 4242.343262, avg loss: 2.456481, ppl: 11.663700 +epoch: 1, batch: 43991, sum loss: 4585.020020, avg loss: 2.695485, ppl: 14.812699 +epoch: 1, batch: 43992, sum loss: 4616.750000, avg loss: 2.603920, ppl: 13.516620 +epoch: 1, batch: 43993, sum loss: 5031.948242, avg loss: 2.846125, ppl: 17.220915 +epoch: 1, batch: 43994, sum loss: 
4508.569336, avg loss: 2.607617, ppl: 13.566682 +epoch: 1, batch: 43995, sum loss: 4406.552734, avg loss: 2.723457, ppl: 15.232885 +epoch: 1, batch: 43996, sum loss: 4434.441895, avg loss: 2.672961, ppl: 14.482786 +epoch: 1, batch: 43997, sum loss: 4204.671387, avg loss: 2.676430, ppl: 14.533117 +epoch: 1, batch: 43998, sum loss: 4664.324219, avg loss: 2.644175, ppl: 14.071829 +epoch: 1, batch: 43999, sum loss: 4278.655762, avg loss: 2.521306, ppl: 12.444837 +epoch: 1, batch: 44000, sum loss: 5369.791016, avg loss: 2.703822, ppl: 14.936716 +epoch: 1, batch: 44001, sum loss: 3592.324707, avg loss: 2.524473, ppl: 12.484311 +epoch: 1, batch: 44002, sum loss: 4986.636230, avg loss: 2.524879, ppl: 12.489387 +epoch: 1, batch: 44003, sum loss: 5427.015625, avg loss: 2.734013, ppl: 15.394539 +epoch: 1, batch: 44004, sum loss: 3896.349121, avg loss: 2.505691, ppl: 12.252020 +epoch: 1, batch: 44005, sum loss: 4873.737305, avg loss: 2.674938, ppl: 14.511453 +epoch: 1, batch: 44006, sum loss: 4775.261719, avg loss: 2.588218, ppl: 13.306036 +epoch: 1, batch: 44007, sum loss: 3387.510010, avg loss: 2.168700, ppl: 8.746910 +epoch: 1, batch: 44008, sum loss: 4669.511719, avg loss: 2.526792, ppl: 12.513300 +epoch: 1, batch: 44009, sum loss: 3302.104004, avg loss: 2.358646, ppl: 10.576617 +epoch: 1, batch: 44010, sum loss: 4771.147949, avg loss: 2.646227, ppl: 14.100741 +epoch: 1, batch: 44011, sum loss: 4002.666504, avg loss: 2.379707, ppl: 10.801733 +epoch: 1, batch: 44012, sum loss: 5496.629395, avg loss: 3.015156, ppl: 20.392273 +epoch: 1, batch: 44013, sum loss: 3173.524414, avg loss: 2.231733, ppl: 9.315997 +epoch: 1, batch: 44014, sum loss: 5166.895508, avg loss: 2.845207, ppl: 17.205114 +epoch: 1, batch: 44015, sum loss: 3444.725098, avg loss: 2.208157, ppl: 9.098934 +epoch: 1, batch: 44016, sum loss: 4860.505859, avg loss: 2.742949, ppl: 15.532727 +epoch: 1, batch: 44017, sum loss: 4611.008789, avg loss: 2.453970, ppl: 11.634440 +epoch: 1, batch: 44018, sum loss: 
4129.809082, avg loss: 2.673016, ppl: 14.483583 +epoch: 1, batch: 44019, sum loss: 4226.613281, avg loss: 2.728608, ppl: 15.311553 +epoch: 1, batch: 44020, sum loss: 4267.014648, avg loss: 2.600253, ppl: 13.467140 +epoch: 1, batch: 44021, sum loss: 4497.609375, avg loss: 2.564201, ppl: 12.990279 +epoch: 1, batch: 44022, sum loss: 4037.827637, avg loss: 2.477195, ppl: 11.907814 +epoch: 1, batch: 44023, sum loss: 2988.457275, avg loss: 2.218602, ppl: 9.194472 +epoch: 1, batch: 44024, sum loss: 3723.163574, avg loss: 2.379018, ppl: 10.794301 +epoch: 1, batch: 44025, sum loss: 4391.612793, avg loss: 2.640777, ppl: 14.024101 +epoch: 1, batch: 44026, sum loss: 3512.931152, avg loss: 2.463486, ppl: 11.745686 +epoch: 1, batch: 44027, sum loss: 4884.336914, avg loss: 2.671957, ppl: 14.468253 +epoch: 1, batch: 44028, sum loss: 4639.158203, avg loss: 2.761404, ppl: 15.822039 +epoch: 1, batch: 44029, sum loss: 5240.855469, avg loss: 2.755445, ppl: 15.728035 +epoch: 1, batch: 44030, sum loss: 4573.006836, avg loss: 2.699532, ppl: 14.872766 +epoch: 1, batch: 44031, sum loss: 3824.921387, avg loss: 2.244672, ppl: 9.437323 +epoch: 1, batch: 44032, sum loss: 4403.578125, avg loss: 2.538085, ppl: 12.655418 +epoch: 1, batch: 44033, sum loss: 4402.061523, avg loss: 2.442876, ppl: 11.506081 +epoch: 1, batch: 44034, sum loss: 3394.231445, avg loss: 2.086190, ppl: 8.054173 +epoch: 1, batch: 44035, sum loss: 3433.424316, avg loss: 2.343634, ppl: 10.419034 +epoch: 1, batch: 44036, sum loss: 5066.743164, avg loss: 2.780869, ppl: 16.133034 +epoch: 1, batch: 44037, sum loss: 5196.551270, avg loss: 2.916134, ppl: 18.469751 +epoch: 1, batch: 44038, sum loss: 4568.592285, avg loss: 2.729147, ppl: 15.319817 +epoch: 1, batch: 44039, sum loss: 5070.290039, avg loss: 2.668574, ppl: 14.419391 +epoch: 1, batch: 44040, sum loss: 5093.906250, avg loss: 2.806560, ppl: 16.552879 +epoch: 1, batch: 44041, sum loss: 4352.430664, avg loss: 2.701695, ppl: 14.904977 +epoch: 1, batch: 44042, sum loss: 
5017.540039, avg loss: 2.713651, ppl: 15.084243 +epoch: 1, batch: 44043, sum loss: 4379.979492, avg loss: 2.864604, ppl: 17.542109 +epoch: 1, batch: 44044, sum loss: 4437.460938, avg loss: 2.671560, ppl: 14.462510 +epoch: 1, batch: 44045, sum loss: 4640.751953, avg loss: 2.797319, ppl: 16.400618 +epoch: 1, batch: 44046, sum loss: 4968.357910, avg loss: 2.725375, ppl: 15.262132 +epoch: 1, batch: 44047, sum loss: 4498.937012, avg loss: 2.690752, ppl: 14.742759 +epoch: 1, batch: 44048, sum loss: 4293.894043, avg loss: 2.728014, ppl: 15.302466 +epoch: 1, batch: 44049, sum loss: 4464.876465, avg loss: 2.506949, ppl: 12.267447 +epoch: 1, batch: 44050, sum loss: 3965.542236, avg loss: 2.545278, ppl: 12.746766 +epoch: 1, batch: 44051, sum loss: 4972.783203, avg loss: 2.693815, ppl: 14.787992 +epoch: 1, batch: 44052, sum loss: 4573.675781, avg loss: 2.835509, ppl: 17.039068 +epoch: 1, batch: 44053, sum loss: 4474.410156, avg loss: 2.729963, ppl: 15.332324 +epoch: 1, batch: 44054, sum loss: 4110.343262, avg loss: 2.373177, ppl: 10.731435 +epoch: 1, batch: 44055, sum loss: 4045.097168, avg loss: 2.761158, ppl: 15.818157 +epoch: 1, batch: 44056, sum loss: 4478.536621, avg loss: 2.712621, ppl: 15.068715 +epoch: 1, batch: 44057, sum loss: 3825.084229, avg loss: 2.652624, ppl: 14.191233 +epoch: 1, batch: 44058, sum loss: 6058.913086, avg loss: 2.967146, ppl: 19.436377 +epoch: 1, batch: 44059, sum loss: 4191.237305, avg loss: 2.600023, ppl: 13.464048 +epoch: 1, batch: 44060, sum loss: 3978.118896, avg loss: 2.363707, ppl: 10.630285 +epoch: 1, batch: 44061, sum loss: 4544.355957, avg loss: 2.472446, ppl: 11.851399 +epoch: 1, batch: 44062, sum loss: 6383.699219, avg loss: 3.144680, ppl: 23.212236 +epoch: 1, batch: 44063, sum loss: 4007.208252, avg loss: 2.262681, ppl: 9.608816 +epoch: 1, batch: 44064, sum loss: 4166.523926, avg loss: 2.462485, ppl: 11.733930 +epoch: 1, batch: 44065, sum loss: 3917.588379, avg loss: 2.477918, ppl: 11.916428 +epoch: 1, batch: 44066, sum loss: 
4546.218750, avg loss: 2.823738, ppl: 16.839685 +epoch: 1, batch: 44067, sum loss: 3947.840576, avg loss: 2.558549, ppl: 12.917057 +epoch: 1, batch: 44068, sum loss: 4442.289062, avg loss: 2.606977, ppl: 13.558006 +epoch: 1, batch: 44069, sum loss: 3233.999512, avg loss: 2.450000, ppl: 11.588345 +epoch: 1, batch: 44070, sum loss: 5027.334961, avg loss: 2.718948, ppl: 15.164363 +epoch: 1, batch: 44071, sum loss: 4171.721191, avg loss: 2.334483, ppl: 10.324122 +epoch: 1, batch: 44072, sum loss: 4748.317871, avg loss: 2.566658, ppl: 13.022235 +epoch: 1, batch: 44073, sum loss: 4726.415527, avg loss: 2.816696, ppl: 16.721506 +epoch: 1, batch: 44074, sum loss: 4932.582031, avg loss: 2.716179, ppl: 15.122435 +epoch: 1, batch: 44075, sum loss: 4093.303467, avg loss: 2.480790, ppl: 11.950701 +epoch: 1, batch: 44076, sum loss: 4579.518066, avg loss: 2.622863, ppl: 13.775103 +epoch: 1, batch: 44077, sum loss: 4177.923340, avg loss: 2.527479, ppl: 12.521901 +epoch: 1, batch: 44078, sum loss: 3901.515625, avg loss: 2.707506, ppl: 14.991835 +epoch: 1, batch: 44079, sum loss: 5746.947266, avg loss: 2.776303, ppl: 16.059540 +epoch: 1, batch: 44080, sum loss: 4023.615723, avg loss: 2.694987, ppl: 14.805327 +epoch: 1, batch: 44081, sum loss: 4516.081055, avg loss: 2.616501, ppl: 13.687748 +epoch: 1, batch: 44082, sum loss: 3490.219971, avg loss: 2.388926, ppl: 10.901774 +epoch: 1, batch: 44083, sum loss: 6174.816406, avg loss: 3.028355, ppl: 20.663221 +epoch: 1, batch: 44084, sum loss: 4100.077148, avg loss: 2.590068, ppl: 13.330673 +epoch: 1, batch: 44085, sum loss: 4211.122070, avg loss: 2.521630, ppl: 12.448873 +epoch: 1, batch: 44086, sum loss: 4831.328613, avg loss: 2.771847, ppl: 15.988130 +epoch: 1, batch: 44087, sum loss: 4208.440918, avg loss: 2.544402, ppl: 12.735612 +epoch: 1, batch: 44088, sum loss: 3456.854004, avg loss: 2.405605, ppl: 11.085133 +epoch: 1, batch: 44089, sum loss: 4266.204590, avg loss: 2.513969, ppl: 12.353862 +epoch: 1, batch: 44090, sum loss: 
4549.399902, avg loss: 2.616101, ppl: 13.682276 +epoch: 1, batch: 44091, sum loss: 4465.284180, avg loss: 2.677029, ppl: 14.541823 +epoch: 1, batch: 44092, sum loss: 4891.934082, avg loss: 2.621615, ppl: 13.757927 +epoch: 1, batch: 44093, sum loss: 4924.043945, avg loss: 2.608074, ppl: 13.572886 +epoch: 1, batch: 44094, sum loss: 3372.061768, avg loss: 2.349869, ppl: 10.484194 +epoch: 1, batch: 44095, sum loss: 4653.407227, avg loss: 2.654539, ppl: 14.218434 +epoch: 1, batch: 44096, sum loss: 4386.666504, avg loss: 2.671539, ppl: 14.462207 +epoch: 1, batch: 44097, sum loss: 3506.860352, avg loss: 2.440404, ppl: 11.477676 +epoch: 1, batch: 44098, sum loss: 3851.472900, avg loss: 2.390734, ppl: 10.921513 +epoch: 1, batch: 44099, sum loss: 4552.665527, avg loss: 2.482369, ppl: 11.969592 +epoch: 1, batch: 44100, sum loss: 4714.175781, avg loss: 2.681556, ppl: 14.607809 +epoch: 1, batch: 44101, sum loss: 4437.354004, avg loss: 2.649167, ppl: 14.142251 +epoch: 1, batch: 44102, sum loss: 5113.540039, avg loss: 2.788190, ppl: 16.251572 +epoch: 1, batch: 44103, sum loss: 4145.957520, avg loss: 2.692180, ppl: 14.763828 +epoch: 1, batch: 44104, sum loss: 4082.806641, avg loss: 2.668501, ppl: 14.418342 +epoch: 1, batch: 44105, sum loss: 4658.701172, avg loss: 2.710123, ppl: 15.031125 +epoch: 1, batch: 44106, sum loss: 4227.403809, avg loss: 2.557413, ppl: 12.902397 +epoch: 1, batch: 44107, sum loss: 4252.680176, avg loss: 2.535886, ppl: 12.627608 +epoch: 1, batch: 44108, sum loss: 4293.987305, avg loss: 2.522907, ppl: 12.464776 +epoch: 1, batch: 44109, sum loss: 5191.965820, avg loss: 3.004610, ppl: 20.178341 +epoch: 1, batch: 44110, sum loss: 4526.809570, avg loss: 2.839906, ppl: 17.114149 +epoch: 1, batch: 44111, sum loss: 3616.597900, avg loss: 2.337814, ppl: 10.358566 +epoch: 1, batch: 44112, sum loss: 4325.453613, avg loss: 2.500262, ppl: 12.185689 +epoch: 1, batch: 44113, sum loss: 4984.067871, avg loss: 2.792195, ppl: 16.316797 +epoch: 1, batch: 44114, sum loss: 
3360.030029, avg loss: 2.285735, ppl: 9.832908 +epoch: 1, batch: 44115, sum loss: 3814.388184, avg loss: 2.488185, ppl: 12.039410 +epoch: 1, batch: 44116, sum loss: 5261.499512, avg loss: 2.737513, ppl: 15.448514 +epoch: 1, batch: 44117, sum loss: 5117.516602, avg loss: 2.571616, ppl: 13.086961 +epoch: 1, batch: 44118, sum loss: 3736.322266, avg loss: 2.467848, ppl: 11.797036 +epoch: 1, batch: 44119, sum loss: 5172.203125, avg loss: 2.918851, ppl: 18.519989 +epoch: 1, batch: 44120, sum loss: 4637.774902, avg loss: 2.480093, ppl: 11.942381 +epoch: 1, batch: 44121, sum loss: 5017.750488, avg loss: 2.643704, ppl: 14.065210 +epoch: 1, batch: 44122, sum loss: 4785.824219, avg loss: 2.606658, ppl: 13.553679 +epoch: 1, batch: 44123, sum loss: 5084.608887, avg loss: 2.851716, ppl: 17.317471 +epoch: 1, batch: 44124, sum loss: 3787.068848, avg loss: 2.639072, ppl: 14.000212 +epoch: 1, batch: 44125, sum loss: 4367.251953, avg loss: 2.599555, ppl: 13.457745 +epoch: 1, batch: 44126, sum loss: 4246.168945, avg loss: 2.443135, ppl: 11.509068 +epoch: 1, batch: 44127, sum loss: 3680.774902, avg loss: 2.505633, ppl: 12.251310 +epoch: 1, batch: 44128, sum loss: 3569.797119, avg loss: 2.664028, ppl: 14.353986 +epoch: 1, batch: 44129, sum loss: 4218.093750, avg loss: 2.554872, ppl: 12.869656 +epoch: 1, batch: 44130, sum loss: 4628.758789, avg loss: 2.540482, ppl: 12.685787 +epoch: 1, batch: 44131, sum loss: 4353.890625, avg loss: 2.522532, ppl: 12.460109 +epoch: 1, batch: 44132, sum loss: 3677.894531, avg loss: 2.280158, ppl: 9.778223 +epoch: 1, batch: 44133, sum loss: 3692.564209, avg loss: 2.367028, ppl: 10.665652 +epoch: 1, batch: 44134, sum loss: 3757.498047, avg loss: 2.384199, ppl: 10.850369 +epoch: 1, batch: 44135, sum loss: 3751.257812, avg loss: 2.109819, ppl: 8.246748 +epoch: 1, batch: 44136, sum loss: 4755.669434, avg loss: 2.755312, ppl: 15.725954 +epoch: 1, batch: 44137, sum loss: 4342.938965, avg loss: 2.538246, ppl: 12.657453 +epoch: 1, batch: 44138, sum loss: 
3466.364502, avg loss: 2.495583, ppl: 12.128800 +epoch: 1, batch: 44139, sum loss: 5137.743652, avg loss: 2.718383, ppl: 15.155793 +epoch: 1, batch: 44140, sum loss: 4155.776367, avg loss: 2.478101, ppl: 11.918615 +epoch: 1, batch: 44141, sum loss: 4116.212891, avg loss: 2.316383, ppl: 10.138937 +epoch: 1, batch: 44142, sum loss: 3831.454102, avg loss: 2.520694, ppl: 12.437220 +epoch: 1, batch: 44143, sum loss: 3414.606934, avg loss: 2.389508, ppl: 10.908126 +epoch: 1, batch: 44144, sum loss: 5137.871582, avg loss: 2.680163, ppl: 14.587466 +epoch: 1, batch: 44145, sum loss: 4274.362305, avg loss: 2.731222, ppl: 15.351634 +epoch: 1, batch: 44146, sum loss: 3335.288330, avg loss: 2.396040, ppl: 10.979616 +epoch: 1, batch: 44147, sum loss: 4776.322266, avg loss: 2.723103, ppl: 15.227497 +epoch: 1, batch: 44148, sum loss: 4324.281250, avg loss: 2.777316, ppl: 16.075817 +epoch: 1, batch: 44149, sum loss: 4597.890137, avg loss: 2.538868, ppl: 12.665328 +epoch: 1, batch: 44150, sum loss: 4369.036133, avg loss: 2.498019, ppl: 12.158390 +epoch: 1, batch: 44151, sum loss: 4299.166016, avg loss: 2.759413, ppl: 15.790579 +epoch: 1, batch: 44152, sum loss: 4670.003418, avg loss: 2.553310, ppl: 12.849565 +epoch: 1, batch: 44153, sum loss: 3709.245117, avg loss: 2.277007, ppl: 9.747466 +epoch: 1, batch: 44154, sum loss: 4725.350098, avg loss: 2.656183, ppl: 14.241828 +epoch: 1, batch: 44155, sum loss: 3628.660400, avg loss: 2.234397, ppl: 9.340845 +epoch: 1, batch: 44156, sum loss: 4626.459961, avg loss: 2.537828, ppl: 12.652161 +epoch: 1, batch: 44157, sum loss: 4508.081055, avg loss: 2.817551, ppl: 16.735809 +epoch: 1, batch: 44158, sum loss: 5497.955566, avg loss: 3.092214, ppl: 22.025782 +epoch: 1, batch: 44159, sum loss: 4398.395996, avg loss: 2.504781, ppl: 12.240884 +epoch: 1, batch: 44160, sum loss: 4203.575195, avg loss: 2.662175, ppl: 14.327423 +epoch: 1, batch: 44161, sum loss: 4968.537598, avg loss: 2.777271, ppl: 16.075089 +epoch: 1, batch: 44162, sum loss: 
4587.326660, avg loss: 2.730552, ppl: 15.341349 +epoch: 1, batch: 44163, sum loss: 4470.475586, avg loss: 2.675330, ppl: 14.517135 +epoch: 1, batch: 44164, sum loss: 4671.192383, avg loss: 2.595107, ppl: 13.398019 +epoch: 1, batch: 44165, sum loss: 3891.005127, avg loss: 2.464221, ppl: 11.754322 +epoch: 1, batch: 44166, sum loss: 3963.876465, avg loss: 2.472786, ppl: 11.855433 +epoch: 1, batch: 44167, sum loss: 3993.488525, avg loss: 2.421764, ppl: 11.265714 +epoch: 1, batch: 44168, sum loss: 3706.411865, avg loss: 2.604646, ppl: 13.526443 +epoch: 1, batch: 44169, sum loss: 4730.183594, avg loss: 2.636669, ppl: 13.966599 +epoch: 1, batch: 44170, sum loss: 5122.584473, avg loss: 2.790079, ppl: 16.282301 +epoch: 1, batch: 44171, sum loss: 3735.074463, avg loss: 2.457286, ppl: 11.673086 +epoch: 1, batch: 44172, sum loss: 4676.383789, avg loss: 2.763820, ppl: 15.860316 +epoch: 1, batch: 44173, sum loss: 4651.954102, avg loss: 2.499707, ppl: 12.178922 +epoch: 1, batch: 44174, sum loss: 4718.011719, avg loss: 2.825157, ppl: 16.863586 +epoch: 1, batch: 44175, sum loss: 3671.532715, avg loss: 2.495943, ppl: 12.133173 +epoch: 1, batch: 44176, sum loss: 3372.673828, avg loss: 2.152313, ppl: 8.604736 +epoch: 1, batch: 44177, sum loss: 4435.787598, avg loss: 2.598587, ppl: 13.444724 +epoch: 1, batch: 44178, sum loss: 4666.193359, avg loss: 2.795802, ppl: 16.375759 +epoch: 1, batch: 44179, sum loss: 4560.916504, avg loss: 2.689220, ppl: 14.720185 +epoch: 1, batch: 44180, sum loss: 5047.775391, avg loss: 2.784212, ppl: 16.187052 +epoch: 1, batch: 44181, sum loss: 3159.468018, avg loss: 2.357812, ppl: 10.567803 +epoch: 1, batch: 44182, sum loss: 4350.771973, avg loss: 2.411736, ppl: 11.153307 +epoch: 1, batch: 44183, sum loss: 4395.725586, avg loss: 2.546770, ppl: 12.765808 +epoch: 1, batch: 44184, sum loss: 4258.685059, avg loss: 2.568568, ppl: 13.047121 +epoch: 1, batch: 44185, sum loss: 3705.835693, avg loss: 2.343982, ppl: 10.422657 +epoch: 1, batch: 44186, sum loss: 
3390.686768, avg loss: 2.394553, ppl: 10.963293 +epoch: 1, batch: 44187, sum loss: 3982.692383, avg loss: 2.551373, ppl: 12.824694 +epoch: 1, batch: 44188, sum loss: 3873.767090, avg loss: 2.413562, ppl: 11.173692 +epoch: 1, batch: 44189, sum loss: 4233.757812, avg loss: 2.526109, ppl: 12.504749 +epoch: 1, batch: 44190, sum loss: 4943.209961, avg loss: 2.947650, ppl: 19.061115 +epoch: 1, batch: 44191, sum loss: 4763.912109, avg loss: 2.591900, ppl: 13.355121 +epoch: 1, batch: 44192, sum loss: 4010.868896, avg loss: 2.500542, ppl: 12.189095 +epoch: 1, batch: 44193, sum loss: 3833.067871, avg loss: 2.605757, ppl: 13.541466 +epoch: 1, batch: 44194, sum loss: 3984.499756, avg loss: 2.565679, ppl: 13.009493 +epoch: 1, batch: 44195, sum loss: 3992.424316, avg loss: 2.803669, ppl: 16.505089 +epoch: 1, batch: 44196, sum loss: 4193.433105, avg loss: 2.629112, ppl: 13.861452 +epoch: 1, batch: 44197, sum loss: 4800.309570, avg loss: 2.650640, ppl: 14.163107 +epoch: 1, batch: 44198, sum loss: 4316.976562, avg loss: 2.580381, ppl: 13.202163 +epoch: 1, batch: 44199, sum loss: 3578.923584, avg loss: 2.292712, ppl: 9.901757 +epoch: 1, batch: 44200, sum loss: 4272.428223, avg loss: 2.492665, ppl: 12.093466 +epoch: 1, batch: 44201, sum loss: 4264.249023, avg loss: 2.523224, ppl: 12.468736 +epoch: 1, batch: 44202, sum loss: 4282.336914, avg loss: 2.663145, ppl: 14.341319 +epoch: 1, batch: 44203, sum loss: 4719.292480, avg loss: 2.566228, ppl: 13.016629 +epoch: 1, batch: 44204, sum loss: 4408.758789, avg loss: 2.570705, ppl: 13.075035 +epoch: 1, batch: 44205, sum loss: 4281.823730, avg loss: 2.601351, ppl: 13.481940 +epoch: 1, batch: 44206, sum loss: 4083.305420, avg loss: 2.417588, ppl: 11.218761 +epoch: 1, batch: 44207, sum loss: 4044.098877, avg loss: 2.381684, ppl: 10.823112 +epoch: 1, batch: 44208, sum loss: 3750.583496, avg loss: 2.630143, ppl: 13.875750 +epoch: 1, batch: 44209, sum loss: 5208.724609, avg loss: 2.892129, ppl: 18.031662 +epoch: 1, batch: 44210, sum loss: 
4159.935059, avg loss: 2.438414, ppl: 11.454863 +epoch: 1, batch: 44211, sum loss: 4350.733887, avg loss: 2.535393, ppl: 12.621387 +epoch: 1, batch: 44212, sum loss: 3939.853516, avg loss: 2.575068, ppl: 13.132207 +epoch: 1, batch: 44213, sum loss: 4971.302734, avg loss: 2.648536, ppl: 14.133339 +epoch: 1, batch: 44214, sum loss: 4400.090820, avg loss: 2.743199, ppl: 15.536605 +epoch: 1, batch: 44215, sum loss: 4659.296387, avg loss: 2.424192, ppl: 11.293097 +epoch: 1, batch: 44216, sum loss: 3607.862793, avg loss: 2.353466, ppl: 10.521971 +epoch: 1, batch: 44217, sum loss: 4925.244141, avg loss: 2.712139, ppl: 15.061452 +epoch: 1, batch: 44218, sum loss: 4301.824707, avg loss: 2.582128, ppl: 13.225245 +epoch: 1, batch: 44219, sum loss: 4365.576660, avg loss: 2.773556, ppl: 16.015476 +epoch: 1, batch: 44220, sum loss: 4222.374512, avg loss: 2.548204, ppl: 12.784125 +epoch: 1, batch: 44221, sum loss: 4301.213379, avg loss: 2.762501, ppl: 15.839400 +epoch: 1, batch: 44222, sum loss: 3820.512451, avg loss: 2.456921, ppl: 11.668832 +epoch: 1, batch: 44223, sum loss: 4668.048340, avg loss: 2.858572, ppl: 17.436609 +epoch: 1, batch: 44224, sum loss: 4752.352539, avg loss: 2.492057, ppl: 12.086110 +epoch: 1, batch: 44225, sum loss: 4341.491211, avg loss: 2.513892, ppl: 12.352913 +epoch: 1, batch: 44226, sum loss: 4160.056152, avg loss: 2.685640, ppl: 14.667583 +epoch: 1, batch: 44227, sum loss: 3876.296387, avg loss: 2.521989, ppl: 12.453337 +epoch: 1, batch: 44228, sum loss: 4427.958008, avg loss: 2.562476, ppl: 12.967882 +epoch: 1, batch: 44229, sum loss: 4781.903320, avg loss: 2.689485, ppl: 14.724085 +epoch: 1, batch: 44230, sum loss: 4553.968262, avg loss: 2.674086, ppl: 14.499089 +epoch: 1, batch: 44231, sum loss: 5201.265625, avg loss: 2.867291, ppl: 17.589304 +epoch: 1, batch: 44232, sum loss: 4534.549805, avg loss: 2.672098, ppl: 14.470291 +epoch: 1, batch: 44233, sum loss: 4426.452637, avg loss: 2.846593, ppl: 17.228989 +epoch: 1, batch: 44234, sum loss: 
3626.432617, avg loss: 2.416011, ppl: 11.201090 +epoch: 1, batch: 44235, sum loss: 4520.482422, avg loss: 2.646652, ppl: 14.106736 +epoch: 1, batch: 44236, sum loss: 3360.153320, avg loss: 2.210627, ppl: 9.121437 +epoch: 1, batch: 44237, sum loss: 4385.813477, avg loss: 2.587501, ppl: 13.296496 +epoch: 1, batch: 44238, sum loss: 4694.006836, avg loss: 2.560833, ppl: 12.946597 +epoch: 1, batch: 44239, sum loss: 3066.927002, avg loss: 2.161330, ppl: 8.682678 +epoch: 1, batch: 44240, sum loss: 4520.660156, avg loss: 2.536846, ppl: 12.639748 +epoch: 1, batch: 44241, sum loss: 3281.761230, avg loss: 2.180572, ppl: 8.851370 +epoch: 1, batch: 44242, sum loss: 3575.337158, avg loss: 2.557466, ppl: 12.903077 +epoch: 1, batch: 44243, sum loss: 3841.480469, avg loss: 2.431317, ppl: 11.373850 +epoch: 1, batch: 44244, sum loss: 5337.504395, avg loss: 2.765546, ppl: 15.887717 +epoch: 1, batch: 44245, sum loss: 4309.791016, avg loss: 2.648919, ppl: 14.138744 +epoch: 1, batch: 44246, sum loss: 4484.003906, avg loss: 2.613056, ppl: 13.640673 +epoch: 1, batch: 44247, sum loss: 4307.774902, avg loss: 2.628294, ppl: 13.850128 +epoch: 1, batch: 44248, sum loss: 4190.859375, avg loss: 2.460869, ppl: 11.714983 +epoch: 1, batch: 44249, sum loss: 4229.543945, avg loss: 2.604399, ppl: 13.523095 +epoch: 1, batch: 44250, sum loss: 4972.956055, avg loss: 2.554163, ppl: 12.860537 +epoch: 1, batch: 44251, sum loss: 4483.034668, avg loss: 2.713701, ppl: 15.085010 +epoch: 1, batch: 44252, sum loss: 4277.387695, avg loss: 2.573639, ppl: 13.113457 +epoch: 1, batch: 44253, sum loss: 5109.613770, avg loss: 2.741209, ppl: 15.505721 +epoch: 1, batch: 44254, sum loss: 4402.229004, avg loss: 2.614150, ppl: 13.655608 +epoch: 1, batch: 44255, sum loss: 4225.867676, avg loss: 2.763812, ppl: 15.860180 +epoch: 1, batch: 44256, sum loss: 3816.468262, avg loss: 2.502602, ppl: 12.214235 +epoch: 1, batch: 44257, sum loss: 3727.711426, avg loss: 2.522132, ppl: 12.455128 +epoch: 1, batch: 44258, sum loss: 
4498.546387, avg loss: 2.669760, ppl: 14.436511 +epoch: 1, batch: 44259, sum loss: 4517.576660, avg loss: 2.793801, ppl: 16.343027 +epoch: 1, batch: 44260, sum loss: 3563.898682, avg loss: 2.431036, ppl: 11.370656 +epoch: 1, batch: 44261, sum loss: 4472.551270, avg loss: 2.626278, ppl: 13.822227 +epoch: 1, batch: 44262, sum loss: 5203.839844, avg loss: 2.707513, ppl: 14.991945 +epoch: 1, batch: 44263, sum loss: 4690.352539, avg loss: 2.494868, ppl: 12.120137 +epoch: 1, batch: 44264, sum loss: 4334.277344, avg loss: 2.724247, ppl: 15.244933 +epoch: 1, batch: 44265, sum loss: 4096.900391, avg loss: 2.681218, ppl: 14.602864 +epoch: 1, batch: 44266, sum loss: 4174.654297, avg loss: 2.345311, ppl: 10.436522 +epoch: 1, batch: 44267, sum loss: 3799.604248, avg loss: 2.465674, ppl: 11.771419 +epoch: 1, batch: 44268, sum loss: 4143.486816, avg loss: 2.744031, ppl: 15.549539 +epoch: 1, batch: 44269, sum loss: 3746.387207, avg loss: 2.399992, ppl: 11.023086 +epoch: 1, batch: 44270, sum loss: 4669.299316, avg loss: 2.646995, ppl: 14.111570 +epoch: 1, batch: 44271, sum loss: 4697.674805, avg loss: 2.622934, ppl: 13.776081 +epoch: 1, batch: 44272, sum loss: 4272.705078, avg loss: 2.594235, ppl: 13.386342 +epoch: 1, batch: 44273, sum loss: 3949.150879, avg loss: 2.380440, ppl: 10.809663 +epoch: 1, batch: 44274, sum loss: 4114.696289, avg loss: 2.481723, ppl: 11.961855 +epoch: 1, batch: 44275, sum loss: 4790.943359, avg loss: 2.680998, ppl: 14.599658 +epoch: 1, batch: 44276, sum loss: 5861.121094, avg loss: 2.828726, ppl: 16.923891 +epoch: 1, batch: 44277, sum loss: 4433.138184, avg loss: 2.852727, ppl: 17.334997 +epoch: 1, batch: 44278, sum loss: 4551.898438, avg loss: 2.665046, ppl: 14.368607 +epoch: 1, batch: 44279, sum loss: 4576.621094, avg loss: 2.579831, ppl: 13.194913 +epoch: 1, batch: 44280, sum loss: 3890.499023, avg loss: 2.360740, ppl: 10.598788 +epoch: 1, batch: 44281, sum loss: 4581.425293, avg loss: 2.748305, ppl: 15.616145 +epoch: 1, batch: 44282, sum loss: 
3692.336914, avg loss: 2.276410, ppl: 9.741644 +epoch: 1, batch: 44283, sum loss: 4219.458984, avg loss: 2.479118, ppl: 11.930738 +epoch: 1, batch: 44284, sum loss: 3975.737061, avg loss: 2.666491, ppl: 14.389381 +epoch: 1, batch: 44285, sum loss: 4490.263184, avg loss: 2.737965, ppl: 15.455506 +epoch: 1, batch: 44286, sum loss: 4685.135254, avg loss: 2.544886, ppl: 12.741776 +epoch: 1, batch: 44287, sum loss: 5277.566406, avg loss: 2.764571, ppl: 15.872232 +epoch: 1, batch: 44288, sum loss: 3794.173584, avg loss: 2.507716, ppl: 12.276854 +epoch: 1, batch: 44289, sum loss: 5144.857910, avg loss: 2.794600, ppl: 16.356081 +epoch: 1, batch: 44290, sum loss: 4224.196289, avg loss: 2.238578, ppl: 9.379982 +epoch: 1, batch: 44291, sum loss: 3877.932617, avg loss: 2.495452, ppl: 12.127215 +epoch: 1, batch: 44292, sum loss: 3905.670410, avg loss: 2.419870, ppl: 11.244399 +epoch: 1, batch: 44293, sum loss: 3813.841309, avg loss: 2.449481, ppl: 11.582332 +epoch: 1, batch: 44294, sum loss: 3313.274170, avg loss: 2.315356, ppl: 10.128526 +epoch: 1, batch: 44295, sum loss: 4911.763184, avg loss: 2.597442, ppl: 13.429344 +epoch: 1, batch: 44296, sum loss: 5176.554199, avg loss: 2.721637, ppl: 15.205200 +epoch: 1, batch: 44297, sum loss: 3880.868408, avg loss: 2.482961, ppl: 11.976677 +epoch: 1, batch: 44298, sum loss: 4320.463867, avg loss: 2.558001, ppl: 12.909988 +epoch: 1, batch: 44299, sum loss: 3812.474609, avg loss: 2.480465, ppl: 11.946817 +epoch: 1, batch: 44300, sum loss: 3930.530273, avg loss: 2.373509, ppl: 10.734992 +epoch: 1, batch: 44301, sum loss: 4694.979980, avg loss: 2.823199, ppl: 16.830606 +epoch: 1, batch: 44302, sum loss: 4668.098145, avg loss: 2.808723, ppl: 16.588728 +epoch: 1, batch: 44303, sum loss: 4888.033691, avg loss: 2.897471, ppl: 18.128244 +epoch: 1, batch: 44304, sum loss: 3516.311279, avg loss: 2.508068, ppl: 12.281181 +epoch: 1, batch: 44305, sum loss: 3597.256592, avg loss: 2.430579, ppl: 11.365460 +epoch: 1, batch: 44306, sum loss: 
4914.002930, avg loss: 2.757577, ppl: 15.761613 +epoch: 1, batch: 44307, sum loss: 4316.208008, avg loss: 2.601693, ppl: 13.486547 +epoch: 1, batch: 44308, sum loss: 4211.086914, avg loss: 2.438383, ppl: 11.454502 +epoch: 1, batch: 44309, sum loss: 4760.446289, avg loss: 2.643224, ppl: 14.058452 +epoch: 1, batch: 44310, sum loss: 4862.001465, avg loss: 2.776700, ppl: 16.065916 +epoch: 1, batch: 44311, sum loss: 3515.609619, avg loss: 2.424558, ppl: 11.297239 +epoch: 1, batch: 44312, sum loss: 5075.651367, avg loss: 2.539095, ppl: 12.668206 +epoch: 1, batch: 44313, sum loss: 4721.405273, avg loss: 2.664450, ppl: 14.360048 +epoch: 1, batch: 44314, sum loss: 3197.730713, avg loss: 2.302182, ppl: 9.995970 +epoch: 1, batch: 44315, sum loss: 4385.689453, avg loss: 2.717280, ppl: 15.139083 +epoch: 1, batch: 44316, sum loss: 3612.496582, avg loss: 2.505199, ppl: 12.245993 +epoch: 1, batch: 44317, sum loss: 4509.210938, avg loss: 2.540401, ppl: 12.684751 +epoch: 1, batch: 44318, sum loss: 3853.000977, avg loss: 2.391683, ppl: 10.931875 +epoch: 1, batch: 44319, sum loss: 4330.735352, avg loss: 2.688228, ppl: 14.705596 +epoch: 1, batch: 44320, sum loss: 4219.146973, avg loss: 2.444465, ppl: 11.524384 +epoch: 1, batch: 44321, sum loss: 4425.626953, avg loss: 2.747130, ppl: 15.597808 +epoch: 1, batch: 44322, sum loss: 4608.804199, avg loss: 2.580518, ppl: 13.203970 +epoch: 1, batch: 44323, sum loss: 4682.818359, avg loss: 2.580065, ppl: 13.197999 +epoch: 1, batch: 44324, sum loss: 3927.510498, avg loss: 2.304877, ppl: 10.022945 +epoch: 1, batch: 44325, sum loss: 5817.984375, avg loss: 3.005157, ppl: 20.189386 +epoch: 1, batch: 44326, sum loss: 3575.809082, avg loss: 2.312943, ppl: 10.104113 +epoch: 1, batch: 44327, sum loss: 4253.264648, avg loss: 2.554513, ppl: 12.865039 +epoch: 1, batch: 44328, sum loss: 4224.370117, avg loss: 2.402941, ppl: 11.055640 +epoch: 1, batch: 44329, sum loss: 3707.050781, avg loss: 2.499697, ppl: 12.178805 +epoch: 1, batch: 44330, sum loss: 
4227.837402, avg loss: 2.634166, ppl: 13.931695 +epoch: 1, batch: 44331, sum loss: 4361.363281, avg loss: 2.763855, ppl: 15.860868 +epoch: 1, batch: 44332, sum loss: 4780.684570, avg loss: 2.534827, ppl: 12.614254 +epoch: 1, batch: 44333, sum loss: 3644.356445, avg loss: 2.642753, ppl: 14.051833 +epoch: 1, batch: 44334, sum loss: 4570.661133, avg loss: 2.717396, ppl: 15.140852 +epoch: 1, batch: 44335, sum loss: 4004.104980, avg loss: 2.676541, ppl: 14.534728 +epoch: 1, batch: 44336, sum loss: 5875.218750, avg loss: 2.842389, ppl: 17.156710 +epoch: 1, batch: 44337, sum loss: 4362.390625, avg loss: 2.590493, ppl: 13.336350 +epoch: 1, batch: 44338, sum loss: 4346.671387, avg loss: 2.524199, ppl: 12.480897 +epoch: 1, batch: 44339, sum loss: 5370.413574, avg loss: 3.012010, ppl: 20.328220 +epoch: 1, batch: 44340, sum loss: 3668.848633, avg loss: 2.569222, ppl: 13.055659 +epoch: 1, batch: 44341, sum loss: 4126.518555, avg loss: 2.601840, ppl: 13.488538 +epoch: 1, batch: 44342, sum loss: 4501.299805, avg loss: 2.550312, ppl: 12.811094 +epoch: 1, batch: 44343, sum loss: 3470.131836, avg loss: 2.386611, ppl: 10.876568 +epoch: 1, batch: 44344, sum loss: 3785.221191, avg loss: 2.480486, ppl: 11.947065 +epoch: 1, batch: 44345, sum loss: 4143.731445, avg loss: 2.744193, ppl: 15.552059 +epoch: 1, batch: 44346, sum loss: 4207.181152, avg loss: 2.637731, ppl: 13.981444 +epoch: 1, batch: 44347, sum loss: 4029.075684, avg loss: 2.631663, ppl: 13.896859 +epoch: 1, batch: 44348, sum loss: 4149.110352, avg loss: 2.687248, ppl: 14.691187 +epoch: 1, batch: 44349, sum loss: 3932.768066, avg loss: 2.383496, ppl: 10.842741 +epoch: 1, batch: 44350, sum loss: 4191.156250, avg loss: 2.401809, ppl: 11.043133 +epoch: 1, batch: 44351, sum loss: 5222.273926, avg loss: 2.548694, ppl: 12.790387 +epoch: 1, batch: 44352, sum loss: 4496.117188, avg loss: 2.586949, ppl: 13.289163 +epoch: 1, batch: 44353, sum loss: 4421.407715, avg loss: 2.535211, ppl: 12.619091 +epoch: 1, batch: 44354, sum loss: 
3912.572754, avg loss: 2.512892, ppl: 12.340568 +epoch: 1, batch: 44355, sum loss: 4798.780762, avg loss: 2.798123, ppl: 16.413807 +epoch: 1, batch: 44356, sum loss: 4499.967773, avg loss: 2.665858, ppl: 14.380280 +epoch: 1, batch: 44357, sum loss: 4311.824219, avg loss: 2.569621, ppl: 13.060875 +epoch: 1, batch: 44358, sum loss: 3747.303223, avg loss: 2.635234, ppl: 13.946581 +epoch: 1, batch: 44359, sum loss: 4758.951660, avg loss: 2.711653, ppl: 15.054143 +epoch: 1, batch: 44360, sum loss: 4658.592773, avg loss: 2.565304, ppl: 13.004615 +epoch: 1, batch: 44361, sum loss: 3370.312500, avg loss: 2.138523, ppl: 8.486894 +epoch: 1, batch: 44362, sum loss: 4222.135254, avg loss: 2.531256, ppl: 12.569285 +epoch: 1, batch: 44363, sum loss: 4726.481445, avg loss: 2.650859, ppl: 14.166203 +epoch: 1, batch: 44364, sum loss: 4856.794434, avg loss: 2.608375, ppl: 13.576971 +epoch: 1, batch: 44365, sum loss: 3925.185547, avg loss: 2.550478, ppl: 12.813227 +epoch: 1, batch: 44366, sum loss: 4234.082031, avg loss: 2.536898, ppl: 12.640395 +epoch: 1, batch: 44367, sum loss: 4882.372070, avg loss: 2.759962, ppl: 15.799236 +epoch: 1, batch: 44368, sum loss: 4399.821289, avg loss: 2.726036, ppl: 15.272222 +epoch: 1, batch: 44369, sum loss: 4040.653076, avg loss: 2.538099, ppl: 12.655584 +epoch: 1, batch: 44370, sum loss: 4549.130859, avg loss: 2.609943, ppl: 13.598278 +epoch: 1, batch: 44371, sum loss: 4046.755371, avg loss: 2.387466, ppl: 10.885877 +epoch: 1, batch: 44372, sum loss: 3686.410400, avg loss: 2.386026, ppl: 10.870214 +epoch: 1, batch: 44373, sum loss: 4354.239258, avg loss: 2.455860, ppl: 11.656452 +epoch: 1, batch: 44374, sum loss: 3623.082764, avg loss: 2.374235, ppl: 10.742793 +epoch: 1, batch: 44375, sum loss: 4360.357910, avg loss: 2.744089, ppl: 15.550447 +epoch: 1, batch: 44376, sum loss: 4342.921387, avg loss: 2.591242, ppl: 13.346338 +epoch: 1, batch: 44377, sum loss: 4011.787598, avg loss: 2.468792, ppl: 11.808179 +epoch: 1, batch: 44378, sum loss: 
4697.132812, avg loss: 2.469576, ppl: 11.817431 +epoch: 1, batch: 44379, sum loss: 5223.608398, avg loss: 2.874853, ppl: 17.722826 +epoch: 1, batch: 44380, sum loss: 4415.786133, avg loss: 2.633146, ppl: 13.917486 +epoch: 1, batch: 44381, sum loss: 5027.153320, avg loss: 2.648659, ppl: 14.135064 +epoch: 1, batch: 44382, sum loss: 4832.643555, avg loss: 2.582920, ppl: 13.235734 +epoch: 1, batch: 44383, sum loss: 4588.533203, avg loss: 2.608603, ppl: 13.580072 +epoch: 1, batch: 44384, sum loss: 5039.781738, avg loss: 2.711018, ppl: 15.044578 +epoch: 1, batch: 44385, sum loss: 4221.660156, avg loss: 2.563242, ppl: 12.977826 +epoch: 1, batch: 44386, sum loss: 4096.345703, avg loss: 2.426745, ppl: 11.321971 +epoch: 1, batch: 44387, sum loss: 4407.431152, avg loss: 2.432357, ppl: 11.385688 +epoch: 1, batch: 44388, sum loss: 5080.751953, avg loss: 2.755289, ppl: 15.725578 +epoch: 1, batch: 44389, sum loss: 3932.380127, avg loss: 2.430396, ppl: 11.363377 +epoch: 1, batch: 44390, sum loss: 4576.303223, avg loss: 2.601650, ppl: 13.485978 +epoch: 1, batch: 44391, sum loss: 4365.617188, avg loss: 2.493214, ppl: 12.100102 +epoch: 1, batch: 44392, sum loss: 3964.655762, avg loss: 2.622127, ppl: 13.764972 +epoch: 1, batch: 44393, sum loss: 4353.817383, avg loss: 2.428231, ppl: 11.338800 +epoch: 1, batch: 44394, sum loss: 2946.719971, avg loss: 2.302125, ppl: 9.995399 +epoch: 1, batch: 44395, sum loss: 4282.288086, avg loss: 2.556590, ppl: 12.891783 +epoch: 1, batch: 44396, sum loss: 3942.280518, avg loss: 2.374868, ppl: 10.749593 +epoch: 1, batch: 44397, sum loss: 4860.562012, avg loss: 2.637310, ppl: 13.975555 +epoch: 1, batch: 44398, sum loss: 3977.078125, avg loss: 2.400168, ppl: 11.025025 +epoch: 1, batch: 44399, sum loss: 4268.580566, avg loss: 2.588587, ppl: 13.310954 +epoch: 1, batch: 44400, sum loss: 3721.581299, avg loss: 2.358417, ppl: 10.574195 +epoch: 1, batch: 44401, sum loss: 3774.913574, avg loss: 2.493338, ppl: 12.101603 +epoch: 1, batch: 44402, sum loss: 
4151.690918, avg loss: 2.843624, ppl: 17.177904 +epoch: 1, batch: 44403, sum loss: 4534.862305, avg loss: 2.659743, ppl: 14.292620 +epoch: 1, batch: 44404, sum loss: 3537.340576, avg loss: 2.537547, ppl: 12.648607 +epoch: 1, batch: 44405, sum loss: 4029.729004, avg loss: 2.537613, ppl: 12.649440 +epoch: 1, batch: 44406, sum loss: 4279.686523, avg loss: 2.265583, ppl: 9.636742 +epoch: 1, batch: 44407, sum loss: 4478.318359, avg loss: 2.659334, ppl: 14.286770 +epoch: 1, batch: 44408, sum loss: 4415.678223, avg loss: 2.636226, ppl: 13.960417 +epoch: 1, batch: 44409, sum loss: 4214.964355, avg loss: 2.568534, ppl: 13.046682 +epoch: 1, batch: 44410, sum loss: 4583.620605, avg loss: 2.452446, ppl: 11.616721 +epoch: 1, batch: 44411, sum loss: 4420.592773, avg loss: 2.870515, ppl: 17.646097 +epoch: 1, batch: 44412, sum loss: 4688.261719, avg loss: 2.706849, ppl: 14.981987 +epoch: 1, batch: 44413, sum loss: 4639.983398, avg loss: 2.628886, ppl: 13.858319 +epoch: 1, batch: 44414, sum loss: 4366.796875, avg loss: 2.741241, ppl: 15.506216 +epoch: 1, batch: 44415, sum loss: 5714.168945, avg loss: 2.842870, ppl: 17.164963 +epoch: 1, batch: 44416, sum loss: 4153.504395, avg loss: 2.427531, ppl: 11.330866 +epoch: 1, batch: 44417, sum loss: 5175.763184, avg loss: 2.893104, ppl: 18.049250 +epoch: 1, batch: 44418, sum loss: 3478.419922, avg loss: 2.302065, ppl: 9.994800 +epoch: 1, batch: 44419, sum loss: 4477.321777, avg loss: 2.469565, ppl: 11.817309 +epoch: 1, batch: 44420, sum loss: 3696.516113, avg loss: 2.492594, ppl: 12.092598 +epoch: 1, batch: 44421, sum loss: 4455.003418, avg loss: 2.741540, ppl: 15.510860 +epoch: 1, batch: 44422, sum loss: 3999.660645, avg loss: 2.704301, ppl: 14.943862 +epoch: 1, batch: 44423, sum loss: 4472.430664, avg loss: 2.603277, ppl: 13.507937 +epoch: 1, batch: 44424, sum loss: 4281.773438, avg loss: 2.654540, ppl: 14.218449 +epoch: 1, batch: 44425, sum loss: 4192.411621, avg loss: 2.572032, ppl: 13.092398 +epoch: 1, batch: 44426, sum loss: 
4343.237305, avg loss: 2.550345, ppl: 12.811522 +epoch: 1, batch: 44427, sum loss: 4615.772949, avg loss: 2.692983, ppl: 14.775688 +epoch: 1, batch: 44428, sum loss: 4672.467285, avg loss: 2.613237, ppl: 13.643138 +epoch: 1, batch: 44429, sum loss: 4175.540527, avg loss: 2.561681, ppl: 12.957584 +epoch: 1, batch: 44430, sum loss: 4487.940430, avg loss: 2.588201, ppl: 13.305810 +epoch: 1, batch: 44431, sum loss: 4419.675293, avg loss: 2.473238, ppl: 11.860785 +epoch: 1, batch: 44432, sum loss: 4218.536621, avg loss: 2.732213, ppl: 15.366857 +epoch: 1, batch: 44433, sum loss: 3610.226074, avg loss: 2.159226, ppl: 8.664431 +epoch: 1, batch: 44434, sum loss: 4568.631836, avg loss: 2.545199, ppl: 12.745763 +epoch: 1, batch: 44435, sum loss: 4152.783691, avg loss: 2.506206, ppl: 12.258334 +epoch: 1, batch: 44436, sum loss: 4463.279297, avg loss: 2.599464, ppl: 13.456519 +epoch: 1, batch: 44437, sum loss: 5116.858887, avg loss: 2.697343, ppl: 14.840243 +epoch: 1, batch: 44438, sum loss: 4658.404785, avg loss: 2.569446, ppl: 13.058583 +epoch: 1, batch: 44439, sum loss: 4016.507324, avg loss: 2.663466, ppl: 14.345933 +epoch: 1, batch: 44440, sum loss: 4557.081543, avg loss: 2.534528, ppl: 12.610477 +epoch: 1, batch: 44441, sum loss: 4131.967285, avg loss: 2.327869, ppl: 10.256060 +epoch: 1, batch: 44442, sum loss: 4852.737793, avg loss: 2.879963, ppl: 17.813618 +epoch: 1, batch: 44443, sum loss: 5036.015137, avg loss: 2.687308, ppl: 14.692069 +epoch: 1, batch: 44444, sum loss: 2874.936523, avg loss: 2.054994, ppl: 7.806792 +epoch: 1, batch: 44445, sum loss: 4181.272461, avg loss: 2.585821, ppl: 13.274185 +epoch: 1, batch: 44446, sum loss: 4203.545898, avg loss: 2.523137, ppl: 12.467642 +epoch: 1, batch: 44447, sum loss: 4859.826172, avg loss: 2.692425, ppl: 14.767437 +epoch: 1, batch: 44448, sum loss: 4963.267578, avg loss: 2.516870, ppl: 12.389756 +epoch: 1, batch: 44449, sum loss: 4395.049316, avg loss: 2.562711, ppl: 12.970931 +epoch: 1, batch: 44450, sum loss: 
4564.053223, avg loss: 2.798316, ppl: 16.416977 +epoch: 1, batch: 44451, sum loss: 3604.196777, avg loss: 2.343431, ppl: 10.416916 +epoch: 1, batch: 44452, sum loss: 3493.174316, avg loss: 2.342840, ppl: 10.410761 +epoch: 1, batch: 44453, sum loss: 4295.216309, avg loss: 2.749818, ppl: 15.639791 +epoch: 1, batch: 44454, sum loss: 4008.734863, avg loss: 2.522804, ppl: 12.463490 +epoch: 1, batch: 44455, sum loss: 4888.276855, avg loss: 2.709688, ppl: 15.024583 +epoch: 1, batch: 44456, sum loss: 3809.806641, avg loss: 2.435938, ppl: 11.426528 +epoch: 1, batch: 44457, sum loss: 4925.351562, avg loss: 2.497643, ppl: 12.153811 +epoch: 1, batch: 44458, sum loss: 4268.200195, avg loss: 2.565024, ppl: 13.000972 +epoch: 1, batch: 44459, sum loss: 3856.817383, avg loss: 2.366146, ppl: 10.656240 +epoch: 1, batch: 44460, sum loss: 4387.032227, avg loss: 2.907245, ppl: 18.306293 +epoch: 1, batch: 44461, sum loss: 5834.520996, avg loss: 2.918720, ppl: 18.517570 +epoch: 1, batch: 44462, sum loss: 4518.710449, avg loss: 2.613482, ppl: 13.646485 +epoch: 1, batch: 44463, sum loss: 4709.626465, avg loss: 2.923418, ppl: 18.604774 +epoch: 1, batch: 44464, sum loss: 4910.644531, avg loss: 2.716064, ppl: 15.120697 +epoch: 1, batch: 44465, sum loss: 4965.695312, avg loss: 2.567578, ppl: 13.034212 +epoch: 1, batch: 44466, sum loss: 4607.785645, avg loss: 2.702514, ppl: 14.917182 +epoch: 1, batch: 44467, sum loss: 5102.122070, avg loss: 2.794152, ppl: 16.348766 +epoch: 1, batch: 44468, sum loss: 3916.730469, avg loss: 2.505906, ppl: 12.254652 +epoch: 1, batch: 44469, sum loss: 3921.680420, avg loss: 2.471128, ppl: 11.835793 +epoch: 1, batch: 44470, sum loss: 4424.480957, avg loss: 2.516769, ppl: 12.388510 +epoch: 1, batch: 44471, sum loss: 4173.132812, avg loss: 2.429065, ppl: 11.348261 +epoch: 1, batch: 44472, sum loss: 4239.240234, avg loss: 2.719205, ppl: 15.168257 +epoch: 1, batch: 44473, sum loss: 4277.753906, avg loss: 2.501611, ppl: 12.202130 +epoch: 1, batch: 44474, sum loss: 
4086.574951, avg loss: 2.413807, ppl: 11.176428 +epoch: 1, batch: 44475, sum loss: 4310.250488, avg loss: 2.573284, ppl: 13.108805 +epoch: 1, batch: 44476, sum loss: 4960.879395, avg loss: 2.667140, ppl: 14.398723 +epoch: 1, batch: 44477, sum loss: 4476.469727, avg loss: 2.840400, ppl: 17.122604 +epoch: 1, batch: 44478, sum loss: 4775.305176, avg loss: 2.635378, ppl: 13.948586 +epoch: 1, batch: 44479, sum loss: 4130.387207, avg loss: 2.568649, ppl: 13.048182 +epoch: 1, batch: 44480, sum loss: 5109.889648, avg loss: 2.875571, ppl: 17.735548 +epoch: 1, batch: 44481, sum loss: 3826.883057, avg loss: 2.178078, ppl: 8.829319 +epoch: 1, batch: 44482, sum loss: 3826.603271, avg loss: 2.594307, ppl: 13.387312 +epoch: 1, batch: 44483, sum loss: 5281.359863, avg loss: 2.632782, ppl: 13.912416 +epoch: 1, batch: 44484, sum loss: 4363.611816, avg loss: 2.675421, ppl: 14.518464 +epoch: 1, batch: 44485, sum loss: 4601.705078, avg loss: 2.650752, ppl: 14.164684 +epoch: 1, batch: 44486, sum loss: 3546.535400, avg loss: 2.313461, ppl: 10.109351 +epoch: 1, batch: 44487, sum loss: 3602.635742, avg loss: 2.315319, ppl: 10.128149 +epoch: 1, batch: 44488, sum loss: 4744.434082, avg loss: 2.779399, ppl: 16.109335 +epoch: 1, batch: 44489, sum loss: 5097.609863, avg loss: 2.670304, ppl: 14.444357 +epoch: 1, batch: 44490, sum loss: 4681.892578, avg loss: 2.633235, ppl: 13.918731 +epoch: 1, batch: 44491, sum loss: 3542.122070, avg loss: 2.517500, ppl: 12.397560 +epoch: 1, batch: 44492, sum loss: 4044.435059, avg loss: 2.619453, ppl: 13.728211 +epoch: 1, batch: 44493, sum loss: 4863.491211, avg loss: 2.758645, ppl: 15.778450 +epoch: 1, batch: 44494, sum loss: 4207.545898, avg loss: 2.659637, ppl: 14.291101 +epoch: 1, batch: 44495, sum loss: 3690.151123, avg loss: 2.418186, ppl: 11.225474 +epoch: 1, batch: 44496, sum loss: 3549.033447, avg loss: 2.285276, ppl: 9.828398 +epoch: 1, batch: 44497, sum loss: 4471.153320, avg loss: 2.485355, ppl: 12.005381 +epoch: 1, batch: 44498, sum loss: 
4673.317871, avg loss: 2.713890, ppl: 15.087847 +epoch: 1, batch: 44499, sum loss: 4956.020508, avg loss: 2.838500, ppl: 17.090107 +epoch: 1, batch: 44500, sum loss: 4524.509277, avg loss: 2.540432, ppl: 12.685149 +epoch: 1, batch: 44501, sum loss: 4657.685059, avg loss: 2.720610, ppl: 15.189590 +epoch: 1, batch: 44502, sum loss: 4276.214355, avg loss: 2.519867, ppl: 12.426946 +epoch: 1, batch: 44503, sum loss: 4314.764160, avg loss: 2.937212, ppl: 18.863178 +epoch: 1, batch: 44504, sum loss: 4412.235352, avg loss: 2.426972, ppl: 11.324541 +epoch: 1, batch: 44505, sum loss: 4726.830566, avg loss: 2.607188, ppl: 13.560858 +epoch: 1, batch: 44506, sum loss: 3965.993164, avg loss: 2.480296, ppl: 11.944798 +epoch: 1, batch: 44507, sum loss: 4159.323730, avg loss: 2.542374, ppl: 12.709810 +epoch: 1, batch: 44508, sum loss: 4856.741699, avg loss: 2.856907, ppl: 17.407604 +epoch: 1, batch: 44509, sum loss: 4048.050537, avg loss: 2.309213, ppl: 10.066501 +epoch: 1, batch: 44510, sum loss: 3799.378418, avg loss: 2.349647, ppl: 10.481864 +epoch: 1, batch: 44511, sum loss: 3973.834229, avg loss: 2.563764, ppl: 12.984601 +epoch: 1, batch: 44512, sum loss: 4562.077637, avg loss: 2.701052, ppl: 14.895400 +epoch: 1, batch: 44513, sum loss: 4193.195312, avg loss: 2.542872, ppl: 12.716136 +epoch: 1, batch: 44514, sum loss: 4036.998291, avg loss: 2.624836, ppl: 13.802317 +epoch: 1, batch: 44515, sum loss: 4429.199707, avg loss: 2.636429, ppl: 13.963246 +epoch: 1, batch: 44516, sum loss: 4085.604492, avg loss: 2.751249, ppl: 15.662175 +epoch: 1, batch: 44517, sum loss: 5103.349609, avg loss: 2.755588, ppl: 15.730292 +epoch: 1, batch: 44518, sum loss: 5300.531250, avg loss: 2.999735, ppl: 20.080212 +epoch: 1, batch: 44519, sum loss: 4561.402344, avg loss: 2.393181, ppl: 10.948260 +epoch: 1, batch: 44520, sum loss: 4879.082520, avg loss: 2.565238, ppl: 13.003753 +epoch: 1, batch: 44521, sum loss: 4704.250488, avg loss: 2.656268, ppl: 14.243033 +epoch: 1, batch: 44522, sum loss: 
3817.741943, avg loss: 2.349380, ppl: 10.479066 +epoch: 1, batch: 44523, sum loss: 3737.310547, avg loss: 2.476680, ppl: 11.901689 +epoch: 1, batch: 44524, sum loss: 5130.751465, avg loss: 2.597849, ppl: 13.434807 +epoch: 1, batch: 44525, sum loss: 4304.826172, avg loss: 2.370499, ppl: 10.702730 +epoch: 1, batch: 44526, sum loss: 3644.438477, avg loss: 2.319821, ppl: 10.173852 +epoch: 1, batch: 44527, sum loss: 3719.220215, avg loss: 2.491105, ppl: 12.074615 +epoch: 1, batch: 44528, sum loss: 5497.586426, avg loss: 2.847015, ppl: 17.236261 +epoch: 1, batch: 44529, sum loss: 3882.044922, avg loss: 2.424763, ppl: 11.299548 +epoch: 1, batch: 44530, sum loss: 3818.628174, avg loss: 2.260881, ppl: 9.591537 +epoch: 1, batch: 44531, sum loss: 4547.250000, avg loss: 2.668574, ppl: 14.419394 +epoch: 1, batch: 44532, sum loss: 4527.354980, avg loss: 2.623033, ppl: 13.777448 +epoch: 1, batch: 44533, sum loss: 5131.695312, avg loss: 2.766412, ppl: 15.901484 +epoch: 1, batch: 44534, sum loss: 4219.015625, avg loss: 2.569437, ppl: 13.058474 +epoch: 1, batch: 44535, sum loss: 4509.544434, avg loss: 2.522117, ppl: 12.454931 +epoch: 1, batch: 44536, sum loss: 3274.819824, avg loss: 2.402656, ppl: 11.052491 +epoch: 1, batch: 44537, sum loss: 3723.055908, avg loss: 2.581870, ppl: 13.221834 +epoch: 1, batch: 44538, sum loss: 3492.108398, avg loss: 2.295929, ppl: 9.933664 +epoch: 1, batch: 44539, sum loss: 4863.967773, avg loss: 2.585841, ppl: 13.274454 +epoch: 1, batch: 44540, sum loss: 4794.318848, avg loss: 2.691925, ppl: 14.760066 +epoch: 1, batch: 44541, sum loss: 2984.436523, avg loss: 2.210694, ppl: 9.122042 +epoch: 1, batch: 44542, sum loss: 4028.252930, avg loss: 2.457750, ppl: 11.678509 +epoch: 1, batch: 44543, sum loss: 3702.122559, avg loss: 2.454988, ppl: 11.646299 +epoch: 1, batch: 44544, sum loss: 4481.406738, avg loss: 2.662749, ppl: 14.335645 +epoch: 1, batch: 44545, sum loss: 5219.219727, avg loss: 2.771758, ppl: 15.986708 +epoch: 1, batch: 44546, sum loss: 
4730.042480, avg loss: 2.610399, ppl: 13.604475 +epoch: 1, batch: 44547, sum loss: 4372.693359, avg loss: 2.690888, ppl: 14.744766 +epoch: 1, batch: 44548, sum loss: 3790.337402, avg loss: 2.295783, ppl: 9.932210 +epoch: 1, batch: 44549, sum loss: 3940.973633, avg loss: 2.673659, ppl: 14.492907 +epoch: 1, batch: 44550, sum loss: 4893.874023, avg loss: 2.638207, ppl: 13.988100 +epoch: 1, batch: 44551, sum loss: 4045.793213, avg loss: 2.591796, ppl: 13.353732 +epoch: 1, batch: 44552, sum loss: 3514.852539, avg loss: 2.332351, ppl: 10.302131 +epoch: 1, batch: 44553, sum loss: 4320.898926, avg loss: 2.532766, ppl: 12.588279 +epoch: 1, batch: 44554, sum loss: 4774.927246, avg loss: 2.795625, ppl: 16.372854 +epoch: 1, batch: 44555, sum loss: 5305.414551, avg loss: 2.837120, ppl: 17.066544 +epoch: 1, batch: 44556, sum loss: 4112.727539, avg loss: 2.378674, ppl: 10.790588 +epoch: 1, batch: 44557, sum loss: 4132.053223, avg loss: 2.459556, ppl: 11.699612 +epoch: 1, batch: 44558, sum loss: 3827.190918, avg loss: 2.243371, ppl: 9.425050 +epoch: 1, batch: 44559, sum loss: 4742.312988, avg loss: 2.829542, ppl: 16.937708 +epoch: 1, batch: 44560, sum loss: 4247.381836, avg loss: 2.544866, ppl: 12.741525 +epoch: 1, batch: 44561, sum loss: 5151.682617, avg loss: 2.546556, ppl: 12.763069 +epoch: 1, batch: 44562, sum loss: 4683.730469, avg loss: 2.735824, ppl: 15.422444 +epoch: 1, batch: 44563, sum loss: 4108.294922, avg loss: 2.370626, ppl: 10.704091 +epoch: 1, batch: 44564, sum loss: 3991.208984, avg loss: 2.537323, ppl: 12.645770 +epoch: 1, batch: 44565, sum loss: 4565.242188, avg loss: 2.374021, ppl: 10.740491 +epoch: 1, batch: 44566, sum loss: 4202.358398, avg loss: 2.683498, ppl: 14.636207 +epoch: 1, batch: 44567, sum loss: 4453.853516, avg loss: 2.455267, ppl: 11.649540 +epoch: 1, batch: 44568, sum loss: 4390.591309, avg loss: 2.644935, ppl: 14.082525 +epoch: 1, batch: 44569, sum loss: 4056.675293, avg loss: 2.370938, ppl: 10.707435 +epoch: 1, batch: 44570, sum loss: 
3688.122559, avg loss: 2.450580, ppl: 11.595069 +epoch: 1, batch: 44571, sum loss: 5562.112793, avg loss: 2.806313, ppl: 16.548796 +epoch: 1, batch: 44572, sum loss: 3662.906494, avg loss: 2.268054, ppl: 9.660579 +epoch: 1, batch: 44573, sum loss: 6031.036621, avg loss: 2.763995, ppl: 15.863085 +epoch: 1, batch: 44574, sum loss: 5243.615234, avg loss: 2.784713, ppl: 16.195173 +epoch: 1, batch: 44575, sum loss: 4201.151855, avg loss: 2.566373, ppl: 13.018516 +epoch: 1, batch: 44576, sum loss: 4769.062500, avg loss: 2.658340, ppl: 14.272583 +epoch: 1, batch: 44577, sum loss: 4219.313477, avg loss: 2.429081, ppl: 11.348448 +epoch: 1, batch: 44578, sum loss: 4693.746094, avg loss: 2.590368, ppl: 13.334672 +epoch: 1, batch: 44579, sum loss: 4864.912109, avg loss: 2.605737, ppl: 13.541207 +epoch: 1, batch: 44580, sum loss: 4060.770020, avg loss: 2.407095, ppl: 11.101668 +epoch: 1, batch: 44581, sum loss: 4383.394043, avg loss: 2.609163, ppl: 13.587677 +epoch: 1, batch: 44582, sum loss: 4249.629883, avg loss: 2.480811, ppl: 11.950957 +epoch: 1, batch: 44583, sum loss: 4361.562012, avg loss: 2.559602, ppl: 12.930674 +epoch: 1, batch: 44584, sum loss: 4864.440918, avg loss: 2.642282, ppl: 14.045218 +epoch: 1, batch: 44585, sum loss: 4390.775391, avg loss: 2.423165, ppl: 11.281512 +epoch: 1, batch: 44586, sum loss: 5515.950195, avg loss: 2.709209, ppl: 15.017399 +epoch: 1, batch: 44587, sum loss: 3505.817383, avg loss: 2.354478, ppl: 10.532625 +epoch: 1, batch: 44588, sum loss: 3329.745605, avg loss: 2.265133, ppl: 9.632407 +epoch: 1, batch: 44589, sum loss: 4324.608398, avg loss: 2.598923, ppl: 13.449251 +epoch: 1, batch: 44590, sum loss: 5011.415039, avg loss: 2.831308, ppl: 16.967638 +epoch: 1, batch: 44591, sum loss: 4267.755371, avg loss: 2.618255, ppl: 13.711774 +epoch: 1, batch: 44592, sum loss: 4908.389160, avg loss: 2.816058, ppl: 16.710846 +epoch: 1, batch: 44593, sum loss: 4514.229980, avg loss: 2.527565, ppl: 12.522969 +epoch: 1, batch: 44594, sum loss: 
3821.753418, avg loss: 2.632062, ppl: 13.902403 +epoch: 1, batch: 44595, sum loss: 3951.595215, avg loss: 2.413925, ppl: 11.177750 +epoch: 1, batch: 44596, sum loss: 4607.846191, avg loss: 2.697802, ppl: 14.847063 +epoch: 1, batch: 44597, sum loss: 5285.501465, avg loss: 3.078335, ppl: 21.722212 +epoch: 1, batch: 44598, sum loss: 5083.759277, avg loss: 2.740571, ppl: 15.495831 +epoch: 1, batch: 44599, sum loss: 5203.997070, avg loss: 2.685241, ppl: 14.661737 +epoch: 1, batch: 44600, sum loss: 5191.975586, avg loss: 2.611658, ppl: 13.621612 +epoch: 1, batch: 44601, sum loss: 3584.901855, avg loss: 2.369400, ppl: 10.690973 +epoch: 1, batch: 44602, sum loss: 4541.128418, avg loss: 2.714363, ppl: 15.094986 +epoch: 1, batch: 44603, sum loss: 5713.250488, avg loss: 3.008557, ppl: 20.258152 +epoch: 1, batch: 44604, sum loss: 4624.032227, avg loss: 2.877431, ppl: 17.768574 +epoch: 1, batch: 44605, sum loss: 4709.082520, avg loss: 2.677136, ppl: 14.543384 +epoch: 1, batch: 44606, sum loss: 3832.379883, avg loss: 2.425557, ppl: 11.308525 +epoch: 1, batch: 44607, sum loss: 5035.943359, avg loss: 2.767002, ppl: 15.910859 +epoch: 1, batch: 44608, sum loss: 4211.379883, avg loss: 2.650333, ppl: 14.158758 +epoch: 1, batch: 44609, sum loss: 4665.346191, avg loss: 2.520446, ppl: 12.434145 +epoch: 1, batch: 44610, sum loss: 4197.000000, avg loss: 2.504177, ppl: 12.233482 +epoch: 1, batch: 44611, sum loss: 4014.328857, avg loss: 2.251446, ppl: 9.501470 +epoch: 1, batch: 44612, sum loss: 3730.540039, avg loss: 2.623446, ppl: 13.783135 +epoch: 1, batch: 44613, sum loss: 4980.660156, avg loss: 2.556807, ppl: 12.894580 +epoch: 1, batch: 44614, sum loss: 5398.065430, avg loss: 2.754115, ppl: 15.707131 +epoch: 1, batch: 44615, sum loss: 4556.801758, avg loss: 2.723731, ppl: 15.237066 +epoch: 1, batch: 44616, sum loss: 5298.553711, avg loss: 2.903317, ppl: 18.234533 +epoch: 1, batch: 44617, sum loss: 4897.412598, avg loss: 2.723811, ppl: 15.238287 +epoch: 1, batch: 44618, sum loss: 
3741.912598, avg loss: 2.653839, ppl: 14.208479 +epoch: 1, batch: 44619, sum loss: 4739.851074, avg loss: 2.650923, ppl: 14.167116 +epoch: 1, batch: 44620, sum loss: 4910.195801, avg loss: 2.529725, ppl: 12.550050 +epoch: 1, batch: 44621, sum loss: 4501.387695, avg loss: 2.531714, ppl: 12.575044 +epoch: 1, batch: 44622, sum loss: 3669.921387, avg loss: 2.471328, ppl: 11.838152 +epoch: 1, batch: 44623, sum loss: 4217.835449, avg loss: 2.465129, ppl: 11.764998 +epoch: 1, batch: 44624, sum loss: 4812.404785, avg loss: 2.686993, ppl: 14.687446 +epoch: 1, batch: 44625, sum loss: 4518.669922, avg loss: 2.829474, ppl: 16.936550 +epoch: 1, batch: 44626, sum loss: 3918.632324, avg loss: 2.564550, ppl: 12.994808 +epoch: 1, batch: 44627, sum loss: 4141.723633, avg loss: 2.591817, ppl: 13.354015 +epoch: 1, batch: 44628, sum loss: 3500.101074, avg loss: 2.355384, ppl: 10.542180 +epoch: 1, batch: 44629, sum loss: 4087.780273, avg loss: 2.486484, ppl: 12.018947 +epoch: 1, batch: 44630, sum loss: 4870.643555, avg loss: 2.828480, ppl: 16.919731 +epoch: 1, batch: 44631, sum loss: 4352.899414, avg loss: 2.532228, ppl: 12.581504 +epoch: 1, batch: 44632, sum loss: 4465.043457, avg loss: 2.505636, ppl: 12.251351 +epoch: 1, batch: 44633, sum loss: 3803.916260, avg loss: 2.372998, ppl: 10.729514 +epoch: 1, batch: 44634, sum loss: 5029.270996, avg loss: 2.677993, ppl: 14.555851 +epoch: 1, batch: 44635, sum loss: 3847.055908, avg loss: 2.425634, ppl: 11.309399 +epoch: 1, batch: 44636, sum loss: 5014.341309, avg loss: 2.658718, ppl: 14.277967 +epoch: 1, batch: 44637, sum loss: 4997.620605, avg loss: 2.846025, ppl: 17.219208 +epoch: 1, batch: 44638, sum loss: 4036.686523, avg loss: 2.467412, ppl: 11.791896 +epoch: 1, batch: 44639, sum loss: 5552.644531, avg loss: 2.991727, ppl: 19.920048 +epoch: 1, batch: 44640, sum loss: 5348.391602, avg loss: 3.049254, ppl: 21.099598 +epoch: 1, batch: 44641, sum loss: 5630.930176, avg loss: 2.911546, ppl: 18.385201 +epoch: 1, batch: 44642, sum loss: 
4775.318359, avg loss: 2.542768, ppl: 12.714817 +epoch: 1, batch: 44643, sum loss: 3839.187012, avg loss: 2.343826, ppl: 10.421029 +epoch: 1, batch: 44644, sum loss: 4398.272461, avg loss: 2.691721, ppl: 14.757054 +epoch: 1, batch: 44645, sum loss: 4832.742188, avg loss: 2.547571, ppl: 12.776032 +epoch: 1, batch: 44646, sum loss: 3487.760254, avg loss: 2.402039, ppl: 11.045673 +epoch: 1, batch: 44647, sum loss: 4258.030273, avg loss: 2.368204, ppl: 10.678193 +epoch: 1, batch: 44648, sum loss: 4156.459961, avg loss: 2.565716, ppl: 13.009971 +epoch: 1, batch: 44649, sum loss: 3982.052490, avg loss: 2.614611, ppl: 13.661900 +epoch: 1, batch: 44650, sum loss: 4110.769531, avg loss: 2.392765, ppl: 10.943707 +epoch: 1, batch: 44651, sum loss: 4320.042969, avg loss: 2.507280, ppl: 12.271502 +epoch: 1, batch: 44652, sum loss: 4859.183594, avg loss: 2.675762, ppl: 14.523408 +epoch: 1, batch: 44653, sum loss: 3526.792969, avg loss: 2.517340, ppl: 12.395576 +epoch: 1, batch: 44654, sum loss: 3864.345703, avg loss: 2.654084, ppl: 14.211958 +epoch: 1, batch: 44655, sum loss: 4034.751953, avg loss: 2.642274, ppl: 14.045101 +epoch: 1, batch: 44656, sum loss: 5739.998535, avg loss: 2.933060, ppl: 18.785023 +epoch: 1, batch: 44657, sum loss: 3941.763672, avg loss: 2.497949, ppl: 12.157535 +epoch: 1, batch: 44658, sum loss: 4536.272461, avg loss: 2.598094, ppl: 13.438104 +epoch: 1, batch: 44659, sum loss: 4890.932129, avg loss: 2.602944, ppl: 13.503439 +epoch: 1, batch: 44660, sum loss: 4121.270508, avg loss: 2.690125, ppl: 14.733510 +epoch: 1, batch: 44661, sum loss: 4948.524414, avg loss: 2.669107, ppl: 14.427079 +epoch: 1, batch: 44662, sum loss: 4838.553711, avg loss: 2.761732, ppl: 15.827226 +epoch: 1, batch: 44663, sum loss: 4174.447754, avg loss: 2.537658, ppl: 12.650009 +epoch: 1, batch: 44664, sum loss: 4472.356934, avg loss: 2.538228, ppl: 12.657217 +epoch: 1, batch: 44665, sum loss: 4562.246094, avg loss: 2.620475, ppl: 13.742244 +epoch: 1, batch: 44666, sum loss: 
4174.849609, avg loss: 2.324527, ppl: 10.221839 +epoch: 1, batch: 44667, sum loss: 4451.664062, avg loss: 2.657710, ppl: 14.263589 +epoch: 1, batch: 44668, sum loss: 4419.854980, avg loss: 2.698324, ppl: 14.854814 +epoch: 1, batch: 44669, sum loss: 4638.913086, avg loss: 2.498068, ppl: 12.158984 +epoch: 1, batch: 44670, sum loss: 3838.433594, avg loss: 2.320698, ppl: 10.182775 +epoch: 1, batch: 44671, sum loss: 3612.443359, avg loss: 2.421209, ppl: 11.259462 +epoch: 1, batch: 44672, sum loss: 3377.085693, avg loss: 2.187232, ppl: 8.910513 +epoch: 1, batch: 44673, sum loss: 4064.286133, avg loss: 2.515028, ppl: 12.366961 +epoch: 1, batch: 44674, sum loss: 3501.713135, avg loss: 2.383739, ppl: 10.845381 +epoch: 1, batch: 44675, sum loss: 3834.339600, avg loss: 2.446930, ppl: 11.552830 +epoch: 1, batch: 44676, sum loss: 4959.651367, avg loss: 2.729583, ppl: 15.326488 +epoch: 1, batch: 44677, sum loss: 4512.119141, avg loss: 2.848560, ppl: 17.262907 +epoch: 1, batch: 44678, sum loss: 4051.027832, avg loss: 2.695295, ppl: 14.809881 +epoch: 1, batch: 44679, sum loss: 4621.100586, avg loss: 2.719895, ppl: 15.178723 +epoch: 1, batch: 44680, sum loss: 4796.210449, avg loss: 2.536335, ppl: 12.633291 +epoch: 1, batch: 44681, sum loss: 5381.693848, avg loss: 2.961857, ppl: 19.333838 +epoch: 1, batch: 44682, sum loss: 5677.315918, avg loss: 2.765376, ppl: 15.885006 +epoch: 1, batch: 44683, sum loss: 4813.533203, avg loss: 2.698169, ppl: 14.852511 +epoch: 1, batch: 44684, sum loss: 4553.551270, avg loss: 2.522743, ppl: 12.462735 +epoch: 1, batch: 44685, sum loss: 4006.552246, avg loss: 2.571600, ppl: 13.086742 +epoch: 1, batch: 44686, sum loss: 4088.680176, avg loss: 2.642974, ppl: 14.054936 +epoch: 1, batch: 44687, sum loss: 4169.997070, avg loss: 2.612780, ppl: 13.636910 +epoch: 1, batch: 44688, sum loss: 3990.648438, avg loss: 2.351590, ppl: 10.502256 +epoch: 1, batch: 44689, sum loss: 4886.992188, avg loss: 2.747045, ppl: 15.596469 +epoch: 1, batch: 44690, sum loss: 
4342.071289, avg loss: 2.828711, ppl: 16.923628 +epoch: 1, batch: 44691, sum loss: 4043.455078, avg loss: 2.588640, ppl: 13.311652 +epoch: 1, batch: 44692, sum loss: 3899.511719, avg loss: 2.601409, ppl: 13.482718 +epoch: 1, batch: 44693, sum loss: 4835.476074, avg loss: 2.573431, ppl: 13.110727 +epoch: 1, batch: 44694, sum loss: 5071.592773, avg loss: 2.557535, ppl: 12.903975 +epoch: 1, batch: 44695, sum loss: 4157.072754, avg loss: 2.511827, ppl: 12.327426 +epoch: 1, batch: 44696, sum loss: 4738.941406, avg loss: 2.538265, ppl: 12.657694 +epoch: 1, batch: 44697, sum loss: 5123.646484, avg loss: 3.062550, ppl: 21.382019 +epoch: 1, batch: 44698, sum loss: 4548.203125, avg loss: 2.496269, ppl: 12.137132 +epoch: 1, batch: 44699, sum loss: 5096.472168, avg loss: 2.768317, ppl: 15.931806 +epoch: 1, batch: 44700, sum loss: 3975.001953, avg loss: 2.501575, ppl: 12.201691 +epoch: 1, batch: 44701, sum loss: 4053.508545, avg loss: 2.661529, ppl: 14.318166 +epoch: 1, batch: 44702, sum loss: 4304.845703, avg loss: 2.562408, ppl: 12.967007 +epoch: 1, batch: 44703, sum loss: 3993.319336, avg loss: 2.446887, ppl: 11.552328 +epoch: 1, batch: 44704, sum loss: 4875.365234, avg loss: 2.723668, ppl: 15.236100 +epoch: 1, batch: 44705, sum loss: 4202.534668, avg loss: 2.351726, ppl: 10.503687 +epoch: 1, batch: 44706, sum loss: 5034.820312, avg loss: 2.570097, ppl: 13.067095 +epoch: 1, batch: 44707, sum loss: 4946.730469, avg loss: 2.761993, ppl: 15.831370 +epoch: 1, batch: 44708, sum loss: 4106.817383, avg loss: 2.356178, ppl: 10.550546 +epoch: 1, batch: 44709, sum loss: 4744.718750, avg loss: 2.791011, ppl: 16.297489 +epoch: 1, batch: 44710, sum loss: 4346.706543, avg loss: 2.381757, ppl: 10.823904 +epoch: 1, batch: 44711, sum loss: 4692.114746, avg loss: 2.743927, ppl: 15.547918 +epoch: 1, batch: 44712, sum loss: 4048.673584, avg loss: 2.488429, ppl: 12.042337 +epoch: 1, batch: 44713, sum loss: 4643.562988, avg loss: 2.589829, ppl: 13.327492 +epoch: 1, batch: 44714, sum loss: 
4006.348145, avg loss: 2.608300, ppl: 13.575949 +epoch: 1, batch: 44715, sum loss: 4721.099609, avg loss: 2.659774, ppl: 14.293063 +epoch: 1, batch: 44716, sum loss: 5217.807129, avg loss: 2.696541, ppl: 14.828353 +epoch: 1, batch: 44717, sum loss: 3749.062256, avg loss: 2.594507, ppl: 13.389981 +epoch: 1, batch: 44718, sum loss: 4485.720215, avg loss: 2.604948, ppl: 13.530519 +epoch: 1, batch: 44719, sum loss: 4212.642578, avg loss: 2.521031, ppl: 12.441416 +epoch: 1, batch: 44720, sum loss: 4503.833496, avg loss: 2.626142, ppl: 13.820345 +epoch: 1, batch: 44721, sum loss: 4318.300781, avg loss: 2.409766, ppl: 11.131358 +epoch: 1, batch: 44722, sum loss: 5290.175781, avg loss: 2.719885, ppl: 15.178575 +epoch: 1, batch: 44723, sum loss: 3888.548096, avg loss: 2.379772, ppl: 10.802444 +epoch: 1, batch: 44724, sum loss: 4263.346680, avg loss: 2.407310, ppl: 11.104053 +epoch: 1, batch: 44725, sum loss: 4116.484863, avg loss: 2.485800, ppl: 12.010725 +epoch: 1, batch: 44726, sum loss: 4733.903320, avg loss: 2.737943, ppl: 15.455160 +epoch: 1, batch: 44727, sum loss: 4816.982422, avg loss: 2.655448, ppl: 14.231359 +epoch: 1, batch: 44728, sum loss: 4556.096680, avg loss: 2.603484, ppl: 13.510723 +epoch: 1, batch: 44729, sum loss: 3606.547119, avg loss: 2.394786, ppl: 10.965847 +epoch: 1, batch: 44730, sum loss: 4517.620117, avg loss: 2.523810, ppl: 12.476042 +epoch: 1, batch: 44731, sum loss: 4704.034180, avg loss: 2.593183, ppl: 13.372272 +epoch: 1, batch: 44732, sum loss: 5137.273438, avg loss: 2.767927, ppl: 15.925592 +epoch: 1, batch: 44733, sum loss: 5005.756348, avg loss: 2.714618, ppl: 15.098848 +epoch: 1, batch: 44734, sum loss: 4101.035645, avg loss: 2.612125, ppl: 13.627975 +epoch: 1, batch: 44735, sum loss: 3728.073242, avg loss: 2.493694, ppl: 12.105917 +epoch: 1, batch: 44736, sum loss: 4822.900391, avg loss: 2.530378, ppl: 12.558250 +epoch: 1, batch: 44737, sum loss: 4005.329102, avg loss: 2.549541, ppl: 12.801229 +epoch: 1, batch: 44738, sum loss: 
3893.861328, avg loss: 2.494466, ppl: 12.115263 +epoch: 1, batch: 44739, sum loss: 4244.883789, avg loss: 2.398239, ppl: 11.003786 +epoch: 1, batch: 44740, sum loss: 4306.050781, avg loss: 2.503518, ppl: 12.225426 +epoch: 1, batch: 44741, sum loss: 3927.732422, avg loss: 2.471827, ppl: 11.844061 +epoch: 1, batch: 44742, sum loss: 4319.192383, avg loss: 2.433348, ppl: 11.396972 +epoch: 1, batch: 44743, sum loss: 3929.171387, avg loss: 2.477409, ppl: 11.910369 +epoch: 1, batch: 44744, sum loss: 4587.021973, avg loss: 2.575532, ppl: 13.138301 +epoch: 1, batch: 44745, sum loss: 3966.901855, avg loss: 2.487086, ppl: 12.026178 +epoch: 1, batch: 44746, sum loss: 4543.046387, avg loss: 2.853672, ppl: 17.351383 +epoch: 1, batch: 44747, sum loss: 3333.168213, avg loss: 2.184252, ppl: 8.883999 +epoch: 1, batch: 44748, sum loss: 4451.185059, avg loss: 2.580397, ppl: 13.202383 +epoch: 1, batch: 44749, sum loss: 5511.064941, avg loss: 2.849568, ppl: 17.280317 +epoch: 1, batch: 44750, sum loss: 4502.242188, avg loss: 2.692729, ppl: 14.771930 +epoch: 1, batch: 44751, sum loss: 5048.211914, avg loss: 2.818655, ppl: 16.754305 +epoch: 1, batch: 44752, sum loss: 4544.673828, avg loss: 2.607386, ppl: 13.563551 +epoch: 1, batch: 44753, sum loss: 4524.703613, avg loss: 2.457742, ppl: 11.678417 +epoch: 1, batch: 44754, sum loss: 3999.593018, avg loss: 2.507582, ppl: 12.275212 +epoch: 1, batch: 44755, sum loss: 3892.701172, avg loss: 2.583080, ppl: 13.237845 +epoch: 1, batch: 44756, sum loss: 3798.871582, avg loss: 2.371330, ppl: 10.711635 +epoch: 1, batch: 44757, sum loss: 4431.453125, avg loss: 2.499410, ppl: 12.175304 +epoch: 1, batch: 44758, sum loss: 4687.089844, avg loss: 2.696830, ppl: 14.832631 +epoch: 1, batch: 44759, sum loss: 3836.275391, avg loss: 2.678963, ppl: 14.569983 +epoch: 1, batch: 44760, sum loss: 3321.988770, avg loss: 2.501498, ppl: 12.200754 +epoch: 1, batch: 44761, sum loss: 4560.200195, avg loss: 2.716022, ppl: 15.120048 +epoch: 1, batch: 44762, sum loss: 
5188.992676, avg loss: 2.721024, ppl: 15.195875 +epoch: 1, batch: 44763, sum loss: 3818.219727, avg loss: 2.566008, ppl: 13.013768 +epoch: 1, batch: 44764, sum loss: 3701.145996, avg loss: 2.497400, ppl: 12.150855 +epoch: 1, batch: 44765, sum loss: 4188.863770, avg loss: 2.491888, ppl: 12.084070 +epoch: 1, batch: 44766, sum loss: 3993.270020, avg loss: 2.733244, ppl: 15.382714 +epoch: 1, batch: 44767, sum loss: 4115.226562, avg loss: 2.639658, ppl: 14.008411 +epoch: 1, batch: 44768, sum loss: 3748.489502, avg loss: 2.438835, ppl: 11.459684 +epoch: 1, batch: 44769, sum loss: 4327.318848, avg loss: 2.431078, ppl: 11.371133 +epoch: 1, batch: 44770, sum loss: 5028.173340, avg loss: 2.837570, ppl: 17.074221 +epoch: 1, batch: 44771, sum loss: 4951.440918, avg loss: 2.588312, ppl: 13.307289 +epoch: 1, batch: 44772, sum loss: 5104.079590, avg loss: 2.769441, ppl: 15.949714 +epoch: 1, batch: 44773, sum loss: 4508.996094, avg loss: 2.570693, ppl: 13.074885 +epoch: 1, batch: 44774, sum loss: 5952.105957, avg loss: 2.857468, ppl: 17.417372 +epoch: 1, batch: 44775, sum loss: 4739.548828, avg loss: 2.482739, ppl: 11.974019 +epoch: 1, batch: 44776, sum loss: 4633.585938, avg loss: 2.582824, ppl: 13.234460 +epoch: 1, batch: 44777, sum loss: 4512.774902, avg loss: 2.596533, ppl: 13.417144 +epoch: 1, batch: 44778, sum loss: 4153.132812, avg loss: 2.576385, ppl: 13.149517 +epoch: 1, batch: 44779, sum loss: 4163.414062, avg loss: 2.603761, ppl: 13.514470 +epoch: 1, batch: 44780, sum loss: 4668.146484, avg loss: 2.937789, ppl: 18.874069 +epoch: 1, batch: 44781, sum loss: 4757.079590, avg loss: 2.672517, ppl: 14.476355 +epoch: 1, batch: 44782, sum loss: 3735.381104, avg loss: 2.597622, ppl: 13.431765 +epoch: 1, batch: 44783, sum loss: 4417.209961, avg loss: 2.664180, ppl: 14.356170 +epoch: 1, batch: 44784, sum loss: 5441.559082, avg loss: 2.815085, ppl: 16.694593 +epoch: 1, batch: 44785, sum loss: 3581.262451, avg loss: 2.279607, ppl: 9.772840 +epoch: 1, batch: 44786, sum loss: 
4819.761719, avg loss: 2.680624, ppl: 14.594193 +epoch: 1, batch: 44787, sum loss: 4571.566406, avg loss: 2.581347, ppl: 13.214932 +epoch: 1, batch: 44788, sum loss: 4925.405273, avg loss: 2.643803, ppl: 14.066599 +epoch: 1, batch: 44789, sum loss: 5195.687500, avg loss: 2.582350, ppl: 13.228185 +epoch: 1, batch: 44790, sum loss: 3603.063477, avg loss: 2.486586, ppl: 12.020173 +epoch: 1, batch: 44791, sum loss: 4321.040527, avg loss: 2.596779, ppl: 13.420445 +epoch: 1, batch: 44792, sum loss: 5975.187988, avg loss: 2.929013, ppl: 18.709166 +epoch: 1, batch: 44793, sum loss: 4488.232910, avg loss: 2.673158, ppl: 14.485648 +epoch: 1, batch: 44794, sum loss: 4238.979980, avg loss: 2.677814, ppl: 14.553248 +epoch: 1, batch: 44795, sum loss: 5532.250000, avg loss: 3.011568, ppl: 20.319233 +epoch: 1, batch: 44796, sum loss: 3718.473145, avg loss: 2.399015, ppl: 11.012323 +epoch: 1, batch: 44797, sum loss: 3589.658691, avg loss: 2.378833, ppl: 10.792299 +epoch: 1, batch: 44798, sum loss: 3092.489990, avg loss: 2.423582, ppl: 11.286210 +epoch: 1, batch: 44799, sum loss: 4963.212891, avg loss: 2.694470, ppl: 14.797669 +epoch: 1, batch: 44800, sum loss: 4945.296387, avg loss: 2.629078, ppl: 13.860990 +epoch: 1, batch: 44801, sum loss: 4512.387695, avg loss: 2.568234, ppl: 13.042773 +epoch: 1, batch: 44802, sum loss: 4018.711670, avg loss: 2.400664, ppl: 11.030501 +epoch: 1, batch: 44803, sum loss: 2886.061768, avg loss: 2.117434, ppl: 8.309783 +epoch: 1, batch: 44804, sum loss: 3761.659424, avg loss: 2.473149, ppl: 11.859735 +epoch: 1, batch: 44805, sum loss: 4394.355469, avg loss: 2.765485, ppl: 15.886740 +epoch: 1, batch: 44806, sum loss: 4525.301270, avg loss: 2.363082, ppl: 10.623640 +epoch: 1, batch: 44807, sum loss: 3810.739746, avg loss: 2.642677, ppl: 14.050765 +epoch: 1, batch: 44808, sum loss: 3732.895508, avg loss: 2.335980, ppl: 10.339584 +epoch: 1, batch: 44809, sum loss: 4321.256348, avg loss: 2.729789, ppl: 15.329653 +epoch: 1, batch: 44810, sum loss: 
4150.515625, avg loss: 2.562047, ppl: 12.962321 +epoch: 1, batch: 44811, sum loss: 4827.836426, avg loss: 2.763501, ppl: 15.855254 +epoch: 1, batch: 44812, sum loss: 3853.277832, avg loss: 2.271980, ppl: 9.698584 +epoch: 1, batch: 44813, sum loss: 4151.538086, avg loss: 2.619267, ppl: 13.725658 +epoch: 1, batch: 44814, sum loss: 4645.983887, avg loss: 2.777038, ppl: 16.071341 +epoch: 1, batch: 44815, sum loss: 3406.296387, avg loss: 2.370422, ppl: 10.701909 +epoch: 1, batch: 44816, sum loss: 4437.866211, avg loss: 2.747905, ppl: 15.609891 +epoch: 1, batch: 44817, sum loss: 4605.305664, avg loss: 2.863996, ppl: 17.531443 +epoch: 1, batch: 44818, sum loss: 5022.563477, avg loss: 2.654632, ppl: 14.219750 +epoch: 1, batch: 44819, sum loss: 4199.558594, avg loss: 2.529855, ppl: 12.551681 +epoch: 1, batch: 44820, sum loss: 4675.897461, avg loss: 2.603507, ppl: 13.511032 +epoch: 1, batch: 44821, sum loss: 4239.462891, avg loss: 2.480669, ppl: 11.949253 +epoch: 1, batch: 44822, sum loss: 3624.221436, avg loss: 2.448798, ppl: 11.574430 +epoch: 1, batch: 44823, sum loss: 5123.988281, avg loss: 2.599689, ppl: 13.459558 +epoch: 1, batch: 44824, sum loss: 4169.330078, avg loss: 2.586433, ppl: 13.282309 +epoch: 1, batch: 44825, sum loss: 4034.591553, avg loss: 2.473692, ppl: 11.866176 +epoch: 1, batch: 44826, sum loss: 4670.903809, avg loss: 2.707771, ppl: 14.995807 +epoch: 1, batch: 44827, sum loss: 4709.552734, avg loss: 2.438919, ppl: 11.460646 +epoch: 1, batch: 44828, sum loss: 5300.736328, avg loss: 2.863715, ppl: 17.526516 +epoch: 1, batch: 44829, sum loss: 4140.897461, avg loss: 2.546677, ppl: 12.764621 +epoch: 1, batch: 44830, sum loss: 3912.836426, avg loss: 2.540803, ppl: 12.689854 +epoch: 1, batch: 44831, sum loss: 5327.029297, avg loss: 2.806654, ppl: 16.554438 +epoch: 1, batch: 44832, sum loss: 4436.079102, avg loss: 2.618701, ppl: 13.717889 +epoch: 1, batch: 44833, sum loss: 3447.968262, avg loss: 2.409482, ppl: 11.128193 +epoch: 1, batch: 44834, sum loss: 
4519.223145, avg loss: 2.600244, ppl: 13.467018 +epoch: 1, batch: 44835, sum loss: 4687.393066, avg loss: 2.771965, ppl: 15.990027 +epoch: 1, batch: 44836, sum loss: 3952.464355, avg loss: 2.322247, ppl: 10.198565 +epoch: 1, batch: 44837, sum loss: 4307.678711, avg loss: 2.536913, ppl: 12.640594 +epoch: 1, batch: 44838, sum loss: 5052.140625, avg loss: 2.491194, ppl: 12.075680 +epoch: 1, batch: 44839, sum loss: 4052.808838, avg loss: 2.315891, ppl: 10.133946 +epoch: 1, batch: 44840, sum loss: 4007.735840, avg loss: 2.347824, ppl: 10.462779 +epoch: 1, batch: 44841, sum loss: 4112.596191, avg loss: 2.576815, ppl: 13.155168 +epoch: 1, batch: 44842, sum loss: 3883.718018, avg loss: 2.368121, ppl: 10.677307 +epoch: 1, batch: 44843, sum loss: 4435.657715, avg loss: 2.629317, ppl: 13.864298 +epoch: 1, batch: 44844, sum loss: 3826.878906, avg loss: 2.547855, ppl: 12.779667 +epoch: 1, batch: 44845, sum loss: 5080.332520, avg loss: 2.986674, ppl: 19.819649 +epoch: 1, batch: 44846, sum loss: 3877.836182, avg loss: 2.370316, ppl: 10.700768 +epoch: 1, batch: 44847, sum loss: 4825.490234, avg loss: 2.821924, ppl: 16.809160 +epoch: 1, batch: 44848, sum loss: 4754.547363, avg loss: 2.533057, ppl: 12.591938 +epoch: 1, batch: 44849, sum loss: 3357.159912, avg loss: 2.086488, ppl: 8.056575 +epoch: 1, batch: 44850, sum loss: 4959.172363, avg loss: 2.876550, ppl: 17.752924 +epoch: 1, batch: 44851, sum loss: 4290.941406, avg loss: 2.528545, ppl: 12.535259 +epoch: 1, batch: 44852, sum loss: 4791.517578, avg loss: 2.862317, ppl: 17.502026 +epoch: 1, batch: 44853, sum loss: 4358.405762, avg loss: 2.512049, ppl: 12.330174 +epoch: 1, batch: 44854, sum loss: 6015.742676, avg loss: 2.896361, ppl: 18.108137 +epoch: 1, batch: 44855, sum loss: 4612.741211, avg loss: 2.604597, ppl: 13.525771 +epoch: 1, batch: 44856, sum loss: 3511.248535, avg loss: 2.339273, ppl: 10.373692 +epoch: 1, batch: 44857, sum loss: 4246.660645, avg loss: 2.559771, ppl: 12.932859 +epoch: 1, batch: 44858, sum loss: 
4377.042480, avg loss: 2.519886, ppl: 12.427183 +epoch: 1, batch: 44859, sum loss: 3642.575684, avg loss: 2.513855, ppl: 12.352457 +epoch: 1, batch: 44860, sum loss: 5498.000000, avg loss: 2.893684, ppl: 18.059727 +epoch: 1, batch: 44861, sum loss: 3587.160156, avg loss: 2.336912, ppl: 10.349231 +epoch: 1, batch: 44862, sum loss: 5343.438477, avg loss: 2.638735, ppl: 13.995489 +epoch: 1, batch: 44863, sum loss: 3838.250977, avg loss: 2.302490, ppl: 9.999049 +epoch: 1, batch: 44864, sum loss: 4971.354980, avg loss: 2.684317, ppl: 14.648191 +epoch: 1, batch: 44865, sum loss: 5474.446289, avg loss: 2.857227, ppl: 17.413166 +epoch: 1, batch: 44866, sum loss: 4245.788086, avg loss: 2.680422, ppl: 14.591247 +epoch: 1, batch: 44867, sum loss: 4190.915039, avg loss: 2.473976, ppl: 11.869542 +epoch: 1, batch: 44868, sum loss: 3400.880371, avg loss: 2.489664, ppl: 12.057220 +epoch: 1, batch: 44869, sum loss: 4103.263672, avg loss: 2.483816, ppl: 11.986918 +epoch: 1, batch: 44870, sum loss: 3886.122803, avg loss: 2.422770, ppl: 11.277054 +epoch: 1, batch: 44871, sum loss: 3843.173096, avg loss: 2.471494, ppl: 11.840125 +epoch: 1, batch: 44872, sum loss: 4543.629395, avg loss: 2.527046, ppl: 12.516480 +epoch: 1, batch: 44873, sum loss: 5076.183105, avg loss: 2.630147, ppl: 13.875806 +epoch: 1, batch: 44874, sum loss: 4650.233887, avg loss: 2.694226, ppl: 14.794061 +epoch: 1, batch: 44875, sum loss: 5191.147949, avg loss: 2.737947, ppl: 15.455230 +epoch: 1, batch: 44876, sum loss: 3510.589844, avg loss: 2.253267, ppl: 9.518784 +epoch: 1, batch: 44877, sum loss: 4652.889648, avg loss: 2.683327, ppl: 14.633701 +epoch: 1, batch: 44878, sum loss: 3879.959473, avg loss: 2.242751, ppl: 9.419209 +epoch: 1, batch: 44879, sum loss: 3971.407227, avg loss: 2.527949, ppl: 12.527781 +epoch: 1, batch: 44880, sum loss: 4665.666992, avg loss: 2.874718, ppl: 17.720421 +epoch: 1, batch: 44881, sum loss: 5906.284180, avg loss: 2.921011, ppl: 18.560038 +epoch: 1, batch: 44882, sum loss: 
4415.537109, avg loss: 2.689121, ppl: 14.718740 +epoch: 1, batch: 44883, sum loss: 4751.341309, avg loss: 2.562752, ppl: 12.971459 +epoch: 1, batch: 44884, sum loss: 4410.261230, avg loss: 2.416581, ppl: 11.207479 +epoch: 1, batch: 44885, sum loss: 4212.732422, avg loss: 2.597246, ppl: 13.426705 +epoch: 1, batch: 44886, sum loss: 3403.025879, avg loss: 2.437698, ppl: 11.446656 +epoch: 1, batch: 44887, sum loss: 4107.019531, avg loss: 2.357646, ppl: 10.566050 +epoch: 1, batch: 44888, sum loss: 4118.344238, avg loss: 2.548480, ppl: 12.787655 +epoch: 1, batch: 44889, sum loss: 4755.339844, avg loss: 2.708052, ppl: 15.000033 +epoch: 1, batch: 44890, sum loss: 4815.769531, avg loss: 2.740905, ppl: 15.501000 +epoch: 1, batch: 44891, sum loss: 4136.078125, avg loss: 2.565805, ppl: 13.011130 +epoch: 1, batch: 44892, sum loss: 3855.977783, avg loss: 2.451353, ppl: 11.604032 +epoch: 1, batch: 44893, sum loss: 3605.521484, avg loss: 2.348874, ppl: 10.473768 +epoch: 1, batch: 44894, sum loss: 5183.345215, avg loss: 2.694046, ppl: 14.791409 +epoch: 1, batch: 44895, sum loss: 4153.932617, avg loss: 2.580082, ppl: 13.198226 +epoch: 1, batch: 44896, sum loss: 3551.641602, avg loss: 2.464706, ppl: 11.760026 +epoch: 1, batch: 44897, sum loss: 5250.124023, avg loss: 2.716050, ppl: 15.120477 +epoch: 1, batch: 44898, sum loss: 4705.384277, avg loss: 2.693408, ppl: 14.781971 +epoch: 1, batch: 44899, sum loss: 3859.610107, avg loss: 2.413765, ppl: 11.175959 +epoch: 1, batch: 44900, sum loss: 4708.463379, avg loss: 2.599925, ppl: 13.462726 +epoch: 1, batch: 44901, sum loss: 4292.276855, avg loss: 2.436026, ppl: 11.427533 +epoch: 1, batch: 44902, sum loss: 3792.168945, avg loss: 2.468860, ppl: 11.808979 +epoch: 1, batch: 44903, sum loss: 3705.828369, avg loss: 2.298901, ppl: 9.963225 +epoch: 1, batch: 44904, sum loss: 4065.888672, avg loss: 2.555555, ppl: 12.878449 +epoch: 1, batch: 44905, sum loss: 3680.210938, avg loss: 2.550389, ppl: 12.812081 +epoch: 1, batch: 44906, sum loss: 
4171.541504, avg loss: 2.537434, ppl: 12.647178 +epoch: 1, batch: 44907, sum loss: 4224.961426, avg loss: 2.542095, ppl: 12.706259 +epoch: 1, batch: 44908, sum loss: 4355.803711, avg loss: 2.708833, ppl: 15.011746 +epoch: 1, batch: 44909, sum loss: 3648.170654, avg loss: 2.235399, ppl: 9.350209 +epoch: 1, batch: 44910, sum loss: 5010.789551, avg loss: 2.807165, ppl: 16.562899 +epoch: 1, batch: 44911, sum loss: 4916.690430, avg loss: 2.833827, ppl: 17.010439 +epoch: 1, batch: 44912, sum loss: 4422.103516, avg loss: 2.668741, ppl: 14.421797 +epoch: 1, batch: 44913, sum loss: 3982.040527, avg loss: 2.323244, ppl: 10.208738 +epoch: 1, batch: 44914, sum loss: 5606.951172, avg loss: 2.865075, ppl: 17.550364 +epoch: 1, batch: 44915, sum loss: 4659.684570, avg loss: 2.655091, ppl: 14.226278 +epoch: 1, batch: 44916, sum loss: 4621.630859, avg loss: 2.567573, ppl: 13.034150 +epoch: 1, batch: 44917, sum loss: 3579.048828, avg loss: 2.291325, ppl: 9.888027 +epoch: 1, batch: 44918, sum loss: 4340.440430, avg loss: 2.551699, ppl: 12.828883 +epoch: 1, batch: 44919, sum loss: 4578.686523, avg loss: 2.674466, ppl: 14.504608 +epoch: 1, batch: 44920, sum loss: 3722.794434, avg loss: 2.444382, ppl: 11.523431 +epoch: 1, batch: 44921, sum loss: 5095.516113, avg loss: 2.790535, ppl: 16.289728 +epoch: 1, batch: 44922, sum loss: 4060.109619, avg loss: 2.357787, ppl: 10.567541 +epoch: 1, batch: 44923, sum loss: 4460.753906, avg loss: 2.672711, ppl: 14.479164 +epoch: 1, batch: 44924, sum loss: 4847.499023, avg loss: 2.588094, ppl: 13.304383 +epoch: 1, batch: 44925, sum loss: 3938.829834, avg loss: 2.449521, ppl: 11.582798 +epoch: 1, batch: 44926, sum loss: 3729.887451, avg loss: 2.414167, ppl: 11.180449 +epoch: 1, batch: 44927, sum loss: 4938.187988, avg loss: 2.788361, ppl: 16.254362 +epoch: 1, batch: 44928, sum loss: 4037.115967, avg loss: 2.496670, ppl: 12.141999 +epoch: 1, batch: 44929, sum loss: 4083.791260, avg loss: 2.397998, ppl: 11.001134 +epoch: 1, batch: 44930, sum loss: 
4495.235352, avg loss: 2.590914, ppl: 13.341957 +epoch: 1, batch: 44931, sum loss: 4030.077148, avg loss: 2.549068, ppl: 12.795176 +epoch: 1, batch: 44932, sum loss: 4495.165039, avg loss: 2.786835, ppl: 16.229574 +epoch: 1, batch: 44933, sum loss: 4488.745605, avg loss: 2.743732, ppl: 15.544894 +epoch: 1, batch: 44934, sum loss: 3900.634277, avg loss: 2.387169, ppl: 10.882643 +epoch: 1, batch: 44935, sum loss: 5099.292480, avg loss: 2.710948, ppl: 15.043523 +epoch: 1, batch: 44936, sum loss: 4804.205566, avg loss: 2.555429, ppl: 12.876816 +epoch: 1, batch: 44937, sum loss: 4606.642090, avg loss: 2.445139, ppl: 11.532154 +epoch: 1, batch: 44938, sum loss: 3767.617188, avg loss: 2.304353, ppl: 10.017694 +epoch: 1, batch: 44939, sum loss: 4458.708984, avg loss: 2.380517, ppl: 10.810495 +epoch: 1, batch: 44940, sum loss: 4450.240234, avg loss: 2.541542, ppl: 12.699242 +epoch: 1, batch: 44941, sum loss: 4211.088867, avg loss: 2.687357, ppl: 14.692798 +epoch: 1, batch: 44942, sum loss: 3789.461670, avg loss: 2.359565, ppl: 10.586346 +epoch: 1, batch: 44943, sum loss: 4509.530273, avg loss: 2.711684, ppl: 15.054606 +epoch: 1, batch: 44944, sum loss: 3600.872070, avg loss: 2.300877, ppl: 9.982930 +epoch: 1, batch: 44945, sum loss: 4653.703125, avg loss: 2.855033, ppl: 17.375006 +epoch: 1, batch: 44946, sum loss: 4538.429199, avg loss: 2.675961, ppl: 14.526297 +epoch: 1, batch: 44947, sum loss: 3926.382568, avg loss: 2.422198, ppl: 11.270600 +epoch: 1, batch: 44948, sum loss: 4444.929688, avg loss: 2.710323, ppl: 15.034132 +epoch: 1, batch: 44949, sum loss: 4526.023926, avg loss: 2.520058, ppl: 12.429317 +epoch: 1, batch: 44950, sum loss: 4081.426270, avg loss: 2.519399, ppl: 12.421128 +epoch: 1, batch: 44951, sum loss: 3879.510742, avg loss: 2.527369, ppl: 12.520515 +epoch: 1, batch: 44952, sum loss: 3878.406006, avg loss: 2.417959, ppl: 11.222930 +epoch: 1, batch: 44953, sum loss: 3996.420166, avg loss: 2.494644, ppl: 12.117421 +epoch: 1, batch: 44954, sum loss: 
4711.734863, avg loss: 2.695501, ppl: 14.812932 +epoch: 1, batch: 44955, sum loss: 3987.882568, avg loss: 2.680029, ppl: 14.585511 +epoch: 1, batch: 44956, sum loss: 5230.494629, avg loss: 2.797056, ppl: 16.396305 +epoch: 1, batch: 44957, sum loss: 5174.317383, avg loss: 2.596246, ppl: 13.413283 +epoch: 1, batch: 44958, sum loss: 5075.296387, avg loss: 2.596060, ppl: 13.410789 +epoch: 1, batch: 44959, sum loss: 4267.905762, avg loss: 2.534386, ppl: 12.608685 +epoch: 1, batch: 44960, sum loss: 5302.539062, avg loss: 2.755997, ppl: 15.736730 +epoch: 1, batch: 44961, sum loss: 4441.592285, avg loss: 2.612701, ppl: 13.635838 +epoch: 1, batch: 44962, sum loss: 4835.077637, avg loss: 2.840821, ppl: 17.129824 +epoch: 1, batch: 44963, sum loss: 4162.884766, avg loss: 2.488276, ppl: 12.040495 +epoch: 1, batch: 44964, sum loss: 4846.207520, avg loss: 2.807768, ppl: 16.572884 +epoch: 1, batch: 44965, sum loss: 3850.460205, avg loss: 2.469827, ppl: 11.820401 +epoch: 1, batch: 44966, sum loss: 4294.396973, avg loss: 2.488063, ppl: 12.037937 +epoch: 1, batch: 44967, sum loss: 3980.576660, avg loss: 2.546754, ppl: 12.765601 +epoch: 1, batch: 44968, sum loss: 4655.208496, avg loss: 2.654053, ppl: 14.211517 +epoch: 1, batch: 44969, sum loss: 3345.823486, avg loss: 2.227579, ppl: 9.277377 +epoch: 1, batch: 44970, sum loss: 4312.925293, avg loss: 2.558081, ppl: 12.911022 +epoch: 1, batch: 44971, sum loss: 4406.388184, avg loss: 2.585909, ppl: 13.275346 +epoch: 1, batch: 44972, sum loss: 4156.134277, avg loss: 2.604094, ppl: 13.518972 +epoch: 1, batch: 44973, sum loss: 3959.156006, avg loss: 2.434905, ppl: 11.414738 +epoch: 1, batch: 44974, sum loss: 4319.550781, avg loss: 2.747806, ppl: 15.608347 +epoch: 1, batch: 44975, sum loss: 4740.464355, avg loss: 2.643873, ppl: 14.067585 +epoch: 1, batch: 44976, sum loss: 6219.958984, avg loss: 2.957660, ppl: 19.252871 +epoch: 1, batch: 44977, sum loss: 4706.964844, avg loss: 2.731843, ppl: 15.361168 +epoch: 1, batch: 44978, sum loss: 
5169.835938, avg loss: 2.791488, ppl: 16.305264 +epoch: 1, batch: 44979, sum loss: 5380.472656, avg loss: 2.731205, ppl: 15.351367 +epoch: 1, batch: 44980, sum loss: 3608.205566, avg loss: 2.429768, ppl: 11.356245 +epoch: 1, batch: 44981, sum loss: 5127.725098, avg loss: 2.791358, ppl: 16.303152 +epoch: 1, batch: 44982, sum loss: 4180.839355, avg loss: 2.585553, ppl: 13.270628 +epoch: 1, batch: 44983, sum loss: 4542.208008, avg loss: 2.457905, ppl: 11.680313 +epoch: 1, batch: 44984, sum loss: 4186.478027, avg loss: 2.541881, ppl: 12.703542 +epoch: 1, batch: 44985, sum loss: 4150.164551, avg loss: 2.536776, ppl: 12.638852 +epoch: 1, batch: 44986, sum loss: 4532.110352, avg loss: 2.691277, ppl: 14.750501 +epoch: 1, batch: 44987, sum loss: 4319.563965, avg loss: 2.548415, ppl: 12.786826 +epoch: 1, batch: 44988, sum loss: 3530.822754, avg loss: 2.358599, ppl: 10.576123 +epoch: 1, batch: 44989, sum loss: 3805.180420, avg loss: 2.423682, ppl: 11.287340 +epoch: 1, batch: 44990, sum loss: 3843.390137, avg loss: 2.593381, ppl: 13.374911 +epoch: 1, batch: 44991, sum loss: 4968.998535, avg loss: 2.623547, ppl: 13.784535 +epoch: 1, batch: 44992, sum loss: 4388.898438, avg loss: 2.340746, ppl: 10.388983 +epoch: 1, batch: 44993, sum loss: 4182.551270, avg loss: 2.430303, ppl: 11.362323 +epoch: 1, batch: 44994, sum loss: 4265.465820, avg loss: 2.485703, ppl: 12.009557 +epoch: 1, batch: 44995, sum loss: 4379.813477, avg loss: 2.422463, ppl: 11.273594 +epoch: 1, batch: 44996, sum loss: 4331.403809, avg loss: 2.715614, ppl: 15.113881 +epoch: 1, batch: 44997, sum loss: 3105.730469, avg loss: 2.278599, ppl: 9.762993 +epoch: 1, batch: 44998, sum loss: 4338.089355, avg loss: 2.422160, ppl: 11.270181 +epoch: 1, batch: 44999, sum loss: 3959.182617, avg loss: 2.542828, ppl: 12.715575 +epoch: 1, batch: 45000, sum loss: 5010.875488, avg loss: 2.675321, ppl: 14.517004 +epoch: 1, batch: 45001, sum loss: 4922.102539, avg loss: 2.828794, ppl: 16.925045 +epoch: 1, batch: 45002, sum loss: 
4036.562012, avg loss: 2.671451, ppl: 14.460931 +epoch: 1, batch: 45003, sum loss: 4631.832520, avg loss: 2.665036, ppl: 14.368466 +epoch: 1, batch: 45004, sum loss: 4218.917480, avg loss: 2.440091, ppl: 11.474087 +epoch: 1, batch: 45005, sum loss: 3603.900879, avg loss: 2.449967, ppl: 11.587960 +epoch: 1, batch: 45006, sum loss: 4885.826172, avg loss: 2.795095, ppl: 16.364187 +epoch: 1, batch: 45007, sum loss: 4359.820312, avg loss: 2.682966, ppl: 14.628420 +epoch: 1, batch: 45008, sum loss: 3965.880615, avg loss: 2.463280, ppl: 11.743263 +epoch: 1, batch: 45009, sum loss: 4084.684326, avg loss: 2.453264, ppl: 11.626233 +epoch: 1, batch: 45010, sum loss: 4183.070801, avg loss: 2.433433, ppl: 11.397942 +epoch: 1, batch: 45011, sum loss: 3900.045410, avg loss: 2.380980, ppl: 10.815497 +epoch: 1, batch: 45012, sum loss: 4859.968262, avg loss: 2.718103, ppl: 15.151555 +epoch: 1, batch: 45013, sum loss: 4176.818359, avg loss: 2.484722, ppl: 11.997789 +epoch: 1, batch: 45014, sum loss: 4244.556152, avg loss: 2.419929, ppl: 11.245064 +epoch: 1, batch: 45015, sum loss: 5308.498047, avg loss: 2.888193, ppl: 17.960819 +epoch: 1, batch: 45016, sum loss: 4029.473877, avg loss: 2.628489, ppl: 13.852826 +epoch: 1, batch: 45017, sum loss: 3996.766846, avg loss: 2.562030, ppl: 12.962105 +epoch: 1, batch: 45018, sum loss: 3739.473877, avg loss: 2.448902, ppl: 11.575634 +epoch: 1, batch: 45019, sum loss: 4728.761230, avg loss: 2.565796, ppl: 13.011006 +epoch: 1, batch: 45020, sum loss: 4312.593750, avg loss: 2.573147, ppl: 13.107005 +epoch: 1, batch: 45021, sum loss: 4863.307129, avg loss: 2.603483, ppl: 13.510720 +epoch: 1, batch: 45022, sum loss: 4834.386230, avg loss: 2.812325, ppl: 16.648577 +epoch: 1, batch: 45023, sum loss: 4779.831543, avg loss: 2.691347, ppl: 14.751528 +epoch: 1, batch: 45024, sum loss: 4298.795898, avg loss: 2.686747, ppl: 14.683836 +epoch: 1, batch: 45025, sum loss: 4068.771973, avg loss: 2.234361, ppl: 9.340515 +epoch: 1, batch: 45026, sum loss: 
4160.998047, avg loss: 2.431910, ppl: 11.380602 +epoch: 1, batch: 45027, sum loss: 4099.770508, avg loss: 2.621337, ppl: 13.754096 +epoch: 1, batch: 45028, sum loss: 4456.802246, avg loss: 2.739276, ppl: 15.475780 +epoch: 1, batch: 45029, sum loss: 3058.654053, avg loss: 2.137424, ppl: 8.477573 +epoch: 1, batch: 45030, sum loss: 5206.420410, avg loss: 2.735901, ppl: 15.423639 +epoch: 1, batch: 45031, sum loss: 4752.512207, avg loss: 2.649115, ppl: 14.141520 +epoch: 1, batch: 45032, sum loss: 4456.544434, avg loss: 2.744178, ppl: 15.551819 +epoch: 1, batch: 45033, sum loss: 4804.750488, avg loss: 2.663387, ppl: 14.344793 +epoch: 1, batch: 45034, sum loss: 5517.646484, avg loss: 2.692848, ppl: 14.773698 +epoch: 1, batch: 45035, sum loss: 5157.102539, avg loss: 2.855539, ppl: 17.383799 +epoch: 1, batch: 45036, sum loss: 4072.041992, avg loss: 2.448612, ppl: 11.572276 +epoch: 1, batch: 45037, sum loss: 5221.760742, avg loss: 2.851863, ppl: 17.320017 +epoch: 1, batch: 45038, sum loss: 5248.735352, avg loss: 2.977161, ppl: 19.632006 +epoch: 1, batch: 45039, sum loss: 4182.017090, avg loss: 2.344180, ppl: 10.424722 +epoch: 1, batch: 45040, sum loss: 4553.536621, avg loss: 2.615472, ppl: 13.673667 +epoch: 1, batch: 45041, sum loss: 3915.867188, avg loss: 2.296696, ppl: 9.941286 +epoch: 1, batch: 45042, sum loss: 3893.564209, avg loss: 2.536524, ppl: 12.635671 +epoch: 1, batch: 45043, sum loss: 3314.020752, avg loss: 2.392795, ppl: 10.944038 +epoch: 1, batch: 45044, sum loss: 4887.178711, avg loss: 2.661862, ppl: 14.322932 +epoch: 1, batch: 45045, sum loss: 4479.811035, avg loss: 2.635183, ppl: 13.945866 +epoch: 1, batch: 45046, sum loss: 4748.633789, avg loss: 2.655835, ppl: 14.236875 +epoch: 1, batch: 45047, sum loss: 5017.355469, avg loss: 2.765907, ppl: 15.893454 +epoch: 1, batch: 45048, sum loss: 4601.051270, avg loss: 2.539212, ppl: 12.669677 +epoch: 1, batch: 45049, sum loss: 4319.595703, avg loss: 2.677989, ppl: 14.555789 +epoch: 1, batch: 45050, sum loss: 
5226.345703, avg loss: 2.558172, ppl: 12.912195 +epoch: 1, batch: 45051, sum loss: 4224.517090, avg loss: 2.643628, ppl: 14.064131 +epoch: 1, batch: 45052, sum loss: 4370.464355, avg loss: 2.411956, ppl: 11.155762 +epoch: 1, batch: 45053, sum loss: 3910.454346, avg loss: 2.381519, ppl: 10.821331 +epoch: 1, batch: 45054, sum loss: 4871.610352, avg loss: 2.698953, ppl: 14.864163 +epoch: 1, batch: 45055, sum loss: 3874.699463, avg loss: 2.535798, ppl: 12.626504 +epoch: 1, batch: 45056, sum loss: 4709.019531, avg loss: 2.923041, ppl: 18.597763 +epoch: 1, batch: 45057, sum loss: 4712.426758, avg loss: 2.665400, ppl: 14.373694 +epoch: 1, batch: 45058, sum loss: 4022.319092, avg loss: 2.352233, ppl: 10.509014 +epoch: 1, batch: 45059, sum loss: 4293.630859, avg loss: 2.622866, ppl: 13.775142 +epoch: 1, batch: 45060, sum loss: 3972.785400, avg loss: 2.517608, ppl: 12.398902 +epoch: 1, batch: 45061, sum loss: 4420.809082, avg loss: 2.773406, ppl: 16.013083 +epoch: 1, batch: 45062, sum loss: 4276.419434, avg loss: 2.573056, ppl: 13.105818 +epoch: 1, batch: 45063, sum loss: 4010.661377, avg loss: 2.623062, ppl: 13.777845 +epoch: 1, batch: 45064, sum loss: 4189.057617, avg loss: 2.566825, ppl: 13.024402 +epoch: 1, batch: 45065, sum loss: 4506.833008, avg loss: 2.716596, ppl: 15.128738 +epoch: 1, batch: 45066, sum loss: 5316.949219, avg loss: 2.811713, ppl: 16.638395 +epoch: 1, batch: 45067, sum loss: 4074.834717, avg loss: 2.425497, ppl: 11.307849 +epoch: 1, batch: 45068, sum loss: 3797.097168, avg loss: 2.441863, ppl: 11.494438 +epoch: 1, batch: 45069, sum loss: 4687.892578, avg loss: 2.575765, ppl: 13.141372 +epoch: 1, batch: 45070, sum loss: 4772.187012, avg loss: 2.530322, ppl: 12.557550 +epoch: 1, batch: 45071, sum loss: 5087.483398, avg loss: 2.807662, ppl: 16.571129 +epoch: 1, batch: 45072, sum loss: 4657.488281, avg loss: 2.580326, ppl: 13.201439 +epoch: 1, batch: 45073, sum loss: 3211.803223, avg loss: 2.125614, ppl: 8.378043 +epoch: 1, batch: 45074, sum loss: 
5129.574707, avg loss: 2.684236, ppl: 14.647004 +epoch: 1, batch: 45075, sum loss: 4868.859375, avg loss: 2.752323, ppl: 15.679014 +epoch: 1, batch: 45076, sum loss: 4054.265381, avg loss: 2.526022, ppl: 12.503667 +epoch: 1, batch: 45077, sum loss: 4824.182129, avg loss: 2.657952, ppl: 14.267035 +epoch: 1, batch: 45078, sum loss: 3790.675537, avg loss: 2.344265, ppl: 10.425602 +epoch: 1, batch: 45079, sum loss: 4121.666992, avg loss: 2.265897, ppl: 9.639771 +epoch: 1, batch: 45080, sum loss: 4596.571289, avg loss: 2.663135, ppl: 14.341179 +epoch: 1, batch: 45081, sum loss: 4618.119141, avg loss: 2.557098, ppl: 12.898334 +epoch: 1, batch: 45082, sum loss: 3675.494385, avg loss: 2.277258, ppl: 9.749908 +epoch: 1, batch: 45083, sum loss: 4220.423340, avg loss: 2.426925, ppl: 11.324012 +epoch: 1, batch: 45084, sum loss: 4185.767578, avg loss: 2.523067, ppl: 12.466767 +epoch: 1, batch: 45085, sum loss: 3755.531006, avg loss: 2.462643, ppl: 11.735793 +epoch: 1, batch: 45086, sum loss: 4929.488281, avg loss: 2.749296, ppl: 15.631630 +epoch: 1, batch: 45087, sum loss: 4379.389648, avg loss: 2.518338, ppl: 12.407957 +epoch: 1, batch: 45088, sum loss: 5246.960449, avg loss: 2.749979, ppl: 15.642307 +epoch: 1, batch: 45089, sum loss: 4494.404297, avg loss: 2.563836, ppl: 12.985533 +epoch: 1, batch: 45090, sum loss: 3824.991699, avg loss: 2.470925, ppl: 11.833386 +epoch: 1, batch: 45091, sum loss: 4521.865234, avg loss: 2.667767, ppl: 14.407762 +epoch: 1, batch: 45092, sum loss: 5623.268555, avg loss: 2.831455, ppl: 16.970127 +epoch: 1, batch: 45093, sum loss: 3623.863525, avg loss: 2.422369, ppl: 11.272530 +epoch: 1, batch: 45094, sum loss: 4829.068359, avg loss: 2.702333, ppl: 14.914483 +epoch: 1, batch: 45095, sum loss: 4393.778320, avg loss: 2.635740, ppl: 13.953631 +epoch: 1, batch: 45096, sum loss: 4416.666992, avg loss: 2.594986, ppl: 13.396405 +epoch: 1, batch: 45097, sum loss: 4694.239258, avg loss: 2.615175, ppl: 13.669612 +epoch: 1, batch: 45098, sum loss: 
3879.506836, avg loss: 2.502908, ppl: 12.217969 +epoch: 1, batch: 45099, sum loss: 3365.194092, avg loss: 2.345083, ppl: 10.434138 +epoch: 1, batch: 45100, sum loss: 4812.300293, avg loss: 2.644121, ppl: 14.071074 +epoch: 1, batch: 45101, sum loss: 4939.601562, avg loss: 2.644326, ppl: 14.073959 +epoch: 1, batch: 45102, sum loss: 4883.113281, avg loss: 2.560626, ppl: 12.943915 +epoch: 1, batch: 45103, sum loss: 4591.242676, avg loss: 2.764144, ppl: 15.865449 +epoch: 1, batch: 45104, sum loss: 4451.357422, avg loss: 2.489573, ppl: 12.056133 +epoch: 1, batch: 45105, sum loss: 4314.549316, avg loss: 2.698280, ppl: 14.854155 +epoch: 1, batch: 45106, sum loss: 4864.276367, avg loss: 2.668281, ppl: 14.415170 +epoch: 1, batch: 45107, sum loss: 3519.037598, avg loss: 2.316680, ppl: 10.141952 +epoch: 1, batch: 45108, sum loss: 4544.041016, avg loss: 2.527275, ppl: 12.519345 +epoch: 1, batch: 45109, sum loss: 4672.261719, avg loss: 2.716431, ppl: 15.126243 +epoch: 1, batch: 45110, sum loss: 3670.884766, avg loss: 2.526418, ppl: 12.508616 +epoch: 1, batch: 45111, sum loss: 4708.364258, avg loss: 2.676728, ppl: 14.537449 +epoch: 1, batch: 45112, sum loss: 4009.284424, avg loss: 2.532713, ppl: 12.587606 +epoch: 1, batch: 45113, sum loss: 5426.236328, avg loss: 2.795588, ppl: 16.372253 +epoch: 1, batch: 45114, sum loss: 3878.960449, avg loss: 2.563755, ppl: 12.984477 +epoch: 1, batch: 45115, sum loss: 4682.502441, avg loss: 2.571391, ppl: 13.084007 +epoch: 1, batch: 45116, sum loss: 4627.079590, avg loss: 2.693294, ppl: 14.780286 +epoch: 1, batch: 45117, sum loss: 4143.873535, avg loss: 2.697834, ppl: 14.847545 +epoch: 1, batch: 45118, sum loss: 4269.220215, avg loss: 2.452166, ppl: 11.613470 +epoch: 1, batch: 45119, sum loss: 4675.054688, avg loss: 2.404864, ppl: 11.076921 +epoch: 1, batch: 45120, sum loss: 3707.737305, avg loss: 2.387468, ppl: 10.885895 +epoch: 1, batch: 45121, sum loss: 4878.925781, avg loss: 2.349025, ppl: 10.475356 +epoch: 1, batch: 45122, sum loss: 
4469.771484, avg loss: 2.658995, ppl: 14.281934 +epoch: 1, batch: 45123, sum loss: 4487.001953, avg loss: 2.540771, ppl: 12.689454 +epoch: 1, batch: 45124, sum loss: 4115.183594, avg loss: 2.563977, ppl: 12.987369 +epoch: 1, batch: 45125, sum loss: 3878.641113, avg loss: 2.712337, ppl: 15.064433 +epoch: 1, batch: 45126, sum loss: 4575.779785, avg loss: 2.685317, ppl: 14.662846 +epoch: 1, batch: 45127, sum loss: 4794.154297, avg loss: 2.459802, ppl: 11.702496 +epoch: 1, batch: 45128, sum loss: 4571.344238, avg loss: 2.650055, ppl: 14.154813 +epoch: 1, batch: 45129, sum loss: 4420.471191, avg loss: 2.472299, ppl: 11.849662 +epoch: 1, batch: 45130, sum loss: 4100.291016, avg loss: 2.348391, ppl: 10.468715 +epoch: 1, batch: 45131, sum loss: 3252.111572, avg loss: 2.339648, ppl: 10.377588 +epoch: 1, batch: 45132, sum loss: 4025.698730, avg loss: 2.582232, ppl: 13.226621 +epoch: 1, batch: 45133, sum loss: 4635.510254, avg loss: 2.650378, ppl: 14.159383 +epoch: 1, batch: 45134, sum loss: 3752.434570, avg loss: 2.476855, ppl: 11.903763 +epoch: 1, batch: 45135, sum loss: 4463.083496, avg loss: 2.685369, ppl: 14.663615 +epoch: 1, batch: 45136, sum loss: 4301.151367, avg loss: 2.856010, ppl: 17.391998 +epoch: 1, batch: 45137, sum loss: 4584.752441, avg loss: 2.600540, ppl: 13.471012 +epoch: 1, batch: 45138, sum loss: 5277.578125, avg loss: 2.699528, ppl: 14.872717 +epoch: 1, batch: 45139, sum loss: 4027.702148, avg loss: 2.550793, ppl: 12.817263 +epoch: 1, batch: 45140, sum loss: 4652.885254, avg loss: 2.652728, ppl: 14.192705 +epoch: 1, batch: 45141, sum loss: 4198.881836, avg loss: 2.520337, ppl: 12.432787 +epoch: 1, batch: 45142, sum loss: 4513.687012, avg loss: 2.588123, ppl: 13.304776 +epoch: 1, batch: 45143, sum loss: 4293.208984, avg loss: 2.373250, ppl: 10.732216 +epoch: 1, batch: 45144, sum loss: 4281.309082, avg loss: 2.566732, ppl: 13.023197 +epoch: 1, batch: 45145, sum loss: 3655.056152, avg loss: 2.271632, ppl: 9.695212 +epoch: 1, batch: 45146, sum loss: 
4058.374023, avg loss: 2.649069, ppl: 14.140872 +epoch: 1, batch: 45147, sum loss: 5757.712891, avg loss: 2.927154, ppl: 18.674414 +epoch: 1, batch: 45148, sum loss: 4636.868652, avg loss: 2.649639, ppl: 14.148932 +epoch: 1, batch: 45149, sum loss: 4499.528320, avg loss: 2.436128, ppl: 11.428704 +epoch: 1, batch: 45150, sum loss: 3947.627930, avg loss: 2.522446, ppl: 12.459033 +epoch: 1, batch: 45151, sum loss: 4350.117188, avg loss: 2.626882, ppl: 13.830584 +epoch: 1, batch: 45152, sum loss: 4385.747559, avg loss: 2.370674, ppl: 10.704609 +epoch: 1, batch: 45153, sum loss: 5286.687500, avg loss: 2.807588, ppl: 16.569897 +epoch: 1, batch: 45154, sum loss: 4387.725586, avg loss: 2.568926, ppl: 13.051798 +epoch: 1, batch: 45155, sum loss: 3961.498291, avg loss: 2.602824, ppl: 13.501816 +epoch: 1, batch: 45156, sum loss: 4561.915527, avg loss: 2.516225, ppl: 12.381765 +epoch: 1, batch: 45157, sum loss: 4043.395020, avg loss: 2.372885, ppl: 10.728293 +epoch: 1, batch: 45158, sum loss: 4527.696289, avg loss: 2.608120, ppl: 13.573508 +epoch: 1, batch: 45159, sum loss: 4559.165039, avg loss: 2.756448, ppl: 15.743822 +epoch: 1, batch: 45160, sum loss: 3977.706055, avg loss: 2.459930, ppl: 11.703989 +epoch: 1, batch: 45161, sum loss: 4804.834961, avg loss: 2.524874, ppl: 12.489321 +epoch: 1, batch: 45162, sum loss: 4227.024414, avg loss: 2.554093, ppl: 12.859635 +epoch: 1, batch: 45163, sum loss: 4032.851562, avg loss: 2.542782, ppl: 12.714990 +epoch: 1, batch: 45164, sum loss: 4111.210938, avg loss: 2.616939, ppl: 13.693744 +epoch: 1, batch: 45165, sum loss: 3433.407471, avg loss: 2.175797, ppl: 8.809201 +epoch: 1, batch: 45166, sum loss: 4489.328613, avg loss: 2.720805, ppl: 15.192550 +epoch: 1, batch: 45167, sum loss: 3954.214355, avg loss: 2.434861, ppl: 11.414231 +epoch: 1, batch: 45168, sum loss: 4191.778320, avg loss: 2.496592, ppl: 12.141047 +epoch: 1, batch: 45169, sum loss: 4713.488281, avg loss: 2.682691, ppl: 14.624396 +epoch: 1, batch: 45170, sum loss: 
4615.830078, avg loss: 2.365879, ppl: 10.653402 +epoch: 1, batch: 45171, sum loss: 5082.187012, avg loss: 2.853558, ppl: 17.349407 +epoch: 1, batch: 45172, sum loss: 4232.722656, avg loss: 2.391369, ppl: 10.928443 +epoch: 1, batch: 45173, sum loss: 3857.867432, avg loss: 2.460375, ppl: 11.709197 +epoch: 1, batch: 45174, sum loss: 3792.631836, avg loss: 2.590595, ppl: 13.337711 +epoch: 1, batch: 45175, sum loss: 4427.714844, avg loss: 2.589307, ppl: 13.320535 +epoch: 1, batch: 45176, sum loss: 4796.119629, avg loss: 2.567516, ppl: 13.033407 +epoch: 1, batch: 45177, sum loss: 3445.964355, avg loss: 2.082154, ppl: 8.021728 +epoch: 1, batch: 45178, sum loss: 3834.145264, avg loss: 2.573252, ppl: 13.108383 +epoch: 1, batch: 45179, sum loss: 3830.127686, avg loss: 2.421067, ppl: 11.257862 +epoch: 1, batch: 45180, sum loss: 3593.565674, avg loss: 2.405332, ppl: 11.082108 +epoch: 1, batch: 45181, sum loss: 3685.233887, avg loss: 2.641745, ppl: 14.037676 +epoch: 1, batch: 45182, sum loss: 4317.270996, avg loss: 2.741124, ppl: 15.504408 +epoch: 1, batch: 45183, sum loss: 4706.020508, avg loss: 2.512558, ppl: 12.336443 +epoch: 1, batch: 45184, sum loss: 4379.573730, avg loss: 2.690156, ppl: 14.733971 +epoch: 1, batch: 45185, sum loss: 5170.955566, avg loss: 2.636897, ppl: 13.969792 +epoch: 1, batch: 45186, sum loss: 4923.907715, avg loss: 2.978770, ppl: 19.663626 +epoch: 1, batch: 45187, sum loss: 5103.467773, avg loss: 2.807188, ppl: 16.563278 +epoch: 1, batch: 45188, sum loss: 4345.134277, avg loss: 2.461833, ppl: 11.726280 +epoch: 1, batch: 45189, sum loss: 4420.051758, avg loss: 2.817114, ppl: 16.728500 +epoch: 1, batch: 45190, sum loss: 4409.051270, avg loss: 2.560425, ppl: 12.941311 +epoch: 1, batch: 45191, sum loss: 3972.281494, avg loss: 2.533343, ppl: 12.595541 +epoch: 1, batch: 45192, sum loss: 4468.289062, avg loss: 2.496251, ppl: 12.136906 +epoch: 1, batch: 45193, sum loss: 4058.693848, avg loss: 2.591758, ppl: 13.353232 +epoch: 1, batch: 45194, sum loss: 
4169.481445, avg loss: 2.625618, ppl: 13.813111 +epoch: 1, batch: 45195, sum loss: 4722.457031, avg loss: 2.533507, ppl: 12.597607 +epoch: 1, batch: 45196, sum loss: 4996.146484, avg loss: 2.713822, ppl: 15.086829 +epoch: 1, batch: 45197, sum loss: 4000.963867, avg loss: 2.408768, ppl: 11.120255 +epoch: 1, batch: 45198, sum loss: 4312.939941, avg loss: 2.481554, ppl: 11.959830 +epoch: 1, batch: 45199, sum loss: 4464.759766, avg loss: 2.629423, ppl: 13.865766 +epoch: 1, batch: 45200, sum loss: 4808.852051, avg loss: 2.680520, ppl: 14.592673 +epoch: 1, batch: 45201, sum loss: 3748.709717, avg loss: 2.326946, ppl: 10.246601 +epoch: 1, batch: 45202, sum loss: 3647.838623, avg loss: 2.527955, ppl: 12.527855 +epoch: 1, batch: 45203, sum loss: 4150.205078, avg loss: 2.561855, ppl: 12.959837 +epoch: 1, batch: 45204, sum loss: 4316.630859, avg loss: 2.866289, ppl: 17.571688 +epoch: 1, batch: 45205, sum loss: 6134.304199, avg loss: 2.875905, ppl: 17.741465 +epoch: 1, batch: 45206, sum loss: 4633.250977, avg loss: 2.782733, ppl: 16.163141 +epoch: 1, batch: 45207, sum loss: 3901.930176, avg loss: 2.239914, ppl: 9.392523 +epoch: 1, batch: 45208, sum loss: 4214.700684, avg loss: 2.604883, ppl: 13.529642 +epoch: 1, batch: 45209, sum loss: 3266.585449, avg loss: 2.217641, ppl: 9.185639 +epoch: 1, batch: 45210, sum loss: 4125.734863, avg loss: 2.474946, ppl: 11.881063 +epoch: 1, batch: 45211, sum loss: 5164.934570, avg loss: 2.845694, ppl: 17.213501 +epoch: 1, batch: 45212, sum loss: 3866.775146, avg loss: 2.182153, ppl: 8.865373 +epoch: 1, batch: 45213, sum loss: 4980.010254, avg loss: 2.710948, ppl: 15.043523 +epoch: 1, batch: 45214, sum loss: 4184.048828, avg loss: 2.358539, ppl: 10.575493 +epoch: 1, batch: 45215, sum loss: 4458.800781, avg loss: 2.712166, ppl: 15.061862 +epoch: 1, batch: 45216, sum loss: 4903.570801, avg loss: 2.797245, ppl: 16.399408 +epoch: 1, batch: 45217, sum loss: 4403.176758, avg loss: 2.533473, ppl: 12.597186 +epoch: 1, batch: 45218, sum loss: 
4509.305664, avg loss: 2.759673, ppl: 15.794679 +epoch: 1, batch: 45219, sum loss: 4110.981445, avg loss: 2.653958, ppl: 14.210176 +epoch: 1, batch: 45220, sum loss: 4848.550781, avg loss: 2.500542, ppl: 12.189097 +epoch: 1, batch: 45221, sum loss: 3732.612793, avg loss: 2.255355, ppl: 9.538682 +epoch: 1, batch: 45222, sum loss: 4449.755859, avg loss: 2.636111, ppl: 13.958819 +epoch: 1, batch: 45223, sum loss: 4416.187012, avg loss: 2.520655, ppl: 12.436739 +epoch: 1, batch: 45224, sum loss: 4817.437500, avg loss: 2.720179, ppl: 15.183044 +epoch: 1, batch: 45225, sum loss: 3702.955078, avg loss: 2.229353, ppl: 9.293851 +epoch: 1, batch: 45226, sum loss: 4399.306152, avg loss: 2.575706, ppl: 13.140592 +epoch: 1, batch: 45227, sum loss: 5026.177246, avg loss: 2.789222, ppl: 16.268354 +epoch: 1, batch: 45228, sum loss: 4803.892578, avg loss: 2.629388, ppl: 13.865287 +epoch: 1, batch: 45229, sum loss: 3806.084961, avg loss: 2.505652, ppl: 12.251541 +epoch: 1, batch: 45230, sum loss: 4394.707031, avg loss: 2.674806, ppl: 14.509540 +epoch: 1, batch: 45231, sum loss: 4686.125000, avg loss: 2.899830, ppl: 18.171053 +epoch: 1, batch: 45232, sum loss: 3424.677734, avg loss: 2.411745, ppl: 11.153408 +epoch: 1, batch: 45233, sum loss: 4620.329590, avg loss: 2.678452, ppl: 14.562533 +epoch: 1, batch: 45234, sum loss: 4892.332031, avg loss: 2.478385, ppl: 11.921994 +epoch: 1, batch: 45235, sum loss: 3494.975098, avg loss: 2.498195, ppl: 12.160526 +epoch: 1, batch: 45236, sum loss: 5164.792480, avg loss: 2.750156, ppl: 15.645067 +epoch: 1, batch: 45237, sum loss: 4341.498535, avg loss: 2.598144, ppl: 13.438773 +epoch: 1, batch: 45238, sum loss: 4181.284180, avg loss: 2.230018, ppl: 9.300035 +epoch: 1, batch: 45239, sum loss: 4308.359375, avg loss: 2.609545, ppl: 13.592871 +epoch: 1, batch: 45240, sum loss: 4266.271973, avg loss: 2.591903, ppl: 13.355162 +epoch: 1, batch: 45241, sum loss: 5094.748535, avg loss: 2.569213, ppl: 13.055541 +epoch: 1, batch: 45242, sum loss: 
3796.049805, avg loss: 2.641649, ppl: 14.036334 +epoch: 1, batch: 45243, sum loss: 4249.185059, avg loss: 2.463296, ppl: 11.743453 +epoch: 1, batch: 45244, sum loss: 4651.358887, avg loss: 2.665535, ppl: 14.375642 +epoch: 1, batch: 45245, sum loss: 3936.879395, avg loss: 2.523641, ppl: 12.473927 +epoch: 1, batch: 45246, sum loss: 4772.756348, avg loss: 2.906673, ppl: 18.295822 +epoch: 1, batch: 45247, sum loss: 4506.736328, avg loss: 2.466741, ppl: 11.783984 +epoch: 1, batch: 45248, sum loss: 3739.444824, avg loss: 2.494626, ppl: 12.117205 +epoch: 1, batch: 45249, sum loss: 4713.206055, avg loss: 2.601107, ppl: 13.478652 +epoch: 1, batch: 45250, sum loss: 3901.071045, avg loss: 2.301517, ppl: 9.989323 +epoch: 1, batch: 45251, sum loss: 3647.810059, avg loss: 2.438376, ppl: 11.454420 +epoch: 1, batch: 45252, sum loss: 5098.279785, avg loss: 2.680484, ppl: 14.592152 +epoch: 1, batch: 45253, sum loss: 3140.298096, avg loss: 2.377213, ppl: 10.774829 +epoch: 1, batch: 45254, sum loss: 4069.191895, avg loss: 2.691265, ppl: 14.750318 +epoch: 1, batch: 45255, sum loss: 4143.100098, avg loss: 2.476450, ppl: 11.898945 +epoch: 1, batch: 45256, sum loss: 3705.256592, avg loss: 2.287195, ppl: 9.847281 +epoch: 1, batch: 45257, sum loss: 4578.282715, avg loss: 2.582224, ppl: 13.226520 +epoch: 1, batch: 45258, sum loss: 4631.963867, avg loss: 2.693002, ppl: 14.775970 +epoch: 1, batch: 45259, sum loss: 5269.541016, avg loss: 2.705103, ppl: 14.955863 +epoch: 1, batch: 45260, sum loss: 4051.862305, avg loss: 2.361225, ppl: 10.603934 +epoch: 1, batch: 45261, sum loss: 3932.970703, avg loss: 2.632510, ppl: 13.908643 +epoch: 1, batch: 45262, sum loss: 5015.712891, avg loss: 2.816234, ppl: 16.713789 +epoch: 1, batch: 45263, sum loss: 3379.598145, avg loss: 2.330757, ppl: 10.285728 +epoch: 1, batch: 45264, sum loss: 4654.432129, avg loss: 2.620739, ppl: 13.745878 +epoch: 1, batch: 45265, sum loss: 4250.395508, avg loss: 2.495828, ppl: 12.131776 +epoch: 1, batch: 45266, sum loss: 
4888.459961, avg loss: 2.779113, ppl: 16.104734 +epoch: 1, batch: 45267, sum loss: 4576.510742, avg loss: 2.453893, ppl: 11.633551 +epoch: 1, batch: 45268, sum loss: 3429.735596, avg loss: 2.292604, ppl: 9.900685 +epoch: 1, batch: 45269, sum loss: 3568.753418, avg loss: 2.441008, ppl: 11.484610 +epoch: 1, batch: 45270, sum loss: 3993.367676, avg loss: 2.525849, ppl: 12.501506 +epoch: 1, batch: 45271, sum loss: 3725.278809, avg loss: 2.278458, ppl: 9.761615 +epoch: 1, batch: 45272, sum loss: 4637.969727, avg loss: 2.712263, ppl: 15.063323 +epoch: 1, batch: 45273, sum loss: 3764.272705, avg loss: 2.279996, ppl: 9.776638 +epoch: 1, batch: 45274, sum loss: 4340.877930, avg loss: 2.650109, ppl: 14.155576 +epoch: 1, batch: 45275, sum loss: 4482.088867, avg loss: 2.472195, ppl: 11.848422 +epoch: 1, batch: 45276, sum loss: 4591.383789, avg loss: 2.528295, ppl: 12.532121 +epoch: 1, batch: 45277, sum loss: 4364.291504, avg loss: 2.570254, ppl: 13.069148 +epoch: 1, batch: 45278, sum loss: 4332.970215, avg loss: 2.607082, ppl: 13.559428 +epoch: 1, batch: 45279, sum loss: 4683.833008, avg loss: 2.655234, ppl: 14.228316 +epoch: 1, batch: 45280, sum loss: 4674.184082, avg loss: 2.593887, ppl: 13.381683 +epoch: 1, batch: 45281, sum loss: 3518.039062, avg loss: 2.306911, ppl: 10.043350 +epoch: 1, batch: 45282, sum loss: 4118.868164, avg loss: 2.547228, ppl: 12.771656 +epoch: 1, batch: 45283, sum loss: 4005.701172, avg loss: 2.587662, ppl: 13.298646 +epoch: 1, batch: 45284, sum loss: 4287.400391, avg loss: 3.019296, ppl: 20.476875 +epoch: 1, batch: 45285, sum loss: 4297.085938, avg loss: 2.550199, ppl: 12.809656 +epoch: 1, batch: 45286, sum loss: 4684.950195, avg loss: 2.592667, ppl: 13.365375 +epoch: 1, batch: 45287, sum loss: 3114.645264, avg loss: 2.333068, ppl: 10.309519 +epoch: 1, batch: 45288, sum loss: 4917.649902, avg loss: 2.892735, ppl: 18.042597 +epoch: 1, batch: 45289, sum loss: 5398.957031, avg loss: 2.793046, ppl: 16.330683 +epoch: 1, batch: 45290, sum loss: 
3760.872559, avg loss: 2.374288, ppl: 10.743364 +epoch: 1, batch: 45291, sum loss: 5240.889160, avg loss: 2.871720, ppl: 17.667381 +epoch: 1, batch: 45292, sum loss: 4599.996582, avg loss: 2.736464, ppl: 15.432323 +epoch: 1, batch: 45293, sum loss: 4118.096680, avg loss: 2.423836, ppl: 11.289079 +epoch: 1, batch: 45294, sum loss: 4423.982910, avg loss: 2.558695, ppl: 12.918941 +epoch: 1, batch: 45295, sum loss: 3917.831055, avg loss: 2.565705, ppl: 13.009822 +epoch: 1, batch: 45296, sum loss: 3458.771484, avg loss: 2.321323, ppl: 10.189147 +epoch: 1, batch: 45297, sum loss: 4106.141602, avg loss: 2.545655, ppl: 12.751577 +epoch: 1, batch: 45298, sum loss: 3902.792236, avg loss: 2.277008, ppl: 9.747476 +epoch: 1, batch: 45299, sum loss: 3786.418945, avg loss: 2.350353, ppl: 10.489274 +epoch: 1, batch: 45300, sum loss: 4377.096680, avg loss: 2.323300, ppl: 10.209309 +epoch: 1, batch: 45301, sum loss: 4526.151855, avg loss: 2.736489, ppl: 15.432699 +epoch: 1, batch: 45302, sum loss: 5242.138672, avg loss: 2.897810, ppl: 18.134396 +epoch: 1, batch: 45303, sum loss: 4260.570312, avg loss: 2.324370, ppl: 10.220241 +epoch: 1, batch: 45304, sum loss: 3105.224121, avg loss: 2.278228, ppl: 9.759367 +epoch: 1, batch: 45305, sum loss: 4517.122070, avg loss: 2.646235, ppl: 14.100842 +epoch: 1, batch: 45306, sum loss: 4717.160645, avg loss: 2.761804, ppl: 15.828365 +epoch: 1, batch: 45307, sum loss: 4560.041992, avg loss: 2.654274, ppl: 14.214656 +epoch: 1, batch: 45308, sum loss: 4597.104492, avg loss: 2.658823, ppl: 14.279469 +epoch: 1, batch: 45309, sum loss: 5346.731445, avg loss: 2.622232, ppl: 13.766418 +epoch: 1, batch: 45310, sum loss: 4415.751953, avg loss: 2.704073, ppl: 14.940467 +epoch: 1, batch: 45311, sum loss: 4333.540039, avg loss: 2.588734, ppl: 13.312902 +epoch: 1, batch: 45312, sum loss: 4911.505859, avg loss: 2.670748, ppl: 14.450778 +epoch: 1, batch: 45313, sum loss: 4841.494141, avg loss: 2.622694, ppl: 13.772784 +epoch: 1, batch: 45314, sum loss: 
4843.948242, avg loss: 2.575198, ppl: 13.133923 +epoch: 1, batch: 45315, sum loss: 3921.953613, avg loss: 2.437510, ppl: 11.444509 +epoch: 1, batch: 45316, sum loss: 4019.343018, avg loss: 2.519964, ppl: 12.428152 +epoch: 1, batch: 45317, sum loss: 4842.828125, avg loss: 2.447109, ppl: 11.554891 +epoch: 1, batch: 45318, sum loss: 4766.950195, avg loss: 2.742779, ppl: 15.530087 +epoch: 1, batch: 45319, sum loss: 5683.011230, avg loss: 2.873110, ppl: 17.691948 +epoch: 1, batch: 45320, sum loss: 4332.704102, avg loss: 2.343269, ppl: 10.415227 +epoch: 1, batch: 45321, sum loss: 4399.319336, avg loss: 2.666254, ppl: 14.385979 +epoch: 1, batch: 45322, sum loss: 5158.174316, avg loss: 2.720556, ppl: 15.188768 +epoch: 1, batch: 45323, sum loss: 3813.112061, avg loss: 2.316593, ppl: 10.141067 +epoch: 1, batch: 45324, sum loss: 4772.620605, avg loss: 2.420193, ppl: 11.248030 +epoch: 1, batch: 45325, sum loss: 3939.200684, avg loss: 2.501080, ppl: 12.195656 +epoch: 1, batch: 45326, sum loss: 4750.149902, avg loss: 2.622943, ppl: 13.776210 +epoch: 1, batch: 45327, sum loss: 4527.600098, avg loss: 2.796541, ppl: 16.387867 +epoch: 1, batch: 45328, sum loss: 4518.916992, avg loss: 2.600067, ppl: 13.464645 +epoch: 1, batch: 45329, sum loss: 4192.150391, avg loss: 2.507267, ppl: 12.271350 +epoch: 1, batch: 45330, sum loss: 5120.829102, avg loss: 2.856012, ppl: 17.392027 +epoch: 1, batch: 45331, sum loss: 4700.599609, avg loss: 2.410564, ppl: 11.140239 +epoch: 1, batch: 45332, sum loss: 3740.761230, avg loss: 2.357128, ppl: 10.560575 +epoch: 1, batch: 45333, sum loss: 4055.776855, avg loss: 2.580011, ppl: 13.197279 +epoch: 1, batch: 45334, sum loss: 4025.282471, avg loss: 2.471014, ppl: 11.834444 +epoch: 1, batch: 45335, sum loss: 3849.314209, avg loss: 2.396833, ppl: 10.988323 +epoch: 1, batch: 45336, sum loss: 5656.581055, avg loss: 2.918773, ppl: 18.518545 +epoch: 1, batch: 45337, sum loss: 4546.797852, avg loss: 2.544375, ppl: 12.735266 +epoch: 1, batch: 45338, sum loss: 
5123.002930, avg loss: 2.857224, ppl: 17.413124 +epoch: 1, batch: 45339, sum loss: 3607.310303, avg loss: 2.465694, ppl: 11.771651 +epoch: 1, batch: 45340, sum loss: 4136.750977, avg loss: 2.434815, ppl: 11.413712 +epoch: 1, batch: 45341, sum loss: 3831.658691, avg loss: 2.556143, ppl: 12.886024 +epoch: 1, batch: 45342, sum loss: 4899.684082, avg loss: 2.761941, ppl: 15.830544 +epoch: 1, batch: 45343, sum loss: 5042.892090, avg loss: 2.701067, ppl: 14.895617 +epoch: 1, batch: 45344, sum loss: 5217.089844, avg loss: 2.735758, ppl: 15.421422 +epoch: 1, batch: 45345, sum loss: 4445.663086, avg loss: 2.655713, ppl: 14.235130 +epoch: 1, batch: 45346, sum loss: 3978.031738, avg loss: 2.511384, ppl: 12.321969 +epoch: 1, batch: 45347, sum loss: 4341.284668, avg loss: 2.696450, ppl: 14.827003 +epoch: 1, batch: 45348, sum loss: 4419.999512, avg loss: 2.500000, ppl: 12.182491 +epoch: 1, batch: 45349, sum loss: 4696.676758, avg loss: 2.757884, ppl: 15.766450 +epoch: 1, batch: 45350, sum loss: 4416.283691, avg loss: 2.646066, ppl: 14.098462 +epoch: 1, batch: 45351, sum loss: 4112.834473, avg loss: 2.494138, ppl: 12.111295 +epoch: 1, batch: 45352, sum loss: 4182.035156, avg loss: 2.607254, ppl: 13.561759 +epoch: 1, batch: 45353, sum loss: 3810.052246, avg loss: 2.613204, ppl: 13.642698 +epoch: 1, batch: 45354, sum loss: 4155.769531, avg loss: 2.409142, ppl: 11.124410 +epoch: 1, batch: 45355, sum loss: 4425.945801, avg loss: 2.493490, ppl: 12.103449 +epoch: 1, batch: 45356, sum loss: 4602.576172, avg loss: 2.504122, ppl: 12.232811 +epoch: 1, batch: 45357, sum loss: 4138.859863, avg loss: 2.708678, ppl: 15.009420 +epoch: 1, batch: 45358, sum loss: 4069.308350, avg loss: 2.278448, ppl: 9.761519 +epoch: 1, batch: 45359, sum loss: 4777.053223, avg loss: 2.514239, ppl: 12.357197 +epoch: 1, batch: 45360, sum loss: 3447.505371, avg loss: 2.240094, ppl: 9.394218 +epoch: 1, batch: 45361, sum loss: 3264.668945, avg loss: 2.147809, ppl: 8.566066 +epoch: 1, batch: 45362, sum loss: 
3651.575928, avg loss: 2.355855, ppl: 10.547148 +epoch: 1, batch: 45363, sum loss: 3550.841553, avg loss: 2.497076, ppl: 12.146919 +epoch: 1, batch: 45364, sum loss: 4745.154297, avg loss: 2.691523, ppl: 14.754124 +epoch: 1, batch: 45365, sum loss: 4323.029297, avg loss: 2.591744, ppl: 13.353042 +epoch: 1, batch: 45366, sum loss: 5154.305176, avg loss: 2.680346, ppl: 14.590137 +epoch: 1, batch: 45367, sum loss: 3683.204102, avg loss: 2.405751, ppl: 11.086751 +epoch: 1, batch: 45368, sum loss: 4547.014160, avg loss: 2.514941, ppl: 12.365885 +epoch: 1, batch: 45369, sum loss: 4018.228760, avg loss: 2.590734, ppl: 13.339562 +epoch: 1, batch: 45370, sum loss: 4547.981445, avg loss: 2.849612, ppl: 17.281084 +epoch: 1, batch: 45371, sum loss: 5143.534668, avg loss: 2.756449, ppl: 15.743841 +epoch: 1, batch: 45372, sum loss: 4438.149414, avg loss: 2.637047, ppl: 13.971878 +epoch: 1, batch: 45373, sum loss: 4199.799805, avg loss: 2.638065, ppl: 13.986115 +epoch: 1, batch: 45374, sum loss: 5031.494629, avg loss: 2.766077, ppl: 15.896155 +epoch: 1, batch: 45375, sum loss: 3599.488281, avg loss: 2.543808, ppl: 12.728047 +epoch: 1, batch: 45376, sum loss: 4101.332520, avg loss: 2.292528, ppl: 9.899932 +epoch: 1, batch: 45377, sum loss: 4776.149414, avg loss: 2.580308, ppl: 13.201200 +epoch: 1, batch: 45378, sum loss: 4633.540527, avg loss: 2.557142, ppl: 12.898893 +epoch: 1, batch: 45379, sum loss: 5000.810547, avg loss: 2.740170, ppl: 15.489622 +epoch: 1, batch: 45380, sum loss: 3991.307617, avg loss: 2.444156, ppl: 11.520826 +epoch: 1, batch: 45381, sum loss: 3743.303711, avg loss: 2.390360, ppl: 10.917422 +epoch: 1, batch: 45382, sum loss: 3987.536133, avg loss: 2.464485, ppl: 11.757421 +epoch: 1, batch: 45383, sum loss: 4460.775879, avg loss: 2.525921, ppl: 12.502400 +epoch: 1, batch: 45384, sum loss: 5031.863281, avg loss: 2.776967, ppl: 16.070200 +epoch: 1, batch: 45385, sum loss: 4456.932617, avg loss: 2.714332, ppl: 15.094522 +epoch: 1, batch: 45386, sum loss: 
4000.373047, avg loss: 2.436281, ppl: 11.430451 +epoch: 1, batch: 45387, sum loss: 3727.230957, avg loss: 2.417141, ppl: 11.213750 +epoch: 1, batch: 45388, sum loss: 5059.016602, avg loss: 2.737563, ppl: 15.449291 +epoch: 1, batch: 45389, sum loss: 5221.234375, avg loss: 2.634326, ppl: 13.933918 +epoch: 1, batch: 45390, sum loss: 4570.842773, avg loss: 2.589713, ppl: 13.325941 +epoch: 1, batch: 45391, sum loss: 4143.173828, avg loss: 2.540266, ppl: 12.683044 +epoch: 1, batch: 45392, sum loss: 4027.770020, avg loss: 2.398910, ppl: 11.011168 +epoch: 1, batch: 45393, sum loss: 4985.650391, avg loss: 2.583239, ppl: 13.239947 +epoch: 1, batch: 45394, sum loss: 3488.317383, avg loss: 2.282930, ppl: 9.805369 +epoch: 1, batch: 45395, sum loss: 4070.678711, avg loss: 2.425911, ppl: 11.312530 +epoch: 1, batch: 45396, sum loss: 4588.555664, avg loss: 2.518417, ppl: 12.408937 +epoch: 1, batch: 45397, sum loss: 4427.502441, avg loss: 2.357563, ppl: 10.565167 +epoch: 1, batch: 45398, sum loss: 4186.807617, avg loss: 2.529793, ppl: 12.550911 +epoch: 1, batch: 45399, sum loss: 4177.252441, avg loss: 2.468825, ppl: 11.808568 +epoch: 1, batch: 45400, sum loss: 4294.772461, avg loss: 2.414150, ppl: 11.180261 +epoch: 1, batch: 45401, sum loss: 4761.331055, avg loss: 2.744283, ppl: 15.553457 +epoch: 1, batch: 45402, sum loss: 4053.955078, avg loss: 2.533722, ppl: 12.600316 +epoch: 1, batch: 45403, sum loss: 3183.496826, avg loss: 2.212298, ppl: 9.136687 +epoch: 1, batch: 45404, sum loss: 3690.401367, avg loss: 2.323931, ppl: 10.215749 +epoch: 1, batch: 45405, sum loss: 4437.323730, avg loss: 2.529831, ppl: 12.551384 +epoch: 1, batch: 45406, sum loss: 4567.281738, avg loss: 2.691386, ppl: 14.752108 +epoch: 1, batch: 45407, sum loss: 4536.849121, avg loss: 2.645393, ppl: 14.088980 +epoch: 1, batch: 45408, sum loss: 5093.969727, avg loss: 2.575313, ppl: 13.135429 +epoch: 1, batch: 45409, sum loss: 5750.431152, avg loss: 2.809199, ppl: 16.596624 +epoch: 1, batch: 45410, sum loss: 
5382.749023, avg loss: 2.980481, ppl: 19.697292 +epoch: 1, batch: 45411, sum loss: 4066.976318, avg loss: 2.505839, ppl: 12.253831 +epoch: 1, batch: 45412, sum loss: 4396.250000, avg loss: 2.778919, ppl: 16.101604 +epoch: 1, batch: 45413, sum loss: 4845.824219, avg loss: 2.588581, ppl: 13.310874 +epoch: 1, batch: 45414, sum loss: 4495.789062, avg loss: 2.649257, ppl: 14.143525 +epoch: 1, batch: 45415, sum loss: 3651.437500, avg loss: 2.465522, ppl: 11.769619 +epoch: 1, batch: 45416, sum loss: 4120.220215, avg loss: 2.586453, ppl: 13.282580 +epoch: 1, batch: 45417, sum loss: 4088.643066, avg loss: 2.526973, ppl: 12.515571 +epoch: 1, batch: 45418, sum loss: 4239.179199, avg loss: 2.729671, ppl: 15.327847 +epoch: 1, batch: 45419, sum loss: 4071.878174, avg loss: 2.377045, ppl: 10.773021 +epoch: 1, batch: 45420, sum loss: 5066.298828, avg loss: 2.556155, ppl: 12.886171 +epoch: 1, batch: 45421, sum loss: 5555.077637, avg loss: 2.801350, ppl: 16.466867 +epoch: 1, batch: 45422, sum loss: 5206.833984, avg loss: 2.731812, ppl: 15.360696 +epoch: 1, batch: 45423, sum loss: 4323.262695, avg loss: 2.520853, ppl: 12.439200 +epoch: 1, batch: 45424, sum loss: 4027.856201, avg loss: 2.508005, ppl: 12.280408 +epoch: 1, batch: 45425, sum loss: 3836.613770, avg loss: 2.431314, ppl: 11.373820 +epoch: 1, batch: 45426, sum loss: 3771.480713, avg loss: 2.366048, ppl: 10.655201 +epoch: 1, batch: 45427, sum loss: 3762.571289, avg loss: 2.506710, ppl: 12.264511 +epoch: 1, batch: 45428, sum loss: 3630.270508, avg loss: 2.383631, ppl: 10.844212 +epoch: 1, batch: 45429, sum loss: 5082.618652, avg loss: 2.665243, ppl: 14.371444 +epoch: 1, batch: 45430, sum loss: 4848.954590, avg loss: 2.575122, ppl: 13.132917 +epoch: 1, batch: 45431, sum loss: 4879.929688, avg loss: 2.678337, ppl: 14.560857 +epoch: 1, batch: 45432, sum loss: 4246.308105, avg loss: 2.617946, ppl: 13.707535 +epoch: 1, batch: 45433, sum loss: 3869.627686, avg loss: 2.455348, ppl: 11.650482 +epoch: 1, batch: 45434, sum loss: 
4603.119629, avg loss: 2.633364, ppl: 13.920516 +epoch: 1, batch: 45435, sum loss: 4633.605957, avg loss: 2.756458, ppl: 15.743979 +epoch: 1, batch: 45436, sum loss: 4951.251953, avg loss: 2.730972, ppl: 15.347798 +epoch: 1, batch: 45437, sum loss: 4759.640625, avg loss: 2.563081, ppl: 12.975728 +epoch: 1, batch: 45438, sum loss: 4568.154785, avg loss: 2.594069, ppl: 13.384114 +epoch: 1, batch: 45439, sum loss: 4682.443359, avg loss: 2.730288, ppl: 15.337296 +epoch: 1, batch: 45440, sum loss: 4702.031738, avg loss: 2.818964, ppl: 16.759480 +epoch: 1, batch: 45441, sum loss: 4011.409424, avg loss: 2.432631, ppl: 11.388812 +epoch: 1, batch: 45442, sum loss: 4700.955566, avg loss: 2.722036, ppl: 15.211255 +epoch: 1, batch: 45443, sum loss: 3634.914062, avg loss: 2.510300, ppl: 12.308621 +epoch: 1, batch: 45444, sum loss: 4255.754395, avg loss: 2.474276, ppl: 11.873106 +epoch: 1, batch: 45445, sum loss: 4286.235352, avg loss: 2.627980, ppl: 13.845773 +epoch: 1, batch: 45446, sum loss: 4170.414062, avg loss: 2.616320, ppl: 13.685267 +epoch: 1, batch: 45447, sum loss: 4470.242188, avg loss: 2.545696, ppl: 12.752101 +epoch: 1, batch: 45448, sum loss: 4530.381348, avg loss: 2.566788, ppl: 13.023930 +epoch: 1, batch: 45449, sum loss: 4295.097168, avg loss: 2.671080, ppl: 14.455578 +epoch: 1, batch: 45450, sum loss: 3330.225586, avg loss: 2.317485, ppl: 10.150113 +epoch: 1, batch: 45451, sum loss: 4192.117676, avg loss: 2.680382, ppl: 14.590666 +epoch: 1, batch: 45452, sum loss: 4074.362061, avg loss: 2.450007, ppl: 11.588430 +epoch: 1, batch: 45453, sum loss: 4312.961914, avg loss: 2.626652, ppl: 13.827394 +epoch: 1, batch: 45454, sum loss: 4689.225586, avg loss: 2.656785, ppl: 14.250401 +epoch: 1, batch: 45455, sum loss: 4302.422852, avg loss: 2.690696, ppl: 14.741933 +epoch: 1, batch: 45456, sum loss: 4179.559082, avg loss: 2.502730, ppl: 12.215799 +epoch: 1, batch: 45457, sum loss: 4047.027344, avg loss: 2.401797, ppl: 11.042998 +epoch: 1, batch: 45458, sum loss: 
4748.895508, avg loss: 2.698236, ppl: 14.853507 +epoch: 1, batch: 45459, sum loss: 3884.529297, avg loss: 2.340078, ppl: 10.382045 +epoch: 1, batch: 45460, sum loss: 5082.726074, avg loss: 2.703578, ppl: 14.933060 +epoch: 1, batch: 45461, sum loss: 4747.761230, avg loss: 2.558061, ppl: 12.910758 +epoch: 1, batch: 45462, sum loss: 3638.453613, avg loss: 2.379630, ppl: 10.800901 +epoch: 1, batch: 45463, sum loss: 4509.452637, avg loss: 2.640195, ppl: 14.015931 +epoch: 1, batch: 45464, sum loss: 3099.182861, avg loss: 2.156703, ppl: 8.642600 +epoch: 1, batch: 45465, sum loss: 3635.309082, avg loss: 2.535083, ppl: 12.617478 +epoch: 1, batch: 45466, sum loss: 4513.407715, avg loss: 2.813845, ppl: 16.673912 +epoch: 1, batch: 45467, sum loss: 4421.067871, avg loss: 2.330558, ppl: 10.283674 +epoch: 1, batch: 45468, sum loss: 4683.074219, avg loss: 2.632419, ppl: 13.907376 +epoch: 1, batch: 45469, sum loss: 4180.976562, avg loss: 2.481292, ppl: 11.956699 +epoch: 1, batch: 45470, sum loss: 3965.621582, avg loss: 2.505130, ppl: 12.245154 +epoch: 1, batch: 45471, sum loss: 4221.203125, avg loss: 2.542893, ppl: 12.716412 +epoch: 1, batch: 45472, sum loss: 4455.239746, avg loss: 2.341166, ppl: 10.393353 +epoch: 1, batch: 45473, sum loss: 4454.810059, avg loss: 2.469407, ppl: 11.815436 +epoch: 1, batch: 45474, sum loss: 4953.423340, avg loss: 2.747323, ppl: 15.600813 +epoch: 1, batch: 45475, sum loss: 4432.253906, avg loss: 2.521191, ppl: 12.443406 +epoch: 1, batch: 45476, sum loss: 5160.896973, avg loss: 2.685170, ppl: 14.660696 +epoch: 1, batch: 45477, sum loss: 4425.305176, avg loss: 2.682003, ppl: 14.614341 +epoch: 1, batch: 45478, sum loss: 5323.058105, avg loss: 2.791326, ppl: 16.302620 +epoch: 1, batch: 45479, sum loss: 3472.956787, avg loss: 2.408431, ppl: 11.116501 +epoch: 1, batch: 45480, sum loss: 4187.260742, avg loss: 2.508844, ppl: 12.290715 +epoch: 1, batch: 45481, sum loss: 4391.774414, avg loss: 2.525460, ppl: 12.496639 +epoch: 1, batch: 45482, sum loss: 
3654.814453, avg loss: 2.588395, ppl: 13.308399 +epoch: 1, batch: 45483, sum loss: 3582.635254, avg loss: 2.145291, ppl: 8.544524 +epoch: 1, batch: 45484, sum loss: 4573.027832, avg loss: 2.697952, ppl: 14.849282 +epoch: 1, batch: 45485, sum loss: 4362.182129, avg loss: 2.570526, ppl: 13.072698 +epoch: 1, batch: 45486, sum loss: 4256.524414, avg loss: 2.742606, ppl: 15.527399 +epoch: 1, batch: 45487, sum loss: 3971.861328, avg loss: 2.639111, ppl: 14.000746 +epoch: 1, batch: 45488, sum loss: 4882.238281, avg loss: 2.599701, ppl: 13.459712 +epoch: 1, batch: 45489, sum loss: 5588.317383, avg loss: 2.811025, ppl: 16.626947 +epoch: 1, batch: 45490, sum loss: 4808.889648, avg loss: 2.533662, ppl: 12.599560 +epoch: 1, batch: 45491, sum loss: 5018.899414, avg loss: 2.733605, ppl: 15.388268 +epoch: 1, batch: 45492, sum loss: 5554.251953, avg loss: 2.605184, ppl: 13.533713 +epoch: 1, batch: 45493, sum loss: 4323.043945, avg loss: 2.710372, ppl: 15.034874 +epoch: 1, batch: 45494, sum loss: 4247.945312, avg loss: 2.503209, ppl: 12.221649 +epoch: 1, batch: 45495, sum loss: 3767.719727, avg loss: 2.184186, ppl: 8.883410 +epoch: 1, batch: 45496, sum loss: 4197.250000, avg loss: 2.581335, ppl: 13.214763 +epoch: 1, batch: 45497, sum loss: 3612.711182, avg loss: 2.344394, ppl: 10.426951 +epoch: 1, batch: 45498, sum loss: 4415.332031, avg loss: 2.541930, ppl: 12.704165 +epoch: 1, batch: 45499, sum loss: 3384.397217, avg loss: 2.216370, ppl: 9.173970 +epoch: 1, batch: 45500, sum loss: 3906.927246, avg loss: 2.426663, ppl: 11.321040 +epoch: 1, batch: 45501, sum loss: 4735.415039, avg loss: 2.795404, ppl: 16.369244 +epoch: 1, batch: 45502, sum loss: 4301.075195, avg loss: 2.390814, ppl: 10.922384 +epoch: 1, batch: 45503, sum loss: 3786.466309, avg loss: 2.461942, ppl: 11.727561 +epoch: 1, batch: 45504, sum loss: 4854.822266, avg loss: 2.501196, ppl: 12.197078 +epoch: 1, batch: 45505, sum loss: 4899.585449, avg loss: 2.593746, ppl: 13.379795 +epoch: 1, batch: 45506, sum loss: 
3475.811523, avg loss: 2.339039, ppl: 10.371261 +epoch: 1, batch: 45507, sum loss: 4304.916992, avg loss: 2.332024, ppl: 10.298769 +epoch: 1, batch: 45508, sum loss: 4110.257324, avg loss: 2.489556, ppl: 12.055926 +epoch: 1, batch: 45509, sum loss: 3888.083984, avg loss: 2.453050, ppl: 11.623744 +epoch: 1, batch: 45510, sum loss: 4746.431152, avg loss: 2.678573, ppl: 14.564294 +epoch: 1, batch: 45511, sum loss: 3709.977051, avg loss: 2.237622, ppl: 9.371018 +epoch: 1, batch: 45512, sum loss: 4425.243164, avg loss: 2.537410, ppl: 12.646873 +epoch: 1, batch: 45513, sum loss: 4367.186523, avg loss: 2.424868, ppl: 11.300734 +epoch: 1, batch: 45514, sum loss: 4035.892334, avg loss: 2.444514, ppl: 11.524947 +epoch: 1, batch: 45515, sum loss: 4376.205078, avg loss: 2.461308, ppl: 11.720129 +epoch: 1, batch: 45516, sum loss: 4566.317383, avg loss: 2.775877, ppl: 16.052694 +epoch: 1, batch: 45517, sum loss: 4402.073730, avg loss: 2.860347, ppl: 17.467583 +epoch: 1, batch: 45518, sum loss: 4560.016113, avg loss: 2.393709, ppl: 10.954049 +epoch: 1, batch: 45519, sum loss: 4723.140625, avg loss: 2.506975, ppl: 12.267763 +epoch: 1, batch: 45520, sum loss: 4909.766602, avg loss: 2.611578, ppl: 13.620527 +epoch: 1, batch: 45521, sum loss: 5068.065918, avg loss: 2.642370, ppl: 14.046457 +epoch: 1, batch: 45522, sum loss: 3608.167480, avg loss: 2.352130, ppl: 10.507927 +epoch: 1, batch: 45523, sum loss: 3691.955566, avg loss: 2.198901, ppl: 9.015104 +epoch: 1, batch: 45524, sum loss: 4219.497559, avg loss: 2.513102, ppl: 12.343154 +epoch: 1, batch: 45525, sum loss: 4217.106934, avg loss: 2.537369, ppl: 12.646352 +epoch: 1, batch: 45526, sum loss: 4571.041016, avg loss: 2.581051, ppl: 13.211014 +epoch: 1, batch: 45527, sum loss: 4140.732422, avg loss: 2.371554, ppl: 10.714025 +epoch: 1, batch: 45528, sum loss: 4533.719238, avg loss: 2.488320, ppl: 12.041032 +epoch: 1, batch: 45529, sum loss: 5249.873535, avg loss: 2.735734, ppl: 15.421058 +epoch: 1, batch: 45530, sum loss: 
5173.743652, avg loss: 2.687659, ppl: 14.697229 +epoch: 1, batch: 45531, sum loss: 4234.811523, avg loss: 2.665080, ppl: 14.369093 +epoch: 1, batch: 45532, sum loss: 3911.031738, avg loss: 2.325227, ppl: 10.229002 +epoch: 1, batch: 45533, sum loss: 4242.285156, avg loss: 2.546390, ppl: 12.760948 +epoch: 1, batch: 45534, sum loss: 4253.440430, avg loss: 2.525796, ppl: 12.500844 +epoch: 1, batch: 45535, sum loss: 4300.246094, avg loss: 2.407753, ppl: 11.108969 +epoch: 1, batch: 45536, sum loss: 4057.839600, avg loss: 2.653918, ppl: 14.209597 +epoch: 1, batch: 45537, sum loss: 4263.400879, avg loss: 2.487398, ppl: 12.029938 +epoch: 1, batch: 45538, sum loss: 4289.118164, avg loss: 2.537940, ppl: 12.653575 +epoch: 1, batch: 45539, sum loss: 5296.431641, avg loss: 2.727308, ppl: 15.291663 +epoch: 1, batch: 45540, sum loss: 4440.858887, avg loss: 2.613808, ppl: 13.650930 +epoch: 1, batch: 45541, sum loss: 4263.864258, avg loss: 2.494947, ppl: 12.121091 +epoch: 1, batch: 45542, sum loss: 4753.335938, avg loss: 2.617476, ppl: 13.701095 +epoch: 1, batch: 45543, sum loss: 5165.920898, avg loss: 2.801476, ppl: 16.468929 +epoch: 1, batch: 45544, sum loss: 4414.919434, avg loss: 2.544622, ppl: 12.738415 +epoch: 1, batch: 45545, sum loss: 4685.339355, avg loss: 2.831021, ppl: 16.962769 +epoch: 1, batch: 45546, sum loss: 4342.229980, avg loss: 2.520157, ppl: 12.430543 +epoch: 1, batch: 45547, sum loss: 3365.089600, avg loss: 2.401920, ppl: 11.044359 +epoch: 1, batch: 45548, sum loss: 4934.470215, avg loss: 2.747478, ppl: 15.603231 +epoch: 1, batch: 45549, sum loss: 4338.747559, avg loss: 2.852563, ppl: 17.332142 +epoch: 1, batch: 45550, sum loss: 4510.022949, avg loss: 2.575684, ppl: 13.140306 +epoch: 1, batch: 45551, sum loss: 4751.191406, avg loss: 2.620624, ppl: 13.744302 +epoch: 1, batch: 45552, sum loss: 3920.844727, avg loss: 2.343601, ppl: 10.418689 +epoch: 1, batch: 45553, sum loss: 4271.019043, avg loss: 2.657759, ppl: 14.264290 +epoch: 1, batch: 45554, sum loss: 
3549.281006, avg loss: 2.468207, ppl: 11.801264 +epoch: 1, batch: 45555, sum loss: 4258.958496, avg loss: 2.523080, ppl: 12.466934 +epoch: 1, batch: 45556, sum loss: 4324.572266, avg loss: 2.536406, ppl: 12.634183 +epoch: 1, batch: 45557, sum loss: 5483.837891, avg loss: 2.790757, ppl: 16.293352 +epoch: 1, batch: 45558, sum loss: 4291.456543, avg loss: 2.808545, ppl: 16.585766 +epoch: 1, batch: 45559, sum loss: 4036.964844, avg loss: 2.461564, ppl: 11.723130 +epoch: 1, batch: 45560, sum loss: 5423.850586, avg loss: 2.908231, ppl: 18.324350 +epoch: 1, batch: 45561, sum loss: 4632.717285, avg loss: 2.742876, ppl: 15.531587 +epoch: 1, batch: 45562, sum loss: 4003.293213, avg loss: 2.318062, ppl: 10.155973 +epoch: 1, batch: 45563, sum loss: 3595.662354, avg loss: 2.340926, ppl: 10.390856 +epoch: 1, batch: 45564, sum loss: 4631.244629, avg loss: 2.717867, ppl: 15.147972 +epoch: 1, batch: 45565, sum loss: 3931.860107, avg loss: 2.626493, ppl: 13.825199 +epoch: 1, batch: 45566, sum loss: 4138.160156, avg loss: 2.480911, ppl: 11.952154 +epoch: 1, batch: 45567, sum loss: 3887.941895, avg loss: 2.442174, ppl: 11.498015 +epoch: 1, batch: 45568, sum loss: 4223.001465, avg loss: 2.647650, ppl: 14.120812 +epoch: 1, batch: 45569, sum loss: 4761.851562, avg loss: 2.639607, ppl: 14.007700 +epoch: 1, batch: 45570, sum loss: 4212.244629, avg loss: 2.640906, ppl: 14.025904 +epoch: 1, batch: 45571, sum loss: 4862.999512, avg loss: 2.575741, ppl: 13.141055 +epoch: 1, batch: 45572, sum loss: 4299.755371, avg loss: 2.557856, ppl: 12.908108 +epoch: 1, batch: 45573, sum loss: 4279.497070, avg loss: 2.625458, ppl: 13.810902 +epoch: 1, batch: 45574, sum loss: 4525.179199, avg loss: 2.587295, ppl: 13.293764 +epoch: 1, batch: 45575, sum loss: 4423.117676, avg loss: 2.631242, ppl: 13.891009 +epoch: 1, batch: 45576, sum loss: 3536.176514, avg loss: 2.160157, ppl: 8.672497 +epoch: 1, batch: 45577, sum loss: 4478.464355, avg loss: 2.570875, ppl: 13.077261 +epoch: 1, batch: 45578, sum loss: 
3492.796387, avg loss: 2.144135, ppl: 8.534657 +epoch: 1, batch: 45579, sum loss: 4544.425781, avg loss: 2.563128, ppl: 12.976344 +epoch: 1, batch: 45580, sum loss: 4170.679688, avg loss: 2.685563, ppl: 14.666461 +epoch: 1, batch: 45581, sum loss: 4901.133301, avg loss: 2.805457, ppl: 16.534628 +epoch: 1, batch: 45582, sum loss: 3542.965576, avg loss: 2.608958, ppl: 13.584894 +epoch: 1, batch: 45583, sum loss: 3807.327148, avg loss: 2.393040, ppl: 10.946726 +epoch: 1, batch: 45584, sum loss: 3814.611084, avg loss: 2.423514, ppl: 11.285445 +epoch: 1, batch: 45585, sum loss: 3772.093994, avg loss: 2.391943, ppl: 10.934719 +epoch: 1, batch: 45586, sum loss: 4230.814453, avg loss: 2.516844, ppl: 12.389431 +epoch: 1, batch: 45587, sum loss: 4452.830078, avg loss: 2.655236, ppl: 14.228337 +epoch: 1, batch: 45588, sum loss: 4272.594727, avg loss: 2.578512, ppl: 13.177518 +epoch: 1, batch: 45589, sum loss: 4150.281250, avg loss: 2.500170, ppl: 12.184559 +epoch: 1, batch: 45590, sum loss: 5050.179688, avg loss: 2.611261, ppl: 13.616215 +epoch: 1, batch: 45591, sum loss: 4882.565918, avg loss: 2.717065, ppl: 15.135835 +epoch: 1, batch: 45592, sum loss: 4320.891113, avg loss: 2.476155, ppl: 11.895442 +epoch: 1, batch: 45593, sum loss: 3783.518555, avg loss: 2.402234, ppl: 11.047828 +epoch: 1, batch: 45594, sum loss: 4495.441406, avg loss: 2.334082, ppl: 10.319981 +epoch: 1, batch: 45595, sum loss: 4916.698730, avg loss: 2.769971, ppl: 15.958170 +epoch: 1, batch: 45596, sum loss: 3825.125000, avg loss: 2.345264, ppl: 10.436025 +epoch: 1, batch: 45597, sum loss: 4973.692383, avg loss: 2.692849, ppl: 14.773712 +epoch: 1, batch: 45598, sum loss: 4784.346680, avg loss: 2.706079, ppl: 14.970454 +epoch: 1, batch: 45599, sum loss: 5055.683594, avg loss: 2.729851, ppl: 15.330599 +epoch: 1, batch: 45600, sum loss: 4070.160889, avg loss: 2.556634, ppl: 12.892344 +epoch: 1, batch: 45601, sum loss: 4407.642578, avg loss: 2.617365, ppl: 13.699579 +epoch: 1, batch: 45602, sum loss: 
4477.317383, avg loss: 2.723429, ppl: 15.232465 +epoch: 1, batch: 45603, sum loss: 4196.858398, avg loss: 2.576340, ppl: 13.148928 +epoch: 1, batch: 45604, sum loss: 3546.814453, avg loss: 2.383612, ppl: 10.844000 +epoch: 1, batch: 45605, sum loss: 4853.721680, avg loss: 2.629318, ppl: 13.864314 +epoch: 1, batch: 45606, sum loss: 4550.867676, avg loss: 2.431019, ppl: 11.370463 +epoch: 1, batch: 45607, sum loss: 4735.230957, avg loss: 2.695066, ppl: 14.806496 +epoch: 1, batch: 45608, sum loss: 4968.448730, avg loss: 2.676966, ppl: 14.540909 +epoch: 1, batch: 45609, sum loss: 4576.473145, avg loss: 2.464444, ppl: 11.756946 +epoch: 1, batch: 45610, sum loss: 3558.392578, avg loss: 2.252147, ppl: 9.508130 +epoch: 1, batch: 45611, sum loss: 3924.171875, avg loss: 2.386966, ppl: 10.880430 +epoch: 1, batch: 45612, sum loss: 3874.448730, avg loss: 2.463095, ppl: 11.741096 +epoch: 1, batch: 45613, sum loss: 4169.152344, avg loss: 2.471341, ppl: 11.838313 +epoch: 1, batch: 45614, sum loss: 4088.208496, avg loss: 2.417628, ppl: 11.219213 +epoch: 1, batch: 45615, sum loss: 4566.770996, avg loss: 2.642807, ppl: 14.052597 +epoch: 1, batch: 45616, sum loss: 3982.418945, avg loss: 2.472017, ppl: 11.846314 +epoch: 1, batch: 45617, sum loss: 5024.784668, avg loss: 2.698596, ppl: 14.858849 +epoch: 1, batch: 45618, sum loss: 4005.665283, avg loss: 2.296826, ppl: 9.942578 +epoch: 1, batch: 45619, sum loss: 3617.086670, avg loss: 2.326101, ppl: 10.237944 +epoch: 1, batch: 45620, sum loss: 4816.852051, avg loss: 2.615012, ppl: 13.667382 +epoch: 1, batch: 45621, sum loss: 4287.865234, avg loss: 2.472817, ppl: 11.855800 +epoch: 1, batch: 45622, sum loss: 2892.802246, avg loss: 2.215009, ppl: 9.161496 +epoch: 1, batch: 45623, sum loss: 4026.013672, avg loss: 2.270736, ppl: 9.686522 +epoch: 1, batch: 45624, sum loss: 3804.102539, avg loss: 2.332374, ppl: 10.302374 +epoch: 1, batch: 45625, sum loss: 4046.084473, avg loss: 2.421355, ppl: 11.261110 +epoch: 1, batch: 45626, sum loss: 
5014.347168, avg loss: 2.657312, ppl: 14.257908 +epoch: 1, batch: 45627, sum loss: 4732.782227, avg loss: 2.675400, ppl: 14.518156 +epoch: 1, batch: 45628, sum loss: 3927.924072, avg loss: 2.240687, ppl: 9.399786 +epoch: 1, batch: 45629, sum loss: 4495.361816, avg loss: 2.448454, ppl: 11.570447 +epoch: 1, batch: 45630, sum loss: 4778.550781, avg loss: 2.562226, ppl: 12.964639 +epoch: 1, batch: 45631, sum loss: 4547.565918, avg loss: 2.807140, ppl: 16.562475 +epoch: 1, batch: 45632, sum loss: 4774.842773, avg loss: 2.832054, ppl: 16.980301 +epoch: 1, batch: 45633, sum loss: 4665.783203, avg loss: 2.722161, ppl: 15.213156 +epoch: 1, batch: 45634, sum loss: 4055.705322, avg loss: 2.456515, ppl: 11.664086 +epoch: 1, batch: 45635, sum loss: 4632.787109, avg loss: 2.651853, ppl: 14.180288 +epoch: 1, batch: 45636, sum loss: 4364.438477, avg loss: 2.590171, ppl: 13.332052 +epoch: 1, batch: 45637, sum loss: 4556.732422, avg loss: 2.329618, ppl: 10.274014 +epoch: 1, batch: 45638, sum loss: 4382.964355, avg loss: 2.632411, ppl: 13.907260 +epoch: 1, batch: 45639, sum loss: 4028.963135, avg loss: 2.689561, ppl: 14.725216 +epoch: 1, batch: 45640, sum loss: 4467.655762, avg loss: 2.591448, ppl: 13.349085 +epoch: 1, batch: 45641, sum loss: 3410.015137, avg loss: 2.249351, ppl: 9.481576 +epoch: 1, batch: 45642, sum loss: 5194.065918, avg loss: 2.932843, ppl: 18.780958 +epoch: 1, batch: 45643, sum loss: 4200.848633, avg loss: 2.438101, ppl: 11.451278 +epoch: 1, batch: 45644, sum loss: 2891.308105, avg loss: 2.095151, ppl: 8.126668 +epoch: 1, batch: 45645, sum loss: 4592.313965, avg loss: 2.722178, ppl: 15.213421 +epoch: 1, batch: 45646, sum loss: 4482.778320, avg loss: 2.535508, ppl: 12.622844 +epoch: 1, batch: 45647, sum loss: 4749.824219, avg loss: 2.828960, ppl: 16.927849 +epoch: 1, batch: 45648, sum loss: 3901.271973, avg loss: 2.585336, ppl: 13.267746 +epoch: 1, batch: 45649, sum loss: 4707.368652, avg loss: 2.682261, ppl: 14.618111 +epoch: 1, batch: 45650, sum loss: 
4175.663086, avg loss: 2.388824, ppl: 10.900661 +epoch: 1, batch: 45651, sum loss: 4689.909668, avg loss: 2.530982, ppl: 12.565840 +epoch: 1, batch: 45652, sum loss: 3702.931152, avg loss: 2.337709, ppl: 10.357480 +epoch: 1, batch: 45653, sum loss: 4302.273926, avg loss: 2.645925, ppl: 14.096476 +epoch: 1, batch: 45654, sum loss: 4358.619141, avg loss: 2.635199, ppl: 13.946089 +epoch: 1, batch: 45655, sum loss: 3938.023682, avg loss: 2.346856, ppl: 10.452651 +epoch: 1, batch: 45656, sum loss: 5321.553223, avg loss: 2.787613, ppl: 16.242207 +epoch: 1, batch: 45657, sum loss: 3941.513184, avg loss: 2.451190, ppl: 11.602142 +epoch: 1, batch: 45658, sum loss: 4608.951660, avg loss: 2.577713, ppl: 13.166997 +epoch: 1, batch: 45659, sum loss: 5495.634277, avg loss: 2.900071, ppl: 18.175434 +epoch: 1, batch: 45660, sum loss: 3616.061279, avg loss: 2.417153, ppl: 11.213892 +epoch: 1, batch: 45661, sum loss: 4460.494141, avg loss: 2.738179, ppl: 15.458812 +epoch: 1, batch: 45662, sum loss: 4768.662109, avg loss: 2.588850, ppl: 13.314455 +epoch: 1, batch: 45663, sum loss: 5073.775879, avg loss: 2.863305, ppl: 17.519331 +epoch: 1, batch: 45664, sum loss: 4916.427734, avg loss: 2.689512, ppl: 14.724489 +epoch: 1, batch: 45665, sum loss: 4524.336426, avg loss: 2.572107, ppl: 13.093388 +epoch: 1, batch: 45666, sum loss: 5226.499023, avg loss: 2.745010, ppl: 15.564772 +epoch: 1, batch: 45667, sum loss: 4552.875977, avg loss: 2.504332, ppl: 12.235387 +epoch: 1, batch: 45668, sum loss: 4827.751953, avg loss: 2.710697, ppl: 15.039761 +epoch: 1, batch: 45669, sum loss: 4694.211914, avg loss: 2.577821, ppl: 13.168413 +epoch: 1, batch: 45670, sum loss: 4546.967285, avg loss: 2.820699, ppl: 16.788586 +epoch: 1, batch: 45671, sum loss: 4581.763672, avg loss: 2.558215, ppl: 12.912749 +epoch: 1, batch: 45672, sum loss: 5156.406738, avg loss: 2.588558, ppl: 13.310560 +epoch: 1, batch: 45673, sum loss: 4680.450684, avg loss: 2.885605, ppl: 17.914396 +epoch: 1, batch: 45674, sum loss: 
4266.629395, avg loss: 2.659993, ppl: 14.296195 +epoch: 1, batch: 45675, sum loss: 3424.200195, avg loss: 2.200643, ppl: 9.030817 +epoch: 1, batch: 45676, sum loss: 4550.819336, avg loss: 2.702387, ppl: 14.915290 +epoch: 1, batch: 45677, sum loss: 5093.090820, avg loss: 2.900393, ppl: 18.181297 +epoch: 1, batch: 45678, sum loss: 3916.278320, avg loss: 2.357784, ppl: 10.567503 +epoch: 1, batch: 45679, sum loss: 3464.651855, avg loss: 2.268927, ppl: 9.669024 +epoch: 1, batch: 45680, sum loss: 3253.370605, avg loss: 2.161708, ppl: 8.685962 +epoch: 1, batch: 45681, sum loss: 4224.664062, avg loss: 2.729111, ppl: 15.319265 +epoch: 1, batch: 45682, sum loss: 3916.294189, avg loss: 2.544701, ppl: 12.739413 +epoch: 1, batch: 45683, sum loss: 5010.954102, avg loss: 2.630422, ppl: 13.879627 +epoch: 1, batch: 45684, sum loss: 5093.794434, avg loss: 2.847286, ppl: 17.240923 +epoch: 1, batch: 45685, sum loss: 4130.881836, avg loss: 2.463257, ppl: 11.742994 +epoch: 1, batch: 45686, sum loss: 4670.553711, avg loss: 2.517819, ppl: 12.401516 +epoch: 1, batch: 45687, sum loss: 5519.479004, avg loss: 2.662556, ppl: 14.332880 +epoch: 1, batch: 45688, sum loss: 4466.949707, avg loss: 2.543821, ppl: 12.728214 +epoch: 1, batch: 45689, sum loss: 5424.417480, avg loss: 2.713566, ppl: 15.082959 +epoch: 1, batch: 45690, sum loss: 4438.449707, avg loss: 2.537707, ppl: 12.650631 +epoch: 1, batch: 45691, sum loss: 4019.573242, avg loss: 2.548873, ppl: 12.792684 +epoch: 1, batch: 45692, sum loss: 4938.553711, avg loss: 2.790143, ppl: 16.283356 +epoch: 1, batch: 45693, sum loss: 4803.628418, avg loss: 2.733994, ppl: 15.394257 +epoch: 1, batch: 45694, sum loss: 4003.115967, avg loss: 2.379974, ppl: 10.804621 +epoch: 1, batch: 45695, sum loss: 3737.392822, avg loss: 2.405015, ppl: 11.078593 +epoch: 1, batch: 45696, sum loss: 4244.735352, avg loss: 2.345158, ppl: 10.434920 +epoch: 1, batch: 45697, sum loss: 3892.827148, avg loss: 2.571220, ppl: 13.081777 +epoch: 1, batch: 45698, sum loss: 
3250.701904, avg loss: 2.360713, ppl: 10.598506 +epoch: 1, batch: 45699, sum loss: 4683.974609, avg loss: 2.665893, ppl: 14.380791 +epoch: 1, batch: 45700, sum loss: 5677.818848, avg loss: 2.863247, ppl: 17.518314 +epoch: 1, batch: 45701, sum loss: 4994.150391, avg loss: 2.788470, ppl: 16.256134 +epoch: 1, batch: 45702, sum loss: 4160.453125, avg loss: 2.485337, ppl: 12.005159 +epoch: 1, batch: 45703, sum loss: 4665.625977, avg loss: 2.656962, ppl: 14.252929 +epoch: 1, batch: 45704, sum loss: 3694.500977, avg loss: 2.469586, ppl: 11.817557 +epoch: 1, batch: 45705, sum loss: 4442.834961, avg loss: 2.697532, ppl: 14.843049 +epoch: 1, batch: 45706, sum loss: 4719.150879, avg loss: 2.600083, ppl: 13.464857 +epoch: 1, batch: 45707, sum loss: 5856.799316, avg loss: 2.687838, ppl: 14.699862 +epoch: 1, batch: 45708, sum loss: 4452.782227, avg loss: 2.596374, ppl: 13.415010 +epoch: 1, batch: 45709, sum loss: 4294.602539, avg loss: 2.554790, ppl: 12.868600 +epoch: 1, batch: 45710, sum loss: 4603.714355, avg loss: 2.735421, ppl: 15.416239 +epoch: 1, batch: 45711, sum loss: 4343.601562, avg loss: 2.607204, ppl: 13.561077 +epoch: 1, batch: 45712, sum loss: 4095.716064, avg loss: 2.449591, ppl: 11.583610 +epoch: 1, batch: 45713, sum loss: 4585.502930, avg loss: 2.639898, ppl: 14.011775 +epoch: 1, batch: 45714, sum loss: 3304.502930, avg loss: 2.473430, ppl: 11.863072 +epoch: 1, batch: 45715, sum loss: 5027.092773, avg loss: 2.724711, ppl: 15.252012 +epoch: 1, batch: 45716, sum loss: 4370.627930, avg loss: 2.346016, ppl: 10.443877 +epoch: 1, batch: 45717, sum loss: 4819.247559, avg loss: 2.721201, ppl: 15.198567 +epoch: 1, batch: 45718, sum loss: 5727.245117, avg loss: 2.967485, ppl: 19.442953 +epoch: 1, batch: 45719, sum loss: 3534.079102, avg loss: 2.375053, ppl: 10.751585 +epoch: 1, batch: 45720, sum loss: 4356.891602, avg loss: 2.455970, ppl: 11.657740 +epoch: 1, batch: 45721, sum loss: 4638.898926, avg loss: 2.572878, ppl: 13.103480 +epoch: 1, batch: 45722, sum loss: 
5127.265137, avg loss: 2.749204, ppl: 15.630180 +epoch: 1, batch: 45723, sum loss: 4327.632812, avg loss: 2.460280, ppl: 11.708092 +epoch: 1, batch: 45724, sum loss: 4758.920410, avg loss: 2.678064, ppl: 14.556889 +epoch: 1, batch: 45725, sum loss: 4957.331543, avg loss: 2.666666, ppl: 14.391900 +epoch: 1, batch: 45726, sum loss: 3763.011719, avg loss: 2.365186, ppl: 10.646024 +epoch: 1, batch: 45727, sum loss: 3868.177979, avg loss: 2.373115, ppl: 10.730770 +epoch: 1, batch: 45728, sum loss: 4481.766113, avg loss: 2.529213, ppl: 12.543635 +epoch: 1, batch: 45729, sum loss: 4949.149902, avg loss: 2.824857, ppl: 16.858541 +epoch: 1, batch: 45730, sum loss: 4150.492676, avg loss: 2.374424, ppl: 10.744820 +epoch: 1, batch: 45731, sum loss: 4112.075195, avg loss: 2.505835, ppl: 12.253784 +epoch: 1, batch: 45732, sum loss: 4180.511719, avg loss: 2.510818, ppl: 12.315000 +epoch: 1, batch: 45733, sum loss: 3792.076416, avg loss: 2.517979, ppl: 12.403505 +epoch: 1, batch: 45734, sum loss: 4636.614746, avg loss: 2.759890, ppl: 15.798102 +epoch: 1, batch: 45735, sum loss: 4385.075684, avg loss: 2.756176, ppl: 15.739536 +epoch: 1, batch: 45736, sum loss: 4057.665039, avg loss: 2.393903, ppl: 10.956170 +epoch: 1, batch: 45737, sum loss: 2754.093750, avg loss: 2.250077, ppl: 9.488462 +epoch: 1, batch: 45738, sum loss: 5932.570312, avg loss: 2.723861, ppl: 15.239054 +epoch: 1, batch: 45739, sum loss: 4327.380859, avg loss: 2.892634, ppl: 18.040773 +epoch: 1, batch: 45740, sum loss: 3601.110107, avg loss: 2.444746, ppl: 11.527616 +epoch: 1, batch: 45741, sum loss: 4500.435059, avg loss: 2.670881, ppl: 14.452701 +epoch: 1, batch: 45742, sum loss: 4452.456055, avg loss: 2.666142, ppl: 14.384360 +epoch: 1, batch: 45743, sum loss: 6399.693359, avg loss: 2.874975, ppl: 17.724976 +epoch: 1, batch: 45744, sum loss: 4655.879883, avg loss: 2.740365, ppl: 15.492640 +epoch: 1, batch: 45745, sum loss: 3300.660156, avg loss: 2.209277, ppl: 9.109130 +epoch: 1, batch: 45746, sum loss: 
4237.636719, avg loss: 2.504514, ppl: 12.237604 +epoch: 1, batch: 45747, sum loss: 4786.102539, avg loss: 2.741181, ppl: 15.505292 +epoch: 1, batch: 45748, sum loss: 5779.466797, avg loss: 2.862539, ppl: 17.505922 +epoch: 1, batch: 45749, sum loss: 4166.416992, avg loss: 2.490387, ppl: 12.065942 +epoch: 1, batch: 45750, sum loss: 3750.635254, avg loss: 2.418205, ppl: 11.225686 +epoch: 1, batch: 45751, sum loss: 3778.139648, avg loss: 2.296741, ppl: 9.941732 +epoch: 1, batch: 45752, sum loss: 4364.725586, avg loss: 2.502710, ppl: 12.215549 +epoch: 1, batch: 45753, sum loss: 5297.154785, avg loss: 2.596644, ppl: 13.418634 +epoch: 1, batch: 45754, sum loss: 4388.331055, avg loss: 2.772161, ppl: 15.993158 +epoch: 1, batch: 45755, sum loss: 3851.823486, avg loss: 2.527443, ppl: 12.521450 +epoch: 1, batch: 45756, sum loss: 5642.812500, avg loss: 3.020777, ppl: 20.507229 +epoch: 1, batch: 45757, sum loss: 5004.494141, avg loss: 2.690588, ppl: 14.740344 +epoch: 1, batch: 45758, sum loss: 4013.646973, avg loss: 2.517972, ppl: 12.403414 +epoch: 1, batch: 45759, sum loss: 4438.812012, avg loss: 2.634310, ppl: 13.933692 +epoch: 1, batch: 45760, sum loss: 4337.245117, avg loss: 2.714171, ppl: 15.092093 +epoch: 1, batch: 45761, sum loss: 4791.008301, avg loss: 2.661671, ppl: 14.320204 +epoch: 1, batch: 45762, sum loss: 4380.262207, avg loss: 2.436186, ppl: 11.429364 +epoch: 1, batch: 45763, sum loss: 4608.896973, avg loss: 2.593639, ppl: 13.378368 +epoch: 1, batch: 45764, sum loss: 3869.537842, avg loss: 2.509428, ppl: 12.297894 +epoch: 1, batch: 45765, sum loss: 4678.399414, avg loss: 2.685648, ppl: 14.667706 +epoch: 1, batch: 45766, sum loss: 4562.558105, avg loss: 2.693364, ppl: 14.781312 +epoch: 1, batch: 45767, sum loss: 5141.724121, avg loss: 2.742253, ppl: 15.521914 +epoch: 1, batch: 45768, sum loss: 4724.300293, avg loss: 2.727656, ppl: 15.296990 +epoch: 1, batch: 45769, sum loss: 4020.924316, avg loss: 2.442846, ppl: 11.505740 +epoch: 1, batch: 45770, sum loss: 
4053.037109, avg loss: 2.515852, ppl: 12.377147 +epoch: 1, batch: 45771, sum loss: 4649.688477, avg loss: 2.625459, ppl: 13.810915 +epoch: 1, batch: 45772, sum loss: 4537.551758, avg loss: 2.638112, ppl: 13.986766 +epoch: 1, batch: 45773, sum loss: 4439.372559, avg loss: 2.795575, ppl: 16.372038 +epoch: 1, batch: 45774, sum loss: 5165.191895, avg loss: 2.785972, ppl: 16.215570 +pass_id = 1 time_consumed = 27193.0101941 +epoch: 2, batch: 0, sum loss: 4351.029297, avg loss: 2.748597, ppl: 15.620703 +epoch: 2, batch: 1, sum loss: 4907.828125, avg loss: 2.665849, ppl: 14.380157 +epoch: 2, batch: 2, sum loss: 3667.678223, avg loss: 2.383157, ppl: 10.839066 +epoch: 2, batch: 3, sum loss: 4469.970703, avg loss: 2.573386, ppl: 13.110137 +epoch: 2, batch: 4, sum loss: 3567.686035, avg loss: 2.354908, ppl: 10.537165 +epoch: 2, batch: 5, sum loss: 4304.531250, avg loss: 2.386104, ppl: 10.871057 +epoch: 2, batch: 6, sum loss: 5702.746094, avg loss: 2.818955, ppl: 16.759327 +epoch: 2, batch: 7, sum loss: 3864.446777, avg loss: 2.357808, ppl: 10.567758 +epoch: 2, batch: 8, sum loss: 4764.937012, avg loss: 2.560418, ppl: 12.941221 +epoch: 2, batch: 9, sum loss: 4408.924316, avg loss: 2.425151, ppl: 11.303935 +epoch: 2, batch: 10, sum loss: 3885.932129, avg loss: 2.583731, ppl: 13.246474 +epoch: 2, batch: 11, sum loss: 5313.251465, avg loss: 2.684816, ppl: 14.655510 +epoch: 2, batch: 12, sum loss: 4442.420898, avg loss: 2.623994, ppl: 13.790689 +epoch: 2, batch: 13, sum loss: 4696.919922, avg loss: 2.735539, ppl: 15.418047 +epoch: 2, batch: 14, sum loss: 4588.110840, avg loss: 2.615798, ppl: 13.678134 +epoch: 2, batch: 15, sum loss: 5337.842285, avg loss: 2.926449, ppl: 18.661240 +epoch: 2, batch: 16, sum loss: 3772.003662, avg loss: 2.433551, ppl: 11.399287 +epoch: 2, batch: 17, sum loss: 3982.817871, avg loss: 2.457013, ppl: 11.669900 +epoch: 2, batch: 18, sum loss: 4411.242676, avg loss: 2.393512, ppl: 10.951890 +epoch: 2, batch: 19, sum loss: 3556.606934, avg loss: 2.323061, 
ppl: 10.206875 +epoch: 2, batch: 20, sum loss: 4886.727051, avg loss: 2.954490, ppl: 19.191940 +epoch: 2, batch: 21, sum loss: 3987.393066, avg loss: 2.447755, ppl: 11.562361 +epoch: 2, batch: 22, sum loss: 5190.567383, avg loss: 2.880448, ppl: 17.822258 +epoch: 2, batch: 23, sum loss: 4617.381836, avg loss: 2.636997, ppl: 13.971189 +epoch: 2, batch: 24, sum loss: 3552.784180, avg loss: 2.149295, ppl: 8.578805 +epoch: 2, batch: 25, sum loss: 4416.315430, avg loss: 2.561668, ppl: 12.957415 +epoch: 2, batch: 26, sum loss: 3797.223145, avg loss: 2.565691, ppl: 13.009651 +epoch: 2, batch: 27, sum loss: 4868.607910, avg loss: 2.675059, ppl: 14.513210 +epoch: 2, batch: 28, sum loss: 4380.897949, avg loss: 2.558936, ppl: 12.922056 +epoch: 2, batch: 29, sum loss: 3850.541260, avg loss: 2.184085, ppl: 8.882514 +epoch: 2, batch: 30, sum loss: 3735.886963, avg loss: 2.345190, ppl: 10.435251 +epoch: 2, batch: 31, sum loss: 4649.665039, avg loss: 2.511975, ppl: 12.329251 +epoch: 2, batch: 32, sum loss: 3757.751221, avg loss: 2.419672, ppl: 11.242177 +epoch: 2, batch: 33, sum loss: 3359.542480, avg loss: 2.148045, ppl: 8.568092 +epoch: 2, batch: 34, sum loss: 4816.286133, avg loss: 2.790432, ppl: 16.288057 +epoch: 2, batch: 35, sum loss: 4134.721191, avg loss: 2.395551, ppl: 10.974243 +epoch: 2, batch: 36, sum loss: 3628.845947, avg loss: 2.338174, ppl: 10.362299 +epoch: 2, batch: 37, sum loss: 4023.706299, avg loss: 2.577647, ppl: 13.166119 +epoch: 2, batch: 38, sum loss: 3551.394043, avg loss: 2.345703, ppl: 10.440609 +epoch: 2, batch: 39, sum loss: 4095.465820, avg loss: 2.302117, ppl: 9.995318 +epoch: 2, batch: 40, sum loss: 3891.711426, avg loss: 2.282529, ppl: 9.801435 +epoch: 2, batch: 41, sum loss: 4951.709961, avg loss: 2.704375, ppl: 14.944970 +epoch: 2, batch: 42, sum loss: 4300.374023, avg loss: 2.516310, ppl: 12.382822 +epoch: 2, batch: 43, sum loss: 4061.841309, avg loss: 2.484307, ppl: 11.992802 +epoch: 2, batch: 44, sum loss: 3840.552734, avg loss: 2.344660, ppl: 
10.429722 +epoch: 2, batch: 45, sum loss: 4492.898926, avg loss: 2.320712, ppl: 10.182923 +epoch: 2, batch: 46, sum loss: 4223.651367, avg loss: 2.653047, ppl: 14.197236 +epoch: 2, batch: 47, sum loss: 4168.093750, avg loss: 2.483965, ppl: 11.988708 +epoch: 2, batch: 48, sum loss: 5338.177246, avg loss: 2.715248, ppl: 15.108355 +epoch: 2, batch: 49, sum loss: 3724.520020, avg loss: 2.448731, ppl: 11.573652 +epoch: 2, batch: 50, sum loss: 4178.738281, avg loss: 2.563643, ppl: 12.983031 +epoch: 2, batch: 51, sum loss: 4359.193359, avg loss: 2.326144, ppl: 10.238383 +epoch: 2, batch: 52, sum loss: 3696.829834, avg loss: 2.525157, ppl: 12.492856 +epoch: 2, batch: 53, sum loss: 3549.887207, avg loss: 2.274111, ppl: 9.719275 +epoch: 2, batch: 54, sum loss: 4521.617676, avg loss: 2.545956, ppl: 12.755415 +epoch: 2, batch: 55, sum loss: 5175.818848, avg loss: 2.834512, ppl: 17.022091 +epoch: 2, batch: 56, sum loss: 4579.820312, avg loss: 2.737490, ppl: 15.448157 +epoch: 2, batch: 57, sum loss: 3602.931152, avg loss: 2.260308, ppl: 9.586041 +epoch: 2, batch: 58, sum loss: 4609.220703, avg loss: 2.774967, ppl: 16.038101 +epoch: 2, batch: 59, sum loss: 3939.824951, avg loss: 2.331257, ppl: 10.290873 +epoch: 2, batch: 60, sum loss: 4813.330078, avg loss: 2.687510, ppl: 14.695036 +epoch: 2, batch: 61, sum loss: 3599.952148, avg loss: 2.434045, ppl: 11.404918 +epoch: 2, batch: 62, sum loss: 4029.818359, avg loss: 2.635591, ppl: 13.951556 +epoch: 2, batch: 63, sum loss: 4012.568848, avg loss: 2.629468, ppl: 13.866397 +epoch: 2, batch: 64, sum loss: 4732.712891, avg loss: 2.574925, ppl: 13.130338 +epoch: 2, batch: 65, sum loss: 3511.575439, avg loss: 2.582041, ppl: 13.224098 +epoch: 2, batch: 66, sum loss: 3820.940918, avg loss: 2.443057, ppl: 11.508165 +epoch: 2, batch: 67, sum loss: 5283.791504, avg loss: 2.691692, ppl: 14.756624 +epoch: 2, batch: 68, sum loss: 3948.481689, avg loss: 2.562285, ppl: 12.965415 +epoch: 2, batch: 69, sum loss: 4473.534668, avg loss: 2.596364, ppl: 
13.414870 +epoch: 2, batch: 70, sum loss: 4375.749512, avg loss: 2.570946, ppl: 13.078184 +epoch: 2, batch: 71, sum loss: 4251.949219, avg loss: 2.553723, ppl: 12.854877 +epoch: 2, batch: 72, sum loss: 5859.353027, avg loss: 2.823785, ppl: 16.840464 +epoch: 2, batch: 73, sum loss: 5519.442383, avg loss: 3.014442, ppl: 20.377712 +epoch: 2, batch: 74, sum loss: 5029.389160, avg loss: 2.831863, ppl: 16.977066 +epoch: 2, batch: 75, sum loss: 3618.197021, avg loss: 2.237599, ppl: 9.370802 +epoch: 2, batch: 76, sum loss: 4786.746582, avg loss: 2.755755, ppl: 15.732918 +epoch: 2, batch: 77, sum loss: 5126.487793, avg loss: 2.973601, ppl: 19.562239 +epoch: 2, batch: 78, sum loss: 4168.798828, avg loss: 2.565415, ppl: 13.006050 +epoch: 2, batch: 79, sum loss: 5332.918457, avg loss: 2.746096, ppl: 15.581684 +epoch: 2, batch: 80, sum loss: 4318.292969, avg loss: 2.453575, ppl: 11.629853 +epoch: 2, batch: 81, sum loss: 3978.098633, avg loss: 2.425670, ppl: 11.309803 +epoch: 2, batch: 82, sum loss: 4247.606934, avg loss: 2.407940, ppl: 11.111053 +epoch: 2, batch: 83, sum loss: 4147.013184, avg loss: 2.455307, ppl: 11.650007 +epoch: 2, batch: 84, sum loss: 4370.698730, avg loss: 2.653733, ppl: 14.206978 +epoch: 2, batch: 85, sum loss: 4899.250977, avg loss: 2.807594, ppl: 16.569996 +epoch: 2, batch: 86, sum loss: 4497.766113, avg loss: 2.650422, ppl: 14.160014 +epoch: 2, batch: 87, sum loss: 4196.040039, avg loss: 2.505099, ppl: 12.244766 +epoch: 2, batch: 88, sum loss: 3436.647461, avg loss: 2.301840, ppl: 9.992555 +epoch: 2, batch: 89, sum loss: 3866.347656, avg loss: 2.447056, ppl: 11.554276 +epoch: 2, batch: 90, sum loss: 3966.697266, avg loss: 2.579127, ppl: 13.185620 +epoch: 2, batch: 91, sum loss: 4595.305176, avg loss: 2.834858, ppl: 17.027985 +epoch: 2, batch: 92, sum loss: 3613.944336, avg loss: 2.460139, ppl: 11.706439 +epoch: 2, batch: 93, sum loss: 4513.996094, avg loss: 2.610755, ppl: 13.609328 +epoch: 2, batch: 94, sum loss: 3939.531738, avg loss: 2.590093, ppl: 
13.331017 +epoch: 2, batch: 95, sum loss: 3965.975098, avg loss: 2.279296, ppl: 9.769799 +epoch: 2, batch: 96, sum loss: 4043.333008, avg loss: 2.415372, ppl: 11.193938 +epoch: 2, batch: 97, sum loss: 3858.218506, avg loss: 2.403874, ppl: 11.065968 +epoch: 2, batch: 98, sum loss: 5871.574219, avg loss: 2.865580, ppl: 17.559240 +epoch: 2, batch: 99, sum loss: 5036.280762, avg loss: 2.851801, ppl: 17.318949 +epoch: 2, batch: 100, sum loss: 4189.141602, avg loss: 2.658085, ppl: 14.268932 +epoch: 2, batch: 101, sum loss: 4764.269531, avg loss: 2.739661, ppl: 15.481729 +epoch: 2, batch: 102, sum loss: 5478.794434, avg loss: 2.831418, ppl: 16.969511 +epoch: 2, batch: 103, sum loss: 4263.818848, avg loss: 2.556246, ppl: 12.887351 +epoch: 2, batch: 104, sum loss: 3711.452637, avg loss: 2.334247, ppl: 10.321684 +epoch: 2, batch: 105, sum loss: 4156.368164, avg loss: 2.548356, ppl: 12.786064 +epoch: 2, batch: 106, sum loss: 4304.778320, avg loss: 2.562368, ppl: 12.966488 +epoch: 2, batch: 107, sum loss: 4292.583496, avg loss: 2.614241, ppl: 13.656845 +epoch: 2, batch: 108, sum loss: 3642.397217, avg loss: 2.529443, ppl: 12.546510 +epoch: 2, batch: 109, sum loss: 3813.438965, avg loss: 2.578390, ppl: 13.175909 +epoch: 2, batch: 110, sum loss: 4698.735352, avg loss: 2.641223, ppl: 14.030348 +epoch: 2, batch: 111, sum loss: 3840.342285, avg loss: 2.533207, ppl: 12.593835 +epoch: 2, batch: 112, sum loss: 4422.679688, avg loss: 2.716634, ppl: 15.129305 +epoch: 2, batch: 113, sum loss: 3119.344727, avg loss: 2.064424, ppl: 7.880758 +epoch: 2, batch: 114, sum loss: 4732.870117, avg loss: 2.442142, ppl: 11.497637 +epoch: 2, batch: 115, sum loss: 3627.340820, avg loss: 2.403805, ppl: 11.065194 +epoch: 2, batch: 116, sum loss: 4095.016113, avg loss: 2.480325, ppl: 11.945143 +epoch: 2, batch: 117, sum loss: 5206.075195, avg loss: 2.617433, ppl: 13.700514 +epoch: 2, batch: 118, sum loss: 5352.943848, avg loss: 2.838253, ppl: 17.085882 +epoch: 2, batch: 119, sum loss: 4296.094238, avg 
loss: 2.516751, ppl: 12.388286 +epoch: 2, batch: 120, sum loss: 2899.463379, avg loss: 1.947255, ppl: 7.009424 +epoch: 2, batch: 121, sum loss: 4228.986328, avg loss: 2.629966, ppl: 13.873305 +epoch: 2, batch: 122, sum loss: 3535.395752, avg loss: 2.270646, ppl: 9.685657 +epoch: 2, batch: 123, sum loss: 4597.784180, avg loss: 2.498796, ppl: 12.167832 +epoch: 2, batch: 124, sum loss: 4786.320312, avg loss: 2.833819, ppl: 17.010302 +epoch: 2, batch: 125, sum loss: 3888.483887, avg loss: 2.556531, ppl: 12.891024 +epoch: 2, batch: 126, sum loss: 3539.057617, avg loss: 2.502870, ppl: 12.217504 +epoch: 2, batch: 127, sum loss: 4804.403320, avg loss: 2.490618, ppl: 12.068738 +epoch: 2, batch: 128, sum loss: 4556.469238, avg loss: 2.734975, ppl: 15.409364 +epoch: 2, batch: 129, sum loss: 4157.505371, avg loss: 2.460062, ppl: 11.705541 +epoch: 2, batch: 130, sum loss: 4822.629883, avg loss: 2.649797, ppl: 14.151162 +epoch: 2, batch: 131, sum loss: 5128.665039, avg loss: 2.882892, ppl: 17.865870 +epoch: 2, batch: 132, sum loss: 3681.981445, avg loss: 2.235569, ppl: 9.351797 +epoch: 2, batch: 133, sum loss: 4346.852051, avg loss: 2.629674, ppl: 13.869254 +epoch: 2, batch: 134, sum loss: 4178.665039, avg loss: 2.597057, ppl: 13.424174 +epoch: 2, batch: 135, sum loss: 4074.028809, avg loss: 2.410668, ppl: 11.141400 +epoch: 2, batch: 136, sum loss: 4393.044922, avg loss: 2.458335, ppl: 11.685341 +epoch: 2, batch: 137, sum loss: 4457.192383, avg loss: 2.359551, ppl: 10.586202 +epoch: 2, batch: 138, sum loss: 3935.009277, avg loss: 2.493669, ppl: 12.105608 +epoch: 2, batch: 139, sum loss: 5014.340332, avg loss: 2.762722, ppl: 15.842909 +epoch: 2, batch: 140, sum loss: 3846.373291, avg loss: 2.468789, ppl: 11.808140 +epoch: 2, batch: 141, sum loss: 4028.893066, avg loss: 2.518058, ppl: 12.404485 +epoch: 2, batch: 142, sum loss: 3564.812012, avg loss: 2.487657, ppl: 12.033048 +epoch: 2, batch: 143, sum loss: 4357.450684, avg loss: 2.416778, ppl: 11.209684 +epoch: 2, batch: 144, sum 
loss: 4475.302734, avg loss: 2.638740, ppl: 13.995556 +epoch: 2, batch: 145, sum loss: 4379.572754, avg loss: 2.855002, ppl: 17.374466 +epoch: 2, batch: 146, sum loss: 4546.923340, avg loss: 2.638957, ppl: 13.998599 +epoch: 2, batch: 147, sum loss: 3680.543945, avg loss: 2.413471, ppl: 11.172679 +epoch: 2, batch: 148, sum loss: 3527.842529, avg loss: 2.320949, ppl: 10.185336 +epoch: 2, batch: 149, sum loss: 3872.809082, avg loss: 2.452697, ppl: 11.619646 +epoch: 2, batch: 150, sum loss: 3776.638184, avg loss: 2.544904, ppl: 12.742011 +epoch: 2, batch: 151, sum loss: 4278.903320, avg loss: 2.556095, ppl: 12.885406 +epoch: 2, batch: 152, sum loss: 5074.308594, avg loss: 2.717894, ppl: 15.148391 +epoch: 2, batch: 153, sum loss: 4140.439941, avg loss: 2.447068, ppl: 11.554424 +epoch: 2, batch: 154, sum loss: 3660.430908, avg loss: 2.581404, ppl: 13.215683 +epoch: 2, batch: 155, sum loss: 3611.423340, avg loss: 2.109476, ppl: 8.243921 +epoch: 2, batch: 156, sum loss: 4716.550781, avg loss: 2.581582, ppl: 13.218037 +epoch: 2, batch: 157, sum loss: 3981.953125, avg loss: 2.380127, ppl: 10.806280 +epoch: 2, batch: 158, sum loss: 5158.702637, avg loss: 2.875531, ppl: 17.734842 +epoch: 2, batch: 159, sum loss: 4037.362793, avg loss: 2.686203, ppl: 14.675842 +epoch: 2, batch: 160, sum loss: 4560.960449, avg loss: 2.616730, ppl: 13.690881 +epoch: 2, batch: 161, sum loss: 4317.473145, avg loss: 2.422825, ppl: 11.277670 +epoch: 2, batch: 162, sum loss: 3312.341309, avg loss: 2.301835, ppl: 9.992505 +epoch: 2, batch: 163, sum loss: 3342.442383, avg loss: 2.308317, ppl: 10.057481 +epoch: 2, batch: 164, sum loss: 3853.817383, avg loss: 2.602172, ppl: 13.493018 +epoch: 2, batch: 165, sum loss: 4908.556641, avg loss: 2.717916, ppl: 15.148724 +epoch: 2, batch: 166, sum loss: 4038.741211, avg loss: 2.223976, ppl: 9.244016 +epoch: 2, batch: 167, sum loss: 4384.034180, avg loss: 2.575813, ppl: 13.142001 +epoch: 2, batch: 168, sum loss: 3943.773438, avg loss: 2.486616, ppl: 12.020535 
+epoch: 2, batch: 169, sum loss: 4614.923828, avg loss: 2.716259, ppl: 15.123639 +epoch: 2, batch: 170, sum loss: 4076.109863, avg loss: 2.523907, ppl: 12.477249 +epoch: 2, batch: 171, sum loss: 4875.088867, avg loss: 2.738814, ppl: 15.468630 +epoch: 2, batch: 172, sum loss: 4639.295898, avg loss: 2.722592, ppl: 15.219715 +epoch: 2, batch: 173, sum loss: 4362.219727, avg loss: 2.546538, ppl: 12.762844 +epoch: 2, batch: 174, sum loss: 4534.773926, avg loss: 2.408271, ppl: 11.114725 +epoch: 2, batch: 175, sum loss: 4676.927734, avg loss: 2.631923, ppl: 13.900477 +epoch: 2, batch: 176, sum loss: 4882.570312, avg loss: 2.702031, ppl: 14.909982 +epoch: 2, batch: 177, sum loss: 4675.116211, avg loss: 2.477539, ppl: 11.911914 +epoch: 2, batch: 178, sum loss: 5356.531250, avg loss: 2.802999, ppl: 16.494040 +epoch: 2, batch: 179, sum loss: 4259.652344, avg loss: 2.675661, ppl: 14.521947 +epoch: 2, batch: 180, sum loss: 4190.237305, avg loss: 2.359368, ppl: 10.584258 +epoch: 2, batch: 181, sum loss: 4719.616699, avg loss: 2.456854, ppl: 11.668047 +epoch: 2, batch: 182, sum loss: 5049.553711, avg loss: 2.711898, ppl: 15.057826 +epoch: 2, batch: 183, sum loss: 4729.010254, avg loss: 2.741455, ppl: 15.509540 +epoch: 2, batch: 184, sum loss: 4966.545410, avg loss: 2.637571, ppl: 13.979201 +epoch: 2, batch: 185, sum loss: 4823.658203, avg loss: 2.765859, ppl: 15.892684 +epoch: 2, batch: 186, sum loss: 5012.266113, avg loss: 2.778418, ppl: 16.093542 +epoch: 2, batch: 187, sum loss: 3858.883789, avg loss: 2.437703, ppl: 11.446716 +epoch: 2, batch: 188, sum loss: 4425.300781, avg loss: 2.629412, ppl: 13.865617 +epoch: 2, batch: 189, sum loss: 3668.596680, avg loss: 2.423115, ppl: 11.280951 +epoch: 2, batch: 190, sum loss: 3982.245361, avg loss: 2.498272, ppl: 12.161460 +epoch: 2, batch: 191, sum loss: 3741.774170, avg loss: 2.460075, ppl: 11.705691 +epoch: 2, batch: 192, sum loss: 4074.918945, avg loss: 2.753324, ppl: 15.694712 +epoch: 2, batch: 193, sum loss: 4153.445312, avg loss: 
2.400835, ppl: 11.032388 +epoch: 2, batch: 194, sum loss: 5232.082031, avg loss: 2.712329, ppl: 15.064315 +epoch: 2, batch: 195, sum loss: 4311.192383, avg loss: 2.411181, ppl: 11.147123 +epoch: 2, batch: 196, sum loss: 3993.182617, avg loss: 2.478698, ppl: 11.925730 +epoch: 2, batch: 197, sum loss: 5291.947266, avg loss: 2.756223, ppl: 15.740275 +epoch: 2, batch: 198, sum loss: 4706.458008, avg loss: 2.512791, ppl: 12.339326 +epoch: 2, batch: 199, sum loss: 3962.937988, avg loss: 2.312099, ppl: 10.095595 +epoch: 2, batch: 200, sum loss: 4020.328125, avg loss: 2.546123, ppl: 12.757545 +epoch: 2, batch: 201, sum loss: 4650.477539, avg loss: 2.619987, ppl: 13.735548 +epoch: 2, batch: 202, sum loss: 4998.927246, avg loss: 2.651951, ppl: 14.181678 +epoch: 2, batch: 203, sum loss: 4911.242676, avg loss: 2.640453, ppl: 14.019554 +epoch: 2, batch: 204, sum loss: 4515.679199, avg loss: 2.446197, ppl: 11.544357 +epoch: 2, batch: 205, sum loss: 4861.138184, avg loss: 2.561190, ppl: 12.951219 +epoch: 2, batch: 206, sum loss: 3459.127197, avg loss: 2.329379, ppl: 10.271557 +epoch: 2, batch: 207, sum loss: 4409.747070, avg loss: 2.810546, ppl: 16.618992 +epoch: 2, batch: 208, sum loss: 4549.126953, avg loss: 2.720770, ppl: 15.192014 +epoch: 2, batch: 209, sum loss: 3917.867432, avg loss: 2.340423, ppl: 10.385625 +epoch: 2, batch: 210, sum loss: 3685.080322, avg loss: 2.418032, ppl: 11.223745 +epoch: 2, batch: 211, sum loss: 6027.596680, avg loss: 3.051948, ppl: 21.156509 +epoch: 2, batch: 212, sum loss: 4717.155273, avg loss: 2.712568, ppl: 15.067918 +epoch: 2, batch: 213, sum loss: 4402.219238, avg loss: 2.580433, ppl: 13.202855 +epoch: 2, batch: 214, sum loss: 3839.000977, avg loss: 2.388924, ppl: 10.901758 +epoch: 2, batch: 215, sum loss: 4487.031738, avg loss: 2.545112, ppl: 12.744651 +epoch: 2, batch: 216, sum loss: 4900.844238, avg loss: 2.709146, ppl: 15.016442 +epoch: 2, batch: 217, sum loss: 4509.937012, avg loss: 2.711928, ppl: 15.058286 +epoch: 2, batch: 218, sum 
loss: 4201.864258, avg loss: 2.592143, ppl: 13.358372 +epoch: 2, batch: 219, sum loss: 4763.742188, avg loss: 2.670259, ppl: 14.443713 +epoch: 2, batch: 220, sum loss: 3791.400635, avg loss: 2.502575, ppl: 12.213901 +epoch: 2, batch: 221, sum loss: 4996.700684, avg loss: 2.718553, ppl: 15.158373 +epoch: 2, batch: 222, sum loss: 5052.547363, avg loss: 2.833734, ppl: 17.008854 +epoch: 2, batch: 223, sum loss: 3942.683350, avg loss: 2.412903, ppl: 11.166328 +epoch: 2, batch: 224, sum loss: 4647.609375, avg loss: 2.586316, ppl: 13.280753 +epoch: 2, batch: 225, sum loss: 5039.549316, avg loss: 2.770505, ppl: 15.966702 +epoch: 2, batch: 226, sum loss: 4630.887695, avg loss: 2.527777, ppl: 12.525633 +epoch: 2, batch: 227, sum loss: 3669.805176, avg loss: 2.392311, ppl: 10.938745 +epoch: 2, batch: 228, sum loss: 3813.916016, avg loss: 2.225155, ppl: 9.254918 +epoch: 2, batch: 229, sum loss: 3592.145020, avg loss: 2.279280, ppl: 9.769641 +epoch: 2, batch: 230, sum loss: 3703.646484, avg loss: 2.487338, ppl: 12.029213 +epoch: 2, batch: 231, sum loss: 4602.068359, avg loss: 2.581082, ppl: 13.211420 +epoch: 2, batch: 232, sum loss: 4672.228516, avg loss: 2.581342, ppl: 13.214860 +epoch: 2, batch: 233, sum loss: 3513.197021, avg loss: 2.446516, ppl: 11.548043 +epoch: 2, batch: 234, sum loss: 4789.249023, avg loss: 3.010213, ppl: 20.291725 +epoch: 2, batch: 235, sum loss: 3911.609863, avg loss: 2.328339, ppl: 10.260887 +epoch: 2, batch: 236, sum loss: 3915.883545, avg loss: 2.586449, ppl: 13.282520 +epoch: 2, batch: 237, sum loss: 5280.222656, avg loss: 2.886945, ppl: 17.938429 +epoch: 2, batch: 238, sum loss: 4906.149414, avg loss: 2.885970, ppl: 17.920948 +epoch: 2, batch: 239, sum loss: 5023.631348, avg loss: 2.953340, ppl: 19.169870 +epoch: 2, batch: 240, sum loss: 4294.253906, avg loss: 2.393676, ppl: 10.953681 +epoch: 2, batch: 241, sum loss: 4144.749023, avg loss: 2.547479, ppl: 12.774862 +epoch: 2, batch: 242, sum loss: 4445.261719, avg loss: 2.718814, ppl: 15.162334 
+epoch: 2, batch: 243, sum loss: 4236.632812, avg loss: 2.533871, ppl: 12.602201 +epoch: 2, batch: 244, sum loss: 3345.328613, avg loss: 2.137590, ppl: 8.478980 +epoch: 2, batch: 245, sum loss: 4635.077637, avg loss: 2.635064, ppl: 13.944206 +epoch: 2, batch: 246, sum loss: 3649.046387, avg loss: 2.302238, ppl: 9.996525 +epoch: 2, batch: 247, sum loss: 4151.332031, avg loss: 2.601085, ppl: 13.478356 +epoch: 2, batch: 248, sum loss: 4659.189941, avg loss: 2.579839, ppl: 13.195020 +epoch: 2, batch: 249, sum loss: 3882.558838, avg loss: 2.244254, ppl: 9.433372 +epoch: 2, batch: 250, sum loss: 3872.077881, avg loss: 2.598710, ppl: 13.446382 +epoch: 2, batch: 251, sum loss: 5161.061523, avg loss: 2.727834, ppl: 15.299708 +epoch: 2, batch: 252, sum loss: 4408.657715, avg loss: 2.706358, ppl: 14.974642 +epoch: 2, batch: 253, sum loss: 4659.722168, avg loss: 2.679541, ppl: 14.578405 +epoch: 2, batch: 254, sum loss: 4709.266602, avg loss: 2.557994, ppl: 12.909893 +epoch: 2, batch: 255, sum loss: 3905.424561, avg loss: 2.591523, ppl: 13.350084 +epoch: 2, batch: 256, sum loss: 4224.808594, avg loss: 2.331572, ppl: 10.294112 +epoch: 2, batch: 257, sum loss: 3808.266113, avg loss: 2.279034, ppl: 9.767244 +epoch: 2, batch: 258, sum loss: 4175.549805, avg loss: 2.555416, ppl: 12.876657 +epoch: 2, batch: 259, sum loss: 4968.631836, avg loss: 2.509410, ppl: 12.297671 +epoch: 2, batch: 260, sum loss: 4150.276367, avg loss: 2.568240, ppl: 13.042854 +epoch: 2, batch: 261, sum loss: 4835.986328, avg loss: 2.747720, ppl: 15.607000 +epoch: 2, batch: 262, sum loss: 4170.011719, avg loss: 2.542690, ppl: 12.713825 +epoch: 2, batch: 263, sum loss: 5147.421875, avg loss: 2.557090, ppl: 12.898230 +epoch: 2, batch: 264, sum loss: 3768.576660, avg loss: 2.472819, ppl: 11.855825 +epoch: 2, batch: 265, sum loss: 5083.317383, avg loss: 2.637944, ppl: 13.984419 +epoch: 2, batch: 266, sum loss: 5178.110840, avg loss: 2.800493, ppl: 16.452747 +epoch: 2, batch: 267, sum loss: 4525.013672, avg loss: 
2.493121, ppl: 12.098974 +epoch: 2, batch: 268, sum loss: 5110.337402, avg loss: 2.777357, ppl: 16.076477 +epoch: 2, batch: 269, sum loss: 3622.138184, avg loss: 2.237269, ppl: 9.367712 +epoch: 2, batch: 270, sum loss: 4477.351562, avg loss: 2.488800, ppl: 12.046812 +epoch: 2, batch: 271, sum loss: 5287.037109, avg loss: 2.990406, ppl: 19.893753 +epoch: 2, batch: 272, sum loss: 3659.311523, avg loss: 2.342709, ppl: 10.409398 +epoch: 2, batch: 273, sum loss: 4512.057617, avg loss: 2.644817, ppl: 14.080866 +epoch: 2, batch: 274, sum loss: 4060.239258, avg loss: 2.669454, ppl: 14.432085 +epoch: 2, batch: 275, sum loss: 3621.991455, avg loss: 2.411446, ppl: 11.150068 +epoch: 2, batch: 276, sum loss: 4805.706543, avg loss: 2.693782, ppl: 14.787495 +epoch: 2, batch: 277, sum loss: 4152.864258, avg loss: 2.460228, ppl: 11.707478 +epoch: 2, batch: 278, sum loss: 4263.932617, avg loss: 2.803375, ppl: 16.500233 +epoch: 2, batch: 279, sum loss: 4281.716309, avg loss: 2.351300, ppl: 10.499205 +epoch: 2, batch: 280, sum loss: 4450.253906, avg loss: 2.557617, ppl: 12.905030 +epoch: 2, batch: 281, sum loss: 4706.183105, avg loss: 2.714062, ppl: 15.090445 +epoch: 2, batch: 282, sum loss: 3718.060547, avg loss: 2.555368, ppl: 12.876037 +epoch: 2, batch: 283, sum loss: 4601.845215, avg loss: 2.646260, ppl: 14.101195 +epoch: 2, batch: 284, sum loss: 4484.500488, avg loss: 2.462658, ppl: 11.735967 +epoch: 2, batch: 285, sum loss: 4571.479004, avg loss: 2.653209, ppl: 14.199532 +epoch: 2, batch: 286, sum loss: 4040.899414, avg loss: 2.622258, ppl: 13.766773 +epoch: 2, batch: 287, sum loss: 4187.774414, avg loss: 2.500164, ppl: 12.184492 +epoch: 2, batch: 288, sum loss: 4044.894775, avg loss: 2.512357, ppl: 12.333967 +epoch: 2, batch: 289, sum loss: 3635.559570, avg loss: 2.459783, ppl: 11.702275 +epoch: 2, batch: 290, sum loss: 4368.617188, avg loss: 2.396389, ppl: 10.983446 +epoch: 2, batch: 291, sum loss: 3930.193359, avg loss: 2.310519, ppl: 10.079657 +epoch: 2, batch: 292, sum 
loss: 3271.290039, avg loss: 2.377391, ppl: 10.776751 +epoch: 2, batch: 293, sum loss: 3788.545410, avg loss: 2.310089, ppl: 10.075317 +epoch: 2, batch: 294, sum loss: 5046.672852, avg loss: 2.757745, ppl: 15.764251 +epoch: 2, batch: 295, sum loss: 3880.894043, avg loss: 2.670953, ppl: 14.453731 +epoch: 2, batch: 296, sum loss: 4207.186523, avg loss: 2.391806, ppl: 10.933220 +epoch: 2, batch: 297, sum loss: 4400.331055, avg loss: 2.837093, ppl: 17.066080 +epoch: 2, batch: 298, sum loss: 5086.577637, avg loss: 2.717189, ppl: 15.137712 +epoch: 2, batch: 299, sum loss: 3697.040771, avg loss: 2.329578, ppl: 10.273609 +epoch: 2, batch: 300, sum loss: 4839.250488, avg loss: 2.934658, ppl: 18.815060 +epoch: 2, batch: 301, sum loss: 5432.086914, avg loss: 2.997840, ppl: 20.042208 +epoch: 2, batch: 302, sum loss: 4450.450195, avg loss: 2.441278, ppl: 11.487715 +epoch: 2, batch: 303, sum loss: 4050.387207, avg loss: 2.421032, ppl: 11.257476 +epoch: 2, batch: 304, sum loss: 4314.601562, avg loss: 2.706776, ppl: 14.980905 +epoch: 2, batch: 305, sum loss: 3487.428955, avg loss: 2.361157, ppl: 10.603214 +epoch: 2, batch: 306, sum loss: 4089.769287, avg loss: 2.667821, ppl: 14.408538 +epoch: 2, batch: 307, sum loss: 5060.153809, avg loss: 2.760586, ppl: 15.809101 +epoch: 2, batch: 308, sum loss: 4604.626465, avg loss: 2.647859, ppl: 14.123765 +epoch: 2, batch: 309, sum loss: 4709.828125, avg loss: 2.762363, ppl: 15.837217 +epoch: 2, batch: 310, sum loss: 4966.994629, avg loss: 2.621105, ppl: 13.750916 +epoch: 2, batch: 311, sum loss: 3822.368408, avg loss: 2.295717, ppl: 9.931552 +epoch: 2, batch: 312, sum loss: 4344.521484, avg loss: 2.727258, ppl: 15.290897 +epoch: 2, batch: 313, sum loss: 4139.144531, avg loss: 2.410684, ppl: 11.141581 +epoch: 2, batch: 314, sum loss: 4130.316406, avg loss: 2.408348, ppl: 11.115579 +epoch: 2, batch: 315, sum loss: 4408.297363, avg loss: 2.370052, ppl: 10.697952 +epoch: 2, batch: 316, sum loss: 4550.660645, avg loss: 2.436114, ppl: 11.428541 
+epoch: 2, batch: 317, sum loss: 4522.141113, avg loss: 2.653839, ppl: 14.208475 +epoch: 2, batch: 318, sum loss: 3856.865723, avg loss: 2.340331, ppl: 10.384674 +epoch: 2, batch: 319, sum loss: 4421.086426, avg loss: 2.499201, ppl: 12.172765 +epoch: 2, batch: 320, sum loss: 4154.179688, avg loss: 2.517685, ppl: 12.399854 +epoch: 2, batch: 321, sum loss: 4638.706055, avg loss: 2.562821, ppl: 12.972363 +epoch: 2, batch: 322, sum loss: 4414.529297, avg loss: 2.528368, ppl: 12.533030 +epoch: 2, batch: 323, sum loss: 5184.343262, avg loss: 2.722869, ppl: 15.223943 +epoch: 2, batch: 324, sum loss: 5146.123535, avg loss: 2.870119, ppl: 17.639118 +epoch: 2, batch: 325, sum loss: 3675.235107, avg loss: 2.459997, ppl: 11.704773 +epoch: 2, batch: 326, sum loss: 4369.716797, avg loss: 2.546455, ppl: 12.761785 +epoch: 2, batch: 327, sum loss: 4239.748535, avg loss: 2.454979, ppl: 11.646189 +epoch: 2, batch: 328, sum loss: 4648.963867, avg loss: 2.721876, ppl: 15.208822 +epoch: 2, batch: 329, sum loss: 4030.993408, avg loss: 2.554495, ppl: 12.864802 +epoch: 2, batch: 330, sum loss: 5114.679199, avg loss: 2.977113, ppl: 19.631050 +epoch: 2, batch: 331, sum loss: 5128.990234, avg loss: 2.668570, ppl: 14.419329 +epoch: 2, batch: 332, sum loss: 5194.449219, avg loss: 2.798733, ppl: 16.423832 +epoch: 2, batch: 333, sum loss: 4516.024902, avg loss: 2.465079, ppl: 11.764412 +epoch: 2, batch: 334, sum loss: 4486.221680, avg loss: 2.521766, ppl: 12.450564 +epoch: 2, batch: 335, sum loss: 3688.608154, avg loss: 2.512676, ppl: 12.337899 +epoch: 2, batch: 336, sum loss: 4131.952148, avg loss: 2.547443, ppl: 12.774394 +epoch: 2, batch: 337, sum loss: 4378.359375, avg loss: 2.420321, ppl: 11.249464 +epoch: 2, batch: 338, sum loss: 4086.439453, avg loss: 2.611143, ppl: 13.614608 +epoch: 2, batch: 339, sum loss: 5046.723633, avg loss: 2.708923, ppl: 15.013096 +epoch: 2, batch: 340, sum loss: 4128.754883, avg loss: 2.347217, ppl: 10.456429 +epoch: 2, batch: 341, sum loss: 4074.377686, avg loss: 
2.749243, ppl: 15.630792 +epoch: 2, batch: 342, sum loss: 4098.990234, avg loss: 2.545957, ppl: 12.755424 +epoch: 2, batch: 343, sum loss: 5453.357422, avg loss: 2.850683, ppl: 17.299601 +epoch: 2, batch: 344, sum loss: 4549.270996, avg loss: 2.755464, ppl: 15.728338 +epoch: 2, batch: 345, sum loss: 4790.881348, avg loss: 2.903564, ppl: 18.239042 +epoch: 2, batch: 346, sum loss: 3459.446777, avg loss: 2.304761, ppl: 10.021787 +epoch: 2, batch: 347, sum loss: 4147.308105, avg loss: 2.505926, ppl: 12.254907 +epoch: 2, batch: 348, sum loss: 4346.578613, avg loss: 2.588790, ppl: 13.313655 +epoch: 2, batch: 349, sum loss: 3319.924072, avg loss: 2.442917, ppl: 11.506555 +epoch: 2, batch: 350, sum loss: 4748.673828, avg loss: 2.679839, ppl: 14.582740 +epoch: 2, batch: 351, sum loss: 5070.180176, avg loss: 2.877514, ppl: 17.770048 +epoch: 2, batch: 352, sum loss: 5122.509277, avg loss: 2.681942, ppl: 14.613445 +epoch: 2, batch: 353, sum loss: 3537.439209, avg loss: 2.361441, ppl: 10.606228 +epoch: 2, batch: 354, sum loss: 4896.822266, avg loss: 2.683190, ppl: 14.631700 +epoch: 2, batch: 355, sum loss: 4283.491699, avg loss: 2.719677, ppl: 15.175423 +epoch: 2, batch: 356, sum loss: 2780.678467, avg loss: 2.152228, ppl: 8.604006 +epoch: 2, batch: 357, sum loss: 4287.687988, avg loss: 2.579836, ppl: 13.194979 +epoch: 2, batch: 358, sum loss: 5077.400879, avg loss: 2.805194, ppl: 16.530285 +epoch: 2, batch: 359, sum loss: 5052.653320, avg loss: 2.485319, ppl: 12.004948 +epoch: 2, batch: 360, sum loss: 4505.930664, avg loss: 2.631969, ppl: 13.901111 +epoch: 2, batch: 361, sum loss: 3803.573975, avg loss: 2.371305, ppl: 10.711367 +epoch: 2, batch: 362, sum loss: 5206.509766, avg loss: 2.615022, ppl: 13.667522 +epoch: 2, batch: 363, sum loss: 4879.798340, avg loss: 2.822324, ppl: 16.815886 +epoch: 2, batch: 364, sum loss: 4969.868164, avg loss: 2.818984, ppl: 16.759806 +epoch: 2, batch: 365, sum loss: 4632.355469, avg loss: 2.580699, ppl: 13.206372 +epoch: 2, batch: 366, sum 
loss: 4854.081055, avg loss: 2.802587, ppl: 16.487249 +epoch: 2, batch: 367, sum loss: 3368.320312, avg loss: 2.337488, ppl: 10.355193 +epoch: 2, batch: 368, sum loss: 4336.358887, avg loss: 2.610692, ppl: 13.608461 +epoch: 2, batch: 369, sum loss: 4701.341309, avg loss: 2.570444, ppl: 13.071622 +epoch: 2, batch: 370, sum loss: 4903.866699, avg loss: 2.731959, ppl: 15.362959 +epoch: 2, batch: 371, sum loss: 3469.380615, avg loss: 2.616426, ppl: 13.686716 +epoch: 2, batch: 372, sum loss: 4635.409180, avg loss: 2.501570, ppl: 12.201635 +epoch: 2, batch: 373, sum loss: 4915.540039, avg loss: 2.528570, ppl: 12.535569 +epoch: 2, batch: 374, sum loss: 4751.109375, avg loss: 2.746306, ppl: 15.584953 +epoch: 2, batch: 375, sum loss: 4697.086914, avg loss: 2.674879, ppl: 14.510592 +epoch: 2, batch: 376, sum loss: 4489.794922, avg loss: 2.519526, ppl: 12.422704 +epoch: 2, batch: 377, sum loss: 4072.543457, avg loss: 2.615635, ppl: 13.675897 +epoch: 2, batch: 378, sum loss: 4065.005615, avg loss: 2.558216, ppl: 12.912765 +epoch: 2, batch: 379, sum loss: 5028.035156, avg loss: 2.482980, ppl: 11.976906 +epoch: 2, batch: 380, sum loss: 4129.029297, avg loss: 2.436006, ppl: 11.427304 +epoch: 2, batch: 381, sum loss: 4094.596191, avg loss: 2.568755, ppl: 13.049573 +epoch: 2, batch: 382, sum loss: 4466.335938, avg loss: 2.568336, ppl: 13.044098 +epoch: 2, batch: 383, sum loss: 4483.769531, avg loss: 2.608359, ppl: 13.576757 +epoch: 2, batch: 384, sum loss: 3948.516846, avg loss: 2.365798, ppl: 10.652536 +epoch: 2, batch: 385, sum loss: 3408.941895, avg loss: 2.315857, ppl: 10.133608 +epoch: 2, batch: 386, sum loss: 4056.532471, avg loss: 2.522719, ppl: 12.462438 +epoch: 2, batch: 387, sum loss: 3861.480713, avg loss: 2.430133, ppl: 11.360389 +epoch: 2, batch: 388, sum loss: 4266.099121, avg loss: 2.553022, ppl: 12.845861 +epoch: 2, batch: 389, sum loss: 4200.697266, avg loss: 2.715383, ppl: 15.110394 +epoch: 2, batch: 390, sum loss: 4205.223633, avg loss: 2.478034, ppl: 11.917811 
+epoch: 2, batch: 391, sum loss: 3696.438477, avg loss: 2.433468, ppl: 11.398347 +epoch: 2, batch: 392, sum loss: 4709.019531, avg loss: 2.755424, ppl: 15.727709 +epoch: 2, batch: 393, sum loss: 4045.575684, avg loss: 2.498811, ppl: 12.168023 +epoch: 2, batch: 394, sum loss: 5292.135742, avg loss: 2.882427, ppl: 17.857555 +epoch: 2, batch: 395, sum loss: 3387.460693, avg loss: 2.129139, ppl: 8.407628 +epoch: 2, batch: 396, sum loss: 4526.359375, avg loss: 2.393633, ppl: 10.953211 +epoch: 2, batch: 397, sum loss: 5746.971191, avg loss: 2.793860, ppl: 16.343992 +epoch: 2, batch: 398, sum loss: 4907.331543, avg loss: 2.738467, ppl: 15.463254 +epoch: 2, batch: 399, sum loss: 3716.776123, avg loss: 2.426094, ppl: 11.314602 +epoch: 2, batch: 400, sum loss: 4461.983398, avg loss: 2.480258, ppl: 11.944340 +epoch: 2, batch: 401, sum loss: 3877.896240, avg loss: 2.379077, ppl: 10.794939 +epoch: 2, batch: 402, sum loss: 3749.186279, avg loss: 2.238320, ppl: 9.377567 +epoch: 2, batch: 403, sum loss: 3566.559570, avg loss: 2.325006, ppl: 10.226744 +epoch: 2, batch: 404, sum loss: 3635.389404, avg loss: 2.634340, ppl: 13.934117 +epoch: 2, batch: 405, sum loss: 5991.809082, avg loss: 2.810417, ppl: 16.616844 +epoch: 2, batch: 406, sum loss: 4341.664062, avg loss: 2.843264, ppl: 17.171721 +epoch: 2, batch: 407, sum loss: 4204.399902, avg loss: 2.427483, ppl: 11.330323 +epoch: 2, batch: 408, sum loss: 4535.590820, avg loss: 2.620214, ppl: 13.738667 +epoch: 2, batch: 409, sum loss: 3649.692627, avg loss: 2.282484, ppl: 9.800999 +epoch: 2, batch: 410, sum loss: 4429.977051, avg loss: 2.675107, ppl: 14.513899 +epoch: 2, batch: 411, sum loss: 4566.566895, avg loss: 2.546886, ppl: 12.767287 +epoch: 2, batch: 412, sum loss: 4234.767090, avg loss: 2.426800, ppl: 11.322598 +epoch: 2, batch: 413, sum loss: 5113.074707, avg loss: 2.746012, ppl: 15.580377 +epoch: 2, batch: 414, sum loss: 3786.296875, avg loss: 2.314363, ppl: 10.118470 +epoch: 2, batch: 415, sum loss: 3179.111572, avg loss: 
2.436101, ppl: 11.428394 +epoch: 2, batch: 416, sum loss: 3241.255859, avg loss: 2.274565, ppl: 9.723693 +epoch: 2, batch: 417, sum loss: 4367.514160, avg loss: 2.393158, ppl: 10.948018 +epoch: 2, batch: 418, sum loss: 4304.404785, avg loss: 2.419564, ppl: 11.240960 +epoch: 2, batch: 419, sum loss: 4444.241211, avg loss: 2.812811, ppl: 16.656673 +epoch: 2, batch: 420, sum loss: 4713.125000, avg loss: 2.741783, ppl: 15.514626 +epoch: 2, batch: 421, sum loss: 4719.468750, avg loss: 2.584594, ppl: 13.257909 +epoch: 2, batch: 422, sum loss: 4187.709473, avg loss: 2.338196, ppl: 10.362526 +epoch: 2, batch: 423, sum loss: 4916.241211, avg loss: 2.886812, ppl: 17.936043 +epoch: 2, batch: 424, sum loss: 3500.229980, avg loss: 2.313437, ppl: 10.109110 +epoch: 2, batch: 425, sum loss: 4122.147461, avg loss: 2.495247, ppl: 12.124723 +epoch: 2, batch: 426, sum loss: 4974.705078, avg loss: 2.739375, ppl: 15.477306 +epoch: 2, batch: 427, sum loss: 4414.747070, avg loss: 2.728521, ppl: 15.310228 +epoch: 2, batch: 428, sum loss: 3922.370117, avg loss: 2.323679, ppl: 10.213180 +epoch: 2, batch: 429, sum loss: 4159.855469, avg loss: 2.489441, ppl: 12.054535 +epoch: 2, batch: 430, sum loss: 4082.972412, avg loss: 2.452236, ppl: 11.614284 +epoch: 2, batch: 431, sum loss: 4156.037598, avg loss: 2.491629, ppl: 12.080944 +epoch: 2, batch: 432, sum loss: 3710.367432, avg loss: 2.505312, ppl: 12.247382 +epoch: 2, batch: 433, sum loss: 4745.686523, avg loss: 2.461456, ppl: 11.721864 +epoch: 2, batch: 434, sum loss: 3517.916260, avg loss: 2.373763, ppl: 10.737720 +epoch: 2, batch: 435, sum loss: 4764.249023, avg loss: 2.824095, ppl: 16.845701 +epoch: 2, batch: 436, sum loss: 4976.639160, avg loss: 2.737425, ppl: 15.447163 +epoch: 2, batch: 437, sum loss: 3774.543457, avg loss: 2.619392, ppl: 13.727374 +epoch: 2, batch: 438, sum loss: 4703.266602, avg loss: 2.763376, ppl: 15.853277 +epoch: 2, batch: 439, sum loss: 3991.657471, avg loss: 2.413336, ppl: 11.171164 +epoch: 2, batch: 440, sum 
loss: 4325.744629, avg loss: 2.520830, ppl: 12.438919 +epoch: 2, batch: 441, sum loss: 3984.420898, avg loss: 2.484053, ppl: 11.989759 +epoch: 2, batch: 442, sum loss: 4742.852051, avg loss: 2.488380, ppl: 12.041749 +epoch: 2, batch: 443, sum loss: 4356.818359, avg loss: 2.587184, ppl: 13.292294 +epoch: 2, batch: 444, sum loss: 5138.219238, avg loss: 2.678946, ppl: 14.569736 +epoch: 2, batch: 445, sum loss: 3692.575684, avg loss: 2.285010, ppl: 9.825781 +epoch: 2, batch: 446, sum loss: 4192.951660, avg loss: 2.545811, ppl: 12.753572 +epoch: 2, batch: 447, sum loss: 4458.158203, avg loss: 2.576970, ppl: 13.157212 +epoch: 2, batch: 448, sum loss: 4234.606934, avg loss: 2.528124, ppl: 12.529973 +epoch: 2, batch: 449, sum loss: 4589.696777, avg loss: 2.674649, ppl: 14.507250 +epoch: 2, batch: 450, sum loss: 4737.483887, avg loss: 2.605877, ppl: 13.543093 +epoch: 2, batch: 451, sum loss: 4730.115234, avg loss: 2.590425, ppl: 13.335435 +epoch: 2, batch: 452, sum loss: 4252.584961, avg loss: 2.438409, ppl: 11.454797 +epoch: 2, batch: 453, sum loss: 4698.501953, avg loss: 2.594424, ppl: 13.388877 +epoch: 2, batch: 454, sum loss: 5390.740723, avg loss: 2.694023, ppl: 14.791066 +epoch: 2, batch: 455, sum loss: 4169.073242, avg loss: 2.633653, ppl: 13.924545 +epoch: 2, batch: 456, sum loss: 5104.007324, avg loss: 2.758923, ppl: 15.782837 +epoch: 2, batch: 457, sum loss: 4220.896484, avg loss: 2.485805, ppl: 12.010782 +epoch: 2, batch: 458, sum loss: 4665.333984, avg loss: 2.710827, ppl: 15.041715 +epoch: 2, batch: 459, sum loss: 3370.912354, avg loss: 2.302536, ppl: 9.999507 +epoch: 2, batch: 460, sum loss: 4582.228027, avg loss: 2.409163, ppl: 11.124648 +epoch: 2, batch: 461, sum loss: 4074.203125, avg loss: 2.488823, ppl: 12.047088 +epoch: 2, batch: 462, sum loss: 4037.322754, avg loss: 2.675496, ppl: 14.519545 +epoch: 2, batch: 463, sum loss: 4292.050781, avg loss: 2.665870, ppl: 14.380454 +epoch: 2, batch: 464, sum loss: 4563.467773, avg loss: 2.706683, ppl: 14.979508 
+epoch: 2, batch: 465, sum loss: 5384.680664, avg loss: 2.850546, ppl: 17.297218 +epoch: 2, batch: 466, sum loss: 4951.574707, avg loss: 2.737189, ppl: 15.443513 +epoch: 2, batch: 467, sum loss: 5052.776367, avg loss: 2.653769, ppl: 14.207490 +epoch: 2, batch: 468, sum loss: 4132.692383, avg loss: 2.515333, ppl: 12.370729 +epoch: 2, batch: 469, sum loss: 3584.492676, avg loss: 2.420319, ppl: 11.249451 +epoch: 2, batch: 470, sum loss: 4161.070312, avg loss: 2.444812, ppl: 11.528382 +epoch: 2, batch: 471, sum loss: 4719.188965, avg loss: 2.676795, ppl: 14.538416 +epoch: 2, batch: 472, sum loss: 4433.447266, avg loss: 2.586609, ppl: 13.284642 +epoch: 2, batch: 473, sum loss: 4616.854980, avg loss: 2.696761, ppl: 14.831613 +epoch: 2, batch: 474, sum loss: 4431.890625, avg loss: 2.597826, ppl: 13.434493 +epoch: 2, batch: 475, sum loss: 5307.483398, avg loss: 3.089339, ppl: 21.962545 +epoch: 2, batch: 476, sum loss: 4039.347656, avg loss: 2.381691, ppl: 10.823189 +epoch: 2, batch: 477, sum loss: 4259.574707, avg loss: 2.660571, ppl: 14.304460 +epoch: 2, batch: 478, sum loss: 3899.325684, avg loss: 2.389293, ppl: 10.905778 +epoch: 2, batch: 479, sum loss: 4622.601562, avg loss: 2.711203, ppl: 15.047372 +epoch: 2, batch: 480, sum loss: 3960.901855, avg loss: 2.336815, ppl: 10.348228 +epoch: 2, batch: 481, sum loss: 4370.440430, avg loss: 2.656803, ppl: 14.250652 +epoch: 2, batch: 482, sum loss: 3790.723389, avg loss: 2.559570, ppl: 12.930260 +epoch: 2, batch: 483, sum loss: 3385.888184, avg loss: 2.382750, ppl: 10.834661 +epoch: 2, batch: 484, sum loss: 4226.931641, avg loss: 2.671891, ppl: 14.467300 +epoch: 2, batch: 485, sum loss: 4639.895508, avg loss: 2.827480, ppl: 16.902817 +epoch: 2, batch: 486, sum loss: 4829.678223, avg loss: 2.528627, ppl: 12.536287 +epoch: 2, batch: 487, sum loss: 3196.327637, avg loss: 2.187767, ppl: 8.915283 +epoch: 2, batch: 488, sum loss: 3985.219238, avg loss: 2.531905, ppl: 12.577449 +epoch: 2, batch: 489, sum loss: 4442.754395, avg loss: 
2.565101, ppl: 13.001967 +epoch: 2, batch: 490, sum loss: 3886.912842, avg loss: 2.194756, ppl: 8.977811 +epoch: 2, batch: 491, sum loss: 5018.160156, avg loss: 2.764827, ppl: 15.876286 +epoch: 2, batch: 492, sum loss: 4404.153320, avg loss: 2.720292, ppl: 15.184760 +epoch: 2, batch: 493, sum loss: 4570.623047, avg loss: 2.554848, ppl: 12.869343 +epoch: 2, batch: 494, sum loss: 3822.511475, avg loss: 2.414726, ppl: 11.186705 +epoch: 2, batch: 495, sum loss: 3581.836670, avg loss: 2.501283, ppl: 12.198131 +epoch: 2, batch: 496, sum loss: 5326.457520, avg loss: 2.691489, ppl: 14.753634 +epoch: 2, batch: 497, sum loss: 4699.396973, avg loss: 2.609326, ppl: 13.589893 +epoch: 2, batch: 498, sum loss: 3727.336914, avg loss: 2.436168, ppl: 11.429160 +epoch: 2, batch: 499, sum loss: 3850.990234, avg loss: 2.654025, ppl: 14.211122 +epoch: 2, batch: 500, sum loss: 4628.000488, avg loss: 2.818514, ppl: 16.751945 +epoch: 2, batch: 501, sum loss: 4218.467285, avg loss: 2.583262, ppl: 13.240260 +epoch: 2, batch: 502, sum loss: 4230.368652, avg loss: 2.501697, ppl: 12.203180 +epoch: 2, batch: 503, sum loss: 3888.947266, avg loss: 2.572055, ppl: 13.092704 +epoch: 2, batch: 504, sum loss: 5063.499512, avg loss: 2.797514, ppl: 16.403812 +epoch: 2, batch: 505, sum loss: 5033.675781, avg loss: 2.694687, ppl: 14.800887 +epoch: 2, batch: 506, sum loss: 4672.393066, avg loss: 2.563024, ppl: 12.974998 +epoch: 2, batch: 507, sum loss: 4068.443359, avg loss: 2.616362, ppl: 13.685848 +epoch: 2, batch: 508, sum loss: 4089.250977, avg loss: 2.619636, ppl: 13.730719 +epoch: 2, batch: 509, sum loss: 4693.463379, avg loss: 2.553571, ppl: 12.852917 +epoch: 2, batch: 510, sum loss: 3765.665771, avg loss: 2.188068, ppl: 8.917971 +epoch: 2, batch: 511, sum loss: 3209.400879, avg loss: 2.377334, ppl: 10.776134 +epoch: 2, batch: 512, sum loss: 4135.173340, avg loss: 2.620515, ppl: 13.742805 +epoch: 2, batch: 513, sum loss: 4756.781738, avg loss: 2.809676, ppl: 16.604540 +epoch: 2, batch: 514, sum loss: 
4968.155762, avg loss: 2.595693, ppl: 13.405869 +epoch: 2, batch: 515, sum loss: 4260.041992, avg loss: 2.739577, ppl: 15.480436 +epoch: 2, batch: 516, sum loss: 4320.324707, avg loss: 2.497298, ppl: 12.149615 +epoch: 2, batch: 517, sum loss: 4344.486328, avg loss: 2.634619, ppl: 13.937998 +epoch: 2, batch: 518, sum loss: 3676.373047, avg loss: 2.407579, ppl: 11.107038 +epoch: 2, batch: 519, sum loss: 4384.394531, avg loss: 2.573002, ppl: 13.105102 +epoch: 2, batch: 520, sum loss: 4409.676270, avg loss: 2.609276, ppl: 13.589207 +epoch: 2, batch: 521, sum loss: 4203.984863, avg loss: 2.291000, ppl: 9.884819 +epoch: 2, batch: 522, sum loss: 4010.466064, avg loss: 2.675428, ppl: 14.518558 +epoch: 2, batch: 523, sum loss: 3933.471680, avg loss: 2.328876, ppl: 10.266396 +epoch: 2, batch: 524, sum loss: 3321.672852, avg loss: 2.394861, ppl: 10.966679 +epoch: 2, batch: 525, sum loss: 4291.896484, avg loss: 2.604306, ppl: 13.521841 +epoch: 2, batch: 526, sum loss: 3402.482666, avg loss: 2.423421, ppl: 11.284393 +epoch: 2, batch: 527, sum loss: 5071.646484, avg loss: 2.849240, ppl: 17.274641 +epoch: 2, batch: 528, sum loss: 4492.549805, avg loss: 2.501420, ppl: 12.199802 +epoch: 2, batch: 529, sum loss: 3944.127197, avg loss: 2.547886, ppl: 12.780057 +epoch: 2, batch: 530, sum loss: 4825.017578, avg loss: 2.922482, ppl: 18.587364 +epoch: 2, batch: 531, sum loss: 3822.718262, avg loss: 2.367008, ppl: 10.665436 +epoch: 2, batch: 532, sum loss: 4530.833008, avg loss: 2.669908, ppl: 14.438635 +epoch: 2, batch: 533, sum loss: 4469.944824, avg loss: 2.634028, ppl: 13.929762 +epoch: 2, batch: 534, sum loss: 4660.576660, avg loss: 2.532922, ppl: 12.590241 +epoch: 2, batch: 535, sum loss: 4894.280762, avg loss: 2.843858, ppl: 17.181934 +epoch: 2, batch: 536, sum loss: 4625.760254, avg loss: 2.526357, ppl: 12.507862 +epoch: 2, batch: 537, sum loss: 4680.179688, avg loss: 2.585735, ppl: 13.273039 +epoch: 2, batch: 538, sum loss: 4200.223633, avg loss: 2.583163, ppl: 13.238951 +epoch: 
2, batch: 539, sum loss: 3903.930176, avg loss: 2.414304, ppl: 11.181991 +epoch: 2, batch: 540, sum loss: 4014.859131, avg loss: 2.411327, ppl: 11.148745 +epoch: 2, batch: 541, sum loss: 3511.283203, avg loss: 2.316150, ppl: 10.136573 +epoch: 2, batch: 542, sum loss: 4238.872559, avg loss: 2.547400, ppl: 12.773843 +epoch: 2, batch: 543, sum loss: 4675.229492, avg loss: 2.501460, ppl: 12.200297 +epoch: 2, batch: 544, sum loss: 3943.975586, avg loss: 2.546143, ppl: 12.757803 +epoch: 2, batch: 545, sum loss: 4351.005859, avg loss: 2.715984, ppl: 15.119474 +epoch: 2, batch: 546, sum loss: 4298.193359, avg loss: 2.471647, ppl: 11.841929 +epoch: 2, batch: 547, sum loss: 4538.136230, avg loss: 2.698060, ppl: 14.850886 +epoch: 2, batch: 548, sum loss: 3240.335449, avg loss: 1.971007, ppl: 7.177901 +epoch: 2, batch: 549, sum loss: 4397.090820, avg loss: 2.415984, ppl: 11.200788 +epoch: 2, batch: 550, sum loss: 5201.066406, avg loss: 2.740288, ppl: 15.491446 +epoch: 2, batch: 551, sum loss: 3643.285156, avg loss: 2.495401, ppl: 12.126595 +epoch: 2, batch: 552, sum loss: 3790.156738, avg loss: 2.290125, ppl: 9.876171 +epoch: 2, batch: 553, sum loss: 4344.675781, avg loss: 2.583041, ppl: 13.237337 +epoch: 2, batch: 554, sum loss: 3517.889893, avg loss: 2.242122, ppl: 9.413287 +epoch: 2, batch: 555, sum loss: 4435.785645, avg loss: 2.334624, ppl: 10.325580 +epoch: 2, batch: 556, sum loss: 5159.350586, avg loss: 2.772354, ppl: 15.996243 +epoch: 2, batch: 557, sum loss: 5744.759766, avg loss: 3.132366, ppl: 22.928167 +epoch: 2, batch: 558, sum loss: 4360.108398, avg loss: 2.461947, ppl: 11.727626 +epoch: 2, batch: 559, sum loss: 4303.533203, avg loss: 2.560103, ppl: 12.937149 +epoch: 2, batch: 560, sum loss: 4100.954102, avg loss: 2.354164, ppl: 10.529324 +epoch: 2, batch: 561, sum loss: 4578.729492, avg loss: 2.460360, ppl: 11.709023 +epoch: 2, batch: 562, sum loss: 4193.721680, avg loss: 2.407418, ppl: 11.105247 +epoch: 2, batch: 563, sum loss: 4307.074707, avg loss: 2.666919, 
ppl: 14.395552 +epoch: 2, batch: 564, sum loss: 4735.099121, avg loss: 2.664659, ppl: 14.363051 +epoch: 2, batch: 565, sum loss: 3958.127930, avg loss: 2.511503, ppl: 12.323432 +epoch: 2, batch: 566, sum loss: 4996.515625, avg loss: 2.584850, ppl: 13.261304 +epoch: 2, batch: 567, sum loss: 4025.951172, avg loss: 2.377998, ppl: 10.783297 +epoch: 2, batch: 568, sum loss: 4027.489990, avg loss: 2.646183, ppl: 14.100113 +epoch: 2, batch: 569, sum loss: 5309.221191, avg loss: 2.763780, ppl: 15.859681 +epoch: 2, batch: 570, sum loss: 4136.211914, avg loss: 2.616200, ppl: 13.683623 +epoch: 2, batch: 571, sum loss: 4661.336426, avg loss: 2.719566, ppl: 15.173737 +epoch: 2, batch: 572, sum loss: 4700.928711, avg loss: 2.615987, ppl: 13.680714 +epoch: 2, batch: 573, sum loss: 4724.441406, avg loss: 2.538657, ppl: 12.662659 +epoch: 2, batch: 574, sum loss: 4998.355469, avg loss: 2.523147, ppl: 12.467775 +epoch: 2, batch: 575, sum loss: 3824.924805, avg loss: 2.528040, ppl: 12.528931 +epoch: 2, batch: 576, sum loss: 4233.913574, avg loss: 2.402902, ppl: 11.055216 +epoch: 2, batch: 577, sum loss: 5175.507324, avg loss: 2.655468, ppl: 14.231648 +epoch: 2, batch: 578, sum loss: 3846.690430, avg loss: 2.362832, ppl: 10.620986 +epoch: 2, batch: 579, sum loss: 4387.506348, avg loss: 2.367785, ppl: 10.673729 +epoch: 2, batch: 580, sum loss: 3240.524414, avg loss: 2.411104, ppl: 11.146265 +epoch: 2, batch: 581, sum loss: 3561.318604, avg loss: 2.567641, ppl: 13.035042 +epoch: 2, batch: 582, sum loss: 4373.727539, avg loss: 2.371870, ppl: 10.717410 +epoch: 2, batch: 583, sum loss: 4672.196777, avg loss: 2.594223, ppl: 13.386189 +epoch: 2, batch: 584, sum loss: 4248.293457, avg loss: 2.498996, ppl: 12.170272 +epoch: 2, batch: 585, sum loss: 4321.951172, avg loss: 2.605154, ppl: 13.533313 +epoch: 2, batch: 586, sum loss: 5303.766113, avg loss: 2.840796, ppl: 17.129395 +epoch: 2, batch: 587, sum loss: 4193.553711, avg loss: 2.571155, ppl: 13.080925 +epoch: 2, batch: 588, sum loss: 
4475.816406, avg loss: 2.569355, ppl: 13.057400 +epoch: 2, batch: 589, sum loss: 3723.745117, avg loss: 2.505885, ppl: 12.254398 +epoch: 2, batch: 590, sum loss: 5659.940918, avg loss: 2.769051, ppl: 15.943501 +epoch: 2, batch: 591, sum loss: 3267.288086, avg loss: 2.291226, ppl: 9.887051 +epoch: 2, batch: 592, sum loss: 4306.093750, avg loss: 2.666312, ppl: 14.386812 +epoch: 2, batch: 593, sum loss: 3473.819824, avg loss: 2.571295, ppl: 13.082756 +epoch: 2, batch: 594, sum loss: 3730.156250, avg loss: 2.199385, ppl: 9.019462 +epoch: 2, batch: 595, sum loss: 4308.770996, avg loss: 2.586297, ppl: 13.280503 +epoch: 2, batch: 596, sum loss: 4485.527344, avg loss: 2.589796, ppl: 13.327057 +epoch: 2, batch: 597, sum loss: 3464.044189, avg loss: 2.195212, ppl: 8.981902 +epoch: 2, batch: 598, sum loss: 4613.519043, avg loss: 2.648403, ppl: 14.131458 +epoch: 2, batch: 599, sum loss: 4938.949707, avg loss: 2.618743, ppl: 13.718474 +epoch: 2, batch: 600, sum loss: 4275.863281, avg loss: 2.758622, ppl: 15.778081 +epoch: 2, batch: 601, sum loss: 4859.566406, avg loss: 2.661318, ppl: 14.315145 +epoch: 2, batch: 602, sum loss: 4386.709473, avg loss: 2.475570, ppl: 11.888478 +epoch: 2, batch: 603, sum loss: 5937.658203, avg loss: 2.849164, ppl: 17.273340 +epoch: 2, batch: 604, sum loss: 4302.659668, avg loss: 2.533957, ppl: 12.603285 +epoch: 2, batch: 605, sum loss: 4267.160156, avg loss: 2.539976, ppl: 12.679371 +epoch: 2, batch: 606, sum loss: 4078.611816, avg loss: 2.513008, ppl: 12.341997 +epoch: 2, batch: 607, sum loss: 4069.290039, avg loss: 2.458786, ppl: 11.690605 +epoch: 2, batch: 608, sum loss: 4718.688477, avg loss: 2.865020, ppl: 17.549410 +epoch: 2, batch: 609, sum loss: 4100.358398, avg loss: 2.535781, ppl: 12.626293 +epoch: 2, batch: 610, sum loss: 5408.821289, avg loss: 2.712548, ppl: 15.067623 +epoch: 2, batch: 611, sum loss: 5290.442871, avg loss: 2.869004, ppl: 17.619457 +epoch: 2, batch: 612, sum loss: 3728.787598, avg loss: 2.443504, ppl: 11.513317 +epoch: 2, 
batch: 613, sum loss: 3897.405273, avg loss: 2.435878, ppl: 11.425849 +epoch: 2, batch: 614, sum loss: 4180.484863, avg loss: 2.360522, ppl: 10.596484 +epoch: 2, batch: 615, sum loss: 3503.946533, avg loss: 2.276768, ppl: 9.745137 +epoch: 2, batch: 616, sum loss: 3948.325684, avg loss: 2.357209, ppl: 10.561438 +epoch: 2, batch: 617, sum loss: 4240.971191, avg loss: 2.556342, ppl: 12.888583 +epoch: 2, batch: 618, sum loss: 3977.803467, avg loss: 2.556429, ppl: 12.889705 +epoch: 2, batch: 619, sum loss: 3935.790527, avg loss: 2.402802, ppl: 11.054111 +epoch: 2, batch: 620, sum loss: 4346.235840, avg loss: 2.468050, ppl: 11.799413 +epoch: 2, batch: 621, sum loss: 4466.272949, avg loss: 2.504920, ppl: 12.242585 +epoch: 2, batch: 622, sum loss: 4541.736328, avg loss: 2.547244, ppl: 12.771860 +epoch: 2, batch: 623, sum loss: 4210.337891, avg loss: 2.562591, ppl: 12.969381 +epoch: 2, batch: 624, sum loss: 3188.663818, avg loss: 2.439682, ppl: 11.469387 +epoch: 2, batch: 625, sum loss: 4406.646484, avg loss: 2.657809, ppl: 14.264994 +epoch: 2, batch: 626, sum loss: 4357.635742, avg loss: 2.581538, ppl: 13.217450 +epoch: 2, batch: 627, sum loss: 4721.705078, avg loss: 2.594344, ppl: 13.387797 +epoch: 2, batch: 628, sum loss: 4406.834961, avg loss: 2.539963, ppl: 12.679195 +epoch: 2, batch: 629, sum loss: 5006.729980, avg loss: 2.746423, ppl: 15.586785 +epoch: 2, batch: 630, sum loss: 4592.148926, avg loss: 2.798384, ppl: 16.418100 +epoch: 2, batch: 631, sum loss: 4563.377930, avg loss: 2.431208, ppl: 11.372616 +epoch: 2, batch: 632, sum loss: 4729.213867, avg loss: 2.546695, ppl: 12.764853 +epoch: 2, batch: 633, sum loss: 4402.787109, avg loss: 2.455542, ppl: 11.652751 +epoch: 2, batch: 634, sum loss: 4317.310059, avg loss: 2.734205, ppl: 15.397501 +epoch: 2, batch: 635, sum loss: 4487.516113, avg loss: 2.487537, ppl: 12.031602 +epoch: 2, batch: 636, sum loss: 4080.039062, avg loss: 2.527905, ppl: 12.527237 +epoch: 2, batch: 637, sum loss: 4878.836426, avg loss: 2.810390, 
ppl: 16.616394 +epoch: 2, batch: 638, sum loss: 3987.895264, avg loss: 2.470815, ppl: 11.832086 +epoch: 2, batch: 639, sum loss: 3333.853271, avg loss: 2.431695, ppl: 11.378146 +epoch: 2, batch: 640, sum loss: 4774.652344, avg loss: 2.632113, ppl: 13.903112 +epoch: 2, batch: 641, sum loss: 4602.743652, avg loss: 2.535947, ppl: 12.628386 +epoch: 2, batch: 642, sum loss: 4524.393555, avg loss: 2.585368, ppl: 13.268167 +epoch: 2, batch: 643, sum loss: 4446.643066, avg loss: 2.507977, ppl: 12.280063 +epoch: 2, batch: 644, sum loss: 3527.496338, avg loss: 2.447950, ppl: 11.564619 +epoch: 2, batch: 645, sum loss: 3647.337891, avg loss: 2.415456, ppl: 11.194869 +epoch: 2, batch: 646, sum loss: 3830.670898, avg loss: 2.535189, ppl: 12.618817 +epoch: 2, batch: 647, sum loss: 3748.868164, avg loss: 2.415508, ppl: 11.195457 +epoch: 2, batch: 648, sum loss: 4284.423340, avg loss: 2.245505, ppl: 9.445183 +epoch: 2, batch: 649, sum loss: 4728.187012, avg loss: 2.720476, ppl: 15.187549 +epoch: 2, batch: 650, sum loss: 4657.695312, avg loss: 2.687649, ppl: 14.697079 +epoch: 2, batch: 651, sum loss: 4642.726074, avg loss: 2.674381, ppl: 14.503377 +epoch: 2, batch: 652, sum loss: 4291.918945, avg loss: 2.644436, ppl: 14.075500 +epoch: 2, batch: 653, sum loss: 4573.445312, avg loss: 2.484218, ppl: 11.991740 +epoch: 2, batch: 654, sum loss: 4044.589600, avg loss: 2.564737, ppl: 12.997234 +epoch: 2, batch: 655, sum loss: 4235.093750, avg loss: 2.732319, ppl: 15.368480 +epoch: 2, batch: 656, sum loss: 4585.475586, avg loss: 2.731075, ppl: 15.349383 +epoch: 2, batch: 657, sum loss: 4910.490234, avg loss: 2.903897, ppl: 18.245113 +epoch: 2, batch: 658, sum loss: 4974.167969, avg loss: 2.799194, ppl: 16.431398 +epoch: 2, batch: 659, sum loss: 4851.218750, avg loss: 2.439024, ppl: 11.461848 +epoch: 2, batch: 660, sum loss: 3666.156006, avg loss: 2.404037, ppl: 11.067764 +epoch: 2, batch: 661, sum loss: 3740.347168, avg loss: 2.364316, ppl: 10.636756 +epoch: 2, batch: 662, sum loss: 
3883.227539, avg loss: 2.510167, ppl: 12.306981 +epoch: 2, batch: 663, sum loss: 5158.820801, avg loss: 2.749905, ppl: 15.641140 +epoch: 2, batch: 664, sum loss: 4499.506836, avg loss: 2.681470, ppl: 14.606551 +epoch: 2, batch: 665, sum loss: 4354.272949, avg loss: 2.594918, ppl: 13.395493 +epoch: 2, batch: 666, sum loss: 4261.252441, avg loss: 2.430834, ppl: 11.368362 +epoch: 2, batch: 667, sum loss: 4044.275146, avg loss: 2.414493, ppl: 11.184097 +epoch: 2, batch: 668, sum loss: 4082.468994, avg loss: 2.391605, ppl: 10.931021 +epoch: 2, batch: 669, sum loss: 4370.592773, avg loss: 2.716341, ppl: 15.124879 +epoch: 2, batch: 670, sum loss: 3788.092285, avg loss: 2.399045, ppl: 11.012657 +epoch: 2, batch: 671, sum loss: 4042.934082, avg loss: 2.415134, ppl: 11.191269 +epoch: 2, batch: 672, sum loss: 4131.745605, avg loss: 2.433301, ppl: 11.396445 +epoch: 2, batch: 673, sum loss: 3923.702393, avg loss: 2.508761, ppl: 12.289696 +epoch: 2, batch: 674, sum loss: 3668.580811, avg loss: 2.383743, ppl: 10.845425 +epoch: 2, batch: 675, sum loss: 4785.473633, avg loss: 2.686959, ppl: 14.686942 +epoch: 2, batch: 676, sum loss: 5433.381348, avg loss: 2.727601, ppl: 15.296152 +epoch: 2, batch: 677, sum loss: 4262.328125, avg loss: 2.448207, ppl: 11.567586 +epoch: 2, batch: 678, sum loss: 4125.080078, avg loss: 2.584637, ppl: 13.258471 +epoch: 2, batch: 679, sum loss: 3666.066406, avg loss: 2.462100, ppl: 11.729412 +epoch: 2, batch: 680, sum loss: 3725.069824, avg loss: 2.532338, ppl: 12.582895 +epoch: 2, batch: 681, sum loss: 3661.141602, avg loss: 2.487189, ppl: 12.027414 +epoch: 2, batch: 682, sum loss: 4849.544922, avg loss: 2.725995, ppl: 15.271599 +epoch: 2, batch: 683, sum loss: 4527.321289, avg loss: 2.547733, ppl: 12.778101 +epoch: 2, batch: 684, sum loss: 4329.417969, avg loss: 2.840825, ppl: 17.129898 +epoch: 2, batch: 685, sum loss: 3737.941406, avg loss: 2.333297, ppl: 10.311881 +epoch: 2, batch: 686, sum loss: 4210.051758, avg loss: 2.361218, ppl: 10.603862 +epoch: 
2, batch: 687, sum loss: 3779.385742, avg loss: 2.444622, ppl: 11.526193 +epoch: 2, batch: 688, sum loss: 4161.625488, avg loss: 2.548454, ppl: 12.787320 +epoch: 2, batch: 689, sum loss: 3990.367432, avg loss: 2.449581, ppl: 11.583491 +epoch: 2, batch: 690, sum loss: 5353.169922, avg loss: 2.788110, ppl: 16.250271 +epoch: 2, batch: 691, sum loss: 4361.263184, avg loss: 2.548956, ppl: 12.793739 +epoch: 2, batch: 692, sum loss: 3921.514648, avg loss: 2.381005, ppl: 10.815763 +epoch: 2, batch: 693, sum loss: 3885.738770, avg loss: 2.436200, ppl: 11.429524 +epoch: 2, batch: 694, sum loss: 4490.754395, avg loss: 2.669890, ppl: 14.438376 +epoch: 2, batch: 695, sum loss: 4305.045898, avg loss: 2.464251, ppl: 11.754669 +epoch: 2, batch: 696, sum loss: 4451.669434, avg loss: 2.664075, ppl: 14.354668 +epoch: 2, batch: 697, sum loss: 4325.938477, avg loss: 2.534235, ppl: 12.606777 +epoch: 2, batch: 698, sum loss: 4680.291992, avg loss: 2.795874, ppl: 16.376930 +epoch: 2, batch: 699, sum loss: 4129.931641, avg loss: 2.563583, ppl: 12.982245 +epoch: 2, batch: 700, sum loss: 3292.815674, avg loss: 2.352011, ppl: 10.506680 +epoch: 2, batch: 701, sum loss: 4385.809082, avg loss: 2.564801, ppl: 12.998065 +epoch: 2, batch: 702, sum loss: 4667.946289, avg loss: 2.607791, ppl: 13.569046 +epoch: 2, batch: 703, sum loss: 3941.979004, avg loss: 2.600250, ppl: 13.467105 +epoch: 2, batch: 704, sum loss: 4798.849121, avg loss: 2.861568, ppl: 17.488924 +epoch: 2, batch: 705, sum loss: 4185.943359, avg loss: 2.547744, ppl: 12.778241 +epoch: 2, batch: 706, sum loss: 4632.563965, avg loss: 2.641142, ppl: 14.029221 +epoch: 2, batch: 707, sum loss: 3513.857910, avg loss: 2.460685, ppl: 11.712830 +epoch: 2, batch: 708, sum loss: 5274.590332, avg loss: 2.851130, ppl: 17.307329 +epoch: 2, batch: 709, sum loss: 4472.583984, avg loss: 2.845155, ppl: 17.204229 +epoch: 2, batch: 710, sum loss: 3704.795654, avg loss: 2.222433, ppl: 9.229758 +epoch: 2, batch: 711, sum loss: 4422.444824, avg loss: 
2.515611, ppl: 12.374172 +epoch: 2, batch: 712, sum loss: 4691.159180, avg loss: 2.748189, ppl: 15.614336 +epoch: 2, batch: 713, sum loss: 3678.352539, avg loss: 2.540299, ppl: 12.683459 +epoch: 2, batch: 714, sum loss: 3343.666260, avg loss: 2.273057, ppl: 9.709031 +epoch: 2, batch: 715, sum loss: 4321.378906, avg loss: 2.490708, ppl: 12.069822 +epoch: 2, batch: 716, sum loss: 4381.757324, avg loss: 2.745462, ppl: 15.571806 +epoch: 2, batch: 717, sum loss: 5062.096191, avg loss: 2.924377, ppl: 18.622616 +epoch: 2, batch: 718, sum loss: 4396.552734, avg loss: 2.468587, ppl: 11.805750 +epoch: 2, batch: 719, sum loss: 4605.715332, avg loss: 2.591849, ppl: 13.354439 +epoch: 2, batch: 720, sum loss: 6011.604492, avg loss: 2.726351, ppl: 15.277043 +epoch: 2, batch: 721, sum loss: 4103.939453, avg loss: 2.589236, ppl: 13.319595 +epoch: 2, batch: 722, sum loss: 4870.763184, avg loss: 2.692517, ppl: 14.768803 +epoch: 2, batch: 723, sum loss: 3630.106201, avg loss: 2.354155, ppl: 10.529223 +epoch: 2, batch: 724, sum loss: 4327.370605, avg loss: 2.725044, ppl: 15.257092 +epoch: 2, batch: 725, sum loss: 5115.101562, avg loss: 2.826023, ppl: 16.878204 +epoch: 2, batch: 726, sum loss: 4160.775391, avg loss: 2.636740, ppl: 13.967591 +epoch: 2, batch: 727, sum loss: 3974.972168, avg loss: 2.342353, ppl: 10.405688 +epoch: 2, batch: 728, sum loss: 4340.612793, avg loss: 2.514839, ppl: 12.364623 +epoch: 2, batch: 729, sum loss: 4770.579102, avg loss: 2.804573, ppl: 16.520021 +epoch: 2, batch: 730, sum loss: 4851.777832, avg loss: 2.560305, ppl: 12.939762 +epoch: 2, batch: 731, sum loss: 3812.941162, avg loss: 2.408680, ppl: 11.119279 +epoch: 2, batch: 732, sum loss: 5320.921875, avg loss: 2.944616, ppl: 19.003370 +epoch: 2, batch: 733, sum loss: 3630.844482, avg loss: 2.423795, ppl: 11.288613 +epoch: 2, batch: 734, sum loss: 3819.842041, avg loss: 2.448617, ppl: 11.572329 +epoch: 2, batch: 735, sum loss: 4418.516602, avg loss: 2.516240, ppl: 12.381948 +epoch: 2, batch: 736, sum 
loss: 3616.877930, avg loss: 2.425807, ppl: 11.311354 +epoch: 2, batch: 737, sum loss: 3657.364746, avg loss: 2.436619, ppl: 11.434314 +epoch: 2, batch: 738, sum loss: 4696.459473, avg loss: 2.676045, ppl: 14.527526 +epoch: 2, batch: 739, sum loss: 4514.344727, avg loss: 2.708065, ppl: 15.000222 +epoch: 2, batch: 740, sum loss: 4725.309082, avg loss: 2.659150, ppl: 14.284138 +epoch: 2, batch: 741, sum loss: 4510.354004, avg loss: 2.619253, ppl: 13.725469 +epoch: 2, batch: 742, sum loss: 2857.663818, avg loss: 2.048505, ppl: 7.756294 +epoch: 2, batch: 743, sum loss: 5200.500977, avg loss: 2.722775, ppl: 15.222513 +epoch: 2, batch: 744, sum loss: 3952.983398, avg loss: 2.417727, ppl: 11.220324 +epoch: 2, batch: 745, sum loss: 3544.062744, avg loss: 2.502869, ppl: 12.217497 +epoch: 2, batch: 746, sum loss: 4416.619141, avg loss: 2.478462, ppl: 11.922913 +epoch: 2, batch: 747, sum loss: 4226.808594, avg loss: 2.511473, ppl: 12.323065 +epoch: 2, batch: 748, sum loss: 4883.894043, avg loss: 2.592301, ppl: 13.360474 +epoch: 2, batch: 749, sum loss: 4679.615234, avg loss: 2.646841, ppl: 14.109401 +epoch: 2, batch: 750, sum loss: 3610.815186, avg loss: 2.518002, ppl: 12.403792 +epoch: 2, batch: 751, sum loss: 4221.554199, avg loss: 2.478893, ppl: 11.928047 +epoch: 2, batch: 752, sum loss: 3738.449219, avg loss: 2.275380, ppl: 9.731615 +epoch: 2, batch: 753, sum loss: 4572.874023, avg loss: 2.628088, ppl: 13.847275 +epoch: 2, batch: 754, sum loss: 4320.297852, avg loss: 2.615192, ppl: 13.669847 +epoch: 2, batch: 755, sum loss: 4586.578125, avg loss: 2.855902, ppl: 17.390112 +epoch: 2, batch: 756, sum loss: 4716.053711, avg loss: 2.570057, ppl: 13.066565 +epoch: 2, batch: 757, sum loss: 4299.428711, avg loss: 2.634454, ppl: 13.935701 +epoch: 2, batch: 758, sum loss: 4793.937012, avg loss: 2.657393, ppl: 14.259067 +epoch: 2, batch: 759, sum loss: 4280.717285, avg loss: 2.632667, ppl: 13.910825 +epoch: 2, batch: 760, sum loss: 4164.727539, avg loss: 2.531749, ppl: 12.575484 
+epoch: 2, batch: 761, sum loss: 4566.244629, avg loss: 2.485708, ppl: 12.009614 +epoch: 2, batch: 762, sum loss: 4189.014160, avg loss: 2.494946, ppl: 12.121077 +epoch: 2, batch: 763, sum loss: 4441.352539, avg loss: 2.628019, ppl: 13.846318 +epoch: 2, batch: 764, sum loss: 3223.128174, avg loss: 2.180736, ppl: 8.852822 +epoch: 2, batch: 765, sum loss: 3134.991943, avg loss: 2.242484, ppl: 9.416690 +epoch: 2, batch: 766, sum loss: 3442.267578, avg loss: 2.182795, ppl: 8.871067 +epoch: 2, batch: 767, sum loss: 4091.026855, avg loss: 2.564907, ppl: 12.999451 +epoch: 2, batch: 768, sum loss: 4380.432617, avg loss: 2.601207, ppl: 13.479999 +epoch: 2, batch: 769, sum loss: 3985.808105, avg loss: 2.464940, ppl: 11.762777 +epoch: 2, batch: 770, sum loss: 4072.568848, avg loss: 2.751736, ppl: 15.669806 +epoch: 2, batch: 771, sum loss: 3914.890381, avg loss: 2.490388, ppl: 12.065962 +epoch: 2, batch: 772, sum loss: 4727.649902, avg loss: 2.726442, ppl: 15.278423 +epoch: 2, batch: 773, sum loss: 5088.590332, avg loss: 2.755057, ppl: 15.721942 +epoch: 2, batch: 774, sum loss: 4184.955566, avg loss: 2.749642, ppl: 15.637038 +epoch: 2, batch: 775, sum loss: 4638.710449, avg loss: 2.598717, ppl: 13.446481 +epoch: 2, batch: 776, sum loss: 4387.661133, avg loss: 2.378136, ppl: 10.784783 +epoch: 2, batch: 777, sum loss: 5307.895508, avg loss: 2.887865, ppl: 17.954931 +epoch: 2, batch: 778, sum loss: 4899.485352, avg loss: 2.840281, ppl: 17.120584 +epoch: 2, batch: 779, sum loss: 4776.322266, avg loss: 2.870386, ppl: 17.643829 +epoch: 2, batch: 780, sum loss: 4325.718750, avg loss: 2.507663, ppl: 12.276210 +epoch: 2, batch: 781, sum loss: 3313.603516, avg loss: 2.346745, ppl: 10.451492 +epoch: 2, batch: 782, sum loss: 3625.878418, avg loss: 2.345329, ppl: 10.436704 +epoch: 2, batch: 783, sum loss: 3524.529541, avg loss: 2.219477, ppl: 9.202518 +epoch: 2, batch: 784, sum loss: 3975.432373, avg loss: 2.540212, ppl: 12.682364 +epoch: 2, batch: 785, sum loss: 5062.526855, avg loss: 
2.809393, ppl: 16.599846 +epoch: 2, batch: 786, sum loss: 3573.293213, avg loss: 2.290573, ppl: 9.880594 +epoch: 2, batch: 787, sum loss: 5271.835938, avg loss: 2.811646, ppl: 16.637276 +epoch: 2, batch: 788, sum loss: 4976.830078, avg loss: 2.644437, ppl: 14.075516 +epoch: 2, batch: 789, sum loss: 3773.870850, avg loss: 2.484444, ppl: 11.994452 +epoch: 2, batch: 790, sum loss: 4141.926270, avg loss: 2.644908, ppl: 14.082152 +epoch: 2, batch: 791, sum loss: 3942.405762, avg loss: 2.530427, ppl: 12.558873 +epoch: 2, batch: 792, sum loss: 3888.283447, avg loss: 2.419591, ppl: 11.241265 +epoch: 2, batch: 793, sum loss: 5857.884766, avg loss: 2.773620, ppl: 16.016504 +epoch: 2, batch: 794, sum loss: 3657.609863, avg loss: 2.389033, ppl: 10.902949 +epoch: 2, batch: 795, sum loss: 3896.954590, avg loss: 2.341920, ppl: 10.401187 +epoch: 2, batch: 796, sum loss: 4113.494141, avg loss: 2.591994, ppl: 13.356379 +epoch: 2, batch: 797, sum loss: 3785.089844, avg loss: 2.397144, ppl: 10.991735 +epoch: 2, batch: 798, sum loss: 5538.261230, avg loss: 2.877019, ppl: 17.761242 +epoch: 2, batch: 799, sum loss: 4356.211426, avg loss: 2.485004, ppl: 12.001164 +epoch: 2, batch: 800, sum loss: 4677.495117, avg loss: 2.579975, ppl: 13.196813 +epoch: 2, batch: 801, sum loss: 4004.168945, avg loss: 2.467140, ppl: 11.788688 +epoch: 2, batch: 802, sum loss: 4004.440918, avg loss: 2.581845, ppl: 13.221503 +epoch: 2, batch: 803, sum loss: 4685.319824, avg loss: 2.633682, ppl: 13.924944 +epoch: 2, batch: 804, sum loss: 4907.302734, avg loss: 2.680122, ppl: 14.586871 +epoch: 2, batch: 805, sum loss: 3069.625732, avg loss: 2.169347, ppl: 8.752565 +epoch: 2, batch: 806, sum loss: 3942.655762, avg loss: 2.693071, ppl: 14.776985 +epoch: 2, batch: 807, sum loss: 5040.447754, avg loss: 2.937324, ppl: 18.865295 +epoch: 2, batch: 808, sum loss: 4101.643555, avg loss: 2.445822, ppl: 11.540031 +epoch: 2, batch: 809, sum loss: 5078.908203, avg loss: 2.704424, ppl: 14.945704 +epoch: 2, batch: 810, sum loss: 
3907.109619, avg loss: 2.530512, ppl: 12.559931 +epoch: 2, batch: 811, sum loss: 3235.435303, avg loss: 2.332686, ppl: 10.305582 +epoch: 2, batch: 812, sum loss: 4505.137695, avg loss: 2.592139, ppl: 13.358315 +epoch: 2, batch: 813, sum loss: 4457.863281, avg loss: 2.602372, ppl: 13.495711 +epoch: 2, batch: 814, sum loss: 4809.086426, avg loss: 2.864256, ppl: 17.536009 +epoch: 2, batch: 815, sum loss: 4984.898926, avg loss: 2.786416, ppl: 16.222778 +epoch: 2, batch: 816, sum loss: 5196.037109, avg loss: 2.744869, ppl: 15.562575 +epoch: 2, batch: 817, sum loss: 4225.633789, avg loss: 2.647640, ppl: 14.120678 +epoch: 2, batch: 818, sum loss: 3601.431641, avg loss: 2.572451, ppl: 13.097890 +epoch: 2, batch: 819, sum loss: 5448.886719, avg loss: 2.743649, ppl: 15.543600 +epoch: 2, batch: 820, sum loss: 4729.027344, avg loss: 2.557613, ppl: 12.904982 +epoch: 2, batch: 821, sum loss: 5550.920410, avg loss: 2.788006, ppl: 16.248592 +epoch: 2, batch: 822, sum loss: 4112.082520, avg loss: 2.604232, ppl: 13.520839 +epoch: 2, batch: 823, sum loss: 4775.374023, avg loss: 2.672286, ppl: 14.473010 +epoch: 2, batch: 824, sum loss: 3476.829590, avg loss: 2.308652, ppl: 10.060853 +epoch: 2, batch: 825, sum loss: 6581.607422, avg loss: 3.089957, ppl: 21.976122 +epoch: 2, batch: 826, sum loss: 4999.771484, avg loss: 2.619053, ppl: 13.722717 +epoch: 2, batch: 827, sum loss: 4378.335938, avg loss: 2.601507, ppl: 13.484039 +epoch: 2, batch: 828, sum loss: 3873.496826, avg loss: 2.486198, ppl: 12.015509 +epoch: 2, batch: 829, sum loss: 4311.033203, avg loss: 2.584552, ppl: 13.257352 +epoch: 2, batch: 830, sum loss: 3970.791504, avg loss: 2.422692, ppl: 11.276169 +epoch: 2, batch: 831, sum loss: 4484.607422, avg loss: 2.891430, ppl: 18.019053 +epoch: 2, batch: 832, sum loss: 3737.976074, avg loss: 2.250437, ppl: 9.491883 +epoch: 2, batch: 833, sum loss: 3674.985107, avg loss: 2.557401, ppl: 12.902240 +epoch: 2, batch: 834, sum loss: 3637.586182, avg loss: 2.360536, ppl: 10.596631 +epoch: 
2, batch: 835, sum loss: 4556.645508, avg loss: 2.584598, ppl: 13.257953 +epoch: 2, batch: 836, sum loss: 5074.655273, avg loss: 2.893190, ppl: 18.050798 +epoch: 2, batch: 837, sum loss: 4295.984375, avg loss: 2.453446, ppl: 11.628353 +epoch: 2, batch: 838, sum loss: 3782.996338, avg loss: 2.515290, ppl: 12.370196 +epoch: 2, batch: 839, sum loss: 4700.333008, avg loss: 2.702894, ppl: 14.922859 +epoch: 2, batch: 840, sum loss: 3850.420410, avg loss: 2.362221, ppl: 10.614500 +epoch: 2, batch: 841, sum loss: 4054.078125, avg loss: 2.467485, ppl: 11.792750 +epoch: 2, batch: 842, sum loss: 3622.981201, avg loss: 2.338916, ppl: 10.369992 +epoch: 2, batch: 843, sum loss: 4632.423828, avg loss: 2.629071, ppl: 13.860893 +epoch: 2, batch: 844, sum loss: 4141.695312, avg loss: 2.446365, ppl: 11.546295 +epoch: 2, batch: 845, sum loss: 4276.659668, avg loss: 2.329335, ppl: 10.271111 +epoch: 2, batch: 846, sum loss: 4849.097168, avg loss: 2.591714, ppl: 13.352644 +epoch: 2, batch: 847, sum loss: 3680.874268, avg loss: 2.421628, ppl: 11.264182 +epoch: 2, batch: 848, sum loss: 3965.160645, avg loss: 2.495381, ppl: 12.126354 +epoch: 2, batch: 849, sum loss: 3915.492920, avg loss: 2.707810, ppl: 14.996396 +epoch: 2, batch: 850, sum loss: 4152.605957, avg loss: 2.462993, ppl: 11.739896 +epoch: 2, batch: 851, sum loss: 5069.573242, avg loss: 2.564276, ppl: 12.991246 +epoch: 2, batch: 852, sum loss: 5007.993164, avg loss: 2.815061, ppl: 16.694191 +epoch: 2, batch: 853, sum loss: 3688.304199, avg loss: 2.455595, ppl: 11.653368 +epoch: 2, batch: 854, sum loss: 4382.738770, avg loss: 2.540718, ppl: 12.688783 +epoch: 2, batch: 855, sum loss: 4507.695312, avg loss: 2.540978, ppl: 12.692078 +epoch: 2, batch: 856, sum loss: 4485.199219, avg loss: 2.276751, ppl: 9.744967 +epoch: 2, batch: 857, sum loss: 3937.715332, avg loss: 2.590602, ppl: 13.337804 +epoch: 2, batch: 858, sum loss: 3890.918701, avg loss: 2.628999, ppl: 13.859893 +epoch: 2, batch: 859, sum loss: 4014.939941, avg loss: 
2.503080, ppl: 12.220073 +epoch: 2, batch: 860, sum loss: 4167.087891, avg loss: 2.660976, ppl: 14.310245 +epoch: 2, batch: 861, sum loss: 4818.237305, avg loss: 2.708396, ppl: 15.005195 +epoch: 2, batch: 862, sum loss: 3806.543945, avg loss: 2.703511, ppl: 14.932074 +epoch: 2, batch: 863, sum loss: 3611.101562, avg loss: 2.534106, ppl: 12.605160 +epoch: 2, batch: 864, sum loss: 3954.123047, avg loss: 2.534694, ppl: 12.612576 +epoch: 2, batch: 865, sum loss: 4589.043457, avg loss: 2.717018, ppl: 15.135124 +epoch: 2, batch: 866, sum loss: 4652.969238, avg loss: 2.526042, ppl: 12.503918 +epoch: 2, batch: 867, sum loss: 5389.253906, avg loss: 2.815702, ppl: 16.704897 +epoch: 2, batch: 868, sum loss: 4277.860840, avg loss: 2.581690, ppl: 13.219464 +epoch: 2, batch: 869, sum loss: 3882.291016, avg loss: 2.534132, ppl: 12.605491 +epoch: 2, batch: 870, sum loss: 4962.225098, avg loss: 2.946690, ppl: 19.042810 +epoch: 2, batch: 871, sum loss: 4708.357910, avg loss: 2.792620, ppl: 16.323738 +epoch: 2, batch: 872, sum loss: 4421.436035, avg loss: 2.779030, ppl: 16.103386 +epoch: 2, batch: 873, sum loss: 3594.067871, avg loss: 2.548985, ppl: 12.794106 +epoch: 2, batch: 874, sum loss: 4265.272461, avg loss: 2.675830, ppl: 14.524395 +epoch: 2, batch: 875, sum loss: 4470.143555, avg loss: 2.673531, ppl: 14.491048 +epoch: 2, batch: 876, sum loss: 3971.404785, avg loss: 2.575490, ppl: 13.137747 +epoch: 2, batch: 877, sum loss: 4554.194824, avg loss: 2.507816, ppl: 12.278090 +epoch: 2, batch: 878, sum loss: 4508.640625, avg loss: 2.557369, ppl: 12.901822 +epoch: 2, batch: 879, sum loss: 4037.870605, avg loss: 2.475703, ppl: 11.890058 +epoch: 2, batch: 880, sum loss: 4449.584961, avg loss: 2.591488, ppl: 13.349623 +epoch: 2, batch: 881, sum loss: 4946.876953, avg loss: 2.745215, ppl: 15.567960 +epoch: 2, batch: 882, sum loss: 3964.587646, avg loss: 2.318472, ppl: 10.160142 +epoch: 2, batch: 883, sum loss: 4738.749512, avg loss: 2.628258, ppl: 13.849626 +epoch: 2, batch: 884, sum 
loss: 4242.901367, avg loss: 2.577704, ppl: 13.166878 +epoch: 2, batch: 885, sum loss: 4566.464844, avg loss: 2.744270, ppl: 15.553253 +epoch: 2, batch: 886, sum loss: 4275.783203, avg loss: 2.680742, ppl: 14.595917 +epoch: 2, batch: 887, sum loss: 4505.702637, avg loss: 2.528453, ppl: 12.534100 +epoch: 2, batch: 888, sum loss: 4781.305176, avg loss: 2.810879, ppl: 16.624525 +epoch: 2, batch: 889, sum loss: 5359.933105, avg loss: 3.068078, ppl: 21.500546 +epoch: 2, batch: 890, sum loss: 3978.113770, avg loss: 2.655617, ppl: 14.233759 +epoch: 2, batch: 891, sum loss: 4193.344727, avg loss: 2.445099, ppl: 11.531690 +epoch: 2, batch: 892, sum loss: 4362.420898, avg loss: 2.661636, ppl: 14.319692 +epoch: 2, batch: 893, sum loss: 4052.459717, avg loss: 2.474029, ppl: 11.870176 +epoch: 2, batch: 894, sum loss: 4595.693359, avg loss: 2.518188, ppl: 12.406099 +epoch: 2, batch: 895, sum loss: 4537.661621, avg loss: 2.430456, ppl: 11.364065 +epoch: 2, batch: 896, sum loss: 3688.584229, avg loss: 2.365994, ppl: 10.654621 +epoch: 2, batch: 897, sum loss: 4074.896484, avg loss: 2.484693, ppl: 11.997437 +epoch: 2, batch: 898, sum loss: 4311.412598, avg loss: 2.539112, ppl: 12.668421 +epoch: 2, batch: 899, sum loss: 4430.878906, avg loss: 2.600281, ppl: 13.467522 +epoch: 2, batch: 900, sum loss: 4615.903320, avg loss: 2.633145, ppl: 13.917473 +epoch: 2, batch: 901, sum loss: 4637.516113, avg loss: 2.587900, ppl: 13.301805 +epoch: 2, batch: 902, sum loss: 4039.527588, avg loss: 2.458629, ppl: 11.688774 +epoch: 2, batch: 903, sum loss: 4483.780762, avg loss: 2.683292, ppl: 14.633189 +epoch: 2, batch: 904, sum loss: 3920.335449, avg loss: 2.368783, ppl: 10.684379 +epoch: 2, batch: 905, sum loss: 4262.229004, avg loss: 2.796738, ppl: 16.391094 +epoch: 2, batch: 906, sum loss: 3675.591553, avg loss: 2.371350, ppl: 10.711839 +epoch: 2, batch: 907, sum loss: 5208.443359, avg loss: 2.782288, ppl: 16.155949 +epoch: 2, batch: 908, sum loss: 4506.602539, avg loss: 2.560570, ppl: 12.943187 
+epoch: 2, batch: 909, sum loss: 3906.680664, avg loss: 2.716746, ppl: 15.131007 +epoch: 2, batch: 910, sum loss: 4183.780762, avg loss: 2.378500, ppl: 10.788705 +epoch: 2, batch: 911, sum loss: 5652.622070, avg loss: 2.726784, ppl: 15.283648 +epoch: 2, batch: 912, sum loss: 4579.509766, avg loss: 2.594623, ppl: 13.391539 +epoch: 2, batch: 913, sum loss: 3563.068848, avg loss: 2.315184, ppl: 10.126790 +epoch: 2, batch: 914, sum loss: 4067.748291, avg loss: 2.514060, ppl: 12.354984 +epoch: 2, batch: 915, sum loss: 4383.671875, avg loss: 2.609329, ppl: 13.589922 +epoch: 2, batch: 916, sum loss: 4563.598633, avg loss: 2.482915, ppl: 11.976129 +epoch: 2, batch: 917, sum loss: 4757.714844, avg loss: 2.701712, ppl: 14.905226 +epoch: 2, batch: 918, sum loss: 3569.904053, avg loss: 2.586887, ppl: 13.288342 +epoch: 2, batch: 919, sum loss: 5313.482422, avg loss: 2.695831, ppl: 14.817822 +epoch: 2, batch: 920, sum loss: 5116.892090, avg loss: 2.691684, ppl: 14.756512 +epoch: 2, batch: 921, sum loss: 4056.632812, avg loss: 2.587138, ppl: 13.291678 +epoch: 2, batch: 922, sum loss: 4939.412598, avg loss: 2.614829, ppl: 13.664884 +epoch: 2, batch: 923, sum loss: 4890.069824, avg loss: 2.722756, ppl: 15.222219 +epoch: 2, batch: 924, sum loss: 4807.736816, avg loss: 2.728568, ppl: 15.310947 +epoch: 2, batch: 925, sum loss: 4533.661621, avg loss: 2.598087, ppl: 13.438004 +epoch: 2, batch: 926, sum loss: 4377.503906, avg loss: 2.582598, ppl: 13.231472 +epoch: 2, batch: 927, sum loss: 4135.064941, avg loss: 2.686852, ppl: 14.685369 +epoch: 2, batch: 928, sum loss: 3434.145508, avg loss: 2.119843, ppl: 8.329829 +epoch: 2, batch: 929, sum loss: 3820.368896, avg loss: 2.368487, ppl: 10.681215 +epoch: 2, batch: 930, sum loss: 3352.478760, avg loss: 2.194031, ppl: 8.971299 +epoch: 2, batch: 931, sum loss: 4483.874512, avg loss: 2.353740, ppl: 10.524858 +epoch: 2, batch: 932, sum loss: 4403.467285, avg loss: 2.588752, ppl: 13.313147 +epoch: 2, batch: 933, sum loss: 3757.889160, avg loss: 
2.395086, ppl: 10.969136 +epoch: 2, batch: 934, sum loss: 3737.586914, avg loss: 2.500058, ppl: 12.183203 +epoch: 2, batch: 935, sum loss: 4700.919922, avg loss: 2.739464, ppl: 15.478683 +epoch: 2, batch: 936, sum loss: 3842.848633, avg loss: 2.268506, ppl: 9.664947 +epoch: 2, batch: 937, sum loss: 5247.076660, avg loss: 2.675715, ppl: 14.522730 +epoch: 2, batch: 938, sum loss: 4747.427246, avg loss: 2.473907, ppl: 11.868725 +epoch: 2, batch: 939, sum loss: 3962.166016, avg loss: 2.329316, ppl: 10.270910 +epoch: 2, batch: 940, sum loss: 3985.832275, avg loss: 2.559944, ppl: 12.935089 +epoch: 2, batch: 941, sum loss: 4815.425293, avg loss: 2.742270, ppl: 15.522177 +epoch: 2, batch: 942, sum loss: 4161.227539, avg loss: 2.662334, ppl: 14.329692 +epoch: 2, batch: 943, sum loss: 3825.005615, avg loss: 2.301447, ppl: 9.988629 +epoch: 2, batch: 944, sum loss: 4321.121582, avg loss: 2.593710, ppl: 13.379323 +epoch: 2, batch: 945, sum loss: 4054.859375, avg loss: 2.595941, ppl: 13.409197 +epoch: 2, batch: 946, sum loss: 3051.578125, avg loss: 2.060485, ppl: 7.849775 +epoch: 2, batch: 947, sum loss: 3406.582520, avg loss: 2.473916, ppl: 11.868834 +epoch: 2, batch: 948, sum loss: 5403.766113, avg loss: 2.701883, ppl: 14.907778 +epoch: 2, batch: 949, sum loss: 4210.879395, avg loss: 2.671878, ppl: 14.467108 +epoch: 2, batch: 950, sum loss: 4293.619629, avg loss: 2.647114, ppl: 14.113253 +epoch: 2, batch: 951, sum loss: 4826.516113, avg loss: 2.620259, ppl: 13.739276 +epoch: 2, batch: 952, sum loss: 4030.875000, avg loss: 2.354483, ppl: 10.532681 +epoch: 2, batch: 953, sum loss: 3863.094727, avg loss: 2.288563, ppl: 9.860760 +epoch: 2, batch: 954, sum loss: 4353.103516, avg loss: 2.638245, ppl: 13.988626 +epoch: 2, batch: 955, sum loss: 4375.706543, avg loss: 2.480559, ppl: 11.947946 +epoch: 2, batch: 956, sum loss: 5029.326172, avg loss: 2.917243, ppl: 18.490234 +epoch: 2, batch: 957, sum loss: 3751.823242, avg loss: 2.241233, ppl: 9.404917 +epoch: 2, batch: 958, sum loss: 
4738.395508, avg loss: 2.726350, ppl: 15.277018 +epoch: 2, batch: 959, sum loss: 3893.890137, avg loss: 2.400672, ppl: 11.030586 +epoch: 2, batch: 960, sum loss: 4632.460449, avg loss: 2.481232, ppl: 11.955987 +epoch: 2, batch: 961, sum loss: 4414.202637, avg loss: 2.525288, ppl: 12.494489 +epoch: 2, batch: 962, sum loss: 3650.264160, avg loss: 2.464729, ppl: 11.760299 +epoch: 2, batch: 963, sum loss: 4167.244629, avg loss: 2.570786, ppl: 13.076101 +epoch: 2, batch: 964, sum loss: 4412.972168, avg loss: 2.653621, ppl: 14.205386 +epoch: 2, batch: 965, sum loss: 4250.814453, avg loss: 2.537800, ppl: 12.651804 +epoch: 2, batch: 966, sum loss: 4895.758301, avg loss: 2.564567, ppl: 12.995031 +epoch: 2, batch: 967, sum loss: 4088.306641, avg loss: 2.629136, ppl: 13.861793 +epoch: 2, batch: 968, sum loss: 4197.343262, avg loss: 2.457461, ppl: 11.675129 +epoch: 2, batch: 969, sum loss: 5480.305664, avg loss: 2.965533, ppl: 19.405048 +epoch: 2, batch: 970, sum loss: 4176.104980, avg loss: 2.478400, ppl: 11.922179 +epoch: 2, batch: 971, sum loss: 3497.916992, avg loss: 2.274328, ppl: 9.721386 +epoch: 2, batch: 972, sum loss: 6063.677734, avg loss: 2.998851, ppl: 20.062477 +epoch: 2, batch: 973, sum loss: 4657.347656, avg loss: 2.783830, ppl: 16.180878 +epoch: 2, batch: 974, sum loss: 3720.761719, avg loss: 2.318232, ppl: 10.157696 +epoch: 2, batch: 975, sum loss: 3875.898438, avg loss: 2.526661, ppl: 12.511662 +epoch: 2, batch: 976, sum loss: 4030.569824, avg loss: 2.590341, ppl: 13.334312 +epoch: 2, batch: 977, sum loss: 5003.060547, avg loss: 2.701437, ppl: 14.901122 +epoch: 2, batch: 978, sum loss: 4858.642578, avg loss: 2.539803, ppl: 12.677168 +epoch: 2, batch: 979, sum loss: 3729.677734, avg loss: 2.365046, ppl: 10.644531 +epoch: 2, batch: 980, sum loss: 4027.514648, avg loss: 2.205649, ppl: 9.076139 +epoch: 2, batch: 981, sum loss: 4491.484375, avg loss: 2.457048, ppl: 11.670315 +epoch: 2, batch: 982, sum loss: 4882.402832, avg loss: 2.822198, ppl: 16.813768 +epoch: 
2, batch: 983, sum loss: 4384.994629, avg loss: 2.622605, ppl: 13.771546 +epoch: 2, batch: 984, sum loss: 4159.120117, avg loss: 2.561034, ppl: 12.949205 +epoch: 2, batch: 985, sum loss: 5028.436523, avg loss: 2.828142, ppl: 16.914009 +epoch: 2, batch: 986, sum loss: 3292.682617, avg loss: 2.386002, ppl: 10.869949 +epoch: 2, batch: 987, sum loss: 4376.955078, avg loss: 2.665624, ppl: 14.376924 +epoch: 2, batch: 988, sum loss: 3915.286621, avg loss: 2.443999, ppl: 11.519014 +epoch: 2, batch: 989, sum loss: 4034.016113, avg loss: 2.556411, ppl: 12.889471 +epoch: 2, batch: 990, sum loss: 4412.971191, avg loss: 2.605060, ppl: 13.532032 +epoch: 2, batch: 991, sum loss: 4978.729492, avg loss: 2.522153, ppl: 12.455380 +epoch: 2, batch: 992, sum loss: 4474.560547, avg loss: 2.693896, ppl: 14.789176 +epoch: 2, batch: 993, sum loss: 6439.535156, avg loss: 2.845575, ppl: 17.211447 +epoch: 2, batch: 994, sum loss: 3577.060303, avg loss: 2.244078, ppl: 9.431715 +epoch: 2, batch: 995, sum loss: 4067.233154, avg loss: 2.428199, ppl: 11.338444 +epoch: 2, batch: 996, sum loss: 4169.948730, avg loss: 2.464509, ppl: 11.757705 +epoch: 2, batch: 997, sum loss: 5305.210449, avg loss: 2.832467, ppl: 16.987314 +epoch: 2, batch: 998, sum loss: 4179.679688, avg loss: 2.513337, ppl: 12.346062 +epoch: 2, batch: 999, sum loss: 3797.542236, avg loss: 2.428096, ppl: 11.337276 +epoch: 2, batch: 1000, sum loss: 4525.545898, avg loss: 2.366917, ppl: 10.664464 +epoch: 2, batch: 1001, sum loss: 3854.174316, avg loss: 2.358736, ppl: 10.577571 +epoch: 2, batch: 1002, sum loss: 4318.437500, avg loss: 2.567442, ppl: 13.032444 +epoch: 2, batch: 1003, sum loss: 3660.326904, avg loss: 2.266456, ppl: 9.645162 +epoch: 2, batch: 1004, sum loss: 3280.478516, avg loss: 2.287642, ppl: 9.851680 +epoch: 2, batch: 1005, sum loss: 3543.853760, avg loss: 2.407509, ppl: 11.106265 +epoch: 2, batch: 1006, sum loss: 4700.826660, avg loss: 2.513811, ppl: 12.351915 +epoch: 2, batch: 1007, sum loss: 4423.064453, avg loss: 
2.461360, ppl: 11.720746 +epoch: 2, batch: 1008, sum loss: 4845.600098, avg loss: 2.683057, ppl: 14.629743 +epoch: 2, batch: 1009, sum loss: 4579.403320, avg loss: 2.596034, ppl: 13.410440 +epoch: 2, batch: 1010, sum loss: 4490.772949, avg loss: 2.530013, ppl: 12.553667 +epoch: 2, batch: 1011, sum loss: 3890.429932, avg loss: 2.534482, ppl: 12.609898 +epoch: 2, batch: 1012, sum loss: 4881.596191, avg loss: 2.784710, ppl: 16.195120 +epoch: 2, batch: 1013, sum loss: 4093.894775, avg loss: 2.413853, ppl: 11.176945 +epoch: 2, batch: 1014, sum loss: 4102.672852, avg loss: 2.474471, ppl: 11.875424 +epoch: 2, batch: 1015, sum loss: 5172.537598, avg loss: 2.674528, ppl: 14.505504 +epoch: 2, batch: 1016, sum loss: 3888.536377, avg loss: 2.719256, ppl: 15.169039 +epoch: 2, batch: 1017, sum loss: 4641.474121, avg loss: 2.609035, ppl: 13.585940 +epoch: 2, batch: 1018, sum loss: 5487.040527, avg loss: 2.927983, ppl: 18.689896 +epoch: 2, batch: 1019, sum loss: 3978.857910, avg loss: 2.376857, ppl: 10.770994 +epoch: 2, batch: 1020, sum loss: 3461.878174, avg loss: 2.387502, ppl: 10.886269 +epoch: 2, batch: 1021, sum loss: 3976.881836, avg loss: 2.541138, ppl: 12.694114 +epoch: 2, batch: 1022, sum loss: 3953.659180, avg loss: 2.371721, ppl: 10.715818 +epoch: 2, batch: 1023, sum loss: 4381.763184, avg loss: 2.461665, ppl: 11.724313 +epoch: 2, batch: 1024, sum loss: 5200.284668, avg loss: 2.804900, ppl: 16.525423 +epoch: 2, batch: 1025, sum loss: 5126.882324, avg loss: 2.893275, ppl: 18.052326 +epoch: 2, batch: 1026, sum loss: 4940.828125, avg loss: 2.708787, ppl: 15.011063 +epoch: 2, batch: 1027, sum loss: 4757.446289, avg loss: 2.734164, ppl: 15.396873 +epoch: 2, batch: 1028, sum loss: 4608.192871, avg loss: 2.685427, ppl: 14.664465 +epoch: 2, batch: 1029, sum loss: 5194.496094, avg loss: 2.712531, ppl: 15.067357 +epoch: 2, batch: 1030, sum loss: 4855.733887, avg loss: 2.678287, ppl: 14.560127 +epoch: 2, batch: 1031, sum loss: 3631.586914, avg loss: 2.217086, ppl: 9.180538 +epoch: 
2, batch: 1032, sum loss: 4517.143555, avg loss: 2.529196, ppl: 12.543414 +epoch: 2, batch: 1033, sum loss: 4626.700684, avg loss: 2.693074, ppl: 14.777027 +epoch: 2, batch: 1034, sum loss: 3859.741455, avg loss: 2.535967, ppl: 12.628635 +epoch: 2, batch: 1035, sum loss: 3778.777344, avg loss: 2.405333, ppl: 11.082115 +epoch: 2, batch: 1036, sum loss: 4137.296875, avg loss: 2.618542, ppl: 13.715717 +epoch: 2, batch: 1037, sum loss: 4497.625488, avg loss: 2.538163, ppl: 12.656405 +epoch: 2, batch: 1038, sum loss: 3980.668945, avg loss: 2.698759, ppl: 14.861272 +epoch: 2, batch: 1039, sum loss: 3660.757080, avg loss: 2.179022, ppl: 8.837660 +epoch: 2, batch: 1040, sum loss: 3954.768555, avg loss: 2.447258, ppl: 11.556612 +epoch: 2, batch: 1041, sum loss: 3964.248779, avg loss: 2.447067, ppl: 11.554411 +epoch: 2, batch: 1042, sum loss: 3841.258789, avg loss: 2.259564, ppl: 9.578914 +epoch: 2, batch: 1043, sum loss: 4689.994629, avg loss: 2.758821, ppl: 15.781219 +epoch: 2, batch: 1044, sum loss: 3630.438721, avg loss: 2.328697, ppl: 10.264560 +epoch: 2, batch: 1045, sum loss: 4812.232910, avg loss: 2.711117, ppl: 15.046073 +epoch: 2, batch: 1046, sum loss: 4245.344727, avg loss: 2.661658, ppl: 14.320016 +epoch: 2, batch: 1047, sum loss: 3978.469971, avg loss: 2.633005, ppl: 13.915519 +epoch: 2, batch: 1048, sum loss: 3377.078613, avg loss: 2.475864, ppl: 11.891979 +epoch: 2, batch: 1049, sum loss: 3882.813721, avg loss: 2.506658, ppl: 12.263880 +epoch: 2, batch: 1050, sum loss: 5326.867188, avg loss: 2.850116, ppl: 17.289787 +epoch: 2, batch: 1051, sum loss: 5125.679688, avg loss: 2.720637, ppl: 15.189993 +epoch: 2, batch: 1052, sum loss: 3668.325928, avg loss: 2.481953, ppl: 11.964604 +epoch: 2, batch: 1053, sum loss: 4625.130859, avg loss: 2.658121, ppl: 14.269453 +epoch: 2, batch: 1054, sum loss: 4205.251465, avg loss: 2.464977, ppl: 11.763215 +epoch: 2, batch: 1055, sum loss: 4292.670410, avg loss: 2.534044, ppl: 12.604373 +epoch: 2, batch: 1056, sum loss: 
4610.099609, avg loss: 2.626837, ppl: 13.829960 +epoch: 2, batch: 1057, sum loss: 3731.822510, avg loss: 2.406075, ppl: 11.090346 +epoch: 2, batch: 1058, sum loss: 4363.074707, avg loss: 2.698253, ppl: 14.853758 +epoch: 2, batch: 1059, sum loss: 4009.588379, avg loss: 2.605321, ppl: 13.535565 +epoch: 2, batch: 1060, sum loss: 4655.887695, avg loss: 2.579439, ppl: 13.189739 +epoch: 2, batch: 1061, sum loss: 5041.162598, avg loss: 2.735302, ppl: 15.414405 +epoch: 2, batch: 1062, sum loss: 4593.736816, avg loss: 2.599738, ppl: 13.460210 +epoch: 2, batch: 1063, sum loss: 4601.868652, avg loss: 2.434851, ppl: 11.414120 +epoch: 2, batch: 1064, sum loss: 3898.634277, avg loss: 2.359948, ppl: 10.590403 +epoch: 2, batch: 1065, sum loss: 4773.803223, avg loss: 2.759424, ppl: 15.790741 +epoch: 2, batch: 1066, sum loss: 4462.543457, avg loss: 2.640558, ppl: 14.021029 +epoch: 2, batch: 1067, sum loss: 4661.725586, avg loss: 2.511706, ppl: 12.325935 +epoch: 2, batch: 1068, sum loss: 4752.023926, avg loss: 2.687796, ppl: 14.699248 +epoch: 2, batch: 1069, sum loss: 4020.322021, avg loss: 2.522159, ppl: 12.455463 +epoch: 2, batch: 1070, sum loss: 4215.134766, avg loss: 2.504536, ppl: 12.237885 +epoch: 2, batch: 1071, sum loss: 3705.694580, avg loss: 2.386153, ppl: 10.871585 +epoch: 2, batch: 1072, sum loss: 3493.000732, avg loss: 2.342724, ppl: 10.409550 +epoch: 2, batch: 1073, sum loss: 4658.522461, avg loss: 2.568094, ppl: 13.040945 +epoch: 2, batch: 1074, sum loss: 3881.796143, avg loss: 2.542106, ppl: 12.706405 +epoch: 2, batch: 1075, sum loss: 4565.977539, avg loss: 2.474785, ppl: 11.879148 +epoch: 2, batch: 1076, sum loss: 5188.017090, avg loss: 2.729099, ppl: 15.319078 +epoch: 2, batch: 1077, sum loss: 5419.985352, avg loss: 2.848127, ppl: 17.255430 +epoch: 2, batch: 1078, sum loss: 4040.778076, avg loss: 2.503580, ppl: 12.226187 +epoch: 2, batch: 1079, sum loss: 4192.344238, avg loss: 2.348652, ppl: 10.471449 +epoch: 2, batch: 1080, sum loss: 3161.444336, avg loss: 
2.334892, ppl: 10.328347 +epoch: 2, batch: 1081, sum loss: 4384.385254, avg loss: 2.670150, ppl: 14.442129 +epoch: 2, batch: 1082, sum loss: 3823.139648, avg loss: 2.383504, ppl: 10.842826 +epoch: 2, batch: 1083, sum loss: 3701.126465, avg loss: 2.374039, ppl: 10.740685 +epoch: 2, batch: 1084, sum loss: 3925.307617, avg loss: 2.448726, ppl: 11.573592 +epoch: 2, batch: 1085, sum loss: 3806.685059, avg loss: 2.315502, ppl: 10.130006 +epoch: 2, batch: 1086, sum loss: 4502.751953, avg loss: 2.616358, ppl: 13.685786 +epoch: 2, batch: 1087, sum loss: 4953.228027, avg loss: 2.751793, ppl: 15.670711 +epoch: 2, batch: 1088, sum loss: 4867.017090, avg loss: 2.528321, ppl: 12.532441 +epoch: 2, batch: 1089, sum loss: 4274.764648, avg loss: 2.589197, ppl: 13.319077 +epoch: 2, batch: 1090, sum loss: 4676.638184, avg loss: 2.720557, ppl: 15.188787 +epoch: 2, batch: 1091, sum loss: 3643.896484, avg loss: 2.458770, ppl: 11.690421 +epoch: 2, batch: 1092, sum loss: 4096.431641, avg loss: 2.416774, ppl: 11.209637 +epoch: 2, batch: 1093, sum loss: 3908.572510, avg loss: 2.445915, ppl: 11.541107 +epoch: 2, batch: 1094, sum loss: 3875.822754, avg loss: 2.395441, ppl: 10.973031 +epoch: 2, batch: 1095, sum loss: 5566.498535, avg loss: 2.903755, ppl: 18.242521 +epoch: 2, batch: 1096, sum loss: 4468.224121, avg loss: 2.690081, ppl: 14.732864 +epoch: 2, batch: 1097, sum loss: 4985.925781, avg loss: 2.629708, ppl: 13.869717 +epoch: 2, batch: 1098, sum loss: 3451.012207, avg loss: 2.235112, ppl: 9.347526 +epoch: 2, batch: 1099, sum loss: 4322.367188, avg loss: 2.352949, ppl: 10.516539 +epoch: 2, batch: 1100, sum loss: 5197.104980, avg loss: 2.777715, ppl: 16.082233 +epoch: 2, batch: 1101, sum loss: 4448.570312, avg loss: 2.689583, ppl: 14.725535 +epoch: 2, batch: 1102, sum loss: 4116.282227, avg loss: 2.373865, ppl: 10.738819 +epoch: 2, batch: 1103, sum loss: 4317.753418, avg loss: 2.596364, ppl: 13.414873 +epoch: 2, batch: 1104, sum loss: 3720.645508, avg loss: 2.454252, ppl: 11.637723 +epoch: 
2, batch: 1105, sum loss: 3141.319336, avg loss: 2.291261, ppl: 9.887403 +epoch: 2, batch: 1106, sum loss: 4177.892578, avg loss: 2.619368, ppl: 13.727050 +epoch: 2, batch: 1107, sum loss: 4181.303711, avg loss: 2.825205, ppl: 16.864407 +epoch: 2, batch: 1108, sum loss: 3598.152344, avg loss: 2.566442, ppl: 13.019425 +epoch: 2, batch: 1109, sum loss: 3935.312988, avg loss: 2.530748, ppl: 12.562901 +epoch: 2, batch: 1110, sum loss: 4075.576416, avg loss: 2.553619, ppl: 12.853541 +epoch: 2, batch: 1111, sum loss: 4550.195312, avg loss: 2.742734, ppl: 15.529380 +epoch: 2, batch: 1112, sum loss: 4874.980957, avg loss: 2.857550, ppl: 17.418806 +epoch: 2, batch: 1113, sum loss: 4734.547852, avg loss: 2.444268, ppl: 11.522118 +epoch: 2, batch: 1114, sum loss: 3555.789062, avg loss: 2.359515, ppl: 10.585815 +epoch: 2, batch: 1115, sum loss: 4497.077148, avg loss: 2.782845, ppl: 16.164942 +epoch: 2, batch: 1116, sum loss: 4088.170166, avg loss: 2.378226, ppl: 10.785749 +epoch: 2, batch: 1117, sum loss: 4286.935059, avg loss: 2.542666, ppl: 12.713523 +epoch: 2, batch: 1118, sum loss: 5149.818359, avg loss: 2.821818, ppl: 16.807384 +epoch: 2, batch: 1119, sum loss: 3823.639160, avg loss: 2.653462, ppl: 14.203131 +epoch: 2, batch: 1120, sum loss: 3344.665771, avg loss: 2.330777, ppl: 10.285934 +epoch: 2, batch: 1121, sum loss: 3529.658447, avg loss: 2.290499, ppl: 9.879864 +epoch: 2, batch: 1122, sum loss: 4517.981934, avg loss: 2.705379, ppl: 14.959978 +epoch: 2, batch: 1123, sum loss: 4104.682129, avg loss: 2.504382, ppl: 12.235993 +epoch: 2, batch: 1124, sum loss: 4105.326172, avg loss: 2.848943, ppl: 17.269514 +epoch: 2, batch: 1125, sum loss: 4227.015625, avg loss: 2.538748, ppl: 12.663810 +epoch: 2, batch: 1126, sum loss: 3737.815918, avg loss: 2.374724, ppl: 10.748048 +epoch: 2, batch: 1127, sum loss: 4184.522949, avg loss: 2.457148, ppl: 11.671475 +epoch: 2, batch: 1128, sum loss: 4466.679199, avg loss: 2.636765, ppl: 13.967937 +epoch: 2, batch: 1129, sum loss: 
3751.770020, avg loss: 2.123243, ppl: 8.358198 +epoch: 2, batch: 1130, sum loss: 4324.018555, avg loss: 2.592337, ppl: 13.360965 +epoch: 2, batch: 1131, sum loss: 4766.246094, avg loss: 2.848922, ppl: 17.269152 +epoch: 2, batch: 1132, sum loss: 3917.608887, avg loss: 2.377190, ppl: 10.774582 +epoch: 2, batch: 1133, sum loss: 4499.475098, avg loss: 2.699145, ppl: 14.867016 +epoch: 2, batch: 1134, sum loss: 4282.961914, avg loss: 2.481438, ppl: 11.958447 +epoch: 2, batch: 1135, sum loss: 4526.302246, avg loss: 2.797467, ppl: 16.403053 +epoch: 2, batch: 1136, sum loss: 3963.636230, avg loss: 2.379133, ppl: 10.795542 +epoch: 2, batch: 1137, sum loss: 4059.179199, avg loss: 2.529083, ppl: 12.542006 +epoch: 2, batch: 1138, sum loss: 4127.212402, avg loss: 2.544521, ppl: 12.737121 +epoch: 2, batch: 1139, sum loss: 4123.601074, avg loss: 2.450149, ppl: 11.590075 +epoch: 2, batch: 1140, sum loss: 4875.590820, avg loss: 2.779698, ppl: 16.114157 +epoch: 2, batch: 1141, sum loss: 4137.346191, avg loss: 2.657255, ppl: 14.257102 +epoch: 2, batch: 1142, sum loss: 3284.964355, avg loss: 2.233150, ppl: 9.329211 +epoch: 2, batch: 1143, sum loss: 4938.667969, avg loss: 2.737621, ppl: 15.450183 +epoch: 2, batch: 1144, sum loss: 4392.710938, avg loss: 2.512993, ppl: 12.341809 +epoch: 2, batch: 1145, sum loss: 3477.318359, avg loss: 2.116444, ppl: 8.301567 +epoch: 2, batch: 1146, sum loss: 4904.022461, avg loss: 2.849519, ppl: 17.279469 +epoch: 2, batch: 1147, sum loss: 3818.789795, avg loss: 2.360191, ppl: 10.592978 +epoch: 2, batch: 1148, sum loss: 3410.363281, avg loss: 2.282706, ppl: 9.803174 +epoch: 2, batch: 1149, sum loss: 5003.487793, avg loss: 2.768947, ppl: 15.941840 +epoch: 2, batch: 1150, sum loss: 4549.656250, avg loss: 2.698491, ppl: 14.857300 +epoch: 2, batch: 1151, sum loss: 4340.157715, avg loss: 2.389955, ppl: 10.912998 +epoch: 2, batch: 1152, sum loss: 3195.568115, avg loss: 2.289089, ppl: 9.865946 +epoch: 2, batch: 1153, sum loss: 4000.756104, avg loss: 2.541776, 
ppl: 12.702215 +epoch: 2, batch: 1154, sum loss: 5417.454590, avg loss: 2.624736, ppl: 13.800928 +epoch: 2, batch: 1155, sum loss: 4500.989258, avg loss: 2.435600, ppl: 11.422673 +epoch: 2, batch: 1156, sum loss: 4651.234375, avg loss: 2.597004, ppl: 13.423460 +epoch: 2, batch: 1157, sum loss: 4248.168945, avg loss: 2.488675, ppl: 12.045310 +epoch: 2, batch: 1158, sum loss: 4274.004395, avg loss: 2.648082, ppl: 14.126918 +epoch: 2, batch: 1159, sum loss: 3977.146240, avg loss: 2.448982, ppl: 11.576550 +epoch: 2, batch: 1160, sum loss: 3497.198242, avg loss: 2.506952, ppl: 12.267486 +epoch: 2, batch: 1161, sum loss: 3815.761230, avg loss: 2.276707, ppl: 9.744540 +epoch: 2, batch: 1162, sum loss: 4182.602051, avg loss: 2.632223, ppl: 13.904644 +epoch: 2, batch: 1163, sum loss: 4001.097900, avg loss: 2.625392, ppl: 13.809989 +epoch: 2, batch: 1164, sum loss: 4609.373047, avg loss: 2.795254, ppl: 16.366777 +epoch: 2, batch: 1165, sum loss: 4074.786621, avg loss: 2.457652, ppl: 11.677360 +epoch: 2, batch: 1166, sum loss: 4241.058105, avg loss: 2.438791, ppl: 11.459182 +epoch: 2, batch: 1167, sum loss: 4674.008789, avg loss: 2.728551, ppl: 15.310692 +epoch: 2, batch: 1168, sum loss: 4421.003906, avg loss: 2.639405, ppl: 14.004875 +epoch: 2, batch: 1169, sum loss: 4895.645020, avg loss: 2.617992, ppl: 13.708169 +epoch: 2, batch: 1170, sum loss: 3495.430420, avg loss: 2.272712, ppl: 9.705682 +epoch: 2, batch: 1171, sum loss: 4249.719727, avg loss: 2.747072, ppl: 15.596893 +epoch: 2, batch: 1172, sum loss: 5152.371582, avg loss: 2.910944, ppl: 18.374144 +epoch: 2, batch: 1173, sum loss: 4892.069336, avg loss: 2.654406, ppl: 14.216533 +epoch: 2, batch: 1174, sum loss: 4021.875000, avg loss: 2.588079, ppl: 13.304193 +epoch: 2, batch: 1175, sum loss: 4994.102051, avg loss: 2.800955, ppl: 16.460356 +epoch: 2, batch: 1176, sum loss: 3361.121582, avg loss: 2.201128, ppl: 9.035195 +epoch: 2, batch: 1177, sum loss: 3834.140137, avg loss: 2.335043, ppl: 10.329901 +epoch: 2, batch: 
1178, sum loss: 3467.030762, avg loss: 2.328429, ppl: 10.261807 +epoch: 2, batch: 1179, sum loss: 4163.836914, avg loss: 2.515914, ppl: 12.377913 +epoch: 2, batch: 1180, sum loss: 4114.675781, avg loss: 2.501323, ppl: 12.198616 +epoch: 2, batch: 1181, sum loss: 4471.816406, avg loss: 2.647612, ppl: 14.120277 +epoch: 2, batch: 1182, sum loss: 4982.504883, avg loss: 2.627904, ppl: 13.844717 +epoch: 2, batch: 1183, sum loss: 3929.385742, avg loss: 2.241521, ppl: 9.407626 +epoch: 2, batch: 1184, sum loss: 3976.607910, avg loss: 2.414455, ppl: 11.183676 +epoch: 2, batch: 1185, sum loss: 4407.924316, avg loss: 2.481939, ppl: 11.964445 +epoch: 2, batch: 1186, sum loss: 4562.026855, avg loss: 2.505232, ppl: 12.246395 +epoch: 2, batch: 1187, sum loss: 3709.233887, avg loss: 2.359564, ppl: 10.586330 +epoch: 2, batch: 1188, sum loss: 3985.439697, avg loss: 2.439070, ppl: 11.462370 +epoch: 2, batch: 1189, sum loss: 4494.348145, avg loss: 2.717260, ppl: 15.138787 +epoch: 2, batch: 1190, sum loss: 3076.413330, avg loss: 2.148334, ppl: 8.570564 +epoch: 2, batch: 1191, sum loss: 4436.133789, avg loss: 2.483838, ppl: 11.987178 +epoch: 2, batch: 1192, sum loss: 4250.233887, avg loss: 2.420407, ppl: 11.250433 +epoch: 2, batch: 1193, sum loss: 4539.904785, avg loss: 2.707158, ppl: 14.986628 +epoch: 2, batch: 1194, sum loss: 4749.802734, avg loss: 2.473856, ppl: 11.868119 +epoch: 2, batch: 1195, sum loss: 4025.400879, avg loss: 2.509602, ppl: 12.300029 +epoch: 2, batch: 1196, sum loss: 4072.426758, avg loss: 2.377365, ppl: 10.776473 +epoch: 2, batch: 1197, sum loss: 4760.555664, avg loss: 2.857476, ppl: 17.417513 +epoch: 2, batch: 1198, sum loss: 4440.605957, avg loss: 2.697817, ppl: 14.847279 +epoch: 2, batch: 1199, sum loss: 4325.237793, avg loss: 2.576080, ppl: 13.145501 +epoch: 2, batch: 1200, sum loss: 5098.393555, avg loss: 2.807485, ppl: 16.568203 +epoch: 2, batch: 1201, sum loss: 3812.358154, avg loss: 2.222949, ppl: 9.234526 +epoch: 2, batch: 1202, sum loss: 4030.542480, avg 
loss: 2.519089, ppl: 12.417279 +epoch: 2, batch: 1203, sum loss: 4947.413086, avg loss: 2.619065, ppl: 13.722880 +epoch: 2, batch: 1204, sum loss: 3914.358643, avg loss: 2.505991, ppl: 12.255704 +epoch: 2, batch: 1205, sum loss: 3911.748535, avg loss: 2.420636, ppl: 11.253018 +epoch: 2, batch: 1206, sum loss: 4055.326904, avg loss: 2.422537, ppl: 11.274427 +epoch: 2, batch: 1207, sum loss: 4660.934570, avg loss: 2.666439, ppl: 14.388634 +epoch: 2, batch: 1208, sum loss: 4050.820312, avg loss: 2.591696, ppl: 13.352392 +epoch: 2, batch: 1209, sum loss: 3446.851318, avg loss: 2.276652, ppl: 9.744003 +epoch: 2, batch: 1210, sum loss: 4628.161133, avg loss: 2.626652, ppl: 13.827401 +epoch: 2, batch: 1211, sum loss: 4665.301758, avg loss: 2.649234, ppl: 14.143205 +epoch: 2, batch: 1212, sum loss: 4274.088867, avg loss: 2.580971, ppl: 13.209965 +epoch: 2, batch: 1213, sum loss: 4079.856445, avg loss: 2.425598, ppl: 11.308994 +epoch: 2, batch: 1214, sum loss: 5333.695801, avg loss: 2.825051, ppl: 16.861801 +epoch: 2, batch: 1215, sum loss: 4850.336914, avg loss: 2.795583, ppl: 16.372175 +epoch: 2, batch: 1216, sum loss: 4592.008789, avg loss: 2.679118, ppl: 14.572241 +epoch: 2, batch: 1217, sum loss: 3388.303955, avg loss: 2.295599, ppl: 9.930383 +epoch: 2, batch: 1218, sum loss: 4532.629883, avg loss: 2.646019, ppl: 14.097796 +epoch: 2, batch: 1219, sum loss: 4700.719238, avg loss: 2.525910, ppl: 12.502272 +epoch: 2, batch: 1220, sum loss: 4139.719238, avg loss: 2.450988, ppl: 11.599805 +epoch: 2, batch: 1221, sum loss: 3612.555420, avg loss: 2.408370, ppl: 11.115830 +epoch: 2, batch: 1222, sum loss: 4125.155273, avg loss: 2.412371, ppl: 11.160396 +epoch: 2, batch: 1223, sum loss: 4729.451172, avg loss: 2.765761, ppl: 15.891127 +epoch: 2, batch: 1224, sum loss: 4311.715820, avg loss: 2.554334, ppl: 12.862732 +epoch: 2, batch: 1225, sum loss: 3551.718018, avg loss: 2.459638, ppl: 11.700580 +epoch: 2, batch: 1226, sum loss: 3530.733398, avg loss: 2.369620, ppl: 10.693326 
+epoch: 2, batch: 1227, sum loss: 3965.821045, avg loss: 2.391931, ppl: 10.934584 +epoch: 2, batch: 1228, sum loss: 3869.609131, avg loss: 2.184985, ppl: 8.890519 +epoch: 2, batch: 1229, sum loss: 4878.998047, avg loss: 2.648750, ppl: 14.136361 +epoch: 2, batch: 1230, sum loss: 3903.026611, avg loss: 2.356900, ppl: 10.558170 +epoch: 2, batch: 1231, sum loss: 3947.676270, avg loss: 2.501696, ppl: 12.203172 +epoch: 2, batch: 1232, sum loss: 4032.862305, avg loss: 2.693963, ppl: 14.790167 +epoch: 2, batch: 1233, sum loss: 4145.510254, avg loss: 2.515480, ppl: 12.372540 +epoch: 2, batch: 1234, sum loss: 3999.841553, avg loss: 2.376614, ppl: 10.768380 +epoch: 2, batch: 1235, sum loss: 5393.050781, avg loss: 2.807419, ppl: 16.567097 +epoch: 2, batch: 1236, sum loss: 3800.893555, avg loss: 2.461719, ppl: 11.724947 +epoch: 2, batch: 1237, sum loss: 4455.556152, avg loss: 2.653696, ppl: 14.206453 +epoch: 2, batch: 1238, sum loss: 5043.839844, avg loss: 2.802133, ppl: 16.479767 +epoch: 2, batch: 1239, sum loss: 3706.150635, avg loss: 2.392609, ppl: 10.942000 +epoch: 2, batch: 1240, sum loss: 4125.003906, avg loss: 2.492450, ppl: 12.090857 +epoch: 2, batch: 1241, sum loss: 2646.358643, avg loss: 2.100285, ppl: 8.168494 +epoch: 2, batch: 1242, sum loss: 4654.934570, avg loss: 2.701645, ppl: 14.904231 +epoch: 2, batch: 1243, sum loss: 3676.752197, avg loss: 2.547992, ppl: 12.781410 +epoch: 2, batch: 1244, sum loss: 5492.913086, avg loss: 2.630706, ppl: 13.883563 +epoch: 2, batch: 1245, sum loss: 4549.826172, avg loss: 2.623890, ppl: 13.789265 +epoch: 2, batch: 1246, sum loss: 4329.580566, avg loss: 2.747196, ppl: 15.598827 +epoch: 2, batch: 1247, sum loss: 3524.316895, avg loss: 2.376478, ppl: 10.766917 +epoch: 2, batch: 1248, sum loss: 4036.052979, avg loss: 2.598875, ppl: 13.448604 +epoch: 2, batch: 1249, sum loss: 4441.470703, avg loss: 2.520699, ppl: 12.437282 +epoch: 2, batch: 1250, sum loss: 4476.982422, avg loss: 2.488595, ppl: 12.044342 +epoch: 2, batch: 1251, sum loss: 
3831.622070, avg loss: 2.352131, ppl: 10.507942 +epoch: 2, batch: 1252, sum loss: 4814.347656, avg loss: 2.746348, ppl: 15.585608 +epoch: 2, batch: 1253, sum loss: 3902.862061, avg loss: 2.400284, ppl: 11.026308 +epoch: 2, batch: 1254, sum loss: 4597.802734, avg loss: 2.500165, ppl: 12.184501 +epoch: 2, batch: 1255, sum loss: 4866.016602, avg loss: 2.713897, ppl: 15.087955 +epoch: 2, batch: 1256, sum loss: 4429.566895, avg loss: 2.519663, ppl: 12.424404 +epoch: 2, batch: 1257, sum loss: 4188.760254, avg loss: 2.588850, ppl: 13.314458 +epoch: 2, batch: 1258, sum loss: 4752.841309, avg loss: 2.547075, ppl: 12.769692 +epoch: 2, batch: 1259, sum loss: 4094.384766, avg loss: 2.463529, ppl: 11.746189 +epoch: 2, batch: 1260, sum loss: 6036.274414, avg loss: 2.871681, ppl: 17.666698 +epoch: 2, batch: 1261, sum loss: 4997.697754, avg loss: 2.681168, ppl: 14.602143 +epoch: 2, batch: 1262, sum loss: 4148.876953, avg loss: 2.398195, ppl: 11.003295 +epoch: 2, batch: 1263, sum loss: 4726.132324, avg loss: 2.665613, ppl: 14.376766 +epoch: 2, batch: 1264, sum loss: 3790.550781, avg loss: 2.277975, ppl: 9.756906 +epoch: 2, batch: 1265, sum loss: 3767.185547, avg loss: 2.301274, ppl: 9.986898 +epoch: 2, batch: 1266, sum loss: 4635.705566, avg loss: 2.703035, ppl: 14.924962 +epoch: 2, batch: 1267, sum loss: 5000.955078, avg loss: 2.806372, ppl: 16.549768 +epoch: 2, batch: 1268, sum loss: 3248.095947, avg loss: 2.311812, ppl: 10.092698 +epoch: 2, batch: 1269, sum loss: 4610.173340, avg loss: 2.523357, ppl: 12.470391 +epoch: 2, batch: 1270, sum loss: 4486.584961, avg loss: 2.724095, ppl: 15.242615 +epoch: 2, batch: 1271, sum loss: 4385.748535, avg loss: 2.616795, ppl: 13.691772 +epoch: 2, batch: 1272, sum loss: 4464.132324, avg loss: 2.712110, ppl: 15.061014 +epoch: 2, batch: 1273, sum loss: 4814.290527, avg loss: 2.575864, ppl: 13.142672 +epoch: 2, batch: 1274, sum loss: 4077.732910, avg loss: 2.691573, ppl: 14.754866 +epoch: 2, batch: 1275, sum loss: 3938.145020, avg loss: 2.669929, 
ppl: 14.438941 +epoch: 2, batch: 1276, sum loss: 4930.853516, avg loss: 2.645308, ppl: 14.087780 +epoch: 2, batch: 1277, sum loss: 4355.978516, avg loss: 2.678953, ppl: 14.569837 +epoch: 2, batch: 1278, sum loss: 4964.820312, avg loss: 2.708576, ppl: 15.007895 +epoch: 2, batch: 1279, sum loss: 4385.991699, avg loss: 2.724218, ppl: 15.244493 +epoch: 2, batch: 1280, sum loss: 3926.576904, avg loss: 2.528382, ppl: 12.533209 +epoch: 2, batch: 1281, sum loss: 4377.915039, avg loss: 2.526206, ppl: 12.505969 +epoch: 2, batch: 1282, sum loss: 4454.869141, avg loss: 2.723025, ppl: 15.226317 +epoch: 2, batch: 1283, sum loss: 4295.082031, avg loss: 2.344477, ppl: 10.427819 +epoch: 2, batch: 1284, sum loss: 4957.518066, avg loss: 2.639786, ppl: 14.010205 +epoch: 2, batch: 1285, sum loss: 4599.500000, avg loss: 2.649482, ppl: 14.146702 +epoch: 2, batch: 1286, sum loss: 4445.495117, avg loss: 2.717295, ppl: 15.139321 +epoch: 2, batch: 1287, sum loss: 4828.176758, avg loss: 2.729326, ppl: 15.322549 +epoch: 2, batch: 1288, sum loss: 5140.463867, avg loss: 2.799817, ppl: 16.441635 +epoch: 2, batch: 1289, sum loss: 5101.432129, avg loss: 2.820029, ppl: 16.777338 +epoch: 2, batch: 1290, sum loss: 3706.689209, avg loss: 2.421090, ppl: 11.258128 +epoch: 2, batch: 1291, sum loss: 5327.367188, avg loss: 2.826190, ppl: 16.881014 +epoch: 2, batch: 1292, sum loss: 4586.382324, avg loss: 2.611835, ppl: 13.624031 +epoch: 2, batch: 1293, sum loss: 4625.078613, avg loss: 2.626393, ppl: 13.823822 +epoch: 2, batch: 1294, sum loss: 4169.552246, avg loss: 2.807779, ppl: 16.573071 +epoch: 2, batch: 1295, sum loss: 4265.909180, avg loss: 2.379202, ppl: 10.796286 +epoch: 2, batch: 1296, sum loss: 4263.693359, avg loss: 2.524389, ppl: 12.483269 +epoch: 2, batch: 1297, sum loss: 4033.096191, avg loss: 2.472775, ppl: 11.855302 +epoch: 2, batch: 1298, sum loss: 4239.623047, avg loss: 2.492430, ppl: 12.090617 +epoch: 2, batch: 1299, sum loss: 4719.618652, avg loss: 2.486628, ppl: 12.020669 +epoch: 2, 
batch: 1300, sum loss: 3960.806641, avg loss: 2.410716, ppl: 11.141936 +epoch: 2, batch: 1301, sum loss: 3884.582275, avg loss: 2.364323, ppl: 10.636831 +epoch: 2, batch: 1302, sum loss: 4115.741699, avg loss: 2.422450, ppl: 11.273440 +epoch: 2, batch: 1303, sum loss: 3536.449219, avg loss: 2.376646, ppl: 10.768724 +epoch: 2, batch: 1304, sum loss: 3786.519287, avg loss: 2.421048, ppl: 11.257653 +epoch: 2, batch: 1305, sum loss: 4496.251953, avg loss: 2.652656, ppl: 14.191683 +epoch: 2, batch: 1306, sum loss: 5058.281250, avg loss: 2.588680, ppl: 13.312191 +epoch: 2, batch: 1307, sum loss: 5032.478027, avg loss: 2.587392, ppl: 13.295057 +epoch: 2, batch: 1308, sum loss: 4410.335938, avg loss: 2.534676, ppl: 12.612342 +epoch: 2, batch: 1309, sum loss: 4479.588379, avg loss: 2.775458, ppl: 16.045969 +epoch: 2, batch: 1310, sum loss: 4751.321777, avg loss: 2.642559, ppl: 14.049113 +epoch: 2, batch: 1311, sum loss: 4005.268311, avg loss: 2.704435, ppl: 14.945871 +epoch: 2, batch: 1312, sum loss: 4493.501465, avg loss: 2.679488, ppl: 14.577627 +epoch: 2, batch: 1313, sum loss: 3710.344971, avg loss: 2.614760, ppl: 13.663942 +epoch: 2, batch: 1314, sum loss: 4294.821289, avg loss: 2.547344, ppl: 12.773129 +epoch: 2, batch: 1315, sum loss: 4087.636475, avg loss: 2.551583, ppl: 12.827397 +epoch: 2, batch: 1316, sum loss: 3007.300781, avg loss: 2.172905, ppl: 8.783766 +epoch: 2, batch: 1317, sum loss: 4391.228516, avg loss: 2.632631, ppl: 13.910320 +epoch: 2, batch: 1318, sum loss: 4077.604980, avg loss: 2.405667, ppl: 11.085817 +epoch: 2, batch: 1319, sum loss: 5072.100098, avg loss: 2.719625, ppl: 15.174627 +epoch: 2, batch: 1320, sum loss: 4744.708984, avg loss: 2.544080, ppl: 12.731507 +epoch: 2, batch: 1321, sum loss: 3791.558350, avg loss: 2.455673, ppl: 11.654271 +epoch: 2, batch: 1322, sum loss: 3224.385742, avg loss: 2.384901, ppl: 10.857985 +epoch: 2, batch: 1323, sum loss: 5038.602051, avg loss: 2.716228, ppl: 15.123163 +epoch: 2, batch: 1324, sum loss: 
4018.835938, avg loss: 2.434183, ppl: 11.406497 +epoch: 2, batch: 1325, sum loss: 4810.751953, avg loss: 2.813305, ppl: 16.664907 +epoch: 2, batch: 1326, sum loss: 4836.325195, avg loss: 2.700349, ppl: 14.884924 +epoch: 2, batch: 1327, sum loss: 3760.040771, avg loss: 2.428967, ppl: 11.347155 +epoch: 2, batch: 1328, sum loss: 3903.054932, avg loss: 2.493965, ppl: 12.109190 +epoch: 2, batch: 1329, sum loss: 4341.737793, avg loss: 2.420144, ppl: 11.247478 +epoch: 2, batch: 1330, sum loss: 4758.855957, avg loss: 2.716242, ppl: 15.123383 +epoch: 2, batch: 1331, sum loss: 4561.363770, avg loss: 2.672152, ppl: 14.471082 +epoch: 2, batch: 1332, sum loss: 3421.970459, avg loss: 2.401383, ppl: 11.038428 +epoch: 2, batch: 1333, sum loss: 4564.502930, avg loss: 2.670862, ppl: 14.452421 +epoch: 2, batch: 1334, sum loss: 3893.783691, avg loss: 2.533366, ppl: 12.595835 +epoch: 2, batch: 1335, sum loss: 5122.753418, avg loss: 2.512385, ppl: 12.334317 +epoch: 2, batch: 1336, sum loss: 4141.002930, avg loss: 2.408961, ppl: 11.122395 +epoch: 2, batch: 1337, sum loss: 4500.769043, avg loss: 2.527102, ppl: 12.517181 +epoch: 2, batch: 1338, sum loss: 3751.975098, avg loss: 2.535118, ppl: 12.617924 +epoch: 2, batch: 1339, sum loss: 4405.479980, avg loss: 2.612978, ppl: 13.639603 +epoch: 2, batch: 1340, sum loss: 4422.550293, avg loss: 2.618443, ppl: 13.714354 +epoch: 2, batch: 1341, sum loss: 3952.980713, avg loss: 2.400110, ppl: 11.024384 +epoch: 2, batch: 1342, sum loss: 3413.119141, avg loss: 2.370222, ppl: 10.699763 +epoch: 2, batch: 1343, sum loss: 4213.202148, avg loss: 2.487132, ppl: 12.026737 +epoch: 2, batch: 1344, sum loss: 5269.551270, avg loss: 2.848406, ppl: 17.260248 +epoch: 2, batch: 1345, sum loss: 4115.026367, avg loss: 2.782303, ppl: 16.156187 +epoch: 2, batch: 1346, sum loss: 4884.671875, avg loss: 2.660497, ppl: 14.303392 +epoch: 2, batch: 1347, sum loss: 4276.631836, avg loss: 2.421649, ppl: 11.264416 +epoch: 2, batch: 1348, sum loss: 4123.696289, avg loss: 
2.458972, ppl: 11.692788 +epoch: 2, batch: 1349, sum loss: 5082.516113, avg loss: 2.756245, ppl: 15.740628 +epoch: 2, batch: 1350, sum loss: 4401.311523, avg loss: 2.749102, ppl: 15.628586 +epoch: 2, batch: 1351, sum loss: 3682.316650, avg loss: 2.402033, ppl: 11.045610 +epoch: 2, batch: 1352, sum loss: 4007.328125, avg loss: 2.622597, ppl: 13.771441 +epoch: 2, batch: 1353, sum loss: 4603.269531, avg loss: 2.397536, ppl: 10.996052 +epoch: 2, batch: 1354, sum loss: 4278.977051, avg loss: 2.405271, ppl: 11.081430 +epoch: 2, batch: 1355, sum loss: 4804.840820, avg loss: 2.787031, ppl: 16.232752 +epoch: 2, batch: 1356, sum loss: 4574.052734, avg loss: 2.701744, ppl: 14.905705 +epoch: 2, batch: 1357, sum loss: 4448.265625, avg loss: 2.468516, ppl: 11.804914 +epoch: 2, batch: 1358, sum loss: 3573.479492, avg loss: 2.276102, ppl: 9.738641 +epoch: 2, batch: 1359, sum loss: 5063.848633, avg loss: 2.665184, ppl: 14.370590 +epoch: 2, batch: 1360, sum loss: 4726.382324, avg loss: 2.591218, ppl: 13.346024 +epoch: 2, batch: 1361, sum loss: 5517.209961, avg loss: 2.839532, ppl: 17.107752 +epoch: 2, batch: 1362, sum loss: 4463.909668, avg loss: 2.536312, ppl: 12.632998 +epoch: 2, batch: 1363, sum loss: 4016.687744, avg loss: 2.573150, ppl: 13.107052 +epoch: 2, batch: 1364, sum loss: 3985.505615, avg loss: 2.649937, ppl: 14.153150 +epoch: 2, batch: 1365, sum loss: 4396.779297, avg loss: 2.601645, ppl: 13.485898 +epoch: 2, batch: 1366, sum loss: 4991.727539, avg loss: 2.780907, ppl: 16.133642 +epoch: 2, batch: 1367, sum loss: 3715.000488, avg loss: 2.594274, ppl: 13.386868 +epoch: 2, batch: 1368, sum loss: 4307.257812, avg loss: 2.494069, ppl: 12.110457 +epoch: 2, batch: 1369, sum loss: 4325.992188, avg loss: 2.769521, ppl: 15.950995 +epoch: 2, batch: 1370, sum loss: 4383.419922, avg loss: 2.627950, ppl: 13.845354 +epoch: 2, batch: 1371, sum loss: 3879.593994, avg loss: 2.292904, ppl: 9.903658 +epoch: 2, batch: 1372, sum loss: 3585.971436, avg loss: 2.179922, ppl: 8.845615 +epoch: 
2, batch: 1373, sum loss: 4309.722656, avg loss: 2.532152, ppl: 12.580549 +epoch: 2, batch: 1374, sum loss: 5008.221191, avg loss: 2.771567, ppl: 15.983655 +epoch: 2, batch: 1375, sum loss: 3928.265625, avg loss: 2.345233, ppl: 10.435709 +epoch: 2, batch: 1376, sum loss: 4067.028564, avg loss: 2.444128, ppl: 11.520497 +epoch: 2, batch: 1377, sum loss: 3780.269043, avg loss: 2.255531, ppl: 9.540354 +epoch: 2, batch: 1378, sum loss: 3923.421387, avg loss: 2.547676, ppl: 12.777376 +epoch: 2, batch: 1379, sum loss: 3874.242676, avg loss: 2.381219, ppl: 10.818079 +epoch: 2, batch: 1380, sum loss: 4220.298340, avg loss: 2.525613, ppl: 12.498549 +epoch: 2, batch: 1381, sum loss: 5186.407715, avg loss: 2.742680, ppl: 15.528543 +epoch: 2, batch: 1382, sum loss: 3872.685303, avg loss: 2.552858, ppl: 12.843754 +epoch: 2, batch: 1383, sum loss: 3688.085938, avg loss: 2.379410, ppl: 10.798532 +epoch: 2, batch: 1384, sum loss: 3879.535645, avg loss: 2.474194, ppl: 11.872129 +epoch: 2, batch: 1385, sum loss: 4426.928711, avg loss: 2.605608, ppl: 13.539461 +epoch: 2, batch: 1386, sum loss: 3864.555420, avg loss: 2.475692, ppl: 11.889935 +epoch: 2, batch: 1387, sum loss: 4230.320312, avg loss: 2.519547, ppl: 12.422974 +epoch: 2, batch: 1388, sum loss: 3787.519531, avg loss: 2.451469, ppl: 11.605385 +epoch: 2, batch: 1389, sum loss: 3229.709473, avg loss: 2.448605, ppl: 11.572187 +epoch: 2, batch: 1390, sum loss: 4098.515625, avg loss: 2.395392, ppl: 10.972501 +epoch: 2, batch: 1391, sum loss: 4857.834961, avg loss: 2.418036, ppl: 11.223797 +epoch: 2, batch: 1392, sum loss: 3878.554199, avg loss: 2.507146, ppl: 12.269857 +epoch: 2, batch: 1393, sum loss: 4730.831055, avg loss: 2.569707, ppl: 13.062002 +epoch: 2, batch: 1394, sum loss: 5305.037598, avg loss: 2.677959, ppl: 14.555359 +epoch: 2, batch: 1395, sum loss: 4080.604980, avg loss: 2.389113, ppl: 10.903817 +epoch: 2, batch: 1396, sum loss: 4387.388672, avg loss: 2.515704, ppl: 12.375322 +epoch: 2, batch: 1397, sum loss: 
5484.936523, avg loss: 2.671669, ppl: 14.464089 +epoch: 2, batch: 1398, sum loss: 3573.026855, avg loss: 2.448956, ppl: 11.576255 +epoch: 2, batch: 1399, sum loss: 4484.316895, avg loss: 2.608678, ppl: 13.581083 +epoch: 2, batch: 1400, sum loss: 4041.981201, avg loss: 2.596006, ppl: 13.410070 +epoch: 2, batch: 1401, sum loss: 2852.168701, avg loss: 1.982049, ppl: 7.257598 +epoch: 2, batch: 1402, sum loss: 5014.416016, avg loss: 2.687254, ppl: 14.691278 +epoch: 2, batch: 1403, sum loss: 4348.976562, avg loss: 2.490823, ppl: 12.071204 +epoch: 2, batch: 1404, sum loss: 5039.968262, avg loss: 2.696612, ppl: 14.829407 +epoch: 2, batch: 1405, sum loss: 3989.234131, avg loss: 2.504227, ppl: 12.234103 +epoch: 2, batch: 1406, sum loss: 4564.702148, avg loss: 2.609893, ppl: 13.597590 +epoch: 2, batch: 1407, sum loss: 4589.708496, avg loss: 2.738490, ppl: 15.463612 +epoch: 2, batch: 1408, sum loss: 5663.287109, avg loss: 2.979110, ppl: 19.670294 +epoch: 2, batch: 1409, sum loss: 4786.066406, avg loss: 2.803788, ppl: 16.507061 +epoch: 2, batch: 1410, sum loss: 4428.525391, avg loss: 2.723570, ppl: 15.234618 +epoch: 2, batch: 1411, sum loss: 3763.230957, avg loss: 2.502148, ppl: 12.208692 +epoch: 2, batch: 1412, sum loss: 3988.901123, avg loss: 2.445678, ppl: 11.538373 +epoch: 2, batch: 1413, sum loss: 5250.375977, avg loss: 2.531522, ppl: 12.572621 +epoch: 2, batch: 1414, sum loss: 4711.925781, avg loss: 2.779897, ppl: 16.117365 +epoch: 2, batch: 1415, sum loss: 4287.212891, avg loss: 2.427640, ppl: 11.332109 +epoch: 2, batch: 1416, sum loss: 3972.258789, avg loss: 2.429516, ppl: 11.353387 +epoch: 2, batch: 1417, sum loss: 4012.988281, avg loss: 2.460447, ppl: 11.710040 +epoch: 2, batch: 1418, sum loss: 4141.831543, avg loss: 2.578974, ppl: 13.183599 +epoch: 2, batch: 1419, sum loss: 3514.130127, avg loss: 2.244017, ppl: 9.431137 +epoch: 2, batch: 1420, sum loss: 4031.272461, avg loss: 2.499239, ppl: 12.173226 +epoch: 2, batch: 1421, sum loss: 5615.182617, avg loss: 2.856146, 
ppl: 17.394358 +epoch: 2, batch: 1422, sum loss: 3507.699219, avg loss: 2.446094, ppl: 11.543174 +epoch: 2, batch: 1423, sum loss: 4131.214844, avg loss: 2.428698, ppl: 11.344098 +epoch: 2, batch: 1424, sum loss: 2761.961914, avg loss: 2.250988, ppl: 9.497112 +epoch: 2, batch: 1425, sum loss: 4203.387695, avg loss: 2.542884, ppl: 12.716293 +epoch: 2, batch: 1426, sum loss: 4863.220215, avg loss: 2.472405, ppl: 11.850911 +epoch: 2, batch: 1427, sum loss: 4643.115234, avg loss: 2.797057, ppl: 16.396328 +epoch: 2, batch: 1428, sum loss: 4178.266602, avg loss: 2.596810, ppl: 13.420852 +epoch: 2, batch: 1429, sum loss: 4415.173828, avg loss: 2.580464, ppl: 13.203264 +epoch: 2, batch: 1430, sum loss: 6138.345703, avg loss: 2.984125, ppl: 19.769199 +epoch: 2, batch: 1431, sum loss: 3599.738770, avg loss: 2.189622, ppl: 8.931838 +epoch: 2, batch: 1432, sum loss: 5482.936035, avg loss: 2.733268, ppl: 15.383080 +epoch: 2, batch: 1433, sum loss: 4873.106445, avg loss: 2.628428, ppl: 13.851984 +epoch: 2, batch: 1434, sum loss: 3691.853516, avg loss: 2.269117, ppl: 9.670858 +epoch: 2, batch: 1435, sum loss: 4201.569336, avg loss: 2.496476, ppl: 12.139641 +epoch: 2, batch: 1436, sum loss: 4470.012207, avg loss: 2.655979, ppl: 14.238914 +epoch: 2, batch: 1437, sum loss: 3565.965820, avg loss: 2.294701, ppl: 9.921473 +epoch: 2, batch: 1438, sum loss: 3657.197021, avg loss: 2.396590, ppl: 10.985657 +epoch: 2, batch: 1439, sum loss: 5635.291504, avg loss: 2.632084, ppl: 13.902712 +epoch: 2, batch: 1440, sum loss: 4228.330078, avg loss: 2.557973, ppl: 12.909628 +epoch: 2, batch: 1441, sum loss: 3459.006836, avg loss: 2.407103, ppl: 11.101750 +epoch: 2, batch: 1442, sum loss: 3704.741211, avg loss: 2.461622, ppl: 11.723815 +epoch: 2, batch: 1443, sum loss: 5133.056152, avg loss: 2.665138, ppl: 14.369936 +epoch: 2, batch: 1444, sum loss: 3533.051514, avg loss: 2.319798, ppl: 10.173615 +epoch: 2, batch: 1445, sum loss: 4139.709961, avg loss: 2.450983, ppl: 11.599741 +epoch: 2, batch: 
1446, sum loss: 3760.010254, avg loss: 2.446331, ppl: 11.545904 +epoch: 2, batch: 1447, sum loss: 5413.998047, avg loss: 2.721970, ppl: 15.210251 +epoch: 2, batch: 1448, sum loss: 4426.119141, avg loss: 2.645618, ppl: 14.092155 +epoch: 2, batch: 1449, sum loss: 4475.098633, avg loss: 2.462905, ppl: 11.738866 +epoch: 2, batch: 1450, sum loss: 5302.018555, avg loss: 2.768678, ppl: 15.937553 +epoch: 2, batch: 1451, sum loss: 5964.192383, avg loss: 2.948192, ppl: 19.071440 +epoch: 2, batch: 1452, sum loss: 4008.177979, avg loss: 2.432147, ppl: 11.383293 +epoch: 2, batch: 1453, sum loss: 4869.389648, avg loss: 2.577760, ppl: 13.167616 +epoch: 2, batch: 1454, sum loss: 4367.590820, avg loss: 2.716163, ppl: 15.122193 +epoch: 2, batch: 1455, sum loss: 3807.716797, avg loss: 2.306309, ppl: 10.037313 +epoch: 2, batch: 1456, sum loss: 4162.062988, avg loss: 2.519409, ppl: 12.421249 +epoch: 2, batch: 1457, sum loss: 3886.864258, avg loss: 2.699211, ppl: 14.868002 +epoch: 2, batch: 1458, sum loss: 3941.058594, avg loss: 2.457019, ppl: 11.669972 +epoch: 2, batch: 1459, sum loss: 4243.668457, avg loss: 2.616318, ppl: 13.685248 +epoch: 2, batch: 1460, sum loss: 4794.787109, avg loss: 2.657864, ppl: 14.265790 +epoch: 2, batch: 1461, sum loss: 3395.082275, avg loss: 2.315881, ppl: 10.133852 +epoch: 2, batch: 1462, sum loss: 4417.775879, avg loss: 2.672581, ppl: 14.477283 +epoch: 2, batch: 1463, sum loss: 4545.152832, avg loss: 2.603180, ppl: 13.506626 +epoch: 2, batch: 1464, sum loss: 5193.782227, avg loss: 2.835034, ppl: 17.030977 +epoch: 2, batch: 1465, sum loss: 4546.297852, avg loss: 2.789140, ppl: 16.267019 +epoch: 2, batch: 1466, sum loss: 4373.209961, avg loss: 2.709548, ppl: 15.022480 +epoch: 2, batch: 1467, sum loss: 4160.336426, avg loss: 2.351801, ppl: 10.504476 +epoch: 2, batch: 1468, sum loss: 4045.780762, avg loss: 2.454964, ppl: 11.646016 +epoch: 2, batch: 1469, sum loss: 3975.647705, avg loss: 2.271799, ppl: 9.696826 +epoch: 2, batch: 1470, sum loss: 4031.561523, avg 
loss: 2.313001, ppl: 10.104708 +epoch: 2, batch: 1471, sum loss: 4546.909668, avg loss: 2.417283, ppl: 11.215346 +epoch: 2, batch: 1472, sum loss: 4962.443359, avg loss: 2.519007, ppl: 12.416258 +epoch: 2, batch: 1473, sum loss: 3379.201660, avg loss: 2.486536, ppl: 12.019563 +epoch: 2, batch: 1474, sum loss: 3553.503174, avg loss: 2.384902, ppl: 10.857993 +epoch: 2, batch: 1475, sum loss: 4550.104492, avg loss: 2.618012, ppl: 13.708440 +epoch: 2, batch: 1476, sum loss: 4178.128418, avg loss: 2.382057, ppl: 10.827153 +epoch: 2, batch: 1477, sum loss: 5080.458984, avg loss: 2.708134, ppl: 15.001256 +epoch: 2, batch: 1478, sum loss: 3198.177734, avg loss: 2.175631, ppl: 8.807741 +epoch: 2, batch: 1479, sum loss: 3849.816895, avg loss: 2.357512, ppl: 10.564631 +epoch: 2, batch: 1480, sum loss: 4846.050781, avg loss: 2.598419, ppl: 13.442465 +epoch: 2, batch: 1481, sum loss: 4567.371582, avg loss: 2.670978, ppl: 14.454092 +epoch: 2, batch: 1482, sum loss: 4028.769531, avg loss: 2.424049, ppl: 11.291488 +epoch: 2, batch: 1483, sum loss: 3939.275635, avg loss: 2.340627, ppl: 10.387750 +epoch: 2, batch: 1484, sum loss: 4603.083496, avg loss: 2.637870, ppl: 13.983388 +epoch: 2, batch: 1485, sum loss: 4536.058105, avg loss: 2.747461, ppl: 15.602967 +epoch: 2, batch: 1486, sum loss: 3968.735352, avg loss: 2.412605, ppl: 11.163004 +epoch: 2, batch: 1487, sum loss: 5328.862305, avg loss: 2.769679, ppl: 15.953513 +epoch: 2, batch: 1488, sum loss: 4586.964355, avg loss: 2.553989, ppl: 12.858295 +epoch: 2, batch: 1489, sum loss: 5447.576172, avg loss: 2.821117, ppl: 16.795595 +epoch: 2, batch: 1490, sum loss: 4416.992188, avg loss: 2.586061, ppl: 13.277369 +epoch: 2, batch: 1491, sum loss: 4595.419922, avg loss: 2.627456, ppl: 13.838516 +epoch: 2, batch: 1492, sum loss: 4167.459473, avg loss: 2.408936, ppl: 11.122121 +epoch: 2, batch: 1493, sum loss: 4430.238770, avg loss: 2.574224, ppl: 13.121125 +epoch: 2, batch: 1494, sum loss: 4247.407715, avg loss: 2.659617, ppl: 14.290811 
+epoch: 2, batch: 1495, sum loss: 3914.283691, avg loss: 2.482108, ppl: 11.966459 +epoch: 2, batch: 1496, sum loss: 3483.781738, avg loss: 2.325622, ppl: 10.233042 +epoch: 2, batch: 1497, sum loss: 3818.237793, avg loss: 2.343915, ppl: 10.421961 +epoch: 2, batch: 1498, sum loss: 3685.635986, avg loss: 2.365620, ppl: 10.650641 +epoch: 2, batch: 1499, sum loss: 4254.452148, avg loss: 2.723721, ppl: 15.236914 +epoch: 2, batch: 1500, sum loss: 4651.209961, avg loss: 2.699483, ppl: 14.872046 +epoch: 2, batch: 1501, sum loss: 4214.819336, avg loss: 2.592140, ppl: 13.358324 +epoch: 2, batch: 1502, sum loss: 4626.342773, avg loss: 3.166559, ppl: 23.725708 +epoch: 2, batch: 1503, sum loss: 5347.541504, avg loss: 2.904694, ppl: 18.259657 +epoch: 2, batch: 1504, sum loss: 4119.839844, avg loss: 2.524412, ppl: 12.483549 +epoch: 2, batch: 1505, sum loss: 4215.941406, avg loss: 2.395421, ppl: 10.972819 +epoch: 2, batch: 1506, sum loss: 4663.938965, avg loss: 2.749964, ppl: 15.642069 +epoch: 2, batch: 1507, sum loss: 4329.784180, avg loss: 2.526129, ppl: 12.504999 +epoch: 2, batch: 1508, sum loss: 4585.553711, avg loss: 2.547530, ppl: 12.775509 +epoch: 2, batch: 1509, sum loss: 4391.282715, avg loss: 2.659772, ppl: 14.293025 +epoch: 2, batch: 1510, sum loss: 4483.958496, avg loss: 2.636072, ppl: 13.958266 +epoch: 2, batch: 1511, sum loss: 4933.197754, avg loss: 2.783972, ppl: 16.183170 +epoch: 2, batch: 1512, sum loss: 5028.509766, avg loss: 2.758371, ppl: 15.774124 +epoch: 2, batch: 1513, sum loss: 4241.192383, avg loss: 2.311277, ppl: 10.087295 +epoch: 2, batch: 1514, sum loss: 4910.861328, avg loss: 2.633170, ppl: 13.917814 +epoch: 2, batch: 1515, sum loss: 4550.407227, avg loss: 2.684606, ppl: 14.652429 +epoch: 2, batch: 1516, sum loss: 3839.595215, avg loss: 2.529378, ppl: 12.545696 +epoch: 2, batch: 1517, sum loss: 4841.355957, avg loss: 2.795240, ppl: 16.366558 +epoch: 2, batch: 1518, sum loss: 4307.124512, avg loss: 2.343376, ppl: 10.416340 +epoch: 2, batch: 1519, sum 
loss: 3628.138184, avg loss: 2.197540, ppl: 9.002837 +epoch: 2, batch: 1520, sum loss: 4504.584961, avg loss: 2.741683, ppl: 15.513068 +epoch: 2, batch: 1521, sum loss: 5320.343262, avg loss: 3.010947, ppl: 20.306622 +epoch: 2, batch: 1522, sum loss: 3762.440430, avg loss: 2.473662, ppl: 11.865825 +epoch: 2, batch: 1523, sum loss: 4599.866211, avg loss: 2.429935, ppl: 11.358138 +epoch: 2, batch: 1524, sum loss: 4393.981934, avg loss: 2.534015, ppl: 12.604009 +epoch: 2, batch: 1525, sum loss: 4985.995117, avg loss: 2.837789, ppl: 17.077961 +epoch: 2, batch: 1526, sum loss: 4477.566895, avg loss: 2.698955, ppl: 14.864195 +epoch: 2, batch: 1527, sum loss: 4053.466309, avg loss: 2.544549, ppl: 12.737482 +epoch: 2, batch: 1528, sum loss: 4680.540527, avg loss: 2.632475, ppl: 13.908152 +epoch: 2, batch: 1529, sum loss: 4150.792969, avg loss: 2.418877, ppl: 11.233239 +epoch: 2, batch: 1530, sum loss: 3487.353271, avg loss: 2.362705, ppl: 10.619644 +epoch: 2, batch: 1531, sum loss: 4395.737793, avg loss: 2.498998, ppl: 12.170295 +epoch: 2, batch: 1532, sum loss: 4597.201660, avg loss: 2.615018, ppl: 13.667460 +epoch: 2, batch: 1533, sum loss: 4406.560547, avg loss: 2.325362, ppl: 10.230380 +epoch: 2, batch: 1534, sum loss: 4334.191406, avg loss: 2.422689, ppl: 11.276145 +epoch: 2, batch: 1535, sum loss: 4557.797363, avg loss: 2.676334, ppl: 14.531728 +epoch: 2, batch: 1536, sum loss: 4719.741699, avg loss: 2.572067, ppl: 13.092854 +epoch: 2, batch: 1537, sum loss: 4128.197266, avg loss: 2.526437, ppl: 12.508852 +epoch: 2, batch: 1538, sum loss: 4146.471191, avg loss: 2.388520, ppl: 10.897357 +epoch: 2, batch: 1539, sum loss: 4361.295410, avg loss: 2.372848, ppl: 10.727905 +epoch: 2, batch: 1540, sum loss: 3874.846191, avg loss: 2.504749, ppl: 12.240484 +epoch: 2, batch: 1541, sum loss: 4077.767578, avg loss: 2.288310, ppl: 9.858259 +epoch: 2, batch: 1542, sum loss: 3201.895508, avg loss: 2.200615, ppl: 9.030569 +epoch: 2, batch: 1543, sum loss: 4510.587402, avg loss: 
2.696107, ppl: 14.821923 +epoch: 2, batch: 1544, sum loss: 4011.901123, avg loss: 2.428512, ppl: 11.341989 +epoch: 2, batch: 1545, sum loss: 4799.680176, avg loss: 2.650293, ppl: 14.158185 +epoch: 2, batch: 1546, sum loss: 4401.880859, avg loss: 2.610843, ppl: 13.610516 +epoch: 2, batch: 1547, sum loss: 4914.402344, avg loss: 2.683999, ppl: 14.643540 +epoch: 2, batch: 1548, sum loss: 3793.680176, avg loss: 2.546094, ppl: 12.757179 +epoch: 2, batch: 1549, sum loss: 4213.435547, avg loss: 2.572305, ppl: 13.095972 +epoch: 2, batch: 1550, sum loss: 4998.130371, avg loss: 2.735703, ppl: 15.420588 +epoch: 2, batch: 1551, sum loss: 5163.290527, avg loss: 2.854224, ppl: 17.360954 +epoch: 2, batch: 1552, sum loss: 5228.363281, avg loss: 2.735930, ppl: 15.424088 +epoch: 2, batch: 1553, sum loss: 4021.192871, avg loss: 2.497635, ppl: 12.153721 +epoch: 2, batch: 1554, sum loss: 4901.707520, avg loss: 2.625446, ppl: 13.810730 +epoch: 2, batch: 1555, sum loss: 4376.597168, avg loss: 2.634917, ppl: 13.942156 +epoch: 2, batch: 1556, sum loss: 5444.604980, avg loss: 2.773614, ppl: 16.016418 +epoch: 2, batch: 1557, sum loss: 4266.360352, avg loss: 2.580980, ppl: 13.210078 +epoch: 2, batch: 1558, sum loss: 3997.537598, avg loss: 2.577394, ppl: 13.162785 +epoch: 2, batch: 1559, sum loss: 4182.716309, avg loss: 2.467679, ppl: 11.795042 +epoch: 2, batch: 1560, sum loss: 5545.130859, avg loss: 2.689200, ppl: 14.719897 +epoch: 2, batch: 1561, sum loss: 4609.450684, avg loss: 2.552298, ppl: 12.836573 +epoch: 2, batch: 1562, sum loss: 4535.858398, avg loss: 2.548235, ppl: 12.784521 +epoch: 2, batch: 1563, sum loss: 4114.305664, avg loss: 2.550716, ppl: 12.816282 +epoch: 2, batch: 1564, sum loss: 4750.900879, avg loss: 2.778305, ppl: 16.091715 +epoch: 2, batch: 1565, sum loss: 3289.911865, avg loss: 2.389188, ppl: 10.904636 +epoch: 2, batch: 1566, sum loss: 4363.361816, avg loss: 2.562162, ppl: 12.963814 +epoch: 2, batch: 1567, sum loss: 4254.555176, avg loss: 2.514513, ppl: 12.360582 
+epoch: 2, batch: 1568, sum loss: 4313.799805, avg loss: 2.628763, ppl: 13.856615 +epoch: 2, batch: 1569, sum loss: 3582.945068, avg loss: 2.564742, ppl: 12.997309 +epoch: 2, batch: 1570, sum loss: 4642.016602, avg loss: 2.497050, ppl: 12.146612 +epoch: 2, batch: 1571, sum loss: 5185.418945, avg loss: 2.804445, ppl: 16.517906 +epoch: 2, batch: 1572, sum loss: 4547.496582, avg loss: 2.887299, ppl: 17.944780 +epoch: 2, batch: 1573, sum loss: 4630.826172, avg loss: 2.667526, ppl: 14.404296 +epoch: 2, batch: 1574, sum loss: 4453.520996, avg loss: 2.687701, ppl: 14.697850 +epoch: 2, batch: 1575, sum loss: 3468.202637, avg loss: 2.522329, ppl: 12.457578 +epoch: 2, batch: 1576, sum loss: 3636.388184, avg loss: 2.468695, ppl: 11.807034 +epoch: 2, batch: 1577, sum loss: 5312.801758, avg loss: 2.648456, ppl: 14.132196 +epoch: 2, batch: 1578, sum loss: 3761.954590, avg loss: 2.439659, ppl: 11.469133 +epoch: 2, batch: 1579, sum loss: 3458.790283, avg loss: 2.312026, ppl: 10.094851 +epoch: 2, batch: 1580, sum loss: 4416.109863, avg loss: 2.679678, ppl: 14.580404 +epoch: 2, batch: 1581, sum loss: 5070.423828, avg loss: 2.617668, ppl: 13.703734 +epoch: 2, batch: 1582, sum loss: 5198.256348, avg loss: 2.857755, ppl: 17.422369 +epoch: 2, batch: 1583, sum loss: 3542.885254, avg loss: 2.431630, ppl: 11.377414 +epoch: 2, batch: 1584, sum loss: 4096.495117, avg loss: 2.813527, ppl: 16.668602 +epoch: 2, batch: 1585, sum loss: 4148.773926, avg loss: 2.540584, ppl: 12.687079 +epoch: 2, batch: 1586, sum loss: 5147.840332, avg loss: 2.710816, ppl: 15.041551 +epoch: 2, batch: 1587, sum loss: 3729.942383, avg loss: 2.383350, ppl: 10.841156 +epoch: 2, batch: 1588, sum loss: 4635.963867, avg loss: 2.608871, ppl: 13.583706 +epoch: 2, batch: 1589, sum loss: 5054.982422, avg loss: 2.862391, ppl: 17.503326 +epoch: 2, batch: 1590, sum loss: 4495.046875, avg loss: 2.656647, ppl: 14.248437 +epoch: 2, batch: 1591, sum loss: 4237.171875, avg loss: 2.599492, ppl: 13.456898 +epoch: 2, batch: 1592, sum 
loss: 4924.342285, avg loss: 2.622120, ppl: 13.764879 +epoch: 2, batch: 1593, sum loss: 4595.712891, avg loss: 2.648826, ppl: 14.137430 +epoch: 2, batch: 1594, sum loss: 4497.927246, avg loss: 2.774785, ppl: 16.035183 +epoch: 2, batch: 1595, sum loss: 3905.766113, avg loss: 2.316587, ppl: 10.141006 +epoch: 2, batch: 1596, sum loss: 4191.477051, avg loss: 2.624594, ppl: 13.798977 +epoch: 2, batch: 1597, sum loss: 3958.203613, avg loss: 2.472332, ppl: 11.850052 +epoch: 2, batch: 1598, sum loss: 3851.245361, avg loss: 2.413061, ppl: 11.168094 +epoch: 2, batch: 1599, sum loss: 4893.966797, avg loss: 2.674299, ppl: 14.502180 +epoch: 2, batch: 1600, sum loss: 4636.674805, avg loss: 2.660169, ppl: 14.298707 +epoch: 2, batch: 1601, sum loss: 5122.816406, avg loss: 2.703333, ppl: 14.929411 +epoch: 2, batch: 1602, sum loss: 4299.595215, avg loss: 2.613736, ppl: 13.649947 +epoch: 2, batch: 1603, sum loss: 3871.274414, avg loss: 2.300222, ppl: 9.976401 +epoch: 2, batch: 1604, sum loss: 4332.556152, avg loss: 2.550063, ppl: 12.807906 +epoch: 2, batch: 1605, sum loss: 4699.277344, avg loss: 2.603478, ppl: 13.510643 +epoch: 2, batch: 1606, sum loss: 4198.840820, avg loss: 2.491894, ppl: 12.084136 +epoch: 2, batch: 1607, sum loss: 4254.481934, avg loss: 2.456398, ppl: 11.662729 +epoch: 2, batch: 1608, sum loss: 4385.010254, avg loss: 2.579418, ppl: 13.189459 +epoch: 2, batch: 1609, sum loss: 5075.993164, avg loss: 2.638250, ppl: 13.988704 +epoch: 2, batch: 1610, sum loss: 4081.202637, avg loss: 2.614480, ppl: 13.660105 +epoch: 2, batch: 1611, sum loss: 4646.819824, avg loss: 2.632759, ppl: 13.912102 +epoch: 2, batch: 1612, sum loss: 3957.583008, avg loss: 2.556578, ppl: 12.891628 +epoch: 2, batch: 1613, sum loss: 4600.140137, avg loss: 2.651378, ppl: 14.173552 +epoch: 2, batch: 1614, sum loss: 4836.800781, avg loss: 2.810459, ppl: 16.617550 +epoch: 2, batch: 1615, sum loss: 4492.389160, avg loss: 2.420468, ppl: 11.251127 +epoch: 2, batch: 1616, sum loss: 4738.275391, avg loss: 
2.443670, ppl: 11.515222 +epoch: 2, batch: 1617, sum loss: 4010.714355, avg loss: 2.533616, ppl: 12.598983 +epoch: 2, batch: 1618, sum loss: 3712.695068, avg loss: 2.500131, ppl: 12.184094 +epoch: 2, batch: 1619, sum loss: 5250.673828, avg loss: 2.789943, ppl: 16.280100 +epoch: 2, batch: 1620, sum loss: 4146.840820, avg loss: 2.530104, ppl: 12.554813 +epoch: 2, batch: 1621, sum loss: 4066.446289, avg loss: 2.466007, ppl: 11.775339 +epoch: 2, batch: 1622, sum loss: 4685.747559, avg loss: 2.696057, ppl: 14.821181 +epoch: 2, batch: 1623, sum loss: 4630.278809, avg loss: 2.642853, ppl: 14.053244 +epoch: 2, batch: 1624, sum loss: 4490.488281, avg loss: 2.538433, ppl: 12.659822 +epoch: 2, batch: 1625, sum loss: 5233.800781, avg loss: 2.696445, ppl: 14.826935 +epoch: 2, batch: 1626, sum loss: 4198.463379, avg loss: 2.421259, ppl: 11.260029 +epoch: 2, batch: 1627, sum loss: 4767.067383, avg loss: 2.935386, ppl: 18.828773 +epoch: 2, batch: 1628, sum loss: 4228.995117, avg loss: 2.658074, ppl: 14.268776 +epoch: 2, batch: 1629, sum loss: 4461.867188, avg loss: 2.600156, ppl: 13.465837 +epoch: 2, batch: 1630, sum loss: 5205.779297, avg loss: 2.754381, ppl: 15.711308 +epoch: 2, batch: 1631, sum loss: 4991.862305, avg loss: 2.620400, ppl: 13.741221 +epoch: 2, batch: 1632, sum loss: 4328.163086, avg loss: 2.548977, ppl: 12.794010 +epoch: 2, batch: 1633, sum loss: 3842.215332, avg loss: 2.470878, ppl: 11.832833 +epoch: 2, batch: 1634, sum loss: 5096.903320, avg loss: 2.852212, ppl: 17.326073 +epoch: 2, batch: 1635, sum loss: 3613.326416, avg loss: 2.478276, ppl: 11.920695 +epoch: 2, batch: 1636, sum loss: 5208.500000, avg loss: 2.684794, ppl: 14.655181 +epoch: 2, batch: 1637, sum loss: 3703.413574, avg loss: 2.422115, ppl: 11.269668 +epoch: 2, batch: 1638, sum loss: 3503.005859, avg loss: 2.286557, ppl: 9.841001 +epoch: 2, batch: 1639, sum loss: 4458.625488, avg loss: 2.674640, ppl: 14.507129 +epoch: 2, batch: 1640, sum loss: 4051.081299, avg loss: 2.603523, ppl: 13.511251 +epoch: 
2, batch: 1641, sum loss: 5675.293457, avg loss: 2.764390, ppl: 15.869364 +epoch: 2, batch: 1642, sum loss: 4457.739258, avg loss: 2.603820, ppl: 13.515263 +epoch: 2, batch: 1643, sum loss: 4406.689453, avg loss: 2.682099, ppl: 14.615745 +epoch: 2, batch: 1644, sum loss: 4201.788086, avg loss: 2.508530, ppl: 12.286860 +epoch: 2, batch: 1645, sum loss: 4215.077637, avg loss: 2.500046, ppl: 12.183055 +epoch: 2, batch: 1646, sum loss: 5278.632324, avg loss: 2.715346, ppl: 15.109838 +epoch: 2, batch: 1647, sum loss: 3951.429688, avg loss: 2.302698, ppl: 10.001128 +epoch: 2, batch: 1648, sum loss: 4949.650391, avg loss: 2.887777, ppl: 17.953362 +epoch: 2, batch: 1649, sum loss: 5423.653320, avg loss: 3.104553, ppl: 22.299238 +epoch: 2, batch: 1650, sum loss: 4743.288574, avg loss: 2.663273, ppl: 14.343152 +epoch: 2, batch: 1651, sum loss: 3636.670898, avg loss: 2.224263, ppl: 9.246670 +epoch: 2, batch: 1652, sum loss: 4640.202148, avg loss: 2.466880, ppl: 11.785622 +epoch: 2, batch: 1653, sum loss: 3831.997070, avg loss: 2.619274, ppl: 13.725750 +epoch: 2, batch: 1654, sum loss: 4661.815430, avg loss: 2.562845, ppl: 12.972675 +epoch: 2, batch: 1655, sum loss: 4388.732910, avg loss: 2.736118, ppl: 15.426979 +epoch: 2, batch: 1656, sum loss: 3677.266846, avg loss: 2.298292, ppl: 9.957158 +epoch: 2, batch: 1657, sum loss: 3482.278564, avg loss: 2.344969, ppl: 10.432944 +epoch: 2, batch: 1658, sum loss: 4836.945312, avg loss: 2.684210, ppl: 14.646620 +epoch: 2, batch: 1659, sum loss: 4699.626465, avg loss: 2.551372, ppl: 12.824681 +epoch: 2, batch: 1660, sum loss: 3768.728516, avg loss: 2.633633, ppl: 13.924260 +epoch: 2, batch: 1661, sum loss: 4409.856445, avg loss: 2.421668, ppl: 11.264628 +epoch: 2, batch: 1662, sum loss: 4338.026855, avg loss: 2.426189, ppl: 11.315681 +epoch: 2, batch: 1663, sum loss: 4417.614746, avg loss: 2.504317, ppl: 12.235197 +epoch: 2, batch: 1664, sum loss: 4503.625488, avg loss: 2.535825, ppl: 12.626847 +epoch: 2, batch: 1665, sum loss: 
3843.324707, avg loss: 2.508698, ppl: 12.288914 +epoch: 2, batch: 1666, sum loss: 3430.601074, avg loss: 2.268916, ppl: 9.668916 +epoch: 2, batch: 1667, sum loss: 3833.273682, avg loss: 2.391312, ppl: 10.927826 +epoch: 2, batch: 1668, sum loss: 4580.018555, avg loss: 2.500010, ppl: 12.182619 +epoch: 2, batch: 1669, sum loss: 4640.871094, avg loss: 2.754226, ppl: 15.708880 +epoch: 2, batch: 1670, sum loss: 3914.544922, avg loss: 2.355322, ppl: 10.541521 +epoch: 2, batch: 1671, sum loss: 3942.174072, avg loss: 2.442487, ppl: 11.501610 +epoch: 2, batch: 1672, sum loss: 4242.196289, avg loss: 2.438044, ppl: 11.450620 +epoch: 2, batch: 1673, sum loss: 5316.411133, avg loss: 2.876846, ppl: 17.758173 +epoch: 2, batch: 1674, sum loss: 4331.357422, avg loss: 2.612399, ppl: 13.631712 +epoch: 2, batch: 1675, sum loss: 3865.351562, avg loss: 2.609961, ppl: 13.598515 +epoch: 2, batch: 1676, sum loss: 4466.388672, avg loss: 2.697095, ppl: 14.836564 +epoch: 2, batch: 1677, sum loss: 5290.024902, avg loss: 2.985342, ppl: 19.793261 +epoch: 2, batch: 1678, sum loss: 4412.596680, avg loss: 2.882166, ppl: 17.852907 +epoch: 2, batch: 1679, sum loss: 4738.439453, avg loss: 2.500496, ppl: 12.188534 +epoch: 2, batch: 1680, sum loss: 3520.921875, avg loss: 2.491806, ppl: 12.083078 +epoch: 2, batch: 1681, sum loss: 4212.157227, avg loss: 2.402828, ppl: 11.054394 +epoch: 2, batch: 1682, sum loss: 4391.500488, avg loss: 2.674483, ppl: 14.504847 +epoch: 2, batch: 1683, sum loss: 4230.123047, avg loss: 2.523940, ppl: 12.477660 +epoch: 2, batch: 1684, sum loss: 3541.176514, avg loss: 2.261288, ppl: 9.595437 +epoch: 2, batch: 1685, sum loss: 3455.295166, avg loss: 2.423068, ppl: 11.280416 +epoch: 2, batch: 1686, sum loss: 4328.410645, avg loss: 2.681791, ppl: 14.611239 +epoch: 2, batch: 1687, sum loss: 4462.198242, avg loss: 2.634119, ppl: 13.931038 +epoch: 2, batch: 1688, sum loss: 4356.377441, avg loss: 2.565593, ppl: 13.008376 +epoch: 2, batch: 1689, sum loss: 5338.422363, avg loss: 2.700264, 
ppl: 14.883664 +epoch: 2, batch: 1690, sum loss: 5087.584961, avg loss: 2.813930, ppl: 16.675320 +epoch: 2, batch: 1691, sum loss: 4003.735352, avg loss: 2.462322, ppl: 11.732018 +epoch: 2, batch: 1692, sum loss: 4125.638184, avg loss: 2.568890, ppl: 13.051333 +epoch: 2, batch: 1693, sum loss: 4869.554688, avg loss: 2.696321, ppl: 14.825083 +epoch: 2, batch: 1694, sum loss: 4200.503906, avg loss: 2.435075, ppl: 11.416673 +epoch: 2, batch: 1695, sum loss: 4344.630371, avg loss: 2.678564, ppl: 14.564158 +epoch: 2, batch: 1696, sum loss: 4286.262695, avg loss: 2.518368, ppl: 12.408333 +epoch: 2, batch: 1697, sum loss: 4412.468750, avg loss: 2.658114, ppl: 14.269348 +epoch: 2, batch: 1698, sum loss: 3971.863281, avg loss: 2.510659, ppl: 12.313045 +epoch: 2, batch: 1699, sum loss: 3429.617432, avg loss: 2.516227, ppl: 12.381792 +epoch: 2, batch: 1700, sum loss: 4900.705078, avg loss: 2.860890, ppl: 17.477077 +epoch: 2, batch: 1701, sum loss: 3771.408936, avg loss: 2.479559, ppl: 11.935999 +epoch: 2, batch: 1702, sum loss: 3231.887695, avg loss: 2.316766, ppl: 10.142815 +epoch: 2, batch: 1703, sum loss: 4232.268555, avg loss: 2.641865, ppl: 14.039370 +epoch: 2, batch: 1704, sum loss: 4693.975586, avg loss: 2.464029, ppl: 11.752067 +epoch: 2, batch: 1705, sum loss: 4124.760742, avg loss: 2.512035, ppl: 12.329992 +epoch: 2, batch: 1706, sum loss: 4058.144531, avg loss: 2.773851, ppl: 16.020216 +epoch: 2, batch: 1707, sum loss: 4367.213379, avg loss: 2.450737, ppl: 11.596893 +epoch: 2, batch: 1708, sum loss: 3588.508301, avg loss: 2.446154, ppl: 11.543865 +epoch: 2, batch: 1709, sum loss: 3351.421875, avg loss: 2.179078, ppl: 8.838153 +epoch: 2, batch: 1710, sum loss: 3963.375977, avg loss: 2.600640, ppl: 13.472362 +epoch: 2, batch: 1711, sum loss: 4488.547852, avg loss: 2.477123, ppl: 11.906964 +epoch: 2, batch: 1712, sum loss: 3808.343750, avg loss: 2.442812, ppl: 11.505348 +epoch: 2, batch: 1713, sum loss: 5468.009277, avg loss: 2.798367, ppl: 16.417814 +epoch: 2, batch: 
1714, sum loss: 5195.121582, avg loss: 2.761894, ppl: 15.829788 +epoch: 2, batch: 1715, sum loss: 4968.259277, avg loss: 2.905415, ppl: 18.272821 +epoch: 2, batch: 1716, sum loss: 3591.088135, avg loss: 2.434636, ppl: 11.411665 +epoch: 2, batch: 1717, sum loss: 4696.596191, avg loss: 2.711661, ppl: 15.054255 +epoch: 2, batch: 1718, sum loss: 3681.866699, avg loss: 2.496181, ppl: 12.136055 +epoch: 2, batch: 1719, sum loss: 4597.190430, avg loss: 2.532887, ppl: 12.589807 +epoch: 2, batch: 1720, sum loss: 4076.802979, avg loss: 2.429561, ppl: 11.353892 +epoch: 2, batch: 1721, sum loss: 4056.561523, avg loss: 2.460013, ppl: 11.704966 +epoch: 2, batch: 1722, sum loss: 4122.435059, avg loss: 2.350305, ppl: 10.488770 +epoch: 2, batch: 1723, sum loss: 3781.709473, avg loss: 2.492887, ppl: 12.096145 +epoch: 2, batch: 1724, sum loss: 4117.525391, avg loss: 2.500015, ppl: 12.182680 +epoch: 2, batch: 1725, sum loss: 4467.844238, avg loss: 2.521357, ppl: 12.445472 +epoch: 2, batch: 1726, sum loss: 3891.478027, avg loss: 2.467646, ppl: 11.794651 +epoch: 2, batch: 1727, sum loss: 3210.758789, avg loss: 2.350482, ppl: 10.490627 +epoch: 2, batch: 1728, sum loss: 3790.331055, avg loss: 2.286086, ppl: 9.836366 +epoch: 2, batch: 1729, sum loss: 3872.741211, avg loss: 2.479348, ppl: 11.933477 +epoch: 2, batch: 1730, sum loss: 3992.683594, avg loss: 2.495427, ppl: 12.126912 +epoch: 2, batch: 1731, sum loss: 4540.064453, avg loss: 2.773405, ppl: 16.013071 +epoch: 2, batch: 1732, sum loss: 4032.459961, avg loss: 2.733871, ppl: 15.392359 +epoch: 2, batch: 1733, sum loss: 4551.206543, avg loss: 2.482928, ppl: 11.976277 +epoch: 2, batch: 1734, sum loss: 4831.277344, avg loss: 2.792646, ppl: 16.324152 +epoch: 2, batch: 1735, sum loss: 5769.413574, avg loss: 2.907970, ppl: 18.319580 +epoch: 2, batch: 1736, sum loss: 4267.360840, avg loss: 2.523572, ppl: 12.473077 +epoch: 2, batch: 1737, sum loss: 4586.662109, avg loss: 2.521529, ppl: 12.447620 +epoch: 2, batch: 1738, sum loss: 4370.441406, avg 
loss: 2.564813, ppl: 12.998229 +epoch: 2, batch: 1739, sum loss: 4629.121094, avg loss: 2.578898, ppl: 13.182599 +epoch: 2, batch: 1740, sum loss: 5651.036133, avg loss: 2.971102, ppl: 19.513416 +epoch: 2, batch: 1741, sum loss: 3938.008789, avg loss: 2.604503, ppl: 13.524508 +epoch: 2, batch: 1742, sum loss: 3947.550781, avg loss: 2.482736, ppl: 11.973985 +epoch: 2, batch: 1743, sum loss: 3570.010986, avg loss: 2.425279, ppl: 11.305385 +epoch: 2, batch: 1744, sum loss: 3955.029785, avg loss: 2.500019, ppl: 12.182720 +epoch: 2, batch: 1745, sum loss: 5475.285645, avg loss: 2.659196, ppl: 14.284805 +epoch: 2, batch: 1746, sum loss: 4991.553223, avg loss: 2.634065, ppl: 13.930281 +epoch: 2, batch: 1747, sum loss: 3861.770996, avg loss: 2.550708, ppl: 12.816169 +epoch: 2, batch: 1748, sum loss: 4582.355957, avg loss: 2.675047, ppl: 14.513034 +epoch: 2, batch: 1749, sum loss: 4412.301758, avg loss: 2.666044, ppl: 14.382951 +epoch: 2, batch: 1750, sum loss: 3192.585938, avg loss: 2.453948, ppl: 11.634183 +epoch: 2, batch: 1751, sum loss: 5256.829102, avg loss: 2.880454, ppl: 17.822369 +epoch: 2, batch: 1752, sum loss: 4338.631348, avg loss: 2.431968, ppl: 11.381264 +epoch: 2, batch: 1753, sum loss: 4202.283203, avg loss: 2.692046, ppl: 14.761843 +epoch: 2, batch: 1754, sum loss: 5533.499023, avg loss: 2.711170, ppl: 15.046877 +epoch: 2, batch: 1755, sum loss: 4833.431152, avg loss: 2.667456, ppl: 14.403286 +epoch: 2, batch: 1756, sum loss: 4267.887207, avg loss: 2.347573, ppl: 10.460150 +epoch: 2, batch: 1757, sum loss: 4256.238281, avg loss: 2.373808, ppl: 10.738210 +epoch: 2, batch: 1758, sum loss: 4048.683594, avg loss: 2.399931, ppl: 11.022418 +epoch: 2, batch: 1759, sum loss: 4646.897949, avg loss: 2.513195, ppl: 12.344311 +epoch: 2, batch: 1760, sum loss: 4592.232910, avg loss: 2.531551, ppl: 12.572988 +epoch: 2, batch: 1761, sum loss: 4096.716309, avg loss: 2.424093, ppl: 11.291978 +epoch: 2, batch: 1762, sum loss: 4384.816895, avg loss: 2.701674, ppl: 14.904661 
+epoch: 2, batch: 1763, sum loss: 4675.502441, avg loss: 2.632603, ppl: 13.909926 +epoch: 2, batch: 1764, sum loss: 3016.387939, avg loss: 2.266257, ppl: 9.643237 +epoch: 2, batch: 1765, sum loss: 4137.652344, avg loss: 2.192714, ppl: 8.959500 +epoch: 2, batch: 1766, sum loss: 4788.877930, avg loss: 2.756982, ppl: 15.752233 +epoch: 2, batch: 1767, sum loss: 3945.084473, avg loss: 2.593744, ppl: 13.379772 +epoch: 2, batch: 1768, sum loss: 3957.659180, avg loss: 2.444509, ppl: 11.524884 +epoch: 2, batch: 1769, sum loss: 4618.300293, avg loss: 2.773754, ppl: 16.018654 +epoch: 2, batch: 1770, sum loss: 3813.498291, avg loss: 2.275357, ppl: 9.731393 +epoch: 2, batch: 1771, sum loss: 3593.179443, avg loss: 2.215277, ppl: 9.163947 +epoch: 2, batch: 1772, sum loss: 4363.854492, avg loss: 2.485111, ppl: 12.002450 +epoch: 2, batch: 1773, sum loss: 6422.230469, avg loss: 2.841695, ppl: 17.144798 +epoch: 2, batch: 1774, sum loss: 4487.160156, avg loss: 2.616420, ppl: 13.686635 +epoch: 2, batch: 1775, sum loss: 3943.106201, avg loss: 2.557138, ppl: 12.898845 +epoch: 2, batch: 1776, sum loss: 4292.633301, avg loss: 2.631903, ppl: 13.900192 +epoch: 2, batch: 1777, sum loss: 4458.387207, avg loss: 2.784752, ppl: 16.195795 +epoch: 2, batch: 1778, sum loss: 5443.374512, avg loss: 2.860418, ppl: 17.468821 +epoch: 2, batch: 1779, sum loss: 4121.144531, avg loss: 2.346893, ppl: 10.453045 +epoch: 2, batch: 1780, sum loss: 4265.418457, avg loss: 2.519444, ppl: 12.421688 +epoch: 2, batch: 1781, sum loss: 4241.872559, avg loss: 2.470514, ppl: 11.828526 +epoch: 2, batch: 1782, sum loss: 4257.645508, avg loss: 2.547963, ppl: 12.781038 +epoch: 2, batch: 1783, sum loss: 4497.353027, avg loss: 2.552414, ppl: 12.838057 +epoch: 2, batch: 1784, sum loss: 3634.818115, avg loss: 2.372597, ppl: 10.725206 +epoch: 2, batch: 1785, sum loss: 5317.474609, avg loss: 2.762324, ppl: 15.836610 +epoch: 2, batch: 1786, sum loss: 5221.004883, avg loss: 2.887724, ppl: 17.952402 +epoch: 2, batch: 1787, sum loss: 
4680.027344, avg loss: 2.797386, ppl: 16.401724 +epoch: 2, batch: 1788, sum loss: 4297.173828, avg loss: 2.371509, ppl: 10.713545 +epoch: 2, batch: 1789, sum loss: 3997.867676, avg loss: 2.351687, ppl: 10.503273 +epoch: 2, batch: 1790, sum loss: 3870.728516, avg loss: 2.373224, ppl: 10.731936 +epoch: 2, batch: 1791, sum loss: 4368.886719, avg loss: 2.453053, ppl: 11.623777 +epoch: 2, batch: 1792, sum loss: 4694.888184, avg loss: 2.876770, ppl: 17.756823 +epoch: 2, batch: 1793, sum loss: 4394.380859, avg loss: 2.656821, ppl: 14.250907 +epoch: 2, batch: 1794, sum loss: 4894.817383, avg loss: 2.670386, ppl: 14.445545 +epoch: 2, batch: 1795, sum loss: 4060.610107, avg loss: 2.541058, ppl: 12.693088 +epoch: 2, batch: 1796, sum loss: 4357.963379, avg loss: 2.460736, ppl: 11.713428 +epoch: 2, batch: 1797, sum loss: 4722.920898, avg loss: 2.668317, ppl: 14.415685 +epoch: 2, batch: 1798, sum loss: 4778.116211, avg loss: 2.721023, ppl: 15.195861 +epoch: 2, batch: 1799, sum loss: 4156.139648, avg loss: 2.667612, ppl: 14.405529 +epoch: 2, batch: 1800, sum loss: 4319.351562, avg loss: 2.381120, ppl: 10.817011 +epoch: 2, batch: 1801, sum loss: 3898.594482, avg loss: 2.368526, ppl: 10.681641 +epoch: 2, batch: 1802, sum loss: 4404.284180, avg loss: 2.415954, ppl: 11.200452 +epoch: 2, batch: 1803, sum loss: 5425.977539, avg loss: 2.824559, ppl: 16.853510 +epoch: 2, batch: 1804, sum loss: 4391.477539, avg loss: 2.663116, ppl: 14.340899 +epoch: 2, batch: 1805, sum loss: 3839.259033, avg loss: 2.465806, ppl: 11.772962 +epoch: 2, batch: 1806, sum loss: 3474.429199, avg loss: 2.231490, ppl: 9.313730 +epoch: 2, batch: 1807, sum loss: 4698.123047, avg loss: 2.729880, ppl: 15.331041 +epoch: 2, batch: 1808, sum loss: 4607.886230, avg loss: 2.531806, ppl: 12.576195 +epoch: 2, batch: 1809, sum loss: 3751.272461, avg loss: 2.645467, ppl: 14.090028 +epoch: 2, batch: 1810, sum loss: 3533.082031, avg loss: 2.193099, ppl: 8.962944 +epoch: 2, batch: 1811, sum loss: 3457.277100, avg loss: 2.151386, 
ppl: 8.596766 +epoch: 2, batch: 1812, sum loss: 4142.282715, avg loss: 2.518105, ppl: 12.405065 +epoch: 2, batch: 1813, sum loss: 3891.768799, avg loss: 2.636700, ppl: 13.967031 +epoch: 2, batch: 1814, sum loss: 3703.640869, avg loss: 2.490680, ppl: 12.069477 +epoch: 2, batch: 1815, sum loss: 3818.786621, avg loss: 2.443242, ppl: 11.510292 +epoch: 2, batch: 1816, sum loss: 5174.910645, avg loss: 2.826276, ppl: 16.882471 +epoch: 2, batch: 1817, sum loss: 4098.229004, avg loss: 2.585633, ppl: 13.271691 +epoch: 2, batch: 1818, sum loss: 3645.651611, avg loss: 2.371927, ppl: 10.718026 +epoch: 2, batch: 1819, sum loss: 4234.161133, avg loss: 2.592873, ppl: 13.368118 +epoch: 2, batch: 1820, sum loss: 3961.524902, avg loss: 2.333054, ppl: 10.309374 +epoch: 2, batch: 1821, sum loss: 4143.247559, avg loss: 2.500451, ppl: 12.187990 +epoch: 2, batch: 1822, sum loss: 3992.194092, avg loss: 2.550923, ppl: 12.818926 +epoch: 2, batch: 1823, sum loss: 4431.064941, avg loss: 2.384857, ppl: 10.857512 +epoch: 2, batch: 1824, sum loss: 4584.154297, avg loss: 2.459310, ppl: 11.696741 +epoch: 2, batch: 1825, sum loss: 4017.635010, avg loss: 2.678423, ppl: 14.562117 +epoch: 2, batch: 1826, sum loss: 3970.850830, avg loss: 2.463307, ppl: 11.743583 +epoch: 2, batch: 1827, sum loss: 4166.256348, avg loss: 2.660444, ppl: 14.302645 +epoch: 2, batch: 1828, sum loss: 3318.670410, avg loss: 2.433043, ppl: 11.393497 +epoch: 2, batch: 1829, sum loss: 3711.673340, avg loss: 2.535296, ppl: 12.620166 +epoch: 2, batch: 1830, sum loss: 4777.267090, avg loss: 2.756646, ppl: 15.746934 +epoch: 2, batch: 1831, sum loss: 4752.573730, avg loss: 2.674493, ppl: 14.504992 +epoch: 2, batch: 1832, sum loss: 4817.893066, avg loss: 2.639941, ppl: 14.012383 +epoch: 2, batch: 1833, sum loss: 4538.683594, avg loss: 2.722666, ppl: 15.220840 +epoch: 2, batch: 1834, sum loss: 4357.898438, avg loss: 2.560457, ppl: 12.941733 +epoch: 2, batch: 1835, sum loss: 4315.774414, avg loss: 2.552203, ppl: 12.835345 +epoch: 2, batch: 
1836, sum loss: 4910.071289, avg loss: 2.477332, ppl: 11.909443 +epoch: 2, batch: 1837, sum loss: 4076.258545, avg loss: 2.455577, ppl: 11.653160 +epoch: 2, batch: 1838, sum loss: 5192.553711, avg loss: 2.745930, ppl: 15.579095 +epoch: 2, batch: 1839, sum loss: 4128.627930, avg loss: 2.586860, ppl: 13.287978 +epoch: 2, batch: 1840, sum loss: 3502.708740, avg loss: 2.319675, ppl: 10.172365 +epoch: 2, batch: 1841, sum loss: 4717.708496, avg loss: 2.590724, ppl: 13.339426 +epoch: 2, batch: 1842, sum loss: 4603.641602, avg loss: 2.913697, ppl: 18.424793 +epoch: 2, batch: 1843, sum loss: 4270.176270, avg loss: 2.566212, ppl: 13.016420 +epoch: 2, batch: 1844, sum loss: 3350.268311, avg loss: 2.323349, ppl: 10.209805 +epoch: 2, batch: 1845, sum loss: 4994.380859, avg loss: 2.840945, ppl: 17.131943 +epoch: 2, batch: 1846, sum loss: 4241.594238, avg loss: 2.502415, ppl: 12.211955 +epoch: 2, batch: 1847, sum loss: 5179.240723, avg loss: 2.645169, ppl: 14.085826 +epoch: 2, batch: 1848, sum loss: 4674.312500, avg loss: 2.661909, ppl: 14.323611 +epoch: 2, batch: 1849, sum loss: 5157.352051, avg loss: 2.874778, ppl: 17.721495 +epoch: 2, batch: 1850, sum loss: 4496.580078, avg loss: 2.654416, ppl: 14.216676 +epoch: 2, batch: 1851, sum loss: 4735.814941, avg loss: 2.852901, ppl: 17.338001 +epoch: 2, batch: 1852, sum loss: 3786.832520, avg loss: 2.441542, ppl: 11.490750 +epoch: 2, batch: 1853, sum loss: 4768.977051, avg loss: 2.795414, ppl: 16.369411 +epoch: 2, batch: 1854, sum loss: 4379.957031, avg loss: 2.547968, ppl: 12.781108 +epoch: 2, batch: 1855, sum loss: 4634.200195, avg loss: 2.664865, ppl: 14.366011 +epoch: 2, batch: 1856, sum loss: 4031.638672, avg loss: 2.431628, ppl: 11.377386 +epoch: 2, batch: 1857, sum loss: 4488.573242, avg loss: 2.557591, ppl: 12.904698 +epoch: 2, batch: 1858, sum loss: 5645.908691, avg loss: 2.880566, ppl: 17.824352 +epoch: 2, batch: 1859, sum loss: 4329.691895, avg loss: 2.569550, ppl: 13.059947 +epoch: 2, batch: 1860, sum loss: 4051.615479, 
avg loss: 2.510295, ppl: 12.308557 +epoch: 2, batch: 1861, sum loss: 4288.493164, avg loss: 2.578769, ppl: 13.180905 +epoch: 2, batch: 1862, sum loss: 3838.954834, avg loss: 2.442083, ppl: 11.496965 +epoch: 2, batch: 1863, sum loss: 3936.898926, avg loss: 2.530141, ppl: 12.555272 +epoch: 2, batch: 1864, sum loss: 3636.580322, avg loss: 2.573659, ppl: 13.113719 +epoch: 2, batch: 1865, sum loss: 4072.604248, avg loss: 2.428506, ppl: 11.341921 +epoch: 2, batch: 1866, sum loss: 5023.547852, avg loss: 2.828574, ppl: 16.921320 +epoch: 2, batch: 1867, sum loss: 4136.146484, avg loss: 2.641217, ppl: 14.030274 +epoch: 2, batch: 1868, sum loss: 3923.918701, avg loss: 2.416206, ppl: 11.203275 +epoch: 2, batch: 1869, sum loss: 3936.644775, avg loss: 2.496287, ppl: 12.137345 +epoch: 2, batch: 1870, sum loss: 4797.520020, avg loss: 2.597466, ppl: 13.429670 +epoch: 2, batch: 1871, sum loss: 4575.115723, avg loss: 2.633918, ppl: 13.928238 +epoch: 2, batch: 1872, sum loss: 4355.312988, avg loss: 2.578634, ppl: 13.179127 +epoch: 2, batch: 1873, sum loss: 3768.301270, avg loss: 2.323244, ppl: 10.208734 +epoch: 2, batch: 1874, sum loss: 3783.991211, avg loss: 2.332917, ppl: 10.307963 +epoch: 2, batch: 1875, sum loss: 4449.290527, avg loss: 2.701451, ppl: 14.901342 +epoch: 2, batch: 1876, sum loss: 3817.775879, avg loss: 2.396595, ppl: 10.985706 +epoch: 2, batch: 1877, sum loss: 4473.239746, avg loss: 2.456474, ppl: 11.663616 +epoch: 2, batch: 1878, sum loss: 3967.857178, avg loss: 2.377386, ppl: 10.776697 +epoch: 2, batch: 1879, sum loss: 4240.071289, avg loss: 2.455166, ppl: 11.648365 +epoch: 2, batch: 1880, sum loss: 4759.204590, avg loss: 2.337527, ppl: 10.355594 +epoch: 2, batch: 1881, sum loss: 4039.006836, avg loss: 2.507143, ppl: 12.269822 +epoch: 2, batch: 1882, sum loss: 4728.751953, avg loss: 2.831588, ppl: 16.972393 +epoch: 2, batch: 1883, sum loss: 2598.114746, avg loss: 2.053846, ppl: 7.797831 +epoch: 2, batch: 1884, sum loss: 4457.500000, avg loss: 2.465431, ppl: 
11.768559 +epoch: 2, batch: 1885, sum loss: 4021.875000, avg loss: 2.441940, ppl: 11.495318 +epoch: 2, batch: 1886, sum loss: 4448.834473, avg loss: 2.464728, ppl: 11.760284 +epoch: 2, batch: 1887, sum loss: 3533.538086, avg loss: 2.194744, ppl: 8.977703 +epoch: 2, batch: 1888, sum loss: 4719.105957, avg loss: 2.730964, ppl: 15.347678 +epoch: 2, batch: 1889, sum loss: 3939.403809, avg loss: 2.418296, ppl: 11.226708 +epoch: 2, batch: 1890, sum loss: 4130.909180, avg loss: 2.274730, ppl: 9.725290 +epoch: 2, batch: 1891, sum loss: 4885.591797, avg loss: 2.607039, ppl: 13.558846 +epoch: 2, batch: 1892, sum loss: 5087.883789, avg loss: 2.492839, ppl: 12.095562 +epoch: 2, batch: 1893, sum loss: 4653.617188, avg loss: 2.668358, ppl: 14.416283 +epoch: 2, batch: 1894, sum loss: 3676.119873, avg loss: 2.345961, ppl: 10.443300 +epoch: 2, batch: 1895, sum loss: 4185.250488, avg loss: 2.564492, ppl: 12.994053 +epoch: 2, batch: 1896, sum loss: 3536.334961, avg loss: 2.314355, ppl: 10.118399 +epoch: 2, batch: 1897, sum loss: 3404.264404, avg loss: 2.409246, ppl: 11.125566 +epoch: 2, batch: 1898, sum loss: 4001.728027, avg loss: 2.655427, ppl: 14.231058 +epoch: 2, batch: 1899, sum loss: 4185.581055, avg loss: 2.460659, ppl: 11.712528 +epoch: 2, batch: 1900, sum loss: 3707.421387, avg loss: 2.404294, ppl: 11.070612 +epoch: 2, batch: 1901, sum loss: 3556.803711, avg loss: 2.383917, ppl: 10.847307 +epoch: 2, batch: 1902, sum loss: 4365.428223, avg loss: 2.606226, ppl: 13.547824 +epoch: 2, batch: 1903, sum loss: 4082.330811, avg loss: 2.529325, ppl: 12.545038 +epoch: 2, batch: 1904, sum loss: 4363.063965, avg loss: 2.668540, ppl: 14.418909 +epoch: 2, batch: 1905, sum loss: 4306.469238, avg loss: 2.558805, ppl: 12.920371 +epoch: 2, batch: 1906, sum loss: 4906.269531, avg loss: 2.663556, ppl: 14.347216 +epoch: 2, batch: 1907, sum loss: 3753.809570, avg loss: 2.360887, ppl: 10.600346 +epoch: 2, batch: 1908, sum loss: 4591.589355, avg loss: 2.617782, ppl: 13.705290 +epoch: 2, batch: 1909, 
sum loss: 4239.691406, avg loss: 2.528140, ppl: 12.530182 +epoch: 2, batch: 1910, sum loss: 3623.252930, avg loss: 2.351235, ppl: 10.498527 +epoch: 2, batch: 1911, sum loss: 5632.484375, avg loss: 2.827553, ppl: 16.904039 +epoch: 2, batch: 1912, sum loss: 5232.060059, avg loss: 2.823562, ppl: 16.836716 +epoch: 2, batch: 1913, sum loss: 4779.904785, avg loss: 2.863934, ppl: 17.530348 +epoch: 2, batch: 1914, sum loss: 4184.117188, avg loss: 2.484630, ppl: 11.996685 +epoch: 2, batch: 1915, sum loss: 3187.337158, avg loss: 2.102465, ppl: 8.186326 +epoch: 2, batch: 1916, sum loss: 4407.360840, avg loss: 2.601748, ppl: 13.487289 +epoch: 2, batch: 1917, sum loss: 3399.257812, avg loss: 2.414246, ppl: 11.181332 +epoch: 2, batch: 1918, sum loss: 5054.075195, avg loss: 2.679785, ppl: 14.581965 +epoch: 2, batch: 1919, sum loss: 4122.718262, avg loss: 2.526175, ppl: 12.505587 +epoch: 2, batch: 1920, sum loss: 4098.478516, avg loss: 2.544059, ppl: 12.731240 +epoch: 2, batch: 1921, sum loss: 4417.486328, avg loss: 2.632590, ppl: 13.909750 +epoch: 2, batch: 1922, sum loss: 3418.657227, avg loss: 2.434941, ppl: 11.415146 +epoch: 2, batch: 1923, sum loss: 4131.287109, avg loss: 2.588526, ppl: 13.310135 +epoch: 2, batch: 1924, sum loss: 4192.474121, avg loss: 2.364622, ppl: 10.640013 +epoch: 2, batch: 1925, sum loss: 4818.500488, avg loss: 2.696419, ppl: 14.826543 +epoch: 2, batch: 1926, sum loss: 5409.945801, avg loss: 2.964354, ppl: 19.382174 +epoch: 2, batch: 1927, sum loss: 4239.142578, avg loss: 2.589580, ppl: 13.324175 +epoch: 2, batch: 1928, sum loss: 4295.365723, avg loss: 2.457303, ppl: 11.673289 +epoch: 2, batch: 1929, sum loss: 5576.119629, avg loss: 2.955018, ppl: 19.202074 +epoch: 2, batch: 1930, sum loss: 4851.155762, avg loss: 2.748530, ppl: 15.619657 +epoch: 2, batch: 1931, sum loss: 4238.168457, avg loss: 2.625879, ppl: 13.816711 +epoch: 2, batch: 1932, sum loss: 4650.277832, avg loss: 2.550893, ppl: 12.818546 +epoch: 2, batch: 1933, sum loss: 3846.306152, avg loss: 
2.405445, ppl: 11.083357 +epoch: 2, batch: 1934, sum loss: 3780.432861, avg loss: 2.594669, ppl: 13.392156 +epoch: 2, batch: 1935, sum loss: 4464.948242, avg loss: 2.831293, ppl: 16.967379 +epoch: 2, batch: 1936, sum loss: 4256.669922, avg loss: 2.637342, ppl: 13.976006 +epoch: 2, batch: 1937, sum loss: 3867.604736, avg loss: 2.362617, ppl: 10.618710 +epoch: 2, batch: 1938, sum loss: 4169.429199, avg loss: 2.787052, ppl: 16.233089 +epoch: 2, batch: 1939, sum loss: 5805.105957, avg loss: 2.957263, ppl: 19.245216 +epoch: 2, batch: 1940, sum loss: 4078.481445, avg loss: 2.539528, ppl: 12.673683 +epoch: 2, batch: 1941, sum loss: 4087.021973, avg loss: 2.476983, ppl: 11.905293 +epoch: 2, batch: 1942, sum loss: 3912.989014, avg loss: 2.396197, ppl: 10.981331 +epoch: 2, batch: 1943, sum loss: 3687.685547, avg loss: 2.455183, ppl: 11.648568 +epoch: 2, batch: 1944, sum loss: 4791.496094, avg loss: 2.545960, ppl: 12.755464 +epoch: 2, batch: 1945, sum loss: 4766.055664, avg loss: 2.823493, ppl: 16.835550 +epoch: 2, batch: 1946, sum loss: 3899.101074, avg loss: 2.429346, ppl: 11.351460 +epoch: 2, batch: 1947, sum loss: 4645.184570, avg loss: 2.634818, ppl: 13.940780 +epoch: 2, batch: 1948, sum loss: 4938.350586, avg loss: 2.526011, ppl: 12.503524 +epoch: 2, batch: 1949, sum loss: 4106.850098, avg loss: 2.221120, ppl: 9.217646 +epoch: 2, batch: 1950, sum loss: 4688.886230, avg loss: 2.546924, ppl: 12.767765 +epoch: 2, batch: 1951, sum loss: 3831.656250, avg loss: 2.363761, ppl: 10.630856 +epoch: 2, batch: 1952, sum loss: 4355.250000, avg loss: 2.567954, ppl: 13.039120 +epoch: 2, batch: 1953, sum loss: 3531.168701, avg loss: 2.320085, ppl: 10.176538 +epoch: 2, batch: 1954, sum loss: 4000.605713, avg loss: 2.587714, ppl: 13.299334 +epoch: 2, batch: 1955, sum loss: 5324.341797, avg loss: 2.777435, ppl: 16.077723 +epoch: 2, batch: 1956, sum loss: 3811.942139, avg loss: 2.175766, ppl: 8.808932 +epoch: 2, batch: 1957, sum loss: 5179.541016, avg loss: 2.730385, ppl: 15.338796 +epoch: 
2, batch: 1958, sum loss: 3923.226807, avg loss: 2.483055, ppl: 11.977799 +epoch: 2, batch: 1959, sum loss: 3743.276855, avg loss: 2.303555, ppl: 10.009704 +epoch: 2, batch: 1960, sum loss: 4444.565430, avg loss: 2.611378, ppl: 13.617803 +epoch: 2, batch: 1961, sum loss: 4055.978027, avg loss: 2.576860, ppl: 13.155766 +epoch: 2, batch: 1962, sum loss: 4044.474609, avg loss: 2.725387, ppl: 15.262321 +epoch: 2, batch: 1963, sum loss: 4161.165527, avg loss: 2.422099, ppl: 11.269485 +epoch: 2, batch: 1964, sum loss: 4357.951172, avg loss: 2.548509, ppl: 12.788027 +epoch: 2, batch: 1965, sum loss: 4087.132812, avg loss: 2.537016, ppl: 12.641893 +epoch: 2, batch: 1966, sum loss: 3370.506836, avg loss: 2.340630, ppl: 10.387777 +epoch: 2, batch: 1967, sum loss: 4680.029785, avg loss: 2.587081, ppl: 13.290924 +epoch: 2, batch: 1968, sum loss: 4690.783691, avg loss: 2.769058, ppl: 15.943601 +epoch: 2, batch: 1969, sum loss: 3474.939941, avg loss: 2.230385, ppl: 9.303448 +epoch: 2, batch: 1970, sum loss: 3521.432129, avg loss: 2.374533, ppl: 10.745993 +epoch: 2, batch: 1971, sum loss: 4307.366211, avg loss: 2.548737, ppl: 12.790942 +epoch: 2, batch: 1972, sum loss: 4631.586914, avg loss: 2.780064, ppl: 16.120054 +epoch: 2, batch: 1973, sum loss: 4121.909668, avg loss: 2.563377, ppl: 12.979571 +epoch: 2, batch: 1974, sum loss: 4249.012207, avg loss: 2.594024, ppl: 13.383524 +epoch: 2, batch: 1975, sum loss: 4243.773438, avg loss: 2.542704, ppl: 12.714007 +epoch: 2, batch: 1976, sum loss: 4758.200684, avg loss: 2.576178, ppl: 13.146796 +epoch: 2, batch: 1977, sum loss: 3568.690186, avg loss: 2.502588, ppl: 12.214061 +epoch: 2, batch: 1978, sum loss: 3543.606934, avg loss: 2.531148, ppl: 12.567923 +epoch: 2, batch: 1979, sum loss: 3966.895996, avg loss: 2.488642, ppl: 12.044913 +epoch: 2, batch: 1980, sum loss: 4109.426270, avg loss: 2.558796, ppl: 12.920251 +epoch: 2, batch: 1981, sum loss: 3522.156982, avg loss: 2.294565, ppl: 9.920117 +epoch: 2, batch: 1982, sum loss: 
3775.958984, avg loss: 2.394394, ppl: 10.961553 +epoch: 2, batch: 1983, sum loss: 5196.766113, avg loss: 2.710885, ppl: 15.042583 +epoch: 2, batch: 1984, sum loss: 4225.575195, avg loss: 2.487096, ppl: 12.026296 +epoch: 2, batch: 1985, sum loss: 3968.570312, avg loss: 2.451248, ppl: 11.602818 +epoch: 2, batch: 1986, sum loss: 4961.412109, avg loss: 2.684747, ppl: 14.654490 +epoch: 2, batch: 1987, sum loss: 4348.342285, avg loss: 2.542890, ppl: 12.716372 +epoch: 2, batch: 1988, sum loss: 4137.032227, avg loss: 2.477265, ppl: 11.908648 +epoch: 2, batch: 1989, sum loss: 3547.968262, avg loss: 2.569130, ppl: 13.054459 +epoch: 2, batch: 1990, sum loss: 3175.653320, avg loss: 2.265088, ppl: 9.631973 +epoch: 2, batch: 1991, sum loss: 4893.173340, avg loss: 2.522254, ppl: 12.456648 +epoch: 2, batch: 1992, sum loss: 4001.996338, avg loss: 2.476483, ppl: 11.899339 +epoch: 2, batch: 1993, sum loss: 3721.992676, avg loss: 2.600973, ppl: 13.476847 +epoch: 2, batch: 1994, sum loss: 4418.296387, avg loss: 2.573265, ppl: 13.108559 +epoch: 2, batch: 1995, sum loss: 3971.895996, avg loss: 2.286641, ppl: 9.841827 +epoch: 2, batch: 1996, sum loss: 4770.849121, avg loss: 2.788340, ppl: 16.254013 +epoch: 2, batch: 1997, sum loss: 5137.227051, avg loss: 2.581521, ppl: 13.217227 +epoch: 2, batch: 1998, sum loss: 4116.645996, avg loss: 2.382318, ppl: 10.829981 +epoch: 2, batch: 1999, sum loss: 4167.777832, avg loss: 2.544431, ppl: 12.735976 +epoch: 2, batch: 2000, sum loss: 4845.125977, avg loss: 2.720453, ppl: 15.187194 +epoch: 2, batch: 2001, sum loss: 4132.716797, avg loss: 2.618959, ppl: 13.721428 +epoch: 2, batch: 2002, sum loss: 4096.350586, avg loss: 2.542738, ppl: 12.714435 +epoch: 2, batch: 2003, sum loss: 4547.935059, avg loss: 2.582587, ppl: 13.231318 +epoch: 2, batch: 2004, sum loss: 5174.198730, avg loss: 2.880957, ppl: 17.831331 +epoch: 2, batch: 2005, sum loss: 5763.083984, avg loss: 2.777390, ppl: 16.077002 +epoch: 2, batch: 2006, sum loss: 4720.638672, avg loss: 2.803230, 
ppl: 16.497847 +epoch: 2, batch: 2007, sum loss: 4791.788574, avg loss: 2.808786, ppl: 16.589760 +epoch: 2, batch: 2008, sum loss: 3860.230225, avg loss: 2.212166, ppl: 9.135486 +epoch: 2, batch: 2009, sum loss: 4316.602051, avg loss: 2.636898, ppl: 13.969803 +epoch: 2, batch: 2010, sum loss: 5263.708496, avg loss: 2.829951, ppl: 16.944628 +epoch: 2, batch: 2011, sum loss: 4905.263184, avg loss: 2.518102, ppl: 12.405035 +epoch: 2, batch: 2012, sum loss: 5307.405762, avg loss: 2.681862, ppl: 14.612281 +epoch: 2, batch: 2013, sum loss: 4530.498047, avg loss: 2.750758, ppl: 15.654489 +epoch: 2, batch: 2014, sum loss: 3917.655762, avg loss: 2.337503, ppl: 10.355351 +epoch: 2, batch: 2015, sum loss: 4203.121582, avg loss: 2.586536, ppl: 13.283683 +epoch: 2, batch: 2016, sum loss: 4387.690918, avg loss: 2.555440, ppl: 12.876966 +epoch: 2, batch: 2017, sum loss: 4379.808105, avg loss: 2.484293, ppl: 11.992636 +epoch: 2, batch: 2018, sum loss: 4589.453613, avg loss: 2.572564, ppl: 13.099367 +epoch: 2, batch: 2019, sum loss: 4200.919434, avg loss: 2.377430, ppl: 10.777175 +epoch: 2, batch: 2020, sum loss: 3554.270996, avg loss: 2.235390, ppl: 9.350132 +epoch: 2, batch: 2021, sum loss: 4422.859375, avg loss: 2.604747, ppl: 13.527797 +epoch: 2, batch: 2022, sum loss: 3773.170898, avg loss: 2.349422, ppl: 10.479506 +epoch: 2, batch: 2023, sum loss: 3941.594482, avg loss: 2.533158, ppl: 12.593217 +epoch: 2, batch: 2024, sum loss: 4757.617676, avg loss: 2.949546, ppl: 19.097284 +epoch: 2, batch: 2025, sum loss: 4920.108887, avg loss: 2.561223, ppl: 12.951645 +epoch: 2, batch: 2026, sum loss: 4882.739258, avg loss: 2.867140, ppl: 17.586645 +epoch: 2, batch: 2027, sum loss: 6583.217285, avg loss: 2.829058, ppl: 16.929504 +epoch: 2, batch: 2028, sum loss: 4246.427734, avg loss: 2.541250, ppl: 12.695524 +epoch: 2, batch: 2029, sum loss: 4387.333496, avg loss: 2.576238, ppl: 13.147586 +epoch: 2, batch: 2030, sum loss: 4108.898438, avg loss: 2.482718, ppl: 11.973767 +epoch: 2, batch: 
2031, sum loss: 3791.178467, avg loss: 2.296292, ppl: 9.937270 +epoch: 2, batch: 2032, sum loss: 3757.057129, avg loss: 2.596446, ppl: 13.415970 +epoch: 2, batch: 2033, sum loss: 4589.170898, avg loss: 2.452790, ppl: 11.620729 +epoch: 2, batch: 2034, sum loss: 3886.146484, avg loss: 2.412257, ppl: 11.159123 +epoch: 2, batch: 2035, sum loss: 3925.094238, avg loss: 2.367367, ppl: 10.669261 +epoch: 2, batch: 2036, sum loss: 4316.970703, avg loss: 2.577296, ppl: 13.161502 +epoch: 2, batch: 2037, sum loss: 4927.842285, avg loss: 2.871703, ppl: 17.667078 +epoch: 2, batch: 2038, sum loss: 4739.993652, avg loss: 2.738298, ppl: 15.460644 +epoch: 2, batch: 2039, sum loss: 4769.913574, avg loss: 2.606510, ppl: 13.551675 +epoch: 2, batch: 2040, sum loss: 3916.053955, avg loss: 2.502271, ppl: 12.210191 +epoch: 2, batch: 2041, sum loss: 3947.166504, avg loss: 2.528614, ppl: 12.536119 +epoch: 2, batch: 2042, sum loss: 4077.264160, avg loss: 2.416873, ppl: 11.210746 +epoch: 2, batch: 2043, sum loss: 3751.022949, avg loss: 2.423141, ppl: 11.281243 +epoch: 2, batch: 2044, sum loss: 3265.379883, avg loss: 2.079860, ppl: 8.003346 +epoch: 2, batch: 2045, sum loss: 3926.150391, avg loss: 2.398381, ppl: 11.005349 +epoch: 2, batch: 2046, sum loss: 4534.471680, avg loss: 2.547456, ppl: 12.774564 +epoch: 2, batch: 2047, sum loss: 4716.585938, avg loss: 2.500841, ppl: 12.192745 +epoch: 2, batch: 2048, sum loss: 4735.804688, avg loss: 2.520386, ppl: 12.433392 +epoch: 2, batch: 2049, sum loss: 4276.250488, avg loss: 2.444969, ppl: 11.530189 +epoch: 2, batch: 2050, sum loss: 4644.872559, avg loss: 2.581919, ppl: 13.222490 +epoch: 2, batch: 2051, sum loss: 3472.972412, avg loss: 2.449205, ppl: 11.579137 +epoch: 2, batch: 2052, sum loss: 3806.353760, avg loss: 2.401485, ppl: 11.039557 +epoch: 2, batch: 2053, sum loss: 3680.922852, avg loss: 2.374789, ppl: 10.748745 +epoch: 2, batch: 2054, sum loss: 3569.194336, avg loss: 2.501188, ppl: 12.196970 +epoch: 2, batch: 2055, sum loss: 4528.708984, avg 
loss: 2.660816, ppl: 14.307959 +epoch: 2, batch: 2056, sum loss: 4235.275391, avg loss: 2.414638, ppl: 11.185720 +epoch: 2, batch: 2057, sum loss: 4433.016113, avg loss: 2.641845, ppl: 14.039082 +epoch: 2, batch: 2058, sum loss: 4566.047852, avg loss: 2.493746, ppl: 12.106537 +epoch: 2, batch: 2059, sum loss: 4679.581543, avg loss: 2.690961, ppl: 14.745845 +epoch: 2, batch: 2060, sum loss: 3920.789551, avg loss: 2.313150, ppl: 10.106211 +epoch: 2, batch: 2061, sum loss: 4327.363281, avg loss: 2.517373, ppl: 12.395985 +epoch: 2, batch: 2062, sum loss: 4799.771484, avg loss: 2.738033, ppl: 15.456550 +epoch: 2, batch: 2063, sum loss: 4318.070801, avg loss: 2.467469, ppl: 11.792562 +epoch: 2, batch: 2064, sum loss: 4760.938477, avg loss: 2.656774, ppl: 14.250241 +epoch: 2, batch: 2065, sum loss: 3735.776855, avg loss: 2.457748, ppl: 11.678481 +epoch: 2, batch: 2066, sum loss: 4334.666016, avg loss: 2.690668, ppl: 14.741522 +epoch: 2, batch: 2067, sum loss: 3928.318359, avg loss: 2.574259, ppl: 13.121585 +epoch: 2, batch: 2068, sum loss: 4680.659668, avg loss: 2.905437, ppl: 18.273235 +epoch: 2, batch: 2069, sum loss: 3857.043457, avg loss: 2.391223, ppl: 10.926854 +epoch: 2, batch: 2070, sum loss: 5912.495117, avg loss: 2.886961, ppl: 17.938702 +epoch: 2, batch: 2071, sum loss: 4594.867188, avg loss: 2.598907, ppl: 13.449030 +epoch: 2, batch: 2072, sum loss: 4243.638184, avg loss: 2.436072, ppl: 11.428067 +epoch: 2, batch: 2073, sum loss: 4194.473633, avg loss: 2.579627, ppl: 13.192217 +epoch: 2, batch: 2074, sum loss: 3362.946289, avg loss: 2.343517, ppl: 10.417808 +epoch: 2, batch: 2075, sum loss: 4464.121582, avg loss: 2.606025, ppl: 13.545108 +epoch: 2, batch: 2076, sum loss: 3702.001709, avg loss: 2.329768, ppl: 10.275559 +epoch: 2, batch: 2077, sum loss: 3631.187988, avg loss: 2.338176, ppl: 10.362324 +epoch: 2, batch: 2078, sum loss: 4791.497070, avg loss: 2.685817, ppl: 14.670178 +epoch: 2, batch: 2079, sum loss: 4671.659180, avg loss: 2.601147, ppl: 13.479186 
+epoch: 2, batch: 2080, sum loss: 4048.163086, avg loss: 2.514387, ppl: 12.359029 +epoch: 2, batch: 2081, sum loss: 3155.528809, avg loss: 2.148079, ppl: 8.568386 +epoch: 2, batch: 2082, sum loss: 4380.783691, avg loss: 2.587586, ppl: 13.297638 +epoch: 2, batch: 2083, sum loss: 4349.945801, avg loss: 2.611012, ppl: 13.612817 +epoch: 2, batch: 2084, sum loss: 4361.875000, avg loss: 2.538926, ppl: 12.666062 +epoch: 2, batch: 2085, sum loss: 5083.833496, avg loss: 2.557260, ppl: 12.900425 +epoch: 2, batch: 2086, sum loss: 4502.581543, avg loss: 2.634629, ppl: 13.938148 +epoch: 2, batch: 2087, sum loss: 3532.160889, avg loss: 2.356345, ppl: 10.552312 +epoch: 2, batch: 2088, sum loss: 3408.926758, avg loss: 2.499213, ppl: 12.172913 +epoch: 2, batch: 2089, sum loss: 4183.501465, avg loss: 2.509599, ppl: 12.299996 +epoch: 2, batch: 2090, sum loss: 4940.368652, avg loss: 2.872307, ppl: 17.677759 +epoch: 2, batch: 2091, sum loss: 5107.199219, avg loss: 2.488889, ppl: 12.047877 +epoch: 2, batch: 2092, sum loss: 3859.652344, avg loss: 2.362088, ppl: 10.613093 +epoch: 2, batch: 2093, sum loss: 3980.044434, avg loss: 2.319373, ppl: 10.169300 +epoch: 2, batch: 2094, sum loss: 5403.649414, avg loss: 2.568274, ppl: 13.043299 +epoch: 2, batch: 2095, sum loss: 3779.873047, avg loss: 2.587182, ppl: 13.292265 +epoch: 2, batch: 2096, sum loss: 4775.254883, avg loss: 2.500134, ppl: 12.184120 +epoch: 2, batch: 2097, sum loss: 5070.858398, avg loss: 2.832882, ppl: 16.994362 +epoch: 2, batch: 2098, sum loss: 3975.398682, avg loss: 2.376210, ppl: 10.764027 +epoch: 2, batch: 2099, sum loss: 4484.556152, avg loss: 2.746207, ppl: 15.583412 +epoch: 2, batch: 2100, sum loss: 4935.936523, avg loss: 2.617146, ppl: 13.696574 +epoch: 2, batch: 2101, sum loss: 3945.467285, avg loss: 2.319499, ppl: 10.170573 +epoch: 2, batch: 2102, sum loss: 4459.000977, avg loss: 2.526346, ppl: 12.507722 +epoch: 2, batch: 2103, sum loss: 3967.484375, avg loss: 2.596521, ppl: 13.416981 +epoch: 2, batch: 2104, sum 
loss: 4199.014648, avg loss: 2.560375, ppl: 12.940666 +epoch: 2, batch: 2105, sum loss: 4491.922363, avg loss: 2.442590, ppl: 11.502791 +epoch: 2, batch: 2106, sum loss: 5098.572266, avg loss: 2.900212, ppl: 18.177992 +epoch: 2, batch: 2107, sum loss: 4241.532227, avg loss: 2.613390, ppl: 13.645233 +epoch: 2, batch: 2108, sum loss: 4288.917480, avg loss: 2.516970, ppl: 12.391000 +epoch: 2, batch: 2109, sum loss: 3990.917969, avg loss: 2.492766, ppl: 12.094683 +epoch: 2, batch: 2110, sum loss: 3555.527588, avg loss: 2.274810, ppl: 9.726069 +epoch: 2, batch: 2111, sum loss: 4145.423828, avg loss: 2.461653, ppl: 11.724178 +epoch: 2, batch: 2112, sum loss: 4636.639648, avg loss: 2.783097, ppl: 16.169020 +epoch: 2, batch: 2113, sum loss: 3940.318115, avg loss: 2.479747, ppl: 11.938244 +epoch: 2, batch: 2114, sum loss: 3971.869385, avg loss: 2.417449, ppl: 11.217211 +epoch: 2, batch: 2115, sum loss: 3466.712646, avg loss: 2.280732, ppl: 9.783841 +epoch: 2, batch: 2116, sum loss: 4273.554688, avg loss: 2.725481, ppl: 15.263758 +epoch: 2, batch: 2117, sum loss: 3914.509766, avg loss: 2.313540, ppl: 10.110151 +epoch: 2, batch: 2118, sum loss: 4354.910645, avg loss: 2.658675, ppl: 14.277366 +epoch: 2, batch: 2119, sum loss: 4616.119629, avg loss: 2.441100, ppl: 11.485664 +epoch: 2, batch: 2120, sum loss: 4792.781250, avg loss: 2.539895, ppl: 12.678334 +epoch: 2, batch: 2121, sum loss: 3634.086426, avg loss: 2.346086, ppl: 10.444605 +epoch: 2, batch: 2122, sum loss: 3332.284668, avg loss: 2.271496, ppl: 9.693891 +epoch: 2, batch: 2123, sum loss: 4441.909668, avg loss: 2.629905, ppl: 13.872455 +epoch: 2, batch: 2124, sum loss: 3592.201660, avg loss: 2.432093, ppl: 11.382686 +epoch: 2, batch: 2125, sum loss: 3853.686523, avg loss: 2.626916, ppl: 13.831055 +epoch: 2, batch: 2126, sum loss: 4826.151855, avg loss: 2.612968, ppl: 13.639476 +epoch: 2, batch: 2127, sum loss: 3843.883301, avg loss: 2.425163, ppl: 11.304072 +epoch: 2, batch: 2128, sum loss: 3535.290771, avg loss: 
2.380667, ppl: 10.812112 +epoch: 2, batch: 2129, sum loss: 5503.985352, avg loss: 2.803864, ppl: 16.508316 +epoch: 2, batch: 2130, sum loss: 3525.035156, avg loss: 2.364209, ppl: 10.635619 +epoch: 2, batch: 2131, sum loss: 4382.257324, avg loss: 2.527253, ppl: 12.519071 +epoch: 2, batch: 2132, sum loss: 3722.004395, avg loss: 2.538884, ppl: 12.665533 +epoch: 2, batch: 2133, sum loss: 5673.477539, avg loss: 3.055184, ppl: 21.225098 +epoch: 2, batch: 2134, sum loss: 3481.481934, avg loss: 2.301046, ppl: 9.984617 +epoch: 2, batch: 2135, sum loss: 4289.116211, avg loss: 2.341221, ppl: 10.393916 +epoch: 2, batch: 2136, sum loss: 4187.087891, avg loss: 2.511750, ppl: 12.326486 +epoch: 2, batch: 2137, sum loss: 3728.316162, avg loss: 2.317163, ppl: 10.146852 +epoch: 2, batch: 2138, sum loss: 3368.666016, avg loss: 2.248776, ppl: 9.476128 +epoch: 2, batch: 2139, sum loss: 3975.211914, avg loss: 2.884769, ppl: 17.899435 +epoch: 2, batch: 2140, sum loss: 4747.322266, avg loss: 2.635937, ppl: 13.956380 +epoch: 2, batch: 2141, sum loss: 4690.621582, avg loss: 2.681888, ppl: 14.612651 +epoch: 2, batch: 2142, sum loss: 3841.413086, avg loss: 2.328129, ppl: 10.258730 +epoch: 2, batch: 2143, sum loss: 4355.142090, avg loss: 2.765169, ppl: 15.881730 +epoch: 2, batch: 2144, sum loss: 3234.998779, avg loss: 2.447049, ppl: 11.554201 +epoch: 2, batch: 2145, sum loss: 3630.775635, avg loss: 2.282071, ppl: 9.796952 +epoch: 2, batch: 2146, sum loss: 3132.498535, avg loss: 2.278181, ppl: 9.758909 +epoch: 2, batch: 2147, sum loss: 3722.478027, avg loss: 2.701363, ppl: 14.900024 +epoch: 2, batch: 2148, sum loss: 4190.979004, avg loss: 2.544614, ppl: 12.738308 +epoch: 2, batch: 2149, sum loss: 3995.157227, avg loss: 2.574199, ppl: 13.120809 +epoch: 2, batch: 2150, sum loss: 5036.616211, avg loss: 2.694819, ppl: 14.802835 +epoch: 2, batch: 2151, sum loss: 4722.473145, avg loss: 2.844863, ppl: 17.199209 +epoch: 2, batch: 2152, sum loss: 4704.895996, avg loss: 2.643200, ppl: 14.058117 +epoch: 2, 
batch: 2153, sum loss: 3004.591797, avg loss: 2.235559, ppl: 9.351712 +epoch: 2, batch: 2154, sum loss: 4154.385742, avg loss: 2.585181, ppl: 13.265690 +epoch: 2, batch: 2155, sum loss: 3521.388428, avg loss: 2.433579, ppl: 11.399605 +epoch: 2, batch: 2156, sum loss: 4363.594238, avg loss: 2.655870, ppl: 14.237363 +epoch: 2, batch: 2157, sum loss: 3895.865234, avg loss: 2.489371, ppl: 12.053690 +epoch: 2, batch: 2158, sum loss: 4248.666504, avg loss: 2.491887, ppl: 12.084053 +epoch: 2, batch: 2159, sum loss: 4011.858398, avg loss: 2.421158, ppl: 11.258885 +epoch: 2, batch: 2160, sum loss: 5158.347168, avg loss: 2.664435, ppl: 14.359839 +epoch: 2, batch: 2161, sum loss: 4252.818359, avg loss: 2.656351, ppl: 14.244222 +epoch: 2, batch: 2162, sum loss: 4465.286621, avg loss: 2.497364, ppl: 12.150421 +epoch: 2, batch: 2163, sum loss: 3746.842285, avg loss: 2.363938, ppl: 10.632745 +epoch: 2, batch: 2164, sum loss: 4417.269043, avg loss: 2.521272, ppl: 12.444421 +epoch: 2, batch: 2165, sum loss: 3892.972168, avg loss: 2.453039, ppl: 11.623613 +epoch: 2, batch: 2166, sum loss: 4769.508789, avg loss: 2.614862, ppl: 13.665333 +epoch: 2, batch: 2167, sum loss: 4051.839844, avg loss: 2.466123, ppl: 11.776698 +epoch: 2, batch: 2168, sum loss: 4066.631104, avg loss: 2.387922, ppl: 10.890840 +epoch: 2, batch: 2169, sum loss: 4700.669434, avg loss: 2.848891, ppl: 17.268612 +epoch: 2, batch: 2170, sum loss: 4264.947754, avg loss: 2.610127, ppl: 13.600780 +epoch: 2, batch: 2171, sum loss: 3772.417969, avg loss: 2.418217, ppl: 11.225822 +epoch: 2, batch: 2172, sum loss: 3268.391113, avg loss: 2.417449, ppl: 11.217208 +epoch: 2, batch: 2173, sum loss: 4933.708008, avg loss: 2.795302, ppl: 16.367569 +epoch: 2, batch: 2174, sum loss: 3897.856201, avg loss: 2.534367, ppl: 12.608445 +epoch: 2, batch: 2175, sum loss: 5125.543945, avg loss: 2.754188, ppl: 15.708282 +epoch: 2, batch: 2176, sum loss: 3824.789795, avg loss: 2.375646, ppl: 10.757959 +epoch: 2, batch: 2177, sum loss: 
4460.431641, avg loss: 2.664535, ppl: 14.361274 +epoch: 2, batch: 2178, sum loss: 3937.338867, avg loss: 2.592060, ppl: 13.357257 +epoch: 2, batch: 2179, sum loss: 4038.188477, avg loss: 2.393710, ppl: 10.954057 +epoch: 2, batch: 2180, sum loss: 5867.935547, avg loss: 2.765285, ppl: 15.883570 +epoch: 2, batch: 2181, sum loss: 4495.529785, avg loss: 2.848878, ppl: 17.268398 +epoch: 2, batch: 2182, sum loss: 5257.848145, avg loss: 2.817711, ppl: 16.738487 +epoch: 2, batch: 2183, sum loss: 4259.102051, avg loss: 2.511263, ppl: 12.320482 +epoch: 2, batch: 2184, sum loss: 4466.765625, avg loss: 2.494006, ppl: 12.109695 +epoch: 2, batch: 2185, sum loss: 4422.404297, avg loss: 2.604479, ppl: 13.524172 +epoch: 2, batch: 2186, sum loss: 4821.012695, avg loss: 2.700848, ppl: 14.892349 +epoch: 2, batch: 2187, sum loss: 3957.720459, avg loss: 2.638480, ppl: 13.991922 +epoch: 2, batch: 2188, sum loss: 4462.497559, avg loss: 2.617301, ppl: 13.698697 +epoch: 2, batch: 2189, sum loss: 5991.329590, avg loss: 3.027453, ppl: 20.644583 +epoch: 2, batch: 2190, sum loss: 3530.676025, avg loss: 2.257465, ppl: 9.558830 +epoch: 2, batch: 2191, sum loss: 3531.751709, avg loss: 2.513702, ppl: 12.350573 +epoch: 2, batch: 2192, sum loss: 4902.052734, avg loss: 2.738577, ppl: 15.464961 +epoch: 2, batch: 2193, sum loss: 4031.003906, avg loss: 2.575721, ppl: 13.140792 +epoch: 2, batch: 2194, sum loss: 4952.594238, avg loss: 2.771457, ppl: 15.981910 +epoch: 2, batch: 2195, sum loss: 3313.597656, avg loss: 2.263386, ppl: 9.615595 +epoch: 2, batch: 2196, sum loss: 3769.669434, avg loss: 2.454212, ppl: 11.637259 +epoch: 2, batch: 2197, sum loss: 4527.050293, avg loss: 2.645851, ppl: 14.095430 +epoch: 2, batch: 2198, sum loss: 5288.135254, avg loss: 3.013182, ppl: 20.352066 +epoch: 2, batch: 2199, sum loss: 4163.867188, avg loss: 2.526618, ppl: 12.511127 +epoch: 2, batch: 2200, sum loss: 4689.449219, avg loss: 2.576621, ppl: 13.152615 +epoch: 2, batch: 2201, sum loss: 3848.990234, avg loss: 2.484823, 
ppl: 11.998990 +epoch: 2, batch: 2202, sum loss: 4545.314453, avg loss: 2.380992, ppl: 10.815631 +epoch: 2, batch: 2203, sum loss: 4020.864258, avg loss: 2.483548, ppl: 11.983709 +epoch: 2, batch: 2204, sum loss: 4174.735840, avg loss: 2.507349, ppl: 12.272353 +epoch: 2, batch: 2205, sum loss: 4260.563477, avg loss: 2.506214, ppl: 12.258430 +epoch: 2, batch: 2206, sum loss: 5019.926270, avg loss: 2.767324, ppl: 15.915993 +epoch: 2, batch: 2207, sum loss: 4308.541504, avg loss: 2.636806, ppl: 13.968520 +epoch: 2, batch: 2208, sum loss: 3511.524902, avg loss: 2.501086, ppl: 12.195731 +epoch: 2, batch: 2209, sum loss: 3528.107666, avg loss: 2.293958, ppl: 9.914102 +epoch: 2, batch: 2210, sum loss: 5143.506348, avg loss: 2.521327, ppl: 12.445095 +epoch: 2, batch: 2211, sum loss: 4512.063477, avg loss: 2.626347, ppl: 13.823175 +epoch: 2, batch: 2212, sum loss: 4757.772949, avg loss: 2.582939, ppl: 13.235977 +epoch: 2, batch: 2213, sum loss: 3980.947266, avg loss: 2.278733, ppl: 9.764303 +epoch: 2, batch: 2214, sum loss: 5165.174805, avg loss: 2.737242, ppl: 15.444324 +epoch: 2, batch: 2215, sum loss: 4627.103516, avg loss: 2.509275, ppl: 12.296015 +epoch: 2, batch: 2216, sum loss: 4979.239258, avg loss: 2.704638, ppl: 14.948912 +epoch: 2, batch: 2217, sum loss: 3764.169922, avg loss: 2.491178, ppl: 12.075493 +epoch: 2, batch: 2218, sum loss: 4558.776855, avg loss: 2.596114, ppl: 13.411525 +epoch: 2, batch: 2219, sum loss: 3709.924072, avg loss: 2.460162, ppl: 11.706707 +epoch: 2, batch: 2220, sum loss: 3444.315918, avg loss: 2.109195, ppl: 8.241606 +epoch: 2, batch: 2221, sum loss: 5254.224609, avg loss: 2.866462, ppl: 17.574728 +epoch: 2, batch: 2222, sum loss: 4952.518066, avg loss: 2.762141, ppl: 15.833703 +epoch: 2, batch: 2223, sum loss: 4424.177734, avg loss: 2.646039, ppl: 14.098092 +epoch: 2, batch: 2224, sum loss: 3974.722168, avg loss: 2.574302, ppl: 13.122157 +epoch: 2, batch: 2225, sum loss: 4354.651367, avg loss: 2.645596, ppl: 14.091846 +epoch: 2, batch: 
2226, sum loss: 4406.959961, avg loss: 2.511088, ppl: 12.318327 +epoch: 2, batch: 2227, sum loss: 4950.315430, avg loss: 2.734981, ppl: 15.409451 +epoch: 2, batch: 2228, sum loss: 4435.826172, avg loss: 2.594050, ppl: 13.383872 +epoch: 2, batch: 2229, sum loss: 4549.708984, avg loss: 2.749069, ppl: 15.628075 +epoch: 2, batch: 2230, sum loss: 3339.553467, avg loss: 2.404286, ppl: 11.070525 +epoch: 2, batch: 2231, sum loss: 4194.241211, avg loss: 2.624682, ppl: 13.800181 +epoch: 2, batch: 2232, sum loss: 4095.711426, avg loss: 2.494343, ppl: 12.113775 +epoch: 2, batch: 2233, sum loss: 4967.896973, avg loss: 2.659474, ppl: 14.288766 +epoch: 2, batch: 2234, sum loss: 4957.909180, avg loss: 2.706282, ppl: 14.973506 +epoch: 2, batch: 2235, sum loss: 3550.344727, avg loss: 2.472385, ppl: 11.850677 +epoch: 2, batch: 2236, sum loss: 4595.899414, avg loss: 2.750389, ppl: 15.648716 +epoch: 2, batch: 2237, sum loss: 4011.024902, avg loss: 2.592776, ppl: 13.366831 +epoch: 2, batch: 2238, sum loss: 4500.028809, avg loss: 2.513983, ppl: 12.354033 +epoch: 2, batch: 2239, sum loss: 3610.745361, avg loss: 2.269482, ppl: 9.674385 +epoch: 2, batch: 2240, sum loss: 5902.699707, avg loss: 3.136397, ppl: 23.020782 +epoch: 2, batch: 2241, sum loss: 4992.296387, avg loss: 2.484966, ppl: 12.000710 +epoch: 2, batch: 2242, sum loss: 4190.795898, avg loss: 2.582129, ppl: 13.225268 +epoch: 2, batch: 2243, sum loss: 4663.006836, avg loss: 2.576247, ppl: 13.147702 +epoch: 2, batch: 2244, sum loss: 4305.107422, avg loss: 2.669007, ppl: 14.425632 +epoch: 2, batch: 2245, sum loss: 4342.038574, avg loss: 2.778016, ppl: 16.087067 +epoch: 2, batch: 2246, sum loss: 4392.257812, avg loss: 2.649130, ppl: 14.141731 +epoch: 2, batch: 2247, sum loss: 3990.283447, avg loss: 2.543202, ppl: 12.720332 +epoch: 2, batch: 2248, sum loss: 4188.658691, avg loss: 2.459576, ppl: 11.699854 +epoch: 2, batch: 2249, sum loss: 3692.623779, avg loss: 2.110071, ppl: 8.248824 +epoch: 2, batch: 2250, sum loss: 4770.333496, avg 
loss: 2.645776, ppl: 14.094372 +epoch: 2, batch: 2251, sum loss: 4351.421387, avg loss: 2.357216, ppl: 10.561511 +epoch: 2, batch: 2252, sum loss: 3923.474121, avg loss: 2.499028, ppl: 12.170661 +epoch: 2, batch: 2253, sum loss: 4047.113281, avg loss: 2.741947, ppl: 15.517163 +epoch: 2, batch: 2254, sum loss: 3500.226074, avg loss: 2.242297, ppl: 9.414936 +epoch: 2, batch: 2255, sum loss: 4843.378418, avg loss: 2.559925, ppl: 12.934848 +epoch: 2, batch: 2256, sum loss: 3986.882812, avg loss: 2.297915, ppl: 9.953410 +epoch: 2, batch: 2257, sum loss: 3771.985107, avg loss: 2.543483, ppl: 12.723911 +epoch: 2, batch: 2258, sum loss: 3745.396729, avg loss: 2.501935, ppl: 12.206090 +epoch: 2, batch: 2259, sum loss: 4712.248047, avg loss: 2.570785, ppl: 13.076079 +epoch: 2, batch: 2260, sum loss: 3470.355713, avg loss: 2.420053, ppl: 11.246452 +epoch: 2, batch: 2261, sum loss: 4406.046875, avg loss: 2.480882, ppl: 11.951806 +epoch: 2, batch: 2262, sum loss: 4442.064453, avg loss: 2.485766, ppl: 12.010322 +epoch: 2, batch: 2263, sum loss: 4467.211426, avg loss: 2.621603, ppl: 13.757760 +epoch: 2, batch: 2264, sum loss: 4474.596680, avg loss: 2.961348, ppl: 19.323999 +epoch: 2, batch: 2265, sum loss: 4786.566406, avg loss: 2.732059, ppl: 15.364483 +epoch: 2, batch: 2266, sum loss: 4931.469727, avg loss: 2.613391, ppl: 13.645249 +epoch: 2, batch: 2267, sum loss: 4195.859375, avg loss: 2.578893, ppl: 13.182539 +epoch: 2, batch: 2268, sum loss: 3890.054688, avg loss: 2.529294, ppl: 12.544650 +epoch: 2, batch: 2269, sum loss: 4672.565918, avg loss: 2.514836, ppl: 12.364585 +epoch: 2, batch: 2270, sum loss: 3502.277588, avg loss: 2.249376, ppl: 9.481813 +epoch: 2, batch: 2271, sum loss: 4607.731445, avg loss: 2.591525, ppl: 13.350116 +epoch: 2, batch: 2272, sum loss: 4534.083984, avg loss: 2.718276, ppl: 15.154171 +epoch: 2, batch: 2273, sum loss: 4424.685547, avg loss: 2.662266, ppl: 14.328718 +epoch: 2, batch: 2274, sum loss: 3861.984131, avg loss: 2.452053, ppl: 11.612165 
+epoch: 2, batch: 2275, sum loss: 4331.237305, avg loss: 2.458137, ppl: 11.683026 +epoch: 2, batch: 2276, sum loss: 4852.001465, avg loss: 2.749009, ppl: 15.627144 +epoch: 2, batch: 2277, sum loss: 4994.622559, avg loss: 2.907231, ppl: 18.306036 +epoch: 2, batch: 2278, sum loss: 4366.843750, avg loss: 2.600860, ppl: 13.475320 +epoch: 2, batch: 2279, sum loss: 4248.639160, avg loss: 2.721742, ppl: 15.206788 +epoch: 2, batch: 2280, sum loss: 2837.010254, avg loss: 2.066286, ppl: 7.895442 +epoch: 2, batch: 2281, sum loss: 4372.669922, avg loss: 2.490131, ppl: 12.062858 +epoch: 2, batch: 2282, sum loss: 4416.980469, avg loss: 2.407074, ppl: 11.101431 +epoch: 2, batch: 2283, sum loss: 4254.345703, avg loss: 2.564404, ppl: 12.992909 +epoch: 2, batch: 2284, sum loss: 4211.762207, avg loss: 2.445855, ppl: 11.540411 +epoch: 2, batch: 2285, sum loss: 3857.488525, avg loss: 2.514660, ppl: 12.362407 +epoch: 2, batch: 2286, sum loss: 3899.174805, avg loss: 2.518847, ppl: 12.414271 +epoch: 2, batch: 2287, sum loss: 4286.016602, avg loss: 2.608653, ppl: 13.580743 +epoch: 2, batch: 2288, sum loss: 4756.348145, avg loss: 2.789647, ppl: 16.275276 +epoch: 2, batch: 2289, sum loss: 4396.632324, avg loss: 2.635871, ppl: 13.955458 +epoch: 2, batch: 2290, sum loss: 4587.150391, avg loss: 2.711082, ppl: 15.045542 +epoch: 2, batch: 2291, sum loss: 4137.547363, avg loss: 2.404153, ppl: 11.069050 +epoch: 2, batch: 2292, sum loss: 5090.352539, avg loss: 2.638856, ppl: 13.997177 +epoch: 2, batch: 2293, sum loss: 3393.006348, avg loss: 2.165288, ppl: 8.717113 +epoch: 2, batch: 2294, sum loss: 3672.524658, avg loss: 2.463129, ppl: 11.741488 +epoch: 2, batch: 2295, sum loss: 3831.430176, avg loss: 2.549188, ppl: 12.796714 +epoch: 2, batch: 2296, sum loss: 4363.162598, avg loss: 2.533776, ppl: 12.600998 +epoch: 2, batch: 2297, sum loss: 3774.860840, avg loss: 2.508213, ppl: 12.282965 +epoch: 2, batch: 2298, sum loss: 3575.623047, avg loss: 2.194980, ppl: 8.979825 +epoch: 2, batch: 2299, sum loss: 
3950.277344, avg loss: 2.404307, ppl: 11.070760 +epoch: 2, batch: 2300, sum loss: 5209.341797, avg loss: 2.770926, ppl: 15.973426 +epoch: 2, batch: 2301, sum loss: 3938.497803, avg loss: 2.312682, ppl: 10.101482 +epoch: 2, batch: 2302, sum loss: 4122.762695, avg loss: 2.541777, ppl: 12.702225 +epoch: 2, batch: 2303, sum loss: 4180.609863, avg loss: 2.485499, ppl: 12.007115 +epoch: 2, batch: 2304, sum loss: 3785.640869, avg loss: 2.564797, ppl: 12.998025 +epoch: 2, batch: 2305, sum loss: 3902.006592, avg loss: 2.318483, ppl: 10.160248 +epoch: 2, batch: 2306, sum loss: 4830.696777, avg loss: 2.798781, ppl: 16.424620 +epoch: 2, batch: 2307, sum loss: 5160.361328, avg loss: 2.925375, ppl: 18.641214 +epoch: 2, batch: 2308, sum loss: 4459.388672, avg loss: 2.648093, ppl: 14.127076 +epoch: 2, batch: 2309, sum loss: 3573.663818, avg loss: 2.400043, ppl: 11.023648 +epoch: 2, batch: 2310, sum loss: 4737.005371, avg loss: 2.817969, ppl: 16.742805 +epoch: 2, batch: 2311, sum loss: 4351.952148, avg loss: 2.549474, ppl: 12.800369 +epoch: 2, batch: 2312, sum loss: 3426.343506, avg loss: 2.207696, ppl: 9.094735 +epoch: 2, batch: 2313, sum loss: 4641.253418, avg loss: 2.564229, ppl: 12.990633 +epoch: 2, batch: 2314, sum loss: 3239.708496, avg loss: 2.047856, ppl: 7.751265 +epoch: 2, batch: 2315, sum loss: 4564.688477, avg loss: 2.707407, ppl: 14.990358 +epoch: 2, batch: 2316, sum loss: 4083.323486, avg loss: 2.392105, ppl: 10.936492 +epoch: 2, batch: 2317, sum loss: 4273.833008, avg loss: 2.656204, ppl: 14.242130 +epoch: 2, batch: 2318, sum loss: 4714.276367, avg loss: 2.493007, ppl: 12.097601 +epoch: 2, batch: 2319, sum loss: 4637.304199, avg loss: 2.592121, ppl: 13.358072 +epoch: 2, batch: 2320, sum loss: 4328.385742, avg loss: 2.546109, ppl: 12.757374 +epoch: 2, batch: 2321, sum loss: 4493.171875, avg loss: 2.577838, ppl: 13.168639 +epoch: 2, batch: 2322, sum loss: 4652.142090, avg loss: 2.676722, ppl: 14.537355 +epoch: 2, batch: 2323, sum loss: 4189.676758, avg loss: 2.545369, 
ppl: 12.747927 +epoch: 2, batch: 2324, sum loss: 4376.708008, avg loss: 2.334244, ppl: 10.321656 +epoch: 2, batch: 2325, sum loss: 4912.035156, avg loss: 2.571746, ppl: 13.088658 +epoch: 2, batch: 2326, sum loss: 4123.950684, avg loss: 2.389311, ppl: 10.905975 +epoch: 2, batch: 2327, sum loss: 4481.450195, avg loss: 2.409382, ppl: 11.127081 +epoch: 2, batch: 2328, sum loss: 4103.388672, avg loss: 2.389859, ppl: 10.911960 +epoch: 2, batch: 2329, sum loss: 5007.942871, avg loss: 2.886422, ppl: 17.929052 +epoch: 2, batch: 2330, sum loss: 4509.134277, avg loss: 2.713077, ppl: 15.075597 +epoch: 2, batch: 2331, sum loss: 4409.710938, avg loss: 2.675796, ppl: 14.523900 +epoch: 2, batch: 2332, sum loss: 4847.234375, avg loss: 2.386625, ppl: 10.876719 +epoch: 2, batch: 2333, sum loss: 4801.067871, avg loss: 2.689674, ppl: 14.726872 +epoch: 2, batch: 2334, sum loss: 5469.101562, avg loss: 2.613044, ppl: 13.640513 +epoch: 2, batch: 2335, sum loss: 3856.090332, avg loss: 2.594946, ppl: 13.395869 +epoch: 2, batch: 2336, sum loss: 3957.225098, avg loss: 2.421803, ppl: 11.266149 +epoch: 2, batch: 2337, sum loss: 4033.358643, avg loss: 2.557615, ppl: 12.905000 +epoch: 2, batch: 2338, sum loss: 4731.318359, avg loss: 2.568577, ppl: 13.047242 +epoch: 2, batch: 2339, sum loss: 4765.342285, avg loss: 2.748179, ppl: 15.614172 +epoch: 2, batch: 2340, sum loss: 3923.616943, avg loss: 2.518368, ppl: 12.408327 +epoch: 2, batch: 2341, sum loss: 3211.459473, avg loss: 2.204159, ppl: 9.062625 +epoch: 2, batch: 2342, sum loss: 4885.351562, avg loss: 2.565836, ppl: 13.011531 +epoch: 2, batch: 2343, sum loss: 3383.380371, avg loss: 2.608620, ppl: 13.580299 +epoch: 2, batch: 2344, sum loss: 3866.226562, avg loss: 2.420931, ppl: 11.256332 +epoch: 2, batch: 2345, sum loss: 4026.951172, avg loss: 2.518418, ppl: 12.408954 +epoch: 2, batch: 2346, sum loss: 4429.494141, avg loss: 2.644474, ppl: 14.076043 +epoch: 2, batch: 2347, sum loss: 4644.512695, avg loss: 2.596150, ppl: 13.412004 +epoch: 2, batch: 
2348, sum loss: 4344.224121, avg loss: 2.652151, ppl: 14.184522 +epoch: 2, batch: 2349, sum loss: 4086.843506, avg loss: 2.294690, ppl: 9.921364 +epoch: 2, batch: 2350, sum loss: 4434.325684, avg loss: 2.555807, ppl: 12.881696 +epoch: 2, batch: 2351, sum loss: 4746.225586, avg loss: 2.622224, ppl: 13.766311 +epoch: 2, batch: 2352, sum loss: 4470.341797, avg loss: 2.446821, ppl: 11.551565 +epoch: 2, batch: 2353, sum loss: 4789.519531, avg loss: 2.845823, ppl: 17.215714 +epoch: 2, batch: 2354, sum loss: 5196.572266, avg loss: 2.688346, ppl: 14.707329 +epoch: 2, batch: 2355, sum loss: 4873.619141, avg loss: 2.704561, ppl: 14.947757 +epoch: 2, batch: 2356, sum loss: 4164.773926, avg loss: 2.507390, ppl: 12.272850 +epoch: 2, batch: 2357, sum loss: 3561.070557, avg loss: 2.401261, ppl: 11.037089 +epoch: 2, batch: 2358, sum loss: 4711.618652, avg loss: 2.817954, ppl: 16.742558 +epoch: 2, batch: 2359, sum loss: 4231.707031, avg loss: 2.526392, ppl: 12.508301 +epoch: 2, batch: 2360, sum loss: 4715.659180, avg loss: 2.489789, ppl: 12.058725 +epoch: 2, batch: 2361, sum loss: 5295.876465, avg loss: 2.782909, ppl: 16.165985 +epoch: 2, batch: 2362, sum loss: 4726.032227, avg loss: 2.583943, ppl: 13.249282 +epoch: 2, batch: 2363, sum loss: 4178.462402, avg loss: 2.412507, ppl: 11.161910 +epoch: 2, batch: 2364, sum loss: 4460.257324, avg loss: 2.585657, ppl: 13.272001 +epoch: 2, batch: 2365, sum loss: 3594.330322, avg loss: 2.214621, ppl: 9.157941 +epoch: 2, batch: 2366, sum loss: 3878.385742, avg loss: 2.334970, ppl: 10.329152 +epoch: 2, batch: 2367, sum loss: 4013.009277, avg loss: 2.572442, ppl: 13.097768 +epoch: 2, batch: 2368, sum loss: 4429.411133, avg loss: 2.614764, ppl: 13.663998 +epoch: 2, batch: 2369, sum loss: 4422.640625, avg loss: 2.709951, ppl: 15.028545 +epoch: 2, batch: 2370, sum loss: 4705.174316, avg loss: 2.535115, ppl: 12.617887 +epoch: 2, batch: 2371, sum loss: 5082.956543, avg loss: 2.706580, ppl: 14.977959 +epoch: 2, batch: 2372, sum loss: 4620.637695, avg 
loss: 2.482879, ppl: 11.975692 +epoch: 2, batch: 2373, sum loss: 3721.694580, avg loss: 2.538673, ppl: 12.662855 +epoch: 2, batch: 2374, sum loss: 3255.908203, avg loss: 2.183708, ppl: 8.879167 +epoch: 2, batch: 2375, sum loss: 4422.915039, avg loss: 2.534622, ppl: 12.611659 +epoch: 2, batch: 2376, sum loss: 4134.190918, avg loss: 2.571014, ppl: 13.079082 +epoch: 2, batch: 2377, sum loss: 3992.421143, avg loss: 2.597542, ppl: 13.430679 +epoch: 2, batch: 2378, sum loss: 4360.041016, avg loss: 2.587561, ppl: 13.297305 +epoch: 2, batch: 2379, sum loss: 4518.809082, avg loss: 2.696187, ppl: 14.823104 +epoch: 2, batch: 2380, sum loss: 3860.338379, avg loss: 2.495371, ppl: 12.126227 +epoch: 2, batch: 2381, sum loss: 4283.223633, avg loss: 2.630973, ppl: 13.887270 +epoch: 2, batch: 2382, sum loss: 4752.579102, avg loss: 2.729799, ppl: 15.329799 +epoch: 2, batch: 2383, sum loss: 3323.929443, avg loss: 2.345751, ppl: 10.441114 +epoch: 2, batch: 2384, sum loss: 4523.512695, avg loss: 2.602712, ppl: 13.500297 +epoch: 2, batch: 2385, sum loss: 3431.798096, avg loss: 2.396507, ppl: 10.984742 +epoch: 2, batch: 2386, sum loss: 4332.469727, avg loss: 2.600522, ppl: 13.470769 +epoch: 2, batch: 2387, sum loss: 3978.105469, avg loss: 2.501953, ppl: 12.206311 +epoch: 2, batch: 2388, sum loss: 4850.097168, avg loss: 2.724774, ppl: 15.252961 +epoch: 2, batch: 2389, sum loss: 4677.643066, avg loss: 2.411156, ppl: 11.146844 +epoch: 2, batch: 2390, sum loss: 5433.661133, avg loss: 2.660950, ppl: 14.309883 +epoch: 2, batch: 2391, sum loss: 3868.519531, avg loss: 2.457763, ppl: 11.678659 +epoch: 2, batch: 2392, sum loss: 3988.803711, avg loss: 2.583422, ppl: 13.242378 +epoch: 2, batch: 2393, sum loss: 4067.015137, avg loss: 2.488993, ppl: 12.049142 +epoch: 2, batch: 2394, sum loss: 3575.457031, avg loss: 2.406095, ppl: 11.090566 +epoch: 2, batch: 2395, sum loss: 4162.700195, avg loss: 2.414559, ppl: 11.184841 +epoch: 2, batch: 2396, sum loss: 4199.022461, avg loss: 2.420186, ppl: 11.247949 
+epoch: 2, batch: 2397, sum loss: 3924.967773, avg loss: 2.416852, ppl: 11.210513 +epoch: 2, batch: 2398, sum loss: 3622.093750, avg loss: 2.356600, ppl: 10.555001 +epoch: 2, batch: 2399, sum loss: 4292.437012, avg loss: 2.774685, ppl: 16.033571 +epoch: 2, batch: 2400, sum loss: 5022.320312, avg loss: 3.034635, ppl: 20.793383 +epoch: 2, batch: 2401, sum loss: 4039.439697, avg loss: 2.534153, ppl: 12.605746 +epoch: 2, batch: 2402, sum loss: 3973.559570, avg loss: 2.555344, ppl: 12.875727 +epoch: 2, batch: 2403, sum loss: 4589.976074, avg loss: 2.668591, ppl: 14.419635 +epoch: 2, batch: 2404, sum loss: 3871.050781, avg loss: 2.399908, ppl: 11.022158 +epoch: 2, batch: 2405, sum loss: 4907.125000, avg loss: 2.821809, ppl: 16.807220 +epoch: 2, batch: 2406, sum loss: 4965.592285, avg loss: 2.644085, ppl: 14.070570 +epoch: 2, batch: 2407, sum loss: 3948.546143, avg loss: 2.505423, ppl: 12.248734 +epoch: 2, batch: 2408, sum loss: 4769.586914, avg loss: 2.567054, ppl: 13.027392 +epoch: 2, batch: 2409, sum loss: 3320.270752, avg loss: 2.423555, ppl: 11.285913 +epoch: 2, batch: 2410, sum loss: 4363.839844, avg loss: 2.741105, ppl: 15.504112 +epoch: 2, batch: 2411, sum loss: 4057.506836, avg loss: 2.739707, ppl: 15.482455 +epoch: 2, batch: 2412, sum loss: 3556.298096, avg loss: 2.595838, ppl: 13.407820 +epoch: 2, batch: 2413, sum loss: 4961.327637, avg loss: 2.668815, ppl: 14.422870 +epoch: 2, batch: 2414, sum loss: 4490.898926, avg loss: 2.633958, ppl: 13.928796 +epoch: 2, batch: 2415, sum loss: 4228.102051, avg loss: 2.482738, ppl: 11.973999 +epoch: 2, batch: 2416, sum loss: 4523.998535, avg loss: 2.457359, ppl: 11.673944 +epoch: 2, batch: 2417, sum loss: 4077.867676, avg loss: 2.507914, ppl: 12.279284 +epoch: 2, batch: 2418, sum loss: 4656.642090, avg loss: 2.424072, ppl: 11.291746 +epoch: 2, batch: 2419, sum loss: 4989.676758, avg loss: 2.892566, ppl: 18.039547 +epoch: 2, batch: 2420, sum loss: 3636.182129, avg loss: 2.336878, ppl: 10.348878 +epoch: 2, batch: 2421, sum 
loss: 3636.110107, avg loss: 2.181230, ppl: 8.857192 +epoch: 2, batch: 2422, sum loss: 4673.372070, avg loss: 2.555152, ppl: 12.873253 +epoch: 2, batch: 2423, sum loss: 5267.697754, avg loss: 2.756514, ppl: 15.744854 +epoch: 2, batch: 2424, sum loss: 4836.061035, avg loss: 2.700202, ppl: 14.882730 +epoch: 2, batch: 2425, sum loss: 3722.594238, avg loss: 2.420412, ppl: 11.250497 +epoch: 2, batch: 2426, sum loss: 4690.110352, avg loss: 2.835617, ppl: 17.040913 +epoch: 2, batch: 2427, sum loss: 4404.709473, avg loss: 2.667904, ppl: 14.409737 +epoch: 2, batch: 2428, sum loss: 3541.355957, avg loss: 2.280332, ppl: 9.779930 +epoch: 2, batch: 2429, sum loss: 4393.204590, avg loss: 2.635396, ppl: 13.948829 +epoch: 2, batch: 2430, sum loss: 4258.241211, avg loss: 2.623685, ppl: 13.786435 +epoch: 2, batch: 2431, sum loss: 3914.337891, avg loss: 2.522125, ppl: 12.455035 +epoch: 2, batch: 2432, sum loss: 4051.044678, avg loss: 2.480738, ppl: 11.950076 +epoch: 2, batch: 2433, sum loss: 4353.816895, avg loss: 2.497887, ppl: 12.156778 +epoch: 2, batch: 2434, sum loss: 4024.475098, avg loss: 2.304969, ppl: 10.023864 +epoch: 2, batch: 2435, sum loss: 3355.508057, avg loss: 2.339964, ppl: 10.380859 +epoch: 2, batch: 2436, sum loss: 3912.942383, avg loss: 2.501881, ppl: 12.205435 +epoch: 2, batch: 2437, sum loss: 5136.288086, avg loss: 2.918345, ppl: 18.510635 +epoch: 2, batch: 2438, sum loss: 4817.719727, avg loss: 2.711154, ppl: 15.046622 +epoch: 2, batch: 2439, sum loss: 5316.599609, avg loss: 2.811528, ppl: 16.635321 +epoch: 2, batch: 2440, sum loss: 4249.694824, avg loss: 2.570898, ppl: 13.077567 +epoch: 2, batch: 2441, sum loss: 3805.566650, avg loss: 2.346219, ppl: 10.445994 +epoch: 2, batch: 2442, sum loss: 4247.153809, avg loss: 2.738333, ppl: 15.461183 +epoch: 2, batch: 2443, sum loss: 4477.458984, avg loss: 2.509787, ppl: 12.302304 +epoch: 2, batch: 2444, sum loss: 3849.329590, avg loss: 2.562803, ppl: 12.972121 +epoch: 2, batch: 2445, sum loss: 5188.655273, avg loss: 
2.580137, ppl: 13.198947 +epoch: 2, batch: 2446, sum loss: 4481.346191, avg loss: 2.739209, ppl: 15.474743 +epoch: 2, batch: 2447, sum loss: 3998.374023, avg loss: 2.508390, ppl: 12.285137 +epoch: 2, batch: 2448, sum loss: 4372.859375, avg loss: 2.543839, ppl: 12.728444 +epoch: 2, batch: 2449, sum loss: 4390.648926, avg loss: 2.646564, ppl: 14.105482 +epoch: 2, batch: 2450, sum loss: 3424.060547, avg loss: 2.130716, ppl: 8.420895 +epoch: 2, batch: 2451, sum loss: 4038.050537, avg loss: 2.601837, ppl: 13.488493 +epoch: 2, batch: 2452, sum loss: 4229.746582, avg loss: 2.555738, ppl: 12.880805 +epoch: 2, batch: 2453, sum loss: 4358.863281, avg loss: 2.468212, ppl: 11.801332 +epoch: 2, batch: 2454, sum loss: 4892.169922, avg loss: 2.762377, ppl: 15.837444 +epoch: 2, batch: 2455, sum loss: 4732.725098, avg loss: 2.548586, ppl: 12.789012 +epoch: 2, batch: 2456, sum loss: 5453.849121, avg loss: 2.750302, ppl: 15.647358 +epoch: 2, batch: 2457, sum loss: 4027.011230, avg loss: 2.455495, ppl: 11.652196 +epoch: 2, batch: 2458, sum loss: 3203.903076, avg loss: 2.197464, ppl: 9.002159 +epoch: 2, batch: 2459, sum loss: 4669.052734, avg loss: 2.474326, ppl: 11.873697 +epoch: 2, batch: 2460, sum loss: 4051.530273, avg loss: 2.355541, ppl: 10.543829 +epoch: 2, batch: 2461, sum loss: 4030.834229, avg loss: 2.491245, ppl: 12.076300 +epoch: 2, batch: 2462, sum loss: 3629.977783, avg loss: 2.467694, ppl: 11.795217 +epoch: 2, batch: 2463, sum loss: 4738.406738, avg loss: 2.753287, ppl: 15.694131 +epoch: 2, batch: 2464, sum loss: 5703.879883, avg loss: 2.825101, ppl: 16.862654 +epoch: 2, batch: 2465, sum loss: 3985.182129, avg loss: 2.437420, ppl: 11.443480 +epoch: 2, batch: 2466, sum loss: 4067.402344, avg loss: 2.554901, ppl: 12.870024 +epoch: 2, batch: 2467, sum loss: 3815.830078, avg loss: 2.635242, ppl: 13.946687 +epoch: 2, batch: 2468, sum loss: 4572.574219, avg loss: 2.557368, ppl: 12.901815 +epoch: 2, batch: 2469, sum loss: 4442.433105, avg loss: 2.501370, ppl: 12.199195 +epoch: 
2, batch: 2470, sum loss: 5064.284668, avg loss: 2.841911, ppl: 17.148499 +epoch: 2, batch: 2471, sum loss: 3469.007568, avg loss: 2.502892, ppl: 12.217772 +epoch: 2, batch: 2472, sum loss: 3988.129395, avg loss: 2.372474, ppl: 10.723892 +epoch: 2, batch: 2473, sum loss: 4254.531250, avg loss: 2.463539, ppl: 11.746305 +epoch: 2, batch: 2474, sum loss: 5387.315430, avg loss: 2.704476, ppl: 14.946481 +epoch: 2, batch: 2475, sum loss: 4314.535645, avg loss: 2.427989, ppl: 11.336057 +epoch: 2, batch: 2476, sum loss: 4667.476074, avg loss: 2.664085, ppl: 14.354804 +epoch: 2, batch: 2477, sum loss: 4179.979004, avg loss: 2.486603, ppl: 12.020368 +epoch: 2, batch: 2478, sum loss: 4530.490234, avg loss: 2.682351, ppl: 14.619418 +epoch: 2, batch: 2479, sum loss: 4209.958008, avg loss: 2.325944, ppl: 10.236336 +epoch: 2, batch: 2480, sum loss: 4547.448242, avg loss: 2.786427, ppl: 16.222948 +epoch: 2, batch: 2481, sum loss: 4298.877441, avg loss: 2.533222, ppl: 12.594018 +epoch: 2, batch: 2482, sum loss: 4196.395508, avg loss: 2.521872, ppl: 12.451888 +epoch: 2, batch: 2483, sum loss: 3438.059814, avg loss: 2.191243, ppl: 8.946323 +epoch: 2, batch: 2484, sum loss: 5592.695312, avg loss: 2.863643, ppl: 17.525259 +epoch: 2, batch: 2485, sum loss: 3645.437500, avg loss: 2.327866, ppl: 10.256027 +epoch: 2, batch: 2486, sum loss: 4362.474121, avg loss: 2.634344, ppl: 13.934174 +epoch: 2, batch: 2487, sum loss: 4377.783691, avg loss: 2.358720, ppl: 10.577399 +epoch: 2, batch: 2488, sum loss: 3879.241455, avg loss: 2.458328, ppl: 11.685258 +epoch: 2, batch: 2489, sum loss: 4292.091309, avg loss: 2.593409, ppl: 13.375288 +epoch: 2, batch: 2490, sum loss: 4182.801758, avg loss: 2.535031, ppl: 12.616826 +epoch: 2, batch: 2491, sum loss: 5472.649902, avg loss: 2.875801, ppl: 17.739634 +epoch: 2, batch: 2492, sum loss: 4810.379395, avg loss: 2.369645, ppl: 10.693597 +epoch: 2, batch: 2493, sum loss: 4400.958008, avg loss: 2.540969, ppl: 12.691962 +epoch: 2, batch: 2494, sum loss: 
3694.901367, avg loss: 2.481465, ppl: 11.958770 +epoch: 2, batch: 2495, sum loss: 4327.278809, avg loss: 2.676116, ppl: 14.528551 +epoch: 2, batch: 2496, sum loss: 4847.454102, avg loss: 2.749548, ppl: 15.635570 +epoch: 2, batch: 2497, sum loss: 4443.203613, avg loss: 2.500396, ppl: 12.187320 +epoch: 2, batch: 2498, sum loss: 4222.965820, avg loss: 2.603555, ppl: 13.511683 +epoch: 2, batch: 2499, sum loss: 4174.483398, avg loss: 2.591238, ppl: 13.346278 +epoch: 2, batch: 2500, sum loss: 5055.265137, avg loss: 2.720810, ppl: 15.192622 +epoch: 2, batch: 2501, sum loss: 4568.437988, avg loss: 2.618016, ppl: 13.708499 +epoch: 2, batch: 2502, sum loss: 5034.694336, avg loss: 2.763279, ppl: 15.851735 +epoch: 2, batch: 2503, sum loss: 4377.581055, avg loss: 2.407910, ppl: 11.110720 +epoch: 2, batch: 2504, sum loss: 4528.234863, avg loss: 2.703424, ppl: 14.930767 +epoch: 2, batch: 2505, sum loss: 4874.712402, avg loss: 2.615189, ppl: 13.669801 +epoch: 2, batch: 2506, sum loss: 4466.504883, avg loss: 2.716852, ppl: 15.132612 +epoch: 2, batch: 2507, sum loss: 4061.405273, avg loss: 2.507041, ppl: 12.268567 +epoch: 2, batch: 2508, sum loss: 3771.206787, avg loss: 2.489245, ppl: 12.052178 +epoch: 2, batch: 2509, sum loss: 3896.655518, avg loss: 2.522107, ppl: 12.454816 +epoch: 2, batch: 2510, sum loss: 3957.700684, avg loss: 2.657959, ppl: 14.267137 +epoch: 2, batch: 2511, sum loss: 4631.199707, avg loss: 2.735499, ppl: 15.417437 +epoch: 2, batch: 2512, sum loss: 4356.967773, avg loss: 2.547934, ppl: 12.780675 +epoch: 2, batch: 2513, sum loss: 3957.261963, avg loss: 2.304754, ppl: 10.021708 +epoch: 2, batch: 2514, sum loss: 4457.367188, avg loss: 2.609700, ppl: 13.594968 +epoch: 2, batch: 2515, sum loss: 4994.658691, avg loss: 2.850833, ppl: 17.302183 +epoch: 2, batch: 2516, sum loss: 4246.479492, avg loss: 2.595648, ppl: 13.405269 +epoch: 2, batch: 2517, sum loss: 3655.579590, avg loss: 2.344823, ppl: 10.431430 +epoch: 2, batch: 2518, sum loss: 5287.259766, avg loss: 
2.725392, ppl: 15.262390 +epoch: 2, batch: 2519, sum loss: 4637.740723, avg loss: 2.677679, ppl: 14.551285 +epoch: 2, batch: 2520, sum loss: 4717.959961, avg loss: 2.786746, ppl: 16.228121 +epoch: 2, batch: 2521, sum loss: 5320.917969, avg loss: 2.830276, ppl: 16.950130 +epoch: 2, batch: 2522, sum loss: 4975.686523, avg loss: 2.785939, ppl: 16.215033 +epoch: 2, batch: 2523, sum loss: 3866.542236, avg loss: 2.591516, ppl: 13.349998 +epoch: 2, batch: 2524, sum loss: 4660.770996, avg loss: 2.695646, ppl: 14.815080 +epoch: 2, batch: 2525, sum loss: 4020.199463, avg loss: 2.388710, ppl: 10.899427 +epoch: 2, batch: 2526, sum loss: 4139.104004, avg loss: 2.673840, ppl: 14.495522 +epoch: 2, batch: 2527, sum loss: 5054.070801, avg loss: 2.602508, ppl: 13.497552 +epoch: 2, batch: 2528, sum loss: 3451.265137, avg loss: 2.254256, ppl: 9.528197 +epoch: 2, batch: 2529, sum loss: 3960.416504, avg loss: 2.576719, ppl: 13.153907 +epoch: 2, batch: 2530, sum loss: 4174.371582, avg loss: 2.605725, ppl: 13.541039 +epoch: 2, batch: 2531, sum loss: 4019.567139, avg loss: 2.417058, ppl: 11.212820 +epoch: 2, batch: 2532, sum loss: 4132.804688, avg loss: 2.622338, ppl: 13.767876 +epoch: 2, batch: 2533, sum loss: 4544.127930, avg loss: 2.632751, ppl: 13.911985 +epoch: 2, batch: 2534, sum loss: 4050.873779, avg loss: 2.506729, ppl: 12.264745 +epoch: 2, batch: 2535, sum loss: 4814.433105, avg loss: 2.617963, ppl: 13.707767 +epoch: 2, batch: 2536, sum loss: 3557.928223, avg loss: 2.227882, ppl: 9.280193 +epoch: 2, batch: 2537, sum loss: 4469.157715, avg loss: 2.758739, ppl: 15.779940 +epoch: 2, batch: 2538, sum loss: 4509.591309, avg loss: 2.724829, ppl: 15.253801 +epoch: 2, batch: 2539, sum loss: 4344.271973, avg loss: 2.429682, ppl: 11.355273 +epoch: 2, batch: 2540, sum loss: 4544.583496, avg loss: 2.636070, ppl: 13.958233 +epoch: 2, batch: 2541, sum loss: 4118.610840, avg loss: 2.428426, ppl: 11.341021 +epoch: 2, batch: 2542, sum loss: 5136.930664, avg loss: 2.656117, ppl: 14.240887 +epoch: 
2, batch: 2543, sum loss: 4837.825684, avg loss: 2.509246, ppl: 12.295654 +epoch: 2, batch: 2544, sum loss: 4194.579590, avg loss: 2.433051, ppl: 11.393589 +epoch: 2, batch: 2545, sum loss: 4421.392578, avg loss: 2.709187, ppl: 15.017058 +epoch: 2, batch: 2546, sum loss: 3840.464355, avg loss: 2.334629, ppl: 10.325624 +epoch: 2, batch: 2547, sum loss: 3977.258789, avg loss: 2.459653, ppl: 11.700752 +epoch: 2, batch: 2548, sum loss: 4919.678711, avg loss: 2.759214, ppl: 15.787432 +epoch: 2, batch: 2549, sum loss: 4957.223145, avg loss: 2.650922, ppl: 14.167089 +epoch: 2, batch: 2550, sum loss: 4390.292969, avg loss: 2.486010, ppl: 12.013243 +epoch: 2, batch: 2551, sum loss: 4170.396484, avg loss: 2.552262, ppl: 12.836110 +epoch: 2, batch: 2552, sum loss: 3810.022949, avg loss: 2.415994, ppl: 11.200903 +epoch: 2, batch: 2553, sum loss: 4909.946289, avg loss: 2.804081, ppl: 16.511902 +epoch: 2, batch: 2554, sum loss: 4812.590820, avg loss: 2.634149, ppl: 13.931457 +epoch: 2, batch: 2555, sum loss: 5163.714355, avg loss: 2.663081, ppl: 14.340406 +epoch: 2, batch: 2556, sum loss: 3800.399414, avg loss: 2.328676, ppl: 10.264345 +epoch: 2, batch: 2557, sum loss: 4372.824219, avg loss: 2.663109, ppl: 14.340799 +epoch: 2, batch: 2558, sum loss: 4449.208008, avg loss: 2.631111, ppl: 13.889188 +epoch: 2, batch: 2559, sum loss: 4882.615234, avg loss: 2.752320, ppl: 15.678958 +epoch: 2, batch: 2560, sum loss: 3572.015625, avg loss: 2.354658, ppl: 10.534522 +epoch: 2, batch: 2561, sum loss: 4587.481934, avg loss: 2.654793, ppl: 14.222038 +epoch: 2, batch: 2562, sum loss: 5690.231445, avg loss: 2.754226, ppl: 15.708880 +epoch: 2, batch: 2563, sum loss: 3110.588867, avg loss: 2.149681, ppl: 8.582123 +epoch: 2, batch: 2564, sum loss: 4044.974609, avg loss: 2.828654, ppl: 16.922661 +epoch: 2, batch: 2565, sum loss: 3627.729980, avg loss: 2.486450, ppl: 12.018531 +epoch: 2, batch: 2566, sum loss: 4056.714111, avg loss: 2.693701, ppl: 14.786303 +epoch: 2, batch: 2567, sum loss: 
4541.607910, avg loss: 2.662138, ppl: 14.326890 +epoch: 2, batch: 2568, sum loss: 5912.970215, avg loss: 2.878759, ppl: 17.792173 +epoch: 2, batch: 2569, sum loss: 3860.462158, avg loss: 2.585708, ppl: 13.272685 +epoch: 2, batch: 2570, sum loss: 4936.818848, avg loss: 2.890409, ppl: 18.000669 +epoch: 2, batch: 2571, sum loss: 4724.103516, avg loss: 2.584302, ppl: 13.254030 +epoch: 2, batch: 2572, sum loss: 3939.334229, avg loss: 2.286323, ppl: 9.838693 +epoch: 2, batch: 2573, sum loss: 4436.205078, avg loss: 2.464558, ppl: 11.758288 +epoch: 2, batch: 2574, sum loss: 4135.714355, avg loss: 2.503459, ppl: 12.224709 +epoch: 2, batch: 2575, sum loss: 4675.145020, avg loss: 2.629441, ppl: 13.866011 +epoch: 2, batch: 2576, sum loss: 4186.121094, avg loss: 2.456644, ppl: 11.665594 +epoch: 2, batch: 2577, sum loss: 4315.589844, avg loss: 2.692196, ppl: 14.764057 +epoch: 2, batch: 2578, sum loss: 4609.530762, avg loss: 2.659856, ppl: 14.294231 +epoch: 2, batch: 2579, sum loss: 4486.708008, avg loss: 2.577087, ppl: 13.158747 +epoch: 2, batch: 2580, sum loss: 4972.766113, avg loss: 2.720332, ppl: 15.185358 +epoch: 2, batch: 2581, sum loss: 3336.348145, avg loss: 2.366205, ppl: 10.656867 +epoch: 2, batch: 2582, sum loss: 3689.726562, avg loss: 2.317667, ppl: 10.151967 +epoch: 2, batch: 2583, sum loss: 4596.866211, avg loss: 2.657148, ppl: 14.255576 +epoch: 2, batch: 2584, sum loss: 4793.828125, avg loss: 2.788731, ppl: 16.260370 +epoch: 2, batch: 2585, sum loss: 4781.423828, avg loss: 2.648988, ppl: 14.139726 +epoch: 2, batch: 2586, sum loss: 4180.367188, avg loss: 2.550560, ppl: 12.814272 +epoch: 2, batch: 2587, sum loss: 3861.036377, avg loss: 2.390735, ppl: 10.921515 +epoch: 2, batch: 2588, sum loss: 4309.469727, avg loss: 2.536474, ppl: 12.635047 +epoch: 2, batch: 2589, sum loss: 3659.966309, avg loss: 2.271860, ppl: 9.697422 +epoch: 2, batch: 2590, sum loss: 4404.887695, avg loss: 2.626647, ppl: 13.827335 +epoch: 2, batch: 2591, sum loss: 3948.130371, avg loss: 2.408865, 
ppl: 11.121334 +epoch: 2, batch: 2592, sum loss: 3226.944580, avg loss: 2.120200, ppl: 8.332806 +epoch: 2, batch: 2593, sum loss: 4275.126953, avg loss: 2.556894, ppl: 12.895705 +epoch: 2, batch: 2594, sum loss: 3935.397461, avg loss: 2.420294, ppl: 11.249161 +epoch: 2, batch: 2595, sum loss: 4711.285645, avg loss: 2.781160, ppl: 16.137735 +epoch: 2, batch: 2596, sum loss: 4327.164062, avg loss: 2.565006, ppl: 13.000731 +epoch: 2, batch: 2597, sum loss: 4700.132324, avg loss: 2.592461, ppl: 13.362621 +epoch: 2, batch: 2598, sum loss: 5380.925781, avg loss: 2.811351, ppl: 16.632370 +epoch: 2, batch: 2599, sum loss: 4413.713867, avg loss: 2.563132, ppl: 12.976399 +epoch: 2, batch: 2600, sum loss: 4166.879883, avg loss: 2.496633, ppl: 12.141542 +epoch: 2, batch: 2601, sum loss: 3941.042480, avg loss: 2.589384, ppl: 13.321564 +epoch: 2, batch: 2602, sum loss: 3076.709717, avg loss: 2.218248, ppl: 9.191211 +epoch: 2, batch: 2603, sum loss: 4750.410645, avg loss: 2.667272, ppl: 14.400625 +epoch: 2, batch: 2604, sum loss: 3681.952148, avg loss: 2.245093, ppl: 9.441293 +epoch: 2, batch: 2605, sum loss: 4095.627441, avg loss: 2.623720, ppl: 13.786922 +epoch: 2, batch: 2606, sum loss: 4182.344727, avg loss: 2.480632, ppl: 11.948809 +epoch: 2, batch: 2607, sum loss: 4156.609375, avg loss: 2.578542, ppl: 13.177908 +epoch: 2, batch: 2608, sum loss: 4426.259766, avg loss: 2.846469, ppl: 17.226852 +epoch: 2, batch: 2609, sum loss: 3875.372070, avg loss: 2.422107, ppl: 11.269585 +epoch: 2, batch: 2610, sum loss: 3992.458984, avg loss: 2.544588, ppl: 12.737983 +epoch: 2, batch: 2611, sum loss: 4660.346191, avg loss: 2.605001, ppl: 13.531236 +epoch: 2, batch: 2612, sum loss: 5071.256348, avg loss: 2.673303, ppl: 14.487748 +epoch: 2, batch: 2613, sum loss: 4329.900391, avg loss: 2.672778, ppl: 14.480141 +epoch: 2, batch: 2614, sum loss: 5257.126953, avg loss: 2.930394, ppl: 18.735014 +epoch: 2, batch: 2615, sum loss: 4727.491699, avg loss: 2.716949, ppl: 15.134081 +epoch: 2, batch: 
2616, sum loss: 3998.663574, avg loss: 2.635902, ppl: 13.955897 +epoch: 2, batch: 2617, sum loss: 3580.720459, avg loss: 2.442511, ppl: 11.501882 +epoch: 2, batch: 2618, sum loss: 4810.913574, avg loss: 2.733474, ppl: 15.386239 +epoch: 2, batch: 2619, sum loss: 4239.420410, avg loss: 2.560036, ppl: 12.936289 +epoch: 2, batch: 2620, sum loss: 4976.518066, avg loss: 2.851873, ppl: 17.320187 +epoch: 2, batch: 2621, sum loss: 3985.698730, avg loss: 2.383791, ppl: 10.845944 +epoch: 2, batch: 2622, sum loss: 5203.341309, avg loss: 2.725690, ppl: 15.266939 +epoch: 2, batch: 2623, sum loss: 4786.639648, avg loss: 2.612795, ppl: 13.637109 +epoch: 2, batch: 2624, sum loss: 3837.900146, avg loss: 2.556895, ppl: 12.895720 +epoch: 2, batch: 2625, sum loss: 4310.263184, avg loss: 2.485734, ppl: 12.009935 +epoch: 2, batch: 2626, sum loss: 4073.291992, avg loss: 2.565045, ppl: 13.001248 +epoch: 2, batch: 2627, sum loss: 3838.533203, avg loss: 2.295774, ppl: 9.932116 +epoch: 2, batch: 2628, sum loss: 4793.641602, avg loss: 2.750225, ppl: 15.646146 +epoch: 2, batch: 2629, sum loss: 3515.443604, avg loss: 2.625425, ppl: 13.810437 +epoch: 2, batch: 2630, sum loss: 4651.738281, avg loss: 2.698224, ppl: 14.853333 +epoch: 2, batch: 2631, sum loss: 4171.083984, avg loss: 2.538700, ppl: 12.663197 +epoch: 2, batch: 2632, sum loss: 4546.202148, avg loss: 2.758618, ppl: 15.778021 +epoch: 2, batch: 2633, sum loss: 4962.409668, avg loss: 2.756894, ppl: 15.750851 +epoch: 2, batch: 2634, sum loss: 4265.278320, avg loss: 2.699543, ppl: 14.872937 +epoch: 2, batch: 2635, sum loss: 4588.527344, avg loss: 2.734522, ppl: 15.402373 +epoch: 2, batch: 2636, sum loss: 5073.293945, avg loss: 2.695693, ppl: 14.815780 +epoch: 2, batch: 2637, sum loss: 3774.791016, avg loss: 2.468797, ppl: 11.808238 +epoch: 2, batch: 2638, sum loss: 4398.402344, avg loss: 2.588819, ppl: 13.314033 +epoch: 2, batch: 2639, sum loss: 4163.491211, avg loss: 2.813170, ppl: 16.662651 +epoch: 2, batch: 2640, sum loss: 3578.084717, avg 
loss: 2.257467, ppl: 9.558842 +epoch: 2, batch: 2641, sum loss: 4738.518066, avg loss: 2.683192, ppl: 14.631730 +epoch: 2, batch: 2642, sum loss: 3492.914307, avg loss: 2.269600, ppl: 9.675529 +epoch: 2, batch: 2643, sum loss: 4210.546875, avg loss: 2.435250, ppl: 11.418671 +epoch: 2, batch: 2644, sum loss: 4623.029297, avg loss: 2.684686, ppl: 14.653599 +epoch: 2, batch: 2645, sum loss: 4361.913574, avg loss: 2.871569, ppl: 17.664715 +epoch: 2, batch: 2646, sum loss: 3782.553467, avg loss: 2.378964, ppl: 10.793719 +epoch: 2, batch: 2647, sum loss: 3727.603027, avg loss: 2.493380, ppl: 12.102110 +epoch: 2, batch: 2648, sum loss: 4181.715332, avg loss: 2.349278, ppl: 10.478004 +epoch: 2, batch: 2649, sum loss: 4109.459961, avg loss: 2.675430, ppl: 14.518589 +epoch: 2, batch: 2650, sum loss: 3896.112305, avg loss: 2.330211, ppl: 10.280109 +epoch: 2, batch: 2651, sum loss: 3738.638672, avg loss: 2.438773, ppl: 11.458971 +epoch: 2, batch: 2652, sum loss: 4431.605469, avg loss: 2.677707, ppl: 14.551694 +epoch: 2, batch: 2653, sum loss: 3979.204834, avg loss: 2.473092, ppl: 11.859056 +epoch: 2, batch: 2654, sum loss: 4264.791992, avg loss: 2.480973, ppl: 11.952887 +epoch: 2, batch: 2655, sum loss: 4809.516602, avg loss: 2.581598, ppl: 13.218238 +epoch: 2, batch: 2656, sum loss: 3761.987061, avg loss: 2.349773, ppl: 10.483194 +epoch: 2, batch: 2657, sum loss: 4267.894531, avg loss: 2.641024, ppl: 14.027558 +epoch: 2, batch: 2658, sum loss: 4275.115234, avg loss: 2.549264, ppl: 12.797678 +epoch: 2, batch: 2659, sum loss: 3776.006836, avg loss: 2.358530, ppl: 10.575397 +epoch: 2, batch: 2660, sum loss: 4444.888672, avg loss: 2.645767, ppl: 14.094254 +epoch: 2, batch: 2661, sum loss: 3536.020020, avg loss: 2.352641, ppl: 10.513303 +epoch: 2, batch: 2662, sum loss: 4497.291504, avg loss: 2.762464, ppl: 15.838822 +epoch: 2, batch: 2663, sum loss: 4639.761230, avg loss: 2.753567, ppl: 15.698536 +epoch: 2, batch: 2664, sum loss: 4931.130859, avg loss: 2.588520, ppl: 13.310059 
+epoch: 2, batch: 2665, sum loss: 3747.894287, avg loss: 2.549588, ppl: 12.801827 +epoch: 2, batch: 2666, sum loss: 4393.647461, avg loss: 2.805650, ppl: 16.537819 +epoch: 2, batch: 2667, sum loss: 4449.075684, avg loss: 2.473083, ppl: 11.858946 +epoch: 2, batch: 2668, sum loss: 5130.791992, avg loss: 2.713269, ppl: 15.078490 +epoch: 2, batch: 2669, sum loss: 4854.987793, avg loss: 2.677875, ppl: 14.554137 +epoch: 2, batch: 2670, sum loss: 3953.260010, avg loss: 2.478533, ppl: 11.923759 +epoch: 2, batch: 2671, sum loss: 4271.672852, avg loss: 2.375791, ppl: 10.759524 +epoch: 2, batch: 2672, sum loss: 4697.050781, avg loss: 2.919236, ppl: 18.527126 +epoch: 2, batch: 2673, sum loss: 3570.598633, avg loss: 2.396375, ppl: 10.983289 +epoch: 2, batch: 2674, sum loss: 4903.579102, avg loss: 2.751728, ppl: 15.669686 +epoch: 2, batch: 2675, sum loss: 4843.903320, avg loss: 2.639729, ppl: 14.009414 +epoch: 2, batch: 2676, sum loss: 4305.832520, avg loss: 2.643237, ppl: 14.058633 +epoch: 2, batch: 2677, sum loss: 4222.601074, avg loss: 2.563813, ppl: 12.985242 +epoch: 2, batch: 2678, sum loss: 3724.915527, avg loss: 2.296495, ppl: 9.939288 +epoch: 2, batch: 2679, sum loss: 5016.112305, avg loss: 2.717287, ppl: 15.139198 +epoch: 2, batch: 2680, sum loss: 4416.815918, avg loss: 2.762236, ppl: 15.835217 +epoch: 2, batch: 2681, sum loss: 3787.038574, avg loss: 2.550194, ppl: 12.809592 +epoch: 2, batch: 2682, sum loss: 4053.795410, avg loss: 2.446467, ppl: 11.547474 +epoch: 2, batch: 2683, sum loss: 2818.744873, avg loss: 2.009084, ppl: 7.456484 +epoch: 2, batch: 2684, sum loss: 4150.581055, avg loss: 2.473529, ppl: 11.864238 +epoch: 2, batch: 2685, sum loss: 4290.635254, avg loss: 2.605121, ppl: 13.532867 +epoch: 2, batch: 2686, sum loss: 5653.475098, avg loss: 2.900706, ppl: 18.186974 +epoch: 2, batch: 2687, sum loss: 3078.796631, avg loss: 2.168167, ppl: 8.742242 +epoch: 2, batch: 2688, sum loss: 4251.119629, avg loss: 2.460139, ppl: 11.706436 +epoch: 2, batch: 2689, sum loss: 
3684.072510, avg loss: 2.337610, ppl: 10.356450 +epoch: 2, batch: 2690, sum loss: 3763.024170, avg loss: 2.503675, ppl: 12.227353 +epoch: 2, batch: 2691, sum loss: 4553.944336, avg loss: 2.821527, ppl: 16.802488 +epoch: 2, batch: 2692, sum loss: 3464.940186, avg loss: 2.386322, ppl: 10.873433 +epoch: 2, batch: 2693, sum loss: 3736.839111, avg loss: 2.248399, ppl: 9.472558 +epoch: 2, batch: 2694, sum loss: 4447.716797, avg loss: 2.545917, ppl: 12.754916 +epoch: 2, batch: 2695, sum loss: 4736.463379, avg loss: 2.930980, ppl: 18.745987 +epoch: 2, batch: 2696, sum loss: 3923.260010, avg loss: 2.409865, ppl: 11.132457 +epoch: 2, batch: 2697, sum loss: 4931.649902, avg loss: 2.789395, ppl: 16.271170 +epoch: 2, batch: 2698, sum loss: 4737.892578, avg loss: 2.788636, ppl: 16.258831 +epoch: 2, batch: 2699, sum loss: 4469.053711, avg loss: 2.610429, ppl: 13.604880 +epoch: 2, batch: 2700, sum loss: 4115.978027, avg loss: 2.738508, ppl: 15.463900 +epoch: 2, batch: 2701, sum loss: 4430.768066, avg loss: 2.777911, ppl: 16.085382 +epoch: 2, batch: 2702, sum loss: 4184.939941, avg loss: 2.483644, ppl: 11.984855 +epoch: 2, batch: 2703, sum loss: 3567.196777, avg loss: 2.538930, ppl: 12.666113 +epoch: 2, batch: 2704, sum loss: 4233.007324, avg loss: 2.543875, ppl: 12.728897 +epoch: 2, batch: 2705, sum loss: 3556.016113, avg loss: 2.415772, ppl: 11.198412 +epoch: 2, batch: 2706, sum loss: 4285.127930, avg loss: 2.656620, ppl: 14.248050 +epoch: 2, batch: 2707, sum loss: 3907.219238, avg loss: 2.358008, ppl: 10.569875 +epoch: 2, batch: 2708, sum loss: 3868.741211, avg loss: 2.427065, ppl: 11.325589 +epoch: 2, batch: 2709, sum loss: 5471.418945, avg loss: 3.053247, ppl: 21.184021 +epoch: 2, batch: 2710, sum loss: 4569.601562, avg loss: 2.644445, ppl: 14.075637 +epoch: 2, batch: 2711, sum loss: 5233.367676, avg loss: 2.652493, ppl: 14.189362 +epoch: 2, batch: 2712, sum loss: 3491.444580, avg loss: 2.253999, ppl: 9.525753 +epoch: 2, batch: 2713, sum loss: 4651.678223, avg loss: 2.625101, 
ppl: 13.805964 +epoch: 2, batch: 2714, sum loss: 5946.846680, avg loss: 2.929481, ppl: 18.717918 +epoch: 2, batch: 2715, sum loss: 4941.329590, avg loss: 2.825231, ppl: 16.864845 +epoch: 2, batch: 2716, sum loss: 5270.846680, avg loss: 2.626232, ppl: 13.821588 +epoch: 2, batch: 2717, sum loss: 4033.040527, avg loss: 2.447233, ppl: 11.556331 +epoch: 2, batch: 2718, sum loss: 4196.555176, avg loss: 2.255000, ppl: 9.535295 +epoch: 2, batch: 2719, sum loss: 4591.846680, avg loss: 2.655782, ppl: 14.236111 +epoch: 2, batch: 2720, sum loss: 4689.605469, avg loss: 2.446325, ppl: 11.545841 +epoch: 2, batch: 2721, sum loss: 4315.260742, avg loss: 2.700413, ppl: 14.885885 +epoch: 2, batch: 2722, sum loss: 3615.553955, avg loss: 2.560591, ppl: 12.943459 +epoch: 2, batch: 2723, sum loss: 4869.453613, avg loss: 2.737186, ppl: 15.443461 +epoch: 2, batch: 2724, sum loss: 3605.210693, avg loss: 2.401873, ppl: 11.043839 +epoch: 2, batch: 2725, sum loss: 3730.942627, avg loss: 2.498957, ppl: 12.169793 +epoch: 2, batch: 2726, sum loss: 4320.569824, avg loss: 2.550514, ppl: 12.813682 +epoch: 2, batch: 2727, sum loss: 4139.104492, avg loss: 2.583711, ppl: 13.246200 +epoch: 2, batch: 2728, sum loss: 4165.255859, avg loss: 2.411845, ppl: 11.154519 +epoch: 2, batch: 2729, sum loss: 4658.712891, avg loss: 2.708554, ppl: 15.007560 +epoch: 2, batch: 2730, sum loss: 4800.735840, avg loss: 2.703117, ppl: 14.926187 +epoch: 2, batch: 2731, sum loss: 3622.099121, avg loss: 2.643868, ppl: 14.067512 +epoch: 2, batch: 2732, sum loss: 5116.410156, avg loss: 2.702805, ppl: 14.921529 +epoch: 2, batch: 2733, sum loss: 4514.890625, avg loss: 2.562367, ppl: 12.966475 +epoch: 2, batch: 2734, sum loss: 6360.123535, avg loss: 2.898871, ppl: 18.153641 +epoch: 2, batch: 2735, sum loss: 4805.803223, avg loss: 2.686307, ppl: 14.677371 +epoch: 2, batch: 2736, sum loss: 4166.747559, avg loss: 2.438120, ppl: 11.451496 +epoch: 2, batch: 2737, sum loss: 3557.049561, avg loss: 2.371366, ppl: 10.712018 +epoch: 2, batch: 
2738, sum loss: 4638.677246, avg loss: 2.611868, ppl: 13.624476 +epoch: 2, batch: 2739, sum loss: 4214.839844, avg loss: 2.489569, ppl: 12.056075 +epoch: 2, batch: 2740, sum loss: 4583.459961, avg loss: 2.705702, ppl: 14.964823 +epoch: 2, batch: 2741, sum loss: 3790.999268, avg loss: 2.396333, ppl: 10.982831 +epoch: 2, batch: 2742, sum loss: 4596.657227, avg loss: 2.772411, ppl: 15.997154 +epoch: 2, batch: 2743, sum loss: 4162.773438, avg loss: 2.685660, ppl: 14.667884 +epoch: 2, batch: 2744, sum loss: 5418.949707, avg loss: 2.643390, ppl: 14.060791 +epoch: 2, batch: 2745, sum loss: 3728.067383, avg loss: 2.532655, ppl: 12.586874 +epoch: 2, batch: 2746, sum loss: 5139.448242, avg loss: 2.781087, ppl: 16.136547 +epoch: 2, batch: 2747, sum loss: 4480.835938, avg loss: 2.606653, ppl: 13.553607 +epoch: 2, batch: 2748, sum loss: 4124.663086, avg loss: 2.513506, ppl: 12.348144 +epoch: 2, batch: 2749, sum loss: 4240.573730, avg loss: 2.537746, ppl: 12.651126 +epoch: 2, batch: 2750, sum loss: 4115.882812, avg loss: 2.535972, ppl: 12.628701 +epoch: 2, batch: 2751, sum loss: 3765.842773, avg loss: 2.371438, ppl: 10.712781 +epoch: 2, batch: 2752, sum loss: 4430.430664, avg loss: 2.830946, ppl: 16.961498 +epoch: 2, batch: 2753, sum loss: 5772.983398, avg loss: 3.077283, ppl: 21.699373 +epoch: 2, batch: 2754, sum loss: 5061.958984, avg loss: 2.679703, ppl: 14.580762 +epoch: 2, batch: 2755, sum loss: 4402.732910, avg loss: 2.531761, ppl: 12.575638 +epoch: 2, batch: 2756, sum loss: 3149.968506, avg loss: 2.144294, ppl: 8.536015 +epoch: 2, batch: 2757, sum loss: 4307.178711, avg loss: 2.535126, ppl: 12.618020 +epoch: 2, batch: 2758, sum loss: 4701.200684, avg loss: 2.638160, ppl: 13.987443 +epoch: 2, batch: 2759, sum loss: 4524.263672, avg loss: 2.746972, ppl: 15.595343 +epoch: 2, batch: 2760, sum loss: 5038.921387, avg loss: 2.874456, ppl: 17.715788 +epoch: 2, batch: 2761, sum loss: 4165.452148, avg loss: 2.585632, ppl: 13.271669 +epoch: 2, batch: 2762, sum loss: 4425.052734, avg 
loss: 2.711429, ppl: 15.050773 +epoch: 2, batch: 2763, sum loss: 5007.016602, avg loss: 2.760208, ppl: 15.803124 +epoch: 2, batch: 2764, sum loss: 3781.274902, avg loss: 2.254785, ppl: 9.533246 +epoch: 2, batch: 2765, sum loss: 4238.976074, avg loss: 2.535273, ppl: 12.619874 +epoch: 2, batch: 2766, sum loss: 3230.848389, avg loss: 2.253032, ppl: 9.516551 +epoch: 2, batch: 2767, sum loss: 4203.015625, avg loss: 2.558135, ppl: 12.911712 +epoch: 2, batch: 2768, sum loss: 4757.758301, avg loss: 2.602712, ppl: 13.500306 +epoch: 2, batch: 2769, sum loss: 4699.751465, avg loss: 2.616788, ppl: 13.691677 +epoch: 2, batch: 2770, sum loss: 3982.789062, avg loss: 2.325037, ppl: 10.227061 +epoch: 2, batch: 2771, sum loss: 3892.230225, avg loss: 2.289547, ppl: 9.870470 +epoch: 2, batch: 2772, sum loss: 3692.742920, avg loss: 2.355066, ppl: 10.538820 +epoch: 2, batch: 2773, sum loss: 3936.061523, avg loss: 2.323531, ppl: 10.211667 +epoch: 2, batch: 2774, sum loss: 3788.995117, avg loss: 2.425733, ppl: 11.310518 +epoch: 2, batch: 2775, sum loss: 4062.443359, avg loss: 2.496892, ppl: 12.144690 +epoch: 2, batch: 2776, sum loss: 3914.236572, avg loss: 2.673659, ppl: 14.492900 +epoch: 2, batch: 2777, sum loss: 4605.203613, avg loss: 2.660429, ppl: 14.302430 +epoch: 2, batch: 2778, sum loss: 3525.546631, avg loss: 2.314870, ppl: 10.123606 +epoch: 2, batch: 2779, sum loss: 4465.859863, avg loss: 2.534540, ppl: 12.610634 +epoch: 2, batch: 2780, sum loss: 4505.848145, avg loss: 2.677272, ppl: 14.545354 +epoch: 2, batch: 2781, sum loss: 4443.733887, avg loss: 2.496480, ppl: 12.139684 +epoch: 2, batch: 2782, sum loss: 4397.694824, avg loss: 2.474786, ppl: 11.879166 +epoch: 2, batch: 2783, sum loss: 3201.034180, avg loss: 2.203052, ppl: 9.052598 +epoch: 2, batch: 2784, sum loss: 4922.243652, avg loss: 2.611270, ppl: 13.616332 +epoch: 2, batch: 2785, sum loss: 3915.156494, avg loss: 2.620587, ppl: 13.743784 +epoch: 2, batch: 2786, sum loss: 4446.815430, avg loss: 2.950773, ppl: 19.120737 
+epoch: 2, batch: 2787, sum loss: 4134.371582, avg loss: 2.410712, ppl: 11.141894 +epoch: 2, batch: 2788, sum loss: 5087.753906, avg loss: 2.484255, ppl: 11.992181 +epoch: 2, batch: 2789, sum loss: 5121.768555, avg loss: 2.923384, ppl: 18.604137 +epoch: 2, batch: 2790, sum loss: 5275.162109, avg loss: 2.865379, ppl: 17.555698 +epoch: 2, batch: 2791, sum loss: 4107.921875, avg loss: 2.451027, ppl: 11.600259 +epoch: 2, batch: 2792, sum loss: 3407.269043, avg loss: 2.188355, ppl: 8.920528 +epoch: 2, batch: 2793, sum loss: 4694.680176, avg loss: 2.593746, ppl: 13.379801 +epoch: 2, batch: 2794, sum loss: 3427.117188, avg loss: 2.456715, ppl: 11.666423 +epoch: 2, batch: 2795, sum loss: 4529.156738, avg loss: 2.491285, ppl: 12.076789 +epoch: 2, batch: 2796, sum loss: 4537.557129, avg loss: 2.448763, ppl: 11.574017 +epoch: 2, batch: 2797, sum loss: 3631.434082, avg loss: 2.535918, ppl: 12.628015 +epoch: 2, batch: 2798, sum loss: 4139.329102, avg loss: 2.712536, ppl: 15.067432 +epoch: 2, batch: 2799, sum loss: 3886.331055, avg loss: 2.378416, ppl: 10.787797 +epoch: 2, batch: 2800, sum loss: 3741.046387, avg loss: 2.576478, ppl: 13.150743 +epoch: 2, batch: 2801, sum loss: 4353.595215, avg loss: 2.469424, ppl: 11.815645 +epoch: 2, batch: 2802, sum loss: 4471.483887, avg loss: 2.711634, ppl: 15.053848 +epoch: 2, batch: 2803, sum loss: 4344.510742, avg loss: 2.514184, ppl: 12.356527 +epoch: 2, batch: 2804, sum loss: 3938.562744, avg loss: 2.431212, ppl: 11.372654 +epoch: 2, batch: 2805, sum loss: 4363.011719, avg loss: 2.438799, ppl: 11.459271 +epoch: 2, batch: 2806, sum loss: 4754.310059, avg loss: 2.491777, ppl: 12.082725 +epoch: 2, batch: 2807, sum loss: 4661.643066, avg loss: 2.466478, ppl: 11.780880 +epoch: 2, batch: 2808, sum loss: 4645.133789, avg loss: 2.555079, ppl: 12.872316 +epoch: 2, batch: 2809, sum loss: 3771.500488, avg loss: 2.457004, ppl: 11.669791 +epoch: 2, batch: 2810, sum loss: 3605.456055, avg loss: 2.410064, ppl: 11.134676 +epoch: 2, batch: 2811, sum 
loss: 4512.903320, avg loss: 2.574389, ppl: 13.123293 +epoch: 2, batch: 2812, sum loss: 5109.068359, avg loss: 2.934560, ppl: 18.813215 +epoch: 2, batch: 2813, sum loss: 4585.643555, avg loss: 2.755795, ppl: 15.733552 +epoch: 2, batch: 2814, sum loss: 4680.210449, avg loss: 2.824508, ppl: 16.852659 +epoch: 2, batch: 2815, sum loss: 3359.143066, avg loss: 2.195518, ppl: 8.984658 +epoch: 2, batch: 2816, sum loss: 5115.271973, avg loss: 2.671160, ppl: 14.456733 +epoch: 2, batch: 2817, sum loss: 3825.338379, avg loss: 2.314179, ppl: 10.116615 +epoch: 2, batch: 2818, sum loss: 5189.599121, avg loss: 2.757492, ppl: 15.760261 +epoch: 2, batch: 2819, sum loss: 4391.589844, avg loss: 2.620281, ppl: 13.739577 +epoch: 2, batch: 2820, sum loss: 5000.368652, avg loss: 2.716115, ppl: 15.121469 +epoch: 2, batch: 2821, sum loss: 4570.050781, avg loss: 2.681955, ppl: 14.613629 +epoch: 2, batch: 2822, sum loss: 3846.846191, avg loss: 2.493095, ppl: 12.098668 +epoch: 2, batch: 2823, sum loss: 4371.256836, avg loss: 2.554797, ppl: 12.868683 +epoch: 2, batch: 2824, sum loss: 4473.159180, avg loss: 2.528637, ppl: 12.536412 +epoch: 2, batch: 2825, sum loss: 4529.670898, avg loss: 2.582480, ppl: 13.229914 +epoch: 2, batch: 2826, sum loss: 4042.376953, avg loss: 2.451411, ppl: 11.604710 +epoch: 2, batch: 2827, sum loss: 3130.711670, avg loss: 2.184726, ppl: 8.888210 +epoch: 2, batch: 2828, sum loss: 2735.950684, avg loss: 2.127489, ppl: 8.393763 +epoch: 2, batch: 2829, sum loss: 4488.187500, avg loss: 2.842424, ppl: 17.157303 +epoch: 2, batch: 2830, sum loss: 5438.089355, avg loss: 2.811835, ppl: 16.640430 +epoch: 2, batch: 2831, sum loss: 4704.476074, avg loss: 2.731984, ppl: 15.363333 +epoch: 2, batch: 2832, sum loss: 3751.736572, avg loss: 2.423603, ppl: 11.286447 +epoch: 2, batch: 2833, sum loss: 4456.296875, avg loss: 2.658888, ppl: 14.280405 +epoch: 2, batch: 2834, sum loss: 4913.357422, avg loss: 2.780621, ppl: 16.129034 +epoch: 2, batch: 2835, sum loss: 4772.304199, avg loss: 
2.579624, ppl: 13.192176 +epoch: 2, batch: 2836, sum loss: 4758.816406, avg loss: 2.623383, ppl: 13.782268 +epoch: 2, batch: 2837, sum loss: 5134.354004, avg loss: 2.619568, ppl: 13.729795 +epoch: 2, batch: 2838, sum loss: 4338.069824, avg loss: 2.749094, ppl: 15.628463 +epoch: 2, batch: 2839, sum loss: 5239.212402, avg loss: 2.720256, ppl: 15.184207 +epoch: 2, batch: 2840, sum loss: 5098.236328, avg loss: 2.670632, ppl: 14.449100 +epoch: 2, batch: 2841, sum loss: 5051.763184, avg loss: 2.620209, ppl: 13.738598 +epoch: 2, batch: 2842, sum loss: 4186.086426, avg loss: 2.532418, ppl: 12.583892 +epoch: 2, batch: 2843, sum loss: 4320.975098, avg loss: 2.543246, ppl: 12.720900 +epoch: 2, batch: 2844, sum loss: 4505.820801, avg loss: 2.652043, ppl: 14.182983 +epoch: 2, batch: 2845, sum loss: 6119.173340, avg loss: 3.117256, ppl: 22.584322 +epoch: 2, batch: 2846, sum loss: 3586.273438, avg loss: 2.625383, ppl: 13.809864 +epoch: 2, batch: 2847, sum loss: 4217.187012, avg loss: 2.733109, ppl: 15.380631 +epoch: 2, batch: 2848, sum loss: 3734.435059, avg loss: 2.432857, ppl: 11.391376 +epoch: 2, batch: 2849, sum loss: 3551.052490, avg loss: 2.454079, ppl: 11.635714 +epoch: 2, batch: 2850, sum loss: 4572.191406, avg loss: 2.767670, ppl: 15.921500 +epoch: 2, batch: 2851, sum loss: 3991.595215, avg loss: 2.309951, ppl: 10.073932 +epoch: 2, batch: 2852, sum loss: 4563.938965, avg loss: 2.536931, ppl: 12.640818 +epoch: 2, batch: 2853, sum loss: 4116.010742, avg loss: 2.572507, ppl: 13.098618 +epoch: 2, batch: 2854, sum loss: 3923.866455, avg loss: 2.447827, ppl: 11.563191 +epoch: 2, batch: 2855, sum loss: 3509.731934, avg loss: 2.262883, ppl: 9.610759 +epoch: 2, batch: 2856, sum loss: 5039.675293, avg loss: 2.611231, ppl: 13.615800 +epoch: 2, batch: 2857, sum loss: 4193.254395, avg loss: 2.590028, ppl: 13.330139 +epoch: 2, batch: 2858, sum loss: 3790.685059, avg loss: 2.500452, ppl: 12.188003 +epoch: 2, batch: 2859, sum loss: 4707.706055, avg loss: 2.588074, ppl: 13.304119 +epoch: 
2, batch: 2860, sum loss: 4316.956055, avg loss: 2.568088, ppl: 13.040867 +epoch: 2, batch: 2861, sum loss: 3324.749023, avg loss: 2.271003, ppl: 9.689118 +epoch: 2, batch: 2862, sum loss: 4399.256348, avg loss: 2.674320, ppl: 14.502481 +epoch: 2, batch: 2863, sum loss: 4009.059814, avg loss: 2.545435, ppl: 12.748769 +epoch: 2, batch: 2864, sum loss: 3811.959229, avg loss: 2.478517, ppl: 11.923570 +epoch: 2, batch: 2865, sum loss: 3254.308350, avg loss: 2.185566, ppl: 8.895686 +epoch: 2, batch: 2866, sum loss: 4408.597656, avg loss: 2.693096, ppl: 14.777351 +epoch: 2, batch: 2867, sum loss: 3601.921875, avg loss: 2.222037, ppl: 9.226104 +epoch: 2, batch: 2868, sum loss: 4419.267578, avg loss: 2.391379, ppl: 10.928550 +epoch: 2, batch: 2869, sum loss: 4326.684082, avg loss: 2.564721, ppl: 12.997030 +epoch: 2, batch: 2870, sum loss: 4389.369141, avg loss: 2.668309, ppl: 14.415578 +epoch: 2, batch: 2871, sum loss: 3668.105957, avg loss: 2.345336, ppl: 10.436781 +epoch: 2, batch: 2872, sum loss: 4361.640137, avg loss: 2.513914, ppl: 12.353182 +epoch: 2, batch: 2873, sum loss: 4033.509766, avg loss: 2.508402, ppl: 12.285278 +epoch: 2, batch: 2874, sum loss: 5629.516113, avg loss: 2.799362, ppl: 16.434155 +epoch: 2, batch: 2875, sum loss: 4224.828125, avg loss: 2.701297, ppl: 14.899040 +epoch: 2, batch: 2876, sum loss: 4695.547363, avg loss: 2.578554, ppl: 13.178074 +epoch: 2, batch: 2877, sum loss: 3594.056641, avg loss: 2.324746, ppl: 10.224079 +epoch: 2, batch: 2878, sum loss: 4778.905762, avg loss: 2.517864, ppl: 12.402081 +epoch: 2, batch: 2879, sum loss: 4162.289551, avg loss: 2.574081, ppl: 13.119261 +epoch: 2, batch: 2880, sum loss: 4302.459473, avg loss: 2.536828, ppl: 12.639509 +epoch: 2, batch: 2881, sum loss: 3699.390625, avg loss: 2.360811, ppl: 10.599545 +epoch: 2, batch: 2882, sum loss: 3676.889160, avg loss: 2.398493, ppl: 11.006573 +epoch: 2, batch: 2883, sum loss: 3828.551270, avg loss: 2.592113, ppl: 13.357974 +epoch: 2, batch: 2884, sum loss: 
5058.575195, avg loss: 2.708017, ppl: 14.999496 +epoch: 2, batch: 2885, sum loss: 3715.014160, avg loss: 2.345337, ppl: 10.436790 +epoch: 2, batch: 2886, sum loss: 4544.965820, avg loss: 2.573593, ppl: 13.112860 +epoch: 2, batch: 2887, sum loss: 4245.177246, avg loss: 2.617249, ppl: 13.697982 +epoch: 2, batch: 2888, sum loss: 5362.995605, avg loss: 2.827093, ppl: 16.896275 +epoch: 2, batch: 2889, sum loss: 5155.249023, avg loss: 2.762727, ppl: 15.842988 +epoch: 2, batch: 2890, sum loss: 4695.132812, avg loss: 2.695254, ppl: 14.809281 +epoch: 2, batch: 2891, sum loss: 4572.529785, avg loss: 2.728240, ppl: 15.305922 +epoch: 2, batch: 2892, sum loss: 3574.474121, avg loss: 2.382983, ppl: 10.837179 +epoch: 2, batch: 2893, sum loss: 4243.347656, avg loss: 2.524300, ppl: 12.482150 +epoch: 2, batch: 2894, sum loss: 3952.574219, avg loss: 2.286047, ppl: 9.835975 +epoch: 2, batch: 2895, sum loss: 4744.123047, avg loss: 2.586763, ppl: 13.286689 +epoch: 2, batch: 2896, sum loss: 3740.257812, avg loss: 2.604636, ppl: 13.526307 +epoch: 2, batch: 2897, sum loss: 4989.763672, avg loss: 2.759825, ppl: 15.797078 +epoch: 2, batch: 2898, sum loss: 4374.665527, avg loss: 2.404984, ppl: 11.078250 +epoch: 2, batch: 2899, sum loss: 4197.571289, avg loss: 2.630057, ppl: 13.874562 +epoch: 2, batch: 2900, sum loss: 3945.779297, avg loss: 2.407431, ppl: 11.105393 +epoch: 2, batch: 2901, sum loss: 4547.612793, avg loss: 2.964545, ppl: 19.385891 +epoch: 2, batch: 2902, sum loss: 4408.685059, avg loss: 2.723092, ppl: 15.227326 +epoch: 2, batch: 2903, sum loss: 4846.782227, avg loss: 2.665997, ppl: 14.382278 +epoch: 2, batch: 2904, sum loss: 3956.375488, avg loss: 2.625332, ppl: 13.809159 +epoch: 2, batch: 2905, sum loss: 3713.701660, avg loss: 2.420927, ppl: 11.256287 +epoch: 2, batch: 2906, sum loss: 3942.985352, avg loss: 2.436950, ppl: 11.438104 +epoch: 2, batch: 2907, sum loss: 4326.694824, avg loss: 2.543618, ppl: 12.725632 +epoch: 2, batch: 2908, sum loss: 3471.515625, avg loss: 2.283892, 
ppl: 9.814805 +epoch: 2, batch: 2909, sum loss: 4019.984375, avg loss: 2.359146, ppl: 10.581909 +epoch: 2, batch: 2910, sum loss: 3428.537598, avg loss: 2.264556, ppl: 9.626848 +epoch: 2, batch: 2911, sum loss: 4082.047363, avg loss: 2.580308, ppl: 13.201203 +epoch: 2, batch: 2912, sum loss: 5059.643066, avg loss: 2.664372, ppl: 14.358935 +epoch: 2, batch: 2913, sum loss: 5106.772461, avg loss: 2.682128, ppl: 14.616170 +epoch: 2, batch: 2914, sum loss: 4481.038086, avg loss: 2.541712, ppl: 12.701395 +epoch: 2, batch: 2915, sum loss: 4383.065430, avg loss: 2.557214, ppl: 12.899835 +epoch: 2, batch: 2916, sum loss: 5888.234375, avg loss: 2.797261, ppl: 16.399666 +epoch: 2, batch: 2917, sum loss: 3850.574707, avg loss: 2.435531, ppl: 11.421884 +epoch: 2, batch: 2918, sum loss: 3985.633545, avg loss: 2.425827, ppl: 11.311578 +epoch: 2, batch: 2919, sum loss: 3702.654541, avg loss: 2.295508, ppl: 9.929481 +epoch: 2, batch: 2920, sum loss: 4878.952637, avg loss: 2.638698, ppl: 13.994968 +epoch: 2, batch: 2921, sum loss: 5039.937012, avg loss: 2.881611, ppl: 17.842989 +epoch: 2, batch: 2922, sum loss: 4840.606934, avg loss: 2.541001, ppl: 12.692370 +epoch: 2, batch: 2923, sum loss: 3930.263916, avg loss: 2.748436, ppl: 15.618193 +epoch: 2, batch: 2924, sum loss: 3821.101562, avg loss: 2.364543, ppl: 10.639175 +epoch: 2, batch: 2925, sum loss: 4501.388184, avg loss: 2.685793, ppl: 14.669826 +epoch: 2, batch: 2926, sum loss: 4085.838623, avg loss: 2.537788, ppl: 12.651653 +epoch: 2, batch: 2927, sum loss: 3634.276367, avg loss: 2.250326, ppl: 9.490829 +epoch: 2, batch: 2928, sum loss: 4483.516602, avg loss: 2.825152, ppl: 16.863514 +epoch: 2, batch: 2929, sum loss: 4706.654297, avg loss: 2.701868, ppl: 14.907554 +epoch: 2, batch: 2930, sum loss: 4845.376953, avg loss: 2.691876, ppl: 14.759341 +epoch: 2, batch: 2931, sum loss: 4421.748047, avg loss: 2.505240, ppl: 12.246494 +epoch: 2, batch: 2932, sum loss: 4494.740234, avg loss: 2.781399, ppl: 16.141581 +epoch: 2, batch: 
2933, sum loss: 3835.153809, avg loss: 2.455284, ppl: 11.649743 +epoch: 2, batch: 2934, sum loss: 3182.557129, avg loss: 2.273255, ppl: 9.710959 +epoch: 2, batch: 2935, sum loss: 4537.107910, avg loss: 2.746433, ppl: 15.586942 +epoch: 2, batch: 2936, sum loss: 4517.391602, avg loss: 2.602184, ppl: 13.493176 +epoch: 2, batch: 2937, sum loss: 3454.110840, avg loss: 2.356147, ppl: 10.550219 +epoch: 2, batch: 2938, sum loss: 4629.578125, avg loss: 2.767231, ppl: 15.914513 +epoch: 2, batch: 2939, sum loss: 4652.405762, avg loss: 2.527108, ppl: 12.517253 +epoch: 2, batch: 2940, sum loss: 4075.464844, avg loss: 2.387501, ppl: 10.886260 +epoch: 2, batch: 2941, sum loss: 5201.421387, avg loss: 2.652433, ppl: 14.188519 +epoch: 2, batch: 2942, sum loss: 4441.014648, avg loss: 2.479628, ppl: 11.936827 +epoch: 2, batch: 2943, sum loss: 4189.790527, avg loss: 2.573581, ppl: 13.112703 +epoch: 2, batch: 2944, sum loss: 4238.342285, avg loss: 2.430242, ppl: 11.361632 +epoch: 2, batch: 2945, sum loss: 4204.235840, avg loss: 2.447169, ppl: 11.555585 +epoch: 2, batch: 2946, sum loss: 4829.999023, avg loss: 3.055028, ppl: 21.221773 +epoch: 2, batch: 2947, sum loss: 4754.766602, avg loss: 2.585518, ppl: 13.270157 +epoch: 2, batch: 2948, sum loss: 4870.596191, avg loss: 2.638459, ppl: 13.991632 +epoch: 2, batch: 2949, sum loss: 4147.472656, avg loss: 2.590551, ppl: 13.337123 +epoch: 2, batch: 2950, sum loss: 4142.850098, avg loss: 2.595771, ppl: 13.406918 +epoch: 2, batch: 2951, sum loss: 3738.851074, avg loss: 2.543436, ppl: 12.723314 +epoch: 2, batch: 2952, sum loss: 4180.218262, avg loss: 2.524286, ppl: 12.481983 +epoch: 2, batch: 2953, sum loss: 3998.997070, avg loss: 2.636122, ppl: 13.958965 +epoch: 2, batch: 2954, sum loss: 4545.104980, avg loss: 2.639434, ppl: 14.005269 +epoch: 2, batch: 2955, sum loss: 4463.528809, avg loss: 2.690494, ppl: 14.738949 +epoch: 2, batch: 2956, sum loss: 4675.616211, avg loss: 2.701107, ppl: 14.896214 +epoch: 2, batch: 2957, sum loss: 3955.069824, avg 
loss: 2.523976, ppl: 12.478109 +epoch: 2, batch: 2958, sum loss: 2958.245605, avg loss: 2.293214, ppl: 9.906723 +epoch: 2, batch: 2959, sum loss: 3657.754883, avg loss: 2.443390, ppl: 11.511999 +epoch: 2, batch: 2960, sum loss: 4302.064941, avg loss: 2.553154, ppl: 12.847564 +epoch: 2, batch: 2961, sum loss: 4885.630371, avg loss: 2.711227, ppl: 15.047723 +epoch: 2, batch: 2962, sum loss: 4236.333984, avg loss: 2.433276, ppl: 11.396159 +epoch: 2, batch: 2963, sum loss: 3522.017090, avg loss: 2.285540, ppl: 9.830995 +epoch: 2, batch: 2964, sum loss: 3402.938721, avg loss: 2.340398, ppl: 10.385370 +epoch: 2, batch: 2965, sum loss: 3788.829834, avg loss: 2.315911, ppl: 10.134149 +epoch: 2, batch: 2966, sum loss: 4720.699707, avg loss: 2.656556, ppl: 14.247136 +epoch: 2, batch: 2967, sum loss: 5152.735352, avg loss: 2.849964, ppl: 17.287163 +epoch: 2, batch: 2968, sum loss: 4209.558105, avg loss: 2.429058, ppl: 11.348190 +epoch: 2, batch: 2969, sum loss: 4152.227051, avg loss: 2.627992, ppl: 13.845938 +epoch: 2, batch: 2970, sum loss: 4482.982910, avg loss: 2.539934, ppl: 12.678830 +epoch: 2, batch: 2971, sum loss: 4838.024414, avg loss: 2.790095, ppl: 16.282560 +epoch: 2, batch: 2972, sum loss: 4078.440918, avg loss: 2.592779, ppl: 13.366862 +epoch: 2, batch: 2973, sum loss: 4332.512695, avg loss: 2.649855, ppl: 14.151985 +epoch: 2, batch: 2974, sum loss: 5827.764160, avg loss: 2.947782, ppl: 19.063616 +epoch: 2, batch: 2975, sum loss: 4401.849609, avg loss: 2.519662, ppl: 12.424396 +epoch: 2, batch: 2976, sum loss: 4797.857422, avg loss: 2.706068, ppl: 14.970290 +epoch: 2, batch: 2977, sum loss: 4960.593750, avg loss: 2.760486, ppl: 15.807529 +epoch: 2, batch: 2978, sum loss: 4078.363281, avg loss: 2.534719, ppl: 12.612889 +epoch: 2, batch: 2979, sum loss: 4345.644043, avg loss: 2.685812, ppl: 14.670109 +epoch: 2, batch: 2980, sum loss: 5003.760254, avg loss: 2.559468, ppl: 12.928941 +epoch: 2, batch: 2981, sum loss: 3292.254395, avg loss: 2.251884, ppl: 9.505628 
+epoch: 2, batch: 2982, sum loss: 3384.004395, avg loss: 2.232193, ppl: 9.320283 +epoch: 2, batch: 2983, sum loss: 4404.645508, avg loss: 2.597079, ppl: 13.424465 +epoch: 2, batch: 2984, sum loss: 4680.312500, avg loss: 2.779283, ppl: 16.107468 +epoch: 2, batch: 2985, sum loss: 3248.556152, avg loss: 2.366028, ppl: 10.654983 +epoch: 2, batch: 2986, sum loss: 4592.850586, avg loss: 2.765112, ppl: 15.880814 +epoch: 2, batch: 2987, sum loss: 4594.269531, avg loss: 2.760979, ppl: 15.815325 +epoch: 2, batch: 2988, sum loss: 4242.984375, avg loss: 2.656847, ppl: 14.251284 +epoch: 2, batch: 2989, sum loss: 4048.075195, avg loss: 2.552380, ppl: 12.837626 +epoch: 2, batch: 2990, sum loss: 4845.746094, avg loss: 2.815657, ppl: 16.704149 +epoch: 2, batch: 2991, sum loss: 4304.256836, avg loss: 2.610222, ppl: 13.602075 +epoch: 2, batch: 2992, sum loss: 4026.134766, avg loss: 2.446012, ppl: 11.542219 +epoch: 2, batch: 2993, sum loss: 4745.850586, avg loss: 2.843529, ppl: 17.176281 +epoch: 2, batch: 2994, sum loss: 4574.298828, avg loss: 2.676594, ppl: 14.535498 +epoch: 2, batch: 2995, sum loss: 4975.344238, avg loss: 2.679237, ppl: 14.573975 +epoch: 2, batch: 2996, sum loss: 3057.341064, avg loss: 2.139497, ppl: 8.495164 +epoch: 2, batch: 2997, sum loss: 4611.642090, avg loss: 2.606920, ppl: 13.557237 +epoch: 2, batch: 2998, sum loss: 3873.947754, avg loss: 2.378114, ppl: 10.784544 +epoch: 2, batch: 2999, sum loss: 4958.669922, avg loss: 2.654534, ppl: 14.218360 +epoch: 2, batch: 3000, sum loss: 4011.049316, avg loss: 2.686570, ppl: 14.681235 +epoch: 2, batch: 3001, sum loss: 4009.668945, avg loss: 2.488932, ppl: 12.048400 +epoch: 2, batch: 3002, sum loss: 4144.396484, avg loss: 2.585400, ppl: 13.268597 +epoch: 2, batch: 3003, sum loss: 3707.281738, avg loss: 2.642396, ppl: 14.046823 +epoch: 2, batch: 3004, sum loss: 3365.563232, avg loss: 2.356837, ppl: 10.557506 +epoch: 2, batch: 3005, sum loss: 4262.401367, avg loss: 2.730558, ppl: 15.341452 +epoch: 2, batch: 3006, sum loss: 
3742.998291, avg loss: 2.440025, ppl: 11.473326 +epoch: 2, batch: 3007, sum loss: 4427.586426, avg loss: 2.789910, ppl: 16.279549 +epoch: 2, batch: 3008, sum loss: 3815.135254, avg loss: 2.336274, ppl: 10.342628 +epoch: 2, batch: 3009, sum loss: 3895.247803, avg loss: 2.566039, ppl: 13.014177 +epoch: 2, batch: 3010, sum loss: 4228.272949, avg loss: 2.719147, ppl: 15.167375 +epoch: 2, batch: 3011, sum loss: 4954.003906, avg loss: 2.744601, ppl: 15.558397 +epoch: 2, batch: 3012, sum loss: 3954.248535, avg loss: 2.712105, ppl: 15.060939 +epoch: 2, batch: 3013, sum loss: 5267.401855, avg loss: 2.746299, ppl: 15.584849 +epoch: 2, batch: 3014, sum loss: 4537.964844, avg loss: 2.489284, ppl: 12.052647 +epoch: 2, batch: 3015, sum loss: 4724.818359, avg loss: 2.604641, ppl: 13.526368 +epoch: 2, batch: 3016, sum loss: 3213.538086, avg loss: 2.376877, ppl: 10.771215 +epoch: 2, batch: 3017, sum loss: 3122.610107, avg loss: 2.270989, ppl: 9.688980 +epoch: 2, batch: 3018, sum loss: 3858.437012, avg loss: 2.402514, ppl: 11.050920 +epoch: 2, batch: 3019, sum loss: 4037.414551, avg loss: 2.470878, ppl: 11.832830 +epoch: 2, batch: 3020, sum loss: 3609.718750, avg loss: 2.351608, ppl: 10.502447 +epoch: 2, batch: 3021, sum loss: 4041.877686, avg loss: 2.274551, ppl: 9.723556 +epoch: 2, batch: 3022, sum loss: 4706.641602, avg loss: 2.622085, ppl: 13.764387 +epoch: 2, batch: 3023, sum loss: 4343.957031, avg loss: 2.763331, ppl: 15.852567 +epoch: 2, batch: 3024, sum loss: 5535.293457, avg loss: 2.627097, ppl: 13.833554 +epoch: 2, batch: 3025, sum loss: 3162.947998, avg loss: 2.185866, ppl: 8.898350 +epoch: 2, batch: 3026, sum loss: 4364.660156, avg loss: 2.632485, ppl: 13.908291 +epoch: 2, batch: 3027, sum loss: 4460.722656, avg loss: 2.841225, ppl: 17.136740 +epoch: 2, batch: 3028, sum loss: 5278.163086, avg loss: 2.851520, ppl: 17.314075 +epoch: 2, batch: 3029, sum loss: 3649.266357, avg loss: 2.444251, ppl: 11.521912 +epoch: 2, batch: 3030, sum loss: 4615.933105, avg loss: 2.500505, 
ppl: 12.188653 +epoch: 2, batch: 3031, sum loss: 4470.115723, avg loss: 2.720703, ppl: 15.191003 +epoch: 2, batch: 3032, sum loss: 4484.197266, avg loss: 2.540622, ppl: 12.687557 +epoch: 2, batch: 3033, sum loss: 3843.206055, avg loss: 2.359242, ppl: 10.582932 +epoch: 2, batch: 3034, sum loss: 3312.026611, avg loss: 2.233329, ppl: 9.330875 +epoch: 2, batch: 3035, sum loss: 5693.360840, avg loss: 2.871085, ppl: 17.656158 +epoch: 2, batch: 3036, sum loss: 4481.591309, avg loss: 2.605576, ppl: 13.539025 +epoch: 2, batch: 3037, sum loss: 3709.439209, avg loss: 2.438816, ppl: 11.459466 +epoch: 2, batch: 3038, sum loss: 3809.440430, avg loss: 2.558388, ppl: 12.914988 +epoch: 2, batch: 3039, sum loss: 4348.036133, avg loss: 2.603615, ppl: 13.512491 +epoch: 2, batch: 3040, sum loss: 3615.323730, avg loss: 2.586069, ppl: 13.277471 +epoch: 2, batch: 3041, sum loss: 3918.787109, avg loss: 2.470862, ppl: 11.832642 +epoch: 2, batch: 3042, sum loss: 4669.510254, avg loss: 2.777817, ppl: 16.083868 +epoch: 2, batch: 3043, sum loss: 4360.395020, avg loss: 2.321829, ppl: 10.194304 +epoch: 2, batch: 3044, sum loss: 4381.386719, avg loss: 2.740079, ppl: 15.488212 +epoch: 2, batch: 3045, sum loss: 4698.119141, avg loss: 2.768485, ppl: 15.934476 +epoch: 2, batch: 3046, sum loss: 4306.249023, avg loss: 2.598823, ppl: 13.447895 +epoch: 2, batch: 3047, sum loss: 4727.709473, avg loss: 2.875736, ppl: 17.738470 +epoch: 2, batch: 3048, sum loss: 3907.834229, avg loss: 2.479590, ppl: 11.936372 +epoch: 2, batch: 3049, sum loss: 5172.166016, avg loss: 2.868644, ppl: 17.613127 +epoch: 2, batch: 3050, sum loss: 5742.767578, avg loss: 2.888716, ppl: 17.970221 +epoch: 2, batch: 3051, sum loss: 4381.703613, avg loss: 2.420831, ppl: 11.255208 +epoch: 2, batch: 3052, sum loss: 4744.125000, avg loss: 2.608095, ppl: 13.573172 +epoch: 2, batch: 3053, sum loss: 4738.880859, avg loss: 2.416564, ppl: 11.207279 +epoch: 2, batch: 3054, sum loss: 4720.779297, avg loss: 2.608166, ppl: 13.574126 +epoch: 2, batch: 
3055, sum loss: 5329.127441, avg loss: 2.869751, ppl: 17.632622 +epoch: 2, batch: 3056, sum loss: 4189.839355, avg loss: 2.501397, ppl: 12.199524 +epoch: 2, batch: 3057, sum loss: 5486.559570, avg loss: 3.053177, ppl: 21.182543 +epoch: 2, batch: 3058, sum loss: 4158.681152, avg loss: 2.332407, ppl: 10.302708 +epoch: 2, batch: 3059, sum loss: 4843.461426, avg loss: 2.588702, ppl: 13.312484 +epoch: 2, batch: 3060, sum loss: 4510.121582, avg loss: 2.622164, ppl: 13.765477 +epoch: 2, batch: 3061, sum loss: 4868.539062, avg loss: 2.670619, ppl: 14.448917 +epoch: 2, batch: 3062, sum loss: 3553.917480, avg loss: 2.333498, ppl: 10.313957 +epoch: 2, batch: 3063, sum loss: 4144.913574, avg loss: 2.513592, ppl: 12.349212 +epoch: 2, batch: 3064, sum loss: 4812.378418, avg loss: 2.663187, ppl: 14.341918 +epoch: 2, batch: 3065, sum loss: 4998.999512, avg loss: 2.706551, ppl: 14.977530 +epoch: 2, batch: 3066, sum loss: 4778.911133, avg loss: 2.871942, ppl: 17.671299 +epoch: 2, batch: 3067, sum loss: 4114.278320, avg loss: 2.715695, ppl: 15.115117 +epoch: 2, batch: 3068, sum loss: 4701.872559, avg loss: 2.778884, ppl: 16.101049 +epoch: 2, batch: 3069, sum loss: 3804.465088, avg loss: 2.237921, ppl: 9.373820 +epoch: 2, batch: 3070, sum loss: 4522.187988, avg loss: 2.490191, ppl: 12.063585 +epoch: 2, batch: 3071, sum loss: 3585.299805, avg loss: 2.299743, ppl: 9.971623 +epoch: 2, batch: 3072, sum loss: 5109.787109, avg loss: 2.552341, ppl: 12.837123 +epoch: 2, batch: 3073, sum loss: 5432.715820, avg loss: 2.860830, ppl: 17.476023 +epoch: 2, batch: 3074, sum loss: 4717.384766, avg loss: 2.698733, ppl: 14.860888 +epoch: 2, batch: 3075, sum loss: 4391.671387, avg loss: 2.655182, ppl: 14.227580 +epoch: 2, batch: 3076, sum loss: 4488.793945, avg loss: 2.538911, ppl: 12.665866 +epoch: 2, batch: 3077, sum loss: 3841.680664, avg loss: 2.535763, ppl: 12.626059 +epoch: 2, batch: 3078, sum loss: 4209.275391, avg loss: 2.470232, ppl: 11.825191 +epoch: 2, batch: 3079, sum loss: 4387.223633, avg 
loss: 2.609889, ppl: 13.597542 +epoch: 2, batch: 3080, sum loss: 4899.714844, avg loss: 2.811081, ppl: 16.627890 +epoch: 2, batch: 3081, sum loss: 4479.318848, avg loss: 2.968402, ppl: 19.460804 +epoch: 2, batch: 3082, sum loss: 2915.987793, avg loss: 2.024992, ppl: 7.576046 +epoch: 2, batch: 3083, sum loss: 4772.844238, avg loss: 2.609538, ppl: 13.592768 +epoch: 2, batch: 3084, sum loss: 4028.728271, avg loss: 2.440175, ppl: 11.475046 +epoch: 2, batch: 3085, sum loss: 4364.874023, avg loss: 2.471616, ppl: 11.841568 +epoch: 2, batch: 3086, sum loss: 4155.117188, avg loss: 2.598572, ppl: 13.444529 +epoch: 2, batch: 3087, sum loss: 4978.034180, avg loss: 2.750295, ppl: 15.647253 +epoch: 2, batch: 3088, sum loss: 4358.759277, avg loss: 2.548982, ppl: 12.794071 +epoch: 2, batch: 3089, sum loss: 3467.409180, avg loss: 2.419686, ppl: 11.242324 +epoch: 2, batch: 3090, sum loss: 4084.478027, avg loss: 2.409722, ppl: 11.130862 +epoch: 2, batch: 3091, sum loss: 4477.822754, avg loss: 2.707269, ppl: 14.988286 +epoch: 2, batch: 3092, sum loss: 3523.945312, avg loss: 2.508146, ppl: 12.282139 +epoch: 2, batch: 3093, sum loss: 4143.075195, avg loss: 2.500347, ppl: 12.186724 +epoch: 2, batch: 3094, sum loss: 5086.709473, avg loss: 2.583397, ppl: 13.242050 +epoch: 2, batch: 3095, sum loss: 3926.821045, avg loss: 2.501160, ppl: 12.196632 +epoch: 2, batch: 3096, sum loss: 4053.543945, avg loss: 2.499102, ppl: 12.171563 +epoch: 2, batch: 3097, sum loss: 4129.692383, avg loss: 2.469912, ppl: 11.821404 +epoch: 2, batch: 3098, sum loss: 5122.377930, avg loss: 2.746583, ppl: 15.589275 +epoch: 2, batch: 3099, sum loss: 4398.300293, avg loss: 2.655978, ppl: 14.238912 +epoch: 2, batch: 3100, sum loss: 4796.345703, avg loss: 2.732960, ppl: 15.378346 +epoch: 2, batch: 3101, sum loss: 3620.336426, avg loss: 2.227899, ppl: 9.280351 +epoch: 2, batch: 3102, sum loss: 4052.071045, avg loss: 2.551682, ppl: 12.828667 +epoch: 2, batch: 3103, sum loss: 4483.810059, avg loss: 2.466342, ppl: 11.779282 
+epoch: 2, batch: 3104, sum loss: 4607.562500, avg loss: 2.643467, ppl: 14.061868 +epoch: 2, batch: 3105, sum loss: 4650.161621, avg loss: 2.566314, ppl: 13.017758 +epoch: 2, batch: 3106, sum loss: 4921.216797, avg loss: 2.592844, ppl: 13.367729 +epoch: 2, batch: 3107, sum loss: 5130.458008, avg loss: 2.826699, ppl: 16.889612 +epoch: 2, batch: 3108, sum loss: 5244.067383, avg loss: 2.839235, ppl: 17.102682 +epoch: 2, batch: 3109, sum loss: 4238.165527, avg loss: 2.515232, ppl: 12.369476 +epoch: 2, batch: 3110, sum loss: 4042.110107, avg loss: 2.520019, ppl: 12.428830 +epoch: 2, batch: 3111, sum loss: 4284.333496, avg loss: 2.606042, ppl: 13.545334 +epoch: 2, batch: 3112, sum loss: 4662.975586, avg loss: 2.715769, ppl: 15.116231 +epoch: 2, batch: 3113, sum loss: 3540.839600, avg loss: 2.520170, ppl: 12.430716 +epoch: 2, batch: 3114, sum loss: 3817.665039, avg loss: 2.410142, ppl: 11.135545 +epoch: 2, batch: 3115, sum loss: 4543.879883, avg loss: 2.661910, ppl: 14.323619 +epoch: 2, batch: 3116, sum loss: 4257.063477, avg loss: 2.463578, ppl: 11.746772 +epoch: 2, batch: 3117, sum loss: 4706.576660, avg loss: 2.523633, ppl: 12.473838 +epoch: 2, batch: 3118, sum loss: 3785.160400, avg loss: 2.626760, ppl: 13.828885 +epoch: 2, batch: 3119, sum loss: 4438.784180, avg loss: 2.477000, ppl: 11.905498 +epoch: 2, batch: 3120, sum loss: 4214.143066, avg loss: 2.434514, ppl: 11.410267 +epoch: 2, batch: 3121, sum loss: 4875.362305, avg loss: 2.755999, ppl: 15.736755 +epoch: 2, batch: 3122, sum loss: 4600.930176, avg loss: 2.645733, ppl: 14.093777 +epoch: 2, batch: 3123, sum loss: 3537.996338, avg loss: 2.368137, ppl: 10.677478 +epoch: 2, batch: 3124, sum loss: 4581.725586, avg loss: 2.603253, ppl: 13.507608 +epoch: 2, batch: 3125, sum loss: 4394.713867, avg loss: 2.558041, ppl: 12.910496 +epoch: 2, batch: 3126, sum loss: 4403.498047, avg loss: 2.490666, ppl: 12.069316 +epoch: 2, batch: 3127, sum loss: 4524.345215, avg loss: 2.661380, ppl: 14.316029 +epoch: 2, batch: 3128, sum 
loss: 4839.215820, avg loss: 2.685469, ppl: 14.665083 +epoch: 2, batch: 3129, sum loss: 3857.272461, avg loss: 2.524393, ppl: 12.483316 +epoch: 2, batch: 3130, sum loss: 4758.083008, avg loss: 2.627324, ppl: 13.836692 +epoch: 2, batch: 3131, sum loss: 4586.605469, avg loss: 2.642054, ppl: 14.042014 +epoch: 2, batch: 3132, sum loss: 4765.473633, avg loss: 2.680244, ppl: 14.588652 +epoch: 2, batch: 3133, sum loss: 4656.109863, avg loss: 2.839091, ppl: 17.100220 +epoch: 2, batch: 3134, sum loss: 3807.812988, avg loss: 2.288349, ppl: 9.858649 +epoch: 2, batch: 3135, sum loss: 4986.880859, avg loss: 2.573210, ppl: 13.107833 +epoch: 2, batch: 3136, sum loss: 4629.609375, avg loss: 2.603830, ppl: 13.515401 +epoch: 2, batch: 3137, sum loss: 4193.897949, avg loss: 2.587229, ppl: 13.292883 +epoch: 2, batch: 3138, sum loss: 3392.736084, avg loss: 2.298602, ppl: 9.960244 +epoch: 2, batch: 3139, sum loss: 3749.705566, avg loss: 2.376239, ppl: 10.764345 +epoch: 2, batch: 3140, sum loss: 3898.795410, avg loss: 2.467592, ppl: 11.794013 +epoch: 2, batch: 3141, sum loss: 4777.647461, avg loss: 2.744197, ppl: 15.552126 +epoch: 2, batch: 3142, sum loss: 2844.775391, avg loss: 2.024751, ppl: 7.574226 +epoch: 2, batch: 3143, sum loss: 4943.687988, avg loss: 2.647931, ppl: 14.124789 +epoch: 2, batch: 3144, sum loss: 3006.575684, avg loss: 2.279436, ppl: 9.771164 +epoch: 2, batch: 3145, sum loss: 4800.589844, avg loss: 2.781338, ppl: 16.140606 +epoch: 2, batch: 3146, sum loss: 4176.931152, avg loss: 2.489232, ppl: 12.052014 +epoch: 2, batch: 3147, sum loss: 5029.786133, avg loss: 2.622412, ppl: 13.768893 +epoch: 2, batch: 3148, sum loss: 3092.417480, avg loss: 2.188547, ppl: 8.922243 +epoch: 2, batch: 3149, sum loss: 4422.095215, avg loss: 2.809463, ppl: 16.601006 +epoch: 2, batch: 3150, sum loss: 4240.327148, avg loss: 2.787855, ppl: 16.246134 +epoch: 2, batch: 3151, sum loss: 4702.155762, avg loss: 2.769232, ppl: 15.946383 +epoch: 2, batch: 3152, sum loss: 5152.351562, avg loss: 
2.590423, ppl: 13.335409 +epoch: 2, batch: 3153, sum loss: 4747.641602, avg loss: 2.497444, ppl: 12.151400 +epoch: 2, batch: 3154, sum loss: 4498.095703, avg loss: 2.355024, ppl: 10.538381 +epoch: 2, batch: 3155, sum loss: 4581.326172, avg loss: 2.651230, ppl: 14.171464 +epoch: 2, batch: 3156, sum loss: 3304.457520, avg loss: 2.164019, ppl: 8.706060 +epoch: 2, batch: 3157, sum loss: 4319.781250, avg loss: 2.666532, ppl: 14.389976 +epoch: 2, batch: 3158, sum loss: 4099.480469, avg loss: 2.624507, ppl: 13.797776 +epoch: 2, batch: 3159, sum loss: 5651.242676, avg loss: 2.991658, ppl: 19.918690 +epoch: 2, batch: 3160, sum loss: 3610.110840, avg loss: 2.166933, ppl: 8.731466 +epoch: 2, batch: 3161, sum loss: 3567.166992, avg loss: 2.371787, ppl: 10.716521 +epoch: 2, batch: 3162, sum loss: 5048.182129, avg loss: 2.662544, ppl: 14.332699 +epoch: 2, batch: 3163, sum loss: 4384.629395, avg loss: 2.642935, ppl: 14.054394 +epoch: 2, batch: 3164, sum loss: 3714.234619, avg loss: 2.476156, ppl: 11.895456 +epoch: 2, batch: 3165, sum loss: 4249.636719, avg loss: 2.546217, ppl: 12.758752 +epoch: 2, batch: 3166, sum loss: 3281.399170, avg loss: 2.497260, ppl: 12.149155 +epoch: 2, batch: 3167, sum loss: 3630.751953, avg loss: 2.343933, ppl: 10.422145 +epoch: 2, batch: 3168, sum loss: 4765.177246, avg loss: 2.586958, ppl: 13.289289 +epoch: 2, batch: 3169, sum loss: 2985.866943, avg loss: 2.102723, ppl: 8.188439 +epoch: 2, batch: 3170, sum loss: 4128.080566, avg loss: 2.576829, ppl: 13.155359 +epoch: 2, batch: 3171, sum loss: 4144.690430, avg loss: 2.475920, ppl: 11.892649 +epoch: 2, batch: 3172, sum loss: 2865.806396, avg loss: 2.221555, ppl: 9.221662 +epoch: 2, batch: 3173, sum loss: 5097.972168, avg loss: 2.779701, ppl: 16.114206 +epoch: 2, batch: 3174, sum loss: 4385.239258, avg loss: 2.633777, ppl: 13.926275 +epoch: 2, batch: 3175, sum loss: 4413.425293, avg loss: 2.558507, ppl: 12.916524 +epoch: 2, batch: 3176, sum loss: 4519.039551, avg loss: 2.822636, ppl: 16.821127 +epoch: 2, 
batch: 3177, sum loss: 3783.309082, avg loss: 2.318204, ppl: 10.157417 +epoch: 2, batch: 3178, sum loss: 4281.827148, avg loss: 2.508393, ppl: 12.285175 +epoch: 2, batch: 3179, sum loss: 4768.374023, avg loss: 2.721675, ppl: 15.205770 +epoch: 2, batch: 3180, sum loss: 3887.083496, avg loss: 2.237814, ppl: 9.372824 +epoch: 2, batch: 3181, sum loss: 3736.932129, avg loss: 2.487971, ppl: 12.036823 +epoch: 2, batch: 3182, sum loss: 4030.720215, avg loss: 2.478918, ppl: 11.928347 +epoch: 2, batch: 3183, sum loss: 4487.333008, avg loss: 2.564190, ppl: 12.990134 +epoch: 2, batch: 3184, sum loss: 4285.353027, avg loss: 2.589337, ppl: 13.320942 +epoch: 2, batch: 3185, sum loss: 3851.362793, avg loss: 2.329923, ppl: 10.277149 +epoch: 2, batch: 3186, sum loss: 2984.696045, avg loss: 2.169111, ppl: 8.750497 +epoch: 2, batch: 3187, sum loss: 3944.307129, avg loss: 2.510699, ppl: 12.313529 +epoch: 2, batch: 3188, sum loss: 4644.946289, avg loss: 2.717932, ppl: 15.148965 +epoch: 2, batch: 3189, sum loss: 3898.202881, avg loss: 2.325897, ppl: 10.235855 +epoch: 2, batch: 3190, sum loss: 4335.699707, avg loss: 2.565503, ppl: 13.007197 +epoch: 2, batch: 3191, sum loss: 5342.819336, avg loss: 2.829883, ppl: 16.943480 +epoch: 2, batch: 3192, sum loss: 3946.419434, avg loss: 2.454241, ppl: 11.637595 +epoch: 2, batch: 3193, sum loss: 3721.473145, avg loss: 2.476030, ppl: 11.893950 +epoch: 2, batch: 3194, sum loss: 4440.077148, avg loss: 2.621061, ppl: 13.750303 +epoch: 2, batch: 3195, sum loss: 4736.639160, avg loss: 2.875919, ppl: 17.741728 +epoch: 2, batch: 3196, sum loss: 4404.329590, avg loss: 2.436023, ppl: 11.427503 +epoch: 2, batch: 3197, sum loss: 4827.410156, avg loss: 2.563680, ppl: 12.983514 +epoch: 2, batch: 3198, sum loss: 4353.198730, avg loss: 2.493241, ppl: 12.100431 +epoch: 2, batch: 3199, sum loss: 4009.364746, avg loss: 2.547246, ppl: 12.771875 +epoch: 2, batch: 3200, sum loss: 3953.930908, avg loss: 2.383322, ppl: 10.840854 +epoch: 2, batch: 3201, sum loss: 
4718.816406, avg loss: 2.594182, ppl: 13.385627 +epoch: 2, batch: 3202, sum loss: 4153.109375, avg loss: 2.682887, ppl: 14.627266 +epoch: 2, batch: 3203, sum loss: 3329.707520, avg loss: 2.288459, ppl: 9.859731 +epoch: 2, batch: 3204, sum loss: 4663.720215, avg loss: 2.898521, ppl: 18.147285 +epoch: 2, batch: 3205, sum loss: 4122.893555, avg loss: 2.847302, ppl: 17.241205 +epoch: 2, batch: 3206, sum loss: 5214.904785, avg loss: 2.677056, ppl: 14.542215 +epoch: 2, batch: 3207, sum loss: 3589.854004, avg loss: 2.267754, ppl: 9.657681 +epoch: 2, batch: 3208, sum loss: 4140.325195, avg loss: 2.426920, ppl: 11.323947 +epoch: 2, batch: 3209, sum loss: 4299.754883, avg loss: 2.291980, ppl: 9.894512 +epoch: 2, batch: 3210, sum loss: 5017.298828, avg loss: 2.609100, ppl: 13.586812 +epoch: 2, batch: 3211, sum loss: 4738.459961, avg loss: 2.402870, ppl: 11.054860 +epoch: 2, batch: 3212, sum loss: 4071.659912, avg loss: 2.606697, ppl: 13.554202 +epoch: 2, batch: 3213, sum loss: 4064.088867, avg loss: 2.707587, ppl: 14.993061 +epoch: 2, batch: 3214, sum loss: 4759.695312, avg loss: 2.662022, ppl: 14.325227 +epoch: 2, batch: 3215, sum loss: 4067.403809, avg loss: 2.608983, ppl: 13.585221 +epoch: 2, batch: 3216, sum loss: 3077.022461, avg loss: 2.127955, ppl: 8.397674 +epoch: 2, batch: 3217, sum loss: 4474.368652, avg loss: 2.656989, ppl: 14.253303 +epoch: 2, batch: 3218, sum loss: 4394.506836, avg loss: 2.721057, ppl: 15.196376 +epoch: 2, batch: 3219, sum loss: 5338.101074, avg loss: 2.745937, ppl: 15.579206 +epoch: 2, batch: 3220, sum loss: 4343.239258, avg loss: 2.548849, ppl: 12.792376 +epoch: 2, batch: 3221, sum loss: 4236.578613, avg loss: 2.664515, ppl: 14.360980 +epoch: 2, batch: 3222, sum loss: 5362.739746, avg loss: 2.643046, ppl: 14.055951 +epoch: 2, batch: 3223, sum loss: 3610.558594, avg loss: 2.537287, ppl: 12.645311 +epoch: 2, batch: 3224, sum loss: 4647.982910, avg loss: 2.497573, ppl: 12.152962 +epoch: 2, batch: 3225, sum loss: 4212.830078, avg loss: 2.512123, 
ppl: 12.331079 +epoch: 2, batch: 3226, sum loss: 3551.372559, avg loss: 2.285311, ppl: 9.828737 +epoch: 2, batch: 3227, sum loss: 4096.799316, avg loss: 2.431335, ppl: 11.374056 +epoch: 2, batch: 3228, sum loss: 4465.878906, avg loss: 2.474171, ppl: 11.871863 +epoch: 2, batch: 3229, sum loss: 5334.322266, avg loss: 2.698190, ppl: 14.852827 +epoch: 2, batch: 3230, sum loss: 4244.085449, avg loss: 2.636078, ppl: 13.958350 +epoch: 2, batch: 3231, sum loss: 4575.530762, avg loss: 2.614589, ppl: 13.661600 +epoch: 2, batch: 3232, sum loss: 3966.924805, avg loss: 2.450232, ppl: 11.591030 +epoch: 2, batch: 3233, sum loss: 4132.900391, avg loss: 2.416901, ppl: 11.211061 +epoch: 2, batch: 3234, sum loss: 4163.043945, avg loss: 2.692784, ppl: 14.772743 +epoch: 2, batch: 3235, sum loss: 4335.004395, avg loss: 2.917230, ppl: 18.490005 +epoch: 2, batch: 3236, sum loss: 3018.145752, avg loss: 2.318084, ppl: 10.156199 +epoch: 2, batch: 3237, sum loss: 4953.910156, avg loss: 2.790935, ppl: 16.296249 +epoch: 2, batch: 3238, sum loss: 4543.754883, avg loss: 2.599402, ppl: 13.455691 +epoch: 2, batch: 3239, sum loss: 4446.766602, avg loss: 2.703202, ppl: 14.927446 +epoch: 2, batch: 3240, sum loss: 4647.463379, avg loss: 2.561998, ppl: 12.961684 +epoch: 2, batch: 3241, sum loss: 4169.990234, avg loss: 2.437166, ppl: 11.440569 +epoch: 2, batch: 3242, sum loss: 3839.262939, avg loss: 2.517550, ppl: 12.398178 +epoch: 2, batch: 3243, sum loss: 3864.760498, avg loss: 2.588587, ppl: 13.310950 +epoch: 2, batch: 3244, sum loss: 4207.537598, avg loss: 2.714540, ppl: 15.097671 +epoch: 2, batch: 3245, sum loss: 3752.998779, avg loss: 2.512047, ppl: 12.330148 +epoch: 2, batch: 3246, sum loss: 4552.887695, avg loss: 2.591285, ppl: 13.346911 +epoch: 2, batch: 3247, sum loss: 4817.953125, avg loss: 2.996239, ppl: 20.010145 +epoch: 2, batch: 3248, sum loss: 4674.777344, avg loss: 2.567148, ppl: 13.028620 +epoch: 2, batch: 3249, sum loss: 4832.580078, avg loss: 2.795015, ppl: 16.362867 +epoch: 2, batch: 
3250, sum loss: 4339.300293, avg loss: 2.628286, ppl: 13.850013 +epoch: 2, batch: 3251, sum loss: 4428.681152, avg loss: 2.720320, ppl: 15.185181 +epoch: 2, batch: 3252, sum loss: 5611.618164, avg loss: 2.938020, ppl: 18.878429 +epoch: 2, batch: 3253, sum loss: 3832.766602, avg loss: 2.355726, ppl: 10.545785 +epoch: 2, batch: 3254, sum loss: 3830.715820, avg loss: 2.485864, ppl: 12.011490 +epoch: 2, batch: 3255, sum loss: 4769.137207, avg loss: 2.596155, ppl: 13.412075 +epoch: 2, batch: 3256, sum loss: 3293.832275, avg loss: 2.414833, ppl: 11.187902 +epoch: 2, batch: 3257, sum loss: 4191.871094, avg loss: 2.776074, ppl: 16.055857 +epoch: 2, batch: 3258, sum loss: 4402.975586, avg loss: 2.570330, ppl: 13.070139 +epoch: 2, batch: 3259, sum loss: 4697.825684, avg loss: 2.729707, ppl: 15.328395 +epoch: 2, batch: 3260, sum loss: 4315.336426, avg loss: 2.649071, ppl: 14.140892 +epoch: 2, batch: 3261, sum loss: 4422.817871, avg loss: 2.438158, ppl: 11.451921 +epoch: 2, batch: 3262, sum loss: 3876.351074, avg loss: 2.437957, ppl: 11.449620 +epoch: 2, batch: 3263, sum loss: 3380.322510, avg loss: 2.492863, ppl: 12.095860 +epoch: 2, batch: 3264, sum loss: 4369.344238, avg loss: 2.400739, ppl: 11.031322 +epoch: 2, batch: 3265, sum loss: 4090.957764, avg loss: 2.514418, ppl: 12.359409 +epoch: 2, batch: 3266, sum loss: 3648.963135, avg loss: 2.292062, ppl: 9.895324 +epoch: 2, batch: 3267, sum loss: 4014.435303, avg loss: 2.484180, ppl: 11.991286 +epoch: 2, batch: 3268, sum loss: 4799.124512, avg loss: 2.524526, ppl: 12.484981 +epoch: 2, batch: 3269, sum loss: 5430.554199, avg loss: 2.723448, ppl: 15.232748 +epoch: 2, batch: 3270, sum loss: 4302.220215, avg loss: 2.542683, ppl: 12.713740 +epoch: 2, batch: 3271, sum loss: 4934.138184, avg loss: 2.797131, ppl: 16.397528 +epoch: 2, batch: 3272, sum loss: 3627.060059, avg loss: 2.378400, ppl: 10.787630 +epoch: 2, batch: 3273, sum loss: 3580.591309, avg loss: 2.282085, ppl: 9.797086 +epoch: 2, batch: 3274, sum loss: 4803.542480, avg 
loss: 2.613461, ppl: 13.646206 +epoch: 2, batch: 3275, sum loss: 5147.997559, avg loss: 2.950142, ppl: 19.108665 +epoch: 2, batch: 3276, sum loss: 4215.697754, avg loss: 2.434005, ppl: 11.404471 +epoch: 2, batch: 3277, sum loss: 3790.159424, avg loss: 2.562650, ppl: 12.970142 +epoch: 2, batch: 3278, sum loss: 3482.290039, avg loss: 2.447147, ppl: 11.555331 +epoch: 2, batch: 3279, sum loss: 4224.706543, avg loss: 2.729139, ppl: 15.319685 +epoch: 2, batch: 3280, sum loss: 3839.556641, avg loss: 2.359900, ppl: 10.589888 +epoch: 2, batch: 3281, sum loss: 4565.944824, avg loss: 2.595762, ppl: 13.406796 +epoch: 2, batch: 3282, sum loss: 5272.982422, avg loss: 2.825821, ppl: 16.874796 +epoch: 2, batch: 3283, sum loss: 4785.549316, avg loss: 2.528024, ppl: 12.528722 +epoch: 2, batch: 3284, sum loss: 4199.924805, avg loss: 2.410979, ppl: 11.144861 +epoch: 2, batch: 3285, sum loss: 4356.547363, avg loss: 2.553662, ppl: 12.854087 +epoch: 2, batch: 3286, sum loss: 4261.860352, avg loss: 2.545914, ppl: 12.754886 +epoch: 2, batch: 3287, sum loss: 4421.098633, avg loss: 2.520581, ppl: 12.435817 +epoch: 2, batch: 3288, sum loss: 4464.503906, avg loss: 2.505333, ppl: 12.247642 +epoch: 2, batch: 3289, sum loss: 4488.070312, avg loss: 2.474129, ppl: 11.871365 +epoch: 2, batch: 3290, sum loss: 4410.284180, avg loss: 2.677768, ppl: 14.552575 +epoch: 2, batch: 3291, sum loss: 3553.103516, avg loss: 2.319258, ppl: 10.168129 +epoch: 2, batch: 3292, sum loss: 4306.753418, avg loss: 2.695090, ppl: 14.806849 +epoch: 2, batch: 3293, sum loss: 3956.993652, avg loss: 2.632730, ppl: 13.911700 +epoch: 2, batch: 3294, sum loss: 4826.538086, avg loss: 2.780264, ppl: 16.123276 +epoch: 2, batch: 3295, sum loss: 3645.450684, avg loss: 2.080737, ppl: 8.010369 +epoch: 2, batch: 3296, sum loss: 3991.253906, avg loss: 2.497656, ppl: 12.153970 +epoch: 2, batch: 3297, sum loss: 3701.648193, avg loss: 2.519842, ppl: 12.426635 +epoch: 2, batch: 3298, sum loss: 4341.815430, avg loss: 2.710247, ppl: 15.032986 
+epoch: 2, batch: 3299, sum loss: 4521.741211, avg loss: 2.678757, ppl: 14.566971 +epoch: 2, batch: 3300, sum loss: 5155.464844, avg loss: 2.749581, ppl: 15.636085 +epoch: 2, batch: 3301, sum loss: 4177.085938, avg loss: 2.501249, ppl: 12.197721 +epoch: 2, batch: 3302, sum loss: 4036.384277, avg loss: 2.438903, ppl: 11.460463 +epoch: 2, batch: 3303, sum loss: 3819.988525, avg loss: 2.322181, ppl: 10.197896 +epoch: 2, batch: 3304, sum loss: 4821.816895, avg loss: 2.557993, ppl: 12.909883 +epoch: 2, batch: 3305, sum loss: 3530.862305, avg loss: 2.316839, ppl: 10.143558 +epoch: 2, batch: 3306, sum loss: 4116.935547, avg loss: 2.444736, ppl: 11.527509 +epoch: 2, batch: 3307, sum loss: 4408.734375, avg loss: 2.525048, ppl: 12.491501 +epoch: 2, batch: 3308, sum loss: 3462.459473, avg loss: 2.394509, ppl: 10.962810 +epoch: 2, batch: 3309, sum loss: 4983.194336, avg loss: 2.801121, ppl: 16.463091 +epoch: 2, batch: 3310, sum loss: 4904.916504, avg loss: 2.558642, ppl: 12.918264 +epoch: 2, batch: 3311, sum loss: 4387.749512, avg loss: 2.389842, ppl: 10.911768 +epoch: 2, batch: 3312, sum loss: 3666.307129, avg loss: 2.475562, ppl: 11.888385 +epoch: 2, batch: 3313, sum loss: 3976.892334, avg loss: 2.604383, ppl: 13.522876 +epoch: 2, batch: 3314, sum loss: 4924.398926, avg loss: 2.755679, ppl: 15.731725 +epoch: 2, batch: 3315, sum loss: 4633.887695, avg loss: 2.533564, ppl: 12.598322 +epoch: 2, batch: 3316, sum loss: 4800.945801, avg loss: 2.749683, ppl: 15.637669 +epoch: 2, batch: 3317, sum loss: 3967.353027, avg loss: 2.493622, ppl: 12.105045 +epoch: 2, batch: 3318, sum loss: 3883.604736, avg loss: 2.437919, ppl: 11.449189 +epoch: 2, batch: 3319, sum loss: 4067.552490, avg loss: 2.602401, ppl: 13.496100 +epoch: 2, batch: 3320, sum loss: 4267.020996, avg loss: 2.564316, ppl: 12.991763 +epoch: 2, batch: 3321, sum loss: 4706.927734, avg loss: 2.513042, ppl: 12.342422 +epoch: 2, batch: 3322, sum loss: 5470.272461, avg loss: 2.870027, ppl: 17.637499 +epoch: 2, batch: 3323, sum 
loss: 5248.175293, avg loss: 2.762198, ppl: 15.834605 +epoch: 2, batch: 3324, sum loss: 3909.902832, avg loss: 2.416504, ppl: 11.206609 +epoch: 2, batch: 3325, sum loss: 5295.346680, avg loss: 2.738028, ppl: 15.456479 +epoch: 2, batch: 3326, sum loss: 3490.775879, avg loss: 2.564861, ppl: 12.998852 +epoch: 2, batch: 3327, sum loss: 3794.512451, avg loss: 2.570808, ppl: 13.076385 +epoch: 2, batch: 3328, sum loss: 4724.635742, avg loss: 2.712190, ppl: 15.062232 +epoch: 2, batch: 3329, sum loss: 3414.564453, avg loss: 2.241999, ppl: 9.412127 +epoch: 2, batch: 3330, sum loss: 4427.172852, avg loss: 2.478820, ppl: 11.927186 +epoch: 2, batch: 3331, sum loss: 5197.329102, avg loss: 2.623589, ppl: 13.785104 +epoch: 2, batch: 3332, sum loss: 4836.767090, avg loss: 2.726475, ppl: 15.278934 +epoch: 2, batch: 3333, sum loss: 5096.538086, avg loss: 2.772871, ppl: 16.004524 +epoch: 2, batch: 3334, sum loss: 3881.137695, avg loss: 2.521857, ppl: 12.451695 +epoch: 2, batch: 3335, sum loss: 3870.889160, avg loss: 2.538288, ppl: 12.657980 +epoch: 2, batch: 3336, sum loss: 3796.155273, avg loss: 2.525719, ppl: 12.499876 +epoch: 2, batch: 3337, sum loss: 4306.932617, avg loss: 2.634209, ppl: 13.932293 +epoch: 2, batch: 3338, sum loss: 3028.905273, avg loss: 2.358961, ppl: 10.579949 +epoch: 2, batch: 3339, sum loss: 4439.317871, avg loss: 2.636175, ppl: 13.959701 +epoch: 2, batch: 3340, sum loss: 4443.059570, avg loss: 2.778649, ppl: 16.097256 +epoch: 2, batch: 3341, sum loss: 4049.771240, avg loss: 2.349055, ppl: 10.475669 +epoch: 2, batch: 3342, sum loss: 5068.036621, avg loss: 2.723287, ppl: 15.230300 +epoch: 2, batch: 3343, sum loss: 3720.818115, avg loss: 2.520880, ppl: 12.439532 +epoch: 2, batch: 3344, sum loss: 4401.870605, avg loss: 2.707177, ppl: 14.986914 +epoch: 2, batch: 3345, sum loss: 4627.732422, avg loss: 2.635383, ppl: 13.948655 +epoch: 2, batch: 3346, sum loss: 4638.970703, avg loss: 2.684590, ppl: 14.652187 +epoch: 2, batch: 3347, sum loss: 4350.606934, avg loss: 
2.541242, ppl: 12.695434 +epoch: 2, batch: 3348, sum loss: 4813.784180, avg loss: 2.813433, ppl: 16.667046 +epoch: 2, batch: 3349, sum loss: 3320.466553, avg loss: 2.388825, ppl: 10.900678 +epoch: 2, batch: 3350, sum loss: 4780.424316, avg loss: 2.779316, ppl: 16.108006 +epoch: 2, batch: 3351, sum loss: 3949.342285, avg loss: 2.442389, ppl: 11.500477 +epoch: 2, batch: 3352, sum loss: 3865.613770, avg loss: 2.428149, ppl: 11.337879 +epoch: 2, batch: 3353, sum loss: 3855.823242, avg loss: 2.420479, ppl: 11.251248 +epoch: 2, batch: 3354, sum loss: 4091.324707, avg loss: 2.520841, ppl: 12.439052 +epoch: 2, batch: 3355, sum loss: 3758.155518, avg loss: 2.353260, ppl: 10.519804 +epoch: 2, batch: 3356, sum loss: 4215.519043, avg loss: 2.649603, ppl: 14.148426 +epoch: 2, batch: 3357, sum loss: 4037.030273, avg loss: 2.597832, ppl: 13.434577 +epoch: 2, batch: 3358, sum loss: 3691.406250, avg loss: 2.657600, ppl: 14.262018 +epoch: 2, batch: 3359, sum loss: 4313.976074, avg loss: 2.758297, ppl: 15.772954 +epoch: 2, batch: 3360, sum loss: 3274.865723, avg loss: 2.373091, ppl: 10.730512 +epoch: 2, batch: 3361, sum loss: 3916.440674, avg loss: 2.410117, ppl: 11.135268 +epoch: 2, batch: 3362, sum loss: 4832.280762, avg loss: 2.663882, ppl: 14.351889 +epoch: 2, batch: 3363, sum loss: 3867.602539, avg loss: 2.482415, ppl: 11.970137 +epoch: 2, batch: 3364, sum loss: 4857.737305, avg loss: 2.722947, ppl: 15.225126 +epoch: 2, batch: 3365, sum loss: 3988.811279, avg loss: 2.313696, ppl: 10.111728 +epoch: 2, batch: 3366, sum loss: 5449.676758, avg loss: 2.757934, ppl: 15.767228 +epoch: 2, batch: 3367, sum loss: 4566.197266, avg loss: 2.689162, ppl: 14.719339 +epoch: 2, batch: 3368, sum loss: 4177.248047, avg loss: 2.402098, ppl: 11.046327 +epoch: 2, batch: 3369, sum loss: 4212.829590, avg loss: 2.527192, ppl: 12.518307 +epoch: 2, batch: 3370, sum loss: 3999.605713, avg loss: 2.529795, ppl: 12.550929 +epoch: 2, batch: 3371, sum loss: 4801.602539, avg loss: 2.769090, ppl: 15.944121 
+epoch: 2, batch: 3372, sum loss: 4197.518555, avg loss: 2.623449, ppl: 13.783181 +epoch: 2, batch: 3373, sum loss: 4407.010742, avg loss: 2.460642, ppl: 11.712333 +epoch: 2, batch: 3374, sum loss: 3903.627930, avg loss: 2.441293, ppl: 11.487887 +epoch: 2, batch: 3375, sum loss: 4149.686523, avg loss: 2.397277, ppl: 10.993199 +epoch: 2, batch: 3376, sum loss: 5133.152832, avg loss: 2.916564, ppl: 18.477688 +epoch: 2, batch: 3377, sum loss: 3613.268066, avg loss: 2.555352, ppl: 12.875834 +epoch: 2, batch: 3378, sum loss: 3860.635254, avg loss: 2.364137, ppl: 10.634853 +epoch: 2, batch: 3379, sum loss: 3083.691162, avg loss: 2.325559, ppl: 10.232397 +epoch: 2, batch: 3380, sum loss: 4190.865234, avg loss: 2.630800, ppl: 13.884880 +epoch: 2, batch: 3381, sum loss: 4126.206055, avg loss: 2.273392, ppl: 9.712289 +epoch: 2, batch: 3382, sum loss: 4527.111328, avg loss: 2.658315, ppl: 14.272226 +epoch: 2, batch: 3383, sum loss: 4285.090332, avg loss: 2.549132, ppl: 12.795987 +epoch: 2, batch: 3384, sum loss: 4725.683105, avg loss: 2.478072, ppl: 11.918263 +epoch: 2, batch: 3385, sum loss: 4234.111816, avg loss: 2.451715, ppl: 11.608237 +epoch: 2, batch: 3386, sum loss: 3860.312012, avg loss: 2.200862, ppl: 9.032796 +epoch: 2, batch: 3387, sum loss: 4447.062500, avg loss: 2.548460, ppl: 12.787393 +epoch: 2, batch: 3388, sum loss: 3719.959961, avg loss: 2.195962, ppl: 8.988646 +epoch: 2, batch: 3389, sum loss: 4042.121582, avg loss: 2.535835, ppl: 12.626973 +epoch: 2, batch: 3390, sum loss: 5885.916992, avg loss: 2.840693, ppl: 17.127640 +epoch: 2, batch: 3391, sum loss: 3498.421875, avg loss: 2.424409, ppl: 11.295548 +epoch: 2, batch: 3392, sum loss: 4631.349121, avg loss: 2.641956, ppl: 14.040645 +epoch: 2, batch: 3393, sum loss: 4164.390137, avg loss: 2.617467, ppl: 13.700977 +epoch: 2, batch: 3394, sum loss: 4342.252441, avg loss: 2.604831, ppl: 13.528932 +epoch: 2, batch: 3395, sum loss: 5134.143066, avg loss: 2.788779, ppl: 16.261162 +epoch: 2, batch: 3396, sum loss: 
4581.863770, avg loss: 2.696800, ppl: 14.832200 +epoch: 2, batch: 3397, sum loss: 3765.291016, avg loss: 2.386116, ppl: 10.871188 +epoch: 2, batch: 3398, sum loss: 4077.530762, avg loss: 2.516994, ppl: 12.391298 +epoch: 2, batch: 3399, sum loss: 4204.839355, avg loss: 2.560804, ppl: 12.946218 +epoch: 2, batch: 3400, sum loss: 3565.464844, avg loss: 2.443773, ppl: 11.516411 +epoch: 2, batch: 3401, sum loss: 4154.894043, avg loss: 2.593567, ppl: 13.377399 +epoch: 2, batch: 3402, sum loss: 3278.483154, avg loss: 2.377435, ppl: 10.777226 +epoch: 2, batch: 3403, sum loss: 5316.842773, avg loss: 2.677161, ppl: 14.543752 +epoch: 2, batch: 3404, sum loss: 4769.925781, avg loss: 2.744491, ppl: 15.556699 +epoch: 2, batch: 3405, sum loss: 4558.791992, avg loss: 2.641247, ppl: 14.030686 +epoch: 2, batch: 3406, sum loss: 4365.208008, avg loss: 2.557240, ppl: 12.900158 +epoch: 2, batch: 3407, sum loss: 3697.629395, avg loss: 2.460166, ppl: 11.706752 +epoch: 2, batch: 3408, sum loss: 4955.873535, avg loss: 2.753263, ppl: 15.693761 +epoch: 2, batch: 3409, sum loss: 4550.821289, avg loss: 2.642753, ppl: 14.051840 +epoch: 2, batch: 3410, sum loss: 3402.664062, avg loss: 2.276029, ppl: 9.737938 +epoch: 2, batch: 3411, sum loss: 4177.145020, avg loss: 2.650473, ppl: 14.160730 +epoch: 2, batch: 3412, sum loss: 4345.875000, avg loss: 2.483357, ppl: 11.981421 +epoch: 2, batch: 3413, sum loss: 3973.938232, avg loss: 2.406989, ppl: 11.100486 +epoch: 2, batch: 3414, sum loss: 4780.056152, avg loss: 2.511853, ppl: 12.327752 +epoch: 2, batch: 3415, sum loss: 4263.352051, avg loss: 2.656294, ppl: 14.243406 +epoch: 2, batch: 3416, sum loss: 4895.274414, avg loss: 2.659030, ppl: 14.282432 +epoch: 2, batch: 3417, sum loss: 5189.866699, avg loss: 2.790251, ppl: 16.285107 +epoch: 2, batch: 3418, sum loss: 3213.884766, avg loss: 2.247472, ppl: 9.463781 +epoch: 2, batch: 3419, sum loss: 3829.552734, avg loss: 2.494823, ppl: 12.119582 +epoch: 2, batch: 3420, sum loss: 3370.114258, avg loss: 2.172865, 
ppl: 8.783416 +epoch: 2, batch: 3421, sum loss: 3567.276855, avg loss: 2.127177, ppl: 8.391150 +epoch: 2, batch: 3422, sum loss: 4082.018555, avg loss: 2.384356, ppl: 10.852077 +epoch: 2, batch: 3423, sum loss: 3637.368652, avg loss: 2.563333, ppl: 12.978998 +epoch: 2, batch: 3424, sum loss: 4286.627441, avg loss: 2.739059, ppl: 15.472418 +epoch: 2, batch: 3425, sum loss: 4521.822266, avg loss: 2.423270, ppl: 11.282696 +epoch: 2, batch: 3426, sum loss: 3814.947998, avg loss: 2.434555, ppl: 11.410743 +epoch: 2, batch: 3427, sum loss: 4620.651367, avg loss: 2.724441, ppl: 15.247885 +epoch: 2, batch: 3428, sum loss: 4603.131836, avg loss: 2.438099, ppl: 11.451256 +epoch: 2, batch: 3429, sum loss: 4901.764160, avg loss: 2.767795, ppl: 15.923478 +epoch: 2, batch: 3430, sum loss: 4257.154785, avg loss: 2.475090, ppl: 11.882777 +epoch: 2, batch: 3431, sum loss: 3832.093994, avg loss: 2.469133, ppl: 11.812200 +epoch: 2, batch: 3432, sum loss: 4519.708984, avg loss: 2.754241, ppl: 15.709105 +epoch: 2, batch: 3433, sum loss: 4610.311523, avg loss: 2.716742, ppl: 15.130949 +epoch: 2, batch: 3434, sum loss: 3408.528320, avg loss: 2.388597, ppl: 10.898198 +epoch: 2, batch: 3435, sum loss: 4390.451172, avg loss: 2.534903, ppl: 12.615202 +epoch: 2, batch: 3436, sum loss: 5238.110352, avg loss: 2.672505, ppl: 14.476189 +epoch: 2, batch: 3437, sum loss: 4058.139160, avg loss: 2.555503, ppl: 12.877780 +epoch: 2, batch: 3438, sum loss: 4717.961914, avg loss: 2.847291, ppl: 17.241020 +epoch: 2, batch: 3439, sum loss: 3981.298096, avg loss: 2.417303, ppl: 11.215569 +epoch: 2, batch: 3440, sum loss: 4796.581543, avg loss: 2.853410, ppl: 17.346827 +epoch: 2, batch: 3441, sum loss: 5707.855957, avg loss: 2.912171, ppl: 18.396702 +epoch: 2, batch: 3442, sum loss: 4563.552734, avg loss: 2.838030, ppl: 17.082087 +epoch: 2, batch: 3443, sum loss: 4095.762207, avg loss: 2.384029, ppl: 10.848525 +epoch: 2, batch: 3444, sum loss: 4848.383789, avg loss: 2.558514, ppl: 12.916608 +epoch: 2, batch: 
3445, sum loss: 4677.898438, avg loss: 2.727638, ppl: 15.296706 +epoch: 2, batch: 3446, sum loss: 3834.126221, avg loss: 2.378490, ppl: 10.788602 +epoch: 2, batch: 3447, sum loss: 3568.235352, avg loss: 2.355271, ppl: 10.540983 +epoch: 2, batch: 3448, sum loss: 5068.057617, avg loss: 2.398513, ppl: 11.006795 +epoch: 2, batch: 3449, sum loss: 3634.283936, avg loss: 2.316306, ppl: 10.138153 +epoch: 2, batch: 3450, sum loss: 4433.479980, avg loss: 2.645275, ppl: 14.087314 +epoch: 2, batch: 3451, sum loss: 3554.996338, avg loss: 2.349634, ppl: 10.481734 +epoch: 2, batch: 3452, sum loss: 3818.232422, avg loss: 2.416603, ppl: 11.207720 +epoch: 2, batch: 3453, sum loss: 4199.580566, avg loss: 2.618192, ppl: 13.710917 +epoch: 2, batch: 3454, sum loss: 4360.038574, avg loss: 2.592175, ppl: 13.358795 +epoch: 2, batch: 3455, sum loss: 4511.858887, avg loss: 2.791992, ppl: 16.313482 +epoch: 2, batch: 3456, sum loss: 3501.965088, avg loss: 2.382289, ppl: 10.829665 +epoch: 2, batch: 3457, sum loss: 4428.426270, avg loss: 2.592755, ppl: 13.366550 +epoch: 2, batch: 3458, sum loss: 3299.388672, avg loss: 2.294429, ppl: 9.918769 +epoch: 2, batch: 3459, sum loss: 4061.824463, avg loss: 2.269176, ppl: 9.671424 +epoch: 2, batch: 3460, sum loss: 5148.964844, avg loss: 2.569344, ppl: 13.057254 +epoch: 2, batch: 3461, sum loss: 3754.042969, avg loss: 2.531384, ppl: 12.570898 +epoch: 2, batch: 3462, sum loss: 3601.579834, avg loss: 2.329612, ppl: 10.273952 +epoch: 2, batch: 3463, sum loss: 4509.888672, avg loss: 2.640450, ppl: 14.019511 +epoch: 2, batch: 3464, sum loss: 3635.604736, avg loss: 2.200729, ppl: 9.031598 +epoch: 2, batch: 3465, sum loss: 4890.604492, avg loss: 2.482540, ppl: 11.971638 +epoch: 2, batch: 3466, sum loss: 4890.550293, avg loss: 2.556482, ppl: 12.890390 +epoch: 2, batch: 3467, sum loss: 3422.384033, avg loss: 2.191027, ppl: 8.944393 +epoch: 2, batch: 3468, sum loss: 3969.764648, avg loss: 2.707889, ppl: 14.997576 +epoch: 2, batch: 3469, sum loss: 4346.380371, avg 
loss: 2.704655, ppl: 14.949158 +epoch: 2, batch: 3470, sum loss: 4568.664062, avg loss: 2.428849, ppl: 11.345810 +epoch: 2, batch: 3471, sum loss: 4199.303223, avg loss: 2.487739, ppl: 12.034038 +epoch: 2, batch: 3472, sum loss: 4862.916504, avg loss: 2.742762, ppl: 15.529817 +epoch: 2, batch: 3473, sum loss: 3851.614990, avg loss: 2.829989, ppl: 16.945274 +epoch: 2, batch: 3474, sum loss: 3537.136230, avg loss: 2.347137, ppl: 10.455597 +epoch: 2, batch: 3475, sum loss: 3976.150635, avg loss: 2.654306, ppl: 14.215117 +epoch: 2, batch: 3476, sum loss: 4707.989258, avg loss: 2.589653, ppl: 13.325148 +epoch: 2, batch: 3477, sum loss: 3939.209961, avg loss: 2.598423, ppl: 13.442529 +epoch: 2, batch: 3478, sum loss: 3847.323242, avg loss: 2.389642, ppl: 10.909585 +epoch: 2, batch: 3479, sum loss: 4243.890137, avg loss: 2.518629, ppl: 12.411570 +epoch: 2, batch: 3480, sum loss: 4679.384766, avg loss: 2.911876, ppl: 18.391272 +epoch: 2, batch: 3481, sum loss: 5267.662109, avg loss: 2.598748, ppl: 13.446892 +epoch: 2, batch: 3482, sum loss: 3883.597412, avg loss: 2.534985, ppl: 12.616245 +epoch: 2, batch: 3483, sum loss: 4171.915527, avg loss: 2.632123, ppl: 13.903258 +epoch: 2, batch: 3484, sum loss: 3828.328125, avg loss: 2.586708, ppl: 13.285966 +epoch: 2, batch: 3485, sum loss: 3729.330322, avg loss: 2.364826, ppl: 10.642186 +epoch: 2, batch: 3486, sum loss: 4249.258301, avg loss: 2.495161, ppl: 12.123680 +epoch: 2, batch: 3487, sum loss: 3473.690430, avg loss: 2.420690, ppl: 11.253622 +epoch: 2, batch: 3488, sum loss: 4419.902832, avg loss: 2.615327, ppl: 13.671688 +epoch: 2, batch: 3489, sum loss: 4465.387695, avg loss: 2.605244, ppl: 13.534523 +epoch: 2, batch: 3490, sum loss: 5032.787598, avg loss: 2.711631, ppl: 15.053813 +epoch: 2, batch: 3491, sum loss: 4651.272461, avg loss: 2.605755, ppl: 13.541447 +epoch: 2, batch: 3492, sum loss: 4088.449707, avg loss: 2.473351, ppl: 11.862133 +epoch: 2, batch: 3493, sum loss: 4607.928223, avg loss: 2.707361, ppl: 14.989662 
+epoch: 2, batch: 3494, sum loss: 4580.167480, avg loss: 2.474429, ppl: 11.874921 +epoch: 2, batch: 3495, sum loss: 3481.297607, avg loss: 2.342731, ppl: 10.409621 +epoch: 2, batch: 3496, sum loss: 4451.655273, avg loss: 2.606355, ppl: 13.549579 +epoch: 2, batch: 3497, sum loss: 3454.781494, avg loss: 2.415931, ppl: 11.200195 +epoch: 2, batch: 3498, sum loss: 3299.075195, avg loss: 2.217120, ppl: 9.180856 +epoch: 2, batch: 3499, sum loss: 3744.149414, avg loss: 2.507803, ppl: 12.277923 +epoch: 2, batch: 3500, sum loss: 4517.556152, avg loss: 2.899587, ppl: 18.166639 +epoch: 2, batch: 3501, sum loss: 3798.678223, avg loss: 2.411859, ppl: 11.154679 +epoch: 2, batch: 3502, sum loss: 5473.817383, avg loss: 2.728723, ppl: 15.313313 +epoch: 2, batch: 3503, sum loss: 3900.821777, avg loss: 2.356992, ppl: 10.559142 +epoch: 2, batch: 3504, sum loss: 4425.760254, avg loss: 2.631249, ppl: 13.891105 +epoch: 2, batch: 3505, sum loss: 5071.303711, avg loss: 2.700375, ppl: 14.885306 +epoch: 2, batch: 3506, sum loss: 4509.519043, avg loss: 2.643329, ppl: 14.059930 +epoch: 2, batch: 3507, sum loss: 4185.648438, avg loss: 2.550669, ppl: 12.815677 +epoch: 2, batch: 3508, sum loss: 4799.078125, avg loss: 2.575995, ppl: 13.144385 +epoch: 2, batch: 3509, sum loss: 4242.333984, avg loss: 2.574232, ppl: 13.121234 +epoch: 2, batch: 3510, sum loss: 3940.112305, avg loss: 2.624992, ppl: 13.804459 +epoch: 2, batch: 3511, sum loss: 3790.061035, avg loss: 2.799159, ppl: 16.430820 +epoch: 2, batch: 3512, sum loss: 4090.547363, avg loss: 2.483635, ppl: 11.984752 +epoch: 2, batch: 3513, sum loss: 4617.506348, avg loss: 2.559593, ppl: 12.930556 +epoch: 2, batch: 3514, sum loss: 3899.993164, avg loss: 2.283368, ppl: 9.809668 +epoch: 2, batch: 3515, sum loss: 4064.229004, avg loss: 2.439513, ppl: 11.467457 +epoch: 2, batch: 3516, sum loss: 4081.091309, avg loss: 2.436473, ppl: 11.432643 +epoch: 2, batch: 3517, sum loss: 3660.263672, avg loss: 2.461509, ppl: 11.722485 +epoch: 2, batch: 3518, sum loss: 
4710.662598, avg loss: 2.557363, ppl: 12.901751 +epoch: 2, batch: 3519, sum loss: 3588.336182, avg loss: 2.337678, ppl: 10.357162 +epoch: 2, batch: 3520, sum loss: 4114.359375, avg loss: 2.519510, ppl: 12.422503 +epoch: 2, batch: 3521, sum loss: 4180.377441, avg loss: 2.373866, ppl: 10.738824 +epoch: 2, batch: 3522, sum loss: 3808.937988, avg loss: 2.415306, ppl: 11.193198 +epoch: 2, batch: 3523, sum loss: 3771.827637, avg loss: 2.335497, ppl: 10.334594 +epoch: 2, batch: 3524, sum loss: 4222.364258, avg loss: 2.590408, ppl: 13.335206 +epoch: 2, batch: 3525, sum loss: 4209.407227, avg loss: 2.375512, ppl: 10.756520 +epoch: 2, batch: 3526, sum loss: 4055.844971, avg loss: 2.654349, ppl: 14.215727 +epoch: 2, batch: 3527, sum loss: 4137.245605, avg loss: 2.408176, ppl: 11.113668 +epoch: 2, batch: 3528, sum loss: 3502.826416, avg loss: 2.286440, ppl: 9.839849 +epoch: 2, batch: 3529, sum loss: 4490.737793, avg loss: 2.743273, ppl: 15.537757 +epoch: 2, batch: 3530, sum loss: 4827.591797, avg loss: 2.738282, ppl: 15.460408 +epoch: 2, batch: 3531, sum loss: 4296.770508, avg loss: 2.441347, ppl: 11.488504 +epoch: 2, batch: 3532, sum loss: 3292.365234, avg loss: 2.303965, ppl: 10.013804 +epoch: 2, batch: 3533, sum loss: 5909.140625, avg loss: 2.824637, ppl: 16.854824 +epoch: 2, batch: 3534, sum loss: 4188.642090, avg loss: 2.459567, ppl: 11.699739 +epoch: 2, batch: 3535, sum loss: 3770.118652, avg loss: 2.438628, ppl: 11.457310 +epoch: 2, batch: 3536, sum loss: 5212.916504, avg loss: 2.753786, ppl: 15.701961 +epoch: 2, batch: 3537, sum loss: 5272.672363, avg loss: 2.590994, ppl: 13.343023 +epoch: 2, batch: 3538, sum loss: 4215.356934, avg loss: 2.534791, ppl: 12.613791 +epoch: 2, batch: 3539, sum loss: 5305.875000, avg loss: 2.967492, ppl: 19.443089 +epoch: 2, batch: 3540, sum loss: 4111.048340, avg loss: 2.383217, ppl: 10.839714 +epoch: 2, batch: 3541, sum loss: 4898.652344, avg loss: 2.615404, ppl: 13.672744 +epoch: 2, batch: 3542, sum loss: 4210.190430, avg loss: 2.651253, 
ppl: 14.171791 +epoch: 2, batch: 3543, sum loss: 3605.698730, avg loss: 2.320270, ppl: 10.178418 +epoch: 2, batch: 3544, sum loss: 3965.489258, avg loss: 2.415036, ppl: 11.190175 +epoch: 2, batch: 3545, sum loss: 4800.294434, avg loss: 2.674259, ppl: 14.501595 +epoch: 2, batch: 3546, sum loss: 4356.746582, avg loss: 2.365226, ppl: 10.646447 +epoch: 2, batch: 3547, sum loss: 4477.127441, avg loss: 2.794711, ppl: 16.357903 +epoch: 2, batch: 3548, sum loss: 4324.344238, avg loss: 2.616058, ppl: 13.681685 +epoch: 2, batch: 3549, sum loss: 3679.568359, avg loss: 2.322960, ppl: 10.205838 +epoch: 2, batch: 3550, sum loss: 3484.452393, avg loss: 2.355952, ppl: 10.548162 +epoch: 2, batch: 3551, sum loss: 4243.719238, avg loss: 2.529034, ppl: 12.541387 +epoch: 2, batch: 3552, sum loss: 3444.204590, avg loss: 2.306902, ppl: 10.043262 +epoch: 2, batch: 3553, sum loss: 5055.761719, avg loss: 2.546983, ppl: 12.768526 +epoch: 2, batch: 3554, sum loss: 4506.345215, avg loss: 2.563336, ppl: 12.979045 +epoch: 2, batch: 3555, sum loss: 4732.628906, avg loss: 2.721466, ppl: 15.202590 +epoch: 2, batch: 3556, sum loss: 3617.154053, avg loss: 2.422742, ppl: 11.276739 +epoch: 2, batch: 3557, sum loss: 3894.725342, avg loss: 2.382095, ppl: 10.827561 +epoch: 2, batch: 3558, sum loss: 3882.491699, avg loss: 2.399562, ppl: 11.018351 +epoch: 2, batch: 3559, sum loss: 4126.128906, avg loss: 2.500684, ppl: 12.190833 +epoch: 2, batch: 3560, sum loss: 4347.447754, avg loss: 2.491374, ppl: 12.077860 +epoch: 2, batch: 3561, sum loss: 3357.458496, avg loss: 2.410236, ppl: 11.136588 +epoch: 2, batch: 3562, sum loss: 4223.979980, avg loss: 2.480317, ppl: 11.945052 +epoch: 2, batch: 3563, sum loss: 4330.068359, avg loss: 2.526294, ppl: 12.507071 +epoch: 2, batch: 3564, sum loss: 5018.381348, avg loss: 2.561706, ppl: 12.957902 +epoch: 2, batch: 3565, sum loss: 4780.103516, avg loss: 2.674932, ppl: 14.511363 +epoch: 2, batch: 3566, sum loss: 4975.055176, avg loss: 2.794975, ppl: 16.362217 +epoch: 2, 
batch: 3567, sum loss: 3948.113281, avg loss: 2.702336, ppl: 14.914536 +epoch: 2, batch: 3568, sum loss: 4353.854492, avg loss: 2.443241, ppl: 11.510281 +epoch: 2, batch: 3569, sum loss: 3682.453125, avg loss: 2.368137, ppl: 10.677483 +epoch: 2, batch: 3570, sum loss: 4500.853516, avg loss: 2.577809, ppl: 13.168250 +epoch: 2, batch: 3571, sum loss: 4334.076660, avg loss: 2.678663, ppl: 14.565606 +epoch: 2, batch: 3572, sum loss: 4349.296875, avg loss: 2.486733, ppl: 12.021941 +epoch: 2, batch: 3573, sum loss: 4598.086914, avg loss: 2.751698, ppl: 15.669212 +epoch: 2, batch: 3574, sum loss: 3835.924561, avg loss: 2.327624, ppl: 10.253551 +epoch: 2, batch: 3575, sum loss: 3324.986572, avg loss: 2.561623, ppl: 12.956827 +epoch: 2, batch: 3576, sum loss: 5322.925781, avg loss: 2.768032, ppl: 15.927259 +epoch: 2, batch: 3577, sum loss: 4279.609375, avg loss: 2.571881, ppl: 13.090422 +epoch: 2, batch: 3578, sum loss: 4030.906006, avg loss: 2.431186, ppl: 11.372358 +epoch: 2, batch: 3579, sum loss: 3922.528076, avg loss: 2.601146, ppl: 13.479177 +epoch: 2, batch: 3580, sum loss: 4361.710938, avg loss: 2.529995, ppl: 12.553443 +epoch: 2, batch: 3581, sum loss: 4356.284668, avg loss: 2.600767, ppl: 13.474071 +epoch: 2, batch: 3582, sum loss: 4356.371094, avg loss: 2.648250, ppl: 14.129289 +epoch: 2, batch: 3583, sum loss: 4463.527832, avg loss: 2.733330, ppl: 15.384027 +epoch: 2, batch: 3584, sum loss: 3975.607910, avg loss: 2.682597, ppl: 14.623015 +epoch: 2, batch: 3585, sum loss: 4264.148438, avg loss: 2.460559, ppl: 11.711352 +epoch: 2, batch: 3586, sum loss: 4396.388672, avg loss: 2.789587, ppl: 16.274294 +epoch: 2, batch: 3587, sum loss: 4549.245117, avg loss: 2.576016, ppl: 13.144670 +epoch: 2, batch: 3588, sum loss: 4907.550293, avg loss: 2.652730, ppl: 14.192732 +epoch: 2, batch: 3589, sum loss: 4657.092285, avg loss: 2.547643, ppl: 12.776958 +epoch: 2, batch: 3590, sum loss: 4555.606934, avg loss: 2.601717, ppl: 13.486881 +epoch: 2, batch: 3591, sum loss: 
4819.602539, avg loss: 2.803725, ppl: 16.506014 +epoch: 2, batch: 3592, sum loss: 3695.216553, avg loss: 2.530970, ppl: 12.565693 +epoch: 2, batch: 3593, sum loss: 4436.735840, avg loss: 2.748907, ppl: 15.625546 +epoch: 2, batch: 3594, sum loss: 4009.686768, avg loss: 2.462953, ppl: 11.739423 +epoch: 2, batch: 3595, sum loss: 3123.711914, avg loss: 2.384513, ppl: 10.853774 +epoch: 2, batch: 3596, sum loss: 3997.847656, avg loss: 2.455680, ppl: 11.654360 +epoch: 2, batch: 3597, sum loss: 4417.367676, avg loss: 2.548971, ppl: 12.793938 +epoch: 2, batch: 3598, sum loss: 4757.605957, avg loss: 2.706260, ppl: 14.973178 +epoch: 2, batch: 3599, sum loss: 4112.286133, avg loss: 2.247151, ppl: 9.460743 +epoch: 2, batch: 3600, sum loss: 3450.378906, avg loss: 2.452295, ppl: 11.614970 +epoch: 2, batch: 3601, sum loss: 4831.465820, avg loss: 2.749838, ppl: 15.640100 +epoch: 2, batch: 3602, sum loss: 5243.474121, avg loss: 2.846620, ppl: 17.229448 +epoch: 2, batch: 3603, sum loss: 3878.432129, avg loss: 2.471913, ppl: 11.845089 +epoch: 2, batch: 3604, sum loss: 3106.680176, avg loss: 2.346435, ppl: 10.448256 +epoch: 2, batch: 3605, sum loss: 4589.300781, avg loss: 2.584066, ppl: 13.250906 +epoch: 2, batch: 3606, sum loss: 3926.228271, avg loss: 2.596712, ppl: 13.419540 +epoch: 2, batch: 3607, sum loss: 3512.638672, avg loss: 2.399343, ppl: 11.015940 +epoch: 2, batch: 3608, sum loss: 5035.751953, avg loss: 2.650396, ppl: 14.159643 +epoch: 2, batch: 3609, sum loss: 4462.316406, avg loss: 2.516817, ppl: 12.389100 +epoch: 2, batch: 3610, sum loss: 4460.387695, avg loss: 2.653413, ppl: 14.202434 +epoch: 2, batch: 3611, sum loss: 4177.456543, avg loss: 2.588263, ppl: 13.306639 +epoch: 2, batch: 3612, sum loss: 4229.172363, avg loss: 2.711008, ppl: 15.044434 +epoch: 2, batch: 3613, sum loss: 4082.068848, avg loss: 2.467998, ppl: 11.798805 +epoch: 2, batch: 3614, sum loss: 4228.532227, avg loss: 2.502090, ppl: 12.207982 +epoch: 2, batch: 3615, sum loss: 4915.385254, avg loss: 2.702246, 
ppl: 14.913188 +epoch: 2, batch: 3616, sum loss: 4252.068359, avg loss: 2.386122, ppl: 10.871248 +epoch: 2, batch: 3617, sum loss: 3846.069824, avg loss: 2.505583, ppl: 12.250697 +epoch: 2, batch: 3618, sum loss: 5090.511719, avg loss: 2.595876, ppl: 13.408321 +epoch: 2, batch: 3619, sum loss: 3856.651855, avg loss: 2.776567, ppl: 16.063784 +epoch: 2, batch: 3620, sum loss: 4777.814941, avg loss: 2.633856, ppl: 13.927371 +epoch: 2, batch: 3621, sum loss: 3864.648193, avg loss: 2.426019, ppl: 11.313752 +epoch: 2, batch: 3622, sum loss: 3859.856934, avg loss: 2.747229, ppl: 15.599348 +epoch: 2, batch: 3623, sum loss: 3797.340820, avg loss: 2.572724, ppl: 13.101465 +epoch: 2, batch: 3624, sum loss: 3973.901855, avg loss: 2.505613, ppl: 12.251064 +epoch: 2, batch: 3625, sum loss: 3997.129639, avg loss: 2.545943, ppl: 12.755245 +epoch: 2, batch: 3626, sum loss: 5176.761719, avg loss: 2.596169, ppl: 13.412264 +epoch: 2, batch: 3627, sum loss: 4865.278320, avg loss: 2.515656, ppl: 12.374721 +epoch: 2, batch: 3628, sum loss: 4841.811035, avg loss: 2.871774, ppl: 17.668337 +epoch: 2, batch: 3629, sum loss: 4982.652832, avg loss: 2.696241, ppl: 14.823899 +epoch: 2, batch: 3630, sum loss: 4172.758301, avg loss: 2.510685, ppl: 12.313361 +epoch: 2, batch: 3631, sum loss: 4241.475098, avg loss: 2.556646, ppl: 12.892499 +epoch: 2, batch: 3632, sum loss: 3585.828369, avg loss: 2.509327, ppl: 12.296654 +epoch: 2, batch: 3633, sum loss: 4092.116943, avg loss: 2.588309, ppl: 13.307251 +epoch: 2, batch: 3634, sum loss: 4817.642578, avg loss: 2.835576, ppl: 17.040205 +epoch: 2, batch: 3635, sum loss: 3850.531738, avg loss: 2.332242, ppl: 10.301013 +epoch: 2, batch: 3636, sum loss: 3814.965088, avg loss: 2.567271, ppl: 13.030219 +epoch: 2, batch: 3637, sum loss: 4485.854980, avg loss: 2.544444, ppl: 12.736146 +epoch: 2, batch: 3638, sum loss: 4483.270996, avg loss: 2.563334, ppl: 12.979017 +epoch: 2, batch: 3639, sum loss: 4299.666016, avg loss: 2.705894, ppl: 14.967695 +epoch: 2, 
batch: 3640, sum loss: 4453.628906, avg loss: 2.691015, ppl: 14.746629 +epoch: 2, batch: 3641, sum loss: 3355.249512, avg loss: 2.377923, ppl: 10.782484 +epoch: 2, batch: 3642, sum loss: 4382.914062, avg loss: 2.656312, ppl: 14.243654 +epoch: 2, batch: 3643, sum loss: 4051.611328, avg loss: 2.627504, ppl: 13.839186 +epoch: 2, batch: 3644, sum loss: 3898.786865, avg loss: 2.530037, ppl: 12.553969 +epoch: 2, batch: 3645, sum loss: 4510.471191, avg loss: 2.395364, ppl: 10.972195 +epoch: 2, batch: 3646, sum loss: 4908.872559, avg loss: 2.627876, ppl: 13.844334 +epoch: 2, batch: 3647, sum loss: 4814.965332, avg loss: 2.598470, ppl: 13.443157 +epoch: 2, batch: 3648, sum loss: 3907.897461, avg loss: 2.644044, ppl: 14.069993 +epoch: 2, batch: 3649, sum loss: 3696.477783, avg loss: 2.415999, ppl: 11.200952 +epoch: 2, batch: 3650, sum loss: 4284.474609, avg loss: 2.628512, ppl: 13.853140 +epoch: 2, batch: 3651, sum loss: 4883.815918, avg loss: 2.578572, ppl: 13.178309 +epoch: 2, batch: 3652, sum loss: 4729.866211, avg loss: 2.963575, ppl: 19.367092 +epoch: 2, batch: 3653, sum loss: 3971.703125, avg loss: 2.361298, ppl: 10.604706 +epoch: 2, batch: 3654, sum loss: 4364.156250, avg loss: 2.763874, ppl: 15.861163 +epoch: 2, batch: 3655, sum loss: 4516.360840, avg loss: 2.415166, ppl: 11.191632 +epoch: 2, batch: 3656, sum loss: 3882.825195, avg loss: 2.273317, ppl: 9.711559 +epoch: 2, batch: 3657, sum loss: 4235.960938, avg loss: 2.701506, ppl: 14.902152 +epoch: 2, batch: 3658, sum loss: 4295.683105, avg loss: 2.722233, ppl: 15.214252 +epoch: 2, batch: 3659, sum loss: 3790.666504, avg loss: 2.645266, ppl: 14.087196 +epoch: 2, batch: 3660, sum loss: 5161.335938, avg loss: 2.832786, ppl: 16.992735 +epoch: 2, batch: 3661, sum loss: 4388.113770, avg loss: 2.626041, ppl: 13.818948 +epoch: 2, batch: 3662, sum loss: 4410.046387, avg loss: 2.698927, ppl: 14.863773 +epoch: 2, batch: 3663, sum loss: 4294.022949, avg loss: 2.441173, ppl: 11.486505 +epoch: 2, batch: 3664, sum loss: 
4783.920898, avg loss: 2.781349, ppl: 16.140787 +epoch: 2, batch: 3665, sum loss: 3981.400879, avg loss: 2.523068, ppl: 12.466783 +epoch: 2, batch: 3666, sum loss: 4516.125977, avg loss: 2.615012, ppl: 13.667382 +epoch: 2, batch: 3667, sum loss: 3610.601562, avg loss: 2.423223, ppl: 11.282158 +epoch: 2, batch: 3668, sum loss: 3624.691406, avg loss: 2.472504, ppl: 11.852092 +epoch: 2, batch: 3669, sum loss: 4801.583984, avg loss: 2.592648, ppl: 13.365113 +epoch: 2, batch: 3670, sum loss: 4125.377930, avg loss: 2.622618, ppl: 13.771727 +epoch: 2, batch: 3671, sum loss: 4818.382324, avg loss: 2.593317, ppl: 13.374054 +epoch: 2, batch: 3672, sum loss: 4118.993164, avg loss: 2.541020, ppl: 12.692607 +epoch: 2, batch: 3673, sum loss: 4239.079102, avg loss: 2.713879, ppl: 15.087689 +epoch: 2, batch: 3674, sum loss: 4325.871582, avg loss: 2.838498, ppl: 17.090082 +epoch: 2, batch: 3675, sum loss: 3517.729004, avg loss: 2.325003, ppl: 10.226707 +epoch: 2, batch: 3676, sum loss: 3921.879395, avg loss: 2.525357, ppl: 12.495356 +epoch: 2, batch: 3677, sum loss: 4518.873047, avg loss: 2.530164, ppl: 12.555568 +epoch: 2, batch: 3678, sum loss: 5071.547852, avg loss: 2.786565, ppl: 16.225187 +epoch: 2, batch: 3679, sum loss: 3743.822754, avg loss: 2.363524, ppl: 10.628345 +epoch: 2, batch: 3680, sum loss: 3856.598145, avg loss: 2.605810, ppl: 13.542186 +epoch: 2, batch: 3681, sum loss: 4468.954102, avg loss: 2.473134, ppl: 11.859560 +epoch: 2, batch: 3682, sum loss: 5073.355957, avg loss: 2.980820, ppl: 19.703970 +epoch: 2, batch: 3683, sum loss: 4455.830566, avg loss: 2.496264, ppl: 12.137062 +epoch: 2, batch: 3684, sum loss: 3520.095215, avg loss: 2.290238, ppl: 9.877285 +epoch: 2, batch: 3685, sum loss: 3832.987549, avg loss: 2.505221, ppl: 12.246261 +epoch: 2, batch: 3686, sum loss: 3644.329834, avg loss: 2.405498, ppl: 11.083952 +epoch: 2, batch: 3687, sum loss: 4379.685547, avg loss: 2.625711, ppl: 13.814389 +epoch: 2, batch: 3688, sum loss: 4561.305664, avg loss: 2.697402, 
ppl: 14.841117 +epoch: 2, batch: 3689, sum loss: 5105.710449, avg loss: 2.674547, ppl: 14.505780 +epoch: 2, batch: 3690, sum loss: 3912.215332, avg loss: 2.519134, ppl: 12.417842 +epoch: 2, batch: 3691, sum loss: 4431.278320, avg loss: 2.622058, ppl: 13.764023 +epoch: 2, batch: 3692, sum loss: 3556.031006, avg loss: 2.410868, ppl: 11.143634 +epoch: 2, batch: 3693, sum loss: 4252.531738, avg loss: 2.508868, ppl: 12.291012 +epoch: 2, batch: 3694, sum loss: 5016.058105, avg loss: 2.775904, ppl: 16.053127 +epoch: 2, batch: 3695, sum loss: 4663.569336, avg loss: 2.440382, ppl: 11.477419 +epoch: 2, batch: 3696, sum loss: 3861.337158, avg loss: 2.368918, ppl: 10.685828 +epoch: 2, batch: 3697, sum loss: 3926.863770, avg loss: 2.439046, ppl: 11.462097 +epoch: 2, batch: 3698, sum loss: 4179.190918, avg loss: 2.651771, ppl: 14.179125 +epoch: 2, batch: 3699, sum loss: 5042.223145, avg loss: 2.790383, ppl: 16.287262 +epoch: 2, batch: 3700, sum loss: 4304.988770, avg loss: 2.565547, ppl: 13.007778 +epoch: 2, batch: 3701, sum loss: 4872.650391, avg loss: 2.674342, ppl: 14.502799 +epoch: 2, batch: 3702, sum loss: 3910.202637, avg loss: 2.442350, ppl: 11.500038 +epoch: 2, batch: 3703, sum loss: 4805.878418, avg loss: 2.574118, ppl: 13.119740 +epoch: 2, batch: 3704, sum loss: 4413.406250, avg loss: 2.609939, ppl: 13.598220 +epoch: 2, batch: 3705, sum loss: 3891.672607, avg loss: 2.551916, ppl: 12.831671 +epoch: 2, batch: 3706, sum loss: 3987.244141, avg loss: 2.482717, ppl: 11.973756 +epoch: 2, batch: 3707, sum loss: 4200.557617, avg loss: 2.650194, ppl: 14.156784 +epoch: 2, batch: 3708, sum loss: 4327.412598, avg loss: 2.575841, ppl: 13.142365 +epoch: 2, batch: 3709, sum loss: 4330.421875, avg loss: 2.547307, ppl: 12.772664 +epoch: 2, batch: 3710, sum loss: 5170.300781, avg loss: 2.817603, ppl: 16.736679 +epoch: 2, batch: 3711, sum loss: 3699.329590, avg loss: 2.438582, ppl: 11.456788 +epoch: 2, batch: 3712, sum loss: 4062.078613, avg loss: 2.757691, ppl: 15.763402 +epoch: 2, 
batch: 3713, sum loss: 3672.464111, avg loss: 2.468054, ppl: 11.799460 +epoch: 2, batch: 3714, sum loss: 3734.575684, avg loss: 2.315298, ppl: 10.127941 +epoch: 2, batch: 3715, sum loss: 4053.030029, avg loss: 2.300244, ppl: 9.976618 +epoch: 2, batch: 3716, sum loss: 4344.614258, avg loss: 2.665407, ppl: 14.373804 +epoch: 2, batch: 3717, sum loss: 5319.192383, avg loss: 2.753205, ppl: 15.692852 +epoch: 2, batch: 3718, sum loss: 5024.277832, avg loss: 2.591170, ppl: 13.345371 +epoch: 2, batch: 3719, sum loss: 3649.856445, avg loss: 2.302748, ppl: 10.001634 +epoch: 2, batch: 3720, sum loss: 3774.935059, avg loss: 2.395263, ppl: 10.971086 +epoch: 2, batch: 3721, sum loss: 3894.851074, avg loss: 2.385089, ppl: 10.860033 +epoch: 2, batch: 3722, sum loss: 3501.005615, avg loss: 2.271905, ppl: 9.697857 +epoch: 2, batch: 3723, sum loss: 4934.020996, avg loss: 2.613358, ppl: 13.644800 +epoch: 2, batch: 3724, sum loss: 4749.080078, avg loss: 2.764307, ppl: 15.868048 +epoch: 2, batch: 3725, sum loss: 3629.435791, avg loss: 2.373732, ppl: 10.737388 +epoch: 2, batch: 3726, sum loss: 4005.826660, avg loss: 2.817037, ppl: 16.727213 +epoch: 2, batch: 3727, sum loss: 4569.222168, avg loss: 2.827489, ppl: 16.902962 +epoch: 2, batch: 3728, sum loss: 4364.019043, avg loss: 2.698837, ppl: 14.862433 +epoch: 2, batch: 3729, sum loss: 3562.818115, avg loss: 2.348595, ppl: 10.470844 +epoch: 2, batch: 3730, sum loss: 4275.493652, avg loss: 2.497368, ppl: 12.150467 +epoch: 2, batch: 3731, sum loss: 3899.530273, avg loss: 2.592773, ppl: 13.366782 +epoch: 2, batch: 3732, sum loss: 5771.887207, avg loss: 2.711079, ppl: 15.045503 +epoch: 2, batch: 3733, sum loss: 3595.524414, avg loss: 2.267039, ppl: 9.650785 +epoch: 2, batch: 3734, sum loss: 4967.394043, avg loss: 2.548689, ppl: 12.790320 +epoch: 2, batch: 3735, sum loss: 3948.454834, avg loss: 2.287633, ppl: 9.851593 +epoch: 2, batch: 3736, sum loss: 4710.029297, avg loss: 2.525485, ppl: 12.496952 +epoch: 2, batch: 3737, sum loss: 4131.342773, 
avg loss: 2.609818, ppl: 13.596582 +epoch: 2, batch: 3738, sum loss: 3805.934326, avg loss: 2.418001, ppl: 11.223406 +epoch: 2, batch: 3739, sum loss: 3642.880371, avg loss: 2.330698, ppl: 10.285113 +epoch: 2, batch: 3740, sum loss: 4068.059814, avg loss: 2.730242, ppl: 15.336591 +epoch: 2, batch: 3741, sum loss: 3793.616699, avg loss: 2.186523, ppl: 8.904197 +epoch: 2, batch: 3742, sum loss: 3662.890381, avg loss: 2.219934, ppl: 9.206718 +epoch: 2, batch: 3743, sum loss: 4690.640137, avg loss: 2.647088, ppl: 14.112886 +epoch: 2, batch: 3744, sum loss: 3968.447266, avg loss: 2.449659, ppl: 11.584394 +epoch: 2, batch: 3745, sum loss: 4657.491699, avg loss: 2.590374, ppl: 13.334751 +epoch: 2, batch: 3746, sum loss: 3081.912598, avg loss: 2.296507, ppl: 9.939404 +epoch: 2, batch: 3747, sum loss: 4331.618164, avg loss: 2.762511, ppl: 15.839574 +epoch: 2, batch: 3748, sum loss: 3255.627930, avg loss: 2.132042, ppl: 8.432067 +epoch: 2, batch: 3749, sum loss: 4875.179688, avg loss: 2.612636, ppl: 13.634950 +epoch: 2, batch: 3750, sum loss: 4009.067139, avg loss: 2.428266, ppl: 11.339204 +epoch: 2, batch: 3751, sum loss: 4186.721680, avg loss: 2.559121, ppl: 12.924450 +epoch: 2, batch: 3752, sum loss: 5503.094727, avg loss: 2.826448, ppl: 16.885386 +epoch: 2, batch: 3753, sum loss: 4214.746582, avg loss: 2.483646, ppl: 11.984878 +epoch: 2, batch: 3754, sum loss: 3797.950195, avg loss: 2.423708, ppl: 11.287636 +epoch: 2, batch: 3755, sum loss: 4794.788086, avg loss: 2.597393, ppl: 13.428687 +epoch: 2, batch: 3756, sum loss: 4501.843750, avg loss: 2.703810, ppl: 14.936535 +epoch: 2, batch: 3757, sum loss: 3108.135742, avg loss: 2.036786, ppl: 7.665934 +epoch: 2, batch: 3758, sum loss: 4170.054688, avg loss: 2.656086, ppl: 14.240438 +epoch: 2, batch: 3759, sum loss: 4812.271484, avg loss: 2.648471, ppl: 14.132412 +epoch: 2, batch: 3760, sum loss: 3902.107910, avg loss: 2.428194, ppl: 11.338389 +epoch: 2, batch: 3761, sum loss: 3438.740723, avg loss: 2.599199, ppl: 13.452962 
+epoch: 2, batch: 3762, sum loss: 4909.502930, avg loss: 2.643782, ppl: 14.066298 +epoch: 2, batch: 3763, sum loss: 4161.766602, avg loss: 2.640715, ppl: 14.023225 +epoch: 2, batch: 3764, sum loss: 4459.262207, avg loss: 2.660658, ppl: 14.305694 +epoch: 2, batch: 3765, sum loss: 5245.757324, avg loss: 2.672317, ppl: 14.473459 +epoch: 2, batch: 3766, sum loss: 3184.352051, avg loss: 2.074497, ppl: 7.960537 +epoch: 2, batch: 3767, sum loss: 5342.862305, avg loss: 2.784191, ppl: 16.186716 +epoch: 2, batch: 3768, sum loss: 4503.342773, avg loss: 2.727646, ppl: 15.296830 +epoch: 2, batch: 3769, sum loss: 3591.971191, avg loss: 2.621877, ppl: 13.761529 +epoch: 2, batch: 3770, sum loss: 4850.340820, avg loss: 2.588229, ppl: 13.306182 +epoch: 2, batch: 3771, sum loss: 3902.047852, avg loss: 2.399783, ppl: 11.020789 +epoch: 2, batch: 3772, sum loss: 4683.486328, avg loss: 2.694756, ppl: 14.801910 +epoch: 2, batch: 3773, sum loss: 4443.296875, avg loss: 2.590843, ppl: 13.341020 +epoch: 2, batch: 3774, sum loss: 4134.612793, avg loss: 2.420733, ppl: 11.254110 +epoch: 2, batch: 3775, sum loss: 3726.344238, avg loss: 2.283299, ppl: 9.808989 +epoch: 2, batch: 3776, sum loss: 2903.017334, avg loss: 1.995201, ppl: 7.353680 +epoch: 2, batch: 3777, sum loss: 3679.391602, avg loss: 2.274037, ppl: 9.718554 +epoch: 2, batch: 3778, sum loss: 3721.117188, avg loss: 2.602180, ppl: 13.493121 +epoch: 2, batch: 3779, sum loss: 4216.810547, avg loss: 2.477562, ppl: 11.912187 +epoch: 2, batch: 3780, sum loss: 4583.697754, avg loss: 2.822474, ppl: 16.818409 +epoch: 2, batch: 3781, sum loss: 4680.853516, avg loss: 2.603367, ppl: 13.509142 +epoch: 2, batch: 3782, sum loss: 4420.687988, avg loss: 2.585198, ppl: 13.265911 +epoch: 2, batch: 3783, sum loss: 4399.418945, avg loss: 2.862342, ppl: 17.502464 +epoch: 2, batch: 3784, sum loss: 4044.982422, avg loss: 2.539223, ppl: 12.669826 +epoch: 2, batch: 3785, sum loss: 4933.910156, avg loss: 2.691713, ppl: 14.756934 +epoch: 2, batch: 3786, sum loss: 
3671.311279, avg loss: 2.524973, ppl: 12.490563 +epoch: 2, batch: 3787, sum loss: 5179.155273, avg loss: 2.705933, ppl: 14.968270 +epoch: 2, batch: 3788, sum loss: 4580.952148, avg loss: 2.810400, ppl: 16.616564 +epoch: 2, batch: 3789, sum loss: 4218.050293, avg loss: 2.608566, ppl: 13.579558 +epoch: 2, batch: 3790, sum loss: 4084.482422, avg loss: 2.398404, ppl: 11.005599 +epoch: 2, batch: 3791, sum loss: 3564.156250, avg loss: 2.326473, ppl: 10.241753 +epoch: 2, batch: 3792, sum loss: 3256.384277, avg loss: 2.357990, ppl: 10.569685 +epoch: 2, batch: 3793, sum loss: 3671.480713, avg loss: 2.487453, ppl: 12.030598 +epoch: 2, batch: 3794, sum loss: 3484.924072, avg loss: 2.232495, ppl: 9.323094 +epoch: 2, batch: 3795, sum loss: 4888.593750, avg loss: 2.899522, ppl: 18.165457 +epoch: 2, batch: 3796, sum loss: 3650.813232, avg loss: 2.432254, ppl: 11.384515 +epoch: 2, batch: 3797, sum loss: 3790.870117, avg loss: 2.460006, ppl: 11.704887 +epoch: 2, batch: 3798, sum loss: 4332.962402, avg loss: 2.519164, ppl: 12.418212 +epoch: 2, batch: 3799, sum loss: 4725.313477, avg loss: 2.781232, ppl: 16.138897 +epoch: 2, batch: 3800, sum loss: 4643.823242, avg loss: 2.653613, ppl: 14.205275 +epoch: 2, batch: 3801, sum loss: 4057.314941, avg loss: 2.416507, ppl: 11.206643 +epoch: 2, batch: 3802, sum loss: 4065.947266, avg loss: 2.514501, ppl: 12.360435 +epoch: 2, batch: 3803, sum loss: 4351.284668, avg loss: 2.451428, ppl: 11.604906 +epoch: 2, batch: 3804, sum loss: 5518.786133, avg loss: 2.951223, ppl: 19.129328 +epoch: 2, batch: 3805, sum loss: 5113.947754, avg loss: 2.590652, ppl: 13.338469 +epoch: 2, batch: 3806, sum loss: 4210.015137, avg loss: 2.531579, ppl: 12.573338 +epoch: 2, batch: 3807, sum loss: 5328.106934, avg loss: 2.979926, ppl: 19.686352 +epoch: 2, batch: 3808, sum loss: 3784.843262, avg loss: 2.481864, ppl: 11.963549 +epoch: 2, batch: 3809, sum loss: 4910.648926, avg loss: 2.828715, ppl: 16.923697 +epoch: 2, batch: 3810, sum loss: 4603.583008, avg loss: 2.832974, 
ppl: 16.995935 +epoch: 2, batch: 3811, sum loss: 3823.506348, avg loss: 2.560955, ppl: 12.948180 +epoch: 2, batch: 3812, sum loss: 4380.103027, avg loss: 2.521648, ppl: 12.449100 +epoch: 2, batch: 3813, sum loss: 3576.786621, avg loss: 2.487334, ppl: 12.029166 +epoch: 2, batch: 3814, sum loss: 4348.487305, avg loss: 2.638645, ppl: 13.994231 +epoch: 2, batch: 3815, sum loss: 3931.429688, avg loss: 2.446440, ppl: 11.547171 +epoch: 2, batch: 3816, sum loss: 4596.313477, avg loss: 2.604144, ppl: 13.519643 +epoch: 2, batch: 3817, sum loss: 4502.429199, avg loss: 2.599555, ppl: 13.457748 +epoch: 2, batch: 3818, sum loss: 4627.189453, avg loss: 2.831817, ppl: 16.976286 +epoch: 2, batch: 3819, sum loss: 4636.712891, avg loss: 2.624059, ppl: 13.791596 +epoch: 2, batch: 3820, sum loss: 4201.037598, avg loss: 2.468295, ppl: 11.802302 +epoch: 2, batch: 3821, sum loss: 4684.265625, avg loss: 2.634570, ppl: 13.937320 +epoch: 2, batch: 3822, sum loss: 4004.762207, avg loss: 2.687760, ppl: 14.698712 +epoch: 2, batch: 3823, sum loss: 4019.470459, avg loss: 2.461403, ppl: 11.721241 +epoch: 2, batch: 3824, sum loss: 4614.266113, avg loss: 2.612835, ppl: 13.637654 +epoch: 2, batch: 3825, sum loss: 4320.837891, avg loss: 2.506287, ppl: 12.259322 +epoch: 2, batch: 3826, sum loss: 3687.999756, avg loss: 2.344564, ppl: 10.428727 +epoch: 2, batch: 3827, sum loss: 4348.796875, avg loss: 2.344365, ppl: 10.426648 +epoch: 2, batch: 3828, sum loss: 4306.022461, avg loss: 2.507876, ppl: 12.278818 +epoch: 2, batch: 3829, sum loss: 4112.546875, avg loss: 2.452324, ppl: 11.615306 +epoch: 2, batch: 3830, sum loss: 4381.195312, avg loss: 2.568110, ppl: 13.041150 +epoch: 2, batch: 3831, sum loss: 3946.935059, avg loss: 2.261854, ppl: 9.600872 +epoch: 2, batch: 3832, sum loss: 4730.133789, avg loss: 2.738931, ppl: 15.470437 +epoch: 2, batch: 3833, sum loss: 4621.148926, avg loss: 2.521085, ppl: 12.442089 +epoch: 2, batch: 3834, sum loss: 3702.336670, avg loss: 2.347709, ppl: 10.461572 +epoch: 2, batch: 
3835, sum loss: 3713.997803, avg loss: 2.475998, ppl: 11.893576 +epoch: 2, batch: 3836, sum loss: 4187.765625, avg loss: 2.457609, ppl: 11.676858 +epoch: 2, batch: 3837, sum loss: 4391.705078, avg loss: 2.413025, ppl: 11.167691 +epoch: 2, batch: 3838, sum loss: 4943.153320, avg loss: 2.713037, ppl: 15.074989 +epoch: 2, batch: 3839, sum loss: 4685.464844, avg loss: 2.556173, ppl: 12.886405 +epoch: 2, batch: 3840, sum loss: 4032.007568, avg loss: 2.561631, ppl: 12.956936 +epoch: 2, batch: 3841, sum loss: 4405.648926, avg loss: 2.465388, ppl: 11.768051 +epoch: 2, batch: 3842, sum loss: 4012.618652, avg loss: 2.563973, ppl: 12.987319 +epoch: 2, batch: 3843, sum loss: 3837.712891, avg loss: 2.503401, ppl: 12.223991 +epoch: 2, batch: 3844, sum loss: 5481.209961, avg loss: 2.789420, ppl: 16.271578 +epoch: 2, batch: 3845, sum loss: 4241.257812, avg loss: 2.551900, ppl: 12.831462 +epoch: 2, batch: 3846, sum loss: 3184.897461, avg loss: 2.222538, ppl: 9.230733 +epoch: 2, batch: 3847, sum loss: 4579.062500, avg loss: 2.406234, ppl: 11.092105 +epoch: 2, batch: 3848, sum loss: 5290.233887, avg loss: 2.603462, ppl: 13.510424 +epoch: 2, batch: 3849, sum loss: 4744.732910, avg loss: 2.439451, ppl: 11.466749 +epoch: 2, batch: 3850, sum loss: 4378.932129, avg loss: 2.315670, ppl: 10.131709 +epoch: 2, batch: 3851, sum loss: 3878.181396, avg loss: 2.417819, ppl: 11.221359 +epoch: 2, batch: 3852, sum loss: 4687.318848, avg loss: 2.480063, ppl: 11.942017 +epoch: 2, batch: 3853, sum loss: 4732.243652, avg loss: 2.610173, ppl: 13.601407 +epoch: 2, batch: 3854, sum loss: 4706.221680, avg loss: 2.486118, ppl: 12.014546 +epoch: 2, batch: 3855, sum loss: 4902.015137, avg loss: 2.588181, ppl: 13.305551 +epoch: 2, batch: 3856, sum loss: 5105.215332, avg loss: 2.803523, ppl: 16.502684 +epoch: 2, batch: 3857, sum loss: 4842.278809, avg loss: 2.673815, ppl: 14.495163 +epoch: 2, batch: 3858, sum loss: 3591.789062, avg loss: 2.321777, ppl: 10.193773 +epoch: 2, batch: 3859, sum loss: 4069.991455, avg 
loss: 2.344465, ppl: 10.427692 +epoch: 2, batch: 3860, sum loss: 4620.653320, avg loss: 2.703718, ppl: 14.935153 +epoch: 2, batch: 3861, sum loss: 4231.577148, avg loss: 2.468832, ppl: 11.808640 +epoch: 2, batch: 3862, sum loss: 3857.776367, avg loss: 2.544708, ppl: 12.739502 +epoch: 2, batch: 3863, sum loss: 5029.470703, avg loss: 2.854410, ppl: 17.364197 +epoch: 2, batch: 3864, sum loss: 5151.228516, avg loss: 2.742933, ppl: 15.532475 +epoch: 2, batch: 3865, sum loss: 5683.779297, avg loss: 2.799891, ppl: 16.442862 +epoch: 2, batch: 3866, sum loss: 4326.478516, avg loss: 2.724483, ppl: 15.248525 +epoch: 2, batch: 3867, sum loss: 4226.652344, avg loss: 2.381212, ppl: 10.818011 +epoch: 2, batch: 3868, sum loss: 4289.342285, avg loss: 2.483696, ppl: 11.985478 +epoch: 2, batch: 3869, sum loss: 4989.183594, avg loss: 2.917651, ppl: 18.497787 +epoch: 2, batch: 3870, sum loss: 3825.829834, avg loss: 2.424480, ppl: 11.296356 +epoch: 2, batch: 3871, sum loss: 3579.357910, avg loss: 2.463426, ppl: 11.744983 +epoch: 2, batch: 3872, sum loss: 4239.094238, avg loss: 2.616725, ppl: 13.690812 +epoch: 2, batch: 3873, sum loss: 4558.387207, avg loss: 2.702067, ppl: 14.910522 +epoch: 2, batch: 3874, sum loss: 3619.648438, avg loss: 2.249626, ppl: 9.484190 +epoch: 2, batch: 3875, sum loss: 4282.684570, avg loss: 2.471255, ppl: 11.837289 +epoch: 2, batch: 3876, sum loss: 4124.330078, avg loss: 2.568076, ppl: 13.040709 +epoch: 2, batch: 3877, sum loss: 4365.584961, avg loss: 2.645809, ppl: 14.094846 +epoch: 2, batch: 3878, sum loss: 4434.124023, avg loss: 2.608308, ppl: 13.576065 +epoch: 2, batch: 3879, sum loss: 4149.210938, avg loss: 2.525387, ppl: 12.495730 +epoch: 2, batch: 3880, sum loss: 4300.764160, avg loss: 2.651519, ppl: 14.175556 +epoch: 2, batch: 3881, sum loss: 4322.480469, avg loss: 2.433829, ppl: 11.402459 +epoch: 2, batch: 3882, sum loss: 4682.508301, avg loss: 2.706652, ppl: 14.979044 +epoch: 2, batch: 3883, sum loss: 4596.123535, avg loss: 2.496536, ppl: 12.140373 
+epoch: 2, batch: 3884, sum loss: 4135.478516, avg loss: 2.324609, ppl: 10.222678 +epoch: 2, batch: 3885, sum loss: 3872.943604, avg loss: 2.518169, ppl: 12.405857 +epoch: 2, batch: 3886, sum loss: 3958.734863, avg loss: 2.405064, ppl: 11.079138 +epoch: 2, batch: 3887, sum loss: 4187.152832, avg loss: 2.396767, ppl: 10.987600 +epoch: 2, batch: 3888, sum loss: 4389.502441, avg loss: 2.597339, ppl: 13.427955 +epoch: 2, batch: 3889, sum loss: 4092.854004, avg loss: 2.341450, ppl: 10.396297 +epoch: 2, batch: 3890, sum loss: 4400.665039, avg loss: 2.419277, ppl: 11.237734 +epoch: 2, batch: 3891, sum loss: 4184.655273, avg loss: 2.502784, ppl: 12.216463 +epoch: 2, batch: 3892, sum loss: 4238.150391, avg loss: 2.560816, ppl: 12.946378 +epoch: 2, batch: 3893, sum loss: 4517.112793, avg loss: 2.644680, ppl: 14.078933 +epoch: 2, batch: 3894, sum loss: 4523.060059, avg loss: 2.568461, ppl: 13.045734 +epoch: 2, batch: 3895, sum loss: 4095.224365, avg loss: 2.495566, ppl: 12.128601 +epoch: 2, batch: 3896, sum loss: 4335.977539, avg loss: 2.607323, ppl: 13.562691 +epoch: 2, batch: 3897, sum loss: 4068.803467, avg loss: 2.488565, ppl: 12.043978 +epoch: 2, batch: 3898, sum loss: 4426.043457, avg loss: 2.623618, ppl: 13.785511 +epoch: 2, batch: 3899, sum loss: 4524.402832, avg loss: 2.557605, ppl: 12.904870 +epoch: 2, batch: 3900, sum loss: 4185.861328, avg loss: 2.596688, ppl: 13.419220 +epoch: 2, batch: 3901, sum loss: 3631.884033, avg loss: 2.575804, ppl: 13.141885 +epoch: 2, batch: 3902, sum loss: 4221.050293, avg loss: 2.518527, ppl: 12.410297 +epoch: 2, batch: 3903, sum loss: 4637.695312, avg loss: 2.798851, ppl: 16.425755 +epoch: 2, batch: 3904, sum loss: 3959.945801, avg loss: 2.429415, ppl: 11.352234 +epoch: 2, batch: 3905, sum loss: 5317.832520, avg loss: 2.876059, ppl: 17.744198 +epoch: 2, batch: 3906, sum loss: 4284.996094, avg loss: 2.609620, ppl: 13.593885 +epoch: 2, batch: 3907, sum loss: 4084.824219, avg loss: 2.203249, ppl: 9.054385 +epoch: 2, batch: 3908, sum 
loss: 4242.373535, avg loss: 2.429767, ppl: 11.356240 +epoch: 2, batch: 3909, sum loss: 4321.745117, avg loss: 2.499563, ppl: 12.177176 +epoch: 2, batch: 3910, sum loss: 3294.539551, avg loss: 2.365068, ppl: 10.644762 +epoch: 2, batch: 3911, sum loss: 3727.285645, avg loss: 2.523552, ppl: 12.472817 +epoch: 2, batch: 3912, sum loss: 4275.486816, avg loss: 2.586501, ppl: 13.283217 +epoch: 2, batch: 3913, sum loss: 4251.236816, avg loss: 2.412734, ppl: 11.164441 +epoch: 2, batch: 3914, sum loss: 4297.294434, avg loss: 2.507173, ppl: 12.270194 +epoch: 2, batch: 3915, sum loss: 3779.977539, avg loss: 2.467348, ppl: 11.791140 +epoch: 2, batch: 3916, sum loss: 4788.830566, avg loss: 2.714756, ppl: 15.100932 +epoch: 2, batch: 3917, sum loss: 4551.836426, avg loss: 2.873634, ppl: 17.701229 +epoch: 2, batch: 3918, sum loss: 4143.376953, avg loss: 2.318622, ppl: 10.161658 +epoch: 2, batch: 3919, sum loss: 4774.260742, avg loss: 2.645020, ppl: 14.083723 +epoch: 2, batch: 3920, sum loss: 4801.167969, avg loss: 2.551099, ppl: 12.821184 +epoch: 2, batch: 3921, sum loss: 3693.210938, avg loss: 2.367443, ppl: 10.670074 +epoch: 2, batch: 3922, sum loss: 4452.952637, avg loss: 2.650567, ppl: 14.162067 +epoch: 2, batch: 3923, sum loss: 4639.982910, avg loss: 2.657493, ppl: 14.260495 +epoch: 2, batch: 3924, sum loss: 3843.283203, avg loss: 2.234467, ppl: 9.341502 +epoch: 2, batch: 3925, sum loss: 3396.117188, avg loss: 2.452070, ppl: 11.612362 +epoch: 2, batch: 3926, sum loss: 3906.661621, avg loss: 2.357671, ppl: 10.566319 +epoch: 2, batch: 3927, sum loss: 3557.938232, avg loss: 2.448684, ppl: 11.573109 +epoch: 2, batch: 3928, sum loss: 4151.178223, avg loss: 2.557719, ppl: 12.906347 +epoch: 2, batch: 3929, sum loss: 4410.353027, avg loss: 2.722440, ppl: 15.217411 +epoch: 2, batch: 3930, sum loss: 3463.868164, avg loss: 2.256592, ppl: 9.550482 +epoch: 2, batch: 3931, sum loss: 4054.269531, avg loss: 2.513496, ppl: 12.348028 +epoch: 2, batch: 3932, sum loss: 5691.885254, avg loss: 
2.834604, ppl: 17.023663 +epoch: 2, batch: 3933, sum loss: 4469.483887, avg loss: 2.576072, ppl: 13.145395 +epoch: 2, batch: 3934, sum loss: 4631.850098, avg loss: 2.622792, ppl: 13.774121 +epoch: 2, batch: 3935, sum loss: 3816.392822, avg loss: 2.329910, ppl: 10.277017 +epoch: 2, batch: 3936, sum loss: 3828.846191, avg loss: 2.291350, ppl: 9.888282 +epoch: 2, batch: 3937, sum loss: 4380.889160, avg loss: 2.435180, ppl: 11.417876 +epoch: 2, batch: 3938, sum loss: 5572.337402, avg loss: 2.879761, ppl: 17.810011 +epoch: 2, batch: 3939, sum loss: 4453.342285, avg loss: 2.429538, ppl: 11.353630 +epoch: 2, batch: 3940, sum loss: 4090.530762, avg loss: 2.506453, ppl: 12.261359 +epoch: 2, batch: 3941, sum loss: 4896.865723, avg loss: 2.842058, ppl: 17.151026 +epoch: 2, batch: 3942, sum loss: 3503.328857, avg loss: 2.396258, ppl: 10.982003 +epoch: 2, batch: 3943, sum loss: 5058.153809, avg loss: 2.744522, ppl: 15.557174 +epoch: 2, batch: 3944, sum loss: 4496.544922, avg loss: 2.528991, ppl: 12.540840 +epoch: 2, batch: 3945, sum loss: 3663.229492, avg loss: 2.388024, ppl: 10.891953 +epoch: 2, batch: 3946, sum loss: 3592.542480, avg loss: 2.612758, ppl: 13.636611 +epoch: 2, batch: 3947, sum loss: 3856.751221, avg loss: 2.486622, ppl: 12.020606 +epoch: 2, batch: 3948, sum loss: 4191.722656, avg loss: 2.690451, ppl: 14.738323 +epoch: 2, batch: 3949, sum loss: 4228.098633, avg loss: 2.506283, ppl: 12.259273 +epoch: 2, batch: 3950, sum loss: 4513.655762, avg loss: 2.669223, ppl: 14.428751 +epoch: 2, batch: 3951, sum loss: 3811.620850, avg loss: 2.491256, ppl: 12.076428 +epoch: 2, batch: 3952, sum loss: 3895.086426, avg loss: 2.539170, ppl: 12.669148 +epoch: 2, batch: 3953, sum loss: 4602.613281, avg loss: 2.746189, ppl: 15.583137 +epoch: 2, batch: 3954, sum loss: 4467.550781, avg loss: 2.514097, ppl: 12.355450 +epoch: 2, batch: 3955, sum loss: 4553.086914, avg loss: 2.569462, ppl: 13.058801 +epoch: 2, batch: 3956, sum loss: 3888.939453, avg loss: 2.384390, ppl: 10.852437 +epoch: 
2, batch: 3957, sum loss: 4484.657715, avg loss: 2.619543, ppl: 13.729449 +epoch: 2, batch: 3958, sum loss: 3809.471680, avg loss: 2.305976, ppl: 10.033963 +epoch: 2, batch: 3959, sum loss: 5176.584961, avg loss: 2.626375, ppl: 13.823568 +epoch: 2, batch: 3960, sum loss: 4589.900391, avg loss: 2.717525, ppl: 15.142805 +epoch: 2, batch: 3961, sum loss: 4549.742676, avg loss: 2.765801, ppl: 15.891764 +epoch: 2, batch: 3962, sum loss: 4636.185547, avg loss: 2.550157, ppl: 12.809115 +epoch: 2, batch: 3963, sum loss: 4457.180664, avg loss: 2.367064, ppl: 10.666026 +epoch: 2, batch: 3964, sum loss: 5291.474121, avg loss: 2.880498, ppl: 17.823141 +epoch: 2, batch: 3965, sum loss: 4990.184082, avg loss: 2.809788, ppl: 16.606405 +epoch: 2, batch: 3966, sum loss: 4010.053467, avg loss: 2.464692, ppl: 11.759855 +epoch: 2, batch: 3967, sum loss: 3570.481689, avg loss: 2.445535, ppl: 11.536725 +epoch: 2, batch: 3968, sum loss: 4145.062012, avg loss: 2.590664, ppl: 13.338621 +epoch: 2, batch: 3969, sum loss: 3478.772461, avg loss: 2.293192, ppl: 9.906510 +epoch: 2, batch: 3970, sum loss: 3964.190918, avg loss: 2.355431, ppl: 10.542675 +epoch: 2, batch: 3971, sum loss: 4186.880859, avg loss: 2.517667, ppl: 12.399638 +epoch: 2, batch: 3972, sum loss: 3798.808838, avg loss: 2.346392, ppl: 10.447807 +epoch: 2, batch: 3973, sum loss: 4436.551758, avg loss: 2.471617, ppl: 11.841577 +epoch: 2, batch: 3974, sum loss: 4340.377930, avg loss: 2.583558, ppl: 13.244182 +epoch: 2, batch: 3975, sum loss: 3195.305908, avg loss: 2.317118, ppl: 10.146392 +epoch: 2, batch: 3976, sum loss: 4007.391602, avg loss: 2.383933, ppl: 10.847483 +epoch: 2, batch: 3977, sum loss: 5210.596680, avg loss: 2.669363, ppl: 14.430778 +epoch: 2, batch: 3978, sum loss: 3808.217285, avg loss: 2.408740, ppl: 11.119936 +epoch: 2, batch: 3979, sum loss: 4186.768555, avg loss: 2.492124, ppl: 12.086926 +epoch: 2, batch: 3980, sum loss: 4451.871094, avg loss: 2.591310, ppl: 13.347248 +epoch: 2, batch: 3981, sum loss: 
3583.380371, avg loss: 2.532424, ppl: 12.583976 +epoch: 2, batch: 3982, sum loss: 4546.958008, avg loss: 2.920333, ppl: 18.547457 +epoch: 2, batch: 3983, sum loss: 4115.857422, avg loss: 2.561206, ppl: 12.951423 +epoch: 2, batch: 3984, sum loss: 3579.384277, avg loss: 2.328812, ppl: 10.265739 +epoch: 2, batch: 3985, sum loss: 4227.437012, avg loss: 2.784873, ppl: 16.197762 +epoch: 2, batch: 3986, sum loss: 4975.900879, avg loss: 2.652399, ppl: 14.188039 +epoch: 2, batch: 3987, sum loss: 4192.796875, avg loss: 2.651990, ppl: 14.182240 +epoch: 2, batch: 3988, sum loss: 4005.807373, avg loss: 2.401563, ppl: 11.040421 +epoch: 2, batch: 3989, sum loss: 4006.149170, avg loss: 2.540361, ppl: 12.684248 +epoch: 2, batch: 3990, sum loss: 3967.254883, avg loss: 2.339183, ppl: 10.372762 +epoch: 2, batch: 3991, sum loss: 4770.275879, avg loss: 2.675421, ppl: 14.518464 +epoch: 2, batch: 3992, sum loss: 4111.153809, avg loss: 2.496147, ppl: 12.135641 +epoch: 2, batch: 3993, sum loss: 3533.920654, avg loss: 2.253776, ppl: 9.523627 +epoch: 2, batch: 3994, sum loss: 4282.895020, avg loss: 2.558480, ppl: 12.916167 +epoch: 2, batch: 3995, sum loss: 5033.918945, avg loss: 2.498223, ppl: 12.160863 +epoch: 2, batch: 3996, sum loss: 4183.336426, avg loss: 2.582307, ppl: 13.227614 +epoch: 2, batch: 3997, sum loss: 4481.745605, avg loss: 2.670885, ppl: 14.452759 +epoch: 2, batch: 3998, sum loss: 3916.786377, avg loss: 2.664480, ppl: 14.360487 +epoch: 2, batch: 3999, sum loss: 3973.710449, avg loss: 2.337477, ppl: 10.355078 +epoch: 2, batch: 4000, sum loss: 4972.577148, avg loss: 2.773328, ppl: 16.011833 +epoch: 2, batch: 4001, sum loss: 3634.375488, avg loss: 2.555820, ppl: 12.881852 +epoch: 2, batch: 4002, sum loss: 4481.535645, avg loss: 2.439595, ppl: 11.468395 +epoch: 2, batch: 4003, sum loss: 4078.851562, avg loss: 2.522481, ppl: 12.459470 +epoch: 2, batch: 4004, sum loss: 3475.855225, avg loss: 2.301891, ppl: 9.993060 +epoch: 2, batch: 4005, sum loss: 4604.911621, avg loss: 2.691357, 
ppl: 14.751682 +epoch: 2, batch: 4006, sum loss: 4369.922852, avg loss: 2.489985, ppl: 12.061090 +epoch: 2, batch: 4007, sum loss: 3410.629395, avg loss: 2.491329, ppl: 12.077318 +epoch: 2, batch: 4008, sum loss: 4048.902832, avg loss: 2.692090, ppl: 14.762491 +epoch: 2, batch: 4009, sum loss: 4148.541504, avg loss: 2.587986, ppl: 13.302953 +epoch: 2, batch: 4010, sum loss: 4124.079102, avg loss: 2.610177, ppl: 13.601452 +epoch: 2, batch: 4011, sum loss: 4878.903320, avg loss: 2.712008, ppl: 15.059492 +epoch: 2, batch: 4012, sum loss: 4918.547852, avg loss: 2.823506, ppl: 16.835775 +epoch: 2, batch: 4013, sum loss: 3627.300293, avg loss: 2.501586, ppl: 12.201836 +epoch: 2, batch: 4014, sum loss: 5210.058594, avg loss: 2.789111, ppl: 16.266546 +epoch: 2, batch: 4015, sum loss: 4998.014160, avg loss: 2.652874, ppl: 14.194776 +epoch: 2, batch: 4016, sum loss: 3311.508789, avg loss: 2.294878, ppl: 9.923223 +epoch: 2, batch: 4017, sum loss: 4318.651855, avg loss: 2.538890, ppl: 12.665603 +epoch: 2, batch: 4018, sum loss: 3816.446777, avg loss: 2.364589, ppl: 10.639668 +epoch: 2, batch: 4019, sum loss: 4452.112305, avg loss: 2.552817, ppl: 12.843227 +epoch: 2, batch: 4020, sum loss: 4149.564453, avg loss: 2.572576, ppl: 13.099520 +epoch: 2, batch: 4021, sum loss: 3908.429688, avg loss: 2.371620, ppl: 10.714735 +epoch: 2, batch: 4022, sum loss: 3610.789062, avg loss: 2.304269, ppl: 10.016851 +epoch: 2, batch: 4023, sum loss: 4529.350098, avg loss: 2.580826, ppl: 13.208047 +epoch: 2, batch: 4024, sum loss: 4680.477051, avg loss: 2.799329, ppl: 16.433609 +epoch: 2, batch: 4025, sum loss: 4050.294678, avg loss: 2.598008, ppl: 13.436947 +epoch: 2, batch: 4026, sum loss: 3402.633545, avg loss: 2.379464, ppl: 10.799114 +epoch: 2, batch: 4027, sum loss: 4740.881348, avg loss: 2.573768, ppl: 13.115154 +epoch: 2, batch: 4028, sum loss: 5574.448730, avg loss: 2.937012, ppl: 18.859413 +epoch: 2, batch: 4029, sum loss: 3761.991943, avg loss: 2.302321, ppl: 9.997356 +epoch: 2, batch: 
4030, sum loss: 3283.216309, avg loss: 2.276849, ppl: 9.745923 +epoch: 2, batch: 4031, sum loss: 4156.605469, avg loss: 2.480075, ppl: 11.942159 +epoch: 2, batch: 4032, sum loss: 4611.112793, avg loss: 2.718817, ppl: 15.162367 +epoch: 2, batch: 4033, sum loss: 4696.105469, avg loss: 2.695812, ppl: 14.817553 +epoch: 2, batch: 4034, sum loss: 4496.510254, avg loss: 2.480149, ppl: 11.943047 +epoch: 2, batch: 4035, sum loss: 4882.916016, avg loss: 2.875687, ppl: 17.737604 +epoch: 2, batch: 4036, sum loss: 3395.346191, avg loss: 2.317642, ppl: 10.151713 +epoch: 2, batch: 4037, sum loss: 3700.428467, avg loss: 2.271595, ppl: 9.694852 +epoch: 2, batch: 4038, sum loss: 4591.713379, avg loss: 2.623836, ppl: 13.788519 +epoch: 2, batch: 4039, sum loss: 3863.213379, avg loss: 2.556726, ppl: 12.893538 +epoch: 2, batch: 4040, sum loss: 3550.659668, avg loss: 2.316151, ppl: 10.136585 +epoch: 2, batch: 4041, sum loss: 4294.670410, avg loss: 2.499808, ppl: 12.180156 +epoch: 2, batch: 4042, sum loss: 3579.424316, avg loss: 2.431674, ppl: 11.377915 +epoch: 2, batch: 4043, sum loss: 3957.661377, avg loss: 2.336282, ppl: 10.342709 +epoch: 2, batch: 4044, sum loss: 4219.031250, avg loss: 2.728998, ppl: 15.317534 +epoch: 2, batch: 4045, sum loss: 3743.929443, avg loss: 2.256739, ppl: 9.551887 +epoch: 2, batch: 4046, sum loss: 3436.800537, avg loss: 2.435720, ppl: 11.424038 +epoch: 2, batch: 4047, sum loss: 4524.313477, avg loss: 2.566258, ppl: 13.017029 +epoch: 2, batch: 4048, sum loss: 4585.637207, avg loss: 2.568985, ppl: 13.052563 +epoch: 2, batch: 4049, sum loss: 4804.253418, avg loss: 2.651354, ppl: 14.173217 +epoch: 2, batch: 4050, sum loss: 4454.279785, avg loss: 2.768353, ppl: 15.932367 +epoch: 2, batch: 4051, sum loss: 3262.463867, avg loss: 2.242243, ppl: 9.414425 +epoch: 2, batch: 4052, sum loss: 3580.028564, avg loss: 2.370880, ppl: 10.706809 +epoch: 2, batch: 4053, sum loss: 4535.299805, avg loss: 2.666255, ppl: 14.385993 +epoch: 2, batch: 4054, sum loss: 5165.490234, avg 
loss: 2.680587, ppl: 14.593651 +epoch: 2, batch: 4055, sum loss: 4612.055176, avg loss: 2.712974, ppl: 15.074037 +epoch: 2, batch: 4056, sum loss: 4599.923340, avg loss: 2.551261, ppl: 12.823263 +epoch: 2, batch: 4057, sum loss: 4547.667480, avg loss: 2.536345, ppl: 12.633417 +epoch: 2, batch: 4058, sum loss: 4333.830078, avg loss: 2.496446, ppl: 12.139273 +epoch: 2, batch: 4059, sum loss: 4511.069336, avg loss: 2.650452, ppl: 14.160437 +epoch: 2, batch: 4060, sum loss: 4143.309570, avg loss: 2.543468, ppl: 12.723721 +epoch: 2, batch: 4061, sum loss: 4929.078125, avg loss: 2.714250, ppl: 15.093284 +epoch: 2, batch: 4062, sum loss: 4875.128418, avg loss: 2.714437, ppl: 15.096105 +epoch: 2, batch: 4063, sum loss: 4548.591797, avg loss: 2.485569, ppl: 12.007957 +epoch: 2, batch: 4064, sum loss: 4592.391113, avg loss: 2.637789, ppl: 13.982258 +epoch: 2, batch: 4065, sum loss: 4152.600586, avg loss: 2.291722, ppl: 9.891957 +epoch: 2, batch: 4066, sum loss: 4235.566406, avg loss: 2.495914, ppl: 12.132820 +epoch: 2, batch: 4067, sum loss: 3802.758301, avg loss: 2.363430, ppl: 10.627336 +epoch: 2, batch: 4068, sum loss: 4161.898926, avg loss: 2.493648, ppl: 12.105359 +epoch: 2, batch: 4069, sum loss: 4119.527344, avg loss: 2.433271, ppl: 11.396097 +epoch: 2, batch: 4070, sum loss: 5295.691895, avg loss: 2.733966, ppl: 15.393816 +epoch: 2, batch: 4071, sum loss: 3217.327393, avg loss: 2.452231, ppl: 11.614231 +epoch: 2, batch: 4072, sum loss: 3612.612549, avg loss: 2.544093, ppl: 12.731680 +epoch: 2, batch: 4073, sum loss: 4721.083984, avg loss: 2.655278, ppl: 14.228941 +epoch: 2, batch: 4074, sum loss: 4856.979492, avg loss: 2.502308, ppl: 12.210639 +epoch: 2, batch: 4075, sum loss: 3996.881104, avg loss: 2.838694, ppl: 17.093428 +epoch: 2, batch: 4076, sum loss: 4761.500000, avg loss: 2.375998, ppl: 10.761748 +epoch: 2, batch: 4077, sum loss: 4373.644531, avg loss: 2.655522, ppl: 14.232411 +epoch: 2, batch: 4078, sum loss: 4014.117188, avg loss: 2.456620, ppl: 11.665318 
+epoch: 2, batch: 4079, sum loss: 3539.643555, avg loss: 2.245967, ppl: 9.449546 +epoch: 2, batch: 4080, sum loss: 5359.148438, avg loss: 2.525518, ppl: 12.497363 +epoch: 2, batch: 4081, sum loss: 4401.083008, avg loss: 2.755844, ppl: 15.734317 +epoch: 2, batch: 4082, sum loss: 4545.466797, avg loss: 2.550767, ppl: 12.816933 +epoch: 2, batch: 4083, sum loss: 3916.467773, avg loss: 2.398327, ppl: 11.004748 +epoch: 2, batch: 4084, sum loss: 4057.227295, avg loss: 2.413579, ppl: 11.173886 +epoch: 2, batch: 4085, sum loss: 4083.458740, avg loss: 2.261051, ppl: 9.593170 +epoch: 2, batch: 4086, sum loss: 3747.076660, avg loss: 2.684152, ppl: 14.645782 +epoch: 2, batch: 4087, sum loss: 3660.995361, avg loss: 2.460346, ppl: 11.708867 +epoch: 2, batch: 4088, sum loss: 3681.654541, avg loss: 2.519955, ppl: 12.428042 +epoch: 2, batch: 4089, sum loss: 4703.562012, avg loss: 2.703197, ppl: 14.927371 +epoch: 2, batch: 4090, sum loss: 4668.329590, avg loss: 2.695340, ppl: 14.810559 +epoch: 2, batch: 4091, sum loss: 4271.676758, avg loss: 2.505382, ppl: 12.248240 +epoch: 2, batch: 4092, sum loss: 5128.482910, avg loss: 2.582318, ppl: 13.227759 +epoch: 2, batch: 4093, sum loss: 3977.083496, avg loss: 2.433956, ppl: 11.403903 +epoch: 2, batch: 4094, sum loss: 4895.248047, avg loss: 2.719582, ppl: 15.173983 +epoch: 2, batch: 4095, sum loss: 3542.038574, avg loss: 2.267630, ppl: 9.656491 +epoch: 2, batch: 4096, sum loss: 3943.110840, avg loss: 2.345694, ppl: 10.440512 +epoch: 2, batch: 4097, sum loss: 3745.389648, avg loss: 2.439993, ppl: 11.472962 +epoch: 2, batch: 4098, sum loss: 5939.760254, avg loss: 2.850173, ppl: 17.290773 +epoch: 2, batch: 4099, sum loss: 4611.955078, avg loss: 2.641441, ppl: 14.033406 +epoch: 2, batch: 4100, sum loss: 3634.985596, avg loss: 2.259158, ppl: 9.575025 +epoch: 2, batch: 4101, sum loss: 5317.664551, avg loss: 2.763859, ppl: 15.860933 +epoch: 2, batch: 4102, sum loss: 5454.729980, avg loss: 2.841005, ppl: 17.132980 +epoch: 2, batch: 4103, sum loss: 
4290.481445, avg loss: 2.693334, ppl: 14.780878 +epoch: 2, batch: 4104, sum loss: 4565.496094, avg loss: 2.553410, ppl: 12.850845 +epoch: 2, batch: 4105, sum loss: 3959.355469, avg loss: 2.724952, ppl: 15.255685 +epoch: 2, batch: 4106, sum loss: 4326.190430, avg loss: 2.526980, ppl: 12.515656 +epoch: 2, batch: 4107, sum loss: 3918.661621, avg loss: 2.313259, ppl: 10.107314 +epoch: 2, batch: 4108, sum loss: 4234.437500, avg loss: 2.344650, ppl: 10.429620 +epoch: 2, batch: 4109, sum loss: 4616.364746, avg loss: 2.657665, ppl: 14.262953 +epoch: 2, batch: 4110, sum loss: 4776.903320, avg loss: 2.570992, ppl: 13.078792 +epoch: 2, batch: 4111, sum loss: 3914.085693, avg loss: 2.505817, ppl: 12.253562 +epoch: 2, batch: 4112, sum loss: 4011.083008, avg loss: 2.637136, ppl: 13.973121 +epoch: 2, batch: 4113, sum loss: 4493.205078, avg loss: 2.751503, ppl: 15.666164 +epoch: 2, batch: 4114, sum loss: 3918.036133, avg loss: 2.346129, ppl: 10.445063 +epoch: 2, batch: 4115, sum loss: 3459.041016, avg loss: 2.231639, ppl: 9.315125 +epoch: 2, batch: 4116, sum loss: 4163.934082, avg loss: 2.583086, ppl: 13.237922 +epoch: 2, batch: 4117, sum loss: 4725.080078, avg loss: 2.678617, ppl: 14.564933 +epoch: 2, batch: 4118, sum loss: 4469.382324, avg loss: 2.610620, ppl: 13.607491 +epoch: 2, batch: 4119, sum loss: 3723.993896, avg loss: 2.528170, ppl: 12.530553 +epoch: 2, batch: 4120, sum loss: 4398.784668, avg loss: 2.645090, ppl: 14.084714 +epoch: 2, batch: 4121, sum loss: 4734.319824, avg loss: 2.679298, ppl: 14.574861 +epoch: 2, batch: 4122, sum loss: 4039.196045, avg loss: 2.336146, ppl: 10.341303 +epoch: 2, batch: 4123, sum loss: 4290.806641, avg loss: 2.629171, ppl: 13.862272 +epoch: 2, batch: 4124, sum loss: 3994.526367, avg loss: 2.507550, ppl: 12.274814 +epoch: 2, batch: 4125, sum loss: 3644.579590, avg loss: 2.279287, ppl: 9.769709 +epoch: 2, batch: 4126, sum loss: 5096.971680, avg loss: 2.768589, ppl: 15.936129 +epoch: 2, batch: 4127, sum loss: 3664.562012, avg loss: 2.584317, 
ppl: 13.254239 +epoch: 2, batch: 4128, sum loss: 4030.855225, avg loss: 2.463848, ppl: 11.749940 +epoch: 2, batch: 4129, sum loss: 4261.397949, avg loss: 2.582665, ppl: 13.232361 +epoch: 2, batch: 4130, sum loss: 3920.468262, avg loss: 2.624142, ppl: 13.792737 +epoch: 2, batch: 4131, sum loss: 4216.220703, avg loss: 2.607434, ppl: 13.564201 +epoch: 2, batch: 4132, sum loss: 4425.181641, avg loss: 2.656171, ppl: 14.241658 +epoch: 2, batch: 4133, sum loss: 4957.792969, avg loss: 2.676994, ppl: 14.541318 +epoch: 2, batch: 4134, sum loss: 4669.266602, avg loss: 2.626134, ppl: 13.820243 +epoch: 2, batch: 4135, sum loss: 3695.409424, avg loss: 2.365819, ppl: 10.652762 +epoch: 2, batch: 4136, sum loss: 3309.677246, avg loss: 2.428230, ppl: 11.338793 +epoch: 2, batch: 4137, sum loss: 4305.970703, avg loss: 2.538898, ppl: 12.665703 +epoch: 2, batch: 4138, sum loss: 4602.763184, avg loss: 2.479937, ppl: 11.940513 +epoch: 2, batch: 4139, sum loss: 3829.782471, avg loss: 2.572050, ppl: 13.092638 +epoch: 2, batch: 4140, sum loss: 4382.045898, avg loss: 2.597538, ppl: 13.430628 +epoch: 2, batch: 4141, sum loss: 3785.858887, avg loss: 2.331194, ppl: 10.290220 +epoch: 2, batch: 4142, sum loss: 3629.168945, avg loss: 2.390757, ppl: 10.921757 +epoch: 2, batch: 4143, sum loss: 4739.844238, avg loss: 2.636176, ppl: 13.959718 +epoch: 2, batch: 4144, sum loss: 5019.907227, avg loss: 2.876738, ppl: 17.756250 +epoch: 2, batch: 4145, sum loss: 5228.456055, avg loss: 2.697862, ppl: 14.847952 +epoch: 2, batch: 4146, sum loss: 4888.516113, avg loss: 2.665494, ppl: 14.375049 +epoch: 2, batch: 4147, sum loss: 4054.115234, avg loss: 2.569148, ppl: 13.054695 +epoch: 2, batch: 4148, sum loss: 5671.817871, avg loss: 2.879096, ppl: 17.798168 +epoch: 2, batch: 4149, sum loss: 5193.892578, avg loss: 2.737951, ppl: 15.455281 +epoch: 2, batch: 4150, sum loss: 4429.283203, avg loss: 2.627096, ppl: 13.833535 +epoch: 2, batch: 4151, sum loss: 4701.606934, avg loss: 2.475833, ppl: 11.891611 +epoch: 2, 
batch: 4152, sum loss: 4350.305664, avg loss: 2.612796, ppl: 13.637131 +epoch: 2, batch: 4153, sum loss: 3707.044434, avg loss: 2.243974, ppl: 9.430732 +epoch: 2, batch: 4154, sum loss: 4280.546387, avg loss: 2.471447, ppl: 11.839566 +epoch: 2, batch: 4155, sum loss: 3698.959717, avg loss: 2.492560, ppl: 12.092197 +epoch: 2, batch: 4156, sum loss: 4080.250732, avg loss: 2.524908, ppl: 12.489741 +epoch: 2, batch: 4157, sum loss: 4165.786133, avg loss: 2.490010, ppl: 12.061394 +epoch: 2, batch: 4158, sum loss: 4297.895996, avg loss: 2.500231, ppl: 12.185303 +epoch: 2, batch: 4159, sum loss: 4553.154297, avg loss: 2.650265, ppl: 14.157783 +epoch: 2, batch: 4160, sum loss: 3465.128174, avg loss: 2.251545, ppl: 9.502407 +epoch: 2, batch: 4161, sum loss: 4368.216797, avg loss: 2.740412, ppl: 15.493367 +epoch: 2, batch: 4162, sum loss: 4011.320801, avg loss: 2.503946, ppl: 12.230656 +epoch: 2, batch: 4163, sum loss: 4672.370605, avg loss: 2.477397, ppl: 11.910221 +epoch: 2, batch: 4164, sum loss: 3920.188965, avg loss: 2.318267, ppl: 10.158053 +epoch: 2, batch: 4165, sum loss: 4263.448242, avg loss: 2.537767, ppl: 12.651388 +epoch: 2, batch: 4166, sum loss: 4256.133789, avg loss: 2.673451, ppl: 14.489887 +epoch: 2, batch: 4167, sum loss: 4163.688965, avg loss: 2.832442, ppl: 16.986885 +epoch: 2, batch: 4168, sum loss: 5456.323730, avg loss: 2.737744, ppl: 15.452087 +epoch: 2, batch: 4169, sum loss: 3489.961670, avg loss: 2.291505, ppl: 9.889810 +epoch: 2, batch: 4170, sum loss: 4483.690430, avg loss: 2.570923, ppl: 13.077894 +epoch: 2, batch: 4171, sum loss: 3868.135254, avg loss: 2.503648, ppl: 12.227012 +epoch: 2, batch: 4172, sum loss: 3828.203613, avg loss: 2.355818, ppl: 10.546748 +epoch: 2, batch: 4173, sum loss: 4318.006836, avg loss: 2.394901, ppl: 10.967115 +epoch: 2, batch: 4174, sum loss: 3922.554443, avg loss: 2.410912, ppl: 11.144123 +epoch: 2, batch: 4175, sum loss: 3915.915527, avg loss: 2.656659, ppl: 14.248607 +epoch: 2, batch: 4176, sum loss: 
4679.799805, avg loss: 2.595563, ppl: 13.404131 +epoch: 2, batch: 4177, sum loss: 4022.875000, avg loss: 2.554206, ppl: 12.861086 +epoch: 2, batch: 4178, sum loss: 4601.949219, avg loss: 2.604386, ppl: 13.522915 +epoch: 2, batch: 4179, sum loss: 3874.750000, avg loss: 2.534173, ppl: 12.605998 +epoch: 2, batch: 4180, sum loss: 4507.872070, avg loss: 2.575927, ppl: 13.143493 +epoch: 2, batch: 4181, sum loss: 3686.908203, avg loss: 2.275869, ppl: 9.736380 +epoch: 2, batch: 4182, sum loss: 4936.216797, avg loss: 2.690037, ppl: 14.732214 +epoch: 2, batch: 4183, sum loss: 4942.837891, avg loss: 2.559730, ppl: 12.932320 +epoch: 2, batch: 4184, sum loss: 4018.011230, avg loss: 2.444046, ppl: 11.519552 +epoch: 2, batch: 4185, sum loss: 4848.746094, avg loss: 2.572279, ppl: 13.095638 +epoch: 2, batch: 4186, sum loss: 4118.750000, avg loss: 2.734894, ppl: 15.408107 +epoch: 2, batch: 4187, sum loss: 4615.323242, avg loss: 2.670905, ppl: 14.453038 +epoch: 2, batch: 4188, sum loss: 3319.141846, avg loss: 2.367434, ppl: 10.669973 +epoch: 2, batch: 4189, sum loss: 4346.429199, avg loss: 2.561243, ppl: 12.951907 +epoch: 2, batch: 4190, sum loss: 3491.753662, avg loss: 2.512053, ppl: 12.330218 +epoch: 2, batch: 4191, sum loss: 4654.560547, avg loss: 2.543476, ppl: 12.723821 +epoch: 2, batch: 4192, sum loss: 3587.503662, avg loss: 2.319007, ppl: 10.165574 +epoch: 2, batch: 4193, sum loss: 3947.941406, avg loss: 2.432496, ppl: 11.387273 +epoch: 2, batch: 4194, sum loss: 4846.352539, avg loss: 2.558792, ppl: 12.920205 +epoch: 2, batch: 4195, sum loss: 4130.539551, avg loss: 2.444106, ppl: 11.520249 +epoch: 2, batch: 4196, sum loss: 5349.282227, avg loss: 2.768780, ppl: 15.939173 +epoch: 2, batch: 4197, sum loss: 4935.791016, avg loss: 2.854709, ppl: 17.369377 +epoch: 2, batch: 4198, sum loss: 4474.109863, avg loss: 2.492540, ppl: 12.091955 +epoch: 2, batch: 4199, sum loss: 4393.805176, avg loss: 2.347118, ppl: 10.455398 +epoch: 2, batch: 4200, sum loss: 5162.186035, avg loss: 2.775369, 
ppl: 16.044546 +epoch: 2, batch: 4201, sum loss: 5343.896484, avg loss: 2.847041, ppl: 17.236710 +epoch: 2, batch: 4202, sum loss: 3900.280029, avg loss: 2.372433, ppl: 10.723453 +epoch: 2, batch: 4203, sum loss: 4428.414062, avg loss: 2.643830, ppl: 14.066971 +epoch: 2, batch: 4204, sum loss: 4544.930664, avg loss: 2.505475, ppl: 12.249371 +epoch: 2, batch: 4205, sum loss: 4770.644531, avg loss: 2.919611, ppl: 18.534081 +epoch: 2, batch: 4206, sum loss: 3973.232666, avg loss: 2.511525, ppl: 12.323709 +epoch: 2, batch: 4207, sum loss: 3617.297119, avg loss: 2.449084, ppl: 11.577740 +epoch: 2, batch: 4208, sum loss: 5195.280762, avg loss: 2.671096, ppl: 14.455798 +epoch: 2, batch: 4209, sum loss: 4475.284180, avg loss: 2.612542, ppl: 13.633659 +epoch: 2, batch: 4210, sum loss: 4962.806641, avg loss: 2.858760, ppl: 17.439882 +epoch: 2, batch: 4211, sum loss: 4755.931641, avg loss: 2.891144, ppl: 18.013903 +epoch: 2, batch: 4212, sum loss: 4290.458984, avg loss: 2.850803, ppl: 17.301676 +epoch: 2, batch: 4213, sum loss: 3966.553467, avg loss: 2.534539, ppl: 12.610616 +epoch: 2, batch: 4214, sum loss: 4087.076660, avg loss: 2.662591, ppl: 14.333372 +epoch: 2, batch: 4215, sum loss: 5447.612793, avg loss: 2.616529, ppl: 13.688127 +epoch: 2, batch: 4216, sum loss: 3644.503418, avg loss: 2.322819, ppl: 10.204402 +epoch: 2, batch: 4217, sum loss: 3972.572998, avg loss: 2.422301, ppl: 11.271761 +epoch: 2, batch: 4218, sum loss: 3280.924316, avg loss: 2.190203, ppl: 8.937027 +epoch: 2, batch: 4219, sum loss: 4622.350098, avg loss: 2.577998, ppl: 13.170743 +epoch: 2, batch: 4220, sum loss: 4692.689453, avg loss: 2.778383, ppl: 16.092985 +epoch: 2, batch: 4221, sum loss: 5031.161621, avg loss: 2.734327, ppl: 15.399374 +epoch: 2, batch: 4222, sum loss: 4720.713867, avg loss: 2.599512, ppl: 13.457168 +epoch: 2, batch: 4223, sum loss: 4249.381836, avg loss: 2.457711, ppl: 11.678047 +epoch: 2, batch: 4224, sum loss: 4123.348633, avg loss: 2.444190, ppl: 11.521216 +epoch: 2, batch: 
4225, sum loss: 4669.944824, avg loss: 2.674654, ppl: 14.507322 +epoch: 2, batch: 4226, sum loss: 4630.569336, avg loss: 2.632501, ppl: 13.908513 +epoch: 2, batch: 4227, sum loss: 5101.980957, avg loss: 2.713820, ppl: 15.086790 +epoch: 2, batch: 4228, sum loss: 4425.126953, avg loss: 2.701543, ppl: 14.902703 +epoch: 2, batch: 4229, sum loss: 5081.937012, avg loss: 2.829587, ppl: 16.938456 +epoch: 2, batch: 4230, sum loss: 3588.758301, avg loss: 2.367255, ppl: 10.668068 +epoch: 2, batch: 4231, sum loss: 4261.422363, avg loss: 2.835278, ppl: 17.035128 +epoch: 2, batch: 4232, sum loss: 5276.784180, avg loss: 2.642356, ppl: 14.046253 +epoch: 2, batch: 4233, sum loss: 4474.578125, avg loss: 2.484496, ppl: 11.995077 +epoch: 2, batch: 4234, sum loss: 4192.293457, avg loss: 2.364520, ppl: 10.638929 +epoch: 2, batch: 4235, sum loss: 4176.866699, avg loss: 2.490678, ppl: 12.069454 +epoch: 2, batch: 4236, sum loss: 5685.249512, avg loss: 2.849749, ppl: 17.283445 +epoch: 2, batch: 4237, sum loss: 4402.587402, avg loss: 2.527318, ppl: 12.519876 +epoch: 2, batch: 4238, sum loss: 4744.571777, avg loss: 2.643216, ppl: 14.058338 +epoch: 2, batch: 4239, sum loss: 3757.570312, avg loss: 2.390312, ppl: 10.916899 +epoch: 2, batch: 4240, sum loss: 3560.825195, avg loss: 2.327337, ppl: 10.250606 +epoch: 2, batch: 4241, sum loss: 4747.107422, avg loss: 2.644628, ppl: 14.078208 +epoch: 2, batch: 4242, sum loss: 4858.396973, avg loss: 2.626161, ppl: 13.820605 +epoch: 2, batch: 4243, sum loss: 3955.561768, avg loss: 2.495623, ppl: 12.129283 +epoch: 2, batch: 4244, sum loss: 3971.714111, avg loss: 2.307794, ppl: 10.052228 +epoch: 2, batch: 4245, sum loss: 3846.935303, avg loss: 2.331476, ppl: 10.293122 +epoch: 2, batch: 4246, sum loss: 4759.909668, avg loss: 2.795014, ppl: 16.362864 +epoch: 2, batch: 4247, sum loss: 4285.342285, avg loss: 2.678339, ppl: 14.560884 +epoch: 2, batch: 4248, sum loss: 5575.851562, avg loss: 2.726578, ppl: 15.280504 +epoch: 2, batch: 4249, sum loss: 5145.353516, 
avg loss: 2.836468, ppl: 17.055428 +epoch: 2, batch: 4250, sum loss: 3479.216309, avg loss: 2.284449, ppl: 9.820277 +epoch: 2, batch: 4251, sum loss: 4151.520020, avg loss: 2.480000, ppl: 11.941268 +epoch: 2, batch: 4252, sum loss: 4844.022949, avg loss: 2.779130, ppl: 16.104996 +epoch: 2, batch: 4253, sum loss: 4503.928711, avg loss: 2.459819, ppl: 11.702697 +epoch: 2, batch: 4254, sum loss: 4881.445312, avg loss: 2.831465, ppl: 16.970304 +epoch: 2, batch: 4255, sum loss: 4455.304199, avg loss: 2.420046, ppl: 11.246375 +epoch: 2, batch: 4256, sum loss: 3585.869141, avg loss: 2.281087, ppl: 9.787315 +epoch: 2, batch: 4257, sum loss: 5086.537598, avg loss: 2.712820, ppl: 15.071718 +epoch: 2, batch: 4258, sum loss: 4137.772461, avg loss: 2.426846, ppl: 11.323112 +epoch: 2, batch: 4259, sum loss: 3907.045410, avg loss: 2.331173, ppl: 10.290002 +epoch: 2, batch: 4260, sum loss: 4374.649414, avg loss: 2.788177, ppl: 16.251364 +epoch: 2, batch: 4261, sum loss: 4541.454102, avg loss: 2.575981, ppl: 13.144201 +epoch: 2, batch: 4262, sum loss: 4192.801270, avg loss: 2.426390, ppl: 11.317947 +epoch: 2, batch: 4263, sum loss: 5031.978516, avg loss: 2.839717, ppl: 17.110926 +epoch: 2, batch: 4264, sum loss: 4925.490234, avg loss: 2.742478, ppl: 15.525408 +epoch: 2, batch: 4265, sum loss: 3908.931396, avg loss: 2.340677, ppl: 10.388272 +epoch: 2, batch: 4266, sum loss: 4088.770264, avg loss: 2.436693, ppl: 11.435159 +epoch: 2, batch: 4267, sum loss: 4645.363281, avg loss: 2.509651, ppl: 12.300632 +epoch: 2, batch: 4268, sum loss: 4124.749023, avg loss: 2.182407, ppl: 8.867624 +epoch: 2, batch: 4269, sum loss: 4124.689941, avg loss: 2.375973, ppl: 10.761484 +epoch: 2, batch: 4270, sum loss: 4880.789551, avg loss: 2.629736, ppl: 13.870104 +epoch: 2, batch: 4271, sum loss: 4037.535400, avg loss: 2.411909, ppl: 11.155237 +epoch: 2, batch: 4272, sum loss: 3603.641357, avg loss: 2.361495, ppl: 10.606797 +epoch: 2, batch: 4273, sum loss: 4154.232910, avg loss: 2.616016, ppl: 13.681108 
+epoch: 2, batch: 4274, sum loss: 4359.216309, avg loss: 2.417757, ppl: 11.220666 +epoch: 2, batch: 4275, sum loss: 3758.528564, avg loss: 2.260089, ppl: 9.583946 +epoch: 2, batch: 4276, sum loss: 4281.042969, avg loss: 2.761963, ppl: 15.830894 +epoch: 2, batch: 4277, sum loss: 3885.183594, avg loss: 2.371907, ppl: 10.717809 +epoch: 2, batch: 4278, sum loss: 3689.024902, avg loss: 2.380016, ppl: 10.805077 +epoch: 2, batch: 4279, sum loss: 4885.753906, avg loss: 2.619707, ppl: 13.731701 +epoch: 2, batch: 4280, sum loss: 4226.724609, avg loss: 2.564760, ppl: 12.997541 +epoch: 2, batch: 4281, sum loss: 4435.229492, avg loss: 2.477782, ppl: 11.914805 +epoch: 2, batch: 4282, sum loss: 4349.474609, avg loss: 2.602917, ppl: 13.503075 +epoch: 2, batch: 4283, sum loss: 4635.153320, avg loss: 2.690165, ppl: 14.734100 +epoch: 2, batch: 4284, sum loss: 4717.084961, avg loss: 2.452982, ppl: 11.622957 +epoch: 2, batch: 4285, sum loss: 4727.044434, avg loss: 2.364705, ppl: 10.640895 +epoch: 2, batch: 4286, sum loss: 4135.148926, avg loss: 2.352189, ppl: 10.508551 +epoch: 2, batch: 4287, sum loss: 4533.865234, avg loss: 2.700337, ppl: 14.884743 +epoch: 2, batch: 4288, sum loss: 5312.277832, avg loss: 2.721454, ppl: 15.202409 +epoch: 2, batch: 4289, sum loss: 3995.276367, avg loss: 2.458632, ppl: 11.688805 +epoch: 2, batch: 4290, sum loss: 4382.486328, avg loss: 2.499992, ppl: 12.182401 +epoch: 2, batch: 4291, sum loss: 4572.045410, avg loss: 2.490221, ppl: 12.063939 +epoch: 2, batch: 4292, sum loss: 4218.683594, avg loss: 2.589738, ppl: 13.326282 +epoch: 2, batch: 4293, sum loss: 4440.590820, avg loss: 2.525933, ppl: 12.502558 +epoch: 2, batch: 4294, sum loss: 5464.452148, avg loss: 2.728134, ppl: 15.304301 +epoch: 2, batch: 4295, sum loss: 3266.973145, avg loss: 2.257756, ppl: 9.561611 +epoch: 2, batch: 4296, sum loss: 3242.601074, avg loss: 2.220960, ppl: 9.216171 +epoch: 2, batch: 4297, sum loss: 5076.299316, avg loss: 2.991337, ppl: 19.912292 +epoch: 2, batch: 4298, sum loss: 
3656.020508, avg loss: 2.403695, ppl: 11.063986 +epoch: 2, batch: 4299, sum loss: 4317.204590, avg loss: 2.653476, ppl: 14.203317 +epoch: 2, batch: 4300, sum loss: 4277.351562, avg loss: 2.522023, ppl: 12.453771 +epoch: 2, batch: 4301, sum loss: 3751.391846, avg loss: 2.590740, ppl: 13.339645 +epoch: 2, batch: 4302, sum loss: 3979.842041, avg loss: 2.301817, ppl: 9.992326 +epoch: 2, batch: 4303, sum loss: 4850.995117, avg loss: 2.687532, ppl: 14.695362 +epoch: 2, batch: 4304, sum loss: 4297.347656, avg loss: 2.580990, ppl: 13.210204 +epoch: 2, batch: 4305, sum loss: 3827.020996, avg loss: 2.393384, ppl: 10.950487 +epoch: 2, batch: 4306, sum loss: 4037.069092, avg loss: 2.414515, ppl: 11.184347 +epoch: 2, batch: 4307, sum loss: 3857.520996, avg loss: 2.316829, ppl: 10.143463 +epoch: 2, batch: 4308, sum loss: 4586.237793, avg loss: 2.539445, ppl: 12.672638 +epoch: 2, batch: 4309, sum loss: 3949.483643, avg loss: 2.428957, ppl: 11.347038 +epoch: 2, batch: 4310, sum loss: 4074.060547, avg loss: 2.473625, ppl: 11.865380 +epoch: 2, batch: 4311, sum loss: 4634.651367, avg loss: 2.672809, ppl: 14.480590 +epoch: 2, batch: 4312, sum loss: 4432.766113, avg loss: 2.636981, ppl: 13.970968 +epoch: 2, batch: 4313, sum loss: 4684.362305, avg loss: 2.439772, ppl: 11.470427 +epoch: 2, batch: 4314, sum loss: 4804.470703, avg loss: 2.602638, ppl: 13.499309 +epoch: 2, batch: 4315, sum loss: 3030.391113, avg loss: 2.094258, ppl: 8.119412 +epoch: 2, batch: 4316, sum loss: 4685.596191, avg loss: 2.516432, ppl: 12.384331 +epoch: 2, batch: 4317, sum loss: 4069.755859, avg loss: 2.466519, ppl: 11.781361 +epoch: 2, batch: 4318, sum loss: 4715.985840, avg loss: 2.718148, ppl: 15.152227 +epoch: 2, batch: 4319, sum loss: 4598.854980, avg loss: 2.724440, ppl: 15.247878 +epoch: 2, batch: 4320, sum loss: 5197.701172, avg loss: 2.855880, ppl: 17.389730 +epoch: 2, batch: 4321, sum loss: 4048.613281, avg loss: 2.522501, ppl: 12.459713 +epoch: 2, batch: 4322, sum loss: 4362.117188, avg loss: 2.642106, 
ppl: 14.042750 +epoch: 2, batch: 4323, sum loss: 3953.811523, avg loss: 2.364720, ppl: 10.641058 +epoch: 2, batch: 4324, sum loss: 4687.073242, avg loss: 2.615554, ppl: 13.674794 +epoch: 2, batch: 4325, sum loss: 3792.125488, avg loss: 2.617064, ppl: 13.695455 +epoch: 2, batch: 4326, sum loss: 4993.113281, avg loss: 2.595173, ppl: 13.398910 +epoch: 2, batch: 4327, sum loss: 4067.478760, avg loss: 2.561385, ppl: 12.953742 +epoch: 2, batch: 4328, sum loss: 4818.796875, avg loss: 2.563190, ppl: 12.977145 +epoch: 2, batch: 4329, sum loss: 5131.333496, avg loss: 2.608710, ppl: 13.581523 +epoch: 2, batch: 4330, sum loss: 4473.518066, avg loss: 2.506173, ppl: 12.257925 +epoch: 2, batch: 4331, sum loss: 3817.231934, avg loss: 2.344737, ppl: 10.430530 +epoch: 2, batch: 4332, sum loss: 4595.620605, avg loss: 2.470764, ppl: 11.831482 +epoch: 2, batch: 4333, sum loss: 4463.733398, avg loss: 2.424624, ppl: 11.297986 +epoch: 2, batch: 4334, sum loss: 4392.904297, avg loss: 2.723437, ppl: 15.232592 +epoch: 2, batch: 4335, sum loss: 4786.054688, avg loss: 2.710110, ppl: 15.030931 +epoch: 2, batch: 4336, sum loss: 3169.567139, avg loss: 2.208758, ppl: 9.104399 +epoch: 2, batch: 4337, sum loss: 3146.285645, avg loss: 2.294884, ppl: 9.923282 +epoch: 2, batch: 4338, sum loss: 4687.352539, avg loss: 2.561395, ppl: 12.953875 +epoch: 2, batch: 4339, sum loss: 4010.729492, avg loss: 2.478819, ppl: 11.927172 +epoch: 2, batch: 4340, sum loss: 4739.677734, avg loss: 2.714592, ppl: 15.098452 +epoch: 2, batch: 4341, sum loss: 4699.426270, avg loss: 2.674688, ppl: 14.507817 +epoch: 2, batch: 4342, sum loss: 5003.509766, avg loss: 2.726708, ppl: 15.282500 +epoch: 2, batch: 4343, sum loss: 4227.337891, avg loss: 2.772025, ppl: 15.990981 +epoch: 2, batch: 4344, sum loss: 5989.102051, avg loss: 2.691731, ppl: 14.757202 +epoch: 2, batch: 4345, sum loss: 4099.877441, avg loss: 2.594859, ppl: 13.394700 +epoch: 2, batch: 4346, sum loss: 3829.250977, avg loss: 2.529228, ppl: 12.543818 +epoch: 2, batch: 
4347, sum loss: 4514.072754, avg loss: 2.534572, ppl: 12.611033 +epoch: 2, batch: 4348, sum loss: 3527.334717, avg loss: 2.340634, ppl: 10.387816 +epoch: 2, batch: 4349, sum loss: 3773.964111, avg loss: 2.313896, ppl: 10.113750 +epoch: 2, batch: 4350, sum loss: 3986.906250, avg loss: 2.554072, ppl: 12.859360 +epoch: 2, batch: 4351, sum loss: 3718.568848, avg loss: 2.238753, ppl: 9.381626 +epoch: 2, batch: 4352, sum loss: 5023.368652, avg loss: 2.754040, ppl: 15.705953 +epoch: 2, batch: 4353, sum loss: 4771.752930, avg loss: 2.671754, ppl: 14.465324 +epoch: 2, batch: 4354, sum loss: 4135.267578, avg loss: 2.510788, ppl: 12.314627 +epoch: 2, batch: 4355, sum loss: 3832.807617, avg loss: 2.338504, ppl: 10.365714 +epoch: 2, batch: 4356, sum loss: 4628.366211, avg loss: 2.652359, ppl: 14.187464 +epoch: 2, batch: 4357, sum loss: 3986.966797, avg loss: 2.531407, ppl: 12.571186 +epoch: 2, batch: 4358, sum loss: 4449.380859, avg loss: 2.646865, ppl: 14.109740 +epoch: 2, batch: 4359, sum loss: 4144.856445, avg loss: 2.365786, ppl: 10.652407 +epoch: 2, batch: 4360, sum loss: 3650.773682, avg loss: 2.319424, ppl: 10.169816 +epoch: 2, batch: 4361, sum loss: 4162.665527, avg loss: 2.375951, ppl: 10.761240 +epoch: 2, batch: 4362, sum loss: 4149.941406, avg loss: 2.495455, ppl: 12.127248 +epoch: 2, batch: 4363, sum loss: 4290.954102, avg loss: 2.619630, ppl: 13.730640 +epoch: 2, batch: 4364, sum loss: 5360.118164, avg loss: 2.709867, ppl: 15.027284 +epoch: 2, batch: 4365, sum loss: 4974.244629, avg loss: 2.648693, ppl: 14.135546 +epoch: 2, batch: 4366, sum loss: 3642.438965, avg loss: 2.251198, ppl: 9.499112 +epoch: 2, batch: 4367, sum loss: 5082.250000, avg loss: 2.730924, ppl: 15.347067 +epoch: 2, batch: 4368, sum loss: 4232.286133, avg loss: 2.485194, ppl: 12.003454 +epoch: 2, batch: 4369, sum loss: 4002.384277, avg loss: 2.518807, ppl: 12.413777 +epoch: 2, batch: 4370, sum loss: 4640.423828, avg loss: 2.630626, ppl: 13.882454 +epoch: 2, batch: 4371, sum loss: 4306.542480, avg 
loss: 2.434450, ppl: 11.409546 +epoch: 2, batch: 4372, sum loss: 4416.224121, avg loss: 2.638127, ppl: 13.986979 +epoch: 2, batch: 4373, sum loss: 4216.973633, avg loss: 2.566630, ppl: 13.021869 +epoch: 2, batch: 4374, sum loss: 4375.791992, avg loss: 2.527898, ppl: 12.527150 +epoch: 2, batch: 4375, sum loss: 4835.166016, avg loss: 2.602350, ppl: 13.495412 +epoch: 2, batch: 4376, sum loss: 3437.570068, avg loss: 2.246778, ppl: 9.457213 +epoch: 2, batch: 4377, sum loss: 4168.760742, avg loss: 2.613643, ppl: 13.648685 +epoch: 2, batch: 4378, sum loss: 4569.162109, avg loss: 2.597591, ppl: 13.431342 +epoch: 2, batch: 4379, sum loss: 5179.899902, avg loss: 2.714832, ppl: 15.102074 +epoch: 2, batch: 4380, sum loss: 4253.291992, avg loss: 2.515253, ppl: 12.369733 +epoch: 2, batch: 4381, sum loss: 4357.934082, avg loss: 2.590924, ppl: 13.342094 +epoch: 2, batch: 4382, sum loss: 4479.127441, avg loss: 2.581630, ppl: 13.218664 +epoch: 2, batch: 4383, sum loss: 4413.194824, avg loss: 2.573291, ppl: 13.108899 +epoch: 2, batch: 4384, sum loss: 3773.095703, avg loss: 2.528885, ppl: 12.539513 +epoch: 2, batch: 4385, sum loss: 4624.412598, avg loss: 2.736339, ppl: 15.430388 +epoch: 2, batch: 4386, sum loss: 3706.171387, avg loss: 2.347164, ppl: 10.455872 +epoch: 2, batch: 4387, sum loss: 5213.485352, avg loss: 2.716772, ppl: 15.131396 +epoch: 2, batch: 4388, sum loss: 4438.907227, avg loss: 2.552563, ppl: 12.839972 +epoch: 2, batch: 4389, sum loss: 4565.294922, avg loss: 2.604275, ppl: 13.521425 +epoch: 2, batch: 4390, sum loss: 3669.292969, avg loss: 2.722027, ppl: 15.211128 +epoch: 2, batch: 4391, sum loss: 4664.263672, avg loss: 2.595584, ppl: 13.404408 +epoch: 2, batch: 4392, sum loss: 4186.048340, avg loss: 2.681645, ppl: 14.609111 +epoch: 2, batch: 4393, sum loss: 4531.652344, avg loss: 2.927424, ppl: 18.679449 +epoch: 2, batch: 4394, sum loss: 4701.227539, avg loss: 2.559188, ppl: 12.925313 +epoch: 2, batch: 4395, sum loss: 4857.945312, avg loss: 2.691382, ppl: 14.752055 
+epoch: 2, batch: 4396, sum loss: 5704.079590, avg loss: 2.935708, ppl: 18.834826 +epoch: 2, batch: 4397, sum loss: 3829.071289, avg loss: 2.611918, ppl: 13.625155 +epoch: 2, batch: 4398, sum loss: 4356.133789, avg loss: 2.657800, ppl: 14.264868 +epoch: 2, batch: 4399, sum loss: 4381.561523, avg loss: 2.531231, ppl: 12.568974 +epoch: 2, batch: 4400, sum loss: 3843.557617, avg loss: 2.451248, ppl: 11.602822 +epoch: 2, batch: 4401, sum loss: 4172.991211, avg loss: 2.614656, ppl: 13.662519 +epoch: 2, batch: 4402, sum loss: 4335.387695, avg loss: 2.583664, ppl: 13.245577 +epoch: 2, batch: 4403, sum loss: 4789.388672, avg loss: 2.725890, ppl: 15.269997 +epoch: 2, batch: 4404, sum loss: 4480.946289, avg loss: 2.597650, ppl: 13.432137 +epoch: 2, batch: 4405, sum loss: 4599.058594, avg loss: 2.753927, ppl: 15.704188 +epoch: 2, batch: 4406, sum loss: 3985.182129, avg loss: 2.410878, ppl: 11.143745 +epoch: 2, batch: 4407, sum loss: 3893.156738, avg loss: 2.282038, ppl: 9.796625 +epoch: 2, batch: 4408, sum loss: 4693.659668, avg loss: 2.783903, ppl: 16.182051 +epoch: 2, batch: 4409, sum loss: 4414.299316, avg loss: 2.604306, ppl: 13.521844 +epoch: 2, batch: 4410, sum loss: 4370.902344, avg loss: 2.553097, ppl: 12.846829 +epoch: 2, batch: 4411, sum loss: 4334.943359, avg loss: 2.563539, ppl: 12.981672 +epoch: 2, batch: 4412, sum loss: 4396.485352, avg loss: 2.612291, ppl: 13.630237 +epoch: 2, batch: 4413, sum loss: 4320.933594, avg loss: 2.494765, ppl: 12.118889 +epoch: 2, batch: 4414, sum loss: 3896.061523, avg loss: 2.450353, ppl: 11.592440 +epoch: 2, batch: 4415, sum loss: 3858.727539, avg loss: 2.623200, ppl: 13.779751 +epoch: 2, batch: 4416, sum loss: 4812.882812, avg loss: 2.804710, ppl: 16.522289 +epoch: 2, batch: 4417, sum loss: 4154.790527, avg loss: 2.687445, ppl: 14.694091 +epoch: 2, batch: 4418, sum loss: 4507.831543, avg loss: 2.512727, ppl: 12.338529 +epoch: 2, batch: 4419, sum loss: 4128.308594, avg loss: 2.741241, ppl: 15.506212 +epoch: 2, batch: 4420, sum 
loss: 4794.528320, avg loss: 2.439964, ppl: 11.472623 +epoch: 2, batch: 4421, sum loss: 4496.004395, avg loss: 2.705177, ppl: 14.956968 +epoch: 2, batch: 4422, sum loss: 4918.698242, avg loss: 2.791543, ppl: 16.306156 +epoch: 2, batch: 4423, sum loss: 4383.560059, avg loss: 2.790299, ppl: 16.285894 +epoch: 2, batch: 4424, sum loss: 4618.653809, avg loss: 2.410571, ppl: 11.140319 +epoch: 2, batch: 4425, sum loss: 3063.189941, avg loss: 2.151117, ppl: 8.594450 +epoch: 2, batch: 4426, sum loss: 4101.272949, avg loss: 2.547374, ppl: 12.773522 +epoch: 2, batch: 4427, sum loss: 4222.980957, avg loss: 2.642667, ppl: 14.050621 +epoch: 2, batch: 4428, sum loss: 4109.378906, avg loss: 2.614109, ppl: 13.655042 +epoch: 2, batch: 4429, sum loss: 4933.523926, avg loss: 2.740847, ppl: 15.500102 +epoch: 2, batch: 4430, sum loss: 4390.312012, avg loss: 2.616396, ppl: 13.686305 +epoch: 2, batch: 4431, sum loss: 3801.652344, avg loss: 2.426071, ppl: 11.314337 +epoch: 2, batch: 4432, sum loss: 4366.438965, avg loss: 2.528338, ppl: 12.532656 +epoch: 2, batch: 4433, sum loss: 4518.048828, avg loss: 2.477000, ppl: 11.905500 +epoch: 2, batch: 4434, sum loss: 5138.001465, avg loss: 2.757918, ppl: 15.766984 +epoch: 2, batch: 4435, sum loss: 4187.714844, avg loss: 2.522720, ppl: 12.462447 +epoch: 2, batch: 4436, sum loss: 4754.792480, avg loss: 2.674237, ppl: 14.501274 +epoch: 2, batch: 4437, sum loss: 3793.322266, avg loss: 2.517135, ppl: 12.393039 +epoch: 2, batch: 4438, sum loss: 4235.468750, avg loss: 2.431383, ppl: 11.374599 +epoch: 2, batch: 4439, sum loss: 4387.238770, avg loss: 2.555177, ppl: 12.873578 +epoch: 2, batch: 4440, sum loss: 4266.842285, avg loss: 2.721200, ppl: 15.198557 +epoch: 2, batch: 4441, sum loss: 4659.555664, avg loss: 2.715359, ppl: 15.110029 +epoch: 2, batch: 4442, sum loss: 4443.487305, avg loss: 2.610744, ppl: 13.609179 +epoch: 2, batch: 4443, sum loss: 4303.479492, avg loss: 2.600290, ppl: 13.467641 +epoch: 2, batch: 4444, sum loss: 5166.972656, avg loss: 
2.647015, ppl: 14.111850 +epoch: 2, batch: 4445, sum loss: 4132.144531, avg loss: 2.180551, ppl: 8.851185 +epoch: 2, batch: 4446, sum loss: 4107.629395, avg loss: 2.348559, ppl: 10.470469 +epoch: 2, batch: 4447, sum loss: 4154.767090, avg loss: 2.490868, ppl: 12.071745 +epoch: 2, batch: 4448, sum loss: 4038.879883, avg loss: 2.622649, ppl: 13.772161 +epoch: 2, batch: 4449, sum loss: 4537.925781, avg loss: 2.599041, ppl: 13.450835 +epoch: 2, batch: 4450, sum loss: 4837.648438, avg loss: 2.674212, ppl: 14.500912 +epoch: 2, batch: 4451, sum loss: 4400.541016, avg loss: 2.757231, ppl: 15.756158 +epoch: 2, batch: 4452, sum loss: 4677.462891, avg loss: 2.789185, ppl: 16.267754 +epoch: 2, batch: 4453, sum loss: 4078.528564, avg loss: 2.667449, ppl: 14.403172 +epoch: 2, batch: 4454, sum loss: 4750.549805, avg loss: 2.860054, ppl: 17.462471 +epoch: 2, batch: 4455, sum loss: 4422.748535, avg loss: 2.562427, ppl: 12.967245 +epoch: 2, batch: 4456, sum loss: 4191.932617, avg loss: 2.559177, ppl: 12.925178 +epoch: 2, batch: 4457, sum loss: 3609.298828, avg loss: 2.501246, ppl: 12.197688 +epoch: 2, batch: 4458, sum loss: 5149.881348, avg loss: 2.765779, ppl: 15.891422 +epoch: 2, batch: 4459, sum loss: 5019.936035, avg loss: 2.765805, ppl: 15.891828 +epoch: 2, batch: 4460, sum loss: 3804.777100, avg loss: 2.449953, ppl: 11.587803 +epoch: 2, batch: 4461, sum loss: 3860.576904, avg loss: 2.508497, ppl: 12.286449 +epoch: 2, batch: 4462, sum loss: 4290.486816, avg loss: 2.743278, ppl: 15.537835 +epoch: 2, batch: 4463, sum loss: 4433.942871, avg loss: 2.623635, ppl: 13.785741 +epoch: 2, batch: 4464, sum loss: 3676.434570, avg loss: 2.497578, ppl: 12.153023 +epoch: 2, batch: 4465, sum loss: 4059.538330, avg loss: 2.543570, ppl: 12.725022 +epoch: 2, batch: 4466, sum loss: 3899.638916, avg loss: 2.475961, ppl: 11.893133 +epoch: 2, batch: 4467, sum loss: 5319.957520, avg loss: 2.745076, ppl: 15.565800 +epoch: 2, batch: 4468, sum loss: 3998.391602, avg loss: 2.516294, ppl: 12.382625 +epoch: 
2, batch: 4469, sum loss: 4565.792969, avg loss: 2.763797, ppl: 15.859954 +epoch: 2, batch: 4470, sum loss: 4504.255859, avg loss: 2.551986, ppl: 12.832570 +epoch: 2, batch: 4471, sum loss: 4435.682617, avg loss: 2.450654, ppl: 11.595922 +epoch: 2, batch: 4472, sum loss: 4541.055176, avg loss: 2.609802, ppl: 13.596355 +epoch: 2, batch: 4473, sum loss: 4922.885742, avg loss: 2.744084, ppl: 15.550358 +epoch: 2, batch: 4474, sum loss: 5163.994629, avg loss: 2.548862, ppl: 12.792541 +epoch: 2, batch: 4475, sum loss: 4638.928711, avg loss: 2.640255, ppl: 14.016781 +epoch: 2, batch: 4476, sum loss: 4190.495117, avg loss: 2.347616, ppl: 10.460607 +epoch: 2, batch: 4477, sum loss: 4281.157715, avg loss: 2.719922, ppl: 15.179143 +epoch: 2, batch: 4478, sum loss: 4356.654297, avg loss: 2.643601, ppl: 14.063755 +epoch: 2, batch: 4479, sum loss: 4315.709473, avg loss: 2.512054, ppl: 12.330235 +epoch: 2, batch: 4480, sum loss: 4805.111816, avg loss: 2.638721, ppl: 13.995298 +epoch: 2, batch: 4481, sum loss: 4261.748535, avg loss: 2.702441, ppl: 14.916090 +epoch: 2, batch: 4482, sum loss: 4127.057617, avg loss: 2.350261, ppl: 10.488304 +epoch: 2, batch: 4483, sum loss: 5651.038574, avg loss: 2.706436, ppl: 14.975809 +epoch: 2, batch: 4484, sum loss: 4000.952393, avg loss: 2.439605, ppl: 11.468513 +epoch: 2, batch: 4485, sum loss: 4253.359863, avg loss: 2.632030, ppl: 13.901956 +epoch: 2, batch: 4486, sum loss: 3606.775879, avg loss: 2.326952, ppl: 10.246664 +epoch: 2, batch: 4487, sum loss: 5303.050293, avg loss: 2.736352, ppl: 15.430594 +epoch: 2, batch: 4488, sum loss: 4061.186279, avg loss: 2.380531, ppl: 10.810642 +epoch: 2, batch: 4489, sum loss: 5116.513184, avg loss: 2.887423, ppl: 17.946997 +epoch: 2, batch: 4490, sum loss: 4400.366211, avg loss: 2.423109, ppl: 11.280878 +epoch: 2, batch: 4491, sum loss: 4855.053223, avg loss: 2.763263, ppl: 15.851481 +epoch: 2, batch: 4492, sum loss: 5047.571777, avg loss: 2.840502, ppl: 17.124355 +epoch: 2, batch: 4493, sum loss: 
3709.854492, avg loss: 2.503276, ppl: 12.222467 +epoch: 2, batch: 4494, sum loss: 3544.788574, avg loss: 2.232235, ppl: 9.320672 +epoch: 2, batch: 4495, sum loss: 3918.819824, avg loss: 2.322952, ppl: 10.205757 +epoch: 2, batch: 4496, sum loss: 4041.707520, avg loss: 2.404347, ppl: 11.071200 +epoch: 2, batch: 4497, sum loss: 3828.388916, avg loss: 2.590250, ppl: 13.333101 +epoch: 2, batch: 4498, sum loss: 3682.605225, avg loss: 2.429159, ppl: 11.349335 +epoch: 2, batch: 4499, sum loss: 3757.256104, avg loss: 2.414689, ppl: 11.186292 +epoch: 2, batch: 4500, sum loss: 4428.741699, avg loss: 2.826255, ppl: 16.882124 +epoch: 2, batch: 4501, sum loss: 4845.087402, avg loss: 2.644698, ppl: 14.079198 +epoch: 2, batch: 4502, sum loss: 3838.904297, avg loss: 2.476712, ppl: 11.902072 +epoch: 2, batch: 4503, sum loss: 5916.307617, avg loss: 3.009312, ppl: 20.273451 +epoch: 2, batch: 4504, sum loss: 4877.240234, avg loss: 2.882530, ppl: 17.859396 +epoch: 2, batch: 4505, sum loss: 3566.166748, avg loss: 2.435906, ppl: 11.426168 +epoch: 2, batch: 4506, sum loss: 5744.383301, avg loss: 2.821406, ppl: 16.800461 +epoch: 2, batch: 4507, sum loss: 4370.049805, avg loss: 2.570618, ppl: 13.073897 +epoch: 2, batch: 4508, sum loss: 3682.838379, avg loss: 2.468390, ppl: 11.803433 +epoch: 2, batch: 4509, sum loss: 3943.495605, avg loss: 2.352921, ppl: 10.516243 +epoch: 2, batch: 4510, sum loss: 3594.412842, avg loss: 2.388314, ppl: 10.895112 +epoch: 2, batch: 4511, sum loss: 4030.653320, avg loss: 2.520734, ppl: 12.437717 +epoch: 2, batch: 4512, sum loss: 4356.040039, avg loss: 2.473617, ppl: 11.865290 +epoch: 2, batch: 4513, sum loss: 4013.777344, avg loss: 2.538759, ppl: 12.663939 +epoch: 2, batch: 4514, sum loss: 3452.076172, avg loss: 2.351551, ppl: 10.501841 +epoch: 2, batch: 4515, sum loss: 4521.507812, avg loss: 2.557414, ppl: 12.902410 +epoch: 2, batch: 4516, sum loss: 4411.291504, avg loss: 2.696388, ppl: 14.826090 +epoch: 2, batch: 4517, sum loss: 4438.069336, avg loss: 2.479368, 
ppl: 11.933722 +epoch: 2, batch: 4518, sum loss: 3547.151855, avg loss: 2.398345, ppl: 11.004946 +epoch: 2, batch: 4519, sum loss: 3911.101807, avg loss: 2.340576, ppl: 10.387215 +epoch: 2, batch: 4520, sum loss: 4276.031250, avg loss: 2.411749, ppl: 11.153456 +epoch: 2, batch: 4521, sum loss: 4937.030273, avg loss: 2.558047, ppl: 12.910576 +epoch: 2, batch: 4522, sum loss: 4368.756836, avg loss: 2.562321, ppl: 12.965873 +epoch: 2, batch: 4523, sum loss: 4197.470215, avg loss: 2.402673, ppl: 11.052684 +epoch: 2, batch: 4524, sum loss: 5063.715332, avg loss: 2.796088, ppl: 16.380445 +epoch: 2, batch: 4525, sum loss: 4148.616211, avg loss: 2.540488, ppl: 12.685854 +epoch: 2, batch: 4526, sum loss: 3721.608643, avg loss: 2.584450, ppl: 13.256002 +epoch: 2, batch: 4527, sum loss: 4667.835938, avg loss: 2.687298, ppl: 14.691918 +epoch: 2, batch: 4528, sum loss: 3130.633057, avg loss: 2.165030, ppl: 8.714861 +epoch: 2, batch: 4529, sum loss: 4011.981689, avg loss: 2.545674, ppl: 12.751815 +epoch: 2, batch: 4530, sum loss: 5524.946289, avg loss: 2.876078, ppl: 17.744549 +epoch: 2, batch: 4531, sum loss: 5240.279785, avg loss: 2.702568, ppl: 14.917996 +epoch: 2, batch: 4532, sum loss: 4003.316895, avg loss: 2.367426, ppl: 10.669891 +epoch: 2, batch: 4533, sum loss: 3641.998291, avg loss: 2.281954, ppl: 9.795801 +epoch: 2, batch: 4534, sum loss: 3481.175537, avg loss: 2.313074, ppl: 10.105437 +epoch: 2, batch: 4535, sum loss: 4596.801270, avg loss: 2.823588, ppl: 16.837152 +epoch: 2, batch: 4536, sum loss: 4014.587158, avg loss: 2.421343, ppl: 11.260977 +epoch: 2, batch: 4537, sum loss: 5259.192871, avg loss: 2.861367, ppl: 17.485413 +epoch: 2, batch: 4538, sum loss: 4714.712891, avg loss: 2.662175, ppl: 14.327423 +epoch: 2, batch: 4539, sum loss: 4204.160645, avg loss: 2.447125, ppl: 11.555078 +epoch: 2, batch: 4540, sum loss: 4356.800293, avg loss: 2.436689, ppl: 11.435118 +epoch: 2, batch: 4541, sum loss: 4030.780762, avg loss: 2.541476, ppl: 12.698400 +epoch: 2, batch: 
4542, sum loss: 3644.261719, avg loss: 2.309418, ppl: 10.068563 +epoch: 2, batch: 4543, sum loss: 3611.454102, avg loss: 2.374395, ppl: 10.744507 +epoch: 2, batch: 4544, sum loss: 4103.595215, avg loss: 2.728454, ppl: 15.309202 +epoch: 2, batch: 4545, sum loss: 4110.395020, avg loss: 2.674297, ppl: 14.502156 +epoch: 2, batch: 4546, sum loss: 3689.668457, avg loss: 2.532374, ppl: 12.583340 +epoch: 2, batch: 4547, sum loss: 5361.936523, avg loss: 2.791222, ppl: 16.300920 +epoch: 2, batch: 4548, sum loss: 4360.257812, avg loss: 2.657073, ppl: 14.254512 +epoch: 2, batch: 4549, sum loss: 4516.540039, avg loss: 2.533113, ppl: 12.592643 +epoch: 2, batch: 4550, sum loss: 4170.051270, avg loss: 2.428684, ppl: 11.343949 +epoch: 2, batch: 4551, sum loss: 4568.992188, avg loss: 2.491271, ppl: 12.076622 +epoch: 2, batch: 4552, sum loss: 3792.552490, avg loss: 2.592312, ppl: 13.360627 +epoch: 2, batch: 4553, sum loss: 5494.385742, avg loss: 2.665884, ppl: 14.380650 +epoch: 2, batch: 4554, sum loss: 3459.482910, avg loss: 2.226180, ppl: 9.264404 +epoch: 2, batch: 4555, sum loss: 3988.497559, avg loss: 2.483498, ppl: 11.983107 +epoch: 2, batch: 4556, sum loss: 4006.137451, avg loss: 2.475981, ppl: 11.893369 +epoch: 2, batch: 4557, sum loss: 4606.537109, avg loss: 2.748531, ppl: 15.619664 +epoch: 2, batch: 4558, sum loss: 4818.513184, avg loss: 2.513570, ppl: 12.348935 +epoch: 2, batch: 4559, sum loss: 4058.583496, avg loss: 2.684248, ppl: 14.647189 +epoch: 2, batch: 4560, sum loss: 4877.311035, avg loss: 2.586061, ppl: 13.277372 +epoch: 2, batch: 4561, sum loss: 4488.800781, avg loss: 2.903493, ppl: 18.237745 +epoch: 2, batch: 4562, sum loss: 4206.904785, avg loss: 2.554283, ppl: 12.862079 +epoch: 2, batch: 4563, sum loss: 4059.070068, avg loss: 2.559313, ppl: 12.926931 +epoch: 2, batch: 4564, sum loss: 4144.320312, avg loss: 2.572514, ppl: 13.098717 +epoch: 2, batch: 4565, sum loss: 3381.584229, avg loss: 2.575464, ppl: 13.137412 +epoch: 2, batch: 4566, sum loss: 3868.010498, avg 
loss: 2.375928, ppl: 10.760991 +epoch: 2, batch: 4567, sum loss: 3867.488770, avg loss: 2.477571, ppl: 11.912297 +epoch: 2, batch: 4568, sum loss: 4840.948242, avg loss: 2.761522, ppl: 15.823914 +epoch: 2, batch: 4569, sum loss: 3996.321777, avg loss: 2.518161, ppl: 12.405766 +epoch: 2, batch: 4570, sum loss: 4960.969238, avg loss: 2.596007, ppl: 13.410083 +epoch: 2, batch: 4571, sum loss: 3932.963379, avg loss: 2.414342, ppl: 11.182412 +epoch: 2, batch: 4572, sum loss: 4597.145996, avg loss: 2.501168, ppl: 12.196726 +epoch: 2, batch: 4573, sum loss: 4560.965820, avg loss: 2.558029, ppl: 12.910348 +epoch: 2, batch: 4574, sum loss: 4017.815918, avg loss: 2.522169, ppl: 12.455588 +epoch: 2, batch: 4575, sum loss: 4485.080566, avg loss: 2.893600, ppl: 18.058210 +epoch: 2, batch: 4576, sum loss: 3749.507324, avg loss: 2.334687, ppl: 10.326227 +epoch: 2, batch: 4577, sum loss: 4768.559570, avg loss: 2.726449, ppl: 15.278541 +epoch: 2, batch: 4578, sum loss: 4479.192871, avg loss: 2.631723, ppl: 13.897697 +epoch: 2, batch: 4579, sum loss: 3591.209961, avg loss: 2.507828, ppl: 12.278235 +epoch: 2, batch: 4580, sum loss: 3853.602539, avg loss: 2.572498, ppl: 13.098508 +epoch: 2, batch: 4581, sum loss: 4374.882324, avg loss: 2.507096, ppl: 12.269246 +epoch: 2, batch: 4582, sum loss: 4697.340332, avg loss: 2.701173, ppl: 14.897201 +epoch: 2, batch: 4583, sum loss: 3697.830566, avg loss: 2.452142, ppl: 11.613198 +epoch: 2, batch: 4584, sum loss: 4146.317383, avg loss: 2.517497, ppl: 12.397525 +epoch: 2, batch: 4585, sum loss: 5763.267090, avg loss: 3.009539, ppl: 20.278048 +epoch: 2, batch: 4586, sum loss: 5085.462402, avg loss: 2.798824, ppl: 16.425312 +epoch: 2, batch: 4587, sum loss: 4124.453125, avg loss: 2.550682, ppl: 12.815845 +epoch: 2, batch: 4588, sum loss: 5544.515625, avg loss: 2.787589, ppl: 16.241819 +epoch: 2, batch: 4589, sum loss: 4263.135742, avg loss: 2.784544, ppl: 16.192425 +epoch: 2, batch: 4590, sum loss: 3662.033691, avg loss: 2.316277, ppl: 10.137858 
+epoch: 2, batch: 4591, sum loss: 4314.201172, avg loss: 2.696376, ppl: 14.825899 +epoch: 2, batch: 4592, sum loss: 4390.750000, avg loss: 2.457051, ppl: 11.670345 +epoch: 2, batch: 4593, sum loss: 3566.112305, avg loss: 2.440871, ppl: 11.483038 +epoch: 2, batch: 4594, sum loss: 4306.087402, avg loss: 2.669614, ppl: 14.434398 +epoch: 2, batch: 4595, sum loss: 3572.379150, avg loss: 2.526435, ppl: 12.508834 +epoch: 2, batch: 4596, sum loss: 4771.517578, avg loss: 2.706476, ppl: 14.976409 +epoch: 2, batch: 4597, sum loss: 4502.269531, avg loss: 2.556655, ppl: 12.892618 +epoch: 2, batch: 4598, sum loss: 4584.996582, avg loss: 2.594792, ppl: 13.393797 +epoch: 2, batch: 4599, sum loss: 4173.893066, avg loss: 2.509858, ppl: 12.303179 +epoch: 2, batch: 4600, sum loss: 3429.414551, avg loss: 2.342496, ppl: 10.407182 +epoch: 2, batch: 4601, sum loss: 4479.677734, avg loss: 2.474960, ppl: 11.881236 +epoch: 2, batch: 4602, sum loss: 4780.355469, avg loss: 2.726957, ppl: 15.286302 +epoch: 2, batch: 4603, sum loss: 4263.372559, avg loss: 2.596451, ppl: 13.416040 +epoch: 2, batch: 4604, sum loss: 4156.139648, avg loss: 2.526529, ppl: 12.510003 +epoch: 2, batch: 4605, sum loss: 4458.632812, avg loss: 2.880254, ppl: 17.818794 +epoch: 2, batch: 4606, sum loss: 4023.439453, avg loss: 2.479014, ppl: 11.929492 +epoch: 2, batch: 4607, sum loss: 3688.914795, avg loss: 2.328860, ppl: 10.266234 +epoch: 2, batch: 4608, sum loss: 4002.808350, avg loss: 2.721148, ppl: 15.197756 +epoch: 2, batch: 4609, sum loss: 3414.246094, avg loss: 2.300705, ppl: 9.981216 +epoch: 2, batch: 4610, sum loss: 4317.189941, avg loss: 2.565175, ppl: 13.002937 +epoch: 2, batch: 4611, sum loss: 3951.300537, avg loss: 2.387493, ppl: 10.886167 +epoch: 2, batch: 4612, sum loss: 4662.946289, avg loss: 2.644893, ppl: 14.081937 +epoch: 2, batch: 4613, sum loss: 5175.039062, avg loss: 2.943708, ppl: 18.986120 +epoch: 2, batch: 4614, sum loss: 5076.678711, avg loss: 2.663525, ppl: 14.346770 +epoch: 2, batch: 4615, sum 
loss: 4474.993164, avg loss: 2.674832, ppl: 14.509907 +epoch: 2, batch: 4616, sum loss: 4278.832520, avg loss: 2.580719, ppl: 13.206634 +epoch: 2, batch: 4617, sum loss: 3590.597168, avg loss: 2.486563, ppl: 12.019893 +epoch: 2, batch: 4618, sum loss: 4864.092285, avg loss: 2.509852, ppl: 12.303105 +epoch: 2, batch: 4619, sum loss: 4143.645996, avg loss: 2.602793, ppl: 13.501391 +epoch: 2, batch: 4620, sum loss: 4226.899414, avg loss: 2.558656, ppl: 12.918443 +epoch: 2, batch: 4621, sum loss: 3906.685791, avg loss: 2.353425, ppl: 10.521547 +epoch: 2, batch: 4622, sum loss: 4362.454102, avg loss: 2.606006, ppl: 13.544847 +epoch: 2, batch: 4623, sum loss: 4534.235840, avg loss: 2.574807, ppl: 13.128788 +epoch: 2, batch: 4624, sum loss: 3539.609131, avg loss: 2.339464, ppl: 10.375675 +epoch: 2, batch: 4625, sum loss: 3946.891602, avg loss: 2.427363, ppl: 11.328964 +epoch: 2, batch: 4626, sum loss: 4476.892090, avg loss: 2.667993, ppl: 14.411015 +epoch: 2, batch: 4627, sum loss: 4022.409180, avg loss: 2.404309, ppl: 11.070781 +epoch: 2, batch: 4628, sum loss: 4032.737305, avg loss: 2.396160, ppl: 10.980927 +epoch: 2, batch: 4629, sum loss: 5363.840332, avg loss: 2.877597, ppl: 17.771515 +epoch: 2, batch: 4630, sum loss: 4521.861816, avg loss: 2.536098, ppl: 12.630285 +epoch: 2, batch: 4631, sum loss: 4161.738281, avg loss: 2.405629, ppl: 11.085400 +epoch: 2, batch: 4632, sum loss: 4841.008301, avg loss: 2.783789, ppl: 16.180206 +epoch: 2, batch: 4633, sum loss: 4702.168945, avg loss: 2.647618, ppl: 14.120361 +epoch: 2, batch: 4634, sum loss: 4276.939453, avg loss: 2.617466, ppl: 13.700961 +epoch: 2, batch: 4635, sum loss: 3538.656250, avg loss: 2.331131, ppl: 10.289570 +epoch: 2, batch: 4636, sum loss: 4568.151855, avg loss: 2.680840, ppl: 14.597354 +epoch: 2, batch: 4637, sum loss: 4628.607422, avg loss: 2.638887, ppl: 13.997611 +epoch: 2, batch: 4638, sum loss: 4414.971191, avg loss: 2.675740, ppl: 14.523096 +epoch: 2, batch: 4639, sum loss: 3798.158936, avg loss: 
2.411529, ppl: 11.151001 +epoch: 2, batch: 4640, sum loss: 5176.161621, avg loss: 2.788880, ppl: 16.262796 +epoch: 2, batch: 4641, sum loss: 4034.025391, avg loss: 2.564543, ppl: 12.994712 +epoch: 2, batch: 4642, sum loss: 4959.821289, avg loss: 2.632602, ppl: 13.909913 +epoch: 2, batch: 4643, sum loss: 4231.092773, avg loss: 2.491810, ppl: 12.083125 +epoch: 2, batch: 4644, sum loss: 4791.666016, avg loss: 2.664998, ppl: 14.367915 +epoch: 2, batch: 4645, sum loss: 3998.813232, avg loss: 2.376003, ppl: 10.761802 +epoch: 2, batch: 4646, sum loss: 3744.792725, avg loss: 2.237033, ppl: 9.365501 +epoch: 2, batch: 4647, sum loss: 4622.413086, avg loss: 2.801462, ppl: 16.468714 +epoch: 2, batch: 4648, sum loss: 3466.387207, avg loss: 2.379126, ppl: 10.795466 +epoch: 2, batch: 4649, sum loss: 3808.674072, avg loss: 2.374485, ppl: 10.745481 +epoch: 2, batch: 4650, sum loss: 4064.560303, avg loss: 2.485970, ppl: 12.012761 +epoch: 2, batch: 4651, sum loss: 5082.088379, avg loss: 2.956422, ppl: 19.229040 +epoch: 2, batch: 4652, sum loss: 4404.455566, avg loss: 2.679109, ppl: 14.572105 +epoch: 2, batch: 4653, sum loss: 4365.615723, avg loss: 2.721706, ppl: 15.206237 +epoch: 2, batch: 4654, sum loss: 4711.255371, avg loss: 2.721696, ppl: 15.206085 +epoch: 2, batch: 4655, sum loss: 4020.680664, avg loss: 2.702070, ppl: 14.910568 +epoch: 2, batch: 4656, sum loss: 4428.966309, avg loss: 2.756046, ppl: 15.737498 +epoch: 2, batch: 4657, sum loss: 5339.127930, avg loss: 2.657605, ppl: 14.262086 +epoch: 2, batch: 4658, sum loss: 3571.779053, avg loss: 2.390749, ppl: 10.921671 +epoch: 2, batch: 4659, sum loss: 4881.361816, avg loss: 2.696885, ppl: 14.833455 +epoch: 2, batch: 4660, sum loss: 5170.485840, avg loss: 2.793347, ppl: 16.335609 +epoch: 2, batch: 4661, sum loss: 4423.329102, avg loss: 2.486413, ppl: 12.018093 +epoch: 2, batch: 4662, sum loss: 4142.160156, avg loss: 2.536534, ppl: 12.635800 +epoch: 2, batch: 4663, sum loss: 4552.324219, avg loss: 2.635972, ppl: 13.956879 +epoch: 
2, batch: 4664, sum loss: 3752.322021, avg loss: 2.420853, ppl: 11.255455 +epoch: 2, batch: 4665, sum loss: 4094.428223, avg loss: 2.377717, ppl: 10.780261 +epoch: 2, batch: 4666, sum loss: 4202.096191, avg loss: 2.476191, ppl: 11.895867 +epoch: 2, batch: 4667, sum loss: 4624.325195, avg loss: 2.553465, ppl: 12.851559 +epoch: 2, batch: 4668, sum loss: 4410.917969, avg loss: 2.402461, ppl: 11.050336 +epoch: 2, batch: 4669, sum loss: 3313.933594, avg loss: 2.063470, ppl: 7.873246 +epoch: 2, batch: 4670, sum loss: 4756.538086, avg loss: 2.641054, ppl: 14.027980 +epoch: 2, batch: 4671, sum loss: 5005.184570, avg loss: 2.738066, ppl: 15.457062 +epoch: 2, batch: 4672, sum loss: 4293.039062, avg loss: 2.510549, ppl: 12.311688 +epoch: 2, batch: 4673, sum loss: 3789.185059, avg loss: 2.424303, ppl: 11.294350 +epoch: 2, batch: 4674, sum loss: 3541.741211, avg loss: 2.299832, ppl: 9.972506 +epoch: 2, batch: 4675, sum loss: 4055.017090, avg loss: 2.572980, ppl: 13.104824 +epoch: 2, batch: 4676, sum loss: 4579.655762, avg loss: 2.435987, ppl: 11.427092 +epoch: 2, batch: 4677, sum loss: 4455.204102, avg loss: 2.619168, ppl: 13.724295 +epoch: 2, batch: 4678, sum loss: 4433.085938, avg loss: 2.649782, ppl: 14.150959 +epoch: 2, batch: 4679, sum loss: 4588.614258, avg loss: 2.737837, ppl: 15.453521 +epoch: 2, batch: 4680, sum loss: 5029.934082, avg loss: 2.584755, ppl: 13.260046 +epoch: 2, batch: 4681, sum loss: 3809.450684, avg loss: 2.454543, ppl: 11.641113 +epoch: 2, batch: 4682, sum loss: 4027.933838, avg loss: 2.585324, ppl: 13.267581 +epoch: 2, batch: 4683, sum loss: 4802.531250, avg loss: 2.761663, ppl: 15.826136 +epoch: 2, batch: 4684, sum loss: 4174.065430, avg loss: 2.535884, ppl: 12.627593 +epoch: 2, batch: 4685, sum loss: 3895.353027, avg loss: 2.508276, ppl: 12.283740 +epoch: 2, batch: 4686, sum loss: 4662.739258, avg loss: 2.848344, ppl: 17.259178 +epoch: 2, batch: 4687, sum loss: 4030.385742, avg loss: 2.478712, ppl: 11.925895 +epoch: 2, batch: 4688, sum loss: 
4299.641602, avg loss: 2.530690, ppl: 12.562167 +epoch: 2, batch: 4689, sum loss: 3499.796631, avg loss: 2.427043, ppl: 11.325349 +epoch: 2, batch: 4690, sum loss: 4219.673340, avg loss: 2.722370, ppl: 15.216341 +epoch: 2, batch: 4691, sum loss: 3890.385254, avg loss: 2.411894, ppl: 11.155069 +epoch: 2, batch: 4692, sum loss: 3627.062988, avg loss: 2.237546, ppl: 9.370313 +epoch: 2, batch: 4693, sum loss: 4230.571289, avg loss: 2.425786, ppl: 11.311119 +epoch: 2, batch: 4694, sum loss: 4720.064453, avg loss: 2.681855, ppl: 14.612169 +epoch: 2, batch: 4695, sum loss: 4075.618164, avg loss: 2.497315, ppl: 12.149830 +epoch: 2, batch: 4696, sum loss: 4822.989746, avg loss: 2.732572, ppl: 15.372375 +epoch: 2, batch: 4697, sum loss: 3734.545410, avg loss: 2.331177, ppl: 10.290043 +epoch: 2, batch: 4698, sum loss: 4505.100586, avg loss: 2.651619, ppl: 14.176975 +epoch: 2, batch: 4699, sum loss: 4171.360840, avg loss: 2.529631, ppl: 12.548870 +epoch: 2, batch: 4700, sum loss: 3894.707764, avg loss: 2.243495, ppl: 9.426221 +epoch: 2, batch: 4701, sum loss: 4612.303223, avg loss: 2.605821, ppl: 13.542341 +epoch: 2, batch: 4702, sum loss: 6036.885254, avg loss: 2.774304, ppl: 16.027466 +epoch: 2, batch: 4703, sum loss: 4034.526855, avg loss: 2.623230, ppl: 13.780155 +epoch: 2, batch: 4704, sum loss: 3960.526855, avg loss: 2.652731, ppl: 14.192742 +epoch: 2, batch: 4705, sum loss: 4198.454590, avg loss: 2.782276, ppl: 16.155752 +epoch: 2, batch: 4706, sum loss: 4273.161133, avg loss: 2.677419, ppl: 14.547501 +epoch: 2, batch: 4707, sum loss: 5162.450684, avg loss: 2.672076, ppl: 14.469977 +epoch: 2, batch: 4708, sum loss: 4091.720703, avg loss: 2.265626, ppl: 9.637157 +epoch: 2, batch: 4709, sum loss: 4400.172852, avg loss: 2.546396, ppl: 12.761033 +epoch: 2, batch: 4710, sum loss: 4508.517578, avg loss: 2.444966, ppl: 11.530158 +epoch: 2, batch: 4711, sum loss: 4875.844727, avg loss: 2.805434, ppl: 16.534254 +epoch: 2, batch: 4712, sum loss: 4400.255859, avg loss: 2.520192, 
ppl: 12.430988 +epoch: 2, batch: 4713, sum loss: 4290.503906, avg loss: 2.659953, ppl: 14.295615 +epoch: 2, batch: 4714, sum loss: 4227.160645, avg loss: 2.552633, ppl: 12.840873 +epoch: 2, batch: 4715, sum loss: 4905.659668, avg loss: 2.762196, ppl: 15.834575 +epoch: 2, batch: 4716, sum loss: 4642.716797, avg loss: 2.630435, ppl: 13.879800 +epoch: 2, batch: 4717, sum loss: 4926.662598, avg loss: 2.799240, ppl: 16.432156 +epoch: 2, batch: 4718, sum loss: 3928.639404, avg loss: 2.399902, ppl: 11.022095 +epoch: 2, batch: 4719, sum loss: 4802.758789, avg loss: 2.705780, ppl: 14.965979 +epoch: 2, batch: 4720, sum loss: 3896.197266, avg loss: 2.390305, ppl: 10.916823 +epoch: 2, batch: 4721, sum loss: 4453.889648, avg loss: 2.757826, ppl: 15.765536 +epoch: 2, batch: 4722, sum loss: 4991.907227, avg loss: 2.616303, ppl: 13.685042 +epoch: 2, batch: 4723, sum loss: 3828.791504, avg loss: 2.494327, ppl: 12.113573 +epoch: 2, batch: 4724, sum loss: 4401.073242, avg loss: 2.554308, ppl: 12.862401 +epoch: 2, batch: 4725, sum loss: 3937.226807, avg loss: 2.493494, ppl: 12.103490 +epoch: 2, batch: 4726, sum loss: 4136.280273, avg loss: 2.622879, ppl: 13.775326 +epoch: 2, batch: 4727, sum loss: 4441.812988, avg loss: 2.711729, ppl: 15.055288 +epoch: 2, batch: 4728, sum loss: 3252.004883, avg loss: 2.250522, ppl: 9.492693 +epoch: 2, batch: 4729, sum loss: 4147.431641, avg loss: 2.471652, ppl: 11.841991 +epoch: 2, batch: 4730, sum loss: 4367.843750, avg loss: 2.445601, ppl: 11.537484 +epoch: 2, batch: 4731, sum loss: 5195.740234, avg loss: 2.943762, ppl: 18.987148 +epoch: 2, batch: 4732, sum loss: 4518.367676, avg loss: 2.713734, ppl: 15.085505 +epoch: 2, batch: 4733, sum loss: 5176.684082, avg loss: 2.866381, ppl: 17.573303 +epoch: 2, batch: 4734, sum loss: 4765.366211, avg loss: 2.660729, ppl: 14.306717 +epoch: 2, batch: 4735, sum loss: 4675.758789, avg loss: 2.618006, ppl: 13.708365 +epoch: 2, batch: 4736, sum loss: 4200.165527, avg loss: 2.859200, ppl: 17.447567 +epoch: 2, batch: 
4737, sum loss: 5167.737305, avg loss: 2.885392, ppl: 17.910585 +epoch: 2, batch: 4738, sum loss: 3636.690430, avg loss: 2.285789, ppl: 9.833442 +epoch: 2, batch: 4739, sum loss: 4301.258789, avg loss: 2.417796, ppl: 11.221100 +epoch: 2, batch: 4740, sum loss: 4384.618652, avg loss: 2.479988, ppl: 11.941122 +epoch: 2, batch: 4741, sum loss: 3670.565674, avg loss: 2.345409, ppl: 10.437545 +epoch: 2, batch: 4742, sum loss: 4141.008301, avg loss: 2.627543, ppl: 13.839726 +epoch: 2, batch: 4743, sum loss: 4305.747070, avg loss: 2.694460, ppl: 14.797528 +epoch: 2, batch: 4744, sum loss: 4527.506348, avg loss: 2.691740, ppl: 14.757328 +epoch: 2, batch: 4745, sum loss: 4349.975586, avg loss: 2.334930, ppl: 10.328741 +epoch: 2, batch: 4746, sum loss: 4018.106201, avg loss: 2.655721, ppl: 14.235252 +epoch: 2, batch: 4747, sum loss: 4212.945312, avg loss: 2.615112, ppl: 13.668748 +epoch: 2, batch: 4748, sum loss: 4208.581055, avg loss: 2.703007, ppl: 14.924535 +epoch: 2, batch: 4749, sum loss: 3939.054688, avg loss: 2.557828, ppl: 12.907747 +epoch: 2, batch: 4750, sum loss: 3753.928955, avg loss: 2.406365, ppl: 11.093559 +epoch: 2, batch: 4751, sum loss: 4543.701172, avg loss: 2.544066, ppl: 12.731328 +epoch: 2, batch: 4752, sum loss: 5217.508301, avg loss: 2.851098, ppl: 17.306767 +epoch: 2, batch: 4753, sum loss: 5224.840820, avg loss: 2.785096, ppl: 16.201380 +epoch: 2, batch: 4754, sum loss: 5878.228027, avg loss: 3.037844, ppl: 20.860214 +epoch: 2, batch: 4755, sum loss: 5046.326172, avg loss: 2.591847, ppl: 13.354417 +epoch: 2, batch: 4756, sum loss: 4687.365234, avg loss: 2.692341, ppl: 14.766201 +epoch: 2, batch: 4757, sum loss: 3590.236328, avg loss: 2.376066, ppl: 10.762485 +epoch: 2, batch: 4758, sum loss: 4474.253906, avg loss: 2.536425, ppl: 12.634423 +epoch: 2, batch: 4759, sum loss: 3669.071777, avg loss: 2.313412, ppl: 10.108859 +epoch: 2, batch: 4760, sum loss: 3570.549561, avg loss: 2.339810, ppl: 10.379261 +epoch: 2, batch: 4761, sum loss: 5019.723145, avg 
loss: 2.824830, ppl: 16.858080 +epoch: 2, batch: 4762, sum loss: 3867.345215, avg loss: 2.488639, ppl: 12.044873 +epoch: 2, batch: 4763, sum loss: 3859.523193, avg loss: 2.585079, ppl: 13.264339 +epoch: 2, batch: 4764, sum loss: 4438.063965, avg loss: 2.693000, ppl: 14.775938 +epoch: 2, batch: 4765, sum loss: 4180.532227, avg loss: 2.364555, ppl: 10.639299 +epoch: 2, batch: 4766, sum loss: 5177.905762, avg loss: 2.761550, ppl: 15.824347 +epoch: 2, batch: 4767, sum loss: 4434.244141, avg loss: 2.799397, ppl: 16.434725 +epoch: 2, batch: 4768, sum loss: 4114.876953, avg loss: 2.419093, ppl: 11.235662 +epoch: 2, batch: 4769, sum loss: 4150.768066, avg loss: 2.537144, ppl: 12.643515 +epoch: 2, batch: 4770, sum loss: 4533.123047, avg loss: 2.470367, ppl: 11.826786 +epoch: 2, batch: 4771, sum loss: 3742.356201, avg loss: 2.483315, ppl: 11.980921 +epoch: 2, batch: 4772, sum loss: 3505.008301, avg loss: 2.304410, ppl: 10.018270 +epoch: 2, batch: 4773, sum loss: 4467.472656, avg loss: 2.617149, ppl: 13.696614 +epoch: 2, batch: 4774, sum loss: 3139.351074, avg loss: 2.281505, ppl: 9.791407 +epoch: 2, batch: 4775, sum loss: 4488.915039, avg loss: 2.656163, ppl: 14.241535 +epoch: 2, batch: 4776, sum loss: 5010.484375, avg loss: 2.693809, ppl: 14.787893 +epoch: 2, batch: 4777, sum loss: 4427.822754, avg loss: 2.618464, ppl: 13.714644 +epoch: 2, batch: 4778, sum loss: 3887.450684, avg loss: 2.423598, ppl: 11.286392 +epoch: 2, batch: 4779, sum loss: 6740.700195, avg loss: 3.005216, ppl: 20.190584 +epoch: 2, batch: 4780, sum loss: 4612.876953, avg loss: 2.719857, ppl: 15.178147 +epoch: 2, batch: 4781, sum loss: 4649.061035, avg loss: 2.607438, ppl: 13.564249 +epoch: 2, batch: 4782, sum loss: 4005.927002, avg loss: 2.567902, ppl: 13.038443 +epoch: 2, batch: 4783, sum loss: 4122.144043, avg loss: 2.478740, ppl: 11.926225 +epoch: 2, batch: 4784, sum loss: 3886.435547, avg loss: 2.396076, ppl: 10.980008 +epoch: 2, batch: 4785, sum loss: 4219.892090, avg loss: 2.466331, ppl: 11.779150 
+epoch: 2, batch: 4786, sum loss: 4566.189453, avg loss: 2.572501, ppl: 13.098542 +epoch: 2, batch: 4787, sum loss: 4299.509766, avg loss: 2.550124, ppl: 12.808697 +epoch: 2, batch: 4788, sum loss: 4074.971924, avg loss: 2.352755, ppl: 10.514498 +epoch: 2, batch: 4789, sum loss: 4163.469727, avg loss: 2.490114, ppl: 12.062648 +epoch: 2, batch: 4790, sum loss: 3726.284180, avg loss: 2.411834, ppl: 11.154405 +epoch: 2, batch: 4791, sum loss: 4664.349609, avg loss: 2.488981, ppl: 12.048986 +epoch: 2, batch: 4792, sum loss: 4401.024414, avg loss: 2.572195, ppl: 13.094530 +epoch: 2, batch: 4793, sum loss: 3978.637451, avg loss: 2.295809, ppl: 9.932471 +epoch: 2, batch: 4794, sum loss: 4394.091797, avg loss: 2.437100, ppl: 11.439819 +epoch: 2, batch: 4795, sum loss: 3461.298340, avg loss: 2.293770, ppl: 9.912232 +epoch: 2, batch: 4796, sum loss: 3684.785156, avg loss: 2.458162, ppl: 11.683321 +epoch: 2, batch: 4797, sum loss: 4374.155273, avg loss: 2.395485, ppl: 10.973518 +epoch: 2, batch: 4798, sum loss: 4463.145996, avg loss: 2.636235, ppl: 13.960546 +epoch: 2, batch: 4799, sum loss: 2886.595947, avg loss: 2.088709, ppl: 8.074485 +epoch: 2, batch: 4800, sum loss: 3381.302246, avg loss: 2.298642, ppl: 9.960646 +epoch: 2, batch: 4801, sum loss: 5192.609863, avg loss: 2.784241, ppl: 16.187531 +epoch: 2, batch: 4802, sum loss: 4581.760742, avg loss: 2.704699, ppl: 14.949821 +epoch: 2, batch: 4803, sum loss: 4716.910156, avg loss: 2.686168, ppl: 14.675328 +epoch: 2, batch: 4804, sum loss: 3817.475098, avg loss: 2.281814, ppl: 9.794435 +epoch: 2, batch: 4805, sum loss: 4686.860352, avg loss: 2.586567, ppl: 13.284094 +epoch: 2, batch: 4806, sum loss: 4405.028320, avg loss: 2.558088, ppl: 12.911112 +epoch: 2, batch: 4807, sum loss: 3936.161621, avg loss: 2.554291, ppl: 12.862171 +epoch: 2, batch: 4808, sum loss: 4574.161621, avg loss: 2.813138, ppl: 16.662115 +epoch: 2, batch: 4809, sum loss: 3533.750244, avg loss: 2.360554, ppl: 10.596826 +epoch: 2, batch: 4810, sum loss: 
5236.231445, avg loss: 2.629951, ppl: 13.873083 +epoch: 2, batch: 4811, sum loss: 4973.933594, avg loss: 2.655597, ppl: 14.233484 +epoch: 2, batch: 4812, sum loss: 5232.392090, avg loss: 3.008851, ppl: 20.264109 +epoch: 2, batch: 4813, sum loss: 4074.724121, avg loss: 2.498298, ppl: 12.161779 +epoch: 2, batch: 4814, sum loss: 4945.377441, avg loss: 3.008137, ppl: 20.249640 +epoch: 2, batch: 4815, sum loss: 4272.521484, avg loss: 2.416585, ppl: 11.207518 +epoch: 2, batch: 4816, sum loss: 4273.619629, avg loss: 2.527274, ppl: 12.519327 +epoch: 2, batch: 4817, sum loss: 4349.507812, avg loss: 2.527314, ppl: 12.519835 +epoch: 2, batch: 4818, sum loss: 4451.227539, avg loss: 2.507734, ppl: 12.277074 +epoch: 2, batch: 4819, sum loss: 5228.898438, avg loss: 2.570746, ppl: 13.075581 +epoch: 2, batch: 4820, sum loss: 4014.208008, avg loss: 2.383734, ppl: 10.845324 +epoch: 2, batch: 4821, sum loss: 4282.067383, avg loss: 2.523316, ppl: 12.469880 +epoch: 2, batch: 4822, sum loss: 4099.167969, avg loss: 2.570011, ppl: 13.065973 +epoch: 2, batch: 4823, sum loss: 4405.205566, avg loss: 2.408532, ppl: 11.117633 +epoch: 2, batch: 4824, sum loss: 4777.196289, avg loss: 2.572534, ppl: 13.098979 +epoch: 2, batch: 4825, sum loss: 5950.692383, avg loss: 2.935714, ppl: 18.834946 +epoch: 2, batch: 4826, sum loss: 4461.997070, avg loss: 2.652792, ppl: 14.193619 +epoch: 2, batch: 4827, sum loss: 4110.093750, avg loss: 2.596395, ppl: 13.415292 +epoch: 2, batch: 4828, sum loss: 3847.786865, avg loss: 2.233190, ppl: 9.329583 +epoch: 2, batch: 4829, sum loss: 3517.192871, avg loss: 2.392648, ppl: 10.942433 +epoch: 2, batch: 4830, sum loss: 5144.399414, avg loss: 2.870759, ppl: 17.650406 +epoch: 2, batch: 4831, sum loss: 2803.364746, avg loss: 2.205637, ppl: 9.076034 +epoch: 2, batch: 4832, sum loss: 4452.168457, avg loss: 2.547007, ppl: 12.768833 +epoch: 2, batch: 4833, sum loss: 3533.096680, avg loss: 2.292730, ppl: 9.901932 +epoch: 2, batch: 4834, sum loss: 4799.323242, avg loss: 2.605496, 
ppl: 13.537937 +epoch: 2, batch: 4835, sum loss: 4469.609375, avg loss: 2.511017, ppl: 12.317446 +epoch: 2, batch: 4836, sum loss: 5011.888672, avg loss: 2.668737, ppl: 14.421745 +epoch: 2, batch: 4837, sum loss: 4121.398438, avg loss: 2.445934, ppl: 11.541322 +epoch: 2, batch: 4838, sum loss: 4274.882812, avg loss: 2.522055, ppl: 12.454160 +epoch: 2, batch: 4839, sum loss: 4645.901367, avg loss: 2.601289, ppl: 13.481098 +epoch: 2, batch: 4840, sum loss: 3958.379639, avg loss: 2.483300, ppl: 11.980733 +epoch: 2, batch: 4841, sum loss: 3971.436035, avg loss: 2.434970, ppl: 11.415475 +epoch: 2, batch: 4842, sum loss: 4285.420898, avg loss: 2.477122, ppl: 11.906945 +epoch: 2, batch: 4843, sum loss: 4994.751465, avg loss: 2.883806, ppl: 17.882198 +epoch: 2, batch: 4844, sum loss: 4010.900391, avg loss: 2.616374, ppl: 13.686002 +epoch: 2, batch: 4845, sum loss: 3608.713867, avg loss: 2.242830, ppl: 9.419954 +epoch: 2, batch: 4846, sum loss: 4004.030273, avg loss: 2.389040, ppl: 10.903017 +epoch: 2, batch: 4847, sum loss: 4627.364258, avg loss: 2.487830, ppl: 12.035136 +epoch: 2, batch: 4848, sum loss: 4846.535645, avg loss: 2.753713, ppl: 15.700827 +epoch: 2, batch: 4849, sum loss: 4425.112793, avg loss: 2.629300, ppl: 13.864066 +epoch: 2, batch: 4850, sum loss: 4195.573242, avg loss: 2.389279, ppl: 10.905624 +epoch: 2, batch: 4851, sum loss: 3563.235596, avg loss: 2.341154, ppl: 10.393220 +epoch: 2, batch: 4852, sum loss: 4655.117676, avg loss: 2.733481, ppl: 15.386349 +epoch: 2, batch: 4853, sum loss: 4708.960449, avg loss: 2.601636, ppl: 13.485779 +epoch: 2, batch: 4854, sum loss: 4148.304688, avg loss: 2.284309, ppl: 9.818895 +epoch: 2, batch: 4855, sum loss: 4472.912109, avg loss: 2.638886, ppl: 13.997604 +epoch: 2, batch: 4856, sum loss: 4673.592773, avg loss: 2.763804, ppl: 15.860064 +epoch: 2, batch: 4857, sum loss: 4561.071289, avg loss: 2.806813, ppl: 16.557066 +epoch: 2, batch: 4858, sum loss: 4027.403076, avg loss: 2.369061, ppl: 10.687349 +epoch: 2, batch: 
4859, sum loss: 3618.147949, avg loss: 2.469726, ppl: 11.819203 +epoch: 2, batch: 4860, sum loss: 4662.095215, avg loss: 2.687087, ppl: 14.688819 +epoch: 2, batch: 4861, sum loss: 4754.727051, avg loss: 2.706162, ppl: 14.971707 +epoch: 2, batch: 4862, sum loss: 4090.802246, avg loss: 2.607267, ppl: 13.561937 +epoch: 2, batch: 4863, sum loss: 3825.204590, avg loss: 2.183336, ppl: 8.875867 +epoch: 2, batch: 4864, sum loss: 4300.235840, avg loss: 2.689328, ppl: 14.721783 +epoch: 2, batch: 4865, sum loss: 5126.731934, avg loss: 2.689786, ppl: 14.728519 +epoch: 2, batch: 4866, sum loss: 3729.870605, avg loss: 2.363670, ppl: 10.629888 +epoch: 2, batch: 4867, sum loss: 4560.470215, avg loss: 2.482564, ppl: 11.971924 +epoch: 2, batch: 4868, sum loss: 4341.342285, avg loss: 2.678188, ppl: 14.558683 +epoch: 2, batch: 4869, sum loss: 3863.725586, avg loss: 2.520369, ppl: 12.433185 +epoch: 2, batch: 4870, sum loss: 4166.269531, avg loss: 2.670686, ppl: 14.449875 +epoch: 2, batch: 4871, sum loss: 3974.463623, avg loss: 2.408766, ppl: 11.120228 +epoch: 2, batch: 4872, sum loss: 3801.810059, avg loss: 2.336699, ppl: 10.347030 +epoch: 2, batch: 4873, sum loss: 5521.223633, avg loss: 2.774484, ppl: 16.030355 +epoch: 2, batch: 4874, sum loss: 4604.548828, avg loss: 2.552411, ppl: 12.838014 +epoch: 2, batch: 4875, sum loss: 4895.451172, avg loss: 2.819960, ppl: 16.776186 +epoch: 2, batch: 4876, sum loss: 3740.346680, avg loss: 2.472139, ppl: 11.847766 +epoch: 2, batch: 4877, sum loss: 3622.320312, avg loss: 2.384674, ppl: 10.855527 +epoch: 2, batch: 4878, sum loss: 5225.349609, avg loss: 2.807818, ppl: 16.573717 +epoch: 2, batch: 4879, sum loss: 5160.658203, avg loss: 2.693454, ppl: 14.782651 +epoch: 2, batch: 4880, sum loss: 3753.197998, avg loss: 2.398210, ppl: 11.003458 +epoch: 2, batch: 4881, sum loss: 4550.120117, avg loss: 2.784651, ppl: 16.194170 +epoch: 2, batch: 4882, sum loss: 5240.345215, avg loss: 3.068118, ppl: 21.501390 +epoch: 2, batch: 4883, sum loss: 4230.940430, avg 
loss: 2.584570, ppl: 13.257583 +epoch: 2, batch: 4884, sum loss: 4597.074219, avg loss: 2.442654, ppl: 11.503527 +epoch: 2, batch: 4885, sum loss: 3928.088867, avg loss: 2.367745, ppl: 10.673296 +epoch: 2, batch: 4886, sum loss: 3382.155762, avg loss: 2.375109, ppl: 10.752190 +epoch: 2, batch: 4887, sum loss: 3735.576172, avg loss: 2.184547, ppl: 8.886626 +epoch: 2, batch: 4888, sum loss: 4477.020020, avg loss: 2.837148, ppl: 17.067024 +epoch: 2, batch: 4889, sum loss: 3643.985596, avg loss: 2.283199, ppl: 9.808007 +epoch: 2, batch: 4890, sum loss: 4782.883789, avg loss: 2.498894, ppl: 12.169030 +epoch: 2, batch: 4891, sum loss: 4077.379395, avg loss: 2.742017, ppl: 15.518254 +epoch: 2, batch: 4892, sum loss: 4332.245117, avg loss: 2.548380, ppl: 12.786368 +epoch: 2, batch: 4893, sum loss: 4625.333984, avg loss: 2.500180, ppl: 12.184693 +epoch: 2, batch: 4894, sum loss: 4798.018555, avg loss: 2.594926, ppl: 13.395597 +epoch: 2, batch: 4895, sum loss: 4380.869141, avg loss: 2.539634, ppl: 12.675037 +epoch: 2, batch: 4896, sum loss: 4690.809082, avg loss: 2.724047, ppl: 15.241880 +epoch: 2, batch: 4897, sum loss: 3763.678955, avg loss: 2.133605, ppl: 8.445255 +epoch: 2, batch: 4898, sum loss: 4481.049805, avg loss: 2.827161, ppl: 16.897413 +epoch: 2, batch: 4899, sum loss: 4095.344238, avg loss: 2.526431, ppl: 12.508778 +epoch: 2, batch: 4900, sum loss: 4880.799805, avg loss: 2.485132, ppl: 12.002710 +epoch: 2, batch: 4901, sum loss: 3592.757568, avg loss: 2.326916, ppl: 10.246290 +epoch: 2, batch: 4902, sum loss: 4410.546387, avg loss: 2.562781, ppl: 12.971843 +epoch: 2, batch: 4903, sum loss: 4249.343750, avg loss: 2.498144, ppl: 12.159909 +epoch: 2, batch: 4904, sum loss: 4057.984375, avg loss: 2.760534, ppl: 15.808275 +epoch: 2, batch: 4905, sum loss: 4367.374023, avg loss: 2.758922, ppl: 15.782822 +epoch: 2, batch: 4906, sum loss: 4944.156250, avg loss: 2.580457, ppl: 13.203176 +epoch: 2, batch: 4907, sum loss: 4091.427246, avg loss: 2.670645, ppl: 14.449279 
+epoch: 2, batch: 4908, sum loss: 4158.478027, avg loss: 2.472341, ppl: 11.850159 +epoch: 2, batch: 4909, sum loss: 4805.585938, avg loss: 2.681689, ppl: 14.609745 +epoch: 2, batch: 4910, sum loss: 4503.932617, avg loss: 2.685708, ppl: 14.668584 +epoch: 2, batch: 4911, sum loss: 5020.615234, avg loss: 2.770759, ppl: 15.970749 +epoch: 2, batch: 4912, sum loss: 4084.554688, avg loss: 2.655757, ppl: 14.235761 +epoch: 2, batch: 4913, sum loss: 3724.591797, avg loss: 2.402962, ppl: 11.055881 +epoch: 2, batch: 4914, sum loss: 3731.420166, avg loss: 2.514434, ppl: 12.359612 +epoch: 2, batch: 4915, sum loss: 4262.932129, avg loss: 2.551126, ppl: 12.821539 +epoch: 2, batch: 4916, sum loss: 4320.302734, avg loss: 2.461711, ppl: 11.724854 +epoch: 2, batch: 4917, sum loss: 3413.140625, avg loss: 2.276945, ppl: 9.746860 +epoch: 2, batch: 4918, sum loss: 3903.830078, avg loss: 2.673856, ppl: 14.495761 +epoch: 2, batch: 4919, sum loss: 4381.972656, avg loss: 2.738733, ppl: 15.467373 +epoch: 2, batch: 4920, sum loss: 4745.628906, avg loss: 2.666084, ppl: 14.383527 +epoch: 2, batch: 4921, sum loss: 4102.691406, avg loss: 2.504695, ppl: 12.239830 +epoch: 2, batch: 4922, sum loss: 4397.608398, avg loss: 2.505760, ppl: 12.252864 +epoch: 2, batch: 4923, sum loss: 3794.773926, avg loss: 2.392670, ppl: 10.942668 +epoch: 2, batch: 4924, sum loss: 3664.904297, avg loss: 2.517105, ppl: 12.392663 +epoch: 2, batch: 4925, sum loss: 4585.716309, avg loss: 2.507226, ppl: 12.270846 +epoch: 2, batch: 4926, sum loss: 3719.876465, avg loss: 2.244946, ppl: 9.439910 +epoch: 2, batch: 4927, sum loss: 4982.377441, avg loss: 2.752695, ppl: 15.684843 +epoch: 2, batch: 4928, sum loss: 4226.973633, avg loss: 2.514559, ppl: 12.361154 +epoch: 2, batch: 4929, sum loss: 3958.772461, avg loss: 2.573974, ppl: 13.117856 +epoch: 2, batch: 4930, sum loss: 3816.688965, avg loss: 2.558103, ppl: 12.911296 +epoch: 2, batch: 4931, sum loss: 4186.651367, avg loss: 2.508479, ppl: 12.286230 +epoch: 2, batch: 4932, sum loss: 
4545.256836, avg loss: 2.681567, ppl: 14.607972 +epoch: 2, batch: 4933, sum loss: 3951.262695, avg loss: 2.430051, ppl: 11.359460 +epoch: 2, batch: 4934, sum loss: 4211.673828, avg loss: 2.502480, ppl: 12.212745 +epoch: 2, batch: 4935, sum loss: 4293.610352, avg loss: 2.543608, ppl: 12.725504 +epoch: 2, batch: 4936, sum loss: 4676.746582, avg loss: 2.612708, ppl: 13.635921 +epoch: 2, batch: 4937, sum loss: 3874.612061, avg loss: 2.294027, ppl: 9.914787 +epoch: 2, batch: 4938, sum loss: 4339.083984, avg loss: 2.524191, ppl: 12.480793 +epoch: 2, batch: 4939, sum loss: 4160.662109, avg loss: 2.554120, ppl: 12.859982 +epoch: 2, batch: 4940, sum loss: 4243.963379, avg loss: 2.631099, ppl: 13.889032 +epoch: 2, batch: 4941, sum loss: 4096.005859, avg loss: 2.547267, ppl: 12.772152 +epoch: 2, batch: 4942, sum loss: 4693.131348, avg loss: 2.441796, ppl: 11.493663 +epoch: 2, batch: 4943, sum loss: 4059.802734, avg loss: 2.481542, ppl: 11.959693 +epoch: 2, batch: 4944, sum loss: 3185.570801, avg loss: 2.254473, ppl: 9.530272 +epoch: 2, batch: 4945, sum loss: 4526.445312, avg loss: 2.410248, ppl: 11.136721 +epoch: 2, batch: 4946, sum loss: 4434.974121, avg loss: 2.608809, ppl: 13.582857 +epoch: 2, batch: 4947, sum loss: 4060.209473, avg loss: 2.466713, ppl: 11.783649 +epoch: 2, batch: 4948, sum loss: 3436.656982, avg loss: 2.259472, ppl: 9.578032 +epoch: 2, batch: 4949, sum loss: 3935.875488, avg loss: 2.492638, ppl: 12.093137 +epoch: 2, batch: 4950, sum loss: 5148.266602, avg loss: 2.655114, ppl: 14.226614 +epoch: 2, batch: 4951, sum loss: 4284.346680, avg loss: 2.372285, ppl: 10.721865 +epoch: 2, batch: 4952, sum loss: 3975.330078, avg loss: 2.464557, ppl: 11.758268 +epoch: 2, batch: 4953, sum loss: 3852.277832, avg loss: 2.389750, ppl: 10.910771 +epoch: 2, batch: 4954, sum loss: 4410.015625, avg loss: 2.420426, ppl: 11.250648 +epoch: 2, batch: 4955, sum loss: 3946.500977, avg loss: 2.449721, ppl: 11.585118 +epoch: 2, batch: 4956, sum loss: 4461.483887, avg loss: 2.596906, 
ppl: 13.422142 +epoch: 2, batch: 4957, sum loss: 4034.021973, avg loss: 2.757363, ppl: 15.758235 +epoch: 2, batch: 4958, sum loss: 4650.357910, avg loss: 2.549538, ppl: 12.801193 +epoch: 2, batch: 4959, sum loss: 4789.825195, avg loss: 2.792901, ppl: 16.328316 +epoch: 2, batch: 4960, sum loss: 4412.570801, avg loss: 2.419173, ppl: 11.236558 +epoch: 2, batch: 4961, sum loss: 4241.480957, avg loss: 2.532228, ppl: 12.581500 +epoch: 2, batch: 4962, sum loss: 4583.149414, avg loss: 2.427516, ppl: 11.330696 +epoch: 2, batch: 4963, sum loss: 3313.931641, avg loss: 2.408381, ppl: 11.115944 +epoch: 2, batch: 4964, sum loss: 4558.602539, avg loss: 2.838482, ppl: 17.089806 +epoch: 2, batch: 4965, sum loss: 4216.212402, avg loss: 2.766544, ppl: 15.903570 +epoch: 2, batch: 4966, sum loss: 3310.886230, avg loss: 2.249243, ppl: 9.480561 +epoch: 2, batch: 4967, sum loss: 3862.067383, avg loss: 2.370821, ppl: 10.706179 +epoch: 2, batch: 4968, sum loss: 4320.773438, avg loss: 2.714054, ppl: 15.090322 +epoch: 2, batch: 4969, sum loss: 4047.791260, avg loss: 2.483308, ppl: 11.980826 +epoch: 2, batch: 4970, sum loss: 3558.767578, avg loss: 2.556586, ppl: 12.891727 +epoch: 2, batch: 4971, sum loss: 4111.684082, avg loss: 2.496469, ppl: 12.139551 +epoch: 2, batch: 4972, sum loss: 4130.073242, avg loss: 2.540020, ppl: 12.679931 +epoch: 2, batch: 4973, sum loss: 4869.326172, avg loss: 2.533468, ppl: 12.597123 +epoch: 2, batch: 4974, sum loss: 3423.655029, avg loss: 2.247968, ppl: 9.468476 +epoch: 2, batch: 4975, sum loss: 4481.933594, avg loss: 2.683793, ppl: 14.640513 +epoch: 2, batch: 4976, sum loss: 4947.597656, avg loss: 2.699180, ppl: 14.867540 +epoch: 2, batch: 4977, sum loss: 4737.375000, avg loss: 2.772016, ppl: 15.990836 +epoch: 2, batch: 4978, sum loss: 4729.572754, avg loss: 2.687257, ppl: 14.691323 +epoch: 2, batch: 4979, sum loss: 3872.084961, avg loss: 2.478928, ppl: 11.928466 +epoch: 2, batch: 4980, sum loss: 3843.880615, avg loss: 2.533870, ppl: 12.602180 +epoch: 2, batch: 
4981, sum loss: 4080.289795, avg loss: 2.449153, ppl: 11.578541 +epoch: 2, batch: 4982, sum loss: 4605.571289, avg loss: 2.523601, ppl: 12.473431 +epoch: 2, batch: 4983, sum loss: 4185.490234, avg loss: 2.503284, ppl: 12.222564 +epoch: 2, batch: 4984, sum loss: 4560.685059, avg loss: 2.613573, ppl: 13.647728 +epoch: 2, batch: 4985, sum loss: 4795.018555, avg loss: 2.538390, ppl: 12.659272 +epoch: 2, batch: 4986, sum loss: 4169.923828, avg loss: 2.445703, ppl: 11.538662 +epoch: 2, batch: 4987, sum loss: 4517.846680, avg loss: 2.654434, ppl: 14.216936 +epoch: 2, batch: 4988, sum loss: 3941.863037, avg loss: 2.455989, ppl: 11.657962 +epoch: 2, batch: 4989, sum loss: 5001.239746, avg loss: 2.866040, ppl: 17.567314 +epoch: 2, batch: 4990, sum loss: 3668.138672, avg loss: 2.442169, ppl: 11.497957 +epoch: 2, batch: 4991, sum loss: 3762.433594, avg loss: 2.571725, ppl: 13.088384 +epoch: 2, batch: 4992, sum loss: 3567.559082, avg loss: 2.262244, ppl: 9.604620 +epoch: 2, batch: 4993, sum loss: 4998.847168, avg loss: 2.814666, ppl: 16.687605 +epoch: 2, batch: 4994, sum loss: 4686.333984, avg loss: 2.656652, ppl: 14.248505 +epoch: 2, batch: 4995, sum loss: 3494.524170, avg loss: 2.329683, ppl: 10.274682 +epoch: 2, batch: 4996, sum loss: 3597.349609, avg loss: 2.246939, ppl: 9.458740 +epoch: 2, batch: 4997, sum loss: 4832.729980, avg loss: 2.836109, ppl: 17.049301 +epoch: 2, batch: 4998, sum loss: 4678.960938, avg loss: 2.842625, ppl: 17.160755 +epoch: 2, batch: 4999, sum loss: 3467.047363, avg loss: 2.551175, ppl: 12.822165 +epoch: 2, batch: 5000, sum loss: 3776.940430, avg loss: 2.424224, ppl: 11.293459 +epoch: 2, batch: 5001, sum loss: 4227.526367, avg loss: 2.356481, ppl: 10.553745 +epoch: 2, batch: 5002, sum loss: 3898.677734, avg loss: 2.426060, ppl: 11.314213 +epoch: 2, batch: 5003, sum loss: 4634.858887, avg loss: 2.723184, ppl: 15.228731 +epoch: 2, batch: 5004, sum loss: 4191.573242, avg loss: 2.478754, ppl: 11.926398 +epoch: 2, batch: 5005, sum loss: 4669.362305, avg 
loss: 2.586904, ppl: 13.288570 +epoch: 2, batch: 5006, sum loss: 4675.820312, avg loss: 2.511182, ppl: 12.319481 +epoch: 2, batch: 5007, sum loss: 4195.855469, avg loss: 2.439451, ppl: 11.466741 +epoch: 2, batch: 5008, sum loss: 4448.430176, avg loss: 2.831592, ppl: 16.972452 +epoch: 2, batch: 5009, sum loss: 4614.461914, avg loss: 2.678155, ppl: 14.558215 +epoch: 2, batch: 5010, sum loss: 4061.607910, avg loss: 2.502531, ppl: 12.213367 +epoch: 2, batch: 5011, sum loss: 3929.542236, avg loss: 2.655096, ppl: 14.226353 +epoch: 2, batch: 5012, sum loss: 4187.240723, avg loss: 2.457301, ppl: 11.673262 +epoch: 2, batch: 5013, sum loss: 4077.511230, avg loss: 2.481747, ppl: 11.962149 +epoch: 2, batch: 5014, sum loss: 4474.598145, avg loss: 2.677797, ppl: 14.552996 +epoch: 2, batch: 5015, sum loss: 3795.824219, avg loss: 2.448919, ppl: 11.575828 +epoch: 2, batch: 5016, sum loss: 4643.338867, avg loss: 2.551285, ppl: 12.823575 +epoch: 2, batch: 5017, sum loss: 3736.544434, avg loss: 2.578706, ppl: 13.180069 +epoch: 2, batch: 5018, sum loss: 3412.526367, avg loss: 2.371457, ppl: 10.712986 +epoch: 2, batch: 5019, sum loss: 4328.454590, avg loss: 2.647373, ppl: 14.116900 +epoch: 2, batch: 5020, sum loss: 3995.103271, avg loss: 2.525350, ppl: 12.495263 +epoch: 2, batch: 5021, sum loss: 4080.013916, avg loss: 2.400008, ppl: 11.023270 +epoch: 2, batch: 5022, sum loss: 3945.650391, avg loss: 2.265012, ppl: 9.631236 +epoch: 2, batch: 5023, sum loss: 4814.549805, avg loss: 2.512813, ppl: 12.339594 +epoch: 2, batch: 5024, sum loss: 4128.597656, avg loss: 2.559577, ppl: 12.930346 +epoch: 2, batch: 5025, sum loss: 4806.991211, avg loss: 2.753145, ppl: 15.691909 +epoch: 2, batch: 5026, sum loss: 5198.852051, avg loss: 2.729056, ppl: 15.318421 +epoch: 2, batch: 5027, sum loss: 3334.925293, avg loss: 2.196920, ppl: 8.997263 +epoch: 2, batch: 5028, sum loss: 4232.128906, avg loss: 2.377600, ppl: 10.779007 +epoch: 2, batch: 5029, sum loss: 3318.265625, avg loss: 2.301155, ppl: 9.985710 
+epoch: 2, batch: 5030, sum loss: 3623.024658, avg loss: 2.399354, ppl: 11.016060 +epoch: 2, batch: 5031, sum loss: 5027.318359, avg loss: 2.669845, ppl: 14.437733 +epoch: 2, batch: 5032, sum loss: 3734.677490, avg loss: 2.417267, ppl: 11.215167 +epoch: 2, batch: 5033, sum loss: 4456.942871, avg loss: 2.606399, ppl: 13.550173 +epoch: 2, batch: 5034, sum loss: 4528.969238, avg loss: 2.558740, ppl: 12.919524 +epoch: 2, batch: 5035, sum loss: 4192.875977, avg loss: 2.500224, ppl: 12.185225 +epoch: 2, batch: 5036, sum loss: 4838.270020, avg loss: 2.594247, ppl: 13.386498 +epoch: 2, batch: 5037, sum loss: 4114.844727, avg loss: 2.769074, ppl: 15.943871 +epoch: 2, batch: 5038, sum loss: 3612.554443, avg loss: 2.522734, ppl: 12.462619 +epoch: 2, batch: 5039, sum loss: 4225.974609, avg loss: 2.471330, ppl: 11.838183 +epoch: 2, batch: 5040, sum loss: 3874.251953, avg loss: 2.229144, ppl: 9.291907 +epoch: 2, batch: 5041, sum loss: 5680.201172, avg loss: 2.678077, ppl: 14.557073 +epoch: 2, batch: 5042, sum loss: 4357.309570, avg loss: 2.665021, ppl: 14.368254 +epoch: 2, batch: 5043, sum loss: 4589.698730, avg loss: 2.637758, ppl: 13.981818 +epoch: 2, batch: 5044, sum loss: 3713.012939, avg loss: 2.339643, ppl: 10.377528 +epoch: 2, batch: 5045, sum loss: 4315.230957, avg loss: 2.286821, ppl: 9.843594 +epoch: 2, batch: 5046, sum loss: 4182.269043, avg loss: 2.420295, ppl: 11.249172 +epoch: 2, batch: 5047, sum loss: 4302.706055, avg loss: 2.634847, ppl: 13.941185 +epoch: 2, batch: 5048, sum loss: 4529.443359, avg loss: 2.606124, ppl: 13.546442 +epoch: 2, batch: 5049, sum loss: 4254.804688, avg loss: 2.595976, ppl: 13.409667 +epoch: 2, batch: 5050, sum loss: 3550.991943, avg loss: 2.257465, ppl: 9.558824 +epoch: 2, batch: 5051, sum loss: 3640.181396, avg loss: 2.388571, ppl: 10.897904 +epoch: 2, batch: 5052, sum loss: 4217.924316, avg loss: 2.469511, ppl: 11.816662 +epoch: 2, batch: 5053, sum loss: 4199.431152, avg loss: 2.616468, ppl: 13.687294 +epoch: 2, batch: 5054, sum loss: 
3475.069336, avg loss: 2.450684, ppl: 11.596272 +epoch: 2, batch: 5055, sum loss: 6006.858398, avg loss: 2.882370, ppl: 17.856539 +epoch: 2, batch: 5056, sum loss: 3807.779785, avg loss: 2.485496, ppl: 12.007074 +epoch: 2, batch: 5057, sum loss: 3771.846680, avg loss: 2.259944, ppl: 9.582555 +epoch: 2, batch: 5058, sum loss: 4745.231445, avg loss: 2.736581, ppl: 15.434123 +epoch: 2, batch: 5059, sum loss: 4357.728516, avg loss: 2.728697, ppl: 15.312915 +epoch: 2, batch: 5060, sum loss: 4828.592773, avg loss: 2.857156, ppl: 17.411930 +epoch: 2, batch: 5061, sum loss: 4480.191406, avg loss: 2.574823, ppl: 13.128988 +epoch: 2, batch: 5062, sum loss: 3931.297607, avg loss: 2.478750, ppl: 11.926347 +epoch: 2, batch: 5063, sum loss: 4079.362305, avg loss: 2.596666, ppl: 13.418926 +epoch: 2, batch: 5064, sum loss: 4206.631836, avg loss: 2.546387, ppl: 12.760921 +epoch: 2, batch: 5065, sum loss: 4394.470703, avg loss: 2.556411, ppl: 12.889478 +epoch: 2, batch: 5066, sum loss: 4572.936035, avg loss: 2.560435, ppl: 12.941441 +epoch: 2, batch: 5067, sum loss: 5848.818848, avg loss: 3.231392, ppl: 25.314869 +epoch: 2, batch: 5068, sum loss: 4105.194824, avg loss: 2.594940, ppl: 13.395780 +epoch: 2, batch: 5069, sum loss: 4371.483398, avg loss: 2.633424, ppl: 13.921352 +epoch: 2, batch: 5070, sum loss: 3721.837402, avg loss: 2.287546, ppl: 9.850734 +epoch: 2, batch: 5071, sum loss: 4222.175781, avg loss: 2.687572, ppl: 14.695954 +epoch: 2, batch: 5072, sum loss: 3805.755615, avg loss: 2.377112, ppl: 10.773740 +epoch: 2, batch: 5073, sum loss: 3223.471191, avg loss: 2.191347, ppl: 8.947256 +epoch: 2, batch: 5074, sum loss: 4064.760254, avg loss: 2.580800, ppl: 13.207701 +epoch: 2, batch: 5075, sum loss: 4383.620117, avg loss: 2.651918, ppl: 14.181208 +epoch: 2, batch: 5076, sum loss: 3824.803467, avg loss: 2.350832, ppl: 10.494295 +epoch: 2, batch: 5077, sum loss: 3878.195068, avg loss: 2.383648, ppl: 10.844390 +epoch: 2, batch: 5078, sum loss: 3708.460693, avg loss: 2.568186, 
ppl: 13.042145 +epoch: 2, batch: 5079, sum loss: 4427.085938, avg loss: 2.607236, ppl: 13.561510 +epoch: 2, batch: 5080, sum loss: 3947.793701, avg loss: 2.354081, ppl: 10.528448 +epoch: 2, batch: 5081, sum loss: 3593.889893, avg loss: 2.369077, ppl: 10.687523 +epoch: 2, batch: 5082, sum loss: 4776.239258, avg loss: 2.762429, ppl: 15.838264 +epoch: 2, batch: 5083, sum loss: 4958.094238, avg loss: 2.605409, ppl: 13.536766 +epoch: 2, batch: 5084, sum loss: 4807.142578, avg loss: 2.591451, ppl: 13.349133 +epoch: 2, batch: 5085, sum loss: 4823.618164, avg loss: 2.734478, ppl: 15.401694 +epoch: 2, batch: 5086, sum loss: 3796.718750, avg loss: 2.387873, ppl: 10.890310 +epoch: 2, batch: 5087, sum loss: 4335.008301, avg loss: 2.588065, ppl: 13.304002 +epoch: 2, batch: 5088, sum loss: 4229.692871, avg loss: 2.388308, ppl: 10.895040 +epoch: 2, batch: 5089, sum loss: 3604.487793, avg loss: 2.433820, ppl: 11.402359 +epoch: 2, batch: 5090, sum loss: 4361.938477, avg loss: 2.618210, ppl: 13.711163 +epoch: 2, batch: 5091, sum loss: 3812.647461, avg loss: 2.533321, ppl: 12.595262 +epoch: 2, batch: 5092, sum loss: 3283.328125, avg loss: 2.205056, ppl: 9.070757 +epoch: 2, batch: 5093, sum loss: 4177.732422, avg loss: 2.419069, ppl: 11.235397 +epoch: 2, batch: 5094, sum loss: 3435.607666, avg loss: 2.222256, ppl: 9.228126 +epoch: 2, batch: 5095, sum loss: 3270.259277, avg loss: 2.178720, ppl: 8.834994 +epoch: 2, batch: 5096, sum loss: 4240.564941, avg loss: 2.427341, ppl: 11.328719 +epoch: 2, batch: 5097, sum loss: 3751.329346, avg loss: 2.517671, ppl: 12.399683 +epoch: 2, batch: 5098, sum loss: 4395.393066, avg loss: 2.630397, ppl: 13.879276 +epoch: 2, batch: 5099, sum loss: 5716.390137, avg loss: 2.903195, ppl: 18.232298 +epoch: 2, batch: 5100, sum loss: 4429.812988, avg loss: 2.476139, ppl: 11.895249 +epoch: 2, batch: 5101, sum loss: 3859.602295, avg loss: 2.488461, ppl: 12.042723 +epoch: 2, batch: 5102, sum loss: 4359.276855, avg loss: 2.656476, ppl: 14.245995 +epoch: 2, batch: 
5103, sum loss: 3636.482178, avg loss: 2.325116, ppl: 10.227871 +epoch: 2, batch: 5104, sum loss: 4040.996582, avg loss: 2.639449, ppl: 14.005486 +epoch: 2, batch: 5105, sum loss: 4457.456543, avg loss: 2.661168, ppl: 14.313002 +epoch: 2, batch: 5106, sum loss: 4634.895020, avg loss: 2.726409, ppl: 15.277925 +epoch: 2, batch: 5107, sum loss: 3862.858887, avg loss: 2.431000, ppl: 11.370247 +epoch: 2, batch: 5108, sum loss: 3400.908203, avg loss: 2.313543, ppl: 10.110183 +epoch: 2, batch: 5109, sum loss: 4568.544434, avg loss: 2.640777, ppl: 14.024097 +epoch: 2, batch: 5110, sum loss: 5292.363281, avg loss: 2.887269, ppl: 17.944229 +epoch: 2, batch: 5111, sum loss: 3821.327637, avg loss: 2.465373, ppl: 11.767868 +epoch: 2, batch: 5112, sum loss: 4389.110840, avg loss: 2.844531, ppl: 17.193489 +epoch: 2, batch: 5113, sum loss: 6002.462891, avg loss: 3.023911, ppl: 20.571585 +epoch: 2, batch: 5114, sum loss: 4433.222168, avg loss: 2.653036, ppl: 14.197070 +epoch: 2, batch: 5115, sum loss: 3688.827881, avg loss: 2.425265, ppl: 11.305225 +epoch: 2, batch: 5116, sum loss: 4957.181641, avg loss: 2.761661, ppl: 15.826109 +epoch: 2, batch: 5117, sum loss: 4570.892578, avg loss: 2.687180, ppl: 14.690185 +epoch: 2, batch: 5118, sum loss: 4476.830566, avg loss: 2.459797, ppl: 11.702437 +epoch: 2, batch: 5119, sum loss: 4245.689453, avg loss: 2.614341, ppl: 13.658210 +epoch: 2, batch: 5120, sum loss: 3920.228516, avg loss: 2.543951, ppl: 12.729868 +epoch: 2, batch: 5121, sum loss: 5155.353027, avg loss: 2.821759, ppl: 16.806383 +epoch: 2, batch: 5122, sum loss: 4046.051025, avg loss: 2.396950, ppl: 10.989604 +epoch: 2, batch: 5123, sum loss: 4145.056641, avg loss: 2.519791, ppl: 12.426001 +epoch: 2, batch: 5124, sum loss: 3773.941650, avg loss: 2.588437, ppl: 13.308958 +epoch: 2, batch: 5125, sum loss: 3442.789795, avg loss: 2.540804, ppl: 12.689872 +epoch: 2, batch: 5126, sum loss: 4306.767090, avg loss: 2.551402, ppl: 12.825076 +epoch: 2, batch: 5127, sum loss: 4295.671875, 
avg loss: 2.643490, ppl: 14.062200 +epoch: 2, batch: 5128, sum loss: 5103.201660, avg loss: 2.883165, ppl: 17.870743 +epoch: 2, batch: 5129, sum loss: 3724.622559, avg loss: 2.382996, ppl: 10.837321 +epoch: 2, batch: 5130, sum loss: 4591.270508, avg loss: 2.663150, ppl: 14.341394 +epoch: 2, batch: 5131, sum loss: 3826.673828, avg loss: 2.330496, ppl: 10.283039 +epoch: 2, batch: 5132, sum loss: 3782.579590, avg loss: 2.275920, ppl: 9.736876 +epoch: 2, batch: 5133, sum loss: 3500.423828, avg loss: 2.295360, ppl: 9.928008 +epoch: 2, batch: 5134, sum loss: 4762.677246, avg loss: 2.831556, ppl: 16.971849 +epoch: 2, batch: 5135, sum loss: 3934.585449, avg loss: 2.335066, ppl: 10.330137 +epoch: 2, batch: 5136, sum loss: 3543.037842, avg loss: 2.440109, ppl: 11.474289 +epoch: 2, batch: 5137, sum loss: 4139.499023, avg loss: 2.391392, ppl: 10.928691 +epoch: 2, batch: 5138, sum loss: 4597.261719, avg loss: 2.707457, ppl: 14.991106 +epoch: 2, batch: 5139, sum loss: 3627.810791, avg loss: 2.235250, ppl: 9.348819 +epoch: 2, batch: 5140, sum loss: 4148.781738, avg loss: 2.654371, ppl: 14.216042 +epoch: 2, batch: 5141, sum loss: 4996.537598, avg loss: 2.808621, ppl: 16.587036 +epoch: 2, batch: 5142, sum loss: 3723.055420, avg loss: 2.360847, ppl: 10.599923 +epoch: 2, batch: 5143, sum loss: 4575.815430, avg loss: 2.563482, ppl: 12.980939 +epoch: 2, batch: 5144, sum loss: 3745.056885, avg loss: 2.667419, ppl: 14.402754 +epoch: 2, batch: 5145, sum loss: 4034.607422, avg loss: 2.548710, ppl: 12.790588 +epoch: 2, batch: 5146, sum loss: 4352.824707, avg loss: 2.471791, ppl: 11.843643 +epoch: 2, batch: 5147, sum loss: 4202.058594, avg loss: 2.464550, ppl: 11.758196 +epoch: 2, batch: 5148, sum loss: 4199.689453, avg loss: 2.448798, ppl: 11.574430 +epoch: 2, batch: 5149, sum loss: 4062.465332, avg loss: 2.471086, ppl: 11.835294 +epoch: 2, batch: 5150, sum loss: 3437.653076, avg loss: 2.278100, ppl: 9.758124 +epoch: 2, batch: 5151, sum loss: 4319.775879, avg loss: 2.407902, ppl: 11.110626 
+epoch: 2, batch: 5152, sum loss: 4329.212891, avg loss: 2.480924, ppl: 11.952305 +epoch: 2, batch: 5153, sum loss: 4470.811523, avg loss: 2.475533, ppl: 11.888036 +epoch: 2, batch: 5154, sum loss: 3782.590332, avg loss: 2.639631, ppl: 14.008027 +epoch: 2, batch: 5155, sum loss: 4383.992188, avg loss: 2.615747, ppl: 13.677432 +epoch: 2, batch: 5156, sum loss: 3771.719971, avg loss: 2.428667, ppl: 11.343752 +epoch: 2, batch: 5157, sum loss: 2634.801270, avg loss: 1.917614, ppl: 6.804701 +epoch: 2, batch: 5158, sum loss: 3938.038330, avg loss: 2.442952, ppl: 11.506955 +epoch: 2, batch: 5159, sum loss: 5523.996582, avg loss: 3.007075, ppl: 20.228148 +epoch: 2, batch: 5160, sum loss: 3486.232910, avg loss: 2.773455, ppl: 16.013865 +epoch: 2, batch: 5161, sum loss: 4247.224121, avg loss: 2.431153, ppl: 11.371984 +epoch: 2, batch: 5162, sum loss: 4622.422852, avg loss: 2.613015, ppl: 13.640110 +epoch: 2, batch: 5163, sum loss: 4305.631836, avg loss: 2.559829, ppl: 12.933602 +epoch: 2, batch: 5164, sum loss: 4225.747070, avg loss: 2.359434, ppl: 10.584963 +epoch: 2, batch: 5165, sum loss: 4571.111816, avg loss: 2.701603, ppl: 14.903599 +epoch: 2, batch: 5166, sum loss: 3520.554443, avg loss: 2.325333, ppl: 10.230090 +epoch: 2, batch: 5167, sum loss: 4346.067871, avg loss: 2.567081, ppl: 13.027740 +epoch: 2, batch: 5168, sum loss: 4535.053223, avg loss: 2.619904, ppl: 13.734399 +epoch: 2, batch: 5169, sum loss: 4579.506836, avg loss: 2.701774, ppl: 14.906150 +epoch: 2, batch: 5170, sum loss: 4939.434082, avg loss: 2.687396, ppl: 14.693365 +epoch: 2, batch: 5171, sum loss: 4121.687500, avg loss: 2.528643, ppl: 12.536478 +epoch: 2, batch: 5172, sum loss: 3968.242432, avg loss: 2.457116, ppl: 11.671105 +epoch: 2, batch: 5173, sum loss: 4848.004395, avg loss: 2.678456, ppl: 14.562585 +epoch: 2, batch: 5174, sum loss: 4079.613770, avg loss: 2.642237, ppl: 14.044589 +epoch: 2, batch: 5175, sum loss: 4107.446289, avg loss: 2.591449, ppl: 13.349097 +epoch: 2, batch: 5176, sum 
loss: 4258.851562, avg loss: 2.633798, ppl: 13.926564 +epoch: 2, batch: 5177, sum loss: 4112.339355, avg loss: 2.568607, ppl: 13.047634 +epoch: 2, batch: 5178, sum loss: 4645.408203, avg loss: 2.677469, ppl: 14.548222 +epoch: 2, batch: 5179, sum loss: 3926.815430, avg loss: 2.561523, ppl: 12.955540 +epoch: 2, batch: 5180, sum loss: 4260.711426, avg loss: 2.471411, ppl: 11.839134 +epoch: 2, batch: 5181, sum loss: 3717.440918, avg loss: 2.457000, ppl: 11.669750 +epoch: 2, batch: 5182, sum loss: 4198.839844, avg loss: 2.517290, ppl: 12.394962 +epoch: 2, batch: 5183, sum loss: 4185.146484, avg loss: 2.681068, ppl: 14.600675 +epoch: 2, batch: 5184, sum loss: 3723.473633, avg loss: 2.367116, ppl: 10.666585 +epoch: 2, batch: 5185, sum loss: 4020.126709, avg loss: 2.413041, ppl: 11.167872 +epoch: 2, batch: 5186, sum loss: 4166.714844, avg loss: 2.645533, ppl: 14.090955 +epoch: 2, batch: 5187, sum loss: 3209.865479, avg loss: 2.238400, ppl: 9.378314 +epoch: 2, batch: 5188, sum loss: 4392.958496, avg loss: 2.754206, ppl: 15.708563 +epoch: 2, batch: 5189, sum loss: 4454.579590, avg loss: 2.635846, ppl: 13.955112 +epoch: 2, batch: 5190, sum loss: 5335.159180, avg loss: 2.783077, ppl: 16.168703 +epoch: 2, batch: 5191, sum loss: 4477.809082, avg loss: 2.541322, ppl: 12.696445 +epoch: 2, batch: 5192, sum loss: 4502.265625, avg loss: 2.545091, ppl: 12.744387 +epoch: 2, batch: 5193, sum loss: 4226.822754, avg loss: 2.520467, ppl: 12.434400 +epoch: 2, batch: 5194, sum loss: 4561.934570, avg loss: 2.786765, ppl: 16.228441 +epoch: 2, batch: 5195, sum loss: 3852.677979, avg loss: 2.480797, ppl: 11.950788 +epoch: 2, batch: 5196, sum loss: 4838.494629, avg loss: 2.680606, ppl: 14.593940 +epoch: 2, batch: 5197, sum loss: 4335.499023, avg loss: 2.621221, ppl: 13.752502 +epoch: 2, batch: 5198, sum loss: 3455.139160, avg loss: 2.315777, ppl: 10.132793 +epoch: 2, batch: 5199, sum loss: 3699.673096, avg loss: 2.373107, ppl: 10.730678 +epoch: 2, batch: 5200, sum loss: 3372.716797, avg loss: 
2.241008, ppl: 9.402805 +epoch: 2, batch: 5201, sum loss: 4262.851074, avg loss: 2.519416, ppl: 12.421336 +epoch: 2, batch: 5202, sum loss: 4937.634766, avg loss: 2.530823, ppl: 12.563839 +epoch: 2, batch: 5203, sum loss: 3466.517822, avg loss: 2.314097, ppl: 10.115786 +epoch: 2, batch: 5204, sum loss: 3817.686523, avg loss: 2.682844, ppl: 14.626628 +epoch: 2, batch: 5205, sum loss: 4439.092773, avg loss: 2.631353, ppl: 13.892556 +epoch: 2, batch: 5206, sum loss: 4352.526855, avg loss: 2.686745, ppl: 14.683805 +epoch: 2, batch: 5207, sum loss: 4030.661133, avg loss: 2.508190, ppl: 12.282678 +epoch: 2, batch: 5208, sum loss: 3674.529297, avg loss: 2.331554, ppl: 10.293927 +epoch: 2, batch: 5209, sum loss: 4911.484375, avg loss: 2.824315, ppl: 16.849405 +epoch: 2, batch: 5210, sum loss: 4034.531006, avg loss: 2.650809, ppl: 14.165495 +epoch: 2, batch: 5211, sum loss: 5587.012695, avg loss: 2.925138, ppl: 18.636789 +epoch: 2, batch: 5212, sum loss: 5286.461914, avg loss: 2.732022, ppl: 15.363914 +epoch: 2, batch: 5213, sum loss: 3434.495361, avg loss: 2.280541, ppl: 9.781975 +epoch: 2, batch: 5214, sum loss: 3709.004150, avg loss: 2.511174, ppl: 12.319387 +epoch: 2, batch: 5215, sum loss: 5215.557617, avg loss: 2.578130, ppl: 13.172485 +epoch: 2, batch: 5216, sum loss: 4260.971191, avg loss: 2.477309, ppl: 11.909171 +epoch: 2, batch: 5217, sum loss: 4679.949219, avg loss: 2.686538, ppl: 14.680762 +epoch: 2, batch: 5218, sum loss: 4800.672363, avg loss: 2.589359, ppl: 13.321234 +epoch: 2, batch: 5219, sum loss: 3327.467285, avg loss: 2.555658, ppl: 12.879776 +epoch: 2, batch: 5220, sum loss: 3853.575439, avg loss: 2.596749, ppl: 13.420036 +epoch: 2, batch: 5221, sum loss: 4587.305176, avg loss: 2.584397, ppl: 13.255295 +epoch: 2, batch: 5222, sum loss: 4425.428711, avg loss: 2.411678, ppl: 11.152658 +epoch: 2, batch: 5223, sum loss: 4219.143555, avg loss: 2.493584, ppl: 12.104577 +epoch: 2, batch: 5224, sum loss: 4709.574707, avg loss: 2.793342, ppl: 16.335524 +epoch: 
2, batch: 5225, sum loss: 4101.450684, avg loss: 2.565010, ppl: 13.000786 +epoch: 2, batch: 5226, sum loss: 4580.442871, avg loss: 2.558907, ppl: 12.921680 +epoch: 2, batch: 5227, sum loss: 3860.738770, avg loss: 2.508602, ppl: 12.287742 +epoch: 2, batch: 5228, sum loss: 4781.379395, avg loss: 2.897806, ppl: 18.134310 +epoch: 2, batch: 5229, sum loss: 3756.024414, avg loss: 2.201656, ppl: 9.039968 +epoch: 2, batch: 5230, sum loss: 4541.911133, avg loss: 2.558823, ppl: 12.920602 +epoch: 2, batch: 5231, sum loss: 4008.933838, avg loss: 2.530893, ppl: 12.564716 +epoch: 2, batch: 5232, sum loss: 4090.595947, avg loss: 2.300673, ppl: 9.980895 +epoch: 2, batch: 5233, sum loss: 4066.428711, avg loss: 2.345114, ppl: 10.434467 +epoch: 2, batch: 5234, sum loss: 4458.740723, avg loss: 2.569879, ppl: 13.064247 +epoch: 2, batch: 5235, sum loss: 4876.789062, avg loss: 2.667828, ppl: 14.408637 +epoch: 2, batch: 5236, sum loss: 3973.774414, avg loss: 2.635129, ppl: 13.945111 +epoch: 2, batch: 5237, sum loss: 4035.984863, avg loss: 2.570691, ppl: 13.074858 +epoch: 2, batch: 5238, sum loss: 3186.812256, avg loss: 2.216142, ppl: 9.171879 +epoch: 2, batch: 5239, sum loss: 4812.452148, avg loss: 2.636960, ppl: 13.970669 +epoch: 2, batch: 5240, sum loss: 4734.451172, avg loss: 2.581489, ppl: 13.216805 +epoch: 2, batch: 5241, sum loss: 5324.163574, avg loss: 2.820002, ppl: 16.776882 +epoch: 2, batch: 5242, sum loss: 4034.236084, avg loss: 2.476511, ppl: 11.899672 +epoch: 2, batch: 5243, sum loss: 4597.420898, avg loss: 2.440245, ppl: 11.475848 +epoch: 2, batch: 5244, sum loss: 4335.440918, avg loss: 2.430180, ppl: 11.360928 +epoch: 2, batch: 5245, sum loss: 3770.888672, avg loss: 2.490680, ppl: 12.069474 +epoch: 2, batch: 5246, sum loss: 5131.779785, avg loss: 2.792046, ppl: 16.314358 +epoch: 2, batch: 5247, sum loss: 5126.801270, avg loss: 2.813832, ppl: 16.673683 +epoch: 2, batch: 5248, sum loss: 4612.125977, avg loss: 2.841729, ppl: 17.145384 +epoch: 2, batch: 5249, sum loss: 
4249.218262, avg loss: 2.552083, ppl: 12.833809 +epoch: 2, batch: 5250, sum loss: 3895.153564, avg loss: 2.435993, ppl: 11.427165 +epoch: 2, batch: 5251, sum loss: 4135.708496, avg loss: 2.551332, ppl: 12.824168 +epoch: 2, batch: 5252, sum loss: 4668.903809, avg loss: 2.831355, ppl: 16.968431 +epoch: 2, batch: 5253, sum loss: 5006.719727, avg loss: 2.674530, ppl: 14.505527 +epoch: 2, batch: 5254, sum loss: 4404.099609, avg loss: 2.481183, ppl: 11.955397 +epoch: 2, batch: 5255, sum loss: 4755.245605, avg loss: 2.862881, ppl: 17.511909 +epoch: 2, batch: 5256, sum loss: 4830.379883, avg loss: 2.741419, ppl: 15.508974 +epoch: 2, batch: 5257, sum loss: 4269.506836, avg loss: 2.460811, ppl: 11.714307 +epoch: 2, batch: 5258, sum loss: 4529.114258, avg loss: 2.528819, ppl: 12.538684 +epoch: 2, batch: 5259, sum loss: 4480.543457, avg loss: 2.727050, ppl: 15.287723 +epoch: 2, batch: 5260, sum loss: 4534.212891, avg loss: 2.521809, ppl: 12.451101 +epoch: 2, batch: 5261, sum loss: 5241.360352, avg loss: 2.682375, ppl: 14.619770 +epoch: 2, batch: 5262, sum loss: 5217.687988, avg loss: 2.560200, ppl: 12.938408 +epoch: 2, batch: 5263, sum loss: 4716.541016, avg loss: 2.524915, ppl: 12.489830 +epoch: 2, batch: 5264, sum loss: 4603.664062, avg loss: 2.488467, ppl: 12.042803 +epoch: 2, batch: 5265, sum loss: 4463.448730, avg loss: 2.534610, ppl: 12.611515 +epoch: 2, batch: 5266, sum loss: 3853.637451, avg loss: 2.440556, ppl: 11.479417 +epoch: 2, batch: 5267, sum loss: 4256.342285, avg loss: 2.866224, ppl: 17.570539 +epoch: 2, batch: 5268, sum loss: 4853.214355, avg loss: 2.712808, ppl: 15.071543 +epoch: 2, batch: 5269, sum loss: 4563.012695, avg loss: 2.525187, ppl: 12.493229 +epoch: 2, batch: 5270, sum loss: 5189.745117, avg loss: 2.677887, ppl: 14.554311 +epoch: 2, batch: 5271, sum loss: 3875.558594, avg loss: 2.407179, ppl: 11.102598 +epoch: 2, batch: 5272, sum loss: 3822.062500, avg loss: 2.483471, ppl: 11.982789 +epoch: 2, batch: 5273, sum loss: 4136.416992, avg loss: 
2.193222, ppl: 8.964051 +epoch: 2, batch: 5274, sum loss: 3810.388916, avg loss: 2.359374, ppl: 10.584324 +epoch: 2, batch: 5275, sum loss: 4614.235840, avg loss: 2.464870, ppl: 11.761950 +epoch: 2, batch: 5276, sum loss: 4342.915039, avg loss: 2.510355, ppl: 12.309305 +epoch: 2, batch: 5277, sum loss: 4322.961914, avg loss: 2.588600, ppl: 13.311122 +epoch: 2, batch: 5278, sum loss: 3960.233154, avg loss: 2.496994, ppl: 12.145934 +epoch: 2, batch: 5279, sum loss: 3877.222168, avg loss: 2.522591, ppl: 12.460839 +epoch: 2, batch: 5280, sum loss: 4281.469727, avg loss: 2.553053, ppl: 12.846262 +epoch: 2, batch: 5281, sum loss: 4089.813965, avg loss: 2.431518, ppl: 11.376142 +epoch: 2, batch: 5282, sum loss: 3556.029785, avg loss: 2.388200, ppl: 10.893867 +epoch: 2, batch: 5283, sum loss: 4005.649902, avg loss: 2.492626, ppl: 12.092990 +epoch: 2, batch: 5284, sum loss: 4285.630371, avg loss: 2.409011, ppl: 11.122953 +epoch: 2, batch: 5285, sum loss: 5163.161621, avg loss: 2.645062, ppl: 14.084325 +epoch: 2, batch: 5286, sum loss: 4756.349121, avg loss: 2.806106, ppl: 16.545359 +epoch: 2, batch: 5287, sum loss: 3930.341797, avg loss: 2.386364, ppl: 10.873884 +epoch: 2, batch: 5288, sum loss: 4494.062988, avg loss: 2.529017, ppl: 12.541169 +epoch: 2, batch: 5289, sum loss: 5031.636230, avg loss: 2.743531, ppl: 15.541766 +epoch: 2, batch: 5290, sum loss: 4249.224609, avg loss: 2.495141, ppl: 12.123441 +epoch: 2, batch: 5291, sum loss: 4875.079102, avg loss: 2.590371, ppl: 13.334723 +epoch: 2, batch: 5292, sum loss: 3052.995361, avg loss: 2.428795, ppl: 11.345204 +epoch: 2, batch: 5293, sum loss: 5094.843262, avg loss: 2.774969, ppl: 16.038132 +epoch: 2, batch: 5294, sum loss: 4203.306641, avg loss: 2.466729, ppl: 11.783844 +epoch: 2, batch: 5295, sum loss: 3349.831543, avg loss: 2.233221, ppl: 9.329869 +epoch: 2, batch: 5296, sum loss: 4043.835938, avg loss: 2.528978, ppl: 12.540685 +epoch: 2, batch: 5297, sum loss: 4464.385742, avg loss: 2.474715, ppl: 11.878322 +epoch: 
2, batch: 5298, sum loss: 4350.806641, avg loss: 2.702364, ppl: 14.914952 +epoch: 2, batch: 5299, sum loss: 4176.020020, avg loss: 2.561975, ppl: 12.961397 +epoch: 2, batch: 5300, sum loss: 4365.426270, avg loss: 2.561870, ppl: 12.960028 +epoch: 2, batch: 5301, sum loss: 4935.743652, avg loss: 2.603240, ppl: 13.507438 +epoch: 2, batch: 5302, sum loss: 4068.836914, avg loss: 2.530371, ppl: 12.558167 +epoch: 2, batch: 5303, sum loss: 4401.391602, avg loss: 2.456134, ppl: 11.659646 +epoch: 2, batch: 5304, sum loss: 5186.749023, avg loss: 2.864025, ppl: 17.531954 +epoch: 2, batch: 5305, sum loss: 4027.710449, avg loss: 2.330851, ppl: 10.286693 +epoch: 2, batch: 5306, sum loss: 5335.178223, avg loss: 2.722030, ppl: 15.211164 +epoch: 2, batch: 5307, sum loss: 4481.913086, avg loss: 2.695077, ppl: 14.806658 +epoch: 2, batch: 5308, sum loss: 5569.770020, avg loss: 2.668793, ppl: 14.422546 +epoch: 2, batch: 5309, sum loss: 3846.268066, avg loss: 2.487884, ppl: 12.035776 +epoch: 2, batch: 5310, sum loss: 4862.695312, avg loss: 2.683607, ppl: 14.637792 +epoch: 2, batch: 5311, sum loss: 4237.635742, avg loss: 2.422891, ppl: 11.278415 +epoch: 2, batch: 5312, sum loss: 4713.537109, avg loss: 2.581346, ppl: 13.214911 +epoch: 2, batch: 5313, sum loss: 3672.297852, avg loss: 2.293753, ppl: 9.912065 +epoch: 2, batch: 5314, sum loss: 4136.316406, avg loss: 2.556438, ppl: 12.889818 +epoch: 2, batch: 5315, sum loss: 4276.736328, avg loss: 2.681340, ppl: 14.604643 +epoch: 2, batch: 5316, sum loss: 3993.696045, avg loss: 2.566643, ppl: 13.022033 +epoch: 2, batch: 5317, sum loss: 4372.271484, avg loss: 2.664394, ppl: 14.359251 +epoch: 2, batch: 5318, sum loss: 4390.032715, avg loss: 2.603815, ppl: 13.515204 +epoch: 2, batch: 5319, sum loss: 3977.438477, avg loss: 2.599633, ppl: 13.458797 +epoch: 2, batch: 5320, sum loss: 3421.052734, avg loss: 2.267099, ppl: 9.651365 +epoch: 2, batch: 5321, sum loss: 4577.832520, avg loss: 2.590737, ppl: 13.339601 +epoch: 2, batch: 5322, sum loss: 
5281.291016, avg loss: 2.809197, ppl: 16.596588 +epoch: 2, batch: 5323, sum loss: 4684.906250, avg loss: 2.763957, ppl: 15.862479 +epoch: 2, batch: 5324, sum loss: 3853.969971, avg loss: 2.275071, ppl: 9.728606 +epoch: 2, batch: 5325, sum loss: 4917.182617, avg loss: 2.543809, ppl: 12.728059 +epoch: 2, batch: 5326, sum loss: 4355.136230, avg loss: 2.542403, ppl: 12.710176 +epoch: 2, batch: 5327, sum loss: 4773.500977, avg loss: 2.482320, ppl: 11.968995 +epoch: 2, batch: 5328, sum loss: 3645.437012, avg loss: 2.269886, ppl: 9.678298 +epoch: 2, batch: 5329, sum loss: 4185.058105, avg loss: 2.539477, ppl: 12.673042 +epoch: 2, batch: 5330, sum loss: 4950.922852, avg loss: 2.537634, ppl: 12.649702 +epoch: 2, batch: 5331, sum loss: 4653.478027, avg loss: 2.714981, ppl: 15.104328 +epoch: 2, batch: 5332, sum loss: 3461.668213, avg loss: 2.415679, ppl: 11.197373 +epoch: 2, batch: 5333, sum loss: 3970.834229, avg loss: 2.511596, ppl: 12.324590 +epoch: 2, batch: 5334, sum loss: 4185.634277, avg loss: 2.434924, ppl: 11.414952 +epoch: 2, batch: 5335, sum loss: 5107.451660, avg loss: 2.705218, ppl: 14.957579 +epoch: 2, batch: 5336, sum loss: 4016.432617, avg loss: 2.406491, ppl: 11.094955 +epoch: 2, batch: 5337, sum loss: 3166.305908, avg loss: 2.214200, ppl: 9.154083 +epoch: 2, batch: 5338, sum loss: 4104.043945, avg loss: 2.383301, ppl: 10.840627 +epoch: 2, batch: 5339, sum loss: 4902.724121, avg loss: 2.790395, ppl: 16.287453 +epoch: 2, batch: 5340, sum loss: 3849.683105, avg loss: 2.483667, ppl: 11.985129 +epoch: 2, batch: 5341, sum loss: 3211.508301, avg loss: 2.258445, ppl: 9.568194 +epoch: 2, batch: 5342, sum loss: 4350.977539, avg loss: 2.442997, ppl: 11.507477 +epoch: 2, batch: 5343, sum loss: 3879.501709, avg loss: 2.456936, ppl: 11.669002 +epoch: 2, batch: 5344, sum loss: 3806.065430, avg loss: 2.372859, ppl: 10.728018 +epoch: 2, batch: 5345, sum loss: 4381.891602, avg loss: 2.730150, ppl: 15.335195 +epoch: 2, batch: 5346, sum loss: 5166.024902, avg loss: 2.680864, 
ppl: 14.597702 +epoch: 2, batch: 5347, sum loss: 4230.929688, avg loss: 2.477125, ppl: 11.906981 +epoch: 2, batch: 5348, sum loss: 4868.413086, avg loss: 2.906515, ppl: 18.292944 +epoch: 2, batch: 5349, sum loss: 4436.812500, avg loss: 2.495395, ppl: 12.126525 +epoch: 2, batch: 5350, sum loss: 4040.170410, avg loss: 2.264670, ppl: 9.627944 +epoch: 2, batch: 5351, sum loss: 4544.008789, avg loss: 2.557124, ppl: 12.898663 +epoch: 2, batch: 5352, sum loss: 3769.238770, avg loss: 2.355774, ppl: 10.546290 +epoch: 2, batch: 5353, sum loss: 4456.180176, avg loss: 2.526179, ppl: 12.505634 +epoch: 2, batch: 5354, sum loss: 4391.130371, avg loss: 2.618444, ppl: 13.714364 +epoch: 2, batch: 5355, sum loss: 4142.160156, avg loss: 2.616652, ppl: 13.689814 +epoch: 2, batch: 5356, sum loss: 4629.149414, avg loss: 2.758730, ppl: 15.779793 +epoch: 2, batch: 5357, sum loss: 4669.238281, avg loss: 2.550103, ppl: 12.808422 +epoch: 2, batch: 5358, sum loss: 5287.421387, avg loss: 2.696288, ppl: 14.824606 +epoch: 2, batch: 5359, sum loss: 4132.210938, avg loss: 2.553900, ppl: 12.857155 +epoch: 2, batch: 5360, sum loss: 3660.546631, avg loss: 2.367753, ppl: 10.673385 +epoch: 2, batch: 5361, sum loss: 4808.818848, avg loss: 2.795825, ppl: 16.376133 +epoch: 2, batch: 5362, sum loss: 4001.788330, avg loss: 2.455085, ppl: 11.647421 +epoch: 2, batch: 5363, sum loss: 3496.443604, avg loss: 2.398109, ppl: 11.002356 +epoch: 2, batch: 5364, sum loss: 4436.398438, avg loss: 2.519249, ppl: 12.419271 +epoch: 2, batch: 5365, sum loss: 4947.063965, avg loss: 2.748369, ppl: 15.617139 +epoch: 2, batch: 5366, sum loss: 4825.431152, avg loss: 2.646973, ppl: 14.111258 +epoch: 2, batch: 5367, sum loss: 3794.562744, avg loss: 2.361271, ppl: 10.604423 +epoch: 2, batch: 5368, sum loss: 5585.079102, avg loss: 2.864143, ppl: 17.534023 +epoch: 2, batch: 5369, sum loss: 3955.636719, avg loss: 2.466108, ppl: 11.776522 +epoch: 2, batch: 5370, sum loss: 5286.153320, avg loss: 2.631236, ppl: 13.890929 +epoch: 2, batch: 
5371, sum loss: 4155.246094, avg loss: 2.390821, ppl: 10.922452 +epoch: 2, batch: 5372, sum loss: 4067.756348, avg loss: 2.518735, ppl: 12.412881 +epoch: 2, batch: 5373, sum loss: 3736.592041, avg loss: 2.507780, ppl: 12.277644 +epoch: 2, batch: 5374, sum loss: 4502.667969, avg loss: 2.696208, ppl: 14.823421 +epoch: 2, batch: 5375, sum loss: 3652.728271, avg loss: 2.398377, ppl: 11.005302 +epoch: 2, batch: 5376, sum loss: 4548.665527, avg loss: 2.411806, ppl: 11.154086 +epoch: 2, batch: 5377, sum loss: 4484.353027, avg loss: 2.613259, ppl: 13.643447 +epoch: 2, batch: 5378, sum loss: 4346.889160, avg loss: 2.527261, ppl: 12.519170 +epoch: 2, batch: 5379, sum loss: 4783.631348, avg loss: 2.669437, ppl: 14.431844 +epoch: 2, batch: 5380, sum loss: 4194.379395, avg loss: 2.639635, ppl: 14.008084 +epoch: 2, batch: 5381, sum loss: 4446.599121, avg loss: 2.559931, ppl: 12.934919 +epoch: 2, batch: 5382, sum loss: 4047.203613, avg loss: 2.357137, ppl: 10.560670 +epoch: 2, batch: 5383, sum loss: 4184.891602, avg loss: 2.544007, ppl: 12.730578 +epoch: 2, batch: 5384, sum loss: 4497.697754, avg loss: 2.688403, ppl: 14.708167 +epoch: 2, batch: 5385, sum loss: 4417.823730, avg loss: 2.650164, ppl: 14.156362 +epoch: 2, batch: 5386, sum loss: 4529.456055, avg loss: 2.655015, ppl: 14.225203 +epoch: 2, batch: 5387, sum loss: 3671.183105, avg loss: 2.379250, ppl: 10.796805 +epoch: 2, batch: 5388, sum loss: 4796.314453, avg loss: 2.641142, ppl: 14.029218 +epoch: 2, batch: 5389, sum loss: 4190.608887, avg loss: 2.521425, ppl: 12.446326 +epoch: 2, batch: 5390, sum loss: 3800.024170, avg loss: 2.439040, ppl: 11.462031 +epoch: 2, batch: 5391, sum loss: 3924.583008, avg loss: 2.427077, ppl: 11.325727 +epoch: 2, batch: 5392, sum loss: 3557.191650, avg loss: 2.231613, ppl: 9.314882 +epoch: 2, batch: 5393, sum loss: 4776.320312, avg loss: 2.786651, ppl: 16.226591 +epoch: 2, batch: 5394, sum loss: 4701.505371, avg loss: 2.783603, ppl: 16.177202 +epoch: 2, batch: 5395, sum loss: 4065.582764, avg 
loss: 2.568277, ppl: 13.043333 +epoch: 2, batch: 5396, sum loss: 4518.424805, avg loss: 2.441072, ppl: 11.485349 +epoch: 2, batch: 5397, sum loss: 5006.818359, avg loss: 2.744966, ppl: 15.564089 +epoch: 2, batch: 5398, sum loss: 4493.704590, avg loss: 2.587049, ppl: 13.290497 +epoch: 2, batch: 5399, sum loss: 4979.847656, avg loss: 2.960670, ppl: 19.310915 +epoch: 2, batch: 5400, sum loss: 3975.052002, avg loss: 2.685846, ppl: 14.670609 +epoch: 2, batch: 5401, sum loss: 3198.812012, avg loss: 2.333196, ppl: 10.310844 +epoch: 2, batch: 5402, sum loss: 3985.320801, avg loss: 2.527153, ppl: 12.517823 +epoch: 2, batch: 5403, sum loss: 4301.754883, avg loss: 2.475118, ppl: 11.883108 +epoch: 2, batch: 5404, sum loss: 3996.187500, avg loss: 2.423401, ppl: 11.284167 +epoch: 2, batch: 5405, sum loss: 4732.709961, avg loss: 2.734090, ppl: 15.395728 +epoch: 2, batch: 5406, sum loss: 4658.465332, avg loss: 2.476590, ppl: 11.900610 +epoch: 2, batch: 5407, sum loss: 4165.755859, avg loss: 2.353534, ppl: 10.522696 +epoch: 2, batch: 5408, sum loss: 3488.526855, avg loss: 2.437824, ppl: 11.448108 +epoch: 2, batch: 5409, sum loss: 4005.474365, avg loss: 2.541545, ppl: 12.699272 +epoch: 2, batch: 5410, sum loss: 5084.661133, avg loss: 2.517159, ppl: 12.393337 +epoch: 2, batch: 5411, sum loss: 3174.592529, avg loss: 2.116395, ppl: 8.301158 +epoch: 2, batch: 5412, sum loss: 4659.676758, avg loss: 2.532433, ppl: 12.584084 +epoch: 2, batch: 5413, sum loss: 5555.907715, avg loss: 3.069562, ppl: 21.532478 +epoch: 2, batch: 5414, sum loss: 3692.100586, avg loss: 2.368249, ppl: 10.678682 +epoch: 2, batch: 5415, sum loss: 5051.407227, avg loss: 2.853902, ppl: 17.355377 +epoch: 2, batch: 5416, sum loss: 3987.769287, avg loss: 2.469207, ppl: 11.813076 +epoch: 2, batch: 5417, sum loss: 5151.108887, avg loss: 2.778376, ppl: 16.092863 +epoch: 2, batch: 5418, sum loss: 4184.723145, avg loss: 2.441495, ppl: 11.490211 +epoch: 2, batch: 5419, sum loss: 4388.415039, avg loss: 2.589036, ppl: 13.316921 
+epoch: 2, batch: 5420, sum loss: 4808.769531, avg loss: 2.760488, ppl: 15.807549 +epoch: 2, batch: 5421, sum loss: 4517.473633, avg loss: 2.713197, ppl: 15.077408 +epoch: 2, batch: 5422, sum loss: 4665.474609, avg loss: 2.670563, ppl: 14.448108 +epoch: 2, batch: 5423, sum loss: 4526.311035, avg loss: 2.520218, ppl: 12.431305 +epoch: 2, batch: 5424, sum loss: 4627.775391, avg loss: 2.451152, ppl: 11.601708 +epoch: 2, batch: 5425, sum loss: 4357.158691, avg loss: 2.609077, ppl: 13.586508 +epoch: 2, batch: 5426, sum loss: 4727.342773, avg loss: 2.654320, ppl: 14.215309 +epoch: 2, batch: 5427, sum loss: 4188.099609, avg loss: 2.604539, ppl: 13.524994 +epoch: 2, batch: 5428, sum loss: 4251.511719, avg loss: 2.878478, ppl: 17.787182 +epoch: 2, batch: 5429, sum loss: 3653.555176, avg loss: 2.315307, ppl: 10.128036 +epoch: 2, batch: 5430, sum loss: 4137.188965, avg loss: 2.439380, ppl: 11.465935 +epoch: 2, batch: 5431, sum loss: 3896.082764, avg loss: 2.421431, ppl: 11.261965 +epoch: 2, batch: 5432, sum loss: 4419.894043, avg loss: 2.612231, ppl: 13.629418 +epoch: 2, batch: 5433, sum loss: 3644.809814, avg loss: 2.346948, ppl: 10.453615 +epoch: 2, batch: 5434, sum loss: 3280.079834, avg loss: 2.176563, ppl: 8.815949 +epoch: 2, batch: 5435, sum loss: 3525.468750, avg loss: 2.225675, ppl: 9.259727 +epoch: 2, batch: 5436, sum loss: 3499.160889, avg loss: 2.588137, ppl: 13.304957 +epoch: 2, batch: 5437, sum loss: 4198.126953, avg loss: 2.515355, ppl: 12.370998 +epoch: 2, batch: 5438, sum loss: 4350.442383, avg loss: 2.508906, ppl: 12.291471 +epoch: 2, batch: 5439, sum loss: 3147.678467, avg loss: 2.340281, ppl: 10.384159 +epoch: 2, batch: 5440, sum loss: 4979.078125, avg loss: 2.642823, ppl: 14.052818 +epoch: 2, batch: 5441, sum loss: 3307.000488, avg loss: 2.464233, ppl: 11.754462 +epoch: 2, batch: 5442, sum loss: 4938.425293, avg loss: 2.708955, ppl: 15.013582 +epoch: 2, batch: 5443, sum loss: 3013.893799, avg loss: 2.188739, ppl: 8.923953 +epoch: 2, batch: 5444, sum loss: 
4669.989258, avg loss: 2.794727, ppl: 16.358168 +epoch: 2, batch: 5445, sum loss: 3646.217773, avg loss: 2.551587, ppl: 12.827446 +epoch: 2, batch: 5446, sum loss: 4093.932617, avg loss: 2.536513, ppl: 12.635538 +epoch: 2, batch: 5447, sum loss: 4959.962402, avg loss: 2.832646, ppl: 16.990353 +epoch: 2, batch: 5448, sum loss: 4716.344238, avg loss: 2.769433, ppl: 15.949585 +epoch: 2, batch: 5449, sum loss: 4264.773438, avg loss: 2.473767, ppl: 11.867061 +epoch: 2, batch: 5450, sum loss: 4261.518066, avg loss: 2.456206, ppl: 11.660491 +epoch: 2, batch: 5451, sum loss: 3794.447754, avg loss: 2.496347, ppl: 12.138078 +epoch: 2, batch: 5452, sum loss: 4154.260742, avg loss: 2.617682, ppl: 13.703918 +epoch: 2, batch: 5453, sum loss: 5261.679688, avg loss: 2.798766, ppl: 16.424360 +epoch: 2, batch: 5454, sum loss: 4688.358398, avg loss: 2.688279, ppl: 14.706340 +epoch: 2, batch: 5455, sum loss: 3556.803711, avg loss: 2.358623, ppl: 10.576378 +epoch: 2, batch: 5456, sum loss: 4641.981934, avg loss: 2.505117, ppl: 12.244990 +epoch: 2, batch: 5457, sum loss: 4497.184570, avg loss: 2.601032, ppl: 13.477643 +epoch: 2, batch: 5458, sum loss: 3561.147949, avg loss: 2.356815, ppl: 10.557277 +epoch: 2, batch: 5459, sum loss: 3917.480957, avg loss: 2.460729, ppl: 11.713349 +epoch: 2, batch: 5460, sum loss: 3434.316406, avg loss: 2.277398, ppl: 9.751276 +epoch: 2, batch: 5461, sum loss: 4007.675781, avg loss: 2.440728, ppl: 11.481401 +epoch: 2, batch: 5462, sum loss: 4443.950195, avg loss: 2.562832, ppl: 12.972499 +epoch: 2, batch: 5463, sum loss: 4694.971680, avg loss: 2.514714, ppl: 12.363075 +epoch: 2, batch: 5464, sum loss: 4489.295898, avg loss: 2.734041, ppl: 15.394979 +epoch: 2, batch: 5465, sum loss: 3566.018066, avg loss: 2.396518, ppl: 10.984855 +epoch: 2, batch: 5466, sum loss: 3808.839355, avg loss: 2.400025, ppl: 11.023451 +epoch: 2, batch: 5467, sum loss: 4632.822266, avg loss: 2.714014, ppl: 15.089728 +epoch: 2, batch: 5468, sum loss: 4550.146484, avg loss: 2.824424, 
ppl: 16.851233 +epoch: 2, batch: 5469, sum loss: 4134.208496, avg loss: 2.493491, ppl: 12.103457 +epoch: 2, batch: 5470, sum loss: 2898.861572, avg loss: 2.313537, ppl: 10.110120 +epoch: 2, batch: 5471, sum loss: 3929.326416, avg loss: 2.494810, ppl: 12.119434 +epoch: 2, batch: 5472, sum loss: 4419.974121, avg loss: 2.554898, ppl: 12.869990 +epoch: 2, batch: 5473, sum loss: 3438.997070, avg loss: 2.172455, ppl: 8.779815 +epoch: 2, batch: 5474, sum loss: 3662.133789, avg loss: 2.467745, ppl: 11.795818 +epoch: 2, batch: 5475, sum loss: 4813.417969, avg loss: 2.669672, ppl: 14.435230 +epoch: 2, batch: 5476, sum loss: 4996.145020, avg loss: 2.718251, ppl: 15.153791 +epoch: 2, batch: 5477, sum loss: 3492.329102, avg loss: 2.315868, ppl: 10.133717 +epoch: 2, batch: 5478, sum loss: 4602.903809, avg loss: 2.680783, ppl: 14.596512 +epoch: 2, batch: 5479, sum loss: 4619.553223, avg loss: 2.534039, ppl: 12.604316 +epoch: 2, batch: 5480, sum loss: 4224.093750, avg loss: 2.503909, ppl: 12.230204 +epoch: 2, batch: 5481, sum loss: 4146.086914, avg loss: 2.416135, ppl: 11.202474 +epoch: 2, batch: 5482, sum loss: 3859.164551, avg loss: 2.305355, ppl: 10.027740 +epoch: 2, batch: 5483, sum loss: 3991.282715, avg loss: 2.413109, ppl: 11.168633 +epoch: 2, batch: 5484, sum loss: 4876.240234, avg loss: 2.703016, ppl: 14.924670 +epoch: 2, batch: 5485, sum loss: 4206.349609, avg loss: 2.394052, ppl: 10.957806 +epoch: 2, batch: 5486, sum loss: 5599.801758, avg loss: 2.750394, ppl: 15.648794 +epoch: 2, batch: 5487, sum loss: 3986.227295, avg loss: 2.435081, ppl: 11.416743 +epoch: 2, batch: 5488, sum loss: 4977.182129, avg loss: 2.620949, ppl: 13.748765 +epoch: 2, batch: 5489, sum loss: 4571.215332, avg loss: 2.419913, ppl: 11.244879 +epoch: 2, batch: 5490, sum loss: 3929.820557, avg loss: 2.454604, ppl: 11.641822 +epoch: 2, batch: 5491, sum loss: 3932.052979, avg loss: 2.566614, ppl: 13.021661 +epoch: 2, batch: 5492, sum loss: 4251.885742, avg loss: 2.342637, ppl: 10.408647 +epoch: 2, batch: 
5493, sum loss: 4197.589844, avg loss: 2.623494, ppl: 13.783795 +epoch: 2, batch: 5494, sum loss: 4438.358398, avg loss: 2.704667, ppl: 14.949335 +epoch: 2, batch: 5495, sum loss: 3729.897461, avg loss: 2.400192, ppl: 11.025288 +epoch: 2, batch: 5496, sum loss: 4283.439453, avg loss: 2.460333, ppl: 11.708709 +epoch: 2, batch: 5497, sum loss: 4833.382324, avg loss: 2.586079, ppl: 13.277613 +epoch: 2, batch: 5498, sum loss: 4568.231934, avg loss: 2.460006, ppl: 11.704885 +epoch: 2, batch: 5499, sum loss: 4521.359375, avg loss: 2.619559, ppl: 13.729671 +epoch: 2, batch: 5500, sum loss: 4286.480957, avg loss: 2.588455, ppl: 13.309190 +epoch: 2, batch: 5501, sum loss: 4072.155273, avg loss: 2.620435, ppl: 13.741693 +epoch: 2, batch: 5502, sum loss: 3296.000977, avg loss: 2.308124, ppl: 10.055543 +epoch: 2, batch: 5503, sum loss: 3737.414551, avg loss: 2.344676, ppl: 10.429898 +epoch: 2, batch: 5504, sum loss: 4768.400879, avg loss: 2.573341, ppl: 13.109549 +epoch: 2, batch: 5505, sum loss: 3976.666992, avg loss: 2.555699, ppl: 12.880295 +epoch: 2, batch: 5506, sum loss: 4137.493164, avg loss: 2.336247, ppl: 10.342346 +epoch: 2, batch: 5507, sum loss: 4821.087402, avg loss: 2.799702, ppl: 16.439751 +epoch: 2, batch: 5508, sum loss: 3705.212646, avg loss: 2.204172, ppl: 9.062741 +epoch: 2, batch: 5509, sum loss: 3774.465820, avg loss: 2.361994, ppl: 10.612088 +epoch: 2, batch: 5510, sum loss: 4116.382812, avg loss: 2.441508, ppl: 11.490358 +epoch: 2, batch: 5511, sum loss: 3873.981445, avg loss: 2.447240, ppl: 11.556408 +epoch: 2, batch: 5512, sum loss: 4511.387207, avg loss: 2.769421, ppl: 15.949398 +epoch: 2, batch: 5513, sum loss: 3642.143066, avg loss: 2.342214, ppl: 10.404250 +epoch: 2, batch: 5514, sum loss: 4136.269531, avg loss: 2.575510, ppl: 13.138020 +epoch: 2, batch: 5515, sum loss: 4029.895020, avg loss: 2.606659, ppl: 13.553695 +epoch: 2, batch: 5516, sum loss: 3912.837402, avg loss: 2.369980, ppl: 10.697182 +epoch: 2, batch: 5517, sum loss: 4341.659668, avg 
loss: 2.811956, ppl: 16.642437 +epoch: 2, batch: 5518, sum loss: 3716.926514, avg loss: 2.430953, ppl: 11.369710 +epoch: 2, batch: 5519, sum loss: 4073.895508, avg loss: 2.708707, ppl: 15.009856 +epoch: 2, batch: 5520, sum loss: 4360.400879, avg loss: 2.402425, ppl: 11.049938 +epoch: 2, batch: 5521, sum loss: 3525.379639, avg loss: 2.580805, ppl: 13.207767 +epoch: 2, batch: 5522, sum loss: 5553.520508, avg loss: 2.609737, ppl: 13.595477 +epoch: 2, batch: 5523, sum loss: 4925.126465, avg loss: 2.550558, ppl: 12.814247 +epoch: 2, batch: 5524, sum loss: 4055.168701, avg loss: 2.445820, ppl: 11.540004 +epoch: 2, batch: 5525, sum loss: 4121.506348, avg loss: 2.179538, ppl: 8.842220 +epoch: 2, batch: 5526, sum loss: 3905.611816, avg loss: 2.454816, ppl: 11.644289 +epoch: 2, batch: 5527, sum loss: 3319.886719, avg loss: 2.349530, ppl: 10.480647 +epoch: 2, batch: 5528, sum loss: 3752.362549, avg loss: 2.467037, ppl: 11.787465 +epoch: 2, batch: 5529, sum loss: 4357.449707, avg loss: 2.382422, ppl: 10.831103 +epoch: 2, batch: 5530, sum loss: 4251.054199, avg loss: 2.630603, ppl: 13.882136 +epoch: 2, batch: 5531, sum loss: 5178.145020, avg loss: 2.843572, ppl: 17.177019 +epoch: 2, batch: 5532, sum loss: 4662.075195, avg loss: 2.587167, ppl: 13.292066 +epoch: 2, batch: 5533, sum loss: 3358.587158, avg loss: 2.498949, ppl: 12.169695 +epoch: 2, batch: 5534, sum loss: 4237.000977, avg loss: 2.436458, ppl: 11.432479 +epoch: 2, batch: 5535, sum loss: 4266.215820, avg loss: 2.503648, ppl: 12.227015 +epoch: 2, batch: 5536, sum loss: 5178.583008, avg loss: 2.808342, ppl: 16.582405 +epoch: 2, batch: 5537, sum loss: 4059.010742, avg loss: 2.572250, ppl: 13.095258 +epoch: 2, batch: 5538, sum loss: 3934.710693, avg loss: 2.462272, ppl: 11.731437 +epoch: 2, batch: 5539, sum loss: 4792.929688, avg loss: 2.772082, ppl: 15.991896 +epoch: 2, batch: 5540, sum loss: 3222.030029, avg loss: 2.133795, ppl: 8.446860 +epoch: 2, batch: 5541, sum loss: 5325.723145, avg loss: 2.765173, ppl: 15.881790 
+epoch: 2, batch: 5542, sum loss: 3672.865479, avg loss: 2.342389, ppl: 10.406063 +epoch: 2, batch: 5543, sum loss: 4025.965332, avg loss: 2.350242, ppl: 10.488111 +epoch: 2, batch: 5544, sum loss: 3943.767090, avg loss: 2.286242, ppl: 9.837897 +epoch: 2, batch: 5545, sum loss: 4200.224121, avg loss: 2.439154, ppl: 11.463343 +epoch: 2, batch: 5546, sum loss: 4285.448242, avg loss: 2.540278, ppl: 12.683193 +epoch: 2, batch: 5547, sum loss: 5211.143555, avg loss: 2.627909, ppl: 13.844790 +epoch: 2, batch: 5548, sum loss: 4622.131348, avg loss: 2.665589, ppl: 14.376412 +epoch: 2, batch: 5549, sum loss: 3498.619873, avg loss: 2.349644, ppl: 10.481837 +epoch: 2, batch: 5550, sum loss: 4157.349121, avg loss: 2.429778, ppl: 11.356357 +epoch: 2, batch: 5551, sum loss: 4904.850586, avg loss: 2.699423, ppl: 14.871142 +epoch: 2, batch: 5552, sum loss: 4919.561523, avg loss: 2.483373, ppl: 11.981606 +epoch: 2, batch: 5553, sum loss: 3803.258301, avg loss: 2.371109, ppl: 10.709260 +epoch: 2, batch: 5554, sum loss: 4497.215332, avg loss: 2.760721, ppl: 15.811246 +epoch: 2, batch: 5555, sum loss: 4175.581055, avg loss: 2.579111, ppl: 13.185415 +epoch: 2, batch: 5556, sum loss: 3714.498535, avg loss: 2.597552, ppl: 13.430814 +epoch: 2, batch: 5557, sum loss: 4628.253418, avg loss: 2.637181, ppl: 13.973760 +epoch: 2, batch: 5558, sum loss: 3986.199219, avg loss: 2.247012, ppl: 9.459428 +epoch: 2, batch: 5559, sum loss: 4655.246582, avg loss: 2.583378, ppl: 13.241792 +epoch: 2, batch: 5560, sum loss: 5444.948242, avg loss: 2.789420, ppl: 16.271585 +epoch: 2, batch: 5561, sum loss: 5109.023438, avg loss: 2.857396, ppl: 17.416111 +epoch: 2, batch: 5562, sum loss: 4236.063965, avg loss: 2.340367, ppl: 10.385048 +epoch: 2, batch: 5563, sum loss: 4218.636719, avg loss: 2.420331, ppl: 11.249579 +epoch: 2, batch: 5564, sum loss: 4827.520508, avg loss: 2.642321, ppl: 14.045768 +epoch: 2, batch: 5565, sum loss: 4855.212891, avg loss: 2.700341, ppl: 14.884803 +epoch: 2, batch: 5566, sum loss: 
4475.688965, avg loss: 2.876407, ppl: 17.750376 +epoch: 2, batch: 5567, sum loss: 4289.480957, avg loss: 2.404418, ppl: 11.071981 +epoch: 2, batch: 5568, sum loss: 4319.208984, avg loss: 2.448531, ppl: 11.571338 +epoch: 2, batch: 5569, sum loss: 4682.637695, avg loss: 2.589954, ppl: 13.329164 +epoch: 2, batch: 5570, sum loss: 5133.052246, avg loss: 2.839077, ppl: 17.099983 +epoch: 2, batch: 5571, sum loss: 4654.379395, avg loss: 2.653580, ppl: 14.204801 +epoch: 2, batch: 5572, sum loss: 4254.877441, avg loss: 2.746854, ppl: 15.593502 +epoch: 2, batch: 5573, sum loss: 4079.274414, avg loss: 2.423811, ppl: 11.288801 +epoch: 2, batch: 5574, sum loss: 4195.327148, avg loss: 2.376956, ppl: 10.772062 +epoch: 2, batch: 5575, sum loss: 4428.620117, avg loss: 2.634515, ppl: 13.936556 +epoch: 2, batch: 5576, sum loss: 3670.148438, avg loss: 2.458237, ppl: 11.684199 +epoch: 2, batch: 5577, sum loss: 4095.516357, avg loss: 2.539068, ppl: 12.667856 +epoch: 2, batch: 5578, sum loss: 5422.937500, avg loss: 2.793888, ppl: 16.344448 +epoch: 2, batch: 5579, sum loss: 4360.554199, avg loss: 2.791648, ppl: 16.307875 +epoch: 2, batch: 5580, sum loss: 4266.463379, avg loss: 2.349374, ppl: 10.479009 +epoch: 2, batch: 5581, sum loss: 5200.047363, avg loss: 2.606540, ppl: 13.552079 +epoch: 2, batch: 5582, sum loss: 4032.531250, avg loss: 2.466380, ppl: 11.779726 +epoch: 2, batch: 5583, sum loss: 5315.555176, avg loss: 2.933529, ppl: 18.793844 +epoch: 2, batch: 5584, sum loss: 4341.754883, avg loss: 2.710209, ppl: 15.032415 +epoch: 2, batch: 5585, sum loss: 5166.918945, avg loss: 2.613515, ppl: 13.646935 +epoch: 2, batch: 5586, sum loss: 4375.544922, avg loss: 2.539492, ppl: 12.673233 +epoch: 2, batch: 5587, sum loss: 4303.243164, avg loss: 2.730484, ppl: 15.340314 +epoch: 2, batch: 5588, sum loss: 2993.040283, avg loss: 2.222005, ppl: 9.225807 +epoch: 2, batch: 5589, sum loss: 4448.597656, avg loss: 2.510495, ppl: 12.311028 +epoch: 2, batch: 5590, sum loss: 4257.867188, avg loss: 2.549621, 
ppl: 12.802252 +epoch: 2, batch: 5591, sum loss: 4095.014160, avg loss: 2.457992, ppl: 11.681327 +epoch: 2, batch: 5592, sum loss: 4561.733398, avg loss: 2.686533, ppl: 14.680693 +epoch: 2, batch: 5593, sum loss: 3872.012695, avg loss: 2.359545, ppl: 10.586128 +epoch: 2, batch: 5594, sum loss: 4962.222168, avg loss: 2.756790, ppl: 15.749209 +epoch: 2, batch: 5595, sum loss: 3947.692383, avg loss: 2.532195, ppl: 12.581096 +epoch: 2, batch: 5596, sum loss: 4767.013184, avg loss: 2.408799, ppl: 11.120597 +epoch: 2, batch: 5597, sum loss: 4031.712402, avg loss: 2.327779, ppl: 10.255136 +epoch: 2, batch: 5598, sum loss: 4222.331543, avg loss: 2.584046, ppl: 13.250647 +epoch: 2, batch: 5599, sum loss: 4454.456543, avg loss: 2.583792, ppl: 13.247273 +epoch: 2, batch: 5600, sum loss: 5053.489258, avg loss: 2.730140, ppl: 15.335037 +epoch: 2, batch: 5601, sum loss: 3166.344727, avg loss: 2.297783, ppl: 9.952093 +epoch: 2, batch: 5602, sum loss: 4145.158691, avg loss: 2.525995, ppl: 12.503336 +epoch: 2, batch: 5603, sum loss: 3351.028320, avg loss: 2.152234, ppl: 8.604059 +epoch: 2, batch: 5604, sum loss: 4637.710938, avg loss: 2.478734, ppl: 11.926157 +epoch: 2, batch: 5605, sum loss: 4627.368164, avg loss: 2.656354, ppl: 14.244256 +epoch: 2, batch: 5606, sum loss: 4905.020996, avg loss: 2.720478, ppl: 15.187573 +epoch: 2, batch: 5607, sum loss: 4301.952148, avg loss: 2.434608, ppl: 11.411345 +epoch: 2, batch: 5608, sum loss: 4185.302734, avg loss: 2.525831, ppl: 12.501286 +epoch: 2, batch: 5609, sum loss: 3473.219482, avg loss: 2.324779, ppl: 10.224418 +epoch: 2, batch: 5610, sum loss: 4284.481445, avg loss: 2.606132, ppl: 13.546555 +epoch: 2, batch: 5611, sum loss: 4577.241211, avg loss: 2.596280, ppl: 13.413744 +epoch: 2, batch: 5612, sum loss: 4885.265137, avg loss: 2.599928, ppl: 13.462773 +epoch: 2, batch: 5613, sum loss: 4534.630859, avg loss: 2.615127, ppl: 13.668957 +epoch: 2, batch: 5614, sum loss: 5416.553711, avg loss: 2.967975, ppl: 19.452482 +epoch: 2, batch: 
5615, sum loss: 4655.910156, avg loss: 2.618622, ppl: 13.716813 +epoch: 2, batch: 5616, sum loss: 4823.389160, avg loss: 2.631418, ppl: 13.893456 +epoch: 2, batch: 5617, sum loss: 3347.178711, avg loss: 2.158078, ppl: 8.654486 +epoch: 2, batch: 5618, sum loss: 4019.469727, avg loss: 2.425751, ppl: 11.310723 +epoch: 2, batch: 5619, sum loss: 4043.611816, avg loss: 2.519384, ppl: 12.420945 +epoch: 2, batch: 5620, sum loss: 3730.386230, avg loss: 2.505296, ppl: 12.247187 +epoch: 2, batch: 5621, sum loss: 4048.070312, avg loss: 2.680841, ppl: 14.597367 +epoch: 2, batch: 5622, sum loss: 4541.961426, avg loss: 2.579194, ppl: 13.186509 +epoch: 2, batch: 5623, sum loss: 3956.153320, avg loss: 2.477241, ppl: 11.908361 +epoch: 2, batch: 5624, sum loss: 3339.310547, avg loss: 2.215866, ppl: 9.169350 +epoch: 2, batch: 5625, sum loss: 3670.489990, avg loss: 2.308484, ppl: 10.059167 +epoch: 2, batch: 5626, sum loss: 3958.890381, avg loss: 2.557423, ppl: 12.902523 +epoch: 2, batch: 5627, sum loss: 3825.218994, avg loss: 2.398257, ppl: 11.003975 +epoch: 2, batch: 5628, sum loss: 4486.980957, avg loss: 2.562525, ppl: 12.968522 +epoch: 2, batch: 5629, sum loss: 3442.491943, avg loss: 2.206726, ppl: 9.085917 +epoch: 2, batch: 5630, sum loss: 4640.675293, avg loss: 2.866384, ppl: 17.573355 +epoch: 2, batch: 5631, sum loss: 3548.654297, avg loss: 2.518562, ppl: 12.410741 +epoch: 2, batch: 5632, sum loss: 4677.499023, avg loss: 2.611669, ppl: 13.621764 +epoch: 2, batch: 5633, sum loss: 4321.460938, avg loss: 2.608003, ppl: 13.571919 +epoch: 2, batch: 5634, sum loss: 4594.321777, avg loss: 2.490147, ppl: 12.063053 +epoch: 2, batch: 5635, sum loss: 4111.304199, avg loss: 2.434165, ppl: 11.406288 +epoch: 2, batch: 5636, sum loss: 4706.895508, avg loss: 2.497027, ppl: 12.146328 +epoch: 2, batch: 5637, sum loss: 6506.911621, avg loss: 2.843930, ppl: 17.183159 +epoch: 2, batch: 5638, sum loss: 3670.083984, avg loss: 2.520662, ppl: 12.436828 +epoch: 2, batch: 5639, sum loss: 3306.166504, avg 
loss: 2.549088, ppl: 12.795423 +epoch: 2, batch: 5640, sum loss: 4306.378906, avg loss: 2.533164, ppl: 12.593291 +epoch: 2, batch: 5641, sum loss: 4286.760254, avg loss: 2.490854, ppl: 12.071584 +epoch: 2, batch: 5642, sum loss: 3247.232666, avg loss: 2.259731, ppl: 9.580507 +epoch: 2, batch: 5643, sum loss: 3861.382080, avg loss: 2.488004, ppl: 12.037226 +epoch: 2, batch: 5644, sum loss: 4263.465332, avg loss: 2.500566, ppl: 12.189394 +epoch: 2, batch: 5645, sum loss: 4259.628906, avg loss: 2.689160, ppl: 14.719301 +epoch: 2, batch: 5646, sum loss: 4701.559570, avg loss: 2.653250, ppl: 14.200121 +epoch: 2, batch: 5647, sum loss: 4837.761719, avg loss: 2.532860, ppl: 12.589455 +epoch: 2, batch: 5648, sum loss: 3590.620361, avg loss: 2.333087, ppl: 10.309716 +epoch: 2, batch: 5649, sum loss: 4543.209473, avg loss: 2.508675, ppl: 12.288632 +epoch: 2, batch: 5650, sum loss: 4089.806152, avg loss: 2.401530, ppl: 11.040058 +epoch: 2, batch: 5651, sum loss: 4752.946777, avg loss: 2.480661, ppl: 11.949162 +epoch: 2, batch: 5652, sum loss: 3917.101318, avg loss: 2.306891, ppl: 10.043157 +epoch: 2, batch: 5653, sum loss: 4809.198730, avg loss: 2.636622, ppl: 13.965949 +epoch: 2, batch: 5654, sum loss: 3578.024902, avg loss: 2.355513, ppl: 10.543540 +epoch: 2, batch: 5655, sum loss: 4463.795410, avg loss: 2.657021, ppl: 14.253768 +epoch: 2, batch: 5656, sum loss: 4047.666016, avg loss: 2.382381, ppl: 10.830665 +epoch: 2, batch: 5657, sum loss: 3422.373291, avg loss: 2.196645, ppl: 8.994787 +epoch: 2, batch: 5658, sum loss: 4048.438477, avg loss: 2.559063, ppl: 12.923708 +epoch: 2, batch: 5659, sum loss: 3825.021484, avg loss: 2.513155, ppl: 12.343813 +epoch: 2, batch: 5660, sum loss: 3788.711182, avg loss: 2.399437, ppl: 11.016972 +epoch: 2, batch: 5661, sum loss: 4908.938477, avg loss: 2.771846, ppl: 15.988114 +epoch: 2, batch: 5662, sum loss: 4883.326660, avg loss: 2.869169, ppl: 17.622375 +epoch: 2, batch: 5663, sum loss: 4875.263184, avg loss: 2.595987, ppl: 13.409814 
+epoch: 2, batch: 5664, sum loss: 4283.290039, avg loss: 2.657128, ppl: 14.255287 +epoch: 2, batch: 5665, sum loss: 3904.003662, avg loss: 2.318292, ppl: 10.158311 +epoch: 2, batch: 5666, sum loss: 4452.046875, avg loss: 2.618851, ppl: 13.719953 +epoch: 2, batch: 5667, sum loss: 5246.678711, avg loss: 2.594796, ppl: 13.393861 +epoch: 2, batch: 5668, sum loss: 3675.679199, avg loss: 2.411863, ppl: 11.154724 +epoch: 2, batch: 5669, sum loss: 4417.859375, avg loss: 2.759438, ppl: 15.790959 +epoch: 2, batch: 5670, sum loss: 4428.254883, avg loss: 2.407969, ppl: 11.111371 +epoch: 2, batch: 5671, sum loss: 5315.779785, avg loss: 2.805161, ppl: 16.529734 +epoch: 2, batch: 5672, sum loss: 4732.280762, avg loss: 2.655601, ppl: 14.233539 +epoch: 2, batch: 5673, sum loss: 4680.460938, avg loss: 2.642835, ppl: 14.052989 +epoch: 2, batch: 5674, sum loss: 4072.970215, avg loss: 2.571320, ppl: 13.083077 +epoch: 2, batch: 5675, sum loss: 4621.881836, avg loss: 2.632051, ppl: 13.902257 +epoch: 2, batch: 5676, sum loss: 4657.385254, avg loss: 2.590314, ppl: 13.333960 +epoch: 2, batch: 5677, sum loss: 4655.972168, avg loss: 2.500522, ppl: 12.188856 +epoch: 2, batch: 5678, sum loss: 3690.700684, avg loss: 2.404365, ppl: 11.071401 +epoch: 2, batch: 5679, sum loss: 3934.670654, avg loss: 2.460707, ppl: 11.713089 +epoch: 2, batch: 5680, sum loss: 3589.133301, avg loss: 2.358169, ppl: 10.571581 +epoch: 2, batch: 5681, sum loss: 4329.022949, avg loss: 2.483662, ppl: 11.985075 +epoch: 2, batch: 5682, sum loss: 3620.372070, avg loss: 2.287032, ppl: 9.845674 +epoch: 2, batch: 5683, sum loss: 4232.275879, avg loss: 2.373682, ppl: 10.736858 +epoch: 2, batch: 5684, sum loss: 3483.136963, avg loss: 2.231350, ppl: 9.312427 +epoch: 2, batch: 5685, sum loss: 4087.193604, avg loss: 2.521403, ppl: 12.446041 +epoch: 2, batch: 5686, sum loss: 3765.092285, avg loss: 2.365008, ppl: 10.644120 +epoch: 2, batch: 5687, sum loss: 5019.324219, avg loss: 2.618323, ppl: 13.712703 +epoch: 2, batch: 5688, sum loss: 
3999.539062, avg loss: 2.330734, ppl: 10.285486 +epoch: 2, batch: 5689, sum loss: 4237.349609, avg loss: 2.290459, ppl: 9.879476 +epoch: 2, batch: 5690, sum loss: 4568.721680, avg loss: 2.734124, ppl: 15.396256 +epoch: 2, batch: 5691, sum loss: 4420.674805, avg loss: 2.682448, ppl: 14.620847 +epoch: 2, batch: 5692, sum loss: 5089.159180, avg loss: 2.767351, ppl: 15.916422 +epoch: 2, batch: 5693, sum loss: 3586.268311, avg loss: 2.461406, ppl: 11.721277 +epoch: 2, batch: 5694, sum loss: 3699.204834, avg loss: 2.229780, ppl: 9.297820 +epoch: 2, batch: 5695, sum loss: 4415.291504, avg loss: 2.536066, ppl: 12.629890 +epoch: 2, batch: 5696, sum loss: 3739.163574, avg loss: 2.374072, ppl: 10.741042 +epoch: 2, batch: 5697, sum loss: 4909.187500, avg loss: 2.724299, ppl: 15.245729 +epoch: 2, batch: 5698, sum loss: 4704.771973, avg loss: 2.534899, ppl: 12.615153 +epoch: 2, batch: 5699, sum loss: 5263.417969, avg loss: 2.932266, ppl: 18.770121 +epoch: 2, batch: 5700, sum loss: 4263.885254, avg loss: 2.533503, ppl: 12.597556 +epoch: 2, batch: 5701, sum loss: 4347.823242, avg loss: 2.551539, ppl: 12.826835 +epoch: 2, batch: 5702, sum loss: 4370.772949, avg loss: 2.660239, ppl: 14.299706 +epoch: 2, batch: 5703, sum loss: 4342.458984, avg loss: 2.614364, ppl: 13.658528 +epoch: 2, batch: 5704, sum loss: 3819.737549, avg loss: 2.417555, ppl: 11.218401 +epoch: 2, batch: 5705, sum loss: 4678.933105, avg loss: 2.630092, ppl: 13.875042 +epoch: 2, batch: 5706, sum loss: 4313.118652, avg loss: 2.602968, ppl: 13.503764 +epoch: 2, batch: 5707, sum loss: 3832.846924, avg loss: 2.684067, ppl: 14.644525 +epoch: 2, batch: 5708, sum loss: 5241.730957, avg loss: 2.699141, ppl: 14.866952 +epoch: 2, batch: 5709, sum loss: 4378.243164, avg loss: 2.626420, ppl: 13.824198 +epoch: 2, batch: 5710, sum loss: 3962.571045, avg loss: 2.489052, ppl: 12.049849 +epoch: 2, batch: 5711, sum loss: 4186.198242, avg loss: 2.515744, ppl: 12.375815 +epoch: 2, batch: 5712, sum loss: 4587.480469, avg loss: 2.751938, 
ppl: 15.672975 +epoch: 2, batch: 5713, sum loss: 3968.989502, avg loss: 2.552405, ppl: 12.837940 +epoch: 2, batch: 5714, sum loss: 3581.115723, avg loss: 2.308908, ppl: 10.063424 +epoch: 2, batch: 5715, sum loss: 4418.020020, avg loss: 2.608040, ppl: 13.572424 +epoch: 2, batch: 5716, sum loss: 4109.223633, avg loss: 2.622351, ppl: 13.768053 +epoch: 2, batch: 5717, sum loss: 4059.859619, avg loss: 2.558198, ppl: 12.912524 +epoch: 2, batch: 5718, sum loss: 3886.785889, avg loss: 2.489933, ppl: 12.060472 +epoch: 2, batch: 5719, sum loss: 4322.276367, avg loss: 2.591293, ppl: 13.347016 +epoch: 2, batch: 5720, sum loss: 4781.422852, avg loss: 2.938797, ppl: 18.893105 +epoch: 2, batch: 5721, sum loss: 4213.430664, avg loss: 2.592880, ppl: 13.368220 +epoch: 2, batch: 5722, sum loss: 3790.661133, avg loss: 2.338471, ppl: 10.365373 +epoch: 2, batch: 5723, sum loss: 4090.848145, avg loss: 2.445217, ppl: 11.533053 +epoch: 2, batch: 5724, sum loss: 3838.373535, avg loss: 2.209772, ppl: 9.113637 +epoch: 2, batch: 5725, sum loss: 3687.041260, avg loss: 2.643040, ppl: 14.055875 +epoch: 2, batch: 5726, sum loss: 4267.658691, avg loss: 2.482641, ppl: 11.972837 +epoch: 2, batch: 5727, sum loss: 4326.703613, avg loss: 2.374700, ppl: 10.747789 +epoch: 2, batch: 5728, sum loss: 4108.458984, avg loss: 2.654043, ppl: 14.211386 +epoch: 2, batch: 5729, sum loss: 4353.963379, avg loss: 2.734902, ppl: 15.408228 +epoch: 2, batch: 5730, sum loss: 4653.475098, avg loss: 2.665221, ppl: 14.371121 +epoch: 2, batch: 5731, sum loss: 4422.472656, avg loss: 2.577199, ppl: 13.160218 +epoch: 2, batch: 5732, sum loss: 4527.304688, avg loss: 2.686828, ppl: 14.685016 +epoch: 2, batch: 5733, sum loss: 4846.624023, avg loss: 2.611328, ppl: 13.617118 +epoch: 2, batch: 5734, sum loss: 4727.517578, avg loss: 2.667899, ppl: 14.409668 +epoch: 2, batch: 5735, sum loss: 4242.580078, avg loss: 2.502997, ppl: 12.219062 +epoch: 2, batch: 5736, sum loss: 3549.094238, avg loss: 2.361340, ppl: 10.605153 +epoch: 2, batch: 
5737, sum loss: 4365.225586, avg loss: 2.914036, ppl: 18.431028 +epoch: 2, batch: 5738, sum loss: 4794.485352, avg loss: 2.484190, ppl: 11.991398 +epoch: 2, batch: 5739, sum loss: 5686.174805, avg loss: 2.773744, ppl: 16.018494 +epoch: 2, batch: 5740, sum loss: 4531.914062, avg loss: 2.680020, ppl: 14.585390 +epoch: 2, batch: 5741, sum loss: 3761.750244, avg loss: 2.365881, ppl: 10.653418 +epoch: 2, batch: 5742, sum loss: 4506.278320, avg loss: 2.606292, ppl: 13.548716 +epoch: 2, batch: 5743, sum loss: 3806.957764, avg loss: 2.440358, ppl: 11.477145 +epoch: 2, batch: 5744, sum loss: 4477.341797, avg loss: 2.655600, ppl: 14.233524 +epoch: 2, batch: 5745, sum loss: 4443.750977, avg loss: 2.556819, ppl: 12.894731 +epoch: 2, batch: 5746, sum loss: 3879.243164, avg loss: 2.494690, ppl: 12.117978 +epoch: 2, batch: 5747, sum loss: 3540.873047, avg loss: 2.271246, ppl: 9.691473 +epoch: 2, batch: 5748, sum loss: 4450.190430, avg loss: 2.505738, ppl: 12.252599 +epoch: 2, batch: 5749, sum loss: 3977.654053, avg loss: 2.317980, ppl: 10.155143 +epoch: 2, batch: 5750, sum loss: 3864.046143, avg loss: 2.373493, ppl: 10.734820 +epoch: 2, batch: 5751, sum loss: 3921.430420, avg loss: 2.504106, ppl: 12.232621 +epoch: 2, batch: 5752, sum loss: 4265.832031, avg loss: 2.399231, ppl: 11.014700 +epoch: 2, batch: 5753, sum loss: 4147.983398, avg loss: 2.555751, ppl: 12.880964 +epoch: 2, batch: 5754, sum loss: 3870.736572, avg loss: 2.408673, ppl: 11.119191 +epoch: 2, batch: 5755, sum loss: 4578.685059, avg loss: 2.308969, ppl: 10.064041 +epoch: 2, batch: 5756, sum loss: 5029.307617, avg loss: 2.567283, ppl: 13.030375 +epoch: 2, batch: 5757, sum loss: 4825.960449, avg loss: 2.658931, ppl: 14.281021 +epoch: 2, batch: 5758, sum loss: 3891.725586, avg loss: 2.481968, ppl: 11.964785 +epoch: 2, batch: 5759, sum loss: 4769.634766, avg loss: 2.670568, ppl: 14.448176 +epoch: 2, batch: 5760, sum loss: 3956.706299, avg loss: 2.446943, ppl: 11.552973 +epoch: 2, batch: 5761, sum loss: 3959.433350, avg 
loss: 2.457749, ppl: 11.678493 +epoch: 2, batch: 5762, sum loss: 3701.147949, avg loss: 2.251307, ppl: 9.500140 +epoch: 2, batch: 5763, sum loss: 3977.641602, avg loss: 2.459890, ppl: 11.703523 +epoch: 2, batch: 5764, sum loss: 3486.385742, avg loss: 2.196841, ppl: 8.996544 +epoch: 2, batch: 5765, sum loss: 4400.636230, avg loss: 2.554055, ppl: 12.859138 +epoch: 2, batch: 5766, sum loss: 4032.026611, avg loss: 2.472119, ppl: 11.847529 +epoch: 2, batch: 5767, sum loss: 5039.520508, avg loss: 2.892951, ppl: 18.046482 +epoch: 2, batch: 5768, sum loss: 3703.976074, avg loss: 2.325158, ppl: 10.228292 +epoch: 2, batch: 5769, sum loss: 5240.496094, avg loss: 2.733697, ppl: 15.389673 +epoch: 2, batch: 5770, sum loss: 4638.583984, avg loss: 2.534745, ppl: 12.613220 +epoch: 2, batch: 5771, sum loss: 3825.497803, avg loss: 2.463295, ppl: 11.743448 +epoch: 2, batch: 5772, sum loss: 4512.619629, avg loss: 2.471314, ppl: 11.837994 +epoch: 2, batch: 5773, sum loss: 4020.940186, avg loss: 2.438411, ppl: 11.454827 +epoch: 2, batch: 5774, sum loss: 4532.896484, avg loss: 2.762277, ppl: 15.835858 +epoch: 2, batch: 5775, sum loss: 4103.399414, avg loss: 2.394049, ppl: 10.957768 +epoch: 2, batch: 5776, sum loss: 3704.080811, avg loss: 2.338435, ppl: 10.365002 +epoch: 2, batch: 5777, sum loss: 4139.564453, avg loss: 2.439343, ppl: 11.465503 +epoch: 2, batch: 5778, sum loss: 4814.503418, avg loss: 2.735513, ppl: 15.417654 +epoch: 2, batch: 5779, sum loss: 3726.817871, avg loss: 2.432649, ppl: 11.389010 +epoch: 2, batch: 5780, sum loss: 4274.316406, avg loss: 2.601532, ppl: 13.484374 +epoch: 2, batch: 5781, sum loss: 5864.854980, avg loss: 2.984659, ppl: 19.779755 +epoch: 2, batch: 5782, sum loss: 4513.434082, avg loss: 2.665939, ppl: 14.381442 +epoch: 2, batch: 5783, sum loss: 4459.537109, avg loss: 2.443582, ppl: 11.514212 +epoch: 2, batch: 5784, sum loss: 4682.482422, avg loss: 2.633567, ppl: 13.923350 +epoch: 2, batch: 5785, sum loss: 4241.468750, avg loss: 2.699853, ppl: 14.877543 
+epoch: 2, batch: 5786, sum loss: 4931.903809, avg loss: 2.626147, ppl: 13.820415 +epoch: 2, batch: 5787, sum loss: 4485.834961, avg loss: 2.400126, ppl: 11.024563 +epoch: 2, batch: 5788, sum loss: 3378.750977, avg loss: 2.351253, ppl: 10.498719 +epoch: 2, batch: 5789, sum loss: 3886.560791, avg loss: 2.420025, ppl: 11.246144 +epoch: 2, batch: 5790, sum loss: 4477.878418, avg loss: 2.632498, ppl: 13.908463 +epoch: 2, batch: 5791, sum loss: 5557.716797, avg loss: 2.690086, ppl: 14.732938 +epoch: 2, batch: 5792, sum loss: 4353.710938, avg loss: 2.622717, ppl: 13.773100 +epoch: 2, batch: 5793, sum loss: 4058.854248, avg loss: 2.479447, ppl: 11.934661 +epoch: 2, batch: 5794, sum loss: 3594.171387, avg loss: 2.315832, ppl: 10.133351 +epoch: 2, batch: 5795, sum loss: 4047.809082, avg loss: 2.826683, ppl: 16.889339 +epoch: 2, batch: 5796, sum loss: 4643.070801, avg loss: 2.793665, ppl: 16.340797 +epoch: 2, batch: 5797, sum loss: 4334.477051, avg loss: 2.655930, ppl: 14.238215 +epoch: 2, batch: 5798, sum loss: 5123.124023, avg loss: 2.784307, ppl: 16.188587 +epoch: 2, batch: 5799, sum loss: 5033.729492, avg loss: 2.652123, ppl: 14.184123 +epoch: 2, batch: 5800, sum loss: 4614.889160, avg loss: 2.670654, ppl: 14.449410 +epoch: 2, batch: 5801, sum loss: 4726.131348, avg loss: 2.783352, ppl: 16.173141 +epoch: 2, batch: 5802, sum loss: 4654.169434, avg loss: 2.626507, ppl: 13.825387 +epoch: 2, batch: 5803, sum loss: 4095.080811, avg loss: 2.480364, ppl: 11.945613 +epoch: 2, batch: 5804, sum loss: 4769.941406, avg loss: 2.684266, ppl: 14.647451 +epoch: 2, batch: 5805, sum loss: 3889.445557, avg loss: 2.502861, ppl: 12.217396 +epoch: 2, batch: 5806, sum loss: 4329.361816, avg loss: 2.709238, ppl: 15.017825 +epoch: 2, batch: 5807, sum loss: 3552.396240, avg loss: 2.406773, ppl: 11.098085 +epoch: 2, batch: 5808, sum loss: 4100.419434, avg loss: 2.489629, ppl: 12.056806 +epoch: 2, batch: 5809, sum loss: 3756.145020, avg loss: 2.510792, ppl: 12.314683 +epoch: 2, batch: 5810, sum 
loss: 3919.151611, avg loss: 2.544904, ppl: 12.741999 +epoch: 2, batch: 5811, sum loss: 3668.904785, avg loss: 2.511229, ppl: 12.320057 +epoch: 2, batch: 5812, sum loss: 4593.311523, avg loss: 2.679878, ppl: 14.583321 +epoch: 2, batch: 5813, sum loss: 3690.998779, avg loss: 2.627045, ppl: 13.832839 +epoch: 2, batch: 5814, sum loss: 4864.788086, avg loss: 2.786247, ppl: 16.220039 +epoch: 2, batch: 5815, sum loss: 4496.718262, avg loss: 2.617415, ppl: 13.700258 +epoch: 2, batch: 5816, sum loss: 4215.707031, avg loss: 2.407600, ppl: 11.107268 +epoch: 2, batch: 5817, sum loss: 3941.974121, avg loss: 2.437832, ppl: 11.448193 +epoch: 2, batch: 5818, sum loss: 4607.019043, avg loss: 2.955112, ppl: 19.203869 +epoch: 2, batch: 5819, sum loss: 4266.553711, avg loss: 2.671605, ppl: 14.463169 +epoch: 2, batch: 5820, sum loss: 4533.926270, avg loss: 2.713301, ppl: 15.078976 +epoch: 2, batch: 5821, sum loss: 4375.449219, avg loss: 2.426760, ppl: 11.322144 +epoch: 2, batch: 5822, sum loss: 4472.166504, avg loss: 2.567260, ppl: 13.030070 +epoch: 2, batch: 5823, sum loss: 4725.694824, avg loss: 2.548918, ppl: 12.793257 +epoch: 2, batch: 5824, sum loss: 4201.625000, avg loss: 2.544897, ppl: 12.741917 +epoch: 2, batch: 5825, sum loss: 3626.492432, avg loss: 2.393724, ppl: 10.954216 +epoch: 2, batch: 5826, sum loss: 4451.860840, avg loss: 2.485684, ppl: 12.009336 +epoch: 2, batch: 5827, sum loss: 4163.818848, avg loss: 2.430717, ppl: 11.367032 +epoch: 2, batch: 5828, sum loss: 4115.018066, avg loss: 2.591321, ppl: 13.347395 +epoch: 2, batch: 5829, sum loss: 3687.768066, avg loss: 2.418209, ppl: 11.225732 +epoch: 2, batch: 5830, sum loss: 4423.375977, avg loss: 2.517573, ppl: 12.398471 +epoch: 2, batch: 5831, sum loss: 4428.739258, avg loss: 2.570365, ppl: 13.070597 +epoch: 2, batch: 5832, sum loss: 3565.275146, avg loss: 2.445319, ppl: 11.534227 +epoch: 2, batch: 5833, sum loss: 3209.631836, avg loss: 2.418713, ppl: 11.231391 +epoch: 2, batch: 5834, sum loss: 3381.455322, avg loss: 
2.223179, ppl: 9.236649 +epoch: 2, batch: 5835, sum loss: 3691.353516, avg loss: 2.449471, ppl: 11.582224 +epoch: 2, batch: 5836, sum loss: 3704.721680, avg loss: 2.340317, ppl: 10.384525 +epoch: 2, batch: 5837, sum loss: 4385.412598, avg loss: 2.665904, ppl: 14.380948 +epoch: 2, batch: 5838, sum loss: 3437.292969, avg loss: 2.224785, ppl: 9.251494 +epoch: 2, batch: 5839, sum loss: 4242.369141, avg loss: 2.429765, ppl: 11.356210 +epoch: 2, batch: 5840, sum loss: 3694.689453, avg loss: 2.474675, ppl: 11.877843 +epoch: 2, batch: 5841, sum loss: 4006.841309, avg loss: 2.416672, ppl: 11.208489 +epoch: 2, batch: 5842, sum loss: 4729.506348, avg loss: 2.573181, ppl: 13.107449 +epoch: 2, batch: 5843, sum loss: 4441.515137, avg loss: 2.481293, ppl: 11.956717 +epoch: 2, batch: 5844, sum loss: 4013.159180, avg loss: 2.393059, ppl: 10.946924 +epoch: 2, batch: 5845, sum loss: 4688.385742, avg loss: 2.393255, ppl: 10.949073 +epoch: 2, batch: 5846, sum loss: 4492.841797, avg loss: 2.900479, ppl: 18.182850 +epoch: 2, batch: 5847, sum loss: 3271.159424, avg loss: 2.126892, ppl: 8.388751 +epoch: 2, batch: 5848, sum loss: 4359.767090, avg loss: 2.404726, ppl: 11.075390 +epoch: 2, batch: 5849, sum loss: 3528.475098, avg loss: 2.430079, ppl: 11.359782 +epoch: 2, batch: 5850, sum loss: 4546.397461, avg loss: 2.753724, ppl: 15.700988 +epoch: 2, batch: 5851, sum loss: 4684.305664, avg loss: 2.773420, ppl: 16.013300 +epoch: 2, batch: 5852, sum loss: 4379.350098, avg loss: 2.759515, ppl: 15.792183 +epoch: 2, batch: 5853, sum loss: 4053.759033, avg loss: 2.455336, ppl: 11.650343 +epoch: 2, batch: 5854, sum loss: 4631.337402, avg loss: 2.607735, ppl: 13.568286 +epoch: 2, batch: 5855, sum loss: 4834.580078, avg loss: 2.732945, ppl: 15.378116 +epoch: 2, batch: 5856, sum loss: 3738.809814, avg loss: 2.527931, ppl: 12.527556 +epoch: 2, batch: 5857, sum loss: 4769.870117, avg loss: 2.574134, ppl: 13.119948 +epoch: 2, batch: 5858, sum loss: 3474.861328, avg loss: 2.409751, ppl: 11.131194 +epoch: 
2, batch: 5859, sum loss: 4046.644775, avg loss: 2.572565, ppl: 13.099380 +epoch: 2, batch: 5860, sum loss: 4424.580566, avg loss: 2.572431, ppl: 13.097621 +epoch: 2, batch: 5861, sum loss: 3567.817383, avg loss: 2.328863, ppl: 10.266259 +epoch: 2, batch: 5862, sum loss: 3784.494141, avg loss: 2.404380, ppl: 11.071562 +epoch: 2, batch: 5863, sum loss: 4950.758789, avg loss: 2.589309, ppl: 13.320563 +epoch: 2, batch: 5864, sum loss: 4721.616699, avg loss: 2.702700, ppl: 14.919960 +epoch: 2, batch: 5865, sum loss: 4660.210938, avg loss: 2.584698, ppl: 13.259287 +epoch: 2, batch: 5866, sum loss: 4249.165527, avg loss: 2.576813, ppl: 13.155152 +epoch: 2, batch: 5867, sum loss: 3947.740234, avg loss: 2.637101, ppl: 13.972637 +epoch: 2, batch: 5868, sum loss: 5182.074219, avg loss: 2.728844, ppl: 15.315167 +epoch: 2, batch: 5869, sum loss: 4028.100098, avg loss: 2.665850, ppl: 14.380174 +epoch: 2, batch: 5870, sum loss: 4076.654053, avg loss: 2.214370, ppl: 9.155635 +epoch: 2, batch: 5871, sum loss: 4614.003906, avg loss: 2.682560, ppl: 14.622485 +epoch: 2, batch: 5872, sum loss: 4128.785645, avg loss: 2.556524, ppl: 12.890924 +epoch: 2, batch: 5873, sum loss: 4108.067871, avg loss: 2.545271, ppl: 12.746687 +epoch: 2, batch: 5874, sum loss: 4009.110840, avg loss: 2.382122, ppl: 10.827850 +epoch: 2, batch: 5875, sum loss: 5191.126465, avg loss: 2.784939, ppl: 16.198830 +epoch: 2, batch: 5876, sum loss: 4413.350098, avg loss: 2.720931, ppl: 15.194463 +epoch: 2, batch: 5877, sum loss: 4913.884277, avg loss: 2.546054, ppl: 12.756668 +epoch: 2, batch: 5878, sum loss: 4362.012695, avg loss: 2.525775, ppl: 12.500576 +epoch: 2, batch: 5879, sum loss: 4473.273438, avg loss: 2.457843, ppl: 11.679586 +epoch: 2, batch: 5880, sum loss: 4003.430664, avg loss: 2.508415, ppl: 12.285445 +epoch: 2, batch: 5881, sum loss: 3810.120117, avg loss: 2.482163, ppl: 11.967121 +epoch: 2, batch: 5882, sum loss: 5656.196777, avg loss: 2.798712, ppl: 16.423479 +epoch: 2, batch: 5883, sum loss: 
4471.101074, avg loss: 2.696683, ppl: 14.830463 +epoch: 2, batch: 5884, sum loss: 3595.567871, avg loss: 2.271363, ppl: 9.692603 +epoch: 2, batch: 5885, sum loss: 5410.390137, avg loss: 2.929286, ppl: 18.714254 +epoch: 2, batch: 5886, sum loss: 4100.632324, avg loss: 2.540664, ppl: 12.688099 +epoch: 2, batch: 5887, sum loss: 4883.887207, avg loss: 2.629988, ppl: 13.873599 +epoch: 2, batch: 5888, sum loss: 3919.939209, avg loss: 2.316749, ppl: 10.142646 +epoch: 2, batch: 5889, sum loss: 4502.832031, avg loss: 2.681854, ppl: 14.612152 +epoch: 2, batch: 5890, sum loss: 3481.039307, avg loss: 2.570930, ppl: 13.077981 +epoch: 2, batch: 5891, sum loss: 4466.249023, avg loss: 2.584635, ppl: 13.258446 +epoch: 2, batch: 5892, sum loss: 4766.770996, avg loss: 2.723869, ppl: 15.239170 +epoch: 2, batch: 5893, sum loss: 3980.450195, avg loss: 2.410933, ppl: 11.144354 +epoch: 2, batch: 5894, sum loss: 3734.304688, avg loss: 2.270094, ppl: 9.680310 +epoch: 2, batch: 5895, sum loss: 4134.293457, avg loss: 2.527074, ppl: 12.516832 +epoch: 2, batch: 5896, sum loss: 3990.132080, avg loss: 2.551235, ppl: 12.822936 +epoch: 2, batch: 5897, sum loss: 4700.895996, avg loss: 2.630608, ppl: 13.882206 +epoch: 2, batch: 5898, sum loss: 5207.333008, avg loss: 2.867474, ppl: 17.592525 +epoch: 2, batch: 5899, sum loss: 5141.669434, avg loss: 2.770296, ppl: 15.963360 +epoch: 2, batch: 5900, sum loss: 4498.345703, avg loss: 2.794003, ppl: 16.346331 +epoch: 2, batch: 5901, sum loss: 4509.556641, avg loss: 2.536309, ppl: 12.632954 +epoch: 2, batch: 5902, sum loss: 4956.722168, avg loss: 2.673529, ppl: 14.491014 +epoch: 2, batch: 5903, sum loss: 4496.744629, avg loss: 2.851455, ppl: 17.312958 +epoch: 2, batch: 5904, sum loss: 3016.896484, avg loss: 2.315346, ppl: 10.128431 +epoch: 2, batch: 5905, sum loss: 3913.874023, avg loss: 2.581711, ppl: 13.219742 +epoch: 2, batch: 5906, sum loss: 3891.482910, avg loss: 2.488160, ppl: 12.039108 +epoch: 2, batch: 5907, sum loss: 5132.804199, avg loss: 2.715770, 
ppl: 15.116238 +epoch: 2, batch: 5908, sum loss: 3803.157227, avg loss: 2.604902, ppl: 13.529902 +epoch: 2, batch: 5909, sum loss: 4188.697754, avg loss: 2.435289, ppl: 11.419123 +epoch: 2, batch: 5910, sum loss: 5012.063477, avg loss: 2.740330, ppl: 15.492100 +epoch: 2, batch: 5911, sum loss: 4584.670410, avg loss: 2.727347, ppl: 15.292261 +epoch: 2, batch: 5912, sum loss: 4032.027344, avg loss: 2.596283, ppl: 13.413786 +epoch: 2, batch: 5913, sum loss: 3545.611572, avg loss: 2.172556, ppl: 8.780701 +epoch: 2, batch: 5914, sum loss: 4513.019043, avg loss: 2.689523, ppl: 14.724647 +epoch: 2, batch: 5915, sum loss: 5821.796875, avg loss: 2.798941, ppl: 16.427238 +epoch: 2, batch: 5916, sum loss: 4646.708008, avg loss: 2.529509, ppl: 12.547345 +epoch: 2, batch: 5917, sum loss: 4321.011719, avg loss: 2.526907, ppl: 12.514744 +epoch: 2, batch: 5918, sum loss: 3896.786377, avg loss: 2.336203, ppl: 10.341892 +epoch: 2, batch: 5919, sum loss: 4505.557617, avg loss: 2.503088, ppl: 12.220166 +epoch: 2, batch: 5920, sum loss: 4388.937012, avg loss: 2.686008, ppl: 14.672984 +epoch: 2, batch: 5921, sum loss: 4845.398438, avg loss: 2.708440, ppl: 15.005842 +epoch: 2, batch: 5922, sum loss: 4463.745605, avg loss: 2.513370, ppl: 12.346471 +epoch: 2, batch: 5923, sum loss: 4608.326172, avg loss: 2.679259, ppl: 14.574294 +epoch: 2, batch: 5924, sum loss: 4707.375000, avg loss: 2.507925, ppl: 12.279421 +epoch: 2, batch: 5925, sum loss: 4007.729980, avg loss: 2.394104, ppl: 10.958375 +epoch: 2, batch: 5926, sum loss: 4176.219727, avg loss: 2.520350, ppl: 12.432944 +epoch: 2, batch: 5927, sum loss: 5525.548340, avg loss: 2.765540, ppl: 15.887611 +epoch: 2, batch: 5928, sum loss: 3054.882568, avg loss: 2.158927, ppl: 8.661842 +epoch: 2, batch: 5929, sum loss: 4787.104004, avg loss: 2.723040, ppl: 15.226535 +epoch: 2, batch: 5930, sum loss: 4256.186035, avg loss: 2.499228, ppl: 12.173099 +epoch: 2, batch: 5931, sum loss: 5576.803711, avg loss: 2.820842, ppl: 16.790976 +epoch: 2, batch: 
5932, sum loss: 4331.644043, avg loss: 2.521329, ppl: 12.445130 +epoch: 2, batch: 5933, sum loss: 3968.445557, avg loss: 2.521249, ppl: 12.444124 +epoch: 2, batch: 5934, sum loss: 4249.246094, avg loss: 2.549038, ppl: 12.794785 +epoch: 2, batch: 5935, sum loss: 3618.993896, avg loss: 2.534310, ppl: 12.607724 +epoch: 2, batch: 5936, sum loss: 4350.199707, avg loss: 2.538039, ppl: 12.654836 +epoch: 2, batch: 5937, sum loss: 3859.225830, avg loss: 2.312298, ppl: 10.097606 +epoch: 2, batch: 5938, sum loss: 4201.107422, avg loss: 2.637230, ppl: 13.974442 +epoch: 2, batch: 5939, sum loss: 3427.553223, avg loss: 2.425728, ppl: 11.310456 +epoch: 2, batch: 5940, sum loss: 3609.106201, avg loss: 2.334480, ppl: 10.324091 +epoch: 2, batch: 5941, sum loss: 4779.856445, avg loss: 2.774148, ppl: 16.024963 +epoch: 2, batch: 5942, sum loss: 3555.001221, avg loss: 2.489497, ppl: 12.055207 +epoch: 2, batch: 5943, sum loss: 5306.475586, avg loss: 2.880823, ppl: 17.828939 +epoch: 2, batch: 5944, sum loss: 3437.769287, avg loss: 2.156693, ppl: 8.642513 +epoch: 2, batch: 5945, sum loss: 4569.332031, avg loss: 2.510622, ppl: 12.312587 +epoch: 2, batch: 5946, sum loss: 2920.842041, avg loss: 2.052594, ppl: 7.788081 +epoch: 2, batch: 5947, sum loss: 4276.500977, avg loss: 2.487784, ppl: 12.034582 +epoch: 2, batch: 5948, sum loss: 5008.343750, avg loss: 2.816841, ppl: 16.723942 +epoch: 2, batch: 5949, sum loss: 4190.670410, avg loss: 2.604519, ppl: 13.524714 +epoch: 2, batch: 5950, sum loss: 3688.025391, avg loss: 2.584461, ppl: 13.256139 +epoch: 2, batch: 5951, sum loss: 4570.068848, avg loss: 2.728400, ppl: 15.308367 +epoch: 2, batch: 5952, sum loss: 4849.428223, avg loss: 2.605819, ppl: 13.542305 +epoch: 2, batch: 5953, sum loss: 5583.909180, avg loss: 2.847481, ppl: 17.244280 +epoch: 2, batch: 5954, sum loss: 4595.959473, avg loss: 2.812705, ppl: 16.654905 +epoch: 2, batch: 5955, sum loss: 4285.237305, avg loss: 2.755780, ppl: 15.733304 +epoch: 2, batch: 5956, sum loss: 4378.695801, avg 
loss: 2.568150, ppl: 13.041676 +epoch: 2, batch: 5957, sum loss: 3359.943848, avg loss: 2.198916, ppl: 9.015238 +epoch: 2, batch: 5958, sum loss: 4416.599609, avg loss: 2.704592, ppl: 14.948223 +epoch: 2, batch: 5959, sum loss: 4110.798828, avg loss: 2.403976, ppl: 11.067091 +epoch: 2, batch: 5960, sum loss: 3772.277832, avg loss: 2.410401, ppl: 11.138428 +epoch: 2, batch: 5961, sum loss: 4052.347412, avg loss: 2.643410, ppl: 14.061069 +epoch: 2, batch: 5962, sum loss: 4706.543945, avg loss: 2.650081, ppl: 14.155188 +epoch: 2, batch: 5963, sum loss: 3868.232910, avg loss: 2.442066, ppl: 11.496771 +epoch: 2, batch: 5964, sum loss: 3376.126465, avg loss: 2.482446, ppl: 11.970508 +epoch: 2, batch: 5965, sum loss: 4690.263672, avg loss: 2.561586, ppl: 12.956348 +epoch: 2, batch: 5966, sum loss: 4325.782227, avg loss: 2.343327, ppl: 10.415835 +epoch: 2, batch: 5967, sum loss: 4808.953613, avg loss: 2.930502, ppl: 18.737028 +epoch: 2, batch: 5968, sum loss: 4347.886719, avg loss: 2.470390, ppl: 11.827060 +epoch: 2, batch: 5969, sum loss: 4822.892578, avg loss: 2.810544, ppl: 16.618948 +epoch: 2, batch: 5970, sum loss: 5614.294434, avg loss: 3.200852, ppl: 24.553438 +epoch: 2, batch: 5971, sum loss: 4665.192871, avg loss: 2.613554, ppl: 13.647461 +epoch: 2, batch: 5972, sum loss: 3672.385498, avg loss: 2.443370, ppl: 11.511771 +epoch: 2, batch: 5973, sum loss: 4491.314453, avg loss: 2.815871, ppl: 16.707726 +epoch: 2, batch: 5974, sum loss: 3906.338867, avg loss: 2.486530, ppl: 12.019500 +epoch: 2, batch: 5975, sum loss: 4571.293945, avg loss: 2.653101, ppl: 14.198005 +epoch: 2, batch: 5976, sum loss: 3873.635254, avg loss: 2.244285, ppl: 9.433664 +epoch: 2, batch: 5977, sum loss: 4650.265625, avg loss: 2.675642, ppl: 14.521670 +epoch: 2, batch: 5978, sum loss: 3711.654541, avg loss: 2.350636, ppl: 10.492243 +epoch: 2, batch: 5979, sum loss: 4030.719238, avg loss: 2.538236, ppl: 12.657329 +epoch: 2, batch: 5980, sum loss: 3873.025146, avg loss: 2.370272, ppl: 10.700307 
+epoch: 2, batch: 5981, sum loss: 4230.382324, avg loss: 2.414602, ppl: 11.185318 +epoch: 2, batch: 5982, sum loss: 4105.923828, avg loss: 2.655837, ppl: 14.236895 +epoch: 2, batch: 5983, sum loss: 3651.650391, avg loss: 2.567968, ppl: 13.039300 +epoch: 2, batch: 5984, sum loss: 3676.208740, avg loss: 2.404322, ppl: 11.070926 +epoch: 2, batch: 5985, sum loss: 3594.797363, avg loss: 2.349541, ppl: 10.480755 +epoch: 2, batch: 5986, sum loss: 4063.304932, avg loss: 2.574971, ppl: 13.130942 +epoch: 2, batch: 5987, sum loss: 3857.815186, avg loss: 2.345176, ppl: 10.435111 +epoch: 2, batch: 5988, sum loss: 5215.220703, avg loss: 2.660827, ppl: 14.308112 +epoch: 2, batch: 5989, sum loss: 4204.089844, avg loss: 2.535639, ppl: 12.624499 +epoch: 2, batch: 5990, sum loss: 4527.223145, avg loss: 2.621438, ppl: 13.755486 +epoch: 2, batch: 5991, sum loss: 5539.913574, avg loss: 2.789483, ppl: 16.272606 +epoch: 2, batch: 5992, sum loss: 3940.158691, avg loss: 2.639088, ppl: 14.000432 +epoch: 2, batch: 5993, sum loss: 4083.942383, avg loss: 2.538187, ppl: 12.656698 +epoch: 2, batch: 5994, sum loss: 4480.470703, avg loss: 2.770854, ppl: 15.972268 +epoch: 2, batch: 5995, sum loss: 4863.571289, avg loss: 2.695993, ppl: 14.820227 +epoch: 2, batch: 5996, sum loss: 4471.231934, avg loss: 2.724699, ppl: 15.251830 +epoch: 2, batch: 5997, sum loss: 4163.871582, avg loss: 2.645407, ppl: 14.089185 +epoch: 2, batch: 5998, sum loss: 4735.789062, avg loss: 2.706165, ppl: 14.971749 +epoch: 2, batch: 5999, sum loss: 4760.733398, avg loss: 2.661114, ppl: 14.312227 +epoch: 2, batch: 6000, sum loss: 4644.327148, avg loss: 2.666089, ppl: 14.383606 +epoch: 2, batch: 6001, sum loss: 4034.099609, avg loss: 2.546780, ppl: 12.765933 +epoch: 2, batch: 6002, sum loss: 4361.569824, avg loss: 2.534323, ppl: 12.607892 +epoch: 2, batch: 6003, sum loss: 3220.792480, avg loss: 2.114769, ppl: 8.287667 +epoch: 2, batch: 6004, sum loss: 4218.075195, avg loss: 2.762328, ppl: 15.836670 +epoch: 2, batch: 6005, sum 
loss: 5360.735840, avg loss: 2.689782, ppl: 14.728467 +epoch: 2, batch: 6006, sum loss: 5200.710449, avg loss: 2.797585, ppl: 16.404978 +epoch: 2, batch: 6007, sum loss: 4881.966309, avg loss: 2.497169, ppl: 12.148060 +epoch: 2, batch: 6008, sum loss: 4959.586426, avg loss: 2.617196, ppl: 13.697264 +epoch: 2, batch: 6009, sum loss: 4087.434082, avg loss: 2.390312, ppl: 10.916902 +epoch: 2, batch: 6010, sum loss: 4417.558594, avg loss: 2.646830, ppl: 14.109236 +epoch: 2, batch: 6011, sum loss: 4547.804688, avg loss: 2.537838, ppl: 12.652281 +epoch: 2, batch: 6012, sum loss: 4887.397461, avg loss: 2.736505, ppl: 15.432949 +epoch: 2, batch: 6013, sum loss: 4480.737305, avg loss: 2.443150, ppl: 11.509238 +epoch: 2, batch: 6014, sum loss: 4562.398926, avg loss: 2.614555, ppl: 13.661138 +epoch: 2, batch: 6015, sum loss: 3621.824463, avg loss: 2.452149, ppl: 11.613281 +epoch: 2, batch: 6016, sum loss: 5075.740723, avg loss: 2.920449, ppl: 18.549620 +epoch: 2, batch: 6017, sum loss: 5378.061035, avg loss: 2.680988, ppl: 14.599504 +epoch: 2, batch: 6018, sum loss: 4188.266113, avg loss: 2.527620, ppl: 12.523663 +epoch: 2, batch: 6019, sum loss: 4491.335938, avg loss: 2.443599, ppl: 11.514412 +epoch: 2, batch: 6020, sum loss: 4200.904297, avg loss: 2.450936, ppl: 11.599199 +epoch: 2, batch: 6021, sum loss: 3628.776855, avg loss: 2.453534, ppl: 11.629371 +epoch: 2, batch: 6022, sum loss: 4026.391846, avg loss: 2.574419, ppl: 13.123693 +epoch: 2, batch: 6023, sum loss: 4174.224609, avg loss: 2.469955, ppl: 11.821920 +epoch: 2, batch: 6024, sum loss: 4380.721680, avg loss: 2.392530, ppl: 10.941137 +epoch: 2, batch: 6025, sum loss: 4455.947266, avg loss: 2.690789, ppl: 14.743311 +epoch: 2, batch: 6026, sum loss: 3382.289307, avg loss: 2.280708, ppl: 9.783601 +epoch: 2, batch: 6027, sum loss: 4889.538574, avg loss: 2.730061, ppl: 15.333816 +epoch: 2, batch: 6028, sum loss: 3778.071289, avg loss: 2.343717, ppl: 10.419891 +epoch: 2, batch: 6029, sum loss: 4020.177490, avg loss: 
2.646595, ppl: 14.105923 +epoch: 2, batch: 6030, sum loss: 4411.072754, avg loss: 2.683134, ppl: 14.630879 +epoch: 2, batch: 6031, sum loss: 3879.772705, avg loss: 2.279537, ppl: 9.772160 +epoch: 2, batch: 6032, sum loss: 3929.576172, avg loss: 2.357274, ppl: 10.562120 +epoch: 2, batch: 6033, sum loss: 4244.637695, avg loss: 2.360755, ppl: 10.598950 +epoch: 2, batch: 6034, sum loss: 4263.607910, avg loss: 2.455995, ppl: 11.658031 +epoch: 2, batch: 6035, sum loss: 3739.593750, avg loss: 2.302706, ppl: 10.001204 +epoch: 2, batch: 6036, sum loss: 3739.416016, avg loss: 2.479719, ppl: 11.937905 +epoch: 2, batch: 6037, sum loss: 4097.138672, avg loss: 2.603010, ppl: 13.504331 +epoch: 2, batch: 6038, sum loss: 3927.575439, avg loss: 2.311698, ppl: 10.091550 +epoch: 2, batch: 6039, sum loss: 4230.216309, avg loss: 2.453722, ppl: 11.631559 +epoch: 2, batch: 6040, sum loss: 4341.705078, avg loss: 2.593611, ppl: 13.377995 +epoch: 2, batch: 6041, sum loss: 6267.931641, avg loss: 2.924840, ppl: 18.631239 +epoch: 2, batch: 6042, sum loss: 3302.869141, avg loss: 2.130883, ppl: 8.422304 +epoch: 2, batch: 6043, sum loss: 4618.751953, avg loss: 2.432202, ppl: 11.383923 +epoch: 2, batch: 6044, sum loss: 4223.541504, avg loss: 2.364805, ppl: 10.641963 +epoch: 2, batch: 6045, sum loss: 3834.439453, avg loss: 2.302967, ppl: 10.003816 +epoch: 2, batch: 6046, sum loss: 4213.411621, avg loss: 2.728894, ppl: 15.315934 +epoch: 2, batch: 6047, sum loss: 3204.839355, avg loss: 2.353039, ppl: 10.517484 +epoch: 2, batch: 6048, sum loss: 3709.082764, avg loss: 2.441792, ppl: 11.493625 +epoch: 2, batch: 6049, sum loss: 4776.953613, avg loss: 2.610357, ppl: 13.603910 +epoch: 2, batch: 6050, sum loss: 4279.323242, avg loss: 2.636675, ppl: 13.966685 +epoch: 2, batch: 6051, sum loss: 5441.758789, avg loss: 2.799259, ppl: 16.432465 +epoch: 2, batch: 6052, sum loss: 2628.006104, avg loss: 2.325669, ppl: 10.233524 +epoch: 2, batch: 6053, sum loss: 3862.517578, avg loss: 2.504875, ppl: 12.242031 +epoch: 
2, batch: 6054, sum loss: 4803.453125, avg loss: 2.804117, ppl: 16.512497 +epoch: 2, batch: 6055, sum loss: 4013.702148, avg loss: 2.473014, ppl: 11.858135 +epoch: 2, batch: 6056, sum loss: 5135.306641, avg loss: 2.598840, ppl: 13.448123 +epoch: 2, batch: 6057, sum loss: 4420.835938, avg loss: 2.465609, ppl: 11.770643 +epoch: 2, batch: 6058, sum loss: 4868.262695, avg loss: 2.882334, ppl: 17.855909 +epoch: 2, batch: 6059, sum loss: 4238.083496, avg loss: 2.417618, ppl: 11.219099 +epoch: 2, batch: 6060, sum loss: 4516.599609, avg loss: 2.458683, ppl: 11.689401 +epoch: 2, batch: 6061, sum loss: 4240.382812, avg loss: 2.530061, ppl: 12.554278 +epoch: 2, batch: 6062, sum loss: 4880.783203, avg loss: 2.473788, ppl: 11.867312 +epoch: 2, batch: 6063, sum loss: 4316.461914, avg loss: 2.572385, ppl: 13.097021 +epoch: 2, batch: 6064, sum loss: 3823.376709, avg loss: 2.293567, ppl: 9.910226 +epoch: 2, batch: 6065, sum loss: 4647.684570, avg loss: 2.692749, ppl: 14.772225 +epoch: 2, batch: 6066, sum loss: 4466.954590, avg loss: 2.549632, ppl: 12.802389 +epoch: 2, batch: 6067, sum loss: 4716.750977, avg loss: 2.627717, ppl: 13.842126 +epoch: 2, batch: 6068, sum loss: 4243.891602, avg loss: 2.654091, ppl: 14.212060 +epoch: 2, batch: 6069, sum loss: 4785.942871, avg loss: 2.755292, ppl: 15.725638 +epoch: 2, batch: 6070, sum loss: 4815.593262, avg loss: 2.542552, ppl: 12.712070 +epoch: 2, batch: 6071, sum loss: 5529.833984, avg loss: 2.946102, ppl: 19.031631 +epoch: 2, batch: 6072, sum loss: 5132.719238, avg loss: 2.909705, ppl: 18.351383 +epoch: 2, batch: 6073, sum loss: 4634.662109, avg loss: 2.780241, ppl: 16.122906 +epoch: 2, batch: 6074, sum loss: 4262.342285, avg loss: 2.635957, ppl: 13.956662 +epoch: 2, batch: 6075, sum loss: 4802.130859, avg loss: 2.602781, ppl: 13.501230 +epoch: 2, batch: 6076, sum loss: 3917.485107, avg loss: 2.582390, ppl: 13.228712 +epoch: 2, batch: 6077, sum loss: 4294.180664, avg loss: 2.569827, ppl: 13.063565 +epoch: 2, batch: 6078, sum loss: 
4928.749023, avg loss: 2.735155, ppl: 15.412130 +epoch: 2, batch: 6079, sum loss: 4562.243652, avg loss: 2.924515, ppl: 18.625195 +epoch: 2, batch: 6080, sum loss: 4846.092773, avg loss: 2.499274, ppl: 12.173656 +epoch: 2, batch: 6081, sum loss: 2998.814209, avg loss: 2.066722, ppl: 7.898891 +epoch: 2, batch: 6082, sum loss: 4308.267578, avg loss: 2.623793, ppl: 13.787921 +epoch: 2, batch: 6083, sum loss: 3264.731934, avg loss: 2.284627, ppl: 9.822021 +epoch: 2, batch: 6084, sum loss: 4990.700195, avg loss: 2.788100, ppl: 16.250124 +epoch: 2, batch: 6085, sum loss: 4884.265137, avg loss: 2.886682, ppl: 17.933699 +epoch: 2, batch: 6086, sum loss: 4115.973145, avg loss: 2.511271, ppl: 12.320580 +epoch: 2, batch: 6087, sum loss: 3977.866211, avg loss: 2.478421, ppl: 11.922426 +epoch: 2, batch: 6088, sum loss: 4223.673340, avg loss: 2.371518, ppl: 10.713642 +epoch: 2, batch: 6089, sum loss: 5285.395020, avg loss: 2.792073, ppl: 16.314810 +epoch: 2, batch: 6090, sum loss: 3430.620850, avg loss: 2.410837, ppl: 11.143284 +epoch: 2, batch: 6091, sum loss: 4805.370117, avg loss: 2.631638, ppl: 13.896508 +epoch: 2, batch: 6092, sum loss: 4451.106934, avg loss: 2.572894, ppl: 13.103693 +epoch: 2, batch: 6093, sum loss: 6372.951172, avg loss: 3.151806, ppl: 23.378239 +epoch: 2, batch: 6094, sum loss: 4071.829102, avg loss: 2.633783, ppl: 13.926358 +epoch: 2, batch: 6095, sum loss: 3547.932373, avg loss: 2.556147, ppl: 12.886073 +epoch: 2, batch: 6096, sum loss: 4133.885254, avg loss: 2.559681, ppl: 12.931697 +epoch: 2, batch: 6097, sum loss: 3758.387695, avg loss: 2.300115, ppl: 9.975328 +epoch: 2, batch: 6098, sum loss: 4849.801270, avg loss: 2.567391, ppl: 13.031779 +epoch: 2, batch: 6099, sum loss: 3637.041504, avg loss: 2.399104, ppl: 11.013303 +epoch: 2, batch: 6100, sum loss: 3872.799805, avg loss: 2.324610, ppl: 10.222690 +epoch: 2, batch: 6101, sum loss: 4528.815430, avg loss: 2.590856, ppl: 13.341182 +epoch: 2, batch: 6102, sum loss: 5045.308105, avg loss: 2.911314, 
ppl: 18.380943 +epoch: 2, batch: 6103, sum loss: 4556.915039, avg loss: 2.669546, ppl: 14.433416 +epoch: 2, batch: 6104, sum loss: 5298.066406, avg loss: 2.762287, ppl: 15.836017 +epoch: 2, batch: 6105, sum loss: 3939.605713, avg loss: 2.558186, ppl: 12.912368 +epoch: 2, batch: 6106, sum loss: 4119.050293, avg loss: 2.563193, ppl: 12.977182 +epoch: 2, batch: 6107, sum loss: 4183.385742, avg loss: 2.609723, ppl: 13.595283 +epoch: 2, batch: 6108, sum loss: 3629.839600, avg loss: 2.493022, ppl: 12.097777 +epoch: 2, batch: 6109, sum loss: 4858.894531, avg loss: 2.463942, ppl: 11.751046 +epoch: 2, batch: 6110, sum loss: 4983.546387, avg loss: 2.679326, ppl: 14.575267 +epoch: 2, batch: 6111, sum loss: 4354.848633, avg loss: 2.704875, ppl: 14.952447 +epoch: 2, batch: 6112, sum loss: 4617.830566, avg loss: 2.544259, ppl: 12.733793 +epoch: 2, batch: 6113, sum loss: 3978.392822, avg loss: 2.585051, ppl: 13.263960 +epoch: 2, batch: 6114, sum loss: 4361.215820, avg loss: 2.541501, ppl: 12.698718 +epoch: 2, batch: 6115, sum loss: 4339.099121, avg loss: 2.570556, ppl: 13.073096 +epoch: 2, batch: 6116, sum loss: 5103.163086, avg loss: 2.713005, ppl: 15.074511 +epoch: 2, batch: 6117, sum loss: 4696.107910, avg loss: 2.772201, ppl: 15.993791 +epoch: 2, batch: 6118, sum loss: 4110.565918, avg loss: 2.328933, ppl: 10.266976 +epoch: 2, batch: 6119, sum loss: 4443.118164, avg loss: 2.624405, ppl: 13.796369 +epoch: 2, batch: 6120, sum loss: 4149.588379, avg loss: 2.634659, ppl: 13.938562 +epoch: 2, batch: 6121, sum loss: 4185.056641, avg loss: 2.647094, ppl: 14.112973 +epoch: 2, batch: 6122, sum loss: 4387.879883, avg loss: 2.607177, ppl: 13.560721 +epoch: 2, batch: 6123, sum loss: 4531.806641, avg loss: 2.697504, ppl: 14.842639 +epoch: 2, batch: 6124, sum loss: 4515.231934, avg loss: 2.739825, ppl: 15.484279 +epoch: 2, batch: 6125, sum loss: 4463.298828, avg loss: 2.575475, ppl: 13.137559 +epoch: 2, batch: 6126, sum loss: 3852.050781, avg loss: 2.524280, ppl: 12.481903 +epoch: 2, 
batch: 6127, sum loss: 5273.010742, avg loss: 2.807780, ppl: 16.573082 +epoch: 2, batch: 6128, sum loss: 3163.415039, avg loss: 2.369599, ppl: 10.693108 +epoch: 2, batch: 6129, sum loss: 4611.596680, avg loss: 2.735229, ppl: 15.413280 +epoch: 2, batch: 6130, sum loss: 3913.144287, avg loss: 2.402176, ppl: 11.047185 +epoch: 2, batch: 6131, sum loss: 5081.187988, avg loss: 2.667290, ppl: 14.400893 +epoch: 2, batch: 6132, sum loss: 4314.691406, avg loss: 2.651931, ppl: 14.181394 +epoch: 2, batch: 6133, sum loss: 3540.152832, avg loss: 2.449933, ppl: 11.587568 +epoch: 2, batch: 6134, sum loss: 3968.645264, avg loss: 2.692432, ppl: 14.767553 +epoch: 2, batch: 6135, sum loss: 4511.884766, avg loss: 2.539046, ppl: 12.667581 +epoch: 2, batch: 6136, sum loss: 5515.439453, avg loss: 2.699677, ppl: 14.874923 +epoch: 2, batch: 6137, sum loss: 4091.905273, avg loss: 2.490508, ppl: 12.067409 +epoch: 2, batch: 6138, sum loss: 4540.992676, avg loss: 2.482774, ppl: 11.974430 +epoch: 2, batch: 6139, sum loss: 4737.994141, avg loss: 2.765904, ppl: 15.893404 +epoch: 2, batch: 6140, sum loss: 3756.356934, avg loss: 2.459959, ppl: 11.704329 +epoch: 2, batch: 6141, sum loss: 4552.637695, avg loss: 2.369931, ppl: 10.696656 +epoch: 2, batch: 6142, sum loss: 3633.787842, avg loss: 2.438784, ppl: 11.459097 +epoch: 2, batch: 6143, sum loss: 3979.283203, avg loss: 2.245645, ppl: 9.446509 +epoch: 2, batch: 6144, sum loss: 5139.416016, avg loss: 2.953687, ppl: 19.176537 +epoch: 2, batch: 6145, sum loss: 4341.394043, avg loss: 2.463901, ppl: 11.750565 +epoch: 2, batch: 6146, sum loss: 4180.560547, avg loss: 2.625980, ppl: 13.818111 +epoch: 2, batch: 6147, sum loss: 3391.984619, avg loss: 2.315348, ppl: 10.128446 +epoch: 2, batch: 6148, sum loss: 4413.653809, avg loss: 2.490775, ppl: 12.070631 +epoch: 2, batch: 6149, sum loss: 3310.764648, avg loss: 2.373308, ppl: 10.732840 +epoch: 2, batch: 6150, sum loss: 2886.382324, avg loss: 2.144415, ppl: 8.537043 +epoch: 2, batch: 6151, sum loss: 
4756.391113, avg loss: 2.559952, ppl: 12.935197 +epoch: 2, batch: 6152, sum loss: 4938.867188, avg loss: 2.831919, ppl: 16.978014 +epoch: 2, batch: 6153, sum loss: 4377.802246, avg loss: 2.546715, ppl: 12.765096 +epoch: 2, batch: 6154, sum loss: 4194.122070, avg loss: 2.486142, ppl: 12.014838 +epoch: 2, batch: 6155, sum loss: 5145.415039, avg loss: 2.879359, ppl: 17.802862 +epoch: 2, batch: 6156, sum loss: 4046.254639, avg loss: 2.548019, ppl: 12.781763 +epoch: 2, batch: 6157, sum loss: 5423.708984, avg loss: 2.976789, ppl: 19.624695 +epoch: 2, batch: 6158, sum loss: 4234.624512, avg loss: 2.554056, ppl: 12.859154 +epoch: 2, batch: 6159, sum loss: 4000.238037, avg loss: 2.436199, ppl: 11.429511 +epoch: 2, batch: 6160, sum loss: 3688.909912, avg loss: 2.307010, ppl: 10.044352 +epoch: 2, batch: 6161, sum loss: 4516.138184, avg loss: 2.662817, ppl: 14.336625 +epoch: 2, batch: 6162, sum loss: 3763.826416, avg loss: 2.466466, ppl: 11.780737 +epoch: 2, batch: 6163, sum loss: 4625.219238, avg loss: 2.486677, ppl: 12.021265 +epoch: 2, batch: 6164, sum loss: 5421.708496, avg loss: 2.793255, ppl: 16.334103 +epoch: 2, batch: 6165, sum loss: 4873.416016, avg loss: 2.787996, ppl: 16.248419 +epoch: 2, batch: 6166, sum loss: 4911.172852, avg loss: 2.829017, ppl: 16.928806 +epoch: 2, batch: 6167, sum loss: 4582.505859, avg loss: 2.802756, ppl: 16.490028 +epoch: 2, batch: 6168, sum loss: 4142.327148, avg loss: 2.465671, ppl: 11.771379 +epoch: 2, batch: 6169, sum loss: 4365.469727, avg loss: 2.398610, ppl: 11.007863 +epoch: 2, batch: 6170, sum loss: 4977.952148, avg loss: 2.947278, ppl: 19.054014 +epoch: 2, batch: 6171, sum loss: 3741.350586, avg loss: 2.472803, ppl: 11.855630 +epoch: 2, batch: 6172, sum loss: 4630.374023, avg loss: 2.502905, ppl: 12.217935 +epoch: 2, batch: 6173, sum loss: 4297.808105, avg loss: 2.520709, ppl: 12.437407 +epoch: 2, batch: 6174, sum loss: 4600.217773, avg loss: 2.657549, ppl: 14.261293 +epoch: 2, batch: 6175, sum loss: 4188.635742, avg loss: 
2.484363, ppl: 11.993476 +epoch: 2, batch: 6176, sum loss: 4168.358887, avg loss: 2.661787, ppl: 14.321863 +epoch: 2, batch: 6177, sum loss: 4535.155273, avg loss: 2.471474, ppl: 11.839890 +epoch: 2, batch: 6178, sum loss: 4536.274414, avg loss: 2.687366, ppl: 14.692931 +epoch: 2, batch: 6179, sum loss: 4350.476562, avg loss: 2.670642, ppl: 14.449248 +epoch: 2, batch: 6180, sum loss: 3395.955078, avg loss: 2.307035, ppl: 10.044596 +epoch: 2, batch: 6181, sum loss: 4300.091797, avg loss: 2.602961, ppl: 13.503667 +epoch: 2, batch: 6182, sum loss: 3750.146240, avg loss: 2.382558, ppl: 10.832576 +epoch: 2, batch: 6183, sum loss: 3782.067139, avg loss: 2.316024, ppl: 10.135294 +epoch: 2, batch: 6184, sum loss: 3763.127930, avg loss: 2.470865, ppl: 11.832684 +epoch: 2, batch: 6185, sum loss: 3514.063721, avg loss: 2.267138, ppl: 9.651738 +epoch: 2, batch: 6186, sum loss: 4032.885254, avg loss: 2.347430, ppl: 10.458659 +epoch: 2, batch: 6187, sum loss: 3300.508545, avg loss: 2.506081, ppl: 12.256800 +epoch: 2, batch: 6188, sum loss: 3660.910645, avg loss: 2.410079, ppl: 11.134846 +epoch: 2, batch: 6189, sum loss: 4199.667480, avg loss: 2.578065, ppl: 13.171622 +epoch: 2, batch: 6190, sum loss: 4603.442871, avg loss: 2.382735, ppl: 10.834490 +epoch: 2, batch: 6191, sum loss: 4376.868164, avg loss: 2.609939, ppl: 13.598226 +epoch: 2, batch: 6192, sum loss: 4070.852051, avg loss: 2.444956, ppl: 11.530046 +epoch: 2, batch: 6193, sum loss: 3562.747314, avg loss: 2.598649, ppl: 13.445558 +epoch: 2, batch: 6194, sum loss: 4235.793457, avg loss: 2.365044, ppl: 10.644504 +epoch: 2, batch: 6195, sum loss: 4262.249023, avg loss: 2.540077, ppl: 12.680647 +epoch: 2, batch: 6196, sum loss: 3392.645996, avg loss: 2.317381, ppl: 10.149060 +epoch: 2, batch: 6197, sum loss: 3929.358398, avg loss: 2.455849, ppl: 11.656324 +epoch: 2, batch: 6198, sum loss: 5112.482422, avg loss: 2.648955, ppl: 14.139250 +epoch: 2, batch: 6199, sum loss: 5241.965820, avg loss: 2.640789, ppl: 14.024261 +epoch: 
2, batch: 6200, sum loss: 4555.376465, avg loss: 2.820667, ppl: 16.788036 +epoch: 2, batch: 6201, sum loss: 4312.823242, avg loss: 2.321218, ppl: 10.188076 +epoch: 2, batch: 6202, sum loss: 3613.987305, avg loss: 2.307782, ppl: 10.052109 +epoch: 2, batch: 6203, sum loss: 3744.827148, avg loss: 2.218500, ppl: 9.193527 +epoch: 2, batch: 6204, sum loss: 3777.589355, avg loss: 2.304813, ppl: 10.022307 +epoch: 2, batch: 6205, sum loss: 4703.576172, avg loss: 2.678574, ppl: 14.564315 +epoch: 2, batch: 6206, sum loss: 4492.029297, avg loss: 2.572755, ppl: 13.101866 +epoch: 2, batch: 6207, sum loss: 3895.013916, avg loss: 2.279119, ppl: 9.768068 +epoch: 2, batch: 6208, sum loss: 5362.972656, avg loss: 2.972823, ppl: 19.547022 +epoch: 2, batch: 6209, sum loss: 4236.873535, avg loss: 2.736999, ppl: 15.440572 +epoch: 2, batch: 6210, sum loss: 3891.342285, avg loss: 2.476985, ppl: 11.905310 +epoch: 2, batch: 6211, sum loss: 4379.118164, avg loss: 2.406109, ppl: 11.090724 +epoch: 2, batch: 6212, sum loss: 6199.989746, avg loss: 2.993718, ppl: 19.959753 +epoch: 2, batch: 6213, sum loss: 4472.780762, avg loss: 2.778125, ppl: 16.088820 +epoch: 2, batch: 6214, sum loss: 4372.853516, avg loss: 2.601341, ppl: 13.481799 +epoch: 2, batch: 6215, sum loss: 4554.479004, avg loss: 2.701352, ppl: 14.899861 +epoch: 2, batch: 6216, sum loss: 3399.487549, avg loss: 2.424742, ppl: 11.299308 +epoch: 2, batch: 6217, sum loss: 3577.961670, avg loss: 2.467560, ppl: 11.793633 +epoch: 2, batch: 6218, sum loss: 4698.570801, avg loss: 2.548032, ppl: 12.781921 +epoch: 2, batch: 6219, sum loss: 3430.821289, avg loss: 2.313433, ppl: 10.109072 +epoch: 2, batch: 6220, sum loss: 4809.005371, avg loss: 2.579938, ppl: 13.196325 +epoch: 2, batch: 6221, sum loss: 4344.020508, avg loss: 2.410666, ppl: 11.141381 +epoch: 2, batch: 6222, sum loss: 4771.414062, avg loss: 2.857134, ppl: 17.411556 +epoch: 2, batch: 6223, sum loss: 4073.650635, avg loss: 2.483933, ppl: 11.988324 +epoch: 2, batch: 6224, sum loss: 
4973.914062, avg loss: 2.532543, ppl: 12.585469 +epoch: 2, batch: 6225, sum loss: 4435.140137, avg loss: 2.625897, ppl: 13.816965 +epoch: 2, batch: 6226, sum loss: 3595.259521, avg loss: 2.586518, ppl: 13.283432 +epoch: 2, batch: 6227, sum loss: 4995.033203, avg loss: 2.749055, ppl: 15.627859 +epoch: 2, batch: 6228, sum loss: 4704.663086, avg loss: 2.668555, ppl: 14.419123 +epoch: 2, batch: 6229, sum loss: 4980.511230, avg loss: 2.610331, ppl: 13.603551 +epoch: 2, batch: 6230, sum loss: 4252.700195, avg loss: 2.610620, ppl: 13.607485 +epoch: 2, batch: 6231, sum loss: 4441.184570, avg loss: 2.394170, ppl: 10.959093 +epoch: 2, batch: 6232, sum loss: 4332.763672, avg loss: 2.725008, ppl: 15.256543 +epoch: 2, batch: 6233, sum loss: 3844.273926, avg loss: 2.609826, ppl: 13.596689 +epoch: 2, batch: 6234, sum loss: 4811.297363, avg loss: 2.500674, ppl: 12.190711 +epoch: 2, batch: 6235, sum loss: 4350.807129, avg loss: 2.630476, ppl: 13.880375 +epoch: 2, batch: 6236, sum loss: 3688.138428, avg loss: 2.252986, ppl: 9.516110 +epoch: 2, batch: 6237, sum loss: 5118.335938, avg loss: 2.859406, ppl: 17.451149 +epoch: 2, batch: 6238, sum loss: 4292.008789, avg loss: 2.539650, ppl: 12.675237 +epoch: 2, batch: 6239, sum loss: 4294.753906, avg loss: 2.574793, ppl: 13.128594 +epoch: 2, batch: 6240, sum loss: 4401.952637, avg loss: 2.459191, ppl: 11.695350 +epoch: 2, batch: 6241, sum loss: 4900.525391, avg loss: 2.672042, ppl: 14.469487 +epoch: 2, batch: 6242, sum loss: 3701.544678, avg loss: 2.289143, ppl: 9.866482 +epoch: 2, batch: 6243, sum loss: 3585.750000, avg loss: 2.175819, ppl: 8.809399 +epoch: 2, batch: 6244, sum loss: 4762.057617, avg loss: 2.775092, ppl: 16.040100 +epoch: 2, batch: 6245, sum loss: 3659.564209, avg loss: 2.398142, ppl: 11.002713 +epoch: 2, batch: 6246, sum loss: 4471.059082, avg loss: 2.509012, ppl: 12.292779 +epoch: 2, batch: 6247, sum loss: 5095.583984, avg loss: 2.547792, ppl: 12.778859 +epoch: 2, batch: 6248, sum loss: 4406.717773, avg loss: 2.735393, 
ppl: 15.415797 +epoch: 2, batch: 6249, sum loss: 4220.337402, avg loss: 2.402013, ppl: 11.045392 +epoch: 2, batch: 6250, sum loss: 4816.683105, avg loss: 2.867073, ppl: 17.585476 +epoch: 2, batch: 6251, sum loss: 6047.273926, avg loss: 2.771436, ppl: 15.981570 +epoch: 2, batch: 6252, sum loss: 5147.962402, avg loss: 2.577848, ppl: 13.168768 +epoch: 2, batch: 6253, sum loss: 4635.702148, avg loss: 2.584003, ppl: 13.250078 +epoch: 2, batch: 6254, sum loss: 4118.196777, avg loss: 2.570660, ppl: 13.074446 +epoch: 2, batch: 6255, sum loss: 3904.763916, avg loss: 2.360801, ppl: 10.599433 +epoch: 2, batch: 6256, sum loss: 3011.884277, avg loss: 2.136088, ppl: 8.466254 +epoch: 2, batch: 6257, sum loss: 4403.310547, avg loss: 2.382744, ppl: 10.834591 +epoch: 2, batch: 6258, sum loss: 4607.073242, avg loss: 2.650790, ppl: 14.165228 +epoch: 2, batch: 6259, sum loss: 4760.667969, avg loss: 2.573334, ppl: 13.109462 +epoch: 2, batch: 6260, sum loss: 4375.064453, avg loss: 2.358525, ppl: 10.575344 +epoch: 2, batch: 6261, sum loss: 4035.313477, avg loss: 2.387759, ppl: 10.889069 +epoch: 2, batch: 6262, sum loss: 4552.178223, avg loss: 2.576219, ppl: 13.147328 +epoch: 2, batch: 6263, sum loss: 4483.713379, avg loss: 2.738982, ppl: 15.471227 +epoch: 2, batch: 6264, sum loss: 4208.201660, avg loss: 2.391024, ppl: 10.924671 +epoch: 2, batch: 6265, sum loss: 4156.123047, avg loss: 2.430481, ppl: 11.364349 +epoch: 2, batch: 6266, sum loss: 4068.053955, avg loss: 2.503418, ppl: 12.224201 +epoch: 2, batch: 6267, sum loss: 4226.161621, avg loss: 2.340067, ppl: 10.381936 +epoch: 2, batch: 6268, sum loss: 3618.582520, avg loss: 2.402777, ppl: 11.053832 +epoch: 2, batch: 6269, sum loss: 4556.454102, avg loss: 2.672407, ppl: 14.474771 +epoch: 2, batch: 6270, sum loss: 3690.426758, avg loss: 2.473476, ppl: 11.863618 +epoch: 2, batch: 6271, sum loss: 3900.809814, avg loss: 2.536287, ppl: 12.632683 +epoch: 2, batch: 6272, sum loss: 4631.750488, avg loss: 2.677312, ppl: 14.545947 +epoch: 2, batch: 
6273, sum loss: 3980.055176, avg loss: 2.383267, ppl: 10.840257 +epoch: 2, batch: 6274, sum loss: 3394.262939, avg loss: 2.160575, ppl: 8.676125 +epoch: 2, batch: 6275, sum loss: 3731.222412, avg loss: 2.307497, ppl: 10.049240 +epoch: 2, batch: 6276, sum loss: 4902.630859, avg loss: 2.765161, ppl: 15.881601 +epoch: 2, batch: 6277, sum loss: 4502.727539, avg loss: 2.650222, ppl: 14.157186 +epoch: 2, batch: 6278, sum loss: 3688.929688, avg loss: 2.334766, ppl: 10.327040 +epoch: 2, batch: 6279, sum loss: 3193.934326, avg loss: 2.392460, ppl: 10.940375 +epoch: 2, batch: 6280, sum loss: 4690.844727, avg loss: 2.499118, ppl: 12.171755 +epoch: 2, batch: 6281, sum loss: 4195.543945, avg loss: 2.567652, ppl: 13.035188 +epoch: 2, batch: 6282, sum loss: 3732.924805, avg loss: 2.269255, ppl: 9.672194 +epoch: 2, batch: 6283, sum loss: 4086.896973, avg loss: 2.485947, ppl: 12.012492 +epoch: 2, batch: 6284, sum loss: 3963.631836, avg loss: 2.500714, ppl: 12.191196 +epoch: 2, batch: 6285, sum loss: 3825.902588, avg loss: 2.281397, ppl: 9.790347 +epoch: 2, batch: 6286, sum loss: 4737.647461, avg loss: 2.595971, ppl: 13.409603 +epoch: 2, batch: 6287, sum loss: 5986.436523, avg loss: 2.872571, ppl: 17.682413 +epoch: 2, batch: 6288, sum loss: 4601.596680, avg loss: 2.621992, ppl: 13.763117 +epoch: 2, batch: 6289, sum loss: 4161.615723, avg loss: 2.475678, ppl: 11.889771 +epoch: 2, batch: 6290, sum loss: 4717.112305, avg loss: 2.819553, ppl: 16.769356 +epoch: 2, batch: 6291, sum loss: 3507.370605, avg loss: 2.239700, ppl: 9.390516 +epoch: 2, batch: 6292, sum loss: 4978.564453, avg loss: 2.550494, ppl: 12.813435 +epoch: 2, batch: 6293, sum loss: 3874.686035, avg loss: 2.586573, ppl: 13.284164 +epoch: 2, batch: 6294, sum loss: 4668.533203, avg loss: 2.596514, ppl: 13.416891 +epoch: 2, batch: 6295, sum loss: 4202.103027, avg loss: 2.671394, ppl: 14.460114 +epoch: 2, batch: 6296, sum loss: 4269.165527, avg loss: 2.420162, ppl: 11.247681 +epoch: 2, batch: 6297, sum loss: 3693.114258, avg 
loss: 2.409076, ppl: 11.123683 +epoch: 2, batch: 6298, sum loss: 3765.031738, avg loss: 2.451193, ppl: 11.602176 +epoch: 2, batch: 6299, sum loss: 5697.022461, avg loss: 2.857083, ppl: 17.410660 +epoch: 2, batch: 6300, sum loss: 3944.517822, avg loss: 2.450011, ppl: 11.588474 +epoch: 2, batch: 6301, sum loss: 4297.952148, avg loss: 2.433722, ppl: 11.401234 +epoch: 2, batch: 6302, sum loss: 3491.974121, avg loss: 2.271942, ppl: 9.698214 +epoch: 2, batch: 6303, sum loss: 3465.233643, avg loss: 2.175288, ppl: 8.804720 +epoch: 2, batch: 6304, sum loss: 4864.149414, avg loss: 2.779514, ppl: 16.111187 +epoch: 2, batch: 6305, sum loss: 4581.530273, avg loss: 2.646753, ppl: 14.108159 +epoch: 2, batch: 6306, sum loss: 5241.253418, avg loss: 2.648435, ppl: 14.131910 +epoch: 2, batch: 6307, sum loss: 4788.703125, avg loss: 2.468404, ppl: 11.803591 +epoch: 2, batch: 6308, sum loss: 4666.300781, avg loss: 2.828061, ppl: 16.912638 +epoch: 2, batch: 6309, sum loss: 3462.568359, avg loss: 2.264597, ppl: 9.627241 +epoch: 2, batch: 6310, sum loss: 4924.127930, avg loss: 2.565986, ppl: 13.013489 +epoch: 2, batch: 6311, sum loss: 4677.552734, avg loss: 2.666792, ppl: 14.393715 +epoch: 2, batch: 6312, sum loss: 5241.889648, avg loss: 2.839593, ppl: 17.108809 +epoch: 2, batch: 6313, sum loss: 4891.559570, avg loss: 2.671524, ppl: 14.461987 +epoch: 2, batch: 6314, sum loss: 4619.128906, avg loss: 2.634985, ppl: 13.943107 +epoch: 2, batch: 6315, sum loss: 4155.019043, avg loss: 2.351454, ppl: 10.500827 +epoch: 2, batch: 6316, sum loss: 4451.952637, avg loss: 2.574872, ppl: 13.129630 +epoch: 2, batch: 6317, sum loss: 3959.471680, avg loss: 2.505995, ppl: 12.255745 +epoch: 2, batch: 6318, sum loss: 4885.291992, avg loss: 2.696077, ppl: 14.821478 +epoch: 2, batch: 6319, sum loss: 4017.011719, avg loss: 2.606756, ppl: 13.555013 +epoch: 2, batch: 6320, sum loss: 3247.037109, avg loss: 2.240881, ppl: 9.401614 +epoch: 2, batch: 6321, sum loss: 4481.826660, avg loss: 2.639474, ppl: 14.005834 
+epoch: 2, batch: 6322, sum loss: 4350.656250, avg loss: 2.457998, ppl: 11.681402 +epoch: 2, batch: 6323, sum loss: 4613.124512, avg loss: 2.710414, ppl: 15.035498 +epoch: 2, batch: 6324, sum loss: 3813.036865, avg loss: 2.317955, ppl: 10.154891 +epoch: 2, batch: 6325, sum loss: 3869.862793, avg loss: 2.474337, ppl: 11.873831 +epoch: 2, batch: 6326, sum loss: 5344.645996, avg loss: 2.856572, ppl: 17.401770 +epoch: 2, batch: 6327, sum loss: 4634.741211, avg loss: 2.549363, ppl: 12.798944 +epoch: 2, batch: 6328, sum loss: 3954.321777, avg loss: 2.412643, ppl: 11.163426 +epoch: 2, batch: 6329, sum loss: 4033.601562, avg loss: 2.409559, ppl: 11.129052 +epoch: 2, batch: 6330, sum loss: 4698.415527, avg loss: 2.590086, ppl: 13.330914 +epoch: 2, batch: 6331, sum loss: 4339.799805, avg loss: 2.633374, ppl: 13.920655 +epoch: 2, batch: 6332, sum loss: 4410.339844, avg loss: 2.552280, ppl: 12.836337 +epoch: 2, batch: 6333, sum loss: 4261.008301, avg loss: 2.525790, ppl: 12.500772 +epoch: 2, batch: 6334, sum loss: 5152.476562, avg loss: 2.679395, ppl: 14.576271 +epoch: 2, batch: 6335, sum loss: 4064.303955, avg loss: 2.458744, ppl: 11.690120 +epoch: 2, batch: 6336, sum loss: 4299.987793, avg loss: 2.467004, ppl: 11.787078 +epoch: 2, batch: 6337, sum loss: 4640.421875, avg loss: 2.592414, ppl: 13.361994 +epoch: 2, batch: 6338, sum loss: 4790.875488, avg loss: 2.536197, ppl: 12.631538 +epoch: 2, batch: 6339, sum loss: 3994.917969, avg loss: 2.438900, ppl: 11.460424 +epoch: 2, batch: 6340, sum loss: 4127.544922, avg loss: 2.581329, ppl: 13.214687 +epoch: 2, batch: 6341, sum loss: 4128.234375, avg loss: 2.436974, ppl: 11.438379 +epoch: 2, batch: 6342, sum loss: 3065.098145, avg loss: 2.125588, ppl: 8.377824 +epoch: 2, batch: 6343, sum loss: 3694.816895, avg loss: 2.646717, ppl: 14.107648 +epoch: 2, batch: 6344, sum loss: 3590.223145, avg loss: 2.416032, ppl: 11.201320 +epoch: 2, batch: 6345, sum loss: 5220.198730, avg loss: 2.690824, ppl: 14.743824 +epoch: 2, batch: 6346, sum 
loss: 4069.672363, avg loss: 2.391112, ppl: 10.925635 +epoch: 2, batch: 6347, sum loss: 4674.779297, avg loss: 2.618924, ppl: 13.720954 +epoch: 2, batch: 6348, sum loss: 4382.467285, avg loss: 2.683691, ppl: 14.639023 +epoch: 2, batch: 6349, sum loss: 4205.431641, avg loss: 2.680326, ppl: 14.589849 +epoch: 2, batch: 6350, sum loss: 5039.882324, avg loss: 2.795276, ppl: 16.367144 +epoch: 2, batch: 6351, sum loss: 5172.710938, avg loss: 2.893015, ppl: 18.047644 +epoch: 2, batch: 6352, sum loss: 4318.642578, avg loss: 2.379417, ppl: 10.798610 +epoch: 2, batch: 6353, sum loss: 4578.182617, avg loss: 2.599763, ppl: 13.460546 +epoch: 2, batch: 6354, sum loss: 3616.061523, avg loss: 2.272823, ppl: 9.706765 +epoch: 2, batch: 6355, sum loss: 4603.306641, avg loss: 2.627458, ppl: 13.838552 +epoch: 2, batch: 6356, sum loss: 3687.116211, avg loss: 2.467949, ppl: 11.798225 +epoch: 2, batch: 6357, sum loss: 3878.112305, avg loss: 2.474864, ppl: 11.880097 +epoch: 2, batch: 6358, sum loss: 4040.029053, avg loss: 2.390550, ppl: 10.919495 +epoch: 2, batch: 6359, sum loss: 3724.314941, avg loss: 2.432603, ppl: 11.388487 +epoch: 2, batch: 6360, sum loss: 4363.241211, avg loss: 2.551603, ppl: 12.827651 +epoch: 2, batch: 6361, sum loss: 4847.095215, avg loss: 2.418710, ppl: 11.231364 +epoch: 2, batch: 6362, sum loss: 3584.892578, avg loss: 2.274678, ppl: 9.724787 +epoch: 2, batch: 6363, sum loss: 4413.407227, avg loss: 2.419631, ppl: 11.241714 +epoch: 2, batch: 6364, sum loss: 4005.873535, avg loss: 2.530558, ppl: 12.560514 +epoch: 2, batch: 6365, sum loss: 4340.090332, avg loss: 2.515995, ppl: 12.378914 +epoch: 2, batch: 6366, sum loss: 3789.395996, avg loss: 2.402915, ppl: 11.055350 +epoch: 2, batch: 6367, sum loss: 4413.926758, avg loss: 2.774310, ppl: 16.027557 +epoch: 2, batch: 6368, sum loss: 4581.651367, avg loss: 2.463253, ppl: 11.742955 +epoch: 2, batch: 6369, sum loss: 5545.512207, avg loss: 2.713068, ppl: 15.075463 +epoch: 2, batch: 6370, sum loss: 3521.713867, avg loss: 
2.544591, ppl: 12.738016 +epoch: 2, batch: 6371, sum loss: 3998.635254, avg loss: 2.454656, ppl: 11.642432 +epoch: 2, batch: 6372, sum loss: 3638.079102, avg loss: 2.323167, ppl: 10.207948 +epoch: 2, batch: 6373, sum loss: 4510.762207, avg loss: 2.661217, ppl: 14.313695 +epoch: 2, batch: 6374, sum loss: 5413.679199, avg loss: 2.912146, ppl: 18.396233 +epoch: 2, batch: 6375, sum loss: 4655.103516, avg loss: 2.390911, ppl: 10.923442 +epoch: 2, batch: 6376, sum loss: 3799.174805, avg loss: 2.546364, ppl: 12.760619 +epoch: 2, batch: 6377, sum loss: 4700.914062, avg loss: 2.680110, ppl: 14.586704 +epoch: 2, batch: 6378, sum loss: 3633.497803, avg loss: 2.368643, ppl: 10.682881 +epoch: 2, batch: 6379, sum loss: 4027.649170, avg loss: 2.743630, ppl: 15.543307 +epoch: 2, batch: 6380, sum loss: 4499.067871, avg loss: 2.590137, ppl: 13.331598 +epoch: 2, batch: 6381, sum loss: 4584.015625, avg loss: 2.800254, ppl: 16.448822 +epoch: 2, batch: 6382, sum loss: 4154.929199, avg loss: 2.367481, ppl: 10.670480 +epoch: 2, batch: 6383, sum loss: 3909.738281, avg loss: 2.557056, ppl: 12.897790 +epoch: 2, batch: 6384, sum loss: 4572.579102, avg loss: 2.678722, ppl: 14.566471 +epoch: 2, batch: 6385, sum loss: 4849.867188, avg loss: 2.594899, ppl: 13.395240 +epoch: 2, batch: 6386, sum loss: 4101.028809, avg loss: 2.598878, ppl: 13.448636 +epoch: 2, batch: 6387, sum loss: 3718.653076, avg loss: 2.526259, ppl: 12.506631 +epoch: 2, batch: 6388, sum loss: 4189.860840, avg loss: 2.553236, ppl: 12.848618 +epoch: 2, batch: 6389, sum loss: 4317.023438, avg loss: 2.454249, ppl: 11.637687 +epoch: 2, batch: 6390, sum loss: 4263.655273, avg loss: 2.560754, ppl: 12.945576 +epoch: 2, batch: 6391, sum loss: 3987.464844, avg loss: 2.705200, ppl: 14.957307 +epoch: 2, batch: 6392, sum loss: 4524.800781, avg loss: 2.714338, ppl: 15.094608 +epoch: 2, batch: 6393, sum loss: 3690.122559, avg loss: 2.294852, ppl: 9.922970 +epoch: 2, batch: 6394, sum loss: 4305.756836, avg loss: 2.497539, ppl: 12.152551 +epoch: 
2, batch: 6395, sum loss: 4630.105957, avg loss: 2.644264, ppl: 14.073083 +epoch: 2, batch: 6396, sum loss: 4166.637207, avg loss: 2.575177, ppl: 13.133647 +epoch: 2, batch: 6397, sum loss: 3913.245361, avg loss: 2.397822, ppl: 10.999193 +epoch: 2, batch: 6398, sum loss: 3471.891113, avg loss: 2.469339, ppl: 11.814639 +epoch: 2, batch: 6399, sum loss: 3945.173584, avg loss: 2.423325, ppl: 11.283318 +epoch: 2, batch: 6400, sum loss: 5225.660645, avg loss: 2.934116, ppl: 18.804874 +epoch: 2, batch: 6401, sum loss: 4870.357910, avg loss: 2.604470, ppl: 13.524050 +epoch: 2, batch: 6402, sum loss: 5849.058105, avg loss: 2.657455, ppl: 14.259948 +epoch: 2, batch: 6403, sum loss: 4332.404297, avg loss: 2.563553, ppl: 12.981858 +epoch: 2, batch: 6404, sum loss: 4823.653809, avg loss: 2.788239, ppl: 16.252378 +epoch: 2, batch: 6405, sum loss: 5559.301270, avg loss: 2.677891, ppl: 14.554362 +epoch: 2, batch: 6406, sum loss: 4566.629883, avg loss: 2.813697, ppl: 16.671432 +epoch: 2, batch: 6407, sum loss: 4920.661133, avg loss: 2.583024, ppl: 13.237110 +epoch: 2, batch: 6408, sum loss: 4763.842773, avg loss: 2.584831, ppl: 13.261042 +epoch: 2, batch: 6409, sum loss: 4375.505859, avg loss: 2.615365, ppl: 13.672210 +epoch: 2, batch: 6410, sum loss: 5408.459473, avg loss: 2.834622, ppl: 17.023970 +epoch: 2, batch: 6411, sum loss: 3791.411621, avg loss: 2.407245, ppl: 11.103333 +epoch: 2, batch: 6412, sum loss: 4746.384277, avg loss: 2.687647, ppl: 14.697051 +epoch: 2, batch: 6413, sum loss: 4806.512207, avg loss: 2.859317, ppl: 17.449606 +epoch: 2, batch: 6414, sum loss: 4185.317871, avg loss: 2.630621, ppl: 13.882387 +epoch: 2, batch: 6415, sum loss: 4669.517090, avg loss: 2.588424, ppl: 13.308780 +epoch: 2, batch: 6416, sum loss: 4344.896484, avg loss: 2.482798, ppl: 11.974721 +epoch: 2, batch: 6417, sum loss: 3337.461670, avg loss: 2.182774, ppl: 8.870883 +epoch: 2, batch: 6418, sum loss: 4181.990234, avg loss: 2.592678, ppl: 13.365520 +epoch: 2, batch: 6419, sum loss: 
4407.708008, avg loss: 2.411219, ppl: 11.147540 +epoch: 2, batch: 6420, sum loss: 3081.931885, avg loss: 2.372542, ppl: 10.724619 +epoch: 2, batch: 6421, sum loss: 4676.243652, avg loss: 2.519528, ppl: 12.422730 +epoch: 2, batch: 6422, sum loss: 4049.160889, avg loss: 2.343264, ppl: 10.415180 +epoch: 2, batch: 6423, sum loss: 3873.799072, avg loss: 2.497614, ppl: 12.153457 +epoch: 2, batch: 6424, sum loss: 3601.259277, avg loss: 2.466616, ppl: 11.782506 +epoch: 2, batch: 6425, sum loss: 4276.277832, avg loss: 2.534842, ppl: 12.614435 +epoch: 2, batch: 6426, sum loss: 3702.571533, avg loss: 2.419981, ppl: 11.245651 +epoch: 2, batch: 6427, sum loss: 3714.424316, avg loss: 2.438887, ppl: 11.460275 +epoch: 2, batch: 6428, sum loss: 4508.925781, avg loss: 2.703193, ppl: 14.927318 +epoch: 2, batch: 6429, sum loss: 4464.715820, avg loss: 2.705889, ppl: 14.967609 +epoch: 2, batch: 6430, sum loss: 5454.932129, avg loss: 2.860478, ppl: 17.469883 +epoch: 2, batch: 6431, sum loss: 4287.384766, avg loss: 2.789450, ppl: 16.272070 +epoch: 2, batch: 6432, sum loss: 4110.854492, avg loss: 2.523545, ppl: 12.472732 +epoch: 2, batch: 6433, sum loss: 4237.846680, avg loss: 2.759015, ppl: 15.784286 +epoch: 2, batch: 6434, sum loss: 4654.895508, avg loss: 2.676766, ppl: 14.537996 +epoch: 2, batch: 6435, sum loss: 3955.828125, avg loss: 2.578767, ppl: 13.180871 +epoch: 2, batch: 6436, sum loss: 4442.640137, avg loss: 2.525662, ppl: 12.499172 +epoch: 2, batch: 6437, sum loss: 4128.143066, avg loss: 2.535714, ppl: 12.625447 +epoch: 2, batch: 6438, sum loss: 4102.821777, avg loss: 2.540447, ppl: 12.685340 +epoch: 2, batch: 6439, sum loss: 4748.705078, avg loss: 2.593504, ppl: 13.376557 +epoch: 2, batch: 6440, sum loss: 4668.466797, avg loss: 2.619791, ppl: 13.732850 +epoch: 2, batch: 6441, sum loss: 5055.445312, avg loss: 2.616690, ppl: 13.690335 +epoch: 2, batch: 6442, sum loss: 4472.613281, avg loss: 2.604900, ppl: 13.529874 +epoch: 2, batch: 6443, sum loss: 4979.035645, avg loss: 
2.747812, ppl: 15.608447 +epoch: 2, batch: 6444, sum loss: 4139.382812, avg loss: 2.403823, ppl: 11.065395 +epoch: 2, batch: 6445, sum loss: 4200.480469, avg loss: 2.482553, ppl: 11.971795 +epoch: 2, batch: 6446, sum loss: 4955.394531, avg loss: 2.841396, ppl: 17.139673 +epoch: 2, batch: 6447, sum loss: 4191.468262, avg loss: 2.483097, ppl: 11.978308 +epoch: 2, batch: 6448, sum loss: 4018.824707, avg loss: 2.415159, ppl: 11.191552 +epoch: 2, batch: 6449, sum loss: 4525.901855, avg loss: 2.462406, ppl: 11.733004 +epoch: 2, batch: 6450, sum loss: 4589.635742, avg loss: 2.306350, ppl: 10.037715 +epoch: 2, batch: 6451, sum loss: 4937.835449, avg loss: 2.802404, ppl: 16.484226 +epoch: 2, batch: 6452, sum loss: 4507.559570, avg loss: 2.796253, ppl: 16.383139 +epoch: 2, batch: 6453, sum loss: 4416.307617, avg loss: 2.641332, ppl: 14.031887 +epoch: 2, batch: 6454, sum loss: 4103.863281, avg loss: 2.568125, ppl: 13.041346 +epoch: 2, batch: 6455, sum loss: 5200.661133, avg loss: 2.705859, ppl: 14.967171 +epoch: 2, batch: 6456, sum loss: 3682.339355, avg loss: 2.494810, ppl: 12.119429 +epoch: 2, batch: 6457, sum loss: 4593.576172, avg loss: 2.748998, ppl: 15.626972 +epoch: 2, batch: 6458, sum loss: 3751.949707, avg loss: 2.386736, ppl: 10.877934 +epoch: 2, batch: 6459, sum loss: 5118.078125, avg loss: 2.726733, ppl: 15.282880 +epoch: 2, batch: 6460, sum loss: 3873.050049, avg loss: 2.315033, ppl: 10.125257 +epoch: 2, batch: 6461, sum loss: 3903.142090, avg loss: 2.475043, ppl: 11.882213 +epoch: 2, batch: 6462, sum loss: 4092.357910, avg loss: 2.253501, ppl: 9.521010 +epoch: 2, batch: 6463, sum loss: 4330.852539, avg loss: 2.452351, ppl: 11.615627 +epoch: 2, batch: 6464, sum loss: 4646.781250, avg loss: 2.703189, ppl: 14.927254 +epoch: 2, batch: 6465, sum loss: 3947.584473, avg loss: 2.647609, ppl: 14.120234 +epoch: 2, batch: 6466, sum loss: 4188.687500, avg loss: 2.370508, ppl: 10.702827 +epoch: 2, batch: 6467, sum loss: 4049.879883, avg loss: 2.638358, ppl: 13.990214 +epoch: 
2, batch: 6468, sum loss: 4254.041992, avg loss: 2.408857, ppl: 11.121246 +epoch: 2, batch: 6469, sum loss: 4273.608398, avg loss: 2.609040, ppl: 13.586009 +epoch: 2, batch: 6470, sum loss: 4112.027344, avg loss: 2.610811, ppl: 13.610085 +epoch: 2, batch: 6471, sum loss: 4874.503906, avg loss: 2.771179, ppl: 15.977464 +epoch: 2, batch: 6472, sum loss: 3244.448730, avg loss: 2.579053, ppl: 13.184649 +epoch: 2, batch: 6473, sum loss: 2997.238525, avg loss: 2.231749, ppl: 9.316144 +epoch: 2, batch: 6474, sum loss: 5377.333984, avg loss: 2.941649, ppl: 18.947060 +epoch: 2, batch: 6475, sum loss: 5055.449219, avg loss: 2.786907, ppl: 16.230740 +epoch: 2, batch: 6476, sum loss: 3449.750244, avg loss: 2.343580, ppl: 10.418473 +epoch: 2, batch: 6477, sum loss: 4144.497070, avg loss: 2.472850, ppl: 11.856193 +epoch: 2, batch: 6478, sum loss: 3608.014404, avg loss: 2.345913, ppl: 10.442804 +epoch: 2, batch: 6479, sum loss: 4940.662109, avg loss: 2.702769, ppl: 14.920995 +epoch: 2, batch: 6480, sum loss: 4227.257812, avg loss: 2.495430, ppl: 12.126941 +epoch: 2, batch: 6481, sum loss: 4577.840820, avg loss: 2.680235, ppl: 14.588516 +epoch: 2, batch: 6482, sum loss: 3805.335449, avg loss: 2.362095, ppl: 10.613167 +epoch: 2, batch: 6483, sum loss: 4410.187988, avg loss: 2.521548, ppl: 12.447854 +epoch: 2, batch: 6484, sum loss: 3429.349609, avg loss: 2.331305, ppl: 10.291364 +epoch: 2, batch: 6485, sum loss: 3899.402100, avg loss: 2.474240, ppl: 11.872679 +epoch: 2, batch: 6486, sum loss: 4385.343750, avg loss: 2.564528, ppl: 12.994530 +epoch: 2, batch: 6487, sum loss: 4839.017090, avg loss: 2.513775, ppl: 12.351470 +epoch: 2, batch: 6488, sum loss: 4382.916992, avg loss: 2.621362, ppl: 13.754444 +epoch: 2, batch: 6489, sum loss: 4129.323242, avg loss: 2.667521, ppl: 14.404223 +epoch: 2, batch: 6490, sum loss: 4780.795898, avg loss: 2.596847, ppl: 13.421358 +epoch: 2, batch: 6491, sum loss: 4184.234375, avg loss: 2.414446, ppl: 11.183569 +epoch: 2, batch: 6492, sum loss: 
3894.249512, avg loss: 2.509181, ppl: 12.294863 +epoch: 2, batch: 6493, sum loss: 4183.604004, avg loss: 2.517211, ppl: 12.393978 +epoch: 2, batch: 6494, sum loss: 4410.822266, avg loss: 2.549608, ppl: 12.802088 +epoch: 2, batch: 6495, sum loss: 4754.843750, avg loss: 2.855762, ppl: 17.387682 +epoch: 2, batch: 6496, sum loss: 4445.387207, avg loss: 2.511518, ppl: 12.323627 +epoch: 2, batch: 6497, sum loss: 4692.426270, avg loss: 2.692155, ppl: 14.763458 +epoch: 2, batch: 6498, sum loss: 4517.139648, avg loss: 2.781490, ppl: 16.143053 +epoch: 2, batch: 6499, sum loss: 4350.177246, avg loss: 2.529173, ppl: 12.543127 +epoch: 2, batch: 6500, sum loss: 4690.937012, avg loss: 2.509865, ppl: 12.303264 +epoch: 2, batch: 6501, sum loss: 3647.562988, avg loss: 2.315913, ppl: 10.134171 +epoch: 2, batch: 6502, sum loss: 4458.431641, avg loss: 2.503331, ppl: 12.223138 +epoch: 2, batch: 6503, sum loss: 4399.408203, avg loss: 2.563758, ppl: 12.984517 +epoch: 2, batch: 6504, sum loss: 2777.191162, avg loss: 2.022717, ppl: 7.558838 +epoch: 2, batch: 6505, sum loss: 4163.347656, avg loss: 2.350846, ppl: 10.494440 +epoch: 2, batch: 6506, sum loss: 3649.340820, avg loss: 2.213063, ppl: 9.143681 +epoch: 2, batch: 6507, sum loss: 3850.450684, avg loss: 2.356457, ppl: 10.553494 +epoch: 2, batch: 6508, sum loss: 4699.249023, avg loss: 2.689896, ppl: 14.730149 +epoch: 2, batch: 6509, sum loss: 4004.803955, avg loss: 2.433052, ppl: 11.393606 +epoch: 2, batch: 6510, sum loss: 3721.521484, avg loss: 2.519649, ppl: 12.424235 +epoch: 2, batch: 6511, sum loss: 4065.908691, avg loss: 2.500559, ppl: 12.189304 +epoch: 2, batch: 6512, sum loss: 4839.959473, avg loss: 2.474417, ppl: 11.874779 +epoch: 2, batch: 6513, sum loss: 3741.374512, avg loss: 2.519444, ppl: 12.421688 +epoch: 2, batch: 6514, sum loss: 4364.368652, avg loss: 2.645072, ppl: 14.084459 +epoch: 2, batch: 6515, sum loss: 3691.779785, avg loss: 2.383331, ppl: 10.840955 +epoch: 2, batch: 6516, sum loss: 4715.997559, avg loss: 2.622913, 
ppl: 13.775792 +epoch: 2, batch: 6517, sum loss: 3216.245605, avg loss: 2.318851, ppl: 10.163984 +epoch: 2, batch: 6518, sum loss: 5102.183594, avg loss: 2.780482, ppl: 16.126785 +epoch: 2, batch: 6519, sum loss: 3819.193604, avg loss: 2.614096, ppl: 13.654862 +epoch: 2, batch: 6520, sum loss: 4476.600586, avg loss: 2.413262, ppl: 11.170336 +epoch: 2, batch: 6521, sum loss: 5180.833496, avg loss: 2.598211, ppl: 13.439677 +epoch: 2, batch: 6522, sum loss: 3889.245361, avg loss: 2.514056, ppl: 12.354946 +epoch: 2, batch: 6523, sum loss: 4178.174805, avg loss: 2.595140, ppl: 13.398457 +epoch: 2, batch: 6524, sum loss: 4707.008301, avg loss: 2.657825, ppl: 14.265228 +epoch: 2, batch: 6525, sum loss: 3653.968262, avg loss: 2.447400, ppl: 11.558257 +epoch: 2, batch: 6526, sum loss: 3595.869385, avg loss: 2.254464, ppl: 9.530181 +epoch: 2, batch: 6527, sum loss: 4983.275879, avg loss: 2.456026, ppl: 11.658384 +epoch: 2, batch: 6528, sum loss: 4110.530762, avg loss: 2.621512, ppl: 13.756507 +epoch: 2, batch: 6529, sum loss: 3716.408447, avg loss: 2.142022, ppl: 8.516642 +epoch: 2, batch: 6530, sum loss: 3348.930420, avg loss: 2.340273, ppl: 10.384070 +epoch: 2, batch: 6531, sum loss: 4435.319824, avg loss: 2.549034, ppl: 12.794743 +epoch: 2, batch: 6532, sum loss: 4199.712891, avg loss: 2.723549, ppl: 15.234295 +epoch: 2, batch: 6533, sum loss: 4760.444336, avg loss: 2.678922, ppl: 14.569378 +epoch: 2, batch: 6534, sum loss: 4063.411865, avg loss: 2.523858, ppl: 12.476643 +epoch: 2, batch: 6535, sum loss: 5505.998535, avg loss: 2.796343, ppl: 16.384613 +epoch: 2, batch: 6536, sum loss: 4511.807129, avg loss: 2.498232, ppl: 12.160975 +epoch: 2, batch: 6537, sum loss: 4014.489258, avg loss: 2.603430, ppl: 13.509998 +epoch: 2, batch: 6538, sum loss: 4830.545410, avg loss: 2.463307, ppl: 11.743587 +epoch: 2, batch: 6539, sum loss: 4351.068359, avg loss: 2.470794, ppl: 11.831837 +epoch: 2, batch: 6540, sum loss: 3988.218506, avg loss: 2.344632, ppl: 10.429431 +epoch: 2, batch: 
6541, sum loss: 5243.436035, avg loss: 2.843512, ppl: 17.175983 +epoch: 2, batch: 6542, sum loss: 4260.913086, avg loss: 2.455858, ppl: 11.656427 +epoch: 2, batch: 6543, sum loss: 3906.658203, avg loss: 2.283260, ppl: 9.808608 +epoch: 2, batch: 6544, sum loss: 4565.427246, avg loss: 2.563407, ppl: 12.979961 +epoch: 2, batch: 6545, sum loss: 3300.866211, avg loss: 2.383297, ppl: 10.840585 +epoch: 2, batch: 6546, sum loss: 5084.892090, avg loss: 2.947764, ppl: 19.063274 +epoch: 2, batch: 6547, sum loss: 4141.150391, avg loss: 2.499186, ppl: 12.172576 +epoch: 2, batch: 6548, sum loss: 4493.281250, avg loss: 2.535712, ppl: 12.625414 +epoch: 2, batch: 6549, sum loss: 3748.140137, avg loss: 2.457797, ppl: 11.679052 +epoch: 2, batch: 6550, sum loss: 3701.072266, avg loss: 2.333589, ppl: 10.314897 +epoch: 2, batch: 6551, sum loss: 3990.863770, avg loss: 2.466541, ppl: 11.781627 +epoch: 2, batch: 6552, sum loss: 3995.007812, avg loss: 2.542971, ppl: 12.717403 +epoch: 2, batch: 6553, sum loss: 4409.819824, avg loss: 2.626456, ppl: 13.824689 +epoch: 2, batch: 6554, sum loss: 4293.983398, avg loss: 2.460735, ppl: 11.713422 +epoch: 2, batch: 6555, sum loss: 4561.443848, avg loss: 2.416019, ppl: 11.201179 +epoch: 2, batch: 6556, sum loss: 3580.075684, avg loss: 2.422243, ppl: 11.271116 +epoch: 2, batch: 6557, sum loss: 4632.155273, avg loss: 2.423943, ppl: 11.290290 +epoch: 2, batch: 6558, sum loss: 4535.716309, avg loss: 2.701439, ppl: 14.901161 +epoch: 2, batch: 6559, sum loss: 3428.736572, avg loss: 2.230798, ppl: 9.307290 +epoch: 2, batch: 6560, sum loss: 5161.551270, avg loss: 2.899748, ppl: 18.169563 +epoch: 2, batch: 6561, sum loss: 4352.385742, avg loss: 2.531929, ppl: 12.577745 +epoch: 2, batch: 6562, sum loss: 4597.931641, avg loss: 2.580209, ppl: 13.199894 +epoch: 2, batch: 6563, sum loss: 4318.030762, avg loss: 2.503206, ppl: 12.221620 +epoch: 2, batch: 6564, sum loss: 4216.059082, avg loss: 2.481495, ppl: 11.959126 +epoch: 2, batch: 6565, sum loss: 4156.218750, avg 
loss: 2.279879, ppl: 9.775496 +epoch: 2, batch: 6566, sum loss: 3751.977295, avg loss: 2.461927, ppl: 11.727393 +epoch: 2, batch: 6567, sum loss: 3596.097900, avg loss: 2.336646, ppl: 10.346475 +epoch: 2, batch: 6568, sum loss: 4744.278809, avg loss: 2.531632, ppl: 12.574013 +epoch: 2, batch: 6569, sum loss: 4092.153320, avg loss: 2.409984, ppl: 11.133787 +epoch: 2, batch: 6570, sum loss: 4165.048828, avg loss: 2.663075, ppl: 14.340314 +epoch: 2, batch: 6571, sum loss: 4520.723633, avg loss: 2.653007, ppl: 14.196661 +epoch: 2, batch: 6572, sum loss: 3842.826660, avg loss: 2.577349, ppl: 13.162195 +epoch: 2, batch: 6573, sum loss: 3351.244873, avg loss: 2.187497, ppl: 8.912873 +epoch: 2, batch: 6574, sum loss: 4516.347656, avg loss: 2.758917, ppl: 15.782746 +epoch: 2, batch: 6575, sum loss: 4113.264648, avg loss: 2.451290, ppl: 11.603304 +epoch: 2, batch: 6576, sum loss: 3706.729980, avg loss: 2.511335, ppl: 12.321363 +epoch: 2, batch: 6577, sum loss: 4074.660156, avg loss: 2.600294, ppl: 13.467695 +epoch: 2, batch: 6578, sum loss: 4903.529785, avg loss: 2.832773, ppl: 16.992512 +epoch: 2, batch: 6579, sum loss: 5040.716797, avg loss: 2.723240, ppl: 15.229585 +epoch: 2, batch: 6580, sum loss: 4913.274902, avg loss: 2.634464, ppl: 13.935838 +epoch: 2, batch: 6581, sum loss: 3984.611816, avg loss: 2.323389, ppl: 10.210217 +epoch: 2, batch: 6582, sum loss: 4140.541504, avg loss: 2.623917, ppl: 13.789637 +epoch: 2, batch: 6583, sum loss: 4098.646973, avg loss: 2.358255, ppl: 10.572485 +epoch: 2, batch: 6584, sum loss: 4236.729004, avg loss: 2.427925, ppl: 11.335336 +epoch: 2, batch: 6585, sum loss: 4008.281006, avg loss: 2.471197, ppl: 11.836602 +epoch: 2, batch: 6586, sum loss: 4853.086914, avg loss: 2.738762, ppl: 15.467830 +epoch: 2, batch: 6587, sum loss: 4440.634277, avg loss: 2.590802, ppl: 13.340463 +epoch: 2, batch: 6588, sum loss: 5330.890625, avg loss: 2.974828, ppl: 19.586245 +epoch: 2, batch: 6589, sum loss: 4276.275391, avg loss: 2.556052, ppl: 12.884851 
+epoch: 2, batch: 6590, sum loss: 2921.443359, avg loss: 2.275268, ppl: 9.730522 +epoch: 2, batch: 6591, sum loss: 4743.782715, avg loss: 2.424007, ppl: 11.291017 +epoch: 2, batch: 6592, sum loss: 4622.791992, avg loss: 2.573938, ppl: 13.117374 +epoch: 2, batch: 6593, sum loss: 4646.586914, avg loss: 2.804217, ppl: 16.514133 +epoch: 2, batch: 6594, sum loss: 4364.540039, avg loss: 2.753653, ppl: 15.699880 +epoch: 2, batch: 6595, sum loss: 4626.799805, avg loss: 2.758974, ppl: 15.783642 +epoch: 2, batch: 6596, sum loss: 5081.259766, avg loss: 2.721617, ppl: 15.204896 +epoch: 2, batch: 6597, sum loss: 4159.076660, avg loss: 2.372548, ppl: 10.724685 +epoch: 2, batch: 6598, sum loss: 4029.765869, avg loss: 2.306678, ppl: 10.041009 +epoch: 2, batch: 6599, sum loss: 4026.416748, avg loss: 2.524399, ppl: 12.483394 +epoch: 2, batch: 6600, sum loss: 4622.653320, avg loss: 2.561027, ppl: 12.949107 +epoch: 2, batch: 6601, sum loss: 5086.170898, avg loss: 2.718424, ppl: 15.156415 +epoch: 2, batch: 6602, sum loss: 4690.273926, avg loss: 2.723736, ppl: 15.237146 +epoch: 2, batch: 6603, sum loss: 4350.230469, avg loss: 2.698654, ppl: 14.859719 +epoch: 2, batch: 6604, sum loss: 4241.001953, avg loss: 2.542567, ppl: 12.712265 +epoch: 2, batch: 6605, sum loss: 5305.980469, avg loss: 2.848084, ppl: 17.254690 +epoch: 2, batch: 6606, sum loss: 4590.271484, avg loss: 2.678105, ppl: 14.557475 +epoch: 2, batch: 6607, sum loss: 4109.830078, avg loss: 2.318009, ppl: 10.155437 +epoch: 2, batch: 6608, sum loss: 4490.192383, avg loss: 2.606032, ppl: 13.545192 +epoch: 2, batch: 6609, sum loss: 4225.776367, avg loss: 2.503422, ppl: 12.224255 +epoch: 2, batch: 6610, sum loss: 4164.507324, avg loss: 2.511766, ppl: 12.326676 +epoch: 2, batch: 6611, sum loss: 4692.756836, avg loss: 2.631944, ppl: 13.900772 +epoch: 2, batch: 6612, sum loss: 4240.813477, avg loss: 2.653826, ppl: 14.208292 +epoch: 2, batch: 6613, sum loss: 4040.483398, avg loss: 2.392234, ppl: 10.937902 +epoch: 2, batch: 6614, sum 
loss: 4527.316895, avg loss: 2.543437, ppl: 12.723320 +epoch: 2, batch: 6615, sum loss: 3477.523193, avg loss: 2.341766, ppl: 10.399590 +epoch: 2, batch: 6616, sum loss: 4380.395508, avg loss: 2.352522, ppl: 10.512047 +epoch: 2, batch: 6617, sum loss: 3908.822754, avg loss: 2.343419, ppl: 10.416792 +epoch: 2, batch: 6618, sum loss: 5126.426758, avg loss: 2.476535, ppl: 11.899958 +epoch: 2, batch: 6619, sum loss: 3950.440918, avg loss: 2.715080, ppl: 15.105812 +epoch: 2, batch: 6620, sum loss: 4020.606934, avg loss: 2.528684, ppl: 12.536992 +epoch: 2, batch: 6621, sum loss: 3889.177002, avg loss: 2.458392, ppl: 11.686009 +epoch: 2, batch: 6622, sum loss: 4223.963379, avg loss: 2.620325, ppl: 13.740183 +epoch: 2, batch: 6623, sum loss: 4169.443848, avg loss: 2.596167, ppl: 13.412225 +epoch: 2, batch: 6624, sum loss: 5126.881348, avg loss: 2.857794, ppl: 17.423042 +epoch: 2, batch: 6625, sum loss: 3689.367188, avg loss: 2.290110, ppl: 9.876025 +epoch: 2, batch: 6626, sum loss: 4239.834473, avg loss: 2.680047, ppl: 14.585779 +epoch: 2, batch: 6627, sum loss: 4241.313965, avg loss: 2.526095, ppl: 12.504582 +epoch: 2, batch: 6628, sum loss: 4619.301758, avg loss: 2.690333, ppl: 14.736585 +epoch: 2, batch: 6629, sum loss: 4259.956055, avg loss: 2.662472, ppl: 14.331680 +epoch: 2, batch: 6630, sum loss: 4375.827637, avg loss: 2.819477, ppl: 16.768076 +epoch: 2, batch: 6631, sum loss: 4112.065430, avg loss: 2.365976, ppl: 10.654428 +epoch: 2, batch: 6632, sum loss: 3437.791260, avg loss: 2.233783, ppl: 9.335110 +epoch: 2, batch: 6633, sum loss: 3264.875000, avg loss: 2.240820, ppl: 9.401038 +epoch: 2, batch: 6634, sum loss: 5065.123535, avg loss: 2.873014, ppl: 17.690256 +epoch: 2, batch: 6635, sum loss: 3669.967041, avg loss: 2.309608, ppl: 10.070477 +epoch: 2, batch: 6636, sum loss: 3915.372559, avg loss: 2.711477, ppl: 15.051487 +epoch: 2, batch: 6637, sum loss: 4539.449219, avg loss: 2.551686, ppl: 12.828712 +epoch: 2, batch: 6638, sum loss: 4963.296875, avg loss: 
2.740639, ppl: 15.496884 +epoch: 2, batch: 6639, sum loss: 4644.724609, avg loss: 2.570406, ppl: 13.071136 +epoch: 2, batch: 6640, sum loss: 4156.501465, avg loss: 2.543759, ppl: 12.727419 +epoch: 2, batch: 6641, sum loss: 3560.071777, avg loss: 2.470556, ppl: 11.829028 +epoch: 2, batch: 6642, sum loss: 3799.514160, avg loss: 2.321023, ppl: 10.186087 +epoch: 2, batch: 6643, sum loss: 4769.870605, avg loss: 2.511780, ppl: 12.326856 +epoch: 2, batch: 6644, sum loss: 4256.686523, avg loss: 2.667097, ppl: 14.398108 +epoch: 2, batch: 6645, sum loss: 4160.541504, avg loss: 2.595472, ppl: 13.402910 +epoch: 2, batch: 6646, sum loss: 4968.062500, avg loss: 2.665270, ppl: 14.371824 +epoch: 2, batch: 6647, sum loss: 4255.034668, avg loss: 2.557112, ppl: 12.898516 +epoch: 2, batch: 6648, sum loss: 4673.255859, avg loss: 2.491075, ppl: 12.074244 +epoch: 2, batch: 6649, sum loss: 4166.834961, avg loss: 2.298310, ppl: 9.957336 +epoch: 2, batch: 6650, sum loss: 3690.186035, avg loss: 2.345954, ppl: 10.443233 +epoch: 2, batch: 6651, sum loss: 4458.501465, avg loss: 2.399624, ppl: 11.019031 +epoch: 2, batch: 6652, sum loss: 3334.347900, avg loss: 2.257514, ppl: 9.559296 +epoch: 2, batch: 6653, sum loss: 4232.295410, avg loss: 2.493987, ppl: 12.109459 +epoch: 2, batch: 6654, sum loss: 4261.372070, avg loss: 2.511121, ppl: 12.318732 +epoch: 2, batch: 6655, sum loss: 4529.055176, avg loss: 2.718520, ppl: 15.157878 +epoch: 2, batch: 6656, sum loss: 4560.391113, avg loss: 2.645239, ppl: 14.086806 +epoch: 2, batch: 6657, sum loss: 3542.407715, avg loss: 2.330532, ppl: 10.283406 +epoch: 2, batch: 6658, sum loss: 4728.501953, avg loss: 2.662445, ppl: 14.331284 +epoch: 2, batch: 6659, sum loss: 4422.809570, avg loss: 2.620148, ppl: 13.737756 +epoch: 2, batch: 6660, sum loss: 4525.514648, avg loss: 2.545284, ppl: 12.746848 +epoch: 2, batch: 6661, sum loss: 3907.186523, avg loss: 2.405903, ppl: 11.088440 +epoch: 2, batch: 6662, sum loss: 3025.986328, avg loss: 2.110172, ppl: 8.249659 +epoch: 
2, batch: 6663, sum loss: 4129.861816, avg loss: 2.512081, ppl: 12.330568 +epoch: 2, batch: 6664, sum loss: 4530.069824, avg loss: 2.660053, ppl: 14.297044 +epoch: 2, batch: 6665, sum loss: 4753.059570, avg loss: 2.639122, ppl: 14.000913 +epoch: 2, batch: 6666, sum loss: 4031.581299, avg loss: 2.417015, ppl: 11.212344 +epoch: 2, batch: 6667, sum loss: 4306.416016, avg loss: 2.546668, ppl: 12.764505 +epoch: 2, batch: 6668, sum loss: 4260.685547, avg loss: 2.419469, ppl: 11.239893 +epoch: 2, batch: 6669, sum loss: 4009.364746, avg loss: 2.362619, ppl: 10.618728 +epoch: 2, batch: 6670, sum loss: 4610.396484, avg loss: 2.794180, ppl: 16.349211 +epoch: 2, batch: 6671, sum loss: 4479.548828, avg loss: 2.441171, ppl: 11.486485 +epoch: 2, batch: 6672, sum loss: 3989.499023, avg loss: 2.270631, ppl: 9.685512 +epoch: 2, batch: 6673, sum loss: 4139.198242, avg loss: 2.604908, ppl: 13.529977 +epoch: 2, batch: 6674, sum loss: 4232.066406, avg loss: 2.457646, ppl: 11.677290 +epoch: 2, batch: 6675, sum loss: 3995.332520, avg loss: 2.365502, ppl: 10.649383 +epoch: 2, batch: 6676, sum loss: 3541.152100, avg loss: 2.191307, ppl: 8.946899 +epoch: 2, batch: 6677, sum loss: 4327.239258, avg loss: 2.350483, ppl: 10.490638 +epoch: 2, batch: 6678, sum loss: 4682.503906, avg loss: 2.561545, ppl: 12.955817 +epoch: 2, batch: 6679, sum loss: 2948.585938, avg loss: 2.177685, ppl: 8.825853 +epoch: 2, batch: 6680, sum loss: 3636.874756, avg loss: 2.290224, ppl: 9.877146 +epoch: 2, batch: 6681, sum loss: 4618.336426, avg loss: 2.664937, ppl: 14.367048 +epoch: 2, batch: 6682, sum loss: 3882.660156, avg loss: 2.269235, ppl: 9.671995 +epoch: 2, batch: 6683, sum loss: 3916.517090, avg loss: 2.452421, ppl: 11.616441 +epoch: 2, batch: 6684, sum loss: 4715.090820, avg loss: 2.556991, ppl: 12.896947 +epoch: 2, batch: 6685, sum loss: 4716.430664, avg loss: 2.437432, ppl: 11.443614 +epoch: 2, batch: 6686, sum loss: 4476.144043, avg loss: 2.651744, ppl: 14.178746 +epoch: 2, batch: 6687, sum loss: 
3699.139648, avg loss: 2.297602, ppl: 9.950294 +epoch: 2, batch: 6688, sum loss: 4066.528320, avg loss: 2.345172, ppl: 10.435066 +epoch: 2, batch: 6689, sum loss: 3467.426758, avg loss: 2.275214, ppl: 9.730005 +epoch: 2, batch: 6690, sum loss: 3754.456055, avg loss: 2.555790, ppl: 12.881474 +epoch: 2, batch: 6691, sum loss: 3898.841553, avg loss: 2.553269, ppl: 12.849037 +epoch: 2, batch: 6692, sum loss: 4218.153809, avg loss: 2.624862, ppl: 13.802675 +epoch: 2, batch: 6693, sum loss: 3971.552002, avg loss: 2.631910, ppl: 13.900295 +epoch: 2, batch: 6694, sum loss: 3960.285156, avg loss: 2.661482, ppl: 14.317490 +epoch: 2, batch: 6695, sum loss: 4227.122070, avg loss: 2.454775, ppl: 11.643809 +epoch: 2, batch: 6696, sum loss: 4717.035156, avg loss: 2.593202, ppl: 13.372527 +epoch: 2, batch: 6697, sum loss: 3653.050781, avg loss: 2.420842, ppl: 11.255334 +epoch: 2, batch: 6698, sum loss: 3533.656250, avg loss: 2.402214, ppl: 11.047604 +epoch: 2, batch: 6699, sum loss: 2974.098633, avg loss: 2.427836, ppl: 11.334325 +epoch: 2, batch: 6700, sum loss: 3963.929443, avg loss: 2.471278, ppl: 11.837563 +epoch: 2, batch: 6701, sum loss: 4672.759766, avg loss: 2.542307, ppl: 12.708953 +epoch: 2, batch: 6702, sum loss: 4655.157715, avg loss: 2.709638, ppl: 15.023834 +epoch: 2, batch: 6703, sum loss: 3804.948242, avg loss: 2.482028, ppl: 11.965500 +epoch: 2, batch: 6704, sum loss: 4469.748047, avg loss: 2.552683, ppl: 12.841513 +epoch: 2, batch: 6705, sum loss: 3628.917969, avg loss: 2.472015, ppl: 11.846292 +epoch: 2, batch: 6706, sum loss: 3522.014648, avg loss: 2.332460, ppl: 10.303258 +epoch: 2, batch: 6707, sum loss: 4245.735840, avg loss: 2.721626, ppl: 15.205019 +epoch: 2, batch: 6708, sum loss: 4829.032715, avg loss: 2.822345, ppl: 16.816246 +epoch: 2, batch: 6709, sum loss: 3560.664551, avg loss: 2.286875, ppl: 9.844130 +epoch: 2, batch: 6710, sum loss: 3749.521973, avg loss: 2.473300, ppl: 11.861520 +epoch: 2, batch: 6711, sum loss: 5414.799316, avg loss: 2.752821, 
ppl: 15.686826 +epoch: 2, batch: 6712, sum loss: 4211.400391, avg loss: 2.418955, ppl: 11.234112 +epoch: 2, batch: 6713, sum loss: 4688.409180, avg loss: 2.617760, ppl: 13.704996 +epoch: 2, batch: 6714, sum loss: 4997.813477, avg loss: 2.598967, ppl: 13.449835 +epoch: 2, batch: 6715, sum loss: 4349.330078, avg loss: 2.624822, ppl: 13.802115 +epoch: 2, batch: 6716, sum loss: 3723.409668, avg loss: 2.475671, ppl: 11.889686 +epoch: 2, batch: 6717, sum loss: 4027.370361, avg loss: 2.455714, ppl: 11.654749 +epoch: 2, batch: 6718, sum loss: 3682.543945, avg loss: 2.536187, ppl: 12.631421 +epoch: 2, batch: 6719, sum loss: 3547.537354, avg loss: 2.405110, ppl: 11.079650 +epoch: 2, batch: 6720, sum loss: 4156.281738, avg loss: 2.384556, ppl: 10.854245 +epoch: 2, batch: 6721, sum loss: 4496.532227, avg loss: 2.651257, ppl: 14.171845 +epoch: 2, batch: 6722, sum loss: 4773.450684, avg loss: 2.569134, ppl: 13.054511 +epoch: 2, batch: 6723, sum loss: 4913.660156, avg loss: 2.793440, ppl: 16.337116 +epoch: 2, batch: 6724, sum loss: 4433.088867, avg loss: 2.577377, ppl: 13.162572 +epoch: 2, batch: 6725, sum loss: 3727.454590, avg loss: 2.348743, ppl: 10.472395 +epoch: 2, batch: 6726, sum loss: 3474.702881, avg loss: 2.332016, ppl: 10.298677 +epoch: 2, batch: 6727, sum loss: 3390.131836, avg loss: 2.357533, ppl: 10.564851 +epoch: 2, batch: 6728, sum loss: 4695.476562, avg loss: 2.714148, ppl: 15.091751 +epoch: 2, batch: 6729, sum loss: 4515.287598, avg loss: 2.848762, ppl: 17.266390 +epoch: 2, batch: 6730, sum loss: 4331.674316, avg loss: 2.667287, ppl: 14.400848 +epoch: 2, batch: 6731, sum loss: 4992.451660, avg loss: 2.758261, ppl: 15.772387 +epoch: 2, batch: 6732, sum loss: 4256.213867, avg loss: 2.619209, ppl: 13.724857 +epoch: 2, batch: 6733, sum loss: 4018.114014, avg loss: 2.595681, ppl: 13.405713 +epoch: 2, batch: 6734, sum loss: 4775.965332, avg loss: 2.690685, ppl: 14.741764 +epoch: 2, batch: 6735, sum loss: 2946.404053, avg loss: 2.133529, ppl: 8.444619 +epoch: 2, batch: 
6736, sum loss: 3622.847656, avg loss: 2.437986, ppl: 11.449962 +epoch: 2, batch: 6737, sum loss: 4017.354492, avg loss: 2.335671, ppl: 10.336395 +epoch: 2, batch: 6738, sum loss: 5247.658203, avg loss: 2.821322, ppl: 16.799040 +epoch: 2, batch: 6739, sum loss: 4108.689453, avg loss: 2.490115, ppl: 12.062662 +epoch: 2, batch: 6740, sum loss: 4860.954590, avg loss: 2.771354, ppl: 15.980252 +epoch: 2, batch: 6741, sum loss: 4871.385254, avg loss: 2.721444, ppl: 15.202260 +epoch: 2, batch: 6742, sum loss: 4032.134766, avg loss: 2.457120, ppl: 11.671155 +epoch: 2, batch: 6743, sum loss: 4165.823242, avg loss: 2.605268, ppl: 13.534849 +epoch: 2, batch: 6744, sum loss: 3962.824463, avg loss: 2.364454, ppl: 10.638227 +epoch: 2, batch: 6745, sum loss: 3705.745361, avg loss: 2.265126, ppl: 9.632334 +epoch: 2, batch: 6746, sum loss: 4397.084961, avg loss: 2.504035, ppl: 12.231747 +epoch: 2, batch: 6747, sum loss: 4883.287109, avg loss: 2.784086, ppl: 16.185019 +epoch: 2, batch: 6748, sum loss: 4308.875488, avg loss: 2.583259, ppl: 13.240212 +epoch: 2, batch: 6749, sum loss: 5262.428223, avg loss: 2.765333, ppl: 15.884324 +epoch: 2, batch: 6750, sum loss: 4393.810547, avg loss: 2.530997, ppl: 12.566026 +epoch: 2, batch: 6751, sum loss: 4472.835449, avg loss: 2.637285, ppl: 13.975209 +epoch: 2, batch: 6752, sum loss: 4268.108887, avg loss: 2.607275, ppl: 13.562044 +epoch: 2, batch: 6753, sum loss: 3520.044922, avg loss: 2.317343, ppl: 10.148678 +epoch: 2, batch: 6754, sum loss: 4649.253418, avg loss: 2.637126, ppl: 13.972990 +epoch: 2, batch: 6755, sum loss: 4277.872070, avg loss: 2.690485, ppl: 14.738830 +epoch: 2, batch: 6756, sum loss: 4475.108398, avg loss: 2.407266, ppl: 11.103567 +epoch: 2, batch: 6757, sum loss: 3846.295898, avg loss: 2.533792, ppl: 12.601196 +epoch: 2, batch: 6758, sum loss: 3508.162598, avg loss: 2.173583, ppl: 8.789720 +epoch: 2, batch: 6759, sum loss: 3958.958252, avg loss: 2.542684, ppl: 12.713744 +epoch: 2, batch: 6760, sum loss: 3335.058838, avg 
loss: 2.300041, ppl: 9.974588 +epoch: 2, batch: 6761, sum loss: 3371.226562, avg loss: 2.323381, ppl: 10.210141 +epoch: 2, batch: 6762, sum loss: 4586.468750, avg loss: 2.602990, ppl: 13.504060 +epoch: 2, batch: 6763, sum loss: 3960.902344, avg loss: 2.526086, ppl: 12.504463 +epoch: 2, batch: 6764, sum loss: 4038.374756, avg loss: 2.365773, ppl: 10.652272 +epoch: 2, batch: 6765, sum loss: 4194.030273, avg loss: 2.529572, ppl: 12.548135 +epoch: 2, batch: 6766, sum loss: 4610.719727, avg loss: 2.516768, ppl: 12.388498 +epoch: 2, batch: 6767, sum loss: 5316.961914, avg loss: 2.880261, ppl: 17.818922 +epoch: 2, batch: 6768, sum loss: 3783.616455, avg loss: 2.527466, ppl: 12.521733 +epoch: 2, batch: 6769, sum loss: 3923.703613, avg loss: 2.617548, ppl: 13.702078 +epoch: 2, batch: 6770, sum loss: 3757.821777, avg loss: 2.490273, ppl: 12.064569 +epoch: 2, batch: 6771, sum loss: 3573.810059, avg loss: 2.301230, ppl: 9.986458 +epoch: 2, batch: 6772, sum loss: 3251.309082, avg loss: 2.392428, ppl: 10.940021 +epoch: 2, batch: 6773, sum loss: 4413.233398, avg loss: 2.517532, ppl: 12.397959 +epoch: 2, batch: 6774, sum loss: 4033.835449, avg loss: 2.459656, ppl: 11.700783 +epoch: 2, batch: 6775, sum loss: 5080.999023, avg loss: 2.865764, ppl: 17.562464 +epoch: 2, batch: 6776, sum loss: 3611.543457, avg loss: 2.237635, ppl: 9.371146 +epoch: 2, batch: 6777, sum loss: 4693.382812, avg loss: 2.600212, ppl: 13.466594 +epoch: 2, batch: 6778, sum loss: 4536.629883, avg loss: 2.402876, ppl: 11.054924 +epoch: 2, batch: 6779, sum loss: 4277.603027, avg loss: 2.690316, ppl: 14.736339 +epoch: 2, batch: 6780, sum loss: 3870.135254, avg loss: 2.479267, ppl: 11.932510 +epoch: 2, batch: 6781, sum loss: 4273.332031, avg loss: 2.568108, ppl: 13.041131 +epoch: 2, batch: 6782, sum loss: 3823.048096, avg loss: 2.531820, ppl: 12.576375 +epoch: 2, batch: 6783, sum loss: 3869.823486, avg loss: 2.277707, ppl: 9.754285 +epoch: 2, batch: 6784, sum loss: 3474.380615, avg loss: 2.402753, ppl: 11.053564 
+epoch: 2, batch: 6785, sum loss: 4269.392578, avg loss: 2.538283, ppl: 12.657923 +epoch: 2, batch: 6786, sum loss: 4237.578613, avg loss: 2.535954, ppl: 12.628469 +epoch: 2, batch: 6787, sum loss: 4266.434570, avg loss: 2.489168, ppl: 12.051250 +epoch: 2, batch: 6788, sum loss: 3689.581787, avg loss: 2.266328, ppl: 9.643922 +epoch: 2, batch: 6789, sum loss: 4529.749023, avg loss: 2.796141, ppl: 16.381315 +epoch: 2, batch: 6790, sum loss: 4638.039551, avg loss: 2.668607, ppl: 14.419875 +epoch: 2, batch: 6791, sum loss: 4293.666992, avg loss: 2.382723, ppl: 10.834366 +epoch: 2, batch: 6792, sum loss: 4178.791504, avg loss: 2.541844, ppl: 12.703073 +epoch: 2, batch: 6793, sum loss: 4976.372070, avg loss: 2.707493, ppl: 14.991642 +epoch: 2, batch: 6794, sum loss: 3778.291504, avg loss: 2.498870, ppl: 12.168737 +epoch: 2, batch: 6795, sum loss: 4374.006836, avg loss: 2.424616, ppl: 11.297889 +epoch: 2, batch: 6796, sum loss: 3758.937988, avg loss: 2.564078, ppl: 12.988675 +epoch: 2, batch: 6797, sum loss: 5006.700195, avg loss: 2.769192, ppl: 15.945752 +epoch: 2, batch: 6798, sum loss: 4614.405762, avg loss: 2.476868, ppl: 11.903928 +epoch: 2, batch: 6799, sum loss: 3661.348633, avg loss: 2.489020, ppl: 12.049463 +epoch: 2, batch: 6800, sum loss: 5041.786133, avg loss: 2.659170, ppl: 14.284431 +epoch: 2, batch: 6801, sum loss: 4649.740234, avg loss: 2.556207, ppl: 12.886844 +epoch: 2, batch: 6802, sum loss: 3985.854736, avg loss: 2.581512, ppl: 13.217110 +epoch: 2, batch: 6803, sum loss: 6071.769043, avg loss: 3.128165, ppl: 22.832050 +epoch: 2, batch: 6804, sum loss: 3915.355957, avg loss: 2.439474, ppl: 11.467009 +epoch: 2, batch: 6805, sum loss: 3228.971924, avg loss: 2.323001, ppl: 10.206262 +epoch: 2, batch: 6806, sum loss: 4045.003906, avg loss: 2.591290, ppl: 13.346981 +epoch: 2, batch: 6807, sum loss: 3889.431641, avg loss: 2.577490, ppl: 13.164050 +epoch: 2, batch: 6808, sum loss: 4518.107910, avg loss: 2.733278, ppl: 15.383224 +epoch: 2, batch: 6809, sum 
loss: 4264.599609, avg loss: 2.731966, ppl: 15.363069 +epoch: 2, batch: 6810, sum loss: 3820.244141, avg loss: 2.325164, ppl: 10.228354 +epoch: 2, batch: 6811, sum loss: 3998.428955, avg loss: 2.453024, ppl: 11.623442 +epoch: 2, batch: 6812, sum loss: 3672.114746, avg loss: 2.419048, ppl: 11.235156 +epoch: 2, batch: 6813, sum loss: 4491.346191, avg loss: 2.895774, ppl: 18.097509 +epoch: 2, batch: 6814, sum loss: 3636.886719, avg loss: 2.394264, ppl: 10.960126 +epoch: 2, batch: 6815, sum loss: 3600.343506, avg loss: 2.395438, ppl: 10.973002 +epoch: 2, batch: 6816, sum loss: 4058.539062, avg loss: 2.411491, ppl: 11.150571 +epoch: 2, batch: 6817, sum loss: 5499.476562, avg loss: 2.719820, ppl: 15.177590 +epoch: 2, batch: 6818, sum loss: 5281.771484, avg loss: 2.735252, ppl: 15.413626 +epoch: 2, batch: 6819, sum loss: 3199.145996, avg loss: 2.272121, ppl: 9.699950 +epoch: 2, batch: 6820, sum loss: 5626.510254, avg loss: 2.789544, ppl: 16.273600 +epoch: 2, batch: 6821, sum loss: 4174.976562, avg loss: 2.407714, ppl: 11.108540 +epoch: 2, batch: 6822, sum loss: 3639.152100, avg loss: 2.346326, ppl: 10.447118 +epoch: 2, batch: 6823, sum loss: 3891.163086, avg loss: 2.497537, ppl: 12.152530 +epoch: 2, batch: 6824, sum loss: 3219.439941, avg loss: 2.237276, ppl: 9.367777 +epoch: 2, batch: 6825, sum loss: 4083.494629, avg loss: 2.416269, ppl: 11.203979 +epoch: 2, batch: 6826, sum loss: 4300.962891, avg loss: 2.654916, ppl: 14.223785 +epoch: 2, batch: 6827, sum loss: 5249.832031, avg loss: 2.675755, ppl: 14.523314 +epoch: 2, batch: 6828, sum loss: 4846.582520, avg loss: 2.674714, ppl: 14.508204 +epoch: 2, batch: 6829, sum loss: 4271.620117, avg loss: 2.463449, ppl: 11.745249 +epoch: 2, batch: 6830, sum loss: 3544.295654, avg loss: 2.366018, ppl: 10.654883 +epoch: 2, batch: 6831, sum loss: 4484.832520, avg loss: 2.672725, ppl: 14.479371 +epoch: 2, batch: 6832, sum loss: 3897.357666, avg loss: 2.388087, ppl: 10.892634 +epoch: 2, batch: 6833, sum loss: 4169.024414, avg loss: 
2.560826, ppl: 12.946505 +epoch: 2, batch: 6834, sum loss: 4947.772461, avg loss: 2.697804, ppl: 14.847088 +epoch: 2, batch: 6835, sum loss: 3859.356934, avg loss: 2.304094, ppl: 10.015099 +epoch: 2, batch: 6836, sum loss: 3930.911865, avg loss: 2.518201, ppl: 12.406260 +epoch: 2, batch: 6837, sum loss: 3737.906738, avg loss: 2.301667, ppl: 9.990821 +epoch: 2, batch: 6838, sum loss: 3713.064697, avg loss: 2.336730, ppl: 10.347350 +epoch: 2, batch: 6839, sum loss: 4207.510742, avg loss: 2.501493, ppl: 12.200693 +epoch: 2, batch: 6840, sum loss: 4488.459473, avg loss: 2.499142, ppl: 12.172050 +epoch: 2, batch: 6841, sum loss: 3793.436279, avg loss: 2.419283, ppl: 11.237803 +epoch: 2, batch: 6842, sum loss: 4714.238281, avg loss: 2.664917, ppl: 14.366760 +epoch: 2, batch: 6843, sum loss: 3965.296143, avg loss: 2.465980, ppl: 11.775017 +epoch: 2, batch: 6844, sum loss: 4604.406250, avg loss: 2.576612, ppl: 13.152508 +epoch: 2, batch: 6845, sum loss: 4452.564453, avg loss: 2.349638, ppl: 10.481777 +epoch: 2, batch: 6846, sum loss: 4285.102539, avg loss: 2.479805, ppl: 11.938933 +epoch: 2, batch: 6847, sum loss: 4636.213867, avg loss: 2.622293, ppl: 13.767256 +epoch: 2, batch: 6848, sum loss: 3069.809082, avg loss: 2.178715, ppl: 8.834944 +epoch: 2, batch: 6849, sum loss: 3904.398926, avg loss: 2.616889, ppl: 13.693065 +epoch: 2, batch: 6850, sum loss: 4461.426270, avg loss: 2.658776, ppl: 14.278802 +epoch: 2, batch: 6851, sum loss: 4538.801270, avg loss: 2.693650, ppl: 14.785552 +epoch: 2, batch: 6852, sum loss: 5315.750977, avg loss: 2.777299, ppl: 16.075548 +epoch: 2, batch: 6853, sum loss: 4217.010254, avg loss: 2.457465, ppl: 11.675179 +epoch: 2, batch: 6854, sum loss: 4566.172363, avg loss: 2.643991, ppl: 14.069242 +epoch: 2, batch: 6855, sum loss: 4076.303955, avg loss: 2.601343, ppl: 13.481831 +epoch: 2, batch: 6856, sum loss: 4532.162109, avg loss: 2.404330, ppl: 11.071011 +epoch: 2, batch: 6857, sum loss: 3653.836670, avg loss: 2.424577, ppl: 11.297444 +epoch: 
2, batch: 6858, sum loss: 5248.643555, avg loss: 2.753748, ppl: 15.701365 +epoch: 2, batch: 6859, sum loss: 4213.608887, avg loss: 2.496214, ppl: 12.136457 +epoch: 2, batch: 6860, sum loss: 3502.423584, avg loss: 2.296671, ppl: 9.941035 +epoch: 2, batch: 6861, sum loss: 4072.464355, avg loss: 2.367712, ppl: 10.672942 +epoch: 2, batch: 6862, sum loss: 4083.656250, avg loss: 2.474943, ppl: 11.881032 +epoch: 2, batch: 6863, sum loss: 3614.027344, avg loss: 2.275836, ppl: 9.736054 +epoch: 2, batch: 6864, sum loss: 4491.615234, avg loss: 2.576945, ppl: 13.156883 +epoch: 2, batch: 6865, sum loss: 3639.173828, avg loss: 2.387909, ppl: 10.890702 +epoch: 2, batch: 6866, sum loss: 4935.743652, avg loss: 2.522097, ppl: 12.454685 +epoch: 2, batch: 6867, sum loss: 5119.008789, avg loss: 2.744777, ppl: 15.561139 +epoch: 2, batch: 6868, sum loss: 4828.805176, avg loss: 2.789604, ppl: 16.274576 +epoch: 2, batch: 6869, sum loss: 4460.412598, avg loss: 2.755042, ppl: 15.721699 +epoch: 2, batch: 6870, sum loss: 4850.365723, avg loss: 2.749640, ppl: 15.637009 +epoch: 2, batch: 6871, sum loss: 4139.086426, avg loss: 2.378785, ppl: 10.791787 +epoch: 2, batch: 6872, sum loss: 4930.806152, avg loss: 2.790496, ppl: 16.289095 +epoch: 2, batch: 6873, sum loss: 3827.840576, avg loss: 2.614645, ppl: 13.662369 +epoch: 2, batch: 6874, sum loss: 3487.845215, avg loss: 2.297658, ppl: 9.950852 +epoch: 2, batch: 6875, sum loss: 4297.397461, avg loss: 2.596615, ppl: 13.418238 +epoch: 2, batch: 6876, sum loss: 4329.781738, avg loss: 2.431096, ppl: 11.371336 +epoch: 2, batch: 6877, sum loss: 3993.804932, avg loss: 2.398682, ppl: 11.008654 +epoch: 2, batch: 6878, sum loss: 3981.435303, avg loss: 2.550567, ppl: 12.814369 +epoch: 2, batch: 6879, sum loss: 3921.100098, avg loss: 2.557795, ppl: 12.907329 +epoch: 2, batch: 6880, sum loss: 4579.593262, avg loss: 2.541395, ppl: 12.697368 +epoch: 2, batch: 6881, sum loss: 4712.817871, avg loss: 2.898412, ppl: 18.145308 +epoch: 2, batch: 6882, sum loss: 
3498.257568, avg loss: 2.520358, ppl: 12.433051 +epoch: 2, batch: 6883, sum loss: 3743.616455, avg loss: 2.271612, ppl: 9.695016 +epoch: 2, batch: 6884, sum loss: 4680.312500, avg loss: 2.775986, ppl: 16.054453 +epoch: 2, batch: 6885, sum loss: 4222.267090, avg loss: 2.794353, ppl: 16.352041 +epoch: 2, batch: 6886, sum loss: 3978.476562, avg loss: 2.486548, ppl: 12.019709 +epoch: 2, batch: 6887, sum loss: 4624.763672, avg loss: 2.609912, ppl: 13.597853 +epoch: 2, batch: 6888, sum loss: 4509.238770, avg loss: 2.540416, ppl: 12.684947 +epoch: 2, batch: 6889, sum loss: 4102.930664, avg loss: 2.474627, ppl: 11.877271 +epoch: 2, batch: 6890, sum loss: 4429.541992, avg loss: 2.596449, ppl: 13.416012 +epoch: 2, batch: 6891, sum loss: 5600.214355, avg loss: 2.845638, ppl: 17.212530 +epoch: 2, batch: 6892, sum loss: 3635.791504, avg loss: 2.542512, ppl: 12.711558 +epoch: 2, batch: 6893, sum loss: 4274.882812, avg loss: 2.488290, ppl: 12.040672 +epoch: 2, batch: 6894, sum loss: 4736.559570, avg loss: 2.758625, ppl: 15.778137 +epoch: 2, batch: 6895, sum loss: 4618.219727, avg loss: 2.810846, ppl: 16.623974 +epoch: 2, batch: 6896, sum loss: 4027.307129, avg loss: 2.285645, ppl: 9.832030 +epoch: 2, batch: 6897, sum loss: 3757.528564, avg loss: 2.525221, ppl: 12.493654 +epoch: 2, batch: 6898, sum loss: 3862.313965, avg loss: 2.446051, ppl: 11.542670 +epoch: 2, batch: 6899, sum loss: 4358.730469, avg loss: 2.737896, ppl: 15.454434 +epoch: 2, batch: 6900, sum loss: 3222.621826, avg loss: 2.233279, ppl: 9.330413 +epoch: 2, batch: 6901, sum loss: 4623.066406, avg loss: 2.652362, ppl: 14.187505 +epoch: 2, batch: 6902, sum loss: 4532.139648, avg loss: 2.664397, ppl: 14.359292 +epoch: 2, batch: 6903, sum loss: 4454.585938, avg loss: 2.609599, ppl: 13.593603 +epoch: 2, batch: 6904, sum loss: 4180.510254, avg loss: 2.529044, ppl: 12.541512 +epoch: 2, batch: 6905, sum loss: 3573.355469, avg loss: 2.587513, ppl: 13.296661 +epoch: 2, batch: 6906, sum loss: 4741.476074, avg loss: 2.800636, 
ppl: 16.455107 +epoch: 2, batch: 6907, sum loss: 5039.054199, avg loss: 2.719403, ppl: 15.171267 +epoch: 2, batch: 6908, sum loss: 4609.858887, avg loss: 2.569598, ppl: 13.060575 +epoch: 2, batch: 6909, sum loss: 5430.124023, avg loss: 2.888364, ppl: 17.963894 +epoch: 2, batch: 6910, sum loss: 4044.793457, avg loss: 2.252112, ppl: 9.507796 +epoch: 2, batch: 6911, sum loss: 3630.134277, avg loss: 2.304847, ppl: 10.022645 +epoch: 2, batch: 6912, sum loss: 4790.865723, avg loss: 2.574350, ppl: 13.122789 +epoch: 2, batch: 6913, sum loss: 4215.194336, avg loss: 2.450694, ppl: 11.596395 +epoch: 2, batch: 6914, sum loss: 4421.486328, avg loss: 2.617813, ppl: 13.705718 +epoch: 2, batch: 6915, sum loss: 4042.234375, avg loss: 2.616333, ppl: 13.685447 +epoch: 2, batch: 6916, sum loss: 4368.375977, avg loss: 2.553113, ppl: 12.847034 +epoch: 2, batch: 6917, sum loss: 4517.414551, avg loss: 2.584333, ppl: 13.254451 +epoch: 2, batch: 6918, sum loss: 4349.282227, avg loss: 2.584244, ppl: 13.253263 +epoch: 2, batch: 6919, sum loss: 4030.383057, avg loss: 2.625657, ppl: 13.813642 +epoch: 2, batch: 6920, sum loss: 4590.242188, avg loss: 2.647198, ppl: 14.114441 +epoch: 2, batch: 6921, sum loss: 3708.973633, avg loss: 2.343003, ppl: 10.412457 +epoch: 2, batch: 6922, sum loss: 4307.099609, avg loss: 2.504128, ppl: 12.232884 +epoch: 2, batch: 6923, sum loss: 4679.257812, avg loss: 2.543075, ppl: 12.718719 +epoch: 2, batch: 6924, sum loss: 4496.518066, avg loss: 2.540406, ppl: 12.684816 +epoch: 2, batch: 6925, sum loss: 4841.697266, avg loss: 2.724647, ppl: 15.251026 +epoch: 2, batch: 6926, sum loss: 5957.469727, avg loss: 2.783864, ppl: 16.181429 +epoch: 2, batch: 6927, sum loss: 3677.614258, avg loss: 2.445222, ppl: 11.533112 +epoch: 2, batch: 6928, sum loss: 3842.937500, avg loss: 2.397341, ppl: 10.993902 +epoch: 2, batch: 6929, sum loss: 4612.029785, avg loss: 2.510632, ppl: 12.312704 +epoch: 2, batch: 6930, sum loss: 3918.648438, avg loss: 2.235396, ppl: 9.350181 +epoch: 2, batch: 
6931, sum loss: 4493.765625, avg loss: 2.474540, ppl: 11.876248 +epoch: 2, batch: 6932, sum loss: 4091.312988, avg loss: 2.331232, ppl: 10.290615 +epoch: 2, batch: 6933, sum loss: 4968.680176, avg loss: 2.657048, ppl: 14.254152 +epoch: 2, batch: 6934, sum loss: 4628.159180, avg loss: 2.719247, ppl: 15.168900 +epoch: 2, batch: 6935, sum loss: 4195.544434, avg loss: 2.397454, ppl: 10.995148 +epoch: 2, batch: 6936, sum loss: 3731.152588, avg loss: 2.334889, ppl: 10.328313 +epoch: 2, batch: 6937, sum loss: 4380.697754, avg loss: 2.602910, ppl: 13.502975 +epoch: 2, batch: 6938, sum loss: 4238.482422, avg loss: 2.592344, ppl: 13.361053 +epoch: 2, batch: 6939, sum loss: 4827.732910, avg loss: 2.609586, ppl: 13.593415 +epoch: 2, batch: 6940, sum loss: 4107.922363, avg loss: 2.427850, ppl: 11.334487 +epoch: 2, batch: 6941, sum loss: 4286.097656, avg loss: 2.597635, ppl: 13.431934 +epoch: 2, batch: 6942, sum loss: 4379.643555, avg loss: 2.639930, ppl: 14.012219 +epoch: 2, batch: 6943, sum loss: 4753.020508, avg loss: 2.534944, ppl: 12.615728 +epoch: 2, batch: 6944, sum loss: 5283.314453, avg loss: 2.843549, ppl: 17.176622 +epoch: 2, batch: 6945, sum loss: 5488.770020, avg loss: 2.855760, ppl: 17.387644 +epoch: 2, batch: 6946, sum loss: 3179.209229, avg loss: 2.476020, ppl: 11.893831 +epoch: 2, batch: 6947, sum loss: 4101.648926, avg loss: 2.438555, ppl: 11.456469 +epoch: 2, batch: 6948, sum loss: 4749.656738, avg loss: 2.517041, ppl: 12.391877 +epoch: 2, batch: 6949, sum loss: 5327.973145, avg loss: 2.714199, ppl: 15.092521 +epoch: 2, batch: 6950, sum loss: 4713.121094, avg loss: 2.491079, ppl: 12.074295 +epoch: 2, batch: 6951, sum loss: 4490.096191, avg loss: 2.536777, ppl: 12.638877 +epoch: 2, batch: 6952, sum loss: 5348.679688, avg loss: 2.894307, ppl: 18.070976 +epoch: 2, batch: 6953, sum loss: 4765.483398, avg loss: 2.781952, ppl: 16.150511 +epoch: 2, batch: 6954, sum loss: 3397.566895, avg loss: 2.330293, ppl: 10.280951 +epoch: 2, batch: 6955, sum loss: 4460.047852, 
avg loss: 2.761640, ppl: 15.825769 +epoch: 2, batch: 6956, sum loss: 4752.462891, avg loss: 2.567511, ppl: 13.033345 +epoch: 2, batch: 6957, sum loss: 3971.360352, avg loss: 2.628299, ppl: 13.850194 +epoch: 2, batch: 6958, sum loss: 4545.419922, avg loss: 2.565135, ppl: 13.002419 +epoch: 2, batch: 6959, sum loss: 3029.692871, avg loss: 2.257595, ppl: 9.560072 +epoch: 2, batch: 6960, sum loss: 5377.493164, avg loss: 2.711797, ppl: 15.056304 +epoch: 2, batch: 6961, sum loss: 4538.583496, avg loss: 2.762376, ppl: 15.837425 +epoch: 2, batch: 6962, sum loss: 4168.202148, avg loss: 2.419154, ppl: 11.236348 +epoch: 2, batch: 6963, sum loss: 4007.607422, avg loss: 2.554243, ppl: 12.861561 +epoch: 2, batch: 6964, sum loss: 3423.417480, avg loss: 2.392325, ppl: 10.938899 +epoch: 2, batch: 6965, sum loss: 4786.269531, avg loss: 2.567741, ppl: 13.036345 +epoch: 2, batch: 6966, sum loss: 4103.900391, avg loss: 2.550591, ppl: 12.814672 +epoch: 2, batch: 6967, sum loss: 3848.707520, avg loss: 2.394964, ppl: 10.967806 +epoch: 2, batch: 6968, sum loss: 4128.772949, avg loss: 2.438732, ppl: 11.458501 +epoch: 2, batch: 6969, sum loss: 3187.197510, avg loss: 2.265244, ppl: 9.633471 +epoch: 2, batch: 6970, sum loss: 4301.202637, avg loss: 2.525662, ppl: 12.499169 +epoch: 2, batch: 6971, sum loss: 5028.928223, avg loss: 2.621965, ppl: 13.762736 +epoch: 2, batch: 6972, sum loss: 4601.218262, avg loss: 2.797093, ppl: 16.396914 +epoch: 2, batch: 6973, sum loss: 4437.879395, avg loss: 2.782370, ppl: 16.157263 +epoch: 2, batch: 6974, sum loss: 5180.531250, avg loss: 2.617752, ppl: 13.704882 +epoch: 2, batch: 6975, sum loss: 4797.749023, avg loss: 2.545225, ppl: 12.746095 +epoch: 2, batch: 6976, sum loss: 3901.174072, avg loss: 2.266807, ppl: 9.648540 +epoch: 2, batch: 6977, sum loss: 4147.493164, avg loss: 2.459960, ppl: 11.704349 +epoch: 2, batch: 6978, sum loss: 3743.034180, avg loss: 2.459287, ppl: 11.696465 +epoch: 2, batch: 6979, sum loss: 4266.150391, avg loss: 2.533344, ppl: 12.595550 
+epoch: 2, batch: 6980, sum loss: 4739.620117, avg loss: 2.731770, ppl: 15.360044 +epoch: 2, batch: 6981, sum loss: 5548.178711, avg loss: 2.861361, ppl: 17.485310 +epoch: 2, batch: 6982, sum loss: 4024.104980, avg loss: 2.606286, ppl: 13.548635 +epoch: 2, batch: 6983, sum loss: 4720.329590, avg loss: 2.606477, ppl: 13.551226 +epoch: 2, batch: 6984, sum loss: 4308.017578, avg loss: 2.750969, ppl: 15.657796 +epoch: 2, batch: 6985, sum loss: 3881.584473, avg loss: 2.376965, ppl: 10.772162 +epoch: 2, batch: 6986, sum loss: 5040.281250, avg loss: 2.778545, ppl: 16.095591 +epoch: 2, batch: 6987, sum loss: 3287.282959, avg loss: 2.201797, ppl: 9.041246 +epoch: 2, batch: 6988, sum loss: 4520.419434, avg loss: 2.542418, ppl: 12.710371 +epoch: 2, batch: 6989, sum loss: 4504.595703, avg loss: 2.520759, ppl: 12.438029 +epoch: 2, batch: 6990, sum loss: 4160.986816, avg loss: 2.291292, ppl: 9.887706 +epoch: 2, batch: 6991, sum loss: 4978.637207, avg loss: 2.571610, ppl: 13.086877 +epoch: 2, batch: 6992, sum loss: 3494.211914, avg loss: 2.431602, ppl: 11.377093 +epoch: 2, batch: 6993, sum loss: 5292.339844, avg loss: 2.651473, ppl: 14.174900 +epoch: 2, batch: 6994, sum loss: 4001.040527, avg loss: 2.384410, ppl: 10.852662 +epoch: 2, batch: 6995, sum loss: 4329.881348, avg loss: 2.508622, ppl: 12.287982 +epoch: 2, batch: 6996, sum loss: 4668.172852, avg loss: 2.530175, ppl: 12.555703 +epoch: 2, batch: 6997, sum loss: 3656.696289, avg loss: 2.389998, ppl: 10.913469 +epoch: 2, batch: 6998, sum loss: 4369.990234, avg loss: 2.511489, ppl: 12.323262 +epoch: 2, batch: 6999, sum loss: 4667.476562, avg loss: 2.591603, ppl: 13.351151 +epoch: 2, batch: 7000, sum loss: 4202.953613, avg loss: 2.409950, ppl: 11.133408 +epoch: 2, batch: 7001, sum loss: 4317.070312, avg loss: 2.637184, ppl: 13.973800 +epoch: 2, batch: 7002, sum loss: 3047.002441, avg loss: 2.177986, ppl: 8.828509 +epoch: 2, batch: 7003, sum loss: 4924.986328, avg loss: 2.575830, ppl: 13.142214 +epoch: 2, batch: 7004, sum loss: 
3710.840576, avg loss: 2.384859, ppl: 10.857533 +epoch: 2, batch: 7005, sum loss: 4204.281250, avg loss: 2.450047, ppl: 11.588897 +epoch: 2, batch: 7006, sum loss: 3772.952881, avg loss: 2.321817, ppl: 10.194182 +epoch: 2, batch: 7007, sum loss: 5586.468262, avg loss: 3.140229, ppl: 23.109169 +epoch: 2, batch: 7008, sum loss: 3613.452637, avg loss: 2.505862, ppl: 12.254114 +epoch: 2, batch: 7009, sum loss: 4125.072266, avg loss: 2.426513, ppl: 11.319345 +epoch: 2, batch: 7010, sum loss: 3788.370605, avg loss: 2.375154, ppl: 10.752669 +epoch: 2, batch: 7011, sum loss: 3376.086182, avg loss: 2.249225, ppl: 9.480383 +epoch: 2, batch: 7012, sum loss: 4166.308105, avg loss: 2.471120, ppl: 11.835694 +epoch: 2, batch: 7013, sum loss: 3998.196777, avg loss: 2.411458, ppl: 11.150204 +epoch: 2, batch: 7014, sum loss: 4739.010254, avg loss: 2.464384, ppl: 11.756236 +epoch: 2, batch: 7015, sum loss: 2952.235596, avg loss: 2.073199, ppl: 7.950217 +epoch: 2, batch: 7016, sum loss: 4556.372070, avg loss: 2.779971, ppl: 16.118547 +epoch: 2, batch: 7017, sum loss: 3708.826416, avg loss: 2.325283, ppl: 10.229575 +epoch: 2, batch: 7018, sum loss: 3948.965820, avg loss: 2.324288, ppl: 10.219405 +epoch: 2, batch: 7019, sum loss: 4437.460449, avg loss: 2.518423, ppl: 12.409007 +epoch: 2, batch: 7020, sum loss: 3908.302979, avg loss: 2.526376, ppl: 12.508089 +epoch: 2, batch: 7021, sum loss: 3935.140137, avg loss: 2.504863, ppl: 12.241885 +epoch: 2, batch: 7022, sum loss: 4412.960938, avg loss: 2.537643, ppl: 12.649819 +epoch: 2, batch: 7023, sum loss: 3584.174805, avg loss: 2.078988, ppl: 7.996371 +epoch: 2, batch: 7024, sum loss: 4209.299316, avg loss: 2.292647, ppl: 9.901108 +epoch: 2, batch: 7025, sum loss: 4072.682617, avg loss: 2.344665, ppl: 10.429776 +epoch: 2, batch: 7026, sum loss: 4394.545410, avg loss: 2.537266, ppl: 12.645059 +epoch: 2, batch: 7027, sum loss: 5028.244629, avg loss: 2.901468, ppl: 18.200850 +epoch: 2, batch: 7028, sum loss: 4142.592285, avg loss: 2.459972, 
ppl: 11.704480 +epoch: 2, batch: 7029, sum loss: 4178.748535, avg loss: 2.619905, ppl: 13.734419 +epoch: 2, batch: 7030, sum loss: 3846.631348, avg loss: 2.522381, ppl: 12.458228 +epoch: 2, batch: 7031, sum loss: 4126.664062, avg loss: 2.593755, ppl: 13.379919 +epoch: 2, batch: 7032, sum loss: 3799.397949, avg loss: 2.276452, ppl: 9.742052 +epoch: 2, batch: 7033, sum loss: 4085.679199, avg loss: 2.608990, ppl: 13.585329 +epoch: 2, batch: 7034, sum loss: 4299.799316, avg loss: 2.585568, ppl: 13.270824 +epoch: 2, batch: 7035, sum loss: 4299.107910, avg loss: 2.525915, ppl: 12.502335 +epoch: 2, batch: 7036, sum loss: 4073.618652, avg loss: 2.706723, ppl: 14.980112 +epoch: 2, batch: 7037, sum loss: 4188.472656, avg loss: 2.543092, ppl: 12.718937 +epoch: 2, batch: 7038, sum loss: 4591.841309, avg loss: 2.805034, ppl: 16.527645 +epoch: 2, batch: 7039, sum loss: 4571.693359, avg loss: 2.606439, ppl: 13.550706 +epoch: 2, batch: 7040, sum loss: 3742.339844, avg loss: 2.334585, ppl: 10.325173 +epoch: 2, batch: 7041, sum loss: 4351.133301, avg loss: 2.582275, ppl: 13.227195 +epoch: 2, batch: 7042, sum loss: 4804.183594, avg loss: 2.588461, ppl: 13.309272 +epoch: 2, batch: 7043, sum loss: 3733.833740, avg loss: 2.237168, ppl: 9.366770 +epoch: 2, batch: 7044, sum loss: 4294.124023, avg loss: 2.728160, ppl: 15.304699 +epoch: 2, batch: 7045, sum loss: 4060.606201, avg loss: 2.469955, ppl: 11.821918 +epoch: 2, batch: 7046, sum loss: 4048.558105, avg loss: 2.398435, ppl: 11.005938 +epoch: 2, batch: 7047, sum loss: 4154.194336, avg loss: 2.661239, ppl: 14.314015 +epoch: 2, batch: 7048, sum loss: 3339.280273, avg loss: 2.244140, ppl: 9.432300 +epoch: 2, batch: 7049, sum loss: 4579.782227, avg loss: 2.651872, ppl: 14.180555 +epoch: 2, batch: 7050, sum loss: 5167.309082, avg loss: 2.744190, ppl: 15.552004 +epoch: 2, batch: 7051, sum loss: 4569.123047, avg loss: 2.545473, ppl: 12.749252 +epoch: 2, batch: 7052, sum loss: 3694.641113, avg loss: 2.258338, ppl: 9.567178 +epoch: 2, batch: 
7053, sum loss: 4049.337402, avg loss: 2.414632, ppl: 11.185649 +epoch: 2, batch: 7054, sum loss: 4286.013184, avg loss: 2.370583, ppl: 10.703627 +epoch: 2, batch: 7055, sum loss: 4695.937500, avg loss: 2.515232, ppl: 12.369473 +epoch: 2, batch: 7056, sum loss: 4261.712402, avg loss: 2.565751, ppl: 13.010427 +epoch: 2, batch: 7057, sum loss: 4488.251953, avg loss: 2.558867, ppl: 12.921163 +epoch: 2, batch: 7058, sum loss: 4229.026367, avg loss: 2.623466, ppl: 13.783408 +epoch: 2, batch: 7059, sum loss: 3456.667236, avg loss: 2.287668, ppl: 9.851941 +epoch: 2, batch: 7060, sum loss: 5027.983887, avg loss: 2.691640, ppl: 14.755857 +epoch: 2, batch: 7061, sum loss: 3999.375488, avg loss: 2.180685, ppl: 8.852365 +epoch: 2, batch: 7062, sum loss: 5271.990723, avg loss: 2.903079, ppl: 18.230183 +epoch: 2, batch: 7063, sum loss: 3724.218750, avg loss: 2.336398, ppl: 10.343912 +epoch: 2, batch: 7064, sum loss: 3787.044189, avg loss: 2.459120, ppl: 11.694510 +epoch: 2, batch: 7065, sum loss: 4124.949219, avg loss: 2.474475, ppl: 11.875467 +epoch: 2, batch: 7066, sum loss: 4650.158203, avg loss: 2.711462, ppl: 15.051271 +epoch: 2, batch: 7067, sum loss: 4824.750000, avg loss: 2.671512, ppl: 14.461814 +epoch: 2, batch: 7068, sum loss: 4604.550781, avg loss: 2.595575, ppl: 13.404297 +epoch: 2, batch: 7069, sum loss: 4900.726074, avg loss: 2.713580, ppl: 15.083182 +epoch: 2, batch: 7070, sum loss: 5201.707520, avg loss: 2.647179, ppl: 14.114172 +epoch: 2, batch: 7071, sum loss: 4323.206543, avg loss: 2.555087, ppl: 12.872415 +epoch: 2, batch: 7072, sum loss: 3722.668945, avg loss: 2.404825, ppl: 11.076491 +epoch: 2, batch: 7073, sum loss: 4138.735352, avg loss: 2.641184, ppl: 14.029806 +epoch: 2, batch: 7074, sum loss: 4476.303711, avg loss: 2.566688, ppl: 13.022619 +epoch: 2, batch: 7075, sum loss: 3850.617676, avg loss: 2.466763, ppl: 11.784245 +epoch: 2, batch: 7076, sum loss: 4309.139160, avg loss: 2.589627, ppl: 13.324800 +epoch: 2, batch: 7077, sum loss: 3706.180908, avg 
loss: 2.662486, ppl: 14.331879 +epoch: 2, batch: 7078, sum loss: 5001.601562, avg loss: 2.533739, ppl: 12.600535 +epoch: 2, batch: 7079, sum loss: 3946.840088, avg loss: 2.338176, ppl: 10.362313 +epoch: 2, batch: 7080, sum loss: 4244.386719, avg loss: 2.731266, ppl: 15.352304 +epoch: 2, batch: 7081, sum loss: 4673.673828, avg loss: 2.563727, ppl: 12.984118 +epoch: 2, batch: 7082, sum loss: 4531.372559, avg loss: 2.678116, ppl: 14.557642 +epoch: 2, batch: 7083, sum loss: 3909.214355, avg loss: 2.483618, ppl: 11.984541 +epoch: 2, batch: 7084, sum loss: 4996.022461, avg loss: 2.656046, ppl: 14.239872 +epoch: 2, batch: 7085, sum loss: 5037.466309, avg loss: 2.751211, ppl: 15.661582 +epoch: 2, batch: 7086, sum loss: 4509.452148, avg loss: 2.520655, ppl: 12.436742 +epoch: 2, batch: 7087, sum loss: 4396.167480, avg loss: 2.581425, ppl: 13.215963 +epoch: 2, batch: 7088, sum loss: 5327.896484, avg loss: 2.978142, ppl: 19.651276 +epoch: 2, batch: 7089, sum loss: 4097.292480, avg loss: 2.389092, ppl: 10.903586 +epoch: 2, batch: 7090, sum loss: 4180.683594, avg loss: 2.402692, ppl: 11.052886 +epoch: 2, batch: 7091, sum loss: 3578.447266, avg loss: 2.299773, ppl: 9.971921 +epoch: 2, batch: 7092, sum loss: 3459.458252, avg loss: 2.223302, ppl: 9.237785 +epoch: 2, batch: 7093, sum loss: 4781.245605, avg loss: 2.509840, ppl: 12.302964 +epoch: 2, batch: 7094, sum loss: 3983.370605, avg loss: 2.615476, ppl: 13.673729 +epoch: 2, batch: 7095, sum loss: 4769.002930, avg loss: 2.606013, ppl: 13.544933 +epoch: 2, batch: 7096, sum loss: 4401.433594, avg loss: 2.687078, ppl: 14.688689 +epoch: 2, batch: 7097, sum loss: 4206.561523, avg loss: 2.672529, ppl: 14.476541 +epoch: 2, batch: 7098, sum loss: 5745.771973, avg loss: 2.716677, ppl: 15.129961 +epoch: 2, batch: 7099, sum loss: 4249.926270, avg loss: 2.621793, ppl: 13.760371 +epoch: 2, batch: 7100, sum loss: 4516.696289, avg loss: 2.588365, ppl: 13.307990 +epoch: 2, batch: 7101, sum loss: 3522.553223, avg loss: 2.393039, ppl: 10.946711 
+epoch: 2, batch: 7102, sum loss: 5109.036621, avg loss: 2.706058, ppl: 14.970140 +epoch: 2, batch: 7103, sum loss: 4479.812988, avg loss: 2.743302, ppl: 15.538213 +epoch: 2, batch: 7104, sum loss: 4477.589355, avg loss: 2.460214, ppl: 11.707318 +epoch: 2, batch: 7105, sum loss: 4473.848633, avg loss: 2.537634, ppl: 12.649705 +epoch: 2, batch: 7106, sum loss: 5033.468262, avg loss: 2.747526, ppl: 15.603986 +epoch: 2, batch: 7107, sum loss: 4009.022461, avg loss: 2.510346, ppl: 12.309188 +epoch: 2, batch: 7108, sum loss: 4320.068848, avg loss: 2.372361, ppl: 10.722675 +epoch: 2, batch: 7109, sum loss: 4133.852051, avg loss: 2.363552, ppl: 10.628635 +epoch: 2, batch: 7110, sum loss: 3849.112549, avg loss: 2.310392, ppl: 10.078371 +epoch: 2, batch: 7111, sum loss: 3999.548340, avg loss: 2.464293, ppl: 11.755174 +epoch: 2, batch: 7112, sum loss: 4236.452637, avg loss: 2.592688, ppl: 13.365655 +epoch: 2, batch: 7113, sum loss: 4556.495117, avg loss: 2.663060, ppl: 14.340102 +epoch: 2, batch: 7114, sum loss: 3554.612061, avg loss: 2.302210, ppl: 9.996251 +epoch: 2, batch: 7115, sum loss: 3518.328369, avg loss: 2.269889, ppl: 9.678329 +epoch: 2, batch: 7116, sum loss: 3835.344971, avg loss: 2.500225, ppl: 12.185233 +epoch: 2, batch: 7117, sum loss: 4036.514893, avg loss: 2.443411, ppl: 11.512243 +epoch: 2, batch: 7118, sum loss: 4030.744629, avg loss: 2.456273, ppl: 11.661273 +epoch: 2, batch: 7119, sum loss: 4832.954590, avg loss: 2.728941, ppl: 15.316657 +epoch: 2, batch: 7120, sum loss: 4744.112793, avg loss: 2.600939, ppl: 13.476387 +epoch: 2, batch: 7121, sum loss: 4290.999512, avg loss: 2.600606, ppl: 13.471896 +epoch: 2, batch: 7122, sum loss: 4980.872559, avg loss: 2.559544, ppl: 12.929921 +epoch: 2, batch: 7123, sum loss: 3245.740967, avg loss: 2.413190, ppl: 11.169539 +epoch: 2, batch: 7124, sum loss: 4237.551270, avg loss: 2.704245, ppl: 14.943028 +epoch: 2, batch: 7125, sum loss: 4315.478027, avg loss: 2.741727, ppl: 15.513748 +epoch: 2, batch: 7126, sum loss: 
5395.734375, avg loss: 2.800070, ppl: 16.445795 +epoch: 2, batch: 7127, sum loss: 3863.533936, avg loss: 2.320441, ppl: 10.180161 +epoch: 2, batch: 7128, sum loss: 4628.083984, avg loss: 2.738511, ppl: 15.463947 +epoch: 2, batch: 7129, sum loss: 3550.763672, avg loss: 2.201341, ppl: 9.037127 +epoch: 2, batch: 7130, sum loss: 3847.590820, avg loss: 2.548074, ppl: 12.782455 +epoch: 2, batch: 7131, sum loss: 3874.338867, avg loss: 2.445921, ppl: 11.541174 +epoch: 2, batch: 7132, sum loss: 4176.518555, avg loss: 2.341098, ppl: 10.392642 +epoch: 2, batch: 7133, sum loss: 4894.750000, avg loss: 2.621719, ppl: 13.759360 +epoch: 2, batch: 7134, sum loss: 4402.595215, avg loss: 2.473368, ppl: 11.862334 +epoch: 2, batch: 7135, sum loss: 3830.152344, avg loss: 2.593197, ppl: 13.372459 +epoch: 2, batch: 7136, sum loss: 4219.623047, avg loss: 2.507203, ppl: 12.270562 +epoch: 2, batch: 7137, sum loss: 4393.227051, avg loss: 2.657730, ppl: 14.263868 +epoch: 2, batch: 7138, sum loss: 3928.173584, avg loss: 2.237001, ppl: 9.365202 +epoch: 2, batch: 7139, sum loss: 3909.514160, avg loss: 2.499689, ppl: 12.178710 +epoch: 2, batch: 7140, sum loss: 4225.222656, avg loss: 2.590572, ppl: 13.337397 +epoch: 2, batch: 7141, sum loss: 4521.572754, avg loss: 2.639564, ppl: 14.007092 +epoch: 2, batch: 7142, sum loss: 5006.239258, avg loss: 2.875496, ppl: 17.734224 +epoch: 2, batch: 7143, sum loss: 3777.400879, avg loss: 2.504908, ppl: 12.242428 +epoch: 2, batch: 7144, sum loss: 5349.263184, avg loss: 2.803597, ppl: 16.503904 +epoch: 2, batch: 7145, sum loss: 3536.714844, avg loss: 2.410848, ppl: 11.143411 +epoch: 2, batch: 7146, sum loss: 4578.743652, avg loss: 2.547993, ppl: 12.781428 +epoch: 2, batch: 7147, sum loss: 3108.213623, avg loss: 2.191970, ppl: 8.952834 +epoch: 2, batch: 7148, sum loss: 4243.123535, avg loss: 2.584119, ppl: 13.251614 +epoch: 2, batch: 7149, sum loss: 4700.749023, avg loss: 2.505730, ppl: 12.252499 +epoch: 2, batch: 7150, sum loss: 3765.342529, avg loss: 2.351869, 
ppl: 10.505189 +epoch: 2, batch: 7151, sum loss: 4115.166016, avg loss: 2.658376, ppl: 14.273090 +epoch: 2, batch: 7152, sum loss: 4142.616699, avg loss: 2.395961, ppl: 10.978745 +epoch: 2, batch: 7153, sum loss: 3933.248535, avg loss: 2.344010, ppl: 10.422948 +epoch: 2, batch: 7154, sum loss: 4768.815430, avg loss: 2.689687, ppl: 14.727069 +epoch: 2, batch: 7155, sum loss: 4098.812500, avg loss: 2.539537, ppl: 12.673801 +epoch: 2, batch: 7156, sum loss: 4297.934570, avg loss: 2.497347, ppl: 12.150218 +epoch: 2, batch: 7157, sum loss: 4265.795410, avg loss: 2.415513, ppl: 11.195507 +epoch: 2, batch: 7158, sum loss: 3992.446533, avg loss: 2.438880, ppl: 11.460197 +epoch: 2, batch: 7159, sum loss: 4617.761230, avg loss: 2.711545, ppl: 15.052513 +epoch: 2, batch: 7160, sum loss: 5264.053223, avg loss: 2.676183, ppl: 14.529535 +epoch: 2, batch: 7161, sum loss: 4123.004395, avg loss: 2.320205, ppl: 10.177760 +epoch: 2, batch: 7162, sum loss: 5287.874512, avg loss: 2.895879, ppl: 18.099400 +epoch: 2, batch: 7163, sum loss: 4185.354492, avg loss: 2.541199, ppl: 12.694880 +epoch: 2, batch: 7164, sum loss: 4219.971191, avg loss: 2.365455, ppl: 10.648882 +epoch: 2, batch: 7165, sum loss: 3856.785400, avg loss: 2.545733, ppl: 12.752572 +epoch: 2, batch: 7166, sum loss: 4113.353516, avg loss: 2.445513, ppl: 11.536469 +epoch: 2, batch: 7167, sum loss: 4244.517578, avg loss: 2.436577, ppl: 11.433837 +epoch: 2, batch: 7168, sum loss: 4420.430176, avg loss: 2.695384, ppl: 14.811209 +epoch: 2, batch: 7169, sum loss: 5239.821289, avg loss: 2.641039, ppl: 14.027769 +epoch: 2, batch: 7170, sum loss: 4931.718262, avg loss: 2.591549, ppl: 13.350437 +epoch: 2, batch: 7171, sum loss: 3831.912109, avg loss: 2.516029, ppl: 12.379342 +epoch: 2, batch: 7172, sum loss: 5004.824219, avg loss: 2.705310, ppl: 14.958959 +epoch: 2, batch: 7173, sum loss: 3495.279541, avg loss: 2.319363, ppl: 10.169190 +epoch: 2, batch: 7174, sum loss: 3661.984375, avg loss: 2.285883, ppl: 9.834366 +epoch: 2, batch: 
7175, sum loss: 4165.259766, avg loss: 2.395204, ppl: 10.970436 +epoch: 2, batch: 7176, sum loss: 4036.553467, avg loss: 2.391323, ppl: 10.927946 +epoch: 2, batch: 7177, sum loss: 4589.473633, avg loss: 2.606175, ppl: 13.547130 +epoch: 2, batch: 7178, sum loss: 3915.997559, avg loss: 2.529714, ppl: 12.549921 +epoch: 2, batch: 7179, sum loss: 3763.294434, avg loss: 2.565300, ppl: 13.004553 +epoch: 2, batch: 7180, sum loss: 4268.695312, avg loss: 2.717184, ppl: 15.137629 +epoch: 2, batch: 7181, sum loss: 4651.242676, avg loss: 2.685475, ppl: 14.665164 +epoch: 2, batch: 7182, sum loss: 6428.092773, avg loss: 3.015053, ppl: 20.390167 +epoch: 2, batch: 7183, sum loss: 4831.361816, avg loss: 2.804040, ppl: 16.511225 +epoch: 2, batch: 7184, sum loss: 4173.752441, avg loss: 2.490306, ppl: 12.064966 +epoch: 2, batch: 7185, sum loss: 4971.852539, avg loss: 2.772924, ppl: 16.005365 +epoch: 2, batch: 7186, sum loss: 4516.858887, avg loss: 2.368568, ppl: 10.682084 +epoch: 2, batch: 7187, sum loss: 4390.634277, avg loss: 2.814509, ppl: 16.684984 +epoch: 2, batch: 7188, sum loss: 3802.133301, avg loss: 2.391279, ppl: 10.927459 +epoch: 2, batch: 7189, sum loss: 3933.229004, avg loss: 2.435436, ppl: 11.420797 +epoch: 2, batch: 7190, sum loss: 4744.324219, avg loss: 2.547972, ppl: 12.781160 +epoch: 2, batch: 7191, sum loss: 3588.674805, avg loss: 2.426420, ppl: 11.318287 +epoch: 2, batch: 7192, sum loss: 3440.647217, avg loss: 2.369592, ppl: 10.693026 +epoch: 2, batch: 7193, sum loss: 3806.989014, avg loss: 2.429476, ppl: 11.352935 +epoch: 2, batch: 7194, sum loss: 3934.867920, avg loss: 2.578551, ppl: 13.178023 +epoch: 2, batch: 7195, sum loss: 5547.130859, avg loss: 2.780517, ppl: 16.127350 +epoch: 2, batch: 7196, sum loss: 3463.440918, avg loss: 2.253377, ppl: 9.519835 +epoch: 2, batch: 7197, sum loss: 5196.422852, avg loss: 2.904652, ppl: 18.258890 +epoch: 2, batch: 7198, sum loss: 3273.938965, avg loss: 2.197274, ppl: 9.000449 +epoch: 2, batch: 7199, sum loss: 4376.498535, avg 
loss: 2.541521, ppl: 12.698966 +epoch: 2, batch: 7200, sum loss: 3736.363525, avg loss: 2.218743, ppl: 9.195767 +epoch: 2, batch: 7201, sum loss: 4361.928223, avg loss: 2.502541, ppl: 12.213484 +epoch: 2, batch: 7202, sum loss: 5707.686523, avg loss: 2.892898, ppl: 18.045523 +epoch: 2, batch: 7203, sum loss: 4009.738770, avg loss: 2.506087, ppl: 12.256870 +epoch: 2, batch: 7204, sum loss: 4609.148926, avg loss: 2.579266, ppl: 13.187459 +epoch: 2, batch: 7205, sum loss: 3968.408691, avg loss: 2.323424, ppl: 10.210577 +epoch: 2, batch: 7206, sum loss: 3828.718262, avg loss: 2.617033, ppl: 13.695024 +epoch: 2, batch: 7207, sum loss: 3890.602539, avg loss: 2.552889, ppl: 12.844155 +epoch: 2, batch: 7208, sum loss: 4282.887695, avg loss: 2.597264, ppl: 13.426946 +epoch: 2, batch: 7209, sum loss: 5265.875488, avg loss: 2.823526, ppl: 16.836105 +epoch: 2, batch: 7210, sum loss: 4036.407715, avg loss: 2.600778, ppl: 13.474221 +epoch: 2, batch: 7211, sum loss: 4694.957031, avg loss: 2.718562, ppl: 15.158514 +epoch: 2, batch: 7212, sum loss: 4647.587402, avg loss: 2.532745, ppl: 12.588017 +epoch: 2, batch: 7213, sum loss: 5105.258789, avg loss: 2.702625, ppl: 14.918843 +epoch: 2, batch: 7214, sum loss: 5566.545898, avg loss: 2.710100, ppl: 15.030785 +epoch: 2, batch: 7215, sum loss: 4234.092773, avg loss: 2.426414, ppl: 11.318225 +epoch: 2, batch: 7216, sum loss: 3384.639648, avg loss: 2.315075, ppl: 10.125684 +epoch: 2, batch: 7217, sum loss: 4428.244629, avg loss: 2.507500, ppl: 12.274202 +epoch: 2, batch: 7218, sum loss: 5835.035645, avg loss: 2.861715, ppl: 17.491491 +epoch: 2, batch: 7219, sum loss: 4895.171387, avg loss: 3.010561, ppl: 20.298775 +epoch: 2, batch: 7220, sum loss: 4462.372559, avg loss: 2.542662, ppl: 12.713474 +epoch: 2, batch: 7221, sum loss: 3548.379150, avg loss: 2.302647, ppl: 10.000620 +epoch: 2, batch: 7222, sum loss: 3979.179443, avg loss: 2.580531, ppl: 13.204152 +epoch: 2, batch: 7223, sum loss: 4890.202637, avg loss: 2.724347, ppl: 15.246453 
+epoch: 2, batch: 7224, sum loss: 3996.941650, avg loss: 2.428276, ppl: 11.339314 +epoch: 2, batch: 7225, sum loss: 3490.532715, avg loss: 2.456392, ppl: 11.662655 +epoch: 2, batch: 7226, sum loss: 4028.034668, avg loss: 2.343243, ppl: 10.414959 +epoch: 2, batch: 7227, sum loss: 4053.576172, avg loss: 2.440443, ppl: 11.478127 +epoch: 2, batch: 7228, sum loss: 4339.867188, avg loss: 2.317067, ppl: 10.145877 +epoch: 2, batch: 7229, sum loss: 4985.668945, avg loss: 2.657606, ppl: 14.262107 +epoch: 2, batch: 7230, sum loss: 4304.275391, avg loss: 2.670146, ppl: 14.442078 +epoch: 2, batch: 7231, sum loss: 5109.194824, avg loss: 2.929584, ppl: 18.719841 +epoch: 2, batch: 7232, sum loss: 4930.841309, avg loss: 2.724222, ppl: 15.244548 +epoch: 2, batch: 7233, sum loss: 4198.965820, avg loss: 2.365614, ppl: 10.650580 +epoch: 2, batch: 7234, sum loss: 4117.294434, avg loss: 2.379939, ppl: 10.804242 +epoch: 2, batch: 7235, sum loss: 3802.742432, avg loss: 2.447067, ppl: 11.554411 +epoch: 2, batch: 7236, sum loss: 4373.086426, avg loss: 2.699436, ppl: 14.871345 +epoch: 2, batch: 7237, sum loss: 3941.644531, avg loss: 2.468156, ppl: 11.800662 +epoch: 2, batch: 7238, sum loss: 5056.228516, avg loss: 2.706760, ppl: 14.980665 +epoch: 2, batch: 7239, sum loss: 4267.045410, avg loss: 2.483728, ppl: 11.985869 +epoch: 2, batch: 7240, sum loss: 3739.089844, avg loss: 2.249753, ppl: 9.485394 +epoch: 2, batch: 7241, sum loss: 4373.046387, avg loss: 2.748615, ppl: 15.620982 +epoch: 2, batch: 7242, sum loss: 4746.127441, avg loss: 2.565474, ppl: 13.006825 +epoch: 2, batch: 7243, sum loss: 5190.113770, avg loss: 2.820714, ppl: 16.788830 +epoch: 2, batch: 7244, sum loss: 6075.211914, avg loss: 3.016490, ppl: 20.419502 +epoch: 2, batch: 7245, sum loss: 5628.057617, avg loss: 2.790311, ppl: 16.286089 +epoch: 2, batch: 7246, sum loss: 3822.114502, avg loss: 2.432918, ppl: 11.392079 +epoch: 2, batch: 7247, sum loss: 4431.716309, avg loss: 2.594682, ppl: 13.392322 +epoch: 2, batch: 7248, sum 
loss: 4171.309570, avg loss: 2.665374, ppl: 14.373318 +epoch: 2, batch: 7249, sum loss: 4693.512207, avg loss: 2.542531, ppl: 12.711804 +epoch: 2, batch: 7250, sum loss: 4771.968750, avg loss: 2.734653, ppl: 15.404389 +epoch: 2, batch: 7251, sum loss: 4412.034180, avg loss: 2.635624, ppl: 13.952015 +epoch: 2, batch: 7252, sum loss: 3978.616455, avg loss: 2.465066, ppl: 11.764258 +epoch: 2, batch: 7253, sum loss: 3945.082764, avg loss: 2.459528, ppl: 11.699287 +epoch: 2, batch: 7254, sum loss: 4521.008789, avg loss: 2.537042, ppl: 12.642221 +epoch: 2, batch: 7255, sum loss: 3669.927490, avg loss: 2.494852, ppl: 12.119941 +epoch: 2, batch: 7256, sum loss: 4545.413574, avg loss: 2.748134, ppl: 15.613472 +epoch: 2, batch: 7257, sum loss: 3578.209473, avg loss: 2.469434, ppl: 11.815755 +epoch: 2, batch: 7258, sum loss: 4413.648926, avg loss: 2.689609, ppl: 14.725922 +epoch: 2, batch: 7259, sum loss: 4839.864258, avg loss: 2.694802, ppl: 14.802588 +epoch: 2, batch: 7260, sum loss: 4168.317871, avg loss: 2.527785, ppl: 12.525735 +epoch: 2, batch: 7261, sum loss: 3638.155273, avg loss: 2.388808, ppl: 10.900498 +epoch: 2, batch: 7262, sum loss: 4454.806641, avg loss: 2.583995, ppl: 13.249961 +epoch: 2, batch: 7263, sum loss: 3774.216553, avg loss: 2.452382, ppl: 11.615987 +epoch: 2, batch: 7264, sum loss: 4015.026855, avg loss: 2.408534, ppl: 11.117654 +epoch: 2, batch: 7265, sum loss: 4923.242188, avg loss: 2.644061, ppl: 14.070231 +epoch: 2, batch: 7266, sum loss: 3417.997070, avg loss: 2.277147, ppl: 9.748823 +epoch: 2, batch: 7267, sum loss: 4362.909180, avg loss: 2.473305, ppl: 11.861579 +epoch: 2, batch: 7268, sum loss: 5415.225098, avg loss: 2.661044, ppl: 14.311224 +epoch: 2, batch: 7269, sum loss: 4642.135742, avg loss: 2.600636, ppl: 13.472310 +epoch: 2, batch: 7270, sum loss: 4597.022949, avg loss: 2.654170, ppl: 14.213188 +epoch: 2, batch: 7271, sum loss: 4204.502930, avg loss: 2.545099, ppl: 12.744487 +epoch: 2, batch: 7272, sum loss: 3833.715332, avg loss: 
2.217302, ppl: 9.182524 +epoch: 2, batch: 7273, sum loss: 4388.251953, avg loss: 2.545390, ppl: 12.748198 +epoch: 2, batch: 7274, sum loss: 3883.939453, avg loss: 2.363931, ppl: 10.632670 +epoch: 2, batch: 7275, sum loss: 4302.833496, avg loss: 2.603045, ppl: 13.504797 +epoch: 2, batch: 7276, sum loss: 4231.805176, avg loss: 2.661513, ppl: 14.317930 +epoch: 2, batch: 7277, sum loss: 4642.038574, avg loss: 2.507854, ppl: 12.278558 +epoch: 2, batch: 7278, sum loss: 4182.578613, avg loss: 2.431732, ppl: 11.378570 +epoch: 2, batch: 7279, sum loss: 4357.393555, avg loss: 2.395489, ppl: 10.973557 +epoch: 2, batch: 7280, sum loss: 3939.528320, avg loss: 2.328326, ppl: 10.260756 +epoch: 2, batch: 7281, sum loss: 3656.717773, avg loss: 2.484183, ppl: 11.991323 +epoch: 2, batch: 7282, sum loss: 3787.846436, avg loss: 2.440623, ppl: 11.480188 +epoch: 2, batch: 7283, sum loss: 3573.900879, avg loss: 2.239286, ppl: 9.386628 +epoch: 2, batch: 7284, sum loss: 3659.281738, avg loss: 2.402680, ppl: 11.052760 +epoch: 2, batch: 7285, sum loss: 3968.710693, avg loss: 2.515026, ppl: 12.366928 +epoch: 2, batch: 7286, sum loss: 4177.383789, avg loss: 2.499931, ppl: 12.181649 +epoch: 2, batch: 7287, sum loss: 4695.056152, avg loss: 2.768312, ppl: 15.931710 +epoch: 2, batch: 7288, sum loss: 5404.821777, avg loss: 2.931031, ppl: 18.746958 +epoch: 2, batch: 7289, sum loss: 4521.044922, avg loss: 2.584931, ppl: 13.262379 +epoch: 2, batch: 7290, sum loss: 4523.621094, avg loss: 2.557163, ppl: 12.899171 +epoch: 2, batch: 7291, sum loss: 3537.804932, avg loss: 2.315317, ppl: 10.128137 +epoch: 2, batch: 7292, sum loss: 4005.586182, avg loss: 2.527184, ppl: 12.518203 +epoch: 2, batch: 7293, sum loss: 3775.101318, avg loss: 2.334633, ppl: 10.325668 +epoch: 2, batch: 7294, sum loss: 3979.098633, avg loss: 2.312085, ppl: 10.095453 +epoch: 2, batch: 7295, sum loss: 4702.611816, avg loss: 2.517458, ppl: 12.397046 +epoch: 2, batch: 7296, sum loss: 3897.013184, avg loss: 2.402597, ppl: 11.051846 +epoch: 
2, batch: 7297, sum loss: 4368.431152, avg loss: 2.300385, ppl: 9.978024 +epoch: 2, batch: 7298, sum loss: 4414.809082, avg loss: 2.522748, ppl: 12.462797 +epoch: 2, batch: 7299, sum loss: 5071.702148, avg loss: 2.596878, ppl: 13.421767 +epoch: 2, batch: 7300, sum loss: 3783.936523, avg loss: 2.322859, ppl: 10.204803 +epoch: 2, batch: 7301, sum loss: 3545.713623, avg loss: 2.479520, ppl: 11.935535 +epoch: 2, batch: 7302, sum loss: 4502.073242, avg loss: 2.534951, ppl: 12.615815 +epoch: 2, batch: 7303, sum loss: 3884.459473, avg loss: 2.426271, ppl: 11.316604 +epoch: 2, batch: 7304, sum loss: 3567.653320, avg loss: 2.211812, ppl: 9.132254 +epoch: 2, batch: 7305, sum loss: 4573.471680, avg loss: 2.691861, ppl: 14.759119 +epoch: 2, batch: 7306, sum loss: 4416.316406, avg loss: 2.694519, ppl: 14.798396 +epoch: 2, batch: 7307, sum loss: 4300.052246, avg loss: 2.439054, ppl: 11.462193 +epoch: 2, batch: 7308, sum loss: 4475.516113, avg loss: 2.804208, ppl: 16.513992 +epoch: 2, batch: 7309, sum loss: 3489.907471, avg loss: 2.196292, ppl: 8.991608 +epoch: 2, batch: 7310, sum loss: 5420.980957, avg loss: 2.908252, ppl: 18.324730 +epoch: 2, batch: 7311, sum loss: 3578.964844, avg loss: 2.362353, ppl: 10.615902 +epoch: 2, batch: 7312, sum loss: 4301.344727, avg loss: 2.477733, ppl: 11.914226 +epoch: 2, batch: 7313, sum loss: 4344.126465, avg loss: 2.703252, ppl: 14.928205 +epoch: 2, batch: 7314, sum loss: 4822.278320, avg loss: 2.695516, ppl: 14.813162 +epoch: 2, batch: 7315, sum loss: 4182.300293, avg loss: 2.489465, ppl: 12.054819 +epoch: 2, batch: 7316, sum loss: 4639.917969, avg loss: 2.555021, ppl: 12.871568 +epoch: 2, batch: 7317, sum loss: 3633.944336, avg loss: 2.311669, ppl: 10.091256 +epoch: 2, batch: 7318, sum loss: 4162.370117, avg loss: 2.493931, ppl: 12.108777 +epoch: 2, batch: 7319, sum loss: 4315.739258, avg loss: 2.480310, ppl: 11.944966 +epoch: 2, batch: 7320, sum loss: 4944.985840, avg loss: 2.617780, ppl: 13.705260 +epoch: 2, batch: 7321, sum loss: 
4734.648438, avg loss: 2.906475, ppl: 18.292210 +epoch: 2, batch: 7322, sum loss: 3869.999023, avg loss: 2.174157, ppl: 8.794765 +epoch: 2, batch: 7323, sum loss: 4557.485352, avg loss: 2.471521, ppl: 11.840446 +epoch: 2, batch: 7324, sum loss: 4614.463867, avg loss: 2.453197, ppl: 11.625457 +epoch: 2, batch: 7325, sum loss: 4442.748047, avg loss: 2.607247, ppl: 13.561659 +epoch: 2, batch: 7326, sum loss: 4895.311035, avg loss: 2.770408, ppl: 15.965149 +epoch: 2, batch: 7327, sum loss: 3702.087891, avg loss: 2.501411, ppl: 12.199692 +epoch: 2, batch: 7328, sum loss: 4133.366211, avg loss: 2.551461, ppl: 12.825826 +epoch: 2, batch: 7329, sum loss: 3384.225098, avg loss: 2.152815, ppl: 8.609057 +epoch: 2, batch: 7330, sum loss: 4100.803223, avg loss: 2.514288, ppl: 12.357803 +epoch: 2, batch: 7331, sum loss: 4200.270508, avg loss: 2.450566, ppl: 11.594911 +epoch: 2, batch: 7332, sum loss: 3318.703613, avg loss: 2.173349, ppl: 8.787663 +epoch: 2, batch: 7333, sum loss: 4983.280762, avg loss: 2.736563, ppl: 15.433844 +epoch: 2, batch: 7334, sum loss: 3637.661133, avg loss: 2.348393, ppl: 10.468735 +epoch: 2, batch: 7335, sum loss: 4957.310547, avg loss: 2.670965, ppl: 14.453906 +epoch: 2, batch: 7336, sum loss: 4296.722168, avg loss: 2.412534, ppl: 11.162205 +epoch: 2, batch: 7337, sum loss: 5153.034180, avg loss: 2.782416, ppl: 16.158009 +epoch: 2, batch: 7338, sum loss: 4568.482910, avg loss: 2.588376, ppl: 13.308136 +epoch: 2, batch: 7339, sum loss: 3835.974854, avg loss: 2.381114, ppl: 10.816949 +epoch: 2, batch: 7340, sum loss: 4237.804688, avg loss: 2.476800, ppl: 11.903116 +epoch: 2, batch: 7341, sum loss: 4181.594727, avg loss: 2.310273, ppl: 10.077180 +epoch: 2, batch: 7342, sum loss: 4017.898682, avg loss: 2.570632, ppl: 13.074091 +epoch: 2, batch: 7343, sum loss: 3893.135254, avg loss: 2.376761, ppl: 10.769964 +epoch: 2, batch: 7344, sum loss: 4878.043945, avg loss: 2.600237, ppl: 13.466925 +epoch: 2, batch: 7345, sum loss: 4681.099121, avg loss: 2.454693, 
ppl: 11.642857 +epoch: 2, batch: 7346, sum loss: 4339.904297, avg loss: 2.502828, ppl: 12.216996 +epoch: 2, batch: 7347, sum loss: 4304.783691, avg loss: 2.488314, ppl: 12.040959 +epoch: 2, batch: 7348, sum loss: 4318.755859, avg loss: 2.490632, ppl: 12.068899 +epoch: 2, batch: 7349, sum loss: 4467.148926, avg loss: 2.453130, ppl: 11.624670 +epoch: 2, batch: 7350, sum loss: 4278.056641, avg loss: 2.513547, ppl: 12.348650 +epoch: 2, batch: 7351, sum loss: 4048.576660, avg loss: 2.525625, ppl: 12.498701 +epoch: 2, batch: 7352, sum loss: 4530.614258, avg loss: 2.635611, ppl: 13.951829 +epoch: 2, batch: 7353, sum loss: 4351.873535, avg loss: 2.385895, ppl: 10.868792 +epoch: 2, batch: 7354, sum loss: 3966.033203, avg loss: 2.609232, ppl: 13.588616 +epoch: 2, batch: 7355, sum loss: 3991.295410, avg loss: 2.305774, ppl: 10.031942 +epoch: 2, batch: 7356, sum loss: 4380.380371, avg loss: 2.656386, ppl: 14.244714 +epoch: 2, batch: 7357, sum loss: 4314.458008, avg loss: 2.621177, ppl: 13.751906 +epoch: 2, batch: 7358, sum loss: 4771.694336, avg loss: 2.739204, ppl: 15.474669 +epoch: 2, batch: 7359, sum loss: 3364.941162, avg loss: 2.320649, ppl: 10.182281 +epoch: 2, batch: 7360, sum loss: 4350.875000, avg loss: 2.525174, ppl: 12.493071 +epoch: 2, batch: 7361, sum loss: 4234.706543, avg loss: 2.334458, ppl: 10.323861 +epoch: 2, batch: 7362, sum loss: 3882.462158, avg loss: 2.672032, ppl: 14.469339 +epoch: 2, batch: 7363, sum loss: 3761.347168, avg loss: 2.667622, ppl: 14.405677 +epoch: 2, batch: 7364, sum loss: 4094.179688, avg loss: 2.538239, ppl: 12.657365 +epoch: 2, batch: 7365, sum loss: 3313.423828, avg loss: 2.231262, ppl: 9.311607 +epoch: 2, batch: 7366, sum loss: 3825.360352, avg loss: 2.392345, ppl: 10.939121 +epoch: 2, batch: 7367, sum loss: 3681.116455, avg loss: 2.250071, ppl: 9.488412 +epoch: 2, batch: 7368, sum loss: 5488.546387, avg loss: 3.022327, ppl: 20.539034 +epoch: 2, batch: 7369, sum loss: 3737.991699, avg loss: 2.443132, ppl: 11.509029 +epoch: 2, batch: 
7370, sum loss: 4094.369385, avg loss: 2.573457, ppl: 13.111065 +epoch: 2, batch: 7371, sum loss: 4931.799316, avg loss: 2.776914, ppl: 16.069357 +epoch: 2, batch: 7372, sum loss: 3757.059326, avg loss: 2.236345, ppl: 9.359059 +epoch: 2, batch: 7373, sum loss: 5033.219238, avg loss: 2.916118, ppl: 18.469444 +epoch: 2, batch: 7374, sum loss: 4717.965332, avg loss: 2.566902, ppl: 13.025405 +epoch: 2, batch: 7375, sum loss: 4678.664551, avg loss: 2.599258, ppl: 13.453754 +epoch: 2, batch: 7376, sum loss: 4019.399414, avg loss: 2.658333, ppl: 14.272478 +epoch: 2, batch: 7377, sum loss: 4771.069336, avg loss: 2.578956, ppl: 13.183372 +epoch: 2, batch: 7378, sum loss: 3715.919922, avg loss: 2.282506, ppl: 9.801211 +epoch: 2, batch: 7379, sum loss: 3515.814453, avg loss: 2.222386, ppl: 9.229322 +epoch: 2, batch: 7380, sum loss: 3515.632324, avg loss: 2.453337, ppl: 11.627087 +epoch: 2, batch: 7381, sum loss: 4598.061523, avg loss: 2.450992, ppl: 11.599852 +epoch: 2, batch: 7382, sum loss: 4637.183105, avg loss: 2.718161, ppl: 15.152432 +epoch: 2, batch: 7383, sum loss: 3989.178711, avg loss: 2.473142, ppl: 11.859656 +epoch: 2, batch: 7384, sum loss: 4574.642578, avg loss: 2.535833, ppl: 12.626944 +epoch: 2, batch: 7385, sum loss: 3917.665039, avg loss: 2.360039, ppl: 10.591367 +epoch: 2, batch: 7386, sum loss: 5164.633301, avg loss: 2.549177, ppl: 12.796573 +epoch: 2, batch: 7387, sum loss: 4245.800781, avg loss: 2.519763, ppl: 12.425652 +epoch: 2, batch: 7388, sum loss: 4556.179199, avg loss: 2.638205, ppl: 13.988067 +epoch: 2, batch: 7389, sum loss: 3427.703857, avg loss: 2.115867, ppl: 8.296773 +epoch: 2, batch: 7390, sum loss: 4153.803223, avg loss: 2.539000, ppl: 12.666992 +epoch: 2, batch: 7391, sum loss: 4972.027344, avg loss: 2.765310, ppl: 15.883961 +epoch: 2, batch: 7392, sum loss: 3109.869873, avg loss: 2.118440, ppl: 8.318151 +epoch: 2, batch: 7393, sum loss: 4614.475586, avg loss: 2.487588, ppl: 12.032219 +epoch: 2, batch: 7394, sum loss: 4569.610352, avg 
loss: 2.615690, ppl: 13.676650 +epoch: 2, batch: 7395, sum loss: 4163.794922, avg loss: 2.531182, ppl: 12.568357 +epoch: 2, batch: 7396, sum loss: 4662.233887, avg loss: 2.521489, ppl: 12.447122 +epoch: 2, batch: 7397, sum loss: 4249.833984, avg loss: 2.427090, ppl: 11.325872 +epoch: 2, batch: 7398, sum loss: 4210.173828, avg loss: 2.584514, ppl: 13.256849 +epoch: 2, batch: 7399, sum loss: 4262.912109, avg loss: 2.534431, ppl: 12.609248 +epoch: 2, batch: 7400, sum loss: 3284.273926, avg loss: 2.237244, ppl: 9.367477 +epoch: 2, batch: 7401, sum loss: 5068.867676, avg loss: 2.413747, ppl: 11.175754 +epoch: 2, batch: 7402, sum loss: 5048.127930, avg loss: 2.672381, ppl: 14.474394 +epoch: 2, batch: 7403, sum loss: 3859.588379, avg loss: 2.410736, ppl: 11.142159 +epoch: 2, batch: 7404, sum loss: 4228.540527, avg loss: 2.488841, ppl: 12.047303 +epoch: 2, batch: 7405, sum loss: 4074.461426, avg loss: 2.488981, ppl: 12.048989 +epoch: 2, batch: 7406, sum loss: 4863.843750, avg loss: 2.627684, ppl: 13.841681 +epoch: 2, batch: 7407, sum loss: 4297.383789, avg loss: 2.481169, ppl: 11.955226 +epoch: 2, batch: 7408, sum loss: 4480.275879, avg loss: 2.449577, ppl: 11.583445 +epoch: 2, batch: 7409, sum loss: 4379.568848, avg loss: 2.862463, ppl: 17.504597 +epoch: 2, batch: 7410, sum loss: 3855.654297, avg loss: 2.614003, ppl: 13.653596 +epoch: 2, batch: 7411, sum loss: 4218.843750, avg loss: 2.499315, ppl: 12.174152 +epoch: 2, batch: 7412, sum loss: 4394.895996, avg loss: 2.576141, ppl: 13.146304 +epoch: 2, batch: 7413, sum loss: 4495.995117, avg loss: 2.636947, ppl: 13.970492 +epoch: 2, batch: 7414, sum loss: 3714.748291, avg loss: 2.305865, ppl: 10.032853 +epoch: 2, batch: 7415, sum loss: 4875.678223, avg loss: 2.770272, ppl: 15.962968 +epoch: 2, batch: 7416, sum loss: 4145.304688, avg loss: 2.524546, ppl: 12.485228 +epoch: 2, batch: 7417, sum loss: 4945.164551, avg loss: 2.784440, ppl: 16.190741 +epoch: 2, batch: 7418, sum loss: 4540.555664, avg loss: 2.844960, ppl: 17.200867 
+epoch: 2, batch: 7419, sum loss: 4778.245605, avg loss: 2.622528, ppl: 13.770489 +epoch: 2, batch: 7420, sum loss: 4450.118164, avg loss: 2.458629, ppl: 11.688774 +epoch: 2, batch: 7421, sum loss: 5479.019043, avg loss: 2.614036, ppl: 13.654045 +epoch: 2, batch: 7422, sum loss: 4751.211914, avg loss: 2.579377, ppl: 13.188915 +epoch: 2, batch: 7423, sum loss: 3740.329102, avg loss: 2.419359, ppl: 11.238652 +epoch: 2, batch: 7424, sum loss: 3470.853271, avg loss: 2.275969, ppl: 9.737352 +epoch: 2, batch: 7425, sum loss: 4686.177246, avg loss: 2.758198, ppl: 15.771390 +epoch: 2, batch: 7426, sum loss: 5350.570312, avg loss: 3.083902, ppl: 21.843472 +epoch: 2, batch: 7427, sum loss: 4129.866699, avg loss: 2.543021, ppl: 12.718040 +epoch: 2, batch: 7428, sum loss: 4217.562500, avg loss: 2.407285, ppl: 11.103770 +epoch: 2, batch: 7429, sum loss: 4642.174316, avg loss: 2.622697, ppl: 13.772823 +epoch: 2, batch: 7430, sum loss: 4083.764160, avg loss: 2.219437, ppl: 9.202147 +epoch: 2, batch: 7431, sum loss: 4229.107422, avg loss: 2.511347, ppl: 12.321510 +epoch: 2, batch: 7432, sum loss: 3667.957031, avg loss: 2.498608, ppl: 12.165552 +epoch: 2, batch: 7433, sum loss: 3990.511475, avg loss: 2.445166, ppl: 11.532468 +epoch: 2, batch: 7434, sum loss: 3923.385742, avg loss: 2.564305, ppl: 12.991621 +epoch: 2, batch: 7435, sum loss: 2786.329102, avg loss: 2.109257, ppl: 8.242119 +epoch: 2, batch: 7436, sum loss: 3828.255127, avg loss: 2.370437, ppl: 10.702065 +epoch: 2, batch: 7437, sum loss: 3304.455078, avg loss: 2.280507, ppl: 9.781642 +epoch: 2, batch: 7438, sum loss: 3844.343506, avg loss: 2.564605, ppl: 12.995530 +epoch: 2, batch: 7439, sum loss: 4428.371094, avg loss: 2.530498, ppl: 12.559756 +epoch: 2, batch: 7440, sum loss: 5196.678711, avg loss: 2.643275, ppl: 14.059173 +epoch: 2, batch: 7441, sum loss: 4251.722656, avg loss: 2.715021, ppl: 15.104926 +epoch: 2, batch: 7442, sum loss: 3867.358398, avg loss: 2.545990, ppl: 12.755847 +epoch: 2, batch: 7443, sum loss: 
4797.346680, avg loss: 2.705779, ppl: 14.965976 +epoch: 2, batch: 7444, sum loss: 3801.879395, avg loss: 2.410830, ppl: 11.143209 +epoch: 2, batch: 7445, sum loss: 3673.399414, avg loss: 2.661884, ppl: 14.323246 +epoch: 2, batch: 7446, sum loss: 3738.963135, avg loss: 2.404478, ppl: 11.072650 +epoch: 2, batch: 7447, sum loss: 4922.580078, avg loss: 2.740858, ppl: 15.500273 +epoch: 2, batch: 7448, sum loss: 4102.472656, avg loss: 2.487855, ppl: 12.035429 +epoch: 2, batch: 7449, sum loss: 4424.496094, avg loss: 2.515347, ppl: 12.370907 +epoch: 2, batch: 7450, sum loss: 3752.953857, avg loss: 2.396522, ppl: 10.984907 +epoch: 2, batch: 7451, sum loss: 4592.112793, avg loss: 2.784786, ppl: 16.196360 +epoch: 2, batch: 7452, sum loss: 3992.025146, avg loss: 2.434162, ppl: 11.406253 +epoch: 2, batch: 7453, sum loss: 5339.894043, avg loss: 2.718887, ppl: 15.163437 +epoch: 2, batch: 7454, sum loss: 3648.741699, avg loss: 2.354027, ppl: 10.527881 +epoch: 2, batch: 7455, sum loss: 4099.070801, avg loss: 2.519404, ppl: 12.421193 +epoch: 2, batch: 7456, sum loss: 4006.401855, avg loss: 2.524513, ppl: 12.484814 +epoch: 2, batch: 7457, sum loss: 4854.761230, avg loss: 2.709130, ppl: 15.016210 +epoch: 2, batch: 7458, sum loss: 4569.248047, avg loss: 2.500957, ppl: 12.194156 +epoch: 2, batch: 7459, sum loss: 4820.846191, avg loss: 2.764246, ppl: 15.867079 +epoch: 2, batch: 7460, sum loss: 3943.693848, avg loss: 2.673691, ppl: 14.493362 +epoch: 2, batch: 7461, sum loss: 3540.937012, avg loss: 2.311317, ppl: 10.087699 +epoch: 2, batch: 7462, sum loss: 4442.976562, avg loss: 2.534499, ppl: 12.610110 +epoch: 2, batch: 7463, sum loss: 4235.057617, avg loss: 2.443772, ppl: 11.516402 +epoch: 2, batch: 7464, sum loss: 3130.468750, avg loss: 2.332689, ppl: 10.305616 +epoch: 2, batch: 7465, sum loss: 4666.895508, avg loss: 2.539116, ppl: 12.668468 +epoch: 2, batch: 7466, sum loss: 4222.016113, avg loss: 2.443296, ppl: 11.510923 +epoch: 2, batch: 7467, sum loss: 3924.415039, avg loss: 
2.506012, ppl: 12.255959 +epoch: 2, batch: 7468, sum loss: 4273.872070, avg loss: 2.565349, ppl: 13.005200 +epoch: 2, batch: 7469, sum loss: 5632.346680, avg loss: 2.980078, ppl: 19.689348 +epoch: 2, batch: 7470, sum loss: 4637.926270, avg loss: 2.573766, ppl: 13.115123 +epoch: 2, batch: 7471, sum loss: 4904.745117, avg loss: 2.726373, ppl: 15.277374 +epoch: 2, batch: 7472, sum loss: 3815.938721, avg loss: 2.323958, ppl: 10.216028 +epoch: 2, batch: 7473, sum loss: 4602.478027, avg loss: 2.682097, ppl: 14.615706 +epoch: 2, batch: 7474, sum loss: 4381.790039, avg loss: 2.565451, ppl: 13.006518 +epoch: 2, batch: 7475, sum loss: 3461.288330, avg loss: 2.508180, ppl: 12.282557 +epoch: 2, batch: 7476, sum loss: 3776.444824, avg loss: 2.316837, ppl: 10.143542 +epoch: 2, batch: 7477, sum loss: 4049.038574, avg loss: 2.508698, ppl: 12.288919 +epoch: 2, batch: 7478, sum loss: 4786.153809, avg loss: 3.093829, ppl: 22.061398 +epoch: 2, batch: 7479, sum loss: 4302.797363, avg loss: 2.574984, ppl: 13.131102 +epoch: 2, batch: 7480, sum loss: 3493.503662, avg loss: 2.201326, ppl: 9.036985 +epoch: 2, batch: 7481, sum loss: 3760.576660, avg loss: 2.553005, ppl: 12.845650 +epoch: 2, batch: 7482, sum loss: 4166.054199, avg loss: 2.710510, ppl: 15.036946 +epoch: 2, batch: 7483, sum loss: 3758.971924, avg loss: 2.425143, ppl: 11.303848 +epoch: 2, batch: 7484, sum loss: 4100.065430, avg loss: 2.520015, ppl: 12.428789 +epoch: 2, batch: 7485, sum loss: 3890.369141, avg loss: 2.539406, ppl: 12.672136 +epoch: 2, batch: 7486, sum loss: 3717.945801, avg loss: 2.264279, ppl: 9.624182 +epoch: 2, batch: 7487, sum loss: 4819.841797, avg loss: 2.620904, ppl: 13.748142 +epoch: 2, batch: 7488, sum loss: 4182.637695, avg loss: 2.385988, ppl: 10.869802 +epoch: 2, batch: 7489, sum loss: 3718.418945, avg loss: 2.240011, ppl: 9.393439 +epoch: 2, batch: 7490, sum loss: 4925.402344, avg loss: 2.887106, ppl: 17.941307 +epoch: 2, batch: 7491, sum loss: 4566.659180, avg loss: 2.651951, ppl: 14.181674 +epoch: 
2, batch: 7492, sum loss: 4209.919434, avg loss: 2.525446, ppl: 12.496472 +epoch: 2, batch: 7493, sum loss: 3496.903564, avg loss: 2.343769, ppl: 10.420441 +epoch: 2, batch: 7494, sum loss: 4327.862305, avg loss: 2.500209, ppl: 12.185044 +epoch: 2, batch: 7495, sum loss: 4595.079102, avg loss: 2.649988, ppl: 14.153868 +epoch: 2, batch: 7496, sum loss: 4153.138672, avg loss: 2.579589, ppl: 13.191717 +epoch: 2, batch: 7497, sum loss: 3054.593262, avg loss: 2.085047, ppl: 8.044966 +epoch: 2, batch: 7498, sum loss: 4909.728027, avg loss: 2.684379, ppl: 14.649096 +epoch: 2, batch: 7499, sum loss: 4440.405273, avg loss: 2.411953, ppl: 11.155726 +epoch: 2, batch: 7500, sum loss: 4834.055664, avg loss: 2.729563, ppl: 15.326184 +epoch: 2, batch: 7501, sum loss: 4333.652832, avg loss: 2.497783, ppl: 12.155512 +epoch: 2, batch: 7502, sum loss: 3856.424072, avg loss: 2.399766, ppl: 11.020599 +epoch: 2, batch: 7503, sum loss: 4486.305176, avg loss: 2.537503, ppl: 12.648049 +epoch: 2, batch: 7504, sum loss: 3641.181885, avg loss: 2.196129, ppl: 8.990146 +epoch: 2, batch: 7505, sum loss: 4667.548340, avg loss: 2.493349, ppl: 12.101732 +epoch: 2, batch: 7506, sum loss: 4258.241699, avg loss: 2.659739, ppl: 14.292555 +epoch: 2, batch: 7507, sum loss: 5411.576172, avg loss: 2.704436, ppl: 14.945882 +epoch: 2, batch: 7508, sum loss: 4098.877930, avg loss: 2.415367, ppl: 11.193879 +epoch: 2, batch: 7509, sum loss: 5031.926270, avg loss: 2.618068, ppl: 13.709211 +epoch: 2, batch: 7510, sum loss: 3519.020996, avg loss: 2.313623, ppl: 10.110995 +epoch: 2, batch: 7511, sum loss: 3794.627441, avg loss: 2.290059, ppl: 9.875519 +epoch: 2, batch: 7512, sum loss: 4555.328125, avg loss: 2.560611, ppl: 12.943729 +epoch: 2, batch: 7513, sum loss: 4408.658691, avg loss: 2.578163, ppl: 13.172916 +epoch: 2, batch: 7514, sum loss: 3542.116699, avg loss: 2.273502, ppl: 9.713361 +epoch: 2, batch: 7515, sum loss: 5243.145996, avg loss: 2.708236, ppl: 15.002794 +epoch: 2, batch: 7516, sum loss: 
4269.883789, avg loss: 2.537067, ppl: 12.642535 +epoch: 2, batch: 7517, sum loss: 3758.158203, avg loss: 2.344453, ppl: 10.427566 +epoch: 2, batch: 7518, sum loss: 3928.775391, avg loss: 2.413253, ppl: 11.170235 +epoch: 2, batch: 7519, sum loss: 5186.347656, avg loss: 2.698412, ppl: 14.856121 +epoch: 2, batch: 7520, sum loss: 4836.059570, avg loss: 2.758734, ppl: 15.779845 +epoch: 2, batch: 7521, sum loss: 4349.635742, avg loss: 2.543647, ppl: 12.725993 +epoch: 2, batch: 7522, sum loss: 3100.267090, avg loss: 2.315360, ppl: 10.128570 +epoch: 2, batch: 7523, sum loss: 4573.586914, avg loss: 2.404620, ppl: 11.074221 +epoch: 2, batch: 7524, sum loss: 3874.065430, avg loss: 2.589616, ppl: 13.324655 +epoch: 2, batch: 7525, sum loss: 4223.958496, avg loss: 2.314498, ppl: 10.119839 +epoch: 2, batch: 7526, sum loss: 5370.520508, avg loss: 2.698754, ppl: 14.861204 +epoch: 2, batch: 7527, sum loss: 3970.158203, avg loss: 2.569682, ppl: 13.061668 +epoch: 2, batch: 7528, sum loss: 4792.453125, avg loss: 2.641926, ppl: 14.040216 +epoch: 2, batch: 7529, sum loss: 4346.228027, avg loss: 2.596313, ppl: 13.414192 +epoch: 2, batch: 7530, sum loss: 3667.697021, avg loss: 2.570215, ppl: 13.068637 +epoch: 2, batch: 7531, sum loss: 3732.130371, avg loss: 2.240174, ppl: 9.394969 +epoch: 2, batch: 7532, sum loss: 4645.892578, avg loss: 2.978136, ppl: 19.651159 +epoch: 2, batch: 7533, sum loss: 4474.979004, avg loss: 2.750448, ppl: 15.649641 +epoch: 2, batch: 7534, sum loss: 5049.768555, avg loss: 2.696086, ppl: 14.821602 +epoch: 2, batch: 7535, sum loss: 3390.041260, avg loss: 2.175893, ppl: 8.810050 +epoch: 2, batch: 7536, sum loss: 4743.470215, avg loss: 2.672377, ppl: 14.474339 +epoch: 2, batch: 7537, sum loss: 3877.895020, avg loss: 2.554608, ppl: 12.866257 +epoch: 2, batch: 7538, sum loss: 3577.048340, avg loss: 2.262523, ppl: 9.607295 +epoch: 2, batch: 7539, sum loss: 5963.155273, avg loss: 2.868281, ppl: 17.606720 +epoch: 2, batch: 7540, sum loss: 4781.605957, avg loss: 2.617190, 
ppl: 13.697179 +epoch: 2, batch: 7541, sum loss: 4714.237793, avg loss: 2.850204, ppl: 17.291313 +epoch: 2, batch: 7542, sum loss: 4104.637207, avg loss: 2.387805, ppl: 10.889570 +epoch: 2, batch: 7543, sum loss: 4232.703613, avg loss: 2.483981, ppl: 11.988899 +epoch: 2, batch: 7544, sum loss: 3870.999268, avg loss: 2.541694, ppl: 12.701164 +epoch: 2, batch: 7545, sum loss: 3261.993896, avg loss: 2.384499, ppl: 10.853620 +epoch: 2, batch: 7546, sum loss: 5683.501953, avg loss: 2.893840, ppl: 18.062542 +epoch: 2, batch: 7547, sum loss: 4311.785156, avg loss: 2.586554, ppl: 13.283914 +epoch: 2, batch: 7548, sum loss: 4092.672119, avg loss: 2.444846, ppl: 11.528776 +epoch: 2, batch: 7549, sum loss: 4871.636230, avg loss: 2.589918, ppl: 13.328680 +epoch: 2, batch: 7550, sum loss: 4011.788330, avg loss: 2.352955, ppl: 10.516602 +epoch: 2, batch: 7551, sum loss: 5183.230469, avg loss: 2.624420, ppl: 13.796576 +epoch: 2, batch: 7552, sum loss: 3435.601074, avg loss: 2.279762, ppl: 9.774352 +epoch: 2, batch: 7553, sum loss: 4029.365234, avg loss: 2.437608, ppl: 11.445624 +epoch: 2, batch: 7554, sum loss: 3814.357666, avg loss: 2.421814, ppl: 11.266280 +epoch: 2, batch: 7555, sum loss: 4456.015137, avg loss: 2.740477, ppl: 15.494369 +epoch: 2, batch: 7556, sum loss: 3966.744629, avg loss: 2.444082, ppl: 11.519967 +epoch: 2, batch: 7557, sum loss: 4294.303223, avg loss: 2.293966, ppl: 9.914175 +epoch: 2, batch: 7558, sum loss: 3251.309326, avg loss: 2.501007, ppl: 12.194769 +epoch: 2, batch: 7559, sum loss: 4529.429199, avg loss: 2.557554, ppl: 12.904222 +epoch: 2, batch: 7560, sum loss: 4817.184082, avg loss: 2.610940, ppl: 13.611836 +epoch: 2, batch: 7561, sum loss: 4795.785645, avg loss: 2.831042, ppl: 16.963133 +epoch: 2, batch: 7562, sum loss: 5151.312012, avg loss: 2.958824, ppl: 19.275284 +epoch: 2, batch: 7563, sum loss: 5388.939941, avg loss: 2.622355, ppl: 13.768112 +epoch: 2, batch: 7564, sum loss: 4338.806152, avg loss: 2.698262, ppl: 14.853900 +epoch: 2, batch: 
7565, sum loss: 4620.146973, avg loss: 2.578207, ppl: 13.173497 +epoch: 2, batch: 7566, sum loss: 4129.798828, avg loss: 2.477384, ppl: 11.910065 +epoch: 2, batch: 7567, sum loss: 4692.298828, avg loss: 2.586714, ppl: 13.286039 +epoch: 2, batch: 7568, sum loss: 4202.051270, avg loss: 2.920119, ppl: 18.543491 +epoch: 2, batch: 7569, sum loss: 4701.291016, avg loss: 2.492731, ppl: 12.094265 +epoch: 2, batch: 7570, sum loss: 4765.967285, avg loss: 2.614354, ppl: 13.658392 +epoch: 2, batch: 7571, sum loss: 3670.162109, avg loss: 2.471489, ppl: 11.840069 +epoch: 2, batch: 7572, sum loss: 3941.776367, avg loss: 2.390404, ppl: 10.917904 +epoch: 2, batch: 7573, sum loss: 3287.020264, avg loss: 2.209019, ppl: 9.106778 +epoch: 2, batch: 7574, sum loss: 3926.297363, avg loss: 2.429639, ppl: 11.354786 +epoch: 2, batch: 7575, sum loss: 4095.413086, avg loss: 2.402002, ppl: 11.045265 +epoch: 2, batch: 7576, sum loss: 4328.020508, avg loss: 2.314450, ppl: 10.119351 +epoch: 2, batch: 7577, sum loss: 4946.006836, avg loss: 2.781781, ppl: 16.147758 +epoch: 2, batch: 7578, sum loss: 4567.216309, avg loss: 2.537343, ppl: 12.646020 +epoch: 2, batch: 7579, sum loss: 3974.497803, avg loss: 2.465569, ppl: 11.770181 +epoch: 2, batch: 7580, sum loss: 4155.585938, avg loss: 2.523124, ppl: 12.467487 +epoch: 2, batch: 7581, sum loss: 3996.768555, avg loss: 2.234080, ppl: 9.337883 +epoch: 2, batch: 7582, sum loss: 4059.892090, avg loss: 2.641439, ppl: 14.033386 +epoch: 2, batch: 7583, sum loss: 4859.422363, avg loss: 2.723891, ppl: 15.239511 +epoch: 2, batch: 7584, sum loss: 4393.705078, avg loss: 2.544126, ppl: 12.732093 +epoch: 2, batch: 7585, sum loss: 3920.540039, avg loss: 2.584403, ppl: 13.255377 +epoch: 2, batch: 7586, sum loss: 4208.589844, avg loss: 2.567779, ppl: 13.036835 +epoch: 2, batch: 7587, sum loss: 4560.459961, avg loss: 2.563496, ppl: 12.981125 +epoch: 2, batch: 7588, sum loss: 3936.316406, avg loss: 2.362735, ppl: 10.619955 +epoch: 2, batch: 7589, sum loss: 4865.415527, avg 
loss: 2.794610, ppl: 16.356245 +epoch: 2, batch: 7590, sum loss: 4152.370117, avg loss: 2.549030, ppl: 12.794688 +epoch: 2, batch: 7591, sum loss: 3237.708496, avg loss: 2.234443, ppl: 9.341281 +epoch: 2, batch: 7592, sum loss: 3711.602295, avg loss: 2.396128, ppl: 10.980577 +epoch: 2, batch: 7593, sum loss: 4064.021729, avg loss: 2.443789, ppl: 11.516600 +epoch: 2, batch: 7594, sum loss: 4256.432617, avg loss: 2.371272, ppl: 10.711004 +epoch: 2, batch: 7595, sum loss: 4183.530762, avg loss: 2.540092, ppl: 12.680831 +epoch: 2, batch: 7596, sum loss: 4684.441895, avg loss: 2.571044, ppl: 13.079472 +epoch: 2, batch: 7597, sum loss: 4548.575195, avg loss: 2.638385, ppl: 13.990588 +epoch: 2, batch: 7598, sum loss: 4235.354004, avg loss: 2.513563, ppl: 12.348853 +epoch: 2, batch: 7599, sum loss: 3809.034912, avg loss: 2.607142, ppl: 13.560246 +epoch: 2, batch: 7600, sum loss: 4023.276611, avg loss: 2.489651, ppl: 12.057073 +epoch: 2, batch: 7601, sum loss: 3862.396240, avg loss: 2.710453, ppl: 15.036089 +epoch: 2, batch: 7602, sum loss: 4506.205078, avg loss: 2.618364, ppl: 13.713275 +epoch: 2, batch: 7603, sum loss: 4160.604004, avg loss: 2.395282, ppl: 10.971287 +epoch: 2, batch: 7604, sum loss: 3982.050781, avg loss: 2.444476, ppl: 11.524506 +epoch: 2, batch: 7605, sum loss: 4481.135742, avg loss: 2.645298, ppl: 14.087649 +epoch: 2, batch: 7606, sum loss: 4058.967041, avg loss: 2.535270, ppl: 12.619838 +epoch: 2, batch: 7607, sum loss: 4264.214844, avg loss: 2.371643, ppl: 10.714986 +epoch: 2, batch: 7608, sum loss: 4263.613281, avg loss: 2.510962, ppl: 12.316773 +epoch: 2, batch: 7609, sum loss: 4704.656738, avg loss: 2.563846, ppl: 12.985662 +epoch: 2, batch: 7610, sum loss: 3310.500977, avg loss: 2.163726, ppl: 8.703507 +epoch: 2, batch: 7611, sum loss: 4500.194824, avg loss: 2.549685, ppl: 12.803077 +epoch: 2, batch: 7612, sum loss: 3960.409912, avg loss: 2.449233, ppl: 11.579465 +epoch: 2, batch: 7613, sum loss: 4987.073730, avg loss: 2.623395, ppl: 13.782435 
+epoch: 2, batch: 7614, sum loss: 5282.044922, avg loss: 2.972451, ppl: 19.539747 +epoch: 2, batch: 7615, sum loss: 4395.902344, avg loss: 2.555757, ppl: 12.881050 +epoch: 2, batch: 7616, sum loss: 3932.116211, avg loss: 2.306226, ppl: 10.036480 +epoch: 2, batch: 7617, sum loss: 3979.156250, avg loss: 2.309435, ppl: 10.068733 +epoch: 2, batch: 7618, sum loss: 4198.827148, avg loss: 2.535524, ppl: 12.623039 +epoch: 2, batch: 7619, sum loss: 3815.898682, avg loss: 2.356948, ppl: 10.558676 +epoch: 2, batch: 7620, sum loss: 4452.449707, avg loss: 2.465365, ppl: 11.767781 +epoch: 2, batch: 7621, sum loss: 4277.769531, avg loss: 2.772372, ppl: 15.996530 +epoch: 2, batch: 7622, sum loss: 3689.033691, avg loss: 2.501040, ppl: 12.195168 +epoch: 2, batch: 7623, sum loss: 4119.325195, avg loss: 2.569760, ppl: 13.062687 +epoch: 2, batch: 7624, sum loss: 4096.879883, avg loss: 2.496575, ppl: 12.140841 +epoch: 2, batch: 7625, sum loss: 4129.256836, avg loss: 2.400731, ppl: 11.031233 +epoch: 2, batch: 7626, sum loss: 3981.012207, avg loss: 2.365426, ppl: 10.648575 +epoch: 2, batch: 7627, sum loss: 4317.587402, avg loss: 2.504401, ppl: 12.236230 +epoch: 2, batch: 7628, sum loss: 5236.751465, avg loss: 2.786989, ppl: 16.232067 +epoch: 2, batch: 7629, sum loss: 4036.062012, avg loss: 2.459514, ppl: 11.699121 +epoch: 2, batch: 7630, sum loss: 3543.688965, avg loss: 2.343710, ppl: 10.419820 +epoch: 2, batch: 7631, sum loss: 3981.667236, avg loss: 2.381380, ppl: 10.819825 +epoch: 2, batch: 7632, sum loss: 4112.972656, avg loss: 2.648405, ppl: 14.131478 +epoch: 2, batch: 7633, sum loss: 4364.064453, avg loss: 2.544644, ppl: 12.738690 +epoch: 2, batch: 7634, sum loss: 5340.431641, avg loss: 2.918269, ppl: 18.509214 +epoch: 2, batch: 7635, sum loss: 3975.897705, avg loss: 2.400904, ppl: 11.033151 +epoch: 2, batch: 7636, sum loss: 4229.358887, avg loss: 2.440484, ppl: 11.478596 +epoch: 2, batch: 7637, sum loss: 4571.255859, avg loss: 2.651541, ppl: 14.175863 +epoch: 2, batch: 7638, sum 
loss: 4621.255859, avg loss: 2.715191, ppl: 15.107501 +epoch: 2, batch: 7639, sum loss: 4106.710938, avg loss: 2.451768, ppl: 11.608852 +epoch: 2, batch: 7640, sum loss: 3687.410889, avg loss: 2.437152, ppl: 11.440413 +epoch: 2, batch: 7641, sum loss: 4542.952148, avg loss: 2.835800, ppl: 17.044033 +epoch: 2, batch: 7642, sum loss: 4146.917969, avg loss: 2.753598, ppl: 15.699011 +epoch: 2, batch: 7643, sum loss: 4118.508301, avg loss: 2.275419, ppl: 9.731996 +epoch: 2, batch: 7644, sum loss: 3615.600586, avg loss: 2.413618, ppl: 11.174320 +epoch: 2, batch: 7645, sum loss: 3969.467285, avg loss: 2.343251, ppl: 10.415041 +epoch: 2, batch: 7646, sum loss: 3712.314941, avg loss: 2.499875, ppl: 12.180975 +epoch: 2, batch: 7647, sum loss: 3890.053467, avg loss: 2.423709, ppl: 11.287649 +epoch: 2, batch: 7648, sum loss: 4876.833008, avg loss: 2.777240, ppl: 16.074591 +epoch: 2, batch: 7649, sum loss: 3850.174561, avg loss: 2.308258, ppl: 10.056893 +epoch: 2, batch: 7650, sum loss: 4731.307617, avg loss: 2.503337, ppl: 12.223220 +epoch: 2, batch: 7651, sum loss: 4229.411621, avg loss: 2.486427, ppl: 12.018253 +epoch: 2, batch: 7652, sum loss: 3580.655273, avg loss: 2.239309, ppl: 9.386844 +epoch: 2, batch: 7653, sum loss: 4260.384766, avg loss: 2.597795, ppl: 13.434090 +epoch: 2, batch: 7654, sum loss: 4507.777832, avg loss: 2.507107, ppl: 12.269378 +epoch: 2, batch: 7655, sum loss: 4399.092285, avg loss: 2.465859, ppl: 11.773591 +epoch: 2, batch: 7656, sum loss: 4553.275879, avg loss: 2.627395, ppl: 13.837678 +epoch: 2, batch: 7657, sum loss: 4643.794922, avg loss: 2.767458, ppl: 15.918122 +epoch: 2, batch: 7658, sum loss: 4241.518066, avg loss: 2.446089, ppl: 11.543111 +epoch: 2, batch: 7659, sum loss: 4058.920166, avg loss: 2.645971, ppl: 14.097131 +epoch: 2, batch: 7660, sum loss: 4491.811523, avg loss: 2.728926, ppl: 15.316423 +epoch: 2, batch: 7661, sum loss: 3253.338135, avg loss: 2.225266, ppl: 9.255939 +epoch: 2, batch: 7662, sum loss: 3449.261963, avg loss: 
2.338483, ppl: 10.365497 +epoch: 2, batch: 7663, sum loss: 3952.583984, avg loss: 2.467281, ppl: 11.790343 +epoch: 2, batch: 7664, sum loss: 4531.775391, avg loss: 2.602973, ppl: 13.503822 +epoch: 2, batch: 7665, sum loss: 4781.655273, avg loss: 2.630173, ppl: 13.876177 +epoch: 2, batch: 7666, sum loss: 4849.776855, avg loss: 2.571462, ppl: 13.084940 +epoch: 2, batch: 7667, sum loss: 5327.980469, avg loss: 2.677377, ppl: 14.546886 +epoch: 2, batch: 7668, sum loss: 3845.215576, avg loss: 2.253936, ppl: 9.525156 +epoch: 2, batch: 7669, sum loss: 4431.236328, avg loss: 2.642359, ppl: 14.046300 +epoch: 2, batch: 7670, sum loss: 3840.704102, avg loss: 2.420104, ppl: 11.247024 +epoch: 2, batch: 7671, sum loss: 4176.974609, avg loss: 2.470121, ppl: 11.823879 +epoch: 2, batch: 7672, sum loss: 3955.320312, avg loss: 2.384159, ppl: 10.849937 +epoch: 2, batch: 7673, sum loss: 4866.767578, avg loss: 2.595609, ppl: 13.404754 +epoch: 2, batch: 7674, sum loss: 4547.379395, avg loss: 2.513753, ppl: 12.351202 +epoch: 2, batch: 7675, sum loss: 4376.476562, avg loss: 2.636432, ppl: 13.963289 +epoch: 2, batch: 7676, sum loss: 4183.728027, avg loss: 2.303815, ppl: 10.012305 +epoch: 2, batch: 7677, sum loss: 3617.230469, avg loss: 2.419552, ppl: 11.240824 +epoch: 2, batch: 7678, sum loss: 3596.338623, avg loss: 2.394367, ppl: 10.961252 +epoch: 2, batch: 7679, sum loss: 4488.965820, avg loss: 2.753967, ppl: 15.704806 +epoch: 2, batch: 7680, sum loss: 4577.626953, avg loss: 2.656777, ppl: 14.250288 +epoch: 2, batch: 7681, sum loss: 4159.405273, avg loss: 2.449591, ppl: 11.583607 +epoch: 2, batch: 7682, sum loss: 3817.833984, avg loss: 2.231347, ppl: 9.312400 +epoch: 2, batch: 7683, sum loss: 4592.835938, avg loss: 2.498822, ppl: 12.168157 +epoch: 2, batch: 7684, sum loss: 3643.263916, avg loss: 2.308786, ppl: 10.062199 +epoch: 2, batch: 7685, sum loss: 3941.325195, avg loss: 2.572667, ppl: 13.100713 +epoch: 2, batch: 7686, sum loss: 4087.472656, avg loss: 2.532511, ppl: 12.585068 +epoch: 
2, batch: 7687, sum loss: 3463.288574, avg loss: 2.253278, ppl: 9.518890 +epoch: 2, batch: 7688, sum loss: 3834.691406, avg loss: 2.478792, ppl: 11.926850 +epoch: 2, batch: 7689, sum loss: 4634.785156, avg loss: 2.643916, ppl: 14.068192 +epoch: 2, batch: 7690, sum loss: 5491.094238, avg loss: 2.778894, ppl: 16.101202 +epoch: 2, batch: 7691, sum loss: 4014.790527, avg loss: 2.479797, ppl: 11.938836 +epoch: 2, batch: 7692, sum loss: 4577.362305, avg loss: 2.468912, ppl: 11.809587 +epoch: 2, batch: 7693, sum loss: 4269.451172, avg loss: 2.539828, ppl: 12.677491 +epoch: 2, batch: 7694, sum loss: 3993.077148, avg loss: 2.742498, ppl: 15.525723 +epoch: 2, batch: 7695, sum loss: 4112.824219, avg loss: 2.304103, ppl: 10.015194 +epoch: 2, batch: 7696, sum loss: 3909.073975, avg loss: 2.410033, ppl: 11.134331 +epoch: 2, batch: 7697, sum loss: 4454.772949, avg loss: 2.549956, ppl: 12.806539 +epoch: 2, batch: 7698, sum loss: 4501.251953, avg loss: 2.610935, ppl: 13.611774 +epoch: 2, batch: 7699, sum loss: 3677.521973, avg loss: 2.206072, ppl: 9.079979 +epoch: 2, batch: 7700, sum loss: 4528.541992, avg loss: 2.575962, ppl: 13.143960 +epoch: 2, batch: 7701, sum loss: 4298.682617, avg loss: 2.638848, ppl: 13.997064 +epoch: 2, batch: 7702, sum loss: 3542.529297, avg loss: 2.482501, ppl: 11.971170 +epoch: 2, batch: 7703, sum loss: 4321.583008, avg loss: 2.664354, ppl: 14.358676 +epoch: 2, batch: 7704, sum loss: 3777.181396, avg loss: 2.360738, ppl: 10.598773 +epoch: 2, batch: 7705, sum loss: 4021.858154, avg loss: 2.437490, ppl: 11.444277 +epoch: 2, batch: 7706, sum loss: 4054.221924, avg loss: 2.692046, ppl: 14.761854 +epoch: 2, batch: 7707, sum loss: 4071.325684, avg loss: 2.773383, ppl: 16.012709 +epoch: 2, batch: 7708, sum loss: 4170.998047, avg loss: 2.391627, ppl: 10.931268 +epoch: 2, batch: 7709, sum loss: 5465.147461, avg loss: 2.627475, ppl: 13.838779 +epoch: 2, batch: 7710, sum loss: 5023.682617, avg loss: 2.578893, ppl: 13.182533 +epoch: 2, batch: 7711, sum loss: 
4070.765869, avg loss: 2.384749, ppl: 10.856334 +epoch: 2, batch: 7712, sum loss: 4483.033691, avg loss: 2.457803, ppl: 11.679130 +epoch: 2, batch: 7713, sum loss: 4842.282227, avg loss: 2.700660, ppl: 14.889548 +epoch: 2, batch: 7714, sum loss: 4014.583008, avg loss: 2.479668, ppl: 11.937305 +epoch: 2, batch: 7715, sum loss: 3575.267822, avg loss: 2.743874, ppl: 15.547095 +epoch: 2, batch: 7716, sum loss: 4029.515625, avg loss: 2.389986, ppl: 10.913337 +epoch: 2, batch: 7717, sum loss: 3877.809326, avg loss: 2.665161, ppl: 14.370261 +epoch: 2, batch: 7718, sum loss: 4263.681641, avg loss: 2.648249, ppl: 14.129282 +epoch: 2, batch: 7719, sum loss: 3448.285645, avg loss: 2.147127, ppl: 8.560226 +epoch: 2, batch: 7720, sum loss: 4140.884766, avg loss: 2.581599, ppl: 13.218257 +epoch: 2, batch: 7721, sum loss: 4360.331055, avg loss: 2.838757, ppl: 17.094509 +epoch: 2, batch: 7722, sum loss: 5179.968750, avg loss: 2.589984, ppl: 13.329564 +epoch: 2, batch: 7723, sum loss: 4794.610352, avg loss: 2.752359, ppl: 15.679583 +epoch: 2, batch: 7724, sum loss: 5283.333496, avg loss: 2.842030, ppl: 17.150543 +epoch: 2, batch: 7725, sum loss: 5754.620605, avg loss: 2.980125, ppl: 19.690271 +epoch: 2, batch: 7726, sum loss: 4233.710449, avg loss: 2.547359, ppl: 12.773325 +epoch: 2, batch: 7727, sum loss: 3300.639893, avg loss: 2.215195, ppl: 9.163193 +epoch: 2, batch: 7728, sum loss: 3973.533203, avg loss: 2.524481, ppl: 12.484415 +epoch: 2, batch: 7729, sum loss: 4203.651855, avg loss: 2.449681, ppl: 11.584645 +epoch: 2, batch: 7730, sum loss: 3979.340576, avg loss: 2.445815, ppl: 11.539947 +epoch: 2, batch: 7731, sum loss: 4810.000000, avg loss: 2.399002, ppl: 11.012184 +epoch: 2, batch: 7732, sum loss: 4513.905762, avg loss: 2.320774, ppl: 10.183557 +epoch: 2, batch: 7733, sum loss: 3652.497559, avg loss: 2.414077, ppl: 11.179442 +epoch: 2, batch: 7734, sum loss: 5451.772949, avg loss: 2.768803, ppl: 15.939541 +epoch: 2, batch: 7735, sum loss: 3476.349609, avg loss: 2.451586, 
ppl: 11.606746 +epoch: 2, batch: 7736, sum loss: 3724.391602, avg loss: 2.538781, ppl: 12.664220 +epoch: 2, batch: 7737, sum loss: 4488.278809, avg loss: 2.635513, ppl: 13.950468 +epoch: 2, batch: 7738, sum loss: 4977.680176, avg loss: 2.728991, ppl: 15.317428 +epoch: 2, batch: 7739, sum loss: 4060.219971, avg loss: 2.413924, ppl: 11.177733 +epoch: 2, batch: 7740, sum loss: 4117.348633, avg loss: 2.395200, ppl: 10.970392 +epoch: 2, batch: 7741, sum loss: 4313.544434, avg loss: 2.537379, ppl: 12.646484 +epoch: 2, batch: 7742, sum loss: 4362.320801, avg loss: 2.540664, ppl: 12.688099 +epoch: 2, batch: 7743, sum loss: 4179.037598, avg loss: 2.389387, ppl: 10.906805 +epoch: 2, batch: 7744, sum loss: 4027.089355, avg loss: 2.364703, ppl: 10.640880 +epoch: 2, batch: 7745, sum loss: 3713.872559, avg loss: 2.352041, ppl: 10.506990 +epoch: 2, batch: 7746, sum loss: 3613.221924, avg loss: 2.490160, ppl: 12.063203 +epoch: 2, batch: 7747, sum loss: 4551.931152, avg loss: 2.717571, ppl: 15.143495 +epoch: 2, batch: 7748, sum loss: 4666.020508, avg loss: 2.639152, ppl: 14.001327 +epoch: 2, batch: 7749, sum loss: 3536.570557, avg loss: 2.348320, ppl: 10.467974 +epoch: 2, batch: 7750, sum loss: 4916.094238, avg loss: 2.628927, ppl: 13.858898 +epoch: 2, batch: 7751, sum loss: 4007.307617, avg loss: 2.558945, ppl: 12.922174 +epoch: 2, batch: 7752, sum loss: 4317.611328, avg loss: 2.542763, ppl: 12.714753 +epoch: 2, batch: 7753, sum loss: 4505.075195, avg loss: 2.637632, ppl: 13.980055 +epoch: 2, batch: 7754, sum loss: 4101.486328, avg loss: 2.534911, ppl: 12.615310 +epoch: 2, batch: 7755, sum loss: 5235.743652, avg loss: 2.793887, ppl: 16.344421 +epoch: 2, batch: 7756, sum loss: 4317.500488, avg loss: 2.580694, ppl: 13.206297 +epoch: 2, batch: 7757, sum loss: 3681.092285, avg loss: 2.449163, ppl: 11.578653 +epoch: 2, batch: 7758, sum loss: 4239.953613, avg loss: 2.472276, ppl: 11.849385 +epoch: 2, batch: 7759, sum loss: 3704.126465, avg loss: 2.306430, ppl: 10.038522 +epoch: 2, 
batch: 7760, sum loss: 4163.786133, avg loss: 2.339206, ppl: 10.372994 +epoch: 2, batch: 7761, sum loss: 4398.451660, avg loss: 2.843214, ppl: 17.170860 +epoch: 2, batch: 7762, sum loss: 3536.863037, avg loss: 2.302645, ppl: 10.000601 +epoch: 2, batch: 7763, sum loss: 3913.861572, avg loss: 2.536527, ppl: 12.635715 +epoch: 2, batch: 7764, sum loss: 4063.013672, avg loss: 2.320396, ppl: 10.179707 +epoch: 2, batch: 7765, sum loss: 4575.333008, avg loss: 2.749599, ppl: 15.636364 +epoch: 2, batch: 7766, sum loss: 4403.665039, avg loss: 2.509211, ppl: 12.295223 +epoch: 2, batch: 7767, sum loss: 4053.708252, avg loss: 2.546299, ppl: 12.759795 +epoch: 2, batch: 7768, sum loss: 4257.412598, avg loss: 2.586520, ppl: 13.283470 +epoch: 2, batch: 7769, sum loss: 4144.215820, avg loss: 2.480081, ppl: 11.942236 +epoch: 2, batch: 7770, sum loss: 5383.479492, avg loss: 2.609539, ppl: 13.592787 +epoch: 2, batch: 7771, sum loss: 3416.952637, avg loss: 2.245041, ppl: 9.440806 +epoch: 2, batch: 7772, sum loss: 3991.315186, avg loss: 2.353370, ppl: 10.520966 +epoch: 2, batch: 7773, sum loss: 4608.094727, avg loss: 2.551548, ppl: 12.826939 +epoch: 2, batch: 7774, sum loss: 3691.979004, avg loss: 2.489534, ppl: 12.055658 +epoch: 2, batch: 7775, sum loss: 4126.458008, avg loss: 2.575816, ppl: 13.142042 +epoch: 2, batch: 7776, sum loss: 4468.098145, avg loss: 2.438918, ppl: 11.460638 +epoch: 2, batch: 7777, sum loss: 4395.900879, avg loss: 2.408713, ppl: 11.119639 +epoch: 2, batch: 7778, sum loss: 3710.678467, avg loss: 2.573286, ppl: 13.108830 +epoch: 2, batch: 7779, sum loss: 4151.401855, avg loss: 2.376303, ppl: 10.765033 +epoch: 2, batch: 7780, sum loss: 3836.451660, avg loss: 2.236998, ppl: 9.365173 +epoch: 2, batch: 7781, sum loss: 3882.599609, avg loss: 2.490443, ppl: 12.066614 +epoch: 2, batch: 7782, sum loss: 4292.045898, avg loss: 2.498281, ppl: 12.161565 +epoch: 2, batch: 7783, sum loss: 4086.945312, avg loss: 2.562348, ppl: 12.966228 +epoch: 2, batch: 7784, sum loss: 
4056.705566, avg loss: 2.400418, ppl: 11.027781 +epoch: 2, batch: 7785, sum loss: 4494.360352, avg loss: 2.537753, ppl: 12.651210 +epoch: 2, batch: 7786, sum loss: 4624.705566, avg loss: 2.699770, ppl: 14.876306 +epoch: 2, batch: 7787, sum loss: 4473.377930, avg loss: 2.827672, ppl: 16.906065 +epoch: 2, batch: 7788, sum loss: 3757.773438, avg loss: 2.357449, ppl: 10.563967 +epoch: 2, batch: 7789, sum loss: 3603.677734, avg loss: 2.552180, ppl: 12.835049 +epoch: 2, batch: 7790, sum loss: 5364.641113, avg loss: 2.799917, ppl: 16.443285 +epoch: 2, batch: 7791, sum loss: 4343.913086, avg loss: 2.477988, ppl: 11.917266 +epoch: 2, batch: 7792, sum loss: 3328.890137, avg loss: 2.475011, ppl: 11.881842 +epoch: 2, batch: 7793, sum loss: 5937.603516, avg loss: 2.910590, ppl: 18.367626 +epoch: 2, batch: 7794, sum loss: 4389.550293, avg loss: 2.591234, ppl: 13.346231 +epoch: 2, batch: 7795, sum loss: 3961.012695, avg loss: 2.369027, ppl: 10.686988 +epoch: 2, batch: 7796, sum loss: 4318.052734, avg loss: 2.495984, ppl: 12.133668 +epoch: 2, batch: 7797, sum loss: 5783.024414, avg loss: 2.804571, ppl: 16.519981 +epoch: 2, batch: 7798, sum loss: 4853.295898, avg loss: 2.688807, ppl: 14.714105 +epoch: 2, batch: 7799, sum loss: 4517.272949, avg loss: 2.658783, ppl: 14.278908 +epoch: 2, batch: 7800, sum loss: 4773.954102, avg loss: 2.566642, ppl: 13.022023 +epoch: 2, batch: 7801, sum loss: 4562.755371, avg loss: 2.458381, ppl: 11.685879 +epoch: 2, batch: 7802, sum loss: 4346.797363, avg loss: 2.508250, ppl: 12.283415 +epoch: 2, batch: 7803, sum loss: 4534.160156, avg loss: 2.349306, ppl: 10.478293 +epoch: 2, batch: 7804, sum loss: 4662.037598, avg loss: 2.590021, ppl: 13.330050 +epoch: 2, batch: 7805, sum loss: 4297.032227, avg loss: 2.556236, ppl: 12.887213 +epoch: 2, batch: 7806, sum loss: 4460.144531, avg loss: 2.817526, ppl: 16.735401 +epoch: 2, batch: 7807, sum loss: 3687.325684, avg loss: 2.330800, ppl: 10.286165 +epoch: 2, batch: 7808, sum loss: 3914.060303, avg loss: 
2.380815, ppl: 10.813715 +epoch: 2, batch: 7809, sum loss: 3654.647461, avg loss: 2.317468, ppl: 10.149946 +epoch: 2, batch: 7810, sum loss: 4140.274414, avg loss: 2.587672, ppl: 13.298770 +epoch: 2, batch: 7811, sum loss: 4535.965332, avg loss: 2.571409, ppl: 13.084247 +epoch: 2, batch: 7812, sum loss: 4327.956543, avg loss: 2.490194, ppl: 12.063611 +epoch: 2, batch: 7813, sum loss: 4261.057617, avg loss: 2.601378, ppl: 13.482306 +epoch: 2, batch: 7814, sum loss: 4795.191406, avg loss: 2.589196, ppl: 13.319061 +epoch: 2, batch: 7815, sum loss: 4546.708496, avg loss: 2.655788, ppl: 14.236196 +epoch: 2, batch: 7816, sum loss: 4084.347656, avg loss: 2.488938, ppl: 12.048475 +epoch: 2, batch: 7817, sum loss: 4711.376953, avg loss: 2.789448, ppl: 16.272028 +epoch: 2, batch: 7818, sum loss: 3237.016357, avg loss: 2.221699, ppl: 9.222992 +epoch: 2, batch: 7819, sum loss: 4750.855469, avg loss: 2.682584, ppl: 14.622824 +epoch: 2, batch: 7820, sum loss: 4649.352051, avg loss: 2.555994, ppl: 12.884094 +epoch: 2, batch: 7821, sum loss: 4579.299316, avg loss: 2.603354, ppl: 13.508971 +epoch: 2, batch: 7822, sum loss: 4034.531982, avg loss: 2.460080, ppl: 11.705752 +epoch: 2, batch: 7823, sum loss: 4716.577148, avg loss: 2.658724, ppl: 14.278063 +epoch: 2, batch: 7824, sum loss: 4044.955566, avg loss: 2.417786, ppl: 11.220987 +epoch: 2, batch: 7825, sum loss: 5529.688965, avg loss: 2.802681, ppl: 16.488791 +epoch: 2, batch: 7826, sum loss: 4724.009766, avg loss: 2.730641, ppl: 15.342724 +epoch: 2, batch: 7827, sum loss: 4683.399902, avg loss: 2.772883, ppl: 16.004715 +epoch: 2, batch: 7828, sum loss: 4572.526855, avg loss: 2.578978, ppl: 13.183652 +epoch: 2, batch: 7829, sum loss: 4518.784180, avg loss: 2.492435, ppl: 12.090681 +epoch: 2, batch: 7830, sum loss: 3965.698975, avg loss: 2.356327, ppl: 10.552125 +epoch: 2, batch: 7831, sum loss: 3920.508057, avg loss: 2.279365, ppl: 9.770474 +epoch: 2, batch: 7832, sum loss: 4008.553711, avg loss: 2.494433, ppl: 12.114861 +epoch: 
2, batch: 7833, sum loss: 4272.909180, avg loss: 2.621417, ppl: 13.755195 +epoch: 2, batch: 7834, sum loss: 4297.794922, avg loss: 2.582809, ppl: 13.234267 +epoch: 2, batch: 7835, sum loss: 3924.421875, avg loss: 2.423979, ppl: 11.290696 +epoch: 2, batch: 7836, sum loss: 4209.955078, avg loss: 2.667906, ppl: 14.409758 +epoch: 2, batch: 7837, sum loss: 4340.768555, avg loss: 2.529585, ppl: 12.548302 +epoch: 2, batch: 7838, sum loss: 3944.757568, avg loss: 2.312284, ppl: 10.097466 +epoch: 2, batch: 7839, sum loss: 4277.736816, avg loss: 2.466976, ppl: 11.786752 +epoch: 2, batch: 7840, sum loss: 4385.352539, avg loss: 2.477600, ppl: 11.912643 +epoch: 2, batch: 7841, sum loss: 5233.034668, avg loss: 2.678114, ppl: 14.557611 +epoch: 2, batch: 7842, sum loss: 3422.123535, avg loss: 2.089208, ppl: 8.078517 +epoch: 2, batch: 7843, sum loss: 3937.369873, avg loss: 2.425983, ppl: 11.313342 +epoch: 2, batch: 7844, sum loss: 4845.468262, avg loss: 2.626270, ppl: 13.822118 +epoch: 2, batch: 7845, sum loss: 4498.753418, avg loss: 2.442320, ppl: 11.499690 +epoch: 2, batch: 7846, sum loss: 5168.895020, avg loss: 2.679572, ppl: 14.578857 +epoch: 2, batch: 7847, sum loss: 4885.656738, avg loss: 2.611254, ppl: 13.616121 +epoch: 2, batch: 7848, sum loss: 3960.896240, avg loss: 2.486438, ppl: 12.018394 +epoch: 2, batch: 7849, sum loss: 4182.823730, avg loss: 2.824324, ppl: 16.849548 +epoch: 2, batch: 7850, sum loss: 3964.591064, avg loss: 2.655453, ppl: 14.231427 +epoch: 2, batch: 7851, sum loss: 5023.445312, avg loss: 2.715376, ppl: 15.110289 +epoch: 2, batch: 7852, sum loss: 4398.466309, avg loss: 2.549836, ppl: 12.804999 +epoch: 2, batch: 7853, sum loss: 4678.199219, avg loss: 2.569028, ppl: 13.053126 +epoch: 2, batch: 7854, sum loss: 4214.189453, avg loss: 2.435948, ppl: 11.426642 +epoch: 2, batch: 7855, sum loss: 4141.226562, avg loss: 2.367768, ppl: 10.673546 +epoch: 2, batch: 7856, sum loss: 5377.980469, avg loss: 2.552435, ppl: 12.838326 +epoch: 2, batch: 7857, sum loss: 
4823.566406, avg loss: 2.656149, ppl: 14.241339 +epoch: 2, batch: 7858, sum loss: 4663.936035, avg loss: 2.541655, ppl: 12.700668 +epoch: 2, batch: 7859, sum loss: 4145.382324, avg loss: 2.485241, ppl: 12.004015 +epoch: 2, batch: 7860, sum loss: 4104.298340, avg loss: 2.482939, ppl: 11.976412 +epoch: 2, batch: 7861, sum loss: 4378.378906, avg loss: 2.469475, ppl: 11.816242 +epoch: 2, batch: 7862, sum loss: 4195.701172, avg loss: 2.494472, ppl: 12.115330 +epoch: 2, batch: 7863, sum loss: 3849.966797, avg loss: 2.389800, ppl: 10.911307 +epoch: 2, batch: 7864, sum loss: 4681.636719, avg loss: 2.875698, ppl: 17.737803 +epoch: 2, batch: 7865, sum loss: 3808.143066, avg loss: 2.369722, ppl: 10.694418 +epoch: 2, batch: 7866, sum loss: 4602.273926, avg loss: 2.458480, ppl: 11.687033 +epoch: 2, batch: 7867, sum loss: 3891.731445, avg loss: 2.371561, ppl: 10.714102 +epoch: 2, batch: 7868, sum loss: 4152.535156, avg loss: 2.553835, ppl: 12.856309 +epoch: 2, batch: 7869, sum loss: 4769.812012, avg loss: 2.574102, ppl: 13.119535 +epoch: 2, batch: 7870, sum loss: 4172.433105, avg loss: 2.353318, ppl: 10.520421 +epoch: 2, batch: 7871, sum loss: 5942.037109, avg loss: 2.996489, ppl: 20.015137 +epoch: 2, batch: 7872, sum loss: 3957.877930, avg loss: 2.406005, ppl: 11.089566 +epoch: 2, batch: 7873, sum loss: 4892.980469, avg loss: 2.644854, ppl: 14.081393 +epoch: 2, batch: 7874, sum loss: 4429.757324, avg loss: 2.488628, ppl: 12.044736 +epoch: 2, batch: 7875, sum loss: 4468.278809, avg loss: 2.588806, ppl: 13.313861 +epoch: 2, batch: 7876, sum loss: 4409.800293, avg loss: 2.677474, ppl: 14.548302 +epoch: 2, batch: 7877, sum loss: 4813.785156, avg loss: 2.668395, ppl: 14.416816 +epoch: 2, batch: 7878, sum loss: 3849.214844, avg loss: 2.314621, ppl: 10.121086 +epoch: 2, batch: 7879, sum loss: 4919.735352, avg loss: 2.630875, ppl: 13.885910 +epoch: 2, batch: 7880, sum loss: 4887.428711, avg loss: 2.700237, ppl: 14.883259 +epoch: 2, batch: 7881, sum loss: 5502.451172, avg loss: 
2.635274, ppl: 13.947129 +epoch: 2, batch: 7882, sum loss: 4038.817871, avg loss: 2.372983, ppl: 10.729355 +epoch: 2, batch: 7883, sum loss: 4703.810547, avg loss: 2.808245, ppl: 16.580797 +epoch: 2, batch: 7884, sum loss: 5296.500488, avg loss: 2.811306, ppl: 16.631628 +epoch: 2, batch: 7885, sum loss: 4326.287109, avg loss: 2.518211, ppl: 12.406386 +epoch: 2, batch: 7886, sum loss: 3361.018311, avg loss: 2.212652, ppl: 9.139923 +epoch: 2, batch: 7887, sum loss: 4943.333984, avg loss: 2.636445, ppl: 13.963472 +epoch: 2, batch: 7888, sum loss: 3945.415771, avg loss: 2.583769, ppl: 13.246976 +epoch: 2, batch: 7889, sum loss: 4245.741699, avg loss: 2.504862, ppl: 12.241873 +epoch: 2, batch: 7890, sum loss: 5008.063965, avg loss: 2.729190, ppl: 15.320477 +epoch: 2, batch: 7891, sum loss: 3591.042969, avg loss: 2.309353, ppl: 10.067904 +epoch: 2, batch: 7892, sum loss: 3183.769531, avg loss: 2.221751, ppl: 9.223472 +epoch: 2, batch: 7893, sum loss: 5172.427734, avg loss: 2.785367, ppl: 16.205772 +epoch: 2, batch: 7894, sum loss: 4608.320312, avg loss: 2.731666, ppl: 15.358455 +epoch: 2, batch: 7895, sum loss: 4120.005371, avg loss: 2.506086, ppl: 12.256861 +epoch: 2, batch: 7896, sum loss: 3963.632324, avg loss: 2.321987, ppl: 10.195917 +epoch: 2, batch: 7897, sum loss: 3940.120850, avg loss: 2.418736, ppl: 11.231653 +epoch: 2, batch: 7898, sum loss: 4756.734863, avg loss: 2.767153, ppl: 15.913258 +epoch: 2, batch: 7899, sum loss: 4116.649414, avg loss: 2.417293, ppl: 11.215453 +epoch: 2, batch: 7900, sum loss: 4068.681152, avg loss: 2.476373, ppl: 11.898034 +epoch: 2, batch: 7901, sum loss: 3868.539795, avg loss: 2.410305, ppl: 11.137358 +epoch: 2, batch: 7902, sum loss: 4518.893555, avg loss: 2.603049, ppl: 13.504855 +epoch: 2, batch: 7903, sum loss: 4770.081055, avg loss: 2.593845, ppl: 13.381124 +epoch: 2, batch: 7904, sum loss: 4261.330078, avg loss: 2.411619, ppl: 11.151999 +epoch: 2, batch: 7905, sum loss: 3818.752441, avg loss: 2.357255, ppl: 10.561916 +epoch: 
2, batch: 7906, sum loss: 4272.924316, avg loss: 2.471327, ppl: 11.838146 +epoch: 2, batch: 7907, sum loss: 4560.275879, avg loss: 2.583726, ppl: 13.246398 +epoch: 2, batch: 7908, sum loss: 4253.680664, avg loss: 2.525939, ppl: 12.502626 +epoch: 2, batch: 7909, sum loss: 4281.628906, avg loss: 2.486428, ppl: 12.018268 +epoch: 2, batch: 7910, sum loss: 4378.751465, avg loss: 2.611062, ppl: 13.613504 +epoch: 2, batch: 7911, sum loss: 4963.910645, avg loss: 2.765410, ppl: 15.885550 +epoch: 2, batch: 7912, sum loss: 3478.804688, avg loss: 2.230003, ppl: 9.299895 +epoch: 2, batch: 7913, sum loss: 4028.879395, avg loss: 2.508642, ppl: 12.288234 +epoch: 2, batch: 7914, sum loss: 3705.438477, avg loss: 2.352659, ppl: 10.513491 +epoch: 2, batch: 7915, sum loss: 5042.700195, avg loss: 2.638776, ppl: 13.996057 +epoch: 2, batch: 7916, sum loss: 4656.147461, avg loss: 2.504652, ppl: 12.239293 +epoch: 2, batch: 7917, sum loss: 4055.943848, avg loss: 2.414252, ppl: 11.181407 +epoch: 2, batch: 7918, sum loss: 5236.260742, avg loss: 2.601223, ppl: 13.480207 +epoch: 2, batch: 7919, sum loss: 4106.411133, avg loss: 2.563303, ppl: 12.978612 +epoch: 2, batch: 7920, sum loss: 4903.356445, avg loss: 2.806729, ppl: 16.555681 +epoch: 2, batch: 7921, sum loss: 5890.499023, avg loss: 2.957078, ppl: 19.241665 +epoch: 2, batch: 7922, sum loss: 4257.491211, avg loss: 2.452472, ppl: 11.617028 +epoch: 2, batch: 7923, sum loss: 5415.307129, avg loss: 2.714440, ppl: 15.096148 +epoch: 2, batch: 7924, sum loss: 3996.832520, avg loss: 2.508997, ppl: 12.292597 +epoch: 2, batch: 7925, sum loss: 5192.109375, avg loss: 2.963533, ppl: 19.366270 +epoch: 2, batch: 7926, sum loss: 4206.631836, avg loss: 2.588696, ppl: 13.312407 +epoch: 2, batch: 7927, sum loss: 5108.593750, avg loss: 2.511600, ppl: 12.324628 +epoch: 2, batch: 7928, sum loss: 4098.890137, avg loss: 2.422512, ppl: 11.274142 +epoch: 2, batch: 7929, sum loss: 3961.413330, avg loss: 2.508811, ppl: 12.290314 +epoch: 2, batch: 7930, sum loss: 
5110.761230, avg loss: 2.702677, ppl: 14.919611 +epoch: 2, batch: 7931, sum loss: 4245.343750, avg loss: 2.585471, ppl: 13.269543 +epoch: 2, batch: 7932, sum loss: 4695.604492, avg loss: 2.720512, ppl: 15.188103 +epoch: 2, batch: 7933, sum loss: 4307.556152, avg loss: 2.471346, ppl: 11.838372 +epoch: 2, batch: 7934, sum loss: 3892.673340, avg loss: 2.362059, ppl: 10.612782 +epoch: 2, batch: 7935, sum loss: 4444.506348, avg loss: 2.662976, ppl: 14.338895 +epoch: 2, batch: 7936, sum loss: 3537.679932, avg loss: 2.429725, ppl: 11.355761 +epoch: 2, batch: 7937, sum loss: 3955.229736, avg loss: 2.551761, ppl: 12.829679 +epoch: 2, batch: 7938, sum loss: 4891.328125, avg loss: 2.472865, ppl: 11.856371 +epoch: 2, batch: 7939, sum loss: 3790.145752, avg loss: 2.557453, ppl: 12.902917 +epoch: 2, batch: 7940, sum loss: 4630.242676, avg loss: 2.478717, ppl: 11.925949 +epoch: 2, batch: 7941, sum loss: 5026.994141, avg loss: 2.659785, ppl: 14.293220 +epoch: 2, batch: 7942, sum loss: 4518.941895, avg loss: 2.707575, ppl: 14.992867 +epoch: 2, batch: 7943, sum loss: 4284.077148, avg loss: 2.477777, ppl: 11.914754 +epoch: 2, batch: 7944, sum loss: 3787.751465, avg loss: 2.221555, ppl: 9.221662 +epoch: 2, batch: 7945, sum loss: 4028.776367, avg loss: 2.357388, ppl: 10.563327 +epoch: 2, batch: 7946, sum loss: 4081.745361, avg loss: 2.573610, ppl: 13.113075 +epoch: 2, batch: 7947, sum loss: 4926.102051, avg loss: 2.683062, ppl: 14.629823 +epoch: 2, batch: 7948, sum loss: 3961.790771, avg loss: 2.473028, ppl: 11.858299 +epoch: 2, batch: 7949, sum loss: 4601.601562, avg loss: 2.529742, ppl: 12.550274 +epoch: 2, batch: 7950, sum loss: 4492.262695, avg loss: 2.444104, ppl: 11.520220 +epoch: 2, batch: 7951, sum loss: 3779.622070, avg loss: 2.396717, ppl: 10.987042 +epoch: 2, batch: 7952, sum loss: 5471.030273, avg loss: 2.916327, ppl: 18.473318 +epoch: 2, batch: 7953, sum loss: 4942.514648, avg loss: 2.850354, ppl: 17.293911 +epoch: 2, batch: 7954, sum loss: 4375.696289, avg loss: 2.494696, 
ppl: 12.118045 +epoch: 2, batch: 7955, sum loss: 4584.965820, avg loss: 2.656411, ppl: 14.245074 +epoch: 2, batch: 7956, sum loss: 4513.775879, avg loss: 2.561734, ppl: 12.958273 +epoch: 2, batch: 7957, sum loss: 4268.140625, avg loss: 2.487262, ppl: 12.028292 +epoch: 2, batch: 7958, sum loss: 4437.665527, avg loss: 2.379445, ppl: 10.798911 +epoch: 2, batch: 7959, sum loss: 4690.119629, avg loss: 2.678538, ppl: 14.563784 +epoch: 2, batch: 7960, sum loss: 4822.348633, avg loss: 2.763524, ppl: 15.855613 +epoch: 2, batch: 7961, sum loss: 4087.191406, avg loss: 2.585194, ppl: 13.265858 +epoch: 2, batch: 7962, sum loss: 4507.392578, avg loss: 2.539376, ppl: 12.671762 +epoch: 2, batch: 7963, sum loss: 4400.284180, avg loss: 2.561283, ppl: 12.952426 +epoch: 2, batch: 7964, sum loss: 3713.900146, avg loss: 2.416331, ppl: 11.204669 +epoch: 2, batch: 7965, sum loss: 4171.055664, avg loss: 2.562073, ppl: 12.962667 +epoch: 2, batch: 7966, sum loss: 4044.759277, avg loss: 2.351604, ppl: 10.502404 +epoch: 2, batch: 7967, sum loss: 5126.416504, avg loss: 2.840120, ppl: 17.117817 +epoch: 2, batch: 7968, sum loss: 4741.499512, avg loss: 2.720309, ppl: 15.185021 +epoch: 2, batch: 7969, sum loss: 4213.881348, avg loss: 2.685711, ppl: 14.668633 +epoch: 2, batch: 7970, sum loss: 4546.752441, avg loss: 2.525974, ppl: 12.503062 +epoch: 2, batch: 7971, sum loss: 3873.435791, avg loss: 2.319423, ppl: 10.169802 +epoch: 2, batch: 7972, sum loss: 4833.267578, avg loss: 2.736845, ppl: 15.438193 +epoch: 2, batch: 7973, sum loss: 3636.461426, avg loss: 2.448796, ppl: 11.574398 +epoch: 2, batch: 7974, sum loss: 3429.301270, avg loss: 2.209601, ppl: 9.112083 +epoch: 2, batch: 7975, sum loss: 4853.675781, avg loss: 2.683071, ppl: 14.629958 +epoch: 2, batch: 7976, sum loss: 3123.821289, avg loss: 2.193695, ppl: 8.968288 +epoch: 2, batch: 7977, sum loss: 4340.301270, avg loss: 2.467482, ppl: 11.792719 +epoch: 2, batch: 7978, sum loss: 4619.929199, avg loss: 2.559518, ppl: 12.929579 +epoch: 2, batch: 
7979, sum loss: 4267.003418, avg loss: 2.852275, ppl: 17.327158 +epoch: 2, batch: 7980, sum loss: 4087.327881, avg loss: 2.509102, ppl: 12.293889 +epoch: 2, batch: 7981, sum loss: 4646.784668, avg loss: 2.593072, ppl: 13.370783 +epoch: 2, batch: 7982, sum loss: 3803.863281, avg loss: 2.568443, ppl: 13.045491 +epoch: 2, batch: 7983, sum loss: 3757.090576, avg loss: 2.621836, ppl: 13.760964 +epoch: 2, batch: 7984, sum loss: 3591.509033, avg loss: 2.386385, ppl: 10.874112 +epoch: 2, batch: 7985, sum loss: 4157.324219, avg loss: 2.580586, ppl: 13.204877 +epoch: 2, batch: 7986, sum loss: 4396.533691, avg loss: 2.756448, ppl: 15.743814 +epoch: 2, batch: 7987, sum loss: 5121.574707, avg loss: 2.682858, ppl: 14.626830 +epoch: 2, batch: 7988, sum loss: 4397.528320, avg loss: 2.582224, ppl: 13.226526 +epoch: 2, batch: 7989, sum loss: 3214.284180, avg loss: 2.427707, ppl: 11.332869 +epoch: 2, batch: 7990, sum loss: 3722.164551, avg loss: 2.355800, ppl: 10.546567 +epoch: 2, batch: 7991, sum loss: 4714.814941, avg loss: 2.563793, ppl: 12.984975 +epoch: 2, batch: 7992, sum loss: 3439.745117, avg loss: 2.237961, ppl: 9.374194 +epoch: 2, batch: 7993, sum loss: 4719.276367, avg loss: 2.640893, ppl: 14.025729 +epoch: 2, batch: 7994, sum loss: 4385.786621, avg loss: 2.646823, ppl: 14.109148 +epoch: 2, batch: 7995, sum loss: 4771.008301, avg loss: 2.488789, ppl: 12.046679 +epoch: 2, batch: 7996, sum loss: 4119.833984, avg loss: 2.657958, ppl: 14.267119 +epoch: 2, batch: 7997, sum loss: 3538.771973, avg loss: 2.469485, ppl: 11.816360 +epoch: 2, batch: 7998, sum loss: 4645.946777, avg loss: 2.659386, ppl: 14.287509 +epoch: 2, batch: 7999, sum loss: 4679.957520, avg loss: 2.602868, ppl: 13.502412 +epoch: 2, batch: 8000, sum loss: 4571.667969, avg loss: 2.397309, ppl: 10.993551 +epoch: 2, batch: 8001, sum loss: 4429.709961, avg loss: 2.616486, ppl: 13.687535 +epoch: 2, batch: 8002, sum loss: 4537.306152, avg loss: 2.572169, ppl: 13.094195 +epoch: 2, batch: 8003, sum loss: 4517.041992, avg 
loss: 2.454914, ppl: 11.645433 +epoch: 2, batch: 8004, sum loss: 4090.794434, avg loss: 2.555150, ppl: 12.873225 +epoch: 2, batch: 8005, sum loss: 4103.938477, avg loss: 2.368112, ppl: 10.677216 +epoch: 2, batch: 8006, sum loss: 4532.541504, avg loss: 2.656824, ppl: 14.250955 +epoch: 2, batch: 8007, sum loss: 4325.475586, avg loss: 2.395058, ppl: 10.968839 +epoch: 2, batch: 8008, sum loss: 4617.856934, avg loss: 2.697346, ppl: 14.840297 +epoch: 2, batch: 8009, sum loss: 4825.052246, avg loss: 2.587159, ppl: 13.291961 +epoch: 2, batch: 8010, sum loss: 2751.929932, avg loss: 1.932535, ppl: 6.906998 +epoch: 2, batch: 8011, sum loss: 4108.470703, avg loss: 2.582320, ppl: 13.227788 +epoch: 2, batch: 8012, sum loss: 4399.894043, avg loss: 2.426858, ppl: 11.323254 +epoch: 2, batch: 8013, sum loss: 5427.773926, avg loss: 2.620847, ppl: 13.747362 +epoch: 2, batch: 8014, sum loss: 3561.322754, avg loss: 2.151857, ppl: 8.600812 +epoch: 2, batch: 8015, sum loss: 4475.420410, avg loss: 2.598967, ppl: 13.449832 +epoch: 2, batch: 8016, sum loss: 3810.710938, avg loss: 2.631706, ppl: 13.897466 +epoch: 2, batch: 8017, sum loss: 4670.632812, avg loss: 2.531508, ppl: 12.572453 +epoch: 2, batch: 8018, sum loss: 4149.193359, avg loss: 2.551779, ppl: 12.829911 +epoch: 2, batch: 8019, sum loss: 3986.001953, avg loss: 2.320141, ppl: 10.177108 +epoch: 2, batch: 8020, sum loss: 3927.359863, avg loss: 2.402055, ppl: 11.045850 +epoch: 2, batch: 8021, sum loss: 4020.230957, avg loss: 2.439461, ppl: 11.466853 +epoch: 2, batch: 8022, sum loss: 4207.513184, avg loss: 2.592429, ppl: 13.362194 +epoch: 2, batch: 8023, sum loss: 4855.822266, avg loss: 2.709722, ppl: 15.025103 +epoch: 2, batch: 8024, sum loss: 5145.462402, avg loss: 2.801014, ppl: 16.461329 +epoch: 2, batch: 8025, sum loss: 5378.861328, avg loss: 2.726235, ppl: 15.275266 +epoch: 2, batch: 8026, sum loss: 4707.723145, avg loss: 2.641820, ppl: 14.038731 +epoch: 2, batch: 8027, sum loss: 3892.875977, avg loss: 2.314433, ppl: 10.119183 
+epoch: 2, batch: 8028, sum loss: 4959.658691, avg loss: 2.890244, ppl: 17.997700 +epoch: 2, batch: 8029, sum loss: 4640.904297, avg loss: 2.687264, ppl: 14.691422 +epoch: 2, batch: 8030, sum loss: 3888.668457, avg loss: 2.398932, ppl: 11.011407 +epoch: 2, batch: 8031, sum loss: 4855.567871, avg loss: 2.718683, ppl: 15.160343 +epoch: 2, batch: 8032, sum loss: 5559.895508, avg loss: 2.833790, ppl: 17.009798 +epoch: 2, batch: 8033, sum loss: 3562.500244, avg loss: 2.436731, ppl: 11.435595 +epoch: 2, batch: 8034, sum loss: 4224.688477, avg loss: 2.650369, ppl: 14.159265 +epoch: 2, batch: 8035, sum loss: 3839.950928, avg loss: 2.327243, ppl: 10.249645 +epoch: 2, batch: 8036, sum loss: 4832.609375, avg loss: 2.957533, ppl: 19.250429 +epoch: 2, batch: 8037, sum loss: 4238.471680, avg loss: 2.427533, ppl: 11.330891 +epoch: 2, batch: 8038, sum loss: 4157.425781, avg loss: 2.704896, ppl: 14.952768 +epoch: 2, batch: 8039, sum loss: 3269.834961, avg loss: 2.206367, ppl: 9.082655 +epoch: 2, batch: 8040, sum loss: 3673.333008, avg loss: 2.327841, ppl: 10.255775 +epoch: 2, batch: 8041, sum loss: 4361.769043, avg loss: 2.622832, ppl: 13.774673 +epoch: 2, batch: 8042, sum loss: 3801.192871, avg loss: 2.665633, ppl: 14.377050 +epoch: 2, batch: 8043, sum loss: 3877.562988, avg loss: 2.522813, ppl: 12.463603 +epoch: 2, batch: 8044, sum loss: 5253.357422, avg loss: 2.712110, ppl: 15.061022 +epoch: 2, batch: 8045, sum loss: 3573.833252, avg loss: 2.308678, ppl: 10.061114 +epoch: 2, batch: 8046, sum loss: 3564.175537, avg loss: 2.475122, ppl: 11.883157 +epoch: 2, batch: 8047, sum loss: 4048.396973, avg loss: 2.431470, ppl: 11.375588 +epoch: 2, batch: 8048, sum loss: 4062.867676, avg loss: 2.429945, ppl: 11.358257 +epoch: 2, batch: 8049, sum loss: 3964.130615, avg loss: 2.454570, ppl: 11.641427 +epoch: 2, batch: 8050, sum loss: 4014.840332, avg loss: 2.558853, ppl: 12.920987 +epoch: 2, batch: 8051, sum loss: 4918.784180, avg loss: 2.714561, ppl: 15.097977 +epoch: 2, batch: 8052, sum 
loss: 4675.697754, avg loss: 2.649121, ppl: 14.141597 +epoch: 2, batch: 8053, sum loss: 4931.885742, avg loss: 2.563350, ppl: 12.979228 +epoch: 2, batch: 8054, sum loss: 4374.350586, avg loss: 2.404811, ppl: 11.076333 +epoch: 2, batch: 8055, sum loss: 4189.951660, avg loss: 2.586390, ppl: 13.281738 +epoch: 2, batch: 8056, sum loss: 4180.773438, avg loss: 2.475295, ppl: 11.885214 +epoch: 2, batch: 8057, sum loss: 3136.273926, avg loss: 2.379570, ppl: 10.800263 +epoch: 2, batch: 8058, sum loss: 5265.049805, avg loss: 2.725181, ppl: 15.259177 +epoch: 2, batch: 8059, sum loss: 4061.098145, avg loss: 2.552544, ppl: 12.839731 +epoch: 2, batch: 8060, sum loss: 3741.175781, avg loss: 2.598039, ppl: 13.437361 +epoch: 2, batch: 8061, sum loss: 5544.637207, avg loss: 2.998722, ppl: 20.059885 +epoch: 2, batch: 8062, sum loss: 4375.981445, avg loss: 2.606302, ppl: 13.548858 +epoch: 2, batch: 8063, sum loss: 4275.977539, avg loss: 2.628136, ppl: 13.847936 +epoch: 2, batch: 8064, sum loss: 4022.375000, avg loss: 2.378696, ppl: 10.790825 +epoch: 2, batch: 8065, sum loss: 5139.260254, avg loss: 2.702030, ppl: 14.909964 +epoch: 2, batch: 8066, sum loss: 3697.824951, avg loss: 2.413724, ppl: 11.175501 +epoch: 2, batch: 8067, sum loss: 4302.529785, avg loss: 2.508764, ppl: 12.289725 +epoch: 2, batch: 8068, sum loss: 4780.669434, avg loss: 2.750673, ppl: 15.653160 +epoch: 2, batch: 8069, sum loss: 4936.084473, avg loss: 2.656665, ppl: 14.248695 +epoch: 2, batch: 8070, sum loss: 4142.603516, avg loss: 2.387668, ppl: 10.888069 +epoch: 2, batch: 8071, sum loss: 4273.869629, avg loss: 2.371737, ppl: 10.715987 +epoch: 2, batch: 8072, sum loss: 4122.557129, avg loss: 2.268881, ppl: 9.668579 +epoch: 2, batch: 8073, sum loss: 3893.838135, avg loss: 2.355619, ppl: 10.544654 +epoch: 2, batch: 8074, sum loss: 4798.027344, avg loss: 2.700072, ppl: 14.880796 +epoch: 2, batch: 8075, sum loss: 4665.213867, avg loss: 2.545125, ppl: 12.744822 +epoch: 2, batch: 8076, sum loss: 3791.112549, avg loss: 
2.328693, ppl: 10.264519 +epoch: 2, batch: 8077, sum loss: 4586.210938, avg loss: 2.632727, ppl: 13.911657 +epoch: 2, batch: 8078, sum loss: 3877.210449, avg loss: 2.529165, ppl: 12.543032 +epoch: 2, batch: 8079, sum loss: 4927.689453, avg loss: 2.730022, ppl: 15.333223 +epoch: 2, batch: 8080, sum loss: 3814.543213, avg loss: 2.429645, ppl: 11.354854 +epoch: 2, batch: 8081, sum loss: 3982.283447, avg loss: 2.470399, ppl: 11.827167 +epoch: 2, batch: 8082, sum loss: 4317.011230, avg loss: 2.478192, ppl: 11.919698 +epoch: 2, batch: 8083, sum loss: 4335.835449, avg loss: 2.516445, ppl: 12.384497 +epoch: 2, batch: 8084, sum loss: 3960.944580, avg loss: 2.604172, ppl: 13.520020 +epoch: 2, batch: 8085, sum loss: 3685.230957, avg loss: 2.422900, ppl: 11.278522 +epoch: 2, batch: 8086, sum loss: 5277.229980, avg loss: 2.669312, ppl: 14.430037 +epoch: 2, batch: 8087, sum loss: 4634.541016, avg loss: 2.685134, ppl: 14.660161 +epoch: 2, batch: 8088, sum loss: 4167.305664, avg loss: 2.514970, ppl: 12.366239 +epoch: 2, batch: 8089, sum loss: 4160.019043, avg loss: 2.404635, ppl: 11.074389 +epoch: 2, batch: 8090, sum loss: 3789.879395, avg loss: 2.353962, ppl: 10.527201 +epoch: 2, batch: 8091, sum loss: 4991.184082, avg loss: 2.659128, ppl: 14.283834 +epoch: 2, batch: 8092, sum loss: 4969.130371, avg loss: 2.459966, ppl: 11.704411 +epoch: 2, batch: 8093, sum loss: 4624.892090, avg loss: 2.481165, ppl: 11.955186 +epoch: 2, batch: 8094, sum loss: 4500.412109, avg loss: 2.592403, ppl: 13.361844 +epoch: 2, batch: 8095, sum loss: 5405.460938, avg loss: 2.745282, ppl: 15.569010 +epoch: 2, batch: 8096, sum loss: 4473.926270, avg loss: 2.678998, ppl: 14.570483 +epoch: 2, batch: 8097, sum loss: 4138.704102, avg loss: 2.475302, ppl: 11.885293 +epoch: 2, batch: 8098, sum loss: 3710.691406, avg loss: 2.493744, ppl: 12.106520 +epoch: 2, batch: 8099, sum loss: 4679.484375, avg loss: 2.602605, ppl: 13.498858 +epoch: 2, batch: 8100, sum loss: 4360.131348, avg loss: 2.610857, ppl: 13.610714 
+epoch: 2, batch: 8101, sum loss: 4096.322754, avg loss: 2.491680, ppl: 12.081561 +epoch: 2, batch: 8102, sum loss: 4881.499023, avg loss: 2.421379, ppl: 11.261374 +epoch: 2, batch: 8103, sum loss: 4451.124512, avg loss: 2.546410, ppl: 12.761209 +epoch: 2, batch: 8104, sum loss: 3847.018555, avg loss: 2.253672, ppl: 9.522642 +epoch: 2, batch: 8105, sum loss: 4262.065430, avg loss: 2.535434, ppl: 12.621914 +epoch: 2, batch: 8106, sum loss: 3281.592285, avg loss: 2.186271, ppl: 8.901953 +epoch: 2, batch: 8107, sum loss: 4331.153320, avg loss: 2.538777, ppl: 12.664171 +epoch: 2, batch: 8108, sum loss: 4441.279785, avg loss: 2.440264, ppl: 11.476068 +epoch: 2, batch: 8109, sum loss: 4591.019531, avg loss: 2.742545, ppl: 15.526444 +epoch: 2, batch: 8110, sum loss: 3919.187988, avg loss: 2.411808, ppl: 11.154110 +epoch: 2, batch: 8111, sum loss: 4775.455566, avg loss: 2.655982, ppl: 14.238958 +epoch: 2, batch: 8112, sum loss: 5195.165527, avg loss: 2.731423, ppl: 15.354716 +epoch: 2, batch: 8113, sum loss: 4347.190430, avg loss: 2.633065, ppl: 13.916362 +epoch: 2, batch: 8114, sum loss: 3944.066895, avg loss: 2.421158, ppl: 11.258893 +epoch: 2, batch: 8115, sum loss: 3622.829102, avg loss: 2.424919, ppl: 11.301315 +epoch: 2, batch: 8116, sum loss: 4111.408691, avg loss: 2.662830, ppl: 14.336800 +epoch: 2, batch: 8117, sum loss: 4275.273438, avg loss: 2.489967, ppl: 12.060880 +epoch: 2, batch: 8118, sum loss: 3789.165283, avg loss: 2.422740, ppl: 11.276715 +epoch: 2, batch: 8119, sum loss: 3800.109375, avg loss: 2.482110, ppl: 11.966481 +epoch: 2, batch: 8120, sum loss: 3535.599121, avg loss: 2.662349, ppl: 14.329906 +epoch: 2, batch: 8121, sum loss: 3331.906982, avg loss: 2.283692, ppl: 9.812846 +epoch: 2, batch: 8122, sum loss: 4295.389648, avg loss: 2.587584, ppl: 13.297609 +epoch: 2, batch: 8123, sum loss: 3883.296875, avg loss: 2.288331, ppl: 9.858466 +epoch: 2, batch: 8124, sum loss: 5324.729004, avg loss: 2.629496, ppl: 13.866777 +epoch: 2, batch: 8125, sum loss: 
3278.860352, avg loss: 2.338702, ppl: 10.367772 +epoch: 2, batch: 8126, sum loss: 5370.614746, avg loss: 2.702876, ppl: 14.922585 +epoch: 2, batch: 8127, sum loss: 3435.466064, avg loss: 2.321261, ppl: 10.188513 +epoch: 2, batch: 8128, sum loss: 4941.115234, avg loss: 2.799499, ppl: 16.436407 +epoch: 2, batch: 8129, sum loss: 4711.866211, avg loss: 2.644145, ppl: 14.071409 +epoch: 2, batch: 8130, sum loss: 4035.699219, avg loss: 2.310074, ppl: 10.075169 +epoch: 2, batch: 8131, sum loss: 5345.187500, avg loss: 2.687374, ppl: 14.693043 +epoch: 2, batch: 8132, sum loss: 3819.208008, avg loss: 2.459245, ppl: 11.695983 +epoch: 2, batch: 8133, sum loss: 4617.590332, avg loss: 2.534353, ppl: 12.608265 +epoch: 2, batch: 8134, sum loss: 4153.830078, avg loss: 2.446308, ppl: 11.545637 +epoch: 2, batch: 8135, sum loss: 5016.187988, avg loss: 2.611238, ppl: 13.615897 +epoch: 2, batch: 8136, sum loss: 3756.829590, avg loss: 2.528149, ppl: 12.530293 +epoch: 2, batch: 8137, sum loss: 4708.603516, avg loss: 2.624640, ppl: 13.799602 +epoch: 2, batch: 8138, sum loss: 4185.427246, avg loss: 2.624092, ppl: 13.792050 +epoch: 2, batch: 8139, sum loss: 4349.747559, avg loss: 2.569254, ppl: 13.056087 +epoch: 2, batch: 8140, sum loss: 4273.060059, avg loss: 2.522468, ppl: 12.459304 +epoch: 2, batch: 8141, sum loss: 4048.815674, avg loss: 2.503906, ppl: 12.230172 +epoch: 2, batch: 8142, sum loss: 3970.230957, avg loss: 2.352033, ppl: 10.506905 +epoch: 2, batch: 8143, sum loss: 3258.895996, avg loss: 2.142601, ppl: 8.521574 +epoch: 2, batch: 8144, sum loss: 4100.003906, avg loss: 2.481843, ppl: 11.963289 +epoch: 2, batch: 8145, sum loss: 4358.982910, avg loss: 2.603933, ppl: 13.516790 +epoch: 2, batch: 8146, sum loss: 4889.168457, avg loss: 2.555760, ppl: 12.881082 +epoch: 2, batch: 8147, sum loss: 3245.698242, avg loss: 2.457001, ppl: 11.669761 +epoch: 2, batch: 8148, sum loss: 4542.141113, avg loss: 2.604439, ppl: 13.523630 +epoch: 2, batch: 8149, sum loss: 4863.996582, avg loss: 2.706732, 
ppl: 14.980233 +epoch: 2, batch: 8150, sum loss: 4326.764160, avg loss: 2.704227, ppl: 14.942768 +epoch: 2, batch: 8151, sum loss: 5331.421387, avg loss: 2.838882, ppl: 17.096649 +epoch: 2, batch: 8152, sum loss: 3657.312988, avg loss: 2.236889, ppl: 9.364151 +epoch: 2, batch: 8153, sum loss: 5114.410645, avg loss: 2.686140, ppl: 14.674926 +epoch: 2, batch: 8154, sum loss: 4780.210449, avg loss: 2.610710, ppl: 13.608715 +epoch: 2, batch: 8155, sum loss: 2796.166260, avg loss: 2.172623, ppl: 8.781291 +epoch: 2, batch: 8156, sum loss: 4064.867432, avg loss: 2.364670, ppl: 10.640528 +epoch: 2, batch: 8157, sum loss: 4218.005859, avg loss: 2.512213, ppl: 12.332191 +epoch: 2, batch: 8158, sum loss: 4405.235352, avg loss: 2.487428, ppl: 12.030296 +epoch: 2, batch: 8159, sum loss: 3484.338867, avg loss: 2.355875, ppl: 10.547352 +epoch: 2, batch: 8160, sum loss: 4356.816895, avg loss: 2.389916, ppl: 10.912580 +epoch: 2, batch: 8161, sum loss: 3960.346680, avg loss: 2.394406, ppl: 10.961680 +epoch: 2, batch: 8162, sum loss: 3477.937988, avg loss: 2.230878, ppl: 9.308032 +epoch: 2, batch: 8163, sum loss: 4928.713867, avg loss: 2.750398, ppl: 15.648865 +epoch: 2, batch: 8164, sum loss: 4735.533203, avg loss: 2.543251, ppl: 12.720960 +epoch: 2, batch: 8165, sum loss: 4434.466797, avg loss: 2.371373, ppl: 10.712087 +epoch: 2, batch: 8166, sum loss: 4367.240234, avg loss: 2.575024, ppl: 13.131628 +epoch: 2, batch: 8167, sum loss: 3311.895752, avg loss: 2.471564, ppl: 11.840952 +epoch: 2, batch: 8168, sum loss: 4921.242188, avg loss: 2.486732, ppl: 12.021921 +epoch: 2, batch: 8169, sum loss: 4047.566406, avg loss: 2.347776, ppl: 10.462280 +epoch: 2, batch: 8170, sum loss: 5188.217285, avg loss: 2.849103, ppl: 17.272289 +epoch: 2, batch: 8171, sum loss: 4662.069336, avg loss: 2.765166, ppl: 15.881673 +epoch: 2, batch: 8172, sum loss: 4724.869141, avg loss: 2.829263, ppl: 16.932976 +epoch: 2, batch: 8173, sum loss: 4568.297852, avg loss: 2.554976, ppl: 12.870996 +epoch: 2, batch: 
8174, sum loss: 4076.888428, avg loss: 2.516598, ppl: 12.386386 +epoch: 2, batch: 8175, sum loss: 3511.578857, avg loss: 2.269928, ppl: 9.678706 +epoch: 2, batch: 8176, sum loss: 4442.061035, avg loss: 2.569150, ppl: 13.054729 +epoch: 2, batch: 8177, sum loss: 4284.798340, avg loss: 2.553515, ppl: 12.852202 +epoch: 2, batch: 8178, sum loss: 3924.695068, avg loss: 2.488710, ppl: 12.045723 +epoch: 2, batch: 8179, sum loss: 4961.380859, avg loss: 2.580021, ppl: 13.197417 +epoch: 2, batch: 8180, sum loss: 3667.476562, avg loss: 2.363065, ppl: 10.623463 +epoch: 2, batch: 8181, sum loss: 5131.333496, avg loss: 2.630105, ppl: 13.875220 +epoch: 2, batch: 8182, sum loss: 4351.898438, avg loss: 2.607489, ppl: 13.564941 +epoch: 2, batch: 8183, sum loss: 3605.303467, avg loss: 2.222752, ppl: 9.232703 +epoch: 2, batch: 8184, sum loss: 4327.330078, avg loss: 2.658065, ppl: 14.268654 +epoch: 2, batch: 8185, sum loss: 4687.428223, avg loss: 2.720504, ppl: 15.187972 +epoch: 2, batch: 8186, sum loss: 4668.217773, avg loss: 2.672134, ppl: 14.470813 +epoch: 2, batch: 8187, sum loss: 4197.977051, avg loss: 2.242509, ppl: 9.416930 +epoch: 2, batch: 8188, sum loss: 4519.702148, avg loss: 2.717800, ppl: 15.146968 +epoch: 2, batch: 8189, sum loss: 5813.025391, avg loss: 2.852319, ppl: 17.327915 +epoch: 2, batch: 8190, sum loss: 4844.816895, avg loss: 2.744939, ppl: 15.563663 +epoch: 2, batch: 8191, sum loss: 5020.854004, avg loss: 2.696485, ppl: 14.827515 +epoch: 2, batch: 8192, sum loss: 4215.654297, avg loss: 2.702343, ppl: 14.914628 +epoch: 2, batch: 8193, sum loss: 4061.836914, avg loss: 2.300021, ppl: 9.974391 +epoch: 2, batch: 8194, sum loss: 3758.268311, avg loss: 2.311358, ppl: 10.088115 +epoch: 2, batch: 8195, sum loss: 3978.394775, avg loss: 2.372328, ppl: 10.722328 +epoch: 2, batch: 8196, sum loss: 4817.816895, avg loss: 2.770452, ppl: 15.965857 +epoch: 2, batch: 8197, sum loss: 4121.510742, avg loss: 2.473896, ppl: 11.868597 +epoch: 2, batch: 8198, sum loss: 4546.347656, avg 
loss: 2.575834, ppl: 13.142277 +epoch: 2, batch: 8199, sum loss: 4361.385742, avg loss: 2.418961, ppl: 11.234176 +epoch: 2, batch: 8200, sum loss: 4296.070801, avg loss: 2.456301, ppl: 11.661598 +epoch: 2, batch: 8201, sum loss: 4387.627930, avg loss: 2.445724, ppl: 11.538896 +epoch: 2, batch: 8202, sum loss: 3465.636963, avg loss: 2.225843, ppl: 9.261284 +epoch: 2, batch: 8203, sum loss: 3547.880859, avg loss: 2.317362, ppl: 10.148865 +epoch: 2, batch: 8204, sum loss: 3810.193359, avg loss: 2.425330, ppl: 11.305962 +epoch: 2, batch: 8205, sum loss: 4470.148926, avg loss: 2.371432, ppl: 10.712720 +epoch: 2, batch: 8206, sum loss: 4471.725098, avg loss: 2.373527, ppl: 10.735191 +epoch: 2, batch: 8207, sum loss: 3744.268066, avg loss: 2.487886, ppl: 12.035805 +epoch: 2, batch: 8208, sum loss: 4048.776855, avg loss: 2.485437, ppl: 12.006365 +epoch: 2, batch: 8209, sum loss: 4200.854492, avg loss: 2.657087, ppl: 14.254703 +epoch: 2, batch: 8210, sum loss: 4137.320801, avg loss: 2.408219, ppl: 11.114153 +epoch: 2, batch: 8211, sum loss: 4804.835449, avg loss: 2.638570, ppl: 13.993176 +epoch: 2, batch: 8212, sum loss: 4081.064941, avg loss: 2.491493, ppl: 12.079291 +epoch: 2, batch: 8213, sum loss: 4825.541992, avg loss: 2.657237, ppl: 14.256837 +epoch: 2, batch: 8214, sum loss: 4920.910156, avg loss: 2.598157, ppl: 13.438952 +epoch: 2, batch: 8215, sum loss: 3793.743652, avg loss: 2.371090, ppl: 10.709056 +epoch: 2, batch: 8216, sum loss: 4065.719482, avg loss: 2.417193, ppl: 11.214341 +epoch: 2, batch: 8217, sum loss: 4572.558594, avg loss: 2.523487, ppl: 12.472012 +epoch: 2, batch: 8218, sum loss: 4513.680176, avg loss: 2.699570, ppl: 14.873330 +epoch: 2, batch: 8219, sum loss: 4723.755371, avg loss: 2.650817, ppl: 14.165606 +epoch: 2, batch: 8220, sum loss: 5258.209473, avg loss: 2.396631, ppl: 10.986107 +epoch: 2, batch: 8221, sum loss: 4611.659180, avg loss: 2.503615, ppl: 12.226618 +epoch: 2, batch: 8222, sum loss: 3793.203125, avg loss: 2.510392, ppl: 12.309760 
+epoch: 2, batch: 8223, sum loss: 3831.675781, avg loss: 2.400799, ppl: 11.031990 +epoch: 2, batch: 8224, sum loss: 3988.091309, avg loss: 2.248078, ppl: 9.469522 +epoch: 2, batch: 8225, sum loss: 5486.183105, avg loss: 2.895083, ppl: 18.085011 +epoch: 2, batch: 8226, sum loss: 4880.594238, avg loss: 2.709936, ppl: 15.028309 +epoch: 2, batch: 8227, sum loss: 4978.485840, avg loss: 2.519477, ppl: 12.422094 +epoch: 2, batch: 8228, sum loss: 3969.143555, avg loss: 2.510527, ppl: 12.311419 +epoch: 2, batch: 8229, sum loss: 3501.648438, avg loss: 2.243209, ppl: 9.423519 +epoch: 2, batch: 8230, sum loss: 4393.443359, avg loss: 2.454437, ppl: 11.639884 +epoch: 2, batch: 8231, sum loss: 3837.690674, avg loss: 2.593034, ppl: 13.370279 +epoch: 2, batch: 8232, sum loss: 4509.197266, avg loss: 2.578157, ppl: 13.172844 +epoch: 2, batch: 8233, sum loss: 6988.508789, avg loss: 3.204268, ppl: 24.637459 +epoch: 2, batch: 8234, sum loss: 3600.042480, avg loss: 2.248621, ppl: 9.474664 +epoch: 2, batch: 8235, sum loss: 4371.673828, avg loss: 2.647895, ppl: 14.124270 +epoch: 2, batch: 8236, sum loss: 4068.767090, avg loss: 2.598191, ppl: 13.439404 +epoch: 2, batch: 8237, sum loss: 3832.359619, avg loss: 2.326873, ppl: 10.245851 +epoch: 2, batch: 8238, sum loss: 3289.972168, avg loss: 2.247249, ppl: 9.461668 +epoch: 2, batch: 8239, sum loss: 3800.969727, avg loss: 2.306414, ppl: 10.038359 +epoch: 2, batch: 8240, sum loss: 3884.511230, avg loss: 2.411242, ppl: 11.147800 +epoch: 2, batch: 8241, sum loss: 5448.158203, avg loss: 2.856926, ppl: 17.407936 +epoch: 2, batch: 8242, sum loss: 4574.735352, avg loss: 2.561442, ppl: 12.954486 +epoch: 2, batch: 8243, sum loss: 3974.835693, avg loss: 2.403166, ppl: 11.058126 +epoch: 2, batch: 8244, sum loss: 3214.314453, avg loss: 2.276427, ppl: 9.741806 +epoch: 2, batch: 8245, sum loss: 4327.481445, avg loss: 2.527735, ppl: 12.525099 +epoch: 2, batch: 8246, sum loss: 4182.382812, avg loss: 2.471858, ppl: 11.844428 +epoch: 2, batch: 8247, sum loss: 
3845.039307, avg loss: 2.436654, ppl: 11.434711 +epoch: 2, batch: 8248, sum loss: 4941.245117, avg loss: 2.663744, ppl: 14.349915 +epoch: 2, batch: 8249, sum loss: 4215.900879, avg loss: 2.641542, ppl: 14.034828 +epoch: 2, batch: 8250, sum loss: 3987.709473, avg loss: 2.376466, ppl: 10.766784 +epoch: 2, batch: 8251, sum loss: 4554.093262, avg loss: 2.605317, ppl: 13.535510 +epoch: 2, batch: 8252, sum loss: 4643.662598, avg loss: 2.514165, ppl: 12.356289 +epoch: 2, batch: 8253, sum loss: 3439.879395, avg loss: 2.213565, ppl: 9.148269 +epoch: 2, batch: 8254, sum loss: 3513.131348, avg loss: 2.348350, ppl: 10.468281 +epoch: 2, batch: 8255, sum loss: 4354.653320, avg loss: 2.499801, ppl: 12.180069 +epoch: 2, batch: 8256, sum loss: 4243.041016, avg loss: 2.596720, ppl: 13.419656 +epoch: 2, batch: 8257, sum loss: 4442.485840, avg loss: 2.531331, ppl: 12.570224 +epoch: 2, batch: 8258, sum loss: 3457.456787, avg loss: 2.441707, ppl: 11.492638 +epoch: 2, batch: 8259, sum loss: 5045.204590, avg loss: 2.775140, ppl: 16.040873 +epoch: 2, batch: 8260, sum loss: 4088.863281, avg loss: 2.612692, ppl: 13.635711 +epoch: 2, batch: 8261, sum loss: 5855.997070, avg loss: 2.788570, ppl: 16.257753 +epoch: 2, batch: 8262, sum loss: 3971.121094, avg loss: 2.466535, ppl: 11.781551 +epoch: 2, batch: 8263, sum loss: 4409.130371, avg loss: 2.477040, ppl: 11.905966 +epoch: 2, batch: 8264, sum loss: 4051.241943, avg loss: 2.496144, ppl: 12.135610 +epoch: 2, batch: 8265, sum loss: 5138.918457, avg loss: 2.595413, ppl: 13.402124 +epoch: 2, batch: 8266, sum loss: 3751.977051, avg loss: 2.300415, ppl: 9.978323 +epoch: 2, batch: 8267, sum loss: 4258.576172, avg loss: 2.598277, ppl: 13.440561 +epoch: 2, batch: 8268, sum loss: 4547.286133, avg loss: 2.544648, ppl: 12.738746 +epoch: 2, batch: 8269, sum loss: 4232.451172, avg loss: 2.645282, ppl: 14.087418 +epoch: 2, batch: 8270, sum loss: 3968.542236, avg loss: 2.460349, ppl: 11.708893 +epoch: 2, batch: 8271, sum loss: 4587.099609, avg loss: 2.475499, 
ppl: 11.887636 +epoch: 2, batch: 8272, sum loss: 4553.415527, avg loss: 2.603439, ppl: 13.510124 +epoch: 2, batch: 8273, sum loss: 4217.470703, avg loss: 2.574768, ppl: 13.128275 +epoch: 2, batch: 8274, sum loss: 3380.622070, avg loss: 2.323452, ppl: 10.210856 +epoch: 2, batch: 8275, sum loss: 4351.718262, avg loss: 2.457210, ppl: 11.672195 +epoch: 2, batch: 8276, sum loss: 3960.439697, avg loss: 2.356002, ppl: 10.548695 +epoch: 2, batch: 8277, sum loss: 4519.090820, avg loss: 2.625852, ppl: 13.816336 +epoch: 2, batch: 8278, sum loss: 4209.015137, avg loss: 2.475891, ppl: 11.892303 +epoch: 2, batch: 8279, sum loss: 5081.125000, avg loss: 2.793362, ppl: 16.335844 +epoch: 2, batch: 8280, sum loss: 4092.209717, avg loss: 2.448959, ppl: 11.576285 +epoch: 2, batch: 8281, sum loss: 3869.303223, avg loss: 2.441201, ppl: 11.486825 +epoch: 2, batch: 8282, sum loss: 4721.567383, avg loss: 2.615827, ppl: 13.678522 +epoch: 2, batch: 8283, sum loss: 4208.381836, avg loss: 2.583414, ppl: 13.242274 +epoch: 2, batch: 8284, sum loss: 4066.490967, avg loss: 2.350573, ppl: 10.491578 +epoch: 2, batch: 8285, sum loss: 4216.916992, avg loss: 2.499655, ppl: 12.178286 +epoch: 2, batch: 8286, sum loss: 4300.534668, avg loss: 2.450447, ppl: 11.593529 +epoch: 2, batch: 8287, sum loss: 4383.455078, avg loss: 2.640636, ppl: 14.022115 +epoch: 2, batch: 8288, sum loss: 5261.967285, avg loss: 2.669694, ppl: 14.435554 +epoch: 2, batch: 8289, sum loss: 4585.318848, avg loss: 2.458616, ppl: 11.688623 +epoch: 2, batch: 8290, sum loss: 4976.766602, avg loss: 2.694514, ppl: 14.798322 +epoch: 2, batch: 8291, sum loss: 4383.305176, avg loss: 2.448774, ppl: 11.574146 +epoch: 2, batch: 8292, sum loss: 4201.152832, avg loss: 2.533868, ppl: 12.602155 +epoch: 2, batch: 8293, sum loss: 4901.071289, avg loss: 2.653531, ppl: 14.204103 +epoch: 2, batch: 8294, sum loss: 5114.684082, avg loss: 2.558621, ppl: 12.917996 +epoch: 2, batch: 8295, sum loss: 3978.160889, avg loss: 2.466312, ppl: 11.778922 +epoch: 2, 
batch: 8296, sum loss: 4435.020020, avg loss: 2.529960, ppl: 12.553006 +epoch: 2, batch: 8297, sum loss: 3589.257568, avg loss: 2.296390, ppl: 9.938241 +epoch: 2, batch: 8298, sum loss: 4039.352539, avg loss: 2.604354, ppl: 13.522483 +epoch: 2, batch: 8299, sum loss: 4393.312988, avg loss: 2.509031, ppl: 12.293013 +epoch: 2, batch: 8300, sum loss: 3663.032715, avg loss: 2.421040, ppl: 11.257556 +epoch: 2, batch: 8301, sum loss: 4217.450195, avg loss: 2.492583, ppl: 12.092468 +epoch: 2, batch: 8302, sum loss: 4549.879883, avg loss: 2.576376, ppl: 13.149398 +epoch: 2, batch: 8303, sum loss: 4205.921875, avg loss: 2.415808, ppl: 11.198817 +epoch: 2, batch: 8304, sum loss: 5041.636719, avg loss: 2.546281, ppl: 12.759563 +epoch: 2, batch: 8305, sum loss: 4663.431641, avg loss: 2.541380, ppl: 12.697177 +epoch: 2, batch: 8306, sum loss: 4402.250488, avg loss: 2.695806, ppl: 14.817451 +epoch: 2, batch: 8307, sum loss: 4053.828857, avg loss: 2.386009, ppl: 10.870025 +epoch: 2, batch: 8308, sum loss: 4443.770020, avg loss: 2.425639, ppl: 11.309450 +epoch: 2, batch: 8309, sum loss: 4170.348633, avg loss: 2.571115, ppl: 13.080401 +epoch: 2, batch: 8310, sum loss: 3864.221436, avg loss: 2.421191, ppl: 11.259266 +epoch: 2, batch: 8311, sum loss: 2991.043457, avg loss: 2.191241, ppl: 8.946306 +epoch: 2, batch: 8312, sum loss: 4344.740234, avg loss: 2.511410, ppl: 12.322298 +epoch: 2, batch: 8313, sum loss: 3898.889160, avg loss: 2.489712, ppl: 12.057803 +epoch: 2, batch: 8314, sum loss: 4199.178223, avg loss: 2.519003, ppl: 12.416213 +epoch: 2, batch: 8315, sum loss: 3943.833984, avg loss: 2.335011, ppl: 10.329576 +epoch: 2, batch: 8316, sum loss: 4899.296875, avg loss: 2.599097, ppl: 13.451579 +epoch: 2, batch: 8317, sum loss: 4623.662598, avg loss: 2.548877, ppl: 12.792729 +epoch: 2, batch: 8318, sum loss: 3963.292725, avg loss: 2.419592, ppl: 11.241276 +epoch: 2, batch: 8319, sum loss: 5253.924316, avg loss: 2.665613, ppl: 14.376766 +epoch: 2, batch: 8320, sum loss: 
4224.903320, avg loss: 2.366893, ppl: 10.664206 +epoch: 2, batch: 8321, sum loss: 3851.965332, avg loss: 2.357384, ppl: 10.563281 +epoch: 2, batch: 8322, sum loss: 4417.383789, avg loss: 2.551926, ppl: 12.831793 +epoch: 2, batch: 8323, sum loss: 4011.214600, avg loss: 2.400488, ppl: 11.028553 +epoch: 2, batch: 8324, sum loss: 4235.175293, avg loss: 2.619156, ppl: 13.724137 +epoch: 2, batch: 8325, sum loss: 3978.329834, avg loss: 2.489568, ppl: 12.056069 +epoch: 2, batch: 8326, sum loss: 4491.791992, avg loss: 2.461256, ppl: 11.719520 +epoch: 2, batch: 8327, sum loss: 4329.064941, avg loss: 2.628455, ppl: 13.852347 +epoch: 2, batch: 8328, sum loss: 3846.534912, avg loss: 2.593753, ppl: 13.379888 +epoch: 2, batch: 8329, sum loss: 3864.401855, avg loss: 2.507724, ppl: 12.276951 +epoch: 2, batch: 8330, sum loss: 3862.577637, avg loss: 2.388731, ppl: 10.899651 +epoch: 2, batch: 8331, sum loss: 4778.741699, avg loss: 2.662252, ppl: 14.328516 +epoch: 2, batch: 8332, sum loss: 4354.069336, avg loss: 2.579425, ppl: 13.189553 +epoch: 2, batch: 8333, sum loss: 4384.445801, avg loss: 2.437157, ppl: 11.440471 +epoch: 2, batch: 8334, sum loss: 4201.121582, avg loss: 2.652223, ppl: 14.185539 +epoch: 2, batch: 8335, sum loss: 4387.886719, avg loss: 2.651291, ppl: 14.172325 +epoch: 2, batch: 8336, sum loss: 4390.513184, avg loss: 2.670629, ppl: 14.449048 +epoch: 2, batch: 8337, sum loss: 4132.020020, avg loss: 2.579288, ppl: 13.187748 +epoch: 2, batch: 8338, sum loss: 3897.229248, avg loss: 2.674831, ppl: 14.509900 +epoch: 2, batch: 8339, sum loss: 3615.816895, avg loss: 2.255656, ppl: 9.541553 +epoch: 2, batch: 8340, sum loss: 4179.892578, avg loss: 2.573825, ppl: 13.115902 +epoch: 2, batch: 8341, sum loss: 4218.087891, avg loss: 2.429774, ppl: 11.356316 +epoch: 2, batch: 8342, sum loss: 3470.702637, avg loss: 2.500506, ppl: 12.188662 +epoch: 2, batch: 8343, sum loss: 4144.958496, avg loss: 2.524335, ppl: 12.482596 +epoch: 2, batch: 8344, sum loss: 4770.095703, avg loss: 2.559064, 
ppl: 12.923717 +epoch: 2, batch: 8345, sum loss: 4060.910645, avg loss: 2.616566, ppl: 13.688639 +epoch: 2, batch: 8346, sum loss: 3551.043213, avg loss: 2.248919, ppl: 9.477486 +epoch: 2, batch: 8347, sum loss: 3713.881348, avg loss: 2.312504, ppl: 10.099681 +epoch: 2, batch: 8348, sum loss: 3149.397461, avg loss: 2.161563, ppl: 8.684703 +epoch: 2, batch: 8349, sum loss: 4447.104492, avg loss: 2.755331, ppl: 15.726250 +epoch: 2, batch: 8350, sum loss: 4855.325195, avg loss: 2.519629, ppl: 12.423989 +epoch: 2, batch: 8351, sum loss: 3677.078857, avg loss: 2.515102, ppl: 12.367869 +epoch: 2, batch: 8352, sum loss: 4983.535645, avg loss: 2.957588, ppl: 19.251480 +epoch: 2, batch: 8353, sum loss: 4013.982422, avg loss: 2.563207, ppl: 12.977371 +epoch: 2, batch: 8354, sum loss: 4305.259277, avg loss: 2.472866, ppl: 11.856377 +epoch: 2, batch: 8355, sum loss: 3198.064453, avg loss: 2.439408, ppl: 11.466254 +epoch: 2, batch: 8356, sum loss: 4663.661133, avg loss: 2.631863, ppl: 13.899642 +epoch: 2, batch: 8357, sum loss: 3709.178955, avg loss: 2.424300, ppl: 11.294320 +epoch: 2, batch: 8358, sum loss: 4001.998047, avg loss: 2.468845, ppl: 11.808802 +epoch: 2, batch: 8359, sum loss: 4747.166992, avg loss: 2.653531, ppl: 14.204106 +epoch: 2, batch: 8360, sum loss: 4653.354492, avg loss: 2.732445, ppl: 15.370429 +epoch: 2, batch: 8361, sum loss: 3280.214355, avg loss: 2.511650, ppl: 12.325245 +epoch: 2, batch: 8362, sum loss: 4329.853027, avg loss: 2.721466, ppl: 15.202598 +epoch: 2, batch: 8363, sum loss: 4428.875000, avg loss: 2.568953, ppl: 13.052155 +epoch: 2, batch: 8364, sum loss: 4602.264648, avg loss: 2.638913, ppl: 13.997982 +epoch: 2, batch: 8365, sum loss: 4363.028809, avg loss: 2.503172, ppl: 12.221197 +epoch: 2, batch: 8366, sum loss: 3690.168213, avg loss: 2.520607, ppl: 12.436137 +epoch: 2, batch: 8367, sum loss: 3970.420410, avg loss: 2.600144, ppl: 13.465682 +epoch: 2, batch: 8368, sum loss: 4080.836670, avg loss: 2.533108, ppl: 12.592583 +epoch: 2, batch: 
8369, sum loss: 4094.236084, avg loss: 2.514887, ppl: 12.365212 +epoch: 2, batch: 8370, sum loss: 3999.439697, avg loss: 2.567034, ppl: 13.027135 +epoch: 2, batch: 8371, sum loss: 5110.473145, avg loss: 2.665870, ppl: 14.380462 +epoch: 2, batch: 8372, sum loss: 5226.194824, avg loss: 2.691141, ppl: 14.748489 +epoch: 2, batch: 8373, sum loss: 4168.310547, avg loss: 2.420622, ppl: 11.252852 +epoch: 2, batch: 8374, sum loss: 4608.672852, avg loss: 2.592055, ppl: 13.357187 +epoch: 2, batch: 8375, sum loss: 3751.639404, avg loss: 2.400281, ppl: 11.026277 +epoch: 2, batch: 8376, sum loss: 4084.821289, avg loss: 2.509104, ppl: 12.293910 +epoch: 2, batch: 8377, sum loss: 4434.113770, avg loss: 2.479929, ppl: 11.940422 +epoch: 2, batch: 8378, sum loss: 3684.934326, avg loss: 2.641530, ppl: 14.034660 +epoch: 2, batch: 8379, sum loss: 3950.020264, avg loss: 2.340060, ppl: 10.381854 +epoch: 2, batch: 8380, sum loss: 3870.562256, avg loss: 2.452828, ppl: 11.621161 +epoch: 2, batch: 8381, sum loss: 5758.770020, avg loss: 2.951702, ppl: 19.138498 +epoch: 2, batch: 8382, sum loss: 4245.204102, avg loss: 2.453875, ppl: 11.633340 +epoch: 2, batch: 8383, sum loss: 4088.874756, avg loss: 2.735033, ppl: 15.410256 +epoch: 2, batch: 8384, sum loss: 5036.934082, avg loss: 2.638520, ppl: 13.992476 +epoch: 2, batch: 8385, sum loss: 4240.680664, avg loss: 2.456941, ppl: 11.669063 +epoch: 2, batch: 8386, sum loss: 3358.091309, avg loss: 2.311143, ppl: 10.085950 +epoch: 2, batch: 8387, sum loss: 4245.697266, avg loss: 2.431671, ppl: 11.377877 +epoch: 2, batch: 8388, sum loss: 5108.081055, avg loss: 2.585061, ppl: 13.264102 +epoch: 2, batch: 8389, sum loss: 4126.357422, avg loss: 2.413074, ppl: 11.168245 +epoch: 2, batch: 8390, sum loss: 4886.847656, avg loss: 2.736197, ppl: 15.428200 +epoch: 2, batch: 8391, sum loss: 4575.166504, avg loss: 2.733075, ppl: 15.380103 +epoch: 2, batch: 8392, sum loss: 3905.374023, avg loss: 2.384233, ppl: 10.850737 +epoch: 2, batch: 8393, sum loss: 4457.296875, 
avg loss: 2.815727, ppl: 16.705324 +epoch: 2, batch: 8394, sum loss: 4542.152344, avg loss: 2.413471, ppl: 11.172674 +epoch: 2, batch: 8395, sum loss: 3900.547119, avg loss: 2.450092, ppl: 11.589417 +epoch: 2, batch: 8396, sum loss: 3739.058350, avg loss: 2.386126, ppl: 10.871292 +epoch: 2, batch: 8397, sum loss: 4102.995605, avg loss: 2.486664, ppl: 12.021108 +epoch: 2, batch: 8398, sum loss: 4390.488281, avg loss: 2.622753, ppl: 13.773592 +epoch: 2, batch: 8399, sum loss: 4322.162109, avg loss: 2.532022, ppl: 12.578918 +epoch: 2, batch: 8400, sum loss: 4237.901367, avg loss: 2.478305, ppl: 11.921040 +epoch: 2, batch: 8401, sum loss: 3750.771729, avg loss: 2.524072, ppl: 12.479314 +epoch: 2, batch: 8402, sum loss: 4450.782715, avg loss: 2.540401, ppl: 12.684759 +epoch: 2, batch: 8403, sum loss: 4699.111816, avg loss: 2.827384, ppl: 16.901186 +epoch: 2, batch: 8404, sum loss: 3273.792480, avg loss: 2.362044, ppl: 10.612617 +epoch: 2, batch: 8405, sum loss: 4455.638184, avg loss: 2.590487, ppl: 13.336268 +epoch: 2, batch: 8406, sum loss: 4382.274414, avg loss: 2.576293, ppl: 13.148307 +epoch: 2, batch: 8407, sum loss: 3434.715332, avg loss: 2.294399, ppl: 9.918473 +epoch: 2, batch: 8408, sum loss: 4063.674316, avg loss: 2.440645, ppl: 11.480446 +epoch: 2, batch: 8409, sum loss: 4197.120605, avg loss: 2.552993, ppl: 12.845493 +epoch: 2, batch: 8410, sum loss: 3748.721924, avg loss: 2.406112, ppl: 11.090754 +epoch: 2, batch: 8411, sum loss: 4301.148438, avg loss: 2.555644, ppl: 12.879589 +epoch: 2, batch: 8412, sum loss: 3856.970703, avg loss: 2.232043, ppl: 9.318888 +epoch: 2, batch: 8413, sum loss: 4537.501953, avg loss: 2.464694, ppl: 11.759886 +epoch: 2, batch: 8414, sum loss: 5449.958984, avg loss: 2.845932, ppl: 17.217590 +epoch: 2, batch: 8415, sum loss: 4452.226074, avg loss: 2.549958, ppl: 12.806562 +epoch: 2, batch: 8416, sum loss: 4773.458984, avg loss: 2.834596, ppl: 17.023516 +epoch: 2, batch: 8417, sum loss: 3403.160645, avg loss: 2.410170, ppl: 
11.135858 +epoch: 2, batch: 8418, sum loss: 3499.314453, avg loss: 2.488844, ppl: 12.047340 +epoch: 2, batch: 8419, sum loss: 3417.811035, avg loss: 2.244131, ppl: 9.432212 +epoch: 2, batch: 8420, sum loss: 4948.589844, avg loss: 2.729504, ppl: 15.325278 +epoch: 2, batch: 8421, sum loss: 3733.216309, avg loss: 2.413198, ppl: 11.169620 +epoch: 2, batch: 8422, sum loss: 3766.354248, avg loss: 2.422093, ppl: 11.269421 +epoch: 2, batch: 8423, sum loss: 4239.431152, avg loss: 2.636462, ppl: 13.963716 +epoch: 2, batch: 8424, sum loss: 3682.065674, avg loss: 2.352758, ppl: 10.514525 +epoch: 2, batch: 8425, sum loss: 5029.862305, avg loss: 2.708596, ppl: 15.008182 +epoch: 2, batch: 8426, sum loss: 5443.016602, avg loss: 2.772805, ppl: 16.003464 +epoch: 2, batch: 8427, sum loss: 3925.905762, avg loss: 2.345225, ppl: 10.435616 +epoch: 2, batch: 8428, sum loss: 3177.014160, avg loss: 2.010768, ppl: 7.469055 +epoch: 2, batch: 8429, sum loss: 4479.695312, avg loss: 2.568633, ppl: 13.047971 +epoch: 2, batch: 8430, sum loss: 3592.672607, avg loss: 2.242617, ppl: 9.417947 +epoch: 2, batch: 8431, sum loss: 4501.262695, avg loss: 2.682517, ppl: 14.621844 +epoch: 2, batch: 8432, sum loss: 4098.253418, avg loss: 2.540765, ppl: 12.689372 +epoch: 2, batch: 8433, sum loss: 3947.156006, avg loss: 2.398029, ppl: 11.001474 +epoch: 2, batch: 8434, sum loss: 3577.310059, avg loss: 2.268427, ppl: 9.664190 +epoch: 2, batch: 8435, sum loss: 4303.257812, avg loss: 2.669515, ppl: 14.432962 +epoch: 2, batch: 8436, sum loss: 4268.289062, avg loss: 2.392539, ppl: 10.941236 +epoch: 2, batch: 8437, sum loss: 4526.482422, avg loss: 2.567489, ppl: 13.033053 +epoch: 2, batch: 8438, sum loss: 4725.182129, avg loss: 2.695483, ppl: 14.812678 +epoch: 2, batch: 8439, sum loss: 3308.893555, avg loss: 2.127906, ppl: 8.397264 +epoch: 2, batch: 8440, sum loss: 4406.167480, avg loss: 2.764220, ppl: 15.866667 +epoch: 2, batch: 8441, sum loss: 3917.872314, avg loss: 2.348844, ppl: 10.473458 +epoch: 2, batch: 8442, 
sum loss: 4291.187500, avg loss: 2.574198, ppl: 13.120784 +epoch: 2, batch: 8443, sum loss: 4865.378906, avg loss: 2.770717, ppl: 15.970082 +epoch: 2, batch: 8444, sum loss: 3837.753418, avg loss: 2.367522, ppl: 10.670917 +epoch: 2, batch: 8445, sum loss: 4081.244141, avg loss: 2.403560, ppl: 11.062486 +epoch: 2, batch: 8446, sum loss: 3984.188477, avg loss: 2.479271, ppl: 11.932565 +epoch: 2, batch: 8447, sum loss: 4032.642334, avg loss: 2.464940, ppl: 11.762780 +epoch: 2, batch: 8448, sum loss: 4374.159668, avg loss: 2.521130, ppl: 12.442644 +epoch: 2, batch: 8449, sum loss: 4385.865234, avg loss: 2.560342, ppl: 12.940237 +epoch: 2, batch: 8450, sum loss: 3689.645020, avg loss: 2.351590, ppl: 10.502256 +epoch: 2, batch: 8451, sum loss: 4167.662109, avg loss: 2.652872, ppl: 14.194752 +epoch: 2, batch: 8452, sum loss: 3973.878418, avg loss: 2.417201, ppl: 11.214427 +epoch: 2, batch: 8453, sum loss: 3742.319336, avg loss: 2.300135, ppl: 9.975526 +epoch: 2, batch: 8454, sum loss: 4559.742188, avg loss: 2.515026, ppl: 12.366932 +epoch: 2, batch: 8455, sum loss: 3355.203613, avg loss: 2.263970, ppl: 9.621211 +epoch: 2, batch: 8456, sum loss: 3372.347900, avg loss: 2.283242, ppl: 9.808425 +epoch: 2, batch: 8457, sum loss: 4827.130859, avg loss: 2.677277, ppl: 14.545437 +epoch: 2, batch: 8458, sum loss: 3381.670654, avg loss: 2.477414, ppl: 11.910429 +epoch: 2, batch: 8459, sum loss: 4799.136230, avg loss: 2.485312, ppl: 12.004859 +epoch: 2, batch: 8460, sum loss: 4396.036133, avg loss: 2.492084, ppl: 12.086438 +epoch: 2, batch: 8461, sum loss: 3843.461182, avg loss: 2.443395, ppl: 11.512062 +epoch: 2, batch: 8462, sum loss: 4133.078125, avg loss: 2.559181, ppl: 12.925233 +epoch: 2, batch: 8463, sum loss: 4996.323242, avg loss: 2.715393, ppl: 15.110545 +epoch: 2, batch: 8464, sum loss: 4149.316406, avg loss: 2.499588, ppl: 12.177479 +epoch: 2, batch: 8465, sum loss: 4916.240234, avg loss: 2.633230, ppl: 13.918654 +epoch: 2, batch: 8466, sum loss: 3701.735840, avg loss: 
2.247563, ppl: 9.464639 +epoch: 2, batch: 8467, sum loss: 4373.827148, avg loss: 2.657246, ppl: 14.256976 +epoch: 2, batch: 8468, sum loss: 4282.754883, avg loss: 2.426490, ppl: 11.319083 +epoch: 2, batch: 8469, sum loss: 4061.276367, avg loss: 2.506961, ppl: 12.267591 +epoch: 2, batch: 8470, sum loss: 3630.246338, avg loss: 2.576470, ppl: 13.150636 +epoch: 2, batch: 8471, sum loss: 4530.367188, avg loss: 2.539444, ppl: 12.672620 +epoch: 2, batch: 8472, sum loss: 3210.731689, avg loss: 2.087602, ppl: 8.065550 +epoch: 2, batch: 8473, sum loss: 5266.687500, avg loss: 2.669380, ppl: 14.431026 +epoch: 2, batch: 8474, sum loss: 4400.841797, avg loss: 2.546783, ppl: 12.765975 +epoch: 2, batch: 8475, sum loss: 4006.692383, avg loss: 2.681856, ppl: 14.612184 +epoch: 2, batch: 8476, sum loss: 3840.653320, avg loss: 2.356229, ppl: 10.551089 +epoch: 2, batch: 8477, sum loss: 3644.510010, avg loss: 2.319866, ppl: 10.174315 +epoch: 2, batch: 8478, sum loss: 4707.419434, avg loss: 2.868629, ppl: 17.612846 +epoch: 2, batch: 8479, sum loss: 4362.241211, avg loss: 2.530303, ppl: 12.557304 +epoch: 2, batch: 8480, sum loss: 4577.584961, avg loss: 2.687953, ppl: 14.701558 +epoch: 2, batch: 8481, sum loss: 4438.856445, avg loss: 2.604963, ppl: 13.530723 +epoch: 2, batch: 8482, sum loss: 3623.783203, avg loss: 2.348531, ppl: 10.470178 +epoch: 2, batch: 8483, sum loss: 4061.240723, avg loss: 2.535106, ppl: 12.617774 +epoch: 2, batch: 8484, sum loss: 4527.324219, avg loss: 2.717481, ppl: 15.142137 +epoch: 2, batch: 8485, sum loss: 4101.907715, avg loss: 2.568508, ppl: 13.046350 +epoch: 2, batch: 8486, sum loss: 3454.465820, avg loss: 2.250466, ppl: 9.492162 +epoch: 2, batch: 8487, sum loss: 3226.157227, avg loss: 2.229549, ppl: 9.295672 +epoch: 2, batch: 8488, sum loss: 4003.750000, avg loss: 2.397455, ppl: 10.995160 +epoch: 2, batch: 8489, sum loss: 3658.623047, avg loss: 2.322935, ppl: 10.205585 +epoch: 2, batch: 8490, sum loss: 4337.139160, avg loss: 2.527470, ppl: 12.521791 +epoch: 2, 
batch: 8491, sum loss: 4381.427734, avg loss: 2.488034, ppl: 12.037584 +epoch: 2, batch: 8492, sum loss: 4081.471191, avg loss: 2.573437, ppl: 13.110808 +epoch: 2, batch: 8493, sum loss: 3794.190918, avg loss: 2.363982, ppl: 10.633205 +epoch: 2, batch: 8494, sum loss: 4847.436523, avg loss: 2.709579, ppl: 15.022945 +epoch: 2, batch: 8495, sum loss: 3993.309082, avg loss: 2.632372, ppl: 13.906723 +epoch: 2, batch: 8496, sum loss: 4330.608398, avg loss: 2.473220, ppl: 11.860578 +epoch: 2, batch: 8497, sum loss: 4714.623047, avg loss: 2.594729, ppl: 13.392960 +epoch: 2, batch: 8498, sum loss: 3976.673828, avg loss: 2.512112, ppl: 12.330947 +epoch: 2, batch: 8499, sum loss: 5035.820312, avg loss: 2.460098, ppl: 11.705956 +epoch: 2, batch: 8500, sum loss: 4603.274902, avg loss: 2.662392, ppl: 14.330522 +epoch: 2, batch: 8501, sum loss: 4344.365723, avg loss: 2.770641, ppl: 15.968872 +epoch: 2, batch: 8502, sum loss: 4184.450195, avg loss: 2.599037, ppl: 13.450784 +epoch: 2, batch: 8503, sum loss: 3515.748779, avg loss: 2.403109, ppl: 11.057505 +epoch: 2, batch: 8504, sum loss: 4424.750488, avg loss: 2.646382, ppl: 14.102923 +epoch: 2, batch: 8505, sum loss: 3644.545898, avg loss: 2.274997, ppl: 9.727895 +epoch: 2, batch: 8506, sum loss: 4220.721680, avg loss: 2.438314, ppl: 11.453713 +epoch: 2, batch: 8507, sum loss: 4683.593750, avg loss: 2.855850, ppl: 17.389208 +epoch: 2, batch: 8508, sum loss: 3716.928711, avg loss: 2.146033, ppl: 8.550868 +epoch: 2, batch: 8509, sum loss: 4096.455078, avg loss: 2.637769, ppl: 13.981975 +epoch: 2, batch: 8510, sum loss: 4777.472656, avg loss: 2.569915, ppl: 13.064717 +epoch: 2, batch: 8511, sum loss: 3768.279297, avg loss: 2.554766, ppl: 12.868284 +epoch: 2, batch: 8512, sum loss: 4121.570312, avg loss: 2.561573, ppl: 12.956176 +epoch: 2, batch: 8513, sum loss: 4100.450684, avg loss: 2.250522, ppl: 9.492689 +epoch: 2, batch: 8514, sum loss: 3972.529785, avg loss: 2.328564, ppl: 10.263190 +epoch: 2, batch: 8515, sum loss: 
4736.931641, avg loss: 2.819602, ppl: 16.770180 +epoch: 2, batch: 8516, sum loss: 4130.184082, avg loss: 2.304790, ppl: 10.022076 +epoch: 2, batch: 8517, sum loss: 4918.454102, avg loss: 2.727928, ppl: 15.301149 +epoch: 2, batch: 8518, sum loss: 4275.991211, avg loss: 2.425406, ppl: 11.306822 +epoch: 2, batch: 8519, sum loss: 4617.368652, avg loss: 2.471825, ppl: 11.844038 +epoch: 2, batch: 8520, sum loss: 4157.720703, avg loss: 2.435689, ppl: 11.423684 +epoch: 2, batch: 8521, sum loss: 4498.846191, avg loss: 2.581094, ppl: 13.211577 +epoch: 2, batch: 8522, sum loss: 3774.590820, avg loss: 2.249458, ppl: 9.482595 +epoch: 2, batch: 8523, sum loss: 4411.723633, avg loss: 2.360473, ppl: 10.595959 +epoch: 2, batch: 8524, sum loss: 4571.478516, avg loss: 2.645532, ppl: 14.090935 +epoch: 2, batch: 8525, sum loss: 3999.365967, avg loss: 2.259529, ppl: 9.578575 +epoch: 2, batch: 8526, sum loss: 3169.162842, avg loss: 2.065947, ppl: 7.892769 +epoch: 2, batch: 8527, sum loss: 3892.986572, avg loss: 2.628620, ppl: 13.854643 +epoch: 2, batch: 8528, sum loss: 4686.730469, avg loss: 2.541611, ppl: 12.700114 +epoch: 2, batch: 8529, sum loss: 4302.140625, avg loss: 2.620061, ppl: 13.736567 +epoch: 2, batch: 8530, sum loss: 4443.112793, avg loss: 2.531688, ppl: 12.574717 +epoch: 2, batch: 8531, sum loss: 4447.313965, avg loss: 2.674272, ppl: 14.501786 +epoch: 2, batch: 8532, sum loss: 4060.369629, avg loss: 2.385646, ppl: 10.866081 +epoch: 2, batch: 8533, sum loss: 3677.492676, avg loss: 2.453297, ppl: 11.626621 +epoch: 2, batch: 8534, sum loss: 4538.778320, avg loss: 2.737502, ppl: 15.448348 +epoch: 2, batch: 8535, sum loss: 4684.034180, avg loss: 2.734404, ppl: 15.400563 +epoch: 2, batch: 8536, sum loss: 5018.420898, avg loss: 2.827279, ppl: 16.899420 +epoch: 2, batch: 8537, sum loss: 4810.412109, avg loss: 2.858237, ppl: 17.430761 +epoch: 2, batch: 8538, sum loss: 3131.957275, avg loss: 2.164449, ppl: 8.709799 +epoch: 2, batch: 8539, sum loss: 4145.917969, avg loss: 2.459026, 
ppl: 11.693418 +epoch: 2, batch: 8540, sum loss: 3802.446533, avg loss: 2.465919, ppl: 11.774292 +epoch: 2, batch: 8541, sum loss: 4055.825684, avg loss: 2.455100, ppl: 11.647602 +epoch: 2, batch: 8542, sum loss: 4595.011719, avg loss: 2.490521, ppl: 12.067564 +epoch: 2, batch: 8543, sum loss: 4360.149902, avg loss: 2.887516, ppl: 17.948679 +epoch: 2, batch: 8544, sum loss: 4814.593262, avg loss: 2.480470, ppl: 11.946883 +epoch: 2, batch: 8545, sum loss: 3706.180420, avg loss: 2.497426, ppl: 12.151177 +epoch: 2, batch: 8546, sum loss: 3403.317139, avg loss: 2.321499, ppl: 10.190935 +epoch: 2, batch: 8547, sum loss: 4646.779297, avg loss: 2.647738, ppl: 14.122051 +epoch: 2, batch: 8548, sum loss: 4370.362793, avg loss: 2.733185, ppl: 15.381797 +epoch: 2, batch: 8549, sum loss: 3878.948242, avg loss: 2.404804, ppl: 11.076257 +epoch: 2, batch: 8550, sum loss: 4461.666992, avg loss: 2.548068, ppl: 12.782385 +epoch: 2, batch: 8551, sum loss: 4430.488770, avg loss: 2.572874, ppl: 13.103428 +epoch: 2, batch: 8552, sum loss: 4341.954102, avg loss: 2.465618, ppl: 11.770759 +epoch: 2, batch: 8553, sum loss: 4556.326172, avg loss: 2.405663, ppl: 11.085780 +epoch: 2, batch: 8554, sum loss: 3776.440674, avg loss: 2.487774, ppl: 12.034456 +epoch: 2, batch: 8555, sum loss: 4367.038086, avg loss: 2.509792, ppl: 12.302372 +epoch: 2, batch: 8556, sum loss: 5904.067383, avg loss: 2.974341, ppl: 19.576721 +epoch: 2, batch: 8557, sum loss: 5043.687012, avg loss: 2.578572, ppl: 13.178307 +epoch: 2, batch: 8558, sum loss: 4451.965820, avg loss: 2.615726, ppl: 13.677142 +epoch: 2, batch: 8559, sum loss: 5027.878906, avg loss: 2.595704, ppl: 13.406023 +epoch: 2, batch: 8560, sum loss: 4184.357910, avg loss: 2.448425, ppl: 11.570107 +epoch: 2, batch: 8561, sum loss: 4499.574707, avg loss: 2.603920, ppl: 13.516625 +epoch: 2, batch: 8562, sum loss: 4649.333008, avg loss: 2.480967, ppl: 11.952821 +epoch: 2, batch: 8563, sum loss: 3695.248779, avg loss: 2.379426, ppl: 10.798705 +epoch: 2, 
batch: 8564, sum loss: 5389.778320, avg loss: 2.729002, ppl: 15.317589 +epoch: 2, batch: 8565, sum loss: 4458.418945, avg loss: 2.407354, ppl: 11.104536 +epoch: 2, batch: 8566, sum loss: 4870.589355, avg loss: 2.611576, ppl: 13.620501 +epoch: 2, batch: 8567, sum loss: 4291.831055, avg loss: 2.544061, ppl: 12.731270 +epoch: 2, batch: 8568, sum loss: 3348.343750, avg loss: 2.168617, ppl: 8.746176 +epoch: 2, batch: 8569, sum loss: 5784.297363, avg loss: 2.702943, ppl: 14.923581 +epoch: 2, batch: 8570, sum loss: 3544.366699, avg loss: 2.295574, ppl: 9.930139 +epoch: 2, batch: 8571, sum loss: 4116.964355, avg loss: 2.582788, ppl: 13.233986 +epoch: 2, batch: 8572, sum loss: 3658.921875, avg loss: 2.490757, ppl: 12.070407 +epoch: 2, batch: 8573, sum loss: 5068.501953, avg loss: 2.924698, ppl: 18.628601 +epoch: 2, batch: 8574, sum loss: 4627.776367, avg loss: 2.582465, ppl: 13.229705 +epoch: 2, batch: 8575, sum loss: 5063.662598, avg loss: 2.468875, ppl: 11.809154 +epoch: 2, batch: 8576, sum loss: 3767.777100, avg loss: 2.337331, ppl: 10.353561 +epoch: 2, batch: 8577, sum loss: 3849.508301, avg loss: 2.504560, ppl: 12.238170 +epoch: 2, batch: 8578, sum loss: 5105.687500, avg loss: 2.675937, ppl: 14.525949 +epoch: 2, batch: 8579, sum loss: 4246.611328, avg loss: 2.401930, ppl: 11.044467 +epoch: 2, batch: 8580, sum loss: 4311.513672, avg loss: 2.372875, ppl: 10.728191 +epoch: 2, batch: 8581, sum loss: 4384.187500, avg loss: 2.778319, ppl: 16.091949 +epoch: 2, batch: 8582, sum loss: 4343.750488, avg loss: 2.455484, ppl: 11.652068 +epoch: 2, batch: 8583, sum loss: 4603.613770, avg loss: 2.412795, ppl: 11.165128 +epoch: 2, batch: 8584, sum loss: 4435.750488, avg loss: 2.493395, ppl: 12.102298 +epoch: 2, batch: 8585, sum loss: 4223.768555, avg loss: 2.398506, ppl: 11.006717 +epoch: 2, batch: 8586, sum loss: 4971.236328, avg loss: 2.847214, ppl: 17.239693 +epoch: 2, batch: 8587, sum loss: 3803.957520, avg loss: 2.339457, ppl: 10.375604 +epoch: 2, batch: 8588, sum loss: 
4543.334961, avg loss: 2.574127, ppl: 13.119864 +epoch: 2, batch: 8589, sum loss: 3549.747559, avg loss: 2.235358, ppl: 9.349824 +epoch: 2, batch: 8590, sum loss: 3754.332031, avg loss: 2.408167, ppl: 11.113570 +epoch: 2, batch: 8591, sum loss: 3938.704834, avg loss: 2.470957, ppl: 11.833761 +epoch: 2, batch: 8592, sum loss: 5283.476562, avg loss: 2.794012, ppl: 16.346468 +epoch: 2, batch: 8593, sum loss: 3361.659912, avg loss: 2.174424, ppl: 8.797118 +epoch: 2, batch: 8594, sum loss: 4455.321289, avg loss: 2.755301, ppl: 15.725770 +epoch: 2, batch: 8595, sum loss: 4766.553711, avg loss: 2.692968, ppl: 14.775470 +epoch: 2, batch: 8596, sum loss: 4158.396973, avg loss: 2.444678, ppl: 11.526835 +epoch: 2, batch: 8597, sum loss: 3980.867432, avg loss: 2.467990, ppl: 11.798704 +epoch: 2, batch: 8598, sum loss: 4449.344727, avg loss: 2.635868, ppl: 13.955419 +epoch: 2, batch: 8599, sum loss: 4431.198242, avg loss: 2.517726, ppl: 12.400369 +epoch: 2, batch: 8600, sum loss: 4217.106445, avg loss: 2.630759, ppl: 13.884301 +epoch: 2, batch: 8601, sum loss: 4490.591309, avg loss: 2.482361, ppl: 11.969495 +epoch: 2, batch: 8602, sum loss: 4674.562988, avg loss: 2.599868, ppl: 13.461962 +epoch: 2, batch: 8603, sum loss: 3519.888672, avg loss: 2.329509, ppl: 10.272899 +epoch: 2, batch: 8604, sum loss: 4544.178711, avg loss: 2.584857, ppl: 13.261393 +epoch: 2, batch: 8605, sum loss: 4235.831055, avg loss: 2.377010, ppl: 10.772640 +epoch: 2, batch: 8606, sum loss: 3657.477783, avg loss: 2.435072, ppl: 11.416637 +epoch: 2, batch: 8607, sum loss: 4268.483398, avg loss: 2.502042, ppl: 12.207394 +epoch: 2, batch: 8608, sum loss: 5031.252441, avg loss: 2.758362, ppl: 15.773985 +epoch: 2, batch: 8609, sum loss: 4099.812988, avg loss: 2.472746, ppl: 11.854958 +epoch: 2, batch: 8610, sum loss: 5181.843262, avg loss: 2.844041, ppl: 17.185076 +epoch: 2, batch: 8611, sum loss: 4518.259766, avg loss: 2.493521, ppl: 12.103815 +epoch: 2, batch: 8612, sum loss: 3823.769531, avg loss: 2.282848, 
ppl: 9.804561 +epoch: 2, batch: 8613, sum loss: 3919.718506, avg loss: 2.276259, ppl: 9.740176 +epoch: 2, batch: 8614, sum loss: 3977.479004, avg loss: 2.343830, ppl: 10.421072 +epoch: 2, batch: 8615, sum loss: 4030.679932, avg loss: 2.535019, ppl: 12.616667 +epoch: 2, batch: 8616, sum loss: 4151.457031, avg loss: 2.381788, ppl: 10.824242 +epoch: 2, batch: 8617, sum loss: 3800.998535, avg loss: 2.419477, ppl: 11.239982 +epoch: 2, batch: 8618, sum loss: 4421.730469, avg loss: 2.641416, ppl: 14.033065 +epoch: 2, batch: 8619, sum loss: 4423.743164, avg loss: 2.630049, ppl: 13.874456 +epoch: 2, batch: 8620, sum loss: 3644.335938, avg loss: 2.330138, ppl: 10.279360 +epoch: 2, batch: 8621, sum loss: 4766.853516, avg loss: 2.623475, ppl: 13.783536 +epoch: 2, batch: 8622, sum loss: 4168.887207, avg loss: 2.582954, ppl: 13.236176 +epoch: 2, batch: 8623, sum loss: 3880.477539, avg loss: 2.379201, ppl: 10.796278 +epoch: 2, batch: 8624, sum loss: 4391.527832, avg loss: 2.515194, ppl: 12.369004 +epoch: 2, batch: 8625, sum loss: 4589.271484, avg loss: 2.549595, ppl: 12.801923 +epoch: 2, batch: 8626, sum loss: 4566.945801, avg loss: 2.480688, ppl: 11.949478 +epoch: 2, batch: 8627, sum loss: 4032.460938, avg loss: 2.458818, ppl: 11.690981 +epoch: 2, batch: 8628, sum loss: 3346.260742, avg loss: 2.281023, ppl: 9.786685 +epoch: 2, batch: 8629, sum loss: 4105.809082, avg loss: 2.392663, ppl: 10.942593 +epoch: 2, batch: 8630, sum loss: 4268.638184, avg loss: 2.421236, ppl: 11.259763 +epoch: 2, batch: 8631, sum loss: 4151.416992, avg loss: 2.599510, ppl: 13.457139 +epoch: 2, batch: 8632, sum loss: 4508.846191, avg loss: 2.523137, ppl: 12.467650 +epoch: 2, batch: 8633, sum loss: 5042.890625, avg loss: 2.778452, ppl: 16.094091 +epoch: 2, batch: 8634, sum loss: 4389.043457, avg loss: 2.475490, ppl: 11.887528 +epoch: 2, batch: 8635, sum loss: 4149.033691, avg loss: 2.432024, ppl: 11.381901 +epoch: 2, batch: 8636, sum loss: 3684.077881, avg loss: 2.254638, ppl: 9.531839 +epoch: 2, batch: 
8637, sum loss: 3519.848389, avg loss: 2.226343, ppl: 9.265918 +epoch: 2, batch: 8638, sum loss: 4715.086914, avg loss: 2.848995, ppl: 17.270420 +epoch: 2, batch: 8639, sum loss: 3412.613525, avg loss: 2.361670, ppl: 10.608656 +epoch: 2, batch: 8640, sum loss: 5548.748047, avg loss: 2.914258, ppl: 18.435137 +epoch: 2, batch: 8641, sum loss: 3744.201660, avg loss: 2.365257, ppl: 10.646772 +epoch: 2, batch: 8642, sum loss: 4064.451660, avg loss: 2.410707, ppl: 11.141833 +epoch: 2, batch: 8643, sum loss: 5108.045898, avg loss: 2.598192, ppl: 13.439421 +epoch: 2, batch: 8644, sum loss: 4101.514160, avg loss: 2.358548, ppl: 10.575582 +epoch: 2, batch: 8645, sum loss: 4243.708008, avg loss: 2.753866, ppl: 15.703230 +epoch: 2, batch: 8646, sum loss: 4188.895020, avg loss: 2.396393, ppl: 10.983488 +epoch: 2, batch: 8647, sum loss: 4224.339844, avg loss: 2.529545, ppl: 12.547793 +epoch: 2, batch: 8648, sum loss: 5000.818359, avg loss: 2.643139, ppl: 14.057255 +epoch: 2, batch: 8649, sum loss: 3751.492676, avg loss: 2.489378, ppl: 12.053776 +epoch: 2, batch: 8650, sum loss: 4434.219727, avg loss: 2.833367, ppl: 17.002615 +epoch: 2, batch: 8651, sum loss: 3586.257080, avg loss: 2.244216, ppl: 9.433017 +epoch: 2, batch: 8652, sum loss: 4372.816895, avg loss: 2.338405, ppl: 10.364691 +epoch: 2, batch: 8653, sum loss: 5592.046875, avg loss: 2.918605, ppl: 18.515438 +epoch: 2, batch: 8654, sum loss: 4105.592285, avg loss: 2.471759, ppl: 11.843265 +epoch: 2, batch: 8655, sum loss: 4747.272461, avg loss: 2.711178, ppl: 15.046991 +epoch: 2, batch: 8656, sum loss: 4018.486328, avg loss: 2.425158, ppl: 11.304010 +epoch: 2, batch: 8657, sum loss: 4196.449219, avg loss: 2.472863, ppl: 11.856348 +epoch: 2, batch: 8658, sum loss: 5059.468262, avg loss: 2.603947, ppl: 13.516980 +epoch: 2, batch: 8659, sum loss: 5121.025879, avg loss: 2.667201, ppl: 14.399609 +epoch: 2, batch: 8660, sum loss: 4458.454102, avg loss: 2.605759, ppl: 13.541505 +epoch: 2, batch: 8661, sum loss: 4347.555176, avg 
loss: 2.821256, ppl: 16.797934 +epoch: 2, batch: 8662, sum loss: 4569.491699, avg loss: 2.464666, ppl: 11.759558 +epoch: 2, batch: 8663, sum loss: 4898.919922, avg loss: 2.724649, ppl: 15.251066 +epoch: 2, batch: 8664, sum loss: 3650.309814, avg loss: 2.622349, ppl: 13.768027 +epoch: 2, batch: 8665, sum loss: 5065.447754, avg loss: 2.840969, ppl: 17.132360 +epoch: 2, batch: 8666, sum loss: 3925.584961, avg loss: 2.338049, ppl: 10.361007 +epoch: 2, batch: 8667, sum loss: 4885.179199, avg loss: 2.693043, ppl: 14.776569 +epoch: 2, batch: 8668, sum loss: 3655.028809, avg loss: 2.323604, ppl: 10.212412 +epoch: 2, batch: 8669, sum loss: 3434.830322, avg loss: 2.367216, ppl: 10.667651 +epoch: 2, batch: 8670, sum loss: 3806.255371, avg loss: 2.529073, ppl: 12.541881 +epoch: 2, batch: 8671, sum loss: 4964.858398, avg loss: 2.786116, ppl: 16.217905 +epoch: 2, batch: 8672, sum loss: 4283.931641, avg loss: 2.531874, ppl: 12.577059 +epoch: 2, batch: 8673, sum loss: 3583.292969, avg loss: 2.288182, ppl: 9.857001 +epoch: 2, batch: 8674, sum loss: 5113.475586, avg loss: 2.981618, ppl: 19.719706 +epoch: 2, batch: 8675, sum loss: 3415.009033, avg loss: 2.229118, ppl: 9.291668 +epoch: 2, batch: 8676, sum loss: 3880.410156, avg loss: 2.463752, ppl: 11.748814 +epoch: 2, batch: 8677, sum loss: 3705.845459, avg loss: 2.296063, ppl: 9.934991 +epoch: 2, batch: 8678, sum loss: 4657.839844, avg loss: 2.655553, ppl: 14.232853 +epoch: 2, batch: 8679, sum loss: 3922.322754, avg loss: 2.592414, ppl: 13.361994 +epoch: 2, batch: 8680, sum loss: 4524.650391, avg loss: 2.603366, ppl: 13.509135 +epoch: 2, batch: 8681, sum loss: 4599.301270, avg loss: 2.565143, ppl: 13.002519 +epoch: 2, batch: 8682, sum loss: 4631.903809, avg loss: 2.444276, ppl: 11.522208 +epoch: 2, batch: 8683, sum loss: 4907.271484, avg loss: 2.665547, ppl: 14.375806 +epoch: 2, batch: 8684, sum loss: 4476.659180, avg loss: 2.517806, ppl: 12.401359 +epoch: 2, batch: 8685, sum loss: 4684.573730, avg loss: 2.722007, ppl: 15.210816 
+epoch: 2, batch: 8686, sum loss: 4008.729004, avg loss: 2.374840, ppl: 10.749289 +epoch: 2, batch: 8687, sum loss: 3472.773926, avg loss: 2.534872, ppl: 12.614811 +epoch: 2, batch: 8688, sum loss: 3983.842773, avg loss: 2.438092, ppl: 11.451174 +epoch: 2, batch: 8689, sum loss: 4196.248047, avg loss: 2.385587, ppl: 10.865444 +epoch: 2, batch: 8690, sum loss: 3846.790283, avg loss: 2.281608, ppl: 9.792410 +epoch: 2, batch: 8691, sum loss: 3762.103027, avg loss: 2.223465, ppl: 9.239291 +epoch: 2, batch: 8692, sum loss: 3865.076904, avg loss: 2.257638, ppl: 9.560483 +epoch: 2, batch: 8693, sum loss: 4598.461426, avg loss: 2.473621, ppl: 11.865333 +epoch: 2, batch: 8694, sum loss: 3087.146240, avg loss: 2.142364, ppl: 8.519554 +epoch: 2, batch: 8695, sum loss: 3637.927734, avg loss: 2.454742, ppl: 11.643432 +epoch: 2, batch: 8696, sum loss: 5511.431152, avg loss: 2.789186, ppl: 16.267769 +epoch: 2, batch: 8697, sum loss: 4687.776367, avg loss: 2.475067, ppl: 11.882502 +epoch: 2, batch: 8698, sum loss: 4227.613770, avg loss: 2.501547, ppl: 12.201350 +epoch: 2, batch: 8699, sum loss: 4627.895020, avg loss: 2.665838, ppl: 14.379998 +epoch: 2, batch: 8700, sum loss: 4011.118652, avg loss: 2.516386, ppl: 12.383756 +epoch: 2, batch: 8701, sum loss: 3552.645752, avg loss: 2.720249, ppl: 15.184109 +epoch: 2, batch: 8702, sum loss: 4154.473145, avg loss: 2.706497, ppl: 14.976723 +epoch: 2, batch: 8703, sum loss: 3813.419922, avg loss: 2.566231, ppl: 13.016679 +epoch: 2, batch: 8704, sum loss: 4434.082031, avg loss: 2.459280, ppl: 11.696387 +epoch: 2, batch: 8705, sum loss: 3374.837402, avg loss: 2.277218, ppl: 9.749523 +epoch: 2, batch: 8706, sum loss: 4182.201172, avg loss: 2.631971, ppl: 13.901137 +epoch: 2, batch: 8707, sum loss: 3832.963623, avg loss: 2.471285, ppl: 11.837652 +epoch: 2, batch: 8708, sum loss: 3412.854736, avg loss: 2.487504, ppl: 12.031203 +epoch: 2, batch: 8709, sum loss: 4244.317383, avg loss: 2.710292, ppl: 15.033667 +epoch: 2, batch: 8710, sum loss: 
5223.514160, avg loss: 2.756472, ppl: 15.744197 +epoch: 2, batch: 8711, sum loss: 4387.062988, avg loss: 2.364994, ppl: 10.643970 +epoch: 2, batch: 8712, sum loss: 4047.702393, avg loss: 2.459115, ppl: 11.694452 +epoch: 2, batch: 8713, sum loss: 4725.238281, avg loss: 2.602003, ppl: 13.490738 +epoch: 2, batch: 8714, sum loss: 4012.715820, avg loss: 2.346617, ppl: 10.450162 +epoch: 2, batch: 8715, sum loss: 3735.071533, avg loss: 2.380543, ppl: 10.810769 +epoch: 2, batch: 8716, sum loss: 4276.272949, avg loss: 2.755331, ppl: 15.726238 +epoch: 2, batch: 8717, sum loss: 4491.592285, avg loss: 2.859066, ppl: 17.445221 +epoch: 2, batch: 8718, sum loss: 4094.299805, avg loss: 2.557339, ppl: 12.901443 +epoch: 2, batch: 8719, sum loss: 4480.061523, avg loss: 2.774032, ppl: 16.023108 +epoch: 2, batch: 8720, sum loss: 4682.901855, avg loss: 2.648700, ppl: 14.135653 +epoch: 2, batch: 8721, sum loss: 4377.715820, avg loss: 2.502982, ppl: 12.218878 +epoch: 2, batch: 8722, sum loss: 4340.249512, avg loss: 2.643270, ppl: 14.059105 +epoch: 2, batch: 8723, sum loss: 5154.244629, avg loss: 2.768123, ppl: 15.928706 +epoch: 2, batch: 8724, sum loss: 5399.556152, avg loss: 2.644249, ppl: 14.072868 +epoch: 2, batch: 8725, sum loss: 4453.993164, avg loss: 2.663872, ppl: 14.351748 +epoch: 2, batch: 8726, sum loss: 4531.319824, avg loss: 2.504876, ppl: 12.242037 +epoch: 2, batch: 8727, sum loss: 4283.143555, avg loss: 2.766889, ppl: 15.909058 +epoch: 2, batch: 8728, sum loss: 3778.147461, avg loss: 2.453342, ppl: 11.627145 +epoch: 2, batch: 8729, sum loss: 3911.505859, avg loss: 2.420486, ppl: 11.251328 +epoch: 2, batch: 8730, sum loss: 4034.923340, avg loss: 2.365137, ppl: 10.645494 +epoch: 2, batch: 8731, sum loss: 3868.033203, avg loss: 2.455894, ppl: 11.656850 +epoch: 2, batch: 8732, sum loss: 3875.457031, avg loss: 2.425192, ppl: 11.304401 +epoch: 2, batch: 8733, sum loss: 3460.513184, avg loss: 2.257347, ppl: 9.557700 +epoch: 2, batch: 8734, sum loss: 3957.048828, avg loss: 2.515606, 
ppl: 12.374110 +epoch: 2, batch: 8735, sum loss: 3475.077637, avg loss: 2.440364, ppl: 11.477214 +epoch: 2, batch: 8736, sum loss: 4258.155273, avg loss: 2.594854, ppl: 13.394630 +epoch: 2, batch: 8737, sum loss: 3795.553223, avg loss: 2.436170, ppl: 11.429187 +epoch: 2, batch: 8738, sum loss: 2946.813477, avg loss: 2.026694, ppl: 7.588958 +epoch: 2, batch: 8739, sum loss: 4873.789062, avg loss: 2.561108, ppl: 12.950163 +epoch: 2, batch: 8740, sum loss: 4827.848145, avg loss: 2.495012, ppl: 12.121877 +epoch: 2, batch: 8741, sum loss: 4246.207520, avg loss: 2.454455, ppl: 11.640090 +epoch: 2, batch: 8742, sum loss: 3336.382324, avg loss: 2.095717, ppl: 8.131272 +epoch: 2, batch: 8743, sum loss: 4614.635254, avg loss: 2.495746, ppl: 12.130784 +epoch: 2, batch: 8744, sum loss: 3522.997070, avg loss: 2.281734, ppl: 9.793648 +epoch: 2, batch: 8745, sum loss: 4117.568359, avg loss: 2.576701, ppl: 13.153675 +epoch: 2, batch: 8746, sum loss: 4110.205078, avg loss: 2.609654, ppl: 13.594346 +epoch: 2, batch: 8747, sum loss: 4486.125488, avg loss: 2.422314, ppl: 11.271912 +epoch: 2, batch: 8748, sum loss: 4189.313477, avg loss: 2.481821, ppl: 11.963027 +epoch: 2, batch: 8749, sum loss: 3914.574463, avg loss: 2.703435, ppl: 14.930938 +epoch: 2, batch: 8750, sum loss: 3903.257324, avg loss: 2.624921, ppl: 13.803481 +epoch: 2, batch: 8751, sum loss: 4714.897461, avg loss: 2.642880, ppl: 14.053619 +epoch: 2, batch: 8752, sum loss: 4258.536621, avg loss: 2.559217, ppl: 12.925689 +epoch: 2, batch: 8753, sum loss: 3660.172119, avg loss: 2.365981, ppl: 10.654485 +epoch: 2, batch: 8754, sum loss: 4212.317383, avg loss: 2.499892, ppl: 12.181172 +epoch: 2, batch: 8755, sum loss: 3899.093018, avg loss: 2.331994, ppl: 10.298451 +epoch: 2, batch: 8756, sum loss: 3383.825928, avg loss: 2.128192, ppl: 8.399670 +epoch: 2, batch: 8757, sum loss: 3946.746826, avg loss: 2.505871, ppl: 12.254226 +epoch: 2, batch: 8758, sum loss: 4271.628906, avg loss: 2.539613, ppl: 12.674762 +epoch: 2, batch: 
8759, sum loss: 3545.953125, avg loss: 2.252829, ppl: 9.514615 +epoch: 2, batch: 8760, sum loss: 4511.787109, avg loss: 2.617046, ppl: 13.695210 +epoch: 2, batch: 8761, sum loss: 4254.890625, avg loss: 2.557026, ppl: 12.897399 +epoch: 2, batch: 8762, sum loss: 4396.633301, avg loss: 2.472797, ppl: 11.855562 +epoch: 2, batch: 8763, sum loss: 4508.927734, avg loss: 2.469292, ppl: 11.814084 +epoch: 2, batch: 8764, sum loss: 3324.779541, avg loss: 2.409261, ppl: 11.125731 +epoch: 2, batch: 8765, sum loss: 3068.622559, avg loss: 2.338889, ppl: 10.369711 +epoch: 2, batch: 8766, sum loss: 4523.482422, avg loss: 2.492277, ppl: 12.088770 +epoch: 2, batch: 8767, sum loss: 3474.783691, avg loss: 2.304233, ppl: 10.016494 +epoch: 2, batch: 8768, sum loss: 5043.450195, avg loss: 2.647480, ppl: 14.118423 +epoch: 2, batch: 8769, sum loss: 5181.050781, avg loss: 2.718285, ppl: 15.154305 +epoch: 2, batch: 8770, sum loss: 3322.874512, avg loss: 2.326943, ppl: 10.246569 +epoch: 2, batch: 8771, sum loss: 4441.575684, avg loss: 2.540947, ppl: 12.691689 +epoch: 2, batch: 8772, sum loss: 4100.678711, avg loss: 2.223795, ppl: 9.242343 +epoch: 2, batch: 8773, sum loss: 4304.779785, avg loss: 2.305720, ppl: 10.031402 +epoch: 2, batch: 8774, sum loss: 3969.972168, avg loss: 2.684227, ppl: 14.646878 +epoch: 2, batch: 8775, sum loss: 4339.594238, avg loss: 2.598559, ppl: 13.444356 +epoch: 2, batch: 8776, sum loss: 4689.057617, avg loss: 2.560927, ppl: 12.947820 +epoch: 2, batch: 8777, sum loss: 4028.021973, avg loss: 2.394781, ppl: 10.965797 +epoch: 2, batch: 8778, sum loss: 4082.050293, avg loss: 2.499725, ppl: 12.179140 +epoch: 2, batch: 8779, sum loss: 3354.863770, avg loss: 2.198469, ppl: 9.011209 +epoch: 2, batch: 8780, sum loss: 3584.921387, avg loss: 2.370980, ppl: 10.707878 +epoch: 2, batch: 8781, sum loss: 4110.791016, avg loss: 2.398361, ppl: 11.005126 +epoch: 2, batch: 8782, sum loss: 4855.384277, avg loss: 2.710990, ppl: 15.044168 +epoch: 2, batch: 8783, sum loss: 4428.863770, avg 
loss: 2.664780, ppl: 14.364784 +epoch: 2, batch: 8784, sum loss: 4150.724609, avg loss: 2.732538, ppl: 15.371843 +epoch: 2, batch: 8785, sum loss: 3736.020752, avg loss: 2.336473, ppl: 10.344688 +epoch: 2, batch: 8786, sum loss: 4625.785156, avg loss: 2.815450, ppl: 16.700697 +epoch: 2, batch: 8787, sum loss: 4227.458008, avg loss: 2.510367, ppl: 12.309448 +epoch: 2, batch: 8788, sum loss: 4242.045898, avg loss: 2.657923, ppl: 14.266633 +epoch: 2, batch: 8789, sum loss: 4805.665039, avg loss: 2.461919, ppl: 11.727290 +epoch: 2, batch: 8790, sum loss: 4057.015137, avg loss: 2.481355, ppl: 11.957453 +epoch: 2, batch: 8791, sum loss: 3694.973877, avg loss: 2.305037, ppl: 10.024547 +epoch: 2, batch: 8792, sum loss: 3377.398438, avg loss: 2.454505, ppl: 11.640667 +epoch: 2, batch: 8793, sum loss: 3393.929443, avg loss: 2.258103, ppl: 9.564931 +epoch: 2, batch: 8794, sum loss: 4537.919434, avg loss: 2.558015, ppl: 12.910170 +epoch: 2, batch: 8795, sum loss: 4458.307129, avg loss: 2.448274, ppl: 11.568364 +epoch: 2, batch: 8796, sum loss: 4345.032715, avg loss: 2.501458, ppl: 12.200271 +epoch: 2, batch: 8797, sum loss: 4515.239258, avg loss: 2.476818, ppl: 11.903329 +epoch: 2, batch: 8798, sum loss: 4572.981934, avg loss: 2.652542, ppl: 14.190063 +epoch: 2, batch: 8799, sum loss: 4614.949707, avg loss: 2.420005, ppl: 11.245916 +epoch: 2, batch: 8800, sum loss: 3705.638428, avg loss: 2.414097, ppl: 11.179666 +epoch: 2, batch: 8801, sum loss: 4311.890137, avg loss: 2.568130, ppl: 13.041411 +epoch: 2, batch: 8802, sum loss: 4409.130371, avg loss: 2.586000, ppl: 13.276562 +epoch: 2, batch: 8803, sum loss: 4035.095947, avg loss: 2.509388, ppl: 12.297401 +epoch: 2, batch: 8804, sum loss: 4518.302734, avg loss: 2.622346, ppl: 13.767991 +epoch: 2, batch: 8805, sum loss: 3891.937012, avg loss: 2.567241, ppl: 13.029825 +epoch: 2, batch: 8806, sum loss: 4500.600098, avg loss: 2.512898, ppl: 12.340638 +epoch: 2, batch: 8807, sum loss: 4575.491211, avg loss: 2.458620, ppl: 11.688668 
+epoch: 2, batch: 8808, sum loss: 4734.104492, avg loss: 2.579894, ppl: 13.195734 +epoch: 2, batch: 8809, sum loss: 4507.300293, avg loss: 2.662316, ppl: 14.329432 +epoch: 2, batch: 8810, sum loss: 3884.771484, avg loss: 2.415903, ppl: 11.199875 +epoch: 2, batch: 8811, sum loss: 4295.919434, avg loss: 2.643643, ppl: 14.064342 +epoch: 2, batch: 8812, sum loss: 4312.943848, avg loss: 2.500257, ppl: 12.185631 +epoch: 2, batch: 8813, sum loss: 4313.331543, avg loss: 2.702589, ppl: 14.918299 +epoch: 2, batch: 8814, sum loss: 3804.775146, avg loss: 2.240739, ppl: 9.400279 +epoch: 2, batch: 8815, sum loss: 3962.641357, avg loss: 2.598453, ppl: 13.442929 +epoch: 2, batch: 8816, sum loss: 4237.393555, avg loss: 2.351495, ppl: 10.501255 +epoch: 2, batch: 8817, sum loss: 4926.068359, avg loss: 2.850734, ppl: 17.300476 +epoch: 2, batch: 8818, sum loss: 3547.130371, avg loss: 2.273802, ppl: 9.716268 +epoch: 2, batch: 8819, sum loss: 4152.617188, avg loss: 2.710586, ppl: 15.038079 +epoch: 2, batch: 8820, sum loss: 4556.553223, avg loss: 2.492644, ppl: 12.093209 +epoch: 2, batch: 8821, sum loss: 3613.801758, avg loss: 2.301785, ppl: 9.991998 +epoch: 2, batch: 8822, sum loss: 3921.737793, avg loss: 2.509109, ppl: 12.293975 +epoch: 2, batch: 8823, sum loss: 3722.812256, avg loss: 2.219924, ppl: 9.206629 +epoch: 2, batch: 8824, sum loss: 4174.633789, avg loss: 2.308979, ppl: 10.064142 +epoch: 2, batch: 8825, sum loss: 4319.105469, avg loss: 2.386246, ppl: 10.872603 +epoch: 2, batch: 8826, sum loss: 4919.663086, avg loss: 2.679555, ppl: 14.578604 +epoch: 2, batch: 8827, sum loss: 4498.515625, avg loss: 2.387747, ppl: 10.888937 +epoch: 2, batch: 8828, sum loss: 3566.924316, avg loss: 2.379536, ppl: 10.799890 +epoch: 2, batch: 8829, sum loss: 4782.430176, avg loss: 2.388826, ppl: 10.900690 +epoch: 2, batch: 8830, sum loss: 3852.954590, avg loss: 2.388689, ppl: 10.899191 +epoch: 2, batch: 8831, sum loss: 3676.106934, avg loss: 2.487217, ppl: 12.027759 +epoch: 2, batch: 8832, sum loss: 
4123.094727, avg loss: 2.312448, ppl: 10.099117 +epoch: 2, batch: 8833, sum loss: 4436.715332, avg loss: 2.602179, ppl: 13.493112 +epoch: 2, batch: 8834, sum loss: 4015.278809, avg loss: 2.449834, ppl: 11.586428 +epoch: 2, batch: 8835, sum loss: 4137.291016, avg loss: 2.438003, ppl: 11.450152 +epoch: 2, batch: 8836, sum loss: 4411.546875, avg loss: 2.608839, ppl: 13.583275 +epoch: 2, batch: 8837, sum loss: 3762.208008, avg loss: 2.435086, ppl: 11.416803 +epoch: 2, batch: 8838, sum loss: 5195.638672, avg loss: 2.839147, ppl: 17.101170 +epoch: 2, batch: 8839, sum loss: 3937.026367, avg loss: 2.425771, ppl: 11.310946 +epoch: 2, batch: 8840, sum loss: 3853.163818, avg loss: 2.462085, ppl: 11.729247 +epoch: 2, batch: 8841, sum loss: 4824.279785, avg loss: 2.488025, ppl: 12.037476 +epoch: 2, batch: 8842, sum loss: 3726.155762, avg loss: 2.470926, ppl: 11.833394 +epoch: 2, batch: 8843, sum loss: 4769.519043, avg loss: 2.850878, ppl: 17.302971 +epoch: 2, batch: 8844, sum loss: 4428.428711, avg loss: 2.433203, ppl: 11.395320 +epoch: 2, batch: 8845, sum loss: 4007.811523, avg loss: 2.339645, ppl: 10.377548 +epoch: 2, batch: 8846, sum loss: 4246.408203, avg loss: 2.448909, ppl: 11.575708 +epoch: 2, batch: 8847, sum loss: 3866.537354, avg loss: 2.278455, ppl: 9.761582 +epoch: 2, batch: 8848, sum loss: 3500.752441, avg loss: 2.273216, ppl: 9.710578 +epoch: 2, batch: 8849, sum loss: 5577.619141, avg loss: 2.779083, ppl: 16.104242 +epoch: 2, batch: 8850, sum loss: 4881.798340, avg loss: 2.695637, ppl: 14.814953 +epoch: 2, batch: 8851, sum loss: 4628.039062, avg loss: 2.537302, ppl: 12.645511 +epoch: 2, batch: 8852, sum loss: 5268.776855, avg loss: 2.810014, ppl: 16.610155 +epoch: 2, batch: 8853, sum loss: 4211.374512, avg loss: 2.536973, ppl: 12.641345 +epoch: 2, batch: 8854, sum loss: 4147.347168, avg loss: 2.538156, ppl: 12.656315 +epoch: 2, batch: 8855, sum loss: 5711.641113, avg loss: 2.775336, ppl: 16.044014 +epoch: 2, batch: 8856, sum loss: 3933.500244, avg loss: 2.383940, 
ppl: 10.847553 +epoch: 2, batch: 8857, sum loss: 3804.838135, avg loss: 2.581301, ppl: 13.214325 +epoch: 2, batch: 8858, sum loss: 3590.391602, avg loss: 2.477841, ppl: 11.915513 +epoch: 2, batch: 8859, sum loss: 4265.939453, avg loss: 2.773693, ppl: 16.017675 +epoch: 2, batch: 8860, sum loss: 3657.569580, avg loss: 2.488142, ppl: 12.038893 +epoch: 2, batch: 8861, sum loss: 4356.724609, avg loss: 2.605697, ppl: 13.540655 +epoch: 2, batch: 8862, sum loss: 4125.322266, avg loss: 2.516975, ppl: 12.391059 +epoch: 2, batch: 8863, sum loss: 4545.690430, avg loss: 2.490789, ppl: 12.070798 +epoch: 2, batch: 8864, sum loss: 4317.085938, avg loss: 2.689773, ppl: 14.728333 +epoch: 2, batch: 8865, sum loss: 4101.111816, avg loss: 2.597284, ppl: 13.427224 +epoch: 2, batch: 8866, sum loss: 5578.343750, avg loss: 2.807420, ppl: 16.567120 +epoch: 2, batch: 8867, sum loss: 3673.756348, avg loss: 2.382462, ppl: 10.831537 +epoch: 2, batch: 8868, sum loss: 4611.357910, avg loss: 2.808379, ppl: 16.583014 +epoch: 2, batch: 8869, sum loss: 4823.252441, avg loss: 2.611398, ppl: 13.618082 +epoch: 2, batch: 8870, sum loss: 5044.801270, avg loss: 2.842141, ppl: 17.152456 +epoch: 2, batch: 8871, sum loss: 3705.477051, avg loss: 2.413991, ppl: 11.178491 +epoch: 2, batch: 8872, sum loss: 4006.342773, avg loss: 2.573117, ppl: 13.106611 +epoch: 2, batch: 8873, sum loss: 3981.854492, avg loss: 2.599122, ppl: 13.451920 +epoch: 2, batch: 8874, sum loss: 3701.325684, avg loss: 2.427099, ppl: 11.325974 +epoch: 2, batch: 8875, sum loss: 4823.411621, avg loss: 2.720480, ppl: 15.187617 +epoch: 2, batch: 8876, sum loss: 3837.112305, avg loss: 2.594396, ppl: 13.388503 +epoch: 2, batch: 8877, sum loss: 3983.894775, avg loss: 2.357334, ppl: 10.562755 +epoch: 2, batch: 8878, sum loss: 4804.262695, avg loss: 2.739032, ppl: 15.472005 +epoch: 2, batch: 8879, sum loss: 5840.948730, avg loss: 2.797389, ppl: 16.401770 +epoch: 2, batch: 8880, sum loss: 5030.629883, avg loss: 2.533046, ppl: 12.591805 +epoch: 2, 
batch: 8881, sum loss: 5087.822754, avg loss: 2.617193, ppl: 13.697221 +epoch: 2, batch: 8882, sum loss: 3725.294434, avg loss: 2.380380, ppl: 10.809006 +epoch: 2, batch: 8883, sum loss: 4682.780762, avg loss: 2.683542, ppl: 14.636846 +epoch: 2, batch: 8884, sum loss: 4485.767578, avg loss: 2.489327, ppl: 12.053164 +epoch: 2, batch: 8885, sum loss: 4092.235840, avg loss: 2.357279, ppl: 10.562171 +epoch: 2, batch: 8886, sum loss: 4326.878418, avg loss: 2.589395, ppl: 13.321707 +epoch: 2, batch: 8887, sum loss: 4216.873535, avg loss: 2.513036, ppl: 12.342339 +epoch: 2, batch: 8888, sum loss: 4766.302734, avg loss: 2.517857, ppl: 12.401986 +epoch: 2, batch: 8889, sum loss: 3727.415039, avg loss: 2.350199, ppl: 10.487654 +epoch: 2, batch: 8890, sum loss: 5184.478516, avg loss: 2.854889, ppl: 17.372507 +epoch: 2, batch: 8891, sum loss: 5296.165527, avg loss: 2.669438, ppl: 14.431858 +epoch: 2, batch: 8892, sum loss: 3712.364746, avg loss: 2.432742, ppl: 11.390075 +epoch: 2, batch: 8893, sum loss: 4439.180176, avg loss: 2.603625, ppl: 13.512630 +epoch: 2, batch: 8894, sum loss: 5703.186035, avg loss: 2.755162, ppl: 15.723596 +epoch: 2, batch: 8895, sum loss: 4470.552246, avg loss: 2.399652, ppl: 11.019343 +epoch: 2, batch: 8896, sum loss: 4696.966797, avg loss: 2.797479, ppl: 16.403236 +epoch: 2, batch: 8897, sum loss: 4412.943359, avg loss: 2.422033, ppl: 11.268741 +epoch: 2, batch: 8898, sum loss: 4151.646484, avg loss: 2.588308, ppl: 13.307241 +epoch: 2, batch: 8899, sum loss: 4946.482910, avg loss: 2.678118, ppl: 14.557667 +epoch: 2, batch: 8900, sum loss: 4331.491699, avg loss: 2.417127, ppl: 11.213598 +epoch: 2, batch: 8901, sum loss: 5128.457520, avg loss: 2.714906, ppl: 15.103190 +epoch: 2, batch: 8902, sum loss: 4653.489746, avg loss: 2.569569, ppl: 13.060199 +epoch: 2, batch: 8903, sum loss: 3842.067383, avg loss: 2.448736, ppl: 11.573713 +epoch: 2, batch: 8904, sum loss: 3748.220215, avg loss: 2.467558, ppl: 11.793608 +epoch: 2, batch: 8905, sum loss: 
4350.761719, avg loss: 2.541333, ppl: 12.696581 +epoch: 2, batch: 8906, sum loss: 3495.124512, avg loss: 2.400498, ppl: 11.028664 +epoch: 2, batch: 8907, sum loss: 3920.412842, avg loss: 2.329419, ppl: 10.271976 +epoch: 2, batch: 8908, sum loss: 4627.181152, avg loss: 2.531281, ppl: 12.569594 +epoch: 2, batch: 8909, sum loss: 3272.583008, avg loss: 2.299777, ppl: 9.971961 +epoch: 2, batch: 8910, sum loss: 3422.435791, avg loss: 2.247167, ppl: 9.460898 +epoch: 2, batch: 8911, sum loss: 4597.342773, avg loss: 2.561194, ppl: 12.951268 +epoch: 2, batch: 8912, sum loss: 4382.328125, avg loss: 2.591560, ppl: 13.350584 +epoch: 2, batch: 8913, sum loss: 3103.595459, avg loss: 2.250613, ppl: 9.493553 +epoch: 2, batch: 8914, sum loss: 3449.851318, avg loss: 2.316891, ppl: 10.144092 +epoch: 2, batch: 8915, sum loss: 4576.369141, avg loss: 2.643772, ppl: 14.066159 +epoch: 2, batch: 8916, sum loss: 4660.026855, avg loss: 2.762316, ppl: 15.836477 +epoch: 2, batch: 8917, sum loss: 3995.846680, avg loss: 2.500530, ppl: 12.188950 +epoch: 2, batch: 8918, sum loss: 3406.153809, avg loss: 2.355570, ppl: 10.544136 +epoch: 2, batch: 8919, sum loss: 3749.949951, avg loss: 2.589745, ppl: 13.326367 +epoch: 2, batch: 8920, sum loss: 3962.666260, avg loss: 2.533674, ppl: 12.599712 +epoch: 2, batch: 8921, sum loss: 3654.696289, avg loss: 2.269998, ppl: 9.679378 +epoch: 2, batch: 8922, sum loss: 4689.445312, avg loss: 2.740763, ppl: 15.498809 +epoch: 2, batch: 8923, sum loss: 4166.238281, avg loss: 2.506762, ppl: 12.265151 +epoch: 2, batch: 8924, sum loss: 4026.431885, avg loss: 2.411037, ppl: 11.145515 +epoch: 2, batch: 8925, sum loss: 3724.612793, avg loss: 2.415443, ppl: 11.194725 +epoch: 2, batch: 8926, sum loss: 4641.627441, avg loss: 2.449408, ppl: 11.581484 +epoch: 2, batch: 8927, sum loss: 2967.844971, avg loss: 2.385727, ppl: 10.866964 +epoch: 2, batch: 8928, sum loss: 3617.320312, avg loss: 2.368906, ppl: 10.685701 +epoch: 2, batch: 8929, sum loss: 4579.752930, avg loss: 2.540074, 
ppl: 12.680605 +epoch: 2, batch: 8930, sum loss: 4720.635742, avg loss: 2.583818, ppl: 13.247623 +epoch: 2, batch: 8931, sum loss: 3982.770020, avg loss: 2.349717, ppl: 10.482602 +epoch: 2, batch: 8932, sum loss: 4608.312500, avg loss: 2.712368, ppl: 15.064903 +epoch: 2, batch: 8933, sum loss: 3752.320801, avg loss: 2.215065, ppl: 9.162006 +epoch: 2, batch: 8934, sum loss: 3586.415283, avg loss: 2.219316, ppl: 9.201037 +epoch: 2, batch: 8935, sum loss: 3881.741943, avg loss: 2.186897, ppl: 8.907528 +epoch: 2, batch: 8936, sum loss: 4373.666016, avg loss: 2.360316, ppl: 10.594301 +epoch: 2, batch: 8937, sum loss: 3331.267578, avg loss: 2.151982, ppl: 8.601887 +epoch: 2, batch: 8938, sum loss: 3806.945801, avg loss: 2.345623, ppl: 10.439773 +epoch: 2, batch: 8939, sum loss: 3792.314697, avg loss: 2.398681, ppl: 11.008646 +epoch: 2, batch: 8940, sum loss: 4224.278320, avg loss: 2.623775, ppl: 13.787678 +epoch: 2, batch: 8941, sum loss: 4512.572754, avg loss: 2.415724, ppl: 11.197875 +epoch: 2, batch: 8942, sum loss: 3257.353271, avg loss: 2.382848, ppl: 10.835719 +epoch: 2, batch: 8943, sum loss: 4471.028320, avg loss: 2.554873, ppl: 12.869668 +epoch: 2, batch: 8944, sum loss: 4522.808105, avg loss: 2.479610, ppl: 11.936605 +epoch: 2, batch: 8945, sum loss: 4412.901855, avg loss: 2.497398, ppl: 12.150835 +epoch: 2, batch: 8946, sum loss: 3833.867188, avg loss: 2.502524, ppl: 12.213286 +epoch: 2, batch: 8947, sum loss: 4288.558105, avg loss: 2.512336, ppl: 12.333711 +epoch: 2, batch: 8948, sum loss: 4050.612793, avg loss: 2.666631, ppl: 14.391406 +epoch: 2, batch: 8949, sum loss: 4186.051270, avg loss: 2.466736, ppl: 11.783925 +epoch: 2, batch: 8950, sum loss: 4749.180176, avg loss: 2.641368, ppl: 14.032389 +epoch: 2, batch: 8951, sum loss: 4987.972168, avg loss: 2.693290, ppl: 14.780216 +epoch: 2, batch: 8952, sum loss: 4692.622070, avg loss: 2.788248, ppl: 16.252525 +epoch: 2, batch: 8953, sum loss: 5306.315430, avg loss: 2.688103, ppl: 14.703755 +epoch: 2, batch: 
8954, sum loss: 3581.176270, avg loss: 2.295626, ppl: 9.930650 +epoch: 2, batch: 8955, sum loss: 3317.500244, avg loss: 2.184003, ppl: 8.881785 +epoch: 2, batch: 8956, sum loss: 5276.788574, avg loss: 2.724207, ppl: 15.244315 +epoch: 2, batch: 8957, sum loss: 4318.291016, avg loss: 2.464778, ppl: 11.760873 +epoch: 2, batch: 8958, sum loss: 4334.845703, avg loss: 2.393620, ppl: 10.953073 +epoch: 2, batch: 8959, sum loss: 3709.424072, avg loss: 2.353695, ppl: 10.524389 +epoch: 2, batch: 8960, sum loss: 4473.916992, avg loss: 2.727998, ppl: 15.302225 +epoch: 2, batch: 8961, sum loss: 4488.007812, avg loss: 2.682611, ppl: 14.623221 +epoch: 2, batch: 8962, sum loss: 3832.941895, avg loss: 2.472866, ppl: 11.856377 +epoch: 2, batch: 8963, sum loss: 4792.582520, avg loss: 2.628954, ppl: 13.859262 +epoch: 2, batch: 8964, sum loss: 3506.854492, avg loss: 2.226574, ppl: 9.268061 +epoch: 2, batch: 8965, sum loss: 4219.977051, avg loss: 2.460628, ppl: 11.712162 +epoch: 2, batch: 8966, sum loss: 3377.909424, avg loss: 2.078713, ppl: 7.994177 +epoch: 2, batch: 8967, sum loss: 4611.188477, avg loss: 2.730129, ppl: 15.334872 +epoch: 2, batch: 8968, sum loss: 4069.207764, avg loss: 2.581985, ppl: 13.223354 +epoch: 2, batch: 8969, sum loss: 4968.853027, avg loss: 2.615186, ppl: 13.669759 +epoch: 2, batch: 8970, sum loss: 4232.394531, avg loss: 2.700954, ppl: 14.893933 +epoch: 2, batch: 8971, sum loss: 3877.926514, avg loss: 2.341743, ppl: 10.399346 +epoch: 2, batch: 8972, sum loss: 3917.058105, avg loss: 2.348356, ppl: 10.468349 +epoch: 2, batch: 8973, sum loss: 4101.069336, avg loss: 2.428105, ppl: 11.337379 +epoch: 2, batch: 8974, sum loss: 4085.477051, avg loss: 2.349326, ppl: 10.478501 +epoch: 2, batch: 8975, sum loss: 3518.359619, avg loss: 2.418117, ppl: 11.224698 +epoch: 2, batch: 8976, sum loss: 3702.650879, avg loss: 2.458600, ppl: 11.688431 +epoch: 2, batch: 8977, sum loss: 4168.329102, avg loss: 2.470853, ppl: 11.832537 +epoch: 2, batch: 8978, sum loss: 3777.916992, avg 
loss: 2.087247, ppl: 8.062689 +epoch: 2, batch: 8979, sum loss: 3993.365967, avg loss: 2.440933, ppl: 11.483747 +epoch: 2, batch: 8980, sum loss: 4158.808105, avg loss: 2.734259, ppl: 15.398331 +epoch: 2, batch: 8981, sum loss: 4163.331055, avg loss: 2.551061, ppl: 12.820695 +epoch: 2, batch: 8982, sum loss: 4156.278809, avg loss: 2.401085, ppl: 11.035147 +epoch: 2, batch: 8983, sum loss: 4946.841797, avg loss: 2.564459, ppl: 12.993629 +epoch: 2, batch: 8984, sum loss: 5136.962402, avg loss: 2.729523, ppl: 15.325574 +epoch: 2, batch: 8985, sum loss: 4285.548340, avg loss: 2.656881, ppl: 14.251763 +epoch: 2, batch: 8986, sum loss: 4177.446777, avg loss: 2.650664, ppl: 14.163442 +epoch: 2, batch: 8987, sum loss: 3277.720459, avg loss: 2.265183, ppl: 9.632892 +epoch: 2, batch: 8988, sum loss: 3638.451172, avg loss: 2.228078, ppl: 9.282008 +epoch: 2, batch: 8989, sum loss: 3770.351318, avg loss: 2.265836, ppl: 9.639182 +epoch: 2, batch: 8990, sum loss: 3482.497070, avg loss: 2.248223, ppl: 9.470889 +epoch: 2, batch: 8991, sum loss: 4581.746094, avg loss: 2.627148, ppl: 13.834257 +epoch: 2, batch: 8992, sum loss: 4095.445312, avg loss: 2.456776, ppl: 11.667132 +epoch: 2, batch: 8993, sum loss: 4169.458008, avg loss: 2.483298, ppl: 11.980716 +epoch: 2, batch: 8994, sum loss: 3933.634277, avg loss: 2.345638, ppl: 10.439927 +epoch: 2, batch: 8995, sum loss: 5074.500488, avg loss: 2.552566, ppl: 12.840004 +epoch: 2, batch: 8996, sum loss: 4813.394043, avg loss: 2.482411, ppl: 11.970085 +epoch: 2, batch: 8997, sum loss: 5008.882812, avg loss: 2.694396, ppl: 14.796583 +epoch: 2, batch: 8998, sum loss: 4752.230957, avg loss: 2.611116, ppl: 13.614235 +epoch: 2, batch: 8999, sum loss: 4866.037109, avg loss: 2.494125, ppl: 12.111128 +epoch: 2, batch: 9000, sum loss: 3915.622314, avg loss: 2.424534, ppl: 11.296965 +epoch: 2, batch: 9001, sum loss: 4380.201172, avg loss: 2.688890, ppl: 14.715325 +epoch: 2, batch: 9002, sum loss: 4029.209473, avg loss: 2.474944, ppl: 11.881046 
+epoch: 2, batch: 9003, sum loss: 4832.157227, avg loss: 2.629030, ppl: 13.860319 +epoch: 2, batch: 9004, sum loss: 4554.587891, avg loss: 2.608584, ppl: 13.579810 +epoch: 2, batch: 9005, sum loss: 5342.339844, avg loss: 2.843183, ppl: 17.170324 +epoch: 2, batch: 9006, sum loss: 4524.081543, avg loss: 2.466784, ppl: 11.784484 +epoch: 2, batch: 9007, sum loss: 3148.404785, avg loss: 2.301466, ppl: 9.988811 +epoch: 2, batch: 9008, sum loss: 5110.929688, avg loss: 2.753734, ppl: 15.701145 +epoch: 2, batch: 9009, sum loss: 4536.208008, avg loss: 2.544144, ppl: 12.732321 +epoch: 2, batch: 9010, sum loss: 3838.724609, avg loss: 2.284955, ppl: 9.825247 +epoch: 2, batch: 9011, sum loss: 4513.496582, avg loss: 2.678633, ppl: 14.565168 +epoch: 2, batch: 9012, sum loss: 4388.684570, avg loss: 2.420676, ppl: 11.253459 +epoch: 2, batch: 9013, sum loss: 4257.459961, avg loss: 2.403986, ppl: 11.067207 +epoch: 2, batch: 9014, sum loss: 4429.757324, avg loss: 2.704369, ppl: 14.944888 +epoch: 2, batch: 9015, sum loss: 2980.354248, avg loss: 2.178622, ppl: 8.834121 +epoch: 2, batch: 9016, sum loss: 4071.902832, avg loss: 2.395237, ppl: 10.970800 +epoch: 2, batch: 9017, sum loss: 3570.217285, avg loss: 2.150733, ppl: 8.591155 +epoch: 2, batch: 9018, sum loss: 3785.522705, avg loss: 2.299832, ppl: 9.972504 +epoch: 2, batch: 9019, sum loss: 5351.372070, avg loss: 2.900473, ppl: 18.182737 +epoch: 2, batch: 9020, sum loss: 4073.498535, avg loss: 2.380771, ppl: 10.813233 +epoch: 2, batch: 9021, sum loss: 4579.081543, avg loss: 2.522910, ppl: 12.464818 +epoch: 2, batch: 9022, sum loss: 4533.196289, avg loss: 2.730841, ppl: 15.345790 +epoch: 2, batch: 9023, sum loss: 4375.931641, avg loss: 2.665001, ppl: 14.367966 +epoch: 2, batch: 9024, sum loss: 4645.394531, avg loss: 2.556629, ppl: 12.892283 +epoch: 2, batch: 9025, sum loss: 5015.802246, avg loss: 2.500400, ppl: 12.187368 +epoch: 2, batch: 9026, sum loss: 3371.872803, avg loss: 2.295352, ppl: 9.927935 +epoch: 2, batch: 9027, sum loss: 
4460.659180, avg loss: 2.551865, ppl: 12.831006 +epoch: 2, batch: 9028, sum loss: 4420.311035, avg loss: 2.534582, ppl: 12.611157 +epoch: 2, batch: 9029, sum loss: 4139.921875, avg loss: 2.754439, ppl: 15.712222 +epoch: 2, batch: 9030, sum loss: 4476.921875, avg loss: 2.580359, ppl: 13.201870 +epoch: 2, batch: 9031, sum loss: 3828.572998, avg loss: 2.499068, ppl: 12.171151 +epoch: 2, batch: 9032, sum loss: 4358.420898, avg loss: 2.583534, ppl: 13.243853 +epoch: 2, batch: 9033, sum loss: 4017.404053, avg loss: 2.716297, ppl: 15.124219 +epoch: 2, batch: 9034, sum loss: 3893.342529, avg loss: 2.498936, ppl: 12.169540 +epoch: 2, batch: 9035, sum loss: 4943.382812, avg loss: 2.725128, ppl: 15.258373 +epoch: 2, batch: 9036, sum loss: 3771.824219, avg loss: 2.512874, ppl: 12.340350 +epoch: 2, batch: 9037, sum loss: 3967.817139, avg loss: 2.383073, ppl: 10.838161 +epoch: 2, batch: 9038, sum loss: 4737.494141, avg loss: 2.455933, ppl: 11.657300 +epoch: 2, batch: 9039, sum loss: 5446.411133, avg loss: 2.752103, ppl: 15.675557 +epoch: 2, batch: 9040, sum loss: 3472.283691, avg loss: 2.386449, ppl: 10.874812 +epoch: 2, batch: 9041, sum loss: 4215.640625, avg loss: 2.335535, ppl: 10.334989 +epoch: 2, batch: 9042, sum loss: 5642.173340, avg loss: 2.708677, ppl: 15.009398 +epoch: 2, batch: 9043, sum loss: 4292.266113, avg loss: 2.680991, ppl: 14.599553 +epoch: 2, batch: 9044, sum loss: 4446.520020, avg loss: 2.642020, ppl: 14.041542 +epoch: 2, batch: 9045, sum loss: 5214.958008, avg loss: 2.911757, ppl: 18.389088 +epoch: 2, batch: 9046, sum loss: 4148.965332, avg loss: 2.639291, ppl: 14.003272 +epoch: 2, batch: 9047, sum loss: 3985.563721, avg loss: 2.519320, ppl: 12.420142 +epoch: 2, batch: 9048, sum loss: 4276.742676, avg loss: 2.672964, ppl: 14.482834 +epoch: 2, batch: 9049, sum loss: 4460.798340, avg loss: 2.585970, ppl: 13.276163 +epoch: 2, batch: 9050, sum loss: 3909.364990, avg loss: 2.357880, ppl: 10.568521 +epoch: 2, batch: 9051, sum loss: 4179.197266, avg loss: 
2.658522, ppl: 14.275180 +epoch: 2, batch: 9052, sum loss: 3534.070557, avg loss: 2.506433, ppl: 12.261120 +epoch: 2, batch: 9053, sum loss: 4441.259766, avg loss: 2.574643, ppl: 13.126635 +epoch: 2, batch: 9054, sum loss: 3567.115234, avg loss: 2.463477, ppl: 11.745584 +epoch: 2, batch: 9055, sum loss: 4392.846680, avg loss: 2.603940, ppl: 13.516890 +epoch: 2, batch: 9056, sum loss: 4662.822266, avg loss: 2.689055, ppl: 14.717768 +epoch: 2, batch: 9057, sum loss: 4864.790527, avg loss: 2.776707, ppl: 16.066031 +epoch: 2, batch: 9058, sum loss: 4385.505859, avg loss: 2.602674, ppl: 13.499788 +epoch: 2, batch: 9059, sum loss: 4410.351562, avg loss: 2.576140, ppl: 13.146295 +epoch: 2, batch: 9060, sum loss: 4743.962402, avg loss: 2.524727, ppl: 12.487490 +epoch: 2, batch: 9061, sum loss: 3829.055176, avg loss: 2.301115, ppl: 9.985311 +epoch: 2, batch: 9062, sum loss: 3499.587402, avg loss: 2.334615, ppl: 10.325481 +epoch: 2, batch: 9063, sum loss: 4148.767578, avg loss: 2.522047, ppl: 12.454064 +epoch: 2, batch: 9064, sum loss: 3889.772949, avg loss: 2.341826, ppl: 10.400209 +epoch: 2, batch: 9065, sum loss: 3567.436279, avg loss: 2.402314, ppl: 11.048713 +epoch: 2, batch: 9066, sum loss: 4423.268555, avg loss: 2.441097, ppl: 11.485640 +epoch: 2, batch: 9067, sum loss: 4238.381836, avg loss: 2.387821, ppl: 10.889736 +epoch: 2, batch: 9068, sum loss: 3905.579590, avg loss: 2.447105, ppl: 11.554846 +epoch: 2, batch: 9069, sum loss: 4970.444336, avg loss: 2.705740, ppl: 14.965390 +epoch: 2, batch: 9070, sum loss: 4111.048828, avg loss: 2.582317, ppl: 13.227753 +epoch: 2, batch: 9071, sum loss: 4615.900391, avg loss: 2.428143, ppl: 11.337811 +epoch: 2, batch: 9072, sum loss: 4655.433594, avg loss: 2.397237, ppl: 10.992760 +epoch: 2, batch: 9073, sum loss: 4339.941895, avg loss: 2.533533, ppl: 12.597935 +epoch: 2, batch: 9074, sum loss: 3823.858643, avg loss: 2.304918, ppl: 10.023355 +epoch: 2, batch: 9075, sum loss: 4651.851562, avg loss: 2.395392, ppl: 10.972501 +epoch: 
2, batch: 9076, sum loss: 4385.457031, avg loss: 2.527641, ppl: 12.523928 +epoch: 2, batch: 9077, sum loss: 3966.754639, avg loss: 2.376725, ppl: 10.769579 +epoch: 2, batch: 9078, sum loss: 4336.500488, avg loss: 2.675201, ppl: 14.515263 +epoch: 2, batch: 9079, sum loss: 3397.714600, avg loss: 2.330394, ppl: 10.281992 +epoch: 2, batch: 9080, sum loss: 3974.994873, avg loss: 2.360448, ppl: 10.595701 +epoch: 2, batch: 9081, sum loss: 4341.511719, avg loss: 2.496557, ppl: 12.140615 +epoch: 2, batch: 9082, sum loss: 3389.735840, avg loss: 2.514641, ppl: 12.362167 +epoch: 2, batch: 9083, sum loss: 4535.321289, avg loss: 2.568132, ppl: 13.041439 +epoch: 2, batch: 9084, sum loss: 3340.239746, avg loss: 2.275368, ppl: 9.731497 +epoch: 2, batch: 9085, sum loss: 4777.265137, avg loss: 2.726750, ppl: 15.283131 +epoch: 2, batch: 9086, sum loss: 4934.417480, avg loss: 2.776825, ppl: 16.067919 +epoch: 2, batch: 9087, sum loss: 4517.933594, avg loss: 2.479656, ppl: 11.937160 +epoch: 2, batch: 9088, sum loss: 4158.090820, avg loss: 2.509409, ppl: 12.297659 +epoch: 2, batch: 9089, sum loss: 3712.283203, avg loss: 2.390395, ppl: 10.917805 +epoch: 2, batch: 9090, sum loss: 3910.618408, avg loss: 2.537715, ppl: 12.650727 +epoch: 2, batch: 9091, sum loss: 3887.415283, avg loss: 2.376171, ppl: 10.763609 +epoch: 2, batch: 9092, sum loss: 4168.537598, avg loss: 2.638315, ppl: 13.989611 +epoch: 2, batch: 9093, sum loss: 4088.029297, avg loss: 2.354856, ppl: 10.536607 +epoch: 2, batch: 9094, sum loss: 5343.140137, avg loss: 2.813660, ppl: 16.670820 +epoch: 2, batch: 9095, sum loss: 4840.588379, avg loss: 2.567951, ppl: 13.039085 +epoch: 2, batch: 9096, sum loss: 4161.490234, avg loss: 2.459510, ppl: 11.699073 +epoch: 2, batch: 9097, sum loss: 4834.604004, avg loss: 2.607661, ppl: 13.567284 +epoch: 2, batch: 9098, sum loss: 4370.765137, avg loss: 2.500438, ppl: 12.187831 +epoch: 2, batch: 9099, sum loss: 5189.610352, avg loss: 2.878320, ppl: 17.784367 +epoch: 2, batch: 9100, sum loss: 
4238.659180, avg loss: 2.494797, ppl: 12.119267 +epoch: 2, batch: 9101, sum loss: 3494.461426, avg loss: 2.448817, ppl: 11.574643 +epoch: 2, batch: 9102, sum loss: 3879.593262, avg loss: 2.517582, ppl: 12.398577 +epoch: 2, batch: 9103, sum loss: 4706.253906, avg loss: 2.573130, ppl: 13.106780 +epoch: 2, batch: 9104, sum loss: 3873.270508, avg loss: 2.192004, ppl: 8.953135 +epoch: 2, batch: 9105, sum loss: 3716.268066, avg loss: 2.177076, ppl: 8.820474 +epoch: 2, batch: 9106, sum loss: 4653.533691, avg loss: 2.851430, ppl: 17.312525 +epoch: 2, batch: 9107, sum loss: 4954.906250, avg loss: 2.659638, ppl: 14.291117 +epoch: 2, batch: 9108, sum loss: 4086.324463, avg loss: 2.563566, ppl: 12.982032 +epoch: 2, batch: 9109, sum loss: 4267.305664, avg loss: 2.322975, ppl: 10.205997 +epoch: 2, batch: 9110, sum loss: 5024.642578, avg loss: 2.542835, ppl: 12.715674 +epoch: 2, batch: 9111, sum loss: 4951.859863, avg loss: 2.673790, ppl: 14.494807 +epoch: 2, batch: 9112, sum loss: 4112.382324, avg loss: 2.236206, ppl: 9.357759 +epoch: 2, batch: 9113, sum loss: 4852.268066, avg loss: 2.674900, ppl: 14.510896 +epoch: 2, batch: 9114, sum loss: 5078.550781, avg loss: 2.830853, ppl: 16.959929 +epoch: 2, batch: 9115, sum loss: 3813.035400, avg loss: 2.362475, ppl: 10.617201 +epoch: 2, batch: 9116, sum loss: 4585.574219, avg loss: 2.557487, ppl: 12.903351 +epoch: 2, batch: 9117, sum loss: 4624.813477, avg loss: 2.494506, ppl: 12.115743 +epoch: 2, batch: 9118, sum loss: 4377.183105, avg loss: 2.597735, ppl: 13.433273 +epoch: 2, batch: 9119, sum loss: 3652.299072, avg loss: 2.399671, ppl: 11.019551 +epoch: 2, batch: 9120, sum loss: 4204.348633, avg loss: 2.696824, ppl: 14.832550 +epoch: 2, batch: 9121, sum loss: 3717.492676, avg loss: 2.346902, ppl: 10.453135 +epoch: 2, batch: 9122, sum loss: 4696.180176, avg loss: 2.782097, ppl: 16.152863 +epoch: 2, batch: 9123, sum loss: 5032.119629, avg loss: 2.547909, ppl: 12.780350 +epoch: 2, batch: 9124, sum loss: 4348.842773, avg loss: 2.762924, 
ppl: 15.846112 +epoch: 2, batch: 9125, sum loss: 4844.825684, avg loss: 2.592202, ppl: 13.359156 +epoch: 2, batch: 9126, sum loss: 4203.227539, avg loss: 2.484177, ppl: 11.991249 +epoch: 2, batch: 9127, sum loss: 4129.746582, avg loss: 2.358508, ppl: 10.575158 +epoch: 2, batch: 9128, sum loss: 3830.248047, avg loss: 2.477521, ppl: 11.911704 +epoch: 2, batch: 9129, sum loss: 4853.870605, avg loss: 2.557361, ppl: 12.901723 +epoch: 2, batch: 9130, sum loss: 3920.764893, avg loss: 2.465890, ppl: 11.773955 +epoch: 2, batch: 9131, sum loss: 5463.395020, avg loss: 2.766276, ppl: 15.899313 +epoch: 2, batch: 9132, sum loss: 4238.070801, avg loss: 2.536248, ppl: 12.632192 +epoch: 2, batch: 9133, sum loss: 5282.367188, avg loss: 2.689596, ppl: 14.725732 +epoch: 2, batch: 9134, sum loss: 5360.575195, avg loss: 2.606016, ppl: 13.544982 +epoch: 2, batch: 9135, sum loss: 3489.795410, avg loss: 2.353200, ppl: 10.519176 +epoch: 2, batch: 9136, sum loss: 4875.905273, avg loss: 2.593567, ppl: 13.377399 +epoch: 2, batch: 9137, sum loss: 5080.243164, avg loss: 2.634981, ppl: 13.943047 +epoch: 2, batch: 9138, sum loss: 3888.967041, avg loss: 2.288974, ppl: 9.864814 +epoch: 2, batch: 9139, sum loss: 4462.134766, avg loss: 2.543976, ppl: 12.730189 +epoch: 2, batch: 9140, sum loss: 4080.362305, avg loss: 2.249373, ppl: 9.481789 +epoch: 2, batch: 9141, sum loss: 3786.800049, avg loss: 2.316086, ppl: 10.135921 +epoch: 2, batch: 9142, sum loss: 4629.676758, avg loss: 2.598023, ppl: 13.437145 +epoch: 2, batch: 9143, sum loss: 4409.276855, avg loss: 2.624570, ppl: 13.798635 +epoch: 2, batch: 9144, sum loss: 3817.810791, avg loss: 2.496933, ppl: 12.145190 +epoch: 2, batch: 9145, sum loss: 4859.243164, avg loss: 2.441831, ppl: 11.494063 +epoch: 2, batch: 9146, sum loss: 3960.138184, avg loss: 2.437008, ppl: 11.438766 +epoch: 2, batch: 9147, sum loss: 3774.374512, avg loss: 2.350171, ppl: 10.487361 +epoch: 2, batch: 9148, sum loss: 4396.141113, avg loss: 2.525067, ppl: 12.491728 +epoch: 2, batch: 
9149, sum loss: 4819.629395, avg loss: 2.537983, ppl: 12.654121 +epoch: 2, batch: 9150, sum loss: 5496.107422, avg loss: 2.724892, ppl: 15.254772 +epoch: 2, batch: 9151, sum loss: 4422.200684, avg loss: 2.400760, ppl: 11.031562 +epoch: 2, batch: 9152, sum loss: 4808.601562, avg loss: 2.596437, ppl: 13.415852 +epoch: 2, batch: 9153, sum loss: 5008.038574, avg loss: 2.642764, ppl: 14.051994 +epoch: 2, batch: 9154, sum loss: 4411.955566, avg loss: 2.462029, ppl: 11.728584 +epoch: 2, batch: 9155, sum loss: 5244.370117, avg loss: 2.554491, ppl: 12.864751 +epoch: 2, batch: 9156, sum loss: 4038.258301, avg loss: 2.440035, ppl: 11.473446 +epoch: 2, batch: 9157, sum loss: 5181.958496, avg loss: 2.919413, ppl: 18.530409 +epoch: 2, batch: 9158, sum loss: 3749.461426, avg loss: 2.479803, ppl: 11.938908 +epoch: 2, batch: 9159, sum loss: 3592.069580, avg loss: 2.441924, ppl: 11.495131 +epoch: 2, batch: 9160, sum loss: 4643.901367, avg loss: 2.565691, ppl: 13.009651 +epoch: 2, batch: 9161, sum loss: 4956.536133, avg loss: 2.632255, ppl: 13.905088 +epoch: 2, batch: 9162, sum loss: 4324.473145, avg loss: 2.424032, ppl: 11.291297 +epoch: 2, batch: 9163, sum loss: 3514.610107, avg loss: 2.432256, ppl: 11.384540 +epoch: 2, batch: 9164, sum loss: 4280.591797, avg loss: 2.398091, ppl: 11.002151 +epoch: 2, batch: 9165, sum loss: 4134.040527, avg loss: 2.629797, ppl: 13.870951 +epoch: 2, batch: 9166, sum loss: 4193.129883, avg loss: 2.581976, ppl: 13.223247 +epoch: 2, batch: 9167, sum loss: 3544.736816, avg loss: 2.242085, ppl: 9.412939 +epoch: 2, batch: 9168, sum loss: 4329.443848, avg loss: 2.536288, ppl: 12.632694 +epoch: 2, batch: 9169, sum loss: 4145.791016, avg loss: 2.325177, ppl: 10.228495 +epoch: 2, batch: 9170, sum loss: 4816.000000, avg loss: 2.532072, ppl: 12.579538 +epoch: 2, batch: 9171, sum loss: 4053.347900, avg loss: 2.506709, ppl: 12.264500 +epoch: 2, batch: 9172, sum loss: 3847.552002, avg loss: 2.633506, ppl: 13.922494 +epoch: 2, batch: 9173, sum loss: 4200.801758, avg 
loss: 2.585109, ppl: 13.264731 +epoch: 2, batch: 9174, sum loss: 3777.937744, avg loss: 2.224934, ppl: 9.252871 +epoch: 2, batch: 9175, sum loss: 4168.474609, avg loss: 2.538657, ppl: 12.662653 +epoch: 2, batch: 9176, sum loss: 5074.780762, avg loss: 2.756535, ppl: 15.745193 +epoch: 2, batch: 9177, sum loss: 3970.780273, avg loss: 2.477093, ppl: 11.906601 +epoch: 2, batch: 9178, sum loss: 4336.928223, avg loss: 2.715672, ppl: 15.114764 +epoch: 2, batch: 9179, sum loss: 4741.760742, avg loss: 2.766488, ppl: 15.902691 +epoch: 2, batch: 9180, sum loss: 4552.713867, avg loss: 2.586769, ppl: 13.286774 +epoch: 2, batch: 9181, sum loss: 4428.282227, avg loss: 2.731821, ppl: 15.360834 +epoch: 2, batch: 9182, sum loss: 4614.432617, avg loss: 2.555057, ppl: 12.872031 +epoch: 2, batch: 9183, sum loss: 4373.166016, avg loss: 2.520557, ppl: 12.435520 +epoch: 2, batch: 9184, sum loss: 4772.812500, avg loss: 2.727321, ppl: 15.291871 +epoch: 2, batch: 9185, sum loss: 4155.385254, avg loss: 2.549316, ppl: 12.798348 +epoch: 2, batch: 9186, sum loss: 4531.022461, avg loss: 2.607033, ppl: 13.558756 +epoch: 2, batch: 9187, sum loss: 5420.732910, avg loss: 2.853018, ppl: 17.340027 +epoch: 2, batch: 9188, sum loss: 3869.898682, avg loss: 2.446206, ppl: 11.544468 +epoch: 2, batch: 9189, sum loss: 4284.441406, avg loss: 2.392206, ppl: 10.937598 +epoch: 2, batch: 9190, sum loss: 3431.614258, avg loss: 2.389704, ppl: 10.910258 +epoch: 2, batch: 9191, sum loss: 3612.314209, avg loss: 2.608169, ppl: 13.574175 +epoch: 2, batch: 9192, sum loss: 3297.989990, avg loss: 2.377787, ppl: 10.781014 +epoch: 2, batch: 9193, sum loss: 4400.920410, avg loss: 2.492027, ppl: 12.085752 +epoch: 2, batch: 9194, sum loss: 4398.230957, avg loss: 2.365912, ppl: 10.653752 +epoch: 2, batch: 9195, sum loss: 3833.119873, avg loss: 2.295281, ppl: 9.927229 +epoch: 2, batch: 9196, sum loss: 3350.394531, avg loss: 2.192667, ppl: 8.959071 +epoch: 2, batch: 9197, sum loss: 4219.966309, avg loss: 2.419705, ppl: 11.242547 
+epoch: 2, batch: 9198, sum loss: 4446.414551, avg loss: 2.497986, ppl: 12.157981 +epoch: 2, batch: 9199, sum loss: 4546.802734, avg loss: 2.467066, ppl: 11.787811 +epoch: 2, batch: 9200, sum loss: 4230.264160, avg loss: 2.660543, ppl: 14.304060 +epoch: 2, batch: 9201, sum loss: 4386.045898, avg loss: 2.676050, ppl: 14.527596 +epoch: 2, batch: 9202, sum loss: 4358.122559, avg loss: 2.676979, ppl: 14.541102 +epoch: 2, batch: 9203, sum loss: 3461.049805, avg loss: 2.437359, ppl: 11.442781 +epoch: 2, batch: 9204, sum loss: 4312.578613, avg loss: 2.554845, ppl: 12.869309 +epoch: 2, batch: 9205, sum loss: 4593.861328, avg loss: 2.570711, ppl: 13.075123 +epoch: 2, batch: 9206, sum loss: 4283.531250, avg loss: 2.594507, ppl: 13.389987 +epoch: 2, batch: 9207, sum loss: 3863.450195, avg loss: 2.515267, ppl: 12.369913 +epoch: 2, batch: 9208, sum loss: 4262.605469, avg loss: 2.637751, ppl: 13.981722 +epoch: 2, batch: 9209, sum loss: 4650.308594, avg loss: 2.698960, ppl: 14.864269 +epoch: 2, batch: 9210, sum loss: 4479.048828, avg loss: 2.577128, ppl: 13.159292 +epoch: 2, batch: 9211, sum loss: 3575.131104, avg loss: 2.465608, ppl: 11.770633 +epoch: 2, batch: 9212, sum loss: 4055.645996, avg loss: 2.308279, ppl: 10.057099 +epoch: 2, batch: 9213, sum loss: 3524.994629, avg loss: 2.422677, ppl: 11.276000 +epoch: 2, batch: 9214, sum loss: 5532.537109, avg loss: 2.748404, ppl: 15.617686 +epoch: 2, batch: 9215, sum loss: 4086.238037, avg loss: 2.315149, ppl: 10.126432 +epoch: 2, batch: 9216, sum loss: 3710.018066, avg loss: 2.610850, ppl: 13.610617 +epoch: 2, batch: 9217, sum loss: 4335.432617, avg loss: 2.568384, ppl: 13.044732 +epoch: 2, batch: 9218, sum loss: 4858.530762, avg loss: 2.580207, ppl: 13.199875 +epoch: 2, batch: 9219, sum loss: 3384.283691, avg loss: 2.374936, ppl: 10.750323 +epoch: 2, batch: 9220, sum loss: 5111.121582, avg loss: 2.618403, ppl: 13.713801 +epoch: 2, batch: 9221, sum loss: 4048.284180, avg loss: 2.726117, ppl: 15.273467 +epoch: 2, batch: 9222, sum 
loss: 5248.560547, avg loss: 2.672383, ppl: 14.474425 +epoch: 2, batch: 9223, sum loss: 5540.755371, avg loss: 2.745667, ppl: 15.574995 +epoch: 2, batch: 9224, sum loss: 4976.517090, avg loss: 2.832394, ppl: 16.986082 +epoch: 2, batch: 9225, sum loss: 5116.419434, avg loss: 2.535391, ppl: 12.621369 +epoch: 2, batch: 9226, sum loss: 4241.882812, avg loss: 2.745555, ppl: 15.573261 +epoch: 2, batch: 9227, sum loss: 3590.642334, avg loss: 2.424472, ppl: 11.296259 +epoch: 2, batch: 9228, sum loss: 4405.926758, avg loss: 2.585638, ppl: 13.271751 +epoch: 2, batch: 9229, sum loss: 3601.854492, avg loss: 2.396443, ppl: 10.984040 +epoch: 2, batch: 9230, sum loss: 5133.615723, avg loss: 2.543913, ppl: 12.729380 +epoch: 2, batch: 9231, sum loss: 4183.178711, avg loss: 2.415230, ppl: 11.192348 +epoch: 2, batch: 9232, sum loss: 4069.990479, avg loss: 2.561353, ppl: 12.953337 +epoch: 2, batch: 9233, sum loss: 4477.719727, avg loss: 2.525505, ppl: 12.497203 +epoch: 2, batch: 9234, sum loss: 3429.922607, avg loss: 2.177729, ppl: 8.826236 +epoch: 2, batch: 9235, sum loss: 4742.883789, avg loss: 2.654104, ppl: 14.212246 +epoch: 2, batch: 9236, sum loss: 5233.288086, avg loss: 2.757265, ppl: 15.756683 +epoch: 2, batch: 9237, sum loss: 4111.273438, avg loss: 2.671393, ppl: 14.460093 +epoch: 2, batch: 9238, sum loss: 3859.595703, avg loss: 2.347686, ppl: 10.461332 +epoch: 2, batch: 9239, sum loss: 4161.975098, avg loss: 2.301977, ppl: 9.993925 +epoch: 2, batch: 9240, sum loss: 4630.076660, avg loss: 2.741313, ppl: 15.507325 +epoch: 2, batch: 9241, sum loss: 3586.344727, avg loss: 2.136000, ppl: 8.465511 +epoch: 2, batch: 9242, sum loss: 3923.514160, avg loss: 2.619168, ppl: 13.724304 +epoch: 2, batch: 9243, sum loss: 4903.864258, avg loss: 2.689997, ppl: 14.731631 +epoch: 2, batch: 9244, sum loss: 3843.515137, avg loss: 2.318164, ppl: 10.157005 +epoch: 2, batch: 9245, sum loss: 3832.555664, avg loss: 2.392357, ppl: 10.939246 +epoch: 2, batch: 9246, sum loss: 3746.342773, avg loss: 
2.507592, ppl: 12.275338 +epoch: 2, batch: 9247, sum loss: 4392.536133, avg loss: 2.639745, ppl: 14.009637 +epoch: 2, batch: 9248, sum loss: 4461.545410, avg loss: 2.561163, ppl: 12.950867 +epoch: 2, batch: 9249, sum loss: 4220.085938, avg loss: 2.469331, ppl: 11.814535 +epoch: 2, batch: 9250, sum loss: 3461.736816, avg loss: 2.467382, ppl: 11.791535 +epoch: 2, batch: 9251, sum loss: 3728.971191, avg loss: 2.462993, ppl: 11.739896 +epoch: 2, batch: 9252, sum loss: 4898.845703, avg loss: 2.590611, ppl: 13.337921 +epoch: 2, batch: 9253, sum loss: 4298.280273, avg loss: 2.428407, ppl: 11.340801 +epoch: 2, batch: 9254, sum loss: 4051.780273, avg loss: 2.569296, ppl: 13.056634 +epoch: 2, batch: 9255, sum loss: 4887.547363, avg loss: 2.871650, ppl: 17.666134 +epoch: 2, batch: 9256, sum loss: 4844.101074, avg loss: 2.591814, ppl: 13.353977 +epoch: 2, batch: 9257, sum loss: 4925.812500, avg loss: 2.642603, ppl: 14.049729 +epoch: 2, batch: 9258, sum loss: 3796.865723, avg loss: 2.376011, ppl: 10.761889 +epoch: 2, batch: 9259, sum loss: 4817.960938, avg loss: 2.580590, ppl: 13.204921 +epoch: 2, batch: 9260, sum loss: 3853.942139, avg loss: 2.552280, ppl: 12.836333 +epoch: 2, batch: 9261, sum loss: 3862.060303, avg loss: 2.547533, ppl: 12.775551 +epoch: 2, batch: 9262, sum loss: 5025.698242, avg loss: 2.616189, ppl: 13.683474 +epoch: 2, batch: 9263, sum loss: 4036.766846, avg loss: 2.453962, ppl: 11.634346 +epoch: 2, batch: 9264, sum loss: 4915.399414, avg loss: 2.644109, ppl: 14.070906 +epoch: 2, batch: 9265, sum loss: 3785.354736, avg loss: 2.428066, ppl: 11.336935 +epoch: 2, batch: 9266, sum loss: 4556.294434, avg loss: 2.572724, ppl: 13.101465 +epoch: 2, batch: 9267, sum loss: 4067.845947, avg loss: 2.597603, ppl: 13.431499 +epoch: 2, batch: 9268, sum loss: 3789.220703, avg loss: 2.209458, ppl: 9.110778 +epoch: 2, batch: 9269, sum loss: 5005.303711, avg loss: 2.702648, ppl: 14.919181 +epoch: 2, batch: 9270, sum loss: 5779.988281, avg loss: 2.717437, ppl: 15.141465 +epoch: 
2, batch: 9271, sum loss: 4415.907227, avg loss: 2.565896, ppl: 13.012313 +epoch: 2, batch: 9272, sum loss: 5358.300293, avg loss: 2.715814, ppl: 15.116905 +epoch: 2, batch: 9273, sum loss: 4386.129395, avg loss: 2.550075, ppl: 12.808068 +epoch: 2, batch: 9274, sum loss: 5043.373047, avg loss: 2.611794, ppl: 13.623463 +epoch: 2, batch: 9275, sum loss: 4469.121094, avg loss: 2.580324, ppl: 13.201414 +epoch: 2, batch: 9276, sum loss: 4850.439941, avg loss: 2.518401, ppl: 12.408738 +epoch: 2, batch: 9277, sum loss: 4144.726562, avg loss: 2.307754, ppl: 10.051826 +epoch: 2, batch: 9278, sum loss: 4060.248047, avg loss: 2.657230, ppl: 14.256748 +epoch: 2, batch: 9279, sum loss: 4957.619141, avg loss: 2.595612, ppl: 13.404789 +epoch: 2, batch: 9280, sum loss: 5214.708008, avg loss: 2.882647, ppl: 17.861490 +epoch: 2, batch: 9281, sum loss: 3663.132324, avg loss: 2.384852, ppl: 10.857455 +epoch: 2, batch: 9282, sum loss: 4542.666016, avg loss: 2.754801, ppl: 15.717906 +epoch: 2, batch: 9283, sum loss: 3712.196289, avg loss: 2.447064, ppl: 11.554375 +epoch: 2, batch: 9284, sum loss: 4272.353516, avg loss: 2.435777, ppl: 11.424697 +epoch: 2, batch: 9285, sum loss: 4107.509766, avg loss: 2.355223, ppl: 10.540483 +epoch: 2, batch: 9286, sum loss: 4273.228027, avg loss: 2.515143, ppl: 12.368380 +epoch: 2, batch: 9287, sum loss: 4261.196777, avg loss: 2.513980, ppl: 12.354006 +epoch: 2, batch: 9288, sum loss: 4648.233398, avg loss: 2.512559, ppl: 12.336455 +epoch: 2, batch: 9289, sum loss: 5082.272949, avg loss: 2.699030, ppl: 14.865301 +epoch: 2, batch: 9290, sum loss: 4110.800781, avg loss: 2.502009, ppl: 12.206992 +epoch: 2, batch: 9291, sum loss: 5102.930176, avg loss: 2.764318, ppl: 15.868206 +epoch: 2, batch: 9292, sum loss: 4473.597168, avg loss: 2.536053, ppl: 12.629723 +epoch: 2, batch: 9293, sum loss: 4678.803223, avg loss: 2.511435, ppl: 12.322598 +epoch: 2, batch: 9294, sum loss: 3990.896729, avg loss: 2.551724, ppl: 12.829205 +epoch: 2, batch: 9295, sum loss: 
3812.626221, avg loss: 2.423793, ppl: 11.288594 +epoch: 2, batch: 9296, sum loss: 4370.125977, avg loss: 2.599718, ppl: 13.459940 +epoch: 2, batch: 9297, sum loss: 3997.160889, avg loss: 2.673686, ppl: 14.493294 +epoch: 2, batch: 9298, sum loss: 3891.108154, avg loss: 2.316136, ppl: 10.136430 +epoch: 2, batch: 9299, sum loss: 4128.340332, avg loss: 2.589925, ppl: 13.328770 +epoch: 2, batch: 9300, sum loss: 3745.292236, avg loss: 2.416318, ppl: 11.204525 +epoch: 2, batch: 9301, sum loss: 3768.804443, avg loss: 2.359928, ppl: 10.590185 +epoch: 2, batch: 9302, sum loss: 4493.632324, avg loss: 2.431619, ppl: 11.377289 +epoch: 2, batch: 9303, sum loss: 4339.020020, avg loss: 2.568988, ppl: 13.052604 +epoch: 2, batch: 9304, sum loss: 4433.674805, avg loss: 2.623476, ppl: 13.783556 +epoch: 2, batch: 9305, sum loss: 5632.834473, avg loss: 2.978760, ppl: 19.663414 +epoch: 2, batch: 9306, sum loss: 3801.767090, avg loss: 2.329514, ppl: 10.272950 +epoch: 2, batch: 9307, sum loss: 4433.758789, avg loss: 2.592841, ppl: 13.367701 +epoch: 2, batch: 9308, sum loss: 4298.855469, avg loss: 2.614876, ppl: 13.665516 +epoch: 2, batch: 9309, sum loss: 4336.833008, avg loss: 2.472539, ppl: 11.852499 +epoch: 2, batch: 9310, sum loss: 4012.324707, avg loss: 2.478273, ppl: 11.920665 +epoch: 2, batch: 9311, sum loss: 4002.226074, avg loss: 2.400855, ppl: 11.032609 +epoch: 2, batch: 9312, sum loss: 4450.883789, avg loss: 2.658832, ppl: 14.279595 +epoch: 2, batch: 9313, sum loss: 4104.356934, avg loss: 2.457699, ppl: 11.677908 +epoch: 2, batch: 9314, sum loss: 5273.058105, avg loss: 2.821326, ppl: 16.799109 +epoch: 2, batch: 9315, sum loss: 4411.759766, avg loss: 2.627611, ppl: 13.840671 +epoch: 2, batch: 9316, sum loss: 5132.275391, avg loss: 2.812206, ppl: 16.646597 +epoch: 2, batch: 9317, sum loss: 3462.634033, avg loss: 2.164146, ppl: 8.707165 +epoch: 2, batch: 9318, sum loss: 3974.227783, avg loss: 2.564018, ppl: 12.987898 +epoch: 2, batch: 9319, sum loss: 3650.855225, avg loss: 2.406628, 
ppl: 11.096485 +epoch: 2, batch: 9320, sum loss: 4611.147949, avg loss: 2.695002, ppl: 14.805546 +epoch: 2, batch: 9321, sum loss: 3912.701172, avg loss: 2.530855, ppl: 12.564240 +epoch: 2, batch: 9322, sum loss: 3571.125488, avg loss: 2.368120, ppl: 10.677302 +epoch: 2, batch: 9323, sum loss: 4079.007080, avg loss: 2.390977, ppl: 10.924163 +epoch: 2, batch: 9324, sum loss: 3379.926758, avg loss: 2.303972, ppl: 10.013876 +epoch: 2, batch: 9325, sum loss: 4541.603516, avg loss: 2.564429, ppl: 12.993235 +epoch: 2, batch: 9326, sum loss: 4312.963379, avg loss: 2.604446, ppl: 13.523737 +epoch: 2, batch: 9327, sum loss: 4005.455078, avg loss: 2.538311, ppl: 12.658277 +epoch: 2, batch: 9328, sum loss: 5096.421875, avg loss: 2.963036, ppl: 19.356651 +epoch: 2, batch: 9329, sum loss: 4006.265625, avg loss: 2.450315, ppl: 11.592000 +epoch: 2, batch: 9330, sum loss: 4007.325684, avg loss: 2.583704, ppl: 13.246118 +epoch: 2, batch: 9331, sum loss: 5299.931641, avg loss: 2.866377, ppl: 17.573240 +epoch: 2, batch: 9332, sum loss: 3812.507812, avg loss: 2.309211, ppl: 10.066484 +epoch: 2, batch: 9333, sum loss: 3813.469727, avg loss: 2.436722, ppl: 11.435492 +epoch: 2, batch: 9334, sum loss: 4358.927246, avg loss: 2.538688, ppl: 12.663046 +epoch: 2, batch: 9335, sum loss: 4303.771484, avg loss: 2.564822, ppl: 12.998347 +epoch: 2, batch: 9336, sum loss: 5495.588867, avg loss: 2.626955, ppl: 13.831582 +epoch: 2, batch: 9337, sum loss: 4707.902344, avg loss: 2.637480, ppl: 13.977942 +epoch: 2, batch: 9338, sum loss: 4110.181641, avg loss: 2.423456, ppl: 11.284795 +epoch: 2, batch: 9339, sum loss: 4904.213379, avg loss: 2.581165, ppl: 13.212523 +epoch: 2, batch: 9340, sum loss: 4118.782227, avg loss: 2.369840, ppl: 10.695683 +epoch: 2, batch: 9341, sum loss: 4639.385254, avg loss: 2.781406, ppl: 16.141703 +epoch: 2, batch: 9342, sum loss: 4397.792969, avg loss: 2.462370, ppl: 11.732587 +epoch: 2, batch: 9343, sum loss: 4032.848633, avg loss: 2.409109, ppl: 11.124049 +epoch: 2, 
batch: 9344, sum loss: 4257.096191, avg loss: 2.562972, ppl: 12.974318 +epoch: 2, batch: 9345, sum loss: 4648.172363, avg loss: 2.723007, ppl: 15.226034 +epoch: 2, batch: 9346, sum loss: 4707.010254, avg loss: 2.616459, ppl: 13.687173 +epoch: 2, batch: 9347, sum loss: 4887.592773, avg loss: 2.821936, ppl: 16.809357 +epoch: 2, batch: 9348, sum loss: 4675.531738, avg loss: 2.471211, ppl: 11.836775 +epoch: 2, batch: 9349, sum loss: 3820.460938, avg loss: 2.294571, ppl: 9.920181 +epoch: 2, batch: 9350, sum loss: 4625.872559, avg loss: 2.638832, ppl: 13.996847 +epoch: 2, batch: 9351, sum loss: 4269.960449, avg loss: 2.528100, ppl: 12.529675 +epoch: 2, batch: 9352, sum loss: 4081.923096, avg loss: 2.442803, ppl: 11.505241 +epoch: 2, batch: 9353, sum loss: 3539.179932, avg loss: 2.336092, ppl: 10.340751 +epoch: 2, batch: 9354, sum loss: 3523.123291, avg loss: 2.228414, ppl: 9.285131 +epoch: 2, batch: 9355, sum loss: 4071.599121, avg loss: 2.593375, ppl: 13.374838 +epoch: 2, batch: 9356, sum loss: 3021.028564, avg loss: 2.056520, ppl: 7.818717 +epoch: 2, batch: 9357, sum loss: 3836.013672, avg loss: 2.495780, ppl: 12.131192 +epoch: 2, batch: 9358, sum loss: 4343.312012, avg loss: 2.477645, ppl: 11.913177 +epoch: 2, batch: 9359, sum loss: 4131.665039, avg loss: 2.616634, ppl: 13.689566 +epoch: 2, batch: 9360, sum loss: 5039.722656, avg loss: 2.457203, ppl: 11.672115 +epoch: 2, batch: 9361, sum loss: 4591.456055, avg loss: 2.880462, ppl: 17.822500 +epoch: 2, batch: 9362, sum loss: 4566.610840, avg loss: 2.621476, ppl: 13.756008 +epoch: 2, batch: 9363, sum loss: 4522.017090, avg loss: 2.603349, ppl: 13.508904 +epoch: 2, batch: 9364, sum loss: 4947.169922, avg loss: 2.642719, ppl: 14.051358 +epoch: 2, batch: 9365, sum loss: 4834.913086, avg loss: 2.698054, ppl: 14.850808 +epoch: 2, batch: 9366, sum loss: 4039.815918, avg loss: 2.642130, ppl: 14.043078 +epoch: 2, batch: 9367, sum loss: 4346.387207, avg loss: 2.629393, ppl: 13.865355 +epoch: 2, batch: 9368, sum loss: 
3051.572266, avg loss: 2.257080, ppl: 9.555148 +epoch: 2, batch: 9369, sum loss: 3715.480713, avg loss: 2.415787, ppl: 11.198583 +epoch: 2, batch: 9370, sum loss: 4279.541504, avg loss: 2.603127, ppl: 13.505912 +epoch: 2, batch: 9371, sum loss: 4255.637695, avg loss: 2.559012, ppl: 12.923048 +epoch: 2, batch: 9372, sum loss: 4081.542969, avg loss: 2.382687, ppl: 10.833976 +epoch: 2, batch: 9373, sum loss: 4484.204102, avg loss: 2.729278, ppl: 15.321822 +epoch: 2, batch: 9374, sum loss: 4219.879883, avg loss: 2.765321, ppl: 15.884138 +epoch: 2, batch: 9375, sum loss: 4796.234375, avg loss: 2.834654, ppl: 17.024506 +epoch: 2, batch: 9376, sum loss: 4813.884766, avg loss: 2.596486, ppl: 13.416508 +epoch: 2, batch: 9377, sum loss: 5259.107422, avg loss: 2.677753, ppl: 14.552364 +epoch: 2, batch: 9378, sum loss: 3746.250000, avg loss: 2.505853, ppl: 12.254004 +epoch: 2, batch: 9379, sum loss: 3496.634277, avg loss: 2.294379, ppl: 9.918280 +epoch: 2, batch: 9380, sum loss: 4389.095215, avg loss: 2.756969, ppl: 15.752029 +epoch: 2, batch: 9381, sum loss: 3184.492432, avg loss: 2.124411, ppl: 8.367970 +epoch: 2, batch: 9382, sum loss: 4435.933105, avg loss: 2.568578, ppl: 13.047252 +epoch: 2, batch: 9383, sum loss: 4588.221680, avg loss: 2.586371, ppl: 13.281482 +epoch: 2, batch: 9384, sum loss: 4710.693359, avg loss: 2.488480, ppl: 12.042961 +epoch: 2, batch: 9385, sum loss: 4775.585449, avg loss: 2.770061, ppl: 15.959612 +epoch: 2, batch: 9386, sum loss: 4593.867188, avg loss: 2.635609, ppl: 13.951812 +epoch: 2, batch: 9387, sum loss: 3645.837402, avg loss: 2.365891, ppl: 10.653522 +epoch: 2, batch: 9388, sum loss: 5446.465820, avg loss: 2.777392, ppl: 16.077040 +epoch: 2, batch: 9389, sum loss: 3252.609375, avg loss: 2.358672, ppl: 10.576900 +epoch: 2, batch: 9390, sum loss: 4526.222168, avg loss: 2.604270, ppl: 13.521358 +epoch: 2, batch: 9391, sum loss: 4850.827148, avg loss: 2.760858, ppl: 15.813402 +epoch: 2, batch: 9392, sum loss: 3788.184570, avg loss: 2.245516, 
ppl: 9.445284 +epoch: 2, batch: 9393, sum loss: 4213.583008, avg loss: 2.357909, ppl: 10.568826 +epoch: 2, batch: 9394, sum loss: 3318.682373, avg loss: 2.254540, ppl: 9.530906 +epoch: 2, batch: 9395, sum loss: 3708.741211, avg loss: 2.347305, ppl: 10.457345 +epoch: 2, batch: 9396, sum loss: 4037.360352, avg loss: 2.534438, ppl: 12.609347 +epoch: 2, batch: 9397, sum loss: 3950.664307, avg loss: 2.372772, ppl: 10.727081 +epoch: 2, batch: 9398, sum loss: 4426.882812, avg loss: 2.579769, ppl: 13.194085 +epoch: 2, batch: 9399, sum loss: 4267.303223, avg loss: 2.622805, ppl: 13.774302 +epoch: 2, batch: 9400, sum loss: 4454.051270, avg loss: 2.789011, ppl: 16.264935 +epoch: 2, batch: 9401, sum loss: 4268.924316, avg loss: 2.524497, ppl: 12.484614 +epoch: 2, batch: 9402, sum loss: 4161.296875, avg loss: 2.537376, ppl: 12.646445 +epoch: 2, batch: 9403, sum loss: 3488.913574, avg loss: 2.352605, ppl: 10.512924 +epoch: 2, batch: 9404, sum loss: 3784.496094, avg loss: 2.340443, ppl: 10.385836 +epoch: 2, batch: 9405, sum loss: 4196.913086, avg loss: 2.457209, ppl: 11.672188 +epoch: 2, batch: 9406, sum loss: 3996.729492, avg loss: 2.384684, ppl: 10.855627 +epoch: 2, batch: 9407, sum loss: 4499.604492, avg loss: 2.707343, ppl: 14.989401 +epoch: 2, batch: 9408, sum loss: 4172.823730, avg loss: 2.571056, ppl: 13.079628 +epoch: 2, batch: 9409, sum loss: 4321.356934, avg loss: 2.606367, ppl: 13.549740 +epoch: 2, batch: 9410, sum loss: 4327.166504, avg loss: 2.492607, ppl: 12.092765 +epoch: 2, batch: 9411, sum loss: 3929.482910, avg loss: 2.448276, ppl: 11.568383 +epoch: 2, batch: 9412, sum loss: 4306.477539, avg loss: 2.314066, ppl: 10.115475 +epoch: 2, batch: 9413, sum loss: 3325.850098, avg loss: 2.370528, ppl: 10.703036 +epoch: 2, batch: 9414, sum loss: 4181.072754, avg loss: 2.590504, ppl: 13.336488 +epoch: 2, batch: 9415, sum loss: 5022.627930, avg loss: 2.714934, ppl: 15.103615 +epoch: 2, batch: 9416, sum loss: 3759.480957, avg loss: 2.428605, ppl: 11.343051 +epoch: 2, batch: 
9417, sum loss: 4500.868164, avg loss: 2.438174, ppl: 11.452105 +epoch: 2, batch: 9418, sum loss: 4316.824707, avg loss: 2.451349, ppl: 11.603985 +epoch: 2, batch: 9419, sum loss: 4160.444824, avg loss: 2.606795, ppl: 13.555536 +epoch: 2, batch: 9420, sum loss: 4169.726074, avg loss: 2.548733, ppl: 12.790881 +epoch: 2, batch: 9421, sum loss: 4872.785645, avg loss: 2.784449, ppl: 16.190891 +epoch: 2, batch: 9422, sum loss: 3792.794922, avg loss: 2.300058, ppl: 9.974757 +epoch: 2, batch: 9423, sum loss: 4270.541992, avg loss: 2.535951, ppl: 12.628440 +epoch: 2, batch: 9424, sum loss: 4006.465332, avg loss: 2.438506, ppl: 11.455912 +epoch: 2, batch: 9425, sum loss: 3662.334961, avg loss: 2.418980, ppl: 11.234390 +epoch: 2, batch: 9426, sum loss: 4459.424805, avg loss: 2.620109, ppl: 13.737215 +epoch: 2, batch: 9427, sum loss: 4853.316406, avg loss: 2.631950, ppl: 13.900855 +epoch: 2, batch: 9428, sum loss: 3365.666992, avg loss: 2.295816, ppl: 9.932542 +epoch: 2, batch: 9429, sum loss: 3251.794189, avg loss: 2.239528, ppl: 9.388896 +epoch: 2, batch: 9430, sum loss: 4164.688965, avg loss: 2.625907, ppl: 13.817103 +epoch: 2, batch: 9431, sum loss: 4166.154785, avg loss: 2.535700, ppl: 12.625260 +epoch: 2, batch: 9432, sum loss: 4780.233398, avg loss: 2.481949, ppl: 11.964559 +epoch: 2, batch: 9433, sum loss: 4313.400879, avg loss: 2.552308, ppl: 12.836698 +epoch: 2, batch: 9434, sum loss: 3537.928467, avg loss: 2.213973, ppl: 9.152003 +epoch: 2, batch: 9435, sum loss: 3889.404785, avg loss: 2.506060, ppl: 12.256542 +epoch: 2, batch: 9436, sum loss: 3661.057129, avg loss: 2.563766, ppl: 12.984619 +epoch: 2, batch: 9437, sum loss: 4382.166992, avg loss: 2.565671, ppl: 13.009391 +epoch: 2, batch: 9438, sum loss: 3469.297852, avg loss: 2.169667, ppl: 8.755370 +epoch: 2, batch: 9439, sum loss: 3918.883545, avg loss: 2.603909, ppl: 13.516478 +epoch: 2, batch: 9440, sum loss: 3764.084229, avg loss: 2.612134, ppl: 13.628096 +epoch: 2, batch: 9441, sum loss: 4650.122070, avg 
loss: 2.581967, ppl: 13.223118 +epoch: 2, batch: 9442, sum loss: 4940.398438, avg loss: 2.676272, ppl: 14.530824 +epoch: 2, batch: 9443, sum loss: 3387.323242, avg loss: 2.249219, ppl: 9.480326 +epoch: 2, batch: 9444, sum loss: 3103.284424, avg loss: 2.204037, ppl: 9.061522 +epoch: 2, batch: 9445, sum loss: 4554.579590, avg loss: 2.586360, ppl: 13.281336 +epoch: 2, batch: 9446, sum loss: 5190.832031, avg loss: 2.983237, ppl: 19.751646 +epoch: 2, batch: 9447, sum loss: 3712.734375, avg loss: 2.491768, ppl: 12.082621 +epoch: 2, batch: 9448, sum loss: 3597.570312, avg loss: 2.349818, ppl: 10.483657 +epoch: 2, batch: 9449, sum loss: 3599.479004, avg loss: 2.343411, ppl: 10.416707 +epoch: 2, batch: 9450, sum loss: 4729.549805, avg loss: 2.818564, ppl: 16.752771 +epoch: 2, batch: 9451, sum loss: 4461.191406, avg loss: 2.687465, ppl: 14.694374 +epoch: 2, batch: 9452, sum loss: 3754.519043, avg loss: 2.434837, ppl: 11.413962 +epoch: 2, batch: 9453, sum loss: 3355.809570, avg loss: 2.172045, ppl: 8.776216 +epoch: 2, batch: 9454, sum loss: 4592.345703, avg loss: 2.485036, ppl: 12.001548 +epoch: 2, batch: 9455, sum loss: 4261.636230, avg loss: 2.614501, ppl: 13.660395 +epoch: 2, batch: 9456, sum loss: 3559.348145, avg loss: 2.102391, ppl: 8.185720 +epoch: 2, batch: 9457, sum loss: 3396.328613, avg loss: 2.110832, ppl: 8.255107 +epoch: 2, batch: 9458, sum loss: 3481.918945, avg loss: 2.293754, ppl: 9.912081 +epoch: 2, batch: 9459, sum loss: 4070.117188, avg loss: 2.539062, ppl: 12.667789 +epoch: 2, batch: 9460, sum loss: 4453.279297, avg loss: 2.348776, ppl: 10.472744 +epoch: 2, batch: 9461, sum loss: 5013.837891, avg loss: 2.665517, ppl: 14.375381 +epoch: 2, batch: 9462, sum loss: 4595.851562, avg loss: 2.416326, ppl: 11.204616 +epoch: 2, batch: 9463, sum loss: 5371.186523, avg loss: 2.855495, ppl: 17.383043 +epoch: 2, batch: 9464, sum loss: 3721.680664, avg loss: 2.421393, ppl: 11.261535 +epoch: 2, batch: 9465, sum loss: 4568.456055, avg loss: 2.652994, ppl: 14.196482 
+epoch: 2, batch: 9466, sum loss: 4224.668945, avg loss: 2.625649, ppl: 13.813533 +epoch: 2, batch: 9467, sum loss: 4895.875488, avg loss: 2.627952, ppl: 13.845390 +epoch: 2, batch: 9468, sum loss: 4864.907715, avg loss: 2.635378, ppl: 13.948583 +epoch: 2, batch: 9469, sum loss: 3820.291016, avg loss: 2.464704, ppl: 11.760001 +epoch: 2, batch: 9470, sum loss: 3686.696045, avg loss: 2.305626, ppl: 10.030455 +epoch: 2, batch: 9471, sum loss: 5262.329102, avg loss: 2.918652, ppl: 18.516308 +epoch: 2, batch: 9472, sum loss: 4381.625977, avg loss: 2.748824, ppl: 15.624252 +epoch: 2, batch: 9473, sum loss: 3831.913574, avg loss: 2.366840, ppl: 10.663641 +epoch: 2, batch: 9474, sum loss: 4147.704102, avg loss: 2.707379, ppl: 14.989930 +epoch: 2, batch: 9475, sum loss: 3070.374756, avg loss: 2.202564, ppl: 9.048187 +epoch: 2, batch: 9476, sum loss: 3968.438232, avg loss: 2.526059, ppl: 12.504129 +epoch: 2, batch: 9477, sum loss: 4538.439453, avg loss: 2.734000, ppl: 15.394338 +epoch: 2, batch: 9478, sum loss: 4642.704102, avg loss: 2.592241, ppl: 13.359677 +epoch: 2, batch: 9479, sum loss: 4874.831543, avg loss: 2.674071, ppl: 14.498876 +epoch: 2, batch: 9480, sum loss: 4342.874023, avg loss: 2.809103, ppl: 16.595034 +epoch: 2, batch: 9481, sum loss: 3760.472168, avg loss: 2.308454, ppl: 10.058865 +epoch: 2, batch: 9482, sum loss: 4540.564453, avg loss: 2.508599, ppl: 12.287706 +epoch: 2, batch: 9483, sum loss: 4506.298340, avg loss: 2.714638, ppl: 15.099136 +epoch: 2, batch: 9484, sum loss: 3399.689697, avg loss: 2.411128, ppl: 11.146523 +epoch: 2, batch: 9485, sum loss: 3922.950928, avg loss: 2.462618, ppl: 11.735499 +epoch: 2, batch: 9486, sum loss: 3900.128418, avg loss: 2.442159, ppl: 11.497843 +epoch: 2, batch: 9487, sum loss: 4178.903809, avg loss: 2.641532, ppl: 14.034687 +epoch: 2, batch: 9488, sum loss: 4630.565430, avg loss: 2.636996, ppl: 13.971175 +epoch: 2, batch: 9489, sum loss: 4799.832520, avg loss: 2.539594, ppl: 12.674523 +epoch: 2, batch: 9490, sum 
loss: 5200.936035, avg loss: 2.923517, ppl: 18.606604 +epoch: 2, batch: 9491, sum loss: 5297.245117, avg loss: 2.731947, ppl: 15.362768 +epoch: 2, batch: 9492, sum loss: 3985.837891, avg loss: 2.362678, ppl: 10.619352 +epoch: 2, batch: 9493, sum loss: 4185.745117, avg loss: 2.346270, ppl: 10.446529 +epoch: 2, batch: 9494, sum loss: 5309.504395, avg loss: 2.896620, ppl: 18.112822 +epoch: 2, batch: 9495, sum loss: 4405.022949, avg loss: 2.600368, ppl: 13.468691 +epoch: 2, batch: 9496, sum loss: 4056.729004, avg loss: 2.319456, ppl: 10.170144 +epoch: 2, batch: 9497, sum loss: 3758.717285, avg loss: 2.230693, ppl: 9.306310 +epoch: 2, batch: 9498, sum loss: 4194.733398, avg loss: 2.444483, ppl: 11.524593 +epoch: 2, batch: 9499, sum loss: 3877.585693, avg loss: 2.458837, ppl: 11.691204 +epoch: 2, batch: 9500, sum loss: 4256.929688, avg loss: 2.307279, ppl: 10.047049 +epoch: 2, batch: 9501, sum loss: 4528.763672, avg loss: 2.510401, ppl: 12.309865 +epoch: 2, batch: 9502, sum loss: 4529.013184, avg loss: 2.595423, ppl: 13.402255 +epoch: 2, batch: 9503, sum loss: 4399.436035, avg loss: 2.438712, ppl: 11.458269 +epoch: 2, batch: 9504, sum loss: 4135.908691, avg loss: 2.261295, ppl: 9.595510 +epoch: 2, batch: 9505, sum loss: 3427.587891, avg loss: 2.462348, ppl: 11.732321 +epoch: 2, batch: 9506, sum loss: 4601.416992, avg loss: 2.670584, ppl: 14.448411 +epoch: 2, batch: 9507, sum loss: 4246.203613, avg loss: 2.517015, ppl: 12.391549 +epoch: 2, batch: 9508, sum loss: 4232.917969, avg loss: 2.559201, ppl: 12.925483 +epoch: 2, batch: 9509, sum loss: 4360.106445, avg loss: 2.825733, ppl: 16.873316 +epoch: 2, batch: 9510, sum loss: 3594.097168, avg loss: 2.233746, ppl: 9.334767 +epoch: 2, batch: 9511, sum loss: 3965.948486, avg loss: 2.593819, ppl: 13.380771 +epoch: 2, batch: 9512, sum loss: 3870.430176, avg loss: 2.396551, ppl: 10.985225 +epoch: 2, batch: 9513, sum loss: 4823.890137, avg loss: 2.641780, ppl: 14.038171 +epoch: 2, batch: 9514, sum loss: 3634.055908, avg loss: 
2.419478, ppl: 11.239990 +epoch: 2, batch: 9515, sum loss: 4236.490234, avg loss: 2.530759, ppl: 12.563036 +epoch: 2, batch: 9516, sum loss: 4350.127441, avg loss: 2.392809, ppl: 10.944198 +epoch: 2, batch: 9517, sum loss: 4227.046387, avg loss: 2.430734, ppl: 11.367225 +epoch: 2, batch: 9518, sum loss: 4549.966797, avg loss: 2.561918, ppl: 12.960655 +epoch: 2, batch: 9519, sum loss: 3865.972412, avg loss: 2.368856, ppl: 10.685159 +epoch: 2, batch: 9520, sum loss: 5108.960938, avg loss: 2.665082, ppl: 14.369121 +epoch: 2, batch: 9521, sum loss: 5561.275879, avg loss: 2.581836, ppl: 13.221396 +epoch: 2, batch: 9522, sum loss: 4831.648926, avg loss: 2.548338, ppl: 12.785838 +epoch: 2, batch: 9523, sum loss: 4512.682129, avg loss: 2.578676, ppl: 13.179670 +epoch: 2, batch: 9524, sum loss: 4419.389648, avg loss: 2.481409, ppl: 11.958105 +epoch: 2, batch: 9525, sum loss: 3533.534180, avg loss: 2.360410, ppl: 10.595297 +epoch: 2, batch: 9526, sum loss: 4310.898926, avg loss: 2.556880, ppl: 12.895514 +epoch: 2, batch: 9527, sum loss: 4091.857422, avg loss: 2.463490, ppl: 11.745735 +epoch: 2, batch: 9528, sum loss: 5210.575195, avg loss: 2.657101, ppl: 14.254907 +epoch: 2, batch: 9529, sum loss: 3959.942383, avg loss: 2.148639, ppl: 8.573186 +epoch: 2, batch: 9530, sum loss: 4185.688965, avg loss: 2.476739, ppl: 11.902387 +epoch: 2, batch: 9531, sum loss: 4301.345215, avg loss: 2.393626, ppl: 10.953135 +epoch: 2, batch: 9532, sum loss: 4736.838867, avg loss: 2.738057, ppl: 15.456925 +epoch: 2, batch: 9533, sum loss: 4998.444336, avg loss: 2.781549, ppl: 16.144016 +epoch: 2, batch: 9534, sum loss: 4553.835449, avg loss: 2.635321, ppl: 13.947794 +epoch: 2, batch: 9535, sum loss: 4308.221191, avg loss: 2.464658, ppl: 11.759455 +epoch: 2, batch: 9536, sum loss: 4770.578125, avg loss: 2.832885, ppl: 16.994419 +epoch: 2, batch: 9537, sum loss: 4136.179688, avg loss: 2.371663, ppl: 10.715193 +epoch: 2, batch: 9538, sum loss: 4317.221191, avg loss: 2.568246, ppl: 13.042923 +epoch: 
2, batch: 9539, sum loss: 3668.681152, avg loss: 2.326367, ppl: 10.240673 +epoch: 2, batch: 9540, sum loss: 2858.500488, avg loss: 2.062410, ppl: 7.864902 +epoch: 2, batch: 9541, sum loss: 4195.410156, avg loss: 2.761955, ppl: 15.830766 +epoch: 2, batch: 9542, sum loss: 5147.593750, avg loss: 2.831460, ppl: 16.970215 +epoch: 2, batch: 9543, sum loss: 3472.703125, avg loss: 2.344837, ppl: 10.431569 +epoch: 2, batch: 9544, sum loss: 4818.585938, avg loss: 2.600424, ppl: 13.469448 +epoch: 2, batch: 9545, sum loss: 3953.068848, avg loss: 2.388561, ppl: 10.897803 +epoch: 2, batch: 9546, sum loss: 4330.682617, avg loss: 2.626248, ppl: 13.821812 +epoch: 2, batch: 9547, sum loss: 4220.927734, avg loss: 2.576879, ppl: 13.156012 +epoch: 2, batch: 9548, sum loss: 4777.620117, avg loss: 2.667571, ppl: 14.404938 +epoch: 2, batch: 9549, sum loss: 4380.526367, avg loss: 2.448589, ppl: 11.572011 +epoch: 2, batch: 9550, sum loss: 4229.875977, avg loss: 2.447845, ppl: 11.563400 +epoch: 2, batch: 9551, sum loss: 4383.279297, avg loss: 2.368060, ppl: 10.676661 +epoch: 2, batch: 9552, sum loss: 4345.287109, avg loss: 2.374474, ppl: 10.745358 +epoch: 2, batch: 9553, sum loss: 3921.251953, avg loss: 2.388095, ppl: 10.892725 +epoch: 2, batch: 9554, sum loss: 4028.233398, avg loss: 2.387809, ppl: 10.889606 +epoch: 2, batch: 9555, sum loss: 5362.642578, avg loss: 2.782897, ppl: 16.165789 +epoch: 2, batch: 9556, sum loss: 4155.314453, avg loss: 2.753688, ppl: 15.700422 +epoch: 2, batch: 9557, sum loss: 4094.322021, avg loss: 2.485927, ppl: 12.012252 +epoch: 2, batch: 9558, sum loss: 5059.973633, avg loss: 2.698653, ppl: 14.859698 +epoch: 2, batch: 9559, sum loss: 4949.088867, avg loss: 2.933663, ppl: 18.796349 +epoch: 2, batch: 9560, sum loss: 4742.389648, avg loss: 2.755601, ppl: 15.730494 +epoch: 2, batch: 9561, sum loss: 4222.700195, avg loss: 2.442279, ppl: 11.499216 +epoch: 2, batch: 9562, sum loss: 4157.940918, avg loss: 2.417408, ppl: 11.216743 +epoch: 2, batch: 9563, sum loss: 
4208.824707, avg loss: 2.499302, ppl: 12.173995 +epoch: 2, batch: 9564, sum loss: 4524.026367, avg loss: 2.591080, ppl: 13.344181 +epoch: 2, batch: 9565, sum loss: 3849.119141, avg loss: 2.317350, ppl: 10.148748 +epoch: 2, batch: 9566, sum loss: 4406.588867, avg loss: 2.743829, ppl: 15.546391 +epoch: 2, batch: 9567, sum loss: 4465.373535, avg loss: 2.717817, ppl: 15.147221 +epoch: 2, batch: 9568, sum loss: 4948.023438, avg loss: 2.871749, ppl: 17.667891 +epoch: 2, batch: 9569, sum loss: 4192.855957, avg loss: 2.710314, ppl: 15.033999 +epoch: 2, batch: 9570, sum loss: 4140.020508, avg loss: 2.487993, ppl: 12.037096 +epoch: 2, batch: 9571, sum loss: 4174.463867, avg loss: 2.490730, ppl: 12.070087 +epoch: 2, batch: 9572, sum loss: 4985.199219, avg loss: 2.680215, ppl: 14.588224 +epoch: 2, batch: 9573, sum loss: 4335.412109, avg loss: 2.560787, ppl: 12.946001 +epoch: 2, batch: 9574, sum loss: 2719.261230, avg loss: 2.269834, ppl: 9.677794 +epoch: 2, batch: 9575, sum loss: 4456.108398, avg loss: 2.644575, ppl: 14.077456 +epoch: 2, batch: 9576, sum loss: 4829.833984, avg loss: 2.637812, ppl: 13.982581 +epoch: 2, batch: 9577, sum loss: 3873.317139, avg loss: 2.500528, ppl: 12.188924 +epoch: 2, batch: 9578, sum loss: 3602.015625, avg loss: 2.304553, ppl: 10.019694 +epoch: 2, batch: 9579, sum loss: 4513.796875, avg loss: 2.803601, ppl: 16.503963 +epoch: 2, batch: 9580, sum loss: 3647.671875, avg loss: 2.549037, ppl: 12.794776 +epoch: 2, batch: 9581, sum loss: 4179.300293, avg loss: 2.508584, ppl: 12.287513 +epoch: 2, batch: 9582, sum loss: 4851.443359, avg loss: 2.704261, ppl: 14.943263 +epoch: 2, batch: 9583, sum loss: 4047.668945, avg loss: 2.521912, ppl: 12.452384 +epoch: 2, batch: 9584, sum loss: 4709.279785, avg loss: 2.629414, ppl: 13.865637 +epoch: 2, batch: 9585, sum loss: 3737.372559, avg loss: 2.411208, ppl: 11.147421 +epoch: 2, batch: 9586, sum loss: 4599.599609, avg loss: 2.696131, ppl: 14.822273 +epoch: 2, batch: 9587, sum loss: 4736.275879, avg loss: 2.388440, 
ppl: 10.896479 +epoch: 2, batch: 9588, sum loss: 4656.043945, avg loss: 2.491195, ppl: 12.075701 +epoch: 2, batch: 9589, sum loss: 4141.351562, avg loss: 2.350370, ppl: 10.489450 +epoch: 2, batch: 9590, sum loss: 4438.610352, avg loss: 2.786322, ppl: 16.221243 +epoch: 2, batch: 9591, sum loss: 5186.641602, avg loss: 2.571464, ppl: 13.084961 +epoch: 2, batch: 9592, sum loss: 4536.457031, avg loss: 2.703490, ppl: 14.931760 +epoch: 2, batch: 9593, sum loss: 4155.055176, avg loss: 2.547551, ppl: 12.775773 +epoch: 2, batch: 9594, sum loss: 3960.316162, avg loss: 2.537038, ppl: 12.642167 +epoch: 2, batch: 9595, sum loss: 4984.995605, avg loss: 2.716619, ppl: 15.129085 +epoch: 2, batch: 9596, sum loss: 3917.367920, avg loss: 2.390096, ppl: 10.914544 +epoch: 2, batch: 9597, sum loss: 3930.273926, avg loss: 2.558772, ppl: 12.919943 +epoch: 2, batch: 9598, sum loss: 4568.881836, avg loss: 2.488498, ppl: 12.043171 +epoch: 2, batch: 9599, sum loss: 4508.003418, avg loss: 2.658021, ppl: 14.268024 +epoch: 2, batch: 9600, sum loss: 3535.255371, avg loss: 2.321244, ppl: 10.188346 +epoch: 2, batch: 9601, sum loss: 3815.303223, avg loss: 2.518352, ppl: 12.408132 +epoch: 2, batch: 9602, sum loss: 4149.597168, avg loss: 2.571002, ppl: 13.078923 +epoch: 2, batch: 9603, sum loss: 4358.103027, avg loss: 2.484665, ppl: 11.997106 +epoch: 2, batch: 9604, sum loss: 4361.831055, avg loss: 2.454604, ppl: 11.641822 +epoch: 2, batch: 9605, sum loss: 4528.729492, avg loss: 2.708570, ppl: 15.007806 +epoch: 2, batch: 9606, sum loss: 4706.773438, avg loss: 2.706598, ppl: 14.978230 +epoch: 2, batch: 9607, sum loss: 4067.460938, avg loss: 2.457680, ppl: 11.677693 +epoch: 2, batch: 9608, sum loss: 4039.473633, avg loss: 2.415953, ppl: 11.200441 +epoch: 2, batch: 9609, sum loss: 4257.564941, avg loss: 2.516291, ppl: 12.382589 +epoch: 2, batch: 9610, sum loss: 3783.799805, avg loss: 2.260334, ppl: 9.586295 +epoch: 2, batch: 9611, sum loss: 5451.596680, avg loss: 2.821738, ppl: 16.806026 +epoch: 2, batch: 
9612, sum loss: 4397.503906, avg loss: 2.477467, ppl: 11.911054 +epoch: 2, batch: 9613, sum loss: 4149.129395, avg loss: 2.384557, ppl: 10.854253 +epoch: 2, batch: 9614, sum loss: 4415.042969, avg loss: 2.415231, ppl: 11.192361 +epoch: 2, batch: 9615, sum loss: 4647.976074, avg loss: 2.625975, ppl: 13.818042 +epoch: 2, batch: 9616, sum loss: 3135.339600, avg loss: 2.296952, ppl: 9.943830 +epoch: 2, batch: 9617, sum loss: 4205.232422, avg loss: 2.561043, ppl: 12.949317 +epoch: 2, batch: 9618, sum loss: 4223.793945, avg loss: 2.438680, ppl: 11.457909 +epoch: 2, batch: 9619, sum loss: 3674.041016, avg loss: 2.371879, ppl: 10.717515 +epoch: 2, batch: 9620, sum loss: 4312.332520, avg loss: 2.505713, ppl: 12.252295 +epoch: 2, batch: 9621, sum loss: 4180.510742, avg loss: 2.388863, ppl: 10.901096 +epoch: 2, batch: 9622, sum loss: 4587.336914, avg loss: 2.613867, ppl: 13.651741 +epoch: 2, batch: 9623, sum loss: 4589.574219, avg loss: 2.429632, ppl: 11.354699 +epoch: 2, batch: 9624, sum loss: 3320.395264, avg loss: 2.177308, ppl: 8.822527 +epoch: 2, batch: 9625, sum loss: 3583.971191, avg loss: 2.208239, ppl: 9.099674 +epoch: 2, batch: 9626, sum loss: 4033.902344, avg loss: 2.419857, ppl: 11.244254 +epoch: 2, batch: 9627, sum loss: 4463.280273, avg loss: 2.602496, ppl: 13.497381 +epoch: 2, batch: 9628, sum loss: 3795.450684, avg loss: 2.500297, ppl: 12.186110 +epoch: 2, batch: 9629, sum loss: 3459.029541, avg loss: 2.231632, ppl: 9.315056 +epoch: 2, batch: 9630, sum loss: 4769.416992, avg loss: 2.768089, ppl: 15.928164 +epoch: 2, batch: 9631, sum loss: 3170.723145, avg loss: 2.173217, ppl: 8.786503 +epoch: 2, batch: 9632, sum loss: 3727.862061, avg loss: 2.180036, ppl: 8.846627 +epoch: 2, batch: 9633, sum loss: 5786.617676, avg loss: 2.523601, ppl: 12.473436 +epoch: 2, batch: 9634, sum loss: 4161.359375, avg loss: 2.459432, ppl: 11.698170 +epoch: 2, batch: 9635, sum loss: 3837.230957, avg loss: 2.332663, ppl: 10.305348 +epoch: 2, batch: 9636, sum loss: 4606.000000, avg 
loss: 2.667053, ppl: 14.397473 +epoch: 2, batch: 9637, sum loss: 4147.325195, avg loss: 2.544371, ppl: 12.735220 +epoch: 2, batch: 9638, sum loss: 3904.909668, avg loss: 2.613728, ppl: 13.649843 +epoch: 2, batch: 9639, sum loss: 3825.808105, avg loss: 2.388145, ppl: 10.893266 +epoch: 2, batch: 9640, sum loss: 4012.129639, avg loss: 2.495106, ppl: 12.123013 +epoch: 2, batch: 9641, sum loss: 4267.895508, avg loss: 2.397694, ppl: 10.997787 +epoch: 2, batch: 9642, sum loss: 4396.739258, avg loss: 2.559220, ppl: 12.925726 +epoch: 2, batch: 9643, sum loss: 4031.514648, avg loss: 2.592614, ppl: 13.364663 +epoch: 2, batch: 9644, sum loss: 4919.366699, avg loss: 2.469562, ppl: 11.817267 +epoch: 2, batch: 9645, sum loss: 3392.890381, avg loss: 2.147399, ppl: 8.562558 +epoch: 2, batch: 9646, sum loss: 4071.687988, avg loss: 2.371397, ppl: 10.712342 +epoch: 2, batch: 9647, sum loss: 4637.291016, avg loss: 2.554981, ppl: 12.871061 +epoch: 2, batch: 9648, sum loss: 4408.324219, avg loss: 2.727923, ppl: 15.301080 +epoch: 2, batch: 9649, sum loss: 3626.595947, avg loss: 2.485672, ppl: 12.009193 +epoch: 2, batch: 9650, sum loss: 3919.368164, avg loss: 2.540096, ppl: 12.680889 +epoch: 2, batch: 9651, sum loss: 5460.172363, avg loss: 2.639039, ppl: 13.999747 +epoch: 2, batch: 9652, sum loss: 3495.547119, avg loss: 2.341291, ppl: 10.394645 +epoch: 2, batch: 9653, sum loss: 3919.886719, avg loss: 2.220899, ppl: 9.215613 +epoch: 2, batch: 9654, sum loss: 4295.359375, avg loss: 2.458706, ppl: 11.689674 +epoch: 2, batch: 9655, sum loss: 3878.895264, avg loss: 2.465922, ppl: 11.774332 +epoch: 2, batch: 9656, sum loss: 4561.219727, avg loss: 2.545324, ppl: 12.747353 +epoch: 2, batch: 9657, sum loss: 4356.716797, avg loss: 2.256197, ppl: 9.546716 +epoch: 2, batch: 9658, sum loss: 4392.896484, avg loss: 2.614820, ppl: 13.664750 +epoch: 2, batch: 9659, sum loss: 4524.782227, avg loss: 2.356658, ppl: 10.555611 +epoch: 2, batch: 9660, sum loss: 4729.387695, avg loss: 2.748046, ppl: 15.612099 
+epoch: 2, batch: 9661, sum loss: 4899.479492, avg loss: 2.586843, ppl: 13.287749 +epoch: 2, batch: 9662, sum loss: 3904.639160, avg loss: 2.506187, ppl: 12.258101 +epoch: 2, batch: 9663, sum loss: 3612.608398, avg loss: 2.467629, ppl: 11.794446 +epoch: 2, batch: 9664, sum loss: 4423.774414, avg loss: 2.560055, ppl: 12.936522 +epoch: 2, batch: 9665, sum loss: 3338.191162, avg loss: 2.238894, ppl: 9.382950 +epoch: 2, batch: 9666, sum loss: 3837.659180, avg loss: 2.696879, ppl: 14.833366 +epoch: 2, batch: 9667, sum loss: 5043.426270, avg loss: 2.885256, ppl: 17.908144 +epoch: 2, batch: 9668, sum loss: 4329.726562, avg loss: 2.566525, ppl: 13.020494 +epoch: 2, batch: 9669, sum loss: 4054.467285, avg loss: 2.357248, ppl: 10.561849 +epoch: 2, batch: 9670, sum loss: 3838.398438, avg loss: 2.369382, ppl: 10.690783 +epoch: 2, batch: 9671, sum loss: 3516.248779, avg loss: 2.570357, ppl: 13.070494 +epoch: 2, batch: 9672, sum loss: 4680.717285, avg loss: 2.927278, ppl: 18.676720 +epoch: 2, batch: 9673, sum loss: 3927.177490, avg loss: 2.409311, ppl: 11.126296 +epoch: 2, batch: 9674, sum loss: 5029.308105, avg loss: 2.828632, ppl: 16.922300 +epoch: 2, batch: 9675, sum loss: 4683.716797, avg loss: 2.516774, ppl: 12.388572 +epoch: 2, batch: 9676, sum loss: 4372.654785, avg loss: 2.557108, ppl: 12.898460 +epoch: 2, batch: 9677, sum loss: 4726.038086, avg loss: 2.595298, ppl: 13.400584 +epoch: 2, batch: 9678, sum loss: 3531.640625, avg loss: 2.233802, ppl: 9.335288 +epoch: 2, batch: 9679, sum loss: 5274.195801, avg loss: 2.770061, ppl: 15.959608 +epoch: 2, batch: 9680, sum loss: 4139.703125, avg loss: 2.603587, ppl: 13.512118 +epoch: 2, batch: 9681, sum loss: 4348.725586, avg loss: 2.452750, ppl: 11.620258 +epoch: 2, batch: 9682, sum loss: 5157.098145, avg loss: 2.650102, ppl: 14.155481 +epoch: 2, batch: 9683, sum loss: 3729.440674, avg loss: 2.474745, ppl: 11.878678 +epoch: 2, batch: 9684, sum loss: 3915.541748, avg loss: 2.383166, ppl: 10.839164 +epoch: 2, batch: 9685, sum loss: 
4750.925781, avg loss: 2.504442, ppl: 12.236726 +epoch: 2, batch: 9686, sum loss: 3622.922119, avg loss: 2.358673, ppl: 10.576910 +epoch: 2, batch: 9687, sum loss: 3747.052979, avg loss: 2.362581, ppl: 10.618320 +epoch: 2, batch: 9688, sum loss: 4647.274414, avg loss: 2.581819, ppl: 13.221169 +epoch: 2, batch: 9689, sum loss: 4059.540039, avg loss: 2.615683, ppl: 13.676556 +epoch: 2, batch: 9690, sum loss: 3885.993652, avg loss: 2.554894, ppl: 12.869935 +epoch: 2, batch: 9691, sum loss: 4257.270508, avg loss: 2.592735, ppl: 13.366276 +epoch: 2, batch: 9692, sum loss: 3871.912109, avg loss: 2.449027, ppl: 11.577078 +epoch: 2, batch: 9693, sum loss: 4511.218750, avg loss: 2.585226, ppl: 13.266281 +epoch: 2, batch: 9694, sum loss: 3656.320801, avg loss: 2.416603, ppl: 11.207726 +epoch: 2, batch: 9695, sum loss: 4012.690186, avg loss: 2.367369, ppl: 10.669284 +epoch: 2, batch: 9696, sum loss: 3830.504395, avg loss: 2.324335, ppl: 10.219883 +epoch: 2, batch: 9697, sum loss: 4289.475586, avg loss: 2.524706, ppl: 12.487225 +epoch: 2, batch: 9698, sum loss: 4327.468750, avg loss: 2.650011, ppl: 14.154199 +epoch: 2, batch: 9699, sum loss: 4031.176758, avg loss: 2.319434, ppl: 10.169918 +epoch: 2, batch: 9700, sum loss: 3910.855469, avg loss: 2.394890, ppl: 10.966992 +epoch: 2, batch: 9701, sum loss: 3617.850586, avg loss: 2.311726, ppl: 10.091825 +epoch: 2, batch: 9702, sum loss: 4013.458984, avg loss: 2.720989, ppl: 15.195347 +epoch: 2, batch: 9703, sum loss: 4334.807617, avg loss: 2.474205, ppl: 11.872271 +epoch: 2, batch: 9704, sum loss: 4056.008789, avg loss: 2.371935, ppl: 10.718111 +epoch: 2, batch: 9705, sum loss: 3879.396973, avg loss: 2.332770, ppl: 10.306455 +epoch: 2, batch: 9706, sum loss: 4876.310059, avg loss: 2.553042, ppl: 12.846122 +epoch: 2, batch: 9707, sum loss: 4397.087891, avg loss: 2.600289, ppl: 13.467625 +epoch: 2, batch: 9708, sum loss: 4748.931641, avg loss: 2.801730, ppl: 16.473116 +epoch: 2, batch: 9709, sum loss: 3647.152344, avg loss: 
2.351484, ppl: 10.501145 +epoch: 2, batch: 9710, sum loss: 4462.550293, avg loss: 2.548573, ppl: 12.788836 +epoch: 2, batch: 9711, sum loss: 3327.691406, avg loss: 2.218461, ppl: 9.193170 +epoch: 2, batch: 9712, sum loss: 3621.006104, avg loss: 2.358962, ppl: 10.579959 +epoch: 2, batch: 9713, sum loss: 4238.109863, avg loss: 2.452610, ppl: 11.618629 +epoch: 2, batch: 9714, sum loss: 4256.936523, avg loss: 2.587803, ppl: 13.300520 +epoch: 2, batch: 9715, sum loss: 5645.582520, avg loss: 2.794843, ppl: 16.360060 +epoch: 2, batch: 9716, sum loss: 4259.931152, avg loss: 2.677518, ppl: 14.548940 +epoch: 2, batch: 9717, sum loss: 5172.432617, avg loss: 2.653891, ppl: 14.209214 +epoch: 2, batch: 9718, sum loss: 3258.947021, avg loss: 2.408682, ppl: 11.119297 +epoch: 2, batch: 9719, sum loss: 4683.393066, avg loss: 2.763064, ppl: 15.848323 +epoch: 2, batch: 9720, sum loss: 3972.795410, avg loss: 2.449319, ppl: 11.580457 +epoch: 2, batch: 9721, sum loss: 4397.158691, avg loss: 2.637768, ppl: 13.981955 +epoch: 2, batch: 9722, sum loss: 4416.682129, avg loss: 2.632111, ppl: 13.903086 +epoch: 2, batch: 9723, sum loss: 3761.004395, avg loss: 2.344766, ppl: 10.430831 +epoch: 2, batch: 9724, sum loss: 4779.185059, avg loss: 2.659535, ppl: 14.289649 +epoch: 2, batch: 9725, sum loss: 4738.346680, avg loss: 2.452560, ppl: 11.618056 +epoch: 2, batch: 9726, sum loss: 4699.921875, avg loss: 2.596642, ppl: 13.418603 +epoch: 2, batch: 9727, sum loss: 3166.354980, avg loss: 2.312896, ppl: 10.103645 +epoch: 2, batch: 9728, sum loss: 4400.084961, avg loss: 2.331789, ppl: 10.296341 +epoch: 2, batch: 9729, sum loss: 4336.428223, avg loss: 2.601336, ppl: 13.481744 +epoch: 2, batch: 9730, sum loss: 3773.496826, avg loss: 2.201573, ppl: 9.039225 +epoch: 2, batch: 9731, sum loss: 4062.113770, avg loss: 2.419365, ppl: 11.238720 +epoch: 2, batch: 9732, sum loss: 4319.769531, avg loss: 2.597576, ppl: 13.431147 +epoch: 2, batch: 9733, sum loss: 4409.813477, avg loss: 2.551975, ppl: 12.832429 +epoch: 
2, batch: 9734, sum loss: 4741.401367, avg loss: 2.618113, ppl: 13.709823 +epoch: 2, batch: 9735, sum loss: 3650.831299, avg loss: 2.400284, ppl: 11.026302 +epoch: 2, batch: 9736, sum loss: 3558.835938, avg loss: 2.232645, ppl: 9.324495 +epoch: 2, batch: 9737, sum loss: 4427.679688, avg loss: 2.694875, ppl: 14.803668 +epoch: 2, batch: 9738, sum loss: 4448.632812, avg loss: 2.722542, ppl: 15.218953 +epoch: 2, batch: 9739, sum loss: 3129.514648, avg loss: 2.193073, ppl: 8.962711 +epoch: 2, batch: 9740, sum loss: 4037.722168, avg loss: 2.528317, ppl: 12.532396 +epoch: 2, batch: 9741, sum loss: 4539.336914, avg loss: 2.614825, ppl: 13.664831 +epoch: 2, batch: 9742, sum loss: 4183.247070, avg loss: 2.532232, ppl: 12.581557 +epoch: 2, batch: 9743, sum loss: 3373.939453, avg loss: 2.371005, ppl: 10.708144 +epoch: 2, batch: 9744, sum loss: 4085.013916, avg loss: 2.459370, ppl: 11.697441 +epoch: 2, batch: 9745, sum loss: 4385.496094, avg loss: 2.462379, ppl: 11.732684 +epoch: 2, batch: 9746, sum loss: 4210.389648, avg loss: 2.686911, ppl: 14.686245 +epoch: 2, batch: 9747, sum loss: 3141.209473, avg loss: 2.396041, ppl: 10.979618 +epoch: 2, batch: 9748, sum loss: 4732.468750, avg loss: 2.643837, ppl: 14.067079 +epoch: 2, batch: 9749, sum loss: 4157.929688, avg loss: 2.463228, ppl: 11.742661 +epoch: 2, batch: 9750, sum loss: 4139.492188, avg loss: 2.580731, ppl: 13.206788 +epoch: 2, batch: 9751, sum loss: 3799.489014, avg loss: 2.549993, ppl: 12.807012 +epoch: 2, batch: 9752, sum loss: 3975.159912, avg loss: 2.419452, ppl: 11.239697 +epoch: 2, batch: 9753, sum loss: 4039.393555, avg loss: 2.624687, ppl: 13.800257 +epoch: 2, batch: 9754, sum loss: 4013.074951, avg loss: 2.466549, ppl: 11.781714 +epoch: 2, batch: 9755, sum loss: 4374.475586, avg loss: 2.485497, ppl: 12.007092 +epoch: 2, batch: 9756, sum loss: 4400.387207, avg loss: 2.706265, ppl: 14.973249 +epoch: 2, batch: 9757, sum loss: 4800.976074, avg loss: 2.617762, ppl: 13.705019 +epoch: 2, batch: 9758, sum loss: 
3329.625000, avg loss: 2.488509, ppl: 12.043305 +epoch: 2, batch: 9759, sum loss: 3330.927490, avg loss: 2.314751, ppl: 10.122406 +epoch: 2, batch: 9760, sum loss: 4263.971191, avg loss: 2.517102, ppl: 12.392633 +epoch: 2, batch: 9761, sum loss: 4358.587402, avg loss: 2.480699, ppl: 11.949610 +epoch: 2, batch: 9762, sum loss: 4413.474609, avg loss: 2.503389, ppl: 12.223849 +epoch: 2, batch: 9763, sum loss: 4835.018555, avg loss: 2.722420, ppl: 15.217110 +epoch: 2, batch: 9764, sum loss: 4278.916504, avg loss: 2.537910, ppl: 12.653201 +epoch: 2, batch: 9765, sum loss: 4315.102539, avg loss: 2.585442, ppl: 13.269154 +epoch: 2, batch: 9766, sum loss: 4827.911621, avg loss: 2.654157, ppl: 14.212998 +epoch: 2, batch: 9767, sum loss: 4519.557617, avg loss: 2.400190, ppl: 11.025270 +epoch: 2, batch: 9768, sum loss: 3899.600098, avg loss: 2.660027, ppl: 14.296679 +epoch: 2, batch: 9769, sum loss: 3224.558594, avg loss: 2.229985, ppl: 9.299728 +epoch: 2, batch: 9770, sum loss: 3421.448730, avg loss: 2.440406, ppl: 11.477695 +epoch: 2, batch: 9771, sum loss: 4497.080566, avg loss: 2.745470, ppl: 15.571936 +epoch: 2, batch: 9772, sum loss: 4246.363281, avg loss: 2.520097, ppl: 12.429799 +epoch: 2, batch: 9773, sum loss: 4927.311035, avg loss: 2.581095, ppl: 13.211603 +epoch: 2, batch: 9774, sum loss: 4045.574707, avg loss: 2.647627, ppl: 14.120496 +epoch: 2, batch: 9775, sum loss: 4089.462891, avg loss: 2.437105, ppl: 11.439879 +epoch: 2, batch: 9776, sum loss: 4854.481445, avg loss: 2.657078, ppl: 14.254577 +epoch: 2, batch: 9777, sum loss: 4988.250000, avg loss: 2.721359, ppl: 15.200959 +epoch: 2, batch: 9778, sum loss: 4381.875000, avg loss: 2.505360, ppl: 12.247969 +epoch: 2, batch: 9779, sum loss: 3707.300049, avg loss: 2.476486, ppl: 11.899379 +epoch: 2, batch: 9780, sum loss: 5026.103516, avg loss: 2.719753, ppl: 15.176574 +epoch: 2, batch: 9781, sum loss: 4005.122803, avg loss: 2.544551, ppl: 12.737503 +epoch: 2, batch: 9782, sum loss: 4358.271484, avg loss: 2.735889, 
ppl: 15.423451 +epoch: 2, batch: 9783, sum loss: 4898.839844, avg loss: 2.616902, ppl: 13.693232 +epoch: 2, batch: 9784, sum loss: 3940.319092, avg loss: 2.492295, ppl: 12.088992 +epoch: 2, batch: 9785, sum loss: 4020.653076, avg loss: 2.673306, ppl: 14.487794 +epoch: 2, batch: 9786, sum loss: 4616.845703, avg loss: 2.679539, ppl: 14.578371 +epoch: 2, batch: 9787, sum loss: 4400.731445, avg loss: 2.588666, ppl: 13.311997 +epoch: 2, batch: 9788, sum loss: 4104.175293, avg loss: 2.549177, ppl: 12.796570 +epoch: 2, batch: 9789, sum loss: 3838.416260, avg loss: 2.326313, ppl: 10.240117 +epoch: 2, batch: 9790, sum loss: 4825.336914, avg loss: 2.547696, ppl: 12.777635 +epoch: 2, batch: 9791, sum loss: 4612.566406, avg loss: 2.748848, ppl: 15.624618 +epoch: 2, batch: 9792, sum loss: 5578.872070, avg loss: 2.767298, ppl: 15.915568 +epoch: 2, batch: 9793, sum loss: 3439.031250, avg loss: 2.171106, ppl: 8.767973 +epoch: 2, batch: 9794, sum loss: 3439.334473, avg loss: 2.512297, ppl: 12.333226 +epoch: 2, batch: 9795, sum loss: 4180.505371, avg loss: 2.647565, ppl: 14.119617 +epoch: 2, batch: 9796, sum loss: 3419.794434, avg loss: 2.430557, ppl: 11.365216 +epoch: 2, batch: 9797, sum loss: 3309.401855, avg loss: 2.306203, ppl: 10.036248 +epoch: 2, batch: 9798, sum loss: 4341.606445, avg loss: 2.715201, ppl: 15.107645 +epoch: 2, batch: 9799, sum loss: 4076.907715, avg loss: 2.438342, ppl: 11.454036 +epoch: 2, batch: 9800, sum loss: 4560.585449, avg loss: 2.674830, ppl: 14.509886 +epoch: 2, batch: 9801, sum loss: 5157.323730, avg loss: 2.639367, ppl: 14.004341 +epoch: 2, batch: 9802, sum loss: 4944.488770, avg loss: 2.662622, ppl: 14.333819 +epoch: 2, batch: 9803, sum loss: 4846.396484, avg loss: 2.575131, ppl: 13.133037 +epoch: 2, batch: 9804, sum loss: 5394.939453, avg loss: 2.688062, ppl: 14.703150 +epoch: 2, batch: 9805, sum loss: 4512.632812, avg loss: 2.692502, ppl: 14.768578 +epoch: 2, batch: 9806, sum loss: 4186.171875, avg loss: 2.500700, ppl: 12.191028 +epoch: 2, batch: 
9807, sum loss: 4700.852539, avg loss: 2.639446, ppl: 14.005436 +epoch: 2, batch: 9808, sum loss: 4938.472656, avg loss: 2.669445, ppl: 14.431954 +epoch: 2, batch: 9809, sum loss: 4776.003906, avg loss: 2.608413, ppl: 13.577486 +epoch: 2, batch: 9810, sum loss: 4150.761719, avg loss: 2.568541, ppl: 13.046770 +epoch: 2, batch: 9811, sum loss: 3740.532471, avg loss: 2.430496, ppl: 11.364511 +epoch: 2, batch: 9812, sum loss: 4738.706055, avg loss: 2.536781, ppl: 12.638916 +epoch: 2, batch: 9813, sum loss: 4170.703125, avg loss: 2.422011, ppl: 11.268499 +epoch: 2, batch: 9814, sum loss: 4145.901367, avg loss: 2.356965, ppl: 10.558857 +epoch: 2, batch: 9815, sum loss: 4040.979492, avg loss: 2.546301, ppl: 12.759819 +epoch: 2, batch: 9816, sum loss: 4206.139648, avg loss: 2.347176, ppl: 10.456003 +epoch: 2, batch: 9817, sum loss: 3969.646973, avg loss: 2.479480, ppl: 11.935054 +epoch: 2, batch: 9818, sum loss: 4240.546387, avg loss: 2.417643, ppl: 11.219387 +epoch: 2, batch: 9819, sum loss: 3842.987549, avg loss: 2.261912, ppl: 9.601426 +epoch: 2, batch: 9820, sum loss: 3981.781006, avg loss: 2.483956, ppl: 11.988593 +epoch: 2, batch: 9821, sum loss: 4441.674805, avg loss: 2.594436, ppl: 13.389036 +epoch: 2, batch: 9822, sum loss: 5097.157227, avg loss: 2.834904, ppl: 17.028761 +epoch: 2, batch: 9823, sum loss: 4380.714355, avg loss: 2.593673, ppl: 13.378828 +epoch: 2, batch: 9824, sum loss: 3435.542480, avg loss: 2.453959, ppl: 11.634315 +epoch: 2, batch: 9825, sum loss: 4262.134277, avg loss: 2.730387, ppl: 15.338825 +epoch: 2, batch: 9826, sum loss: 4646.346680, avg loss: 2.595724, ppl: 13.406295 +epoch: 2, batch: 9827, sum loss: 4910.600586, avg loss: 2.683388, ppl: 14.634599 +epoch: 2, batch: 9828, sum loss: 4704.978027, avg loss: 2.767634, ppl: 15.920927 +epoch: 2, batch: 9829, sum loss: 4013.551270, avg loss: 2.513182, ppl: 12.344146 +epoch: 2, batch: 9830, sum loss: 4799.333984, avg loss: 2.766187, ppl: 15.897895 +epoch: 2, batch: 9831, sum loss: 4487.428711, avg 
loss: 2.660005, ppl: 14.296366 +epoch: 2, batch: 9832, sum loss: 3229.667236, avg loss: 2.095826, ppl: 8.132153 +epoch: 2, batch: 9833, sum loss: 4162.847656, avg loss: 2.659966, ppl: 14.295810 +epoch: 2, batch: 9834, sum loss: 3881.056152, avg loss: 2.580489, ppl: 13.203598 +epoch: 2, batch: 9835, sum loss: 4360.548828, avg loss: 2.469167, ppl: 11.812603 +epoch: 2, batch: 9836, sum loss: 6276.810547, avg loss: 2.920805, ppl: 18.556227 +epoch: 2, batch: 9837, sum loss: 4157.649902, avg loss: 2.799764, ppl: 16.440769 +epoch: 2, batch: 9838, sum loss: 3840.792480, avg loss: 2.381149, ppl: 10.817320 +epoch: 2, batch: 9839, sum loss: 3680.066650, avg loss: 2.310148, ppl: 10.075921 +epoch: 2, batch: 9840, sum loss: 4969.306641, avg loss: 2.696314, ppl: 14.824984 +epoch: 2, batch: 9841, sum loss: 4298.972168, avg loss: 2.385667, ppl: 10.866312 +epoch: 2, batch: 9842, sum loss: 4985.274414, avg loss: 2.816539, ppl: 16.718891 +epoch: 2, batch: 9843, sum loss: 3930.854248, avg loss: 2.421968, ppl: 11.268013 +epoch: 2, batch: 9844, sum loss: 5163.700684, avg loss: 2.883138, ppl: 17.870266 +epoch: 2, batch: 9845, sum loss: 4270.088867, avg loss: 2.569247, ppl: 13.055993 +epoch: 2, batch: 9846, sum loss: 3989.966797, avg loss: 2.631904, ppl: 13.900216 +epoch: 2, batch: 9847, sum loss: 3744.748047, avg loss: 2.471781, ppl: 11.843521 +epoch: 2, batch: 9848, sum loss: 4404.121582, avg loss: 2.635621, ppl: 13.951968 +epoch: 2, batch: 9849, sum loss: 5262.315430, avg loss: 2.579566, ppl: 13.191415 +epoch: 2, batch: 9850, sum loss: 5389.333008, avg loss: 2.834999, ppl: 17.030384 +epoch: 2, batch: 9851, sum loss: 4621.349609, avg loss: 2.558887, ppl: 12.921425 +epoch: 2, batch: 9852, sum loss: 4204.628418, avg loss: 2.275232, ppl: 9.730175 +epoch: 2, batch: 9853, sum loss: 3187.046631, avg loss: 2.294490, ppl: 9.919374 +epoch: 2, batch: 9854, sum loss: 4660.913086, avg loss: 2.640744, ppl: 14.023633 +epoch: 2, batch: 9855, sum loss: 3889.960205, avg loss: 2.517774, ppl: 12.400957 
+epoch: 2, batch: 9856, sum loss: 3952.361084, avg loss: 2.337292, ppl: 10.353165 +epoch: 2, batch: 9857, sum loss: 4306.706543, avg loss: 2.340601, ppl: 10.387480 +epoch: 2, batch: 9858, sum loss: 3848.766357, avg loss: 2.277376, ppl: 9.751064 +epoch: 2, batch: 9859, sum loss: 4083.356201, avg loss: 2.617536, ppl: 13.701921 +epoch: 2, batch: 9860, sum loss: 4518.939941, avg loss: 2.462638, ppl: 11.735725 +epoch: 2, batch: 9861, sum loss: 3993.928467, avg loss: 2.366071, ppl: 10.655450 +epoch: 2, batch: 9862, sum loss: 4330.521973, avg loss: 2.681438, ppl: 14.606078 +epoch: 2, batch: 9863, sum loss: 3997.050537, avg loss: 2.437226, ppl: 11.441259 +epoch: 2, batch: 9864, sum loss: 5606.012695, avg loss: 2.693903, ppl: 14.789289 +epoch: 2, batch: 9865, sum loss: 4642.481934, avg loss: 2.466781, ppl: 11.784453 +epoch: 2, batch: 9866, sum loss: 4342.872559, avg loss: 2.605202, ppl: 13.533965 +epoch: 2, batch: 9867, sum loss: 3678.741699, avg loss: 2.428212, ppl: 11.338595 +epoch: 2, batch: 9868, sum loss: 3480.775879, avg loss: 2.300579, ppl: 9.979959 +epoch: 2, batch: 9869, sum loss: 4016.789795, avg loss: 2.561728, ppl: 12.958193 +epoch: 2, batch: 9870, sum loss: 4091.681641, avg loss: 2.575004, ppl: 13.131371 +epoch: 2, batch: 9871, sum loss: 4114.990723, avg loss: 2.498476, ppl: 12.163945 +epoch: 2, batch: 9872, sum loss: 3914.369873, avg loss: 2.431286, ppl: 11.373495 +epoch: 2, batch: 9873, sum loss: 4923.353516, avg loss: 2.654099, ppl: 14.212175 +epoch: 2, batch: 9874, sum loss: 4050.609619, avg loss: 2.377118, ppl: 10.773811 +epoch: 2, batch: 9875, sum loss: 4238.784668, avg loss: 2.380003, ppl: 10.804933 +epoch: 2, batch: 9876, sum loss: 4378.252930, avg loss: 2.648671, ppl: 14.135236 +epoch: 2, batch: 9877, sum loss: 3861.531738, avg loss: 2.553923, ppl: 12.857450 +epoch: 2, batch: 9878, sum loss: 3695.412109, avg loss: 2.587824, ppl: 13.300793 +epoch: 2, batch: 9879, sum loss: 3870.686035, avg loss: 2.351571, ppl: 10.502056 +epoch: 2, batch: 9880, sum loss: 
5253.488770, avg loss: 2.861377, ppl: 17.485592 +epoch: 2, batch: 9881, sum loss: 4021.259277, avg loss: 2.444534, ppl: 11.525181 +epoch: 2, batch: 9882, sum loss: 3820.449219, avg loss: 2.393765, ppl: 10.954663 +epoch: 2, batch: 9883, sum loss: 3698.317871, avg loss: 2.271694, ppl: 9.695811 +epoch: 2, batch: 9884, sum loss: 4107.420898, avg loss: 2.530758, ppl: 12.563030 +epoch: 2, batch: 9885, sum loss: 4130.062012, avg loss: 2.420904, ppl: 11.256029 +epoch: 2, batch: 9886, sum loss: 4104.069824, avg loss: 2.421280, ppl: 11.260264 +epoch: 2, batch: 9887, sum loss: 4284.054688, avg loss: 2.482071, ppl: 11.966020 +epoch: 2, batch: 9888, sum loss: 4752.177734, avg loss: 2.901207, ppl: 18.196098 +epoch: 2, batch: 9889, sum loss: 4678.919922, avg loss: 2.703016, ppl: 14.924670 +epoch: 2, batch: 9890, sum loss: 3642.903320, avg loss: 2.576311, ppl: 13.148539 +epoch: 2, batch: 9891, sum loss: 4037.131592, avg loss: 2.387423, ppl: 10.885404 +epoch: 2, batch: 9892, sum loss: 3548.455322, avg loss: 2.375137, ppl: 10.752490 +epoch: 2, batch: 9893, sum loss: 4196.446289, avg loss: 2.572928, ppl: 13.104143 +epoch: 2, batch: 9894, sum loss: 4050.107422, avg loss: 2.364336, ppl: 10.636971 +epoch: 2, batch: 9895, sum loss: 4269.727539, avg loss: 2.462357, ppl: 11.732435 +epoch: 2, batch: 9896, sum loss: 4483.199707, avg loss: 2.634077, ppl: 13.930453 +epoch: 2, batch: 9897, sum loss: 5011.731934, avg loss: 2.557006, ppl: 12.897144 +epoch: 2, batch: 9898, sum loss: 3644.091553, avg loss: 2.334460, ppl: 10.323881 +epoch: 2, batch: 9899, sum loss: 4656.031738, avg loss: 2.692904, ppl: 14.774526 +epoch: 2, batch: 9900, sum loss: 4068.152100, avg loss: 2.465547, ppl: 11.769917 +epoch: 2, batch: 9901, sum loss: 4122.048828, avg loss: 2.518051, ppl: 12.404393 +epoch: 2, batch: 9902, sum loss: 4326.844238, avg loss: 2.468251, ppl: 11.801790 +epoch: 2, batch: 9903, sum loss: 3641.326660, avg loss: 2.267327, ppl: 9.653559 +epoch: 2, batch: 9904, sum loss: 4947.694824, avg loss: 2.664348, 
ppl: 14.358586 +epoch: 2, batch: 9905, sum loss: 4536.290039, avg loss: 2.510398, ppl: 12.309834 +epoch: 2, batch: 9906, sum loss: 4285.532715, avg loss: 2.717522, ppl: 15.142757 +epoch: 2, batch: 9907, sum loss: 4937.902832, avg loss: 2.596164, ppl: 13.412184 +epoch: 2, batch: 9908, sum loss: 4736.610352, avg loss: 2.776442, ppl: 16.061771 +epoch: 2, batch: 9909, sum loss: 4569.182617, avg loss: 2.425256, ppl: 11.305126 +epoch: 2, batch: 9910, sum loss: 4221.804199, avg loss: 2.591654, ppl: 13.351835 +epoch: 2, batch: 9911, sum loss: 4021.273926, avg loss: 2.410836, ppl: 11.143270 +epoch: 2, batch: 9912, sum loss: 4231.243164, avg loss: 2.595855, ppl: 13.408043 +epoch: 2, batch: 9913, sum loss: 5100.798340, avg loss: 2.532670, ppl: 12.587075 +epoch: 2, batch: 9914, sum loss: 4455.243652, avg loss: 2.434559, ppl: 11.410789 +epoch: 2, batch: 9915, sum loss: 3956.389160, avg loss: 2.461972, ppl: 11.727919 +epoch: 2, batch: 9916, sum loss: 3894.432129, avg loss: 2.387757, ppl: 10.889046 +epoch: 2, batch: 9917, sum loss: 3919.458984, avg loss: 2.590522, ppl: 13.336726 +epoch: 2, batch: 9918, sum loss: 4688.453613, avg loss: 2.389630, ppl: 10.909452 +epoch: 2, batch: 9919, sum loss: 4615.730957, avg loss: 2.668052, ppl: 14.411874 +epoch: 2, batch: 9920, sum loss: 4060.888916, avg loss: 2.541232, ppl: 12.695303 +epoch: 2, batch: 9921, sum loss: 4240.712891, avg loss: 2.546975, ppl: 12.768416 +epoch: 2, batch: 9922, sum loss: 4152.264160, avg loss: 2.636358, ppl: 13.962260 +epoch: 2, batch: 9923, sum loss: 3757.119873, avg loss: 2.438105, ppl: 11.451322 +epoch: 2, batch: 9924, sum loss: 3537.750000, avg loss: 2.100802, ppl: 8.172719 +epoch: 2, batch: 9925, sum loss: 4684.293945, avg loss: 2.603832, ppl: 13.515430 +epoch: 2, batch: 9926, sum loss: 3330.869141, avg loss: 2.261283, ppl: 9.595389 +epoch: 2, batch: 9927, sum loss: 4068.745361, avg loss: 2.400440, ppl: 11.028025 +epoch: 2, batch: 9928, sum loss: 3020.721191, avg loss: 2.293638, ppl: 9.910928 +epoch: 2, batch: 
9929, sum loss: 5473.564941, avg loss: 2.867242, ppl: 17.588444 +epoch: 2, batch: 9930, sum loss: 3646.383545, avg loss: 2.246693, ppl: 9.456415 +epoch: 2, batch: 9931, sum loss: 4800.785645, avg loss: 2.738611, ppl: 15.465496 +epoch: 2, batch: 9932, sum loss: 3857.364014, avg loss: 2.261057, ppl: 9.593227 +epoch: 2, batch: 9933, sum loss: 3744.648438, avg loss: 2.386647, ppl: 10.876957 +epoch: 2, batch: 9934, sum loss: 4852.634766, avg loss: 2.661895, ppl: 14.323410 +epoch: 2, batch: 9935, sum loss: 5255.702148, avg loss: 2.757451, ppl: 15.759622 +epoch: 2, batch: 9936, sum loss: 3856.935547, avg loss: 2.502878, ppl: 12.217611 +epoch: 2, batch: 9937, sum loss: 4089.785400, avg loss: 2.351803, ppl: 10.504493 +epoch: 2, batch: 9938, sum loss: 3992.754883, avg loss: 2.388011, ppl: 10.891813 +epoch: 2, batch: 9939, sum loss: 4398.182129, avg loss: 2.643138, ppl: 14.057252 +epoch: 2, batch: 9940, sum loss: 4071.391602, avg loss: 2.392122, ppl: 10.936677 +epoch: 2, batch: 9941, sum loss: 3053.175537, avg loss: 1.939756, ppl: 6.957051 +epoch: 2, batch: 9942, sum loss: 4490.317383, avg loss: 2.609133, ppl: 13.587259 +epoch: 2, batch: 9943, sum loss: 4134.778809, avg loss: 2.459714, ppl: 11.701461 +epoch: 2, batch: 9944, sum loss: 4866.121582, avg loss: 2.825855, ppl: 16.875359 +epoch: 2, batch: 9945, sum loss: 4733.352051, avg loss: 2.860032, ppl: 17.462078 +epoch: 2, batch: 9946, sum loss: 3502.941162, avg loss: 2.382953, ppl: 10.836859 +epoch: 2, batch: 9947, sum loss: 4491.397461, avg loss: 2.803619, ppl: 16.504267 +epoch: 2, batch: 9948, sum loss: 4013.510010, avg loss: 2.592707, ppl: 13.365900 +epoch: 2, batch: 9949, sum loss: 4479.442383, avg loss: 2.580324, ppl: 13.201414 +epoch: 2, batch: 9950, sum loss: 4912.471680, avg loss: 2.416366, ppl: 11.205062 +epoch: 2, batch: 9951, sum loss: 4073.580078, avg loss: 2.566843, ppl: 13.024644 +epoch: 2, batch: 9952, sum loss: 4306.195312, avg loss: 2.633759, ppl: 13.926013 +epoch: 2, batch: 9953, sum loss: 3765.002686, avg 
loss: 2.467237, ppl: 11.789821 +epoch: 2, batch: 9954, sum loss: 4067.032715, avg loss: 2.567571, ppl: 13.034128 +epoch: 2, batch: 9955, sum loss: 3620.038086, avg loss: 2.238737, ppl: 9.381478 +epoch: 2, batch: 9956, sum loss: 4438.130859, avg loss: 2.370796, ppl: 10.705915 +epoch: 2, batch: 9957, sum loss: 4999.551758, avg loss: 2.442380, ppl: 11.500379 +epoch: 2, batch: 9958, sum loss: 4677.813965, avg loss: 2.600230, ppl: 13.466835 +epoch: 2, batch: 9959, sum loss: 4209.471191, avg loss: 2.590444, ppl: 13.335690 +epoch: 2, batch: 9960, sum loss: 4987.335938, avg loss: 2.695857, ppl: 14.818216 +epoch: 2, batch: 9961, sum loss: 3491.710938, avg loss: 2.324708, ppl: 10.223689 +epoch: 2, batch: 9962, sum loss: 4176.090332, avg loss: 2.322631, ppl: 10.202480 +epoch: 2, batch: 9963, sum loss: 4697.604004, avg loss: 2.631711, ppl: 13.897529 +epoch: 2, batch: 9964, sum loss: 4365.545410, avg loss: 2.510377, ppl: 12.309569 +epoch: 2, batch: 9965, sum loss: 3827.408203, avg loss: 2.416293, ppl: 11.204249 +epoch: 2, batch: 9966, sum loss: 4009.275391, avg loss: 2.423988, ppl: 11.290793 +epoch: 2, batch: 9967, sum loss: 4435.650391, avg loss: 2.398946, ppl: 11.011559 +epoch: 2, batch: 9968, sum loss: 4846.736816, avg loss: 2.580797, ppl: 13.207660 +epoch: 2, batch: 9969, sum loss: 3991.022949, avg loss: 2.586535, ppl: 13.283660 +epoch: 2, batch: 9970, sum loss: 4739.730469, avg loss: 2.783165, ppl: 16.170122 +epoch: 2, batch: 9971, sum loss: 4434.103516, avg loss: 2.492470, ppl: 12.091099 +epoch: 2, batch: 9972, sum loss: 4598.973633, avg loss: 2.676935, ppl: 14.540454 +epoch: 2, batch: 9973, sum loss: 4388.990234, avg loss: 2.522408, ppl: 12.458564 +epoch: 2, batch: 9974, sum loss: 5080.817871, avg loss: 2.586975, ppl: 13.289505 +epoch: 2, batch: 9975, sum loss: 4874.927246, avg loss: 2.721902, ppl: 15.209229 +epoch: 2, batch: 9976, sum loss: 4779.006836, avg loss: 2.650586, ppl: 14.162337 +epoch: 2, batch: 9977, sum loss: 4955.165039, avg loss: 2.482548, ppl: 11.971726 
+epoch: 2, batch: 9978, sum loss: 4711.719238, avg loss: 2.692411, ppl: 14.767236 +epoch: 2, batch: 9979, sum loss: 4127.169922, avg loss: 2.427747, ppl: 11.333323 +epoch: 2, batch: 9980, sum loss: 3548.718506, avg loss: 2.212418, ppl: 9.137786 +epoch: 2, batch: 9981, sum loss: 4727.184082, avg loss: 2.820516, ppl: 16.785503 +epoch: 2, batch: 9982, sum loss: 4890.866211, avg loss: 2.699153, ppl: 14.867140 +epoch: 2, batch: 9983, sum loss: 2837.699951, avg loss: 2.053329, ppl: 7.793800 +epoch: 2, batch: 9984, sum loss: 2841.254150, avg loss: 2.199113, ppl: 9.017013 +epoch: 2, batch: 9985, sum loss: 4613.335449, avg loss: 2.604933, ppl: 13.530313 +epoch: 2, batch: 9986, sum loss: 4115.009277, avg loss: 2.385513, ppl: 10.864633 +epoch: 2, batch: 9987, sum loss: 4857.672363, avg loss: 2.498803, ppl: 12.167919 +epoch: 2, batch: 9988, sum loss: 4036.555908, avg loss: 2.537119, ppl: 12.643189 +epoch: 2, batch: 9989, sum loss: 4120.841797, avg loss: 2.502029, ppl: 12.207236 +epoch: 2, batch: 9990, sum loss: 4939.181152, avg loss: 2.694589, ppl: 14.799434 +epoch: 2, batch: 9991, sum loss: 4501.886719, avg loss: 2.566640, ppl: 13.021998 +epoch: 2, batch: 9992, sum loss: 4157.729492, avg loss: 2.486681, ppl: 12.021305 +epoch: 2, batch: 9993, sum loss: 4744.850586, avg loss: 2.599918, ppl: 13.462636 +epoch: 2, batch: 9994, sum loss: 3257.741699, avg loss: 2.222198, ppl: 9.227587 +epoch: 2, batch: 9995, sum loss: 4478.804199, avg loss: 2.527542, ppl: 12.522686 +epoch: 2, batch: 9996, sum loss: 4445.117676, avg loss: 2.682630, ppl: 14.623500 +epoch: 2, batch: 9997, sum loss: 4375.200195, avg loss: 2.593480, ppl: 13.376238 +epoch: 2, batch: 9998, sum loss: 3910.979004, avg loss: 2.602115, ppl: 13.492243 +epoch: 2, batch: 9999, sum loss: 3827.280273, avg loss: 2.386085, ppl: 10.870852 +epoch: 2, batch: 10000, sum loss: 4483.848145, avg loss: 2.584351, ppl: 13.254679 +epoch: 2, batch: 10001, sum loss: 4645.116211, avg loss: 2.652836, ppl: 14.194242 +epoch: 2, batch: 10002, sum 
loss: 3807.520996, avg loss: 2.462821, ppl: 11.737875 +epoch: 2, batch: 10003, sum loss: 4488.376953, avg loss: 2.700588, ppl: 14.888483 +epoch: 2, batch: 10004, sum loss: 4442.371094, avg loss: 2.593328, ppl: 13.374207 +epoch: 2, batch: 10005, sum loss: 4334.406738, avg loss: 2.342923, ppl: 10.411622 +epoch: 2, batch: 10006, sum loss: 4307.187988, avg loss: 2.753957, ppl: 15.704645 +epoch: 2, batch: 10007, sum loss: 4335.097168, avg loss: 2.495738, ppl: 12.130686 +epoch: 2, batch: 10008, sum loss: 4496.208984, avg loss: 2.566329, ppl: 13.017954 +epoch: 2, batch: 10009, sum loss: 4277.966309, avg loss: 2.464266, ppl: 11.754854 +epoch: 2, batch: 10010, sum loss: 4404.392578, avg loss: 2.477161, ppl: 11.907413 +epoch: 2, batch: 10011, sum loss: 4995.043457, avg loss: 2.773483, ppl: 16.014318 +epoch: 2, batch: 10012, sum loss: 5212.832031, avg loss: 2.575510, ppl: 13.138016 +epoch: 2, batch: 10013, sum loss: 4056.415283, avg loss: 2.524216, ppl: 12.481109 +epoch: 2, batch: 10014, sum loss: 4147.809570, avg loss: 2.441324, ppl: 11.488244 +epoch: 2, batch: 10015, sum loss: 4183.045898, avg loss: 2.409589, ppl: 11.129381 +epoch: 2, batch: 10016, sum loss: 3658.050293, avg loss: 2.262245, ppl: 9.604630 +epoch: 2, batch: 10017, sum loss: 4593.639160, avg loss: 2.512932, ppl: 12.341059 +epoch: 2, batch: 10018, sum loss: 4031.114990, avg loss: 2.255800, ppl: 9.542927 +epoch: 2, batch: 10019, sum loss: 4276.127930, avg loss: 2.481792, ppl: 11.962685 +epoch: 2, batch: 10020, sum loss: 4695.941406, avg loss: 2.722285, ppl: 15.215050 +epoch: 2, batch: 10021, sum loss: 4116.665039, avg loss: 2.528664, ppl: 12.536744 +epoch: 2, batch: 10022, sum loss: 3653.619385, avg loss: 2.308035, ppl: 10.054646 +epoch: 2, batch: 10023, sum loss: 4656.472656, avg loss: 2.494093, ppl: 12.110749 +epoch: 2, batch: 10024, sum loss: 3701.371582, avg loss: 2.366606, ppl: 10.661144 +epoch: 2, batch: 10025, sum loss: 4488.167480, avg loss: 2.558818, ppl: 12.920541 +epoch: 2, batch: 10026, sum loss: 
4917.003906, avg loss: 2.567626, ppl: 13.034842 +epoch: 2, batch: 10027, sum loss: 4515.278320, avg loss: 2.410720, ppl: 11.141979 +epoch: 2, batch: 10028, sum loss: 5389.476562, avg loss: 2.999152, ppl: 20.068520 +epoch: 2, batch: 10029, sum loss: 4095.247070, avg loss: 2.578871, ppl: 13.182247 +epoch: 2, batch: 10030, sum loss: 4116.831543, avg loss: 2.493538, ppl: 12.104029 +epoch: 2, batch: 10031, sum loss: 4176.367188, avg loss: 2.404356, ppl: 11.071303 +epoch: 2, batch: 10032, sum loss: 3479.612061, avg loss: 2.352679, ppl: 10.513696 +epoch: 2, batch: 10033, sum loss: 4236.502441, avg loss: 2.539870, ppl: 12.678017 +epoch: 2, batch: 10034, sum loss: 4351.185059, avg loss: 2.422709, ppl: 11.276365 +epoch: 2, batch: 10035, sum loss: 4364.898438, avg loss: 2.501374, ppl: 12.199250 +epoch: 2, batch: 10036, sum loss: 3936.158936, avg loss: 2.432731, ppl: 11.389947 +epoch: 2, batch: 10037, sum loss: 5253.133789, avg loss: 2.798686, ppl: 16.423054 +epoch: 2, batch: 10038, sum loss: 4876.779297, avg loss: 2.620516, ppl: 13.742807 +epoch: 2, batch: 10039, sum loss: 5667.360840, avg loss: 2.863750, ppl: 17.527126 +epoch: 2, batch: 10040, sum loss: 5641.176270, avg loss: 2.935055, ppl: 18.822548 +epoch: 2, batch: 10041, sum loss: 4390.945801, avg loss: 2.651537, ppl: 14.175813 +epoch: 2, batch: 10042, sum loss: 3910.553223, avg loss: 2.509983, ppl: 12.304719 +epoch: 2, batch: 10043, sum loss: 4213.380859, avg loss: 2.420092, ppl: 11.246898 +epoch: 2, batch: 10044, sum loss: 4584.705078, avg loss: 2.415545, ppl: 11.195875 +epoch: 2, batch: 10045, sum loss: 3740.653809, avg loss: 2.435322, ppl: 11.419490 +epoch: 2, batch: 10046, sum loss: 3633.453857, avg loss: 2.384156, ppl: 10.849901 +epoch: 2, batch: 10047, sum loss: 3827.619629, avg loss: 2.351118, ppl: 10.497295 +epoch: 2, batch: 10048, sum loss: 4497.669922, avg loss: 2.562775, ppl: 12.971763 +epoch: 2, batch: 10049, sum loss: 4085.042480, avg loss: 2.610251, ppl: 13.602461 +epoch: 2, batch: 10050, sum loss: 
4725.610352, avg loss: 2.572461, ppl: 13.098018 +epoch: 2, batch: 10051, sum loss: 4565.416992, avg loss: 2.732147, ppl: 15.365838 +epoch: 2, batch: 10052, sum loss: 3910.614502, avg loss: 2.589811, ppl: 13.327253 +epoch: 2, batch: 10053, sum loss: 3923.277588, avg loss: 2.511701, ppl: 12.325883 +epoch: 2, batch: 10054, sum loss: 4850.754395, avg loss: 2.527751, ppl: 12.525305 +epoch: 2, batch: 10055, sum loss: 5009.031250, avg loss: 2.680059, ppl: 14.585960 +epoch: 2, batch: 10056, sum loss: 4794.371094, avg loss: 2.673938, ppl: 14.496950 +epoch: 2, batch: 10057, sum loss: 4613.761719, avg loss: 2.583293, ppl: 13.240674 +epoch: 2, batch: 10058, sum loss: 5167.114746, avg loss: 2.851609, ppl: 17.315613 +epoch: 2, batch: 10059, sum loss: 4211.031250, avg loss: 2.580289, ppl: 13.200952 +epoch: 2, batch: 10060, sum loss: 4613.782715, avg loss: 2.442447, ppl: 11.501152 +epoch: 2, batch: 10061, sum loss: 4418.706055, avg loss: 2.540947, ppl: 12.691681 +epoch: 2, batch: 10062, sum loss: 5008.708984, avg loss: 2.548961, ppl: 12.793809 +epoch: 2, batch: 10063, sum loss: 4569.256836, avg loss: 2.708510, ppl: 15.006905 +epoch: 2, batch: 10064, sum loss: 5054.655762, avg loss: 2.721947, ppl: 15.209907 +epoch: 2, batch: 10065, sum loss: 3999.924561, avg loss: 2.514095, ppl: 12.355417 +epoch: 2, batch: 10066, sum loss: 4371.812012, avg loss: 2.406061, ppl: 11.090187 +epoch: 2, batch: 10067, sum loss: 4338.609375, avg loss: 2.555129, ppl: 12.872964 +epoch: 2, batch: 10068, sum loss: 3879.270996, avg loss: 2.469301, ppl: 11.814180 +epoch: 2, batch: 10069, sum loss: 4017.830566, avg loss: 2.542931, ppl: 12.716887 +epoch: 2, batch: 10070, sum loss: 3922.682129, avg loss: 2.490592, ppl: 12.068416 +epoch: 2, batch: 10071, sum loss: 3480.658203, avg loss: 2.094259, ppl: 8.119420 +epoch: 2, batch: 10072, sum loss: 4507.059082, avg loss: 2.546361, ppl: 12.760586 +epoch: 2, batch: 10073, sum loss: 4332.377930, avg loss: 2.563537, ppl: 12.981654 +epoch: 2, batch: 10074, sum loss: 
4623.563965, avg loss: 2.691248, ppl: 14.750072 +epoch: 2, batch: 10075, sum loss: 4707.917969, avg loss: 2.679521, ppl: 14.578103 +epoch: 2, batch: 10076, sum loss: 3850.191650, avg loss: 2.430676, ppl: 11.366569 +epoch: 2, batch: 10077, sum loss: 4100.692871, avg loss: 2.384124, ppl: 10.849552 +epoch: 2, batch: 10078, sum loss: 4519.011719, avg loss: 2.624281, ppl: 13.794651 +epoch: 2, batch: 10079, sum loss: 4759.875977, avg loss: 2.716824, ppl: 15.132191 +epoch: 2, batch: 10080, sum loss: 4709.138672, avg loss: 2.697101, ppl: 14.836663 +epoch: 2, batch: 10081, sum loss: 4598.610352, avg loss: 2.504690, ppl: 12.239760 +epoch: 2, batch: 10082, sum loss: 4711.707031, avg loss: 2.639612, ppl: 14.007767 +epoch: 2, batch: 10083, sum loss: 4178.124512, avg loss: 2.649413, ppl: 14.145734 +epoch: 2, batch: 10084, sum loss: 3392.969727, avg loss: 2.070146, ppl: 7.925981 +epoch: 2, batch: 10085, sum loss: 4364.137695, avg loss: 2.461443, ppl: 11.721710 +epoch: 2, batch: 10086, sum loss: 4268.531738, avg loss: 2.272913, ppl: 9.707643 +epoch: 2, batch: 10087, sum loss: 4428.275391, avg loss: 2.526113, ppl: 12.504803 +epoch: 2, batch: 10088, sum loss: 4082.439453, avg loss: 2.307767, ppl: 10.051952 +epoch: 2, batch: 10089, sum loss: 3826.639648, avg loss: 2.326225, ppl: 10.239211 +epoch: 2, batch: 10090, sum loss: 3896.283936, avg loss: 2.384507, ppl: 10.853707 +epoch: 2, batch: 10091, sum loss: 3937.109131, avg loss: 2.264008, ppl: 9.621572 +epoch: 2, batch: 10092, sum loss: 4298.242676, avg loss: 2.336001, ppl: 10.339809 +epoch: 2, batch: 10093, sum loss: 4803.316895, avg loss: 2.765295, ppl: 15.883721 +epoch: 2, batch: 10094, sum loss: 4542.079590, avg loss: 2.554600, ppl: 12.866158 +epoch: 2, batch: 10095, sum loss: 3915.975830, avg loss: 2.402439, ppl: 11.050096 +epoch: 2, batch: 10096, sum loss: 3644.926514, avg loss: 2.484612, ppl: 11.996470 +epoch: 2, batch: 10097, sum loss: 3710.902344, avg loss: 2.244950, ppl: 9.439941 +epoch: 2, batch: 10098, sum loss: 
3935.485840, avg loss: 2.464299, ppl: 11.755241 +epoch: 2, batch: 10099, sum loss: 4516.012207, avg loss: 2.422753, ppl: 11.276866 +epoch: 2, batch: 10100, sum loss: 4509.791016, avg loss: 2.674846, ppl: 14.510121 +epoch: 2, batch: 10101, sum loss: 3657.140381, avg loss: 2.399698, ppl: 11.019854 +epoch: 2, batch: 10102, sum loss: 3651.604736, avg loss: 2.392926, ppl: 10.945471 +epoch: 2, batch: 10103, sum loss: 4302.278320, avg loss: 2.421091, ppl: 11.258134 +epoch: 2, batch: 10104, sum loss: 4396.426758, avg loss: 2.616921, ppl: 13.693492 +epoch: 2, batch: 10105, sum loss: 3772.143066, avg loss: 2.315619, ppl: 10.131189 +epoch: 2, batch: 10106, sum loss: 3741.534668, avg loss: 2.468031, ppl: 11.799191 +epoch: 2, batch: 10107, sum loss: 5214.179199, avg loss: 2.821526, ppl: 16.802465 +epoch: 2, batch: 10108, sum loss: 4000.691406, avg loss: 2.408604, ppl: 11.118430 +epoch: 2, batch: 10109, sum loss: 4154.705566, avg loss: 2.514955, ppl: 12.366053 +epoch: 2, batch: 10110, sum loss: 4458.306152, avg loss: 2.604151, ppl: 13.519739 +epoch: 2, batch: 10111, sum loss: 4472.700684, avg loss: 2.475208, ppl: 11.884176 +epoch: 2, batch: 10112, sum loss: 4144.195801, avg loss: 2.424924, ppl: 11.301374 +epoch: 2, batch: 10113, sum loss: 3884.542969, avg loss: 2.438508, ppl: 11.455934 +epoch: 2, batch: 10114, sum loss: 4615.329590, avg loss: 2.595798, ppl: 13.407289 +epoch: 2, batch: 10115, sum loss: 4340.313965, avg loss: 2.560657, ppl: 12.944323 +epoch: 2, batch: 10116, sum loss: 4381.452637, avg loss: 2.560756, ppl: 12.945597 +epoch: 2, batch: 10117, sum loss: 4016.569092, avg loss: 2.589664, ppl: 13.325294 +epoch: 2, batch: 10118, sum loss: 4239.462402, avg loss: 2.479218, ppl: 11.931928 +epoch: 2, batch: 10119, sum loss: 4967.209473, avg loss: 2.635124, ppl: 13.945047 +epoch: 2, batch: 10120, sum loss: 4742.600586, avg loss: 2.640646, ppl: 14.022263 +epoch: 2, batch: 10121, sum loss: 4296.166504, avg loss: 2.676739, ppl: 14.537612 +epoch: 2, batch: 10122, sum loss: 
4344.000977, avg loss: 2.565860, ppl: 13.011845 +epoch: 2, batch: 10123, sum loss: 3670.100098, avg loss: 2.451637, ppl: 11.607327 +epoch: 2, batch: 10124, sum loss: 4336.541992, avg loss: 2.444499, ppl: 11.524777 +epoch: 2, batch: 10125, sum loss: 4548.201660, avg loss: 2.488075, ppl: 12.038084 +epoch: 2, batch: 10126, sum loss: 4578.755371, avg loss: 2.693386, ppl: 14.781636 +epoch: 2, batch: 10127, sum loss: 3464.576416, avg loss: 2.210962, ppl: 9.124486 +epoch: 2, batch: 10128, sum loss: 5333.981445, avg loss: 2.743818, ppl: 15.546225 +epoch: 2, batch: 10129, sum loss: 4887.412109, avg loss: 2.622002, ppl: 13.763251 +epoch: 2, batch: 10130, sum loss: 6044.061035, avg loss: 2.697037, ppl: 14.835715 +epoch: 2, batch: 10131, sum loss: 4553.712891, avg loss: 2.666108, ppl: 14.383880 +epoch: 2, batch: 10132, sum loss: 4654.919922, avg loss: 2.698504, ppl: 14.857495 +epoch: 2, batch: 10133, sum loss: 3603.741211, avg loss: 2.288090, ppl: 9.856090 +epoch: 2, batch: 10134, sum loss: 3827.718262, avg loss: 2.125329, ppl: 8.375655 +epoch: 2, batch: 10135, sum loss: 3692.063965, avg loss: 2.375846, ppl: 10.760109 +epoch: 2, batch: 10136, sum loss: 4108.445312, avg loss: 2.509741, ppl: 12.301739 +epoch: 2, batch: 10137, sum loss: 3890.853516, avg loss: 2.388492, ppl: 10.897050 +epoch: 2, batch: 10138, sum loss: 3040.875732, avg loss: 2.335542, ppl: 10.335059 +epoch: 2, batch: 10139, sum loss: 3605.519775, avg loss: 2.233903, ppl: 9.336236 +epoch: 2, batch: 10140, sum loss: 3102.610352, avg loss: 2.177270, ppl: 8.822192 +epoch: 2, batch: 10141, sum loss: 3640.487305, avg loss: 2.292498, ppl: 9.899639 +epoch: 2, batch: 10142, sum loss: 3480.781738, avg loss: 2.317431, ppl: 10.149569 +epoch: 2, batch: 10143, sum loss: 4549.632812, avg loss: 2.696878, ppl: 14.833349 +epoch: 2, batch: 10144, sum loss: 4419.914062, avg loss: 2.369927, ppl: 10.696613 +epoch: 2, batch: 10145, sum loss: 3784.141113, avg loss: 2.292030, ppl: 9.895002 +epoch: 2, batch: 10146, sum loss: 4054.341064, 
avg loss: 2.384907, ppl: 10.858047 +epoch: 2, batch: 10147, sum loss: 3886.742920, avg loss: 2.430733, ppl: 11.367216 +epoch: 2, batch: 10148, sum loss: 4389.830078, avg loss: 2.484341, ppl: 11.993210 +epoch: 2, batch: 10149, sum loss: 5200.922852, avg loss: 2.811310, ppl: 16.631689 +epoch: 2, batch: 10150, sum loss: 2620.784180, avg loss: 2.180353, ppl: 8.849429 +epoch: 2, batch: 10151, sum loss: 4047.108887, avg loss: 2.554993, ppl: 12.871208 +epoch: 2, batch: 10152, sum loss: 4566.272461, avg loss: 2.573998, ppl: 13.118166 +epoch: 2, batch: 10153, sum loss: 3652.717773, avg loss: 2.106527, ppl: 8.219644 +epoch: 2, batch: 10154, sum loss: 3557.501221, avg loss: 2.305574, ppl: 10.029938 +epoch: 2, batch: 10155, sum loss: 3767.474609, avg loss: 2.284703, ppl: 9.822763 +epoch: 2, batch: 10156, sum loss: 3767.023682, avg loss: 2.239610, ppl: 9.389666 +epoch: 2, batch: 10157, sum loss: 4101.354492, avg loss: 2.639224, ppl: 14.002337 +epoch: 2, batch: 10158, sum loss: 4774.057129, avg loss: 2.662609, ppl: 14.333632 +epoch: 2, batch: 10159, sum loss: 5527.816895, avg loss: 2.858230, ppl: 17.430649 +epoch: 2, batch: 10160, sum loss: 4326.720215, avg loss: 2.548127, ppl: 12.783144 +epoch: 2, batch: 10161, sum loss: 5334.820312, avg loss: 2.879018, ppl: 17.796785 +epoch: 2, batch: 10162, sum loss: 4368.216309, avg loss: 2.490431, ppl: 12.066477 +epoch: 2, batch: 10163, sum loss: 5226.042480, avg loss: 2.727580, ppl: 15.295820 +epoch: 2, batch: 10164, sum loss: 3772.268066, avg loss: 2.213772, ppl: 9.150168 +epoch: 2, batch: 10165, sum loss: 4443.529297, avg loss: 2.606176, ppl: 13.547143 +epoch: 2, batch: 10166, sum loss: 5280.913086, avg loss: 2.782357, ppl: 16.157059 +epoch: 2, batch: 10167, sum loss: 4114.077148, avg loss: 2.421470, ppl: 11.262403 +epoch: 2, batch: 10168, sum loss: 4058.573242, avg loss: 2.437582, ppl: 11.445327 +epoch: 2, batch: 10169, sum loss: 3365.829834, avg loss: 2.327683, ppl: 10.254157 +epoch: 2, batch: 10170, sum loss: 4049.161621, avg loss: 
2.415968, ppl: 11.200604 +epoch: 2, batch: 10171, sum loss: 4631.128906, avg loss: 2.584335, ppl: 13.254477 +epoch: 2, batch: 10172, sum loss: 4257.853027, avg loss: 2.331793, ppl: 10.296382 +epoch: 2, batch: 10173, sum loss: 4920.339355, avg loss: 2.730488, ppl: 15.340372 +epoch: 2, batch: 10174, sum loss: 4298.479980, avg loss: 2.435400, ppl: 11.420381 +epoch: 2, batch: 10175, sum loss: 4135.492188, avg loss: 2.393225, ppl: 10.948744 +epoch: 2, batch: 10176, sum loss: 4848.409180, avg loss: 2.710123, ppl: 15.031118 +epoch: 2, batch: 10177, sum loss: 4527.521973, avg loss: 2.639954, ppl: 14.012564 +epoch: 2, batch: 10178, sum loss: 4397.179199, avg loss: 2.597271, ppl: 13.427042 +epoch: 2, batch: 10179, sum loss: 4124.076660, avg loss: 2.530108, ppl: 12.554868 +epoch: 2, batch: 10180, sum loss: 4719.407715, avg loss: 2.519705, ppl: 12.424934 +epoch: 2, batch: 10181, sum loss: 3500.630615, avg loss: 2.338430, ppl: 10.364955 +epoch: 2, batch: 10182, sum loss: 4131.625488, avg loss: 2.574221, ppl: 13.121097 +epoch: 2, batch: 10183, sum loss: 3582.567383, avg loss: 2.117357, ppl: 8.309143 +epoch: 2, batch: 10184, sum loss: 4858.057617, avg loss: 2.656128, ppl: 14.241040 +epoch: 2, batch: 10185, sum loss: 3823.887939, avg loss: 2.437150, ppl: 11.440387 +epoch: 2, batch: 10186, sum loss: 4380.605469, avg loss: 2.680909, ppl: 14.598359 +epoch: 2, batch: 10187, sum loss: 3786.404053, avg loss: 2.275483, ppl: 9.732622 +epoch: 2, batch: 10188, sum loss: 5238.432617, avg loss: 2.696054, ppl: 14.821132 +epoch: 2, batch: 10189, sum loss: 4896.289062, avg loss: 2.589259, ppl: 13.319900 +epoch: 2, batch: 10190, sum loss: 3798.425293, avg loss: 2.413231, ppl: 11.169990 +epoch: 2, batch: 10191, sum loss: 4382.348633, avg loss: 2.359908, ppl: 10.589973 +epoch: 2, batch: 10192, sum loss: 4022.426025, avg loss: 2.427535, ppl: 11.330920 +epoch: 2, batch: 10193, sum loss: 4491.375977, avg loss: 2.695904, ppl: 14.818906 +epoch: 2, batch: 10194, sum loss: 3725.827637, avg loss: 2.419369, 
ppl: 11.238760 +epoch: 2, batch: 10195, sum loss: 4415.742188, avg loss: 2.500420, ppl: 12.187613 +epoch: 2, batch: 10196, sum loss: 3847.808594, avg loss: 2.392916, ppl: 10.945361 +epoch: 2, batch: 10197, sum loss: 4509.187012, avg loss: 2.460004, ppl: 11.704857 +epoch: 2, batch: 10198, sum loss: 4696.282227, avg loss: 2.662292, ppl: 14.329087 +epoch: 2, batch: 10199, sum loss: 4525.501953, avg loss: 2.409745, ppl: 11.131127 +epoch: 2, batch: 10200, sum loss: 4972.320801, avg loss: 2.693565, ppl: 14.784286 +epoch: 2, batch: 10201, sum loss: 4502.507812, avg loss: 2.696112, ppl: 14.821998 +epoch: 2, batch: 10202, sum loss: 4336.083008, avg loss: 2.835895, ppl: 17.045647 +epoch: 2, batch: 10203, sum loss: 4816.165527, avg loss: 2.453472, ppl: 11.628653 +epoch: 2, batch: 10204, sum loss: 4260.992676, avg loss: 2.625381, ppl: 13.809828 +epoch: 2, batch: 10205, sum loss: 4932.152344, avg loss: 2.617915, ppl: 13.707120 +epoch: 2, batch: 10206, sum loss: 4666.883301, avg loss: 2.572703, ppl: 13.101191 +epoch: 2, batch: 10207, sum loss: 4734.813477, avg loss: 2.621713, ppl: 13.759272 +epoch: 2, batch: 10208, sum loss: 4037.308350, avg loss: 2.466285, ppl: 11.778607 +epoch: 2, batch: 10209, sum loss: 4334.083008, avg loss: 2.454180, ppl: 11.636893 +epoch: 2, batch: 10210, sum loss: 3581.305176, avg loss: 2.330062, ppl: 10.278578 +epoch: 2, batch: 10211, sum loss: 4076.983887, avg loss: 2.406720, ppl: 11.097504 +epoch: 2, batch: 10212, sum loss: 3421.780029, avg loss: 2.419929, ppl: 11.245064 +epoch: 2, batch: 10213, sum loss: 4177.488281, avg loss: 2.466050, ppl: 11.775836 +epoch: 2, batch: 10214, sum loss: 4206.415527, avg loss: 2.568019, ppl: 13.039968 +epoch: 2, batch: 10215, sum loss: 3117.018311, avg loss: 2.169115, ppl: 8.750537 +epoch: 2, batch: 10216, sum loss: 4086.683594, avg loss: 2.352725, ppl: 10.514185 +epoch: 2, batch: 10217, sum loss: 4506.017090, avg loss: 2.459616, ppl: 11.700323 +epoch: 2, batch: 10218, sum loss: 4454.339844, avg loss: 2.638827, ppl: 
13.996777 +epoch: 2, batch: 10219, sum loss: 4431.051270, avg loss: 2.562783, ppl: 12.971865 +epoch: 2, batch: 10220, sum loss: 4557.291992, avg loss: 2.646511, ppl: 14.104742 +epoch: 2, batch: 10221, sum loss: 3943.669189, avg loss: 2.412030, ppl: 11.156586 +epoch: 2, batch: 10222, sum loss: 4278.757812, avg loss: 2.566741, ppl: 13.023315 +epoch: 2, batch: 10223, sum loss: 4328.500000, avg loss: 2.658784, ppl: 14.278911 +epoch: 2, batch: 10224, sum loss: 4244.483887, avg loss: 2.558459, ppl: 12.915902 +epoch: 2, batch: 10225, sum loss: 4979.635254, avg loss: 2.554970, ppl: 12.870908 +epoch: 2, batch: 10226, sum loss: 3261.996826, avg loss: 2.067172, ppl: 7.902440 +epoch: 2, batch: 10227, sum loss: 3852.707275, avg loss: 2.268968, ppl: 9.669415 +epoch: 2, batch: 10228, sum loss: 3773.373291, avg loss: 2.397315, ppl: 10.993614 +epoch: 2, batch: 10229, sum loss: 4769.895508, avg loss: 2.746054, ppl: 15.581027 +epoch: 2, batch: 10230, sum loss: 4572.142578, avg loss: 2.627668, ppl: 13.841455 +epoch: 2, batch: 10231, sum loss: 4349.924805, avg loss: 2.580027, ppl: 13.197490 +epoch: 2, batch: 10232, sum loss: 3457.666504, avg loss: 2.174633, ppl: 8.798956 +epoch: 2, batch: 10233, sum loss: 5075.451172, avg loss: 2.540266, ppl: 12.683042 +epoch: 2, batch: 10234, sum loss: 4263.627441, avg loss: 2.502129, ppl: 12.208456 +epoch: 2, batch: 10235, sum loss: 3979.210205, avg loss: 2.351779, ppl: 10.504240 +epoch: 2, batch: 10236, sum loss: 4843.196289, avg loss: 2.658176, ppl: 14.270232 +epoch: 2, batch: 10237, sum loss: 4749.262695, avg loss: 2.498297, ppl: 12.161764 +epoch: 2, batch: 10238, sum loss: 4147.333496, avg loss: 2.171379, ppl: 8.770369 +epoch: 2, batch: 10239, sum loss: 4325.986816, avg loss: 2.479075, ppl: 11.930229 +epoch: 2, batch: 10240, sum loss: 4578.939453, avg loss: 2.585511, ppl: 13.270064 +epoch: 2, batch: 10241, sum loss: 4539.972168, avg loss: 2.527824, ppl: 12.526221 +epoch: 2, batch: 10242, sum loss: 3866.265137, avg loss: 2.373398, ppl: 10.733802 
+epoch: 2, batch: 10243, sum loss: 5086.749023, avg loss: 2.743662, ppl: 15.543797 +epoch: 2, batch: 10244, sum loss: 4459.631348, avg loss: 2.638835, ppl: 13.996890 +epoch: 2, batch: 10245, sum loss: 4682.020508, avg loss: 2.676970, ppl: 14.540967 +epoch: 2, batch: 10246, sum loss: 4365.344238, avg loss: 2.315833, ppl: 10.133357 +epoch: 2, batch: 10247, sum loss: 4416.176758, avg loss: 2.490793, ppl: 12.070850 +epoch: 2, batch: 10248, sum loss: 5068.735352, avg loss: 2.635848, ppl: 13.955139 +epoch: 2, batch: 10249, sum loss: 4761.878906, avg loss: 2.644019, ppl: 14.069641 +epoch: 2, batch: 10250, sum loss: 3730.888672, avg loss: 2.194641, ppl: 8.976774 +epoch: 2, batch: 10251, sum loss: 3620.493896, avg loss: 2.367884, ppl: 10.674777 +epoch: 2, batch: 10252, sum loss: 4022.934570, avg loss: 2.336199, ppl: 10.341850 +epoch: 2, batch: 10253, sum loss: 3724.833740, avg loss: 2.455395, ppl: 11.651029 +epoch: 2, batch: 10254, sum loss: 4304.677734, avg loss: 2.579196, ppl: 13.186532 +epoch: 2, batch: 10255, sum loss: 4895.630371, avg loss: 2.584810, ppl: 13.260773 +epoch: 2, batch: 10256, sum loss: 4148.496582, avg loss: 2.431710, ppl: 11.378317 +epoch: 2, batch: 10257, sum loss: 3323.999023, avg loss: 2.264304, ppl: 9.624428 +epoch: 2, batch: 10258, sum loss: 4571.650879, avg loss: 2.572679, ppl: 13.100875 +epoch: 2, batch: 10259, sum loss: 3876.150146, avg loss: 2.484712, ppl: 11.997661 +epoch: 2, batch: 10260, sum loss: 5063.706055, avg loss: 2.575639, ppl: 13.139710 +epoch: 2, batch: 10261, sum loss: 4447.702637, avg loss: 2.625562, ppl: 13.812337 +epoch: 2, batch: 10262, sum loss: 4947.677246, avg loss: 2.657184, ppl: 14.256092 +epoch: 2, batch: 10263, sum loss: 3533.086670, avg loss: 2.385609, ppl: 10.865677 +epoch: 2, batch: 10264, sum loss: 4590.413574, avg loss: 2.504317, ppl: 12.235203 +epoch: 2, batch: 10265, sum loss: 4756.397949, avg loss: 2.804480, ppl: 16.518486 +epoch: 2, batch: 10266, sum loss: 3777.112793, avg loss: 2.279489, ppl: 9.771682 +epoch: 2, 
batch: 10267, sum loss: 3849.891602, avg loss: 2.428954, ppl: 11.347003 +epoch: 2, batch: 10268, sum loss: 4642.001465, avg loss: 2.910346, ppl: 18.363148 +epoch: 2, batch: 10269, sum loss: 4112.568359, avg loss: 2.636262, ppl: 13.960919 +epoch: 2, batch: 10270, sum loss: 5229.944336, avg loss: 2.613665, ppl: 13.648988 +epoch: 2, batch: 10271, sum loss: 4618.326660, avg loss: 2.694473, ppl: 14.797719 +epoch: 2, batch: 10272, sum loss: 4512.220703, avg loss: 2.453627, ppl: 11.630458 +epoch: 2, batch: 10273, sum loss: 3466.986328, avg loss: 2.323717, ppl: 10.213572 +epoch: 2, batch: 10274, sum loss: 5101.300781, avg loss: 2.730889, ppl: 15.346525 +epoch: 2, batch: 10275, sum loss: 5132.853516, avg loss: 2.807907, ppl: 16.575188 +epoch: 2, batch: 10276, sum loss: 3733.693359, avg loss: 2.279422, ppl: 9.771031 +epoch: 2, batch: 10277, sum loss: 5101.457520, avg loss: 2.778572, ppl: 16.096012 +epoch: 2, batch: 10278, sum loss: 3900.689697, avg loss: 2.486099, ppl: 12.014319 +epoch: 2, batch: 10279, sum loss: 3090.375488, avg loss: 2.137189, ppl: 8.475580 +epoch: 2, batch: 10280, sum loss: 4045.003906, avg loss: 2.472496, ppl: 11.851995 +epoch: 2, batch: 10281, sum loss: 3720.888916, avg loss: 2.275773, ppl: 9.735442 +epoch: 2, batch: 10282, sum loss: 4562.861328, avg loss: 2.685616, ppl: 14.667233 +epoch: 2, batch: 10283, sum loss: 4546.270508, avg loss: 2.502075, ppl: 12.207801 +epoch: 2, batch: 10284, sum loss: 3039.302979, avg loss: 2.158596, ppl: 8.658972 +epoch: 2, batch: 10285, sum loss: 3787.089111, avg loss: 2.548512, ppl: 12.788064 +epoch: 2, batch: 10286, sum loss: 4348.468750, avg loss: 2.451222, ppl: 11.602521 +epoch: 2, batch: 10287, sum loss: 4930.774902, avg loss: 2.635369, ppl: 13.948456 +epoch: 2, batch: 10288, sum loss: 3971.897949, avg loss: 2.433761, ppl: 11.401685 +epoch: 2, batch: 10289, sum loss: 4094.183838, avg loss: 2.445749, ppl: 11.539192 +epoch: 2, batch: 10290, sum loss: 4566.890137, avg loss: 2.653626, ppl: 14.205450 +epoch: 2, batch: 
10291, sum loss: 4275.823730, avg loss: 2.629658, ppl: 13.869022 +epoch: 2, batch: 10292, sum loss: 5115.164062, avg loss: 2.685126, ppl: 14.660043 +epoch: 2, batch: 10293, sum loss: 4261.690430, avg loss: 2.526195, ppl: 12.505829 +epoch: 2, batch: 10294, sum loss: 4605.788086, avg loss: 2.640934, ppl: 14.026291 +epoch: 2, batch: 10295, sum loss: 3921.679932, avg loss: 2.368164, ppl: 10.677771 +epoch: 2, batch: 10296, sum loss: 4755.413086, avg loss: 2.668582, ppl: 14.419511 +epoch: 2, batch: 10297, sum loss: 4529.462402, avg loss: 2.750129, ppl: 15.644646 +epoch: 2, batch: 10298, sum loss: 3726.362793, avg loss: 2.378024, ppl: 10.783569 +epoch: 2, batch: 10299, sum loss: 4396.469727, avg loss: 2.431676, ppl: 11.377932 +epoch: 2, batch: 10300, sum loss: 3499.213379, avg loss: 2.243086, ppl: 9.422360 +epoch: 2, batch: 10301, sum loss: 4164.520996, avg loss: 2.564360, ppl: 12.992342 +epoch: 2, batch: 10302, sum loss: 4815.872070, avg loss: 2.505657, ppl: 12.251602 +epoch: 2, batch: 10303, sum loss: 3517.709229, avg loss: 2.463382, ppl: 11.744461 +epoch: 2, batch: 10304, sum loss: 4209.563477, avg loss: 2.440327, ppl: 11.476789 +epoch: 2, batch: 10305, sum loss: 4137.715820, avg loss: 2.419717, ppl: 11.242676 +epoch: 2, batch: 10306, sum loss: 4718.975098, avg loss: 2.532998, ppl: 12.591196 +epoch: 2, batch: 10307, sum loss: 3840.281982, avg loss: 2.495310, ppl: 12.125490 +epoch: 2, batch: 10308, sum loss: 4478.048828, avg loss: 2.578036, ppl: 13.171248 +epoch: 2, batch: 10309, sum loss: 4790.216309, avg loss: 2.549344, ppl: 12.798699 +epoch: 2, batch: 10310, sum loss: 4890.941406, avg loss: 2.838619, ppl: 17.092154 +epoch: 2, batch: 10311, sum loss: 3624.391602, avg loss: 2.238661, ppl: 9.380760 +epoch: 2, batch: 10312, sum loss: 5108.469727, avg loss: 2.899245, ppl: 18.160435 +epoch: 2, batch: 10313, sum loss: 5039.515625, avg loss: 2.616571, ppl: 13.688704 +epoch: 2, batch: 10314, sum loss: 4250.877441, avg loss: 2.396210, ppl: 10.981483 +epoch: 2, batch: 10315, 
sum loss: 3998.859131, avg loss: 2.526127, ppl: 12.504979 +epoch: 2, batch: 10316, sum loss: 4639.609375, avg loss: 2.457420, ppl: 11.674653 +epoch: 2, batch: 10317, sum loss: 3753.593750, avg loss: 2.328532, ppl: 10.262864 +epoch: 2, batch: 10318, sum loss: 4059.260254, avg loss: 2.641028, ppl: 14.027619 +epoch: 2, batch: 10319, sum loss: 4515.866211, avg loss: 2.592346, ppl: 13.361076 +epoch: 2, batch: 10320, sum loss: 4398.870117, avg loss: 2.828856, ppl: 16.926079 +epoch: 2, batch: 10321, sum loss: 4241.409668, avg loss: 2.595722, ppl: 13.406262 +epoch: 2, batch: 10322, sum loss: 4019.403564, avg loss: 2.228051, ppl: 9.281755 +epoch: 2, batch: 10323, sum loss: 5286.587402, avg loss: 2.729265, ppl: 15.321629 +epoch: 2, batch: 10324, sum loss: 3968.181152, avg loss: 2.348036, ppl: 10.464996 +epoch: 2, batch: 10325, sum loss: 4348.722656, avg loss: 2.484984, ppl: 12.000933 +epoch: 2, batch: 10326, sum loss: 4750.282227, avg loss: 2.573284, ppl: 13.108802 +epoch: 2, batch: 10327, sum loss: 4579.450684, avg loss: 2.385131, ppl: 10.860481 +epoch: 2, batch: 10328, sum loss: 4459.038086, avg loss: 2.613739, ppl: 13.649986 +epoch: 2, batch: 10329, sum loss: 5728.080078, avg loss: 2.809260, ppl: 16.597626 +epoch: 2, batch: 10330, sum loss: 2929.441406, avg loss: 2.132053, ppl: 8.432163 +epoch: 2, batch: 10331, sum loss: 4862.019531, avg loss: 2.789455, ppl: 16.272144 +epoch: 2, batch: 10332, sum loss: 3722.316895, avg loss: 2.434478, ppl: 11.409862 +epoch: 2, batch: 10333, sum loss: 3230.192627, avg loss: 2.058759, ppl: 7.836237 +epoch: 2, batch: 10334, sum loss: 4457.191406, avg loss: 2.628061, ppl: 13.846895 +epoch: 2, batch: 10335, sum loss: 4164.400391, avg loss: 2.436747, ppl: 11.435778 +epoch: 2, batch: 10336, sum loss: 4168.611328, avg loss: 2.651788, ppl: 14.179376 +epoch: 2, batch: 10337, sum loss: 3106.303711, avg loss: 2.038257, ppl: 7.677217 +epoch: 2, batch: 10338, sum loss: 5048.014160, avg loss: 2.731609, ppl: 15.357583 +epoch: 2, batch: 10339, sum loss: 
4246.837891, avg loss: 2.664265, ppl: 14.357388 +epoch: 2, batch: 10340, sum loss: 3943.625000, avg loss: 2.455557, ppl: 11.652924 +epoch: 2, batch: 10341, sum loss: 4035.876221, avg loss: 2.196993, ppl: 8.997917 +epoch: 2, batch: 10342, sum loss: 4874.180664, avg loss: 2.746017, ppl: 15.580455 +epoch: 2, batch: 10343, sum loss: 4175.765137, avg loss: 2.718597, ppl: 15.159041 +epoch: 2, batch: 10344, sum loss: 5187.304688, avg loss: 2.682164, ppl: 14.616686 +epoch: 2, batch: 10345, sum loss: 3839.615234, avg loss: 2.362840, ppl: 10.621075 +epoch: 2, batch: 10346, sum loss: 3759.890381, avg loss: 2.491644, ppl: 12.081120 +epoch: 2, batch: 10347, sum loss: 3696.713379, avg loss: 2.444916, ppl: 11.529587 +epoch: 2, batch: 10348, sum loss: 3786.392578, avg loss: 2.376894, ppl: 10.771398 +epoch: 2, batch: 10349, sum loss: 4545.120605, avg loss: 2.572225, ppl: 13.094923 +epoch: 2, batch: 10350, sum loss: 4356.366211, avg loss: 2.446023, ppl: 11.542349 +epoch: 2, batch: 10351, sum loss: 3640.843750, avg loss: 2.289839, ppl: 9.873346 +epoch: 2, batch: 10352, sum loss: 2990.240967, avg loss: 2.099888, ppl: 8.165258 +epoch: 2, batch: 10353, sum loss: 3794.096680, avg loss: 2.507665, ppl: 12.276228 +epoch: 2, batch: 10354, sum loss: 4314.331543, avg loss: 2.696457, ppl: 14.827108 +epoch: 2, batch: 10355, sum loss: 4504.806152, avg loss: 2.583031, ppl: 13.237199 +epoch: 2, batch: 10356, sum loss: 4553.355957, avg loss: 2.672157, ppl: 14.471154 +epoch: 2, batch: 10357, sum loss: 5042.473633, avg loss: 2.947092, ppl: 19.050470 +epoch: 2, batch: 10358, sum loss: 3846.187988, avg loss: 2.326792, ppl: 10.245028 +epoch: 2, batch: 10359, sum loss: 4255.009766, avg loss: 2.446814, ppl: 11.551489 +epoch: 2, batch: 10360, sum loss: 3397.553223, avg loss: 2.271092, ppl: 9.689974 +epoch: 2, batch: 10361, sum loss: 4518.540039, avg loss: 2.376928, ppl: 10.771760 +epoch: 2, batch: 10362, sum loss: 4080.228516, avg loss: 2.313055, ppl: 10.105247 +epoch: 2, batch: 10363, sum loss: 
3849.752686, avg loss: 2.573364, ppl: 13.109856 +epoch: 2, batch: 10364, sum loss: 4836.452637, avg loss: 2.870298, ppl: 17.642282 +epoch: 2, batch: 10365, sum loss: 3586.036865, avg loss: 2.231510, ppl: 9.313921 +epoch: 2, batch: 10366, sum loss: 4761.720215, avg loss: 2.871966, ppl: 17.671734 +epoch: 2, batch: 10367, sum loss: 4090.971191, avg loss: 2.440914, ppl: 11.483528 +epoch: 2, batch: 10368, sum loss: 3503.049316, avg loss: 2.429299, ppl: 11.350924 +epoch: 2, batch: 10369, sum loss: 4306.580566, avg loss: 2.789236, ppl: 16.268591 +epoch: 2, batch: 10370, sum loss: 6221.598145, avg loss: 2.742000, ppl: 15.517992 +epoch: 2, batch: 10371, sum loss: 4413.164062, avg loss: 2.838048, ppl: 17.082384 +epoch: 2, batch: 10372, sum loss: 4431.775391, avg loss: 2.558762, ppl: 12.919810 +epoch: 2, batch: 10373, sum loss: 4233.608398, avg loss: 2.464266, ppl: 11.754846 +epoch: 2, batch: 10374, sum loss: 4421.432617, avg loss: 2.548376, ppl: 12.786323 +epoch: 2, batch: 10375, sum loss: 3992.115479, avg loss: 2.456686, ppl: 11.666092 +epoch: 2, batch: 10376, sum loss: 3481.198242, avg loss: 2.266405, ppl: 9.644667 +epoch: 2, batch: 10377, sum loss: 2803.554932, avg loss: 2.096900, ppl: 8.140892 +epoch: 2, batch: 10378, sum loss: 3693.331543, avg loss: 2.349447, ppl: 10.479778 +epoch: 2, batch: 10379, sum loss: 3368.903564, avg loss: 2.126833, ppl: 8.388259 +epoch: 2, batch: 10380, sum loss: 3301.624512, avg loss: 2.366756, ppl: 10.662745 +epoch: 2, batch: 10381, sum loss: 4822.978516, avg loss: 2.392351, ppl: 10.939178 +epoch: 2, batch: 10382, sum loss: 5303.732910, avg loss: 2.773919, ppl: 16.021296 +epoch: 2, batch: 10383, sum loss: 3840.355469, avg loss: 2.461766, ppl: 11.725506 +epoch: 2, batch: 10384, sum loss: 4222.308105, avg loss: 2.708344, ppl: 15.004407 +epoch: 2, batch: 10385, sum loss: 5213.858398, avg loss: 3.086950, ppl: 21.910151 +epoch: 2, batch: 10386, sum loss: 4520.392578, avg loss: 2.654370, ppl: 14.216031 +epoch: 2, batch: 10387, sum loss: 
3393.211670, avg loss: 2.432410, ppl: 11.386290 +epoch: 2, batch: 10388, sum loss: 4030.186035, avg loss: 2.423443, ppl: 11.284649 +epoch: 2, batch: 10389, sum loss: 3878.261963, avg loss: 2.383689, ppl: 10.844835 +epoch: 2, batch: 10390, sum loss: 3915.163086, avg loss: 2.296283, ppl: 9.937182 +epoch: 2, batch: 10391, sum loss: 3923.167480, avg loss: 2.257288, ppl: 9.557139 +epoch: 2, batch: 10392, sum loss: 4455.764648, avg loss: 2.441515, ppl: 11.490436 +epoch: 2, batch: 10393, sum loss: 3886.620605, avg loss: 2.402114, ppl: 11.046506 +epoch: 2, batch: 10394, sum loss: 4501.967285, avg loss: 2.543484, ppl: 12.723929 +epoch: 2, batch: 10395, sum loss: 3950.568359, avg loss: 2.514684, ppl: 12.362701 +epoch: 2, batch: 10396, sum loss: 4602.061035, avg loss: 2.760684, ppl: 15.810661 +epoch: 2, batch: 10397, sum loss: 4400.225586, avg loss: 2.436448, ppl: 11.432364 +epoch: 2, batch: 10398, sum loss: 4107.656250, avg loss: 2.404951, ppl: 11.077886 +epoch: 2, batch: 10399, sum loss: 4290.372070, avg loss: 2.567548, ppl: 13.033824 +epoch: 2, batch: 10400, sum loss: 3734.961426, avg loss: 2.423726, ppl: 11.287838 +epoch: 2, batch: 10401, sum loss: 4844.179688, avg loss: 2.726044, ppl: 15.272346 +epoch: 2, batch: 10402, sum loss: 3464.926270, avg loss: 2.464386, ppl: 11.756259 +epoch: 2, batch: 10403, sum loss: 4617.540527, avg loss: 2.622113, ppl: 13.764774 +epoch: 2, batch: 10404, sum loss: 5425.976562, avg loss: 2.726621, ppl: 15.281171 +epoch: 2, batch: 10405, sum loss: 4114.993652, avg loss: 2.584795, ppl: 13.260571 +epoch: 2, batch: 10406, sum loss: 5177.302734, avg loss: 2.732086, ppl: 15.364904 +epoch: 2, batch: 10407, sum loss: 3897.933838, avg loss: 2.511555, ppl: 12.324084 +epoch: 2, batch: 10408, sum loss: 5204.348145, avg loss: 2.668896, ppl: 14.424043 +epoch: 2, batch: 10409, sum loss: 3880.287598, avg loss: 2.564632, ppl: 12.995872 +epoch: 2, batch: 10410, sum loss: 4177.755859, avg loss: 2.721665, ppl: 15.205617 +epoch: 2, batch: 10411, sum loss: 
3471.756104, avg loss: 2.368183, ppl: 10.677972 +epoch: 2, batch: 10412, sum loss: 4598.056641, avg loss: 2.714319, ppl: 15.094327 +epoch: 2, batch: 10413, sum loss: 4116.084473, avg loss: 2.421226, ppl: 11.259658 +epoch: 2, batch: 10414, sum loss: 4011.882324, avg loss: 2.366892, ppl: 10.664198 +epoch: 2, batch: 10415, sum loss: 4669.392578, avg loss: 2.503696, ppl: 12.227600 +epoch: 2, batch: 10416, sum loss: 4681.711914, avg loss: 2.742655, ppl: 15.528158 +epoch: 2, batch: 10417, sum loss: 3954.529297, avg loss: 2.393783, ppl: 10.954856 +epoch: 2, batch: 10418, sum loss: 3654.481934, avg loss: 2.569959, ppl: 13.065290 +epoch: 2, batch: 10419, sum loss: 3972.663574, avg loss: 2.345138, ppl: 10.434711 +epoch: 2, batch: 10420, sum loss: 4036.179199, avg loss: 2.615800, ppl: 13.678153 +epoch: 2, batch: 10421, sum loss: 3402.627930, avg loss: 2.282111, ppl: 9.797345 +epoch: 2, batch: 10422, sum loss: 4913.655273, avg loss: 2.682126, ppl: 14.616138 +epoch: 2, batch: 10423, sum loss: 3990.781006, avg loss: 2.433403, ppl: 11.397602 +epoch: 2, batch: 10424, sum loss: 4424.803711, avg loss: 2.707958, ppl: 14.998620 +epoch: 2, batch: 10425, sum loss: 4140.571777, avg loss: 2.397552, ppl: 10.996222 +epoch: 2, batch: 10426, sum loss: 4567.801758, avg loss: 2.534851, ppl: 12.614555 +epoch: 2, batch: 10427, sum loss: 3931.925537, avg loss: 2.571567, ppl: 13.086312 +epoch: 2, batch: 10428, sum loss: 3957.969971, avg loss: 2.590295, ppl: 13.333699 +epoch: 2, batch: 10429, sum loss: 4379.960449, avg loss: 2.740901, ppl: 15.500953 +epoch: 2, batch: 10430, sum loss: 4418.820312, avg loss: 2.500747, ppl: 12.191600 +epoch: 2, batch: 10431, sum loss: 4644.876465, avg loss: 2.700510, ppl: 14.887316 +epoch: 2, batch: 10432, sum loss: 3941.267578, avg loss: 2.521604, ppl: 12.448552 +epoch: 2, batch: 10433, sum loss: 3524.197998, avg loss: 2.415489, ppl: 11.195240 +epoch: 2, batch: 10434, sum loss: 3983.311035, avg loss: 2.436276, ppl: 11.430391 +epoch: 2, batch: 10435, sum loss: 
4483.350098, avg loss: 2.651301, ppl: 14.172467 +epoch: 2, batch: 10436, sum loss: 4542.889648, avg loss: 2.552185, ppl: 12.835122 +epoch: 2, batch: 10437, sum loss: 4555.291504, avg loss: 2.483801, ppl: 11.986741 +epoch: 2, batch: 10438, sum loss: 4609.469238, avg loss: 2.824430, ppl: 16.851334 +epoch: 2, batch: 10439, sum loss: 4811.345703, avg loss: 2.519029, ppl: 12.416536 +epoch: 2, batch: 10440, sum loss: 3851.127441, avg loss: 2.381650, ppl: 10.822742 +epoch: 2, batch: 10441, sum loss: 4952.005371, avg loss: 2.697171, ppl: 14.837692 +epoch: 2, batch: 10442, sum loss: 4476.268066, avg loss: 2.509119, ppl: 12.294095 +epoch: 2, batch: 10443, sum loss: 3329.879883, avg loss: 2.385301, ppl: 10.862330 +epoch: 2, batch: 10444, sum loss: 4129.876953, avg loss: 2.512091, ppl: 12.330683 +epoch: 2, batch: 10445, sum loss: 5243.130371, avg loss: 2.615027, ppl: 13.667591 +epoch: 2, batch: 10446, sum loss: 4720.487305, avg loss: 2.634201, ppl: 13.932170 +epoch: 2, batch: 10447, sum loss: 4203.852051, avg loss: 2.503783, ppl: 12.228670 +epoch: 2, batch: 10448, sum loss: 5028.089844, avg loss: 2.589130, ppl: 13.318175 +epoch: 2, batch: 10449, sum loss: 4115.011230, avg loss: 2.366309, ppl: 10.657980 +epoch: 2, batch: 10450, sum loss: 3967.063965, avg loss: 2.344600, ppl: 10.429105 +epoch: 2, batch: 10451, sum loss: 4698.015625, avg loss: 2.742566, ppl: 15.526773 +epoch: 2, batch: 10452, sum loss: 3893.405273, avg loss: 2.681409, ppl: 14.605653 +epoch: 2, batch: 10453, sum loss: 3510.411133, avg loss: 2.191268, ppl: 8.946548 +epoch: 2, batch: 10454, sum loss: 5157.020508, avg loss: 2.631133, ppl: 13.889496 +epoch: 2, batch: 10455, sum loss: 4276.767090, avg loss: 2.469265, ppl: 11.813761 +epoch: 2, batch: 10456, sum loss: 4054.314453, avg loss: 2.454186, ppl: 11.636954 +epoch: 2, batch: 10457, sum loss: 5182.126465, avg loss: 2.713155, ppl: 15.076772 +epoch: 2, batch: 10458, sum loss: 4292.187500, avg loss: 2.596605, ppl: 13.418100 +epoch: 2, batch: 10459, sum loss: 
4458.846191, avg loss: 2.730463, ppl: 15.339988 +epoch: 2, batch: 10460, sum loss: 3871.268066, avg loss: 2.424088, ppl: 11.291924 +epoch: 2, batch: 10461, sum loss: 3740.792969, avg loss: 2.482278, ppl: 11.968499 +epoch: 2, batch: 10462, sum loss: 4141.875000, avg loss: 2.379021, ppl: 10.794327 +epoch: 2, batch: 10463, sum loss: 5214.283691, avg loss: 2.909757, ppl: 18.352333 +epoch: 2, batch: 10464, sum loss: 5169.984375, avg loss: 2.796098, ppl: 16.380596 +epoch: 2, batch: 10465, sum loss: 4161.898438, avg loss: 2.436709, ppl: 11.435342 +epoch: 2, batch: 10466, sum loss: 5033.906738, avg loss: 2.641084, ppl: 14.028405 +epoch: 2, batch: 10467, sum loss: 4193.082031, avg loss: 2.614141, ppl: 13.655481 +epoch: 2, batch: 10468, sum loss: 3783.833008, avg loss: 2.455440, ppl: 11.651560 +epoch: 2, batch: 10469, sum loss: 4501.329590, avg loss: 2.560483, ppl: 12.942070 +epoch: 2, batch: 10470, sum loss: 3771.811523, avg loss: 2.382698, ppl: 10.834097 +epoch: 2, batch: 10471, sum loss: 4309.468750, avg loss: 2.701861, ppl: 14.907454 +epoch: 2, batch: 10472, sum loss: 4440.252930, avg loss: 2.678078, ppl: 14.557083 +epoch: 2, batch: 10473, sum loss: 4089.823975, avg loss: 2.591777, ppl: 13.353477 +epoch: 2, batch: 10474, sum loss: 4791.755859, avg loss: 2.784286, ppl: 16.188251 +epoch: 2, batch: 10475, sum loss: 3640.564941, avg loss: 2.468180, ppl: 11.800946 +epoch: 2, batch: 10476, sum loss: 3947.610596, avg loss: 2.524048, ppl: 12.479005 +epoch: 2, batch: 10477, sum loss: 4721.220215, avg loss: 2.700927, ppl: 14.893532 +epoch: 2, batch: 10478, sum loss: 4497.545898, avg loss: 2.518223, ppl: 12.406528 +epoch: 2, batch: 10479, sum loss: 4016.514648, avg loss: 2.321685, ppl: 10.192831 +epoch: 2, batch: 10480, sum loss: 4015.086914, avg loss: 2.692882, ppl: 14.774195 +epoch: 2, batch: 10481, sum loss: 3798.594727, avg loss: 2.589362, ppl: 13.321275 +epoch: 2, batch: 10482, sum loss: 5427.884277, avg loss: 2.967679, ppl: 19.446728 +epoch: 2, batch: 10483, sum loss: 
3682.050293, avg loss: 2.373985, ppl: 10.740102 +epoch: 2, batch: 10484, sum loss: 3322.312012, avg loss: 2.201665, ppl: 9.040050 +epoch: 2, batch: 10485, sum loss: 4509.238770, avg loss: 2.724616, ppl: 15.250550 +epoch: 2, batch: 10486, sum loss: 4405.731445, avg loss: 2.719587, ppl: 15.174059 +epoch: 2, batch: 10487, sum loss: 3843.507812, avg loss: 2.216556, ppl: 9.175673 +epoch: 2, batch: 10488, sum loss: 4092.956055, avg loss: 2.295545, ppl: 9.929843 +epoch: 2, batch: 10489, sum loss: 4213.161621, avg loss: 2.623388, ppl: 13.782343 +epoch: 2, batch: 10490, sum loss: 5369.763184, avg loss: 2.782261, ppl: 16.155506 +epoch: 2, batch: 10491, sum loss: 4936.252930, avg loss: 2.652473, ppl: 14.189092 +epoch: 2, batch: 10492, sum loss: 4375.241211, avg loss: 2.458001, ppl: 11.681433 +epoch: 2, batch: 10493, sum loss: 3911.533203, avg loss: 2.401187, ppl: 11.036265 +epoch: 2, batch: 10494, sum loss: 4381.773438, avg loss: 2.491060, ppl: 12.074062 +epoch: 2, batch: 10495, sum loss: 3967.917969, avg loss: 2.374577, ppl: 10.746467 +epoch: 2, batch: 10496, sum loss: 4063.637207, avg loss: 2.583368, ppl: 13.241655 +epoch: 2, batch: 10497, sum loss: 4952.338379, avg loss: 2.651145, ppl: 14.170251 +epoch: 2, batch: 10498, sum loss: 3684.833984, avg loss: 2.557137, ppl: 12.898832 +epoch: 2, batch: 10499, sum loss: 3721.432617, avg loss: 2.362814, ppl: 10.620798 +epoch: 2, batch: 10500, sum loss: 4096.433105, avg loss: 2.344839, ppl: 10.431589 +epoch: 2, batch: 10501, sum loss: 4723.482910, avg loss: 2.456309, ppl: 11.661693 +epoch: 2, batch: 10502, sum loss: 4648.299316, avg loss: 2.747222, ppl: 15.599233 +epoch: 2, batch: 10503, sum loss: 3862.711426, avg loss: 2.423282, ppl: 11.282827 +epoch: 2, batch: 10504, sum loss: 4159.144043, avg loss: 2.573728, ppl: 13.114619 +epoch: 2, batch: 10505, sum loss: 4389.506348, avg loss: 2.582063, ppl: 13.224388 +epoch: 2, batch: 10506, sum loss: 3707.068359, avg loss: 2.426092, ppl: 11.314577 +epoch: 2, batch: 10507, sum loss: 
4001.279785, avg loss: 2.439805, ppl: 11.470801 +epoch: 2, batch: 10508, sum loss: 4005.064209, avg loss: 2.517325, ppl: 12.395396 +epoch: 2, batch: 10509, sum loss: 4456.636230, avg loss: 2.648031, ppl: 14.126197 +epoch: 2, batch: 10510, sum loss: 4708.697266, avg loss: 2.595754, ppl: 13.406691 +epoch: 2, batch: 10511, sum loss: 3719.730713, avg loss: 2.509940, ppl: 12.304191 +epoch: 2, batch: 10512, sum loss: 3980.206787, avg loss: 2.349591, ppl: 10.481279 +epoch: 2, batch: 10513, sum loss: 3900.876465, avg loss: 2.484635, ppl: 11.996737 +epoch: 2, batch: 10514, sum loss: 4885.392578, avg loss: 2.639326, ppl: 14.003763 +epoch: 2, batch: 10515, sum loss: 3071.981445, avg loss: 2.341449, ppl: 10.396293 +epoch: 2, batch: 10516, sum loss: 4176.133789, avg loss: 2.611716, ppl: 13.622404 +epoch: 2, batch: 10517, sum loss: 4201.823242, avg loss: 2.577806, ppl: 13.168212 +epoch: 2, batch: 10518, sum loss: 4911.650391, avg loss: 2.721136, ppl: 15.197575 +epoch: 2, batch: 10519, sum loss: 4287.322754, avg loss: 2.461150, ppl: 11.718276 +epoch: 2, batch: 10520, sum loss: 4126.181641, avg loss: 2.626468, ppl: 13.824860 +epoch: 2, batch: 10521, sum loss: 4474.060547, avg loss: 2.520597, ppl: 12.436025 +epoch: 2, batch: 10522, sum loss: 3862.922852, avg loss: 2.473062, ppl: 11.858703 +epoch: 2, batch: 10523, sum loss: 4609.746582, avg loss: 2.410955, ppl: 11.144601 +epoch: 2, batch: 10524, sum loss: 4577.475586, avg loss: 2.404136, ppl: 11.068868 +epoch: 2, batch: 10525, sum loss: 4343.565918, avg loss: 2.718126, ppl: 15.151905 +epoch: 2, batch: 10526, sum loss: 4349.988281, avg loss: 2.592365, ppl: 13.361331 +epoch: 2, batch: 10527, sum loss: 3743.914795, avg loss: 2.308209, ppl: 10.056395 +epoch: 2, batch: 10528, sum loss: 4170.140625, avg loss: 2.544320, ppl: 12.734567 +epoch: 2, batch: 10529, sum loss: 2713.140869, avg loss: 2.160144, ppl: 8.672387 +epoch: 2, batch: 10530, sum loss: 4281.116699, avg loss: 2.443560, ppl: 11.513956 +epoch: 2, batch: 10531, sum loss: 
4334.296875, avg loss: 2.540619, ppl: 12.687527 +epoch: 2, batch: 10532, sum loss: 4467.036133, avg loss: 2.533770, ppl: 12.600921 +epoch: 2, batch: 10533, sum loss: 3540.682861, avg loss: 2.186957, ppl: 8.908061 +epoch: 2, batch: 10534, sum loss: 3862.377930, avg loss: 2.224872, ppl: 9.252299 +epoch: 2, batch: 10535, sum loss: 3990.531738, avg loss: 2.540122, ppl: 12.681221 +epoch: 2, batch: 10536, sum loss: 4295.566406, avg loss: 2.678034, ppl: 14.556448 +epoch: 2, batch: 10537, sum loss: 4172.047363, avg loss: 2.757467, ppl: 15.759873 +epoch: 2, batch: 10538, sum loss: 3689.916260, avg loss: 2.506737, ppl: 12.264841 +epoch: 2, batch: 10539, sum loss: 4546.417969, avg loss: 2.629507, ppl: 13.866936 +epoch: 2, batch: 10540, sum loss: 4667.274902, avg loss: 2.571501, ppl: 13.085458 +epoch: 2, batch: 10541, sum loss: 4037.694092, avg loss: 2.363989, ppl: 10.633286 +epoch: 2, batch: 10542, sum loss: 3141.136230, avg loss: 2.133924, ppl: 8.447951 +epoch: 2, batch: 10543, sum loss: 4901.877441, avg loss: 2.898804, ppl: 18.152426 +epoch: 2, batch: 10544, sum loss: 3937.089355, avg loss: 2.408006, ppl: 11.111779 +epoch: 2, batch: 10545, sum loss: 5266.969238, avg loss: 2.764813, ppl: 15.876074 +epoch: 2, batch: 10546, sum loss: 5801.782715, avg loss: 2.914004, ppl: 18.430452 +epoch: 2, batch: 10547, sum loss: 4772.171875, avg loss: 2.790744, ppl: 16.293135 +epoch: 2, batch: 10548, sum loss: 3720.028564, avg loss: 2.403119, ppl: 11.057615 +epoch: 2, batch: 10549, sum loss: 4847.531250, avg loss: 2.676715, ppl: 14.537266 +epoch: 2, batch: 10550, sum loss: 4745.240234, avg loss: 2.762072, ppl: 15.832620 +epoch: 2, batch: 10551, sum loss: 4104.172852, avg loss: 2.294116, ppl: 9.915662 +epoch: 2, batch: 10552, sum loss: 5268.010254, avg loss: 2.694634, ppl: 14.800107 +epoch: 2, batch: 10553, sum loss: 4100.333984, avg loss: 2.626735, ppl: 13.828552 +epoch: 2, batch: 10554, sum loss: 3898.825684, avg loss: 2.420128, ppl: 11.247297 +epoch: 2, batch: 10555, sum loss: 
4402.492188, avg loss: 2.666561, ppl: 14.390397 +epoch: 2, batch: 10556, sum loss: 4349.417480, avg loss: 2.872799, ppl: 17.686453 +epoch: 2, batch: 10557, sum loss: 3854.655273, avg loss: 2.398665, ppl: 11.008475 +epoch: 2, batch: 10558, sum loss: 4227.708984, avg loss: 2.357897, ppl: 10.568700 +epoch: 2, batch: 10559, sum loss: 3576.833740, avg loss: 2.418414, ppl: 11.228033 +epoch: 2, batch: 10560, sum loss: 3649.072021, avg loss: 2.463924, ppl: 11.750836 +epoch: 2, batch: 10561, sum loss: 4267.577148, avg loss: 2.549330, ppl: 12.798520 +epoch: 2, batch: 10562, sum loss: 4231.908203, avg loss: 2.601050, ppl: 13.477881 +epoch: 2, batch: 10563, sum loss: 3800.050293, avg loss: 2.383971, ppl: 10.847897 +epoch: 2, batch: 10564, sum loss: 3477.919189, avg loss: 2.348359, ppl: 10.468373 +epoch: 2, batch: 10565, sum loss: 4867.411133, avg loss: 2.544386, ppl: 12.735411 +epoch: 2, batch: 10566, sum loss: 3701.564453, avg loss: 2.145835, ppl: 8.549174 +epoch: 2, batch: 10567, sum loss: 4441.726562, avg loss: 2.548323, ppl: 12.785640 +epoch: 2, batch: 10568, sum loss: 4841.509277, avg loss: 2.605764, ppl: 13.541566 +epoch: 2, batch: 10569, sum loss: 3468.526855, avg loss: 2.292483, ppl: 9.899488 +epoch: 2, batch: 10570, sum loss: 4533.618652, avg loss: 2.559920, ppl: 12.934783 +epoch: 2, batch: 10571, sum loss: 4834.752441, avg loss: 2.503756, ppl: 12.228335 +epoch: 2, batch: 10572, sum loss: 4661.221680, avg loss: 2.646917, ppl: 14.110474 +epoch: 2, batch: 10573, sum loss: 5525.601562, avg loss: 2.855608, ppl: 17.385004 +epoch: 2, batch: 10574, sum loss: 4512.610352, avg loss: 2.381325, ppl: 10.819226 +epoch: 2, batch: 10575, sum loss: 3119.161133, avg loss: 2.381039, ppl: 10.816136 +epoch: 2, batch: 10576, sum loss: 4735.424316, avg loss: 2.561073, ppl: 12.949706 +epoch: 2, batch: 10577, sum loss: 3704.800781, avg loss: 2.362756, ppl: 10.620175 +epoch: 2, batch: 10578, sum loss: 5095.934570, avg loss: 2.640381, ppl: 14.018538 +epoch: 2, batch: 10579, sum loss: 
4188.342773, avg loss: 2.560112, ppl: 12.937263 +epoch: 2, batch: 10580, sum loss: 3532.648926, avg loss: 2.202400, ppl: 9.046697 +epoch: 2, batch: 10581, sum loss: 3910.914307, avg loss: 2.487859, ppl: 12.035480 +epoch: 2, batch: 10582, sum loss: 4491.370605, avg loss: 2.594668, ppl: 13.392142 +epoch: 2, batch: 10583, sum loss: 3985.964844, avg loss: 2.460472, ppl: 11.710341 +epoch: 2, batch: 10584, sum loss: 5377.534668, avg loss: 2.927346, ppl: 18.677998 +epoch: 2, batch: 10585, sum loss: 4919.929688, avg loss: 2.514016, ppl: 12.354448 +epoch: 2, batch: 10586, sum loss: 2930.693604, avg loss: 2.142320, ppl: 8.519178 +epoch: 2, batch: 10587, sum loss: 4352.998047, avg loss: 2.539672, ppl: 12.675514 +epoch: 2, batch: 10588, sum loss: 5167.029297, avg loss: 2.849989, ppl: 17.287586 +epoch: 2, batch: 10589, sum loss: 4614.517578, avg loss: 2.708050, ppl: 15.000001 +epoch: 2, batch: 10590, sum loss: 4352.392090, avg loss: 2.520204, ppl: 12.431130 +epoch: 2, batch: 10591, sum loss: 3648.966797, avg loss: 2.381832, ppl: 10.824717 +epoch: 2, batch: 10592, sum loss: 3798.677246, avg loss: 2.438175, ppl: 11.452127 +epoch: 2, batch: 10593, sum loss: 5287.047363, avg loss: 2.884368, ppl: 17.892263 +epoch: 2, batch: 10594, sum loss: 3378.912109, avg loss: 2.301711, ppl: 9.991266 +epoch: 2, batch: 10595, sum loss: 4429.071289, avg loss: 2.742459, ppl: 15.525115 +epoch: 2, batch: 10596, sum loss: 4232.820312, avg loss: 2.598416, ppl: 13.442432 +epoch: 2, batch: 10597, sum loss: 4154.394531, avg loss: 2.581973, ppl: 13.223203 +epoch: 2, batch: 10598, sum loss: 5504.913086, avg loss: 2.794372, ppl: 16.352358 +epoch: 2, batch: 10599, sum loss: 3913.105957, avg loss: 2.524585, ppl: 12.485706 +epoch: 2, batch: 10600, sum loss: 4597.151855, avg loss: 2.637494, ppl: 13.978128 +epoch: 2, batch: 10601, sum loss: 4761.765137, avg loss: 2.773305, ppl: 16.011467 +epoch: 2, batch: 10602, sum loss: 4293.701660, avg loss: 2.599093, ppl: 13.451534 +epoch: 2, batch: 10603, sum loss: 
3421.366455, avg loss: 2.221667, ppl: 9.222689 +epoch: 2, batch: 10604, sum loss: 4184.051270, avg loss: 2.678650, ppl: 14.565415 +epoch: 2, batch: 10605, sum loss: 4482.323730, avg loss: 2.584962, ppl: 13.262780 +epoch: 2, batch: 10606, sum loss: 4298.940430, avg loss: 2.642250, ppl: 14.044766 +epoch: 2, batch: 10607, sum loss: 4379.868164, avg loss: 2.546435, ppl: 12.761526 +epoch: 2, batch: 10608, sum loss: 2962.134521, avg loss: 2.200694, ppl: 9.031281 +epoch: 2, batch: 10609, sum loss: 4311.316895, avg loss: 2.506580, ppl: 12.262915 +epoch: 2, batch: 10610, sum loss: 3907.749023, avg loss: 2.345587, ppl: 10.439404 +epoch: 2, batch: 10611, sum loss: 4061.184570, avg loss: 2.552599, ppl: 12.840429 +epoch: 2, batch: 10612, sum loss: 4138.391602, avg loss: 2.588112, ppl: 13.304633 +epoch: 2, batch: 10613, sum loss: 4443.429688, avg loss: 2.452224, ppl: 11.614145 +epoch: 2, batch: 10614, sum loss: 4764.520508, avg loss: 2.590822, ppl: 13.340727 +epoch: 2, batch: 10615, sum loss: 3895.080566, avg loss: 2.426841, ppl: 11.323062 +epoch: 2, batch: 10616, sum loss: 3728.625488, avg loss: 2.254308, ppl: 9.528699 +epoch: 2, batch: 10617, sum loss: 4930.303711, avg loss: 2.929473, ppl: 18.717772 +epoch: 2, batch: 10618, sum loss: 3896.305664, avg loss: 2.473845, ppl: 11.867989 +epoch: 2, batch: 10619, sum loss: 3866.035889, avg loss: 2.464013, ppl: 11.751873 +epoch: 2, batch: 10620, sum loss: 4204.566406, avg loss: 2.319121, ppl: 10.166735 +epoch: 2, batch: 10621, sum loss: 3414.090332, avg loss: 2.402597, ppl: 11.051840 +epoch: 2, batch: 10622, sum loss: 3937.084473, avg loss: 2.413908, ppl: 11.177561 +epoch: 2, batch: 10623, sum loss: 5091.722656, avg loss: 2.731611, ppl: 15.357605 +epoch: 2, batch: 10624, sum loss: 4978.867676, avg loss: 2.762968, ppl: 15.846804 +epoch: 2, batch: 10625, sum loss: 4270.264160, avg loss: 2.528280, ppl: 12.531927 +epoch: 2, batch: 10626, sum loss: 3187.524902, avg loss: 2.326661, ppl: 10.243677 +epoch: 2, batch: 10627, sum loss: 
4045.117188, avg loss: 2.529779, ppl: 12.550735 +epoch: 2, batch: 10628, sum loss: 3847.887939, avg loss: 2.599924, ppl: 13.462719 +epoch: 2, batch: 10629, sum loss: 4933.181152, avg loss: 2.666584, ppl: 14.390734 +epoch: 2, batch: 10630, sum loss: 4307.896484, avg loss: 2.593556, ppl: 13.377255 +epoch: 2, batch: 10631, sum loss: 4121.742676, avg loss: 2.566465, ppl: 13.019717 +epoch: 2, batch: 10632, sum loss: 3773.604248, avg loss: 2.536024, ppl: 12.629361 +epoch: 2, batch: 10633, sum loss: 4413.123047, avg loss: 2.658508, ppl: 14.274972 +epoch: 2, batch: 10634, sum loss: 4108.390625, avg loss: 2.582269, ppl: 13.227122 +epoch: 2, batch: 10635, sum loss: 4904.997559, avg loss: 2.530959, ppl: 12.565546 +epoch: 2, batch: 10636, sum loss: 4402.352539, avg loss: 2.528635, ppl: 12.536377 +epoch: 2, batch: 10637, sum loss: 4745.057129, avg loss: 2.646435, ppl: 14.103662 +epoch: 2, batch: 10638, sum loss: 4598.027344, avg loss: 2.783310, ppl: 16.172459 +epoch: 2, batch: 10639, sum loss: 4973.169922, avg loss: 2.507902, ppl: 12.279143 +epoch: 2, batch: 10640, sum loss: 4176.222656, avg loss: 2.359448, ppl: 10.585107 +epoch: 2, batch: 10641, sum loss: 3760.492188, avg loss: 2.360636, ppl: 10.597684 +epoch: 2, batch: 10642, sum loss: 3952.214844, avg loss: 2.358124, ppl: 10.571096 +epoch: 2, batch: 10643, sum loss: 3628.399658, avg loss: 2.446662, ppl: 11.549728 +epoch: 2, batch: 10644, sum loss: 3394.815918, avg loss: 2.286071, ppl: 9.836218 +epoch: 2, batch: 10645, sum loss: 4621.139648, avg loss: 2.734402, ppl: 15.400534 +epoch: 2, batch: 10646, sum loss: 4060.377930, avg loss: 2.389863, ppl: 10.912004 +epoch: 2, batch: 10647, sum loss: 5054.455078, avg loss: 2.847580, ppl: 17.246000 +epoch: 2, batch: 10648, sum loss: 4798.011719, avg loss: 2.574041, ppl: 13.118726 +epoch: 2, batch: 10649, sum loss: 5488.852051, avg loss: 2.606292, ppl: 13.548725 +epoch: 2, batch: 10650, sum loss: 3959.424316, avg loss: 2.505965, ppl: 12.255377 +epoch: 2, batch: 10651, sum loss: 
4324.041504, avg loss: 2.530159, ppl: 12.555502 +epoch: 2, batch: 10652, sum loss: 3776.046875, avg loss: 2.524096, ppl: 12.479603 +epoch: 2, batch: 10653, sum loss: 4024.901855, avg loss: 2.375975, ppl: 10.761502 +epoch: 2, batch: 10654, sum loss: 4311.250000, avg loss: 2.479155, ppl: 11.931176 +epoch: 2, batch: 10655, sum loss: 3852.270264, avg loss: 2.302612, ppl: 10.000272 +epoch: 2, batch: 10656, sum loss: 4147.255371, avg loss: 2.316902, ppl: 10.144203 +epoch: 2, batch: 10657, sum loss: 3566.051514, avg loss: 2.262723, ppl: 9.609220 +epoch: 2, batch: 10658, sum loss: 4376.527344, avg loss: 2.657272, ppl: 14.257343 +epoch: 2, batch: 10659, sum loss: 4614.872070, avg loss: 2.599928, ppl: 13.462764 +epoch: 2, batch: 10660, sum loss: 4218.434082, avg loss: 2.643129, ppl: 14.057121 +epoch: 2, batch: 10661, sum loss: 4131.571289, avg loss: 2.577399, ppl: 13.162860 +epoch: 2, batch: 10662, sum loss: 4223.780762, avg loss: 2.523167, ppl: 12.468016 +epoch: 2, batch: 10663, sum loss: 4299.081055, avg loss: 2.568149, ppl: 13.041660 +epoch: 2, batch: 10664, sum loss: 3904.880371, avg loss: 2.445135, ppl: 11.532104 +epoch: 2, batch: 10665, sum loss: 3977.696045, avg loss: 2.490730, ppl: 12.070087 +epoch: 2, batch: 10666, sum loss: 3771.698486, avg loss: 2.413115, ppl: 11.168695 +epoch: 2, batch: 10667, sum loss: 4596.683105, avg loss: 2.643291, ppl: 14.059397 +epoch: 2, batch: 10668, sum loss: 4350.962891, avg loss: 2.664398, ppl: 14.359309 +epoch: 2, batch: 10669, sum loss: 4033.808105, avg loss: 2.402506, ppl: 11.050839 +epoch: 2, batch: 10670, sum loss: 3263.119873, avg loss: 2.171071, ppl: 8.767670 +epoch: 2, batch: 10671, sum loss: 3471.768555, avg loss: 2.306823, ppl: 10.042469 +epoch: 2, batch: 10672, sum loss: 4488.271973, avg loss: 2.846082, ppl: 17.220188 +epoch: 2, batch: 10673, sum loss: 4714.001953, avg loss: 2.720140, ppl: 15.182440 +epoch: 2, batch: 10674, sum loss: 4542.887695, avg loss: 2.546462, ppl: 12.761870 +epoch: 2, batch: 10675, sum loss: 
4298.861816, avg loss: 2.527256, ppl: 12.519101 +epoch: 2, batch: 10676, sum loss: 3542.488525, avg loss: 2.285476, ppl: 9.830369 +epoch: 2, batch: 10677, sum loss: 4260.366211, avg loss: 2.488532, ppl: 12.043578 +epoch: 2, batch: 10678, sum loss: 4006.073730, avg loss: 2.410393, ppl: 11.138343 +epoch: 2, batch: 10679, sum loss: 4291.354492, avg loss: 2.540766, ppl: 12.689393 +epoch: 2, batch: 10680, sum loss: 4574.159668, avg loss: 2.725959, ppl: 15.271056 +epoch: 2, batch: 10681, sum loss: 4917.706543, avg loss: 2.649626, ppl: 14.148749 +epoch: 2, batch: 10682, sum loss: 6211.818848, avg loss: 2.869200, ppl: 17.622923 +epoch: 2, batch: 10683, sum loss: 4345.705566, avg loss: 2.583654, ppl: 13.245444 +epoch: 2, batch: 10684, sum loss: 4560.099121, avg loss: 2.634373, ppl: 13.934568 +epoch: 2, batch: 10685, sum loss: 3777.793457, avg loss: 2.406238, ppl: 11.092152 +epoch: 2, batch: 10686, sum loss: 3708.196289, avg loss: 2.374005, ppl: 10.740325 +epoch: 2, batch: 10687, sum loss: 4894.701172, avg loss: 2.591160, ppl: 13.345243 +epoch: 2, batch: 10688, sum loss: 4596.569824, avg loss: 2.463328, ppl: 11.743826 +epoch: 2, batch: 10689, sum loss: 5087.338379, avg loss: 2.677547, ppl: 14.549356 +epoch: 2, batch: 10690, sum loss: 4764.505371, avg loss: 2.597876, ppl: 13.435176 +epoch: 2, batch: 10691, sum loss: 4296.967773, avg loss: 2.377957, ppl: 10.782847 +epoch: 2, batch: 10692, sum loss: 4028.580566, avg loss: 2.549735, ppl: 12.803705 +epoch: 2, batch: 10693, sum loss: 5232.759766, avg loss: 2.645480, ppl: 14.090205 +epoch: 2, batch: 10694, sum loss: 3993.541016, avg loss: 2.514824, ppl: 12.364437 +epoch: 2, batch: 10695, sum loss: 4385.673340, avg loss: 2.520502, ppl: 12.434835 +epoch: 2, batch: 10696, sum loss: 4551.042969, avg loss: 2.454716, ppl: 11.643124 +epoch: 2, batch: 10697, sum loss: 4882.190430, avg loss: 3.057101, ppl: 21.265823 +epoch: 2, batch: 10698, sum loss: 4676.957520, avg loss: 2.591112, ppl: 13.344604 +epoch: 2, batch: 10699, sum loss: 
4518.462402, avg loss: 2.545613, ppl: 12.751037 +epoch: 2, batch: 10700, sum loss: 4707.445801, avg loss: 2.516005, ppl: 12.379046 +epoch: 2, batch: 10701, sum loss: 4223.211914, avg loss: 2.149217, ppl: 8.578142 +epoch: 2, batch: 10702, sum loss: 4122.386719, avg loss: 2.462597, ppl: 11.735245 +epoch: 2, batch: 10703, sum loss: 4287.721680, avg loss: 2.418343, ppl: 11.227239 +epoch: 2, batch: 10704, sum loss: 5420.402832, avg loss: 2.707494, ppl: 14.991656 +epoch: 2, batch: 10705, sum loss: 3524.461670, avg loss: 2.340280, ppl: 10.384144 +epoch: 2, batch: 10706, sum loss: 3678.071289, avg loss: 2.223743, ppl: 9.241860 +epoch: 2, batch: 10707, sum loss: 3617.353516, avg loss: 2.277930, ppl: 9.756469 +epoch: 2, batch: 10708, sum loss: 4197.504883, avg loss: 2.308859, ppl: 10.062933 +epoch: 2, batch: 10709, sum loss: 4330.851562, avg loss: 2.562634, ppl: 12.969935 +epoch: 2, batch: 10710, sum loss: 3870.104736, avg loss: 2.613170, ppl: 13.642230 +epoch: 2, batch: 10711, sum loss: 4093.167969, avg loss: 2.539186, ppl: 12.669353 +epoch: 2, batch: 10712, sum loss: 4650.991699, avg loss: 2.406100, ppl: 11.090627 +epoch: 2, batch: 10713, sum loss: 4369.312500, avg loss: 2.391523, ppl: 10.930129 +epoch: 2, batch: 10714, sum loss: 4088.568359, avg loss: 2.594269, ppl: 13.386802 +epoch: 2, batch: 10715, sum loss: 4994.816406, avg loss: 2.522634, ppl: 12.461380 +epoch: 2, batch: 10716, sum loss: 4570.840820, avg loss: 2.514214, ppl: 12.356890 +epoch: 2, batch: 10717, sum loss: 3743.792969, avg loss: 2.377011, ppl: 10.772658 +epoch: 2, batch: 10718, sum loss: 4790.845703, avg loss: 2.742327, ppl: 15.523068 +epoch: 2, batch: 10719, sum loss: 4472.290039, avg loss: 2.429272, ppl: 11.350617 +epoch: 2, batch: 10720, sum loss: 4181.306641, avg loss: 2.412756, ppl: 11.164691 +epoch: 2, batch: 10721, sum loss: 4591.580566, avg loss: 2.678869, ppl: 14.568600 +epoch: 2, batch: 10722, sum loss: 4842.102051, avg loss: 2.551160, ppl: 12.821973 +epoch: 2, batch: 10723, sum loss: 
4885.541016, avg loss: 2.632296, ppl: 13.905659 +epoch: 2, batch: 10724, sum loss: 3857.291016, avg loss: 2.316691, ppl: 10.142063 +epoch: 2, batch: 10725, sum loss: 4770.794434, avg loss: 2.632889, ppl: 13.913906 +epoch: 2, batch: 10726, sum loss: 4553.391602, avg loss: 2.470641, ppl: 11.830029 +epoch: 2, batch: 10727, sum loss: 4113.827637, avg loss: 2.640454, ppl: 14.019571 +epoch: 2, batch: 10728, sum loss: 4563.734863, avg loss: 2.443113, ppl: 11.508810 +epoch: 2, batch: 10729, sum loss: 4588.537598, avg loss: 2.539312, ppl: 12.670955 +epoch: 2, batch: 10730, sum loss: 4201.378906, avg loss: 2.512787, ppl: 12.339267 +epoch: 2, batch: 10731, sum loss: 4797.431641, avg loss: 2.464012, ppl: 11.751870 +epoch: 2, batch: 10732, sum loss: 4481.677246, avg loss: 2.647181, ppl: 14.114195 +epoch: 2, batch: 10733, sum loss: 5170.572266, avg loss: 2.997433, ppl: 20.034052 +epoch: 2, batch: 10734, sum loss: 4565.230469, avg loss: 2.469027, ppl: 11.810946 +epoch: 2, batch: 10735, sum loss: 5242.850586, avg loss: 2.841653, ppl: 17.144087 +epoch: 2, batch: 10736, sum loss: 4489.967773, avg loss: 2.642712, ppl: 14.051261 +epoch: 2, batch: 10737, sum loss: 4592.722168, avg loss: 2.475861, ppl: 11.891943 +epoch: 2, batch: 10738, sum loss: 4688.781738, avg loss: 2.478215, ppl: 11.919962 +epoch: 2, batch: 10739, sum loss: 4158.800293, avg loss: 2.540501, ppl: 12.686027 +epoch: 2, batch: 10740, sum loss: 4770.204590, avg loss: 2.651587, ppl: 14.176516 +epoch: 2, batch: 10741, sum loss: 3114.927002, avg loss: 2.280327, ppl: 9.779881 +epoch: 2, batch: 10742, sum loss: 4263.951660, avg loss: 2.417206, ppl: 11.214485 +epoch: 2, batch: 10743, sum loss: 4823.898926, avg loss: 2.492971, ppl: 12.097163 +epoch: 2, batch: 10744, sum loss: 4624.324219, avg loss: 2.697972, ppl: 14.849587 +epoch: 2, batch: 10745, sum loss: 4210.589844, avg loss: 2.488528, ppl: 12.043538 +epoch: 2, batch: 10746, sum loss: 4701.371582, avg loss: 2.482245, ppl: 11.968102 +epoch: 2, batch: 10747, sum loss: 
5032.236816, avg loss: 2.538969, ppl: 12.666605 +epoch: 2, batch: 10748, sum loss: 3704.150879, avg loss: 2.360836, ppl: 10.599805 +epoch: 2, batch: 10749, sum loss: 4835.461426, avg loss: 2.577538, ppl: 13.164690 +epoch: 2, batch: 10750, sum loss: 3792.523926, avg loss: 2.535110, ppl: 12.617813 +epoch: 2, batch: 10751, sum loss: 4330.553223, avg loss: 2.504658, ppl: 12.239369 +epoch: 2, batch: 10752, sum loss: 5229.586914, avg loss: 2.748075, ppl: 15.612549 +epoch: 2, batch: 10753, sum loss: 4698.057129, avg loss: 2.563043, ppl: 12.975236 +epoch: 2, batch: 10754, sum loss: 4678.000000, avg loss: 2.531385, ppl: 12.570907 +epoch: 2, batch: 10755, sum loss: 4421.474609, avg loss: 2.827030, ppl: 16.895203 +epoch: 2, batch: 10756, sum loss: 3502.182861, avg loss: 2.197104, ppl: 8.998910 +epoch: 2, batch: 10757, sum loss: 5045.700684, avg loss: 2.820403, ppl: 16.783611 +epoch: 2, batch: 10758, sum loss: 3656.735352, avg loss: 2.385346, ppl: 10.862820 +epoch: 2, batch: 10759, sum loss: 5389.399902, avg loss: 2.715063, ppl: 15.105559 +epoch: 2, batch: 10760, sum loss: 4621.434082, avg loss: 2.547648, ppl: 12.777022 +epoch: 2, batch: 10761, sum loss: 5074.234375, avg loss: 2.778880, ppl: 16.100975 +epoch: 2, batch: 10762, sum loss: 3850.464844, avg loss: 2.603425, ppl: 13.509924 +epoch: 2, batch: 10763, sum loss: 4337.881836, avg loss: 2.557713, ppl: 12.906270 +epoch: 2, batch: 10764, sum loss: 5796.235352, avg loss: 3.000122, ppl: 20.087984 +epoch: 2, batch: 10765, sum loss: 4125.545898, avg loss: 2.313823, ppl: 10.113010 +epoch: 2, batch: 10766, sum loss: 3857.139160, avg loss: 2.439683, ppl: 11.469407 +epoch: 2, batch: 10767, sum loss: 4494.374512, avg loss: 2.654681, ppl: 14.220448 +epoch: 2, batch: 10768, sum loss: 4062.397705, avg loss: 2.510753, ppl: 12.314196 +epoch: 2, batch: 10769, sum loss: 4454.349609, avg loss: 2.678503, ppl: 14.563270 +epoch: 2, batch: 10770, sum loss: 4087.762207, avg loss: 2.613659, ppl: 13.648896 +epoch: 2, batch: 10771, sum loss: 
4161.820801, avg loss: 2.472858, ppl: 11.856289 +epoch: 2, batch: 10772, sum loss: 4583.314453, avg loss: 2.791300, ppl: 16.302200 +epoch: 2, batch: 10773, sum loss: 4204.937988, avg loss: 2.511910, ppl: 12.328461 +epoch: 2, batch: 10774, sum loss: 3701.780762, avg loss: 2.514797, ppl: 12.364096 +epoch: 2, batch: 10775, sum loss: 4665.354004, avg loss: 2.446436, ppl: 11.547124 +epoch: 2, batch: 10776, sum loss: 3957.062988, avg loss: 2.423186, ppl: 11.281747 +epoch: 2, batch: 10777, sum loss: 3489.054443, avg loss: 2.378360, ppl: 10.787198 +epoch: 2, batch: 10778, sum loss: 3698.877686, avg loss: 2.406557, ppl: 11.095691 +epoch: 2, batch: 10779, sum loss: 4058.467773, avg loss: 2.425863, ppl: 11.311982 +epoch: 2, batch: 10780, sum loss: 4646.065430, avg loss: 2.561227, ppl: 12.951697 +epoch: 2, batch: 10781, sum loss: 4356.790527, avg loss: 2.568862, ppl: 13.050970 +epoch: 2, batch: 10782, sum loss: 4115.748047, avg loss: 2.512666, ppl: 12.337785 +epoch: 2, batch: 10783, sum loss: 4029.342285, avg loss: 2.415673, ppl: 11.197301 +epoch: 2, batch: 10784, sum loss: 4191.840820, avg loss: 2.504087, ppl: 12.232383 +epoch: 2, batch: 10785, sum loss: 4589.768555, avg loss: 2.631748, ppl: 13.898042 +epoch: 2, batch: 10786, sum loss: 4203.815430, avg loss: 2.622468, ppl: 13.769659 +epoch: 2, batch: 10787, sum loss: 4534.093262, avg loss: 2.579120, ppl: 13.185532 +epoch: 2, batch: 10788, sum loss: 3553.009521, avg loss: 2.561651, ppl: 12.957189 +epoch: 2, batch: 10789, sum loss: 4246.196289, avg loss: 2.353767, ppl: 10.525147 +epoch: 2, batch: 10790, sum loss: 3764.349121, avg loss: 2.615948, ppl: 13.680179 +epoch: 2, batch: 10791, sum loss: 4996.889648, avg loss: 2.628558, ppl: 13.853784 +epoch: 2, batch: 10792, sum loss: 4286.558594, avg loss: 2.540936, ppl: 12.691542 +epoch: 2, batch: 10793, sum loss: 3615.538086, avg loss: 2.279658, ppl: 9.773340 +epoch: 2, batch: 10794, sum loss: 4589.736816, avg loss: 2.501219, ppl: 12.197354 +epoch: 2, batch: 10795, sum loss: 
5048.597656, avg loss: 2.476017, ppl: 11.893791 +epoch: 2, batch: 10796, sum loss: 3729.239258, avg loss: 2.426311, ppl: 11.317051 +epoch: 2, batch: 10797, sum loss: 3341.985107, avg loss: 2.301643, ppl: 9.990580 +epoch: 2, batch: 10798, sum loss: 4330.135254, avg loss: 2.569813, ppl: 13.063385 +epoch: 2, batch: 10799, sum loss: 4314.156250, avg loss: 2.406111, ppl: 11.090743 +epoch: 2, batch: 10800, sum loss: 4484.109863, avg loss: 2.712710, ppl: 15.070063 +epoch: 2, batch: 10801, sum loss: 3808.825928, avg loss: 2.349677, ppl: 10.482180 +epoch: 2, batch: 10802, sum loss: 4537.265137, avg loss: 2.756540, ppl: 15.745275 +epoch: 2, batch: 10803, sum loss: 5758.223633, avg loss: 2.556938, ppl: 12.896264 +epoch: 2, batch: 10804, sum loss: 4426.348633, avg loss: 2.570470, ppl: 13.071961 +epoch: 2, batch: 10805, sum loss: 4452.891113, avg loss: 2.705280, ppl: 14.958509 +epoch: 2, batch: 10806, sum loss: 4641.857422, avg loss: 2.640419, ppl: 14.019083 +epoch: 2, batch: 10807, sum loss: 4974.487793, avg loss: 2.549712, ppl: 12.803415 +epoch: 2, batch: 10808, sum loss: 4474.670898, avg loss: 2.585021, ppl: 13.263561 +epoch: 2, batch: 10809, sum loss: 3769.254639, avg loss: 2.305354, ppl: 10.027731 +epoch: 2, batch: 10810, sum loss: 5580.169434, avg loss: 2.916973, ppl: 18.485245 +epoch: 2, batch: 10811, sum loss: 3906.320068, avg loss: 2.438402, ppl: 11.454720 +epoch: 2, batch: 10812, sum loss: 4205.618164, avg loss: 2.504835, ppl: 12.241541 +epoch: 2, batch: 10813, sum loss: 4233.651367, avg loss: 2.703481, ppl: 14.931618 +epoch: 2, batch: 10814, sum loss: 4736.002930, avg loss: 2.771213, ppl: 15.978004 +epoch: 2, batch: 10815, sum loss: 4315.948242, avg loss: 2.500549, ppl: 12.189188 +epoch: 2, batch: 10816, sum loss: 4379.164062, avg loss: 2.569932, ppl: 13.064936 +epoch: 2, batch: 10817, sum loss: 5022.621094, avg loss: 2.741606, ppl: 15.511869 +epoch: 2, batch: 10818, sum loss: 4224.858398, avg loss: 2.577705, ppl: 13.166884 +epoch: 2, batch: 10819, sum loss: 
2736.437012, avg loss: 2.048231, ppl: 7.754175 +epoch: 2, batch: 10820, sum loss: 4202.697754, avg loss: 2.351817, ppl: 10.504643 +epoch: 2, batch: 10821, sum loss: 4503.471191, avg loss: 2.636693, ppl: 13.966932 +epoch: 2, batch: 10822, sum loss: 4863.106934, avg loss: 2.896430, ppl: 18.109388 +epoch: 2, batch: 10823, sum loss: 4267.789551, avg loss: 2.687525, ppl: 14.695261 +epoch: 2, batch: 10824, sum loss: 4402.621094, avg loss: 2.630001, ppl: 13.873781 +epoch: 2, batch: 10825, sum loss: 5315.158203, avg loss: 2.631267, ppl: 13.891354 +epoch: 2, batch: 10826, sum loss: 3638.200439, avg loss: 2.254152, ppl: 9.527206 +epoch: 2, batch: 10827, sum loss: 4983.537598, avg loss: 2.572812, ppl: 13.102621 +epoch: 2, batch: 10828, sum loss: 3984.043701, avg loss: 2.395697, ppl: 10.975844 +epoch: 2, batch: 10829, sum loss: 4201.430664, avg loss: 2.497878, ppl: 12.156668 +epoch: 2, batch: 10830, sum loss: 4068.625977, avg loss: 2.549264, ppl: 12.797687 +epoch: 2, batch: 10831, sum loss: 3738.010254, avg loss: 2.236990, ppl: 9.365102 +epoch: 2, batch: 10832, sum loss: 4110.656738, avg loss: 2.512626, ppl: 12.337291 +epoch: 2, batch: 10833, sum loss: 5317.839355, avg loss: 2.700782, ppl: 14.891370 +epoch: 2, batch: 10834, sum loss: 4490.466797, avg loss: 2.561590, ppl: 12.956398 +epoch: 2, batch: 10835, sum loss: 4452.258301, avg loss: 2.404027, ppl: 11.067657 +epoch: 2, batch: 10836, sum loss: 3937.131348, avg loss: 2.344926, ppl: 10.432505 +epoch: 2, batch: 10837, sum loss: 4342.633301, avg loss: 2.818062, ppl: 16.744366 +epoch: 2, batch: 10838, sum loss: 5259.849609, avg loss: 2.762526, ppl: 15.839808 +epoch: 2, batch: 10839, sum loss: 4041.535156, avg loss: 2.589068, ppl: 13.317356 +epoch: 2, batch: 10840, sum loss: 4573.945801, avg loss: 2.416242, ppl: 11.203672 +epoch: 2, batch: 10841, sum loss: 4763.300293, avg loss: 2.775816, ppl: 16.051720 +epoch: 2, batch: 10842, sum loss: 4965.743652, avg loss: 2.898858, ppl: 18.153399 +epoch: 2, batch: 10843, sum loss: 
4385.347168, avg loss: 2.404247, ppl: 11.070094 +epoch: 2, batch: 10844, sum loss: 5462.081055, avg loss: 2.776859, ppl: 16.068464 +epoch: 2, batch: 10845, sum loss: 3651.182129, avg loss: 2.372438, ppl: 10.723504 +epoch: 2, batch: 10846, sum loss: 4459.643555, avg loss: 2.430324, ppl: 11.362558 +epoch: 2, batch: 10847, sum loss: 3699.753906, avg loss: 2.283799, ppl: 9.813890 +epoch: 2, batch: 10848, sum loss: 4560.152344, avg loss: 2.930689, ppl: 18.740540 +epoch: 2, batch: 10849, sum loss: 4335.410156, avg loss: 2.602287, ppl: 13.494562 +epoch: 2, batch: 10850, sum loss: 4811.459961, avg loss: 2.624910, ppl: 13.803333 +epoch: 2, batch: 10851, sum loss: 4663.436523, avg loss: 2.599463, ppl: 13.456510 +epoch: 2, batch: 10852, sum loss: 4541.921387, avg loss: 2.762726, ppl: 15.842969 +epoch: 2, batch: 10853, sum loss: 5251.995117, avg loss: 2.916155, ppl: 18.470131 +epoch: 2, batch: 10854, sum loss: 3893.278076, avg loss: 2.460985, ppl: 11.716343 +epoch: 2, batch: 10855, sum loss: 4525.377441, avg loss: 2.634096, ppl: 13.930718 +epoch: 2, batch: 10856, sum loss: 5111.288574, avg loss: 2.681683, ppl: 14.609665 +epoch: 2, batch: 10857, sum loss: 4383.411621, avg loss: 2.671183, ppl: 14.457064 +epoch: 2, batch: 10858, sum loss: 3570.284668, avg loss: 2.613678, ppl: 13.649166 +epoch: 2, batch: 10859, sum loss: 4476.081055, avg loss: 2.741017, ppl: 15.502745 +epoch: 2, batch: 10860, sum loss: 4454.389648, avg loss: 2.582255, ppl: 13.226933 +epoch: 2, batch: 10861, sum loss: 4014.331543, avg loss: 2.392331, ppl: 10.938964 +epoch: 2, batch: 10862, sum loss: 3061.090332, avg loss: 2.128714, ppl: 8.404051 +epoch: 2, batch: 10863, sum loss: 4273.466309, avg loss: 2.410303, ppl: 11.137331 +epoch: 2, batch: 10864, sum loss: 4155.212891, avg loss: 2.468932, ppl: 11.809829 +epoch: 2, batch: 10865, sum loss: 4558.726562, avg loss: 2.470855, ppl: 11.832554 +epoch: 2, batch: 10866, sum loss: 5208.106934, avg loss: 2.585952, ppl: 13.275919 +epoch: 2, batch: 10867, sum loss: 
4356.171387, avg loss: 2.622620, ppl: 13.771753 +epoch: 2, batch: 10868, sum loss: 3199.320801, avg loss: 2.142881, ppl: 8.523957 +epoch: 2, batch: 10869, sum loss: 3698.127197, avg loss: 2.522597, ppl: 12.460917 +epoch: 2, batch: 10870, sum loss: 4112.155762, avg loss: 2.427483, ppl: 11.330323 +epoch: 2, batch: 10871, sum loss: 4180.472168, avg loss: 2.566281, ppl: 13.017327 +epoch: 2, batch: 10872, sum loss: 3961.289307, avg loss: 2.418369, ppl: 11.227535 +epoch: 2, batch: 10873, sum loss: 2834.211670, avg loss: 2.261941, ppl: 9.601706 +epoch: 2, batch: 10874, sum loss: 4689.646973, avg loss: 2.821689, ppl: 16.805208 +epoch: 2, batch: 10875, sum loss: 4336.463867, avg loss: 2.519735, ppl: 12.425302 +epoch: 2, batch: 10876, sum loss: 5229.607910, avg loss: 2.655972, ppl: 14.238812 +epoch: 2, batch: 10877, sum loss: 4228.789062, avg loss: 2.693496, ppl: 14.783272 +epoch: 2, batch: 10878, sum loss: 4499.309082, avg loss: 2.635799, ppl: 13.954460 +epoch: 2, batch: 10879, sum loss: 3612.919922, avg loss: 2.318947, ppl: 10.164968 +epoch: 2, batch: 10880, sum loss: 5111.718262, avg loss: 2.666520, ppl: 14.389804 +epoch: 2, batch: 10881, sum loss: 4710.113770, avg loss: 2.664092, ppl: 14.354903 +epoch: 2, batch: 10882, sum loss: 3940.170166, avg loss: 2.417282, ppl: 11.215339 +epoch: 2, batch: 10883, sum loss: 5198.591309, avg loss: 2.746218, ppl: 15.583587 +epoch: 2, batch: 10884, sum loss: 3808.199219, avg loss: 2.430248, ppl: 11.361705 +epoch: 2, batch: 10885, sum loss: 3644.881348, avg loss: 2.266717, ppl: 9.647677 +epoch: 2, batch: 10886, sum loss: 4733.395020, avg loss: 2.626745, ppl: 13.828691 +epoch: 2, batch: 10887, sum loss: 4782.348633, avg loss: 2.656860, ppl: 14.251474 +epoch: 2, batch: 10888, sum loss: 4600.425781, avg loss: 2.820617, ppl: 16.787201 +epoch: 2, batch: 10889, sum loss: 3956.769775, avg loss: 2.315254, ppl: 10.127500 +epoch: 2, batch: 10890, sum loss: 5056.471191, avg loss: 2.921127, ppl: 18.562197 +epoch: 2, batch: 10891, sum loss: 
3797.420410, avg loss: 2.275267, ppl: 9.730515 +epoch: 2, batch: 10892, sum loss: 4166.203125, avg loss: 2.538820, ppl: 12.664712 +epoch: 2, batch: 10893, sum loss: 4097.136719, avg loss: 2.417190, ppl: 11.214301 +epoch: 2, batch: 10894, sum loss: 4142.184570, avg loss: 2.356191, ppl: 10.550692 +epoch: 2, batch: 10895, sum loss: 4990.751953, avg loss: 2.838881, ppl: 17.096617 +epoch: 2, batch: 10896, sum loss: 4552.123535, avg loss: 2.742243, ppl: 15.521762 +epoch: 2, batch: 10897, sum loss: 3772.392090, avg loss: 2.399740, ppl: 11.020316 +epoch: 2, batch: 10898, sum loss: 4105.657715, avg loss: 2.521903, ppl: 12.452269 +epoch: 2, batch: 10899, sum loss: 4269.255859, avg loss: 2.592141, ppl: 13.358336 +epoch: 2, batch: 10900, sum loss: 3912.853760, avg loss: 2.333246, ppl: 10.311357 +epoch: 2, batch: 10901, sum loss: 4721.144531, avg loss: 2.581271, ppl: 13.213924 +epoch: 2, batch: 10902, sum loss: 3994.043701, avg loss: 2.620764, ppl: 13.746216 +epoch: 2, batch: 10903, sum loss: 4154.283691, avg loss: 2.420911, ppl: 11.256112 +epoch: 2, batch: 10904, sum loss: 4523.584961, avg loss: 2.401054, ppl: 11.034798 +epoch: 2, batch: 10905, sum loss: 4809.499023, avg loss: 2.726473, ppl: 15.278908 +epoch: 2, batch: 10906, sum loss: 3998.824219, avg loss: 2.773110, ppl: 16.008341 +epoch: 2, batch: 10907, sum loss: 4382.390625, avg loss: 2.471738, ppl: 11.843008 +epoch: 2, batch: 10908, sum loss: 4458.737305, avg loss: 2.649279, ppl: 14.143843 +epoch: 2, batch: 10909, sum loss: 4278.339844, avg loss: 2.539074, ppl: 12.667937 +epoch: 2, batch: 10910, sum loss: 3647.447266, avg loss: 2.350159, ppl: 10.487242 +epoch: 2, batch: 10911, sum loss: 3563.495605, avg loss: 2.262537, ppl: 9.607430 +epoch: 2, batch: 10912, sum loss: 4426.876953, avg loss: 2.791221, ppl: 16.300917 +epoch: 2, batch: 10913, sum loss: 4741.910156, avg loss: 2.589793, ppl: 13.327009 +epoch: 2, batch: 10914, sum loss: 4031.795898, avg loss: 2.569659, ppl: 13.061376 +epoch: 2, batch: 10915, sum loss: 
4930.715820, avg loss: 2.505445, ppl: 12.249012 +epoch: 2, batch: 10916, sum loss: 4392.918945, avg loss: 2.568959, ppl: 13.052224 +epoch: 2, batch: 10917, sum loss: 4854.900879, avg loss: 2.428665, ppl: 11.343727 +epoch: 2, batch: 10918, sum loss: 4714.216797, avg loss: 2.377316, ppl: 10.775936 +epoch: 2, batch: 10919, sum loss: 5349.839844, avg loss: 2.845659, ppl: 17.212906 +epoch: 2, batch: 10920, sum loss: 4256.235352, avg loss: 2.617611, ppl: 13.702948 +epoch: 2, batch: 10921, sum loss: 3862.310303, avg loss: 2.544342, ppl: 12.734840 +epoch: 2, batch: 10922, sum loss: 4246.139160, avg loss: 2.581239, ppl: 13.213506 +epoch: 2, batch: 10923, sum loss: 4386.717773, avg loss: 2.652187, ppl: 14.185032 +epoch: 2, batch: 10924, sum loss: 4718.100098, avg loss: 2.543450, ppl: 12.723493 +epoch: 2, batch: 10925, sum loss: 4179.755859, avg loss: 2.451470, ppl: 11.605393 +epoch: 2, batch: 10926, sum loss: 5121.024902, avg loss: 2.807580, ppl: 16.569763 +epoch: 2, batch: 10927, sum loss: 4079.270508, avg loss: 2.645441, ppl: 14.089662 +epoch: 2, batch: 10928, sum loss: 4381.559570, avg loss: 2.611180, ppl: 13.615102 +epoch: 2, batch: 10929, sum loss: 4610.294434, avg loss: 2.519287, ppl: 12.419734 +epoch: 2, batch: 10930, sum loss: 4617.416016, avg loss: 2.458688, ppl: 11.689466 +epoch: 2, batch: 10931, sum loss: 4834.173828, avg loss: 2.575479, ppl: 13.137606 +epoch: 2, batch: 10932, sum loss: 5143.076660, avg loss: 2.873227, ppl: 17.694027 +epoch: 2, batch: 10933, sum loss: 5048.630859, avg loss: 2.773973, ppl: 16.022167 +epoch: 2, batch: 10934, sum loss: 3799.650146, avg loss: 2.227227, ppl: 9.274117 +epoch: 2, batch: 10935, sum loss: 5538.534180, avg loss: 2.825783, ppl: 16.874144 +epoch: 2, batch: 10936, sum loss: 4433.303711, avg loss: 2.634167, ppl: 13.931708 +epoch: 2, batch: 10937, sum loss: 4948.657715, avg loss: 2.660569, ppl: 14.304422 +epoch: 2, batch: 10938, sum loss: 4852.390137, avg loss: 2.686816, ppl: 14.684848 +epoch: 2, batch: 10939, sum loss: 
5152.510742, avg loss: 2.849840, ppl: 17.285015 +epoch: 2, batch: 10940, sum loss: 4103.845215, avg loss: 2.553731, ppl: 12.854973 +epoch: 2, batch: 10941, sum loss: 4266.888672, avg loss: 2.493798, ppl: 12.107175 +epoch: 2, batch: 10942, sum loss: 4503.986328, avg loss: 2.588498, ppl: 13.309764 +epoch: 2, batch: 10943, sum loss: 4956.480469, avg loss: 2.657630, ppl: 14.262450 +epoch: 2, batch: 10944, sum loss: 3712.525879, avg loss: 2.241863, ppl: 9.410852 +epoch: 2, batch: 10945, sum loss: 4787.084473, avg loss: 2.788052, ppl: 16.249329 +epoch: 2, batch: 10946, sum loss: 4755.516602, avg loss: 2.701998, ppl: 14.909492 +epoch: 2, batch: 10947, sum loss: 4139.760254, avg loss: 2.516572, ppl: 12.386059 +epoch: 2, batch: 10948, sum loss: 4671.801270, avg loss: 2.478409, ppl: 11.922284 +epoch: 2, batch: 10949, sum loss: 3794.335693, avg loss: 2.365546, ppl: 10.649852 +epoch: 2, batch: 10950, sum loss: 4160.143555, avg loss: 2.351692, ppl: 10.503331 +epoch: 2, batch: 10951, sum loss: 5606.941406, avg loss: 2.870938, ppl: 17.653566 +epoch: 2, batch: 10952, sum loss: 3440.124268, avg loss: 2.311912, ppl: 10.093701 +epoch: 2, batch: 10953, sum loss: 4310.367188, avg loss: 2.461660, ppl: 11.724262 +epoch: 2, batch: 10954, sum loss: 5399.226074, avg loss: 2.807710, ppl: 16.571920 +epoch: 2, batch: 10955, sum loss: 4766.059570, avg loss: 2.572078, ppl: 13.092997 +epoch: 2, batch: 10956, sum loss: 4105.663086, avg loss: 2.569251, ppl: 13.056043 +epoch: 2, batch: 10957, sum loss: 4092.875732, avg loss: 2.483541, ppl: 11.983624 +epoch: 2, batch: 10958, sum loss: 4391.924805, avg loss: 2.665003, ppl: 14.367990 +epoch: 2, batch: 10959, sum loss: 5266.366211, avg loss: 2.819254, ppl: 16.764338 +epoch: 2, batch: 10960, sum loss: 5589.861328, avg loss: 2.897803, ppl: 18.134253 +epoch: 2, batch: 10961, sum loss: 3756.536621, avg loss: 2.494380, ppl: 12.114223 +epoch: 2, batch: 10962, sum loss: 4064.472656, avg loss: 2.488961, ppl: 12.048745 +epoch: 2, batch: 10963, sum loss: 
3578.037598, avg loss: 2.367993, ppl: 10.675945 +epoch: 2, batch: 10964, sum loss: 3647.258545, avg loss: 2.368350, ppl: 10.679751 +epoch: 2, batch: 10965, sum loss: 4491.571777, avg loss: 2.895920, ppl: 18.100142 +epoch: 2, batch: 10966, sum loss: 4568.069336, avg loss: 2.654311, ppl: 14.215188 +epoch: 2, batch: 10967, sum loss: 3575.953125, avg loss: 2.292278, ppl: 9.897456 +epoch: 2, batch: 10968, sum loss: 3171.609131, avg loss: 2.326933, ppl: 10.246464 +epoch: 2, batch: 10969, sum loss: 4145.323242, avg loss: 2.615346, ppl: 13.671946 +epoch: 2, batch: 10970, sum loss: 4687.602051, avg loss: 2.557339, ppl: 12.901441 +epoch: 2, batch: 10971, sum loss: 4645.960938, avg loss: 2.683975, ppl: 14.643184 +epoch: 2, batch: 10972, sum loss: 4753.927246, avg loss: 2.675255, ppl: 14.516045 +epoch: 2, batch: 10973, sum loss: 4329.214355, avg loss: 2.567743, ppl: 13.036366 +epoch: 2, batch: 10974, sum loss: 4828.661133, avg loss: 2.593266, ppl: 13.373378 +epoch: 2, batch: 10975, sum loss: 4350.571289, avg loss: 2.562174, ppl: 12.963971 +epoch: 2, batch: 10976, sum loss: 4857.787109, avg loss: 2.648739, ppl: 14.136200 +epoch: 2, batch: 10977, sum loss: 3802.029785, avg loss: 2.434078, ppl: 11.405298 +epoch: 2, batch: 10978, sum loss: 4986.064453, avg loss: 2.735088, ppl: 15.411094 +epoch: 2, batch: 10979, sum loss: 5318.222168, avg loss: 2.669791, ppl: 14.436944 +epoch: 2, batch: 10980, sum loss: 4365.375000, avg loss: 2.500215, ppl: 12.185111 +epoch: 2, batch: 10981, sum loss: 4781.971680, avg loss: 2.596076, ppl: 13.411010 +epoch: 2, batch: 10982, sum loss: 3854.721680, avg loss: 2.415239, ppl: 11.192446 +epoch: 2, batch: 10983, sum loss: 4557.848633, avg loss: 2.591159, ppl: 13.345231 +epoch: 2, batch: 10984, sum loss: 4180.313965, avg loss: 2.577259, ppl: 13.161012 +epoch: 2, batch: 10985, sum loss: 4343.784180, avg loss: 2.509407, ppl: 12.297639 +epoch: 2, batch: 10986, sum loss: 5200.979980, avg loss: 2.809822, ppl: 16.606958 +epoch: 2, batch: 10987, sum loss: 
4411.240234, avg loss: 2.538113, ppl: 12.655766 +epoch: 2, batch: 10988, sum loss: 4640.981445, avg loss: 2.592727, ppl: 13.366171 +epoch: 2, batch: 10989, sum loss: 4947.909668, avg loss: 2.717139, ppl: 15.136950 +epoch: 2, batch: 10990, sum loss: 4600.736328, avg loss: 2.609607, ppl: 13.593700 +epoch: 2, batch: 10991, sum loss: 3837.312500, avg loss: 2.498250, ppl: 12.161200 +epoch: 2, batch: 10992, sum loss: 3886.434570, avg loss: 2.447377, ppl: 11.557990 +epoch: 2, batch: 10993, sum loss: 4018.578125, avg loss: 2.463874, ppl: 11.750242 +epoch: 2, batch: 10994, sum loss: 4728.617188, avg loss: 2.643162, ppl: 14.057587 +epoch: 2, batch: 10995, sum loss: 4174.154785, avg loss: 2.419800, ppl: 11.243610 +epoch: 2, batch: 10996, sum loss: 4143.937988, avg loss: 2.724483, ppl: 15.248525 +epoch: 2, batch: 10997, sum loss: 4849.417969, avg loss: 2.641295, ppl: 14.031365 +epoch: 2, batch: 10998, sum loss: 5876.327637, avg loss: 2.906196, ppl: 18.287096 +epoch: 2, batch: 10999, sum loss: 3994.770996, avg loss: 2.351249, ppl: 10.498670 +epoch: 2, batch: 11000, sum loss: 4834.957031, avg loss: 2.626267, ppl: 13.822072 +epoch: 2, batch: 11001, sum loss: 3854.084229, avg loss: 2.545630, ppl: 12.751265 +epoch: 2, batch: 11002, sum loss: 4633.311523, avg loss: 2.682867, ppl: 14.626973 +epoch: 2, batch: 11003, sum loss: 3628.779785, avg loss: 2.697978, ppl: 14.849669 +epoch: 2, batch: 11004, sum loss: 4699.234375, avg loss: 2.666989, ppl: 14.396557 +epoch: 2, batch: 11005, sum loss: 4472.083008, avg loss: 2.609150, ppl: 13.587496 +epoch: 2, batch: 11006, sum loss: 4042.032471, avg loss: 2.532602, ppl: 12.586211 +epoch: 2, batch: 11007, sum loss: 4493.511230, avg loss: 2.594406, ppl: 13.388631 +epoch: 2, batch: 11008, sum loss: 4024.341797, avg loss: 2.452372, ppl: 11.615862 +epoch: 2, batch: 11009, sum loss: 4689.280762, avg loss: 2.743874, ppl: 15.547099 +epoch: 2, batch: 11010, sum loss: 4380.349609, avg loss: 2.710612, ppl: 15.038481 +epoch: 2, batch: 11011, sum loss: 
4385.728027, avg loss: 2.619909, ppl: 13.734478 +epoch: 2, batch: 11012, sum loss: 3664.882324, avg loss: 2.287692, ppl: 9.852171 +epoch: 2, batch: 11013, sum loss: 5129.969727, avg loss: 2.669079, ppl: 14.426677 +epoch: 2, batch: 11014, sum loss: 3641.371338, avg loss: 2.397216, ppl: 10.992532 +epoch: 2, batch: 11015, sum loss: 4396.894043, avg loss: 2.574294, ppl: 13.122047 +epoch: 2, batch: 11016, sum loss: 4327.817383, avg loss: 2.646983, ppl: 14.111403 +epoch: 2, batch: 11017, sum loss: 4793.240234, avg loss: 2.468198, ppl: 11.801163 +epoch: 2, batch: 11018, sum loss: 4298.886719, avg loss: 2.577270, ppl: 13.161162 +epoch: 2, batch: 11019, sum loss: 5171.874023, avg loss: 2.830801, ppl: 16.959044 +epoch: 2, batch: 11020, sum loss: 4574.928711, avg loss: 2.437362, ppl: 11.442817 +epoch: 2, batch: 11021, sum loss: 3500.709473, avg loss: 2.414283, ppl: 11.181746 +epoch: 2, batch: 11022, sum loss: 4510.848633, avg loss: 2.529921, ppl: 12.552512 +epoch: 2, batch: 11023, sum loss: 4452.358398, avg loss: 2.721491, ppl: 15.202968 +epoch: 2, batch: 11024, sum loss: 4007.983398, avg loss: 2.498743, ppl: 12.167188 +epoch: 2, batch: 11025, sum loss: 4466.274902, avg loss: 2.685673, ppl: 14.668077 +epoch: 2, batch: 11026, sum loss: 3940.408691, avg loss: 2.458146, ppl: 11.683135 +epoch: 2, batch: 11027, sum loss: 4718.734375, avg loss: 2.505966, ppl: 12.255394 +epoch: 2, batch: 11028, sum loss: 3890.297607, avg loss: 2.544341, ppl: 12.734838 +epoch: 2, batch: 11029, sum loss: 3790.595703, avg loss: 2.263042, ppl: 9.612289 +epoch: 2, batch: 11030, sum loss: 4490.255859, avg loss: 2.639774, ppl: 14.010038 +epoch: 2, batch: 11031, sum loss: 4344.541016, avg loss: 2.488282, ppl: 12.040578 +epoch: 2, batch: 11032, sum loss: 4917.341797, avg loss: 2.904514, ppl: 18.256367 +epoch: 2, batch: 11033, sum loss: 4722.410156, avg loss: 2.755198, ppl: 15.724161 +epoch: 2, batch: 11034, sum loss: 4352.284668, avg loss: 2.567719, ppl: 13.036061 +epoch: 2, batch: 11035, sum loss: 
3221.092285, avg loss: 2.297498, ppl: 9.949261 +epoch: 2, batch: 11036, sum loss: 4462.210938, avg loss: 2.621746, ppl: 13.759722 +epoch: 2, batch: 11037, sum loss: 3802.414062, avg loss: 2.332769, ppl: 10.306445 +epoch: 2, batch: 11038, sum loss: 4361.867676, avg loss: 2.498206, ppl: 12.160660 +epoch: 2, batch: 11039, sum loss: 3543.212158, avg loss: 2.507581, ppl: 12.275200 +epoch: 2, batch: 11040, sum loss: 3709.491455, avg loss: 2.194965, ppl: 8.979690 +epoch: 2, batch: 11041, sum loss: 4098.608398, avg loss: 2.402467, ppl: 11.050402 +epoch: 2, batch: 11042, sum loss: 4512.284180, avg loss: 2.464383, ppl: 11.756222 +epoch: 2, batch: 11043, sum loss: 4180.269531, avg loss: 2.453210, ppl: 11.625604 +epoch: 2, batch: 11044, sum loss: 4303.075684, avg loss: 2.364327, ppl: 10.636883 +epoch: 2, batch: 11045, sum loss: 4435.228516, avg loss: 2.796487, ppl: 16.386980 +epoch: 2, batch: 11046, sum loss: 5176.562988, avg loss: 2.690521, ppl: 14.739357 +epoch: 2, batch: 11047, sum loss: 4432.556152, avg loss: 2.443526, ppl: 11.513566 +epoch: 2, batch: 11048, sum loss: 4314.121094, avg loss: 2.563352, ppl: 12.979246 +epoch: 2, batch: 11049, sum loss: 5099.722656, avg loss: 2.725667, ppl: 15.266593 +epoch: 2, batch: 11050, sum loss: 2917.790527, avg loss: 2.153351, ppl: 8.613673 +epoch: 2, batch: 11051, sum loss: 4200.965332, avg loss: 2.536815, ppl: 12.639346 +epoch: 2, batch: 11052, sum loss: 4793.435059, avg loss: 2.619364, ppl: 13.726984 +epoch: 2, batch: 11053, sum loss: 3452.427734, avg loss: 2.332721, ppl: 10.305951 +epoch: 2, batch: 11054, sum loss: 5038.649414, avg loss: 2.719185, ppl: 15.167953 +epoch: 2, batch: 11055, sum loss: 3766.792480, avg loss: 2.372036, ppl: 10.719192 +epoch: 2, batch: 11056, sum loss: 4564.287109, avg loss: 2.754549, ppl: 15.713945 +epoch: 2, batch: 11057, sum loss: 5087.988281, avg loss: 2.939335, ppl: 18.903265 +epoch: 2, batch: 11058, sum loss: 3559.526367, avg loss: 2.441376, ppl: 11.488841 +epoch: 2, batch: 11059, sum loss: 
4994.776367, avg loss: 2.630214, ppl: 13.876739 +epoch: 2, batch: 11060, sum loss: 4228.779785, avg loss: 2.678138, ppl: 14.557958 +epoch: 2, batch: 11061, sum loss: 4282.600586, avg loss: 2.565968, ppl: 13.013249 +epoch: 2, batch: 11062, sum loss: 4598.987793, avg loss: 2.488630, ppl: 12.044762 +epoch: 2, batch: 11063, sum loss: 3794.645752, avg loss: 2.534833, ppl: 12.614329 +epoch: 2, batch: 11064, sum loss: 4133.184570, avg loss: 2.296214, ppl: 9.936488 +epoch: 2, batch: 11065, sum loss: 4391.519531, avg loss: 2.560653, ppl: 12.944263 +epoch: 2, batch: 11066, sum loss: 4761.063477, avg loss: 2.593172, ppl: 13.372119 +epoch: 2, batch: 11067, sum loss: 4726.291992, avg loss: 2.509980, ppl: 12.304681 +epoch: 2, batch: 11068, sum loss: 4418.229492, avg loss: 2.333983, ppl: 10.318957 +epoch: 2, batch: 11069, sum loss: 4247.108887, avg loss: 2.431087, ppl: 11.371233 +epoch: 2, batch: 11070, sum loss: 3499.633301, avg loss: 2.297855, ppl: 9.952812 +epoch: 2, batch: 11071, sum loss: 3890.007080, avg loss: 2.446545, ppl: 11.548382 +epoch: 2, batch: 11072, sum loss: 4493.220703, avg loss: 2.612338, ppl: 13.630877 +epoch: 2, batch: 11073, sum loss: 4203.346680, avg loss: 2.460976, ppl: 11.716237 +epoch: 2, batch: 11074, sum loss: 4466.321777, avg loss: 2.672844, ppl: 14.481094 +epoch: 2, batch: 11075, sum loss: 4505.732422, avg loss: 2.500407, ppl: 12.187448 +epoch: 2, batch: 11076, sum loss: 4085.565918, avg loss: 2.531330, ppl: 12.570209 +epoch: 2, batch: 11077, sum loss: 4135.230957, avg loss: 2.404204, ppl: 11.069617 +epoch: 2, batch: 11078, sum loss: 4370.806641, avg loss: 2.436347, ppl: 11.431209 +epoch: 2, batch: 11079, sum loss: 4715.521484, avg loss: 2.498951, ppl: 12.169726 +epoch: 2, batch: 11080, sum loss: 4461.415039, avg loss: 2.451327, ppl: 11.603736 +epoch: 2, batch: 11081, sum loss: 4857.886230, avg loss: 2.633001, ppl: 13.915462 +epoch: 2, batch: 11082, sum loss: 5475.356934, avg loss: 2.836973, ppl: 17.064030 +epoch: 2, batch: 11083, sum loss: 
4981.293945, avg loss: 2.752096, ppl: 15.675456 +epoch: 2, batch: 11084, sum loss: 4059.944580, avg loss: 2.614259, ppl: 13.657096 +epoch: 2, batch: 11085, sum loss: 5306.050781, avg loss: 2.603558, ppl: 13.511725 +epoch: 2, batch: 11086, sum loss: 4997.834473, avg loss: 2.654187, ppl: 14.213428 +epoch: 2, batch: 11087, sum loss: 4244.779297, avg loss: 2.783462, ppl: 16.174919 +epoch: 2, batch: 11088, sum loss: 5063.283691, avg loss: 2.619392, ppl: 13.727370 +epoch: 2, batch: 11089, sum loss: 4373.439453, avg loss: 2.417601, ppl: 11.218911 +epoch: 2, batch: 11090, sum loss: 4209.285156, avg loss: 2.450108, ppl: 11.589596 +epoch: 2, batch: 11091, sum loss: 3999.900635, avg loss: 2.498377, ppl: 12.162733 +epoch: 2, batch: 11092, sum loss: 4010.822510, avg loss: 2.538495, ppl: 12.660606 +epoch: 2, batch: 11093, sum loss: 4187.856934, avg loss: 2.497231, ppl: 12.148810 +epoch: 2, batch: 11094, sum loss: 4209.895508, avg loss: 2.400168, ppl: 11.025033 +epoch: 2, batch: 11095, sum loss: 4482.312012, avg loss: 2.639760, ppl: 14.009838 +epoch: 2, batch: 11096, sum loss: 4889.090332, avg loss: 2.410794, ppl: 11.142805 +epoch: 2, batch: 11097, sum loss: 3336.122803, avg loss: 2.289721, ppl: 9.872178 +epoch: 2, batch: 11098, sum loss: 4365.303223, avg loss: 2.535019, ppl: 12.616673 +epoch: 2, batch: 11099, sum loss: 4268.227539, avg loss: 2.460073, ppl: 11.705671 +epoch: 2, batch: 11100, sum loss: 4103.853027, avg loss: 2.448600, ppl: 11.572132 +epoch: 2, batch: 11101, sum loss: 4961.135742, avg loss: 2.486785, ppl: 12.022558 +epoch: 2, batch: 11102, sum loss: 4138.869629, avg loss: 2.564355, ppl: 12.992281 +epoch: 2, batch: 11103, sum loss: 4193.327637, avg loss: 2.445089, ppl: 11.531574 +epoch: 2, batch: 11104, sum loss: 4154.455566, avg loss: 2.406985, ppl: 11.100438 +epoch: 2, batch: 11105, sum loss: 4019.543457, avg loss: 2.521671, ppl: 12.449380 +epoch: 2, batch: 11106, sum loss: 4304.389160, avg loss: 2.691926, ppl: 14.760070 +epoch: 2, batch: 11107, sum loss: 
4550.981445, avg loss: 2.661393, ppl: 14.316214 +epoch: 2, batch: 11108, sum loss: 4177.442871, avg loss: 2.411918, ppl: 11.155341 +epoch: 2, batch: 11109, sum loss: 4260.423828, avg loss: 2.642943, ppl: 14.054503 +epoch: 2, batch: 11110, sum loss: 4561.535645, avg loss: 2.681679, ppl: 14.609599 +epoch: 2, batch: 11111, sum loss: 4413.706055, avg loss: 2.446622, ppl: 11.549266 +epoch: 2, batch: 11112, sum loss: 4147.940430, avg loss: 2.464611, ppl: 11.758907 +epoch: 2, batch: 11113, sum loss: 4840.319824, avg loss: 2.669785, ppl: 14.436865 +epoch: 2, batch: 11114, sum loss: 4620.169434, avg loss: 2.786592, ppl: 16.225628 +epoch: 2, batch: 11115, sum loss: 3593.416016, avg loss: 2.464620, ppl: 11.759014 +epoch: 2, batch: 11116, sum loss: 4068.986328, avg loss: 2.407684, ppl: 11.108206 +epoch: 2, batch: 11117, sum loss: 4712.273926, avg loss: 2.754105, ppl: 15.706982 +epoch: 2, batch: 11118, sum loss: 4125.631348, avg loss: 2.514096, ppl: 12.355432 +epoch: 2, batch: 11119, sum loss: 5132.025879, avg loss: 3.002941, ppl: 20.144693 +epoch: 2, batch: 11120, sum loss: 4523.915527, avg loss: 2.531570, ppl: 12.573230 +epoch: 2, batch: 11121, sum loss: 3786.006836, avg loss: 2.400765, ppl: 11.031617 +epoch: 2, batch: 11122, sum loss: 5485.414551, avg loss: 2.764826, ppl: 15.876274 +epoch: 2, batch: 11123, sum loss: 4672.150391, avg loss: 2.722698, ppl: 15.221341 +epoch: 2, batch: 11124, sum loss: 4263.593262, avg loss: 2.412899, ppl: 11.166291 +epoch: 2, batch: 11125, sum loss: 4674.772949, avg loss: 2.536502, ppl: 12.635393 +epoch: 2, batch: 11126, sum loss: 4602.589844, avg loss: 2.533071, ppl: 12.592117 +epoch: 2, batch: 11127, sum loss: 4004.299316, avg loss: 2.377850, ppl: 10.781698 +epoch: 2, batch: 11128, sum loss: 4814.597656, avg loss: 2.516779, ppl: 12.388625 +epoch: 2, batch: 11129, sum loss: 5067.031250, avg loss: 2.617268, ppl: 13.698250 +epoch: 2, batch: 11130, sum loss: 4709.877441, avg loss: 2.668486, ppl: 14.418122 +epoch: 2, batch: 11131, sum loss: 
4354.517090, avg loss: 2.621624, ppl: 13.758045 +epoch: 2, batch: 11132, sum loss: 4265.542480, avg loss: 2.574256, ppl: 13.121553 +epoch: 2, batch: 11133, sum loss: 4721.386719, avg loss: 2.645035, ppl: 14.083935 +epoch: 2, batch: 11134, sum loss: 4045.333008, avg loss: 2.447267, ppl: 11.556723 +epoch: 2, batch: 11135, sum loss: 5052.730469, avg loss: 2.567445, ppl: 13.032478 +epoch: 2, batch: 11136, sum loss: 5546.601562, avg loss: 2.724264, ppl: 15.245192 +epoch: 2, batch: 11137, sum loss: 3823.352295, avg loss: 2.427525, ppl: 11.330807 +epoch: 2, batch: 11138, sum loss: 4257.503418, avg loss: 2.498535, ppl: 12.164659 +epoch: 2, batch: 11139, sum loss: 5360.291992, avg loss: 2.852737, ppl: 17.335157 +epoch: 2, batch: 11140, sum loss: 3924.619629, avg loss: 2.581987, ppl: 13.223383 +epoch: 2, batch: 11141, sum loss: 5250.397461, avg loss: 2.783880, ppl: 16.181684 +epoch: 2, batch: 11142, sum loss: 4373.253418, avg loss: 2.757411, ppl: 15.758986 +epoch: 2, batch: 11143, sum loss: 3987.313232, avg loss: 2.345479, ppl: 10.438267 +epoch: 2, batch: 11144, sum loss: 4009.697998, avg loss: 2.441960, ppl: 11.495548 +epoch: 2, batch: 11145, sum loss: 4278.553711, avg loss: 2.315235, ppl: 10.127299 +epoch: 2, batch: 11146, sum loss: 4367.465332, avg loss: 2.520176, ppl: 12.430786 +epoch: 2, batch: 11147, sum loss: 4057.068848, avg loss: 2.562899, ppl: 12.973368 +epoch: 2, batch: 11148, sum loss: 4768.861816, avg loss: 2.559775, ppl: 12.932912 +epoch: 2, batch: 11149, sum loss: 4359.584961, avg loss: 2.464435, ppl: 11.756836 +epoch: 2, batch: 11150, sum loss: 4863.226562, avg loss: 2.679464, ppl: 14.577276 +epoch: 2, batch: 11151, sum loss: 3772.541016, avg loss: 2.293338, ppl: 9.907953 +epoch: 2, batch: 11152, sum loss: 3856.733643, avg loss: 2.338832, ppl: 10.369118 +epoch: 2, batch: 11153, sum loss: 5234.944336, avg loss: 2.812974, ppl: 16.659389 +epoch: 2, batch: 11154, sum loss: 3997.644043, avg loss: 2.322861, ppl: 10.204830 +epoch: 2, batch: 11155, sum loss: 
5247.261719, avg loss: 2.669004, ppl: 14.425593 +epoch: 2, batch: 11156, sum loss: 3701.123291, avg loss: 2.487314, ppl: 12.028923 +epoch: 2, batch: 11157, sum loss: 3877.210938, avg loss: 2.323074, ppl: 10.207006 +epoch: 2, batch: 11158, sum loss: 4829.159180, avg loss: 2.829033, ppl: 16.929081 +epoch: 2, batch: 11159, sum loss: 4265.526855, avg loss: 2.578916, ppl: 13.182841 +epoch: 2, batch: 11160, sum loss: 4551.181641, avg loss: 2.416984, ppl: 11.211996 +epoch: 2, batch: 11161, sum loss: 4186.948242, avg loss: 2.618479, ppl: 13.714848 +epoch: 2, batch: 11162, sum loss: 4008.848145, avg loss: 2.273879, ppl: 9.717016 +epoch: 2, batch: 11163, sum loss: 4521.136230, avg loss: 2.797733, ppl: 16.407406 +epoch: 2, batch: 11164, sum loss: 3586.203125, avg loss: 2.339337, ppl: 10.374352 +epoch: 2, batch: 11165, sum loss: 3745.522705, avg loss: 2.244172, ppl: 9.432601 +epoch: 2, batch: 11166, sum loss: 3922.263672, avg loss: 2.387257, ppl: 10.883600 +epoch: 2, batch: 11167, sum loss: 5717.060059, avg loss: 2.768552, ppl: 15.935544 +epoch: 2, batch: 11168, sum loss: 4483.647461, avg loss: 2.730602, ppl: 15.342113 +epoch: 2, batch: 11169, sum loss: 4459.602539, avg loss: 2.546889, ppl: 12.767323 +epoch: 2, batch: 11170, sum loss: 4704.352539, avg loss: 2.719279, ppl: 15.169378 +epoch: 2, batch: 11171, sum loss: 4898.676758, avg loss: 2.681268, ppl: 14.603599 +epoch: 2, batch: 11172, sum loss: 4648.085449, avg loss: 2.562340, ppl: 12.966129 +epoch: 2, batch: 11173, sum loss: 4490.354004, avg loss: 2.698530, ppl: 14.857878 +epoch: 2, batch: 11174, sum loss: 4027.230469, avg loss: 2.573310, ppl: 13.109146 +epoch: 2, batch: 11175, sum loss: 4853.571289, avg loss: 2.526586, ppl: 12.510719 +epoch: 2, batch: 11176, sum loss: 5065.124512, avg loss: 2.724650, ppl: 15.251074 +epoch: 2, batch: 11177, sum loss: 4083.154297, avg loss: 2.545608, ppl: 12.750973 +epoch: 2, batch: 11178, sum loss: 4925.707031, avg loss: 2.835755, ppl: 17.043268 +epoch: 2, batch: 11179, sum loss: 
3807.749512, avg loss: 2.314741, ppl: 10.122302 +epoch: 2, batch: 11180, sum loss: 4968.560059, avg loss: 2.745061, ppl: 15.565562 +epoch: 2, batch: 11181, sum loss: 3758.956055, avg loss: 2.516035, ppl: 12.379413 +epoch: 2, batch: 11182, sum loss: 3663.145752, avg loss: 2.209376, ppl: 9.110033 +epoch: 2, batch: 11183, sum loss: 3796.233887, avg loss: 2.486073, ppl: 12.014008 +epoch: 2, batch: 11184, sum loss: 3968.386719, avg loss: 2.643829, ppl: 14.066957 +epoch: 2, batch: 11185, sum loss: 4355.351562, avg loss: 2.387802, ppl: 10.889536 +epoch: 2, batch: 11186, sum loss: 4748.628906, avg loss: 2.725964, ppl: 15.271126 +epoch: 2, batch: 11187, sum loss: 3704.693359, avg loss: 2.511657, ppl: 12.325331 +epoch: 2, batch: 11188, sum loss: 3771.614990, avg loss: 2.501071, ppl: 12.195548 +epoch: 2, batch: 11189, sum loss: 4117.227051, avg loss: 2.632498, ppl: 13.908470 +epoch: 2, batch: 11190, sum loss: 4341.263672, avg loss: 2.629475, ppl: 13.866493 +epoch: 2, batch: 11191, sum loss: 3671.588623, avg loss: 2.222511, ppl: 9.230482 +epoch: 2, batch: 11192, sum loss: 4246.632812, avg loss: 2.483411, ppl: 11.982066 +epoch: 2, batch: 11193, sum loss: 5474.516602, avg loss: 2.723640, ppl: 15.235682 +epoch: 2, batch: 11194, sum loss: 4097.410645, avg loss: 2.562483, ppl: 12.967978 +epoch: 2, batch: 11195, sum loss: 4590.877930, avg loss: 2.583499, ppl: 13.243399 +epoch: 2, batch: 11196, sum loss: 5028.250488, avg loss: 2.555006, ppl: 12.871371 +epoch: 2, batch: 11197, sum loss: 4981.666992, avg loss: 2.619173, ppl: 13.724369 +epoch: 2, batch: 11198, sum loss: 4151.681641, avg loss: 2.516171, ppl: 12.381095 +epoch: 2, batch: 11199, sum loss: 4189.117188, avg loss: 2.611669, ppl: 13.621767 +epoch: 2, batch: 11200, sum loss: 4682.691895, avg loss: 2.702073, ppl: 14.910604 +epoch: 2, batch: 11201, sum loss: 4232.775391, avg loss: 2.492801, ppl: 12.095103 +epoch: 2, batch: 11202, sum loss: 5657.813965, avg loss: 2.976230, ppl: 19.613741 +epoch: 2, batch: 11203, sum loss: 
4348.842773, avg loss: 2.450052, ppl: 11.588949 +epoch: 2, batch: 11204, sum loss: 4503.606934, avg loss: 2.700004, ppl: 14.879793 +epoch: 2, batch: 11205, sum loss: 3992.273193, avg loss: 2.399203, ppl: 11.014393 +epoch: 2, batch: 11206, sum loss: 3756.149414, avg loss: 2.180006, ppl: 8.846355 +epoch: 2, batch: 11207, sum loss: 5074.142090, avg loss: 2.706209, ppl: 14.972410 +epoch: 2, batch: 11208, sum loss: 4581.361328, avg loss: 2.854431, ppl: 17.364548 +epoch: 2, batch: 11209, sum loss: 3980.352295, avg loss: 2.486166, ppl: 12.015127 +epoch: 2, batch: 11210, sum loss: 4254.878906, avg loss: 2.362509, ppl: 10.617558 +epoch: 2, batch: 11211, sum loss: 5069.139160, avg loss: 2.841446, ppl: 17.140532 +epoch: 2, batch: 11212, sum loss: 5025.496094, avg loss: 2.853774, ppl: 17.353151 +epoch: 2, batch: 11213, sum loss: 4240.535645, avg loss: 2.595187, ppl: 13.399096 +epoch: 2, batch: 11214, sum loss: 5250.379395, avg loss: 2.556173, ppl: 12.886408 +epoch: 2, batch: 11215, sum loss: 4625.013672, avg loss: 2.659582, ppl: 14.290319 +epoch: 2, batch: 11216, sum loss: 4341.808105, avg loss: 2.582872, ppl: 13.235097 +epoch: 2, batch: 11217, sum loss: 5494.916016, avg loss: 2.938458, ppl: 18.886696 +epoch: 2, batch: 11218, sum loss: 4253.470703, avg loss: 2.571627, ppl: 13.087098 +epoch: 2, batch: 11219, sum loss: 4079.815918, avg loss: 2.564309, ppl: 12.991679 +epoch: 2, batch: 11220, sum loss: 4365.574707, avg loss: 2.609429, ppl: 13.591290 +epoch: 2, batch: 11221, sum loss: 4585.161133, avg loss: 2.686093, ppl: 14.674236 +epoch: 2, batch: 11222, sum loss: 3907.656006, avg loss: 2.298621, ppl: 9.960442 +epoch: 2, batch: 11223, sum loss: 5156.569336, avg loss: 2.685713, ppl: 14.668661 +epoch: 2, batch: 11224, sum loss: 4619.399414, avg loss: 2.687260, ppl: 14.691365 +epoch: 2, batch: 11225, sum loss: 3903.733887, avg loss: 2.270933, ppl: 9.688437 +epoch: 2, batch: 11226, sum loss: 4210.059570, avg loss: 2.717921, ppl: 15.148795 +epoch: 2, batch: 11227, sum loss: 
4016.510742, avg loss: 2.488544, ppl: 12.043734 +epoch: 2, batch: 11228, sum loss: 4097.040039, avg loss: 2.478548, ppl: 11.923939 +epoch: 2, batch: 11229, sum loss: 4134.463379, avg loss: 2.651997, ppl: 14.182334 +epoch: 2, batch: 11230, sum loss: 3890.601074, avg loss: 2.487597, ppl: 12.032322 +epoch: 2, batch: 11231, sum loss: 5261.630859, avg loss: 2.780989, ppl: 16.134966 +epoch: 2, batch: 11232, sum loss: 3892.204346, avg loss: 2.495003, ppl: 12.121767 +epoch: 2, batch: 11233, sum loss: 5367.151855, avg loss: 2.939295, ppl: 18.902508 +epoch: 2, batch: 11234, sum loss: 4520.817871, avg loss: 2.619246, ppl: 13.725365 +epoch: 2, batch: 11235, sum loss: 4415.033203, avg loss: 2.424510, ppl: 11.296696 +epoch: 2, batch: 11236, sum loss: 4758.356445, avg loss: 2.889105, ppl: 17.977215 +epoch: 2, batch: 11237, sum loss: 4438.581055, avg loss: 2.645162, ppl: 14.085722 +epoch: 2, batch: 11238, sum loss: 3608.330566, avg loss: 2.326454, ppl: 10.241562 +epoch: 2, batch: 11239, sum loss: 4110.099121, avg loss: 2.540234, ppl: 12.682643 +epoch: 2, batch: 11240, sum loss: 3803.138184, avg loss: 2.452055, ppl: 11.612190 +epoch: 2, batch: 11241, sum loss: 4630.631348, avg loss: 2.830459, ppl: 16.953247 +epoch: 2, batch: 11242, sum loss: 5596.890137, avg loss: 2.762532, ppl: 15.839903 +epoch: 2, batch: 11243, sum loss: 4762.101562, avg loss: 2.569942, ppl: 13.065061 +epoch: 2, batch: 11244, sum loss: 3449.358643, avg loss: 2.299572, ppl: 9.969919 +epoch: 2, batch: 11245, sum loss: 4756.987793, avg loss: 2.518257, ppl: 12.406954 +epoch: 2, batch: 11246, sum loss: 4045.455566, avg loss: 2.684443, ppl: 14.650039 +epoch: 2, batch: 11247, sum loss: 3482.705811, avg loss: 2.295785, ppl: 9.932229 +epoch: 2, batch: 11248, sum loss: 4546.928223, avg loss: 2.490103, ppl: 12.062521 +epoch: 2, batch: 11249, sum loss: 4741.211914, avg loss: 2.587998, ppl: 13.303111 +epoch: 2, batch: 11250, sum loss: 4704.520020, avg loss: 2.536129, ppl: 12.630689 +epoch: 2, batch: 11251, sum loss: 
3926.115234, avg loss: 2.392514, ppl: 10.940962 +epoch: 2, batch: 11252, sum loss: 4921.515137, avg loss: 2.797905, ppl: 16.410234 +epoch: 2, batch: 11253, sum loss: 4394.905273, avg loss: 2.644347, ppl: 14.074258 +epoch: 2, batch: 11254, sum loss: 3694.721191, avg loss: 2.110064, ppl: 8.248766 +epoch: 2, batch: 11255, sum loss: 5253.942383, avg loss: 2.736428, ppl: 15.431772 +epoch: 2, batch: 11256, sum loss: 3210.269043, avg loss: 2.217037, ppl: 9.180087 +epoch: 2, batch: 11257, sum loss: 3999.828369, avg loss: 2.478209, ppl: 11.919891 +epoch: 2, batch: 11258, sum loss: 3834.296387, avg loss: 2.178577, ppl: 8.833731 +epoch: 2, batch: 11259, sum loss: 4480.047852, avg loss: 2.627594, ppl: 13.840433 +epoch: 2, batch: 11260, sum loss: 3221.907471, avg loss: 2.044358, ppl: 7.724195 +epoch: 2, batch: 11261, sum loss: 5006.341309, avg loss: 2.717884, ppl: 15.148229 +epoch: 2, batch: 11262, sum loss: 3872.841309, avg loss: 2.420526, ppl: 11.251774 +epoch: 2, batch: 11263, sum loss: 4709.666016, avg loss: 2.548521, ppl: 12.788171 +epoch: 2, batch: 11264, sum loss: 4320.563477, avg loss: 2.475968, ppl: 11.893210 +epoch: 2, batch: 11265, sum loss: 3303.352539, avg loss: 2.361224, ppl: 10.603925 +epoch: 2, batch: 11266, sum loss: 3773.636719, avg loss: 2.359998, ppl: 10.590930 +epoch: 2, batch: 11267, sum loss: 4289.125000, avg loss: 2.628140, ppl: 13.847995 +epoch: 2, batch: 11268, sum loss: 4185.742676, avg loss: 2.442090, ppl: 11.497047 +epoch: 2, batch: 11269, sum loss: 4732.502930, avg loss: 2.418244, ppl: 11.226125 +epoch: 2, batch: 11270, sum loss: 4184.452148, avg loss: 2.480410, ppl: 11.946165 +epoch: 2, batch: 11271, sum loss: 4726.874023, avg loss: 2.538600, ppl: 12.661938 +epoch: 2, batch: 11272, sum loss: 4163.849609, avg loss: 2.632016, ppl: 13.901767 +epoch: 2, batch: 11273, sum loss: 3996.671387, avg loss: 2.516796, ppl: 12.388835 +epoch: 2, batch: 11274, sum loss: 3918.158447, avg loss: 2.418617, ppl: 11.230311 +epoch: 2, batch: 11275, sum loss: 
4079.006836, avg loss: 2.494805, ppl: 12.119374 +epoch: 2, batch: 11276, sum loss: 4764.355469, avg loss: 2.597795, ppl: 13.434077 +epoch: 2, batch: 11277, sum loss: 4992.152832, avg loss: 2.596023, ppl: 13.410303 +epoch: 2, batch: 11278, sum loss: 3994.116455, avg loss: 2.257839, ppl: 9.562400 +epoch: 2, batch: 11279, sum loss: 4362.835449, avg loss: 2.365963, ppl: 10.654291 +epoch: 2, batch: 11280, sum loss: 4831.843750, avg loss: 2.697847, ppl: 14.847725 +epoch: 2, batch: 11281, sum loss: 5031.912598, avg loss: 2.725847, ppl: 15.269335 +epoch: 2, batch: 11282, sum loss: 4333.529785, avg loss: 2.383680, ppl: 10.844734 +epoch: 2, batch: 11283, sum loss: 4089.121338, avg loss: 2.394099, ppl: 10.958320 +epoch: 2, batch: 11284, sum loss: 4531.088867, avg loss: 2.674787, ppl: 14.509256 +epoch: 2, batch: 11285, sum loss: 5256.248047, avg loss: 2.759185, ppl: 15.786976 +epoch: 2, batch: 11286, sum loss: 4365.638672, avg loss: 2.738795, ppl: 15.468328 +epoch: 2, batch: 11287, sum loss: 4153.196777, avg loss: 2.433038, ppl: 11.393448 +epoch: 2, batch: 11288, sum loss: 4713.912109, avg loss: 2.471900, ppl: 11.844925 +epoch: 2, batch: 11289, sum loss: 4153.664062, avg loss: 2.274734, ppl: 9.725332 +epoch: 2, batch: 11290, sum loss: 4995.070312, avg loss: 2.647096, ppl: 14.112994 +epoch: 2, batch: 11291, sum loss: 3857.221436, avg loss: 2.190358, ppl: 8.938416 +epoch: 2, batch: 11292, sum loss: 5072.189453, avg loss: 2.825732, ppl: 16.873301 +epoch: 2, batch: 11293, sum loss: 5285.162598, avg loss: 2.665236, ppl: 14.371337 +epoch: 2, batch: 11294, sum loss: 5032.165039, avg loss: 2.749817, ppl: 15.639771 +epoch: 2, batch: 11295, sum loss: 4351.703125, avg loss: 2.559825, ppl: 12.933559 +epoch: 2, batch: 11296, sum loss: 3958.760254, avg loss: 2.273843, ppl: 9.716668 +epoch: 2, batch: 11297, sum loss: 4494.606445, avg loss: 2.634588, ppl: 13.937562 +epoch: 2, batch: 11298, sum loss: 4392.600098, avg loss: 2.887969, ppl: 17.956795 +epoch: 2, batch: 11299, sum loss: 
4647.918945, avg loss: 2.556611, ppl: 12.892053 +epoch: 2, batch: 11300, sum loss: 4367.259277, avg loss: 2.556943, ppl: 12.896336 +epoch: 2, batch: 11301, sum loss: 3925.665527, avg loss: 2.632908, ppl: 13.914172 +epoch: 2, batch: 11302, sum loss: 4020.622070, avg loss: 2.471187, ppl: 11.836493 +epoch: 2, batch: 11303, sum loss: 4059.416016, avg loss: 2.476764, ppl: 11.902681 +epoch: 2, batch: 11304, sum loss: 4109.794434, avg loss: 2.478766, ppl: 11.926540 +epoch: 2, batch: 11305, sum loss: 4086.104492, avg loss: 2.458547, ppl: 11.687813 +epoch: 2, batch: 11306, sum loss: 4722.900879, avg loss: 2.657794, ppl: 14.264793 +epoch: 2, batch: 11307, sum loss: 4285.479004, avg loss: 2.537288, ppl: 12.645327 +epoch: 2, batch: 11308, sum loss: 4117.736816, avg loss: 2.483557, ppl: 11.983809 +epoch: 2, batch: 11309, sum loss: 4806.122070, avg loss: 2.694015, ppl: 14.790939 +epoch: 2, batch: 11310, sum loss: 4068.197998, avg loss: 2.330010, ppl: 10.278049 +epoch: 2, batch: 11311, sum loss: 4631.610352, avg loss: 2.599108, ppl: 13.451734 +epoch: 2, batch: 11312, sum loss: 4635.170898, avg loss: 2.723367, ppl: 15.231524 +epoch: 2, batch: 11313, sum loss: 3546.260254, avg loss: 2.392888, ppl: 10.945061 +epoch: 2, batch: 11314, sum loss: 4019.576904, avg loss: 2.556983, ppl: 12.896846 +epoch: 2, batch: 11315, sum loss: 3989.459961, avg loss: 2.423730, ppl: 11.287889 +epoch: 2, batch: 11316, sum loss: 3705.384277, avg loss: 2.418658, ppl: 11.230781 +epoch: 2, batch: 11317, sum loss: 4237.007812, avg loss: 2.443488, ppl: 11.513124 +epoch: 2, batch: 11318, sum loss: 3537.364014, avg loss: 2.345732, ppl: 10.440912 +epoch: 2, batch: 11319, sum loss: 4202.382812, avg loss: 2.510384, ppl: 12.309657 +epoch: 2, batch: 11320, sum loss: 4702.248047, avg loss: 2.587919, ppl: 13.302054 +epoch: 2, batch: 11321, sum loss: 5677.885254, avg loss: 2.969605, ppl: 19.484226 +epoch: 2, batch: 11322, sum loss: 4263.341797, avg loss: 2.418231, ppl: 11.225986 +epoch: 2, batch: 11323, sum loss: 
5063.278320, avg loss: 2.795847, ppl: 16.376493 +epoch: 2, batch: 11324, sum loss: 4361.921875, avg loss: 2.531585, ppl: 12.573425 +epoch: 2, batch: 11325, sum loss: 5026.208496, avg loss: 2.700811, ppl: 14.891799 +epoch: 2, batch: 11326, sum loss: 4377.521973, avg loss: 2.707187, ppl: 14.987063 +epoch: 2, batch: 11327, sum loss: 4993.758789, avg loss: 2.757459, ppl: 15.759753 +epoch: 2, batch: 11328, sum loss: 4420.906250, avg loss: 2.680962, ppl: 14.599129 +epoch: 2, batch: 11329, sum loss: 3662.955566, avg loss: 2.469963, ppl: 11.822013 +epoch: 2, batch: 11330, sum loss: 4401.955078, avg loss: 2.537150, ppl: 12.643584 +epoch: 2, batch: 11331, sum loss: 4229.037598, avg loss: 2.604087, ppl: 13.518879 +epoch: 2, batch: 11332, sum loss: 4506.223145, avg loss: 2.484136, ppl: 11.990760 +epoch: 2, batch: 11333, sum loss: 4658.836914, avg loss: 2.495360, ppl: 12.126097 +epoch: 2, batch: 11334, sum loss: 4326.931152, avg loss: 2.674247, ppl: 14.501420 +epoch: 2, batch: 11335, sum loss: 4324.864258, avg loss: 2.467122, ppl: 11.788469 +epoch: 2, batch: 11336, sum loss: 4157.963867, avg loss: 2.417421, ppl: 11.216892 +epoch: 2, batch: 11337, sum loss: 3754.834229, avg loss: 2.542203, ppl: 12.707641 +epoch: 2, batch: 11338, sum loss: 4550.141602, avg loss: 2.526453, ppl: 12.509055 +epoch: 2, batch: 11339, sum loss: 4642.456055, avg loss: 2.512151, ppl: 12.331432 +epoch: 2, batch: 11340, sum loss: 4415.929199, avg loss: 2.530618, ppl: 12.561272 +epoch: 2, batch: 11341, sum loss: 5250.178223, avg loss: 2.733044, ppl: 15.379638 +epoch: 2, batch: 11342, sum loss: 3982.732422, avg loss: 2.696501, ppl: 14.827765 +epoch: 2, batch: 11343, sum loss: 4450.616699, avg loss: 2.489159, ppl: 12.051141 +epoch: 2, batch: 11344, sum loss: 4127.667969, avg loss: 2.438079, ppl: 11.451023 +epoch: 2, batch: 11345, sum loss: 4566.833984, avg loss: 2.496902, ppl: 12.144814 +epoch: 2, batch: 11346, sum loss: 4657.543945, avg loss: 2.826180, ppl: 16.880844 +epoch: 2, batch: 11347, sum loss: 
3811.918457, avg loss: 2.744362, ppl: 15.554681 +epoch: 2, batch: 11348, sum loss: 4621.317383, avg loss: 2.849148, ppl: 17.273052 +epoch: 2, batch: 11349, sum loss: 4218.295898, avg loss: 2.565873, ppl: 13.012018 +epoch: 2, batch: 11350, sum loss: 4723.929199, avg loss: 2.652403, ppl: 14.188090 +epoch: 2, batch: 11351, sum loss: 4750.018555, avg loss: 2.779414, ppl: 16.109581 +epoch: 2, batch: 11352, sum loss: 4937.941895, avg loss: 2.581255, ppl: 13.213717 +epoch: 2, batch: 11353, sum loss: 4607.809082, avg loss: 2.728129, ppl: 15.304221 +epoch: 2, batch: 11354, sum loss: 3613.977783, avg loss: 2.351319, ppl: 10.499413 +epoch: 2, batch: 11355, sum loss: 4118.809082, avg loss: 2.473759, ppl: 11.866973 +epoch: 2, batch: 11356, sum loss: 3644.219238, avg loss: 2.675638, ppl: 14.521615 +epoch: 2, batch: 11357, sum loss: 3766.020996, avg loss: 2.294955, ppl: 9.923987 +epoch: 2, batch: 11358, sum loss: 3928.868408, avg loss: 2.683653, ppl: 14.638472 +epoch: 2, batch: 11359, sum loss: 3783.838623, avg loss: 2.373801, ppl: 10.738128 +epoch: 2, batch: 11360, sum loss: 3929.093994, avg loss: 2.358400, ppl: 10.574016 +epoch: 2, batch: 11361, sum loss: 4112.354004, avg loss: 2.403480, ppl: 11.061604 +epoch: 2, batch: 11362, sum loss: 4890.583008, avg loss: 2.652160, ppl: 14.184644 +epoch: 2, batch: 11363, sum loss: 4712.640137, avg loss: 2.783603, ppl: 16.177206 +epoch: 2, batch: 11364, sum loss: 4468.166992, avg loss: 2.669156, ppl: 14.427788 +epoch: 2, batch: 11365, sum loss: 4515.513672, avg loss: 2.625299, ppl: 13.808699 +epoch: 2, batch: 11366, sum loss: 4391.613281, avg loss: 2.311376, ppl: 10.088293 +epoch: 2, batch: 11367, sum loss: 5676.802734, avg loss: 2.801976, ppl: 16.477169 +epoch: 2, batch: 11368, sum loss: 3549.843506, avg loss: 2.526579, ppl: 12.510635 +epoch: 2, batch: 11369, sum loss: 4086.066406, avg loss: 2.553792, ppl: 12.855755 +epoch: 2, batch: 11370, sum loss: 3861.067383, avg loss: 2.717148, ppl: 15.137091 +epoch: 2, batch: 11371, sum loss: 
5328.302246, avg loss: 2.785312, ppl: 16.204876 +epoch: 2, batch: 11372, sum loss: 3914.648438, avg loss: 2.471369, ppl: 11.838643 +epoch: 2, batch: 11373, sum loss: 4105.627441, avg loss: 2.435129, ppl: 11.417291 +epoch: 2, batch: 11374, sum loss: 3558.650879, avg loss: 2.380368, ppl: 10.808885 +epoch: 2, batch: 11375, sum loss: 4962.803711, avg loss: 2.805429, ppl: 16.534166 +epoch: 2, batch: 11376, sum loss: 5080.007812, avg loss: 2.766889, ppl: 15.909058 +epoch: 2, batch: 11377, sum loss: 4317.489258, avg loss: 2.510168, ppl: 12.306998 +epoch: 2, batch: 11378, sum loss: 4263.946289, avg loss: 2.459023, ppl: 11.693384 +epoch: 2, batch: 11379, sum loss: 5060.520020, avg loss: 2.646715, ppl: 14.107625 +epoch: 2, batch: 11380, sum loss: 4697.428711, avg loss: 2.561302, ppl: 12.952676 +epoch: 2, batch: 11381, sum loss: 4699.320801, avg loss: 2.446289, ppl: 11.545420 +epoch: 2, batch: 11382, sum loss: 4545.803711, avg loss: 2.629152, ppl: 13.862011 +epoch: 2, batch: 11383, sum loss: 4748.793945, avg loss: 2.841888, ppl: 17.148106 +epoch: 2, batch: 11384, sum loss: 4691.452148, avg loss: 2.647547, ppl: 14.119354 +epoch: 2, batch: 11385, sum loss: 5509.252441, avg loss: 2.836896, ppl: 17.062727 +epoch: 2, batch: 11386, sum loss: 3607.607422, avg loss: 2.392313, ppl: 10.938761 +epoch: 2, batch: 11387, sum loss: 4322.480469, avg loss: 2.669846, ppl: 14.437743 +epoch: 2, batch: 11388, sum loss: 3930.833984, avg loss: 2.484724, ppl: 11.997812 +epoch: 2, batch: 11389, sum loss: 4160.844727, avg loss: 2.443244, ppl: 11.510323 +epoch: 2, batch: 11390, sum loss: 5440.484863, avg loss: 2.788562, ppl: 16.257629 +epoch: 2, batch: 11391, sum loss: 3916.614014, avg loss: 2.493071, ppl: 12.098372 +epoch: 2, batch: 11392, sum loss: 4527.389160, avg loss: 2.597469, ppl: 13.429708 +epoch: 2, batch: 11393, sum loss: 5094.469238, avg loss: 2.831834, ppl: 16.976564 +epoch: 2, batch: 11394, sum loss: 5054.672363, avg loss: 2.563221, ppl: 12.977553 +epoch: 2, batch: 11395, sum loss: 
4338.864746, avg loss: 2.541807, ppl: 12.702606 +epoch: 2, batch: 11396, sum loss: 4280.686523, avg loss: 2.471528, ppl: 11.840526 +epoch: 2, batch: 11397, sum loss: 4577.542969, avg loss: 2.473011, ppl: 11.858095 +epoch: 2, batch: 11398, sum loss: 3827.699707, avg loss: 2.315608, ppl: 10.131079 +epoch: 2, batch: 11399, sum loss: 3484.808838, avg loss: 2.209771, ppl: 9.113628 +epoch: 2, batch: 11400, sum loss: 4674.807617, avg loss: 2.724247, ppl: 15.244930 +epoch: 2, batch: 11401, sum loss: 4766.913086, avg loss: 2.554616, ppl: 12.866355 +epoch: 2, batch: 11402, sum loss: 4375.775391, avg loss: 2.422910, ppl: 11.278632 +epoch: 2, batch: 11403, sum loss: 4168.608398, avg loss: 2.369874, ppl: 10.696045 +epoch: 2, batch: 11404, sum loss: 4468.011230, avg loss: 2.520029, ppl: 12.428958 +epoch: 2, batch: 11405, sum loss: 4864.858887, avg loss: 2.736141, ppl: 15.427340 +epoch: 2, batch: 11406, sum loss: 3949.425781, avg loss: 2.559576, ppl: 12.930334 +epoch: 2, batch: 11407, sum loss: 4822.211914, avg loss: 2.589802, ppl: 13.327136 +epoch: 2, batch: 11408, sum loss: 4853.842773, avg loss: 2.830229, ppl: 16.949339 +epoch: 2, batch: 11409, sum loss: 4092.015137, avg loss: 2.619728, ppl: 13.731989 +epoch: 2, batch: 11410, sum loss: 4308.588379, avg loss: 2.592412, ppl: 13.361959 +epoch: 2, batch: 11411, sum loss: 3981.121094, avg loss: 2.431962, ppl: 11.381185 +epoch: 2, batch: 11412, sum loss: 4485.007812, avg loss: 2.616691, ppl: 13.690342 +epoch: 2, batch: 11413, sum loss: 4433.505859, avg loss: 2.607945, ppl: 13.571130 +epoch: 2, batch: 11414, sum loss: 4063.581055, avg loss: 2.536567, ppl: 12.636222 +epoch: 2, batch: 11415, sum loss: 3449.063721, avg loss: 2.415311, ppl: 11.193250 +epoch: 2, batch: 11416, sum loss: 4829.608398, avg loss: 2.607780, ppl: 13.568891 +epoch: 2, batch: 11417, sum loss: 4547.744141, avg loss: 2.569347, ppl: 13.057297 +epoch: 2, batch: 11418, sum loss: 3822.776367, avg loss: 2.329541, ppl: 10.273222 +epoch: 2, batch: 11419, sum loss: 
3509.809082, avg loss: 2.461297, ppl: 11.719997 +epoch: 2, batch: 11420, sum loss: 4414.364746, avg loss: 2.783332, ppl: 16.172821 +epoch: 2, batch: 11421, sum loss: 4321.055176, avg loss: 2.590561, ppl: 13.337247 +epoch: 2, batch: 11422, sum loss: 4231.426758, avg loss: 2.404220, ppl: 11.069788 +epoch: 2, batch: 11423, sum loss: 4052.994629, avg loss: 2.578241, ppl: 13.173944 +epoch: 2, batch: 11424, sum loss: 4580.752441, avg loss: 2.537813, ppl: 12.651970 +epoch: 2, batch: 11425, sum loss: 5012.318848, avg loss: 2.804879, ppl: 16.525080 +epoch: 2, batch: 11426, sum loss: 3797.156738, avg loss: 2.499774, ppl: 12.179741 +epoch: 2, batch: 11427, sum loss: 4417.141602, avg loss: 2.571095, ppl: 13.080142 +epoch: 2, batch: 11428, sum loss: 4346.823242, avg loss: 2.555452, ppl: 12.877114 +epoch: 2, batch: 11429, sum loss: 4312.970215, avg loss: 2.544525, ppl: 12.737179 +epoch: 2, batch: 11430, sum loss: 3869.987305, avg loss: 2.493549, ppl: 12.104154 +epoch: 2, batch: 11431, sum loss: 5271.495117, avg loss: 2.820490, ppl: 16.785067 +epoch: 2, batch: 11432, sum loss: 4709.334961, avg loss: 2.852414, ppl: 17.329563 +epoch: 2, batch: 11433, sum loss: 3140.246582, avg loss: 2.104723, ppl: 8.204830 +epoch: 2, batch: 11434, sum loss: 4818.911621, avg loss: 2.597796, ppl: 13.434096 +epoch: 2, batch: 11435, sum loss: 4180.535156, avg loss: 2.606319, ppl: 13.549081 +epoch: 2, batch: 11436, sum loss: 3917.461670, avg loss: 2.391613, ppl: 10.931107 +epoch: 2, batch: 11437, sum loss: 4624.839844, avg loss: 2.557987, ppl: 12.909801 +epoch: 2, batch: 11438, sum loss: 5300.169434, avg loss: 2.664741, ppl: 14.364223 +epoch: 2, batch: 11439, sum loss: 4558.059082, avg loss: 2.875747, ppl: 17.738668 +epoch: 2, batch: 11440, sum loss: 4779.631836, avg loss: 2.614678, ppl: 13.662822 +epoch: 2, batch: 11441, sum loss: 4875.334961, avg loss: 2.635316, ppl: 13.947724 +epoch: 2, batch: 11442, sum loss: 4039.046143, avg loss: 2.638175, ppl: 13.987657 +epoch: 2, batch: 11443, sum loss: 
3561.264160, avg loss: 2.361581, ppl: 10.607707 +epoch: 2, batch: 11444, sum loss: 4110.523926, avg loss: 2.491227, ppl: 12.076080 +epoch: 2, batch: 11445, sum loss: 4495.267578, avg loss: 2.433821, ppl: 11.402369 +epoch: 2, batch: 11446, sum loss: 3829.010986, avg loss: 2.499354, ppl: 12.174631 +epoch: 2, batch: 11447, sum loss: 4065.764404, avg loss: 2.393034, ppl: 10.946655 +epoch: 2, batch: 11448, sum loss: 3937.558350, avg loss: 2.568531, ppl: 13.046648 +epoch: 2, batch: 11449, sum loss: 4860.886230, avg loss: 2.717097, ppl: 15.136314 +epoch: 2, batch: 11450, sum loss: 4141.769531, avg loss: 2.466807, ppl: 11.784762 +epoch: 2, batch: 11451, sum loss: 4400.930664, avg loss: 2.651163, ppl: 14.170511 +epoch: 2, batch: 11452, sum loss: 3932.289795, avg loss: 2.599002, ppl: 13.450306 +epoch: 2, batch: 11453, sum loss: 3442.830322, avg loss: 2.248746, ppl: 9.475847 +epoch: 2, batch: 11454, sum loss: 4699.068359, avg loss: 2.601921, ppl: 13.489621 +epoch: 2, batch: 11455, sum loss: 4855.132812, avg loss: 2.753904, ppl: 15.703822 +epoch: 2, batch: 11456, sum loss: 3247.447754, avg loss: 2.219718, ppl: 9.204737 +epoch: 2, batch: 11457, sum loss: 4453.968750, avg loss: 2.633926, ppl: 13.928348 +epoch: 2, batch: 11458, sum loss: 5142.212891, avg loss: 2.720748, ppl: 15.191677 +epoch: 2, batch: 11459, sum loss: 4333.757812, avg loss: 2.525500, ppl: 12.497143 +epoch: 2, batch: 11460, sum loss: 5040.698242, avg loss: 2.757493, ppl: 15.760290 +epoch: 2, batch: 11461, sum loss: 4276.304688, avg loss: 2.343181, ppl: 10.414309 +epoch: 2, batch: 11462, sum loss: 3730.199463, avg loss: 2.295507, ppl: 9.929473 +epoch: 2, batch: 11463, sum loss: 4156.083008, avg loss: 2.330950, ppl: 10.287705 +epoch: 2, batch: 11464, sum loss: 3846.280518, avg loss: 2.475084, ppl: 11.882706 +epoch: 2, batch: 11465, sum loss: 4081.446777, avg loss: 2.508572, ppl: 12.287373 +epoch: 2, batch: 11466, sum loss: 3937.290771, avg loss: 2.373291, ppl: 10.732660 +epoch: 2, batch: 11467, sum loss: 
5022.541504, avg loss: 2.654620, ppl: 14.219584 +epoch: 2, batch: 11468, sum loss: 3867.383057, avg loss: 2.418626, ppl: 11.230419 +epoch: 2, batch: 11469, sum loss: 4419.395508, avg loss: 2.392743, ppl: 10.943467 +epoch: 2, batch: 11470, sum loss: 3728.720215, avg loss: 2.308805, ppl: 10.062395 +epoch: 2, batch: 11471, sum loss: 5588.261230, avg loss: 2.652236, ppl: 14.185722 +epoch: 2, batch: 11472, sum loss: 3959.413330, avg loss: 2.331810, ppl: 10.296561 +epoch: 2, batch: 11473, sum loss: 5305.367188, avg loss: 2.713743, ppl: 15.085631 +epoch: 2, batch: 11474, sum loss: 4803.154297, avg loss: 2.693861, ppl: 14.788669 +epoch: 2, batch: 11475, sum loss: 3383.938721, avg loss: 2.544315, ppl: 12.734500 +epoch: 2, batch: 11476, sum loss: 4172.887695, avg loss: 2.500232, ppl: 12.185327 +epoch: 2, batch: 11477, sum loss: 4220.241211, avg loss: 2.584348, ppl: 13.254650 +epoch: 2, batch: 11478, sum loss: 3621.388916, avg loss: 2.488927, ppl: 12.048343 +epoch: 2, batch: 11479, sum loss: 4873.329102, avg loss: 2.622890, ppl: 13.775474 +epoch: 2, batch: 11480, sum loss: 4189.504883, avg loss: 2.531423, ppl: 12.571383 +epoch: 2, batch: 11481, sum loss: 4578.060059, avg loss: 2.629558, ppl: 13.867637 +epoch: 2, batch: 11482, sum loss: 4939.694824, avg loss: 2.620528, ppl: 13.742974 +epoch: 2, batch: 11483, sum loss: 3312.307861, avg loss: 2.281204, ppl: 9.788456 +epoch: 2, batch: 11484, sum loss: 4855.747070, avg loss: 2.737174, ppl: 15.443281 +epoch: 2, batch: 11485, sum loss: 5063.234863, avg loss: 2.567563, ppl: 13.034025 +epoch: 2, batch: 11486, sum loss: 4478.314941, avg loss: 2.564900, ppl: 12.999357 +epoch: 2, batch: 11487, sum loss: 4011.662598, avg loss: 2.422502, ppl: 11.274027 +epoch: 2, batch: 11488, sum loss: 3627.547852, avg loss: 2.322374, ppl: 10.199859 +epoch: 2, batch: 11489, sum loss: 4286.452637, avg loss: 2.655795, ppl: 14.236298 +epoch: 2, batch: 11490, sum loss: 4748.480957, avg loss: 2.503153, ppl: 12.220964 +epoch: 2, batch: 11491, sum loss: 
3867.558105, avg loss: 2.454034, ppl: 11.635192 +epoch: 2, batch: 11492, sum loss: 4173.397461, avg loss: 2.394376, ppl: 10.961356 +epoch: 2, batch: 11493, sum loss: 4925.047363, avg loss: 2.734618, ppl: 15.403861 +epoch: 2, batch: 11494, sum loss: 3643.212402, avg loss: 2.395275, ppl: 10.971210 +epoch: 2, batch: 11495, sum loss: 3855.915283, avg loss: 2.632024, ppl: 13.901879 +epoch: 2, batch: 11496, sum loss: 4101.510254, avg loss: 2.333055, ppl: 10.309386 +epoch: 2, batch: 11497, sum loss: 4015.349121, avg loss: 2.778788, ppl: 16.099503 +epoch: 2, batch: 11498, sum loss: 3256.752686, avg loss: 2.127206, ppl: 8.391391 +epoch: 2, batch: 11499, sum loss: 4700.074707, avg loss: 2.528281, ppl: 12.531945 +epoch: 2, batch: 11500, sum loss: 4233.545410, avg loss: 2.332532, ppl: 10.303998 +epoch: 2, batch: 11501, sum loss: 4681.335938, avg loss: 2.590667, ppl: 13.338669 +epoch: 2, batch: 11502, sum loss: 4204.080078, avg loss: 2.331714, ppl: 10.295572 +epoch: 2, batch: 11503, sum loss: 4490.322266, avg loss: 2.472644, ppl: 11.853750 +epoch: 2, batch: 11504, sum loss: 4432.120605, avg loss: 2.495564, ppl: 12.128566 +epoch: 2, batch: 11505, sum loss: 5364.645996, avg loss: 2.641382, ppl: 14.032579 +epoch: 2, batch: 11506, sum loss: 5719.741211, avg loss: 2.857013, ppl: 17.409454 +epoch: 2, batch: 11507, sum loss: 4405.208984, avg loss: 2.615920, ppl: 13.679793 +epoch: 2, batch: 11508, sum loss: 4934.783203, avg loss: 2.862403, ppl: 17.503544 +epoch: 2, batch: 11509, sum loss: 5555.875488, avg loss: 2.682702, ppl: 14.624553 +epoch: 2, batch: 11510, sum loss: 3299.409912, avg loss: 2.401317, ppl: 11.037707 +epoch: 2, batch: 11511, sum loss: 4361.374023, avg loss: 2.446088, ppl: 11.543097 +epoch: 2, batch: 11512, sum loss: 3805.136230, avg loss: 2.293633, ppl: 9.910873 +epoch: 2, batch: 11513, sum loss: 3088.354980, avg loss: 2.315109, ppl: 10.126024 +epoch: 2, batch: 11514, sum loss: 4448.199219, avg loss: 2.604332, ppl: 13.522189 +epoch: 2, batch: 11515, sum loss: 
4688.669434, avg loss: 2.759664, ppl: 15.794539 +epoch: 2, batch: 11516, sum loss: 5461.207031, avg loss: 2.696892, ppl: 14.833561 +epoch: 2, batch: 11517, sum loss: 3735.663574, avg loss: 2.396192, ppl: 10.981283 +epoch: 2, batch: 11518, sum loss: 4451.059570, avg loss: 2.681361, ppl: 14.604960 +epoch: 2, batch: 11519, sum loss: 4703.151855, avg loss: 2.704515, ppl: 14.947069 +epoch: 2, batch: 11520, sum loss: 3644.244873, avg loss: 2.729771, ppl: 15.329382 +epoch: 2, batch: 11521, sum loss: 3888.660400, avg loss: 2.103115, ppl: 8.191649 +epoch: 2, batch: 11522, sum loss: 4446.990723, avg loss: 2.532455, ppl: 12.584362 +epoch: 2, batch: 11523, sum loss: 5540.581543, avg loss: 2.601212, ppl: 13.480066 +epoch: 2, batch: 11524, sum loss: 4331.243164, avg loss: 2.631375, ppl: 13.892860 +epoch: 2, batch: 11525, sum loss: 3552.687744, avg loss: 2.179563, ppl: 8.842442 +epoch: 2, batch: 11526, sum loss: 4079.054688, avg loss: 2.521047, ppl: 12.441621 +epoch: 2, batch: 11527, sum loss: 4504.037109, avg loss: 2.402153, ppl: 11.046938 +epoch: 2, batch: 11528, sum loss: 3808.396973, avg loss: 2.392209, ppl: 10.937629 +epoch: 2, batch: 11529, sum loss: 3761.316650, avg loss: 2.484357, ppl: 11.993407 +epoch: 2, batch: 11530, sum loss: 3830.135498, avg loss: 2.513212, ppl: 12.344520 +epoch: 2, batch: 11531, sum loss: 4072.123291, avg loss: 2.658044, ppl: 14.268351 +epoch: 2, batch: 11532, sum loss: 3988.802246, avg loss: 2.278014, ppl: 9.757282 +epoch: 2, batch: 11533, sum loss: 4473.750000, avg loss: 2.619292, ppl: 13.725996 +epoch: 2, batch: 11534, sum loss: 4324.319824, avg loss: 2.587864, ppl: 13.301326 +epoch: 2, batch: 11535, sum loss: 4878.339844, avg loss: 2.614330, ppl: 13.658060 +epoch: 2, batch: 11536, sum loss: 5312.931152, avg loss: 2.707916, ppl: 14.997984 +epoch: 2, batch: 11537, sum loss: 3935.360840, avg loss: 2.506599, ppl: 12.263154 +epoch: 2, batch: 11538, sum loss: 4677.456543, avg loss: 2.577111, ppl: 13.159070 +epoch: 2, batch: 11539, sum loss: 
4522.826172, avg loss: 2.612840, ppl: 13.637726 +epoch: 2, batch: 11540, sum loss: 4493.863770, avg loss: 2.631068, ppl: 13.888592 +epoch: 2, batch: 11541, sum loss: 3597.080566, avg loss: 2.320697, ppl: 10.182773 +epoch: 2, batch: 11542, sum loss: 4495.400879, avg loss: 2.647468, ppl: 14.118251 +epoch: 2, batch: 11543, sum loss: 4931.963867, avg loss: 2.733905, ppl: 15.392873 +epoch: 2, batch: 11544, sum loss: 4242.988281, avg loss: 2.330032, ppl: 10.278272 +epoch: 2, batch: 11545, sum loss: 4284.623047, avg loss: 2.656307, ppl: 14.243590 +epoch: 2, batch: 11546, sum loss: 4199.648438, avg loss: 2.435991, ppl: 11.427138 +epoch: 2, batch: 11547, sum loss: 4102.994629, avg loss: 2.524920, ppl: 12.489893 +epoch: 2, batch: 11548, sum loss: 4699.242188, avg loss: 2.540131, ppl: 12.681334 +epoch: 2, batch: 11549, sum loss: 4791.573730, avg loss: 2.585847, ppl: 13.274524 +epoch: 2, batch: 11550, sum loss: 4035.562256, avg loss: 2.495710, ppl: 12.130339 +epoch: 2, batch: 11551, sum loss: 4706.237793, avg loss: 2.667935, ppl: 14.410187 +epoch: 2, batch: 11552, sum loss: 4337.582031, avg loss: 2.399105, ppl: 11.013316 +epoch: 2, batch: 11553, sum loss: 3780.568115, avg loss: 2.221250, ppl: 9.218850 +epoch: 2, batch: 11554, sum loss: 3917.817139, avg loss: 2.316864, ppl: 10.143816 +epoch: 2, batch: 11555, sum loss: 4806.634277, avg loss: 2.593974, ppl: 13.382851 +epoch: 2, batch: 11556, sum loss: 4532.507324, avg loss: 2.431603, ppl: 11.377102 +epoch: 2, batch: 11557, sum loss: 5077.450195, avg loss: 2.677980, ppl: 14.555664 +epoch: 2, batch: 11558, sum loss: 3883.021973, avg loss: 2.454502, ppl: 11.640631 +epoch: 2, batch: 11559, sum loss: 4338.872070, avg loss: 2.585740, ppl: 13.273108 +epoch: 2, batch: 11560, sum loss: 3927.880127, avg loss: 2.363346, ppl: 10.626444 +epoch: 2, batch: 11561, sum loss: 4540.874512, avg loss: 2.629343, ppl: 13.864652 +epoch: 2, batch: 11562, sum loss: 3625.573730, avg loss: 2.491803, ppl: 12.083044 +epoch: 2, batch: 11563, sum loss: 
4216.437012, avg loss: 2.526325, ppl: 12.507463 +epoch: 2, batch: 11564, sum loss: 3118.988037, avg loss: 2.395536, ppl: 10.974081 +epoch: 2, batch: 11565, sum loss: 4091.040771, avg loss: 2.519114, ppl: 12.417587 +epoch: 2, batch: 11566, sum loss: 4632.684570, avg loss: 2.718712, ppl: 15.160777 +epoch: 2, batch: 11567, sum loss: 4808.170898, avg loss: 2.538633, ppl: 12.662351 +epoch: 2, batch: 11568, sum loss: 4474.445801, avg loss: 2.610528, ppl: 13.606239 +epoch: 2, batch: 11569, sum loss: 4363.056641, avg loss: 2.732033, ppl: 15.364091 +epoch: 2, batch: 11570, sum loss: 4251.875488, avg loss: 2.334913, ppl: 10.328557 +epoch: 2, batch: 11571, sum loss: 3824.005127, avg loss: 2.313373, ppl: 10.108459 +epoch: 2, batch: 11572, sum loss: 5198.711914, avg loss: 2.598057, ppl: 13.437604 +epoch: 2, batch: 11573, sum loss: 3466.807617, avg loss: 2.291347, ppl: 9.888247 +epoch: 2, batch: 11574, sum loss: 3820.416748, avg loss: 2.488871, ppl: 12.047665 +epoch: 2, batch: 11575, sum loss: 5572.599121, avg loss: 2.688181, ppl: 14.704903 +epoch: 2, batch: 11576, sum loss: 4600.616211, avg loss: 2.850444, ppl: 17.295456 +epoch: 2, batch: 11577, sum loss: 4455.742676, avg loss: 2.511693, ppl: 12.325774 +epoch: 2, batch: 11578, sum loss: 4125.730469, avg loss: 2.575362, ppl: 13.136074 +epoch: 2, batch: 11579, sum loss: 4076.648926, avg loss: 2.402268, ppl: 11.048207 +epoch: 2, batch: 11580, sum loss: 4263.738770, avg loss: 2.376666, ppl: 10.768940 +epoch: 2, batch: 11581, sum loss: 3962.985840, avg loss: 2.247865, ppl: 9.467501 +epoch: 2, batch: 11582, sum loss: 3985.554199, avg loss: 2.428735, ppl: 11.344522 +epoch: 2, batch: 11583, sum loss: 4552.922852, avg loss: 2.682925, ppl: 14.627810 +epoch: 2, batch: 11584, sum loss: 4602.252930, avg loss: 2.667973, ppl: 14.410727 +epoch: 2, batch: 11585, sum loss: 4473.825195, avg loss: 2.519046, ppl: 12.416743 +epoch: 2, batch: 11586, sum loss: 4026.345947, avg loss: 2.561289, ppl: 12.952500 +epoch: 2, batch: 11587, sum loss: 
4340.590820, avg loss: 2.563846, ppl: 12.985662 +epoch: 2, batch: 11588, sum loss: 4674.019531, avg loss: 2.906729, ppl: 18.296843 +epoch: 2, batch: 11589, sum loss: 3293.013916, avg loss: 2.274181, ppl: 9.719954 +epoch: 2, batch: 11590, sum loss: 3861.415283, avg loss: 2.363167, ppl: 10.624549 +epoch: 2, batch: 11591, sum loss: 4557.522461, avg loss: 2.696759, ppl: 14.831581 +epoch: 2, batch: 11592, sum loss: 4546.535156, avg loss: 2.699843, ppl: 14.877395 +epoch: 2, batch: 11593, sum loss: 5296.412598, avg loss: 2.718898, ppl: 15.163600 +epoch: 2, batch: 11594, sum loss: 4134.159668, avg loss: 2.759786, ppl: 15.796464 +epoch: 2, batch: 11595, sum loss: 4367.022949, avg loss: 2.528676, ppl: 12.536894 +epoch: 2, batch: 11596, sum loss: 5008.856934, avg loss: 2.661454, ppl: 14.317094 +epoch: 2, batch: 11597, sum loss: 4946.030273, avg loss: 2.821466, ppl: 16.801468 +epoch: 2, batch: 11598, sum loss: 4209.435547, avg loss: 2.674355, ppl: 14.502996 +epoch: 2, batch: 11599, sum loss: 4501.530762, avg loss: 2.543238, ppl: 12.720790 +epoch: 2, batch: 11600, sum loss: 3260.539551, avg loss: 2.165033, ppl: 8.714889 +epoch: 2, batch: 11601, sum loss: 5050.778809, avg loss: 2.675201, ppl: 14.515263 +epoch: 2, batch: 11602, sum loss: 3757.925781, avg loss: 2.304062, ppl: 10.014784 +epoch: 2, batch: 11603, sum loss: 4743.089355, avg loss: 2.614713, ppl: 13.663294 +epoch: 2, batch: 11604, sum loss: 3968.462402, avg loss: 2.486505, ppl: 12.019198 +epoch: 2, batch: 11605, sum loss: 3981.426758, avg loss: 2.507196, ppl: 12.270472 +epoch: 2, batch: 11606, sum loss: 5479.756836, avg loss: 2.792944, ppl: 16.329025 +epoch: 2, batch: 11607, sum loss: 4246.533691, avg loss: 2.463187, ppl: 11.742171 +epoch: 2, batch: 11608, sum loss: 3250.059814, avg loss: 2.269595, ppl: 9.675481 +epoch: 2, batch: 11609, sum loss: 3725.682617, avg loss: 2.409885, ppl: 11.132683 +epoch: 2, batch: 11610, sum loss: 4619.372559, avg loss: 2.592241, ppl: 13.359672 +epoch: 2, batch: 11611, sum loss: 
4371.081543, avg loss: 2.791240, ppl: 16.301216 +epoch: 2, batch: 11612, sum loss: 4265.329590, avg loss: 2.408430, ppl: 11.116496 +epoch: 2, batch: 11613, sum loss: 4345.732910, avg loss: 2.372125, ppl: 10.720147 +epoch: 2, batch: 11614, sum loss: 4683.986816, avg loss: 2.486193, ppl: 12.015443 +epoch: 2, batch: 11615, sum loss: 4746.171875, avg loss: 2.738703, ppl: 15.466905 +epoch: 2, batch: 11616, sum loss: 4243.226074, avg loss: 2.579469, ppl: 13.190128 +epoch: 2, batch: 11617, sum loss: 4420.365234, avg loss: 2.598686, ppl: 13.446061 +epoch: 2, batch: 11618, sum loss: 4983.215332, avg loss: 2.793282, ppl: 16.334547 +epoch: 2, batch: 11619, sum loss: 4932.770020, avg loss: 2.698452, ppl: 14.856715 +epoch: 2, batch: 11620, sum loss: 4516.231934, avg loss: 2.642617, ppl: 14.049921 +epoch: 2, batch: 11621, sum loss: 4099.263672, avg loss: 2.528849, ppl: 12.539061 +epoch: 2, batch: 11622, sum loss: 3508.115723, avg loss: 2.391353, ppl: 10.928274 +epoch: 2, batch: 11623, sum loss: 3605.236328, avg loss: 2.316990, ppl: 10.145090 +epoch: 2, batch: 11624, sum loss: 4276.802246, avg loss: 2.421745, ppl: 11.265504 +epoch: 2, batch: 11625, sum loss: 4126.173340, avg loss: 2.396152, ppl: 10.980839 +epoch: 2, batch: 11626, sum loss: 5173.643555, avg loss: 2.720107, ppl: 15.181948 +epoch: 2, batch: 11627, sum loss: 4225.233398, avg loss: 2.410287, ppl: 11.137158 +epoch: 2, batch: 11628, sum loss: 3713.283447, avg loss: 2.088461, ppl: 8.072481 +epoch: 2, batch: 11629, sum loss: 4300.496094, avg loss: 2.578235, ppl: 13.173868 +epoch: 2, batch: 11630, sum loss: 4268.898926, avg loss: 2.580955, ppl: 13.209744 +epoch: 2, batch: 11631, sum loss: 4507.608398, avg loss: 2.333131, ppl: 10.310168 +epoch: 2, batch: 11632, sum loss: 4476.270020, avg loss: 2.634650, ppl: 13.938433 +epoch: 2, batch: 11633, sum loss: 3598.986084, avg loss: 2.475231, ppl: 11.884454 +epoch: 2, batch: 11634, sum loss: 4010.341797, avg loss: 2.577340, ppl: 13.162086 +epoch: 2, batch: 11635, sum loss: 
4576.344727, avg loss: 2.637663, ppl: 13.980488 +epoch: 2, batch: 11636, sum loss: 4540.009766, avg loss: 2.616720, ppl: 13.690750 +epoch: 2, batch: 11637, sum loss: 3434.620361, avg loss: 2.315995, ppl: 10.135002 +epoch: 2, batch: 11638, sum loss: 4879.187012, avg loss: 2.561253, ppl: 12.952037 +epoch: 2, batch: 11639, sum loss: 4446.279297, avg loss: 2.673650, ppl: 14.492764 +epoch: 2, batch: 11640, sum loss: 4530.441895, avg loss: 2.563917, ppl: 12.986591 +epoch: 2, batch: 11641, sum loss: 4614.142090, avg loss: 2.602449, ppl: 13.496754 +epoch: 2, batch: 11642, sum loss: 3851.464111, avg loss: 2.359966, ppl: 10.590590 +epoch: 2, batch: 11643, sum loss: 4235.642090, avg loss: 2.471203, ppl: 11.836679 +epoch: 2, batch: 11644, sum loss: 4586.840820, avg loss: 2.754859, ppl: 15.718831 +epoch: 2, batch: 11645, sum loss: 4308.305664, avg loss: 2.692691, ppl: 14.771374 +epoch: 2, batch: 11646, sum loss: 4017.833252, avg loss: 2.397275, ppl: 10.993181 +epoch: 2, batch: 11647, sum loss: 4154.684570, avg loss: 2.473027, ppl: 11.858282 +epoch: 2, batch: 11648, sum loss: 4515.361328, avg loss: 2.707051, ppl: 14.985023 +epoch: 2, batch: 11649, sum loss: 4915.334961, avg loss: 2.615932, ppl: 13.679954 +epoch: 2, batch: 11650, sum loss: 4239.497559, avg loss: 2.456256, ppl: 11.661067 +epoch: 2, batch: 11651, sum loss: 4680.801270, avg loss: 2.449399, ppl: 11.581382 +epoch: 2, batch: 11652, sum loss: 3857.296387, avg loss: 2.162162, ppl: 8.689903 +epoch: 2, batch: 11653, sum loss: 4502.212402, avg loss: 2.733584, ppl: 15.387934 +epoch: 2, batch: 11654, sum loss: 4209.969238, avg loss: 2.457659, ppl: 11.677437 +epoch: 2, batch: 11655, sum loss: 4042.654785, avg loss: 2.369669, ppl: 10.693849 +epoch: 2, batch: 11656, sum loss: 4512.347656, avg loss: 2.818456, ppl: 16.750967 +epoch: 2, batch: 11657, sum loss: 3723.055176, avg loss: 2.303871, ppl: 10.012864 +epoch: 2, batch: 11658, sum loss: 4556.693359, avg loss: 2.596406, ppl: 13.415442 +epoch: 2, batch: 11659, sum loss: 
4088.359863, avg loss: 2.506659, ppl: 12.263883 +epoch: 2, batch: 11660, sum loss: 4108.517578, avg loss: 2.623574, ppl: 13.784909 +epoch: 2, batch: 11661, sum loss: 4363.261230, avg loss: 2.556099, ppl: 12.885455 +epoch: 2, batch: 11662, sum loss: 5193.309082, avg loss: 2.768288, ppl: 15.931342 +epoch: 2, batch: 11663, sum loss: 3687.920166, avg loss: 2.503680, ppl: 12.227406 +epoch: 2, batch: 11664, sum loss: 3766.965088, avg loss: 2.254318, ppl: 9.528792 +epoch: 2, batch: 11665, sum loss: 3883.578857, avg loss: 2.592509, ppl: 13.363258 +epoch: 2, batch: 11666, sum loss: 4962.683594, avg loss: 2.698577, ppl: 14.858579 +epoch: 2, batch: 11667, sum loss: 4516.687012, avg loss: 2.527525, ppl: 12.522474 +epoch: 2, batch: 11668, sum loss: 4212.095703, avg loss: 2.483547, ppl: 11.983695 +epoch: 2, batch: 11669, sum loss: 4155.280273, avg loss: 2.396355, ppl: 10.983074 +epoch: 2, batch: 11670, sum loss: 4321.337891, avg loss: 2.667493, ppl: 14.403809 +epoch: 2, batch: 11671, sum loss: 4495.495117, avg loss: 2.337751, ppl: 10.357915 +epoch: 2, batch: 11672, sum loss: 3782.645508, avg loss: 2.457859, ppl: 11.679782 +epoch: 2, batch: 11673, sum loss: 5297.036621, avg loss: 2.926540, ppl: 18.662939 +epoch: 2, batch: 11674, sum loss: 3762.037109, avg loss: 2.313676, ppl: 10.111526 +epoch: 2, batch: 11675, sum loss: 3178.728516, avg loss: 2.337301, ppl: 10.353251 +epoch: 2, batch: 11676, sum loss: 4068.110352, avg loss: 2.508083, ppl: 12.281363 +epoch: 2, batch: 11677, sum loss: 4221.297363, avg loss: 2.527723, ppl: 12.524955 +epoch: 2, batch: 11678, sum loss: 4033.159668, avg loss: 2.246886, ppl: 9.458235 +epoch: 2, batch: 11679, sum loss: 4800.557129, avg loss: 2.755773, ppl: 15.733203 +epoch: 2, batch: 11680, sum loss: 4337.829102, avg loss: 2.533779, ppl: 12.601031 +epoch: 2, batch: 11681, sum loss: 4213.740234, avg loss: 2.461297, ppl: 11.720000 +epoch: 2, batch: 11682, sum loss: 4505.043945, avg loss: 2.743632, ppl: 15.543341 +epoch: 2, batch: 11683, sum loss: 
4412.907715, avg loss: 2.373807, ppl: 10.738197 +epoch: 2, batch: 11684, sum loss: 3807.118408, avg loss: 2.321414, ppl: 10.190070 +epoch: 2, batch: 11685, sum loss: 3619.767578, avg loss: 2.455745, ppl: 11.655110 +epoch: 2, batch: 11686, sum loss: 5320.145996, avg loss: 2.681525, ppl: 14.607356 +epoch: 2, batch: 11687, sum loss: 3972.072266, avg loss: 2.217796, ppl: 9.187057 +epoch: 2, batch: 11688, sum loss: 4676.729004, avg loss: 2.625901, ppl: 13.817014 +epoch: 2, batch: 11689, sum loss: 4950.228027, avg loss: 2.790433, ppl: 16.288067 +epoch: 2, batch: 11690, sum loss: 5489.022461, avg loss: 3.019264, ppl: 20.476221 +epoch: 2, batch: 11691, sum loss: 4636.115234, avg loss: 2.852994, ppl: 17.339619 +epoch: 2, batch: 11692, sum loss: 4281.059082, avg loss: 2.414585, ppl: 11.185129 +epoch: 2, batch: 11693, sum loss: 4490.698730, avg loss: 2.804934, ppl: 16.525982 +epoch: 2, batch: 11694, sum loss: 3903.366699, avg loss: 2.313792, ppl: 10.112697 +epoch: 2, batch: 11695, sum loss: 4353.627930, avg loss: 2.591445, ppl: 13.349050 +epoch: 2, batch: 11696, sum loss: 4003.954346, avg loss: 2.535753, ppl: 12.625937 +epoch: 2, batch: 11697, sum loss: 4433.451172, avg loss: 2.501948, ppl: 12.206244 +epoch: 2, batch: 11698, sum loss: 3962.440674, avg loss: 2.650462, ppl: 14.160578 +epoch: 2, batch: 11699, sum loss: 4195.287109, avg loss: 2.501662, ppl: 12.202758 +epoch: 2, batch: 11700, sum loss: 4304.030273, avg loss: 2.743168, ppl: 15.536124 +epoch: 2, batch: 11701, sum loss: 4908.086426, avg loss: 2.633094, ppl: 13.916757 +epoch: 2, batch: 11702, sum loss: 3662.531738, avg loss: 2.299141, ppl: 9.965620 +epoch: 2, batch: 11703, sum loss: 3513.156982, avg loss: 2.229161, ppl: 9.292063 +epoch: 2, batch: 11704, sum loss: 4126.655273, avg loss: 2.552044, ppl: 12.833310 +epoch: 2, batch: 11705, sum loss: 4896.176270, avg loss: 2.724639, ppl: 15.250910 +epoch: 2, batch: 11706, sum loss: 4612.872070, avg loss: 2.426550, ppl: 11.319766 +epoch: 2, batch: 11707, sum loss: 
5005.346680, avg loss: 2.726224, ppl: 15.275095 +epoch: 2, batch: 11708, sum loss: 4714.556641, avg loss: 2.736249, ppl: 15.428998 +epoch: 2, batch: 11709, sum loss: 4497.059570, avg loss: 2.718899, ppl: 15.163626 +epoch: 2, batch: 11710, sum loss: 4122.361328, avg loss: 2.509045, ppl: 12.293186 +epoch: 2, batch: 11711, sum loss: 4228.237305, avg loss: 2.462573, ppl: 11.734964 +epoch: 2, batch: 11712, sum loss: 3620.571533, avg loss: 2.243229, ppl: 9.423711 +epoch: 2, batch: 11713, sum loss: 3993.085938, avg loss: 2.464868, ppl: 11.761930 +epoch: 2, batch: 11714, sum loss: 5109.159180, avg loss: 2.656869, ppl: 14.251596 +epoch: 2, batch: 11715, sum loss: 5020.611328, avg loss: 2.591952, ppl: 13.355818 +epoch: 2, batch: 11716, sum loss: 3859.003418, avg loss: 2.351617, ppl: 10.502537 +epoch: 2, batch: 11717, sum loss: 4493.009766, avg loss: 2.657014, ppl: 14.253659 +epoch: 2, batch: 11718, sum loss: 5444.497559, avg loss: 2.946157, ppl: 19.032665 +epoch: 2, batch: 11719, sum loss: 5140.855469, avg loss: 2.432965, ppl: 11.392614 +epoch: 2, batch: 11720, sum loss: 4098.465332, avg loss: 2.403792, ppl: 11.065057 +epoch: 2, batch: 11721, sum loss: 4288.446777, avg loss: 2.504934, ppl: 12.242749 +epoch: 2, batch: 11722, sum loss: 4177.673340, avg loss: 2.427469, ppl: 11.330164 +epoch: 2, batch: 11723, sum loss: 4485.761230, avg loss: 2.598935, ppl: 13.449402 +epoch: 2, batch: 11724, sum loss: 3959.469971, avg loss: 2.414311, ppl: 11.182062 +epoch: 2, batch: 11725, sum loss: 4938.441895, avg loss: 2.854591, ppl: 17.367327 +epoch: 2, batch: 11726, sum loss: 3744.120117, avg loss: 2.435992, ppl: 11.427152 +epoch: 2, batch: 11727, sum loss: 5670.118164, avg loss: 2.610552, ppl: 13.606554 +epoch: 2, batch: 11728, sum loss: 4255.241211, avg loss: 2.491359, ppl: 12.077679 +epoch: 2, batch: 11729, sum loss: 4275.076172, avg loss: 2.531129, ppl: 12.567682 +epoch: 2, batch: 11730, sum loss: 4263.925293, avg loss: 2.685092, ppl: 14.659546 +epoch: 2, batch: 11731, sum loss: 
3367.711426, avg loss: 2.572736, ppl: 13.101622 +epoch: 2, batch: 11732, sum loss: 4248.243164, avg loss: 2.430345, ppl: 11.362805 +epoch: 2, batch: 11733, sum loss: 4404.495117, avg loss: 2.593931, ppl: 13.382277 +epoch: 2, batch: 11734, sum loss: 3596.040527, avg loss: 2.215675, ppl: 9.167594 +epoch: 2, batch: 11735, sum loss: 4781.324219, avg loss: 2.601373, ppl: 13.482239 +epoch: 2, batch: 11736, sum loss: 4282.806641, avg loss: 2.376696, ppl: 10.769266 +epoch: 2, batch: 11737, sum loss: 4513.772461, avg loss: 2.454471, ppl: 11.640278 +epoch: 2, batch: 11738, sum loss: 3227.480469, avg loss: 2.345553, ppl: 10.439040 +epoch: 2, batch: 11739, sum loss: 4795.099121, avg loss: 2.474252, ppl: 11.872817 +epoch: 2, batch: 11740, sum loss: 4189.162109, avg loss: 2.805869, ppl: 16.541443 +epoch: 2, batch: 11741, sum loss: 4057.990723, avg loss: 2.414034, ppl: 11.178963 +epoch: 2, batch: 11742, sum loss: 4286.096680, avg loss: 2.586661, ppl: 13.285333 +epoch: 2, batch: 11743, sum loss: 4825.569336, avg loss: 2.786125, ppl: 16.218060 +epoch: 2, batch: 11744, sum loss: 4854.786133, avg loss: 2.766260, ppl: 15.899058 +epoch: 2, batch: 11745, sum loss: 5457.025391, avg loss: 2.839243, ppl: 17.102821 +epoch: 2, batch: 11746, sum loss: 3739.621338, avg loss: 2.476570, ppl: 11.900380 +epoch: 2, batch: 11747, sum loss: 4129.909668, avg loss: 2.345207, ppl: 10.435432 +epoch: 2, batch: 11748, sum loss: 5172.133789, avg loss: 2.794238, ppl: 16.350159 +epoch: 2, batch: 11749, sum loss: 3226.623047, avg loss: 2.240710, ppl: 9.400007 +epoch: 2, batch: 11750, sum loss: 4740.125488, avg loss: 2.608765, ppl: 13.582262 +epoch: 2, batch: 11751, sum loss: 4385.976074, avg loss: 2.513453, ppl: 12.347495 +epoch: 2, batch: 11752, sum loss: 3313.862305, avg loss: 2.036793, ppl: 7.665985 +epoch: 2, batch: 11753, sum loss: 4094.405273, avg loss: 2.445881, ppl: 11.540717 +epoch: 2, batch: 11754, sum loss: 3453.177734, avg loss: 2.428395, ppl: 11.340666 +epoch: 2, batch: 11755, sum loss: 
4182.956055, avg loss: 2.500273, ppl: 12.185817 +epoch: 2, batch: 11756, sum loss: 3786.928711, avg loss: 2.544979, ppl: 12.742959 +epoch: 2, batch: 11757, sum loss: 4849.517578, avg loss: 2.608670, ppl: 13.580976 +epoch: 2, batch: 11758, sum loss: 3941.302246, avg loss: 2.157254, ppl: 8.647355 +epoch: 2, batch: 11759, sum loss: 3012.200684, avg loss: 2.287168, ppl: 9.847014 +epoch: 2, batch: 11760, sum loss: 4504.828125, avg loss: 2.520889, ppl: 12.439649 +epoch: 2, batch: 11761, sum loss: 3893.580078, avg loss: 2.453422, ppl: 11.628068 +epoch: 2, batch: 11762, sum loss: 3685.623535, avg loss: 2.115742, ppl: 8.295743 +epoch: 2, batch: 11763, sum loss: 3990.264404, avg loss: 2.449518, ppl: 11.582759 +epoch: 2, batch: 11764, sum loss: 4421.264648, avg loss: 2.677932, ppl: 14.554955 +epoch: 2, batch: 11765, sum loss: 4536.150879, avg loss: 2.608482, ppl: 13.578428 +epoch: 2, batch: 11766, sum loss: 5240.869141, avg loss: 2.690385, ppl: 14.737343 +epoch: 2, batch: 11767, sum loss: 4767.747559, avg loss: 2.680015, ppl: 14.585317 +epoch: 2, batch: 11768, sum loss: 4198.582031, avg loss: 2.517136, ppl: 12.393047 +epoch: 2, batch: 11769, sum loss: 4430.939941, avg loss: 2.504771, ppl: 12.240758 +epoch: 2, batch: 11770, sum loss: 4635.331543, avg loss: 2.552495, ppl: 12.839101 +epoch: 2, batch: 11771, sum loss: 3820.325195, avg loss: 2.553693, ppl: 12.854491 +epoch: 2, batch: 11772, sum loss: 4509.002441, avg loss: 2.671210, ppl: 14.457453 +epoch: 2, batch: 11773, sum loss: 3178.025391, avg loss: 2.099092, ppl: 8.158760 +epoch: 2, batch: 11774, sum loss: 4025.547852, avg loss: 2.500340, ppl: 12.186640 +epoch: 2, batch: 11775, sum loss: 3748.998291, avg loss: 2.401664, ppl: 11.041540 +epoch: 2, batch: 11776, sum loss: 4790.427734, avg loss: 2.620584, ppl: 13.743748 +epoch: 2, batch: 11777, sum loss: 4525.915039, avg loss: 2.916183, ppl: 18.470642 +epoch: 2, batch: 11778, sum loss: 4136.007812, avg loss: 2.546803, ppl: 12.766222 +epoch: 2, batch: 11779, sum loss: 
3431.939941, avg loss: 2.229980, ppl: 9.299685 +epoch: 2, batch: 11780, sum loss: 4259.763672, avg loss: 2.432761, ppl: 11.390284 +epoch: 2, batch: 11781, sum loss: 4767.781738, avg loss: 2.570233, ppl: 13.068864 +epoch: 2, batch: 11782, sum loss: 3329.208984, avg loss: 2.234369, ppl: 9.340582 +epoch: 2, batch: 11783, sum loss: 4162.617188, avg loss: 2.616353, ppl: 13.685718 +epoch: 2, batch: 11784, sum loss: 4067.591309, avg loss: 2.384286, ppl: 10.851309 +epoch: 2, batch: 11785, sum loss: 3670.322754, avg loss: 2.498518, ppl: 12.164450 +epoch: 2, batch: 11786, sum loss: 4763.442871, avg loss: 2.640489, ppl: 14.020062 +epoch: 2, batch: 11787, sum loss: 4408.305664, avg loss: 2.545211, ppl: 12.745918 +epoch: 2, batch: 11788, sum loss: 4499.998535, avg loss: 2.604166, ppl: 13.519942 +epoch: 2, batch: 11789, sum loss: 4433.891602, avg loss: 2.538003, ppl: 12.654378 +epoch: 2, batch: 11790, sum loss: 4451.215820, avg loss: 2.539199, ppl: 12.669520 +epoch: 2, batch: 11791, sum loss: 3847.291504, avg loss: 2.340202, ppl: 10.383329 +epoch: 2, batch: 11792, sum loss: 4833.738281, avg loss: 2.771639, ppl: 15.984810 +epoch: 2, batch: 11793, sum loss: 4255.635254, avg loss: 2.646539, ppl: 14.105139 +epoch: 2, batch: 11794, sum loss: 4639.967285, avg loss: 2.805301, ppl: 16.532047 +epoch: 2, batch: 11795, sum loss: 3623.143311, avg loss: 2.412213, ppl: 11.158624 +epoch: 2, batch: 11796, sum loss: 4577.270508, avg loss: 2.612597, ppl: 13.634419 +epoch: 2, batch: 11797, sum loss: 4438.783203, avg loss: 2.634293, ppl: 13.933453 +epoch: 2, batch: 11798, sum loss: 4641.698242, avg loss: 2.584465, ppl: 13.256189 +epoch: 2, batch: 11799, sum loss: 3680.279785, avg loss: 2.237252, ppl: 9.367554 +epoch: 2, batch: 11800, sum loss: 4127.091309, avg loss: 2.333008, ppl: 10.308908 +epoch: 2, batch: 11801, sum loss: 4415.406738, avg loss: 2.443501, ppl: 11.513278 +epoch: 2, batch: 11802, sum loss: 4331.322266, avg loss: 2.462378, ppl: 11.732677 +epoch: 2, batch: 11803, sum loss: 
4587.090332, avg loss: 2.677811, ppl: 14.553200 +epoch: 2, batch: 11804, sum loss: 5371.931152, avg loss: 2.739384, ppl: 15.477444 +epoch: 2, batch: 11805, sum loss: 3948.365723, avg loss: 2.410480, ppl: 11.139301 +epoch: 2, batch: 11806, sum loss: 4370.487793, avg loss: 2.301468, ppl: 9.988836 +epoch: 2, batch: 11807, sum loss: 4403.085938, avg loss: 2.565901, ppl: 13.012378 +epoch: 2, batch: 11808, sum loss: 4300.720703, avg loss: 2.513572, ppl: 12.348956 +epoch: 2, batch: 11809, sum loss: 5050.500488, avg loss: 3.022442, ppl: 20.541395 +epoch: 2, batch: 11810, sum loss: 5159.789062, avg loss: 3.104566, ppl: 22.299547 +epoch: 2, batch: 11811, sum loss: 3778.200684, avg loss: 2.544243, ppl: 12.733583 +epoch: 2, batch: 11812, sum loss: 5100.733398, avg loss: 2.860759, ppl: 17.474791 +epoch: 2, batch: 11813, sum loss: 3998.077393, avg loss: 2.601222, ppl: 13.480195 +epoch: 2, batch: 11814, sum loss: 5001.327637, avg loss: 2.623991, ppl: 13.790656 +epoch: 2, batch: 11815, sum loss: 4605.618652, avg loss: 2.619806, ppl: 13.733056 +epoch: 2, batch: 11816, sum loss: 4190.902832, avg loss: 2.435156, ppl: 11.417596 +epoch: 2, batch: 11817, sum loss: 5490.413574, avg loss: 2.808396, ppl: 16.583290 +epoch: 2, batch: 11818, sum loss: 4512.359375, avg loss: 2.738082, ppl: 15.457313 +epoch: 2, batch: 11819, sum loss: 4396.385742, avg loss: 2.460205, ppl: 11.707207 +epoch: 2, batch: 11820, sum loss: 5232.539551, avg loss: 2.698576, ppl: 14.858564 +epoch: 2, batch: 11821, sum loss: 4182.854004, avg loss: 2.847416, ppl: 17.243162 +epoch: 2, batch: 11822, sum loss: 4488.763184, avg loss: 2.585693, ppl: 13.272486 +epoch: 2, batch: 11823, sum loss: 4595.293945, avg loss: 2.621388, ppl: 13.754808 +epoch: 2, batch: 11824, sum loss: 4793.826172, avg loss: 2.763012, ppl: 15.847507 +epoch: 2, batch: 11825, sum loss: 4244.623535, avg loss: 2.501251, ppl: 12.197749 +epoch: 2, batch: 11826, sum loss: 3582.787109, avg loss: 2.349369, ppl: 10.478951 +epoch: 2, batch: 11827, sum loss: 
4048.946289, avg loss: 2.662029, ppl: 14.325330 +epoch: 2, batch: 11828, sum loss: 3711.667725, avg loss: 2.338795, ppl: 10.368736 +epoch: 2, batch: 11829, sum loss: 4640.154297, avg loss: 2.726295, ppl: 15.276184 +epoch: 2, batch: 11830, sum loss: 4394.828613, avg loss: 2.612859, ppl: 13.637983 +epoch: 2, batch: 11831, sum loss: 4152.168945, avg loss: 2.575787, ppl: 13.141657 +epoch: 2, batch: 11832, sum loss: 3819.509033, avg loss: 2.445268, ppl: 11.533645 +epoch: 2, batch: 11833, sum loss: 3645.763916, avg loss: 2.344543, ppl: 10.428503 +epoch: 2, batch: 11834, sum loss: 4061.049561, avg loss: 2.701962, ppl: 14.908961 +epoch: 2, batch: 11835, sum loss: 3621.949463, avg loss: 2.477394, ppl: 11.910182 +epoch: 2, batch: 11836, sum loss: 4403.550293, avg loss: 2.579701, ppl: 13.193198 +epoch: 2, batch: 11837, sum loss: 3815.302979, avg loss: 2.516691, ppl: 12.387535 +epoch: 2, batch: 11838, sum loss: 4352.471680, avg loss: 2.454863, ppl: 11.644836 +epoch: 2, batch: 11839, sum loss: 3237.314941, avg loss: 2.135432, ppl: 8.460701 +epoch: 2, batch: 11840, sum loss: 4251.880859, avg loss: 2.482125, ppl: 11.966670 +epoch: 2, batch: 11841, sum loss: 3930.720703, avg loss: 2.207030, ppl: 9.088686 +epoch: 2, batch: 11842, sum loss: 4577.551758, avg loss: 2.593514, ppl: 13.376691 +epoch: 2, batch: 11843, sum loss: 5061.068359, avg loss: 2.559974, ppl: 12.935480 +epoch: 2, batch: 11844, sum loss: 5636.174805, avg loss: 2.865366, ppl: 17.555473 +epoch: 2, batch: 11845, sum loss: 3556.883057, avg loss: 2.379186, ppl: 10.796110 +epoch: 2, batch: 11846, sum loss: 4551.204102, avg loss: 2.454803, ppl: 11.644134 +epoch: 2, batch: 11847, sum loss: 3765.877441, avg loss: 2.477551, ppl: 11.912056 +epoch: 2, batch: 11848, sum loss: 5270.250488, avg loss: 2.942630, ppl: 18.965662 +epoch: 2, batch: 11849, sum loss: 4200.603516, avg loss: 2.516839, ppl: 12.389366 +epoch: 2, batch: 11850, sum loss: 4277.657227, avg loss: 2.481240, ppl: 11.956079 +epoch: 2, batch: 11851, sum loss: 
4143.283203, avg loss: 2.343486, ppl: 10.417489 +epoch: 2, batch: 11852, sum loss: 4423.097656, avg loss: 2.695367, ppl: 14.810955 +epoch: 2, batch: 11853, sum loss: 4013.101074, avg loss: 2.689746, ppl: 14.727937 +epoch: 2, batch: 11854, sum loss: 4559.455566, avg loss: 2.548606, ppl: 12.789259 +epoch: 2, batch: 11855, sum loss: 3945.893311, avg loss: 2.577331, ppl: 13.161960 +epoch: 2, batch: 11856, sum loss: 4366.761719, avg loss: 2.617963, ppl: 13.707770 +epoch: 2, batch: 11857, sum loss: 3738.419189, avg loss: 2.515760, ppl: 12.376010 +epoch: 2, batch: 11858, sum loss: 4645.436523, avg loss: 2.489516, ppl: 12.055437 +epoch: 2, batch: 11859, sum loss: 4295.300781, avg loss: 2.741098, ppl: 15.504005 +epoch: 2, batch: 11860, sum loss: 4091.311035, avg loss: 2.491663, ppl: 12.081356 +epoch: 2, batch: 11861, sum loss: 3792.677002, avg loss: 2.321100, ppl: 10.186872 +epoch: 2, batch: 11862, sum loss: 3826.867432, avg loss: 2.344894, ppl: 10.432171 +epoch: 2, batch: 11863, sum loss: 5118.264160, avg loss: 2.848227, ppl: 17.257162 +epoch: 2, batch: 11864, sum loss: 4999.430176, avg loss: 2.727458, ppl: 15.293957 +epoch: 2, batch: 11865, sum loss: 4309.131836, avg loss: 2.490828, ppl: 12.071261 +epoch: 2, batch: 11866, sum loss: 4991.993652, avg loss: 2.635688, ppl: 13.952913 +epoch: 2, batch: 11867, sum loss: 4245.637207, avg loss: 2.441425, ppl: 11.489397 +epoch: 2, batch: 11868, sum loss: 5325.403809, avg loss: 2.762139, ppl: 15.833676 +epoch: 2, batch: 11869, sum loss: 4486.634766, avg loss: 2.749164, ppl: 15.629555 +epoch: 2, batch: 11870, sum loss: 4260.885742, avg loss: 2.580791, ppl: 13.207582 +epoch: 2, batch: 11871, sum loss: 3240.675293, avg loss: 2.409424, ppl: 11.127550 +epoch: 2, batch: 11872, sum loss: 5264.508301, avg loss: 2.633571, ppl: 13.923401 +epoch: 2, batch: 11873, sum loss: 5066.359375, avg loss: 2.687724, ppl: 14.698183 +epoch: 2, batch: 11874, sum loss: 3420.155518, avg loss: 2.287730, ppl: 9.852542 +epoch: 2, batch: 11875, sum loss: 
4243.555664, avg loss: 2.713271, ppl: 15.078515 +epoch: 2, batch: 11876, sum loss: 3787.169922, avg loss: 2.392400, ppl: 10.939721 +epoch: 2, batch: 11877, sum loss: 3230.728760, avg loss: 2.365102, ppl: 10.645120 +epoch: 2, batch: 11878, sum loss: 4186.933105, avg loss: 2.539074, ppl: 12.667934 +epoch: 2, batch: 11879, sum loss: 3968.761230, avg loss: 2.545710, ppl: 12.752274 +epoch: 2, batch: 11880, sum loss: 4174.923828, avg loss: 2.601199, ppl: 13.479886 +epoch: 2, batch: 11881, sum loss: 4465.117188, avg loss: 2.548583, ppl: 12.788969 +epoch: 2, batch: 11882, sum loss: 4773.712891, avg loss: 2.569275, ppl: 13.056354 +epoch: 2, batch: 11883, sum loss: 3382.489502, avg loss: 2.421252, ppl: 11.259951 +epoch: 2, batch: 11884, sum loss: 4639.038086, avg loss: 2.833866, ppl: 17.011093 +epoch: 2, batch: 11885, sum loss: 3713.572510, avg loss: 2.394308, ppl: 10.960614 +epoch: 2, batch: 11886, sum loss: 4248.857422, avg loss: 2.714925, ppl: 15.103475 +epoch: 2, batch: 11887, sum loss: 5589.983887, avg loss: 2.900874, ppl: 18.190035 +epoch: 2, batch: 11888, sum loss: 4510.014160, avg loss: 2.734999, ppl: 15.409735 +epoch: 2, batch: 11889, sum loss: 3773.267090, avg loss: 2.589751, ppl: 13.326449 +epoch: 2, batch: 11890, sum loss: 5280.245117, avg loss: 2.611397, ppl: 13.618062 +epoch: 2, batch: 11891, sum loss: 4678.380859, avg loss: 2.400401, ppl: 11.027594 +epoch: 2, batch: 11892, sum loss: 3585.365234, avg loss: 2.427465, ppl: 11.330121 +epoch: 2, batch: 11893, sum loss: 3998.820801, avg loss: 2.608494, ppl: 13.578584 +epoch: 2, batch: 11894, sum loss: 5098.224121, avg loss: 2.785915, ppl: 16.214645 +epoch: 2, batch: 11895, sum loss: 3657.008545, avg loss: 2.357839, ppl: 10.568090 +epoch: 2, batch: 11896, sum loss: 4862.300293, avg loss: 2.909815, ppl: 18.353399 +epoch: 2, batch: 11897, sum loss: 5183.798828, avg loss: 2.721154, ppl: 15.197854 +epoch: 2, batch: 11898, sum loss: 5116.602051, avg loss: 2.591997, ppl: 13.356417 +epoch: 2, batch: 11899, sum loss: 
3445.617920, avg loss: 2.379570, ppl: 10.800263 +epoch: 2, batch: 11900, sum loss: 4454.498047, avg loss: 2.563002, ppl: 12.974713 +epoch: 2, batch: 11901, sum loss: 4103.117188, avg loss: 2.477728, ppl: 11.914161 +epoch: 2, batch: 11902, sum loss: 4837.971191, avg loss: 2.608071, ppl: 13.572838 +epoch: 2, batch: 11903, sum loss: 4430.069824, avg loss: 2.583131, ppl: 13.238524 +epoch: 2, batch: 11904, sum loss: 4595.166504, avg loss: 2.528986, ppl: 12.540777 +epoch: 2, batch: 11905, sum loss: 3244.053223, avg loss: 2.215883, ppl: 9.169504 +epoch: 2, batch: 11906, sum loss: 3267.095215, avg loss: 2.082279, ppl: 8.022730 +epoch: 2, batch: 11907, sum loss: 4329.075195, avg loss: 2.500910, ppl: 12.193583 +epoch: 2, batch: 11908, sum loss: 4087.837646, avg loss: 2.485008, ppl: 12.001210 +epoch: 2, batch: 11909, sum loss: 3973.727539, avg loss: 2.340240, ppl: 10.383728 +epoch: 2, batch: 11910, sum loss: 4038.395996, avg loss: 2.618934, ppl: 13.721088 +epoch: 2, batch: 11911, sum loss: 3319.664307, avg loss: 2.262893, ppl: 9.610853 +epoch: 2, batch: 11912, sum loss: 4507.433594, avg loss: 2.575676, ppl: 13.140200 +epoch: 2, batch: 11913, sum loss: 3982.882324, avg loss: 2.498672, ppl: 12.166321 +epoch: 2, batch: 11914, sum loss: 4422.425781, avg loss: 2.528545, ppl: 12.535259 +epoch: 2, batch: 11915, sum loss: 3746.641846, avg loss: 2.450387, ppl: 11.592835 +epoch: 2, batch: 11916, sum loss: 3882.564453, avg loss: 2.501652, ppl: 12.202641 +epoch: 2, batch: 11917, sum loss: 3283.818848, avg loss: 2.485858, ppl: 12.011427 +epoch: 2, batch: 11918, sum loss: 3533.187012, avg loss: 2.355458, ppl: 10.542956 +epoch: 2, batch: 11919, sum loss: 3192.507080, avg loss: 2.212409, ppl: 9.137707 +epoch: 2, batch: 11920, sum loss: 4871.683594, avg loss: 2.577610, ppl: 13.165641 +epoch: 2, batch: 11921, sum loss: 4396.289062, avg loss: 2.476782, ppl: 11.902903 +epoch: 2, batch: 11922, sum loss: 3714.991699, avg loss: 2.445683, ppl: 11.538423 +epoch: 2, batch: 11923, sum loss: 
4130.924805, avg loss: 2.554685, ppl: 12.867241 +epoch: 2, batch: 11924, sum loss: 3764.050293, avg loss: 2.507695, ppl: 12.276603 +epoch: 2, batch: 11925, sum loss: 3832.387695, avg loss: 2.230726, ppl: 9.306623 +epoch: 2, batch: 11926, sum loss: 3777.947021, avg loss: 2.470861, ppl: 11.832636 +epoch: 2, batch: 11927, sum loss: 4381.314453, avg loss: 2.668279, ppl: 14.415145 +epoch: 2, batch: 11928, sum loss: 4078.015137, avg loss: 2.355872, ppl: 10.547326 +epoch: 2, batch: 11929, sum loss: 3348.737793, avg loss: 2.231004, ppl: 9.309213 +epoch: 2, batch: 11930, sum loss: 5495.792969, avg loss: 3.049830, ppl: 21.111750 +epoch: 2, batch: 11931, sum loss: 5238.999023, avg loss: 2.963235, ppl: 19.360500 +epoch: 2, batch: 11932, sum loss: 3991.587402, avg loss: 2.288754, ppl: 9.862643 +epoch: 2, batch: 11933, sum loss: 4077.814209, avg loss: 2.489508, ppl: 12.055343 +epoch: 2, batch: 11934, sum loss: 4083.969238, avg loss: 2.497840, ppl: 12.156213 +epoch: 2, batch: 11935, sum loss: 4494.494141, avg loss: 2.484519, ppl: 11.995346 +epoch: 2, batch: 11936, sum loss: 3580.184326, avg loss: 2.309796, ppl: 10.072373 +epoch: 2, batch: 11937, sum loss: 5088.783203, avg loss: 2.703923, ppl: 14.938219 +epoch: 2, batch: 11938, sum loss: 4628.506348, avg loss: 2.512761, ppl: 12.338955 +epoch: 2, batch: 11939, sum loss: 3390.395020, avg loss: 2.242325, ppl: 9.415195 +epoch: 2, batch: 11940, sum loss: 3399.830811, avg loss: 2.265044, ppl: 9.631548 +epoch: 2, batch: 11941, sum loss: 4063.739990, avg loss: 2.386224, ppl: 10.872365 +epoch: 2, batch: 11942, sum loss: 4335.962402, avg loss: 2.534169, ppl: 12.605947 +epoch: 2, batch: 11943, sum loss: 4461.969727, avg loss: 2.616991, ppl: 13.694455 +epoch: 2, batch: 11944, sum loss: 4633.444336, avg loss: 2.640139, ppl: 14.015153 +epoch: 2, batch: 11945, sum loss: 4286.768555, avg loss: 2.373626, ppl: 10.736252 +epoch: 2, batch: 11946, sum loss: 3830.023193, avg loss: 2.641395, ppl: 14.032770 +epoch: 2, batch: 11947, sum loss: 3827.624512, 
avg loss: 2.521492, ppl: 12.447151 +epoch: 2, batch: 11948, sum loss: 4174.584961, avg loss: 2.477499, ppl: 11.911431 +epoch: 2, batch: 11949, sum loss: 4655.118652, avg loss: 2.789166, ppl: 16.267454 +epoch: 2, batch: 11950, sum loss: 4286.605469, avg loss: 2.867295, ppl: 17.589367 +epoch: 2, batch: 11951, sum loss: 3932.407715, avg loss: 2.487291, ppl: 12.028650 +epoch: 2, batch: 11952, sum loss: 5029.166504, avg loss: 2.757219, ppl: 15.755959 +epoch: 2, batch: 11953, sum loss: 5135.029297, avg loss: 2.666163, ppl: 14.384665 +epoch: 2, batch: 11954, sum loss: 4581.157715, avg loss: 2.652668, ppl: 14.191852 +epoch: 2, batch: 11955, sum loss: 4273.597168, avg loss: 2.510927, ppl: 12.316339 +epoch: 2, batch: 11956, sum loss: 4307.166504, avg loss: 2.488253, ppl: 12.040228 +epoch: 2, batch: 11957, sum loss: 4272.720703, avg loss: 2.552402, ppl: 12.837904 +epoch: 2, batch: 11958, sum loss: 3740.719238, avg loss: 2.400975, ppl: 11.033932 +epoch: 2, batch: 11959, sum loss: 4610.377441, avg loss: 2.765673, ppl: 15.889732 +epoch: 2, batch: 11960, sum loss: 4775.208008, avg loss: 2.654368, ppl: 14.215998 +epoch: 2, batch: 11961, sum loss: 3684.210938, avg loss: 2.464355, ppl: 11.755898 +epoch: 2, batch: 11962, sum loss: 3954.882080, avg loss: 2.543333, ppl: 12.721997 +epoch: 2, batch: 11963, sum loss: 3378.233398, avg loss: 2.271845, ppl: 9.697274 +epoch: 2, batch: 11964, sum loss: 3753.804443, avg loss: 2.392482, ppl: 10.940616 +epoch: 2, batch: 11965, sum loss: 4097.954590, avg loss: 2.337681, ppl: 10.357191 +epoch: 2, batch: 11966, sum loss: 3866.847168, avg loss: 2.431979, ppl: 11.381389 +epoch: 2, batch: 11967, sum loss: 3864.046875, avg loss: 2.497768, ppl: 12.155332 +epoch: 2, batch: 11968, sum loss: 4231.196289, avg loss: 2.538210, ppl: 12.656997 +epoch: 2, batch: 11969, sum loss: 4042.305176, avg loss: 2.321830, ppl: 10.194308 +epoch: 2, batch: 11970, sum loss: 3224.027832, avg loss: 2.076000, ppl: 7.972515 +epoch: 2, batch: 11971, sum loss: 4024.221680, avg loss: 
2.576326, ppl: 13.148746 +epoch: 2, batch: 11972, sum loss: 4273.908203, avg loss: 2.365195, ppl: 10.646117 +epoch: 2, batch: 11973, sum loss: 4444.080078, avg loss: 2.473055, ppl: 11.858622 +epoch: 2, batch: 11974, sum loss: 4117.551270, avg loss: 2.581537, ppl: 13.217435 +epoch: 2, batch: 11975, sum loss: 4752.768555, avg loss: 2.715868, ppl: 15.117723 +epoch: 2, batch: 11976, sum loss: 4207.168457, avg loss: 2.498319, ppl: 12.162028 +epoch: 2, batch: 11977, sum loss: 5522.473633, avg loss: 2.800443, ppl: 16.451933 +epoch: 2, batch: 11978, sum loss: 3731.031494, avg loss: 2.373430, ppl: 10.734145 +epoch: 2, batch: 11979, sum loss: 4415.676758, avg loss: 2.556848, ppl: 12.895111 +epoch: 2, batch: 11980, sum loss: 3613.122314, avg loss: 2.452901, ppl: 11.622009 +epoch: 2, batch: 11981, sum loss: 4310.413574, avg loss: 2.425669, ppl: 11.309790 +epoch: 2, batch: 11982, sum loss: 5161.239258, avg loss: 2.727928, ppl: 15.301146 +epoch: 2, batch: 11983, sum loss: 4181.613770, avg loss: 2.419915, ppl: 11.244908 +epoch: 2, batch: 11984, sum loss: 3959.289307, avg loss: 2.434987, ppl: 11.415674 +epoch: 2, batch: 11985, sum loss: 4089.564209, avg loss: 2.544844, ppl: 12.741240 +epoch: 2, batch: 11986, sum loss: 4443.653320, avg loss: 2.841211, ppl: 17.136499 +epoch: 2, batch: 11987, sum loss: 4035.043457, avg loss: 2.537763, ppl: 12.651340 +epoch: 2, batch: 11988, sum loss: 4095.076660, avg loss: 2.565837, ppl: 13.011549 +epoch: 2, batch: 11989, sum loss: 4053.700684, avg loss: 2.282489, ppl: 9.801045 +epoch: 2, batch: 11990, sum loss: 4945.953125, avg loss: 2.699756, ppl: 14.876103 +epoch: 2, batch: 11991, sum loss: 3833.238037, avg loss: 2.383854, ppl: 10.846629 +epoch: 2, batch: 11992, sum loss: 4337.414062, avg loss: 2.420432, ppl: 11.250720 +epoch: 2, batch: 11993, sum loss: 4878.147461, avg loss: 2.692134, ppl: 14.763152 +epoch: 2, batch: 11994, sum loss: 4471.181641, avg loss: 2.746426, ppl: 15.586826 +epoch: 2, batch: 11995, sum loss: 4838.186523, avg loss: 
2.559887, ppl: 12.934358 +epoch: 2, batch: 11996, sum loss: 4346.544434, avg loss: 2.612106, ppl: 13.627722 +epoch: 2, batch: 11997, sum loss: 3888.373779, avg loss: 2.407662, ppl: 11.107960 +epoch: 2, batch: 11998, sum loss: 3987.990234, avg loss: 2.483182, ppl: 11.979321 +epoch: 2, batch: 11999, sum loss: 4268.728516, avg loss: 2.387432, ppl: 10.885505 +epoch: 2, batch: 12000, sum loss: 4225.549316, avg loss: 2.372571, ppl: 10.724933 +epoch: 2, batch: 12001, sum loss: 3712.616211, avg loss: 2.457059, ppl: 11.670437 +epoch: 2, batch: 12002, sum loss: 4048.749268, avg loss: 2.452301, ppl: 11.615046 +epoch: 2, batch: 12003, sum loss: 4146.672363, avg loss: 2.511613, ppl: 12.324790 +epoch: 2, batch: 12004, sum loss: 4093.195312, avg loss: 2.417717, ppl: 11.220220 +epoch: 2, batch: 12005, sum loss: 4344.227539, avg loss: 2.452980, ppl: 11.622932 +epoch: 2, batch: 12006, sum loss: 4003.237305, avg loss: 2.596133, ppl: 13.411777 +epoch: 2, batch: 12007, sum loss: 3700.295166, avg loss: 2.402789, ppl: 11.053965 +epoch: 2, batch: 12008, sum loss: 4689.971680, avg loss: 2.594011, ppl: 13.383343 +epoch: 2, batch: 12009, sum loss: 5021.464355, avg loss: 2.551557, ppl: 12.827064 +epoch: 2, batch: 12010, sum loss: 4275.528320, avg loss: 2.594374, ppl: 13.388203 +epoch: 2, batch: 12011, sum loss: 4598.284180, avg loss: 2.638144, ppl: 13.987213 +epoch: 2, batch: 12012, sum loss: 4796.540039, avg loss: 2.628241, ppl: 13.849388 +epoch: 2, batch: 12013, sum loss: 4355.554199, avg loss: 2.578777, ppl: 13.181009 +epoch: 2, batch: 12014, sum loss: 3357.791016, avg loss: 2.200387, ppl: 9.028511 +epoch: 2, batch: 12015, sum loss: 4682.159180, avg loss: 2.780380, ppl: 16.125143 +epoch: 2, batch: 12016, sum loss: 5529.962891, avg loss: 2.851966, ppl: 17.321810 +epoch: 2, batch: 12017, sum loss: 4842.315430, avg loss: 2.606198, ppl: 13.547440 +epoch: 2, batch: 12018, sum loss: 4769.395508, avg loss: 2.763265, ppl: 15.851512 +epoch: 2, batch: 12019, sum loss: 4070.934570, avg loss: 
2.563561, ppl: 12.981963 +epoch: 2, batch: 12020, sum loss: 3321.547119, avg loss: 2.413915, ppl: 11.177638 +epoch: 2, batch: 12021, sum loss: 4116.915527, avg loss: 2.367404, ppl: 10.669658 +epoch: 2, batch: 12022, sum loss: 4444.654785, avg loss: 2.666260, ppl: 14.386058 +epoch: 2, batch: 12023, sum loss: 4089.593750, avg loss: 2.580185, ppl: 13.199582 +epoch: 2, batch: 12024, sum loss: 4330.087891, avg loss: 2.716492, ppl: 15.127158 +epoch: 2, batch: 12025, sum loss: 5261.417969, avg loss: 2.443761, ppl: 11.516273 +epoch: 2, batch: 12026, sum loss: 3219.395508, avg loss: 2.217215, ppl: 9.181721 +epoch: 2, batch: 12027, sum loss: 3800.432129, avg loss: 2.304689, ppl: 10.021061 +epoch: 2, batch: 12028, sum loss: 3246.309570, avg loss: 2.305618, ppl: 10.030371 +epoch: 2, batch: 12029, sum loss: 5432.832520, avg loss: 2.829600, ppl: 16.938690 +epoch: 2, batch: 12030, sum loss: 4056.453369, avg loss: 2.338013, ppl: 10.360634 +epoch: 2, batch: 12031, sum loss: 5276.278809, avg loss: 2.918296, ppl: 18.509718 +epoch: 2, batch: 12032, sum loss: 4908.708984, avg loss: 2.774850, ppl: 16.036217 +epoch: 2, batch: 12033, sum loss: 3494.382812, avg loss: 2.340511, ppl: 10.386541 +epoch: 2, batch: 12034, sum loss: 5874.881348, avg loss: 2.786946, ppl: 16.231367 +epoch: 2, batch: 12035, sum loss: 4029.622559, avg loss: 2.310563, ppl: 10.080101 +epoch: 2, batch: 12036, sum loss: 4224.861816, avg loss: 2.557422, ppl: 12.902517 +epoch: 2, batch: 12037, sum loss: 4586.718750, avg loss: 2.764749, ppl: 15.875055 +epoch: 2, batch: 12038, sum loss: 3512.528320, avg loss: 2.217505, ppl: 9.184389 +epoch: 2, batch: 12039, sum loss: 3789.399414, avg loss: 2.157972, ppl: 8.653573 +epoch: 2, batch: 12040, sum loss: 3969.227783, avg loss: 2.477670, ppl: 11.913476 +epoch: 2, batch: 12041, sum loss: 4497.288574, avg loss: 2.776104, ppl: 16.056347 +epoch: 2, batch: 12042, sum loss: 3960.522461, avg loss: 2.422338, ppl: 11.272181 +epoch: 2, batch: 12043, sum loss: 4555.792969, avg loss: 2.562313, 
ppl: 12.965776 +epoch: 2, batch: 12044, sum loss: 4468.291504, avg loss: 2.406188, ppl: 11.091599 +epoch: 2, batch: 12045, sum loss: 4285.407715, avg loss: 2.312686, ppl: 10.101523 +epoch: 2, batch: 12046, sum loss: 3999.041992, avg loss: 2.519875, ppl: 12.427047 +epoch: 2, batch: 12047, sum loss: 4230.603516, avg loss: 2.601847, ppl: 13.488631 +epoch: 2, batch: 12048, sum loss: 4566.902344, avg loss: 2.577259, ppl: 13.161012 +epoch: 2, batch: 12049, sum loss: 4152.103516, avg loss: 2.637931, ppl: 13.984239 +epoch: 2, batch: 12050, sum loss: 4855.628906, avg loss: 2.832922, ppl: 16.995056 +epoch: 2, batch: 12051, sum loss: 3940.698486, avg loss: 2.527709, ppl: 12.524782 +epoch: 2, batch: 12052, sum loss: 4804.680176, avg loss: 2.691698, ppl: 14.756705 +epoch: 2, batch: 12053, sum loss: 3814.679688, avg loss: 2.629000, ppl: 13.859909 +epoch: 2, batch: 12054, sum loss: 4484.071289, avg loss: 2.607018, ppl: 13.558562 +epoch: 2, batch: 12055, sum loss: 4818.390625, avg loss: 2.477322, ppl: 11.909327 +epoch: 2, batch: 12056, sum loss: 4819.214355, avg loss: 2.649376, ppl: 14.145205 +epoch: 2, batch: 12057, sum loss: 4706.948242, avg loss: 2.725506, ppl: 15.264130 +epoch: 2, batch: 12058, sum loss: 3835.592773, avg loss: 2.289906, ppl: 9.874012 +epoch: 2, batch: 12059, sum loss: 4537.844238, avg loss: 2.710779, ppl: 15.040991 +epoch: 2, batch: 12060, sum loss: 4836.267578, avg loss: 2.586239, ppl: 13.279737 +epoch: 2, batch: 12061, sum loss: 5659.503906, avg loss: 2.723534, ppl: 15.234066 +epoch: 2, batch: 12062, sum loss: 4023.636719, avg loss: 2.634995, ppl: 13.943235 +epoch: 2, batch: 12063, sum loss: 4045.396240, avg loss: 2.589882, ppl: 13.328204 +epoch: 2, batch: 12064, sum loss: 4493.480469, avg loss: 2.689097, ppl: 14.718374 +epoch: 2, batch: 12065, sum loss: 3858.755859, avg loss: 2.268522, ppl: 9.665103 +epoch: 2, batch: 12066, sum loss: 3803.713379, avg loss: 2.494238, ppl: 12.112505 +epoch: 2, batch: 12067, sum loss: 3855.232178, avg loss: 2.524710, ppl: 
12.487272 +epoch: 2, batch: 12068, sum loss: 5343.592773, avg loss: 2.942507, ppl: 18.963329 +epoch: 2, batch: 12069, sum loss: 5640.130371, avg loss: 2.811630, ppl: 16.637018 +epoch: 2, batch: 12070, sum loss: 4426.927734, avg loss: 2.707601, ppl: 14.993264 +epoch: 2, batch: 12071, sum loss: 3685.253174, avg loss: 2.162707, ppl: 8.694645 +epoch: 2, batch: 12072, sum loss: 4395.186523, avg loss: 2.652496, ppl: 14.189416 +epoch: 2, batch: 12073, sum loss: 4500.813965, avg loss: 2.496292, ppl: 12.137401 +epoch: 2, batch: 12074, sum loss: 4445.607422, avg loss: 2.528787, ppl: 12.538286 +epoch: 2, batch: 12075, sum loss: 4989.591309, avg loss: 2.612351, ppl: 13.631065 +epoch: 2, batch: 12076, sum loss: 5294.064453, avg loss: 2.928133, ppl: 18.692699 +epoch: 2, batch: 12077, sum loss: 4333.568359, avg loss: 2.581041, ppl: 13.210888 +epoch: 2, batch: 12078, sum loss: 4347.501953, avg loss: 2.524682, ppl: 12.486918 +epoch: 2, batch: 12079, sum loss: 4552.791016, avg loss: 2.687598, ppl: 14.696333 +epoch: 2, batch: 12080, sum loss: 4345.431641, avg loss: 2.632000, ppl: 13.901545 +epoch: 2, batch: 12081, sum loss: 4557.707031, avg loss: 2.665326, ppl: 14.372629 +epoch: 2, batch: 12082, sum loss: 3957.584229, avg loss: 2.350110, ppl: 10.486719 +epoch: 2, batch: 12083, sum loss: 4167.765137, avg loss: 2.374795, ppl: 10.748809 +epoch: 2, batch: 12084, sum loss: 4721.138672, avg loss: 2.608364, ppl: 13.576822 +epoch: 2, batch: 12085, sum loss: 5453.436035, avg loss: 2.896142, ppl: 18.104170 +epoch: 2, batch: 12086, sum loss: 4328.565430, avg loss: 2.582676, ppl: 13.232507 +epoch: 2, batch: 12087, sum loss: 4392.149414, avg loss: 2.562514, ppl: 12.968383 +epoch: 2, batch: 12088, sum loss: 5021.333008, avg loss: 2.586982, ppl: 13.289610 +epoch: 2, batch: 12089, sum loss: 5455.237305, avg loss: 2.714049, ppl: 15.090247 +epoch: 2, batch: 12090, sum loss: 4944.616699, avg loss: 2.639945, ppl: 14.012433 +epoch: 2, batch: 12091, sum loss: 4622.583008, avg loss: 2.629456, ppl: 
13.866219 +epoch: 2, batch: 12092, sum loss: 4007.819824, avg loss: 2.554379, ppl: 12.863303 +epoch: 2, batch: 12093, sum loss: 3908.881592, avg loss: 2.333661, ppl: 10.315637 +epoch: 2, batch: 12094, sum loss: 4141.013672, avg loss: 2.649401, ppl: 14.145562 +epoch: 2, batch: 12095, sum loss: 3171.494141, avg loss: 2.118567, ppl: 8.319203 +epoch: 2, batch: 12096, sum loss: 4146.769531, avg loss: 2.487564, ppl: 12.031932 +epoch: 2, batch: 12097, sum loss: 3832.999512, avg loss: 2.436745, ppl: 11.435753 +epoch: 2, batch: 12098, sum loss: 4109.368164, avg loss: 2.599221, ppl: 13.453251 +epoch: 2, batch: 12099, sum loss: 4917.049805, avg loss: 2.590648, ppl: 13.338414 +epoch: 2, batch: 12100, sum loss: 3235.745117, avg loss: 2.348146, ppl: 10.466147 +epoch: 2, batch: 12101, sum loss: 4438.039062, avg loss: 2.689721, ppl: 14.727561 +epoch: 2, batch: 12102, sum loss: 4382.119629, avg loss: 2.415722, ppl: 11.197854 +epoch: 2, batch: 12103, sum loss: 5567.141113, avg loss: 2.787752, ppl: 16.244463 +epoch: 2, batch: 12104, sum loss: 4866.943359, avg loss: 2.663899, ppl: 14.352139 +epoch: 2, batch: 12105, sum loss: 4209.679688, avg loss: 2.609845, ppl: 13.596942 +epoch: 2, batch: 12106, sum loss: 3751.189453, avg loss: 2.201402, ppl: 9.037677 +epoch: 2, batch: 12107, sum loss: 4945.513672, avg loss: 2.709871, ppl: 15.027330 +epoch: 2, batch: 12108, sum loss: 4131.745117, avg loss: 2.396604, ppl: 10.985806 +epoch: 2, batch: 12109, sum loss: 4530.557129, avg loss: 2.701584, ppl: 14.903325 +epoch: 2, batch: 12110, sum loss: 4599.891113, avg loss: 2.575527, ppl: 13.138239 +epoch: 2, batch: 12111, sum loss: 4511.639648, avg loss: 2.586949, ppl: 13.289165 +epoch: 2, batch: 12112, sum loss: 3373.372314, avg loss: 2.347510, ppl: 10.459496 +epoch: 2, batch: 12113, sum loss: 5141.480469, avg loss: 2.837462, ppl: 17.072376 +epoch: 2, batch: 12114, sum loss: 4126.400879, avg loss: 2.525337, ppl: 12.495109 +epoch: 2, batch: 12115, sum loss: 4405.925781, avg loss: 2.608600, ppl: 13.580030 
+epoch: 2, batch: 12116, sum loss: 4871.460938, avg loss: 2.688444, ppl: 14.708774 +epoch: 2, batch: 12117, sum loss: 4198.765137, avg loss: 2.417251, ppl: 11.214988 +epoch: 2, batch: 12118, sum loss: 3188.636475, avg loss: 2.173576, ppl: 8.789660 +epoch: 2, batch: 12119, sum loss: 4254.331055, avg loss: 2.541417, ppl: 12.697646 +epoch: 2, batch: 12120, sum loss: 4670.164551, avg loss: 2.634047, ppl: 13.930028 +epoch: 2, batch: 12121, sum loss: 4839.235840, avg loss: 2.642947, ppl: 14.054564 +epoch: 2, batch: 12122, sum loss: 3749.274902, avg loss: 2.437760, ppl: 11.447371 +epoch: 2, batch: 12123, sum loss: 3730.554199, avg loss: 2.291495, ppl: 9.889712 +epoch: 2, batch: 12124, sum loss: 4150.794922, avg loss: 2.578133, ppl: 13.172526 +epoch: 2, batch: 12125, sum loss: 4975.642578, avg loss: 2.509149, ppl: 12.294464 +epoch: 2, batch: 12126, sum loss: 4431.901367, avg loss: 2.555883, ppl: 12.882669 +epoch: 2, batch: 12127, sum loss: 5580.882324, avg loss: 2.885668, ppl: 17.915537 +epoch: 2, batch: 12128, sum loss: 4306.293945, avg loss: 2.592591, ppl: 13.364358 +epoch: 2, batch: 12129, sum loss: 4009.443848, avg loss: 2.380905, ppl: 10.814685 +epoch: 2, batch: 12130, sum loss: 4511.205566, avg loss: 2.609142, ppl: 13.587382 +epoch: 2, batch: 12131, sum loss: 3587.571045, avg loss: 2.304156, ppl: 10.015722 +epoch: 2, batch: 12132, sum loss: 3794.166748, avg loss: 2.483094, ppl: 11.978262 +epoch: 2, batch: 12133, sum loss: 4610.294434, avg loss: 2.429028, ppl: 11.347844 +epoch: 2, batch: 12134, sum loss: 3489.249512, avg loss: 2.333946, ppl: 10.318579 +epoch: 2, batch: 12135, sum loss: 4423.271973, avg loss: 2.653432, ppl: 14.202704 +epoch: 2, batch: 12136, sum loss: 4342.032715, avg loss: 2.576874, ppl: 13.155949 +epoch: 2, batch: 12137, sum loss: 4899.345215, avg loss: 2.931984, ppl: 18.764826 +epoch: 2, batch: 12138, sum loss: 3930.723389, avg loss: 2.539227, ppl: 12.669873 +epoch: 2, batch: 12139, sum loss: 3909.945801, avg loss: 2.277196, ppl: 9.749307 +epoch: 2, 
batch: 12140, sum loss: 4652.299316, avg loss: 2.646359, ppl: 14.102597 +epoch: 2, batch: 12141, sum loss: 3577.422852, avg loss: 2.347390, ppl: 10.458242 +epoch: 2, batch: 12142, sum loss: 4367.894531, avg loss: 2.543911, ppl: 12.729355 +epoch: 2, batch: 12143, sum loss: 3267.762451, avg loss: 2.511731, ppl: 12.326253 +epoch: 2, batch: 12144, sum loss: 3677.630859, avg loss: 2.435517, ppl: 11.421723 +epoch: 2, batch: 12145, sum loss: 5227.466797, avg loss: 2.712749, ppl: 15.070644 +epoch: 2, batch: 12146, sum loss: 5134.664062, avg loss: 2.757607, ppl: 15.762079 +epoch: 2, batch: 12147, sum loss: 3869.342285, avg loss: 2.545620, ppl: 12.751131 +epoch: 2, batch: 12148, sum loss: 2985.491943, avg loss: 2.317929, ppl: 10.154617 +epoch: 2, batch: 12149, sum loss: 4253.989746, avg loss: 2.590737, ppl: 13.339598 +epoch: 2, batch: 12150, sum loss: 4435.901855, avg loss: 2.645141, ppl: 14.085433 +epoch: 2, batch: 12151, sum loss: 5260.458008, avg loss: 2.871429, ppl: 17.662243 +epoch: 2, batch: 12152, sum loss: 3800.677246, avg loss: 2.390363, ppl: 10.917456 +epoch: 2, batch: 12153, sum loss: 3888.146973, avg loss: 2.362179, ppl: 10.614058 +epoch: 2, batch: 12154, sum loss: 5382.062500, avg loss: 2.660436, ppl: 14.302526 +epoch: 2, batch: 12155, sum loss: 4272.727539, avg loss: 2.411246, ppl: 11.147840 +epoch: 2, batch: 12156, sum loss: 5134.832520, avg loss: 2.629203, ppl: 13.862711 +epoch: 2, batch: 12157, sum loss: 4710.796875, avg loss: 2.589773, ppl: 13.326745 +epoch: 2, batch: 12158, sum loss: 4738.497559, avg loss: 2.767814, ppl: 15.923785 +epoch: 2, batch: 12159, sum loss: 4414.693359, avg loss: 2.562213, ppl: 12.964478 +epoch: 2, batch: 12160, sum loss: 4475.466309, avg loss: 2.645075, ppl: 14.084496 +epoch: 2, batch: 12161, sum loss: 3488.751465, avg loss: 2.293722, ppl: 9.911764 +epoch: 2, batch: 12162, sum loss: 4891.271484, avg loss: 2.544886, ppl: 12.741780 +epoch: 2, batch: 12163, sum loss: 5416.995117, avg loss: 2.472385, ppl: 11.850677 +epoch: 2, batch: 
12164, sum loss: 3349.385254, avg loss: 2.481026, ppl: 11.953525 +epoch: 2, batch: 12165, sum loss: 5920.169922, avg loss: 2.864136, ppl: 17.533905 +epoch: 2, batch: 12166, sum loss: 3907.515137, avg loss: 2.389917, ppl: 10.912592 +epoch: 2, batch: 12167, sum loss: 4000.802002, avg loss: 2.300634, ppl: 9.980505 +epoch: 2, batch: 12168, sum loss: 5585.987305, avg loss: 2.930738, ppl: 18.741461 +epoch: 2, batch: 12169, sum loss: 4434.925781, avg loss: 2.419490, ppl: 11.240129 +epoch: 2, batch: 12170, sum loss: 4456.128906, avg loss: 2.559523, ppl: 12.929644 +epoch: 2, batch: 12171, sum loss: 4049.275391, avg loss: 2.582446, ppl: 13.229459 +epoch: 2, batch: 12172, sum loss: 4073.644531, avg loss: 2.533361, ppl: 12.595769 +epoch: 2, batch: 12173, sum loss: 3954.557373, avg loss: 2.586369, ppl: 13.281453 +epoch: 2, batch: 12174, sum loss: 4382.450195, avg loss: 2.427950, ppl: 11.335622 +epoch: 2, batch: 12175, sum loss: 5648.298828, avg loss: 2.798959, ppl: 16.427534 +epoch: 2, batch: 12176, sum loss: 4084.562012, avg loss: 2.548074, ppl: 12.782455 +epoch: 2, batch: 12177, sum loss: 4388.222656, avg loss: 2.480623, ppl: 11.948712 +epoch: 2, batch: 12178, sum loss: 3586.194824, avg loss: 2.288574, ppl: 9.860864 +epoch: 2, batch: 12179, sum loss: 4608.236816, avg loss: 2.451190, ppl: 11.602142 +epoch: 2, batch: 12180, sum loss: 3727.675049, avg loss: 2.292543, ppl: 9.900081 +epoch: 2, batch: 12181, sum loss: 3928.977295, avg loss: 2.337286, ppl: 10.353098 +epoch: 2, batch: 12182, sum loss: 4686.915527, avg loss: 2.710767, ppl: 15.040804 +epoch: 2, batch: 12183, sum loss: 4350.525391, avg loss: 2.486014, ppl: 12.013300 +epoch: 2, batch: 12184, sum loss: 4881.013184, avg loss: 2.886466, ppl: 17.929825 +epoch: 2, batch: 12185, sum loss: 5088.232910, avg loss: 2.696467, ppl: 14.827250 +epoch: 2, batch: 12186, sum loss: 4151.669434, avg loss: 2.465362, ppl: 11.767742 +epoch: 2, batch: 12187, sum loss: 3926.986816, avg loss: 2.502860, ppl: 12.217381 +epoch: 2, batch: 12188, sum 
loss: 4585.049805, avg loss: 2.562912, ppl: 12.973541 +epoch: 2, batch: 12189, sum loss: 4071.229736, avg loss: 2.288493, ppl: 9.860071 +epoch: 2, batch: 12190, sum loss: 4538.409180, avg loss: 2.591896, ppl: 13.355066 +epoch: 2, batch: 12191, sum loss: 4145.139648, avg loss: 2.552426, ppl: 12.838210 +epoch: 2, batch: 12192, sum loss: 4333.934570, avg loss: 2.540407, ppl: 12.684832 +epoch: 2, batch: 12193, sum loss: 4210.674316, avg loss: 2.599182, ppl: 13.452727 +epoch: 2, batch: 12194, sum loss: 4368.705566, avg loss: 2.550324, ppl: 12.811256 +epoch: 2, batch: 12195, sum loss: 5101.515137, avg loss: 2.622887, ppl: 13.775437 +epoch: 2, batch: 12196, sum loss: 5131.285156, avg loss: 2.927145, ppl: 18.674240 +epoch: 2, batch: 12197, sum loss: 3773.399170, avg loss: 2.385208, ppl: 10.861320 +epoch: 2, batch: 12198, sum loss: 4619.525879, avg loss: 2.712581, ppl: 15.068118 +epoch: 2, batch: 12199, sum loss: 4583.698242, avg loss: 2.431670, ppl: 11.377870 +epoch: 2, batch: 12200, sum loss: 4714.396484, avg loss: 2.660495, ppl: 14.303365 +epoch: 2, batch: 12201, sum loss: 4437.473633, avg loss: 2.567982, ppl: 13.039490 +epoch: 2, batch: 12202, sum loss: 4058.497314, avg loss: 2.557339, ppl: 12.901446 +epoch: 2, batch: 12203, sum loss: 4072.263184, avg loss: 2.460582, ppl: 11.711626 +epoch: 2, batch: 12204, sum loss: 4001.264160, avg loss: 2.658647, ppl: 14.276963 +epoch: 2, batch: 12205, sum loss: 3765.694824, avg loss: 2.424788, ppl: 11.299829 +epoch: 2, batch: 12206, sum loss: 3475.443115, avg loss: 2.310800, ppl: 10.082486 +epoch: 2, batch: 12207, sum loss: 4919.637695, avg loss: 2.564983, ppl: 13.000439 +epoch: 2, batch: 12208, sum loss: 4293.885254, avg loss: 2.557406, ppl: 12.902308 +epoch: 2, batch: 12209, sum loss: 4847.321777, avg loss: 2.730885, ppl: 15.346467 +epoch: 2, batch: 12210, sum loss: 3369.079834, avg loss: 2.325107, ppl: 10.227773 +epoch: 2, batch: 12211, sum loss: 4276.089355, avg loss: 2.507970, ppl: 12.279981 +epoch: 2, batch: 12212, sum loss: 
3562.734619, avg loss: 2.381507, ppl: 10.821199 +epoch: 2, batch: 12213, sum loss: 4195.333984, avg loss: 2.510673, ppl: 12.313211 +epoch: 2, batch: 12214, sum loss: 4496.435547, avg loss: 2.270927, ppl: 9.688377 +epoch: 2, batch: 12215, sum loss: 4412.920410, avg loss: 2.408799, ppl: 11.120602 +epoch: 2, batch: 12216, sum loss: 4435.327637, avg loss: 2.465441, ppl: 11.768665 +epoch: 2, batch: 12217, sum loss: 4680.395996, avg loss: 2.737074, ppl: 15.441731 +epoch: 2, batch: 12218, sum loss: 4686.955078, avg loss: 2.580922, ppl: 13.209316 +epoch: 2, batch: 12219, sum loss: 4164.286133, avg loss: 2.486141, ppl: 12.014824 +epoch: 2, batch: 12220, sum loss: 4933.144531, avg loss: 3.050801, ppl: 21.132261 +epoch: 2, batch: 12221, sum loss: 3610.660156, avg loss: 2.476447, ppl: 11.898916 +epoch: 2, batch: 12222, sum loss: 4682.889160, avg loss: 2.651693, ppl: 14.178017 +epoch: 2, batch: 12223, sum loss: 3611.351074, avg loss: 2.334422, ppl: 10.323492 +epoch: 2, batch: 12224, sum loss: 4725.497070, avg loss: 2.529709, ppl: 12.549858 +epoch: 2, batch: 12225, sum loss: 3952.131592, avg loss: 2.515679, ppl: 12.375010 +epoch: 2, batch: 12226, sum loss: 4439.738281, avg loss: 2.388240, ppl: 10.894301 +epoch: 2, batch: 12227, sum loss: 4161.312988, avg loss: 2.475498, ppl: 11.887630 +epoch: 2, batch: 12228, sum loss: 4219.582031, avg loss: 2.451820, ppl: 11.609455 +epoch: 2, batch: 12229, sum loss: 4459.016113, avg loss: 2.467635, ppl: 11.794516 +epoch: 2, batch: 12230, sum loss: 5260.659180, avg loss: 2.975486, ppl: 19.599148 +epoch: 2, batch: 12231, sum loss: 3917.153320, avg loss: 2.361153, ppl: 10.603173 +epoch: 2, batch: 12232, sum loss: 3316.112305, avg loss: 1.954103, ppl: 7.057584 +epoch: 2, batch: 12233, sum loss: 4488.469727, avg loss: 2.434094, ppl: 11.405483 +epoch: 2, batch: 12234, sum loss: 5388.985352, avg loss: 2.960981, ppl: 19.316916 +epoch: 2, batch: 12235, sum loss: 3763.644775, avg loss: 2.400284, ppl: 11.026302 +epoch: 2, batch: 12236, sum loss: 
4924.648926, avg loss: 2.793334, ppl: 16.335400 +epoch: 2, batch: 12237, sum loss: 4163.834961, avg loss: 2.347145, ppl: 10.455674 +epoch: 2, batch: 12238, sum loss: 3977.196777, avg loss: 2.520404, ppl: 12.433614 +epoch: 2, batch: 12239, sum loss: 5077.336914, avg loss: 2.578637, ppl: 13.179168 +epoch: 2, batch: 12240, sum loss: 3816.129639, avg loss: 2.429109, ppl: 11.348762 +epoch: 2, batch: 12241, sum loss: 3690.545410, avg loss: 2.337267, ppl: 10.352907 +epoch: 2, batch: 12242, sum loss: 3801.150635, avg loss: 2.764473, ppl: 15.870677 +epoch: 2, batch: 12243, sum loss: 4046.149414, avg loss: 2.565726, ppl: 13.010098 +epoch: 2, batch: 12244, sum loss: 4676.758789, avg loss: 2.673962, ppl: 14.497289 +epoch: 2, batch: 12245, sum loss: 4350.462891, avg loss: 2.763953, ppl: 15.862431 +epoch: 2, batch: 12246, sum loss: 3879.954346, avg loss: 2.393556, ppl: 10.952370 +epoch: 2, batch: 12247, sum loss: 4206.167969, avg loss: 2.367005, ppl: 10.665402 +epoch: 2, batch: 12248, sum loss: 4670.330078, avg loss: 2.849500, ppl: 17.279135 +epoch: 2, batch: 12249, sum loss: 4347.750000, avg loss: 2.636598, ppl: 13.965610 +epoch: 2, batch: 12250, sum loss: 4399.143066, avg loss: 2.609219, ppl: 13.588435 +epoch: 2, batch: 12251, sum loss: 4092.148682, avg loss: 2.383313, ppl: 10.840761 +epoch: 2, batch: 12252, sum loss: 4958.723633, avg loss: 2.640428, ppl: 14.019200 +epoch: 2, batch: 12253, sum loss: 4187.606934, avg loss: 2.458959, ppl: 11.692632 +epoch: 2, batch: 12254, sum loss: 4631.810547, avg loss: 2.502329, ppl: 12.210898 +epoch: 2, batch: 12255, sum loss: 4299.453125, avg loss: 2.745500, ppl: 15.572399 +epoch: 2, batch: 12256, sum loss: 4621.592285, avg loss: 2.759160, ppl: 15.786573 +epoch: 2, batch: 12257, sum loss: 4991.699219, avg loss: 2.513444, ppl: 12.347378 +epoch: 2, batch: 12258, sum loss: 4550.829102, avg loss: 2.409121, ppl: 11.124176 +epoch: 2, batch: 12259, sum loss: 3699.492432, avg loss: 2.278013, ppl: 9.757269 +epoch: 2, batch: 12260, sum loss: 
2997.014160, avg loss: 2.178063, ppl: 8.829185 +epoch: 2, batch: 12261, sum loss: 4363.612305, avg loss: 2.600484, ppl: 13.470258 +epoch: 2, batch: 12262, sum loss: 4952.473633, avg loss: 2.687181, ppl: 14.690199 +epoch: 2, batch: 12263, sum loss: 4604.259766, avg loss: 2.409346, ppl: 11.126678 +epoch: 2, batch: 12264, sum loss: 4201.243164, avg loss: 2.593360, ppl: 13.374638 +epoch: 2, batch: 12265, sum loss: 5339.979980, avg loss: 2.958438, ppl: 19.267845 +epoch: 2, batch: 12266, sum loss: 4776.559082, avg loss: 2.607292, ppl: 13.562277 +epoch: 2, batch: 12267, sum loss: 4596.137695, avg loss: 2.496544, ppl: 12.140466 +epoch: 2, batch: 12268, sum loss: 4852.520020, avg loss: 2.577015, ppl: 13.157808 +epoch: 2, batch: 12269, sum loss: 5758.371094, avg loss: 2.927489, ppl: 18.680666 +epoch: 2, batch: 12270, sum loss: 5641.144531, avg loss: 2.746419, ppl: 15.586715 +epoch: 2, batch: 12271, sum loss: 4911.260254, avg loss: 2.602682, ppl: 13.499891 +epoch: 2, batch: 12272, sum loss: 3931.434082, avg loss: 2.420834, ppl: 11.255240 +epoch: 2, batch: 12273, sum loss: 4134.618164, avg loss: 2.487737, ppl: 12.034009 +epoch: 2, batch: 12274, sum loss: 4357.928223, avg loss: 2.636375, ppl: 13.962501 +epoch: 2, batch: 12275, sum loss: 3980.889648, avg loss: 2.578297, ppl: 13.174678 +epoch: 2, batch: 12276, sum loss: 4114.161133, avg loss: 2.600607, ppl: 13.471918 +epoch: 2, batch: 12277, sum loss: 3725.879883, avg loss: 2.433625, ppl: 11.400135 +epoch: 2, batch: 12278, sum loss: 3957.047363, avg loss: 2.549644, ppl: 12.802546 +epoch: 2, batch: 12279, sum loss: 4397.583984, avg loss: 2.776253, ppl: 16.058727 +epoch: 2, batch: 12280, sum loss: 3877.330566, avg loss: 2.394892, ppl: 10.967016 +epoch: 2, batch: 12281, sum loss: 4071.556396, avg loss: 2.527347, ppl: 12.520250 +epoch: 2, batch: 12282, sum loss: 3872.474121, avg loss: 2.406758, ppl: 11.097927 +epoch: 2, batch: 12283, sum loss: 4361.714844, avg loss: 2.562700, ppl: 12.970785 +epoch: 2, batch: 12284, sum loss: 
3433.579102, avg loss: 2.253005, ppl: 9.516285 +epoch: 2, batch: 12285, sum loss: 4233.186523, avg loss: 2.655700, ppl: 14.234953 +epoch: 2, batch: 12286, sum loss: 4424.946289, avg loss: 2.499970, ppl: 12.182125 +epoch: 2, batch: 12287, sum loss: 4584.066406, avg loss: 2.600151, ppl: 13.465772 +epoch: 2, batch: 12288, sum loss: 4087.639648, avg loss: 2.387640, ppl: 10.887769 +epoch: 2, batch: 12289, sum loss: 5134.617676, avg loss: 2.738463, ppl: 15.463195 +epoch: 2, batch: 12290, sum loss: 3899.567871, avg loss: 2.483801, ppl: 11.986741 +epoch: 2, batch: 12291, sum loss: 5146.446777, avg loss: 2.793945, ppl: 16.345377 +epoch: 2, batch: 12292, sum loss: 3412.452881, avg loss: 2.177698, ppl: 8.825967 +epoch: 2, batch: 12293, sum loss: 4914.291504, avg loss: 2.870497, ppl: 17.645790 +epoch: 2, batch: 12294, sum loss: 5899.614258, avg loss: 2.900498, ppl: 18.183205 +epoch: 2, batch: 12295, sum loss: 5002.057617, avg loss: 2.778921, ppl: 16.101637 +epoch: 2, batch: 12296, sum loss: 4997.776855, avg loss: 2.642928, ppl: 14.054296 +epoch: 2, batch: 12297, sum loss: 4326.349609, avg loss: 2.518248, ppl: 12.406838 +epoch: 2, batch: 12298, sum loss: 3841.415039, avg loss: 2.440543, ppl: 11.479275 +epoch: 2, batch: 12299, sum loss: 4274.269043, avg loss: 2.817580, ppl: 16.736300 +epoch: 2, batch: 12300, sum loss: 3869.682129, avg loss: 2.296547, ppl: 9.939803 +epoch: 2, batch: 12301, sum loss: 3926.950928, avg loss: 2.669579, ppl: 14.433895 +epoch: 2, batch: 12302, sum loss: 4545.838867, avg loss: 2.561036, ppl: 12.949224 +epoch: 2, batch: 12303, sum loss: 4726.049316, avg loss: 2.762156, ppl: 15.833952 +epoch: 2, batch: 12304, sum loss: 3888.348633, avg loss: 2.506994, ppl: 12.268003 +epoch: 2, batch: 12305, sum loss: 4003.625732, avg loss: 2.550080, ppl: 12.808129 +epoch: 2, batch: 12306, sum loss: 5157.583984, avg loss: 2.632764, ppl: 13.912165 +epoch: 2, batch: 12307, sum loss: 4673.074707, avg loss: 2.658177, ppl: 14.270246 +epoch: 2, batch: 12308, sum loss: 
5266.140625, avg loss: 2.644973, ppl: 14.083058 +epoch: 2, batch: 12309, sum loss: 4727.520508, avg loss: 2.593264, ppl: 13.373356 +epoch: 2, batch: 12310, sum loss: 5143.620117, avg loss: 2.914232, ppl: 18.434658 +epoch: 2, batch: 12311, sum loss: 4264.877441, avg loss: 2.452488, ppl: 11.617219 +epoch: 2, batch: 12312, sum loss: 3993.425537, avg loss: 2.465078, ppl: 11.764396 +epoch: 2, batch: 12313, sum loss: 5034.381348, avg loss: 2.623440, ppl: 13.783053 +epoch: 2, batch: 12314, sum loss: 5425.933594, avg loss: 3.081166, ppl: 21.783789 +epoch: 2, batch: 12315, sum loss: 4055.359863, avg loss: 2.456305, ppl: 11.661645 +epoch: 2, batch: 12316, sum loss: 4088.007812, avg loss: 2.207348, ppl: 9.091570 +epoch: 2, batch: 12317, sum loss: 4591.465332, avg loss: 2.674121, ppl: 14.499595 +epoch: 2, batch: 12318, sum loss: 3996.539795, avg loss: 2.586757, ppl: 13.286615 +epoch: 2, batch: 12319, sum loss: 4240.248047, avg loss: 2.332370, ppl: 10.302324 +epoch: 2, batch: 12320, sum loss: 4699.495605, avg loss: 2.561033, ppl: 12.949190 +epoch: 2, batch: 12321, sum loss: 4771.208008, avg loss: 2.561035, ppl: 12.949212 +epoch: 2, batch: 12322, sum loss: 4229.036621, avg loss: 2.719638, ppl: 15.174826 +epoch: 2, batch: 12323, sum loss: 3796.815186, avg loss: 2.536283, ppl: 12.632623 +epoch: 2, batch: 12324, sum loss: 3299.257812, avg loss: 2.366756, ppl: 10.662745 +epoch: 2, batch: 12325, sum loss: 4123.916016, avg loss: 2.676130, ppl: 14.528755 +epoch: 2, batch: 12326, sum loss: 4494.120117, avg loss: 2.875317, ppl: 17.731041 +epoch: 2, batch: 12327, sum loss: 4194.765625, avg loss: 2.367249, ppl: 10.668007 +epoch: 2, batch: 12328, sum loss: 4364.009277, avg loss: 2.622602, ppl: 13.771507 +epoch: 2, batch: 12329, sum loss: 4652.527344, avg loss: 2.599177, ppl: 13.452663 +epoch: 2, batch: 12330, sum loss: 3881.339111, avg loss: 2.243549, ppl: 9.426724 +epoch: 2, batch: 12331, sum loss: 4826.164062, avg loss: 2.702220, ppl: 14.912797 +epoch: 2, batch: 12332, sum loss: 
4608.298828, avg loss: 2.560166, ppl: 12.937966 +epoch: 2, batch: 12333, sum loss: 4466.078125, avg loss: 2.322453, ppl: 10.200671 +epoch: 2, batch: 12334, sum loss: 4461.416016, avg loss: 2.994239, ppl: 19.970160 +epoch: 2, batch: 12335, sum loss: 4902.646973, avg loss: 2.722180, ppl: 15.213457 +epoch: 2, batch: 12336, sum loss: 4505.529297, avg loss: 2.536897, ppl: 12.640389 +epoch: 2, batch: 12337, sum loss: 3851.229736, avg loss: 2.504050, ppl: 12.231939 +epoch: 2, batch: 12338, sum loss: 5074.674805, avg loss: 2.709383, ppl: 15.020009 +epoch: 2, batch: 12339, sum loss: 3971.923340, avg loss: 2.551011, ppl: 12.820053 +epoch: 2, batch: 12340, sum loss: 4595.221191, avg loss: 2.360155, ppl: 10.592592 +epoch: 2, batch: 12341, sum loss: 4718.993164, avg loss: 2.667605, ppl: 14.405429 +epoch: 2, batch: 12342, sum loss: 4862.756348, avg loss: 2.778718, ppl: 16.098370 +epoch: 2, batch: 12343, sum loss: 5378.396484, avg loss: 2.836707, ppl: 17.059498 +epoch: 2, batch: 12344, sum loss: 3792.083740, avg loss: 2.415340, ppl: 11.193575 +epoch: 2, batch: 12345, sum loss: 4191.378418, avg loss: 2.608201, ppl: 13.574605 +epoch: 2, batch: 12346, sum loss: 4350.514648, avg loss: 2.705544, ppl: 14.962450 +epoch: 2, batch: 12347, sum loss: 3958.792480, avg loss: 2.477342, ppl: 11.909568 +epoch: 2, batch: 12348, sum loss: 4504.247559, avg loss: 2.655806, ppl: 14.236461 +epoch: 2, batch: 12349, sum loss: 4608.146973, avg loss: 2.537526, ppl: 12.648335 +epoch: 2, batch: 12350, sum loss: 4872.929199, avg loss: 2.828166, ppl: 16.914404 +epoch: 2, batch: 12351, sum loss: 3708.925781, avg loss: 2.192037, ppl: 8.953429 +epoch: 2, batch: 12352, sum loss: 4998.287598, avg loss: 2.643198, ppl: 14.058090 +epoch: 2, batch: 12353, sum loss: 4340.289062, avg loss: 2.617786, ppl: 13.705345 +epoch: 2, batch: 12354, sum loss: 3953.328369, avg loss: 2.573781, ppl: 13.115326 +epoch: 2, batch: 12355, sum loss: 4277.964844, avg loss: 2.225788, ppl: 9.260780 +epoch: 2, batch: 12356, sum loss: 
5364.086914, avg loss: 2.675355, ppl: 14.517502 +epoch: 2, batch: 12357, sum loss: 3753.570068, avg loss: 2.412320, ppl: 11.159823 +epoch: 2, batch: 12358, sum loss: 3756.847412, avg loss: 2.469985, ppl: 11.822272 +epoch: 2, batch: 12359, sum loss: 4629.285645, avg loss: 2.479532, ppl: 11.935672 +epoch: 2, batch: 12360, sum loss: 4306.995605, avg loss: 2.430584, ppl: 11.365522 +epoch: 2, batch: 12361, sum loss: 4005.520508, avg loss: 2.446867, ppl: 11.552092 +epoch: 2, batch: 12362, sum loss: 4879.750977, avg loss: 2.676770, ppl: 14.538055 +epoch: 2, batch: 12363, sum loss: 4269.276855, avg loss: 2.643515, ppl: 14.062549 +epoch: 2, batch: 12364, sum loss: 4784.439941, avg loss: 2.783269, ppl: 16.171806 +epoch: 2, batch: 12365, sum loss: 3640.222656, avg loss: 2.404374, ppl: 11.071502 +epoch: 2, batch: 12366, sum loss: 4745.970703, avg loss: 2.778671, ppl: 16.097618 +epoch: 2, batch: 12367, sum loss: 3979.208984, avg loss: 2.474633, ppl: 11.877341 +epoch: 2, batch: 12368, sum loss: 4233.345703, avg loss: 2.441376, ppl: 11.488835 +epoch: 2, batch: 12369, sum loss: 3788.026855, avg loss: 2.459758, ppl: 11.701974 +epoch: 2, batch: 12370, sum loss: 5074.952148, avg loss: 2.817852, ppl: 16.740856 +epoch: 2, batch: 12371, sum loss: 4952.736328, avg loss: 2.700511, ppl: 14.887330 +epoch: 2, batch: 12372, sum loss: 4773.693848, avg loss: 2.861927, ppl: 17.495205 +epoch: 2, batch: 12373, sum loss: 4182.233398, avg loss: 2.610632, ppl: 13.607654 +epoch: 2, batch: 12374, sum loss: 5462.159180, avg loss: 2.739297, ppl: 15.476108 +epoch: 2, batch: 12375, sum loss: 3228.302979, avg loss: 2.193141, ppl: 8.963321 +epoch: 2, batch: 12376, sum loss: 3910.688232, avg loss: 2.481401, ppl: 11.958009 +epoch: 2, batch: 12377, sum loss: 4613.861816, avg loss: 2.467306, ppl: 11.790639 +epoch: 2, batch: 12378, sum loss: 3762.297119, avg loss: 2.422600, ppl: 11.275131 +epoch: 2, batch: 12379, sum loss: 3821.711426, avg loss: 2.350376, ppl: 10.489511 +epoch: 2, batch: 12380, sum loss: 
3949.968506, avg loss: 2.418842, ppl: 11.232840 +epoch: 2, batch: 12381, sum loss: 4214.559570, avg loss: 2.309348, ppl: 10.067855 +epoch: 2, batch: 12382, sum loss: 4160.443359, avg loss: 2.470572, ppl: 11.829214 +epoch: 2, batch: 12383, sum loss: 4594.740723, avg loss: 2.688555, ppl: 14.710407 +epoch: 2, batch: 12384, sum loss: 3173.473633, avg loss: 2.228563, ppl: 9.286512 +epoch: 2, batch: 12385, sum loss: 3789.781738, avg loss: 2.602872, ppl: 13.502463 +epoch: 2, batch: 12386, sum loss: 3701.915527, avg loss: 2.257266, ppl: 9.556921 +epoch: 2, batch: 12387, sum loss: 4010.530273, avg loss: 2.432098, ppl: 11.382743 +epoch: 2, batch: 12388, sum loss: 4261.118164, avg loss: 2.470214, ppl: 11.824973 +epoch: 2, batch: 12389, sum loss: 3829.287598, avg loss: 2.476900, ppl: 11.904305 +epoch: 2, batch: 12390, sum loss: 4061.993164, avg loss: 2.446984, ppl: 11.553447 +epoch: 2, batch: 12391, sum loss: 2856.386719, avg loss: 2.202303, ppl: 9.045819 +epoch: 2, batch: 12392, sum loss: 4813.775879, avg loss: 2.567347, ppl: 13.031207 +epoch: 2, batch: 12393, sum loss: 4273.037598, avg loss: 2.416877, ppl: 11.210789 +epoch: 2, batch: 12394, sum loss: 3112.779053, avg loss: 2.204518, ppl: 9.065877 +epoch: 2, batch: 12395, sum loss: 3778.822021, avg loss: 2.235989, ppl: 9.355733 +epoch: 2, batch: 12396, sum loss: 4484.825195, avg loss: 2.625776, ppl: 13.815289 +epoch: 2, batch: 12397, sum loss: 3674.972412, avg loss: 2.354243, ppl: 10.530149 +epoch: 2, batch: 12398, sum loss: 4451.479980, avg loss: 2.444525, ppl: 11.525074 +epoch: 2, batch: 12399, sum loss: 4374.215332, avg loss: 2.515363, ppl: 12.371093 +epoch: 2, batch: 12400, sum loss: 3501.734131, avg loss: 2.528328, ppl: 12.532534 +epoch: 2, batch: 12401, sum loss: 3892.423096, avg loss: 2.379232, ppl: 10.796607 +epoch: 2, batch: 12402, sum loss: 3937.802734, avg loss: 2.441291, ppl: 11.487865 +epoch: 2, batch: 12403, sum loss: 4287.324219, avg loss: 2.476790, ppl: 11.903000 +epoch: 2, batch: 12404, sum loss: 4336.585938, 
avg loss: 2.610828, ppl: 13.610321 +epoch: 2, batch: 12405, sum loss: 3872.089600, avg loss: 2.420056, ppl: 11.246488 +epoch: 2, batch: 12406, sum loss: 4590.820312, avg loss: 2.596618, ppl: 13.418280 +epoch: 2, batch: 12407, sum loss: 4684.147461, avg loss: 2.544350, ppl: 12.734943 +epoch: 2, batch: 12408, sum loss: 4559.717773, avg loss: 2.551605, ppl: 12.827672 +epoch: 2, batch: 12409, sum loss: 4204.107910, avg loss: 2.563480, ppl: 12.980917 +epoch: 2, batch: 12410, sum loss: 5037.565918, avg loss: 2.641618, ppl: 14.035899 +epoch: 2, batch: 12411, sum loss: 4236.778320, avg loss: 2.501050, ppl: 12.195290 +epoch: 2, batch: 12412, sum loss: 4205.228027, avg loss: 2.646462, ppl: 14.104050 +epoch: 2, batch: 12413, sum loss: 3627.474121, avg loss: 2.341817, ppl: 10.400113 +epoch: 2, batch: 12414, sum loss: 4521.323242, avg loss: 2.506277, ppl: 12.259199 +epoch: 2, batch: 12415, sum loss: 4836.158691, avg loss: 2.757217, ppl: 15.755932 +epoch: 2, batch: 12416, sum loss: 3375.049561, avg loss: 2.350313, ppl: 10.488852 +epoch: 2, batch: 12417, sum loss: 3632.124023, avg loss: 2.410169, ppl: 11.135839 +epoch: 2, batch: 12418, sum loss: 4071.155518, avg loss: 2.674872, ppl: 14.510498 +epoch: 2, batch: 12419, sum loss: 4007.163818, avg loss: 2.485834, ppl: 12.011129 +epoch: 2, batch: 12420, sum loss: 4373.016602, avg loss: 2.729723, ppl: 15.328644 +epoch: 2, batch: 12421, sum loss: 4533.592773, avg loss: 2.534149, ppl: 12.605701 +epoch: 2, batch: 12422, sum loss: 4490.505859, avg loss: 2.451149, ppl: 11.601675 +epoch: 2, batch: 12423, sum loss: 4929.103516, avg loss: 3.000063, ppl: 20.086802 +epoch: 2, batch: 12424, sum loss: 3673.817627, avg loss: 2.361065, ppl: 10.602241 +epoch: 2, batch: 12425, sum loss: 3805.334229, avg loss: 2.436193, ppl: 11.429451 +epoch: 2, batch: 12426, sum loss: 4149.478027, avg loss: 2.477300, ppl: 11.909071 +epoch: 2, batch: 12427, sum loss: 4055.332764, avg loss: 2.401026, ppl: 11.034492 +epoch: 2, batch: 12428, sum loss: 3827.412842, avg 
loss: 2.309845, ppl: 10.072861 +epoch: 2, batch: 12429, sum loss: 4328.779785, avg loss: 2.431899, ppl: 11.380471 +epoch: 2, batch: 12430, sum loss: 3589.323242, avg loss: 2.330729, ppl: 10.285439 +epoch: 2, batch: 12431, sum loss: 4185.900879, avg loss: 2.549270, ppl: 12.797757 +epoch: 2, batch: 12432, sum loss: 4124.869141, avg loss: 2.345008, ppl: 10.433357 +epoch: 2, batch: 12433, sum loss: 4200.912109, avg loss: 2.515516, ppl: 12.372995 +epoch: 2, batch: 12434, sum loss: 4279.135742, avg loss: 2.720366, ppl: 15.185880 +epoch: 2, batch: 12435, sum loss: 4383.580566, avg loss: 2.528016, ppl: 12.528628 +epoch: 2, batch: 12436, sum loss: 4812.118652, avg loss: 2.570576, ppl: 13.073359 +epoch: 2, batch: 12437, sum loss: 4482.703125, avg loss: 2.483492, ppl: 11.983038 +epoch: 2, batch: 12438, sum loss: 3700.864746, avg loss: 2.417286, ppl: 11.215379 +epoch: 2, batch: 12439, sum loss: 4011.371582, avg loss: 2.440007, ppl: 11.473121 +epoch: 2, batch: 12440, sum loss: 5048.770020, avg loss: 2.837982, ppl: 17.081259 +epoch: 2, batch: 12441, sum loss: 4974.690430, avg loss: 2.841057, ppl: 17.133863 +epoch: 2, batch: 12442, sum loss: 4755.711914, avg loss: 2.653857, ppl: 14.208739 +epoch: 2, batch: 12443, sum loss: 3584.344727, avg loss: 2.309501, ppl: 10.069396 +epoch: 2, batch: 12444, sum loss: 4922.762695, avg loss: 2.423812, ppl: 11.288815 +epoch: 2, batch: 12445, sum loss: 4371.189453, avg loss: 2.473791, ppl: 11.867355 +epoch: 2, batch: 12446, sum loss: 4065.247559, avg loss: 2.523431, ppl: 12.471316 +epoch: 2, batch: 12447, sum loss: 4094.545898, avg loss: 2.626393, ppl: 13.823812 +epoch: 2, batch: 12448, sum loss: 4681.684082, avg loss: 2.490258, ppl: 12.064383 +epoch: 2, batch: 12449, sum loss: 5072.188477, avg loss: 2.803863, ppl: 16.508301 +epoch: 2, batch: 12450, sum loss: 4634.260254, avg loss: 2.411166, ppl: 11.146948 +epoch: 2, batch: 12451, sum loss: 4308.981934, avg loss: 2.557259, ppl: 12.900413 +epoch: 2, batch: 12452, sum loss: 4838.323242, avg loss: 
2.667212, ppl: 14.399774 +epoch: 2, batch: 12453, sum loss: 3713.348389, avg loss: 2.293606, ppl: 9.910614 +epoch: 2, batch: 12454, sum loss: 5263.665527, avg loss: 2.625269, ppl: 13.808293 +epoch: 2, batch: 12455, sum loss: 4180.625977, avg loss: 2.467902, ppl: 11.797668 +epoch: 2, batch: 12456, sum loss: 4291.261230, avg loss: 2.499279, ppl: 12.173708 +epoch: 2, batch: 12457, sum loss: 4879.163086, avg loss: 2.491912, ppl: 12.084358 +epoch: 2, batch: 12458, sum loss: 4407.293457, avg loss: 2.458055, ppl: 11.682074 +epoch: 2, batch: 12459, sum loss: 3763.782227, avg loss: 2.365671, ppl: 10.651182 +epoch: 2, batch: 12460, sum loss: 4321.492188, avg loss: 2.388885, ppl: 10.901330 +epoch: 2, batch: 12461, sum loss: 4297.038086, avg loss: 2.476679, ppl: 11.901674 +epoch: 2, batch: 12462, sum loss: 5126.205566, avg loss: 2.816597, ppl: 16.719849 +epoch: 2, batch: 12463, sum loss: 4062.339844, avg loss: 2.410884, ppl: 11.143809 +epoch: 2, batch: 12464, sum loss: 5072.441406, avg loss: 2.652951, ppl: 14.195862 +epoch: 2, batch: 12465, sum loss: 3614.961182, avg loss: 2.370466, ppl: 10.702381 +epoch: 2, batch: 12466, sum loss: 3668.004150, avg loss: 2.334821, ppl: 10.327614 +epoch: 2, batch: 12467, sum loss: 3972.722900, avg loss: 2.353509, ppl: 10.522428 +epoch: 2, batch: 12468, sum loss: 4732.776367, avg loss: 2.670867, ppl: 14.452497 +epoch: 2, batch: 12469, sum loss: 4781.936035, avg loss: 2.526115, ppl: 12.504832 +epoch: 2, batch: 12470, sum loss: 5023.741699, avg loss: 2.527033, ppl: 12.516314 +epoch: 2, batch: 12471, sum loss: 3997.488770, avg loss: 2.406676, ppl: 11.097013 +epoch: 2, batch: 12472, sum loss: 4010.143311, avg loss: 2.490772, ppl: 12.070594 +epoch: 2, batch: 12473, sum loss: 3403.030762, avg loss: 2.219851, ppl: 9.205955 +epoch: 2, batch: 12474, sum loss: 4416.010254, avg loss: 2.577939, ppl: 13.169971 +epoch: 2, batch: 12475, sum loss: 3284.990234, avg loss: 2.169743, ppl: 8.756030 +epoch: 2, batch: 12476, sum loss: 4768.311035, avg loss: 2.622833, 
ppl: 13.774695 +epoch: 2, batch: 12477, sum loss: 4078.415039, avg loss: 2.520652, ppl: 12.436704 +epoch: 2, batch: 12478, sum loss: 4685.575195, avg loss: 2.623502, ppl: 13.783917 +epoch: 2, batch: 12479, sum loss: 4848.335938, avg loss: 2.574793, ppl: 13.128604 +epoch: 2, batch: 12480, sum loss: 3927.320801, avg loss: 2.532122, ppl: 12.580169 +epoch: 2, batch: 12481, sum loss: 3952.294189, avg loss: 2.296510, ppl: 9.939435 +epoch: 2, batch: 12482, sum loss: 3106.109863, avg loss: 2.052948, ppl: 7.790833 +epoch: 2, batch: 12483, sum loss: 4902.250977, avg loss: 2.677363, ppl: 14.546679 +epoch: 2, batch: 12484, sum loss: 3981.324463, avg loss: 2.497694, ppl: 12.154433 +epoch: 2, batch: 12485, sum loss: 4730.376953, avg loss: 2.443377, ppl: 11.511846 +epoch: 2, batch: 12486, sum loss: 4834.012207, avg loss: 2.851925, ppl: 17.321087 +epoch: 2, batch: 12487, sum loss: 4355.240234, avg loss: 2.631565, ppl: 13.895500 +epoch: 2, batch: 12488, sum loss: 4618.213379, avg loss: 2.613590, ppl: 13.647959 +epoch: 2, batch: 12489, sum loss: 3297.806641, avg loss: 2.255682, ppl: 9.541797 +epoch: 2, batch: 12490, sum loss: 3911.665283, avg loss: 2.393920, ppl: 10.956358 +epoch: 2, batch: 12491, sum loss: 4088.052734, avg loss: 2.426144, ppl: 11.315168 +epoch: 2, batch: 12492, sum loss: 4414.934570, avg loss: 2.640511, ppl: 14.020370 +epoch: 2, batch: 12493, sum loss: 4122.094727, avg loss: 2.647460, ppl: 14.118130 +epoch: 2, batch: 12494, sum loss: 3407.905029, avg loss: 2.253906, ppl: 9.524863 +epoch: 2, batch: 12495, sum loss: 3693.017822, avg loss: 2.361264, ppl: 10.604352 +epoch: 2, batch: 12496, sum loss: 4156.064453, avg loss: 2.583011, ppl: 13.236934 +epoch: 2, batch: 12497, sum loss: 4799.891113, avg loss: 2.531588, ppl: 12.573461 +epoch: 2, batch: 12498, sum loss: 3843.216309, avg loss: 2.274092, ppl: 9.719094 +epoch: 2, batch: 12499, sum loss: 4082.512695, avg loss: 2.346272, ppl: 10.446549 +epoch: 2, batch: 12500, sum loss: 4022.197021, avg loss: 2.412836, ppl: 
11.165577 +epoch: 2, batch: 12501, sum loss: 5256.170898, avg loss: 2.692711, ppl: 14.771663 +epoch: 2, batch: 12502, sum loss: 4336.786133, avg loss: 2.466886, ppl: 11.785692 +epoch: 2, batch: 12503, sum loss: 3783.677734, avg loss: 2.463332, ppl: 11.743876 +epoch: 2, batch: 12504, sum loss: 4426.274902, avg loss: 2.555586, ppl: 12.878842 +epoch: 2, batch: 12505, sum loss: 4474.507812, avg loss: 2.499725, ppl: 12.179143 +epoch: 2, batch: 12506, sum loss: 4233.835938, avg loss: 2.436039, ppl: 11.427689 +epoch: 2, batch: 12507, sum loss: 4040.080078, avg loss: 2.555395, ppl: 12.876390 +epoch: 2, batch: 12508, sum loss: 3457.951660, avg loss: 2.299170, ppl: 9.965907 +epoch: 2, batch: 12509, sum loss: 4701.335449, avg loss: 2.549531, ppl: 12.801102 +epoch: 2, batch: 12510, sum loss: 5054.841797, avg loss: 2.778913, ppl: 16.101501 +epoch: 2, batch: 12511, sum loss: 3946.056641, avg loss: 2.357262, ppl: 10.561995 +epoch: 2, batch: 12512, sum loss: 3851.986816, avg loss: 2.509438, ppl: 12.298011 +epoch: 2, batch: 12513, sum loss: 3571.709717, avg loss: 2.392304, ppl: 10.938667 +epoch: 2, batch: 12514, sum loss: 3616.081543, avg loss: 2.441649, ppl: 11.491972 +epoch: 2, batch: 12515, sum loss: 4524.145020, avg loss: 2.461450, ppl: 11.721794 +epoch: 2, batch: 12516, sum loss: 4046.213623, avg loss: 2.551207, ppl: 12.822566 +epoch: 2, batch: 12517, sum loss: 4439.720703, avg loss: 2.771361, ppl: 15.980371 +epoch: 2, batch: 12518, sum loss: 3219.491455, avg loss: 2.311193, ppl: 10.086448 +epoch: 2, batch: 12519, sum loss: 3745.061035, avg loss: 2.457389, ppl: 11.674292 +epoch: 2, batch: 12520, sum loss: 4348.157227, avg loss: 2.633651, ppl: 13.924513 +epoch: 2, batch: 12521, sum loss: 3850.499756, avg loss: 2.550000, ppl: 12.807103 +epoch: 2, batch: 12522, sum loss: 4268.381836, avg loss: 2.583766, ppl: 13.246939 +epoch: 2, batch: 12523, sum loss: 4294.032715, avg loss: 2.724640, ppl: 15.250921 +epoch: 2, batch: 12524, sum loss: 3365.150391, avg loss: 2.300171, ppl: 9.975892 
+epoch: 2, batch: 12525, sum loss: 3668.644531, avg loss: 2.326344, ppl: 10.240434 +epoch: 2, batch: 12526, sum loss: 3469.732422, avg loss: 2.237094, ppl: 9.366071 +epoch: 2, batch: 12527, sum loss: 3209.128662, avg loss: 2.100215, ppl: 8.167928 +epoch: 2, batch: 12528, sum loss: 4847.363770, avg loss: 2.589404, ppl: 13.321828 +epoch: 2, batch: 12529, sum loss: 3916.221436, avg loss: 2.586672, ppl: 13.285484 +epoch: 2, batch: 12530, sum loss: 4431.058105, avg loss: 2.556871, ppl: 12.895407 +epoch: 2, batch: 12531, sum loss: 3890.361328, avg loss: 2.559448, ppl: 12.928681 +epoch: 2, batch: 12532, sum loss: 5077.428223, avg loss: 2.747526, ppl: 15.603983 +epoch: 2, batch: 12533, sum loss: 4273.006348, avg loss: 2.608673, ppl: 13.581015 +epoch: 2, batch: 12534, sum loss: 5305.642578, avg loss: 2.779279, ppl: 16.107395 +epoch: 2, batch: 12535, sum loss: 4380.249512, avg loss: 2.384458, ppl: 10.853182 +epoch: 2, batch: 12536, sum loss: 3563.407227, avg loss: 2.624011, ppl: 13.790932 +epoch: 2, batch: 12537, sum loss: 3870.683594, avg loss: 2.435924, ppl: 11.426375 +epoch: 2, batch: 12538, sum loss: 5815.921875, avg loss: 2.887747, ppl: 17.952812 +epoch: 2, batch: 12539, sum loss: 4575.099609, avg loss: 2.734668, ppl: 15.404629 +epoch: 2, batch: 12540, sum loss: 4004.035645, avg loss: 2.391897, ppl: 10.934218 +epoch: 2, batch: 12541, sum loss: 4680.943359, avg loss: 2.658117, ppl: 14.269388 +epoch: 2, batch: 12542, sum loss: 3785.872803, avg loss: 2.589516, ppl: 13.323327 +epoch: 2, batch: 12543, sum loss: 4136.319336, avg loss: 2.506860, ppl: 12.266356 +epoch: 2, batch: 12544, sum loss: 3628.485352, avg loss: 2.498957, ppl: 12.169793 +epoch: 2, batch: 12545, sum loss: 4756.485352, avg loss: 2.612018, ppl: 13.626526 +epoch: 2, batch: 12546, sum loss: 3810.489258, avg loss: 2.355061, ppl: 10.538774 +epoch: 2, batch: 12547, sum loss: 3937.244629, avg loss: 2.400759, ppl: 11.031546 +epoch: 2, batch: 12548, sum loss: 3587.060059, avg loss: 2.327748, ppl: 10.254825 +epoch: 
2, batch: 12549, sum loss: 3490.595215, avg loss: 2.533088, ppl: 12.592331 +epoch: 2, batch: 12550, sum loss: 3815.941895, avg loss: 2.404500, ppl: 11.072898 +epoch: 2, batch: 12551, sum loss: 3861.536865, avg loss: 2.639465, ppl: 14.005707 +epoch: 2, batch: 12552, sum loss: 4366.262695, avg loss: 2.387241, ppl: 10.883421 +epoch: 2, batch: 12553, sum loss: 3766.196289, avg loss: 2.395799, ppl: 10.976967 +epoch: 2, batch: 12554, sum loss: 4175.515625, avg loss: 2.558527, ppl: 12.916774 +epoch: 2, batch: 12555, sum loss: 5817.962891, avg loss: 2.813328, ppl: 16.665293 +epoch: 2, batch: 12556, sum loss: 4354.276367, avg loss: 2.607351, ppl: 13.563075 +epoch: 2, batch: 12557, sum loss: 3995.750000, avg loss: 2.472618, ppl: 11.853435 +epoch: 2, batch: 12558, sum loss: 3542.670166, avg loss: 2.352371, ppl: 10.510458 +epoch: 2, batch: 12559, sum loss: 3887.517090, avg loss: 2.540861, ppl: 12.690592 +epoch: 2, batch: 12560, sum loss: 4490.309570, avg loss: 2.574719, ppl: 13.127624 +epoch: 2, batch: 12561, sum loss: 4167.436035, avg loss: 2.621029, ppl: 13.749864 +epoch: 2, batch: 12562, sum loss: 3716.842041, avg loss: 2.371948, ppl: 10.718248 +epoch: 2, batch: 12563, sum loss: 5299.860840, avg loss: 2.672648, ppl: 14.478256 +epoch: 2, batch: 12564, sum loss: 4634.994629, avg loss: 2.598091, ppl: 13.438066 +epoch: 2, batch: 12565, sum loss: 4115.726562, avg loss: 2.342474, ppl: 10.406949 +epoch: 2, batch: 12566, sum loss: 4707.113770, avg loss: 2.513141, ppl: 12.343645 +epoch: 2, batch: 12567, sum loss: 3684.768066, avg loss: 2.246810, ppl: 9.457516 +epoch: 2, batch: 12568, sum loss: 3053.562256, avg loss: 2.070212, ppl: 7.926501 +epoch: 2, batch: 12569, sum loss: 4273.817871, avg loss: 2.669468, ppl: 14.432288 +epoch: 2, batch: 12570, sum loss: 3477.991455, avg loss: 2.349994, ppl: 10.485509 +epoch: 2, batch: 12571, sum loss: 4205.198730, avg loss: 2.495667, ppl: 12.129821 +epoch: 2, batch: 12572, sum loss: 4021.201172, avg loss: 2.352956, ppl: 10.516606 +epoch: 2, batch: 
12573, sum loss: 2907.962646, avg loss: 2.107219, ppl: 8.225339 +epoch: 2, batch: 12574, sum loss: 3808.095947, avg loss: 2.492209, ppl: 12.087955 +epoch: 2, batch: 12575, sum loss: 5022.390625, avg loss: 2.692971, ppl: 14.775505 +epoch: 2, batch: 12576, sum loss: 3634.356934, avg loss: 2.503001, ppl: 12.219106 +epoch: 2, batch: 12577, sum loss: 4053.929688, avg loss: 2.585414, ppl: 13.268784 +epoch: 2, batch: 12578, sum loss: 4124.372070, avg loss: 2.449152, ppl: 11.578526 +epoch: 2, batch: 12579, sum loss: 5235.088867, avg loss: 2.966056, ppl: 19.415197 +epoch: 2, batch: 12580, sum loss: 3792.496582, avg loss: 2.513252, ppl: 12.345005 +epoch: 2, batch: 12581, sum loss: 4459.487305, avg loss: 2.573276, ppl: 13.108696 +epoch: 2, batch: 12582, sum loss: 4398.541992, avg loss: 2.442277, ppl: 11.499200 +epoch: 2, batch: 12583, sum loss: 4731.940918, avg loss: 2.611446, ppl: 13.618734 +epoch: 2, batch: 12584, sum loss: 4196.744629, avg loss: 2.339323, ppl: 10.374207 +epoch: 2, batch: 12585, sum loss: 4488.067383, avg loss: 2.539936, ppl: 12.678864 +epoch: 2, batch: 12586, sum loss: 4626.585449, avg loss: 2.862986, ppl: 17.513746 +epoch: 2, batch: 12587, sum loss: 3422.625977, avg loss: 2.283273, ppl: 9.808732 +epoch: 2, batch: 12588, sum loss: 4441.759766, avg loss: 2.757145, ppl: 15.754794 +epoch: 2, batch: 12589, sum loss: 3450.177734, avg loss: 2.459143, ppl: 11.694787 +epoch: 2, batch: 12590, sum loss: 4393.492188, avg loss: 2.520649, ppl: 12.436671 +epoch: 2, batch: 12591, sum loss: 4369.073730, avg loss: 2.565516, ppl: 13.007368 +epoch: 2, batch: 12592, sum loss: 3851.624756, avg loss: 2.502680, ppl: 12.215188 +epoch: 2, batch: 12593, sum loss: 5302.046387, avg loss: 2.724587, ppl: 15.250117 +epoch: 2, batch: 12594, sum loss: 3755.144043, avg loss: 2.478643, ppl: 11.925071 +epoch: 2, batch: 12595, sum loss: 4556.548828, avg loss: 2.541299, ppl: 12.696151 +epoch: 2, batch: 12596, sum loss: 3367.453369, avg loss: 2.419147, ppl: 11.236277 +epoch: 2, batch: 12597, 
sum loss: 4600.066895, avg loss: 2.547102, ppl: 12.770048 +epoch: 2, batch: 12598, sum loss: 4446.392090, avg loss: 2.696417, ppl: 14.826518 +epoch: 2, batch: 12599, sum loss: 5130.749512, avg loss: 2.770383, ppl: 15.964749 +epoch: 2, batch: 12600, sum loss: 4153.735352, avg loss: 2.494736, ppl: 12.118533 +epoch: 2, batch: 12601, sum loss: 4565.700195, avg loss: 2.657567, ppl: 14.261549 +epoch: 2, batch: 12602, sum loss: 4567.690918, avg loss: 2.483791, ppl: 11.986615 +epoch: 2, batch: 12603, sum loss: 3924.223877, avg loss: 2.505890, ppl: 12.254462 +epoch: 2, batch: 12604, sum loss: 3970.205078, avg loss: 2.455291, ppl: 11.649820 +epoch: 2, batch: 12605, sum loss: 5265.254883, avg loss: 2.809634, ppl: 16.603848 +epoch: 2, batch: 12606, sum loss: 4687.541504, avg loss: 2.529704, ppl: 12.549789 +epoch: 2, batch: 12607, sum loss: 4847.788086, avg loss: 2.620426, ppl: 13.741575 +epoch: 2, batch: 12608, sum loss: 3444.455566, avg loss: 2.308616, ppl: 10.060495 +epoch: 2, batch: 12609, sum loss: 4678.955078, avg loss: 2.701475, ppl: 14.901698 +epoch: 2, batch: 12610, sum loss: 4242.012207, avg loss: 2.540127, ppl: 12.681282 +epoch: 2, batch: 12611, sum loss: 4753.387695, avg loss: 2.646652, ppl: 14.106736 +epoch: 2, batch: 12612, sum loss: 4321.624023, avg loss: 2.382373, ppl: 10.830572 +epoch: 2, batch: 12613, sum loss: 3400.998047, avg loss: 2.437992, ppl: 11.450022 +epoch: 2, batch: 12614, sum loss: 3285.583740, avg loss: 2.072923, ppl: 7.948025 +epoch: 2, batch: 12615, sum loss: 4315.356934, avg loss: 2.616954, ppl: 13.693946 +epoch: 2, batch: 12616, sum loss: 4386.674805, avg loss: 2.628325, ppl: 13.850554 +epoch: 2, batch: 12617, sum loss: 4471.949219, avg loss: 2.509511, ppl: 12.298921 +epoch: 2, batch: 12618, sum loss: 3886.465088, avg loss: 2.372689, ppl: 10.726199 +epoch: 2, batch: 12619, sum loss: 4120.095703, avg loss: 2.477508, ppl: 11.911545 +epoch: 2, batch: 12620, sum loss: 3601.988525, avg loss: 2.462057, ppl: 11.728909 +epoch: 2, batch: 12621, sum 
loss: 4388.754883, avg loss: 2.720865, ppl: 15.193456 +epoch: 2, batch: 12622, sum loss: 4463.667969, avg loss: 2.390824, ppl: 10.922488 +epoch: 2, batch: 12623, sum loss: 4011.427734, avg loss: 2.297496, ppl: 9.949237 +epoch: 2, batch: 12624, sum loss: 4475.997070, avg loss: 2.546073, ppl: 12.756914 +epoch: 2, batch: 12625, sum loss: 4746.593262, avg loss: 2.424205, ppl: 11.293248 +epoch: 2, batch: 12626, sum loss: 3624.435059, avg loss: 2.430875, ppl: 11.368829 +epoch: 2, batch: 12627, sum loss: 4410.276367, avg loss: 2.546349, ppl: 12.760427 +epoch: 2, batch: 12628, sum loss: 4783.088379, avg loss: 2.629515, ppl: 13.867048 +epoch: 2, batch: 12629, sum loss: 3934.796875, avg loss: 2.439428, ppl: 11.466475 +epoch: 2, batch: 12630, sum loss: 5616.386230, avg loss: 2.887602, ppl: 17.950220 +epoch: 2, batch: 12631, sum loss: 4168.744629, avg loss: 2.502248, ppl: 12.209906 +epoch: 2, batch: 12632, sum loss: 4000.517822, avg loss: 2.543241, ppl: 12.720830 +epoch: 2, batch: 12633, sum loss: 4194.225586, avg loss: 2.656254, ppl: 14.242840 +epoch: 2, batch: 12634, sum loss: 3987.387207, avg loss: 2.459832, ppl: 11.702842 +epoch: 2, batch: 12635, sum loss: 4114.177246, avg loss: 2.404546, ppl: 11.073397 +epoch: 2, batch: 12636, sum loss: 3578.976074, avg loss: 2.464859, ppl: 11.761829 +epoch: 2, batch: 12637, sum loss: 4610.652832, avg loss: 2.669747, ppl: 14.436315 +epoch: 2, batch: 12638, sum loss: 3796.066162, avg loss: 2.466580, ppl: 11.782080 +epoch: 2, batch: 12639, sum loss: 3616.534912, avg loss: 2.309409, ppl: 10.068474 +epoch: 2, batch: 12640, sum loss: 3881.160156, avg loss: 2.372347, ppl: 10.722532 +epoch: 2, batch: 12641, sum loss: 3654.210693, avg loss: 2.336452, ppl: 10.344467 +epoch: 2, batch: 12642, sum loss: 4560.994141, avg loss: 2.540944, ppl: 12.691645 +epoch: 2, batch: 12643, sum loss: 4632.080078, avg loss: 2.715170, ppl: 15.107177 +epoch: 2, batch: 12644, sum loss: 5032.375000, avg loss: 2.587339, ppl: 13.294353 +epoch: 2, batch: 12645, sum loss: 
3155.962402, avg loss: 2.273748, ppl: 9.715749 +epoch: 2, batch: 12646, sum loss: 4247.794922, avg loss: 2.620478, ppl: 13.742290 +epoch: 2, batch: 12647, sum loss: 4550.432617, avg loss: 2.624240, ppl: 13.794089 +epoch: 2, batch: 12648, sum loss: 4900.660156, avg loss: 2.650438, ppl: 14.160240 +epoch: 2, batch: 12649, sum loss: 5239.385254, avg loss: 2.786907, ppl: 16.230740 +epoch: 2, batch: 12650, sum loss: 4494.774902, avg loss: 2.565511, ppl: 13.007303 +epoch: 2, batch: 12651, sum loss: 3538.275879, avg loss: 2.374682, ppl: 10.747595 +epoch: 2, batch: 12652, sum loss: 2720.348389, avg loss: 1.991470, ppl: 7.326297 +epoch: 2, batch: 12653, sum loss: 4269.871582, avg loss: 2.723132, ppl: 15.227947 +epoch: 2, batch: 12654, sum loss: 4181.330078, avg loss: 2.287380, ppl: 9.849096 +epoch: 2, batch: 12655, sum loss: 4489.582520, avg loss: 2.752656, ppl: 15.684241 +epoch: 2, batch: 12656, sum loss: 3711.007812, avg loss: 2.460881, ppl: 11.715122 +epoch: 2, batch: 12657, sum loss: 3395.378418, avg loss: 2.257565, ppl: 9.559785 +epoch: 2, batch: 12658, sum loss: 4875.888672, avg loss: 2.673185, ppl: 14.486029 +epoch: 2, batch: 12659, sum loss: 4228.923340, avg loss: 2.749625, ppl: 15.636766 +epoch: 2, batch: 12660, sum loss: 4447.923828, avg loss: 2.642854, ppl: 14.053257 +epoch: 2, batch: 12661, sum loss: 3570.177246, avg loss: 2.276899, ppl: 9.746407 +epoch: 2, batch: 12662, sum loss: 3482.121094, avg loss: 2.492570, ppl: 12.092319 +epoch: 2, batch: 12663, sum loss: 4413.638184, avg loss: 2.642897, ppl: 14.053861 +epoch: 2, batch: 12664, sum loss: 4214.163086, avg loss: 2.326982, ppl: 10.246964 +epoch: 2, batch: 12665, sum loss: 3711.436035, avg loss: 2.411589, ppl: 11.151671 +epoch: 2, batch: 12666, sum loss: 4562.587891, avg loss: 2.485070, ppl: 12.001954 +epoch: 2, batch: 12667, sum loss: 4197.437988, avg loss: 2.485162, ppl: 12.003062 +epoch: 2, batch: 12668, sum loss: 4635.032227, avg loss: 2.773808, ppl: 16.019512 +epoch: 2, batch: 12669, sum loss: 4535.964355, 
avg loss: 2.535475, ppl: 12.622422 +epoch: 2, batch: 12670, sum loss: 4606.151367, avg loss: 2.574707, ppl: 13.127474 +epoch: 2, batch: 12671, sum loss: 3951.447266, avg loss: 2.579274, ppl: 13.187556 +epoch: 2, batch: 12672, sum loss: 4344.422852, avg loss: 2.582891, ppl: 13.235346 +epoch: 2, batch: 12673, sum loss: 4494.846680, avg loss: 2.693138, ppl: 14.777971 +epoch: 2, batch: 12674, sum loss: 4884.492676, avg loss: 2.704592, ppl: 14.948213 +epoch: 2, batch: 12675, sum loss: 4203.963867, avg loss: 2.598247, ppl: 13.440158 +epoch: 2, batch: 12676, sum loss: 4500.353516, avg loss: 2.424759, ppl: 11.299510 +epoch: 2, batch: 12677, sum loss: 3842.114502, avg loss: 2.417945, ppl: 11.222772 +epoch: 2, batch: 12678, sum loss: 4325.662109, avg loss: 2.604252, ppl: 13.521103 +epoch: 2, batch: 12679, sum loss: 4236.384277, avg loss: 2.352240, ppl: 10.509082 +epoch: 2, batch: 12680, sum loss: 5085.350586, avg loss: 2.699231, ppl: 14.868292 +epoch: 2, batch: 12681, sum loss: 4109.456055, avg loss: 2.474086, ppl: 11.870847 +epoch: 2, batch: 12682, sum loss: 4748.631836, avg loss: 2.722839, ppl: 15.223486 +epoch: 2, batch: 12683, sum loss: 3769.166016, avg loss: 2.442752, ppl: 11.504657 +epoch: 2, batch: 12684, sum loss: 3937.993652, avg loss: 2.452051, ppl: 11.612135 +epoch: 2, batch: 12685, sum loss: 4296.388184, avg loss: 2.657012, ppl: 14.253635 +epoch: 2, batch: 12686, sum loss: 5166.031738, avg loss: 2.662903, ppl: 14.337852 +epoch: 2, batch: 12687, sum loss: 3740.034668, avg loss: 2.333147, ppl: 10.310338 +epoch: 2, batch: 12688, sum loss: 4300.958496, avg loss: 2.580059, ppl: 13.197918 +epoch: 2, batch: 12689, sum loss: 5098.858887, avg loss: 2.774134, ppl: 16.024746 +epoch: 2, batch: 12690, sum loss: 4360.674316, avg loss: 2.448442, ppl: 11.570300 +epoch: 2, batch: 12691, sum loss: 3942.087646, avg loss: 2.340907, ppl: 10.390660 +epoch: 2, batch: 12692, sum loss: 4265.883301, avg loss: 2.355540, ppl: 10.543824 +epoch: 2, batch: 12693, sum loss: 4489.468750, avg 
loss: 2.701245, ppl: 14.898266 +epoch: 2, batch: 12694, sum loss: 3671.926270, avg loss: 2.329902, ppl: 10.276938 +epoch: 2, batch: 12695, sum loss: 3911.926514, avg loss: 2.279677, ppl: 9.773527 +epoch: 2, batch: 12696, sum loss: 4193.650391, avg loss: 2.395003, ppl: 10.968232 +epoch: 2, batch: 12697, sum loss: 4424.301758, avg loss: 2.599472, ppl: 13.456635 +epoch: 2, batch: 12698, sum loss: 4024.842285, avg loss: 2.411529, ppl: 11.151001 +epoch: 2, batch: 12699, sum loss: 3883.917969, avg loss: 2.302263, ppl: 9.996782 +epoch: 2, batch: 12700, sum loss: 4678.840332, avg loss: 2.653908, ppl: 14.209464 +epoch: 2, batch: 12701, sum loss: 4839.489746, avg loss: 2.723404, ppl: 15.232090 +epoch: 2, batch: 12702, sum loss: 4254.247070, avg loss: 2.415813, ppl: 11.198874 +epoch: 2, batch: 12703, sum loss: 3928.885498, avg loss: 2.328919, ppl: 10.266834 +epoch: 2, batch: 12704, sum loss: 4800.096680, avg loss: 2.566897, ppl: 13.025340 +epoch: 2, batch: 12705, sum loss: 4359.012207, avg loss: 2.746700, ppl: 15.591093 +epoch: 2, batch: 12706, sum loss: 3842.274902, avg loss: 2.287068, ppl: 9.846030 +epoch: 2, batch: 12707, sum loss: 4033.941895, avg loss: 2.284225, ppl: 9.818076 +epoch: 2, batch: 12708, sum loss: 4960.204590, avg loss: 2.552859, ppl: 12.843769 +epoch: 2, batch: 12709, sum loss: 5074.054688, avg loss: 2.704720, ppl: 14.950130 +epoch: 2, batch: 12710, sum loss: 3965.885010, avg loss: 2.331502, ppl: 10.293390 +epoch: 2, batch: 12711, sum loss: 4749.776367, avg loss: 2.732898, ppl: 15.377382 +epoch: 2, batch: 12712, sum loss: 4117.913574, avg loss: 2.619538, ppl: 13.729377 +epoch: 2, batch: 12713, sum loss: 3595.496094, avg loss: 2.146565, ppl: 8.555420 +epoch: 2, batch: 12714, sum loss: 4194.877441, avg loss: 2.628369, ppl: 13.851165 +epoch: 2, batch: 12715, sum loss: 4682.084473, avg loss: 2.551545, ppl: 12.826905 +epoch: 2, batch: 12716, sum loss: 4225.765137, avg loss: 2.497497, ppl: 12.152040 +epoch: 2, batch: 12717, sum loss: 4579.058105, avg loss: 
2.514584, ppl: 12.361469 +epoch: 2, batch: 12718, sum loss: 4530.467773, avg loss: 2.519726, ppl: 12.425192 +epoch: 2, batch: 12719, sum loss: 4001.077637, avg loss: 2.465236, ppl: 11.766255 +epoch: 2, batch: 12720, sum loss: 4324.404297, avg loss: 2.462645, ppl: 11.735809 +epoch: 2, batch: 12721, sum loss: 3549.537109, avg loss: 2.239455, ppl: 9.388218 +epoch: 2, batch: 12722, sum loss: 4179.636230, avg loss: 2.465862, ppl: 11.773627 +epoch: 2, batch: 12723, sum loss: 4008.897705, avg loss: 2.646137, ppl: 14.099470 +epoch: 2, batch: 12724, sum loss: 4055.320312, avg loss: 2.518832, ppl: 12.414094 +epoch: 2, batch: 12725, sum loss: 4365.514160, avg loss: 2.752531, ppl: 15.682274 +epoch: 2, batch: 12726, sum loss: 4293.128906, avg loss: 2.467315, ppl: 11.790751 +epoch: 2, batch: 12727, sum loss: 4356.931641, avg loss: 2.478346, ppl: 11.921526 +epoch: 2, batch: 12728, sum loss: 4726.246094, avg loss: 2.781781, ppl: 16.147758 +epoch: 2, batch: 12729, sum loss: 3935.793457, avg loss: 2.582542, ppl: 13.230724 +epoch: 2, batch: 12730, sum loss: 4186.821289, avg loss: 2.327305, ppl: 10.250278 +epoch: 2, batch: 12731, sum loss: 5323.805664, avg loss: 2.527923, ppl: 12.527458 +epoch: 2, batch: 12732, sum loss: 4913.957031, avg loss: 2.790436, ppl: 16.288113 +epoch: 2, batch: 12733, sum loss: 3883.413086, avg loss: 2.392738, ppl: 10.943412 +epoch: 2, batch: 12734, sum loss: 3928.254150, avg loss: 2.494129, ppl: 12.111185 +epoch: 2, batch: 12735, sum loss: 4143.109375, avg loss: 2.448646, ppl: 11.572670 +epoch: 2, batch: 12736, sum loss: 4475.432617, avg loss: 2.663948, ppl: 14.352843 +epoch: 2, batch: 12737, sum loss: 3731.183594, avg loss: 2.464454, ppl: 11.757063 +epoch: 2, batch: 12738, sum loss: 3172.520752, avg loss: 2.069485, ppl: 7.920744 +epoch: 2, batch: 12739, sum loss: 3712.229980, avg loss: 2.432654, ppl: 11.389071 +epoch: 2, batch: 12740, sum loss: 4953.882812, avg loss: 2.769079, ppl: 15.943946 +epoch: 2, batch: 12741, sum loss: 3863.730713, avg loss: 2.324748, 
ppl: 10.224101 +epoch: 2, batch: 12742, sum loss: 3821.101807, avg loss: 2.242431, ppl: 9.416191 +epoch: 2, batch: 12743, sum loss: 3810.625244, avg loss: 2.414845, ppl: 11.188036 +epoch: 2, batch: 12744, sum loss: 4576.088867, avg loss: 2.473562, ppl: 11.864628 +epoch: 2, batch: 12745, sum loss: 4112.477539, avg loss: 2.436302, ppl: 11.430691 +epoch: 2, batch: 12746, sum loss: 3367.382324, avg loss: 2.221229, ppl: 9.218650 +epoch: 2, batch: 12747, sum loss: 3632.351318, avg loss: 2.325449, ppl: 10.231273 +epoch: 2, batch: 12748, sum loss: 4167.004395, avg loss: 2.674586, ppl: 14.506337 +epoch: 2, batch: 12749, sum loss: 3366.536865, avg loss: 2.222136, ppl: 9.227023 +epoch: 2, batch: 12750, sum loss: 3854.504639, avg loss: 2.525888, ppl: 12.501988 +epoch: 2, batch: 12751, sum loss: 4670.612793, avg loss: 2.531497, ppl: 12.572319 +epoch: 2, batch: 12752, sum loss: 4097.564453, avg loss: 2.474375, ppl: 11.874278 +epoch: 2, batch: 12753, sum loss: 4657.317383, avg loss: 2.676619, ppl: 14.535869 +epoch: 2, batch: 12754, sum loss: 4281.662109, avg loss: 2.453675, ppl: 11.631007 +epoch: 2, batch: 12755, sum loss: 3510.108887, avg loss: 2.376513, ppl: 10.767286 +epoch: 2, batch: 12756, sum loss: 4902.592773, avg loss: 2.650050, ppl: 14.154749 +epoch: 2, batch: 12757, sum loss: 4896.008789, avg loss: 2.781823, ppl: 16.148436 +epoch: 2, batch: 12758, sum loss: 5185.865723, avg loss: 2.742393, ppl: 15.524094 +epoch: 2, batch: 12759, sum loss: 4223.733398, avg loss: 2.477263, ppl: 11.908626 +epoch: 2, batch: 12760, sum loss: 4242.848145, avg loss: 2.554394, ppl: 12.863499 +epoch: 2, batch: 12761, sum loss: 3925.971680, avg loss: 2.820382, ppl: 16.783258 +epoch: 2, batch: 12762, sum loss: 3152.249023, avg loss: 2.347170, ppl: 10.455936 +epoch: 2, batch: 12763, sum loss: 4822.709473, avg loss: 2.631047, ppl: 13.888307 +epoch: 2, batch: 12764, sum loss: 4939.917480, avg loss: 2.472431, ppl: 11.851224 +epoch: 2, batch: 12765, sum loss: 3780.464600, avg loss: 2.474126, ppl: 
11.871328 +epoch: 2, batch: 12766, sum loss: 4096.965332, avg loss: 2.448874, ppl: 11.575300 +epoch: 2, batch: 12767, sum loss: 3950.383057, avg loss: 2.308815, ppl: 10.062498 +epoch: 2, batch: 12768, sum loss: 5367.925293, avg loss: 2.849217, ppl: 17.274258 +epoch: 2, batch: 12769, sum loss: 4609.768555, avg loss: 2.616214, ppl: 13.683816 +epoch: 2, batch: 12770, sum loss: 4593.341309, avg loss: 2.579080, ppl: 13.185004 +epoch: 2, batch: 12771, sum loss: 4132.300293, avg loss: 2.309838, ppl: 10.072793 +epoch: 2, batch: 12772, sum loss: 4458.925293, avg loss: 2.730511, ppl: 15.340731 +epoch: 2, batch: 12773, sum loss: 4762.305176, avg loss: 2.710475, ppl: 15.036419 +epoch: 2, batch: 12774, sum loss: 3941.144043, avg loss: 2.505495, ppl: 12.249622 +epoch: 2, batch: 12775, sum loss: 4211.641113, avg loss: 2.575927, ppl: 13.143499 +epoch: 2, batch: 12776, sum loss: 4187.234375, avg loss: 2.479120, ppl: 11.930767 +epoch: 2, batch: 12777, sum loss: 3920.627930, avg loss: 2.436686, ppl: 11.435082 +epoch: 2, batch: 12778, sum loss: 4917.968262, avg loss: 2.780084, ppl: 16.120373 +epoch: 2, batch: 12779, sum loss: 4194.170898, avg loss: 2.220313, ppl: 9.210212 +epoch: 2, batch: 12780, sum loss: 4876.674805, avg loss: 2.617646, ppl: 13.703427 +epoch: 2, batch: 12781, sum loss: 4368.869141, avg loss: 2.594341, ppl: 13.387756 +epoch: 2, batch: 12782, sum loss: 4267.825195, avg loss: 2.650823, ppl: 14.165694 +epoch: 2, batch: 12783, sum loss: 5524.114258, avg loss: 2.805543, ppl: 16.536057 +epoch: 2, batch: 12784, sum loss: 3482.461426, avg loss: 2.304739, ppl: 10.021567 +epoch: 2, batch: 12785, sum loss: 2885.528564, avg loss: 2.228207, ppl: 9.283210 +epoch: 2, batch: 12786, sum loss: 4545.187012, avg loss: 2.408684, ppl: 11.119319 +epoch: 2, batch: 12787, sum loss: 5933.961914, avg loss: 3.105160, ppl: 22.312799 +epoch: 2, batch: 12788, sum loss: 5224.986816, avg loss: 2.734164, ppl: 15.396863 +epoch: 2, batch: 12789, sum loss: 3872.922119, avg loss: 2.463691, ppl: 11.748094 
+epoch: 2, batch: 12790, sum loss: 4399.052734, avg loss: 2.469990, ppl: 11.822334 +epoch: 2, batch: 12791, sum loss: 4033.029053, avg loss: 2.514357, ppl: 12.358664 +epoch: 2, batch: 12792, sum loss: 4405.488281, avg loss: 2.599108, ppl: 13.451736 +epoch: 2, batch: 12793, sum loss: 4756.596680, avg loss: 2.610646, ppl: 13.607836 +epoch: 2, batch: 12794, sum loss: 4591.495117, avg loss: 2.602888, ppl: 13.502682 +epoch: 2, batch: 12795, sum loss: 4679.774414, avg loss: 2.518716, ppl: 12.412650 +epoch: 2, batch: 12796, sum loss: 3965.309082, avg loss: 2.470598, ppl: 11.829513 +epoch: 2, batch: 12797, sum loss: 4246.685059, avg loss: 2.530802, ppl: 12.563572 +epoch: 2, batch: 12798, sum loss: 4995.740234, avg loss: 2.622436, ppl: 13.769222 +epoch: 2, batch: 12799, sum loss: 4609.986328, avg loss: 2.788860, ppl: 16.262476 +epoch: 2, batch: 12800, sum loss: 3840.631348, avg loss: 2.313633, ppl: 10.111096 +epoch: 2, batch: 12801, sum loss: 3755.570312, avg loss: 2.384489, ppl: 10.853516 +epoch: 2, batch: 12802, sum loss: 4536.562500, avg loss: 2.732869, ppl: 15.376943 +epoch: 2, batch: 12803, sum loss: 4104.290039, avg loss: 2.313579, ppl: 10.110549 +epoch: 2, batch: 12804, sum loss: 3959.401367, avg loss: 2.625598, ppl: 13.812828 +epoch: 2, batch: 12805, sum loss: 4287.162598, avg loss: 2.505647, ppl: 12.251489 +epoch: 2, batch: 12806, sum loss: 4673.413086, avg loss: 2.681247, ppl: 14.603289 +epoch: 2, batch: 12807, sum loss: 4844.083496, avg loss: 2.416002, ppl: 11.200983 +epoch: 2, batch: 12808, sum loss: 3830.227051, avg loss: 2.325578, ppl: 10.232593 +epoch: 2, batch: 12809, sum loss: 4080.321777, avg loss: 2.542257, ppl: 12.708316 +epoch: 2, batch: 12810, sum loss: 3523.319336, avg loss: 2.249885, ppl: 9.486641 +epoch: 2, batch: 12811, sum loss: 4376.341797, avg loss: 2.721606, ppl: 15.204715 +epoch: 2, batch: 12812, sum loss: 3994.844727, avg loss: 2.562441, ppl: 12.967427 +epoch: 2, batch: 12813, sum loss: 4511.069824, avg loss: 2.514532, ppl: 12.360821 +epoch: 
2, batch: 12814, sum loss: 4019.052246, avg loss: 2.551779, ppl: 12.829908 +epoch: 2, batch: 12815, sum loss: 3672.001953, avg loss: 2.476063, ppl: 11.894350 +epoch: 2, batch: 12816, sum loss: 3834.092529, avg loss: 2.185914, ppl: 8.898774 +epoch: 2, batch: 12817, sum loss: 4216.011230, avg loss: 2.355314, ppl: 10.541433 +epoch: 2, batch: 12818, sum loss: 4431.298828, avg loss: 2.524956, ppl: 12.490352 +epoch: 2, batch: 12819, sum loss: 4964.702637, avg loss: 2.656342, ppl: 14.244082 +epoch: 2, batch: 12820, sum loss: 3751.259033, avg loss: 2.541503, ppl: 12.698749 +epoch: 2, batch: 12821, sum loss: 4949.646484, avg loss: 2.725576, ppl: 15.265206 +epoch: 2, batch: 12822, sum loss: 4078.237793, avg loss: 2.511230, ppl: 12.320077 +epoch: 2, batch: 12823, sum loss: 4537.230957, avg loss: 2.681579, ppl: 14.608136 +epoch: 2, batch: 12824, sum loss: 5006.718750, avg loss: 2.697586, ppl: 14.843849 +epoch: 2, batch: 12825, sum loss: 4429.074707, avg loss: 2.488244, ppl: 12.040118 +epoch: 2, batch: 12826, sum loss: 4285.545410, avg loss: 2.497404, ppl: 12.150910 +epoch: 2, batch: 12827, sum loss: 4608.250977, avg loss: 2.530616, ppl: 12.561236 +epoch: 2, batch: 12828, sum loss: 3610.147949, avg loss: 2.401961, ppl: 11.044818 +epoch: 2, batch: 12829, sum loss: 5836.511230, avg loss: 2.974776, ppl: 19.585241 +epoch: 2, batch: 12830, sum loss: 5158.112305, avg loss: 2.824815, ppl: 16.857830 +epoch: 2, batch: 12831, sum loss: 4607.016113, avg loss: 2.595502, ppl: 13.403313 +epoch: 2, batch: 12832, sum loss: 4001.287598, avg loss: 2.491462, ppl: 12.078919 +epoch: 2, batch: 12833, sum loss: 3872.294678, avg loss: 2.453926, ppl: 11.633927 +epoch: 2, batch: 12834, sum loss: 4394.691406, avg loss: 2.559517, ppl: 12.929576 +epoch: 2, batch: 12835, sum loss: 4304.190918, avg loss: 2.455329, ppl: 11.650263 +epoch: 2, batch: 12836, sum loss: 3853.054688, avg loss: 2.396178, ppl: 10.981129 +epoch: 2, batch: 12837, sum loss: 4763.761230, avg loss: 2.543386, ppl: 12.722673 +epoch: 2, 
batch: 12838, sum loss: 4634.353027, avg loss: 2.529669, ppl: 12.549349 +epoch: 2, batch: 12839, sum loss: 3813.201660, avg loss: 2.416478, ppl: 11.206318 +epoch: 2, batch: 12840, sum loss: 4004.959961, avg loss: 2.362808, ppl: 10.620735 +epoch: 2, batch: 12841, sum loss: 4888.026367, avg loss: 2.497714, ppl: 12.154677 +epoch: 2, batch: 12842, sum loss: 3977.664062, avg loss: 2.486040, ppl: 12.013606 +epoch: 2, batch: 12843, sum loss: 3421.300049, avg loss: 2.457830, ppl: 11.679444 +epoch: 2, batch: 12844, sum loss: 3576.681641, avg loss: 2.300117, ppl: 9.975349 +epoch: 2, batch: 12845, sum loss: 3390.231445, avg loss: 2.377441, ppl: 10.777293 +epoch: 2, batch: 12846, sum loss: 4920.691895, avg loss: 2.595302, ppl: 13.400632 +epoch: 2, batch: 12847, sum loss: 4429.890137, avg loss: 2.655809, ppl: 14.236505 +epoch: 2, batch: 12848, sum loss: 4165.411133, avg loss: 2.456021, ppl: 11.658329 +epoch: 2, batch: 12849, sum loss: 3810.070068, avg loss: 2.247829, ppl: 9.467160 +epoch: 2, batch: 12850, sum loss: 4009.248535, avg loss: 2.588282, ppl: 13.306889 +epoch: 2, batch: 12851, sum loss: 4147.289062, avg loss: 2.655115, ppl: 14.226617 +epoch: 2, batch: 12852, sum loss: 4338.857422, avg loss: 2.505114, ppl: 12.244956 +epoch: 2, batch: 12853, sum loss: 3826.336182, avg loss: 2.392956, ppl: 10.945797 +epoch: 2, batch: 12854, sum loss: 4708.649902, avg loss: 2.530172, ppl: 12.555667 +epoch: 2, batch: 12855, sum loss: 3312.417725, avg loss: 2.412540, ppl: 11.162279 +epoch: 2, batch: 12856, sum loss: 3806.919434, avg loss: 2.255284, ppl: 9.538002 +epoch: 2, batch: 12857, sum loss: 4912.923828, avg loss: 2.439386, ppl: 11.466000 +epoch: 2, batch: 12858, sum loss: 4379.287109, avg loss: 2.641307, ppl: 14.031532 +epoch: 2, batch: 12859, sum loss: 5042.158691, avg loss: 2.892805, ppl: 18.043844 +epoch: 2, batch: 12860, sum loss: 4375.412598, avg loss: 2.474781, ppl: 11.879106 +epoch: 2, batch: 12861, sum loss: 5066.719727, avg loss: 2.592999, ppl: 13.369804 +epoch: 2, batch: 
12862, sum loss: 3996.823242, avg loss: 2.567003, ppl: 13.026722 +epoch: 2, batch: 12863, sum loss: 4369.776367, avg loss: 2.460460, ppl: 11.710194 +epoch: 2, batch: 12864, sum loss: 4016.211914, avg loss: 2.431121, ppl: 11.371624 +epoch: 2, batch: 12865, sum loss: 4158.816895, avg loss: 2.436331, ppl: 11.431024 +epoch: 2, batch: 12866, sum loss: 4482.106445, avg loss: 2.505370, ppl: 12.248086 +epoch: 2, batch: 12867, sum loss: 3823.914551, avg loss: 2.529044, ppl: 12.541512 +epoch: 2, batch: 12868, sum loss: 3739.092529, avg loss: 2.416996, ppl: 11.212128 +epoch: 2, batch: 12869, sum loss: 3943.899414, avg loss: 2.603234, ppl: 13.507351 +epoch: 2, batch: 12870, sum loss: 4021.685059, avg loss: 2.462759, ppl: 11.737147 +epoch: 2, batch: 12871, sum loss: 4147.843750, avg loss: 2.628545, ppl: 13.853596 +epoch: 2, batch: 12872, sum loss: 4316.049316, avg loss: 2.584461, ppl: 13.256139 +epoch: 2, batch: 12873, sum loss: 4190.110840, avg loss: 2.580117, ppl: 13.198689 +epoch: 2, batch: 12874, sum loss: 3672.354248, avg loss: 2.446605, ppl: 11.549073 +epoch: 2, batch: 12875, sum loss: 4100.764648, avg loss: 2.494382, ppl: 12.114249 +epoch: 2, batch: 12876, sum loss: 4493.030273, avg loss: 2.653887, ppl: 14.209160 +epoch: 2, batch: 12877, sum loss: 4881.869629, avg loss: 2.691218, ppl: 14.749632 +epoch: 2, batch: 12878, sum loss: 5302.730469, avg loss: 2.659343, ppl: 14.286903 +epoch: 2, batch: 12879, sum loss: 3754.765625, avg loss: 2.376434, ppl: 10.766439 +epoch: 2, batch: 12880, sum loss: 5014.021484, avg loss: 2.850496, ppl: 17.296350 +epoch: 2, batch: 12881, sum loss: 3960.542969, avg loss: 2.397423, ppl: 10.994809 +epoch: 2, batch: 12882, sum loss: 4041.318604, avg loss: 2.474782, ppl: 11.879114 +epoch: 2, batch: 12883, sum loss: 3730.251465, avg loss: 2.495152, ppl: 12.123570 +epoch: 2, batch: 12884, sum loss: 4626.278320, avg loss: 2.666443, ppl: 14.388696 +epoch: 2, batch: 12885, sum loss: 4397.388184, avg loss: 2.611276, ppl: 13.616410 +epoch: 2, batch: 12886, 
sum loss: 4160.719727, avg loss: 2.518596, ppl: 12.411156 +epoch: 2, batch: 12887, sum loss: 4209.332520, avg loss: 2.362139, ppl: 10.613635 +epoch: 2, batch: 12888, sum loss: 4820.106934, avg loss: 2.773364, ppl: 16.012415 +epoch: 2, batch: 12889, sum loss: 3123.940186, avg loss: 1.969697, ppl: 7.168508 +epoch: 2, batch: 12890, sum loss: 4045.115723, avg loss: 2.589703, ppl: 13.325811 +epoch: 2, batch: 12891, sum loss: 4290.318848, avg loss: 2.606512, ppl: 13.551701 +epoch: 2, batch: 12892, sum loss: 4220.266602, avg loss: 2.693214, ppl: 14.779106 +epoch: 2, batch: 12893, sum loss: 4052.508301, avg loss: 2.542351, ppl: 12.709522 +epoch: 2, batch: 12894, sum loss: 3651.035156, avg loss: 2.228959, ppl: 9.290191 +epoch: 2, batch: 12895, sum loss: 4437.580078, avg loss: 2.735869, ppl: 15.423143 +epoch: 2, batch: 12896, sum loss: 4479.553223, avg loss: 2.420072, ppl: 11.246670 +epoch: 2, batch: 12897, sum loss: 4259.014648, avg loss: 2.643709, ppl: 14.065271 +epoch: 2, batch: 12898, sum loss: 4496.931641, avg loss: 2.434722, ppl: 11.412647 +epoch: 2, batch: 12899, sum loss: 3968.188965, avg loss: 2.617539, ppl: 13.701961 +epoch: 2, batch: 12900, sum loss: 4330.245605, avg loss: 2.503032, ppl: 12.219487 +epoch: 2, batch: 12901, sum loss: 4668.464355, avg loss: 2.701658, ppl: 14.904416 +epoch: 2, batch: 12902, sum loss: 4391.744141, avg loss: 2.707610, ppl: 14.993404 +epoch: 2, batch: 12903, sum loss: 3983.190918, avg loss: 2.486386, ppl: 12.017769 +epoch: 2, batch: 12904, sum loss: 4711.204590, avg loss: 2.602876, ppl: 13.502512 +epoch: 2, batch: 12905, sum loss: 4394.863770, avg loss: 2.797495, ppl: 16.403500 +epoch: 2, batch: 12906, sum loss: 3651.322754, avg loss: 2.389609, ppl: 10.909228 +epoch: 2, batch: 12907, sum loss: 4779.255371, avg loss: 2.743545, ppl: 15.541981 +epoch: 2, batch: 12908, sum loss: 4283.662109, avg loss: 2.758314, ppl: 15.773232 +epoch: 2, batch: 12909, sum loss: 4340.744629, avg loss: 2.750789, ppl: 15.654974 +epoch: 2, batch: 12910, sum loss: 
3444.074463, avg loss: 2.167448, ppl: 8.735960 +epoch: 2, batch: 12911, sum loss: 4830.096680, avg loss: 2.772731, ppl: 16.002270 +epoch: 2, batch: 12912, sum loss: 4652.402344, avg loss: 2.770936, ppl: 15.973586 +epoch: 2, batch: 12913, sum loss: 4013.663574, avg loss: 2.333525, ppl: 10.314240 +epoch: 2, batch: 12914, sum loss: 4739.379395, avg loss: 2.571557, ppl: 13.086182 +epoch: 2, batch: 12915, sum loss: 5255.498047, avg loss: 2.741522, ppl: 15.510579 +epoch: 2, batch: 12916, sum loss: 5061.022949, avg loss: 2.764076, ppl: 15.864374 +epoch: 2, batch: 12917, sum loss: 3592.463379, avg loss: 2.325219, ppl: 10.228922 +epoch: 2, batch: 12918, sum loss: 4469.111816, avg loss: 2.660186, ppl: 14.298945 +epoch: 2, batch: 12919, sum loss: 4568.315430, avg loss: 2.540776, ppl: 12.689514 +epoch: 2, batch: 12920, sum loss: 4143.885742, avg loss: 2.591548, ppl: 13.350425 +epoch: 2, batch: 12921, sum loss: 4156.594238, avg loss: 2.384736, ppl: 10.856192 +epoch: 2, batch: 12922, sum loss: 5022.469727, avg loss: 2.843981, ppl: 17.184032 +epoch: 2, batch: 12923, sum loss: 4463.562988, avg loss: 2.447129, ppl: 11.555121 +epoch: 2, batch: 12924, sum loss: 4334.766602, avg loss: 2.656107, ppl: 14.240745 +epoch: 2, batch: 12925, sum loss: 4271.267578, avg loss: 2.550011, ppl: 12.807240 +epoch: 2, batch: 12926, sum loss: 4100.486816, avg loss: 2.477636, ppl: 11.913064 +epoch: 2, batch: 12927, sum loss: 5036.838379, avg loss: 2.735925, ppl: 15.424007 +epoch: 2, batch: 12928, sum loss: 4331.834473, avg loss: 2.454297, ppl: 11.638249 +epoch: 2, batch: 12929, sum loss: 3463.462158, avg loss: 2.362525, ppl: 10.617732 +epoch: 2, batch: 12930, sum loss: 3714.445068, avg loss: 2.336129, ppl: 10.341128 +epoch: 2, batch: 12931, sum loss: 3639.137939, avg loss: 2.081887, ppl: 8.019586 +epoch: 2, batch: 12932, sum loss: 4477.139648, avg loss: 2.336712, ppl: 10.347157 +epoch: 2, batch: 12933, sum loss: 4225.331055, avg loss: 2.415855, ppl: 11.199347 +epoch: 2, batch: 12934, sum loss: 
4179.147461, avg loss: 2.349155, ppl: 10.476718 +epoch: 2, batch: 12935, sum loss: 3395.680664, avg loss: 2.332198, ppl: 10.300561 +epoch: 2, batch: 12936, sum loss: 5064.282227, avg loss: 2.737450, ppl: 15.447542 +epoch: 2, batch: 12937, sum loss: 3661.050049, avg loss: 2.477030, ppl: 11.905849 +epoch: 2, batch: 12938, sum loss: 4080.876465, avg loss: 2.495949, ppl: 12.133240 +epoch: 2, batch: 12939, sum loss: 4511.951660, avg loss: 2.687285, ppl: 14.691732 +epoch: 2, batch: 12940, sum loss: 4993.377930, avg loss: 2.569932, ppl: 13.064939 +epoch: 2, batch: 12941, sum loss: 3575.692871, avg loss: 2.273168, ppl: 9.710110 +epoch: 2, batch: 12942, sum loss: 4714.073242, avg loss: 2.583054, ppl: 13.237501 +epoch: 2, batch: 12943, sum loss: 3815.952148, avg loss: 2.309898, ppl: 10.073400 +epoch: 2, batch: 12944, sum loss: 4292.718262, avg loss: 2.384844, ppl: 10.857365 +epoch: 2, batch: 12945, sum loss: 4409.309082, avg loss: 2.471586, ppl: 11.841212 +epoch: 2, batch: 12946, sum loss: 4483.800293, avg loss: 2.648435, ppl: 14.131907 +epoch: 2, batch: 12947, sum loss: 3592.316650, avg loss: 2.242395, ppl: 9.415854 +epoch: 2, batch: 12948, sum loss: 4914.428223, avg loss: 2.753181, ppl: 15.692473 +epoch: 2, batch: 12949, sum loss: 5087.293945, avg loss: 2.585007, ppl: 13.263385 +epoch: 2, batch: 12950, sum loss: 5354.116211, avg loss: 2.938593, ppl: 18.889248 +epoch: 2, batch: 12951, sum loss: 4123.851562, avg loss: 2.526870, ppl: 12.514276 +epoch: 2, batch: 12952, sum loss: 5299.075195, avg loss: 2.890930, ppl: 18.010059 +epoch: 2, batch: 12953, sum loss: 3205.449951, avg loss: 2.372650, ppl: 10.725775 +epoch: 2, batch: 12954, sum loss: 4129.980957, avg loss: 2.522896, ppl: 12.464645 +epoch: 2, batch: 12955, sum loss: 4784.582520, avg loss: 2.639042, ppl: 13.999784 +epoch: 2, batch: 12956, sum loss: 4120.533691, avg loss: 2.707315, ppl: 14.988983 +epoch: 2, batch: 12957, sum loss: 4648.092773, avg loss: 2.476341, ppl: 11.897657 +epoch: 2, batch: 12958, sum loss: 
4275.444824, avg loss: 2.477083, ppl: 11.906479 +epoch: 2, batch: 12959, sum loss: 4586.884766, avg loss: 2.730289, ppl: 15.337315 +epoch: 2, batch: 12960, sum loss: 4078.021729, avg loss: 2.495729, ppl: 12.130579 +epoch: 2, batch: 12961, sum loss: 3998.294922, avg loss: 2.508341, ppl: 12.284528 +epoch: 2, batch: 12962, sum loss: 4388.138672, avg loss: 2.447372, ppl: 11.557938 +epoch: 2, batch: 12963, sum loss: 3594.313965, avg loss: 2.305525, ppl: 10.029446 +epoch: 2, batch: 12964, sum loss: 4132.201660, avg loss: 2.362608, ppl: 10.618610 +epoch: 2, batch: 12965, sum loss: 3364.029297, avg loss: 2.307290, ppl: 10.047163 +epoch: 2, batch: 12966, sum loss: 3839.468994, avg loss: 2.216784, ppl: 9.177763 +epoch: 2, batch: 12967, sum loss: 3641.382324, avg loss: 2.332724, ppl: 10.305978 +epoch: 2, batch: 12968, sum loss: 4798.205566, avg loss: 2.542769, ppl: 12.714832 +epoch: 2, batch: 12969, sum loss: 4138.334473, avg loss: 2.659598, ppl: 14.290542 +epoch: 2, batch: 12970, sum loss: 4570.757812, avg loss: 2.507273, ppl: 12.271417 +epoch: 2, batch: 12971, sum loss: 3428.869141, avg loss: 2.391122, ppl: 10.925747 +epoch: 2, batch: 12972, sum loss: 3636.473389, avg loss: 2.463735, ppl: 11.748614 +epoch: 2, batch: 12973, sum loss: 4297.864258, avg loss: 2.467201, ppl: 11.789402 +epoch: 2, batch: 12974, sum loss: 4605.654785, avg loss: 2.435566, ppl: 11.422278 +epoch: 2, batch: 12975, sum loss: 3996.025879, avg loss: 2.588100, ppl: 13.304465 +epoch: 2, batch: 12976, sum loss: 3690.921143, avg loss: 2.162227, ppl: 8.690469 +epoch: 2, batch: 12977, sum loss: 3369.281738, avg loss: 2.258232, ppl: 9.566160 +epoch: 2, batch: 12978, sum loss: 4130.701172, avg loss: 2.448549, ppl: 11.571539 +epoch: 2, batch: 12979, sum loss: 3040.574951, avg loss: 2.203315, ppl: 9.054983 +epoch: 2, batch: 12980, sum loss: 4156.422852, avg loss: 2.403946, ppl: 11.066762 +epoch: 2, batch: 12981, sum loss: 4273.319336, avg loss: 2.794846, ppl: 16.360109 +epoch: 2, batch: 12982, sum loss: 
4833.973145, avg loss: 2.493024, ppl: 12.097803 +epoch: 2, batch: 12983, sum loss: 3994.699463, avg loss: 2.789595, ppl: 16.274422 +epoch: 2, batch: 12984, sum loss: 4524.032715, avg loss: 2.627197, ppl: 13.834930 +epoch: 2, batch: 12985, sum loss: 5322.776367, avg loss: 2.831264, ppl: 16.966894 +epoch: 2, batch: 12986, sum loss: 3832.868652, avg loss: 2.546757, ppl: 12.765635 +epoch: 2, batch: 12987, sum loss: 5060.863770, avg loss: 2.880400, ppl: 17.821407 +epoch: 2, batch: 12988, sum loss: 3765.967773, avg loss: 2.203609, ppl: 9.057644 +epoch: 2, batch: 12989, sum loss: 3327.405029, avg loss: 2.335021, ppl: 10.329677 +epoch: 2, batch: 12990, sum loss: 4931.711426, avg loss: 2.762864, ppl: 15.845153 +epoch: 2, batch: 12991, sum loss: 3598.493896, avg loss: 2.198225, ppl: 9.009007 +epoch: 2, batch: 12992, sum loss: 4273.925293, avg loss: 2.500834, ppl: 12.192658 +epoch: 2, batch: 12993, sum loss: 3794.696045, avg loss: 2.427829, ppl: 11.334244 +epoch: 2, batch: 12994, sum loss: 3376.721191, avg loss: 2.349841, ppl: 10.483901 +epoch: 2, batch: 12995, sum loss: 3790.429688, avg loss: 2.225737, ppl: 9.260303 +epoch: 2, batch: 12996, sum loss: 4511.505371, avg loss: 2.434703, ppl: 11.412433 +epoch: 2, batch: 12997, sum loss: 3186.540039, avg loss: 2.030937, ppl: 7.621224 +epoch: 2, batch: 12998, sum loss: 4046.462891, avg loss: 2.434695, ppl: 11.412337 +epoch: 2, batch: 12999, sum loss: 3608.419678, avg loss: 2.436475, ppl: 11.432673 +epoch: 2, batch: 13000, sum loss: 4114.274414, avg loss: 2.476986, ppl: 11.905333 +epoch: 2, batch: 13001, sum loss: 4262.632324, avg loss: 2.528252, ppl: 12.531577 +epoch: 2, batch: 13002, sum loss: 4297.665527, avg loss: 2.550543, ppl: 12.814064 +epoch: 2, batch: 13003, sum loss: 4337.610840, avg loss: 2.670943, ppl: 14.453586 +epoch: 2, batch: 13004, sum loss: 4197.664551, avg loss: 2.525671, ppl: 12.499276 +epoch: 2, batch: 13005, sum loss: 3731.159912, avg loss: 2.405648, ppl: 11.085608 +epoch: 2, batch: 13006, sum loss: 
4272.878906, avg loss: 2.814808, ppl: 16.689978 +epoch: 2, batch: 13007, sum loss: 4950.291016, avg loss: 2.628938, ppl: 13.859050 +epoch: 2, batch: 13008, sum loss: 4805.011719, avg loss: 2.764679, ppl: 15.873940 +epoch: 2, batch: 13009, sum loss: 4233.954102, avg loss: 2.348283, ppl: 10.467580 +epoch: 2, batch: 13010, sum loss: 3590.065430, avg loss: 2.346448, ppl: 10.448390 +epoch: 2, batch: 13011, sum loss: 4434.382812, avg loss: 2.642660, ppl: 14.050524 +epoch: 2, batch: 13012, sum loss: 3701.509033, avg loss: 2.551006, ppl: 12.819989 +epoch: 2, batch: 13013, sum loss: 4240.058594, avg loss: 2.466584, ppl: 11.782136 +epoch: 2, batch: 13014, sum loss: 3613.113281, avg loss: 2.363057, ppl: 10.623374 +epoch: 2, batch: 13015, sum loss: 3533.230957, avg loss: 2.380883, ppl: 10.814453 +epoch: 2, batch: 13016, sum loss: 4887.518555, avg loss: 2.817014, ppl: 16.726822 +epoch: 2, batch: 13017, sum loss: 4493.557617, avg loss: 2.530156, ppl: 12.555470 +epoch: 2, batch: 13018, sum loss: 3879.294189, avg loss: 2.567369, ppl: 13.031490 +epoch: 2, batch: 13019, sum loss: 4542.804199, avg loss: 2.644240, ppl: 14.072744 +epoch: 2, batch: 13020, sum loss: 4185.056641, avg loss: 2.501528, ppl: 12.201126 +epoch: 2, batch: 13021, sum loss: 4715.051270, avg loss: 2.510677, ppl: 12.313262 +epoch: 2, batch: 13022, sum loss: 5523.206543, avg loss: 2.793731, ppl: 16.341881 +epoch: 2, batch: 13023, sum loss: 3935.854980, avg loss: 2.348362, ppl: 10.468410 +epoch: 2, batch: 13024, sum loss: 4077.489990, avg loss: 2.535753, ppl: 12.625929 +epoch: 2, batch: 13025, sum loss: 3883.706299, avg loss: 2.406262, ppl: 11.092417 +epoch: 2, batch: 13026, sum loss: 4732.603516, avg loss: 2.705891, ppl: 14.967649 +epoch: 2, batch: 13027, sum loss: 4790.987305, avg loss: 2.733022, ppl: 15.379292 +epoch: 2, batch: 13028, sum loss: 4461.052246, avg loss: 2.616453, ppl: 13.687088 +epoch: 2, batch: 13029, sum loss: 4005.544922, avg loss: 2.457390, ppl: 11.674297 +epoch: 2, batch: 13030, sum loss: 
3653.944824, avg loss: 2.349804, ppl: 10.483514 +epoch: 2, batch: 13031, sum loss: 4086.500732, avg loss: 2.323195, ppl: 10.208242 +epoch: 2, batch: 13032, sum loss: 4330.708984, avg loss: 2.423452, ppl: 11.284749 +epoch: 2, batch: 13033, sum loss: 4198.641113, avg loss: 2.791650, ppl: 16.307899 +epoch: 2, batch: 13034, sum loss: 4923.417969, avg loss: 2.459250, ppl: 11.696033 +epoch: 2, batch: 13035, sum loss: 4064.842285, avg loss: 2.341499, ppl: 10.396811 +epoch: 2, batch: 13036, sum loss: 5298.468750, avg loss: 2.569577, ppl: 13.060305 +epoch: 2, batch: 13037, sum loss: 3039.814453, avg loss: 2.191647, ppl: 8.949942 +epoch: 2, batch: 13038, sum loss: 3853.018555, avg loss: 2.443259, ppl: 11.510487 +epoch: 2, batch: 13039, sum loss: 3808.794678, avg loss: 2.206717, ppl: 9.085843 +epoch: 2, batch: 13040, sum loss: 3722.995605, avg loss: 2.549997, ppl: 12.807067 +epoch: 2, batch: 13041, sum loss: 4520.068359, avg loss: 2.584373, ppl: 13.254975 +epoch: 2, batch: 13042, sum loss: 3720.030762, avg loss: 2.326473, ppl: 10.241757 +epoch: 2, batch: 13043, sum loss: 3311.875244, avg loss: 2.298317, ppl: 9.957415 +epoch: 2, batch: 13044, sum loss: 3388.199219, avg loss: 2.555203, ppl: 12.873912 +epoch: 2, batch: 13045, sum loss: 4422.778320, avg loss: 2.595527, ppl: 13.403651 +epoch: 2, batch: 13046, sum loss: 4413.381348, avg loss: 2.689446, ppl: 14.723520 +epoch: 2, batch: 13047, sum loss: 4256.266602, avg loss: 2.514038, ppl: 12.354722 +epoch: 2, batch: 13048, sum loss: 4415.025879, avg loss: 2.480352, ppl: 11.945465 +epoch: 2, batch: 13049, sum loss: 3562.848877, avg loss: 2.394388, ppl: 10.961485 +epoch: 2, batch: 13050, sum loss: 4282.376953, avg loss: 2.628838, ppl: 13.857656 +epoch: 2, batch: 13051, sum loss: 4337.788086, avg loss: 2.524906, ppl: 12.489717 +epoch: 2, batch: 13052, sum loss: 3595.920166, avg loss: 2.468030, ppl: 11.799182 +epoch: 2, batch: 13053, sum loss: 4651.527344, avg loss: 2.622056, ppl: 13.763990 +epoch: 2, batch: 13054, sum loss: 
5172.274414, avg loss: 2.640263, ppl: 14.016884 +epoch: 2, batch: 13055, sum loss: 4230.515137, avg loss: 2.427146, ppl: 11.326507 +epoch: 2, batch: 13056, sum loss: 4518.773438, avg loss: 2.608992, ppl: 13.585345 +epoch: 2, batch: 13057, sum loss: 4199.910156, avg loss: 2.368816, ppl: 10.684731 +epoch: 2, batch: 13058, sum loss: 3992.138916, avg loss: 2.356634, ppl: 10.555367 +epoch: 2, batch: 13059, sum loss: 4320.841797, avg loss: 2.359826, ppl: 10.589112 +epoch: 2, batch: 13060, sum loss: 3807.439453, avg loss: 2.534913, ppl: 12.615335 +epoch: 2, batch: 13061, sum loss: 4783.367188, avg loss: 2.727119, ppl: 15.288779 +epoch: 2, batch: 13062, sum loss: 4964.675293, avg loss: 2.790711, ppl: 16.292603 +epoch: 2, batch: 13063, sum loss: 3987.218994, avg loss: 2.322201, ppl: 10.198095 +epoch: 2, batch: 13064, sum loss: 3939.163574, avg loss: 2.360194, ppl: 10.593006 +epoch: 2, batch: 13065, sum loss: 3540.320312, avg loss: 2.298909, ppl: 9.963308 +epoch: 2, batch: 13066, sum loss: 4021.364258, avg loss: 2.572850, ppl: 13.103112 +epoch: 2, batch: 13067, sum loss: 4282.067383, avg loss: 2.469474, ppl: 11.816225 +epoch: 2, batch: 13068, sum loss: 4032.614258, avg loss: 2.380528, ppl: 10.810607 +epoch: 2, batch: 13069, sum loss: 4843.411621, avg loss: 2.601188, ppl: 13.479742 +epoch: 2, batch: 13070, sum loss: 3549.760010, avg loss: 2.165808, ppl: 8.721650 +epoch: 2, batch: 13071, sum loss: 3230.049072, avg loss: 2.223021, ppl: 9.235187 +epoch: 2, batch: 13072, sum loss: 3756.464355, avg loss: 2.504310, ppl: 12.235109 +epoch: 2, batch: 13073, sum loss: 4368.400879, avg loss: 2.424196, ppl: 11.293146 +epoch: 2, batch: 13074, sum loss: 3946.805664, avg loss: 2.436300, ppl: 11.430667 +epoch: 2, batch: 13075, sum loss: 3689.231201, avg loss: 2.273094, ppl: 9.709393 +epoch: 2, batch: 13076, sum loss: 4217.441406, avg loss: 2.376023, ppl: 10.762020 +epoch: 2, batch: 13077, sum loss: 4337.706055, avg loss: 2.557610, ppl: 12.904935 +epoch: 2, batch: 13078, sum loss: 
3536.354004, avg loss: 2.332687, ppl: 10.305599 +epoch: 2, batch: 13079, sum loss: 3249.511719, avg loss: 2.197100, ppl: 8.998882 +epoch: 2, batch: 13080, sum loss: 3887.808594, avg loss: 2.508264, ppl: 12.283582 +epoch: 2, batch: 13081, sum loss: 4086.132324, avg loss: 2.477946, ppl: 11.916757 +epoch: 2, batch: 13082, sum loss: 3871.008789, avg loss: 2.492601, ppl: 12.092685 +epoch: 2, batch: 13083, sum loss: 4309.568359, avg loss: 2.655310, ppl: 14.229399 +epoch: 2, batch: 13084, sum loss: 3374.135742, avg loss: 2.408377, ppl: 11.115902 +epoch: 2, batch: 13085, sum loss: 4373.161133, avg loss: 2.454075, ppl: 11.635664 +epoch: 2, batch: 13086, sum loss: 3680.974854, avg loss: 2.550918, ppl: 12.818868 +epoch: 2, batch: 13087, sum loss: 5206.256348, avg loss: 2.778152, ppl: 16.089256 +epoch: 2, batch: 13088, sum loss: 4636.019043, avg loss: 2.576998, ppl: 13.157577 +epoch: 2, batch: 13089, sum loss: 4876.688477, avg loss: 2.607855, ppl: 13.569910 +epoch: 2, batch: 13090, sum loss: 3496.189453, avg loss: 2.318428, ppl: 10.159691 +epoch: 2, batch: 13091, sum loss: 3919.803467, avg loss: 2.424121, ppl: 11.292298 +epoch: 2, batch: 13092, sum loss: 3683.055176, avg loss: 2.356401, ppl: 10.552905 +epoch: 2, batch: 13093, sum loss: 4352.331055, avg loss: 2.438281, ppl: 11.453334 +epoch: 2, batch: 13094, sum loss: 4653.739746, avg loss: 2.647178, ppl: 14.114158 +epoch: 2, batch: 13095, sum loss: 5135.609375, avg loss: 2.720132, ppl: 15.182328 +epoch: 2, batch: 13096, sum loss: 4378.562988, avg loss: 2.535358, ppl: 12.620948 +epoch: 2, batch: 13097, sum loss: 4518.353027, avg loss: 2.614788, ppl: 13.664313 +epoch: 2, batch: 13098, sum loss: 3330.683105, avg loss: 2.343901, ppl: 10.421809 +epoch: 2, batch: 13099, sum loss: 3414.189941, avg loss: 2.361127, ppl: 10.602899 +epoch: 2, batch: 13100, sum loss: 3694.566650, avg loss: 2.311994, ppl: 10.094534 +epoch: 2, batch: 13101, sum loss: 3808.557617, avg loss: 2.484382, ppl: 11.993708 +epoch: 2, batch: 13102, sum loss: 
5016.077637, avg loss: 3.000047, ppl: 20.086475 +epoch: 2, batch: 13103, sum loss: 4672.352051, avg loss: 2.626392, ppl: 13.823809 +epoch: 2, batch: 13104, sum loss: 4460.717285, avg loss: 2.569538, ppl: 13.059785 +epoch: 2, batch: 13105, sum loss: 4523.873535, avg loss: 2.532964, ppl: 12.590770 +epoch: 2, batch: 13106, sum loss: 3922.552002, avg loss: 2.219894, ppl: 9.206352 +epoch: 2, batch: 13107, sum loss: 4407.366211, avg loss: 2.677622, ppl: 14.550456 +epoch: 2, batch: 13108, sum loss: 4455.806152, avg loss: 2.575610, ppl: 13.139335 +epoch: 2, batch: 13109, sum loss: 3908.105469, avg loss: 2.505196, ppl: 12.245957 +epoch: 2, batch: 13110, sum loss: 4453.676758, avg loss: 2.397027, ppl: 10.990456 +epoch: 2, batch: 13111, sum loss: 4086.027832, avg loss: 2.689946, ppl: 14.730880 +epoch: 2, batch: 13112, sum loss: 3473.587402, avg loss: 2.596104, ppl: 13.411387 +epoch: 2, batch: 13113, sum loss: 4403.912109, avg loss: 2.808617, ppl: 16.586969 +epoch: 2, batch: 13114, sum loss: 4378.066406, avg loss: 2.405531, ppl: 11.084316 +epoch: 2, batch: 13115, sum loss: 4087.048828, avg loss: 2.525988, ppl: 12.503243 +epoch: 2, batch: 13116, sum loss: 3278.531006, avg loss: 2.278340, ppl: 9.760461 +epoch: 2, batch: 13117, sum loss: 4456.107422, avg loss: 2.845535, ppl: 17.210760 +epoch: 2, batch: 13118, sum loss: 4506.576172, avg loss: 2.402226, ppl: 11.047744 +epoch: 2, batch: 13119, sum loss: 3485.001953, avg loss: 2.370750, ppl: 10.705416 +epoch: 2, batch: 13120, sum loss: 4012.674072, avg loss: 2.428980, ppl: 11.347298 +epoch: 2, batch: 13121, sum loss: 3163.511475, avg loss: 2.235697, ppl: 9.352999 +epoch: 2, batch: 13122, sum loss: 4509.171387, avg loss: 2.523319, ppl: 12.469919 +epoch: 2, batch: 13123, sum loss: 4222.307129, avg loss: 2.606363, ppl: 13.549675 +epoch: 2, batch: 13124, sum loss: 4089.277344, avg loss: 2.611288, ppl: 13.616579 +epoch: 2, batch: 13125, sum loss: 4088.787598, avg loss: 2.631137, ppl: 13.889559 +epoch: 2, batch: 13126, sum loss: 
4418.633789, avg loss: 2.507738, ppl: 12.277126 +epoch: 2, batch: 13127, sum loss: 3818.636963, avg loss: 2.183326, ppl: 8.875776 +epoch: 2, batch: 13128, sum loss: 3737.598145, avg loss: 2.554749, ppl: 12.868073 +epoch: 2, batch: 13129, sum loss: 3836.467285, avg loss: 2.400793, ppl: 11.031922 +epoch: 2, batch: 13130, sum loss: 4699.558594, avg loss: 2.534821, ppl: 12.614173 +epoch: 2, batch: 13131, sum loss: 5964.770020, avg loss: 3.109890, ppl: 22.418589 +epoch: 2, batch: 13132, sum loss: 4062.492920, avg loss: 2.653490, ppl: 14.203524 +epoch: 2, batch: 13133, sum loss: 4641.512695, avg loss: 2.519822, ppl: 12.426389 +epoch: 2, batch: 13134, sum loss: 4020.689209, avg loss: 2.261355, ppl: 9.596085 +epoch: 2, batch: 13135, sum loss: 3908.141113, avg loss: 2.456405, ppl: 11.662813 +epoch: 2, batch: 13136, sum loss: 4244.084473, avg loss: 2.718824, ppl: 15.162483 +epoch: 2, batch: 13137, sum loss: 3719.331055, avg loss: 2.443713, ppl: 11.515718 +epoch: 2, batch: 13138, sum loss: 3568.455566, avg loss: 2.265686, ppl: 9.637734 +epoch: 2, batch: 13139, sum loss: 4277.670410, avg loss: 2.619516, ppl: 13.729082 +epoch: 2, batch: 13140, sum loss: 4844.291504, avg loss: 2.650050, ppl: 14.154749 +epoch: 2, batch: 13141, sum loss: 3990.089844, avg loss: 2.501624, ppl: 12.202292 +epoch: 2, batch: 13142, sum loss: 4408.859863, avg loss: 2.547002, ppl: 12.768761 +epoch: 2, batch: 13143, sum loss: 4341.778320, avg loss: 2.408086, ppl: 11.112666 +epoch: 2, batch: 13144, sum loss: 4092.089355, avg loss: 2.447422, ppl: 11.558511 +epoch: 2, batch: 13145, sum loss: 4261.498047, avg loss: 2.368815, ppl: 10.684723 +epoch: 2, batch: 13146, sum loss: 4220.666992, avg loss: 2.445346, ppl: 11.534538 +epoch: 2, batch: 13147, sum loss: 4444.793945, avg loss: 2.490081, ppl: 12.062251 +epoch: 2, batch: 13148, sum loss: 4196.183105, avg loss: 2.541601, ppl: 12.699986 +epoch: 2, batch: 13149, sum loss: 3808.053223, avg loss: 2.488924, ppl: 12.048303 +epoch: 2, batch: 13150, sum loss: 
4619.630859, avg loss: 2.719029, ppl: 15.165596 +epoch: 2, batch: 13151, sum loss: 3417.947266, avg loss: 2.272571, ppl: 9.704322 +epoch: 2, batch: 13152, sum loss: 3849.534424, avg loss: 2.464491, ppl: 11.757492 +epoch: 2, batch: 13153, sum loss: 4242.723633, avg loss: 2.353147, ppl: 10.518618 +epoch: 2, batch: 13154, sum loss: 3868.963623, avg loss: 2.520497, ppl: 12.434779 +epoch: 2, batch: 13155, sum loss: 5352.046387, avg loss: 3.022048, ppl: 20.533291 +epoch: 2, batch: 13156, sum loss: 4810.158203, avg loss: 2.542367, ppl: 12.709719 +epoch: 2, batch: 13157, sum loss: 5124.871582, avg loss: 2.677571, ppl: 14.549713 +epoch: 2, batch: 13158, sum loss: 4207.182129, avg loss: 2.489457, ppl: 12.054728 +epoch: 2, batch: 13159, sum loss: 4078.944580, avg loss: 2.539816, ppl: 12.677337 +epoch: 2, batch: 13160, sum loss: 4243.878906, avg loss: 2.644161, ppl: 14.071637 +epoch: 2, batch: 13161, sum loss: 3496.929443, avg loss: 2.312784, ppl: 10.102513 +epoch: 2, batch: 13162, sum loss: 3574.526611, avg loss: 2.357867, ppl: 10.568388 +epoch: 2, batch: 13163, sum loss: 4589.763672, avg loss: 2.459680, ppl: 11.701071 +epoch: 2, batch: 13164, sum loss: 4105.476074, avg loss: 2.494214, ppl: 12.112210 +epoch: 2, batch: 13165, sum loss: 3710.653564, avg loss: 2.338156, ppl: 10.362114 +epoch: 2, batch: 13166, sum loss: 4787.577148, avg loss: 2.593487, ppl: 13.376334 +epoch: 2, batch: 13167, sum loss: 3865.897461, avg loss: 2.448320, ppl: 11.568896 +epoch: 2, batch: 13168, sum loss: 4811.440430, avg loss: 2.704576, ppl: 14.947974 +epoch: 2, batch: 13169, sum loss: 4496.337891, avg loss: 2.594540, ppl: 13.390425 +epoch: 2, batch: 13170, sum loss: 4173.224121, avg loss: 2.387428, ppl: 10.885461 +epoch: 2, batch: 13171, sum loss: 3473.132324, avg loss: 2.375604, ppl: 10.757505 +epoch: 2, batch: 13172, sum loss: 3588.391602, avg loss: 2.459487, ppl: 11.698811 +epoch: 2, batch: 13173, sum loss: 4537.132812, avg loss: 2.670473, ppl: 14.446795 +epoch: 2, batch: 13174, sum loss: 
4022.890137, avg loss: 2.463497, ppl: 11.745811 +epoch: 2, batch: 13175, sum loss: 4469.616211, avg loss: 2.668428, ppl: 14.417283 +epoch: 2, batch: 13176, sum loss: 3839.767578, avg loss: 2.464549, ppl: 11.758182 +epoch: 2, batch: 13177, sum loss: 3754.201904, avg loss: 2.311701, ppl: 10.091572 +epoch: 2, batch: 13178, sum loss: 3223.266602, avg loss: 2.504481, ppl: 12.237202 +epoch: 2, batch: 13179, sum loss: 4184.844727, avg loss: 2.326206, ppl: 10.239020 +epoch: 2, batch: 13180, sum loss: 5463.985840, avg loss: 2.856239, ppl: 17.395983 +epoch: 2, batch: 13181, sum loss: 3760.210449, avg loss: 2.329746, ppl: 10.275333 +epoch: 2, batch: 13182, sum loss: 3952.049561, avg loss: 2.369334, ppl: 10.690272 +epoch: 2, batch: 13183, sum loss: 4798.902344, avg loss: 2.657200, ppl: 14.256310 +epoch: 2, batch: 13184, sum loss: 4176.321289, avg loss: 2.565308, ppl: 13.004662 +epoch: 2, batch: 13185, sum loss: 4969.234863, avg loss: 2.655925, ppl: 14.238148 +epoch: 2, batch: 13186, sum loss: 3929.097168, avg loss: 2.297718, ppl: 9.951443 +epoch: 2, batch: 13187, sum loss: 4093.480225, avg loss: 2.306186, ppl: 10.036074 +epoch: 2, batch: 13188, sum loss: 4492.745605, avg loss: 2.305154, ppl: 10.025725 +epoch: 2, batch: 13189, sum loss: 3641.121094, avg loss: 2.260162, ppl: 9.584642 +epoch: 2, batch: 13190, sum loss: 3082.787598, avg loss: 2.321376, ppl: 10.189689 +epoch: 2, batch: 13191, sum loss: 3493.932617, avg loss: 2.291103, ppl: 9.885839 +epoch: 2, batch: 13192, sum loss: 3677.821533, avg loss: 2.397537, ppl: 10.996057 +epoch: 2, batch: 13193, sum loss: 5132.069336, avg loss: 2.768106, ppl: 15.928440 +epoch: 2, batch: 13194, sum loss: 4109.483887, avg loss: 2.383692, ppl: 10.844863 +epoch: 2, batch: 13195, sum loss: 3957.875977, avg loss: 2.484542, ppl: 11.995629 +epoch: 2, batch: 13196, sum loss: 3201.367920, avg loss: 2.256073, ppl: 9.545532 +epoch: 2, batch: 13197, sum loss: 4402.105469, avg loss: 2.676052, ppl: 14.527623 +epoch: 2, batch: 13198, sum loss: 
3532.441650, avg loss: 2.322447, ppl: 10.200605 +epoch: 2, batch: 13199, sum loss: 4246.596191, avg loss: 2.659108, ppl: 14.283548 +epoch: 2, batch: 13200, sum loss: 4452.852051, avg loss: 2.639509, ppl: 14.006328 +epoch: 2, batch: 13201, sum loss: 4341.571289, avg loss: 2.653772, ppl: 14.207534 +epoch: 2, batch: 13202, sum loss: 3166.485840, avg loss: 2.173291, ppl: 8.787159 +epoch: 2, batch: 13203, sum loss: 3545.487061, avg loss: 2.217315, ppl: 9.182644 +epoch: 2, batch: 13204, sum loss: 4390.058105, avg loss: 2.639843, ppl: 14.010997 +epoch: 2, batch: 13205, sum loss: 3919.946777, avg loss: 2.422711, ppl: 11.276390 +epoch: 2, batch: 13206, sum loss: 3799.279785, avg loss: 2.283221, ppl: 9.808222 +epoch: 2, batch: 13207, sum loss: 3832.330811, avg loss: 2.614141, ppl: 13.655481 +epoch: 2, batch: 13208, sum loss: 3741.294434, avg loss: 2.448491, ppl: 11.570874 +epoch: 2, batch: 13209, sum loss: 5101.245605, avg loss: 2.757430, ppl: 15.759291 +epoch: 2, batch: 13210, sum loss: 3541.211426, avg loss: 2.313006, ppl: 10.104751 +epoch: 2, batch: 13211, sum loss: 3999.043457, avg loss: 2.426604, ppl: 11.320374 +epoch: 2, batch: 13212, sum loss: 4653.323242, avg loss: 2.671253, ppl: 14.458076 +epoch: 2, batch: 13213, sum loss: 4068.921387, avg loss: 2.476519, ppl: 11.899774 +epoch: 2, batch: 13214, sum loss: 4684.492188, avg loss: 2.723542, ppl: 15.234186 +epoch: 2, batch: 13215, sum loss: 4598.306152, avg loss: 2.719282, ppl: 15.169433 +epoch: 2, batch: 13216, sum loss: 4646.432617, avg loss: 2.568509, ppl: 13.046359 +epoch: 2, batch: 13217, sum loss: 4234.455566, avg loss: 2.566337, ppl: 13.018047 +epoch: 2, batch: 13218, sum loss: 3828.411621, avg loss: 2.522010, ppl: 12.453608 +epoch: 2, batch: 13219, sum loss: 4254.907715, avg loss: 2.375716, ppl: 10.758716 +epoch: 2, batch: 13220, sum loss: 3570.985352, avg loss: 2.424294, ppl: 11.294255 +epoch: 2, batch: 13221, sum loss: 4765.425781, avg loss: 2.696902, ppl: 14.833706 +epoch: 2, batch: 13222, sum loss: 
4682.996582, avg loss: 2.515036, ppl: 12.367052 +epoch: 2, batch: 13223, sum loss: 4143.869629, avg loss: 2.553216, ppl: 12.848357 +epoch: 2, batch: 13224, sum loss: 4247.280273, avg loss: 2.320918, ppl: 10.185023 +epoch: 2, batch: 13225, sum loss: 4068.133545, avg loss: 2.555360, ppl: 12.875938 +epoch: 2, batch: 13226, sum loss: 4817.586914, avg loss: 2.907415, ppl: 18.309410 +epoch: 2, batch: 13227, sum loss: 4705.786133, avg loss: 2.712269, ppl: 15.063410 +epoch: 2, batch: 13228, sum loss: 4770.562500, avg loss: 2.619749, ppl: 13.732274 +epoch: 2, batch: 13229, sum loss: 3933.832520, avg loss: 2.515238, ppl: 12.369553 +epoch: 2, batch: 13230, sum loss: 4002.029785, avg loss: 2.383579, ppl: 10.843646 +epoch: 2, batch: 13231, sum loss: 4778.538086, avg loss: 2.773382, ppl: 16.012705 +epoch: 2, batch: 13232, sum loss: 2804.260254, avg loss: 2.055909, ppl: 7.813939 +epoch: 2, batch: 13233, sum loss: 3803.407471, avg loss: 2.466542, ppl: 11.781632 +epoch: 2, batch: 13234, sum loss: 3776.727539, avg loss: 2.574456, ppl: 13.124179 +epoch: 2, batch: 13235, sum loss: 4133.287109, avg loss: 2.404472, ppl: 11.072581 +epoch: 2, batch: 13236, sum loss: 4379.975586, avg loss: 2.619603, ppl: 13.730268 +epoch: 2, batch: 13237, sum loss: 4971.128906, avg loss: 2.751040, ppl: 15.658905 +epoch: 2, batch: 13238, sum loss: 3636.615234, avg loss: 2.175009, ppl: 8.802266 +epoch: 2, batch: 13239, sum loss: 3903.118652, avg loss: 2.293254, ppl: 9.907125 +epoch: 2, batch: 13240, sum loss: 5290.248047, avg loss: 2.777033, ppl: 16.071268 +epoch: 2, batch: 13241, sum loss: 3722.449219, avg loss: 2.239741, ppl: 9.390898 +epoch: 2, batch: 13242, sum loss: 3734.405518, avg loss: 2.463328, ppl: 11.743834 +epoch: 2, batch: 13243, sum loss: 3992.642334, avg loss: 2.291987, ppl: 9.894583 +epoch: 2, batch: 13244, sum loss: 4100.934082, avg loss: 2.741266, ppl: 15.506608 +epoch: 2, batch: 13245, sum loss: 4615.068359, avg loss: 2.563927, ppl: 12.986715 +epoch: 2, batch: 13246, sum loss: 5279.438965, 
avg loss: 2.869260, ppl: 17.623974 +epoch: 2, batch: 13247, sum loss: 4267.280762, avg loss: 2.458111, ppl: 11.682723 +epoch: 2, batch: 13248, sum loss: 3992.951660, avg loss: 2.477017, ppl: 11.905699 +epoch: 2, batch: 13249, sum loss: 4437.336426, avg loss: 2.624090, ppl: 13.792021 +epoch: 2, batch: 13250, sum loss: 4677.732422, avg loss: 2.714877, ppl: 15.102747 +epoch: 2, batch: 13251, sum loss: 3350.957764, avg loss: 2.315797, ppl: 10.132992 +epoch: 2, batch: 13252, sum loss: 4133.588379, avg loss: 2.636217, ppl: 13.960294 +epoch: 2, batch: 13253, sum loss: 4635.938965, avg loss: 2.975571, ppl: 19.600805 +epoch: 2, batch: 13254, sum loss: 4657.667480, avg loss: 2.556349, ppl: 12.888672 +epoch: 2, batch: 13255, sum loss: 4505.239746, avg loss: 2.590707, ppl: 13.339203 +epoch: 2, batch: 13256, sum loss: 4195.375977, avg loss: 2.354308, ppl: 10.530835 +epoch: 2, batch: 13257, sum loss: 4496.450195, avg loss: 2.560621, ppl: 12.943853 +epoch: 2, batch: 13258, sum loss: 4600.551270, avg loss: 2.526387, ppl: 12.508235 +epoch: 2, batch: 13259, sum loss: 4212.909180, avg loss: 2.716254, ppl: 15.123556 +epoch: 2, batch: 13260, sum loss: 4780.377930, avg loss: 2.811987, ppl: 16.642958 +epoch: 2, batch: 13261, sum loss: 3629.899170, avg loss: 2.264441, ppl: 9.625742 +epoch: 2, batch: 13262, sum loss: 4405.896973, avg loss: 2.523423, ppl: 12.471218 +epoch: 2, batch: 13263, sum loss: 3429.514648, avg loss: 2.312552, ppl: 10.100169 +epoch: 2, batch: 13264, sum loss: 3831.979492, avg loss: 2.587427, ppl: 13.295520 +epoch: 2, batch: 13265, sum loss: 4632.449219, avg loss: 2.716979, ppl: 15.134532 +epoch: 2, batch: 13266, sum loss: 3788.479004, avg loss: 2.466458, ppl: 11.780644 +epoch: 2, batch: 13267, sum loss: 4289.330078, avg loss: 2.508380, ppl: 12.285014 +epoch: 2, batch: 13268, sum loss: 3773.637207, avg loss: 2.392921, ppl: 10.945424 +epoch: 2, batch: 13269, sum loss: 5095.382324, avg loss: 2.693120, ppl: 14.777703 +epoch: 2, batch: 13270, sum loss: 3741.510010, avg 
loss: 2.424828, ppl: 11.300289 +epoch: 2, batch: 13271, sum loss: 4241.197754, avg loss: 2.429094, ppl: 11.348594 +epoch: 2, batch: 13272, sum loss: 4375.031250, avg loss: 2.579618, ppl: 13.192091 +epoch: 2, batch: 13273, sum loss: 4569.961914, avg loss: 2.516499, ppl: 12.385158 +epoch: 2, batch: 13274, sum loss: 4415.142578, avg loss: 2.628061, ppl: 13.846895 +epoch: 2, batch: 13275, sum loss: 3594.442871, avg loss: 2.272088, ppl: 9.699629 +epoch: 2, batch: 13276, sum loss: 3893.529053, avg loss: 2.566598, ppl: 13.021449 +epoch: 2, batch: 13277, sum loss: 4458.666504, avg loss: 2.540551, ppl: 12.686656 +epoch: 2, batch: 13278, sum loss: 4088.347900, avg loss: 2.430647, ppl: 11.366230 +epoch: 2, batch: 13279, sum loss: 3936.465332, avg loss: 2.311489, ppl: 10.089433 +epoch: 2, batch: 13280, sum loss: 4648.279785, avg loss: 2.660721, ppl: 14.306602 +epoch: 2, batch: 13281, sum loss: 4332.876953, avg loss: 2.333267, ppl: 10.311574 +epoch: 2, batch: 13282, sum loss: 4768.375977, avg loss: 2.780394, ppl: 16.125374 +epoch: 2, batch: 13283, sum loss: 4406.818359, avg loss: 2.544352, ppl: 12.734977 +epoch: 2, batch: 13284, sum loss: 4923.731445, avg loss: 2.652873, ppl: 14.194756 +epoch: 2, batch: 13285, sum loss: 3639.802734, avg loss: 2.396183, ppl: 10.981187 +epoch: 2, batch: 13286, sum loss: 3520.245117, avg loss: 2.386607, ppl: 10.876527 +epoch: 2, batch: 13287, sum loss: 4328.986328, avg loss: 2.652565, ppl: 14.190393 +epoch: 2, batch: 13288, sum loss: 4508.560547, avg loss: 2.489542, ppl: 12.055757 +epoch: 2, batch: 13289, sum loss: 4348.577148, avg loss: 2.509277, ppl: 12.296035 +epoch: 2, batch: 13290, sum loss: 3269.637695, avg loss: 2.060263, ppl: 7.848037 +epoch: 2, batch: 13291, sum loss: 3710.355469, avg loss: 2.294592, ppl: 9.920389 +epoch: 2, batch: 13292, sum loss: 4328.432129, avg loss: 2.767540, ppl: 15.919420 +epoch: 2, batch: 13293, sum loss: 3714.395020, avg loss: 2.367365, ppl: 10.669238 +epoch: 2, batch: 13294, sum loss: 4689.431641, avg loss: 
2.406071, ppl: 11.090299 +epoch: 2, batch: 13295, sum loss: 3648.784668, avg loss: 2.528610, ppl: 12.536072 +epoch: 2, batch: 13296, sum loss: 3109.988525, avg loss: 2.140391, ppl: 8.502765 +epoch: 2, batch: 13297, sum loss: 4502.812988, avg loss: 2.707645, ppl: 14.993918 +epoch: 2, batch: 13298, sum loss: 4021.912842, avg loss: 2.626984, ppl: 13.831995 +epoch: 2, batch: 13299, sum loss: 3670.776123, avg loss: 2.289941, ppl: 9.874358 +epoch: 2, batch: 13300, sum loss: 4303.120117, avg loss: 2.526788, ppl: 12.513248 +epoch: 2, batch: 13301, sum loss: 3924.220703, avg loss: 2.384095, ppl: 10.849242 +epoch: 2, batch: 13302, sum loss: 5178.310059, avg loss: 2.632593, ppl: 13.909787 +epoch: 2, batch: 13303, sum loss: 4512.775391, avg loss: 2.568455, ppl: 13.045653 +epoch: 2, batch: 13304, sum loss: 3997.906250, avg loss: 2.279308, ppl: 9.769916 +epoch: 2, batch: 13305, sum loss: 4613.071289, avg loss: 2.505742, ppl: 12.252651 +epoch: 2, batch: 13306, sum loss: 5090.652344, avg loss: 2.826570, ppl: 16.887434 +epoch: 2, batch: 13307, sum loss: 5037.451660, avg loss: 2.711223, ppl: 15.047663 +epoch: 2, batch: 13308, sum loss: 4669.425781, avg loss: 2.568441, ppl: 13.045469 +epoch: 2, batch: 13309, sum loss: 4512.328125, avg loss: 2.577001, ppl: 13.157618 +epoch: 2, batch: 13310, sum loss: 5080.559082, avg loss: 2.657196, ppl: 14.256259 +epoch: 2, batch: 13311, sum loss: 3546.200928, avg loss: 2.194431, ppl: 8.974895 +epoch: 2, batch: 13312, sum loss: 4407.611816, avg loss: 2.443244, ppl: 11.510317 +epoch: 2, batch: 13313, sum loss: 4287.407715, avg loss: 2.415441, ppl: 11.194704 +epoch: 2, batch: 13314, sum loss: 3888.216309, avg loss: 2.141088, ppl: 8.508692 +epoch: 2, batch: 13315, sum loss: 3331.444824, avg loss: 2.403640, ppl: 11.063372 +epoch: 2, batch: 13316, sum loss: 3553.225586, avg loss: 2.248877, ppl: 9.477088 +epoch: 2, batch: 13317, sum loss: 4705.656250, avg loss: 2.698197, ppl: 14.852933 +epoch: 2, batch: 13318, sum loss: 4795.498047, avg loss: 2.482142, 
ppl: 11.966869 +epoch: 2, batch: 13319, sum loss: 3387.278076, avg loss: 2.282532, ppl: 9.801471 +epoch: 2, batch: 13320, sum loss: 3696.881836, avg loss: 2.311996, ppl: 10.094553 +epoch: 2, batch: 13321, sum loss: 3619.917969, avg loss: 2.442590, ppl: 11.502791 +epoch: 2, batch: 13322, sum loss: 3869.412354, avg loss: 2.376789, ppl: 10.770263 +epoch: 2, batch: 13323, sum loss: 4720.513184, avg loss: 2.616693, ppl: 13.690369 +epoch: 2, batch: 13324, sum loss: 3688.257812, avg loss: 2.762740, ppl: 15.843192 +epoch: 2, batch: 13325, sum loss: 4063.013916, avg loss: 2.462433, ppl: 11.733319 +epoch: 2, batch: 13326, sum loss: 4970.377441, avg loss: 2.723495, ppl: 15.233463 +epoch: 2, batch: 13327, sum loss: 4958.798828, avg loss: 2.590804, ppl: 13.340491 +epoch: 2, batch: 13328, sum loss: 4357.064941, avg loss: 2.623158, ppl: 13.779166 +epoch: 2, batch: 13329, sum loss: 4177.774902, avg loss: 2.606223, ppl: 13.547779 +epoch: 2, batch: 13330, sum loss: 5601.911621, avg loss: 2.868362, ppl: 17.608160 +epoch: 2, batch: 13331, sum loss: 2630.506348, avg loss: 2.018808, ppl: 7.529342 +epoch: 2, batch: 13332, sum loss: 4875.511719, avg loss: 2.692166, ppl: 14.763614 +epoch: 2, batch: 13333, sum loss: 3344.792236, avg loss: 2.362141, ppl: 10.613655 +epoch: 2, batch: 13334, sum loss: 4061.490723, avg loss: 2.567314, ppl: 13.030776 +epoch: 2, batch: 13335, sum loss: 4239.283691, avg loss: 2.498105, ppl: 12.159428 +epoch: 2, batch: 13336, sum loss: 4061.993652, avg loss: 2.460323, ppl: 11.708597 +epoch: 2, batch: 13337, sum loss: 4771.303711, avg loss: 2.833316, ppl: 17.001743 +epoch: 2, batch: 13338, sum loss: 3960.937012, avg loss: 2.418154, ppl: 11.225121 +epoch: 2, batch: 13339, sum loss: 4428.261230, avg loss: 2.555257, ppl: 12.874612 +epoch: 2, batch: 13340, sum loss: 3434.164551, avg loss: 2.151732, ppl: 8.599742 +epoch: 2, batch: 13341, sum loss: 4203.363281, avg loss: 2.428286, ppl: 11.339431 +epoch: 2, batch: 13342, sum loss: 4840.479004, avg loss: 2.783484, ppl: 
16.175282 +epoch: 2, batch: 13343, sum loss: 4426.419922, avg loss: 2.489550, ppl: 12.055851 +epoch: 2, batch: 13344, sum loss: 3949.256836, avg loss: 2.387701, ppl: 10.888431 +epoch: 2, batch: 13345, sum loss: 4678.916016, avg loss: 2.820323, ppl: 16.782270 +epoch: 2, batch: 13346, sum loss: 3029.774414, avg loss: 2.193899, ppl: 8.970119 +epoch: 2, batch: 13347, sum loss: 4930.594238, avg loss: 2.676761, ppl: 14.537934 +epoch: 2, batch: 13348, sum loss: 3945.094727, avg loss: 2.279084, ppl: 9.767731 +epoch: 2, batch: 13349, sum loss: 4987.657227, avg loss: 2.735961, ppl: 15.424562 +epoch: 2, batch: 13350, sum loss: 4749.343750, avg loss: 2.599531, ppl: 13.457431 +epoch: 2, batch: 13351, sum loss: 4935.744141, avg loss: 2.707484, ppl: 14.991517 +epoch: 2, batch: 13352, sum loss: 5014.587891, avg loss: 2.798319, ppl: 16.417032 +epoch: 2, batch: 13353, sum loss: 5663.305664, avg loss: 2.892393, ppl: 18.036421 +epoch: 2, batch: 13354, sum loss: 4188.232422, avg loss: 2.339795, ppl: 10.379105 +epoch: 2, batch: 13355, sum loss: 3053.140869, avg loss: 2.182374, ppl: 8.867331 +epoch: 2, batch: 13356, sum loss: 4789.230469, avg loss: 2.418803, ppl: 11.232409 +epoch: 2, batch: 13357, sum loss: 3839.472900, avg loss: 2.407193, ppl: 11.102754 +epoch: 2, batch: 13358, sum loss: 4322.397461, avg loss: 2.538108, ppl: 12.655699 +epoch: 2, batch: 13359, sum loss: 3268.569824, avg loss: 2.097927, ppl: 8.149258 +epoch: 2, batch: 13360, sum loss: 3547.184326, avg loss: 2.388676, ppl: 10.899055 +epoch: 2, batch: 13361, sum loss: 4409.513184, avg loss: 2.645179, ppl: 14.085963 +epoch: 2, batch: 13362, sum loss: 3565.617188, avg loss: 2.500433, ppl: 12.187767 +epoch: 2, batch: 13363, sum loss: 4399.787598, avg loss: 2.521368, ppl: 12.445614 +epoch: 2, batch: 13364, sum loss: 3256.897949, avg loss: 2.303323, ppl: 10.007378 +epoch: 2, batch: 13365, sum loss: 4878.099609, avg loss: 2.645390, ppl: 14.088943 +epoch: 2, batch: 13366, sum loss: 4249.994141, avg loss: 2.505893, ppl: 12.254495 
+epoch: 2, batch: 13367, sum loss: 3447.272949, avg loss: 2.408996, ppl: 11.122787 +epoch: 2, batch: 13368, sum loss: 4088.592773, avg loss: 2.547410, ppl: 12.773973 +epoch: 2, batch: 13369, sum loss: 3358.195068, avg loss: 2.255336, ppl: 9.538496 +epoch: 2, batch: 13370, sum loss: 4956.306152, avg loss: 2.686345, ppl: 14.677924 +epoch: 2, batch: 13371, sum loss: 3890.229980, avg loss: 2.337879, ppl: 10.359239 +epoch: 2, batch: 13372, sum loss: 3525.590088, avg loss: 2.123849, ppl: 8.363269 +epoch: 2, batch: 13373, sum loss: 4911.540527, avg loss: 2.669315, ppl: 14.430086 +epoch: 2, batch: 13374, sum loss: 4101.920898, avg loss: 2.693316, ppl: 14.780614 +epoch: 2, batch: 13375, sum loss: 3490.735107, avg loss: 2.289007, ppl: 9.865134 +epoch: 2, batch: 13376, sum loss: 5105.047852, avg loss: 2.775991, ppl: 16.054537 +epoch: 2, batch: 13377, sum loss: 5035.448730, avg loss: 2.492796, ppl: 12.095052 +epoch: 2, batch: 13378, sum loss: 5044.860840, avg loss: 2.824670, ppl: 16.855387 +epoch: 2, batch: 13379, sum loss: 3648.156250, avg loss: 2.295882, ppl: 9.933190 +epoch: 2, batch: 13380, sum loss: 3034.061768, avg loss: 2.409898, ppl: 11.132829 +epoch: 2, batch: 13381, sum loss: 3705.786377, avg loss: 2.321921, ppl: 10.195244 +epoch: 2, batch: 13382, sum loss: 4019.386230, avg loss: 2.361567, ppl: 10.607555 +epoch: 2, batch: 13383, sum loss: 3750.351807, avg loss: 2.303656, ppl: 10.010714 +epoch: 2, batch: 13384, sum loss: 4016.768555, avg loss: 2.584793, ppl: 13.260545 +epoch: 2, batch: 13385, sum loss: 3871.495605, avg loss: 2.404656, ppl: 11.074616 +epoch: 2, batch: 13386, sum loss: 4700.365234, avg loss: 2.591161, ppl: 13.345250 +epoch: 2, batch: 13387, sum loss: 3276.207275, avg loss: 2.459615, ppl: 11.700309 +epoch: 2, batch: 13388, sum loss: 3418.870605, avg loss: 2.349739, ppl: 10.482837 +epoch: 2, batch: 13389, sum loss: 4360.170898, avg loss: 2.617149, ppl: 13.696624 +epoch: 2, batch: 13390, sum loss: 3925.670410, avg loss: 2.442857, ppl: 11.505861 +epoch: 2, 
batch: 13391, sum loss: 4582.455566, avg loss: 2.665768, ppl: 14.378994 +epoch: 2, batch: 13392, sum loss: 4202.690918, avg loss: 2.507572, ppl: 12.275092 +epoch: 2, batch: 13393, sum loss: 3804.558105, avg loss: 2.609436, ppl: 13.591387 +epoch: 2, batch: 13394, sum loss: 3851.325928, avg loss: 2.307565, ppl: 10.049923 +epoch: 2, batch: 13395, sum loss: 4238.912109, avg loss: 2.345828, ppl: 10.441918 +epoch: 2, batch: 13396, sum loss: 4322.725586, avg loss: 2.472955, ppl: 11.857436 +epoch: 2, batch: 13397, sum loss: 4298.503906, avg loss: 2.748404, ppl: 15.617686 +epoch: 2, batch: 13398, sum loss: 4021.662109, avg loss: 2.496376, ppl: 12.138428 +epoch: 2, batch: 13399, sum loss: 4870.361816, avg loss: 2.683395, ppl: 14.634696 +epoch: 2, batch: 13400, sum loss: 3588.134277, avg loss: 2.477993, ppl: 11.917328 +epoch: 2, batch: 13401, sum loss: 4120.467285, avg loss: 2.405410, ppl: 11.082972 +epoch: 2, batch: 13402, sum loss: 4416.261230, avg loss: 2.557187, ppl: 12.899475 +epoch: 2, batch: 13403, sum loss: 3948.763428, avg loss: 2.489763, ppl: 12.058413 +epoch: 2, batch: 13404, sum loss: 5609.727539, avg loss: 2.702181, ppl: 14.912218 +epoch: 2, batch: 13405, sum loss: 4008.517822, avg loss: 2.346907, ppl: 10.453192 +epoch: 2, batch: 13406, sum loss: 4449.452637, avg loss: 2.552755, ppl: 12.842441 +epoch: 2, batch: 13407, sum loss: 3879.489502, avg loss: 2.493245, ppl: 12.100480 +epoch: 2, batch: 13408, sum loss: 4263.339355, avg loss: 2.577593, ppl: 13.165415 +epoch: 2, batch: 13409, sum loss: 4243.293457, avg loss: 2.383873, ppl: 10.846828 +epoch: 2, batch: 13410, sum loss: 4464.020020, avg loss: 2.402594, ppl: 11.051808 +epoch: 2, batch: 13411, sum loss: 3834.201660, avg loss: 2.397875, ppl: 10.999772 +epoch: 2, batch: 13412, sum loss: 4921.958008, avg loss: 2.873297, ppl: 17.695263 +epoch: 2, batch: 13413, sum loss: 4355.729980, avg loss: 2.479072, ppl: 11.930189 +epoch: 2, batch: 13414, sum loss: 3331.449219, avg loss: 2.329685, ppl: 10.274704 +epoch: 2, batch: 
13415, sum loss: 4605.503418, avg loss: 2.631716, ppl: 13.897601 +epoch: 2, batch: 13416, sum loss: 4455.084961, avg loss: 2.437136, ppl: 11.440231 +epoch: 2, batch: 13417, sum loss: 3830.783691, avg loss: 2.460362, ppl: 11.709052 +epoch: 2, batch: 13418, sum loss: 3962.363037, avg loss: 2.558014, ppl: 12.910148 +epoch: 2, batch: 13419, sum loss: 3859.822998, avg loss: 2.498267, ppl: 12.161405 +epoch: 2, batch: 13420, sum loss: 4287.620605, avg loss: 2.331496, ppl: 10.293326 +epoch: 2, batch: 13421, sum loss: 3866.638428, avg loss: 2.433378, ppl: 11.397323 +epoch: 2, batch: 13422, sum loss: 4245.381348, avg loss: 2.533044, ppl: 12.591776 +epoch: 2, batch: 13423, sum loss: 4419.880859, avg loss: 2.596875, ppl: 13.421725 +epoch: 2, batch: 13424, sum loss: 4186.000000, avg loss: 2.524729, ppl: 12.487505 +epoch: 2, batch: 13425, sum loss: 4099.915039, avg loss: 2.548114, ppl: 12.782970 +epoch: 2, batch: 13426, sum loss: 4275.813477, avg loss: 2.580455, ppl: 13.203138 +epoch: 2, batch: 13427, sum loss: 4306.427734, avg loss: 2.302902, ppl: 10.003174 +epoch: 2, batch: 13428, sum loss: 3950.677734, avg loss: 2.339063, ppl: 10.371515 +epoch: 2, batch: 13429, sum loss: 3894.727783, avg loss: 2.341989, ppl: 10.401906 +epoch: 2, batch: 13430, sum loss: 3767.108887, avg loss: 2.338367, ppl: 10.364298 +epoch: 2, batch: 13431, sum loss: 4473.478027, avg loss: 2.664371, ppl: 14.358909 +epoch: 2, batch: 13432, sum loss: 3513.171143, avg loss: 2.394799, ppl: 10.965999 +epoch: 2, batch: 13433, sum loss: 4341.714355, avg loss: 2.524252, ppl: 12.481561 +epoch: 2, batch: 13434, sum loss: 5003.736328, avg loss: 2.556840, ppl: 12.895007 +epoch: 2, batch: 13435, sum loss: 3859.430664, avg loss: 2.377961, ppl: 10.782893 +epoch: 2, batch: 13436, sum loss: 3766.437500, avg loss: 2.455305, ppl: 11.649981 +epoch: 2, batch: 13437, sum loss: 3845.207031, avg loss: 2.466457, ppl: 11.780639 +epoch: 2, batch: 13438, sum loss: 3772.612305, avg loss: 2.424558, ppl: 11.297235 +epoch: 2, batch: 13439, 
sum loss: 3943.627441, avg loss: 2.537727, ppl: 12.650881 +epoch: 2, batch: 13440, sum loss: 3811.609375, avg loss: 2.598234, ppl: 13.439981 +epoch: 2, batch: 13441, sum loss: 4696.078613, avg loss: 2.765653, ppl: 15.889411 +epoch: 2, batch: 13442, sum loss: 3666.920166, avg loss: 2.477649, ppl: 11.913220 +epoch: 2, batch: 13443, sum loss: 4262.751953, avg loss: 2.569471, ppl: 13.058909 +epoch: 2, batch: 13444, sum loss: 3943.950928, avg loss: 2.393174, ppl: 10.948191 +epoch: 2, batch: 13445, sum loss: 5438.142578, avg loss: 3.007822, ppl: 20.243267 +epoch: 2, batch: 13446, sum loss: 4908.631348, avg loss: 2.630563, ppl: 13.881587 +epoch: 2, batch: 13447, sum loss: 3511.505371, avg loss: 2.232362, ppl: 9.321856 +epoch: 2, batch: 13448, sum loss: 3742.411621, avg loss: 2.294550, ppl: 9.919975 +epoch: 2, batch: 13449, sum loss: 4140.357422, avg loss: 2.347142, ppl: 10.455640 +epoch: 2, batch: 13450, sum loss: 5648.239746, avg loss: 2.934150, ppl: 18.805521 +epoch: 2, batch: 13451, sum loss: 4391.598633, avg loss: 2.482532, ppl: 11.971536 +epoch: 2, batch: 13452, sum loss: 4351.847656, avg loss: 2.495325, ppl: 12.125678 +epoch: 2, batch: 13453, sum loss: 4740.704590, avg loss: 2.751425, ppl: 15.664931 +epoch: 2, batch: 13454, sum loss: 4117.302734, avg loss: 2.591128, ppl: 13.344817 +epoch: 2, batch: 13455, sum loss: 3859.082764, avg loss: 2.538870, ppl: 12.665356 +epoch: 2, batch: 13456, sum loss: 3735.960938, avg loss: 2.278025, ppl: 9.757390 +epoch: 2, batch: 13457, sum loss: 4263.707031, avg loss: 2.476020, ppl: 11.893837 +epoch: 2, batch: 13458, sum loss: 3787.982910, avg loss: 2.495378, ppl: 12.126311 +epoch: 2, batch: 13459, sum loss: 4897.431641, avg loss: 2.394832, ppl: 10.966357 +epoch: 2, batch: 13460, sum loss: 3780.879395, avg loss: 2.426752, ppl: 11.322046 +epoch: 2, batch: 13461, sum loss: 4719.928711, avg loss: 2.633889, ppl: 13.927830 +epoch: 2, batch: 13462, sum loss: 4323.598145, avg loss: 2.509343, ppl: 12.296850 +epoch: 2, batch: 13463, sum loss: 
3813.654297, avg loss: 2.494215, ppl: 12.112219 +epoch: 2, batch: 13464, sum loss: 4562.488281, avg loss: 2.730394, ppl: 15.338931 +epoch: 2, batch: 13465, sum loss: 3553.358154, avg loss: 2.305878, ppl: 10.032985 +epoch: 2, batch: 13466, sum loss: 3999.040039, avg loss: 2.560205, ppl: 12.938469 +epoch: 2, batch: 13467, sum loss: 4383.980957, avg loss: 2.522429, ppl: 12.458819 +epoch: 2, batch: 13468, sum loss: 3938.508057, avg loss: 2.394230, ppl: 10.959752 +epoch: 2, batch: 13469, sum loss: 4398.863281, avg loss: 2.544166, ppl: 12.732609 +epoch: 2, batch: 13470, sum loss: 4514.888672, avg loss: 2.637201, ppl: 13.974040 +epoch: 2, batch: 13471, sum loss: 4222.710938, avg loss: 2.585861, ppl: 13.274714 +epoch: 2, batch: 13472, sum loss: 3886.148926, avg loss: 2.437985, ppl: 11.449950 +epoch: 2, batch: 13473, sum loss: 4473.295898, avg loss: 2.385758, ppl: 10.867296 +epoch: 2, batch: 13474, sum loss: 4579.948730, avg loss: 2.670524, ppl: 14.447536 +epoch: 2, batch: 13475, sum loss: 3977.467285, avg loss: 2.698417, ppl: 14.856202 +epoch: 2, batch: 13476, sum loss: 3619.573730, avg loss: 2.457280, ppl: 11.673022 +epoch: 2, batch: 13477, sum loss: 3911.768311, avg loss: 2.336779, ppl: 10.347857 +epoch: 2, batch: 13478, sum loss: 4207.056641, avg loss: 2.637653, ppl: 13.980354 +epoch: 2, batch: 13479, sum loss: 4902.249023, avg loss: 2.604808, ppl: 13.528628 +epoch: 2, batch: 13480, sum loss: 3601.496826, avg loss: 2.518529, ppl: 12.410333 +epoch: 2, batch: 13481, sum loss: 4126.247070, avg loss: 2.554952, ppl: 12.870678 +epoch: 2, batch: 13482, sum loss: 3563.721680, avg loss: 2.265557, ppl: 9.636494 +epoch: 2, batch: 13483, sum loss: 4139.180664, avg loss: 2.544057, ppl: 12.731215 +epoch: 2, batch: 13484, sum loss: 4431.934570, avg loss: 2.396936, ppl: 10.989452 +epoch: 2, batch: 13485, sum loss: 3864.539307, avg loss: 2.438195, ppl: 11.452351 +epoch: 2, batch: 13486, sum loss: 4756.289062, avg loss: 2.783083, ppl: 16.168797 +epoch: 2, batch: 13487, sum loss: 
4275.881348, avg loss: 2.588306, ppl: 13.307209 +epoch: 2, batch: 13488, sum loss: 4715.207031, avg loss: 2.759045, ppl: 15.784756 +epoch: 2, batch: 13489, sum loss: 4572.437012, avg loss: 2.531803, ppl: 12.576165 +epoch: 2, batch: 13490, sum loss: 4288.378418, avg loss: 2.394404, ppl: 10.961667 +epoch: 2, batch: 13491, sum loss: 4344.198242, avg loss: 2.457126, ppl: 11.671219 +epoch: 2, batch: 13492, sum loss: 4119.157715, avg loss: 2.479926, ppl: 11.940385 +epoch: 2, batch: 13493, sum loss: 4860.211426, avg loss: 2.820784, ppl: 16.790014 +epoch: 2, batch: 13494, sum loss: 5735.796875, avg loss: 3.072200, ppl: 21.589338 +epoch: 2, batch: 13495, sum loss: 4321.668457, avg loss: 2.537680, ppl: 12.650284 +epoch: 2, batch: 13496, sum loss: 4864.366699, avg loss: 2.743580, ppl: 15.542525 +epoch: 2, batch: 13497, sum loss: 4583.353027, avg loss: 2.614577, ppl: 13.661434 +epoch: 2, batch: 13498, sum loss: 4089.321289, avg loss: 2.380280, ppl: 10.807932 +epoch: 2, batch: 13499, sum loss: 4810.832520, avg loss: 2.668238, ppl: 14.414544 +epoch: 2, batch: 13500, sum loss: 4587.573242, avg loss: 2.517878, ppl: 12.402249 +epoch: 2, batch: 13501, sum loss: 4076.957764, avg loss: 2.328360, ppl: 10.261098 +epoch: 2, batch: 13502, sum loss: 3647.419434, avg loss: 2.436486, ppl: 11.432793 +epoch: 2, batch: 13503, sum loss: 4648.989258, avg loss: 2.459783, ppl: 11.702268 +epoch: 2, batch: 13504, sum loss: 4229.596191, avg loss: 2.520617, ppl: 12.436274 +epoch: 2, batch: 13505, sum loss: 4006.520508, avg loss: 2.284219, ppl: 9.818018 +epoch: 2, batch: 13506, sum loss: 3709.738525, avg loss: 2.567293, ppl: 13.030505 +epoch: 2, batch: 13507, sum loss: 4066.315918, avg loss: 2.353192, ppl: 10.519094 +epoch: 2, batch: 13508, sum loss: 3873.547363, avg loss: 2.608449, ppl: 13.577978 +epoch: 2, batch: 13509, sum loss: 3677.388916, avg loss: 2.271395, ppl: 9.692915 +epoch: 2, batch: 13510, sum loss: 4094.510010, avg loss: 2.345080, ppl: 10.434112 +epoch: 2, batch: 13511, sum loss: 
3745.662598, avg loss: 2.464252, ppl: 11.754683 +epoch: 2, batch: 13512, sum loss: 3506.713623, avg loss: 2.435218, ppl: 11.418306 +epoch: 2, batch: 13513, sum loss: 4617.771973, avg loss: 2.511024, ppl: 12.317531 +epoch: 2, batch: 13514, sum loss: 3958.425293, avg loss: 2.386031, ppl: 10.870263 +epoch: 2, batch: 13515, sum loss: 4195.005371, avg loss: 2.477853, ppl: 11.915658 +epoch: 2, batch: 13516, sum loss: 3978.113281, avg loss: 2.583190, ppl: 13.239310 +epoch: 2, batch: 13517, sum loss: 3983.375977, avg loss: 2.525920, ppl: 12.502394 +epoch: 2, batch: 13518, sum loss: 3880.840332, avg loss: 2.554865, ppl: 12.869564 +epoch: 2, batch: 13519, sum loss: 4037.000977, avg loss: 2.668210, ppl: 14.414139 +epoch: 2, batch: 13520, sum loss: 4480.029785, avg loss: 2.461555, ppl: 11.723026 +epoch: 2, batch: 13521, sum loss: 3669.757080, avg loss: 2.285029, ppl: 9.825973 +epoch: 2, batch: 13522, sum loss: 4140.868164, avg loss: 2.227471, ppl: 9.276373 +epoch: 2, batch: 13523, sum loss: 3921.747559, avg loss: 2.491580, ppl: 12.080351 +epoch: 2, batch: 13524, sum loss: 4315.376953, avg loss: 2.484385, ppl: 11.993745 +epoch: 2, batch: 13525, sum loss: 3626.511963, avg loss: 2.392158, ppl: 10.937074 +epoch: 2, batch: 13526, sum loss: 3641.011230, avg loss: 2.433831, ppl: 11.402481 +epoch: 2, batch: 13527, sum loss: 4187.887695, avg loss: 2.638871, ppl: 13.997388 +epoch: 2, batch: 13528, sum loss: 4433.244141, avg loss: 2.456091, ppl: 11.659146 +epoch: 2, batch: 13529, sum loss: 4522.820312, avg loss: 2.407036, ppl: 11.101007 +epoch: 2, batch: 13530, sum loss: 3832.139648, avg loss: 2.371373, ppl: 10.712094 +epoch: 2, batch: 13531, sum loss: 5470.180176, avg loss: 2.853511, ppl: 17.348583 +epoch: 2, batch: 13532, sum loss: 4189.708984, avg loss: 2.511816, ppl: 12.327296 +epoch: 2, batch: 13533, sum loss: 3603.330078, avg loss: 2.355118, ppl: 10.539371 +epoch: 2, batch: 13534, sum loss: 3874.670654, avg loss: 2.490148, ppl: 12.063065 +epoch: 2, batch: 13535, sum loss: 
4276.013184, avg loss: 2.604149, ppl: 13.519720 +epoch: 2, batch: 13536, sum loss: 4357.110352, avg loss: 2.647090, ppl: 14.112913 +epoch: 2, batch: 13537, sum loss: 4172.361328, avg loss: 2.499917, ppl: 12.181483 +epoch: 2, batch: 13538, sum loss: 4768.541992, avg loss: 2.474594, ppl: 11.876880 +epoch: 2, batch: 13539, sum loss: 4541.192871, avg loss: 2.772401, ppl: 15.996994 +epoch: 2, batch: 13540, sum loss: 4131.608887, avg loss: 2.453449, ppl: 11.628389 +epoch: 2, batch: 13541, sum loss: 4034.425293, avg loss: 2.460016, ppl: 11.704993 +epoch: 2, batch: 13542, sum loss: 5115.684082, avg loss: 2.598113, ppl: 13.438354 +epoch: 2, batch: 13543, sum loss: 4456.849609, avg loss: 2.361871, ppl: 10.610781 +epoch: 2, batch: 13544, sum loss: 5054.337891, avg loss: 2.644866, ppl: 14.081552 +epoch: 2, batch: 13545, sum loss: 4275.959473, avg loss: 2.709733, ppl: 15.025270 +epoch: 2, batch: 13546, sum loss: 4017.236816, avg loss: 2.545777, ppl: 12.753138 +epoch: 2, batch: 13547, sum loss: 3863.692383, avg loss: 2.327526, ppl: 10.252542 +epoch: 2, batch: 13548, sum loss: 4174.894531, avg loss: 2.511970, ppl: 12.329198 +epoch: 2, batch: 13549, sum loss: 4187.280762, avg loss: 2.502858, ppl: 12.217358 +epoch: 2, batch: 13550, sum loss: 4057.095215, avg loss: 2.624253, ppl: 13.794267 +epoch: 2, batch: 13551, sum loss: 3546.865723, avg loss: 2.439385, ppl: 11.465986 +epoch: 2, batch: 13552, sum loss: 4941.664062, avg loss: 2.649686, ppl: 14.149593 +epoch: 2, batch: 13553, sum loss: 3477.172607, avg loss: 2.133235, ppl: 8.442131 +epoch: 2, batch: 13554, sum loss: 3517.127197, avg loss: 2.400769, ppl: 11.031662 +epoch: 2, batch: 13555, sum loss: 3691.683105, avg loss: 2.262061, ppl: 9.602859 +epoch: 2, batch: 13556, sum loss: 4369.678223, avg loss: 2.456255, ppl: 11.661061 +epoch: 2, batch: 13557, sum loss: 3728.345459, avg loss: 2.524269, ppl: 12.481772 +epoch: 2, batch: 13558, sum loss: 3818.595459, avg loss: 2.289326, ppl: 9.868283 +epoch: 2, batch: 13559, sum loss: 
4421.465332, avg loss: 2.628695, ppl: 13.855680 +epoch: 2, batch: 13560, sum loss: 2937.528320, avg loss: 2.142617, ppl: 8.521711 +epoch: 2, batch: 13561, sum loss: 4477.583984, avg loss: 2.539753, ppl: 12.676536 +epoch: 2, batch: 13562, sum loss: 5158.828125, avg loss: 2.625358, ppl: 13.809515 +epoch: 2, batch: 13563, sum loss: 3980.610840, avg loss: 2.424245, ppl: 11.293703 +epoch: 2, batch: 13564, sum loss: 4339.826660, avg loss: 2.440847, ppl: 11.482767 +epoch: 2, batch: 13565, sum loss: 4558.179688, avg loss: 2.419416, ppl: 11.239296 +epoch: 2, batch: 13566, sum loss: 4680.308594, avg loss: 2.642749, ppl: 14.051780 +epoch: 2, batch: 13567, sum loss: 3709.329346, avg loss: 2.630730, ppl: 13.883904 +epoch: 2, batch: 13568, sum loss: 4865.677734, avg loss: 2.608943, ppl: 13.584678 +epoch: 2, batch: 13569, sum loss: 4175.608398, avg loss: 2.343215, ppl: 10.414663 +epoch: 2, batch: 13570, sum loss: 4888.800781, avg loss: 2.746517, ppl: 15.588250 +epoch: 2, batch: 13571, sum loss: 4022.313965, avg loss: 2.426004, ppl: 11.313580 +epoch: 2, batch: 13572, sum loss: 4005.354980, avg loss: 2.473969, ppl: 11.869457 +epoch: 2, batch: 13573, sum loss: 4324.103516, avg loss: 2.525761, ppl: 12.500409 +epoch: 2, batch: 13574, sum loss: 4200.282715, avg loss: 2.545626, ppl: 12.751207 +epoch: 2, batch: 13575, sum loss: 4829.392090, avg loss: 2.536446, ppl: 12.634683 +epoch: 2, batch: 13576, sum loss: 5346.116211, avg loss: 2.705524, ppl: 14.962161 +epoch: 2, batch: 13577, sum loss: 5466.454102, avg loss: 2.727772, ppl: 15.298756 +epoch: 2, batch: 13578, sum loss: 4011.125000, avg loss: 2.276462, ppl: 9.742147 +epoch: 2, batch: 13579, sum loss: 3742.954590, avg loss: 2.419492, ppl: 11.240153 +epoch: 2, batch: 13580, sum loss: 4374.653809, avg loss: 2.349438, ppl: 10.479680 +epoch: 2, batch: 13581, sum loss: 4830.441406, avg loss: 2.645368, ppl: 14.088627 +epoch: 2, batch: 13582, sum loss: 4566.158203, avg loss: 2.604768, ppl: 13.528087 +epoch: 2, batch: 13583, sum loss: 
3945.928711, avg loss: 2.379933, ppl: 10.804177 +epoch: 2, batch: 13584, sum loss: 4115.042969, avg loss: 2.443612, ppl: 11.514560 +epoch: 2, batch: 13585, sum loss: 4723.875977, avg loss: 2.631686, ppl: 13.897177 +epoch: 2, batch: 13586, sum loss: 4959.626465, avg loss: 2.643724, ppl: 14.065489 +epoch: 2, batch: 13587, sum loss: 3932.114014, avg loss: 2.498166, ppl: 12.160175 +epoch: 2, batch: 13588, sum loss: 4823.731445, avg loss: 2.709961, ppl: 15.028696 +epoch: 2, batch: 13589, sum loss: 4425.946289, avg loss: 2.623560, ppl: 13.784716 +epoch: 2, batch: 13590, sum loss: 3868.364502, avg loss: 2.274171, ppl: 9.719854 +epoch: 2, batch: 13591, sum loss: 3873.082520, avg loss: 2.583778, ppl: 13.247087 +epoch: 2, batch: 13592, sum loss: 3840.980713, avg loss: 2.376844, ppl: 10.770861 +epoch: 2, batch: 13593, sum loss: 3648.378418, avg loss: 2.706512, ppl: 14.976948 +epoch: 2, batch: 13594, sum loss: 4505.900879, avg loss: 2.635030, ppl: 13.943727 +epoch: 2, batch: 13595, sum loss: 4820.946777, avg loss: 2.634397, ppl: 13.934911 +epoch: 2, batch: 13596, sum loss: 4419.271484, avg loss: 2.550070, ppl: 12.808001 +epoch: 2, batch: 13597, sum loss: 3895.692871, avg loss: 2.314731, ppl: 10.122203 +epoch: 2, batch: 13598, sum loss: 3674.874756, avg loss: 2.380100, ppl: 10.805988 +epoch: 2, batch: 13599, sum loss: 4499.062012, avg loss: 2.655881, ppl: 14.237519 +epoch: 2, batch: 13600, sum loss: 3899.334473, avg loss: 2.382000, ppl: 10.826539 +epoch: 2, batch: 13601, sum loss: 4512.771484, avg loss: 2.577254, ppl: 13.160950 +epoch: 2, batch: 13602, sum loss: 4716.138184, avg loss: 2.676583, ppl: 14.535335 +epoch: 2, batch: 13603, sum loss: 4438.787109, avg loss: 2.502135, ppl: 12.208526 +epoch: 2, batch: 13604, sum loss: 3746.924805, avg loss: 2.388097, ppl: 10.892749 +epoch: 2, batch: 13605, sum loss: 4242.494629, avg loss: 2.431229, ppl: 11.372849 +epoch: 2, batch: 13606, sum loss: 4937.557129, avg loss: 2.752262, ppl: 15.678050 +epoch: 2, batch: 13607, sum loss: 
4041.124023, avg loss: 2.372944, ppl: 10.728933 +epoch: 2, batch: 13608, sum loss: 3445.625244, avg loss: 2.266859, ppl: 9.649044 +epoch: 2, batch: 13609, sum loss: 4651.790039, avg loss: 2.698254, ppl: 14.853780 +epoch: 2, batch: 13610, sum loss: 4217.835938, avg loss: 2.432431, ppl: 11.386532 +epoch: 2, batch: 13611, sum loss: 4209.774414, avg loss: 2.401469, ppl: 11.039378 +epoch: 2, batch: 13612, sum loss: 4601.221680, avg loss: 2.489839, ppl: 12.059330 +epoch: 2, batch: 13613, sum loss: 4291.964355, avg loss: 2.372562, ppl: 10.724833 +epoch: 2, batch: 13614, sum loss: 5402.695801, avg loss: 2.796427, ppl: 16.385986 +epoch: 2, batch: 13615, sum loss: 3485.885498, avg loss: 2.337951, ppl: 10.359992 +epoch: 2, batch: 13616, sum loss: 5094.038574, avg loss: 2.617697, ppl: 13.704130 +epoch: 2, batch: 13617, sum loss: 4038.243896, avg loss: 2.383851, ppl: 10.846593 +epoch: 2, batch: 13618, sum loss: 4476.361328, avg loss: 2.598004, ppl: 13.436895 +epoch: 2, batch: 13619, sum loss: 4133.870605, avg loss: 2.743113, ppl: 15.535264 +epoch: 2, batch: 13620, sum loss: 3507.531738, avg loss: 2.179945, ppl: 8.845821 +epoch: 2, batch: 13621, sum loss: 3917.031738, avg loss: 2.239583, ppl: 9.389420 +epoch: 2, batch: 13622, sum loss: 4374.506348, avg loss: 2.543318, ppl: 12.721807 +epoch: 2, batch: 13623, sum loss: 4499.302734, avg loss: 2.413789, ppl: 11.176229 +epoch: 2, batch: 13624, sum loss: 4081.833008, avg loss: 2.482867, ppl: 11.975546 +epoch: 2, batch: 13625, sum loss: 4054.749268, avg loss: 2.558201, ppl: 12.912571 +epoch: 2, batch: 13626, sum loss: 4294.311035, avg loss: 2.453892, ppl: 11.633536 +epoch: 2, batch: 13627, sum loss: 4109.168457, avg loss: 2.602386, ppl: 13.495907 +epoch: 2, batch: 13628, sum loss: 5168.585938, avg loss: 2.792321, ppl: 16.318851 +epoch: 2, batch: 13629, sum loss: 4343.339844, avg loss: 2.691041, ppl: 14.747016 +epoch: 2, batch: 13630, sum loss: 4126.169434, avg loss: 2.639904, ppl: 14.011855 +epoch: 2, batch: 13631, sum loss: 
2963.212158, avg loss: 2.213004, ppl: 9.143140 +epoch: 2, batch: 13632, sum loss: 4440.778809, avg loss: 2.439988, ppl: 11.472907 +epoch: 2, batch: 13633, sum loss: 3875.739258, avg loss: 2.484448, ppl: 11.994503 +epoch: 2, batch: 13634, sum loss: 3658.642334, avg loss: 2.358892, ppl: 10.579227 +epoch: 2, batch: 13635, sum loss: 3386.531006, avg loss: 2.396696, ppl: 10.986812 +epoch: 2, batch: 13636, sum loss: 3998.862305, avg loss: 2.600040, ppl: 13.464282 +epoch: 2, batch: 13637, sum loss: 3930.284424, avg loss: 2.464128, ppl: 11.753232 +epoch: 2, batch: 13638, sum loss: 3902.968994, avg loss: 2.397401, ppl: 10.994565 +epoch: 2, batch: 13639, sum loss: 4093.650879, avg loss: 2.566552, ppl: 13.020856 +epoch: 2, batch: 13640, sum loss: 4328.458008, avg loss: 2.698540, ppl: 14.858023 +epoch: 2, batch: 13641, sum loss: 4782.328613, avg loss: 2.624769, ppl: 13.801382 +epoch: 2, batch: 13642, sum loss: 4195.387695, avg loss: 2.545745, ppl: 12.752724 +epoch: 2, batch: 13643, sum loss: 4002.804688, avg loss: 2.342191, ppl: 10.404009 +epoch: 2, batch: 13644, sum loss: 5198.802734, avg loss: 2.760915, ppl: 15.814303 +epoch: 2, batch: 13645, sum loss: 4250.140625, avg loss: 2.686562, ppl: 14.681109 +epoch: 2, batch: 13646, sum loss: 3837.575928, avg loss: 2.585968, ppl: 13.276128 +epoch: 2, batch: 13647, sum loss: 4758.468750, avg loss: 2.562449, ppl: 12.967542 +epoch: 2, batch: 13648, sum loss: 3800.754639, avg loss: 2.503791, ppl: 12.228764 +epoch: 2, batch: 13649, sum loss: 4726.414062, avg loss: 2.582740, ppl: 13.233349 +epoch: 2, batch: 13650, sum loss: 3670.042969, avg loss: 2.381598, ppl: 10.822186 +epoch: 2, batch: 13651, sum loss: 4899.003906, avg loss: 2.671212, ppl: 14.457487 +epoch: 2, batch: 13652, sum loss: 3622.065918, avg loss: 2.376684, ppl: 10.769130 +epoch: 2, batch: 13653, sum loss: 5541.451172, avg loss: 2.622551, ppl: 13.770814 +epoch: 2, batch: 13654, sum loss: 4262.153809, avg loss: 2.448107, ppl: 11.566428 +epoch: 2, batch: 13655, sum loss: 
4310.546875, avg loss: 2.654278, ppl: 14.214713 +epoch: 2, batch: 13656, sum loss: 3336.209961, avg loss: 2.331384, ppl: 10.292173 +epoch: 2, batch: 13657, sum loss: 5066.757324, avg loss: 2.992769, ppl: 19.940817 +epoch: 2, batch: 13658, sum loss: 4180.994629, avg loss: 2.313777, ppl: 10.112545 +epoch: 2, batch: 13659, sum loss: 3929.612793, avg loss: 2.482383, ppl: 11.969757 +epoch: 2, batch: 13660, sum loss: 4388.177734, avg loss: 2.587369, ppl: 13.294746 +epoch: 2, batch: 13661, sum loss: 5045.647461, avg loss: 2.763224, ppl: 15.850869 +epoch: 2, batch: 13662, sum loss: 4770.506836, avg loss: 2.751157, ppl: 15.660746 +epoch: 2, batch: 13663, sum loss: 4788.716309, avg loss: 2.688780, ppl: 14.713708 +epoch: 2, batch: 13664, sum loss: 4116.455566, avg loss: 2.290738, ppl: 9.882227 +epoch: 2, batch: 13665, sum loss: 4178.246094, avg loss: 2.436295, ppl: 11.430613 +epoch: 2, batch: 13666, sum loss: 4380.896484, avg loss: 2.648668, ppl: 14.135196 +epoch: 2, batch: 13667, sum loss: 5405.497070, avg loss: 2.782037, ppl: 16.151886 +epoch: 2, batch: 13668, sum loss: 4145.318359, avg loss: 2.431272, ppl: 11.373338 +epoch: 2, batch: 13669, sum loss: 3435.759033, avg loss: 2.298167, ppl: 9.955912 +epoch: 2, batch: 13670, sum loss: 4102.414551, avg loss: 2.291852, ppl: 9.893238 +epoch: 2, batch: 13671, sum loss: 3589.144287, avg loss: 2.324575, ppl: 10.222339 +epoch: 2, batch: 13672, sum loss: 3439.346436, avg loss: 2.276206, ppl: 9.739654 +epoch: 2, batch: 13673, sum loss: 3982.877197, avg loss: 2.447989, ppl: 11.565061 +epoch: 2, batch: 13674, sum loss: 3769.151611, avg loss: 2.354249, ppl: 10.530212 +epoch: 2, batch: 13675, sum loss: 4068.916992, avg loss: 2.623415, ppl: 13.782715 +epoch: 2, batch: 13676, sum loss: 4349.589844, avg loss: 2.481226, ppl: 11.955918 +epoch: 2, batch: 13677, sum loss: 4988.345703, avg loss: 2.550279, ppl: 12.810677 +epoch: 2, batch: 13678, sum loss: 3156.133789, avg loss: 2.339610, ppl: 10.377187 +epoch: 2, batch: 13679, sum loss: 
4633.203613, avg loss: 2.779366, ppl: 16.108809 +epoch: 2, batch: 13680, sum loss: 4201.548828, avg loss: 2.406385, ppl: 11.093789 +epoch: 2, batch: 13681, sum loss: 4553.616699, avg loss: 2.590225, ppl: 13.332777 +epoch: 2, batch: 13682, sum loss: 4114.441406, avg loss: 2.632400, ppl: 13.907107 +epoch: 2, batch: 13683, sum loss: 3888.349121, avg loss: 2.581905, ppl: 13.222304 +epoch: 2, batch: 13684, sum loss: 3552.285645, avg loss: 2.317212, ppl: 10.147343 +epoch: 2, batch: 13685, sum loss: 4670.196289, avg loss: 2.567453, ppl: 13.032584 +epoch: 2, batch: 13686, sum loss: 4602.519043, avg loss: 2.661954, ppl: 14.324257 +epoch: 2, batch: 13687, sum loss: 5361.671875, avg loss: 2.659560, ppl: 14.289996 +epoch: 2, batch: 13688, sum loss: 3936.264160, avg loss: 2.406029, ppl: 11.089841 +epoch: 2, batch: 13689, sum loss: 5079.716309, avg loss: 2.792587, ppl: 16.323198 +epoch: 2, batch: 13690, sum loss: 4059.303223, avg loss: 2.490370, ppl: 12.065740 +epoch: 2, batch: 13691, sum loss: 4539.454590, avg loss: 2.633095, ppl: 13.916769 +epoch: 2, batch: 13692, sum loss: 4808.920898, avg loss: 2.688050, ppl: 14.702971 +epoch: 2, batch: 13693, sum loss: 4014.390625, avg loss: 2.518438, ppl: 12.409203 +epoch: 2, batch: 13694, sum loss: 4484.251465, avg loss: 2.455779, ppl: 11.655505 +epoch: 2, batch: 13695, sum loss: 5254.155273, avg loss: 2.537014, ppl: 12.641863 +epoch: 2, batch: 13696, sum loss: 3969.154297, avg loss: 2.379589, ppl: 10.800461 +epoch: 2, batch: 13697, sum loss: 4419.349121, avg loss: 2.678393, ppl: 14.561679 +epoch: 2, batch: 13698, sum loss: 4263.986328, avg loss: 2.363629, ppl: 10.629455 +epoch: 2, batch: 13699, sum loss: 4111.887207, avg loss: 2.511843, ppl: 12.327632 +epoch: 2, batch: 13700, sum loss: 3704.808594, avg loss: 2.355250, ppl: 10.540765 +epoch: 2, batch: 13701, sum loss: 4505.703125, avg loss: 2.577634, ppl: 13.165946 +epoch: 2, batch: 13702, sum loss: 3790.197754, avg loss: 2.438995, ppl: 11.461512 +epoch: 2, batch: 13703, sum loss: 
4150.856934, avg loss: 2.518724, ppl: 12.412747 +epoch: 2, batch: 13704, sum loss: 4398.276855, avg loss: 2.337023, ppl: 10.350375 +epoch: 2, batch: 13705, sum loss: 4168.669434, avg loss: 2.423645, ppl: 11.286925 +epoch: 2, batch: 13706, sum loss: 3559.524902, avg loss: 2.041012, ppl: 7.698395 +epoch: 2, batch: 13707, sum loss: 3989.306152, avg loss: 2.471689, ppl: 11.842435 +epoch: 2, batch: 13708, sum loss: 4321.254395, avg loss: 2.480628, ppl: 11.948769 +epoch: 2, batch: 13709, sum loss: 4097.519043, avg loss: 2.483345, ppl: 11.981273 +epoch: 2, batch: 13710, sum loss: 4866.193359, avg loss: 2.581535, ppl: 13.217409 +epoch: 2, batch: 13711, sum loss: 3973.300537, avg loss: 2.532378, ppl: 12.583390 +epoch: 2, batch: 13712, sum loss: 4488.000488, avg loss: 2.521349, ppl: 12.445371 +epoch: 2, batch: 13713, sum loss: 3990.581543, avg loss: 2.409771, ppl: 11.131416 +epoch: 2, batch: 13714, sum loss: 4250.662109, avg loss: 2.747681, ppl: 15.606397 +epoch: 2, batch: 13715, sum loss: 4040.374512, avg loss: 2.291761, ppl: 9.892342 +epoch: 2, batch: 13716, sum loss: 4284.270508, avg loss: 2.459398, ppl: 11.697762 +epoch: 2, batch: 13717, sum loss: 4568.721191, avg loss: 2.579741, ppl: 13.193721 +epoch: 2, batch: 13718, sum loss: 4197.114258, avg loss: 2.406602, ppl: 11.096193 +epoch: 2, batch: 13719, sum loss: 5509.017090, avg loss: 2.582755, ppl: 13.233551 +epoch: 2, batch: 13720, sum loss: 4315.774414, avg loss: 2.634783, ppl: 13.940284 +epoch: 2, batch: 13721, sum loss: 4356.864258, avg loss: 2.684451, ppl: 14.650158 +epoch: 2, batch: 13722, sum loss: 3990.813721, avg loss: 2.376899, ppl: 10.771451 +epoch: 2, batch: 13723, sum loss: 4992.038086, avg loss: 2.521231, ppl: 12.443908 +epoch: 2, batch: 13724, sum loss: 3946.254883, avg loss: 2.260169, ppl: 9.584709 +epoch: 2, batch: 13725, sum loss: 4635.712891, avg loss: 2.607263, ppl: 13.561876 +epoch: 2, batch: 13726, sum loss: 3964.251465, avg loss: 2.445559, ppl: 11.536997 +epoch: 2, batch: 13727, sum loss: 
3859.503174, avg loss: 2.378006, ppl: 10.783374 +epoch: 2, batch: 13728, sum loss: 4850.495605, avg loss: 2.558278, ppl: 12.913566 +epoch: 2, batch: 13729, sum loss: 4137.745605, avg loss: 2.709722, ppl: 15.025099 +epoch: 2, batch: 13730, sum loss: 4566.420898, avg loss: 2.615361, ppl: 13.672157 +epoch: 2, batch: 13731, sum loss: 4023.035156, avg loss: 2.480293, ppl: 11.944761 +epoch: 2, batch: 13732, sum loss: 5138.024414, avg loss: 2.765352, ppl: 15.884634 +epoch: 2, batch: 13733, sum loss: 3033.127686, avg loss: 2.169619, ppl: 8.754951 +epoch: 2, batch: 13734, sum loss: 3836.645752, avg loss: 2.631444, ppl: 13.893821 +epoch: 2, batch: 13735, sum loss: 4050.309570, avg loss: 2.462194, ppl: 11.730522 +epoch: 2, batch: 13736, sum loss: 4478.977051, avg loss: 2.685238, ppl: 14.661693 +epoch: 2, batch: 13737, sum loss: 3758.245850, avg loss: 2.300028, ppl: 9.974463 +epoch: 2, batch: 13738, sum loss: 4382.415527, avg loss: 2.423902, ppl: 11.289829 +epoch: 2, batch: 13739, sum loss: 4153.334473, avg loss: 2.582919, ppl: 13.235722 +epoch: 2, batch: 13740, sum loss: 4195.071289, avg loss: 2.453258, ppl: 11.626163 +epoch: 2, batch: 13741, sum loss: 5004.381348, avg loss: 2.558477, ppl: 12.916133 +epoch: 2, batch: 13742, sum loss: 4445.964355, avg loss: 2.537651, ppl: 12.649922 +epoch: 2, batch: 13743, sum loss: 3952.603760, avg loss: 2.471922, ppl: 11.845193 +epoch: 2, batch: 13744, sum loss: 3868.894043, avg loss: 2.273146, ppl: 9.709897 +epoch: 2, batch: 13745, sum loss: 4518.080566, avg loss: 2.595107, ppl: 13.398016 +epoch: 2, batch: 13746, sum loss: 4387.429199, avg loss: 2.613120, ppl: 13.641550 +epoch: 2, batch: 13747, sum loss: 4231.521973, avg loss: 2.558357, ppl: 12.914578 +epoch: 2, batch: 13748, sum loss: 3782.825928, avg loss: 2.503525, ppl: 12.225511 +epoch: 2, batch: 13749, sum loss: 3768.851562, avg loss: 2.271761, ppl: 9.696461 +epoch: 2, batch: 13750, sum loss: 4658.049805, avg loss: 2.667841, ppl: 14.408823 +epoch: 2, batch: 13751, sum loss: 
4882.177734, avg loss: 2.895716, ppl: 18.096462 +epoch: 2, batch: 13752, sum loss: 3457.504639, avg loss: 2.267216, ppl: 9.652493 +epoch: 2, batch: 13753, sum loss: 3606.356445, avg loss: 2.459998, ppl: 11.704784 +epoch: 2, batch: 13754, sum loss: 4604.879883, avg loss: 2.669496, ppl: 14.432691 +epoch: 2, batch: 13755, sum loss: 3748.550781, avg loss: 2.578095, ppl: 13.172028 +epoch: 2, batch: 13756, sum loss: 4216.218262, avg loss: 2.451290, ppl: 11.603301 +epoch: 2, batch: 13757, sum loss: 3948.431396, avg loss: 2.414943, ppl: 11.189130 +epoch: 2, batch: 13758, sum loss: 4895.477539, avg loss: 2.862852, ppl: 17.511404 +epoch: 2, batch: 13759, sum loss: 4074.511475, avg loss: 2.278810, ppl: 9.765049 +epoch: 2, batch: 13760, sum loss: 4125.255371, avg loss: 2.473175, ppl: 11.860041 +epoch: 2, batch: 13761, sum loss: 4625.920898, avg loss: 2.587204, ppl: 13.292553 +epoch: 2, batch: 13762, sum loss: 3787.638428, avg loss: 2.432652, ppl: 11.389040 +epoch: 2, batch: 13763, sum loss: 4338.010742, avg loss: 2.493110, ppl: 12.098842 +epoch: 2, batch: 13764, sum loss: 3773.537598, avg loss: 2.439262, ppl: 11.464573 +epoch: 2, batch: 13765, sum loss: 4149.395508, avg loss: 2.486157, ppl: 12.015010 +epoch: 2, batch: 13766, sum loss: 4665.343750, avg loss: 2.785280, ppl: 16.204355 +epoch: 2, batch: 13767, sum loss: 4350.017578, avg loss: 2.555827, ppl: 12.881948 +epoch: 2, batch: 13768, sum loss: 4236.038086, avg loss: 2.357283, ppl: 10.562219 +epoch: 2, batch: 13769, sum loss: 4475.063965, avg loss: 2.465600, ppl: 11.770542 +epoch: 2, batch: 13770, sum loss: 4518.323242, avg loss: 2.622358, ppl: 13.768155 +epoch: 2, batch: 13771, sum loss: 4211.652344, avg loss: 2.440123, ppl: 11.474450 +epoch: 2, batch: 13772, sum loss: 4073.075684, avg loss: 2.415822, ppl: 11.198973 +epoch: 2, batch: 13773, sum loss: 3687.927734, avg loss: 2.391652, ppl: 10.931541 +epoch: 2, batch: 13774, sum loss: 3003.939209, avg loss: 2.065983, ppl: 7.893054 +epoch: 2, batch: 13775, sum loss: 
3529.860596, avg loss: 2.492839, ppl: 12.095571 +epoch: 2, batch: 13776, sum loss: 6277.395508, avg loss: 2.921078, ppl: 18.561281 +epoch: 2, batch: 13777, sum loss: 3808.633057, avg loss: 2.421254, ppl: 11.259973 +epoch: 2, batch: 13778, sum loss: 4829.335938, avg loss: 2.513970, ppl: 12.353876 +epoch: 2, batch: 13779, sum loss: 3702.230225, avg loss: 2.334319, ppl: 10.322429 +epoch: 2, batch: 13780, sum loss: 3694.553955, avg loss: 2.365272, ppl: 10.646930 +epoch: 2, batch: 13781, sum loss: 4076.448242, avg loss: 2.618143, ppl: 13.710238 +epoch: 2, batch: 13782, sum loss: 3671.927002, avg loss: 2.364409, ppl: 10.637750 +epoch: 2, batch: 13783, sum loss: 4514.106934, avg loss: 2.765997, ppl: 15.894878 +epoch: 2, batch: 13784, sum loss: 4067.949463, avg loss: 2.471415, ppl: 11.839190 +epoch: 2, batch: 13785, sum loss: 3689.470703, avg loss: 2.446599, ppl: 11.548996 +epoch: 2, batch: 13786, sum loss: 3944.743164, avg loss: 2.687155, ppl: 14.689820 +epoch: 2, batch: 13787, sum loss: 4567.092285, avg loss: 2.653743, ppl: 14.207120 +epoch: 2, batch: 13788, sum loss: 4172.174805, avg loss: 2.473133, ppl: 11.859543 +epoch: 2, batch: 13789, sum loss: 5360.870117, avg loss: 2.496912, ppl: 12.144936 +epoch: 2, batch: 13790, sum loss: 4187.132324, avg loss: 2.444327, ppl: 11.522794 +epoch: 2, batch: 13791, sum loss: 4687.939453, avg loss: 2.783812, ppl: 16.180584 +epoch: 2, batch: 13792, sum loss: 4365.283691, avg loss: 2.410427, ppl: 11.138721 +epoch: 2, batch: 13793, sum loss: 4568.038086, avg loss: 2.645071, ppl: 14.084449 +epoch: 2, batch: 13794, sum loss: 3945.964844, avg loss: 2.407544, ppl: 11.106651 +epoch: 2, batch: 13795, sum loss: 3783.198730, avg loss: 2.464625, ppl: 11.759068 +epoch: 2, batch: 13796, sum loss: 3837.029785, avg loss: 2.345373, ppl: 10.437164 +epoch: 2, batch: 13797, sum loss: 3975.781250, avg loss: 2.581676, ppl: 13.219275 +epoch: 2, batch: 13798, sum loss: 4343.149902, avg loss: 2.596025, ppl: 13.410329 +epoch: 2, batch: 13799, sum loss: 
4353.542969, avg loss: 2.490585, ppl: 12.068338 +epoch: 2, batch: 13800, sum loss: 4529.921875, avg loss: 2.876141, ppl: 17.745657 +epoch: 2, batch: 13801, sum loss: 4210.449219, avg loss: 2.438014, ppl: 11.450273 +epoch: 2, batch: 13802, sum loss: 4959.473633, avg loss: 2.769109, ppl: 15.944414 +epoch: 2, batch: 13803, sum loss: 5135.491211, avg loss: 2.618813, ppl: 13.719423 +epoch: 2, batch: 13804, sum loss: 4898.101562, avg loss: 2.615110, ppl: 13.668725 +epoch: 2, batch: 13805, sum loss: 4469.448242, avg loss: 2.587984, ppl: 13.302927 +epoch: 2, batch: 13806, sum loss: 3587.937988, avg loss: 2.559157, ppl: 12.924915 +epoch: 2, batch: 13807, sum loss: 4242.209473, avg loss: 2.605780, ppl: 13.541779 +epoch: 2, batch: 13808, sum loss: 3889.547607, avg loss: 2.543851, ppl: 12.728590 +epoch: 2, batch: 13809, sum loss: 3900.911621, avg loss: 2.271935, ppl: 9.698144 +epoch: 2, batch: 13810, sum loss: 4944.607422, avg loss: 2.655536, ppl: 14.232609 +epoch: 2, batch: 13811, sum loss: 4090.100830, avg loss: 2.457993, ppl: 11.681347 +epoch: 2, batch: 13812, sum loss: 3434.355469, avg loss: 2.358761, ppl: 10.577833 +epoch: 2, batch: 13813, sum loss: 4070.678955, avg loss: 2.522106, ppl: 12.454798 +epoch: 2, batch: 13814, sum loss: 4826.248535, avg loss: 2.702267, ppl: 14.913502 +epoch: 2, batch: 13815, sum loss: 5545.999512, avg loss: 2.681818, ppl: 14.611633 +epoch: 2, batch: 13816, sum loss: 3598.259766, avg loss: 2.387697, ppl: 10.888391 +epoch: 2, batch: 13817, sum loss: 3546.242432, avg loss: 2.237377, ppl: 9.368724 +epoch: 2, batch: 13818, sum loss: 3709.782715, avg loss: 2.263443, ppl: 9.616138 +epoch: 2, batch: 13819, sum loss: 3050.622559, avg loss: 2.398288, ppl: 11.004321 +epoch: 2, batch: 13820, sum loss: 4293.666504, avg loss: 2.574141, ppl: 13.120036 +epoch: 2, batch: 13821, sum loss: 3422.538818, avg loss: 2.196752, ppl: 8.995744 +epoch: 2, batch: 13822, sum loss: 4278.898926, avg loss: 2.391783, ppl: 10.932965 +epoch: 2, batch: 13823, sum loss: 
3993.904297, avg loss: 2.590080, ppl: 13.330845 +epoch: 2, batch: 13824, sum loss: 3981.849365, avg loss: 2.493331, ppl: 12.101519 +epoch: 2, batch: 13825, sum loss: 5292.032715, avg loss: 2.522418, ppl: 12.458686 +epoch: 2, batch: 13826, sum loss: 4614.810059, avg loss: 2.889674, ppl: 17.987452 +epoch: 2, batch: 13827, sum loss: 3474.841797, avg loss: 2.354229, ppl: 10.530006 +epoch: 2, batch: 13828, sum loss: 4168.124023, avg loss: 2.391351, ppl: 10.928243 +epoch: 2, batch: 13829, sum loss: 3902.214111, avg loss: 2.260842, ppl: 9.591165 +epoch: 2, batch: 13830, sum loss: 4287.870117, avg loss: 2.410270, ppl: 11.136965 +epoch: 2, batch: 13831, sum loss: 4163.412598, avg loss: 2.511105, ppl: 12.318539 +epoch: 2, batch: 13832, sum loss: 3631.671387, avg loss: 2.455491, ppl: 11.652154 +epoch: 2, batch: 13833, sum loss: 4542.809082, avg loss: 2.573829, ppl: 13.115954 +epoch: 2, batch: 13834, sum loss: 4747.981934, avg loss: 2.728725, ppl: 15.313353 +epoch: 2, batch: 13835, sum loss: 4295.903809, avg loss: 2.552527, ppl: 12.839514 +epoch: 2, batch: 13836, sum loss: 4371.925781, avg loss: 2.313188, ppl: 10.106596 +epoch: 2, batch: 13837, sum loss: 4395.196289, avg loss: 2.704736, ppl: 14.950373 +epoch: 2, batch: 13838, sum loss: 4581.832031, avg loss: 2.798920, ppl: 16.426899 +epoch: 2, batch: 13839, sum loss: 4508.128906, avg loss: 2.463459, ppl: 11.745363 +epoch: 2, batch: 13840, sum loss: 4653.622559, avg loss: 2.598337, ppl: 13.441372 +epoch: 2, batch: 13841, sum loss: 4453.699707, avg loss: 2.622909, ppl: 13.775743 +epoch: 2, batch: 13842, sum loss: 5193.289062, avg loss: 2.912669, ppl: 18.405861 +epoch: 2, batch: 13843, sum loss: 4109.160645, avg loss: 2.445929, ppl: 11.541267 +epoch: 2, batch: 13844, sum loss: 4461.407227, avg loss: 2.467592, ppl: 11.794019 +epoch: 2, batch: 13845, sum loss: 4222.315918, avg loss: 2.601550, ppl: 13.484625 +epoch: 2, batch: 13846, sum loss: 4090.591797, avg loss: 2.452393, ppl: 11.616115 +epoch: 2, batch: 13847, sum loss: 
4608.169434, avg loss: 2.774334, ppl: 16.027956 +epoch: 2, batch: 13848, sum loss: 3584.750732, avg loss: 2.333822, ppl: 10.317302 +epoch: 2, batch: 13849, sum loss: 3653.245605, avg loss: 2.200750, ppl: 9.031788 +epoch: 2, batch: 13850, sum loss: 4263.927734, avg loss: 2.563998, ppl: 12.987632 +epoch: 2, batch: 13851, sum loss: 4355.028809, avg loss: 2.375902, ppl: 10.760716 +epoch: 2, batch: 13852, sum loss: 3390.396484, avg loss: 2.019295, ppl: 7.533012 +epoch: 2, batch: 13853, sum loss: 4858.455078, avg loss: 2.517334, ppl: 12.395512 +epoch: 2, batch: 13854, sum loss: 4218.296387, avg loss: 2.462520, ppl: 11.734341 +epoch: 2, batch: 13855, sum loss: 3701.228027, avg loss: 2.288948, ppl: 9.864551 +epoch: 2, batch: 13856, sum loss: 3992.697510, avg loss: 2.626775, ppl: 13.829096 +epoch: 2, batch: 13857, sum loss: 3852.954590, avg loss: 2.400595, ppl: 11.029734 +epoch: 2, batch: 13858, sum loss: 3797.832275, avg loss: 2.324255, ppl: 10.219062 +epoch: 2, batch: 13859, sum loss: 3694.215332, avg loss: 2.344045, ppl: 10.423315 +epoch: 2, batch: 13860, sum loss: 4616.778320, avg loss: 2.545082, ppl: 12.744271 +epoch: 2, batch: 13861, sum loss: 4134.716797, avg loss: 2.481823, ppl: 11.963050 +epoch: 2, batch: 13862, sum loss: 4643.097656, avg loss: 2.559591, ppl: 12.930525 +epoch: 2, batch: 13863, sum loss: 3958.468506, avg loss: 2.607687, ppl: 13.567629 +epoch: 2, batch: 13864, sum loss: 3976.010010, avg loss: 2.615796, ppl: 13.678101 +epoch: 2, batch: 13865, sum loss: 4374.842285, avg loss: 2.525890, ppl: 12.502022 +epoch: 2, batch: 13866, sum loss: 5165.035156, avg loss: 2.592889, ppl: 13.368341 +epoch: 2, batch: 13867, sum loss: 3879.483398, avg loss: 2.403645, ppl: 11.063433 +epoch: 2, batch: 13868, sum loss: 4876.203613, avg loss: 2.535727, ppl: 12.625609 +epoch: 2, batch: 13869, sum loss: 4187.276855, avg loss: 2.525499, ppl: 12.497125 +epoch: 2, batch: 13870, sum loss: 3937.789062, avg loss: 2.508146, ppl: 12.282139 +epoch: 2, batch: 13871, sum loss: 
4314.066895, avg loss: 2.485062, ppl: 12.001860 +epoch: 2, batch: 13872, sum loss: 4501.419434, avg loss: 2.600473, ppl: 13.470114 +epoch: 2, batch: 13873, sum loss: 4298.956055, avg loss: 2.474932, ppl: 11.880896 +epoch: 2, batch: 13874, sum loss: 4436.511230, avg loss: 2.400710, ppl: 11.031001 +epoch: 2, batch: 13875, sum loss: 4322.809570, avg loss: 2.613549, ppl: 13.647396 +epoch: 2, batch: 13876, sum loss: 4615.690430, avg loss: 2.449942, ppl: 11.587673 +epoch: 2, batch: 13877, sum loss: 3566.489014, avg loss: 2.442801, ppl: 11.505219 +epoch: 2, batch: 13878, sum loss: 3666.182129, avg loss: 2.297107, ppl: 9.945364 +epoch: 2, batch: 13879, sum loss: 4501.955566, avg loss: 2.387039, ppl: 10.881228 +epoch: 2, batch: 13880, sum loss: 4118.223633, avg loss: 2.279039, ppl: 9.767288 +epoch: 2, batch: 13881, sum loss: 3757.798828, avg loss: 2.326810, ppl: 10.245211 +epoch: 2, batch: 13882, sum loss: 3653.836670, avg loss: 2.439143, ppl: 11.463215 +epoch: 2, batch: 13883, sum loss: 3615.173340, avg loss: 2.333876, ppl: 10.317853 +epoch: 2, batch: 13884, sum loss: 5073.227051, avg loss: 2.984251, ppl: 19.771698 +epoch: 2, batch: 13885, sum loss: 3803.316650, avg loss: 2.333323, ppl: 10.312152 +epoch: 2, batch: 13886, sum loss: 4606.959961, avg loss: 2.681583, ppl: 14.608202 +epoch: 2, batch: 13887, sum loss: 3516.298340, avg loss: 2.245401, ppl: 9.444203 +epoch: 2, batch: 13888, sum loss: 4187.701660, avg loss: 2.572298, ppl: 13.095888 +epoch: 2, batch: 13889, sum loss: 4212.976562, avg loss: 2.484066, ppl: 11.989923 +epoch: 2, batch: 13890, sum loss: 3397.526855, avg loss: 2.354488, ppl: 10.532739 +epoch: 2, batch: 13891, sum loss: 4697.748047, avg loss: 2.729662, ppl: 15.327701 +epoch: 2, batch: 13892, sum loss: 4609.623535, avg loss: 2.535547, ppl: 12.623328 +epoch: 2, batch: 13893, sum loss: 4183.525391, avg loss: 2.400187, ppl: 11.025235 +epoch: 2, batch: 13894, sum loss: 4567.437500, avg loss: 2.669455, ppl: 14.432106 +epoch: 2, batch: 13895, sum loss: 
4106.253906, avg loss: 2.466219, ppl: 11.777827 +epoch: 2, batch: 13896, sum loss: 4442.826660, avg loss: 2.702449, ppl: 14.916222 +epoch: 2, batch: 13897, sum loss: 4619.839355, avg loss: 2.771349, ppl: 15.980180 +epoch: 2, batch: 13898, sum loss: 5043.455078, avg loss: 2.860723, ppl: 17.474161 +epoch: 2, batch: 13899, sum loss: 4146.575195, avg loss: 2.578716, ppl: 13.180202 +epoch: 2, batch: 13900, sum loss: 4461.978027, avg loss: 2.496910, ppl: 12.144906 +epoch: 2, batch: 13901, sum loss: 4752.248047, avg loss: 2.550858, ppl: 12.818094 +epoch: 2, batch: 13902, sum loss: 5335.098633, avg loss: 2.691775, ppl: 14.757853 +epoch: 2, batch: 13903, sum loss: 3616.769775, avg loss: 2.424109, ppl: 11.292158 +epoch: 2, batch: 13904, sum loss: 4906.363770, avg loss: 2.734874, ppl: 15.407802 +epoch: 2, batch: 13905, sum loss: 4286.905273, avg loss: 2.609194, ppl: 13.588088 +epoch: 2, batch: 13906, sum loss: 3659.843018, avg loss: 2.399897, ppl: 11.022042 +epoch: 2, batch: 13907, sum loss: 4593.335938, avg loss: 2.650511, ppl: 14.161274 +epoch: 2, batch: 13908, sum loss: 4692.290527, avg loss: 2.977342, ppl: 19.635550 +epoch: 2, batch: 13909, sum loss: 4513.630859, avg loss: 2.485479, ppl: 12.006874 +epoch: 2, batch: 13910, sum loss: 5508.558105, avg loss: 2.897716, ppl: 18.132685 +epoch: 2, batch: 13911, sum loss: 4083.564453, avg loss: 2.459979, ppl: 11.704561 +epoch: 2, batch: 13912, sum loss: 3467.572510, avg loss: 2.327230, ppl: 10.249511 +epoch: 2, batch: 13913, sum loss: 4326.614258, avg loss: 2.302615, ppl: 10.000303 +epoch: 2, batch: 13914, sum loss: 4375.543945, avg loss: 2.516127, ppl: 12.380549 +epoch: 2, batch: 13915, sum loss: 4365.401855, avg loss: 2.437410, ppl: 11.443365 +epoch: 2, batch: 13916, sum loss: 4750.217773, avg loss: 2.484423, ppl: 11.994203 +epoch: 2, batch: 13917, sum loss: 5356.600098, avg loss: 2.726005, ppl: 15.271755 +epoch: 2, batch: 13918, sum loss: 4493.319824, avg loss: 2.770234, ppl: 15.962371 +epoch: 2, batch: 13919, sum loss: 
4097.162109, avg loss: 2.504378, ppl: 12.235944 +epoch: 2, batch: 13920, sum loss: 4126.775879, avg loss: 2.449125, ppl: 11.578212 +epoch: 2, batch: 13921, sum loss: 3643.703125, avg loss: 2.424287, ppl: 11.294172 +epoch: 2, batch: 13922, sum loss: 3855.835938, avg loss: 2.484431, ppl: 11.994288 +epoch: 2, batch: 13923, sum loss: 4622.823730, avg loss: 2.697097, ppl: 14.836596 +epoch: 2, batch: 13924, sum loss: 3727.256348, avg loss: 2.260313, ppl: 9.586089 +epoch: 2, batch: 13925, sum loss: 3237.280273, avg loss: 2.339075, ppl: 10.371642 +epoch: 2, batch: 13926, sum loss: 5021.014648, avg loss: 2.761834, ppl: 15.828849 +epoch: 2, batch: 13927, sum loss: 5496.092285, avg loss: 2.641082, ppl: 14.028378 +epoch: 2, batch: 13928, sum loss: 3583.419678, avg loss: 2.323878, ppl: 10.215210 +epoch: 2, batch: 13929, sum loss: 4488.086426, avg loss: 2.305129, ppl: 10.025475 +epoch: 2, batch: 13930, sum loss: 3170.931641, avg loss: 2.160035, ppl: 8.671442 +epoch: 2, batch: 13931, sum loss: 4373.950195, avg loss: 2.468369, ppl: 11.803183 +epoch: 2, batch: 13932, sum loss: 3724.672119, avg loss: 2.474865, ppl: 11.880106 +epoch: 2, batch: 13933, sum loss: 4049.104004, avg loss: 2.484113, ppl: 11.990479 +epoch: 2, batch: 13934, sum loss: 4840.010254, avg loss: 2.678478, ppl: 14.562915 +epoch: 2, batch: 13935, sum loss: 3548.911621, avg loss: 2.282259, ppl: 9.798786 +epoch: 2, batch: 13936, sum loss: 4868.820312, avg loss: 2.607831, ppl: 13.569583 +epoch: 2, batch: 13937, sum loss: 4130.169922, avg loss: 2.510741, ppl: 12.314057 +epoch: 2, batch: 13938, sum loss: 4062.341553, avg loss: 2.524762, ppl: 12.487919 +epoch: 2, batch: 13939, sum loss: 4324.982422, avg loss: 2.400101, ppl: 11.024295 +epoch: 2, batch: 13940, sum loss: 4923.323730, avg loss: 2.806912, ppl: 16.558704 +epoch: 2, batch: 13941, sum loss: 3890.732422, avg loss: 2.421115, ppl: 11.258410 +epoch: 2, batch: 13942, sum loss: 4792.811523, avg loss: 2.639213, ppl: 14.002184 +epoch: 2, batch: 13943, sum loss: 
5146.450195, avg loss: 2.813806, ppl: 16.673248 +epoch: 2, batch: 13944, sum loss: 4858.002441, avg loss: 2.732285, ppl: 15.367963 +epoch: 2, batch: 13945, sum loss: 4380.285156, avg loss: 2.397529, ppl: 10.995971 +epoch: 2, batch: 13946, sum loss: 5283.338867, avg loss: 2.713579, ppl: 15.083168 +epoch: 2, batch: 13947, sum loss: 3621.877930, avg loss: 2.222011, ppl: 9.225866 +epoch: 2, batch: 13948, sum loss: 4445.778809, avg loss: 2.673349, ppl: 14.488405 +epoch: 2, batch: 13949, sum loss: 4651.110352, avg loss: 2.470053, ppl: 11.823076 +epoch: 2, batch: 13950, sum loss: 3596.276123, avg loss: 2.411989, ppl: 11.156134 +epoch: 2, batch: 13951, sum loss: 3696.506836, avg loss: 2.327775, ppl: 10.255101 +epoch: 2, batch: 13952, sum loss: 4012.721680, avg loss: 2.242997, ppl: 9.421525 +epoch: 2, batch: 13953, sum loss: 3975.498535, avg loss: 2.379114, ppl: 10.795330 +epoch: 2, batch: 13954, sum loss: 3681.793213, avg loss: 2.491065, ppl: 12.074122 +epoch: 2, batch: 13955, sum loss: 4474.597168, avg loss: 2.884975, ppl: 17.903128 +epoch: 2, batch: 13956, sum loss: 3721.143311, avg loss: 2.505820, ppl: 12.253606 +epoch: 2, batch: 13957, sum loss: 4236.108887, avg loss: 2.584569, ppl: 13.257573 +epoch: 2, batch: 13958, sum loss: 3844.016113, avg loss: 2.504245, ppl: 12.234319 +epoch: 2, batch: 13959, sum loss: 4934.771973, avg loss: 2.879097, ppl: 17.798189 +epoch: 2, batch: 13960, sum loss: 4353.773438, avg loss: 2.558034, ppl: 12.910407 +epoch: 2, batch: 13961, sum loss: 3685.930176, avg loss: 2.588434, ppl: 13.308916 +epoch: 2, batch: 13962, sum loss: 3892.416016, avg loss: 2.535776, ppl: 12.626224 +epoch: 2, batch: 13963, sum loss: 3889.920898, avg loss: 2.567605, ppl: 13.034563 +epoch: 2, batch: 13964, sum loss: 4350.677734, avg loss: 2.514843, ppl: 12.364661 +epoch: 2, batch: 13965, sum loss: 4194.977051, avg loss: 2.492559, ppl: 12.092183 +epoch: 2, batch: 13966, sum loss: 3498.791016, avg loss: 2.242815, ppl: 9.419808 +epoch: 2, batch: 13967, sum loss: 
3759.376709, avg loss: 2.479800, ppl: 11.938876 +epoch: 2, batch: 13968, sum loss: 4551.961426, avg loss: 2.409720, ppl: 11.130846 +epoch: 2, batch: 13969, sum loss: 4247.746094, avg loss: 2.525414, ppl: 12.496064 +epoch: 2, batch: 13970, sum loss: 5103.582520, avg loss: 2.656732, ppl: 14.249650 +epoch: 2, batch: 13971, sum loss: 4317.420898, avg loss: 2.553176, ppl: 12.847849 +epoch: 2, batch: 13972, sum loss: 3997.686279, avg loss: 2.379575, ppl: 10.800315 +epoch: 2, batch: 13973, sum loss: 5491.856445, avg loss: 2.882864, ppl: 17.865370 +epoch: 2, batch: 13974, sum loss: 4248.062500, avg loss: 2.646768, ppl: 14.108365 +epoch: 2, batch: 13975, sum loss: 4940.599121, avg loss: 2.510467, ppl: 12.310681 +epoch: 2, batch: 13976, sum loss: 4764.897949, avg loss: 2.493406, ppl: 12.102422 +epoch: 2, batch: 13977, sum loss: 4012.735352, avg loss: 2.696731, ppl: 14.831167 +epoch: 2, batch: 13978, sum loss: 5163.848633, avg loss: 2.755522, ppl: 15.729254 +epoch: 2, batch: 13979, sum loss: 3911.698486, avg loss: 2.450939, ppl: 11.599233 +epoch: 2, batch: 13980, sum loss: 3981.545410, avg loss: 2.490022, ppl: 12.061543 +epoch: 2, batch: 13981, sum loss: 4215.244141, avg loss: 2.730081, ppl: 15.334123 +epoch: 2, batch: 13982, sum loss: 3953.682861, avg loss: 2.500748, ppl: 12.191612 +epoch: 2, batch: 13983, sum loss: 3944.178711, avg loss: 2.451323, ppl: 11.603688 +epoch: 2, batch: 13984, sum loss: 4151.062988, avg loss: 2.563967, ppl: 12.987239 +epoch: 2, batch: 13985, sum loss: 4538.451172, avg loss: 2.649417, ppl: 14.145788 +epoch: 2, batch: 13986, sum loss: 3130.006348, avg loss: 2.250184, ppl: 9.489485 +epoch: 2, batch: 13987, sum loss: 4431.896973, avg loss: 2.538315, ppl: 12.658319 +epoch: 2, batch: 13988, sum loss: 4157.001953, avg loss: 2.458310, ppl: 11.685046 +epoch: 2, batch: 13989, sum loss: 4109.282227, avg loss: 2.490474, ppl: 12.066995 +epoch: 2, batch: 13990, sum loss: 3893.093750, avg loss: 2.635812, ppl: 13.954637 +epoch: 2, batch: 13991, sum loss: 
4031.541016, avg loss: 2.516567, ppl: 12.386008 +epoch: 2, batch: 13992, sum loss: 4702.809570, avg loss: 2.445559, ppl: 11.536995 +epoch: 2, batch: 13993, sum loss: 4758.263672, avg loss: 2.532338, ppl: 12.582895 +epoch: 2, batch: 13994, sum loss: 4881.288086, avg loss: 2.625760, ppl: 13.815071 +epoch: 2, batch: 13995, sum loss: 4156.195312, avg loss: 2.491724, ppl: 12.082085 +epoch: 2, batch: 13996, sum loss: 4202.871582, avg loss: 2.506185, ppl: 12.258071 +epoch: 2, batch: 13997, sum loss: 4903.897461, avg loss: 2.448276, ppl: 11.568389 +epoch: 2, batch: 13998, sum loss: 5019.241699, avg loss: 2.739761, ppl: 15.483282 +epoch: 2, batch: 13999, sum loss: 4696.814941, avg loss: 2.746675, ppl: 15.590711 +epoch: 2, batch: 14000, sum loss: 4381.371582, avg loss: 2.655377, ppl: 14.230345 +epoch: 2, batch: 14001, sum loss: 3430.555420, avg loss: 2.099483, ppl: 8.161951 +epoch: 2, batch: 14002, sum loss: 5264.272949, avg loss: 2.593238, ppl: 13.373002 +epoch: 2, batch: 14003, sum loss: 4337.265625, avg loss: 2.534930, ppl: 12.615554 +epoch: 2, batch: 14004, sum loss: 4143.912109, avg loss: 2.460756, ppl: 11.713659 +epoch: 2, batch: 14005, sum loss: 4243.933105, avg loss: 2.645844, ppl: 14.095333 +epoch: 2, batch: 14006, sum loss: 4707.902344, avg loss: 2.528412, ppl: 12.533583 +epoch: 2, batch: 14007, sum loss: 3163.102295, avg loss: 2.224404, ppl: 9.247968 +epoch: 2, batch: 14008, sum loss: 4257.577148, avg loss: 2.569449, ppl: 13.058626 +epoch: 2, batch: 14009, sum loss: 4746.379883, avg loss: 2.454178, ppl: 11.636863 +epoch: 2, batch: 14010, sum loss: 4584.398438, avg loss: 2.600339, ppl: 13.468308 +epoch: 2, batch: 14011, sum loss: 4965.568848, avg loss: 2.595697, ppl: 13.405931 +epoch: 2, batch: 14012, sum loss: 4768.417480, avg loss: 2.615698, ppl: 13.676761 +epoch: 2, batch: 14013, sum loss: 4095.056152, avg loss: 2.504622, ppl: 12.238926 +epoch: 2, batch: 14014, sum loss: 4065.135986, avg loss: 2.425499, ppl: 11.307870 +epoch: 2, batch: 14015, sum loss: 
4309.199707, avg loss: 2.506806, ppl: 12.265693 +epoch: 2, batch: 14016, sum loss: 4890.757324, avg loss: 2.649381, ppl: 14.145279 +epoch: 2, batch: 14017, sum loss: 3848.073975, avg loss: 2.323716, ppl: 10.213559 +epoch: 2, batch: 14018, sum loss: 4416.054688, avg loss: 2.571960, ppl: 13.091455 +epoch: 2, batch: 14019, sum loss: 3618.880127, avg loss: 2.499227, ppl: 12.173079 +epoch: 2, batch: 14020, sum loss: 3444.988281, avg loss: 2.202678, ppl: 9.049214 +epoch: 2, batch: 14021, sum loss: 3821.651123, avg loss: 2.372223, ppl: 10.721201 +epoch: 2, batch: 14022, sum loss: 4518.334961, avg loss: 2.546976, ppl: 12.768429 +epoch: 2, batch: 14023, sum loss: 4671.884766, avg loss: 2.547374, ppl: 12.773520 +epoch: 2, batch: 14024, sum loss: 4149.918457, avg loss: 2.509020, ppl: 12.292872 +epoch: 2, batch: 14025, sum loss: 4128.987793, avg loss: 2.490342, ppl: 12.065407 +epoch: 2, batch: 14026, sum loss: 3641.222900, avg loss: 2.271505, ppl: 9.693980 +epoch: 2, batch: 14027, sum loss: 4002.408936, avg loss: 2.567292, ppl: 13.030496 +epoch: 2, batch: 14028, sum loss: 4390.111816, avg loss: 2.491550, ppl: 12.079991 +epoch: 2, batch: 14029, sum loss: 4355.863281, avg loss: 2.746446, ppl: 15.587135 +epoch: 2, batch: 14030, sum loss: 4255.600586, avg loss: 2.548264, ppl: 12.784887 +epoch: 2, batch: 14031, sum loss: 4584.450195, avg loss: 2.613712, ppl: 13.649619 +epoch: 2, batch: 14032, sum loss: 4575.546875, avg loss: 2.559031, ppl: 12.923285 +epoch: 2, batch: 14033, sum loss: 4320.682617, avg loss: 2.263323, ppl: 9.614983 +epoch: 2, batch: 14034, sum loss: 3651.207520, avg loss: 2.445551, ppl: 11.536903 +epoch: 2, batch: 14035, sum loss: 3936.075684, avg loss: 2.639890, ppl: 14.011662 +epoch: 2, batch: 14036, sum loss: 4598.041016, avg loss: 2.714310, ppl: 15.094191 +epoch: 2, batch: 14037, sum loss: 4601.309082, avg loss: 2.633835, ppl: 13.927073 +epoch: 2, batch: 14038, sum loss: 4293.640625, avg loss: 2.542120, ppl: 12.706580 +epoch: 2, batch: 14039, sum loss: 
3942.022461, avg loss: 2.559755, ppl: 12.932647 +epoch: 2, batch: 14040, sum loss: 4265.789062, avg loss: 2.586895, ppl: 13.288441 +epoch: 2, batch: 14041, sum loss: 5288.821289, avg loss: 2.857278, ppl: 17.414059 +epoch: 2, batch: 14042, sum loss: 4684.610352, avg loss: 2.534962, ppl: 12.615956 +epoch: 2, batch: 14043, sum loss: 4047.082520, avg loss: 2.609338, ppl: 13.590045 +epoch: 2, batch: 14044, sum loss: 4315.091797, avg loss: 2.675196, ppl: 14.515201 +epoch: 2, batch: 14045, sum loss: 4347.038086, avg loss: 2.623439, ppl: 13.783040 +epoch: 2, batch: 14046, sum loss: 4743.091309, avg loss: 2.621941, ppl: 13.762412 +epoch: 2, batch: 14047, sum loss: 3165.305176, avg loss: 2.199656, ppl: 9.021910 +epoch: 2, batch: 14048, sum loss: 4025.696289, avg loss: 2.416384, ppl: 11.205270 +epoch: 2, batch: 14049, sum loss: 5172.677734, avg loss: 2.818898, ppl: 16.758377 +epoch: 2, batch: 14050, sum loss: 3574.517578, avg loss: 2.285497, ppl: 9.830573 +epoch: 2, batch: 14051, sum loss: 4141.652832, avg loss: 2.468208, ppl: 11.801278 +epoch: 2, batch: 14052, sum loss: 4044.013184, avg loss: 2.499390, ppl: 12.175066 +epoch: 2, batch: 14053, sum loss: 5017.433594, avg loss: 2.668848, ppl: 14.423338 +epoch: 2, batch: 14054, sum loss: 5387.213379, avg loss: 2.716699, ppl: 15.130289 +epoch: 2, batch: 14055, sum loss: 4748.065430, avg loss: 2.497667, ppl: 12.154109 +epoch: 2, batch: 14056, sum loss: 4739.340820, avg loss: 2.650638, ppl: 14.163074 +epoch: 2, batch: 14057, sum loss: 5560.630859, avg loss: 2.851605, ppl: 17.315557 +epoch: 2, batch: 14058, sum loss: 3984.733398, avg loss: 2.564178, ppl: 12.989983 +epoch: 2, batch: 14059, sum loss: 3957.220459, avg loss: 2.424767, ppl: 11.299602 +epoch: 2, batch: 14060, sum loss: 5190.960449, avg loss: 2.710684, ppl: 15.039563 +epoch: 2, batch: 14061, sum loss: 3588.292725, avg loss: 2.343758, ppl: 10.420319 +epoch: 2, batch: 14062, sum loss: 4647.804199, avg loss: 2.705358, ppl: 14.959664 +epoch: 2, batch: 14063, sum loss: 
4134.028320, avg loss: 2.600018, ppl: 13.463978 +epoch: 2, batch: 14064, sum loss: 4135.788086, avg loss: 2.501989, ppl: 12.206751 +epoch: 2, batch: 14065, sum loss: 4569.003906, avg loss: 2.548245, ppl: 12.784653 +epoch: 2, batch: 14066, sum loss: 4838.333008, avg loss: 2.381069, ppl: 10.816464 +epoch: 2, batch: 14067, sum loss: 4981.072266, avg loss: 2.793647, ppl: 16.340506 +epoch: 2, batch: 14068, sum loss: 4264.893555, avg loss: 2.431524, ppl: 11.376209 +epoch: 2, batch: 14069, sum loss: 3959.047607, avg loss: 2.579184, ppl: 13.186375 +epoch: 2, batch: 14070, sum loss: 3941.035889, avg loss: 2.400144, ppl: 11.024762 +epoch: 2, batch: 14071, sum loss: 3986.953857, avg loss: 2.391694, ppl: 10.931995 +epoch: 2, batch: 14072, sum loss: 3962.745117, avg loss: 2.462862, ppl: 11.738358 +epoch: 2, batch: 14073, sum loss: 3209.107422, avg loss: 2.187531, ppl: 8.913175 +epoch: 2, batch: 14074, sum loss: 3258.821777, avg loss: 2.334400, ppl: 10.323261 +epoch: 2, batch: 14075, sum loss: 4445.698242, avg loss: 2.552066, ppl: 12.833586 +epoch: 2, batch: 14076, sum loss: 4984.443359, avg loss: 2.462670, ppl: 11.736103 +epoch: 2, batch: 14077, sum loss: 4353.319336, avg loss: 2.500471, ppl: 12.188229 +epoch: 2, batch: 14078, sum loss: 4243.935059, avg loss: 2.460252, ppl: 11.707765 +epoch: 2, batch: 14079, sum loss: 4282.561035, avg loss: 2.513240, ppl: 12.344864 +epoch: 2, batch: 14080, sum loss: 3989.261230, avg loss: 2.189496, ppl: 8.930709 +epoch: 2, batch: 14081, sum loss: 4230.653320, avg loss: 2.469733, ppl: 11.819293 +epoch: 2, batch: 14082, sum loss: 4864.219727, avg loss: 2.763761, ppl: 15.859379 +epoch: 2, batch: 14083, sum loss: 4046.346191, avg loss: 2.420064, ppl: 11.246576 +epoch: 2, batch: 14084, sum loss: 2926.702148, avg loss: 1.995025, ppl: 7.352389 +epoch: 2, batch: 14085, sum loss: 4437.580078, avg loss: 2.714116, ppl: 15.091265 +epoch: 2, batch: 14086, sum loss: 3470.751709, avg loss: 2.330928, ppl: 10.287482 +epoch: 2, batch: 14087, sum loss: 
3597.366699, avg loss: 2.198879, ppl: 9.014907 +epoch: 2, batch: 14088, sum loss: 4131.886230, avg loss: 2.613464, ppl: 13.646235 +epoch: 2, batch: 14089, sum loss: 4859.909180, avg loss: 2.587811, ppl: 13.300625 +epoch: 2, batch: 14090, sum loss: 3942.132080, avg loss: 2.409616, ppl: 11.129689 +epoch: 2, batch: 14091, sum loss: 4753.657227, avg loss: 2.711727, ppl: 15.055252 +epoch: 2, batch: 14092, sum loss: 3699.670898, avg loss: 2.371584, ppl: 10.714350 +epoch: 2, batch: 14093, sum loss: 3832.584473, avg loss: 2.275882, ppl: 9.736498 +epoch: 2, batch: 14094, sum loss: 4775.873535, avg loss: 2.429234, ppl: 11.350183 +epoch: 2, batch: 14095, sum loss: 5285.538086, avg loss: 2.754319, ppl: 15.710338 +epoch: 2, batch: 14096, sum loss: 4709.901367, avg loss: 2.662466, ppl: 14.331581 +epoch: 2, batch: 14097, sum loss: 3177.136475, avg loss: 2.204814, ppl: 9.068563 +epoch: 2, batch: 14098, sum loss: 5026.793457, avg loss: 2.907341, ppl: 18.308062 +epoch: 2, batch: 14099, sum loss: 3660.961670, avg loss: 2.233656, ppl: 9.333926 +epoch: 2, batch: 14100, sum loss: 4698.472656, avg loss: 2.680247, ppl: 14.588694 +epoch: 2, batch: 14101, sum loss: 4940.031250, avg loss: 2.647391, ppl: 14.117153 +epoch: 2, batch: 14102, sum loss: 4583.010742, avg loss: 2.460016, ppl: 11.705005 +epoch: 2, batch: 14103, sum loss: 4664.616699, avg loss: 2.533741, ppl: 12.600554 +epoch: 2, batch: 14104, sum loss: 4240.446289, avg loss: 2.603098, ppl: 13.505509 +epoch: 2, batch: 14105, sum loss: 4328.994629, avg loss: 2.506656, ppl: 12.263850 +epoch: 2, batch: 14106, sum loss: 4163.866699, avg loss: 2.474074, ppl: 11.870711 +epoch: 2, batch: 14107, sum loss: 4105.208984, avg loss: 2.565756, ppl: 13.010486 +epoch: 2, batch: 14108, sum loss: 3553.504639, avg loss: 2.289629, ppl: 9.871280 +epoch: 2, batch: 14109, sum loss: 4057.245361, avg loss: 2.479979, ppl: 11.941014 +epoch: 2, batch: 14110, sum loss: 4051.763184, avg loss: 2.580741, ppl: 13.206920 +epoch: 2, batch: 14111, sum loss: 4232.033203, 
avg loss: 2.532635, ppl: 12.586631 +epoch: 2, batch: 14112, sum loss: 4127.704590, avg loss: 2.563792, ppl: 12.984957 +epoch: 2, batch: 14113, sum loss: 4791.055664, avg loss: 2.714479, ppl: 15.096745 +epoch: 2, batch: 14114, sum loss: 4572.648926, avg loss: 2.486487, ppl: 12.018975 +epoch: 2, batch: 14115, sum loss: 3705.842285, avg loss: 2.372498, ppl: 10.724151 +epoch: 2, batch: 14116, sum loss: 4564.282715, avg loss: 2.742959, ppl: 15.532872 +epoch: 2, batch: 14117, sum loss: 2947.422852, avg loss: 2.243092, ppl: 9.422421 +epoch: 2, batch: 14118, sum loss: 4746.411621, avg loss: 2.354371, ppl: 10.531504 +epoch: 2, batch: 14119, sum loss: 4055.235352, avg loss: 2.471198, ppl: 11.836614 +epoch: 2, batch: 14120, sum loss: 3876.906494, avg loss: 2.464658, ppl: 11.759455 +epoch: 2, batch: 14121, sum loss: 4597.237305, avg loss: 2.669708, ppl: 14.435754 +epoch: 2, batch: 14122, sum loss: 3824.263916, avg loss: 2.421953, ppl: 11.267843 +epoch: 2, batch: 14123, sum loss: 4244.371582, avg loss: 2.451977, ppl: 11.611274 +epoch: 2, batch: 14124, sum loss: 4469.312500, avg loss: 2.512261, ppl: 12.332785 +epoch: 2, batch: 14125, sum loss: 4072.132812, avg loss: 2.450140, ppl: 11.589972 +epoch: 2, batch: 14126, sum loss: 4942.540039, avg loss: 2.605451, ppl: 13.537328 +epoch: 2, batch: 14127, sum loss: 3935.149902, avg loss: 2.396559, ppl: 10.985313 +epoch: 2, batch: 14128, sum loss: 4516.163574, avg loss: 2.631797, ppl: 13.898725 +epoch: 2, batch: 14129, sum loss: 3459.583252, avg loss: 2.291115, ppl: 9.885952 +epoch: 2, batch: 14130, sum loss: 4626.364258, avg loss: 2.694446, ppl: 14.797323 +epoch: 2, batch: 14131, sum loss: 4321.307617, avg loss: 2.442797, ppl: 11.505176 +epoch: 2, batch: 14132, sum loss: 3628.610840, avg loss: 2.168925, ppl: 8.748870 +epoch: 2, batch: 14133, sum loss: 3948.709961, avg loss: 2.460255, ppl: 11.707801 +epoch: 2, batch: 14134, sum loss: 4828.492188, avg loss: 2.666202, ppl: 14.385235 +epoch: 2, batch: 14135, sum loss: 4471.017090, avg loss: 
2.559254, ppl: 12.926173 +epoch: 2, batch: 14136, sum loss: 4136.167969, avg loss: 2.347428, ppl: 10.458636 +epoch: 2, batch: 14137, sum loss: 5347.626953, avg loss: 2.664488, ppl: 14.360593 +epoch: 2, batch: 14138, sum loss: 4470.778320, avg loss: 2.763151, ppl: 15.849706 +epoch: 2, batch: 14139, sum loss: 3923.428711, avg loss: 2.484755, ppl: 11.998183 +epoch: 2, batch: 14140, sum loss: 3393.051270, avg loss: 2.381088, ppl: 10.816670 +epoch: 2, batch: 14141, sum loss: 3864.852051, avg loss: 2.378371, ppl: 10.787311 +epoch: 2, batch: 14142, sum loss: 4413.460938, avg loss: 2.588540, ppl: 13.310328 +epoch: 2, batch: 14143, sum loss: 4024.792969, avg loss: 2.315761, ppl: 10.132634 +epoch: 2, batch: 14144, sum loss: 4072.443359, avg loss: 2.399790, ppl: 11.020860 +epoch: 2, batch: 14145, sum loss: 2922.912842, avg loss: 2.217688, ppl: 9.186069 +epoch: 2, batch: 14146, sum loss: 4493.880859, avg loss: 2.581207, ppl: 13.213074 +epoch: 2, batch: 14147, sum loss: 4338.145020, avg loss: 2.707956, ppl: 14.998581 +epoch: 2, batch: 14148, sum loss: 4645.327148, avg loss: 2.485461, ppl: 12.006657 +epoch: 2, batch: 14149, sum loss: 5231.318848, avg loss: 2.685482, ppl: 14.665269 +epoch: 2, batch: 14150, sum loss: 4179.622559, avg loss: 2.531570, ppl: 12.573236 +epoch: 2, batch: 14151, sum loss: 4508.711426, avg loss: 2.541551, ppl: 12.699350 +epoch: 2, batch: 14152, sum loss: 3994.540527, avg loss: 2.464245, ppl: 11.754600 +epoch: 2, batch: 14153, sum loss: 3505.212891, avg loss: 2.161044, ppl: 8.680192 +epoch: 2, batch: 14154, sum loss: 4383.557129, avg loss: 2.599975, ppl: 13.463396 +epoch: 2, batch: 14155, sum loss: 4430.086426, avg loss: 2.601343, ppl: 13.481828 +epoch: 2, batch: 14156, sum loss: 3762.659912, avg loss: 2.526971, ppl: 12.515540 +epoch: 2, batch: 14157, sum loss: 3709.905029, avg loss: 2.429538, ppl: 11.353639 +epoch: 2, batch: 14158, sum loss: 3823.216797, avg loss: 2.463413, ppl: 11.744828 +epoch: 2, batch: 14159, sum loss: 3548.193359, avg loss: 2.267216, 
ppl: 9.652493 +epoch: 2, batch: 14160, sum loss: 4979.209961, avg loss: 2.604189, ppl: 13.520258 +epoch: 2, batch: 14161, sum loss: 4085.994629, avg loss: 2.525337, ppl: 12.495099 +epoch: 2, batch: 14162, sum loss: 3614.968994, avg loss: 2.084757, ppl: 8.042638 +epoch: 2, batch: 14163, sum loss: 4120.872070, avg loss: 2.489953, ppl: 12.060710 +epoch: 2, batch: 14164, sum loss: 4586.861816, avg loss: 2.447632, ppl: 11.560933 +epoch: 2, batch: 14165, sum loss: 3525.126953, avg loss: 2.356368, ppl: 10.552558 +epoch: 2, batch: 14166, sum loss: 5102.008301, avg loss: 2.797154, ppl: 16.397907 +epoch: 2, batch: 14167, sum loss: 4535.263672, avg loss: 2.533667, ppl: 12.599623 +epoch: 2, batch: 14168, sum loss: 3980.152100, avg loss: 2.674833, ppl: 14.509931 +epoch: 2, batch: 14169, sum loss: 5119.140137, avg loss: 2.555737, ppl: 12.880783 +epoch: 2, batch: 14170, sum loss: 4388.807617, avg loss: 2.575591, ppl: 13.139085 +epoch: 2, batch: 14171, sum loss: 3532.416504, avg loss: 2.354944, ppl: 10.537541 +epoch: 2, batch: 14172, sum loss: 4116.327637, avg loss: 2.597052, ppl: 13.424107 +epoch: 2, batch: 14173, sum loss: 4496.058594, avg loss: 2.733166, ppl: 15.381512 +epoch: 2, batch: 14174, sum loss: 3447.067871, avg loss: 2.311917, ppl: 10.093754 +epoch: 2, batch: 14175, sum loss: 3916.445801, avg loss: 2.254719, ppl: 9.532610 +epoch: 2, batch: 14176, sum loss: 3862.173340, avg loss: 2.604298, ppl: 13.521725 +epoch: 2, batch: 14177, sum loss: 4733.284180, avg loss: 2.675684, ppl: 14.522276 +epoch: 2, batch: 14178, sum loss: 4555.293457, avg loss: 2.838189, ppl: 17.084795 +epoch: 2, batch: 14179, sum loss: 4370.963867, avg loss: 2.532424, ppl: 12.583972 +epoch: 2, batch: 14180, sum loss: 3972.595947, avg loss: 2.336821, ppl: 10.348290 +epoch: 2, batch: 14181, sum loss: 3545.653320, avg loss: 2.210507, ppl: 9.120340 +epoch: 2, batch: 14182, sum loss: 4445.583008, avg loss: 2.578645, ppl: 13.179262 +epoch: 2, batch: 14183, sum loss: 3788.145752, avg loss: 2.535573, ppl: 
12.623659 +epoch: 2, batch: 14184, sum loss: 4553.504883, avg loss: 2.514359, ppl: 12.358690 +epoch: 2, batch: 14185, sum loss: 4532.977051, avg loss: 2.767385, ppl: 15.916957 +epoch: 2, batch: 14186, sum loss: 4116.152344, avg loss: 2.412750, ppl: 11.164627 +epoch: 2, batch: 14187, sum loss: 3712.044922, avg loss: 2.339033, ppl: 10.371202 +epoch: 2, batch: 14188, sum loss: 5001.433105, avg loss: 2.715219, ppl: 15.107919 +epoch: 2, batch: 14189, sum loss: 4365.484375, avg loss: 2.609375, ppl: 13.590554 +epoch: 2, batch: 14190, sum loss: 3506.181641, avg loss: 2.419725, ppl: 11.242769 +epoch: 2, batch: 14191, sum loss: 4108.216797, avg loss: 2.575685, ppl: 13.140309 +epoch: 2, batch: 14192, sum loss: 4089.272949, avg loss: 2.485880, ppl: 12.011687 +epoch: 2, batch: 14193, sum loss: 4358.152344, avg loss: 2.548627, ppl: 12.789534 +epoch: 2, batch: 14194, sum loss: 5167.517578, avg loss: 2.623106, ppl: 13.778446 +epoch: 2, batch: 14195, sum loss: 3571.556396, avg loss: 2.387404, ppl: 10.885199 +epoch: 2, batch: 14196, sum loss: 4873.792480, avg loss: 2.797814, ppl: 16.408741 +epoch: 2, batch: 14197, sum loss: 3745.833984, avg loss: 2.319402, ppl: 10.169591 +epoch: 2, batch: 14198, sum loss: 3864.646973, avg loss: 2.409381, ppl: 11.127070 +epoch: 2, batch: 14199, sum loss: 3879.808105, avg loss: 2.371521, ppl: 10.713675 +epoch: 2, batch: 14200, sum loss: 4174.662598, avg loss: 2.366589, ppl: 10.660964 +epoch: 2, batch: 14201, sum loss: 3932.601807, avg loss: 2.438067, ppl: 11.450884 +epoch: 2, batch: 14202, sum loss: 4744.786133, avg loss: 2.646283, ppl: 14.101531 +epoch: 2, batch: 14203, sum loss: 3692.101074, avg loss: 2.202924, ppl: 9.051443 +epoch: 2, batch: 14204, sum loss: 5318.166992, avg loss: 2.669763, ppl: 14.436545 +epoch: 2, batch: 14205, sum loss: 3727.125732, avg loss: 2.473209, ppl: 11.860445 +epoch: 2, batch: 14206, sum loss: 3469.060059, avg loss: 2.282276, ppl: 9.798962 +epoch: 2, batch: 14207, sum loss: 4439.321289, avg loss: 2.530970, ppl: 12.565687 
+epoch: 2, batch: 14208, sum loss: 4515.951172, avg loss: 2.601354, ppl: 13.481985 +epoch: 2, batch: 14209, sum loss: 3832.312988, avg loss: 2.383279, ppl: 10.840391 +epoch: 2, batch: 14210, sum loss: 4017.918457, avg loss: 2.503376, ppl: 12.223692 +epoch: 2, batch: 14211, sum loss: 4147.782715, avg loss: 2.524518, ppl: 12.484873 +epoch: 2, batch: 14212, sum loss: 3830.827637, avg loss: 2.508728, ppl: 12.289289 +epoch: 2, batch: 14213, sum loss: 4280.979492, avg loss: 2.466002, ppl: 11.775275 +epoch: 2, batch: 14214, sum loss: 4250.937500, avg loss: 2.521315, ppl: 12.444955 +epoch: 2, batch: 14215, sum loss: 4052.764160, avg loss: 2.280678, ppl: 9.783307 +epoch: 2, batch: 14216, sum loss: 5146.938965, avg loss: 2.556850, ppl: 12.895136 +epoch: 2, batch: 14217, sum loss: 4336.705566, avg loss: 2.623536, ppl: 13.784384 +epoch: 2, batch: 14218, sum loss: 3575.729980, avg loss: 2.264554, ppl: 9.626825 +epoch: 2, batch: 14219, sum loss: 4886.541992, avg loss: 2.559739, ppl: 12.932443 +epoch: 2, batch: 14220, sum loss: 4126.238281, avg loss: 2.382355, ppl: 10.830376 +epoch: 2, batch: 14221, sum loss: 3850.688232, avg loss: 2.265111, ppl: 9.632194 +epoch: 2, batch: 14222, sum loss: 4069.556152, avg loss: 2.505884, ppl: 12.254389 +epoch: 2, batch: 14223, sum loss: 4205.998535, avg loss: 2.488756, ppl: 12.046287 +epoch: 2, batch: 14224, sum loss: 4534.432129, avg loss: 2.592586, ppl: 13.364281 +epoch: 2, batch: 14225, sum loss: 4609.041016, avg loss: 2.627731, ppl: 13.842331 +epoch: 2, batch: 14226, sum loss: 4122.473145, avg loss: 2.530677, ppl: 12.562009 +epoch: 2, batch: 14227, sum loss: 3974.419434, avg loss: 2.432325, ppl: 11.385326 +epoch: 2, batch: 14228, sum loss: 4455.647461, avg loss: 2.752099, ppl: 15.675493 +epoch: 2, batch: 14229, sum loss: 4484.344727, avg loss: 2.550822, ppl: 12.817633 +epoch: 2, batch: 14230, sum loss: 5204.563965, avg loss: 2.639231, ppl: 14.002435 +epoch: 2, batch: 14231, sum loss: 4803.453125, avg loss: 2.694029, ppl: 14.791147 +epoch: 2, 
batch: 14232, sum loss: 3794.892822, avg loss: 2.609967, ppl: 13.598608 +epoch: 2, batch: 14233, sum loss: 4622.143555, avg loss: 2.738237, ppl: 15.459704 +epoch: 2, batch: 14234, sum loss: 3927.586914, avg loss: 2.408085, ppl: 11.112661 +epoch: 2, batch: 14235, sum loss: 3819.440430, avg loss: 2.534466, ppl: 12.609695 +epoch: 2, batch: 14236, sum loss: 3301.670898, avg loss: 2.227848, ppl: 9.279877 +epoch: 2, batch: 14237, sum loss: 3187.605713, avg loss: 2.201385, ppl: 9.037524 +epoch: 2, batch: 14238, sum loss: 4217.077637, avg loss: 2.501232, ppl: 12.197516 +epoch: 2, batch: 14239, sum loss: 4604.966309, avg loss: 2.584156, ppl: 13.252103 +epoch: 2, batch: 14240, sum loss: 4161.276855, avg loss: 2.434919, ppl: 11.414895 +epoch: 2, batch: 14241, sum loss: 5269.977539, avg loss: 2.650894, ppl: 14.166697 +epoch: 2, batch: 14242, sum loss: 3699.349854, avg loss: 2.351780, ppl: 10.504250 +epoch: 2, batch: 14243, sum loss: 4550.746582, avg loss: 2.581252, ppl: 13.213666 +epoch: 2, batch: 14244, sum loss: 4156.537109, avg loss: 2.357650, ppl: 10.566092 +epoch: 2, batch: 14245, sum loss: 4169.118164, avg loss: 2.573530, ppl: 13.112028 +epoch: 2, batch: 14246, sum loss: 4705.096680, avg loss: 2.525548, ppl: 12.497747 +epoch: 2, batch: 14247, sum loss: 2659.138428, avg loss: 1.990373, ppl: 7.318264 +epoch: 2, batch: 14248, sum loss: 4813.826172, avg loss: 2.716606, ppl: 15.128890 +epoch: 2, batch: 14249, sum loss: 4012.540039, avg loss: 2.593756, ppl: 13.379929 +epoch: 2, batch: 14250, sum loss: 3916.181641, avg loss: 2.576435, ppl: 13.150178 +epoch: 2, batch: 14251, sum loss: 4144.981445, avg loss: 2.568142, ppl: 13.041573 +epoch: 2, batch: 14252, sum loss: 5069.006836, avg loss: 2.792841, ppl: 16.327347 +epoch: 2, batch: 14253, sum loss: 4399.134277, avg loss: 2.516667, ppl: 12.387245 +epoch: 2, batch: 14254, sum loss: 4366.416504, avg loss: 2.529789, ppl: 12.550863 +epoch: 2, batch: 14255, sum loss: 4499.323242, avg loss: 2.758629, ppl: 15.778190 +epoch: 2, batch: 
14256, sum loss: 3661.287598, avg loss: 2.295478, ppl: 9.929182 +epoch: 2, batch: 14257, sum loss: 3871.405762, avg loss: 2.278638, ppl: 9.763372 +epoch: 2, batch: 14258, sum loss: 3709.900391, avg loss: 2.210906, ppl: 9.123980 +epoch: 2, batch: 14259, sum loss: 4992.044922, avg loss: 2.714543, ppl: 15.097714 +epoch: 2, batch: 14260, sum loss: 4972.901367, avg loss: 2.486451, ppl: 12.018545 +epoch: 2, batch: 14261, sum loss: 4742.560547, avg loss: 2.578880, ppl: 13.182370 +epoch: 2, batch: 14262, sum loss: 4517.794922, avg loss: 2.540942, ppl: 12.691624 +epoch: 2, batch: 14263, sum loss: 4531.249512, avg loss: 2.708458, ppl: 15.006114 +epoch: 2, batch: 14264, sum loss: 4999.318359, avg loss: 2.638163, ppl: 13.987479 +epoch: 2, batch: 14265, sum loss: 5216.900391, avg loss: 2.858576, ppl: 17.436672 +epoch: 2, batch: 14266, sum loss: 4328.977539, avg loss: 2.531566, ppl: 12.573176 +epoch: 2, batch: 14267, sum loss: 5097.275391, avg loss: 2.430747, ppl: 11.367365 +epoch: 2, batch: 14268, sum loss: 4301.293457, avg loss: 2.669953, ppl: 14.439285 +epoch: 2, batch: 14269, sum loss: 5118.683105, avg loss: 2.568331, ppl: 13.044033 +epoch: 2, batch: 14270, sum loss: 4401.153809, avg loss: 2.665751, ppl: 14.378736 +epoch: 2, batch: 14271, sum loss: 3802.248291, avg loss: 2.591853, ppl: 13.354493 +epoch: 2, batch: 14272, sum loss: 3616.166504, avg loss: 2.420460, ppl: 11.251028 +epoch: 2, batch: 14273, sum loss: 3782.733887, avg loss: 2.300933, ppl: 9.983492 +epoch: 2, batch: 14274, sum loss: 5076.305664, avg loss: 2.748406, ppl: 15.617720 +epoch: 2, batch: 14275, sum loss: 3007.497070, avg loss: 2.162112, ppl: 8.689466 +epoch: 2, batch: 14276, sum loss: 3259.739258, avg loss: 2.158768, ppl: 8.660459 +epoch: 2, batch: 14277, sum loss: 4152.984375, avg loss: 2.418745, ppl: 11.231750 +epoch: 2, batch: 14278, sum loss: 3225.162598, avg loss: 2.308635, ppl: 10.060678 +epoch: 2, batch: 14279, sum loss: 3540.029053, avg loss: 2.187904, ppl: 8.916506 +epoch: 2, batch: 14280, sum 
loss: 3388.077393, avg loss: 2.048414, ppl: 7.755595 +epoch: 2, batch: 14281, sum loss: 3955.286133, avg loss: 2.422098, ppl: 11.269477 +epoch: 2, batch: 14282, sum loss: 3677.542725, avg loss: 2.425820, ppl: 11.311500 +epoch: 2, batch: 14283, sum loss: 3658.309814, avg loss: 2.442129, ppl: 11.497498 +epoch: 2, batch: 14284, sum loss: 3918.520508, avg loss: 2.248147, ppl: 9.470174 +epoch: 2, batch: 14285, sum loss: 4721.194336, avg loss: 2.534189, ppl: 12.606203 +epoch: 2, batch: 14286, sum loss: 4340.437500, avg loss: 2.613147, ppl: 13.641918 +epoch: 2, batch: 14287, sum loss: 4130.096191, avg loss: 2.458391, ppl: 11.685990 +epoch: 2, batch: 14288, sum loss: 4693.160156, avg loss: 2.543718, ppl: 12.726903 +epoch: 2, batch: 14289, sum loss: 5148.762695, avg loss: 2.648541, ppl: 14.133399 +epoch: 2, batch: 14290, sum loss: 4191.706055, avg loss: 2.310753, ppl: 10.082015 +epoch: 2, batch: 14291, sum loss: 5628.357910, avg loss: 2.720328, ppl: 15.185296 +epoch: 2, batch: 14292, sum loss: 4472.662598, avg loss: 2.560196, ppl: 12.938352 +epoch: 2, batch: 14293, sum loss: 4931.687500, avg loss: 2.633042, ppl: 13.916037 +epoch: 2, batch: 14294, sum loss: 3809.936035, avg loss: 2.399204, ppl: 11.014408 +epoch: 2, batch: 14295, sum loss: 4768.101074, avg loss: 2.653368, ppl: 14.201783 +epoch: 2, batch: 14296, sum loss: 4855.101074, avg loss: 2.493632, ppl: 12.105164 +epoch: 2, batch: 14297, sum loss: 4202.514160, avg loss: 2.666570, ppl: 14.390524 +epoch: 2, batch: 14298, sum loss: 4619.153320, avg loss: 2.535210, ppl: 12.619085 +epoch: 2, batch: 14299, sum loss: 4170.030273, avg loss: 2.424436, ppl: 11.295857 +epoch: 2, batch: 14300, sum loss: 4009.345703, avg loss: 2.505841, ppl: 12.253860 +epoch: 2, batch: 14301, sum loss: 4222.131836, avg loss: 2.555770, ppl: 12.881213 +epoch: 2, batch: 14302, sum loss: 4639.701172, avg loss: 2.651258, ppl: 14.171852 +epoch: 2, batch: 14303, sum loss: 3505.590332, avg loss: 2.303279, ppl: 10.006941 +epoch: 2, batch: 14304, sum loss: 
5215.988281, avg loss: 2.749599, ppl: 15.636353 +epoch: 2, batch: 14305, sum loss: 3939.079590, avg loss: 2.589796, ppl: 13.327053 +epoch: 2, batch: 14306, sum loss: 4059.957520, avg loss: 2.289880, ppl: 9.873755 +epoch: 2, batch: 14307, sum loss: 3380.974121, avg loss: 2.361016, ppl: 10.601712 +epoch: 2, batch: 14308, sum loss: 3534.538086, avg loss: 2.328418, ppl: 10.261692 +epoch: 2, batch: 14309, sum loss: 3654.731934, avg loss: 2.368588, ppl: 10.682303 +epoch: 2, batch: 14310, sum loss: 3553.953857, avg loss: 2.353612, ppl: 10.523512 +epoch: 2, batch: 14311, sum loss: 4427.541016, avg loss: 2.431379, ppl: 11.374555 +epoch: 2, batch: 14312, sum loss: 4712.909180, avg loss: 2.733706, ppl: 15.389820 +epoch: 2, batch: 14313, sum loss: 4513.460449, avg loss: 2.702671, ppl: 14.919529 +epoch: 2, batch: 14314, sum loss: 4519.342773, avg loss: 2.615360, ppl: 13.672144 +epoch: 2, batch: 14315, sum loss: 4097.476562, avg loss: 2.436074, ppl: 11.428086 +epoch: 2, batch: 14316, sum loss: 3556.711914, avg loss: 2.186055, ppl: 8.900035 +epoch: 2, batch: 14317, sum loss: 4795.498047, avg loss: 2.614775, ppl: 13.664145 +epoch: 2, batch: 14318, sum loss: 3822.445801, avg loss: 2.335031, ppl: 10.329781 +epoch: 2, batch: 14319, sum loss: 3327.860352, avg loss: 2.248554, ppl: 9.474029 +epoch: 2, batch: 14320, sum loss: 5123.591309, avg loss: 2.627483, ppl: 13.838889 +epoch: 2, batch: 14321, sum loss: 4346.630859, avg loss: 2.576545, ppl: 13.151618 +epoch: 2, batch: 14322, sum loss: 4298.246094, avg loss: 2.540334, ppl: 12.683912 +epoch: 2, batch: 14323, sum loss: 3970.182129, avg loss: 2.569697, ppl: 13.061872 +epoch: 2, batch: 14324, sum loss: 4476.205566, avg loss: 2.430079, ppl: 11.359782 +epoch: 2, batch: 14325, sum loss: 4934.721191, avg loss: 2.693625, ppl: 14.785178 +epoch: 2, batch: 14326, sum loss: 3671.606689, avg loss: 2.484172, ppl: 11.991192 +epoch: 2, batch: 14327, sum loss: 4006.103760, avg loss: 2.529106, ppl: 12.542287 +epoch: 2, batch: 14328, sum loss: 
4205.082031, avg loss: 2.390610, ppl: 10.920148 +epoch: 2, batch: 14329, sum loss: 3757.558594, avg loss: 2.387267, ppl: 10.883709 +epoch: 2, batch: 14330, sum loss: 4384.697266, avg loss: 2.524293, ppl: 12.482070 +epoch: 2, batch: 14331, sum loss: 3992.985107, avg loss: 2.443687, ppl: 11.515425 +epoch: 2, batch: 14332, sum loss: 5117.305664, avg loss: 2.732144, ppl: 15.365798 +epoch: 2, batch: 14333, sum loss: 4749.168457, avg loss: 2.454350, ppl: 11.638871 +epoch: 2, batch: 14334, sum loss: 3900.212646, avg loss: 2.242790, ppl: 9.419580 +epoch: 2, batch: 14335, sum loss: 3387.832764, avg loss: 2.205620, ppl: 9.075879 +epoch: 2, batch: 14336, sum loss: 4948.896973, avg loss: 2.826326, ppl: 16.883320 +epoch: 2, batch: 14337, sum loss: 4287.297852, avg loss: 2.536863, ppl: 12.639953 +epoch: 2, batch: 14338, sum loss: 4440.303711, avg loss: 2.647766, ppl: 14.122456 +epoch: 2, batch: 14339, sum loss: 3712.347900, avg loss: 2.342175, ppl: 10.403843 +epoch: 2, batch: 14340, sum loss: 4702.784180, avg loss: 2.527020, ppl: 12.516152 +epoch: 2, batch: 14341, sum loss: 5090.067383, avg loss: 2.698869, ppl: 14.862919 +epoch: 2, batch: 14342, sum loss: 4284.044922, avg loss: 2.620211, ppl: 13.738621 +epoch: 2, batch: 14343, sum loss: 3353.789062, avg loss: 2.309772, ppl: 10.072128 +epoch: 2, batch: 14344, sum loss: 3703.873535, avg loss: 2.172360, ppl: 8.778977 +epoch: 2, batch: 14345, sum loss: 4818.479492, avg loss: 2.635930, ppl: 13.956284 +epoch: 2, batch: 14346, sum loss: 3580.302979, avg loss: 2.240490, ppl: 9.397935 +epoch: 2, batch: 14347, sum loss: 5123.385254, avg loss: 2.674001, ppl: 14.497855 +epoch: 2, batch: 14348, sum loss: 3459.218506, avg loss: 2.589236, ppl: 13.319586 +epoch: 2, batch: 14349, sum loss: 4300.499023, avg loss: 2.595352, ppl: 13.401309 +epoch: 2, batch: 14350, sum loss: 3412.285400, avg loss: 2.386214, ppl: 10.872249 +epoch: 2, batch: 14351, sum loss: 4381.946289, avg loss: 2.655725, ppl: 14.235303 +epoch: 2, batch: 14352, sum loss: 
3938.644531, avg loss: 2.379846, ppl: 10.803235 +epoch: 2, batch: 14353, sum loss: 4339.226562, avg loss: 2.545001, ppl: 12.743241 +epoch: 2, batch: 14354, sum loss: 4052.488770, avg loss: 2.339774, ppl: 10.378892 +epoch: 2, batch: 14355, sum loss: 4549.397461, avg loss: 2.750543, ppl: 15.651122 +epoch: 2, batch: 14356, sum loss: 4041.070068, avg loss: 2.701250, ppl: 14.898344 +epoch: 2, batch: 14357, sum loss: 4059.172607, avg loss: 2.403299, ppl: 11.059606 +epoch: 2, batch: 14358, sum loss: 5324.452148, avg loss: 2.780393, ppl: 16.125351 +epoch: 2, batch: 14359, sum loss: 3751.844971, avg loss: 2.466696, ppl: 11.783453 +epoch: 2, batch: 14360, sum loss: 5034.429688, avg loss: 2.815677, ppl: 16.704475 +epoch: 2, batch: 14361, sum loss: 4514.288574, avg loss: 2.603396, ppl: 13.509538 +epoch: 2, batch: 14362, sum loss: 3485.343262, avg loss: 2.353372, ppl: 10.520983 +epoch: 2, batch: 14363, sum loss: 5049.346680, avg loss: 2.800525, ppl: 16.453285 +epoch: 2, batch: 14364, sum loss: 4461.580078, avg loss: 2.507915, ppl: 12.279295 +epoch: 2, batch: 14365, sum loss: 4288.645020, avg loss: 2.515334, ppl: 12.370745 +epoch: 2, batch: 14366, sum loss: 4907.563965, avg loss: 2.689076, ppl: 14.718073 +epoch: 2, batch: 14367, sum loss: 3436.787109, avg loss: 2.273007, ppl: 9.708554 +epoch: 2, batch: 14368, sum loss: 3921.620605, avg loss: 2.425245, ppl: 11.304996 +epoch: 2, batch: 14369, sum loss: 4668.458984, avg loss: 2.535828, ppl: 12.626880 +epoch: 2, batch: 14370, sum loss: 3579.543945, avg loss: 2.230245, ppl: 9.302148 +epoch: 2, batch: 14371, sum loss: 4404.090332, avg loss: 2.649874, ppl: 14.152255 +epoch: 2, batch: 14372, sum loss: 4206.911621, avg loss: 2.523642, ppl: 12.473945 +epoch: 2, batch: 14373, sum loss: 4001.382324, avg loss: 2.502428, ppl: 12.212107 +epoch: 2, batch: 14374, sum loss: 3803.123047, avg loss: 2.341825, ppl: 10.400195 +epoch: 2, batch: 14375, sum loss: 4772.107910, avg loss: 2.422390, ppl: 11.272766 +epoch: 2, batch: 14376, sum loss: 
4163.745117, avg loss: 2.373857, ppl: 10.738729 +epoch: 2, batch: 14377, sum loss: 3851.042969, avg loss: 2.468617, ppl: 11.806113 +epoch: 2, batch: 14378, sum loss: 4220.959961, avg loss: 2.254786, ppl: 9.533257 +epoch: 2, batch: 14379, sum loss: 4400.490234, avg loss: 2.504548, ppl: 12.238021 +epoch: 2, batch: 14380, sum loss: 3887.780762, avg loss: 2.452858, ppl: 11.621519 +epoch: 2, batch: 14381, sum loss: 4872.691406, avg loss: 2.674364, ppl: 14.503124 +epoch: 2, batch: 14382, sum loss: 4024.141602, avg loss: 2.306098, ppl: 10.035193 +epoch: 2, batch: 14383, sum loss: 3840.539551, avg loss: 2.398838, ppl: 11.010375 +epoch: 2, batch: 14384, sum loss: 4864.491211, avg loss: 2.636580, ppl: 13.965366 +epoch: 2, batch: 14385, sum loss: 2935.958008, avg loss: 2.149310, ppl: 8.578939 +epoch: 2, batch: 14386, sum loss: 4329.757812, avg loss: 2.492664, ppl: 12.093454 +epoch: 2, batch: 14387, sum loss: 4010.562988, avg loss: 2.604262, ppl: 13.521238 +epoch: 2, batch: 14388, sum loss: 3999.812012, avg loss: 2.322771, ppl: 10.203911 +epoch: 2, batch: 14389, sum loss: 3572.030518, avg loss: 2.213154, ppl: 9.144513 +epoch: 2, batch: 14390, sum loss: 4149.493164, avg loss: 2.537916, ppl: 12.653276 +epoch: 2, batch: 14391, sum loss: 4237.143555, avg loss: 2.596289, ppl: 13.413866 +epoch: 2, batch: 14392, sum loss: 4942.700684, avg loss: 2.637514, ppl: 13.978405 +epoch: 2, batch: 14393, sum loss: 3883.580811, avg loss: 2.459519, ppl: 11.699184 +epoch: 2, batch: 14394, sum loss: 4458.054199, avg loss: 2.530110, ppl: 12.554892 +epoch: 2, batch: 14395, sum loss: 4229.708984, avg loss: 2.617394, ppl: 13.699978 +epoch: 2, batch: 14396, sum loss: 4545.797363, avg loss: 2.455860, ppl: 11.656458 +epoch: 2, batch: 14397, sum loss: 3181.099365, avg loss: 2.286916, ppl: 9.844525 +epoch: 2, batch: 14398, sum loss: 4536.546875, avg loss: 2.476281, ppl: 11.896939 +epoch: 2, batch: 14399, sum loss: 4129.820312, avg loss: 2.547699, ppl: 12.777668 +epoch: 2, batch: 14400, sum loss: 
4288.805176, avg loss: 2.452147, ppl: 11.613254 +epoch: 2, batch: 14401, sum loss: 3445.023926, avg loss: 2.367714, ppl: 10.672965 +epoch: 2, batch: 14402, sum loss: 4253.541016, avg loss: 2.448786, ppl: 11.574287 +epoch: 2, batch: 14403, sum loss: 4249.990234, avg loss: 2.367683, ppl: 10.672632 +epoch: 2, batch: 14404, sum loss: 3663.294189, avg loss: 2.230995, ppl: 9.309128 +epoch: 2, batch: 14405, sum loss: 5099.487793, avg loss: 2.712493, ppl: 15.066796 +epoch: 2, batch: 14406, sum loss: 4055.901367, avg loss: 2.588323, ppl: 13.307431 +epoch: 2, batch: 14407, sum loss: 3817.577148, avg loss: 2.346390, ppl: 10.447787 +epoch: 2, batch: 14408, sum loss: 3817.375244, avg loss: 2.375467, ppl: 10.756033 +epoch: 2, batch: 14409, sum loss: 4826.454590, avg loss: 2.641738, ppl: 14.037576 +epoch: 2, batch: 14410, sum loss: 4752.100586, avg loss: 2.780632, ppl: 16.129215 +epoch: 2, batch: 14411, sum loss: 4585.859375, avg loss: 2.702333, ppl: 14.914490 +epoch: 2, batch: 14412, sum loss: 4716.681152, avg loss: 2.721685, ppl: 15.205929 +epoch: 2, batch: 14413, sum loss: 4425.840820, avg loss: 2.476688, ppl: 11.901776 +epoch: 2, batch: 14414, sum loss: 4582.404785, avg loss: 2.658008, ppl: 14.267834 +epoch: 2, batch: 14415, sum loss: 3916.947021, avg loss: 2.323219, ppl: 10.208481 +epoch: 2, batch: 14416, sum loss: 4783.922852, avg loss: 2.544640, ppl: 12.738639 +epoch: 2, batch: 14417, sum loss: 4234.752441, avg loss: 2.631916, ppl: 13.900374 +epoch: 2, batch: 14418, sum loss: 5020.010254, avg loss: 2.618680, ppl: 13.717611 +epoch: 2, batch: 14419, sum loss: 3535.261475, avg loss: 2.416447, ppl: 11.205970 +epoch: 2, batch: 14420, sum loss: 4189.069336, avg loss: 2.529631, ppl: 12.548877 +epoch: 2, batch: 14421, sum loss: 2831.768555, avg loss: 2.265415, ppl: 9.635122 +epoch: 2, batch: 14422, sum loss: 3549.172119, avg loss: 2.294229, ppl: 9.916787 +epoch: 2, batch: 14423, sum loss: 4291.666504, avg loss: 2.572942, ppl: 13.104315 +epoch: 2, batch: 14424, sum loss: 
4526.602051, avg loss: 2.604489, ppl: 13.524314 +epoch: 2, batch: 14425, sum loss: 4110.346680, avg loss: 2.398102, ppl: 11.002275 +epoch: 2, batch: 14426, sum loss: 4730.084961, avg loss: 2.751649, ppl: 15.668454 +epoch: 2, batch: 14427, sum loss: 4505.946289, avg loss: 2.589624, ppl: 13.324766 +epoch: 2, batch: 14428, sum loss: 4750.996582, avg loss: 2.851738, ppl: 17.317862 +epoch: 2, batch: 14429, sum loss: 4013.575684, avg loss: 2.380531, ppl: 10.810645 +epoch: 2, batch: 14430, sum loss: 4743.907227, avg loss: 2.636969, ppl: 13.970792 +epoch: 2, batch: 14431, sum loss: 4520.158691, avg loss: 2.615833, ppl: 13.678600 +epoch: 2, batch: 14432, sum loss: 4524.560059, avg loss: 2.556249, ppl: 12.887382 +epoch: 2, batch: 14433, sum loss: 4483.069336, avg loss: 2.597375, ppl: 13.428441 +epoch: 2, batch: 14434, sum loss: 4225.001953, avg loss: 2.691084, ppl: 14.747656 +epoch: 2, batch: 14435, sum loss: 3903.425293, avg loss: 2.396209, ppl: 10.981472 +epoch: 2, batch: 14436, sum loss: 4774.303711, avg loss: 2.869173, ppl: 17.622438 +epoch: 2, batch: 14437, sum loss: 4209.503906, avg loss: 2.523684, ppl: 12.474463 +epoch: 2, batch: 14438, sum loss: 4387.414062, avg loss: 2.307950, ppl: 10.053799 +epoch: 2, batch: 14439, sum loss: 4064.214355, avg loss: 2.458690, ppl: 11.689485 +epoch: 2, batch: 14440, sum loss: 4738.712891, avg loss: 2.606553, ppl: 13.552254 +epoch: 2, batch: 14441, sum loss: 5310.037109, avg loss: 2.825991, ppl: 16.877665 +epoch: 2, batch: 14442, sum loss: 4027.101074, avg loss: 2.330498, ppl: 10.283064 +epoch: 2, batch: 14443, sum loss: 4161.071289, avg loss: 2.427696, ppl: 11.332744 +epoch: 2, batch: 14444, sum loss: 3797.341309, avg loss: 2.531561, ppl: 12.573116 +epoch: 2, batch: 14445, sum loss: 4953.899902, avg loss: 2.809926, ppl: 16.608694 +epoch: 2, batch: 14446, sum loss: 4452.675781, avg loss: 2.450565, ppl: 11.594892 +epoch: 2, batch: 14447, sum loss: 4119.158691, avg loss: 2.441706, ppl: 11.492636 +epoch: 2, batch: 14448, sum loss: 
3745.405273, avg loss: 2.400901, ppl: 11.033111 +epoch: 2, batch: 14449, sum loss: 4659.270020, avg loss: 2.691664, ppl: 14.756206 +epoch: 2, batch: 14450, sum loss: 3345.711670, avg loss: 2.057633, ppl: 7.827422 +epoch: 2, batch: 14451, sum loss: 3904.061768, avg loss: 2.538402, ppl: 12.659420 +epoch: 2, batch: 14452, sum loss: 4180.842773, avg loss: 2.426490, ppl: 11.319086 +epoch: 2, batch: 14453, sum loss: 5644.771484, avg loss: 2.888829, ppl: 17.972248 +epoch: 2, batch: 14454, sum loss: 4418.946777, avg loss: 2.522230, ppl: 12.456346 +epoch: 2, batch: 14455, sum loss: 3569.231445, avg loss: 2.444679, ppl: 11.526849 +epoch: 2, batch: 14456, sum loss: 4895.172852, avg loss: 2.737792, ppl: 15.452835 +epoch: 2, batch: 14457, sum loss: 3762.110352, avg loss: 2.346918, ppl: 10.453306 +epoch: 2, batch: 14458, sum loss: 5107.520996, avg loss: 2.814061, ppl: 16.677511 +epoch: 2, batch: 14459, sum loss: 4868.118164, avg loss: 2.402822, ppl: 11.054333 +epoch: 2, batch: 14460, sum loss: 3843.818848, avg loss: 2.457685, ppl: 11.677743 +epoch: 2, batch: 14461, sum loss: 4444.016113, avg loss: 2.462059, ppl: 11.728934 +epoch: 2, batch: 14462, sum loss: 4152.173828, avg loss: 2.411251, ppl: 11.147897 +epoch: 2, batch: 14463, sum loss: 4090.838379, avg loss: 2.420614, ppl: 11.252771 +epoch: 2, batch: 14464, sum loss: 3708.989258, avg loss: 2.362414, ppl: 10.616545 +epoch: 2, batch: 14465, sum loss: 3665.640137, avg loss: 2.235147, ppl: 9.347852 +epoch: 2, batch: 14466, sum loss: 5290.052246, avg loss: 2.510704, ppl: 12.313590 +epoch: 2, batch: 14467, sum loss: 3390.817383, avg loss: 2.305110, ppl: 10.025286 +epoch: 2, batch: 14468, sum loss: 4071.583984, avg loss: 2.286123, ppl: 9.836722 +epoch: 2, batch: 14469, sum loss: 4502.758301, avg loss: 2.793274, ppl: 16.334417 +epoch: 2, batch: 14470, sum loss: 4107.590332, avg loss: 2.434849, ppl: 11.414095 +epoch: 2, batch: 14471, sum loss: 4214.389648, avg loss: 2.399994, ppl: 11.023111 +epoch: 2, batch: 14472, sum loss: 
5011.104004, avg loss: 2.651378, ppl: 14.173555 +epoch: 2, batch: 14473, sum loss: 4237.143066, avg loss: 2.593111, ppl: 13.371303 +epoch: 2, batch: 14474, sum loss: 4062.604492, avg loss: 2.425436, ppl: 11.307156 +epoch: 2, batch: 14475, sum loss: 4025.793945, avg loss: 2.219291, ppl: 9.200808 +epoch: 2, batch: 14476, sum loss: 4646.141602, avg loss: 2.565512, ppl: 13.007316 +epoch: 2, batch: 14477, sum loss: 4216.156738, avg loss: 2.438494, ppl: 11.455780 +epoch: 2, batch: 14478, sum loss: 3930.296387, avg loss: 2.552140, ppl: 12.834546 +epoch: 2, batch: 14479, sum loss: 4816.675293, avg loss: 2.566156, ppl: 13.015701 +epoch: 2, batch: 14480, sum loss: 5243.284668, avg loss: 2.882509, ppl: 17.859034 +epoch: 2, batch: 14481, sum loss: 4017.760986, avg loss: 2.318385, ppl: 10.159253 +epoch: 2, batch: 14482, sum loss: 3872.485352, avg loss: 2.303679, ppl: 10.010950 +epoch: 2, batch: 14483, sum loss: 3393.862305, avg loss: 2.213870, ppl: 9.151061 +epoch: 2, batch: 14484, sum loss: 4538.821289, avg loss: 2.473472, ppl: 11.863567 +epoch: 2, batch: 14485, sum loss: 4613.880859, avg loss: 2.772765, ppl: 16.002823 +epoch: 2, batch: 14486, sum loss: 4153.776855, avg loss: 2.387228, ppl: 10.883284 +epoch: 2, batch: 14487, sum loss: 4415.925781, avg loss: 2.636374, ppl: 13.962481 +epoch: 2, batch: 14488, sum loss: 4023.558594, avg loss: 2.497554, ppl: 12.152727 +epoch: 2, batch: 14489, sum loss: 3582.565918, avg loss: 2.332400, ppl: 10.302636 +epoch: 2, batch: 14490, sum loss: 3819.839355, avg loss: 2.528021, ppl: 12.528683 +epoch: 2, batch: 14491, sum loss: 3530.207031, avg loss: 2.565557, ppl: 13.007908 +epoch: 2, batch: 14492, sum loss: 4810.814453, avg loss: 2.585070, ppl: 13.264213 +epoch: 2, batch: 14493, sum loss: 4029.592285, avg loss: 2.640624, ppl: 14.021952 +epoch: 2, batch: 14494, sum loss: 4578.393066, avg loss: 2.500488, ppl: 12.188438 +epoch: 2, batch: 14495, sum loss: 3819.349609, avg loss: 2.480097, ppl: 11.942424 +epoch: 2, batch: 14496, sum loss: 
5308.543945, avg loss: 2.555871, ppl: 12.882516 +epoch: 2, batch: 14497, sum loss: 3316.649414, avg loss: 2.365656, ppl: 10.651023 +epoch: 2, batch: 14498, sum loss: 4071.855713, avg loss: 2.562527, ppl: 12.968550 +epoch: 2, batch: 14499, sum loss: 4845.511230, avg loss: 2.596737, ppl: 13.419876 +epoch: 2, batch: 14500, sum loss: 5089.990234, avg loss: 2.840396, ppl: 17.122551 +epoch: 2, batch: 14501, sum loss: 3077.500488, avg loss: 2.034039, ppl: 7.644900 +epoch: 2, batch: 14502, sum loss: 5687.438477, avg loss: 2.843719, ppl: 17.179546 +epoch: 2, batch: 14503, sum loss: 3923.889160, avg loss: 2.466304, ppl: 11.778830 +epoch: 2, batch: 14504, sum loss: 3561.165039, avg loss: 2.355268, ppl: 10.540954 +epoch: 2, batch: 14505, sum loss: 4451.583496, avg loss: 2.678450, ppl: 14.562502 +epoch: 2, batch: 14506, sum loss: 3665.626953, avg loss: 2.381824, ppl: 10.824629 +epoch: 2, batch: 14507, sum loss: 3701.026123, avg loss: 2.423724, ppl: 11.287813 +epoch: 2, batch: 14508, sum loss: 3678.026611, avg loss: 2.264795, ppl: 9.629147 +epoch: 2, batch: 14509, sum loss: 4064.745361, avg loss: 2.359109, ppl: 10.581523 +epoch: 2, batch: 14510, sum loss: 5160.501953, avg loss: 2.962401, ppl: 19.344355 +epoch: 2, batch: 14511, sum loss: 3998.231445, avg loss: 2.488010, ppl: 12.037294 +epoch: 2, batch: 14512, sum loss: 4172.667969, avg loss: 2.707766, ppl: 14.995742 +epoch: 2, batch: 14513, sum loss: 4284.570801, avg loss: 2.652985, ppl: 14.196353 +epoch: 2, batch: 14514, sum loss: 4219.193359, avg loss: 2.512920, ppl: 12.340918 +epoch: 2, batch: 14515, sum loss: 4163.611328, avg loss: 2.625228, ppl: 13.807721 +epoch: 2, batch: 14516, sum loss: 4082.882568, avg loss: 2.298920, ppl: 9.963420 +epoch: 2, batch: 14517, sum loss: 3953.873779, avg loss: 2.599523, ppl: 13.457312 +epoch: 2, batch: 14518, sum loss: 3921.704590, avg loss: 2.381120, ppl: 10.817011 +epoch: 2, batch: 14519, sum loss: 4179.593262, avg loss: 2.302806, ppl: 10.002213 +epoch: 2, batch: 14520, sum loss: 
3235.211914, avg loss: 2.132638, ppl: 8.437095 +epoch: 2, batch: 14521, sum loss: 4855.309570, avg loss: 2.651726, ppl: 14.178486 +epoch: 2, batch: 14522, sum loss: 3979.126221, avg loss: 2.331064, ppl: 10.288883 +epoch: 2, batch: 14523, sum loss: 4673.583008, avg loss: 2.818808, ppl: 16.756859 +epoch: 2, batch: 14524, sum loss: 3988.083496, avg loss: 2.289371, ppl: 9.868724 +epoch: 2, batch: 14525, sum loss: 4336.272461, avg loss: 2.456812, ppl: 11.667552 +epoch: 2, batch: 14526, sum loss: 4049.065186, avg loss: 2.403006, ppl: 11.056363 +epoch: 2, batch: 14527, sum loss: 3762.591797, avg loss: 2.367899, ppl: 10.674943 +epoch: 2, batch: 14528, sum loss: 3511.198730, avg loss: 2.445124, ppl: 11.531984 +epoch: 2, batch: 14529, sum loss: 4810.806152, avg loss: 2.636058, ppl: 13.958074 +epoch: 2, batch: 14530, sum loss: 3895.307129, avg loss: 2.470074, ppl: 11.823327 +epoch: 2, batch: 14531, sum loss: 4173.442871, avg loss: 2.690808, ppl: 14.743578 +epoch: 2, batch: 14532, sum loss: 4346.801758, avg loss: 2.539020, ppl: 12.667246 +epoch: 2, batch: 14533, sum loss: 4134.550781, avg loss: 2.552192, ppl: 12.835207 +epoch: 2, batch: 14534, sum loss: 3729.487549, avg loss: 2.375470, ppl: 10.756066 +epoch: 2, batch: 14535, sum loss: 3771.381348, avg loss: 2.368958, ppl: 10.686252 +epoch: 2, batch: 14536, sum loss: 4465.844727, avg loss: 2.608554, ppl: 13.579403 +epoch: 2, batch: 14537, sum loss: 4015.458008, avg loss: 2.528626, ppl: 12.536272 +epoch: 2, batch: 14538, sum loss: 5069.505859, avg loss: 2.821094, ppl: 16.795216 +epoch: 2, batch: 14539, sum loss: 4662.544434, avg loss: 2.588864, ppl: 13.314639 +epoch: 2, batch: 14540, sum loss: 4307.493652, avg loss: 2.312128, ppl: 10.095881 +epoch: 2, batch: 14541, sum loss: 4388.184570, avg loss: 2.608909, ppl: 13.584218 +epoch: 2, batch: 14542, sum loss: 5024.910156, avg loss: 2.688555, ppl: 14.710411 +epoch: 2, batch: 14543, sum loss: 3987.258057, avg loss: 2.389010, ppl: 10.902699 +epoch: 2, batch: 14544, sum loss: 
3530.703125, avg loss: 2.330497, ppl: 10.283053 +epoch: 2, batch: 14545, sum loss: 5287.577637, avg loss: 2.751081, ppl: 15.659554 +epoch: 2, batch: 14546, sum loss: 3783.239014, avg loss: 2.360099, ppl: 10.592001 +epoch: 2, batch: 14547, sum loss: 4163.000488, avg loss: 2.724477, ppl: 15.248434 +epoch: 2, batch: 14548, sum loss: 4416.832031, avg loss: 2.560483, ppl: 12.942060 +epoch: 2, batch: 14549, sum loss: 4065.130127, avg loss: 2.480250, ppl: 11.944252 +epoch: 2, batch: 14550, sum loss: 4380.795898, avg loss: 2.815422, ppl: 16.700214 +epoch: 2, batch: 14551, sum loss: 4513.663574, avg loss: 2.560218, ppl: 12.938633 +epoch: 2, batch: 14552, sum loss: 4442.194824, avg loss: 2.584174, ppl: 13.252337 +epoch: 2, batch: 14553, sum loss: 2858.820068, avg loss: 2.170706, ppl: 8.764470 +epoch: 2, batch: 14554, sum loss: 4264.951660, avg loss: 2.534136, ppl: 12.605538 +epoch: 2, batch: 14555, sum loss: 4624.180664, avg loss: 2.457057, ppl: 11.670412 +epoch: 2, batch: 14556, sum loss: 3715.499268, avg loss: 2.251818, ppl: 9.504997 +epoch: 2, batch: 14557, sum loss: 4396.458984, avg loss: 2.460246, ppl: 11.707686 +epoch: 2, batch: 14558, sum loss: 5044.213867, avg loss: 2.750389, ppl: 15.648720 +epoch: 2, batch: 14559, sum loss: 4046.897217, avg loss: 2.337895, ppl: 10.359411 +epoch: 2, batch: 14560, sum loss: 3696.744629, avg loss: 2.375800, ppl: 10.759616 +epoch: 2, batch: 14561, sum loss: 3941.989746, avg loss: 2.387638, ppl: 10.887745 +epoch: 2, batch: 14562, sum loss: 3804.231445, avg loss: 2.361410, ppl: 10.605897 +epoch: 2, batch: 14563, sum loss: 4670.013672, avg loss: 2.542196, ppl: 12.707546 +epoch: 2, batch: 14564, sum loss: 3718.766602, avg loss: 2.494143, ppl: 12.111347 +epoch: 2, batch: 14565, sum loss: 4669.488770, avg loss: 2.653119, ppl: 14.198249 +epoch: 2, batch: 14566, sum loss: 3386.231934, avg loss: 2.367995, ppl: 10.675961 +epoch: 2, batch: 14567, sum loss: 3397.373291, avg loss: 2.258892, ppl: 9.572475 +epoch: 2, batch: 14568, sum loss: 
3059.529785, avg loss: 2.307338, ppl: 10.047640 +epoch: 2, batch: 14569, sum loss: 3787.521240, avg loss: 2.619309, ppl: 13.726241 +epoch: 2, batch: 14570, sum loss: 4011.328125, avg loss: 2.499270, ppl: 12.173600 +epoch: 2, batch: 14571, sum loss: 3751.509033, avg loss: 2.357957, ppl: 10.569332 +epoch: 2, batch: 14572, sum loss: 5195.139648, avg loss: 2.744395, ppl: 15.555197 +epoch: 2, batch: 14573, sum loss: 4067.545410, avg loss: 2.363478, ppl: 10.627848 +epoch: 2, batch: 14574, sum loss: 3114.877197, avg loss: 2.260433, ppl: 9.587244 +epoch: 2, batch: 14575, sum loss: 3659.169434, avg loss: 2.251796, ppl: 9.504796 +epoch: 2, batch: 14576, sum loss: 4571.625488, avg loss: 2.442108, ppl: 11.497248 +epoch: 2, batch: 14577, sum loss: 3578.559082, avg loss: 2.264911, ppl: 9.630267 +epoch: 2, batch: 14578, sum loss: 4136.864746, avg loss: 2.510233, ppl: 12.307802 +epoch: 2, batch: 14579, sum loss: 3669.784668, avg loss: 2.233588, ppl: 9.333291 +epoch: 2, batch: 14580, sum loss: 3679.488281, avg loss: 2.324377, ppl: 10.220306 +epoch: 2, batch: 14581, sum loss: 4266.691406, avg loss: 2.838783, ppl: 17.094954 +epoch: 2, batch: 14582, sum loss: 4819.769043, avg loss: 2.484417, ppl: 11.994128 +epoch: 2, batch: 14583, sum loss: 3819.301270, avg loss: 2.304950, ppl: 10.023672 +epoch: 2, batch: 14584, sum loss: 3855.001221, avg loss: 2.419963, ppl: 11.245444 +epoch: 2, batch: 14585, sum loss: 5074.125488, avg loss: 2.836292, ppl: 17.052410 +epoch: 2, batch: 14586, sum loss: 3959.243408, avg loss: 2.497945, ppl: 12.157489 +epoch: 2, batch: 14587, sum loss: 4654.173340, avg loss: 2.696508, ppl: 14.827865 +epoch: 2, batch: 14588, sum loss: 4343.944336, avg loss: 2.587221, ppl: 13.292782 +epoch: 2, batch: 14589, sum loss: 4189.117188, avg loss: 2.435533, ppl: 11.421908 +epoch: 2, batch: 14590, sum loss: 4996.732422, avg loss: 2.624334, ppl: 13.795388 +epoch: 2, batch: 14591, sum loss: 4631.206543, avg loss: 2.432357, ppl: 11.385682 +epoch: 2, batch: 14592, sum loss: 
4148.677246, avg loss: 2.657705, ppl: 14.263515 +epoch: 2, batch: 14593, sum loss: 4039.618652, avg loss: 2.576287, ppl: 13.148232 +epoch: 2, batch: 14594, sum loss: 4746.208984, avg loss: 2.699778, ppl: 14.876423 +epoch: 2, batch: 14595, sum loss: 3836.459473, avg loss: 2.381415, ppl: 10.820204 +epoch: 2, batch: 14596, sum loss: 3700.528320, avg loss: 2.211912, ppl: 9.133160 +epoch: 2, batch: 14597, sum loss: 4272.693848, avg loss: 2.624505, ppl: 13.797740 +epoch: 2, batch: 14598, sum loss: 4129.340820, avg loss: 2.533338, ppl: 12.595478 +epoch: 2, batch: 14599, sum loss: 4551.672852, avg loss: 2.760262, ppl: 15.803990 +epoch: 2, batch: 14600, sum loss: 4223.094238, avg loss: 2.360589, ppl: 10.597194 +epoch: 2, batch: 14601, sum loss: 4617.021973, avg loss: 2.636792, ppl: 13.968317 +epoch: 2, batch: 14602, sum loss: 4518.849121, avg loss: 2.437351, ppl: 11.442689 +epoch: 2, batch: 14603, sum loss: 4206.463867, avg loss: 2.520350, ppl: 12.432947 +epoch: 2, batch: 14604, sum loss: 3700.132568, avg loss: 2.293945, ppl: 9.913967 +epoch: 2, batch: 14605, sum loss: 3838.212646, avg loss: 2.353288, ppl: 10.520103 +epoch: 2, batch: 14606, sum loss: 4349.626465, avg loss: 2.607690, ppl: 13.567672 +epoch: 2, batch: 14607, sum loss: 3094.195801, avg loss: 2.247056, ppl: 9.459843 +epoch: 2, batch: 14608, sum loss: 4311.556641, avg loss: 2.446968, ppl: 11.553260 +epoch: 2, batch: 14609, sum loss: 4074.044434, avg loss: 2.563905, ppl: 12.986424 +epoch: 2, batch: 14610, sum loss: 4971.567383, avg loss: 2.638836, ppl: 13.996907 +epoch: 2, batch: 14611, sum loss: 4504.082031, avg loss: 2.524710, ppl: 12.487269 +epoch: 2, batch: 14612, sum loss: 4473.383789, avg loss: 2.648540, ppl: 14.133386 +epoch: 2, batch: 14613, sum loss: 3513.540771, avg loss: 2.337685, ppl: 10.357233 +epoch: 2, batch: 14614, sum loss: 3831.048096, avg loss: 2.262876, ppl: 9.610685 +epoch: 2, batch: 14615, sum loss: 4344.833496, avg loss: 2.332170, ppl: 10.300271 +epoch: 2, batch: 14616, sum loss: 
4840.338867, avg loss: 2.819067, ppl: 16.761213 +epoch: 2, batch: 14617, sum loss: 3200.952881, avg loss: 2.047955, ppl: 7.752028 +epoch: 2, batch: 14618, sum loss: 3636.224121, avg loss: 2.578882, ppl: 13.182398 +epoch: 2, batch: 14619, sum loss: 4521.454102, avg loss: 2.748604, ppl: 15.620814 +epoch: 2, batch: 14620, sum loss: 5143.832520, avg loss: 2.781954, ppl: 16.150545 +epoch: 2, batch: 14621, sum loss: 4034.248535, avg loss: 2.388543, ppl: 10.897601 +epoch: 2, batch: 14622, sum loss: 3895.461914, avg loss: 2.282052, ppl: 9.796759 +epoch: 2, batch: 14623, sum loss: 3509.105957, avg loss: 2.302563, ppl: 9.999779 +epoch: 2, batch: 14624, sum loss: 4161.661133, avg loss: 2.660909, ppl: 14.309286 +epoch: 2, batch: 14625, sum loss: 4524.614258, avg loss: 2.481961, ppl: 11.964702 +epoch: 2, batch: 14626, sum loss: 4856.581543, avg loss: 2.705616, ppl: 14.963538 +epoch: 2, batch: 14627, sum loss: 3889.034180, avg loss: 2.577226, ppl: 13.160583 +epoch: 2, batch: 14628, sum loss: 3641.913330, avg loss: 2.496171, ppl: 12.135937 +epoch: 2, batch: 14629, sum loss: 4938.913574, avg loss: 2.681278, ppl: 14.603742 +epoch: 2, batch: 14630, sum loss: 3637.354492, avg loss: 2.579684, ppl: 13.192972 +epoch: 2, batch: 14631, sum loss: 4378.384766, avg loss: 2.584643, ppl: 13.258551 +epoch: 2, batch: 14632, sum loss: 4534.551270, avg loss: 2.380342, ppl: 10.808597 +epoch: 2, batch: 14633, sum loss: 4477.810059, avg loss: 2.609447, ppl: 13.591526 +epoch: 2, batch: 14634, sum loss: 3736.127441, avg loss: 2.303408, ppl: 10.008229 +epoch: 2, batch: 14635, sum loss: 3599.484619, avg loss: 2.253904, ppl: 9.524849 +epoch: 2, batch: 14636, sum loss: 3561.469238, avg loss: 2.339993, ppl: 10.381164 +epoch: 2, batch: 14637, sum loss: 4754.863770, avg loss: 2.568808, ppl: 13.050261 +epoch: 2, batch: 14638, sum loss: 3539.104004, avg loss: 2.343777, ppl: 10.420525 +epoch: 2, batch: 14639, sum loss: 4442.433594, avg loss: 2.634895, ppl: 13.941853 +epoch: 2, batch: 14640, sum loss: 
4895.615234, avg loss: 2.590273, ppl: 13.333406 +epoch: 2, batch: 14641, sum loss: 4551.637207, avg loss: 2.627966, ppl: 13.845578 +epoch: 2, batch: 14642, sum loss: 4521.297852, avg loss: 2.681671, ppl: 14.609491 +epoch: 2, batch: 14643, sum loss: 5144.331055, avg loss: 2.720429, ppl: 15.186835 +epoch: 2, batch: 14644, sum loss: 4077.343506, avg loss: 2.365049, ppl: 10.644557 +epoch: 2, batch: 14645, sum loss: 5000.878418, avg loss: 2.595163, ppl: 13.398767 +epoch: 2, batch: 14646, sum loss: 3670.823242, avg loss: 2.478611, ppl: 11.924692 +epoch: 2, batch: 14647, sum loss: 4245.952148, avg loss: 2.380018, ppl: 10.805099 +epoch: 2, batch: 14648, sum loss: 4541.550293, avg loss: 2.596655, ppl: 13.418782 +epoch: 2, batch: 14649, sum loss: 4232.418945, avg loss: 2.396613, ppl: 10.985906 +epoch: 2, batch: 14650, sum loss: 4344.911621, avg loss: 2.592429, ppl: 13.362194 +epoch: 2, batch: 14651, sum loss: 4423.617188, avg loss: 2.299177, ppl: 9.965981 +epoch: 2, batch: 14652, sum loss: 4104.575195, avg loss: 2.423008, ppl: 11.279735 +epoch: 2, batch: 14653, sum loss: 4180.958008, avg loss: 2.572897, ppl: 13.103734 +epoch: 2, batch: 14654, sum loss: 4714.098145, avg loss: 2.552300, ppl: 12.836596 +epoch: 2, batch: 14655, sum loss: 3618.209229, avg loss: 2.259968, ppl: 9.582786 +epoch: 2, batch: 14656, sum loss: 4322.322266, avg loss: 2.448908, ppl: 11.575698 +epoch: 2, batch: 14657, sum loss: 4892.488281, avg loss: 2.628957, ppl: 13.859304 +epoch: 2, batch: 14658, sum loss: 3280.659668, avg loss: 2.430118, ppl: 11.360227 +epoch: 2, batch: 14659, sum loss: 4116.485840, avg loss: 2.466439, ppl: 11.780416 +epoch: 2, batch: 14660, sum loss: 4124.656250, avg loss: 2.323750, ppl: 10.213902 +epoch: 2, batch: 14661, sum loss: 3721.401367, avg loss: 2.359798, ppl: 10.588812 +epoch: 2, batch: 14662, sum loss: 5121.173340, avg loss: 2.654833, ppl: 14.222611 +epoch: 2, batch: 14663, sum loss: 4272.150391, avg loss: 2.502724, ppl: 12.215730 +epoch: 2, batch: 14664, sum loss: 
3370.068115, avg loss: 2.353400, ppl: 10.521278 +epoch: 2, batch: 14665, sum loss: 3641.014648, avg loss: 2.308824, ppl: 10.062580 +epoch: 2, batch: 14666, sum loss: 3176.256592, avg loss: 2.235226, ppl: 9.348596 +epoch: 2, batch: 14667, sum loss: 4748.329102, avg loss: 2.556989, ppl: 12.896929 +epoch: 2, batch: 14668, sum loss: 4176.987793, avg loss: 2.620444, ppl: 13.741824 +epoch: 2, batch: 14669, sum loss: 3861.864502, avg loss: 2.547404, ppl: 12.773900 +epoch: 2, batch: 14670, sum loss: 4623.824219, avg loss: 2.745739, ppl: 15.576120 +epoch: 2, batch: 14671, sum loss: 4630.239746, avg loss: 2.565230, ppl: 13.003644 +epoch: 2, batch: 14672, sum loss: 3585.273438, avg loss: 2.420846, ppl: 11.255383 +epoch: 2, batch: 14673, sum loss: 3405.298828, avg loss: 2.428887, ppl: 11.346243 +epoch: 2, batch: 14674, sum loss: 4504.347656, avg loss: 2.708567, ppl: 15.007760 +epoch: 2, batch: 14675, sum loss: 4433.556641, avg loss: 2.386198, ppl: 10.872083 +epoch: 2, batch: 14676, sum loss: 4361.848633, avg loss: 2.697495, ppl: 14.842497 +epoch: 2, batch: 14677, sum loss: 4081.169678, avg loss: 2.406350, ppl: 11.093398 +epoch: 2, batch: 14678, sum loss: 3083.128418, avg loss: 2.118989, ppl: 8.322715 +epoch: 2, batch: 14679, sum loss: 3887.833496, avg loss: 2.398417, ppl: 11.005735 +epoch: 2, batch: 14680, sum loss: 4702.333496, avg loss: 2.649202, ppl: 14.142746 +epoch: 2, batch: 14681, sum loss: 4568.218262, avg loss: 2.748627, ppl: 15.621172 +epoch: 2, batch: 14682, sum loss: 3760.834717, avg loss: 2.431050, ppl: 11.370818 +epoch: 2, batch: 14683, sum loss: 3308.074219, avg loss: 2.286160, ppl: 9.837095 +epoch: 2, batch: 14684, sum loss: 3942.718506, avg loss: 2.454993, ppl: 11.646349 +epoch: 2, batch: 14685, sum loss: 3763.879395, avg loss: 2.505912, ppl: 12.254725 +epoch: 2, batch: 14686, sum loss: 3861.021240, avg loss: 2.420703, ppl: 11.253767 +epoch: 2, batch: 14687, sum loss: 4151.497070, avg loss: 2.575370, ppl: 13.136181 +epoch: 2, batch: 14688, sum loss: 
4288.636719, avg loss: 2.695560, ppl: 14.813819 +epoch: 2, batch: 14689, sum loss: 3921.240479, avg loss: 2.422014, ppl: 11.268531 +epoch: 2, batch: 14690, sum loss: 4319.481445, avg loss: 2.375952, ppl: 10.761258 +epoch: 2, batch: 14691, sum loss: 4524.401855, avg loss: 2.619804, ppl: 13.733037 +epoch: 2, batch: 14692, sum loss: 4286.326172, avg loss: 2.612021, ppl: 13.626558 +epoch: 2, batch: 14693, sum loss: 4224.370605, avg loss: 2.510024, ppl: 12.305226 +epoch: 2, batch: 14694, sum loss: 3988.121338, avg loss: 2.409741, ppl: 11.131080 +epoch: 2, batch: 14695, sum loss: 4146.615234, avg loss: 2.478551, ppl: 11.923970 +epoch: 2, batch: 14696, sum loss: 3800.986816, avg loss: 2.137788, ppl: 8.480659 +epoch: 2, batch: 14697, sum loss: 5142.933594, avg loss: 2.808811, ppl: 16.590187 +epoch: 2, batch: 14698, sum loss: 3774.951172, avg loss: 2.410569, ppl: 11.140298 +epoch: 2, batch: 14699, sum loss: 4397.555176, avg loss: 2.712866, ppl: 15.072405 +epoch: 2, batch: 14700, sum loss: 3602.474121, avg loss: 2.571359, ppl: 13.083595 +epoch: 2, batch: 14701, sum loss: 3845.980957, avg loss: 2.314068, ppl: 10.115492 +epoch: 2, batch: 14702, sum loss: 4214.388184, avg loss: 2.490773, ppl: 12.070605 +epoch: 2, batch: 14703, sum loss: 4585.914551, avg loss: 2.567701, ppl: 13.035825 +epoch: 2, batch: 14704, sum loss: 4741.282227, avg loss: 2.392171, ppl: 10.937209 +epoch: 2, batch: 14705, sum loss: 4234.024414, avg loss: 2.304858, ppl: 10.022757 +epoch: 2, batch: 14706, sum loss: 3986.221680, avg loss: 2.471309, ppl: 11.837934 +epoch: 2, batch: 14707, sum loss: 5065.979492, avg loss: 2.696104, ppl: 14.821874 +epoch: 2, batch: 14708, sum loss: 4472.500488, avg loss: 2.624707, ppl: 13.800529 +epoch: 2, batch: 14709, sum loss: 4383.748535, avg loss: 2.620292, ppl: 13.739738 +epoch: 2, batch: 14710, sum loss: 4936.227539, avg loss: 2.634059, ppl: 13.930204 +epoch: 2, batch: 14711, sum loss: 4260.832520, avg loss: 2.461486, ppl: 11.722219 +epoch: 2, batch: 14712, sum loss: 
3708.223145, avg loss: 2.578737, ppl: 13.180474 +epoch: 2, batch: 14713, sum loss: 3402.658936, avg loss: 2.223960, ppl: 9.243866 +epoch: 2, batch: 14714, sum loss: 4267.968750, avg loss: 2.762439, ppl: 15.838433 +epoch: 2, batch: 14715, sum loss: 4034.132324, avg loss: 2.534003, ppl: 12.603856 +epoch: 2, batch: 14716, sum loss: 4412.433594, avg loss: 2.399366, ppl: 11.016189 +epoch: 2, batch: 14717, sum loss: 4241.329102, avg loss: 2.505215, ppl: 12.246194 +epoch: 2, batch: 14718, sum loss: 4217.037109, avg loss: 2.375795, ppl: 10.759567 +epoch: 2, batch: 14719, sum loss: 4043.363770, avg loss: 2.476034, ppl: 11.894001 +epoch: 2, batch: 14720, sum loss: 5661.292969, avg loss: 2.625832, ppl: 13.816059 +epoch: 2, batch: 14721, sum loss: 4617.001465, avg loss: 2.784681, ppl: 16.194656 +epoch: 2, batch: 14722, sum loss: 4784.603027, avg loss: 2.656637, ppl: 14.248287 +epoch: 2, batch: 14723, sum loss: 4472.340820, avg loss: 2.612349, ppl: 13.631026 +epoch: 2, batch: 14724, sum loss: 3672.980469, avg loss: 2.235533, ppl: 9.351462 +epoch: 2, batch: 14725, sum loss: 4022.413086, avg loss: 2.490658, ppl: 12.069219 +epoch: 2, batch: 14726, sum loss: 5053.038086, avg loss: 2.821350, ppl: 16.799517 +epoch: 2, batch: 14727, sum loss: 4948.242676, avg loss: 2.657488, ppl: 14.260424 +epoch: 2, batch: 14728, sum loss: 4076.233643, avg loss: 2.437939, ppl: 11.449418 +epoch: 2, batch: 14729, sum loss: 4394.307129, avg loss: 2.617217, ppl: 13.697548 +epoch: 2, batch: 14730, sum loss: 3352.552734, avg loss: 2.132667, ppl: 8.437340 +epoch: 2, batch: 14731, sum loss: 3796.584961, avg loss: 2.415130, ppl: 11.191229 +epoch: 2, batch: 14732, sum loss: 5651.904297, avg loss: 2.600968, ppl: 13.476782 +epoch: 2, batch: 14733, sum loss: 4330.667480, avg loss: 2.480337, ppl: 11.945285 +epoch: 2, batch: 14734, sum loss: 5077.135742, avg loss: 2.561622, ppl: 12.956821 +epoch: 2, batch: 14735, sum loss: 3452.020996, avg loss: 2.422471, ppl: 11.273680 +epoch: 2, batch: 14736, sum loss: 
4114.002930, avg loss: 2.525477, ppl: 12.496860 +epoch: 2, batch: 14737, sum loss: 5178.909180, avg loss: 2.549931, ppl: 12.806218 +epoch: 2, batch: 14738, sum loss: 3575.292969, avg loss: 2.234558, ppl: 9.342353 +epoch: 2, batch: 14739, sum loss: 4084.096680, avg loss: 2.541442, ppl: 12.697964 +epoch: 2, batch: 14740, sum loss: 4031.731201, avg loss: 2.579483, ppl: 13.190311 +epoch: 2, batch: 14741, sum loss: 4856.795410, avg loss: 2.638129, ppl: 13.987009 +epoch: 2, batch: 14742, sum loss: 3788.087402, avg loss: 2.573429, ppl: 13.110705 +epoch: 2, batch: 14743, sum loss: 4646.265137, avg loss: 2.559926, ppl: 12.934857 +epoch: 2, batch: 14744, sum loss: 4546.181152, avg loss: 2.564118, ppl: 12.989199 +epoch: 2, batch: 14745, sum loss: 4385.562988, avg loss: 2.698808, ppl: 14.862005 +epoch: 2, batch: 14746, sum loss: 5218.645508, avg loss: 2.827002, ppl: 16.894732 +epoch: 2, batch: 14747, sum loss: 3927.839844, avg loss: 2.562192, ppl: 12.964200 +epoch: 2, batch: 14748, sum loss: 3814.047119, avg loss: 2.170772, ppl: 8.765051 +epoch: 2, batch: 14749, sum loss: 3042.209473, avg loss: 1.918165, ppl: 6.808453 +epoch: 2, batch: 14750, sum loss: 4497.416504, avg loss: 2.491643, ppl: 12.081114 +epoch: 2, batch: 14751, sum loss: 5465.972656, avg loss: 2.858772, ppl: 17.440102 +epoch: 2, batch: 14752, sum loss: 4714.509766, avg loss: 2.540145, ppl: 12.681515 +epoch: 2, batch: 14753, sum loss: 3855.125000, avg loss: 2.301567, ppl: 9.989827 +epoch: 2, batch: 14754, sum loss: 4238.522949, avg loss: 2.488857, ppl: 12.047496 +epoch: 2, batch: 14755, sum loss: 3984.784180, avg loss: 2.250019, ppl: 9.487919 +epoch: 2, batch: 14756, sum loss: 4305.534180, avg loss: 2.531178, ppl: 12.568306 +epoch: 2, batch: 14757, sum loss: 4268.255859, avg loss: 2.616956, ppl: 13.693982 +epoch: 2, batch: 14758, sum loss: 3660.680420, avg loss: 2.500465, ppl: 12.188156 +epoch: 2, batch: 14759, sum loss: 5001.318848, avg loss: 2.699039, ppl: 14.865436 +epoch: 2, batch: 14760, sum loss: 4234.490723, 
avg loss: 2.470531, ppl: 11.828732 +epoch: 2, batch: 14761, sum loss: 4694.263672, avg loss: 2.626896, ppl: 13.830778 +epoch: 2, batch: 14762, sum loss: 5272.276367, avg loss: 2.713472, ppl: 15.081553 +epoch: 2, batch: 14763, sum loss: 4200.741211, avg loss: 2.694510, ppl: 14.798269 +epoch: 2, batch: 14764, sum loss: 3685.380127, avg loss: 2.245814, ppl: 9.448099 +epoch: 2, batch: 14765, sum loss: 4022.731445, avg loss: 2.522089, ppl: 12.454584 +epoch: 2, batch: 14766, sum loss: 4059.430176, avg loss: 2.393532, ppl: 10.952109 +epoch: 2, batch: 14767, sum loss: 3620.008545, avg loss: 2.317547, ppl: 10.150744 +epoch: 2, batch: 14768, sum loss: 3673.807129, avg loss: 2.288976, ppl: 9.864835 +epoch: 2, batch: 14769, sum loss: 4195.545898, avg loss: 2.530486, ppl: 12.559610 +epoch: 2, batch: 14770, sum loss: 4712.741211, avg loss: 2.641671, ppl: 14.036642 +epoch: 2, batch: 14771, sum loss: 3415.416992, avg loss: 2.230841, ppl: 9.307688 +epoch: 2, batch: 14772, sum loss: 4276.423828, avg loss: 2.676110, ppl: 14.528468 +epoch: 2, batch: 14773, sum loss: 4384.854492, avg loss: 2.573272, ppl: 13.108640 +epoch: 2, batch: 14774, sum loss: 4716.114258, avg loss: 2.535545, ppl: 12.623313 +epoch: 2, batch: 14775, sum loss: 4197.512207, avg loss: 2.729202, ppl: 15.320653 +epoch: 2, batch: 14776, sum loss: 3146.828125, avg loss: 2.296955, ppl: 9.943856 +epoch: 2, batch: 14777, sum loss: 4721.834473, avg loss: 2.604432, ppl: 13.523537 +epoch: 2, batch: 14778, sum loss: 4106.707520, avg loss: 2.407214, ppl: 11.102987 +epoch: 2, batch: 14779, sum loss: 4673.476074, avg loss: 2.618194, ppl: 13.710940 +epoch: 2, batch: 14780, sum loss: 3025.734619, avg loss: 2.309721, ppl: 10.071614 +epoch: 2, batch: 14781, sum loss: 3843.221191, avg loss: 2.455732, ppl: 11.654966 +epoch: 2, batch: 14782, sum loss: 3706.734863, avg loss: 2.486073, ppl: 12.014008 +epoch: 2, batch: 14783, sum loss: 3868.389648, avg loss: 2.358774, ppl: 10.577977 +epoch: 2, batch: 14784, sum loss: 3872.273682, avg loss: 
2.690948, ppl: 14.745642 +epoch: 2, batch: 14785, sum loss: 5445.127930, avg loss: 2.758423, ppl: 15.774951 +epoch: 2, batch: 14786, sum loss: 5090.435547, avg loss: 2.712006, ppl: 15.059456 +epoch: 2, batch: 14787, sum loss: 4232.867676, avg loss: 2.500217, ppl: 12.185141 +epoch: 2, batch: 14788, sum loss: 3603.040039, avg loss: 2.312606, ppl: 10.100714 +epoch: 2, batch: 14789, sum loss: 4603.028809, avg loss: 2.801600, ppl: 16.470980 +epoch: 2, batch: 14790, sum loss: 4681.884277, avg loss: 2.615578, ppl: 13.675115 +epoch: 2, batch: 14791, sum loss: 4407.255371, avg loss: 2.501280, ppl: 12.198098 +epoch: 2, batch: 14792, sum loss: 3624.431152, avg loss: 2.227677, ppl: 9.278291 +epoch: 2, batch: 14793, sum loss: 3791.664551, avg loss: 2.236970, ppl: 9.364914 +epoch: 2, batch: 14794, sum loss: 3626.419434, avg loss: 2.238531, ppl: 9.379539 +epoch: 2, batch: 14795, sum loss: 5023.182617, avg loss: 2.635458, ppl: 13.949697 +epoch: 2, batch: 14796, sum loss: 4571.998535, avg loss: 2.639722, ppl: 14.009310 +epoch: 2, batch: 14797, sum loss: 4173.057129, avg loss: 2.368364, ppl: 10.679907 +epoch: 2, batch: 14798, sum loss: 4305.322266, avg loss: 2.386542, ppl: 10.875824 +epoch: 2, batch: 14799, sum loss: 4804.306641, avg loss: 2.657249, ppl: 14.257017 +epoch: 2, batch: 14800, sum loss: 4730.406738, avg loss: 3.028430, ppl: 20.664753 +epoch: 2, batch: 14801, sum loss: 3364.692139, avg loss: 2.246123, ppl: 9.451021 +epoch: 2, batch: 14802, sum loss: 4657.778320, avg loss: 2.681507, ppl: 14.607095 +epoch: 2, batch: 14803, sum loss: 4767.471680, avg loss: 2.535889, ppl: 12.627654 +epoch: 2, batch: 14804, sum loss: 3636.710938, avg loss: 2.173766, ppl: 8.791333 +epoch: 2, batch: 14805, sum loss: 4489.918945, avg loss: 2.511140, ppl: 12.318970 +epoch: 2, batch: 14806, sum loss: 5488.629395, avg loss: 2.810358, ppl: 16.615866 +epoch: 2, batch: 14807, sum loss: 3583.458008, avg loss: 2.302994, ppl: 10.004086 +epoch: 2, batch: 14808, sum loss: 4188.503418, avg loss: 2.497617, 
ppl: 12.153495 +epoch: 2, batch: 14809, sum loss: 4209.650879, avg loss: 2.387777, ppl: 10.889261 +epoch: 2, batch: 14810, sum loss: 4459.780762, avg loss: 2.490106, ppl: 12.062559 +epoch: 2, batch: 14811, sum loss: 4229.342285, avg loss: 2.504051, ppl: 12.231948 +epoch: 2, batch: 14812, sum loss: 4482.151367, avg loss: 2.393033, ppl: 10.946648 +epoch: 2, batch: 14813, sum loss: 4107.838379, avg loss: 2.432113, ppl: 11.382905 +epoch: 2, batch: 14814, sum loss: 4956.002441, avg loss: 2.745708, ppl: 15.575634 +epoch: 2, batch: 14815, sum loss: 3799.885986, avg loss: 2.352871, ppl: 10.515714 +epoch: 2, batch: 14816, sum loss: 5376.571777, avg loss: 2.618886, ppl: 13.720424 +epoch: 2, batch: 14817, sum loss: 4379.964844, avg loss: 2.710374, ppl: 15.034903 +epoch: 2, batch: 14818, sum loss: 4801.408203, avg loss: 2.480066, ppl: 11.942053 +epoch: 2, batch: 14819, sum loss: 5007.901367, avg loss: 2.837338, ppl: 17.070263 +epoch: 2, batch: 14820, sum loss: 4574.363770, avg loss: 2.701928, ppl: 14.908446 +epoch: 2, batch: 14821, sum loss: 5092.229004, avg loss: 2.575735, ppl: 13.140977 +epoch: 2, batch: 14822, sum loss: 4021.479980, avg loss: 2.350368, ppl: 10.489432 +epoch: 2, batch: 14823, sum loss: 3509.044434, avg loss: 2.380627, ppl: 10.811682 +epoch: 2, batch: 14824, sum loss: 3728.186523, avg loss: 2.244543, ppl: 9.436105 +epoch: 2, batch: 14825, sum loss: 4915.562012, avg loss: 2.760001, ppl: 15.799862 +epoch: 2, batch: 14826, sum loss: 3936.192871, avg loss: 2.450929, ppl: 11.599122 +epoch: 2, batch: 14827, sum loss: 4014.729492, avg loss: 2.586810, ppl: 13.287322 +epoch: 2, batch: 14828, sum loss: 4583.653809, avg loss: 2.651043, ppl: 14.168815 +epoch: 2, batch: 14829, sum loss: 3523.822266, avg loss: 2.206526, ppl: 9.084105 +epoch: 2, batch: 14830, sum loss: 4618.363281, avg loss: 2.612196, ppl: 13.628953 +epoch: 2, batch: 14831, sum loss: 4948.528809, avg loss: 2.791049, ppl: 16.298100 +epoch: 2, batch: 14832, sum loss: 4511.715332, avg loss: 2.664923, ppl: 
14.366850 +epoch: 2, batch: 14833, sum loss: 4201.336914, avg loss: 2.494856, ppl: 12.119984 +epoch: 2, batch: 14834, sum loss: 3494.017822, avg loss: 2.396446, ppl: 10.984067 +epoch: 2, batch: 14835, sum loss: 3524.473633, avg loss: 2.151693, ppl: 8.599406 +epoch: 2, batch: 14836, sum loss: 4257.104004, avg loss: 2.502707, ppl: 12.215511 +epoch: 2, batch: 14837, sum loss: 3098.802734, avg loss: 2.424728, ppl: 11.299157 +epoch: 2, batch: 14838, sum loss: 3722.220947, avg loss: 2.373865, ppl: 10.738822 +epoch: 2, batch: 14839, sum loss: 4725.668945, avg loss: 2.521702, ppl: 12.449763 +epoch: 2, batch: 14840, sum loss: 4034.767578, avg loss: 2.613192, ppl: 13.642523 +epoch: 2, batch: 14841, sum loss: 4097.125488, avg loss: 2.368281, ppl: 10.679015 +epoch: 2, batch: 14842, sum loss: 4086.077881, avg loss: 2.432189, ppl: 11.383777 +epoch: 2, batch: 14843, sum loss: 4349.250977, avg loss: 2.575045, ppl: 13.131909 +epoch: 2, batch: 14844, sum loss: 4465.482910, avg loss: 2.554624, ppl: 12.866465 +epoch: 2, batch: 14845, sum loss: 4439.371094, avg loss: 2.415327, ppl: 11.193431 +epoch: 2, batch: 14846, sum loss: 4099.000977, avg loss: 2.405517, ppl: 11.084161 +epoch: 2, batch: 14847, sum loss: 3645.139160, avg loss: 2.441486, ppl: 11.490107 +epoch: 2, batch: 14848, sum loss: 4334.016602, avg loss: 2.612427, ppl: 13.632095 +epoch: 2, batch: 14849, sum loss: 4238.745117, avg loss: 2.415239, ppl: 11.192449 +epoch: 2, batch: 14850, sum loss: 4149.214355, avg loss: 2.405342, ppl: 11.082218 +epoch: 2, batch: 14851, sum loss: 4171.500000, avg loss: 2.578183, ppl: 13.173180 +epoch: 2, batch: 14852, sum loss: 4375.632324, avg loss: 2.833959, ppl: 17.012678 +epoch: 2, batch: 14853, sum loss: 4220.730469, avg loss: 2.283945, ppl: 9.815327 +epoch: 2, batch: 14854, sum loss: 3674.121582, avg loss: 2.368873, ppl: 10.685339 +epoch: 2, batch: 14855, sum loss: 3845.653076, avg loss: 2.369472, ppl: 10.691746 +epoch: 2, batch: 14856, sum loss: 4241.688477, avg loss: 2.689720, ppl: 14.727554 
+epoch: 2, batch: 14857, sum loss: 4896.666504, avg loss: 2.708333, ppl: 15.004247 +epoch: 2, batch: 14858, sum loss: 5242.429688, avg loss: 2.600412, ppl: 13.469281 +epoch: 2, batch: 14859, sum loss: 5244.750977, avg loss: 2.873836, ppl: 17.704809 +epoch: 2, batch: 14860, sum loss: 4095.735840, avg loss: 2.269106, ppl: 9.670748 +epoch: 2, batch: 14861, sum loss: 3906.082031, avg loss: 2.448954, ppl: 11.576236 +epoch: 2, batch: 14862, sum loss: 3745.362305, avg loss: 2.172484, ppl: 8.780068 +epoch: 2, batch: 14863, sum loss: 4419.926758, avg loss: 2.387859, ppl: 10.890152 +epoch: 2, batch: 14864, sum loss: 4726.479492, avg loss: 2.447685, ppl: 11.561551 +epoch: 2, batch: 14865, sum loss: 3492.155518, avg loss: 2.237127, ppl: 9.366385 +epoch: 2, batch: 14866, sum loss: 5020.018555, avg loss: 2.782715, ppl: 16.162849 +epoch: 2, batch: 14867, sum loss: 4948.659180, avg loss: 2.553488, ppl: 12.851851 +epoch: 2, batch: 14868, sum loss: 3790.934326, avg loss: 2.353156, ppl: 10.518716 +epoch: 2, batch: 14869, sum loss: 4403.888184, avg loss: 2.366410, ppl: 10.659053 +epoch: 2, batch: 14870, sum loss: 3389.187500, avg loss: 2.265500, ppl: 9.635938 +epoch: 2, batch: 14871, sum loss: 4064.794922, avg loss: 2.617383, ppl: 13.699818 +epoch: 2, batch: 14872, sum loss: 3562.507324, avg loss: 2.438403, ppl: 11.454737 +epoch: 2, batch: 14873, sum loss: 4355.943848, avg loss: 2.452671, ppl: 11.619341 +epoch: 2, batch: 14874, sum loss: 4416.300781, avg loss: 2.743044, ppl: 15.534198 +epoch: 2, batch: 14875, sum loss: 4357.670898, avg loss: 2.467537, ppl: 11.793365 +epoch: 2, batch: 14876, sum loss: 3941.710205, avg loss: 2.523502, ppl: 12.472199 +epoch: 2, batch: 14877, sum loss: 5213.660645, avg loss: 2.650564, ppl: 14.162030 +epoch: 2, batch: 14878, sum loss: 3731.495117, avg loss: 2.418338, ppl: 11.227182 +epoch: 2, batch: 14879, sum loss: 5407.333008, avg loss: 2.966173, ppl: 19.417465 +epoch: 2, batch: 14880, sum loss: 4076.651855, avg loss: 2.412220, ppl: 11.158706 +epoch: 2, 
batch: 14881, sum loss: 3334.903076, avg loss: 2.314298, ppl: 10.117817 +epoch: 2, batch: 14882, sum loss: 3914.768066, avg loss: 2.449792, ppl: 11.585941 +epoch: 2, batch: 14883, sum loss: 4433.614258, avg loss: 2.421417, ppl: 11.261806 +epoch: 2, batch: 14884, sum loss: 4486.616699, avg loss: 2.476058, ppl: 11.894281 +epoch: 2, batch: 14885, sum loss: 4012.453613, avg loss: 2.421517, ppl: 11.262931 +epoch: 2, batch: 14886, sum loss: 4126.497070, avg loss: 2.566229, ppl: 13.016650 +epoch: 2, batch: 14887, sum loss: 3789.576172, avg loss: 2.241027, ppl: 9.402982 +epoch: 2, batch: 14888, sum loss: 3627.561035, avg loss: 2.337346, ppl: 10.353722 +epoch: 2, batch: 14889, sum loss: 4777.250488, avg loss: 2.551950, ppl: 12.832105 +epoch: 2, batch: 14890, sum loss: 4714.395508, avg loss: 2.555228, ppl: 12.874231 +epoch: 2, batch: 14891, sum loss: 4930.934570, avg loss: 2.624233, ppl: 13.793997 +epoch: 2, batch: 14892, sum loss: 4502.252441, avg loss: 2.499862, ppl: 12.180819 +epoch: 2, batch: 14893, sum loss: 4915.630371, avg loss: 2.644233, ppl: 14.072654 +epoch: 2, batch: 14894, sum loss: 4631.600586, avg loss: 2.631591, ppl: 13.895862 +epoch: 2, batch: 14895, sum loss: 5695.396973, avg loss: 2.882286, ppl: 17.855045 +epoch: 2, batch: 14896, sum loss: 3959.345703, avg loss: 2.358157, ppl: 10.571449 +epoch: 2, batch: 14897, sum loss: 4091.998047, avg loss: 2.658868, ppl: 14.280116 +epoch: 2, batch: 14898, sum loss: 4754.275391, avg loss: 2.751317, ppl: 15.663243 +epoch: 2, batch: 14899, sum loss: 4359.935547, avg loss: 2.546691, ppl: 12.764797 +epoch: 2, batch: 14900, sum loss: 4326.569824, avg loss: 2.502354, ppl: 12.211205 +epoch: 2, batch: 14901, sum loss: 4185.802246, avg loss: 2.447838, ppl: 11.563315 +epoch: 2, batch: 14902, sum loss: 3788.952148, avg loss: 2.469982, ppl: 11.822233 +epoch: 2, batch: 14903, sum loss: 4349.313965, avg loss: 2.623229, ppl: 13.780151 +epoch: 2, batch: 14904, sum loss: 3955.198486, avg loss: 2.348693, ppl: 10.471873 +epoch: 2, batch: 
14905, sum loss: 4437.963379, avg loss: 2.446507, ppl: 11.547939 +epoch: 2, batch: 14906, sum loss: 4175.787109, avg loss: 2.325049, ppl: 10.227178 +epoch: 2, batch: 14907, sum loss: 4427.627930, avg loss: 2.592288, ppl: 13.360305 +epoch: 2, batch: 14908, sum loss: 4167.139160, avg loss: 2.536299, ppl: 12.632827 +epoch: 2, batch: 14909, sum loss: 3811.935059, avg loss: 2.438858, ppl: 11.459947 +epoch: 2, batch: 14910, sum loss: 4328.617676, avg loss: 2.629780, ppl: 13.870719 +epoch: 2, batch: 14911, sum loss: 4164.462402, avg loss: 2.302080, ppl: 9.994949 +epoch: 2, batch: 14912, sum loss: 3699.653564, avg loss: 2.453351, ppl: 11.627245 +epoch: 2, batch: 14913, sum loss: 3785.898193, avg loss: 2.396138, ppl: 10.980689 +epoch: 2, batch: 14914, sum loss: 4954.420898, avg loss: 2.610338, ppl: 13.603644 +epoch: 2, batch: 14915, sum loss: 3232.601562, avg loss: 2.217148, ppl: 9.181108 +epoch: 2, batch: 14916, sum loss: 4727.621094, avg loss: 2.431904, ppl: 11.380531 +epoch: 2, batch: 14917, sum loss: 4564.255859, avg loss: 2.535698, ppl: 12.625237 +epoch: 2, batch: 14918, sum loss: 4523.532227, avg loss: 2.508892, ppl: 12.291305 +epoch: 2, batch: 14919, sum loss: 3635.045410, avg loss: 2.225992, ppl: 9.262669 +epoch: 2, batch: 14920, sum loss: 4864.771973, avg loss: 2.802288, ppl: 16.482317 +epoch: 2, batch: 14921, sum loss: 4390.553223, avg loss: 2.423043, ppl: 11.280128 +epoch: 2, batch: 14922, sum loss: 4067.670898, avg loss: 2.510908, ppl: 12.316110 +epoch: 2, batch: 14923, sum loss: 4143.408203, avg loss: 2.520321, ppl: 12.432591 +epoch: 2, batch: 14924, sum loss: 4133.698242, avg loss: 2.562739, ppl: 12.971299 +epoch: 2, batch: 14925, sum loss: 4360.173828, avg loss: 2.484429, ppl: 11.994274 +epoch: 2, batch: 14926, sum loss: 3771.822754, avg loss: 2.370725, ppl: 10.705147 +epoch: 2, batch: 14927, sum loss: 4537.458496, avg loss: 2.540570, ppl: 12.686904 +epoch: 2, batch: 14928, sum loss: 3628.162598, avg loss: 2.240990, ppl: 9.402635 +epoch: 2, batch: 14929, sum 
loss: 4913.842773, avg loss: 2.622115, ppl: 13.764801 +epoch: 2, batch: 14930, sum loss: 5337.368652, avg loss: 2.754060, ppl: 15.706274 +epoch: 2, batch: 14931, sum loss: 4278.721680, avg loss: 2.659243, ppl: 14.285469 +epoch: 2, batch: 14932, sum loss: 3532.911377, avg loss: 2.630612, ppl: 13.882258 +epoch: 2, batch: 14933, sum loss: 3261.270996, avg loss: 2.308047, ppl: 10.054771 +epoch: 2, batch: 14934, sum loss: 4711.376953, avg loss: 2.704579, ppl: 14.948024 +epoch: 2, batch: 14935, sum loss: 4408.458984, avg loss: 2.352433, ppl: 10.511109 +epoch: 2, batch: 14936, sum loss: 4489.107910, avg loss: 2.662579, ppl: 14.333204 +epoch: 2, batch: 14937, sum loss: 4407.260254, avg loss: 2.456667, ppl: 11.665864 +epoch: 2, batch: 14938, sum loss: 4402.039062, avg loss: 2.466128, ppl: 11.776763 +epoch: 2, batch: 14939, sum loss: 4738.520996, avg loss: 2.644264, ppl: 14.073083 +epoch: 2, batch: 14940, sum loss: 4493.494141, avg loss: 2.479853, ppl: 11.939514 +epoch: 2, batch: 14941, sum loss: 3989.355469, avg loss: 2.520123, ppl: 12.430132 +epoch: 2, batch: 14942, sum loss: 3783.054932, avg loss: 2.284453, ppl: 9.820317 +epoch: 2, batch: 14943, sum loss: 4223.242676, avg loss: 2.403667, ppl: 11.063669 +epoch: 2, batch: 14944, sum loss: 3946.671387, avg loss: 2.222225, ppl: 9.227839 +epoch: 2, batch: 14945, sum loss: 4318.024902, avg loss: 2.514866, ppl: 12.364953 +epoch: 2, batch: 14946, sum loss: 4091.199951, avg loss: 2.428012, ppl: 11.336322 +epoch: 2, batch: 14947, sum loss: 4703.722656, avg loss: 2.537067, ppl: 12.642538 +epoch: 2, batch: 14948, sum loss: 4157.299316, avg loss: 2.492386, ppl: 12.090084 +epoch: 2, batch: 14949, sum loss: 3815.169678, avg loss: 2.308028, ppl: 10.054572 +epoch: 2, batch: 14950, sum loss: 5093.071289, avg loss: 2.719205, ppl: 15.168261 +epoch: 2, batch: 14951, sum loss: 4680.821289, avg loss: 2.658047, ppl: 14.268398 +epoch: 2, batch: 14952, sum loss: 4511.258301, avg loss: 2.524487, ppl: 12.484489 +epoch: 2, batch: 14953, sum loss: 
5176.698242, avg loss: 2.590940, ppl: 13.342307 +epoch: 2, batch: 14954, sum loss: 4690.393555, avg loss: 2.517656, ppl: 12.399499 +epoch: 2, batch: 14955, sum loss: 4142.263672, avg loss: 2.234231, ppl: 9.339293 +epoch: 2, batch: 14956, sum loss: 2687.894775, avg loss: 2.056538, ppl: 7.818851 +epoch: 2, batch: 14957, sum loss: 3753.382568, avg loss: 2.254284, ppl: 9.528467 +epoch: 2, batch: 14958, sum loss: 4824.365723, avg loss: 2.963370, ppl: 19.363108 +epoch: 2, batch: 14959, sum loss: 4702.268066, avg loss: 2.463210, ppl: 11.742446 +epoch: 2, batch: 14960, sum loss: 3929.062012, avg loss: 2.304435, ppl: 10.018518 +epoch: 2, batch: 14961, sum loss: 5368.473145, avg loss: 2.812191, ppl: 16.646358 +epoch: 2, batch: 14962, sum loss: 4506.439941, avg loss: 2.652407, ppl: 14.188154 +epoch: 2, batch: 14963, sum loss: 4379.291992, avg loss: 2.574540, ppl: 13.125274 +epoch: 2, batch: 14964, sum loss: 4487.959961, avg loss: 2.437784, ppl: 11.447644 +epoch: 2, batch: 14965, sum loss: 3967.962158, avg loss: 2.476880, ppl: 11.904070 +epoch: 2, batch: 14966, sum loss: 3698.338379, avg loss: 2.318707, ppl: 10.162530 +epoch: 2, batch: 14967, sum loss: 4006.344482, avg loss: 2.546945, ppl: 12.768036 +epoch: 2, batch: 14968, sum loss: 4900.444336, avg loss: 2.688121, ppl: 14.704022 +epoch: 2, batch: 14969, sum loss: 4606.251465, avg loss: 2.644232, ppl: 14.072627 +epoch: 2, batch: 14970, sum loss: 5014.507324, avg loss: 2.477524, ppl: 11.911729 +epoch: 2, batch: 14971, sum loss: 3824.102295, avg loss: 2.525827, ppl: 12.501231 +epoch: 2, batch: 14972, sum loss: 3941.056152, avg loss: 2.423774, ppl: 11.288379 +epoch: 2, batch: 14973, sum loss: 5083.882812, avg loss: 2.819680, ppl: 16.771482 +epoch: 2, batch: 14974, sum loss: 4625.011230, avg loss: 2.517698, ppl: 12.400020 +epoch: 2, batch: 14975, sum loss: 4432.777832, avg loss: 2.609051, ppl: 13.586155 +epoch: 2, batch: 14976, sum loss: 4326.438477, avg loss: 2.421062, ppl: 11.257814 +epoch: 2, batch: 14977, sum loss: 
4137.927246, avg loss: 2.587822, ppl: 13.300767 +epoch: 2, batch: 14978, sum loss: 3585.831787, avg loss: 2.414701, ppl: 11.186430 +epoch: 2, batch: 14979, sum loss: 4343.646484, avg loss: 2.499221, ppl: 12.173012 +epoch: 2, batch: 14980, sum loss: 4711.243164, avg loss: 2.532927, ppl: 12.590299 +epoch: 2, batch: 14981, sum loss: 4021.573242, avg loss: 2.553380, ppl: 12.850462 +epoch: 2, batch: 14982, sum loss: 4653.693359, avg loss: 2.768408, ppl: 15.933242 +epoch: 2, batch: 14983, sum loss: 4078.216064, avg loss: 2.342456, ppl: 10.406765 +epoch: 2, batch: 14984, sum loss: 3765.900146, avg loss: 2.356633, ppl: 10.555356 +epoch: 2, batch: 14985, sum loss: 3627.977051, avg loss: 2.324136, ppl: 10.217854 +epoch: 2, batch: 14986, sum loss: 3984.954590, avg loss: 2.374824, ppl: 10.749122 +epoch: 2, batch: 14987, sum loss: 4931.875000, avg loss: 2.926929, ppl: 18.670202 +epoch: 2, batch: 14988, sum loss: 3726.490479, avg loss: 2.509421, ppl: 12.297809 +epoch: 2, batch: 14989, sum loss: 3369.613037, avg loss: 2.492317, ppl: 12.089260 +epoch: 2, batch: 14990, sum loss: 3945.020264, avg loss: 2.617797, ppl: 13.705499 +epoch: 2, batch: 14991, sum loss: 4005.004639, avg loss: 2.360050, ppl: 10.591480 +epoch: 2, batch: 14992, sum loss: 4171.788086, avg loss: 2.207295, ppl: 9.091096 +epoch: 2, batch: 14993, sum loss: 4364.597656, avg loss: 2.610405, ppl: 13.604566 +epoch: 2, batch: 14994, sum loss: 5314.704590, avg loss: 2.843609, ppl: 17.177641 +epoch: 2, batch: 14995, sum loss: 4587.935059, avg loss: 2.567395, ppl: 13.031835 +epoch: 2, batch: 14996, sum loss: 4760.478516, avg loss: 2.506835, ppl: 12.266041 +epoch: 2, batch: 14997, sum loss: 3733.976074, avg loss: 2.568072, ppl: 13.040652 +epoch: 2, batch: 14998, sum loss: 4688.183105, avg loss: 2.630855, ppl: 13.885634 +epoch: 2, batch: 14999, sum loss: 3888.297363, avg loss: 2.665043, ppl: 14.368566 +epoch: 2, batch: 15000, sum loss: 4526.803711, avg loss: 2.697738, ppl: 14.846107 +epoch: 2, batch: 15001, sum loss: 
4722.078613, avg loss: 2.480083, ppl: 11.942261 +epoch: 2, batch: 15002, sum loss: 5250.427734, avg loss: 2.673334, ppl: 14.488194 +epoch: 2, batch: 15003, sum loss: 4250.348633, avg loss: 2.633425, ppl: 13.921375 +epoch: 2, batch: 15004, sum loss: 4561.439941, avg loss: 2.680047, ppl: 14.585779 +epoch: 2, batch: 15005, sum loss: 3555.364258, avg loss: 2.182544, ppl: 8.868840 +epoch: 2, batch: 15006, sum loss: 4787.040039, avg loss: 2.614440, ppl: 13.659571 +epoch: 2, batch: 15007, sum loss: 3565.636475, avg loss: 2.445567, ppl: 11.537088 +epoch: 2, batch: 15008, sum loss: 4360.186523, avg loss: 2.453678, ppl: 11.631051 +epoch: 2, batch: 15009, sum loss: 3047.215576, avg loss: 2.142908, ppl: 8.524193 +epoch: 2, batch: 15010, sum loss: 4364.460938, avg loss: 2.537477, ppl: 12.647724 +epoch: 2, batch: 15011, sum loss: 3974.296143, avg loss: 2.386965, ppl: 10.880420 +epoch: 2, batch: 15012, sum loss: 3945.269775, avg loss: 2.470426, ppl: 11.827479 +epoch: 2, batch: 15013, sum loss: 4187.080566, avg loss: 2.197942, ppl: 9.006464 +epoch: 2, batch: 15014, sum loss: 4515.376953, avg loss: 2.440744, ppl: 11.481585 +epoch: 2, batch: 15015, sum loss: 4403.349609, avg loss: 2.333519, ppl: 10.314168 +epoch: 2, batch: 15016, sum loss: 4286.743652, avg loss: 2.554674, ppl: 12.867109 +epoch: 2, batch: 15017, sum loss: 4295.575684, avg loss: 2.490189, ppl: 12.063554 +epoch: 2, batch: 15018, sum loss: 4299.081543, avg loss: 2.632628, ppl: 13.910277 +epoch: 2, batch: 15019, sum loss: 4436.464355, avg loss: 2.507894, ppl: 12.279043 +epoch: 2, batch: 15020, sum loss: 3169.372314, avg loss: 2.193337, ppl: 8.965081 +epoch: 2, batch: 15021, sum loss: 3689.324707, avg loss: 2.193415, ppl: 8.965783 +epoch: 2, batch: 15022, sum loss: 3536.689941, avg loss: 2.346841, ppl: 10.452501 +epoch: 2, batch: 15023, sum loss: 4437.421875, avg loss: 2.452970, ppl: 11.622813 +epoch: 2, batch: 15024, sum loss: 4414.180176, avg loss: 2.607313, ppl: 13.562555 +epoch: 2, batch: 15025, sum loss: 4157.638672, 
avg loss: 2.501588, ppl: 12.201851 +epoch: 2, batch: 15026, sum loss: 4586.775879, avg loss: 2.585556, ppl: 13.270659 +epoch: 2, batch: 15027, sum loss: 3656.921387, avg loss: 2.580749, ppl: 13.207027 +epoch: 2, batch: 15028, sum loss: 3903.333252, avg loss: 2.557886, ppl: 12.908495 +epoch: 2, batch: 15029, sum loss: 4111.503418, avg loss: 2.460505, ppl: 11.710721 +epoch: 2, batch: 15030, sum loss: 4186.467773, avg loss: 2.487503, ppl: 12.031198 +epoch: 2, batch: 15031, sum loss: 4226.178223, avg loss: 2.502178, ppl: 12.209053 +epoch: 2, batch: 15032, sum loss: 4299.037109, avg loss: 2.430208, ppl: 11.361242 +epoch: 2, batch: 15033, sum loss: 4454.795898, avg loss: 2.481781, ppl: 11.962545 +epoch: 2, batch: 15034, sum loss: 4818.855469, avg loss: 2.581069, ppl: 13.211250 +epoch: 2, batch: 15035, sum loss: 3930.709473, avg loss: 2.458230, ppl: 11.684110 +epoch: 2, batch: 15036, sum loss: 3968.175293, avg loss: 2.437454, ppl: 11.443868 +epoch: 2, batch: 15037, sum loss: 3875.829834, avg loss: 2.495705, ppl: 12.130284 +epoch: 2, batch: 15038, sum loss: 3629.657959, avg loss: 2.247466, ppl: 9.463727 +epoch: 2, batch: 15039, sum loss: 3735.445801, avg loss: 2.470533, ppl: 11.828752 +epoch: 2, batch: 15040, sum loss: 4226.529297, avg loss: 2.218651, ppl: 9.194915 +epoch: 2, batch: 15041, sum loss: 4367.783691, avg loss: 2.394618, ppl: 10.964012 +epoch: 2, batch: 15042, sum loss: 3624.406250, avg loss: 2.235908, ppl: 9.354968 +epoch: 2, batch: 15043, sum loss: 4208.632812, avg loss: 2.406308, ppl: 11.092930 +epoch: 2, batch: 15044, sum loss: 4176.500977, avg loss: 2.502397, ppl: 12.211734 +epoch: 2, batch: 15045, sum loss: 4006.540039, avg loss: 2.322632, ppl: 10.202493 +epoch: 2, batch: 15046, sum loss: 3643.375244, avg loss: 2.445218, ppl: 11.533068 +epoch: 2, batch: 15047, sum loss: 4492.172852, avg loss: 2.800607, ppl: 16.454628 +epoch: 2, batch: 15048, sum loss: 4462.266602, avg loss: 2.508301, ppl: 12.284036 +epoch: 2, batch: 15049, sum loss: 3314.564941, avg loss: 
2.192173, ppl: 8.954648 +epoch: 2, batch: 15050, sum loss: 3749.797363, avg loss: 2.420786, ppl: 11.254701 +epoch: 2, batch: 15051, sum loss: 3359.653320, avg loss: 2.329857, ppl: 10.276468 +epoch: 2, batch: 15052, sum loss: 4408.766602, avg loss: 2.806344, ppl: 16.549307 +epoch: 2, batch: 15053, sum loss: 3421.674316, avg loss: 2.330841, ppl: 10.286587 +epoch: 2, batch: 15054, sum loss: 4213.626953, avg loss: 2.612292, ppl: 13.630257 +epoch: 2, batch: 15055, sum loss: 4818.255859, avg loss: 2.785119, ppl: 16.201742 +epoch: 2, batch: 15056, sum loss: 4934.495605, avg loss: 2.627527, ppl: 13.839502 +epoch: 2, batch: 15057, sum loss: 5024.333984, avg loss: 2.543967, ppl: 12.730065 +epoch: 2, batch: 15058, sum loss: 4874.200195, avg loss: 2.662043, ppl: 14.325524 +epoch: 2, batch: 15059, sum loss: 4308.704590, avg loss: 2.723581, ppl: 15.234775 +epoch: 2, batch: 15060, sum loss: 4959.229492, avg loss: 2.784520, ppl: 16.192038 +epoch: 2, batch: 15061, sum loss: 3792.328125, avg loss: 2.258682, ppl: 9.570472 +epoch: 2, batch: 15062, sum loss: 4585.047363, avg loss: 2.543010, ppl: 12.717897 +epoch: 2, batch: 15063, sum loss: 4032.621094, avg loss: 2.553908, ppl: 12.857253 +epoch: 2, batch: 15064, sum loss: 4026.270508, avg loss: 2.360065, ppl: 10.591638 +epoch: 2, batch: 15065, sum loss: 4659.147461, avg loss: 2.679211, ppl: 14.573588 +epoch: 2, batch: 15066, sum loss: 4033.847656, avg loss: 2.530645, ppl: 12.561602 +epoch: 2, batch: 15067, sum loss: 3625.567139, avg loss: 2.352737, ppl: 10.514305 +epoch: 2, batch: 15068, sum loss: 4832.665039, avg loss: 2.806426, ppl: 16.550653 +epoch: 2, batch: 15069, sum loss: 4272.212891, avg loss: 2.369503, ppl: 10.692072 +epoch: 2, batch: 15070, sum loss: 4303.530273, avg loss: 2.461974, ppl: 11.727939 +epoch: 2, batch: 15071, sum loss: 3937.291748, avg loss: 2.381907, ppl: 10.825522 +epoch: 2, batch: 15072, sum loss: 4639.214355, avg loss: 2.681627, ppl: 14.608840 +epoch: 2, batch: 15073, sum loss: 3747.850830, avg loss: 2.390211, 
ppl: 10.915795 +epoch: 2, batch: 15074, sum loss: 4344.719238, avg loss: 2.505605, ppl: 12.250968 +epoch: 2, batch: 15075, sum loss: 3717.062500, avg loss: 2.393472, ppl: 10.951456 +epoch: 2, batch: 15076, sum loss: 4453.467773, avg loss: 2.395625, ppl: 10.975060 +epoch: 2, batch: 15077, sum loss: 3269.194824, avg loss: 2.175113, ppl: 8.803180 +epoch: 2, batch: 15078, sum loss: 4598.920898, avg loss: 2.672238, ppl: 14.472317 +epoch: 2, batch: 15079, sum loss: 3398.332764, avg loss: 2.467925, ppl: 11.797941 +epoch: 2, batch: 15080, sum loss: 3730.925049, avg loss: 2.424253, ppl: 11.293787 +epoch: 2, batch: 15081, sum loss: 3963.316895, avg loss: 2.617779, ppl: 13.705247 +epoch: 2, batch: 15082, sum loss: 4330.230957, avg loss: 2.464559, ppl: 11.758299 +epoch: 2, batch: 15083, sum loss: 4791.603027, avg loss: 2.870943, ppl: 17.653650 +epoch: 2, batch: 15084, sum loss: 5488.119629, avg loss: 2.759235, ppl: 15.787766 +epoch: 2, batch: 15085, sum loss: 4466.721680, avg loss: 2.592410, ppl: 13.361930 +epoch: 2, batch: 15086, sum loss: 3451.595215, avg loss: 2.405293, ppl: 11.081674 +epoch: 2, batch: 15087, sum loss: 3936.861084, avg loss: 2.378768, ppl: 10.791601 +epoch: 2, batch: 15088, sum loss: 4167.869629, avg loss: 2.337560, ppl: 10.355939 +epoch: 2, batch: 15089, sum loss: 3386.960205, avg loss: 2.222415, ppl: 9.229591 +epoch: 2, batch: 15090, sum loss: 4212.609863, avg loss: 2.443509, ppl: 11.513374 +epoch: 2, batch: 15091, sum loss: 4688.172852, avg loss: 2.446854, ppl: 11.551951 +epoch: 2, batch: 15092, sum loss: 3563.250244, avg loss: 2.402731, ppl: 11.053324 +epoch: 2, batch: 15093, sum loss: 4271.907227, avg loss: 2.338209, ppl: 10.362657 +epoch: 2, batch: 15094, sum loss: 3729.423828, avg loss: 2.416996, ppl: 11.212122 +epoch: 2, batch: 15095, sum loss: 3934.609375, avg loss: 2.499752, ppl: 12.179471 +epoch: 2, batch: 15096, sum loss: 4058.339355, avg loss: 2.576723, ppl: 13.153966 +epoch: 2, batch: 15097, sum loss: 4213.796875, avg loss: 2.388774, ppl: 
10.900119 +epoch: 2, batch: 15098, sum loss: 3320.045654, avg loss: 2.418096, ppl: 11.224465 +epoch: 2, batch: 15099, sum loss: 3760.113281, avg loss: 2.197612, ppl: 9.003484 +epoch: 2, batch: 15100, sum loss: 4135.980469, avg loss: 2.364769, ppl: 10.641578 +epoch: 2, batch: 15101, sum loss: 3557.183105, avg loss: 2.377796, ppl: 10.781116 +epoch: 2, batch: 15102, sum loss: 4246.128418, avg loss: 2.598610, ppl: 13.445035 +epoch: 2, batch: 15103, sum loss: 4623.965820, avg loss: 2.433666, ppl: 11.400605 +epoch: 2, batch: 15104, sum loss: 4344.328125, avg loss: 2.661966, ppl: 14.324421 +epoch: 2, batch: 15105, sum loss: 4530.208496, avg loss: 2.599087, ppl: 13.451451 +epoch: 2, batch: 15106, sum loss: 4373.347656, avg loss: 2.455557, ppl: 11.652926 +epoch: 2, batch: 15107, sum loss: 3727.660156, avg loss: 2.436379, ppl: 11.431574 +epoch: 2, batch: 15108, sum loss: 3772.161621, avg loss: 2.432084, ppl: 11.382574 +epoch: 2, batch: 15109, sum loss: 3328.931641, avg loss: 2.161644, ppl: 8.685405 +epoch: 2, batch: 15110, sum loss: 4886.003906, avg loss: 2.746489, ppl: 15.587804 +epoch: 2, batch: 15111, sum loss: 3328.528320, avg loss: 2.340737, ppl: 10.388891 +epoch: 2, batch: 15112, sum loss: 4262.166992, avg loss: 2.675560, ppl: 14.520479 +epoch: 2, batch: 15113, sum loss: 3894.928711, avg loss: 2.204261, ppl: 9.063550 +epoch: 2, batch: 15114, sum loss: 4739.599121, avg loss: 2.591361, ppl: 13.347926 +epoch: 2, batch: 15115, sum loss: 3727.408691, avg loss: 2.428279, ppl: 11.339352 +epoch: 2, batch: 15116, sum loss: 4131.189941, avg loss: 2.399065, ppl: 11.012875 +epoch: 2, batch: 15117, sum loss: 4297.861328, avg loss: 2.503123, ppl: 12.220594 +epoch: 2, batch: 15118, sum loss: 4041.812988, avg loss: 2.418799, ppl: 11.232363 +epoch: 2, batch: 15119, sum loss: 3928.891113, avg loss: 2.362532, ppl: 10.617804 +epoch: 2, batch: 15120, sum loss: 4468.714844, avg loss: 2.806982, ppl: 16.559858 +epoch: 2, batch: 15121, sum loss: 3679.979736, avg loss: 2.530935, ppl: 12.565252 
+epoch: 2, batch: 15122, sum loss: 3590.287598, avg loss: 2.540897, ppl: 12.691051 +epoch: 2, batch: 15123, sum loss: 4300.779785, avg loss: 2.431193, ppl: 11.372437 +epoch: 2, batch: 15124, sum loss: 3330.831543, avg loss: 2.450943, ppl: 11.599279 +epoch: 2, batch: 15125, sum loss: 2894.715820, avg loss: 2.099141, ppl: 8.159159 +epoch: 2, batch: 15126, sum loss: 3635.560059, avg loss: 2.428564, ppl: 11.342581 +epoch: 2, batch: 15127, sum loss: 3528.417236, avg loss: 2.214951, ppl: 9.160963 +epoch: 2, batch: 15128, sum loss: 5501.724121, avg loss: 2.962695, ppl: 19.350042 +epoch: 2, batch: 15129, sum loss: 3763.061768, avg loss: 2.390764, ppl: 10.921830 +epoch: 2, batch: 15130, sum loss: 4957.465332, avg loss: 2.741961, ppl: 15.517385 +epoch: 2, batch: 15131, sum loss: 4034.654785, avg loss: 2.537519, ppl: 12.648249 +epoch: 2, batch: 15132, sum loss: 3901.373047, avg loss: 2.512153, ppl: 12.331447 +epoch: 2, batch: 15133, sum loss: 4266.751465, avg loss: 2.440933, ppl: 11.483755 +epoch: 2, batch: 15134, sum loss: 4400.479492, avg loss: 2.558418, ppl: 12.915373 +epoch: 2, batch: 15135, sum loss: 3607.940918, avg loss: 2.350450, ppl: 10.490290 +epoch: 2, batch: 15136, sum loss: 4397.931641, avg loss: 2.636650, ppl: 13.966332 +epoch: 2, batch: 15137, sum loss: 4226.472656, avg loss: 2.586581, ppl: 13.284268 +epoch: 2, batch: 15138, sum loss: 3800.462891, avg loss: 2.528585, ppl: 12.535752 +epoch: 2, batch: 15139, sum loss: 4458.712402, avg loss: 2.708817, ppl: 15.011503 +epoch: 2, batch: 15140, sum loss: 4286.532715, avg loss: 2.531915, ppl: 12.577575 +epoch: 2, batch: 15141, sum loss: 4275.599609, avg loss: 2.488708, ppl: 12.045697 +epoch: 2, batch: 15142, sum loss: 3843.750488, avg loss: 2.443579, ppl: 11.514181 +epoch: 2, batch: 15143, sum loss: 3750.797607, avg loss: 2.299692, ppl: 9.971110 +epoch: 2, batch: 15144, sum loss: 4884.565918, avg loss: 2.677942, ppl: 14.555105 +epoch: 2, batch: 15145, sum loss: 5603.731445, avg loss: 2.885547, ppl: 17.913359 +epoch: 2, 
batch: 15146, sum loss: 5123.766113, avg loss: 2.793766, ppl: 16.342442 +epoch: 2, batch: 15147, sum loss: 2591.550293, avg loss: 2.143549, ppl: 8.529654 +epoch: 2, batch: 15148, sum loss: 4224.267090, avg loss: 2.574203, ppl: 13.120852 +epoch: 2, batch: 15149, sum loss: 4005.665283, avg loss: 2.261810, ppl: 9.600449 +epoch: 2, batch: 15150, sum loss: 3861.161621, avg loss: 2.398237, ppl: 11.003759 +epoch: 2, batch: 15151, sum loss: 4670.126465, avg loss: 2.611928, ppl: 13.625288 +epoch: 2, batch: 15152, sum loss: 4356.046875, avg loss: 2.477842, ppl: 11.915527 +epoch: 2, batch: 15153, sum loss: 4341.028320, avg loss: 2.460901, ppl: 11.715357 +epoch: 2, batch: 15154, sum loss: 3916.761963, avg loss: 2.414773, ppl: 11.187230 +epoch: 2, batch: 15155, sum loss: 5027.798828, avg loss: 2.767088, ppl: 15.912229 +epoch: 2, batch: 15156, sum loss: 4618.070801, avg loss: 2.612031, ppl: 13.626701 +epoch: 2, batch: 15157, sum loss: 4277.156250, avg loss: 2.628860, ppl: 13.857969 +epoch: 2, batch: 15158, sum loss: 4955.576660, avg loss: 2.742433, ppl: 15.524712 +epoch: 2, batch: 15159, sum loss: 4227.382812, avg loss: 2.569838, ppl: 13.063703 +epoch: 2, batch: 15160, sum loss: 4767.091797, avg loss: 2.554711, ppl: 12.867585 +epoch: 2, batch: 15161, sum loss: 4731.271973, avg loss: 2.459081, ppl: 11.694062 +epoch: 2, batch: 15162, sum loss: 4317.910156, avg loss: 2.594898, ppl: 13.395221 +epoch: 2, batch: 15163, sum loss: 4226.314453, avg loss: 2.671501, ppl: 14.461656 +epoch: 2, batch: 15164, sum loss: 3209.959961, avg loss: 2.191099, ppl: 8.945038 +epoch: 2, batch: 15165, sum loss: 4424.262695, avg loss: 2.596398, ppl: 13.415334 +epoch: 2, batch: 15166, sum loss: 3609.739502, avg loss: 2.448941, ppl: 11.576087 +epoch: 2, batch: 15167, sum loss: 3328.763672, avg loss: 2.357481, ppl: 10.564309 +epoch: 2, batch: 15168, sum loss: 3346.697754, avg loss: 2.282877, ppl: 9.804848 +epoch: 2, batch: 15169, sum loss: 3370.983398, avg loss: 2.373932, ppl: 10.739538 +epoch: 2, batch: 
15170, sum loss: 4463.584961, avg loss: 2.451172, ppl: 11.601940 +epoch: 2, batch: 15171, sum loss: 4511.812988, avg loss: 2.466820, ppl: 11.784908 +epoch: 2, batch: 15172, sum loss: 3858.204102, avg loss: 2.375741, ppl: 10.758987 +epoch: 2, batch: 15173, sum loss: 3898.935547, avg loss: 2.278747, ppl: 9.764437 +epoch: 2, batch: 15174, sum loss: 3711.041016, avg loss: 2.345791, ppl: 10.441525 +epoch: 2, batch: 15175, sum loss: 4175.375000, avg loss: 2.630986, ppl: 13.887459 +epoch: 2, batch: 15176, sum loss: 3750.705078, avg loss: 2.500470, ppl: 12.188220 +epoch: 2, batch: 15177, sum loss: 4344.420898, avg loss: 2.626615, ppl: 13.826884 +epoch: 2, batch: 15178, sum loss: 4062.791260, avg loss: 2.425547, ppl: 11.308414 +epoch: 2, batch: 15179, sum loss: 3860.412842, avg loss: 2.531418, ppl: 12.571323 +epoch: 2, batch: 15180, sum loss: 4148.944824, avg loss: 2.439121, ppl: 11.462960 +epoch: 2, batch: 15181, sum loss: 5227.810059, avg loss: 2.616522, ppl: 13.688028 +epoch: 2, batch: 15182, sum loss: 4003.291016, avg loss: 2.543387, ppl: 12.722686 +epoch: 2, batch: 15183, sum loss: 4300.287598, avg loss: 2.575022, ppl: 13.131612 +epoch: 2, batch: 15184, sum loss: 4249.997559, avg loss: 2.473805, ppl: 11.867522 +epoch: 2, batch: 15185, sum loss: 4043.483398, avg loss: 2.593639, ppl: 13.378368 +epoch: 2, batch: 15186, sum loss: 4615.759277, avg loss: 2.437043, ppl: 11.439164 +epoch: 2, batch: 15187, sum loss: 3444.991943, avg loss: 2.226886, ppl: 9.270947 +epoch: 2, batch: 15188, sum loss: 3681.140137, avg loss: 2.421803, ppl: 11.266151 +epoch: 2, batch: 15189, sum loss: 3559.101074, avg loss: 2.338437, ppl: 10.365025 +epoch: 2, batch: 15190, sum loss: 3628.546875, avg loss: 2.403011, ppl: 11.056420 +epoch: 2, batch: 15191, sum loss: 4245.930176, avg loss: 2.522834, ppl: 12.463873 +epoch: 2, batch: 15192, sum loss: 4854.197266, avg loss: 2.643898, ppl: 14.067937 +epoch: 2, batch: 15193, sum loss: 5024.670898, avg loss: 2.626592, ppl: 13.826571 +epoch: 2, batch: 15194, 
sum loss: 3916.064209, avg loss: 2.406923, ppl: 11.099758 +epoch: 2, batch: 15195, sum loss: 4161.814941, avg loss: 2.619141, ppl: 13.723927 +epoch: 2, batch: 15196, sum loss: 4970.985840, avg loss: 2.585016, ppl: 13.263501 +epoch: 2, batch: 15197, sum loss: 4523.215820, avg loss: 2.565636, ppl: 13.008925 +epoch: 2, batch: 15198, sum loss: 4327.884277, avg loss: 2.500222, ppl: 12.185199 +epoch: 2, batch: 15199, sum loss: 4091.945068, avg loss: 2.578415, ppl: 13.176243 +epoch: 2, batch: 15200, sum loss: 3356.614258, avg loss: 2.367147, ppl: 10.666916 +epoch: 2, batch: 15201, sum loss: 4341.271973, avg loss: 2.406470, ppl: 11.094728 +epoch: 2, batch: 15202, sum loss: 3799.918213, avg loss: 2.508197, ppl: 12.282763 +epoch: 2, batch: 15203, sum loss: 4519.597656, avg loss: 2.349063, ppl: 10.475751 +epoch: 2, batch: 15204, sum loss: 4139.789551, avg loss: 2.323114, ppl: 10.207415 +epoch: 2, batch: 15205, sum loss: 3842.213135, avg loss: 2.357186, ppl: 10.561192 +epoch: 2, batch: 15206, sum loss: 4217.791016, avg loss: 2.666113, ppl: 14.383948 +epoch: 2, batch: 15207, sum loss: 3700.221436, avg loss: 2.198587, ppl: 9.012267 +epoch: 2, batch: 15208, sum loss: 5311.163086, avg loss: 2.844758, ppl: 17.197393 +epoch: 2, batch: 15209, sum loss: 4274.204102, avg loss: 2.688179, ppl: 14.704867 +epoch: 2, batch: 15210, sum loss: 4675.872070, avg loss: 2.483203, ppl: 11.979579 +epoch: 2, batch: 15211, sum loss: 4695.119629, avg loss: 2.667681, ppl: 14.406528 +epoch: 2, batch: 15212, sum loss: 3785.267090, avg loss: 2.438961, ppl: 11.461124 +epoch: 2, batch: 15213, sum loss: 3861.595459, avg loss: 2.364725, ppl: 10.641109 +epoch: 2, batch: 15214, sum loss: 4710.862793, avg loss: 2.545037, ppl: 12.743697 +epoch: 2, batch: 15215, sum loss: 4121.955078, avg loss: 2.582679, ppl: 13.232534 +epoch: 2, batch: 15216, sum loss: 3463.836670, avg loss: 2.461860, ppl: 11.726600 +epoch: 2, batch: 15217, sum loss: 4095.758057, avg loss: 2.417803, ppl: 11.221177 +epoch: 2, batch: 15218, sum 
loss: 4933.223633, avg loss: 2.691339, ppl: 14.751408 +epoch: 2, batch: 15219, sum loss: 3591.751465, avg loss: 2.332306, ppl: 10.301671 +epoch: 2, batch: 15220, sum loss: 4669.022949, avg loss: 2.612772, ppl: 13.636796 +epoch: 2, batch: 15221, sum loss: 4543.212891, avg loss: 2.666205, ppl: 14.385269 +epoch: 2, batch: 15222, sum loss: 3555.778809, avg loss: 2.370519, ppl: 10.702948 +epoch: 2, batch: 15223, sum loss: 4108.191895, avg loss: 2.438096, ppl: 11.451218 +epoch: 2, batch: 15224, sum loss: 3650.755859, avg loss: 2.238354, ppl: 9.377885 +epoch: 2, batch: 15225, sum loss: 4896.463867, avg loss: 2.688887, ppl: 14.715294 +epoch: 2, batch: 15226, sum loss: 4014.231689, avg loss: 2.535838, ppl: 12.627007 +epoch: 2, batch: 15227, sum loss: 4294.074707, avg loss: 2.602469, ppl: 13.497027 +epoch: 2, batch: 15228, sum loss: 4014.351562, avg loss: 2.449269, ppl: 11.579877 +epoch: 2, batch: 15229, sum loss: 4455.031738, avg loss: 2.450512, ppl: 11.594286 +epoch: 2, batch: 15230, sum loss: 3799.230713, avg loss: 2.519384, ppl: 12.420938 +epoch: 2, batch: 15231, sum loss: 3718.076660, avg loss: 2.322346, ppl: 10.199579 +epoch: 2, batch: 15232, sum loss: 4203.483887, avg loss: 2.488741, ppl: 12.046103 +epoch: 2, batch: 15233, sum loss: 5450.227539, avg loss: 2.635507, ppl: 13.950378 +epoch: 2, batch: 15234, sum loss: 3790.021484, avg loss: 2.617418, ppl: 13.700308 +epoch: 2, batch: 15235, sum loss: 4875.941406, avg loss: 2.607455, ppl: 13.564488 +epoch: 2, batch: 15236, sum loss: 4471.495117, avg loss: 2.553681, ppl: 12.854332 +epoch: 2, batch: 15237, sum loss: 5301.752930, avg loss: 2.546471, ppl: 12.761989 +epoch: 2, batch: 15238, sum loss: 3809.711182, avg loss: 2.274455, ppl: 9.722615 +epoch: 2, batch: 15239, sum loss: 4208.406738, avg loss: 2.438242, ppl: 11.452894 +epoch: 2, batch: 15240, sum loss: 4065.449219, avg loss: 2.581238, ppl: 13.213480 +epoch: 2, batch: 15241, sum loss: 4046.005371, avg loss: 2.440293, ppl: 11.476401 +epoch: 2, batch: 15242, sum loss: 
3996.745605, avg loss: 2.655645, ppl: 14.234166 +epoch: 2, batch: 15243, sum loss: 4891.310059, avg loss: 2.741766, ppl: 15.514359 +epoch: 2, batch: 15244, sum loss: 4847.892578, avg loss: 2.660753, ppl: 14.307062 +epoch: 2, batch: 15245, sum loss: 3637.501465, avg loss: 2.220697, ppl: 9.213748 +epoch: 2, batch: 15246, sum loss: 3927.034668, avg loss: 2.350111, ppl: 10.486729 +epoch: 2, batch: 15247, sum loss: 5087.528809, avg loss: 2.694666, ppl: 14.800570 +epoch: 2, batch: 15248, sum loss: 5002.113281, avg loss: 2.715588, ppl: 15.113496 +epoch: 2, batch: 15249, sum loss: 3760.387695, avg loss: 2.441810, ppl: 11.493827 +epoch: 2, batch: 15250, sum loss: 3980.810059, avg loss: 2.457290, ppl: 11.673137 +epoch: 2, batch: 15251, sum loss: 4411.337402, avg loss: 2.557297, ppl: 12.900902 +epoch: 2, batch: 15252, sum loss: 4583.560059, avg loss: 2.691462, ppl: 14.753230 +epoch: 2, batch: 15253, sum loss: 4588.871094, avg loss: 2.550790, ppl: 12.817224 +epoch: 2, batch: 15254, sum loss: 3691.991699, avg loss: 2.514981, ppl: 12.366371 +epoch: 2, batch: 15255, sum loss: 3362.761475, avg loss: 2.120278, ppl: 8.333457 +epoch: 2, batch: 15256, sum loss: 4395.309570, avg loss: 2.709809, ppl: 15.026399 +epoch: 2, batch: 15257, sum loss: 4862.791504, avg loss: 2.618628, ppl: 13.716885 +epoch: 2, batch: 15258, sum loss: 3922.579102, avg loss: 2.540531, ppl: 12.686401 +epoch: 2, batch: 15259, sum loss: 4049.775879, avg loss: 2.368290, ppl: 10.679114 +epoch: 2, batch: 15260, sum loss: 3872.175049, avg loss: 2.586623, ppl: 13.284836 +epoch: 2, batch: 15261, sum loss: 3443.481445, avg loss: 2.396299, ppl: 10.982453 +epoch: 2, batch: 15262, sum loss: 4719.175293, avg loss: 2.682874, ppl: 14.627070 +epoch: 2, batch: 15263, sum loss: 4986.171875, avg loss: 2.924441, ppl: 18.623814 +epoch: 2, batch: 15264, sum loss: 3994.150146, avg loss: 2.467048, ppl: 11.787595 +epoch: 2, batch: 15265, sum loss: 3558.617676, avg loss: 2.266635, ppl: 9.646889 +epoch: 2, batch: 15266, sum loss: 
3773.193848, avg loss: 2.466140, ppl: 11.776897 +epoch: 2, batch: 15267, sum loss: 4546.834961, avg loss: 2.467083, ppl: 11.788016 +epoch: 2, batch: 15268, sum loss: 5082.999512, avg loss: 2.725469, ppl: 15.263569 +epoch: 2, batch: 15269, sum loss: 4755.150391, avg loss: 2.530682, ppl: 12.562066 +epoch: 2, batch: 15270, sum loss: 4085.492920, avg loss: 2.384993, ppl: 10.858985 +epoch: 2, batch: 15271, sum loss: 4891.631348, avg loss: 2.696599, ppl: 14.829219 +epoch: 2, batch: 15272, sum loss: 4023.580566, avg loss: 2.450415, ppl: 11.593153 +epoch: 2, batch: 15273, sum loss: 4708.439941, avg loss: 2.678293, ppl: 14.560225 +epoch: 2, batch: 15274, sum loss: 3281.258301, avg loss: 2.315638, ppl: 10.131380 +epoch: 2, batch: 15275, sum loss: 3509.153320, avg loss: 2.372653, ppl: 10.725805 +epoch: 2, batch: 15276, sum loss: 3867.965332, avg loss: 2.268601, ppl: 9.665873 +epoch: 2, batch: 15277, sum loss: 4925.270508, avg loss: 2.773238, ppl: 16.010391 +epoch: 2, batch: 15278, sum loss: 4388.624023, avg loss: 2.547083, ppl: 12.769802 +epoch: 2, batch: 15279, sum loss: 3629.054199, avg loss: 2.216893, ppl: 9.178770 +epoch: 2, batch: 15280, sum loss: 3940.007812, avg loss: 2.367793, ppl: 10.673813 +epoch: 2, batch: 15281, sum loss: 3367.520264, avg loss: 2.179625, ppl: 8.842988 +epoch: 2, batch: 15282, sum loss: 3825.717285, avg loss: 2.471394, ppl: 11.838934 +epoch: 2, batch: 15283, sum loss: 4533.168945, avg loss: 2.518427, ppl: 12.409066 +epoch: 2, batch: 15284, sum loss: 5517.294434, avg loss: 2.967883, ppl: 19.450697 +epoch: 2, batch: 15285, sum loss: 4242.564453, avg loss: 2.520834, ppl: 12.438972 +epoch: 2, batch: 15286, sum loss: 5281.267090, avg loss: 2.800248, ppl: 16.448725 +epoch: 2, batch: 15287, sum loss: 4140.815918, avg loss: 2.615803, ppl: 13.678192 +epoch: 2, batch: 15288, sum loss: 4033.481445, avg loss: 2.469982, ppl: 11.822239 +epoch: 2, batch: 15289, sum loss: 4496.811523, avg loss: 2.676674, ppl: 14.536659 +epoch: 2, batch: 15290, sum loss: 
4387.081543, avg loss: 2.411809, ppl: 11.154125 +epoch: 2, batch: 15291, sum loss: 4857.502441, avg loss: 2.645699, ppl: 14.093287 +epoch: 2, batch: 15292, sum loss: 4475.512695, avg loss: 2.637309, ppl: 13.975539 +epoch: 2, batch: 15293, sum loss: 3970.595459, avg loss: 2.521013, ppl: 12.441191 +epoch: 2, batch: 15294, sum loss: 4811.809570, avg loss: 2.490585, ppl: 12.068330 +epoch: 2, batch: 15295, sum loss: 4101.177734, avg loss: 2.331539, ppl: 10.293776 +epoch: 2, batch: 15296, sum loss: 4626.981934, avg loss: 2.676103, ppl: 14.528364 +epoch: 2, batch: 15297, sum loss: 3831.119141, avg loss: 2.547287, ppl: 12.772399 +epoch: 2, batch: 15298, sum loss: 4142.839355, avg loss: 2.370045, ppl: 10.697878 +epoch: 2, batch: 15299, sum loss: 4177.841797, avg loss: 2.405205, ppl: 11.080707 +epoch: 2, batch: 15300, sum loss: 4705.425781, avg loss: 2.468744, ppl: 11.807605 +epoch: 2, batch: 15301, sum loss: 3525.830322, avg loss: 2.426587, ppl: 11.320176 +epoch: 2, batch: 15302, sum loss: 5029.839844, avg loss: 2.721775, ppl: 15.207289 +epoch: 2, batch: 15303, sum loss: 3938.242676, avg loss: 2.263358, ppl: 9.615322 +epoch: 2, batch: 15304, sum loss: 4359.969727, avg loss: 2.254379, ppl: 9.529376 +epoch: 2, batch: 15305, sum loss: 4118.624512, avg loss: 2.411373, ppl: 11.149255 +epoch: 2, batch: 15306, sum loss: 5255.517578, avg loss: 2.748702, ppl: 15.622334 +epoch: 2, batch: 15307, sum loss: 4292.417480, avg loss: 2.508719, ppl: 12.289174 +epoch: 2, batch: 15308, sum loss: 3658.486816, avg loss: 2.408484, ppl: 11.117092 +epoch: 2, batch: 15309, sum loss: 3590.203613, avg loss: 2.430741, ppl: 11.367297 +epoch: 2, batch: 15310, sum loss: 4257.104980, avg loss: 2.496836, ppl: 12.144009 +epoch: 2, batch: 15311, sum loss: 3809.780029, avg loss: 2.296432, ppl: 9.938653 +epoch: 2, batch: 15312, sum loss: 4373.925781, avg loss: 2.628561, ppl: 13.853824 +epoch: 2, batch: 15313, sum loss: 3287.502686, avg loss: 2.517230, ppl: 12.394220 +epoch: 2, batch: 15314, sum loss: 
3954.458008, avg loss: 2.533285, ppl: 12.594814 +epoch: 2, batch: 15315, sum loss: 4922.504883, avg loss: 2.630949, ppl: 13.886939 +epoch: 2, batch: 15316, sum loss: 5078.334473, avg loss: 2.790294, ppl: 16.285803 +epoch: 2, batch: 15317, sum loss: 5120.030273, avg loss: 2.538439, ppl: 12.659888 +epoch: 2, batch: 15318, sum loss: 4295.809082, avg loss: 2.578517, ppl: 13.177578 +epoch: 2, batch: 15319, sum loss: 4500.678223, avg loss: 2.592556, ppl: 13.363893 +epoch: 2, batch: 15320, sum loss: 3208.116699, avg loss: 2.237180, ppl: 9.366884 +epoch: 2, batch: 15321, sum loss: 3710.892578, avg loss: 2.455918, ppl: 11.657133 +epoch: 2, batch: 15322, sum loss: 4377.039551, avg loss: 2.683654, ppl: 14.638486 +epoch: 2, batch: 15323, sum loss: 3270.414551, avg loss: 2.190499, ppl: 8.939670 +epoch: 2, batch: 15324, sum loss: 4432.588867, avg loss: 2.514231, ppl: 12.357100 +epoch: 2, batch: 15325, sum loss: 3385.360107, avg loss: 2.375691, ppl: 10.758447 +epoch: 2, batch: 15326, sum loss: 4122.687012, avg loss: 2.581520, ppl: 13.217211 +epoch: 2, batch: 15327, sum loss: 4040.687500, avg loss: 2.247323, ppl: 9.462374 +epoch: 2, batch: 15328, sum loss: 4193.883789, avg loss: 2.579264, ppl: 13.187430 +epoch: 2, batch: 15329, sum loss: 4813.005859, avg loss: 2.728462, ppl: 15.309316 +epoch: 2, batch: 15330, sum loss: 4868.857422, avg loss: 2.670794, ppl: 14.451439 +epoch: 2, batch: 15331, sum loss: 5087.645020, avg loss: 2.599716, ppl: 13.459921 +epoch: 2, batch: 15332, sum loss: 5826.903320, avg loss: 2.731788, ppl: 15.360326 +epoch: 2, batch: 15333, sum loss: 4734.038086, avg loss: 2.500813, ppl: 12.192396 +epoch: 2, batch: 15334, sum loss: 4743.537598, avg loss: 2.616403, ppl: 13.686399 +epoch: 2, batch: 15335, sum loss: 2741.890381, avg loss: 2.202322, ppl: 9.045990 +epoch: 2, batch: 15336, sum loss: 3906.115967, avg loss: 2.261793, ppl: 9.600284 +epoch: 2, batch: 15337, sum loss: 4054.601562, avg loss: 2.551669, ppl: 12.828492 +epoch: 2, batch: 15338, sum loss: 4695.572754, 
avg loss: 2.599985, ppl: 13.463534 +epoch: 2, batch: 15339, sum loss: 3819.678955, avg loss: 2.541370, ppl: 12.697053 +epoch: 2, batch: 15340, sum loss: 3737.194824, avg loss: 2.378864, ppl: 10.792633 +epoch: 2, batch: 15341, sum loss: 3430.312012, avg loss: 2.448474, ppl: 11.570676 +epoch: 2, batch: 15342, sum loss: 4344.801270, avg loss: 2.572411, ppl: 13.097359 +epoch: 2, batch: 15343, sum loss: 3347.519043, avg loss: 2.263366, ppl: 9.615405 +epoch: 2, batch: 15344, sum loss: 4820.534180, avg loss: 2.504174, ppl: 12.233444 +epoch: 2, batch: 15345, sum loss: 4997.834473, avg loss: 2.615298, ppl: 13.671290 +epoch: 2, batch: 15346, sum loss: 5088.332031, avg loss: 2.712331, ppl: 15.064343 +epoch: 2, batch: 15347, sum loss: 4585.028809, avg loss: 2.523406, ppl: 12.471001 +epoch: 2, batch: 15348, sum loss: 4031.663086, avg loss: 2.391259, ppl: 10.927245 +epoch: 2, batch: 15349, sum loss: 3911.893311, avg loss: 2.609669, ppl: 13.594546 +epoch: 2, batch: 15350, sum loss: 4008.566650, avg loss: 2.326504, ppl: 10.242075 +epoch: 2, batch: 15351, sum loss: 3794.036621, avg loss: 2.527673, ppl: 12.524325 +epoch: 2, batch: 15352, sum loss: 3149.488281, avg loss: 2.129471, ppl: 8.410419 +epoch: 2, batch: 15353, sum loss: 4139.259766, avg loss: 2.556677, ppl: 12.892901 +epoch: 2, batch: 15354, sum loss: 4136.903320, avg loss: 2.499640, ppl: 12.178103 +epoch: 2, batch: 15355, sum loss: 3769.803467, avg loss: 2.369455, ppl: 10.691567 +epoch: 2, batch: 15356, sum loss: 3450.357178, avg loss: 2.283493, ppl: 9.810885 +epoch: 2, batch: 15357, sum loss: 3985.014160, avg loss: 2.393402, ppl: 10.950684 +epoch: 2, batch: 15358, sum loss: 4929.512695, avg loss: 2.755457, ppl: 15.728226 +epoch: 2, batch: 15359, sum loss: 4087.556641, avg loss: 2.471316, ppl: 11.838017 +epoch: 2, batch: 15360, sum loss: 4851.094727, avg loss: 2.406297, ppl: 11.092811 +epoch: 2, batch: 15361, sum loss: 3741.117188, avg loss: 2.310758, ppl: 10.082066 +epoch: 2, batch: 15362, sum loss: 4077.696777, avg loss: 
2.518651, ppl: 12.411848 +epoch: 2, batch: 15363, sum loss: 4363.929688, avg loss: 2.592947, ppl: 13.369109 +epoch: 2, batch: 15364, sum loss: 4203.285156, avg loss: 2.503446, ppl: 12.224543 +epoch: 2, batch: 15365, sum loss: 4597.048828, avg loss: 2.482208, ppl: 11.967657 +epoch: 2, batch: 15366, sum loss: 3431.970459, avg loss: 2.266823, ppl: 9.648701 +epoch: 2, batch: 15367, sum loss: 5077.946777, avg loss: 2.628337, ppl: 13.850716 +epoch: 2, batch: 15368, sum loss: 4859.386719, avg loss: 2.601385, ppl: 13.482393 +epoch: 2, batch: 15369, sum loss: 4219.562500, avg loss: 2.420862, ppl: 11.255557 +epoch: 2, batch: 15370, sum loss: 3053.538818, avg loss: 2.304558, ppl: 10.019744 +epoch: 2, batch: 15371, sum loss: 5046.304199, avg loss: 2.811311, ppl: 16.631716 +epoch: 2, batch: 15372, sum loss: 3352.433594, avg loss: 2.033010, ppl: 7.637039 +epoch: 2, batch: 15373, sum loss: 4678.965820, avg loss: 2.570860, ppl: 13.077071 +epoch: 2, batch: 15374, sum loss: 4122.794434, avg loss: 2.606065, ppl: 13.545638 +epoch: 2, batch: 15375, sum loss: 4762.165527, avg loss: 2.449674, ppl: 11.584568 +epoch: 2, batch: 15376, sum loss: 4418.542969, avg loss: 2.471221, ppl: 11.836890 +epoch: 2, batch: 15377, sum loss: 4457.261719, avg loss: 2.615764, ppl: 13.677661 +epoch: 2, batch: 15378, sum loss: 3593.384033, avg loss: 2.240264, ppl: 9.395815 +epoch: 2, batch: 15379, sum loss: 3548.032471, avg loss: 2.343482, ppl: 10.417452 +epoch: 2, batch: 15380, sum loss: 4372.042480, avg loss: 2.567259, ppl: 13.030065 +epoch: 2, batch: 15381, sum loss: 4597.959473, avg loss: 2.633425, ppl: 13.921366 +epoch: 2, batch: 15382, sum loss: 3980.326172, avg loss: 2.475327, ppl: 11.885596 +epoch: 2, batch: 15383, sum loss: 4779.888672, avg loss: 2.779005, ppl: 16.102991 +epoch: 2, batch: 15384, sum loss: 5212.756836, avg loss: 2.638035, ppl: 13.985692 +epoch: 2, batch: 15385, sum loss: 3949.550293, avg loss: 2.355128, ppl: 10.539481 +epoch: 2, batch: 15386, sum loss: 4385.154785, avg loss: 2.643252, 
ppl: 14.058847 +epoch: 2, batch: 15387, sum loss: 4484.971680, avg loss: 2.360512, ppl: 10.596371 +epoch: 2, batch: 15388, sum loss: 4382.523926, avg loss: 2.561382, ppl: 12.953705 +epoch: 2, batch: 15389, sum loss: 4039.996582, avg loss: 2.552114, ppl: 12.834207 +epoch: 2, batch: 15390, sum loss: 4482.999023, avg loss: 2.731870, ppl: 15.361589 +epoch: 2, batch: 15391, sum loss: 3635.891846, avg loss: 2.411069, ppl: 11.145869 +epoch: 2, batch: 15392, sum loss: 4487.515137, avg loss: 2.590944, ppl: 13.342361 +epoch: 2, batch: 15393, sum loss: 4275.998535, avg loss: 2.549790, ppl: 12.804420 +epoch: 2, batch: 15394, sum loss: 4142.151855, avg loss: 2.608408, ppl: 13.577421 +epoch: 2, batch: 15395, sum loss: 3339.609375, avg loss: 2.140775, ppl: 8.506029 +epoch: 2, batch: 15396, sum loss: 3914.199951, avg loss: 2.432691, ppl: 11.389491 +epoch: 2, batch: 15397, sum loss: 4680.225586, avg loss: 2.665277, ppl: 14.371923 +epoch: 2, batch: 15398, sum loss: 4122.662109, avg loss: 2.402484, ppl: 11.050589 +epoch: 2, batch: 15399, sum loss: 3927.728027, avg loss: 2.595987, ppl: 13.409814 +epoch: 2, batch: 15400, sum loss: 4120.616211, avg loss: 2.486793, ppl: 12.022658 +epoch: 2, batch: 15401, sum loss: 3957.540527, avg loss: 2.482773, ppl: 11.974427 +epoch: 2, batch: 15402, sum loss: 5098.389648, avg loss: 2.799775, ppl: 16.440941 +epoch: 2, batch: 15403, sum loss: 4088.292236, avg loss: 2.374153, ppl: 10.741915 +epoch: 2, batch: 15404, sum loss: 4557.834961, avg loss: 2.701740, ppl: 14.905642 +epoch: 2, batch: 15405, sum loss: 4235.959961, avg loss: 2.497618, ppl: 12.153509 +epoch: 2, batch: 15406, sum loss: 4214.929688, avg loss: 2.427955, ppl: 11.335676 +epoch: 2, batch: 15407, sum loss: 3856.635986, avg loss: 2.350174, ppl: 10.487396 +epoch: 2, batch: 15408, sum loss: 6042.938965, avg loss: 2.966588, ppl: 19.425518 +epoch: 2, batch: 15409, sum loss: 4339.995117, avg loss: 2.597245, ppl: 13.426690 +epoch: 2, batch: 15410, sum loss: 4489.509766, avg loss: 2.645557, ppl: 
14.091288 +epoch: 2, batch: 15411, sum loss: 3761.301025, avg loss: 2.334762, ppl: 10.327001 +epoch: 2, batch: 15412, sum loss: 3861.772461, avg loss: 2.337635, ppl: 10.356712 +epoch: 2, batch: 15413, sum loss: 4276.435547, avg loss: 2.617158, ppl: 13.696738 +epoch: 2, batch: 15414, sum loss: 4099.886719, avg loss: 2.298143, ppl: 9.955677 +epoch: 2, batch: 15415, sum loss: 3629.848145, avg loss: 2.355515, ppl: 10.543555 +epoch: 2, batch: 15416, sum loss: 3686.613281, avg loss: 2.275687, ppl: 9.734607 +epoch: 2, batch: 15417, sum loss: 4631.747070, avg loss: 2.554742, ppl: 12.867981 +epoch: 2, batch: 15418, sum loss: 3900.677246, avg loss: 2.403375, ppl: 11.060439 +epoch: 2, batch: 15419, sum loss: 4498.205566, avg loss: 2.594121, ppl: 13.384813 +epoch: 2, batch: 15420, sum loss: 3899.885986, avg loss: 2.631502, ppl: 13.894626 +epoch: 2, batch: 15421, sum loss: 4247.533203, avg loss: 2.513333, ppl: 12.346015 +epoch: 2, batch: 15422, sum loss: 3872.958252, avg loss: 2.341571, ppl: 10.397557 +epoch: 2, batch: 15423, sum loss: 3705.649414, avg loss: 2.363297, ppl: 10.625925 +epoch: 2, batch: 15424, sum loss: 5384.768555, avg loss: 2.852102, ppl: 17.324160 +epoch: 2, batch: 15425, sum loss: 3962.305420, avg loss: 2.293001, ppl: 9.904616 +epoch: 2, batch: 15426, sum loss: 3931.024170, avg loss: 2.292142, ppl: 9.896116 +epoch: 2, batch: 15427, sum loss: 4138.877930, avg loss: 2.325212, ppl: 10.228850 +epoch: 2, batch: 15428, sum loss: 4508.255859, avg loss: 2.504587, ppl: 12.238500 +epoch: 2, batch: 15429, sum loss: 3656.047363, avg loss: 2.402134, ppl: 11.046721 +epoch: 2, batch: 15430, sum loss: 4587.533203, avg loss: 2.462444, ppl: 11.733454 +epoch: 2, batch: 15431, sum loss: 3986.598145, avg loss: 2.316443, ppl: 10.139541 +epoch: 2, batch: 15432, sum loss: 4375.791992, avg loss: 2.468016, ppl: 11.799013 +epoch: 2, batch: 15433, sum loss: 4203.426758, avg loss: 2.258693, ppl: 9.570567 +epoch: 2, batch: 15434, sum loss: 4265.137695, avg loss: 2.435830, ppl: 11.425294 
+epoch: 2, batch: 15435, sum loss: 4717.260254, avg loss: 2.750589, ppl: 15.651847 +epoch: 2, batch: 15436, sum loss: 4528.465332, avg loss: 2.441221, ppl: 11.487061 +epoch: 2, batch: 15437, sum loss: 5263.740234, avg loss: 2.680112, ppl: 14.586732 +epoch: 2, batch: 15438, sum loss: 4038.610840, avg loss: 2.392542, ppl: 10.941273 +epoch: 2, batch: 15439, sum loss: 3940.943848, avg loss: 2.466173, ppl: 11.777286 +epoch: 2, batch: 15440, sum loss: 3954.209229, avg loss: 2.377757, ppl: 10.780690 +epoch: 2, batch: 15441, sum loss: 3551.266602, avg loss: 2.237723, ppl: 9.371970 +epoch: 2, batch: 15442, sum loss: 4091.931396, avg loss: 2.586556, ppl: 13.283939 +epoch: 2, batch: 15443, sum loss: 3860.060059, avg loss: 2.278666, ppl: 9.763645 +epoch: 2, batch: 15444, sum loss: 4233.293945, avg loss: 2.462649, ppl: 11.735863 +epoch: 2, batch: 15445, sum loss: 4101.460938, avg loss: 2.545910, ppl: 12.754831 +epoch: 2, batch: 15446, sum loss: 3855.187744, avg loss: 2.496883, ppl: 12.144585 +epoch: 2, batch: 15447, sum loss: 3929.663574, avg loss: 2.370123, ppl: 10.698708 +epoch: 2, batch: 15448, sum loss: 4328.521484, avg loss: 2.558228, ppl: 12.912912 +epoch: 2, batch: 15449, sum loss: 3958.303711, avg loss: 2.313445, ppl: 10.109189 +epoch: 2, batch: 15450, sum loss: 3637.481201, avg loss: 2.441263, ppl: 11.487537 +epoch: 2, batch: 15451, sum loss: 3873.214600, avg loss: 2.551525, ppl: 12.826648 +epoch: 2, batch: 15452, sum loss: 5218.026855, avg loss: 2.618177, ppl: 13.710709 +epoch: 2, batch: 15453, sum loss: 3657.523438, avg loss: 2.388977, ppl: 10.902333 +epoch: 2, batch: 15454, sum loss: 3697.677979, avg loss: 2.332920, ppl: 10.307995 +epoch: 2, batch: 15455, sum loss: 4231.154297, avg loss: 2.542761, ppl: 12.714729 +epoch: 2, batch: 15456, sum loss: 4889.858398, avg loss: 2.688212, ppl: 14.705365 +epoch: 2, batch: 15457, sum loss: 4329.070801, avg loss: 2.687195, ppl: 14.690409 +epoch: 2, batch: 15458, sum loss: 4066.937012, avg loss: 2.473806, ppl: 11.867528 +epoch: 
2, batch: 15459, sum loss: 4034.861084, avg loss: 2.558568, ppl: 12.917300 +epoch: 2, batch: 15460, sum loss: 4141.758789, avg loss: 2.545641, ppl: 12.751405 +epoch: 2, batch: 15461, sum loss: 4403.599609, avg loss: 2.590353, ppl: 13.334475 +epoch: 2, batch: 15462, sum loss: 3876.808594, avg loss: 2.424521, ppl: 11.296814 +epoch: 2, batch: 15463, sum loss: 3737.432129, avg loss: 2.369963, ppl: 10.697001 +epoch: 2, batch: 15464, sum loss: 3902.550781, avg loss: 2.658413, ppl: 14.273624 +epoch: 2, batch: 15465, sum loss: 3291.893799, avg loss: 2.422291, ppl: 11.271656 +epoch: 2, batch: 15466, sum loss: 4968.218750, avg loss: 2.922482, ppl: 18.587360 +epoch: 2, batch: 15467, sum loss: 4210.551758, avg loss: 2.484101, ppl: 11.990339 +epoch: 2, batch: 15468, sum loss: 3748.683350, avg loss: 2.230032, ppl: 9.300161 +epoch: 2, batch: 15469, sum loss: 3945.280518, avg loss: 2.550278, ppl: 12.810670 +epoch: 2, batch: 15470, sum loss: 3486.699463, avg loss: 2.226500, ppl: 9.267376 +epoch: 2, batch: 15471, sum loss: 3992.233643, avg loss: 2.434289, ppl: 11.407702 +epoch: 2, batch: 15472, sum loss: 4826.042969, avg loss: 2.538687, ppl: 12.663028 +epoch: 2, batch: 15473, sum loss: 4246.561035, avg loss: 2.399187, ppl: 11.014219 +epoch: 2, batch: 15474, sum loss: 3588.237549, avg loss: 2.357581, ppl: 10.565359 +epoch: 2, batch: 15475, sum loss: 3505.014404, avg loss: 2.477042, ppl: 11.905993 +epoch: 2, batch: 15476, sum loss: 3815.829590, avg loss: 2.307031, ppl: 10.044559 +epoch: 2, batch: 15477, sum loss: 4203.970703, avg loss: 2.269963, ppl: 9.679038 +epoch: 2, batch: 15478, sum loss: 4065.591064, avg loss: 2.545768, ppl: 12.753016 +epoch: 2, batch: 15479, sum loss: 4473.693848, avg loss: 2.541871, ppl: 12.703421 +epoch: 2, batch: 15480, sum loss: 4519.159668, avg loss: 2.752229, ppl: 15.677538 +epoch: 2, batch: 15481, sum loss: 3348.524414, avg loss: 2.295082, ppl: 9.925248 +epoch: 2, batch: 15482, sum loss: 4028.340332, avg loss: 2.312480, ppl: 10.099440 +epoch: 2, batch: 
15483, sum loss: 3923.431152, avg loss: 2.374959, ppl: 10.750567 +epoch: 2, batch: 15484, sum loss: 4503.320312, avg loss: 2.624313, ppl: 13.795089 +epoch: 2, batch: 15485, sum loss: 4376.259277, avg loss: 2.807094, ppl: 16.561718 +epoch: 2, batch: 15486, sum loss: 4849.884277, avg loss: 2.732329, ppl: 15.368641 +epoch: 2, batch: 15487, sum loss: 4108.733398, avg loss: 2.494677, ppl: 12.117820 +epoch: 2, batch: 15488, sum loss: 3706.201904, avg loss: 2.409754, ppl: 11.131223 +epoch: 2, batch: 15489, sum loss: 4834.587891, avg loss: 2.610469, ppl: 13.605425 +epoch: 2, batch: 15490, sum loss: 3793.651367, avg loss: 2.662211, ppl: 14.327939 +epoch: 2, batch: 15491, sum loss: 5123.914062, avg loss: 2.804551, ppl: 16.519651 +epoch: 2, batch: 15492, sum loss: 4385.128906, avg loss: 2.594751, ppl: 13.393250 +epoch: 2, batch: 15493, sum loss: 4140.376465, avg loss: 2.345822, ppl: 10.441855 +epoch: 2, batch: 15494, sum loss: 5340.198242, avg loss: 2.735757, ppl: 15.421418 +epoch: 2, batch: 15495, sum loss: 5228.145508, avg loss: 2.866308, ppl: 17.572021 +epoch: 2, batch: 15496, sum loss: 4623.593262, avg loss: 2.598984, ppl: 13.450069 +epoch: 2, batch: 15497, sum loss: 4924.206055, avg loss: 2.888098, ppl: 17.959110 +epoch: 2, batch: 15498, sum loss: 4699.384766, avg loss: 2.659527, ppl: 14.289533 +epoch: 2, batch: 15499, sum loss: 4923.588867, avg loss: 2.603696, ppl: 13.513590 +epoch: 2, batch: 15500, sum loss: 4047.860840, avg loss: 2.419522, ppl: 11.240488 +epoch: 2, batch: 15501, sum loss: 3405.428955, avg loss: 2.326113, ppl: 10.238064 +epoch: 2, batch: 15502, sum loss: 3998.618652, avg loss: 2.555028, ppl: 12.871659 +epoch: 2, batch: 15503, sum loss: 3638.806396, avg loss: 2.338564, ppl: 10.366343 +epoch: 2, batch: 15504, sum loss: 4489.113770, avg loss: 2.439736, ppl: 11.470008 +epoch: 2, batch: 15505, sum loss: 4219.243164, avg loss: 2.566450, ppl: 13.019518 +epoch: 2, batch: 15506, sum loss: 4543.736328, avg loss: 2.698181, ppl: 14.852689 +epoch: 2, batch: 15507, 
sum loss: 3670.962891, avg loss: 2.238392, ppl: 9.378240 +epoch: 2, batch: 15508, sum loss: 4329.312500, avg loss: 2.498161, ppl: 12.160106 +epoch: 2, batch: 15509, sum loss: 3984.331055, avg loss: 2.438391, ppl: 11.454597 +epoch: 2, batch: 15510, sum loss: 3349.625732, avg loss: 2.429025, ppl: 11.347815 +epoch: 2, batch: 15511, sum loss: 3890.880127, avg loss: 2.349565, ppl: 10.481012 +epoch: 2, batch: 15512, sum loss: 4418.148926, avg loss: 2.440966, ppl: 11.484134 +epoch: 2, batch: 15513, sum loss: 4372.275391, avg loss: 2.394455, ppl: 10.962227 +epoch: 2, batch: 15514, sum loss: 4460.362305, avg loss: 2.504415, ppl: 12.236393 +epoch: 2, batch: 15515, sum loss: 3610.759521, avg loss: 2.438055, ppl: 11.450747 +epoch: 2, batch: 15516, sum loss: 4674.263184, avg loss: 2.469236, ppl: 11.813414 +epoch: 2, batch: 15517, sum loss: 4127.803711, avg loss: 2.468782, ppl: 11.808058 +epoch: 2, batch: 15518, sum loss: 4551.543457, avg loss: 2.558484, ppl: 12.916223 +epoch: 2, batch: 15519, sum loss: 4776.579102, avg loss: 2.796592, ppl: 16.388695 +epoch: 2, batch: 15520, sum loss: 4413.006348, avg loss: 2.397070, ppl: 10.990931 +epoch: 2, batch: 15521, sum loss: 3636.874512, avg loss: 2.175164, ppl: 8.803631 +epoch: 2, batch: 15522, sum loss: 4080.271240, avg loss: 2.380555, ppl: 10.810901 +epoch: 2, batch: 15523, sum loss: 4738.036133, avg loss: 2.552821, ppl: 12.843286 +epoch: 2, batch: 15524, sum loss: 4278.114258, avg loss: 2.522473, ppl: 12.459372 +epoch: 2, batch: 15525, sum loss: 3399.019531, avg loss: 2.171897, ppl: 8.774918 +epoch: 2, batch: 15526, sum loss: 4523.457031, avg loss: 2.632978, ppl: 13.915154 +epoch: 2, batch: 15527, sum loss: 4069.855469, avg loss: 2.484649, ppl: 11.996911 +epoch: 2, batch: 15528, sum loss: 4041.572510, avg loss: 2.332125, ppl: 10.299805 +epoch: 2, batch: 15529, sum loss: 5315.855469, avg loss: 2.639452, ppl: 14.005520 +epoch: 2, batch: 15530, sum loss: 4793.812988, avg loss: 2.562166, ppl: 12.963873 +epoch: 2, batch: 15531, sum loss: 
4228.380859, avg loss: 2.420367, ppl: 11.249985 +epoch: 2, batch: 15532, sum loss: 3033.011230, avg loss: 2.074563, ppl: 7.961069 +epoch: 2, batch: 15533, sum loss: 3486.575928, avg loss: 2.384799, ppl: 10.856880 +epoch: 2, batch: 15534, sum loss: 3886.151123, avg loss: 2.530046, ppl: 12.554090 +epoch: 2, batch: 15535, sum loss: 3882.511963, avg loss: 2.476092, ppl: 11.894688 +epoch: 2, batch: 15536, sum loss: 4706.200684, avg loss: 2.651381, ppl: 14.173593 +epoch: 2, batch: 15537, sum loss: 5020.382324, avg loss: 2.653479, ppl: 14.203368 +epoch: 2, batch: 15538, sum loss: 4067.773926, avg loss: 2.558348, ppl: 12.914471 +epoch: 2, batch: 15539, sum loss: 3831.299316, avg loss: 2.460693, ppl: 11.712930 +epoch: 2, batch: 15540, sum loss: 4187.946289, avg loss: 2.326637, ppl: 10.243433 +epoch: 2, batch: 15541, sum loss: 4359.973145, avg loss: 2.495692, ppl: 12.130119 +epoch: 2, batch: 15542, sum loss: 5120.386719, avg loss: 2.661324, ppl: 14.315227 +epoch: 2, batch: 15543, sum loss: 3871.031738, avg loss: 2.596266, ppl: 13.413552 +epoch: 2, batch: 15544, sum loss: 3716.857910, avg loss: 2.352442, ppl: 10.511205 +epoch: 2, batch: 15545, sum loss: 4197.615234, avg loss: 2.605596, ppl: 13.539293 +epoch: 2, batch: 15546, sum loss: 3773.622559, avg loss: 2.380834, ppl: 10.813922 +epoch: 2, batch: 15547, sum loss: 3699.913086, avg loss: 2.385502, ppl: 10.864511 +epoch: 2, batch: 15548, sum loss: 4236.706543, avg loss: 2.616866, ppl: 13.692748 +epoch: 2, batch: 15549, sum loss: 5113.780762, avg loss: 2.822175, ppl: 16.813377 +epoch: 2, batch: 15550, sum loss: 5020.562500, avg loss: 2.724125, ppl: 15.243073 +epoch: 2, batch: 15551, sum loss: 3959.420654, avg loss: 2.372331, ppl: 10.722359 +epoch: 2, batch: 15552, sum loss: 4455.178223, avg loss: 2.501504, ppl: 12.200830 +epoch: 2, batch: 15553, sum loss: 4700.641602, avg loss: 2.605677, ppl: 13.540391 +epoch: 2, batch: 15554, sum loss: 4084.854736, avg loss: 2.395809, ppl: 10.977079 +epoch: 2, batch: 15555, sum loss: 
4222.529297, avg loss: 2.609721, ppl: 13.595263 +epoch: 2, batch: 15556, sum loss: 5000.290527, avg loss: 2.738385, ppl: 15.461993 +epoch: 2, batch: 15557, sum loss: 3528.903809, avg loss: 2.201437, ppl: 9.037993 +epoch: 2, batch: 15558, sum loss: 3589.072754, avg loss: 2.311058, ppl: 10.085087 +epoch: 2, batch: 15559, sum loss: 4132.721191, avg loss: 2.482115, ppl: 11.966544 +epoch: 2, batch: 15560, sum loss: 4287.386230, avg loss: 2.559634, ppl: 12.931080 +epoch: 2, batch: 15561, sum loss: 3845.779053, avg loss: 2.267558, ppl: 9.655796 +epoch: 2, batch: 15562, sum loss: 3696.493652, avg loss: 2.389459, ppl: 10.907596 +epoch: 2, batch: 15563, sum loss: 3427.163330, avg loss: 2.277185, ppl: 9.749197 +epoch: 2, batch: 15564, sum loss: 3698.684326, avg loss: 2.237559, ppl: 9.370426 +epoch: 2, batch: 15565, sum loss: 4602.135254, avg loss: 2.708732, ppl: 15.010229 +epoch: 2, batch: 15566, sum loss: 3844.744141, avg loss: 2.578635, ppl: 13.179132 +epoch: 2, batch: 15567, sum loss: 3250.806396, avg loss: 2.197976, ppl: 9.006764 +epoch: 2, batch: 15568, sum loss: 3480.211182, avg loss: 2.313970, ppl: 10.114500 +epoch: 2, batch: 15569, sum loss: 4079.362549, avg loss: 2.616653, ppl: 13.689833 +epoch: 2, batch: 15570, sum loss: 3949.588623, avg loss: 2.476231, ppl: 11.896343 +epoch: 2, batch: 15571, sum loss: 4815.016113, avg loss: 2.648524, ppl: 14.133160 +epoch: 2, batch: 15572, sum loss: 4138.729492, avg loss: 2.522078, ppl: 12.454445 +epoch: 2, batch: 15573, sum loss: 4778.547363, avg loss: 2.654749, ppl: 14.221412 +epoch: 2, batch: 15574, sum loss: 4667.464355, avg loss: 2.508041, ppl: 12.280850 +epoch: 2, batch: 15575, sum loss: 3760.901367, avg loss: 2.705684, ppl: 14.964556 +epoch: 2, batch: 15576, sum loss: 4006.973145, avg loss: 2.293631, ppl: 9.910857 +epoch: 2, batch: 15577, sum loss: 4722.574707, avg loss: 2.703248, ppl: 14.928144 +epoch: 2, batch: 15578, sum loss: 4674.210449, avg loss: 2.522510, ppl: 12.459826 +epoch: 2, batch: 15579, sum loss: 4835.862305, 
avg loss: 2.599926, ppl: 13.462742 +epoch: 2, batch: 15580, sum loss: 4001.665283, avg loss: 2.390481, ppl: 10.918747 +epoch: 2, batch: 15581, sum loss: 3751.099609, avg loss: 2.553505, ppl: 12.852077 +epoch: 2, batch: 15582, sum loss: 4473.897949, avg loss: 2.756561, ppl: 15.745594 +epoch: 2, batch: 15583, sum loss: 3304.104492, avg loss: 2.404734, ppl: 11.075485 +epoch: 2, batch: 15584, sum loss: 4865.587891, avg loss: 2.606100, ppl: 13.546112 +epoch: 2, batch: 15585, sum loss: 3258.522705, avg loss: 2.255033, ppl: 9.535608 +epoch: 2, batch: 15586, sum loss: 3560.567383, avg loss: 2.400922, ppl: 11.033345 +epoch: 2, batch: 15587, sum loss: 3945.472168, avg loss: 2.481429, ppl: 11.958342 +epoch: 2, batch: 15588, sum loss: 4423.163574, avg loss: 2.788880, ppl: 16.262793 +epoch: 2, batch: 15589, sum loss: 3878.415283, avg loss: 2.208665, ppl: 9.103554 +epoch: 2, batch: 15590, sum loss: 4061.554199, avg loss: 2.446719, ppl: 11.550392 +epoch: 2, batch: 15591, sum loss: 3650.722900, avg loss: 2.517740, ppl: 12.400540 +epoch: 2, batch: 15592, sum loss: 5322.441895, avg loss: 2.555181, ppl: 12.873630 +epoch: 2, batch: 15593, sum loss: 5013.256348, avg loss: 2.884497, ppl: 17.894571 +epoch: 2, batch: 15594, sum loss: 4392.462402, avg loss: 2.444331, ppl: 11.522838 +epoch: 2, batch: 15595, sum loss: 4499.882812, avg loss: 2.645434, ppl: 14.089554 +epoch: 2, batch: 15596, sum loss: 3425.252930, avg loss: 2.240192, ppl: 9.395132 +epoch: 2, batch: 15597, sum loss: 4539.572266, avg loss: 2.463143, ppl: 11.741656 +epoch: 2, batch: 15598, sum loss: 3687.963867, avg loss: 2.248759, ppl: 9.475965 +epoch: 2, batch: 15599, sum loss: 4111.300293, avg loss: 2.441390, ppl: 11.488997 +epoch: 2, batch: 15600, sum loss: 3982.756104, avg loss: 2.390610, ppl: 10.920151 +epoch: 2, batch: 15601, sum loss: 4831.491699, avg loss: 2.630099, ppl: 13.875144 +epoch: 2, batch: 15602, sum loss: 5120.342773, avg loss: 2.547434, ppl: 12.774287 +epoch: 2, batch: 15603, sum loss: 3636.625488, avg loss: 
2.264399, ppl: 9.625340 +epoch: 2, batch: 15604, sum loss: 3736.754150, avg loss: 2.200680, ppl: 9.031151 +epoch: 2, batch: 15605, sum loss: 4376.866211, avg loss: 2.461680, ppl: 11.724488 +epoch: 2, batch: 15606, sum loss: 4285.166016, avg loss: 2.625715, ppl: 13.814442 +epoch: 2, batch: 15607, sum loss: 3893.562012, avg loss: 2.413864, ppl: 11.177062 +epoch: 2, batch: 15608, sum loss: 4387.872070, avg loss: 2.479024, ppl: 11.929615 +epoch: 2, batch: 15609, sum loss: 3400.272705, avg loss: 2.371180, ppl: 10.710026 +epoch: 2, batch: 15610, sum loss: 4040.589844, avg loss: 2.658283, ppl: 14.271764 +epoch: 2, batch: 15611, sum loss: 4519.383789, avg loss: 2.677360, ppl: 14.546640 +epoch: 2, batch: 15612, sum loss: 4077.113770, avg loss: 2.416784, ppl: 11.209746 +epoch: 2, batch: 15613, sum loss: 4164.062012, avg loss: 2.628827, ppl: 13.857507 +epoch: 2, batch: 15614, sum loss: 3926.251221, avg loss: 2.588168, ppl: 13.305376 +epoch: 2, batch: 15615, sum loss: 4155.050293, avg loss: 2.636453, ppl: 13.963589 +epoch: 2, batch: 15616, sum loss: 4261.688477, avg loss: 2.655257, ppl: 14.228649 +epoch: 2, batch: 15617, sum loss: 4576.701660, avg loss: 2.742182, ppl: 15.520815 +epoch: 2, batch: 15618, sum loss: 4189.818848, avg loss: 2.543909, ppl: 12.729337 +epoch: 2, batch: 15619, sum loss: 3929.208252, avg loss: 2.413518, ppl: 11.173204 +epoch: 2, batch: 15620, sum loss: 3893.226807, avg loss: 2.240062, ppl: 9.393909 +epoch: 2, batch: 15621, sum loss: 4037.884766, avg loss: 2.313974, ppl: 10.114539 +epoch: 2, batch: 15622, sum loss: 5408.568359, avg loss: 2.638326, ppl: 13.989764 +epoch: 2, batch: 15623, sum loss: 4845.571777, avg loss: 2.624903, ppl: 13.803242 +epoch: 2, batch: 15624, sum loss: 4072.220703, avg loss: 2.519938, ppl: 12.427832 +epoch: 2, batch: 15625, sum loss: 4450.892090, avg loss: 2.657249, ppl: 14.257017 +epoch: 2, batch: 15626, sum loss: 4771.964355, avg loss: 2.609057, ppl: 13.586229 +epoch: 2, batch: 15627, sum loss: 3786.462158, avg loss: 2.421012, 
ppl: 11.257242 +epoch: 2, batch: 15628, sum loss: 4093.168945, avg loss: 2.590613, ppl: 13.337950 +epoch: 2, batch: 15629, sum loss: 3823.602051, avg loss: 2.363166, ppl: 10.624532 +epoch: 2, batch: 15630, sum loss: 4162.704590, avg loss: 2.492638, ppl: 12.093131 +epoch: 2, batch: 15631, sum loss: 4914.728516, avg loss: 2.603140, ppl: 13.506083 +epoch: 2, batch: 15632, sum loss: 4374.177734, avg loss: 2.419346, ppl: 11.238508 +epoch: 2, batch: 15633, sum loss: 4075.019531, avg loss: 2.562905, ppl: 12.973454 +epoch: 2, batch: 15634, sum loss: 5506.283203, avg loss: 2.669066, ppl: 14.426492 +epoch: 2, batch: 15635, sum loss: 4600.162598, avg loss: 2.545746, ppl: 12.752733 +epoch: 2, batch: 15636, sum loss: 4001.080078, avg loss: 2.535539, ppl: 12.623229 +epoch: 2, batch: 15637, sum loss: 4189.132812, avg loss: 2.600331, ppl: 13.468193 +epoch: 2, batch: 15638, sum loss: 3383.901855, avg loss: 2.133608, ppl: 8.445279 +epoch: 2, batch: 15639, sum loss: 4937.108398, avg loss: 2.718672, ppl: 15.160173 +epoch: 2, batch: 15640, sum loss: 5016.372559, avg loss: 2.799315, ppl: 16.433390 +epoch: 2, batch: 15641, sum loss: 4310.613281, avg loss: 2.439510, ppl: 11.467416 +epoch: 2, batch: 15642, sum loss: 4320.180664, avg loss: 2.538296, ppl: 12.658089 +epoch: 2, batch: 15643, sum loss: 4608.538574, avg loss: 2.464459, ppl: 11.757122 +epoch: 2, batch: 15644, sum loss: 3823.148926, avg loss: 2.386485, ppl: 10.875199 +epoch: 2, batch: 15645, sum loss: 5094.791992, avg loss: 2.867075, ppl: 17.585501 +epoch: 2, batch: 15646, sum loss: 3926.266113, avg loss: 2.477139, ppl: 11.907155 +epoch: 2, batch: 15647, sum loss: 3888.371582, avg loss: 2.382581, ppl: 10.832824 +epoch: 2, batch: 15648, sum loss: 3933.819336, avg loss: 2.480340, ppl: 11.945325 +epoch: 2, batch: 15649, sum loss: 4256.014648, avg loss: 2.480195, ppl: 11.943594 +epoch: 2, batch: 15650, sum loss: 4779.166992, avg loss: 2.539409, ppl: 12.672175 +epoch: 2, batch: 15651, sum loss: 3486.131836, avg loss: 2.342830, ppl: 
10.410662 +epoch: 2, batch: 15652, sum loss: 4819.218750, avg loss: 2.578501, ppl: 13.177374 +epoch: 2, batch: 15653, sum loss: 3854.936523, avg loss: 2.449134, ppl: 11.578311 +epoch: 2, batch: 15654, sum loss: 3882.775879, avg loss: 2.258741, ppl: 9.571033 +epoch: 2, batch: 15655, sum loss: 3897.310547, avg loss: 2.343542, ppl: 10.418073 +epoch: 2, batch: 15656, sum loss: 4550.138184, avg loss: 2.708416, ppl: 15.005485 +epoch: 2, batch: 15657, sum loss: 4533.221191, avg loss: 2.438527, ppl: 11.456149 +epoch: 2, batch: 15658, sum loss: 3311.731689, avg loss: 2.293443, ppl: 9.908996 +epoch: 2, batch: 15659, sum loss: 5300.936523, avg loss: 2.779726, ppl: 16.114597 +epoch: 2, batch: 15660, sum loss: 4133.230957, avg loss: 2.316833, ppl: 10.143504 +epoch: 2, batch: 15661, sum loss: 4184.288086, avg loss: 2.579709, ppl: 13.193299 +epoch: 2, batch: 15662, sum loss: 4812.496582, avg loss: 2.669161, ppl: 14.427854 +epoch: 2, batch: 15663, sum loss: 3705.126953, avg loss: 2.445628, ppl: 11.537798 +epoch: 2, batch: 15664, sum loss: 3804.196289, avg loss: 2.478304, ppl: 11.921025 +epoch: 2, batch: 15665, sum loss: 5160.816406, avg loss: 2.679552, ppl: 14.578558 +epoch: 2, batch: 15666, sum loss: 4478.963379, avg loss: 2.520519, ppl: 12.435055 +epoch: 2, batch: 15667, sum loss: 4230.695312, avg loss: 2.423079, ppl: 11.280536 +epoch: 2, batch: 15668, sum loss: 3942.460205, avg loss: 2.360755, ppl: 10.598948 +epoch: 2, batch: 15669, sum loss: 2720.271973, avg loss: 2.040714, ppl: 7.696104 +epoch: 2, batch: 15670, sum loss: 4981.389648, avg loss: 2.773602, ppl: 16.016224 +epoch: 2, batch: 15671, sum loss: 4333.930176, avg loss: 2.385212, ppl: 10.861364 +epoch: 2, batch: 15672, sum loss: 4698.726562, avg loss: 2.597417, ppl: 13.429001 +epoch: 2, batch: 15673, sum loss: 3572.176270, avg loss: 2.087771, ppl: 8.066916 +epoch: 2, batch: 15674, sum loss: 4437.094727, avg loss: 2.551521, ppl: 12.826596 +epoch: 2, batch: 15675, sum loss: 4459.846680, avg loss: 2.598978, ppl: 13.449989 
+epoch: 2, batch: 15676, sum loss: 3970.080566, avg loss: 2.236665, ppl: 9.362057 +epoch: 2, batch: 15677, sum loss: 4209.810547, avg loss: 2.598649, ppl: 13.445555 +epoch: 2, batch: 15678, sum loss: 4292.710938, avg loss: 2.581306, ppl: 13.214378 +epoch: 2, batch: 15679, sum loss: 4899.015625, avg loss: 2.587964, ppl: 13.302657 +epoch: 2, batch: 15680, sum loss: 4704.298340, avg loss: 2.691246, ppl: 14.750047 +epoch: 2, batch: 15681, sum loss: 5209.183105, avg loss: 2.762027, ppl: 15.831906 +epoch: 2, batch: 15682, sum loss: 4705.623535, avg loss: 2.772907, ppl: 16.005096 +epoch: 2, batch: 15683, sum loss: 3841.592041, avg loss: 2.392025, ppl: 10.935614 +epoch: 2, batch: 15684, sum loss: 3677.480713, avg loss: 2.386425, ppl: 10.874545 +epoch: 2, batch: 15685, sum loss: 3517.360352, avg loss: 2.362230, ppl: 10.614594 +epoch: 2, batch: 15686, sum loss: 4250.330566, avg loss: 2.520955, ppl: 12.440475 +epoch: 2, batch: 15687, sum loss: 4858.009277, avg loss: 2.631641, ppl: 13.896554 +epoch: 2, batch: 15688, sum loss: 4803.423340, avg loss: 2.619097, ppl: 13.723319 +epoch: 2, batch: 15689, sum loss: 4734.924316, avg loss: 2.636372, ppl: 13.962457 +epoch: 2, batch: 15690, sum loss: 4646.077637, avg loss: 2.800529, ppl: 16.453348 +epoch: 2, batch: 15691, sum loss: 3757.053711, avg loss: 2.336476, ppl: 10.344718 +epoch: 2, batch: 15692, sum loss: 4976.479004, avg loss: 2.662642, ppl: 14.334117 +epoch: 2, batch: 15693, sum loss: 5137.749512, avg loss: 2.787710, ppl: 16.243778 +epoch: 2, batch: 15694, sum loss: 3949.951416, avg loss: 2.312618, ppl: 10.100832 +epoch: 2, batch: 15695, sum loss: 3583.093506, avg loss: 2.282225, ppl: 9.798459 +epoch: 2, batch: 15696, sum loss: 4763.401367, avg loss: 2.507054, ppl: 12.268728 +epoch: 2, batch: 15697, sum loss: 3956.233398, avg loss: 2.492901, ppl: 12.096314 +epoch: 2, batch: 15698, sum loss: 3433.217529, avg loss: 2.335522, ppl: 10.334855 +epoch: 2, batch: 15699, sum loss: 4595.425293, avg loss: 2.630467, ppl: 13.880246 +epoch: 
2, batch: 15700, sum loss: 3882.334961, avg loss: 2.506349, ppl: 12.260091 +epoch: 2, batch: 15701, sum loss: 4983.941406, avg loss: 2.529920, ppl: 12.552497 +epoch: 2, batch: 15702, sum loss: 5320.431152, avg loss: 2.732630, ppl: 15.373273 +epoch: 2, batch: 15703, sum loss: 3377.066162, avg loss: 2.201477, ppl: 9.038356 +epoch: 2, batch: 15704, sum loss: 4553.816406, avg loss: 2.735025, ppl: 15.410128 +epoch: 2, batch: 15705, sum loss: 4289.405273, avg loss: 2.829423, ppl: 16.935690 +epoch: 2, batch: 15706, sum loss: 3856.812012, avg loss: 2.430254, ppl: 11.361762 +epoch: 2, batch: 15707, sum loss: 4645.864258, avg loss: 2.677732, ppl: 14.552045 +epoch: 2, batch: 15708, sum loss: 4363.693359, avg loss: 2.654315, ppl: 14.215241 +epoch: 2, batch: 15709, sum loss: 4159.089355, avg loss: 2.649101, ppl: 14.141327 +epoch: 2, batch: 15710, sum loss: 5133.291992, avg loss: 2.717465, ppl: 15.141895 +epoch: 2, batch: 15711, sum loss: 4654.788086, avg loss: 2.449889, ppl: 11.587057 +epoch: 2, batch: 15712, sum loss: 3843.683105, avg loss: 2.352315, ppl: 10.509874 +epoch: 2, batch: 15713, sum loss: 4623.053711, avg loss: 2.608947, ppl: 13.584736 +epoch: 2, batch: 15714, sum loss: 4458.482422, avg loss: 2.618017, ppl: 13.708508 +epoch: 2, batch: 15715, sum loss: 4410.848633, avg loss: 2.671623, ppl: 14.463417 +epoch: 2, batch: 15716, sum loss: 5524.860352, avg loss: 2.865592, ppl: 17.559437 +epoch: 2, batch: 15717, sum loss: 4481.210449, avg loss: 2.642223, ppl: 14.044392 +epoch: 2, batch: 15718, sum loss: 3902.409912, avg loss: 2.373729, ppl: 10.737354 +epoch: 2, batch: 15719, sum loss: 5100.795898, avg loss: 2.769162, ppl: 15.945266 +epoch: 2, batch: 15720, sum loss: 4176.625488, avg loss: 2.570231, ppl: 13.068843 +epoch: 2, batch: 15721, sum loss: 4276.036133, avg loss: 2.487514, ppl: 12.031326 +epoch: 2, batch: 15722, sum loss: 4024.745117, avg loss: 2.553772, ppl: 12.855506 +epoch: 2, batch: 15723, sum loss: 4006.094482, avg loss: 2.527504, ppl: 12.522217 +epoch: 2, 
batch: 15724, sum loss: 3928.030518, avg loss: 2.508321, ppl: 12.284285 +epoch: 2, batch: 15725, sum loss: 3963.320801, avg loss: 2.510020, ppl: 12.305170 +epoch: 2, batch: 15726, sum loss: 4530.037598, avg loss: 2.451319, ppl: 11.603642 +epoch: 2, batch: 15727, sum loss: 3707.983887, avg loss: 2.548443, ppl: 12.787173 +epoch: 2, batch: 15728, sum loss: 4132.748047, avg loss: 2.652598, ppl: 14.190864 +epoch: 2, batch: 15729, sum loss: 4146.861328, avg loss: 2.668508, ppl: 14.418442 +epoch: 2, batch: 15730, sum loss: 3757.811523, avg loss: 2.401157, ppl: 11.035942 +epoch: 2, batch: 15731, sum loss: 3798.682129, avg loss: 2.368256, ppl: 10.678751 +epoch: 2, batch: 15732, sum loss: 4588.195312, avg loss: 2.450959, ppl: 11.599467 +epoch: 2, batch: 15733, sum loss: 4273.660156, avg loss: 2.476049, ppl: 11.894174 +epoch: 2, batch: 15734, sum loss: 4276.819824, avg loss: 2.410834, ppl: 11.143251 +epoch: 2, batch: 15735, sum loss: 3501.908447, avg loss: 2.325305, ppl: 10.229795 +epoch: 2, batch: 15736, sum loss: 4826.712402, avg loss: 2.570134, ppl: 13.067580 +epoch: 2, batch: 15737, sum loss: 4373.077148, avg loss: 2.480475, ppl: 11.946935 +epoch: 2, batch: 15738, sum loss: 4659.538086, avg loss: 2.464060, ppl: 11.752433 +epoch: 2, batch: 15739, sum loss: 3402.995850, avg loss: 2.194066, ppl: 8.971614 +epoch: 2, batch: 15740, sum loss: 3484.931152, avg loss: 2.198695, ppl: 9.013241 +epoch: 2, batch: 15741, sum loss: 4454.873535, avg loss: 2.807104, ppl: 16.561884 +epoch: 2, batch: 15742, sum loss: 3850.667969, avg loss: 2.437132, ppl: 11.440179 +epoch: 2, batch: 15743, sum loss: 3626.214111, avg loss: 2.523462, ppl: 12.471694 +epoch: 2, batch: 15744, sum loss: 5159.474121, avg loss: 2.842685, ppl: 17.161791 +epoch: 2, batch: 15745, sum loss: 4910.145996, avg loss: 2.691966, ppl: 14.760668 +epoch: 2, batch: 15746, sum loss: 3967.847412, avg loss: 2.472179, ppl: 11.848235 +epoch: 2, batch: 15747, sum loss: 3652.914551, avg loss: 2.403233, ppl: 11.058875 +epoch: 2, batch: 
15748, sum loss: 4050.325928, avg loss: 2.483339, ppl: 11.981204 +epoch: 2, batch: 15749, sum loss: 4521.916992, avg loss: 2.548995, ppl: 12.794236 +epoch: 2, batch: 15750, sum loss: 4615.542969, avg loss: 2.651087, ppl: 14.169436 +epoch: 2, batch: 15751, sum loss: 3346.339355, avg loss: 2.084947, ppl: 8.044162 +epoch: 2, batch: 15752, sum loss: 3698.724854, avg loss: 2.245734, ppl: 9.447351 +epoch: 2, batch: 15753, sum loss: 5245.114258, avg loss: 2.736106, ppl: 15.426791 +epoch: 2, batch: 15754, sum loss: 4597.661621, avg loss: 2.389637, ppl: 10.909533 +epoch: 2, batch: 15755, sum loss: 4415.599609, avg loss: 2.752868, ppl: 15.687554 +epoch: 2, batch: 15756, sum loss: 4729.458496, avg loss: 2.573155, ppl: 13.107108 +epoch: 2, batch: 15757, sum loss: 3664.405273, avg loss: 2.234393, ppl: 9.340814 +epoch: 2, batch: 15758, sum loss: 4209.539062, avg loss: 2.468938, ppl: 11.809896 +epoch: 2, batch: 15759, sum loss: 3891.679443, avg loss: 2.472477, ppl: 11.851769 +epoch: 2, batch: 15760, sum loss: 4461.914062, avg loss: 2.595645, ppl: 13.405233 +epoch: 2, batch: 15761, sum loss: 4137.937500, avg loss: 2.501776, ppl: 12.204149 +epoch: 2, batch: 15762, sum loss: 4090.792480, avg loss: 2.589109, ppl: 13.317903 +epoch: 2, batch: 15763, sum loss: 3863.125732, avg loss: 2.397968, ppl: 11.000795 +epoch: 2, batch: 15764, sum loss: 4158.040527, avg loss: 2.492830, ppl: 12.095458 +epoch: 2, batch: 15765, sum loss: 3787.269287, avg loss: 2.384930, ppl: 10.858307 +epoch: 2, batch: 15766, sum loss: 4131.711426, avg loss: 2.621644, ppl: 13.758327 +epoch: 2, batch: 15767, sum loss: 4756.228027, avg loss: 2.617627, ppl: 13.703166 +epoch: 2, batch: 15768, sum loss: 4265.038574, avg loss: 2.343428, ppl: 10.416883 +epoch: 2, batch: 15769, sum loss: 4266.228516, avg loss: 2.494870, ppl: 12.120163 +epoch: 2, batch: 15770, sum loss: 4760.481934, avg loss: 2.541635, ppl: 12.700417 +epoch: 2, batch: 15771, sum loss: 4374.592285, avg loss: 2.427632, ppl: 11.332014 +epoch: 2, batch: 15772, sum 
loss: 4096.604004, avg loss: 2.392876, ppl: 10.944928 +epoch: 2, batch: 15773, sum loss: 4528.670898, avg loss: 2.639086, ppl: 14.000399 +epoch: 2, batch: 15774, sum loss: 5048.315430, avg loss: 2.633446, ppl: 13.921661 +epoch: 2, batch: 15775, sum loss: 3971.827393, avg loss: 2.332253, ppl: 10.301126 +epoch: 2, batch: 15776, sum loss: 3582.341309, avg loss: 2.404256, ppl: 11.070192 +epoch: 2, batch: 15777, sum loss: 4895.015625, avg loss: 2.436543, ppl: 11.433453 +epoch: 2, batch: 15778, sum loss: 3862.280518, avg loss: 2.475821, ppl: 11.891466 +epoch: 2, batch: 15779, sum loss: 3590.703369, avg loss: 2.353017, ppl: 10.517248 +epoch: 2, batch: 15780, sum loss: 3360.167969, avg loss: 2.231187, ppl: 9.310915 +epoch: 2, batch: 15781, sum loss: 4363.520996, avg loss: 2.636569, ppl: 13.965203 +epoch: 2, batch: 15782, sum loss: 4147.315430, avg loss: 2.390383, ppl: 10.917680 +epoch: 2, batch: 15783, sum loss: 4210.231934, avg loss: 2.412740, ppl: 11.164513 +epoch: 2, batch: 15784, sum loss: 4395.926758, avg loss: 2.510524, ppl: 12.311374 +epoch: 2, batch: 15785, sum loss: 4294.632812, avg loss: 2.462519, ppl: 11.734329 +epoch: 2, batch: 15786, sum loss: 3771.179688, avg loss: 2.408161, ppl: 11.113502 +epoch: 2, batch: 15787, sum loss: 4309.654785, avg loss: 2.623040, ppl: 13.777543 +epoch: 2, batch: 15788, sum loss: 4259.229492, avg loss: 2.632404, ppl: 13.907161 +epoch: 2, batch: 15789, sum loss: 4118.195312, avg loss: 2.577094, ppl: 13.158837 +epoch: 2, batch: 15790, sum loss: 3665.016602, avg loss: 2.381427, ppl: 10.820335 +epoch: 2, batch: 15791, sum loss: 4117.795410, avg loss: 2.291483, ppl: 9.889597 +epoch: 2, batch: 15792, sum loss: 3764.253662, avg loss: 2.388486, ppl: 10.896980 +epoch: 2, batch: 15793, sum loss: 5256.812500, avg loss: 2.853861, ppl: 17.354664 +epoch: 2, batch: 15794, sum loss: 4227.708008, avg loss: 2.361848, ppl: 10.610540 +epoch: 2, batch: 15795, sum loss: 4423.757324, avg loss: 2.519224, ppl: 12.418958 +epoch: 2, batch: 15796, sum loss: 
4751.656250, avg loss: 2.540993, ppl: 12.692265 +epoch: 2, batch: 15797, sum loss: 4449.126465, avg loss: 2.522181, ppl: 12.455728 +epoch: 2, batch: 15798, sum loss: 3585.443359, avg loss: 2.393487, ppl: 10.951612 +epoch: 2, batch: 15799, sum loss: 3380.950439, avg loss: 2.282884, ppl: 9.804914 +epoch: 2, batch: 15800, sum loss: 4712.490723, avg loss: 2.566716, ppl: 13.022986 +epoch: 2, batch: 15801, sum loss: 3336.581543, avg loss: 2.240820, ppl: 9.401041 +epoch: 2, batch: 15802, sum loss: 3750.064941, avg loss: 2.513448, ppl: 12.347437 +epoch: 2, batch: 15803, sum loss: 4574.546387, avg loss: 2.567086, ppl: 13.027802 +epoch: 2, batch: 15804, sum loss: 4626.392090, avg loss: 2.540578, ppl: 12.687000 +epoch: 2, batch: 15805, sum loss: 3548.918945, avg loss: 2.305990, ppl: 10.034109 +epoch: 2, batch: 15806, sum loss: 3633.358643, avg loss: 2.282260, ppl: 9.798805 +epoch: 2, batch: 15807, sum loss: 3967.388672, avg loss: 2.499930, ppl: 12.181640 +epoch: 2, batch: 15808, sum loss: 4254.113770, avg loss: 2.470449, ppl: 11.827759 +epoch: 2, batch: 15809, sum loss: 3539.371582, avg loss: 2.444318, ppl: 11.522683 +epoch: 2, batch: 15810, sum loss: 4382.091309, avg loss: 2.570142, ppl: 13.067674 +epoch: 2, batch: 15811, sum loss: 4308.810547, avg loss: 2.737491, ppl: 15.448172 +epoch: 2, batch: 15812, sum loss: 4151.765625, avg loss: 2.612817, ppl: 13.637407 +epoch: 2, batch: 15813, sum loss: 4462.322754, avg loss: 2.443769, ppl: 11.516370 +epoch: 2, batch: 15814, sum loss: 3656.572754, avg loss: 2.321633, ppl: 10.192308 +epoch: 2, batch: 15815, sum loss: 4146.413574, avg loss: 2.556359, ppl: 12.888798 +epoch: 2, batch: 15816, sum loss: 4019.208008, avg loss: 2.272023, ppl: 9.698999 +epoch: 2, batch: 15817, sum loss: 3853.070801, avg loss: 2.614024, ppl: 13.653879 +epoch: 2, batch: 15818, sum loss: 4418.487305, avg loss: 2.562928, ppl: 12.973745 +epoch: 2, batch: 15819, sum loss: 4194.241211, avg loss: 2.504025, ppl: 12.231625 +epoch: 2, batch: 15820, sum loss: 
3826.933350, avg loss: 2.439091, ppl: 11.462613 +epoch: 2, batch: 15821, sum loss: 3875.127686, avg loss: 2.614796, ppl: 13.664431 +epoch: 2, batch: 15822, sum loss: 4756.052246, avg loss: 2.659985, ppl: 14.296069 +epoch: 2, batch: 15823, sum loss: 3821.124268, avg loss: 2.441613, ppl: 11.491561 +epoch: 2, batch: 15824, sum loss: 3885.294678, avg loss: 2.272102, ppl: 9.699769 +epoch: 2, batch: 15825, sum loss: 2864.791016, avg loss: 2.071432, ppl: 7.936182 +epoch: 2, batch: 15826, sum loss: 4308.948730, avg loss: 2.713444, ppl: 15.081125 +epoch: 2, batch: 15827, sum loss: 4275.385742, avg loss: 2.438897, ppl: 11.460389 +epoch: 2, batch: 15828, sum loss: 4037.967285, avg loss: 2.625466, ppl: 13.811013 +epoch: 2, batch: 15829, sum loss: 4035.635986, avg loss: 2.439925, ppl: 11.472182 +epoch: 2, batch: 15830, sum loss: 4059.949951, avg loss: 2.361809, ppl: 10.610130 +epoch: 2, batch: 15831, sum loss: 3648.620850, avg loss: 2.595036, ppl: 13.397070 +epoch: 2, batch: 15832, sum loss: 3302.649414, avg loss: 2.210609, ppl: 9.121267 +epoch: 2, batch: 15833, sum loss: 4676.759766, avg loss: 2.498269, ppl: 12.161428 +epoch: 2, batch: 15834, sum loss: 3527.645508, avg loss: 2.254086, ppl: 9.526587 +epoch: 2, batch: 15835, sum loss: 5120.745117, avg loss: 2.899629, ppl: 18.167406 +epoch: 2, batch: 15836, sum loss: 3573.514648, avg loss: 2.502461, ppl: 12.212514 +epoch: 2, batch: 15837, sum loss: 4741.870605, avg loss: 2.592603, ppl: 13.364513 +epoch: 2, batch: 15838, sum loss: 4492.342773, avg loss: 2.413940, ppl: 11.177920 +epoch: 2, batch: 15839, sum loss: 3689.420654, avg loss: 2.230605, ppl: 9.305495 +epoch: 2, batch: 15840, sum loss: 5306.949707, avg loss: 2.769807, ppl: 15.955552 +epoch: 2, batch: 15841, sum loss: 4003.764160, avg loss: 2.303662, ppl: 10.010773 +epoch: 2, batch: 15842, sum loss: 4675.905762, avg loss: 2.638773, ppl: 13.996019 +epoch: 2, batch: 15843, sum loss: 4347.008789, avg loss: 2.612385, ppl: 13.631527 +epoch: 2, batch: 15844, sum loss: 5174.137207, 
avg loss: 2.641214, ppl: 14.030221 +epoch: 2, batch: 15845, sum loss: 3934.935059, avg loss: 2.433479, ppl: 11.398466 +epoch: 2, batch: 15846, sum loss: 3585.244141, avg loss: 2.464085, ppl: 11.752728 +epoch: 2, batch: 15847, sum loss: 4323.612305, avg loss: 2.688813, ppl: 14.714207 +epoch: 2, batch: 15848, sum loss: 3579.430176, avg loss: 2.342559, ppl: 10.407837 +epoch: 2, batch: 15849, sum loss: 4070.424805, avg loss: 2.463938, ppl: 11.750990 +epoch: 2, batch: 15850, sum loss: 3972.420166, avg loss: 2.623791, ppl: 13.787901 +epoch: 2, batch: 15851, sum loss: 3669.342773, avg loss: 2.213114, ppl: 9.144145 +epoch: 2, batch: 15852, sum loss: 4260.544922, avg loss: 2.566593, ppl: 13.021390 +epoch: 2, batch: 15853, sum loss: 3874.195312, avg loss: 2.711123, ppl: 15.046170 +epoch: 2, batch: 15854, sum loss: 4062.519043, avg loss: 2.460642, ppl: 11.712324 +epoch: 2, batch: 15855, sum loss: 4512.927734, avg loss: 2.423699, ppl: 11.287537 +epoch: 2, batch: 15856, sum loss: 3174.521973, avg loss: 2.195382, ppl: 8.983431 +epoch: 2, batch: 15857, sum loss: 3223.966797, avg loss: 2.307779, ppl: 10.052070 +epoch: 2, batch: 15858, sum loss: 2724.108887, avg loss: 1.917036, ppl: 6.800774 +epoch: 2, batch: 15859, sum loss: 4247.723633, avg loss: 2.613984, ppl: 13.653333 +epoch: 2, batch: 15860, sum loss: 4130.449219, avg loss: 2.494233, ppl: 12.112435 +epoch: 2, batch: 15861, sum loss: 4711.641602, avg loss: 2.638097, ppl: 13.986566 +epoch: 2, batch: 15862, sum loss: 5177.554199, avg loss: 2.843248, ppl: 17.171446 +epoch: 2, batch: 15863, sum loss: 3889.358398, avg loss: 2.378812, ppl: 10.792078 +epoch: 2, batch: 15864, sum loss: 5124.213379, avg loss: 2.473076, ppl: 11.858867 +epoch: 2, batch: 15865, sum loss: 5017.604004, avg loss: 2.737373, ppl: 15.446348 +epoch: 2, batch: 15866, sum loss: 3441.585449, avg loss: 2.460032, ppl: 11.705192 +epoch: 2, batch: 15867, sum loss: 4384.121582, avg loss: 2.422167, ppl: 11.270253 +epoch: 2, batch: 15868, sum loss: 4644.141113, avg loss: 
2.564407, ppl: 12.992952 +epoch: 2, batch: 15869, sum loss: 4525.913574, avg loss: 2.593647, ppl: 13.378470 +epoch: 2, batch: 15870, sum loss: 4481.833984, avg loss: 2.546496, ppl: 12.762311 +epoch: 2, batch: 15871, sum loss: 4707.640137, avg loss: 2.565472, ppl: 13.006791 +epoch: 2, batch: 15872, sum loss: 3967.353027, avg loss: 2.507808, ppl: 12.277992 +epoch: 2, batch: 15873, sum loss: 3988.521484, avg loss: 2.405622, ppl: 11.085326 +epoch: 2, batch: 15874, sum loss: 4734.587891, avg loss: 2.601422, ppl: 13.482898 +epoch: 2, batch: 15875, sum loss: 4783.393555, avg loss: 2.634027, ppl: 13.929755 +epoch: 2, batch: 15876, sum loss: 4835.786621, avg loss: 2.521265, ppl: 12.444332 +epoch: 2, batch: 15877, sum loss: 3896.472412, avg loss: 2.435295, ppl: 11.419188 +epoch: 2, batch: 15878, sum loss: 3442.438477, avg loss: 2.267746, ppl: 9.657608 +epoch: 2, batch: 15879, sum loss: 4168.501953, avg loss: 2.648349, ppl: 14.130693 +epoch: 2, batch: 15880, sum loss: 4169.240234, avg loss: 2.404406, ppl: 11.071853 +epoch: 2, batch: 15881, sum loss: 3240.114990, avg loss: 2.233022, ppl: 9.328012 +epoch: 2, batch: 15882, sum loss: 4712.706055, avg loss: 2.717824, ppl: 15.147318 +epoch: 2, batch: 15883, sum loss: 5244.169922, avg loss: 2.666075, ppl: 14.383403 +epoch: 2, batch: 15884, sum loss: 4373.820801, avg loss: 2.566796, ppl: 13.024033 +epoch: 2, batch: 15885, sum loss: 3731.358398, avg loss: 2.301887, ppl: 9.993017 +epoch: 2, batch: 15886, sum loss: 4165.672363, avg loss: 2.323298, ppl: 10.209284 +epoch: 2, batch: 15887, sum loss: 4533.609375, avg loss: 2.524282, ppl: 12.481924 +epoch: 2, batch: 15888, sum loss: 4075.947266, avg loss: 2.383595, ppl: 10.843814 +epoch: 2, batch: 15889, sum loss: 3998.656250, avg loss: 2.337029, ppl: 10.350439 +epoch: 2, batch: 15890, sum loss: 4893.190918, avg loss: 2.687090, ppl: 14.688871 +epoch: 2, batch: 15891, sum loss: 4859.025391, avg loss: 2.575000, ppl: 13.131318 +epoch: 2, batch: 15892, sum loss: 4075.722900, avg loss: 2.278213, 
ppl: 9.759223 +epoch: 2, batch: 15893, sum loss: 4056.643555, avg loss: 2.448185, ppl: 11.567338 +epoch: 2, batch: 15894, sum loss: 3719.340576, avg loss: 2.479560, ppl: 11.936016 +epoch: 2, batch: 15895, sum loss: 4297.027344, avg loss: 2.533625, ppl: 12.599091 +epoch: 2, batch: 15896, sum loss: 4285.169434, avg loss: 2.469838, ppl: 11.820537 +epoch: 2, batch: 15897, sum loss: 4200.994141, avg loss: 2.360109, ppl: 10.592107 +epoch: 2, batch: 15898, sum loss: 5424.109863, avg loss: 2.991787, ppl: 19.921253 +epoch: 2, batch: 15899, sum loss: 3305.398193, avg loss: 2.386569, ppl: 10.876117 +epoch: 2, batch: 15900, sum loss: 4050.496338, avg loss: 2.357681, ppl: 10.566422 +epoch: 2, batch: 15901, sum loss: 3620.247070, avg loss: 2.418335, ppl: 11.227147 +epoch: 2, batch: 15902, sum loss: 4343.289062, avg loss: 2.553374, ppl: 12.850386 +epoch: 2, batch: 15903, sum loss: 3986.476074, avg loss: 2.542395, ppl: 12.710079 +epoch: 2, batch: 15904, sum loss: 3541.959717, avg loss: 2.442731, ppl: 11.504416 +epoch: 2, batch: 15905, sum loss: 3447.032227, avg loss: 2.370724, ppl: 10.705137 +epoch: 2, batch: 15906, sum loss: 4723.687012, avg loss: 2.633047, ppl: 13.916113 +epoch: 2, batch: 15907, sum loss: 4734.524902, avg loss: 2.563360, ppl: 12.979351 +epoch: 2, batch: 15908, sum loss: 4182.554688, avg loss: 2.588214, ppl: 13.305991 +epoch: 2, batch: 15909, sum loss: 4597.212891, avg loss: 2.666597, ppl: 14.390912 +epoch: 2, batch: 15910, sum loss: 4610.451172, avg loss: 2.615117, ppl: 13.668814 +epoch: 2, batch: 15911, sum loss: 3946.358398, avg loss: 2.393183, ppl: 10.948284 +epoch: 2, batch: 15912, sum loss: 3667.796631, avg loss: 2.378597, ppl: 10.789755 +epoch: 2, batch: 15913, sum loss: 3820.141602, avg loss: 2.577693, ppl: 13.166734 +epoch: 2, batch: 15914, sum loss: 4005.722656, avg loss: 2.307444, ppl: 10.048706 +epoch: 2, batch: 15915, sum loss: 4613.866699, avg loss: 2.599361, ppl: 13.455143 +epoch: 2, batch: 15916, sum loss: 5305.181641, avg loss: 2.821905, ppl: 
16.808844 +epoch: 2, batch: 15917, sum loss: 4817.405273, avg loss: 2.471732, ppl: 11.842940 +epoch: 2, batch: 15918, sum loss: 3914.273193, avg loss: 2.341073, ppl: 10.392377 +epoch: 2, batch: 15919, sum loss: 4497.450684, avg loss: 2.512542, ppl: 12.336252 +epoch: 2, batch: 15920, sum loss: 4269.458496, avg loss: 2.794148, ppl: 16.348698 +epoch: 2, batch: 15921, sum loss: 4242.046387, avg loss: 2.763548, ppl: 15.856003 +epoch: 2, batch: 15922, sum loss: 4015.441406, avg loss: 2.490969, ppl: 12.072966 +epoch: 2, batch: 15923, sum loss: 4189.066406, avg loss: 2.525055, ppl: 12.491585 +epoch: 2, batch: 15924, sum loss: 4324.776367, avg loss: 2.733740, ppl: 15.390333 +epoch: 2, batch: 15925, sum loss: 4868.136719, avg loss: 2.639987, ppl: 14.013028 +epoch: 2, batch: 15926, sum loss: 3123.930420, avg loss: 2.057925, ppl: 7.829708 +epoch: 2, batch: 15927, sum loss: 3161.670654, avg loss: 2.150796, ppl: 8.591699 +epoch: 2, batch: 15928, sum loss: 3687.175293, avg loss: 2.402069, ppl: 11.046002 +epoch: 2, batch: 15929, sum loss: 5084.457520, avg loss: 2.437420, ppl: 11.443475 +epoch: 2, batch: 15930, sum loss: 4205.814453, avg loss: 2.373485, ppl: 10.734734 +epoch: 2, batch: 15931, sum loss: 4168.737305, avg loss: 2.610355, ppl: 13.603885 +epoch: 2, batch: 15932, sum loss: 4104.726562, avg loss: 2.462343, ppl: 11.732273 +epoch: 2, batch: 15933, sum loss: 3998.442627, avg loss: 2.463612, ppl: 11.747167 +epoch: 2, batch: 15934, sum loss: 4929.104492, avg loss: 2.565906, ppl: 13.012437 +epoch: 2, batch: 15935, sum loss: 4499.359375, avg loss: 2.679785, ppl: 14.581962 +epoch: 2, batch: 15936, sum loss: 4296.911133, avg loss: 2.599462, ppl: 13.456500 +epoch: 2, batch: 15937, sum loss: 4545.719238, avg loss: 2.433469, ppl: 11.398350 +epoch: 2, batch: 15938, sum loss: 4074.240723, avg loss: 2.448462, ppl: 11.570538 +epoch: 2, batch: 15939, sum loss: 4432.629883, avg loss: 2.599783, ppl: 13.460816 +epoch: 2, batch: 15940, sum loss: 3680.513428, avg loss: 2.586447, ppl: 13.282489 
+epoch: 2, batch: 15941, sum loss: 3955.558105, avg loss: 2.315900, ppl: 10.134043 +epoch: 2, batch: 15942, sum loss: 3852.479980, avg loss: 2.319374, ppl: 10.169305 +epoch: 2, batch: 15943, sum loss: 3390.369873, avg loss: 2.144446, ppl: 8.537313 +epoch: 2, batch: 15944, sum loss: 5592.680176, avg loss: 2.674644, ppl: 14.507181 +epoch: 2, batch: 15945, sum loss: 4699.818848, avg loss: 2.515963, ppl: 12.378522 +epoch: 2, batch: 15946, sum loss: 4604.834473, avg loss: 2.675674, ppl: 14.522130 +epoch: 2, batch: 15947, sum loss: 5167.389160, avg loss: 2.678792, ppl: 14.567478 +epoch: 2, batch: 15948, sum loss: 4951.761719, avg loss: 2.625537, ppl: 13.811985 +epoch: 2, batch: 15949, sum loss: 4907.239258, avg loss: 2.675703, ppl: 14.522552 +epoch: 2, batch: 15950, sum loss: 5091.516113, avg loss: 2.472810, ppl: 11.855715 +epoch: 2, batch: 15951, sum loss: 4336.804199, avg loss: 2.610960, ppl: 13.612109 +epoch: 2, batch: 15952, sum loss: 4264.765625, avg loss: 2.468036, ppl: 11.799247 +epoch: 2, batch: 15953, sum loss: 4018.696533, avg loss: 2.640405, ppl: 14.018883 +epoch: 2, batch: 15954, sum loss: 3817.208984, avg loss: 2.323316, ppl: 10.209476 +epoch: 2, batch: 15955, sum loss: 3899.198242, avg loss: 2.583962, ppl: 13.249525 +epoch: 2, batch: 15956, sum loss: 3993.362305, avg loss: 2.267667, ppl: 9.656848 +epoch: 2, batch: 15957, sum loss: 4036.603027, avg loss: 2.488658, ppl: 12.045097 +epoch: 2, batch: 15958, sum loss: 5975.145020, avg loss: 2.823793, ppl: 16.840601 +epoch: 2, batch: 15959, sum loss: 3803.773926, avg loss: 2.484503, ppl: 11.995157 +epoch: 2, batch: 15960, sum loss: 4133.699219, avg loss: 2.317096, ppl: 10.146167 +epoch: 2, batch: 15961, sum loss: 3651.963867, avg loss: 2.332033, ppl: 10.298860 +epoch: 2, batch: 15962, sum loss: 4777.920898, avg loss: 2.609460, ppl: 13.591707 +epoch: 2, batch: 15963, sum loss: 3884.297363, avg loss: 2.253073, ppl: 9.516936 +epoch: 2, batch: 15964, sum loss: 5150.980469, avg loss: 2.772325, ppl: 15.995786 +epoch: 2, 
batch: 15965, sum loss: 4367.676270, avg loss: 2.694433, ppl: 14.797130 +epoch: 2, batch: 15966, sum loss: 4981.647461, avg loss: 2.824063, ppl: 16.845160 +epoch: 2, batch: 15967, sum loss: 3910.405273, avg loss: 2.350003, ppl: 10.485603 +epoch: 2, batch: 15968, sum loss: 3444.156494, avg loss: 2.197930, ppl: 9.006354 +epoch: 2, batch: 15969, sum loss: 3039.918701, avg loss: 2.143807, ppl: 8.531860 +epoch: 2, batch: 15970, sum loss: 2954.448486, avg loss: 2.196616, ppl: 8.994524 +epoch: 2, batch: 15971, sum loss: 4025.670898, avg loss: 2.590522, ppl: 13.336729 +epoch: 2, batch: 15972, sum loss: 3730.258789, avg loss: 2.454118, ppl: 11.636164 +epoch: 2, batch: 15973, sum loss: 3905.451416, avg loss: 2.431788, ppl: 11.379210 +epoch: 2, batch: 15974, sum loss: 3974.515625, avg loss: 2.344847, ppl: 10.431676 +epoch: 2, batch: 15975, sum loss: 4028.854980, avg loss: 2.589238, ppl: 13.319624 +epoch: 2, batch: 15976, sum loss: 4010.274658, avg loss: 2.449771, ppl: 11.585690 +epoch: 2, batch: 15977, sum loss: 4090.875244, avg loss: 2.410651, ppl: 11.141216 +epoch: 2, batch: 15978, sum loss: 4276.652832, avg loss: 2.602953, ppl: 13.503561 +epoch: 2, batch: 15979, sum loss: 3774.510742, avg loss: 2.488142, ppl: 12.038881 +epoch: 2, batch: 15980, sum loss: 3499.234375, avg loss: 2.160021, ppl: 8.671323 +epoch: 2, batch: 15981, sum loss: 4706.658203, avg loss: 2.591772, ppl: 13.353414 +epoch: 2, batch: 15982, sum loss: 4330.956055, avg loss: 2.490487, ppl: 12.067147 +epoch: 2, batch: 15983, sum loss: 3121.718018, avg loss: 2.126511, ppl: 8.385557 +epoch: 2, batch: 15984, sum loss: 4409.817871, avg loss: 2.566832, ppl: 13.024502 +epoch: 2, batch: 15985, sum loss: 4523.969727, avg loss: 2.630215, ppl: 13.876752 +epoch: 2, batch: 15986, sum loss: 4951.877930, avg loss: 2.670916, ppl: 14.453197 +epoch: 2, batch: 15987, sum loss: 3838.235107, avg loss: 2.438523, ppl: 11.456105 +epoch: 2, batch: 15988, sum loss: 4728.413086, avg loss: 2.758701, ppl: 15.779330 +epoch: 2, batch: 
15989, sum loss: 5286.126465, avg loss: 2.607857, ppl: 13.569942 +epoch: 2, batch: 15990, sum loss: 3031.419434, avg loss: 1.990426, ppl: 7.318654 +epoch: 2, batch: 15991, sum loss: 4448.563477, avg loss: 2.599979, ppl: 13.463454 +epoch: 2, batch: 15992, sum loss: 3170.881592, avg loss: 2.139596, ppl: 8.496006 +epoch: 2, batch: 15993, sum loss: 4827.509766, avg loss: 2.369911, ppl: 10.696445 +epoch: 2, batch: 15994, sum loss: 4556.655762, avg loss: 2.422464, ppl: 11.273607 +epoch: 2, batch: 15995, sum loss: 4727.514648, avg loss: 2.601824, ppl: 13.488322 +epoch: 2, batch: 15996, sum loss: 4032.970947, avg loss: 2.352959, ppl: 10.516639 +epoch: 2, batch: 15997, sum loss: 3774.114258, avg loss: 2.288729, ppl: 9.862394 +epoch: 2, batch: 15998, sum loss: 4363.072754, avg loss: 2.569536, ppl: 13.059769 +epoch: 2, batch: 15999, sum loss: 3747.348633, avg loss: 2.246612, ppl: 9.455645 +epoch: 2, batch: 16000, sum loss: 3496.535156, avg loss: 2.273430, ppl: 9.712657 +epoch: 2, batch: 16001, sum loss: 3969.197021, avg loss: 2.488525, ppl: 12.043498 +epoch: 2, batch: 16002, sum loss: 3389.049072, avg loss: 2.436412, ppl: 11.431951 +epoch: 2, batch: 16003, sum loss: 4909.149414, avg loss: 2.623811, ppl: 13.788164 +epoch: 2, batch: 16004, sum loss: 4768.152832, avg loss: 2.740318, ppl: 15.491908 +epoch: 2, batch: 16005, sum loss: 3957.431152, avg loss: 2.375409, ppl: 10.755410 +epoch: 2, batch: 16006, sum loss: 3559.237305, avg loss: 2.320233, ppl: 10.178044 +epoch: 2, batch: 16007, sum loss: 3244.171875, avg loss: 2.167115, ppl: 8.733057 +epoch: 2, batch: 16008, sum loss: 4802.570801, avg loss: 2.554559, ppl: 12.865622 +epoch: 2, batch: 16009, sum loss: 3953.227783, avg loss: 2.481625, ppl: 11.960680 +epoch: 2, batch: 16010, sum loss: 3658.985840, avg loss: 2.357594, ppl: 10.565500 +epoch: 2, batch: 16011, sum loss: 5246.767090, avg loss: 2.922990, ppl: 18.596811 +epoch: 2, batch: 16012, sum loss: 4030.176514, avg loss: 2.404640, ppl: 11.074442 +epoch: 2, batch: 16013, sum 
loss: 5516.591309, avg loss: 2.800300, ppl: 16.449583 +epoch: 2, batch: 16014, sum loss: 4068.083008, avg loss: 2.392990, ppl: 10.946176 +epoch: 2, batch: 16015, sum loss: 3285.400879, avg loss: 2.058522, ppl: 7.834380 +epoch: 2, batch: 16016, sum loss: 3712.550537, avg loss: 2.317447, ppl: 10.149731 +epoch: 2, batch: 16017, sum loss: 4406.626465, avg loss: 2.708437, ppl: 15.005799 +epoch: 2, batch: 16018, sum loss: 4118.168457, avg loss: 2.454212, ppl: 11.637265 +epoch: 2, batch: 16019, sum loss: 3680.736084, avg loss: 2.382354, ppl: 10.830362 +epoch: 2, batch: 16020, sum loss: 3751.819336, avg loss: 2.315938, ppl: 10.134424 +epoch: 2, batch: 16021, sum loss: 4255.764648, avg loss: 2.557551, ppl: 12.904175 +epoch: 2, batch: 16022, sum loss: 3511.093994, avg loss: 2.309930, ppl: 10.073723 +epoch: 2, batch: 16023, sum loss: 4621.479492, avg loss: 2.704201, ppl: 14.942372 +epoch: 2, batch: 16024, sum loss: 4388.405762, avg loss: 2.489169, ppl: 12.051262 +epoch: 2, batch: 16025, sum loss: 4397.246094, avg loss: 2.719385, ppl: 15.170995 +epoch: 2, batch: 16026, sum loss: 4825.456055, avg loss: 2.723169, ppl: 15.228510 +epoch: 2, batch: 16027, sum loss: 4416.178711, avg loss: 2.479606, ppl: 11.936565 +epoch: 2, batch: 16028, sum loss: 3894.206055, avg loss: 2.426296, ppl: 11.316892 +epoch: 2, batch: 16029, sum loss: 4761.148926, avg loss: 2.634836, ppl: 13.941026 +epoch: 2, batch: 16030, sum loss: 4000.049805, avg loss: 2.559213, ppl: 12.925640 +epoch: 2, batch: 16031, sum loss: 3883.485107, avg loss: 2.470410, ppl: 11.827299 +epoch: 2, batch: 16032, sum loss: 4841.515625, avg loss: 2.631258, ppl: 13.891241 +epoch: 2, batch: 16033, sum loss: 3745.578613, avg loss: 2.430616, ppl: 11.365877 +epoch: 2, batch: 16034, sum loss: 3979.897949, avg loss: 2.559420, ppl: 12.928318 +epoch: 2, batch: 16035, sum loss: 3841.085938, avg loss: 2.381330, ppl: 10.819288 +epoch: 2, batch: 16036, sum loss: 5048.433105, avg loss: 2.528009, ppl: 12.528533 +epoch: 2, batch: 16037, sum loss: 
5597.281738, avg loss: 2.857214, ppl: 17.412943 +epoch: 2, batch: 16038, sum loss: 5369.615723, avg loss: 3.016638, ppl: 20.422516 +epoch: 2, batch: 16039, sum loss: 4368.969238, avg loss: 2.399214, ppl: 11.014519 +epoch: 2, batch: 16040, sum loss: 4422.965332, avg loss: 2.514477, ppl: 12.360146 +epoch: 2, batch: 16041, sum loss: 4092.651367, avg loss: 2.401791, ppl: 11.042935 +epoch: 2, batch: 16042, sum loss: 3892.758545, avg loss: 2.312988, ppl: 10.104570 +epoch: 2, batch: 16043, sum loss: 4842.231445, avg loss: 2.792521, ppl: 16.322117 +epoch: 2, batch: 16044, sum loss: 3261.081055, avg loss: 2.309547, ppl: 10.069866 +epoch: 2, batch: 16045, sum loss: 5124.033691, avg loss: 2.740125, ppl: 15.488920 +epoch: 2, batch: 16046, sum loss: 4383.754883, avg loss: 2.651999, ppl: 14.182364 +epoch: 2, batch: 16047, sum loss: 3796.603516, avg loss: 2.180703, ppl: 8.852525 +epoch: 2, batch: 16048, sum loss: 3874.755127, avg loss: 2.421722, ppl: 11.265241 +epoch: 2, batch: 16049, sum loss: 4309.485352, avg loss: 2.695113, ppl: 14.807187 +epoch: 2, batch: 16050, sum loss: 3610.531494, avg loss: 2.544420, ppl: 12.735836 +epoch: 2, batch: 16051, sum loss: 4599.157715, avg loss: 2.787368, ppl: 16.238230 +epoch: 2, batch: 16052, sum loss: 3911.985352, avg loss: 2.274410, ppl: 9.722181 +epoch: 2, batch: 16053, sum loss: 3622.690674, avg loss: 2.461067, ppl: 11.717307 +epoch: 2, batch: 16054, sum loss: 4300.515137, avg loss: 2.437934, ppl: 11.449358 +epoch: 2, batch: 16055, sum loss: 4674.101562, avg loss: 2.788844, ppl: 16.262203 +epoch: 2, batch: 16056, sum loss: 4114.144531, avg loss: 2.372632, ppl: 10.725588 +epoch: 2, batch: 16057, sum loss: 4829.637207, avg loss: 2.573062, ppl: 13.105892 +epoch: 2, batch: 16058, sum loss: 5926.426758, avg loss: 2.969152, ppl: 19.475391 +epoch: 2, batch: 16059, sum loss: 4246.782227, avg loss: 2.410206, ppl: 11.136250 +epoch: 2, batch: 16060, sum loss: 2979.393555, avg loss: 2.137298, ppl: 8.476504 +epoch: 2, batch: 16061, sum loss: 
3432.662354, avg loss: 2.291497, ppl: 9.889729 +epoch: 2, batch: 16062, sum loss: 4423.496094, avg loss: 2.549566, ppl: 12.801541 +epoch: 2, batch: 16063, sum loss: 3651.327637, avg loss: 2.327169, ppl: 10.248883 +epoch: 2, batch: 16064, sum loss: 4380.180176, avg loss: 2.505824, ppl: 12.253653 +epoch: 2, batch: 16065, sum loss: 3499.083984, avg loss: 2.377095, ppl: 10.773562 +epoch: 2, batch: 16066, sum loss: 4476.623047, avg loss: 2.763348, ppl: 15.852823 +epoch: 2, batch: 16067, sum loss: 4648.803711, avg loss: 2.627928, ppl: 13.845047 +epoch: 2, batch: 16068, sum loss: 5429.660156, avg loss: 2.758974, ppl: 15.783638 +epoch: 2, batch: 16069, sum loss: 4234.343262, avg loss: 2.437734, ppl: 11.447068 +epoch: 2, batch: 16070, sum loss: 4907.636719, avg loss: 2.425920, ppl: 11.312635 +epoch: 2, batch: 16071, sum loss: 3637.391602, avg loss: 2.513747, ppl: 12.351120 +epoch: 2, batch: 16072, sum loss: 4233.075195, avg loss: 2.507746, ppl: 12.277225 +epoch: 2, batch: 16073, sum loss: 3928.308594, avg loss: 2.415934, ppl: 11.200225 +epoch: 2, batch: 16074, sum loss: 4783.437500, avg loss: 2.495273, ppl: 12.125039 +epoch: 2, batch: 16075, sum loss: 4964.517578, avg loss: 2.720284, ppl: 15.184627 +epoch: 2, batch: 16076, sum loss: 4667.102539, avg loss: 2.544767, ppl: 12.740258 +epoch: 2, batch: 16077, sum loss: 4055.654297, avg loss: 2.384276, ppl: 10.851208 +epoch: 2, batch: 16078, sum loss: 3932.569336, avg loss: 2.496869, ppl: 12.144414 +epoch: 2, batch: 16079, sum loss: 3999.890625, avg loss: 2.422708, ppl: 11.276355 +epoch: 2, batch: 16080, sum loss: 3937.594727, avg loss: 2.374906, ppl: 10.750006 +epoch: 2, batch: 16081, sum loss: 3912.760010, avg loss: 2.142804, ppl: 8.523305 +epoch: 2, batch: 16082, sum loss: 4649.074219, avg loss: 2.303803, ppl: 10.012186 +epoch: 2, batch: 16083, sum loss: 4189.450195, avg loss: 2.546778, ppl: 12.765908 +epoch: 2, batch: 16084, sum loss: 4479.625977, avg loss: 2.721523, ppl: 15.203453 +epoch: 2, batch: 16085, sum loss: 
3993.104492, avg loss: 2.472511, ppl: 11.852165 +epoch: 2, batch: 16086, sum loss: 4340.855957, avg loss: 2.757850, ppl: 15.765908 +epoch: 2, batch: 16087, sum loss: 5269.751465, avg loss: 2.816543, ppl: 16.718950 +epoch: 2, batch: 16088, sum loss: 4954.346191, avg loss: 2.683828, ppl: 14.641030 +epoch: 2, batch: 16089, sum loss: 4242.237305, avg loss: 2.621902, ppl: 13.761873 +epoch: 2, batch: 16090, sum loss: 4808.458496, avg loss: 2.826842, ppl: 16.892033 +epoch: 2, batch: 16091, sum loss: 3824.802246, avg loss: 2.393493, ppl: 10.951686 +epoch: 2, batch: 16092, sum loss: 4164.637695, avg loss: 2.589949, ppl: 13.329087 +epoch: 2, batch: 16093, sum loss: 4303.560547, avg loss: 2.649976, ppl: 14.153696 +epoch: 2, batch: 16094, sum loss: 4666.598633, avg loss: 2.733801, ppl: 15.391280 +epoch: 2, batch: 16095, sum loss: 5130.204102, avg loss: 2.720151, ppl: 15.182611 +epoch: 2, batch: 16096, sum loss: 3778.014404, avg loss: 2.346593, ppl: 10.449903 +epoch: 2, batch: 16097, sum loss: 5334.097168, avg loss: 2.731233, ppl: 15.351799 +epoch: 2, batch: 16098, sum loss: 4965.258301, avg loss: 2.538476, ppl: 12.660356 +epoch: 2, batch: 16099, sum loss: 3740.241699, avg loss: 2.276471, ppl: 9.742238 +epoch: 2, batch: 16100, sum loss: 5746.547363, avg loss: 3.022908, ppl: 20.550962 +epoch: 2, batch: 16101, sum loss: 3884.784668, avg loss: 2.447880, ppl: 11.563800 +epoch: 2, batch: 16102, sum loss: 4757.777344, avg loss: 2.871320, ppl: 17.660315 +epoch: 2, batch: 16103, sum loss: 4572.814941, avg loss: 2.536226, ppl: 12.631906 +epoch: 2, batch: 16104, sum loss: 4535.032227, avg loss: 2.720475, ppl: 15.187537 +epoch: 2, batch: 16105, sum loss: 3653.905762, avg loss: 2.282265, ppl: 9.798847 +epoch: 2, batch: 16106, sum loss: 4466.006836, avg loss: 2.557851, ppl: 12.908043 +epoch: 2, batch: 16107, sum loss: 3123.675293, avg loss: 2.231197, ppl: 9.311002 +epoch: 2, batch: 16108, sum loss: 4494.613770, avg loss: 2.520815, ppl: 12.438735 +epoch: 2, batch: 16109, sum loss: 
4529.730469, avg loss: 2.641242, ppl: 14.030620 +epoch: 2, batch: 16110, sum loss: 4455.079102, avg loss: 2.561863, ppl: 12.959935 +epoch: 2, batch: 16111, sum loss: 4036.204346, avg loss: 2.465611, ppl: 11.770666 +epoch: 2, batch: 16112, sum loss: 3749.520264, avg loss: 2.496352, ppl: 12.138130 +epoch: 2, batch: 16113, sum loss: 4190.155762, avg loss: 2.527235, ppl: 12.518844 +epoch: 2, batch: 16114, sum loss: 4387.461914, avg loss: 2.518635, ppl: 12.411641 +epoch: 2, batch: 16115, sum loss: 3946.489746, avg loss: 2.297142, ppl: 9.945717 +epoch: 2, batch: 16116, sum loss: 4773.569824, avg loss: 2.653457, ppl: 14.203059 +epoch: 2, batch: 16117, sum loss: 5089.717773, avg loss: 2.681622, ppl: 14.608767 +epoch: 2, batch: 16118, sum loss: 4310.847656, avg loss: 2.510686, ppl: 12.313374 +epoch: 2, batch: 16119, sum loss: 4709.244629, avg loss: 2.639711, ppl: 14.009156 +epoch: 2, batch: 16120, sum loss: 4743.424805, avg loss: 2.708981, ppl: 15.013962 +epoch: 2, batch: 16121, sum loss: 2959.256104, avg loss: 2.276351, ppl: 9.741068 +epoch: 2, batch: 16122, sum loss: 3497.866699, avg loss: 2.289180, ppl: 9.866841 +epoch: 2, batch: 16123, sum loss: 4012.392090, avg loss: 2.353309, ppl: 10.520326 +epoch: 2, batch: 16124, sum loss: 3893.933350, avg loss: 2.333094, ppl: 10.309790 +epoch: 2, batch: 16125, sum loss: 4140.869629, avg loss: 2.490000, ppl: 12.061274 +epoch: 2, batch: 16126, sum loss: 5515.658691, avg loss: 2.989517, ppl: 19.876074 +epoch: 2, batch: 16127, sum loss: 4272.056641, avg loss: 2.326828, ppl: 10.245394 +epoch: 2, batch: 16128, sum loss: 4102.334473, avg loss: 2.387855, ppl: 10.890107 +epoch: 2, batch: 16129, sum loss: 4715.772949, avg loss: 2.735367, ppl: 15.415404 +epoch: 2, batch: 16130, sum loss: 4233.260742, avg loss: 2.513813, ppl: 12.351938 +epoch: 2, batch: 16131, sum loss: 4268.259277, avg loss: 2.498981, ppl: 12.170083 +epoch: 2, batch: 16132, sum loss: 3639.643311, avg loss: 2.496326, ppl: 12.137817 +epoch: 2, batch: 16133, sum loss: 
4721.981445, avg loss: 2.729469, ppl: 15.324748 +epoch: 2, batch: 16134, sum loss: 4902.727539, avg loss: 2.755889, ppl: 15.735029 +epoch: 2, batch: 16135, sum loss: 4431.478516, avg loss: 2.463301, ppl: 11.743512 +epoch: 2, batch: 16136, sum loss: 4774.277344, avg loss: 2.677665, ppl: 14.551084 +epoch: 2, batch: 16137, sum loss: 5359.352539, avg loss: 2.580333, ppl: 13.201540 +epoch: 2, batch: 16138, sum loss: 4329.989746, avg loss: 2.330457, ppl: 10.282641 +epoch: 2, batch: 16139, sum loss: 3440.105957, avg loss: 2.382345, ppl: 10.830267 +epoch: 2, batch: 16140, sum loss: 4785.829102, avg loss: 2.625249, ppl: 13.808014 +epoch: 2, batch: 16141, sum loss: 4616.119141, avg loss: 2.696331, ppl: 14.825242 +epoch: 2, batch: 16142, sum loss: 4917.591309, avg loss: 2.688678, ppl: 14.712211 +epoch: 2, batch: 16143, sum loss: 4102.751465, avg loss: 2.545131, ppl: 12.744900 +epoch: 2, batch: 16144, sum loss: 4260.029297, avg loss: 2.393275, ppl: 10.949294 +epoch: 2, batch: 16145, sum loss: 3600.585449, avg loss: 2.351787, ppl: 10.504323 +epoch: 2, batch: 16146, sum loss: 4463.292969, avg loss: 2.625467, ppl: 13.811017 +epoch: 2, batch: 16147, sum loss: 4228.878906, avg loss: 2.448685, ppl: 11.573117 +epoch: 2, batch: 16148, sum loss: 4940.502441, avg loss: 2.578550, ppl: 13.178020 +epoch: 2, batch: 16149, sum loss: 4806.007324, avg loss: 2.424827, ppl: 11.300276 +epoch: 2, batch: 16150, sum loss: 4188.603516, avg loss: 2.469696, ppl: 11.818849 +epoch: 2, batch: 16151, sum loss: 5558.494141, avg loss: 2.678792, ppl: 14.567489 +epoch: 2, batch: 16152, sum loss: 3296.020752, avg loss: 2.127838, ppl: 8.396692 +epoch: 2, batch: 16153, sum loss: 3795.401855, avg loss: 2.312859, ppl: 10.103270 +epoch: 2, batch: 16154, sum loss: 4009.730225, avg loss: 2.482805, ppl: 11.974807 +epoch: 2, batch: 16155, sum loss: 5739.789062, avg loss: 2.845706, ppl: 17.213711 +epoch: 2, batch: 16156, sum loss: 4446.416016, avg loss: 2.598724, ppl: 13.446565 +epoch: 2, batch: 16157, sum loss: 
4860.218750, avg loss: 2.850568, ppl: 17.297609 +epoch: 2, batch: 16158, sum loss: 4770.574707, avg loss: 2.568969, ppl: 13.052355 +epoch: 2, batch: 16159, sum loss: 4271.681641, avg loss: 2.464906, ppl: 11.762373 +epoch: 2, batch: 16160, sum loss: 3285.910156, avg loss: 2.291430, ppl: 9.889067 +epoch: 2, batch: 16161, sum loss: 4679.729980, avg loss: 2.733487, ppl: 15.386448 +epoch: 2, batch: 16162, sum loss: 4319.007324, avg loss: 2.635148, ppl: 13.945374 +epoch: 2, batch: 16163, sum loss: 3409.074707, avg loss: 2.207950, ppl: 9.097050 +epoch: 2, batch: 16164, sum loss: 4218.711914, avg loss: 2.464201, ppl: 11.754084 +epoch: 2, batch: 16165, sum loss: 3545.583984, avg loss: 2.252594, ppl: 9.512383 +epoch: 2, batch: 16166, sum loss: 4235.090820, avg loss: 2.337247, ppl: 10.352693 +epoch: 2, batch: 16167, sum loss: 3480.006348, avg loss: 2.423403, ppl: 11.284192 +epoch: 2, batch: 16168, sum loss: 4551.143066, avg loss: 2.408012, ppl: 11.111851 +epoch: 2, batch: 16169, sum loss: 4355.334473, avg loss: 2.598648, ppl: 13.445552 +epoch: 2, batch: 16170, sum loss: 4357.776367, avg loss: 2.494434, ppl: 12.114876 +epoch: 2, batch: 16171, sum loss: 3748.040527, avg loss: 2.251076, ppl: 9.497946 +epoch: 2, batch: 16172, sum loss: 4066.971191, avg loss: 2.702307, ppl: 14.914091 +epoch: 2, batch: 16173, sum loss: 4382.929199, avg loss: 2.483246, ppl: 11.980090 +epoch: 2, batch: 16174, sum loss: 4788.232422, avg loss: 2.758198, ppl: 15.771405 +epoch: 2, batch: 16175, sum loss: 4010.583740, avg loss: 2.397241, ppl: 10.992804 +epoch: 2, batch: 16176, sum loss: 4115.699707, avg loss: 2.494364, ppl: 12.114021 +epoch: 2, batch: 16177, sum loss: 4374.119629, avg loss: 2.698408, ppl: 14.856064 +epoch: 2, batch: 16178, sum loss: 4136.329102, avg loss: 2.436001, ppl: 11.427250 +epoch: 2, batch: 16179, sum loss: 3580.565430, avg loss: 2.340239, ppl: 10.383716 +epoch: 2, batch: 16180, sum loss: 4581.521484, avg loss: 2.514556, ppl: 12.361121 +epoch: 2, batch: 16181, sum loss: 
4753.234863, avg loss: 2.680900, ppl: 14.598220 +epoch: 2, batch: 16182, sum loss: 4141.015137, avg loss: 2.708316, ppl: 15.003989 +epoch: 2, batch: 16183, sum loss: 4449.630859, avg loss: 2.632918, ppl: 13.914308 +epoch: 2, batch: 16184, sum loss: 5065.678711, avg loss: 2.891369, ppl: 18.017965 +epoch: 2, batch: 16185, sum loss: 4249.181152, avg loss: 2.506891, ppl: 12.266740 +epoch: 2, batch: 16186, sum loss: 4199.021484, avg loss: 2.640894, ppl: 14.025736 +epoch: 2, batch: 16187, sum loss: 3786.578613, avg loss: 2.395053, ppl: 10.968779 +epoch: 2, batch: 16188, sum loss: 4266.161621, avg loss: 2.623716, ppl: 13.786856 +epoch: 2, batch: 16189, sum loss: 4045.995361, avg loss: 2.434414, ppl: 11.409127 +epoch: 2, batch: 16190, sum loss: 4735.270996, avg loss: 2.617618, ppl: 13.703045 +epoch: 2, batch: 16191, sum loss: 3926.475098, avg loss: 2.417780, ppl: 11.220923 +epoch: 2, batch: 16192, sum loss: 3870.217529, avg loss: 2.337088, ppl: 10.351049 +epoch: 2, batch: 16193, sum loss: 4208.226074, avg loss: 2.469616, ppl: 11.817910 +epoch: 2, batch: 16194, sum loss: 3221.682617, avg loss: 2.302847, ppl: 10.002619 +epoch: 2, batch: 16195, sum loss: 3856.230225, avg loss: 2.191040, ppl: 8.944509 +epoch: 2, batch: 16196, sum loss: 5116.227539, avg loss: 2.974551, ppl: 19.580830 +epoch: 2, batch: 16197, sum loss: 5237.850586, avg loss: 2.671010, ppl: 14.454561 +epoch: 2, batch: 16198, sum loss: 4157.646484, avg loss: 2.503098, ppl: 12.220300 +epoch: 2, batch: 16199, sum loss: 5714.267578, avg loss: 2.899172, ppl: 18.159096 +epoch: 2, batch: 16200, sum loss: 3908.916504, avg loss: 2.662750, ppl: 14.335651 +epoch: 2, batch: 16201, sum loss: 4299.655273, avg loss: 2.504167, ppl: 12.233368 +epoch: 2, batch: 16202, sum loss: 3833.070801, avg loss: 2.407708, ppl: 11.108468 +epoch: 2, batch: 16203, sum loss: 4639.774902, avg loss: 2.363614, ppl: 10.629300 +epoch: 2, batch: 16204, sum loss: 3686.972656, avg loss: 2.417687, ppl: 11.219877 +epoch: 2, batch: 16205, sum loss: 
3660.992188, avg loss: 2.149731, ppl: 8.582551 +epoch: 2, batch: 16206, sum loss: 5043.324219, avg loss: 2.763465, ppl: 15.854691 +epoch: 2, batch: 16207, sum loss: 3654.186035, avg loss: 2.327507, ppl: 10.252351 +epoch: 2, batch: 16208, sum loss: 3492.548340, avg loss: 2.297729, ppl: 9.951559 +epoch: 2, batch: 16209, sum loss: 3773.825684, avg loss: 2.595479, ppl: 13.403000 +epoch: 2, batch: 16210, sum loss: 3990.171143, avg loss: 2.428589, ppl: 11.342862 +epoch: 2, batch: 16211, sum loss: 4110.159668, avg loss: 2.567245, ppl: 13.029884 +epoch: 2, batch: 16212, sum loss: 4162.965332, avg loss: 2.366666, ppl: 10.661787 +epoch: 2, batch: 16213, sum loss: 4138.568848, avg loss: 2.476702, ppl: 11.901947 +epoch: 2, batch: 16214, sum loss: 3633.267334, avg loss: 2.317135, ppl: 10.146559 +epoch: 2, batch: 16215, sum loss: 4384.262207, avg loss: 2.559406, ppl: 12.928133 +epoch: 2, batch: 16216, sum loss: 4862.411133, avg loss: 2.625492, ppl: 13.811366 +epoch: 2, batch: 16217, sum loss: 4397.245117, avg loss: 2.515587, ppl: 12.373865 +epoch: 2, batch: 16218, sum loss: 4669.400391, avg loss: 2.654577, ppl: 14.218967 +epoch: 2, batch: 16219, sum loss: 4788.896973, avg loss: 2.681353, ppl: 14.604845 +epoch: 2, batch: 16220, sum loss: 3912.876709, avg loss: 2.357155, ppl: 10.560859 +epoch: 2, batch: 16221, sum loss: 4860.249023, avg loss: 2.719781, ppl: 15.177001 +epoch: 2, batch: 16222, sum loss: 5298.859375, avg loss: 2.762700, ppl: 15.842566 +epoch: 2, batch: 16223, sum loss: 4942.897949, avg loss: 2.797339, ppl: 16.400946 +epoch: 2, batch: 16224, sum loss: 4268.354492, avg loss: 2.391235, ppl: 10.926982 +epoch: 2, batch: 16225, sum loss: 4432.837402, avg loss: 2.668776, ppl: 14.422310 +epoch: 2, batch: 16226, sum loss: 5092.328613, avg loss: 2.867302, ppl: 17.589500 +epoch: 2, batch: 16227, sum loss: 4679.273926, avg loss: 2.562582, ppl: 12.969258 +epoch: 2, batch: 16228, sum loss: 4499.959961, avg loss: 2.555343, ppl: 12.875720 +epoch: 2, batch: 16229, sum loss: 
4318.311035, avg loss: 2.559758, ppl: 12.932684 +epoch: 2, batch: 16230, sum loss: 5249.013672, avg loss: 2.722517, ppl: 15.218587 +epoch: 2, batch: 16231, sum loss: 3696.639404, avg loss: 2.375732, ppl: 10.758888 +epoch: 2, batch: 16232, sum loss: 5258.654297, avg loss: 2.850219, ppl: 17.291569 +epoch: 2, batch: 16233, sum loss: 4993.174805, avg loss: 2.576458, ppl: 13.150474 +epoch: 2, batch: 16234, sum loss: 4231.988770, avg loss: 2.382877, ppl: 10.836029 +epoch: 2, batch: 16235, sum loss: 4284.178223, avg loss: 2.492250, ppl: 12.088449 +epoch: 2, batch: 16236, sum loss: 4038.121094, avg loss: 2.756397, ppl: 15.743011 +epoch: 2, batch: 16237, sum loss: 4599.317383, avg loss: 2.731186, ppl: 15.351085 +epoch: 2, batch: 16238, sum loss: 3988.891357, avg loss: 2.462279, ppl: 11.731515 +epoch: 2, batch: 16239, sum loss: 3530.726562, avg loss: 2.280831, ppl: 9.784809 +epoch: 2, batch: 16240, sum loss: 2853.661377, avg loss: 2.148841, ppl: 8.574918 +epoch: 2, batch: 16241, sum loss: 4043.774902, avg loss: 2.510103, ppl: 12.306191 +epoch: 2, batch: 16242, sum loss: 3429.159668, avg loss: 2.340723, ppl: 10.388748 +epoch: 2, batch: 16243, sum loss: 4390.607910, avg loss: 2.514667, ppl: 12.362489 +epoch: 2, batch: 16244, sum loss: 5226.120117, avg loss: 2.785778, ppl: 16.212431 +epoch: 2, batch: 16245, sum loss: 4898.866211, avg loss: 2.656652, ppl: 14.248505 +epoch: 2, batch: 16246, sum loss: 4582.012207, avg loss: 2.624291, ppl: 13.794793 +epoch: 2, batch: 16247, sum loss: 3855.363770, avg loss: 2.561704, ppl: 12.957874 +epoch: 2, batch: 16248, sum loss: 4618.307129, avg loss: 2.686624, ppl: 14.682030 +epoch: 2, batch: 16249, sum loss: 4751.027832, avg loss: 2.468066, ppl: 11.799607 +epoch: 2, batch: 16250, sum loss: 4581.895020, avg loss: 2.506507, ppl: 12.262026 +epoch: 2, batch: 16251, sum loss: 4826.922852, avg loss: 2.657997, ppl: 14.267684 +epoch: 2, batch: 16252, sum loss: 3914.391113, avg loss: 2.555086, ppl: 12.872402 +epoch: 2, batch: 16253, sum loss: 
4511.644531, avg loss: 2.658600, ppl: 14.276293 +epoch: 2, batch: 16254, sum loss: 4485.904785, avg loss: 2.717084, ppl: 15.136116 +epoch: 2, batch: 16255, sum loss: 4989.892578, avg loss: 2.703084, ppl: 14.925688 +epoch: 2, batch: 16256, sum loss: 3814.053223, avg loss: 2.354354, ppl: 10.531322 +epoch: 2, batch: 16257, sum loss: 3999.538574, avg loss: 2.539389, ppl: 12.671930 +epoch: 2, batch: 16258, sum loss: 4532.710938, avg loss: 2.641440, ppl: 14.033396 +epoch: 2, batch: 16259, sum loss: 4443.518555, avg loss: 2.469994, ppl: 11.822371 +epoch: 2, batch: 16260, sum loss: 3754.662598, avg loss: 2.346664, ppl: 10.450650 +epoch: 2, batch: 16261, sum loss: 3592.929688, avg loss: 2.291409, ppl: 9.888864 +epoch: 2, batch: 16262, sum loss: 4735.272461, avg loss: 2.654301, ppl: 14.215045 +epoch: 2, batch: 16263, sum loss: 3723.357422, avg loss: 2.386768, ppl: 10.878274 +epoch: 2, batch: 16264, sum loss: 5117.076172, avg loss: 2.868316, ppl: 17.607349 +epoch: 2, batch: 16265, sum loss: 3655.822510, avg loss: 2.367761, ppl: 10.673467 +epoch: 2, batch: 16266, sum loss: 5868.455566, avg loss: 2.981939, ppl: 19.726030 +epoch: 2, batch: 16267, sum loss: 4611.187500, avg loss: 2.567477, ppl: 13.032894 +epoch: 2, batch: 16268, sum loss: 4311.414551, avg loss: 2.578597, ppl: 13.178639 +epoch: 2, batch: 16269, sum loss: 3965.707031, avg loss: 2.413699, ppl: 11.175219 +epoch: 2, batch: 16270, sum loss: 5371.565430, avg loss: 2.613900, ppl: 13.652197 +epoch: 2, batch: 16271, sum loss: 3041.990723, avg loss: 2.069381, ppl: 7.919923 +epoch: 2, batch: 16272, sum loss: 3723.395508, avg loss: 2.358072, ppl: 10.570550 +epoch: 2, batch: 16273, sum loss: 3965.718994, avg loss: 2.473936, ppl: 11.869067 +epoch: 2, batch: 16274, sum loss: 3859.681152, avg loss: 2.352030, ppl: 10.506875 +epoch: 2, batch: 16275, sum loss: 4318.146973, avg loss: 2.670468, ppl: 14.446733 +epoch: 2, batch: 16276, sum loss: 4729.726562, avg loss: 2.617447, ppl: 13.700696 +epoch: 2, batch: 16277, sum loss: 
3663.884521, avg loss: 2.265853, ppl: 9.639345 +epoch: 2, batch: 16278, sum loss: 3967.528809, avg loss: 2.579668, ppl: 13.192752 +epoch: 2, batch: 16279, sum loss: 3779.139893, avg loss: 2.391861, ppl: 10.933820 +epoch: 2, batch: 16280, sum loss: 5177.875488, avg loss: 2.730947, ppl: 15.347418 +epoch: 2, batch: 16281, sum loss: 3736.095215, avg loss: 2.446690, ppl: 11.550051 +epoch: 2, batch: 16282, sum loss: 4902.333008, avg loss: 2.673028, ppl: 14.483756 +epoch: 2, batch: 16283, sum loss: 3892.657227, avg loss: 2.579627, ppl: 13.192220 +epoch: 2, batch: 16284, sum loss: 3866.945801, avg loss: 2.363659, ppl: 10.629774 +epoch: 2, batch: 16285, sum loss: 3964.132812, avg loss: 2.700363, ppl: 14.885133 +epoch: 2, batch: 16286, sum loss: 3999.764648, avg loss: 2.425570, ppl: 11.308671 +epoch: 2, batch: 16287, sum loss: 5509.963867, avg loss: 2.787033, ppl: 16.232782 +epoch: 2, batch: 16288, sum loss: 4089.731934, avg loss: 2.519859, ppl: 12.426848 +epoch: 2, batch: 16289, sum loss: 3807.868164, avg loss: 2.365135, ppl: 10.645480 +epoch: 2, batch: 16290, sum loss: 3799.862305, avg loss: 2.400418, ppl: 11.027788 +epoch: 2, batch: 16291, sum loss: 4018.746094, avg loss: 2.150212, ppl: 8.586677 +epoch: 2, batch: 16292, sum loss: 3881.349365, avg loss: 2.585842, ppl: 13.274467 +epoch: 2, batch: 16293, sum loss: 4478.314941, avg loss: 2.531552, ppl: 12.573002 +epoch: 2, batch: 16294, sum loss: 3609.872559, avg loss: 2.393815, ppl: 10.955204 +epoch: 2, batch: 16295, sum loss: 4603.959961, avg loss: 2.641400, ppl: 14.032834 +epoch: 2, batch: 16296, sum loss: 3945.454102, avg loss: 2.419040, ppl: 11.235068 +epoch: 2, batch: 16297, sum loss: 3491.680176, avg loss: 2.438324, ppl: 11.453831 +epoch: 2, batch: 16298, sum loss: 4495.372070, avg loss: 2.468628, ppl: 11.806242 +epoch: 2, batch: 16299, sum loss: 4651.268555, avg loss: 2.650295, ppl: 14.158222 +epoch: 2, batch: 16300, sum loss: 3640.024658, avg loss: 2.295098, ppl: 9.925405 +epoch: 2, batch: 16301, sum loss: 
3908.954346, avg loss: 2.489780, ppl: 12.058622 +epoch: 2, batch: 16302, sum loss: 4333.702148, avg loss: 2.467940, ppl: 11.798116 +epoch: 2, batch: 16303, sum loss: 3340.567383, avg loss: 2.288060, ppl: 9.855799 +epoch: 2, batch: 16304, sum loss: 4210.294922, avg loss: 2.513609, ppl: 12.349421 +epoch: 2, batch: 16305, sum loss: 5964.037109, avg loss: 2.927853, ppl: 18.687468 +epoch: 2, batch: 16306, sum loss: 4685.629883, avg loss: 2.770923, ppl: 15.973365 +epoch: 2, batch: 16307, sum loss: 4354.965332, avg loss: 2.512963, ppl: 12.341444 +epoch: 2, batch: 16308, sum loss: 4375.847656, avg loss: 2.535254, ppl: 12.619630 +epoch: 2, batch: 16309, sum loss: 3530.022949, avg loss: 2.283327, ppl: 9.809258 +epoch: 2, batch: 16310, sum loss: 3988.679932, avg loss: 2.401373, ppl: 11.038318 +epoch: 2, batch: 16311, sum loss: 5714.540527, avg loss: 2.808128, ppl: 16.578852 +epoch: 2, batch: 16312, sum loss: 5652.673828, avg loss: 2.663842, ppl: 14.351327 +epoch: 2, batch: 16313, sum loss: 4098.108398, avg loss: 2.633746, ppl: 13.925834 +epoch: 2, batch: 16314, sum loss: 3349.697754, avg loss: 2.100124, ppl: 8.167184 +epoch: 2, batch: 16315, sum loss: 3897.703613, avg loss: 2.404505, ppl: 11.072953 +epoch: 2, batch: 16316, sum loss: 4842.304688, avg loss: 2.552612, ppl: 12.840597 +epoch: 2, batch: 16317, sum loss: 3724.917725, avg loss: 2.532235, ppl: 12.581596 +epoch: 2, batch: 16318, sum loss: 4289.225586, avg loss: 2.742472, ppl: 15.525311 +epoch: 2, batch: 16319, sum loss: 4233.670410, avg loss: 2.629609, ppl: 13.868344 +epoch: 2, batch: 16320, sum loss: 4525.161133, avg loss: 2.512583, ppl: 12.336749 +epoch: 2, batch: 16321, sum loss: 4026.341553, avg loss: 2.398059, ppl: 11.001802 +epoch: 2, batch: 16322, sum loss: 4024.547607, avg loss: 2.424426, ppl: 11.295748 +epoch: 2, batch: 16323, sum loss: 3627.670166, avg loss: 2.202593, ppl: 9.048442 +epoch: 2, batch: 16324, sum loss: 4818.533691, avg loss: 2.646092, ppl: 14.098835 +epoch: 2, batch: 16325, sum loss: 
4286.700195, avg loss: 2.518625, ppl: 12.411522 +epoch: 2, batch: 16326, sum loss: 4978.221680, avg loss: 2.882584, ppl: 17.860357 +epoch: 2, batch: 16327, sum loss: 3924.439697, avg loss: 2.480682, ppl: 11.949415 +epoch: 2, batch: 16328, sum loss: 4050.234863, avg loss: 2.504784, ppl: 12.240911 +epoch: 2, batch: 16329, sum loss: 3726.978760, avg loss: 2.358847, ppl: 10.578752 +epoch: 2, batch: 16330, sum loss: 3631.960938, avg loss: 2.331169, ppl: 10.289963 +epoch: 2, batch: 16331, sum loss: 5150.584473, avg loss: 2.625170, ppl: 13.806928 +epoch: 2, batch: 16332, sum loss: 4271.522461, avg loss: 2.365184, ppl: 10.645998 +epoch: 2, batch: 16333, sum loss: 4680.735840, avg loss: 2.499058, ppl: 12.171026 +epoch: 2, batch: 16334, sum loss: 4031.154297, avg loss: 2.510059, ppl: 12.305652 +epoch: 2, batch: 16335, sum loss: 3284.796631, avg loss: 2.544382, ppl: 12.735351 +epoch: 2, batch: 16336, sum loss: 4753.995117, avg loss: 2.606357, ppl: 13.549601 +epoch: 2, batch: 16337, sum loss: 4592.651367, avg loss: 2.577245, ppl: 13.160836 +epoch: 2, batch: 16338, sum loss: 4932.944336, avg loss: 2.616947, ppl: 13.693849 +epoch: 2, batch: 16339, sum loss: 4323.651855, avg loss: 2.467838, ppl: 11.796915 +epoch: 2, batch: 16340, sum loss: 3942.862549, avg loss: 2.529097, ppl: 12.542179 +epoch: 2, batch: 16341, sum loss: 4797.437500, avg loss: 2.627293, ppl: 13.836269 +epoch: 2, batch: 16342, sum loss: 4295.435547, avg loss: 2.600143, ppl: 13.465660 +epoch: 2, batch: 16343, sum loss: 4474.814453, avg loss: 2.425374, ppl: 11.306452 +epoch: 2, batch: 16344, sum loss: 3467.075195, avg loss: 2.300647, ppl: 9.980639 +epoch: 2, batch: 16345, sum loss: 3816.069336, avg loss: 2.454064, ppl: 11.635539 +epoch: 2, batch: 16346, sum loss: 3813.347656, avg loss: 2.372961, ppl: 10.729110 +epoch: 2, batch: 16347, sum loss: 5047.412109, avg loss: 2.787086, ppl: 16.233643 +epoch: 2, batch: 16348, sum loss: 4078.635742, avg loss: 2.477908, ppl: 11.916306 +epoch: 2, batch: 16349, sum loss: 
4514.881836, avg loss: 2.599241, ppl: 13.453526 +epoch: 2, batch: 16350, sum loss: 4539.483398, avg loss: 2.604408, ppl: 13.523217 +epoch: 2, batch: 16351, sum loss: 4591.500000, avg loss: 2.613261, ppl: 13.643473 +epoch: 2, batch: 16352, sum loss: 3436.092285, avg loss: 2.313867, ppl: 10.113457 +epoch: 2, batch: 16353, sum loss: 4155.070312, avg loss: 2.541327, ppl: 12.696511 +epoch: 2, batch: 16354, sum loss: 4189.219238, avg loss: 2.666594, ppl: 14.390874 +epoch: 2, batch: 16355, sum loss: 4420.688477, avg loss: 2.580670, ppl: 13.205988 +epoch: 2, batch: 16356, sum loss: 4910.094727, avg loss: 2.848083, ppl: 17.254673 +epoch: 2, batch: 16357, sum loss: 5075.087891, avg loss: 2.628217, ppl: 13.849061 +epoch: 2, batch: 16358, sum loss: 4376.393555, avg loss: 2.583467, ppl: 13.242975 +epoch: 2, batch: 16359, sum loss: 3848.820801, avg loss: 2.515569, ppl: 12.373650 +epoch: 2, batch: 16360, sum loss: 4153.682617, avg loss: 2.500712, ppl: 12.191170 +epoch: 2, batch: 16361, sum loss: 4586.918457, avg loss: 2.588554, ppl: 13.310515 +epoch: 2, batch: 16362, sum loss: 5252.084961, avg loss: 2.748344, ppl: 15.616745 +epoch: 2, batch: 16363, sum loss: 4790.046875, avg loss: 2.438924, ppl: 11.460703 +epoch: 2, batch: 16364, sum loss: 4375.126953, avg loss: 2.615139, ppl: 13.669113 +epoch: 2, batch: 16365, sum loss: 4494.708496, avg loss: 2.636193, ppl: 13.959954 +epoch: 2, batch: 16366, sum loss: 4128.186035, avg loss: 2.259544, ppl: 9.578717 +epoch: 2, batch: 16367, sum loss: 4600.214844, avg loss: 2.490642, ppl: 12.069017 +epoch: 2, batch: 16368, sum loss: 3773.568359, avg loss: 2.411226, ppl: 11.147617 +epoch: 2, batch: 16369, sum loss: 4309.573242, avg loss: 2.600829, ppl: 13.474899 +epoch: 2, batch: 16370, sum loss: 5343.537598, avg loss: 2.836273, ppl: 17.052090 +epoch: 2, batch: 16371, sum loss: 4133.041992, avg loss: 2.581538, ppl: 13.217450 +epoch: 2, batch: 16372, sum loss: 4940.191406, avg loss: 2.550434, ppl: 12.812668 +epoch: 2, batch: 16373, sum loss: 
4834.625488, avg loss: 2.786528, ppl: 16.224585 +epoch: 2, batch: 16374, sum loss: 3259.447266, avg loss: 2.237095, ppl: 9.366082 +epoch: 2, batch: 16375, sum loss: 4201.873047, avg loss: 2.365920, ppl: 10.653831 +epoch: 2, batch: 16376, sum loss: 3822.246094, avg loss: 2.315110, ppl: 10.126034 +epoch: 2, batch: 16377, sum loss: 3536.917236, avg loss: 2.497823, ppl: 12.156001 +epoch: 2, batch: 16378, sum loss: 3497.221680, avg loss: 2.525070, ppl: 12.491769 +epoch: 2, batch: 16379, sum loss: 3936.115723, avg loss: 2.647018, ppl: 14.111894 +epoch: 2, batch: 16380, sum loss: 4103.354980, avg loss: 2.457099, ppl: 11.670902 +epoch: 2, batch: 16381, sum loss: 4187.449219, avg loss: 2.530181, ppl: 12.555778 +epoch: 2, batch: 16382, sum loss: 4855.389648, avg loss: 2.814719, ppl: 16.688480 +epoch: 2, batch: 16383, sum loss: 5122.953613, avg loss: 2.907465, ppl: 18.310328 +epoch: 2, batch: 16384, sum loss: 4514.649902, avg loss: 2.450950, ppl: 11.599362 +epoch: 2, batch: 16385, sum loss: 3388.940918, avg loss: 2.386578, ppl: 10.876215 +epoch: 2, batch: 16386, sum loss: 5104.836914, avg loss: 2.737178, ppl: 15.443340 +epoch: 2, batch: 16387, sum loss: 4447.851074, avg loss: 2.556236, ppl: 12.887222 +epoch: 2, batch: 16388, sum loss: 5008.424316, avg loss: 2.607197, ppl: 13.560980 +epoch: 2, batch: 16389, sum loss: 4019.045410, avg loss: 2.443189, ppl: 11.509683 +epoch: 2, batch: 16390, sum loss: 4409.843750, avg loss: 2.607832, ppl: 13.569599 +epoch: 2, batch: 16391, sum loss: 4278.913086, avg loss: 2.557629, ppl: 12.905181 +epoch: 2, batch: 16392, sum loss: 4655.246582, avg loss: 2.502821, ppl: 12.216906 +epoch: 2, batch: 16393, sum loss: 4650.992188, avg loss: 2.514050, ppl: 12.354866 +epoch: 2, batch: 16394, sum loss: 4443.072266, avg loss: 2.550558, ppl: 12.814253 +epoch: 2, batch: 16395, sum loss: 4655.585449, avg loss: 2.589313, ppl: 13.320621 +epoch: 2, batch: 16396, sum loss: 4070.223633, avg loss: 2.454900, ppl: 11.645267 +epoch: 2, batch: 16397, sum loss: 
4130.282227, avg loss: 2.421033, ppl: 11.257482 +epoch: 2, batch: 16398, sum loss: 3900.782227, avg loss: 2.412358, ppl: 11.160241 +epoch: 2, batch: 16399, sum loss: 5115.205078, avg loss: 2.775477, ppl: 16.046286 +epoch: 2, batch: 16400, sum loss: 3642.991943, avg loss: 2.272609, ppl: 9.704685 +epoch: 2, batch: 16401, sum loss: 4049.693848, avg loss: 2.449905, ppl: 11.587251 +epoch: 2, batch: 16402, sum loss: 5206.025391, avg loss: 2.556987, ppl: 12.896901 +epoch: 2, batch: 16403, sum loss: 3987.929688, avg loss: 2.314527, ppl: 10.120132 +epoch: 2, batch: 16404, sum loss: 4614.861816, avg loss: 2.730687, ppl: 15.343431 +epoch: 2, batch: 16405, sum loss: 4034.747803, avg loss: 2.302938, ppl: 10.003534 +epoch: 2, batch: 16406, sum loss: 4579.744141, avg loss: 2.442530, ppl: 11.502106 +epoch: 2, batch: 16407, sum loss: 4392.556152, avg loss: 2.750505, ppl: 15.650533 +epoch: 2, batch: 16408, sum loss: 4263.606445, avg loss: 2.560725, ppl: 12.945196 +epoch: 2, batch: 16409, sum loss: 3911.481445, avg loss: 2.276765, ppl: 9.745100 +epoch: 2, batch: 16410, sum loss: 4578.657715, avg loss: 2.565074, ppl: 13.001626 +epoch: 2, batch: 16411, sum loss: 3798.918213, avg loss: 2.306568, ppl: 10.039912 +epoch: 2, batch: 16412, sum loss: 3859.635498, avg loss: 2.421352, ppl: 11.261076 +epoch: 2, batch: 16413, sum loss: 3872.746338, avg loss: 2.316236, ppl: 10.137445 +epoch: 2, batch: 16414, sum loss: 5238.309570, avg loss: 2.822365, ppl: 16.816576 +epoch: 2, batch: 16415, sum loss: 5593.432617, avg loss: 2.905679, ppl: 18.277655 +epoch: 2, batch: 16416, sum loss: 4673.703125, avg loss: 2.612467, ppl: 13.632639 +epoch: 2, batch: 16417, sum loss: 4338.104004, avg loss: 2.234984, ppl: 9.346332 +epoch: 2, batch: 16418, sum loss: 4334.789062, avg loss: 2.536448, ppl: 12.634709 +epoch: 2, batch: 16419, sum loss: 3938.600342, avg loss: 2.484921, ppl: 12.000175 +epoch: 2, batch: 16420, sum loss: 4072.390381, avg loss: 2.569331, ppl: 13.057092 +epoch: 2, batch: 16421, sum loss: 
4622.728027, avg loss: 2.589764, ppl: 13.326621 +epoch: 2, batch: 16422, sum loss: 4446.743652, avg loss: 2.406247, ppl: 11.092250 +epoch: 2, batch: 16423, sum loss: 4403.066895, avg loss: 2.594618, ppl: 13.391472 +epoch: 2, batch: 16424, sum loss: 3775.951660, avg loss: 2.394389, ppl: 10.961500 +epoch: 2, batch: 16425, sum loss: 4207.662598, avg loss: 2.299269, ppl: 9.966896 +epoch: 2, batch: 16426, sum loss: 4875.452637, avg loss: 2.603018, ppl: 13.504434 +epoch: 2, batch: 16427, sum loss: 4176.125488, avg loss: 2.616620, ppl: 13.689376 +epoch: 2, batch: 16428, sum loss: 5267.907227, avg loss: 2.676782, ppl: 14.538239 +epoch: 2, batch: 16429, sum loss: 4511.895996, avg loss: 2.539052, ppl: 12.667660 +epoch: 2, batch: 16430, sum loss: 3973.233154, avg loss: 2.440561, ppl: 11.479477 +epoch: 2, batch: 16431, sum loss: 3858.515137, avg loss: 2.393620, ppl: 10.953070 +epoch: 2, batch: 16432, sum loss: 3607.017822, avg loss: 2.324110, ppl: 10.217578 +epoch: 2, batch: 16433, sum loss: 3798.188477, avg loss: 2.232915, ppl: 9.327014 +epoch: 2, batch: 16434, sum loss: 5222.268066, avg loss: 2.735604, ppl: 15.419054 +epoch: 2, batch: 16435, sum loss: 4105.999512, avg loss: 2.408211, ppl: 11.114060 +epoch: 2, batch: 16436, sum loss: 3909.466309, avg loss: 2.370810, ppl: 10.706063 +epoch: 2, batch: 16437, sum loss: 4170.551758, avg loss: 2.426150, ppl: 11.315235 +epoch: 2, batch: 16438, sum loss: 4532.828613, avg loss: 2.535139, ppl: 12.618186 +epoch: 2, batch: 16439, sum loss: 4257.528809, avg loss: 2.301367, ppl: 9.987827 +epoch: 2, batch: 16440, sum loss: 3861.042480, avg loss: 2.456134, ppl: 11.659646 +epoch: 2, batch: 16441, sum loss: 4278.698242, avg loss: 2.424192, ppl: 11.293097 +epoch: 2, batch: 16442, sum loss: 4652.274414, avg loss: 2.904041, ppl: 18.247742 +epoch: 2, batch: 16443, sum loss: 4280.287598, avg loss: 2.663527, ppl: 14.346802 +epoch: 2, batch: 16444, sum loss: 3868.127197, avg loss: 2.373084, ppl: 10.730434 +epoch: 2, batch: 16445, sum loss: 
3361.818359, avg loss: 2.230802, ppl: 9.307326 +epoch: 2, batch: 16446, sum loss: 3851.789795, avg loss: 2.341513, ppl: 10.396959 +epoch: 2, batch: 16447, sum loss: 4513.417480, avg loss: 2.631730, ppl: 13.897794 +epoch: 2, batch: 16448, sum loss: 3761.286621, avg loss: 2.379055, ppl: 10.794700 +epoch: 2, batch: 16449, sum loss: 3846.723633, avg loss: 2.475369, ppl: 11.886095 +epoch: 2, batch: 16450, sum loss: 4248.605957, avg loss: 2.443132, ppl: 11.509027 +epoch: 2, batch: 16451, sum loss: 4503.208984, avg loss: 2.691697, ppl: 14.756699 +epoch: 2, batch: 16452, sum loss: 4190.353516, avg loss: 2.427783, ppl: 11.333728 +epoch: 2, batch: 16453, sum loss: 4930.690918, avg loss: 2.779420, ppl: 16.109669 +epoch: 2, batch: 16454, sum loss: 4417.154297, avg loss: 2.534225, ppl: 12.606657 +epoch: 2, batch: 16455, sum loss: 4883.353516, avg loss: 2.665586, ppl: 14.376371 +epoch: 2, batch: 16456, sum loss: 4049.576416, avg loss: 2.548506, ppl: 12.787988 +epoch: 2, batch: 16457, sum loss: 4107.880371, avg loss: 2.476118, ppl: 11.895002 +epoch: 2, batch: 16458, sum loss: 3938.657959, avg loss: 2.420810, ppl: 11.254972 +epoch: 2, batch: 16459, sum loss: 4374.591797, avg loss: 2.568756, ppl: 13.049582 +epoch: 2, batch: 16460, sum loss: 5388.875977, avg loss: 2.712066, ppl: 15.060361 +epoch: 2, batch: 16461, sum loss: 3803.697021, avg loss: 2.364013, ppl: 10.633538 +epoch: 2, batch: 16462, sum loss: 4880.996094, avg loss: 2.535582, ppl: 12.623779 +epoch: 2, batch: 16463, sum loss: 4425.244629, avg loss: 2.741787, ppl: 15.514688 +epoch: 2, batch: 16464, sum loss: 4474.306152, avg loss: 2.565542, ppl: 13.007712 +epoch: 2, batch: 16465, sum loss: 3412.339355, avg loss: 2.279452, ppl: 9.771320 +epoch: 2, batch: 16466, sum loss: 3938.238281, avg loss: 2.192783, ppl: 8.960115 +epoch: 2, batch: 16467, sum loss: 5550.224121, avg loss: 2.820236, ppl: 16.780809 +epoch: 2, batch: 16468, sum loss: 3148.052002, avg loss: 2.274604, ppl: 9.724069 +epoch: 2, batch: 16469, sum loss: 
4815.421875, avg loss: 2.767484, ppl: 15.918528 +epoch: 2, batch: 16470, sum loss: 5254.801758, avg loss: 2.681021, ppl: 14.599996 +epoch: 2, batch: 16471, sum loss: 4242.844238, avg loss: 2.348004, ppl: 10.464665 +epoch: 2, batch: 16472, sum loss: 3747.073975, avg loss: 2.297409, ppl: 9.948374 +epoch: 2, batch: 16473, sum loss: 4985.490234, avg loss: 2.839118, ppl: 17.100668 +epoch: 2, batch: 16474, sum loss: 4602.234375, avg loss: 2.603074, ppl: 13.505187 +epoch: 2, batch: 16475, sum loss: 4765.366211, avg loss: 2.816410, ppl: 16.716734 +epoch: 2, batch: 16476, sum loss: 5548.337402, avg loss: 2.734518, ppl: 15.402322 +epoch: 2, batch: 16477, sum loss: 4071.385742, avg loss: 2.544616, ppl: 12.738336 +epoch: 2, batch: 16478, sum loss: 4672.593262, avg loss: 2.557522, ppl: 12.903806 +epoch: 2, batch: 16479, sum loss: 4616.843750, avg loss: 2.527008, ppl: 12.516003 +epoch: 2, batch: 16480, sum loss: 4627.544922, avg loss: 2.904925, ppl: 18.263866 +epoch: 2, batch: 16481, sum loss: 3651.236084, avg loss: 2.426071, ppl: 11.314337 +epoch: 2, batch: 16482, sum loss: 4811.983398, avg loss: 2.756004, ppl: 15.736838 +epoch: 2, batch: 16483, sum loss: 3183.712891, avg loss: 2.298710, ppl: 9.961320 +epoch: 2, batch: 16484, sum loss: 4156.324219, avg loss: 2.505319, ppl: 12.247464 +epoch: 2, batch: 16485, sum loss: 5209.963379, avg loss: 2.938502, ppl: 18.887529 +epoch: 2, batch: 16486, sum loss: 3534.868896, avg loss: 2.385202, ppl: 10.861256 +epoch: 2, batch: 16487, sum loss: 3531.494629, avg loss: 2.407290, ppl: 11.103828 +epoch: 2, batch: 16488, sum loss: 3836.026611, avg loss: 2.460569, ppl: 11.711473 +epoch: 2, batch: 16489, sum loss: 4569.096680, avg loss: 2.552568, ppl: 12.840034 +epoch: 2, batch: 16490, sum loss: 3233.734863, avg loss: 2.234786, ppl: 9.344480 +epoch: 2, batch: 16491, sum loss: 4791.276855, avg loss: 2.543141, ppl: 12.719556 +epoch: 2, batch: 16492, sum loss: 4258.870605, avg loss: 2.576449, ppl: 13.150360 +epoch: 2, batch: 16493, sum loss: 
5076.845703, avg loss: 2.855369, ppl: 17.380848 +epoch: 2, batch: 16494, sum loss: 4588.976562, avg loss: 2.444846, ppl: 11.528778 +epoch: 2, batch: 16495, sum loss: 4284.605957, avg loss: 2.255056, ppl: 9.535827 +epoch: 2, batch: 16496, sum loss: 3906.282227, avg loss: 2.276388, ppl: 9.741435 +epoch: 2, batch: 16497, sum loss: 2937.759766, avg loss: 2.065935, ppl: 7.892675 +epoch: 2, batch: 16498, sum loss: 3553.829102, avg loss: 2.044781, ppl: 7.727466 +epoch: 2, batch: 16499, sum loss: 4793.604492, avg loss: 2.841496, ppl: 17.141397 +epoch: 2, batch: 16500, sum loss: 4471.406250, avg loss: 2.608755, ppl: 13.582132 +epoch: 2, batch: 16501, sum loss: 4002.272705, avg loss: 2.565559, ppl: 13.007933 +epoch: 2, batch: 16502, sum loss: 4768.750977, avg loss: 2.828441, ppl: 16.919058 +epoch: 2, batch: 16503, sum loss: 3574.782715, avg loss: 2.342584, ppl: 10.408092 +epoch: 2, batch: 16504, sum loss: 4029.039062, avg loss: 2.319539, ppl: 10.170985 +epoch: 2, batch: 16505, sum loss: 5029.526367, avg loss: 2.726031, ppl: 15.272145 +epoch: 2, batch: 16506, sum loss: 3414.466309, avg loss: 2.161055, ppl: 8.680287 +epoch: 2, batch: 16507, sum loss: 4617.663086, avg loss: 2.539969, ppl: 12.679275 +epoch: 2, batch: 16508, sum loss: 3304.042236, avg loss: 2.284953, ppl: 9.825226 +epoch: 2, batch: 16509, sum loss: 4948.886719, avg loss: 2.597841, ppl: 13.434698 +epoch: 2, batch: 16510, sum loss: 4556.598145, avg loss: 2.402002, ppl: 11.045268 +epoch: 2, batch: 16511, sum loss: 4170.146973, avg loss: 2.686950, ppl: 14.686819 +epoch: 2, batch: 16512, sum loss: 3854.100586, avg loss: 2.306464, ppl: 10.038861 +epoch: 2, batch: 16513, sum loss: 4390.702637, avg loss: 2.657810, ppl: 14.265021 +epoch: 2, batch: 16514, sum loss: 5388.180664, avg loss: 2.699489, ppl: 14.872135 +epoch: 2, batch: 16515, sum loss: 4477.324219, avg loss: 2.511119, ppl: 12.318703 +epoch: 2, batch: 16516, sum loss: 4696.638672, avg loss: 2.651970, ppl: 14.181949 +epoch: 2, batch: 16517, sum loss: 4360.072754, 
avg loss: 2.655343, ppl: 14.229863 +epoch: 2, batch: 16518, sum loss: 3925.724854, avg loss: 2.418808, ppl: 11.232459 +epoch: 2, batch: 16519, sum loss: 3770.204102, avg loss: 2.360804, ppl: 10.599471 +epoch: 2, batch: 16520, sum loss: 4221.779297, avg loss: 2.296942, ppl: 9.943727 +epoch: 2, batch: 16521, sum loss: 3739.565186, avg loss: 2.400234, ppl: 11.025762 +epoch: 2, batch: 16522, sum loss: 4553.730957, avg loss: 2.774973, ppl: 16.038197 +epoch: 2, batch: 16523, sum loss: 4615.165527, avg loss: 2.683236, ppl: 14.632362 +epoch: 2, batch: 16524, sum loss: 4608.372070, avg loss: 2.639389, ppl: 14.004638 +epoch: 2, batch: 16525, sum loss: 4527.251953, avg loss: 2.546261, ppl: 12.759309 +epoch: 2, batch: 16526, sum loss: 4780.344727, avg loss: 2.648390, ppl: 14.131273 +epoch: 2, batch: 16527, sum loss: 4376.283691, avg loss: 2.392720, ppl: 10.943213 +epoch: 2, batch: 16528, sum loss: 4385.090820, avg loss: 2.613284, ppl: 13.643785 +epoch: 2, batch: 16529, sum loss: 4501.909668, avg loss: 2.543452, ppl: 12.723515 +epoch: 2, batch: 16530, sum loss: 5384.970703, avg loss: 2.750241, ppl: 15.646396 +epoch: 2, batch: 16531, sum loss: 3941.354004, avg loss: 2.615364, ppl: 13.672196 +epoch: 2, batch: 16532, sum loss: 6088.443359, avg loss: 2.756199, ppl: 15.739900 +epoch: 2, batch: 16533, sum loss: 3710.667969, avg loss: 2.426859, ppl: 11.323264 +epoch: 2, batch: 16534, sum loss: 4324.084961, avg loss: 2.586175, ppl: 13.278889 +epoch: 2, batch: 16535, sum loss: 4709.106934, avg loss: 2.771693, ppl: 15.985682 +epoch: 2, batch: 16536, sum loss: 4807.553711, avg loss: 2.796715, ppl: 16.390722 +epoch: 2, batch: 16537, sum loss: 3332.356934, avg loss: 2.259225, ppl: 9.575666 +epoch: 2, batch: 16538, sum loss: 4651.819336, avg loss: 2.453491, ppl: 11.628878 +epoch: 2, batch: 16539, sum loss: 4581.827637, avg loss: 2.631722, ppl: 13.897677 +epoch: 2, batch: 16540, sum loss: 5451.162598, avg loss: 2.628333, ppl: 13.850659 +epoch: 2, batch: 16541, sum loss: 5118.946289, avg loss: 
2.737405, ppl: 15.446842 +epoch: 2, batch: 16542, sum loss: 4227.389160, avg loss: 2.662084, ppl: 14.326115 +epoch: 2, batch: 16543, sum loss: 4008.708984, avg loss: 2.393259, ppl: 10.949122 +epoch: 2, batch: 16544, sum loss: 4094.973145, avg loss: 2.546625, ppl: 12.763954 +epoch: 2, batch: 16545, sum loss: 5095.637207, avg loss: 2.686156, ppl: 14.675150 +epoch: 2, batch: 16546, sum loss: 4003.566895, avg loss: 2.551668, ppl: 12.828483 +epoch: 2, batch: 16547, sum loss: 3596.858643, avg loss: 2.289535, ppl: 9.870342 +epoch: 2, batch: 16548, sum loss: 3586.867432, avg loss: 2.362890, ppl: 10.621606 +epoch: 2, batch: 16549, sum loss: 5493.672852, avg loss: 2.762028, ppl: 15.831910 +epoch: 2, batch: 16550, sum loss: 4074.967041, avg loss: 2.402693, ppl: 11.052899 +epoch: 2, batch: 16551, sum loss: 4235.909668, avg loss: 2.509425, ppl: 12.297859 +epoch: 2, batch: 16552, sum loss: 4325.520996, avg loss: 2.573183, ppl: 13.107480 +epoch: 2, batch: 16553, sum loss: 4042.505615, avg loss: 2.457450, ppl: 11.675004 +epoch: 2, batch: 16554, sum loss: 4743.157227, avg loss: 2.461421, ppl: 11.721451 +epoch: 2, batch: 16555, sum loss: 5009.267090, avg loss: 2.671609, ppl: 14.463224 +epoch: 2, batch: 16556, sum loss: 4595.407227, avg loss: 2.533301, ppl: 12.595009 +epoch: 2, batch: 16557, sum loss: 3654.655029, avg loss: 2.396495, ppl: 10.984609 +epoch: 2, batch: 16558, sum loss: 4093.381348, avg loss: 2.423553, ppl: 11.285890 +epoch: 2, batch: 16559, sum loss: 4480.341797, avg loss: 2.371806, ppl: 10.716731 +epoch: 2, batch: 16560, sum loss: 4160.640625, avg loss: 2.488422, ppl: 12.042254 +epoch: 2, batch: 16561, sum loss: 4012.081543, avg loss: 2.403884, ppl: 11.066071 +epoch: 2, batch: 16562, sum loss: 3783.050781, avg loss: 2.261238, ppl: 9.594959 +epoch: 2, batch: 16563, sum loss: 5186.576172, avg loss: 2.623458, ppl: 13.783302 +epoch: 2, batch: 16564, sum loss: 4646.771484, avg loss: 2.661381, ppl: 14.316050 +epoch: 2, batch: 16565, sum loss: 3695.335205, avg loss: 2.391803, 
ppl: 10.933187 +epoch: 2, batch: 16566, sum loss: 4784.906738, avg loss: 2.472820, ppl: 11.855831 +epoch: 2, batch: 16567, sum loss: 4080.688965, avg loss: 2.505027, ppl: 12.243890 +epoch: 2, batch: 16568, sum loss: 4250.457520, avg loss: 2.383880, ppl: 10.846907 +epoch: 2, batch: 16569, sum loss: 4709.114258, avg loss: 2.859207, ppl: 17.447689 +epoch: 2, batch: 16570, sum loss: 4053.493164, avg loss: 2.509903, ppl: 12.303736 +epoch: 2, batch: 16571, sum loss: 4558.708496, avg loss: 2.669033, ppl: 14.426013 +epoch: 2, batch: 16572, sum loss: 5395.222656, avg loss: 2.832138, ppl: 16.981726 +epoch: 2, batch: 16573, sum loss: 3805.152344, avg loss: 2.299186, ppl: 9.966064 +epoch: 2, batch: 16574, sum loss: 3949.392090, avg loss: 2.382022, ppl: 10.826772 +epoch: 2, batch: 16575, sum loss: 3451.636963, avg loss: 2.417113, ppl: 11.213437 +epoch: 2, batch: 16576, sum loss: 3505.080322, avg loss: 2.413967, ppl: 11.178219 +epoch: 2, batch: 16577, sum loss: 4667.552246, avg loss: 2.628126, ppl: 13.847800 +epoch: 2, batch: 16578, sum loss: 3869.105225, avg loss: 2.434931, ppl: 11.415029 +epoch: 2, batch: 16579, sum loss: 4186.952637, avg loss: 2.396653, ppl: 10.986342 +epoch: 2, batch: 16580, sum loss: 4497.532227, avg loss: 2.491708, ppl: 12.081889 +epoch: 2, batch: 16581, sum loss: 4000.988770, avg loss: 2.380124, ppl: 10.806244 +epoch: 2, batch: 16582, sum loss: 4346.400879, avg loss: 2.597968, ppl: 13.436412 +epoch: 2, batch: 16583, sum loss: 4840.020996, avg loss: 2.489723, ppl: 12.057935 +epoch: 2, batch: 16584, sum loss: 3470.298096, avg loss: 1.985296, ppl: 7.281206 +epoch: 2, batch: 16585, sum loss: 4905.174805, avg loss: 2.693671, ppl: 14.785855 +epoch: 2, batch: 16586, sum loss: 4264.942871, avg loss: 2.441295, ppl: 11.487909 +epoch: 2, batch: 16587, sum loss: 4640.973633, avg loss: 2.557010, ppl: 12.897202 +epoch: 2, batch: 16588, sum loss: 4122.625977, avg loss: 2.201082, ppl: 9.034781 +epoch: 2, batch: 16589, sum loss: 4938.006348, avg loss: 2.761749, ppl: 
15.827494 +epoch: 2, batch: 16590, sum loss: 3999.812012, avg loss: 2.476664, ppl: 11.901493 +epoch: 2, batch: 16591, sum loss: 4526.243652, avg loss: 2.734890, ppl: 15.408055 +epoch: 2, batch: 16592, sum loss: 4981.989258, avg loss: 2.747926, ppl: 15.610219 +epoch: 2, batch: 16593, sum loss: 4114.472656, avg loss: 2.421703, ppl: 11.265023 +epoch: 2, batch: 16594, sum loss: 4686.170898, avg loss: 2.590476, ppl: 13.336119 +epoch: 2, batch: 16595, sum loss: 3135.669434, avg loss: 2.395469, ppl: 10.973348 +epoch: 2, batch: 16596, sum loss: 5060.929688, avg loss: 2.915282, ppl: 18.454016 +epoch: 2, batch: 16597, sum loss: 4695.984375, avg loss: 2.666658, ppl: 14.391787 +epoch: 2, batch: 16598, sum loss: 4491.947754, avg loss: 2.733991, ppl: 15.394205 +epoch: 2, batch: 16599, sum loss: 5046.496094, avg loss: 2.695778, ppl: 14.817040 +epoch: 2, batch: 16600, sum loss: 4326.486328, avg loss: 2.704054, ppl: 14.940175 +epoch: 2, batch: 16601, sum loss: 4384.782227, avg loss: 2.216775, ppl: 9.177689 +epoch: 2, batch: 16602, sum loss: 4408.764160, avg loss: 2.479620, ppl: 11.936728 +epoch: 2, batch: 16603, sum loss: 4535.081055, avg loss: 2.707511, ppl: 14.991917 +epoch: 2, batch: 16604, sum loss: 4569.993164, avg loss: 2.377728, ppl: 10.780382 +epoch: 2, batch: 16605, sum loss: 4662.767578, avg loss: 2.768864, ppl: 15.940521 +epoch: 2, batch: 16606, sum loss: 4769.987305, avg loss: 2.667778, ppl: 14.407923 +epoch: 2, batch: 16607, sum loss: 5157.613281, avg loss: 2.543202, ppl: 12.720335 +epoch: 2, batch: 16608, sum loss: 4304.567383, avg loss: 2.707275, ppl: 14.988379 +epoch: 2, batch: 16609, sum loss: 4180.198730, avg loss: 2.555134, ppl: 12.873022 +epoch: 2, batch: 16610, sum loss: 3973.363525, avg loss: 2.477160, ppl: 11.907393 +epoch: 2, batch: 16611, sum loss: 4412.049805, avg loss: 2.580146, ppl: 13.199066 +epoch: 2, batch: 16612, sum loss: 4462.072266, avg loss: 2.709212, ppl: 15.017438 +epoch: 2, batch: 16613, sum loss: 4127.456055, avg loss: 2.552539, ppl: 
12.839666 +epoch: 2, batch: 16614, sum loss: 4339.333984, avg loss: 2.451601, ppl: 11.606917 +epoch: 2, batch: 16615, sum loss: 3331.987793, avg loss: 2.193540, ppl: 8.966903 +epoch: 2, batch: 16616, sum loss: 4995.581543, avg loss: 2.641767, ppl: 14.037987 +epoch: 2, batch: 16617, sum loss: 4152.861328, avg loss: 2.494211, ppl: 12.112173 +epoch: 2, batch: 16618, sum loss: 4834.924805, avg loss: 2.717777, ppl: 15.146610 +epoch: 2, batch: 16619, sum loss: 4223.683594, avg loss: 2.633219, ppl: 13.918505 +epoch: 2, batch: 16620, sum loss: 4339.763184, avg loss: 2.659169, ppl: 14.284410 +epoch: 2, batch: 16621, sum loss: 4110.443848, avg loss: 2.313137, ppl: 10.106073 +epoch: 2, batch: 16622, sum loss: 4271.114746, avg loss: 2.629997, ppl: 13.873725 +epoch: 2, batch: 16623, sum loss: 4202.587402, avg loss: 2.494117, ppl: 12.111035 +epoch: 2, batch: 16624, sum loss: 4153.321777, avg loss: 2.385596, ppl: 10.865532 +epoch: 2, batch: 16625, sum loss: 5298.277832, avg loss: 2.856215, ppl: 17.395552 +epoch: 2, batch: 16626, sum loss: 4074.311279, avg loss: 2.370164, ppl: 10.699143 +epoch: 2, batch: 16627, sum loss: 4893.600586, avg loss: 2.782036, ppl: 16.151867 +epoch: 2, batch: 16628, sum loss: 4904.819336, avg loss: 2.797957, ppl: 16.411091 +epoch: 2, batch: 16629, sum loss: 3893.190918, avg loss: 2.376795, ppl: 10.770331 +epoch: 2, batch: 16630, sum loss: 4119.010742, avg loss: 2.571168, ppl: 13.081090 +epoch: 2, batch: 16631, sum loss: 3638.450195, avg loss: 2.294105, ppl: 9.915556 +epoch: 2, batch: 16632, sum loss: 4058.447754, avg loss: 2.377532, ppl: 10.778275 +epoch: 2, batch: 16633, sum loss: 4975.974121, avg loss: 2.618934, ppl: 13.721088 +epoch: 2, batch: 16634, sum loss: 4312.389648, avg loss: 2.501386, ppl: 12.199392 +epoch: 2, batch: 16635, sum loss: 3704.813965, avg loss: 2.479795, ppl: 11.938819 +epoch: 2, batch: 16636, sum loss: 3567.457764, avg loss: 2.462014, ppl: 11.728406 +epoch: 2, batch: 16637, sum loss: 4604.259766, avg loss: 2.570776, ppl: 13.075964 
+epoch: 2, batch: 16638, sum loss: 3335.170410, avg loss: 2.418543, ppl: 11.229482 +epoch: 2, batch: 16639, sum loss: 4534.873047, avg loss: 2.603256, ppl: 13.507653 +epoch: 2, batch: 16640, sum loss: 3367.695801, avg loss: 2.215590, ppl: 9.166812 +epoch: 2, batch: 16641, sum loss: 3875.476562, avg loss: 2.377593, ppl: 10.778927 +epoch: 2, batch: 16642, sum loss: 5588.954102, avg loss: 2.793081, ppl: 16.331251 +epoch: 2, batch: 16643, sum loss: 4582.344727, avg loss: 2.619980, ppl: 13.735447 +epoch: 2, batch: 16644, sum loss: 4505.955078, avg loss: 2.597092, ppl: 13.424647 +epoch: 2, batch: 16645, sum loss: 4573.085938, avg loss: 2.539193, ppl: 12.669438 +epoch: 2, batch: 16646, sum loss: 5177.419434, avg loss: 2.657813, ppl: 14.265055 +epoch: 2, batch: 16647, sum loss: 4795.766113, avg loss: 2.731074, ppl: 15.349365 +epoch: 2, batch: 16648, sum loss: 4431.318848, avg loss: 2.829705, ppl: 16.940472 +epoch: 2, batch: 16649, sum loss: 4081.028564, avg loss: 2.470356, ppl: 11.826662 +epoch: 2, batch: 16650, sum loss: 3279.442139, avg loss: 2.309466, ppl: 10.069050 +epoch: 2, batch: 16651, sum loss: 4970.745117, avg loss: 2.755402, ppl: 15.727360 +epoch: 2, batch: 16652, sum loss: 3911.610840, avg loss: 2.606003, ppl: 13.544807 +epoch: 2, batch: 16653, sum loss: 4552.590820, avg loss: 2.665451, ppl: 14.374435 +epoch: 2, batch: 16654, sum loss: 3353.037354, avg loss: 2.205946, ppl: 9.078834 +epoch: 2, batch: 16655, sum loss: 4620.828613, avg loss: 2.643495, ppl: 14.062263 +epoch: 2, batch: 16656, sum loss: 4351.291016, avg loss: 2.629179, ppl: 13.862384 +epoch: 2, batch: 16657, sum loss: 4447.038086, avg loss: 2.728244, ppl: 15.305991 +epoch: 2, batch: 16658, sum loss: 4392.081055, avg loss: 2.639472, ppl: 14.005803 +epoch: 2, batch: 16659, sum loss: 3864.174316, avg loss: 2.666787, ppl: 14.393650 +epoch: 2, batch: 16660, sum loss: 4196.083008, avg loss: 2.540002, ppl: 12.679695 +epoch: 2, batch: 16661, sum loss: 5216.130859, avg loss: 2.592510, ppl: 13.363277 +epoch: 
2, batch: 16662, sum loss: 4294.860352, avg loss: 2.612445, ppl: 13.632346 +epoch: 2, batch: 16663, sum loss: 4053.265137, avg loss: 2.364799, ppl: 10.641897 +epoch: 2, batch: 16664, sum loss: 5095.175781, avg loss: 2.770623, ppl: 15.968583 +epoch: 2, batch: 16665, sum loss: 3918.784424, avg loss: 2.357873, ppl: 10.568445 +epoch: 2, batch: 16666, sum loss: 4541.189453, avg loss: 2.696668, ppl: 14.830240 +epoch: 2, batch: 16667, sum loss: 3821.488770, avg loss: 2.304879, ppl: 10.022963 +epoch: 2, batch: 16668, sum loss: 4018.873535, avg loss: 2.303079, ppl: 10.004944 +epoch: 2, batch: 16669, sum loss: 5192.109863, avg loss: 2.657170, ppl: 14.255885 +epoch: 2, batch: 16670, sum loss: 4067.878906, avg loss: 2.615999, ppl: 13.680883 +epoch: 2, batch: 16671, sum loss: 4184.128418, avg loss: 2.493521, ppl: 12.103819 +epoch: 2, batch: 16672, sum loss: 4062.111816, avg loss: 2.346685, ppl: 10.450867 +epoch: 2, batch: 16673, sum loss: 3629.520752, avg loss: 2.518752, ppl: 12.413091 +epoch: 2, batch: 16674, sum loss: 3875.563477, avg loss: 2.343146, ppl: 10.413948 +epoch: 2, batch: 16675, sum loss: 2999.947021, avg loss: 2.099333, ppl: 8.160726 +epoch: 2, batch: 16676, sum loss: 5341.957031, avg loss: 2.827929, ppl: 16.910395 +epoch: 2, batch: 16677, sum loss: 3845.979004, avg loss: 2.298852, ppl: 9.962736 +epoch: 2, batch: 16678, sum loss: 4180.009766, avg loss: 2.539496, ppl: 12.673278 +epoch: 2, batch: 16679, sum loss: 4214.528809, avg loss: 2.582432, ppl: 13.229273 +epoch: 2, batch: 16680, sum loss: 4424.508789, avg loss: 2.411177, ppl: 11.147070 +epoch: 2, batch: 16681, sum loss: 4791.769531, avg loss: 2.654720, ppl: 14.221002 +epoch: 2, batch: 16682, sum loss: 4698.550781, avg loss: 2.638153, ppl: 13.987350 +epoch: 2, batch: 16683, sum loss: 4095.885986, avg loss: 2.452626, ppl: 11.618823 +epoch: 2, batch: 16684, sum loss: 3767.817871, avg loss: 2.537251, ppl: 12.644862 +epoch: 2, batch: 16685, sum loss: 3680.901123, avg loss: 2.500612, ppl: 12.189955 +epoch: 2, batch: 
16686, sum loss: 4189.101562, avg loss: 2.509947, ppl: 12.304278 +epoch: 2, batch: 16687, sum loss: 4618.149414, avg loss: 2.845440, ppl: 17.209131 +epoch: 2, batch: 16688, sum loss: 4521.867676, avg loss: 2.567784, ppl: 13.036903 +epoch: 2, batch: 16689, sum loss: 4232.731445, avg loss: 2.638860, ppl: 13.997237 +epoch: 2, batch: 16690, sum loss: 4547.123047, avg loss: 2.721199, ppl: 15.198531 +epoch: 2, batch: 16691, sum loss: 4545.272461, avg loss: 2.595815, ppl: 13.407516 +epoch: 2, batch: 16692, sum loss: 4798.192383, avg loss: 2.607713, ppl: 13.567988 +epoch: 2, batch: 16693, sum loss: 4509.224121, avg loss: 2.451998, ppl: 11.611524 +epoch: 2, batch: 16694, sum loss: 4176.901367, avg loss: 2.747962, ppl: 15.610777 +epoch: 2, batch: 16695, sum loss: 4493.105469, avg loss: 2.724746, ppl: 15.252532 +epoch: 2, batch: 16696, sum loss: 3216.210693, avg loss: 2.323852, ppl: 10.214942 +epoch: 2, batch: 16697, sum loss: 3454.941406, avg loss: 2.249311, ppl: 9.481201 +epoch: 2, batch: 16698, sum loss: 4492.903320, avg loss: 2.644440, ppl: 14.075560 +epoch: 2, batch: 16699, sum loss: 5079.070801, avg loss: 2.630280, ppl: 13.877655 +epoch: 2, batch: 16700, sum loss: 4279.380371, avg loss: 2.606200, ppl: 13.547476 +epoch: 2, batch: 16701, sum loss: 4543.973145, avg loss: 2.695121, ppl: 14.807304 +epoch: 2, batch: 16702, sum loss: 3859.624756, avg loss: 2.477294, ppl: 11.909000 +epoch: 2, batch: 16703, sum loss: 3703.755127, avg loss: 2.427100, ppl: 11.325994 +epoch: 2, batch: 16704, sum loss: 5111.887695, avg loss: 2.752767, ppl: 15.685968 +epoch: 2, batch: 16705, sum loss: 4063.179199, avg loss: 2.446225, ppl: 11.544680 +epoch: 2, batch: 16706, sum loss: 4195.776855, avg loss: 2.443667, ppl: 11.515194 +epoch: 2, batch: 16707, sum loss: 4553.301270, avg loss: 2.569583, ppl: 13.060379 +epoch: 2, batch: 16708, sum loss: 3748.660156, avg loss: 2.450105, ppl: 11.589561 +epoch: 2, batch: 16709, sum loss: 4728.700684, avg loss: 2.781589, ppl: 16.144651 +epoch: 2, batch: 16710, 
sum loss: 3986.378662, avg loss: 2.537479, ppl: 12.647742 +epoch: 2, batch: 16711, sum loss: 3880.122803, avg loss: 2.549358, ppl: 12.798886 +epoch: 2, batch: 16712, sum loss: 3776.740234, avg loss: 2.483064, ppl: 11.977908 +epoch: 2, batch: 16713, sum loss: 4477.470703, avg loss: 2.571781, ppl: 13.089114 +epoch: 2, batch: 16714, sum loss: 3386.989746, avg loss: 2.251988, ppl: 9.506614 +epoch: 2, batch: 16715, sum loss: 4747.339355, avg loss: 2.492042, ppl: 12.085925 +epoch: 2, batch: 16716, sum loss: 3852.378662, avg loss: 2.456874, ppl: 11.668281 +epoch: 2, batch: 16717, sum loss: 5603.563477, avg loss: 2.935340, ppl: 18.827902 +epoch: 2, batch: 16718, sum loss: 4302.095703, avg loss: 2.662188, ppl: 14.327601 +epoch: 2, batch: 16719, sum loss: 5003.399414, avg loss: 2.645901, ppl: 14.096147 +epoch: 2, batch: 16720, sum loss: 4302.396484, avg loss: 2.529334, ppl: 12.545143 +epoch: 2, batch: 16721, sum loss: 4246.368164, avg loss: 2.597167, ppl: 13.425650 +epoch: 2, batch: 16722, sum loss: 3884.352295, avg loss: 2.460008, ppl: 11.704902 +epoch: 2, batch: 16723, sum loss: 3542.639160, avg loss: 2.069299, ppl: 7.919266 +epoch: 2, batch: 16724, sum loss: 3805.767578, avg loss: 2.438032, ppl: 11.450483 +epoch: 2, batch: 16725, sum loss: 3937.218018, avg loss: 2.418438, ppl: 11.228312 +epoch: 2, batch: 16726, sum loss: 4685.550293, avg loss: 2.624958, ppl: 13.803998 +epoch: 2, batch: 16727, sum loss: 3710.628174, avg loss: 2.329334, ppl: 10.271094 +epoch: 2, batch: 16728, sum loss: 4103.796387, avg loss: 2.550526, ppl: 12.813841 +epoch: 2, batch: 16729, sum loss: 3805.823242, avg loss: 2.617485, ppl: 13.701222 +epoch: 2, batch: 16730, sum loss: 4802.313477, avg loss: 2.429091, ppl: 11.348564 +epoch: 2, batch: 16731, sum loss: 4504.181152, avg loss: 2.596070, ppl: 13.410927 +epoch: 2, batch: 16732, sum loss: 4269.780273, avg loss: 2.355091, ppl: 10.539091 +epoch: 2, batch: 16733, sum loss: 4641.803223, avg loss: 2.533736, ppl: 12.600488 +epoch: 2, batch: 16734, sum loss: 
4540.837891, avg loss: 2.603691, ppl: 13.513529 +epoch: 2, batch: 16735, sum loss: 4042.958496, avg loss: 2.472757, ppl: 11.855090 +epoch: 2, batch: 16736, sum loss: 3772.340088, avg loss: 2.353300, ppl: 10.520230 +epoch: 2, batch: 16737, sum loss: 3905.716064, avg loss: 2.453339, ppl: 11.627106 +epoch: 2, batch: 16738, sum loss: 3926.147949, avg loss: 2.382371, ppl: 10.830556 +epoch: 2, batch: 16739, sum loss: 4598.170410, avg loss: 2.295642, ppl: 9.930806 +epoch: 2, batch: 16740, sum loss: 3358.532715, avg loss: 2.337183, ppl: 10.352038 +epoch: 2, batch: 16741, sum loss: 4055.529053, avg loss: 2.437217, ppl: 11.441155 +epoch: 2, batch: 16742, sum loss: 4398.675781, avg loss: 2.615146, ppl: 13.669214 +epoch: 2, batch: 16743, sum loss: 5207.935547, avg loss: 2.649001, ppl: 14.139904 +epoch: 2, batch: 16744, sum loss: 4512.431152, avg loss: 2.623507, ppl: 13.783973 +epoch: 2, batch: 16745, sum loss: 3796.927979, avg loss: 2.459150, ppl: 11.694870 +epoch: 2, batch: 16746, sum loss: 4788.322266, avg loss: 2.769417, ppl: 15.949337 +epoch: 2, batch: 16747, sum loss: 4206.763184, avg loss: 2.502536, ppl: 12.213428 +epoch: 2, batch: 16748, sum loss: 5554.339355, avg loss: 2.823762, ppl: 16.840078 +epoch: 2, batch: 16749, sum loss: 4530.956543, avg loss: 2.525617, ppl: 12.498603 +epoch: 2, batch: 16750, sum loss: 3745.182373, avg loss: 2.183780, ppl: 8.879807 +epoch: 2, batch: 16751, sum loss: 4316.730469, avg loss: 2.362742, ppl: 10.620036 +epoch: 2, batch: 16752, sum loss: 5306.921387, avg loss: 2.812359, ppl: 16.649145 +epoch: 2, batch: 16753, sum loss: 3351.683105, avg loss: 2.270788, ppl: 9.687031 +epoch: 2, batch: 16754, sum loss: 3431.007080, avg loss: 2.479051, ppl: 11.929942 +epoch: 2, batch: 16755, sum loss: 4880.876465, avg loss: 2.528952, ppl: 12.540353 +epoch: 2, batch: 16756, sum loss: 4222.657227, avg loss: 2.675955, ppl: 14.526217 +epoch: 2, batch: 16757, sum loss: 5537.129883, avg loss: 2.892962, ppl: 18.046688 +epoch: 2, batch: 16758, sum loss: 
4382.660645, avg loss: 2.616514, ppl: 13.687923 +epoch: 2, batch: 16759, sum loss: 4360.006348, avg loss: 2.655303, ppl: 14.229290 +epoch: 2, batch: 16760, sum loss: 4478.136230, avg loss: 2.600544, ppl: 13.471061 +epoch: 2, batch: 16761, sum loss: 4412.529785, avg loss: 2.537395, ppl: 12.646684 +epoch: 2, batch: 16762, sum loss: 4323.024414, avg loss: 2.395027, ppl: 10.968498 +epoch: 2, batch: 16763, sum loss: 4585.276855, avg loss: 2.632191, ppl: 13.904200 +epoch: 2, batch: 16764, sum loss: 3792.205078, avg loss: 2.351026, ppl: 10.496334 +epoch: 2, batch: 16765, sum loss: 4073.179688, avg loss: 2.653537, ppl: 14.204194 +epoch: 2, batch: 16766, sum loss: 3900.295654, avg loss: 2.428577, ppl: 11.342734 +epoch: 2, batch: 16767, sum loss: 4212.450195, avg loss: 2.358595, ppl: 10.576080 +epoch: 2, batch: 16768, sum loss: 4483.032715, avg loss: 2.730227, ppl: 15.336371 +epoch: 2, batch: 16769, sum loss: 3540.674561, avg loss: 2.362025, ppl: 10.612415 +epoch: 2, batch: 16770, sum loss: 4483.977539, avg loss: 2.625279, ppl: 13.808432 +epoch: 2, batch: 16771, sum loss: 4737.594238, avg loss: 2.655602, ppl: 14.233555 +epoch: 2, batch: 16772, sum loss: 5057.723145, avg loss: 2.588395, ppl: 13.308390 +epoch: 2, batch: 16773, sum loss: 4188.298828, avg loss: 2.524592, ppl: 12.485805 +epoch: 2, batch: 16774, sum loss: 5828.317871, avg loss: 2.847249, ppl: 17.240280 +epoch: 2, batch: 16775, sum loss: 4622.062988, avg loss: 2.466416, ppl: 11.780147 +epoch: 2, batch: 16776, sum loss: 5286.439941, avg loss: 2.661853, ppl: 14.322803 +epoch: 2, batch: 16777, sum loss: 3721.432617, avg loss: 2.485927, ppl: 12.012249 +epoch: 2, batch: 16778, sum loss: 4855.997559, avg loss: 2.821614, ppl: 16.803947 +epoch: 2, batch: 16779, sum loss: 3457.180664, avg loss: 2.281967, ppl: 9.795934 +epoch: 2, batch: 16780, sum loss: 4579.718750, avg loss: 2.429559, ppl: 11.353874 +epoch: 2, batch: 16781, sum loss: 4012.447754, avg loss: 2.436216, ppl: 11.429708 +epoch: 2, batch: 16782, sum loss: 
3853.878662, avg loss: 2.347064, ppl: 10.454827 +epoch: 2, batch: 16783, sum loss: 3977.171387, avg loss: 2.739099, ppl: 15.473034 +epoch: 2, batch: 16784, sum loss: 4418.824219, avg loss: 2.517848, ppl: 12.401885 +epoch: 2, batch: 16785, sum loss: 3862.834473, avg loss: 2.414272, ppl: 11.181623 +epoch: 2, batch: 16786, sum loss: 4524.242188, avg loss: 2.572054, ppl: 13.092685 +epoch: 2, batch: 16787, sum loss: 4538.731445, avg loss: 2.499301, ppl: 12.173986 +epoch: 2, batch: 16788, sum loss: 4032.399658, avg loss: 2.728281, ppl: 15.306557 +epoch: 2, batch: 16789, sum loss: 3437.947021, avg loss: 2.164954, ppl: 8.714203 +epoch: 2, batch: 16790, sum loss: 4327.314453, avg loss: 2.562057, ppl: 12.962457 +epoch: 2, batch: 16791, sum loss: 3332.105469, avg loss: 2.216970, ppl: 9.179472 +epoch: 2, batch: 16792, sum loss: 4262.078613, avg loss: 2.529423, ppl: 12.546270 +epoch: 2, batch: 16793, sum loss: 3496.121094, avg loss: 2.125302, ppl: 8.375423 +epoch: 2, batch: 16794, sum loss: 4926.239258, avg loss: 2.862428, ppl: 17.503983 +epoch: 2, batch: 16795, sum loss: 4796.241211, avg loss: 2.562095, ppl: 12.962942 +epoch: 2, batch: 16796, sum loss: 4307.529785, avg loss: 2.607464, ppl: 13.564602 +epoch: 2, batch: 16797, sum loss: 4269.732910, avg loss: 2.595582, ppl: 13.404389 +epoch: 2, batch: 16798, sum loss: 3667.165039, avg loss: 2.436655, ppl: 11.434723 +epoch: 2, batch: 16799, sum loss: 3791.906494, avg loss: 2.454309, ppl: 11.638383 +epoch: 2, batch: 16800, sum loss: 4959.931152, avg loss: 2.843997, ppl: 17.184315 +epoch: 2, batch: 16801, sum loss: 3909.791260, avg loss: 2.341192, ppl: 10.393623 +epoch: 2, batch: 16802, sum loss: 4945.481934, avg loss: 2.806744, ppl: 16.555918 +epoch: 2, batch: 16803, sum loss: 4103.578613, avg loss: 2.548806, ppl: 12.791826 +epoch: 2, batch: 16804, sum loss: 4502.823730, avg loss: 2.382447, ppl: 10.831369 +epoch: 2, batch: 16805, sum loss: 4460.928711, avg loss: 2.578572, ppl: 13.178300 +epoch: 2, batch: 16806, sum loss: 
4035.436035, avg loss: 2.487938, ppl: 12.036436 +epoch: 2, batch: 16807, sum loss: 3621.949951, avg loss: 2.258074, ppl: 9.564646 +epoch: 2, batch: 16808, sum loss: 3362.906982, avg loss: 2.116367, ppl: 8.300924 +epoch: 2, batch: 16809, sum loss: 4796.004883, avg loss: 2.688344, ppl: 14.707297 +epoch: 2, batch: 16810, sum loss: 4649.664551, avg loss: 2.495794, ppl: 12.131363 +epoch: 2, batch: 16811, sum loss: 3928.518066, avg loss: 2.491134, ppl: 12.074961 +epoch: 2, batch: 16812, sum loss: 4329.188477, avg loss: 2.419893, ppl: 11.244657 +epoch: 2, batch: 16813, sum loss: 4845.980469, avg loss: 2.733210, ppl: 15.382178 +epoch: 2, batch: 16814, sum loss: 4272.377441, avg loss: 2.494091, ppl: 12.110718 +epoch: 2, batch: 16815, sum loss: 4239.910645, avg loss: 2.636760, ppl: 13.967877 +epoch: 2, batch: 16816, sum loss: 5219.364258, avg loss: 2.664300, ppl: 14.357902 +epoch: 2, batch: 16817, sum loss: 4001.629639, avg loss: 2.422294, ppl: 11.271688 +epoch: 2, batch: 16818, sum loss: 4917.426758, avg loss: 2.684185, ppl: 14.646260 +epoch: 2, batch: 16819, sum loss: 4672.775391, avg loss: 2.598874, ppl: 13.448585 +epoch: 2, batch: 16820, sum loss: 4784.484375, avg loss: 2.650684, ppl: 14.163722 +epoch: 2, batch: 16821, sum loss: 5116.597656, avg loss: 2.727397, ppl: 15.293035 +epoch: 2, batch: 16822, sum loss: 3737.226562, avg loss: 2.343089, ppl: 10.413352 +epoch: 2, batch: 16823, sum loss: 4351.744141, avg loss: 2.618378, ppl: 13.713461 +epoch: 2, batch: 16824, sum loss: 3990.032227, avg loss: 2.429983, ppl: 11.358690 +epoch: 2, batch: 16825, sum loss: 4136.803711, avg loss: 2.459455, ppl: 11.698437 +epoch: 2, batch: 16826, sum loss: 3681.928223, avg loss: 2.414379, ppl: 11.182825 +epoch: 2, batch: 16827, sum loss: 4086.856201, avg loss: 2.544742, ppl: 12.739945 +epoch: 2, batch: 16828, sum loss: 4705.040039, avg loss: 2.664236, ppl: 14.356971 +epoch: 2, batch: 16829, sum loss: 4218.969727, avg loss: 2.670234, ppl: 14.443348 +epoch: 2, batch: 16830, sum loss: 
3873.453857, avg loss: 2.382198, ppl: 10.828676 +epoch: 2, batch: 16831, sum loss: 5343.947754, avg loss: 2.844038, ppl: 17.185022 +epoch: 2, batch: 16832, sum loss: 4390.437012, avg loss: 2.413654, ppl: 11.174723 +epoch: 2, batch: 16833, sum loss: 4232.857910, avg loss: 2.319374, ppl: 10.169310 +epoch: 2, batch: 16834, sum loss: 4569.815430, avg loss: 2.447678, ppl: 11.561474 +epoch: 2, batch: 16835, sum loss: 5735.807617, avg loss: 3.006188, ppl: 20.210220 +epoch: 2, batch: 16836, sum loss: 4969.178711, avg loss: 2.665869, ppl: 14.380434 +epoch: 2, batch: 16837, sum loss: 3412.725586, avg loss: 2.255602, ppl: 9.541034 +epoch: 2, batch: 16838, sum loss: 3665.751221, avg loss: 2.275451, ppl: 9.732306 +epoch: 2, batch: 16839, sum loss: 3967.056885, avg loss: 2.337688, ppl: 10.357265 +epoch: 2, batch: 16840, sum loss: 4123.406250, avg loss: 2.572306, ppl: 13.095984 +epoch: 2, batch: 16841, sum loss: 4091.213867, avg loss: 2.458662, ppl: 11.689164 +epoch: 2, batch: 16842, sum loss: 4282.016602, avg loss: 2.486653, ppl: 12.020975 +epoch: 2, batch: 16843, sum loss: 5035.769531, avg loss: 2.837053, ppl: 17.065401 +epoch: 2, batch: 16844, sum loss: 4467.202148, avg loss: 2.518152, ppl: 12.405653 +epoch: 2, batch: 16845, sum loss: 5072.612793, avg loss: 2.716986, ppl: 15.134637 +epoch: 2, batch: 16846, sum loss: 4544.629883, avg loss: 2.492940, ppl: 12.096790 +epoch: 2, batch: 16847, sum loss: 4505.942871, avg loss: 2.641232, ppl: 14.030485 +epoch: 2, batch: 16848, sum loss: 3595.427246, avg loss: 2.464309, ppl: 11.755359 +epoch: 2, batch: 16849, sum loss: 4102.814453, avg loss: 2.635077, ppl: 13.944383 +epoch: 2, batch: 16850, sum loss: 3889.104248, avg loss: 2.397722, ppl: 10.998089 +epoch: 2, batch: 16851, sum loss: 4239.212402, avg loss: 2.720932, ppl: 15.194481 +epoch: 2, batch: 16852, sum loss: 3831.921143, avg loss: 2.353760, ppl: 10.525067 +epoch: 2, batch: 16853, sum loss: 4276.431641, avg loss: 2.398448, ppl: 11.006076 +epoch: 2, batch: 16854, sum loss: 
4387.885742, avg loss: 2.444505, ppl: 11.524840 +epoch: 2, batch: 16855, sum loss: 4249.208984, avg loss: 2.490744, ppl: 12.070251 +epoch: 2, batch: 16856, sum loss: 4059.885742, avg loss: 2.323919, ppl: 10.215627 +epoch: 2, batch: 16857, sum loss: 4766.771973, avg loss: 2.491778, ppl: 12.082745 +epoch: 2, batch: 16858, sum loss: 4474.338867, avg loss: 2.658550, ppl: 14.275568 +epoch: 2, batch: 16859, sum loss: 3682.421387, avg loss: 2.494865, ppl: 12.120102 +epoch: 2, batch: 16860, sum loss: 4524.586914, avg loss: 2.630574, ppl: 13.881732 +epoch: 2, batch: 16861, sum loss: 5097.074707, avg loss: 2.679850, ppl: 14.582907 +epoch: 2, batch: 16862, sum loss: 3724.697266, avg loss: 2.303462, ppl: 10.008768 +epoch: 2, batch: 16863, sum loss: 4543.566406, avg loss: 2.983300, ppl: 19.752903 +epoch: 2, batch: 16864, sum loss: 5047.404297, avg loss: 2.746139, ppl: 15.582356 +epoch: 2, batch: 16865, sum loss: 4926.726562, avg loss: 2.621994, ppl: 13.763140 +epoch: 2, batch: 16866, sum loss: 4568.425781, avg loss: 2.588343, ppl: 13.307704 +epoch: 2, batch: 16867, sum loss: 3107.464355, avg loss: 2.024407, ppl: 7.571617 +epoch: 2, batch: 16868, sum loss: 4071.040283, avg loss: 2.571725, ppl: 13.088378 +epoch: 2, batch: 16869, sum loss: 4944.996094, avg loss: 2.773414, ppl: 16.013205 +epoch: 2, batch: 16870, sum loss: 3905.068359, avg loss: 2.678373, ppl: 14.561388 +epoch: 2, batch: 16871, sum loss: 4402.505859, avg loss: 2.609666, ppl: 13.594504 +epoch: 2, batch: 16872, sum loss: 4662.540527, avg loss: 2.603317, ppl: 13.508469 +epoch: 2, batch: 16873, sum loss: 3896.224121, avg loss: 2.494382, ppl: 12.114241 +epoch: 2, batch: 16874, sum loss: 5311.391602, avg loss: 2.913545, ppl: 18.421982 +epoch: 2, batch: 16875, sum loss: 4242.551758, avg loss: 2.541972, ppl: 12.704705 +epoch: 2, batch: 16876, sum loss: 3827.099854, avg loss: 2.605242, ppl: 13.534494 +epoch: 2, batch: 16877, sum loss: 2797.690918, avg loss: 1.994078, ppl: 7.345425 +epoch: 2, batch: 16878, sum loss: 
4159.670898, avg loss: 2.533295, ppl: 12.594943 +epoch: 2, batch: 16879, sum loss: 4374.038574, avg loss: 2.546006, ppl: 12.756057 +epoch: 2, batch: 16880, sum loss: 4722.842773, avg loss: 2.644369, ppl: 14.074559 +epoch: 2, batch: 16881, sum loss: 3764.356934, avg loss: 2.329429, ppl: 10.272071 +epoch: 2, batch: 16882, sum loss: 4572.137695, avg loss: 2.554267, ppl: 12.861864 +epoch: 2, batch: 16883, sum loss: 4346.966309, avg loss: 2.573692, ppl: 13.114157 +epoch: 2, batch: 16884, sum loss: 5112.444824, avg loss: 2.706429, ppl: 14.975705 +epoch: 2, batch: 16885, sum loss: 4407.532227, avg loss: 2.694091, ppl: 14.792061 +epoch: 2, batch: 16886, sum loss: 3420.732910, avg loss: 2.387113, ppl: 10.882033 +epoch: 2, batch: 16887, sum loss: 3232.351562, avg loss: 2.279515, ppl: 9.771935 +epoch: 2, batch: 16888, sum loss: 4860.446289, avg loss: 2.623015, ppl: 13.777195 +epoch: 2, batch: 16889, sum loss: 4628.039062, avg loss: 2.811688, ppl: 16.637987 +epoch: 2, batch: 16890, sum loss: 4555.385254, avg loss: 2.697090, ppl: 14.836497 +epoch: 2, batch: 16891, sum loss: 4979.749512, avg loss: 2.700515, ppl: 14.887397 +epoch: 2, batch: 16892, sum loss: 4942.295898, avg loss: 2.738114, ppl: 15.457806 +epoch: 2, batch: 16893, sum loss: 4696.726074, avg loss: 2.538771, ppl: 12.664096 +epoch: 2, batch: 16894, sum loss: 4113.006836, avg loss: 2.556250, ppl: 12.887403 +epoch: 2, batch: 16895, sum loss: 3760.736816, avg loss: 2.314300, ppl: 10.117834 +epoch: 2, batch: 16896, sum loss: 3783.018555, avg loss: 2.335197, ppl: 10.331492 +epoch: 2, batch: 16897, sum loss: 4283.154297, avg loss: 2.577109, ppl: 13.159036 +epoch: 2, batch: 16898, sum loss: 4235.030273, avg loss: 2.546621, ppl: 12.763900 +epoch: 2, batch: 16899, sum loss: 4314.905273, avg loss: 2.488411, ppl: 12.042129 +epoch: 2, batch: 16900, sum loss: 4391.324219, avg loss: 2.454625, ppl: 11.642069 +epoch: 2, batch: 16901, sum loss: 4801.386230, avg loss: 2.652700, ppl: 14.192302 +epoch: 2, batch: 16902, sum loss: 
4223.494629, avg loss: 2.578446, ppl: 13.176644 +epoch: 2, batch: 16903, sum loss: 3879.682129, avg loss: 2.274139, ppl: 9.719551 +epoch: 2, batch: 16904, sum loss: 5165.687500, avg loss: 2.642295, ppl: 14.045406 +epoch: 2, batch: 16905, sum loss: 5169.711914, avg loss: 2.818818, ppl: 16.757025 +epoch: 2, batch: 16906, sum loss: 3897.642090, avg loss: 2.379513, ppl: 10.799640 +epoch: 2, batch: 16907, sum loss: 5232.917480, avg loss: 2.617768, ppl: 13.705097 +epoch: 2, batch: 16908, sum loss: 3786.111328, avg loss: 2.328482, ppl: 10.262348 +epoch: 2, batch: 16909, sum loss: 4225.210449, avg loss: 2.585808, ppl: 13.274011 +epoch: 2, batch: 16910, sum loss: 4764.118164, avg loss: 2.623413, ppl: 13.782685 +epoch: 2, batch: 16911, sum loss: 3506.860596, avg loss: 2.249429, ppl: 9.482325 +epoch: 2, batch: 16912, sum loss: 3483.301758, avg loss: 2.254564, ppl: 9.531139 +epoch: 2, batch: 16913, sum loss: 3506.816895, avg loss: 2.263923, ppl: 9.620759 +epoch: 2, batch: 16914, sum loss: 4340.254883, avg loss: 2.619345, ppl: 13.726732 +epoch: 2, batch: 16915, sum loss: 4249.946289, avg loss: 2.232115, ppl: 9.319554 +epoch: 2, batch: 16916, sum loss: 4409.863281, avg loss: 2.328333, ppl: 10.260826 +epoch: 2, batch: 16917, sum loss: 4066.821533, avg loss: 2.521278, ppl: 12.444492 +epoch: 2, batch: 16918, sum loss: 4866.417480, avg loss: 2.529323, ppl: 12.545012 +epoch: 2, batch: 16919, sum loss: 4479.625488, avg loss: 2.570066, ppl: 13.066689 +epoch: 2, batch: 16920, sum loss: 4702.770020, avg loss: 2.634605, ppl: 13.937808 +epoch: 2, batch: 16921, sum loss: 3888.717285, avg loss: 2.597673, ppl: 13.432450 +epoch: 2, batch: 16922, sum loss: 4302.586914, avg loss: 2.565645, ppl: 13.009049 +epoch: 2, batch: 16923, sum loss: 4730.074219, avg loss: 2.782397, ppl: 16.157701 +epoch: 2, batch: 16924, sum loss: 4914.098145, avg loss: 2.574174, ppl: 13.120478 +epoch: 2, batch: 16925, sum loss: 4248.972168, avg loss: 2.487689, ppl: 12.033429 +epoch: 2, batch: 16926, sum loss: 3375.313477, 
avg loss: 2.156750, ppl: 8.643000 +epoch: 2, batch: 16927, sum loss: 3896.045898, avg loss: 2.448803, ppl: 11.574486 +epoch: 2, batch: 16928, sum loss: 4711.687500, avg loss: 2.514241, ppl: 12.357225 +epoch: 2, batch: 16929, sum loss: 4095.530029, avg loss: 2.723092, ppl: 15.227326 +epoch: 2, batch: 16930, sum loss: 3868.555176, avg loss: 2.479843, ppl: 11.939391 +epoch: 2, batch: 16931, sum loss: 3920.881592, avg loss: 2.476868, ppl: 11.903919 +epoch: 2, batch: 16932, sum loss: 3382.638672, avg loss: 2.349055, ppl: 10.475661 +epoch: 2, batch: 16933, sum loss: 5025.232422, avg loss: 2.738546, ppl: 15.464489 +epoch: 2, batch: 16934, sum loss: 4417.206055, avg loss: 2.683600, ppl: 14.637700 +epoch: 2, batch: 16935, sum loss: 4140.189941, avg loss: 2.411293, ppl: 11.148367 +epoch: 2, batch: 16936, sum loss: 3291.305908, avg loss: 2.342566, ppl: 10.407914 +epoch: 2, batch: 16937, sum loss: 4655.836914, avg loss: 2.569446, ppl: 13.058592 +epoch: 2, batch: 16938, sum loss: 3533.148682, avg loss: 2.424948, ppl: 11.301636 +epoch: 2, batch: 16939, sum loss: 5121.861328, avg loss: 2.803427, ppl: 16.501102 +epoch: 2, batch: 16940, sum loss: 4095.696777, avg loss: 2.404989, ppl: 11.078311 +epoch: 2, batch: 16941, sum loss: 2823.331787, avg loss: 2.035567, ppl: 7.656594 +epoch: 2, batch: 16942, sum loss: 4679.872559, avg loss: 2.708260, ppl: 15.003141 +epoch: 2, batch: 16943, sum loss: 4126.377930, avg loss: 2.665619, ppl: 14.376844 +epoch: 2, batch: 16944, sum loss: 4053.314453, avg loss: 2.408386, ppl: 11.116011 +epoch: 2, batch: 16945, sum loss: 4425.843262, avg loss: 2.536300, ppl: 12.632839 +epoch: 2, batch: 16946, sum loss: 3608.778564, avg loss: 2.188465, ppl: 8.921507 +epoch: 2, batch: 16947, sum loss: 3847.111816, avg loss: 2.401443, ppl: 11.039094 +epoch: 2, batch: 16948, sum loss: 4676.887207, avg loss: 2.618638, ppl: 13.717029 +epoch: 2, batch: 16949, sum loss: 4246.879395, avg loss: 2.734629, ppl: 15.404034 +epoch: 2, batch: 16950, sum loss: 4105.985840, avg loss: 
2.265997, ppl: 9.640729 +epoch: 2, batch: 16951, sum loss: 5228.322754, avg loss: 2.683944, ppl: 14.642731 +epoch: 2, batch: 16952, sum loss: 4221.819336, avg loss: 2.614130, ppl: 13.655325 +epoch: 2, batch: 16953, sum loss: 3727.346680, avg loss: 2.276937, ppl: 9.746785 +epoch: 2, batch: 16954, sum loss: 5134.610840, avg loss: 2.588009, ppl: 13.303263 +epoch: 2, batch: 16955, sum loss: 3578.516357, avg loss: 2.246401, ppl: 9.453650 +epoch: 2, batch: 16956, sum loss: 4166.294434, avg loss: 2.589369, ppl: 13.321361 +epoch: 2, batch: 16957, sum loss: 3746.954102, avg loss: 2.380530, ppl: 10.810630 +epoch: 2, batch: 16958, sum loss: 4866.133301, avg loss: 2.648957, ppl: 14.139280 +epoch: 2, batch: 16959, sum loss: 3674.520752, avg loss: 2.254307, ppl: 9.528690 +epoch: 2, batch: 16960, sum loss: 4933.372070, avg loss: 2.497910, ppl: 12.157060 +epoch: 2, batch: 16961, sum loss: 3721.880371, avg loss: 2.484566, ppl: 11.995915 +epoch: 2, batch: 16962, sum loss: 4984.272461, avg loss: 2.591925, ppl: 13.355458 +epoch: 2, batch: 16963, sum loss: 3915.911377, avg loss: 2.489454, ppl: 12.054693 +epoch: 2, batch: 16964, sum loss: 5640.780273, avg loss: 2.810553, ppl: 16.619112 +epoch: 2, batch: 16965, sum loss: 3970.265381, avg loss: 2.363253, ppl: 10.625463 +epoch: 2, batch: 16966, sum loss: 3703.803711, avg loss: 2.121308, ppl: 8.342043 +epoch: 2, batch: 16967, sum loss: 4095.742188, avg loss: 2.423516, ppl: 11.285470 +epoch: 2, batch: 16968, sum loss: 3759.701660, avg loss: 2.417815, ppl: 11.221311 +epoch: 2, batch: 16969, sum loss: 4817.702637, avg loss: 2.580451, ppl: 13.203094 +epoch: 2, batch: 16970, sum loss: 4533.108398, avg loss: 2.824367, ppl: 16.850267 +epoch: 2, batch: 16971, sum loss: 4530.385742, avg loss: 2.584362, ppl: 13.254824 +epoch: 2, batch: 16972, sum loss: 4315.106934, avg loss: 2.541288, ppl: 12.696014 +epoch: 2, batch: 16973, sum loss: 5218.479004, avg loss: 2.654364, ppl: 14.215937 +epoch: 2, batch: 16974, sum loss: 3744.725586, avg loss: 2.445935, 
ppl: 11.541330 +epoch: 2, batch: 16975, sum loss: 4873.188965, avg loss: 2.605983, ppl: 13.544539 +epoch: 2, batch: 16976, sum loss: 4294.047852, avg loss: 2.421911, ppl: 11.267371 +epoch: 2, batch: 16977, sum loss: 5718.046875, avg loss: 3.094181, ppl: 22.069162 +epoch: 2, batch: 16978, sum loss: 5228.530762, avg loss: 2.590947, ppl: 13.342400 +epoch: 2, batch: 16979, sum loss: 3949.806152, avg loss: 2.379401, ppl: 10.798437 +epoch: 2, batch: 16980, sum loss: 4730.983398, avg loss: 2.651897, ppl: 14.180910 +epoch: 2, batch: 16981, sum loss: 4913.798340, avg loss: 2.871887, ppl: 17.670330 +epoch: 2, batch: 16982, sum loss: 4578.062500, avg loss: 2.675665, ppl: 14.522002 +epoch: 2, batch: 16983, sum loss: 4020.093506, avg loss: 2.486143, ppl: 12.014850 +epoch: 2, batch: 16984, sum loss: 5227.735352, avg loss: 2.782190, ppl: 16.154366 +epoch: 2, batch: 16985, sum loss: 3378.369629, avg loss: 2.406246, ppl: 11.092245 +epoch: 2, batch: 16986, sum loss: 4264.139648, avg loss: 2.514233, ppl: 12.357132 +epoch: 2, batch: 16987, sum loss: 4093.846191, avg loss: 2.385691, ppl: 10.866573 +epoch: 2, batch: 16988, sum loss: 5169.986328, avg loss: 2.699732, ppl: 14.875742 +epoch: 2, batch: 16989, sum loss: 4703.596191, avg loss: 2.582974, ppl: 13.236450 +epoch: 2, batch: 16990, sum loss: 4305.704102, avg loss: 2.531278, ppl: 12.569561 +epoch: 2, batch: 16991, sum loss: 3740.734375, avg loss: 2.185008, ppl: 8.890722 +epoch: 2, batch: 16992, sum loss: 4797.059570, avg loss: 2.530095, ppl: 12.554697 +epoch: 2, batch: 16993, sum loss: 4870.312988, avg loss: 2.714779, ppl: 15.101267 +epoch: 2, batch: 16994, sum loss: 4488.984863, avg loss: 2.772690, ppl: 16.001617 +epoch: 2, batch: 16995, sum loss: 4030.063232, avg loss: 2.635751, ppl: 13.953788 +epoch: 2, batch: 16996, sum loss: 4337.277344, avg loss: 2.659275, ppl: 14.285926 +epoch: 2, batch: 16997, sum loss: 4855.465332, avg loss: 2.617501, ppl: 13.701447 +epoch: 2, batch: 16998, sum loss: 3687.081543, avg loss: 2.278790, ppl: 
9.764853 +epoch: 2, batch: 16999, sum loss: 4221.766602, avg loss: 2.630384, ppl: 13.879102 +epoch: 2, batch: 17000, sum loss: 3464.617188, avg loss: 2.262977, ppl: 9.611657 +epoch: 2, batch: 17001, sum loss: 4598.560059, avg loss: 2.434389, ppl: 11.408842 +epoch: 2, batch: 17002, sum loss: 3853.479248, avg loss: 2.377223, ppl: 10.774944 +epoch: 2, batch: 17003, sum loss: 3896.407715, avg loss: 2.700213, ppl: 14.882904 +epoch: 2, batch: 17004, sum loss: 4254.597168, avg loss: 2.795399, ppl: 16.369162 +epoch: 2, batch: 17005, sum loss: 4684.408691, avg loss: 2.450005, ppl: 11.588400 +epoch: 2, batch: 17006, sum loss: 5178.720703, avg loss: 2.608927, ppl: 13.584471 +epoch: 2, batch: 17007, sum loss: 3527.706299, avg loss: 2.286265, ppl: 9.838120 +epoch: 2, batch: 17008, sum loss: 4169.171875, avg loss: 2.418313, ppl: 11.226907 +epoch: 2, batch: 17009, sum loss: 5345.422852, avg loss: 2.786978, ppl: 16.231886 +epoch: 2, batch: 17010, sum loss: 4814.279297, avg loss: 2.747876, ppl: 15.609448 +epoch: 2, batch: 17011, sum loss: 3691.230469, avg loss: 2.414147, ppl: 11.180228 +epoch: 2, batch: 17012, sum loss: 4559.871094, avg loss: 2.637288, ppl: 13.975252 +epoch: 2, batch: 17013, sum loss: 4767.236816, avg loss: 2.633833, ppl: 13.927046 +epoch: 2, batch: 17014, sum loss: 5880.188477, avg loss: 2.792112, ppl: 16.315443 +epoch: 2, batch: 17015, sum loss: 4741.468262, avg loss: 2.785821, ppl: 16.213131 +epoch: 2, batch: 17016, sum loss: 5225.409668, avg loss: 2.924124, ppl: 18.617910 +epoch: 2, batch: 17017, sum loss: 5300.291504, avg loss: 2.689138, ppl: 14.718986 +epoch: 2, batch: 17018, sum loss: 4718.051758, avg loss: 2.661056, ppl: 14.311391 +epoch: 2, batch: 17019, sum loss: 5318.050293, avg loss: 2.754040, ppl: 15.705949 +epoch: 2, batch: 17020, sum loss: 4306.656250, avg loss: 2.665010, ppl: 14.368093 +epoch: 2, batch: 17021, sum loss: 4521.382812, avg loss: 2.678545, ppl: 14.563881 +epoch: 2, batch: 17022, sum loss: 4305.112793, avg loss: 2.465700, ppl: 11.771724 
+epoch: 2, batch: 17023, sum loss: 4973.063965, avg loss: 2.836888, ppl: 17.062576 +epoch: 2, batch: 17024, sum loss: 3831.413574, avg loss: 2.255099, ppl: 9.536241 +epoch: 2, batch: 17025, sum loss: 3053.337158, avg loss: 2.235240, ppl: 9.348721 +epoch: 2, batch: 17026, sum loss: 4238.133789, avg loss: 2.536286, ppl: 12.632668 +epoch: 2, batch: 17027, sum loss: 4252.609375, avg loss: 2.613773, ppl: 13.650462 +epoch: 2, batch: 17028, sum loss: 3639.608643, avg loss: 2.352688, ppl: 10.513796 +epoch: 2, batch: 17029, sum loss: 3792.352295, avg loss: 2.730275, ppl: 15.337107 +epoch: 2, batch: 17030, sum loss: 4489.197754, avg loss: 2.632961, ppl: 13.914905 +epoch: 2, batch: 17031, sum loss: 4607.368652, avg loss: 2.474419, ppl: 11.874810 +epoch: 2, batch: 17032, sum loss: 4996.780762, avg loss: 2.763706, ppl: 15.858509 +epoch: 2, batch: 17033, sum loss: 4233.649414, avg loss: 2.562742, ppl: 12.971336 +epoch: 2, batch: 17034, sum loss: 3914.305176, avg loss: 2.452572, ppl: 11.618192 +epoch: 2, batch: 17035, sum loss: 4688.089355, avg loss: 2.830972, ppl: 16.961935 +epoch: 2, batch: 17036, sum loss: 4168.880859, avg loss: 2.349989, ppl: 10.485456 +epoch: 2, batch: 17037, sum loss: 3781.318359, avg loss: 2.460194, ppl: 11.707084 +epoch: 2, batch: 17038, sum loss: 3783.527588, avg loss: 2.298620, ppl: 9.960423 +epoch: 2, batch: 17039, sum loss: 4260.887207, avg loss: 2.582356, ppl: 13.228267 +epoch: 2, batch: 17040, sum loss: 3505.934082, avg loss: 2.288469, ppl: 9.859829 +epoch: 2, batch: 17041, sum loss: 3690.895996, avg loss: 2.540190, ppl: 12.682080 +epoch: 2, batch: 17042, sum loss: 5222.070801, avg loss: 2.869270, ppl: 17.624146 +epoch: 2, batch: 17043, sum loss: 3470.208252, avg loss: 2.452444, ppl: 11.616701 +epoch: 2, batch: 17044, sum loss: 4412.231445, avg loss: 2.476000, ppl: 11.893593 +epoch: 2, batch: 17045, sum loss: 4006.962646, avg loss: 2.583470, ppl: 13.243016 +epoch: 2, batch: 17046, sum loss: 4359.315918, avg loss: 2.632437, ppl: 13.907624 +epoch: 2, 
batch: 17047, sum loss: 4464.497559, avg loss: 2.565803, ppl: 13.011106 +epoch: 2, batch: 17048, sum loss: 4590.209473, avg loss: 2.706491, ppl: 14.976637 +epoch: 2, batch: 17049, sum loss: 4191.698730, avg loss: 2.532748, ppl: 12.588057 +epoch: 2, batch: 17050, sum loss: 5571.986816, avg loss: 2.903589, ppl: 18.239485 +epoch: 2, batch: 17051, sum loss: 4517.325195, avg loss: 2.618739, ppl: 13.718419 +epoch: 2, batch: 17052, sum loss: 3422.353516, avg loss: 2.298424, ppl: 9.958475 +epoch: 2, batch: 17053, sum loss: 3977.263428, avg loss: 2.346468, ppl: 10.448600 +epoch: 2, batch: 17054, sum loss: 3735.398926, avg loss: 2.411491, ppl: 11.150571 +epoch: 2, batch: 17055, sum loss: 4847.408203, avg loss: 2.630173, ppl: 13.876166 +epoch: 2, batch: 17056, sum loss: 4374.368164, avg loss: 2.537337, ppl: 12.645945 +epoch: 2, batch: 17057, sum loss: 3620.741699, avg loss: 2.466445, ppl: 11.780495 +epoch: 2, batch: 17058, sum loss: 4882.354980, avg loss: 2.442399, ppl: 11.500595 +epoch: 2, batch: 17059, sum loss: 4553.944336, avg loss: 2.624752, ppl: 13.801148 +epoch: 2, batch: 17060, sum loss: 4066.327637, avg loss: 2.456996, ppl: 11.669700 +epoch: 2, batch: 17061, sum loss: 4317.711426, avg loss: 2.472916, ppl: 11.856973 +epoch: 2, batch: 17062, sum loss: 3629.303467, avg loss: 2.339976, ppl: 10.380991 +epoch: 2, batch: 17063, sum loss: 3665.832031, avg loss: 2.307006, ppl: 10.044303 +epoch: 2, batch: 17064, sum loss: 4499.369141, avg loss: 2.368089, ppl: 10.676971 +epoch: 2, batch: 17065, sum loss: 4728.033691, avg loss: 2.663681, ppl: 14.349008 +epoch: 2, batch: 17066, sum loss: 4389.009766, avg loss: 2.516634, ppl: 12.386832 +epoch: 2, batch: 17067, sum loss: 3529.794922, avg loss: 2.432664, ppl: 11.389179 +epoch: 2, batch: 17068, sum loss: 3757.368652, avg loss: 2.545643, ppl: 12.751423 +epoch: 2, batch: 17069, sum loss: 4127.490723, avg loss: 2.487939, ppl: 12.036442 +epoch: 2, batch: 17070, sum loss: 4795.634277, avg loss: 2.695691, ppl: 14.815751 +epoch: 2, batch: 
17071, sum loss: 4993.373047, avg loss: 2.547639, ppl: 12.776903 +epoch: 2, batch: 17072, sum loss: 3989.088867, avg loss: 2.361805, ppl: 10.610087 +epoch: 2, batch: 17073, sum loss: 5316.482910, avg loss: 2.681030, ppl: 14.600127 +epoch: 2, batch: 17074, sum loss: 5610.311523, avg loss: 2.865328, ppl: 17.554808 +epoch: 2, batch: 17075, sum loss: 3746.057617, avg loss: 2.248534, ppl: 9.473837 +epoch: 2, batch: 17076, sum loss: 4405.526855, avg loss: 2.550971, ppl: 12.819546 +epoch: 2, batch: 17077, sum loss: 5436.213379, avg loss: 2.792098, ppl: 16.315207 +epoch: 2, batch: 17078, sum loss: 4361.629883, avg loss: 2.772810, ppl: 16.003536 +epoch: 2, batch: 17079, sum loss: 4830.282715, avg loss: 2.656921, ppl: 14.252340 +epoch: 2, batch: 17080, sum loss: 5340.712402, avg loss: 2.537156, ppl: 12.643657 +epoch: 2, batch: 17081, sum loss: 4571.489746, avg loss: 2.630316, ppl: 13.878159 +epoch: 2, batch: 17082, sum loss: 3414.207520, avg loss: 2.389229, ppl: 10.905078 +epoch: 2, batch: 17083, sum loss: 4544.550781, avg loss: 2.660744, ppl: 14.306929 +epoch: 2, batch: 17084, sum loss: 5363.439453, avg loss: 2.728097, ppl: 15.303743 +epoch: 2, batch: 17085, sum loss: 3726.721191, avg loss: 2.445355, ppl: 11.534646 +epoch: 2, batch: 17086, sum loss: 3424.485596, avg loss: 2.307605, ppl: 10.050323 +epoch: 2, batch: 17087, sum loss: 3997.053467, avg loss: 2.521800, ppl: 12.450992 +epoch: 2, batch: 17088, sum loss: 5288.377930, avg loss: 2.758674, ppl: 15.778909 +epoch: 2, batch: 17089, sum loss: 4705.520020, avg loss: 2.702769, ppl: 14.920984 +epoch: 2, batch: 17090, sum loss: 4252.222656, avg loss: 2.600748, ppl: 13.473810 +epoch: 2, batch: 17091, sum loss: 4367.621094, avg loss: 2.558653, ppl: 12.918409 +epoch: 2, batch: 17092, sum loss: 3968.583740, avg loss: 2.287368, ppl: 9.848982 +epoch: 2, batch: 17093, sum loss: 3795.098145, avg loss: 2.543632, ppl: 12.725801 +epoch: 2, batch: 17094, sum loss: 4483.487305, avg loss: 2.652951, ppl: 14.195868 +epoch: 2, batch: 17095, 
sum loss: 4572.881348, avg loss: 2.483912, ppl: 11.988068 +epoch: 2, batch: 17096, sum loss: 4484.382324, avg loss: 2.741065, ppl: 15.503488 +epoch: 2, batch: 17097, sum loss: 4434.686523, avg loss: 2.641267, ppl: 14.030964 +epoch: 2, batch: 17098, sum loss: 3538.239502, avg loss: 2.308049, ppl: 10.054790 +epoch: 2, batch: 17099, sum loss: 5768.562012, avg loss: 2.952181, ppl: 19.147671 +epoch: 2, batch: 17100, sum loss: 4118.279297, avg loss: 2.402730, ppl: 11.053310 +epoch: 2, batch: 17101, sum loss: 4836.250000, avg loss: 2.671962, ppl: 14.468322 +epoch: 2, batch: 17102, sum loss: 3538.228027, avg loss: 2.402056, ppl: 11.045860 +epoch: 2, batch: 17103, sum loss: 5015.788574, avg loss: 2.987367, ppl: 19.833385 +epoch: 2, batch: 17104, sum loss: 4826.441895, avg loss: 2.500747, ppl: 12.191600 +epoch: 2, batch: 17105, sum loss: 4453.622559, avg loss: 2.596864, ppl: 13.421588 +epoch: 2, batch: 17106, sum loss: 4303.092773, avg loss: 2.339909, ppl: 10.380293 +epoch: 2, batch: 17107, sum loss: 4270.460449, avg loss: 2.563302, ppl: 12.978597 +epoch: 2, batch: 17108, sum loss: 4150.769531, avg loss: 2.662457, ppl: 14.331451 +epoch: 2, batch: 17109, sum loss: 4219.720703, avg loss: 2.540470, ppl: 12.685634 +epoch: 2, batch: 17110, sum loss: 3767.133301, avg loss: 2.667941, ppl: 14.410273 +epoch: 2, batch: 17111, sum loss: 4649.027832, avg loss: 2.717141, ppl: 15.136982 +epoch: 2, batch: 17112, sum loss: 4381.422363, avg loss: 2.325596, ppl: 10.232776 +epoch: 2, batch: 17113, sum loss: 4125.705078, avg loss: 2.476413, ppl: 11.898514 +epoch: 2, batch: 17114, sum loss: 4406.176758, avg loss: 2.806482, ppl: 16.551588 +epoch: 2, batch: 17115, sum loss: 5878.881836, avg loss: 2.981177, ppl: 19.711010 +epoch: 2, batch: 17116, sum loss: 4546.417969, avg loss: 2.454869, ppl: 11.644911 +epoch: 2, batch: 17117, sum loss: 4461.356934, avg loss: 2.663497, ppl: 14.346371 +epoch: 2, batch: 17118, sum loss: 4071.083496, avg loss: 2.640132, ppl: 14.015053 +epoch: 2, batch: 17119, sum 
loss: 4421.403320, avg loss: 2.606960, ppl: 13.557767 +epoch: 2, batch: 17120, sum loss: 3776.433350, avg loss: 2.452229, ppl: 11.614212 +epoch: 2, batch: 17121, sum loss: 3976.457031, avg loss: 2.465255, ppl: 11.766488 +epoch: 2, batch: 17122, sum loss: 4238.063965, avg loss: 2.452583, ppl: 11.618321 +epoch: 2, batch: 17123, sum loss: 5949.976562, avg loss: 2.966090, ppl: 19.415854 +epoch: 2, batch: 17124, sum loss: 4514.738281, avg loss: 2.612696, ppl: 13.635759 +epoch: 2, batch: 17125, sum loss: 3847.582275, avg loss: 2.359033, ppl: 10.580711 +epoch: 2, batch: 17126, sum loss: 5067.433105, avg loss: 2.688293, ppl: 14.706557 +epoch: 2, batch: 17127, sum loss: 4731.842773, avg loss: 2.634656, ppl: 13.938522 +epoch: 2, batch: 17128, sum loss: 4324.526367, avg loss: 2.635299, ppl: 13.947489 +epoch: 2, batch: 17129, sum loss: 5197.106445, avg loss: 2.560151, ppl: 12.937772 +epoch: 2, batch: 17130, sum loss: 3499.857666, avg loss: 2.498114, ppl: 12.159538 +epoch: 2, batch: 17131, sum loss: 3587.280273, avg loss: 2.261841, ppl: 9.600751 +epoch: 2, batch: 17132, sum loss: 4162.366211, avg loss: 2.624443, ppl: 13.796885 +epoch: 2, batch: 17133, sum loss: 3833.981689, avg loss: 2.182118, ppl: 8.865065 +epoch: 2, batch: 17134, sum loss: 5193.416992, avg loss: 2.581221, ppl: 13.213266 +epoch: 2, batch: 17135, sum loss: 4283.297852, avg loss: 2.512198, ppl: 12.332008 +epoch: 2, batch: 17136, sum loss: 4649.673828, avg loss: 2.676842, ppl: 14.539103 +epoch: 2, batch: 17137, sum loss: 5012.803223, avg loss: 2.722870, ppl: 15.223950 +epoch: 2, batch: 17138, sum loss: 4598.599609, avg loss: 2.457830, ppl: 11.679440 +epoch: 2, batch: 17139, sum loss: 4267.867188, avg loss: 2.506088, ppl: 12.256882 +epoch: 2, batch: 17140, sum loss: 4154.793945, avg loss: 2.272863, ppl: 9.707154 +epoch: 2, batch: 17141, sum loss: 4296.283203, avg loss: 2.518337, ppl: 12.407946 +epoch: 2, batch: 17142, sum loss: 4670.751465, avg loss: 2.638843, ppl: 13.996994 +epoch: 2, batch: 17143, sum loss: 
4359.958496, avg loss: 2.524585, ppl: 12.485716 +epoch: 2, batch: 17144, sum loss: 4527.359375, avg loss: 2.609429, ppl: 13.591290 +epoch: 2, batch: 17145, sum loss: 4064.213867, avg loss: 2.402018, ppl: 11.045439 +epoch: 2, batch: 17146, sum loss: 3722.565918, avg loss: 2.390858, ppl: 10.922861 +epoch: 2, batch: 17147, sum loss: 3165.034424, avg loss: 2.193371, ppl: 8.965385 +epoch: 2, batch: 17148, sum loss: 4733.099609, avg loss: 2.645668, ppl: 14.092853 +epoch: 2, batch: 17149, sum loss: 5418.648926, avg loss: 2.706618, ppl: 14.978530 +epoch: 2, batch: 17150, sum loss: 4975.606445, avg loss: 2.662176, ppl: 14.327427 +epoch: 2, batch: 17151, sum loss: 4745.694336, avg loss: 2.598957, ppl: 13.449697 +epoch: 2, batch: 17152, sum loss: 4476.806641, avg loss: 2.571400, ppl: 13.084126 +epoch: 2, batch: 17153, sum loss: 4934.967773, avg loss: 2.538564, ppl: 12.661475 +epoch: 2, batch: 17154, sum loss: 3894.917480, avg loss: 2.520982, ppl: 12.440811 +epoch: 2, batch: 17155, sum loss: 5141.486328, avg loss: 2.861150, ppl: 17.481619 +epoch: 2, batch: 17156, sum loss: 4590.252930, avg loss: 2.620008, ppl: 13.735826 +epoch: 2, batch: 17157, sum loss: 3667.904297, avg loss: 2.324401, ppl: 10.220552 +epoch: 2, batch: 17158, sum loss: 4154.362793, avg loss: 2.553388, ppl: 12.850573 +epoch: 2, batch: 17159, sum loss: 3848.211670, avg loss: 2.691057, ppl: 14.747258 +epoch: 2, batch: 17160, sum loss: 4079.617432, avg loss: 2.416835, ppl: 11.210323 +epoch: 2, batch: 17161, sum loss: 3950.646729, avg loss: 2.521153, ppl: 12.442938 +epoch: 2, batch: 17162, sum loss: 5291.533203, avg loss: 2.863384, ppl: 17.520712 +epoch: 2, batch: 17163, sum loss: 4697.257812, avg loss: 2.543183, ppl: 12.720090 +epoch: 2, batch: 17164, sum loss: 4250.690430, avg loss: 2.452793, ppl: 11.620757 +epoch: 2, batch: 17165, sum loss: 4726.608398, avg loss: 2.685573, ppl: 14.666601 +epoch: 2, batch: 17166, sum loss: 3399.561523, avg loss: 2.281585, ppl: 9.792189 +epoch: 2, batch: 17167, sum loss: 
4907.745605, avg loss: 2.617464, ppl: 13.700938 +epoch: 2, batch: 17168, sum loss: 4438.639160, avg loss: 2.607896, ppl: 13.570470 +epoch: 2, batch: 17169, sum loss: 2959.882080, avg loss: 2.100697, ppl: 8.171865 +epoch: 2, batch: 17170, sum loss: 3626.158203, avg loss: 2.537549, ppl: 12.648638 +epoch: 2, batch: 17171, sum loss: 3720.743408, avg loss: 2.388154, ppl: 10.893364 +epoch: 2, batch: 17172, sum loss: 4708.828613, avg loss: 2.715587, ppl: 15.113484 +epoch: 2, batch: 17173, sum loss: 4730.787109, avg loss: 2.512367, ppl: 12.334090 +epoch: 2, batch: 17174, sum loss: 4158.665039, avg loss: 2.508242, ppl: 12.283316 +epoch: 2, batch: 17175, sum loss: 4646.948730, avg loss: 2.678357, ppl: 14.561145 +epoch: 2, batch: 17176, sum loss: 4722.738770, avg loss: 2.731486, ppl: 15.355686 +epoch: 2, batch: 17177, sum loss: 5206.857910, avg loss: 2.840621, ppl: 17.126394 +epoch: 2, batch: 17178, sum loss: 4934.128906, avg loss: 2.431803, ppl: 11.379386 +epoch: 2, batch: 17179, sum loss: 3404.391602, avg loss: 2.333373, ppl: 10.312671 +epoch: 2, batch: 17180, sum loss: 4605.597656, avg loss: 2.696486, ppl: 14.827529 +epoch: 2, batch: 17181, sum loss: 3644.339355, avg loss: 2.266380, ppl: 9.644425 +epoch: 2, batch: 17182, sum loss: 3894.584961, avg loss: 2.620851, ppl: 13.747421 +epoch: 2, batch: 17183, sum loss: 3408.876465, avg loss: 2.360718, ppl: 10.598557 +epoch: 2, batch: 17184, sum loss: 4448.648438, avg loss: 2.583420, ppl: 13.242344 +epoch: 2, batch: 17185, sum loss: 4353.784668, avg loss: 2.549054, ppl: 12.794996 +epoch: 2, batch: 17186, sum loss: 4478.563965, avg loss: 2.559179, ppl: 12.925205 +epoch: 2, batch: 17187, sum loss: 3977.443848, avg loss: 2.364711, ppl: 10.640964 +epoch: 2, batch: 17188, sum loss: 4561.154785, avg loss: 2.654921, ppl: 14.223866 +epoch: 2, batch: 17189, sum loss: 4714.084473, avg loss: 2.721758, ppl: 15.207027 +epoch: 2, batch: 17190, sum loss: 4273.113281, avg loss: 2.427905, ppl: 11.335111 +epoch: 2, batch: 17191, sum loss: 
4032.105469, avg loss: 2.490491, ppl: 12.067204 +epoch: 2, batch: 17192, sum loss: 3170.063965, avg loss: 2.180237, ppl: 8.848399 +epoch: 2, batch: 17193, sum loss: 4440.603027, avg loss: 2.729320, ppl: 15.322457 +epoch: 2, batch: 17194, sum loss: 3993.981689, avg loss: 2.473054, ppl: 11.858604 +epoch: 2, batch: 17195, sum loss: 3302.244141, avg loss: 2.009887, ppl: 7.462472 +epoch: 2, batch: 17196, sum loss: 3746.445068, avg loss: 2.398492, ppl: 11.006570 +epoch: 2, batch: 17197, sum loss: 4533.405762, avg loss: 2.534045, ppl: 12.604382 +epoch: 2, batch: 17198, sum loss: 3969.248535, avg loss: 2.423229, ppl: 11.282226 +epoch: 2, batch: 17199, sum loss: 3688.372070, avg loss: 2.283822, ppl: 9.814116 +epoch: 2, batch: 17200, sum loss: 4738.849609, avg loss: 2.622495, ppl: 13.770042 +epoch: 2, batch: 17201, sum loss: 4670.708008, avg loss: 2.507090, ppl: 12.269170 +epoch: 2, batch: 17202, sum loss: 3908.789795, avg loss: 2.464559, ppl: 11.758291 +epoch: 2, batch: 17203, sum loss: 4039.827637, avg loss: 2.354212, ppl: 10.529829 +epoch: 2, batch: 17204, sum loss: 4169.057617, avg loss: 2.330384, ppl: 10.281892 +epoch: 2, batch: 17205, sum loss: 4483.991211, avg loss: 2.650113, ppl: 14.155640 +epoch: 2, batch: 17206, sum loss: 4731.118652, avg loss: 2.779741, ppl: 16.114840 +epoch: 2, batch: 17207, sum loss: 4664.808594, avg loss: 2.688651, ppl: 14.711810 +epoch: 2, batch: 17208, sum loss: 5113.437012, avg loss: 2.526402, ppl: 12.508417 +epoch: 2, batch: 17209, sum loss: 4628.190430, avg loss: 2.533219, ppl: 12.593976 +epoch: 2, batch: 17210, sum loss: 4855.224121, avg loss: 2.582566, ppl: 13.231046 +epoch: 2, batch: 17211, sum loss: 4731.998535, avg loss: 2.639152, ppl: 14.001320 +epoch: 2, batch: 17212, sum loss: 4747.672852, avg loss: 2.691425, ppl: 14.752678 +epoch: 2, batch: 17213, sum loss: 4668.437988, avg loss: 2.595018, ppl: 13.396833 +epoch: 2, batch: 17214, sum loss: 4390.915039, avg loss: 2.433988, ppl: 11.404276 +epoch: 2, batch: 17215, sum loss: 
4039.045410, avg loss: 2.569367, ppl: 13.057562 +epoch: 2, batch: 17216, sum loss: 4438.626953, avg loss: 2.729783, ppl: 15.329557 +epoch: 2, batch: 17217, sum loss: 4743.617188, avg loss: 2.520519, ppl: 12.435052 +epoch: 2, batch: 17218, sum loss: 4196.841309, avg loss: 2.417535, ppl: 11.218176 +epoch: 2, batch: 17219, sum loss: 4397.740234, avg loss: 2.478997, ppl: 11.929288 +epoch: 2, batch: 17220, sum loss: 4542.272461, avg loss: 2.710186, ppl: 15.032079 +epoch: 2, batch: 17221, sum loss: 4332.022949, avg loss: 2.793051, ppl: 16.330776 +epoch: 2, batch: 17222, sum loss: 3947.632812, avg loss: 2.581840, ppl: 13.221440 +epoch: 2, batch: 17223, sum loss: 4391.355469, avg loss: 2.581632, ppl: 13.218689 +epoch: 2, batch: 17224, sum loss: 5179.138672, avg loss: 2.784483, ppl: 16.191448 +epoch: 2, batch: 17225, sum loss: 4588.336426, avg loss: 2.482866, ppl: 11.975537 +epoch: 2, batch: 17226, sum loss: 4189.319336, avg loss: 2.337790, ppl: 10.358319 +epoch: 2, batch: 17227, sum loss: 4798.785156, avg loss: 2.617995, ppl: 13.708215 +epoch: 2, batch: 17228, sum loss: 4173.965820, avg loss: 2.382401, ppl: 10.830873 +epoch: 2, batch: 17229, sum loss: 4280.373047, avg loss: 2.414198, ppl: 11.180801 +epoch: 2, batch: 17230, sum loss: 4338.577148, avg loss: 2.573296, ppl: 13.108961 +epoch: 2, batch: 17231, sum loss: 4618.503418, avg loss: 2.500543, ppl: 12.189116 +epoch: 2, batch: 17232, sum loss: 4122.863770, avg loss: 2.607757, ppl: 13.568581 +epoch: 2, batch: 17233, sum loss: 3782.374268, avg loss: 2.333359, ppl: 10.312518 +epoch: 2, batch: 17234, sum loss: 3404.104248, avg loss: 2.187728, ppl: 8.914932 +epoch: 2, batch: 17235, sum loss: 3885.004395, avg loss: 2.554244, ppl: 12.861567 +epoch: 2, batch: 17236, sum loss: 4074.136475, avg loss: 2.436685, ppl: 11.435066 +epoch: 2, batch: 17237, sum loss: 3995.810059, avg loss: 2.384135, ppl: 10.849673 +epoch: 2, batch: 17238, sum loss: 3911.440674, avg loss: 2.350626, ppl: 10.492130 +epoch: 2, batch: 17239, sum loss: 
4910.891113, avg loss: 2.685014, ppl: 14.658414 +epoch: 2, batch: 17240, sum loss: 5257.441895, avg loss: 2.772913, ppl: 16.005182 +epoch: 2, batch: 17241, sum loss: 5053.055664, avg loss: 2.600646, ppl: 13.472442 +epoch: 2, batch: 17242, sum loss: 3034.219482, avg loss: 2.205101, ppl: 9.071170 +epoch: 2, batch: 17243, sum loss: 4748.910645, avg loss: 2.737124, ppl: 15.442512 +epoch: 2, batch: 17244, sum loss: 4405.684570, avg loss: 2.426038, ppl: 11.313962 +epoch: 2, batch: 17245, sum loss: 3296.240723, avg loss: 2.237774, ppl: 9.372444 +epoch: 2, batch: 17246, sum loss: 4447.920898, avg loss: 2.572540, ppl: 13.099049 +epoch: 2, batch: 17247, sum loss: 4423.897949, avg loss: 2.694213, ppl: 14.793874 +epoch: 2, batch: 17248, sum loss: 4338.791992, avg loss: 2.555237, ppl: 12.874348 +epoch: 2, batch: 17249, sum loss: 4427.116211, avg loss: 2.618046, ppl: 13.708914 +epoch: 2, batch: 17250, sum loss: 4356.472656, avg loss: 2.489413, ppl: 12.054198 +epoch: 2, batch: 17251, sum loss: 4689.367188, avg loss: 2.707487, ppl: 14.991552 +epoch: 2, batch: 17252, sum loss: 5159.058594, avg loss: 2.741264, ppl: 15.506571 +epoch: 2, batch: 17253, sum loss: 3600.302734, avg loss: 2.245978, ppl: 9.449652 +epoch: 2, batch: 17254, sum loss: 3624.902588, avg loss: 2.259914, ppl: 9.582269 +epoch: 2, batch: 17255, sum loss: 4091.050537, avg loss: 2.590912, ppl: 13.341938 +epoch: 2, batch: 17256, sum loss: 5478.253418, avg loss: 2.970853, ppl: 19.508560 +epoch: 2, batch: 17257, sum loss: 5079.253418, avg loss: 2.534558, ppl: 12.610850 +epoch: 2, batch: 17258, sum loss: 4460.235352, avg loss: 2.661238, ppl: 14.314005 +epoch: 2, batch: 17259, sum loss: 3980.119873, avg loss: 2.683830, ppl: 14.641062 +epoch: 2, batch: 17260, sum loss: 4172.361328, avg loss: 2.630745, ppl: 13.884109 +epoch: 2, batch: 17261, sum loss: 3359.985352, avg loss: 2.362859, ppl: 10.621272 +epoch: 2, batch: 17262, sum loss: 4860.289551, avg loss: 2.613059, ppl: 13.640715 +epoch: 2, batch: 17263, sum loss: 
3265.099609, avg loss: 2.273746, ppl: 9.715731 +epoch: 2, batch: 17264, sum loss: 4803.191895, avg loss: 2.681849, ppl: 14.612086 +epoch: 2, batch: 17265, sum loss: 3875.705811, avg loss: 2.390935, ppl: 10.923702 +epoch: 2, batch: 17266, sum loss: 5054.326172, avg loss: 2.613405, ppl: 13.645441 +epoch: 2, batch: 17267, sum loss: 4498.873047, avg loss: 2.288338, ppl: 9.858541 +epoch: 2, batch: 17268, sum loss: 4006.920166, avg loss: 2.397918, ppl: 11.000247 +epoch: 2, batch: 17269, sum loss: 3976.907959, avg loss: 2.473202, ppl: 11.860357 +epoch: 2, batch: 17270, sum loss: 4335.231445, avg loss: 2.610013, ppl: 13.599228 +epoch: 2, batch: 17271, sum loss: 4162.953125, avg loss: 2.549267, ppl: 12.797720 +epoch: 2, batch: 17272, sum loss: 4056.061035, avg loss: 2.572011, ppl: 13.092123 +epoch: 2, batch: 17273, sum loss: 5217.071289, avg loss: 2.578878, ppl: 13.182344 +epoch: 2, batch: 17274, sum loss: 3863.402832, avg loss: 2.420678, ppl: 11.253490 +epoch: 2, batch: 17275, sum loss: 3869.095459, avg loss: 2.342067, ppl: 10.402721 +epoch: 2, batch: 17276, sum loss: 3456.986816, avg loss: 2.237532, ppl: 9.370178 +epoch: 2, batch: 17277, sum loss: 4205.526367, avg loss: 2.494381, ppl: 12.114232 +epoch: 2, batch: 17278, sum loss: 4743.676270, avg loss: 2.647141, ppl: 14.113629 +epoch: 2, batch: 17279, sum loss: 3837.552246, avg loss: 2.332858, ppl: 10.307362 +epoch: 2, batch: 17280, sum loss: 4478.610352, avg loss: 2.616011, ppl: 13.681036 +epoch: 2, batch: 17281, sum loss: 3910.267090, avg loss: 2.460835, ppl: 11.714592 +epoch: 2, batch: 17282, sum loss: 4136.000000, avg loss: 2.497585, ppl: 12.153104 +epoch: 2, batch: 17283, sum loss: 4949.916992, avg loss: 2.669858, ppl: 14.437919 +epoch: 2, batch: 17284, sum loss: 5239.749512, avg loss: 2.609437, ppl: 13.591396 +epoch: 2, batch: 17285, sum loss: 4920.225098, avg loss: 2.703420, ppl: 14.930714 +epoch: 2, batch: 17286, sum loss: 4934.534180, avg loss: 2.619180, ppl: 13.724462 +epoch: 2, batch: 17287, sum loss: 
4880.408203, avg loss: 2.758852, ppl: 15.781708 +epoch: 2, batch: 17288, sum loss: 4012.617676, avg loss: 2.457206, ppl: 11.672154 +epoch: 2, batch: 17289, sum loss: 3215.990723, avg loss: 2.230229, ppl: 9.301999 +epoch: 2, batch: 17290, sum loss: 4178.873535, avg loss: 2.708278, ppl: 15.003424 +epoch: 2, batch: 17291, sum loss: 4800.783203, avg loss: 2.659714, ppl: 14.292197 +epoch: 2, batch: 17292, sum loss: 3921.444336, avg loss: 2.285224, ppl: 9.827887 +epoch: 2, batch: 17293, sum loss: 5163.190918, avg loss: 2.716039, ppl: 15.120318 +epoch: 2, batch: 17294, sum loss: 4733.007324, avg loss: 2.630910, ppl: 13.886403 +epoch: 2, batch: 17295, sum loss: 3513.726318, avg loss: 2.150383, ppl: 8.588149 +epoch: 2, batch: 17296, sum loss: 3622.393799, avg loss: 2.487908, ppl: 12.036069 +epoch: 2, batch: 17297, sum loss: 4933.429688, avg loss: 2.838567, ppl: 17.091261 +epoch: 2, batch: 17298, sum loss: 5024.063477, avg loss: 2.929483, ppl: 18.717949 +epoch: 2, batch: 17299, sum loss: 4119.141602, avg loss: 2.625329, ppl: 13.809120 +epoch: 2, batch: 17300, sum loss: 4404.927734, avg loss: 2.409698, ppl: 11.130599 +epoch: 2, batch: 17301, sum loss: 4735.042480, avg loss: 2.649716, ppl: 14.150022 +epoch: 2, batch: 17302, sum loss: 4156.193359, avg loss: 2.412184, ppl: 11.158307 +epoch: 2, batch: 17303, sum loss: 5264.383301, avg loss: 2.803186, ppl: 16.497122 +epoch: 2, batch: 17304, sum loss: 4084.666504, avg loss: 2.276849, ppl: 9.745920 +epoch: 2, batch: 17305, sum loss: 4258.825684, avg loss: 2.421163, ppl: 11.258946 +epoch: 2, batch: 17306, sum loss: 4484.206543, avg loss: 2.571219, ppl: 13.081764 +epoch: 2, batch: 17307, sum loss: 4278.310547, avg loss: 2.524077, ppl: 12.479371 +epoch: 2, batch: 17308, sum loss: 4615.858398, avg loss: 2.543173, ppl: 12.719965 +epoch: 2, batch: 17309, sum loss: 4296.303711, avg loss: 2.637387, ppl: 13.976635 +epoch: 2, batch: 17310, sum loss: 3729.105225, avg loss: 2.445315, ppl: 11.534181 +epoch: 2, batch: 17311, sum loss: 
4730.034668, avg loss: 2.770963, ppl: 15.974016 +epoch: 2, batch: 17312, sum loss: 3818.262207, avg loss: 2.252662, ppl: 9.513025 +epoch: 2, batch: 17313, sum loss: 5112.724609, avg loss: 2.708011, ppl: 14.999411 +epoch: 2, batch: 17314, sum loss: 4533.211914, avg loss: 2.591888, ppl: 13.354961 +epoch: 2, batch: 17315, sum loss: 3985.553467, avg loss: 2.286606, ppl: 9.841475 +epoch: 2, batch: 17316, sum loss: 3657.591309, avg loss: 2.334136, ppl: 10.320541 +epoch: 2, batch: 17317, sum loss: 4662.805664, avg loss: 2.574713, ppl: 13.127555 +epoch: 2, batch: 17318, sum loss: 4375.603027, avg loss: 2.463741, ppl: 11.748676 +epoch: 2, batch: 17319, sum loss: 4918.289551, avg loss: 2.644242, ppl: 14.072771 +epoch: 2, batch: 17320, sum loss: 3941.769287, avg loss: 2.584767, ppl: 13.260198 +epoch: 2, batch: 17321, sum loss: 3994.158203, avg loss: 2.532757, ppl: 12.588167 +epoch: 2, batch: 17322, sum loss: 3367.609619, avg loss: 2.243577, ppl: 9.426996 +epoch: 2, batch: 17323, sum loss: 3989.586426, avg loss: 2.381843, ppl: 10.824833 +epoch: 2, batch: 17324, sum loss: 3813.702148, avg loss: 2.589072, ppl: 13.317401 +epoch: 2, batch: 17325, sum loss: 3619.018311, avg loss: 2.297789, ppl: 9.952157 +epoch: 2, batch: 17326, sum loss: 4485.786133, avg loss: 2.567708, ppl: 13.035913 +epoch: 2, batch: 17327, sum loss: 4007.184570, avg loss: 2.241155, ppl: 9.404183 +epoch: 2, batch: 17328, sum loss: 4212.276367, avg loss: 2.555993, ppl: 12.884089 +epoch: 2, batch: 17329, sum loss: 4715.016602, avg loss: 2.559727, ppl: 12.932282 +epoch: 2, batch: 17330, sum loss: 4260.697266, avg loss: 2.477149, ppl: 11.907274 +epoch: 2, batch: 17331, sum loss: 5381.012695, avg loss: 2.632589, ppl: 13.909740 +epoch: 2, batch: 17332, sum loss: 4360.424316, avg loss: 2.440081, ppl: 11.473969 +epoch: 2, batch: 17333, sum loss: 3817.429443, avg loss: 2.448640, ppl: 11.572596 +epoch: 2, batch: 17334, sum loss: 4950.587402, avg loss: 2.727597, ppl: 15.296079 +epoch: 2, batch: 17335, sum loss: 4989.313965, 
avg loss: 2.620438, ppl: 13.741742 +epoch: 2, batch: 17336, sum loss: 3766.303467, avg loss: 2.469707, ppl: 11.818986 +epoch: 2, batch: 17337, sum loss: 4417.836914, avg loss: 2.598728, ppl: 13.446619 +epoch: 2, batch: 17338, sum loss: 3927.006104, avg loss: 2.361399, ppl: 10.605775 +epoch: 2, batch: 17339, sum loss: 3849.336914, avg loss: 2.344298, ppl: 10.425950 +epoch: 2, batch: 17340, sum loss: 4447.846191, avg loss: 2.669775, ppl: 14.436728 +epoch: 2, batch: 17341, sum loss: 4113.735352, avg loss: 2.503795, ppl: 12.228813 +epoch: 2, batch: 17342, sum loss: 4122.840820, avg loss: 2.373541, ppl: 10.735340 +epoch: 2, batch: 17343, sum loss: 5398.603516, avg loss: 2.664661, ppl: 14.363086 +epoch: 2, batch: 17344, sum loss: 4942.020020, avg loss: 2.715396, ppl: 15.110588 +epoch: 2, batch: 17345, sum loss: 4357.358887, avg loss: 2.802160, ppl: 16.480209 +epoch: 2, batch: 17346, sum loss: 3684.018066, avg loss: 2.464226, ppl: 11.754381 +epoch: 2, batch: 17347, sum loss: 4799.307617, avg loss: 2.655953, ppl: 14.238551 +epoch: 2, batch: 17348, sum loss: 4299.671387, avg loss: 2.545691, ppl: 12.752030 +epoch: 2, batch: 17349, sum loss: 4726.128906, avg loss: 2.576951, ppl: 13.156965 +epoch: 2, batch: 17350, sum loss: 4111.868164, avg loss: 2.714104, ppl: 15.091088 +epoch: 2, batch: 17351, sum loss: 4495.194336, avg loss: 2.634932, ppl: 13.942365 +epoch: 2, batch: 17352, sum loss: 4687.355957, avg loss: 2.747571, ppl: 15.604678 +epoch: 2, batch: 17353, sum loss: 4009.508789, avg loss: 2.462843, ppl: 11.738138 +epoch: 2, batch: 17354, sum loss: 5508.023438, avg loss: 2.751260, ppl: 15.662358 +epoch: 2, batch: 17355, sum loss: 4312.624023, avg loss: 2.663758, ppl: 14.350116 +epoch: 2, batch: 17356, sum loss: 4053.515869, avg loss: 2.844572, ppl: 17.194202 +epoch: 2, batch: 17357, sum loss: 4046.127930, avg loss: 2.543135, ppl: 12.719486 +epoch: 2, batch: 17358, sum loss: 3469.390625, avg loss: 2.168369, ppl: 8.744011 +epoch: 2, batch: 17359, sum loss: 4238.138672, avg 
loss: 2.713277, ppl: 15.078609 +epoch: 2, batch: 17360, sum loss: 4369.108398, avg loss: 2.526957, ppl: 12.515365 +epoch: 2, batch: 17361, sum loss: 5070.040039, avg loss: 2.678309, ppl: 14.560457 +epoch: 2, batch: 17362, sum loss: 4367.269043, avg loss: 2.640429, ppl: 14.019214 +epoch: 2, batch: 17363, sum loss: 3721.481201, avg loss: 2.487621, ppl: 12.032618 +epoch: 2, batch: 17364, sum loss: 4476.289551, avg loss: 2.677207, ppl: 14.544411 +epoch: 2, batch: 17365, sum loss: 5537.275391, avg loss: 2.699793, ppl: 14.876654 +epoch: 2, batch: 17366, sum loss: 4649.100098, avg loss: 2.466366, ppl: 11.779563 +epoch: 2, batch: 17367, sum loss: 5086.483887, avg loss: 2.743519, ppl: 15.541577 +epoch: 2, batch: 17368, sum loss: 4445.505859, avg loss: 2.521558, ppl: 12.447970 +epoch: 2, batch: 17369, sum loss: 4026.063965, avg loss: 2.506889, ppl: 12.266710 +epoch: 2, batch: 17370, sum loss: 4493.134766, avg loss: 2.609254, ppl: 13.588905 +epoch: 2, batch: 17371, sum loss: 4342.242188, avg loss: 2.650941, ppl: 14.167369 +epoch: 2, batch: 17372, sum loss: 4978.080078, avg loss: 2.781050, ppl: 16.135958 +epoch: 2, batch: 17373, sum loss: 4453.281738, avg loss: 2.628856, ppl: 13.857903 +epoch: 2, batch: 17374, sum loss: 4579.421387, avg loss: 2.521708, ppl: 12.449840 +epoch: 2, batch: 17375, sum loss: 4652.509766, avg loss: 2.831716, ppl: 16.974566 +epoch: 2, batch: 17376, sum loss: 5517.967285, avg loss: 2.844313, ppl: 17.189747 +epoch: 2, batch: 17377, sum loss: 4788.328613, avg loss: 2.948478, ppl: 19.076900 +epoch: 2, batch: 17378, sum loss: 4901.005371, avg loss: 2.707738, ppl: 14.995317 +epoch: 2, batch: 17379, sum loss: 4471.436035, avg loss: 2.526235, ppl: 12.506332 +epoch: 2, batch: 17380, sum loss: 4302.149414, avg loss: 2.539639, ppl: 12.675100 +epoch: 2, batch: 17381, sum loss: 4080.146240, avg loss: 2.486378, ppl: 12.017669 +epoch: 2, batch: 17382, sum loss: 3979.569580, avg loss: 2.470248, ppl: 11.825382 +epoch: 2, batch: 17383, sum loss: 4573.190430, avg loss: 
2.635845, ppl: 13.955095 +epoch: 2, batch: 17384, sum loss: 4063.389648, avg loss: 2.230181, ppl: 9.301549 +epoch: 2, batch: 17385, sum loss: 4771.164551, avg loss: 2.646237, ppl: 14.100872 +epoch: 2, batch: 17386, sum loss: 3757.565918, avg loss: 2.366225, ppl: 10.657091 +epoch: 2, batch: 17387, sum loss: 4701.365723, avg loss: 2.689569, ppl: 14.725325 +epoch: 2, batch: 17388, sum loss: 3386.714600, avg loss: 2.203458, ppl: 9.056274 +epoch: 2, batch: 17389, sum loss: 4252.540527, avg loss: 2.507394, ppl: 12.272909 +epoch: 2, batch: 17390, sum loss: 4780.275391, avg loss: 2.630862, ppl: 13.885728 +epoch: 2, batch: 17391, sum loss: 5689.360352, avg loss: 3.039188, ppl: 20.888283 +epoch: 2, batch: 17392, sum loss: 4463.989258, avg loss: 2.721945, ppl: 15.209870 +epoch: 2, batch: 17393, sum loss: 4228.323730, avg loss: 2.188573, ppl: 8.922475 +epoch: 2, batch: 17394, sum loss: 3380.912598, avg loss: 2.290591, ppl: 9.880778 +epoch: 2, batch: 17395, sum loss: 5319.573730, avg loss: 2.856914, ppl: 17.407724 +epoch: 2, batch: 17396, sum loss: 4092.943848, avg loss: 2.630427, ppl: 13.879690 +epoch: 2, batch: 17397, sum loss: 4695.310547, avg loss: 2.687642, ppl: 14.696978 +epoch: 2, batch: 17398, sum loss: 4310.230957, avg loss: 2.488586, ppl: 12.044233 +epoch: 2, batch: 17399, sum loss: 3870.759277, avg loss: 2.330379, ppl: 10.281836 +epoch: 2, batch: 17400, sum loss: 3425.909912, avg loss: 2.025967, ppl: 7.583440 +epoch: 2, batch: 17401, sum loss: 4723.310547, avg loss: 2.621149, ppl: 13.751516 +epoch: 2, batch: 17402, sum loss: 4473.039551, avg loss: 2.742514, ppl: 15.525963 +epoch: 2, batch: 17403, sum loss: 3732.960449, avg loss: 2.295794, ppl: 9.932315 +epoch: 2, batch: 17404, sum loss: 4800.812012, avg loss: 2.726185, ppl: 15.274505 +epoch: 2, batch: 17405, sum loss: 4500.342773, avg loss: 2.561379, ppl: 12.953668 +epoch: 2, batch: 17406, sum loss: 4809.386230, avg loss: 2.937927, ppl: 18.876675 +epoch: 2, batch: 17407, sum loss: 4563.580078, avg loss: 2.682881, 
ppl: 14.627169 +epoch: 2, batch: 17408, sum loss: 4455.069824, avg loss: 2.787904, ppl: 16.246923 +epoch: 2, batch: 17409, sum loss: 4370.356445, avg loss: 2.484569, ppl: 11.995947 +epoch: 2, batch: 17410, sum loss: 4420.406250, avg loss: 2.497405, ppl: 12.150919 +epoch: 2, batch: 17411, sum loss: 4621.030273, avg loss: 2.591717, ppl: 13.352673 +epoch: 2, batch: 17412, sum loss: 4315.612793, avg loss: 2.525227, ppl: 12.493731 +epoch: 2, batch: 17413, sum loss: 3904.508789, avg loss: 2.229874, ppl: 9.298691 +epoch: 2, batch: 17414, sum loss: 5198.873047, avg loss: 2.699311, ppl: 14.869487 +epoch: 2, batch: 17415, sum loss: 4551.902344, avg loss: 2.626602, ppl: 13.826713 +epoch: 2, batch: 17416, sum loss: 4082.323486, avg loss: 2.506030, ppl: 12.256180 +epoch: 2, batch: 17417, sum loss: 4851.680664, avg loss: 2.667224, ppl: 14.399941 +epoch: 2, batch: 17418, sum loss: 4554.208008, avg loss: 2.693204, ppl: 14.778955 +epoch: 2, batch: 17419, sum loss: 3758.576660, avg loss: 2.265568, ppl: 9.636592 +epoch: 2, batch: 17420, sum loss: 5868.141602, avg loss: 2.869507, ppl: 17.628323 +epoch: 2, batch: 17421, sum loss: 4359.929688, avg loss: 2.497096, ppl: 12.147168 +epoch: 2, batch: 17422, sum loss: 4831.982422, avg loss: 2.520596, ppl: 12.436007 +epoch: 2, batch: 17423, sum loss: 4634.709473, avg loss: 2.553559, ppl: 12.852766 +epoch: 2, batch: 17424, sum loss: 3416.093750, avg loss: 2.232741, ppl: 9.325393 +epoch: 2, batch: 17425, sum loss: 4669.664551, avg loss: 2.569986, ppl: 13.065643 +epoch: 2, batch: 17426, sum loss: 4809.040039, avg loss: 2.589682, ppl: 13.325535 +epoch: 2, batch: 17427, sum loss: 4505.608398, avg loss: 2.486539, ppl: 12.019603 +epoch: 2, batch: 17428, sum loss: 4360.692383, avg loss: 2.527938, ppl: 12.527643 +epoch: 2, batch: 17429, sum loss: 4002.676758, avg loss: 2.602521, ppl: 13.497719 +epoch: 2, batch: 17430, sum loss: 4881.852539, avg loss: 2.538665, ppl: 12.662752 +epoch: 2, batch: 17431, sum loss: 4339.402832, avg loss: 2.543612, ppl: 
12.725559 +epoch: 2, batch: 17432, sum loss: 4416.161133, avg loss: 2.627104, ppl: 13.833644 +epoch: 2, batch: 17433, sum loss: 3983.225586, avg loss: 2.299784, ppl: 9.972025 +epoch: 2, batch: 17434, sum loss: 4148.757324, avg loss: 2.478350, ppl: 11.921576 +epoch: 2, batch: 17435, sum loss: 4687.049805, avg loss: 2.695256, ppl: 14.809306 +epoch: 2, batch: 17436, sum loss: 4607.012207, avg loss: 2.531326, ppl: 12.570158 +epoch: 2, batch: 17437, sum loss: 3513.071533, avg loss: 2.449841, ppl: 11.586502 +epoch: 2, batch: 17438, sum loss: 4297.852051, avg loss: 2.414524, ppl: 11.184441 +epoch: 2, batch: 17439, sum loss: 4074.562988, avg loss: 2.317726, ppl: 10.152565 +epoch: 2, batch: 17440, sum loss: 3686.052002, avg loss: 2.299471, ppl: 9.968906 +epoch: 2, batch: 17441, sum loss: 3931.790039, avg loss: 2.501139, ppl: 12.196374 +epoch: 2, batch: 17442, sum loss: 4007.211914, avg loss: 2.542647, ppl: 12.713280 +epoch: 2, batch: 17443, sum loss: 3826.146729, avg loss: 2.520518, ppl: 12.435040 +epoch: 2, batch: 17444, sum loss: 4629.799805, avg loss: 2.603937, ppl: 13.516848 +epoch: 2, batch: 17445, sum loss: 4943.201172, avg loss: 2.593495, ppl: 13.376436 +epoch: 2, batch: 17446, sum loss: 5360.859375, avg loss: 3.015107, ppl: 20.391266 +epoch: 2, batch: 17447, sum loss: 4017.340332, avg loss: 2.412817, ppl: 11.165370 +epoch: 2, batch: 17448, sum loss: 4436.272949, avg loss: 2.695184, ppl: 14.808247 +epoch: 2, batch: 17449, sum loss: 4035.025879, avg loss: 2.645919, ppl: 14.096388 +epoch: 2, batch: 17450, sum loss: 5003.711426, avg loss: 2.887312, ppl: 17.945004 +epoch: 2, batch: 17451, sum loss: 3529.039307, avg loss: 2.249228, ppl: 9.480416 +epoch: 2, batch: 17452, sum loss: 4718.929688, avg loss: 2.764458, ppl: 15.870435 +epoch: 2, batch: 17453, sum loss: 4516.235840, avg loss: 2.606022, ppl: 13.545056 +epoch: 2, batch: 17454, sum loss: 4678.607422, avg loss: 2.682688, ppl: 14.624344 +epoch: 2, batch: 17455, sum loss: 4416.455078, avg loss: 2.564724, ppl: 12.997073 
+epoch: 2, batch: 17456, sum loss: 4099.778320, avg loss: 2.336056, ppl: 10.340374 +epoch: 2, batch: 17457, sum loss: 4226.703125, avg loss: 2.493630, ppl: 12.105140 +epoch: 2, batch: 17458, sum loss: 4157.341797, avg loss: 2.622929, ppl: 13.776010 +epoch: 2, batch: 17459, sum loss: 3986.452881, avg loss: 2.515112, ppl: 12.367996 +epoch: 2, batch: 17460, sum loss: 4937.404297, avg loss: 2.580975, ppl: 13.210006 +epoch: 2, batch: 17461, sum loss: 5104.424316, avg loss: 2.783219, ppl: 16.170998 +epoch: 2, batch: 17462, sum loss: 4035.342285, avg loss: 2.471122, ppl: 11.835719 +epoch: 2, batch: 17463, sum loss: 5395.952148, avg loss: 2.808929, ppl: 16.592138 +epoch: 2, batch: 17464, sum loss: 4795.944824, avg loss: 2.534854, ppl: 12.614594 +epoch: 2, batch: 17465, sum loss: 4127.371094, avg loss: 2.337130, ppl: 10.351480 +epoch: 2, batch: 17466, sum loss: 4662.384277, avg loss: 2.681072, ppl: 14.600740 +epoch: 2, batch: 17467, sum loss: 4460.151367, avg loss: 2.576633, ppl: 13.152775 +epoch: 2, batch: 17468, sum loss: 4639.124023, avg loss: 2.609181, ppl: 13.587920 +epoch: 2, batch: 17469, sum loss: 3901.481934, avg loss: 2.305840, ppl: 10.032607 +epoch: 2, batch: 17470, sum loss: 4990.932129, avg loss: 2.699260, ppl: 14.868729 +epoch: 2, batch: 17471, sum loss: 3874.706299, avg loss: 2.394750, ppl: 10.965460 +epoch: 2, batch: 17472, sum loss: 4803.323730, avg loss: 2.621902, ppl: 13.761870 +epoch: 2, batch: 17473, sum loss: 3930.041992, avg loss: 2.450151, ppl: 11.590096 +epoch: 2, batch: 17474, sum loss: 4748.719727, avg loss: 2.436491, ppl: 11.432847 +epoch: 2, batch: 17475, sum loss: 4618.308594, avg loss: 2.767111, ppl: 15.912601 +epoch: 2, batch: 17476, sum loss: 5239.282227, avg loss: 2.785371, ppl: 16.205822 +epoch: 2, batch: 17477, sum loss: 4677.176758, avg loss: 2.645462, ppl: 14.089953 +epoch: 2, batch: 17478, sum loss: 4385.044434, avg loss: 2.654385, ppl: 14.216245 +epoch: 2, batch: 17479, sum loss: 4308.926758, avg loss: 2.616227, ppl: 13.684002 +epoch: 
2, batch: 17480, sum loss: 4291.900391, avg loss: 2.613825, ppl: 13.651165 +epoch: 2, batch: 17481, sum loss: 4959.814453, avg loss: 2.437255, ppl: 11.441592 +epoch: 2, batch: 17482, sum loss: 4107.250000, avg loss: 2.584802, ppl: 13.260659 +epoch: 2, batch: 17483, sum loss: 4480.532715, avg loss: 2.733699, ppl: 15.389706 +epoch: 2, batch: 17484, sum loss: 3841.828369, avg loss: 2.216866, ppl: 9.178518 +epoch: 2, batch: 17485, sum loss: 3437.831055, avg loss: 2.193894, ppl: 8.970072 +epoch: 2, batch: 17486, sum loss: 4366.480957, avg loss: 2.535703, ppl: 12.625305 +epoch: 2, batch: 17487, sum loss: 4183.756836, avg loss: 2.523376, ppl: 12.470623 +epoch: 2, batch: 17488, sum loss: 4231.661621, avg loss: 2.678267, ppl: 14.559835 +epoch: 2, batch: 17489, sum loss: 5017.046387, avg loss: 2.568892, ppl: 13.051358 +epoch: 2, batch: 17490, sum loss: 3663.883789, avg loss: 2.363796, ppl: 10.631231 +epoch: 2, batch: 17491, sum loss: 4235.202148, avg loss: 2.406365, ppl: 11.093559 +epoch: 2, batch: 17492, sum loss: 4514.033203, avg loss: 2.524627, ppl: 12.486237 +epoch: 2, batch: 17493, sum loss: 4263.336914, avg loss: 2.330966, ppl: 10.287877 +epoch: 2, batch: 17494, sum loss: 4360.352051, avg loss: 2.511724, ppl: 12.326156 +epoch: 2, batch: 17495, sum loss: 4122.565430, avg loss: 2.484970, ppl: 12.000761 +epoch: 2, batch: 17496, sum loss: 3786.195801, avg loss: 2.413127, ppl: 11.168828 +epoch: 2, batch: 17497, sum loss: 4748.465820, avg loss: 2.507110, ppl: 12.269419 +epoch: 2, batch: 17498, sum loss: 4157.758789, avg loss: 2.375862, ppl: 10.760286 +epoch: 2, batch: 17499, sum loss: 4471.588379, avg loss: 2.568402, ppl: 13.044966 +epoch: 2, batch: 17500, sum loss: 4689.365234, avg loss: 2.525237, ppl: 12.493854 +epoch: 2, batch: 17501, sum loss: 4527.635742, avg loss: 2.709537, ppl: 15.022315 +epoch: 2, batch: 17502, sum loss: 3731.433594, avg loss: 2.348290, ppl: 10.467659 +epoch: 2, batch: 17503, sum loss: 4048.969238, avg loss: 2.574043, ppl: 13.118751 +epoch: 2, batch: 
17504, sum loss: 4637.353027, avg loss: 2.600871, ppl: 13.475471 +epoch: 2, batch: 17505, sum loss: 4554.019531, avg loss: 2.688323, ppl: 14.706995 +epoch: 2, batch: 17506, sum loss: 3978.510254, avg loss: 2.287815, ppl: 9.853385 +epoch: 2, batch: 17507, sum loss: 3784.801758, avg loss: 2.307806, ppl: 10.052345 +epoch: 2, batch: 17508, sum loss: 3386.225830, avg loss: 2.338554, ppl: 10.366235 +epoch: 2, batch: 17509, sum loss: 4046.828857, avg loss: 2.400254, ppl: 11.025982 +epoch: 2, batch: 17510, sum loss: 3845.613525, avg loss: 2.332088, ppl: 10.299427 +epoch: 2, batch: 17511, sum loss: 3485.989746, avg loss: 2.338022, ppl: 10.360718 +epoch: 2, batch: 17512, sum loss: 3692.839600, avg loss: 2.463536, ppl: 11.746267 +epoch: 2, batch: 17513, sum loss: 4514.864746, avg loss: 2.568182, ppl: 13.042098 +epoch: 2, batch: 17514, sum loss: 4080.489258, avg loss: 2.474524, ppl: 11.876047 +epoch: 2, batch: 17515, sum loss: 3570.143555, avg loss: 2.206517, ppl: 9.084017 +epoch: 2, batch: 17516, sum loss: 3781.871582, avg loss: 2.353374, ppl: 10.521006 +epoch: 2, batch: 17517, sum loss: 4076.771484, avg loss: 2.493438, ppl: 12.102818 +epoch: 2, batch: 17518, sum loss: 3346.447021, avg loss: 2.201610, ppl: 9.039557 +epoch: 2, batch: 17519, sum loss: 3286.548828, avg loss: 2.042603, ppl: 7.710656 +epoch: 2, batch: 17520, sum loss: 4533.378418, avg loss: 2.641829, ppl: 14.038857 +epoch: 2, batch: 17521, sum loss: 4760.455566, avg loss: 2.691043, ppl: 14.747055 +epoch: 2, batch: 17522, sum loss: 4347.273438, avg loss: 2.474259, ppl: 11.872908 +epoch: 2, batch: 17523, sum loss: 4003.679199, avg loss: 2.474462, ppl: 11.875314 +epoch: 2, batch: 17524, sum loss: 4564.889160, avg loss: 2.571768, ppl: 13.088949 +epoch: 2, batch: 17525, sum loss: 3936.196777, avg loss: 2.341580, ppl: 10.397655 +epoch: 2, batch: 17526, sum loss: 3705.853760, avg loss: 2.276323, ppl: 9.740796 +epoch: 2, batch: 17527, sum loss: 5042.963867, avg loss: 2.561180, ppl: 12.951096 +epoch: 2, batch: 17528, sum 
loss: 4495.909668, avg loss: 2.601799, ppl: 13.487988 +epoch: 2, batch: 17529, sum loss: 4174.723633, avg loss: 2.554910, ppl: 12.870147 +epoch: 2, batch: 17530, sum loss: 4030.838867, avg loss: 2.378076, ppl: 10.784135 +epoch: 2, batch: 17531, sum loss: 4803.549805, avg loss: 2.703179, ppl: 14.927115 +epoch: 2, batch: 17532, sum loss: 3718.375000, avg loss: 2.326893, ppl: 10.246058 +epoch: 2, batch: 17533, sum loss: 4613.750000, avg loss: 2.504750, ppl: 12.240501 +epoch: 2, batch: 17534, sum loss: 3822.523438, avg loss: 2.628971, ppl: 13.859499 +epoch: 2, batch: 17535, sum loss: 3989.515381, avg loss: 2.307412, ppl: 10.048388 +epoch: 2, batch: 17536, sum loss: 4179.645020, avg loss: 2.438533, ppl: 11.456218 +epoch: 2, batch: 17537, sum loss: 4060.364014, avg loss: 2.318883, ppl: 10.164314 +epoch: 2, batch: 17538, sum loss: 4207.335449, avg loss: 2.408320, ppl: 11.115273 +epoch: 2, batch: 17539, sum loss: 3969.823730, avg loss: 2.391460, ppl: 10.929441 +epoch: 2, batch: 17540, sum loss: 4646.840820, avg loss: 2.775891, ppl: 16.052921 +epoch: 2, batch: 17541, sum loss: 3561.794434, avg loss: 2.281739, ppl: 9.793696 +epoch: 2, batch: 17542, sum loss: 3734.497070, avg loss: 2.348740, ppl: 10.472370 +epoch: 2, batch: 17543, sum loss: 4424.565430, avg loss: 2.681555, ppl: 14.607788 +epoch: 2, batch: 17544, sum loss: 4252.640137, avg loss: 2.416273, ppl: 11.204020 +epoch: 2, batch: 17545, sum loss: 4037.517578, avg loss: 2.472454, ppl: 11.851496 +epoch: 2, batch: 17546, sum loss: 4569.390625, avg loss: 2.670597, ppl: 14.448586 +epoch: 2, batch: 17547, sum loss: 3821.872314, avg loss: 2.341834, ppl: 10.400289 +epoch: 2, batch: 17548, sum loss: 4419.961426, avg loss: 2.403459, ppl: 11.061375 +epoch: 2, batch: 17549, sum loss: 3853.321289, avg loss: 2.265327, ppl: 9.634274 +epoch: 2, batch: 17550, sum loss: 4333.214355, avg loss: 2.419438, ppl: 11.239545 +epoch: 2, batch: 17551, sum loss: 4798.227051, avg loss: 2.586645, ppl: 13.285130 +epoch: 2, batch: 17552, sum loss: 
3978.828613, avg loss: 2.499264, ppl: 12.173533 +epoch: 2, batch: 17553, sum loss: 4451.155273, avg loss: 2.531943, ppl: 12.577916 +epoch: 2, batch: 17554, sum loss: 3764.194336, avg loss: 2.367418, ppl: 10.669806 +epoch: 2, batch: 17555, sum loss: 3601.065918, avg loss: 2.259138, ppl: 9.574831 +epoch: 2, batch: 17556, sum loss: 4214.910156, avg loss: 2.522388, ppl: 12.458311 +epoch: 2, batch: 17557, sum loss: 3580.960205, avg loss: 2.231128, ppl: 9.310360 +epoch: 2, batch: 17558, sum loss: 5033.295410, avg loss: 2.941727, ppl: 18.948549 +epoch: 2, batch: 17559, sum loss: 4952.889160, avg loss: 2.823768, ppl: 16.840183 +epoch: 2, batch: 17560, sum loss: 3840.076660, avg loss: 2.508215, ppl: 12.282982 +epoch: 2, batch: 17561, sum loss: 3654.053955, avg loss: 2.437661, ppl: 11.446239 +epoch: 2, batch: 17562, sum loss: 4858.493652, avg loss: 2.944542, ppl: 19.001951 +epoch: 2, batch: 17563, sum loss: 3714.545898, avg loss: 2.451846, ppl: 11.609755 +epoch: 2, batch: 17564, sum loss: 4154.641113, avg loss: 2.553559, ppl: 12.852769 +epoch: 2, batch: 17565, sum loss: 4435.248047, avg loss: 2.524330, ppl: 12.482531 +epoch: 2, batch: 17566, sum loss: 4198.413086, avg loss: 2.539875, ppl: 12.678083 +epoch: 2, batch: 17567, sum loss: 4003.376709, avg loss: 2.337056, ppl: 10.350718 +epoch: 2, batch: 17568, sum loss: 4511.060059, avg loss: 2.770921, ppl: 15.973346 +epoch: 2, batch: 17569, sum loss: 3732.363281, avg loss: 2.389477, ppl: 10.907790 +epoch: 2, batch: 17570, sum loss: 4839.867676, avg loss: 2.757759, ppl: 15.764481 +epoch: 2, batch: 17571, sum loss: 4752.610840, avg loss: 2.787455, ppl: 16.239639 +epoch: 2, batch: 17572, sum loss: 4128.251953, avg loss: 2.581771, ppl: 13.220530 +epoch: 2, batch: 17573, sum loss: 4960.020020, avg loss: 2.581999, ppl: 13.223546 +epoch: 2, batch: 17574, sum loss: 3731.448975, avg loss: 2.387363, ppl: 10.884758 +epoch: 2, batch: 17575, sum loss: 5011.328125, avg loss: 2.805895, ppl: 16.541872 +epoch: 2, batch: 17576, sum loss: 
4045.827881, avg loss: 2.546147, ppl: 12.757854 +epoch: 2, batch: 17577, sum loss: 4019.450439, avg loss: 2.462899, ppl: 11.738790 +epoch: 2, batch: 17578, sum loss: 4133.492188, avg loss: 2.561024, ppl: 12.949067 +epoch: 2, batch: 17579, sum loss: 4351.688477, avg loss: 2.522718, ppl: 12.462426 +epoch: 2, batch: 17580, sum loss: 3797.572266, avg loss: 2.516615, ppl: 12.386599 +epoch: 2, batch: 17581, sum loss: 4010.017578, avg loss: 2.345040, ppl: 10.433686 +epoch: 2, batch: 17582, sum loss: 4507.995605, avg loss: 2.680140, ppl: 14.587135 +epoch: 2, batch: 17583, sum loss: 3688.660400, avg loss: 2.347970, ppl: 10.464303 +epoch: 2, batch: 17584, sum loss: 4463.771484, avg loss: 2.608867, ppl: 13.583654 +epoch: 2, batch: 17585, sum loss: 4273.604004, avg loss: 2.422678, ppl: 11.276016 +epoch: 2, batch: 17586, sum loss: 4305.569336, avg loss: 2.297529, ppl: 9.949567 +epoch: 2, batch: 17587, sum loss: 4798.416992, avg loss: 2.609253, ppl: 13.588902 +epoch: 2, batch: 17588, sum loss: 5302.167969, avg loss: 2.655067, ppl: 14.225935 +epoch: 2, batch: 17589, sum loss: 4716.157715, avg loss: 2.507261, ppl: 12.271271 +epoch: 2, batch: 17590, sum loss: 4001.719482, avg loss: 2.627524, ppl: 13.839466 +epoch: 2, batch: 17591, sum loss: 4896.176758, avg loss: 2.568823, ppl: 13.050453 +epoch: 2, batch: 17592, sum loss: 4409.113281, avg loss: 2.547148, ppl: 12.770630 +epoch: 2, batch: 17593, sum loss: 3723.779297, avg loss: 2.252740, ppl: 9.513767 +epoch: 2, batch: 17594, sum loss: 4496.798828, avg loss: 2.447904, ppl: 11.564078 +epoch: 2, batch: 17595, sum loss: 3837.605713, avg loss: 2.330058, ppl: 10.278539 +epoch: 2, batch: 17596, sum loss: 4857.301758, avg loss: 2.648474, ppl: 14.132459 +epoch: 2, batch: 17597, sum loss: 3620.111328, avg loss: 2.460987, ppl: 11.716366 +epoch: 2, batch: 17598, sum loss: 4706.409180, avg loss: 2.737876, ppl: 15.454132 +epoch: 2, batch: 17599, sum loss: 4825.296875, avg loss: 2.671814, ppl: 14.466193 +epoch: 2, batch: 17600, sum loss: 
3713.132568, avg loss: 2.277995, ppl: 9.757101 +epoch: 2, batch: 17601, sum loss: 3762.416504, avg loss: 2.294156, ppl: 9.916066 +epoch: 2, batch: 17602, sum loss: 4563.189941, avg loss: 2.328158, ppl: 10.259028 +epoch: 2, batch: 17603, sum loss: 5299.587891, avg loss: 2.763080, ppl: 15.848587 +epoch: 2, batch: 17604, sum loss: 4055.531494, avg loss: 2.576576, ppl: 13.152035 +epoch: 2, batch: 17605, sum loss: 3869.398926, avg loss: 2.385573, ppl: 10.865283 +epoch: 2, batch: 17606, sum loss: 3852.090332, avg loss: 2.581830, ppl: 13.221312 +epoch: 2, batch: 17607, sum loss: 4396.517578, avg loss: 2.539871, ppl: 12.678041 +epoch: 2, batch: 17608, sum loss: 3963.321777, avg loss: 2.368991, ppl: 10.686603 +epoch: 2, batch: 17609, sum loss: 4913.631836, avg loss: 2.676270, ppl: 14.530792 +epoch: 2, batch: 17610, sum loss: 4172.770996, avg loss: 2.474953, ppl: 11.881151 +epoch: 2, batch: 17611, sum loss: 4733.357422, avg loss: 2.687880, ppl: 14.700482 +epoch: 2, batch: 17612, sum loss: 4717.619629, avg loss: 2.612193, ppl: 13.628901 +epoch: 2, batch: 17613, sum loss: 4967.832031, avg loss: 2.749215, ppl: 15.630360 +epoch: 2, batch: 17614, sum loss: 4708.970215, avg loss: 2.675551, ppl: 14.520350 +epoch: 2, batch: 17615, sum loss: 4420.961914, avg loss: 2.503376, ppl: 12.223689 +epoch: 2, batch: 17616, sum loss: 5087.032227, avg loss: 2.634403, ppl: 13.934991 +epoch: 2, batch: 17617, sum loss: 4934.185547, avg loss: 2.770458, ppl: 15.965944 +epoch: 2, batch: 17618, sum loss: 4443.708984, avg loss: 2.542168, ppl: 12.707186 +epoch: 2, batch: 17619, sum loss: 4436.032715, avg loss: 2.461727, ppl: 11.725048 +epoch: 2, batch: 17620, sum loss: 4361.040039, avg loss: 2.536963, ppl: 12.641227 +epoch: 2, batch: 17621, sum loss: 4035.436768, avg loss: 2.775403, ppl: 16.045099 +epoch: 2, batch: 17622, sum loss: 4713.540527, avg loss: 2.596992, ppl: 13.423300 +epoch: 2, batch: 17623, sum loss: 4192.536621, avg loss: 2.292256, ppl: 9.897244 +epoch: 2, batch: 17624, sum loss: 
3954.791016, avg loss: 2.626023, ppl: 13.818707 +epoch: 2, batch: 17625, sum loss: 4748.833008, avg loss: 2.651498, ppl: 14.175255 +epoch: 2, batch: 17626, sum loss: 4302.908691, avg loss: 2.603091, ppl: 13.505412 +epoch: 2, batch: 17627, sum loss: 4978.684570, avg loss: 2.649646, ppl: 14.149030 +epoch: 2, batch: 17628, sum loss: 4463.139648, avg loss: 2.491982, ppl: 12.085202 +epoch: 2, batch: 17629, sum loss: 5047.549316, avg loss: 2.731358, ppl: 15.353721 +epoch: 2, batch: 17630, sum loss: 4891.628906, avg loss: 2.554375, ppl: 12.863263 +epoch: 2, batch: 17631, sum loss: 5333.031738, avg loss: 2.637503, ppl: 13.978258 +epoch: 2, batch: 17632, sum loss: 4036.252930, avg loss: 2.301170, ppl: 9.985863 +epoch: 2, batch: 17633, sum loss: 4401.589355, avg loss: 2.568022, ppl: 13.040003 +epoch: 2, batch: 17634, sum loss: 4109.759766, avg loss: 2.536889, ppl: 12.640284 +epoch: 2, batch: 17635, sum loss: 4251.090332, avg loss: 2.434760, ppl: 11.413077 +epoch: 2, batch: 17636, sum loss: 4434.759277, avg loss: 2.771724, ppl: 15.986177 +epoch: 2, batch: 17637, sum loss: 3694.490967, avg loss: 2.462994, ppl: 11.739906 +epoch: 2, batch: 17638, sum loss: 4115.956055, avg loss: 2.536017, ppl: 12.629271 +epoch: 2, batch: 17639, sum loss: 4815.324707, avg loss: 2.797981, ppl: 16.411470 +epoch: 2, batch: 17640, sum loss: 4107.616699, avg loss: 2.464077, ppl: 11.752632 +epoch: 2, batch: 17641, sum loss: 4391.602539, avg loss: 2.655141, ppl: 14.226987 +epoch: 2, batch: 17642, sum loss: 4543.079590, avg loss: 2.568163, ppl: 13.041841 +epoch: 2, batch: 17643, sum loss: 5082.023438, avg loss: 2.634538, ppl: 13.936868 +epoch: 2, batch: 17644, sum loss: 4920.070312, avg loss: 2.668151, ppl: 14.413293 +epoch: 2, batch: 17645, sum loss: 4038.424805, avg loss: 2.326282, ppl: 10.239795 +epoch: 2, batch: 17646, sum loss: 3657.271729, avg loss: 2.342903, ppl: 10.411419 +epoch: 2, batch: 17647, sum loss: 3748.794189, avg loss: 2.326998, ppl: 10.247138 +epoch: 2, batch: 17648, sum loss: 
3432.034912, avg loss: 2.209939, ppl: 9.115158 +epoch: 2, batch: 17649, sum loss: 4042.316650, avg loss: 2.449889, ppl: 11.587060 +epoch: 2, batch: 17650, sum loss: 4285.877930, avg loss: 2.413220, ppl: 11.169867 +epoch: 2, batch: 17651, sum loss: 4023.958496, avg loss: 2.551654, ppl: 12.828305 +epoch: 2, batch: 17652, sum loss: 4548.567383, avg loss: 2.562573, ppl: 12.969144 +epoch: 2, batch: 17653, sum loss: 5305.529785, avg loss: 2.817594, ppl: 16.736534 +epoch: 2, batch: 17654, sum loss: 5315.597656, avg loss: 2.698273, ppl: 14.854055 +epoch: 2, batch: 17655, sum loss: 4339.342285, avg loss: 2.767438, ppl: 15.917795 +epoch: 2, batch: 17656, sum loss: 4374.598633, avg loss: 2.565747, ppl: 13.010374 +epoch: 2, batch: 17657, sum loss: 3977.897949, avg loss: 2.514474, ppl: 12.360105 +epoch: 2, batch: 17658, sum loss: 4572.836426, avg loss: 2.558946, ppl: 12.922189 +epoch: 2, batch: 17659, sum loss: 4594.359863, avg loss: 2.663397, ppl: 14.344937 +epoch: 2, batch: 17660, sum loss: 4379.699707, avg loss: 2.511296, ppl: 12.320882 +epoch: 2, batch: 17661, sum loss: 4367.947754, avg loss: 2.430689, ppl: 11.366710 +epoch: 2, batch: 17662, sum loss: 4064.803711, avg loss: 2.639483, ppl: 14.005960 +epoch: 2, batch: 17663, sum loss: 5480.864258, avg loss: 2.796359, ppl: 16.384886 +epoch: 2, batch: 17664, sum loss: 3552.524902, avg loss: 2.444959, ppl: 11.530073 +epoch: 2, batch: 17665, sum loss: 5298.817383, avg loss: 2.932384, ppl: 18.772322 +epoch: 2, batch: 17666, sum loss: 3681.389404, avg loss: 2.460822, ppl: 11.714436 +epoch: 2, batch: 17667, sum loss: 4413.257324, avg loss: 2.717523, ppl: 15.142765 +epoch: 2, batch: 17668, sum loss: 5167.070801, avg loss: 2.630892, ppl: 13.886145 +epoch: 2, batch: 17669, sum loss: 4105.021484, avg loss: 2.784954, ppl: 16.199066 +epoch: 2, batch: 17670, sum loss: 4815.088867, avg loss: 2.598537, ppl: 13.444055 +epoch: 2, batch: 17671, sum loss: 4476.754883, avg loss: 2.405564, ppl: 11.084683 +epoch: 2, batch: 17672, sum loss: 
4188.227539, avg loss: 2.462215, ppl: 11.730763 +epoch: 2, batch: 17673, sum loss: 3845.080811, avg loss: 2.441321, ppl: 11.488208 +epoch: 2, batch: 17674, sum loss: 4012.113770, avg loss: 2.275731, ppl: 9.735034 +epoch: 2, batch: 17675, sum loss: 3622.169434, avg loss: 2.195254, ppl: 8.982285 +epoch: 2, batch: 17676, sum loss: 4052.359131, avg loss: 2.280450, ppl: 9.781077 +epoch: 2, batch: 17677, sum loss: 4300.965820, avg loss: 2.432673, ppl: 11.389285 +epoch: 2, batch: 17678, sum loss: 5263.571289, avg loss: 2.686867, ppl: 14.685587 +epoch: 2, batch: 17679, sum loss: 5227.059082, avg loss: 2.686053, ppl: 14.673645 +epoch: 2, batch: 17680, sum loss: 3591.843750, avg loss: 2.298045, ppl: 9.954699 +epoch: 2, batch: 17681, sum loss: 4295.708496, avg loss: 2.538835, ppl: 12.664906 +epoch: 2, batch: 17682, sum loss: 5404.703125, avg loss: 2.974520, ppl: 19.580227 +epoch: 2, batch: 17683, sum loss: 4119.153320, avg loss: 2.547405, ppl: 12.773910 +epoch: 2, batch: 17684, sum loss: 4060.390137, avg loss: 2.408298, ppl: 11.115025 +epoch: 2, batch: 17685, sum loss: 4267.812012, avg loss: 2.376287, ppl: 10.764864 +epoch: 2, batch: 17686, sum loss: 3452.727051, avg loss: 2.594085, ppl: 13.384335 +epoch: 2, batch: 17687, sum loss: 4432.089844, avg loss: 2.699202, ppl: 14.867867 +epoch: 2, batch: 17688, sum loss: 4790.695801, avg loss: 2.843143, ppl: 17.169645 +epoch: 2, batch: 17689, sum loss: 4282.086914, avg loss: 2.445509, ppl: 11.536426 +epoch: 2, batch: 17690, sum loss: 3809.268311, avg loss: 2.434037, ppl: 11.404833 +epoch: 2, batch: 17691, sum loss: 4516.002441, avg loss: 2.541363, ppl: 12.696968 +epoch: 2, batch: 17692, sum loss: 4394.419434, avg loss: 2.569836, ppl: 13.063681 +epoch: 2, batch: 17693, sum loss: 4974.483398, avg loss: 2.541892, ppl: 12.703688 +epoch: 2, batch: 17694, sum loss: 3461.593994, avg loss: 2.301592, ppl: 9.990070 +epoch: 2, batch: 17695, sum loss: 3824.533203, avg loss: 2.268406, ppl: 9.663988 +epoch: 2, batch: 17696, sum loss: 3706.145020, 
avg loss: 2.251607, ppl: 9.502995 +epoch: 2, batch: 17697, sum loss: 4925.581543, avg loss: 2.584250, ppl: 13.253351 +epoch: 2, batch: 17698, sum loss: 3414.712891, avg loss: 2.449579, ppl: 11.583467 +epoch: 2, batch: 17699, sum loss: 5007.113281, avg loss: 2.745128, ppl: 15.566605 +epoch: 2, batch: 17700, sum loss: 4363.689453, avg loss: 2.544425, ppl: 12.735906 +epoch: 2, batch: 17701, sum loss: 4101.449219, avg loss: 2.422593, ppl: 11.275054 +epoch: 2, batch: 17702, sum loss: 4065.836914, avg loss: 2.455215, ppl: 11.648943 +epoch: 2, batch: 17703, sum loss: 5142.658691, avg loss: 2.845965, ppl: 17.218164 +epoch: 2, batch: 17704, sum loss: 3683.734863, avg loss: 2.213783, ppl: 9.150267 +epoch: 2, batch: 17705, sum loss: 3804.794922, avg loss: 2.354452, ppl: 10.532359 +epoch: 2, batch: 17706, sum loss: 4529.805664, avg loss: 2.697919, ppl: 14.848794 +epoch: 2, batch: 17707, sum loss: 3778.743408, avg loss: 2.399202, ppl: 11.014384 +epoch: 2, batch: 17708, sum loss: 3924.634521, avg loss: 2.463675, ppl: 11.747909 +epoch: 2, batch: 17709, sum loss: 4735.689453, avg loss: 2.546070, ppl: 12.756866 +epoch: 2, batch: 17710, sum loss: 4191.292969, avg loss: 2.392291, ppl: 10.938521 +epoch: 2, batch: 17711, sum loss: 4181.247559, avg loss: 2.442317, ppl: 11.499660 +epoch: 2, batch: 17712, sum loss: 4615.780273, avg loss: 2.536143, ppl: 12.630860 +epoch: 2, batch: 17713, sum loss: 4079.410156, avg loss: 2.600006, ppl: 13.463823 +epoch: 2, batch: 17714, sum loss: 4686.241699, avg loss: 2.494009, ppl: 12.109721 +epoch: 2, batch: 17715, sum loss: 4950.823242, avg loss: 2.718739, ppl: 15.161189 +epoch: 2, batch: 17716, sum loss: 4223.684570, avg loss: 2.716196, ppl: 15.122687 +epoch: 2, batch: 17717, sum loss: 4249.550781, avg loss: 2.575485, ppl: 13.137691 +epoch: 2, batch: 17718, sum loss: 3905.195557, avg loss: 2.369658, ppl: 10.693729 +epoch: 2, batch: 17719, sum loss: 4148.315918, avg loss: 2.313617, ppl: 10.110934 +epoch: 2, batch: 17720, sum loss: 4541.740234, avg loss: 
2.552974, ppl: 12.845245 +epoch: 2, batch: 17721, sum loss: 4906.195312, avg loss: 2.593127, ppl: 13.371512 +epoch: 2, batch: 17722, sum loss: 4576.935059, avg loss: 2.492884, ppl: 12.096110 +epoch: 2, batch: 17723, sum loss: 4337.448242, avg loss: 2.425866, ppl: 11.312020 +epoch: 2, batch: 17724, sum loss: 4668.039062, avg loss: 2.615148, ppl: 13.669240 +epoch: 2, batch: 17725, sum loss: 4516.509766, avg loss: 2.502221, ppl: 12.209585 +epoch: 2, batch: 17726, sum loss: 4666.633301, avg loss: 2.522505, ppl: 12.459764 +epoch: 2, batch: 17727, sum loss: 4637.232422, avg loss: 2.622869, ppl: 13.775188 +epoch: 2, batch: 17728, sum loss: 3770.375977, avg loss: 2.238941, ppl: 9.383387 +epoch: 2, batch: 17729, sum loss: 3553.686279, avg loss: 2.250593, ppl: 9.493363 +epoch: 2, batch: 17730, sum loss: 4448.148438, avg loss: 2.562298, ppl: 12.965572 +epoch: 2, batch: 17731, sum loss: 3891.926514, avg loss: 2.421859, ppl: 11.266780 +epoch: 2, batch: 17732, sum loss: 4198.622559, avg loss: 2.408848, ppl: 11.121145 +epoch: 2, batch: 17733, sum loss: 4069.730469, avg loss: 2.537239, ppl: 12.644706 +epoch: 2, batch: 17734, sum loss: 4742.016602, avg loss: 2.560484, ppl: 12.942082 +epoch: 2, batch: 17735, sum loss: 4663.746094, avg loss: 2.657405, ppl: 14.259240 +epoch: 2, batch: 17736, sum loss: 5184.952637, avg loss: 2.703312, ppl: 14.929097 +epoch: 2, batch: 17737, sum loss: 4256.561035, avg loss: 2.683834, ppl: 14.641125 +epoch: 2, batch: 17738, sum loss: 5273.939453, avg loss: 2.551495, ppl: 12.826260 +epoch: 2, batch: 17739, sum loss: 4640.571777, avg loss: 2.406936, ppl: 11.099895 +epoch: 2, batch: 17740, sum loss: 3647.295410, avg loss: 2.329052, ppl: 10.268203 +epoch: 2, batch: 17741, sum loss: 3575.018555, avg loss: 2.281441, ppl: 9.790783 +epoch: 2, batch: 17742, sum loss: 4990.803711, avg loss: 2.826050, ppl: 16.878651 +epoch: 2, batch: 17743, sum loss: 4451.790527, avg loss: 2.555563, ppl: 12.878545 +epoch: 2, batch: 17744, sum loss: 4325.255859, avg loss: 2.615028, 
ppl: 13.667598 +epoch: 2, batch: 17745, sum loss: 4219.872070, avg loss: 2.531417, ppl: 12.571305 +epoch: 2, batch: 17746, sum loss: 4216.333008, avg loss: 2.329466, ppl: 10.272453 +epoch: 2, batch: 17747, sum loss: 2725.483154, avg loss: 2.147741, ppl: 8.565486 +epoch: 2, batch: 17748, sum loss: 3959.330811, avg loss: 2.517057, ppl: 12.392073 +epoch: 2, batch: 17749, sum loss: 3816.791504, avg loss: 2.247816, ppl: 9.467038 +epoch: 2, batch: 17750, sum loss: 4678.869141, avg loss: 2.404352, ppl: 11.071256 +epoch: 2, batch: 17751, sum loss: 3255.333984, avg loss: 2.177481, ppl: 8.824050 +epoch: 2, batch: 17752, sum loss: 3875.584961, avg loss: 2.423755, ppl: 11.288172 +epoch: 2, batch: 17753, sum loss: 3716.676270, avg loss: 2.373356, ppl: 10.733356 +epoch: 2, batch: 17754, sum loss: 3858.072021, avg loss: 2.795704, ppl: 16.374159 +epoch: 2, batch: 17755, sum loss: 3854.162598, avg loss: 2.492990, ppl: 12.097393 +epoch: 2, batch: 17756, sum loss: 4334.778809, avg loss: 2.689069, ppl: 14.717964 +epoch: 2, batch: 17757, sum loss: 4448.059082, avg loss: 2.580081, ppl: 13.198204 +epoch: 2, batch: 17758, sum loss: 3896.057129, avg loss: 2.364112, ppl: 10.634595 +epoch: 2, batch: 17759, sum loss: 3419.086426, avg loss: 2.123656, ppl: 8.361652 +epoch: 2, batch: 17760, sum loss: 4250.239746, avg loss: 2.590030, ppl: 13.330174 +epoch: 2, batch: 17761, sum loss: 4461.148438, avg loss: 2.589175, ppl: 13.318779 +epoch: 2, batch: 17762, sum loss: 4679.448242, avg loss: 2.526700, ppl: 12.512145 +epoch: 2, batch: 17763, sum loss: 4507.781738, avg loss: 2.512699, ppl: 12.338184 +epoch: 2, batch: 17764, sum loss: 3681.442383, avg loss: 2.470767, ppl: 11.831515 +epoch: 2, batch: 17765, sum loss: 4279.019043, avg loss: 2.423001, ppl: 11.279654 +epoch: 2, batch: 17766, sum loss: 5038.452637, avg loss: 2.900664, ppl: 18.186211 +epoch: 2, batch: 17767, sum loss: 4406.409180, avg loss: 2.649675, ppl: 14.149438 +epoch: 2, batch: 17768, sum loss: 4328.356445, avg loss: 2.416726, ppl: 
11.209099 +epoch: 2, batch: 17769, sum loss: 3910.805664, avg loss: 2.360172, ppl: 10.592776 +epoch: 2, batch: 17770, sum loss: 4097.077148, avg loss: 2.465149, ppl: 11.765231 +epoch: 2, batch: 17771, sum loss: 5001.454102, avg loss: 2.703489, ppl: 14.931735 +epoch: 2, batch: 17772, sum loss: 4301.940430, avg loss: 2.523132, ppl: 12.467588 +epoch: 2, batch: 17773, sum loss: 3395.074707, avg loss: 2.233602, ppl: 9.333423 +epoch: 2, batch: 17774, sum loss: 4240.061035, avg loss: 2.414613, ppl: 11.185446 +epoch: 2, batch: 17775, sum loss: 3945.373047, avg loss: 2.311291, ppl: 10.087434 +epoch: 2, batch: 17776, sum loss: 3975.432861, avg loss: 2.422567, ppl: 11.274766 +epoch: 2, batch: 17777, sum loss: 3878.312744, avg loss: 2.367712, ppl: 10.672947 +epoch: 2, batch: 17778, sum loss: 4694.435547, avg loss: 2.681003, ppl: 14.599724 +epoch: 2, batch: 17779, sum loss: 4667.550293, avg loss: 2.584469, ppl: 13.256243 +epoch: 2, batch: 17780, sum loss: 3713.995361, avg loss: 2.568462, ppl: 13.045740 +epoch: 2, batch: 17781, sum loss: 3876.776611, avg loss: 2.679182, ppl: 14.573172 +epoch: 2, batch: 17782, sum loss: 3883.958984, avg loss: 2.448902, ppl: 11.575634 +epoch: 2, batch: 17783, sum loss: 4363.454102, avg loss: 2.528073, ppl: 12.529340 +epoch: 2, batch: 17784, sum loss: 4685.986816, avg loss: 2.610578, ppl: 13.606908 +epoch: 2, batch: 17785, sum loss: 5004.637695, avg loss: 2.482459, ppl: 11.970668 +epoch: 2, batch: 17786, sum loss: 4298.562500, avg loss: 2.431314, ppl: 11.373815 +epoch: 2, batch: 17787, sum loss: 3982.812012, avg loss: 2.434482, ppl: 11.409905 +epoch: 2, batch: 17788, sum loss: 4061.446777, avg loss: 2.312897, ppl: 10.103652 +epoch: 2, batch: 17789, sum loss: 4598.380859, avg loss: 2.512777, ppl: 12.339144 +epoch: 2, batch: 17790, sum loss: 4916.231934, avg loss: 2.684999, ppl: 14.658179 +epoch: 2, batch: 17791, sum loss: 4902.650879, avg loss: 2.710145, ppl: 15.031447 +epoch: 2, batch: 17792, sum loss: 3051.114258, avg loss: 2.148672, ppl: 8.573466 
+epoch: 2, batch: 17793, sum loss: 4469.955566, avg loss: 2.484689, ppl: 11.997389 +epoch: 2, batch: 17794, sum loss: 4886.082520, avg loss: 2.542187, ppl: 12.707428 +epoch: 2, batch: 17795, sum loss: 3804.701904, avg loss: 2.335606, ppl: 10.335720 +epoch: 2, batch: 17796, sum loss: 4072.868896, avg loss: 2.376236, ppl: 10.764312 +epoch: 2, batch: 17797, sum loss: 4532.625000, avg loss: 2.490453, ppl: 12.066747 +epoch: 2, batch: 17798, sum loss: 4771.811523, avg loss: 2.621875, ppl: 13.761497 +epoch: 2, batch: 17799, sum loss: 4155.808105, avg loss: 2.643644, ppl: 14.064359 +epoch: 2, batch: 17800, sum loss: 5167.898438, avg loss: 2.853616, ppl: 17.350412 +epoch: 2, batch: 17801, sum loss: 3332.045410, avg loss: 2.492181, ppl: 12.087605 +epoch: 2, batch: 17802, sum loss: 4418.226562, avg loss: 2.424932, ppl: 11.301464 +epoch: 2, batch: 17803, sum loss: 4639.453125, avg loss: 2.580341, ppl: 13.201637 +epoch: 2, batch: 17804, sum loss: 4541.539551, avg loss: 2.589247, ppl: 13.319741 +epoch: 2, batch: 17805, sum loss: 4874.796875, avg loss: 2.776080, ppl: 16.055964 +epoch: 2, batch: 17806, sum loss: 4338.033691, avg loss: 2.535379, ppl: 12.621219 +epoch: 2, batch: 17807, sum loss: 3881.487305, avg loss: 2.504185, ppl: 12.233590 +epoch: 2, batch: 17808, sum loss: 4781.729980, avg loss: 2.671357, ppl: 14.459583 +epoch: 2, batch: 17809, sum loss: 4716.981445, avg loss: 2.641087, ppl: 14.028445 +epoch: 2, batch: 17810, sum loss: 3934.483887, avg loss: 2.348946, ppl: 10.474523 +epoch: 2, batch: 17811, sum loss: 4562.728027, avg loss: 2.409043, ppl: 11.123315 +epoch: 2, batch: 17812, sum loss: 3662.848877, avg loss: 2.234807, ppl: 9.344678 +epoch: 2, batch: 17813, sum loss: 4533.754883, avg loss: 2.716450, ppl: 15.126527 +epoch: 2, batch: 17814, sum loss: 4470.345703, avg loss: 2.554483, ppl: 12.864649 +epoch: 2, batch: 17815, sum loss: 4660.593262, avg loss: 2.670827, ppl: 14.451915 +epoch: 2, batch: 17816, sum loss: 4032.042725, avg loss: 2.473646, ppl: 11.865629 +epoch: 
2, batch: 17817, sum loss: 4643.846680, avg loss: 2.529328, ppl: 12.545074 +epoch: 2, batch: 17818, sum loss: 4647.388184, avg loss: 2.751562, ppl: 15.667087 +epoch: 2, batch: 17819, sum loss: 4265.957031, avg loss: 2.358185, ppl: 10.571750 +epoch: 2, batch: 17820, sum loss: 3846.763916, avg loss: 2.392266, ppl: 10.938252 +epoch: 2, batch: 17821, sum loss: 3773.508789, avg loss: 2.388297, ppl: 10.894919 +epoch: 2, batch: 17822, sum loss: 4402.109375, avg loss: 2.591000, ppl: 13.343112 +epoch: 2, batch: 17823, sum loss: 6251.718262, avg loss: 2.928205, ppl: 18.694054 +epoch: 2, batch: 17824, sum loss: 3581.022705, avg loss: 2.141760, ppl: 8.514411 +epoch: 2, batch: 17825, sum loss: 5134.581543, avg loss: 2.889466, ppl: 17.983709 +epoch: 2, batch: 17826, sum loss: 4417.817383, avg loss: 2.404909, ppl: 11.077421 +epoch: 2, batch: 17827, sum loss: 4541.501953, avg loss: 2.665201, ppl: 14.370833 +epoch: 2, batch: 17828, sum loss: 4824.249512, avg loss: 2.803166, ppl: 16.496799 +epoch: 2, batch: 17829, sum loss: 4349.629883, avg loss: 2.545132, ppl: 12.744906 +epoch: 2, batch: 17830, sum loss: 4418.693359, avg loss: 2.633310, ppl: 13.919763 +epoch: 2, batch: 17831, sum loss: 4604.628418, avg loss: 2.474277, ppl: 11.873115 +epoch: 2, batch: 17832, sum loss: 4065.937744, avg loss: 2.650546, ppl: 14.161770 +epoch: 2, batch: 17833, sum loss: 3719.118652, avg loss: 2.491037, ppl: 12.073792 +epoch: 2, batch: 17834, sum loss: 3891.648438, avg loss: 2.355720, ppl: 10.545714 +epoch: 2, batch: 17835, sum loss: 4162.714355, avg loss: 2.512199, ppl: 12.332024 +epoch: 2, batch: 17836, sum loss: 4294.405762, avg loss: 2.502568, ppl: 12.213813 +epoch: 2, batch: 17837, sum loss: 4189.724121, avg loss: 2.525451, ppl: 12.496535 +epoch: 2, batch: 17838, sum loss: 4167.151367, avg loss: 2.688485, ppl: 14.709373 +epoch: 2, batch: 17839, sum loss: 4006.505371, avg loss: 2.556800, ppl: 12.894490 +epoch: 2, batch: 17840, sum loss: 3833.348145, avg loss: 2.460429, ppl: 11.709833 +epoch: 2, 
batch: 17841, sum loss: 4220.742188, avg loss: 2.472608, ppl: 11.853324 +epoch: 2, batch: 17842, sum loss: 3399.919434, avg loss: 2.356147, ppl: 10.550219 +epoch: 2, batch: 17843, sum loss: 4968.543945, avg loss: 2.755709, ppl: 15.732198 +epoch: 2, batch: 17844, sum loss: 4544.388672, avg loss: 2.547303, ppl: 12.772613 +epoch: 2, batch: 17845, sum loss: 4686.674316, avg loss: 2.698143, ppl: 14.852126 +epoch: 2, batch: 17846, sum loss: 3729.533936, avg loss: 2.333876, ppl: 10.317858 +epoch: 2, batch: 17847, sum loss: 3918.046143, avg loss: 2.550811, ppl: 12.817498 +epoch: 2, batch: 17848, sum loss: 3763.615723, avg loss: 2.385054, ppl: 10.859653 +epoch: 2, batch: 17849, sum loss: 3751.845215, avg loss: 2.164942, ppl: 8.714099 +epoch: 2, batch: 17850, sum loss: 4451.128906, avg loss: 2.523316, ppl: 12.469874 +epoch: 2, batch: 17851, sum loss: 3138.951172, avg loss: 2.109510, ppl: 8.244203 +epoch: 2, batch: 17852, sum loss: 3796.269531, avg loss: 2.405747, ppl: 11.086714 +epoch: 2, batch: 17853, sum loss: 4881.944336, avg loss: 2.675038, ppl: 14.512903 +epoch: 2, batch: 17854, sum loss: 4118.924805, avg loss: 2.531607, ppl: 12.573698 +epoch: 2, batch: 17855, sum loss: 4598.957520, avg loss: 2.599750, ppl: 13.460373 +epoch: 2, batch: 17856, sum loss: 4745.822266, avg loss: 2.610463, ppl: 13.605354 +epoch: 2, batch: 17857, sum loss: 4857.905762, avg loss: 2.522277, ppl: 12.456933 +epoch: 2, batch: 17858, sum loss: 5802.862305, avg loss: 2.885561, ppl: 17.913610 +epoch: 2, batch: 17859, sum loss: 3412.233643, avg loss: 2.322827, ppl: 10.204485 +epoch: 2, batch: 17860, sum loss: 4612.240234, avg loss: 2.545387, ppl: 12.748155 +epoch: 2, batch: 17861, sum loss: 4442.323730, avg loss: 2.693950, ppl: 14.789984 +epoch: 2, batch: 17862, sum loss: 4618.708984, avg loss: 2.818004, ppl: 16.743401 +epoch: 2, batch: 17863, sum loss: 3492.635742, avg loss: 2.372715, ppl: 10.726470 +epoch: 2, batch: 17864, sum loss: 4442.907715, avg loss: 2.520084, ppl: 12.429637 +epoch: 2, batch: 
17865, sum loss: 4323.696289, avg loss: 2.412777, ppl: 11.164923 +epoch: 2, batch: 17866, sum loss: 3413.767090, avg loss: 2.108565, ppl: 8.236416 +epoch: 2, batch: 17867, sum loss: 3859.821045, avg loss: 2.488601, ppl: 12.044419 +epoch: 2, batch: 17868, sum loss: 3664.061523, avg loss: 2.408982, ppl: 11.122633 +epoch: 2, batch: 17869, sum loss: 3436.348145, avg loss: 2.528586, ppl: 12.535767 +epoch: 2, batch: 17870, sum loss: 4139.798828, avg loss: 2.658830, ppl: 14.279578 +epoch: 2, batch: 17871, sum loss: 4058.530762, avg loss: 2.534997, ppl: 12.616399 +epoch: 2, batch: 17872, sum loss: 4148.400879, avg loss: 2.523358, ppl: 12.470404 +epoch: 2, batch: 17873, sum loss: 4854.561523, avg loss: 2.609979, ppl: 13.598770 +epoch: 2, batch: 17874, sum loss: 4325.554688, avg loss: 2.895284, ppl: 18.088640 +epoch: 2, batch: 17875, sum loss: 3974.001709, avg loss: 2.457639, ppl: 11.677206 +epoch: 2, batch: 17876, sum loss: 4412.224121, avg loss: 2.469068, ppl: 11.811431 +epoch: 2, batch: 17877, sum loss: 4181.844727, avg loss: 2.525269, ppl: 12.494250 +epoch: 2, batch: 17878, sum loss: 3311.470459, avg loss: 2.220973, ppl: 9.216294 +epoch: 2, batch: 17879, sum loss: 4576.235352, avg loss: 2.600134, ppl: 13.465538 +epoch: 2, batch: 17880, sum loss: 3386.766846, avg loss: 2.225208, ppl: 9.255410 +epoch: 2, batch: 17881, sum loss: 4108.475586, avg loss: 2.503641, ppl: 12.226935 +epoch: 2, batch: 17882, sum loss: 4879.063477, avg loss: 2.551811, ppl: 12.830324 +epoch: 2, batch: 17883, sum loss: 3789.861572, avg loss: 2.407790, ppl: 11.109382 +epoch: 2, batch: 17884, sum loss: 3298.099609, avg loss: 2.350748, ppl: 10.493419 +epoch: 2, batch: 17885, sum loss: 5338.260254, avg loss: 2.624513, ppl: 13.797855 +epoch: 2, batch: 17886, sum loss: 3210.770508, avg loss: 2.267493, ppl: 9.655168 +epoch: 2, batch: 17887, sum loss: 4927.469238, avg loss: 2.589316, ppl: 13.320662 +epoch: 2, batch: 17888, sum loss: 3666.313477, avg loss: 2.301515, ppl: 9.989305 +epoch: 2, batch: 17889, sum 
loss: 4031.780518, avg loss: 2.437594, ppl: 11.445472 +epoch: 2, batch: 17890, sum loss: 3672.043945, avg loss: 2.353874, ppl: 10.526275 +epoch: 2, batch: 17891, sum loss: 3983.056885, avg loss: 2.568057, ppl: 13.040466 +epoch: 2, batch: 17892, sum loss: 3782.184814, avg loss: 2.499792, ppl: 12.179958 +epoch: 2, batch: 17893, sum loss: 4382.406738, avg loss: 2.495676, ppl: 12.129931 +epoch: 2, batch: 17894, sum loss: 4224.520996, avg loss: 2.319891, ppl: 10.174562 +epoch: 2, batch: 17895, sum loss: 3594.828857, avg loss: 2.517387, ppl: 12.396165 +epoch: 2, batch: 17896, sum loss: 4687.611816, avg loss: 2.469764, ppl: 11.819657 +epoch: 2, batch: 17897, sum loss: 3784.211426, avg loss: 2.278273, ppl: 9.759809 +epoch: 2, batch: 17898, sum loss: 4379.943359, avg loss: 2.585563, ppl: 13.270755 +epoch: 2, batch: 17899, sum loss: 4022.817871, avg loss: 2.248641, ppl: 9.474847 +epoch: 2, batch: 17900, sum loss: 4117.641113, avg loss: 2.543324, ppl: 12.721886 +epoch: 2, batch: 17901, sum loss: 4410.740723, avg loss: 2.558434, ppl: 12.915582 +epoch: 2, batch: 17902, sum loss: 5142.659668, avg loss: 2.882657, ppl: 17.861666 +epoch: 2, batch: 17903, sum loss: 3183.799316, avg loss: 2.017617, ppl: 7.520380 +epoch: 2, batch: 17904, sum loss: 3635.831299, avg loss: 2.350246, ppl: 10.488154 +epoch: 2, batch: 17905, sum loss: 3693.048096, avg loss: 2.340335, ppl: 10.384711 +epoch: 2, batch: 17906, sum loss: 3241.501221, avg loss: 2.255742, ppl: 9.542372 +epoch: 2, batch: 17907, sum loss: 4272.018555, avg loss: 2.517395, ppl: 12.396256 +epoch: 2, batch: 17908, sum loss: 4262.817383, avg loss: 2.433115, ppl: 11.394320 +epoch: 2, batch: 17909, sum loss: 4461.669922, avg loss: 2.452815, ppl: 11.621012 +epoch: 2, batch: 17910, sum loss: 4588.243652, avg loss: 2.515485, ppl: 12.372602 +epoch: 2, batch: 17911, sum loss: 4301.589844, avg loss: 2.568114, ppl: 13.041200 +epoch: 2, batch: 17912, sum loss: 3555.577881, avg loss: 2.391108, ppl: 10.925593 +epoch: 2, batch: 17913, sum loss: 
3925.358398, avg loss: 2.545628, ppl: 12.751234 +epoch: 2, batch: 17914, sum loss: 4038.311035, avg loss: 2.672608, ppl: 14.477680 +epoch: 2, batch: 17915, sum loss: 4179.589844, avg loss: 2.385611, ppl: 10.865698 +epoch: 2, batch: 17916, sum loss: 4504.292480, avg loss: 2.576827, ppl: 13.155324 +epoch: 2, batch: 17917, sum loss: 4092.320312, avg loss: 2.383413, ppl: 10.841844 +epoch: 2, batch: 17918, sum loss: 4197.858398, avg loss: 2.564361, ppl: 12.992352 +epoch: 2, batch: 17919, sum loss: 3562.874023, avg loss: 2.336311, ppl: 10.343009 +epoch: 2, batch: 17920, sum loss: 3748.136963, avg loss: 2.253841, ppl: 9.524245 +epoch: 2, batch: 17921, sum loss: 3387.315430, avg loss: 2.388798, ppl: 10.900384 +epoch: 2, batch: 17922, sum loss: 3748.208496, avg loss: 2.410424, ppl: 11.138678 +epoch: 2, batch: 17923, sum loss: 4168.455078, avg loss: 2.363070, ppl: 10.623513 +epoch: 2, batch: 17924, sum loss: 4269.257324, avg loss: 2.434012, ppl: 11.404548 +epoch: 2, batch: 17925, sum loss: 4138.075195, avg loss: 2.652612, ppl: 14.191064 +epoch: 2, batch: 17926, sum loss: 3638.020508, avg loss: 2.370046, ppl: 10.697884 +epoch: 2, batch: 17927, sum loss: 4401.215820, avg loss: 2.626024, ppl: 13.818714 +epoch: 2, batch: 17928, sum loss: 4262.516602, avg loss: 2.400066, ppl: 11.023900 +epoch: 2, batch: 17929, sum loss: 4375.911621, avg loss: 2.496242, ppl: 12.136796 +epoch: 2, batch: 17930, sum loss: 3566.342285, avg loss: 2.451094, ppl: 11.601036 +epoch: 2, batch: 17931, sum loss: 4899.419922, avg loss: 2.471957, ppl: 11.845612 +epoch: 2, batch: 17932, sum loss: 4590.141113, avg loss: 2.642568, ppl: 14.049241 +epoch: 2, batch: 17933, sum loss: 3468.068848, avg loss: 2.283126, ppl: 9.807294 +epoch: 2, batch: 17934, sum loss: 3974.760498, avg loss: 2.338094, ppl: 10.361474 +epoch: 2, batch: 17935, sum loss: 3596.338135, avg loss: 2.375389, ppl: 10.755192 +epoch: 2, batch: 17936, sum loss: 4322.867188, avg loss: 2.293298, ppl: 9.907561 +epoch: 2, batch: 17937, sum loss: 
3798.197754, avg loss: 2.542301, ppl: 12.708879 +epoch: 2, batch: 17938, sum loss: 3900.794189, avg loss: 2.529698, ppl: 12.549714 +epoch: 2, batch: 17939, sum loss: 4083.559814, avg loss: 2.563440, ppl: 12.980392 +epoch: 2, batch: 17940, sum loss: 4399.255859, avg loss: 2.550293, ppl: 12.810863 +epoch: 2, batch: 17941, sum loss: 3941.021484, avg loss: 2.307389, ppl: 10.048160 +epoch: 2, batch: 17942, sum loss: 4429.620605, avg loss: 2.400878, ppl: 11.032864 +epoch: 2, batch: 17943, sum loss: 4275.717773, avg loss: 2.426628, ppl: 11.320641 +epoch: 2, batch: 17944, sum loss: 3154.588379, avg loss: 2.258116, ppl: 9.565054 +epoch: 2, batch: 17945, sum loss: 4996.996094, avg loss: 2.602602, ppl: 13.498819 +epoch: 2, batch: 17946, sum loss: 4209.429688, avg loss: 2.576150, ppl: 13.146432 +epoch: 2, batch: 17947, sum loss: 4259.182617, avg loss: 2.663654, ppl: 14.348621 +epoch: 2, batch: 17948, sum loss: 4608.527344, avg loss: 2.605160, ppl: 13.533387 +epoch: 2, batch: 17949, sum loss: 4255.868652, avg loss: 2.521249, ppl: 12.444134 +epoch: 2, batch: 17950, sum loss: 3771.679199, avg loss: 2.438060, ppl: 11.450808 +epoch: 2, batch: 17951, sum loss: 4825.976074, avg loss: 2.718860, ppl: 15.163022 +epoch: 2, batch: 17952, sum loss: 4013.011230, avg loss: 2.424780, ppl: 11.299747 +epoch: 2, batch: 17953, sum loss: 3800.924805, avg loss: 2.343357, ppl: 10.416143 +epoch: 2, batch: 17954, sum loss: 4083.430664, avg loss: 2.639580, ppl: 14.007323 +epoch: 2, batch: 17955, sum loss: 3438.665039, avg loss: 2.122633, ppl: 8.353100 +epoch: 2, batch: 17956, sum loss: 5092.315430, avg loss: 2.614125, ppl: 13.655263 +epoch: 2, batch: 17957, sum loss: 4079.329590, avg loss: 2.565616, ppl: 13.008671 +epoch: 2, batch: 17958, sum loss: 3938.156250, avg loss: 2.527700, ppl: 12.524666 +epoch: 2, batch: 17959, sum loss: 4266.498535, avg loss: 2.607884, ppl: 13.570312 +epoch: 2, batch: 17960, sum loss: 3751.020020, avg loss: 2.500680, ppl: 12.190781 +epoch: 2, batch: 17961, sum loss: 
4054.978027, avg loss: 2.493836, ppl: 12.107636 +epoch: 2, batch: 17962, sum loss: 4225.837891, avg loss: 2.465483, ppl: 11.769165 +epoch: 2, batch: 17963, sum loss: 4356.622070, avg loss: 2.659720, ppl: 14.292290 +epoch: 2, batch: 17964, sum loss: 3481.683105, avg loss: 2.278589, ppl: 9.762891 +epoch: 2, batch: 17965, sum loss: 3991.134033, avg loss: 2.550245, ppl: 12.810246 +epoch: 2, batch: 17966, sum loss: 4099.932617, avg loss: 2.478799, ppl: 11.926928 +epoch: 2, batch: 17967, sum loss: 4057.441650, avg loss: 2.297532, ppl: 9.949597 +epoch: 2, batch: 17968, sum loss: 4588.215332, avg loss: 2.839242, ppl: 17.102800 +epoch: 2, batch: 17969, sum loss: 4223.875000, avg loss: 2.651522, ppl: 14.175603 +epoch: 2, batch: 17970, sum loss: 4580.964844, avg loss: 2.469523, ppl: 11.816806 +epoch: 2, batch: 17971, sum loss: 4687.363770, avg loss: 2.602645, ppl: 13.499395 +epoch: 2, batch: 17972, sum loss: 4018.266602, avg loss: 2.330781, ppl: 10.285974 +epoch: 2, batch: 17973, sum loss: 3944.342285, avg loss: 2.393412, ppl: 10.950790 +epoch: 2, batch: 17974, sum loss: 4272.770996, avg loss: 2.547866, ppl: 12.779800 +epoch: 2, batch: 17975, sum loss: 4852.577637, avg loss: 2.755581, ppl: 15.730172 +epoch: 2, batch: 17976, sum loss: 4364.575195, avg loss: 2.546427, ppl: 12.761419 +epoch: 2, batch: 17977, sum loss: 3975.391113, avg loss: 2.489287, ppl: 12.052678 +epoch: 2, batch: 17978, sum loss: 4548.882812, avg loss: 3.006532, ppl: 20.217165 +epoch: 2, batch: 17979, sum loss: 3738.603516, avg loss: 2.274090, ppl: 9.719069 +epoch: 2, batch: 17980, sum loss: 3875.781738, avg loss: 2.451475, ppl: 11.605449 +epoch: 2, batch: 17981, sum loss: 3962.057129, avg loss: 2.468571, ppl: 11.805570 +epoch: 2, batch: 17982, sum loss: 4691.900879, avg loss: 2.538907, ppl: 12.665823 +epoch: 2, batch: 17983, sum loss: 4597.007812, avg loss: 2.723346, ppl: 15.231200 +epoch: 2, batch: 17984, sum loss: 3234.320312, avg loss: 2.244497, ppl: 9.435671 +epoch: 2, batch: 17985, sum loss: 
3674.616211, avg loss: 2.435133, ppl: 11.417342 +epoch: 2, batch: 17986, sum loss: 3695.548340, avg loss: 2.318412, ppl: 10.159527 +epoch: 2, batch: 17987, sum loss: 3947.787842, avg loss: 2.371044, ppl: 10.708566 +epoch: 2, batch: 17988, sum loss: 4193.204590, avg loss: 2.530600, ppl: 12.561042 +epoch: 2, batch: 17989, sum loss: 3895.184570, avg loss: 2.319943, ppl: 10.175097 +epoch: 2, batch: 17990, sum loss: 4778.734375, avg loss: 2.699850, ppl: 14.877501 +epoch: 2, batch: 17991, sum loss: 4623.760254, avg loss: 2.729492, ppl: 15.325107 +epoch: 2, batch: 17992, sum loss: 4387.258789, avg loss: 2.415891, ppl: 11.199749 +epoch: 2, batch: 17993, sum loss: 4589.469727, avg loss: 2.665197, ppl: 14.370782 +epoch: 2, batch: 17994, sum loss: 4150.726562, avg loss: 2.350355, ppl: 10.489289 +epoch: 2, batch: 17995, sum loss: 3746.984131, avg loss: 2.403454, ppl: 11.061312 +epoch: 2, batch: 17996, sum loss: 3883.320801, avg loss: 2.465600, ppl: 11.770548 +epoch: 2, batch: 17997, sum loss: 4728.061523, avg loss: 2.800984, ppl: 16.460842 +epoch: 2, batch: 17998, sum loss: 4207.899414, avg loss: 2.669987, ppl: 14.439781 +epoch: 2, batch: 17999, sum loss: 4441.728516, avg loss: 2.495353, ppl: 12.126016 +epoch: 2, batch: 18000, sum loss: 4028.534912, avg loss: 2.536861, ppl: 12.639931 +epoch: 2, batch: 18001, sum loss: 5353.489746, avg loss: 2.941478, ppl: 18.943825 +epoch: 2, batch: 18002, sum loss: 3879.297119, avg loss: 2.359670, ppl: 10.587454 +epoch: 2, batch: 18003, sum loss: 3258.524902, avg loss: 2.440843, ppl: 11.482713 +epoch: 2, batch: 18004, sum loss: 3417.205078, avg loss: 2.346982, ppl: 10.453967 +epoch: 2, batch: 18005, sum loss: 3879.983643, avg loss: 2.581493, ppl: 13.216852 +epoch: 2, batch: 18006, sum loss: 3845.704346, avg loss: 2.587957, ppl: 13.302568 +epoch: 2, batch: 18007, sum loss: 3022.495361, avg loss: 2.176023, ppl: 8.811191 +epoch: 2, batch: 18008, sum loss: 4096.082520, avg loss: 2.281940, ppl: 9.795668 +epoch: 2, batch: 18009, sum loss: 
5604.876953, avg loss: 2.747488, ppl: 15.603395 +epoch: 2, batch: 18010, sum loss: 3939.501953, avg loss: 2.473008, ppl: 11.858064 +epoch: 2, batch: 18011, sum loss: 4410.212402, avg loss: 2.611138, ppl: 13.614540 +epoch: 2, batch: 18012, sum loss: 5514.182617, avg loss: 2.890033, ppl: 17.993898 +epoch: 2, batch: 18013, sum loss: 3866.463135, avg loss: 2.560572, ppl: 12.943214 +epoch: 2, batch: 18014, sum loss: 4933.709961, avg loss: 2.606292, ppl: 13.548713 +epoch: 2, batch: 18015, sum loss: 3924.709961, avg loss: 2.171948, ppl: 8.775359 +epoch: 2, batch: 18016, sum loss: 3993.514160, avg loss: 2.421779, ppl: 11.265888 +epoch: 2, batch: 18017, sum loss: 3938.608887, avg loss: 2.311390, ppl: 10.088440 +epoch: 2, batch: 18018, sum loss: 3829.169434, avg loss: 2.232752, ppl: 9.325493 +epoch: 2, batch: 18019, sum loss: 4616.098145, avg loss: 2.782458, ppl: 16.158691 +epoch: 2, batch: 18020, sum loss: 4173.287598, avg loss: 2.450551, ppl: 11.594728 +epoch: 2, batch: 18021, sum loss: 3661.696777, avg loss: 2.402688, ppl: 11.052850 +epoch: 2, batch: 18022, sum loss: 3153.144531, avg loss: 2.260319, ppl: 9.586144 +epoch: 2, batch: 18023, sum loss: 3490.432129, avg loss: 2.390707, ppl: 10.921213 +epoch: 2, batch: 18024, sum loss: 4747.905273, avg loss: 2.624602, ppl: 13.799086 +epoch: 2, batch: 18025, sum loss: 4286.103027, avg loss: 2.447803, ppl: 11.562915 +epoch: 2, batch: 18026, sum loss: 4969.237305, avg loss: 2.513524, ppl: 12.348370 +epoch: 2, batch: 18027, sum loss: 4098.169922, avg loss: 2.542289, ppl: 12.708728 +epoch: 2, batch: 18028, sum loss: 4664.500000, avg loss: 2.575649, ppl: 13.139842 +epoch: 2, batch: 18029, sum loss: 4924.753906, avg loss: 2.660591, ppl: 14.304743 +epoch: 2, batch: 18030, sum loss: 3347.875000, avg loss: 2.161314, ppl: 8.682537 +epoch: 2, batch: 18031, sum loss: 3524.077881, avg loss: 2.266288, ppl: 9.643538 +epoch: 2, batch: 18032, sum loss: 3705.597900, avg loss: 2.087661, ppl: 8.066025 +epoch: 2, batch: 18033, sum loss: 4656.808594, 
avg loss: 2.647418, ppl: 14.117544 +epoch: 2, batch: 18034, sum loss: 5061.754395, avg loss: 2.788846, ppl: 16.262236 +epoch: 2, batch: 18035, sum loss: 4217.617676, avg loss: 2.503037, ppl: 12.219551 +epoch: 2, batch: 18036, sum loss: 3953.457031, avg loss: 2.394583, ppl: 10.963631 +epoch: 2, batch: 18037, sum loss: 3728.609863, avg loss: 2.283288, ppl: 9.808882 +epoch: 2, batch: 18038, sum loss: 4075.119141, avg loss: 2.380327, ppl: 10.808432 +epoch: 2, batch: 18039, sum loss: 3359.387207, avg loss: 2.315222, ppl: 10.127172 +epoch: 2, batch: 18040, sum loss: 4662.243652, avg loss: 2.791763, ppl: 16.309746 +epoch: 2, batch: 18041, sum loss: 3712.151855, avg loss: 2.426243, ppl: 11.316288 +epoch: 2, batch: 18042, sum loss: 4831.281738, avg loss: 2.717257, ppl: 15.138733 +epoch: 2, batch: 18043, sum loss: 3729.305664, avg loss: 2.511317, ppl: 12.321143 +epoch: 2, batch: 18044, sum loss: 4839.122559, avg loss: 2.568537, ppl: 13.046717 +epoch: 2, batch: 18045, sum loss: 4066.769043, avg loss: 2.536974, ppl: 12.641356 +epoch: 2, batch: 18046, sum loss: 3130.347656, avg loss: 2.095279, ppl: 8.127712 +epoch: 2, batch: 18047, sum loss: 3569.471924, avg loss: 2.167257, ppl: 8.734289 +epoch: 2, batch: 18048, sum loss: 3666.918701, avg loss: 2.499604, ppl: 12.177665 +epoch: 2, batch: 18049, sum loss: 4529.473145, avg loss: 2.618192, ppl: 13.710917 +epoch: 2, batch: 18050, sum loss: 4330.651367, avg loss: 2.486022, ppl: 12.013397 +epoch: 2, batch: 18051, sum loss: 4365.994141, avg loss: 2.514974, ppl: 12.366282 +epoch: 2, batch: 18052, sum loss: 4052.839355, avg loss: 2.556996, ppl: 12.897021 +epoch: 2, batch: 18053, sum loss: 4548.604492, avg loss: 2.578574, ppl: 13.178331 +epoch: 2, batch: 18054, sum loss: 3677.032715, avg loss: 2.115669, ppl: 8.295134 +epoch: 2, batch: 18055, sum loss: 4281.643555, avg loss: 2.484993, ppl: 12.001041 +epoch: 2, batch: 18056, sum loss: 3771.373779, avg loss: 2.422205, ppl: 11.270689 +epoch: 2, batch: 18057, sum loss: 4013.329346, avg loss: 
2.530473, ppl: 12.559440 +epoch: 2, batch: 18058, sum loss: 4171.405762, avg loss: 2.670554, ppl: 14.447977 +epoch: 2, batch: 18059, sum loss: 3405.205811, avg loss: 2.196907, ppl: 8.997143 +epoch: 2, batch: 18060, sum loss: 3933.579346, avg loss: 2.261978, ppl: 9.602062 +epoch: 2, batch: 18061, sum loss: 4445.953125, avg loss: 2.604542, ppl: 13.525029 +epoch: 2, batch: 18062, sum loss: 3752.671875, avg loss: 2.394813, ppl: 10.966147 +epoch: 2, batch: 18063, sum loss: 4758.148438, avg loss: 2.668620, ppl: 14.420051 +epoch: 2, batch: 18064, sum loss: 3713.737793, avg loss: 2.536706, ppl: 12.637973 +epoch: 2, batch: 18065, sum loss: 4938.390137, avg loss: 2.666517, ppl: 14.389766 +epoch: 2, batch: 18066, sum loss: 4461.052246, avg loss: 2.677702, ppl: 14.551621 +epoch: 2, batch: 18067, sum loss: 3509.727539, avg loss: 2.157177, ppl: 8.646695 +epoch: 2, batch: 18068, sum loss: 4783.566406, avg loss: 2.496642, ppl: 12.141655 +epoch: 2, batch: 18069, sum loss: 4659.581543, avg loss: 2.610410, ppl: 13.604628 +epoch: 2, batch: 18070, sum loss: 4299.289062, avg loss: 2.514204, ppl: 12.356770 +epoch: 2, batch: 18071, sum loss: 4594.149414, avg loss: 2.592635, ppl: 13.364944 +epoch: 2, batch: 18072, sum loss: 4334.611328, avg loss: 2.322943, ppl: 10.205663 +epoch: 2, batch: 18073, sum loss: 3992.983887, avg loss: 2.325558, ppl: 10.232392 +epoch: 2, batch: 18074, sum loss: 4382.522461, avg loss: 2.242847, ppl: 9.420110 +epoch: 2, batch: 18075, sum loss: 3967.196533, avg loss: 2.488831, ppl: 12.047182 +epoch: 2, batch: 18076, sum loss: 5208.181641, avg loss: 2.605394, ppl: 13.536552 +epoch: 2, batch: 18077, sum loss: 3959.157227, avg loss: 2.577576, ppl: 13.165193 +epoch: 2, batch: 18078, sum loss: 4132.833008, avg loss: 2.498690, ppl: 12.166544 +epoch: 2, batch: 18079, sum loss: 5444.936523, avg loss: 2.613988, ppl: 13.653391 +epoch: 2, batch: 18080, sum loss: 3581.849121, avg loss: 2.373657, ppl: 10.736589 +epoch: 2, batch: 18081, sum loss: 4012.379639, avg loss: 2.299358, 
ppl: 9.967780 +epoch: 2, batch: 18082, sum loss: 4469.245117, avg loss: 2.583379, ppl: 13.241801 +epoch: 2, batch: 18083, sum loss: 4371.562012, avg loss: 2.628721, ppl: 13.856030 +epoch: 2, batch: 18084, sum loss: 4266.208008, avg loss: 2.443418, ppl: 11.512325 +epoch: 2, batch: 18085, sum loss: 3157.168457, avg loss: 2.214003, ppl: 9.152281 +epoch: 2, batch: 18086, sum loss: 4230.395508, avg loss: 2.343709, ppl: 10.419817 +epoch: 2, batch: 18087, sum loss: 3191.546143, avg loss: 2.265114, ppl: 9.632226 +epoch: 2, batch: 18088, sum loss: 3060.244385, avg loss: 2.201615, ppl: 9.039598 +epoch: 2, batch: 18089, sum loss: 4089.336670, avg loss: 2.437030, ppl: 11.439017 +epoch: 2, batch: 18090, sum loss: 5352.955078, avg loss: 2.753578, ppl: 15.698701 +epoch: 2, batch: 18091, sum loss: 3778.087158, avg loss: 2.365740, ppl: 10.651921 +epoch: 2, batch: 18092, sum loss: 3681.701416, avg loss: 2.330191, ppl: 10.279903 +epoch: 2, batch: 18093, sum loss: 4314.669434, avg loss: 2.621306, ppl: 13.753674 +epoch: 2, batch: 18094, sum loss: 4056.373779, avg loss: 2.546374, ppl: 12.760751 +epoch: 2, batch: 18095, sum loss: 4005.669434, avg loss: 2.373027, ppl: 10.729823 +epoch: 2, batch: 18096, sum loss: 3948.460449, avg loss: 2.601094, ppl: 13.478473 +epoch: 2, batch: 18097, sum loss: 3409.501709, avg loss: 2.372653, ppl: 10.725805 +epoch: 2, batch: 18098, sum loss: 4398.974609, avg loss: 2.723823, ppl: 15.238472 +epoch: 2, batch: 18099, sum loss: 4145.416992, avg loss: 2.468980, ppl: 11.810390 +epoch: 2, batch: 18100, sum loss: 4100.691895, avg loss: 2.497376, ppl: 12.150575 +epoch: 2, batch: 18101, sum loss: 4152.701172, avg loss: 2.501627, ppl: 12.202333 +epoch: 2, batch: 18102, sum loss: 4987.956543, avg loss: 2.786568, ppl: 16.225239 +epoch: 2, batch: 18103, sum loss: 3904.532227, avg loss: 2.558671, ppl: 12.918640 +epoch: 2, batch: 18104, sum loss: 3499.759277, avg loss: 2.305507, ppl: 10.029259 +epoch: 2, batch: 18105, sum loss: 4306.169922, avg loss: 2.630525, ppl: 
13.881061 +epoch: 2, batch: 18106, sum loss: 3711.243896, avg loss: 2.383586, ppl: 10.843723 +epoch: 2, batch: 18107, sum loss: 4383.034668, avg loss: 2.382084, ppl: 10.827442 +epoch: 2, batch: 18108, sum loss: 4054.817383, avg loss: 2.460448, ppl: 11.710052 +epoch: 2, batch: 18109, sum loss: 4620.870605, avg loss: 2.459218, ppl: 11.695662 +epoch: 2, batch: 18110, sum loss: 3694.352539, avg loss: 2.339679, ppl: 10.377900 +epoch: 2, batch: 18111, sum loss: 4936.650391, avg loss: 2.779645, ppl: 16.113306 +epoch: 2, batch: 18112, sum loss: 3608.056152, avg loss: 2.571672, ppl: 13.087691 +epoch: 2, batch: 18113, sum loss: 4365.080566, avg loss: 2.655159, ppl: 14.227242 +epoch: 2, batch: 18114, sum loss: 3881.477051, avg loss: 2.615551, ppl: 13.674743 +epoch: 2, batch: 18115, sum loss: 4412.426758, avg loss: 2.439153, ppl: 11.463324 +epoch: 2, batch: 18116, sum loss: 4965.895508, avg loss: 2.718060, ppl: 15.150901 +epoch: 2, batch: 18117, sum loss: 3357.859863, avg loss: 2.384844, ppl: 10.857367 +epoch: 2, batch: 18118, sum loss: 3758.744141, avg loss: 2.289126, ppl: 9.866307 +epoch: 2, batch: 18119, sum loss: 4102.426270, avg loss: 2.580142, ppl: 13.199016 +epoch: 2, batch: 18120, sum loss: 4586.137207, avg loss: 2.724977, ppl: 15.256070 +epoch: 2, batch: 18121, sum loss: 3839.543457, avg loss: 2.420897, ppl: 11.255957 +epoch: 2, batch: 18122, sum loss: 4487.829590, avg loss: 2.610721, ppl: 13.608861 +epoch: 2, batch: 18123, sum loss: 3326.157471, avg loss: 2.305029, ppl: 10.024473 +epoch: 2, batch: 18124, sum loss: 4109.836914, avg loss: 2.383896, ppl: 10.847085 +epoch: 2, batch: 18125, sum loss: 4118.618652, avg loss: 2.258015, ppl: 9.564082 +epoch: 2, batch: 18126, sum loss: 3700.392578, avg loss: 2.613272, ppl: 13.643616 +epoch: 2, batch: 18127, sum loss: 4490.781250, avg loss: 2.677866, ppl: 14.554002 +epoch: 2, batch: 18128, sum loss: 4107.195801, avg loss: 2.452057, ppl: 11.612213 +epoch: 2, batch: 18129, sum loss: 4145.684570, avg loss: 2.479476, ppl: 11.935014 
+epoch: 2, batch: 18130, sum loss: 4372.470215, avg loss: 2.893759, ppl: 18.061079 +epoch: 2, batch: 18131, sum loss: 3775.689209, avg loss: 2.490560, ppl: 12.068036 +epoch: 2, batch: 18132, sum loss: 4790.598633, avg loss: 2.668857, ppl: 14.423478 +epoch: 2, batch: 18133, sum loss: 3887.575195, avg loss: 2.410152, ppl: 11.135653 +epoch: 2, batch: 18134, sum loss: 4488.046875, avg loss: 2.429912, ppl: 11.357881 +epoch: 2, batch: 18135, sum loss: 4496.275879, avg loss: 2.547465, ppl: 12.774680 +epoch: 2, batch: 18136, sum loss: 4410.781250, avg loss: 2.405006, ppl: 11.078496 +epoch: 2, batch: 18137, sum loss: 4424.002930, avg loss: 2.334566, ppl: 10.324979 +epoch: 2, batch: 18138, sum loss: 3287.952148, avg loss: 2.350216, ppl: 10.487837 +epoch: 2, batch: 18139, sum loss: 4127.110840, avg loss: 2.468368, ppl: 11.803166 +epoch: 2, batch: 18140, sum loss: 3708.629395, avg loss: 2.332471, ppl: 10.303373 +epoch: 2, batch: 18141, sum loss: 3847.022461, avg loss: 2.447215, ppl: 11.556122 +epoch: 2, batch: 18142, sum loss: 4046.607910, avg loss: 2.473477, ppl: 11.863624 +epoch: 2, batch: 18143, sum loss: 3753.174805, avg loss: 2.334064, ppl: 10.319794 +epoch: 2, batch: 18144, sum loss: 4202.983398, avg loss: 2.503266, ppl: 12.222345 +epoch: 2, batch: 18145, sum loss: 3859.610596, avg loss: 2.439703, ppl: 11.469636 +epoch: 2, batch: 18146, sum loss: 5005.416992, avg loss: 2.717382, ppl: 15.140628 +epoch: 2, batch: 18147, sum loss: 4429.114258, avg loss: 2.415002, ppl: 11.189794 +epoch: 2, batch: 18148, sum loss: 4623.652344, avg loss: 2.702310, ppl: 14.914145 +epoch: 2, batch: 18149, sum loss: 5100.956055, avg loss: 2.733631, ppl: 15.388664 +epoch: 2, batch: 18150, sum loss: 4288.721191, avg loss: 2.627893, ppl: 13.844568 +epoch: 2, batch: 18151, sum loss: 4449.962891, avg loss: 2.795203, ppl: 16.365946 +epoch: 2, batch: 18152, sum loss: 4918.489258, avg loss: 2.758547, ppl: 15.776903 +epoch: 2, batch: 18153, sum loss: 4279.519043, avg loss: 2.455261, ppl: 11.649468 +epoch: 
2, batch: 18154, sum loss: 4358.433105, avg loss: 2.473572, ppl: 11.864750 +epoch: 2, batch: 18155, sum loss: 4940.464844, avg loss: 2.616772, ppl: 13.691452 +epoch: 2, batch: 18156, sum loss: 3716.453613, avg loss: 2.424301, ppl: 11.294333 +epoch: 2, batch: 18157, sum loss: 4720.278320, avg loss: 2.722190, ppl: 15.213610 +epoch: 2, batch: 18158, sum loss: 4908.990234, avg loss: 2.507145, ppl: 12.269852 +epoch: 2, batch: 18159, sum loss: 3677.899170, avg loss: 2.176272, ppl: 8.813386 +epoch: 2, batch: 18160, sum loss: 4663.318359, avg loss: 2.535790, ppl: 12.626408 +epoch: 2, batch: 18161, sum loss: 4223.143555, avg loss: 2.582962, ppl: 13.236290 +epoch: 2, batch: 18162, sum loss: 4186.627930, avg loss: 2.464172, ppl: 11.753745 +epoch: 2, batch: 18163, sum loss: 3707.550049, avg loss: 2.416917, ppl: 11.211237 +epoch: 2, batch: 18164, sum loss: 4465.183105, avg loss: 2.457448, ppl: 11.674979 +epoch: 2, batch: 18165, sum loss: 3656.532471, avg loss: 2.288193, ppl: 9.857110 +epoch: 2, batch: 18166, sum loss: 3144.956543, avg loss: 1.965598, ppl: 7.139179 +epoch: 2, batch: 18167, sum loss: 3533.700195, avg loss: 2.278337, ppl: 9.760430 +epoch: 2, batch: 18168, sum loss: 4204.399902, avg loss: 2.513090, ppl: 12.343016 +epoch: 2, batch: 18169, sum loss: 5015.099121, avg loss: 2.620219, ppl: 13.738732 +epoch: 2, batch: 18170, sum loss: 4125.036621, avg loss: 2.455379, ppl: 11.650848 +epoch: 2, batch: 18171, sum loss: 3638.828613, avg loss: 2.277114, ppl: 9.748509 +epoch: 2, batch: 18172, sum loss: 4992.536621, avg loss: 2.681277, ppl: 14.603724 +epoch: 2, batch: 18173, sum loss: 5284.983398, avg loss: 2.543303, ppl: 12.721619 +epoch: 2, batch: 18174, sum loss: 4735.830078, avg loss: 2.593554, ppl: 13.377234 +epoch: 2, batch: 18175, sum loss: 4199.182617, avg loss: 2.359091, ppl: 10.581331 +epoch: 2, batch: 18176, sum loss: 4524.167969, avg loss: 2.483078, ppl: 11.978076 +epoch: 2, batch: 18177, sum loss: 4486.373535, avg loss: 2.675238, ppl: 14.515800 +epoch: 2, batch: 
18178, sum loss: 3917.889160, avg loss: 2.522788, ppl: 12.463294 +epoch: 2, batch: 18179, sum loss: 4076.482178, avg loss: 2.383908, ppl: 10.847209 +epoch: 2, batch: 18180, sum loss: 4330.774414, avg loss: 2.401983, ppl: 11.045052 +epoch: 2, batch: 18181, sum loss: 4603.814941, avg loss: 2.584961, ppl: 13.262768 +epoch: 2, batch: 18182, sum loss: 4019.409424, avg loss: 2.391082, ppl: 10.925312 +epoch: 2, batch: 18183, sum loss: 4467.220215, avg loss: 2.854454, ppl: 17.364950 +epoch: 2, batch: 18184, sum loss: 4329.090332, avg loss: 2.634869, ppl: 13.941491 +epoch: 2, batch: 18185, sum loss: 3754.500000, avg loss: 2.531693, ppl: 12.574771 +epoch: 2, batch: 18186, sum loss: 4519.561035, avg loss: 2.620035, ppl: 13.736210 +epoch: 2, batch: 18187, sum loss: 4455.890137, avg loss: 2.516031, ppl: 12.379360 +epoch: 2, batch: 18188, sum loss: 4383.167969, avg loss: 2.645243, ppl: 14.086870 +epoch: 2, batch: 18189, sum loss: 3921.053711, avg loss: 2.640440, ppl: 14.019371 +epoch: 2, batch: 18190, sum loss: 3942.207764, avg loss: 2.433462, ppl: 11.398271 +epoch: 2, batch: 18191, sum loss: 3480.338867, avg loss: 2.254106, ppl: 9.526769 +epoch: 2, batch: 18192, sum loss: 4283.044434, avg loss: 2.581702, ppl: 13.219625 +epoch: 2, batch: 18193, sum loss: 4004.878906, avg loss: 2.291121, ppl: 9.886011 +epoch: 2, batch: 18194, sum loss: 4398.119141, avg loss: 2.483410, ppl: 11.982052 +epoch: 2, batch: 18195, sum loss: 3643.332031, avg loss: 2.167360, ppl: 8.735191 +epoch: 2, batch: 18196, sum loss: 4432.735840, avg loss: 2.680010, ppl: 14.585234 +epoch: 2, batch: 18197, sum loss: 4205.997559, avg loss: 2.594693, ppl: 13.392474 +epoch: 2, batch: 18198, sum loss: 4330.472168, avg loss: 2.577662, ppl: 13.166319 +epoch: 2, batch: 18199, sum loss: 3809.644531, avg loss: 2.549963, ppl: 12.806627 +epoch: 2, batch: 18200, sum loss: 3932.788574, avg loss: 2.604496, ppl: 13.524405 +epoch: 2, batch: 18201, sum loss: 5126.907227, avg loss: 2.706920, ppl: 14.983063 +epoch: 2, batch: 18202, sum 
loss: 4225.726562, avg loss: 2.554853, ppl: 12.869408 +epoch: 2, batch: 18203, sum loss: 4220.833984, avg loss: 2.542671, ppl: 12.713586 +epoch: 2, batch: 18204, sum loss: 5634.024414, avg loss: 2.713885, ppl: 15.087771 +epoch: 2, batch: 18205, sum loss: 3852.153809, avg loss: 2.427318, ppl: 11.328463 +epoch: 2, batch: 18206, sum loss: 3242.224609, avg loss: 2.351142, ppl: 10.497548 +epoch: 2, batch: 18207, sum loss: 4149.754395, avg loss: 2.598469, ppl: 13.443138 +epoch: 2, batch: 18208, sum loss: 3983.130859, avg loss: 2.446640, ppl: 11.549480 +epoch: 2, batch: 18209, sum loss: 3616.485840, avg loss: 2.379267, ppl: 10.796988 +epoch: 2, batch: 18210, sum loss: 5103.412109, avg loss: 2.584006, ppl: 13.250113 +epoch: 2, batch: 18211, sum loss: 4158.265625, avg loss: 2.508001, ppl: 12.280358 +epoch: 2, batch: 18212, sum loss: 4974.529297, avg loss: 2.547122, ppl: 12.770298 +epoch: 2, batch: 18213, sum loss: 4293.833008, avg loss: 2.378855, ppl: 10.792536 +epoch: 2, batch: 18214, sum loss: 3924.380615, avg loss: 2.410553, ppl: 11.140122 +epoch: 2, batch: 18215, sum loss: 4682.612793, avg loss: 2.834512, ppl: 17.022083 +epoch: 2, batch: 18216, sum loss: 3027.213867, avg loss: 2.240721, ppl: 9.400104 +epoch: 2, batch: 18217, sum loss: 4017.577881, avg loss: 2.394266, ppl: 10.960147 +epoch: 2, batch: 18218, sum loss: 3644.599854, avg loss: 2.382092, ppl: 10.827525 +epoch: 2, batch: 18219, sum loss: 3545.639893, avg loss: 2.460541, ppl: 11.711148 +epoch: 2, batch: 18220, sum loss: 4147.740723, avg loss: 2.426999, ppl: 11.324841 +epoch: 2, batch: 18221, sum loss: 5004.975098, avg loss: 2.665056, ppl: 14.368754 +epoch: 2, batch: 18222, sum loss: 3890.919189, avg loss: 2.483037, ppl: 11.977588 +epoch: 2, batch: 18223, sum loss: 4092.326172, avg loss: 2.487736, ppl: 12.034003 +epoch: 2, batch: 18224, sum loss: 4068.292725, avg loss: 2.282993, ppl: 9.805982 +epoch: 2, batch: 18225, sum loss: 4557.304199, avg loss: 2.631238, ppl: 13.890956 +epoch: 2, batch: 18226, sum loss: 
3509.057373, avg loss: 2.186328, ppl: 8.902467 +epoch: 2, batch: 18227, sum loss: 4509.726562, avg loss: 2.706919, ppl: 14.983037 +epoch: 2, batch: 18228, sum loss: 4016.554932, avg loss: 2.286030, ppl: 9.835813 +epoch: 2, batch: 18229, sum loss: 3418.759277, avg loss: 2.222860, ppl: 9.233705 +epoch: 2, batch: 18230, sum loss: 4039.472412, avg loss: 2.410187, ppl: 11.136038 +epoch: 2, batch: 18231, sum loss: 3847.122314, avg loss: 2.556228, ppl: 12.887112 +epoch: 2, batch: 18232, sum loss: 5278.461914, avg loss: 2.649831, ppl: 14.151641 +epoch: 2, batch: 18233, sum loss: 4499.426758, avg loss: 2.314520, ppl: 10.120066 +epoch: 2, batch: 18234, sum loss: 4228.626465, avg loss: 2.555061, ppl: 12.872089 +epoch: 2, batch: 18235, sum loss: 5063.495117, avg loss: 2.649657, ppl: 14.149188 +epoch: 2, batch: 18236, sum loss: 3876.345703, avg loss: 2.193744, ppl: 8.968731 +epoch: 2, batch: 18237, sum loss: 4761.557617, avg loss: 2.651201, ppl: 14.171055 +epoch: 2, batch: 18238, sum loss: 3297.278320, avg loss: 2.330232, ppl: 10.280325 +epoch: 2, batch: 18239, sum loss: 4229.901367, avg loss: 2.836956, ppl: 17.063745 +epoch: 2, batch: 18240, sum loss: 5146.151367, avg loss: 2.774206, ppl: 16.025892 +epoch: 2, batch: 18241, sum loss: 4169.657227, avg loss: 2.501294, ppl: 12.198267 +epoch: 2, batch: 18242, sum loss: 3984.164795, avg loss: 2.321774, ppl: 10.193747 +epoch: 2, batch: 18243, sum loss: 4582.266113, avg loss: 2.502603, ppl: 12.214247 +epoch: 2, batch: 18244, sum loss: 4254.068359, avg loss: 2.473295, ppl: 11.861471 +epoch: 2, batch: 18245, sum loss: 4760.081055, avg loss: 2.503988, ppl: 12.231175 +epoch: 2, batch: 18246, sum loss: 3444.083984, avg loss: 2.243703, ppl: 9.428178 +epoch: 2, batch: 18247, sum loss: 3238.567871, avg loss: 2.272679, ppl: 9.705368 +epoch: 2, batch: 18248, sum loss: 3847.126953, avg loss: 2.433350, ppl: 11.397002 +epoch: 2, batch: 18249, sum loss: 4330.939453, avg loss: 2.544618, ppl: 12.738357 +epoch: 2, batch: 18250, sum loss: 4339.138672, 
avg loss: 2.506724, ppl: 12.264681 +epoch: 2, batch: 18251, sum loss: 4118.084961, avg loss: 2.358582, ppl: 10.575950 +epoch: 2, batch: 18252, sum loss: 4071.744873, avg loss: 2.678780, ppl: 14.567305 +epoch: 2, batch: 18253, sum loss: 3997.695557, avg loss: 2.490776, ppl: 12.070640 +epoch: 2, batch: 18254, sum loss: 3967.384766, avg loss: 2.495210, ppl: 12.124285 +epoch: 2, batch: 18255, sum loss: 3902.409668, avg loss: 2.345198, ppl: 10.435340 +epoch: 2, batch: 18256, sum loss: 4315.994629, avg loss: 2.375341, ppl: 10.754680 +epoch: 2, batch: 18257, sum loss: 4896.331055, avg loss: 2.656718, ppl: 14.249442 +epoch: 2, batch: 18258, sum loss: 4151.784668, avg loss: 2.307829, ppl: 10.052578 +epoch: 2, batch: 18259, sum loss: 3744.429199, avg loss: 2.302847, ppl: 10.002619 +epoch: 2, batch: 18260, sum loss: 4371.292480, avg loss: 2.625401, ppl: 13.810111 +epoch: 2, batch: 18261, sum loss: 3560.092041, avg loss: 2.285040, ppl: 9.826078 +epoch: 2, batch: 18262, sum loss: 4222.641113, avg loss: 2.426805, ppl: 11.322651 +epoch: 2, batch: 18263, sum loss: 4000.019287, avg loss: 2.386647, ppl: 10.876957 +epoch: 2, batch: 18264, sum loss: 4466.143555, avg loss: 2.502041, ppl: 12.207388 +epoch: 2, batch: 18265, sum loss: 4498.689453, avg loss: 2.497884, ppl: 12.156743 +epoch: 2, batch: 18266, sum loss: 4274.639648, avg loss: 2.475182, ppl: 11.883873 +epoch: 2, batch: 18267, sum loss: 5085.642090, avg loss: 2.849099, ppl: 17.272219 +epoch: 2, batch: 18268, sum loss: 4977.077148, avg loss: 2.660116, ppl: 14.297951 +epoch: 2, batch: 18269, sum loss: 4024.633057, avg loss: 2.448074, ppl: 11.566045 +epoch: 2, batch: 18270, sum loss: 3539.120605, avg loss: 2.257092, ppl: 9.555264 +epoch: 2, batch: 18271, sum loss: 3199.191895, avg loss: 2.197247, ppl: 9.000204 +epoch: 2, batch: 18272, sum loss: 3988.358398, avg loss: 2.364172, ppl: 10.635232 +epoch: 2, batch: 18273, sum loss: 3960.678711, avg loss: 2.333930, ppl: 10.318411 +epoch: 2, batch: 18274, sum loss: 3768.875488, avg loss: 
2.298095, ppl: 9.955197 +epoch: 2, batch: 18275, sum loss: 4097.227539, avg loss: 2.238922, ppl: 9.383212 +epoch: 2, batch: 18276, sum loss: 4111.668457, avg loss: 2.448879, ppl: 11.575366 +epoch: 2, batch: 18277, sum loss: 4598.448730, avg loss: 2.571839, ppl: 13.089879 +epoch: 2, batch: 18278, sum loss: 4056.086182, avg loss: 2.494518, ppl: 12.115890 +epoch: 2, batch: 18279, sum loss: 4991.713867, avg loss: 2.522341, ppl: 12.457732 +epoch: 2, batch: 18280, sum loss: 4498.005859, avg loss: 2.297245, ppl: 9.946744 +epoch: 2, batch: 18281, sum loss: 3886.206543, avg loss: 2.384176, ppl: 10.850116 +epoch: 2, batch: 18282, sum loss: 4239.930664, avg loss: 2.601184, ppl: 13.479693 +epoch: 2, batch: 18283, sum loss: 3507.492188, avg loss: 2.463127, ppl: 11.741466 +epoch: 2, batch: 18284, sum loss: 5067.541992, avg loss: 2.758597, ppl: 15.777686 +epoch: 2, batch: 18285, sum loss: 4449.404785, avg loss: 2.361680, ppl: 10.608760 +epoch: 2, batch: 18286, sum loss: 4128.353027, avg loss: 2.500517, ppl: 12.188792 +epoch: 2, batch: 18287, sum loss: 3632.479736, avg loss: 2.084039, ppl: 8.036862 +epoch: 2, batch: 18288, sum loss: 4199.567383, avg loss: 2.535971, ppl: 12.628683 +epoch: 2, batch: 18289, sum loss: 4448.167969, avg loss: 2.592173, ppl: 13.358764 +epoch: 2, batch: 18290, sum loss: 3337.533936, avg loss: 2.191421, ppl: 8.947917 +epoch: 2, batch: 18291, sum loss: 4100.782715, avg loss: 2.514275, ppl: 12.357647 +epoch: 2, batch: 18292, sum loss: 4161.163574, avg loss: 2.523447, ppl: 12.471506 +epoch: 2, batch: 18293, sum loss: 3362.910645, avg loss: 2.273773, ppl: 9.715992 +epoch: 2, batch: 18294, sum loss: 4851.681152, avg loss: 2.943981, ppl: 18.991308 +epoch: 2, batch: 18295, sum loss: 4109.974609, avg loss: 2.588145, ppl: 13.305071 +epoch: 2, batch: 18296, sum loss: 4082.976807, avg loss: 2.415963, ppl: 11.200548 +epoch: 2, batch: 18297, sum loss: 3796.004883, avg loss: 2.500662, ppl: 12.190562 +epoch: 2, batch: 18298, sum loss: 4426.327637, avg loss: 2.611403, 
ppl: 13.618140 +epoch: 2, batch: 18299, sum loss: 3713.843750, avg loss: 2.321152, ppl: 10.187406 +epoch: 2, batch: 18300, sum loss: 4071.396484, avg loss: 2.591596, ppl: 13.351058 +epoch: 2, batch: 18301, sum loss: 4255.746582, avg loss: 2.636770, ppl: 13.968014 +epoch: 2, batch: 18302, sum loss: 4295.501465, avg loss: 2.620806, ppl: 13.746802 +epoch: 2, batch: 18303, sum loss: 3240.290283, avg loss: 2.113692, ppl: 8.278752 +epoch: 2, batch: 18304, sum loss: 4400.577148, avg loss: 2.616277, ppl: 13.684673 +epoch: 2, batch: 18305, sum loss: 4320.784180, avg loss: 2.469019, ppl: 11.810860 +epoch: 2, batch: 18306, sum loss: 3183.102051, avg loss: 2.205892, ppl: 9.078344 +epoch: 2, batch: 18307, sum loss: 4642.130371, avg loss: 2.700483, ppl: 14.886922 +epoch: 2, batch: 18308, sum loss: 4156.308105, avg loss: 2.499283, ppl: 12.173766 +epoch: 2, batch: 18309, sum loss: 3917.110107, avg loss: 2.235794, ppl: 9.353902 +epoch: 2, batch: 18310, sum loss: 3644.876709, avg loss: 2.393222, ppl: 10.948712 +epoch: 2, batch: 18311, sum loss: 3773.595459, avg loss: 2.479367, ppl: 11.933702 +epoch: 2, batch: 18312, sum loss: 3797.191895, avg loss: 2.266980, ppl: 9.650217 +epoch: 2, batch: 18313, sum loss: 4158.006836, avg loss: 2.557200, ppl: 12.899644 +epoch: 2, batch: 18314, sum loss: 4967.756836, avg loss: 2.541052, ppl: 12.693018 +epoch: 2, batch: 18315, sum loss: 4258.872070, avg loss: 2.620844, ppl: 13.747327 +epoch: 2, batch: 18316, sum loss: 4314.636719, avg loss: 2.355151, ppl: 10.539722 +epoch: 2, batch: 18317, sum loss: 3389.974609, avg loss: 2.194159, ppl: 8.972448 +epoch: 2, batch: 18318, sum loss: 4587.669434, avg loss: 2.523471, ppl: 12.471807 +epoch: 2, batch: 18319, sum loss: 5420.211914, avg loss: 2.846750, ppl: 17.231688 +epoch: 2, batch: 18320, sum loss: 4443.906738, avg loss: 2.837744, ppl: 17.077192 +epoch: 2, batch: 18321, sum loss: 3231.506836, avg loss: 2.201299, ppl: 9.036744 +epoch: 2, batch: 18322, sum loss: 4467.107910, avg loss: 2.463932, ppl: 
11.750923 +epoch: 2, batch: 18323, sum loss: 4207.523926, avg loss: 2.597237, ppl: 13.426591 +epoch: 2, batch: 18324, sum loss: 5097.480469, avg loss: 2.653556, ppl: 14.204458 +epoch: 2, batch: 18325, sum loss: 4309.065430, avg loss: 2.574113, ppl: 13.119679 +epoch: 2, batch: 18326, sum loss: 4011.565186, avg loss: 2.482404, ppl: 11.970009 +epoch: 2, batch: 18327, sum loss: 3213.449219, avg loss: 2.410690, ppl: 11.141642 +epoch: 2, batch: 18328, sum loss: 3426.631348, avg loss: 2.343797, ppl: 10.420732 +epoch: 2, batch: 18329, sum loss: 3586.540283, avg loss: 2.194945, ppl: 8.979508 +epoch: 2, batch: 18330, sum loss: 4351.843750, avg loss: 2.534563, ppl: 12.610913 +epoch: 2, batch: 18331, sum loss: 3532.680176, avg loss: 2.167289, ppl: 8.734569 +epoch: 2, batch: 18332, sum loss: 3641.710449, avg loss: 2.475670, ppl: 11.889668 +epoch: 2, batch: 18333, sum loss: 4002.389648, avg loss: 2.313520, ppl: 10.109949 +epoch: 2, batch: 18334, sum loss: 3449.413330, avg loss: 2.395426, ppl: 10.972872 +epoch: 2, batch: 18335, sum loss: 4136.719238, avg loss: 2.354422, ppl: 10.532038 +epoch: 2, batch: 18336, sum loss: 4746.268066, avg loss: 2.639749, ppl: 14.009681 +epoch: 2, batch: 18337, sum loss: 3493.774170, avg loss: 2.113596, ppl: 8.277955 +epoch: 2, batch: 18338, sum loss: 3888.789551, avg loss: 2.344056, ppl: 10.423432 +epoch: 2, batch: 18339, sum loss: 4366.506836, avg loss: 2.521078, ppl: 12.442000 +epoch: 2, batch: 18340, sum loss: 4021.815674, avg loss: 2.464348, ppl: 11.755813 +epoch: 2, batch: 18341, sum loss: 4478.764648, avg loss: 2.717697, ppl: 15.145401 +epoch: 2, batch: 18342, sum loss: 5345.083984, avg loss: 2.756619, ppl: 15.746513 +epoch: 2, batch: 18343, sum loss: 4570.017578, avg loss: 2.598077, ppl: 13.437873 +epoch: 2, batch: 18344, sum loss: 4440.593750, avg loss: 2.374649, ppl: 10.747241 +epoch: 2, batch: 18345, sum loss: 4837.115723, avg loss: 2.571566, ppl: 13.086303 +epoch: 2, batch: 18346, sum loss: 3406.241699, avg loss: 2.381987, ppl: 10.826397 
+epoch: 2, batch: 18347, sum loss: 3643.584473, avg loss: 2.118363, ppl: 8.317512 +epoch: 2, batch: 18348, sum loss: 3727.226074, avg loss: 2.395390, ppl: 10.972471 +epoch: 2, batch: 18349, sum loss: 3979.350830, avg loss: 2.338044, ppl: 10.360950 +epoch: 2, batch: 18350, sum loss: 3496.740234, avg loss: 2.386853, ppl: 10.879209 +epoch: 2, batch: 18351, sum loss: 4162.134766, avg loss: 2.312297, ppl: 10.097593 +epoch: 2, batch: 18352, sum loss: 5296.025391, avg loss: 2.727099, ppl: 15.288466 +epoch: 2, batch: 18353, sum loss: 4441.875977, avg loss: 2.659806, ppl: 14.293516 +epoch: 2, batch: 18354, sum loss: 4127.702637, avg loss: 2.622428, ppl: 13.769120 +epoch: 2, batch: 18355, sum loss: 4125.665039, avg loss: 2.325629, ppl: 10.233109 +epoch: 2, batch: 18356, sum loss: 3581.821777, avg loss: 2.284325, ppl: 9.819057 +epoch: 2, batch: 18357, sum loss: 4675.923340, avg loss: 2.457133, ppl: 11.671297 +epoch: 2, batch: 18358, sum loss: 3743.420898, avg loss: 2.415110, ppl: 11.191005 +epoch: 2, batch: 18359, sum loss: 3868.399902, avg loss: 2.453012, ppl: 11.623303 +epoch: 2, batch: 18360, sum loss: 3932.467773, avg loss: 2.511154, ppl: 12.319143 +epoch: 2, batch: 18361, sum loss: 4085.031738, avg loss: 2.567588, ppl: 13.034343 +epoch: 2, batch: 18362, sum loss: 3734.376465, avg loss: 2.417072, ppl: 11.212983 +epoch: 2, batch: 18363, sum loss: 4323.424316, avg loss: 2.415321, ppl: 11.193361 +epoch: 2, batch: 18364, sum loss: 4378.564941, avg loss: 2.525124, ppl: 12.492442 +epoch: 2, batch: 18365, sum loss: 5001.014160, avg loss: 2.662947, ppl: 14.338478 +epoch: 2, batch: 18366, sum loss: 4437.134766, avg loss: 2.619324, ppl: 13.726441 +epoch: 2, batch: 18367, sum loss: 4808.722168, avg loss: 2.762046, ppl: 15.832204 +epoch: 2, batch: 18368, sum loss: 4134.889648, avg loss: 2.315168, ppl: 10.126623 +epoch: 2, batch: 18369, sum loss: 3942.297119, avg loss: 2.511017, ppl: 12.317454 +epoch: 2, batch: 18370, sum loss: 3978.176758, avg loss: 2.344241, ppl: 10.425356 +epoch: 
2, batch: 18371, sum loss: 4594.453613, avg loss: 2.532775, ppl: 12.588389 +epoch: 2, batch: 18372, sum loss: 4517.994629, avg loss: 2.565585, ppl: 13.008262 +epoch: 2, batch: 18373, sum loss: 4501.703125, avg loss: 2.666886, ppl: 14.395071 +epoch: 2, batch: 18374, sum loss: 4428.656738, avg loss: 2.496424, ppl: 12.139009 +epoch: 2, batch: 18375, sum loss: 3843.342773, avg loss: 2.482780, ppl: 11.974504 +epoch: 2, batch: 18376, sum loss: 3244.544434, avg loss: 2.262583, ppl: 9.607877 +epoch: 2, batch: 18377, sum loss: 3795.686523, avg loss: 2.542322, ppl: 12.709146 +epoch: 2, batch: 18378, sum loss: 4963.938477, avg loss: 2.643205, ppl: 14.058184 +epoch: 2, batch: 18379, sum loss: 4354.927246, avg loss: 2.467381, ppl: 11.791524 +epoch: 2, batch: 18380, sum loss: 4576.639160, avg loss: 2.487304, ppl: 12.028799 +epoch: 2, batch: 18381, sum loss: 4404.676758, avg loss: 2.703914, ppl: 14.938091 +epoch: 2, batch: 18382, sum loss: 4432.137207, avg loss: 2.476054, ppl: 11.894239 +epoch: 2, batch: 18383, sum loss: 4070.989746, avg loss: 2.370990, ppl: 10.707989 +epoch: 2, batch: 18384, sum loss: 4138.159668, avg loss: 2.704679, ppl: 14.949525 +epoch: 2, batch: 18385, sum loss: 4077.027344, avg loss: 2.271324, ppl: 9.692228 +epoch: 2, batch: 18386, sum loss: 3851.900391, avg loss: 2.494754, ppl: 12.118755 +epoch: 2, batch: 18387, sum loss: 4270.792969, avg loss: 2.519642, ppl: 12.424147 +epoch: 2, batch: 18388, sum loss: 4392.581055, avg loss: 2.591493, ppl: 13.349689 +epoch: 2, batch: 18389, sum loss: 4634.602539, avg loss: 2.351397, ppl: 10.500224 +epoch: 2, batch: 18390, sum loss: 3607.725342, avg loss: 2.403548, ppl: 11.062356 +epoch: 2, batch: 18391, sum loss: 3945.148438, avg loss: 2.447362, ppl: 11.557822 +epoch: 2, batch: 18392, sum loss: 4718.929688, avg loss: 2.726129, ppl: 15.273649 +epoch: 2, batch: 18393, sum loss: 4104.153320, avg loss: 2.427057, ppl: 11.325502 +epoch: 2, batch: 18394, sum loss: 4290.296875, avg loss: 2.520738, ppl: 12.437777 +epoch: 2, batch: 
18395, sum loss: 3747.822510, avg loss: 2.264545, ppl: 9.626748 +epoch: 2, batch: 18396, sum loss: 4886.308105, avg loss: 2.768446, ppl: 15.933861 +epoch: 2, batch: 18397, sum loss: 4316.561035, avg loss: 2.502354, ppl: 12.211210 +epoch: 2, batch: 18398, sum loss: 3916.822754, avg loss: 2.460316, ppl: 11.708508 +epoch: 2, batch: 18399, sum loss: 4228.508789, avg loss: 2.533558, ppl: 12.598256 +epoch: 2, batch: 18400, sum loss: 4460.446289, avg loss: 2.265336, ppl: 9.634359 +epoch: 2, batch: 18401, sum loss: 3635.760010, avg loss: 2.363953, ppl: 10.632901 +epoch: 2, batch: 18402, sum loss: 4167.121582, avg loss: 2.619184, ppl: 13.724520 +epoch: 2, batch: 18403, sum loss: 5332.937988, avg loss: 2.776126, ppl: 16.056700 +epoch: 2, batch: 18404, sum loss: 4422.296875, avg loss: 2.575595, ppl: 13.139134 +epoch: 2, batch: 18405, sum loss: 3478.645508, avg loss: 2.345681, ppl: 10.440385 +epoch: 2, batch: 18406, sum loss: 3354.048340, avg loss: 2.291017, ppl: 9.884981 +epoch: 2, batch: 18407, sum loss: 3618.715332, avg loss: 2.244861, ppl: 9.439100 +epoch: 2, batch: 18408, sum loss: 3925.698486, avg loss: 2.484619, ppl: 11.996553 +epoch: 2, batch: 18409, sum loss: 3589.097412, avg loss: 2.208675, ppl: 9.103649 +epoch: 2, batch: 18410, sum loss: 3880.124023, avg loss: 2.529416, ppl: 12.546175 +epoch: 2, batch: 18411, sum loss: 4983.091309, avg loss: 2.753089, ppl: 15.691030 +epoch: 2, batch: 18412, sum loss: 4117.899414, avg loss: 2.609569, ppl: 13.593185 +epoch: 2, batch: 18413, sum loss: 3203.451172, avg loss: 2.188150, ppl: 8.918695 +epoch: 2, batch: 18414, sum loss: 4131.522461, avg loss: 2.255198, ppl: 9.537182 +epoch: 2, batch: 18415, sum loss: 4154.254395, avg loss: 2.534627, ppl: 12.611732 +epoch: 2, batch: 18416, sum loss: 4130.630859, avg loss: 2.535685, ppl: 12.625077 +epoch: 2, batch: 18417, sum loss: 4902.795898, avg loss: 2.673280, ppl: 14.487410 +epoch: 2, batch: 18418, sum loss: 4710.152344, avg loss: 2.609503, ppl: 13.592288 +epoch: 2, batch: 18419, sum 
loss: 4199.520508, avg loss: 2.654564, ppl: 14.218784 +epoch: 2, batch: 18420, sum loss: 3903.910889, avg loss: 2.423284, ppl: 11.282855 +epoch: 2, batch: 18421, sum loss: 5027.461426, avg loss: 2.745746, ppl: 15.576235 +epoch: 2, batch: 18422, sum loss: 4853.352051, avg loss: 2.607927, ppl: 13.570890 +epoch: 2, batch: 18423, sum loss: 3715.910645, avg loss: 2.441466, ppl: 11.489871 +epoch: 2, batch: 18424, sum loss: 4302.039062, avg loss: 2.414163, ppl: 11.180412 +epoch: 2, batch: 18425, sum loss: 4107.660156, avg loss: 2.385401, ppl: 10.863416 +epoch: 2, batch: 18426, sum loss: 4104.643066, avg loss: 2.515100, ppl: 12.367846 +epoch: 2, batch: 18427, sum loss: 4895.112793, avg loss: 2.784478, ppl: 16.191366 +epoch: 2, batch: 18428, sum loss: 4233.011719, avg loss: 2.537777, ppl: 12.651515 +epoch: 2, batch: 18429, sum loss: 4281.831055, avg loss: 2.563971, ppl: 12.987285 +epoch: 2, batch: 18430, sum loss: 4511.199707, avg loss: 2.683640, ppl: 14.638287 +epoch: 2, batch: 18431, sum loss: 3784.431152, avg loss: 2.618984, ppl: 13.721768 +epoch: 2, batch: 18432, sum loss: 4752.864746, avg loss: 2.731531, ppl: 15.356385 +epoch: 2, batch: 18433, sum loss: 4123.350098, avg loss: 2.502033, ppl: 12.207283 +epoch: 2, batch: 18434, sum loss: 5220.867676, avg loss: 2.784463, ppl: 16.191116 +epoch: 2, batch: 18435, sum loss: 3495.631592, avg loss: 2.204055, ppl: 9.061687 +epoch: 2, batch: 18436, sum loss: 4472.270996, avg loss: 2.477712, ppl: 11.913979 +epoch: 2, batch: 18437, sum loss: 3922.574219, avg loss: 2.522556, ppl: 12.460402 +epoch: 2, batch: 18438, sum loss: 4771.486816, avg loss: 2.576397, ppl: 13.149671 +epoch: 2, batch: 18439, sum loss: 3593.345703, avg loss: 2.471352, ppl: 11.838440 +epoch: 2, batch: 18440, sum loss: 3882.187012, avg loss: 2.433973, ppl: 11.404101 +epoch: 2, batch: 18441, sum loss: 3961.179199, avg loss: 2.275232, ppl: 9.730177 +epoch: 2, batch: 18442, sum loss: 4093.141602, avg loss: 2.520407, ppl: 12.433661 +epoch: 2, batch: 18443, sum loss: 
3615.277832, avg loss: 2.352165, ppl: 10.508298 +epoch: 2, batch: 18444, sum loss: 3411.105713, avg loss: 2.293951, ppl: 9.914034 +epoch: 2, batch: 18445, sum loss: 4696.895996, avg loss: 2.693174, ppl: 14.778510 +epoch: 2, batch: 18446, sum loss: 3765.731689, avg loss: 2.291985, ppl: 9.894559 +epoch: 2, batch: 18447, sum loss: 4486.664062, avg loss: 2.617657, ppl: 13.703578 +epoch: 2, batch: 18448, sum loss: 4547.809082, avg loss: 2.474325, ppl: 11.873686 +epoch: 2, batch: 18449, sum loss: 4548.909668, avg loss: 2.686893, ppl: 14.685975 +epoch: 2, batch: 18450, sum loss: 4054.375977, avg loss: 2.461673, ppl: 11.724413 +epoch: 2, batch: 18451, sum loss: 3622.200684, avg loss: 2.370550, ppl: 10.703279 +epoch: 2, batch: 18452, sum loss: 5290.591309, avg loss: 2.745507, ppl: 15.572503 +epoch: 2, batch: 18453, sum loss: 4580.722168, avg loss: 2.547676, ppl: 12.777379 +epoch: 2, batch: 18454, sum loss: 4170.738770, avg loss: 2.540036, ppl: 12.680127 +epoch: 2, batch: 18455, sum loss: 3825.523438, avg loss: 2.482494, ppl: 11.971085 +epoch: 2, batch: 18456, sum loss: 4912.958984, avg loss: 2.664294, ppl: 14.357817 +epoch: 2, batch: 18457, sum loss: 3918.173828, avg loss: 2.216162, ppl: 9.172058 +epoch: 2, batch: 18458, sum loss: 5152.125977, avg loss: 2.625956, ppl: 13.817779 +epoch: 2, batch: 18459, sum loss: 4410.819824, avg loss: 2.655521, ppl: 14.232394 +epoch: 2, batch: 18460, sum loss: 4614.843750, avg loss: 2.419950, ppl: 11.245292 +epoch: 2, batch: 18461, sum loss: 4150.682129, avg loss: 2.378614, ppl: 10.789942 +epoch: 2, batch: 18462, sum loss: 3888.551025, avg loss: 2.321523, ppl: 10.191186 +epoch: 2, batch: 18463, sum loss: 3698.220703, avg loss: 2.226502, ppl: 9.267396 +epoch: 2, batch: 18464, sum loss: 3949.408936, avg loss: 2.408176, ppl: 11.113673 +epoch: 2, batch: 18465, sum loss: 3923.646973, avg loss: 2.379410, ppl: 10.798531 +epoch: 2, batch: 18466, sum loss: 4737.331055, avg loss: 2.615865, ppl: 13.679044 +epoch: 2, batch: 18467, sum loss: 
4262.242188, avg loss: 2.702754, ppl: 14.920760 +epoch: 2, batch: 18468, sum loss: 4423.680664, avg loss: 2.642581, ppl: 14.049421 +epoch: 2, batch: 18469, sum loss: 4074.963379, avg loss: 2.474173, ppl: 11.871886 +epoch: 2, batch: 18470, sum loss: 4551.782227, avg loss: 2.602505, ppl: 13.497513 +epoch: 2, batch: 18471, sum loss: 3962.683838, avg loss: 2.240070, ppl: 9.393990 +epoch: 2, batch: 18472, sum loss: 4122.669922, avg loss: 2.694556, ppl: 14.798940 +epoch: 2, batch: 18473, sum loss: 3482.858398, avg loss: 2.397012, ppl: 10.990288 +epoch: 2, batch: 18474, sum loss: 4440.555664, avg loss: 2.508789, ppl: 12.290032 +epoch: 2, batch: 18475, sum loss: 4326.708984, avg loss: 2.406401, ppl: 11.093961 +epoch: 2, batch: 18476, sum loss: 3594.788818, avg loss: 2.556749, ppl: 12.893829 +epoch: 2, batch: 18477, sum loss: 3740.816650, avg loss: 2.364612, ppl: 10.639911 +epoch: 2, batch: 18478, sum loss: 4570.669922, avg loss: 2.550597, ppl: 12.814754 +epoch: 2, batch: 18479, sum loss: 3594.082520, avg loss: 2.369204, ppl: 10.688881 +epoch: 2, batch: 18480, sum loss: 3592.697021, avg loss: 2.476014, ppl: 11.893765 +epoch: 2, batch: 18481, sum loss: 5046.814453, avg loss: 2.698831, ppl: 14.862352 +epoch: 2, batch: 18482, sum loss: 4317.866699, avg loss: 2.547414, ppl: 12.774028 +epoch: 2, batch: 18483, sum loss: 4050.155273, avg loss: 2.431066, ppl: 11.370992 +epoch: 2, batch: 18484, sum loss: 4404.803223, avg loss: 2.554991, ppl: 12.871178 +epoch: 2, batch: 18485, sum loss: 4961.088867, avg loss: 2.609726, ppl: 13.595325 +epoch: 2, batch: 18486, sum loss: 4140.976562, avg loss: 2.456095, ppl: 11.659196 +epoch: 2, batch: 18487, sum loss: 4111.346680, avg loss: 2.281547, ppl: 9.791813 +epoch: 2, batch: 18488, sum loss: 4082.878174, avg loss: 2.548613, ppl: 12.789353 +epoch: 2, batch: 18489, sum loss: 3454.274902, avg loss: 2.194584, ppl: 8.976263 +epoch: 2, batch: 18490, sum loss: 4025.098877, avg loss: 2.571948, ppl: 13.091302 +epoch: 2, batch: 18491, sum loss: 
4280.529297, avg loss: 2.558595, ppl: 12.917654 +epoch: 2, batch: 18492, sum loss: 3946.078613, avg loss: 2.332198, ppl: 10.300554 +epoch: 2, batch: 18493, sum loss: 4079.884521, avg loss: 2.551522, ppl: 12.826617 +epoch: 2, batch: 18494, sum loss: 3143.476807, avg loss: 2.264753, ppl: 9.628742 +epoch: 2, batch: 18495, sum loss: 4593.237305, avg loss: 2.612763, ppl: 13.636676 +epoch: 2, batch: 18496, sum loss: 3019.564697, avg loss: 2.027915, ppl: 7.598224 +epoch: 2, batch: 18497, sum loss: 4891.853027, avg loss: 2.593772, ppl: 13.380142 +epoch: 2, batch: 18498, sum loss: 3510.619141, avg loss: 2.254733, ppl: 9.532746 +epoch: 2, batch: 18499, sum loss: 3641.483398, avg loss: 2.483959, ppl: 11.988630 +epoch: 2, batch: 18500, sum loss: 5413.821289, avg loss: 2.939100, ppl: 18.898821 +epoch: 2, batch: 18501, sum loss: 4140.194336, avg loss: 2.460008, ppl: 11.704909 +epoch: 2, batch: 18502, sum loss: 3055.852051, avg loss: 2.235444, ppl: 9.350634 +epoch: 2, batch: 18503, sum loss: 4414.799316, avg loss: 2.353305, ppl: 10.520278 +epoch: 2, batch: 18504, sum loss: 5056.150879, avg loss: 2.551035, ppl: 12.820362 +epoch: 2, batch: 18505, sum loss: 4645.492188, avg loss: 2.689920, ppl: 14.730500 +epoch: 2, batch: 18506, sum loss: 4037.892578, avg loss: 2.462130, ppl: 11.729765 +epoch: 2, batch: 18507, sum loss: 4091.127197, avg loss: 2.558553, ppl: 12.917119 +epoch: 2, batch: 18508, sum loss: 4486.099121, avg loss: 2.503404, ppl: 12.224030 +epoch: 2, batch: 18509, sum loss: 4485.118652, avg loss: 2.601577, ppl: 13.484987 +epoch: 2, batch: 18510, sum loss: 4342.249023, avg loss: 2.557273, ppl: 12.900585 +epoch: 2, batch: 18511, sum loss: 3744.541504, avg loss: 2.334502, ppl: 10.324320 +epoch: 2, batch: 18512, sum loss: 4413.109863, avg loss: 2.518898, ppl: 12.414914 +epoch: 2, batch: 18513, sum loss: 3777.498291, avg loss: 2.278346, ppl: 9.760526 +epoch: 2, batch: 18514, sum loss: 3611.479248, avg loss: 2.215631, ppl: 9.167196 +epoch: 2, batch: 18515, sum loss: 4206.817871, 
avg loss: 2.557336, ppl: 12.901403 +epoch: 2, batch: 18516, sum loss: 3262.597900, avg loss: 2.398969, ppl: 11.011819 +epoch: 2, batch: 18517, sum loss: 3164.534668, avg loss: 2.250736, ppl: 9.494720 +epoch: 2, batch: 18518, sum loss: 5026.135742, avg loss: 2.786106, ppl: 16.217751 +epoch: 2, batch: 18519, sum loss: 4286.562988, avg loss: 2.472066, ppl: 11.846899 +epoch: 2, batch: 18520, sum loss: 5059.084473, avg loss: 2.731687, ppl: 15.358776 +epoch: 2, batch: 18521, sum loss: 3917.504395, avg loss: 2.289599, ppl: 9.870983 +epoch: 2, batch: 18522, sum loss: 4290.811523, avg loss: 2.402470, ppl: 11.050439 +epoch: 2, batch: 18523, sum loss: 3888.694824, avg loss: 2.255624, ppl: 9.541242 +epoch: 2, batch: 18524, sum loss: 3222.339355, avg loss: 2.089714, ppl: 8.082605 +epoch: 2, batch: 18525, sum loss: 3540.470703, avg loss: 2.333863, ppl: 10.317724 +epoch: 2, batch: 18526, sum loss: 3684.395020, avg loss: 2.270114, ppl: 9.680504 +epoch: 2, batch: 18527, sum loss: 5002.649902, avg loss: 2.771551, ppl: 15.983407 +epoch: 2, batch: 18528, sum loss: 3616.359863, avg loss: 2.376058, ppl: 10.762392 +epoch: 2, batch: 18529, sum loss: 4521.117188, avg loss: 2.892589, ppl: 18.039961 +epoch: 2, batch: 18530, sum loss: 3901.006348, avg loss: 2.458101, ppl: 11.682609 +epoch: 2, batch: 18531, sum loss: 5006.069824, avg loss: 2.622352, ppl: 13.768069 +epoch: 2, batch: 18532, sum loss: 3886.251709, avg loss: 2.484816, ppl: 11.998907 +epoch: 2, batch: 18533, sum loss: 3043.048340, avg loss: 2.107374, ppl: 8.226611 +epoch: 2, batch: 18534, sum loss: 4148.391113, avg loss: 2.280589, ppl: 9.782439 +epoch: 2, batch: 18535, sum loss: 4122.706543, avg loss: 2.609308, ppl: 13.589643 +epoch: 2, batch: 18536, sum loss: 4249.413574, avg loss: 2.515935, ppl: 12.378173 +epoch: 2, batch: 18537, sum loss: 4062.129395, avg loss: 2.492104, ppl: 12.086678 +epoch: 2, batch: 18538, sum loss: 3911.081787, avg loss: 2.380451, ppl: 10.809782 +epoch: 2, batch: 18539, sum loss: 4485.083984, avg loss: 
2.473847, ppl: 11.868014 +epoch: 2, batch: 18540, sum loss: 4802.136719, avg loss: 2.714605, ppl: 15.098650 +epoch: 2, batch: 18541, sum loss: 4389.402832, avg loss: 2.717896, ppl: 15.148423 +epoch: 2, batch: 18542, sum loss: 3886.570312, avg loss: 2.257010, ppl: 9.554474 +epoch: 2, batch: 18543, sum loss: 4720.465332, avg loss: 2.689724, ppl: 14.727607 +epoch: 2, batch: 18544, sum loss: 4480.468262, avg loss: 2.630927, ppl: 13.886635 +epoch: 2, batch: 18545, sum loss: 3295.293945, avg loss: 2.073816, ppl: 7.955124 +epoch: 2, batch: 18546, sum loss: 4099.466309, avg loss: 2.558968, ppl: 12.922469 +epoch: 2, batch: 18547, sum loss: 4756.542969, avg loss: 2.779979, ppl: 16.118675 +epoch: 2, batch: 18548, sum loss: 4484.681152, avg loss: 2.642711, ppl: 14.051250 +epoch: 2, batch: 18549, sum loss: 4939.486328, avg loss: 2.572649, ppl: 13.100485 +epoch: 2, batch: 18550, sum loss: 4389.647461, avg loss: 2.644366, ppl: 14.074519 +epoch: 2, batch: 18551, sum loss: 4665.100098, avg loss: 2.695032, ppl: 14.805987 +epoch: 2, batch: 18552, sum loss: 3937.065430, avg loss: 2.292991, ppl: 9.904517 +epoch: 2, batch: 18553, sum loss: 4433.963379, avg loss: 2.618998, ppl: 13.721968 +epoch: 2, batch: 18554, sum loss: 4039.111816, avg loss: 2.537131, ppl: 12.643340 +epoch: 2, batch: 18555, sum loss: 4788.683105, avg loss: 2.588477, ppl: 13.309491 +epoch: 2, batch: 18556, sum loss: 4011.293213, avg loss: 2.460916, ppl: 11.715539 +epoch: 2, batch: 18557, sum loss: 4606.263184, avg loss: 2.773187, ppl: 16.009571 +epoch: 2, batch: 18558, sum loss: 3590.429443, avg loss: 2.579331, ppl: 13.188317 +epoch: 2, batch: 18559, sum loss: 3794.028076, avg loss: 2.441459, ppl: 11.489797 +epoch: 2, batch: 18560, sum loss: 4996.784668, avg loss: 2.639611, ppl: 14.007760 +epoch: 2, batch: 18561, sum loss: 4060.687012, avg loss: 2.498884, ppl: 12.168908 +epoch: 2, batch: 18562, sum loss: 3436.666016, avg loss: 2.342649, ppl: 10.408771 +epoch: 2, batch: 18563, sum loss: 4453.895020, avg loss: 2.376678, 
ppl: 10.769071 +epoch: 2, batch: 18564, sum loss: 5222.661621, avg loss: 2.764776, ppl: 15.875484 +epoch: 2, batch: 18565, sum loss: 4655.337891, avg loss: 2.523218, ppl: 12.468661 +epoch: 2, batch: 18566, sum loss: 3577.880371, avg loss: 2.171044, ppl: 8.767431 +epoch: 2, batch: 18567, sum loss: 5141.526367, avg loss: 2.541535, ppl: 12.699154 +epoch: 2, batch: 18568, sum loss: 5140.029297, avg loss: 2.808759, ppl: 16.589321 +epoch: 2, batch: 18569, sum loss: 4219.316895, avg loss: 2.411038, ppl: 11.145526 +epoch: 2, batch: 18570, sum loss: 3061.220947, avg loss: 2.319107, ppl: 10.166590 +epoch: 2, batch: 18571, sum loss: 4603.494141, avg loss: 2.437001, ppl: 11.438682 +epoch: 2, batch: 18572, sum loss: 3591.174805, avg loss: 2.302035, ppl: 9.994501 +epoch: 2, batch: 18573, sum loss: 5335.321777, avg loss: 2.733259, ppl: 15.382941 +epoch: 2, batch: 18574, sum loss: 3616.144043, avg loss: 2.210357, ppl: 9.118973 +epoch: 2, batch: 18575, sum loss: 4502.875977, avg loss: 2.472749, ppl: 11.854991 +epoch: 2, batch: 18576, sum loss: 4611.365234, avg loss: 2.401753, ppl: 11.042517 +epoch: 2, batch: 18577, sum loss: 3177.063965, avg loss: 2.206295, ppl: 9.082001 +epoch: 2, batch: 18578, sum loss: 5038.862305, avg loss: 2.694579, ppl: 14.799285 +epoch: 2, batch: 18579, sum loss: 4658.351074, avg loss: 2.495100, ppl: 12.122940 +epoch: 2, batch: 18580, sum loss: 3509.191406, avg loss: 2.266920, ppl: 9.649631 +epoch: 2, batch: 18581, sum loss: 4769.397949, avg loss: 2.585039, ppl: 13.263811 +epoch: 2, batch: 18582, sum loss: 4599.542969, avg loss: 2.720014, ppl: 15.180529 +epoch: 2, batch: 18583, sum loss: 3227.713379, avg loss: 2.255565, ppl: 9.540682 +epoch: 2, batch: 18584, sum loss: 4468.737305, avg loss: 2.719864, ppl: 15.178264 +epoch: 2, batch: 18585, sum loss: 3611.714844, avg loss: 2.422344, ppl: 11.272250 +epoch: 2, batch: 18586, sum loss: 3734.820557, avg loss: 2.263528, ppl: 9.616955 +epoch: 2, batch: 18587, sum loss: 3843.977783, avg loss: 2.453081, ppl: 11.624107 
+epoch: 2, batch: 18588, sum loss: 3798.330322, avg loss: 2.484193, ppl: 11.991435 +epoch: 2, batch: 18589, sum loss: 3739.734619, avg loss: 2.259659, ppl: 9.579818 +epoch: 2, batch: 18590, sum loss: 4201.853027, avg loss: 2.454353, ppl: 11.638905 +epoch: 2, batch: 18591, sum loss: 5131.209961, avg loss: 2.564323, ppl: 12.991859 +epoch: 2, batch: 18592, sum loss: 3358.629883, avg loss: 2.305168, ppl: 10.025862 +epoch: 2, batch: 18593, sum loss: 3760.946045, avg loss: 2.323006, ppl: 10.206306 +epoch: 2, batch: 18594, sum loss: 4115.892578, avg loss: 2.405548, ppl: 11.084504 +epoch: 2, batch: 18595, sum loss: 4184.840332, avg loss: 2.458778, ppl: 11.690518 +epoch: 2, batch: 18596, sum loss: 4834.916992, avg loss: 2.626245, ppl: 13.821772 +epoch: 2, batch: 18597, sum loss: 4806.068359, avg loss: 2.730721, ppl: 15.343939 +epoch: 2, batch: 18598, sum loss: 3863.383301, avg loss: 2.320350, ppl: 10.179240 +epoch: 2, batch: 18599, sum loss: 4155.014648, avg loss: 2.553789, ppl: 12.855721 +epoch: 2, batch: 18600, sum loss: 3876.985840, avg loss: 2.137258, ppl: 8.476165 +epoch: 2, batch: 18601, sum loss: 3658.608887, avg loss: 2.403817, ppl: 11.065329 +epoch: 2, batch: 18602, sum loss: 4208.467773, avg loss: 2.349787, ppl: 10.483332 +epoch: 2, batch: 18603, sum loss: 3506.626709, avg loss: 2.465982, ppl: 11.775042 +epoch: 2, batch: 18604, sum loss: 3940.636963, avg loss: 2.557195, ppl: 12.899579 +epoch: 2, batch: 18605, sum loss: 3429.635010, avg loss: 2.199894, ppl: 9.024058 +epoch: 2, batch: 18606, sum loss: 4550.499512, avg loss: 2.705410, ppl: 14.960449 +epoch: 2, batch: 18607, sum loss: 4441.303223, avg loss: 2.543702, ppl: 12.726697 +epoch: 2, batch: 18608, sum loss: 4342.579590, avg loss: 2.495735, ppl: 12.130651 +epoch: 2, batch: 18609, sum loss: 5170.687500, avg loss: 2.615421, ppl: 13.672973 +epoch: 2, batch: 18610, sum loss: 4884.836426, avg loss: 2.594177, ppl: 13.385573 +epoch: 2, batch: 18611, sum loss: 3510.332520, avg loss: 2.344911, ppl: 10.432347 +epoch: 2, 
batch: 18612, sum loss: 3619.921387, avg loss: 2.512090, ppl: 12.330674 +epoch: 2, batch: 18613, sum loss: 4215.250488, avg loss: 2.488341, ppl: 12.041287 +epoch: 2, batch: 18614, sum loss: 4599.393555, avg loss: 2.842641, ppl: 17.161030 +epoch: 2, batch: 18615, sum loss: 5044.627930, avg loss: 2.729777, ppl: 15.329470 +epoch: 2, batch: 18616, sum loss: 4524.886230, avg loss: 2.677448, ppl: 14.547914 +epoch: 2, batch: 18617, sum loss: 4258.311035, avg loss: 2.712300, ppl: 15.063884 +epoch: 2, batch: 18618, sum loss: 3094.469482, avg loss: 2.232662, ppl: 9.324655 +epoch: 2, batch: 18619, sum loss: 4377.902344, avg loss: 2.508826, ppl: 12.290499 +epoch: 2, batch: 18620, sum loss: 4493.014160, avg loss: 2.680796, ppl: 14.596710 +epoch: 2, batch: 18621, sum loss: 4452.976074, avg loss: 2.490479, ppl: 12.067052 +epoch: 2, batch: 18622, sum loss: 4736.067871, avg loss: 2.678771, ppl: 14.567186 +epoch: 2, batch: 18623, sum loss: 4490.018066, avg loss: 2.634987, ppl: 13.943132 +epoch: 2, batch: 18624, sum loss: 3545.120361, avg loss: 2.528617, ppl: 12.536153 +epoch: 2, batch: 18625, sum loss: 3872.211182, avg loss: 2.388779, ppl: 10.900179 +epoch: 2, batch: 18626, sum loss: 4217.402344, avg loss: 2.514849, ppl: 12.364744 +epoch: 2, batch: 18627, sum loss: 4345.385254, avg loss: 2.556109, ppl: 12.885584 +epoch: 2, batch: 18628, sum loss: 3976.507812, avg loss: 2.510422, ppl: 12.310118 +epoch: 2, batch: 18629, sum loss: 3867.792725, avg loss: 2.328593, ppl: 10.263489 +epoch: 2, batch: 18630, sum loss: 5222.520508, avg loss: 2.697583, ppl: 14.843806 +epoch: 2, batch: 18631, sum loss: 3943.041260, avg loss: 2.315350, ppl: 10.128468 +epoch: 2, batch: 18632, sum loss: 4322.099609, avg loss: 2.509930, ppl: 12.304070 +epoch: 2, batch: 18633, sum loss: 4277.589844, avg loss: 2.282598, ppl: 9.802118 +epoch: 2, batch: 18634, sum loss: 4610.017090, avg loss: 2.502724, ppl: 12.215720 +epoch: 2, batch: 18635, sum loss: 3575.939697, avg loss: 2.310039, ppl: 10.074813 +epoch: 2, batch: 
18636, sum loss: 5317.270508, avg loss: 2.602678, ppl: 13.499837 +epoch: 2, batch: 18637, sum loss: 4416.383789, avg loss: 2.533783, ppl: 12.601086 +epoch: 2, batch: 18638, sum loss: 4812.402344, avg loss: 2.441604, ppl: 11.491463 +epoch: 2, batch: 18639, sum loss: 5319.014160, avg loss: 2.690447, ppl: 14.738264 +epoch: 2, batch: 18640, sum loss: 4093.932617, avg loss: 2.568339, ppl: 13.044142 +epoch: 2, batch: 18641, sum loss: 4258.410156, avg loss: 2.648265, ppl: 14.129501 +epoch: 2, batch: 18642, sum loss: 4097.707031, avg loss: 2.464045, ppl: 11.752254 +epoch: 2, batch: 18643, sum loss: 4070.758301, avg loss: 2.474625, ppl: 11.877254 +epoch: 2, batch: 18644, sum loss: 4090.029297, avg loss: 2.246035, ppl: 9.450191 +epoch: 2, batch: 18645, sum loss: 4842.170898, avg loss: 2.596338, ppl: 13.414528 +epoch: 2, batch: 18646, sum loss: 4394.146973, avg loss: 2.565176, ppl: 13.002950 +epoch: 2, batch: 18647, sum loss: 3915.665527, avg loss: 2.511652, ppl: 12.325277 +epoch: 2, batch: 18648, sum loss: 3842.198730, avg loss: 2.450382, ppl: 11.592772 +epoch: 2, batch: 18649, sum loss: 4125.842285, avg loss: 2.684348, ppl: 14.648642 +epoch: 2, batch: 18650, sum loss: 4076.962891, avg loss: 2.560906, ppl: 12.947548 +epoch: 2, batch: 18651, sum loss: 3790.169922, avg loss: 2.454773, ppl: 11.643795 +epoch: 2, batch: 18652, sum loss: 5461.274414, avg loss: 2.911127, ppl: 18.377501 +epoch: 2, batch: 18653, sum loss: 4565.763184, avg loss: 2.521128, ppl: 12.442629 +epoch: 2, batch: 18654, sum loss: 3674.419922, avg loss: 2.446351, ppl: 11.546142 +epoch: 2, batch: 18655, sum loss: 3554.307861, avg loss: 2.234009, ppl: 9.337222 +epoch: 2, batch: 18656, sum loss: 3623.947510, avg loss: 2.138022, ppl: 8.482644 +epoch: 2, batch: 18657, sum loss: 4242.965332, avg loss: 2.449749, ppl: 11.585439 +epoch: 2, batch: 18658, sum loss: 4829.282227, avg loss: 2.617497, ppl: 13.701385 +epoch: 2, batch: 18659, sum loss: 4090.436035, avg loss: 2.521847, ppl: 12.451573 +epoch: 2, batch: 18660, sum 
loss: 3787.461426, avg loss: 2.329312, ppl: 10.270874 +epoch: 2, batch: 18661, sum loss: 3249.705566, avg loss: 2.235011, ppl: 9.346581 +epoch: 2, batch: 18662, sum loss: 4037.553711, avg loss: 2.472476, ppl: 11.851758 +epoch: 2, batch: 18663, sum loss: 4361.265625, avg loss: 2.326008, ppl: 10.236997 +epoch: 2, batch: 18664, sum loss: 3805.187012, avg loss: 2.550394, ppl: 12.812145 +epoch: 2, batch: 18665, sum loss: 3943.084717, avg loss: 2.447601, ppl: 11.560577 +epoch: 2, batch: 18666, sum loss: 3768.685059, avg loss: 2.400436, ppl: 11.027988 +epoch: 2, batch: 18667, sum loss: 4012.112793, avg loss: 2.402463, ppl: 11.050357 +epoch: 2, batch: 18668, sum loss: 3544.516602, avg loss: 2.239113, ppl: 9.385006 +epoch: 2, batch: 18669, sum loss: 5027.881836, avg loss: 2.681537, ppl: 14.607527 +epoch: 2, batch: 18670, sum loss: 3839.727051, avg loss: 2.328518, ppl: 10.262725 +epoch: 2, batch: 18671, sum loss: 5008.577148, avg loss: 2.885125, ppl: 17.905809 +epoch: 2, batch: 18672, sum loss: 4123.411133, avg loss: 2.594972, ppl: 13.396217 +epoch: 2, batch: 18673, sum loss: 4013.886230, avg loss: 2.350050, ppl: 10.486098 +epoch: 2, batch: 18674, sum loss: 4299.093750, avg loss: 2.618206, ppl: 13.711101 +epoch: 2, batch: 18675, sum loss: 5001.535156, avg loss: 2.919752, ppl: 18.536688 +epoch: 2, batch: 18676, sum loss: 4571.444824, avg loss: 2.713024, ppl: 15.074788 +epoch: 2, batch: 18677, sum loss: 4258.969238, avg loss: 2.646966, ppl: 14.111167 +epoch: 2, batch: 18678, sum loss: 4209.191406, avg loss: 2.502492, ppl: 12.212890 +epoch: 2, batch: 18679, sum loss: 4763.407227, avg loss: 2.505738, ppl: 12.252592 +epoch: 2, batch: 18680, sum loss: 4528.395508, avg loss: 2.422897, ppl: 11.278490 +epoch: 2, batch: 18681, sum loss: 3647.376465, avg loss: 2.320214, ppl: 10.177853 +epoch: 2, batch: 18682, sum loss: 4752.250488, avg loss: 2.552229, ppl: 12.835685 +epoch: 2, batch: 18683, sum loss: 3412.565918, avg loss: 2.211644, ppl: 9.130713 +epoch: 2, batch: 18684, sum loss: 
4109.483398, avg loss: 2.576479, ppl: 13.150749 +epoch: 2, batch: 18685, sum loss: 4762.924316, avg loss: 2.640202, ppl: 14.016035 +epoch: 2, batch: 18686, sum loss: 4189.583008, avg loss: 2.590961, ppl: 13.342581 +epoch: 2, batch: 18687, sum loss: 4582.557617, avg loss: 2.538813, ppl: 12.664631 +epoch: 2, batch: 18688, sum loss: 3474.384766, avg loss: 2.266396, ppl: 9.644577 +epoch: 2, batch: 18689, sum loss: 3991.279785, avg loss: 2.590058, ppl: 13.330549 +epoch: 2, batch: 18690, sum loss: 3316.793701, avg loss: 2.312966, ppl: 10.104353 +epoch: 2, batch: 18691, sum loss: 4927.844238, avg loss: 2.599074, ppl: 13.451278 +epoch: 2, batch: 18692, sum loss: 3565.393311, avg loss: 2.388073, ppl: 10.892487 +epoch: 2, batch: 18693, sum loss: 3389.660400, avg loss: 2.160395, ppl: 8.674567 +epoch: 2, batch: 18694, sum loss: 4936.828125, avg loss: 2.599699, ppl: 13.459683 +epoch: 2, batch: 18695, sum loss: 4322.333008, avg loss: 2.653366, ppl: 14.201759 +epoch: 2, batch: 18696, sum loss: 3274.334473, avg loss: 2.352252, ppl: 10.509208 +epoch: 2, batch: 18697, sum loss: 4055.863770, avg loss: 2.520736, ppl: 12.437744 +epoch: 2, batch: 18698, sum loss: 4747.161621, avg loss: 2.498506, ppl: 12.164310 +epoch: 2, batch: 18699, sum loss: 3772.277344, avg loss: 2.566175, ppl: 13.015943 +epoch: 2, batch: 18700, sum loss: 4621.042480, avg loss: 2.351676, ppl: 10.503153 +epoch: 2, batch: 18701, sum loss: 4584.916016, avg loss: 2.611000, ppl: 13.612658 +epoch: 2, batch: 18702, sum loss: 4154.422363, avg loss: 2.652888, ppl: 14.194972 +epoch: 2, batch: 18703, sum loss: 3588.052246, avg loss: 2.357459, ppl: 10.564073 +epoch: 2, batch: 18704, sum loss: 4960.367188, avg loss: 2.617608, ppl: 13.702908 +epoch: 2, batch: 18705, sum loss: 4021.395508, avg loss: 2.502424, ppl: 12.212063 +epoch: 2, batch: 18706, sum loss: 5184.181641, avg loss: 2.812904, ppl: 16.658218 +epoch: 2, batch: 18707, sum loss: 3777.526855, avg loss: 2.459327, ppl: 11.696942 +epoch: 2, batch: 18708, sum loss: 
4076.665039, avg loss: 2.621650, ppl: 13.758403 +epoch: 2, batch: 18709, sum loss: 5193.582031, avg loss: 2.678485, ppl: 14.563012 +epoch: 2, batch: 18710, sum loss: 4076.580078, avg loss: 2.342862, ppl: 10.410992 +epoch: 2, batch: 18711, sum loss: 4086.024414, avg loss: 2.474879, ppl: 11.880264 +epoch: 2, batch: 18712, sum loss: 4087.057129, avg loss: 2.376196, ppl: 10.763878 +epoch: 2, batch: 18713, sum loss: 4315.200195, avg loss: 2.435215, ppl: 11.418268 +epoch: 2, batch: 18714, sum loss: 3937.223389, avg loss: 2.265376, ppl: 9.634746 +epoch: 2, batch: 18715, sum loss: 4233.282227, avg loss: 2.528843, ppl: 12.538986 +epoch: 2, batch: 18716, sum loss: 3633.600342, avg loss: 2.406358, ppl: 11.093482 +epoch: 2, batch: 18717, sum loss: 4726.585938, avg loss: 2.570194, ppl: 13.068357 +epoch: 2, batch: 18718, sum loss: 4040.544922, avg loss: 2.438470, ppl: 11.455502 +epoch: 2, batch: 18719, sum loss: 4902.272461, avg loss: 2.561271, ppl: 12.952266 +epoch: 2, batch: 18720, sum loss: 3895.485107, avg loss: 2.440780, ppl: 11.481995 +epoch: 2, batch: 18721, sum loss: 4636.647949, avg loss: 2.464991, ppl: 11.763374 +epoch: 2, batch: 18722, sum loss: 4100.283203, avg loss: 2.402040, ppl: 11.045691 +epoch: 2, batch: 18723, sum loss: 3814.991211, avg loss: 2.440813, ppl: 11.482375 +epoch: 2, batch: 18724, sum loss: 4168.104980, avg loss: 2.498864, ppl: 12.168662 +epoch: 2, batch: 18725, sum loss: 4240.088867, avg loss: 2.552733, ppl: 12.842150 +epoch: 2, batch: 18726, sum loss: 3185.312012, avg loss: 2.156610, ppl: 8.641788 +epoch: 2, batch: 18727, sum loss: 5475.830078, avg loss: 2.608780, ppl: 13.582476 +epoch: 2, batch: 18728, sum loss: 3619.039062, avg loss: 2.236736, ppl: 9.362722 +epoch: 2, batch: 18729, sum loss: 4114.724121, avg loss: 2.486238, ppl: 12.015989 +epoch: 2, batch: 18730, sum loss: 4904.817383, avg loss: 2.645532, ppl: 14.090944 +epoch: 2, batch: 18731, sum loss: 2949.126709, avg loss: 2.277318, ppl: 9.750492 +epoch: 2, batch: 18732, sum loss: 
4310.032715, avg loss: 2.533823, ppl: 12.601587 +epoch: 2, batch: 18733, sum loss: 3312.766602, avg loss: 2.318241, ppl: 10.157792 +epoch: 2, batch: 18734, sum loss: 4204.779297, avg loss: 2.751819, ppl: 15.671110 +epoch: 2, batch: 18735, sum loss: 4328.275391, avg loss: 2.473300, ppl: 11.861528 +epoch: 2, batch: 18736, sum loss: 3586.240234, avg loss: 2.532655, ppl: 12.586886 +epoch: 2, batch: 18737, sum loss: 4156.496094, avg loss: 2.671270, ppl: 14.458318 +epoch: 2, batch: 18738, sum loss: 4153.275879, avg loss: 2.478088, ppl: 11.918459 +epoch: 2, batch: 18739, sum loss: 4352.291992, avg loss: 2.564698, ppl: 12.996730 +epoch: 2, batch: 18740, sum loss: 3785.202393, avg loss: 2.165448, ppl: 8.718504 +epoch: 2, batch: 18741, sum loss: 3858.417725, avg loss: 2.283087, ppl: 9.806912 +epoch: 2, batch: 18742, sum loss: 4570.433105, avg loss: 2.725363, ppl: 15.261946 +epoch: 2, batch: 18743, sum loss: 4091.686523, avg loss: 2.319550, ppl: 10.171099 +epoch: 2, batch: 18744, sum loss: 5479.729492, avg loss: 2.966827, ppl: 19.430172 +epoch: 2, batch: 18745, sum loss: 4053.300781, avg loss: 2.396985, ppl: 10.989987 +epoch: 2, batch: 18746, sum loss: 4363.569824, avg loss: 2.421515, ppl: 11.262910 +epoch: 2, batch: 18747, sum loss: 5229.378418, avg loss: 2.881200, ppl: 17.835669 +epoch: 2, batch: 18748, sum loss: 4931.922852, avg loss: 2.384876, ppl: 10.857714 +epoch: 2, batch: 18749, sum loss: 5007.996094, avg loss: 2.682376, ppl: 14.619787 +epoch: 2, batch: 18750, sum loss: 4420.230957, avg loss: 2.528736, ppl: 12.537653 +epoch: 2, batch: 18751, sum loss: 3639.635010, avg loss: 2.355751, ppl: 10.546046 +epoch: 2, batch: 18752, sum loss: 4484.548828, avg loss: 2.433287, ppl: 11.396284 +epoch: 2, batch: 18753, sum loss: 4376.956055, avg loss: 2.472857, ppl: 11.856266 +epoch: 2, batch: 18754, sum loss: 4338.215332, avg loss: 2.579201, ppl: 13.186591 +epoch: 2, batch: 18755, sum loss: 4946.432129, avg loss: 2.972615, ppl: 19.542967 +epoch: 2, batch: 18756, sum loss: 
4718.068848, avg loss: 2.491061, ppl: 12.074077 +epoch: 2, batch: 18757, sum loss: 4088.806152, avg loss: 2.255271, ppl: 9.537877 +epoch: 2, batch: 18758, sum loss: 2812.216797, avg loss: 2.299441, ppl: 9.968612 +epoch: 2, batch: 18759, sum loss: 4830.076172, avg loss: 2.566459, ppl: 13.019643 +epoch: 2, batch: 18760, sum loss: 3533.088867, avg loss: 2.387222, ppl: 10.883222 +epoch: 2, batch: 18761, sum loss: 3890.025391, avg loss: 2.452727, ppl: 11.619995 +epoch: 2, batch: 18762, sum loss: 4254.886230, avg loss: 2.322536, ppl: 10.201515 +epoch: 2, batch: 18763, sum loss: 3643.053711, avg loss: 2.422243, ppl: 11.271113 +epoch: 2, batch: 18764, sum loss: 3746.802246, avg loss: 2.672470, ppl: 14.475675 +epoch: 2, batch: 18765, sum loss: 3676.819824, avg loss: 2.349406, ppl: 10.479339 +epoch: 2, batch: 18766, sum loss: 3479.242432, avg loss: 2.173168, ppl: 8.786078 +epoch: 2, batch: 18767, sum loss: 4435.070312, avg loss: 2.455742, ppl: 11.655080 +epoch: 2, batch: 18768, sum loss: 3729.712891, avg loss: 2.392375, ppl: 10.939446 +epoch: 2, batch: 18769, sum loss: 3742.191895, avg loss: 2.305725, ppl: 10.031449 +epoch: 2, batch: 18770, sum loss: 3013.924561, avg loss: 2.174549, ppl: 8.798213 +epoch: 2, batch: 18771, sum loss: 3647.090332, avg loss: 2.388402, ppl: 10.896070 +epoch: 2, batch: 18772, sum loss: 2948.701660, avg loss: 2.125957, ppl: 8.380910 +epoch: 2, batch: 18773, sum loss: 4504.346191, avg loss: 2.473557, ppl: 11.864569 +epoch: 2, batch: 18774, sum loss: 4088.776367, avg loss: 2.417964, ppl: 11.222983 +epoch: 2, batch: 18775, sum loss: 4779.731934, avg loss: 2.665774, ppl: 14.379069 +epoch: 2, batch: 18776, sum loss: 3266.789307, avg loss: 2.173512, ppl: 8.789102 +epoch: 2, batch: 18777, sum loss: 3641.044922, avg loss: 2.314714, ppl: 10.122025 +epoch: 2, batch: 18778, sum loss: 4685.571289, avg loss: 2.451895, ppl: 11.610328 +epoch: 2, batch: 18779, sum loss: 4287.033691, avg loss: 2.483797, ppl: 11.986692 +epoch: 2, batch: 18780, sum loss: 4320.289062, 
avg loss: 2.573132, ppl: 13.106814 +epoch: 2, batch: 18781, sum loss: 4451.208984, avg loss: 2.776799, ppl: 16.067507 +epoch: 2, batch: 18782, sum loss: 4004.167725, avg loss: 2.542329, ppl: 12.709231 +epoch: 2, batch: 18783, sum loss: 4540.190430, avg loss: 2.573804, ppl: 13.115623 +epoch: 2, batch: 18784, sum loss: 4521.754395, avg loss: 2.436290, ppl: 11.430555 +epoch: 2, batch: 18785, sum loss: 5337.684082, avg loss: 2.755645, ppl: 15.731181 +epoch: 2, batch: 18786, sum loss: 3731.653320, avg loss: 2.402868, ppl: 11.054832 +epoch: 2, batch: 18787, sum loss: 5840.333984, avg loss: 2.795756, ppl: 16.375002 +epoch: 2, batch: 18788, sum loss: 4120.895020, avg loss: 2.526606, ppl: 12.510976 +epoch: 2, batch: 18789, sum loss: 3744.926270, avg loss: 2.356782, ppl: 10.556922 +epoch: 2, batch: 18790, sum loss: 4071.266602, avg loss: 2.550919, ppl: 12.818876 +epoch: 2, batch: 18791, sum loss: 4164.048828, avg loss: 2.400028, ppl: 11.023487 +epoch: 2, batch: 18792, sum loss: 4739.275391, avg loss: 2.765038, ppl: 15.879647 +epoch: 2, batch: 18793, sum loss: 4353.955566, avg loss: 2.367567, ppl: 10.671398 +epoch: 2, batch: 18794, sum loss: 4759.590820, avg loss: 2.738545, ppl: 15.464463 +epoch: 2, batch: 18795, sum loss: 5159.877930, avg loss: 2.536813, ppl: 12.639325 +epoch: 2, batch: 18796, sum loss: 3817.679932, avg loss: 2.428550, ppl: 11.342419 +epoch: 2, batch: 18797, sum loss: 3831.341064, avg loss: 2.402095, ppl: 11.046292 +epoch: 2, batch: 18798, sum loss: 4235.075195, avg loss: 2.388649, ppl: 10.898765 +epoch: 2, batch: 18799, sum loss: 4030.860840, avg loss: 2.506754, ppl: 12.265055 +epoch: 2, batch: 18800, sum loss: 3767.012695, avg loss: 2.321018, ppl: 10.186041 +epoch: 2, batch: 18801, sum loss: 3424.027100, avg loss: 2.229184, ppl: 9.292284 +epoch: 2, batch: 18802, sum loss: 3480.986084, avg loss: 2.306817, ppl: 10.042404 +epoch: 2, batch: 18803, sum loss: 4512.947266, avg loss: 2.607133, ppl: 13.560117 +epoch: 2, batch: 18804, sum loss: 4925.148438, avg 
loss: 2.745345, ppl: 15.569983 +epoch: 2, batch: 18805, sum loss: 3267.331787, avg loss: 2.103884, ppl: 8.197948 +epoch: 2, batch: 18806, sum loss: 3865.364014, avg loss: 2.387501, ppl: 10.886255 +epoch: 2, batch: 18807, sum loss: 3409.518066, avg loss: 2.309972, ppl: 10.074139 +epoch: 2, batch: 18808, sum loss: 3785.014648, avg loss: 2.350941, ppl: 10.495439 +epoch: 2, batch: 18809, sum loss: 4178.626953, avg loss: 2.485798, ppl: 12.010705 +epoch: 2, batch: 18810, sum loss: 4958.911621, avg loss: 2.589510, ppl: 13.323241 +epoch: 2, batch: 18811, sum loss: 5060.013184, avg loss: 2.705889, ppl: 14.967624 +epoch: 2, batch: 18812, sum loss: 4665.091797, avg loss: 2.696585, ppl: 14.829000 +epoch: 2, batch: 18813, sum loss: 4745.402344, avg loss: 2.713209, ppl: 15.077580 +epoch: 2, batch: 18814, sum loss: 3483.589355, avg loss: 2.303961, ppl: 10.013771 +epoch: 2, batch: 18815, sum loss: 4280.121094, avg loss: 2.507394, ppl: 12.272903 +epoch: 2, batch: 18816, sum loss: 3791.012939, avg loss: 2.314416, ppl: 10.119009 +epoch: 2, batch: 18817, sum loss: 4757.152344, avg loss: 2.485451, ppl: 12.006528 +epoch: 2, batch: 18818, sum loss: 5025.585449, avg loss: 2.656229, ppl: 14.242479 +epoch: 2, batch: 18819, sum loss: 4096.414551, avg loss: 2.478170, ppl: 11.919428 +epoch: 2, batch: 18820, sum loss: 3213.165283, avg loss: 2.262793, ppl: 9.609888 +epoch: 2, batch: 18821, sum loss: 4420.229492, avg loss: 2.623281, ppl: 13.780871 +epoch: 2, batch: 18822, sum loss: 3652.096191, avg loss: 2.350126, ppl: 10.486894 +epoch: 2, batch: 18823, sum loss: 3706.223877, avg loss: 2.312055, ppl: 10.095147 +epoch: 2, batch: 18824, sum loss: 3867.546387, avg loss: 2.727466, ppl: 15.294081 +epoch: 2, batch: 18825, sum loss: 4437.552246, avg loss: 2.710783, ppl: 15.041056 +epoch: 2, batch: 18826, sum loss: 4562.439941, avg loss: 2.644893, ppl: 14.081937 +epoch: 2, batch: 18827, sum loss: 4716.213867, avg loss: 2.563160, ppl: 12.976755 +epoch: 2, batch: 18828, sum loss: 5110.611328, avg loss: 
2.699742, ppl: 14.875891 +epoch: 2, batch: 18829, sum loss: 4704.256348, avg loss: 2.452688, ppl: 11.619543 +epoch: 2, batch: 18830, sum loss: 4856.123047, avg loss: 2.717472, ppl: 15.141999 +epoch: 2, batch: 18831, sum loss: 3822.088867, avg loss: 2.443791, ppl: 11.516617 +epoch: 2, batch: 18832, sum loss: 3516.010254, avg loss: 2.139994, ppl: 8.499386 +epoch: 2, batch: 18833, sum loss: 4798.225586, avg loss: 2.586644, ppl: 13.285117 +epoch: 2, batch: 18834, sum loss: 3674.920410, avg loss: 2.235353, ppl: 9.349782 +epoch: 2, batch: 18835, sum loss: 4220.489258, avg loss: 2.548604, ppl: 12.789244 +epoch: 2, batch: 18836, sum loss: 4322.063965, avg loss: 2.377373, ppl: 10.776555 +epoch: 2, batch: 18837, sum loss: 3714.285645, avg loss: 2.376382, ppl: 10.765885 +epoch: 2, batch: 18838, sum loss: 3769.142578, avg loss: 2.177436, ppl: 8.823656 +epoch: 2, batch: 18839, sum loss: 3958.036621, avg loss: 2.332373, ppl: 10.302359 +epoch: 2, batch: 18840, sum loss: 5082.824707, avg loss: 2.692174, ppl: 14.763741 +epoch: 2, batch: 18841, sum loss: 3734.892822, avg loss: 2.465276, ppl: 11.766726 +epoch: 2, batch: 18842, sum loss: 3387.863770, avg loss: 2.246594, ppl: 9.455475 +epoch: 2, batch: 18843, sum loss: 4025.716309, avg loss: 2.413499, ppl: 11.172989 +epoch: 2, batch: 18844, sum loss: 3847.798584, avg loss: 2.418478, ppl: 11.228756 +epoch: 2, batch: 18845, sum loss: 4116.106934, avg loss: 2.442793, ppl: 11.505134 +epoch: 2, batch: 18846, sum loss: 4066.765381, avg loss: 2.375447, ppl: 10.755820 +epoch: 2, batch: 18847, sum loss: 3671.136719, avg loss: 2.216870, ppl: 9.178557 +epoch: 2, batch: 18848, sum loss: 4261.979980, avg loss: 2.512960, ppl: 12.341406 +epoch: 2, batch: 18849, sum loss: 4138.509277, avg loss: 2.634315, ppl: 13.933771 +epoch: 2, batch: 18850, sum loss: 3951.652832, avg loss: 2.442307, ppl: 11.499539 +epoch: 2, batch: 18851, sum loss: 4417.419434, avg loss: 2.544596, ppl: 12.738087 +epoch: 2, batch: 18852, sum loss: 3841.294678, avg loss: 2.262247, 
ppl: 9.604643 +epoch: 2, batch: 18853, sum loss: 3949.710449, avg loss: 2.626137, ppl: 13.820283 +epoch: 2, batch: 18854, sum loss: 4702.159668, avg loss: 2.510496, ppl: 12.311040 +epoch: 2, batch: 18855, sum loss: 3400.485840, avg loss: 2.237162, ppl: 9.366710 +epoch: 2, batch: 18856, sum loss: 4211.067871, avg loss: 2.451145, ppl: 11.601627 +epoch: 2, batch: 18857, sum loss: 4514.971680, avg loss: 2.578511, ppl: 13.177496 +epoch: 2, batch: 18858, sum loss: 4531.978516, avg loss: 2.546055, ppl: 12.756683 +epoch: 2, batch: 18859, sum loss: 4052.772949, avg loss: 2.353527, ppl: 10.522614 +epoch: 2, batch: 18860, sum loss: 4739.629395, avg loss: 2.526455, ppl: 12.509082 +epoch: 2, batch: 18861, sum loss: 3977.963379, avg loss: 2.498721, ppl: 12.166918 +epoch: 2, batch: 18862, sum loss: 3988.131836, avg loss: 2.584661, ppl: 13.258794 +epoch: 2, batch: 18863, sum loss: 3239.448730, avg loss: 2.221844, ppl: 9.224327 +epoch: 2, batch: 18864, sum loss: 2823.591553, avg loss: 2.240946, ppl: 9.402220 +epoch: 2, batch: 18865, sum loss: 3897.433838, avg loss: 2.392531, ppl: 10.941155 +epoch: 2, batch: 18866, sum loss: 5460.889648, avg loss: 3.007098, ppl: 20.228605 +epoch: 2, batch: 18867, sum loss: 4049.194336, avg loss: 2.690495, ppl: 14.738967 +epoch: 2, batch: 18868, sum loss: 4492.632812, avg loss: 2.512658, ppl: 12.337682 +epoch: 2, batch: 18869, sum loss: 4748.963867, avg loss: 2.488975, ppl: 12.048915 +epoch: 2, batch: 18870, sum loss: 3448.447754, avg loss: 2.210543, ppl: 9.120671 +epoch: 2, batch: 18871, sum loss: 3711.690918, avg loss: 2.332930, ppl: 10.308096 +epoch: 2, batch: 18872, sum loss: 4212.846680, avg loss: 2.905411, ppl: 18.272760 +epoch: 2, batch: 18873, sum loss: 4002.201416, avg loss: 2.506075, ppl: 12.256726 +epoch: 2, batch: 18874, sum loss: 4617.610352, avg loss: 2.640143, ppl: 14.015210 +epoch: 2, batch: 18875, sum loss: 4650.556152, avg loss: 2.530226, ppl: 12.556347 +epoch: 2, batch: 18876, sum loss: 3687.559814, avg loss: 2.530926, ppl: 
12.565142 +epoch: 2, batch: 18877, sum loss: 3894.974365, avg loss: 2.349200, ppl: 10.477190 +epoch: 2, batch: 18878, sum loss: 3286.138672, avg loss: 2.310927, ppl: 10.083772 +epoch: 2, batch: 18879, sum loss: 4626.911621, avg loss: 2.646975, ppl: 14.111284 +epoch: 2, batch: 18880, sum loss: 3590.949219, avg loss: 2.527058, ppl: 12.516626 +epoch: 2, batch: 18881, sum loss: 3710.071533, avg loss: 2.406013, ppl: 11.089656 +epoch: 2, batch: 18882, sum loss: 4825.334473, avg loss: 2.638237, ppl: 13.988513 +epoch: 2, batch: 18883, sum loss: 4098.345215, avg loss: 2.427930, ppl: 11.335389 +epoch: 2, batch: 18884, sum loss: 4313.621094, avg loss: 2.484805, ppl: 11.998778 +epoch: 2, batch: 18885, sum loss: 3559.736328, avg loss: 2.401981, ppl: 11.045039 +epoch: 2, batch: 18886, sum loss: 4377.341797, avg loss: 2.457800, ppl: 11.679091 +epoch: 2, batch: 18887, sum loss: 4606.432129, avg loss: 2.480577, ppl: 11.948159 +epoch: 2, batch: 18888, sum loss: 4518.086426, avg loss: 2.418676, ppl: 11.230979 +epoch: 2, batch: 18889, sum loss: 4749.636719, avg loss: 2.559072, ppl: 12.923813 +epoch: 2, batch: 18890, sum loss: 4074.518066, avg loss: 2.436913, ppl: 11.437675 +epoch: 2, batch: 18891, sum loss: 4076.048584, avg loss: 2.497579, ppl: 12.153034 +epoch: 2, batch: 18892, sum loss: 4886.856934, avg loss: 2.648703, ppl: 14.135691 +epoch: 2, batch: 18893, sum loss: 4960.351562, avg loss: 2.624525, ppl: 13.798013 +epoch: 2, batch: 18894, sum loss: 3777.447998, avg loss: 2.349159, ppl: 10.476758 +epoch: 2, batch: 18895, sum loss: 5295.449219, avg loss: 2.723997, ppl: 15.241114 +epoch: 2, batch: 18896, sum loss: 4427.671387, avg loss: 2.527210, ppl: 12.518528 +epoch: 2, batch: 18897, sum loss: 4128.332031, avg loss: 2.728574, ppl: 15.311035 +epoch: 2, batch: 18898, sum loss: 3559.229492, avg loss: 2.375988, ppl: 10.761636 +epoch: 2, batch: 18899, sum loss: 4060.703613, avg loss: 2.452116, ppl: 11.612891 +epoch: 2, batch: 18900, sum loss: 3554.995850, avg loss: 2.441618, ppl: 
11.491621 +epoch: 2, batch: 18901, sum loss: 3241.093994, avg loss: 2.399033, ppl: 11.012526 +epoch: 2, batch: 18902, sum loss: 5757.162109, avg loss: 2.742812, ppl: 15.530595 +epoch: 2, batch: 18903, sum loss: 4344.960449, avg loss: 2.545378, ppl: 12.748049 +epoch: 2, batch: 18904, sum loss: 4025.267090, avg loss: 2.318702, ppl: 10.162477 +epoch: 2, batch: 18905, sum loss: 4659.673828, avg loss: 2.800285, ppl: 16.449331 +epoch: 2, batch: 18906, sum loss: 5156.872070, avg loss: 2.855411, ppl: 17.381577 +epoch: 2, batch: 18907, sum loss: 4687.615234, avg loss: 2.539336, ppl: 12.671260 +epoch: 2, batch: 18908, sum loss: 4301.514160, avg loss: 2.369980, ppl: 10.697182 +epoch: 2, batch: 18909, sum loss: 4413.506836, avg loss: 2.567485, ppl: 13.033007 +epoch: 2, batch: 18910, sum loss: 3733.807617, avg loss: 2.354229, ppl: 10.530009 +epoch: 2, batch: 18911, sum loss: 4008.266357, avg loss: 2.477297, ppl: 11.909029 +epoch: 2, batch: 18912, sum loss: 4363.550293, avg loss: 2.827965, ppl: 16.911016 +epoch: 2, batch: 18913, sum loss: 3795.556641, avg loss: 2.196503, ppl: 8.993505 +epoch: 2, batch: 18914, sum loss: 4983.783203, avg loss: 2.582271, ppl: 13.227144 +epoch: 2, batch: 18915, sum loss: 3454.874756, avg loss: 2.314049, ppl: 10.115296 +epoch: 2, batch: 18916, sum loss: 3979.440674, avg loss: 2.343605, ppl: 10.418726 +epoch: 2, batch: 18917, sum loss: 4151.671875, avg loss: 2.450810, ppl: 11.597733 +epoch: 2, batch: 18918, sum loss: 4435.278320, avg loss: 2.508642, ppl: 12.288228 +epoch: 2, batch: 18919, sum loss: 5347.496582, avg loss: 2.616192, ppl: 13.683518 +epoch: 2, batch: 18920, sum loss: 4503.770996, avg loss: 2.677628, ppl: 14.550543 +epoch: 2, batch: 18921, sum loss: 4250.311035, avg loss: 2.413578, ppl: 11.173870 +epoch: 2, batch: 18922, sum loss: 3889.109619, avg loss: 2.497823, ppl: 12.155995 +epoch: 2, batch: 18923, sum loss: 3830.434814, avg loss: 2.427398, ppl: 11.329370 +epoch: 2, batch: 18924, sum loss: 3858.713867, avg loss: 2.303710, ppl: 
10.011255 +epoch: 2, batch: 18925, sum loss: 3482.201172, avg loss: 2.267058, ppl: 9.650968 +epoch: 2, batch: 18926, sum loss: 3823.646484, avg loss: 2.455778, ppl: 11.655499 +epoch: 2, batch: 18927, sum loss: 4515.529297, avg loss: 2.602611, ppl: 13.498932 +epoch: 2, batch: 18928, sum loss: 4563.235352, avg loss: 2.567943, ppl: 13.038980 +epoch: 2, batch: 18929, sum loss: 4432.092285, avg loss: 2.531178, ppl: 12.568303 +epoch: 2, batch: 18930, sum loss: 4047.040039, avg loss: 2.627948, ppl: 13.845331 +epoch: 2, batch: 18931, sum loss: 3534.052734, avg loss: 2.274165, ppl: 9.719801 +epoch: 2, batch: 18932, sum loss: 3703.474854, avg loss: 2.377070, ppl: 10.773291 +epoch: 2, batch: 18933, sum loss: 3829.708008, avg loss: 2.333765, ppl: 10.316709 +epoch: 2, batch: 18934, sum loss: 4258.064941, avg loss: 2.386808, ppl: 10.878713 +epoch: 2, batch: 18935, sum loss: 3442.847168, avg loss: 2.337303, ppl: 10.353275 +epoch: 2, batch: 18936, sum loss: 4331.126953, avg loss: 2.458074, ppl: 11.682294 +epoch: 2, batch: 18937, sum loss: 3976.715332, avg loss: 2.365684, ppl: 10.651324 +epoch: 2, batch: 18938, sum loss: 4211.856445, avg loss: 2.510046, ppl: 12.305490 +epoch: 2, batch: 18939, sum loss: 5179.708008, avg loss: 2.675469, ppl: 14.519156 +epoch: 2, batch: 18940, sum loss: 4205.160156, avg loss: 2.415371, ppl: 11.193919 +epoch: 2, batch: 18941, sum loss: 4413.380859, avg loss: 2.793279, ppl: 16.334492 +epoch: 2, batch: 18942, sum loss: 3651.580566, avg loss: 2.368081, ppl: 10.676882 +epoch: 2, batch: 18943, sum loss: 4643.537598, avg loss: 2.547196, ppl: 12.771238 +epoch: 2, batch: 18944, sum loss: 3598.214355, avg loss: 2.413290, ppl: 11.170647 +epoch: 2, batch: 18945, sum loss: 4701.564453, avg loss: 2.583277, ppl: 13.240459 +epoch: 2, batch: 18946, sum loss: 4271.302246, avg loss: 2.471818, ppl: 11.843965 +epoch: 2, batch: 18947, sum loss: 4719.382324, avg loss: 2.727967, ppl: 15.301740 +epoch: 2, batch: 18948, sum loss: 4226.796387, avg loss: 2.512959, ppl: 12.341389 
+epoch: 2, batch: 18949, sum loss: 4501.801758, avg loss: 2.512166, ppl: 12.331614 +epoch: 2, batch: 18950, sum loss: 4249.338379, avg loss: 2.508464, ppl: 12.286045 +epoch: 2, batch: 18951, sum loss: 4312.833984, avg loss: 2.775311, ppl: 16.043623 +epoch: 2, batch: 18952, sum loss: 3677.962158, avg loss: 2.089751, ppl: 8.082904 +epoch: 2, batch: 18953, sum loss: 4143.739746, avg loss: 2.416175, ppl: 11.202922 +epoch: 2, batch: 18954, sum loss: 4340.450684, avg loss: 2.588223, ppl: 13.306111 +epoch: 2, batch: 18955, sum loss: 4034.218262, avg loss: 2.447948, ppl: 11.564591 +epoch: 2, batch: 18956, sum loss: 3444.917969, avg loss: 2.092903, ppl: 8.108419 +epoch: 2, batch: 18957, sum loss: 3430.628906, avg loss: 2.291669, ppl: 9.891434 +epoch: 2, batch: 18958, sum loss: 4352.143066, avg loss: 2.626520, ppl: 13.825568 +epoch: 2, batch: 18959, sum loss: 4678.579590, avg loss: 2.685752, ppl: 14.669224 +epoch: 2, batch: 18960, sum loss: 3908.971191, avg loss: 2.564942, ppl: 12.999900 +epoch: 2, batch: 18961, sum loss: 4698.694336, avg loss: 2.627905, ppl: 13.844737 +epoch: 2, batch: 18962, sum loss: 3322.629395, avg loss: 2.161763, ppl: 8.686438 +epoch: 2, batch: 18963, sum loss: 3921.498535, avg loss: 2.313568, ppl: 10.110438 +epoch: 2, batch: 18964, sum loss: 4372.070801, avg loss: 2.608634, ppl: 13.580490 +epoch: 2, batch: 18965, sum loss: 4192.984863, avg loss: 2.635440, ppl: 13.949447 +epoch: 2, batch: 18966, sum loss: 4946.796387, avg loss: 2.854470, ppl: 17.365227 +epoch: 2, batch: 18967, sum loss: 4474.215332, avg loss: 2.527805, ppl: 12.525986 +epoch: 2, batch: 18968, sum loss: 3908.658936, avg loss: 2.510378, ppl: 12.309587 +epoch: 2, batch: 18969, sum loss: 4603.115234, avg loss: 2.640915, ppl: 14.026033 +epoch: 2, batch: 18970, sum loss: 3897.789062, avg loss: 2.278077, ppl: 9.757895 +epoch: 2, batch: 18971, sum loss: 3844.533691, avg loss: 2.576765, ppl: 13.154518 +epoch: 2, batch: 18972, sum loss: 5078.753906, avg loss: 2.787461, ppl: 16.239735 +epoch: 2, 
batch: 18973, sum loss: 3880.281250, avg loss: 2.452769, ppl: 11.620482 +epoch: 2, batch: 18974, sum loss: 3667.704346, avg loss: 2.244617, ppl: 9.436803 +epoch: 2, batch: 18975, sum loss: 4140.584961, avg loss: 2.647433, ppl: 14.117749 +epoch: 2, batch: 18976, sum loss: 3980.724609, avg loss: 2.362448, ppl: 10.616907 +epoch: 2, batch: 18977, sum loss: 3920.075684, avg loss: 2.459270, ppl: 11.696264 +epoch: 2, batch: 18978, sum loss: 3998.396240, avg loss: 2.324649, ppl: 10.223089 +epoch: 2, batch: 18979, sum loss: 4524.127930, avg loss: 2.516200, ppl: 12.381458 +epoch: 2, batch: 18980, sum loss: 3831.352783, avg loss: 2.296974, ppl: 9.944048 +epoch: 2, batch: 18981, sum loss: 3500.333496, avg loss: 2.322716, ppl: 10.203351 +epoch: 2, batch: 18982, sum loss: 3829.927979, avg loss: 2.407246, ppl: 11.103339 +epoch: 2, batch: 18983, sum loss: 4140.785156, avg loss: 2.548175, ppl: 12.783756 +epoch: 2, batch: 18984, sum loss: 4751.475586, avg loss: 2.641176, ppl: 14.029693 +epoch: 2, batch: 18985, sum loss: 3595.274414, avg loss: 2.414556, ppl: 11.184806 +epoch: 2, batch: 18986, sum loss: 4517.708984, avg loss: 2.652794, ppl: 14.193645 +epoch: 2, batch: 18987, sum loss: 4029.955566, avg loss: 2.512441, ppl: 12.335005 +epoch: 2, batch: 18988, sum loss: 3245.519531, avg loss: 2.168016, ppl: 8.740922 +epoch: 2, batch: 18989, sum loss: 4764.684082, avg loss: 2.546598, ppl: 12.763604 +epoch: 2, batch: 18990, sum loss: 4521.484375, avg loss: 2.673852, ppl: 14.495706 +epoch: 2, batch: 18991, sum loss: 3493.625244, avg loss: 2.316728, ppl: 10.142430 +epoch: 2, batch: 18992, sum loss: 4014.088867, avg loss: 2.406528, ppl: 11.095374 +epoch: 2, batch: 18993, sum loss: 3948.984375, avg loss: 2.616955, ppl: 13.693956 +epoch: 2, batch: 18994, sum loss: 4400.362793, avg loss: 2.387609, ppl: 10.887428 +epoch: 2, batch: 18995, sum loss: 3658.614258, avg loss: 2.418119, ppl: 11.224728 +epoch: 2, batch: 18996, sum loss: 4234.708008, avg loss: 2.339618, ppl: 10.377269 +epoch: 2, batch: 
18997, sum loss: 3957.494873, avg loss: 2.345877, ppl: 10.442431 +epoch: 2, batch: 18998, sum loss: 4753.045898, avg loss: 2.656817, ppl: 14.250859 +epoch: 2, batch: 18999, sum loss: 4267.937988, avg loss: 2.551069, ppl: 12.820799 +epoch: 2, batch: 19000, sum loss: 3785.043457, avg loss: 2.619407, ppl: 13.727583 +epoch: 2, batch: 19001, sum loss: 3794.102051, avg loss: 2.331962, ppl: 10.298125 +epoch: 2, batch: 19002, sum loss: 4580.853027, avg loss: 2.857675, ppl: 17.420973 +epoch: 2, batch: 19003, sum loss: 4789.414551, avg loss: 2.830623, ppl: 16.956024 +epoch: 2, batch: 19004, sum loss: 3406.910156, avg loss: 2.370849, ppl: 10.706480 +epoch: 2, batch: 19005, sum loss: 3068.210938, avg loss: 2.329697, ppl: 10.274827 +epoch: 2, batch: 19006, sum loss: 4522.654297, avg loss: 2.614251, ppl: 13.656982 +epoch: 2, batch: 19007, sum loss: 3927.921875, avg loss: 2.427640, ppl: 11.332109 +epoch: 2, batch: 19008, sum loss: 4088.005859, avg loss: 2.532841, ppl: 12.589227 +epoch: 2, batch: 19009, sum loss: 3758.578613, avg loss: 2.428022, ppl: 11.336441 +epoch: 2, batch: 19010, sum loss: 5307.945801, avg loss: 2.697127, ppl: 14.837045 +epoch: 2, batch: 19011, sum loss: 3900.706055, avg loss: 2.561199, ppl: 12.951336 +epoch: 2, batch: 19012, sum loss: 3530.868896, avg loss: 2.178204, ppl: 8.830433 +epoch: 2, batch: 19013, sum loss: 4731.384277, avg loss: 2.763659, ppl: 15.857760 +epoch: 2, batch: 19014, sum loss: 4829.172852, avg loss: 2.668051, ppl: 14.411860 +epoch: 2, batch: 19015, sum loss: 5170.776367, avg loss: 2.740210, ppl: 15.490238 +epoch: 2, batch: 19016, sum loss: 3640.618896, avg loss: 2.427079, ppl: 11.325753 +epoch: 2, batch: 19017, sum loss: 4651.207031, avg loss: 2.527830, ppl: 12.526293 +epoch: 2, batch: 19018, sum loss: 4102.084473, avg loss: 2.530589, ppl: 12.560898 +epoch: 2, batch: 19019, sum loss: 4591.167969, avg loss: 2.614560, ppl: 13.661209 +epoch: 2, batch: 19020, sum loss: 3750.404541, avg loss: 2.357262, ppl: 10.561997 +epoch: 2, batch: 19021, 
sum loss: 4158.170898, avg loss: 2.427420, ppl: 11.329616 +epoch: 2, batch: 19022, sum loss: 3836.036377, avg loss: 2.264484, ppl: 9.626158 +epoch: 2, batch: 19023, sum loss: 3944.742676, avg loss: 2.402401, ppl: 11.049677 +epoch: 2, batch: 19024, sum loss: 4292.641113, avg loss: 2.725486, ppl: 15.263835 +epoch: 2, batch: 19025, sum loss: 4948.982910, avg loss: 2.670795, ppl: 14.451453 +epoch: 2, batch: 19026, sum loss: 3389.254883, avg loss: 2.231241, ppl: 9.311412 +epoch: 2, batch: 19027, sum loss: 3804.447998, avg loss: 2.438749, ppl: 11.458695 +epoch: 2, batch: 19028, sum loss: 4301.910156, avg loss: 2.401960, ppl: 11.044802 +epoch: 2, batch: 19029, sum loss: 4290.605957, avg loss: 2.384995, ppl: 10.859008 +epoch: 2, batch: 19030, sum loss: 5391.945801, avg loss: 2.742597, ppl: 15.527259 +epoch: 2, batch: 19031, sum loss: 4686.178223, avg loss: 2.638614, ppl: 13.993793 +epoch: 2, batch: 19032, sum loss: 3778.606201, avg loss: 2.425293, ppl: 11.305541 +epoch: 2, batch: 19033, sum loss: 4716.996094, avg loss: 2.700055, ppl: 14.880548 +epoch: 2, batch: 19034, sum loss: 3637.164307, avg loss: 2.457544, ppl: 11.676095 +epoch: 2, batch: 19035, sum loss: 4272.533203, avg loss: 2.431720, ppl: 11.378442 +epoch: 2, batch: 19036, sum loss: 4098.017090, avg loss: 2.578991, ppl: 13.183831 +epoch: 2, batch: 19037, sum loss: 4461.985840, avg loss: 2.651210, ppl: 14.171169 +epoch: 2, batch: 19038, sum loss: 4350.386230, avg loss: 2.662415, ppl: 14.330856 +epoch: 2, batch: 19039, sum loss: 4315.926758, avg loss: 2.463429, ppl: 11.745013 +epoch: 2, batch: 19040, sum loss: 4147.686523, avg loss: 2.470331, ppl: 11.826365 +epoch: 2, batch: 19041, sum loss: 4784.617676, avg loss: 2.532884, ppl: 12.589761 +epoch: 2, batch: 19042, sum loss: 4481.398926, avg loss: 2.620701, ppl: 13.745357 +epoch: 2, batch: 19043, sum loss: 4531.545898, avg loss: 2.617877, ppl: 13.706600 +epoch: 2, batch: 19044, sum loss: 5343.705078, avg loss: 2.737554, ppl: 15.449151 +epoch: 2, batch: 19045, sum loss: 
4831.987305, avg loss: 2.672559, ppl: 14.476973 +epoch: 2, batch: 19046, sum loss: 4512.851074, avg loss: 2.526792, ppl: 12.513303 +epoch: 2, batch: 19047, sum loss: 3597.394531, avg loss: 2.420858, ppl: 11.255508 +epoch: 2, batch: 19048, sum loss: 3393.902832, avg loss: 2.332579, ppl: 10.304484 +epoch: 2, batch: 19049, sum loss: 4923.859375, avg loss: 2.514739, ppl: 12.363385 +epoch: 2, batch: 19050, sum loss: 3429.180176, avg loss: 2.442436, ppl: 11.501023 +epoch: 2, batch: 19051, sum loss: 4265.606445, avg loss: 2.372417, ppl: 10.723282 +epoch: 2, batch: 19052, sum loss: 3994.245117, avg loss: 2.534420, ppl: 12.609110 +epoch: 2, batch: 19053, sum loss: 3752.314941, avg loss: 2.397645, ppl: 10.997250 +epoch: 2, batch: 19054, sum loss: 4311.192383, avg loss: 2.525596, ppl: 12.498343 +epoch: 2, batch: 19055, sum loss: 4651.220703, avg loss: 2.487284, ppl: 12.028562 +epoch: 2, batch: 19056, sum loss: 3935.066895, avg loss: 2.356328, ppl: 10.552128 +epoch: 2, batch: 19057, sum loss: 4272.787598, avg loss: 2.781763, ppl: 16.147461 +epoch: 2, batch: 19058, sum loss: 3474.482910, avg loss: 2.262033, ppl: 9.602593 +epoch: 2, batch: 19059, sum loss: 3804.359131, avg loss: 2.473576, ppl: 11.864795 +epoch: 2, batch: 19060, sum loss: 4095.393555, avg loss: 2.386593, ppl: 10.876376 +epoch: 2, batch: 19061, sum loss: 3725.013184, avg loss: 2.404786, ppl: 11.076058 +epoch: 2, batch: 19062, sum loss: 3908.832031, avg loss: 2.347647, ppl: 10.460926 +epoch: 2, batch: 19063, sum loss: 4419.799316, avg loss: 2.303178, ppl: 10.005934 +epoch: 2, batch: 19064, sum loss: 5027.523926, avg loss: 2.566373, ppl: 13.018516 +epoch: 2, batch: 19065, sum loss: 3867.277344, avg loss: 2.192334, ppl: 8.956094 +epoch: 2, batch: 19066, sum loss: 3634.440674, avg loss: 2.566695, ppl: 13.022719 +epoch: 2, batch: 19067, sum loss: 4167.015137, avg loss: 2.403123, ppl: 11.057654 +epoch: 2, batch: 19068, sum loss: 4080.336914, avg loss: 2.324978, ppl: 10.226459 +epoch: 2, batch: 19069, sum loss: 
4258.062012, avg loss: 2.522549, ppl: 12.460314 +epoch: 2, batch: 19070, sum loss: 3947.974121, avg loss: 2.472119, ppl: 11.847527 +epoch: 2, batch: 19071, sum loss: 4026.925293, avg loss: 2.374366, ppl: 10.744203 +epoch: 2, batch: 19072, sum loss: 4311.740234, avg loss: 2.633928, ppl: 13.928374 +epoch: 2, batch: 19073, sum loss: 5079.544922, avg loss: 2.771165, ppl: 15.977231 +epoch: 2, batch: 19074, sum loss: 5135.477539, avg loss: 2.909619, ppl: 18.349808 +epoch: 2, batch: 19075, sum loss: 4115.054688, avg loss: 2.591344, ppl: 13.347704 +epoch: 2, batch: 19076, sum loss: 4316.763672, avg loss: 2.486615, ppl: 12.020517 +epoch: 2, batch: 19077, sum loss: 4581.666992, avg loss: 2.397523, ppl: 10.995908 +epoch: 2, batch: 19078, sum loss: 5880.664062, avg loss: 2.919893, ppl: 18.539299 +epoch: 2, batch: 19079, sum loss: 4740.262207, avg loss: 2.610276, ppl: 13.602811 +epoch: 2, batch: 19080, sum loss: 3410.710449, avg loss: 2.417229, ppl: 11.214742 +epoch: 2, batch: 19081, sum loss: 3651.983398, avg loss: 2.486033, ppl: 12.013529 +epoch: 2, batch: 19082, sum loss: 4316.765137, avg loss: 2.721794, ppl: 15.207579 +epoch: 2, batch: 19083, sum loss: 3992.794189, avg loss: 2.445067, ppl: 11.531319 +epoch: 2, batch: 19084, sum loss: 4874.890137, avg loss: 2.643650, ppl: 14.064443 +epoch: 2, batch: 19085, sum loss: 4617.080078, avg loss: 2.648927, ppl: 14.138863 +epoch: 2, batch: 19086, sum loss: 4762.700684, avg loss: 2.510649, ppl: 12.312915 +epoch: 2, batch: 19087, sum loss: 3791.222412, avg loss: 2.430271, ppl: 11.361959 +epoch: 2, batch: 19088, sum loss: 3842.175781, avg loss: 2.376114, ppl: 10.762993 +epoch: 2, batch: 19089, sum loss: 4204.663086, avg loss: 2.505759, ppl: 12.252852 +epoch: 2, batch: 19090, sum loss: 3570.282715, avg loss: 2.493214, ppl: 12.100108 +epoch: 2, batch: 19091, sum loss: 3889.560059, avg loss: 2.461747, ppl: 11.725277 +epoch: 2, batch: 19092, sum loss: 4113.531738, avg loss: 2.550237, ppl: 12.810136 +epoch: 2, batch: 19093, sum loss: 
4860.124023, avg loss: 2.503928, ppl: 12.230437 +epoch: 2, batch: 19094, sum loss: 4089.511719, avg loss: 2.291043, ppl: 9.885243 +epoch: 2, batch: 19095, sum loss: 5483.267578, avg loss: 2.659199, ppl: 14.284839 +epoch: 2, batch: 19096, sum loss: 3760.943115, avg loss: 2.350589, ppl: 10.491751 +epoch: 2, batch: 19097, sum loss: 4445.398438, avg loss: 2.639786, ppl: 14.010199 +epoch: 2, batch: 19098, sum loss: 3371.200684, avg loss: 2.502748, ppl: 12.216012 +epoch: 2, batch: 19099, sum loss: 4514.237793, avg loss: 2.510700, ppl: 12.313540 +epoch: 2, batch: 19100, sum loss: 3928.737305, avg loss: 2.353947, ppl: 10.527038 +epoch: 2, batch: 19101, sum loss: 3820.125488, avg loss: 2.416271, ppl: 11.204006 +epoch: 2, batch: 19102, sum loss: 4041.731689, avg loss: 2.398654, ppl: 11.008347 +epoch: 2, batch: 19103, sum loss: 3453.758545, avg loss: 2.440819, ppl: 11.482438 +epoch: 2, batch: 19104, sum loss: 3992.654785, avg loss: 2.412480, ppl: 11.161612 +epoch: 2, batch: 19105, sum loss: 4067.328125, avg loss: 2.474044, ppl: 11.870352 +epoch: 2, batch: 19106, sum loss: 4901.971680, avg loss: 2.747742, ppl: 15.607353 +epoch: 2, batch: 19107, sum loss: 5346.003418, avg loss: 2.866490, ppl: 17.575214 +epoch: 2, batch: 19108, sum loss: 3961.895020, avg loss: 2.223286, ppl: 9.237633 +epoch: 2, batch: 19109, sum loss: 5400.969727, avg loss: 2.725010, ppl: 15.256565 +epoch: 2, batch: 19110, sum loss: 4399.217285, avg loss: 2.698906, ppl: 14.863465 +epoch: 2, batch: 19111, sum loss: 3443.707520, avg loss: 2.370067, ppl: 10.698111 +epoch: 2, batch: 19112, sum loss: 3929.478271, avg loss: 2.344558, ppl: 10.428658 +epoch: 2, batch: 19113, sum loss: 3806.829102, avg loss: 2.409385, ppl: 11.127121 +epoch: 2, batch: 19114, sum loss: 3797.178467, avg loss: 2.581359, ppl: 13.215081 +epoch: 2, batch: 19115, sum loss: 4593.671875, avg loss: 2.460456, ppl: 11.710152 +epoch: 2, batch: 19116, sum loss: 4278.446777, avg loss: 2.561944, ppl: 12.960992 +epoch: 2, batch: 19117, sum loss: 
4638.480957, avg loss: 2.703078, ppl: 14.925595 +epoch: 2, batch: 19118, sum loss: 4488.513672, avg loss: 2.458113, ppl: 11.682742 +epoch: 2, batch: 19119, sum loss: 4327.813965, avg loss: 2.611837, ppl: 13.624054 +epoch: 2, batch: 19120, sum loss: 4060.028809, avg loss: 2.710299, ppl: 15.033777 +epoch: 2, batch: 19121, sum loss: 4831.354492, avg loss: 2.917485, ppl: 18.494705 +epoch: 2, batch: 19122, sum loss: 4010.293945, avg loss: 2.411482, ppl: 11.150470 +epoch: 2, batch: 19123, sum loss: 4312.148926, avg loss: 2.597680, ppl: 13.432540 +epoch: 2, batch: 19124, sum loss: 3766.547852, avg loss: 2.476363, ppl: 11.897912 +epoch: 2, batch: 19125, sum loss: 4315.393555, avg loss: 2.312644, ppl: 10.101094 +epoch: 2, batch: 19126, sum loss: 3719.361816, avg loss: 2.298740, ppl: 9.961627 +epoch: 2, batch: 19127, sum loss: 5665.917969, avg loss: 2.747778, ppl: 15.607912 +epoch: 2, batch: 19128, sum loss: 4401.337402, avg loss: 2.454734, ppl: 11.643332 +epoch: 2, batch: 19129, sum loss: 3928.724365, avg loss: 2.447803, ppl: 11.562918 +epoch: 2, batch: 19130, sum loss: 4479.161133, avg loss: 2.536331, ppl: 12.633237 +epoch: 2, batch: 19131, sum loss: 3970.399902, avg loss: 2.300348, ppl: 9.977650 +epoch: 2, batch: 19132, sum loss: 3908.802246, avg loss: 2.405417, ppl: 11.083048 +epoch: 2, batch: 19133, sum loss: 4832.562500, avg loss: 2.445629, ppl: 11.537804 +epoch: 2, batch: 19134, sum loss: 4059.148926, avg loss: 2.417599, ppl: 11.218890 +epoch: 2, batch: 19135, sum loss: 4677.179688, avg loss: 2.642474, ppl: 14.047921 +epoch: 2, batch: 19136, sum loss: 5070.577637, avg loss: 2.537827, ppl: 12.652143 +epoch: 2, batch: 19137, sum loss: 4422.813477, avg loss: 2.514391, ppl: 12.359079 +epoch: 2, batch: 19138, sum loss: 4429.704102, avg loss: 2.532707, ppl: 12.587531 +epoch: 2, batch: 19139, sum loss: 3939.204102, avg loss: 2.391745, ppl: 10.932553 +epoch: 2, batch: 19140, sum loss: 4333.359375, avg loss: 2.556554, ppl: 12.891318 +epoch: 2, batch: 19141, sum loss: 
4499.298828, avg loss: 2.509369, ppl: 12.297170 +epoch: 2, batch: 19142, sum loss: 4955.768555, avg loss: 2.886295, ppl: 17.926769 +epoch: 2, batch: 19143, sum loss: 3422.039551, avg loss: 2.401431, ppl: 11.038962 +epoch: 2, batch: 19144, sum loss: 4011.942627, avg loss: 2.502771, ppl: 12.216303 +epoch: 2, batch: 19145, sum loss: 4054.790283, avg loss: 2.364309, ppl: 10.636687 +epoch: 2, batch: 19146, sum loss: 4306.284180, avg loss: 2.625783, ppl: 13.815387 +epoch: 2, batch: 19147, sum loss: 3988.365234, avg loss: 2.337846, ppl: 10.358897 +epoch: 2, batch: 19148, sum loss: 3091.178223, avg loss: 2.356081, ppl: 10.549524 +epoch: 2, batch: 19149, sum loss: 4343.843750, avg loss: 2.469496, ppl: 11.816493 +epoch: 2, batch: 19150, sum loss: 4165.398926, avg loss: 2.294986, ppl: 9.924294 +epoch: 2, batch: 19151, sum loss: 4480.712402, avg loss: 2.535774, ppl: 12.626200 +epoch: 2, batch: 19152, sum loss: 5410.789551, avg loss: 2.640698, ppl: 14.022985 +epoch: 2, batch: 19153, sum loss: 4859.200195, avg loss: 2.542753, ppl: 12.714620 +epoch: 2, batch: 19154, sum loss: 4668.173340, avg loss: 2.492351, ppl: 12.089666 +epoch: 2, batch: 19155, sum loss: 3939.623535, avg loss: 2.507717, ppl: 12.276872 +epoch: 2, batch: 19156, sum loss: 4454.023926, avg loss: 2.627743, ppl: 13.842489 +epoch: 2, batch: 19157, sum loss: 3538.990234, avg loss: 2.150055, ppl: 8.585330 +epoch: 2, batch: 19158, sum loss: 3432.581299, avg loss: 2.246454, ppl: 9.454149 +epoch: 2, batch: 19159, sum loss: 4590.289062, avg loss: 2.605159, ppl: 13.533371 +epoch: 2, batch: 19160, sum loss: 5211.691406, avg loss: 2.607149, ppl: 13.560340 +epoch: 2, batch: 19161, sum loss: 3605.098877, avg loss: 2.397007, ppl: 10.990236 +epoch: 2, batch: 19162, sum loss: 4897.606934, avg loss: 2.806651, ppl: 16.554390 +epoch: 2, batch: 19163, sum loss: 4890.498535, avg loss: 2.629300, ppl: 13.864066 +epoch: 2, batch: 19164, sum loss: 5057.888184, avg loss: 2.885276, ppl: 17.908506 +epoch: 2, batch: 19165, sum loss: 
4383.772461, avg loss: 2.461411, ppl: 11.721335 +epoch: 2, batch: 19166, sum loss: 4610.831543, avg loss: 2.830467, ppl: 16.953384 +epoch: 2, batch: 19167, sum loss: 4297.054199, avg loss: 2.709366, ppl: 15.019748 +epoch: 2, batch: 19168, sum loss: 4644.184082, avg loss: 2.641743, ppl: 14.037649 +epoch: 2, batch: 19169, sum loss: 4182.324707, avg loss: 2.448668, ppl: 11.572919 +epoch: 2, batch: 19170, sum loss: 3796.733398, avg loss: 2.329284, ppl: 10.270587 +epoch: 2, batch: 19171, sum loss: 3693.416016, avg loss: 2.398322, ppl: 11.004696 +epoch: 2, batch: 19172, sum loss: 4238.262695, avg loss: 2.600161, ppl: 13.465907 +epoch: 2, batch: 19173, sum loss: 3963.564209, avg loss: 2.513357, ppl: 12.346309 +epoch: 2, batch: 19174, sum loss: 3556.559326, avg loss: 2.476712, ppl: 11.902072 +epoch: 2, batch: 19175, sum loss: 4441.534180, avg loss: 2.716535, ppl: 15.127808 +epoch: 2, batch: 19176, sum loss: 4643.395996, avg loss: 2.552719, ppl: 12.841975 +epoch: 2, batch: 19177, sum loss: 2877.318848, avg loss: 2.283587, ppl: 9.811808 +epoch: 2, batch: 19178, sum loss: 4404.303223, avg loss: 2.572607, ppl: 13.099932 +epoch: 2, batch: 19179, sum loss: 4689.241211, avg loss: 2.599358, ppl: 13.455091 +epoch: 2, batch: 19180, sum loss: 4241.468262, avg loss: 2.616575, ppl: 13.688759 +epoch: 2, batch: 19181, sum loss: 4799.047363, avg loss: 2.903235, ppl: 18.233032 +epoch: 2, batch: 19182, sum loss: 4166.043457, avg loss: 2.355027, ppl: 10.538418 +epoch: 2, batch: 19183, sum loss: 4083.302734, avg loss: 2.505094, ppl: 12.244704 +epoch: 2, batch: 19184, sum loss: 3855.543945, avg loss: 2.340950, ppl: 10.391099 +epoch: 2, batch: 19185, sum loss: 4428.068359, avg loss: 2.520244, ppl: 12.431625 +epoch: 2, batch: 19186, sum loss: 3715.868164, avg loss: 2.483869, ppl: 11.987556 +epoch: 2, batch: 19187, sum loss: 3667.455566, avg loss: 2.448235, ppl: 11.567906 +epoch: 2, batch: 19188, sum loss: 4064.938721, avg loss: 2.408139, ppl: 11.113263 +epoch: 2, batch: 19189, sum loss: 
3374.449951, avg loss: 2.127648, ppl: 8.395100 +epoch: 2, batch: 19190, sum loss: 4661.768555, avg loss: 2.530819, ppl: 12.563794 +epoch: 2, batch: 19191, sum loss: 3927.343018, avg loss: 2.223864, ppl: 9.242973 +epoch: 2, batch: 19192, sum loss: 3575.694092, avg loss: 2.396578, ppl: 10.985518 +epoch: 2, batch: 19193, sum loss: 4484.540527, avg loss: 2.506730, ppl: 12.264763 +epoch: 2, batch: 19194, sum loss: 4283.271484, avg loss: 2.480181, ppl: 11.943420 +epoch: 2, batch: 19195, sum loss: 5017.096680, avg loss: 2.753621, ppl: 15.699370 +epoch: 2, batch: 19196, sum loss: 3291.837646, avg loss: 2.424034, ppl: 11.291312 +epoch: 2, batch: 19197, sum loss: 5169.018555, avg loss: 2.685204, ppl: 14.661196 +epoch: 2, batch: 19198, sum loss: 4441.767090, avg loss: 2.482821, ppl: 11.975001 +epoch: 2, batch: 19199, sum loss: 4461.792969, avg loss: 2.662168, ppl: 14.327314 +epoch: 2, batch: 19200, sum loss: 3608.770508, avg loss: 2.153204, ppl: 8.612412 +epoch: 2, batch: 19201, sum loss: 5452.796387, avg loss: 2.777787, ppl: 16.083397 +epoch: 2, batch: 19202, sum loss: 4131.206055, avg loss: 2.425840, ppl: 11.311731 +epoch: 2, batch: 19203, sum loss: 4195.680664, avg loss: 2.463700, ppl: 11.748198 +epoch: 2, batch: 19204, sum loss: 3698.728027, avg loss: 2.360388, ppl: 10.595064 +epoch: 2, batch: 19205, sum loss: 3559.705811, avg loss: 2.443175, ppl: 11.509523 +epoch: 2, batch: 19206, sum loss: 4311.645020, avg loss: 2.463797, ppl: 11.749340 +epoch: 2, batch: 19207, sum loss: 4408.674316, avg loss: 2.535178, ppl: 12.618676 +epoch: 2, batch: 19208, sum loss: 4099.125488, avg loss: 2.550794, ppl: 12.817275 +epoch: 2, batch: 19209, sum loss: 4759.859375, avg loss: 2.626854, ppl: 13.830191 +epoch: 2, batch: 19210, sum loss: 4203.282227, avg loss: 2.589823, ppl: 13.327406 +epoch: 2, batch: 19211, sum loss: 3832.823975, avg loss: 2.303380, ppl: 10.007950 +epoch: 2, batch: 19212, sum loss: 3184.119141, avg loss: 2.166067, ppl: 8.723908 +epoch: 2, batch: 19213, sum loss: 
2950.943604, avg loss: 2.194010, ppl: 8.971115 +epoch: 2, batch: 19214, sum loss: 3537.197021, avg loss: 2.337870, ppl: 10.359147 +epoch: 2, batch: 19215, sum loss: 3969.394775, avg loss: 2.411540, ppl: 11.151121 +epoch: 2, batch: 19216, sum loss: 3801.635986, avg loss: 2.320901, ppl: 10.184846 +epoch: 2, batch: 19217, sum loss: 3356.386475, avg loss: 2.200909, ppl: 9.033222 +epoch: 2, batch: 19218, sum loss: 3830.868164, avg loss: 2.502200, ppl: 12.209327 +epoch: 2, batch: 19219, sum loss: 5016.211914, avg loss: 2.513132, ppl: 12.343534 +epoch: 2, batch: 19220, sum loss: 4229.030762, avg loss: 2.468786, ppl: 11.808105 +epoch: 2, batch: 19221, sum loss: 3787.878418, avg loss: 2.520212, ppl: 12.431228 +epoch: 2, batch: 19222, sum loss: 4712.413574, avg loss: 2.763879, ppl: 15.861251 +epoch: 2, batch: 19223, sum loss: 4024.509277, avg loss: 2.404128, ppl: 11.068769 +epoch: 2, batch: 19224, sum loss: 3622.985596, avg loss: 2.502062, ppl: 12.207642 +epoch: 2, batch: 19225, sum loss: 4219.772461, avg loss: 2.582480, ppl: 13.229910 +epoch: 2, batch: 19226, sum loss: 5000.149414, avg loss: 2.527881, ppl: 12.526936 +epoch: 2, batch: 19227, sum loss: 6663.127441, avg loss: 3.140022, ppl: 23.104382 +epoch: 2, batch: 19228, sum loss: 3352.358398, avg loss: 2.411769, ppl: 11.153671 +epoch: 2, batch: 19229, sum loss: 3818.275146, avg loss: 2.516991, ppl: 12.391254 +epoch: 2, batch: 19230, sum loss: 2773.838379, avg loss: 2.080899, ppl: 8.011668 +epoch: 2, batch: 19231, sum loss: 4165.733887, avg loss: 2.399616, ppl: 11.018947 +epoch: 2, batch: 19232, sum loss: 4547.273926, avg loss: 2.505385, ppl: 12.248276 +epoch: 2, batch: 19233, sum loss: 4605.708984, avg loss: 2.553054, ppl: 12.846274 +epoch: 2, batch: 19234, sum loss: 4066.576660, avg loss: 2.621906, ppl: 13.761932 +epoch: 2, batch: 19235, sum loss: 4626.175293, avg loss: 2.587346, ppl: 13.294445 +epoch: 2, batch: 19236, sum loss: 4946.845215, avg loss: 2.547294, ppl: 12.772496 +epoch: 2, batch: 19237, sum loss: 
4096.306152, avg loss: 2.449944, ppl: 11.587698 +epoch: 2, batch: 19238, sum loss: 3956.282227, avg loss: 2.440643, ppl: 11.480418 +epoch: 2, batch: 19239, sum loss: 4056.693848, avg loss: 2.670635, ppl: 14.449135 +epoch: 2, batch: 19240, sum loss: 4373.471680, avg loss: 2.525099, ppl: 12.492132 +epoch: 2, batch: 19241, sum loss: 3759.815674, avg loss: 2.491594, ppl: 12.080521 +epoch: 2, batch: 19242, sum loss: 4587.029297, avg loss: 2.437316, ppl: 11.442290 +epoch: 2, batch: 19243, sum loss: 4125.871582, avg loss: 2.348248, ppl: 10.467213 +epoch: 2, batch: 19244, sum loss: 3853.366455, avg loss: 2.505440, ppl: 12.248944 +epoch: 2, batch: 19245, sum loss: 4153.777344, avg loss: 2.532791, ppl: 12.588593 +epoch: 2, batch: 19246, sum loss: 3973.738037, avg loss: 2.333375, ppl: 10.312691 +epoch: 2, batch: 19247, sum loss: 3844.059814, avg loss: 2.572999, ppl: 13.105062 +epoch: 2, batch: 19248, sum loss: 4674.242676, avg loss: 2.657330, ppl: 14.258162 +epoch: 2, batch: 19249, sum loss: 4247.106445, avg loss: 2.651128, ppl: 14.170008 +epoch: 2, batch: 19250, sum loss: 4467.451660, avg loss: 2.577872, ppl: 13.169079 +epoch: 2, batch: 19251, sum loss: 4579.631836, avg loss: 2.558454, ppl: 12.915829 +epoch: 2, batch: 19252, sum loss: 4990.279297, avg loss: 2.448616, ppl: 11.572320 +epoch: 2, batch: 19253, sum loss: 3901.553711, avg loss: 2.464658, ppl: 11.759460 +epoch: 2, batch: 19254, sum loss: 4216.456055, avg loss: 2.585197, ppl: 13.265902 +epoch: 2, batch: 19255, sum loss: 3270.629883, avg loss: 2.375185, ppl: 10.753002 +epoch: 2, batch: 19256, sum loss: 3727.699463, avg loss: 2.318221, ppl: 10.157589 +epoch: 2, batch: 19257, sum loss: 4675.543457, avg loss: 2.638569, ppl: 13.993160 +epoch: 2, batch: 19258, sum loss: 4995.916992, avg loss: 2.697579, ppl: 14.843758 +epoch: 2, batch: 19259, sum loss: 3547.409424, avg loss: 2.273980, ppl: 9.718005 +epoch: 2, batch: 19260, sum loss: 4338.812012, avg loss: 2.486425, ppl: 12.018236 +epoch: 2, batch: 19261, sum loss: 
4211.573730, avg loss: 2.541686, ppl: 12.701068 +epoch: 2, batch: 19262, sum loss: 4046.494141, avg loss: 2.445012, ppl: 11.530684 +epoch: 2, batch: 19263, sum loss: 4353.504395, avg loss: 2.490563, ppl: 12.068073 +epoch: 2, batch: 19264, sum loss: 3969.626465, avg loss: 2.354464, ppl: 10.532482 +epoch: 2, batch: 19265, sum loss: 3320.824951, avg loss: 2.202138, ppl: 9.044333 +epoch: 2, batch: 19266, sum loss: 4487.299805, avg loss: 2.549602, ppl: 12.802008 +epoch: 2, batch: 19267, sum loss: 5369.513672, avg loss: 2.772077, ppl: 15.991819 +epoch: 2, batch: 19268, sum loss: 4107.129883, avg loss: 2.566956, ppl: 13.026114 +epoch: 2, batch: 19269, sum loss: 2913.231689, avg loss: 2.073475, ppl: 7.952407 +epoch: 2, batch: 19270, sum loss: 3877.152344, avg loss: 2.595149, ppl: 13.398581 +epoch: 2, batch: 19271, sum loss: 4933.545898, avg loss: 2.634034, ppl: 13.929852 +epoch: 2, batch: 19272, sum loss: 3365.682861, avg loss: 2.289580, ppl: 9.870792 +epoch: 2, batch: 19273, sum loss: 4022.720703, avg loss: 2.554108, ppl: 12.859829 +epoch: 2, batch: 19274, sum loss: 3998.203125, avg loss: 2.319144, ppl: 10.166963 +epoch: 2, batch: 19275, sum loss: 4352.050781, avg loss: 2.286942, ppl: 9.844787 +epoch: 2, batch: 19276, sum loss: 4463.852539, avg loss: 2.269371, ppl: 9.673312 +epoch: 2, batch: 19277, sum loss: 4388.475586, avg loss: 2.472380, ppl: 11.850622 +epoch: 2, batch: 19278, sum loss: 4134.586914, avg loss: 2.531896, ppl: 12.577334 +epoch: 2, batch: 19279, sum loss: 4232.267090, avg loss: 2.435136, ppl: 11.417375 +epoch: 2, batch: 19280, sum loss: 4096.519043, avg loss: 2.184810, ppl: 8.888961 +epoch: 2, batch: 19281, sum loss: 4708.284668, avg loss: 2.716841, ppl: 15.132436 +epoch: 2, batch: 19282, sum loss: 4756.678711, avg loss: 2.641132, ppl: 14.029074 +epoch: 2, batch: 19283, sum loss: 4711.168457, avg loss: 2.517995, ppl: 12.403704 +epoch: 2, batch: 19284, sum loss: 4048.403320, avg loss: 2.300229, ppl: 9.976467 +epoch: 2, batch: 19285, sum loss: 3599.629883, 
avg loss: 2.289841, ppl: 9.873367 +epoch: 2, batch: 19286, sum loss: 5027.875000, avg loss: 2.706068, ppl: 14.970301 +epoch: 2, batch: 19287, sum loss: 4158.439941, avg loss: 2.267415, ppl: 9.654414 +epoch: 2, batch: 19288, sum loss: 4846.587891, avg loss: 2.616948, ppl: 13.693868 +epoch: 2, batch: 19289, sum loss: 5446.869141, avg loss: 2.942663, ppl: 18.966286 +epoch: 2, batch: 19290, sum loss: 2663.261963, avg loss: 2.047088, ppl: 7.745317 +epoch: 2, batch: 19291, sum loss: 4347.375977, avg loss: 2.451989, ppl: 11.611418 +epoch: 2, batch: 19292, sum loss: 5657.137695, avg loss: 2.693875, ppl: 14.788873 +epoch: 2, batch: 19293, sum loss: 4254.998535, avg loss: 2.526721, ppl: 12.512414 +epoch: 2, batch: 19294, sum loss: 3860.784180, avg loss: 2.341288, ppl: 10.394617 +epoch: 2, batch: 19295, sum loss: 4521.473145, avg loss: 2.458659, ppl: 11.689122 +epoch: 2, batch: 19296, sum loss: 4182.071289, avg loss: 2.816210, ppl: 16.713379 +epoch: 2, batch: 19297, sum loss: 4108.932617, avg loss: 2.366897, ppl: 10.664246 +epoch: 2, batch: 19298, sum loss: 4282.252930, avg loss: 2.479591, ppl: 11.936378 +epoch: 2, batch: 19299, sum loss: 4199.114746, avg loss: 2.584071, ppl: 13.250969 +epoch: 2, batch: 19300, sum loss: 4682.895020, avg loss: 2.581530, ppl: 13.217343 +epoch: 2, batch: 19301, sum loss: 4282.742188, avg loss: 2.534167, ppl: 12.605927 +epoch: 2, batch: 19302, sum loss: 4727.823730, avg loss: 2.726542, ppl: 15.279954 +epoch: 2, batch: 19303, sum loss: 4030.912109, avg loss: 2.327317, ppl: 10.250398 +epoch: 2, batch: 19304, sum loss: 5033.666992, avg loss: 2.747635, ppl: 15.605679 +epoch: 2, batch: 19305, sum loss: 4664.971680, avg loss: 2.535311, ppl: 12.620349 +epoch: 2, batch: 19306, sum loss: 4890.438477, avg loss: 2.813831, ppl: 16.673674 +epoch: 2, batch: 19307, sum loss: 3916.407959, avg loss: 2.385145, ppl: 10.860637 +epoch: 2, batch: 19308, sum loss: 3535.684082, avg loss: 2.273752, ppl: 9.715786 +epoch: 2, batch: 19309, sum loss: 3591.181152, avg loss: 
2.418304, ppl: 11.226799 +epoch: 2, batch: 19310, sum loss: 4248.935547, avg loss: 2.539711, ppl: 12.676001 +epoch: 2, batch: 19311, sum loss: 5028.498535, avg loss: 2.839355, ppl: 17.104738 +epoch: 2, batch: 19312, sum loss: 3614.431152, avg loss: 2.373231, ppl: 10.732014 +epoch: 2, batch: 19313, sum loss: 4795.257324, avg loss: 2.562938, ppl: 12.973882 +epoch: 2, batch: 19314, sum loss: 3307.584961, avg loss: 2.347470, ppl: 10.459073 +epoch: 2, batch: 19315, sum loss: 3960.936523, avg loss: 2.380371, ppl: 10.808908 +epoch: 2, batch: 19316, sum loss: 4241.817383, avg loss: 2.311617, ppl: 10.090730 +epoch: 2, batch: 19317, sum loss: 4401.637207, avg loss: 2.739040, ppl: 15.472127 +epoch: 2, batch: 19318, sum loss: 4298.844727, avg loss: 2.624447, ppl: 13.796944 +epoch: 2, batch: 19319, sum loss: 4155.548340, avg loss: 2.498827, ppl: 12.168206 +epoch: 2, batch: 19320, sum loss: 3571.427002, avg loss: 2.247594, ppl: 9.464937 +epoch: 2, batch: 19321, sum loss: 4563.828125, avg loss: 2.694113, ppl: 14.792396 +epoch: 2, batch: 19322, sum loss: 4702.584961, avg loss: 2.568315, ppl: 13.043831 +epoch: 2, batch: 19323, sum loss: 4306.959961, avg loss: 2.534997, ppl: 12.616396 +epoch: 2, batch: 19324, sum loss: 4506.541016, avg loss: 2.603432, ppl: 13.510024 +epoch: 2, batch: 19325, sum loss: 4067.106445, avg loss: 2.453020, ppl: 11.623392 +epoch: 2, batch: 19326, sum loss: 4364.026367, avg loss: 2.633691, ppl: 13.925074 +epoch: 2, batch: 19327, sum loss: 3874.362061, avg loss: 2.494760, ppl: 12.118825 +epoch: 2, batch: 19328, sum loss: 4840.375977, avg loss: 2.722371, ppl: 15.216363 +epoch: 2, batch: 19329, sum loss: 5281.589844, avg loss: 2.691942, ppl: 14.760309 +epoch: 2, batch: 19330, sum loss: 3902.127441, avg loss: 2.342213, ppl: 10.404240 +epoch: 2, batch: 19331, sum loss: 3821.933594, avg loss: 2.537805, ppl: 12.651865 +epoch: 2, batch: 19332, sum loss: 4608.200684, avg loss: 2.474866, ppl: 11.880116 +epoch: 2, batch: 19333, sum loss: 3543.365723, avg loss: 
2.366978, ppl: 10.665111 +epoch: 2, batch: 19334, sum loss: 4086.255615, avg loss: 2.517718, ppl: 12.400262 +epoch: 2, batch: 19335, sum loss: 3399.820068, avg loss: 2.395927, ppl: 10.978370 +epoch: 2, batch: 19336, sum loss: 3808.160889, avg loss: 2.507018, ppl: 12.268295 +epoch: 2, batch: 19337, sum loss: 4217.561523, avg loss: 2.486770, ppl: 12.022380 +epoch: 2, batch: 19338, sum loss: 3674.181152, avg loss: 2.362818, ppl: 10.620833 +epoch: 2, batch: 19339, sum loss: 4010.656250, avg loss: 2.468096, ppl: 11.799958 +epoch: 2, batch: 19340, sum loss: 4104.725098, avg loss: 2.387856, ppl: 10.890125 +epoch: 2, batch: 19341, sum loss: 4042.581055, avg loss: 2.540906, ppl: 12.691160 +epoch: 2, batch: 19342, sum loss: 4703.264160, avg loss: 2.734456, ppl: 15.401361 +epoch: 2, batch: 19343, sum loss: 4120.046875, avg loss: 2.358355, ppl: 10.573546 +epoch: 2, batch: 19344, sum loss: 4383.708984, avg loss: 2.607798, ppl: 13.569143 +epoch: 2, batch: 19345, sum loss: 3793.558105, avg loss: 2.442729, ppl: 11.504394 +epoch: 2, batch: 19346, sum loss: 4514.288574, avg loss: 2.329354, ppl: 10.271307 +epoch: 2, batch: 19347, sum loss: 4718.251953, avg loss: 2.565662, ppl: 13.009267 +epoch: 2, batch: 19348, sum loss: 5256.791992, avg loss: 2.847666, ppl: 17.247484 +epoch: 2, batch: 19349, sum loss: 4481.532715, avg loss: 2.650226, ppl: 14.157243 +epoch: 2, batch: 19350, sum loss: 4459.616211, avg loss: 2.609489, ppl: 13.592100 +epoch: 2, batch: 19351, sum loss: 4803.384766, avg loss: 2.613376, ppl: 13.645034 +epoch: 2, batch: 19352, sum loss: 3772.970947, avg loss: 2.369956, ppl: 10.696927 +epoch: 2, batch: 19353, sum loss: 4297.543457, avg loss: 2.452936, ppl: 11.622419 +epoch: 2, batch: 19354, sum loss: 4549.921875, avg loss: 2.697049, ppl: 14.835889 +epoch: 2, batch: 19355, sum loss: 3905.173828, avg loss: 2.545746, ppl: 12.752733 +epoch: 2, batch: 19356, sum loss: 4093.906494, avg loss: 2.433952, ppl: 11.403857 +epoch: 2, batch: 19357, sum loss: 4207.224121, avg loss: 
2.318030, ppl: 10.155646 +epoch: 2, batch: 19358, sum loss: 3135.892334, avg loss: 2.214613, ppl: 9.157866 +epoch: 2, batch: 19359, sum loss: 3863.663818, avg loss: 2.358769, ppl: 10.577921 +epoch: 2, batch: 19360, sum loss: 4450.452148, avg loss: 2.475224, ppl: 11.884363 +epoch: 2, batch: 19361, sum loss: 5075.288574, avg loss: 2.765825, ppl: 15.892146 +epoch: 2, batch: 19362, sum loss: 4349.534180, avg loss: 2.429907, ppl: 11.357829 +epoch: 2, batch: 19363, sum loss: 4168.840820, avg loss: 2.703528, ppl: 14.932326 +epoch: 2, batch: 19364, sum loss: 4680.615723, avg loss: 2.763055, ppl: 15.848190 +epoch: 2, batch: 19365, sum loss: 4222.305176, avg loss: 2.262757, ppl: 9.609550 +epoch: 2, batch: 19366, sum loss: 3575.243652, avg loss: 2.487992, ppl: 12.037076 +epoch: 2, batch: 19367, sum loss: 5043.149414, avg loss: 2.733414, ppl: 15.385325 +epoch: 2, batch: 19368, sum loss: 4327.732422, avg loss: 2.440910, ppl: 11.483481 +epoch: 2, batch: 19369, sum loss: 4261.442383, avg loss: 2.604794, ppl: 13.528436 +epoch: 2, batch: 19370, sum loss: 3886.119873, avg loss: 2.398839, ppl: 11.010391 +epoch: 2, batch: 19371, sum loss: 4416.166504, avg loss: 2.678088, ppl: 14.557229 +epoch: 2, batch: 19372, sum loss: 4335.219727, avg loss: 2.611578, ppl: 13.620530 +epoch: 2, batch: 19373, sum loss: 4415.071289, avg loss: 2.474816, ppl: 11.879519 +epoch: 2, batch: 19374, sum loss: 3754.920410, avg loss: 2.352707, ppl: 10.513991 +epoch: 2, batch: 19375, sum loss: 4191.039551, avg loss: 2.500620, ppl: 12.190054 +epoch: 2, batch: 19376, sum loss: 5029.167480, avg loss: 2.605786, ppl: 13.541869 +epoch: 2, batch: 19377, sum loss: 3434.690186, avg loss: 2.323877, ppl: 10.215203 +epoch: 2, batch: 19378, sum loss: 4748.093750, avg loss: 2.860297, ppl: 17.466721 +epoch: 2, batch: 19379, sum loss: 4096.938477, avg loss: 2.350510, ppl: 10.490915 +epoch: 2, batch: 19380, sum loss: 4176.347656, avg loss: 2.338381, ppl: 10.364439 +epoch: 2, batch: 19381, sum loss: 4172.759277, avg loss: 2.453121, 
ppl: 11.624573 +epoch: 2, batch: 19382, sum loss: 3790.376465, avg loss: 2.402013, ppl: 11.045389 +epoch: 2, batch: 19383, sum loss: 4625.814453, avg loss: 2.666176, ppl: 14.384851 +epoch: 2, batch: 19384, sum loss: 3687.111328, avg loss: 2.394228, ppl: 10.959734 +epoch: 2, batch: 19385, sum loss: 4338.691406, avg loss: 2.500687, ppl: 12.190862 +epoch: 2, batch: 19386, sum loss: 4311.164551, avg loss: 2.671106, ppl: 14.455943 +epoch: 2, batch: 19387, sum loss: 4626.253906, avg loss: 2.661826, ppl: 14.322420 +epoch: 2, batch: 19388, sum loss: 4567.455566, avg loss: 2.649336, ppl: 14.144649 +epoch: 2, batch: 19389, sum loss: 4580.803223, avg loss: 2.347926, ppl: 10.463844 +epoch: 2, batch: 19390, sum loss: 4663.133301, avg loss: 2.590630, ppl: 13.338166 +epoch: 2, batch: 19391, sum loss: 3804.522217, avg loss: 2.412506, ppl: 11.161899 +epoch: 2, batch: 19392, sum loss: 4698.604004, avg loss: 2.559152, ppl: 12.924857 +epoch: 2, batch: 19393, sum loss: 4468.541504, avg loss: 2.538944, ppl: 12.666288 +epoch: 2, batch: 19394, sum loss: 4372.526367, avg loss: 2.690785, ppl: 14.743251 +epoch: 2, batch: 19395, sum loss: 3673.919189, avg loss: 2.452549, ppl: 11.617929 +epoch: 2, batch: 19396, sum loss: 4797.713867, avg loss: 2.558781, ppl: 12.920053 +epoch: 2, batch: 19397, sum loss: 3839.963379, avg loss: 2.521316, ppl: 12.444958 +epoch: 2, batch: 19398, sum loss: 3735.012451, avg loss: 2.409686, ppl: 11.130462 +epoch: 2, batch: 19399, sum loss: 3908.637695, avg loss: 2.396467, ppl: 10.984300 +epoch: 2, batch: 19400, sum loss: 3636.586182, avg loss: 2.314823, ppl: 10.123128 +epoch: 2, batch: 19401, sum loss: 4488.380859, avg loss: 2.489396, ppl: 12.053991 +epoch: 2, batch: 19402, sum loss: 4031.532715, avg loss: 2.459751, ppl: 11.701901 +epoch: 2, batch: 19403, sum loss: 4117.017090, avg loss: 2.450605, ppl: 11.595365 +epoch: 2, batch: 19404, sum loss: 4462.358887, avg loss: 2.466755, ppl: 11.784142 +epoch: 2, batch: 19405, sum loss: 4185.129883, avg loss: 2.573881, ppl: 
13.116627 +epoch: 2, batch: 19406, sum loss: 4185.239746, avg loss: 2.397045, ppl: 10.990647 +epoch: 2, batch: 19407, sum loss: 4310.832031, avg loss: 2.523906, ppl: 12.477241 +epoch: 2, batch: 19408, sum loss: 4221.855957, avg loss: 2.601267, ppl: 13.480803 +epoch: 2, batch: 19409, sum loss: 3943.188232, avg loss: 2.291219, ppl: 9.886985 +epoch: 2, batch: 19410, sum loss: 4430.897461, avg loss: 2.588141, ppl: 13.305014 +epoch: 2, batch: 19411, sum loss: 3804.433350, avg loss: 2.324028, ppl: 10.216743 +epoch: 2, batch: 19412, sum loss: 4357.284668, avg loss: 2.634392, ppl: 13.934841 +epoch: 2, batch: 19413, sum loss: 4733.386719, avg loss: 2.654732, ppl: 14.221174 +epoch: 2, batch: 19414, sum loss: 3853.988281, avg loss: 2.584835, ppl: 13.261095 +epoch: 2, batch: 19415, sum loss: 5103.349609, avg loss: 2.779602, ppl: 16.112608 +epoch: 2, batch: 19416, sum loss: 3712.339355, avg loss: 2.382760, ppl: 10.834761 +epoch: 2, batch: 19417, sum loss: 3574.041992, avg loss: 2.331404, ppl: 10.292379 +epoch: 2, batch: 19418, sum loss: 4329.664551, avg loss: 2.712822, ppl: 15.071754 +epoch: 2, batch: 19419, sum loss: 4885.967285, avg loss: 2.691993, ppl: 14.761065 +epoch: 2, batch: 19420, sum loss: 4239.689941, avg loss: 2.663122, ppl: 14.340987 +epoch: 2, batch: 19421, sum loss: 4922.774902, avg loss: 2.597770, ppl: 13.433753 +epoch: 2, batch: 19422, sum loss: 3528.976318, avg loss: 2.290056, ppl: 9.875490 +epoch: 2, batch: 19423, sum loss: 4654.106445, avg loss: 2.688681, ppl: 14.712253 +epoch: 2, batch: 19424, sum loss: 4809.590820, avg loss: 2.507607, ppl: 12.275522 +epoch: 2, batch: 19425, sum loss: 4624.795410, avg loss: 2.759425, ppl: 15.790755 +epoch: 2, batch: 19426, sum loss: 4591.021484, avg loss: 2.487010, ppl: 12.025272 +epoch: 2, batch: 19427, sum loss: 4624.308594, avg loss: 2.702694, ppl: 14.919868 +epoch: 2, batch: 19428, sum loss: 3607.198486, avg loss: 2.442247, ppl: 11.498849 +epoch: 2, batch: 19429, sum loss: 4780.367676, avg loss: 2.561826, ppl: 12.959459 
+epoch: 2, batch: 19430, sum loss: 3451.490234, avg loss: 2.130550, ppl: 8.419494 +epoch: 2, batch: 19431, sum loss: 4530.084473, avg loss: 2.590100, ppl: 13.331102 +epoch: 2, batch: 19432, sum loss: 4933.294434, avg loss: 2.695789, ppl: 14.817210 +epoch: 2, batch: 19433, sum loss: 4489.388672, avg loss: 2.527809, ppl: 12.526031 +epoch: 2, batch: 19434, sum loss: 3462.000977, avg loss: 2.325051, ppl: 10.227202 +epoch: 2, batch: 19435, sum loss: 3499.628418, avg loss: 2.278404, ppl: 9.761089 +epoch: 2, batch: 19436, sum loss: 3899.827637, avg loss: 2.342239, ppl: 10.404505 +epoch: 2, batch: 19437, sum loss: 5358.089844, avg loss: 2.751972, ppl: 15.673516 +epoch: 2, batch: 19438, sum loss: 4059.731934, avg loss: 2.318522, ppl: 10.160645 +epoch: 2, batch: 19439, sum loss: 3575.001709, avg loss: 2.145859, ppl: 8.549384 +epoch: 2, batch: 19440, sum loss: 3472.272949, avg loss: 2.131536, ppl: 8.427806 +epoch: 2, batch: 19441, sum loss: 4223.529785, avg loss: 2.352942, ppl: 10.516459 +epoch: 2, batch: 19442, sum loss: 4615.500488, avg loss: 2.588615, ppl: 13.311325 +epoch: 2, batch: 19443, sum loss: 3518.622803, avg loss: 2.506142, ppl: 12.257545 +epoch: 2, batch: 19444, sum loss: 3679.012207, avg loss: 2.455949, ppl: 11.657495 +epoch: 2, batch: 19445, sum loss: 4104.067871, avg loss: 2.458998, ppl: 11.693091 +epoch: 2, batch: 19446, sum loss: 4976.250000, avg loss: 2.803521, ppl: 16.502649 +epoch: 2, batch: 19447, sum loss: 4032.926758, avg loss: 2.497168, ppl: 12.148046 +epoch: 2, batch: 19448, sum loss: 4002.625977, avg loss: 2.324405, ppl: 10.220599 +epoch: 2, batch: 19449, sum loss: 4440.103516, avg loss: 2.476355, ppl: 11.897813 +epoch: 2, batch: 19450, sum loss: 4505.610840, avg loss: 2.497567, ppl: 12.152889 +epoch: 2, batch: 19451, sum loss: 4526.463379, avg loss: 2.514702, ppl: 12.362922 +epoch: 2, batch: 19452, sum loss: 4590.836914, avg loss: 2.347054, ppl: 10.454720 +epoch: 2, batch: 19453, sum loss: 3476.838623, avg loss: 2.386300, ppl: 10.873184 +epoch: 2, 
batch: 19454, sum loss: 3140.746826, avg loss: 2.451793, ppl: 11.609142 +epoch: 2, batch: 19455, sum loss: 3615.551514, avg loss: 2.261133, ppl: 9.593950 +epoch: 2, batch: 19456, sum loss: 3729.866211, avg loss: 2.431464, ppl: 11.375525 +epoch: 2, batch: 19457, sum loss: 4671.017090, avg loss: 2.505910, ppl: 12.254710 +epoch: 2, batch: 19458, sum loss: 4251.627441, avg loss: 2.407490, ppl: 11.106050 +epoch: 2, batch: 19459, sum loss: 4684.836914, avg loss: 2.712702, ppl: 15.069943 +epoch: 2, batch: 19460, sum loss: 4072.957275, avg loss: 2.456548, ppl: 11.664481 +epoch: 2, batch: 19461, sum loss: 3789.526123, avg loss: 2.396917, ppl: 10.989245 +epoch: 2, batch: 19462, sum loss: 3310.940186, avg loss: 2.168265, ppl: 8.743098 +epoch: 2, batch: 19463, sum loss: 4208.857910, avg loss: 2.518766, ppl: 12.413271 +epoch: 2, batch: 19464, sum loss: 3563.232910, avg loss: 2.420675, ppl: 11.253448 +epoch: 2, batch: 19465, sum loss: 4765.406738, avg loss: 2.508109, ppl: 12.281682 +epoch: 2, batch: 19466, sum loss: 3536.816895, avg loss: 2.332993, ppl: 10.308747 +epoch: 2, batch: 19467, sum loss: 3956.375488, avg loss: 2.451286, ppl: 11.603260 +epoch: 2, batch: 19468, sum loss: 4855.061035, avg loss: 2.747629, ppl: 15.605594 +epoch: 2, batch: 19469, sum loss: 5751.117188, avg loss: 2.961441, ppl: 19.325792 +epoch: 2, batch: 19470, sum loss: 3723.217773, avg loss: 2.425549, ppl: 11.308436 +epoch: 2, batch: 19471, sum loss: 4291.259766, avg loss: 2.280159, ppl: 9.778237 +epoch: 2, batch: 19472, sum loss: 4370.208496, avg loss: 2.373823, ppl: 10.738368 +epoch: 2, batch: 19473, sum loss: 4017.144531, avg loss: 2.639385, ppl: 14.004595 +epoch: 2, batch: 19474, sum loss: 4297.578125, avg loss: 2.469872, ppl: 11.820939 +epoch: 2, batch: 19475, sum loss: 4387.603027, avg loss: 2.497213, ppl: 12.148587 +epoch: 2, batch: 19476, sum loss: 4286.889648, avg loss: 2.591832, ppl: 13.354210 +epoch: 2, batch: 19477, sum loss: 4236.700684, avg loss: 2.647938, ppl: 14.124880 +epoch: 2, batch: 
19478, sum loss: 4066.823486, avg loss: 2.395067, ppl: 10.968933 +epoch: 2, batch: 19479, sum loss: 3757.500732, avg loss: 2.170711, ppl: 8.764514 +epoch: 2, batch: 19480, sum loss: 3780.709229, avg loss: 2.480780, ppl: 11.950586 +epoch: 2, batch: 19481, sum loss: 3976.203857, avg loss: 2.336195, ppl: 10.341811 +epoch: 2, batch: 19482, sum loss: 4565.501953, avg loss: 2.500275, ppl: 12.185843 +epoch: 2, batch: 19483, sum loss: 4421.514160, avg loss: 2.464612, ppl: 11.758922 +epoch: 2, batch: 19484, sum loss: 5066.133789, avg loss: 2.816083, ppl: 16.711267 +epoch: 2, batch: 19485, sum loss: 4096.445801, avg loss: 2.426805, ppl: 11.322643 +epoch: 2, batch: 19486, sum loss: 4282.305176, avg loss: 2.466766, ppl: 11.784270 +epoch: 2, batch: 19487, sum loss: 4550.541504, avg loss: 2.600309, ppl: 13.467904 +epoch: 2, batch: 19488, sum loss: 4655.805664, avg loss: 2.534461, ppl: 12.609639 +epoch: 2, batch: 19489, sum loss: 4768.641602, avg loss: 2.641907, ppl: 14.039948 +epoch: 2, batch: 19490, sum loss: 4346.730469, avg loss: 2.648830, ppl: 14.137490 +epoch: 2, batch: 19491, sum loss: 4691.394043, avg loss: 2.705533, ppl: 14.962286 +epoch: 2, batch: 19492, sum loss: 3117.229980, avg loss: 2.366917, ppl: 10.664464 +epoch: 2, batch: 19493, sum loss: 4084.284180, avg loss: 2.373204, ppl: 10.731722 +epoch: 2, batch: 19494, sum loss: 4861.767578, avg loss: 2.703986, ppl: 14.939167 +epoch: 2, batch: 19495, sum loss: 4414.584961, avg loss: 2.545897, ppl: 12.754661 +epoch: 2, batch: 19496, sum loss: 3482.674316, avg loss: 2.172598, ppl: 8.781067 +epoch: 2, batch: 19497, sum loss: 4107.807617, avg loss: 2.549850, ppl: 12.805180 +epoch: 2, batch: 19498, sum loss: 3862.792969, avg loss: 2.245810, ppl: 9.448064 +epoch: 2, batch: 19499, sum loss: 5033.871582, avg loss: 2.602829, ppl: 13.501884 +epoch: 2, batch: 19500, sum loss: 3615.131836, avg loss: 2.226066, ppl: 9.263355 +epoch: 2, batch: 19501, sum loss: 4312.028320, avg loss: 2.483887, ppl: 11.987773 +epoch: 2, batch: 19502, sum 
loss: 3904.295654, avg loss: 2.517276, ppl: 12.394791 +epoch: 2, batch: 19503, sum loss: 4406.951172, avg loss: 2.540030, ppl: 12.680045 +epoch: 2, batch: 19504, sum loss: 4719.821777, avg loss: 2.505213, ppl: 12.246173 +epoch: 2, batch: 19505, sum loss: 4194.820312, avg loss: 2.648245, ppl: 14.129222 +epoch: 2, batch: 19506, sum loss: 3560.314941, avg loss: 2.431909, ppl: 11.380588 +epoch: 2, batch: 19507, sum loss: 3676.436523, avg loss: 2.429899, ppl: 11.357732 +epoch: 2, batch: 19508, sum loss: 3223.625244, avg loss: 2.224724, ppl: 9.250932 +epoch: 2, batch: 19509, sum loss: 3796.387695, avg loss: 2.329072, ppl: 10.268411 +epoch: 2, batch: 19510, sum loss: 4467.971680, avg loss: 2.413815, ppl: 11.176518 +epoch: 2, batch: 19511, sum loss: 4607.253418, avg loss: 2.594174, ppl: 13.385531 +epoch: 2, batch: 19512, sum loss: 5195.160645, avg loss: 2.680681, ppl: 14.595036 +epoch: 2, batch: 19513, sum loss: 5627.817871, avg loss: 2.829471, ppl: 16.936497 +epoch: 2, batch: 19514, sum loss: 4541.685547, avg loss: 2.420941, ppl: 11.256448 +epoch: 2, batch: 19515, sum loss: 4984.144043, avg loss: 2.916410, ppl: 18.474838 +epoch: 2, batch: 19516, sum loss: 4291.111816, avg loss: 2.671925, ppl: 14.467793 +epoch: 2, batch: 19517, sum loss: 4709.118164, avg loss: 2.760327, ppl: 15.805012 +epoch: 2, batch: 19518, sum loss: 4048.807617, avg loss: 2.684886, ppl: 14.656527 +epoch: 2, batch: 19519, sum loss: 4003.660889, avg loss: 2.240437, ppl: 9.397437 +epoch: 2, batch: 19520, sum loss: 3574.333740, avg loss: 2.282461, ppl: 9.800770 +epoch: 2, batch: 19521, sum loss: 2612.311523, avg loss: 1.824240, ppl: 6.198082 +epoch: 2, batch: 19522, sum loss: 4713.062988, avg loss: 2.571229, ppl: 13.081895 +epoch: 2, batch: 19523, sum loss: 4840.099121, avg loss: 2.845443, ppl: 17.209185 +epoch: 2, batch: 19524, sum loss: 3967.810059, avg loss: 2.528878, ppl: 12.539432 +epoch: 2, batch: 19525, sum loss: 4434.035156, avg loss: 2.519338, ppl: 12.420373 +epoch: 2, batch: 19526, sum loss: 
3087.638428, avg loss: 2.299061, ppl: 9.964817 +epoch: 2, batch: 19527, sum loss: 3569.152832, avg loss: 2.311628, ppl: 10.090836 +epoch: 2, batch: 19528, sum loss: 4071.713379, avg loss: 2.460250, ppl: 11.707737 +epoch: 2, batch: 19529, sum loss: 4423.148438, avg loss: 2.496134, ppl: 12.135482 +epoch: 2, batch: 19530, sum loss: 3521.072510, avg loss: 2.299851, ppl: 9.972700 +epoch: 2, batch: 19531, sum loss: 3828.626709, avg loss: 2.473273, ppl: 11.861205 +epoch: 2, batch: 19532, sum loss: 3669.541504, avg loss: 2.365920, ppl: 10.653831 +epoch: 2, batch: 19533, sum loss: 5023.543945, avg loss: 2.637031, ppl: 13.971658 +epoch: 2, batch: 19534, sum loss: 3715.990479, avg loss: 2.365366, ppl: 10.647940 +epoch: 2, batch: 19535, sum loss: 3926.888428, avg loss: 2.460456, ppl: 11.710155 +epoch: 2, batch: 19536, sum loss: 4657.866699, avg loss: 2.658600, ppl: 14.276286 +epoch: 2, batch: 19537, sum loss: 4302.635254, avg loss: 2.642896, ppl: 14.053850 +epoch: 2, batch: 19538, sum loss: 3293.600342, avg loss: 2.134543, ppl: 8.453186 +epoch: 2, batch: 19539, sum loss: 4463.627441, avg loss: 2.753626, ppl: 15.699450 +epoch: 2, batch: 19540, sum loss: 4681.940918, avg loss: 2.494375, ppl: 12.114154 +epoch: 2, batch: 19541, sum loss: 3894.681396, avg loss: 2.363278, ppl: 10.625722 +epoch: 2, batch: 19542, sum loss: 3904.916016, avg loss: 2.613732, ppl: 13.649902 +epoch: 2, batch: 19543, sum loss: 4050.299072, avg loss: 2.409458, ppl: 11.127930 +epoch: 2, batch: 19544, sum loss: 3750.398438, avg loss: 2.563499, ppl: 12.981155 +epoch: 2, batch: 19545, sum loss: 4324.369141, avg loss: 2.648113, ppl: 14.127358 +epoch: 2, batch: 19546, sum loss: 4662.142090, avg loss: 2.590079, ppl: 13.330826 +epoch: 2, batch: 19547, sum loss: 4478.341309, avg loss: 2.699422, ppl: 14.871136 +epoch: 2, batch: 19548, sum loss: 3559.616699, avg loss: 2.551697, ppl: 12.828853 +epoch: 2, batch: 19549, sum loss: 4026.598145, avg loss: 2.459742, ppl: 11.701796 +epoch: 2, batch: 19550, sum loss: 
4288.602539, avg loss: 2.421571, ppl: 11.263540 +epoch: 2, batch: 19551, sum loss: 4180.362305, avg loss: 2.732263, ppl: 15.367626 +epoch: 2, batch: 19552, sum loss: 4610.120117, avg loss: 2.512327, ppl: 12.333599 +epoch: 2, batch: 19553, sum loss: 4709.031250, avg loss: 2.529018, ppl: 12.541183 +epoch: 2, batch: 19554, sum loss: 5050.355469, avg loss: 2.680656, ppl: 14.594664 +epoch: 2, batch: 19555, sum loss: 5101.328613, avg loss: 2.838803, ppl: 17.095287 +epoch: 2, batch: 19556, sum loss: 3821.568359, avg loss: 2.484765, ppl: 11.998298 +epoch: 2, batch: 19557, sum loss: 3058.088135, avg loss: 2.189039, ppl: 8.926635 +epoch: 2, batch: 19558, sum loss: 5302.841309, avg loss: 2.601983, ppl: 13.490464 +epoch: 2, batch: 19559, sum loss: 4676.700195, avg loss: 2.744542, ppl: 15.557492 +epoch: 2, batch: 19560, sum loss: 4768.242188, avg loss: 2.585815, ppl: 13.274099 +epoch: 2, batch: 19561, sum loss: 4654.708984, avg loss: 2.728434, ppl: 15.308900 +epoch: 2, batch: 19562, sum loss: 3628.728027, avg loss: 2.458488, ppl: 11.687127 +epoch: 2, batch: 19563, sum loss: 5299.560547, avg loss: 2.527211, ppl: 12.518539 +epoch: 2, batch: 19564, sum loss: 3870.460449, avg loss: 2.352863, ppl: 10.515636 +epoch: 2, batch: 19565, sum loss: 3990.313477, avg loss: 2.514375, ppl: 12.358888 +epoch: 2, batch: 19566, sum loss: 5266.929199, avg loss: 2.581828, ppl: 13.221283 +epoch: 2, batch: 19567, sum loss: 3089.860107, avg loss: 2.292181, ppl: 9.896499 +epoch: 2, batch: 19568, sum loss: 3495.740234, avg loss: 2.491618, ppl: 12.080809 +epoch: 2, batch: 19569, sum loss: 4457.604492, avg loss: 2.447888, ppl: 11.563902 +epoch: 2, batch: 19570, sum loss: 5301.286621, avg loss: 2.680125, ppl: 14.586909 +epoch: 2, batch: 19571, sum loss: 4463.840332, avg loss: 2.485435, ppl: 12.006336 +epoch: 2, batch: 19572, sum loss: 4219.123047, avg loss: 2.393150, ppl: 10.947924 +epoch: 2, batch: 19573, sum loss: 4142.144043, avg loss: 2.563208, ppl: 12.977381 +epoch: 2, batch: 19574, sum loss: 
4207.758301, avg loss: 2.394854, ppl: 10.966595 +epoch: 2, batch: 19575, sum loss: 4436.845215, avg loss: 2.522368, ppl: 12.458062 +epoch: 2, batch: 19576, sum loss: 4271.442383, avg loss: 2.505245, ppl: 12.246558 +epoch: 2, batch: 19577, sum loss: 3939.107910, avg loss: 2.458869, ppl: 11.691578 +epoch: 2, batch: 19578, sum loss: 4147.845215, avg loss: 2.554092, ppl: 12.859617 +epoch: 2, batch: 19579, sum loss: 4251.556641, avg loss: 2.644003, ppl: 14.069409 +epoch: 2, batch: 19580, sum loss: 3983.672852, avg loss: 2.530923, ppl: 12.565097 +epoch: 2, batch: 19581, sum loss: 4298.981445, avg loss: 2.833870, ppl: 17.011173 +epoch: 2, batch: 19582, sum loss: 5061.504883, avg loss: 2.658354, ppl: 14.272771 +epoch: 2, batch: 19583, sum loss: 3361.462158, avg loss: 2.318250, ppl: 10.157882 +epoch: 2, batch: 19584, sum loss: 4082.687012, avg loss: 2.550086, ppl: 12.808203 +epoch: 2, batch: 19585, sum loss: 4165.965332, avg loss: 2.396988, ppl: 10.990026 +epoch: 2, batch: 19586, sum loss: 4895.595703, avg loss: 2.502861, ppl: 12.217396 +epoch: 2, batch: 19587, sum loss: 4707.224609, avg loss: 2.709974, ppl: 15.028886 +epoch: 2, batch: 19588, sum loss: 5202.041504, avg loss: 2.764103, ppl: 15.864799 +epoch: 2, batch: 19589, sum loss: 3458.996826, avg loss: 2.337160, ppl: 10.351797 +epoch: 2, batch: 19590, sum loss: 4139.817871, avg loss: 2.560184, ppl: 12.938201 +epoch: 2, batch: 19591, sum loss: 4329.832031, avg loss: 2.363446, ppl: 10.627505 +epoch: 2, batch: 19592, sum loss: 3671.797852, avg loss: 2.410898, ppl: 11.143967 +epoch: 2, batch: 19593, sum loss: 3598.837891, avg loss: 2.358347, ppl: 10.573463 +epoch: 2, batch: 19594, sum loss: 4178.026367, avg loss: 2.587013, ppl: 13.290018 +epoch: 2, batch: 19595, sum loss: 4007.328369, avg loss: 2.424276, ppl: 11.294051 +epoch: 2, batch: 19596, sum loss: 4128.524414, avg loss: 2.455993, ppl: 11.658004 +epoch: 2, batch: 19597, sum loss: 4906.643066, avg loss: 2.739611, ppl: 15.480957 +epoch: 2, batch: 19598, sum loss: 
3023.424805, avg loss: 2.095235, ppl: 8.127354 +epoch: 2, batch: 19599, sum loss: 4164.041504, avg loss: 2.640483, ppl: 14.019972 +epoch: 2, batch: 19600, sum loss: 4932.520508, avg loss: 2.634894, ppl: 13.941830 +epoch: 2, batch: 19601, sum loss: 3554.670898, avg loss: 2.549979, ppl: 12.806837 +epoch: 2, batch: 19602, sum loss: 2951.375732, avg loss: 2.155863, ppl: 8.635336 +epoch: 2, batch: 19603, sum loss: 3909.377197, avg loss: 2.575347, ppl: 13.135880 +epoch: 2, batch: 19604, sum loss: 4267.567871, avg loss: 2.427513, ppl: 11.330667 +epoch: 2, batch: 19605, sum loss: 4382.162109, avg loss: 2.747437, ppl: 15.602592 +epoch: 2, batch: 19606, sum loss: 4702.120605, avg loss: 2.610839, ppl: 13.610460 +epoch: 2, batch: 19607, sum loss: 3571.250488, avg loss: 2.474879, ppl: 11.880270 +epoch: 2, batch: 19608, sum loss: 3864.421631, avg loss: 2.325164, ppl: 10.228354 +epoch: 2, batch: 19609, sum loss: 5615.090820, avg loss: 2.980409, ppl: 19.695879 +epoch: 2, batch: 19610, sum loss: 5258.425781, avg loss: 2.623965, ppl: 13.790295 +epoch: 2, batch: 19611, sum loss: 4279.839355, avg loss: 2.703625, ppl: 14.933775 +epoch: 2, batch: 19612, sum loss: 4014.667725, avg loss: 2.542538, ppl: 12.711895 +epoch: 2, batch: 19613, sum loss: 4806.713867, avg loss: 2.671881, ppl: 14.467155 +epoch: 2, batch: 19614, sum loss: 3575.221436, avg loss: 2.192043, ppl: 8.953483 +epoch: 2, batch: 19615, sum loss: 3635.367920, avg loss: 2.256591, ppl: 9.550477 +epoch: 2, batch: 19616, sum loss: 4559.459473, avg loss: 2.693125, ppl: 14.777778 +epoch: 2, batch: 19617, sum loss: 5021.040039, avg loss: 2.990494, ppl: 19.895514 +epoch: 2, batch: 19618, sum loss: 4829.382812, avg loss: 2.616134, ppl: 13.682719 +epoch: 2, batch: 19619, sum loss: 4612.344727, avg loss: 2.424997, ppl: 11.302199 +epoch: 2, batch: 19620, sum loss: 3494.406494, avg loss: 2.377147, ppl: 10.774122 +epoch: 2, batch: 19621, sum loss: 3677.553467, avg loss: 2.363466, ppl: 10.627727 +epoch: 2, batch: 19622, sum loss: 
4242.884766, avg loss: 2.566778, ppl: 13.023800 +epoch: 2, batch: 19623, sum loss: 4109.002441, avg loss: 2.332011, ppl: 10.298626 +epoch: 2, batch: 19624, sum loss: 4008.400879, avg loss: 2.607938, ppl: 13.571042 +epoch: 2, batch: 19625, sum loss: 3752.068604, avg loss: 2.382266, ppl: 10.829410 +epoch: 2, batch: 19626, sum loss: 4954.469727, avg loss: 2.531666, ppl: 12.574432 +epoch: 2, batch: 19627, sum loss: 3986.634521, avg loss: 2.196493, ppl: 8.993418 +epoch: 2, batch: 19628, sum loss: 4146.402344, avg loss: 2.703000, ppl: 14.924439 +epoch: 2, batch: 19629, sum loss: 5597.939453, avg loss: 2.767147, ppl: 15.913174 +epoch: 2, batch: 19630, sum loss: 3328.749756, avg loss: 2.246120, ppl: 9.450994 +epoch: 2, batch: 19631, sum loss: 5350.153320, avg loss: 2.567252, ppl: 13.029971 +epoch: 2, batch: 19632, sum loss: 4122.391113, avg loss: 2.498419, ppl: 12.163246 +epoch: 2, batch: 19633, sum loss: 3802.771240, avg loss: 2.299136, ppl: 9.965570 +epoch: 2, batch: 19634, sum loss: 3877.207520, avg loss: 2.408203, ppl: 11.113976 +epoch: 2, batch: 19635, sum loss: 4820.486328, avg loss: 2.764040, ppl: 15.863807 +epoch: 2, batch: 19636, sum loss: 3493.264648, avg loss: 2.414143, ppl: 11.180183 +epoch: 2, batch: 19637, sum loss: 4052.641113, avg loss: 2.498546, ppl: 12.164789 +epoch: 2, batch: 19638, sum loss: 4007.491943, avg loss: 2.243837, ppl: 9.429440 +epoch: 2, batch: 19639, sum loss: 3845.981934, avg loss: 2.274383, ppl: 9.721922 +epoch: 2, batch: 19640, sum loss: 4483.618164, avg loss: 2.587200, ppl: 13.292502 +epoch: 2, batch: 19641, sum loss: 4218.620605, avg loss: 2.493275, ppl: 12.100838 +epoch: 2, batch: 19642, sum loss: 4542.661621, avg loss: 2.840939, ppl: 17.131845 +epoch: 2, batch: 19643, sum loss: 4538.843262, avg loss: 2.605536, ppl: 13.538477 +epoch: 2, batch: 19644, sum loss: 3560.722656, avg loss: 2.137288, ppl: 8.476421 +epoch: 2, batch: 19645, sum loss: 5103.902344, avg loss: 2.683440, ppl: 14.635352 +epoch: 2, batch: 19646, sum loss: 3912.493896, 
avg loss: 2.442256, ppl: 11.498950 +epoch: 2, batch: 19647, sum loss: 3627.132324, avg loss: 2.373778, ppl: 10.737880 +epoch: 2, batch: 19648, sum loss: 4225.052734, avg loss: 2.399235, ppl: 11.014745 +epoch: 2, batch: 19649, sum loss: 4545.297852, avg loss: 2.604755, ppl: 13.527913 +epoch: 2, batch: 19650, sum loss: 4185.302734, avg loss: 2.258663, ppl: 9.570287 +epoch: 2, batch: 19651, sum loss: 4295.822266, avg loss: 2.330886, ppl: 10.287048 +epoch: 2, batch: 19652, sum loss: 3696.561279, avg loss: 2.448054, ppl: 11.565816 +epoch: 2, batch: 19653, sum loss: 4995.615723, avg loss: 2.720923, ppl: 15.194346 +epoch: 2, batch: 19654, sum loss: 4271.030273, avg loss: 2.613850, ppl: 13.651504 +epoch: 2, batch: 19655, sum loss: 3532.519531, avg loss: 2.569105, ppl: 13.054137 +epoch: 2, batch: 19656, sum loss: 4453.162109, avg loss: 2.538861, ppl: 12.665235 +epoch: 2, batch: 19657, sum loss: 5238.797363, avg loss: 2.663344, ppl: 14.344172 +epoch: 2, batch: 19658, sum loss: 3896.062744, avg loss: 2.505507, ppl: 12.249765 +epoch: 2, batch: 19659, sum loss: 3662.436523, avg loss: 2.489760, ppl: 12.058378 +epoch: 2, batch: 19660, sum loss: 3411.657471, avg loss: 2.203913, ppl: 9.060399 +epoch: 2, batch: 19661, sum loss: 4180.353027, avg loss: 2.403883, ppl: 11.066068 +epoch: 2, batch: 19662, sum loss: 4738.747070, avg loss: 2.887719, ppl: 17.952312 +epoch: 2, batch: 19663, sum loss: 4746.175781, avg loss: 2.888725, ppl: 17.970388 +epoch: 2, batch: 19664, sum loss: 4002.691895, avg loss: 2.476913, ppl: 11.904461 +epoch: 2, batch: 19665, sum loss: 4292.159668, avg loss: 2.526286, ppl: 12.506968 +epoch: 2, batch: 19666, sum loss: 3992.863770, avg loss: 2.424325, ppl: 11.294605 +epoch: 2, batch: 19667, sum loss: 3659.173828, avg loss: 2.564242, ppl: 12.990812 +epoch: 2, batch: 19668, sum loss: 4793.315430, avg loss: 2.674841, ppl: 14.510049 +epoch: 2, batch: 19669, sum loss: 4562.896484, avg loss: 2.762044, ppl: 15.832170 +epoch: 2, batch: 19670, sum loss: 3633.855469, avg loss: 
2.193033, ppl: 8.962353 +epoch: 2, batch: 19671, sum loss: 4445.385254, avg loss: 2.512937, ppl: 12.341121 +epoch: 2, batch: 19672, sum loss: 3795.937744, avg loss: 2.379898, ppl: 10.803804 +epoch: 2, batch: 19673, sum loss: 4631.130371, avg loss: 2.575712, ppl: 13.140670 +epoch: 2, batch: 19674, sum loss: 4431.269043, avg loss: 2.595940, ppl: 13.409182 +epoch: 2, batch: 19675, sum loss: 4111.356445, avg loss: 2.494755, ppl: 12.118765 +epoch: 2, batch: 19676, sum loss: 5013.717773, avg loss: 2.581729, ppl: 13.219978 +epoch: 2, batch: 19677, sum loss: 4305.452637, avg loss: 2.491581, ppl: 12.080365 +epoch: 2, batch: 19678, sum loss: 3957.980957, avg loss: 2.479938, ppl: 11.940524 +epoch: 2, batch: 19679, sum loss: 3677.393066, avg loss: 2.075278, ppl: 7.966763 +epoch: 2, batch: 19680, sum loss: 4911.370117, avg loss: 2.720981, ppl: 15.195216 +epoch: 2, batch: 19681, sum loss: 4811.791016, avg loss: 2.700220, ppl: 14.883000 +epoch: 2, batch: 19682, sum loss: 4461.770508, avg loss: 2.619947, ppl: 13.735002 +epoch: 2, batch: 19683, sum loss: 4306.055664, avg loss: 2.506435, ppl: 12.261143 +epoch: 2, batch: 19684, sum loss: 4149.929688, avg loss: 2.489460, ppl: 12.054762 +epoch: 2, batch: 19685, sum loss: 3861.241699, avg loss: 2.419324, ppl: 11.238264 +epoch: 2, batch: 19686, sum loss: 4370.835449, avg loss: 2.628284, ppl: 13.849976 +epoch: 2, batch: 19687, sum loss: 3918.895020, avg loss: 2.317502, ppl: 10.150283 +epoch: 2, batch: 19688, sum loss: 4968.187500, avg loss: 2.675384, ppl: 14.517918 +epoch: 2, batch: 19689, sum loss: 4259.782227, avg loss: 2.650767, ppl: 14.164897 +epoch: 2, batch: 19690, sum loss: 5014.612305, avg loss: 2.747733, ppl: 15.607208 +epoch: 2, batch: 19691, sum loss: 3956.075195, avg loss: 2.454141, ppl: 11.636433 +epoch: 2, batch: 19692, sum loss: 4161.473633, avg loss: 2.415249, ppl: 11.192555 +epoch: 2, batch: 19693, sum loss: 4459.008301, avg loss: 2.492459, ppl: 12.090966 +epoch: 2, batch: 19694, sum loss: 4972.744141, avg loss: 2.670647, 
ppl: 14.449310 +epoch: 2, batch: 19695, sum loss: 3491.652100, avg loss: 2.471091, ppl: 11.835356 +epoch: 2, batch: 19696, sum loss: 4448.788086, avg loss: 2.780493, ppl: 16.126963 +epoch: 2, batch: 19697, sum loss: 4442.602051, avg loss: 2.497247, ppl: 12.148998 +epoch: 2, batch: 19698, sum loss: 4444.863770, avg loss: 2.524057, ppl: 12.479117 +epoch: 2, batch: 19699, sum loss: 4052.796143, avg loss: 2.635108, ppl: 13.944818 +epoch: 2, batch: 19700, sum loss: 3466.591064, avg loss: 2.107350, ppl: 8.226414 +epoch: 2, batch: 19701, sum loss: 3859.806152, avg loss: 2.359295, ppl: 10.583487 +epoch: 2, batch: 19702, sum loss: 4734.101074, avg loss: 2.758800, ppl: 15.780899 +epoch: 2, batch: 19703, sum loss: 4579.200684, avg loss: 2.782017, ppl: 16.151573 +epoch: 2, batch: 19704, sum loss: 3241.488770, avg loss: 2.136776, ppl: 8.472077 +epoch: 2, batch: 19705, sum loss: 3918.485840, avg loss: 2.373402, ppl: 10.733843 +epoch: 2, batch: 19706, sum loss: 4490.384766, avg loss: 2.743057, ppl: 15.534405 +epoch: 2, batch: 19707, sum loss: 3804.635254, avg loss: 2.404953, ppl: 11.077905 +epoch: 2, batch: 19708, sum loss: 4495.451172, avg loss: 2.568829, ppl: 13.050537 +epoch: 2, batch: 19709, sum loss: 5373.027344, avg loss: 2.785395, ppl: 16.206221 +epoch: 2, batch: 19710, sum loss: 4401.549805, avg loss: 2.674089, ppl: 14.499131 +epoch: 2, batch: 19711, sum loss: 4243.853027, avg loss: 2.725660, ppl: 15.266492 +epoch: 2, batch: 19712, sum loss: 5011.709473, avg loss: 2.587356, ppl: 13.294579 +epoch: 2, batch: 19713, sum loss: 5182.978516, avg loss: 2.846226, ppl: 17.222668 +epoch: 2, batch: 19714, sum loss: 3264.707520, avg loss: 2.040442, ppl: 7.694011 +epoch: 2, batch: 19715, sum loss: 6010.719238, avg loss: 2.874567, ppl: 17.717747 +epoch: 2, batch: 19716, sum loss: 4419.315918, avg loss: 2.566386, ppl: 13.018683 +epoch: 2, batch: 19717, sum loss: 4053.812744, avg loss: 2.605278, ppl: 13.534987 +epoch: 2, batch: 19718, sum loss: 4479.441895, avg loss: 2.561145, ppl: 
12.950632 +epoch: 2, batch: 19719, sum loss: 4077.462158, avg loss: 2.518507, ppl: 12.410048 +epoch: 2, batch: 19720, sum loss: 4624.949219, avg loss: 2.532831, ppl: 12.589095 +epoch: 2, batch: 19721, sum loss: 4993.398438, avg loss: 2.589937, ppl: 13.328932 +epoch: 2, batch: 19722, sum loss: 3888.363525, avg loss: 2.657802, ppl: 14.264895 +epoch: 2, batch: 19723, sum loss: 4636.723633, avg loss: 2.570246, ppl: 13.069038 +epoch: 2, batch: 19724, sum loss: 4422.126465, avg loss: 2.556142, ppl: 12.886011 +epoch: 2, batch: 19725, sum loss: 4429.940430, avg loss: 2.678320, ppl: 14.560606 +epoch: 2, batch: 19726, sum loss: 4832.925781, avg loss: 2.665707, ppl: 14.378106 +epoch: 2, batch: 19727, sum loss: 4045.629883, avg loss: 2.512814, ppl: 12.339600 +epoch: 2, batch: 19728, sum loss: 3783.235352, avg loss: 2.575381, ppl: 13.136325 +epoch: 2, batch: 19729, sum loss: 4359.689453, avg loss: 2.629487, ppl: 13.866655 +epoch: 2, batch: 19730, sum loss: 3476.179443, avg loss: 2.337713, ppl: 10.357522 +epoch: 2, batch: 19731, sum loss: 4053.025391, avg loss: 2.481951, ppl: 11.964579 +epoch: 2, batch: 19732, sum loss: 4436.888672, avg loss: 2.452675, ppl: 11.619386 +epoch: 2, batch: 19733, sum loss: 4942.589844, avg loss: 2.709753, ppl: 15.025568 +epoch: 2, batch: 19734, sum loss: 4614.266602, avg loss: 2.673387, ppl: 14.488964 +epoch: 2, batch: 19735, sum loss: 4738.469727, avg loss: 2.519123, ppl: 12.417697 +epoch: 2, batch: 19736, sum loss: 5198.038086, avg loss: 2.745926, ppl: 15.579032 +epoch: 2, batch: 19737, sum loss: 4687.738281, avg loss: 2.533913, ppl: 12.602720 +epoch: 2, batch: 19738, sum loss: 4664.168945, avg loss: 2.494208, ppl: 12.112138 +epoch: 2, batch: 19739, sum loss: 4559.943848, avg loss: 2.529087, ppl: 12.542051 +epoch: 2, batch: 19740, sum loss: 4581.969238, avg loss: 2.717657, ppl: 15.144794 +epoch: 2, batch: 19741, sum loss: 5073.513672, avg loss: 2.721842, ppl: 15.208311 +epoch: 2, batch: 19742, sum loss: 3981.641846, avg loss: 2.466940, ppl: 
11.786330 +epoch: 2, batch: 19743, sum loss: 4392.330078, avg loss: 2.719709, ppl: 15.175908 +epoch: 2, batch: 19744, sum loss: 3511.138428, avg loss: 2.262331, ppl: 9.605458 +epoch: 2, batch: 19745, sum loss: 3911.320068, avg loss: 2.423371, ppl: 11.283829 +epoch: 2, batch: 19746, sum loss: 3945.361572, avg loss: 2.484485, ppl: 11.994937 +epoch: 2, batch: 19747, sum loss: 4422.384277, avg loss: 2.575646, ppl: 13.139805 +epoch: 2, batch: 19748, sum loss: 4255.929199, avg loss: 2.641793, ppl: 14.038359 +epoch: 2, batch: 19749, sum loss: 5088.341797, avg loss: 2.643294, ppl: 14.059443 +epoch: 2, batch: 19750, sum loss: 4466.665039, avg loss: 2.718603, ppl: 15.159132 +epoch: 2, batch: 19751, sum loss: 3926.506836, avg loss: 2.480421, ppl: 11.946294 +epoch: 2, batch: 19752, sum loss: 4316.832520, avg loss: 2.606783, ppl: 13.555372 +epoch: 2, batch: 19753, sum loss: 4314.676270, avg loss: 2.651922, ppl: 14.181262 +epoch: 2, batch: 19754, sum loss: 3379.365723, avg loss: 2.384874, ppl: 10.857691 +epoch: 2, batch: 19755, sum loss: 3766.111816, avg loss: 2.455092, ppl: 11.647511 +epoch: 2, batch: 19756, sum loss: 4706.089844, avg loss: 2.797913, ppl: 16.410364 +epoch: 2, batch: 19757, sum loss: 4001.966309, avg loss: 2.473403, ppl: 11.862750 +epoch: 2, batch: 19758, sum loss: 4251.561035, avg loss: 2.551957, ppl: 12.832196 +epoch: 2, batch: 19759, sum loss: 4825.579590, avg loss: 2.612658, ppl: 13.635249 +epoch: 2, batch: 19760, sum loss: 4372.624023, avg loss: 2.630941, ppl: 13.886833 +epoch: 2, batch: 19761, sum loss: 5102.334961, avg loss: 2.570446, ppl: 13.071651 +epoch: 2, batch: 19762, sum loss: 3462.675049, avg loss: 2.353960, ppl: 10.527175 +epoch: 2, batch: 19763, sum loss: 4443.290039, avg loss: 2.539023, ppl: 12.667288 +epoch: 2, batch: 19764, sum loss: 4580.091309, avg loss: 2.638301, ppl: 13.989421 +epoch: 2, batch: 19765, sum loss: 4409.692871, avg loss: 2.568254, ppl: 13.043037 +epoch: 2, batch: 19766, sum loss: 4318.928223, avg loss: 2.600198, ppl: 
13.466398 +epoch: 2, batch: 19767, sum loss: 3405.665283, avg loss: 2.215787, ppl: 9.168626 +epoch: 2, batch: 19768, sum loss: 2626.581055, avg loss: 1.896448, ppl: 6.662191 +epoch: 2, batch: 19769, sum loss: 4399.604004, avg loss: 2.684322, ppl: 14.648269 +epoch: 2, batch: 19770, sum loss: 3832.476074, avg loss: 2.281236, ppl: 9.788772 +epoch: 2, batch: 19771, sum loss: 4382.185059, avg loss: 2.582313, ppl: 13.227699 +epoch: 2, batch: 19772, sum loss: 3976.674072, avg loss: 2.457771, ppl: 11.678754 +epoch: 2, batch: 19773, sum loss: 2861.244141, avg loss: 2.194206, ppl: 8.972870 +epoch: 2, batch: 19774, sum loss: 3921.995361, avg loss: 2.501272, ppl: 12.198006 +epoch: 2, batch: 19775, sum loss: 4393.706543, avg loss: 2.569419, ppl: 13.058234 +epoch: 2, batch: 19776, sum loss: 4185.911621, avg loss: 2.437922, ppl: 11.449222 +epoch: 2, batch: 19777, sum loss: 3820.517578, avg loss: 2.669824, ppl: 14.437423 +epoch: 2, batch: 19778, sum loss: 4848.925781, avg loss: 2.429322, ppl: 11.351178 +epoch: 2, batch: 19779, sum loss: 3379.927246, avg loss: 2.026335, ppl: 7.586234 +epoch: 2, batch: 19780, sum loss: 5238.343262, avg loss: 2.826953, ppl: 16.893902 +epoch: 2, batch: 19781, sum loss: 3797.233887, avg loss: 2.451410, ppl: 11.604695 +epoch: 2, batch: 19782, sum loss: 3865.789062, avg loss: 2.330192, ppl: 10.279919 +epoch: 2, batch: 19783, sum loss: 3738.714600, avg loss: 2.456449, ppl: 11.663317 +epoch: 2, batch: 19784, sum loss: 4626.443359, avg loss: 2.665002, ppl: 14.367976 +epoch: 2, batch: 19785, sum loss: 3902.991455, avg loss: 2.401841, ppl: 11.043488 +epoch: 2, batch: 19786, sum loss: 4865.219238, avg loss: 2.686482, ppl: 14.679947 +epoch: 2, batch: 19787, sum loss: 4354.820312, avg loss: 2.515783, ppl: 12.376293 +epoch: 2, batch: 19788, sum loss: 4190.586914, avg loss: 2.265182, ppl: 9.632880 +epoch: 2, batch: 19789, sum loss: 4764.528320, avg loss: 2.672198, ppl: 14.471741 +epoch: 2, batch: 19790, sum loss: 4678.850586, avg loss: 2.567975, ppl: 13.039393 
+epoch: 2, batch: 19791, sum loss: 4254.134277, avg loss: 2.454780, ppl: 11.643873 +epoch: 2, batch: 19792, sum loss: 4148.809570, avg loss: 2.539051, ppl: 12.667647 +epoch: 2, batch: 19793, sum loss: 5220.705566, avg loss: 2.870096, ppl: 17.638720 +epoch: 2, batch: 19794, sum loss: 4463.419922, avg loss: 2.476926, ppl: 11.904609 +epoch: 2, batch: 19795, sum loss: 5364.495117, avg loss: 2.590292, ppl: 13.333668 +epoch: 2, batch: 19796, sum loss: 4193.342773, avg loss: 2.500503, ppl: 12.188619 +epoch: 2, batch: 19797, sum loss: 4450.054199, avg loss: 2.679142, ppl: 14.572577 +epoch: 2, batch: 19798, sum loss: 4846.867188, avg loss: 2.700205, ppl: 14.882776 +epoch: 2, batch: 19799, sum loss: 4104.185547, avg loss: 2.525653, ppl: 12.499050 +epoch: 2, batch: 19800, sum loss: 3646.998535, avg loss: 2.238796, ppl: 9.382029 +epoch: 2, batch: 19801, sum loss: 4440.947754, avg loss: 2.597045, ppl: 13.424017 +epoch: 2, batch: 19802, sum loss: 4867.791504, avg loss: 2.714887, ppl: 15.102899 +epoch: 2, batch: 19803, sum loss: 5305.600586, avg loss: 2.880348, ppl: 17.820473 +epoch: 2, batch: 19804, sum loss: 3219.907715, avg loss: 2.381588, ppl: 10.822080 +epoch: 2, batch: 19805, sum loss: 3437.978760, avg loss: 2.298114, ppl: 9.955392 +epoch: 2, batch: 19806, sum loss: 4280.375488, avg loss: 2.663582, ppl: 14.347585 +epoch: 2, batch: 19807, sum loss: 3255.227051, avg loss: 2.248085, ppl: 9.469584 +epoch: 2, batch: 19808, sum loss: 4137.713867, avg loss: 2.587688, ppl: 13.298995 +epoch: 2, batch: 19809, sum loss: 4503.013184, avg loss: 2.712659, ppl: 15.069286 +epoch: 2, batch: 19810, sum loss: 4661.517090, avg loss: 2.610032, ppl: 13.599487 +epoch: 2, batch: 19811, sum loss: 4015.080078, avg loss: 2.612284, ppl: 13.630142 +epoch: 2, batch: 19812, sum loss: 4582.132324, avg loss: 2.534365, ppl: 12.608424 +epoch: 2, batch: 19813, sum loss: 5166.300781, avg loss: 2.753892, ppl: 15.703627 +epoch: 2, batch: 19814, sum loss: 4259.172852, avg loss: 2.538244, ppl: 12.657419 +epoch: 2, 
batch: 19815, sum loss: 3945.012207, avg loss: 2.438203, ppl: 11.452440 +epoch: 2, batch: 19816, sum loss: 4113.972656, avg loss: 2.520817, ppl: 12.438749 +epoch: 2, batch: 19817, sum loss: 5219.311035, avg loss: 2.788094, ppl: 16.250011 +epoch: 2, batch: 19818, sum loss: 4009.285645, avg loss: 2.370955, ppl: 10.707618 +epoch: 2, batch: 19819, sum loss: 3405.444092, avg loss: 2.181579, ppl: 8.860282 +epoch: 2, batch: 19820, sum loss: 3845.613525, avg loss: 2.320829, ppl: 10.184113 +epoch: 2, batch: 19821, sum loss: 4540.416016, avg loss: 2.590083, ppl: 13.330883 +epoch: 2, batch: 19822, sum loss: 4384.834961, avg loss: 2.385655, ppl: 10.866182 +epoch: 2, batch: 19823, sum loss: 4250.645020, avg loss: 2.512202, ppl: 12.332049 +epoch: 2, batch: 19824, sum loss: 4080.719238, avg loss: 2.365634, ppl: 10.650794 +epoch: 2, batch: 19825, sum loss: 3795.683105, avg loss: 2.211936, ppl: 9.133386 +epoch: 2, batch: 19826, sum loss: 4498.254395, avg loss: 2.773276, ppl: 16.011005 +epoch: 2, batch: 19827, sum loss: 5239.376465, avg loss: 2.872465, ppl: 17.680550 +epoch: 2, batch: 19828, sum loss: 5633.531738, avg loss: 2.866937, ppl: 17.583086 +epoch: 2, batch: 19829, sum loss: 3218.639648, avg loss: 2.210604, ppl: 9.121225 +epoch: 2, batch: 19830, sum loss: 4563.579102, avg loss: 2.595892, ppl: 13.408548 +epoch: 2, batch: 19831, sum loss: 5185.834473, avg loss: 2.798615, ppl: 16.421894 +epoch: 2, batch: 19832, sum loss: 4081.198486, avg loss: 2.474954, ppl: 11.881156 +epoch: 2, batch: 19833, sum loss: 3675.721924, avg loss: 2.638709, ppl: 13.995125 +epoch: 2, batch: 19834, sum loss: 3872.568848, avg loss: 2.338508, ppl: 10.365756 +epoch: 2, batch: 19835, sum loss: 3899.228027, avg loss: 2.348933, ppl: 10.474382 +epoch: 2, batch: 19836, sum loss: 4645.883301, avg loss: 2.662397, ppl: 14.330601 +epoch: 2, batch: 19837, sum loss: 4528.062500, avg loss: 2.704936, ppl: 14.953360 +epoch: 2, batch: 19838, sum loss: 3889.362793, avg loss: 2.411260, ppl: 11.148003 +epoch: 2, batch: 
19839, sum loss: 4308.681641, avg loss: 2.581595, ppl: 13.218204 +epoch: 2, batch: 19840, sum loss: 4427.892578, avg loss: 2.680322, ppl: 14.589796 +epoch: 2, batch: 19841, sum loss: 5680.683594, avg loss: 2.931209, ppl: 18.750292 +epoch: 2, batch: 19842, sum loss: 3985.670898, avg loss: 2.305189, ppl: 10.026070 +epoch: 2, batch: 19843, sum loss: 4164.109375, avg loss: 2.471282, ppl: 11.837607 +epoch: 2, batch: 19844, sum loss: 3853.163574, avg loss: 2.431018, ppl: 11.370450 +epoch: 2, batch: 19845, sum loss: 3642.010498, avg loss: 2.489413, ppl: 12.054193 +epoch: 2, batch: 19846, sum loss: 4832.230469, avg loss: 2.839148, ppl: 17.101194 +epoch: 2, batch: 19847, sum loss: 3698.374023, avg loss: 2.331888, ppl: 10.297361 +epoch: 2, batch: 19848, sum loss: 3872.684814, avg loss: 2.220576, ppl: 9.212636 +epoch: 2, batch: 19849, sum loss: 4026.508301, avg loss: 2.322092, ppl: 10.196987 +epoch: 2, batch: 19850, sum loss: 3861.474609, avg loss: 2.453287, ppl: 11.626505 +epoch: 2, batch: 19851, sum loss: 5255.393555, avg loss: 2.602968, ppl: 13.503764 +epoch: 2, batch: 19852, sum loss: 3834.552246, avg loss: 2.328204, ppl: 10.259501 +epoch: 2, batch: 19853, sum loss: 4568.521973, avg loss: 2.531037, ppl: 12.566532 +epoch: 2, batch: 19854, sum loss: 4682.336914, avg loss: 2.764071, ppl: 15.864299 +epoch: 2, batch: 19855, sum loss: 4204.403320, avg loss: 2.609810, ppl: 13.596462 +epoch: 2, batch: 19856, sum loss: 3854.643066, avg loss: 2.611547, ppl: 13.620102 +epoch: 2, batch: 19857, sum loss: 4587.091797, avg loss: 2.679376, ppl: 14.575994 +epoch: 2, batch: 19858, sum loss: 4318.518555, avg loss: 2.736704, ppl: 15.436018 +epoch: 2, batch: 19859, sum loss: 4565.343262, avg loss: 2.631322, ppl: 13.892122 +epoch: 2, batch: 19860, sum loss: 4199.013672, avg loss: 2.376352, ppl: 10.765556 +epoch: 2, batch: 19861, sum loss: 4482.248535, avg loss: 2.645955, ppl: 14.096906 +epoch: 2, batch: 19862, sum loss: 3912.691650, avg loss: 2.362736, ppl: 10.619973 +epoch: 2, batch: 19863, 
sum loss: 4767.985352, avg loss: 2.811312, ppl: 16.631727 +epoch: 2, batch: 19864, sum loss: 4715.764648, avg loss: 2.749717, ppl: 15.638202 +epoch: 2, batch: 19865, sum loss: 4499.758301, avg loss: 2.449515, ppl: 11.582724 +epoch: 2, batch: 19866, sum loss: 4584.685547, avg loss: 2.647047, ppl: 14.112304 +epoch: 2, batch: 19867, sum loss: 3613.195801, avg loss: 2.493579, ppl: 12.104520 +epoch: 2, batch: 19868, sum loss: 4809.799805, avg loss: 2.490834, ppl: 12.071337 +epoch: 2, batch: 19869, sum loss: 4415.703613, avg loss: 2.409004, ppl: 11.122874 +epoch: 2, batch: 19870, sum loss: 3159.241455, avg loss: 2.231103, ppl: 9.310126 +epoch: 2, batch: 19871, sum loss: 3138.037109, avg loss: 2.269007, ppl: 9.669798 +epoch: 2, batch: 19872, sum loss: 4449.012695, avg loss: 2.524979, ppl: 12.490631 +epoch: 2, batch: 19873, sum loss: 3618.501221, avg loss: 2.413944, ppl: 11.177955 +epoch: 2, batch: 19874, sum loss: 3682.224121, avg loss: 2.237074, ppl: 9.365891 +epoch: 2, batch: 19875, sum loss: 4308.418457, avg loss: 2.315109, ppl: 10.126032 +epoch: 2, batch: 19876, sum loss: 4068.655273, avg loss: 2.576729, ppl: 13.154041 +epoch: 2, batch: 19877, sum loss: 3853.339844, avg loss: 2.437280, ppl: 11.441875 +epoch: 2, batch: 19878, sum loss: 4865.261719, avg loss: 2.748735, ppl: 15.622863 +epoch: 2, batch: 19879, sum loss: 5018.810059, avg loss: 2.627649, ppl: 13.841195 +epoch: 2, batch: 19880, sum loss: 5218.119141, avg loss: 2.766765, ppl: 15.907097 +epoch: 2, batch: 19881, sum loss: 5385.527344, avg loss: 2.610532, ppl: 13.606288 +epoch: 2, batch: 19882, sum loss: 3771.085205, avg loss: 2.436102, ppl: 11.428402 +epoch: 2, batch: 19883, sum loss: 3608.268066, avg loss: 2.343031, ppl: 10.412751 +epoch: 2, batch: 19884, sum loss: 4366.147461, avg loss: 2.595807, ppl: 13.407404 +epoch: 2, batch: 19885, sum loss: 4220.207031, avg loss: 2.368242, ppl: 10.678603 +epoch: 2, batch: 19886, sum loss: 3859.420654, avg loss: 2.416669, ppl: 11.208463 +epoch: 2, batch: 19887, sum loss: 
3625.611816, avg loss: 2.330085, ppl: 10.278813 +epoch: 2, batch: 19888, sum loss: 4080.259277, avg loss: 2.460953, ppl: 11.715966 +epoch: 2, batch: 19889, sum loss: 3692.699219, avg loss: 2.299314, ppl: 9.967347 +epoch: 2, batch: 19890, sum loss: 5384.078125, avg loss: 2.783908, ppl: 16.182135 +epoch: 2, batch: 19891, sum loss: 4339.695312, avg loss: 2.683794, ppl: 14.640538 +epoch: 2, batch: 19892, sum loss: 5121.837891, avg loss: 2.870986, ppl: 17.654408 +epoch: 2, batch: 19893, sum loss: 4255.340820, avg loss: 2.523927, ppl: 12.477499 +epoch: 2, batch: 19894, sum loss: 4887.969727, avg loss: 2.673944, ppl: 14.497033 +epoch: 2, batch: 19895, sum loss: 3828.533691, avg loss: 2.572939, ppl: 13.104286 +epoch: 2, batch: 19896, sum loss: 3620.177979, avg loss: 2.424767, ppl: 11.299602 +epoch: 2, batch: 19897, sum loss: 4968.128906, avg loss: 2.817997, ppl: 16.743280 +epoch: 2, batch: 19898, sum loss: 4333.263672, avg loss: 2.574726, ppl: 13.127718 +epoch: 2, batch: 19899, sum loss: 3430.017334, avg loss: 2.158601, ppl: 8.659018 +epoch: 2, batch: 19900, sum loss: 4951.839844, avg loss: 2.707403, ppl: 14.990294 +epoch: 2, batch: 19901, sum loss: 5341.452148, avg loss: 2.954343, ppl: 19.189114 +epoch: 2, batch: 19902, sum loss: 4086.842041, avg loss: 2.404025, ppl: 11.067633 +epoch: 2, batch: 19903, sum loss: 4853.179688, avg loss: 2.737270, ppl: 15.444765 +epoch: 2, batch: 19904, sum loss: 4202.292969, avg loss: 2.514837, ppl: 12.364596 +epoch: 2, batch: 19905, sum loss: 4174.388184, avg loss: 2.462766, ppl: 11.737231 +epoch: 2, batch: 19906, sum loss: 2783.765869, avg loss: 1.984152, ppl: 7.272881 +epoch: 2, batch: 19907, sum loss: 4232.842285, avg loss: 2.478245, ppl: 11.920323 +epoch: 2, batch: 19908, sum loss: 3506.902832, avg loss: 2.358374, ppl: 10.573749 +epoch: 2, batch: 19909, sum loss: 4093.081299, avg loss: 2.471668, ppl: 11.842177 +epoch: 2, batch: 19910, sum loss: 3979.834961, avg loss: 2.303145, ppl: 10.005602 +epoch: 2, batch: 19911, sum loss: 
4767.401367, avg loss: 2.672310, ppl: 14.473370 +epoch: 2, batch: 19912, sum loss: 3867.058838, avg loss: 2.310071, ppl: 10.075142 +epoch: 2, batch: 19913, sum loss: 3797.263428, avg loss: 2.642494, ppl: 14.048192 +epoch: 2, batch: 19914, sum loss: 4193.108398, avg loss: 2.518384, ppl: 12.408522 +epoch: 2, batch: 19915, sum loss: 4650.128418, avg loss: 2.513583, ppl: 12.349097 +epoch: 2, batch: 19916, sum loss: 3744.565918, avg loss: 2.394224, ppl: 10.959686 +epoch: 2, batch: 19917, sum loss: 4403.848145, avg loss: 2.539705, ppl: 12.675928 +epoch: 2, batch: 19918, sum loss: 4040.003174, avg loss: 2.409066, ppl: 11.123564 +epoch: 2, batch: 19919, sum loss: 4543.504395, avg loss: 2.638504, ppl: 13.992259 +epoch: 2, batch: 19920, sum loss: 4000.784668, avg loss: 2.290088, ppl: 9.875811 +epoch: 2, batch: 19921, sum loss: 3936.456543, avg loss: 2.541289, ppl: 12.696024 +epoch: 2, batch: 19922, sum loss: 3977.572998, avg loss: 2.315235, ppl: 10.127299 +epoch: 2, batch: 19923, sum loss: 5166.900879, avg loss: 2.743973, ppl: 15.548634 +epoch: 2, batch: 19924, sum loss: 3970.175293, avg loss: 2.467480, ppl: 11.792691 +epoch: 2, batch: 19925, sum loss: 4210.446777, avg loss: 2.658110, ppl: 14.269300 +epoch: 2, batch: 19926, sum loss: 4158.125488, avg loss: 2.479502, ppl: 11.935325 +epoch: 2, batch: 19927, sum loss: 4103.384766, avg loss: 2.518959, ppl: 12.415668 +epoch: 2, batch: 19928, sum loss: 4307.440918, avg loss: 2.303444, ppl: 10.008596 +epoch: 2, batch: 19929, sum loss: 3665.670410, avg loss: 2.406875, ppl: 11.099220 +epoch: 2, batch: 19930, sum loss: 3760.294678, avg loss: 2.359030, ppl: 10.580688 +epoch: 2, batch: 19931, sum loss: 4346.046387, avg loss: 2.661388, ppl: 14.316141 +epoch: 2, batch: 19932, sum loss: 3895.895508, avg loss: 2.410826, ppl: 11.143167 +epoch: 2, batch: 19933, sum loss: 3728.143799, avg loss: 2.324279, ppl: 10.219313 +epoch: 2, batch: 19934, sum loss: 4322.561523, avg loss: 2.651878, ppl: 14.180647 +epoch: 2, batch: 19935, sum loss: 
4791.841797, avg loss: 2.731951, ppl: 15.362827 +epoch: 2, batch: 19936, sum loss: 4125.507812, avg loss: 2.557662, ppl: 12.905602 +epoch: 2, batch: 19937, sum loss: 4818.936523, avg loss: 2.621837, ppl: 13.760978 +epoch: 2, batch: 19938, sum loss: 3649.325928, avg loss: 2.207699, ppl: 9.094762 +epoch: 2, batch: 19939, sum loss: 4212.227539, avg loss: 2.422213, ppl: 11.270769 +epoch: 2, batch: 19940, sum loss: 3764.765625, avg loss: 2.470319, ppl: 11.826214 +epoch: 2, batch: 19941, sum loss: 4909.058594, avg loss: 2.849134, ppl: 17.272825 +epoch: 2, batch: 19942, sum loss: 4689.614746, avg loss: 2.558437, ppl: 12.915613 +epoch: 2, batch: 19943, sum loss: 4645.894531, avg loss: 2.757207, ppl: 15.755782 +epoch: 2, batch: 19944, sum loss: 5027.415039, avg loss: 2.580809, ppl: 13.207814 +epoch: 2, batch: 19945, sum loss: 4788.113770, avg loss: 2.581193, ppl: 13.212897 +epoch: 2, batch: 19946, sum loss: 3951.834229, avg loss: 2.367786, ppl: 10.673731 +epoch: 2, batch: 19947, sum loss: 5132.753418, avg loss: 2.914681, ppl: 18.442928 +epoch: 2, batch: 19948, sum loss: 4492.237793, avg loss: 2.630116, ppl: 13.875376 +epoch: 2, batch: 19949, sum loss: 3213.760254, avg loss: 2.258440, ppl: 9.568151 +epoch: 2, batch: 19950, sum loss: 4310.500000, avg loss: 2.588889, ppl: 13.314969 +epoch: 2, batch: 19951, sum loss: 4525.275391, avg loss: 2.744254, ppl: 15.553013 +epoch: 2, batch: 19952, sum loss: 4120.379883, avg loss: 2.241774, ppl: 9.410007 +epoch: 2, batch: 19953, sum loss: 4904.338867, avg loss: 2.632495, ppl: 13.908434 +epoch: 2, batch: 19954, sum loss: 4924.038574, avg loss: 2.627555, ppl: 13.839891 +epoch: 2, batch: 19955, sum loss: 4272.794434, avg loss: 2.588004, ppl: 13.303193 +epoch: 2, batch: 19956, sum loss: 3616.499268, avg loss: 2.221437, ppl: 9.220569 +epoch: 2, batch: 19957, sum loss: 4793.568359, avg loss: 2.511036, ppl: 12.317690 +epoch: 2, batch: 19958, sum loss: 3982.997559, avg loss: 2.767893, ppl: 15.925038 +epoch: 2, batch: 19959, sum loss: 
4740.375000, avg loss: 2.545851, ppl: 12.754080 +epoch: 2, batch: 19960, sum loss: 4287.979492, avg loss: 2.625829, ppl: 13.816030 +epoch: 2, batch: 19961, sum loss: 4034.699219, avg loss: 2.643971, ppl: 14.068956 +epoch: 2, batch: 19962, sum loss: 4255.851074, avg loss: 2.390928, ppl: 10.923621 +epoch: 2, batch: 19963, sum loss: 4062.102051, avg loss: 2.260491, ppl: 9.587794 +epoch: 2, batch: 19964, sum loss: 4579.707031, avg loss: 2.633529, ppl: 13.922819 +epoch: 2, batch: 19965, sum loss: 4922.391602, avg loss: 2.796813, ppl: 16.392324 +epoch: 2, batch: 19966, sum loss: 3886.835938, avg loss: 2.444551, ppl: 11.525371 +epoch: 2, batch: 19967, sum loss: 4421.943848, avg loss: 2.515326, ppl: 12.370645 +epoch: 2, batch: 19968, sum loss: 5079.921875, avg loss: 2.891248, ppl: 18.015770 +epoch: 2, batch: 19969, sum loss: 4275.830566, avg loss: 2.440543, ppl: 11.479269 +epoch: 2, batch: 19970, sum loss: 4535.511230, avg loss: 2.585810, ppl: 13.274039 +epoch: 2, batch: 19971, sum loss: 4070.751465, avg loss: 2.491280, ppl: 12.076725 +epoch: 2, batch: 19972, sum loss: 4061.880859, avg loss: 2.508883, ppl: 12.291187 +epoch: 2, batch: 19973, sum loss: 4351.046875, avg loss: 2.661191, ppl: 14.313322 +epoch: 2, batch: 19974, sum loss: 3452.219727, avg loss: 2.419215, ppl: 11.237035 +epoch: 2, batch: 19975, sum loss: 4000.846924, avg loss: 2.540220, ppl: 12.682464 +epoch: 2, batch: 19976, sum loss: 4216.024902, avg loss: 2.475646, ppl: 11.889382 +epoch: 2, batch: 19977, sum loss: 3289.669434, avg loss: 2.200448, ppl: 9.029056 +epoch: 2, batch: 19978, sum loss: 5144.050781, avg loss: 2.736197, ppl: 15.428204 +epoch: 2, batch: 19979, sum loss: 4258.604004, avg loss: 2.491869, ppl: 12.083839 +epoch: 2, batch: 19980, sum loss: 4102.793945, avg loss: 2.565850, ppl: 13.011711 +epoch: 2, batch: 19981, sum loss: 4243.206055, avg loss: 2.609598, ppl: 13.593581 +epoch: 2, batch: 19982, sum loss: 3869.295166, avg loss: 2.625031, ppl: 13.804998 +epoch: 2, batch: 19983, sum loss: 
4813.338867, avg loss: 2.457039, ppl: 11.670203 +epoch: 2, batch: 19984, sum loss: 4227.717773, avg loss: 2.603274, ppl: 13.507895 +epoch: 2, batch: 19985, sum loss: 3677.534180, avg loss: 2.571702, ppl: 13.088084 +epoch: 2, batch: 19986, sum loss: 3733.490234, avg loss: 2.307472, ppl: 10.048991 +epoch: 2, batch: 19987, sum loss: 4153.486328, avg loss: 2.440356, ppl: 11.477129 +epoch: 2, batch: 19988, sum loss: 4582.913086, avg loss: 2.473240, ppl: 11.860809 +epoch: 2, batch: 19989, sum loss: 3740.736084, avg loss: 2.470764, ppl: 11.831479 +epoch: 2, batch: 19990, sum loss: 5344.950195, avg loss: 2.711796, ppl: 15.056293 +epoch: 2, batch: 19991, sum loss: 5322.960938, avg loss: 2.696535, ppl: 14.828268 +epoch: 2, batch: 19992, sum loss: 4174.325195, avg loss: 2.587926, ppl: 13.302159 +epoch: 2, batch: 19993, sum loss: 4217.215332, avg loss: 2.566777, ppl: 13.023784 +epoch: 2, batch: 19994, sum loss: 5365.391602, avg loss: 2.817958, ppl: 16.742626 +epoch: 2, batch: 19995, sum loss: 4201.782227, avg loss: 2.609802, ppl: 13.596365 +epoch: 2, batch: 19996, sum loss: 5049.006836, avg loss: 2.593224, ppl: 13.372817 +epoch: 2, batch: 19997, sum loss: 3968.637207, avg loss: 2.351089, ppl: 10.496990 +epoch: 2, batch: 19998, sum loss: 3766.009521, avg loss: 2.340590, ppl: 10.387363 +epoch: 2, batch: 19999, sum loss: 5190.286133, avg loss: 2.885095, ppl: 17.905272 +epoch: 2, batch: 20000, sum loss: 4292.964844, avg loss: 2.394292, ppl: 10.960431 +epoch: 2, batch: 20001, sum loss: 4217.798828, avg loss: 2.391042, ppl: 10.924877 +epoch: 2, batch: 20002, sum loss: 3745.560059, avg loss: 2.464184, ppl: 11.753890 +epoch: 2, batch: 20003, sum loss: 4407.914551, avg loss: 2.644220, ppl: 14.072462 +epoch: 2, batch: 20004, sum loss: 4085.698486, avg loss: 2.482198, ppl: 11.967546 +epoch: 2, batch: 20005, sum loss: 4279.378418, avg loss: 2.428705, ppl: 11.344184 +epoch: 2, batch: 20006, sum loss: 4264.686523, avg loss: 2.349690, ppl: 10.482317 +epoch: 2, batch: 20007, sum loss: 
3517.372070, avg loss: 2.324767, ppl: 10.224297 +epoch: 2, batch: 20008, sum loss: 3595.388672, avg loss: 2.220747, ppl: 9.214207 +epoch: 2, batch: 20009, sum loss: 4640.996094, avg loss: 2.595635, ppl: 13.405103 +epoch: 2, batch: 20010, sum loss: 4090.837158, avg loss: 2.520540, ppl: 12.435316 +epoch: 2, batch: 20011, sum loss: 4308.308594, avg loss: 2.497570, ppl: 12.152930 +epoch: 2, batch: 20012, sum loss: 4411.450684, avg loss: 2.663919, ppl: 14.352432 +epoch: 2, batch: 20013, sum loss: 4311.790039, avg loss: 2.375642, ppl: 10.757918 +epoch: 2, batch: 20014, sum loss: 3193.846191, avg loss: 2.069894, ppl: 7.923982 +epoch: 2, batch: 20015, sum loss: 3991.882324, avg loss: 2.440026, ppl: 11.473340 +epoch: 2, batch: 20016, sum loss: 4261.600098, avg loss: 2.449195, ppl: 11.579026 +epoch: 2, batch: 20017, sum loss: 4414.773438, avg loss: 2.379932, ppl: 10.804165 +epoch: 2, batch: 20018, sum loss: 4757.237305, avg loss: 2.791806, ppl: 16.310450 +epoch: 2, batch: 20019, sum loss: 3781.525391, avg loss: 2.356091, ppl: 10.549627 +epoch: 2, batch: 20020, sum loss: 4647.175781, avg loss: 2.627007, ppl: 13.832312 +epoch: 2, batch: 20021, sum loss: 4365.388184, avg loss: 2.650509, ppl: 14.161243 +epoch: 2, batch: 20022, sum loss: 4176.814453, avg loss: 2.483243, ppl: 11.980050 +epoch: 2, batch: 20023, sum loss: 3681.044189, avg loss: 2.293485, ppl: 9.909416 +epoch: 2, batch: 20024, sum loss: 3636.796387, avg loss: 2.214858, ppl: 9.160107 +epoch: 2, batch: 20025, sum loss: 3138.104248, avg loss: 2.338379, ppl: 10.364419 +epoch: 2, batch: 20026, sum loss: 4995.249023, avg loss: 2.847919, ppl: 17.251835 +epoch: 2, batch: 20027, sum loss: 4079.084961, avg loss: 2.496380, ppl: 12.138474 +epoch: 2, batch: 20028, sum loss: 3764.165527, avg loss: 2.326431, ppl: 10.241325 +epoch: 2, batch: 20029, sum loss: 3876.025391, avg loss: 2.375016, ppl: 10.751183 +epoch: 2, batch: 20030, sum loss: 4828.009277, avg loss: 2.605510, ppl: 13.538121 +epoch: 2, batch: 20031, sum loss: 
3829.221436, avg loss: 2.519225, ppl: 12.418963 +epoch: 2, batch: 20032, sum loss: 4617.214355, avg loss: 2.630891, ppl: 13.886142 +epoch: 2, batch: 20033, sum loss: 3808.771484, avg loss: 2.436834, ppl: 11.436773 +epoch: 2, batch: 20034, sum loss: 4692.914062, avg loss: 2.744394, ppl: 15.555185 +epoch: 2, batch: 20035, sum loss: 4174.505859, avg loss: 2.647119, ppl: 14.113314 +epoch: 2, batch: 20036, sum loss: 4508.295410, avg loss: 2.486650, ppl: 12.020932 +epoch: 2, batch: 20037, sum loss: 4520.032227, avg loss: 2.727841, ppl: 15.299814 +epoch: 2, batch: 20038, sum loss: 5168.148438, avg loss: 2.880796, ppl: 17.828463 +epoch: 2, batch: 20039, sum loss: 4134.029297, avg loss: 2.488880, ppl: 12.047771 +epoch: 2, batch: 20040, sum loss: 4690.432129, avg loss: 2.617429, ppl: 13.700452 +epoch: 2, batch: 20041, sum loss: 4616.596680, avg loss: 2.627545, ppl: 13.839750 +epoch: 2, batch: 20042, sum loss: 5233.923340, avg loss: 2.792915, ppl: 16.328554 +epoch: 2, batch: 20043, sum loss: 4766.474609, avg loss: 2.797227, ppl: 16.399107 +epoch: 2, batch: 20044, sum loss: 4441.118652, avg loss: 2.567121, ppl: 13.028256 +epoch: 2, batch: 20045, sum loss: 4189.140625, avg loss: 2.299199, ppl: 9.966197 +epoch: 2, batch: 20046, sum loss: 5136.664551, avg loss: 2.923543, ppl: 18.607090 +epoch: 2, batch: 20047, sum loss: 4177.968750, avg loss: 2.501778, ppl: 12.204169 +epoch: 2, batch: 20048, sum loss: 4043.220459, avg loss: 2.495815, ppl: 12.131620 +epoch: 2, batch: 20049, sum loss: 4118.273438, avg loss: 2.593371, ppl: 13.374787 +epoch: 2, batch: 20050, sum loss: 3247.953857, avg loss: 2.193082, ppl: 8.962790 +epoch: 2, batch: 20051, sum loss: 4347.633789, avg loss: 2.454903, ppl: 11.645308 +epoch: 2, batch: 20052, sum loss: 4273.310547, avg loss: 2.623272, ppl: 13.780743 +epoch: 2, batch: 20053, sum loss: 4504.288574, avg loss: 2.609669, ppl: 13.594550 +epoch: 2, batch: 20054, sum loss: 4333.274902, avg loss: 2.574733, ppl: 13.127806 +epoch: 2, batch: 20055, sum loss: 
3927.833496, avg loss: 2.326916, ppl: 10.246290 +epoch: 2, batch: 20056, sum loss: 4993.556641, avg loss: 2.845331, ppl: 17.207260 +epoch: 2, batch: 20057, sum loss: 4663.882812, avg loss: 2.615751, ppl: 13.677478 +epoch: 2, batch: 20058, sum loss: 4818.090820, avg loss: 2.607192, ppl: 13.560919 +epoch: 2, batch: 20059, sum loss: 5043.422363, avg loss: 2.485669, ppl: 12.009153 +epoch: 2, batch: 20060, sum loss: 4940.540527, avg loss: 2.649083, ppl: 14.141070 +epoch: 2, batch: 20061, sum loss: 4038.569092, avg loss: 2.412527, ppl: 11.162127 +epoch: 2, batch: 20062, sum loss: 3851.726562, avg loss: 2.423994, ppl: 11.290866 +epoch: 2, batch: 20063, sum loss: 5292.396484, avg loss: 2.856123, ppl: 17.393963 +epoch: 2, batch: 20064, sum loss: 4029.649658, avg loss: 2.609877, ppl: 13.597377 +epoch: 2, batch: 20065, sum loss: 3786.073975, avg loss: 2.276653, ppl: 9.744013 +epoch: 2, batch: 20066, sum loss: 4624.121582, avg loss: 2.622871, ppl: 13.775214 +epoch: 2, batch: 20067, sum loss: 3971.756348, avg loss: 2.560771, ppl: 12.945798 +epoch: 2, batch: 20068, sum loss: 4089.221680, avg loss: 2.351479, ppl: 10.501087 +epoch: 2, batch: 20069, sum loss: 3795.122559, avg loss: 2.562541, ppl: 12.968723 +epoch: 2, batch: 20070, sum loss: 5326.050781, avg loss: 2.787049, ppl: 16.233046 +epoch: 2, batch: 20071, sum loss: 3372.080566, avg loss: 2.135580, ppl: 8.461951 +epoch: 2, batch: 20072, sum loss: 3286.730469, avg loss: 2.385145, ppl: 10.860642 +epoch: 2, batch: 20073, sum loss: 4512.532227, avg loss: 2.686031, ppl: 14.673324 +epoch: 2, batch: 20074, sum loss: 4680.769043, avg loss: 2.287766, ppl: 9.852901 +epoch: 2, batch: 20075, sum loss: 3837.632812, avg loss: 2.480694, ppl: 11.949550 +epoch: 2, batch: 20076, sum loss: 3298.114990, avg loss: 2.347413, ppl: 10.458477 +epoch: 2, batch: 20077, sum loss: 3326.841553, avg loss: 2.297543, ppl: 9.949701 +epoch: 2, batch: 20078, sum loss: 5083.712891, avg loss: 2.653295, ppl: 14.200750 +epoch: 2, batch: 20079, sum loss: 
3997.880371, avg loss: 2.381108, ppl: 10.816882 +epoch: 2, batch: 20080, sum loss: 3700.462891, avg loss: 2.066143, ppl: 7.894318 +epoch: 2, batch: 20081, sum loss: 4928.467285, avg loss: 2.470410, ppl: 11.827291 +epoch: 2, batch: 20082, sum loss: 4670.832520, avg loss: 2.511200, ppl: 12.319710 +epoch: 2, batch: 20083, sum loss: 4453.593262, avg loss: 2.580297, ppl: 13.201061 +epoch: 2, batch: 20084, sum loss: 4519.064941, avg loss: 2.556032, ppl: 12.884595 +epoch: 2, batch: 20085, sum loss: 3445.398926, avg loss: 2.227149, ppl: 9.273388 +epoch: 2, batch: 20086, sum loss: 3806.466064, avg loss: 2.407632, ppl: 11.107626 +epoch: 2, batch: 20087, sum loss: 4886.101074, avg loss: 2.754285, ppl: 15.709799 +epoch: 2, batch: 20088, sum loss: 4821.066895, avg loss: 2.614461, ppl: 13.659858 +epoch: 2, batch: 20089, sum loss: 4850.618164, avg loss: 2.433827, ppl: 11.402440 +epoch: 2, batch: 20090, sum loss: 4784.310059, avg loss: 2.644727, ppl: 14.079594 +epoch: 2, batch: 20091, sum loss: 4696.350586, avg loss: 2.677509, ppl: 14.548804 +epoch: 2, batch: 20092, sum loss: 5001.570312, avg loss: 2.702091, ppl: 14.910877 +epoch: 2, batch: 20093, sum loss: 4800.019043, avg loss: 2.671129, ppl: 14.456285 +epoch: 2, batch: 20094, sum loss: 4311.199707, avg loss: 2.450938, ppl: 11.599221 +epoch: 2, batch: 20095, sum loss: 4275.964355, avg loss: 2.449006, ppl: 11.576835 +epoch: 2, batch: 20096, sum loss: 4839.580078, avg loss: 2.735772, ppl: 15.421642 +epoch: 2, batch: 20097, sum loss: 4470.457031, avg loss: 2.603644, ppl: 13.512891 +epoch: 2, batch: 20098, sum loss: 5051.110352, avg loss: 2.829754, ppl: 16.941290 +epoch: 2, batch: 20099, sum loss: 5069.598633, avg loss: 2.496110, ppl: 12.135193 +epoch: 2, batch: 20100, sum loss: 4084.874756, avg loss: 2.340902, ppl: 10.390608 +epoch: 2, batch: 20101, sum loss: 4400.416992, avg loss: 2.537726, ppl: 12.650869 +epoch: 2, batch: 20102, sum loss: 4157.896484, avg loss: 2.587366, ppl: 13.294703 +epoch: 2, batch: 20103, sum loss: 
3633.408691, avg loss: 2.315748, ppl: 10.132499 +epoch: 2, batch: 20104, sum loss: 4709.230957, avg loss: 2.731573, ppl: 15.357018 +epoch: 2, batch: 20105, sum loss: 4397.669434, avg loss: 2.642830, ppl: 14.052922 +epoch: 2, batch: 20106, sum loss: 4148.726562, avg loss: 2.438993, ppl: 11.461488 +epoch: 2, batch: 20107, sum loss: 3490.150635, avg loss: 2.374252, ppl: 10.742975 +epoch: 2, batch: 20108, sum loss: 2718.865723, avg loss: 2.239593, ppl: 9.389507 +epoch: 2, batch: 20109, sum loss: 3667.512695, avg loss: 2.579123, ppl: 13.185567 +epoch: 2, batch: 20110, sum loss: 4105.949707, avg loss: 2.387180, ppl: 10.882762 +epoch: 2, batch: 20111, sum loss: 4612.437988, avg loss: 2.680092, ppl: 14.586430 +epoch: 2, batch: 20112, sum loss: 4282.366699, avg loss: 2.619184, ppl: 13.724526 +epoch: 2, batch: 20113, sum loss: 3889.698486, avg loss: 2.550622, ppl: 12.815072 +epoch: 2, batch: 20114, sum loss: 4026.876221, avg loss: 2.409860, ppl: 11.132404 +epoch: 2, batch: 20115, sum loss: 3446.585693, avg loss: 2.189698, ppl: 8.932519 +epoch: 2, batch: 20116, sum loss: 4443.117676, avg loss: 2.649444, ppl: 14.146173 +epoch: 2, batch: 20117, sum loss: 4636.384766, avg loss: 2.453114, ppl: 11.624484 +epoch: 2, batch: 20118, sum loss: 4179.339355, avg loss: 2.475912, ppl: 11.892550 +epoch: 2, batch: 20119, sum loss: 4409.024414, avg loss: 2.555956, ppl: 12.883615 +epoch: 2, batch: 20120, sum loss: 4217.592773, avg loss: 2.571703, ppl: 13.088094 +epoch: 2, batch: 20121, sum loss: 3701.582520, avg loss: 2.246106, ppl: 9.450862 +epoch: 2, batch: 20122, sum loss: 3975.779541, avg loss: 2.425735, ppl: 11.310537 +epoch: 2, batch: 20123, sum loss: 3729.989502, avg loss: 2.356279, ppl: 10.551615 +epoch: 2, batch: 20124, sum loss: 4013.733887, avg loss: 2.553266, ppl: 12.848998 +epoch: 2, batch: 20125, sum loss: 3757.823242, avg loss: 2.252892, ppl: 9.515212 +epoch: 2, batch: 20126, sum loss: 3590.513672, avg loss: 2.445854, ppl: 11.540401 +epoch: 2, batch: 20127, sum loss: 
3349.336426, avg loss: 2.232891, ppl: 9.326790 +epoch: 2, batch: 20128, sum loss: 3978.525635, avg loss: 2.617451, ppl: 13.700758 +epoch: 2, batch: 20129, sum loss: 4214.331055, avg loss: 2.817066, ppl: 16.727703 +epoch: 2, batch: 20130, sum loss: 3000.460449, avg loss: 2.292178, ppl: 9.896465 +epoch: 2, batch: 20131, sum loss: 3789.580078, avg loss: 2.380389, ppl: 10.809112 +epoch: 2, batch: 20132, sum loss: 4199.218262, avg loss: 2.557380, ppl: 12.901973 +epoch: 2, batch: 20133, sum loss: 3717.040527, avg loss: 2.481335, ppl: 11.957221 +epoch: 2, batch: 20134, sum loss: 3973.244141, avg loss: 2.634777, ppl: 13.940207 +epoch: 2, batch: 20135, sum loss: 3974.863770, avg loss: 2.528539, ppl: 12.535184 +epoch: 2, batch: 20136, sum loss: 4127.287598, avg loss: 2.541433, ppl: 12.697855 +epoch: 2, batch: 20137, sum loss: 3891.090332, avg loss: 2.231130, ppl: 9.310377 +epoch: 2, batch: 20138, sum loss: 5412.789551, avg loss: 2.871506, ppl: 17.663607 +epoch: 2, batch: 20139, sum loss: 4564.188965, avg loss: 2.455185, ppl: 11.648587 +epoch: 2, batch: 20140, sum loss: 3814.360352, avg loss: 2.586007, ppl: 13.276653 +epoch: 2, batch: 20141, sum loss: 4942.599609, avg loss: 2.613749, ppl: 13.650129 +epoch: 2, batch: 20142, sum loss: 4114.595215, avg loss: 2.570016, ppl: 13.066032 +epoch: 2, batch: 20143, sum loss: 5232.192383, avg loss: 2.763968, ppl: 15.862669 +epoch: 2, batch: 20144, sum loss: 4216.850098, avg loss: 2.720549, ppl: 15.188653 +epoch: 2, batch: 20145, sum loss: 5020.971680, avg loss: 2.907337, ppl: 18.307987 +epoch: 2, batch: 20146, sum loss: 5089.465820, avg loss: 2.791808, ppl: 16.310484 +epoch: 2, batch: 20147, sum loss: 5002.314453, avg loss: 2.682206, ppl: 14.617306 +epoch: 2, batch: 20148, sum loss: 4714.699219, avg loss: 2.906719, ppl: 18.296677 +epoch: 2, batch: 20149, sum loss: 3875.688965, avg loss: 2.592434, ppl: 13.362255 +epoch: 2, batch: 20150, sum loss: 3654.761475, avg loss: 2.418770, ppl: 11.232037 +epoch: 2, batch: 20151, sum loss: 
4112.171387, avg loss: 2.420348, ppl: 11.249776 +epoch: 2, batch: 20152, sum loss: 4082.059326, avg loss: 2.588497, ppl: 13.309748 +epoch: 2, batch: 20153, sum loss: 4390.901855, avg loss: 2.503365, ppl: 12.223555 +epoch: 2, batch: 20154, sum loss: 4659.086914, avg loss: 2.614527, ppl: 13.660753 +epoch: 2, batch: 20155, sum loss: 4184.464355, avg loss: 2.377537, ppl: 10.778318 +epoch: 2, batch: 20156, sum loss: 5022.657227, avg loss: 2.812238, ppl: 16.647137 +epoch: 2, batch: 20157, sum loss: 3936.853516, avg loss: 2.566397, ppl: 13.018835 +epoch: 2, batch: 20158, sum loss: 3482.251709, avg loss: 2.313789, ppl: 10.112665 +epoch: 2, batch: 20159, sum loss: 4024.346191, avg loss: 2.481101, ppl: 11.954422 +epoch: 2, batch: 20160, sum loss: 2962.314941, avg loss: 2.162274, ppl: 8.690877 +epoch: 2, batch: 20161, sum loss: 3592.859131, avg loss: 2.416180, ppl: 11.202978 +epoch: 2, batch: 20162, sum loss: 4068.921631, avg loss: 2.383668, ppl: 10.844610 +epoch: 2, batch: 20163, sum loss: 4114.848145, avg loss: 2.587955, ppl: 13.302537 +epoch: 2, batch: 20164, sum loss: 4179.626953, avg loss: 2.600888, ppl: 13.475699 +epoch: 2, batch: 20165, sum loss: 5448.868652, avg loss: 2.747791, ppl: 15.608109 +epoch: 2, batch: 20166, sum loss: 4539.610352, avg loss: 2.534679, ppl: 12.612381 +epoch: 2, batch: 20167, sum loss: 3610.580322, avg loss: 2.358315, ppl: 10.573123 +epoch: 2, batch: 20168, sum loss: 3289.464111, avg loss: 2.124977, ppl: 8.372704 +epoch: 2, batch: 20169, sum loss: 3639.427734, avg loss: 2.407029, ppl: 11.100933 +epoch: 2, batch: 20170, sum loss: 4194.140137, avg loss: 2.497999, ppl: 12.158140 +epoch: 2, batch: 20171, sum loss: 3940.943359, avg loss: 2.410363, ppl: 11.138003 +epoch: 2, batch: 20172, sum loss: 4437.357422, avg loss: 2.628766, ppl: 13.856665 +epoch: 2, batch: 20173, sum loss: 3779.359619, avg loss: 2.345971, ppl: 10.443412 +epoch: 2, batch: 20174, sum loss: 3935.924561, avg loss: 2.299021, ppl: 9.964425 +epoch: 2, batch: 20175, sum loss: 
2858.904297, avg loss: 2.162560, ppl: 8.693364 +epoch: 2, batch: 20176, sum loss: 3770.667236, avg loss: 2.570325, ppl: 13.070073 +epoch: 2, batch: 20177, sum loss: 3865.522949, avg loss: 2.429618, ppl: 11.354548 +epoch: 2, batch: 20178, sum loss: 5423.973633, avg loss: 2.850223, ppl: 17.291630 +epoch: 2, batch: 20179, sum loss: 3590.347168, avg loss: 2.401570, ppl: 11.040495 +epoch: 2, batch: 20180, sum loss: 4569.057617, avg loss: 2.516001, ppl: 12.378991 +epoch: 2, batch: 20181, sum loss: 4623.096680, avg loss: 2.562692, ppl: 12.970690 +epoch: 2, batch: 20182, sum loss: 3720.379639, avg loss: 2.397152, ppl: 10.991826 +epoch: 2, batch: 20183, sum loss: 4184.272461, avg loss: 2.378779, ppl: 10.791720 +epoch: 2, batch: 20184, sum loss: 3578.033203, avg loss: 2.372701, ppl: 10.726324 +epoch: 2, batch: 20185, sum loss: 5289.318848, avg loss: 2.800063, ppl: 16.445681 +epoch: 2, batch: 20186, sum loss: 4298.874023, avg loss: 2.572636, ppl: 13.100307 +epoch: 2, batch: 20187, sum loss: 4065.374512, avg loss: 2.388587, ppl: 10.898079 +epoch: 2, batch: 20188, sum loss: 3798.071777, avg loss: 2.384226, ppl: 10.850659 +epoch: 2, batch: 20189, sum loss: 4781.248535, avg loss: 2.721257, ppl: 15.199415 +epoch: 2, batch: 20190, sum loss: 4514.691406, avg loss: 2.688917, ppl: 14.715729 +epoch: 2, batch: 20191, sum loss: 4845.928223, avg loss: 2.728563, ppl: 15.310874 +epoch: 2, batch: 20192, sum loss: 4232.907227, avg loss: 2.405061, ppl: 11.079103 +epoch: 2, batch: 20193, sum loss: 3649.560059, avg loss: 2.429800, ppl: 11.356613 +epoch: 2, batch: 20194, sum loss: 4335.625000, avg loss: 2.684597, ppl: 14.652303 +epoch: 2, batch: 20195, sum loss: 4832.277344, avg loss: 2.514192, ppl: 12.356625 +epoch: 2, batch: 20196, sum loss: 3975.218262, avg loss: 2.407764, ppl: 11.109093 +epoch: 2, batch: 20197, sum loss: 3190.684814, avg loss: 2.287229, ppl: 9.847615 +epoch: 2, batch: 20198, sum loss: 3583.920166, avg loss: 2.368751, ppl: 10.684040 +epoch: 2, batch: 20199, sum loss: 
3841.319092, avg loss: 2.352308, ppl: 10.509799 +epoch: 2, batch: 20200, sum loss: 4774.650391, avg loss: 2.662940, ppl: 14.338375 +epoch: 2, batch: 20201, sum loss: 3365.532471, avg loss: 2.245185, ppl: 9.442163 +epoch: 2, batch: 20202, sum loss: 4826.457031, avg loss: 2.736087, ppl: 15.426500 +epoch: 2, batch: 20203, sum loss: 4057.767090, avg loss: 2.545650, ppl: 12.751520 +epoch: 2, batch: 20204, sum loss: 4430.801270, avg loss: 2.601762, ppl: 13.487486 +epoch: 2, batch: 20205, sum loss: 3406.733154, avg loss: 2.069704, ppl: 7.922480 +epoch: 2, batch: 20206, sum loss: 3829.374023, avg loss: 2.309634, ppl: 10.070743 +epoch: 2, batch: 20207, sum loss: 4891.205078, avg loss: 2.693395, ppl: 14.781770 +epoch: 2, batch: 20208, sum loss: 5100.046875, avg loss: 2.811492, ppl: 16.634722 +epoch: 2, batch: 20209, sum loss: 3869.971680, avg loss: 2.539351, ppl: 12.671451 +epoch: 2, batch: 20210, sum loss: 3886.595215, avg loss: 2.533634, ppl: 12.599214 +epoch: 2, batch: 20211, sum loss: 4984.691895, avg loss: 2.570754, ppl: 13.075681 +epoch: 2, batch: 20212, sum loss: 3982.780762, avg loss: 2.400712, ppl: 11.031025 +epoch: 2, batch: 20213, sum loss: 3867.400146, avg loss: 2.375553, ppl: 10.756959 +epoch: 2, batch: 20214, sum loss: 4070.811523, avg loss: 2.323523, ppl: 10.211584 +epoch: 2, batch: 20215, sum loss: 5085.625977, avg loss: 2.803543, ppl: 16.503008 +epoch: 2, batch: 20216, sum loss: 4178.680664, avg loss: 2.568335, ppl: 13.044086 +epoch: 2, batch: 20217, sum loss: 4688.945801, avg loss: 2.629807, ppl: 13.871093 +epoch: 2, batch: 20218, sum loss: 3849.646240, avg loss: 2.327477, ppl: 10.252040 +epoch: 2, batch: 20219, sum loss: 3432.279785, avg loss: 2.352488, ppl: 10.511688 +epoch: 2, batch: 20220, sum loss: 4042.983398, avg loss: 2.562093, ppl: 12.962927 +epoch: 2, batch: 20221, sum loss: 5014.807129, avg loss: 2.722480, ppl: 15.218010 +epoch: 2, batch: 20222, sum loss: 3677.116943, avg loss: 2.302515, ppl: 9.999302 +epoch: 2, batch: 20223, sum loss: 
3797.008301, avg loss: 2.418477, ppl: 11.228740 +epoch: 2, batch: 20224, sum loss: 3051.903809, avg loss: 2.250667, ppl: 9.494062 +epoch: 2, batch: 20225, sum loss: 4196.013672, avg loss: 2.724684, ppl: 15.251597 +epoch: 2, batch: 20226, sum loss: 4445.645508, avg loss: 2.689441, ppl: 14.723439 +epoch: 2, batch: 20227, sum loss: 4162.809570, avg loss: 2.546061, ppl: 12.756753 +epoch: 2, batch: 20228, sum loss: 4249.176270, avg loss: 2.415677, ppl: 11.197351 +epoch: 2, batch: 20229, sum loss: 4875.262207, avg loss: 2.635277, ppl: 13.947176 +epoch: 2, batch: 20230, sum loss: 4694.109375, avg loss: 2.542855, ppl: 12.715918 +epoch: 2, batch: 20231, sum loss: 3999.987305, avg loss: 2.341913, ppl: 10.401113 +epoch: 2, batch: 20232, sum loss: 4185.150391, avg loss: 2.461853, ppl: 11.726523 +epoch: 2, batch: 20233, sum loss: 3466.209961, avg loss: 2.465299, ppl: 11.766995 +epoch: 2, batch: 20234, sum loss: 3605.086670, avg loss: 2.340965, ppl: 10.391262 +epoch: 2, batch: 20235, sum loss: 4448.073242, avg loss: 2.537406, ppl: 12.646828 +epoch: 2, batch: 20236, sum loss: 4282.100586, avg loss: 2.559534, ppl: 12.929794 +epoch: 2, batch: 20237, sum loss: 4416.984863, avg loss: 2.676960, ppl: 14.540829 +epoch: 2, batch: 20238, sum loss: 3444.439941, avg loss: 2.367313, ppl: 10.668683 +epoch: 2, batch: 20239, sum loss: 4506.178711, avg loss: 2.547303, ppl: 12.772609 +epoch: 2, batch: 20240, sum loss: 4942.895508, avg loss: 2.816465, ppl: 16.717644 +epoch: 2, batch: 20241, sum loss: 4137.075684, avg loss: 2.511886, ppl: 12.328155 +epoch: 2, batch: 20242, sum loss: 4854.131348, avg loss: 2.891085, ppl: 18.012836 +epoch: 2, batch: 20243, sum loss: 4466.566406, avg loss: 2.398801, ppl: 11.009963 +epoch: 2, batch: 20244, sum loss: 3930.617188, avg loss: 2.341047, ppl: 10.392107 +epoch: 2, batch: 20245, sum loss: 4123.966797, avg loss: 2.347164, ppl: 10.455872 +epoch: 2, batch: 20246, sum loss: 4277.434570, avg loss: 2.468225, ppl: 11.801483 +epoch: 2, batch: 20247, sum loss: 
4649.145996, avg loss: 2.482192, ppl: 11.967472 +epoch: 2, batch: 20248, sum loss: 3683.072021, avg loss: 2.297612, ppl: 9.950392 +epoch: 2, batch: 20249, sum loss: 4048.830078, avg loss: 2.549641, ppl: 12.802508 +epoch: 2, batch: 20250, sum loss: 4326.073730, avg loss: 2.531348, ppl: 12.570439 +epoch: 2, batch: 20251, sum loss: 4545.129883, avg loss: 2.470179, ppl: 11.824564 +epoch: 2, batch: 20252, sum loss: 3952.391602, avg loss: 2.627920, ppl: 13.844941 +epoch: 2, batch: 20253, sum loss: 3860.240234, avg loss: 2.423252, ppl: 11.282489 +epoch: 2, batch: 20254, sum loss: 4104.720215, avg loss: 2.489218, ppl: 12.051845 +epoch: 2, batch: 20255, sum loss: 3295.987305, avg loss: 2.157060, ppl: 8.645679 +epoch: 2, batch: 20256, sum loss: 4700.899414, avg loss: 2.852488, ppl: 17.330839 +epoch: 2, batch: 20257, sum loss: 4234.609863, avg loss: 2.664953, ppl: 14.367270 +epoch: 2, batch: 20258, sum loss: 4988.512695, avg loss: 2.850579, ppl: 17.297787 +epoch: 2, batch: 20259, sum loss: 3954.463135, avg loss: 2.312551, ppl: 10.100163 +epoch: 2, batch: 20260, sum loss: 4818.546387, avg loss: 2.767689, ppl: 15.921796 +epoch: 2, batch: 20261, sum loss: 4996.016113, avg loss: 2.585930, ppl: 13.275625 +epoch: 2, batch: 20262, sum loss: 5309.581055, avg loss: 2.631111, ppl: 13.889188 +epoch: 2, batch: 20263, sum loss: 4284.325195, avg loss: 2.567001, ppl: 13.026703 +epoch: 2, batch: 20264, sum loss: 4851.931641, avg loss: 2.642664, ppl: 14.050588 +epoch: 2, batch: 20265, sum loss: 4254.094727, avg loss: 2.505356, ppl: 12.247922 +epoch: 2, batch: 20266, sum loss: 4233.276367, avg loss: 2.488698, ppl: 12.045588 +epoch: 2, batch: 20267, sum loss: 4064.343506, avg loss: 2.521305, ppl: 12.444824 +epoch: 2, batch: 20268, sum loss: 4155.075684, avg loss: 2.504566, ppl: 12.238252 +epoch: 2, batch: 20269, sum loss: 4521.122559, avg loss: 2.337706, ppl: 10.357446 +epoch: 2, batch: 20270, sum loss: 4074.888672, avg loss: 2.356789, ppl: 10.557002 +epoch: 2, batch: 20271, sum loss: 
5363.711426, avg loss: 2.863701, ppl: 17.526270 +epoch: 2, batch: 20272, sum loss: 4544.505859, avg loss: 2.560285, ppl: 12.939503 +epoch: 2, batch: 20273, sum loss: 3821.750977, avg loss: 2.388594, ppl: 10.898165 +epoch: 2, batch: 20274, sum loss: 5154.561523, avg loss: 2.667993, ppl: 14.411012 +epoch: 2, batch: 20275, sum loss: 3928.320312, avg loss: 2.442985, ppl: 11.507342 +epoch: 2, batch: 20276, sum loss: 4191.830566, avg loss: 2.459995, ppl: 11.704748 +epoch: 2, batch: 20277, sum loss: 4891.862305, avg loss: 2.661514, ppl: 14.317947 +epoch: 2, batch: 20278, sum loss: 3757.122314, avg loss: 2.358520, ppl: 10.575289 +epoch: 2, batch: 20279, sum loss: 4205.722656, avg loss: 2.498944, ppl: 12.169634 +epoch: 2, batch: 20280, sum loss: 3932.102295, avg loss: 2.353143, ppl: 10.518582 +epoch: 2, batch: 20281, sum loss: 4549.553711, avg loss: 2.384462, ppl: 10.853224 +epoch: 2, batch: 20282, sum loss: 4609.788086, avg loss: 2.745556, ppl: 15.573265 +epoch: 2, batch: 20283, sum loss: 3133.449951, avg loss: 2.197370, ppl: 9.001311 +epoch: 2, batch: 20284, sum loss: 4356.089844, avg loss: 2.727671, ppl: 15.297214 +epoch: 2, batch: 20285, sum loss: 4290.523438, avg loss: 2.583097, ppl: 13.238067 +epoch: 2, batch: 20286, sum loss: 4722.208984, avg loss: 2.584679, ppl: 13.259037 +epoch: 2, batch: 20287, sum loss: 3774.682373, avg loss: 2.145925, ppl: 8.549949 +epoch: 2, batch: 20288, sum loss: 3844.452637, avg loss: 2.327151, ppl: 10.248697 +epoch: 2, batch: 20289, sum loss: 4023.924316, avg loss: 2.548400, ppl: 12.786633 +epoch: 2, batch: 20290, sum loss: 3799.896973, avg loss: 2.370491, ppl: 10.702643 +epoch: 2, batch: 20291, sum loss: 4458.691406, avg loss: 2.880292, ppl: 17.819469 +epoch: 2, batch: 20292, sum loss: 3964.737549, avg loss: 2.576178, ppl: 13.146792 +epoch: 2, batch: 20293, sum loss: 3257.898682, avg loss: 2.409688, ppl: 11.130493 +epoch: 2, batch: 20294, sum loss: 4215.458008, avg loss: 2.611808, ppl: 13.623661 +epoch: 2, batch: 20295, sum loss: 
4012.105469, avg loss: 2.416931, ppl: 11.211400 +epoch: 2, batch: 20296, sum loss: 4758.726562, avg loss: 2.820822, ppl: 16.790648 +epoch: 2, batch: 20297, sum loss: 4835.671387, avg loss: 2.569432, ppl: 13.058409 +epoch: 2, batch: 20298, sum loss: 4191.452148, avg loss: 2.529543, ppl: 12.547767 +epoch: 2, batch: 20299, sum loss: 5763.106445, avg loss: 2.831993, ppl: 16.979273 +epoch: 2, batch: 20300, sum loss: 4584.407227, avg loss: 2.537027, ppl: 12.642026 +epoch: 2, batch: 20301, sum loss: 4307.663086, avg loss: 2.415964, ppl: 11.200559 +epoch: 2, batch: 20302, sum loss: 5489.747559, avg loss: 2.650771, ppl: 14.164961 +epoch: 2, batch: 20303, sum loss: 4100.208984, avg loss: 2.577127, ppl: 13.159277 +epoch: 2, batch: 20304, sum loss: 4538.750000, avg loss: 2.655793, ppl: 14.236270 +epoch: 2, batch: 20305, sum loss: 4007.237305, avg loss: 2.261421, ppl: 9.596714 +epoch: 2, batch: 20306, sum loss: 5041.233398, avg loss: 2.705976, ppl: 14.968920 +epoch: 2, batch: 20307, sum loss: 3279.349365, avg loss: 2.236937, ppl: 9.364602 +epoch: 2, batch: 20308, sum loss: 5194.837402, avg loss: 2.773538, ppl: 16.015202 +epoch: 2, batch: 20309, sum loss: 4533.109375, avg loss: 2.514204, ppl: 12.356767 +epoch: 2, batch: 20310, sum loss: 4321.738281, avg loss: 2.338603, ppl: 10.366744 +epoch: 2, batch: 20311, sum loss: 3214.836914, avg loss: 2.332973, ppl: 10.308543 +epoch: 2, batch: 20312, sum loss: 4154.154297, avg loss: 2.427910, ppl: 11.335171 +epoch: 2, batch: 20313, sum loss: 4269.638672, avg loss: 2.505656, ppl: 12.251599 +epoch: 2, batch: 20314, sum loss: 5511.581543, avg loss: 2.683341, ppl: 14.633897 +epoch: 2, batch: 20315, sum loss: 3914.396240, avg loss: 2.325845, ppl: 10.235320 +epoch: 2, batch: 20316, sum loss: 4743.000977, avg loss: 2.590389, ppl: 13.334951 +epoch: 2, batch: 20317, sum loss: 3938.899658, avg loss: 2.259839, ppl: 9.581547 +epoch: 2, batch: 20318, sum loss: 5121.004883, avg loss: 2.647882, ppl: 14.124099 +epoch: 2, batch: 20319, sum loss: 
4365.729004, avg loss: 2.693232, ppl: 14.779363 +epoch: 2, batch: 20320, sum loss: 3880.083496, avg loss: 2.305457, ppl: 10.028757 +epoch: 2, batch: 20321, sum loss: 3810.712646, avg loss: 2.436517, ppl: 11.433149 +epoch: 2, batch: 20322, sum loss: 4567.629883, avg loss: 2.547479, ppl: 12.774859 +epoch: 2, batch: 20323, sum loss: 3397.057373, avg loss: 2.208750, ppl: 9.104329 +epoch: 2, batch: 20324, sum loss: 4716.191406, avg loss: 2.605631, ppl: 13.539764 +epoch: 2, batch: 20325, sum loss: 4187.367188, avg loss: 2.437350, ppl: 11.442677 +epoch: 2, batch: 20326, sum loss: 4253.910645, avg loss: 2.732120, ppl: 15.365428 +epoch: 2, batch: 20327, sum loss: 4167.909180, avg loss: 2.566447, ppl: 13.019478 +epoch: 2, batch: 20328, sum loss: 3548.481934, avg loss: 2.418870, ppl: 11.233155 +epoch: 2, batch: 20329, sum loss: 4485.919922, avg loss: 2.652821, ppl: 14.194021 +epoch: 2, batch: 20330, sum loss: 4571.871094, avg loss: 2.613991, ppl: 13.653440 +epoch: 2, batch: 20331, sum loss: 4619.232422, avg loss: 2.542230, ppl: 12.707983 +epoch: 2, batch: 20332, sum loss: 4566.608398, avg loss: 2.621474, ppl: 13.755989 +epoch: 2, batch: 20333, sum loss: 4408.876953, avg loss: 2.441239, ppl: 11.487261 +epoch: 2, batch: 20334, sum loss: 3865.268066, avg loss: 2.364078, ppl: 10.634233 +epoch: 2, batch: 20335, sum loss: 4465.481934, avg loss: 2.625210, ppl: 13.807471 +epoch: 2, batch: 20336, sum loss: 4110.222656, avg loss: 2.458267, ppl: 11.684547 +epoch: 2, batch: 20337, sum loss: 4655.416016, avg loss: 2.690992, ppl: 14.746295 +epoch: 2, batch: 20338, sum loss: 3792.092285, avg loss: 2.491519, ppl: 12.079617 +epoch: 2, batch: 20339, sum loss: 4210.386719, avg loss: 2.393625, ppl: 10.953130 +epoch: 2, batch: 20340, sum loss: 4852.187988, avg loss: 2.628488, ppl: 13.852803 +epoch: 2, batch: 20341, sum loss: 4045.291992, avg loss: 2.805334, ppl: 16.532602 +epoch: 2, batch: 20342, sum loss: 3501.311279, avg loss: 2.155980, ppl: 8.636349 +epoch: 2, batch: 20343, sum loss: 
5921.675293, avg loss: 2.813147, ppl: 16.662277 +epoch: 2, batch: 20344, sum loss: 4259.843750, avg loss: 2.541673, ppl: 12.700901 +epoch: 2, batch: 20345, sum loss: 5346.343262, avg loss: 3.079691, ppl: 21.751678 +epoch: 2, batch: 20346, sum loss: 4312.502930, avg loss: 2.488461, ppl: 12.042728 +epoch: 2, batch: 20347, sum loss: 4913.091797, avg loss: 2.804276, ppl: 16.515118 +epoch: 2, batch: 20348, sum loss: 4576.586426, avg loss: 2.651556, ppl: 14.176086 +epoch: 2, batch: 20349, sum loss: 3973.163818, avg loss: 2.440518, ppl: 11.478990 +epoch: 2, batch: 20350, sum loss: 4119.063477, avg loss: 2.353750, ppl: 10.524969 +epoch: 2, batch: 20351, sum loss: 4680.816895, avg loss: 2.545306, ppl: 12.747125 +epoch: 2, batch: 20352, sum loss: 4902.605957, avg loss: 2.583038, ppl: 13.237293 +epoch: 2, batch: 20353, sum loss: 3647.145996, avg loss: 2.407357, ppl: 11.104575 +epoch: 2, batch: 20354, sum loss: 4213.244629, avg loss: 2.446716, ppl: 11.550351 +epoch: 2, batch: 20355, sum loss: 4456.274414, avg loss: 2.618258, ppl: 13.711810 +epoch: 2, batch: 20356, sum loss: 3997.334961, avg loss: 2.485905, ppl: 12.011983 +epoch: 2, batch: 20357, sum loss: 4861.235840, avg loss: 2.720334, ppl: 15.185387 +epoch: 2, batch: 20358, sum loss: 5683.254395, avg loss: 2.749518, ppl: 15.635100 +epoch: 2, batch: 20359, sum loss: 3968.567871, avg loss: 2.439193, ppl: 11.463791 +epoch: 2, batch: 20360, sum loss: 4639.774414, avg loss: 2.603689, ppl: 13.513503 +epoch: 2, batch: 20361, sum loss: 4114.226074, avg loss: 2.465085, ppl: 11.764477 +epoch: 2, batch: 20362, sum loss: 3527.457520, avg loss: 2.447923, ppl: 11.564307 +epoch: 2, batch: 20363, sum loss: 3607.027344, avg loss: 2.218344, ppl: 9.192096 +epoch: 2, batch: 20364, sum loss: 3868.910645, avg loss: 2.407536, ppl: 11.106564 +epoch: 2, batch: 20365, sum loss: 3784.063232, avg loss: 2.398012, ppl: 11.001286 +epoch: 2, batch: 20366, sum loss: 4757.119141, avg loss: 2.665053, ppl: 14.368710 +epoch: 2, batch: 20367, sum loss: 
3453.361816, avg loss: 2.129076, ppl: 8.407097 +epoch: 2, batch: 20368, sum loss: 3968.419434, avg loss: 2.443608, ppl: 11.514511 +epoch: 2, batch: 20369, sum loss: 5044.739746, avg loss: 2.871223, ppl: 17.658609 +epoch: 2, batch: 20370, sum loss: 4959.087402, avg loss: 2.545733, ppl: 12.752569 +epoch: 2, batch: 20371, sum loss: 4050.951660, avg loss: 2.351104, ppl: 10.497149 +epoch: 2, batch: 20372, sum loss: 4646.307617, avg loss: 2.608820, ppl: 13.583010 +epoch: 2, batch: 20373, sum loss: 4544.336914, avg loss: 2.726057, ppl: 15.272546 +epoch: 2, batch: 20374, sum loss: 3705.962891, avg loss: 2.268031, ppl: 9.660362 +epoch: 2, batch: 20375, sum loss: 4054.609375, avg loss: 2.624343, ppl: 13.795504 +epoch: 2, batch: 20376, sum loss: 3582.309082, avg loss: 2.244555, ppl: 9.436211 +epoch: 2, batch: 20377, sum loss: 3435.051025, avg loss: 2.314724, ppl: 10.122133 +epoch: 2, batch: 20378, sum loss: 3850.907471, avg loss: 2.286762, ppl: 9.843017 +epoch: 2, batch: 20379, sum loss: 5110.177246, avg loss: 2.861242, ppl: 17.483221 +epoch: 2, batch: 20380, sum loss: 5268.814453, avg loss: 2.747036, ppl: 15.596332 +epoch: 2, batch: 20381, sum loss: 3833.058105, avg loss: 2.452373, ppl: 11.615873 +epoch: 2, batch: 20382, sum loss: 4093.759277, avg loss: 2.508431, ppl: 12.285638 +epoch: 2, batch: 20383, sum loss: 4322.543945, avg loss: 2.566831, ppl: 13.024489 +epoch: 2, batch: 20384, sum loss: 3923.507324, avg loss: 2.398232, ppl: 11.003704 +epoch: 2, batch: 20385, sum loss: 4738.285645, avg loss: 2.704501, ppl: 14.946858 +epoch: 2, batch: 20386, sum loss: 3607.552002, avg loss: 2.239325, ppl: 9.386992 +epoch: 2, batch: 20387, sum loss: 4563.871582, avg loss: 2.596059, ppl: 13.410780 +epoch: 2, batch: 20388, sum loss: 3518.171875, avg loss: 2.343885, ppl: 10.421651 +epoch: 2, batch: 20389, sum loss: 3824.329346, avg loss: 2.472094, ppl: 11.847230 +epoch: 2, batch: 20390, sum loss: 4272.010254, avg loss: 2.374658, ppl: 10.747341 +epoch: 2, batch: 20391, sum loss: 5615.590332, 
avg loss: 2.878314, ppl: 17.784264 +epoch: 2, batch: 20392, sum loss: 4372.848145, avg loss: 2.532049, ppl: 12.579251 +epoch: 2, batch: 20393, sum loss: 4126.952148, avg loss: 2.647179, ppl: 14.114168 +epoch: 2, batch: 20394, sum loss: 5469.241211, avg loss: 2.455878, ppl: 11.656670 +epoch: 2, batch: 20395, sum loss: 4120.372070, avg loss: 2.354498, ppl: 10.532845 +epoch: 2, batch: 20396, sum loss: 4052.187988, avg loss: 2.512206, ppl: 12.332103 +epoch: 2, batch: 20397, sum loss: 3926.752441, avg loss: 2.635404, ppl: 13.948952 +epoch: 2, batch: 20398, sum loss: 4386.737793, avg loss: 2.503846, ppl: 12.229437 +epoch: 2, batch: 20399, sum loss: 3513.113770, avg loss: 2.318887, ppl: 10.164355 +epoch: 2, batch: 20400, sum loss: 5111.500000, avg loss: 2.634794, ppl: 13.940440 +epoch: 2, batch: 20401, sum loss: 3582.033936, avg loss: 2.482352, ppl: 11.969383 +epoch: 2, batch: 20402, sum loss: 4540.964844, avg loss: 2.481402, ppl: 11.958014 +epoch: 2, batch: 20403, sum loss: 3055.016602, avg loss: 2.212177, ppl: 9.135583 +epoch: 2, batch: 20404, sum loss: 4019.326660, avg loss: 2.401031, ppl: 11.034553 +epoch: 2, batch: 20405, sum loss: 4082.691162, avg loss: 2.689520, ppl: 14.724605 +epoch: 2, batch: 20406, sum loss: 3556.185059, avg loss: 2.307713, ppl: 10.051406 +epoch: 2, batch: 20407, sum loss: 4072.999756, avg loss: 2.424405, ppl: 11.295503 +epoch: 2, batch: 20408, sum loss: 4760.423340, avg loss: 2.677403, ppl: 14.547272 +epoch: 2, batch: 20409, sum loss: 3826.799316, avg loss: 2.327737, ppl: 10.254707 +epoch: 2, batch: 20410, sum loss: 4193.999023, avg loss: 2.563569, ppl: 12.982072 +epoch: 2, batch: 20411, sum loss: 3519.968018, avg loss: 2.441032, ppl: 11.484886 +epoch: 2, batch: 20412, sum loss: 4117.969727, avg loss: 2.546673, ppl: 12.764563 +epoch: 2, batch: 20413, sum loss: 3218.381836, avg loss: 2.401778, ppl: 11.042788 +epoch: 2, batch: 20414, sum loss: 4248.184570, avg loss: 2.340598, ppl: 10.387443 +epoch: 2, batch: 20415, sum loss: 4485.577148, avg 
loss: 2.592819, ppl: 13.367404 +epoch: 2, batch: 20416, sum loss: 4393.182617, avg loss: 2.798206, ppl: 16.415165 +epoch: 2, batch: 20417, sum loss: 5219.052734, avg loss: 2.684698, ppl: 14.653777 +epoch: 2, batch: 20418, sum loss: 4009.757812, avg loss: 2.502970, ppl: 12.218730 +epoch: 2, batch: 20419, sum loss: 3943.444580, avg loss: 2.514952, ppl: 12.366014 +epoch: 2, batch: 20420, sum loss: 4891.127930, avg loss: 2.772748, ppl: 16.002552 +epoch: 2, batch: 20421, sum loss: 4025.982422, avg loss: 2.319114, ppl: 10.166665 +epoch: 2, batch: 20422, sum loss: 4092.438965, avg loss: 2.687091, ppl: 14.688878 +epoch: 2, batch: 20423, sum loss: 3812.976074, avg loss: 2.428647, ppl: 11.343527 +epoch: 2, batch: 20424, sum loss: 5145.335938, avg loss: 2.539653, ppl: 12.675266 +epoch: 2, batch: 20425, sum loss: 5187.921387, avg loss: 2.696425, ppl: 14.826632 +epoch: 2, batch: 20426, sum loss: 3775.123535, avg loss: 2.287954, ppl: 9.854751 +epoch: 2, batch: 20427, sum loss: 4866.782227, avg loss: 2.585963, ppl: 13.276068 +epoch: 2, batch: 20428, sum loss: 3788.181641, avg loss: 2.185910, ppl: 8.898741 +epoch: 2, batch: 20429, sum loss: 3878.647461, avg loss: 2.725683, ppl: 15.266845 +epoch: 2, batch: 20430, sum loss: 3183.395996, avg loss: 2.183399, ppl: 8.876428 +epoch: 2, batch: 20431, sum loss: 4372.107910, avg loss: 2.619597, ppl: 13.730195 +epoch: 2, batch: 20432, sum loss: 4225.376465, avg loss: 2.478227, ppl: 11.920107 +epoch: 2, batch: 20433, sum loss: 3614.575928, avg loss: 2.308158, ppl: 10.055886 +epoch: 2, batch: 20434, sum loss: 3739.535645, avg loss: 2.421979, ppl: 11.268139 +epoch: 2, batch: 20435, sum loss: 4033.630615, avg loss: 2.323520, ppl: 10.211555 +epoch: 2, batch: 20436, sum loss: 3948.062256, avg loss: 2.410294, ppl: 11.137239 +epoch: 2, batch: 20437, sum loss: 4130.564453, avg loss: 2.373888, ppl: 10.739059 +epoch: 2, batch: 20438, sum loss: 4689.629395, avg loss: 2.417335, ppl: 11.215927 +epoch: 2, batch: 20439, sum loss: 4184.684570, avg loss: 
2.610533, ppl: 13.606301 +epoch: 2, batch: 20440, sum loss: 3583.731445, avg loss: 2.105600, ppl: 8.212029 +epoch: 2, batch: 20441, sum loss: 4903.678223, avg loss: 2.756424, ppl: 15.743443 +epoch: 2, batch: 20442, sum loss: 5473.837891, avg loss: 2.795627, ppl: 16.372892 +epoch: 2, batch: 20443, sum loss: 4643.657227, avg loss: 2.755880, ppl: 15.734876 +epoch: 2, batch: 20444, sum loss: 4220.124023, avg loss: 2.425359, ppl: 11.306282 +epoch: 2, batch: 20445, sum loss: 4007.556396, avg loss: 2.398299, ppl: 11.004436 +epoch: 2, batch: 20446, sum loss: 4382.765137, avg loss: 2.573555, ppl: 13.112362 +epoch: 2, batch: 20447, sum loss: 4001.560791, avg loss: 2.571697, ppl: 13.088019 +epoch: 2, batch: 20448, sum loss: 4273.445312, avg loss: 2.557418, ppl: 12.902461 +epoch: 2, batch: 20449, sum loss: 4191.581055, avg loss: 2.685190, ppl: 14.660983 +epoch: 2, batch: 20450, sum loss: 3923.939941, avg loss: 2.515346, ppl: 12.370892 +epoch: 2, batch: 20451, sum loss: 3623.682373, avg loss: 2.339369, ppl: 10.374686 +epoch: 2, batch: 20452, sum loss: 4450.424805, avg loss: 2.630275, ppl: 13.877583 +epoch: 2, batch: 20453, sum loss: 4055.552979, avg loss: 2.472898, ppl: 11.856761 +epoch: 2, batch: 20454, sum loss: 4032.585205, avg loss: 2.665291, ppl: 14.372132 +epoch: 2, batch: 20455, sum loss: 4669.061523, avg loss: 2.620124, ppl: 13.737432 +epoch: 2, batch: 20456, sum loss: 3958.589355, avg loss: 2.532687, ppl: 12.587279 +epoch: 2, batch: 20457, sum loss: 4159.561523, avg loss: 2.639316, ppl: 14.003616 +epoch: 2, batch: 20458, sum loss: 4579.051758, avg loss: 2.592895, ppl: 13.368411 +epoch: 2, batch: 20459, sum loss: 4231.965332, avg loss: 2.602685, ppl: 13.499933 +epoch: 2, batch: 20460, sum loss: 4777.834961, avg loss: 2.808839, ppl: 16.590643 +epoch: 2, batch: 20461, sum loss: 4455.279785, avg loss: 2.495955, ppl: 12.133318 +epoch: 2, batch: 20462, sum loss: 4958.620117, avg loss: 2.629173, ppl: 13.862302 +epoch: 2, batch: 20463, sum loss: 3827.190430, avg loss: 
2.519546, ppl: 12.422956 +epoch: 2, batch: 20464, sum loss: 3894.799072, avg loss: 2.411640, ppl: 11.152241 +epoch: 2, batch: 20465, sum loss: 3674.341553, avg loss: 2.303663, ppl: 10.010780 +epoch: 2, batch: 20466, sum loss: 3432.924805, avg loss: 2.327407, ppl: 10.251322 +epoch: 2, batch: 20467, sum loss: 4541.135742, avg loss: 2.591973, ppl: 13.356091 +epoch: 2, batch: 20468, sum loss: 4217.452637, avg loss: 2.639207, ppl: 14.002094 +epoch: 2, batch: 20469, sum loss: 5673.244629, avg loss: 2.796079, ppl: 16.380297 +epoch: 2, batch: 20470, sum loss: 4585.891602, avg loss: 2.521106, ppl: 12.442350 +epoch: 2, batch: 20471, sum loss: 3533.993652, avg loss: 2.196391, ppl: 8.992504 +epoch: 2, batch: 20472, sum loss: 3823.894043, avg loss: 2.507472, ppl: 12.273857 +epoch: 2, batch: 20473, sum loss: 4447.777832, avg loss: 2.645912, ppl: 14.096291 +epoch: 2, batch: 20474, sum loss: 4227.868652, avg loss: 2.587435, ppl: 13.295625 +epoch: 2, batch: 20475, sum loss: 4106.965332, avg loss: 2.418708, ppl: 11.231335 +epoch: 2, batch: 20476, sum loss: 4594.493164, avg loss: 2.566756, ppl: 13.023504 +epoch: 2, batch: 20477, sum loss: 4102.361328, avg loss: 2.507556, ppl: 12.274893 +epoch: 2, batch: 20478, sum loss: 4634.393066, avg loss: 2.677292, ppl: 14.545655 +epoch: 2, batch: 20479, sum loss: 4130.237793, avg loss: 2.337430, ppl: 10.354586 +epoch: 2, batch: 20480, sum loss: 5077.170898, avg loss: 2.581175, ppl: 13.212648 +epoch: 2, batch: 20481, sum loss: 3836.991211, avg loss: 2.289374, ppl: 9.868761 +epoch: 2, batch: 20482, sum loss: 3936.032715, avg loss: 2.592907, ppl: 13.368577 +epoch: 2, batch: 20483, sum loss: 4280.874512, avg loss: 2.483106, ppl: 11.978414 +epoch: 2, batch: 20484, sum loss: 4585.274902, avg loss: 2.568782, ppl: 13.049915 +epoch: 2, batch: 20485, sum loss: 4605.470703, avg loss: 2.657513, ppl: 14.260784 +epoch: 2, batch: 20486, sum loss: 5675.377441, avg loss: 2.882365, ppl: 17.856462 +epoch: 2, batch: 20487, sum loss: 4634.966797, avg loss: 2.539708, 
ppl: 12.675968 +epoch: 2, batch: 20488, sum loss: 4109.811523, avg loss: 2.254422, ppl: 9.529785 +epoch: 2, batch: 20489, sum loss: 3458.602783, avg loss: 2.418603, ppl: 11.230165 +epoch: 2, batch: 20490, sum loss: 4586.992676, avg loss: 2.673073, ppl: 14.484408 +epoch: 2, batch: 20491, sum loss: 4488.019531, avg loss: 2.389787, ppl: 10.911166 +epoch: 2, batch: 20492, sum loss: 4313.713867, avg loss: 2.515285, ppl: 12.370134 +epoch: 2, batch: 20493, sum loss: 4567.022949, avg loss: 2.584620, ppl: 13.258246 +epoch: 2, batch: 20494, sum loss: 3988.827148, avg loss: 2.381390, ppl: 10.819928 +epoch: 2, batch: 20495, sum loss: 4380.061035, avg loss: 2.504323, ppl: 12.235273 +epoch: 2, batch: 20496, sum loss: 3761.685547, avg loss: 2.590693, ppl: 13.339005 +epoch: 2, batch: 20497, sum loss: 4274.063965, avg loss: 2.427066, ppl: 11.325607 +epoch: 2, batch: 20498, sum loss: 4185.307617, avg loss: 2.693248, ppl: 14.779606 +epoch: 2, batch: 20499, sum loss: 4741.591309, avg loss: 2.578353, ppl: 13.175423 +epoch: 2, batch: 20500, sum loss: 4682.149902, avg loss: 2.695538, ppl: 14.813491 +epoch: 2, batch: 20501, sum loss: 4277.056641, avg loss: 2.703575, ppl: 14.933027 +epoch: 2, batch: 20502, sum loss: 4060.824219, avg loss: 2.453670, ppl: 11.630958 +epoch: 2, batch: 20503, sum loss: 4677.408203, avg loss: 2.503966, ppl: 12.230904 +epoch: 2, batch: 20504, sum loss: 3719.442627, avg loss: 2.496270, ppl: 12.137143 +epoch: 2, batch: 20505, sum loss: 3670.536133, avg loss: 2.333462, ppl: 10.313588 +epoch: 2, batch: 20506, sum loss: 4136.265137, avg loss: 2.612928, ppl: 13.638927 +epoch: 2, batch: 20507, sum loss: 4624.677246, avg loss: 2.838967, ppl: 17.098091 +epoch: 2, batch: 20508, sum loss: 5081.707031, avg loss: 2.639848, ppl: 14.011070 +epoch: 2, batch: 20509, sum loss: 3045.609375, avg loss: 2.208564, ppl: 9.102632 +epoch: 2, batch: 20510, sum loss: 3804.113770, avg loss: 2.299948, ppl: 9.973663 +epoch: 2, batch: 20511, sum loss: 4059.658936, avg loss: 2.442635, ppl: 
11.503310 +epoch: 2, batch: 20512, sum loss: 4666.332520, avg loss: 2.589530, ppl: 13.323504 +epoch: 2, batch: 20513, sum loss: 4454.371094, avg loss: 2.525154, ppl: 12.492814 +epoch: 2, batch: 20514, sum loss: 3892.021484, avg loss: 2.270724, ppl: 9.686414 +epoch: 2, batch: 20515, sum loss: 4207.194824, avg loss: 2.466116, ppl: 11.776623 +epoch: 2, batch: 20516, sum loss: 3566.242432, avg loss: 2.601198, ppl: 13.479877 +epoch: 2, batch: 20517, sum loss: 4180.397461, avg loss: 2.671180, ppl: 14.457026 +epoch: 2, batch: 20518, sum loss: 4242.316406, avg loss: 2.729933, ppl: 15.331863 +epoch: 2, batch: 20519, sum loss: 4606.213379, avg loss: 2.741794, ppl: 15.514791 +epoch: 2, batch: 20520, sum loss: 3087.382324, avg loss: 2.206850, ppl: 9.087043 +epoch: 2, batch: 20521, sum loss: 4611.109863, avg loss: 2.741445, ppl: 15.509374 +epoch: 2, batch: 20522, sum loss: 4691.126953, avg loss: 2.730575, ppl: 15.341700 +epoch: 2, batch: 20523, sum loss: 3425.152832, avg loss: 2.271322, ppl: 9.692201 +epoch: 2, batch: 20524, sum loss: 4524.753418, avg loss: 2.658492, ppl: 14.274748 +epoch: 2, batch: 20525, sum loss: 4302.152344, avg loss: 2.483922, ppl: 11.988187 +epoch: 2, batch: 20526, sum loss: 4336.623047, avg loss: 2.502379, ppl: 12.211510 +epoch: 2, batch: 20527, sum loss: 4567.010254, avg loss: 2.431848, ppl: 11.379890 +epoch: 2, batch: 20528, sum loss: 3710.533936, avg loss: 2.622285, ppl: 13.767151 +epoch: 2, batch: 20529, sum loss: 4635.710938, avg loss: 2.647465, ppl: 14.118203 +epoch: 2, batch: 20530, sum loss: 4192.300781, avg loss: 2.314909, ppl: 10.124006 +epoch: 2, batch: 20531, sum loss: 5301.164062, avg loss: 2.670612, ppl: 14.448804 +epoch: 2, batch: 20532, sum loss: 5044.326660, avg loss: 2.953353, ppl: 19.170118 +epoch: 2, batch: 20533, sum loss: 4560.731934, avg loss: 2.619605, ppl: 13.730297 +epoch: 2, batch: 20534, sum loss: 4042.132324, avg loss: 2.473765, ppl: 11.867043 +epoch: 2, batch: 20535, sum loss: 3666.859619, avg loss: 2.452749, ppl: 11.620244 
+epoch: 2, batch: 20536, sum loss: 4212.926758, avg loss: 2.556388, ppl: 12.889174 +epoch: 2, batch: 20537, sum loss: 3700.618896, avg loss: 2.552151, ppl: 12.834682 +epoch: 2, batch: 20538, sum loss: 4152.282715, avg loss: 2.574261, ppl: 13.121616 +epoch: 2, batch: 20539, sum loss: 4726.725098, avg loss: 2.662944, ppl: 14.338433 +epoch: 2, batch: 20540, sum loss: 3900.022217, avg loss: 2.559070, ppl: 12.923788 +epoch: 2, batch: 20541, sum loss: 3962.099365, avg loss: 2.531693, ppl: 12.574774 +epoch: 2, batch: 20542, sum loss: 3830.279297, avg loss: 2.355645, ppl: 10.544930 +epoch: 2, batch: 20543, sum loss: 3946.538818, avg loss: 2.541236, ppl: 12.695349 +epoch: 2, batch: 20544, sum loss: 4332.108887, avg loss: 2.662636, ppl: 14.334024 +epoch: 2, batch: 20545, sum loss: 4082.708496, avg loss: 2.551693, ppl: 12.828801 +epoch: 2, batch: 20546, sum loss: 4766.899414, avg loss: 2.531545, ppl: 12.572915 +epoch: 2, batch: 20547, sum loss: 4271.838379, avg loss: 2.568754, ppl: 13.049557 +epoch: 2, batch: 20548, sum loss: 4784.246582, avg loss: 2.765460, ppl: 15.886353 +epoch: 2, batch: 20549, sum loss: 5023.257324, avg loss: 2.572072, ppl: 13.092928 +epoch: 2, batch: 20550, sum loss: 3893.462402, avg loss: 2.371171, ppl: 10.709927 +epoch: 2, batch: 20551, sum loss: 3357.680176, avg loss: 2.281033, ppl: 9.786783 +epoch: 2, batch: 20552, sum loss: 4762.299805, avg loss: 2.600929, ppl: 13.476246 +epoch: 2, batch: 20553, sum loss: 4685.600586, avg loss: 2.406575, ppl: 11.095889 +epoch: 2, batch: 20554, sum loss: 3633.148682, avg loss: 2.431826, ppl: 11.379646 +epoch: 2, batch: 20555, sum loss: 3802.749023, avg loss: 2.411382, ppl: 11.149356 +epoch: 2, batch: 20556, sum loss: 4072.038574, avg loss: 2.577240, ppl: 13.160758 +epoch: 2, batch: 20557, sum loss: 3796.177734, avg loss: 2.424124, ppl: 11.292331 +epoch: 2, batch: 20558, sum loss: 5098.310547, avg loss: 2.754355, ppl: 15.710899 +epoch: 2, batch: 20559, sum loss: 4201.242676, avg loss: 2.393870, ppl: 10.955815 +epoch: 
2, batch: 20560, sum loss: 4260.127930, avg loss: 2.349767, ppl: 10.483131 +epoch: 2, batch: 20561, sum loss: 4414.485352, avg loss: 2.378494, ppl: 10.788646 +epoch: 2, batch: 20562, sum loss: 4357.683594, avg loss: 2.510186, ppl: 12.307224 +epoch: 2, batch: 20563, sum loss: 3217.082275, avg loss: 2.206504, ppl: 9.083900 +epoch: 2, batch: 20564, sum loss: 3694.763672, avg loss: 2.259794, ppl: 9.581118 +epoch: 2, batch: 20565, sum loss: 3132.980469, avg loss: 2.478624, ppl: 11.924843 +epoch: 2, batch: 20566, sum loss: 3641.991211, avg loss: 2.375728, ppl: 10.758844 +epoch: 2, batch: 20567, sum loss: 4658.830566, avg loss: 2.699207, ppl: 14.867930 +epoch: 2, batch: 20568, sum loss: 4379.211426, avg loss: 2.416783, ppl: 11.209743 +epoch: 2, batch: 20569, sum loss: 5554.444336, avg loss: 2.995924, ppl: 20.003845 +epoch: 2, batch: 20570, sum loss: 4879.186035, avg loss: 2.541243, ppl: 12.695439 +epoch: 2, batch: 20571, sum loss: 5429.798340, avg loss: 2.742322, ppl: 15.522991 +epoch: 2, batch: 20572, sum loss: 3859.477295, avg loss: 2.519241, ppl: 12.419168 +epoch: 2, batch: 20573, sum loss: 4080.134766, avg loss: 2.407159, ppl: 11.102375 +epoch: 2, batch: 20574, sum loss: 3872.398438, avg loss: 2.409707, ppl: 11.130694 +epoch: 2, batch: 20575, sum loss: 3294.490479, avg loss: 2.122739, ppl: 8.353987 +epoch: 2, batch: 20576, sum loss: 4857.729492, avg loss: 2.641506, ppl: 14.034327 +epoch: 2, batch: 20577, sum loss: 4317.361328, avg loss: 2.435060, ppl: 11.416504 +epoch: 2, batch: 20578, sum loss: 4466.547363, avg loss: 2.843124, ppl: 17.169317 +epoch: 2, batch: 20579, sum loss: 3461.951172, avg loss: 2.261235, ppl: 9.594936 +epoch: 2, batch: 20580, sum loss: 4718.456543, avg loss: 2.682466, ppl: 14.621098 +epoch: 2, batch: 20581, sum loss: 2952.571533, avg loss: 2.402418, ppl: 11.049862 +epoch: 2, batch: 20582, sum loss: 4025.431396, avg loss: 2.495618, ppl: 12.129226 +epoch: 2, batch: 20583, sum loss: 4124.178711, avg loss: 2.326102, ppl: 10.237956 +epoch: 2, batch: 
20584, sum loss: 3609.645996, avg loss: 2.429102, ppl: 11.348688 +epoch: 2, batch: 20585, sum loss: 4019.167480, avg loss: 2.365608, ppl: 10.650510 +epoch: 2, batch: 20586, sum loss: 4953.860352, avg loss: 2.779944, ppl: 16.118122 +epoch: 2, batch: 20587, sum loss: 4819.464355, avg loss: 2.564909, ppl: 12.999478 +epoch: 2, batch: 20588, sum loss: 4057.835205, avg loss: 2.568250, ppl: 13.042981 +epoch: 2, batch: 20589, sum loss: 4335.308594, avg loss: 2.529352, ppl: 12.545370 +epoch: 2, batch: 20590, sum loss: 4759.261719, avg loss: 2.539627, ppl: 12.674947 +epoch: 2, batch: 20591, sum loss: 4791.251953, avg loss: 2.697777, ppl: 14.846691 +epoch: 2, batch: 20592, sum loss: 4629.157715, avg loss: 2.494158, ppl: 12.111534 +epoch: 2, batch: 20593, sum loss: 4344.981934, avg loss: 2.660736, ppl: 14.306813 +epoch: 2, batch: 20594, sum loss: 4507.139648, avg loss: 2.588822, ppl: 13.314083 +epoch: 2, batch: 20595, sum loss: 3479.156006, avg loss: 2.335004, ppl: 10.329502 +epoch: 2, batch: 20596, sum loss: 4633.186035, avg loss: 2.455318, ppl: 11.650143 +epoch: 2, batch: 20597, sum loss: 3529.672852, avg loss: 2.461418, ppl: 11.721416 +epoch: 2, batch: 20598, sum loss: 4676.894531, avg loss: 2.646800, ppl: 14.108812 +epoch: 2, batch: 20599, sum loss: 3369.256836, avg loss: 2.285792, ppl: 9.833467 +epoch: 2, batch: 20600, sum loss: 3078.126221, avg loss: 2.255038, ppl: 9.535651 +epoch: 2, batch: 20601, sum loss: 5395.938477, avg loss: 2.787158, ppl: 16.234819 +epoch: 2, batch: 20602, sum loss: 3392.666992, avg loss: 2.116448, ppl: 8.301601 +epoch: 2, batch: 20603, sum loss: 4118.122559, avg loss: 2.384553, ppl: 10.854207 +epoch: 2, batch: 20604, sum loss: 4945.954590, avg loss: 2.708628, ppl: 15.008669 +epoch: 2, batch: 20605, sum loss: 3561.320801, avg loss: 2.159685, ppl: 8.668407 +epoch: 2, batch: 20606, sum loss: 5027.903809, avg loss: 2.725151, ppl: 15.258718 +epoch: 2, batch: 20607, sum loss: 4892.638672, avg loss: 2.602467, ppl: 13.496998 +epoch: 2, batch: 20608, sum 
loss: 5109.410156, avg loss: 2.707690, ppl: 14.994591 +epoch: 2, batch: 20609, sum loss: 4199.373047, avg loss: 2.557475, ppl: 12.903191 +epoch: 2, batch: 20610, sum loss: 4462.721680, avg loss: 2.454742, ppl: 11.643435 +epoch: 2, batch: 20611, sum loss: 3774.506348, avg loss: 2.529830, ppl: 12.551372 +epoch: 2, batch: 20612, sum loss: 4365.751953, avg loss: 2.484776, ppl: 11.998435 +epoch: 2, batch: 20613, sum loss: 4317.193848, avg loss: 2.450167, ppl: 11.590281 +epoch: 2, batch: 20614, sum loss: 4536.641602, avg loss: 2.510593, ppl: 12.312228 +epoch: 2, batch: 20615, sum loss: 3722.499512, avg loss: 2.429830, ppl: 11.356952 +epoch: 2, batch: 20616, sum loss: 3718.795410, avg loss: 2.459521, ppl: 11.699204 +epoch: 2, batch: 20617, sum loss: 4562.840332, avg loss: 2.649733, ppl: 14.150261 +epoch: 2, batch: 20618, sum loss: 3821.787109, avg loss: 2.379693, ppl: 10.801586 +epoch: 2, batch: 20619, sum loss: 4921.571777, avg loss: 2.761825, ppl: 15.828702 +epoch: 2, batch: 20620, sum loss: 3960.124023, avg loss: 2.517561, ppl: 12.398323 +epoch: 2, batch: 20621, sum loss: 4518.853027, avg loss: 2.678632, ppl: 14.565162 +epoch: 2, batch: 20622, sum loss: 5014.727539, avg loss: 2.574296, ppl: 13.122069 +epoch: 2, batch: 20623, sum loss: 4866.535645, avg loss: 2.772955, ppl: 16.005857 +epoch: 2, batch: 20624, sum loss: 3695.076172, avg loss: 2.329808, ppl: 10.275973 +epoch: 2, batch: 20625, sum loss: 3754.916992, avg loss: 2.394717, ppl: 10.965099 +epoch: 2, batch: 20626, sum loss: 5384.147461, avg loss: 2.525398, ppl: 12.495862 +epoch: 2, batch: 20627, sum loss: 3664.573730, avg loss: 2.439796, ppl: 11.470700 +epoch: 2, batch: 20628, sum loss: 4045.708984, avg loss: 2.486607, ppl: 12.020416 +epoch: 2, batch: 20629, sum loss: 4815.370117, avg loss: 2.809434, ppl: 16.600523 +epoch: 2, batch: 20630, sum loss: 4382.926270, avg loss: 2.150602, ppl: 8.590025 +epoch: 2, batch: 20631, sum loss: 3685.042969, avg loss: 2.385141, ppl: 10.860596 +epoch: 2, batch: 20632, sum loss: 
4691.435547, avg loss: 2.604906, ppl: 13.529951 +epoch: 2, batch: 20633, sum loss: 4361.582031, avg loss: 2.438000, ppl: 11.450117 +epoch: 2, batch: 20634, sum loss: 3307.279053, avg loss: 2.455293, ppl: 11.649843 +epoch: 2, batch: 20635, sum loss: 4202.921875, avg loss: 2.625185, ppl: 13.807135 +epoch: 2, batch: 20636, sum loss: 4420.970703, avg loss: 2.708928, ppl: 15.013174 +epoch: 2, batch: 20637, sum loss: 3513.008301, avg loss: 2.401236, ppl: 11.036810 +epoch: 2, batch: 20638, sum loss: 4155.568359, avg loss: 2.636782, ppl: 13.968181 +epoch: 2, batch: 20639, sum loss: 3450.498535, avg loss: 2.038097, ppl: 7.675991 +epoch: 2, batch: 20640, sum loss: 3041.029297, avg loss: 2.161357, ppl: 8.682914 +epoch: 2, batch: 20641, sum loss: 4403.182617, avg loss: 2.508936, ppl: 12.291843 +epoch: 2, batch: 20642, sum loss: 4040.356445, avg loss: 2.392159, ppl: 10.937081 +epoch: 2, batch: 20643, sum loss: 5131.505859, avg loss: 2.664333, ppl: 14.358375 +epoch: 2, batch: 20644, sum loss: 5370.695801, avg loss: 2.700199, ppl: 14.882691 +epoch: 2, batch: 20645, sum loss: 4251.264160, avg loss: 2.477427, ppl: 11.910576 +epoch: 2, batch: 20646, sum loss: 4090.147949, avg loss: 2.497038, ppl: 12.146459 +epoch: 2, batch: 20647, sum loss: 4950.761230, avg loss: 2.714233, ppl: 15.093032 +epoch: 2, batch: 20648, sum loss: 3807.998779, avg loss: 2.702625, ppl: 14.918843 +epoch: 2, batch: 20649, sum loss: 4700.711914, avg loss: 2.715605, ppl: 15.113748 +epoch: 2, batch: 20650, sum loss: 4520.205566, avg loss: 2.605306, ppl: 13.535365 +epoch: 2, batch: 20651, sum loss: 4667.622070, avg loss: 2.837460, ppl: 17.072348 +epoch: 2, batch: 20652, sum loss: 4499.547852, avg loss: 2.620587, ppl: 13.743791 +epoch: 2, batch: 20653, sum loss: 4594.354492, avg loss: 2.592751, ppl: 13.366489 +epoch: 2, batch: 20654, sum loss: 4836.535156, avg loss: 2.826730, ppl: 16.890144 +epoch: 2, batch: 20655, sum loss: 3663.566406, avg loss: 2.294030, ppl: 9.914818 +epoch: 2, batch: 20656, sum loss: 
4925.319336, avg loss: 2.753113, ppl: 15.691403 +epoch: 2, batch: 20657, sum loss: 5156.146973, avg loss: 2.816028, ppl: 16.710346 +epoch: 2, batch: 20658, sum loss: 4615.082031, avg loss: 2.653871, ppl: 14.208940 +epoch: 2, batch: 20659, sum loss: 3753.719971, avg loss: 2.429592, ppl: 11.354253 +epoch: 2, batch: 20660, sum loss: 4062.691650, avg loss: 2.543952, ppl: 12.729883 +epoch: 2, batch: 20661, sum loss: 4044.375977, avg loss: 2.413112, ppl: 11.168666 +epoch: 2, batch: 20662, sum loss: 3784.466309, avg loss: 2.444746, ppl: 11.527618 +epoch: 2, batch: 20663, sum loss: 3778.207275, avg loss: 2.593142, ppl: 13.371714 +epoch: 2, batch: 20664, sum loss: 3217.885498, avg loss: 2.195011, ppl: 8.980097 +epoch: 2, batch: 20665, sum loss: 4294.375977, avg loss: 2.682309, ppl: 14.618804 +epoch: 2, batch: 20666, sum loss: 3802.541260, avg loss: 2.335713, ppl: 10.336829 +epoch: 2, batch: 20667, sum loss: 3775.518799, avg loss: 2.434248, ppl: 11.407237 +epoch: 2, batch: 20668, sum loss: 4815.157227, avg loss: 2.367334, ppl: 10.668907 +epoch: 2, batch: 20669, sum loss: 4207.520508, avg loss: 2.628058, ppl: 13.846852 +epoch: 2, batch: 20670, sum loss: 4534.411621, avg loss: 2.821663, ppl: 16.804768 +epoch: 2, batch: 20671, sum loss: 4470.913086, avg loss: 2.478333, ppl: 11.921378 +epoch: 2, batch: 20672, sum loss: 3880.552734, avg loss: 2.285367, ppl: 9.829290 +epoch: 2, batch: 20673, sum loss: 4337.022949, avg loss: 2.614239, ppl: 13.656822 +epoch: 2, batch: 20674, sum loss: 3767.995605, avg loss: 2.426269, ppl: 11.316582 +epoch: 2, batch: 20675, sum loss: 4498.264648, avg loss: 2.802657, ppl: 16.488400 +epoch: 2, batch: 20676, sum loss: 4569.384277, avg loss: 2.444828, ppl: 11.528569 +epoch: 2, batch: 20677, sum loss: 4721.567383, avg loss: 2.727653, ppl: 15.296944 +epoch: 2, batch: 20678, sum loss: 4147.318359, avg loss: 2.398680, ppl: 11.008640 +epoch: 2, batch: 20679, sum loss: 3588.824707, avg loss: 2.302004, ppl: 9.994194 +epoch: 2, batch: 20680, sum loss: 
4643.960938, avg loss: 2.739800, ppl: 15.483888 +epoch: 2, batch: 20681, sum loss: 5041.800781, avg loss: 2.597527, ppl: 13.430487 +epoch: 2, batch: 20682, sum loss: 3273.569824, avg loss: 2.252973, ppl: 9.515985 +epoch: 2, batch: 20683, sum loss: 5234.700195, avg loss: 2.756556, ppl: 15.745526 +epoch: 2, batch: 20684, sum loss: 4106.711914, avg loss: 2.582838, ppl: 13.234639 +epoch: 2, batch: 20685, sum loss: 4394.821777, avg loss: 2.558104, ppl: 12.911308 +epoch: 2, batch: 20686, sum loss: 4460.243164, avg loss: 2.508573, ppl: 12.287387 +epoch: 2, batch: 20687, sum loss: 3936.469727, avg loss: 2.404685, ppl: 11.074944 +epoch: 2, batch: 20688, sum loss: 4177.260254, avg loss: 2.471752, ppl: 11.843174 +epoch: 2, batch: 20689, sum loss: 3854.130127, avg loss: 2.410338, ppl: 11.137722 +epoch: 2, batch: 20690, sum loss: 4076.172852, avg loss: 2.436445, ppl: 11.432329 +epoch: 2, batch: 20691, sum loss: 3704.920898, avg loss: 2.274353, ppl: 9.721625 +epoch: 2, batch: 20692, sum loss: 4692.199707, avg loss: 2.737573, ppl: 15.449439 +epoch: 2, batch: 20693, sum loss: 3439.427246, avg loss: 2.470853, ppl: 11.832534 +epoch: 2, batch: 20694, sum loss: 4155.167480, avg loss: 2.605121, ppl: 13.532858 +epoch: 2, batch: 20695, sum loss: 4076.588867, avg loss: 2.313615, ppl: 10.110906 +epoch: 2, batch: 20696, sum loss: 4602.127930, avg loss: 2.499798, ppl: 12.180034 +epoch: 2, batch: 20697, sum loss: 3659.450928, avg loss: 2.193915, ppl: 8.970266 +epoch: 2, batch: 20698, sum loss: 4940.513184, avg loss: 2.743206, ppl: 15.536709 +epoch: 2, batch: 20699, sum loss: 3904.929443, avg loss: 2.577511, ppl: 13.164335 +epoch: 2, batch: 20700, sum loss: 5228.248047, avg loss: 2.861657, ppl: 17.490492 +epoch: 2, batch: 20701, sum loss: 4454.159180, avg loss: 2.523603, ppl: 12.473457 +epoch: 2, batch: 20702, sum loss: 4870.465820, avg loss: 2.366601, ppl: 10.661098 +epoch: 2, batch: 20703, sum loss: 3757.970703, avg loss: 2.430770, ppl: 11.367634 +epoch: 2, batch: 20704, sum loss: 
5413.786133, avg loss: 2.604034, ppl: 13.518156 +epoch: 2, batch: 20705, sum loss: 3874.104980, avg loss: 2.639036, ppl: 13.999701 +epoch: 2, batch: 20706, sum loss: 3824.053223, avg loss: 2.360527, ppl: 10.596532 +epoch: 2, batch: 20707, sum loss: 4142.398926, avg loss: 2.496925, ppl: 12.145092 +epoch: 2, batch: 20708, sum loss: 4126.887207, avg loss: 2.343491, ppl: 10.417537 +epoch: 2, batch: 20709, sum loss: 4140.798340, avg loss: 2.610844, ppl: 13.610532 +epoch: 2, batch: 20710, sum loss: 4658.902832, avg loss: 2.654645, ppl: 14.219943 +epoch: 2, batch: 20711, sum loss: 4468.245605, avg loss: 2.770146, ppl: 15.960966 +epoch: 2, batch: 20712, sum loss: 3645.108887, avg loss: 2.377762, ppl: 10.780746 +epoch: 2, batch: 20713, sum loss: 3508.364502, avg loss: 2.489968, ppl: 12.060888 +epoch: 2, batch: 20714, sum loss: 4117.903809, avg loss: 2.461389, ppl: 11.721082 +epoch: 2, batch: 20715, sum loss: 4801.195801, avg loss: 2.723310, ppl: 15.230656 +epoch: 2, batch: 20716, sum loss: 5452.278809, avg loss: 2.790317, ppl: 16.286175 +epoch: 2, batch: 20717, sum loss: 3497.883301, avg loss: 2.153869, ppl: 8.618136 +epoch: 2, batch: 20718, sum loss: 4664.378418, avg loss: 2.711848, ppl: 15.057076 +epoch: 2, batch: 20719, sum loss: 5221.488281, avg loss: 2.690102, ppl: 14.733180 +epoch: 2, batch: 20720, sum loss: 5558.825195, avg loss: 2.710300, ppl: 15.033785 +epoch: 2, batch: 20721, sum loss: 5066.167969, avg loss: 2.641381, ppl: 14.032563 +epoch: 2, batch: 20722, sum loss: 3721.418945, avg loss: 2.421222, ppl: 11.259615 +epoch: 2, batch: 20723, sum loss: 4189.501953, avg loss: 2.490786, ppl: 12.070758 +epoch: 2, batch: 20724, sum loss: 4693.282227, avg loss: 2.610279, ppl: 13.602850 +epoch: 2, batch: 20725, sum loss: 4449.380859, avg loss: 2.538152, ppl: 12.656263 +epoch: 2, batch: 20726, sum loss: 4260.497070, avg loss: 2.561934, ppl: 12.960866 +epoch: 2, batch: 20727, sum loss: 4852.667969, avg loss: 2.480914, ppl: 11.952185 +epoch: 2, batch: 20728, sum loss: 
4384.482910, avg loss: 2.538786, ppl: 12.664284 +epoch: 2, batch: 20729, sum loss: 4476.191406, avg loss: 2.402679, ppl: 11.052750 +epoch: 2, batch: 20730, sum loss: 4177.133301, avg loss: 2.439914, ppl: 11.472056 +epoch: 2, batch: 20731, sum loss: 3737.860596, avg loss: 2.116569, ppl: 8.302601 +epoch: 2, batch: 20732, sum loss: 4530.280273, avg loss: 2.466130, ppl: 11.776780 +epoch: 2, batch: 20733, sum loss: 4161.051270, avg loss: 2.233522, ppl: 9.332678 +epoch: 2, batch: 20734, sum loss: 4146.826660, avg loss: 2.458107, ppl: 11.682678 +epoch: 2, batch: 20735, sum loss: 4721.875000, avg loss: 2.640870, ppl: 14.025395 +epoch: 2, batch: 20736, sum loss: 4493.311523, avg loss: 2.573489, ppl: 13.111490 +epoch: 2, batch: 20737, sum loss: 4859.010742, avg loss: 2.725188, ppl: 15.259290 +epoch: 2, batch: 20738, sum loss: 4180.267090, avg loss: 2.491220, ppl: 12.076000 +epoch: 2, batch: 20739, sum loss: 4154.130371, avg loss: 2.322040, ppl: 10.196457 +epoch: 2, batch: 20740, sum loss: 3639.200439, avg loss: 2.249197, ppl: 9.480118 +epoch: 2, batch: 20741, sum loss: 3458.255371, avg loss: 2.177743, ppl: 8.826360 +epoch: 2, batch: 20742, sum loss: 4276.555664, avg loss: 2.512665, ppl: 12.337764 +epoch: 2, batch: 20743, sum loss: 4113.920410, avg loss: 2.373872, ppl: 10.738894 +epoch: 2, batch: 20744, sum loss: 3979.554932, avg loss: 2.594234, ppl: 13.386330 +epoch: 2, batch: 20745, sum loss: 4746.370117, avg loss: 2.540883, ppl: 12.690876 +epoch: 2, batch: 20746, sum loss: 4412.529297, avg loss: 2.574404, ppl: 13.123500 +epoch: 2, batch: 20747, sum loss: 4095.429932, avg loss: 2.461196, ppl: 11.718819 +epoch: 2, batch: 20748, sum loss: 4228.203125, avg loss: 2.691409, ppl: 14.752445 +epoch: 2, batch: 20749, sum loss: 3973.149902, avg loss: 2.480119, ppl: 11.942679 +epoch: 2, batch: 20750, sum loss: 3350.941895, avg loss: 2.139810, ppl: 8.497820 +epoch: 2, batch: 20751, sum loss: 4868.188477, avg loss: 2.716623, ppl: 15.129146 +epoch: 2, batch: 20752, sum loss: 5097.478027, 
avg loss: 2.790081, ppl: 16.282339 +epoch: 2, batch: 20753, sum loss: 5361.616211, avg loss: 2.624384, ppl: 13.796072 +epoch: 2, batch: 20754, sum loss: 5693.299805, avg loss: 2.793572, ppl: 16.339279 +epoch: 2, batch: 20755, sum loss: 3745.827393, avg loss: 2.344072, ppl: 10.423598 +epoch: 2, batch: 20756, sum loss: 3985.193359, avg loss: 2.360897, ppl: 10.600451 +epoch: 2, batch: 20757, sum loss: 4401.930664, avg loss: 2.538599, ppl: 12.661920 +epoch: 2, batch: 20758, sum loss: 5613.901855, avg loss: 2.931541, ppl: 18.756521 +epoch: 2, batch: 20759, sum loss: 4141.736328, avg loss: 2.649863, ppl: 14.152103 +epoch: 2, batch: 20760, sum loss: 4284.536621, avg loss: 2.595116, ppl: 13.398144 +epoch: 2, batch: 20761, sum loss: 4099.108887, avg loss: 2.411241, ppl: 11.147782 +epoch: 2, batch: 20762, sum loss: 3946.956299, avg loss: 2.433388, ppl: 11.397436 +epoch: 2, batch: 20763, sum loss: 4403.133789, avg loss: 2.577947, ppl: 13.170074 +epoch: 2, batch: 20764, sum loss: 4450.689941, avg loss: 2.713835, ppl: 15.087027 +epoch: 2, batch: 20765, sum loss: 3401.466553, avg loss: 2.410678, ppl: 11.141512 +epoch: 2, batch: 20766, sum loss: 3431.696777, avg loss: 2.474186, ppl: 11.872045 +epoch: 2, batch: 20767, sum loss: 3940.224609, avg loss: 2.450388, ppl: 11.592849 +epoch: 2, batch: 20768, sum loss: 4950.420898, avg loss: 2.534778, ppl: 12.613626 +epoch: 2, batch: 20769, sum loss: 4322.337891, avg loss: 2.426916, ppl: 11.323910 +epoch: 2, batch: 20770, sum loss: 4041.452881, avg loss: 2.434610, ppl: 11.411369 +epoch: 2, batch: 20771, sum loss: 4935.014160, avg loss: 2.611119, ppl: 13.614270 +epoch: 2, batch: 20772, sum loss: 4352.585938, avg loss: 2.566383, ppl: 13.018656 +epoch: 2, batch: 20773, sum loss: 4037.296875, avg loss: 2.521734, ppl: 12.450172 +epoch: 2, batch: 20774, sum loss: 3810.846436, avg loss: 2.309604, ppl: 10.070436 +epoch: 2, batch: 20775, sum loss: 3747.831543, avg loss: 2.408632, ppl: 11.118741 +epoch: 2, batch: 20776, sum loss: 4095.484375, avg 
loss: 2.470135, ppl: 11.824048 +epoch: 2, batch: 20777, sum loss: 4096.045898, avg loss: 2.615610, ppl: 13.675561 +epoch: 2, batch: 20778, sum loss: 5176.231445, avg loss: 2.911266, ppl: 18.380058 +epoch: 2, batch: 20779, sum loss: 4451.393555, avg loss: 2.637082, ppl: 13.972367 +epoch: 2, batch: 20780, sum loss: 4260.918945, avg loss: 2.256843, ppl: 9.552879 +epoch: 2, batch: 20781, sum loss: 4002.518066, avg loss: 2.414064, ppl: 11.179301 +epoch: 2, batch: 20782, sum loss: 4370.484863, avg loss: 2.516111, ppl: 12.380358 +epoch: 2, batch: 20783, sum loss: 5073.117676, avg loss: 2.819965, ppl: 16.776270 +epoch: 2, batch: 20784, sum loss: 4460.602051, avg loss: 2.593373, ppl: 13.374813 +epoch: 2, batch: 20785, sum loss: 4242.692383, avg loss: 2.475317, ppl: 11.885468 +epoch: 2, batch: 20786, sum loss: 4298.263184, avg loss: 2.407991, ppl: 11.111612 +epoch: 2, batch: 20787, sum loss: 4034.218750, avg loss: 2.446464, ppl: 11.547441 +epoch: 2, batch: 20788, sum loss: 3631.706787, avg loss: 2.548566, ppl: 12.788753 +epoch: 2, batch: 20789, sum loss: 4335.118164, avg loss: 2.419151, ppl: 11.236314 +epoch: 2, batch: 20790, sum loss: 4857.420410, avg loss: 2.614327, ppl: 13.658028 +epoch: 2, batch: 20791, sum loss: 3840.032715, avg loss: 2.575475, ppl: 13.137552 +epoch: 2, batch: 20792, sum loss: 4104.305664, avg loss: 2.299331, ppl: 9.967511 +epoch: 2, batch: 20793, sum loss: 3776.509766, avg loss: 2.399307, ppl: 11.015543 +epoch: 2, batch: 20794, sum loss: 4860.013672, avg loss: 2.804393, ppl: 16.517052 +epoch: 2, batch: 20795, sum loss: 4399.897461, avg loss: 2.595810, ppl: 13.407439 +epoch: 2, batch: 20796, sum loss: 4430.471680, avg loss: 2.593953, ppl: 13.382567 +epoch: 2, batch: 20797, sum loss: 3964.067627, avg loss: 2.489992, ppl: 12.061181 +epoch: 2, batch: 20798, sum loss: 4418.464355, avg loss: 2.511919, ppl: 12.328560 +epoch: 2, batch: 20799, sum loss: 4426.825195, avg loss: 2.457982, ppl: 11.681213 +epoch: 2, batch: 20800, sum loss: 4297.168945, avg loss: 
2.422305, ppl: 11.271810 +epoch: 2, batch: 20801, sum loss: 4097.653809, avg loss: 2.548292, ppl: 12.785250 +epoch: 2, batch: 20802, sum loss: 3785.587646, avg loss: 2.370437, ppl: 10.702067 +epoch: 2, batch: 20803, sum loss: 4346.907227, avg loss: 2.525803, ppl: 12.500931 +epoch: 2, batch: 20804, sum loss: 4786.530273, avg loss: 2.515255, ppl: 12.369762 +epoch: 2, batch: 20805, sum loss: 4528.189453, avg loss: 2.494870, ppl: 12.120163 +epoch: 2, batch: 20806, sum loss: 4590.767090, avg loss: 2.713219, ppl: 15.077739 +epoch: 2, batch: 20807, sum loss: 5178.931641, avg loss: 2.686168, ppl: 14.675331 +epoch: 2, batch: 20808, sum loss: 3847.322266, avg loss: 2.456783, ppl: 11.667218 +epoch: 2, batch: 20809, sum loss: 4850.615723, avg loss: 2.656416, ppl: 14.245146 +epoch: 2, batch: 20810, sum loss: 4468.890625, avg loss: 2.406511, ppl: 11.095181 +epoch: 2, batch: 20811, sum loss: 4897.362305, avg loss: 2.702739, ppl: 14.920536 +epoch: 2, batch: 20812, sum loss: 5174.376953, avg loss: 2.559039, ppl: 12.923390 +epoch: 2, batch: 20813, sum loss: 4972.692383, avg loss: 2.704020, ppl: 14.939669 +epoch: 2, batch: 20814, sum loss: 3728.151855, avg loss: 2.391374, ppl: 10.928498 +epoch: 2, batch: 20815, sum loss: 4066.334961, avg loss: 2.604955, ppl: 13.530619 +epoch: 2, batch: 20816, sum loss: 4960.365723, avg loss: 2.872244, ppl: 17.676647 +epoch: 2, batch: 20817, sum loss: 3927.542480, avg loss: 2.695637, ppl: 14.814945 +epoch: 2, batch: 20818, sum loss: 3966.285645, avg loss: 2.414051, ppl: 11.179154 +epoch: 2, batch: 20819, sum loss: 4678.991211, avg loss: 2.522367, ppl: 12.458053 +epoch: 2, batch: 20820, sum loss: 5140.263672, avg loss: 2.500128, ppl: 12.184056 +epoch: 2, batch: 20821, sum loss: 4338.214355, avg loss: 2.319901, ppl: 10.174664 +epoch: 2, batch: 20822, sum loss: 3895.635742, avg loss: 2.467154, ppl: 11.788845 +epoch: 2, batch: 20823, sum loss: 4561.345215, avg loss: 2.565436, ppl: 13.006330 +epoch: 2, batch: 20824, sum loss: 4690.514160, avg loss: 
2.463506, ppl: 11.745914 +epoch: 2, batch: 20825, sum loss: 3120.479492, avg loss: 2.108432, ppl: 8.235318 +epoch: 2, batch: 20826, sum loss: 4612.212891, avg loss: 2.631040, ppl: 13.888205 +epoch: 2, batch: 20827, sum loss: 4067.947510, avg loss: 2.418518, ppl: 11.229206 +epoch: 2, batch: 20828, sum loss: 4222.867188, avg loss: 2.399356, ppl: 11.016084 +epoch: 2, batch: 20829, sum loss: 3484.017090, avg loss: 2.196732, ppl: 8.995568 +epoch: 2, batch: 20830, sum loss: 4065.708252, avg loss: 2.315324, ppl: 10.128200 +epoch: 2, batch: 20831, sum loss: 4576.279785, avg loss: 2.663725, ppl: 14.349644 +epoch: 2, batch: 20832, sum loss: 2977.471191, avg loss: 2.363072, ppl: 10.623541 +epoch: 2, batch: 20833, sum loss: 4192.728516, avg loss: 2.679060, ppl: 14.571386 +epoch: 2, batch: 20834, sum loss: 4392.575684, avg loss: 2.571765, ppl: 13.088911 +epoch: 2, batch: 20835, sum loss: 4006.320068, avg loss: 2.553423, ppl: 12.851013 +epoch: 2, batch: 20836, sum loss: 4891.388672, avg loss: 2.806304, ppl: 16.548649 +epoch: 2, batch: 20837, sum loss: 3635.794434, avg loss: 2.347188, ppl: 10.456128 +epoch: 2, batch: 20838, sum loss: 4079.476074, avg loss: 2.336470, ppl: 10.344654 +epoch: 2, batch: 20839, sum loss: 4332.277344, avg loss: 2.657839, ppl: 14.265426 +epoch: 2, batch: 20840, sum loss: 4336.446777, avg loss: 2.558376, ppl: 12.914822 +epoch: 2, batch: 20841, sum loss: 4083.919678, avg loss: 2.533449, ppl: 12.596877 +epoch: 2, batch: 20842, sum loss: 4227.717773, avg loss: 2.533084, ppl: 12.592286 +epoch: 2, batch: 20843, sum loss: 4818.159668, avg loss: 2.527890, ppl: 12.527052 +epoch: 2, batch: 20844, sum loss: 3267.008789, avg loss: 2.110471, ppl: 8.252125 +epoch: 2, batch: 20845, sum loss: 4783.840820, avg loss: 2.396714, ppl: 10.987014 +epoch: 2, batch: 20846, sum loss: 3953.922607, avg loss: 2.708166, ppl: 15.001739 +epoch: 2, batch: 20847, sum loss: 3649.963135, avg loss: 2.181688, ppl: 8.861248 +epoch: 2, batch: 20848, sum loss: 3373.262695, avg loss: 2.414648, 
ppl: 11.185827 +epoch: 2, batch: 20849, sum loss: 3390.910889, avg loss: 2.222091, ppl: 9.226605 +epoch: 2, batch: 20850, sum loss: 4482.291504, avg loss: 2.443997, ppl: 11.518994 +epoch: 2, batch: 20851, sum loss: 4082.830078, avg loss: 2.323751, ppl: 10.213912 +epoch: 2, batch: 20852, sum loss: 3832.112549, avg loss: 2.617563, ppl: 13.702293 +epoch: 2, batch: 20853, sum loss: 4041.487549, avg loss: 2.301531, ppl: 9.989461 +epoch: 2, batch: 20854, sum loss: 4130.119141, avg loss: 2.416688, ppl: 11.208672 +epoch: 2, batch: 20855, sum loss: 3804.983398, avg loss: 2.232971, ppl: 9.327541 +epoch: 2, batch: 20856, sum loss: 4612.082520, avg loss: 2.676775, ppl: 14.538125 +epoch: 2, batch: 20857, sum loss: 4867.596680, avg loss: 2.778309, ppl: 16.091785 +epoch: 2, batch: 20858, sum loss: 4142.834961, avg loss: 2.458656, ppl: 11.689089 +epoch: 2, batch: 20859, sum loss: 4661.173340, avg loss: 2.480667, ppl: 11.949233 +epoch: 2, batch: 20860, sum loss: 4070.657471, avg loss: 2.506562, ppl: 12.262704 +epoch: 2, batch: 20861, sum loss: 4346.855469, avg loss: 2.575151, ppl: 13.133306 +epoch: 2, batch: 20862, sum loss: 4068.176514, avg loss: 2.541022, ppl: 12.692640 +epoch: 2, batch: 20863, sum loss: 4038.391357, avg loss: 2.249800, ppl: 9.485841 +epoch: 2, batch: 20864, sum loss: 4310.115234, avg loss: 2.346280, ppl: 10.446632 +epoch: 2, batch: 20865, sum loss: 4135.715332, avg loss: 2.450068, ppl: 11.589140 +epoch: 2, batch: 20866, sum loss: 4534.228516, avg loss: 2.738061, ppl: 15.456981 +epoch: 2, batch: 20867, sum loss: 4294.456055, avg loss: 2.393788, ppl: 10.954916 +epoch: 2, batch: 20868, sum loss: 4027.713867, avg loss: 2.393175, ppl: 10.948201 +epoch: 2, batch: 20869, sum loss: 4605.201172, avg loss: 2.574176, ppl: 13.120502 +epoch: 2, batch: 20870, sum loss: 3466.265869, avg loss: 2.359609, ppl: 10.586813 +epoch: 2, batch: 20871, sum loss: 4870.051270, avg loss: 2.568593, ppl: 13.047448 +epoch: 2, batch: 20872, sum loss: 4313.884277, avg loss: 2.441361, ppl: 
11.488663 +epoch: 2, batch: 20873, sum loss: 4262.336914, avg loss: 2.391884, ppl: 10.934072 +epoch: 2, batch: 20874, sum loss: 4365.918945, avg loss: 2.594129, ppl: 13.384922 +epoch: 2, batch: 20875, sum loss: 4445.628906, avg loss: 2.730730, ppl: 15.344089 +epoch: 2, batch: 20876, sum loss: 4462.446289, avg loss: 2.582434, ppl: 13.229301 +epoch: 2, batch: 20877, sum loss: 4624.737305, avg loss: 2.517549, ppl: 12.398172 +epoch: 2, batch: 20878, sum loss: 4749.828613, avg loss: 2.609796, ppl: 13.596277 +epoch: 2, batch: 20879, sum loss: 5113.676758, avg loss: 2.699935, ppl: 14.878764 +epoch: 2, batch: 20880, sum loss: 3323.234131, avg loss: 2.257632, ppl: 9.560424 +epoch: 2, batch: 20881, sum loss: 4672.925781, avg loss: 2.600404, ppl: 13.469179 +epoch: 2, batch: 20882, sum loss: 4193.311523, avg loss: 2.553783, ppl: 12.855644 +epoch: 2, batch: 20883, sum loss: 4891.560547, avg loss: 2.454370, ppl: 11.639105 +epoch: 2, batch: 20884, sum loss: 4198.665039, avg loss: 2.443926, ppl: 11.518173 +epoch: 2, batch: 20885, sum loss: 4019.490723, avg loss: 2.200050, ppl: 9.025461 +epoch: 2, batch: 20886, sum loss: 4573.144043, avg loss: 2.480013, ppl: 11.941422 +epoch: 2, batch: 20887, sum loss: 4152.064453, avg loss: 2.547279, ppl: 12.772302 +epoch: 2, batch: 20888, sum loss: 3966.197998, avg loss: 2.370710, ppl: 10.704991 +epoch: 2, batch: 20889, sum loss: 4329.356934, avg loss: 2.483854, ppl: 11.987370 +epoch: 2, batch: 20890, sum loss: 4874.392578, avg loss: 2.783777, ppl: 16.180010 +epoch: 2, batch: 20891, sum loss: 4468.156738, avg loss: 2.611430, ppl: 13.618514 +epoch: 2, batch: 20892, sum loss: 3764.934570, avg loss: 2.372360, ppl: 10.722665 +epoch: 2, batch: 20893, sum loss: 4309.100098, avg loss: 2.503835, ppl: 12.229303 +epoch: 2, batch: 20894, sum loss: 5267.032715, avg loss: 2.623024, ppl: 13.777327 +epoch: 2, batch: 20895, sum loss: 4047.194580, avg loss: 2.550217, ppl: 12.809885 +epoch: 2, batch: 20896, sum loss: 3986.337158, avg loss: 2.193912, ppl: 8.970232 
+epoch: 2, batch: 20897, sum loss: 4218.573242, avg loss: 2.405116, ppl: 11.079714 +epoch: 2, batch: 20898, sum loss: 5051.190430, avg loss: 2.606394, ppl: 13.550096 +epoch: 2, batch: 20899, sum loss: 5175.652832, avg loss: 2.702691, ppl: 14.919825 +epoch: 2, batch: 20900, sum loss: 3519.302734, avg loss: 2.363534, ppl: 10.628451 +epoch: 2, batch: 20901, sum loss: 3429.844971, avg loss: 2.259450, ppl: 9.577820 +epoch: 2, batch: 20902, sum loss: 3965.169678, avg loss: 2.670148, ppl: 14.442105 +epoch: 2, batch: 20903, sum loss: 4027.042725, avg loss: 2.596417, ppl: 13.415583 +epoch: 2, batch: 20904, sum loss: 4252.234863, avg loss: 2.586518, ppl: 13.283432 +epoch: 2, batch: 20905, sum loss: 4301.203613, avg loss: 2.436943, ppl: 11.438016 +epoch: 2, batch: 20906, sum loss: 4492.187500, avg loss: 2.623941, ppl: 13.789966 +epoch: 2, batch: 20907, sum loss: 4099.000000, avg loss: 2.388695, ppl: 10.899259 +epoch: 2, batch: 20908, sum loss: 4208.537109, avg loss: 2.620509, ppl: 13.742712 +epoch: 2, batch: 20909, sum loss: 3918.160645, avg loss: 2.394964, ppl: 10.967800 +epoch: 2, batch: 20910, sum loss: 4065.178467, avg loss: 2.438619, ppl: 11.457211 +epoch: 2, batch: 20911, sum loss: 2926.053223, avg loss: 1.937784, ppl: 6.943345 +epoch: 2, batch: 20912, sum loss: 4427.517578, avg loss: 2.372732, ppl: 10.726654 +epoch: 2, batch: 20913, sum loss: 3584.829346, avg loss: 2.257449, ppl: 9.558678 +epoch: 2, batch: 20914, sum loss: 4920.984863, avg loss: 2.602319, ppl: 13.494993 +epoch: 2, batch: 20915, sum loss: 4244.022949, avg loss: 2.618151, ppl: 13.710349 +epoch: 2, batch: 20916, sum loss: 5177.395508, avg loss: 2.783546, ppl: 16.176279 +epoch: 2, batch: 20917, sum loss: 4032.617920, avg loss: 2.458913, ppl: 11.692100 +epoch: 2, batch: 20918, sum loss: 4151.898438, avg loss: 2.614546, ppl: 13.661011 +epoch: 2, batch: 20919, sum loss: 5451.899414, avg loss: 2.811707, ppl: 16.638292 +epoch: 2, batch: 20920, sum loss: 4855.684570, avg loss: 2.663568, ppl: 14.347393 +epoch: 2, 
batch: 20921, sum loss: 4019.940186, avg loss: 2.316968, ppl: 10.144873 +epoch: 2, batch: 20922, sum loss: 4491.841797, avg loss: 2.740599, ppl: 15.496264 +epoch: 2, batch: 20923, sum loss: 4657.041504, avg loss: 2.568694, ppl: 13.048767 +epoch: 2, batch: 20924, sum loss: 3075.221924, avg loss: 2.215578, ppl: 9.166702 +epoch: 2, batch: 20925, sum loss: 3668.157715, avg loss: 2.400627, ppl: 11.030089 +epoch: 2, batch: 20926, sum loss: 3995.679688, avg loss: 2.527312, ppl: 12.519802 +epoch: 2, batch: 20927, sum loss: 4844.159180, avg loss: 2.849506, ppl: 17.279238 +epoch: 2, batch: 20928, sum loss: 4214.748047, avg loss: 2.482184, ppl: 11.967372 +epoch: 2, batch: 20929, sum loss: 4494.940918, avg loss: 2.744164, ppl: 15.551607 +epoch: 2, batch: 20930, sum loss: 3812.170166, avg loss: 2.290968, ppl: 9.884499 +epoch: 2, batch: 20931, sum loss: 4403.918457, avg loss: 2.512218, ppl: 12.332255 +epoch: 2, batch: 20932, sum loss: 5651.542480, avg loss: 2.751481, ppl: 15.665820 +epoch: 2, batch: 20933, sum loss: 4037.990967, avg loss: 2.575249, ppl: 13.134590 +epoch: 2, batch: 20934, sum loss: 4446.895020, avg loss: 2.552752, ppl: 12.842403 +epoch: 2, batch: 20935, sum loss: 4143.376465, avg loss: 2.537279, ppl: 12.645215 +epoch: 2, batch: 20936, sum loss: 4307.211914, avg loss: 2.532165, ppl: 12.580709 +epoch: 2, batch: 20937, sum loss: 4640.714355, avg loss: 2.537296, ppl: 12.645432 +epoch: 2, batch: 20938, sum loss: 5966.310059, avg loss: 2.775028, ppl: 16.039076 +epoch: 2, batch: 20939, sum loss: 4534.222168, avg loss: 2.805830, ppl: 16.540808 +epoch: 2, batch: 20940, sum loss: 4450.904785, avg loss: 2.560935, ppl: 12.947915 +epoch: 2, batch: 20941, sum loss: 4595.998047, avg loss: 2.429174, ppl: 11.349508 +epoch: 2, batch: 20942, sum loss: 4690.250000, avg loss: 2.522996, ppl: 12.465891 +epoch: 2, batch: 20943, sum loss: 3674.459961, avg loss: 2.479393, ppl: 11.934015 +epoch: 2, batch: 20944, sum loss: 4460.238770, avg loss: 2.505752, ppl: 12.252770 +epoch: 2, batch: 
20945, sum loss: 5347.024902, avg loss: 2.669508, ppl: 14.432870 +epoch: 2, batch: 20946, sum loss: 4549.977051, avg loss: 2.610429, ppl: 13.604880 +epoch: 2, batch: 20947, sum loss: 3920.364502, avg loss: 2.457909, ppl: 11.680361 +epoch: 2, batch: 20948, sum loss: 3330.694824, avg loss: 2.253515, ppl: 9.521142 +epoch: 2, batch: 20949, sum loss: 4496.964355, avg loss: 2.400942, ppl: 11.033566 +epoch: 2, batch: 20950, sum loss: 4231.509766, avg loss: 2.575478, ppl: 13.137590 +epoch: 2, batch: 20951, sum loss: 4897.002930, avg loss: 2.833914, ppl: 17.011911 +epoch: 2, batch: 20952, sum loss: 4193.946289, avg loss: 2.524953, ppl: 12.490304 +epoch: 2, batch: 20953, sum loss: 3708.751465, avg loss: 2.461016, ppl: 11.716712 +epoch: 2, batch: 20954, sum loss: 3913.737061, avg loss: 2.453754, ppl: 11.631927 +epoch: 2, batch: 20955, sum loss: 4541.663086, avg loss: 2.749191, ppl: 15.629979 +epoch: 2, batch: 20956, sum loss: 4794.042969, avg loss: 2.669289, ppl: 14.429708 +epoch: 2, batch: 20957, sum loss: 4646.680176, avg loss: 2.614902, ppl: 13.665871 +epoch: 2, batch: 20958, sum loss: 4754.924805, avg loss: 2.517165, ppl: 12.393414 +epoch: 2, batch: 20959, sum loss: 3845.013672, avg loss: 2.435094, ppl: 11.416893 +epoch: 2, batch: 20960, sum loss: 4804.777344, avg loss: 2.579054, ppl: 13.184658 +epoch: 2, batch: 20961, sum loss: 4913.853027, avg loss: 2.714836, ppl: 15.102135 +epoch: 2, batch: 20962, sum loss: 4324.875488, avg loss: 2.498484, ppl: 12.164035 +epoch: 2, batch: 20963, sum loss: 3118.976074, avg loss: 2.176536, ppl: 8.815717 +epoch: 2, batch: 20964, sum loss: 4656.708496, avg loss: 2.565680, ppl: 13.009500 +epoch: 2, batch: 20965, sum loss: 3368.223389, avg loss: 2.233570, ppl: 9.333124 +epoch: 2, batch: 20966, sum loss: 4723.217285, avg loss: 2.691292, ppl: 14.750719 +epoch: 2, batch: 20967, sum loss: 3981.995605, avg loss: 2.501254, ppl: 12.197776 +epoch: 2, batch: 20968, sum loss: 4697.518066, avg loss: 2.712193, ppl: 15.062267 +epoch: 2, batch: 20969, sum 
loss: 4406.000488, avg loss: 2.532184, ppl: 12.580955 +epoch: 2, batch: 20970, sum loss: 4322.340820, avg loss: 2.378834, ppl: 10.792309 +epoch: 2, batch: 20971, sum loss: 4330.690918, avg loss: 2.486045, ppl: 12.013672 +epoch: 2, batch: 20972, sum loss: 3588.239746, avg loss: 2.275358, ppl: 9.731404 +epoch: 2, batch: 20973, sum loss: 4568.894531, avg loss: 2.558172, ppl: 12.912189 +epoch: 2, batch: 20974, sum loss: 3721.272217, avg loss: 2.382377, ppl: 10.830613 +epoch: 2, batch: 20975, sum loss: 4320.609375, avg loss: 2.773177, ppl: 16.009415 +epoch: 2, batch: 20976, sum loss: 3929.419922, avg loss: 2.395988, ppl: 10.979037 +epoch: 2, batch: 20977, sum loss: 4754.458008, avg loss: 2.586756, ppl: 13.286603 +epoch: 2, batch: 20978, sum loss: 5022.676270, avg loss: 2.593018, ppl: 13.370066 +epoch: 2, batch: 20979, sum loss: 3726.903809, avg loss: 2.521586, ppl: 12.448323 +epoch: 2, batch: 20980, sum loss: 3659.605469, avg loss: 2.090009, ppl: 8.084988 +epoch: 2, batch: 20981, sum loss: 4647.942383, avg loss: 2.602432, ppl: 13.496515 +epoch: 2, batch: 20982, sum loss: 3941.824707, avg loss: 2.490098, ppl: 12.062452 +epoch: 2, batch: 20983, sum loss: 4635.978027, avg loss: 2.736705, ppl: 15.436037 +epoch: 2, batch: 20984, sum loss: 4403.811035, avg loss: 2.349952, ppl: 10.485071 +epoch: 2, batch: 20985, sum loss: 4138.766602, avg loss: 2.573860, ppl: 13.116352 +epoch: 2, batch: 20986, sum loss: 4130.554199, avg loss: 2.567156, ppl: 13.028719 +epoch: 2, batch: 20987, sum loss: 4725.710938, avg loss: 2.691180, ppl: 14.749063 +epoch: 2, batch: 20988, sum loss: 3736.321777, avg loss: 2.325029, ppl: 10.226978 +epoch: 2, batch: 20989, sum loss: 4827.930176, avg loss: 2.736922, ppl: 15.439386 +epoch: 2, batch: 20990, sum loss: 3752.791992, avg loss: 2.604297, ppl: 13.521718 +epoch: 2, batch: 20991, sum loss: 4960.985352, avg loss: 2.821948, ppl: 16.809568 +epoch: 2, batch: 20992, sum loss: 5041.170898, avg loss: 2.582567, ppl: 13.231062 +epoch: 2, batch: 20993, sum loss: 
4063.050293, avg loss: 2.504963, ppl: 12.243108 +epoch: 2, batch: 20994, sum loss: 5325.554688, avg loss: 2.718507, ppl: 15.157672 +epoch: 2, batch: 20995, sum loss: 4326.902344, avg loss: 2.372205, ppl: 10.721009 +epoch: 2, batch: 20996, sum loss: 4108.778809, avg loss: 2.312200, ppl: 10.096609 +epoch: 2, batch: 20997, sum loss: 4664.543457, avg loss: 2.779823, ppl: 16.116173 +epoch: 2, batch: 20998, sum loss: 4362.589844, avg loss: 2.582942, ppl: 13.236028 +epoch: 2, batch: 20999, sum loss: 5230.303223, avg loss: 2.572702, ppl: 13.101175 +epoch: 2, batch: 21000, sum loss: 4255.421387, avg loss: 2.666304, ppl: 14.386699 +epoch: 2, batch: 21001, sum loss: 3759.995850, avg loss: 2.435231, ppl: 11.418453 +epoch: 2, batch: 21002, sum loss: 3697.517578, avg loss: 2.255959, ppl: 9.544445 +epoch: 2, batch: 21003, sum loss: 4352.546875, avg loss: 2.606316, ppl: 13.549039 +epoch: 2, batch: 21004, sum loss: 3279.892578, avg loss: 2.436770, ppl: 11.436042 +epoch: 2, batch: 21005, sum loss: 3944.294434, avg loss: 2.521927, ppl: 12.452574 +epoch: 2, batch: 21006, sum loss: 4770.715820, avg loss: 2.614091, ppl: 13.654798 +epoch: 2, batch: 21007, sum loss: 5510.150391, avg loss: 2.753698, ppl: 15.700591 +epoch: 2, batch: 21008, sum loss: 4421.304199, avg loss: 2.803617, ppl: 16.504238 +epoch: 2, batch: 21009, sum loss: 3707.075684, avg loss: 2.526977, ppl: 12.515615 +epoch: 2, batch: 21010, sum loss: 4428.974121, avg loss: 2.600689, ppl: 13.473023 +epoch: 2, batch: 21011, sum loss: 3864.269043, avg loss: 2.367812, ppl: 10.674011 +epoch: 2, batch: 21012, sum loss: 3786.064941, avg loss: 2.505668, ppl: 12.251745 +epoch: 2, batch: 21013, sum loss: 4349.163086, avg loss: 2.462720, ppl: 11.736689 +epoch: 2, batch: 21014, sum loss: 4599.213867, avg loss: 2.436024, ppl: 11.427517 +epoch: 2, batch: 21015, sum loss: 4417.852051, avg loss: 2.643837, ppl: 14.067081 +epoch: 2, batch: 21016, sum loss: 4172.834473, avg loss: 2.601518, ppl: 13.484190 +epoch: 2, batch: 21017, sum loss: 
3563.265137, avg loss: 2.498783, ppl: 12.167681 +epoch: 2, batch: 21018, sum loss: 3626.048828, avg loss: 2.145591, ppl: 8.547091 +epoch: 2, batch: 21019, sum loss: 4243.212891, avg loss: 2.619267, ppl: 13.725662 +epoch: 2, batch: 21020, sum loss: 3362.010742, avg loss: 2.416974, ppl: 11.211882 +epoch: 2, batch: 21021, sum loss: 4511.390137, avg loss: 2.577937, ppl: 13.169942 +epoch: 2, batch: 21022, sum loss: 4819.733398, avg loss: 2.466599, ppl: 11.782301 +epoch: 2, batch: 21023, sum loss: 3879.969238, avg loss: 2.350072, ppl: 10.486329 +epoch: 2, batch: 21024, sum loss: 4539.969727, avg loss: 2.570764, ppl: 13.075814 +epoch: 2, batch: 21025, sum loss: 4839.250000, avg loss: 2.371019, ppl: 10.708300 +epoch: 2, batch: 21026, sum loss: 4694.917480, avg loss: 2.468411, ppl: 11.803676 +epoch: 2, batch: 21027, sum loss: 3830.043457, avg loss: 2.351162, ppl: 10.497763 +epoch: 2, batch: 21028, sum loss: 4386.460938, avg loss: 2.516616, ppl: 12.386604 +epoch: 2, batch: 21029, sum loss: 4893.208496, avg loss: 2.817046, ppl: 16.727364 +epoch: 2, batch: 21030, sum loss: 5656.520996, avg loss: 2.908237, ppl: 18.324469 +epoch: 2, batch: 21031, sum loss: 4386.523926, avg loss: 2.590977, ppl: 13.342800 +epoch: 2, batch: 21032, sum loss: 4034.732910, avg loss: 2.560110, ppl: 12.937236 +epoch: 2, batch: 21033, sum loss: 4842.327148, avg loss: 2.329162, ppl: 10.269328 +epoch: 2, batch: 21034, sum loss: 4256.492188, avg loss: 2.542708, ppl: 12.714050 +epoch: 2, batch: 21035, sum loss: 5243.524902, avg loss: 2.807026, ppl: 16.560596 +epoch: 2, batch: 21036, sum loss: 3782.036133, avg loss: 2.274225, ppl: 9.720383 +epoch: 2, batch: 21037, sum loss: 4613.770508, avg loss: 2.594922, ppl: 13.395537 +epoch: 2, batch: 21038, sum loss: 4696.683105, avg loss: 2.567897, ppl: 13.038374 +epoch: 2, batch: 21039, sum loss: 3821.408691, avg loss: 2.507486, ppl: 12.274035 +epoch: 2, batch: 21040, sum loss: 4347.106445, avg loss: 2.481225, ppl: 11.955904 +epoch: 2, batch: 21041, sum loss: 
4018.078613, avg loss: 2.460550, ppl: 11.711254 +epoch: 2, batch: 21042, sum loss: 4027.430176, avg loss: 2.492222, ppl: 12.088101 +epoch: 2, batch: 21043, sum loss: 3952.194580, avg loss: 2.382275, ppl: 10.829514 +epoch: 2, batch: 21044, sum loss: 4005.349121, avg loss: 2.570827, ppl: 13.076641 +epoch: 2, batch: 21045, sum loss: 4466.309082, avg loss: 2.445952, ppl: 11.541536 +epoch: 2, batch: 21046, sum loss: 4888.705078, avg loss: 2.594854, ppl: 13.394633 +epoch: 2, batch: 21047, sum loss: 4564.184570, avg loss: 2.314495, ppl: 10.119812 +epoch: 2, batch: 21048, sum loss: 4626.709961, avg loss: 2.739319, ppl: 15.476443 +epoch: 2, batch: 21049, sum loss: 3712.656494, avg loss: 2.269350, ppl: 9.673112 +epoch: 2, batch: 21050, sum loss: 3788.231445, avg loss: 2.246875, ppl: 9.458136 +epoch: 2, batch: 21051, sum loss: 4849.814453, avg loss: 2.594871, ppl: 13.394863 +epoch: 2, batch: 21052, sum loss: 4419.785645, avg loss: 2.537190, ppl: 12.644096 +epoch: 2, batch: 21053, sum loss: 4031.306641, avg loss: 2.496165, ppl: 12.135864 +epoch: 2, batch: 21054, sum loss: 3175.855713, avg loss: 2.208523, ppl: 9.102261 +epoch: 2, batch: 21055, sum loss: 4290.180664, avg loss: 2.570510, ppl: 13.072485 +epoch: 2, batch: 21056, sum loss: 4204.868652, avg loss: 2.571785, ppl: 13.089168 +epoch: 2, batch: 21057, sum loss: 3683.666260, avg loss: 2.470601, ppl: 11.829558 +epoch: 2, batch: 21058, sum loss: 3881.841797, avg loss: 2.384424, ppl: 10.852807 +epoch: 2, batch: 21059, sum loss: 4716.405762, avg loss: 2.542537, ppl: 12.711877 +epoch: 2, batch: 21060, sum loss: 3603.262695, avg loss: 2.117076, ppl: 8.306810 +epoch: 2, batch: 21061, sum loss: 3895.167480, avg loss: 2.455969, ppl: 11.657728 +epoch: 2, batch: 21062, sum loss: 3094.204102, avg loss: 2.263500, ppl: 9.616686 +epoch: 2, batch: 21063, sum loss: 4949.150879, avg loss: 2.655124, ppl: 14.226749 +epoch: 2, batch: 21064, sum loss: 4401.852051, avg loss: 2.854638, ppl: 17.368151 +epoch: 2, batch: 21065, sum loss: 4217.871582, 
avg loss: 2.548563, ppl: 12.788713 +epoch: 2, batch: 21066, sum loss: 3994.591553, avg loss: 2.404932, ppl: 11.077675 +epoch: 2, batch: 21067, sum loss: 4073.736328, avg loss: 2.389288, ppl: 10.905728 +epoch: 2, batch: 21068, sum loss: 4482.641602, avg loss: 2.641510, ppl: 14.034373 +epoch: 2, batch: 21069, sum loss: 4371.654785, avg loss: 2.475456, ppl: 11.887123 +epoch: 2, batch: 21070, sum loss: 4198.673340, avg loss: 2.569568, ppl: 13.060177 +epoch: 2, batch: 21071, sum loss: 4536.881348, avg loss: 2.409390, ppl: 11.127172 +epoch: 2, batch: 21072, sum loss: 3215.783936, avg loss: 2.310189, ppl: 10.076334 +epoch: 2, batch: 21073, sum loss: 4081.527832, avg loss: 2.324332, ppl: 10.219856 +epoch: 2, batch: 21074, sum loss: 4730.840332, avg loss: 2.686451, ppl: 14.679485 +epoch: 2, batch: 21075, sum loss: 3409.526123, avg loss: 2.237222, ppl: 9.367272 +epoch: 2, batch: 21076, sum loss: 4458.583008, avg loss: 2.461946, ppl: 11.727606 +epoch: 2, batch: 21077, sum loss: 4454.132812, avg loss: 2.676763, ppl: 14.537951 +epoch: 2, batch: 21078, sum loss: 5024.767090, avg loss: 2.644614, ppl: 14.078016 +epoch: 2, batch: 21079, sum loss: 4021.678711, avg loss: 2.292861, ppl: 9.903233 +epoch: 2, batch: 21080, sum loss: 3455.118896, avg loss: 2.297287, ppl: 9.947154 +epoch: 2, batch: 21081, sum loss: 3452.619141, avg loss: 2.428002, ppl: 11.336211 +epoch: 2, batch: 21082, sum loss: 4092.139160, avg loss: 2.384696, ppl: 10.855767 +epoch: 2, batch: 21083, sum loss: 3806.951660, avg loss: 2.283714, ppl: 9.813062 +epoch: 2, batch: 21084, sum loss: 4513.731934, avg loss: 2.777681, ppl: 16.081686 +epoch: 2, batch: 21085, sum loss: 4745.143066, avg loss: 2.607222, ppl: 13.561319 +epoch: 2, batch: 21086, sum loss: 4638.476562, avg loss: 2.557043, ppl: 12.897627 +epoch: 2, batch: 21087, sum loss: 4679.833008, avg loss: 2.703543, ppl: 14.932544 +epoch: 2, batch: 21088, sum loss: 4046.706055, avg loss: 2.303191, ppl: 10.006058 +epoch: 2, batch: 21089, sum loss: 4425.783691, avg loss: 
2.679046, ppl: 14.571184 +epoch: 2, batch: 21090, sum loss: 4176.788574, avg loss: 2.537539, ppl: 12.648501 +epoch: 2, batch: 21091, sum loss: 4325.239258, avg loss: 2.535310, ppl: 12.620346 +epoch: 2, batch: 21092, sum loss: 4620.140625, avg loss: 2.412606, ppl: 11.163014 +epoch: 2, batch: 21093, sum loss: 3751.014648, avg loss: 2.258287, ppl: 9.566687 +epoch: 2, batch: 21094, sum loss: 5059.254883, avg loss: 2.601159, ppl: 13.479356 +epoch: 2, batch: 21095, sum loss: 4062.915039, avg loss: 2.542500, ppl: 12.711411 +epoch: 2, batch: 21096, sum loss: 4477.250977, avg loss: 2.548236, ppl: 12.784534 +epoch: 2, batch: 21097, sum loss: 5720.912109, avg loss: 2.704923, ppl: 14.953164 +epoch: 2, batch: 21098, sum loss: 4704.267578, avg loss: 2.586183, ppl: 13.278993 +epoch: 2, batch: 21099, sum loss: 4636.416504, avg loss: 2.349932, ppl: 10.484861 +epoch: 2, batch: 21100, sum loss: 4535.548828, avg loss: 2.588784, ppl: 13.313569 +epoch: 2, batch: 21101, sum loss: 4260.131348, avg loss: 2.577212, ppl: 13.160394 +epoch: 2, batch: 21102, sum loss: 4387.390625, avg loss: 2.538999, ppl: 12.666989 +epoch: 2, batch: 21103, sum loss: 4340.204590, avg loss: 2.400556, ppl: 11.029303 +epoch: 2, batch: 21104, sum loss: 4812.780762, avg loss: 2.883631, ppl: 17.879084 +epoch: 2, batch: 21105, sum loss: 3222.434570, avg loss: 2.045990, ppl: 7.736816 +epoch: 2, batch: 21106, sum loss: 4368.471680, avg loss: 2.383236, ppl: 10.839926 +epoch: 2, batch: 21107, sum loss: 4806.944824, avg loss: 2.720399, ppl: 15.186378 +epoch: 2, batch: 21108, sum loss: 3567.887207, avg loss: 2.389744, ppl: 10.910696 +epoch: 2, batch: 21109, sum loss: 4155.847656, avg loss: 2.325600, ppl: 10.232821 +epoch: 2, batch: 21110, sum loss: 5016.879883, avg loss: 2.654434, ppl: 14.216933 +epoch: 2, batch: 21111, sum loss: 4149.390625, avg loss: 2.631193, ppl: 13.890326 +epoch: 2, batch: 21112, sum loss: 4439.665527, avg loss: 2.560361, ppl: 12.940484 +epoch: 2, batch: 21113, sum loss: 5032.812500, avg loss: 2.723383, 
ppl: 15.231771 +epoch: 2, batch: 21114, sum loss: 4166.751953, avg loss: 2.493568, ppl: 12.104387 +epoch: 2, batch: 21115, sum loss: 4274.220703, avg loss: 2.700076, ppl: 14.880864 +epoch: 2, batch: 21116, sum loss: 5212.570312, avg loss: 2.631282, ppl: 13.891569 +epoch: 2, batch: 21117, sum loss: 4292.908691, avg loss: 2.492978, ppl: 12.097252 +epoch: 2, batch: 21118, sum loss: 4334.053223, avg loss: 2.349080, ppl: 10.475931 +epoch: 2, batch: 21119, sum loss: 4645.111328, avg loss: 2.669604, ppl: 14.434257 +epoch: 2, batch: 21120, sum loss: 4400.032715, avg loss: 2.558159, ppl: 12.912020 +epoch: 2, batch: 21121, sum loss: 3822.168945, avg loss: 2.280530, ppl: 9.781868 +epoch: 2, batch: 21122, sum loss: 3416.819580, avg loss: 2.308662, ppl: 10.060953 +epoch: 2, batch: 21123, sum loss: 5833.926270, avg loss: 2.772779, ppl: 16.003036 +epoch: 2, batch: 21124, sum loss: 4355.871582, avg loss: 2.313262, ppl: 10.107336 +epoch: 2, batch: 21125, sum loss: 3635.856445, avg loss: 2.280964, ppl: 9.786109 +epoch: 2, batch: 21126, sum loss: 4585.191406, avg loss: 2.517952, ppl: 12.403174 +epoch: 2, batch: 21127, sum loss: 4213.662598, avg loss: 2.618808, ppl: 13.719364 +epoch: 2, batch: 21128, sum loss: 5276.140137, avg loss: 2.605501, ppl: 13.538012 +epoch: 2, batch: 21129, sum loss: 3867.517578, avg loss: 2.321439, ppl: 10.190328 +epoch: 2, batch: 21130, sum loss: 4468.473145, avg loss: 2.516032, ppl: 12.379380 +epoch: 2, batch: 21131, sum loss: 4034.385254, avg loss: 2.478124, ppl: 11.918880 +epoch: 2, batch: 21132, sum loss: 4457.328613, avg loss: 2.419831, ppl: 11.243959 +epoch: 2, batch: 21133, sum loss: 5538.477539, avg loss: 2.977676, ppl: 19.642118 +epoch: 2, batch: 21134, sum loss: 3440.526123, avg loss: 2.147644, ppl: 8.564659 +epoch: 2, batch: 21135, sum loss: 3981.368652, avg loss: 2.480603, ppl: 11.948473 +epoch: 2, batch: 21136, sum loss: 4902.790527, avg loss: 2.763692, ppl: 15.858290 +epoch: 2, batch: 21137, sum loss: 3881.930176, avg loss: 2.402184, ppl: 
11.047282 +epoch: 2, batch: 21138, sum loss: 4681.593262, avg loss: 2.562449, ppl: 12.967529 +epoch: 2, batch: 21139, sum loss: 4892.247559, avg loss: 2.645888, ppl: 14.095962 +epoch: 2, batch: 21140, sum loss: 3540.401611, avg loss: 2.336899, ppl: 10.349092 +epoch: 2, batch: 21141, sum loss: 4594.829102, avg loss: 2.613668, ppl: 13.649030 +epoch: 2, batch: 21142, sum loss: 4980.329590, avg loss: 2.694983, ppl: 14.805274 +epoch: 2, batch: 21143, sum loss: 3646.270752, avg loss: 2.209861, ppl: 9.114450 +epoch: 2, batch: 21144, sum loss: 4061.110596, avg loss: 2.549348, ppl: 12.798752 +epoch: 2, batch: 21145, sum loss: 4546.520996, avg loss: 2.587661, ppl: 13.298634 +epoch: 2, batch: 21146, sum loss: 4072.549561, avg loss: 2.527964, ppl: 12.527971 +epoch: 2, batch: 21147, sum loss: 4610.179688, avg loss: 2.558368, ppl: 12.914729 +epoch: 2, batch: 21148, sum loss: 4597.173828, avg loss: 2.409420, ppl: 11.127508 +epoch: 2, batch: 21149, sum loss: 4478.073242, avg loss: 2.451053, ppl: 11.600552 +epoch: 2, batch: 21150, sum loss: 4214.104004, avg loss: 2.619083, ppl: 13.723129 +epoch: 2, batch: 21151, sum loss: 3480.391113, avg loss: 2.400270, ppl: 11.026151 +epoch: 2, batch: 21152, sum loss: 4807.994141, avg loss: 2.856800, ppl: 17.405737 +epoch: 2, batch: 21153, sum loss: 4135.169434, avg loss: 2.435318, ppl: 11.419447 +epoch: 2, batch: 21154, sum loss: 5074.824219, avg loss: 2.509804, ppl: 12.302522 +epoch: 2, batch: 21155, sum loss: 4799.321289, avg loss: 2.503558, ppl: 12.225922 +epoch: 2, batch: 21156, sum loss: 4476.301758, avg loss: 2.608567, ppl: 13.579584 +epoch: 2, batch: 21157, sum loss: 4472.450195, avg loss: 2.469603, ppl: 11.817752 +epoch: 2, batch: 21158, sum loss: 4060.971680, avg loss: 2.372063, ppl: 10.719483 +epoch: 2, batch: 21159, sum loss: 3559.700928, avg loss: 2.421565, ppl: 11.263476 +epoch: 2, batch: 21160, sum loss: 4101.923828, avg loss: 2.468065, ppl: 11.799593 +epoch: 2, batch: 21161, sum loss: 4632.058594, avg loss: 2.634845, ppl: 
13.941158 +epoch: 2, batch: 21162, sum loss: 4689.580566, avg loss: 2.562613, ppl: 12.969657 +epoch: 2, batch: 21163, sum loss: 4261.886719, avg loss: 2.699105, ppl: 14.866421 +epoch: 2, batch: 21164, sum loss: 3565.397461, avg loss: 2.410681, ppl: 11.141546 +epoch: 2, batch: 21165, sum loss: 4217.135254, avg loss: 2.425035, ppl: 11.302622 +epoch: 2, batch: 21166, sum loss: 4322.729004, avg loss: 2.597794, ppl: 13.434071 +epoch: 2, batch: 21167, sum loss: 3562.422852, avg loss: 2.416841, ppl: 11.210384 +epoch: 2, batch: 21168, sum loss: 3289.718750, avg loss: 2.184408, ppl: 8.885388 +epoch: 2, batch: 21169, sum loss: 4672.346680, avg loss: 2.578558, ppl: 13.178118 +epoch: 2, batch: 21170, sum loss: 3420.181885, avg loss: 2.331412, ppl: 10.292467 +epoch: 2, batch: 21171, sum loss: 4091.902832, avg loss: 2.570291, ppl: 13.069625 +epoch: 2, batch: 21172, sum loss: 5048.341797, avg loss: 2.644495, ppl: 14.076342 +epoch: 2, batch: 21173, sum loss: 4941.528809, avg loss: 2.752941, ppl: 15.688703 +epoch: 2, batch: 21174, sum loss: 4703.870117, avg loss: 2.581707, ppl: 13.219685 +epoch: 2, batch: 21175, sum loss: 4420.967773, avg loss: 2.588388, ppl: 13.308308 +epoch: 2, batch: 21176, sum loss: 4701.792480, avg loss: 2.442489, ppl: 11.501637 +epoch: 2, batch: 21177, sum loss: 4470.650391, avg loss: 2.623621, ppl: 13.785554 +epoch: 2, batch: 21178, sum loss: 4521.899414, avg loss: 2.610796, ppl: 13.609886 +epoch: 2, batch: 21179, sum loss: 3612.827637, avg loss: 2.477934, ppl: 11.916617 +epoch: 2, batch: 21180, sum loss: 4928.720703, avg loss: 2.547142, ppl: 12.770556 +epoch: 2, batch: 21181, sum loss: 4045.738770, avg loss: 2.562216, ppl: 12.964512 +epoch: 2, batch: 21182, sum loss: 4242.705566, avg loss: 2.354443, ppl: 10.532256 +epoch: 2, batch: 21183, sum loss: 4158.609863, avg loss: 2.452011, ppl: 11.611670 +epoch: 2, batch: 21184, sum loss: 4232.888184, avg loss: 2.737961, ppl: 15.455443 +epoch: 2, batch: 21185, sum loss: 3962.117676, avg loss: 2.691656, ppl: 
14.756093 +epoch: 2, batch: 21186, sum loss: 4365.211914, avg loss: 2.425118, ppl: 11.303560 +epoch: 2, batch: 21187, sum loss: 4209.130371, avg loss: 2.747474, ppl: 15.603171 +epoch: 2, batch: 21188, sum loss: 5232.030762, avg loss: 2.752252, ppl: 15.677896 +epoch: 2, batch: 21189, sum loss: 4446.657715, avg loss: 2.546769, ppl: 12.765786 +epoch: 2, batch: 21190, sum loss: 4196.145996, avg loss: 2.543119, ppl: 12.719277 +epoch: 2, batch: 21191, sum loss: 4951.610840, avg loss: 2.636641, ppl: 13.966206 +epoch: 2, batch: 21192, sum loss: 4318.457031, avg loss: 2.526891, ppl: 12.514541 +epoch: 2, batch: 21193, sum loss: 4745.507812, avg loss: 2.540422, ppl: 12.685019 +epoch: 2, batch: 21194, sum loss: 3950.732422, avg loss: 2.461515, ppl: 11.722563 +epoch: 2, batch: 21195, sum loss: 4387.625488, avg loss: 2.614795, ppl: 13.664412 +epoch: 2, batch: 21196, sum loss: 4230.315918, avg loss: 2.470979, ppl: 11.834023 +epoch: 2, batch: 21197, sum loss: 4302.957031, avg loss: 2.576621, ppl: 13.152621 +epoch: 2, batch: 21198, sum loss: 4293.791016, avg loss: 2.436885, ppl: 11.437356 +epoch: 2, batch: 21199, sum loss: 3563.983887, avg loss: 2.464719, ppl: 11.760180 +epoch: 2, batch: 21200, sum loss: 4263.144043, avg loss: 2.579034, ppl: 13.184401 +epoch: 2, batch: 21201, sum loss: 4433.556152, avg loss: 2.362044, ppl: 10.612620 +epoch: 2, batch: 21202, sum loss: 4496.883789, avg loss: 2.316787, ppl: 10.143032 +epoch: 2, batch: 21203, sum loss: 3135.017090, avg loss: 2.284998, ppl: 9.825666 +epoch: 2, batch: 21204, sum loss: 3912.524170, avg loss: 2.490468, ppl: 12.066916 +epoch: 2, batch: 21205, sum loss: 4274.394043, avg loss: 2.412186, ppl: 11.158331 +epoch: 2, batch: 21206, sum loss: 4294.650391, avg loss: 2.331515, ppl: 10.293525 +epoch: 2, batch: 21207, sum loss: 3506.811523, avg loss: 2.383964, ppl: 10.847822 +epoch: 2, batch: 21208, sum loss: 4155.026855, avg loss: 2.563249, ppl: 12.977916 +epoch: 2, batch: 21209, sum loss: 3755.517578, avg loss: 2.659715, ppl: 
14.292214 +epoch: 2, batch: 21210, sum loss: 3645.245850, avg loss: 2.387194, ppl: 10.882918 +epoch: 2, batch: 21211, sum loss: 4643.291504, avg loss: 2.737790, ppl: 15.452795 +epoch: 2, batch: 21212, sum loss: 4631.588379, avg loss: 2.699061, ppl: 14.865765 +epoch: 2, batch: 21213, sum loss: 4171.054199, avg loss: 2.429269, ppl: 11.350577 +epoch: 2, batch: 21214, sum loss: 4326.492676, avg loss: 2.575293, ppl: 13.135169 +epoch: 2, batch: 21215, sum loss: 5016.205078, avg loss: 2.568461, ppl: 13.045737 +epoch: 2, batch: 21216, sum loss: 4492.010742, avg loss: 2.473574, ppl: 11.864778 +epoch: 2, batch: 21217, sum loss: 4731.690918, avg loss: 2.749385, ppl: 15.633009 +epoch: 2, batch: 21218, sum loss: 3936.027832, avg loss: 2.564188, ppl: 12.990100 +epoch: 2, batch: 21219, sum loss: 4626.420898, avg loss: 2.434958, ppl: 11.415344 +epoch: 2, batch: 21220, sum loss: 4283.873047, avg loss: 2.662444, ppl: 14.331277 +epoch: 2, batch: 21221, sum loss: 3567.246094, avg loss: 2.395733, ppl: 10.976236 +epoch: 2, batch: 21222, sum loss: 4274.191406, avg loss: 2.441000, ppl: 11.484525 +epoch: 2, batch: 21223, sum loss: 3655.410156, avg loss: 2.406458, ppl: 11.094596 +epoch: 2, batch: 21224, sum loss: 4371.555664, avg loss: 2.657480, ppl: 14.260315 +epoch: 2, batch: 21225, sum loss: 4525.322754, avg loss: 2.493291, ppl: 12.101034 +epoch: 2, batch: 21226, sum loss: 4319.264160, avg loss: 2.713106, ppl: 15.076024 +epoch: 2, batch: 21227, sum loss: 4186.097168, avg loss: 2.429540, ppl: 11.353658 +epoch: 2, batch: 21228, sum loss: 5445.583008, avg loss: 2.815710, ppl: 16.705029 +epoch: 2, batch: 21229, sum loss: 4662.044922, avg loss: 2.657950, ppl: 14.267017 +epoch: 2, batch: 21230, sum loss: 3370.366699, avg loss: 2.407405, ppl: 11.105103 +epoch: 2, batch: 21231, sum loss: 4751.785645, avg loss: 2.723086, ppl: 15.227246 +epoch: 2, batch: 21232, sum loss: 3877.780762, avg loss: 2.315093, ppl: 10.125865 +epoch: 2, batch: 21233, sum loss: 4594.660156, avg loss: 2.597321, ppl: 
13.427714 +epoch: 2, batch: 21234, sum loss: 4622.666992, avg loss: 2.489320, ppl: 12.053075 +epoch: 2, batch: 21235, sum loss: 4528.512695, avg loss: 2.581820, ppl: 13.221182 +epoch: 2, batch: 21236, sum loss: 4533.152344, avg loss: 2.559657, ppl: 12.931379 +epoch: 2, batch: 21237, sum loss: 4222.389648, avg loss: 2.528377, ppl: 12.533149 +epoch: 2, batch: 21238, sum loss: 5149.266602, avg loss: 2.774389, ppl: 16.028835 +epoch: 2, batch: 21239, sum loss: 5124.427246, avg loss: 2.777467, ppl: 16.078247 +epoch: 2, batch: 21240, sum loss: 4161.041016, avg loss: 2.560641, ppl: 12.944106 +epoch: 2, batch: 21241, sum loss: 4998.488281, avg loss: 2.665860, ppl: 14.380318 +epoch: 2, batch: 21242, sum loss: 4390.077148, avg loss: 2.555342, ppl: 12.875699 +epoch: 2, batch: 21243, sum loss: 3924.777832, avg loss: 2.555194, ppl: 12.873796 +epoch: 2, batch: 21244, sum loss: 4098.263672, avg loss: 2.430761, ppl: 11.367534 +epoch: 2, batch: 21245, sum loss: 4478.874023, avg loss: 2.591941, ppl: 13.355671 +epoch: 2, batch: 21246, sum loss: 4468.246094, avg loss: 2.438999, ppl: 11.461561 +epoch: 2, batch: 21247, sum loss: 3952.639893, avg loss: 2.621114, ppl: 13.751034 +epoch: 2, batch: 21248, sum loss: 3916.334473, avg loss: 2.202663, ppl: 9.049079 +epoch: 2, batch: 21249, sum loss: 3849.490234, avg loss: 2.427169, ppl: 11.326772 +epoch: 2, batch: 21250, sum loss: 4334.621582, avg loss: 2.595582, ppl: 13.404384 +epoch: 2, batch: 21251, sum loss: 5096.322266, avg loss: 2.579111, ppl: 13.185406 +epoch: 2, batch: 21252, sum loss: 3732.558105, avg loss: 2.366873, ppl: 10.663989 +epoch: 2, batch: 21253, sum loss: 4209.114258, avg loss: 2.577535, ppl: 13.164643 +epoch: 2, batch: 21254, sum loss: 3535.405518, avg loss: 2.539803, ppl: 12.677171 +epoch: 2, batch: 21255, sum loss: 3460.554443, avg loss: 2.296320, ppl: 9.937547 +epoch: 2, batch: 21256, sum loss: 5406.856934, avg loss: 2.654323, ppl: 14.215364 +epoch: 2, batch: 21257, sum loss: 4839.959961, avg loss: 2.567618, ppl: 13.034740 
+epoch: 2, batch: 21258, sum loss: 4522.994629, avg loss: 2.618989, ppl: 13.721850 +epoch: 2, batch: 21259, sum loss: 4602.056641, avg loss: 2.555279, ppl: 12.874886 +epoch: 2, batch: 21260, sum loss: 3738.956787, avg loss: 2.580371, ppl: 13.202031 +epoch: 2, batch: 21261, sum loss: 4581.713379, avg loss: 2.446190, ppl: 11.544278 +epoch: 2, batch: 21262, sum loss: 4777.611816, avg loss: 2.497445, ppl: 12.151402 +epoch: 2, batch: 21263, sum loss: 4151.179199, avg loss: 2.366693, ppl: 10.662072 +epoch: 2, batch: 21264, sum loss: 3562.656494, avg loss: 2.477508, ppl: 11.911545 +epoch: 2, batch: 21265, sum loss: 4336.856445, avg loss: 2.715627, ppl: 15.114086 +epoch: 2, batch: 21266, sum loss: 4084.266113, avg loss: 2.552666, ppl: 12.841295 +epoch: 2, batch: 21267, sum loss: 4738.187500, avg loss: 2.815322, ppl: 16.698555 +epoch: 2, batch: 21268, sum loss: 4187.772461, avg loss: 2.443274, ppl: 11.510671 +epoch: 2, batch: 21269, sum loss: 3858.087891, avg loss: 2.462086, ppl: 11.729250 +epoch: 2, batch: 21270, sum loss: 4766.990723, avg loss: 2.554657, ppl: 12.866888 +epoch: 2, batch: 21271, sum loss: 4590.054199, avg loss: 2.780166, ppl: 16.121698 +epoch: 2, batch: 21272, sum loss: 4201.577148, avg loss: 2.660910, ppl: 14.309306 +epoch: 2, batch: 21273, sum loss: 3739.515625, avg loss: 2.305497, ppl: 10.029159 +epoch: 2, batch: 21274, sum loss: 3579.662598, avg loss: 2.463636, ppl: 11.747444 +epoch: 2, batch: 21275, sum loss: 4634.728516, avg loss: 2.677486, ppl: 14.548471 +epoch: 2, batch: 21276, sum loss: 3417.000977, avg loss: 2.307225, ppl: 10.046512 +epoch: 2, batch: 21277, sum loss: 4211.839844, avg loss: 2.713814, ppl: 15.086714 +epoch: 2, batch: 21278, sum loss: 4660.810059, avg loss: 2.711350, ppl: 15.049575 +epoch: 2, batch: 21279, sum loss: 3574.555664, avg loss: 2.405488, ppl: 11.083840 +epoch: 2, batch: 21280, sum loss: 3804.432861, avg loss: 2.321191, ppl: 10.187804 +epoch: 2, batch: 21281, sum loss: 3897.841553, avg loss: 2.664280, ppl: 14.357611 +epoch: 
2, batch: 21282, sum loss: 4767.462891, avg loss: 2.510512, ppl: 12.311236 +epoch: 2, batch: 21283, sum loss: 4019.286621, avg loss: 2.465820, ppl: 11.773133 +epoch: 2, batch: 21284, sum loss: 4319.360352, avg loss: 2.677843, ppl: 14.553665 +epoch: 2, batch: 21285, sum loss: 5543.812988, avg loss: 2.838614, ppl: 17.092060 +epoch: 2, batch: 21286, sum loss: 3641.801758, avg loss: 2.207153, ppl: 9.089797 +epoch: 2, batch: 21287, sum loss: 4242.505859, avg loss: 2.555727, ppl: 12.880654 +epoch: 2, batch: 21288, sum loss: 4377.214355, avg loss: 2.395848, ppl: 10.977504 +epoch: 2, batch: 21289, sum loss: 4249.960938, avg loss: 2.575734, ppl: 13.140958 +epoch: 2, batch: 21290, sum loss: 3880.548340, avg loss: 2.250898, ppl: 9.496264 +epoch: 2, batch: 21291, sum loss: 4382.135742, avg loss: 2.639841, ppl: 14.010974 +epoch: 2, batch: 21292, sum loss: 3521.654297, avg loss: 2.216271, ppl: 9.173058 +epoch: 2, batch: 21293, sum loss: 4478.443359, avg loss: 2.523067, ppl: 12.466767 +epoch: 2, batch: 21294, sum loss: 4117.288086, avg loss: 2.543106, ppl: 12.719113 +epoch: 2, batch: 21295, sum loss: 3710.670410, avg loss: 2.239391, ppl: 9.387609 +epoch: 2, batch: 21296, sum loss: 5127.268066, avg loss: 2.579108, ppl: 13.185378 +epoch: 2, batch: 21297, sum loss: 3915.731689, avg loss: 2.494097, ppl: 12.110789 +epoch: 2, batch: 21298, sum loss: 4510.953613, avg loss: 2.479908, ppl: 11.940171 +epoch: 2, batch: 21299, sum loss: 3929.990234, avg loss: 2.535478, ppl: 12.622458 +epoch: 2, batch: 21300, sum loss: 3985.981689, avg loss: 2.357174, ppl: 10.561068 +epoch: 2, batch: 21301, sum loss: 4184.742676, avg loss: 2.523970, ppl: 12.478041 +epoch: 2, batch: 21302, sum loss: 4589.453613, avg loss: 2.459514, ppl: 11.699126 +epoch: 2, batch: 21303, sum loss: 4298.226074, avg loss: 2.576874, ppl: 13.155951 +epoch: 2, batch: 21304, sum loss: 3925.393066, avg loss: 2.439648, ppl: 11.468999 +epoch: 2, batch: 21305, sum loss: 4629.163086, avg loss: 2.519958, ppl: 12.428078 +epoch: 2, batch: 
21306, sum loss: 2983.062012, avg loss: 2.195042, ppl: 8.980377 +epoch: 2, batch: 21307, sum loss: 4401.568848, avg loss: 2.541322, ppl: 12.696439 +epoch: 2, batch: 21308, sum loss: 3592.808594, avg loss: 2.161738, ppl: 8.686223 +epoch: 2, batch: 21309, sum loss: 4567.246582, avg loss: 2.784906, ppl: 16.198301 +epoch: 2, batch: 21310, sum loss: 3922.739502, avg loss: 2.384644, ppl: 10.855198 +epoch: 2, batch: 21311, sum loss: 4203.347168, avg loss: 2.400541, ppl: 11.029142 +epoch: 2, batch: 21312, sum loss: 5378.000000, avg loss: 2.653182, ppl: 14.199149 +epoch: 2, batch: 21313, sum loss: 4282.743164, avg loss: 2.495771, ppl: 12.131085 +epoch: 2, batch: 21314, sum loss: 3140.575928, avg loss: 2.279083, ppl: 9.767717 +epoch: 2, batch: 21315, sum loss: 4602.924805, avg loss: 2.538845, ppl: 12.665030 +epoch: 2, batch: 21316, sum loss: 3391.113281, avg loss: 2.434396, ppl: 11.408923 +epoch: 2, batch: 21317, sum loss: 3512.997070, avg loss: 2.188783, ppl: 8.924347 +epoch: 2, batch: 21318, sum loss: 4052.623535, avg loss: 2.484748, ppl: 11.998092 +epoch: 2, batch: 21319, sum loss: 4331.307129, avg loss: 2.479283, ppl: 11.932704 +epoch: 2, batch: 21320, sum loss: 3830.662109, avg loss: 2.335770, ppl: 10.337413 +epoch: 2, batch: 21321, sum loss: 5037.681152, avg loss: 2.789414, ppl: 16.271477 +epoch: 2, batch: 21322, sum loss: 5240.112793, avg loss: 2.572466, ppl: 13.098080 +epoch: 2, batch: 21323, sum loss: 3738.039795, avg loss: 2.352448, ppl: 10.511270 +epoch: 2, batch: 21324, sum loss: 3952.049316, avg loss: 2.655947, ppl: 14.238466 +epoch: 2, batch: 21325, sum loss: 4731.299805, avg loss: 2.692828, ppl: 14.773402 +epoch: 2, batch: 21326, sum loss: 5014.719238, avg loss: 2.518694, ppl: 12.412372 +epoch: 2, batch: 21327, sum loss: 3922.198730, avg loss: 2.522315, ppl: 12.457397 +epoch: 2, batch: 21328, sum loss: 4727.546387, avg loss: 2.710749, ppl: 15.040535 +epoch: 2, batch: 21329, sum loss: 3603.930908, avg loss: 2.298425, ppl: 9.958488 +epoch: 2, batch: 21330, sum 
loss: 4110.100098, avg loss: 2.477456, ppl: 11.910929 +epoch: 2, batch: 21331, sum loss: 4490.704590, avg loss: 2.577902, ppl: 13.169475 +epoch: 2, batch: 21332, sum loss: 4397.757812, avg loss: 2.555350, ppl: 12.875810 +epoch: 2, batch: 21333, sum loss: 4193.557129, avg loss: 2.491715, ppl: 12.081985 +epoch: 2, batch: 21334, sum loss: 3354.971436, avg loss: 2.265342, ppl: 9.634419 +epoch: 2, batch: 21335, sum loss: 4529.905762, avg loss: 2.633666, ppl: 13.924725 +epoch: 2, batch: 21336, sum loss: 4841.115723, avg loss: 2.516172, ppl: 12.381116 +epoch: 2, batch: 21337, sum loss: 4490.199219, avg loss: 2.533973, ppl: 12.603475 +epoch: 2, batch: 21338, sum loss: 3513.521484, avg loss: 2.509658, ppl: 12.300724 +epoch: 2, batch: 21339, sum loss: 4661.041504, avg loss: 2.568067, ppl: 13.040593 +epoch: 2, batch: 21340, sum loss: 4557.242188, avg loss: 2.617600, ppl: 13.702804 +epoch: 2, batch: 21341, sum loss: 4499.594238, avg loss: 2.695982, ppl: 14.820068 +epoch: 2, batch: 21342, sum loss: 3887.744873, avg loss: 2.465279, ppl: 11.766763 +epoch: 2, batch: 21343, sum loss: 4031.046631, avg loss: 2.429805, ppl: 11.356668 +epoch: 2, batch: 21344, sum loss: 3659.776123, avg loss: 2.361146, ppl: 10.603095 +epoch: 2, batch: 21345, sum loss: 4615.408691, avg loss: 2.814274, ppl: 16.681055 +epoch: 2, batch: 21346, sum loss: 4363.103516, avg loss: 2.444316, ppl: 11.522664 +epoch: 2, batch: 21347, sum loss: 4275.792969, avg loss: 2.624796, ppl: 13.801761 +epoch: 2, batch: 21348, sum loss: 3145.460693, avg loss: 2.261295, ppl: 9.595504 +epoch: 2, batch: 21349, sum loss: 4426.127930, avg loss: 2.387340, ppl: 10.884499 +epoch: 2, batch: 21350, sum loss: 3973.994873, avg loss: 2.453083, ppl: 11.624132 +epoch: 2, batch: 21351, sum loss: 4919.626953, avg loss: 2.768501, ppl: 15.934734 +epoch: 2, batch: 21352, sum loss: 3749.494385, avg loss: 2.229188, ppl: 9.292317 +epoch: 2, batch: 21353, sum loss: 4636.117188, avg loss: 2.415903, ppl: 11.199875 +epoch: 2, batch: 21354, sum loss: 
4155.744141, avg loss: 2.557381, ppl: 12.901981 +epoch: 2, batch: 21355, sum loss: 4443.455078, avg loss: 2.575916, ppl: 13.143352 +epoch: 2, batch: 21356, sum loss: 3922.461914, avg loss: 2.652104, ppl: 14.183848 +epoch: 2, batch: 21357, sum loss: 4388.391602, avg loss: 2.509086, ppl: 12.293690 +epoch: 2, batch: 21358, sum loss: 4286.883789, avg loss: 2.511355, ppl: 12.321619 +epoch: 2, batch: 21359, sum loss: 4434.232910, avg loss: 2.755894, ppl: 15.735097 +epoch: 2, batch: 21360, sum loss: 3780.322998, avg loss: 2.247517, ppl: 9.464204 +epoch: 2, batch: 21361, sum loss: 4022.564453, avg loss: 2.421773, ppl: 11.265813 +epoch: 2, batch: 21362, sum loss: 3873.687744, avg loss: 2.518653, ppl: 12.411862 +epoch: 2, batch: 21363, sum loss: 4658.976562, avg loss: 2.769903, ppl: 15.957082 +epoch: 2, batch: 21364, sum loss: 3609.345459, avg loss: 2.398236, ppl: 11.003752 +epoch: 2, batch: 21365, sum loss: 5259.895020, avg loss: 2.655171, ppl: 14.227425 +epoch: 2, batch: 21366, sum loss: 3985.765869, avg loss: 2.611904, ppl: 13.624973 +epoch: 2, batch: 21367, sum loss: 4150.221680, avg loss: 2.628386, ppl: 13.851396 +epoch: 2, batch: 21368, sum loss: 4517.954102, avg loss: 2.574333, ppl: 13.122558 +epoch: 2, batch: 21369, sum loss: 4476.617676, avg loss: 2.648886, ppl: 14.138283 +epoch: 2, batch: 21370, sum loss: 3766.843262, avg loss: 2.555525, ppl: 12.878054 +epoch: 2, batch: 21371, sum loss: 4285.315918, avg loss: 2.525230, ppl: 12.493773 +epoch: 2, batch: 21372, sum loss: 4379.307129, avg loss: 2.552044, ppl: 12.833307 +epoch: 2, batch: 21373, sum loss: 4917.634766, avg loss: 2.739630, ppl: 15.481248 +epoch: 2, batch: 21374, sum loss: 4532.937500, avg loss: 2.703004, ppl: 14.924496 +epoch: 2, batch: 21375, sum loss: 4259.889648, avg loss: 2.459521, ppl: 11.699202 +epoch: 2, batch: 21376, sum loss: 3582.096191, avg loss: 2.275792, ppl: 9.735623 +epoch: 2, batch: 21377, sum loss: 4445.153320, avg loss: 2.578395, ppl: 13.175979 +epoch: 2, batch: 21378, sum loss: 
3699.043945, avg loss: 2.449698, ppl: 11.584847 +epoch: 2, batch: 21379, sum loss: 3660.387207, avg loss: 2.274945, ppl: 9.727388 +epoch: 2, batch: 21380, sum loss: 4770.632812, avg loss: 2.702908, ppl: 14.923065 +epoch: 2, batch: 21381, sum loss: 5179.754395, avg loss: 2.566776, ppl: 13.023771 +epoch: 2, batch: 21382, sum loss: 4351.576660, avg loss: 2.550748, ppl: 12.816691 +epoch: 2, batch: 21383, sum loss: 4543.982422, avg loss: 2.671359, ppl: 14.459611 +epoch: 2, batch: 21384, sum loss: 4201.982422, avg loss: 2.486380, ppl: 12.017694 +epoch: 2, batch: 21385, sum loss: 4468.524414, avg loss: 2.601004, ppl: 13.477261 +epoch: 2, batch: 21386, sum loss: 4902.291992, avg loss: 2.588327, ppl: 13.307495 +epoch: 2, batch: 21387, sum loss: 4379.244629, avg loss: 2.460250, ppl: 11.707737 +epoch: 2, batch: 21388, sum loss: 3959.457520, avg loss: 2.477758, ppl: 11.914524 +epoch: 2, batch: 21389, sum loss: 4974.200195, avg loss: 2.657158, ppl: 14.255722 +epoch: 2, batch: 21390, sum loss: 3673.313965, avg loss: 2.298695, ppl: 9.961171 +epoch: 2, batch: 21391, sum loss: 4154.671387, avg loss: 2.353921, ppl: 10.526769 +epoch: 2, batch: 21392, sum loss: 3595.587158, avg loss: 2.444315, ppl: 11.522654 +epoch: 2, batch: 21393, sum loss: 3992.532715, avg loss: 2.515774, ppl: 12.376182 +epoch: 2, batch: 21394, sum loss: 3951.422363, avg loss: 2.370379, ppl: 10.701449 +epoch: 2, batch: 21395, sum loss: 4548.021484, avg loss: 2.507178, ppl: 12.270258 +epoch: 2, batch: 21396, sum loss: 4551.300781, avg loss: 2.539788, ppl: 12.676990 +epoch: 2, batch: 21397, sum loss: 3522.386230, avg loss: 2.320413, ppl: 10.179874 +epoch: 2, batch: 21398, sum loss: 4532.848145, avg loss: 2.605085, ppl: 13.532377 +epoch: 2, batch: 21399, sum loss: 4163.757324, avg loss: 2.326121, ppl: 10.238154 +epoch: 2, batch: 21400, sum loss: 4527.683594, avg loss: 2.497344, ppl: 12.150174 +epoch: 2, batch: 21401, sum loss: 4382.699219, avg loss: 2.528967, ppl: 12.540541 +epoch: 2, batch: 21402, sum loss: 
4413.428711, avg loss: 2.676427, ppl: 14.533079 +epoch: 2, batch: 21403, sum loss: 5218.990234, avg loss: 2.759910, ppl: 15.798427 +epoch: 2, batch: 21404, sum loss: 4621.857910, avg loss: 2.860060, ppl: 17.462582 +epoch: 2, batch: 21405, sum loss: 4085.088623, avg loss: 2.325036, ppl: 10.227049 +epoch: 2, batch: 21406, sum loss: 3926.977295, avg loss: 2.454361, ppl: 11.638990 +epoch: 2, batch: 21407, sum loss: 3747.959961, avg loss: 2.469012, ppl: 11.810769 +epoch: 2, batch: 21408, sum loss: 4211.750977, avg loss: 2.575994, ppl: 13.144382 +epoch: 2, batch: 21409, sum loss: 4395.949707, avg loss: 2.654559, ppl: 14.218713 +epoch: 2, batch: 21410, sum loss: 4155.777832, avg loss: 2.342603, ppl: 10.408294 +epoch: 2, batch: 21411, sum loss: 3806.622559, avg loss: 2.476658, ppl: 11.901419 +epoch: 2, batch: 21412, sum loss: 4302.891113, avg loss: 2.507512, ppl: 12.274358 +epoch: 2, batch: 21413, sum loss: 3800.405762, avg loss: 2.393203, ppl: 10.948503 +epoch: 2, batch: 21414, sum loss: 3821.646240, avg loss: 2.282943, ppl: 9.805495 +epoch: 2, batch: 21415, sum loss: 3648.264648, avg loss: 2.404921, ppl: 11.077551 +epoch: 2, batch: 21416, sum loss: 3888.687988, avg loss: 2.303725, ppl: 10.011408 +epoch: 2, batch: 21417, sum loss: 3894.772461, avg loss: 2.207921, ppl: 9.096785 +epoch: 2, batch: 21418, sum loss: 3965.290039, avg loss: 2.232708, ppl: 9.325089 +epoch: 2, batch: 21419, sum loss: 5125.064453, avg loss: 2.918602, ppl: 18.515381 +epoch: 2, batch: 21420, sum loss: 3750.226318, avg loss: 2.396311, ppl: 10.982585 +epoch: 2, batch: 21421, sum loss: 4194.552734, avg loss: 2.559215, ppl: 12.925661 +epoch: 2, batch: 21422, sum loss: 5002.756836, avg loss: 2.852199, ppl: 17.325836 +epoch: 2, batch: 21423, sum loss: 4601.936523, avg loss: 2.401846, ppl: 11.043543 +epoch: 2, batch: 21424, sum loss: 3787.281738, avg loss: 2.412282, ppl: 11.159392 +epoch: 2, batch: 21425, sum loss: 4408.943848, avg loss: 2.397468, ppl: 10.995305 +epoch: 2, batch: 21426, sum loss: 
4815.395020, avg loss: 2.734466, ppl: 15.401518 +epoch: 2, batch: 21427, sum loss: 4348.740234, avg loss: 2.602478, ppl: 13.497140 +epoch: 2, batch: 21428, sum loss: 4822.311035, avg loss: 2.670161, ppl: 14.442298 +epoch: 2, batch: 21429, sum loss: 4229.960449, avg loss: 2.627305, ppl: 13.836424 +epoch: 2, batch: 21430, sum loss: 4227.196289, avg loss: 2.499820, ppl: 12.180307 +epoch: 2, batch: 21431, sum loss: 4949.780273, avg loss: 2.756002, ppl: 15.736808 +epoch: 2, batch: 21432, sum loss: 4292.947266, avg loss: 2.589232, ppl: 13.319544 +epoch: 2, batch: 21433, sum loss: 3237.623779, avg loss: 2.080735, ppl: 8.010354 +epoch: 2, batch: 21434, sum loss: 4738.947266, avg loss: 2.553312, ppl: 12.849592 +epoch: 2, batch: 21435, sum loss: 4886.858887, avg loss: 2.673337, ppl: 14.488229 +epoch: 2, batch: 21436, sum loss: 4032.874512, avg loss: 2.406250, ppl: 11.092284 +epoch: 2, batch: 21437, sum loss: 4670.664062, avg loss: 2.534272, ppl: 12.607255 +epoch: 2, batch: 21438, sum loss: 5547.305176, avg loss: 2.843314, ppl: 17.172581 +epoch: 2, batch: 21439, sum loss: 3952.135986, avg loss: 2.522104, ppl: 12.454768 +epoch: 2, batch: 21440, sum loss: 3313.034424, avg loss: 2.320052, ppl: 10.176205 +epoch: 2, batch: 21441, sum loss: 4118.396484, avg loss: 2.476486, ppl: 11.899379 +epoch: 2, batch: 21442, sum loss: 4196.558594, avg loss: 2.601710, ppl: 13.486785 +epoch: 2, batch: 21443, sum loss: 4594.101562, avg loss: 2.646372, ppl: 14.102778 +epoch: 2, batch: 21444, sum loss: 4491.817871, avg loss: 2.495455, ppl: 12.127245 +epoch: 2, batch: 21445, sum loss: 4307.395508, avg loss: 2.565453, ppl: 13.006547 +epoch: 2, batch: 21446, sum loss: 3959.128174, avg loss: 2.328899, ppl: 10.266633 +epoch: 2, batch: 21447, sum loss: 4023.986572, avg loss: 2.462660, ppl: 11.735989 +epoch: 2, batch: 21448, sum loss: 4237.567383, avg loss: 2.598141, ppl: 13.438728 +epoch: 2, batch: 21449, sum loss: 3532.153809, avg loss: 2.236956, ppl: 9.364782 +epoch: 2, batch: 21450, sum loss: 
4084.333984, avg loss: 2.575242, ppl: 13.134496 +epoch: 2, batch: 21451, sum loss: 4629.891113, avg loss: 2.477202, ppl: 11.907902 +epoch: 2, batch: 21452, sum loss: 4346.610840, avg loss: 2.534467, ppl: 12.609705 +epoch: 2, batch: 21453, sum loss: 4153.398926, avg loss: 2.387011, ppl: 10.880920 +epoch: 2, batch: 21454, sum loss: 3131.120850, avg loss: 2.364895, ppl: 10.642920 +epoch: 2, batch: 21455, sum loss: 5263.370605, avg loss: 2.810128, ppl: 16.612051 +epoch: 2, batch: 21456, sum loss: 4093.938965, avg loss: 2.636149, ppl: 13.959342 +epoch: 2, batch: 21457, sum loss: 3626.455078, avg loss: 2.287984, ppl: 9.855051 +epoch: 2, batch: 21458, sum loss: 4191.728516, avg loss: 2.467174, ppl: 11.789084 +epoch: 2, batch: 21459, sum loss: 3504.221680, avg loss: 2.356571, ppl: 10.554702 +epoch: 2, batch: 21460, sum loss: 3954.584961, avg loss: 2.484036, ppl: 11.989553 +epoch: 2, batch: 21461, sum loss: 4621.888672, avg loss: 2.662378, ppl: 14.330331 +epoch: 2, batch: 21462, sum loss: 4069.383057, avg loss: 2.210420, ppl: 9.119545 +epoch: 2, batch: 21463, sum loss: 5202.535645, avg loss: 2.540301, ppl: 12.683483 +epoch: 2, batch: 21464, sum loss: 4364.975098, avg loss: 2.659948, ppl: 14.295547 +epoch: 2, batch: 21465, sum loss: 4727.612793, avg loss: 2.755019, ppl: 15.721342 +epoch: 2, batch: 21466, sum loss: 3469.246094, avg loss: 2.347257, ppl: 10.456851 +epoch: 2, batch: 21467, sum loss: 4099.143066, avg loss: 2.411261, ppl: 11.148008 +epoch: 2, batch: 21468, sum loss: 4365.367676, avg loss: 2.372482, ppl: 10.723979 +epoch: 2, batch: 21469, sum loss: 3407.176025, avg loss: 2.367739, ppl: 10.673230 +epoch: 2, batch: 21470, sum loss: 4175.255859, avg loss: 2.507661, ppl: 12.276187 +epoch: 2, batch: 21471, sum loss: 3679.366211, avg loss: 2.244885, ppl: 9.439327 +epoch: 2, batch: 21472, sum loss: 4458.585449, avg loss: 2.610413, ppl: 13.604666 +epoch: 2, batch: 21473, sum loss: 2661.610107, avg loss: 2.219858, ppl: 9.206027 +epoch: 2, batch: 21474, sum loss: 
4468.445312, avg loss: 2.507545, ppl: 12.274761 +epoch: 2, batch: 21475, sum loss: 3930.751221, avg loss: 2.277376, ppl: 9.751061 +epoch: 2, batch: 21476, sum loss: 3950.516357, avg loss: 2.303508, ppl: 10.009233 +epoch: 2, batch: 21477, sum loss: 4286.319336, avg loss: 2.301998, ppl: 9.994127 +epoch: 2, batch: 21478, sum loss: 4564.007324, avg loss: 2.650411, ppl: 14.159852 +epoch: 2, batch: 21479, sum loss: 5090.916992, avg loss: 2.628248, ppl: 13.849487 +epoch: 2, batch: 21480, sum loss: 3802.077881, avg loss: 2.344068, ppl: 10.423549 +epoch: 2, batch: 21481, sum loss: 4043.741699, avg loss: 2.441873, ppl: 11.494551 +epoch: 2, batch: 21482, sum loss: 3318.183594, avg loss: 2.225475, ppl: 9.257882 +epoch: 2, batch: 21483, sum loss: 3920.713623, avg loss: 2.489342, ppl: 12.053342 +epoch: 2, batch: 21484, sum loss: 4046.777832, avg loss: 2.418875, ppl: 11.233214 +epoch: 2, batch: 21485, sum loss: 4639.191406, avg loss: 2.618054, ppl: 13.709019 +epoch: 2, batch: 21486, sum loss: 4409.506348, avg loss: 2.478643, ppl: 11.925074 +epoch: 2, batch: 21487, sum loss: 4729.747559, avg loss: 2.511815, ppl: 12.327282 +epoch: 2, batch: 21488, sum loss: 5096.890137, avg loss: 2.620509, ppl: 13.742719 +epoch: 2, batch: 21489, sum loss: 3901.730957, avg loss: 2.368993, ppl: 10.686621 +epoch: 2, batch: 21490, sum loss: 3779.153564, avg loss: 2.497788, ppl: 12.155581 +epoch: 2, batch: 21491, sum loss: 3841.109131, avg loss: 2.478135, ppl: 11.919016 +epoch: 2, batch: 21492, sum loss: 3960.674805, avg loss: 2.182190, ppl: 8.865701 +epoch: 2, batch: 21493, sum loss: 4547.587402, avg loss: 2.694068, ppl: 14.791733 +epoch: 2, batch: 21494, sum loss: 4500.888672, avg loss: 2.470301, ppl: 11.826008 +epoch: 2, batch: 21495, sum loss: 3593.729980, avg loss: 2.557815, ppl: 12.907581 +epoch: 2, batch: 21496, sum loss: 4838.695801, avg loss: 2.445021, ppl: 11.530788 +epoch: 2, batch: 21497, sum loss: 4530.081055, avg loss: 2.518111, ppl: 12.405135 +epoch: 2, batch: 21498, sum loss: 
3938.786133, avg loss: 2.323768, ppl: 10.214086 +epoch: 2, batch: 21499, sum loss: 5561.735352, avg loss: 2.644667, ppl: 14.078761 +epoch: 2, batch: 21500, sum loss: 4301.318359, avg loss: 2.519812, ppl: 12.426256 +epoch: 2, batch: 21501, sum loss: 4523.099609, avg loss: 2.543926, ppl: 12.729543 +epoch: 2, batch: 21502, sum loss: 4217.192871, avg loss: 2.365223, ppl: 10.646415 +epoch: 2, batch: 21503, sum loss: 4971.634766, avg loss: 2.696114, ppl: 14.822025 +epoch: 2, batch: 21504, sum loss: 4489.041504, avg loss: 2.612946, ppl: 13.639176 +epoch: 2, batch: 21505, sum loss: 4854.203613, avg loss: 2.641025, ppl: 14.027572 +epoch: 2, batch: 21506, sum loss: 3926.862793, avg loss: 2.404692, ppl: 11.075024 +epoch: 2, batch: 21507, sum loss: 4131.294434, avg loss: 2.374307, ppl: 10.743567 +epoch: 2, batch: 21508, sum loss: 3989.635742, avg loss: 2.825521, ppl: 16.869732 +epoch: 2, batch: 21509, sum loss: 4958.434570, avg loss: 2.581174, ppl: 13.212636 +epoch: 2, batch: 21510, sum loss: 4929.081055, avg loss: 2.652896, ppl: 14.195090 +epoch: 2, batch: 21511, sum loss: 4246.195801, avg loss: 2.548737, ppl: 12.790936 +epoch: 2, batch: 21512, sum loss: 4802.869141, avg loss: 2.835224, ppl: 17.034210 +epoch: 2, batch: 21513, sum loss: 3508.033447, avg loss: 2.263247, ppl: 9.614261 +epoch: 2, batch: 21514, sum loss: 4503.844238, avg loss: 2.551753, ppl: 12.829575 +epoch: 2, batch: 21515, sum loss: 4479.358887, avg loss: 2.729652, ppl: 15.327551 +epoch: 2, batch: 21516, sum loss: 3452.686523, avg loss: 2.239096, ppl: 9.384848 +epoch: 2, batch: 21517, sum loss: 4759.123047, avg loss: 2.842965, ppl: 17.166588 +epoch: 2, batch: 21518, sum loss: 3753.683594, avg loss: 2.541424, ppl: 12.697743 +epoch: 2, batch: 21519, sum loss: 4103.875977, avg loss: 2.358549, ppl: 10.575599 +epoch: 2, batch: 21520, sum loss: 3758.543701, avg loss: 2.392453, ppl: 10.940300 +epoch: 2, batch: 21521, sum loss: 3646.564209, avg loss: 2.363295, ppl: 10.625907 +epoch: 2, batch: 21522, sum loss: 
3805.373047, avg loss: 2.496964, ppl: 12.145564 +epoch: 2, batch: 21523, sum loss: 4989.317871, avg loss: 2.765697, ppl: 15.890116 +epoch: 2, batch: 21524, sum loss: 4152.233398, avg loss: 2.513458, ppl: 12.347560 +epoch: 2, batch: 21525, sum loss: 3914.812500, avg loss: 2.443703, ppl: 11.515606 +epoch: 2, batch: 21526, sum loss: 3776.578125, avg loss: 2.463521, ppl: 11.746099 +epoch: 2, batch: 21527, sum loss: 4272.969727, avg loss: 2.298531, ppl: 9.959544 +epoch: 2, batch: 21528, sum loss: 4486.893066, avg loss: 2.547923, ppl: 12.780535 +epoch: 2, batch: 21529, sum loss: 3530.724121, avg loss: 2.274951, ppl: 9.727445 +epoch: 2, batch: 21530, sum loss: 4273.656250, avg loss: 2.531787, ppl: 12.575958 +epoch: 2, batch: 21531, sum loss: 4615.390137, avg loss: 2.622381, ppl: 13.768464 +epoch: 2, batch: 21532, sum loss: 5096.448242, avg loss: 2.653018, ppl: 14.196827 +epoch: 2, batch: 21533, sum loss: 3677.599609, avg loss: 2.441965, ppl: 11.495611 +epoch: 2, batch: 21534, sum loss: 5028.292969, avg loss: 2.765838, ppl: 15.892347 +epoch: 2, batch: 21535, sum loss: 3638.587891, avg loss: 2.228162, ppl: 9.282784 +epoch: 2, batch: 21536, sum loss: 4643.477051, avg loss: 2.497836, ppl: 12.156158 +epoch: 2, batch: 21537, sum loss: 3816.114746, avg loss: 2.242136, ppl: 9.413412 +epoch: 2, batch: 21538, sum loss: 4887.822266, avg loss: 2.468597, ppl: 11.805871 +epoch: 2, batch: 21539, sum loss: 4710.609375, avg loss: 2.571293, ppl: 13.082734 +epoch: 2, batch: 21540, sum loss: 4348.997559, avg loss: 2.341948, ppl: 10.401480 +epoch: 2, batch: 21541, sum loss: 4204.115234, avg loss: 2.531075, ppl: 12.567005 +epoch: 2, batch: 21542, sum loss: 4122.902832, avg loss: 2.593021, ppl: 13.370097 +epoch: 2, batch: 21543, sum loss: 3737.553711, avg loss: 2.457300, ppl: 11.673256 +epoch: 2, batch: 21544, sum loss: 3832.666260, avg loss: 2.682062, ppl: 14.615197 +epoch: 2, batch: 21545, sum loss: 4401.839355, avg loss: 2.519656, ppl: 12.424324 +epoch: 2, batch: 21546, sum loss: 
4091.911621, avg loss: 2.672705, ppl: 14.479088 +epoch: 2, batch: 21547, sum loss: 3973.806641, avg loss: 2.415688, ppl: 11.197470 +epoch: 2, batch: 21548, sum loss: 3234.395020, avg loss: 2.164923, ppl: 8.713930 +epoch: 2, batch: 21549, sum loss: 4411.689453, avg loss: 2.496712, ppl: 12.142501 +epoch: 2, batch: 21550, sum loss: 3814.837158, avg loss: 2.316234, ppl: 10.137424 +epoch: 2, batch: 21551, sum loss: 3602.430176, avg loss: 2.226471, ppl: 9.267104 +epoch: 2, batch: 21552, sum loss: 3359.815186, avg loss: 2.208951, ppl: 9.106163 +epoch: 2, batch: 21553, sum loss: 4238.606934, avg loss: 2.519980, ppl: 12.428350 +epoch: 2, batch: 21554, sum loss: 4240.924316, avg loss: 2.595425, ppl: 13.402281 +epoch: 2, batch: 21555, sum loss: 4184.858398, avg loss: 2.431643, ppl: 11.377565 +epoch: 2, batch: 21556, sum loss: 4986.810059, avg loss: 2.864337, ppl: 17.537416 +epoch: 2, batch: 21557, sum loss: 4732.222168, avg loss: 2.429272, ppl: 11.350617 +epoch: 2, batch: 21558, sum loss: 5053.992188, avg loss: 2.528260, ppl: 12.531685 +epoch: 2, batch: 21559, sum loss: 4072.724121, avg loss: 2.390096, ppl: 10.914546 +epoch: 2, batch: 21560, sum loss: 5173.866211, avg loss: 2.802744, ppl: 16.489840 +epoch: 2, batch: 21561, sum loss: 3885.710205, avg loss: 2.365009, ppl: 10.644135 +epoch: 2, batch: 21562, sum loss: 4314.960449, avg loss: 2.661913, ppl: 14.323660 +epoch: 2, batch: 21563, sum loss: 4022.168457, avg loss: 2.496691, ppl: 12.142245 +epoch: 2, batch: 21564, sum loss: 4143.212402, avg loss: 2.477998, ppl: 11.917382 +epoch: 2, batch: 21565, sum loss: 4061.969727, avg loss: 2.436694, ppl: 11.435178 +epoch: 2, batch: 21566, sum loss: 3644.264404, avg loss: 2.253720, ppl: 9.523092 +epoch: 2, batch: 21567, sum loss: 2823.911377, avg loss: 2.262749, ppl: 9.609473 +epoch: 2, batch: 21568, sum loss: 4843.626465, avg loss: 2.587407, ppl: 13.295257 +epoch: 2, batch: 21569, sum loss: 4253.558105, avg loss: 2.693830, ppl: 14.788210 +epoch: 2, batch: 21570, sum loss: 4611.717285, 
avg loss: 2.535304, ppl: 12.620262 +epoch: 2, batch: 21571, sum loss: 3881.757080, avg loss: 2.385837, ppl: 10.868156 +epoch: 2, batch: 21572, sum loss: 4385.698730, avg loss: 2.718970, ppl: 15.164696 +epoch: 2, batch: 21573, sum loss: 4098.334961, avg loss: 2.358075, ppl: 10.570587 +epoch: 2, batch: 21574, sum loss: 4143.285156, avg loss: 2.556005, ppl: 12.884248 +epoch: 2, batch: 21575, sum loss: 3975.286133, avg loss: 2.418057, ppl: 11.224032 +epoch: 2, batch: 21576, sum loss: 4597.795898, avg loss: 2.693495, ppl: 14.783254 +epoch: 2, batch: 21577, sum loss: 4294.185059, avg loss: 2.527478, ppl: 12.521886 +epoch: 2, batch: 21578, sum loss: 4349.623047, avg loss: 2.772226, ppl: 15.994203 +epoch: 2, batch: 21579, sum loss: 4952.715820, avg loss: 2.617714, ppl: 13.704365 +epoch: 2, batch: 21580, sum loss: 4051.747559, avg loss: 2.191318, ppl: 8.947000 +epoch: 2, batch: 21581, sum loss: 3701.550049, avg loss: 2.216497, ppl: 9.175136 +epoch: 2, batch: 21582, sum loss: 4118.964355, avg loss: 2.526972, ppl: 12.515553 +epoch: 2, batch: 21583, sum loss: 3485.375000, avg loss: 2.270602, ppl: 9.685234 +epoch: 2, batch: 21584, sum loss: 4085.136475, avg loss: 2.404436, ppl: 11.072183 +epoch: 2, batch: 21585, sum loss: 3724.093750, avg loss: 2.276341, ppl: 9.740973 +epoch: 2, batch: 21586, sum loss: 4784.930664, avg loss: 2.808058, ppl: 16.577694 +epoch: 2, batch: 21587, sum loss: 4432.911133, avg loss: 2.554992, ppl: 12.871196 +epoch: 2, batch: 21588, sum loss: 4538.776855, avg loss: 2.527159, ppl: 12.517889 +epoch: 2, batch: 21589, sum loss: 4397.385254, avg loss: 2.645839, ppl: 14.095272 +epoch: 2, batch: 21590, sum loss: 4671.916016, avg loss: 2.502365, ppl: 12.211344 +epoch: 2, batch: 21591, sum loss: 3130.425293, avg loss: 2.391463, ppl: 10.929475 +epoch: 2, batch: 21592, sum loss: 3955.890869, avg loss: 2.475526, ppl: 11.887962 +epoch: 2, batch: 21593, sum loss: 4482.517090, avg loss: 2.595551, ppl: 13.403975 +epoch: 2, batch: 21594, sum loss: 4044.310303, avg loss: 
2.325653, ppl: 10.233358 +epoch: 2, batch: 21595, sum loss: 3662.216797, avg loss: 2.234421, ppl: 9.341077 +epoch: 2, batch: 21596, sum loss: 4028.925293, avg loss: 2.462668, ppl: 11.736087 +epoch: 2, batch: 21597, sum loss: 4593.566895, avg loss: 2.607019, ppl: 13.558569 +epoch: 2, batch: 21598, sum loss: 5159.160156, avg loss: 2.779720, ppl: 16.114506 +epoch: 2, batch: 21599, sum loss: 4792.317383, avg loss: 2.743170, ppl: 15.536150 +epoch: 2, batch: 21600, sum loss: 3765.552246, avg loss: 2.423135, ppl: 11.281174 +epoch: 2, batch: 21601, sum loss: 3692.590088, avg loss: 2.405596, ppl: 11.085035 +epoch: 2, batch: 21602, sum loss: 3895.335205, avg loss: 2.457625, ppl: 11.677042 +epoch: 2, batch: 21603, sum loss: 5758.024414, avg loss: 2.857580, ppl: 17.419329 +epoch: 2, batch: 21604, sum loss: 4519.090332, avg loss: 2.606165, ppl: 13.546997 +epoch: 2, batch: 21605, sum loss: 4312.287109, avg loss: 2.594637, ppl: 13.391730 +epoch: 2, batch: 21606, sum loss: 3981.880371, avg loss: 2.474755, ppl: 11.878795 +epoch: 2, batch: 21607, sum loss: 4216.790039, avg loss: 2.441685, ppl: 11.492392 +epoch: 2, batch: 21608, sum loss: 4355.923828, avg loss: 2.392051, ppl: 10.935895 +epoch: 2, batch: 21609, sum loss: 3830.505859, avg loss: 2.278707, ppl: 9.764043 +epoch: 2, batch: 21610, sum loss: 4352.135742, avg loss: 2.536210, ppl: 12.631701 +epoch: 2, batch: 21611, sum loss: 3643.949707, avg loss: 2.264729, ppl: 9.628520 +epoch: 2, batch: 21612, sum loss: 4065.070312, avg loss: 2.422569, ppl: 11.274785 +epoch: 2, batch: 21613, sum loss: 4054.944092, avg loss: 2.416534, ppl: 11.206948 +epoch: 2, batch: 21614, sum loss: 4149.520508, avg loss: 2.448095, ppl: 11.566287 +epoch: 2, batch: 21615, sum loss: 4182.556641, avg loss: 2.780955, ppl: 16.134422 +epoch: 2, batch: 21616, sum loss: 4854.474121, avg loss: 2.857254, ppl: 17.413639 +epoch: 2, batch: 21617, sum loss: 4738.726562, avg loss: 2.539510, ppl: 12.673463 +epoch: 2, batch: 21618, sum loss: 4186.762695, avg loss: 2.665030, 
ppl: 14.368387 +epoch: 2, batch: 21619, sum loss: 3230.695068, avg loss: 2.221936, ppl: 9.225173 +epoch: 2, batch: 21620, sum loss: 4745.942871, avg loss: 2.783544, ppl: 16.176252 +epoch: 2, batch: 21621, sum loss: 4519.283203, avg loss: 2.448149, ppl: 11.566916 +epoch: 2, batch: 21622, sum loss: 4210.635742, avg loss: 2.268661, ppl: 9.666453 +epoch: 2, batch: 21623, sum loss: 3878.234375, avg loss: 2.482865, ppl: 11.975521 +epoch: 2, batch: 21624, sum loss: 4731.244141, avg loss: 2.828000, ppl: 16.911606 +epoch: 2, batch: 21625, sum loss: 3792.601318, avg loss: 2.325323, ppl: 10.229980 +epoch: 2, batch: 21626, sum loss: 4593.623535, avg loss: 2.386298, ppl: 10.873166 +epoch: 2, batch: 21627, sum loss: 4924.109863, avg loss: 2.667449, ppl: 14.403172 +epoch: 2, batch: 21628, sum loss: 5251.136230, avg loss: 2.572825, ppl: 13.102790 +epoch: 2, batch: 21629, sum loss: 3755.302246, avg loss: 2.165688, ppl: 8.720595 +epoch: 2, batch: 21630, sum loss: 4959.379883, avg loss: 2.858432, ppl: 17.434174 +epoch: 2, batch: 21631, sum loss: 4170.412598, avg loss: 2.500247, ppl: 12.185510 +epoch: 2, batch: 21632, sum loss: 3717.263184, avg loss: 2.252887, ppl: 9.515164 +epoch: 2, batch: 21633, sum loss: 4364.078125, avg loss: 2.683935, ppl: 14.642597 +epoch: 2, batch: 21634, sum loss: 3767.853516, avg loss: 2.338829, ppl: 10.369087 +epoch: 2, batch: 21635, sum loss: 4969.702637, avg loss: 2.578984, ppl: 13.183740 +epoch: 2, batch: 21636, sum loss: 4627.764648, avg loss: 2.515089, ppl: 12.367713 +epoch: 2, batch: 21637, sum loss: 4193.572266, avg loss: 2.588625, ppl: 13.311455 +epoch: 2, batch: 21638, sum loss: 4356.734863, avg loss: 2.348644, ppl: 10.471361 +epoch: 2, batch: 21639, sum loss: 4718.450684, avg loss: 2.561591, ppl: 12.956416 +epoch: 2, batch: 21640, sum loss: 3889.862549, avg loss: 2.405605, ppl: 11.085131 +epoch: 2, batch: 21641, sum loss: 3340.665039, avg loss: 2.324749, ppl: 10.224118 +epoch: 2, batch: 21642, sum loss: 3393.296387, avg loss: 2.182184, ppl: 
8.865649 +epoch: 2, batch: 21643, sum loss: 4358.502930, avg loss: 2.382998, ppl: 10.837344 +epoch: 2, batch: 21644, sum loss: 4339.146484, avg loss: 2.378918, ppl: 10.793218 +epoch: 2, batch: 21645, sum loss: 3281.771240, avg loss: 2.192232, ppl: 8.955178 +epoch: 2, batch: 21646, sum loss: 3677.724609, avg loss: 2.132014, ppl: 8.431834 +epoch: 2, batch: 21647, sum loss: 4812.751465, avg loss: 2.538371, ppl: 12.659034 +epoch: 2, batch: 21648, sum loss: 5529.245117, avg loss: 2.831155, ppl: 16.965036 +epoch: 2, batch: 21649, sum loss: 4449.778809, avg loss: 2.534043, ppl: 12.604358 +epoch: 2, batch: 21650, sum loss: 4494.576172, avg loss: 2.559554, ppl: 12.930044 +epoch: 2, batch: 21651, sum loss: 3861.068359, avg loss: 2.300994, ppl: 9.984103 +epoch: 2, batch: 21652, sum loss: 3655.739258, avg loss: 2.366174, ppl: 10.656548 +epoch: 2, batch: 21653, sum loss: 5193.109375, avg loss: 2.738982, ppl: 15.471227 +epoch: 2, batch: 21654, sum loss: 3510.427490, avg loss: 2.397833, ppl: 10.999313 +epoch: 2, batch: 21655, sum loss: 3944.674316, avg loss: 2.536768, ppl: 12.638759 +epoch: 2, batch: 21656, sum loss: 3887.809326, avg loss: 2.635803, ppl: 13.954514 +epoch: 2, batch: 21657, sum loss: 3979.705078, avg loss: 2.429612, ppl: 11.354478 +epoch: 2, batch: 21658, sum loss: 3675.973633, avg loss: 2.314845, ppl: 10.123355 +epoch: 2, batch: 21659, sum loss: 3048.487549, avg loss: 2.149850, ppl: 8.583571 +epoch: 2, batch: 21660, sum loss: 4392.588867, avg loss: 2.351493, ppl: 10.501235 +epoch: 2, batch: 21661, sum loss: 4332.333496, avg loss: 2.508589, ppl: 12.287583 +epoch: 2, batch: 21662, sum loss: 4312.762695, avg loss: 2.352844, ppl: 10.515430 +epoch: 2, batch: 21663, sum loss: 3344.987793, avg loss: 2.415154, ppl: 11.191491 +epoch: 2, batch: 21664, sum loss: 4148.410156, avg loss: 2.362421, ppl: 10.616618 +epoch: 2, batch: 21665, sum loss: 4753.243164, avg loss: 2.680904, ppl: 14.598289 +epoch: 2, batch: 21666, sum loss: 4058.604004, avg loss: 2.290409, ppl: 9.878974 
+epoch: 2, batch: 21667, sum loss: 3267.235107, avg loss: 2.251713, ppl: 9.503998 +epoch: 2, batch: 21668, sum loss: 4800.274414, avg loss: 2.568365, ppl: 13.044480 +epoch: 2, batch: 21669, sum loss: 4388.452637, avg loss: 2.455765, ppl: 11.655353 +epoch: 2, batch: 21670, sum loss: 3937.245361, avg loss: 2.466946, ppl: 11.786392 +epoch: 2, batch: 21671, sum loss: 4913.479004, avg loss: 2.708643, ppl: 15.008902 +epoch: 2, batch: 21672, sum loss: 4364.453613, avg loss: 2.610319, ppl: 13.603395 +epoch: 2, batch: 21673, sum loss: 4563.701172, avg loss: 2.593012, ppl: 13.369980 +epoch: 2, batch: 21674, sum loss: 3581.375244, avg loss: 2.135585, ppl: 8.461992 +epoch: 2, batch: 21675, sum loss: 3103.214844, avg loss: 2.199302, ppl: 9.018714 +epoch: 2, batch: 21676, sum loss: 3774.390381, avg loss: 2.332751, ppl: 10.306251 +epoch: 2, batch: 21677, sum loss: 4462.488281, avg loss: 2.573523, ppl: 13.111931 +epoch: 2, batch: 21678, sum loss: 4506.031738, avg loss: 2.532902, ppl: 12.589983 +epoch: 2, batch: 21679, sum loss: 3687.954834, avg loss: 2.261162, ppl: 9.594230 +epoch: 2, batch: 21680, sum loss: 4078.552734, avg loss: 2.350751, ppl: 10.493446 +epoch: 2, batch: 21681, sum loss: 4266.809082, avg loss: 2.329044, ppl: 10.268124 +epoch: 2, batch: 21682, sum loss: 3720.146973, avg loss: 2.339715, ppl: 10.378279 +epoch: 2, batch: 21683, sum loss: 3877.637207, avg loss: 2.471407, ppl: 11.839089 +epoch: 2, batch: 21684, sum loss: 4583.796875, avg loss: 2.664998, ppl: 14.367922 +epoch: 2, batch: 21685, sum loss: 5611.646973, avg loss: 2.812855, ppl: 16.657415 +epoch: 2, batch: 21686, sum loss: 3573.722656, avg loss: 2.225232, ppl: 9.255629 +epoch: 2, batch: 21687, sum loss: 4641.907227, avg loss: 2.469100, ppl: 11.811806 +epoch: 2, batch: 21688, sum loss: 4142.690430, avg loss: 2.519885, ppl: 12.427162 +epoch: 2, batch: 21689, sum loss: 4145.757812, avg loss: 2.423003, ppl: 11.279681 +epoch: 2, batch: 21690, sum loss: 3695.982422, avg loss: 2.423595, ppl: 11.286361 +epoch: 2, 
batch: 21691, sum loss: 4905.388672, avg loss: 2.671780, ppl: 14.465700 +epoch: 2, batch: 21692, sum loss: 4168.428223, avg loss: 2.372469, ppl: 10.723836 +epoch: 2, batch: 21693, sum loss: 3741.091553, avg loss: 2.571197, ppl: 13.081470 +epoch: 2, batch: 21694, sum loss: 3770.729248, avg loss: 2.386538, ppl: 10.875772 +epoch: 2, batch: 21695, sum loss: 4304.422852, avg loss: 2.693631, ppl: 14.785270 +epoch: 2, batch: 21696, sum loss: 3780.778320, avg loss: 2.510477, ppl: 12.310802 +epoch: 2, batch: 21697, sum loss: 4634.069336, avg loss: 2.464931, ppl: 11.762665 +epoch: 2, batch: 21698, sum loss: 3952.879883, avg loss: 2.527417, ppl: 12.521118 +epoch: 2, batch: 21699, sum loss: 3780.220703, avg loss: 2.445162, ppl: 11.532421 +epoch: 2, batch: 21700, sum loss: 4753.005859, avg loss: 2.491093, ppl: 12.074469 +epoch: 2, batch: 21701, sum loss: 3872.126221, avg loss: 2.455375, ppl: 11.650802 +epoch: 2, batch: 21702, sum loss: 3474.190918, avg loss: 2.140598, ppl: 8.504524 +epoch: 2, batch: 21703, sum loss: 3871.638672, avg loss: 2.171418, ppl: 8.770716 +epoch: 2, batch: 21704, sum loss: 4508.888184, avg loss: 2.572098, ppl: 13.093268 +epoch: 2, batch: 21705, sum loss: 5020.268555, avg loss: 2.405495, ppl: 11.083920 +epoch: 2, batch: 21706, sum loss: 4206.216797, avg loss: 2.302253, ppl: 9.996682 +epoch: 2, batch: 21707, sum loss: 3836.519775, avg loss: 2.266108, ppl: 9.641798 +epoch: 2, batch: 21708, sum loss: 5000.801758, avg loss: 2.812599, ppl: 16.653151 +epoch: 2, batch: 21709, sum loss: 4243.612305, avg loss: 2.460065, ppl: 11.705574 +epoch: 2, batch: 21710, sum loss: 4286.555664, avg loss: 2.655859, ppl: 14.237204 +epoch: 2, batch: 21711, sum loss: 4601.525879, avg loss: 2.638489, ppl: 13.992052 +epoch: 2, batch: 21712, sum loss: 4425.959473, avg loss: 2.406721, ppl: 11.097511 +epoch: 2, batch: 21713, sum loss: 4729.778809, avg loss: 2.623283, ppl: 13.780888 +epoch: 2, batch: 21714, sum loss: 3928.150879, avg loss: 2.369210, ppl: 10.688950 +epoch: 2, batch: 
21715, sum loss: 3839.177246, avg loss: 2.368400, ppl: 10.680294 +epoch: 2, batch: 21716, sum loss: 4181.088379, avg loss: 2.375618, ppl: 10.757662 +epoch: 2, batch: 21717, sum loss: 4274.553223, avg loss: 2.473700, ppl: 11.866269 +epoch: 2, batch: 21718, sum loss: 4670.477539, avg loss: 2.473770, ppl: 11.867101 +epoch: 2, batch: 21719, sum loss: 4464.305664, avg loss: 2.640039, ppl: 14.013750 +epoch: 2, batch: 21720, sum loss: 5142.260254, avg loss: 2.905232, ppl: 18.269482 +epoch: 2, batch: 21721, sum loss: 5130.398926, avg loss: 2.686073, ppl: 14.673936 +epoch: 2, batch: 21722, sum loss: 4351.738281, avg loss: 2.351020, ppl: 10.496271 +epoch: 2, batch: 21723, sum loss: 4406.388672, avg loss: 2.476891, ppl: 11.904194 +epoch: 2, batch: 21724, sum loss: 4122.117188, avg loss: 2.613898, ppl: 13.652164 +epoch: 2, batch: 21725, sum loss: 4414.676270, avg loss: 2.541552, ppl: 12.699369 +epoch: 2, batch: 21726, sum loss: 3621.018555, avg loss: 2.327133, ppl: 10.248514 +epoch: 2, batch: 21727, sum loss: 4255.980469, avg loss: 2.557681, ppl: 12.905849 +epoch: 2, batch: 21728, sum loss: 3665.440674, avg loss: 2.336164, ppl: 10.341485 +epoch: 2, batch: 21729, sum loss: 5113.001953, avg loss: 2.620708, ppl: 13.745459 +epoch: 2, batch: 21730, sum loss: 4479.695312, avg loss: 2.676043, ppl: 14.527492 +epoch: 2, batch: 21731, sum loss: 4400.477051, avg loss: 2.491776, ppl: 12.082719 +epoch: 2, batch: 21732, sum loss: 3634.772705, avg loss: 2.436175, ppl: 11.429238 +epoch: 2, batch: 21733, sum loss: 4343.860840, avg loss: 2.522567, ppl: 12.460546 +epoch: 2, batch: 21734, sum loss: 4945.696289, avg loss: 2.612623, ppl: 13.634774 +epoch: 2, batch: 21735, sum loss: 3380.487061, avg loss: 2.428511, ppl: 11.341980 +epoch: 2, batch: 21736, sum loss: 3789.479492, avg loss: 2.440103, ppl: 11.474220 +epoch: 2, batch: 21737, sum loss: 4337.671875, avg loss: 2.517511, ppl: 12.397705 +epoch: 2, batch: 21738, sum loss: 4097.374512, avg loss: 2.465328, ppl: 11.767338 +epoch: 2, batch: 21739, 
sum loss: 4154.801270, avg loss: 2.364713, ppl: 10.640986 +epoch: 2, batch: 21740, sum loss: 4119.059082, avg loss: 2.514688, ppl: 12.362751 +epoch: 2, batch: 21741, sum loss: 4530.540039, avg loss: 2.508605, ppl: 12.287774 +epoch: 2, batch: 21742, sum loss: 3987.752686, avg loss: 2.466143, ppl: 11.776932 +epoch: 2, batch: 21743, sum loss: 4100.782227, avg loss: 2.448228, ppl: 11.567835 +epoch: 2, batch: 21744, sum loss: 3558.511719, avg loss: 2.429018, ppl: 11.347736 +epoch: 2, batch: 21745, sum loss: 3904.535156, avg loss: 2.262187, ppl: 9.604073 +epoch: 2, batch: 21746, sum loss: 4882.767578, avg loss: 2.568526, ppl: 13.046576 +epoch: 2, batch: 21747, sum loss: 3967.965576, avg loss: 2.527367, ppl: 12.520492 +epoch: 2, batch: 21748, sum loss: 4154.067383, avg loss: 2.525269, ppl: 12.494253 +epoch: 2, batch: 21749, sum loss: 3924.612305, avg loss: 2.241355, ppl: 9.406067 +epoch: 2, batch: 21750, sum loss: 5169.288086, avg loss: 2.759898, ppl: 15.798227 +epoch: 2, batch: 21751, sum loss: 4478.842773, avg loss: 2.656490, ppl: 14.246202 +epoch: 2, batch: 21752, sum loss: 3814.722412, avg loss: 2.362057, ppl: 10.612761 +epoch: 2, batch: 21753, sum loss: 3616.761963, avg loss: 2.333395, ppl: 10.312895 +epoch: 2, batch: 21754, sum loss: 3451.041504, avg loss: 2.252638, ppl: 9.512798 +epoch: 2, batch: 21755, sum loss: 3864.087891, avg loss: 2.422626, ppl: 11.275427 +epoch: 2, batch: 21756, sum loss: 3953.436523, avg loss: 2.361671, ppl: 10.608661 +epoch: 2, batch: 21757, sum loss: 4604.068848, avg loss: 2.436015, ppl: 11.427416 +epoch: 2, batch: 21758, sum loss: 4452.092285, avg loss: 2.600521, ppl: 13.470756 +epoch: 2, batch: 21759, sum loss: 4061.151611, avg loss: 2.420233, ppl: 11.248483 +epoch: 2, batch: 21760, sum loss: 4218.053711, avg loss: 2.479749, ppl: 11.938270 +epoch: 2, batch: 21761, sum loss: 3397.477539, avg loss: 2.080513, ppl: 8.008574 +epoch: 2, batch: 21762, sum loss: 4070.087891, avg loss: 2.364955, ppl: 10.643561 +epoch: 2, batch: 21763, sum loss: 
4008.919434, avg loss: 2.429648, ppl: 11.354886 +epoch: 2, batch: 21764, sum loss: 3533.256104, avg loss: 2.363382, ppl: 10.626829 +epoch: 2, batch: 21765, sum loss: 3909.033447, avg loss: 2.504185, ppl: 12.233590 +epoch: 2, batch: 21766, sum loss: 4894.690918, avg loss: 2.731412, ppl: 15.354559 +epoch: 2, batch: 21767, sum loss: 4531.205566, avg loss: 2.529986, ppl: 12.553332 +epoch: 2, batch: 21768, sum loss: 4048.057129, avg loss: 2.466823, ppl: 11.784950 +epoch: 2, batch: 21769, sum loss: 4699.448730, avg loss: 2.741802, ppl: 15.514917 +epoch: 2, batch: 21770, sum loss: 3864.923828, avg loss: 2.559552, ppl: 12.930026 +epoch: 2, batch: 21771, sum loss: 4785.993652, avg loss: 2.616727, ppl: 13.690842 +epoch: 2, batch: 21772, sum loss: 3504.249756, avg loss: 2.220690, ppl: 9.213691 +epoch: 2, batch: 21773, sum loss: 5020.562500, avg loss: 2.903738, ppl: 18.242203 +epoch: 2, batch: 21774, sum loss: 3982.540039, avg loss: 2.613215, ppl: 13.642845 +epoch: 2, batch: 21775, sum loss: 3937.593262, avg loss: 2.442676, ppl: 11.503779 +epoch: 2, batch: 21776, sum loss: 4951.017578, avg loss: 2.446155, ppl: 11.543876 +epoch: 2, batch: 21777, sum loss: 4786.277832, avg loss: 2.699536, ppl: 14.872834 +epoch: 2, batch: 21778, sum loss: 4014.693604, avg loss: 2.466028, ppl: 11.775581 +epoch: 2, batch: 21779, sum loss: 3709.993164, avg loss: 2.474979, ppl: 11.881456 +epoch: 2, batch: 21780, sum loss: 3943.551514, avg loss: 2.322469, ppl: 10.200824 +epoch: 2, batch: 21781, sum loss: 3973.820312, avg loss: 2.465149, ppl: 11.765237 +epoch: 2, batch: 21782, sum loss: 4108.470703, avg loss: 2.291395, ppl: 9.888720 +epoch: 2, batch: 21783, sum loss: 3879.195557, avg loss: 2.325657, ppl: 10.233400 +epoch: 2, batch: 21784, sum loss: 4066.325684, avg loss: 2.482494, ppl: 11.971085 +epoch: 2, batch: 21785, sum loss: 3891.481934, avg loss: 2.427624, ppl: 11.331931 +epoch: 2, batch: 21786, sum loss: 3444.409668, avg loss: 2.299339, ppl: 9.967590 +epoch: 2, batch: 21787, sum loss: 
3703.318359, avg loss: 2.218885, ppl: 9.197067 +epoch: 2, batch: 21788, sum loss: 4408.512695, avg loss: 2.460108, ppl: 11.706071 +epoch: 2, batch: 21789, sum loss: 5302.015137, avg loss: 2.636507, ppl: 13.964341 +epoch: 2, batch: 21790, sum loss: 4756.697266, avg loss: 2.578156, ppl: 13.172822 +epoch: 2, batch: 21791, sum loss: 3311.191406, avg loss: 2.377022, ppl: 10.772772 +epoch: 2, batch: 21792, sum loss: 4461.979492, avg loss: 2.417107, ppl: 11.213370 +epoch: 2, batch: 21793, sum loss: 4712.367676, avg loss: 2.669897, ppl: 14.438477 +epoch: 2, batch: 21794, sum loss: 4526.957520, avg loss: 2.702661, ppl: 14.919384 +epoch: 2, batch: 21795, sum loss: 4347.473633, avg loss: 2.583169, ppl: 13.239026 +epoch: 2, batch: 21796, sum loss: 4395.699707, avg loss: 2.749030, ppl: 15.627471 +epoch: 2, batch: 21797, sum loss: 4255.173340, avg loss: 2.538886, ppl: 12.665558 +epoch: 2, batch: 21798, sum loss: 3416.062012, avg loss: 2.315974, ppl: 10.134791 +epoch: 2, batch: 21799, sum loss: 5105.221680, avg loss: 2.814345, ppl: 16.682247 +epoch: 2, batch: 21800, sum loss: 4571.998535, avg loss: 2.662783, ppl: 14.336133 +epoch: 2, batch: 21801, sum loss: 4699.777344, avg loss: 2.420071, ppl: 11.246656 +epoch: 2, batch: 21802, sum loss: 4742.586914, avg loss: 2.477841, ppl: 11.915507 +epoch: 2, batch: 21803, sum loss: 4599.954590, avg loss: 2.581344, ppl: 13.214886 +epoch: 2, batch: 21804, sum loss: 3638.681152, avg loss: 2.389154, ppl: 10.904262 +epoch: 2, batch: 21805, sum loss: 4432.730957, avg loss: 2.613639, ppl: 13.648623 +epoch: 2, batch: 21806, sum loss: 3356.110840, avg loss: 2.376849, ppl: 10.770909 +epoch: 2, batch: 21807, sum loss: 4870.712402, avg loss: 2.683588, ppl: 14.637523 +epoch: 2, batch: 21808, sum loss: 3687.670410, avg loss: 2.410242, ppl: 11.136657 +epoch: 2, batch: 21809, sum loss: 4621.438965, avg loss: 2.741067, ppl: 15.503521 +epoch: 2, batch: 21810, sum loss: 4241.111328, avg loss: 2.354865, ppl: 10.536702 +epoch: 2, batch: 21811, sum loss: 
5368.979980, avg loss: 2.812457, ppl: 16.650776 +epoch: 2, batch: 21812, sum loss: 3524.316895, avg loss: 2.152912, ppl: 8.609895 +epoch: 2, batch: 21813, sum loss: 3556.753418, avg loss: 2.350796, ppl: 10.493915 +epoch: 2, batch: 21814, sum loss: 3977.483398, avg loss: 2.207261, ppl: 9.090779 +epoch: 2, batch: 21815, sum loss: 4085.697021, avg loss: 2.614010, ppl: 13.653687 +epoch: 2, batch: 21816, sum loss: 4060.418457, avg loss: 2.394115, ppl: 10.958492 +epoch: 2, batch: 21817, sum loss: 4799.728516, avg loss: 2.602890, ppl: 13.502702 +epoch: 2, batch: 21818, sum loss: 3906.679199, avg loss: 2.296695, ppl: 9.941277 +epoch: 2, batch: 21819, sum loss: 4102.474609, avg loss: 2.352336, ppl: 10.510095 +epoch: 2, batch: 21820, sum loss: 4173.341797, avg loss: 2.563478, ppl: 12.980884 +epoch: 2, batch: 21821, sum loss: 4287.863770, avg loss: 2.412979, ppl: 11.167180 +epoch: 2, batch: 21822, sum loss: 4938.115234, avg loss: 2.544109, ppl: 12.731874 +epoch: 2, batch: 21823, sum loss: 3679.384521, avg loss: 2.367686, ppl: 10.672670 +epoch: 2, batch: 21824, sum loss: 4015.408203, avg loss: 2.480178, ppl: 11.943392 +epoch: 2, batch: 21825, sum loss: 3724.088867, avg loss: 2.419811, ppl: 11.243731 +epoch: 2, batch: 21826, sum loss: 4553.299316, avg loss: 2.691075, ppl: 14.747526 +epoch: 2, batch: 21827, sum loss: 4435.550293, avg loss: 2.318636, ppl: 10.161801 +epoch: 2, batch: 21828, sum loss: 3804.005859, avg loss: 2.365675, ppl: 10.651228 +epoch: 2, batch: 21829, sum loss: 4006.022949, avg loss: 2.406020, ppl: 11.089735 +epoch: 2, batch: 21830, sum loss: 5148.013672, avg loss: 2.642718, ppl: 14.051337 +epoch: 2, batch: 21831, sum loss: 3554.900146, avg loss: 2.283173, ppl: 9.807750 +epoch: 2, batch: 21832, sum loss: 4266.550293, avg loss: 2.348129, ppl: 10.465970 +epoch: 2, batch: 21833, sum loss: 4098.748047, avg loss: 2.528531, ppl: 12.535073 +epoch: 2, batch: 21834, sum loss: 4251.201660, avg loss: 2.548682, ppl: 12.790237 +epoch: 2, batch: 21835, sum loss: 
4427.909180, avg loss: 2.613878, ppl: 13.651887 +epoch: 2, batch: 21836, sum loss: 4546.080078, avg loss: 2.571312, ppl: 13.082984 +epoch: 2, batch: 21837, sum loss: 4034.248535, avg loss: 2.322538, ppl: 10.201534 +epoch: 2, batch: 21838, sum loss: 3773.697754, avg loss: 2.377882, ppl: 10.782037 +epoch: 2, batch: 21839, sum loss: 4217.743164, avg loss: 2.486877, ppl: 12.023667 +epoch: 2, batch: 21840, sum loss: 4231.374023, avg loss: 2.327488, ppl: 10.252160 +epoch: 2, batch: 21841, sum loss: 3870.384277, avg loss: 2.344267, ppl: 10.425627 +epoch: 2, batch: 21842, sum loss: 4583.608887, avg loss: 2.545035, ppl: 12.743679 +epoch: 2, batch: 21843, sum loss: 4477.795410, avg loss: 2.508569, ppl: 12.287334 +epoch: 2, batch: 21844, sum loss: 3924.591797, avg loss: 2.365637, ppl: 10.650822 +epoch: 2, batch: 21845, sum loss: 3875.316406, avg loss: 2.498592, ppl: 12.165355 +epoch: 2, batch: 21846, sum loss: 3758.466553, avg loss: 2.530954, ppl: 12.565487 +epoch: 2, batch: 21847, sum loss: 4554.125000, avg loss: 2.722131, ppl: 15.212707 +epoch: 2, batch: 21848, sum loss: 4791.526367, avg loss: 2.580251, ppl: 13.200451 +epoch: 2, batch: 21849, sum loss: 3307.436279, avg loss: 2.290468, ppl: 9.879562 +epoch: 2, batch: 21850, sum loss: 3622.149658, avg loss: 2.223542, ppl: 9.239999 +epoch: 2, batch: 21851, sum loss: 5267.877930, avg loss: 2.651171, ppl: 14.170630 +epoch: 2, batch: 21852, sum loss: 4103.327148, avg loss: 2.405233, ppl: 11.081008 +epoch: 2, batch: 21853, sum loss: 3999.909668, avg loss: 2.493709, ppl: 12.106098 +epoch: 2, batch: 21854, sum loss: 4193.110352, avg loss: 2.453546, ppl: 11.629516 +epoch: 2, batch: 21855, sum loss: 4585.323242, avg loss: 2.617194, ppl: 13.697231 +epoch: 2, batch: 21856, sum loss: 4302.214844, avg loss: 2.416975, ppl: 11.211887 +epoch: 2, batch: 21857, sum loss: 4346.471191, avg loss: 2.409352, ppl: 11.126749 +epoch: 2, batch: 21858, sum loss: 4680.120605, avg loss: 2.648625, ppl: 14.134592 +epoch: 2, batch: 21859, sum loss: 
4053.156006, avg loss: 2.500405, ppl: 12.187424 +epoch: 2, batch: 21860, sum loss: 3541.494385, avg loss: 2.420707, ppl: 11.253816 +epoch: 2, batch: 21861, sum loss: 4701.606934, avg loss: 2.660785, ppl: 14.307515 +epoch: 2, batch: 21862, sum loss: 4695.145508, avg loss: 2.561454, ppl: 12.954643 +epoch: 2, batch: 21863, sum loss: 4768.691895, avg loss: 2.650746, ppl: 14.164599 +epoch: 2, batch: 21864, sum loss: 3445.376709, avg loss: 2.128089, ppl: 8.398805 +epoch: 2, batch: 21865, sum loss: 3897.174561, avg loss: 2.325283, ppl: 10.229578 +epoch: 2, batch: 21866, sum loss: 4507.004883, avg loss: 2.519287, ppl: 12.419740 +epoch: 2, batch: 21867, sum loss: 5094.957031, avg loss: 2.616825, ppl: 13.692177 +epoch: 2, batch: 21868, sum loss: 4974.532227, avg loss: 2.561551, ppl: 12.955900 +epoch: 2, batch: 21869, sum loss: 4024.337158, avg loss: 2.335657, ppl: 10.336250 +epoch: 2, batch: 21870, sum loss: 3789.896240, avg loss: 2.358367, ppl: 10.573675 +epoch: 2, batch: 21871, sum loss: 3822.157471, avg loss: 2.372537, ppl: 10.724570 +epoch: 2, batch: 21872, sum loss: 4219.108398, avg loss: 2.473100, ppl: 11.859150 +epoch: 2, batch: 21873, sum loss: 3595.661621, avg loss: 2.561013, ppl: 12.948921 +epoch: 2, batch: 21874, sum loss: 4630.374023, avg loss: 2.565304, ppl: 13.004612 +epoch: 2, batch: 21875, sum loss: 3527.685059, avg loss: 2.283291, ppl: 9.808912 +epoch: 2, batch: 21876, sum loss: 4578.190918, avg loss: 2.593876, ppl: 13.381536 +epoch: 2, batch: 21877, sum loss: 4666.436523, avg loss: 2.375986, ppl: 10.761620 +epoch: 2, batch: 21878, sum loss: 4005.754639, avg loss: 2.259309, ppl: 9.576470 +epoch: 2, batch: 21879, sum loss: 2845.360107, avg loss: 2.157210, ppl: 8.646980 +epoch: 2, batch: 21880, sum loss: 4172.097656, avg loss: 2.483392, ppl: 11.981833 +epoch: 2, batch: 21881, sum loss: 3770.870361, avg loss: 2.330575, ppl: 10.283853 +epoch: 2, batch: 21882, sum loss: 4387.488770, avg loss: 2.541998, ppl: 12.705032 +epoch: 2, batch: 21883, sum loss: 
3870.268555, avg loss: 2.519706, ppl: 12.424946 +epoch: 2, batch: 21884, sum loss: 4830.422852, avg loss: 2.559843, ppl: 12.933782 +epoch: 2, batch: 21885, sum loss: 3493.684326, avg loss: 2.207002, ppl: 9.088428 +epoch: 2, batch: 21886, sum loss: 4576.517578, avg loss: 2.668523, ppl: 14.418661 +epoch: 2, batch: 21887, sum loss: 3154.610596, avg loss: 2.335019, ppl: 10.329655 +epoch: 2, batch: 21888, sum loss: 3658.121582, avg loss: 2.453469, ppl: 11.628612 +epoch: 2, batch: 21889, sum loss: 4821.365234, avg loss: 2.631750, ppl: 13.898066 +epoch: 2, batch: 21890, sum loss: 4805.897461, avg loss: 2.468360, ppl: 11.803079 +epoch: 2, batch: 21891, sum loss: 4243.320801, avg loss: 2.516798, ppl: 12.388862 +epoch: 2, batch: 21892, sum loss: 4303.552734, avg loss: 2.570820, ppl: 13.076544 +epoch: 2, batch: 21893, sum loss: 4063.564697, avg loss: 2.452363, ppl: 11.615757 +epoch: 2, batch: 21894, sum loss: 3496.880859, avg loss: 2.385321, ppl: 10.862551 +epoch: 2, batch: 21895, sum loss: 4881.150879, avg loss: 2.638460, ppl: 13.991639 +epoch: 2, batch: 21896, sum loss: 4742.026367, avg loss: 2.643270, ppl: 14.059102 +epoch: 2, batch: 21897, sum loss: 4445.695801, avg loss: 2.489192, ppl: 12.051529 +epoch: 2, batch: 21898, sum loss: 4416.060547, avg loss: 2.710903, ppl: 15.042849 +epoch: 2, batch: 21899, sum loss: 4208.064453, avg loss: 2.494407, ppl: 12.114547 +epoch: 2, batch: 21900, sum loss: 3637.573975, avg loss: 2.182108, ppl: 8.864973 +epoch: 2, batch: 21901, sum loss: 4747.826172, avg loss: 2.858414, ppl: 17.433863 +epoch: 2, batch: 21902, sum loss: 4745.981445, avg loss: 2.575139, ppl: 13.133143 +epoch: 2, batch: 21903, sum loss: 4813.690430, avg loss: 2.579684, ppl: 13.192968 +epoch: 2, batch: 21904, sum loss: 3851.194336, avg loss: 2.557234, ppl: 12.900087 +epoch: 2, batch: 21905, sum loss: 3925.603516, avg loss: 2.444336, ppl: 11.522895 +epoch: 2, batch: 21906, sum loss: 5073.578613, avg loss: 2.636995, ppl: 13.971158 +epoch: 2, batch: 21907, sum loss: 
5649.707031, avg loss: 2.769464, ppl: 15.950083 +epoch: 2, batch: 21908, sum loss: 4587.021484, avg loss: 2.604782, ppl: 13.528277 +epoch: 2, batch: 21909, sum loss: 4608.681152, avg loss: 2.624534, ppl: 13.798141 +epoch: 2, batch: 21910, sum loss: 4142.020996, avg loss: 2.423652, ppl: 11.287004 +epoch: 2, batch: 21911, sum loss: 4473.842285, avg loss: 2.530454, ppl: 12.559206 +epoch: 2, batch: 21912, sum loss: 3868.020996, avg loss: 2.294200, ppl: 9.916499 +epoch: 2, batch: 21913, sum loss: 4214.532227, avg loss: 2.505667, ppl: 12.251731 +epoch: 2, batch: 21914, sum loss: 4306.916016, avg loss: 2.540953, ppl: 12.691766 +epoch: 2, batch: 21915, sum loss: 4791.181641, avg loss: 2.647062, ppl: 14.112513 +epoch: 2, batch: 21916, sum loss: 3651.779297, avg loss: 2.317119, ppl: 10.146399 +epoch: 2, batch: 21917, sum loss: 3953.086670, avg loss: 2.651299, ppl: 14.172437 +epoch: 2, batch: 21918, sum loss: 4445.495117, avg loss: 2.449309, ppl: 11.580338 +epoch: 2, batch: 21919, sum loss: 3943.662842, avg loss: 2.214297, ppl: 9.154971 +epoch: 2, batch: 21920, sum loss: 4071.657959, avg loss: 2.425049, ppl: 11.302787 +epoch: 2, batch: 21921, sum loss: 4296.195312, avg loss: 2.534628, ppl: 12.611743 +epoch: 2, batch: 21922, sum loss: 4374.410156, avg loss: 2.665698, ppl: 14.377979 +epoch: 2, batch: 21923, sum loss: 3956.763428, avg loss: 2.385029, ppl: 10.859379 +epoch: 2, batch: 21924, sum loss: 4152.223145, avg loss: 2.422534, ppl: 11.274392 +epoch: 2, batch: 21925, sum loss: 4317.266602, avg loss: 2.425431, ppl: 11.307099 +epoch: 2, batch: 21926, sum loss: 3890.546875, avg loss: 2.586800, ppl: 13.287180 +epoch: 2, batch: 21927, sum loss: 4201.150391, avg loss: 2.506653, ppl: 12.263815 +epoch: 2, batch: 21928, sum loss: 3734.094238, avg loss: 2.453413, ppl: 11.627965 +epoch: 2, batch: 21929, sum loss: 4160.546875, avg loss: 2.558762, ppl: 12.919810 +epoch: 2, batch: 21930, sum loss: 3864.496094, avg loss: 2.250726, ppl: 9.494624 +epoch: 2, batch: 21931, sum loss: 
5310.032715, avg loss: 2.748464, ppl: 15.618625 +epoch: 2, batch: 21932, sum loss: 3671.423096, avg loss: 2.396490, ppl: 10.984556 +epoch: 2, batch: 21933, sum loss: 4166.334473, avg loss: 2.552901, ppl: 12.844312 +epoch: 2, batch: 21934, sum loss: 3713.225098, avg loss: 2.482102, ppl: 11.966396 +epoch: 2, batch: 21935, sum loss: 3802.815186, avg loss: 2.399252, ppl: 11.014938 +epoch: 2, batch: 21936, sum loss: 5027.271484, avg loss: 2.741151, ppl: 15.504823 +epoch: 2, batch: 21937, sum loss: 4859.701172, avg loss: 2.716434, ppl: 15.126293 +epoch: 2, batch: 21938, sum loss: 3831.820557, avg loss: 2.563091, ppl: 12.975858 +epoch: 2, batch: 21939, sum loss: 4214.456055, avg loss: 2.463154, ppl: 11.741785 +epoch: 2, batch: 21940, sum loss: 4120.306641, avg loss: 2.482112, ppl: 11.966516 +epoch: 2, batch: 21941, sum loss: 3350.346191, avg loss: 2.276050, ppl: 9.738142 +epoch: 2, batch: 21942, sum loss: 4379.771973, avg loss: 2.392011, ppl: 10.935465 +epoch: 2, batch: 21943, sum loss: 3894.542969, avg loss: 2.415970, ppl: 11.200625 +epoch: 2, batch: 21944, sum loss: 3497.654297, avg loss: 2.480606, ppl: 11.948503 +epoch: 2, batch: 21945, sum loss: 4205.983398, avg loss: 2.335360, ppl: 10.333179 +epoch: 2, batch: 21946, sum loss: 4290.832520, avg loss: 2.586397, ppl: 13.281830 +epoch: 2, batch: 21947, sum loss: 3657.980957, avg loss: 2.080763, ppl: 8.010575 +epoch: 2, batch: 21948, sum loss: 5136.036133, avg loss: 2.801984, ppl: 16.477303 +epoch: 2, batch: 21949, sum loss: 4730.632324, avg loss: 2.739220, ppl: 15.474905 +epoch: 2, batch: 21950, sum loss: 4044.120605, avg loss: 2.376099, ppl: 10.762836 +epoch: 2, batch: 21951, sum loss: 4685.382812, avg loss: 2.610241, ppl: 13.602331 +epoch: 2, batch: 21952, sum loss: 4430.266113, avg loss: 2.446310, ppl: 11.545659 +epoch: 2, batch: 21953, sum loss: 4532.949219, avg loss: 2.464899, ppl: 11.762295 +epoch: 2, batch: 21954, sum loss: 4245.809570, avg loss: 2.419265, ppl: 11.237592 +epoch: 2, batch: 21955, sum loss: 
3145.242188, avg loss: 2.257891, ppl: 9.562900 +epoch: 2, batch: 21956, sum loss: 4476.733398, avg loss: 2.527800, ppl: 12.525917 +epoch: 2, batch: 21957, sum loss: 4258.681641, avg loss: 2.503634, ppl: 12.226846 +epoch: 2, batch: 21958, sum loss: 4584.782715, avg loss: 2.498519, ppl: 12.164470 +epoch: 2, batch: 21959, sum loss: 4116.968262, avg loss: 2.525747, ppl: 12.500234 +epoch: 2, batch: 21960, sum loss: 3893.713379, avg loss: 2.438143, ppl: 11.451750 +epoch: 2, batch: 21961, sum loss: 3425.812988, avg loss: 2.139796, ppl: 8.497702 +epoch: 2, batch: 21962, sum loss: 4249.265137, avg loss: 2.479151, ppl: 11.931133 +epoch: 2, batch: 21963, sum loss: 4069.156494, avg loss: 2.527426, ppl: 12.521238 +epoch: 2, batch: 21964, sum loss: 4618.100586, avg loss: 2.574192, ppl: 13.120712 +epoch: 2, batch: 21965, sum loss: 4575.044922, avg loss: 2.545935, ppl: 12.755148 +epoch: 2, batch: 21966, sum loss: 3499.825684, avg loss: 2.541631, ppl: 12.700365 +epoch: 2, batch: 21967, sum loss: 4643.229004, avg loss: 2.545630, ppl: 12.751259 +epoch: 2, batch: 21968, sum loss: 4882.468262, avg loss: 2.750686, ppl: 15.653369 +epoch: 2, batch: 21969, sum loss: 4145.983398, avg loss: 2.540431, ppl: 12.685138 +epoch: 2, batch: 21970, sum loss: 4140.530273, avg loss: 2.652486, ppl: 14.189267 +epoch: 2, batch: 21971, sum loss: 4542.577148, avg loss: 2.741447, ppl: 15.509407 +epoch: 2, batch: 21972, sum loss: 4548.329102, avg loss: 2.533888, ppl: 12.602410 +epoch: 2, batch: 21973, sum loss: 4066.586426, avg loss: 2.481139, ppl: 11.954870 +epoch: 2, batch: 21974, sum loss: 4151.133789, avg loss: 2.231792, ppl: 9.316550 +epoch: 2, batch: 21975, sum loss: 4220.115234, avg loss: 2.516467, ppl: 12.384765 +epoch: 2, batch: 21976, sum loss: 4806.152344, avg loss: 2.552391, ppl: 12.837763 +epoch: 2, batch: 21977, sum loss: 4201.961426, avg loss: 2.559051, ppl: 12.923548 +epoch: 2, batch: 21978, sum loss: 4893.751465, avg loss: 2.540889, ppl: 12.690946 +epoch: 2, batch: 21979, sum loss: 
4412.942871, avg loss: 2.439438, ppl: 11.466593 +epoch: 2, batch: 21980, sum loss: 4178.844727, avg loss: 2.481499, ppl: 11.959183 +epoch: 2, batch: 21981, sum loss: 4782.097168, avg loss: 2.691107, ppl: 14.747993 +epoch: 2, batch: 21982, sum loss: 3336.899902, avg loss: 2.186697, ppl: 8.905751 +epoch: 2, batch: 21983, sum loss: 3604.031738, avg loss: 2.422064, ppl: 11.269098 +epoch: 2, batch: 21984, sum loss: 4426.182129, avg loss: 2.454899, ppl: 11.645252 +epoch: 2, batch: 21985, sum loss: 4892.710938, avg loss: 2.532459, ppl: 12.584414 +epoch: 2, batch: 21986, sum loss: 3486.291992, avg loss: 2.246322, ppl: 9.452908 +epoch: 2, batch: 21987, sum loss: 3578.229736, avg loss: 2.208784, ppl: 9.104637 +epoch: 2, batch: 21988, sum loss: 5391.713379, avg loss: 2.703969, ppl: 14.938900 +epoch: 2, batch: 21989, sum loss: 3889.250977, avg loss: 2.198559, ppl: 9.012018 +epoch: 2, batch: 21990, sum loss: 4835.254395, avg loss: 2.538191, ppl: 12.656758 +epoch: 2, batch: 21991, sum loss: 2508.555176, avg loss: 1.961341, ppl: 7.108854 +epoch: 2, batch: 21992, sum loss: 4804.099121, avg loss: 2.533808, ppl: 12.601398 +epoch: 2, batch: 21993, sum loss: 3937.602783, avg loss: 2.537115, ppl: 12.643147 +epoch: 2, batch: 21994, sum loss: 4194.738281, avg loss: 2.437384, ppl: 11.443068 +epoch: 2, batch: 21995, sum loss: 4458.016602, avg loss: 2.543079, ppl: 12.718767 +epoch: 2, batch: 21996, sum loss: 4485.375000, avg loss: 2.439029, ppl: 11.461911 +epoch: 2, batch: 21997, sum loss: 4133.874023, avg loss: 2.484299, ppl: 11.992716 +epoch: 2, batch: 21998, sum loss: 4668.526367, avg loss: 2.757547, ppl: 15.761128 +epoch: 2, batch: 21999, sum loss: 4856.895020, avg loss: 2.773784, ppl: 16.019131 +epoch: 2, batch: 22000, sum loss: 4675.394531, avg loss: 2.802994, ppl: 16.493963 +epoch: 2, batch: 22001, sum loss: 3686.395752, avg loss: 2.409409, ppl: 11.127383 +epoch: 2, batch: 22002, sum loss: 4295.249512, avg loss: 2.584386, ppl: 13.255150 +epoch: 2, batch: 22003, sum loss: 4050.052246, 
avg loss: 2.397900, ppl: 11.000048 +epoch: 2, batch: 22004, sum loss: 4452.499023, avg loss: 2.396393, ppl: 10.983491 +epoch: 2, batch: 22005, sum loss: 3401.552246, avg loss: 2.341055, ppl: 10.392191 +epoch: 2, batch: 22006, sum loss: 4956.002930, avg loss: 2.841745, ppl: 17.145653 +epoch: 2, batch: 22007, sum loss: 4573.765137, avg loss: 2.592837, ppl: 13.367646 +epoch: 2, batch: 22008, sum loss: 4043.844727, avg loss: 2.685156, ppl: 14.660486 +epoch: 2, batch: 22009, sum loss: 3369.357422, avg loss: 2.267401, ppl: 9.654274 +epoch: 2, batch: 22010, sum loss: 3981.152344, avg loss: 2.231588, ppl: 9.314643 +epoch: 2, batch: 22011, sum loss: 4809.633301, avg loss: 2.615353, ppl: 13.672037 +epoch: 2, batch: 22012, sum loss: 3950.946777, avg loss: 2.385837, ppl: 10.868159 +epoch: 2, batch: 22013, sum loss: 3434.779541, avg loss: 2.213131, ppl: 9.144304 +epoch: 2, batch: 22014, sum loss: 4711.874023, avg loss: 2.519719, ppl: 12.425103 +epoch: 2, batch: 22015, sum loss: 4047.102539, avg loss: 2.460245, ppl: 11.707676 +epoch: 2, batch: 22016, sum loss: 3756.734131, avg loss: 2.241488, ppl: 9.407321 +epoch: 2, batch: 22017, sum loss: 4361.340820, avg loss: 2.468218, ppl: 11.801396 +epoch: 2, batch: 22018, sum loss: 3707.670410, avg loss: 2.379763, ppl: 10.802341 +epoch: 2, batch: 22019, sum loss: 4361.755859, avg loss: 2.325030, ppl: 10.226985 +epoch: 2, batch: 22020, sum loss: 4563.341797, avg loss: 2.485480, ppl: 12.006886 +epoch: 2, batch: 22021, sum loss: 4077.733154, avg loss: 2.483394, ppl: 11.981864 +epoch: 2, batch: 22022, sum loss: 4305.898926, avg loss: 2.589236, ppl: 13.319589 +epoch: 2, batch: 22023, sum loss: 4157.844727, avg loss: 2.501712, ppl: 12.203363 +epoch: 2, batch: 22024, sum loss: 3991.852295, avg loss: 2.671922, ppl: 14.467756 +epoch: 2, batch: 22025, sum loss: 3994.406006, avg loss: 2.549079, ppl: 12.795310 +epoch: 2, batch: 22026, sum loss: 4221.920898, avg loss: 2.706360, ppl: 14.974663 +epoch: 2, batch: 22027, sum loss: 3599.778320, avg loss: 
2.351260, ppl: 10.498787 +epoch: 2, batch: 22028, sum loss: 3789.547852, avg loss: 2.523001, ppl: 12.465953 +epoch: 2, batch: 22029, sum loss: 3907.770020, avg loss: 2.552430, ppl: 12.838262 +epoch: 2, batch: 22030, sum loss: 4538.272949, avg loss: 2.545302, ppl: 12.747076 +epoch: 2, batch: 22031, sum loss: 3366.960938, avg loss: 2.287338, ppl: 9.848681 +epoch: 2, batch: 22032, sum loss: 4124.138672, avg loss: 2.367473, ppl: 10.670398 +epoch: 2, batch: 22033, sum loss: 3917.196289, avg loss: 2.284079, ppl: 9.816644 +epoch: 2, batch: 22034, sum loss: 4717.833984, avg loss: 2.536470, ppl: 12.634990 +epoch: 2, batch: 22035, sum loss: 3688.056396, avg loss: 2.239257, ppl: 9.386355 +epoch: 2, batch: 22036, sum loss: 4336.948730, avg loss: 2.413439, ppl: 11.172312 +epoch: 2, batch: 22037, sum loss: 3859.452148, avg loss: 2.524168, ppl: 12.480501 +epoch: 2, batch: 22038, sum loss: 3812.242432, avg loss: 2.325956, ppl: 10.236465 +epoch: 2, batch: 22039, sum loss: 3882.676514, avg loss: 2.425157, ppl: 11.304008 +epoch: 2, batch: 22040, sum loss: 3910.578125, avg loss: 2.397657, ppl: 10.997375 +epoch: 2, batch: 22041, sum loss: 4607.270996, avg loss: 2.748968, ppl: 15.626503 +epoch: 2, batch: 22042, sum loss: 3971.533203, avg loss: 2.472935, ppl: 11.857193 +epoch: 2, batch: 22043, sum loss: 3411.821777, avg loss: 2.306844, ppl: 10.042675 +epoch: 2, batch: 22044, sum loss: 4763.110352, avg loss: 2.619973, ppl: 13.735349 +epoch: 2, batch: 22045, sum loss: 3039.330566, avg loss: 2.236446, ppl: 9.360010 +epoch: 2, batch: 22046, sum loss: 4667.166992, avg loss: 2.565787, ppl: 13.010898 +epoch: 2, batch: 22047, sum loss: 4763.106934, avg loss: 2.360311, ppl: 10.594244 +epoch: 2, batch: 22048, sum loss: 4212.216797, avg loss: 2.613038, ppl: 13.640422 +epoch: 2, batch: 22049, sum loss: 4532.618652, avg loss: 2.629129, ppl: 13.861697 +epoch: 2, batch: 22050, sum loss: 3999.876953, avg loss: 2.529966, ppl: 12.553083 +epoch: 2, batch: 22051, sum loss: 5034.296875, avg loss: 2.655220, 
ppl: 14.228116 +epoch: 2, batch: 22052, sum loss: 4583.908691, avg loss: 2.671276, ppl: 14.458401 +epoch: 2, batch: 22053, sum loss: 5394.290527, avg loss: 2.880027, ppl: 17.814754 +epoch: 2, batch: 22054, sum loss: 4543.359863, avg loss: 2.629259, ppl: 13.863495 +epoch: 2, batch: 22055, sum loss: 3963.641113, avg loss: 2.366353, ppl: 10.658450 +epoch: 2, batch: 22056, sum loss: 3611.839600, avg loss: 2.246169, ppl: 9.451456 +epoch: 2, batch: 22057, sum loss: 4477.351562, avg loss: 2.541062, ppl: 12.693149 +epoch: 2, batch: 22058, sum loss: 4159.555176, avg loss: 2.459820, ppl: 11.702703 +epoch: 2, batch: 22059, sum loss: 4531.525391, avg loss: 2.580595, ppl: 13.204999 +epoch: 2, batch: 22060, sum loss: 4314.248535, avg loss: 2.622643, ppl: 13.772081 +epoch: 2, batch: 22061, sum loss: 3852.816406, avg loss: 2.347847, ppl: 10.463014 +epoch: 2, batch: 22062, sum loss: 3289.464355, avg loss: 2.124977, ppl: 8.372705 +epoch: 2, batch: 22063, sum loss: 3936.989990, avg loss: 2.383166, ppl: 10.839167 +epoch: 2, batch: 22064, sum loss: 4170.444824, avg loss: 2.374969, ppl: 10.750677 +epoch: 2, batch: 22065, sum loss: 4342.947266, avg loss: 2.592804, ppl: 13.367207 +epoch: 2, batch: 22066, sum loss: 4896.117676, avg loss: 2.474036, ppl: 11.870261 +epoch: 2, batch: 22067, sum loss: 4272.818359, avg loss: 2.380400, ppl: 10.809228 +epoch: 2, batch: 22068, sum loss: 4787.442383, avg loss: 2.611807, ppl: 13.623648 +epoch: 2, batch: 22069, sum loss: 4281.428711, avg loss: 2.356318, ppl: 10.552022 +epoch: 2, batch: 22070, sum loss: 4623.826660, avg loss: 2.533604, ppl: 12.598826 +epoch: 2, batch: 22071, sum loss: 4485.361328, avg loss: 2.557219, ppl: 12.899887 +epoch: 2, batch: 22072, sum loss: 3258.880615, avg loss: 2.280532, ppl: 9.781886 +epoch: 2, batch: 22073, sum loss: 4503.600586, avg loss: 2.719566, ppl: 15.173730 +epoch: 2, batch: 22074, sum loss: 5107.544434, avg loss: 2.496356, ppl: 12.138182 +epoch: 2, batch: 22075, sum loss: 3888.554932, avg loss: 2.422776, ppl: 
11.277116 +epoch: 2, batch: 22076, sum loss: 3999.277100, avg loss: 2.393344, ppl: 10.950047 +epoch: 2, batch: 22077, sum loss: 3976.298096, avg loss: 2.523032, ppl: 12.466333 +epoch: 2, batch: 22078, sum loss: 4795.589355, avg loss: 2.487339, ppl: 12.029224 +epoch: 2, batch: 22079, sum loss: 5216.796387, avg loss: 2.665711, ppl: 14.378168 +epoch: 2, batch: 22080, sum loss: 3573.213623, avg loss: 2.340022, ppl: 10.381466 +epoch: 2, batch: 22081, sum loss: 4608.569336, avg loss: 2.506019, ppl: 12.256046 +epoch: 2, batch: 22082, sum loss: 3967.793457, avg loss: 2.429757, ppl: 11.356124 +epoch: 2, batch: 22083, sum loss: 4488.714355, avg loss: 2.427644, ppl: 11.332155 +epoch: 2, batch: 22084, sum loss: 4258.725586, avg loss: 2.500720, ppl: 12.191263 +epoch: 2, batch: 22085, sum loss: 4830.340332, avg loss: 2.796955, ppl: 16.394644 +epoch: 2, batch: 22086, sum loss: 4211.762695, avg loss: 2.402603, ppl: 11.051906 +epoch: 2, batch: 22087, sum loss: 4028.400391, avg loss: 2.423827, ppl: 11.288979 +epoch: 2, batch: 22088, sum loss: 4021.790527, avg loss: 2.502670, ppl: 12.215065 +epoch: 2, batch: 22089, sum loss: 3593.844238, avg loss: 2.441470, ppl: 11.489923 +epoch: 2, batch: 22090, sum loss: 3786.140625, avg loss: 2.265793, ppl: 9.638768 +epoch: 2, batch: 22091, sum loss: 3764.324219, avg loss: 2.546904, ppl: 12.767515 +epoch: 2, batch: 22092, sum loss: 4808.778320, avg loss: 2.581201, ppl: 13.213001 +epoch: 2, batch: 22093, sum loss: 4634.994141, avg loss: 2.430516, ppl: 11.364745 +epoch: 2, batch: 22094, sum loss: 4928.098633, avg loss: 2.746989, ppl: 15.595607 +epoch: 2, batch: 22095, sum loss: 4077.211426, avg loss: 2.225552, ppl: 9.258593 +epoch: 2, batch: 22096, sum loss: 4332.639160, avg loss: 2.557638, ppl: 12.905301 +epoch: 2, batch: 22097, sum loss: 3541.729736, avg loss: 2.381795, ppl: 10.824320 +epoch: 2, batch: 22098, sum loss: 5049.479004, avg loss: 2.693055, ppl: 14.776755 +epoch: 2, batch: 22099, sum loss: 3803.987305, avg loss: 2.443152, ppl: 11.509257 
+epoch: 2, batch: 22100, sum loss: 3723.211914, avg loss: 2.343116, ppl: 10.413638 +epoch: 2, batch: 22101, sum loss: 3648.559570, avg loss: 2.215276, ppl: 9.163938 +epoch: 2, batch: 22102, sum loss: 4053.184570, avg loss: 2.408309, ppl: 11.115152 +epoch: 2, batch: 22103, sum loss: 5132.995605, avg loss: 2.770100, ppl: 15.960232 +epoch: 2, batch: 22104, sum loss: 4605.112305, avg loss: 2.509598, ppl: 12.299985 +epoch: 2, batch: 22105, sum loss: 4430.935059, avg loss: 2.507603, ppl: 12.275476 +epoch: 2, batch: 22106, sum loss: 4109.102539, avg loss: 2.408618, ppl: 11.118585 +epoch: 2, batch: 22107, sum loss: 3451.849121, avg loss: 2.549371, ppl: 12.799057 +epoch: 2, batch: 22108, sum loss: 3845.545898, avg loss: 2.427744, ppl: 11.333282 +epoch: 2, batch: 22109, sum loss: 3981.818604, avg loss: 2.456396, ppl: 11.662707 +epoch: 2, batch: 22110, sum loss: 4393.595215, avg loss: 2.454522, ppl: 11.640872 +epoch: 2, batch: 22111, sum loss: 4598.286133, avg loss: 2.514099, ppl: 12.355467 +epoch: 2, batch: 22112, sum loss: 4157.349121, avg loss: 2.566265, ppl: 13.017116 +epoch: 2, batch: 22113, sum loss: 3923.096436, avg loss: 2.448874, ppl: 11.575308 +epoch: 2, batch: 22114, sum loss: 4197.000000, avg loss: 2.566972, ppl: 13.026327 +epoch: 2, batch: 22115, sum loss: 4322.687012, avg loss: 2.579169, ppl: 13.186176 +epoch: 2, batch: 22116, sum loss: 4318.245605, avg loss: 2.235117, ppl: 9.347573 +epoch: 2, batch: 22117, sum loss: 3902.998047, avg loss: 2.425729, ppl: 11.310472 +epoch: 2, batch: 22118, sum loss: 3556.246582, avg loss: 2.222654, ppl: 9.231800 +epoch: 2, batch: 22119, sum loss: 3257.618896, avg loss: 2.147408, ppl: 8.562639 +epoch: 2, batch: 22120, sum loss: 3570.438965, avg loss: 2.366096, ppl: 10.655714 +epoch: 2, batch: 22121, sum loss: 4376.874023, avg loss: 2.317032, ppl: 10.145521 +epoch: 2, batch: 22122, sum loss: 4819.893555, avg loss: 2.620932, ppl: 13.748529 +epoch: 2, batch: 22123, sum loss: 3994.570801, avg loss: 2.496607, ppl: 12.141224 +epoch: 2, 
batch: 22124, sum loss: 3110.144531, avg loss: 2.223120, ppl: 9.236100 +epoch: 2, batch: 22125, sum loss: 3681.049072, avg loss: 2.225544, ppl: 9.258515 +epoch: 2, batch: 22126, sum loss: 3508.650146, avg loss: 2.322072, ppl: 10.196775 +epoch: 2, batch: 22127, sum loss: 3932.692871, avg loss: 2.516118, ppl: 12.380445 +epoch: 2, batch: 22128, sum loss: 4150.730469, avg loss: 2.361053, ppl: 10.602104 +epoch: 2, batch: 22129, sum loss: 4446.328125, avg loss: 2.486761, ppl: 12.022268 +epoch: 2, batch: 22130, sum loss: 3503.271973, avg loss: 2.406093, ppl: 11.090549 +epoch: 2, batch: 22131, sum loss: 3377.394287, avg loss: 2.303816, ppl: 10.012318 +epoch: 2, batch: 22132, sum loss: 3999.373291, avg loss: 2.425333, ppl: 11.305988 +epoch: 2, batch: 22133, sum loss: 3423.252441, avg loss: 2.271568, ppl: 9.694586 +epoch: 2, batch: 22134, sum loss: 4293.319824, avg loss: 2.555548, ppl: 12.878351 +epoch: 2, batch: 22135, sum loss: 4512.019531, avg loss: 2.509466, ppl: 12.298357 +epoch: 2, batch: 22136, sum loss: 4215.281250, avg loss: 2.466519, ppl: 11.781365 +epoch: 2, batch: 22137, sum loss: 4700.537109, avg loss: 2.664704, ppl: 14.363691 +epoch: 2, batch: 22138, sum loss: 4659.066406, avg loss: 2.699343, ppl: 14.869962 +epoch: 2, batch: 22139, sum loss: 3866.914062, avg loss: 2.422879, ppl: 11.278277 +epoch: 2, batch: 22140, sum loss: 4297.923340, avg loss: 2.549184, ppl: 12.796653 +epoch: 2, batch: 22141, sum loss: 4431.102539, avg loss: 2.457628, ppl: 11.677075 +epoch: 2, batch: 22142, sum loss: 4832.936035, avg loss: 2.418887, ppl: 11.233349 +epoch: 2, batch: 22143, sum loss: 4939.540527, avg loss: 2.811349, ppl: 16.632343 +epoch: 2, batch: 22144, sum loss: 4112.769043, avg loss: 2.464212, ppl: 11.754212 +epoch: 2, batch: 22145, sum loss: 3570.073730, avg loss: 2.087762, ppl: 8.066844 +epoch: 2, batch: 22146, sum loss: 4361.907227, avg loss: 2.654843, ppl: 14.222754 +epoch: 2, batch: 22147, sum loss: 3670.478516, avg loss: 2.223185, ppl: 9.236704 +epoch: 2, batch: 
22148, sum loss: 4900.395020, avg loss: 2.792248, ppl: 16.317657 +epoch: 2, batch: 22149, sum loss: 4626.865234, avg loss: 2.525582, ppl: 12.498162 +epoch: 2, batch: 22150, sum loss: 3462.208984, avg loss: 2.138486, ppl: 8.486581 +epoch: 2, batch: 22151, sum loss: 4413.509766, avg loss: 2.393444, ppl: 10.951140 +epoch: 2, batch: 22152, sum loss: 5177.553223, avg loss: 2.610970, ppl: 13.612246 +epoch: 2, batch: 22153, sum loss: 3225.083496, avg loss: 2.225731, ppl: 9.260246 +epoch: 2, batch: 22154, sum loss: 4776.234863, avg loss: 2.602853, ppl: 13.502203 +epoch: 2, batch: 22155, sum loss: 4951.431152, avg loss: 2.653500, ppl: 14.203662 +epoch: 2, batch: 22156, sum loss: 4714.062988, avg loss: 2.742329, ppl: 15.523094 +epoch: 2, batch: 22157, sum loss: 3768.193848, avg loss: 2.445291, ppl: 11.533909 +epoch: 2, batch: 22158, sum loss: 3587.710449, avg loss: 2.390213, ppl: 10.915824 +epoch: 2, batch: 22159, sum loss: 4062.209473, avg loss: 2.598982, ppl: 13.450044 +epoch: 2, batch: 22160, sum loss: 4672.003418, avg loss: 2.745008, ppl: 15.564734 +epoch: 2, batch: 22161, sum loss: 4033.881348, avg loss: 2.416945, ppl: 11.211558 +epoch: 2, batch: 22162, sum loss: 3404.554199, avg loss: 2.380807, ppl: 10.813627 +epoch: 2, batch: 22163, sum loss: 4959.009766, avg loss: 2.643395, ppl: 14.060865 +epoch: 2, batch: 22164, sum loss: 5159.335938, avg loss: 2.691360, ppl: 14.751718 +epoch: 2, batch: 22165, sum loss: 3849.897461, avg loss: 2.496691, ppl: 12.142248 +epoch: 2, batch: 22166, sum loss: 5142.486328, avg loss: 2.860115, ppl: 17.463528 +epoch: 2, batch: 22167, sum loss: 4698.984863, avg loss: 2.677484, ppl: 14.548448 +epoch: 2, batch: 22168, sum loss: 4116.775879, avg loss: 2.475512, ppl: 11.887787 +epoch: 2, batch: 22169, sum loss: 3617.891113, avg loss: 2.346233, ppl: 10.446144 +epoch: 2, batch: 22170, sum loss: 3431.087646, avg loss: 2.229427, ppl: 9.294536 +epoch: 2, batch: 22171, sum loss: 5272.253418, avg loss: 2.741681, ppl: 15.513046 +epoch: 2, batch: 22172, sum 
loss: 3433.297852, avg loss: 2.279746, ppl: 9.774200 +epoch: 2, batch: 22173, sum loss: 5159.889160, avg loss: 2.858664, ppl: 17.438219 +epoch: 2, batch: 22174, sum loss: 3278.531982, avg loss: 1.936522, ppl: 6.934592 +epoch: 2, batch: 22175, sum loss: 3010.363281, avg loss: 2.190948, ppl: 8.943692 +epoch: 2, batch: 22176, sum loss: 4019.813477, avg loss: 2.605194, ppl: 13.533845 +epoch: 2, batch: 22177, sum loss: 2914.599854, avg loss: 2.001786, ppl: 7.402263 +epoch: 2, batch: 22178, sum loss: 4142.084961, avg loss: 2.403996, ppl: 11.067313 +epoch: 2, batch: 22179, sum loss: 4526.711914, avg loss: 2.720380, ppl: 15.186090 +epoch: 2, batch: 22180, sum loss: 4339.609375, avg loss: 2.417610, ppl: 11.219010 +epoch: 2, batch: 22181, sum loss: 4350.900879, avg loss: 2.562368, ppl: 12.966488 +epoch: 2, batch: 22182, sum loss: 4420.005371, avg loss: 2.735152, ppl: 15.412083 +epoch: 2, batch: 22183, sum loss: 4379.532715, avg loss: 2.518420, ppl: 12.408978 +epoch: 2, batch: 22184, sum loss: 3675.084961, avg loss: 2.348297, ppl: 10.467730 +epoch: 2, batch: 22185, sum loss: 4391.181152, avg loss: 2.371048, ppl: 10.708609 +epoch: 2, batch: 22186, sum loss: 3634.817383, avg loss: 2.460946, ppl: 11.715894 +epoch: 2, batch: 22187, sum loss: 3674.617188, avg loss: 2.481173, ppl: 11.955280 +epoch: 2, batch: 22188, sum loss: 4952.685547, avg loss: 2.700483, ppl: 14.886918 +epoch: 2, batch: 22189, sum loss: 4070.338867, avg loss: 2.362356, ppl: 10.615930 +epoch: 2, batch: 22190, sum loss: 3811.171631, avg loss: 2.370131, ppl: 10.698799 +epoch: 2, batch: 22191, sum loss: 4955.870117, avg loss: 2.765553, ppl: 15.887819 +epoch: 2, batch: 22192, sum loss: 4624.426270, avg loss: 2.605310, ppl: 13.535426 +epoch: 2, batch: 22193, sum loss: 3541.215576, avg loss: 2.142296, ppl: 8.518975 +epoch: 2, batch: 22194, sum loss: 4629.577148, avg loss: 2.636434, ppl: 13.963316 +epoch: 2, batch: 22195, sum loss: 5196.202148, avg loss: 2.700729, ppl: 14.890581 +epoch: 2, batch: 22196, sum loss: 
4989.117188, avg loss: 2.781002, ppl: 16.135178 +epoch: 2, batch: 22197, sum loss: 4476.877930, avg loss: 2.334139, ppl: 10.320566 +epoch: 2, batch: 22198, sum loss: 3704.411133, avg loss: 2.481186, ppl: 11.955437 +epoch: 2, batch: 22199, sum loss: 3972.345459, avg loss: 2.358875, ppl: 10.579043 +epoch: 2, batch: 22200, sum loss: 3994.088135, avg loss: 2.345325, ppl: 10.436662 +epoch: 2, batch: 22201, sum loss: 3726.092041, avg loss: 2.263726, ppl: 9.618857 +epoch: 2, batch: 22202, sum loss: 3810.093262, avg loss: 2.302171, ppl: 9.995862 +epoch: 2, batch: 22203, sum loss: 3500.912354, avg loss: 2.229881, ppl: 9.298756 +epoch: 2, batch: 22204, sum loss: 4410.968750, avg loss: 2.479465, ppl: 11.934880 +epoch: 2, batch: 22205, sum loss: 4157.454590, avg loss: 2.545900, ppl: 12.754701 +epoch: 2, batch: 22206, sum loss: 4289.507324, avg loss: 2.360764, ppl: 10.599041 +epoch: 2, batch: 22207, sum loss: 3974.723633, avg loss: 2.314924, ppl: 10.124148 +epoch: 2, batch: 22208, sum loss: 3473.077881, avg loss: 2.318476, ppl: 10.160183 +epoch: 2, batch: 22209, sum loss: 4190.303711, avg loss: 2.324073, ppl: 10.217205 +epoch: 2, batch: 22210, sum loss: 3902.402588, avg loss: 2.353681, ppl: 10.524234 +epoch: 2, batch: 22211, sum loss: 4456.870117, avg loss: 2.586692, ppl: 13.285748 +epoch: 2, batch: 22212, sum loss: 3476.245117, avg loss: 2.285500, ppl: 9.830599 +epoch: 2, batch: 22213, sum loss: 3631.357666, avg loss: 2.225097, ppl: 9.254377 +epoch: 2, batch: 22214, sum loss: 4240.550293, avg loss: 2.348035, ppl: 10.464982 +epoch: 2, batch: 22215, sum loss: 4532.720703, avg loss: 2.557969, ppl: 12.909570 +epoch: 2, batch: 22216, sum loss: 4522.903320, avg loss: 2.345904, ppl: 10.442713 +epoch: 2, batch: 22217, sum loss: 4147.956055, avg loss: 2.628616, ppl: 13.854583 +epoch: 2, batch: 22218, sum loss: 3943.170654, avg loss: 2.319512, ppl: 10.170713 +epoch: 2, batch: 22219, sum loss: 3738.672607, avg loss: 2.376778, ppl: 10.770149 +epoch: 2, batch: 22220, sum loss: 4240.240234, 
avg loss: 2.383497, ppl: 10.842749 +epoch: 2, batch: 22221, sum loss: 4357.708008, avg loss: 2.600065, ppl: 13.464606 +epoch: 2, batch: 22222, sum loss: 3726.821533, avg loss: 2.413745, ppl: 11.175733 +epoch: 2, batch: 22223, sum loss: 3513.633301, avg loss: 2.168910, ppl: 8.748739 +epoch: 2, batch: 22224, sum loss: 3913.528076, avg loss: 2.576385, ppl: 13.149510 +epoch: 2, batch: 22225, sum loss: 4686.605469, avg loss: 2.514273, ppl: 12.357627 +epoch: 2, batch: 22226, sum loss: 4381.700684, avg loss: 2.436986, ppl: 11.438510 +epoch: 2, batch: 22227, sum loss: 4694.633301, avg loss: 2.519932, ppl: 12.427752 +epoch: 2, batch: 22228, sum loss: 4106.492188, avg loss: 2.289015, ppl: 9.865211 +epoch: 2, batch: 22229, sum loss: 4008.326416, avg loss: 2.592708, ppl: 13.365916 +epoch: 2, batch: 22230, sum loss: 4547.508301, avg loss: 2.852891, ppl: 17.337833 +epoch: 2, batch: 22231, sum loss: 3816.853760, avg loss: 2.558213, ppl: 12.912722 +epoch: 2, batch: 22232, sum loss: 4128.596680, avg loss: 2.569133, ppl: 13.054502 +epoch: 2, batch: 22233, sum loss: 4289.754395, avg loss: 2.517462, ppl: 12.397087 +epoch: 2, batch: 22234, sum loss: 5564.672852, avg loss: 2.966244, ppl: 19.418837 +epoch: 2, batch: 22235, sum loss: 4601.086914, avg loss: 2.602425, ppl: 13.496426 +epoch: 2, batch: 22236, sum loss: 4706.359863, avg loss: 2.500723, ppl: 12.191300 +epoch: 2, batch: 22237, sum loss: 3702.001221, avg loss: 2.343039, ppl: 10.412831 +epoch: 2, batch: 22238, sum loss: 4240.224609, avg loss: 2.519444, ppl: 12.421691 +epoch: 2, batch: 22239, sum loss: 3869.827637, avg loss: 2.358213, ppl: 10.572042 +epoch: 2, batch: 22240, sum loss: 4465.613281, avg loss: 2.521521, ppl: 12.447510 +epoch: 2, batch: 22241, sum loss: 4276.509766, avg loss: 2.533478, ppl: 12.597238 +epoch: 2, batch: 22242, sum loss: 3407.589111, avg loss: 2.421883, ppl: 11.267054 +epoch: 2, batch: 22243, sum loss: 3483.959229, avg loss: 2.391187, ppl: 10.926453 +epoch: 2, batch: 22244, sum loss: 4112.151367, avg loss: 
2.408993, ppl: 11.122758 +epoch: 2, batch: 22245, sum loss: 4597.579102, avg loss: 2.432582, ppl: 11.388245 +epoch: 2, batch: 22246, sum loss: 4178.611816, avg loss: 2.462352, ppl: 11.732377 +epoch: 2, batch: 22247, sum loss: 4198.487305, avg loss: 2.341599, ppl: 10.397852 +epoch: 2, batch: 22248, sum loss: 4191.503906, avg loss: 2.595358, ppl: 13.401389 +epoch: 2, batch: 22249, sum loss: 4161.557129, avg loss: 2.557810, ppl: 12.907519 +epoch: 2, batch: 22250, sum loss: 3885.938232, avg loss: 2.424166, ppl: 11.292807 +epoch: 2, batch: 22251, sum loss: 4662.271973, avg loss: 2.581546, ppl: 13.217558 +epoch: 2, batch: 22252, sum loss: 3940.907471, avg loss: 2.452338, ppl: 11.615475 +epoch: 2, batch: 22253, sum loss: 4095.690918, avg loss: 2.403574, ppl: 11.062649 +epoch: 2, batch: 22254, sum loss: 4302.090820, avg loss: 2.539605, ppl: 12.674663 +epoch: 2, batch: 22255, sum loss: 4621.779785, avg loss: 2.677740, ppl: 14.552173 +epoch: 2, batch: 22256, sum loss: 4082.335938, avg loss: 2.501431, ppl: 12.199945 +epoch: 2, batch: 22257, sum loss: 3799.632324, avg loss: 2.365898, ppl: 10.653601 +epoch: 2, batch: 22258, sum loss: 3528.493164, avg loss: 2.248880, ppl: 9.477117 +epoch: 2, batch: 22259, sum loss: 4305.921875, avg loss: 2.617582, ppl: 13.702545 +epoch: 2, batch: 22260, sum loss: 3878.965332, avg loss: 2.283087, ppl: 9.806912 +epoch: 2, batch: 22261, sum loss: 4649.120117, avg loss: 2.843498, ppl: 17.175749 +epoch: 2, batch: 22262, sum loss: 4716.500488, avg loss: 2.559143, ppl: 12.924737 +epoch: 2, batch: 22263, sum loss: 4363.594727, avg loss: 2.597378, ppl: 13.428483 +epoch: 2, batch: 22264, sum loss: 3563.270752, avg loss: 2.256663, ppl: 9.551162 +epoch: 2, batch: 22265, sum loss: 4245.996582, avg loss: 2.513911, ppl: 12.353155 +epoch: 2, batch: 22266, sum loss: 4952.595215, avg loss: 2.632959, ppl: 13.914878 +epoch: 2, batch: 22267, sum loss: 4351.098633, avg loss: 2.600776, ppl: 13.474193 +epoch: 2, batch: 22268, sum loss: 3271.997559, avg loss: 2.299366, 
ppl: 9.967858 +epoch: 2, batch: 22269, sum loss: 4142.585449, avg loss: 2.516759, ppl: 12.388383 +epoch: 2, batch: 22270, sum loss: 3791.865479, avg loss: 2.395367, ppl: 10.972220 +epoch: 2, batch: 22271, sum loss: 4776.056152, avg loss: 2.535062, ppl: 12.617211 +epoch: 2, batch: 22272, sum loss: 4172.464355, avg loss: 2.572419, ppl: 13.097474 +epoch: 2, batch: 22273, sum loss: 4076.195312, avg loss: 2.396352, ppl: 10.983040 +epoch: 2, batch: 22274, sum loss: 4274.449219, avg loss: 2.373375, ppl: 10.733562 +epoch: 2, batch: 22275, sum loss: 3978.274902, avg loss: 2.546911, ppl: 12.767604 +epoch: 2, batch: 22276, sum loss: 4574.958984, avg loss: 2.684835, ppl: 14.655786 +epoch: 2, batch: 22277, sum loss: 3634.301270, avg loss: 2.359936, ppl: 10.590271 +epoch: 2, batch: 22278, sum loss: 4428.474609, avg loss: 2.549496, ppl: 12.800653 +epoch: 2, batch: 22279, sum loss: 3805.405029, avg loss: 2.402402, ppl: 11.049687 +epoch: 2, batch: 22280, sum loss: 3732.740479, avg loss: 2.380574, ppl: 10.811110 +epoch: 2, batch: 22281, sum loss: 4105.684082, avg loss: 2.408026, ppl: 11.112004 +epoch: 2, batch: 22282, sum loss: 3946.626465, avg loss: 2.390446, ppl: 10.918365 +epoch: 2, batch: 22283, sum loss: 4461.546387, avg loss: 2.596942, ppl: 13.422628 +epoch: 2, batch: 22284, sum loss: 4761.297852, avg loss: 2.614661, ppl: 13.662587 +epoch: 2, batch: 22285, sum loss: 4090.660645, avg loss: 2.637434, ppl: 13.977295 +epoch: 2, batch: 22286, sum loss: 4286.048340, avg loss: 2.600758, ppl: 13.473942 +epoch: 2, batch: 22287, sum loss: 4498.875000, avg loss: 2.310670, ppl: 10.081181 +epoch: 2, batch: 22288, sum loss: 5489.800781, avg loss: 2.762859, ppl: 15.845078 +epoch: 2, batch: 22289, sum loss: 4330.641602, avg loss: 2.422059, ppl: 11.269039 +epoch: 2, batch: 22290, sum loss: 3661.797852, avg loss: 2.418625, ppl: 11.230406 +epoch: 2, batch: 22291, sum loss: 3720.791992, avg loss: 2.375985, ppl: 10.761604 +epoch: 2, batch: 22292, sum loss: 4123.643555, avg loss: 2.891756, ppl: 
18.024925 +epoch: 2, batch: 22293, sum loss: 3673.775391, avg loss: 2.153444, ppl: 8.614474 +epoch: 2, batch: 22294, sum loss: 3563.237549, avg loss: 2.351972, ppl: 10.506269 +epoch: 2, batch: 22295, sum loss: 4760.553711, avg loss: 2.543031, ppl: 12.718161 +epoch: 2, batch: 22296, sum loss: 3642.898438, avg loss: 2.262670, ppl: 9.608708 +epoch: 2, batch: 22297, sum loss: 4245.812500, avg loss: 2.260816, ppl: 9.590913 +epoch: 2, batch: 22298, sum loss: 3037.243652, avg loss: 2.236557, ppl: 9.361041 +epoch: 2, batch: 22299, sum loss: 4047.770996, avg loss: 2.477216, ppl: 11.908066 +epoch: 2, batch: 22300, sum loss: 3475.642090, avg loss: 2.432220, ppl: 11.384124 +epoch: 2, batch: 22301, sum loss: 3766.206543, avg loss: 2.602769, ppl: 13.501069 +epoch: 2, batch: 22302, sum loss: 4336.531738, avg loss: 2.499442, ppl: 12.175696 +epoch: 2, batch: 22303, sum loss: 4239.142578, avg loss: 2.392293, ppl: 10.938544 +epoch: 2, batch: 22304, sum loss: 4202.350098, avg loss: 2.514872, ppl: 12.365021 +epoch: 2, batch: 22305, sum loss: 4977.509766, avg loss: 2.595156, ppl: 13.398680 +epoch: 2, batch: 22306, sum loss: 4139.480469, avg loss: 2.449397, ppl: 11.581357 +epoch: 2, batch: 22307, sum loss: 4405.378906, avg loss: 2.579262, ppl: 13.187396 +epoch: 2, batch: 22308, sum loss: 4355.167969, avg loss: 2.441238, ppl: 11.487249 +epoch: 2, batch: 22309, sum loss: 4531.594727, avg loss: 2.537287, ppl: 12.645321 +epoch: 2, batch: 22310, sum loss: 3950.693359, avg loss: 2.524405, ppl: 12.483462 +epoch: 2, batch: 22311, sum loss: 5521.376953, avg loss: 2.726606, ppl: 15.280933 +epoch: 2, batch: 22312, sum loss: 3932.658936, avg loss: 2.532298, ppl: 12.582388 +epoch: 2, batch: 22313, sum loss: 3564.745117, avg loss: 2.096909, ppl: 8.140967 +epoch: 2, batch: 22314, sum loss: 3208.315918, avg loss: 2.184013, ppl: 8.881881 +epoch: 2, batch: 22315, sum loss: 5340.453613, avg loss: 2.798980, ppl: 16.427877 +epoch: 2, batch: 22316, sum loss: 4819.645020, avg loss: 2.471613, ppl: 11.841528 
+epoch: 2, batch: 22317, sum loss: 3649.976318, avg loss: 2.164873, ppl: 8.713498 +epoch: 2, batch: 22318, sum loss: 4098.560547, avg loss: 2.654508, ppl: 14.217994 +epoch: 2, batch: 22319, sum loss: 3510.681641, avg loss: 2.388219, ppl: 10.894073 +epoch: 2, batch: 22320, sum loss: 4116.301270, avg loss: 2.540927, ppl: 12.691430 +epoch: 2, batch: 22321, sum loss: 3525.012207, avg loss: 2.402871, ppl: 11.054871 +epoch: 2, batch: 22322, sum loss: 4771.094238, avg loss: 2.665416, ppl: 14.373924 +epoch: 2, batch: 22323, sum loss: 4362.412598, avg loss: 2.502818, ppl: 12.216877 +epoch: 2, batch: 22324, sum loss: 3983.434326, avg loss: 2.311918, ppl: 10.093763 +epoch: 2, batch: 22325, sum loss: 3798.603271, avg loss: 2.449132, ppl: 11.578289 +epoch: 2, batch: 22326, sum loss: 3104.159668, avg loss: 2.346304, ppl: 10.446883 +epoch: 2, batch: 22327, sum loss: 3918.822266, avg loss: 2.352234, ppl: 10.509022 +epoch: 2, batch: 22328, sum loss: 5227.674316, avg loss: 2.847317, ppl: 17.241461 +epoch: 2, batch: 22329, sum loss: 4387.672852, avg loss: 2.636823, ppl: 13.968750 +epoch: 2, batch: 22330, sum loss: 3334.812988, avg loss: 2.306233, ppl: 10.036548 +epoch: 2, batch: 22331, sum loss: 4288.159180, avg loss: 2.821157, ppl: 16.796280 +epoch: 2, batch: 22332, sum loss: 4044.640137, avg loss: 2.470764, ppl: 11.831479 +epoch: 2, batch: 22333, sum loss: 3913.597656, avg loss: 2.408368, ppl: 11.115801 +epoch: 2, batch: 22334, sum loss: 4832.829590, avg loss: 2.532929, ppl: 12.590335 +epoch: 2, batch: 22335, sum loss: 3892.339844, avg loss: 2.660520, ppl: 14.303720 +epoch: 2, batch: 22336, sum loss: 3982.724609, avg loss: 2.393464, ppl: 10.951367 +epoch: 2, batch: 22337, sum loss: 3801.916992, avg loss: 2.329606, ppl: 10.273893 +epoch: 2, batch: 22338, sum loss: 3766.790771, avg loss: 2.167314, ppl: 8.734787 +epoch: 2, batch: 22339, sum loss: 4601.584473, avg loss: 2.656804, ppl: 14.250669 +epoch: 2, batch: 22340, sum loss: 3551.527832, avg loss: 2.119050, ppl: 8.323227 +epoch: 2, 
batch: 22341, sum loss: 4253.762207, avg loss: 2.538044, ppl: 12.654900 +epoch: 2, batch: 22342, sum loss: 3217.773193, avg loss: 2.028861, ppl: 7.605417 +epoch: 2, batch: 22343, sum loss: 3243.579590, avg loss: 2.238495, ppl: 9.379209 +epoch: 2, batch: 22344, sum loss: 3796.768066, avg loss: 2.403018, ppl: 11.056492 +epoch: 2, batch: 22345, sum loss: 4189.048828, avg loss: 2.506912, ppl: 12.266985 +epoch: 2, batch: 22346, sum loss: 4350.848145, avg loss: 2.356906, ppl: 10.558230 +epoch: 2, batch: 22347, sum loss: 4634.780273, avg loss: 2.475844, ppl: 11.891744 +epoch: 2, batch: 22348, sum loss: 4154.580566, avg loss: 2.395952, ppl: 10.978642 +epoch: 2, batch: 22349, sum loss: 4847.349121, avg loss: 2.700473, ppl: 14.886772 +epoch: 2, batch: 22350, sum loss: 4152.370117, avg loss: 2.545904, ppl: 12.754758 +epoch: 2, batch: 22351, sum loss: 4940.569824, avg loss: 2.605786, ppl: 13.541862 +epoch: 2, batch: 22352, sum loss: 4718.036133, avg loss: 2.869852, ppl: 17.634401 +epoch: 2, batch: 22353, sum loss: 4443.914062, avg loss: 2.722987, ppl: 15.225729 +epoch: 2, batch: 22354, sum loss: 4051.800537, avg loss: 2.361189, ppl: 10.603553 +epoch: 2, batch: 22355, sum loss: 4874.013184, avg loss: 2.486741, ppl: 12.022036 +epoch: 2, batch: 22356, sum loss: 3791.252197, avg loss: 2.270211, ppl: 9.681443 +epoch: 2, batch: 22357, sum loss: 3644.860840, avg loss: 2.309798, ppl: 10.072388 +epoch: 2, batch: 22358, sum loss: 3513.182861, avg loss: 2.285740, ppl: 9.832964 +epoch: 2, batch: 22359, sum loss: 4477.792480, avg loss: 2.868541, ppl: 17.611305 +epoch: 2, batch: 22360, sum loss: 4116.609375, avg loss: 2.485875, ppl: 12.011627 +epoch: 2, batch: 22361, sum loss: 3662.730469, avg loss: 2.265140, ppl: 9.632469 +epoch: 2, batch: 22362, sum loss: 4148.965332, avg loss: 2.496369, ppl: 12.138341 +epoch: 2, batch: 22363, sum loss: 4680.359375, avg loss: 2.536780, ppl: 12.638909 +epoch: 2, batch: 22364, sum loss: 5026.449219, avg loss: 2.605728, ppl: 13.541079 +epoch: 2, batch: 
22365, sum loss: 3333.156494, avg loss: 2.406611, ppl: 11.096294 +epoch: 2, batch: 22366, sum loss: 4021.713867, avg loss: 2.502622, ppl: 12.214483 +epoch: 2, batch: 22367, sum loss: 3746.241699, avg loss: 2.359094, ppl: 10.581365 +epoch: 2, batch: 22368, sum loss: 4291.630859, avg loss: 2.551505, ppl: 12.826388 +epoch: 2, batch: 22369, sum loss: 4424.052734, avg loss: 2.489619, ppl: 12.056679 +epoch: 2, batch: 22370, sum loss: 4126.697266, avg loss: 2.595407, ppl: 13.402041 +epoch: 2, batch: 22371, sum loss: 4274.839355, avg loss: 2.530988, ppl: 12.565921 +epoch: 2, batch: 22372, sum loss: 4412.574707, avg loss: 2.728865, ppl: 15.315496 +epoch: 2, batch: 22373, sum loss: 3877.697266, avg loss: 2.324759, ppl: 10.224211 +epoch: 2, batch: 22374, sum loss: 3526.666992, avg loss: 2.469655, ppl: 11.818366 +epoch: 2, batch: 22375, sum loss: 3452.956299, avg loss: 2.221980, ppl: 9.225575 +epoch: 2, batch: 22376, sum loss: 4582.248047, avg loss: 2.647168, ppl: 14.114014 +epoch: 2, batch: 22377, sum loss: 4207.875000, avg loss: 2.366634, ppl: 10.661445 +epoch: 2, batch: 22378, sum loss: 4523.744629, avg loss: 2.580573, ppl: 13.204703 +epoch: 2, batch: 22379, sum loss: 3805.776367, avg loss: 2.266692, ppl: 9.647436 +epoch: 2, batch: 22380, sum loss: 3669.817871, avg loss: 2.282225, ppl: 9.798457 +epoch: 2, batch: 22381, sum loss: 4077.336182, avg loss: 2.434231, ppl: 11.407042 +epoch: 2, batch: 22382, sum loss: 3431.101074, avg loss: 2.329329, ppl: 10.271045 +epoch: 2, batch: 22383, sum loss: 3794.323486, avg loss: 2.281614, ppl: 9.792469 +epoch: 2, batch: 22384, sum loss: 3844.292480, avg loss: 2.554347, ppl: 12.862901 +epoch: 2, batch: 22385, sum loss: 3956.924561, avg loss: 2.505969, ppl: 12.255424 +epoch: 2, batch: 22386, sum loss: 4458.180664, avg loss: 2.543172, ppl: 12.719956 +epoch: 2, batch: 22387, sum loss: 4854.115723, avg loss: 2.553454, ppl: 12.851415 +epoch: 2, batch: 22388, sum loss: 4134.124023, avg loss: 2.444781, ppl: 11.528023 +epoch: 2, batch: 22389, sum 
loss: 4794.028809, avg loss: 2.641338, ppl: 14.031971 +epoch: 2, batch: 22390, sum loss: 3774.708252, avg loss: 2.509779, ppl: 12.302217 +epoch: 2, batch: 22391, sum loss: 4370.006836, avg loss: 2.466144, ppl: 11.776945 +epoch: 2, batch: 22392, sum loss: 4661.934570, avg loss: 2.515885, ppl: 12.377557 +epoch: 2, batch: 22393, sum loss: 4516.016602, avg loss: 2.386901, ppl: 10.879725 +epoch: 2, batch: 22394, sum loss: 3430.083984, avg loss: 2.285199, ppl: 9.827643 +epoch: 2, batch: 22395, sum loss: 4356.522949, avg loss: 2.562661, ppl: 12.970282 +epoch: 2, batch: 22396, sum loss: 4464.279785, avg loss: 2.530771, ppl: 12.563186 +epoch: 2, batch: 22397, sum loss: 3740.248047, avg loss: 2.347927, ppl: 10.463860 +epoch: 2, batch: 22398, sum loss: 3487.910645, avg loss: 2.217362, ppl: 9.183076 +epoch: 2, batch: 22399, sum loss: 3648.265137, avg loss: 2.430557, ppl: 11.365206 +epoch: 2, batch: 22400, sum loss: 4569.781738, avg loss: 2.691273, ppl: 14.750444 +epoch: 2, batch: 22401, sum loss: 4514.243164, avg loss: 2.364716, ppl: 10.641020 +epoch: 2, batch: 22402, sum loss: 4524.114258, avg loss: 2.619638, ppl: 13.730748 +epoch: 2, batch: 22403, sum loss: 3307.862061, avg loss: 2.281284, ppl: 9.789245 +epoch: 2, batch: 22404, sum loss: 4751.916504, avg loss: 2.821803, ppl: 16.807133 +epoch: 2, batch: 22405, sum loss: 4160.688477, avg loss: 2.438856, ppl: 11.459925 +epoch: 2, batch: 22406, sum loss: 3960.099121, avg loss: 2.551610, ppl: 12.827745 +epoch: 2, batch: 22407, sum loss: 3812.914062, avg loss: 2.592056, ppl: 13.357203 +epoch: 2, batch: 22408, sum loss: 3663.218994, avg loss: 2.245996, ppl: 9.449821 +epoch: 2, batch: 22409, sum loss: 3527.695557, avg loss: 2.383578, ppl: 10.843633 +epoch: 2, batch: 22410, sum loss: 4553.906738, avg loss: 2.532762, ppl: 12.588230 +epoch: 2, batch: 22411, sum loss: 4532.633301, avg loss: 2.604962, ppl: 13.530706 +epoch: 2, batch: 22412, sum loss: 4788.798828, avg loss: 2.654545, ppl: 14.218513 +epoch: 2, batch: 22413, sum loss: 
3950.758057, avg loss: 2.378542, ppl: 10.789160 +epoch: 2, batch: 22414, sum loss: 4735.008789, avg loss: 2.618921, ppl: 13.720908 +epoch: 2, batch: 22415, sum loss: 5036.356934, avg loss: 2.794871, ppl: 16.360516 +epoch: 2, batch: 22416, sum loss: 4514.583984, avg loss: 2.560740, ppl: 12.945387 +epoch: 2, batch: 22417, sum loss: 4361.787109, avg loss: 2.667760, ppl: 14.407655 +epoch: 2, batch: 22418, sum loss: 3558.782471, avg loss: 2.520384, ppl: 12.433372 +epoch: 2, batch: 22419, sum loss: 4292.226562, avg loss: 2.461139, ppl: 11.718150 +epoch: 2, batch: 22420, sum loss: 4253.790039, avg loss: 2.496356, ppl: 12.138179 +epoch: 2, batch: 22421, sum loss: 3791.674316, avg loss: 2.443089, ppl: 11.508538 +epoch: 2, batch: 22422, sum loss: 4348.392578, avg loss: 2.545897, ppl: 12.754667 +epoch: 2, batch: 22423, sum loss: 4200.842773, avg loss: 2.504975, ppl: 12.243251 +epoch: 2, batch: 22424, sum loss: 4185.309082, avg loss: 2.469209, ppl: 11.813099 +epoch: 2, batch: 22425, sum loss: 3781.262451, avg loss: 2.153339, ppl: 8.613568 +epoch: 2, batch: 22426, sum loss: 3773.092529, avg loss: 2.206487, ppl: 9.083749 +epoch: 2, batch: 22427, sum loss: 4531.346191, avg loss: 2.769772, ppl: 15.954989 +epoch: 2, batch: 22428, sum loss: 4439.276855, avg loss: 2.666232, ppl: 14.385667 +epoch: 2, batch: 22429, sum loss: 4050.994141, avg loss: 2.275839, ppl: 9.736088 +epoch: 2, batch: 22430, sum loss: 4764.270508, avg loss: 2.439463, ppl: 11.466878 +epoch: 2, batch: 22431, sum loss: 3464.143555, avg loss: 2.141003, ppl: 8.507970 +epoch: 2, batch: 22432, sum loss: 3341.255371, avg loss: 2.383206, ppl: 10.839603 +epoch: 2, batch: 22433, sum loss: 4741.846680, avg loss: 2.680524, ppl: 14.592736 +epoch: 2, batch: 22434, sum loss: 4099.961426, avg loss: 2.334830, ppl: 10.327704 +epoch: 2, batch: 22435, sum loss: 4388.786621, avg loss: 2.525194, ppl: 12.493315 +epoch: 2, batch: 22436, sum loss: 4397.418945, avg loss: 2.508511, ppl: 12.286616 +epoch: 2, batch: 22437, sum loss: 
4686.915527, avg loss: 2.794821, ppl: 16.359705 +epoch: 2, batch: 22438, sum loss: 4326.701172, avg loss: 2.524330, ppl: 12.482525 +epoch: 2, batch: 22439, sum loss: 4528.187500, avg loss: 2.442388, ppl: 11.500472 +epoch: 2, batch: 22440, sum loss: 4679.198730, avg loss: 2.481018, ppl: 11.953422 +epoch: 2, batch: 22441, sum loss: 4251.698242, avg loss: 2.586191, ppl: 13.279098 +epoch: 2, batch: 22442, sum loss: 3901.351074, avg loss: 2.424706, ppl: 11.298902 +epoch: 2, batch: 22443, sum loss: 3621.835693, avg loss: 2.339687, ppl: 10.377989 +epoch: 2, batch: 22444, sum loss: 4064.210449, avg loss: 2.578814, ppl: 13.181493 +epoch: 2, batch: 22445, sum loss: 3359.591553, avg loss: 2.207353, ppl: 9.091622 +epoch: 2, batch: 22446, sum loss: 4306.949219, avg loss: 2.566716, ppl: 13.022986 +epoch: 2, batch: 22447, sum loss: 3590.110352, avg loss: 2.412709, ppl: 11.164159 +epoch: 2, batch: 22448, sum loss: 4303.529297, avg loss: 2.563150, ppl: 12.976631 +epoch: 2, batch: 22449, sum loss: 4641.550781, avg loss: 2.425053, ppl: 11.302824 +epoch: 2, batch: 22450, sum loss: 4598.605469, avg loss: 2.574807, ppl: 13.128785 +epoch: 2, batch: 22451, sum loss: 4451.567871, avg loss: 2.582116, ppl: 13.225095 +epoch: 2, batch: 22452, sum loss: 4133.316406, avg loss: 2.370021, ppl: 10.697616 +epoch: 2, batch: 22453, sum loss: 4336.406250, avg loss: 2.657112, ppl: 14.255059 +epoch: 2, batch: 22454, sum loss: 3641.267822, avg loss: 2.395571, ppl: 10.974463 +epoch: 2, batch: 22455, sum loss: 4854.317871, avg loss: 2.425946, ppl: 11.312926 +epoch: 2, batch: 22456, sum loss: 5030.627930, avg loss: 2.783967, ppl: 16.183084 +epoch: 2, batch: 22457, sum loss: 4034.618164, avg loss: 2.362189, ppl: 10.614156 +epoch: 2, batch: 22458, sum loss: 4728.182617, avg loss: 2.709560, ppl: 15.022666 +epoch: 2, batch: 22459, sum loss: 3952.699219, avg loss: 2.394125, ppl: 10.958600 +epoch: 2, batch: 22460, sum loss: 3768.792725, avg loss: 2.415893, ppl: 11.199765 +epoch: 2, batch: 22461, sum loss: 
4660.525879, avg loss: 2.580579, ppl: 13.204782 +epoch: 2, batch: 22462, sum loss: 4874.607910, avg loss: 2.750907, ppl: 15.656833 +epoch: 2, batch: 22463, sum loss: 4655.779785, avg loss: 2.602448, ppl: 13.496740 +epoch: 2, batch: 22464, sum loss: 4563.894531, avg loss: 2.765997, ppl: 15.894875 +epoch: 2, batch: 22465, sum loss: 4154.174805, avg loss: 2.350976, ppl: 10.495811 +epoch: 2, batch: 22466, sum loss: 3416.745117, avg loss: 2.267250, ppl: 9.652815 +epoch: 2, batch: 22467, sum loss: 3411.387939, avg loss: 2.128127, ppl: 8.399121 +epoch: 2, batch: 22468, sum loss: 5288.032227, avg loss: 2.784641, ppl: 16.193996 +epoch: 2, batch: 22469, sum loss: 4203.749512, avg loss: 2.451166, ppl: 11.601863 +epoch: 2, batch: 22470, sum loss: 4989.717285, avg loss: 2.598811, ppl: 13.447742 +epoch: 2, batch: 22471, sum loss: 3807.711426, avg loss: 2.393282, ppl: 10.949370 +epoch: 2, batch: 22472, sum loss: 3913.546875, avg loss: 2.340638, ppl: 10.387863 +epoch: 2, batch: 22473, sum loss: 4409.864258, avg loss: 2.535862, ppl: 12.627314 +epoch: 2, batch: 22474, sum loss: 3934.991943, avg loss: 2.642708, ppl: 14.051200 +epoch: 2, batch: 22475, sum loss: 4008.718262, avg loss: 2.432475, ppl: 11.387026 +epoch: 2, batch: 22476, sum loss: 4417.702148, avg loss: 2.495877, ppl: 12.132369 +epoch: 2, batch: 22477, sum loss: 3335.628906, avg loss: 2.219314, ppl: 9.201015 +epoch: 2, batch: 22478, sum loss: 4358.697266, avg loss: 2.609998, ppl: 13.599030 +epoch: 2, batch: 22479, sum loss: 5426.237305, avg loss: 2.636656, ppl: 13.966415 +epoch: 2, batch: 22480, sum loss: 4157.761719, avg loss: 2.501662, ppl: 12.202755 +epoch: 2, batch: 22481, sum loss: 4061.092529, avg loss: 2.179867, ppl: 8.845130 +epoch: 2, batch: 22482, sum loss: 4635.122070, avg loss: 2.545372, ppl: 12.747967 +epoch: 2, batch: 22483, sum loss: 3827.396973, avg loss: 2.298737, ppl: 9.961594 +epoch: 2, batch: 22484, sum loss: 4057.519043, avg loss: 2.333249, ppl: 10.311385 +epoch: 2, batch: 22485, sum loss: 3251.230469, 
avg loss: 2.223824, ppl: 9.242607 +epoch: 2, batch: 22486, sum loss: 4232.497559, avg loss: 2.399375, ppl: 11.016289 +epoch: 2, batch: 22487, sum loss: 4109.726074, avg loss: 2.571794, ppl: 13.089280 +epoch: 2, batch: 22488, sum loss: 4822.125977, avg loss: 2.544657, ppl: 12.738864 +epoch: 2, batch: 22489, sum loss: 4340.463867, avg loss: 2.672699, ppl: 14.479002 +epoch: 2, batch: 22490, sum loss: 4636.852539, avg loss: 2.514562, ppl: 12.361195 +epoch: 2, batch: 22491, sum loss: 4249.042969, avg loss: 2.704674, ppl: 14.949446 +epoch: 2, batch: 22492, sum loss: 3971.455811, avg loss: 2.688867, ppl: 14.714989 +epoch: 2, batch: 22493, sum loss: 4680.592773, avg loss: 2.410192, ppl: 11.136099 +epoch: 2, batch: 22494, sum loss: 4409.900879, avg loss: 2.429698, ppl: 11.355447 +epoch: 2, batch: 22495, sum loss: 3747.695068, avg loss: 2.172577, ppl: 8.780883 +epoch: 2, batch: 22496, sum loss: 4286.748047, avg loss: 2.437037, ppl: 11.439096 +epoch: 2, batch: 22497, sum loss: 3968.570557, avg loss: 2.327607, ppl: 10.253380 +epoch: 2, batch: 22498, sum loss: 3459.149658, avg loss: 2.223104, ppl: 9.235955 +epoch: 2, batch: 22499, sum loss: 4160.529297, avg loss: 2.372024, ppl: 10.719061 +epoch: 2, batch: 22500, sum loss: 4613.183105, avg loss: 2.819794, ppl: 16.773397 +epoch: 2, batch: 22501, sum loss: 4222.435059, avg loss: 2.600022, ppl: 13.464029 +epoch: 2, batch: 22502, sum loss: 3565.534180, avg loss: 2.414038, ppl: 11.179013 +epoch: 2, batch: 22503, sum loss: 4206.202148, avg loss: 2.417357, ppl: 11.216181 +epoch: 2, batch: 22504, sum loss: 3996.911133, avg loss: 2.323786, ppl: 10.214268 +epoch: 2, batch: 22505, sum loss: 3924.597412, avg loss: 2.449811, ppl: 11.586157 +epoch: 2, batch: 22506, sum loss: 3398.782471, avg loss: 2.236041, ppl: 9.356219 +epoch: 2, batch: 22507, sum loss: 3995.768066, avg loss: 2.219871, ppl: 9.206146 +epoch: 2, batch: 22508, sum loss: 3675.172363, avg loss: 2.515518, ppl: 12.373022 +epoch: 2, batch: 22509, sum loss: 4308.525879, avg loss: 
2.438328, ppl: 11.453876 +epoch: 2, batch: 22510, sum loss: 5027.746582, avg loss: 2.674333, ppl: 14.502675 +epoch: 2, batch: 22511, sum loss: 5381.977051, avg loss: 2.726432, ppl: 15.278278 +epoch: 2, batch: 22512, sum loss: 4533.600586, avg loss: 2.492359, ppl: 12.089759 +epoch: 2, batch: 22513, sum loss: 4661.768555, avg loss: 2.613099, ppl: 13.641261 +epoch: 2, batch: 22514, sum loss: 3804.473633, avg loss: 2.438765, ppl: 11.458883 +epoch: 2, batch: 22515, sum loss: 4154.752930, avg loss: 2.480450, ppl: 11.946635 +epoch: 2, batch: 22516, sum loss: 3921.416504, avg loss: 2.429626, ppl: 11.354637 +epoch: 2, batch: 22517, sum loss: 4290.750000, avg loss: 2.540409, ppl: 12.684854 +epoch: 2, batch: 22518, sum loss: 4615.558594, avg loss: 2.504373, ppl: 12.235880 +epoch: 2, batch: 22519, sum loss: 5068.915039, avg loss: 2.646953, ppl: 14.110978 +epoch: 2, batch: 22520, sum loss: 3487.704102, avg loss: 2.083455, ppl: 8.032175 +epoch: 2, batch: 22521, sum loss: 4839.044922, avg loss: 2.882099, ppl: 17.851711 +epoch: 2, batch: 22522, sum loss: 3458.113037, avg loss: 2.081946, ppl: 8.020063 +epoch: 2, batch: 22523, sum loss: 4059.301270, avg loss: 2.445362, ppl: 11.534728 +epoch: 2, batch: 22524, sum loss: 3816.714600, avg loss: 2.375056, ppl: 10.751613 +epoch: 2, batch: 22525, sum loss: 5040.932129, avg loss: 2.456595, ppl: 11.665021 +epoch: 2, batch: 22526, sum loss: 4518.733398, avg loss: 2.525843, ppl: 12.501431 +epoch: 2, batch: 22527, sum loss: 4222.834961, avg loss: 2.504647, ppl: 12.239238 +epoch: 2, batch: 22528, sum loss: 5518.862305, avg loss: 2.872911, ppl: 17.688438 +epoch: 2, batch: 22529, sum loss: 4776.123535, avg loss: 2.746477, ppl: 15.587622 +epoch: 2, batch: 22530, sum loss: 4277.472656, avg loss: 2.803062, ppl: 16.495081 +epoch: 2, batch: 22531, sum loss: 4939.280273, avg loss: 2.730393, ppl: 15.338913 +epoch: 2, batch: 22532, sum loss: 3850.494141, avg loss: 2.337883, ppl: 10.359287 +epoch: 2, batch: 22533, sum loss: 4169.835449, avg loss: 2.545687, 
ppl: 12.751986 +epoch: 2, batch: 22534, sum loss: 3084.282959, avg loss: 2.095301, ppl: 8.127887 +epoch: 2, batch: 22535, sum loss: 4893.956543, avg loss: 2.717355, ppl: 15.140224 +epoch: 2, batch: 22536, sum loss: 4097.716797, avg loss: 2.484971, ppl: 12.000770 +epoch: 2, batch: 22537, sum loss: 3599.007080, avg loss: 2.332474, ppl: 10.303401 +epoch: 2, batch: 22538, sum loss: 4285.426758, avg loss: 2.570742, ppl: 13.075521 +epoch: 2, batch: 22539, sum loss: 4373.365234, avg loss: 2.592392, ppl: 13.361694 +epoch: 2, batch: 22540, sum loss: 4588.064453, avg loss: 2.683079, ppl: 14.630063 +epoch: 2, batch: 22541, sum loss: 4366.802734, avg loss: 2.762051, ppl: 15.832279 +epoch: 2, batch: 22542, sum loss: 4130.467773, avg loss: 2.486736, ppl: 12.021967 +epoch: 2, batch: 22543, sum loss: 3454.571289, avg loss: 2.265293, ppl: 9.633944 +epoch: 2, batch: 22544, sum loss: 4718.798828, avg loss: 2.531544, ppl: 12.572909 +epoch: 2, batch: 22545, sum loss: 4835.939453, avg loss: 2.645481, ppl: 14.090222 +epoch: 2, batch: 22546, sum loss: 5100.509766, avg loss: 2.482000, ppl: 11.965169 +epoch: 2, batch: 22547, sum loss: 3662.561279, avg loss: 2.315146, ppl: 10.126401 +epoch: 2, batch: 22548, sum loss: 4955.242188, avg loss: 2.679958, ppl: 14.584478 +epoch: 2, batch: 22549, sum loss: 4685.806152, avg loss: 2.685276, ppl: 14.662241 +epoch: 2, batch: 22550, sum loss: 3883.200439, avg loss: 2.471802, ppl: 11.843767 +epoch: 2, batch: 22551, sum loss: 3509.005371, avg loss: 2.200003, ppl: 9.025044 +epoch: 2, batch: 22552, sum loss: 4207.200195, avg loss: 2.378293, ppl: 10.786475 +epoch: 2, batch: 22553, sum loss: 4616.202148, avg loss: 2.627321, ppl: 13.836645 +epoch: 2, batch: 22554, sum loss: 4111.187012, avg loss: 2.425479, ppl: 11.307644 +epoch: 2, batch: 22555, sum loss: 4614.466797, avg loss: 2.428667, ppl: 11.343749 +epoch: 2, batch: 22556, sum loss: 4757.418457, avg loss: 2.550894, ppl: 12.818564 +epoch: 2, batch: 22557, sum loss: 4388.617188, avg loss: 2.536773, ppl: 
12.638819 +epoch: 2, batch: 22558, sum loss: 4284.305664, avg loss: 2.656110, ppl: 14.240788 +epoch: 2, batch: 22559, sum loss: 4403.366211, avg loss: 2.489184, ppl: 12.051437 +epoch: 2, batch: 22560, sum loss: 4054.020020, avg loss: 2.524296, ppl: 12.482108 +epoch: 2, batch: 22561, sum loss: 3613.764648, avg loss: 2.401173, ppl: 11.036110 +epoch: 2, batch: 22562, sum loss: 4626.988770, avg loss: 2.593604, ppl: 13.377893 +epoch: 2, batch: 22563, sum loss: 4018.585449, avg loss: 2.442909, ppl: 11.506464 +epoch: 2, batch: 22564, sum loss: 4838.303223, avg loss: 2.547816, ppl: 12.779167 +epoch: 2, batch: 22565, sum loss: 4940.717285, avg loss: 2.532403, ppl: 12.583702 +epoch: 2, batch: 22566, sum loss: 4186.875000, avg loss: 2.368142, ppl: 10.677536 +epoch: 2, batch: 22567, sum loss: 3493.928467, avg loss: 2.667121, ppl: 14.398456 +epoch: 2, batch: 22568, sum loss: 4421.976074, avg loss: 2.498292, ppl: 12.161701 +epoch: 2, batch: 22569, sum loss: 4660.817871, avg loss: 2.580741, ppl: 13.206920 +epoch: 2, batch: 22570, sum loss: 4529.928223, avg loss: 2.656849, ppl: 14.251318 +epoch: 2, batch: 22571, sum loss: 4470.703125, avg loss: 2.522970, ppl: 12.465567 +epoch: 2, batch: 22572, sum loss: 3987.841064, avg loss: 2.309115, ppl: 10.065510 +epoch: 2, batch: 22573, sum loss: 4197.653320, avg loss: 2.441916, ppl: 11.495044 +epoch: 2, batch: 22574, sum loss: 4098.734375, avg loss: 2.318289, ppl: 10.158277 +epoch: 2, batch: 22575, sum loss: 3828.421631, avg loss: 2.330141, ppl: 10.279389 +epoch: 2, batch: 22576, sum loss: 4185.644043, avg loss: 2.594944, ppl: 13.395834 +epoch: 2, batch: 22577, sum loss: 4356.154785, avg loss: 2.565462, ppl: 13.006671 +epoch: 2, batch: 22578, sum loss: 3715.572021, avg loss: 2.529320, ppl: 12.544979 +epoch: 2, batch: 22579, sum loss: 5092.771484, avg loss: 2.641479, ppl: 14.033945 +epoch: 2, batch: 22580, sum loss: 3590.019043, avg loss: 2.243762, ppl: 9.428734 +epoch: 2, batch: 22581, sum loss: 4141.369629, avg loss: 2.424689, ppl: 
11.298718 +epoch: 2, batch: 22582, sum loss: 4214.237305, avg loss: 2.642155, ppl: 14.043437 +epoch: 2, batch: 22583, sum loss: 4559.312500, avg loss: 2.490067, ppl: 12.062084 +epoch: 2, batch: 22584, sum loss: 4119.287109, avg loss: 2.594010, ppl: 13.383327 +epoch: 2, batch: 22585, sum loss: 4589.563965, avg loss: 2.671458, ppl: 14.461031 +epoch: 2, batch: 22586, sum loss: 4131.157715, avg loss: 2.367426, ppl: 10.669889 +epoch: 2, batch: 22587, sum loss: 4577.955566, avg loss: 2.560378, ppl: 12.940706 +epoch: 2, batch: 22588, sum loss: 3713.733887, avg loss: 2.362426, ppl: 10.616676 +epoch: 2, batch: 22589, sum loss: 3874.409424, avg loss: 2.296627, ppl: 9.940595 +epoch: 2, batch: 22590, sum loss: 4759.899414, avg loss: 2.719943, ppl: 15.179450 +epoch: 2, batch: 22591, sum loss: 3936.651123, avg loss: 2.378641, ppl: 10.790233 +epoch: 2, batch: 22592, sum loss: 3866.257568, avg loss: 2.454767, ppl: 11.643715 +epoch: 2, batch: 22593, sum loss: 4102.178223, avg loss: 2.413046, ppl: 11.167929 +epoch: 2, batch: 22594, sum loss: 4571.005859, avg loss: 2.431386, ppl: 11.374637 +epoch: 2, batch: 22595, sum loss: 4613.250000, avg loss: 2.639159, ppl: 14.001427 +epoch: 2, batch: 22596, sum loss: 4705.649902, avg loss: 2.491080, ppl: 12.074307 +epoch: 2, batch: 22597, sum loss: 3193.738281, avg loss: 2.252284, ppl: 9.509429 +epoch: 2, batch: 22598, sum loss: 4826.626953, avg loss: 2.614641, ppl: 13.662307 +epoch: 2, batch: 22599, sum loss: 3323.417236, avg loss: 2.245552, ppl: 9.445631 +epoch: 2, batch: 22600, sum loss: 4437.564453, avg loss: 2.691064, ppl: 14.747357 +epoch: 2, batch: 22601, sum loss: 4294.768555, avg loss: 2.475371, ppl: 11.886115 +epoch: 2, batch: 22602, sum loss: 4653.597656, avg loss: 2.630638, ppl: 13.882619 +epoch: 2, batch: 22603, sum loss: 3867.701904, avg loss: 2.377198, ppl: 10.774674 +epoch: 2, batch: 22604, sum loss: 4525.019531, avg loss: 2.557954, ppl: 12.909372 +epoch: 2, batch: 22605, sum loss: 3971.243164, avg loss: 2.274481, ppl: 9.722870 
+epoch: 2, batch: 22606, sum loss: 4570.469727, avg loss: 2.485302, ppl: 12.004742 +epoch: 2, batch: 22607, sum loss: 4804.116211, avg loss: 2.712657, ppl: 15.069264 +epoch: 2, batch: 22608, sum loss: 4502.836426, avg loss: 2.583383, ppl: 13.241858 +epoch: 2, batch: 22609, sum loss: 3708.159180, avg loss: 2.401658, ppl: 11.041463 +epoch: 2, batch: 22610, sum loss: 4375.069336, avg loss: 2.605759, ppl: 13.541498 +epoch: 2, batch: 22611, sum loss: 4708.713867, avg loss: 2.607261, ppl: 13.561860 +epoch: 2, batch: 22612, sum loss: 4055.162842, avg loss: 2.442869, ppl: 11.506006 +epoch: 2, batch: 22613, sum loss: 3980.232666, avg loss: 2.334447, ppl: 10.323753 +epoch: 2, batch: 22614, sum loss: 3374.433838, avg loss: 2.436414, ppl: 11.431977 +epoch: 2, batch: 22615, sum loss: 5304.261230, avg loss: 2.720134, ppl: 15.182353 +epoch: 2, batch: 22616, sum loss: 3706.628662, avg loss: 2.449854, ppl: 11.586654 +epoch: 2, batch: 22617, sum loss: 3376.693604, avg loss: 2.461147, ppl: 11.718246 +epoch: 2, batch: 22618, sum loss: 4480.336914, avg loss: 2.627764, ppl: 13.842779 +epoch: 2, batch: 22619, sum loss: 3949.885498, avg loss: 2.515851, ppl: 12.377134 +epoch: 2, batch: 22620, sum loss: 5306.161621, avg loss: 2.767951, ppl: 15.925968 +epoch: 2, batch: 22621, sum loss: 4335.467773, avg loss: 2.594535, ppl: 13.390361 +epoch: 2, batch: 22622, sum loss: 4303.932129, avg loss: 2.404431, ppl: 11.072132 +epoch: 2, batch: 22623, sum loss: 4772.030273, avg loss: 2.696062, ppl: 14.821256 +epoch: 2, batch: 22624, sum loss: 3100.280029, avg loss: 2.129313, ppl: 8.409090 +epoch: 2, batch: 22625, sum loss: 3807.521729, avg loss: 2.437594, ppl: 11.445469 +epoch: 2, batch: 22626, sum loss: 4970.040527, avg loss: 2.751961, ppl: 15.673329 +epoch: 2, batch: 22627, sum loss: 4593.055664, avg loss: 2.548866, ppl: 12.792583 +epoch: 2, batch: 22628, sum loss: 4866.342773, avg loss: 2.663570, ppl: 14.347421 +epoch: 2, batch: 22629, sum loss: 4580.478516, avg loss: 2.523680, ppl: 12.474415 +epoch: 
2, batch: 22630, sum loss: 3191.721680, avg loss: 2.082010, ppl: 8.020576 +epoch: 2, batch: 22631, sum loss: 3528.574463, avg loss: 2.254680, ppl: 9.532244 +epoch: 2, batch: 22632, sum loss: 4632.093262, avg loss: 2.651456, ppl: 14.174657 +epoch: 2, batch: 22633, sum loss: 4024.804932, avg loss: 2.578351, ppl: 13.175388 +epoch: 2, batch: 22634, sum loss: 4054.527100, avg loss: 2.316873, ppl: 10.143901 +epoch: 2, batch: 22635, sum loss: 4621.982422, avg loss: 2.684078, ppl: 14.644692 +epoch: 2, batch: 22636, sum loss: 4650.800781, avg loss: 2.732550, ppl: 15.372042 +epoch: 2, batch: 22637, sum loss: 4085.089111, avg loss: 2.481828, ppl: 11.963116 +epoch: 2, batch: 22638, sum loss: 4379.785156, avg loss: 2.718675, ppl: 15.160220 +epoch: 2, batch: 22639, sum loss: 3848.199463, avg loss: 2.266313, ppl: 9.643780 +epoch: 2, batch: 22640, sum loss: 4887.018555, avg loss: 2.638779, ppl: 13.996103 +epoch: 2, batch: 22641, sum loss: 3770.461914, avg loss: 2.178199, ppl: 8.830384 +epoch: 2, batch: 22642, sum loss: 3795.421387, avg loss: 2.305845, ppl: 10.032658 +epoch: 2, batch: 22643, sum loss: 4776.233398, avg loss: 2.516456, ppl: 12.384629 +epoch: 2, batch: 22644, sum loss: 4895.716797, avg loss: 2.640624, ppl: 14.021948 +epoch: 2, batch: 22645, sum loss: 4458.661621, avg loss: 2.611987, ppl: 13.626098 +epoch: 2, batch: 22646, sum loss: 4081.397949, avg loss: 2.398001, ppl: 11.001165 +epoch: 2, batch: 22647, sum loss: 4989.974121, avg loss: 2.636014, ppl: 13.957455 +epoch: 2, batch: 22648, sum loss: 3833.736328, avg loss: 2.263126, ppl: 9.613096 +epoch: 2, batch: 22649, sum loss: 4614.414551, avg loss: 2.585106, ppl: 13.264700 +epoch: 2, batch: 22650, sum loss: 5556.618164, avg loss: 2.783877, ppl: 16.181635 +epoch: 2, batch: 22651, sum loss: 4089.917236, avg loss: 2.701399, ppl: 14.900558 +epoch: 2, batch: 22652, sum loss: 4712.693359, avg loss: 2.689894, ppl: 14.730110 +epoch: 2, batch: 22653, sum loss: 4281.699707, avg loss: 2.303227, ppl: 10.006423 +epoch: 2, batch: 
22654, sum loss: 4052.852051, avg loss: 2.319892, ppl: 10.174580 +epoch: 2, batch: 22655, sum loss: 5182.403320, avg loss: 2.526769, ppl: 12.513013 +epoch: 2, batch: 22656, sum loss: 3970.430908, avg loss: 2.386076, ppl: 10.870758 +epoch: 2, batch: 22657, sum loss: 3268.091797, avg loss: 2.246111, ppl: 9.450912 +epoch: 2, batch: 22658, sum loss: 4806.059570, avg loss: 2.532171, ppl: 12.580787 +epoch: 2, batch: 22659, sum loss: 4066.251953, avg loss: 2.500770, ppl: 12.191876 +epoch: 2, batch: 22660, sum loss: 3769.440186, avg loss: 2.427199, ppl: 11.327112 +epoch: 2, batch: 22661, sum loss: 4559.726562, avg loss: 2.607048, ppl: 13.558963 +epoch: 2, batch: 22662, sum loss: 5266.447754, avg loss: 2.823833, ppl: 16.841272 +epoch: 2, batch: 22663, sum loss: 4300.628906, avg loss: 2.368188, ppl: 10.678022 +epoch: 2, batch: 22664, sum loss: 3703.041504, avg loss: 2.315848, ppl: 10.133516 +epoch: 2, batch: 22665, sum loss: 4412.682617, avg loss: 2.373686, ppl: 10.736897 +epoch: 2, batch: 22666, sum loss: 4212.771484, avg loss: 2.479560, ppl: 11.936007 +epoch: 2, batch: 22667, sum loss: 3989.479004, avg loss: 2.406200, ppl: 11.091729 +epoch: 2, batch: 22668, sum loss: 4371.243164, avg loss: 2.598837, ppl: 13.448084 +epoch: 2, batch: 22669, sum loss: 3717.598389, avg loss: 2.510195, ppl: 12.307327 +epoch: 2, batch: 22670, sum loss: 4180.667969, avg loss: 2.419368, ppl: 11.238754 +epoch: 2, batch: 22671, sum loss: 3644.552490, avg loss: 2.506570, ppl: 12.262794 +epoch: 2, batch: 22672, sum loss: 3266.710938, avg loss: 2.254459, ppl: 9.530135 +epoch: 2, batch: 22673, sum loss: 4766.644043, avg loss: 2.542210, ppl: 12.707726 +epoch: 2, batch: 22674, sum loss: 4260.767090, avg loss: 2.438905, ppl: 11.460485 +epoch: 2, batch: 22675, sum loss: 3574.408691, avg loss: 2.313533, ppl: 10.110084 +epoch: 2, batch: 22676, sum loss: 4219.133789, avg loss: 2.463009, ppl: 11.740080 +epoch: 2, batch: 22677, sum loss: 4616.068359, avg loss: 2.581694, ppl: 13.219508 +epoch: 2, batch: 22678, 
sum loss: 3559.503662, avg loss: 2.535259, ppl: 12.619699 +epoch: 2, batch: 22679, sum loss: 4004.628662, avg loss: 2.382289, ppl: 10.829668 +epoch: 2, batch: 22680, sum loss: 4948.445801, avg loss: 2.563962, ppl: 12.987167 +epoch: 2, batch: 22681, sum loss: 4233.806641, avg loss: 2.445873, ppl: 11.540623 +epoch: 2, batch: 22682, sum loss: 3667.781494, avg loss: 2.262666, ppl: 9.608671 +epoch: 2, batch: 22683, sum loss: 4361.018066, avg loss: 2.517909, ppl: 12.402634 +epoch: 2, batch: 22684, sum loss: 4036.714600, avg loss: 2.482604, ppl: 11.972404 +epoch: 2, batch: 22685, sum loss: 4855.130859, avg loss: 2.540623, ppl: 12.687575 +epoch: 2, batch: 22686, sum loss: 4294.502930, avg loss: 2.407233, ppl: 11.103193 +epoch: 2, batch: 22687, sum loss: 3850.874512, avg loss: 2.432643, ppl: 11.388948 +epoch: 2, batch: 22688, sum loss: 4524.997070, avg loss: 2.405634, ppl: 11.085453 +epoch: 2, batch: 22689, sum loss: 4344.023926, avg loss: 2.381592, ppl: 10.822119 +epoch: 2, batch: 22690, sum loss: 4002.931641, avg loss: 2.423082, ppl: 11.280574 +epoch: 2, batch: 22691, sum loss: 4703.539551, avg loss: 2.643923, ppl: 14.068290 +epoch: 2, batch: 22692, sum loss: 4375.907227, avg loss: 2.596977, ppl: 13.423105 +epoch: 2, batch: 22693, sum loss: 4045.471436, avg loss: 2.375497, ppl: 10.756359 +epoch: 2, batch: 22694, sum loss: 3817.918457, avg loss: 2.513442, ppl: 12.347357 +epoch: 2, batch: 22695, sum loss: 4258.195312, avg loss: 2.328155, ppl: 10.258997 +epoch: 2, batch: 22696, sum loss: 4422.510742, avg loss: 2.587777, ppl: 13.300171 +epoch: 2, batch: 22697, sum loss: 3918.941162, avg loss: 2.426589, ppl: 11.320203 +epoch: 2, batch: 22698, sum loss: 4081.062744, avg loss: 2.396396, ppl: 10.983522 +epoch: 2, batch: 22699, sum loss: 4341.195801, avg loss: 2.689713, ppl: 14.727442 +epoch: 2, batch: 22700, sum loss: 4681.427246, avg loss: 2.593589, ppl: 13.377692 +epoch: 2, batch: 22701, sum loss: 3895.946533, avg loss: 2.132428, ppl: 8.435327 +epoch: 2, batch: 22702, sum loss: 
3467.948242, avg loss: 2.103061, ppl: 8.191209 +epoch: 2, batch: 22703, sum loss: 3301.026611, avg loss: 2.480110, ppl: 11.942580 +epoch: 2, batch: 22704, sum loss: 4300.594727, avg loss: 2.478729, ppl: 11.926097 +epoch: 2, batch: 22705, sum loss: 4734.933105, avg loss: 2.581752, ppl: 13.220278 +epoch: 2, batch: 22706, sum loss: 4405.621582, avg loss: 2.555465, ppl: 12.877289 +epoch: 2, batch: 22707, sum loss: 4190.422852, avg loss: 2.570812, ppl: 13.076432 +epoch: 2, batch: 22708, sum loss: 3595.441895, avg loss: 2.199047, ppl: 9.016418 +epoch: 2, batch: 22709, sum loss: 4957.085449, avg loss: 2.868684, ppl: 17.613819 +epoch: 2, batch: 22710, sum loss: 3875.427734, avg loss: 2.315070, ppl: 10.125636 +epoch: 2, batch: 22711, sum loss: 4530.894531, avg loss: 2.687363, ppl: 14.692885 +epoch: 2, batch: 22712, sum loss: 3872.159668, avg loss: 2.315885, ppl: 10.133888 +epoch: 2, batch: 22713, sum loss: 3698.232422, avg loss: 2.399891, ppl: 11.021979 +epoch: 2, batch: 22714, sum loss: 4088.125000, avg loss: 2.590700, ppl: 13.339108 +epoch: 2, batch: 22715, sum loss: 4610.269531, avg loss: 2.579893, ppl: 13.195731 +epoch: 2, batch: 22716, sum loss: 4271.797852, avg loss: 2.623954, ppl: 13.790146 +epoch: 2, batch: 22717, sum loss: 3226.579590, avg loss: 2.251626, ppl: 9.503173 +epoch: 2, batch: 22718, sum loss: 3779.562988, avg loss: 2.457453, ppl: 11.675040 +epoch: 2, batch: 22719, sum loss: 3834.319336, avg loss: 2.561336, ppl: 12.953106 +epoch: 2, batch: 22720, sum loss: 3822.541016, avg loss: 2.140280, ppl: 8.501822 +epoch: 2, batch: 22721, sum loss: 3291.111816, avg loss: 2.112395, ppl: 8.268022 +epoch: 2, batch: 22722, sum loss: 3695.884033, avg loss: 2.336210, ppl: 10.341964 +epoch: 2, batch: 22723, sum loss: 5052.294922, avg loss: 2.736888, ppl: 15.438864 +epoch: 2, batch: 22724, sum loss: 3402.837402, avg loss: 2.083795, ppl: 8.034904 +epoch: 2, batch: 22725, sum loss: 4353.901367, avg loss: 2.690916, ppl: 14.745171 +epoch: 2, batch: 22726, sum loss: 4058.422363, 
avg loss: 2.470129, ppl: 11.823975 +epoch: 2, batch: 22727, sum loss: 3610.666016, avg loss: 2.346112, ppl: 10.444879 +epoch: 2, batch: 22728, sum loss: 4647.020020, avg loss: 2.646367, ppl: 14.102708 +epoch: 2, batch: 22729, sum loss: 3782.714111, avg loss: 2.306533, ppl: 10.039558 +epoch: 2, batch: 22730, sum loss: 3795.012695, avg loss: 2.184809, ppl: 8.888948 +epoch: 2, batch: 22731, sum loss: 3685.343018, avg loss: 2.388427, ppl: 10.896343 +epoch: 2, batch: 22732, sum loss: 4729.473633, avg loss: 2.623113, ppl: 13.778555 +epoch: 2, batch: 22733, sum loss: 4486.935547, avg loss: 2.481712, ppl: 11.961726 +epoch: 2, batch: 22734, sum loss: 4067.547852, avg loss: 2.382864, ppl: 10.835887 +epoch: 2, batch: 22735, sum loss: 4334.569336, avg loss: 2.380324, ppl: 10.808400 +epoch: 2, batch: 22736, sum loss: 4307.842773, avg loss: 2.494408, ppl: 12.114561 +epoch: 2, batch: 22737, sum loss: 4944.364746, avg loss: 2.644045, ppl: 14.070006 +epoch: 2, batch: 22738, sum loss: 4013.500977, avg loss: 2.377666, ppl: 10.779718 +epoch: 2, batch: 22739, sum loss: 3692.829834, avg loss: 2.566247, ppl: 13.016883 +epoch: 2, batch: 22740, sum loss: 3754.742188, avg loss: 2.413073, ppl: 11.168232 +epoch: 2, batch: 22741, sum loss: 3361.814453, avg loss: 2.171715, ppl: 8.773315 +epoch: 2, batch: 22742, sum loss: 4570.519531, avg loss: 2.514037, ppl: 12.354707 +epoch: 2, batch: 22743, sum loss: 4270.996094, avg loss: 2.666040, ppl: 14.382899 +epoch: 2, batch: 22744, sum loss: 4621.944336, avg loss: 2.617182, ppl: 13.697078 +epoch: 2, batch: 22745, sum loss: 3552.312744, avg loss: 2.444813, ppl: 11.528391 +epoch: 2, batch: 22746, sum loss: 3934.580078, avg loss: 2.430253, ppl: 11.361759 +epoch: 2, batch: 22747, sum loss: 4158.252930, avg loss: 2.497449, ppl: 12.151458 +epoch: 2, batch: 22748, sum loss: 3769.447266, avg loss: 2.578281, ppl: 13.174477 +epoch: 2, batch: 22749, sum loss: 4500.163086, avg loss: 2.593754, ppl: 13.379907 +epoch: 2, batch: 22750, sum loss: 4841.345703, avg loss: 
2.673300, ppl: 14.487697 +epoch: 2, batch: 22751, sum loss: 4385.091797, avg loss: 2.698518, ppl: 14.857697 +epoch: 2, batch: 22752, sum loss: 3506.726074, avg loss: 2.390406, ppl: 10.917927 +epoch: 2, batch: 22753, sum loss: 4989.206543, avg loss: 2.692502, ppl: 14.768584 +epoch: 2, batch: 22754, sum loss: 3879.581543, avg loss: 2.446142, ppl: 11.543727 +epoch: 2, batch: 22755, sum loss: 4842.619629, avg loss: 2.600763, ppl: 13.474010 +epoch: 2, batch: 22756, sum loss: 4594.838379, avg loss: 2.541393, ppl: 12.697344 +epoch: 2, batch: 22757, sum loss: 3732.332031, avg loss: 2.368231, ppl: 10.678483 +epoch: 2, batch: 22758, sum loss: 4092.665039, avg loss: 2.456582, ppl: 11.664868 +epoch: 2, batch: 22759, sum loss: 4435.014648, avg loss: 2.484602, ppl: 11.996347 +epoch: 2, batch: 22760, sum loss: 3450.555664, avg loss: 2.119506, ppl: 8.327022 +epoch: 2, batch: 22761, sum loss: 4686.209473, avg loss: 2.490016, ppl: 12.061466 +epoch: 2, batch: 22762, sum loss: 5011.915527, avg loss: 2.602241, ppl: 13.493941 +epoch: 2, batch: 22763, sum loss: 4677.384277, avg loss: 2.656096, ppl: 14.240578 +epoch: 2, batch: 22764, sum loss: 3613.008057, avg loss: 2.173892, ppl: 8.792436 +epoch: 2, batch: 22765, sum loss: 4750.727051, avg loss: 2.588952, ppl: 13.315813 +epoch: 2, batch: 22766, sum loss: 4188.375000, avg loss: 2.544578, ppl: 12.737849 +epoch: 2, batch: 22767, sum loss: 4306.924805, avg loss: 2.800341, ppl: 16.450258 +epoch: 2, batch: 22768, sum loss: 3651.124268, avg loss: 2.279104, ppl: 9.767922 +epoch: 2, batch: 22769, sum loss: 4580.201172, avg loss: 2.617258, ppl: 13.698110 +epoch: 2, batch: 22770, sum loss: 3680.489014, avg loss: 2.555895, ppl: 12.882829 +epoch: 2, batch: 22771, sum loss: 4129.469238, avg loss: 2.563296, ppl: 12.978522 +epoch: 2, batch: 22772, sum loss: 4238.390625, avg loss: 2.465614, ppl: 11.770711 +epoch: 2, batch: 22773, sum loss: 4711.953125, avg loss: 2.413911, ppl: 11.177588 +epoch: 2, batch: 22774, sum loss: 3723.523682, avg loss: 2.399178, 
ppl: 11.014116 +epoch: 2, batch: 22775, sum loss: 4862.480469, avg loss: 2.596092, ppl: 13.411227 +epoch: 2, batch: 22776, sum loss: 3171.956787, avg loss: 2.127402, ppl: 8.393036 +epoch: 2, batch: 22777, sum loss: 3326.679932, avg loss: 2.204559, ppl: 9.066256 +epoch: 2, batch: 22778, sum loss: 4698.859863, avg loss: 2.654723, ppl: 14.221048 +epoch: 2, batch: 22779, sum loss: 3876.537598, avg loss: 2.392925, ppl: 10.945457 +epoch: 2, batch: 22780, sum loss: 4819.578125, avg loss: 2.558163, ppl: 12.912072 +epoch: 2, batch: 22781, sum loss: 3683.111816, avg loss: 2.400985, ppl: 11.034045 +epoch: 2, batch: 22782, sum loss: 3268.693359, avg loss: 2.211565, ppl: 9.129996 +epoch: 2, batch: 22783, sum loss: 3676.322021, avg loss: 2.404397, ppl: 11.071747 +epoch: 2, batch: 22784, sum loss: 4868.204590, avg loss: 2.419585, ppl: 11.241193 +epoch: 2, batch: 22785, sum loss: 3097.412842, avg loss: 2.122970, ppl: 8.355917 +epoch: 2, batch: 22786, sum loss: 3772.747803, avg loss: 2.201136, ppl: 9.035275 +epoch: 2, batch: 22787, sum loss: 3255.424805, avg loss: 2.205572, ppl: 9.075444 +epoch: 2, batch: 22788, sum loss: 5060.844238, avg loss: 2.779157, ppl: 16.105433 +epoch: 2, batch: 22789, sum loss: 3673.183838, avg loss: 2.332180, ppl: 10.300372 +epoch: 2, batch: 22790, sum loss: 3545.605469, avg loss: 2.493393, ppl: 12.102275 +epoch: 2, batch: 22791, sum loss: 3300.028809, avg loss: 2.394796, ppl: 10.965960 +epoch: 2, batch: 22792, sum loss: 3655.992188, avg loss: 2.351120, ppl: 10.497325 +epoch: 2, batch: 22793, sum loss: 3271.683350, avg loss: 2.207614, ppl: 9.093989 +epoch: 2, batch: 22794, sum loss: 5015.241699, avg loss: 2.690580, ppl: 14.740229 +epoch: 2, batch: 22795, sum loss: 3420.115234, avg loss: 2.298465, ppl: 9.958879 +epoch: 2, batch: 22796, sum loss: 4597.361328, avg loss: 2.474360, ppl: 11.874108 +epoch: 2, batch: 22797, sum loss: 4173.500977, avg loss: 2.446366, ppl: 11.546314 +epoch: 2, batch: 22798, sum loss: 3901.521484, avg loss: 2.492985, ppl: 12.097333 
+epoch: 2, batch: 22799, sum loss: 3701.315430, avg loss: 2.342605, ppl: 10.408311 +epoch: 2, batch: 22800, sum loss: 4077.216309, avg loss: 2.527723, ppl: 12.524949 +epoch: 2, batch: 22801, sum loss: 4672.626465, avg loss: 2.732530, ppl: 15.371730 +epoch: 2, batch: 22802, sum loss: 4196.881836, avg loss: 2.632925, ppl: 13.914404 +epoch: 2, batch: 22803, sum loss: 4474.916504, avg loss: 2.576233, ppl: 13.147517 +epoch: 2, batch: 22804, sum loss: 5022.894531, avg loss: 2.807655, ppl: 16.571011 +epoch: 2, batch: 22805, sum loss: 4485.368652, avg loss: 2.573361, ppl: 13.109815 +epoch: 2, batch: 22806, sum loss: 3692.880371, avg loss: 2.404219, ppl: 11.069783 +epoch: 2, batch: 22807, sum loss: 4288.931641, avg loss: 2.690672, ppl: 14.741581 +epoch: 2, batch: 22808, sum loss: 4339.739258, avg loss: 2.424435, ppl: 11.295850 +epoch: 2, batch: 22809, sum loss: 4450.851074, avg loss: 2.630527, ppl: 13.881077 +epoch: 2, batch: 22810, sum loss: 5505.400391, avg loss: 2.859948, ppl: 17.460621 +epoch: 2, batch: 22811, sum loss: 4793.630859, avg loss: 2.608069, ppl: 13.572816 +epoch: 2, batch: 22812, sum loss: 4553.367188, avg loss: 2.542360, ppl: 12.709631 +epoch: 2, batch: 22813, sum loss: 4559.944336, avg loss: 2.696596, ppl: 14.829173 +epoch: 2, batch: 22814, sum loss: 3693.878174, avg loss: 2.278765, ppl: 9.764613 +epoch: 2, batch: 22815, sum loss: 3382.309814, avg loss: 2.256377, ppl: 9.548437 +epoch: 2, batch: 22816, sum loss: 4482.692383, avg loss: 2.649345, ppl: 14.144776 +epoch: 2, batch: 22817, sum loss: 3668.241943, avg loss: 2.271357, ppl: 9.692548 +epoch: 2, batch: 22818, sum loss: 4188.328125, avg loss: 2.437909, ppl: 11.449080 +epoch: 2, batch: 22819, sum loss: 5059.051758, avg loss: 2.821557, ppl: 16.802994 +epoch: 2, batch: 22820, sum loss: 5524.434082, avg loss: 2.738936, ppl: 15.470518 +epoch: 2, batch: 22821, sum loss: 4877.237305, avg loss: 2.735411, ppl: 15.416077 +epoch: 2, batch: 22822, sum loss: 4027.023438, avg loss: 2.457000, ppl: 11.669753 +epoch: 2, 
batch: 22823, sum loss: 4740.554688, avg loss: 2.528296, ppl: 12.532130 +epoch: 2, batch: 22824, sum loss: 4211.985352, avg loss: 2.408225, ppl: 11.114214 +epoch: 2, batch: 22825, sum loss: 3971.541504, avg loss: 2.466796, ppl: 11.784627 +epoch: 2, batch: 22826, sum loss: 4452.037109, avg loss: 2.476105, ppl: 11.894843 +epoch: 2, batch: 22827, sum loss: 5253.081055, avg loss: 2.681512, ppl: 14.607158 +epoch: 2, batch: 22828, sum loss: 4515.349121, avg loss: 2.331104, ppl: 10.289297 +epoch: 2, batch: 22829, sum loss: 4090.932861, avg loss: 2.512858, ppl: 12.340147 +epoch: 2, batch: 22830, sum loss: 3531.958008, avg loss: 2.364095, ppl: 10.634410 +epoch: 2, batch: 22831, sum loss: 4939.473145, avg loss: 2.631579, ppl: 13.895690 +epoch: 2, batch: 22832, sum loss: 3062.159424, avg loss: 1.983264, ppl: 7.266422 +epoch: 2, batch: 22833, sum loss: 4828.349609, avg loss: 2.487558, ppl: 12.031855 +epoch: 2, batch: 22834, sum loss: 3291.547607, avg loss: 2.239148, ppl: 9.385333 +epoch: 2, batch: 22835, sum loss: 4349.270508, avg loss: 2.564428, ppl: 12.993228 +epoch: 2, batch: 22836, sum loss: 4497.466309, avg loss: 2.463016, ppl: 11.740161 +epoch: 2, batch: 22837, sum loss: 4900.281738, avg loss: 2.540322, ppl: 12.683759 +epoch: 2, batch: 22838, sum loss: 4432.672852, avg loss: 2.821562, ppl: 16.803070 +epoch: 2, batch: 22839, sum loss: 4675.952637, avg loss: 2.272086, ppl: 9.699613 +epoch: 2, batch: 22840, sum loss: 4999.760254, avg loss: 2.685156, ppl: 14.660486 +epoch: 2, batch: 22841, sum loss: 3327.784912, avg loss: 2.407948, ppl: 11.111143 +epoch: 2, batch: 22842, sum loss: 4721.206055, avg loss: 2.442424, ppl: 11.500889 +epoch: 2, batch: 22843, sum loss: 3628.720215, avg loss: 2.090277, ppl: 8.087152 +epoch: 2, batch: 22844, sum loss: 3517.815430, avg loss: 2.364123, ppl: 10.634712 +epoch: 2, batch: 22845, sum loss: 3808.274170, avg loss: 2.510398, ppl: 12.309831 +epoch: 2, batch: 22846, sum loss: 5384.011719, avg loss: 2.741350, ppl: 15.507913 +epoch: 2, batch: 
22847, sum loss: 4048.885498, avg loss: 2.525817, ppl: 12.501109 +epoch: 2, batch: 22848, sum loss: 4009.575684, avg loss: 2.343411, ppl: 10.416707 +epoch: 2, batch: 22849, sum loss: 4618.973633, avg loss: 2.450384, ppl: 11.592796 +epoch: 2, batch: 22850, sum loss: 4099.267090, avg loss: 2.302959, ppl: 10.003739 +epoch: 2, batch: 22851, sum loss: 5109.699219, avg loss: 2.680849, ppl: 14.597486 +epoch: 2, batch: 22852, sum loss: 3599.460938, avg loss: 2.354128, ppl: 10.528940 +epoch: 2, batch: 22853, sum loss: 4227.513672, avg loss: 2.327926, ppl: 10.256646 +epoch: 2, batch: 22854, sum loss: 4578.663574, avg loss: 2.357705, ppl: 10.566677 +epoch: 2, batch: 22855, sum loss: 2970.492676, avg loss: 2.093370, ppl: 8.112211 +epoch: 2, batch: 22856, sum loss: 3682.207520, avg loss: 2.406672, ppl: 11.096967 +epoch: 2, batch: 22857, sum loss: 4566.395508, avg loss: 2.594543, ppl: 13.390463 +epoch: 2, batch: 22858, sum loss: 4860.837402, avg loss: 2.796799, ppl: 16.392099 +epoch: 2, batch: 22859, sum loss: 3470.768799, avg loss: 2.440766, ppl: 11.481828 +epoch: 2, batch: 22860, sum loss: 4273.712891, avg loss: 2.574526, ppl: 13.125093 +epoch: 2, batch: 22861, sum loss: 4226.214355, avg loss: 2.491872, ppl: 12.083871 +epoch: 2, batch: 22862, sum loss: 2975.830566, avg loss: 2.259552, ppl: 9.578801 +epoch: 2, batch: 22863, sum loss: 4117.500977, avg loss: 2.503040, ppl: 12.219586 +epoch: 2, batch: 22864, sum loss: 4393.321289, avg loss: 2.560211, ppl: 12.938543 +epoch: 2, batch: 22865, sum loss: 4288.792969, avg loss: 2.655599, ppl: 14.233515 +epoch: 2, batch: 22866, sum loss: 4062.728516, avg loss: 2.650182, ppl: 14.156611 +epoch: 2, batch: 22867, sum loss: 4716.910645, avg loss: 2.433907, ppl: 11.403343 +epoch: 2, batch: 22868, sum loss: 5068.165527, avg loss: 2.664651, ppl: 14.362931 +epoch: 2, batch: 22869, sum loss: 5303.548828, avg loss: 2.518304, ppl: 12.407537 +epoch: 2, batch: 22870, sum loss: 3395.917725, avg loss: 2.250443, ppl: 9.491935 +epoch: 2, batch: 22871, sum 
loss: 3842.507080, avg loss: 2.453708, ppl: 11.631398 +epoch: 2, batch: 22872, sum loss: 3706.770508, avg loss: 2.316731, ppl: 10.142469 +epoch: 2, batch: 22873, sum loss: 4225.170410, avg loss: 2.381719, ppl: 10.823496 +epoch: 2, batch: 22874, sum loss: 4058.003662, avg loss: 2.469874, ppl: 11.820962 +epoch: 2, batch: 22875, sum loss: 3777.125000, avg loss: 2.332999, ppl: 10.308808 +epoch: 2, batch: 22876, sum loss: 4467.411133, avg loss: 2.461384, ppl: 11.721017 +epoch: 2, batch: 22877, sum loss: 3209.947998, avg loss: 2.183638, ppl: 8.878549 +epoch: 2, batch: 22878, sum loss: 4639.310547, avg loss: 2.672414, ppl: 14.474867 +epoch: 2, batch: 22879, sum loss: 4208.634766, avg loss: 2.660325, ppl: 14.300941 +epoch: 2, batch: 22880, sum loss: 4634.638672, avg loss: 2.685190, ppl: 14.660994 +epoch: 2, batch: 22881, sum loss: 3452.361816, avg loss: 2.340584, ppl: 10.387304 +epoch: 2, batch: 22882, sum loss: 4763.061523, avg loss: 2.630073, ppl: 13.874781 +epoch: 2, batch: 22883, sum loss: 4032.755371, avg loss: 2.133733, ppl: 8.446339 +epoch: 2, batch: 22884, sum loss: 4045.892578, avg loss: 2.443172, ppl: 11.509488 +epoch: 2, batch: 22885, sum loss: 4512.802246, avg loss: 2.511298, ppl: 12.320911 +epoch: 2, batch: 22886, sum loss: 3585.796387, avg loss: 2.238325, ppl: 9.377609 +epoch: 2, batch: 22887, sum loss: 4530.730957, avg loss: 2.553963, ppl: 12.857962 +epoch: 2, batch: 22888, sum loss: 5041.958984, avg loss: 2.747662, ppl: 15.606095 +epoch: 2, batch: 22889, sum loss: 4304.195801, avg loss: 2.582001, ppl: 13.223572 +epoch: 2, batch: 22890, sum loss: 4524.470215, avg loss: 2.469689, ppl: 11.818772 +epoch: 2, batch: 22891, sum loss: 4861.841797, avg loss: 2.601306, ppl: 13.481339 +epoch: 2, batch: 22892, sum loss: 4087.105957, avg loss: 2.186787, ppl: 8.906553 +epoch: 2, batch: 22893, sum loss: 4271.655273, avg loss: 2.559410, ppl: 12.928188 +epoch: 2, batch: 22894, sum loss: 4210.335449, avg loss: 2.460746, ppl: 11.713542 +epoch: 2, batch: 22895, sum loss: 
4602.200195, avg loss: 2.644943, ppl: 14.082636 +epoch: 2, batch: 22896, sum loss: 3472.750000, avg loss: 2.203521, ppl: 9.056851 +epoch: 2, batch: 22897, sum loss: 4627.156250, avg loss: 2.715467, ppl: 15.111669 +epoch: 2, batch: 22898, sum loss: 4134.135742, avg loss: 2.336990, ppl: 10.350039 +epoch: 2, batch: 22899, sum loss: 4305.651367, avg loss: 2.630209, ppl: 13.876666 +epoch: 2, batch: 22900, sum loss: 5377.409668, avg loss: 2.777587, ppl: 16.080179 +epoch: 2, batch: 22901, sum loss: 4373.118652, avg loss: 2.454051, ppl: 11.635386 +epoch: 2, batch: 22902, sum loss: 4766.356934, avg loss: 2.591820, ppl: 13.354053 +epoch: 2, batch: 22903, sum loss: 4534.996582, avg loss: 2.868435, ppl: 17.609444 +epoch: 2, batch: 22904, sum loss: 3723.616455, avg loss: 2.258106, ppl: 9.564954 +epoch: 2, batch: 22905, sum loss: 4661.329102, avg loss: 2.766367, ppl: 15.900768 +epoch: 2, batch: 22906, sum loss: 4287.087891, avg loss: 2.666099, ppl: 14.383753 +epoch: 2, batch: 22907, sum loss: 4471.177734, avg loss: 2.656671, ppl: 14.248780 +epoch: 2, batch: 22908, sum loss: 4187.501953, avg loss: 2.430355, ppl: 11.362916 +epoch: 2, batch: 22909, sum loss: 4710.093262, avg loss: 2.569609, ppl: 13.060716 +epoch: 2, batch: 22910, sum loss: 4275.627930, avg loss: 2.631155, ppl: 13.889811 +epoch: 2, batch: 22911, sum loss: 4029.718750, avg loss: 2.410119, ppl: 11.135286 +epoch: 2, batch: 22912, sum loss: 3917.631348, avg loss: 2.467022, ppl: 11.787297 +epoch: 2, batch: 22913, sum loss: 4747.892090, avg loss: 2.613039, ppl: 13.640445 +epoch: 2, batch: 22914, sum loss: 4073.380859, avg loss: 2.530050, ppl: 12.554137 +epoch: 2, batch: 22915, sum loss: 3943.380859, avg loss: 2.614974, ppl: 13.666862 +epoch: 2, batch: 22916, sum loss: 3547.584717, avg loss: 2.368214, ppl: 10.678303 +epoch: 2, batch: 22917, sum loss: 4466.675781, avg loss: 2.517855, ppl: 12.401971 +epoch: 2, batch: 22918, sum loss: 5047.563965, avg loss: 2.683447, ppl: 14.635457 +epoch: 2, batch: 22919, sum loss: 
4004.529785, avg loss: 2.488832, ppl: 12.047191 +epoch: 2, batch: 22920, sum loss: 4178.369141, avg loss: 2.613114, ppl: 13.641462 +epoch: 2, batch: 22921, sum loss: 3846.557129, avg loss: 2.472080, ppl: 11.847069 +epoch: 2, batch: 22922, sum loss: 3903.755615, avg loss: 2.531618, ppl: 12.573838 +epoch: 2, batch: 22923, sum loss: 4466.022949, avg loss: 2.506186, ppl: 12.258085 +epoch: 2, batch: 22924, sum loss: 3993.072266, avg loss: 2.289606, ppl: 9.871044 +epoch: 2, batch: 22925, sum loss: 3870.165771, avg loss: 2.630976, ppl: 13.887317 +epoch: 2, batch: 22926, sum loss: 5155.243164, avg loss: 2.593181, ppl: 13.372236 +epoch: 2, batch: 22927, sum loss: 3522.922119, avg loss: 2.401446, ppl: 11.039131 +epoch: 2, batch: 22928, sum loss: 4080.028076, avg loss: 2.469751, ppl: 11.819499 +epoch: 2, batch: 22929, sum loss: 4440.302246, avg loss: 2.578573, ppl: 13.178316 +epoch: 2, batch: 22930, sum loss: 4493.749512, avg loss: 2.479994, ppl: 11.941196 +epoch: 2, batch: 22931, sum loss: 4018.117920, avg loss: 2.406059, ppl: 11.090166 +epoch: 2, batch: 22932, sum loss: 3586.692383, avg loss: 2.220862, ppl: 9.215272 +epoch: 2, batch: 22933, sum loss: 4856.991211, avg loss: 2.560354, ppl: 12.940394 +epoch: 2, batch: 22934, sum loss: 4572.060059, avg loss: 2.495666, ppl: 12.129809 +epoch: 2, batch: 22935, sum loss: 4378.923828, avg loss: 2.502242, ppl: 12.209839 +epoch: 2, batch: 22936, sum loss: 4567.009277, avg loss: 2.713612, ppl: 15.083661 +epoch: 2, batch: 22937, sum loss: 4123.966309, avg loss: 2.496348, ppl: 12.138081 +epoch: 2, batch: 22938, sum loss: 4735.839844, avg loss: 2.415013, ppl: 11.189914 +epoch: 2, batch: 22939, sum loss: 4044.305420, avg loss: 2.311032, ppl: 10.084823 +epoch: 2, batch: 22940, sum loss: 3456.987061, avg loss: 2.303123, ppl: 10.005378 +epoch: 2, batch: 22941, sum loss: 3372.529297, avg loss: 2.092140, ppl: 8.102233 +epoch: 2, batch: 22942, sum loss: 4319.738770, avg loss: 2.663218, ppl: 14.342362 +epoch: 2, batch: 22943, sum loss: 
3992.399902, avg loss: 2.431425, ppl: 11.375081 +epoch: 2, batch: 22944, sum loss: 4049.516602, avg loss: 2.423409, ppl: 11.284265 +epoch: 2, batch: 22945, sum loss: 4363.301758, avg loss: 2.601850, ppl: 13.488663 +epoch: 2, batch: 22946, sum loss: 3956.968506, avg loss: 2.491794, ppl: 12.082932 +epoch: 2, batch: 22947, sum loss: 3296.991455, avg loss: 2.411844, ppl: 11.154516 +epoch: 2, batch: 22948, sum loss: 4324.392090, avg loss: 2.403775, ppl: 11.064873 +epoch: 2, batch: 22949, sum loss: 3676.684082, avg loss: 2.316751, ppl: 10.142670 +epoch: 2, batch: 22950, sum loss: 3511.838379, avg loss: 2.395524, ppl: 10.973949 +epoch: 2, batch: 22951, sum loss: 4157.654297, avg loss: 2.533610, ppl: 12.598907 +epoch: 2, batch: 22952, sum loss: 4240.659668, avg loss: 2.452666, ppl: 11.619283 +epoch: 2, batch: 22953, sum loss: 3283.004639, avg loss: 2.399857, ppl: 11.021604 +epoch: 2, batch: 22954, sum loss: 4349.663086, avg loss: 2.494073, ppl: 12.110497 +epoch: 2, batch: 22955, sum loss: 4638.461426, avg loss: 2.647524, ppl: 14.119035 +epoch: 2, batch: 22956, sum loss: 3740.454102, avg loss: 2.320381, ppl: 10.179551 +epoch: 2, batch: 22957, sum loss: 4172.906250, avg loss: 2.437445, ppl: 11.443767 +epoch: 2, batch: 22958, sum loss: 3680.151367, avg loss: 2.185363, ppl: 8.893877 +epoch: 2, batch: 22959, sum loss: 3332.870361, avg loss: 2.317712, ppl: 10.152422 +epoch: 2, batch: 22960, sum loss: 4580.349609, avg loss: 2.481230, ppl: 11.955956 +epoch: 2, batch: 22961, sum loss: 4736.941406, avg loss: 2.554985, ppl: 12.871101 +epoch: 2, batch: 22962, sum loss: 3340.377930, avg loss: 2.255488, ppl: 9.539949 +epoch: 2, batch: 22963, sum loss: 5362.494141, avg loss: 2.745773, ppl: 15.576648 +epoch: 2, batch: 22964, sum loss: 4866.599609, avg loss: 2.493135, ppl: 12.099150 +epoch: 2, batch: 22965, sum loss: 4411.358887, avg loss: 2.485272, ppl: 12.004390 +epoch: 2, batch: 22966, sum loss: 4381.197266, avg loss: 2.444865, ppl: 11.528990 +epoch: 2, batch: 22967, sum loss: 
4465.231445, avg loss: 2.642149, ppl: 14.043346 +epoch: 2, batch: 22968, sum loss: 4647.890625, avg loss: 2.438557, ppl: 11.456502 +epoch: 2, batch: 22969, sum loss: 4634.703125, avg loss: 2.618476, ppl: 13.714811 +epoch: 2, batch: 22970, sum loss: 4156.639160, avg loss: 2.573770, ppl: 13.115179 +epoch: 2, batch: 22971, sum loss: 3867.971436, avg loss: 2.290096, ppl: 9.875881 +epoch: 2, batch: 22972, sum loss: 3370.593506, avg loss: 2.129244, ppl: 8.408508 +epoch: 2, batch: 22973, sum loss: 4175.419434, avg loss: 2.355003, ppl: 10.538157 +epoch: 2, batch: 22974, sum loss: 4500.247070, avg loss: 2.419488, ppl: 11.240100 +epoch: 2, batch: 22975, sum loss: 3582.843994, avg loss: 2.469224, ppl: 11.813276 +epoch: 2, batch: 22976, sum loss: 4717.977051, avg loss: 2.546129, ppl: 12.757623 +epoch: 2, batch: 22977, sum loss: 4527.618164, avg loss: 2.372965, ppl: 10.729160 +epoch: 2, batch: 22978, sum loss: 4233.260254, avg loss: 2.557861, ppl: 12.908181 +epoch: 2, batch: 22979, sum loss: 4473.958496, avg loss: 2.569764, ppl: 13.062737 +epoch: 2, batch: 22980, sum loss: 4487.664062, avg loss: 2.625901, ppl: 13.817011 +epoch: 2, batch: 22981, sum loss: 4193.206055, avg loss: 2.553719, ppl: 12.854819 +epoch: 2, batch: 22982, sum loss: 4204.743164, avg loss: 2.488014, ppl: 12.037343 +epoch: 2, batch: 22983, sum loss: 4116.937012, avg loss: 2.374243, ppl: 10.742875 +epoch: 2, batch: 22984, sum loss: 3485.144775, avg loss: 2.374077, ppl: 10.741093 +epoch: 2, batch: 22985, sum loss: 4390.510742, avg loss: 2.500291, ppl: 12.186038 +epoch: 2, batch: 22986, sum loss: 3663.829102, avg loss: 2.291325, ppl: 9.888032 +epoch: 2, batch: 22987, sum loss: 4210.456543, avg loss: 2.620073, ppl: 13.736721 +epoch: 2, batch: 22988, sum loss: 3846.933838, avg loss: 2.300798, ppl: 9.982144 +epoch: 2, batch: 22989, sum loss: 4402.431641, avg loss: 2.518554, ppl: 12.410635 +epoch: 2, batch: 22990, sum loss: 4703.249512, avg loss: 2.430620, ppl: 11.365924 +epoch: 2, batch: 22991, sum loss: 
4745.799316, avg loss: 2.550134, ppl: 12.808823 +epoch: 2, batch: 22992, sum loss: 3913.192139, avg loss: 2.418536, ppl: 11.229412 +epoch: 2, batch: 22993, sum loss: 3242.556641, avg loss: 2.412616, ppl: 11.163131 +epoch: 2, batch: 22994, sum loss: 3711.927490, avg loss: 2.352299, ppl: 10.509701 +epoch: 2, batch: 22995, sum loss: 3989.594971, avg loss: 2.287612, ppl: 9.851381 +epoch: 2, batch: 22996, sum loss: 4494.308105, avg loss: 2.704157, ppl: 14.941710 +epoch: 2, batch: 22997, sum loss: 3930.199707, avg loss: 2.364741, ppl: 10.641283 +epoch: 2, batch: 22998, sum loss: 3633.576416, avg loss: 2.525070, ppl: 12.491775 +epoch: 2, batch: 22999, sum loss: 4346.520996, avg loss: 2.408045, ppl: 11.112213 +epoch: 2, batch: 23000, sum loss: 4110.707031, avg loss: 2.367919, ppl: 10.675151 +epoch: 2, batch: 23001, sum loss: 3438.208496, avg loss: 2.236961, ppl: 9.364825 +epoch: 2, batch: 23002, sum loss: 3763.436768, avg loss: 2.340446, ppl: 10.385865 +epoch: 2, batch: 23003, sum loss: 4062.479248, avg loss: 2.453188, ppl: 11.625349 +epoch: 2, batch: 23004, sum loss: 5002.375000, avg loss: 2.821419, ppl: 16.800667 +epoch: 2, batch: 23005, sum loss: 4424.952637, avg loss: 2.437990, ppl: 11.450008 +epoch: 2, batch: 23006, sum loss: 3419.134766, avg loss: 2.371106, ppl: 10.709229 +epoch: 2, batch: 23007, sum loss: 4240.932617, avg loss: 2.551704, ppl: 12.828951 +epoch: 2, batch: 23008, sum loss: 4827.222168, avg loss: 2.747423, ppl: 15.602371 +epoch: 2, batch: 23009, sum loss: 3399.630127, avg loss: 2.182048, ppl: 8.864439 +epoch: 2, batch: 23010, sum loss: 4747.685547, avg loss: 2.615805, ppl: 13.678222 +epoch: 2, batch: 23011, sum loss: 5099.244141, avg loss: 2.850332, ppl: 17.293522 +epoch: 2, batch: 23012, sum loss: 4319.613770, avg loss: 2.273481, ppl: 9.713155 +epoch: 2, batch: 23013, sum loss: 3780.046875, avg loss: 2.384888, ppl: 10.857843 +epoch: 2, batch: 23014, sum loss: 5075.626465, avg loss: 2.821360, ppl: 16.799681 +epoch: 2, batch: 23015, sum loss: 
3315.117432, avg loss: 2.269074, ppl: 9.670444 +epoch: 2, batch: 23016, sum loss: 4216.204590, avg loss: 2.314053, ppl: 10.115339 +epoch: 2, batch: 23017, sum loss: 4307.559570, avg loss: 2.322134, ppl: 10.197417 +epoch: 2, batch: 23018, sum loss: 5674.860352, avg loss: 2.583004, ppl: 13.236845 +epoch: 2, batch: 23019, sum loss: 4197.237305, avg loss: 2.690537, ppl: 14.739585 +epoch: 2, batch: 23020, sum loss: 4360.046875, avg loss: 2.403554, ppl: 11.062422 +epoch: 2, batch: 23021, sum loss: 4385.745117, avg loss: 2.178711, ppl: 8.834910 +epoch: 2, batch: 23022, sum loss: 4021.315674, avg loss: 2.525952, ppl: 12.502793 +epoch: 2, batch: 23023, sum loss: 5022.933105, avg loss: 2.639481, ppl: 14.005937 +epoch: 2, batch: 23024, sum loss: 3871.044434, avg loss: 2.273074, ppl: 9.709198 +epoch: 2, batch: 23025, sum loss: 3866.061768, avg loss: 2.287611, ppl: 9.851370 +epoch: 2, batch: 23026, sum loss: 3889.348145, avg loss: 2.482035, ppl: 11.965586 +epoch: 2, batch: 23027, sum loss: 4438.416016, avg loss: 2.739763, ppl: 15.483315 +epoch: 2, batch: 23028, sum loss: 3602.244141, avg loss: 2.433949, ppl: 11.403824 +epoch: 2, batch: 23029, sum loss: 5280.207031, avg loss: 2.654704, ppl: 14.220780 +epoch: 2, batch: 23030, sum loss: 3613.295898, avg loss: 2.288344, ppl: 9.858603 +epoch: 2, batch: 23031, sum loss: 4535.050781, avg loss: 2.647432, ppl: 14.117736 +epoch: 2, batch: 23032, sum loss: 4211.227539, avg loss: 2.344782, ppl: 10.430995 +epoch: 2, batch: 23033, sum loss: 3973.891113, avg loss: 2.482131, ppl: 11.966736 +epoch: 2, batch: 23034, sum loss: 3612.938477, avg loss: 2.403818, ppl: 11.065342 +epoch: 2, batch: 23035, sum loss: 4015.254639, avg loss: 2.448326, ppl: 11.568965 +epoch: 2, batch: 23036, sum loss: 4241.037109, avg loss: 2.367971, ppl: 10.675714 +epoch: 2, batch: 23037, sum loss: 4598.777832, avg loss: 2.587945, ppl: 13.302403 +epoch: 2, batch: 23038, sum loss: 3607.675049, avg loss: 2.158992, ppl: 8.662400 +epoch: 2, batch: 23039, sum loss: 4436.604004, 
avg loss: 2.542466, ppl: 12.710982 +epoch: 2, batch: 23040, sum loss: 4159.805176, avg loss: 2.387948, ppl: 10.891120 +epoch: 2, batch: 23041, sum loss: 4380.144531, avg loss: 2.471865, ppl: 11.844516 +epoch: 2, batch: 23042, sum loss: 4150.997559, avg loss: 2.573464, ppl: 13.111165 +epoch: 2, batch: 23043, sum loss: 4930.084961, avg loss: 2.737415, ppl: 15.447008 +epoch: 2, batch: 23044, sum loss: 3585.812500, avg loss: 2.221693, ppl: 9.222933 +epoch: 2, batch: 23045, sum loss: 4303.654785, avg loss: 2.427329, ppl: 11.328587 +epoch: 2, batch: 23046, sum loss: 5286.148926, avg loss: 2.665733, ppl: 14.378487 +epoch: 2, batch: 23047, sum loss: 4451.987793, avg loss: 2.603502, ppl: 13.510964 +epoch: 2, batch: 23048, sum loss: 4014.956299, avg loss: 2.365914, ppl: 10.653773 +epoch: 2, batch: 23049, sum loss: 3531.602539, avg loss: 2.278453, ppl: 9.761571 +epoch: 2, batch: 23050, sum loss: 3698.703125, avg loss: 2.200299, ppl: 9.027715 +epoch: 2, batch: 23051, sum loss: 3356.630371, avg loss: 2.260357, ppl: 9.586513 +epoch: 2, batch: 23052, sum loss: 4078.932129, avg loss: 2.608013, ppl: 13.572055 +epoch: 2, batch: 23053, sum loss: 4366.801270, avg loss: 2.651367, ppl: 14.173396 +epoch: 2, batch: 23054, sum loss: 3661.219727, avg loss: 2.360554, ppl: 10.596823 +epoch: 2, batch: 23055, sum loss: 5299.236816, avg loss: 2.873773, ppl: 17.703682 +epoch: 2, batch: 23056, sum loss: 3699.375488, avg loss: 2.494522, ppl: 12.115936 +epoch: 2, batch: 23057, sum loss: 3988.250977, avg loss: 2.442285, ppl: 11.499282 +epoch: 2, batch: 23058, sum loss: 4282.408203, avg loss: 2.510204, ppl: 12.307442 +epoch: 2, batch: 23059, sum loss: 4485.203613, avg loss: 2.711731, ppl: 15.055320 +epoch: 2, batch: 23060, sum loss: 5268.004395, avg loss: 2.581090, ppl: 13.211528 +epoch: 2, batch: 23061, sum loss: 4438.788086, avg loss: 2.657957, ppl: 14.267113 +epoch: 2, batch: 23062, sum loss: 3794.525635, avg loss: 2.498042, ppl: 12.158663 +epoch: 2, batch: 23063, sum loss: 3406.638672, avg loss: 
2.465006, ppl: 11.763554 +epoch: 2, batch: 23064, sum loss: 4522.269531, avg loss: 2.632287, ppl: 13.905540 +epoch: 2, batch: 23065, sum loss: 4463.855469, avg loss: 2.741926, ppl: 15.516841 +epoch: 2, batch: 23066, sum loss: 3450.633057, avg loss: 2.350567, ppl: 10.491520 +epoch: 2, batch: 23067, sum loss: 4779.091797, avg loss: 2.674366, ppl: 14.503152 +epoch: 2, batch: 23068, sum loss: 4255.644043, avg loss: 2.455651, ppl: 11.654021 +epoch: 2, batch: 23069, sum loss: 5023.507812, avg loss: 2.649530, ppl: 14.147384 +epoch: 2, batch: 23070, sum loss: 4062.001465, avg loss: 2.502774, ppl: 12.216330 +epoch: 2, batch: 23071, sum loss: 4748.544434, avg loss: 2.455297, ppl: 11.649893 +epoch: 2, batch: 23072, sum loss: 5028.196289, avg loss: 2.624320, ppl: 13.795184 +epoch: 2, batch: 23073, sum loss: 4217.239258, avg loss: 2.326111, ppl: 10.238050 +epoch: 2, batch: 23074, sum loss: 3953.366699, avg loss: 2.497389, ppl: 12.150725 +epoch: 2, batch: 23075, sum loss: 4576.021973, avg loss: 2.588248, ppl: 13.306436 +epoch: 2, batch: 23076, sum loss: 4408.738770, avg loss: 2.621129, ppl: 13.751237 +epoch: 2, batch: 23077, sum loss: 3977.389160, avg loss: 2.441614, ppl: 11.491572 +epoch: 2, batch: 23078, sum loss: 4191.014160, avg loss: 2.588644, ppl: 13.311706 +epoch: 2, batch: 23079, sum loss: 3868.939453, avg loss: 2.624789, ppl: 13.801668 +epoch: 2, batch: 23080, sum loss: 3958.208008, avg loss: 2.464638, ppl: 11.759219 +epoch: 2, batch: 23081, sum loss: 4049.542480, avg loss: 2.527804, ppl: 12.525970 +epoch: 2, batch: 23082, sum loss: 4454.415039, avg loss: 2.546835, ppl: 12.766639 +epoch: 2, batch: 23083, sum loss: 3899.568115, avg loss: 2.491737, ppl: 12.082243 +epoch: 2, batch: 23084, sum loss: 3893.579102, avg loss: 2.282285, ppl: 9.799050 +epoch: 2, batch: 23085, sum loss: 4470.808594, avg loss: 2.680341, ppl: 14.590068 +epoch: 2, batch: 23086, sum loss: 4165.270508, avg loss: 2.580713, ppl: 13.206549 +epoch: 2, batch: 23087, sum loss: 4099.109375, avg loss: 
2.553962, ppl: 12.857949 +epoch: 2, batch: 23088, sum loss: 2926.330322, avg loss: 2.203562, ppl: 9.057219 +epoch: 2, batch: 23089, sum loss: 4364.666016, avg loss: 2.467307, ppl: 11.790653 +epoch: 2, batch: 23090, sum loss: 3570.229004, avg loss: 2.193015, ppl: 8.962196 +epoch: 2, batch: 23091, sum loss: 4449.869141, avg loss: 2.753632, ppl: 15.699547 +epoch: 2, batch: 23092, sum loss: 4532.026367, avg loss: 2.494236, ppl: 12.112473 +epoch: 2, batch: 23093, sum loss: 4578.821777, avg loss: 2.642136, ppl: 14.043166 +epoch: 2, batch: 23094, sum loss: 5089.625000, avg loss: 2.596747, ppl: 13.420017 +epoch: 2, batch: 23095, sum loss: 4637.367188, avg loss: 2.654474, ppl: 14.217513 +epoch: 2, batch: 23096, sum loss: 4379.128418, avg loss: 2.543048, ppl: 12.718376 +epoch: 2, batch: 23097, sum loss: 4750.272461, avg loss: 2.520038, ppl: 12.429073 +epoch: 2, batch: 23098, sum loss: 4177.287598, avg loss: 2.406272, ppl: 11.092527 +epoch: 2, batch: 23099, sum loss: 4575.215332, avg loss: 2.641579, ppl: 14.035354 +epoch: 2, batch: 23100, sum loss: 4612.303711, avg loss: 2.585372, ppl: 13.268227 +epoch: 2, batch: 23101, sum loss: 3608.709961, avg loss: 2.249819, ppl: 9.486021 +epoch: 2, batch: 23102, sum loss: 4145.119141, avg loss: 2.421214, ppl: 11.259524 +epoch: 2, batch: 23103, sum loss: 4159.358887, avg loss: 2.483200, ppl: 11.979533 +epoch: 2, batch: 23104, sum loss: 4505.807617, avg loss: 2.595511, ppl: 13.403438 +epoch: 2, batch: 23105, sum loss: 3576.120605, avg loss: 2.222573, ppl: 9.231054 +epoch: 2, batch: 23106, sum loss: 3916.037109, avg loss: 2.427797, ppl: 11.333890 +epoch: 2, batch: 23107, sum loss: 4202.433594, avg loss: 2.504430, ppl: 12.236583 +epoch: 2, batch: 23108, sum loss: 4836.036621, avg loss: 2.527986, ppl: 12.528243 +epoch: 2, batch: 23109, sum loss: 4285.376953, avg loss: 2.457211, ppl: 11.672218 +epoch: 2, batch: 23110, sum loss: 3969.523926, avg loss: 2.232578, ppl: 9.323874 +epoch: 2, batch: 23111, sum loss: 4920.916504, avg loss: 2.755272, 
ppl: 15.725324 +epoch: 2, batch: 23112, sum loss: 3825.350342, avg loss: 2.495336, ppl: 12.125811 +epoch: 2, batch: 23113, sum loss: 4393.526855, avg loss: 2.462739, ppl: 11.736920 +epoch: 2, batch: 23114, sum loss: 3797.285400, avg loss: 2.448282, ppl: 11.568455 +epoch: 2, batch: 23115, sum loss: 4856.599609, avg loss: 2.840117, ppl: 17.117764 +epoch: 2, batch: 23116, sum loss: 4462.144531, avg loss: 2.754410, ppl: 15.711772 +epoch: 2, batch: 23117, sum loss: 5767.014160, avg loss: 2.833914, ppl: 17.011908 +epoch: 2, batch: 23118, sum loss: 4203.462402, avg loss: 2.586746, ppl: 13.286467 +epoch: 2, batch: 23119, sum loss: 5433.343262, avg loss: 2.749668, ppl: 15.637434 +epoch: 2, batch: 23120, sum loss: 3892.517822, avg loss: 2.357673, ppl: 10.566334 +epoch: 2, batch: 23121, sum loss: 3873.287842, avg loss: 2.390918, ppl: 10.923523 +epoch: 2, batch: 23122, sum loss: 4388.225586, avg loss: 2.601201, ppl: 13.479916 +epoch: 2, batch: 23123, sum loss: 4250.731934, avg loss: 2.536236, ppl: 12.632038 +epoch: 2, batch: 23124, sum loss: 4466.332031, avg loss: 2.743447, ppl: 15.540462 +epoch: 2, batch: 23125, sum loss: 4309.096680, avg loss: 2.833068, ppl: 16.997536 +epoch: 2, batch: 23126, sum loss: 3886.565674, avg loss: 2.263579, ppl: 9.617452 +epoch: 2, batch: 23127, sum loss: 3957.121826, avg loss: 2.307359, ppl: 10.047858 +epoch: 2, batch: 23128, sum loss: 3992.249023, avg loss: 2.656187, ppl: 14.241879 +epoch: 2, batch: 23129, sum loss: 4197.435547, avg loss: 2.335802, ppl: 10.337746 +epoch: 2, batch: 23130, sum loss: 4347.753906, avg loss: 2.483012, ppl: 11.977285 +epoch: 2, batch: 23131, sum loss: 4326.928711, avg loss: 2.499670, ppl: 12.178472 +epoch: 2, batch: 23132, sum loss: 4118.807129, avg loss: 2.484202, ppl: 11.991549 +epoch: 2, batch: 23133, sum loss: 4551.745605, avg loss: 2.491377, ppl: 12.077894 +epoch: 2, batch: 23134, sum loss: 3200.879883, avg loss: 2.162757, ppl: 8.695074 +epoch: 2, batch: 23135, sum loss: 4330.070312, avg loss: 2.645125, ppl: 
14.085211 +epoch: 2, batch: 23136, sum loss: 4963.406250, avg loss: 2.528480, ppl: 12.534440 +epoch: 2, batch: 23137, sum loss: 4176.448242, avg loss: 2.341058, ppl: 10.392231 +epoch: 2, batch: 23138, sum loss: 2676.843018, avg loss: 2.086394, ppl: 8.055810 +epoch: 2, batch: 23139, sum loss: 4418.562988, avg loss: 2.540864, ppl: 12.690637 +epoch: 2, batch: 23140, sum loss: 3933.301758, avg loss: 2.430965, ppl: 11.369851 +epoch: 2, batch: 23141, sum loss: 3314.569580, avg loss: 2.362487, ppl: 10.617328 +epoch: 2, batch: 23142, sum loss: 4351.717285, avg loss: 2.505306, ppl: 12.247312 +epoch: 2, batch: 23143, sum loss: 4128.319336, avg loss: 2.564173, ppl: 12.989917 +epoch: 2, batch: 23144, sum loss: 3954.889404, avg loss: 2.538440, ppl: 12.659906 +epoch: 2, batch: 23145, sum loss: 3327.633545, avg loss: 2.360024, ppl: 10.591206 +epoch: 2, batch: 23146, sum loss: 5008.129883, avg loss: 2.548667, ppl: 12.790040 +epoch: 2, batch: 23147, sum loss: 3895.361328, avg loss: 2.452998, ppl: 11.623145 +epoch: 2, batch: 23148, sum loss: 4701.984863, avg loss: 2.600655, ppl: 13.472564 +epoch: 2, batch: 23149, sum loss: 3425.562256, avg loss: 2.238930, ppl: 9.383284 +epoch: 2, batch: 23150, sum loss: 4258.187012, avg loss: 2.618811, ppl: 13.719403 +epoch: 2, batch: 23151, sum loss: 4011.449951, avg loss: 2.486950, ppl: 12.024542 +epoch: 2, batch: 23152, sum loss: 3863.902344, avg loss: 2.268880, ppl: 9.668565 +epoch: 2, batch: 23153, sum loss: 4726.836914, avg loss: 2.552288, ppl: 12.836434 +epoch: 2, batch: 23154, sum loss: 5313.676758, avg loss: 2.621449, ppl: 13.755638 +epoch: 2, batch: 23155, sum loss: 3878.994873, avg loss: 2.401855, ppl: 11.043638 +epoch: 2, batch: 23156, sum loss: 4090.944580, avg loss: 2.641023, ppl: 14.027545 +epoch: 2, batch: 23157, sum loss: 4415.021973, avg loss: 2.478957, ppl: 11.928816 +epoch: 2, batch: 23158, sum loss: 5023.631348, avg loss: 2.708157, ppl: 15.001603 +epoch: 2, batch: 23159, sum loss: 4203.476074, avg loss: 2.382923, ppl: 10.836531 
+epoch: 2, batch: 23160, sum loss: 5129.367188, avg loss: 2.672937, ppl: 14.482448 +epoch: 2, batch: 23161, sum loss: 4670.240723, avg loss: 2.481531, ppl: 11.959557 +epoch: 2, batch: 23162, sum loss: 3938.171875, avg loss: 2.359600, ppl: 10.586714 +epoch: 2, batch: 23163, sum loss: 3929.428955, avg loss: 2.437611, ppl: 11.445665 +epoch: 2, batch: 23164, sum loss: 4312.570801, avg loss: 2.623218, ppl: 13.780001 +epoch: 2, batch: 23165, sum loss: 4421.713867, avg loss: 2.349476, ppl: 10.480078 +epoch: 2, batch: 23166, sum loss: 4007.538330, avg loss: 2.346334, ppl: 10.447197 +epoch: 2, batch: 23167, sum loss: 4139.663574, avg loss: 2.478841, ppl: 11.927427 +epoch: 2, batch: 23168, sum loss: 4485.363281, avg loss: 2.511402, ppl: 12.322189 +epoch: 2, batch: 23169, sum loss: 3666.906006, avg loss: 2.370334, ppl: 10.700962 +epoch: 2, batch: 23170, sum loss: 3907.115967, avg loss: 2.465057, ppl: 11.764157 +epoch: 2, batch: 23171, sum loss: 3401.683594, avg loss: 2.236479, ppl: 9.360312 +epoch: 2, batch: 23172, sum loss: 5000.583008, avg loss: 2.668401, ppl: 14.416895 +epoch: 2, batch: 23173, sum loss: 4194.997070, avg loss: 2.352775, ppl: 10.514704 +epoch: 2, batch: 23174, sum loss: 3948.006836, avg loss: 2.394182, ppl: 10.959234 +epoch: 2, batch: 23175, sum loss: 3835.102295, avg loss: 2.491944, ppl: 12.084750 +epoch: 2, batch: 23176, sum loss: 4389.541992, avg loss: 2.553544, ppl: 12.852573 +epoch: 2, batch: 23177, sum loss: 4209.823242, avg loss: 2.639388, ppl: 14.004624 +epoch: 2, batch: 23178, sum loss: 4271.976562, avg loss: 2.496772, ppl: 12.143233 +epoch: 2, batch: 23179, sum loss: 3833.952637, avg loss: 2.514067, ppl: 12.355082 +epoch: 2, batch: 23180, sum loss: 5204.772461, avg loss: 2.743686, ppl: 15.544179 +epoch: 2, batch: 23181, sum loss: 2933.400391, avg loss: 2.014698, ppl: 7.498463 +epoch: 2, batch: 23182, sum loss: 4641.112793, avg loss: 2.527839, ppl: 12.526409 +epoch: 2, batch: 23183, sum loss: 3976.332764, avg loss: 2.383893, ppl: 10.847043 +epoch: 
2, batch: 23184, sum loss: 4105.055664, avg loss: 2.474416, ppl: 11.874767 +epoch: 2, batch: 23185, sum loss: 3908.483398, avg loss: 2.251431, ppl: 9.501318 +epoch: 2, batch: 23186, sum loss: 5483.943359, avg loss: 2.710797, ppl: 15.041264 +epoch: 2, batch: 23187, sum loss: 4854.222656, avg loss: 2.512538, ppl: 12.336196 +epoch: 2, batch: 23188, sum loss: 4045.614258, avg loss: 2.382576, ppl: 10.832774 +epoch: 2, batch: 23189, sum loss: 4401.464355, avg loss: 2.619919, ppl: 13.734615 +epoch: 2, batch: 23190, sum loss: 3822.002441, avg loss: 2.329069, ppl: 10.268378 +epoch: 2, batch: 23191, sum loss: 3964.035400, avg loss: 2.565719, ppl: 13.010008 +epoch: 2, batch: 23192, sum loss: 3104.535156, avg loss: 2.046496, ppl: 7.740733 +epoch: 2, batch: 23193, sum loss: 4083.484375, avg loss: 2.574706, ppl: 13.127461 +epoch: 2, batch: 23194, sum loss: 4281.052246, avg loss: 2.597726, ppl: 13.433154 +epoch: 2, batch: 23195, sum loss: 3864.054443, avg loss: 2.376417, ppl: 10.766260 +epoch: 2, batch: 23196, sum loss: 3931.824951, avg loss: 2.449735, ppl: 11.585279 +epoch: 2, batch: 23197, sum loss: 4078.279297, avg loss: 2.418908, ppl: 11.233589 +epoch: 2, batch: 23198, sum loss: 4529.863281, avg loss: 2.587015, ppl: 13.290044 +epoch: 2, batch: 23199, sum loss: 3782.395020, avg loss: 2.383362, ppl: 10.841288 +epoch: 2, batch: 23200, sum loss: 3674.700195, avg loss: 2.275356, ppl: 9.731385 +epoch: 2, batch: 23201, sum loss: 3992.907227, avg loss: 2.280358, ppl: 9.780184 +epoch: 2, batch: 23202, sum loss: 4224.122070, avg loss: 2.313320, ppl: 10.107926 +epoch: 2, batch: 23203, sum loss: 5285.508301, avg loss: 2.754303, ppl: 15.710094 +epoch: 2, batch: 23204, sum loss: 3722.692627, avg loss: 2.265790, ppl: 9.638734 +epoch: 2, batch: 23205, sum loss: 3249.035156, avg loss: 2.489682, ppl: 12.057444 +epoch: 2, batch: 23206, sum loss: 3738.752441, avg loss: 2.366299, ppl: 10.657873 +epoch: 2, batch: 23207, sum loss: 4237.206055, avg loss: 2.582088, ppl: 13.224719 +epoch: 2, batch: 
23208, sum loss: 4142.777832, avg loss: 2.396054, ppl: 10.979768 +epoch: 2, batch: 23209, sum loss: 3930.820068, avg loss: 2.503707, ppl: 12.227740 +epoch: 2, batch: 23210, sum loss: 4069.154785, avg loss: 2.433705, ppl: 11.401046 +epoch: 2, batch: 23211, sum loss: 4173.941406, avg loss: 2.374256, ppl: 10.743013 +epoch: 2, batch: 23212, sum loss: 4676.636719, avg loss: 2.320912, ppl: 10.184956 +epoch: 2, batch: 23213, sum loss: 4184.523926, avg loss: 2.483397, ppl: 11.981898 +epoch: 2, batch: 23214, sum loss: 3841.750977, avg loss: 2.501140, ppl: 12.196391 +epoch: 2, batch: 23215, sum loss: 4594.633789, avg loss: 2.545503, ppl: 12.749644 +epoch: 2, batch: 23216, sum loss: 3747.957275, avg loss: 2.490337, ppl: 12.065343 +epoch: 2, batch: 23217, sum loss: 4071.781982, avg loss: 2.554443, ppl: 12.864131 +epoch: 2, batch: 23218, sum loss: 4104.830566, avg loss: 2.345618, ppl: 10.439717 +epoch: 2, batch: 23219, sum loss: 4022.768311, avg loss: 2.497063, ppl: 12.146766 +epoch: 2, batch: 23220, sum loss: 4250.320801, avg loss: 2.546627, ppl: 12.763982 +epoch: 2, batch: 23221, sum loss: 4720.457520, avg loss: 2.694325, ppl: 14.795531 +epoch: 2, batch: 23222, sum loss: 3744.391602, avg loss: 2.298583, ppl: 9.960057 +epoch: 2, batch: 23223, sum loss: 4017.673340, avg loss: 2.364728, ppl: 10.641149 +epoch: 2, batch: 23224, sum loss: 3614.349365, avg loss: 2.313924, ppl: 10.114035 +epoch: 2, batch: 23225, sum loss: 4279.724121, avg loss: 2.510102, ppl: 12.306189 +epoch: 2, batch: 23226, sum loss: 3967.467041, avg loss: 2.440017, ppl: 11.473230 +epoch: 2, batch: 23227, sum loss: 4030.351074, avg loss: 2.590200, ppl: 13.332438 +epoch: 2, batch: 23228, sum loss: 4813.287598, avg loss: 2.651949, ppl: 14.181654 +epoch: 2, batch: 23229, sum loss: 4124.480469, avg loss: 2.620381, ppl: 13.740963 +epoch: 2, batch: 23230, sum loss: 3424.769531, avg loss: 2.244279, ppl: 9.433611 +epoch: 2, batch: 23231, sum loss: 3421.753662, avg loss: 2.374569, ppl: 10.746380 +epoch: 2, batch: 23232, 
sum loss: 4448.247070, avg loss: 2.525978, ppl: 12.503116 +epoch: 2, batch: 23233, sum loss: 4550.861816, avg loss: 2.569657, ppl: 13.061338 +epoch: 2, batch: 23234, sum loss: 4134.905273, avg loss: 2.356071, ppl: 10.549424 +epoch: 2, batch: 23235, sum loss: 3740.112305, avg loss: 2.447717, ppl: 11.561926 +epoch: 2, batch: 23236, sum loss: 4720.231445, avg loss: 2.666797, ppl: 14.393798 +epoch: 2, batch: 23237, sum loss: 3446.978516, avg loss: 2.293399, ppl: 9.908558 +epoch: 2, batch: 23238, sum loss: 3972.312012, avg loss: 2.272490, ppl: 9.703530 +epoch: 2, batch: 23239, sum loss: 4321.701660, avg loss: 2.432021, ppl: 11.381864 +epoch: 2, batch: 23240, sum loss: 4385.911621, avg loss: 2.260779, ppl: 9.590561 +epoch: 2, batch: 23241, sum loss: 4908.734863, avg loss: 2.721028, ppl: 15.195937 +epoch: 2, batch: 23242, sum loss: 3833.364746, avg loss: 2.361901, ppl: 10.611099 +epoch: 2, batch: 23243, sum loss: 3957.154053, avg loss: 2.465517, ppl: 11.769561 +epoch: 2, batch: 23244, sum loss: 4281.689453, avg loss: 2.490803, ppl: 12.070959 +epoch: 2, batch: 23245, sum loss: 4074.224854, avg loss: 2.317534, ppl: 10.150612 +epoch: 2, batch: 23246, sum loss: 4386.261230, avg loss: 2.571079, ppl: 13.079933 +epoch: 2, batch: 23247, sum loss: 4893.631836, avg loss: 2.809203, ppl: 16.596687 +epoch: 2, batch: 23248, sum loss: 3514.567139, avg loss: 2.072268, ppl: 7.942821 +epoch: 2, batch: 23249, sum loss: 3103.667725, avg loss: 2.300717, ppl: 9.981340 +epoch: 2, batch: 23250, sum loss: 4879.826172, avg loss: 2.428983, ppl: 11.347333 +epoch: 2, batch: 23251, sum loss: 5020.525391, avg loss: 2.574628, ppl: 13.126438 +epoch: 2, batch: 23252, sum loss: 4919.824707, avg loss: 2.743907, ppl: 15.547607 +epoch: 2, batch: 23253, sum loss: 3513.842285, avg loss: 2.003331, ppl: 7.413708 +epoch: 2, batch: 23254, sum loss: 3652.975586, avg loss: 2.337157, ppl: 10.351760 +epoch: 2, batch: 23255, sum loss: 4717.125488, avg loss: 2.594678, ppl: 13.392280 +epoch: 2, batch: 23256, sum loss: 
5201.505371, avg loss: 2.912377, ppl: 18.400486 +epoch: 2, batch: 23257, sum loss: 4054.676758, avg loss: 2.472364, ppl: 11.850428 +epoch: 2, batch: 23258, sum loss: 3748.414062, avg loss: 2.309559, ppl: 10.069982 +epoch: 2, batch: 23259, sum loss: 4513.826660, avg loss: 2.520283, ppl: 12.432112 +epoch: 2, batch: 23260, sum loss: 4633.667969, avg loss: 2.609047, ppl: 13.586102 +epoch: 2, batch: 23261, sum loss: 2983.986816, avg loss: 2.076539, ppl: 7.976816 +epoch: 2, batch: 23262, sum loss: 4475.068359, avg loss: 2.695824, ppl: 14.817729 +epoch: 2, batch: 23263, sum loss: 4093.664551, avg loss: 2.355388, ppl: 10.542220 +epoch: 2, batch: 23264, sum loss: 5308.006348, avg loss: 2.717873, ppl: 15.148069 +epoch: 2, batch: 23265, sum loss: 4603.121582, avg loss: 2.486830, ppl: 12.023100 +epoch: 2, batch: 23266, sum loss: 4601.986328, avg loss: 2.691220, ppl: 14.749660 +epoch: 2, batch: 23267, sum loss: 3942.972168, avg loss: 2.542213, ppl: 12.707759 +epoch: 2, batch: 23268, sum loss: 3932.773193, avg loss: 2.374863, ppl: 10.749539 +epoch: 2, batch: 23269, sum loss: 4167.768555, avg loss: 2.482292, ppl: 11.968667 +epoch: 2, batch: 23270, sum loss: 3970.628418, avg loss: 2.495681, ppl: 12.129992 +epoch: 2, batch: 23271, sum loss: 4682.507812, avg loss: 2.700408, ppl: 14.885803 +epoch: 2, batch: 23272, sum loss: 3531.962646, avg loss: 2.320606, ppl: 10.181848 +epoch: 2, batch: 23273, sum loss: 3516.281250, avg loss: 2.201804, ppl: 9.041310 +epoch: 2, batch: 23274, sum loss: 4183.105957, avg loss: 2.700520, ppl: 14.887475 +epoch: 2, batch: 23275, sum loss: 5032.636719, avg loss: 2.617076, ppl: 13.695615 +epoch: 2, batch: 23276, sum loss: 4947.708496, avg loss: 2.700714, ppl: 14.890365 +epoch: 2, batch: 23277, sum loss: 3079.712646, avg loss: 2.238163, ppl: 9.376094 +epoch: 2, batch: 23278, sum loss: 4813.441895, avg loss: 2.518808, ppl: 12.413789 +epoch: 2, batch: 23279, sum loss: 4741.868164, avg loss: 2.584125, ppl: 13.251683 +epoch: 2, batch: 23280, sum loss: 
3892.479980, avg loss: 2.462037, ppl: 11.728674 +epoch: 2, batch: 23281, sum loss: 3524.530029, avg loss: 2.335673, ppl: 10.336412 +epoch: 2, batch: 23282, sum loss: 5001.277344, avg loss: 2.560818, ppl: 12.946403 +epoch: 2, batch: 23283, sum loss: 4578.459473, avg loss: 2.556370, ppl: 12.888948 +epoch: 2, batch: 23284, sum loss: 3798.581787, avg loss: 2.417939, ppl: 11.222705 +epoch: 2, batch: 23285, sum loss: 4467.073730, avg loss: 2.678102, ppl: 14.557434 +epoch: 2, batch: 23286, sum loss: 4409.977051, avg loss: 2.535927, ppl: 12.628133 +epoch: 2, batch: 23287, sum loss: 3811.508301, avg loss: 2.551210, ppl: 12.822615 +epoch: 2, batch: 23288, sum loss: 4949.900391, avg loss: 2.887923, ppl: 17.955980 +epoch: 2, batch: 23289, sum loss: 3243.654785, avg loss: 2.320211, ppl: 10.177821 +epoch: 2, batch: 23290, sum loss: 4454.808105, avg loss: 2.502701, ppl: 12.215446 +epoch: 2, batch: 23291, sum loss: 4216.649414, avg loss: 2.349108, ppl: 10.476226 +epoch: 2, batch: 23292, sum loss: 4369.102539, avg loss: 2.274390, ppl: 9.721984 +epoch: 2, batch: 23293, sum loss: 4337.180664, avg loss: 2.534881, ppl: 12.614925 +epoch: 2, batch: 23294, sum loss: 4405.017578, avg loss: 2.449954, ppl: 11.587814 +epoch: 2, batch: 23295, sum loss: 3494.923828, avg loss: 2.361435, ppl: 10.606162 +epoch: 2, batch: 23296, sum loss: 4613.783203, avg loss: 2.537835, ppl: 12.652245 +epoch: 2, batch: 23297, sum loss: 4658.629395, avg loss: 2.702221, ppl: 14.912822 +epoch: 2, batch: 23298, sum loss: 3429.109619, avg loss: 2.132531, ppl: 8.436190 +epoch: 2, batch: 23299, sum loss: 4004.502686, avg loss: 2.416718, ppl: 11.209016 +epoch: 2, batch: 23300, sum loss: 4552.455078, avg loss: 2.585153, ppl: 13.265323 +epoch: 2, batch: 23301, sum loss: 5396.990234, avg loss: 2.727130, ppl: 15.288943 +epoch: 2, batch: 23302, sum loss: 4967.196289, avg loss: 2.750386, ppl: 15.648664 +epoch: 2, batch: 23303, sum loss: 3941.179688, avg loss: 2.488118, ppl: 12.038603 +epoch: 2, batch: 23304, sum loss: 
3754.793213, avg loss: 2.335070, ppl: 10.330187 +epoch: 2, batch: 23305, sum loss: 4814.070312, avg loss: 2.472558, ppl: 11.852728 +epoch: 2, batch: 23306, sum loss: 4712.862305, avg loss: 2.686923, ppl: 14.686410 +epoch: 2, batch: 23307, sum loss: 4440.377930, avg loss: 2.569663, ppl: 13.061426 +epoch: 2, batch: 23308, sum loss: 4506.014160, avg loss: 2.456932, ppl: 11.668960 +epoch: 2, batch: 23309, sum loss: 4658.097168, avg loss: 2.560801, ppl: 12.946183 +epoch: 2, batch: 23310, sum loss: 3439.184082, avg loss: 2.066818, ppl: 7.899642 +epoch: 2, batch: 23311, sum loss: 4206.911133, avg loss: 2.387577, ppl: 10.887086 +epoch: 2, batch: 23312, sum loss: 4262.125488, avg loss: 2.409342, ppl: 11.126636 +epoch: 2, batch: 23313, sum loss: 4204.054688, avg loss: 2.540214, ppl: 12.682388 +epoch: 2, batch: 23314, sum loss: 3901.199219, avg loss: 2.448964, ppl: 11.576344 +epoch: 2, batch: 23315, sum loss: 3808.691895, avg loss: 2.489341, ppl: 12.053333 +epoch: 2, batch: 23316, sum loss: 4335.578125, avg loss: 2.566950, ppl: 13.026032 +epoch: 2, batch: 23317, sum loss: 3958.962891, avg loss: 2.418426, ppl: 11.228170 +epoch: 2, batch: 23318, sum loss: 3975.984375, avg loss: 2.461910, ppl: 11.727186 +epoch: 2, batch: 23319, sum loss: 4180.661621, avg loss: 2.489971, ppl: 12.060928 +epoch: 2, batch: 23320, sum loss: 3378.298828, avg loss: 2.485871, ppl: 12.011579 +epoch: 2, batch: 23321, sum loss: 4154.056152, avg loss: 2.481515, ppl: 11.959372 +epoch: 2, batch: 23322, sum loss: 4319.948242, avg loss: 2.450339, ppl: 11.592279 +epoch: 2, batch: 23323, sum loss: 3953.652344, avg loss: 2.413707, ppl: 11.175311 +epoch: 2, batch: 23324, sum loss: 4049.281738, avg loss: 2.307283, ppl: 10.047089 +epoch: 2, batch: 23325, sum loss: 4093.360352, avg loss: 2.603919, ppl: 13.516603 +epoch: 2, batch: 23326, sum loss: 4449.073730, avg loss: 2.439185, ppl: 11.463696 +epoch: 2, batch: 23327, sum loss: 4791.035156, avg loss: 2.663166, ppl: 14.341620 +epoch: 2, batch: 23328, sum loss: 
4162.146973, avg loss: 2.367547, ppl: 10.671179 +epoch: 2, batch: 23329, sum loss: 4502.638184, avg loss: 2.688142, ppl: 14.704334 +epoch: 2, batch: 23330, sum loss: 5049.612305, avg loss: 2.621813, ppl: 13.760653 +epoch: 2, batch: 23331, sum loss: 3710.962158, avg loss: 2.357663, ppl: 10.566234 +epoch: 2, batch: 23332, sum loss: 4119.554688, avg loss: 2.455039, ppl: 11.646882 +epoch: 2, batch: 23333, sum loss: 3542.342773, avg loss: 2.209821, ppl: 9.114082 +epoch: 2, batch: 23334, sum loss: 4910.805664, avg loss: 2.561714, ppl: 12.958011 +epoch: 2, batch: 23335, sum loss: 4496.604492, avg loss: 2.654430, ppl: 14.216879 +epoch: 2, batch: 23336, sum loss: 4735.152832, avg loss: 2.538956, ppl: 12.666439 +epoch: 2, batch: 23337, sum loss: 4737.654297, avg loss: 2.691849, ppl: 14.758940 +epoch: 2, batch: 23338, sum loss: 4455.080078, avg loss: 2.402956, ppl: 11.055806 +epoch: 2, batch: 23339, sum loss: 4117.719727, avg loss: 2.597930, ppl: 13.435903 +epoch: 2, batch: 23340, sum loss: 3820.918457, avg loss: 2.317112, ppl: 10.146334 +epoch: 2, batch: 23341, sum loss: 4519.464844, avg loss: 2.677408, ppl: 14.547340 +epoch: 2, batch: 23342, sum loss: 4235.026367, avg loss: 2.645238, ppl: 14.086803 +epoch: 2, batch: 23343, sum loss: 4284.838379, avg loss: 2.604765, ppl: 13.528045 +epoch: 2, batch: 23344, sum loss: 4126.264648, avg loss: 2.454649, ppl: 11.642344 +epoch: 2, batch: 23345, sum loss: 4188.798340, avg loss: 2.588874, ppl: 13.314772 +epoch: 2, batch: 23346, sum loss: 5478.223633, avg loss: 2.860691, ppl: 17.473598 +epoch: 2, batch: 23347, sum loss: 5291.067383, avg loss: 2.666868, ppl: 14.394820 +epoch: 2, batch: 23348, sum loss: 4300.090332, avg loss: 2.370502, ppl: 10.702764 +epoch: 2, batch: 23349, sum loss: 3561.551514, avg loss: 2.454550, ppl: 11.641189 +epoch: 2, batch: 23350, sum loss: 4198.009766, avg loss: 2.494361, ppl: 12.113992 +epoch: 2, batch: 23351, sum loss: 3851.454346, avg loss: 2.337048, ppl: 10.350632 +epoch: 2, batch: 23352, sum loss: 
4219.158203, avg loss: 2.599605, ppl: 13.458416 +epoch: 2, batch: 23353, sum loss: 3883.390869, avg loss: 2.333769, ppl: 10.316748 +epoch: 2, batch: 23354, sum loss: 4982.915527, avg loss: 2.734860, ppl: 15.407593 +epoch: 2, batch: 23355, sum loss: 4169.900391, avg loss: 2.513502, ppl: 12.348102 +epoch: 2, batch: 23356, sum loss: 4446.005859, avg loss: 2.562539, ppl: 12.968707 +epoch: 2, batch: 23357, sum loss: 4047.219482, avg loss: 2.319324, ppl: 10.168793 +epoch: 2, batch: 23358, sum loss: 4771.361328, avg loss: 2.664077, ppl: 14.354688 +epoch: 2, batch: 23359, sum loss: 4759.531738, avg loss: 2.631029, ppl: 13.888059 +epoch: 2, batch: 23360, sum loss: 5079.256836, avg loss: 2.584863, ppl: 13.261477 +epoch: 2, batch: 23361, sum loss: 3754.705566, avg loss: 2.400707, ppl: 11.030970 +epoch: 2, batch: 23362, sum loss: 3058.187012, avg loss: 2.168927, ppl: 8.748891 +epoch: 2, batch: 23363, sum loss: 3557.742432, avg loss: 2.320771, ppl: 10.183527 +epoch: 2, batch: 23364, sum loss: 3952.271484, avg loss: 2.568078, ppl: 13.040730 +epoch: 2, batch: 23365, sum loss: 3983.019531, avg loss: 2.475463, ppl: 11.887206 +epoch: 2, batch: 23366, sum loss: 4047.393799, avg loss: 2.420690, ppl: 11.253625 +epoch: 2, batch: 23367, sum loss: 4581.452148, avg loss: 2.555188, ppl: 12.873722 +epoch: 2, batch: 23368, sum loss: 4048.142578, avg loss: 2.370107, ppl: 10.698534 +epoch: 2, batch: 23369, sum loss: 4380.403809, avg loss: 2.654790, ppl: 14.222001 +epoch: 2, batch: 23370, sum loss: 3984.768799, avg loss: 2.353673, ppl: 10.524157 +epoch: 2, batch: 23371, sum loss: 3731.650146, avg loss: 2.456649, ppl: 11.665655 +epoch: 2, batch: 23372, sum loss: 4676.577148, avg loss: 2.494174, ppl: 12.111731 +epoch: 2, batch: 23373, sum loss: 4236.810059, avg loss: 2.572441, ppl: 13.097753 +epoch: 2, batch: 23374, sum loss: 4614.159180, avg loss: 2.503613, ppl: 12.226592 +epoch: 2, batch: 23375, sum loss: 3520.529297, avg loss: 2.149285, ppl: 8.578723 +epoch: 2, batch: 23376, sum loss: 
5277.419922, avg loss: 2.823660, ppl: 16.838360 +epoch: 2, batch: 23377, sum loss: 4421.659668, avg loss: 2.818139, ppl: 16.745651 +epoch: 2, batch: 23378, sum loss: 4117.731445, avg loss: 2.458347, ppl: 11.685483 +epoch: 2, batch: 23379, sum loss: 5088.096680, avg loss: 2.666717, ppl: 14.392645 +epoch: 2, batch: 23380, sum loss: 4631.819336, avg loss: 2.677352, ppl: 14.546526 +epoch: 2, batch: 23381, sum loss: 4825.455078, avg loss: 2.737070, ppl: 15.441680 +epoch: 2, batch: 23382, sum loss: 3464.628906, avg loss: 2.194192, ppl: 8.972745 +epoch: 2, batch: 23383, sum loss: 4537.760254, avg loss: 2.581206, ppl: 13.213064 +epoch: 2, batch: 23384, sum loss: 3729.444336, avg loss: 2.375442, ppl: 10.755769 +epoch: 2, batch: 23385, sum loss: 3799.289062, avg loss: 2.329423, ppl: 10.272015 +epoch: 2, batch: 23386, sum loss: 4265.303711, avg loss: 2.862620, ppl: 17.507338 +epoch: 2, batch: 23387, sum loss: 3522.457520, avg loss: 2.345178, ppl: 10.435131 +epoch: 2, batch: 23388, sum loss: 3949.816406, avg loss: 2.362330, ppl: 10.615662 +epoch: 2, batch: 23389, sum loss: 4193.733887, avg loss: 2.348115, ppl: 10.465828 +epoch: 2, batch: 23390, sum loss: 4481.592285, avg loss: 2.568248, ppl: 13.042948 +epoch: 2, batch: 23391, sum loss: 4384.846680, avg loss: 2.448267, ppl: 11.568284 +epoch: 2, batch: 23392, sum loss: 4217.989258, avg loss: 2.293632, ppl: 9.910869 +epoch: 2, batch: 23393, sum loss: 3640.260498, avg loss: 2.366879, ppl: 10.664062 +epoch: 2, batch: 23394, sum loss: 4105.403320, avg loss: 2.260684, ppl: 9.589651 +epoch: 2, batch: 23395, sum loss: 3764.178467, avg loss: 2.345283, ppl: 10.436221 +epoch: 2, batch: 23396, sum loss: 3737.885742, avg loss: 2.352351, ppl: 10.510250 +epoch: 2, batch: 23397, sum loss: 3963.705566, avg loss: 2.687258, ppl: 14.691337 +epoch: 2, batch: 23398, sum loss: 3851.167480, avg loss: 2.434366, ppl: 11.408586 +epoch: 2, batch: 23399, sum loss: 3981.491943, avg loss: 2.442633, ppl: 11.503291 +epoch: 2, batch: 23400, sum loss: 
4896.478516, avg loss: 2.586623, ppl: 13.284839 +epoch: 2, batch: 23401, sum loss: 3092.868896, avg loss: 2.210771, ppl: 9.122750 +epoch: 2, batch: 23402, sum loss: 3550.927734, avg loss: 2.438824, ppl: 11.459556 +epoch: 2, batch: 23403, sum loss: 3176.894531, avg loss: 2.190962, ppl: 8.943811 +epoch: 2, batch: 23404, sum loss: 4075.036621, avg loss: 2.637564, ppl: 13.979112 +epoch: 2, batch: 23405, sum loss: 3932.076172, avg loss: 2.564955, ppl: 13.000076 +epoch: 2, batch: 23406, sum loss: 3770.942383, avg loss: 2.223433, ppl: 9.238994 +epoch: 2, batch: 23407, sum loss: 4603.200684, avg loss: 2.489562, ppl: 12.055998 +epoch: 2, batch: 23408, sum loss: 4167.295410, avg loss: 2.316451, ppl: 10.139626 +epoch: 2, batch: 23409, sum loss: 4998.406738, avg loss: 2.769200, ppl: 15.945877 +epoch: 2, batch: 23410, sum loss: 5029.090820, avg loss: 2.746636, ppl: 15.590104 +epoch: 2, batch: 23411, sum loss: 4147.675293, avg loss: 2.707360, ppl: 14.989648 +epoch: 2, batch: 23412, sum loss: 4569.155273, avg loss: 2.345562, ppl: 10.439140 +epoch: 2, batch: 23413, sum loss: 4177.211426, avg loss: 2.413178, ppl: 11.169403 +epoch: 2, batch: 23414, sum loss: 3916.201660, avg loss: 2.284832, ppl: 9.824034 +epoch: 2, batch: 23415, sum loss: 4975.982422, avg loss: 2.668087, ppl: 14.412372 +epoch: 2, batch: 23416, sum loss: 4225.899902, avg loss: 2.578340, ppl: 13.175253 +epoch: 2, batch: 23417, sum loss: 3722.662109, avg loss: 2.407932, ppl: 11.110955 +epoch: 2, batch: 23418, sum loss: 5394.468262, avg loss: 2.812549, ppl: 16.652306 +epoch: 2, batch: 23419, sum loss: 3650.382080, avg loss: 2.090712, ppl: 8.090671 +epoch: 2, batch: 23420, sum loss: 3779.438721, avg loss: 2.432071, ppl: 11.382433 +epoch: 2, batch: 23421, sum loss: 4155.472168, avg loss: 2.630046, ppl: 13.874403 +epoch: 2, batch: 23422, sum loss: 4184.729004, avg loss: 2.684239, ppl: 14.647057 +epoch: 2, batch: 23423, sum loss: 3894.691162, avg loss: 2.277597, ppl: 9.753217 +epoch: 2, batch: 23424, sum loss: 4955.397949, 
avg loss: 2.476461, ppl: 11.899076 +epoch: 2, batch: 23425, sum loss: 4259.711426, avg loss: 2.505713, ppl: 12.252289 +epoch: 2, batch: 23426, sum loss: 4250.562500, avg loss: 2.336758, ppl: 10.347634 +epoch: 2, batch: 23427, sum loss: 4323.873047, avg loss: 2.472197, ppl: 11.848453 +epoch: 2, batch: 23428, sum loss: 5136.081543, avg loss: 2.946691, ppl: 19.042828 +epoch: 2, batch: 23429, sum loss: 4884.037109, avg loss: 2.798875, ppl: 16.426159 +epoch: 2, batch: 23430, sum loss: 4167.850098, avg loss: 2.312902, ppl: 10.103707 +epoch: 2, batch: 23431, sum loss: 4147.999023, avg loss: 2.507859, ppl: 12.278616 +epoch: 2, batch: 23432, sum loss: 4482.362793, avg loss: 2.668073, ppl: 14.412172 +epoch: 2, batch: 23433, sum loss: 4523.134766, avg loss: 2.413626, ppl: 11.174403 +epoch: 2, batch: 23434, sum loss: 4840.913086, avg loss: 2.724205, ppl: 15.244297 +epoch: 2, batch: 23435, sum loss: 2993.585693, avg loss: 2.206032, ppl: 9.079619 +epoch: 2, batch: 23436, sum loss: 4642.346191, avg loss: 2.624277, ppl: 13.794599 +epoch: 2, batch: 23437, sum loss: 4444.923340, avg loss: 2.551621, ppl: 12.827877 +epoch: 2, batch: 23438, sum loss: 3898.565918, avg loss: 2.469009, ppl: 11.810741 +epoch: 2, batch: 23439, sum loss: 4189.043457, avg loss: 2.478724, ppl: 11.926037 +epoch: 2, batch: 23440, sum loss: 4618.524902, avg loss: 2.680514, ppl: 14.592587 +epoch: 2, batch: 23441, sum loss: 4379.302246, avg loss: 2.489655, ppl: 12.057110 +epoch: 2, batch: 23442, sum loss: 3872.259766, avg loss: 2.313178, ppl: 10.106493 +epoch: 2, batch: 23443, sum loss: 4097.226562, avg loss: 2.471186, ppl: 11.836478 +epoch: 2, batch: 23444, sum loss: 5343.291504, avg loss: 2.592572, ppl: 13.364106 +epoch: 2, batch: 23445, sum loss: 3742.283691, avg loss: 2.205235, ppl: 9.072383 +epoch: 2, batch: 23446, sum loss: 4112.641113, avg loss: 2.629566, ppl: 13.867749 +epoch: 2, batch: 23447, sum loss: 4976.922852, avg loss: 2.843956, ppl: 17.183609 +epoch: 2, batch: 23448, sum loss: 3873.930176, avg loss: 
2.584343, ppl: 13.254580 +epoch: 2, batch: 23449, sum loss: 4867.318848, avg loss: 2.782915, ppl: 16.166082 +epoch: 2, batch: 23450, sum loss: 5017.918457, avg loss: 2.652177, ppl: 14.184883 +epoch: 2, batch: 23451, sum loss: 3310.738770, avg loss: 2.166714, ppl: 8.729551 +epoch: 2, batch: 23452, sum loss: 3715.259277, avg loss: 2.551689, ppl: 12.828755 +epoch: 2, batch: 23453, sum loss: 3645.520508, avg loss: 2.316087, ppl: 10.135930 +epoch: 2, batch: 23454, sum loss: 4584.899414, avg loss: 2.510898, ppl: 12.315984 +epoch: 2, batch: 23455, sum loss: 4336.303711, avg loss: 2.393104, ppl: 10.947417 +epoch: 2, batch: 23456, sum loss: 3749.619629, avg loss: 2.566475, ppl: 13.019847 +epoch: 2, batch: 23457, sum loss: 4611.469238, avg loss: 2.636632, ppl: 13.966086 +epoch: 2, batch: 23458, sum loss: 4094.001953, avg loss: 2.479711, ppl: 11.937809 +epoch: 2, batch: 23459, sum loss: 3321.223145, avg loss: 2.338890, ppl: 10.369716 +epoch: 2, batch: 23460, sum loss: 5036.592773, avg loss: 2.460475, ppl: 11.710375 +epoch: 2, batch: 23461, sum loss: 4061.734375, avg loss: 2.314378, ppl: 10.118632 +epoch: 2, batch: 23462, sum loss: 4378.781250, avg loss: 2.486531, ppl: 12.019508 +epoch: 2, batch: 23463, sum loss: 4892.713379, avg loss: 2.713651, ppl: 15.084254 +epoch: 2, batch: 23464, sum loss: 4669.388672, avg loss: 2.586919, ppl: 13.288764 +epoch: 2, batch: 23465, sum loss: 4082.461670, avg loss: 2.649229, ppl: 14.143127 +epoch: 2, batch: 23466, sum loss: 4473.625000, avg loss: 2.396157, ppl: 10.980893 +epoch: 2, batch: 23467, sum loss: 4309.169922, avg loss: 2.466611, ppl: 11.782450 +epoch: 2, batch: 23468, sum loss: 5101.164551, avg loss: 2.732279, ppl: 15.367867 +epoch: 2, batch: 23469, sum loss: 4168.995117, avg loss: 2.346086, ppl: 10.444610 +epoch: 2, batch: 23470, sum loss: 5600.441406, avg loss: 2.735926, ppl: 15.424026 +epoch: 2, batch: 23471, sum loss: 4751.392578, avg loss: 2.690483, ppl: 14.738788 +epoch: 2, batch: 23472, sum loss: 4687.712402, avg loss: 
2.706531, ppl: 14.977234 +epoch: 2, batch: 23473, sum loss: 3647.151855, avg loss: 2.525728, ppl: 12.499998 +epoch: 2, batch: 23474, sum loss: 2939.288818, avg loss: 2.342063, ppl: 10.402675 +epoch: 2, batch: 23475, sum loss: 4072.654541, avg loss: 2.434342, ppl: 11.408314 +epoch: 2, batch: 23476, sum loss: 3709.487549, avg loss: 2.111262, ppl: 8.258657 +epoch: 2, batch: 23477, sum loss: 3351.882812, avg loss: 2.233100, ppl: 9.328740 +epoch: 2, batch: 23478, sum loss: 4092.345459, avg loss: 2.394585, ppl: 10.963646 +epoch: 2, batch: 23479, sum loss: 4108.326172, avg loss: 2.595279, ppl: 13.400319 +epoch: 2, batch: 23480, sum loss: 4134.643066, avg loss: 2.297024, ppl: 9.944544 +epoch: 2, batch: 23481, sum loss: 2981.686035, avg loss: 2.131298, ppl: 8.425797 +epoch: 2, batch: 23482, sum loss: 4178.313477, avg loss: 2.683567, ppl: 14.637208 +epoch: 2, batch: 23483, sum loss: 4057.060547, avg loss: 2.444012, ppl: 11.519168 +epoch: 2, batch: 23484, sum loss: 4596.904785, avg loss: 2.576741, ppl: 13.154192 +epoch: 2, batch: 23485, sum loss: 4870.256348, avg loss: 2.719294, ppl: 15.169614 +epoch: 2, batch: 23486, sum loss: 3683.703857, avg loss: 2.279520, ppl: 9.771987 +epoch: 2, batch: 23487, sum loss: 4302.798340, avg loss: 2.689249, ppl: 14.720613 +epoch: 2, batch: 23488, sum loss: 4449.772949, avg loss: 2.582573, ppl: 13.231137 +epoch: 2, batch: 23489, sum loss: 3193.974121, avg loss: 2.078057, ppl: 7.988934 +epoch: 2, batch: 23490, sum loss: 3968.550781, avg loss: 2.513332, ppl: 12.345994 +epoch: 2, batch: 23491, sum loss: 4283.378906, avg loss: 2.443456, ppl: 11.512765 +epoch: 2, batch: 23492, sum loss: 4758.208984, avg loss: 2.404350, ppl: 11.071235 +epoch: 2, batch: 23493, sum loss: 3498.972656, avg loss: 2.483302, ppl: 11.980761 +epoch: 2, batch: 23494, sum loss: 4133.874023, avg loss: 2.427407, ppl: 11.329467 +epoch: 2, batch: 23495, sum loss: 4553.716309, avg loss: 2.559705, ppl: 12.932008 +epoch: 2, batch: 23496, sum loss: 4592.487305, avg loss: 2.660769, 
ppl: 14.307287 +epoch: 2, batch: 23497, sum loss: 3850.766602, avg loss: 2.346597, ppl: 10.449952 +epoch: 2, batch: 23498, sum loss: 3933.166992, avg loss: 2.414467, ppl: 11.183809 +epoch: 2, batch: 23499, sum loss: 4089.922852, avg loss: 2.444664, ppl: 11.526676 +epoch: 2, batch: 23500, sum loss: 4563.166016, avg loss: 2.681061, ppl: 14.600576 +epoch: 2, batch: 23501, sum loss: 3887.549316, avg loss: 2.225271, ppl: 9.255995 +epoch: 2, batch: 23502, sum loss: 3685.791504, avg loss: 2.444159, ppl: 11.520854 +epoch: 2, batch: 23503, sum loss: 4564.137207, avg loss: 2.647412, ppl: 14.117450 +epoch: 2, batch: 23504, sum loss: 4519.126953, avg loss: 2.424424, ppl: 11.295726 +epoch: 2, batch: 23505, sum loss: 3900.052246, avg loss: 2.290107, ppl: 9.875995 +epoch: 2, batch: 23506, sum loss: 3005.520996, avg loss: 2.246279, ppl: 9.452496 +epoch: 2, batch: 23507, sum loss: 4065.781738, avg loss: 2.404366, ppl: 11.071404 +epoch: 2, batch: 23508, sum loss: 5229.661621, avg loss: 2.892512, ppl: 18.038563 +epoch: 2, batch: 23509, sum loss: 4036.046387, avg loss: 2.229860, ppl: 9.298565 +epoch: 2, batch: 23510, sum loss: 4198.986816, avg loss: 2.527987, ppl: 12.528264 +epoch: 2, batch: 23511, sum loss: 3080.507080, avg loss: 2.229021, ppl: 9.290767 +epoch: 2, batch: 23512, sum loss: 3251.568604, avg loss: 2.247110, ppl: 9.460360 +epoch: 2, batch: 23513, sum loss: 5560.620605, avg loss: 2.867778, ppl: 17.597864 +epoch: 2, batch: 23514, sum loss: 4595.797852, avg loss: 2.448481, ppl: 11.570753 +epoch: 2, batch: 23515, sum loss: 4592.205078, avg loss: 2.449176, ppl: 11.578803 +epoch: 2, batch: 23516, sum loss: 4375.189941, avg loss: 2.427963, ppl: 11.335773 +epoch: 2, batch: 23517, sum loss: 3735.740967, avg loss: 2.352482, ppl: 10.511625 +epoch: 2, batch: 23518, sum loss: 5214.851562, avg loss: 2.707607, ppl: 14.993358 +epoch: 2, batch: 23519, sum loss: 4021.396484, avg loss: 2.385170, ppl: 10.860909 +epoch: 2, batch: 23520, sum loss: 3715.390137, avg loss: 2.458895, ppl: 
11.691882 +epoch: 2, batch: 23521, sum loss: 4331.193848, avg loss: 2.456718, ppl: 11.666459 +epoch: 2, batch: 23522, sum loss: 4112.661133, avg loss: 2.406472, ppl: 11.094752 +epoch: 2, batch: 23523, sum loss: 4522.603516, avg loss: 2.893540, ppl: 18.057125 +epoch: 2, batch: 23524, sum loss: 4603.688477, avg loss: 2.353624, ppl: 10.523637 +epoch: 2, batch: 23525, sum loss: 4636.725098, avg loss: 2.556078, ppl: 12.885181 +epoch: 2, batch: 23526, sum loss: 4436.664551, avg loss: 2.519401, ppl: 12.421149 +epoch: 2, batch: 23527, sum loss: 4446.924805, avg loss: 2.580920, ppl: 13.209282 +epoch: 2, batch: 23528, sum loss: 4366.344238, avg loss: 2.300498, ppl: 9.979146 +epoch: 2, batch: 23529, sum loss: 3305.317383, avg loss: 2.088008, ppl: 8.068830 +epoch: 2, batch: 23530, sum loss: 3952.163086, avg loss: 2.280533, ppl: 9.781889 +epoch: 2, batch: 23531, sum loss: 4048.673096, avg loss: 2.533588, ppl: 12.598625 +epoch: 2, batch: 23532, sum loss: 4181.528809, avg loss: 2.582785, ppl: 13.233942 +epoch: 2, batch: 23533, sum loss: 4383.030762, avg loss: 2.440440, ppl: 11.478095 +epoch: 2, batch: 23534, sum loss: 3516.775146, avg loss: 2.318243, ppl: 10.157814 +epoch: 2, batch: 23535, sum loss: 3461.955566, avg loss: 2.458775, ppl: 11.690485 +epoch: 2, batch: 23536, sum loss: 5075.290039, avg loss: 2.664194, ppl: 14.356379 +epoch: 2, batch: 23537, sum loss: 4051.362061, avg loss: 2.717211, ppl: 15.138051 +epoch: 2, batch: 23538, sum loss: 4025.183350, avg loss: 2.526794, ppl: 12.513330 +epoch: 2, batch: 23539, sum loss: 4243.166016, avg loss: 2.504821, ppl: 12.241363 +epoch: 2, batch: 23540, sum loss: 5102.283203, avg loss: 2.752040, ppl: 15.674582 +epoch: 2, batch: 23541, sum loss: 4924.536133, avg loss: 2.570217, ppl: 13.068662 +epoch: 2, batch: 23542, sum loss: 4863.576172, avg loss: 2.419690, ppl: 11.242370 +epoch: 2, batch: 23543, sum loss: 4279.890625, avg loss: 2.482535, ppl: 11.971579 +epoch: 2, batch: 23544, sum loss: 3606.041016, avg loss: 2.242563, ppl: 9.417435 
+epoch: 2, batch: 23545, sum loss: 3282.650879, avg loss: 2.155385, ppl: 8.631210 +epoch: 2, batch: 23546, sum loss: 3358.487061, avg loss: 2.205179, ppl: 9.071873 +epoch: 2, batch: 23547, sum loss: 4046.795410, avg loss: 2.505756, ppl: 12.252814 +epoch: 2, batch: 23548, sum loss: 3616.266602, avg loss: 2.414063, ppl: 11.179290 +epoch: 2, batch: 23549, sum loss: 4134.801758, avg loss: 2.292019, ppl: 9.894892 +epoch: 2, batch: 23550, sum loss: 4675.451172, avg loss: 2.653491, ppl: 14.203541 +epoch: 2, batch: 23551, sum loss: 3688.838867, avg loss: 2.740593, ppl: 15.496179 +epoch: 2, batch: 23552, sum loss: 4163.359375, avg loss: 2.307849, ppl: 10.052777 +epoch: 2, batch: 23553, sum loss: 4301.706543, avg loss: 2.500992, ppl: 12.194586 +epoch: 2, batch: 23554, sum loss: 4552.687500, avg loss: 2.559127, ppl: 12.924534 +epoch: 2, batch: 23555, sum loss: 4279.297852, avg loss: 2.501051, ppl: 12.195304 +epoch: 2, batch: 23556, sum loss: 4161.400391, avg loss: 2.322210, ppl: 10.198188 +epoch: 2, batch: 23557, sum loss: 4234.290039, avg loss: 2.480545, ppl: 11.947772 +epoch: 2, batch: 23558, sum loss: 4455.901855, avg loss: 2.520307, ppl: 12.432407 +epoch: 2, batch: 23559, sum loss: 4944.564453, avg loss: 2.766964, ppl: 15.910256 +epoch: 2, batch: 23560, sum loss: 4406.403809, avg loss: 2.578352, ppl: 13.175410 +epoch: 2, batch: 23561, sum loss: 4297.367188, avg loss: 2.296829, ppl: 9.942607 +epoch: 2, batch: 23562, sum loss: 4253.331543, avg loss: 2.545381, ppl: 12.748085 +epoch: 2, batch: 23563, sum loss: 5071.880859, avg loss: 2.558971, ppl: 12.922515 +epoch: 2, batch: 23564, sum loss: 4791.061035, avg loss: 2.412417, ppl: 11.160909 +epoch: 2, batch: 23565, sum loss: 3500.899414, avg loss: 2.472387, ppl: 11.850696 +epoch: 2, batch: 23566, sum loss: 3250.981689, avg loss: 2.267072, ppl: 9.651103 +epoch: 2, batch: 23567, sum loss: 4470.988281, avg loss: 2.303446, ppl: 10.008611 +epoch: 2, batch: 23568, sum loss: 4428.181152, avg loss: 2.507464, ppl: 12.273761 +epoch: 2, 
batch: 23569, sum loss: 3892.908447, avg loss: 2.423978, ppl: 11.290683 +epoch: 2, batch: 23570, sum loss: 4752.025391, avg loss: 2.648844, ppl: 14.137683 +epoch: 2, batch: 23571, sum loss: 4280.875977, avg loss: 2.577288, ppl: 13.161401 +epoch: 2, batch: 23572, sum loss: 4064.265381, avg loss: 2.262954, ppl: 9.611440 +epoch: 2, batch: 23573, sum loss: 4493.115234, avg loss: 2.470102, ppl: 11.823650 +epoch: 2, batch: 23574, sum loss: 3784.615479, avg loss: 2.206773, ppl: 9.086346 +epoch: 2, batch: 23575, sum loss: 4322.647461, avg loss: 2.577607, ppl: 13.165597 +epoch: 2, batch: 23576, sum loss: 3885.066895, avg loss: 2.371835, ppl: 10.717045 +epoch: 2, batch: 23577, sum loss: 4669.332520, avg loss: 2.503664, ppl: 12.227207 +epoch: 2, batch: 23578, sum loss: 4030.428223, avg loss: 2.581953, ppl: 13.222935 +epoch: 2, batch: 23579, sum loss: 4159.889160, avg loss: 2.483516, ppl: 11.983323 +epoch: 2, batch: 23580, sum loss: 4516.353516, avg loss: 2.509085, ppl: 12.293681 +epoch: 2, batch: 23581, sum loss: 3909.882324, avg loss: 2.197798, ppl: 9.005160 +epoch: 2, batch: 23582, sum loss: 3699.771484, avg loss: 2.257334, ppl: 9.557579 +epoch: 2, batch: 23583, sum loss: 4131.364258, avg loss: 2.487275, ppl: 12.028459 +epoch: 2, batch: 23584, sum loss: 4218.516602, avg loss: 2.491740, ppl: 12.082287 +epoch: 2, batch: 23585, sum loss: 3375.979736, avg loss: 2.299714, ppl: 9.971327 +epoch: 2, batch: 23586, sum loss: 4289.662598, avg loss: 2.373914, ppl: 10.739342 +epoch: 2, batch: 23587, sum loss: 4415.343750, avg loss: 2.659846, ppl: 14.294082 +epoch: 2, batch: 23588, sum loss: 3988.160645, avg loss: 2.301304, ppl: 9.987201 +epoch: 2, batch: 23589, sum loss: 3878.628662, avg loss: 2.470464, ppl: 11.827936 +epoch: 2, batch: 23590, sum loss: 4342.818359, avg loss: 2.446658, ppl: 11.549685 +epoch: 2, batch: 23591, sum loss: 5094.496094, avg loss: 2.612562, ppl: 13.633936 +epoch: 2, batch: 23592, sum loss: 4353.061035, avg loss: 2.399703, ppl: 11.019903 +epoch: 2, batch: 23593, 
sum loss: 3622.645020, avg loss: 2.501827, ppl: 12.204769 +epoch: 2, batch: 23594, sum loss: 4053.441406, avg loss: 2.382976, ppl: 10.837102 +epoch: 2, batch: 23595, sum loss: 3916.264648, avg loss: 2.318688, ppl: 10.162336 +epoch: 2, batch: 23596, sum loss: 4738.226562, avg loss: 2.596288, ppl: 13.413859 +epoch: 2, batch: 23597, sum loss: 3506.663574, avg loss: 2.337776, ppl: 10.358171 +epoch: 2, batch: 23598, sum loss: 3714.700439, avg loss: 2.269212, ppl: 9.671779 +epoch: 2, batch: 23599, sum loss: 4507.821777, avg loss: 2.577371, ppl: 13.162487 +epoch: 2, batch: 23600, sum loss: 3646.699219, avg loss: 2.462322, ppl: 11.732024 +epoch: 2, batch: 23601, sum loss: 4352.840820, avg loss: 2.566534, ppl: 13.020611 +epoch: 2, batch: 23602, sum loss: 4375.920410, avg loss: 2.574071, ppl: 13.119123 +epoch: 2, batch: 23603, sum loss: 3530.030029, avg loss: 2.326981, ppl: 10.246958 +epoch: 2, batch: 23604, sum loss: 3906.770996, avg loss: 2.413077, ppl: 11.168269 +epoch: 2, batch: 23605, sum loss: 4455.596680, avg loss: 2.557748, ppl: 12.906716 +epoch: 2, batch: 23606, sum loss: 3467.150879, avg loss: 2.177859, ppl: 8.827383 +epoch: 2, batch: 23607, sum loss: 4585.833496, avg loss: 2.528023, ppl: 12.528712 +epoch: 2, batch: 23608, sum loss: 4153.379395, avg loss: 2.600739, ppl: 13.473685 +epoch: 2, batch: 23609, sum loss: 4600.376953, avg loss: 2.587389, ppl: 13.295010 +epoch: 2, batch: 23610, sum loss: 5095.518066, avg loss: 2.532564, ppl: 12.585733 +epoch: 2, batch: 23611, sum loss: 4008.604004, avg loss: 2.397491, ppl: 10.995548 +epoch: 2, batch: 23612, sum loss: 4714.882812, avg loss: 2.497290, ppl: 12.149520 +epoch: 2, batch: 23613, sum loss: 4160.803711, avg loss: 2.607020, ppl: 13.558584 +epoch: 2, batch: 23614, sum loss: 3613.900635, avg loss: 2.219841, ppl: 9.205863 +epoch: 2, batch: 23615, sum loss: 3772.502441, avg loss: 2.340262, ppl: 10.383956 +epoch: 2, batch: 23616, sum loss: 4622.094727, avg loss: 2.632173, ppl: 13.903945 +epoch: 2, batch: 23617, sum loss: 
4912.403809, avg loss: 2.791138, ppl: 16.299564 +epoch: 2, batch: 23618, sum loss: 3973.682617, avg loss: 2.393785, ppl: 10.954877 +epoch: 2, batch: 23619, sum loss: 4511.760254, avg loss: 2.425678, ppl: 11.309890 +epoch: 2, batch: 23620, sum loss: 5504.629395, avg loss: 2.951544, ppl: 19.135473 +epoch: 2, batch: 23621, sum loss: 4137.159668, avg loss: 2.621774, ppl: 13.760115 +epoch: 2, batch: 23622, sum loss: 3469.004395, avg loss: 2.243858, ppl: 9.429640 +epoch: 2, batch: 23623, sum loss: 4111.289062, avg loss: 2.421254, ppl: 11.259970 +epoch: 2, batch: 23624, sum loss: 4613.571289, avg loss: 2.643880, ppl: 14.067686 +epoch: 2, batch: 23625, sum loss: 3662.506348, avg loss: 2.334293, ppl: 10.322164 +epoch: 2, batch: 23626, sum loss: 4488.998047, avg loss: 2.591800, ppl: 13.353789 +epoch: 2, batch: 23627, sum loss: 4654.644531, avg loss: 2.513307, ppl: 12.345688 +epoch: 2, batch: 23628, sum loss: 3971.804199, avg loss: 2.490160, ppl: 12.063200 +epoch: 2, batch: 23629, sum loss: 4217.590820, avg loss: 2.567006, ppl: 13.026762 +epoch: 2, batch: 23630, sum loss: 4125.228027, avg loss: 2.448206, ppl: 11.567581 +epoch: 2, batch: 23631, sum loss: 4137.728516, avg loss: 2.380741, ppl: 10.812916 +epoch: 2, batch: 23632, sum loss: 4481.810547, avg loss: 2.751265, ppl: 15.662430 +epoch: 2, batch: 23633, sum loss: 4394.741211, avg loss: 2.394954, ppl: 10.967698 +epoch: 2, batch: 23634, sum loss: 4462.100586, avg loss: 2.421107, ppl: 11.258319 +epoch: 2, batch: 23635, sum loss: 4198.397461, avg loss: 2.374659, ppl: 10.747351 +epoch: 2, batch: 23636, sum loss: 4769.199219, avg loss: 2.494351, ppl: 12.113868 +epoch: 2, batch: 23637, sum loss: 5474.233398, avg loss: 2.882693, ppl: 17.862309 +epoch: 2, batch: 23638, sum loss: 4801.858887, avg loss: 2.448679, ppl: 11.573046 +epoch: 2, batch: 23639, sum loss: 4009.540283, avg loss: 2.504398, ppl: 12.236186 +epoch: 2, batch: 23640, sum loss: 3896.638672, avg loss: 2.334715, ppl: 10.326513 +epoch: 2, batch: 23641, sum loss: 
3620.676758, avg loss: 2.375772, ppl: 10.759318 +epoch: 2, batch: 23642, sum loss: 4027.503662, avg loss: 2.436481, ppl: 11.432741 +epoch: 2, batch: 23643, sum loss: 5021.445801, avg loss: 2.476058, ppl: 11.894284 +epoch: 2, batch: 23644, sum loss: 5028.476562, avg loss: 2.464939, ppl: 11.762769 +epoch: 2, batch: 23645, sum loss: 4640.712402, avg loss: 2.575312, ppl: 13.135417 +epoch: 2, batch: 23646, sum loss: 4355.933594, avg loss: 2.715669, ppl: 15.114724 +epoch: 2, batch: 23647, sum loss: 4835.172363, avg loss: 2.717916, ppl: 15.148716 +epoch: 2, batch: 23648, sum loss: 4021.157715, avg loss: 2.370966, ppl: 10.707728 +epoch: 2, batch: 23649, sum loss: 5106.894531, avg loss: 2.633778, ppl: 13.926279 +epoch: 2, batch: 23650, sum loss: 4719.073730, avg loss: 2.519527, ppl: 12.422719 +epoch: 2, batch: 23651, sum loss: 4463.623047, avg loss: 2.428522, ppl: 11.342102 +epoch: 2, batch: 23652, sum loss: 4497.784668, avg loss: 2.386093, ppl: 10.870934 +epoch: 2, batch: 23653, sum loss: 3043.992432, avg loss: 2.183639, ppl: 8.878555 +epoch: 2, batch: 23654, sum loss: 3494.453613, avg loss: 2.278001, ppl: 9.757157 +epoch: 2, batch: 23655, sum loss: 3408.111816, avg loss: 2.230440, ppl: 9.303956 +epoch: 2, batch: 23656, sum loss: 4224.986328, avg loss: 2.370924, ppl: 10.707281 +epoch: 2, batch: 23657, sum loss: 4624.384766, avg loss: 2.459779, ppl: 11.702226 +epoch: 2, batch: 23658, sum loss: 4162.476074, avg loss: 2.496986, ppl: 12.145833 +epoch: 2, batch: 23659, sum loss: 3789.643799, avg loss: 2.327791, ppl: 10.255262 +epoch: 2, batch: 23660, sum loss: 3097.222168, avg loss: 2.144891, ppl: 8.541106 +epoch: 2, batch: 23661, sum loss: 3764.665283, avg loss: 2.318144, ppl: 10.156802 +epoch: 2, batch: 23662, sum loss: 3708.926025, avg loss: 2.174048, ppl: 8.793809 +epoch: 2, batch: 23663, sum loss: 5329.395996, avg loss: 2.702533, ppl: 14.917477 +epoch: 2, batch: 23664, sum loss: 3985.937012, avg loss: 2.484998, ppl: 12.001099 +epoch: 2, batch: 23665, sum loss: 4871.752930, 
avg loss: 2.927736, ppl: 18.685284 +epoch: 2, batch: 23666, sum loss: 3295.470459, avg loss: 2.254084, ppl: 9.526562 +epoch: 2, batch: 23667, sum loss: 3883.789551, avg loss: 2.169715, ppl: 8.755786 +epoch: 2, batch: 23668, sum loss: 4829.691406, avg loss: 2.524669, ppl: 12.486757 +epoch: 2, batch: 23669, sum loss: 3942.143311, avg loss: 2.436430, ppl: 11.432152 +epoch: 2, batch: 23670, sum loss: 3761.683594, avg loss: 2.251157, ppl: 9.498722 +epoch: 2, batch: 23671, sum loss: 5245.020508, avg loss: 2.477572, ppl: 11.912309 +epoch: 2, batch: 23672, sum loss: 4100.869141, avg loss: 2.214292, ppl: 9.154925 +epoch: 2, batch: 23673, sum loss: 4089.152344, avg loss: 2.416757, ppl: 11.209444 +epoch: 2, batch: 23674, sum loss: 4282.719727, avg loss: 2.492852, ppl: 12.095724 +epoch: 2, batch: 23675, sum loss: 4109.881348, avg loss: 2.493860, ppl: 12.107923 +epoch: 2, batch: 23676, sum loss: 4529.834473, avg loss: 2.592922, ppl: 13.368774 +epoch: 2, batch: 23677, sum loss: 3969.812500, avg loss: 2.523721, ppl: 12.474923 +epoch: 2, batch: 23678, sum loss: 3673.091309, avg loss: 2.270143, ppl: 9.680785 +epoch: 2, batch: 23679, sum loss: 3688.104980, avg loss: 2.259868, ppl: 9.581828 +epoch: 2, batch: 23680, sum loss: 4139.157715, avg loss: 2.548742, ppl: 12.791006 +epoch: 2, batch: 23681, sum loss: 4200.799316, avg loss: 2.392255, ppl: 10.938129 +epoch: 2, batch: 23682, sum loss: 3946.051270, avg loss: 2.515010, ppl: 12.366736 +epoch: 2, batch: 23683, sum loss: 3919.575195, avg loss: 2.423980, ppl: 11.290705 +epoch: 2, batch: 23684, sum loss: 3888.453125, avg loss: 2.277946, ppl: 9.756615 +epoch: 2, batch: 23685, sum loss: 4679.638672, avg loss: 2.439853, ppl: 11.471359 +epoch: 2, batch: 23686, sum loss: 4343.813965, avg loss: 2.475108, ppl: 11.882986 +epoch: 2, batch: 23687, sum loss: 4301.142578, avg loss: 2.551093, ppl: 12.821108 +epoch: 2, batch: 23688, sum loss: 4806.190430, avg loss: 2.561935, ppl: 12.960875 +epoch: 2, batch: 23689, sum loss: 4125.468750, avg loss: 
2.486720, ppl: 12.021781 +epoch: 2, batch: 23690, sum loss: 4710.690430, avg loss: 2.860164, ppl: 17.464394 +epoch: 2, batch: 23691, sum loss: 4794.185547, avg loss: 2.750537, ppl: 15.651030 +epoch: 2, batch: 23692, sum loss: 4097.678223, avg loss: 2.543562, ppl: 12.724916 +epoch: 2, batch: 23693, sum loss: 4499.856445, avg loss: 2.645418, ppl: 14.089335 +epoch: 2, batch: 23694, sum loss: 3358.417969, avg loss: 2.279985, ppl: 9.776536 +epoch: 2, batch: 23695, sum loss: 3559.473877, avg loss: 2.414840, ppl: 11.187977 +epoch: 2, batch: 23696, sum loss: 4362.847656, avg loss: 2.589227, ppl: 13.319471 +epoch: 2, batch: 23697, sum loss: 5397.686523, avg loss: 2.805450, ppl: 16.534521 +epoch: 2, batch: 23698, sum loss: 3392.867920, avg loss: 2.252900, ppl: 9.515294 +epoch: 2, batch: 23699, sum loss: 3698.287109, avg loss: 2.263334, ppl: 9.615088 +epoch: 2, batch: 23700, sum loss: 4177.560547, avg loss: 2.381733, ppl: 10.823648 +epoch: 2, batch: 23701, sum loss: 4057.510986, avg loss: 2.465074, ppl: 11.764348 +epoch: 2, batch: 23702, sum loss: 4434.735352, avg loss: 2.456917, ppl: 11.668781 +epoch: 2, batch: 23703, sum loss: 4049.560059, avg loss: 2.577696, ppl: 13.166765 +epoch: 2, batch: 23704, sum loss: 4489.150391, avg loss: 2.445071, ppl: 11.531368 +epoch: 2, batch: 23705, sum loss: 4887.687012, avg loss: 2.769228, ppl: 15.946315 +epoch: 2, batch: 23706, sum loss: 3634.723145, avg loss: 2.229892, ppl: 9.298858 +epoch: 2, batch: 23707, sum loss: 4045.756592, avg loss: 2.557368, ppl: 12.901815 +epoch: 2, batch: 23708, sum loss: 4419.285645, avg loss: 2.581358, ppl: 13.215077 +epoch: 2, batch: 23709, sum loss: 4757.567383, avg loss: 2.644562, ppl: 14.077281 +epoch: 2, batch: 23710, sum loss: 4419.617188, avg loss: 2.477364, ppl: 11.909829 +epoch: 2, batch: 23711, sum loss: 4921.892578, avg loss: 2.702852, ppl: 14.922222 +epoch: 2, batch: 23712, sum loss: 4725.307129, avg loss: 2.582135, ppl: 13.225347 +epoch: 2, batch: 23713, sum loss: 3630.520020, avg loss: 2.197651, 
ppl: 9.003842 +epoch: 2, batch: 23714, sum loss: 3683.343994, avg loss: 2.536738, ppl: 12.638382 +epoch: 2, batch: 23715, sum loss: 3666.390625, avg loss: 2.423259, ppl: 11.282570 +epoch: 2, batch: 23716, sum loss: 3907.532959, avg loss: 2.361047, ppl: 10.602049 +epoch: 2, batch: 23717, sum loss: 4750.853027, avg loss: 2.669019, ppl: 14.425803 +epoch: 2, batch: 23718, sum loss: 3453.686035, avg loss: 2.325714, ppl: 10.233988 +epoch: 2, batch: 23719, sum loss: 4189.685547, avg loss: 2.448677, ppl: 11.573021 +epoch: 2, batch: 23720, sum loss: 3922.732666, avg loss: 2.276688, ppl: 9.744349 +epoch: 2, batch: 23721, sum loss: 4921.043945, avg loss: 2.667233, ppl: 14.400062 +epoch: 2, batch: 23722, sum loss: 4367.334473, avg loss: 2.382616, ppl: 10.833200 +epoch: 2, batch: 23723, sum loss: 3362.959717, avg loss: 2.158511, ppl: 8.658234 +epoch: 2, batch: 23724, sum loss: 4946.616211, avg loss: 2.583089, ppl: 13.237972 +epoch: 2, batch: 23725, sum loss: 3957.913574, avg loss: 2.419263, ppl: 11.237570 +epoch: 2, batch: 23726, sum loss: 3612.445068, avg loss: 2.115015, ppl: 8.289706 +epoch: 2, batch: 23727, sum loss: 3659.496338, avg loss: 2.481014, ppl: 11.953384 +epoch: 2, batch: 23728, sum loss: 4085.984131, avg loss: 2.403520, ppl: 11.062048 +epoch: 2, batch: 23729, sum loss: 4155.726074, avg loss: 2.473647, ppl: 11.865638 +epoch: 2, batch: 23730, sum loss: 3749.368652, avg loss: 2.324469, ppl: 10.221252 +epoch: 2, batch: 23731, sum loss: 3705.131104, avg loss: 2.465157, ppl: 11.765329 +epoch: 2, batch: 23732, sum loss: 3928.429688, avg loss: 2.629471, ppl: 13.866433 +epoch: 2, batch: 23733, sum loss: 4802.374023, avg loss: 2.492151, ppl: 12.087242 +epoch: 2, batch: 23734, sum loss: 5014.975586, avg loss: 2.807937, ppl: 16.575689 +epoch: 2, batch: 23735, sum loss: 3778.466064, avg loss: 2.295544, ppl: 9.929840 +epoch: 2, batch: 23736, sum loss: 3917.878174, avg loss: 2.438008, ppl: 11.450205 +epoch: 2, batch: 23737, sum loss: 4047.736328, avg loss: 2.312992, ppl: 
10.104613 +epoch: 2, batch: 23738, sum loss: 3620.097412, avg loss: 2.395829, ppl: 10.977291 +epoch: 2, batch: 23739, sum loss: 4299.376953, avg loss: 2.232283, ppl: 9.321123 +epoch: 2, batch: 23740, sum loss: 4828.049316, avg loss: 2.626795, ppl: 13.829376 +epoch: 2, batch: 23741, sum loss: 4059.332031, avg loss: 2.442438, ppl: 11.501047 +epoch: 2, batch: 23742, sum loss: 5124.322266, avg loss: 2.820210, ppl: 16.780382 +epoch: 2, batch: 23743, sum loss: 4360.195312, avg loss: 2.455065, ppl: 11.647191 +epoch: 2, batch: 23744, sum loss: 4351.732910, avg loss: 2.570427, ppl: 13.071407 +epoch: 2, batch: 23745, sum loss: 3742.218506, avg loss: 2.369993, ppl: 10.697314 +epoch: 2, batch: 23746, sum loss: 3192.216553, avg loss: 2.109859, ppl: 8.247078 +epoch: 2, batch: 23747, sum loss: 4279.699219, avg loss: 2.555044, ppl: 12.871871 +epoch: 2, batch: 23748, sum loss: 4778.724609, avg loss: 2.779945, ppl: 16.118130 +epoch: 2, batch: 23749, sum loss: 3727.997559, avg loss: 2.455861, ppl: 11.656469 +epoch: 2, batch: 23750, sum loss: 4065.052246, avg loss: 2.426897, ppl: 11.323690 +epoch: 2, batch: 23751, sum loss: 3769.844971, avg loss: 2.344431, ppl: 10.427337 +epoch: 2, batch: 23752, sum loss: 4399.648438, avg loss: 2.413411, ppl: 11.172006 +epoch: 2, batch: 23753, sum loss: 3865.670898, avg loss: 2.769105, ppl: 15.944365 +epoch: 2, batch: 23754, sum loss: 3964.635010, avg loss: 2.421891, ppl: 11.267142 +epoch: 2, batch: 23755, sum loss: 3810.817871, avg loss: 2.644565, ppl: 14.077318 +epoch: 2, batch: 23756, sum loss: 4427.365234, avg loss: 2.816390, ppl: 16.716396 +epoch: 2, batch: 23757, sum loss: 3825.480713, avg loss: 2.304507, ppl: 10.019233 +epoch: 2, batch: 23758, sum loss: 3852.336914, avg loss: 2.436646, ppl: 11.434622 +epoch: 2, batch: 23759, sum loss: 3829.956543, avg loss: 2.470940, ppl: 11.833564 +epoch: 2, batch: 23760, sum loss: 4472.758789, avg loss: 2.679904, ppl: 14.583686 +epoch: 2, batch: 23761, sum loss: 4008.484131, avg loss: 2.597851, ppl: 13.434836 
+epoch: 2, batch: 23762, sum loss: 3407.069092, avg loss: 2.309877, ppl: 10.073190 +epoch: 2, batch: 23763, sum loss: 3618.541016, avg loss: 2.485262, ppl: 12.004261 +epoch: 2, batch: 23764, sum loss: 4053.148926, avg loss: 2.545948, ppl: 12.755312 +epoch: 2, batch: 23765, sum loss: 5146.403320, avg loss: 2.608415, ppl: 13.577518 +epoch: 2, batch: 23766, sum loss: 4584.327148, avg loss: 2.642263, ppl: 14.044957 +epoch: 2, batch: 23767, sum loss: 4687.382812, avg loss: 2.737957, ppl: 15.455381 +epoch: 2, batch: 23768, sum loss: 4460.915039, avg loss: 2.389349, ppl: 10.906394 +epoch: 2, batch: 23769, sum loss: 3903.236084, avg loss: 2.279927, ppl: 9.775971 +epoch: 2, batch: 23770, sum loss: 4737.443359, avg loss: 2.441981, ppl: 11.495795 +epoch: 2, batch: 23771, sum loss: 4833.238281, avg loss: 2.546490, ppl: 12.762235 +epoch: 2, batch: 23772, sum loss: 4971.672852, avg loss: 2.746781, ppl: 15.592354 +epoch: 2, batch: 23773, sum loss: 3973.279785, avg loss: 2.232180, ppl: 9.320159 +epoch: 2, batch: 23774, sum loss: 4363.779297, avg loss: 2.569952, ppl: 13.065204 +epoch: 2, batch: 23775, sum loss: 5385.946289, avg loss: 2.809571, ppl: 16.602787 +epoch: 2, batch: 23776, sum loss: 4651.805176, avg loss: 2.526782, ppl: 12.513171 +epoch: 2, batch: 23777, sum loss: 4504.109863, avg loss: 2.476146, ppl: 11.895334 +epoch: 2, batch: 23778, sum loss: 3905.805176, avg loss: 2.348650, ppl: 10.471426 +epoch: 2, batch: 23779, sum loss: 4334.871094, avg loss: 2.495608, ppl: 12.129107 +epoch: 2, batch: 23780, sum loss: 3605.220947, avg loss: 2.135794, ppl: 8.463767 +epoch: 2, batch: 23781, sum loss: 4084.375977, avg loss: 2.331265, ppl: 10.290951 +epoch: 2, batch: 23782, sum loss: 4080.713623, avg loss: 2.566487, ppl: 13.020000 +epoch: 2, batch: 23783, sum loss: 4662.380859, avg loss: 2.501277, ppl: 12.198064 +epoch: 2, batch: 23784, sum loss: 4055.958496, avg loss: 2.325664, ppl: 10.233476 +epoch: 2, batch: 23785, sum loss: 5094.167969, avg loss: 2.718339, ppl: 15.155131 +epoch: 2, 
batch: 23786, sum loss: 4684.444336, avg loss: 2.652573, ppl: 14.190505 +epoch: 2, batch: 23787, sum loss: 3859.196289, avg loss: 2.397016, ppl: 10.990335 +epoch: 2, batch: 23788, sum loss: 4915.181641, avg loss: 2.736738, ppl: 15.436552 +epoch: 2, batch: 23789, sum loss: 4986.101562, avg loss: 2.689375, ppl: 14.722474 +epoch: 2, batch: 23790, sum loss: 4277.968750, avg loss: 2.652182, ppl: 14.184951 +epoch: 2, batch: 23791, sum loss: 3917.291016, avg loss: 2.515922, ppl: 12.378020 +epoch: 2, batch: 23792, sum loss: 3381.500732, avg loss: 2.336905, ppl: 10.349152 +epoch: 2, batch: 23793, sum loss: 4396.499023, avg loss: 2.441143, ppl: 11.486165 +epoch: 2, batch: 23794, sum loss: 4340.430664, avg loss: 2.694246, ppl: 14.794364 +epoch: 2, batch: 23795, sum loss: 4639.179199, avg loss: 2.632905, ppl: 13.914139 +epoch: 2, batch: 23796, sum loss: 4131.017578, avg loss: 2.499103, ppl: 12.171572 +epoch: 2, batch: 23797, sum loss: 4148.340820, avg loss: 2.512623, ppl: 12.337252 +epoch: 2, batch: 23798, sum loss: 4646.509766, avg loss: 2.591472, ppl: 13.349413 +epoch: 2, batch: 23799, sum loss: 4959.157227, avg loss: 2.531474, ppl: 12.572022 +epoch: 2, batch: 23800, sum loss: 3914.255371, avg loss: 2.502721, ppl: 12.215686 +epoch: 2, batch: 23801, sum loss: 4088.857910, avg loss: 2.472103, ppl: 11.847334 +epoch: 2, batch: 23802, sum loss: 4866.458984, avg loss: 2.630518, ppl: 13.880964 +epoch: 2, batch: 23803, sum loss: 4979.409180, avg loss: 2.650032, ppl: 14.154486 +epoch: 2, batch: 23804, sum loss: 4517.833008, avg loss: 2.496041, ppl: 12.134354 +epoch: 2, batch: 23805, sum loss: 4744.839355, avg loss: 2.541424, ppl: 12.697743 +epoch: 2, batch: 23806, sum loss: 4546.087402, avg loss: 2.768628, ppl: 15.936760 +epoch: 2, batch: 23807, sum loss: 4181.038574, avg loss: 2.554086, ppl: 12.859540 +epoch: 2, batch: 23808, sum loss: 4535.124023, avg loss: 2.741913, ppl: 15.516642 +epoch: 2, batch: 23809, sum loss: 4574.307617, avg loss: 2.591676, ppl: 13.352128 +epoch: 2, batch: 
23810, sum loss: 3870.100342, avg loss: 2.337017, ppl: 10.350316 +epoch: 2, batch: 23811, sum loss: 4412.683105, avg loss: 2.803483, ppl: 16.502031 +epoch: 2, batch: 23812, sum loss: 5020.658691, avg loss: 2.667725, ppl: 14.407157 +epoch: 2, batch: 23813, sum loss: 4302.886230, avg loss: 2.447603, ppl: 11.560602 +epoch: 2, batch: 23814, sum loss: 5347.808594, avg loss: 2.756603, ppl: 15.746255 +epoch: 2, batch: 23815, sum loss: 4003.513428, avg loss: 2.296910, ppl: 9.943405 +epoch: 2, batch: 23816, sum loss: 3634.347656, avg loss: 2.349288, ppl: 10.478102 +epoch: 2, batch: 23817, sum loss: 3459.152832, avg loss: 2.272768, ppl: 9.706231 +epoch: 2, batch: 23818, sum loss: 3983.441406, avg loss: 2.307904, ppl: 10.053326 +epoch: 2, batch: 23819, sum loss: 3992.947266, avg loss: 2.395289, ppl: 10.971371 +epoch: 2, batch: 23820, sum loss: 4211.730469, avg loss: 2.383549, ppl: 10.843315 +epoch: 2, batch: 23821, sum loss: 4032.153320, avg loss: 2.561724, ppl: 12.958134 +epoch: 2, batch: 23822, sum loss: 3730.691650, avg loss: 2.386879, ppl: 10.879486 +epoch: 2, batch: 23823, sum loss: 4632.504883, avg loss: 2.457562, ppl: 11.676312 +epoch: 2, batch: 23824, sum loss: 3916.188477, avg loss: 2.446089, ppl: 11.543114 +epoch: 2, batch: 23825, sum loss: 4094.621094, avg loss: 2.474091, ppl: 11.870915 +epoch: 2, batch: 23826, sum loss: 5110.500977, avg loss: 2.594163, ppl: 13.385378 +epoch: 2, batch: 23827, sum loss: 4236.511719, avg loss: 2.654456, ppl: 14.217248 +epoch: 2, batch: 23828, sum loss: 4130.001465, avg loss: 2.618898, ppl: 13.720591 +epoch: 2, batch: 23829, sum loss: 3395.785400, avg loss: 2.393084, ppl: 10.947199 +epoch: 2, batch: 23830, sum loss: 4094.254150, avg loss: 2.377616, ppl: 10.779171 +epoch: 2, batch: 23831, sum loss: 4116.002930, avg loss: 2.435505, ppl: 11.421581 +epoch: 2, batch: 23832, sum loss: 3659.042969, avg loss: 2.206902, ppl: 9.087518 +epoch: 2, batch: 23833, sum loss: 3968.809326, avg loss: 2.482057, ppl: 11.965854 +epoch: 2, batch: 23834, sum 
loss: 4504.665527, avg loss: 2.660759, ppl: 14.307151 +epoch: 2, batch: 23835, sum loss: 4188.008789, avg loss: 2.676044, ppl: 14.527509 +epoch: 2, batch: 23836, sum loss: 4656.660156, avg loss: 2.587034, ppl: 13.290288 +epoch: 2, batch: 23837, sum loss: 5269.456055, avg loss: 2.670784, ppl: 14.451291 +epoch: 2, batch: 23838, sum loss: 3960.107666, avg loss: 2.350212, ppl: 10.487797 +epoch: 2, batch: 23839, sum loss: 4369.885742, avg loss: 2.616698, ppl: 13.690443 +epoch: 2, batch: 23840, sum loss: 3867.372559, avg loss: 2.405082, ppl: 11.079344 +epoch: 2, batch: 23841, sum loss: 4913.723633, avg loss: 2.768295, ppl: 15.931445 +epoch: 2, batch: 23842, sum loss: 3606.798340, avg loss: 2.328469, ppl: 10.262216 +epoch: 2, batch: 23843, sum loss: 3901.520752, avg loss: 2.311327, ppl: 10.087807 +epoch: 2, batch: 23844, sum loss: 3891.614746, avg loss: 2.499431, ppl: 12.175569 +epoch: 2, batch: 23845, sum loss: 4252.020020, avg loss: 2.498249, ppl: 12.161181 +epoch: 2, batch: 23846, sum loss: 4495.641113, avg loss: 2.432706, ppl: 11.389662 +epoch: 2, batch: 23847, sum loss: 3608.267334, avg loss: 2.178905, ppl: 8.836627 +epoch: 2, batch: 23848, sum loss: 4681.473145, avg loss: 2.555389, ppl: 12.876313 +epoch: 2, batch: 23849, sum loss: 5701.408203, avg loss: 2.771710, ppl: 15.985950 +epoch: 2, batch: 23850, sum loss: 4533.377441, avg loss: 2.506013, ppl: 12.255970 +epoch: 2, batch: 23851, sum loss: 3876.198242, avg loss: 2.500773, ppl: 12.191917 +epoch: 2, batch: 23852, sum loss: 4405.424805, avg loss: 2.663497, ppl: 14.346377 +epoch: 2, batch: 23853, sum loss: 5055.159180, avg loss: 2.635641, ppl: 13.952251 +epoch: 2, batch: 23854, sum loss: 4223.728516, avg loss: 2.586484, ppl: 13.282986 +epoch: 2, batch: 23855, sum loss: 4234.925293, avg loss: 2.485285, ppl: 12.004539 +epoch: 2, batch: 23856, sum loss: 3655.891113, avg loss: 2.476891, ppl: 11.904197 +epoch: 2, batch: 23857, sum loss: 4180.940430, avg loss: 2.484219, ppl: 11.991749 +epoch: 2, batch: 23858, sum loss: 
4553.412109, avg loss: 2.669057, ppl: 14.426364 +epoch: 2, batch: 23859, sum loss: 3398.130859, avg loss: 2.317961, ppl: 10.154947 +epoch: 2, batch: 23860, sum loss: 4243.656738, avg loss: 2.441690, ppl: 11.492443 +epoch: 2, batch: 23861, sum loss: 3755.904541, avg loss: 2.314174, ppl: 10.116563 +epoch: 2, batch: 23862, sum loss: 4374.892578, avg loss: 2.558417, ppl: 12.915351 +epoch: 2, batch: 23863, sum loss: 4507.760254, avg loss: 2.380021, ppl: 10.805134 +epoch: 2, batch: 23864, sum loss: 4182.545410, avg loss: 2.278075, ppl: 9.757875 +epoch: 2, batch: 23865, sum loss: 3764.454102, avg loss: 2.325172, ppl: 10.228443 +epoch: 2, batch: 23866, sum loss: 3481.947998, avg loss: 2.275783, ppl: 9.735539 +epoch: 2, batch: 23867, sum loss: 3608.672607, avg loss: 2.466625, ppl: 11.782616 +epoch: 2, batch: 23868, sum loss: 3919.638672, avg loss: 2.391482, ppl: 10.929676 +epoch: 2, batch: 23869, sum loss: 3915.665283, avg loss: 2.281856, ppl: 9.794846 +epoch: 2, batch: 23870, sum loss: 3244.575195, avg loss: 2.016517, ppl: 7.512112 +epoch: 2, batch: 23871, sum loss: 4179.480469, avg loss: 2.528421, ppl: 12.533702 +epoch: 2, batch: 23872, sum loss: 4421.951172, avg loss: 2.535522, ppl: 12.623024 +epoch: 2, batch: 23873, sum loss: 3616.781982, avg loss: 2.502963, ppl: 12.218648 +epoch: 2, batch: 23874, sum loss: 4503.381836, avg loss: 2.510246, ppl: 12.307961 +epoch: 2, batch: 23875, sum loss: 4070.883789, avg loss: 2.440578, ppl: 11.479674 +epoch: 2, batch: 23876, sum loss: 4071.133789, avg loss: 2.579933, ppl: 13.196250 +epoch: 2, batch: 23877, sum loss: 4625.366699, avg loss: 2.791410, ppl: 16.303995 +epoch: 2, batch: 23878, sum loss: 4282.379883, avg loss: 2.468230, ppl: 11.801545 +epoch: 2, batch: 23879, sum loss: 3498.427734, avg loss: 2.267290, ppl: 9.653202 +epoch: 2, batch: 23880, sum loss: 3860.585449, avg loss: 2.383078, ppl: 10.838207 +epoch: 2, batch: 23881, sum loss: 3616.969727, avg loss: 2.160675, ppl: 8.676993 +epoch: 2, batch: 23882, sum loss: 4137.925293, 
avg loss: 2.369946, ppl: 10.696812 +epoch: 2, batch: 23883, sum loss: 4684.755371, avg loss: 2.574042, ppl: 13.118738 +epoch: 2, batch: 23884, sum loss: 4768.842773, avg loss: 2.644949, ppl: 14.082726 +epoch: 2, batch: 23885, sum loss: 4250.740234, avg loss: 2.661703, ppl: 14.320662 +epoch: 2, batch: 23886, sum loss: 4191.950195, avg loss: 2.476049, ppl: 11.894174 +epoch: 2, batch: 23887, sum loss: 4858.715820, avg loss: 2.644919, ppl: 14.082303 +epoch: 2, batch: 23888, sum loss: 3866.870117, avg loss: 2.336478, ppl: 10.344733 +epoch: 2, batch: 23889, sum loss: 4387.029297, avg loss: 2.649172, ppl: 14.142328 +epoch: 2, batch: 23890, sum loss: 3585.291504, avg loss: 2.254900, ppl: 9.534342 +epoch: 2, batch: 23891, sum loss: 3977.396240, avg loss: 2.417870, ppl: 11.221931 +epoch: 2, batch: 23892, sum loss: 5352.594727, avg loss: 2.839573, ppl: 17.108458 +epoch: 2, batch: 23893, sum loss: 3818.765625, avg loss: 2.271723, ppl: 9.696089 +epoch: 2, batch: 23894, sum loss: 4320.152344, avg loss: 2.608788, ppl: 13.582573 +epoch: 2, batch: 23895, sum loss: 4475.178223, avg loss: 2.663797, ppl: 14.350671 +epoch: 2, batch: 23896, sum loss: 4992.238770, avg loss: 2.810945, ppl: 16.625626 +epoch: 2, batch: 23897, sum loss: 4211.278320, avg loss: 2.585192, ppl: 13.265839 +epoch: 2, batch: 23898, sum loss: 5526.212891, avg loss: 2.704950, ppl: 14.953570 +epoch: 2, batch: 23899, sum loss: 4839.648926, avg loss: 2.757635, ppl: 15.762515 +epoch: 2, batch: 23900, sum loss: 4000.319092, avg loss: 2.427378, ppl: 11.329140 +epoch: 2, batch: 23901, sum loss: 3837.734863, avg loss: 2.285727, ppl: 9.832828 +epoch: 2, batch: 23902, sum loss: 3345.664795, avg loss: 2.341263, ppl: 10.394357 +epoch: 2, batch: 23903, sum loss: 3348.873535, avg loss: 2.161958, ppl: 8.688136 +epoch: 2, batch: 23904, sum loss: 4113.218262, avg loss: 2.398378, ppl: 11.005310 +epoch: 2, batch: 23905, sum loss: 4148.498047, avg loss: 2.376001, ppl: 10.761785 +epoch: 2, batch: 23906, sum loss: 3227.784668, avg loss: 
2.276294, ppl: 9.740515 +epoch: 2, batch: 23907, sum loss: 4277.229492, avg loss: 2.455356, ppl: 11.650577 +epoch: 2, batch: 23908, sum loss: 3927.780762, avg loss: 2.508161, ppl: 12.282326 +epoch: 2, batch: 23909, sum loss: 4242.921875, avg loss: 2.452556, ppl: 11.618003 +epoch: 2, batch: 23910, sum loss: 5198.343750, avg loss: 2.822119, ppl: 16.812447 +epoch: 2, batch: 23911, sum loss: 3714.962646, avg loss: 2.286131, ppl: 9.836804 +epoch: 2, batch: 23912, sum loss: 3925.559326, avg loss: 2.278328, ppl: 9.760349 +epoch: 2, batch: 23913, sum loss: 5239.232422, avg loss: 2.684033, ppl: 14.644036 +epoch: 2, batch: 23914, sum loss: 3763.482422, avg loss: 2.641040, ppl: 14.027786 +epoch: 2, batch: 23915, sum loss: 4392.009766, avg loss: 2.433246, ppl: 11.395817 +epoch: 2, batch: 23916, sum loss: 3980.462891, avg loss: 2.349742, ppl: 10.482864 +epoch: 2, batch: 23917, sum loss: 3990.233398, avg loss: 2.574344, ppl: 13.122708 +epoch: 2, batch: 23918, sum loss: 4126.644043, avg loss: 2.577542, ppl: 13.164734 +epoch: 2, batch: 23919, sum loss: 5114.891602, avg loss: 2.633827, ppl: 13.926966 +epoch: 2, batch: 23920, sum loss: 3901.224609, avg loss: 2.360087, ppl: 10.591877 +epoch: 2, batch: 23921, sum loss: 3404.252686, avg loss: 2.271016, ppl: 9.689239 +epoch: 2, batch: 23922, sum loss: 4309.482422, avg loss: 2.688386, ppl: 14.707914 +epoch: 2, batch: 23923, sum loss: 4684.556152, avg loss: 2.703148, ppl: 14.926652 +epoch: 2, batch: 23924, sum loss: 4027.160156, avg loss: 2.395693, ppl: 10.975802 +epoch: 2, batch: 23925, sum loss: 4044.283203, avg loss: 2.413057, ppl: 11.168048 +epoch: 2, batch: 23926, sum loss: 4298.111328, avg loss: 2.460281, ppl: 11.708102 +epoch: 2, batch: 23927, sum loss: 3471.546875, avg loss: 2.387584, ppl: 10.887156 +epoch: 2, batch: 23928, sum loss: 4258.170410, avg loss: 2.663021, ppl: 14.339541 +epoch: 2, batch: 23929, sum loss: 3104.043213, avg loss: 2.319913, ppl: 10.174786 +epoch: 2, batch: 23930, sum loss: 4334.380859, avg loss: 2.719185, 
ppl: 15.167953 +epoch: 2, batch: 23931, sum loss: 4973.386719, avg loss: 2.454781, ppl: 11.643887 +epoch: 2, batch: 23932, sum loss: 4510.242188, avg loss: 2.599563, ppl: 13.457861 +epoch: 2, batch: 23933, sum loss: 4505.194336, avg loss: 2.484939, ppl: 12.000389 +epoch: 2, batch: 23934, sum loss: 5082.179199, avg loss: 3.021510, ppl: 20.522249 +epoch: 2, batch: 23935, sum loss: 4640.097656, avg loss: 2.699301, ppl: 14.869331 +epoch: 2, batch: 23936, sum loss: 4633.338379, avg loss: 2.587012, ppl: 13.289999 +epoch: 2, batch: 23937, sum loss: 3778.891846, avg loss: 2.494318, ppl: 12.113472 +epoch: 2, batch: 23938, sum loss: 3723.044189, avg loss: 2.244150, ppl: 9.432391 +epoch: 2, batch: 23939, sum loss: 3997.061035, avg loss: 2.413684, ppl: 11.175056 +epoch: 2, batch: 23940, sum loss: 3762.030029, avg loss: 2.355686, ppl: 10.545358 +epoch: 2, batch: 23941, sum loss: 3781.957275, avg loss: 2.301861, ppl: 9.992760 +epoch: 2, batch: 23942, sum loss: 4770.852539, avg loss: 2.743446, ppl: 15.540447 +epoch: 2, batch: 23943, sum loss: 3958.043945, avg loss: 2.388681, ppl: 10.899105 +epoch: 2, batch: 23944, sum loss: 4707.137207, avg loss: 2.549912, ppl: 12.805973 +epoch: 2, batch: 23945, sum loss: 4161.644043, avg loss: 2.516109, ppl: 12.380331 +epoch: 2, batch: 23946, sum loss: 4102.198730, avg loss: 2.594686, ppl: 13.392382 +epoch: 2, batch: 23947, sum loss: 3374.173340, avg loss: 2.338305, ppl: 10.363650 +epoch: 2, batch: 23948, sum loss: 3857.188232, avg loss: 2.325008, ppl: 10.226761 +epoch: 2, batch: 23949, sum loss: 3754.422363, avg loss: 2.353870, ppl: 10.526227 +epoch: 2, batch: 23950, sum loss: 4283.410645, avg loss: 2.603897, ppl: 13.516307 +epoch: 2, batch: 23951, sum loss: 4595.986816, avg loss: 2.550492, ppl: 12.813410 +epoch: 2, batch: 23952, sum loss: 4303.012207, avg loss: 2.394553, ppl: 10.963301 +epoch: 2, batch: 23953, sum loss: 3639.017578, avg loss: 2.303176, ppl: 10.005908 +epoch: 2, batch: 23954, sum loss: 4340.576660, avg loss: 2.681023, ppl: 
14.600027 +epoch: 2, batch: 23955, sum loss: 4457.046875, avg loss: 2.436877, ppl: 11.437261 +epoch: 2, batch: 23956, sum loss: 4267.205078, avg loss: 2.410850, ppl: 11.143433 +epoch: 2, batch: 23957, sum loss: 4432.156250, avg loss: 2.515412, ppl: 12.371709 +epoch: 2, batch: 23958, sum loss: 4355.889648, avg loss: 2.417253, ppl: 11.215010 +epoch: 2, batch: 23959, sum loss: 5067.992188, avg loss: 2.739455, ppl: 15.478551 +epoch: 2, batch: 23960, sum loss: 4791.517090, avg loss: 2.653110, ppl: 14.198130 +epoch: 2, batch: 23961, sum loss: 3699.414307, avg loss: 2.306368, ppl: 10.037902 +epoch: 2, batch: 23962, sum loss: 4790.112793, avg loss: 2.658220, ppl: 14.270868 +epoch: 2, batch: 23963, sum loss: 4343.932617, avg loss: 2.418671, ppl: 11.230923 +epoch: 2, batch: 23964, sum loss: 4123.419434, avg loss: 2.486984, ppl: 12.024954 +epoch: 2, batch: 23965, sum loss: 3822.620605, avg loss: 2.407192, ppl: 11.102740 +epoch: 2, batch: 23966, sum loss: 5056.009277, avg loss: 2.529269, ppl: 12.544338 +epoch: 2, batch: 23967, sum loss: 4948.452637, avg loss: 2.695235, ppl: 14.809006 +epoch: 2, batch: 23968, sum loss: 4709.222168, avg loss: 2.525052, ppl: 12.491546 +epoch: 2, batch: 23969, sum loss: 4684.447754, avg loss: 2.675299, ppl: 14.516685 +epoch: 2, batch: 23970, sum loss: 4547.142578, avg loss: 2.601340, ppl: 13.481795 +epoch: 2, batch: 23971, sum loss: 4385.583496, avg loss: 2.588892, ppl: 13.315013 +epoch: 2, batch: 23972, sum loss: 3861.721680, avg loss: 2.337604, ppl: 10.356394 +epoch: 2, batch: 23973, sum loss: 4368.578613, avg loss: 2.578854, ppl: 13.182021 +epoch: 2, batch: 23974, sum loss: 3721.249756, avg loss: 2.251210, ppl: 9.499221 +epoch: 2, batch: 23975, sum loss: 3890.567383, avg loss: 2.479648, ppl: 11.937060 +epoch: 2, batch: 23976, sum loss: 4263.551758, avg loss: 2.434924, ppl: 11.414952 +epoch: 2, batch: 23977, sum loss: 4508.954590, avg loss: 2.655450, ppl: 14.231394 +epoch: 2, batch: 23978, sum loss: 4122.209961, avg loss: 2.475802, ppl: 
11.891239 +epoch: 2, batch: 23979, sum loss: 4792.809082, avg loss: 2.533197, ppl: 12.593706 +epoch: 2, batch: 23980, sum loss: 4228.986816, avg loss: 2.298362, ppl: 9.957861 +epoch: 2, batch: 23981, sum loss: 4601.589844, avg loss: 2.522801, ppl: 12.463463 +epoch: 2, batch: 23982, sum loss: 4193.286133, avg loss: 2.468091, ppl: 11.799896 +epoch: 2, batch: 23983, sum loss: 4165.205566, avg loss: 2.324334, ppl: 10.219868 +epoch: 2, batch: 23984, sum loss: 3679.435791, avg loss: 2.247670, ppl: 9.465657 +epoch: 2, batch: 23985, sum loss: 4531.668945, avg loss: 2.524607, ppl: 12.485984 +epoch: 2, batch: 23986, sum loss: 4371.031250, avg loss: 2.629983, ppl: 13.873530 +epoch: 2, batch: 23987, sum loss: 4094.588867, avg loss: 2.333099, ppl: 10.309844 +epoch: 2, batch: 23988, sum loss: 3205.224365, avg loss: 2.156948, ppl: 8.644711 +epoch: 2, batch: 23989, sum loss: 5253.640137, avg loss: 2.750597, ppl: 15.651974 +epoch: 2, batch: 23990, sum loss: 3800.638672, avg loss: 2.493857, ppl: 12.107891 +epoch: 2, batch: 23991, sum loss: 4089.654297, avg loss: 2.530727, ppl: 12.562632 +epoch: 2, batch: 23992, sum loss: 3835.171875, avg loss: 2.523139, ppl: 12.467677 +epoch: 2, batch: 23993, sum loss: 4730.137207, avg loss: 2.812210, ppl: 16.646664 +epoch: 2, batch: 23994, sum loss: 3835.653320, avg loss: 2.220992, ppl: 9.216470 +epoch: 2, batch: 23995, sum loss: 4113.370117, avg loss: 2.431070, ppl: 11.371041 +epoch: 2, batch: 23996, sum loss: 4653.035156, avg loss: 2.488254, ppl: 12.040236 +epoch: 2, batch: 23997, sum loss: 3841.610107, avg loss: 2.371364, ppl: 10.711997 +epoch: 2, batch: 23998, sum loss: 4561.318359, avg loss: 2.604979, ppl: 13.530945 +epoch: 2, batch: 23999, sum loss: 3930.428223, avg loss: 2.309300, ppl: 10.067372 +epoch: 2, batch: 24000, sum loss: 4529.901367, avg loss: 2.792788, ppl: 16.326468 +epoch: 2, batch: 24001, sum loss: 4731.589355, avg loss: 2.440222, ppl: 11.475583 +epoch: 2, batch: 24002, sum loss: 3074.023438, avg loss: 2.101178, ppl: 8.175797 
+epoch: 2, batch: 24003, sum loss: 4032.424072, avg loss: 2.292453, ppl: 9.899186 +epoch: 2, batch: 24004, sum loss: 3861.009033, avg loss: 2.316142, ppl: 10.136493 +epoch: 2, batch: 24005, sum loss: 3334.714111, avg loss: 2.114594, ppl: 8.286219 +epoch: 2, batch: 24006, sum loss: 4166.926270, avg loss: 2.442513, ppl: 11.501904 +epoch: 2, batch: 24007, sum loss: 4647.824707, avg loss: 2.566441, ppl: 13.019410 +epoch: 2, batch: 24008, sum loss: 3996.865967, avg loss: 2.403407, ppl: 11.060798 +epoch: 2, batch: 24009, sum loss: 4260.025391, avg loss: 2.497084, ppl: 12.147020 +epoch: 2, batch: 24010, sum loss: 3794.143311, avg loss: 2.484704, ppl: 11.997572 +epoch: 2, batch: 24011, sum loss: 2969.090576, avg loss: 1.936785, ppl: 6.936411 +epoch: 2, batch: 24012, sum loss: 4058.739258, avg loss: 2.411610, ppl: 11.151897 +epoch: 2, batch: 24013, sum loss: 4522.186523, avg loss: 2.506755, ppl: 12.265070 +epoch: 2, batch: 24014, sum loss: 3728.113281, avg loss: 2.341780, ppl: 10.399729 +epoch: 2, batch: 24015, sum loss: 2895.817871, avg loss: 2.137135, ppl: 8.475121 +epoch: 2, batch: 24016, sum loss: 4987.913086, avg loss: 2.668760, ppl: 14.422079 +epoch: 2, batch: 24017, sum loss: 3191.027588, avg loss: 2.141629, ppl: 8.513296 +epoch: 2, batch: 24018, sum loss: 4013.910645, avg loss: 2.354200, ppl: 10.529700 +epoch: 2, batch: 24019, sum loss: 3280.442871, avg loss: 2.106900, ppl: 8.222709 +epoch: 2, batch: 24020, sum loss: 3816.357666, avg loss: 2.363070, ppl: 10.623513 +epoch: 2, batch: 24021, sum loss: 4223.278320, avg loss: 2.287800, ppl: 9.853235 +epoch: 2, batch: 24022, sum loss: 4223.634766, avg loss: 2.461326, ppl: 11.720338 +epoch: 2, batch: 24023, sum loss: 3876.292236, avg loss: 2.240631, ppl: 9.399263 +epoch: 2, batch: 24024, sum loss: 4088.087402, avg loss: 2.274951, ppl: 9.727447 +epoch: 2, batch: 24025, sum loss: 4881.194824, avg loss: 2.756180, ppl: 15.739603 +epoch: 2, batch: 24026, sum loss: 3654.978516, avg loss: 2.379543, ppl: 10.799970 +epoch: 2, 
batch: 24027, sum loss: 3856.230469, avg loss: 2.318840, ppl: 10.163876 +epoch: 2, batch: 24028, sum loss: 3702.282471, avg loss: 2.429319, ppl: 11.351151 +epoch: 2, batch: 24029, sum loss: 3870.624756, avg loss: 2.202974, ppl: 9.051890 +epoch: 2, batch: 24030, sum loss: 3642.816406, avg loss: 2.213133, ppl: 9.144320 +epoch: 2, batch: 24031, sum loss: 5485.952148, avg loss: 2.682617, ppl: 14.623315 +epoch: 2, batch: 24032, sum loss: 3915.748047, avg loss: 2.579544, ppl: 13.191126 +epoch: 2, batch: 24033, sum loss: 4310.310547, avg loss: 2.570251, ppl: 13.069101 +epoch: 2, batch: 24034, sum loss: 3948.665527, avg loss: 2.401865, ppl: 11.043751 +epoch: 2, batch: 24035, sum loss: 4908.331055, avg loss: 2.470222, ppl: 11.825068 +epoch: 2, batch: 24036, sum loss: 3859.506836, avg loss: 2.316631, ppl: 10.141446 +epoch: 2, batch: 24037, sum loss: 3911.610107, avg loss: 2.221244, ppl: 9.218788 +epoch: 2, batch: 24038, sum loss: 3991.523438, avg loss: 2.303245, ppl: 10.006599 +epoch: 2, batch: 24039, sum loss: 3874.593506, avg loss: 2.298098, ppl: 9.955233 +epoch: 2, batch: 24040, sum loss: 4448.213867, avg loss: 2.546201, ppl: 12.758545 +epoch: 2, batch: 24041, sum loss: 3610.972412, avg loss: 2.172667, ppl: 8.781673 +epoch: 2, batch: 24042, sum loss: 4782.450684, avg loss: 2.484390, ppl: 11.993799 +epoch: 2, batch: 24043, sum loss: 4005.256592, avg loss: 2.427428, ppl: 11.329707 +epoch: 2, batch: 24044, sum loss: 4109.352539, avg loss: 2.530390, ppl: 12.558397 +epoch: 2, batch: 24045, sum loss: 4182.840332, avg loss: 2.629064, ppl: 13.860785 +epoch: 2, batch: 24046, sum loss: 4809.060547, avg loss: 2.504719, ppl: 12.240122 +epoch: 2, batch: 24047, sum loss: 5020.668945, avg loss: 2.657845, ppl: 14.265511 +epoch: 2, batch: 24048, sum loss: 4144.942871, avg loss: 2.309160, ppl: 10.065970 +epoch: 2, batch: 24049, sum loss: 2943.975586, avg loss: 2.147320, ppl: 8.561882 +epoch: 2, batch: 24050, sum loss: 5790.738281, avg loss: 2.758808, ppl: 15.781023 +epoch: 2, batch: 24051, 
sum loss: 3862.384766, avg loss: 2.357988, ppl: 10.569665 +epoch: 2, batch: 24052, sum loss: 4048.785156, avg loss: 2.307000, ppl: 10.044246 +epoch: 2, batch: 24053, sum loss: 6019.881836, avg loss: 2.862521, ppl: 17.505606 +epoch: 2, batch: 24054, sum loss: 3918.147949, avg loss: 2.438175, ppl: 11.452127 +epoch: 2, batch: 24055, sum loss: 4411.842285, avg loss: 2.457851, ppl: 11.679684 +epoch: 2, batch: 24056, sum loss: 4329.937012, avg loss: 2.621027, ppl: 13.749844 +epoch: 2, batch: 24057, sum loss: 4523.899414, avg loss: 2.725241, ppl: 15.260087 +epoch: 2, batch: 24058, sum loss: 4354.180664, avg loss: 2.531500, ppl: 12.572355 +epoch: 2, batch: 24059, sum loss: 4946.669434, avg loss: 2.751206, ppl: 15.661514 +epoch: 2, batch: 24060, sum loss: 3789.006836, avg loss: 2.494409, ppl: 12.114567 +epoch: 2, batch: 24061, sum loss: 5170.139648, avg loss: 2.839176, ppl: 17.101671 +epoch: 2, batch: 24062, sum loss: 5174.312012, avg loss: 2.657582, ppl: 14.261763 +epoch: 2, batch: 24063, sum loss: 3811.110840, avg loss: 2.280737, ppl: 9.783885 +epoch: 2, batch: 24064, sum loss: 5178.930664, avg loss: 2.548686, ppl: 12.790290 +epoch: 2, batch: 24065, sum loss: 3630.323242, avg loss: 2.397836, ppl: 10.999345 +epoch: 2, batch: 24066, sum loss: 3772.973633, avg loss: 2.375928, ppl: 10.760997 +epoch: 2, batch: 24067, sum loss: 4316.437988, avg loss: 2.548074, ppl: 12.782464 +epoch: 2, batch: 24068, sum loss: 3813.567139, avg loss: 2.297330, ppl: 9.947583 +epoch: 2, batch: 24069, sum loss: 3393.981201, avg loss: 2.232882, ppl: 9.326712 +epoch: 2, batch: 24070, sum loss: 4781.879883, avg loss: 2.593210, ppl: 13.372635 +epoch: 2, batch: 24071, sum loss: 3536.468262, avg loss: 2.217221, ppl: 9.181784 +epoch: 2, batch: 24072, sum loss: 4324.840332, avg loss: 2.492703, ppl: 12.093927 +epoch: 2, batch: 24073, sum loss: 3640.464111, avg loss: 2.363938, ppl: 10.632737 +epoch: 2, batch: 24074, sum loss: 4300.653320, avg loss: 2.598582, ppl: 13.444664 +epoch: 2, batch: 24075, sum loss: 
3680.586670, avg loss: 2.237439, ppl: 9.369303 +epoch: 2, batch: 24076, sum loss: 4874.814453, avg loss: 2.696247, ppl: 14.823991 +epoch: 2, batch: 24077, sum loss: 4058.350586, avg loss: 2.252137, ppl: 9.508032 +epoch: 2, batch: 24078, sum loss: 5071.466797, avg loss: 2.653829, ppl: 14.208336 +epoch: 2, batch: 24079, sum loss: 4518.635742, avg loss: 2.617981, ppl: 13.708022 +epoch: 2, batch: 24080, sum loss: 4392.188965, avg loss: 2.844682, ppl: 17.196093 +epoch: 2, batch: 24081, sum loss: 4299.813477, avg loss: 2.620240, ppl: 13.739017 +epoch: 2, batch: 24082, sum loss: 4282.639648, avg loss: 2.469804, ppl: 11.820125 +epoch: 2, batch: 24083, sum loss: 4430.953125, avg loss: 2.570158, ppl: 13.067895 +epoch: 2, batch: 24084, sum loss: 4221.407227, avg loss: 2.394445, ppl: 10.962117 +epoch: 2, batch: 24085, sum loss: 3888.570068, avg loss: 2.553231, ppl: 12.848544 +epoch: 2, batch: 24086, sum loss: 3959.942383, avg loss: 2.473418, ppl: 11.862927 +epoch: 2, batch: 24087, sum loss: 4690.016113, avg loss: 2.591169, ppl: 13.345368 +epoch: 2, batch: 24088, sum loss: 3155.670410, avg loss: 2.189917, ppl: 8.934472 +epoch: 2, batch: 24089, sum loss: 4421.821777, avg loss: 2.484169, ppl: 11.991158 +epoch: 2, batch: 24090, sum loss: 5250.906738, avg loss: 2.602035, ppl: 13.491165 +epoch: 2, batch: 24091, sum loss: 4564.642578, avg loss: 2.575984, ppl: 13.144238 +epoch: 2, batch: 24092, sum loss: 3959.951416, avg loss: 2.413133, ppl: 11.168898 +epoch: 2, batch: 24093, sum loss: 3562.499268, avg loss: 2.508802, ppl: 12.290203 +epoch: 2, batch: 24094, sum loss: 3769.144043, avg loss: 2.381013, ppl: 10.815855 +epoch: 2, batch: 24095, sum loss: 4076.236084, avg loss: 2.423446, ppl: 11.284678 +epoch: 2, batch: 24096, sum loss: 4125.131836, avg loss: 2.201244, ppl: 9.036251 +epoch: 2, batch: 24097, sum loss: 4054.424316, avg loss: 2.470703, ppl: 11.830766 +epoch: 2, batch: 24098, sum loss: 3741.179688, avg loss: 2.299434, ppl: 9.968540 +epoch: 2, batch: 24099, sum loss: 3447.258057, 
avg loss: 2.521769, ppl: 12.450603 +epoch: 2, batch: 24100, sum loss: 3874.632812, avg loss: 2.490124, ppl: 12.062772 +epoch: 2, batch: 24101, sum loss: 3917.730957, avg loss: 2.300488, ppl: 9.979051 +epoch: 2, batch: 24102, sum loss: 4778.387695, avg loss: 2.524241, ppl: 12.481415 +epoch: 2, batch: 24103, sum loss: 4425.752930, avg loss: 2.573112, ppl: 13.106552 +epoch: 2, batch: 24104, sum loss: 4381.570312, avg loss: 2.534164, ppl: 12.605893 +epoch: 2, batch: 24105, sum loss: 3083.870361, avg loss: 2.220209, ppl: 9.209254 +epoch: 2, batch: 24106, sum loss: 2748.093994, avg loss: 1.950386, ppl: 7.031402 +epoch: 2, batch: 24107, sum loss: 4587.831543, avg loss: 2.703496, ppl: 14.931835 +epoch: 2, batch: 24108, sum loss: 3151.577881, avg loss: 2.112318, ppl: 8.267382 +epoch: 2, batch: 24109, sum loss: 5107.060547, avg loss: 2.678060, ppl: 14.556827 +epoch: 2, batch: 24110, sum loss: 4933.303223, avg loss: 2.691382, ppl: 14.752051 +epoch: 2, batch: 24111, sum loss: 4290.640625, avg loss: 2.700214, ppl: 14.882922 +epoch: 2, batch: 24112, sum loss: 3669.747803, avg loss: 2.466228, ppl: 11.777940 +epoch: 2, batch: 24113, sum loss: 3947.435791, avg loss: 2.447263, ppl: 11.556679 +epoch: 2, batch: 24114, sum loss: 3927.867188, avg loss: 2.464158, ppl: 11.753576 +epoch: 2, batch: 24115, sum loss: 4094.290039, avg loss: 2.232437, ppl: 9.322558 +epoch: 2, batch: 24116, sum loss: 3805.992676, avg loss: 2.460241, ppl: 11.707634 +epoch: 2, batch: 24117, sum loss: 4572.673340, avg loss: 2.546032, ppl: 12.756385 +epoch: 2, batch: 24118, sum loss: 4669.731445, avg loss: 2.685297, ppl: 14.662556 +epoch: 2, batch: 24119, sum loss: 3358.801025, avg loss: 2.319614, ppl: 10.171749 +epoch: 2, batch: 24120, sum loss: 3664.294434, avg loss: 2.206077, ppl: 9.080029 +epoch: 2, batch: 24121, sum loss: 3980.231201, avg loss: 2.335816, ppl: 10.337896 +epoch: 2, batch: 24122, sum loss: 4253.032715, avg loss: 2.440065, ppl: 11.473783 +epoch: 2, batch: 24123, sum loss: 4773.321289, avg loss: 
2.551214, ppl: 12.822660 +epoch: 2, batch: 24124, sum loss: 3661.895264, avg loss: 2.327969, ppl: 10.257087 +epoch: 2, batch: 24125, sum loss: 4328.082520, avg loss: 2.363781, ppl: 10.631071 +epoch: 2, batch: 24126, sum loss: 4637.214844, avg loss: 2.722968, ppl: 15.225446 +epoch: 2, batch: 24127, sum loss: 4548.339355, avg loss: 2.602025, ppl: 13.491030 +epoch: 2, batch: 24128, sum loss: 4236.271484, avg loss: 2.541255, ppl: 12.695588 +epoch: 2, batch: 24129, sum loss: 3810.026123, avg loss: 2.467634, ppl: 11.794505 +epoch: 2, batch: 24130, sum loss: 4200.402344, avg loss: 2.442094, ppl: 11.497094 +epoch: 2, batch: 24131, sum loss: 4734.589844, avg loss: 2.574546, ppl: 13.125359 +epoch: 2, batch: 24132, sum loss: 3907.732910, avg loss: 2.210256, ppl: 9.118053 +epoch: 2, batch: 24133, sum loss: 4265.399414, avg loss: 2.216944, ppl: 9.179232 +epoch: 2, batch: 24134, sum loss: 3901.965332, avg loss: 2.372015, ppl: 10.718972 +epoch: 2, batch: 24135, sum loss: 4200.023438, avg loss: 2.440455, ppl: 11.478265 +epoch: 2, batch: 24136, sum loss: 4730.986816, avg loss: 2.695719, ppl: 14.816168 +epoch: 2, batch: 24137, sum loss: 3805.555176, avg loss: 2.309196, ppl: 10.066330 +epoch: 2, batch: 24138, sum loss: 4274.593750, avg loss: 2.381389, ppl: 10.819925 +epoch: 2, batch: 24139, sum loss: 4250.173828, avg loss: 2.409396, ppl: 11.127234 +epoch: 2, batch: 24140, sum loss: 3612.748047, avg loss: 2.296725, ppl: 9.941568 +epoch: 2, batch: 24141, sum loss: 4338.911133, avg loss: 2.693303, ppl: 14.780416 +epoch: 2, batch: 24142, sum loss: 3760.422119, avg loss: 2.393649, ppl: 10.953388 +epoch: 2, batch: 24143, sum loss: 3912.557617, avg loss: 2.484164, ppl: 11.991086 +epoch: 2, batch: 24144, sum loss: 4569.671387, avg loss: 2.391246, ppl: 10.927101 +epoch: 2, batch: 24145, sum loss: 4133.458008, avg loss: 2.427163, ppl: 11.326698 +epoch: 2, batch: 24146, sum loss: 4138.398926, avg loss: 2.415878, ppl: 11.199597 +epoch: 2, batch: 24147, sum loss: 3734.956787, avg loss: 2.237841, 
ppl: 9.373075 +epoch: 2, batch: 24148, sum loss: 3753.688721, avg loss: 2.362296, ppl: 10.615300 +epoch: 2, batch: 24149, sum loss: 4623.502930, avg loss: 2.388173, ppl: 10.893572 +epoch: 2, batch: 24150, sum loss: 4887.315430, avg loss: 2.692736, ppl: 14.772036 +epoch: 2, batch: 24151, sum loss: 3349.209717, avg loss: 2.094565, ppl: 8.121908 +epoch: 2, batch: 24152, sum loss: 4675.334961, avg loss: 2.500179, ppl: 12.184678 +epoch: 2, batch: 24153, sum loss: 3178.640625, avg loss: 2.037590, ppl: 7.672099 +epoch: 2, batch: 24154, sum loss: 4409.716309, avg loss: 2.635814, ppl: 13.954663 +epoch: 2, batch: 24155, sum loss: 4768.840332, avg loss: 2.586139, ppl: 13.278404 +epoch: 2, batch: 24156, sum loss: 4571.253906, avg loss: 2.720984, ppl: 15.195274 +epoch: 2, batch: 24157, sum loss: 4322.978516, avg loss: 2.573202, ppl: 13.107724 +epoch: 2, batch: 24158, sum loss: 3939.355469, avg loss: 2.605394, ppl: 13.536556 +epoch: 2, batch: 24159, sum loss: 4151.290527, avg loss: 2.503794, ppl: 12.228802 +epoch: 2, batch: 24160, sum loss: 3821.436035, avg loss: 2.395885, ppl: 10.977906 +epoch: 2, batch: 24161, sum loss: 4104.570801, avg loss: 2.807504, ppl: 16.568514 +epoch: 2, batch: 24162, sum loss: 3817.625977, avg loss: 2.567334, ppl: 13.031039 +epoch: 2, batch: 24163, sum loss: 3593.184814, avg loss: 2.367052, ppl: 10.665901 +epoch: 2, batch: 24164, sum loss: 4183.533203, avg loss: 2.680034, ppl: 14.585591 +epoch: 2, batch: 24165, sum loss: 3408.726074, avg loss: 2.200598, ppl: 9.030410 +epoch: 2, batch: 24166, sum loss: 4422.769043, avg loss: 2.446222, ppl: 11.544646 +epoch: 2, batch: 24167, sum loss: 4105.988281, avg loss: 2.300274, ppl: 9.976912 +epoch: 2, batch: 24168, sum loss: 3996.282227, avg loss: 2.319375, ppl: 10.169312 +epoch: 2, batch: 24169, sum loss: 3741.361084, avg loss: 2.458188, ppl: 11.683617 +epoch: 2, batch: 24170, sum loss: 4397.329102, avg loss: 2.577567, ppl: 13.165067 +epoch: 2, batch: 24171, sum loss: 3455.299072, avg loss: 2.360177, ppl: 
10.592824 +epoch: 2, batch: 24172, sum loss: 4594.090820, avg loss: 2.834109, ppl: 17.015234 +epoch: 2, batch: 24173, sum loss: 4154.019531, avg loss: 2.532939, ppl: 12.590451 +epoch: 2, batch: 24174, sum loss: 4331.344238, avg loss: 2.425165, ppl: 11.304093 +epoch: 2, batch: 24175, sum loss: 3444.271729, avg loss: 2.314699, ppl: 10.121873 +epoch: 2, batch: 24176, sum loss: 4467.905762, avg loss: 2.594603, ppl: 13.391264 +epoch: 2, batch: 24177, sum loss: 4535.672363, avg loss: 2.770722, ppl: 15.970166 +epoch: 2, batch: 24178, sum loss: 4500.051270, avg loss: 2.675417, ppl: 14.518402 +epoch: 2, batch: 24179, sum loss: 4108.221680, avg loss: 2.562833, ppl: 12.972517 +epoch: 2, batch: 24180, sum loss: 4617.356934, avg loss: 2.598400, ppl: 13.442215 +epoch: 2, batch: 24181, sum loss: 3092.252686, avg loss: 2.113638, ppl: 8.278306 +epoch: 2, batch: 24182, sum loss: 3580.148926, avg loss: 2.350722, ppl: 10.493139 +epoch: 2, batch: 24183, sum loss: 4046.386963, avg loss: 2.508610, ppl: 12.287832 +epoch: 2, batch: 24184, sum loss: 4125.288086, avg loss: 2.501691, ppl: 12.203111 +epoch: 2, batch: 24185, sum loss: 4424.521973, avg loss: 2.721108, ppl: 15.197154 +epoch: 2, batch: 24186, sum loss: 4219.082520, avg loss: 2.345237, ppl: 10.435751 +epoch: 2, batch: 24187, sum loss: 4295.084473, avg loss: 2.539967, ppl: 12.679256 +epoch: 2, batch: 24188, sum loss: 3619.288574, avg loss: 2.541635, ppl: 12.700422 +epoch: 2, batch: 24189, sum loss: 4141.261719, avg loss: 2.421790, ppl: 11.266011 +epoch: 2, batch: 24190, sum loss: 4543.809082, avg loss: 2.742190, ppl: 15.520941 +epoch: 2, batch: 24191, sum loss: 4194.792969, avg loss: 2.715076, ppl: 15.105764 +epoch: 2, batch: 24192, sum loss: 4580.950684, avg loss: 2.588108, ppl: 13.304573 +epoch: 2, batch: 24193, sum loss: 4002.520264, avg loss: 2.369757, ppl: 10.694798 +epoch: 2, batch: 24194, sum loss: 4476.201172, avg loss: 2.543296, ppl: 12.721534 +epoch: 2, batch: 24195, sum loss: 3539.939941, avg loss: 2.239051, ppl: 9.384422 
+epoch: 2, batch: 24196, sum loss: 3579.246582, avg loss: 2.122922, ppl: 8.355518 +epoch: 2, batch: 24197, sum loss: 3912.861084, avg loss: 2.341629, ppl: 10.398157 +epoch: 2, batch: 24198, sum loss: 3675.233398, avg loss: 2.332001, ppl: 10.298526 +epoch: 2, batch: 24199, sum loss: 4653.849609, avg loss: 2.402607, ppl: 11.051950 +epoch: 2, batch: 24200, sum loss: 2953.005127, avg loss: 2.184175, ppl: 8.883321 +epoch: 2, batch: 24201, sum loss: 4760.579590, avg loss: 2.588679, ppl: 13.312169 +epoch: 2, batch: 24202, sum loss: 3091.437744, avg loss: 2.230474, ppl: 9.304279 +epoch: 2, batch: 24203, sum loss: 5039.083008, avg loss: 2.436694, ppl: 11.435172 +epoch: 2, batch: 24204, sum loss: 4074.739258, avg loss: 2.475540, ppl: 11.888130 +epoch: 2, batch: 24205, sum loss: 3526.636719, avg loss: 2.282613, ppl: 9.802258 +epoch: 2, batch: 24206, sum loss: 4413.173340, avg loss: 2.536307, ppl: 12.632927 +epoch: 2, batch: 24207, sum loss: 4236.487305, avg loss: 2.607069, ppl: 13.559251 +epoch: 2, batch: 24208, sum loss: 3683.344482, avg loss: 2.266674, ppl: 9.647257 +epoch: 2, batch: 24209, sum loss: 3484.505127, avg loss: 2.315286, ppl: 10.127818 +epoch: 2, batch: 24210, sum loss: 4735.187012, avg loss: 2.463677, ppl: 11.747929 +epoch: 2, batch: 24211, sum loss: 3004.976562, avg loss: 2.160300, ppl: 8.673736 +epoch: 2, batch: 24212, sum loss: 4646.551758, avg loss: 2.496804, ppl: 12.143618 +epoch: 2, batch: 24213, sum loss: 3757.596680, avg loss: 2.192297, ppl: 8.955759 +epoch: 2, batch: 24214, sum loss: 4254.351074, avg loss: 2.589380, ppl: 13.321507 +epoch: 2, batch: 24215, sum loss: 3952.256836, avg loss: 2.326226, ppl: 10.239221 +epoch: 2, batch: 24216, sum loss: 4035.043457, avg loss: 2.560307, ppl: 12.939787 +epoch: 2, batch: 24217, sum loss: 3888.150879, avg loss: 2.497207, ppl: 12.148512 +epoch: 2, batch: 24218, sum loss: 4596.744629, avg loss: 2.631222, ppl: 13.890731 +epoch: 2, batch: 24219, sum loss: 4290.386719, avg loss: 2.537189, ppl: 12.644081 +epoch: 2, 
batch: 24220, sum loss: 4188.302246, avg loss: 2.326835, ppl: 10.245460 +epoch: 2, batch: 24221, sum loss: 4256.076172, avg loss: 2.576317, ppl: 13.148626 +epoch: 2, batch: 24222, sum loss: 4012.303711, avg loss: 2.350500, ppl: 10.490815 +epoch: 2, batch: 24223, sum loss: 3772.603760, avg loss: 2.503387, ppl: 12.223823 +epoch: 2, batch: 24224, sum loss: 3790.661865, avg loss: 2.434593, ppl: 11.411179 +epoch: 2, batch: 24225, sum loss: 3780.560791, avg loss: 2.379208, ppl: 10.796345 +epoch: 2, batch: 24226, sum loss: 4300.155762, avg loss: 2.827190, ppl: 16.897909 +epoch: 2, batch: 24227, sum loss: 4315.195801, avg loss: 2.488579, ppl: 12.044147 +epoch: 2, batch: 24228, sum loss: 3419.980225, avg loss: 2.403359, ppl: 11.060267 +epoch: 2, batch: 24229, sum loss: 4600.715820, avg loss: 2.327120, ppl: 10.248382 +epoch: 2, batch: 24230, sum loss: 4266.796875, avg loss: 2.467783, ppl: 11.796268 +epoch: 2, batch: 24231, sum loss: 5506.066406, avg loss: 2.913263, ppl: 18.416788 +epoch: 2, batch: 24232, sum loss: 4273.370117, avg loss: 2.572769, ppl: 13.102059 +epoch: 2, batch: 24233, sum loss: 3839.830078, avg loss: 2.291068, ppl: 9.885490 +epoch: 2, batch: 24234, sum loss: 4488.465820, avg loss: 2.455397, ppl: 11.651059 +epoch: 2, batch: 24235, sum loss: 3374.591309, avg loss: 2.364815, ppl: 10.642073 +epoch: 2, batch: 24236, sum loss: 3296.055176, avg loss: 2.263774, ppl: 9.619326 +epoch: 2, batch: 24237, sum loss: 4551.289062, avg loss: 2.513136, ppl: 12.343581 +epoch: 2, batch: 24238, sum loss: 4772.203125, avg loss: 2.673503, ppl: 14.490644 +epoch: 2, batch: 24239, sum loss: 4142.893555, avg loss: 2.400286, ppl: 11.026329 +epoch: 2, batch: 24240, sum loss: 3510.994629, avg loss: 2.234879, ppl: 9.345351 +epoch: 2, batch: 24241, sum loss: 4873.085449, avg loss: 2.578352, ppl: 13.175410 +epoch: 2, batch: 24242, sum loss: 3541.345703, avg loss: 2.317635, ppl: 10.151633 +epoch: 2, batch: 24243, sum loss: 4302.886719, avg loss: 2.785040, ppl: 16.200468 +epoch: 2, batch: 
24244, sum loss: 3918.113770, avg loss: 2.292635, ppl: 9.900997 +epoch: 2, batch: 24245, sum loss: 3979.807861, avg loss: 2.507756, ppl: 12.277343 +epoch: 2, batch: 24246, sum loss: 4207.116699, avg loss: 2.683110, ppl: 14.630524 +epoch: 2, batch: 24247, sum loss: 3758.122314, avg loss: 2.321261, ppl: 10.188519 +epoch: 2, batch: 24248, sum loss: 4063.914062, avg loss: 2.611770, ppl: 13.623141 +epoch: 2, batch: 24249, sum loss: 4387.178711, avg loss: 2.466093, ppl: 11.776341 +epoch: 2, batch: 24250, sum loss: 4330.381836, avg loss: 2.363746, ppl: 10.630696 +epoch: 2, batch: 24251, sum loss: 3887.403320, avg loss: 2.493524, ppl: 12.103850 +epoch: 2, batch: 24252, sum loss: 3235.922119, avg loss: 2.355111, ppl: 10.539295 +epoch: 2, batch: 24253, sum loss: 4419.211426, avg loss: 2.704536, ppl: 14.947375 +epoch: 2, batch: 24254, sum loss: 4255.575195, avg loss: 2.515115, ppl: 12.368034 +epoch: 2, batch: 24255, sum loss: 3854.600098, avg loss: 2.391191, ppl: 10.926499 +epoch: 2, batch: 24256, sum loss: 3950.951660, avg loss: 2.494288, ppl: 12.113103 +epoch: 2, batch: 24257, sum loss: 3995.272217, avg loss: 2.481535, ppl: 11.959614 +epoch: 2, batch: 24258, sum loss: 4429.927734, avg loss: 2.401045, ppl: 11.034700 +epoch: 2, batch: 24259, sum loss: 4341.307129, avg loss: 2.470863, ppl: 11.832658 +epoch: 2, batch: 24260, sum loss: 4259.741699, avg loss: 2.702882, ppl: 14.922674 +epoch: 2, batch: 24261, sum loss: 4934.235840, avg loss: 2.565905, ppl: 13.012430 +epoch: 2, batch: 24262, sum loss: 3389.634033, avg loss: 2.323258, ppl: 10.208885 +epoch: 2, batch: 24263, sum loss: 4717.601562, avg loss: 2.539075, ppl: 12.667950 +epoch: 2, batch: 24264, sum loss: 4208.004395, avg loss: 2.533416, ppl: 12.596466 +epoch: 2, batch: 24265, sum loss: 4296.644043, avg loss: 2.677037, ppl: 14.541938 +epoch: 2, batch: 24266, sum loss: 4177.713867, avg loss: 2.448836, ppl: 11.574862 +epoch: 2, batch: 24267, sum loss: 4208.915039, avg loss: 2.540082, ppl: 12.680704 +epoch: 2, batch: 24268, 
sum loss: 4617.520508, avg loss: 2.678376, ppl: 14.561433 +epoch: 2, batch: 24269, sum loss: 3576.665527, avg loss: 2.248061, ppl: 9.469361 +epoch: 2, batch: 24270, sum loss: 4515.421875, avg loss: 2.760038, ppl: 15.800442 +epoch: 2, batch: 24271, sum loss: 4015.491943, avg loss: 2.303782, ppl: 10.011976 +epoch: 2, batch: 24272, sum loss: 4068.785156, avg loss: 2.678595, ppl: 14.564610 +epoch: 2, batch: 24273, sum loss: 5118.250000, avg loss: 2.603383, ppl: 13.509357 +epoch: 2, batch: 24274, sum loss: 4423.551270, avg loss: 2.595981, ppl: 13.409735 +epoch: 2, batch: 24275, sum loss: 3775.273193, avg loss: 2.429391, ppl: 11.351963 +epoch: 2, batch: 24276, sum loss: 4929.163574, avg loss: 2.463350, ppl: 11.744092 +epoch: 2, batch: 24277, sum loss: 4194.969238, avg loss: 2.397125, ppl: 10.991533 +epoch: 2, batch: 24278, sum loss: 4226.846191, avg loss: 2.635191, ppl: 13.945975 +epoch: 2, batch: 24279, sum loss: 4839.914062, avg loss: 2.702353, ppl: 14.914782 +epoch: 2, batch: 24280, sum loss: 4759.969727, avg loss: 2.510533, ppl: 12.311485 +epoch: 2, batch: 24281, sum loss: 3655.120850, avg loss: 2.332560, ppl: 10.304285 +epoch: 2, batch: 24282, sum loss: 4088.486084, avg loss: 2.517541, ppl: 12.398068 +epoch: 2, batch: 24283, sum loss: 3998.376465, avg loss: 2.433583, ppl: 11.399649 +epoch: 2, batch: 24284, sum loss: 3603.103027, avg loss: 2.447760, ppl: 11.562422 +epoch: 2, batch: 24285, sum loss: 4299.586426, avg loss: 2.491070, ppl: 12.074186 +epoch: 2, batch: 24286, sum loss: 3743.537842, avg loss: 2.271564, ppl: 9.694553 +epoch: 2, batch: 24287, sum loss: 4188.731445, avg loss: 2.480007, ppl: 11.941347 +epoch: 2, batch: 24288, sum loss: 4242.800781, avg loss: 2.309636, ppl: 10.070757 +epoch: 2, batch: 24289, sum loss: 4738.500000, avg loss: 2.605003, ppl: 13.531264 +epoch: 2, batch: 24290, sum loss: 3878.861816, avg loss: 2.587633, ppl: 13.298259 +epoch: 2, batch: 24291, sum loss: 3582.969238, avg loss: 2.233771, ppl: 9.335005 +epoch: 2, batch: 24292, sum loss: 
3945.368164, avg loss: 2.511374, ppl: 12.321846 +epoch: 2, batch: 24293, sum loss: 3396.975098, avg loss: 2.318754, ppl: 10.163005 +epoch: 2, batch: 24294, sum loss: 3858.177979, avg loss: 2.248356, ppl: 9.472147 +epoch: 2, batch: 24295, sum loss: 4166.638184, avg loss: 2.433784, ppl: 11.401946 +epoch: 2, batch: 24296, sum loss: 3455.071777, avg loss: 2.195090, ppl: 8.980810 +epoch: 2, batch: 24297, sum loss: 5014.366211, avg loss: 2.598117, ppl: 13.438415 +epoch: 2, batch: 24298, sum loss: 3514.317871, avg loss: 2.255660, ppl: 9.541587 +epoch: 2, batch: 24299, sum loss: 4392.299805, avg loss: 2.531585, ppl: 12.573419 +epoch: 2, batch: 24300, sum loss: 4943.299805, avg loss: 2.640652, ppl: 14.022339 +epoch: 2, batch: 24301, sum loss: 4366.414062, avg loss: 2.487985, ppl: 12.036999 +epoch: 2, batch: 24302, sum loss: 4547.766602, avg loss: 2.670444, ppl: 14.446385 +epoch: 2, batch: 24303, sum loss: 3343.133057, avg loss: 2.313587, ppl: 10.110626 +epoch: 2, batch: 24304, sum loss: 3436.584229, avg loss: 2.329888, ppl: 10.276787 +epoch: 2, batch: 24305, sum loss: 4951.468750, avg loss: 2.855518, ppl: 17.383442 +epoch: 2, batch: 24306, sum loss: 3995.562988, avg loss: 2.536865, ppl: 12.639985 +epoch: 2, batch: 24307, sum loss: 4368.730469, avg loss: 2.459871, ppl: 11.703300 +epoch: 2, batch: 24308, sum loss: 3476.597168, avg loss: 2.319278, ppl: 10.168327 +epoch: 2, batch: 24309, sum loss: 3910.358887, avg loss: 2.240893, ppl: 9.401727 +epoch: 2, batch: 24310, sum loss: 3954.593018, avg loss: 2.411337, ppl: 11.148859 +epoch: 2, batch: 24311, sum loss: 4166.780273, avg loss: 2.314878, ppl: 10.123688 +epoch: 2, batch: 24312, sum loss: 4030.966309, avg loss: 2.279958, ppl: 9.776275 +epoch: 2, batch: 24313, sum loss: 4098.423340, avg loss: 2.576005, ppl: 13.144517 +epoch: 2, batch: 24314, sum loss: 3814.752930, avg loss: 2.453218, ppl: 11.625692 +epoch: 2, batch: 24315, sum loss: 4014.168945, avg loss: 2.494822, ppl: 12.119579 +epoch: 2, batch: 24316, sum loss: 3250.062988, 
avg loss: 2.344923, ppl: 10.432467 +epoch: 2, batch: 24317, sum loss: 3446.160645, avg loss: 2.226202, ppl: 9.264612 +epoch: 2, batch: 24318, sum loss: 4570.006836, avg loss: 2.494545, ppl: 12.116222 +epoch: 2, batch: 24319, sum loss: 4381.666504, avg loss: 2.603486, ppl: 13.510749 +epoch: 2, batch: 24320, sum loss: 4080.366455, avg loss: 2.558223, ppl: 12.912857 +epoch: 2, batch: 24321, sum loss: 3448.589844, avg loss: 2.447544, ppl: 11.559924 +epoch: 2, batch: 24322, sum loss: 4596.300293, avg loss: 2.420379, ppl: 11.250127 +epoch: 2, batch: 24323, sum loss: 4855.766602, avg loss: 2.630426, ppl: 13.879683 +epoch: 2, batch: 24324, sum loss: 4633.515625, avg loss: 2.632679, ppl: 13.910991 +epoch: 2, batch: 24325, sum loss: 4136.607422, avg loss: 2.184059, ppl: 8.882285 +epoch: 2, batch: 24326, sum loss: 3787.376953, avg loss: 2.451377, ppl: 11.604311 +epoch: 2, batch: 24327, sum loss: 4513.370605, avg loss: 2.856564, ppl: 17.401628 +epoch: 2, batch: 24328, sum loss: 4345.285156, avg loss: 2.592652, ppl: 13.365173 +epoch: 2, batch: 24329, sum loss: 3732.666016, avg loss: 2.455701, ppl: 11.654605 +epoch: 2, batch: 24330, sum loss: 4445.451172, avg loss: 2.512974, ppl: 12.341582 +epoch: 2, batch: 24331, sum loss: 4065.478516, avg loss: 2.354070, ppl: 10.528333 +epoch: 2, batch: 24332, sum loss: 4392.137695, avg loss: 2.568501, ppl: 13.046260 +epoch: 2, batch: 24333, sum loss: 4664.943848, avg loss: 2.506687, ppl: 12.264228 +epoch: 2, batch: 24334, sum loss: 3773.154297, avg loss: 2.183538, ppl: 8.877664 +epoch: 2, batch: 24335, sum loss: 4026.638428, avg loss: 2.403963, ppl: 11.066952 +epoch: 2, batch: 24336, sum loss: 3876.642578, avg loss: 2.515667, ppl: 12.374860 +epoch: 2, batch: 24337, sum loss: 5267.146484, avg loss: 2.770724, ppl: 15.970193 +epoch: 2, batch: 24338, sum loss: 4387.266113, avg loss: 2.609914, ppl: 13.597885 +epoch: 2, batch: 24339, sum loss: 4796.266113, avg loss: 2.834673, ppl: 17.024826 +epoch: 2, batch: 24340, sum loss: 4308.989746, avg loss: 
2.370181, ppl: 10.699332 +epoch: 2, batch: 24341, sum loss: 4510.184082, avg loss: 2.619154, ppl: 13.724114 +epoch: 2, batch: 24342, sum loss: 4572.220215, avg loss: 2.523300, ppl: 12.469684 +epoch: 2, batch: 24343, sum loss: 4416.967773, avg loss: 2.572491, ppl: 13.098417 +epoch: 2, batch: 24344, sum loss: 4707.774414, avg loss: 2.691695, ppl: 14.756667 +epoch: 2, batch: 24345, sum loss: 3554.058838, avg loss: 2.193864, ppl: 8.969802 +epoch: 2, batch: 24346, sum loss: 4074.486328, avg loss: 2.635502, ppl: 13.950315 +epoch: 2, batch: 24347, sum loss: 4317.273438, avg loss: 2.426798, ppl: 11.322568 +epoch: 2, batch: 24348, sum loss: 4593.917969, avg loss: 2.546518, ppl: 12.762582 +epoch: 2, batch: 24349, sum loss: 3427.202637, avg loss: 2.314114, ppl: 10.115954 +epoch: 2, batch: 24350, sum loss: 4072.621826, avg loss: 2.333881, ppl: 10.317904 +epoch: 2, batch: 24351, sum loss: 3914.537354, avg loss: 2.382555, ppl: 10.832540 +epoch: 2, batch: 24352, sum loss: 3402.111572, avg loss: 2.421432, ppl: 11.261972 +epoch: 2, batch: 24353, sum loss: 4120.871094, avg loss: 2.387527, ppl: 10.886536 +epoch: 2, batch: 24354, sum loss: 4598.954102, avg loss: 2.718058, ppl: 15.150868 +epoch: 2, batch: 24355, sum loss: 3734.410889, avg loss: 2.448794, ppl: 11.574381 +epoch: 2, batch: 24356, sum loss: 4613.389648, avg loss: 2.686890, ppl: 14.685926 +epoch: 2, batch: 24357, sum loss: 3394.367188, avg loss: 2.285769, ppl: 9.833245 +epoch: 2, batch: 24358, sum loss: 4386.546387, avg loss: 2.831857, ppl: 16.976957 +epoch: 2, batch: 24359, sum loss: 4094.515137, avg loss: 2.459168, ppl: 11.695080 +epoch: 2, batch: 24360, sum loss: 4458.278320, avg loss: 2.440218, ppl: 11.475542 +epoch: 2, batch: 24361, sum loss: 5160.526855, avg loss: 2.717497, ppl: 15.142375 +epoch: 2, batch: 24362, sum loss: 4465.243652, avg loss: 2.561815, ppl: 12.959317 +epoch: 2, batch: 24363, sum loss: 3657.206543, avg loss: 2.407641, ppl: 11.107726 +epoch: 2, batch: 24364, sum loss: 3658.312500, avg loss: 2.430773, 
ppl: 11.367661 +epoch: 2, batch: 24365, sum loss: 3592.541016, avg loss: 2.294087, ppl: 9.915383 +epoch: 2, batch: 24366, sum loss: 4156.411133, avg loss: 2.450714, ppl: 11.596625 +epoch: 2, batch: 24367, sum loss: 3326.368652, avg loss: 2.046996, ppl: 7.744602 +epoch: 2, batch: 24368, sum loss: 5423.082520, avg loss: 2.666215, ppl: 14.385424 +epoch: 2, batch: 24369, sum loss: 4666.867188, avg loss: 2.581232, ppl: 13.213405 +epoch: 2, batch: 24370, sum loss: 4028.705078, avg loss: 2.489929, ppl: 12.060419 +epoch: 2, batch: 24371, sum loss: 4889.251465, avg loss: 2.522834, ppl: 12.463864 +epoch: 2, batch: 24372, sum loss: 4586.679688, avg loss: 2.712407, ppl: 15.065493 +epoch: 2, batch: 24373, sum loss: 3737.620361, avg loss: 2.554765, ppl: 12.868269 +epoch: 2, batch: 24374, sum loss: 4673.878418, avg loss: 2.585110, ppl: 13.264745 +epoch: 2, batch: 24375, sum loss: 4776.859863, avg loss: 2.759595, ppl: 15.793451 +epoch: 2, batch: 24376, sum loss: 3171.798828, avg loss: 2.241554, ppl: 9.407938 +epoch: 2, batch: 24377, sum loss: 4064.772217, avg loss: 2.363240, ppl: 10.625319 +epoch: 2, batch: 24378, sum loss: 3234.983398, avg loss: 2.337416, ppl: 10.354443 +epoch: 2, batch: 24379, sum loss: 3785.214355, avg loss: 2.253104, ppl: 9.517231 +epoch: 2, batch: 24380, sum loss: 4165.146973, avg loss: 2.428657, ppl: 11.343638 +epoch: 2, batch: 24381, sum loss: 4977.439453, avg loss: 2.559095, ppl: 12.924114 +epoch: 2, batch: 24382, sum loss: 3819.034668, avg loss: 2.476676, ppl: 11.901640 +epoch: 2, batch: 24383, sum loss: 3196.641846, avg loss: 2.161353, ppl: 8.682880 +epoch: 2, batch: 24384, sum loss: 4012.823486, avg loss: 2.578935, ppl: 13.183096 +epoch: 2, batch: 24385, sum loss: 4004.250732, avg loss: 2.471760, ppl: 11.843270 +epoch: 2, batch: 24386, sum loss: 4221.835449, avg loss: 2.382526, ppl: 10.832227 +epoch: 2, batch: 24387, sum loss: 3824.254883, avg loss: 2.412779, ppl: 11.164947 +epoch: 2, batch: 24388, sum loss: 4601.020020, avg loss: 2.473667, ppl: 
11.865875 +epoch: 2, batch: 24389, sum loss: 4860.388672, avg loss: 2.673481, ppl: 14.490322 +epoch: 2, batch: 24390, sum loss: 4913.914062, avg loss: 2.505821, ppl: 12.253612 +epoch: 2, batch: 24391, sum loss: 4852.918945, avg loss: 2.623199, ppl: 13.779741 +epoch: 2, batch: 24392, sum loss: 3681.910400, avg loss: 2.154424, ppl: 8.622922 +epoch: 2, batch: 24393, sum loss: 3315.070557, avg loss: 2.289413, ppl: 9.869147 +epoch: 2, batch: 24394, sum loss: 3746.841064, avg loss: 2.314293, ppl: 10.117771 +epoch: 2, batch: 24395, sum loss: 3478.592773, avg loss: 2.270622, ppl: 9.685424 +epoch: 2, batch: 24396, sum loss: 3582.750488, avg loss: 2.308473, ppl: 10.059056 +epoch: 2, batch: 24397, sum loss: 4221.191895, avg loss: 2.581769, ppl: 13.220501 +epoch: 2, batch: 24398, sum loss: 3867.120117, avg loss: 2.262797, ppl: 9.609932 +epoch: 2, batch: 24399, sum loss: 3852.024414, avg loss: 2.507828, ppl: 12.278238 +epoch: 2, batch: 24400, sum loss: 3254.761719, avg loss: 2.127295, ppl: 8.392138 +epoch: 2, batch: 24401, sum loss: 3989.506836, avg loss: 2.507547, ppl: 12.274779 +epoch: 2, batch: 24402, sum loss: 4272.194824, avg loss: 2.419136, ppl: 11.236151 +epoch: 2, batch: 24403, sum loss: 4041.400879, avg loss: 2.471805, ppl: 11.843804 +epoch: 2, batch: 24404, sum loss: 4498.902344, avg loss: 2.463802, ppl: 11.749399 +epoch: 2, batch: 24405, sum loss: 3629.819824, avg loss: 2.442678, ppl: 11.503809 +epoch: 2, batch: 24406, sum loss: 4897.177246, avg loss: 2.578819, ppl: 13.181562 +epoch: 2, batch: 24407, sum loss: 3436.128174, avg loss: 2.234154, ppl: 9.338573 +epoch: 2, batch: 24408, sum loss: 4336.328125, avg loss: 2.524056, ppl: 12.479109 +epoch: 2, batch: 24409, sum loss: 4094.888184, avg loss: 2.329288, ppl: 10.270624 +epoch: 2, batch: 24410, sum loss: 4147.122559, avg loss: 2.558373, ppl: 12.914787 +epoch: 2, batch: 24411, sum loss: 4356.229004, avg loss: 2.435008, ppl: 11.415908 +epoch: 2, batch: 24412, sum loss: 3645.977539, avg loss: 2.297402, ppl: 9.948307 
+epoch: 2, batch: 24413, sum loss: 4503.656250, avg loss: 2.527304, ppl: 12.519712 +epoch: 2, batch: 24414, sum loss: 4110.104980, avg loss: 2.349974, ppl: 10.485299 +epoch: 2, batch: 24415, sum loss: 3575.716309, avg loss: 2.464312, ppl: 11.755387 +epoch: 2, batch: 24416, sum loss: 5367.717773, avg loss: 2.765439, ppl: 15.886017 +epoch: 2, batch: 24417, sum loss: 4166.425781, avg loss: 2.298084, ppl: 9.955088 +epoch: 2, batch: 24418, sum loss: 3142.277832, avg loss: 2.056465, ppl: 7.818281 +epoch: 2, batch: 24419, sum loss: 4298.213379, avg loss: 2.416084, ppl: 11.201904 +epoch: 2, batch: 24420, sum loss: 3857.204590, avg loss: 2.340537, ppl: 10.386811 +epoch: 2, batch: 24421, sum loss: 5122.957031, avg loss: 2.697713, ppl: 14.845742 +epoch: 2, batch: 24422, sum loss: 4758.550781, avg loss: 2.515090, ppl: 12.367724 +epoch: 2, batch: 24423, sum loss: 5105.469727, avg loss: 2.638485, ppl: 13.991996 +epoch: 2, batch: 24424, sum loss: 3502.145020, avg loss: 2.363121, ppl: 10.624058 +epoch: 2, batch: 24425, sum loss: 4819.747559, avg loss: 2.417125, ppl: 11.213576 +epoch: 2, batch: 24426, sum loss: 4144.722656, avg loss: 2.192975, ppl: 8.961835 +epoch: 2, batch: 24427, sum loss: 4600.287598, avg loss: 2.511074, ppl: 12.318153 +epoch: 2, batch: 24428, sum loss: 4478.864258, avg loss: 2.511982, ppl: 12.329348 +epoch: 2, batch: 24429, sum loss: 3214.257324, avg loss: 2.116035, ppl: 8.298170 +epoch: 2, batch: 24430, sum loss: 4394.981445, avg loss: 2.387280, ppl: 10.883844 +epoch: 2, batch: 24431, sum loss: 3274.996582, avg loss: 2.099357, ppl: 8.160920 +epoch: 2, batch: 24432, sum loss: 4403.329590, avg loss: 2.506164, ppl: 12.257813 +epoch: 2, batch: 24433, sum loss: 3948.529297, avg loss: 2.196067, ppl: 8.989591 +epoch: 2, batch: 24434, sum loss: 4878.575684, avg loss: 2.610260, ppl: 13.602584 +epoch: 2, batch: 24435, sum loss: 3499.106201, avg loss: 2.481636, ppl: 11.960814 +epoch: 2, batch: 24436, sum loss: 4422.181152, avg loss: 2.599754, ppl: 13.460422 +epoch: 2, 
batch: 24437, sum loss: 3517.826172, avg loss: 2.282820, ppl: 9.804294 +epoch: 2, batch: 24438, sum loss: 3798.381104, avg loss: 2.556111, ppl: 12.885609 +epoch: 2, batch: 24439, sum loss: 4402.685059, avg loss: 2.508652, ppl: 12.288360 +epoch: 2, batch: 24440, sum loss: 4296.830566, avg loss: 2.482282, ppl: 11.968548 +epoch: 2, batch: 24441, sum loss: 4589.409180, avg loss: 2.594352, ppl: 13.387916 +epoch: 2, batch: 24442, sum loss: 3985.578613, avg loss: 2.455686, ppl: 11.654427 +epoch: 2, batch: 24443, sum loss: 3701.576416, avg loss: 2.342770, ppl: 10.410031 +epoch: 2, batch: 24444, sum loss: 4716.138672, avg loss: 2.812247, ppl: 16.647287 +epoch: 2, batch: 24445, sum loss: 3477.400879, avg loss: 2.501727, ppl: 12.203555 +epoch: 2, batch: 24446, sum loss: 4644.535645, avg loss: 2.517364, ppl: 12.395872 +epoch: 2, batch: 24447, sum loss: 4087.574951, avg loss: 2.401630, ppl: 11.041163 +epoch: 2, batch: 24448, sum loss: 4179.268555, avg loss: 2.474404, ppl: 11.874629 +epoch: 2, batch: 24449, sum loss: 3994.432617, avg loss: 2.582051, ppl: 13.224231 +epoch: 2, batch: 24450, sum loss: 4531.900391, avg loss: 2.583752, ppl: 13.246742 +epoch: 2, batch: 24451, sum loss: 4537.443359, avg loss: 2.483549, ppl: 11.983715 +epoch: 2, batch: 24452, sum loss: 3824.586914, avg loss: 2.536198, ppl: 12.631557 +epoch: 2, batch: 24453, sum loss: 5293.743164, avg loss: 2.549973, ppl: 12.806752 +epoch: 2, batch: 24454, sum loss: 3918.862793, avg loss: 2.455428, ppl: 11.651418 +epoch: 2, batch: 24455, sum loss: 4689.041504, avg loss: 2.523704, ppl: 12.474712 +epoch: 2, batch: 24456, sum loss: 5435.461426, avg loss: 3.018024, ppl: 20.450840 +epoch: 2, batch: 24457, sum loss: 3987.650879, avg loss: 2.490725, ppl: 12.070027 +epoch: 2, batch: 24458, sum loss: 4423.516113, avg loss: 2.548108, ppl: 12.782900 +epoch: 2, batch: 24459, sum loss: 4034.400879, avg loss: 2.322626, ppl: 10.202430 +epoch: 2, batch: 24460, sum loss: 3397.162109, avg loss: 2.289193, ppl: 9.866968 +epoch: 2, batch: 
24461, sum loss: 3251.584229, avg loss: 2.415739, ppl: 11.198038 +epoch: 2, batch: 24462, sum loss: 3967.372559, avg loss: 2.506237, ppl: 12.258708 +epoch: 2, batch: 24463, sum loss: 3570.908691, avg loss: 2.300843, ppl: 9.982599 +epoch: 2, batch: 24464, sum loss: 3681.049316, avg loss: 2.379476, ppl: 10.799243 +epoch: 2, batch: 24465, sum loss: 4687.193848, avg loss: 2.517290, ppl: 12.394959 +epoch: 2, batch: 24466, sum loss: 4451.624023, avg loss: 2.526461, ppl: 12.509156 +epoch: 2, batch: 24467, sum loss: 3901.452393, avg loss: 2.307187, ppl: 10.046122 +epoch: 2, batch: 24468, sum loss: 3858.710693, avg loss: 2.423813, ppl: 11.288823 +epoch: 2, batch: 24469, sum loss: 3500.967285, avg loss: 2.221426, ppl: 9.220470 +epoch: 2, batch: 24470, sum loss: 4175.504395, avg loss: 2.535218, ppl: 12.619182 +epoch: 2, batch: 24471, sum loss: 4678.934082, avg loss: 2.596523, ppl: 13.417004 +epoch: 2, batch: 24472, sum loss: 3144.693848, avg loss: 2.097861, ppl: 8.148724 +epoch: 2, batch: 24473, sum loss: 3372.644775, avg loss: 2.092211, ppl: 8.102815 +epoch: 2, batch: 24474, sum loss: 4172.747559, avg loss: 2.527406, ppl: 12.520988 +epoch: 2, batch: 24475, sum loss: 3291.717285, avg loss: 2.178503, ppl: 8.833070 +epoch: 2, batch: 24476, sum loss: 4458.136719, avg loss: 2.556271, ppl: 12.887668 +epoch: 2, batch: 24477, sum loss: 4230.526367, avg loss: 2.546976, ppl: 12.768429 +epoch: 2, batch: 24478, sum loss: 4055.903809, avg loss: 2.404211, ppl: 11.069694 +epoch: 2, batch: 24479, sum loss: 3470.408691, avg loss: 2.110954, ppl: 8.256116 +epoch: 2, batch: 24480, sum loss: 4778.167480, avg loss: 2.730381, ppl: 15.338737 +epoch: 2, batch: 24481, sum loss: 4319.274902, avg loss: 2.443029, ppl: 11.507844 +epoch: 2, batch: 24482, sum loss: 4071.578613, avg loss: 2.329278, ppl: 10.270529 +epoch: 2, batch: 24483, sum loss: 3744.698975, avg loss: 2.422186, ppl: 11.270466 +epoch: 2, batch: 24484, sum loss: 4217.639648, avg loss: 2.527046, ppl: 12.516478 +epoch: 2, batch: 24485, sum 
loss: 4821.478027, avg loss: 2.626077, ppl: 13.819452 +epoch: 2, batch: 24486, sum loss: 3060.697021, avg loss: 2.301276, ppl: 9.986917 +epoch: 2, batch: 24487, sum loss: 3976.884033, avg loss: 2.371428, ppl: 10.712674 +epoch: 2, batch: 24488, sum loss: 3957.763428, avg loss: 2.344647, ppl: 10.429587 +epoch: 2, batch: 24489, sum loss: 3515.098877, avg loss: 2.009776, ppl: 7.461648 +epoch: 2, batch: 24490, sum loss: 4113.355957, avg loss: 2.515814, ppl: 12.376680 +epoch: 2, batch: 24491, sum loss: 4754.338867, avg loss: 3.010981, ppl: 20.307308 +epoch: 2, batch: 24492, sum loss: 4366.453125, avg loss: 2.503700, ppl: 12.227653 +epoch: 2, batch: 24493, sum loss: 4035.530762, avg loss: 2.199199, ppl: 9.017792 +epoch: 2, batch: 24494, sum loss: 3425.506592, avg loss: 2.186029, ppl: 8.899799 +epoch: 2, batch: 24495, sum loss: 4024.325195, avg loss: 2.491842, ppl: 12.083517 +epoch: 2, batch: 24496, sum loss: 3490.717285, avg loss: 2.319414, ppl: 10.169709 +epoch: 2, batch: 24497, sum loss: 4436.835938, avg loss: 2.545517, ppl: 12.749818 +epoch: 2, batch: 24498, sum loss: 4012.480957, avg loss: 2.496877, ppl: 12.144506 +epoch: 2, batch: 24499, sum loss: 4130.374512, avg loss: 2.567044, ppl: 13.027266 +epoch: 2, batch: 24500, sum loss: 4022.147949, avg loss: 2.467575, ppl: 11.793816 +epoch: 2, batch: 24501, sum loss: 4266.060059, avg loss: 2.606023, ppl: 13.545079 +epoch: 2, batch: 24502, sum loss: 3506.666748, avg loss: 2.265289, ppl: 9.633904 +epoch: 2, batch: 24503, sum loss: 4335.126465, avg loss: 2.458949, ppl: 11.692512 +epoch: 2, batch: 24504, sum loss: 4382.899902, avg loss: 2.615096, ppl: 13.668523 +epoch: 2, batch: 24505, sum loss: 4017.708008, avg loss: 2.517361, ppl: 12.395840 +epoch: 2, batch: 24506, sum loss: 3558.413330, avg loss: 2.344146, ppl: 10.424364 +epoch: 2, batch: 24507, sum loss: 4073.790527, avg loss: 2.465975, ppl: 11.774958 +epoch: 2, batch: 24508, sum loss: 3730.690918, avg loss: 2.310025, ppl: 10.074681 +epoch: 2, batch: 24509, sum loss: 
4256.363281, avg loss: 2.648639, ppl: 14.134794 +epoch: 2, batch: 24510, sum loss: 3909.041504, avg loss: 2.536691, ppl: 12.637789 +epoch: 2, batch: 24511, sum loss: 3681.026611, avg loss: 2.245898, ppl: 9.448895 +epoch: 2, batch: 24512, sum loss: 4127.242676, avg loss: 2.437828, ppl: 11.448149 +epoch: 2, batch: 24513, sum loss: 3833.877930, avg loss: 2.315144, ppl: 10.126377 +epoch: 2, batch: 24514, sum loss: 4684.034668, avg loss: 2.698177, ppl: 14.852625 +epoch: 2, batch: 24515, sum loss: 4188.366211, avg loss: 2.523112, ppl: 12.467339 +epoch: 2, batch: 24516, sum loss: 4584.782715, avg loss: 2.480943, ppl: 11.952530 +epoch: 2, batch: 24517, sum loss: 5300.117188, avg loss: 2.687686, ppl: 14.697629 +epoch: 2, batch: 24518, sum loss: 4220.944824, avg loss: 2.586363, ppl: 13.281384 +epoch: 2, batch: 24519, sum loss: 3783.137695, avg loss: 2.459777, ppl: 11.702206 +epoch: 2, batch: 24520, sum loss: 4006.725342, avg loss: 2.373652, ppl: 10.736535 +epoch: 2, batch: 24521, sum loss: 4236.125488, avg loss: 2.462864, ppl: 11.738379 +epoch: 2, batch: 24522, sum loss: 4312.162109, avg loss: 2.461280, ppl: 11.719802 +epoch: 2, batch: 24523, sum loss: 3423.165771, avg loss: 2.455643, ppl: 11.653921 +epoch: 2, batch: 24524, sum loss: 4301.153320, avg loss: 2.598884, ppl: 13.448726 +epoch: 2, batch: 24525, sum loss: 3739.022217, avg loss: 2.398347, ppl: 11.004967 +epoch: 2, batch: 24526, sum loss: 3683.767334, avg loss: 2.325611, ppl: 10.232926 +epoch: 2, batch: 24527, sum loss: 4192.802246, avg loss: 2.464904, ppl: 11.762357 +epoch: 2, batch: 24528, sum loss: 3840.393311, avg loss: 2.260385, ppl: 9.586775 +epoch: 2, batch: 24529, sum loss: 3811.729004, avg loss: 2.580724, ppl: 13.206697 +epoch: 2, batch: 24530, sum loss: 4854.105469, avg loss: 2.479114, ppl: 11.930693 +epoch: 2, batch: 24531, sum loss: 4608.968750, avg loss: 2.768149, ppl: 15.929128 +epoch: 2, batch: 24532, sum loss: 3980.754883, avg loss: 2.541989, ppl: 12.704917 +epoch: 2, batch: 24533, sum loss: 
3934.411133, avg loss: 2.590132, ppl: 13.331534 +epoch: 2, batch: 24534, sum loss: 3418.741699, avg loss: 2.198548, ppl: 9.011917 +epoch: 2, batch: 24535, sum loss: 3632.843750, avg loss: 2.241113, ppl: 9.403790 +epoch: 2, batch: 24536, sum loss: 3950.164795, avg loss: 2.372471, ppl: 10.723862 +epoch: 2, batch: 24537, sum loss: 4320.009277, avg loss: 2.689919, ppl: 14.730475 +epoch: 2, batch: 24538, sum loss: 4320.929199, avg loss: 2.483293, ppl: 11.980647 +epoch: 2, batch: 24539, sum loss: 3956.968750, avg loss: 2.490226, ppl: 12.064000 +epoch: 2, batch: 24540, sum loss: 4172.284180, avg loss: 2.489430, ppl: 12.054399 +epoch: 2, batch: 24541, sum loss: 4250.950684, avg loss: 2.547005, ppl: 12.768800 +epoch: 2, batch: 24542, sum loss: 4569.067383, avg loss: 2.656435, ppl: 14.245407 +epoch: 2, batch: 24543, sum loss: 4640.443848, avg loss: 2.420680, ppl: 11.253512 +epoch: 2, batch: 24544, sum loss: 4373.604492, avg loss: 2.506364, ppl: 12.260266 +epoch: 2, batch: 24545, sum loss: 3173.862305, avg loss: 2.176860, ppl: 8.818575 +epoch: 2, batch: 24546, sum loss: 3767.906982, avg loss: 2.296104, ppl: 9.935401 +epoch: 2, batch: 24547, sum loss: 3616.316406, avg loss: 2.357442, ppl: 10.563896 +epoch: 2, batch: 24548, sum loss: 3527.849121, avg loss: 2.519892, ppl: 12.427258 +epoch: 2, batch: 24549, sum loss: 4022.428223, avg loss: 2.514018, ppl: 12.354465 +epoch: 2, batch: 24550, sum loss: 3977.968750, avg loss: 2.310086, ppl: 10.075294 +epoch: 2, batch: 24551, sum loss: 4329.136719, avg loss: 2.592298, ppl: 13.360433 +epoch: 2, batch: 24552, sum loss: 4157.940430, avg loss: 2.440106, ppl: 11.474256 +epoch: 2, batch: 24553, sum loss: 3689.910645, avg loss: 2.329489, ppl: 10.272691 +epoch: 2, batch: 24554, sum loss: 4119.061523, avg loss: 2.482858, ppl: 11.975441 +epoch: 2, batch: 24555, sum loss: 4556.392090, avg loss: 2.395580, ppl: 10.974557 +epoch: 2, batch: 24556, sum loss: 4516.925781, avg loss: 2.353791, ppl: 10.525401 +epoch: 2, batch: 24557, sum loss: 
4387.467285, avg loss: 2.475997, ppl: 11.893564 +epoch: 2, batch: 24558, sum loss: 4048.068848, avg loss: 2.260228, ppl: 9.585276 +epoch: 2, batch: 24559, sum loss: 4396.589355, avg loss: 2.747868, ppl: 15.609322 +epoch: 2, batch: 24560, sum loss: 4198.794922, avg loss: 2.472789, ppl: 11.855460 +epoch: 2, batch: 24561, sum loss: 2821.484375, avg loss: 2.172043, ppl: 8.776198 +epoch: 2, batch: 24562, sum loss: 4498.203125, avg loss: 2.601621, ppl: 13.485586 +epoch: 2, batch: 24563, sum loss: 4178.461426, avg loss: 2.561902, ppl: 12.960439 +epoch: 2, batch: 24564, sum loss: 4106.493164, avg loss: 2.550617, ppl: 12.815005 +epoch: 2, batch: 24565, sum loss: 4264.673828, avg loss: 2.511587, ppl: 12.324470 +epoch: 2, batch: 24566, sum loss: 4118.121582, avg loss: 2.296777, ppl: 9.942090 +epoch: 2, batch: 24567, sum loss: 4610.156738, avg loss: 2.566902, ppl: 13.025414 +epoch: 2, batch: 24568, sum loss: 4530.571289, avg loss: 2.504462, ppl: 12.236974 +epoch: 2, batch: 24569, sum loss: 4146.058594, avg loss: 2.342406, ppl: 10.406244 +epoch: 2, batch: 24570, sum loss: 4166.729492, avg loss: 2.433837, ppl: 11.402552 +epoch: 2, batch: 24571, sum loss: 4517.909668, avg loss: 2.505774, ppl: 12.253034 +epoch: 2, batch: 24572, sum loss: 3803.965088, avg loss: 2.378965, ppl: 10.793724 +epoch: 2, batch: 24573, sum loss: 4801.290527, avg loss: 2.596696, ppl: 13.419323 +epoch: 2, batch: 24574, sum loss: 4520.258789, avg loss: 2.550936, ppl: 12.819099 +epoch: 2, batch: 24575, sum loss: 4431.882812, avg loss: 2.436439, ppl: 11.432261 +epoch: 2, batch: 24576, sum loss: 3646.764404, avg loss: 2.374196, ppl: 10.742369 +epoch: 2, batch: 24577, sum loss: 3158.468750, avg loss: 2.320697, ppl: 10.182770 +epoch: 2, batch: 24578, sum loss: 4082.752686, avg loss: 2.514010, ppl: 12.354374 +epoch: 2, batch: 24579, sum loss: 3333.504639, avg loss: 2.240258, ppl: 9.395760 +epoch: 2, batch: 24580, sum loss: 4147.926758, avg loss: 2.222897, ppl: 9.234046 +epoch: 2, batch: 24581, sum loss: 4644.798340, 
avg loss: 2.366174, ppl: 10.656537 +epoch: 2, batch: 24582, sum loss: 4983.393066, avg loss: 2.626986, ppl: 13.832021 +epoch: 2, batch: 24583, sum loss: 4320.591797, avg loss: 2.562629, ppl: 12.969867 +epoch: 2, batch: 24584, sum loss: 4684.080566, avg loss: 2.622666, ppl: 13.772387 +epoch: 2, batch: 24585, sum loss: 3954.585449, avg loss: 2.459319, ppl: 11.696847 +epoch: 2, batch: 24586, sum loss: 3673.265625, avg loss: 2.475246, ppl: 11.884636 +epoch: 2, batch: 24587, sum loss: 3687.320312, avg loss: 2.522107, ppl: 12.454813 +epoch: 2, batch: 24588, sum loss: 4305.503418, avg loss: 2.570450, ppl: 13.071706 +epoch: 2, batch: 24589, sum loss: 3352.251221, avg loss: 2.041566, ppl: 7.702662 +epoch: 2, batch: 24590, sum loss: 4223.307129, avg loss: 2.352817, ppl: 10.515152 +epoch: 2, batch: 24591, sum loss: 4126.733887, avg loss: 2.625149, ppl: 13.806628 +epoch: 2, batch: 24592, sum loss: 4530.699219, avg loss: 2.558272, ppl: 12.913479 +epoch: 2, batch: 24593, sum loss: 4782.743164, avg loss: 2.612094, ppl: 13.627553 +epoch: 2, batch: 24594, sum loss: 5239.017090, avg loss: 2.471235, ppl: 11.837051 +epoch: 2, batch: 24595, sum loss: 4770.043945, avg loss: 2.598063, ppl: 13.437684 +epoch: 2, batch: 24596, sum loss: 2904.530762, avg loss: 2.195413, ppl: 8.983707 +epoch: 2, batch: 24597, sum loss: 4029.004883, avg loss: 2.581041, ppl: 13.210885 +epoch: 2, batch: 24598, sum loss: 3480.971436, avg loss: 2.231392, ppl: 9.312819 +epoch: 2, batch: 24599, sum loss: 4070.174072, avg loss: 2.418404, ppl: 11.227926 +epoch: 2, batch: 24600, sum loss: 3978.189453, avg loss: 2.472461, ppl: 11.851575 +epoch: 2, batch: 24601, sum loss: 4434.628906, avg loss: 2.424619, ppl: 11.297929 +epoch: 2, batch: 24602, sum loss: 3831.931885, avg loss: 2.498000, ppl: 12.158153 +epoch: 2, batch: 24603, sum loss: 3767.402832, avg loss: 2.229232, ppl: 9.292729 +epoch: 2, batch: 24604, sum loss: 3909.398438, avg loss: 2.419182, ppl: 11.236665 +epoch: 2, batch: 24605, sum loss: 4913.647461, avg loss: 
2.597065, ppl: 13.424283 +epoch: 2, batch: 24606, sum loss: 5278.643066, avg loss: 2.619674, ppl: 13.731246 +epoch: 2, batch: 24607, sum loss: 4158.395508, avg loss: 2.636903, ppl: 13.969870 +epoch: 2, batch: 24608, sum loss: 3572.462891, avg loss: 2.438541, ppl: 11.456316 +epoch: 2, batch: 24609, sum loss: 4477.494629, avg loss: 2.684349, ppl: 14.648666 +epoch: 2, batch: 24610, sum loss: 4071.263184, avg loss: 2.400509, ppl: 11.028788 +epoch: 2, batch: 24611, sum loss: 4239.280273, avg loss: 2.400498, ppl: 11.028671 +epoch: 2, batch: 24612, sum loss: 4534.378418, avg loss: 2.580750, ppl: 13.207043 +epoch: 2, batch: 24613, sum loss: 4383.133301, avg loss: 2.487590, ppl: 12.032245 +epoch: 2, batch: 24614, sum loss: 4373.461914, avg loss: 2.612582, ppl: 13.634209 +epoch: 2, batch: 24615, sum loss: 4455.360352, avg loss: 2.453392, ppl: 11.627725 +epoch: 2, batch: 24616, sum loss: 4490.041992, avg loss: 2.493083, ppl: 12.098516 +epoch: 2, batch: 24617, sum loss: 3985.033203, avg loss: 2.544721, ppl: 12.739675 +epoch: 2, batch: 24618, sum loss: 4188.120605, avg loss: 2.452061, ppl: 11.612257 +epoch: 2, batch: 24619, sum loss: 4251.984375, avg loss: 2.510026, ppl: 12.305253 +epoch: 2, batch: 24620, sum loss: 4446.483398, avg loss: 2.347668, ppl: 10.461147 +epoch: 2, batch: 24621, sum loss: 4918.641602, avg loss: 2.743247, ppl: 15.537354 +epoch: 2, batch: 24622, sum loss: 4201.991699, avg loss: 2.543579, ppl: 12.725128 +epoch: 2, batch: 24623, sum loss: 4646.049316, avg loss: 2.656403, ppl: 14.244962 +epoch: 2, batch: 24624, sum loss: 3899.394531, avg loss: 2.490035, ppl: 12.061696 +epoch: 2, batch: 24625, sum loss: 4397.201172, avg loss: 2.628333, ppl: 13.850663 +epoch: 2, batch: 24626, sum loss: 4812.450195, avg loss: 2.717363, ppl: 15.140347 +epoch: 2, batch: 24627, sum loss: 2890.524414, avg loss: 2.054388, ppl: 7.802064 +epoch: 2, batch: 24628, sum loss: 3632.573486, avg loss: 2.325591, ppl: 10.232729 +epoch: 2, batch: 24629, sum loss: 3761.695801, avg loss: 
2.431607, ppl: 11.377151 +epoch: 2, batch: 24630, sum loss: 4412.003418, avg loss: 2.341828, ppl: 10.400229 +epoch: 2, batch: 24631, sum loss: 3914.132080, avg loss: 2.362180, ppl: 10.614062 +epoch: 2, batch: 24632, sum loss: 4405.281250, avg loss: 2.446019, ppl: 11.542310 +epoch: 2, batch: 24633, sum loss: 4472.273438, avg loss: 2.548304, ppl: 12.785399 +epoch: 2, batch: 24634, sum loss: 4813.774414, avg loss: 2.777712, ppl: 16.082176 +epoch: 2, batch: 24635, sum loss: 3688.421387, avg loss: 2.188974, ppl: 8.926051 +epoch: 2, batch: 24636, sum loss: 3562.332031, avg loss: 2.632914, ppl: 13.914251 +epoch: 2, batch: 24637, sum loss: 4916.121582, avg loss: 2.621932, ppl: 13.762280 +epoch: 2, batch: 24638, sum loss: 3962.947266, avg loss: 2.292046, ppl: 9.895161 +epoch: 2, batch: 24639, sum loss: 4444.779785, avg loss: 2.402584, ppl: 11.051695 +epoch: 2, batch: 24640, sum loss: 3995.979492, avg loss: 2.494369, ppl: 12.114090 +epoch: 2, batch: 24641, sum loss: 4364.764160, avg loss: 2.564491, ppl: 12.994046 +epoch: 2, batch: 24642, sum loss: 4136.080566, avg loss: 2.502166, ppl: 12.208910 +epoch: 2, batch: 24643, sum loss: 4783.005859, avg loss: 2.595228, ppl: 13.399648 +epoch: 2, batch: 24644, sum loss: 4064.207275, avg loss: 2.454232, ppl: 11.637486 +epoch: 2, batch: 24645, sum loss: 3530.709473, avg loss: 2.322835, ppl: 10.204565 +epoch: 2, batch: 24646, sum loss: 3937.042725, avg loss: 2.431774, ppl: 11.379055 +epoch: 2, batch: 24647, sum loss: 3751.505615, avg loss: 2.589031, ppl: 13.316861 +epoch: 2, batch: 24648, sum loss: 3722.022949, avg loss: 2.382857, ppl: 10.835820 +epoch: 2, batch: 24649, sum loss: 3974.712158, avg loss: 2.703886, ppl: 14.937664 +epoch: 2, batch: 24650, sum loss: 3685.419189, avg loss: 2.148932, ppl: 8.575699 +epoch: 2, batch: 24651, sum loss: 3732.715820, avg loss: 2.343199, ppl: 10.414499 +epoch: 2, batch: 24652, sum loss: 5210.839355, avg loss: 2.622466, ppl: 13.769632 +epoch: 2, batch: 24653, sum loss: 4530.972656, avg loss: 2.610007, 
ppl: 13.599150 +epoch: 2, batch: 24654, sum loss: 4241.354492, avg loss: 2.686102, ppl: 14.674359 +epoch: 2, batch: 24655, sum loss: 4211.248535, avg loss: 2.286237, ppl: 9.837848 +epoch: 2, batch: 24656, sum loss: 4316.514160, avg loss: 2.542117, ppl: 12.706541 +epoch: 2, batch: 24657, sum loss: 4562.748047, avg loss: 2.387623, ppl: 10.887587 +epoch: 2, batch: 24658, sum loss: 3797.363281, avg loss: 2.493344, ppl: 12.101680 +epoch: 2, batch: 24659, sum loss: 3692.190430, avg loss: 2.226894, ppl: 9.271027 +epoch: 2, batch: 24660, sum loss: 4998.130859, avg loss: 2.434550, ppl: 11.410681 +epoch: 2, batch: 24661, sum loss: 4511.753906, avg loss: 2.762862, ppl: 15.845126 +epoch: 2, batch: 24662, sum loss: 4672.808105, avg loss: 2.471078, ppl: 11.835197 +epoch: 2, batch: 24663, sum loss: 3833.239990, avg loss: 2.229924, ppl: 9.299164 +epoch: 2, batch: 24664, sum loss: 4826.151855, avg loss: 2.620061, ppl: 13.736561 +epoch: 2, batch: 24665, sum loss: 5091.040039, avg loss: 2.665466, ppl: 14.374647 +epoch: 2, batch: 24666, sum loss: 4432.239746, avg loss: 2.581386, ppl: 13.215444 +epoch: 2, batch: 24667, sum loss: 4518.545898, avg loss: 2.598359, ppl: 13.441661 +epoch: 2, batch: 24668, sum loss: 4298.161133, avg loss: 2.561479, ppl: 12.954959 +epoch: 2, batch: 24669, sum loss: 3570.088379, avg loss: 2.445266, ppl: 11.533617 +epoch: 2, batch: 24670, sum loss: 4872.483887, avg loss: 2.591747, ppl: 13.353073 +epoch: 2, batch: 24671, sum loss: 3963.997070, avg loss: 2.685635, ppl: 14.667510 +epoch: 2, batch: 24672, sum loss: 5115.234863, avg loss: 2.514864, ppl: 12.364932 +epoch: 2, batch: 24673, sum loss: 4244.163086, avg loss: 2.338382, ppl: 10.364453 +epoch: 2, batch: 24674, sum loss: 4588.159668, avg loss: 2.490858, ppl: 12.071625 +epoch: 2, batch: 24675, sum loss: 4700.440918, avg loss: 2.766593, ppl: 15.904351 +epoch: 2, batch: 24676, sum loss: 5427.274414, avg loss: 2.709573, ppl: 15.022857 +epoch: 2, batch: 24677, sum loss: 3988.751465, avg loss: 2.514976, ppl: 
12.366309 +epoch: 2, batch: 24678, sum loss: 3518.944824, avg loss: 2.213173, ppl: 9.144686 +epoch: 2, batch: 24679, sum loss: 4441.097656, avg loss: 2.521918, ppl: 12.452458 +epoch: 2, batch: 24680, sum loss: 4312.852539, avg loss: 2.535480, ppl: 12.622495 +epoch: 2, batch: 24681, sum loss: 5369.110840, avg loss: 2.905363, ppl: 18.271877 +epoch: 2, batch: 24682, sum loss: 3691.113770, avg loss: 2.431564, ppl: 11.376659 +epoch: 2, batch: 24683, sum loss: 5021.914062, avg loss: 2.748721, ppl: 15.622643 +epoch: 2, batch: 24684, sum loss: 3726.800781, avg loss: 2.392042, ppl: 10.935799 +epoch: 2, batch: 24685, sum loss: 3192.523438, avg loss: 2.293479, ppl: 9.909357 +epoch: 2, batch: 24686, sum loss: 3923.666260, avg loss: 2.327204, ppl: 10.249247 +epoch: 2, batch: 24687, sum loss: 4037.866455, avg loss: 2.429523, ppl: 11.353459 +epoch: 2, batch: 24688, sum loss: 4426.822754, avg loss: 2.650792, ppl: 14.165255 +epoch: 2, batch: 24689, sum loss: 3816.649902, avg loss: 2.097060, ppl: 8.142200 +epoch: 2, batch: 24690, sum loss: 4025.794922, avg loss: 2.269332, ppl: 9.672936 +epoch: 2, batch: 24691, sum loss: 3926.332031, avg loss: 2.351097, ppl: 10.497080 +epoch: 2, batch: 24692, sum loss: 4626.794922, avg loss: 2.810933, ppl: 16.625416 +epoch: 2, batch: 24693, sum loss: 3523.504639, avg loss: 2.161659, ppl: 8.685537 +epoch: 2, batch: 24694, sum loss: 4242.130371, avg loss: 2.534128, ppl: 12.605436 +epoch: 2, batch: 24695, sum loss: 4422.844238, avg loss: 2.606272, ppl: 13.548454 +epoch: 2, batch: 24696, sum loss: 3979.613525, avg loss: 2.495055, ppl: 12.122406 +epoch: 2, batch: 24697, sum loss: 4083.199219, avg loss: 2.556793, ppl: 12.894404 +epoch: 2, batch: 24698, sum loss: 4523.485352, avg loss: 2.682969, ppl: 14.628459 +epoch: 2, batch: 24699, sum loss: 3842.950928, avg loss: 2.322025, ppl: 10.196300 +epoch: 2, batch: 24700, sum loss: 5105.048828, avg loss: 2.607277, ppl: 13.562077 +epoch: 2, batch: 24701, sum loss: 3841.664551, avg loss: 2.492969, ppl: 12.097134 
+epoch: 2, batch: 24702, sum loss: 4187.089355, avg loss: 2.502743, ppl: 12.215960 +epoch: 2, batch: 24703, sum loss: 3463.965820, avg loss: 2.463702, ppl: 11.748228 +epoch: 2, batch: 24704, sum loss: 4018.711914, avg loss: 2.369524, ppl: 10.692296 +epoch: 2, batch: 24705, sum loss: 3226.592041, avg loss: 2.181604, ppl: 8.860504 +epoch: 2, batch: 24706, sum loss: 3958.995605, avg loss: 2.451391, ppl: 11.604471 +epoch: 2, batch: 24707, sum loss: 3738.491699, avg loss: 2.422872, ppl: 11.278207 +epoch: 2, batch: 24708, sum loss: 4667.921387, avg loss: 2.659784, ppl: 14.293206 +epoch: 2, batch: 24709, sum loss: 3516.030029, avg loss: 2.388608, ppl: 10.898308 +epoch: 2, batch: 24710, sum loss: 3652.468750, avg loss: 2.191043, ppl: 8.944536 +epoch: 2, batch: 24711, sum loss: 4486.353027, avg loss: 2.596269, ppl: 13.413600 +epoch: 2, batch: 24712, sum loss: 4383.403809, avg loss: 2.621653, ppl: 13.758449 +epoch: 2, batch: 24713, sum loss: 3757.170898, avg loss: 2.319241, ppl: 10.167956 +epoch: 2, batch: 24714, sum loss: 4052.815430, avg loss: 2.301428, ppl: 9.988439 +epoch: 2, batch: 24715, sum loss: 4011.711914, avg loss: 2.452147, ppl: 11.613251 +epoch: 2, batch: 24716, sum loss: 4734.975098, avg loss: 2.574755, ppl: 13.128106 +epoch: 2, batch: 24717, sum loss: 3976.552246, avg loss: 2.336400, ppl: 10.343926 +epoch: 2, batch: 24718, sum loss: 4815.642578, avg loss: 2.725321, ppl: 15.261316 +epoch: 2, batch: 24719, sum loss: 3762.755371, avg loss: 2.486950, ppl: 12.024548 +epoch: 2, batch: 24720, sum loss: 5075.462402, avg loss: 2.861027, ppl: 17.479473 +epoch: 2, batch: 24721, sum loss: 3624.326660, avg loss: 2.473943, ppl: 11.869155 +epoch: 2, batch: 24722, sum loss: 4321.884766, avg loss: 2.686069, ppl: 14.673876 +epoch: 2, batch: 24723, sum loss: 4348.155273, avg loss: 2.522132, ppl: 12.455122 +epoch: 2, batch: 24724, sum loss: 4637.979492, avg loss: 2.531648, ppl: 12.574213 +epoch: 2, batch: 24725, sum loss: 3535.469238, avg loss: 2.301738, ppl: 9.991530 +epoch: 2, 
batch: 24726, sum loss: 4060.053711, avg loss: 2.501573, ppl: 12.201676 +epoch: 2, batch: 24727, sum loss: 3415.549561, avg loss: 2.182460, ppl: 8.868093 +epoch: 2, batch: 24728, sum loss: 3984.640625, avg loss: 2.539605, ppl: 12.674665 +epoch: 2, batch: 24729, sum loss: 4021.466064, avg loss: 2.476272, ppl: 11.896832 +epoch: 2, batch: 24730, sum loss: 3709.742676, avg loss: 2.450292, ppl: 11.591735 +epoch: 2, batch: 24731, sum loss: 5285.484375, avg loss: 2.760044, ppl: 15.800540 +epoch: 2, batch: 24732, sum loss: 4381.834473, avg loss: 2.416897, ppl: 11.211020 +epoch: 2, batch: 24733, sum loss: 3227.002441, avg loss: 2.075243, ppl: 7.966480 +epoch: 2, batch: 24734, sum loss: 3721.093506, avg loss: 2.425745, ppl: 11.310658 +epoch: 2, batch: 24735, sum loss: 4251.122070, avg loss: 2.445985, ppl: 11.541914 +epoch: 2, batch: 24736, sum loss: 4035.155518, avg loss: 2.282328, ppl: 9.799466 +epoch: 2, batch: 24737, sum loss: 3892.556641, avg loss: 2.333667, ppl: 10.315701 +epoch: 2, batch: 24738, sum loss: 3950.619629, avg loss: 2.446204, ppl: 11.544443 +epoch: 2, batch: 24739, sum loss: 3734.415527, avg loss: 2.192845, ppl: 8.960673 +epoch: 2, batch: 24740, sum loss: 3934.611328, avg loss: 2.356055, ppl: 10.549251 +epoch: 2, batch: 24741, sum loss: 4333.025391, avg loss: 2.478848, ppl: 11.927510 +epoch: 2, batch: 24742, sum loss: 4014.298584, avg loss: 2.604996, ppl: 13.531171 +epoch: 2, batch: 24743, sum loss: 4398.896973, avg loss: 2.338595, ppl: 10.366660 +epoch: 2, batch: 24744, sum loss: 4056.381836, avg loss: 2.273757, ppl: 9.715832 +epoch: 2, batch: 24745, sum loss: 4274.742188, avg loss: 2.499849, ppl: 12.180658 +epoch: 2, batch: 24746, sum loss: 4241.713867, avg loss: 2.356508, ppl: 10.554030 +epoch: 2, batch: 24747, sum loss: 4085.092285, avg loss: 2.380590, ppl: 10.811279 +epoch: 2, batch: 24748, sum loss: 4744.395996, avg loss: 2.647542, ppl: 14.119297 +epoch: 2, batch: 24749, sum loss: 4743.812500, avg loss: 2.585184, ppl: 13.265731 +epoch: 2, batch: 
24750, sum loss: 5001.104004, avg loss: 2.698923, ppl: 14.863713 +epoch: 2, batch: 24751, sum loss: 3865.346436, avg loss: 2.399346, ppl: 11.015971 +epoch: 2, batch: 24752, sum loss: 3904.498047, avg loss: 2.572133, ppl: 13.093724 +epoch: 2, batch: 24753, sum loss: 4301.499023, avg loss: 2.453793, ppl: 11.632385 +epoch: 2, batch: 24754, sum loss: 3950.835938, avg loss: 2.444824, ppl: 11.528522 +epoch: 2, batch: 24755, sum loss: 4228.296875, avg loss: 2.391571, ppl: 10.930651 +epoch: 2, batch: 24756, sum loss: 4430.312012, avg loss: 2.465394, ppl: 11.768112 +epoch: 2, batch: 24757, sum loss: 4030.345703, avg loss: 2.332376, ppl: 10.302391 +epoch: 2, batch: 24758, sum loss: 3385.350098, avg loss: 2.184097, ppl: 8.882623 +epoch: 2, batch: 24759, sum loss: 3769.843018, avg loss: 2.410386, ppl: 11.138255 +epoch: 2, batch: 24760, sum loss: 4082.522949, avg loss: 2.348978, ppl: 10.474854 +epoch: 2, batch: 24761, sum loss: 4168.402344, avg loss: 2.286562, ppl: 9.841045 +epoch: 2, batch: 24762, sum loss: 3385.500244, avg loss: 2.309345, ppl: 10.067830 +epoch: 2, batch: 24763, sum loss: 3816.438232, avg loss: 2.151318, ppl: 8.596181 +epoch: 2, batch: 24764, sum loss: 4071.236084, avg loss: 2.400493, ppl: 11.028611 +epoch: 2, batch: 24765, sum loss: 3656.064941, avg loss: 2.303759, ppl: 10.011744 +epoch: 2, batch: 24766, sum loss: 4866.091797, avg loss: 2.531786, ppl: 12.575943 +epoch: 2, batch: 24767, sum loss: 4062.106934, avg loss: 2.548373, ppl: 12.786286 +epoch: 2, batch: 24768, sum loss: 4263.922852, avg loss: 2.481911, ppl: 11.964106 +epoch: 2, batch: 24769, sum loss: 4251.036621, avg loss: 2.571710, ppl: 13.088184 +epoch: 2, batch: 24770, sum loss: 4429.551270, avg loss: 2.395647, ppl: 10.975297 +epoch: 2, batch: 24771, sum loss: 3741.362549, avg loss: 2.386073, ppl: 10.870722 +epoch: 2, batch: 24772, sum loss: 4890.106445, avg loss: 2.525881, ppl: 12.501908 +epoch: 2, batch: 24773, sum loss: 4071.728760, avg loss: 2.446953, ppl: 11.553086 +epoch: 2, batch: 24774, sum 
loss: 3709.875000, avg loss: 2.325940, ppl: 10.236301 +epoch: 2, batch: 24775, sum loss: 3965.653564, avg loss: 2.517875, ppl: 12.402216 +epoch: 2, batch: 24776, sum loss: 4636.354980, avg loss: 2.456998, ppl: 11.669724 +epoch: 2, batch: 24777, sum loss: 3481.344482, avg loss: 2.098459, ppl: 8.153600 +epoch: 2, batch: 24778, sum loss: 5167.729004, avg loss: 2.874154, ppl: 17.710432 +epoch: 2, batch: 24779, sum loss: 4269.517578, avg loss: 2.571999, ppl: 13.091964 +epoch: 2, batch: 24780, sum loss: 4281.450195, avg loss: 2.418899, ppl: 11.233479 +epoch: 2, batch: 24781, sum loss: 4232.994629, avg loss: 2.303044, ppl: 10.004588 +epoch: 2, batch: 24782, sum loss: 4813.631836, avg loss: 2.597751, ppl: 13.433488 +epoch: 2, batch: 24783, sum loss: 4600.453125, avg loss: 2.544498, ppl: 12.736838 +epoch: 2, batch: 24784, sum loss: 4014.926270, avg loss: 2.385577, ppl: 10.865330 +epoch: 2, batch: 24785, sum loss: 4218.662598, avg loss: 2.552125, ppl: 12.834348 +epoch: 2, batch: 24786, sum loss: 4487.014160, avg loss: 2.422794, ppl: 11.277323 +epoch: 2, batch: 24787, sum loss: 3750.374023, avg loss: 2.527206, ppl: 12.518483 +epoch: 2, batch: 24788, sum loss: 3753.671387, avg loss: 2.243677, ppl: 9.427933 +epoch: 2, batch: 24789, sum loss: 4943.536133, avg loss: 2.672182, ppl: 14.471509 +epoch: 2, batch: 24790, sum loss: 4345.507812, avg loss: 2.719342, ppl: 15.170329 +epoch: 2, batch: 24791, sum loss: 4124.072754, avg loss: 2.315594, ppl: 10.130939 +epoch: 2, batch: 24792, sum loss: 4519.896484, avg loss: 2.576908, ppl: 13.156394 +epoch: 2, batch: 24793, sum loss: 3443.289307, avg loss: 2.183443, ppl: 8.876815 +epoch: 2, batch: 24794, sum loss: 3347.138672, avg loss: 2.199171, ppl: 9.017538 +epoch: 2, batch: 24795, sum loss: 5018.464355, avg loss: 2.760432, ppl: 15.806663 +epoch: 2, batch: 24796, sum loss: 4175.713379, avg loss: 2.420704, ppl: 11.253776 +epoch: 2, batch: 24797, sum loss: 3989.997070, avg loss: 2.539782, ppl: 12.676905 +epoch: 2, batch: 24798, sum loss: 
3745.166504, avg loss: 2.397674, ppl: 10.997564 +epoch: 2, batch: 24799, sum loss: 4703.862305, avg loss: 2.678737, ppl: 14.566687 +epoch: 2, batch: 24800, sum loss: 4415.500977, avg loss: 2.651953, ppl: 14.181702 +epoch: 2, batch: 24801, sum loss: 3763.819580, avg loss: 2.415802, ppl: 11.198749 +epoch: 2, batch: 24802, sum loss: 4296.849609, avg loss: 2.588464, ppl: 13.309310 +epoch: 2, batch: 24803, sum loss: 4654.908203, avg loss: 2.509384, ppl: 12.297358 +epoch: 2, batch: 24804, sum loss: 4262.418945, avg loss: 2.680767, ppl: 14.596278 +epoch: 2, batch: 24805, sum loss: 3671.460205, avg loss: 2.328130, ppl: 10.258735 +epoch: 2, batch: 24806, sum loss: 4572.954590, avg loss: 2.561879, ppl: 12.960152 +epoch: 2, batch: 24807, sum loss: 3670.778564, avg loss: 2.388275, ppl: 10.894684 +epoch: 2, batch: 24808, sum loss: 4837.672363, avg loss: 2.544804, ppl: 12.740732 +epoch: 2, batch: 24809, sum loss: 3825.833984, avg loss: 2.500545, ppl: 12.189138 +epoch: 2, batch: 24810, sum loss: 3570.637451, avg loss: 2.406090, ppl: 11.090510 +epoch: 2, batch: 24811, sum loss: 3702.972412, avg loss: 2.635568, ppl: 13.951230 +epoch: 2, batch: 24812, sum loss: 4298.750488, avg loss: 2.323649, ppl: 10.212873 +epoch: 2, batch: 24813, sum loss: 3919.124023, avg loss: 2.295913, ppl: 9.933503 +epoch: 2, batch: 24814, sum loss: 5033.285645, avg loss: 2.647704, ppl: 14.121580 +epoch: 2, batch: 24815, sum loss: 4001.168457, avg loss: 2.457720, ppl: 11.678156 +epoch: 2, batch: 24816, sum loss: 3763.072998, avg loss: 2.407596, ppl: 11.107231 +epoch: 2, batch: 24817, sum loss: 3597.105957, avg loss: 2.326718, ppl: 10.244266 +epoch: 2, batch: 24818, sum loss: 4200.618164, avg loss: 2.567615, ppl: 13.034700 +epoch: 2, batch: 24819, sum loss: 3696.793701, avg loss: 2.296145, ppl: 9.935808 +epoch: 2, batch: 24820, sum loss: 4954.621094, avg loss: 2.549985, ppl: 12.806914 +epoch: 2, batch: 24821, sum loss: 3908.486084, avg loss: 2.377425, ppl: 10.777113 +epoch: 2, batch: 24822, sum loss: 
5128.397949, avg loss: 2.609872, ppl: 13.597305 +epoch: 2, batch: 24823, sum loss: 5459.783203, avg loss: 2.866028, ppl: 17.567101 +epoch: 2, batch: 24824, sum loss: 5712.485840, avg loss: 2.692029, ppl: 14.761597 +epoch: 2, batch: 24825, sum loss: 4111.104492, avg loss: 2.445630, ppl: 11.537820 +epoch: 2, batch: 24826, sum loss: 4108.136719, avg loss: 2.304059, ppl: 10.014748 +epoch: 2, batch: 24827, sum loss: 3850.118652, avg loss: 2.483948, ppl: 11.988496 +epoch: 2, batch: 24828, sum loss: 4316.908691, avg loss: 2.646786, ppl: 14.108627 +epoch: 2, batch: 24829, sum loss: 4633.692383, avg loss: 2.711347, ppl: 15.049539 +epoch: 2, batch: 24830, sum loss: 3810.661377, avg loss: 2.386137, ppl: 10.871422 +epoch: 2, batch: 24831, sum loss: 4018.012695, avg loss: 2.416123, ppl: 11.202345 +epoch: 2, batch: 24832, sum loss: 4212.934082, avg loss: 2.396436, ppl: 10.983957 +epoch: 2, batch: 24833, sum loss: 4699.328613, avg loss: 2.530602, ppl: 12.561069 +epoch: 2, batch: 24834, sum loss: 4466.493164, avg loss: 2.512089, ppl: 12.330656 +epoch: 2, batch: 24835, sum loss: 3969.565918, avg loss: 2.509207, ppl: 12.295179 +epoch: 2, batch: 24836, sum loss: 4074.345215, avg loss: 2.444118, ppl: 11.520387 +epoch: 2, batch: 24837, sum loss: 3841.859619, avg loss: 2.317165, ppl: 10.146869 +epoch: 2, batch: 24838, sum loss: 4111.904297, avg loss: 2.502681, ppl: 12.215194 +epoch: 2, batch: 24839, sum loss: 3908.282959, avg loss: 2.579725, ppl: 13.193506 +epoch: 2, batch: 24840, sum loss: 3916.539062, avg loss: 2.486691, ppl: 12.021437 +epoch: 2, batch: 24841, sum loss: 3939.761719, avg loss: 2.411115, ppl: 11.146381 +epoch: 2, batch: 24842, sum loss: 4385.144043, avg loss: 2.550986, ppl: 12.819733 +epoch: 2, batch: 24843, sum loss: 3248.766113, avg loss: 2.506764, ppl: 12.265175 +epoch: 2, batch: 24844, sum loss: 4102.101562, avg loss: 2.453410, ppl: 11.627933 +epoch: 2, batch: 24845, sum loss: 2952.544922, avg loss: 2.274688, ppl: 9.724882 +epoch: 2, batch: 24846, sum loss: 
3140.917969, avg loss: 2.175151, ppl: 8.803514 +epoch: 2, batch: 24847, sum loss: 4590.295898, avg loss: 2.562979, ppl: 12.974414 +epoch: 2, batch: 24848, sum loss: 4083.312012, avg loss: 2.462794, ppl: 11.737558 +epoch: 2, batch: 24849, sum loss: 4428.583984, avg loss: 2.469930, ppl: 11.821616 +epoch: 2, batch: 24850, sum loss: 4778.433594, avg loss: 2.566291, ppl: 13.017451 +epoch: 2, batch: 24851, sum loss: 4667.379883, avg loss: 2.581515, ppl: 13.217151 +epoch: 2, batch: 24852, sum loss: 4272.098145, avg loss: 2.493928, ppl: 12.108743 +epoch: 2, batch: 24853, sum loss: 4576.199707, avg loss: 2.652869, ppl: 14.194712 +epoch: 2, batch: 24854, sum loss: 3384.705811, avg loss: 2.348859, ppl: 10.473614 +epoch: 2, batch: 24855, sum loss: 3923.121582, avg loss: 2.300951, ppl: 9.983675 +epoch: 2, batch: 24856, sum loss: 3809.841553, avg loss: 2.443773, ppl: 11.516408 +epoch: 2, batch: 24857, sum loss: 4267.458008, avg loss: 2.652242, ppl: 14.185814 +epoch: 2, batch: 24858, sum loss: 4404.710449, avg loss: 2.534356, ppl: 12.608307 +epoch: 2, batch: 24859, sum loss: 3753.421875, avg loss: 2.484065, ppl: 11.989902 +epoch: 2, batch: 24860, sum loss: 4211.297852, avg loss: 2.477234, ppl: 11.908282 +epoch: 2, batch: 24861, sum loss: 3783.401367, avg loss: 2.358729, ppl: 10.577500 +epoch: 2, batch: 24862, sum loss: 4433.666016, avg loss: 2.601917, ppl: 13.489570 +epoch: 2, batch: 24863, sum loss: 4086.791260, avg loss: 2.356858, ppl: 10.557722 +epoch: 2, batch: 24864, sum loss: 4767.615234, avg loss: 2.610961, ppl: 13.612132 +epoch: 2, batch: 24865, sum loss: 3959.140381, avg loss: 2.533039, ppl: 12.591719 +epoch: 2, batch: 24866, sum loss: 4519.211914, avg loss: 2.560460, ppl: 12.941771 +epoch: 2, batch: 24867, sum loss: 4432.775879, avg loss: 2.401287, ppl: 11.037374 +epoch: 2, batch: 24868, sum loss: 4025.612305, avg loss: 2.511299, ppl: 12.320923 +epoch: 2, batch: 24869, sum loss: 3386.388428, avg loss: 2.286555, ppl: 9.840980 +epoch: 2, batch: 24870, sum loss: 
4406.926758, avg loss: 2.532717, ppl: 12.587654 +epoch: 2, batch: 24871, sum loss: 4820.743652, avg loss: 2.556068, ppl: 12.885050 +epoch: 2, batch: 24872, sum loss: 3911.324219, avg loss: 2.279326, ppl: 9.770098 +epoch: 2, batch: 24873, sum loss: 3935.928467, avg loss: 2.410244, ppl: 11.136678 +epoch: 2, batch: 24874, sum loss: 3947.468994, avg loss: 2.496818, ppl: 12.143788 +epoch: 2, batch: 24875, sum loss: 5491.826172, avg loss: 2.867794, ppl: 17.598158 +epoch: 2, batch: 24876, sum loss: 3347.481445, avg loss: 2.137600, ppl: 8.479063 +epoch: 2, batch: 24877, sum loss: 4839.940430, avg loss: 2.623274, ppl: 13.780766 +epoch: 2, batch: 24878, sum loss: 4673.979980, avg loss: 2.764033, ppl: 15.863697 +epoch: 2, batch: 24879, sum loss: 3901.748535, avg loss: 2.259264, ppl: 9.576036 +epoch: 2, batch: 24880, sum loss: 3743.513916, avg loss: 2.220352, ppl: 9.210576 +epoch: 2, batch: 24881, sum loss: 4241.585449, avg loss: 2.489193, ppl: 12.051552 +epoch: 2, batch: 24882, sum loss: 4739.329102, avg loss: 2.614081, ppl: 13.654667 +epoch: 2, batch: 24883, sum loss: 4271.932617, avg loss: 2.521802, ppl: 12.451013 +epoch: 2, batch: 24884, sum loss: 3687.131592, avg loss: 2.349988, ppl: 10.485446 +epoch: 2, batch: 24885, sum loss: 3805.124756, avg loss: 2.415952, ppl: 11.200431 +epoch: 2, batch: 24886, sum loss: 4069.086670, avg loss: 2.568868, ppl: 13.051042 +epoch: 2, batch: 24887, sum loss: 4523.045898, avg loss: 2.971778, ppl: 19.526609 +epoch: 2, batch: 24888, sum loss: 4337.639160, avg loss: 2.602063, ppl: 13.491542 +epoch: 2, batch: 24889, sum loss: 4542.392578, avg loss: 2.556214, ppl: 12.886936 +epoch: 2, batch: 24890, sum loss: 4987.094727, avg loss: 2.668322, ppl: 14.415764 +epoch: 2, batch: 24891, sum loss: 3967.823242, avg loss: 2.452301, ppl: 11.615043 +epoch: 2, batch: 24892, sum loss: 4158.552246, avg loss: 2.381760, ppl: 10.823933 +epoch: 2, batch: 24893, sum loss: 4512.783203, avg loss: 2.572852, ppl: 13.103147 +epoch: 2, batch: 24894, sum loss: 
3559.849854, avg loss: 2.326699, ppl: 10.244073 +epoch: 2, batch: 24895, sum loss: 4174.030273, avg loss: 2.401628, ppl: 11.041142 +epoch: 2, batch: 24896, sum loss: 3941.561768, avg loss: 2.576184, ppl: 13.146877 +epoch: 2, batch: 24897, sum loss: 5196.875488, avg loss: 2.689894, ppl: 14.730118 +epoch: 2, batch: 24898, sum loss: 4231.974121, avg loss: 2.733834, ppl: 15.391779 +epoch: 2, batch: 24899, sum loss: 4987.749023, avg loss: 2.855036, ppl: 17.375071 +epoch: 2, batch: 24900, sum loss: 3591.188721, avg loss: 2.324394, ppl: 10.220487 +epoch: 2, batch: 24901, sum loss: 3251.035156, avg loss: 2.127641, ppl: 8.395038 +epoch: 2, batch: 24902, sum loss: 3787.095947, avg loss: 2.533175, ppl: 12.593421 +epoch: 2, batch: 24903, sum loss: 4137.395508, avg loss: 2.566622, ppl: 13.021769 +epoch: 2, batch: 24904, sum loss: 4524.927246, avg loss: 2.597547, ppl: 13.430753 +epoch: 2, batch: 24905, sum loss: 3939.691162, avg loss: 2.418472, ppl: 11.228689 +epoch: 2, batch: 24906, sum loss: 3667.304443, avg loss: 2.427071, ppl: 11.325662 +epoch: 2, batch: 24907, sum loss: 4268.289551, avg loss: 2.362086, ppl: 10.613068 +epoch: 2, batch: 24908, sum loss: 5102.820312, avg loss: 2.614150, ppl: 13.655602 +epoch: 2, batch: 24909, sum loss: 4030.335938, avg loss: 2.413375, ppl: 11.171600 +epoch: 2, batch: 24910, sum loss: 4605.635742, avg loss: 2.575859, ppl: 13.142600 +epoch: 2, batch: 24911, sum loss: 3678.090820, avg loss: 2.365332, ppl: 10.647572 +epoch: 2, batch: 24912, sum loss: 4088.086182, avg loss: 2.364422, ppl: 10.637892 +epoch: 2, batch: 24913, sum loss: 3604.327637, avg loss: 2.266873, ppl: 9.649178 +epoch: 2, batch: 24914, sum loss: 3777.543457, avg loss: 2.301976, ppl: 9.993916 +epoch: 2, batch: 24915, sum loss: 3496.625000, avg loss: 2.312583, ppl: 10.100478 +epoch: 2, batch: 24916, sum loss: 3639.371582, avg loss: 2.329943, ppl: 10.277360 +epoch: 2, batch: 24917, sum loss: 4003.999023, avg loss: 2.301149, ppl: 9.985648 +epoch: 2, batch: 24918, sum loss: 
4717.883789, avg loss: 2.561283, ppl: 12.952429 +epoch: 2, batch: 24919, sum loss: 3752.395020, avg loss: 2.468681, ppl: 11.806865 +epoch: 2, batch: 24920, sum loss: 3967.084473, avg loss: 2.464028, ppl: 11.752049 +epoch: 2, batch: 24921, sum loss: 3754.381348, avg loss: 2.545343, ppl: 12.747605 +epoch: 2, batch: 24922, sum loss: 4200.626465, avg loss: 2.378611, ppl: 10.789901 +epoch: 2, batch: 24923, sum loss: 3877.077148, avg loss: 2.569302, ppl: 13.056712 +epoch: 2, batch: 24924, sum loss: 3896.906738, avg loss: 2.290950, ppl: 9.884326 +epoch: 2, batch: 24925, sum loss: 4123.594727, avg loss: 2.531366, ppl: 12.570662 +epoch: 2, batch: 24926, sum loss: 4356.656250, avg loss: 2.513939, ppl: 12.353494 +epoch: 2, batch: 24927, sum loss: 3732.331787, avg loss: 2.337090, ppl: 10.351067 +epoch: 2, batch: 24928, sum loss: 3371.261963, avg loss: 2.334669, ppl: 10.326040 +epoch: 2, batch: 24929, sum loss: 3813.616455, avg loss: 2.328215, ppl: 10.259613 +epoch: 2, batch: 24930, sum loss: 2885.937988, avg loss: 2.238897, ppl: 9.382975 +epoch: 2, batch: 24931, sum loss: 3558.911621, avg loss: 2.442630, ppl: 11.503252 +epoch: 2, batch: 24932, sum loss: 3844.787598, avg loss: 2.389551, ppl: 10.908594 +epoch: 2, batch: 24933, sum loss: 4964.375977, avg loss: 2.558957, ppl: 12.922331 +epoch: 2, batch: 24934, sum loss: 4830.306152, avg loss: 2.487284, ppl: 12.028567 +epoch: 2, batch: 24935, sum loss: 3804.364746, avg loss: 2.486513, ppl: 12.019291 +epoch: 2, batch: 24936, sum loss: 4133.226562, avg loss: 2.438482, ppl: 11.455636 +epoch: 2, batch: 24937, sum loss: 3807.415039, avg loss: 2.388592, ppl: 10.898133 +epoch: 2, batch: 24938, sum loss: 4390.509766, avg loss: 2.377104, ppl: 10.773652 +epoch: 2, batch: 24939, sum loss: 4362.016113, avg loss: 2.436880, ppl: 11.437304 +epoch: 2, batch: 24940, sum loss: 3737.531494, avg loss: 2.496681, ppl: 12.142127 +epoch: 2, batch: 24941, sum loss: 3932.212402, avg loss: 2.232943, ppl: 9.327274 +epoch: 2, batch: 24942, sum loss: 
5089.302734, avg loss: 2.608561, ppl: 13.579499 +epoch: 2, batch: 24943, sum loss: 3856.304199, avg loss: 2.537042, ppl: 12.642224 +epoch: 2, batch: 24944, sum loss: 4032.839600, avg loss: 2.362530, ppl: 10.617785 +epoch: 2, batch: 24945, sum loss: 3703.623291, avg loss: 2.431795, ppl: 11.379286 +epoch: 2, batch: 24946, sum loss: 3669.299805, avg loss: 2.513219, ppl: 12.344604 +epoch: 2, batch: 24947, sum loss: 3364.640869, avg loss: 2.128172, ppl: 8.399502 +epoch: 2, batch: 24948, sum loss: 3988.984375, avg loss: 2.296479, ppl: 9.939127 +epoch: 2, batch: 24949, sum loss: 4306.833008, avg loss: 2.426385, ppl: 11.317890 +epoch: 2, batch: 24950, sum loss: 4232.882812, avg loss: 2.624230, ppl: 13.793947 +epoch: 2, batch: 24951, sum loss: 4380.039062, avg loss: 2.372719, ppl: 10.726516 +epoch: 2, batch: 24952, sum loss: 4222.425293, avg loss: 2.454898, ppl: 11.645249 +epoch: 2, batch: 24953, sum loss: 4463.075195, avg loss: 2.430869, ppl: 11.368755 +epoch: 2, batch: 24954, sum loss: 4344.026855, avg loss: 2.355763, ppl: 10.546172 +epoch: 2, batch: 24955, sum loss: 4570.526855, avg loss: 2.638872, ppl: 13.997411 +epoch: 2, batch: 24956, sum loss: 3657.608643, avg loss: 2.370453, ppl: 10.702238 +epoch: 2, batch: 24957, sum loss: 3873.960449, avg loss: 2.605219, ppl: 13.534187 +epoch: 2, batch: 24958, sum loss: 4660.812500, avg loss: 2.598000, ppl: 13.436841 +epoch: 2, batch: 24959, sum loss: 4493.849609, avg loss: 2.700631, ppl: 14.889122 +epoch: 2, batch: 24960, sum loss: 4254.005859, avg loss: 2.533654, ppl: 12.599466 +epoch: 2, batch: 24961, sum loss: 4117.108398, avg loss: 2.400646, ppl: 11.030302 +epoch: 2, batch: 24962, sum loss: 4638.054688, avg loss: 2.565296, ppl: 13.004503 +epoch: 2, batch: 24963, sum loss: 3883.469971, avg loss: 2.566735, ppl: 13.023234 +epoch: 2, batch: 24964, sum loss: 5053.067871, avg loss: 2.709420, ppl: 15.020557 +epoch: 2, batch: 24965, sum loss: 4203.206055, avg loss: 2.516890, ppl: 12.390001 +epoch: 2, batch: 24966, sum loss: 
4282.396484, avg loss: 2.669823, ppl: 14.437420 +epoch: 2, batch: 24967, sum loss: 3511.666504, avg loss: 2.395407, ppl: 10.972663 +epoch: 2, batch: 24968, sum loss: 3588.812012, avg loss: 2.368853, ppl: 10.685128 +epoch: 2, batch: 24969, sum loss: 4996.939941, avg loss: 2.498470, ppl: 12.163870 +epoch: 2, batch: 24970, sum loss: 3767.097656, avg loss: 2.471849, ppl: 11.844326 +epoch: 2, batch: 24971, sum loss: 3715.348145, avg loss: 2.462126, ppl: 11.729723 +epoch: 2, batch: 24972, sum loss: 4394.795898, avg loss: 2.341394, ppl: 10.395715 +epoch: 2, batch: 24973, sum loss: 3666.754395, avg loss: 2.322200, ppl: 10.198089 +epoch: 2, batch: 24974, sum loss: 5452.636719, avg loss: 2.865285, ppl: 17.554050 +epoch: 2, batch: 24975, sum loss: 4918.447754, avg loss: 2.634412, ppl: 13.935121 +epoch: 2, batch: 24976, sum loss: 3437.117188, avg loss: 2.317679, ppl: 10.152081 +epoch: 2, batch: 24977, sum loss: 4328.225586, avg loss: 2.608937, ppl: 13.584597 +epoch: 2, batch: 24978, sum loss: 3741.329590, avg loss: 2.264727, ppl: 9.628499 +epoch: 2, batch: 24979, sum loss: 4612.312500, avg loss: 2.637114, ppl: 13.972820 +epoch: 2, batch: 24980, sum loss: 4249.210449, avg loss: 2.490745, ppl: 12.070260 +epoch: 2, batch: 24981, sum loss: 3531.531250, avg loss: 2.185354, ppl: 8.893792 +epoch: 2, batch: 24982, sum loss: 4444.983398, avg loss: 2.538540, ppl: 12.661174 +epoch: 2, batch: 24983, sum loss: 4431.941406, avg loss: 2.518148, ppl: 12.405605 +epoch: 2, batch: 24984, sum loss: 4644.569336, avg loss: 2.411511, ppl: 11.150794 +epoch: 2, batch: 24985, sum loss: 3659.886963, avg loss: 2.281725, ppl: 9.793561 +epoch: 2, batch: 24986, sum loss: 3985.452637, avg loss: 2.361050, ppl: 10.602079 +epoch: 2, batch: 24987, sum loss: 4280.889648, avg loss: 2.528582, ppl: 12.535722 +epoch: 2, batch: 24988, sum loss: 4882.708984, avg loss: 2.464770, ppl: 11.760772 +epoch: 2, batch: 24989, sum loss: 4411.038086, avg loss: 2.442435, ppl: 11.501015 +epoch: 2, batch: 24990, sum loss: 
3785.380371, avg loss: 2.398847, ppl: 11.010472 +epoch: 2, batch: 24991, sum loss: 3663.173340, avg loss: 2.163717, ppl: 8.703430 +epoch: 2, batch: 24992, sum loss: 5109.601562, avg loss: 2.778467, ppl: 16.094336 +epoch: 2, batch: 24993, sum loss: 3289.401611, avg loss: 2.181301, ppl: 8.857821 +epoch: 2, batch: 24994, sum loss: 4819.864258, avg loss: 2.618069, ppl: 13.709221 +epoch: 2, batch: 24995, sum loss: 4949.148438, avg loss: 2.626937, ppl: 13.831335 +epoch: 2, batch: 24996, sum loss: 5542.967285, avg loss: 2.643284, ppl: 14.059303 +epoch: 2, batch: 24997, sum loss: 4007.357910, avg loss: 2.421364, ppl: 11.261212 +epoch: 2, batch: 24998, sum loss: 3640.716797, avg loss: 2.381110, ppl: 10.816902 +epoch: 2, batch: 24999, sum loss: 3499.041992, avg loss: 2.372232, ppl: 10.721295 +epoch: 2, batch: 25000, sum loss: 4161.553223, avg loss: 2.442226, ppl: 11.498610 +epoch: 2, batch: 25001, sum loss: 3635.614990, avg loss: 2.312732, ppl: 10.101988 +epoch: 2, batch: 25002, sum loss: 4147.816895, avg loss: 2.444206, ppl: 11.521396 +epoch: 2, batch: 25003, sum loss: 4322.115723, avg loss: 2.380020, ppl: 10.805116 +epoch: 2, batch: 25004, sum loss: 3548.578613, avg loss: 2.448985, ppl: 11.576586 +epoch: 2, batch: 25005, sum loss: 4581.585938, avg loss: 2.573925, ppl: 13.117206 +epoch: 2, batch: 25006, sum loss: 3793.169189, avg loss: 2.394677, ppl: 10.964661 +epoch: 2, batch: 25007, sum loss: 3516.185547, avg loss: 2.441796, ppl: 11.493660 +epoch: 2, batch: 25008, sum loss: 4047.092773, avg loss: 2.426315, ppl: 11.317097 +epoch: 2, batch: 25009, sum loss: 4014.559814, avg loss: 2.362896, ppl: 10.621664 +epoch: 2, batch: 25010, sum loss: 4525.323242, avg loss: 2.435588, ppl: 11.422537 +epoch: 2, batch: 25011, sum loss: 4716.647949, avg loss: 2.699856, ppl: 14.877583 +epoch: 2, batch: 25012, sum loss: 4518.027832, avg loss: 2.396832, ppl: 10.988308 +epoch: 2, batch: 25013, sum loss: 5188.360840, avg loss: 2.893676, ppl: 18.059576 +epoch: 2, batch: 25014, sum loss: 
4187.004883, avg loss: 2.448541, ppl: 11.571451 +epoch: 2, batch: 25015, sum loss: 4105.962402, avg loss: 2.398342, ppl: 11.004919 +epoch: 2, batch: 25016, sum loss: 4365.152832, avg loss: 2.607618, ppl: 13.566701 +epoch: 2, batch: 25017, sum loss: 4573.443359, avg loss: 2.625398, ppl: 13.810068 +epoch: 2, batch: 25018, sum loss: 3491.741699, avg loss: 2.372107, ppl: 10.719958 +epoch: 2, batch: 25019, sum loss: 3518.032959, avg loss: 2.218180, ppl: 9.190586 +epoch: 2, batch: 25020, sum loss: 4041.288574, avg loss: 2.777518, ppl: 16.079060 +epoch: 2, batch: 25021, sum loss: 5019.190918, avg loss: 2.704305, ppl: 14.943933 +epoch: 2, batch: 25022, sum loss: 4471.682129, avg loss: 2.376027, ppl: 10.762056 +epoch: 2, batch: 25023, sum loss: 4279.131348, avg loss: 2.550138, ppl: 12.808868 +epoch: 2, batch: 25024, sum loss: 3399.104004, avg loss: 2.328153, ppl: 10.258980 +epoch: 2, batch: 25025, sum loss: 3412.906982, avg loss: 2.370074, ppl: 10.698187 +epoch: 2, batch: 25026, sum loss: 2727.626221, avg loss: 1.937235, ppl: 6.939534 +epoch: 2, batch: 25027, sum loss: 4689.328613, avg loss: 2.665906, ppl: 14.380976 +epoch: 2, batch: 25028, sum loss: 4046.662598, avg loss: 2.468982, ppl: 11.810423 +epoch: 2, batch: 25029, sum loss: 4239.700195, avg loss: 2.567959, ppl: 13.039186 +epoch: 2, batch: 25030, sum loss: 4168.699219, avg loss: 2.411047, ppl: 11.145619 +epoch: 2, batch: 25031, sum loss: 3880.438721, avg loss: 2.503509, ppl: 12.225318 +epoch: 2, batch: 25032, sum loss: 4161.746094, avg loss: 2.513132, ppl: 12.343525 +epoch: 2, batch: 25033, sum loss: 4627.880859, avg loss: 2.446026, ppl: 11.542384 +epoch: 2, batch: 25034, sum loss: 4808.918945, avg loss: 2.805670, ppl: 16.538157 +epoch: 2, batch: 25035, sum loss: 4884.174316, avg loss: 2.478018, ppl: 11.917624 +epoch: 2, batch: 25036, sum loss: 4836.373535, avg loss: 2.669080, ppl: 14.426694 +epoch: 2, batch: 25037, sum loss: 4670.930664, avg loss: 2.551027, ppl: 12.820268 +epoch: 2, batch: 25038, sum loss: 
4006.622803, avg loss: 2.714514, ppl: 15.097275 +epoch: 2, batch: 25039, sum loss: 3690.460938, avg loss: 2.371762, ppl: 10.716253 +epoch: 2, batch: 25040, sum loss: 4417.598145, avg loss: 2.592487, ppl: 13.362966 +epoch: 2, batch: 25041, sum loss: 4535.003906, avg loss: 2.632040, ppl: 13.902095 +epoch: 2, batch: 25042, sum loss: 5089.791504, avg loss: 2.833960, ppl: 17.012695 +epoch: 2, batch: 25043, sum loss: 4390.812500, avg loss: 2.670810, ppl: 14.451677 +epoch: 2, batch: 25044, sum loss: 4391.974121, avg loss: 2.665033, ppl: 14.368422 +epoch: 2, batch: 25045, sum loss: 4870.526367, avg loss: 2.875163, ppl: 17.728315 +epoch: 2, batch: 25046, sum loss: 5116.235840, avg loss: 2.749187, ppl: 15.629912 +epoch: 2, batch: 25047, sum loss: 4501.759277, avg loss: 2.700515, ppl: 14.887401 +epoch: 2, batch: 25048, sum loss: 4018.320557, avg loss: 2.503626, ppl: 12.226752 +epoch: 2, batch: 25049, sum loss: 3716.867920, avg loss: 2.387198, ppl: 10.882962 +epoch: 2, batch: 25050, sum loss: 3836.760254, avg loss: 2.473733, ppl: 11.866665 +epoch: 2, batch: 25051, sum loss: 3429.181885, avg loss: 2.244229, ppl: 9.433141 +epoch: 2, batch: 25052, sum loss: 3449.226074, avg loss: 2.147712, ppl: 8.565241 +epoch: 2, batch: 25053, sum loss: 3992.623047, avg loss: 2.470683, ppl: 11.830520 +epoch: 2, batch: 25054, sum loss: 3735.594238, avg loss: 2.433612, ppl: 11.399983 +epoch: 2, batch: 25055, sum loss: 3779.943115, avg loss: 2.352174, ppl: 10.508389 +epoch: 2, batch: 25056, sum loss: 4439.752441, avg loss: 2.518294, ppl: 12.407413 +epoch: 2, batch: 25057, sum loss: 3576.848633, avg loss: 2.225793, ppl: 9.260820 +epoch: 2, batch: 25058, sum loss: 3972.935547, avg loss: 2.139437, ppl: 8.494658 +epoch: 2, batch: 25059, sum loss: 4328.230957, avg loss: 2.597978, ppl: 13.436537 +epoch: 2, batch: 25060, sum loss: 4669.225098, avg loss: 2.654477, ppl: 14.217550 +epoch: 2, batch: 25061, sum loss: 4236.403809, avg loss: 2.437517, ppl: 11.444585 +epoch: 2, batch: 25062, sum loss: 
4874.843262, avg loss: 2.618068, ppl: 13.709218 +epoch: 2, batch: 25063, sum loss: 3497.075684, avg loss: 2.195277, ppl: 8.982487 +epoch: 2, batch: 25064, sum loss: 4292.717773, avg loss: 2.640048, ppl: 14.013873 +epoch: 2, batch: 25065, sum loss: 3943.608398, avg loss: 2.461678, ppl: 11.724469 +epoch: 2, batch: 25066, sum loss: 4103.046387, avg loss: 2.437936, ppl: 11.449386 +epoch: 2, batch: 25067, sum loss: 4170.405273, avg loss: 2.299011, ppl: 9.964320 +epoch: 2, batch: 25068, sum loss: 3888.902588, avg loss: 2.504123, ppl: 12.232822 +epoch: 2, batch: 25069, sum loss: 4487.488770, avg loss: 2.568683, ppl: 13.048624 +epoch: 2, batch: 25070, sum loss: 4147.449707, avg loss: 2.376762, ppl: 10.769972 +epoch: 2, batch: 25071, sum loss: 4066.374268, avg loss: 2.595006, ppl: 13.396667 +epoch: 2, batch: 25072, sum loss: 4354.616699, avg loss: 2.699700, ppl: 14.875274 +epoch: 2, batch: 25073, sum loss: 3875.394043, avg loss: 2.459006, ppl: 11.693187 +epoch: 2, batch: 25074, sum loss: 4838.297852, avg loss: 2.690933, ppl: 14.745427 +epoch: 2, batch: 25075, sum loss: 4902.035645, avg loss: 2.705318, ppl: 14.959069 +epoch: 2, batch: 25076, sum loss: 3534.971191, avg loss: 2.337944, ppl: 10.359915 +epoch: 2, batch: 25077, sum loss: 3946.919922, avg loss: 2.200067, ppl: 9.025617 +epoch: 2, batch: 25078, sum loss: 3285.991211, avg loss: 2.261522, ppl: 9.597684 +epoch: 2, batch: 25079, sum loss: 4827.844727, avg loss: 2.721446, ppl: 15.202282 +epoch: 2, batch: 25080, sum loss: 4925.010742, avg loss: 2.660730, ppl: 14.306727 +epoch: 2, batch: 25081, sum loss: 4647.551758, avg loss: 2.589165, ppl: 13.318652 +epoch: 2, batch: 25082, sum loss: 4885.563477, avg loss: 2.584954, ppl: 13.262683 +epoch: 2, batch: 25083, sum loss: 3886.977539, avg loss: 2.464792, ppl: 11.761042 +epoch: 2, batch: 25084, sum loss: 3470.409424, avg loss: 2.134323, ppl: 8.451324 +epoch: 2, batch: 25085, sum loss: 3671.968994, avg loss: 2.224088, ppl: 9.245047 +epoch: 2, batch: 25086, sum loss: 4233.016113, 
avg loss: 2.438373, ppl: 11.454393 +epoch: 2, batch: 25087, sum loss: 4476.824707, avg loss: 2.535008, ppl: 12.616534 +epoch: 2, batch: 25088, sum loss: 4649.195312, avg loss: 2.535003, ppl: 12.616465 +epoch: 2, batch: 25089, sum loss: 5898.572266, avg loss: 2.878757, ppl: 17.792135 +epoch: 2, batch: 25090, sum loss: 4612.804688, avg loss: 2.600228, ppl: 13.466809 +epoch: 2, batch: 25091, sum loss: 3645.836670, avg loss: 2.451807, ppl: 11.609303 +epoch: 2, batch: 25092, sum loss: 3894.087646, avg loss: 2.531917, ppl: 12.577589 +epoch: 2, batch: 25093, sum loss: 4121.074219, avg loss: 2.469188, ppl: 11.812848 +epoch: 2, batch: 25094, sum loss: 3719.510742, avg loss: 2.421556, ppl: 11.263377 +epoch: 2, batch: 25095, sum loss: 3990.965820, avg loss: 2.511621, ppl: 12.324892 +epoch: 2, batch: 25096, sum loss: 3498.712891, avg loss: 2.318564, ppl: 10.161072 +epoch: 2, batch: 25097, sum loss: 3595.153076, avg loss: 2.307544, ppl: 10.049709 +epoch: 2, batch: 25098, sum loss: 4837.417969, avg loss: 2.728380, ppl: 15.308071 +epoch: 2, batch: 25099, sum loss: 4065.300293, avg loss: 2.382943, ppl: 10.836745 +epoch: 2, batch: 25100, sum loss: 4035.131348, avg loss: 2.362489, ppl: 10.617345 +epoch: 2, batch: 25101, sum loss: 4985.847656, avg loss: 2.610391, ppl: 13.604374 +epoch: 2, batch: 25102, sum loss: 4909.010254, avg loss: 2.587776, ppl: 13.300156 +epoch: 2, batch: 25103, sum loss: 4168.344238, avg loss: 2.376479, ppl: 10.766925 +epoch: 2, batch: 25104, sum loss: 3555.795410, avg loss: 2.250504, ppl: 9.492515 +epoch: 2, batch: 25105, sum loss: 4437.946777, avg loss: 2.688036, ppl: 14.702767 +epoch: 2, batch: 25106, sum loss: 3498.241211, avg loss: 2.321328, ppl: 10.189196 +epoch: 2, batch: 25107, sum loss: 4122.830566, avg loss: 2.403983, ppl: 11.067165 +epoch: 2, batch: 25108, sum loss: 3342.182129, avg loss: 2.430678, ppl: 11.366585 +epoch: 2, batch: 25109, sum loss: 4484.921387, avg loss: 2.577541, ppl: 13.164728 +epoch: 2, batch: 25110, sum loss: 3853.177246, avg 
loss: 2.484318, ppl: 11.992938 +epoch: 2, batch: 25111, sum loss: 3538.010254, avg loss: 2.088554, ppl: 8.073232 +epoch: 2, batch: 25112, sum loss: 4123.335449, avg loss: 2.603116, ppl: 13.505754 +epoch: 2, batch: 25113, sum loss: 3981.840820, avg loss: 2.285787, ppl: 9.833421 +epoch: 2, batch: 25114, sum loss: 4017.057129, avg loss: 2.565171, ppl: 13.002875 +epoch: 2, batch: 25115, sum loss: 4933.314453, avg loss: 2.608839, ppl: 13.583272 +epoch: 2, batch: 25116, sum loss: 3325.704102, avg loss: 2.218615, ppl: 9.194591 +epoch: 2, batch: 25117, sum loss: 4308.384766, avg loss: 2.393547, ppl: 10.952273 +epoch: 2, batch: 25118, sum loss: 3698.704102, avg loss: 2.384722, ppl: 10.856044 +epoch: 2, batch: 25119, sum loss: 4697.445312, avg loss: 2.451694, ppl: 11.607991 +epoch: 2, batch: 25120, sum loss: 3744.074707, avg loss: 2.285760, ppl: 9.833154 +epoch: 2, batch: 25121, sum loss: 4490.366211, avg loss: 2.494648, ppl: 12.117467 +epoch: 2, batch: 25122, sum loss: 4352.634766, avg loss: 2.448051, ppl: 11.565785 +epoch: 2, batch: 25123, sum loss: 3776.056885, avg loss: 2.296872, ppl: 9.943028 +epoch: 2, batch: 25124, sum loss: 3533.624268, avg loss: 2.447108, ppl: 11.554885 +epoch: 2, batch: 25125, sum loss: 4911.326660, avg loss: 2.571375, ppl: 13.083804 +epoch: 2, batch: 25126, sum loss: 4514.691895, avg loss: 2.714788, ppl: 15.101404 +epoch: 2, batch: 25127, sum loss: 3774.444580, avg loss: 2.351679, ppl: 10.503188 +epoch: 2, batch: 25128, sum loss: 3908.551270, avg loss: 2.386173, ppl: 10.871805 +epoch: 2, batch: 25129, sum loss: 4619.694336, avg loss: 2.549500, ppl: 12.800705 +epoch: 2, batch: 25130, sum loss: 4298.670410, avg loss: 2.624341, ppl: 13.795477 +epoch: 2, batch: 25131, sum loss: 3710.949951, avg loss: 2.405023, ppl: 11.078684 +epoch: 2, batch: 25132, sum loss: 4343.790527, avg loss: 2.647039, ppl: 14.112186 +epoch: 2, batch: 25133, sum loss: 3404.893311, avg loss: 2.294402, ppl: 9.918507 +epoch: 2, batch: 25134, sum loss: 4332.753418, avg loss: 
2.468805, ppl: 11.808331 +epoch: 2, batch: 25135, sum loss: 4004.169189, avg loss: 2.195268, ppl: 8.982409 +epoch: 2, batch: 25136, sum loss: 4488.515625, avg loss: 2.710456, ppl: 15.036136 +epoch: 2, batch: 25137, sum loss: 3924.897949, avg loss: 2.517574, ppl: 12.398482 +epoch: 2, batch: 25138, sum loss: 3717.394043, avg loss: 2.223322, ppl: 9.237967 +epoch: 2, batch: 25139, sum loss: 4164.442383, avg loss: 2.586610, ppl: 13.284661 +epoch: 2, batch: 25140, sum loss: 4395.516602, avg loss: 2.564479, ppl: 12.993885 +epoch: 2, batch: 25141, sum loss: 4617.782227, avg loss: 2.370525, ppl: 10.703009 +epoch: 2, batch: 25142, sum loss: 4839.086914, avg loss: 2.628510, ppl: 13.853113 +epoch: 2, batch: 25143, sum loss: 5742.975098, avg loss: 3.145113, ppl: 23.222288 +epoch: 2, batch: 25144, sum loss: 3869.994385, avg loss: 2.344031, ppl: 10.423163 +epoch: 2, batch: 25145, sum loss: 4386.432129, avg loss: 2.382636, ppl: 10.833418 +epoch: 2, batch: 25146, sum loss: 4289.671875, avg loss: 2.533770, ppl: 12.600917 +epoch: 2, batch: 25147, sum loss: 4487.598145, avg loss: 2.412687, ppl: 11.163921 +epoch: 2, batch: 25148, sum loss: 3178.124512, avg loss: 2.278226, ppl: 9.759349 +epoch: 2, batch: 25149, sum loss: 3577.985840, avg loss: 2.360149, ppl: 10.592531 +epoch: 2, batch: 25150, sum loss: 4643.986328, avg loss: 2.712609, ppl: 15.068535 +epoch: 2, batch: 25151, sum loss: 4486.560547, avg loss: 2.553535, ppl: 12.852454 +epoch: 2, batch: 25152, sum loss: 3303.699463, avg loss: 2.299026, ppl: 9.964468 +epoch: 2, batch: 25153, sum loss: 4641.281250, avg loss: 2.667403, ppl: 14.402517 +epoch: 2, batch: 25154, sum loss: 3540.549561, avg loss: 2.329309, ppl: 10.270842 +epoch: 2, batch: 25155, sum loss: 4757.000977, avg loss: 2.610868, ppl: 13.610856 +epoch: 2, batch: 25156, sum loss: 3636.746826, avg loss: 2.306117, ppl: 10.035384 +epoch: 2, batch: 25157, sum loss: 4889.981934, avg loss: 2.595532, ppl: 13.403715 +epoch: 2, batch: 25158, sum loss: 4546.920898, avg loss: 2.607179, 
ppl: 13.560747 +epoch: 2, batch: 25159, sum loss: 4751.554688, avg loss: 2.487725, ppl: 12.033868 +epoch: 2, batch: 25160, sum loss: 4995.446777, avg loss: 2.982357, ppl: 19.734266 +epoch: 2, batch: 25161, sum loss: 3557.110352, avg loss: 2.382525, ppl: 10.832222 +epoch: 2, batch: 25162, sum loss: 3634.319824, avg loss: 2.537933, ppl: 12.653487 +epoch: 2, batch: 25163, sum loss: 3301.545898, avg loss: 2.145254, ppl: 8.544210 +epoch: 2, batch: 25164, sum loss: 4595.454102, avg loss: 2.712783, ppl: 15.071158 +epoch: 2, batch: 25165, sum loss: 3960.600098, avg loss: 2.450866, ppl: 11.598392 +epoch: 2, batch: 25166, sum loss: 4614.516113, avg loss: 2.497033, ppl: 12.146398 +epoch: 2, batch: 25167, sum loss: 4408.357422, avg loss: 2.548183, ppl: 12.783860 +epoch: 2, batch: 25168, sum loss: 3304.645996, avg loss: 2.294893, ppl: 9.923374 +epoch: 2, batch: 25169, sum loss: 4125.794434, avg loss: 2.463161, ppl: 11.741869 +epoch: 2, batch: 25170, sum loss: 5140.226562, avg loss: 2.598699, ppl: 13.446228 +epoch: 2, batch: 25171, sum loss: 4527.982910, avg loss: 2.632548, ppl: 13.909166 +epoch: 2, batch: 25172, sum loss: 4385.328125, avg loss: 2.356436, ppl: 10.553278 +epoch: 2, batch: 25173, sum loss: 3314.978760, avg loss: 2.287770, ppl: 9.852942 +epoch: 2, batch: 25174, sum loss: 3839.344971, avg loss: 2.374363, ppl: 10.744169 +epoch: 2, batch: 25175, sum loss: 4877.000977, avg loss: 2.564144, ppl: 12.989531 +epoch: 2, batch: 25176, sum loss: 4767.864746, avg loss: 2.775241, ppl: 16.042500 +epoch: 2, batch: 25177, sum loss: 3452.980957, avg loss: 2.216291, ppl: 9.173242 +epoch: 2, batch: 25178, sum loss: 4185.541992, avg loss: 2.382209, ppl: 10.828800 +epoch: 2, batch: 25179, sum loss: 4468.656738, avg loss: 2.544793, ppl: 12.740592 +epoch: 2, batch: 25180, sum loss: 5276.861328, avg loss: 2.574079, ppl: 13.119226 +epoch: 2, batch: 25181, sum loss: 3788.368652, avg loss: 2.321304, ppl: 10.188955 +epoch: 2, batch: 25182, sum loss: 4442.735840, avg loss: 2.445094, ppl: 
11.531634 +epoch: 2, batch: 25183, sum loss: 3798.777832, avg loss: 2.492636, ppl: 12.093117 +epoch: 2, batch: 25184, sum loss: 4901.313477, avg loss: 2.346249, ppl: 10.446308 +epoch: 2, batch: 25185, sum loss: 3886.415039, avg loss: 2.276752, ppl: 9.744975 +epoch: 2, batch: 25186, sum loss: 3835.594971, avg loss: 2.337352, ppl: 10.353784 +epoch: 2, batch: 25187, sum loss: 4539.151367, avg loss: 2.521751, ppl: 12.450377 +epoch: 2, batch: 25188, sum loss: 4730.932617, avg loss: 2.456352, ppl: 11.662185 +epoch: 2, batch: 25189, sum loss: 4389.696777, avg loss: 2.442792, ppl: 11.505117 +epoch: 2, batch: 25190, sum loss: 5214.057617, avg loss: 2.572303, ppl: 13.095944 +epoch: 2, batch: 25191, sum loss: 3942.375000, avg loss: 2.405354, ppl: 11.082351 +epoch: 2, batch: 25192, sum loss: 4127.624023, avg loss: 2.591101, ppl: 13.344458 +epoch: 2, batch: 25193, sum loss: 3664.965088, avg loss: 2.373682, ppl: 10.736856 +epoch: 2, batch: 25194, sum loss: 4208.718750, avg loss: 2.375124, ppl: 10.752341 +epoch: 2, batch: 25195, sum loss: 4272.350098, avg loss: 2.368265, ppl: 10.678847 +epoch: 2, batch: 25196, sum loss: 4420.650879, avg loss: 2.580648, ppl: 13.205698 +epoch: 2, batch: 25197, sum loss: 4320.272461, avg loss: 2.478642, ppl: 11.925056 +epoch: 2, batch: 25198, sum loss: 3869.477539, avg loss: 2.382683, ppl: 10.833935 +epoch: 2, batch: 25199, sum loss: 3973.243408, avg loss: 2.300662, ppl: 9.980788 +epoch: 2, batch: 25200, sum loss: 3533.427979, avg loss: 2.161118, ppl: 8.680838 +epoch: 2, batch: 25201, sum loss: 4154.351074, avg loss: 2.558098, ppl: 12.911235 +epoch: 2, batch: 25202, sum loss: 3545.394043, avg loss: 2.413474, ppl: 11.172711 +epoch: 2, batch: 25203, sum loss: 4231.148926, avg loss: 2.496253, ppl: 12.136931 +epoch: 2, batch: 25204, sum loss: 4467.710938, avg loss: 2.525558, ppl: 12.497861 +epoch: 2, batch: 25205, sum loss: 4592.599121, avg loss: 2.648558, ppl: 14.133638 +epoch: 2, batch: 25206, sum loss: 4627.147461, avg loss: 2.605376, ppl: 13.536311 
+epoch: 2, batch: 25207, sum loss: 3800.451660, avg loss: 2.248788, ppl: 9.476242 +epoch: 2, batch: 25208, sum loss: 4579.145996, avg loss: 2.393699, ppl: 10.953937 +epoch: 2, batch: 25209, sum loss: 4934.514160, avg loss: 2.463562, ppl: 11.746575 +epoch: 2, batch: 25210, sum loss: 4022.716553, avg loss: 2.305282, ppl: 10.027001 +epoch: 2, batch: 25211, sum loss: 3616.510742, avg loss: 2.268827, ppl: 9.668056 +epoch: 2, batch: 25212, sum loss: 5084.593750, avg loss: 2.717581, ppl: 15.143646 +epoch: 2, batch: 25213, sum loss: 4176.475586, avg loss: 2.458196, ppl: 11.683720 +epoch: 2, batch: 25214, sum loss: 4448.994141, avg loss: 2.799870, ppl: 16.442516 +epoch: 2, batch: 25215, sum loss: 4088.647949, avg loss: 2.432271, ppl: 11.384710 +epoch: 2, batch: 25216, sum loss: 3745.606689, avg loss: 2.220277, ppl: 9.209880 +epoch: 2, batch: 25217, sum loss: 4566.788086, avg loss: 2.602158, ppl: 13.492828 +epoch: 2, batch: 25218, sum loss: 4550.814941, avg loss: 2.603441, ppl: 13.510150 +epoch: 2, batch: 25219, sum loss: 4293.711914, avg loss: 2.539156, ppl: 12.668970 +epoch: 2, batch: 25220, sum loss: 4186.971191, avg loss: 2.420214, ppl: 11.248271 +epoch: 2, batch: 25221, sum loss: 5073.984375, avg loss: 2.820447, ppl: 16.784351 +epoch: 2, batch: 25222, sum loss: 3940.531982, avg loss: 2.395460, ppl: 10.973246 +epoch: 2, batch: 25223, sum loss: 4319.845703, avg loss: 2.532149, ppl: 12.580507 +epoch: 2, batch: 25224, sum loss: 3995.695801, avg loss: 2.327138, ppl: 10.248568 +epoch: 2, batch: 25225, sum loss: 3646.946533, avg loss: 2.225105, ppl: 9.254451 +epoch: 2, batch: 25226, sum loss: 4410.250488, avg loss: 2.570076, ppl: 13.066817 +epoch: 2, batch: 25227, sum loss: 4148.399902, avg loss: 2.460498, ppl: 11.710646 +epoch: 2, batch: 25228, sum loss: 4015.191162, avg loss: 2.483112, ppl: 11.978479 +epoch: 2, batch: 25229, sum loss: 3696.188232, avg loss: 2.384638, ppl: 10.855128 +epoch: 2, batch: 25230, sum loss: 3973.355469, avg loss: 2.314127, ppl: 10.116085 +epoch: 2, 
batch: 25231, sum loss: 4370.240234, avg loss: 2.640629, ppl: 14.022015 +epoch: 2, batch: 25232, sum loss: 5126.272461, avg loss: 2.566987, ppl: 13.026514 +epoch: 2, batch: 25233, sum loss: 3527.772949, avg loss: 2.436307, ppl: 11.430754 +epoch: 2, batch: 25234, sum loss: 3564.514160, avg loss: 2.468500, ppl: 11.804729 +epoch: 2, batch: 25235, sum loss: 4440.821777, avg loss: 2.565466, ppl: 13.006717 +epoch: 2, batch: 25236, sum loss: 4620.866699, avg loss: 2.580048, ppl: 13.197776 +epoch: 2, batch: 25237, sum loss: 4278.341797, avg loss: 2.496115, ppl: 12.135262 +epoch: 2, batch: 25238, sum loss: 3429.166016, avg loss: 2.152647, ppl: 8.607609 +epoch: 2, batch: 25239, sum loss: 4234.400391, avg loss: 2.490824, ppl: 12.071218 +epoch: 2, batch: 25240, sum loss: 3801.512207, avg loss: 2.228319, ppl: 9.284245 +epoch: 2, batch: 25241, sum loss: 3833.866699, avg loss: 2.260535, ppl: 9.588213 +epoch: 2, batch: 25242, sum loss: 3590.468506, avg loss: 2.310469, ppl: 10.079150 +epoch: 2, batch: 25243, sum loss: 4415.160645, avg loss: 2.683988, ppl: 14.643376 +epoch: 2, batch: 25244, sum loss: 5129.152344, avg loss: 2.679808, ppl: 14.582292 +epoch: 2, batch: 25245, sum loss: 3769.468506, avg loss: 2.543501, ppl: 12.724142 +epoch: 2, batch: 25246, sum loss: 3297.713867, avg loss: 2.176709, ppl: 8.817241 +epoch: 2, batch: 25247, sum loss: 4695.104492, avg loss: 2.558640, ppl: 12.918240 +epoch: 2, batch: 25248, sum loss: 4381.515137, avg loss: 2.426088, ppl: 11.314534 +epoch: 2, batch: 25249, sum loss: 3179.168457, avg loss: 2.180500, ppl: 8.850726 +epoch: 2, batch: 25250, sum loss: 3561.432129, avg loss: 2.327734, ppl: 10.254673 +epoch: 2, batch: 25251, sum loss: 3480.641357, avg loss: 2.351785, ppl: 10.504300 +epoch: 2, batch: 25252, sum loss: 4054.121582, avg loss: 2.507187, ppl: 12.270367 +epoch: 2, batch: 25253, sum loss: 4475.092285, avg loss: 2.508460, ppl: 12.285995 +epoch: 2, batch: 25254, sum loss: 4199.323730, avg loss: 2.501086, ppl: 12.195734 +epoch: 2, batch: 
25255, sum loss: 5456.434082, avg loss: 2.683932, ppl: 14.642556 +epoch: 2, batch: 25256, sum loss: 4312.841309, avg loss: 2.374913, ppl: 10.750072 +epoch: 2, batch: 25257, sum loss: 4279.432129, avg loss: 2.430115, ppl: 11.360186 +epoch: 2, batch: 25258, sum loss: 3608.683594, avg loss: 2.493907, ppl: 12.108495 +epoch: 2, batch: 25259, sum loss: 4123.120117, avg loss: 2.433955, ppl: 11.403897 +epoch: 2, batch: 25260, sum loss: 5109.175781, avg loss: 2.624127, ppl: 13.792534 +epoch: 2, batch: 25261, sum loss: 4226.317871, avg loss: 2.558304, ppl: 12.913895 +epoch: 2, batch: 25262, sum loss: 4885.794922, avg loss: 2.769725, ppl: 15.954247 +epoch: 2, batch: 25263, sum loss: 4420.418945, avg loss: 2.672563, ppl: 14.477024 +epoch: 2, batch: 25264, sum loss: 3900.495605, avg loss: 2.456232, ppl: 11.660786 +epoch: 2, batch: 25265, sum loss: 3975.240234, avg loss: 2.237051, ppl: 9.365673 +epoch: 2, batch: 25266, sum loss: 3664.500977, avg loss: 2.266234, ppl: 9.643021 +epoch: 2, batch: 25267, sum loss: 3593.351807, avg loss: 2.280046, ppl: 9.777125 +epoch: 2, batch: 25268, sum loss: 4484.792480, avg loss: 2.604409, ppl: 13.523231 +epoch: 2, batch: 25269, sum loss: 4776.008789, avg loss: 2.616991, ppl: 13.694455 +epoch: 2, batch: 25270, sum loss: 5612.145508, avg loss: 2.772799, ppl: 16.003368 +epoch: 2, batch: 25271, sum loss: 4345.686523, avg loss: 2.535406, ppl: 12.621558 +epoch: 2, batch: 25272, sum loss: 4648.984375, avg loss: 2.607395, ppl: 13.563667 +epoch: 2, batch: 25273, sum loss: 4877.438477, avg loss: 2.790297, ppl: 16.285852 +epoch: 2, batch: 25274, sum loss: 4098.174805, avg loss: 2.508063, ppl: 12.281116 +epoch: 2, batch: 25275, sum loss: 4485.734375, avg loss: 2.712052, ppl: 15.060152 +epoch: 2, batch: 25276, sum loss: 3877.218506, avg loss: 2.844621, ppl: 17.195044 +epoch: 2, batch: 25277, sum loss: 4724.076660, avg loss: 2.460457, ppl: 11.710157 +epoch: 2, batch: 25278, sum loss: 4170.048828, avg loss: 2.378807, ppl: 10.792021 +epoch: 2, batch: 25279, sum 
loss: 4334.707520, avg loss: 2.520179, ppl: 12.430819 +epoch: 2, batch: 25280, sum loss: 4011.674805, avg loss: 2.298954, ppl: 9.963755 +epoch: 2, batch: 25281, sum loss: 3783.271484, avg loss: 2.585968, ppl: 13.276137 +epoch: 2, batch: 25282, sum loss: 4008.454102, avg loss: 2.468260, ppl: 11.801891 +epoch: 2, batch: 25283, sum loss: 3913.053955, avg loss: 2.219543, ppl: 9.203122 +epoch: 2, batch: 25284, sum loss: 4668.603516, avg loss: 2.515411, ppl: 12.371697 +epoch: 2, batch: 25285, sum loss: 4393.261230, avg loss: 2.394148, ppl: 10.958856 +epoch: 2, batch: 25286, sum loss: 4624.118652, avg loss: 2.492786, ppl: 12.094931 +epoch: 2, batch: 25287, sum loss: 4367.410156, avg loss: 2.482894, ppl: 11.975872 +epoch: 2, batch: 25288, sum loss: 3850.940186, avg loss: 2.403833, ppl: 11.065506 +epoch: 2, batch: 25289, sum loss: 4370.146484, avg loss: 2.566146, ppl: 13.015564 +epoch: 2, batch: 25290, sum loss: 3901.642822, avg loss: 2.418874, ppl: 11.233198 +epoch: 2, batch: 25291, sum loss: 4217.260742, avg loss: 2.439133, ppl: 11.463098 +epoch: 2, batch: 25292, sum loss: 3637.488525, avg loss: 2.454446, ppl: 11.639981 +epoch: 2, batch: 25293, sum loss: 4215.747559, avg loss: 2.578439, ppl: 13.176550 +epoch: 2, batch: 25294, sum loss: 3634.991699, avg loss: 2.232796, ppl: 9.325905 +epoch: 2, batch: 25295, sum loss: 3633.071045, avg loss: 2.309645, ppl: 10.070846 +epoch: 2, batch: 25296, sum loss: 3681.506836, avg loss: 2.362970, ppl: 10.622452 +epoch: 2, batch: 25297, sum loss: 3874.374512, avg loss: 2.535585, ppl: 12.623818 +epoch: 2, batch: 25298, sum loss: 4508.038086, avg loss: 2.728837, ppl: 15.315062 +epoch: 2, batch: 25299, sum loss: 4337.455078, avg loss: 2.485647, ppl: 12.008893 +epoch: 2, batch: 25300, sum loss: 4206.999023, avg loss: 2.437427, ppl: 11.443559 +epoch: 2, batch: 25301, sum loss: 3754.900879, avg loss: 2.538810, ppl: 12.664598 +epoch: 2, batch: 25302, sum loss: 4471.335449, avg loss: 2.427435, ppl: 11.329785 +epoch: 2, batch: 25303, sum loss: 
3258.413086, avg loss: 2.296274, ppl: 9.937090 +epoch: 2, batch: 25304, sum loss: 3946.400391, avg loss: 2.287768, ppl: 9.852925 +epoch: 2, batch: 25305, sum loss: 3741.128906, avg loss: 2.167514, ppl: 8.736537 +epoch: 2, batch: 25306, sum loss: 4989.797363, avg loss: 2.530323, ppl: 12.557562 +epoch: 2, batch: 25307, sum loss: 4240.254395, avg loss: 2.663476, ppl: 14.346076 +epoch: 2, batch: 25308, sum loss: 3809.767090, avg loss: 2.432801, ppl: 11.390748 +epoch: 2, batch: 25309, sum loss: 4757.807617, avg loss: 2.761351, ppl: 15.821201 +epoch: 2, batch: 25310, sum loss: 4129.837891, avg loss: 2.380310, ppl: 10.808253 +epoch: 2, batch: 25311, sum loss: 3593.619629, avg loss: 2.438005, ppl: 11.450177 +epoch: 2, batch: 25312, sum loss: 3354.390625, avg loss: 2.310186, ppl: 10.076303 +epoch: 2, batch: 25313, sum loss: 3433.390625, avg loss: 2.340416, ppl: 10.385558 +epoch: 2, batch: 25314, sum loss: 3021.947998, avg loss: 2.072667, ppl: 7.945984 +epoch: 2, batch: 25315, sum loss: 4258.045898, avg loss: 2.527030, ppl: 12.516280 +epoch: 2, batch: 25316, sum loss: 3918.921143, avg loss: 2.480330, ppl: 11.945203 +epoch: 2, batch: 25317, sum loss: 4600.965820, avg loss: 2.812326, ppl: 16.648605 +epoch: 2, batch: 25318, sum loss: 4322.481445, avg loss: 2.562230, ppl: 12.964695 +epoch: 2, batch: 25319, sum loss: 3744.305664, avg loss: 2.371315, ppl: 10.711464 +epoch: 2, batch: 25320, sum loss: 4882.520020, avg loss: 2.799610, ppl: 16.438232 +epoch: 2, batch: 25321, sum loss: 4156.314453, avg loss: 2.565626, ppl: 13.008804 +epoch: 2, batch: 25322, sum loss: 4368.808594, avg loss: 2.396494, ppl: 10.984598 +epoch: 2, batch: 25323, sum loss: 3810.945557, avg loss: 2.463443, ppl: 11.745175 +epoch: 2, batch: 25324, sum loss: 5197.456543, avg loss: 2.666730, ppl: 14.392827 +epoch: 2, batch: 25325, sum loss: 4213.187500, avg loss: 2.233928, ppl: 9.336465 +epoch: 2, batch: 25326, sum loss: 4232.336426, avg loss: 2.426798, ppl: 11.322573 +epoch: 2, batch: 25327, sum loss: 3168.172852, 
avg loss: 2.444578, ppl: 11.525684 +epoch: 2, batch: 25328, sum loss: 4350.559570, avg loss: 2.519143, ppl: 12.417949 +epoch: 2, batch: 25329, sum loss: 4043.047607, avg loss: 2.320923, ppl: 10.185069 +epoch: 2, batch: 25330, sum loss: 3839.032959, avg loss: 2.509172, ppl: 12.294745 +epoch: 2, batch: 25331, sum loss: 4815.375000, avg loss: 2.653099, ppl: 14.197974 +epoch: 2, batch: 25332, sum loss: 4168.675293, avg loss: 2.452162, ppl: 11.613428 +epoch: 2, batch: 25333, sum loss: 4258.408691, avg loss: 2.441748, ppl: 11.493112 +epoch: 2, batch: 25334, sum loss: 3491.821777, avg loss: 2.320148, ppl: 10.177176 +epoch: 2, batch: 25335, sum loss: 3964.398682, avg loss: 2.559328, ppl: 12.927125 +epoch: 2, batch: 25336, sum loss: 3621.302490, avg loss: 2.357619, ppl: 10.565763 +epoch: 2, batch: 25337, sum loss: 3654.353516, avg loss: 2.274022, ppl: 9.718410 +epoch: 2, batch: 25338, sum loss: 4025.497559, avg loss: 2.595421, ppl: 13.402224 +epoch: 2, batch: 25339, sum loss: 4241.015625, avg loss: 2.490320, ppl: 12.065142 +epoch: 2, batch: 25340, sum loss: 5100.033203, avg loss: 2.611384, ppl: 13.617887 +epoch: 2, batch: 25341, sum loss: 4275.076660, avg loss: 2.611531, ppl: 13.619890 +epoch: 2, batch: 25342, sum loss: 4871.014648, avg loss: 2.660303, ppl: 14.300623 +epoch: 2, batch: 25343, sum loss: 3838.762207, avg loss: 2.279550, ppl: 9.772283 +epoch: 2, batch: 25344, sum loss: 5581.001465, avg loss: 2.878289, ppl: 17.783810 +epoch: 2, batch: 25345, sum loss: 4216.286133, avg loss: 2.297704, ppl: 9.951305 +epoch: 2, batch: 25346, sum loss: 4060.968262, avg loss: 2.652494, ppl: 14.189385 +epoch: 2, batch: 25347, sum loss: 4057.983154, avg loss: 2.523621, ppl: 12.473686 +epoch: 2, batch: 25348, sum loss: 3491.170410, avg loss: 2.384679, ppl: 10.855578 +epoch: 2, batch: 25349, sum loss: 3209.951660, avg loss: 2.262122, ppl: 9.603450 +epoch: 2, batch: 25350, sum loss: 4066.786377, avg loss: 2.504179, ppl: 12.233508 +epoch: 2, batch: 25351, sum loss: 4191.411133, avg loss: 
2.569841, ppl: 13.063752 +epoch: 2, batch: 25352, sum loss: 3864.901611, avg loss: 2.378401, ppl: 10.787640 +epoch: 2, batch: 25353, sum loss: 4314.316406, avg loss: 2.533363, ppl: 12.595790 +epoch: 2, batch: 25354, sum loss: 5260.245117, avg loss: 2.522899, ppl: 12.464684 +epoch: 2, batch: 25355, sum loss: 3841.638916, avg loss: 2.420693, ppl: 11.253652 +epoch: 2, batch: 25356, sum loss: 3890.871582, avg loss: 2.342487, ppl: 10.407090 +epoch: 2, batch: 25357, sum loss: 3210.547363, avg loss: 2.185533, ppl: 8.895385 +epoch: 2, batch: 25358, sum loss: 4165.053223, avg loss: 2.609683, ppl: 13.594734 +epoch: 2, batch: 25359, sum loss: 3753.878174, avg loss: 2.463175, ppl: 11.742028 +epoch: 2, batch: 25360, sum loss: 3999.658203, avg loss: 2.432882, ppl: 11.391666 +epoch: 2, batch: 25361, sum loss: 3942.764893, avg loss: 2.267260, ppl: 9.652914 +epoch: 2, batch: 25362, sum loss: 3897.793945, avg loss: 2.183638, ppl: 8.878549 +epoch: 2, batch: 25363, sum loss: 4666.128418, avg loss: 2.518148, ppl: 12.405602 +epoch: 2, batch: 25364, sum loss: 4171.230957, avg loss: 2.374064, ppl: 10.740957 +epoch: 2, batch: 25365, sum loss: 3108.730713, avg loss: 2.154352, ppl: 8.622305 +epoch: 2, batch: 25366, sum loss: 4348.905273, avg loss: 2.489356, ppl: 12.053512 +epoch: 2, batch: 25367, sum loss: 4651.093262, avg loss: 2.558357, ppl: 12.914581 +epoch: 2, batch: 25368, sum loss: 3511.042725, avg loss: 2.299308, ppl: 9.967278 +epoch: 2, batch: 25369, sum loss: 4168.110840, avg loss: 2.366900, ppl: 10.664281 +epoch: 2, batch: 25370, sum loss: 4806.296387, avg loss: 2.648097, ppl: 14.127133 +epoch: 2, batch: 25371, sum loss: 4465.397461, avg loss: 2.615933, ppl: 13.679973 +epoch: 2, batch: 25372, sum loss: 4488.600586, avg loss: 2.708872, ppl: 15.012330 +epoch: 2, batch: 25373, sum loss: 3918.707031, avg loss: 2.356408, ppl: 10.552980 +epoch: 2, batch: 25374, sum loss: 3916.583984, avg loss: 2.391077, ppl: 10.925252 +epoch: 2, batch: 25375, sum loss: 3939.760010, avg loss: 2.220834, 
ppl: 9.215015 +epoch: 2, batch: 25376, sum loss: 3782.930176, avg loss: 2.280247, ppl: 9.779098 +epoch: 2, batch: 25377, sum loss: 4428.198730, avg loss: 2.520318, ppl: 12.432550 +epoch: 2, batch: 25378, sum loss: 4912.193359, avg loss: 2.462252, ppl: 11.731202 +epoch: 2, batch: 25379, sum loss: 3946.452148, avg loss: 2.457318, ppl: 11.673456 +epoch: 2, batch: 25380, sum loss: 4415.511719, avg loss: 2.483415, ppl: 11.982115 +epoch: 2, batch: 25381, sum loss: 4491.528809, avg loss: 2.493908, ppl: 12.108506 +epoch: 2, batch: 25382, sum loss: 3576.358398, avg loss: 2.556368, ppl: 12.888918 +epoch: 2, batch: 25383, sum loss: 3927.238281, avg loss: 2.583709, ppl: 13.246183 +epoch: 2, batch: 25384, sum loss: 4117.341309, avg loss: 2.355458, ppl: 10.542962 +epoch: 2, batch: 25385, sum loss: 4566.868164, avg loss: 2.585995, ppl: 13.276499 +epoch: 2, batch: 25386, sum loss: 3728.860840, avg loss: 2.367531, ppl: 10.671008 +epoch: 2, batch: 25387, sum loss: 3697.155518, avg loss: 2.259875, ppl: 9.581892 +epoch: 2, batch: 25388, sum loss: 3898.739258, avg loss: 2.533294, ppl: 12.594925 +epoch: 2, batch: 25389, sum loss: 4956.071777, avg loss: 2.703804, ppl: 14.936435 +epoch: 2, batch: 25390, sum loss: 4722.097168, avg loss: 2.537398, ppl: 12.646720 +epoch: 2, batch: 25391, sum loss: 4548.304688, avg loss: 2.632121, ppl: 13.903225 +epoch: 2, batch: 25392, sum loss: 5027.659180, avg loss: 2.633661, ppl: 13.924659 +epoch: 2, batch: 25393, sum loss: 3634.484619, avg loss: 2.474122, ppl: 11.871274 +epoch: 2, batch: 25394, sum loss: 4037.915527, avg loss: 2.181478, ppl: 8.859391 +epoch: 2, batch: 25395, sum loss: 4435.704102, avg loss: 2.491969, ppl: 12.085044 +epoch: 2, batch: 25396, sum loss: 3759.230713, avg loss: 2.492858, ppl: 12.095801 +epoch: 2, batch: 25397, sum loss: 3612.641846, avg loss: 2.154229, ppl: 8.621240 +epoch: 2, batch: 25398, sum loss: 4367.180664, avg loss: 2.347947, ppl: 10.464061 +epoch: 2, batch: 25399, sum loss: 3274.610596, avg loss: 2.150105, ppl: 
8.585764 +epoch: 2, batch: 25400, sum loss: 4616.756348, avg loss: 2.455721, ppl: 11.654838 +epoch: 2, batch: 25401, sum loss: 3934.187744, avg loss: 2.364296, ppl: 10.636545 +epoch: 2, batch: 25402, sum loss: 4156.981445, avg loss: 2.415445, ppl: 11.194755 +epoch: 2, batch: 25403, sum loss: 3602.437012, avg loss: 2.373147, ppl: 10.731111 +epoch: 2, batch: 25404, sum loss: 3976.830078, avg loss: 2.358737, ppl: 10.577580 +epoch: 2, batch: 25405, sum loss: 3705.448730, avg loss: 2.458825, ppl: 11.691062 +epoch: 2, batch: 25406, sum loss: 4205.739258, avg loss: 2.410166, ppl: 11.135807 +epoch: 2, batch: 25407, sum loss: 4294.143066, avg loss: 2.594648, ppl: 13.391877 +epoch: 2, batch: 25408, sum loss: 3796.749756, avg loss: 2.455854, ppl: 11.656381 +epoch: 2, batch: 25409, sum loss: 3889.420166, avg loss: 2.319273, ppl: 10.168274 +epoch: 2, batch: 25410, sum loss: 4284.792480, avg loss: 2.404485, ppl: 11.072729 +epoch: 2, batch: 25411, sum loss: 4273.343262, avg loss: 2.451717, ppl: 11.608265 +epoch: 2, batch: 25412, sum loss: 4778.843262, avg loss: 2.615678, ppl: 13.676483 +epoch: 2, batch: 25413, sum loss: 4801.695312, avg loss: 2.649942, ppl: 14.153220 +epoch: 2, batch: 25414, sum loss: 4575.809570, avg loss: 2.350185, ppl: 10.487509 +epoch: 2, batch: 25415, sum loss: 2922.811523, avg loss: 2.384022, ppl: 10.848453 +epoch: 2, batch: 25416, sum loss: 4204.930664, avg loss: 2.551536, ppl: 12.826785 +epoch: 2, batch: 25417, sum loss: 3987.197754, avg loss: 2.498244, ppl: 12.161123 +epoch: 2, batch: 25418, sum loss: 3550.599854, avg loss: 2.342084, ppl: 10.402898 +epoch: 2, batch: 25419, sum loss: 4072.973877, avg loss: 2.401518, ppl: 11.039918 +epoch: 2, batch: 25420, sum loss: 4279.069824, avg loss: 2.730740, ppl: 15.344242 +epoch: 2, batch: 25421, sum loss: 3730.115234, avg loss: 2.363825, ppl: 10.631536 +epoch: 2, batch: 25422, sum loss: 4312.262207, avg loss: 2.524744, ppl: 12.487693 +epoch: 2, batch: 25423, sum loss: 4225.628418, avg loss: 2.409138, ppl: 
11.124370 +epoch: 2, batch: 25424, sum loss: 3666.501953, avg loss: 2.231590, ppl: 9.314660 +epoch: 2, batch: 25425, sum loss: 4256.245605, avg loss: 2.406018, ppl: 11.089714 +epoch: 2, batch: 25426, sum loss: 5139.109375, avg loss: 2.639502, ppl: 14.006221 +epoch: 2, batch: 25427, sum loss: 4349.649414, avg loss: 2.351162, ppl: 10.497761 +epoch: 2, batch: 25428, sum loss: 4081.031006, avg loss: 2.482379, ppl: 11.969706 +epoch: 2, batch: 25429, sum loss: 4030.545166, avg loss: 2.309768, ppl: 10.072087 +epoch: 2, batch: 25430, sum loss: 2912.308594, avg loss: 2.026659, ppl: 7.588688 +epoch: 2, batch: 25431, sum loss: 3446.993408, avg loss: 2.184407, ppl: 8.885374 +epoch: 2, batch: 25432, sum loss: 3332.742188, avg loss: 2.507707, ppl: 12.276746 +epoch: 2, batch: 25433, sum loss: 4430.208008, avg loss: 2.528658, ppl: 12.536667 +epoch: 2, batch: 25434, sum loss: 3945.342773, avg loss: 2.445966, ppl: 11.541691 +epoch: 2, batch: 25435, sum loss: 4149.664062, avg loss: 2.371237, ppl: 10.710629 +epoch: 2, batch: 25436, sum loss: 4321.858887, avg loss: 2.611395, ppl: 13.618036 +epoch: 2, batch: 25437, sum loss: 4500.166016, avg loss: 2.574466, ppl: 13.124304 +epoch: 2, batch: 25438, sum loss: 3610.312988, avg loss: 2.362770, ppl: 10.620333 +epoch: 2, batch: 25439, sum loss: 4342.599609, avg loss: 2.633475, ppl: 13.922059 +epoch: 2, batch: 25440, sum loss: 3847.682861, avg loss: 2.446079, ppl: 11.543001 +epoch: 2, batch: 25441, sum loss: 5086.404785, avg loss: 2.744957, ppl: 15.563940 +epoch: 2, batch: 25442, sum loss: 3461.481201, avg loss: 2.192198, ppl: 8.954877 +epoch: 2, batch: 25443, sum loss: 3854.437256, avg loss: 2.380752, ppl: 10.813029 +epoch: 2, batch: 25444, sum loss: 3434.388916, avg loss: 2.349103, ppl: 10.476171 +epoch: 2, batch: 25445, sum loss: 4025.450684, avg loss: 2.374897, ppl: 10.749908 +epoch: 2, batch: 25446, sum loss: 4667.670410, avg loss: 2.498753, ppl: 12.167310 +epoch: 2, batch: 25447, sum loss: 3823.186279, avg loss: 2.409065, ppl: 11.123558 
+epoch: 2, batch: 25448, sum loss: 5100.207520, avg loss: 2.615491, ppl: 13.673927 +epoch: 2, batch: 25449, sum loss: 3726.814941, avg loss: 2.367735, ppl: 10.673189 +epoch: 2, batch: 25450, sum loss: 4331.003418, avg loss: 2.604331, ppl: 13.522182 +epoch: 2, batch: 25451, sum loss: 4044.458496, avg loss: 2.408849, ppl: 11.121159 +epoch: 2, batch: 25452, sum loss: 4215.238770, avg loss: 2.416995, ppl: 11.212111 +epoch: 2, batch: 25453, sum loss: 3872.651855, avg loss: 2.328714, ppl: 10.264736 +epoch: 2, batch: 25454, sum loss: 4631.668945, avg loss: 2.529584, ppl: 12.548290 +epoch: 2, batch: 25455, sum loss: 3912.420410, avg loss: 2.428566, ppl: 11.342610 +epoch: 2, batch: 25456, sum loss: 4180.792480, avg loss: 2.566478, ppl: 13.019885 +epoch: 2, batch: 25457, sum loss: 3366.854736, avg loss: 2.304487, ppl: 10.019032 +epoch: 2, batch: 25458, sum loss: 5008.293945, avg loss: 2.619401, ppl: 13.727491 +epoch: 2, batch: 25459, sum loss: 4172.361816, avg loss: 2.430030, ppl: 11.359224 +epoch: 2, batch: 25460, sum loss: 4964.499023, avg loss: 2.578960, ppl: 13.183426 +epoch: 2, batch: 25461, sum loss: 3576.337402, avg loss: 2.329861, ppl: 10.276517 +epoch: 2, batch: 25462, sum loss: 4344.691406, avg loss: 2.297563, ppl: 9.949903 +epoch: 2, batch: 25463, sum loss: 4136.289062, avg loss: 2.526750, ppl: 12.512769 +epoch: 2, batch: 25464, sum loss: 4457.861328, avg loss: 2.401865, ppl: 11.043754 +epoch: 2, batch: 25465, sum loss: 4336.597656, avg loss: 2.466779, ppl: 11.784428 +epoch: 2, batch: 25466, sum loss: 4248.342285, avg loss: 2.365447, ppl: 10.648796 +epoch: 2, batch: 25467, sum loss: 5137.341309, avg loss: 2.795071, ppl: 16.363796 +epoch: 2, batch: 25468, sum loss: 4640.684570, avg loss: 2.523483, ppl: 12.471958 +epoch: 2, batch: 25469, sum loss: 3605.424072, avg loss: 2.263292, ppl: 9.614690 +epoch: 2, batch: 25470, sum loss: 4194.372070, avg loss: 2.302070, ppl: 9.994854 +epoch: 2, batch: 25471, sum loss: 3591.841553, avg loss: 2.126608, ppl: 8.386375 +epoch: 2, 
batch: 25472, sum loss: 3691.445068, avg loss: 2.268866, ppl: 9.668431 +epoch: 2, batch: 25473, sum loss: 4533.670898, avg loss: 2.381130, ppl: 10.817117 +epoch: 2, batch: 25474, sum loss: 3900.271484, avg loss: 2.504991, ppl: 12.243452 +epoch: 2, batch: 25475, sum loss: 4753.517090, avg loss: 2.589061, ppl: 13.317267 +epoch: 2, batch: 25476, sum loss: 4154.983398, avg loss: 2.383811, ppl: 10.846164 +epoch: 2, batch: 25477, sum loss: 4216.835938, avg loss: 2.549478, ppl: 12.800415 +epoch: 2, batch: 25478, sum loss: 4720.250977, avg loss: 2.666809, ppl: 14.393959 +epoch: 2, batch: 25479, sum loss: 4089.210449, avg loss: 2.391351, ppl: 10.928250 +epoch: 2, batch: 25480, sum loss: 3440.551514, avg loss: 2.350103, ppl: 10.486653 +epoch: 2, batch: 25481, sum loss: 3891.584717, avg loss: 2.438336, ppl: 11.453970 +epoch: 2, batch: 25482, sum loss: 4300.905273, avg loss: 2.251783, ppl: 9.504666 +epoch: 2, batch: 25483, sum loss: 3652.105957, avg loss: 2.213398, ppl: 9.146740 +epoch: 2, batch: 25484, sum loss: 3801.895508, avg loss: 2.345401, ppl: 10.437460 +epoch: 2, batch: 25485, sum loss: 4040.075928, avg loss: 2.332607, ppl: 10.304774 +epoch: 2, batch: 25486, sum loss: 4152.648926, avg loss: 2.335573, ppl: 10.335382 +epoch: 2, batch: 25487, sum loss: 3859.318848, avg loss: 2.336149, ppl: 10.341340 +epoch: 2, batch: 25488, sum loss: 4787.992676, avg loss: 2.572807, ppl: 13.102547 +epoch: 2, batch: 25489, sum loss: 3924.687988, avg loss: 2.452930, ppl: 11.622350 +epoch: 2, batch: 25490, sum loss: 4069.340820, avg loss: 2.435273, ppl: 11.418935 +epoch: 2, batch: 25491, sum loss: 4159.732422, avg loss: 2.634409, ppl: 13.935081 +epoch: 2, batch: 25492, sum loss: 4315.979004, avg loss: 2.549309, ppl: 12.798251 +epoch: 2, batch: 25493, sum loss: 4493.804688, avg loss: 2.550400, ppl: 12.812228 +epoch: 2, batch: 25494, sum loss: 3529.326660, avg loss: 2.271124, ppl: 9.690286 +epoch: 2, batch: 25495, sum loss: 4737.802246, avg loss: 2.525481, ppl: 12.496904 +epoch: 2, batch: 
25496, sum loss: 3740.279541, avg loss: 2.446226, ppl: 11.544696 +epoch: 2, batch: 25497, sum loss: 3913.073730, avg loss: 2.539308, ppl: 12.670900 +epoch: 2, batch: 25498, sum loss: 5312.897461, avg loss: 2.862552, ppl: 17.506153 +epoch: 2, batch: 25499, sum loss: 4394.196289, avg loss: 2.557739, ppl: 12.906609 +epoch: 2, batch: 25500, sum loss: 4733.269043, avg loss: 2.666630, ppl: 14.391393 +epoch: 2, batch: 25501, sum loss: 3750.010742, avg loss: 2.333548, ppl: 10.314468 +epoch: 2, batch: 25502, sum loss: 4786.498047, avg loss: 2.531199, ppl: 12.568573 +epoch: 2, batch: 25503, sum loss: 4496.661133, avg loss: 2.694225, ppl: 14.794046 +epoch: 2, batch: 25504, sum loss: 4204.579102, avg loss: 2.433206, ppl: 11.395352 +epoch: 2, batch: 25505, sum loss: 3377.231201, avg loss: 2.363353, ppl: 10.626520 +epoch: 2, batch: 25506, sum loss: 3942.884766, avg loss: 2.366678, ppl: 10.661909 +epoch: 2, batch: 25507, sum loss: 5432.747559, avg loss: 2.697491, ppl: 14.842451 +epoch: 2, batch: 25508, sum loss: 4482.726074, avg loss: 2.497340, ppl: 12.150137 +epoch: 2, batch: 25509, sum loss: 4313.526855, avg loss: 2.561477, ppl: 12.954937 +epoch: 2, batch: 25510, sum loss: 4994.832031, avg loss: 2.781087, ppl: 16.136551 +epoch: 2, batch: 25511, sum loss: 4601.695312, avg loss: 2.529794, ppl: 12.550920 +epoch: 2, batch: 25512, sum loss: 4357.427246, avg loss: 2.495663, ppl: 12.129772 +epoch: 2, batch: 25513, sum loss: 4367.997559, avg loss: 2.314784, ppl: 10.122737 +epoch: 2, batch: 25514, sum loss: 3622.092529, avg loss: 2.333823, ppl: 10.317307 +epoch: 2, batch: 25515, sum loss: 3418.569824, avg loss: 2.177433, ppl: 8.823627 +epoch: 2, batch: 25516, sum loss: 3567.702393, avg loss: 2.324236, ppl: 10.218871 +epoch: 2, batch: 25517, sum loss: 4171.042969, avg loss: 2.357854, ppl: 10.568244 +epoch: 2, batch: 25518, sum loss: 4583.390625, avg loss: 2.547744, ppl: 12.778238 +epoch: 2, batch: 25519, sum loss: 3510.778809, avg loss: 2.406291, ppl: 11.092745 +epoch: 2, batch: 25520, 
sum loss: 4369.713867, avg loss: 2.524387, ppl: 12.483239 +epoch: 2, batch: 25521, sum loss: 4318.739746, avg loss: 2.291109, ppl: 9.885891 +epoch: 2, batch: 25522, sum loss: 4795.644043, avg loss: 2.604913, ppl: 13.530042 +epoch: 2, batch: 25523, sum loss: 4294.524414, avg loss: 2.526191, ppl: 12.505781 +epoch: 2, batch: 25524, sum loss: 4032.246094, avg loss: 2.656289, ppl: 14.243328 +epoch: 2, batch: 25525, sum loss: 4319.682617, avg loss: 2.790493, ppl: 16.289045 +epoch: 2, batch: 25526, sum loss: 3913.444824, avg loss: 2.229883, ppl: 9.298778 +epoch: 2, batch: 25527, sum loss: 5320.978027, avg loss: 2.559393, ppl: 12.927967 +epoch: 2, batch: 25528, sum loss: 4000.005859, avg loss: 2.313479, ppl: 10.109539 +epoch: 2, batch: 25529, sum loss: 4485.937500, avg loss: 2.523025, ppl: 12.466245 +epoch: 2, batch: 25530, sum loss: 4265.481445, avg loss: 2.310662, ppl: 10.081092 +epoch: 2, batch: 25531, sum loss: 4863.027832, avg loss: 2.452359, ppl: 11.615715 +epoch: 2, batch: 25532, sum loss: 4089.811768, avg loss: 2.244683, ppl: 9.437419 +epoch: 2, batch: 25533, sum loss: 4404.862305, avg loss: 2.784363, ppl: 16.189499 +epoch: 2, batch: 25534, sum loss: 3948.190186, avg loss: 2.606066, ppl: 13.545660 +epoch: 2, batch: 25535, sum loss: 3938.739990, avg loss: 2.423840, ppl: 11.289127 +epoch: 2, batch: 25536, sum loss: 4093.470459, avg loss: 2.440948, ppl: 11.483925 +epoch: 2, batch: 25537, sum loss: 3725.712891, avg loss: 2.457594, ppl: 11.676688 +epoch: 2, batch: 25538, sum loss: 5340.572266, avg loss: 2.797576, ppl: 16.404833 +epoch: 2, batch: 25539, sum loss: 3695.693848, avg loss: 2.154923, ppl: 8.627230 +epoch: 2, batch: 25540, sum loss: 3784.166260, avg loss: 2.335905, ppl: 10.338816 +epoch: 2, batch: 25541, sum loss: 4078.425049, avg loss: 2.437792, ppl: 11.447731 +epoch: 2, batch: 25542, sum loss: 5569.098145, avg loss: 2.797136, ppl: 16.397617 +epoch: 2, batch: 25543, sum loss: 4024.606445, avg loss: 2.405622, ppl: 11.085328 +epoch: 2, batch: 25544, sum loss: 
4291.980957, avg loss: 2.335136, ppl: 10.330869 +epoch: 2, batch: 25545, sum loss: 3114.974609, avg loss: 2.114715, ppl: 8.287220 +epoch: 2, batch: 25546, sum loss: 3971.051758, avg loss: 2.346957, ppl: 10.453712 +epoch: 2, batch: 25547, sum loss: 4639.587402, avg loss: 2.725962, ppl: 15.271096 +epoch: 2, batch: 25548, sum loss: 4574.522949, avg loss: 2.459421, ppl: 11.698035 +epoch: 2, batch: 25549, sum loss: 3423.928955, avg loss: 2.287194, ppl: 9.847263 +epoch: 2, batch: 25550, sum loss: 3964.030273, avg loss: 2.346969, ppl: 10.453835 +epoch: 2, batch: 25551, sum loss: 3897.980957, avg loss: 2.434716, ppl: 11.412582 +epoch: 2, batch: 25552, sum loss: 3986.470703, avg loss: 2.463826, ppl: 11.749682 +epoch: 2, batch: 25553, sum loss: 3631.167236, avg loss: 2.266646, ppl: 9.646992 +epoch: 2, batch: 25554, sum loss: 4032.133545, avg loss: 2.315987, ppl: 10.134922 +epoch: 2, batch: 25555, sum loss: 3615.543701, avg loss: 2.305831, ppl: 10.032516 +epoch: 2, batch: 25556, sum loss: 5392.589355, avg loss: 2.818918, ppl: 16.758699 +epoch: 2, batch: 25557, sum loss: 4145.162598, avg loss: 2.576235, ppl: 13.147549 +epoch: 2, batch: 25558, sum loss: 3681.333252, avg loss: 2.421930, ppl: 11.267583 +epoch: 2, batch: 25559, sum loss: 3872.204346, avg loss: 2.320075, ppl: 10.176433 +epoch: 2, batch: 25560, sum loss: 4298.291992, avg loss: 2.537362, ppl: 12.646271 +epoch: 2, batch: 25561, sum loss: 3581.720703, avg loss: 2.220534, ppl: 9.212245 +epoch: 2, batch: 25562, sum loss: 4509.402832, avg loss: 2.529110, ppl: 12.542338 +epoch: 2, batch: 25563, sum loss: 4599.883789, avg loss: 2.625504, ppl: 13.811541 +epoch: 2, batch: 25564, sum loss: 3685.046387, avg loss: 2.443665, ppl: 11.515164 +epoch: 2, batch: 25565, sum loss: 4368.937012, avg loss: 2.662363, ppl: 14.330105 +epoch: 2, batch: 25566, sum loss: 4473.664551, avg loss: 2.473004, ppl: 11.858019 +epoch: 2, batch: 25567, sum loss: 4169.504883, avg loss: 2.470086, ppl: 11.823462 +epoch: 2, batch: 25568, sum loss: 
4153.171387, avg loss: 2.640287, ppl: 14.017225 +epoch: 2, batch: 25569, sum loss: 5101.771484, avg loss: 2.675287, ppl: 14.516509 +epoch: 2, batch: 25570, sum loss: 4258.913086, avg loss: 2.351691, ppl: 10.503321 +epoch: 2, batch: 25571, sum loss: 3947.873535, avg loss: 2.637190, ppl: 13.973880 +epoch: 2, batch: 25572, sum loss: 4417.606934, avg loss: 2.604721, ppl: 13.527452 +epoch: 2, batch: 25573, sum loss: 3787.172607, avg loss: 2.401505, ppl: 11.039773 +epoch: 2, batch: 25574, sum loss: 3807.409180, avg loss: 2.311724, ppl: 10.091805 +epoch: 2, batch: 25575, sum loss: 5481.667969, avg loss: 2.897287, ppl: 18.124912 +epoch: 2, batch: 25576, sum loss: 3978.586426, avg loss: 2.396739, ppl: 10.987286 +epoch: 2, batch: 25577, sum loss: 4399.786133, avg loss: 2.462108, ppl: 11.729507 +epoch: 2, batch: 25578, sum loss: 3812.008301, avg loss: 2.353092, ppl: 10.518039 +epoch: 2, batch: 25579, sum loss: 3856.394775, avg loss: 2.371706, ppl: 10.715663 +epoch: 2, batch: 25580, sum loss: 3386.108643, avg loss: 2.263442, ppl: 9.616127 +epoch: 2, batch: 25581, sum loss: 3659.900879, avg loss: 2.334121, ppl: 10.320379 +epoch: 2, batch: 25582, sum loss: 3331.074707, avg loss: 2.438561, ppl: 11.456545 +epoch: 2, batch: 25583, sum loss: 4146.243164, avg loss: 2.382898, ppl: 10.836265 +epoch: 2, batch: 25584, sum loss: 4371.859863, avg loss: 2.453345, ppl: 11.627170 +epoch: 2, batch: 25585, sum loss: 4882.681641, avg loss: 2.637861, ppl: 13.983268 +epoch: 2, batch: 25586, sum loss: 3456.000000, avg loss: 2.209719, ppl: 9.113153 +epoch: 2, batch: 25587, sum loss: 4680.222656, avg loss: 2.722643, ppl: 15.220491 +epoch: 2, batch: 25588, sum loss: 5210.554199, avg loss: 2.617054, ppl: 13.695314 +epoch: 2, batch: 25589, sum loss: 3483.353027, avg loss: 2.350441, ppl: 10.490192 +epoch: 2, batch: 25590, sum loss: 3956.066406, avg loss: 2.557250, ppl: 12.900299 +epoch: 2, batch: 25591, sum loss: 4983.905273, avg loss: 2.686741, ppl: 14.683748 +epoch: 2, batch: 25592, sum loss: 
4832.283203, avg loss: 2.523386, ppl: 12.470745 +epoch: 2, batch: 25593, sum loss: 3291.296875, avg loss: 2.151175, ppl: 8.594948 +epoch: 2, batch: 25594, sum loss: 3863.749512, avg loss: 2.499191, ppl: 12.172646 +epoch: 2, batch: 25595, sum loss: 4315.725586, avg loss: 2.529734, ppl: 12.550163 +epoch: 2, batch: 25596, sum loss: 4578.610840, avg loss: 2.588248, ppl: 13.306441 +epoch: 2, batch: 25597, sum loss: 2986.551758, avg loss: 2.148598, ppl: 8.572835 +epoch: 2, batch: 25598, sum loss: 4704.041504, avg loss: 2.619177, ppl: 13.724422 +epoch: 2, batch: 25599, sum loss: 3146.070068, avg loss: 2.127160, ppl: 8.391006 +epoch: 2, batch: 25600, sum loss: 3576.283691, avg loss: 2.280793, ppl: 9.784438 +epoch: 2, batch: 25601, sum loss: 4448.442871, avg loss: 2.412388, ppl: 11.160577 +epoch: 2, batch: 25602, sum loss: 4645.169922, avg loss: 2.589281, ppl: 13.320189 +epoch: 2, batch: 25603, sum loss: 5524.152832, avg loss: 2.489478, ppl: 12.054986 +epoch: 2, batch: 25604, sum loss: 3627.612061, avg loss: 2.377203, ppl: 10.774726 +epoch: 2, batch: 25605, sum loss: 4511.378906, avg loss: 2.378165, ppl: 10.785094 +epoch: 2, batch: 25606, sum loss: 4702.270020, avg loss: 2.465795, ppl: 11.772833 +epoch: 2, batch: 25607, sum loss: 3189.322266, avg loss: 2.205617, ppl: 9.075851 +epoch: 2, batch: 25608, sum loss: 4670.783203, avg loss: 2.559333, ppl: 12.927197 +epoch: 2, batch: 25609, sum loss: 5626.945312, avg loss: 2.563529, ppl: 12.981545 +epoch: 2, batch: 25610, sum loss: 4734.829590, avg loss: 2.521209, ppl: 12.443626 +epoch: 2, batch: 25611, sum loss: 3279.637695, avg loss: 2.399150, ppl: 11.013809 +epoch: 2, batch: 25612, sum loss: 4075.967041, avg loss: 2.508287, ppl: 12.283875 +epoch: 2, batch: 25613, sum loss: 4841.573242, avg loss: 2.694253, ppl: 14.794470 +epoch: 2, batch: 25614, sum loss: 5229.379883, avg loss: 2.817554, ppl: 16.735861 +epoch: 2, batch: 25615, sum loss: 4533.283203, avg loss: 2.631041, ppl: 13.888217 +epoch: 2, batch: 25616, sum loss: 3860.974854, 
avg loss: 2.486140, ppl: 12.014807 +epoch: 2, batch: 25617, sum loss: 4215.238770, avg loss: 2.470832, ppl: 11.832283 +epoch: 2, batch: 25618, sum loss: 3680.925537, avg loss: 2.243099, ppl: 9.422486 +epoch: 2, batch: 25619, sum loss: 3663.772461, avg loss: 2.497459, ppl: 12.151577 +epoch: 2, batch: 25620, sum loss: 3584.369141, avg loss: 2.299146, ppl: 9.965672 +epoch: 2, batch: 25621, sum loss: 3761.952881, avg loss: 2.438077, ppl: 11.451002 +epoch: 2, batch: 25622, sum loss: 4541.360352, avg loss: 2.515989, ppl: 12.378846 +epoch: 2, batch: 25623, sum loss: 3977.631592, avg loss: 2.323383, ppl: 10.210155 +epoch: 2, batch: 25624, sum loss: 4873.084961, avg loss: 2.416007, ppl: 11.201039 +epoch: 2, batch: 25625, sum loss: 4264.674805, avg loss: 2.740793, ppl: 15.499278 +epoch: 2, batch: 25626, sum loss: 3970.549561, avg loss: 2.366239, ppl: 10.657238 +epoch: 2, batch: 25627, sum loss: 3460.882812, avg loss: 2.172557, ppl: 8.780704 +epoch: 2, batch: 25628, sum loss: 3819.732178, avg loss: 2.417552, ppl: 11.218363 +epoch: 2, batch: 25629, sum loss: 4025.637207, avg loss: 2.396213, ppl: 10.981508 +epoch: 2, batch: 25630, sum loss: 4723.432129, avg loss: 2.616860, ppl: 13.692660 +epoch: 2, batch: 25631, sum loss: 4374.911621, avg loss: 2.565931, ppl: 13.012762 +epoch: 2, batch: 25632, sum loss: 3842.193848, avg loss: 2.354285, ppl: 10.530602 +epoch: 2, batch: 25633, sum loss: 3893.272949, avg loss: 2.264848, ppl: 9.629659 +epoch: 2, batch: 25634, sum loss: 4223.624023, avg loss: 2.629903, ppl: 13.872422 +epoch: 2, batch: 25635, sum loss: 4161.694336, avg loss: 2.253219, ppl: 9.518323 +epoch: 2, batch: 25636, sum loss: 4230.182129, avg loss: 2.445192, ppl: 11.532763 +epoch: 2, batch: 25637, sum loss: 3216.440430, avg loss: 2.384315, ppl: 10.851622 +epoch: 2, batch: 25638, sum loss: 4312.143555, avg loss: 2.586769, ppl: 13.286768 +epoch: 2, batch: 25639, sum loss: 3983.551758, avg loss: 2.382507, ppl: 10.832026 +epoch: 2, batch: 25640, sum loss: 3445.239258, avg loss: 
2.244456, ppl: 9.435277 +epoch: 2, batch: 25641, sum loss: 3607.646240, avg loss: 2.392338, ppl: 10.939042 +epoch: 2, batch: 25642, sum loss: 3768.520508, avg loss: 2.458265, ppl: 11.684525 +epoch: 2, batch: 25643, sum loss: 4010.443604, avg loss: 2.528653, ppl: 12.536607 +epoch: 2, batch: 25644, sum loss: 3564.520996, avg loss: 2.226434, ppl: 9.266764 +epoch: 2, batch: 25645, sum loss: 3372.356689, avg loss: 2.150738, ppl: 8.591192 +epoch: 2, batch: 25646, sum loss: 4163.026367, avg loss: 2.568184, ppl: 13.042117 +epoch: 2, batch: 25647, sum loss: 3683.540283, avg loss: 2.365793, ppl: 10.652488 +epoch: 2, batch: 25648, sum loss: 5272.587891, avg loss: 2.654878, ppl: 14.223252 +epoch: 2, batch: 25649, sum loss: 4144.738281, avg loss: 2.371132, ppl: 10.709508 +epoch: 2, batch: 25650, sum loss: 3671.299316, avg loss: 2.396410, ppl: 10.983668 +epoch: 2, batch: 25651, sum loss: 4154.171875, avg loss: 2.348317, ppl: 10.467934 +epoch: 2, batch: 25652, sum loss: 4803.440918, avg loss: 2.453238, ppl: 11.625937 +epoch: 2, batch: 25653, sum loss: 4549.736816, avg loss: 2.573381, ppl: 13.110071 +epoch: 2, batch: 25654, sum loss: 4498.572754, avg loss: 2.666611, ppl: 14.391118 +epoch: 2, batch: 25655, sum loss: 4436.728516, avg loss: 2.516579, ppl: 12.386150 +epoch: 2, batch: 25656, sum loss: 4339.220215, avg loss: 2.515490, ppl: 12.372670 +epoch: 2, batch: 25657, sum loss: 5149.443359, avg loss: 2.654352, ppl: 14.215777 +epoch: 2, batch: 25658, sum loss: 5112.286133, avg loss: 2.904708, ppl: 18.259909 +epoch: 2, batch: 25659, sum loss: 4221.286621, avg loss: 2.428818, ppl: 11.345469 +epoch: 2, batch: 25660, sum loss: 4975.541016, avg loss: 2.551559, ppl: 12.827091 +epoch: 2, batch: 25661, sum loss: 4259.413574, avg loss: 2.622792, ppl: 13.774121 +epoch: 2, batch: 25662, sum loss: 4952.478027, avg loss: 2.580760, ppl: 13.207169 +epoch: 2, batch: 25663, sum loss: 4442.202637, avg loss: 2.372972, ppl: 10.729227 +epoch: 2, batch: 25664, sum loss: 4649.995605, avg loss: 2.549340, 
ppl: 12.798651 +epoch: 2, batch: 25665, sum loss: 3979.716309, avg loss: 2.411949, ppl: 11.155686 +epoch: 2, batch: 25666, sum loss: 4618.699219, avg loss: 2.383230, ppl: 10.839856 +epoch: 2, batch: 25667, sum loss: 4655.601562, avg loss: 2.608180, ppl: 13.574327 +epoch: 2, batch: 25668, sum loss: 3586.740479, avg loss: 2.394353, ppl: 10.961101 +epoch: 2, batch: 25669, sum loss: 3720.276123, avg loss: 2.297885, ppl: 9.953111 +epoch: 2, batch: 25670, sum loss: 4294.005859, avg loss: 2.464986, ppl: 11.763318 +epoch: 2, batch: 25671, sum loss: 3380.381104, avg loss: 2.231275, ppl: 9.311729 +epoch: 2, batch: 25672, sum loss: 4228.686523, avg loss: 2.569068, ppl: 13.053658 +epoch: 2, batch: 25673, sum loss: 3959.659180, avg loss: 2.549684, ppl: 12.803058 +epoch: 2, batch: 25674, sum loss: 4642.697266, avg loss: 2.768454, ppl: 15.933978 +epoch: 2, batch: 25675, sum loss: 3827.602295, avg loss: 2.263514, ppl: 9.616826 +epoch: 2, batch: 25676, sum loss: 3786.667480, avg loss: 2.479809, ppl: 11.938979 +epoch: 2, batch: 25677, sum loss: 4149.448242, avg loss: 2.368407, ppl: 10.680363 +epoch: 2, batch: 25678, sum loss: 4783.593750, avg loss: 2.577367, ppl: 13.162440 +epoch: 2, batch: 25679, sum loss: 4435.775391, avg loss: 2.418634, ppl: 11.230510 +epoch: 2, batch: 25680, sum loss: 3473.005859, avg loss: 2.227714, ppl: 9.278629 +epoch: 2, batch: 25681, sum loss: 3910.036621, avg loss: 2.457597, ppl: 11.676719 +epoch: 2, batch: 25682, sum loss: 3994.441406, avg loss: 2.319652, ppl: 10.172137 +epoch: 2, batch: 25683, sum loss: 3585.331299, avg loss: 2.457390, ppl: 11.674297 +epoch: 2, batch: 25684, sum loss: 4773.213379, avg loss: 2.689134, ppl: 14.718925 +epoch: 2, batch: 25685, sum loss: 4780.790039, avg loss: 2.719448, ppl: 15.171947 +epoch: 2, batch: 25686, sum loss: 4653.187012, avg loss: 2.582235, ppl: 13.226665 +epoch: 2, batch: 25687, sum loss: 4713.305664, avg loss: 2.737111, ppl: 15.442313 +epoch: 2, batch: 25688, sum loss: 3436.819336, avg loss: 2.183494, ppl: 
8.877268 +epoch: 2, batch: 25689, sum loss: 4448.133301, avg loss: 2.578628, ppl: 13.179045 +epoch: 2, batch: 25690, sum loss: 3523.297607, avg loss: 2.560536, ppl: 12.942755 +epoch: 2, batch: 25691, sum loss: 3920.505859, avg loss: 2.321199, ppl: 10.187886 +epoch: 2, batch: 25692, sum loss: 3914.084473, avg loss: 2.347981, ppl: 10.464420 +epoch: 2, batch: 25693, sum loss: 3364.193848, avg loss: 2.198820, ppl: 9.014367 +epoch: 2, batch: 25694, sum loss: 4151.885254, avg loss: 2.412484, ppl: 11.161654 +epoch: 2, batch: 25695, sum loss: 3691.384277, avg loss: 2.209087, ppl: 9.107399 +epoch: 2, batch: 25696, sum loss: 5501.000000, avg loss: 2.657488, ppl: 14.260424 +epoch: 2, batch: 25697, sum loss: 3958.382324, avg loss: 2.302724, ppl: 10.001388 +epoch: 2, batch: 25698, sum loss: 4670.550293, avg loss: 2.827210, ppl: 16.898245 +epoch: 2, batch: 25699, sum loss: 3580.810303, avg loss: 2.214478, ppl: 9.156626 +epoch: 2, batch: 25700, sum loss: 4266.578125, avg loss: 2.479127, ppl: 11.930846 +epoch: 2, batch: 25701, sum loss: 4161.394043, avg loss: 2.587932, ppl: 13.302229 +epoch: 2, batch: 25702, sum loss: 4882.051758, avg loss: 2.685397, ppl: 14.664021 +epoch: 2, batch: 25703, sum loss: 4523.064453, avg loss: 2.665330, ppl: 14.372687 +epoch: 2, batch: 25704, sum loss: 4323.092285, avg loss: 2.658728, ppl: 14.278121 +epoch: 2, batch: 25705, sum loss: 3569.790527, avg loss: 1.978820, ppl: 7.234199 +epoch: 2, batch: 25706, sum loss: 4052.215820, avg loss: 2.170442, ppl: 8.762159 +epoch: 2, batch: 25707, sum loss: 3610.038086, avg loss: 2.365687, ppl: 10.651353 +epoch: 2, batch: 25708, sum loss: 4406.431152, avg loss: 2.596601, ppl: 13.418046 +epoch: 2, batch: 25709, sum loss: 5347.180664, avg loss: 2.823221, ppl: 16.830980 +epoch: 2, batch: 25710, sum loss: 5340.299805, avg loss: 2.800367, ppl: 16.450682 +epoch: 2, batch: 25711, sum loss: 4139.597656, avg loss: 2.536518, ppl: 12.635601 +epoch: 2, batch: 25712, sum loss: 4073.633545, avg loss: 2.442226, ppl: 11.498613 
+epoch: 2, batch: 25713, sum loss: 4424.616699, avg loss: 2.501197, ppl: 12.197081 +epoch: 2, batch: 25714, sum loss: 5476.483398, avg loss: 2.707110, ppl: 14.985899 +epoch: 2, batch: 25715, sum loss: 3646.519775, avg loss: 2.244012, ppl: 9.431094 +epoch: 2, batch: 25716, sum loss: 3784.390625, avg loss: 2.292181, ppl: 9.896499 +epoch: 2, batch: 25717, sum loss: 4960.915039, avg loss: 2.628996, ppl: 13.859843 +epoch: 2, batch: 25718, sum loss: 4451.362305, avg loss: 2.670283, ppl: 14.444061 +epoch: 2, batch: 25719, sum loss: 3893.691406, avg loss: 2.402030, ppl: 11.045581 +epoch: 2, batch: 25720, sum loss: 5162.994141, avg loss: 2.866737, ppl: 17.579569 +epoch: 2, batch: 25721, sum loss: 3887.958740, avg loss: 2.498688, ppl: 12.166524 +epoch: 2, batch: 25722, sum loss: 4233.687012, avg loss: 2.304675, ppl: 10.020917 +epoch: 2, batch: 25723, sum loss: 5136.978027, avg loss: 2.674117, ppl: 14.499539 +epoch: 2, batch: 25724, sum loss: 4557.047852, avg loss: 2.563019, ppl: 12.974930 +epoch: 2, batch: 25725, sum loss: 4281.102539, avg loss: 2.747819, ppl: 15.608559 +epoch: 2, batch: 25726, sum loss: 3346.272705, avg loss: 2.132742, ppl: 8.437976 +epoch: 2, batch: 25727, sum loss: 3956.591797, avg loss: 2.539533, ppl: 12.673747 +epoch: 2, batch: 25728, sum loss: 3530.798584, avg loss: 2.135994, ppl: 8.465459 +epoch: 2, batch: 25729, sum loss: 3989.217285, avg loss: 2.368894, ppl: 10.685566 +epoch: 2, batch: 25730, sum loss: 3536.520508, avg loss: 2.126591, ppl: 8.386227 +epoch: 2, batch: 25731, sum loss: 3745.104492, avg loss: 2.178653, ppl: 8.834398 +epoch: 2, batch: 25732, sum loss: 4537.150391, avg loss: 2.517842, ppl: 12.401799 +epoch: 2, batch: 25733, sum loss: 3288.700195, avg loss: 2.136907, ppl: 8.473192 +epoch: 2, batch: 25734, sum loss: 4678.354004, avg loss: 2.798059, ppl: 16.412754 +epoch: 2, batch: 25735, sum loss: 5936.375000, avg loss: 2.569859, ppl: 13.063986 +epoch: 2, batch: 25736, sum loss: 5236.290039, avg loss: 2.583271, ppl: 13.240374 +epoch: 2, 
batch: 25737, sum loss: 4348.981445, avg loss: 2.568802, ppl: 13.050179 +epoch: 2, batch: 25738, sum loss: 4297.973145, avg loss: 2.558317, ppl: 12.914070 +epoch: 2, batch: 25739, sum loss: 3976.188721, avg loss: 2.440877, ppl: 11.483107 +epoch: 2, batch: 25740, sum loss: 4473.863281, avg loss: 2.436745, ppl: 11.435753 +epoch: 2, batch: 25741, sum loss: 4553.166016, avg loss: 2.715066, ppl: 15.105610 +epoch: 2, batch: 25742, sum loss: 3866.974121, avg loss: 2.598773, ppl: 13.447228 +epoch: 2, batch: 25743, sum loss: 4615.341309, avg loss: 2.531729, ppl: 12.575227 +epoch: 2, batch: 25744, sum loss: 3376.451904, avg loss: 2.141060, ppl: 8.508452 +epoch: 2, batch: 25745, sum loss: 4543.958984, avg loss: 2.547062, ppl: 12.769537 +epoch: 2, batch: 25746, sum loss: 4777.923340, avg loss: 2.669231, ppl: 14.428868 +epoch: 2, batch: 25747, sum loss: 4929.781250, avg loss: 2.666188, ppl: 14.385025 +epoch: 2, batch: 25748, sum loss: 3250.563477, avg loss: 2.268363, ppl: 9.663564 +epoch: 2, batch: 25749, sum loss: 3669.559082, avg loss: 2.411011, ppl: 11.145228 +epoch: 2, batch: 25750, sum loss: 4217.382812, avg loss: 2.703451, ppl: 14.931166 +epoch: 2, batch: 25751, sum loss: 4185.183105, avg loss: 2.434662, ppl: 11.411957 +epoch: 2, batch: 25752, sum loss: 3768.914307, avg loss: 2.468182, ppl: 11.800977 +epoch: 2, batch: 25753, sum loss: 4078.527344, avg loss: 2.604424, ppl: 13.523427 +epoch: 2, batch: 25754, sum loss: 4012.984863, avg loss: 2.594043, ppl: 13.383777 +epoch: 2, batch: 25755, sum loss: 4312.644043, avg loss: 2.538343, ppl: 12.658674 +epoch: 2, batch: 25756, sum loss: 3497.169434, avg loss: 2.287227, ppl: 9.847589 +epoch: 2, batch: 25757, sum loss: 5133.231445, avg loss: 2.681939, ppl: 14.613403 +epoch: 2, batch: 25758, sum loss: 3713.529053, avg loss: 2.323860, ppl: 10.215033 +epoch: 2, batch: 25759, sum loss: 4050.144043, avg loss: 2.425236, ppl: 11.304897 +epoch: 2, batch: 25760, sum loss: 4380.726562, avg loss: 2.548416, ppl: 12.786832 +epoch: 2, batch: 
25761, sum loss: 5002.590820, avg loss: 2.729182, ppl: 15.320354 +epoch: 2, batch: 25762, sum loss: 4720.113281, avg loss: 2.547282, ppl: 12.772338 +epoch: 2, batch: 25763, sum loss: 3740.879395, avg loss: 2.407258, ppl: 11.103477 +epoch: 2, batch: 25764, sum loss: 5152.513184, avg loss: 2.536934, ppl: 12.640857 +epoch: 2, batch: 25765, sum loss: 3380.462646, avg loss: 2.502193, ppl: 12.209236 +epoch: 2, batch: 25766, sum loss: 4772.315430, avg loss: 2.815525, ppl: 16.701946 +epoch: 2, batch: 25767, sum loss: 3273.719727, avg loss: 2.267119, ppl: 9.651554 +epoch: 2, batch: 25768, sum loss: 4101.671387, avg loss: 2.514820, ppl: 12.364385 +epoch: 2, batch: 25769, sum loss: 4854.080078, avg loss: 2.503394, ppl: 12.223907 +epoch: 2, batch: 25770, sum loss: 3807.746338, avg loss: 2.180840, ppl: 8.853741 +epoch: 2, batch: 25771, sum loss: 4630.949219, avg loss: 2.740207, ppl: 15.490187 +epoch: 2, batch: 25772, sum loss: 5044.320312, avg loss: 2.613638, ppl: 13.648610 +epoch: 2, batch: 25773, sum loss: 3934.116211, avg loss: 2.242940, ppl: 9.420985 +epoch: 2, batch: 25774, sum loss: 4714.002441, avg loss: 2.718571, ppl: 15.158648 +epoch: 2, batch: 25775, sum loss: 3860.307129, avg loss: 2.498581, ppl: 12.165215 +epoch: 2, batch: 25776, sum loss: 4078.507812, avg loss: 2.591174, ppl: 13.345428 +epoch: 2, batch: 25777, sum loss: 4096.663086, avg loss: 2.424061, ppl: 11.291622 +epoch: 2, batch: 25778, sum loss: 4111.858398, avg loss: 2.418740, ppl: 11.231702 +epoch: 2, batch: 25779, sum loss: 4293.600586, avg loss: 2.424393, ppl: 11.295373 +epoch: 2, batch: 25780, sum loss: 5660.681641, avg loss: 2.617051, ppl: 13.695278 +epoch: 2, batch: 25781, sum loss: 4110.559570, avg loss: 2.803929, ppl: 16.509382 +epoch: 2, batch: 25782, sum loss: 5273.785645, avg loss: 2.621166, ppl: 13.751749 +epoch: 2, batch: 25783, sum loss: 4876.574219, avg loss: 2.543857, ppl: 12.728675 +epoch: 2, batch: 25784, sum loss: 3870.707031, avg loss: 2.442086, ppl: 11.497004 +epoch: 2, batch: 25785, sum 
loss: 4073.935547, avg loss: 2.193826, ppl: 8.969466 +epoch: 2, batch: 25786, sum loss: 4703.838867, avg loss: 2.598806, ppl: 13.447674 +epoch: 2, batch: 25787, sum loss: 4593.462402, avg loss: 2.460344, ppl: 11.708839 +epoch: 2, batch: 25788, sum loss: 3813.306396, avg loss: 2.502170, ppl: 12.208954 +epoch: 2, batch: 25789, sum loss: 4468.639648, avg loss: 2.447229, ppl: 11.556279 +epoch: 2, batch: 25790, sum loss: 4822.106445, avg loss: 2.474144, ppl: 11.871540 +epoch: 2, batch: 25791, sum loss: 5408.358398, avg loss: 2.955387, ppl: 19.209158 +epoch: 2, batch: 25792, sum loss: 4096.411133, avg loss: 2.494769, ppl: 12.118937 +epoch: 2, batch: 25793, sum loss: 4506.969727, avg loss: 2.505264, ppl: 12.246789 +epoch: 2, batch: 25794, sum loss: 4560.437988, avg loss: 2.533577, ppl: 12.598487 +epoch: 2, batch: 25795, sum loss: 5018.304688, avg loss: 2.698013, ppl: 14.850200 +epoch: 2, batch: 25796, sum loss: 4224.344238, avg loss: 2.518989, ppl: 12.416036 +epoch: 2, batch: 25797, sum loss: 3363.763916, avg loss: 2.080250, ppl: 8.006469 +epoch: 2, batch: 25798, sum loss: 4324.597656, avg loss: 2.521631, ppl: 12.448887 +epoch: 2, batch: 25799, sum loss: 4921.633301, avg loss: 2.636118, ppl: 13.958916 +epoch: 2, batch: 25800, sum loss: 4547.609863, avg loss: 2.416371, ppl: 11.205121 +epoch: 2, batch: 25801, sum loss: 4270.056641, avg loss: 2.498570, ppl: 12.165091 +epoch: 2, batch: 25802, sum loss: 5303.678711, avg loss: 2.749444, ppl: 15.633945 +epoch: 2, batch: 25803, sum loss: 4407.568848, avg loss: 2.431092, ppl: 11.371287 +epoch: 2, batch: 25804, sum loss: 4567.757812, avg loss: 2.553246, ppl: 12.848746 +epoch: 2, batch: 25805, sum loss: 3347.276855, avg loss: 2.342391, ppl: 10.406088 +epoch: 2, batch: 25806, sum loss: 4256.546875, avg loss: 2.527641, ppl: 12.523925 +epoch: 2, batch: 25807, sum loss: 4397.444336, avg loss: 2.477433, ppl: 11.910653 +epoch: 2, batch: 25808, sum loss: 3597.277832, avg loss: 2.081758, ppl: 8.018554 +epoch: 2, batch: 25809, sum loss: 
5232.008789, avg loss: 2.720753, ppl: 15.191760 +epoch: 2, batch: 25810, sum loss: 3063.591797, avg loss: 2.320903, ppl: 10.184868 +epoch: 2, batch: 25811, sum loss: 4814.166992, avg loss: 2.537779, ppl: 12.651542 +epoch: 2, batch: 25812, sum loss: 3316.956543, avg loss: 2.268780, ppl: 9.667602 +epoch: 2, batch: 25813, sum loss: 4129.645020, avg loss: 2.455199, ppl: 11.648754 +epoch: 2, batch: 25814, sum loss: 4141.531250, avg loss: 2.499415, ppl: 12.175374 +epoch: 2, batch: 25815, sum loss: 4614.424805, avg loss: 2.642855, ppl: 14.053271 +epoch: 2, batch: 25816, sum loss: 4115.546875, avg loss: 2.653480, ppl: 14.203375 +epoch: 2, batch: 25817, sum loss: 4046.983887, avg loss: 2.380579, ppl: 10.811161 +epoch: 2, batch: 25818, sum loss: 4311.584961, avg loss: 2.514044, ppl: 12.354787 +epoch: 2, batch: 25819, sum loss: 3846.359131, avg loss: 2.431327, ppl: 11.373963 +epoch: 2, batch: 25820, sum loss: 4682.925781, avg loss: 2.638268, ppl: 13.988954 +epoch: 2, batch: 25821, sum loss: 4442.038086, avg loss: 2.645645, ppl: 14.092534 +epoch: 2, batch: 25822, sum loss: 3236.020508, avg loss: 2.109531, ppl: 8.244373 +epoch: 2, batch: 25823, sum loss: 3972.090088, avg loss: 2.562639, ppl: 12.969996 +epoch: 2, batch: 25824, sum loss: 4248.368164, avg loss: 2.638738, ppl: 13.995529 +epoch: 2, batch: 25825, sum loss: 3678.041016, avg loss: 2.359231, ppl: 10.582810 +epoch: 2, batch: 25826, sum loss: 3986.547119, avg loss: 2.414626, ppl: 11.185585 +epoch: 2, batch: 25827, sum loss: 4158.316895, avg loss: 2.433187, ppl: 11.395144 +epoch: 2, batch: 25828, sum loss: 4089.321533, avg loss: 2.387228, ppl: 10.883284 +epoch: 2, batch: 25829, sum loss: 5766.299316, avg loss: 2.795104, ppl: 16.364330 +epoch: 2, batch: 25830, sum loss: 4315.964355, avg loss: 2.469087, ppl: 11.811662 +epoch: 2, batch: 25831, sum loss: 3721.389648, avg loss: 2.309988, ppl: 10.074299 +epoch: 2, batch: 25832, sum loss: 3794.464355, avg loss: 2.234667, ppl: 9.343370 +epoch: 2, batch: 25833, sum loss: 
2833.023682, avg loss: 2.174232, ppl: 8.795424 +epoch: 2, batch: 25834, sum loss: 3818.014648, avg loss: 2.421062, ppl: 11.257809 +epoch: 2, batch: 25835, sum loss: 3941.192627, avg loss: 2.441879, ppl: 11.494619 +epoch: 2, batch: 25836, sum loss: 4039.196289, avg loss: 2.434717, ppl: 11.412593 +epoch: 2, batch: 25837, sum loss: 4434.809570, avg loss: 2.545815, ppl: 12.753614 +epoch: 2, batch: 25838, sum loss: 4655.515137, avg loss: 2.471080, ppl: 11.835225 +epoch: 2, batch: 25839, sum loss: 4081.188477, avg loss: 2.500728, ppl: 12.191371 +epoch: 2, batch: 25840, sum loss: 4302.004883, avg loss: 2.535065, ppl: 12.617250 +epoch: 2, batch: 25841, sum loss: 3872.229736, avg loss: 2.255230, ppl: 9.537486 +epoch: 2, batch: 25842, sum loss: 3143.412598, avg loss: 2.147140, ppl: 8.560339 +epoch: 2, batch: 25843, sum loss: 4253.476562, avg loss: 2.351286, ppl: 10.499064 +epoch: 2, batch: 25844, sum loss: 4716.470703, avg loss: 2.800755, ppl: 16.457064 +epoch: 2, batch: 25845, sum loss: 4134.128906, avg loss: 2.522348, ppl: 12.457815 +epoch: 2, batch: 25846, sum loss: 4408.276367, avg loss: 2.475169, ppl: 11.883718 +epoch: 2, batch: 25847, sum loss: 5075.148926, avg loss: 2.771791, ppl: 15.987241 +epoch: 2, batch: 25848, sum loss: 5085.274414, avg loss: 2.625335, ppl: 13.809202 +epoch: 2, batch: 25849, sum loss: 5168.040527, avg loss: 2.796559, ppl: 16.388151 +epoch: 2, batch: 25850, sum loss: 4904.648438, avg loss: 2.622807, ppl: 13.774331 +epoch: 2, batch: 25851, sum loss: 4654.384277, avg loss: 2.610423, ppl: 13.604806 +epoch: 2, batch: 25852, sum loss: 3928.336670, avg loss: 2.421909, ppl: 11.267349 +epoch: 2, batch: 25853, sum loss: 3891.035889, avg loss: 2.390071, ppl: 10.914270 +epoch: 2, batch: 25854, sum loss: 4183.306641, avg loss: 2.375529, ppl: 10.756702 +epoch: 2, batch: 25855, sum loss: 3835.365967, avg loss: 2.328698, ppl: 10.264570 +epoch: 2, batch: 25856, sum loss: 4144.159668, avg loss: 2.567633, ppl: 13.034936 +epoch: 2, batch: 25857, sum loss: 
3709.657959, avg loss: 2.280060, ppl: 9.777267 +epoch: 2, batch: 25858, sum loss: 3367.078613, avg loss: 2.112345, ppl: 8.267610 +epoch: 2, batch: 25859, sum loss: 4808.270508, avg loss: 2.530669, ppl: 12.561904 +epoch: 2, batch: 25860, sum loss: 2744.710449, avg loss: 1.886399, ppl: 6.595574 +epoch: 2, batch: 25861, sum loss: 3918.540039, avg loss: 2.409926, ppl: 11.133140 +epoch: 2, batch: 25862, sum loss: 3588.257812, avg loss: 2.449323, ppl: 11.580501 +epoch: 2, batch: 25863, sum loss: 4296.946289, avg loss: 2.479484, ppl: 11.935105 +epoch: 2, batch: 25864, sum loss: 3980.785645, avg loss: 2.266962, ppl: 9.650043 +epoch: 2, batch: 25865, sum loss: 4086.464844, avg loss: 2.415168, ppl: 11.191653 +epoch: 2, batch: 25866, sum loss: 3683.516602, avg loss: 2.356697, ppl: 10.556023 +epoch: 2, batch: 25867, sum loss: 4746.911133, avg loss: 2.543896, ppl: 12.729173 +epoch: 2, batch: 25868, sum loss: 4210.708984, avg loss: 2.608865, ppl: 13.583632 +epoch: 2, batch: 25869, sum loss: 3761.494629, avg loss: 2.539834, ppl: 12.677569 +epoch: 2, batch: 25870, sum loss: 6255.877930, avg loss: 3.042742, ppl: 20.962650 +epoch: 2, batch: 25871, sum loss: 4337.736816, avg loss: 2.303631, ppl: 10.010463 +epoch: 2, batch: 25872, sum loss: 4222.521973, avg loss: 2.536049, ppl: 12.629677 +epoch: 2, batch: 25873, sum loss: 4095.979492, avg loss: 2.349960, ppl: 10.485147 +epoch: 2, batch: 25874, sum loss: 4020.822998, avg loss: 2.495856, ppl: 12.132109 +epoch: 2, batch: 25875, sum loss: 3774.336670, avg loss: 2.254682, ppl: 9.532258 +epoch: 2, batch: 25876, sum loss: 4804.416016, avg loss: 2.769116, ppl: 15.944531 +epoch: 2, batch: 25877, sum loss: 4447.005859, avg loss: 2.459627, ppl: 11.700448 +epoch: 2, batch: 25878, sum loss: 4079.959961, avg loss: 2.592096, ppl: 13.357744 +epoch: 2, batch: 25879, sum loss: 3864.774902, avg loss: 2.498239, ppl: 12.161055 +epoch: 2, batch: 25880, sum loss: 3843.121338, avg loss: 2.610816, ppl: 13.610155 +epoch: 2, batch: 25881, sum loss: 3733.573486, 
avg loss: 2.324765, ppl: 10.224281 +epoch: 2, batch: 25882, sum loss: 3644.354004, avg loss: 2.234429, ppl: 9.341147 +epoch: 2, batch: 25883, sum loss: 3450.464844, avg loss: 2.218949, ppl: 9.197656 +epoch: 2, batch: 25884, sum loss: 3784.410156, avg loss: 2.415067, ppl: 11.190522 +epoch: 2, batch: 25885, sum loss: 4530.182617, avg loss: 2.618603, ppl: 13.716541 +epoch: 2, batch: 25886, sum loss: 4826.805176, avg loss: 2.812824, ppl: 16.656883 +epoch: 2, batch: 25887, sum loss: 4264.003906, avg loss: 2.461896, ppl: 11.727024 +epoch: 2, batch: 25888, sum loss: 4513.117188, avg loss: 2.767086, ppl: 15.912199 +epoch: 2, batch: 25889, sum loss: 4189.578125, avg loss: 2.324960, ppl: 10.226274 +epoch: 2, batch: 25890, sum loss: 4272.748047, avg loss: 2.420820, ppl: 11.255090 +epoch: 2, batch: 25891, sum loss: 4428.188965, avg loss: 2.482169, ppl: 11.967192 +epoch: 2, batch: 25892, sum loss: 2907.848633, avg loss: 1.984880, ppl: 7.278171 +epoch: 2, batch: 25893, sum loss: 4418.063477, avg loss: 2.504571, ppl: 12.238310 +epoch: 2, batch: 25894, sum loss: 4252.638184, avg loss: 2.508931, ppl: 12.291785 +epoch: 2, batch: 25895, sum loss: 4659.199219, avg loss: 2.572722, ppl: 13.101438 +epoch: 2, batch: 25896, sum loss: 3261.152100, avg loss: 2.399670, ppl: 11.019544 +epoch: 2, batch: 25897, sum loss: 3461.181152, avg loss: 2.238798, ppl: 9.382044 +epoch: 2, batch: 25898, sum loss: 3545.754395, avg loss: 2.334269, ppl: 10.321910 +epoch: 2, batch: 25899, sum loss: 3713.883057, avg loss: 2.484203, ppl: 11.991554 +epoch: 2, batch: 25900, sum loss: 3810.521240, avg loss: 2.215419, ppl: 9.165251 +epoch: 2, batch: 25901, sum loss: 3656.188477, avg loss: 2.397501, ppl: 10.995661 +epoch: 2, batch: 25902, sum loss: 4448.696777, avg loss: 2.517655, ppl: 12.399490 +epoch: 2, batch: 25903, sum loss: 4537.381348, avg loss: 2.429005, ppl: 11.347585 +epoch: 2, batch: 25904, sum loss: 4730.294434, avg loss: 2.488319, ppl: 12.041018 +epoch: 2, batch: 25905, sum loss: 4277.967285, avg loss: 
2.602170, ppl: 13.492983 +epoch: 2, batch: 25906, sum loss: 4601.074707, avg loss: 2.452599, ppl: 11.618499 +epoch: 2, batch: 25907, sum loss: 3785.503906, avg loss: 2.325248, ppl: 10.229217 +epoch: 2, batch: 25908, sum loss: 4091.386963, avg loss: 2.463207, ppl: 11.742409 +epoch: 2, batch: 25909, sum loss: 4387.959961, avg loss: 2.488917, ppl: 12.048217 +epoch: 2, batch: 25910, sum loss: 4098.582031, avg loss: 2.336706, ppl: 10.347094 +epoch: 2, batch: 25911, sum loss: 4094.112305, avg loss: 2.431183, ppl: 11.372329 +epoch: 2, batch: 25912, sum loss: 3762.699463, avg loss: 2.183807, ppl: 8.880050 +epoch: 2, batch: 25913, sum loss: 4134.919922, avg loss: 2.465665, ppl: 11.771306 +epoch: 2, batch: 25914, sum loss: 4585.860352, avg loss: 2.576326, ppl: 13.148743 +epoch: 2, batch: 25915, sum loss: 4356.215820, avg loss: 2.591443, ppl: 13.349021 +epoch: 2, batch: 25916, sum loss: 4362.003418, avg loss: 2.594886, ppl: 13.395061 +epoch: 2, batch: 25917, sum loss: 5104.082520, avg loss: 2.588277, ppl: 13.306826 +epoch: 2, batch: 25918, sum loss: 4195.886719, avg loss: 2.541422, ppl: 12.697710 +epoch: 2, batch: 25919, sum loss: 4612.544922, avg loss: 2.770297, ppl: 15.963379 +epoch: 2, batch: 25920, sum loss: 4074.091064, avg loss: 2.581807, ppl: 13.221003 +epoch: 2, batch: 25921, sum loss: 3237.057861, avg loss: 2.185724, ppl: 8.897092 +epoch: 2, batch: 25922, sum loss: 3716.793701, avg loss: 2.274660, ppl: 9.724608 +epoch: 2, batch: 25923, sum loss: 3563.699951, avg loss: 2.375800, ppl: 10.759616 +epoch: 2, batch: 25924, sum loss: 4311.868652, avg loss: 2.394152, ppl: 10.958905 +epoch: 2, batch: 25925, sum loss: 4120.239258, avg loss: 2.331771, ppl: 10.296159 +epoch: 2, batch: 25926, sum loss: 3506.711670, avg loss: 2.330041, ppl: 10.278365 +epoch: 2, batch: 25927, sum loss: 3937.614258, avg loss: 2.489010, ppl: 12.049342 +epoch: 2, batch: 25928, sum loss: 4625.064941, avg loss: 2.601274, ppl: 13.480902 +epoch: 2, batch: 25929, sum loss: 4494.512207, avg loss: 2.810827, 
ppl: 16.623657 +epoch: 2, batch: 25930, sum loss: 3728.344482, avg loss: 2.432058, ppl: 11.382281 +epoch: 2, batch: 25931, sum loss: 3804.382080, avg loss: 2.417015, ppl: 11.212341 +epoch: 2, batch: 25932, sum loss: 3474.566406, avg loss: 2.394601, ppl: 10.963823 +epoch: 2, batch: 25933, sum loss: 4484.938477, avg loss: 2.526726, ppl: 12.512470 +epoch: 2, batch: 25934, sum loss: 3361.346436, avg loss: 2.554215, ppl: 12.861196 +epoch: 2, batch: 25935, sum loss: 4125.003418, avg loss: 2.380267, ppl: 10.807793 +epoch: 2, batch: 25936, sum loss: 4805.571289, avg loss: 2.681680, ppl: 14.609623 +epoch: 2, batch: 25937, sum loss: 4159.239258, avg loss: 2.462545, ppl: 11.734643 +epoch: 2, batch: 25938, sum loss: 3377.073975, avg loss: 2.274124, ppl: 9.719400 +epoch: 2, batch: 25939, sum loss: 6245.240234, avg loss: 3.059892, ppl: 21.325258 +epoch: 2, batch: 25940, sum loss: 3656.203125, avg loss: 2.397510, ppl: 10.995766 +epoch: 2, batch: 25941, sum loss: 4252.623535, avg loss: 2.629947, ppl: 13.873031 +epoch: 2, batch: 25942, sum loss: 4099.058594, avg loss: 2.282327, ppl: 9.799455 +epoch: 2, batch: 25943, sum loss: 3904.907227, avg loss: 2.455916, ppl: 11.657111 +epoch: 2, batch: 25944, sum loss: 4536.698730, avg loss: 2.475013, ppl: 11.881862 +epoch: 2, batch: 25945, sum loss: 4605.490234, avg loss: 2.596105, ppl: 13.411397 +epoch: 2, batch: 25946, sum loss: 4351.197266, avg loss: 2.611763, ppl: 13.623047 +epoch: 2, batch: 25947, sum loss: 4185.975586, avg loss: 2.558665, ppl: 12.918557 +epoch: 2, batch: 25948, sum loss: 4013.738770, avg loss: 2.447402, ppl: 11.558276 +epoch: 2, batch: 25949, sum loss: 3606.298828, avg loss: 2.340233, ppl: 10.383654 +epoch: 2, batch: 25950, sum loss: 4413.288574, avg loss: 2.491976, ppl: 12.085127 +epoch: 2, batch: 25951, sum loss: 4256.654785, avg loss: 2.657088, ppl: 14.254716 +epoch: 2, batch: 25952, sum loss: 3886.763916, avg loss: 2.344249, ppl: 10.425435 +epoch: 2, batch: 25953, sum loss: 4150.457031, avg loss: 2.294338, ppl: 
9.917868 +epoch: 2, batch: 25954, sum loss: 4504.440430, avg loss: 2.568096, ppl: 13.040970 +epoch: 2, batch: 25955, sum loss: 4301.935059, avg loss: 2.527576, ppl: 12.523119 +epoch: 2, batch: 25956, sum loss: 4053.443359, avg loss: 2.392824, ppl: 10.944354 +epoch: 2, batch: 25957, sum loss: 3547.138672, avg loss: 2.095180, ppl: 8.126900 +epoch: 2, batch: 25958, sum loss: 4974.784180, avg loss: 2.551171, ppl: 12.822114 +epoch: 2, batch: 25959, sum loss: 4197.605957, avg loss: 2.583142, ppl: 13.238669 +epoch: 2, batch: 25960, sum loss: 3385.585693, avg loss: 2.101543, ppl: 8.178780 +epoch: 2, batch: 25961, sum loss: 5294.885742, avg loss: 2.612178, ppl: 13.628706 +epoch: 2, batch: 25962, sum loss: 4289.583984, avg loss: 2.559418, ppl: 12.928288 +epoch: 2, batch: 25963, sum loss: 3725.520996, avg loss: 2.534368, ppl: 12.608460 +epoch: 2, batch: 25964, sum loss: 5140.918457, avg loss: 2.801591, ppl: 16.470825 +epoch: 2, batch: 25965, sum loss: 4819.001953, avg loss: 2.626159, ppl: 13.820585 +epoch: 2, batch: 25966, sum loss: 3396.685547, avg loss: 2.282719, ppl: 9.803298 +epoch: 2, batch: 25967, sum loss: 3770.168213, avg loss: 2.399853, ppl: 11.021551 +epoch: 2, batch: 25968, sum loss: 4755.928711, avg loss: 2.607417, ppl: 13.563972 +epoch: 2, batch: 25969, sum loss: 4780.178711, avg loss: 2.737789, ppl: 15.452776 +epoch: 2, batch: 25970, sum loss: 5382.791992, avg loss: 2.682009, ppl: 14.614424 +epoch: 2, batch: 25971, sum loss: 4196.583008, avg loss: 2.417387, ppl: 11.216507 +epoch: 2, batch: 25972, sum loss: 4249.386230, avg loss: 2.418546, ppl: 11.229525 +epoch: 2, batch: 25973, sum loss: 4243.161621, avg loss: 2.506298, ppl: 12.259457 +epoch: 2, batch: 25974, sum loss: 4480.793945, avg loss: 2.567790, ppl: 13.036984 +epoch: 2, batch: 25975, sum loss: 4216.985352, avg loss: 2.658881, ppl: 14.280300 +epoch: 2, batch: 25976, sum loss: 4553.559570, avg loss: 2.470732, ppl: 11.831106 +epoch: 2, batch: 25977, sum loss: 3962.521973, avg loss: 2.402985, ppl: 11.056129 
+epoch: 2, batch: 25978, sum loss: 3824.989990, avg loss: 2.459801, ppl: 11.702479 +epoch: 2, batch: 25979, sum loss: 3990.913574, avg loss: 2.477290, ppl: 11.908943 +epoch: 2, batch: 25980, sum loss: 4128.926270, avg loss: 2.628216, ppl: 13.849035 +epoch: 2, batch: 25981, sum loss: 4484.676758, avg loss: 2.479092, ppl: 11.930426 +epoch: 2, batch: 25982, sum loss: 4903.202637, avg loss: 2.467641, ppl: 11.794589 +epoch: 2, batch: 25983, sum loss: 4479.378906, avg loss: 2.317320, ppl: 10.148438 +epoch: 2, batch: 25984, sum loss: 3683.552002, avg loss: 2.300782, ppl: 9.981987 +epoch: 2, batch: 25985, sum loss: 4712.841309, avg loss: 2.435577, ppl: 11.422406 +epoch: 2, batch: 25986, sum loss: 4075.449219, avg loss: 2.536061, ppl: 12.629819 +epoch: 2, batch: 25987, sum loss: 4076.785889, avg loss: 2.496501, ppl: 12.139938 +epoch: 2, batch: 25988, sum loss: 3896.657959, avg loss: 2.390588, ppl: 10.919910 +epoch: 2, batch: 25989, sum loss: 3664.622314, avg loss: 2.309151, ppl: 10.065874 +epoch: 2, batch: 25990, sum loss: 4547.961426, avg loss: 2.523841, ppl: 12.476428 +epoch: 2, batch: 25991, sum loss: 3481.669922, avg loss: 2.202195, ppl: 9.044843 +epoch: 2, batch: 25992, sum loss: 4444.606934, avg loss: 2.548513, ppl: 12.788073 +epoch: 2, batch: 25993, sum loss: 4835.851074, avg loss: 2.685092, ppl: 14.659554 +epoch: 2, batch: 25994, sum loss: 3144.052734, avg loss: 2.261909, ppl: 9.601397 +epoch: 2, batch: 25995, sum loss: 4230.804199, avg loss: 2.536453, ppl: 12.634782 +epoch: 2, batch: 25996, sum loss: 4049.680420, avg loss: 2.118034, ppl: 8.314772 +epoch: 2, batch: 25997, sum loss: 4634.961426, avg loss: 2.653097, ppl: 14.197948 +epoch: 2, batch: 25998, sum loss: 4085.805664, avg loss: 2.430580, ppl: 11.365477 +epoch: 2, batch: 25999, sum loss: 3863.496094, avg loss: 2.267310, ppl: 9.653398 +epoch: 2, batch: 26000, sum loss: 3544.585938, avg loss: 2.292746, ppl: 9.902095 +epoch: 2, batch: 26001, sum loss: 4007.533936, avg loss: 2.287405, ppl: 9.849348 +epoch: 2, 
batch: 26002, sum loss: 4287.609863, avg loss: 2.471245, ppl: 11.837173 +epoch: 2, batch: 26003, sum loss: 3774.556641, avg loss: 2.328536, ppl: 10.262903 +epoch: 2, batch: 26004, sum loss: 4012.517090, avg loss: 2.470762, ppl: 11.831456 +epoch: 2, batch: 26005, sum loss: 4308.057617, avg loss: 2.332462, ppl: 10.303280 +epoch: 2, batch: 26006, sum loss: 4206.594727, avg loss: 2.340899, ppl: 10.390571 +epoch: 2, batch: 26007, sum loss: 4484.351562, avg loss: 2.581665, ppl: 13.219127 +epoch: 2, batch: 26008, sum loss: 3638.031250, avg loss: 2.289510, ppl: 9.870100 +epoch: 2, batch: 26009, sum loss: 4409.436035, avg loss: 2.277601, ppl: 9.753257 +epoch: 2, batch: 26010, sum loss: 4798.934570, avg loss: 2.714330, ppl: 15.094489 +epoch: 2, batch: 26011, sum loss: 3945.050537, avg loss: 2.399666, ppl: 11.019493 +epoch: 2, batch: 26012, sum loss: 3277.611816, avg loss: 2.237278, ppl: 9.367795 +epoch: 2, batch: 26013, sum loss: 3759.885742, avg loss: 2.260906, ppl: 9.591770 +epoch: 2, batch: 26014, sum loss: 3513.871338, avg loss: 2.287677, ppl: 9.852023 +epoch: 2, batch: 26015, sum loss: 4128.183594, avg loss: 2.476415, ppl: 11.898528 +epoch: 2, batch: 26016, sum loss: 4407.843262, avg loss: 2.533243, ppl: 12.594286 +epoch: 2, batch: 26017, sum loss: 3986.550781, avg loss: 2.515174, ppl: 12.368760 +epoch: 2, batch: 26018, sum loss: 4437.587891, avg loss: 2.633583, ppl: 13.923573 +epoch: 2, batch: 26019, sum loss: 4670.729004, avg loss: 2.587662, ppl: 13.298636 +epoch: 2, batch: 26020, sum loss: 4647.803223, avg loss: 2.586424, ppl: 13.282185 +epoch: 2, batch: 26021, sum loss: 4931.510254, avg loss: 2.664241, ppl: 14.357050 +epoch: 2, batch: 26022, sum loss: 3345.674316, avg loss: 2.196766, ppl: 8.995872 +epoch: 2, batch: 26023, sum loss: 4197.005859, avg loss: 2.746732, ppl: 15.591588 +epoch: 2, batch: 26024, sum loss: 4049.294922, avg loss: 2.473607, ppl: 11.865171 +epoch: 2, batch: 26025, sum loss: 4471.999023, avg loss: 2.539466, ppl: 12.672897 +epoch: 2, batch: 26026, 
sum loss: 4032.319336, avg loss: 2.330820, ppl: 10.286376 +epoch: 2, batch: 26027, sum loss: 3826.435547, avg loss: 2.336041, ppl: 10.340220 +epoch: 2, batch: 26028, sum loss: 3823.898438, avg loss: 2.327388, ppl: 10.251128 +epoch: 2, batch: 26029, sum loss: 4575.260254, avg loss: 2.556011, ppl: 12.884322 +epoch: 2, batch: 26030, sum loss: 3917.238281, avg loss: 2.455949, ppl: 11.657489 +epoch: 2, batch: 26031, sum loss: 3119.097656, avg loss: 2.431097, ppl: 11.371350 +epoch: 2, batch: 26032, sum loss: 5255.252441, avg loss: 2.590070, ppl: 13.330708 +epoch: 2, batch: 26033, sum loss: 3770.166504, avg loss: 2.438659, ppl: 11.457663 +epoch: 2, batch: 26034, sum loss: 3441.575439, avg loss: 2.247926, ppl: 9.468083 +epoch: 2, batch: 26035, sum loss: 4252.641113, avg loss: 2.340474, ppl: 10.386157 +epoch: 2, batch: 26036, sum loss: 3895.967773, avg loss: 2.503835, ppl: 12.229309 +epoch: 2, batch: 26037, sum loss: 4181.346680, avg loss: 2.412779, ppl: 11.164949 +epoch: 2, batch: 26038, sum loss: 3153.506348, avg loss: 2.330751, ppl: 10.285665 +epoch: 2, batch: 26039, sum loss: 4641.833984, avg loss: 2.619545, ppl: 13.729478 +epoch: 2, batch: 26040, sum loss: 3826.483887, avg loss: 2.519081, ppl: 12.417178 +epoch: 2, batch: 26041, sum loss: 3732.533691, avg loss: 2.223069, ppl: 9.235636 +epoch: 2, batch: 26042, sum loss: 4753.780762, avg loss: 2.649822, ppl: 14.151523 +epoch: 2, batch: 26043, sum loss: 4560.968750, avg loss: 2.538102, ppl: 12.655626 +epoch: 2, batch: 26044, sum loss: 3136.297607, avg loss: 2.381395, ppl: 10.819987 +epoch: 2, batch: 26045, sum loss: 4046.237793, avg loss: 2.314781, ppl: 10.122710 +epoch: 2, batch: 26046, sum loss: 3421.026611, avg loss: 2.341565, ppl: 10.397497 +epoch: 2, batch: 26047, sum loss: 4198.465820, avg loss: 2.349449, ppl: 10.479798 +epoch: 2, batch: 26048, sum loss: 4403.112793, avg loss: 2.450258, ppl: 11.591331 +epoch: 2, batch: 26049, sum loss: 4208.575684, avg loss: 2.517091, ppl: 12.392492 +epoch: 2, batch: 26050, sum loss: 
3636.885010, avg loss: 2.429449, ppl: 11.352623 +epoch: 2, batch: 26051, sum loss: 4347.755371, avg loss: 2.550003, ppl: 12.807146 +epoch: 2, batch: 26052, sum loss: 4115.331055, avg loss: 2.492630, ppl: 12.093034 +epoch: 2, batch: 26053, sum loss: 4040.673584, avg loss: 2.465328, ppl: 11.767346 +epoch: 2, batch: 26054, sum loss: 3791.215332, avg loss: 2.534235, ppl: 12.606783 +epoch: 2, batch: 26055, sum loss: 4244.936523, avg loss: 2.424293, ppl: 11.294239 +epoch: 2, batch: 26056, sum loss: 4413.081055, avg loss: 2.655283, ppl: 14.229019 +epoch: 2, batch: 26057, sum loss: 5019.799805, avg loss: 2.847306, ppl: 17.241262 +epoch: 2, batch: 26058, sum loss: 3928.969238, avg loss: 2.327589, ppl: 10.253187 +epoch: 2, batch: 26059, sum loss: 4313.396973, avg loss: 2.424619, ppl: 11.297921 +epoch: 2, batch: 26060, sum loss: 4267.728516, avg loss: 2.351366, ppl: 10.499898 +epoch: 2, batch: 26061, sum loss: 3971.037354, avg loss: 2.408149, ppl: 11.113369 +epoch: 2, batch: 26062, sum loss: 4577.542969, avg loss: 2.603836, ppl: 13.515478 +epoch: 2, batch: 26063, sum loss: 4078.386719, avg loss: 2.452427, ppl: 11.616510 +epoch: 2, batch: 26064, sum loss: 2970.659668, avg loss: 2.171535, ppl: 8.771736 +epoch: 2, batch: 26065, sum loss: 3397.837891, avg loss: 2.327286, ppl: 10.250088 +epoch: 2, batch: 26066, sum loss: 4139.852051, avg loss: 2.373768, ppl: 10.737780 +epoch: 2, batch: 26067, sum loss: 4391.860840, avg loss: 2.522608, ppl: 12.461057 +epoch: 2, batch: 26068, sum loss: 5320.711426, avg loss: 2.776989, ppl: 16.070562 +epoch: 2, batch: 26069, sum loss: 3991.578613, avg loss: 2.423545, ppl: 11.285795 +epoch: 2, batch: 26070, sum loss: 4760.480469, avg loss: 2.574624, ppl: 13.126385 +epoch: 2, batch: 26071, sum loss: 3912.291992, avg loss: 2.257526, ppl: 9.559407 +epoch: 2, batch: 26072, sum loss: 3264.718750, avg loss: 2.372615, ppl: 10.725407 +epoch: 2, batch: 26073, sum loss: 3249.865234, avg loss: 2.166577, ppl: 8.728354 +epoch: 2, batch: 26074, sum loss: 
4674.044434, avg loss: 2.578072, ppl: 13.171720 +epoch: 2, batch: 26075, sum loss: 4637.961426, avg loss: 2.699628, ppl: 14.874203 +epoch: 2, batch: 26076, sum loss: 4627.052246, avg loss: 2.685463, ppl: 14.664986 +epoch: 2, batch: 26077, sum loss: 4395.897461, avg loss: 2.582783, ppl: 13.233923 +epoch: 2, batch: 26078, sum loss: 3484.705078, avg loss: 2.292569, ppl: 9.900341 +epoch: 2, batch: 26079, sum loss: 5139.083008, avg loss: 2.696266, ppl: 14.824273 +epoch: 2, batch: 26080, sum loss: 3711.838867, avg loss: 2.396281, ppl: 10.982255 +epoch: 2, batch: 26081, sum loss: 4077.339600, avg loss: 2.453273, ppl: 11.626338 +epoch: 2, batch: 26082, sum loss: 4459.996094, avg loss: 2.746303, ppl: 15.584909 +epoch: 2, batch: 26083, sum loss: 4089.091553, avg loss: 2.594601, ppl: 13.391245 +epoch: 2, batch: 26084, sum loss: 3543.870117, avg loss: 2.422331, ppl: 11.272105 +epoch: 2, batch: 26085, sum loss: 4070.505615, avg loss: 2.441815, ppl: 11.493882 +epoch: 2, batch: 26086, sum loss: 4745.284180, avg loss: 2.560866, ppl: 12.947020 +epoch: 2, batch: 26087, sum loss: 3768.317383, avg loss: 2.303373, ppl: 10.007878 +epoch: 2, batch: 26088, sum loss: 3641.857178, avg loss: 2.339022, ppl: 10.371088 +epoch: 2, batch: 26089, sum loss: 3595.282715, avg loss: 2.230324, ppl: 9.302882 +epoch: 2, batch: 26090, sum loss: 3790.050781, avg loss: 2.509968, ppl: 12.304531 +epoch: 2, batch: 26091, sum loss: 4554.743652, avg loss: 2.472717, ppl: 11.854607 +epoch: 2, batch: 26092, sum loss: 4157.139160, avg loss: 2.663126, ppl: 14.341045 +epoch: 2, batch: 26093, sum loss: 4764.392090, avg loss: 2.728747, ppl: 15.313689 +epoch: 2, batch: 26094, sum loss: 3312.240723, avg loss: 2.354116, ppl: 10.528814 +epoch: 2, batch: 26095, sum loss: 4111.203613, avg loss: 2.544062, ppl: 12.731277 +epoch: 2, batch: 26096, sum loss: 3986.485107, avg loss: 2.397165, ppl: 10.991968 +epoch: 2, batch: 26097, sum loss: 4599.507324, avg loss: 2.535561, ppl: 12.623508 +epoch: 2, batch: 26098, sum loss: 
4899.108887, avg loss: 2.588013, ppl: 13.303310 +epoch: 2, batch: 26099, sum loss: 4810.082520, avg loss: 2.502645, ppl: 12.214754 +epoch: 2, batch: 26100, sum loss: 3484.083984, avg loss: 2.384726, ppl: 10.856083 +epoch: 2, batch: 26101, sum loss: 4034.568115, avg loss: 2.555141, ppl: 12.873117 +epoch: 2, batch: 26102, sum loss: 4567.857910, avg loss: 2.563332, ppl: 12.978995 +epoch: 2, batch: 26103, sum loss: 3785.085449, avg loss: 2.412419, ppl: 11.160928 +epoch: 2, batch: 26104, sum loss: 3781.207275, avg loss: 2.351497, ppl: 10.501278 +epoch: 2, batch: 26105, sum loss: 4479.194824, avg loss: 2.506544, ppl: 12.262482 +epoch: 2, batch: 26106, sum loss: 4705.668945, avg loss: 2.649588, ppl: 14.148213 +epoch: 2, batch: 26107, sum loss: 3644.313477, avg loss: 2.372600, ppl: 10.725243 +epoch: 2, batch: 26108, sum loss: 3970.187256, avg loss: 2.528782, ppl: 12.538221 +epoch: 2, batch: 26109, sum loss: 4904.147949, avg loss: 2.688678, ppl: 14.712207 +epoch: 2, batch: 26110, sum loss: 4286.754883, avg loss: 2.470752, ppl: 11.831344 +epoch: 2, batch: 26111, sum loss: 4451.372070, avg loss: 2.552392, ppl: 12.837778 +epoch: 2, batch: 26112, sum loss: 4152.261719, avg loss: 2.395996, ppl: 10.979132 +epoch: 2, batch: 26113, sum loss: 3983.788330, avg loss: 2.357271, ppl: 10.562090 +epoch: 2, batch: 26114, sum loss: 3175.749023, avg loss: 2.118578, ppl: 8.319303 +epoch: 2, batch: 26115, sum loss: 4908.498535, avg loss: 2.770033, ppl: 15.959163 +epoch: 2, batch: 26116, sum loss: 4362.771484, avg loss: 2.546860, ppl: 12.766955 +epoch: 2, batch: 26117, sum loss: 4509.965820, avg loss: 2.419510, ppl: 11.240346 +epoch: 2, batch: 26118, sum loss: 3734.656006, avg loss: 2.335620, ppl: 10.335863 +epoch: 2, batch: 26119, sum loss: 4024.387695, avg loss: 2.601414, ppl: 13.482792 +epoch: 2, batch: 26120, sum loss: 4527.198730, avg loss: 2.615366, ppl: 13.672219 +epoch: 2, batch: 26121, sum loss: 3674.556641, avg loss: 2.452975, ppl: 11.622874 +epoch: 2, batch: 26122, sum loss: 
4211.954102, avg loss: 2.563575, ppl: 12.982146 +epoch: 2, batch: 26123, sum loss: 4284.299316, avg loss: 2.520176, ppl: 12.430786 +epoch: 2, batch: 26124, sum loss: 4267.482910, avg loss: 2.355123, ppl: 10.539426 +epoch: 2, batch: 26125, sum loss: 5042.228516, avg loss: 2.687755, ppl: 14.698642 +epoch: 2, batch: 26126, sum loss: 3469.713867, avg loss: 2.269270, ppl: 9.672339 +epoch: 2, batch: 26127, sum loss: 4026.729004, avg loss: 2.345212, ppl: 10.435485 +epoch: 2, batch: 26128, sum loss: 3695.846191, avg loss: 2.406150, ppl: 11.091176 +epoch: 2, batch: 26129, sum loss: 4661.944824, avg loss: 2.560102, ppl: 12.937131 +epoch: 2, batch: 26130, sum loss: 4014.046143, avg loss: 2.668914, ppl: 14.424290 +epoch: 2, batch: 26131, sum loss: 4134.417969, avg loss: 2.282948, ppl: 9.805542 +epoch: 2, batch: 26132, sum loss: 4290.047852, avg loss: 2.414208, ppl: 11.180914 +epoch: 2, batch: 26133, sum loss: 4362.869629, avg loss: 2.564885, ppl: 12.999166 +epoch: 2, batch: 26134, sum loss: 4039.263184, avg loss: 2.526118, ppl: 12.504871 +epoch: 2, batch: 26135, sum loss: 3956.342041, avg loss: 2.346585, ppl: 10.449821 +epoch: 2, batch: 26136, sum loss: 3477.882080, avg loss: 2.216623, ppl: 9.176293 +epoch: 2, batch: 26137, sum loss: 4099.404785, avg loss: 2.649906, ppl: 14.152711 +epoch: 2, batch: 26138, sum loss: 3911.708252, avg loss: 2.366430, ppl: 10.659266 +epoch: 2, batch: 26139, sum loss: 4750.842773, avg loss: 2.844816, ppl: 17.198393 +epoch: 2, batch: 26140, sum loss: 3999.696777, avg loss: 2.547578, ppl: 12.776117 +epoch: 2, batch: 26141, sum loss: 3884.809570, avg loss: 2.477557, ppl: 11.912129 +epoch: 2, batch: 26142, sum loss: 4625.972656, avg loss: 2.578580, ppl: 13.178413 +epoch: 2, batch: 26143, sum loss: 3851.360596, avg loss: 2.505765, ppl: 12.252929 +epoch: 2, batch: 26144, sum loss: 5363.302246, avg loss: 2.822791, ppl: 16.823738 +epoch: 2, batch: 26145, sum loss: 4132.647461, avg loss: 2.373721, ppl: 10.737267 +epoch: 2, batch: 26146, sum loss: 
4365.871582, avg loss: 2.583356, ppl: 13.241501 +epoch: 2, batch: 26147, sum loss: 3973.208496, avg loss: 2.365005, ppl: 10.644094 +epoch: 2, batch: 26148, sum loss: 4468.867676, avg loss: 2.772250, ppl: 15.994588 +epoch: 2, batch: 26149, sum loss: 4506.861328, avg loss: 2.510786, ppl: 12.314610 +epoch: 2, batch: 26150, sum loss: 4149.812500, avg loss: 2.309300, ppl: 10.067379 +epoch: 2, batch: 26151, sum loss: 4335.557129, avg loss: 2.431608, ppl: 11.377164 +epoch: 2, batch: 26152, sum loss: 4015.478760, avg loss: 2.401602, ppl: 11.040853 +epoch: 2, batch: 26153, sum loss: 4285.598633, avg loss: 2.448914, ppl: 11.575764 +epoch: 2, batch: 26154, sum loss: 4605.140625, avg loss: 2.612105, ppl: 13.627703 +epoch: 2, batch: 26155, sum loss: 4719.142578, avg loss: 2.507515, ppl: 12.274386 +epoch: 2, batch: 26156, sum loss: 4696.360352, avg loss: 2.557930, ppl: 12.909074 +epoch: 2, batch: 26157, sum loss: 3943.677734, avg loss: 2.554196, ppl: 12.860950 +epoch: 2, batch: 26158, sum loss: 4818.733398, avg loss: 2.412986, ppl: 11.167260 +epoch: 2, batch: 26159, sum loss: 3937.034668, avg loss: 2.111011, ppl: 8.256580 +epoch: 2, batch: 26160, sum loss: 3910.558838, avg loss: 2.465674, ppl: 11.771413 +epoch: 2, batch: 26161, sum loss: 4364.239258, avg loss: 2.518315, ppl: 12.407667 +epoch: 2, batch: 26162, sum loss: 3602.650391, avg loss: 2.294682, ppl: 9.921278 +epoch: 2, batch: 26163, sum loss: 3963.440430, avg loss: 2.345231, ppl: 10.435683 +epoch: 2, batch: 26164, sum loss: 4006.955566, avg loss: 2.428458, ppl: 11.341380 +epoch: 2, batch: 26165, sum loss: 4102.583008, avg loss: 2.575382, ppl: 13.136331 +epoch: 2, batch: 26166, sum loss: 3823.679199, avg loss: 2.379390, ppl: 10.798311 +epoch: 2, batch: 26167, sum loss: 3737.050781, avg loss: 2.334198, ppl: 10.321179 +epoch: 2, batch: 26168, sum loss: 3652.885742, avg loss: 2.375088, ppl: 10.751962 +epoch: 2, batch: 26169, sum loss: 3903.620117, avg loss: 2.390459, ppl: 10.918508 +epoch: 2, batch: 26170, sum loss: 
3809.278809, avg loss: 2.554848, ppl: 12.869349 +epoch: 2, batch: 26171, sum loss: 4227.883301, avg loss: 2.347520, ppl: 10.459597 +epoch: 2, batch: 26172, sum loss: 3214.407471, avg loss: 2.224504, ppl: 9.248890 +epoch: 2, batch: 26173, sum loss: 3728.698242, avg loss: 2.404061, ppl: 11.068028 +epoch: 2, batch: 26174, sum loss: 3869.794189, avg loss: 2.514486, ppl: 12.360255 +epoch: 2, batch: 26175, sum loss: 4029.466309, avg loss: 2.630200, ppl: 13.876544 +epoch: 2, batch: 26176, sum loss: 4433.394531, avg loss: 2.533368, ppl: 12.595862 +epoch: 2, batch: 26177, sum loss: 4152.344238, avg loss: 2.421192, ppl: 11.259272 +epoch: 2, batch: 26178, sum loss: 4250.418945, avg loss: 2.445580, ppl: 11.537245 +epoch: 2, batch: 26179, sum loss: 3708.706543, avg loss: 2.459354, ppl: 11.697257 +epoch: 2, batch: 26180, sum loss: 4519.046875, avg loss: 2.557469, ppl: 12.903113 +epoch: 2, batch: 26181, sum loss: 3545.064697, avg loss: 2.400179, ppl: 11.025154 +epoch: 2, batch: 26182, sum loss: 4952.121582, avg loss: 2.928517, ppl: 18.699875 +epoch: 2, batch: 26183, sum loss: 4373.382812, avg loss: 2.370397, ppl: 10.701641 +epoch: 2, batch: 26184, sum loss: 3646.563232, avg loss: 2.303577, ppl: 10.009928 +epoch: 2, batch: 26185, sum loss: 4515.695312, avg loss: 2.404524, ppl: 11.073153 +epoch: 2, batch: 26186, sum loss: 4732.875977, avg loss: 2.607645, ppl: 13.567066 +epoch: 2, batch: 26187, sum loss: 4230.682129, avg loss: 2.378124, ppl: 10.784649 +epoch: 2, batch: 26188, sum loss: 3394.227051, avg loss: 2.181380, ppl: 8.858521 +epoch: 2, batch: 26189, sum loss: 4070.452881, avg loss: 2.542444, ppl: 12.710701 +epoch: 2, batch: 26190, sum loss: 3926.202148, avg loss: 2.341206, ppl: 10.393763 +epoch: 2, batch: 26191, sum loss: 4040.426270, avg loss: 2.372535, ppl: 10.724540 +epoch: 2, batch: 26192, sum loss: 3431.091797, avg loss: 2.296581, ppl: 9.940137 +epoch: 2, batch: 26193, sum loss: 4751.974121, avg loss: 2.727884, ppl: 15.300478 +epoch: 2, batch: 26194, sum loss: 
3919.662842, avg loss: 2.468302, ppl: 11.802386 +epoch: 2, batch: 26195, sum loss: 4024.173584, avg loss: 2.406802, ppl: 11.098416 +epoch: 2, batch: 26196, sum loss: 4303.340820, avg loss: 2.447862, ppl: 11.563594 +epoch: 2, batch: 26197, sum loss: 4191.230957, avg loss: 2.472703, ppl: 11.854441 +epoch: 2, batch: 26198, sum loss: 4448.301270, avg loss: 2.586222, ppl: 13.279503 +epoch: 2, batch: 26199, sum loss: 3994.796875, avg loss: 2.549328, ppl: 12.798501 +epoch: 2, batch: 26200, sum loss: 4367.026367, avg loss: 2.482676, ppl: 11.973260 +epoch: 2, batch: 26201, sum loss: 4477.797852, avg loss: 2.398392, ppl: 11.005465 +epoch: 2, batch: 26202, sum loss: 4733.386719, avg loss: 2.571095, ppl: 13.080146 +epoch: 2, batch: 26203, sum loss: 3666.186523, avg loss: 2.315974, ppl: 10.134787 +epoch: 2, batch: 26204, sum loss: 4211.254883, avg loss: 2.434251, ppl: 11.407275 +epoch: 2, batch: 26205, sum loss: 4555.673828, avg loss: 2.266505, ppl: 9.645626 +epoch: 2, batch: 26206, sum loss: 4734.584473, avg loss: 2.754267, ppl: 15.709517 +epoch: 2, batch: 26207, sum loss: 3658.109863, avg loss: 2.376940, ppl: 10.771885 +epoch: 2, batch: 26208, sum loss: 4211.438965, avg loss: 2.722327, ppl: 15.215681 +epoch: 2, batch: 26209, sum loss: 4698.600098, avg loss: 2.659083, ppl: 14.283191 +epoch: 2, batch: 26210, sum loss: 4291.604980, avg loss: 2.615237, ppl: 13.670463 +epoch: 2, batch: 26211, sum loss: 4114.561523, avg loss: 2.245940, ppl: 9.449291 +epoch: 2, batch: 26212, sum loss: 4723.826172, avg loss: 2.618529, ppl: 13.715531 +epoch: 2, batch: 26213, sum loss: 3389.396729, avg loss: 2.139771, ppl: 8.497489 +epoch: 2, batch: 26214, sum loss: 4790.022461, avg loss: 2.562880, ppl: 12.973124 +epoch: 2, batch: 26215, sum loss: 4123.773438, avg loss: 2.788217, ppl: 16.252022 +epoch: 2, batch: 26216, sum loss: 2990.458740, avg loss: 1.953272, ppl: 7.051721 +epoch: 2, batch: 26217, sum loss: 4142.823242, avg loss: 2.360583, ppl: 10.597129 +epoch: 2, batch: 26218, sum loss: 
4084.639160, avg loss: 2.459145, ppl: 11.694803 +epoch: 2, batch: 26219, sum loss: 4370.564453, avg loss: 2.445755, ppl: 11.539261 +epoch: 2, batch: 26220, sum loss: 3998.111816, avg loss: 2.428987, ppl: 11.347376 +epoch: 2, batch: 26221, sum loss: 3776.266602, avg loss: 2.166533, ppl: 8.727969 +epoch: 2, batch: 26222, sum loss: 4669.343750, avg loss: 2.620283, ppl: 13.739610 +epoch: 2, batch: 26223, sum loss: 4012.133789, avg loss: 2.381088, ppl: 10.816668 +epoch: 2, batch: 26224, sum loss: 4497.833008, avg loss: 2.516974, ppl: 12.391047 +epoch: 2, batch: 26225, sum loss: 3184.714844, avg loss: 2.077440, ppl: 7.984000 +epoch: 2, batch: 26226, sum loss: 4375.353516, avg loss: 2.494500, ppl: 12.115676 +epoch: 2, batch: 26227, sum loss: 4106.394043, avg loss: 2.478210, ppl: 11.919909 +epoch: 2, batch: 26228, sum loss: 4888.745605, avg loss: 2.709948, ppl: 15.028488 +epoch: 2, batch: 26229, sum loss: 3645.966309, avg loss: 2.453544, ppl: 11.629488 +epoch: 2, batch: 26230, sum loss: 3994.237061, avg loss: 2.380356, ppl: 10.808748 +epoch: 2, batch: 26231, sum loss: 4571.159668, avg loss: 2.504745, ppl: 12.240438 +epoch: 2, batch: 26232, sum loss: 3657.867676, avg loss: 2.438578, ppl: 11.456742 +epoch: 2, batch: 26233, sum loss: 4594.532227, avg loss: 2.585556, ppl: 13.270659 +epoch: 2, batch: 26234, sum loss: 4939.621582, avg loss: 2.915951, ppl: 18.466370 +epoch: 2, batch: 26235, sum loss: 6012.321289, avg loss: 3.015206, ppl: 20.393299 +epoch: 2, batch: 26236, sum loss: 3417.368408, avg loss: 2.173898, ppl: 8.792495 +epoch: 2, batch: 26237, sum loss: 4707.823730, avg loss: 2.705646, ppl: 14.963977 +epoch: 2, batch: 26238, sum loss: 5076.520996, avg loss: 2.567790, ppl: 13.036982 +epoch: 2, batch: 26239, sum loss: 4449.744141, avg loss: 2.597632, ppl: 13.431896 +epoch: 2, batch: 26240, sum loss: 4616.194336, avg loss: 2.591912, ppl: 13.355276 +epoch: 2, batch: 26241, sum loss: 3790.438232, avg loss: 2.237567, ppl: 9.370502 +epoch: 2, batch: 26242, sum loss: 
4513.407715, avg loss: 2.592423, ppl: 13.362105 +epoch: 2, batch: 26243, sum loss: 5259.180176, avg loss: 2.869165, ppl: 17.622305 +epoch: 2, batch: 26244, sum loss: 4394.950684, avg loss: 2.603644, ppl: 13.512888 +epoch: 2, batch: 26245, sum loss: 4038.500732, avg loss: 2.364462, ppl: 10.638310 +epoch: 2, batch: 26246, sum loss: 3799.671143, avg loss: 2.371830, ppl: 10.716981 +epoch: 2, batch: 26247, sum loss: 4400.750000, avg loss: 2.579572, ppl: 13.191490 +epoch: 2, batch: 26248, sum loss: 4479.364258, avg loss: 2.335435, ppl: 10.333953 +epoch: 2, batch: 26249, sum loss: 4378.541504, avg loss: 2.663346, ppl: 14.344209 +epoch: 2, batch: 26250, sum loss: 4531.663574, avg loss: 2.593969, ppl: 13.382781 +epoch: 2, batch: 26251, sum loss: 4188.652832, avg loss: 2.330914, ppl: 10.287342 +epoch: 2, batch: 26252, sum loss: 4854.546387, avg loss: 2.747338, ppl: 15.601048 +epoch: 2, batch: 26253, sum loss: 3884.165039, avg loss: 2.502684, ppl: 12.215234 +epoch: 2, batch: 26254, sum loss: 3224.890625, avg loss: 2.394128, ppl: 10.958641 +epoch: 2, batch: 26255, sum loss: 3426.138672, avg loss: 2.450743, ppl: 11.596960 +epoch: 2, batch: 26256, sum loss: 3952.988037, avg loss: 2.621345, ppl: 13.754208 +epoch: 2, batch: 26257, sum loss: 4205.415039, avg loss: 2.545651, ppl: 12.751526 +epoch: 2, batch: 26258, sum loss: 3269.586426, avg loss: 2.110772, ppl: 8.254615 +epoch: 2, batch: 26259, sum loss: 3958.469727, avg loss: 2.503776, ppl: 12.228580 +epoch: 2, batch: 26260, sum loss: 4090.089111, avg loss: 2.347927, ppl: 10.463857 +epoch: 2, batch: 26261, sum loss: 3456.674072, avg loss: 2.299850, ppl: 9.972682 +epoch: 2, batch: 26262, sum loss: 4387.431641, avg loss: 2.470401, ppl: 11.827187 +epoch: 2, batch: 26263, sum loss: 4757.175293, avg loss: 2.659125, ppl: 14.283790 +epoch: 2, batch: 26264, sum loss: 3608.840820, avg loss: 2.289874, ppl: 9.873690 +epoch: 2, batch: 26265, sum loss: 3718.532471, avg loss: 2.255023, ppl: 9.535510 +epoch: 2, batch: 26266, sum loss: 
3287.218018, avg loss: 2.284377, ppl: 9.819563 +epoch: 2, batch: 26267, sum loss: 4255.420410, avg loss: 2.434451, ppl: 11.409554 +epoch: 2, batch: 26268, sum loss: 3664.341797, avg loss: 2.514991, ppl: 12.366495 +epoch: 2, batch: 26269, sum loss: 3544.429688, avg loss: 2.161238, ppl: 8.681877 +epoch: 2, batch: 26270, sum loss: 3744.413818, avg loss: 2.538586, ppl: 12.661751 +epoch: 2, batch: 26271, sum loss: 3229.479736, avg loss: 2.134488, ppl: 8.452715 +epoch: 2, batch: 26272, sum loss: 4701.623047, avg loss: 2.516929, ppl: 12.390486 +epoch: 2, batch: 26273, sum loss: 3612.659912, avg loss: 2.287942, ppl: 9.854633 +epoch: 2, batch: 26274, sum loss: 3361.382324, avg loss: 2.073647, ppl: 7.953780 +epoch: 2, batch: 26275, sum loss: 4342.153809, avg loss: 2.654128, ppl: 14.212592 +epoch: 2, batch: 26276, sum loss: 4723.917969, avg loss: 2.652396, ppl: 14.187999 +epoch: 2, batch: 26277, sum loss: 4131.590332, avg loss: 2.403485, ppl: 11.061657 +epoch: 2, batch: 26278, sum loss: 3564.965332, avg loss: 2.546404, ppl: 12.761130 +epoch: 2, batch: 26279, sum loss: 4416.021973, avg loss: 2.396105, ppl: 10.980328 +epoch: 2, batch: 26280, sum loss: 4140.972168, avg loss: 2.656172, ppl: 14.241668 +epoch: 2, batch: 26281, sum loss: 3849.824219, avg loss: 2.425850, ppl: 11.311845 +epoch: 2, batch: 26282, sum loss: 3957.798828, avg loss: 2.255156, ppl: 9.536781 +epoch: 2, batch: 26283, sum loss: 3469.889893, avg loss: 2.299463, ppl: 9.968830 +epoch: 2, batch: 26284, sum loss: 3854.089600, avg loss: 2.399807, ppl: 11.021047 +epoch: 2, batch: 26285, sum loss: 4368.639160, avg loss: 2.459819, ppl: 11.702697 +epoch: 2, batch: 26286, sum loss: 4198.029785, avg loss: 2.592977, ppl: 13.369514 +epoch: 2, batch: 26287, sum loss: 4140.773926, avg loss: 2.470629, ppl: 11.829885 +epoch: 2, batch: 26288, sum loss: 3709.249023, avg loss: 2.293908, ppl: 9.913603 +epoch: 2, batch: 26289, sum loss: 2629.627441, avg loss: 2.064072, ppl: 7.877983 +epoch: 2, batch: 26290, sum loss: 3896.623047, avg 
loss: 2.278727, ppl: 9.764241 +epoch: 2, batch: 26291, sum loss: 4327.290527, avg loss: 2.372418, ppl: 10.723291 +epoch: 2, batch: 26292, sum loss: 4227.622070, avg loss: 2.449375, ppl: 11.581111 +epoch: 2, batch: 26293, sum loss: 4501.621094, avg loss: 2.459902, ppl: 11.703668 +epoch: 2, batch: 26294, sum loss: 3865.000000, avg loss: 2.140089, ppl: 8.500191 +epoch: 2, batch: 26295, sum loss: 3953.808350, avg loss: 2.430122, ppl: 11.360267 +epoch: 2, batch: 26296, sum loss: 3948.169922, avg loss: 2.517966, ppl: 12.403337 +epoch: 2, batch: 26297, sum loss: 3634.916992, avg loss: 2.307884, ppl: 10.053127 +epoch: 2, batch: 26298, sum loss: 4202.188965, avg loss: 2.644549, ppl: 14.077100 +epoch: 2, batch: 26299, sum loss: 5165.081543, avg loss: 2.738644, ppl: 15.465994 +epoch: 2, batch: 26300, sum loss: 3939.513672, avg loss: 2.536712, ppl: 12.638051 +epoch: 2, batch: 26301, sum loss: 4064.538330, avg loss: 2.341324, ppl: 10.394989 +epoch: 2, batch: 26302, sum loss: 4716.119141, avg loss: 2.478255, ppl: 11.920445 +epoch: 2, batch: 26303, sum loss: 4532.020020, avg loss: 2.448417, ppl: 11.570019 +epoch: 2, batch: 26304, sum loss: 3475.527344, avg loss: 2.174923, ppl: 8.801509 +epoch: 2, batch: 26305, sum loss: 3912.576416, avg loss: 2.484175, ppl: 11.991229 +epoch: 2, batch: 26306, sum loss: 4313.420410, avg loss: 2.430096, ppl: 11.359972 +epoch: 2, batch: 26307, sum loss: 5416.416016, avg loss: 2.783359, ppl: 16.173252 +epoch: 2, batch: 26308, sum loss: 4153.446289, avg loss: 2.524891, ppl: 12.489535 +epoch: 2, batch: 26309, sum loss: 4675.416016, avg loss: 2.682396, ppl: 14.620080 +epoch: 2, batch: 26310, sum loss: 3759.324463, avg loss: 2.276999, ppl: 9.747380 +epoch: 2, batch: 26311, sum loss: 4423.814453, avg loss: 2.427999, ppl: 11.336179 +epoch: 2, batch: 26312, sum loss: 4662.650391, avg loss: 2.626845, ppl: 13.830069 +epoch: 2, batch: 26313, sum loss: 3960.564453, avg loss: 2.363105, ppl: 10.623891 +epoch: 2, batch: 26314, sum loss: 2979.200684, avg loss: 
2.030812, ppl: 7.620268 +epoch: 2, batch: 26315, sum loss: 4408.784668, avg loss: 2.619599, ppl: 13.730211 +epoch: 2, batch: 26316, sum loss: 4418.658691, avg loss: 2.552662, ppl: 12.841247 +epoch: 2, batch: 26317, sum loss: 3760.497314, avg loss: 2.407489, ppl: 11.106037 +epoch: 2, batch: 26318, sum loss: 3553.967529, avg loss: 2.131954, ppl: 8.431325 +epoch: 2, batch: 26319, sum loss: 4323.735352, avg loss: 2.465071, ppl: 11.764322 +epoch: 2, batch: 26320, sum loss: 5161.261230, avg loss: 2.670078, ppl: 14.441099 +epoch: 2, batch: 26321, sum loss: 4755.003906, avg loss: 2.582838, ppl: 13.234639 +epoch: 2, batch: 26322, sum loss: 4208.692383, avg loss: 2.399482, ppl: 11.017473 +epoch: 2, batch: 26323, sum loss: 4386.784180, avg loss: 2.472821, ppl: 11.855843 +epoch: 2, batch: 26324, sum loss: 4030.285156, avg loss: 2.458990, ppl: 11.693000 +epoch: 2, batch: 26325, sum loss: 3814.771484, avg loss: 2.285663, ppl: 9.832204 +epoch: 2, batch: 26326, sum loss: 4411.086426, avg loss: 2.560120, ppl: 12.937368 +epoch: 2, batch: 26327, sum loss: 3702.669189, avg loss: 2.378079, ppl: 10.784168 +epoch: 2, batch: 26328, sum loss: 4416.996582, avg loss: 2.475895, ppl: 11.892348 +epoch: 2, batch: 26329, sum loss: 4281.475586, avg loss: 2.271340, ppl: 9.692379 +epoch: 2, batch: 26330, sum loss: 4548.684570, avg loss: 2.489702, ppl: 12.057676 +epoch: 2, batch: 26331, sum loss: 4396.832031, avg loss: 2.502465, ppl: 12.212566 +epoch: 2, batch: 26332, sum loss: 4871.255859, avg loss: 2.479011, ppl: 11.929456 +epoch: 2, batch: 26333, sum loss: 4441.770508, avg loss: 2.407464, ppl: 11.105756 +epoch: 2, batch: 26334, sum loss: 3806.862549, avg loss: 2.329781, ppl: 10.275694 +epoch: 2, batch: 26335, sum loss: 4729.965332, avg loss: 2.729351, ppl: 15.322936 +epoch: 2, batch: 26336, sum loss: 3362.120605, avg loss: 2.244406, ppl: 9.434812 +epoch: 2, batch: 26337, sum loss: 4144.710938, avg loss: 2.391639, ppl: 10.931398 +epoch: 2, batch: 26338, sum loss: 3824.674072, avg loss: 2.393413, 
ppl: 10.950806 +epoch: 2, batch: 26339, sum loss: 3825.569580, avg loss: 2.485750, ppl: 12.010127 +epoch: 2, batch: 26340, sum loss: 4020.804688, avg loss: 2.436851, ppl: 11.436972 +epoch: 2, batch: 26341, sum loss: 4203.735352, avg loss: 2.483010, ppl: 11.977260 +epoch: 2, batch: 26342, sum loss: 3774.281982, avg loss: 2.351578, ppl: 10.502124 +epoch: 2, batch: 26343, sum loss: 3627.898682, avg loss: 2.124062, ppl: 8.365050 +epoch: 2, batch: 26344, sum loss: 4027.328369, avg loss: 2.496794, ppl: 12.143497 +epoch: 2, batch: 26345, sum loss: 4892.574219, avg loss: 2.648931, ppl: 14.138909 +epoch: 2, batch: 26346, sum loss: 4250.612305, avg loss: 2.518135, ppl: 12.405443 +epoch: 2, batch: 26347, sum loss: 3073.432617, avg loss: 2.211102, ppl: 9.125772 +epoch: 2, batch: 26348, sum loss: 3139.020996, avg loss: 2.131039, ppl: 8.423617 +epoch: 2, batch: 26349, sum loss: 2912.138428, avg loss: 2.133435, ppl: 8.443820 +epoch: 2, batch: 26350, sum loss: 4369.240723, avg loss: 2.535833, ppl: 12.626949 +epoch: 2, batch: 26351, sum loss: 4863.212891, avg loss: 2.458651, ppl: 11.689036 +epoch: 2, batch: 26352, sum loss: 4498.606934, avg loss: 2.663474, ppl: 14.346035 +epoch: 2, batch: 26353, sum loss: 3976.061523, avg loss: 2.365295, ppl: 10.647183 +epoch: 2, batch: 26354, sum loss: 4163.791504, avg loss: 2.534261, ppl: 12.607114 +epoch: 2, batch: 26355, sum loss: 4114.023438, avg loss: 2.290659, ppl: 9.881447 +epoch: 2, batch: 26356, sum loss: 4357.846680, avg loss: 2.696687, ppl: 14.830520 +epoch: 2, batch: 26357, sum loss: 3845.517578, avg loss: 2.687294, ppl: 14.691862 +epoch: 2, batch: 26358, sum loss: 4588.759766, avg loss: 2.540842, ppl: 12.690347 +epoch: 2, batch: 26359, sum loss: 4676.757812, avg loss: 2.723796, ppl: 15.238058 +epoch: 2, batch: 26360, sum loss: 3434.930176, avg loss: 2.313084, ppl: 10.105543 +epoch: 2, batch: 26361, sum loss: 3481.719971, avg loss: 2.121706, ppl: 8.345365 +epoch: 2, batch: 26362, sum loss: 4342.280762, avg loss: 2.425855, ppl: 
11.311899 +epoch: 2, batch: 26363, sum loss: 3534.198730, avg loss: 2.417373, ppl: 11.216352 +epoch: 2, batch: 26364, sum loss: 4241.539551, avg loss: 2.502383, ppl: 12.211562 +epoch: 2, batch: 26365, sum loss: 4065.816406, avg loss: 2.476137, ppl: 11.895221 +epoch: 2, batch: 26366, sum loss: 4116.619141, avg loss: 2.400361, ppl: 11.027155 +epoch: 2, batch: 26367, sum loss: 4631.319336, avg loss: 2.565828, ppl: 13.011425 +epoch: 2, batch: 26368, sum loss: 4506.966309, avg loss: 2.398598, ppl: 11.007737 +epoch: 2, batch: 26369, sum loss: 4108.237305, avg loss: 2.454144, ppl: 11.636471 +epoch: 2, batch: 26370, sum loss: 3849.135010, avg loss: 2.405709, ppl: 11.086290 +epoch: 2, batch: 26371, sum loss: 3378.618652, avg loss: 2.296818, ppl: 9.942491 +epoch: 2, batch: 26372, sum loss: 4393.246582, avg loss: 2.415199, ppl: 11.191995 +epoch: 2, batch: 26373, sum loss: 3442.102051, avg loss: 2.275018, ppl: 9.728094 +epoch: 2, batch: 26374, sum loss: 4003.390381, avg loss: 2.245311, ppl: 9.443357 +epoch: 2, batch: 26375, sum loss: 3901.229980, avg loss: 2.202840, ppl: 9.050682 +epoch: 2, batch: 26376, sum loss: 3646.863770, avg loss: 2.411947, ppl: 11.155661 +epoch: 2, batch: 26377, sum loss: 4348.875000, avg loss: 2.421423, ppl: 11.261870 +epoch: 2, batch: 26378, sum loss: 3526.459717, avg loss: 2.467782, ppl: 11.796248 +epoch: 2, batch: 26379, sum loss: 4637.074707, avg loss: 2.637699, ppl: 13.980995 +epoch: 2, batch: 26380, sum loss: 4845.311523, avg loss: 2.600811, ppl: 13.474665 +epoch: 2, batch: 26381, sum loss: 4416.076172, avg loss: 2.633319, ppl: 13.919895 +epoch: 2, batch: 26382, sum loss: 5065.435547, avg loss: 2.520117, ppl: 12.430055 +epoch: 2, batch: 26383, sum loss: 4658.797363, avg loss: 2.612898, ppl: 13.638523 +epoch: 2, batch: 26384, sum loss: 4084.450195, avg loss: 2.387172, ppl: 10.882668 +epoch: 2, batch: 26385, sum loss: 3422.468262, avg loss: 2.242771, ppl: 9.419395 +epoch: 2, batch: 26386, sum loss: 4047.093750, avg loss: 2.272372, ppl: 9.702383 
+epoch: 2, batch: 26387, sum loss: 4190.124023, avg loss: 2.302266, ppl: 9.996811 +epoch: 2, batch: 26388, sum loss: 3771.793945, avg loss: 2.344185, ppl: 10.424774 +epoch: 2, batch: 26389, sum loss: 4154.781738, avg loss: 2.454094, ppl: 11.635891 +epoch: 2, batch: 26390, sum loss: 4914.901367, avg loss: 2.816562, ppl: 16.719278 +epoch: 2, batch: 26391, sum loss: 3935.437500, avg loss: 2.298737, ppl: 9.961591 +epoch: 2, batch: 26392, sum loss: 3736.273926, avg loss: 2.271291, ppl: 9.691905 +epoch: 2, batch: 26393, sum loss: 4680.391113, avg loss: 2.468561, ppl: 11.805446 +epoch: 2, batch: 26394, sum loss: 3585.167969, avg loss: 2.472530, ppl: 11.852391 +epoch: 2, batch: 26395, sum loss: 4698.860352, avg loss: 2.813689, ppl: 16.671305 +epoch: 2, batch: 26396, sum loss: 3339.610352, avg loss: 2.378640, ppl: 10.790217 +epoch: 2, batch: 26397, sum loss: 4177.341797, avg loss: 2.413253, ppl: 11.170242 +epoch: 2, batch: 26398, sum loss: 3505.513428, avg loss: 2.344825, ppl: 10.431447 +epoch: 2, batch: 26399, sum loss: 3703.452393, avg loss: 2.335090, ppl: 10.330386 +epoch: 2, batch: 26400, sum loss: 4081.148438, avg loss: 2.356321, ppl: 10.552062 +epoch: 2, batch: 26401, sum loss: 3658.458008, avg loss: 2.228050, ppl: 9.281749 +epoch: 2, batch: 26402, sum loss: 4099.370605, avg loss: 2.695181, ppl: 14.808205 +epoch: 2, batch: 26403, sum loss: 4439.160156, avg loss: 2.533767, ppl: 12.600890 +epoch: 2, batch: 26404, sum loss: 4146.721680, avg loss: 2.394181, ppl: 10.959219 +epoch: 2, batch: 26405, sum loss: 4001.138428, avg loss: 2.492921, ppl: 12.096560 +epoch: 2, batch: 26406, sum loss: 2790.189453, avg loss: 2.186669, ppl: 8.905499 +epoch: 2, batch: 26407, sum loss: 5724.095703, avg loss: 2.812823, ppl: 16.656879 +epoch: 2, batch: 26408, sum loss: 4555.473633, avg loss: 2.481195, ppl: 11.955540 +epoch: 2, batch: 26409, sum loss: 4969.365234, avg loss: 2.558891, ppl: 12.921474 +epoch: 2, batch: 26410, sum loss: 3636.531250, avg loss: 2.427591, ppl: 11.331550 +epoch: 2, 
batch: 26411, sum loss: 4613.795410, avg loss: 2.439871, ppl: 11.471556 +epoch: 2, batch: 26412, sum loss: 4244.915039, avg loss: 2.446637, ppl: 11.549439 +epoch: 2, batch: 26413, sum loss: 4371.136719, avg loss: 2.379498, ppl: 10.799475 +epoch: 2, batch: 26414, sum loss: 4067.983643, avg loss: 2.339266, ppl: 10.373620 +epoch: 2, batch: 26415, sum loss: 4029.232666, avg loss: 2.352150, ppl: 10.508135 +epoch: 2, batch: 26416, sum loss: 4555.318359, avg loss: 2.570721, ppl: 13.075253 +epoch: 2, batch: 26417, sum loss: 3046.931641, avg loss: 2.313540, ppl: 10.110147 +epoch: 2, batch: 26418, sum loss: 4780.720215, avg loss: 2.707089, ppl: 14.985596 +epoch: 2, batch: 26419, sum loss: 4225.289062, avg loss: 2.450864, ppl: 11.598362 +epoch: 2, batch: 26420, sum loss: 3588.637695, avg loss: 2.152752, ppl: 8.608516 +epoch: 2, batch: 26421, sum loss: 3530.919434, avg loss: 2.263410, ppl: 9.615822 +epoch: 2, batch: 26422, sum loss: 5264.128906, avg loss: 2.830177, ppl: 16.948458 +epoch: 2, batch: 26423, sum loss: 4047.526367, avg loss: 2.540820, ppl: 12.690074 +epoch: 2, batch: 26424, sum loss: 3019.131836, avg loss: 2.353181, ppl: 10.518981 +epoch: 2, batch: 26425, sum loss: 4924.458008, avg loss: 2.735810, ppl: 15.422231 +epoch: 2, batch: 26426, sum loss: 4666.585938, avg loss: 2.393121, ppl: 10.947608 +epoch: 2, batch: 26427, sum loss: 3966.956787, avg loss: 2.376847, ppl: 10.770884 +epoch: 2, batch: 26428, sum loss: 3685.688232, avg loss: 2.397975, ppl: 11.000882 +epoch: 2, batch: 26429, sum loss: 4735.237793, avg loss: 2.555444, ppl: 12.877016 +epoch: 2, batch: 26430, sum loss: 4137.759766, avg loss: 2.386251, ppl: 10.872658 +epoch: 2, batch: 26431, sum loss: 5013.703613, avg loss: 2.588386, ppl: 13.308272 +epoch: 2, batch: 26432, sum loss: 4279.681641, avg loss: 2.532356, ppl: 12.583117 +epoch: 2, batch: 26433, sum loss: 3871.827637, avg loss: 2.564124, ppl: 12.989280 +epoch: 2, batch: 26434, sum loss: 3710.218262, avg loss: 2.392146, ppl: 10.936938 +epoch: 2, batch: 
26435, sum loss: 3910.434814, avg loss: 2.436408, ppl: 11.431901 +epoch: 2, batch: 26436, sum loss: 3657.677979, avg loss: 2.438452, ppl: 11.455295 +epoch: 2, batch: 26437, sum loss: 4648.573242, avg loss: 2.591178, ppl: 13.345483 +epoch: 2, batch: 26438, sum loss: 4574.130371, avg loss: 2.598938, ppl: 13.449441 +epoch: 2, batch: 26439, sum loss: 3425.497314, avg loss: 2.218586, ppl: 9.194325 +epoch: 2, batch: 26440, sum loss: 4818.649414, avg loss: 2.578197, ppl: 13.173359 +epoch: 2, batch: 26441, sum loss: 4269.446289, avg loss: 2.431348, ppl: 11.374200 +epoch: 2, batch: 26442, sum loss: 4375.261230, avg loss: 2.490188, ppl: 12.063548 +epoch: 2, batch: 26443, sum loss: 4807.277344, avg loss: 2.528815, ppl: 12.538639 +epoch: 2, batch: 26444, sum loss: 4573.202148, avg loss: 2.566331, ppl: 13.017979 +epoch: 2, batch: 26445, sum loss: 4599.222168, avg loss: 2.550872, ppl: 12.818277 +epoch: 2, batch: 26446, sum loss: 4342.954590, avg loss: 2.457813, ppl: 11.679235 +epoch: 2, batch: 26447, sum loss: 3533.154297, avg loss: 2.093101, ppl: 8.110025 +epoch: 2, batch: 26448, sum loss: 3918.868164, avg loss: 2.327119, ppl: 10.248372 +epoch: 2, batch: 26449, sum loss: 3886.032227, avg loss: 2.553241, ppl: 12.848676 +epoch: 2, batch: 26450, sum loss: 4141.500000, avg loss: 2.484403, ppl: 11.993959 +epoch: 2, batch: 26451, sum loss: 4108.912598, avg loss: 2.381979, ppl: 10.826302 +epoch: 2, batch: 26452, sum loss: 4402.254883, avg loss: 2.340380, ppl: 10.385181 +epoch: 2, batch: 26453, sum loss: 4073.599365, avg loss: 2.439281, ppl: 11.464794 +epoch: 2, batch: 26454, sum loss: 3920.878418, avg loss: 2.612178, ppl: 13.628696 +epoch: 2, batch: 26455, sum loss: 4400.030273, avg loss: 2.456745, ppl: 11.666773 +epoch: 2, batch: 26456, sum loss: 3992.958008, avg loss: 2.445167, ppl: 11.532476 +epoch: 2, batch: 26457, sum loss: 4004.642578, avg loss: 2.326928, ppl: 10.246412 +epoch: 2, batch: 26458, sum loss: 4199.596680, avg loss: 2.568561, ppl: 13.047031 +epoch: 2, batch: 26459, 
sum loss: 5814.684082, avg loss: 2.957622, ppl: 19.252127 +epoch: 2, batch: 26460, sum loss: 3975.177490, avg loss: 2.373240, ppl: 10.732113 +epoch: 2, batch: 26461, sum loss: 4166.205078, avg loss: 2.672357, ppl: 14.474049 +epoch: 2, batch: 26462, sum loss: 3773.860840, avg loss: 2.426920, ppl: 11.323953 +epoch: 2, batch: 26463, sum loss: 6008.505859, avg loss: 2.873508, ppl: 17.699001 +epoch: 2, batch: 26464, sum loss: 4434.962402, avg loss: 2.326843, ppl: 10.245543 +epoch: 2, batch: 26465, sum loss: 3212.402344, avg loss: 2.397315, ppl: 10.993622 +epoch: 2, batch: 26466, sum loss: 4640.358398, avg loss: 2.512376, ppl: 12.334202 +epoch: 2, batch: 26467, sum loss: 4166.055664, avg loss: 2.341796, ppl: 10.399899 +epoch: 2, batch: 26468, sum loss: 3836.985352, avg loss: 2.534337, ppl: 12.608064 +epoch: 2, batch: 26469, sum loss: 4060.989746, avg loss: 2.479237, ppl: 11.932152 +epoch: 2, batch: 26470, sum loss: 4016.410156, avg loss: 2.456520, ppl: 11.664147 +epoch: 2, batch: 26471, sum loss: 4740.908691, avg loss: 2.407775, ppl: 11.109215 +epoch: 2, batch: 26472, sum loss: 4542.105957, avg loss: 2.579277, ppl: 13.187594 +epoch: 2, batch: 26473, sum loss: 4618.288086, avg loss: 2.724654, ppl: 15.251132 +epoch: 2, batch: 26474, sum loss: 4521.417480, avg loss: 2.574839, ppl: 13.129205 +epoch: 2, batch: 26475, sum loss: 3604.989258, avg loss: 2.498260, ppl: 12.161315 +epoch: 2, batch: 26476, sum loss: 4947.025391, avg loss: 2.602328, ppl: 13.495115 +epoch: 2, batch: 26477, sum loss: 4257.349609, avg loss: 2.516164, ppl: 12.381013 +epoch: 2, batch: 26478, sum loss: 3521.724854, avg loss: 2.216315, ppl: 9.173467 +epoch: 2, batch: 26479, sum loss: 3914.582031, avg loss: 2.330108, ppl: 10.279056 +epoch: 2, batch: 26480, sum loss: 4313.959473, avg loss: 2.558695, ppl: 12.918945 +epoch: 2, batch: 26481, sum loss: 3535.258301, avg loss: 2.270558, ppl: 9.684800 +epoch: 2, batch: 26482, sum loss: 3960.554688, avg loss: 2.464564, ppl: 11.758358 +epoch: 2, batch: 26483, sum loss: 
4780.940430, avg loss: 2.724183, ppl: 15.243949 +epoch: 2, batch: 26484, sum loss: 4251.209961, avg loss: 2.427876, ppl: 11.334776 +epoch: 2, batch: 26485, sum loss: 3685.610840, avg loss: 2.374749, ppl: 10.748320 +epoch: 2, batch: 26486, sum loss: 4458.289551, avg loss: 2.668037, ppl: 14.411651 +epoch: 2, batch: 26487, sum loss: 4383.564941, avg loss: 2.553037, ppl: 12.846060 +epoch: 2, batch: 26488, sum loss: 3593.399170, avg loss: 2.427973, ppl: 11.335876 +epoch: 2, batch: 26489, sum loss: 4310.578613, avg loss: 2.440871, ppl: 11.483041 +epoch: 2, batch: 26490, sum loss: 4599.413086, avg loss: 2.604424, ppl: 13.523434 +epoch: 2, batch: 26491, sum loss: 3118.144531, avg loss: 2.291069, ppl: 9.885498 +epoch: 2, batch: 26492, sum loss: 4405.989258, avg loss: 2.497726, ppl: 12.154828 +epoch: 2, batch: 26493, sum loss: 4143.620117, avg loss: 2.365080, ppl: 10.644892 +epoch: 2, batch: 26494, sum loss: 3527.225830, avg loss: 2.152060, ppl: 8.602557 +epoch: 2, batch: 26495, sum loss: 4707.596680, avg loss: 2.598011, ppl: 13.436992 +epoch: 2, batch: 26496, sum loss: 4723.145508, avg loss: 2.544798, ppl: 12.740656 +epoch: 2, batch: 26497, sum loss: 4516.956055, avg loss: 2.529091, ppl: 12.542099 +epoch: 2, batch: 26498, sum loss: 3814.051270, avg loss: 2.388260, ppl: 10.894523 +epoch: 2, batch: 26499, sum loss: 3414.632568, avg loss: 2.310306, ppl: 10.077511 +epoch: 2, batch: 26500, sum loss: 4379.675293, avg loss: 2.672163, ppl: 14.471237 +epoch: 2, batch: 26501, sum loss: 3571.019775, avg loss: 2.411222, ppl: 11.147575 +epoch: 2, batch: 26502, sum loss: 5007.371582, avg loss: 2.743765, ppl: 15.545409 +epoch: 2, batch: 26503, sum loss: 4542.002930, avg loss: 2.566103, ppl: 13.015012 +epoch: 2, batch: 26504, sum loss: 4260.571777, avg loss: 2.434613, ppl: 11.411396 +epoch: 2, batch: 26505, sum loss: 4307.877441, avg loss: 2.497320, ppl: 12.149894 +epoch: 2, batch: 26506, sum loss: 3641.500488, avg loss: 2.346328, ppl: 10.447135 +epoch: 2, batch: 26507, sum loss: 
4145.666016, avg loss: 2.248192, ppl: 9.470598 +epoch: 2, batch: 26508, sum loss: 4173.658203, avg loss: 2.628248, ppl: 13.849490 +epoch: 2, batch: 26509, sum loss: 4395.446777, avg loss: 2.594715, ppl: 13.392765 +epoch: 2, batch: 26510, sum loss: 4246.113770, avg loss: 2.335596, ppl: 10.335619 +epoch: 2, batch: 26511, sum loss: 4124.456543, avg loss: 2.505745, ppl: 12.252686 +epoch: 2, batch: 26512, sum loss: 4250.687500, avg loss: 2.445735, ppl: 11.539027 +epoch: 2, batch: 26513, sum loss: 3867.038574, avg loss: 2.395935, ppl: 10.978456 +epoch: 2, batch: 26514, sum loss: 3480.113037, avg loss: 2.362602, ppl: 10.618547 +epoch: 2, batch: 26515, sum loss: 4431.073730, avg loss: 2.516226, ppl: 12.381777 +epoch: 2, batch: 26516, sum loss: 4204.745605, avg loss: 2.393139, ppl: 10.947806 +epoch: 2, batch: 26517, sum loss: 4907.698730, avg loss: 2.876728, ppl: 17.756084 +epoch: 2, batch: 26518, sum loss: 4180.070312, avg loss: 2.492588, ppl: 12.092531 +epoch: 2, batch: 26519, sum loss: 3398.533203, avg loss: 2.417164, ppl: 11.214015 +epoch: 2, batch: 26520, sum loss: 3765.052002, avg loss: 2.378428, ppl: 10.787933 +epoch: 2, batch: 26521, sum loss: 4407.952637, avg loss: 2.514519, ppl: 12.360667 +epoch: 2, batch: 26522, sum loss: 4033.477051, avg loss: 2.412367, ppl: 11.160345 +epoch: 2, batch: 26523, sum loss: 5013.208496, avg loss: 2.780482, ppl: 16.126789 +epoch: 2, batch: 26524, sum loss: 3883.430908, avg loss: 2.521708, ppl: 12.449846 +epoch: 2, batch: 26525, sum loss: 3948.585693, avg loss: 2.210854, ppl: 9.123507 +epoch: 2, batch: 26526, sum loss: 3921.531982, avg loss: 2.481982, ppl: 11.964958 +epoch: 2, batch: 26527, sum loss: 3842.871094, avg loss: 2.249924, ppl: 9.487019 +epoch: 2, batch: 26528, sum loss: 4969.073242, avg loss: 2.655838, ppl: 14.236916 +epoch: 2, batch: 26529, sum loss: 3884.474121, avg loss: 2.381652, ppl: 10.822766 +epoch: 2, batch: 26530, sum loss: 4567.082031, avg loss: 2.538678, ppl: 12.662922 +epoch: 2, batch: 26531, sum loss: 
4467.789062, avg loss: 2.626566, ppl: 13.826212 +epoch: 2, batch: 26532, sum loss: 3989.746094, avg loss: 2.396244, ppl: 10.981852 +epoch: 2, batch: 26533, sum loss: 2936.369873, avg loss: 2.253546, ppl: 9.521435 +epoch: 2, batch: 26534, sum loss: 5098.361816, avg loss: 2.672097, ppl: 14.470284 +epoch: 2, batch: 26535, sum loss: 3251.298340, avg loss: 2.337382, ppl: 10.354095 +epoch: 2, batch: 26536, sum loss: 3957.639648, avg loss: 2.489081, ppl: 12.050201 +epoch: 2, batch: 26537, sum loss: 3920.146484, avg loss: 2.289805, ppl: 9.873013 +epoch: 2, batch: 26538, sum loss: 4429.665039, avg loss: 2.532684, ppl: 12.587249 +epoch: 2, batch: 26539, sum loss: 3880.964111, avg loss: 2.473527, ppl: 11.864221 +epoch: 2, batch: 26540, sum loss: 3686.343994, avg loss: 2.350985, ppl: 10.495898 +epoch: 2, batch: 26541, sum loss: 3967.889404, avg loss: 2.648791, ppl: 14.136941 +epoch: 2, batch: 26542, sum loss: 4345.783691, avg loss: 2.606949, ppl: 13.557621 +epoch: 2, batch: 26543, sum loss: 3531.644531, avg loss: 2.502937, ppl: 12.218330 +epoch: 2, batch: 26544, sum loss: 4235.144531, avg loss: 2.406332, ppl: 11.093197 +epoch: 2, batch: 26545, sum loss: 4180.499023, avg loss: 2.544430, ppl: 12.735970 +epoch: 2, batch: 26546, sum loss: 4167.881836, avg loss: 2.626265, ppl: 13.822042 +epoch: 2, batch: 26547, sum loss: 4606.345215, avg loss: 2.722426, ppl: 15.217197 +epoch: 2, batch: 26548, sum loss: 5221.663574, avg loss: 2.683280, ppl: 14.633018 +epoch: 2, batch: 26549, sum loss: 3436.658691, avg loss: 2.257989, ppl: 9.563834 +epoch: 2, batch: 26550, sum loss: 4769.491211, avg loss: 2.417380, ppl: 11.216437 +epoch: 2, batch: 26551, sum loss: 3900.170654, avg loss: 2.443716, ppl: 11.515755 +epoch: 2, batch: 26552, sum loss: 3698.425781, avg loss: 2.351192, ppl: 10.498081 +epoch: 2, batch: 26553, sum loss: 4089.686768, avg loss: 2.568899, ppl: 13.051443 +epoch: 2, batch: 26554, sum loss: 3942.034912, avg loss: 2.379019, ppl: 10.794312 +epoch: 2, batch: 26555, sum loss: 
4632.315430, avg loss: 2.793918, ppl: 16.344929 +epoch: 2, batch: 26556, sum loss: 4072.825684, avg loss: 2.319377, ppl: 10.169336 +epoch: 2, batch: 26557, sum loss: 3735.020508, avg loss: 2.318449, ppl: 10.159900 +epoch: 2, batch: 26558, sum loss: 3755.882080, avg loss: 2.503921, ppl: 12.230358 +epoch: 2, batch: 26559, sum loss: 4352.810547, avg loss: 2.391654, ppl: 10.931562 +epoch: 2, batch: 26560, sum loss: 3898.905518, avg loss: 2.430739, ppl: 11.367281 +epoch: 2, batch: 26561, sum loss: 3152.342285, avg loss: 2.370182, ppl: 10.699343 +epoch: 2, batch: 26562, sum loss: 4200.163086, avg loss: 2.301459, ppl: 9.988749 +epoch: 2, batch: 26563, sum loss: 3657.559326, avg loss: 2.291704, ppl: 9.891778 +epoch: 2, batch: 26564, sum loss: 3862.708496, avg loss: 2.421761, ppl: 11.265678 +epoch: 2, batch: 26565, sum loss: 5544.726562, avg loss: 2.614204, ppl: 13.656341 +epoch: 2, batch: 26566, sum loss: 4217.719727, avg loss: 2.382893, ppl: 10.836203 +epoch: 2, batch: 26567, sum loss: 3966.276855, avg loss: 2.167365, ppl: 8.735232 +epoch: 2, batch: 26568, sum loss: 5052.566895, avg loss: 2.690398, ppl: 14.737536 +epoch: 2, batch: 26569, sum loss: 4611.961914, avg loss: 2.627898, ppl: 13.844645 +epoch: 2, batch: 26570, sum loss: 4547.171875, avg loss: 2.742564, ppl: 15.526752 +epoch: 2, batch: 26571, sum loss: 4603.765625, avg loss: 2.712885, ppl: 15.072696 +epoch: 2, batch: 26572, sum loss: 4186.300781, avg loss: 2.546412, ppl: 12.761231 +epoch: 2, batch: 26573, sum loss: 3887.219727, avg loss: 2.323503, ppl: 10.211377 +epoch: 2, batch: 26574, sum loss: 3156.708740, avg loss: 2.144503, ppl: 8.537800 +epoch: 2, batch: 26575, sum loss: 4090.093750, avg loss: 2.364216, ppl: 10.635698 +epoch: 2, batch: 26576, sum loss: 3923.211670, avg loss: 2.271692, ppl: 9.695790 +epoch: 2, batch: 26577, sum loss: 4091.442627, avg loss: 2.525582, ppl: 12.498168 +epoch: 2, batch: 26578, sum loss: 3592.711182, avg loss: 2.323875, ppl: 10.215183 +epoch: 2, batch: 26579, sum loss: 4091.079590, 
avg loss: 2.371640, ppl: 10.714955 +epoch: 2, batch: 26580, sum loss: 4571.473633, avg loss: 2.796008, ppl: 16.379137 +epoch: 2, batch: 26581, sum loss: 4523.676270, avg loss: 2.628516, ppl: 13.853196 +epoch: 2, batch: 26582, sum loss: 4115.379395, avg loss: 2.309416, ppl: 10.068543 +epoch: 2, batch: 26583, sum loss: 3908.364746, avg loss: 2.344550, ppl: 10.428577 +epoch: 2, batch: 26584, sum loss: 5778.907715, avg loss: 2.971161, ppl: 19.514561 +epoch: 2, batch: 26585, sum loss: 4136.207031, avg loss: 2.591608, ppl: 13.351227 +epoch: 2, batch: 26586, sum loss: 4545.145508, avg loss: 2.458164, ppl: 11.683344 +epoch: 2, batch: 26587, sum loss: 3594.654053, avg loss: 2.357150, ppl: 10.560814 +epoch: 2, batch: 26588, sum loss: 4080.676514, avg loss: 2.563239, ppl: 12.977786 +epoch: 2, batch: 26589, sum loss: 3485.719238, avg loss: 2.385845, ppl: 10.868242 +epoch: 2, batch: 26590, sum loss: 4648.887695, avg loss: 2.507491, ppl: 12.274091 +epoch: 2, batch: 26591, sum loss: 3229.506592, avg loss: 2.388688, ppl: 10.899188 +epoch: 2, batch: 26592, sum loss: 3945.815430, avg loss: 2.321068, ppl: 10.186548 +epoch: 2, batch: 26593, sum loss: 4998.984375, avg loss: 2.610436, ppl: 13.604978 +epoch: 2, batch: 26594, sum loss: 4008.026367, avg loss: 2.354892, ppl: 10.536991 +epoch: 2, batch: 26595, sum loss: 3200.349121, avg loss: 2.039738, ppl: 7.688596 +epoch: 2, batch: 26596, sum loss: 4894.001465, avg loss: 2.769667, ppl: 15.953320 +epoch: 2, batch: 26597, sum loss: 3643.781494, avg loss: 2.346286, ppl: 10.446694 +epoch: 2, batch: 26598, sum loss: 4909.169922, avg loss: 2.745621, ppl: 15.574278 +epoch: 2, batch: 26599, sum loss: 4077.147461, avg loss: 2.480017, ppl: 11.941463 +epoch: 2, batch: 26600, sum loss: 4101.717285, avg loss: 2.478379, ppl: 11.921924 +epoch: 2, batch: 26601, sum loss: 3371.500732, avg loss: 2.170960, ppl: 8.766696 +epoch: 2, batch: 26602, sum loss: 4354.228516, avg loss: 2.381963, ppl: 10.826134 +epoch: 2, batch: 26603, sum loss: 4061.399902, avg loss: 
2.404618, ppl: 11.074200 +epoch: 2, batch: 26604, sum loss: 5831.980469, avg loss: 2.868657, ppl: 17.613354 +epoch: 2, batch: 26605, sum loss: 3943.878906, avg loss: 2.710570, ppl: 15.037839 +epoch: 2, batch: 26606, sum loss: 4570.586914, avg loss: 2.544870, ppl: 12.741576 +epoch: 2, batch: 26607, sum loss: 3841.089355, avg loss: 2.471744, ppl: 11.843079 +epoch: 2, batch: 26608, sum loss: 4413.167480, avg loss: 2.633155, ppl: 13.917609 +epoch: 2, batch: 26609, sum loss: 4016.222656, avg loss: 2.471522, ppl: 11.840449 +epoch: 2, batch: 26610, sum loss: 4012.448486, avg loss: 2.449602, ppl: 11.583737 +epoch: 2, batch: 26611, sum loss: 4615.481445, avg loss: 2.513879, ppl: 12.352752 +epoch: 2, batch: 26612, sum loss: 4979.521484, avg loss: 2.477374, ppl: 11.909948 +epoch: 2, batch: 26613, sum loss: 3161.625244, avg loss: 2.024088, ppl: 7.569204 +epoch: 2, batch: 26614, sum loss: 4780.840332, avg loss: 2.663421, ppl: 14.345280 +epoch: 2, batch: 26615, sum loss: 4333.053223, avg loss: 2.770494, ppl: 15.966527 +epoch: 2, batch: 26616, sum loss: 4702.352539, avg loss: 2.661207, ppl: 14.313555 +epoch: 2, batch: 26617, sum loss: 4415.401367, avg loss: 2.479170, ppl: 11.931355 +epoch: 2, batch: 26618, sum loss: 4814.320312, avg loss: 2.532520, ppl: 12.585178 +epoch: 2, batch: 26619, sum loss: 3922.307373, avg loss: 2.275120, ppl: 9.729092 +epoch: 2, batch: 26620, sum loss: 5034.284180, avg loss: 2.689255, ppl: 14.720697 +epoch: 2, batch: 26621, sum loss: 3382.470215, avg loss: 2.081520, ppl: 8.016645 +epoch: 2, batch: 26622, sum loss: 4152.755371, avg loss: 2.634997, ppl: 13.943269 +epoch: 2, batch: 26623, sum loss: 3824.470703, avg loss: 2.285996, ppl: 9.835475 +epoch: 2, batch: 26624, sum loss: 4494.076660, avg loss: 2.691064, ppl: 14.747357 +epoch: 2, batch: 26625, sum loss: 4167.732422, avg loss: 2.469036, ppl: 11.811054 +epoch: 2, batch: 26626, sum loss: 4831.630371, avg loss: 2.690217, ppl: 14.734880 +epoch: 2, batch: 26627, sum loss: 4436.841797, avg loss: 2.693893, 
ppl: 14.789138 +epoch: 2, batch: 26628, sum loss: 3622.252197, avg loss: 2.288220, ppl: 9.857375 +epoch: 2, batch: 26629, sum loss: 4181.873047, avg loss: 2.431322, ppl: 11.373904 +epoch: 2, batch: 26630, sum loss: 4607.577637, avg loss: 2.620920, ppl: 13.748369 +epoch: 2, batch: 26631, sum loss: 4025.079102, avg loss: 2.449835, ppl: 11.586435 +epoch: 2, batch: 26632, sum loss: 4990.794434, avg loss: 2.630888, ppl: 13.886095 +epoch: 2, batch: 26633, sum loss: 3784.249023, avg loss: 2.455710, ppl: 11.654705 +epoch: 2, batch: 26634, sum loss: 4528.443848, avg loss: 2.489524, ppl: 12.055535 +epoch: 2, batch: 26635, sum loss: 4162.539062, avg loss: 2.457225, ppl: 11.672374 +epoch: 2, batch: 26636, sum loss: 3628.593750, avg loss: 2.380967, ppl: 10.815355 +epoch: 2, batch: 26637, sum loss: 3609.261230, avg loss: 2.153497, ppl: 8.614934 +epoch: 2, batch: 26638, sum loss: 4712.274902, avg loss: 2.579242, ppl: 13.187139 +epoch: 2, batch: 26639, sum loss: 2895.291260, avg loss: 2.301503, ppl: 9.989189 +epoch: 2, batch: 26640, sum loss: 3749.157471, avg loss: 2.300097, ppl: 9.975145 +epoch: 2, batch: 26641, sum loss: 4647.343750, avg loss: 2.559110, ppl: 12.924309 +epoch: 2, batch: 26642, sum loss: 4794.091797, avg loss: 2.464829, ppl: 11.761468 +epoch: 2, batch: 26643, sum loss: 4125.056641, avg loss: 2.661327, ppl: 14.315271 +epoch: 2, batch: 26644, sum loss: 4131.537598, avg loss: 2.449044, ppl: 11.577276 +epoch: 2, batch: 26645, sum loss: 4509.710938, avg loss: 2.557976, ppl: 12.909656 +epoch: 2, batch: 26646, sum loss: 3711.780273, avg loss: 2.318414, ppl: 10.159546 +epoch: 2, batch: 26647, sum loss: 3873.906250, avg loss: 2.596452, ppl: 13.416054 +epoch: 2, batch: 26648, sum loss: 3710.871582, avg loss: 2.264107, ppl: 9.622528 +epoch: 2, batch: 26649, sum loss: 4205.050781, avg loss: 2.503006, ppl: 12.219175 +epoch: 2, batch: 26650, sum loss: 4298.427734, avg loss: 2.524033, ppl: 12.478820 +epoch: 2, batch: 26651, sum loss: 4709.813477, avg loss: 2.513241, ppl: 
12.344872 +epoch: 2, batch: 26652, sum loss: 5009.216797, avg loss: 2.608967, ppl: 13.585014 +epoch: 2, batch: 26653, sum loss: 3763.042725, avg loss: 2.377159, ppl: 10.774248 +epoch: 2, batch: 26654, sum loss: 4051.904541, avg loss: 2.413284, ppl: 11.170588 +epoch: 2, batch: 26655, sum loss: 4926.665039, avg loss: 2.582109, ppl: 13.225006 +epoch: 2, batch: 26656, sum loss: 3599.492676, avg loss: 2.360323, ppl: 10.594373 +epoch: 2, batch: 26657, sum loss: 4439.290527, avg loss: 2.427168, ppl: 11.326763 +epoch: 2, batch: 26658, sum loss: 4468.725098, avg loss: 2.611762, ppl: 13.623037 +epoch: 2, batch: 26659, sum loss: 4660.783203, avg loss: 2.551058, ppl: 12.820662 +epoch: 2, batch: 26660, sum loss: 4094.376465, avg loss: 2.481440, ppl: 11.958476 +epoch: 2, batch: 26661, sum loss: 4382.710938, avg loss: 2.463581, ppl: 11.746802 +epoch: 2, batch: 26662, sum loss: 4051.303467, avg loss: 2.391560, ppl: 10.930535 +epoch: 2, batch: 26663, sum loss: 2867.706543, avg loss: 1.997010, ppl: 7.366997 +epoch: 2, batch: 26664, sum loss: 4358.688477, avg loss: 2.540028, ppl: 12.680030 +epoch: 2, batch: 26665, sum loss: 4645.002441, avg loss: 2.510812, ppl: 12.314929 +epoch: 2, batch: 26666, sum loss: 4007.326660, avg loss: 2.443492, ppl: 11.513174 +epoch: 2, batch: 26667, sum loss: 4678.385254, avg loss: 2.596218, ppl: 13.412919 +epoch: 2, batch: 26668, sum loss: 3429.252930, avg loss: 2.313936, ppl: 10.114156 +epoch: 2, batch: 26669, sum loss: 4326.553711, avg loss: 2.468086, ppl: 11.799835 +epoch: 2, batch: 26670, sum loss: 3814.102539, avg loss: 2.459125, ppl: 11.694572 +epoch: 2, batch: 26671, sum loss: 4147.649414, avg loss: 2.379604, ppl: 10.800624 +epoch: 2, batch: 26672, sum loss: 3957.256348, avg loss: 2.302069, ppl: 9.994840 +epoch: 2, batch: 26673, sum loss: 4186.652344, avg loss: 2.634772, ppl: 13.940131 +epoch: 2, batch: 26674, sum loss: 4611.397461, avg loss: 2.465988, ppl: 11.775109 +epoch: 2, batch: 26675, sum loss: 4684.384766, avg loss: 2.643558, ppl: 14.063151 
+epoch: 2, batch: 26676, sum loss: 3639.156738, avg loss: 2.550215, ppl: 12.809857 +epoch: 2, batch: 26677, sum loss: 4235.037598, avg loss: 2.463664, ppl: 11.747771 +epoch: 2, batch: 26678, sum loss: 4248.118652, avg loss: 2.557567, ppl: 12.904381 +epoch: 2, batch: 26679, sum loss: 4095.334961, avg loss: 2.383780, ppl: 10.845828 +epoch: 2, batch: 26680, sum loss: 3941.136230, avg loss: 2.483388, ppl: 11.981787 +epoch: 2, batch: 26681, sum loss: 3884.785889, avg loss: 2.267826, ppl: 9.658380 +epoch: 2, batch: 26682, sum loss: 3144.424316, avg loss: 2.378536, ppl: 10.789096 +epoch: 2, batch: 26683, sum loss: 4698.787109, avg loss: 2.692715, ppl: 14.771722 +epoch: 2, batch: 26684, sum loss: 4831.032227, avg loss: 2.712539, ppl: 15.067486 +epoch: 2, batch: 26685, sum loss: 4446.788086, avg loss: 2.562990, ppl: 12.974556 +epoch: 2, batch: 26686, sum loss: 4943.559082, avg loss: 2.755607, ppl: 15.730589 +epoch: 2, batch: 26687, sum loss: 4627.568848, avg loss: 2.681094, ppl: 14.601061 +epoch: 2, batch: 26688, sum loss: 4854.349121, avg loss: 2.710413, ppl: 15.035477 +epoch: 2, batch: 26689, sum loss: 3610.294434, avg loss: 2.364305, ppl: 10.636649 +epoch: 2, batch: 26690, sum loss: 4849.365234, avg loss: 2.855928, ppl: 17.390564 +epoch: 2, batch: 26691, sum loss: 4399.013184, avg loss: 2.443896, ppl: 11.517830 +epoch: 2, batch: 26692, sum loss: 3889.822510, avg loss: 2.491879, ppl: 12.083958 +epoch: 2, batch: 26693, sum loss: 4697.982422, avg loss: 2.595571, ppl: 13.404233 +epoch: 2, batch: 26694, sum loss: 4412.184570, avg loss: 2.632568, ppl: 13.909449 +epoch: 2, batch: 26695, sum loss: 3221.894043, avg loss: 2.178427, ppl: 8.832404 +epoch: 2, batch: 26696, sum loss: 4320.844727, avg loss: 2.424716, ppl: 11.299026 +epoch: 2, batch: 26697, sum loss: 3494.187012, avg loss: 2.315565, ppl: 10.130641 +epoch: 2, batch: 26698, sum loss: 4131.707520, avg loss: 2.419032, ppl: 11.234982 +epoch: 2, batch: 26699, sum loss: 3802.690918, avg loss: 2.439186, ppl: 11.463707 +epoch: 
2, batch: 26700, sum loss: 3728.443359, avg loss: 2.517518, ppl: 12.397781 +epoch: 2, batch: 26701, sum loss: 3695.030273, avg loss: 2.332721, ppl: 10.305948 +epoch: 2, batch: 26702, sum loss: 3934.625732, avg loss: 2.350434, ppl: 10.490117 +epoch: 2, batch: 26703, sum loss: 3903.968750, avg loss: 2.350373, ppl: 10.489477 +epoch: 2, batch: 26704, sum loss: 3629.298828, avg loss: 2.292671, ppl: 9.901351 +epoch: 2, batch: 26705, sum loss: 3092.012695, avg loss: 2.125095, ppl: 8.373690 +epoch: 2, batch: 26706, sum loss: 4284.902832, avg loss: 2.389795, ppl: 10.911261 +epoch: 2, batch: 26707, sum loss: 3922.376465, avg loss: 2.454554, ppl: 11.641235 +epoch: 2, batch: 26708, sum loss: 4306.867188, avg loss: 2.546935, ppl: 12.767911 +epoch: 2, batch: 26709, sum loss: 4555.892090, avg loss: 2.578321, ppl: 13.174993 +epoch: 2, batch: 26710, sum loss: 3720.020996, avg loss: 2.455460, ppl: 11.651788 +epoch: 2, batch: 26711, sum loss: 4388.476562, avg loss: 2.689018, ppl: 14.717210 +epoch: 2, batch: 26712, sum loss: 5185.836914, avg loss: 2.636419, ppl: 13.963117 +epoch: 2, batch: 26713, sum loss: 3561.036621, avg loss: 2.236832, ppl: 9.363622 +epoch: 2, batch: 26714, sum loss: 3953.809326, avg loss: 2.300064, ppl: 9.974817 +epoch: 2, batch: 26715, sum loss: 4167.745605, avg loss: 2.458847, ppl: 11.691324 +epoch: 2, batch: 26716, sum loss: 3629.415771, avg loss: 2.233487, ppl: 9.332348 +epoch: 2, batch: 26717, sum loss: 4060.138428, avg loss: 2.672902, ppl: 14.481936 +epoch: 2, batch: 26718, sum loss: 3901.678467, avg loss: 2.392200, ppl: 10.937532 +epoch: 2, batch: 26719, sum loss: 3888.136230, avg loss: 2.465527, ppl: 11.769684 +epoch: 2, batch: 26720, sum loss: 4486.858398, avg loss: 2.376514, ppl: 10.767303 +epoch: 2, batch: 26721, sum loss: 4138.051758, avg loss: 2.480846, ppl: 11.951376 +epoch: 2, batch: 26722, sum loss: 3728.686035, avg loss: 2.343612, ppl: 10.418798 +epoch: 2, batch: 26723, sum loss: 5047.428711, avg loss: 2.612541, ppl: 13.633646 +epoch: 2, batch: 
26724, sum loss: 4463.049316, avg loss: 2.548857, ppl: 12.792480 +epoch: 2, batch: 26725, sum loss: 4376.346680, avg loss: 2.510813, ppl: 12.314935 +epoch: 2, batch: 26726, sum loss: 3905.221191, avg loss: 2.473224, ppl: 11.860626 +epoch: 2, batch: 26727, sum loss: 4285.557129, avg loss: 2.349538, ppl: 10.480725 +epoch: 2, batch: 26728, sum loss: 4652.905273, avg loss: 2.512368, ppl: 12.334099 +epoch: 2, batch: 26729, sum loss: 3753.908203, avg loss: 2.317227, ppl: 10.147500 +epoch: 2, batch: 26730, sum loss: 3999.308838, avg loss: 2.474820, ppl: 11.879567 +epoch: 2, batch: 26731, sum loss: 3434.494385, avg loss: 2.146559, ppl: 8.555368 +epoch: 2, batch: 26732, sum loss: 4388.528809, avg loss: 2.626289, ppl: 13.822382 +epoch: 2, batch: 26733, sum loss: 4467.876465, avg loss: 2.491844, ppl: 12.083540 +epoch: 2, batch: 26734, sum loss: 3839.787109, avg loss: 2.416480, ppl: 11.206347 +epoch: 2, batch: 26735, sum loss: 3353.944824, avg loss: 2.464324, ppl: 11.755530 +epoch: 2, batch: 26736, sum loss: 4600.414551, avg loss: 2.476003, ppl: 11.893636 +epoch: 2, batch: 26737, sum loss: 3116.401123, avg loss: 2.205521, ppl: 9.074977 +epoch: 2, batch: 26738, sum loss: 3454.396240, avg loss: 2.307546, ppl: 10.049731 +epoch: 2, batch: 26739, sum loss: 5586.273438, avg loss: 2.824203, ppl: 16.847507 +epoch: 2, batch: 26740, sum loss: 4241.848633, avg loss: 2.511456, ppl: 12.322856 +epoch: 2, batch: 26741, sum loss: 3785.235352, avg loss: 2.327943, ppl: 10.256823 +epoch: 2, batch: 26742, sum loss: 4486.424805, avg loss: 2.397876, ppl: 10.999783 +epoch: 2, batch: 26743, sum loss: 5430.615234, avg loss: 2.700455, ppl: 14.886510 +epoch: 2, batch: 26744, sum loss: 3931.188477, avg loss: 2.414735, ppl: 11.186804 +epoch: 2, batch: 26745, sum loss: 4680.976562, avg loss: 2.424120, ppl: 11.292293 +epoch: 2, batch: 26746, sum loss: 4425.071777, avg loss: 2.784816, ppl: 16.196831 +epoch: 2, batch: 26747, sum loss: 3935.680908, avg loss: 2.308317, ppl: 10.057487 +epoch: 2, batch: 26748, 
sum loss: 4067.391113, avg loss: 2.585754, ppl: 13.273293 +epoch: 2, batch: 26749, sum loss: 4385.801758, avg loss: 2.513353, ppl: 12.346262 +epoch: 2, batch: 26750, sum loss: 4563.008301, avg loss: 2.742193, ppl: 15.520978 +epoch: 2, batch: 26751, sum loss: 4713.639648, avg loss: 2.699679, ppl: 14.874958 +epoch: 2, batch: 26752, sum loss: 4147.664062, avg loss: 2.310676, ppl: 10.081241 +epoch: 2, batch: 26753, sum loss: 3935.651611, avg loss: 2.462861, ppl: 11.738345 +epoch: 2, batch: 26754, sum loss: 4369.101074, avg loss: 2.897282, ppl: 18.124813 +epoch: 2, batch: 26755, sum loss: 4390.030273, avg loss: 2.667090, ppl: 14.398012 +epoch: 2, batch: 26756, sum loss: 4073.020752, avg loss: 2.480524, ppl: 11.947527 +epoch: 2, batch: 26757, sum loss: 3716.972412, avg loss: 2.330390, ppl: 10.281953 +epoch: 2, batch: 26758, sum loss: 4500.444824, avg loss: 2.608954, ppl: 13.584829 +epoch: 2, batch: 26759, sum loss: 3868.678711, avg loss: 2.329126, ppl: 10.268967 +epoch: 2, batch: 26760, sum loss: 3731.601074, avg loss: 2.499398, ppl: 12.175159 +epoch: 2, batch: 26761, sum loss: 4536.111816, avg loss: 2.517265, ppl: 12.394655 +epoch: 2, batch: 26762, sum loss: 4145.112793, avg loss: 2.386363, ppl: 10.873877 +epoch: 2, batch: 26763, sum loss: 4496.348145, avg loss: 2.438367, ppl: 11.454316 +epoch: 2, batch: 26764, sum loss: 4202.182617, avg loss: 2.451682, ppl: 11.607853 +epoch: 2, batch: 26765, sum loss: 4349.202148, avg loss: 2.499542, ppl: 12.176909 +epoch: 2, batch: 26766, sum loss: 3708.609131, avg loss: 2.604360, ppl: 13.522573 +epoch: 2, batch: 26767, sum loss: 3449.924805, avg loss: 2.123031, ppl: 8.356424 +epoch: 2, batch: 26768, sum loss: 4554.760742, avg loss: 2.512279, ppl: 12.332999 +epoch: 2, batch: 26769, sum loss: 3119.519531, avg loss: 2.397786, ppl: 10.998797 +epoch: 2, batch: 26770, sum loss: 4431.329590, avg loss: 2.588393, ppl: 13.308373 +epoch: 2, batch: 26771, sum loss: 3781.937744, avg loss: 2.432114, ppl: 11.382924 +epoch: 2, batch: 26772, sum 
loss: 3954.960449, avg loss: 2.576521, ppl: 13.151310 +epoch: 2, batch: 26773, sum loss: 3738.522705, avg loss: 2.346844, ppl: 10.452532 +epoch: 2, batch: 26774, sum loss: 3894.283936, avg loss: 2.376012, ppl: 10.761900 +epoch: 2, batch: 26775, sum loss: 4845.670898, avg loss: 2.884328, ppl: 17.891542 +epoch: 2, batch: 26776, sum loss: 3832.791016, avg loss: 2.357190, ppl: 10.561232 +epoch: 2, batch: 26777, sum loss: 3166.806152, avg loss: 2.288155, ppl: 9.856731 +epoch: 2, batch: 26778, sum loss: 4034.228516, avg loss: 2.596029, ppl: 13.410374 +epoch: 2, batch: 26779, sum loss: 4379.857910, avg loss: 2.393365, ppl: 10.950281 +epoch: 2, batch: 26780, sum loss: 3187.941406, avg loss: 2.273853, ppl: 9.716765 +epoch: 2, batch: 26781, sum loss: 4223.206055, avg loss: 2.634564, ppl: 13.937233 +epoch: 2, batch: 26782, sum loss: 3535.086182, avg loss: 2.318089, ppl: 10.156250 +epoch: 2, batch: 26783, sum loss: 4579.575195, avg loss: 2.685968, ppl: 14.672397 +epoch: 2, batch: 26784, sum loss: 4049.827881, avg loss: 2.574589, ppl: 13.125916 +epoch: 2, batch: 26785, sum loss: 4341.467773, avg loss: 2.493663, ppl: 12.105541 +epoch: 2, batch: 26786, sum loss: 4611.066895, avg loss: 2.424325, ppl: 11.294608 +epoch: 2, batch: 26787, sum loss: 5754.806641, avg loss: 2.830697, ppl: 16.957273 +epoch: 2, batch: 26788, sum loss: 4193.423828, avg loss: 2.546098, ppl: 12.757231 +epoch: 2, batch: 26789, sum loss: 4266.693848, avg loss: 2.565661, ppl: 13.009251 +epoch: 2, batch: 26790, sum loss: 4209.555664, avg loss: 2.563676, ppl: 12.983456 +epoch: 2, batch: 26791, sum loss: 4053.039062, avg loss: 2.289853, ppl: 9.873483 +epoch: 2, batch: 26792, sum loss: 3797.062012, avg loss: 2.374648, ppl: 10.747228 +epoch: 2, batch: 26793, sum loss: 5013.164062, avg loss: 2.724545, ppl: 15.249481 +epoch: 2, batch: 26794, sum loss: 3689.082764, avg loss: 2.398623, ppl: 11.008013 +epoch: 2, batch: 26795, sum loss: 4188.412109, avg loss: 2.764629, ppl: 15.873144 +epoch: 2, batch: 26796, sum loss: 
3617.427246, avg loss: 2.105604, ppl: 8.212059 +epoch: 2, batch: 26797, sum loss: 5098.045410, avg loss: 2.815045, ppl: 16.693920 +epoch: 2, batch: 26798, sum loss: 4156.972168, avg loss: 2.624351, ppl: 13.795618 +epoch: 2, batch: 26799, sum loss: 3726.342285, avg loss: 2.228674, ppl: 9.287539 +epoch: 2, batch: 26800, sum loss: 4554.353516, avg loss: 2.628017, ppl: 13.846285 +epoch: 2, batch: 26801, sum loss: 4315.366211, avg loss: 2.609049, ppl: 13.586119 +epoch: 2, batch: 26802, sum loss: 4550.459961, avg loss: 2.628804, ppl: 13.857186 +epoch: 2, batch: 26803, sum loss: 4281.649902, avg loss: 2.459305, ppl: 11.696680 +epoch: 2, batch: 26804, sum loss: 4027.289795, avg loss: 2.424618, ppl: 11.297908 +epoch: 2, batch: 26805, sum loss: 5614.045898, avg loss: 2.684862, ppl: 14.656174 +epoch: 2, batch: 26806, sum loss: 5142.529297, avg loss: 2.484314, ppl: 11.992887 +epoch: 2, batch: 26807, sum loss: 4257.656250, avg loss: 2.275605, ppl: 9.733804 +epoch: 2, batch: 26808, sum loss: 3983.352539, avg loss: 2.601798, ppl: 13.487968 +epoch: 2, batch: 26809, sum loss: 3739.728760, avg loss: 2.255566, ppl: 9.540693 +epoch: 2, batch: 26810, sum loss: 4414.975586, avg loss: 2.363477, ppl: 10.627843 +epoch: 2, batch: 26811, sum loss: 4545.323242, avg loss: 2.518185, ppl: 12.406055 +epoch: 2, batch: 26812, sum loss: 3592.784424, avg loss: 2.112160, ppl: 8.266078 +epoch: 2, batch: 26813, sum loss: 3337.336670, avg loss: 2.236821, ppl: 9.363517 +epoch: 2, batch: 26814, sum loss: 3336.395020, avg loss: 2.170719, ppl: 8.764583 +epoch: 2, batch: 26815, sum loss: 4100.623535, avg loss: 2.645564, ppl: 14.091385 +epoch: 2, batch: 26816, sum loss: 5340.763672, avg loss: 2.575103, ppl: 13.132670 +epoch: 2, batch: 26817, sum loss: 3803.802734, avg loss: 2.443033, ppl: 11.507894 +epoch: 2, batch: 26818, sum loss: 3787.145996, avg loss: 2.306423, ppl: 10.038450 +epoch: 2, batch: 26819, sum loss: 4656.855957, avg loss: 2.617682, ppl: 13.703918 +epoch: 2, batch: 26820, sum loss: 3931.128174, 
avg loss: 2.391197, ppl: 10.926567 +epoch: 2, batch: 26821, sum loss: 4041.598877, avg loss: 2.405714, ppl: 11.086341 +epoch: 2, batch: 26822, sum loss: 4938.569336, avg loss: 2.771363, ppl: 15.980409 +epoch: 2, batch: 26823, sum loss: 4650.262207, avg loss: 2.423274, ppl: 11.282734 +epoch: 2, batch: 26824, sum loss: 3899.769287, avg loss: 2.514358, ppl: 12.358672 +epoch: 2, batch: 26825, sum loss: 3750.617188, avg loss: 2.492105, ppl: 12.086686 +epoch: 2, batch: 26826, sum loss: 3777.292480, avg loss: 2.452787, ppl: 11.620690 +epoch: 2, batch: 26827, sum loss: 4134.222168, avg loss: 2.403618, ppl: 11.063127 +epoch: 2, batch: 26828, sum loss: 3135.856689, avg loss: 2.033629, ppl: 7.641771 +epoch: 2, batch: 26829, sum loss: 3170.834473, avg loss: 2.255217, ppl: 9.537359 +epoch: 2, batch: 26830, sum loss: 4444.324219, avg loss: 2.662867, ppl: 14.337330 +epoch: 2, batch: 26831, sum loss: 4712.817871, avg loss: 2.697663, ppl: 14.845003 +epoch: 2, batch: 26832, sum loss: 3852.033203, avg loss: 2.496457, ppl: 12.139409 +epoch: 2, batch: 26833, sum loss: 3556.907227, avg loss: 2.350897, ppl: 10.494980 +epoch: 2, batch: 26834, sum loss: 3784.379395, avg loss: 2.460585, ppl: 11.711659 +epoch: 2, batch: 26835, sum loss: 4215.011719, avg loss: 2.454870, ppl: 11.644919 +epoch: 2, batch: 26836, sum loss: 4631.704590, avg loss: 2.498222, ppl: 12.160857 +epoch: 2, batch: 26837, sum loss: 3888.536133, avg loss: 2.450243, ppl: 11.591168 +epoch: 2, batch: 26838, sum loss: 5696.199219, avg loss: 2.946818, ppl: 19.045258 +epoch: 2, batch: 26839, sum loss: 4252.024902, avg loss: 2.510050, ppl: 12.305546 +epoch: 2, batch: 26840, sum loss: 3496.726807, avg loss: 2.506614, ppl: 12.263339 +epoch: 2, batch: 26841, sum loss: 3746.303711, avg loss: 2.285725, ppl: 9.832813 +epoch: 2, batch: 26842, sum loss: 3687.320312, avg loss: 2.249738, ppl: 9.485248 +epoch: 2, batch: 26843, sum loss: 3292.266602, avg loss: 2.073216, ppl: 7.950350 +epoch: 2, batch: 26844, sum loss: 3650.601807, avg loss: 
2.645364, ppl: 14.088570 +epoch: 2, batch: 26845, sum loss: 3217.618896, avg loss: 2.112685, ppl: 8.270415 +epoch: 2, batch: 26846, sum loss: 4574.815430, avg loss: 2.624679, ppl: 13.800141 +epoch: 2, batch: 26847, sum loss: 4673.885254, avg loss: 2.652602, ppl: 14.190922 +epoch: 2, batch: 26848, sum loss: 5004.738281, avg loss: 2.591786, ppl: 13.353599 +epoch: 2, batch: 26849, sum loss: 4843.440918, avg loss: 2.693794, ppl: 14.787667 +epoch: 2, batch: 26850, sum loss: 5151.886719, avg loss: 2.733096, ppl: 15.380437 +epoch: 2, batch: 26851, sum loss: 4359.730469, avg loss: 2.464517, ppl: 11.757803 +epoch: 2, batch: 26852, sum loss: 4924.319336, avg loss: 2.626304, ppl: 13.822582 +epoch: 2, batch: 26853, sum loss: 5045.508301, avg loss: 2.530345, ppl: 12.557840 +epoch: 2, batch: 26854, sum loss: 3769.750488, avg loss: 2.227985, ppl: 9.281145 +epoch: 2, batch: 26855, sum loss: 4622.371094, avg loss: 2.501283, ppl: 12.198136 +epoch: 2, batch: 26856, sum loss: 3523.482910, avg loss: 2.403467, ppl: 11.061462 +epoch: 2, batch: 26857, sum loss: 3550.321289, avg loss: 2.299431, ppl: 9.968510 +epoch: 2, batch: 26858, sum loss: 4520.366699, avg loss: 2.574241, ppl: 13.121353 +epoch: 2, batch: 26859, sum loss: 4681.816895, avg loss: 2.806845, ppl: 16.557592 +epoch: 2, batch: 26860, sum loss: 4152.327148, avg loss: 2.327538, ppl: 10.252666 +epoch: 2, batch: 26861, sum loss: 3904.245605, avg loss: 2.533579, ppl: 12.598520 +epoch: 2, batch: 26862, sum loss: 4540.226562, avg loss: 2.335508, ppl: 10.334704 +epoch: 2, batch: 26863, sum loss: 4311.518555, avg loss: 2.518410, ppl: 12.408854 +epoch: 2, batch: 26864, sum loss: 4590.689941, avg loss: 2.794090, ppl: 16.347746 +epoch: 2, batch: 26865, sum loss: 3843.213379, avg loss: 2.304085, ppl: 10.015008 +epoch: 2, batch: 26866, sum loss: 4232.539062, avg loss: 2.443729, ppl: 11.515905 +epoch: 2, batch: 26867, sum loss: 4369.310547, avg loss: 2.503903, ppl: 12.230134 +epoch: 2, batch: 26868, sum loss: 3283.478027, avg loss: 2.186070, 
ppl: 8.900170 +epoch: 2, batch: 26869, sum loss: 4211.270020, avg loss: 2.467059, ppl: 11.787729 +epoch: 2, batch: 26870, sum loss: 4155.384766, avg loss: 2.686092, ppl: 14.674222 +epoch: 2, batch: 26871, sum loss: 3577.444336, avg loss: 2.224779, ppl: 9.251434 +epoch: 2, batch: 26872, sum loss: 4228.633301, avg loss: 2.431647, ppl: 11.377601 +epoch: 2, batch: 26873, sum loss: 5257.083984, avg loss: 2.679451, ppl: 14.577095 +epoch: 2, batch: 26874, sum loss: 3623.545410, avg loss: 2.306522, ppl: 10.039443 +epoch: 2, batch: 26875, sum loss: 4529.556152, avg loss: 2.641140, ppl: 14.029194 +epoch: 2, batch: 26876, sum loss: 3961.749756, avg loss: 2.465308, ppl: 11.767105 +epoch: 2, batch: 26877, sum loss: 4803.156738, avg loss: 2.552155, ppl: 12.834739 +epoch: 2, batch: 26878, sum loss: 4111.188477, avg loss: 2.494653, ppl: 12.117531 +epoch: 2, batch: 26879, sum loss: 3697.909180, avg loss: 2.413779, ppl: 11.176113 +epoch: 2, batch: 26880, sum loss: 4276.098633, avg loss: 2.387548, ppl: 10.886766 +epoch: 2, batch: 26881, sum loss: 3411.718750, avg loss: 2.177230, ppl: 8.821833 +epoch: 2, batch: 26882, sum loss: 3981.323242, avg loss: 2.558691, ppl: 12.918896 +epoch: 2, batch: 26883, sum loss: 4319.925781, avg loss: 2.508668, ppl: 12.288548 +epoch: 2, batch: 26884, sum loss: 3078.306641, avg loss: 2.242029, ppl: 9.412414 +epoch: 2, batch: 26885, sum loss: 4107.711914, avg loss: 2.509293, ppl: 12.296228 +epoch: 2, batch: 26886, sum loss: 4820.316895, avg loss: 2.614055, ppl: 13.654303 +epoch: 2, batch: 26887, sum loss: 4023.617676, avg loss: 2.540163, ppl: 12.681732 +epoch: 2, batch: 26888, sum loss: 4248.466797, avg loss: 2.796884, ppl: 16.393482 +epoch: 2, batch: 26889, sum loss: 4941.355957, avg loss: 2.599346, ppl: 13.454931 +epoch: 2, batch: 26890, sum loss: 4400.973633, avg loss: 2.573669, ppl: 13.113848 +epoch: 2, batch: 26891, sum loss: 4405.721680, avg loss: 2.511814, ppl: 12.327270 +epoch: 2, batch: 26892, sum loss: 4996.788574, avg loss: 2.629889, ppl: 
13.872231 +epoch: 2, batch: 26893, sum loss: 3933.799561, avg loss: 2.600000, ppl: 13.463737 +epoch: 2, batch: 26894, sum loss: 4272.911133, avg loss: 2.411350, ppl: 11.148997 +epoch: 2, batch: 26895, sum loss: 4660.510742, avg loss: 2.664672, ppl: 14.363233 +epoch: 2, batch: 26896, sum loss: 4295.526367, avg loss: 2.424112, ppl: 11.292198 +epoch: 2, batch: 26897, sum loss: 3814.918701, avg loss: 2.562068, ppl: 12.962590 +epoch: 2, batch: 26898, sum loss: 3766.305664, avg loss: 2.464860, ppl: 11.761832 +epoch: 2, batch: 26899, sum loss: 4499.250488, avg loss: 2.294365, ppl: 9.918140 +epoch: 2, batch: 26900, sum loss: 4608.602051, avg loss: 2.386640, ppl: 10.876887 +epoch: 2, batch: 26901, sum loss: 3639.357666, avg loss: 2.213721, ppl: 9.149702 +epoch: 2, batch: 26902, sum loss: 4718.604980, avg loss: 2.659867, ppl: 14.294392 +epoch: 2, batch: 26903, sum loss: 4124.916504, avg loss: 2.597555, ppl: 13.430855 +epoch: 2, batch: 26904, sum loss: 4327.222168, avg loss: 2.301714, ppl: 9.991292 +epoch: 2, batch: 26905, sum loss: 3801.805176, avg loss: 2.173702, ppl: 8.790770 +epoch: 2, batch: 26906, sum loss: 4325.286621, avg loss: 2.460345, ppl: 11.708851 +epoch: 2, batch: 26907, sum loss: 3612.043457, avg loss: 2.377909, ppl: 10.782331 +epoch: 2, batch: 26908, sum loss: 3371.918945, avg loss: 2.338363, ppl: 10.364253 +epoch: 2, batch: 26909, sum loss: 4325.899902, avg loss: 2.662092, ppl: 14.326231 +epoch: 2, batch: 26910, sum loss: 4577.699707, avg loss: 2.501475, ppl: 12.200480 +epoch: 2, batch: 26911, sum loss: 4222.269531, avg loss: 2.325038, ppl: 10.227071 +epoch: 2, batch: 26912, sum loss: 3347.427734, avg loss: 2.114610, ppl: 8.286353 +epoch: 2, batch: 26913, sum loss: 4686.254395, avg loss: 2.476879, ppl: 11.904050 +epoch: 2, batch: 26914, sum loss: 4843.197266, avg loss: 2.594107, ppl: 13.384625 +epoch: 2, batch: 26915, sum loss: 4875.252930, avg loss: 2.509137, ppl: 12.294314 +epoch: 2, batch: 26916, sum loss: 3488.633057, avg loss: 2.322658, ppl: 10.202761 
+epoch: 2, batch: 26917, sum loss: 5033.416016, avg loss: 2.674504, ppl: 14.505151 +epoch: 2, batch: 26918, sum loss: 4055.006104, avg loss: 2.397993, ppl: 11.001076 +epoch: 2, batch: 26919, sum loss: 4624.252930, avg loss: 2.471541, ppl: 11.840678 +epoch: 2, batch: 26920, sum loss: 3271.908936, avg loss: 2.261167, ppl: 9.594282 +epoch: 2, batch: 26921, sum loss: 4783.311523, avg loss: 2.612404, ppl: 13.631783 +epoch: 2, batch: 26922, sum loss: 4561.079102, avg loss: 2.472130, ppl: 11.847651 +epoch: 2, batch: 26923, sum loss: 4078.766357, avg loss: 2.513103, ppl: 12.343172 +epoch: 2, batch: 26924, sum loss: 4413.811523, avg loss: 2.596360, ppl: 13.414819 +epoch: 2, batch: 26925, sum loss: 4176.045898, avg loss: 2.487222, ppl: 12.027816 +epoch: 2, batch: 26926, sum loss: 3652.853516, avg loss: 2.266038, ppl: 9.641129 +epoch: 2, batch: 26927, sum loss: 3874.628906, avg loss: 2.346838, ppl: 10.452464 +epoch: 2, batch: 26928, sum loss: 4336.425293, avg loss: 2.398465, ppl: 11.006273 +epoch: 2, batch: 26929, sum loss: 3273.267578, avg loss: 2.082231, ppl: 8.022349 +epoch: 2, batch: 26930, sum loss: 3635.590332, avg loss: 2.390263, ppl: 10.916368 +epoch: 2, batch: 26931, sum loss: 4664.955566, avg loss: 2.753811, ppl: 15.702358 +epoch: 2, batch: 26932, sum loss: 3888.031250, avg loss: 2.394108, ppl: 10.958417 +epoch: 2, batch: 26933, sum loss: 3320.761230, avg loss: 2.341863, ppl: 10.400592 +epoch: 2, batch: 26934, sum loss: 4352.956055, avg loss: 2.550062, ppl: 12.807900 +epoch: 2, batch: 26935, sum loss: 3715.117676, avg loss: 2.581736, ppl: 13.220063 +epoch: 2, batch: 26936, sum loss: 3546.196777, avg loss: 2.506146, ppl: 12.257598 +epoch: 2, batch: 26937, sum loss: 4283.472656, avg loss: 2.374430, ppl: 10.744891 +epoch: 2, batch: 26938, sum loss: 4797.560547, avg loss: 2.576563, ppl: 13.151859 +epoch: 2, batch: 26939, sum loss: 4247.470215, avg loss: 2.532779, ppl: 12.588438 +epoch: 2, batch: 26940, sum loss: 5035.233398, avg loss: 2.739517, ppl: 15.479514 +epoch: 2, 
batch: 26941, sum loss: 3257.841553, avg loss: 2.270273, ppl: 9.682041 +epoch: 2, batch: 26942, sum loss: 3835.460938, avg loss: 2.422906, ppl: 11.278592 +epoch: 2, batch: 26943, sum loss: 3942.712646, avg loss: 2.386630, ppl: 10.876778 +epoch: 2, batch: 26944, sum loss: 4467.703613, avg loss: 2.425463, ppl: 11.307468 +epoch: 2, batch: 26945, sum loss: 3343.161621, avg loss: 2.248259, ppl: 9.471235 +epoch: 2, batch: 26946, sum loss: 3692.111816, avg loss: 2.650475, ppl: 14.160764 +epoch: 2, batch: 26947, sum loss: 4125.157715, avg loss: 2.614168, ppl: 13.655855 +epoch: 2, batch: 26948, sum loss: 4109.385254, avg loss: 2.463660, ppl: 11.747732 +epoch: 2, batch: 26949, sum loss: 3991.914062, avg loss: 2.695418, ppl: 14.811710 +epoch: 2, batch: 26950, sum loss: 4523.179688, avg loss: 2.492110, ppl: 12.086753 +epoch: 2, batch: 26951, sum loss: 4049.395020, avg loss: 2.164295, ppl: 8.708458 +epoch: 2, batch: 26952, sum loss: 3745.028564, avg loss: 2.515130, ppl: 12.368217 +epoch: 2, batch: 26953, sum loss: 3880.485107, avg loss: 2.221228, ppl: 9.218643 +epoch: 2, batch: 26954, sum loss: 4831.800293, avg loss: 2.690312, ppl: 14.736275 +epoch: 2, batch: 26955, sum loss: 4164.033203, avg loss: 2.528253, ppl: 12.531598 +epoch: 2, batch: 26956, sum loss: 3985.843750, avg loss: 2.538754, ppl: 12.663881 +epoch: 2, batch: 26957, sum loss: 3737.838867, avg loss: 2.417748, ppl: 11.220565 +epoch: 2, batch: 26958, sum loss: 4430.872070, avg loss: 2.487856, ppl: 12.035449 +epoch: 2, batch: 26959, sum loss: 3440.664062, avg loss: 2.139716, ppl: 8.497027 +epoch: 2, batch: 26960, sum loss: 3999.065918, avg loss: 2.547176, ppl: 12.770986 +epoch: 2, batch: 26961, sum loss: 4053.391357, avg loss: 2.321530, ppl: 10.191257 +epoch: 2, batch: 26962, sum loss: 4097.171387, avg loss: 2.495232, ppl: 12.124550 +epoch: 2, batch: 26963, sum loss: 4410.390137, avg loss: 2.436680, ppl: 11.435012 +epoch: 2, batch: 26964, sum loss: 4406.171387, avg loss: 2.410378, ppl: 11.138173 +epoch: 2, batch: 
26965, sum loss: 4477.515137, avg loss: 2.486127, ppl: 12.014655 +epoch: 2, batch: 26966, sum loss: 3966.214111, avg loss: 2.457382, ppl: 11.674205 +epoch: 2, batch: 26967, sum loss: 3957.826416, avg loss: 2.530580, ppl: 12.560783 +epoch: 2, batch: 26968, sum loss: 3946.572754, avg loss: 2.458924, ppl: 11.692222 +epoch: 2, batch: 26969, sum loss: 4583.886719, avg loss: 2.699580, ppl: 14.873483 +epoch: 2, batch: 26970, sum loss: 4600.629395, avg loss: 2.587531, ppl: 13.296896 +epoch: 2, batch: 26971, sum loss: 3324.924805, avg loss: 2.222543, ppl: 9.230780 +epoch: 2, batch: 26972, sum loss: 5126.508789, avg loss: 2.664506, ppl: 14.360849 +epoch: 2, batch: 26973, sum loss: 3471.232910, avg loss: 2.474150, ppl: 11.871617 +epoch: 2, batch: 26974, sum loss: 4478.367676, avg loss: 2.461994, ppl: 11.728179 +epoch: 2, batch: 26975, sum loss: 4848.542969, avg loss: 2.579012, ppl: 13.184108 +epoch: 2, batch: 26976, sum loss: 4473.831543, avg loss: 2.434076, ppl: 11.405273 +epoch: 2, batch: 26977, sum loss: 3186.334229, avg loss: 2.182421, ppl: 8.867746 +epoch: 2, batch: 26978, sum loss: 3952.819336, avg loss: 2.245920, ppl: 9.449104 +epoch: 2, batch: 26979, sum loss: 4296.013184, avg loss: 2.393322, ppl: 10.949811 +epoch: 2, batch: 26980, sum loss: 4906.265625, avg loss: 2.663554, ppl: 14.347184 +epoch: 2, batch: 26981, sum loss: 4012.370850, avg loss: 2.534662, ppl: 12.612173 +epoch: 2, batch: 26982, sum loss: 4820.619141, avg loss: 2.645784, ppl: 14.094496 +epoch: 2, batch: 26983, sum loss: 4371.849121, avg loss: 2.749591, ppl: 15.636230 +epoch: 2, batch: 26984, sum loss: 4050.292725, avg loss: 2.393790, ppl: 10.954935 +epoch: 2, batch: 26985, sum loss: 3742.431641, avg loss: 2.323049, ppl: 10.206745 +epoch: 2, batch: 26986, sum loss: 4773.583496, avg loss: 2.519041, ppl: 12.416687 +epoch: 2, batch: 26987, sum loss: 4733.604980, avg loss: 2.578216, ppl: 13.173616 +epoch: 2, batch: 26988, sum loss: 3367.040771, avg loss: 2.304614, ppl: 10.020308 +epoch: 2, batch: 26989, sum 
loss: 3975.875000, avg loss: 2.430242, ppl: 11.361627 +epoch: 2, batch: 26990, sum loss: 5946.854980, avg loss: 2.758281, ppl: 15.772714 +epoch: 2, batch: 26991, sum loss: 4176.541992, avg loss: 2.579705, ppl: 13.193242 +epoch: 2, batch: 26992, sum loss: 5949.872070, avg loss: 2.846829, ppl: 17.233044 +epoch: 2, batch: 26993, sum loss: 4468.531250, avg loss: 2.653522, ppl: 14.203980 +epoch: 2, batch: 26994, sum loss: 4725.065918, avg loss: 2.636756, ppl: 13.967814 +epoch: 2, batch: 26995, sum loss: 3983.209473, avg loss: 2.463333, ppl: 11.743891 +epoch: 2, batch: 26996, sum loss: 4411.102539, avg loss: 2.670159, ppl: 14.442266 +epoch: 2, batch: 26997, sum loss: 5242.072266, avg loss: 2.584848, ppl: 13.261278 +epoch: 2, batch: 26998, sum loss: 4774.087891, avg loss: 2.855316, ppl: 17.379927 +epoch: 2, batch: 26999, sum loss: 4440.994629, avg loss: 2.646600, ppl: 14.105993 +epoch: 2, batch: 27000, sum loss: 3789.460693, avg loss: 2.263716, ppl: 9.618769 +epoch: 2, batch: 27001, sum loss: 4841.864746, avg loss: 2.593393, ppl: 13.375074 +epoch: 2, batch: 27002, sum loss: 4182.123047, avg loss: 2.505766, ppl: 12.252940 +epoch: 2, batch: 27003, sum loss: 4617.917969, avg loss: 2.647889, ppl: 14.124187 +epoch: 2, batch: 27004, sum loss: 3440.642334, avg loss: 2.484218, ppl: 11.991743 +epoch: 2, batch: 27005, sum loss: 4032.049072, avg loss: 2.426022, ppl: 11.313789 +epoch: 2, batch: 27006, sum loss: 3802.503174, avg loss: 2.390008, ppl: 10.913584 +epoch: 2, batch: 27007, sum loss: 4478.121094, avg loss: 2.754072, ppl: 15.706458 +epoch: 2, batch: 27008, sum loss: 4319.018555, avg loss: 2.682620, ppl: 14.623361 +epoch: 2, batch: 27009, sum loss: 4345.163086, avg loss: 2.659219, ppl: 14.285122 +epoch: 2, batch: 27010, sum loss: 3001.342285, avg loss: 1.953999, ppl: 7.056851 +epoch: 2, batch: 27011, sum loss: 4206.790039, avg loss: 2.397031, ppl: 10.990500 +epoch: 2, batch: 27012, sum loss: 4632.624023, avg loss: 2.742821, ppl: 15.530731 +epoch: 2, batch: 27013, sum loss: 
4297.203125, avg loss: 2.609109, ppl: 13.586942 +epoch: 2, batch: 27014, sum loss: 4171.138184, avg loss: 2.419454, ppl: 11.239719 +epoch: 2, batch: 27015, sum loss: 3641.770020, avg loss: 2.273265, ppl: 9.711052 +epoch: 2, batch: 27016, sum loss: 5142.347168, avg loss: 2.614310, ppl: 13.657784 +epoch: 2, batch: 27017, sum loss: 4264.268555, avg loss: 2.614512, ppl: 13.660544 +epoch: 2, batch: 27018, sum loss: 3198.368896, avg loss: 2.266739, ppl: 9.647889 +epoch: 2, batch: 27019, sum loss: 5037.057617, avg loss: 2.716859, ppl: 15.132721 +epoch: 2, batch: 27020, sum loss: 4116.900391, avg loss: 2.574672, ppl: 13.127007 +epoch: 2, batch: 27021, sum loss: 3854.378906, avg loss: 2.535776, ppl: 12.626221 +epoch: 2, batch: 27022, sum loss: 4211.644531, avg loss: 2.503950, ppl: 12.230715 +epoch: 2, batch: 27023, sum loss: 3914.077148, avg loss: 2.588675, ppl: 13.312128 +epoch: 2, batch: 27024, sum loss: 4796.480469, avg loss: 2.696167, ppl: 14.822800 +epoch: 2, batch: 27025, sum loss: 4500.743164, avg loss: 2.541357, ppl: 12.696886 +epoch: 2, batch: 27026, sum loss: 4384.197754, avg loss: 2.642675, ppl: 14.050738 +epoch: 2, batch: 27027, sum loss: 4648.861328, avg loss: 2.792109, ppl: 16.315393 +epoch: 2, batch: 27028, sum loss: 4806.871094, avg loss: 2.830902, ppl: 16.960751 +epoch: 2, batch: 27029, sum loss: 3933.974365, avg loss: 2.536412, ppl: 12.634252 +epoch: 2, batch: 27030, sum loss: 4091.885498, avg loss: 2.451699, ppl: 11.608052 +epoch: 2, batch: 27031, sum loss: 4554.836914, avg loss: 2.498539, ppl: 12.164711 +epoch: 2, batch: 27032, sum loss: 4427.837402, avg loss: 2.537443, ppl: 12.647286 +epoch: 2, batch: 27033, sum loss: 4318.472168, avg loss: 2.535803, ppl: 12.626564 +epoch: 2, batch: 27034, sum loss: 5521.642090, avg loss: 2.756686, ppl: 15.747568 +epoch: 2, batch: 27035, sum loss: 4170.604980, avg loss: 2.544603, ppl: 12.738174 +epoch: 2, batch: 27036, sum loss: 4561.918457, avg loss: 2.605322, ppl: 13.535582 +epoch: 2, batch: 27037, sum loss: 
3794.533447, avg loss: 2.257307, ppl: 9.557317 +epoch: 2, batch: 27038, sum loss: 3186.195557, avg loss: 2.087940, ppl: 8.068274 +epoch: 2, batch: 27039, sum loss: 5253.520508, avg loss: 2.694113, ppl: 14.792393 +epoch: 2, batch: 27040, sum loss: 3856.615723, avg loss: 2.344447, ppl: 10.427506 +epoch: 2, batch: 27041, sum loss: 4859.521484, avg loss: 2.730068, ppl: 15.333933 +epoch: 2, batch: 27042, sum loss: 4648.885742, avg loss: 2.701270, ppl: 14.898639 +epoch: 2, batch: 27043, sum loss: 4732.099609, avg loss: 2.643631, ppl: 14.064178 +epoch: 2, batch: 27044, sum loss: 4530.200195, avg loss: 2.765690, ppl: 15.889997 +epoch: 2, batch: 27045, sum loss: 4367.700195, avg loss: 2.456524, ppl: 11.664201 +epoch: 2, batch: 27046, sum loss: 3702.221191, avg loss: 2.308118, ppl: 10.055484 +epoch: 2, batch: 27047, sum loss: 4167.256836, avg loss: 2.575560, ppl: 13.138677 +epoch: 2, batch: 27048, sum loss: 4162.127930, avg loss: 2.510330, ppl: 12.308997 +epoch: 2, batch: 27049, sum loss: 3560.189453, avg loss: 2.349960, ppl: 10.485151 +epoch: 2, batch: 27050, sum loss: 3648.032227, avg loss: 2.344494, ppl: 10.427991 +epoch: 2, batch: 27051, sum loss: 4278.845215, avg loss: 2.409260, ppl: 11.125723 +epoch: 2, batch: 27052, sum loss: 4228.822266, avg loss: 2.676470, ppl: 14.533696 +epoch: 2, batch: 27053, sum loss: 4354.604004, avg loss: 2.412523, ppl: 11.162088 +epoch: 2, batch: 27054, sum loss: 3758.086670, avg loss: 2.253050, ppl: 9.516714 +epoch: 2, batch: 27055, sum loss: 4120.020508, avg loss: 2.715900, ppl: 15.118213 +epoch: 2, batch: 27056, sum loss: 4189.565430, avg loss: 2.549948, ppl: 12.806443 +epoch: 2, batch: 27057, sum loss: 3697.171875, avg loss: 2.360902, ppl: 10.600505 +epoch: 2, batch: 27058, sum loss: 4271.768066, avg loss: 2.469230, ppl: 11.813346 +epoch: 2, batch: 27059, sum loss: 4385.619141, avg loss: 2.545339, ppl: 12.747550 +epoch: 2, batch: 27060, sum loss: 3991.742676, avg loss: 2.526419, ppl: 12.508637 +epoch: 2, batch: 27061, sum loss: 
3332.226562, avg loss: 2.393841, ppl: 10.955494 +epoch: 2, batch: 27062, sum loss: 4243.635254, avg loss: 2.729026, ppl: 15.317962 +epoch: 2, batch: 27063, sum loss: 4773.235352, avg loss: 2.707451, ppl: 14.991009 +epoch: 2, batch: 27064, sum loss: 4057.054932, avg loss: 2.436670, ppl: 11.434895 +epoch: 2, batch: 27065, sum loss: 4775.983887, avg loss: 2.770293, ppl: 15.963303 +epoch: 2, batch: 27066, sum loss: 3776.870117, avg loss: 2.369429, ppl: 10.691287 +epoch: 2, batch: 27067, sum loss: 4365.629883, avg loss: 2.448475, ppl: 11.570683 +epoch: 2, batch: 27068, sum loss: 3933.430664, avg loss: 2.492668, ppl: 12.093503 +epoch: 2, batch: 27069, sum loss: 5002.337402, avg loss: 2.819807, ppl: 16.773609 +epoch: 2, batch: 27070, sum loss: 4264.953125, avg loss: 2.432945, ppl: 11.392389 +epoch: 2, batch: 27071, sum loss: 4722.061523, avg loss: 2.630675, ppl: 13.883139 +epoch: 2, batch: 27072, sum loss: 4418.779297, avg loss: 2.461715, ppl: 11.724908 +epoch: 2, batch: 27073, sum loss: 4628.731445, avg loss: 2.477908, ppl: 11.916306 +epoch: 2, batch: 27074, sum loss: 4403.822754, avg loss: 2.651308, ppl: 14.172565 +epoch: 2, batch: 27075, sum loss: 4568.816895, avg loss: 2.613740, ppl: 13.650002 +epoch: 2, batch: 27076, sum loss: 3969.577393, avg loss: 2.436819, ppl: 11.436599 +epoch: 2, batch: 27077, sum loss: 3941.173828, avg loss: 2.387144, ppl: 10.882365 +epoch: 2, batch: 27078, sum loss: 4149.288086, avg loss: 2.315451, ppl: 10.129492 +epoch: 2, batch: 27079, sum loss: 3856.547607, avg loss: 2.395371, ppl: 10.972270 +epoch: 2, batch: 27080, sum loss: 3923.404297, avg loss: 2.560969, ppl: 12.948357 +epoch: 2, batch: 27081, sum loss: 4146.628906, avg loss: 2.487480, ppl: 12.030916 +epoch: 2, batch: 27082, sum loss: 4347.690430, avg loss: 2.682104, ppl: 14.615810 +epoch: 2, batch: 27083, sum loss: 4364.812988, avg loss: 2.406181, ppl: 11.091525 +epoch: 2, batch: 27084, sum loss: 4850.550293, avg loss: 2.484913, ppl: 12.000077 +epoch: 2, batch: 27085, sum loss: 
3970.232422, avg loss: 2.462923, ppl: 11.739079 +epoch: 2, batch: 27086, sum loss: 4339.229980, avg loss: 2.409345, ppl: 11.126667 +epoch: 2, batch: 27087, sum loss: 4609.988770, avg loss: 2.744041, ppl: 15.549694 +epoch: 2, batch: 27088, sum loss: 3992.046875, avg loss: 2.425302, ppl: 11.305643 +epoch: 2, batch: 27089, sum loss: 4338.706055, avg loss: 2.230697, ppl: 9.306354 +epoch: 2, batch: 27090, sum loss: 3830.233887, avg loss: 2.384953, ppl: 10.858547 +epoch: 2, batch: 27091, sum loss: 5173.929688, avg loss: 2.593448, ppl: 13.375817 +epoch: 2, batch: 27092, sum loss: 3919.861328, avg loss: 2.471539, ppl: 11.840659 +epoch: 2, batch: 27093, sum loss: 5045.468750, avg loss: 2.575533, ppl: 13.138317 +epoch: 2, batch: 27094, sum loss: 3845.523193, avg loss: 2.229289, ppl: 9.293255 +epoch: 2, batch: 27095, sum loss: 4150.841797, avg loss: 2.443109, ppl: 11.508763 +epoch: 2, batch: 27096, sum loss: 4943.438477, avg loss: 2.469250, ppl: 11.813583 +epoch: 2, batch: 27097, sum loss: 3888.518066, avg loss: 2.467334, ppl: 11.790968 +epoch: 2, batch: 27098, sum loss: 4109.987305, avg loss: 2.347223, ppl: 10.456492 +epoch: 2, batch: 27099, sum loss: 3932.983887, avg loss: 2.503491, ppl: 12.225097 +epoch: 2, batch: 27100, sum loss: 4995.907715, avg loss: 2.608829, ppl: 13.583136 +epoch: 2, batch: 27101, sum loss: 4606.414551, avg loss: 2.642808, ppl: 14.052610 +epoch: 2, batch: 27102, sum loss: 3395.177246, avg loss: 2.286314, ppl: 9.838611 +epoch: 2, batch: 27103, sum loss: 4083.198486, avg loss: 2.335926, ppl: 10.339030 +epoch: 2, batch: 27104, sum loss: 4095.283936, avg loss: 2.404747, ppl: 11.075625 +epoch: 2, batch: 27105, sum loss: 5092.508789, avg loss: 2.827600, ppl: 16.904848 +epoch: 2, batch: 27106, sum loss: 3476.645264, avg loss: 2.317764, ppl: 10.152943 +epoch: 2, batch: 27107, sum loss: 3704.002930, avg loss: 2.430448, ppl: 11.363973 +epoch: 2, batch: 27108, sum loss: 3665.633545, avg loss: 2.325910, ppl: 10.235987 +epoch: 2, batch: 27109, sum loss: 
5306.922852, avg loss: 2.596342, ppl: 13.414576 +epoch: 2, batch: 27110, sum loss: 3872.329102, avg loss: 2.239635, ppl: 9.389906 +epoch: 2, batch: 27111, sum loss: 3710.749268, avg loss: 2.322121, ppl: 10.197279 +epoch: 2, batch: 27112, sum loss: 4980.549316, avg loss: 2.693645, ppl: 14.785467 +epoch: 2, batch: 27113, sum loss: 4283.310547, avg loss: 2.474472, ppl: 11.875430 +epoch: 2, batch: 27114, sum loss: 5720.400391, avg loss: 2.947141, ppl: 19.051401 +epoch: 2, batch: 27115, sum loss: 3855.519531, avg loss: 2.513376, ppl: 12.346547 +epoch: 2, batch: 27116, sum loss: 4003.805664, avg loss: 2.476070, ppl: 11.894432 +epoch: 2, batch: 27117, sum loss: 3871.116699, avg loss: 2.475138, ppl: 11.883352 +epoch: 2, batch: 27118, sum loss: 4152.010254, avg loss: 2.451010, ppl: 11.600051 +epoch: 2, batch: 27119, sum loss: 4695.979492, avg loss: 2.663630, ppl: 14.348275 +epoch: 2, batch: 27120, sum loss: 4144.551758, avg loss: 2.481768, ppl: 11.962391 +epoch: 2, batch: 27121, sum loss: 4421.436523, avg loss: 2.606979, ppl: 13.558032 +epoch: 2, batch: 27122, sum loss: 4681.968750, avg loss: 2.605436, ppl: 13.537130 +epoch: 2, batch: 27123, sum loss: 4854.591309, avg loss: 2.565852, ppl: 13.011736 +epoch: 2, batch: 27124, sum loss: 4127.790039, avg loss: 2.288132, ppl: 9.856508 +epoch: 2, batch: 27125, sum loss: 3905.251465, avg loss: 2.410649, ppl: 11.141191 +epoch: 2, batch: 27126, sum loss: 3731.968750, avg loss: 2.364999, ppl: 10.644032 +epoch: 2, batch: 27127, sum loss: 4145.873535, avg loss: 2.573478, ppl: 13.111353 +epoch: 2, batch: 27128, sum loss: 4144.320312, avg loss: 2.562969, ppl: 12.974277 +epoch: 2, batch: 27129, sum loss: 4522.753418, avg loss: 2.516836, ppl: 12.389331 +epoch: 2, batch: 27130, sum loss: 4129.207031, avg loss: 2.454939, ppl: 11.645720 +epoch: 2, batch: 27131, sum loss: 4263.267578, avg loss: 2.301980, ppl: 9.993954 +epoch: 2, batch: 27132, sum loss: 3156.272705, avg loss: 2.487212, ppl: 12.027701 +epoch: 2, batch: 27133, sum loss: 
4314.731934, avg loss: 2.459938, ppl: 11.704089 +epoch: 2, batch: 27134, sum loss: 3714.618164, avg loss: 2.311523, ppl: 10.089784 +epoch: 2, batch: 27135, sum loss: 4254.209473, avg loss: 2.564322, ppl: 12.991840 +epoch: 2, batch: 27136, sum loss: 5182.229492, avg loss: 2.834918, ppl: 17.028999 +epoch: 2, batch: 27137, sum loss: 4737.873047, avg loss: 2.494931, ppl: 12.120892 +epoch: 2, batch: 27138, sum loss: 4279.733887, avg loss: 2.394927, ppl: 10.967395 +epoch: 2, batch: 27139, sum loss: 3762.793457, avg loss: 2.208212, ppl: 9.099433 +epoch: 2, batch: 27140, sum loss: 3848.206787, avg loss: 2.427891, ppl: 11.334949 +epoch: 2, batch: 27141, sum loss: 4494.473633, avg loss: 2.588983, ppl: 13.316216 +epoch: 2, batch: 27142, sum loss: 3572.514893, avg loss: 2.295961, ppl: 9.933974 +epoch: 2, batch: 27143, sum loss: 3875.173828, avg loss: 2.405446, ppl: 11.083376 +epoch: 2, batch: 27144, sum loss: 4180.602539, avg loss: 2.512381, ppl: 12.334267 +epoch: 2, batch: 27145, sum loss: 3772.869141, avg loss: 2.464317, ppl: 11.755451 +epoch: 2, batch: 27146, sum loss: 4428.494629, avg loss: 2.524797, ppl: 12.488365 +epoch: 2, batch: 27147, sum loss: 4143.847656, avg loss: 2.351787, ppl: 10.504320 +epoch: 2, batch: 27148, sum loss: 4618.513672, avg loss: 2.767234, ppl: 15.914555 +epoch: 2, batch: 27149, sum loss: 3780.788574, avg loss: 2.437646, ppl: 11.446061 +epoch: 2, batch: 27150, sum loss: 4014.925537, avg loss: 2.388415, ppl: 10.896208 +epoch: 2, batch: 27151, sum loss: 3946.626953, avg loss: 2.474374, ppl: 11.874275 +epoch: 2, batch: 27152, sum loss: 5592.727051, avg loss: 2.790782, ppl: 16.293756 +epoch: 2, batch: 27153, sum loss: 4474.239258, avg loss: 2.690463, ppl: 14.738492 +epoch: 2, batch: 27154, sum loss: 3264.096680, avg loss: 2.189200, ppl: 8.928065 +epoch: 2, batch: 27155, sum loss: 4405.770508, avg loss: 2.622483, ppl: 13.769865 +epoch: 2, batch: 27156, sum loss: 4961.817383, avg loss: 2.744368, ppl: 15.554781 +epoch: 2, batch: 27157, sum loss: 
4224.970703, avg loss: 2.309990, ppl: 10.074321 +epoch: 2, batch: 27158, sum loss: 4004.183105, avg loss: 2.417985, ppl: 11.223222 +epoch: 2, batch: 27159, sum loss: 4273.900879, avg loss: 2.503750, ppl: 12.228262 +epoch: 2, batch: 27160, sum loss: 3498.264893, avg loss: 2.170140, ppl: 8.759506 +epoch: 2, batch: 27161, sum loss: 5113.646973, avg loss: 2.738965, ppl: 15.470958 +epoch: 2, batch: 27162, sum loss: 4078.466797, avg loss: 2.448059, ppl: 11.565879 +epoch: 2, batch: 27163, sum loss: 4218.771973, avg loss: 2.496315, ppl: 12.137681 +epoch: 2, batch: 27164, sum loss: 3745.803711, avg loss: 2.345525, ppl: 10.438754 +epoch: 2, batch: 27165, sum loss: 4362.687988, avg loss: 2.480209, ppl: 11.943765 +epoch: 2, batch: 27166, sum loss: 4254.429199, avg loss: 2.597332, ppl: 13.427858 +epoch: 2, batch: 27167, sum loss: 3644.233643, avg loss: 2.349602, ppl: 10.481402 +epoch: 2, batch: 27168, sum loss: 3748.363281, avg loss: 2.594023, ppl: 13.383505 +epoch: 2, batch: 27169, sum loss: 4178.687500, avg loss: 2.465302, ppl: 11.767041 +epoch: 2, batch: 27170, sum loss: 3949.761719, avg loss: 2.335755, ppl: 10.337263 +epoch: 2, batch: 27171, sum loss: 4215.628418, avg loss: 2.653007, ppl: 14.196668 +epoch: 2, batch: 27172, sum loss: 3828.852539, avg loss: 2.350431, ppl: 10.490092 +epoch: 2, batch: 27173, sum loss: 4891.441895, avg loss: 2.570385, ppl: 13.070849 +epoch: 2, batch: 27174, sum loss: 4223.819336, avg loss: 2.569233, ppl: 13.055809 +epoch: 2, batch: 27175, sum loss: 3565.625977, avg loss: 2.291533, ppl: 9.890092 +epoch: 2, batch: 27176, sum loss: 3533.359375, avg loss: 2.201470, ppl: 9.038289 +epoch: 2, batch: 27177, sum loss: 3539.875000, avg loss: 2.441293, ppl: 11.487887 +epoch: 2, batch: 27178, sum loss: 4227.239258, avg loss: 2.587050, ppl: 13.290504 +epoch: 2, batch: 27179, sum loss: 3716.387695, avg loss: 2.383828, ppl: 10.846342 +epoch: 2, batch: 27180, sum loss: 3793.721191, avg loss: 2.232914, ppl: 9.327007 +epoch: 2, batch: 27181, sum loss: 
4116.016602, avg loss: 2.381954, ppl: 10.826035 +epoch: 2, batch: 27182, sum loss: 4646.420898, avg loss: 2.453232, ppl: 11.625858 +epoch: 2, batch: 27183, sum loss: 4305.249512, avg loss: 2.660846, ppl: 14.308395 +epoch: 2, batch: 27184, sum loss: 3963.727783, avg loss: 2.373490, ppl: 10.734787 +epoch: 2, batch: 27185, sum loss: 4235.500000, avg loss: 2.427221, ppl: 11.327354 +epoch: 2, batch: 27186, sum loss: 4652.456543, avg loss: 2.631480, ppl: 13.894318 +epoch: 2, batch: 27187, sum loss: 4396.665039, avg loss: 2.643816, ppl: 14.066773 +epoch: 2, batch: 27188, sum loss: 3240.179199, avg loss: 2.253254, ppl: 9.518659 +epoch: 2, batch: 27189, sum loss: 3985.571777, avg loss: 2.276169, ppl: 9.739298 +epoch: 2, batch: 27190, sum loss: 4190.846191, avg loss: 2.328248, ppl: 10.259951 +epoch: 2, batch: 27191, sum loss: 4090.786377, avg loss: 2.576062, ppl: 13.145272 +epoch: 2, batch: 27192, sum loss: 3796.752686, avg loss: 2.401488, ppl: 11.039592 +epoch: 2, batch: 27193, sum loss: 4283.894531, avg loss: 2.356378, ppl: 10.552656 +epoch: 2, batch: 27194, sum loss: 4912.406250, avg loss: 2.700608, ppl: 14.888785 +epoch: 2, batch: 27195, sum loss: 4510.704102, avg loss: 2.405709, ppl: 11.086286 +epoch: 2, batch: 27196, sum loss: 4550.594238, avg loss: 2.579702, ppl: 13.193205 +epoch: 2, batch: 27197, sum loss: 3923.697754, avg loss: 2.266723, ppl: 9.647735 +epoch: 2, batch: 27198, sum loss: 3422.764160, avg loss: 2.181494, ppl: 8.859532 +epoch: 2, batch: 27199, sum loss: 4477.259277, avg loss: 2.653977, ppl: 14.210443 +epoch: 2, batch: 27200, sum loss: 3292.236572, avg loss: 2.167371, ppl: 8.735289 +epoch: 2, batch: 27201, sum loss: 3663.916504, avg loss: 2.320403, ppl: 10.179777 +epoch: 2, batch: 27202, sum loss: 4782.540039, avg loss: 2.511838, ppl: 12.327573 +epoch: 2, batch: 27203, sum loss: 4457.416504, avg loss: 2.486010, ppl: 12.013251 +epoch: 2, batch: 27204, sum loss: 4827.627930, avg loss: 2.689486, ppl: 14.724110 +epoch: 2, batch: 27205, sum loss: 3658.184326, 
avg loss: 2.167171, ppl: 8.733541 +epoch: 2, batch: 27206, sum loss: 4653.842285, avg loss: 2.878072, ppl: 17.779961 +epoch: 2, batch: 27207, sum loss: 4367.989258, avg loss: 2.295317, ppl: 9.927587 +epoch: 2, batch: 27208, sum loss: 3919.904785, avg loss: 2.258010, ppl: 9.564034 +epoch: 2, batch: 27209, sum loss: 3493.955566, avg loss: 2.310817, ppl: 10.082662 +epoch: 2, batch: 27210, sum loss: 4889.790527, avg loss: 2.679337, ppl: 14.575431 +epoch: 2, batch: 27211, sum loss: 4136.140625, avg loss: 2.580250, ppl: 13.200435 +epoch: 2, batch: 27212, sum loss: 3168.806152, avg loss: 2.154185, ppl: 8.620862 +epoch: 2, batch: 27213, sum loss: 4699.144043, avg loss: 2.534598, ppl: 12.611356 +epoch: 2, batch: 27214, sum loss: 3692.209229, avg loss: 2.402218, ppl: 11.047654 +epoch: 2, batch: 27215, sum loss: 3674.319092, avg loss: 2.103216, ppl: 8.192478 +epoch: 2, batch: 27216, sum loss: 4200.250977, avg loss: 2.641667, ppl: 14.036585 +epoch: 2, batch: 27217, sum loss: 3491.211426, avg loss: 2.619063, ppl: 13.722864 +epoch: 2, batch: 27218, sum loss: 4394.529297, avg loss: 2.611129, ppl: 13.614407 +epoch: 2, batch: 27219, sum loss: 4278.371094, avg loss: 2.399535, ppl: 11.018054 +epoch: 2, batch: 27220, sum loss: 5205.075195, avg loss: 2.917643, ppl: 18.497643 +epoch: 2, batch: 27221, sum loss: 3519.287598, avg loss: 2.371488, ppl: 10.713318 +epoch: 2, batch: 27222, sum loss: 3689.142090, avg loss: 2.288550, ppl: 9.860626 +epoch: 2, batch: 27223, sum loss: 4008.706055, avg loss: 2.471459, ppl: 11.839705 +epoch: 2, batch: 27224, sum loss: 4332.000977, avg loss: 2.476845, ppl: 11.903644 +epoch: 2, batch: 27225, sum loss: 4507.072266, avg loss: 2.611282, ppl: 13.616491 +epoch: 2, batch: 27226, sum loss: 4078.393066, avg loss: 2.274620, ppl: 9.724221 +epoch: 2, batch: 27227, sum loss: 3988.406738, avg loss: 2.600004, ppl: 13.463798 +epoch: 2, batch: 27228, sum loss: 4183.991211, avg loss: 2.552771, ppl: 12.842636 +epoch: 2, batch: 27229, sum loss: 4503.245605, avg loss: 
2.655216, ppl: 14.228052 +epoch: 2, batch: 27230, sum loss: 5059.349121, avg loss: 2.832782, ppl: 16.992674 +epoch: 2, batch: 27231, sum loss: 4350.833984, avg loss: 2.520761, ppl: 12.438062 +epoch: 2, batch: 27232, sum loss: 4685.750000, avg loss: 2.657828, ppl: 14.265265 +epoch: 2, batch: 27233, sum loss: 4785.413574, avg loss: 2.597944, ppl: 13.436091 +epoch: 2, batch: 27234, sum loss: 3714.370117, avg loss: 2.477899, ppl: 11.916201 +epoch: 2, batch: 27235, sum loss: 3852.711670, avg loss: 2.381157, ppl: 10.817408 +epoch: 2, batch: 27236, sum loss: 3950.125732, avg loss: 2.246943, ppl: 9.458776 +epoch: 2, batch: 27237, sum loss: 4786.243164, avg loss: 2.745980, ppl: 15.579875 +epoch: 2, batch: 27238, sum loss: 5015.731445, avg loss: 2.500365, ppl: 12.186938 +epoch: 2, batch: 27239, sum loss: 3968.909180, avg loss: 2.597454, ppl: 13.429498 +epoch: 2, batch: 27240, sum loss: 4047.535645, avg loss: 2.203340, ppl: 9.055208 +epoch: 2, batch: 27241, sum loss: 4229.466309, avg loss: 2.362830, ppl: 10.620968 +epoch: 2, batch: 27242, sum loss: 3123.796143, avg loss: 2.164793, ppl: 8.712796 +epoch: 2, batch: 27243, sum loss: 5630.887695, avg loss: 2.836719, ppl: 17.059706 +epoch: 2, batch: 27244, sum loss: 3658.759277, avg loss: 2.258493, ppl: 9.568663 +epoch: 2, batch: 27245, sum loss: 4334.426758, avg loss: 2.528837, ppl: 12.538918 +epoch: 2, batch: 27246, sum loss: 4347.392090, avg loss: 2.517309, ppl: 12.395193 +epoch: 2, batch: 27247, sum loss: 5126.055176, avg loss: 2.586304, ppl: 13.280599 +epoch: 2, batch: 27248, sum loss: 4673.143555, avg loss: 2.652181, ppl: 14.184948 +epoch: 2, batch: 27249, sum loss: 3521.592285, avg loss: 2.144697, ppl: 8.539453 +epoch: 2, batch: 27250, sum loss: 3883.801758, avg loss: 2.224400, ppl: 9.247931 +epoch: 2, batch: 27251, sum loss: 3907.129883, avg loss: 2.425282, ppl: 11.305423 +epoch: 2, batch: 27252, sum loss: 4112.533691, avg loss: 2.501541, ppl: 12.201283 +epoch: 2, batch: 27253, sum loss: 4725.904785, avg loss: 2.624045, 
ppl: 13.791396 +epoch: 2, batch: 27254, sum loss: 4727.293457, avg loss: 2.555294, ppl: 12.875082 +epoch: 2, batch: 27255, sum loss: 3969.750000, avg loss: 2.394300, ppl: 10.960528 +epoch: 2, batch: 27256, sum loss: 3533.188965, avg loss: 2.445113, ppl: 11.531857 +epoch: 2, batch: 27257, sum loss: 4137.109863, avg loss: 2.597056, ppl: 13.424158 +epoch: 2, batch: 27258, sum loss: 4503.335938, avg loss: 2.793633, ppl: 16.340273 +epoch: 2, batch: 27259, sum loss: 3407.526611, avg loss: 2.170399, ppl: 8.761781 +epoch: 2, batch: 27260, sum loss: 4749.147461, avg loss: 2.581058, ppl: 13.211112 +epoch: 2, batch: 27261, sum loss: 4655.022461, avg loss: 2.784105, ppl: 16.185318 +epoch: 2, batch: 27262, sum loss: 4089.979004, avg loss: 2.507651, ppl: 12.276061 +epoch: 2, batch: 27263, sum loss: 5028.892090, avg loss: 2.839578, ppl: 17.108540 +epoch: 2, batch: 27264, sum loss: 4359.671875, avg loss: 2.499812, ppl: 12.180200 +epoch: 2, batch: 27265, sum loss: 4060.464111, avg loss: 2.437253, ppl: 11.441570 +epoch: 2, batch: 27266, sum loss: 3588.447754, avg loss: 2.188078, ppl: 8.918056 +epoch: 2, batch: 27267, sum loss: 4154.419434, avg loss: 2.369891, ppl: 10.696231 +epoch: 2, batch: 27268, sum loss: 3953.237305, avg loss: 2.453903, ppl: 11.633664 +epoch: 2, batch: 27269, sum loss: 5077.842285, avg loss: 2.759697, ppl: 15.795052 +epoch: 2, batch: 27270, sum loss: 3954.974121, avg loss: 2.595127, ppl: 13.398293 +epoch: 2, batch: 27271, sum loss: 3602.196777, avg loss: 2.337571, ppl: 10.356050 +epoch: 2, batch: 27272, sum loss: 3898.411865, avg loss: 2.412384, ppl: 11.160531 +epoch: 2, batch: 27273, sum loss: 3932.996582, avg loss: 2.321722, ppl: 10.193208 +epoch: 2, batch: 27274, sum loss: 4003.627930, avg loss: 2.377451, ppl: 10.777401 +epoch: 2, batch: 27275, sum loss: 4263.729492, avg loss: 2.595088, ppl: 13.397763 +epoch: 2, batch: 27276, sum loss: 3474.846436, avg loss: 2.152941, ppl: 8.610142 +epoch: 2, batch: 27277, sum loss: 3792.766602, avg loss: 2.395936, ppl: 
10.978467 +epoch: 2, batch: 27278, sum loss: 4738.764160, avg loss: 2.766354, ppl: 15.900553 +epoch: 2, batch: 27279, sum loss: 3973.922607, avg loss: 2.462158, ppl: 11.730095 +epoch: 2, batch: 27280, sum loss: 4944.238281, avg loss: 2.620158, ppl: 13.737893 +epoch: 2, batch: 27281, sum loss: 4135.687988, avg loss: 2.407269, ppl: 11.103596 +epoch: 2, batch: 27282, sum loss: 4176.185547, avg loss: 2.411193, ppl: 11.147248 +epoch: 2, batch: 27283, sum loss: 3728.053711, avg loss: 2.322775, ppl: 10.203950 +epoch: 2, batch: 27284, sum loss: 4684.107422, avg loss: 2.675104, ppl: 14.513865 +epoch: 2, batch: 27285, sum loss: 3992.505859, avg loss: 2.362429, ppl: 10.616713 +epoch: 2, batch: 27286, sum loss: 4213.550781, avg loss: 2.549032, ppl: 12.794718 +epoch: 2, batch: 27287, sum loss: 4469.978516, avg loss: 2.689518, ppl: 14.724573 +epoch: 2, batch: 27288, sum loss: 3767.844727, avg loss: 2.525365, ppl: 12.495457 +epoch: 2, batch: 27289, sum loss: 3738.386719, avg loss: 2.313358, ppl: 10.108312 +epoch: 2, batch: 27290, sum loss: 4833.676270, avg loss: 2.586236, ppl: 13.279699 +epoch: 2, batch: 27291, sum loss: 4838.728027, avg loss: 2.544021, ppl: 12.730761 +epoch: 2, batch: 27292, sum loss: 4647.713867, avg loss: 2.639247, ppl: 14.002655 +epoch: 2, batch: 27293, sum loss: 4594.411133, avg loss: 2.600120, ppl: 13.465348 +epoch: 2, batch: 27294, sum loss: 4578.527832, avg loss: 2.462898, ppl: 11.738784 +epoch: 2, batch: 27295, sum loss: 4435.427246, avg loss: 2.531637, ppl: 12.574069 +epoch: 2, batch: 27296, sum loss: 4973.275879, avg loss: 2.635546, ppl: 13.950931 +epoch: 2, batch: 27297, sum loss: 4324.012695, avg loss: 2.370621, ppl: 10.704037 +epoch: 2, batch: 27298, sum loss: 4768.278320, avg loss: 2.445271, ppl: 11.533672 +epoch: 2, batch: 27299, sum loss: 3964.623047, avg loss: 2.474796, ppl: 11.879281 +epoch: 2, batch: 27300, sum loss: 3675.811279, avg loss: 2.745191, ppl: 15.567593 +epoch: 2, batch: 27301, sum loss: 3495.738037, avg loss: 2.453149, ppl: 
11.624900 +epoch: 2, batch: 27302, sum loss: 4204.242188, avg loss: 2.236299, ppl: 9.358631 +epoch: 2, batch: 27303, sum loss: 5007.105957, avg loss: 2.705082, ppl: 14.955539 +epoch: 2, batch: 27304, sum loss: 5368.854004, avg loss: 2.492504, ppl: 12.091516 +epoch: 2, batch: 27305, sum loss: 4230.558594, avg loss: 2.442586, ppl: 11.502745 +epoch: 2, batch: 27306, sum loss: 3583.057373, avg loss: 2.319131, ppl: 10.166837 +epoch: 2, batch: 27307, sum loss: 4585.175781, avg loss: 2.552993, ppl: 12.845497 +epoch: 2, batch: 27308, sum loss: 3305.202637, avg loss: 2.242336, ppl: 9.415296 +epoch: 2, batch: 27309, sum loss: 5123.209473, avg loss: 2.619228, ppl: 13.725119 +epoch: 2, batch: 27310, sum loss: 4235.125000, avg loss: 2.519408, ppl: 12.421241 +epoch: 2, batch: 27311, sum loss: 3925.328125, avg loss: 2.601278, ppl: 13.480953 +epoch: 2, batch: 27312, sum loss: 5163.889648, avg loss: 2.749675, ppl: 15.637549 +epoch: 2, batch: 27313, sum loss: 2877.833496, avg loss: 2.041017, ppl: 7.698433 +epoch: 2, batch: 27314, sum loss: 3483.900146, avg loss: 2.224713, ppl: 9.250824 +epoch: 2, batch: 27315, sum loss: 3987.447754, avg loss: 2.325042, ppl: 10.227112 +epoch: 2, batch: 27316, sum loss: 3615.667480, avg loss: 2.320711, ppl: 10.182911 +epoch: 2, batch: 27317, sum loss: 3884.327637, avg loss: 2.312100, ppl: 10.095602 +epoch: 2, batch: 27318, sum loss: 4093.024658, avg loss: 2.464193, ppl: 11.753994 +epoch: 2, batch: 27319, sum loss: 4395.828125, avg loss: 2.289494, ppl: 9.869943 +epoch: 2, batch: 27320, sum loss: 3923.982422, avg loss: 2.576482, ppl: 13.150796 +epoch: 2, batch: 27321, sum loss: 4491.708984, avg loss: 2.473408, ppl: 11.862806 +epoch: 2, batch: 27322, sum loss: 3695.360596, avg loss: 2.230151, ppl: 9.301272 +epoch: 2, batch: 27323, sum loss: 4511.764648, avg loss: 2.569342, ppl: 13.057232 +epoch: 2, batch: 27324, sum loss: 4817.521973, avg loss: 2.692858, ppl: 14.773832 +epoch: 2, batch: 27325, sum loss: 5156.212891, avg loss: 2.883788, ppl: 17.881880 
+epoch: 2, batch: 27326, sum loss: 3891.296387, avg loss: 2.382913, ppl: 10.836419 +epoch: 2, batch: 27327, sum loss: 3324.625488, avg loss: 2.040900, ppl: 7.697530 +epoch: 2, batch: 27328, sum loss: 3717.858398, avg loss: 2.398618, ppl: 11.007957 +epoch: 2, batch: 27329, sum loss: 3429.532227, avg loss: 2.496021, ppl: 12.134110 +epoch: 2, batch: 27330, sum loss: 4884.148438, avg loss: 2.900326, ppl: 18.180067 +epoch: 2, batch: 27331, sum loss: 3938.090332, avg loss: 2.353909, ppl: 10.526643 +epoch: 2, batch: 27332, sum loss: 3860.487305, avg loss: 2.435639, ppl: 11.423112 +epoch: 2, batch: 27333, sum loss: 5156.605957, avg loss: 2.781341, ppl: 16.140648 +epoch: 2, batch: 27334, sum loss: 4719.859375, avg loss: 2.666587, ppl: 14.390775 +epoch: 2, batch: 27335, sum loss: 3517.239746, avg loss: 2.163124, ppl: 8.698270 +epoch: 2, batch: 27336, sum loss: 4706.705566, avg loss: 2.530487, ppl: 12.559619 +epoch: 2, batch: 27337, sum loss: 3710.450195, avg loss: 2.395384, ppl: 10.972414 +epoch: 2, batch: 27338, sum loss: 4322.787109, avg loss: 2.468754, ppl: 11.807720 +epoch: 2, batch: 27339, sum loss: 3899.779541, avg loss: 2.601588, ppl: 13.485129 +epoch: 2, batch: 27340, sum loss: 3919.688965, avg loss: 2.248817, ppl: 9.476522 +epoch: 2, batch: 27341, sum loss: 3427.393066, avg loss: 2.311121, ppl: 10.085729 +epoch: 2, batch: 27342, sum loss: 4085.324951, avg loss: 2.677146, ppl: 14.543530 +epoch: 2, batch: 27343, sum loss: 4533.312500, avg loss: 2.507363, ppl: 12.272526 +epoch: 2, batch: 27344, sum loss: 4303.845703, avg loss: 2.545148, ppl: 12.745119 +epoch: 2, batch: 27345, sum loss: 3844.843262, avg loss: 2.268344, ppl: 9.663386 +epoch: 2, batch: 27346, sum loss: 3611.951172, avg loss: 2.373161, ppl: 10.731263 +epoch: 2, batch: 27347, sum loss: 4694.450195, avg loss: 2.561075, ppl: 12.949731 +epoch: 2, batch: 27348, sum loss: 4340.925781, avg loss: 2.275118, ppl: 9.729071 +epoch: 2, batch: 27349, sum loss: 3342.914795, avg loss: 2.225642, ppl: 9.259427 +epoch: 2, 
batch: 27350, sum loss: 4129.242188, avg loss: 2.504089, ppl: 12.232406 +epoch: 2, batch: 27351, sum loss: 4512.660645, avg loss: 2.559649, ppl: 12.931274 +epoch: 2, batch: 27352, sum loss: 4575.528809, avg loss: 2.674184, ppl: 14.500514 +epoch: 2, batch: 27353, sum loss: 4024.301270, avg loss: 2.395417, ppl: 10.972777 +epoch: 2, batch: 27354, sum loss: 4348.591309, avg loss: 2.419917, ppl: 11.244930 +epoch: 2, batch: 27355, sum loss: 3239.323486, avg loss: 2.140994, ppl: 8.507889 +epoch: 2, batch: 27356, sum loss: 4219.843750, avg loss: 2.442039, ppl: 11.496461 +epoch: 2, batch: 27357, sum loss: 4882.524902, avg loss: 2.723104, ppl: 15.227511 +epoch: 2, batch: 27358, sum loss: 3667.212402, avg loss: 2.241572, ppl: 9.408113 +epoch: 2, batch: 27359, sum loss: 3736.403564, avg loss: 2.102647, ppl: 8.187813 +epoch: 2, batch: 27360, sum loss: 4275.050781, avg loss: 2.724698, ppl: 15.251804 +epoch: 2, batch: 27361, sum loss: 3835.352295, avg loss: 2.526582, ppl: 12.510677 +epoch: 2, batch: 27362, sum loss: 3700.040771, avg loss: 2.380979, ppl: 10.815481 +epoch: 2, batch: 27363, sum loss: 4481.590820, avg loss: 2.634680, ppl: 13.938851 +epoch: 2, batch: 27364, sum loss: 3941.635498, avg loss: 2.472795, ppl: 11.855539 +epoch: 2, batch: 27365, sum loss: 4855.369629, avg loss: 2.710982, ppl: 15.044046 +epoch: 2, batch: 27366, sum loss: 4378.052246, avg loss: 2.456820, ppl: 11.667644 +epoch: 2, batch: 27367, sum loss: 4452.356445, avg loss: 2.454441, ppl: 11.639929 +epoch: 2, batch: 27368, sum loss: 3935.920410, avg loss: 2.426585, ppl: 11.320154 +epoch: 2, batch: 27369, sum loss: 3588.520508, avg loss: 2.264051, ppl: 9.621986 +epoch: 2, batch: 27370, sum loss: 4620.947266, avg loss: 2.518228, ppl: 12.406588 +epoch: 2, batch: 27371, sum loss: 4108.861816, avg loss: 2.464824, ppl: 11.761412 +epoch: 2, batch: 27372, sum loss: 5068.064941, avg loss: 2.682935, ppl: 14.627971 +epoch: 2, batch: 27373, sum loss: 4344.535645, avg loss: 2.269872, ppl: 9.678164 +epoch: 2, batch: 
27374, sum loss: 4334.529297, avg loss: 2.524478, ppl: 12.484383 +epoch: 2, batch: 27375, sum loss: 3986.598877, avg loss: 2.384330, ppl: 10.851785 +epoch: 2, batch: 27376, sum loss: 5244.025391, avg loss: 2.744126, ppl: 15.551022 +epoch: 2, batch: 27377, sum loss: 3994.711426, avg loss: 2.352598, ppl: 10.512849 +epoch: 2, batch: 27378, sum loss: 4726.081055, avg loss: 2.813143, ppl: 16.662214 +epoch: 2, batch: 27379, sum loss: 4681.363770, avg loss: 2.486120, ppl: 12.014566 +epoch: 2, batch: 27380, sum loss: 4250.713379, avg loss: 2.518195, ppl: 12.406185 +epoch: 2, batch: 27381, sum loss: 4509.878418, avg loss: 2.497164, ppl: 12.147996 +epoch: 2, batch: 27382, sum loss: 3991.551025, avg loss: 2.308589, ppl: 10.060225 +epoch: 2, batch: 27383, sum loss: 3965.183594, avg loss: 2.519176, ppl: 12.418363 +epoch: 2, batch: 27384, sum loss: 4074.537109, avg loss: 2.304603, ppl: 10.020196 +epoch: 2, batch: 27385, sum loss: 4608.658203, avg loss: 2.404099, ppl: 11.068459 +epoch: 2, batch: 27386, sum loss: 3633.342529, avg loss: 2.272259, ppl: 9.701294 +epoch: 2, batch: 27387, sum loss: 3657.520508, avg loss: 2.253555, ppl: 9.521527 +epoch: 2, batch: 27388, sum loss: 4176.203125, avg loss: 2.460933, ppl: 11.715737 +epoch: 2, batch: 27389, sum loss: 4847.271484, avg loss: 2.600467, ppl: 13.470033 +epoch: 2, batch: 27390, sum loss: 4003.987305, avg loss: 2.444437, ppl: 11.524055 +epoch: 2, batch: 27391, sum loss: 4847.143555, avg loss: 2.700359, ppl: 14.885069 +epoch: 2, batch: 27392, sum loss: 3455.005615, avg loss: 2.122239, ppl: 8.349814 +epoch: 2, batch: 27393, sum loss: 3686.122803, avg loss: 2.175987, ppl: 8.810882 +epoch: 2, batch: 27394, sum loss: 4592.576660, avg loss: 2.551432, ppl: 12.825452 +epoch: 2, batch: 27395, sum loss: 3812.529541, avg loss: 2.253268, ppl: 9.518792 +epoch: 2, batch: 27396, sum loss: 4090.713135, avg loss: 2.517362, ppl: 12.395852 +epoch: 2, batch: 27397, sum loss: 4030.119141, avg loss: 2.295057, ppl: 9.924997 +epoch: 2, batch: 27398, sum 
loss: 4306.406738, avg loss: 2.595785, ppl: 13.407104 +epoch: 2, batch: 27399, sum loss: 4693.286621, avg loss: 2.462375, ppl: 11.732640 +epoch: 2, batch: 27400, sum loss: 3732.260498, avg loss: 2.357713, ppl: 10.566763 +epoch: 2, batch: 27401, sum loss: 4181.683594, avg loss: 2.371913, ppl: 10.717880 +epoch: 2, batch: 27402, sum loss: 3648.719482, avg loss: 2.306396, ppl: 10.038187 +epoch: 2, batch: 27403, sum loss: 3898.513184, avg loss: 2.375694, ppl: 10.758472 +epoch: 2, batch: 27404, sum loss: 4089.651855, avg loss: 2.296267, ppl: 9.937021 +epoch: 2, batch: 27405, sum loss: 3952.942383, avg loss: 2.469046, ppl: 11.811172 +epoch: 2, batch: 27406, sum loss: 3997.308594, avg loss: 2.161876, ppl: 8.687420 +epoch: 2, batch: 27407, sum loss: 3554.473389, avg loss: 2.375985, ppl: 10.761607 +epoch: 2, batch: 27408, sum loss: 4208.820312, avg loss: 2.356562, ppl: 10.554606 +epoch: 2, batch: 27409, sum loss: 4627.555664, avg loss: 2.626309, ppl: 13.822652 +epoch: 2, batch: 27410, sum loss: 3014.706055, avg loss: 2.351565, ppl: 10.501991 +epoch: 2, batch: 27411, sum loss: 3801.344727, avg loss: 2.552951, ppl: 12.844957 +epoch: 2, batch: 27412, sum loss: 3384.659668, avg loss: 2.106198, ppl: 8.216940 +epoch: 2, batch: 27413, sum loss: 4154.608398, avg loss: 2.271519, ppl: 9.694117 +epoch: 2, batch: 27414, sum loss: 3779.946777, avg loss: 2.478654, ppl: 11.925199 +epoch: 2, batch: 27415, sum loss: 4897.037598, avg loss: 2.369152, ppl: 10.688328 +epoch: 2, batch: 27416, sum loss: 3919.956055, avg loss: 2.227248, ppl: 9.274305 +epoch: 2, batch: 27417, sum loss: 3663.169922, avg loss: 2.268217, ppl: 9.662154 +epoch: 2, batch: 27418, sum loss: 5278.106934, avg loss: 2.789697, ppl: 16.276091 +epoch: 2, batch: 27419, sum loss: 5805.192871, avg loss: 2.818055, ppl: 16.744246 +epoch: 2, batch: 27420, sum loss: 4630.172852, avg loss: 2.667150, ppl: 14.398877 +epoch: 2, batch: 27421, sum loss: 4330.011230, avg loss: 2.617903, ppl: 13.706950 +epoch: 2, batch: 27422, sum loss: 
4854.532227, avg loss: 2.622654, ppl: 13.772226 +epoch: 2, batch: 27423, sum loss: 3749.926758, avg loss: 2.152656, ppl: 8.607689 +epoch: 2, batch: 27424, sum loss: 4059.166016, avg loss: 2.461592, ppl: 11.723466 +epoch: 2, batch: 27425, sum loss: 3963.133301, avg loss: 2.309518, ppl: 10.069573 +epoch: 2, batch: 27426, sum loss: 3957.120605, avg loss: 2.378077, ppl: 10.784148 +epoch: 2, batch: 27427, sum loss: 4951.537109, avg loss: 2.478247, ppl: 11.920346 +epoch: 2, batch: 27428, sum loss: 5440.123047, avg loss: 2.714632, ppl: 15.099056 +epoch: 2, batch: 27429, sum loss: 3166.286133, avg loss: 2.250381, ppl: 9.491351 +epoch: 2, batch: 27430, sum loss: 3712.833008, avg loss: 2.357354, ppl: 10.562966 +epoch: 2, batch: 27431, sum loss: 4608.037598, avg loss: 2.474779, ppl: 11.879077 +epoch: 2, batch: 27432, sum loss: 4071.672607, avg loss: 2.420733, ppl: 11.254103 +epoch: 2, batch: 27433, sum loss: 5358.121094, avg loss: 2.907282, ppl: 18.306974 +epoch: 2, batch: 27434, sum loss: 3826.715820, avg loss: 2.396190, ppl: 10.981262 +epoch: 2, batch: 27435, sum loss: 5402.619141, avg loss: 2.787729, ppl: 16.244091 +epoch: 2, batch: 27436, sum loss: 4969.281738, avg loss: 2.648871, ppl: 14.138067 +epoch: 2, batch: 27437, sum loss: 4002.065430, avg loss: 2.536163, ppl: 12.631113 +epoch: 2, batch: 27438, sum loss: 3760.941895, avg loss: 2.386384, ppl: 10.874107 +epoch: 2, batch: 27439, sum loss: 3413.391113, avg loss: 2.380329, ppl: 10.808455 +epoch: 2, batch: 27440, sum loss: 4512.453125, avg loss: 2.526570, ppl: 12.510516 +epoch: 2, batch: 27441, sum loss: 4155.237305, avg loss: 2.410231, ppl: 11.136529 +epoch: 2, batch: 27442, sum loss: 5137.336426, avg loss: 2.850908, ppl: 17.303492 +epoch: 2, batch: 27443, sum loss: 4504.612793, avg loss: 2.319574, ppl: 10.171342 +epoch: 2, batch: 27444, sum loss: 4448.133301, avg loss: 2.531664, ppl: 12.574409 +epoch: 2, batch: 27445, sum loss: 3646.143066, avg loss: 2.447076, ppl: 11.554513 +epoch: 2, batch: 27446, sum loss: 
4089.782227, avg loss: 2.325061, ppl: 10.227304 +epoch: 2, batch: 27447, sum loss: 3513.793945, avg loss: 2.481493, ppl: 11.959106 +epoch: 2, batch: 27448, sum loss: 4489.470215, avg loss: 2.483114, ppl: 11.978508 +epoch: 2, batch: 27449, sum loss: 3190.517578, avg loss: 2.163063, ppl: 8.697737 +epoch: 2, batch: 27450, sum loss: 3889.844971, avg loss: 2.455710, ppl: 11.654708 +epoch: 2, batch: 27451, sum loss: 3707.215576, avg loss: 2.362789, ppl: 10.620527 +epoch: 2, batch: 27452, sum loss: 3972.968994, avg loss: 2.446409, ppl: 11.546813 +epoch: 2, batch: 27453, sum loss: 5066.133789, avg loss: 2.622222, ppl: 13.766284 +epoch: 2, batch: 27454, sum loss: 4227.931641, avg loss: 2.665783, ppl: 14.379203 +epoch: 2, batch: 27455, sum loss: 4610.853027, avg loss: 2.764300, ppl: 15.867934 +epoch: 2, batch: 27456, sum loss: 3898.235596, avg loss: 2.508517, ppl: 12.286695 +epoch: 2, batch: 27457, sum loss: 3740.879395, avg loss: 2.355718, ppl: 10.545694 +epoch: 2, batch: 27458, sum loss: 3629.646973, avg loss: 2.251642, ppl: 9.503327 +epoch: 2, batch: 27459, sum loss: 4931.032227, avg loss: 2.762483, ppl: 15.839125 +epoch: 2, batch: 27460, sum loss: 4251.768066, avg loss: 2.359472, ppl: 10.585359 +epoch: 2, batch: 27461, sum loss: 4367.995117, avg loss: 2.483227, ppl: 11.979856 +epoch: 2, batch: 27462, sum loss: 4171.986816, avg loss: 2.588081, ppl: 13.304218 +epoch: 2, batch: 27463, sum loss: 4087.747803, avg loss: 2.342549, ppl: 10.407730 +epoch: 2, batch: 27464, sum loss: 3828.810791, avg loss: 2.147398, ppl: 8.562551 +epoch: 2, batch: 27465, sum loss: 4863.928711, avg loss: 2.793756, ppl: 16.342283 +epoch: 2, batch: 27466, sum loss: 3876.626465, avg loss: 2.496218, ppl: 12.136506 +epoch: 2, batch: 27467, sum loss: 4239.715820, avg loss: 2.296704, ppl: 9.941362 +epoch: 2, batch: 27468, sum loss: 4400.536621, avg loss: 2.423203, ppl: 11.281938 +epoch: 2, batch: 27469, sum loss: 4033.693115, avg loss: 2.391045, ppl: 10.924906 +epoch: 2, batch: 27470, sum loss: 
4058.901123, avg loss: 2.452508, ppl: 11.617449 +epoch: 2, batch: 27471, sum loss: 4624.684570, avg loss: 2.510687, ppl: 12.313382 +epoch: 2, batch: 27472, sum loss: 3914.530762, avg loss: 2.618415, ppl: 13.713971 +epoch: 2, batch: 27473, sum loss: 3978.152588, avg loss: 2.475515, ppl: 11.887829 +epoch: 2, batch: 27474, sum loss: 5433.126953, avg loss: 2.693667, ppl: 14.785802 +epoch: 2, batch: 27475, sum loss: 3268.660645, avg loss: 2.031486, ppl: 7.625408 +epoch: 2, batch: 27476, sum loss: 4419.395996, avg loss: 2.390155, ppl: 10.915181 +epoch: 2, batch: 27477, sum loss: 3840.645996, avg loss: 2.284739, ppl: 9.823120 +epoch: 2, batch: 27478, sum loss: 4424.303711, avg loss: 2.668458, ppl: 14.417723 +epoch: 2, batch: 27479, sum loss: 4668.764160, avg loss: 2.622901, ppl: 13.775632 +epoch: 2, batch: 27480, sum loss: 4114.196777, avg loss: 2.364481, ppl: 10.638515 +epoch: 2, batch: 27481, sum loss: 5483.689941, avg loss: 2.720084, ppl: 15.181604 +epoch: 2, batch: 27482, sum loss: 4056.568359, avg loss: 2.291847, ppl: 9.893188 +epoch: 2, batch: 27483, sum loss: 3680.778564, avg loss: 2.413625, ppl: 11.174397 +epoch: 2, batch: 27484, sum loss: 3616.614502, avg loss: 2.414295, ppl: 11.181887 +epoch: 2, batch: 27485, sum loss: 3434.848145, avg loss: 2.096977, ppl: 8.141519 +epoch: 2, batch: 27486, sum loss: 3715.135986, avg loss: 2.428194, ppl: 11.338382 +epoch: 2, batch: 27487, sum loss: 4723.489258, avg loss: 2.478221, ppl: 11.920039 +epoch: 2, batch: 27488, sum loss: 3851.003906, avg loss: 2.297735, ppl: 9.951618 +epoch: 2, batch: 27489, sum loss: 4513.839844, avg loss: 2.558866, ppl: 12.921160 +epoch: 2, batch: 27490, sum loss: 4010.882324, avg loss: 2.384591, ppl: 10.854624 +epoch: 2, batch: 27491, sum loss: 4269.233398, avg loss: 2.402495, ppl: 11.050713 +epoch: 2, batch: 27492, sum loss: 4197.977051, avg loss: 2.558182, ppl: 12.912325 +epoch: 2, batch: 27493, sum loss: 5014.605957, avg loss: 2.685916, ppl: 14.671640 +epoch: 2, batch: 27494, sum loss: 4259.047852, 
avg loss: 2.576556, ppl: 13.151772 +epoch: 2, batch: 27495, sum loss: 3882.983887, avg loss: 2.333524, ppl: 10.314225 +epoch: 2, batch: 27496, sum loss: 4067.922363, avg loss: 2.360953, ppl: 10.601053 +epoch: 2, batch: 27497, sum loss: 4581.499023, avg loss: 2.531215, ppl: 12.568770 +epoch: 2, batch: 27498, sum loss: 3816.250000, avg loss: 2.368870, ppl: 10.685314 +epoch: 2, batch: 27499, sum loss: 3818.250000, avg loss: 2.487459, ppl: 12.030669 +epoch: 2, batch: 27500, sum loss: 5274.712891, avg loss: 2.672093, ppl: 14.470229 +epoch: 2, batch: 27501, sum loss: 4737.295898, avg loss: 2.696241, ppl: 14.823906 +epoch: 2, batch: 27502, sum loss: 2925.389893, avg loss: 2.103084, ppl: 8.191394 +epoch: 2, batch: 27503, sum loss: 4483.968262, avg loss: 2.444912, ppl: 11.529531 +epoch: 2, batch: 27504, sum loss: 3686.157471, avg loss: 2.251776, ppl: 9.504603 +epoch: 2, batch: 27505, sum loss: 5394.435059, avg loss: 3.000242, ppl: 20.090393 +epoch: 2, batch: 27506, sum loss: 3780.617188, avg loss: 2.467766, ppl: 11.796062 +epoch: 2, batch: 27507, sum loss: 3946.093506, avg loss: 2.579146, ppl: 13.185875 +epoch: 2, batch: 27508, sum loss: 4066.943848, avg loss: 2.472306, ppl: 11.849744 +epoch: 2, batch: 27509, sum loss: 4588.283203, avg loss: 2.457570, ppl: 11.676404 +epoch: 2, batch: 27510, sum loss: 4581.760254, avg loss: 2.473953, ppl: 11.869267 +epoch: 2, batch: 27511, sum loss: 3604.578613, avg loss: 2.177993, ppl: 8.828572 +epoch: 2, batch: 27512, sum loss: 3314.655762, avg loss: 2.203893, ppl: 9.060220 +epoch: 2, batch: 27513, sum loss: 4065.853271, avg loss: 2.584776, ppl: 13.260324 +epoch: 2, batch: 27514, sum loss: 4856.446289, avg loss: 2.693537, ppl: 14.783867 +epoch: 2, batch: 27515, sum loss: 3739.474854, avg loss: 2.231190, ppl: 9.310944 +epoch: 2, batch: 27516, sum loss: 4223.866211, avg loss: 2.671642, ppl: 14.463700 +epoch: 2, batch: 27517, sum loss: 4310.148438, avg loss: 2.477097, ppl: 11.906647 +epoch: 2, batch: 27518, sum loss: 4068.668945, avg loss: 
2.328946, ppl: 10.267116 +epoch: 2, batch: 27519, sum loss: 4787.077637, avg loss: 2.699987, ppl: 14.879544 +epoch: 2, batch: 27520, sum loss: 4585.779297, avg loss: 2.574834, ppl: 13.129139 +epoch: 2, batch: 27521, sum loss: 4241.943359, avg loss: 2.561560, ppl: 12.956012 +epoch: 2, batch: 27522, sum loss: 3349.550049, avg loss: 2.370524, ppl: 10.702996 +epoch: 2, batch: 27523, sum loss: 4004.265137, avg loss: 2.269992, ppl: 9.679320 +epoch: 2, batch: 27524, sum loss: 4936.034180, avg loss: 2.589735, ppl: 13.326234 +epoch: 2, batch: 27525, sum loss: 3543.335938, avg loss: 2.491797, ppl: 12.082975 +epoch: 2, batch: 27526, sum loss: 3101.669922, avg loss: 2.147971, ppl: 8.567456 +epoch: 2, batch: 27527, sum loss: 3701.633301, avg loss: 2.277928, ppl: 9.756445 +epoch: 2, batch: 27528, sum loss: 4378.258301, avg loss: 2.643876, ppl: 14.067622 +epoch: 2, batch: 27529, sum loss: 4153.497070, avg loss: 2.548158, ppl: 12.783531 +epoch: 2, batch: 27530, sum loss: 3928.618164, avg loss: 2.494361, ppl: 12.113986 +epoch: 2, batch: 27531, sum loss: 3995.548096, avg loss: 2.569484, ppl: 13.059091 +epoch: 2, batch: 27532, sum loss: 3403.040771, avg loss: 2.273240, ppl: 9.710816 +epoch: 2, batch: 27533, sum loss: 3750.774414, avg loss: 2.241945, ppl: 9.411622 +epoch: 2, batch: 27534, sum loss: 5426.790039, avg loss: 2.480251, ppl: 11.944265 +epoch: 2, batch: 27535, sum loss: 3814.318848, avg loss: 2.285392, ppl: 9.829536 +epoch: 2, batch: 27536, sum loss: 4608.331543, avg loss: 2.488300, ppl: 12.040788 +epoch: 2, batch: 27537, sum loss: 4328.908691, avg loss: 2.483597, ppl: 11.984289 +epoch: 2, batch: 27538, sum loss: 4454.107422, avg loss: 2.776875, ppl: 16.068727 +epoch: 2, batch: 27539, sum loss: 3785.736572, avg loss: 2.348472, ppl: 10.469559 +epoch: 2, batch: 27540, sum loss: 3649.955566, avg loss: 2.251669, ppl: 9.503583 +epoch: 2, batch: 27541, sum loss: 3531.065918, avg loss: 2.440267, ppl: 11.476100 +epoch: 2, batch: 27542, sum loss: 3639.682373, avg loss: 2.313848, ppl: 
10.113261 +epoch: 2, batch: 27543, sum loss: 3529.960449, avg loss: 2.295163, ppl: 9.926053 +epoch: 2, batch: 27544, sum loss: 4364.641602, avg loss: 2.591830, ppl: 13.354188 +epoch: 2, batch: 27545, sum loss: 4253.329590, avg loss: 2.447255, ppl: 11.556581 +epoch: 2, batch: 27546, sum loss: 4643.507324, avg loss: 2.468637, ppl: 11.806350 +epoch: 2, batch: 27547, sum loss: 5366.063477, avg loss: 2.723890, ppl: 15.239490 +epoch: 2, batch: 27548, sum loss: 4175.174316, avg loss: 2.516681, ppl: 12.387417 +epoch: 2, batch: 27549, sum loss: 4071.394531, avg loss: 2.334515, ppl: 10.324452 +epoch: 2, batch: 27550, sum loss: 4456.171875, avg loss: 2.513351, ppl: 12.346238 +epoch: 2, batch: 27551, sum loss: 4136.509277, avg loss: 2.499402, ppl: 12.175206 +epoch: 2, batch: 27552, sum loss: 4351.433105, avg loss: 2.619767, ppl: 13.732523 +epoch: 2, batch: 27553, sum loss: 3626.085693, avg loss: 2.293539, ppl: 9.909950 +epoch: 2, batch: 27554, sum loss: 4178.574219, avg loss: 2.523293, ppl: 12.469598 +epoch: 2, batch: 27555, sum loss: 4171.082031, avg loss: 2.430701, ppl: 11.366842 +epoch: 2, batch: 27556, sum loss: 5040.805664, avg loss: 2.540729, ppl: 12.688912 +epoch: 2, batch: 27557, sum loss: 3967.350586, avg loss: 2.544805, ppl: 12.740741 +epoch: 2, batch: 27558, sum loss: 5081.185547, avg loss: 2.796470, ppl: 16.386694 +epoch: 2, batch: 27559, sum loss: 4841.940918, avg loss: 2.729392, ppl: 15.323561 +epoch: 2, batch: 27560, sum loss: 3868.767822, avg loss: 2.492763, ppl: 12.094646 +epoch: 2, batch: 27561, sum loss: 4149.248047, avg loss: 2.458086, ppl: 11.682425 +epoch: 2, batch: 27562, sum loss: 4253.356934, avg loss: 2.405745, ppl: 11.086687 +epoch: 2, batch: 27563, sum loss: 4763.248047, avg loss: 2.503021, ppl: 12.219347 +epoch: 2, batch: 27564, sum loss: 4018.696289, avg loss: 2.521139, ppl: 12.442765 +epoch: 2, batch: 27565, sum loss: 4470.437988, avg loss: 2.452243, ppl: 11.614364 +epoch: 2, batch: 27566, sum loss: 3459.336182, avg loss: 2.120991, ppl: 8.339397 
+epoch: 2, batch: 27567, sum loss: 3508.429932, avg loss: 2.370561, ppl: 10.703394 +epoch: 2, batch: 27568, sum loss: 4215.926270, avg loss: 2.576972, ppl: 13.157241 +epoch: 2, batch: 27569, sum loss: 4878.956055, avg loss: 2.505884, ppl: 12.254389 +epoch: 2, batch: 27570, sum loss: 3893.687012, avg loss: 2.442714, ppl: 11.504226 +epoch: 2, batch: 27571, sum loss: 3409.557861, avg loss: 2.211127, ppl: 9.125996 +epoch: 2, batch: 27572, sum loss: 3737.527344, avg loss: 2.373033, ppl: 10.729887 +epoch: 2, batch: 27573, sum loss: 3956.910156, avg loss: 2.239338, ppl: 9.387115 +epoch: 2, batch: 27574, sum loss: 4603.987793, avg loss: 2.641416, ppl: 14.033058 +epoch: 2, batch: 27575, sum loss: 4519.222656, avg loss: 2.516271, ppl: 12.382339 +epoch: 2, batch: 27576, sum loss: 4612.881836, avg loss: 2.363157, ppl: 10.624437 +epoch: 2, batch: 27577, sum loss: 4694.686035, avg loss: 2.515909, ppl: 12.377851 +epoch: 2, batch: 27578, sum loss: 4869.905762, avg loss: 2.557724, ppl: 12.906405 +epoch: 2, batch: 27579, sum loss: 3420.356934, avg loss: 2.239919, ppl: 9.392574 +epoch: 2, batch: 27580, sum loss: 4237.779785, avg loss: 2.628896, ppl: 13.858455 +epoch: 2, batch: 27581, sum loss: 4271.718750, avg loss: 2.447976, ppl: 11.564919 +epoch: 2, batch: 27582, sum loss: 4580.399902, avg loss: 2.608428, ppl: 13.577693 +epoch: 2, batch: 27583, sum loss: 3981.009521, avg loss: 2.425966, ppl: 11.313148 +epoch: 2, batch: 27584, sum loss: 5000.569336, avg loss: 2.641611, ppl: 14.035795 +epoch: 2, batch: 27585, sum loss: 3289.813721, avg loss: 2.026995, ppl: 7.591244 +epoch: 2, batch: 27586, sum loss: 6186.195312, avg loss: 2.684981, ppl: 14.657917 +epoch: 2, batch: 27587, sum loss: 5017.352539, avg loss: 2.900204, ppl: 18.177849 +epoch: 2, batch: 27588, sum loss: 4097.080078, avg loss: 2.402980, ppl: 11.056070 +epoch: 2, batch: 27589, sum loss: 4480.657227, avg loss: 2.678217, ppl: 14.559114 +epoch: 2, batch: 27590, sum loss: 4484.461914, avg loss: 2.693371, ppl: 14.781414 +epoch: 2, 
batch: 27591, sum loss: 3092.524414, avg loss: 2.060309, ppl: 7.848398 +epoch: 2, batch: 27592, sum loss: 4124.059082, avg loss: 2.577537, ppl: 13.164671 +epoch: 2, batch: 27593, sum loss: 4015.890869, avg loss: 2.393260, ppl: 10.949133 +epoch: 2, batch: 27594, sum loss: 3788.138916, avg loss: 2.379484, ppl: 10.799331 +epoch: 2, batch: 27595, sum loss: 3637.245850, avg loss: 2.333064, ppl: 10.309478 +epoch: 2, batch: 27596, sum loss: 3740.883545, avg loss: 2.324974, ppl: 10.226418 +epoch: 2, batch: 27597, sum loss: 4188.887207, avg loss: 2.580953, ppl: 13.209725 +epoch: 2, batch: 27598, sum loss: 5091.304688, avg loss: 2.765511, ppl: 15.887149 +epoch: 2, batch: 27599, sum loss: 4781.676758, avg loss: 2.743360, ppl: 15.539109 +epoch: 2, batch: 27600, sum loss: 3990.721191, avg loss: 2.469506, ppl: 11.816603 +epoch: 2, batch: 27601, sum loss: 3905.978271, avg loss: 2.336112, ppl: 10.340948 +epoch: 2, batch: 27602, sum loss: 4020.166992, avg loss: 2.473949, ppl: 11.869226 +epoch: 2, batch: 27603, sum loss: 4325.813477, avg loss: 2.531196, ppl: 12.568524 +epoch: 2, batch: 27604, sum loss: 4557.032227, avg loss: 2.655613, ppl: 14.233711 +epoch: 2, batch: 27605, sum loss: 4908.190918, avg loss: 2.621897, ppl: 13.761805 +epoch: 2, batch: 27606, sum loss: 4230.009277, avg loss: 2.456451, ppl: 11.663350 +epoch: 2, batch: 27607, sum loss: 4417.033691, avg loss: 2.578537, ppl: 13.177841 +epoch: 2, batch: 27608, sum loss: 4772.287109, avg loss: 2.550661, ppl: 12.815577 +epoch: 2, batch: 27609, sum loss: 4358.815430, avg loss: 2.613199, ppl: 13.642620 +epoch: 2, batch: 27610, sum loss: 3814.103516, avg loss: 2.437127, ppl: 11.440125 +epoch: 2, batch: 27611, sum loss: 3576.914551, avg loss: 2.253885, ppl: 9.524663 +epoch: 2, batch: 27612, sum loss: 4270.779785, avg loss: 2.421077, ppl: 11.257977 +epoch: 2, batch: 27613, sum loss: 3731.893311, avg loss: 2.213460, ppl: 9.147311 +epoch: 2, batch: 27614, sum loss: 4638.641602, avg loss: 2.605978, ppl: 13.544472 +epoch: 2, batch: 
27615, sum loss: 3888.192627, avg loss: 2.416528, ppl: 11.206876 +epoch: 2, batch: 27616, sum loss: 3935.487305, avg loss: 2.363656, ppl: 10.629744 +epoch: 2, batch: 27617, sum loss: 3664.839355, avg loss: 2.337270, ppl: 10.352935 +epoch: 2, batch: 27618, sum loss: 3928.141602, avg loss: 2.405476, ppl: 11.083700 +epoch: 2, batch: 27619, sum loss: 4673.110352, avg loss: 2.577557, ppl: 13.164935 +epoch: 2, batch: 27620, sum loss: 4820.771484, avg loss: 2.711345, ppl: 15.049506 +epoch: 2, batch: 27621, sum loss: 2930.325439, avg loss: 2.103608, ppl: 8.195683 +epoch: 2, batch: 27622, sum loss: 4881.061523, avg loss: 2.495430, ppl: 12.126950 +epoch: 2, batch: 27623, sum loss: 4225.375488, avg loss: 2.481137, ppl: 11.954844 +epoch: 2, batch: 27624, sum loss: 3881.223877, avg loss: 2.310252, ppl: 10.076968 +epoch: 2, batch: 27625, sum loss: 4196.968262, avg loss: 2.554454, ppl: 12.864275 +epoch: 2, batch: 27626, sum loss: 3977.620605, avg loss: 2.450783, ppl: 11.597422 +epoch: 2, batch: 27627, sum loss: 3425.544434, avg loss: 2.282175, ppl: 9.797966 +epoch: 2, batch: 27628, sum loss: 5072.175293, avg loss: 2.676610, ppl: 14.535730 +epoch: 2, batch: 27629, sum loss: 3751.767334, avg loss: 2.389661, ppl: 10.909793 +epoch: 2, batch: 27630, sum loss: 3003.325195, avg loss: 2.253057, ppl: 9.516787 +epoch: 2, batch: 27631, sum loss: 3753.712158, avg loss: 2.418629, ppl: 11.230453 +epoch: 2, batch: 27632, sum loss: 4829.609375, avg loss: 2.612011, ppl: 13.626432 +epoch: 2, batch: 27633, sum loss: 3982.640137, avg loss: 2.544818, ppl: 12.740908 +epoch: 2, batch: 27634, sum loss: 4368.112305, avg loss: 2.580102, ppl: 13.198483 +epoch: 2, batch: 27635, sum loss: 4578.284668, avg loss: 2.440450, ppl: 11.478207 +epoch: 2, batch: 27636, sum loss: 3393.379395, avg loss: 2.166909, ppl: 8.731254 +epoch: 2, batch: 27637, sum loss: 3589.865967, avg loss: 2.301196, ppl: 9.986119 +epoch: 2, batch: 27638, sum loss: 3854.753174, avg loss: 2.370697, ppl: 10.704848 +epoch: 2, batch: 27639, sum 
loss: 3432.576660, avg loss: 2.320877, ppl: 10.184598 +epoch: 2, batch: 27640, sum loss: 4708.110352, avg loss: 2.349356, ppl: 10.478824 +epoch: 2, batch: 27641, sum loss: 3779.642578, avg loss: 2.164744, ppl: 8.712371 +epoch: 2, batch: 27642, sum loss: 4200.932129, avg loss: 2.236918, ppl: 9.364426 +epoch: 2, batch: 27643, sum loss: 3792.080078, avg loss: 2.506332, ppl: 12.259877 +epoch: 2, batch: 27644, sum loss: 4184.195312, avg loss: 2.570144, ppl: 13.067712 +epoch: 2, batch: 27645, sum loss: 4515.727051, avg loss: 2.735147, ppl: 15.412005 +epoch: 2, batch: 27646, sum loss: 3608.716797, avg loss: 2.378851, ppl: 10.792495 +epoch: 2, batch: 27647, sum loss: 3760.756836, avg loss: 2.435724, ppl: 11.424082 +epoch: 2, batch: 27648, sum loss: 3538.068359, avg loss: 2.523587, ppl: 12.473255 +epoch: 2, batch: 27649, sum loss: 4832.164062, avg loss: 2.499826, ppl: 12.180380 +epoch: 2, batch: 27650, sum loss: 3714.738525, avg loss: 2.420025, ppl: 11.246141 +epoch: 2, batch: 27651, sum loss: 4604.368164, avg loss: 2.628064, ppl: 13.846938 +epoch: 2, batch: 27652, sum loss: 3942.402344, avg loss: 2.660191, ppl: 14.299014 +epoch: 2, batch: 27653, sum loss: 4621.858398, avg loss: 2.833757, ppl: 17.009251 +epoch: 2, batch: 27654, sum loss: 4478.856934, avg loss: 2.497968, ppl: 12.157766 +epoch: 2, batch: 27655, sum loss: 4498.904785, avg loss: 2.269881, ppl: 9.678251 +epoch: 2, batch: 27656, sum loss: 4083.010254, avg loss: 2.504914, ppl: 12.242510 +epoch: 2, batch: 27657, sum loss: 4388.573730, avg loss: 2.616919, ppl: 13.693473 +epoch: 2, batch: 27658, sum loss: 4744.148438, avg loss: 2.634175, ppl: 13.931808 +epoch: 2, batch: 27659, sum loss: 3849.303223, avg loss: 2.300839, ppl: 9.982552 +epoch: 2, batch: 27660, sum loss: 3448.629883, avg loss: 2.220625, ppl: 9.213084 +epoch: 2, batch: 27661, sum loss: 3768.594727, avg loss: 2.507382, ppl: 12.272754 +epoch: 2, batch: 27662, sum loss: 4242.260742, avg loss: 2.714178, ppl: 15.092204 +epoch: 2, batch: 27663, sum loss: 
3958.144775, avg loss: 2.273489, ppl: 9.713234 +epoch: 2, batch: 27664, sum loss: 4490.507812, avg loss: 2.542756, ppl: 12.714668 +epoch: 2, batch: 27665, sum loss: 3261.617676, avg loss: 2.106988, ppl: 8.223436 +epoch: 2, batch: 27666, sum loss: 3409.583496, avg loss: 2.145742, ppl: 8.548379 +epoch: 2, batch: 27667, sum loss: 3770.244385, avg loss: 2.389255, ppl: 10.905367 +epoch: 2, batch: 27668, sum loss: 3289.285400, avg loss: 2.324583, ppl: 10.222420 +epoch: 2, batch: 27669, sum loss: 4159.963867, avg loss: 2.506002, ppl: 12.255838 +epoch: 2, batch: 27670, sum loss: 4049.469238, avg loss: 2.455712, ppl: 11.654730 +epoch: 2, batch: 27671, sum loss: 4582.973633, avg loss: 2.457359, ppl: 11.673935 +epoch: 2, batch: 27672, sum loss: 4383.518066, avg loss: 2.417826, ppl: 11.221437 +epoch: 2, batch: 27673, sum loss: 3934.329590, avg loss: 2.387336, ppl: 10.884459 +epoch: 2, batch: 27674, sum loss: 5154.883301, avg loss: 2.615364, ppl: 13.672196 +epoch: 2, batch: 27675, sum loss: 3449.525879, avg loss: 2.260502, ppl: 9.587900 +epoch: 2, batch: 27676, sum loss: 4544.691406, avg loss: 2.487516, ppl: 12.031352 +epoch: 2, batch: 27677, sum loss: 3763.705078, avg loss: 2.292147, ppl: 9.896161 +epoch: 2, batch: 27678, sum loss: 3858.102051, avg loss: 2.479500, ppl: 11.935296 +epoch: 2, batch: 27679, sum loss: 5153.367188, avg loss: 2.547389, ppl: 12.773703 +epoch: 2, batch: 27680, sum loss: 4272.912109, avg loss: 2.461355, ppl: 11.720682 +epoch: 2, batch: 27681, sum loss: 4521.416992, avg loss: 2.594043, ppl: 13.383773 +epoch: 2, batch: 27682, sum loss: 4488.560547, avg loss: 2.573716, ppl: 13.114463 +epoch: 2, batch: 27683, sum loss: 4167.551270, avg loss: 2.631030, ppl: 13.888065 +epoch: 2, batch: 27684, sum loss: 3550.755859, avg loss: 2.261628, ppl: 9.598702 +epoch: 2, batch: 27685, sum loss: 4980.346680, avg loss: 2.563225, ppl: 12.977606 +epoch: 2, batch: 27686, sum loss: 4457.039062, avg loss: 2.570380, ppl: 13.070790 +epoch: 2, batch: 27687, sum loss: 4303.291016, 
avg loss: 2.507746, ppl: 12.277220 +epoch: 2, batch: 27688, sum loss: 4772.631836, avg loss: 2.466476, ppl: 11.780860 +epoch: 2, batch: 27689, sum loss: 3421.672363, avg loss: 2.285686, ppl: 9.832432 +epoch: 2, batch: 27690, sum loss: 3297.398438, avg loss: 2.350248, ppl: 10.488174 +epoch: 2, batch: 27691, sum loss: 3913.470459, avg loss: 2.392097, ppl: 10.936403 +epoch: 2, batch: 27692, sum loss: 4725.201172, avg loss: 2.703204, ppl: 14.927488 +epoch: 2, batch: 27693, sum loss: 3576.370605, avg loss: 2.149261, ppl: 8.578519 +epoch: 2, batch: 27694, sum loss: 3838.673096, avg loss: 2.451260, ppl: 11.602956 +epoch: 2, batch: 27695, sum loss: 4025.291016, avg loss: 2.475579, ppl: 11.888586 +epoch: 2, batch: 27696, sum loss: 3776.592529, avg loss: 2.417793, ppl: 11.221067 +epoch: 2, batch: 27697, sum loss: 3329.033203, avg loss: 2.147763, ppl: 8.565680 +epoch: 2, batch: 27698, sum loss: 4768.444824, avg loss: 2.675895, ppl: 14.525344 +epoch: 2, batch: 27699, sum loss: 3585.480957, avg loss: 2.425901, ppl: 11.312414 +epoch: 2, batch: 27700, sum loss: 4619.091309, avg loss: 2.577618, ppl: 13.165742 +epoch: 2, batch: 27701, sum loss: 4187.579590, avg loss: 2.441737, ppl: 11.492989 +epoch: 2, batch: 27702, sum loss: 3845.302979, avg loss: 2.159070, ppl: 8.663075 +epoch: 2, batch: 27703, sum loss: 4715.208984, avg loss: 2.504094, ppl: 12.232470 +epoch: 2, batch: 27704, sum loss: 3470.221924, avg loss: 2.260731, ppl: 9.590095 +epoch: 2, batch: 27705, sum loss: 4100.416016, avg loss: 2.416274, ppl: 11.204031 +epoch: 2, batch: 27706, sum loss: 4028.580811, avg loss: 2.315276, ppl: 10.127722 +epoch: 2, batch: 27707, sum loss: 4185.587891, avg loss: 2.535184, ppl: 12.618749 +epoch: 2, batch: 27708, sum loss: 3591.767822, avg loss: 2.366118, ppl: 10.655951 +epoch: 2, batch: 27709, sum loss: 4052.505127, avg loss: 2.385230, ppl: 10.861559 +epoch: 2, batch: 27710, sum loss: 3723.186768, avg loss: 2.350497, ppl: 10.490780 +epoch: 2, batch: 27711, sum loss: 3998.353027, avg loss: 
2.332762, ppl: 10.306363 +epoch: 2, batch: 27712, sum loss: 4775.290039, avg loss: 2.586831, ppl: 13.287598 +epoch: 2, batch: 27713, sum loss: 3955.489746, avg loss: 2.472181, ppl: 11.848261 +epoch: 2, batch: 27714, sum loss: 3558.082031, avg loss: 2.333169, ppl: 10.310559 +epoch: 2, batch: 27715, sum loss: 3523.073730, avg loss: 2.367657, ppl: 10.672359 +epoch: 2, batch: 27716, sum loss: 4189.977051, avg loss: 2.290857, ppl: 9.883403 +epoch: 2, batch: 27717, sum loss: 3476.682129, avg loss: 2.353881, ppl: 10.526344 +epoch: 2, batch: 27718, sum loss: 4346.589355, avg loss: 2.472463, ppl: 11.851598 +epoch: 2, batch: 27719, sum loss: 3925.672607, avg loss: 2.476765, ppl: 11.902696 +epoch: 2, batch: 27720, sum loss: 3542.469727, avg loss: 2.297322, ppl: 9.947503 +epoch: 2, batch: 27721, sum loss: 4156.339844, avg loss: 2.348215, ppl: 10.466866 +epoch: 2, batch: 27722, sum loss: 4780.035645, avg loss: 2.590805, ppl: 13.340508 +epoch: 2, batch: 27723, sum loss: 4826.112793, avg loss: 2.815702, ppl: 16.704893 +epoch: 2, batch: 27724, sum loss: 4767.135742, avg loss: 2.639610, ppl: 14.007740 +epoch: 2, batch: 27725, sum loss: 3970.244873, avg loss: 2.368881, ppl: 10.685431 +epoch: 2, batch: 27726, sum loss: 3301.007324, avg loss: 2.194819, ppl: 8.978374 +epoch: 2, batch: 27727, sum loss: 3402.172607, avg loss: 2.303435, ppl: 10.008499 +epoch: 2, batch: 27728, sum loss: 4632.601562, avg loss: 2.505463, ppl: 12.249230 +epoch: 2, batch: 27729, sum loss: 4007.491699, avg loss: 2.590492, ppl: 13.336338 +epoch: 2, batch: 27730, sum loss: 4050.252441, avg loss: 2.385308, ppl: 10.862406 +epoch: 2, batch: 27731, sum loss: 4880.503906, avg loss: 2.816217, ppl: 16.713499 +epoch: 2, batch: 27732, sum loss: 4483.236328, avg loss: 2.606533, ppl: 13.551982 +epoch: 2, batch: 27733, sum loss: 3489.286377, avg loss: 2.399784, ppl: 11.020800 +epoch: 2, batch: 27734, sum loss: 4172.752930, avg loss: 2.627678, ppl: 13.841598 +epoch: 2, batch: 27735, sum loss: 4124.313477, avg loss: 2.324867, 
ppl: 10.225315 +epoch: 2, batch: 27736, sum loss: 4349.932617, avg loss: 2.576974, ppl: 13.157269 +epoch: 2, batch: 27737, sum loss: 4030.619141, avg loss: 2.321785, ppl: 10.193856 +epoch: 2, batch: 27738, sum loss: 4327.614746, avg loss: 2.421721, ppl: 11.265227 +epoch: 2, batch: 27739, sum loss: 5414.568359, avg loss: 2.685798, ppl: 14.669902 +epoch: 2, batch: 27740, sum loss: 3332.229736, avg loss: 2.121089, ppl: 8.340211 +epoch: 2, batch: 27741, sum loss: 3916.965332, avg loss: 2.278630, ppl: 9.763298 +epoch: 2, batch: 27742, sum loss: 3823.885254, avg loss: 2.415594, ppl: 11.196418 +epoch: 2, batch: 27743, sum loss: 4679.557617, avg loss: 2.571186, ppl: 13.081325 +epoch: 2, batch: 27744, sum loss: 4180.097168, avg loss: 2.357641, ppl: 10.565997 +epoch: 2, batch: 27745, sum loss: 4775.093750, avg loss: 2.661702, ppl: 14.320644 +epoch: 2, batch: 27746, sum loss: 4127.917969, avg loss: 2.381949, ppl: 10.825984 +epoch: 2, batch: 27747, sum loss: 3793.605469, avg loss: 2.497436, ppl: 12.151299 +epoch: 2, batch: 27748, sum loss: 4087.934326, avg loss: 2.627207, ppl: 13.835075 +epoch: 2, batch: 27749, sum loss: 4257.194336, avg loss: 2.682542, ppl: 14.622220 +epoch: 2, batch: 27750, sum loss: 3415.075684, avg loss: 2.324762, ppl: 10.224248 +epoch: 2, batch: 27751, sum loss: 5175.714844, avg loss: 2.853206, ppl: 17.343290 +epoch: 2, batch: 27752, sum loss: 3988.031982, avg loss: 2.458713, ppl: 11.689753 +epoch: 2, batch: 27753, sum loss: 3020.097168, avg loss: 2.186892, ppl: 8.907481 +epoch: 2, batch: 27754, sum loss: 4256.566406, avg loss: 2.424013, ppl: 11.291079 +epoch: 2, batch: 27755, sum loss: 4553.997070, avg loss: 2.476344, ppl: 11.897691 +epoch: 2, batch: 27756, sum loss: 4434.421875, avg loss: 2.431152, ppl: 11.371979 +epoch: 2, batch: 27757, sum loss: 3118.698486, avg loss: 2.072225, ppl: 7.942476 +epoch: 2, batch: 27758, sum loss: 3927.600830, avg loss: 2.441020, ppl: 11.484747 +epoch: 2, batch: 27759, sum loss: 4247.898926, avg loss: 2.557435, ppl: 
12.902674 +epoch: 2, batch: 27760, sum loss: 4830.644531, avg loss: 2.514651, ppl: 12.362294 +epoch: 2, batch: 27761, sum loss: 4313.919434, avg loss: 2.612913, ppl: 13.638724 +epoch: 2, batch: 27762, sum loss: 4870.484375, avg loss: 2.574252, ppl: 13.121494 +epoch: 2, batch: 27763, sum loss: 4497.487793, avg loss: 2.562671, ppl: 12.970414 +epoch: 2, batch: 27764, sum loss: 3920.036621, avg loss: 2.673968, ppl: 14.497375 +epoch: 2, batch: 27765, sum loss: 4054.428467, avg loss: 2.457229, ppl: 11.672427 +epoch: 2, batch: 27766, sum loss: 4254.643555, avg loss: 2.619854, ppl: 13.733725 +epoch: 2, batch: 27767, sum loss: 3750.640869, avg loss: 2.258062, ppl: 9.564534 +epoch: 2, batch: 27768, sum loss: 4253.897461, avg loss: 2.364590, ppl: 10.639675 +epoch: 2, batch: 27769, sum loss: 4272.936523, avg loss: 2.469905, ppl: 11.821328 +epoch: 2, batch: 27770, sum loss: 4460.898926, avg loss: 2.408693, ppl: 11.119414 +epoch: 2, batch: 27771, sum loss: 5351.312500, avg loss: 2.724701, ppl: 15.251852 +epoch: 2, batch: 27772, sum loss: 3663.747070, avg loss: 2.157684, ppl: 8.651077 +epoch: 2, batch: 27773, sum loss: 4630.011230, avg loss: 2.751046, ppl: 15.659009 +epoch: 2, batch: 27774, sum loss: 4560.079102, avg loss: 2.513825, ppl: 12.352092 +epoch: 2, batch: 27775, sum loss: 4705.470703, avg loss: 2.611249, ppl: 13.616050 +epoch: 2, batch: 27776, sum loss: 4595.232422, avg loss: 2.676315, ppl: 14.531444 +epoch: 2, batch: 27777, sum loss: 4918.451660, avg loss: 2.530068, ppl: 12.554358 +epoch: 2, batch: 27778, sum loss: 3611.762451, avg loss: 2.388732, ppl: 10.899664 +epoch: 2, batch: 27779, sum loss: 3870.593262, avg loss: 2.451294, ppl: 11.603351 +epoch: 2, batch: 27780, sum loss: 3687.430420, avg loss: 2.206721, ppl: 9.085874 +epoch: 2, batch: 27781, sum loss: 4424.190918, avg loss: 2.506624, ppl: 12.263455 +epoch: 2, batch: 27782, sum loss: 4203.479980, avg loss: 2.541403, ppl: 12.697471 +epoch: 2, batch: 27783, sum loss: 3583.876709, avg loss: 2.295885, ppl: 9.933224 
+epoch: 2, batch: 27784, sum loss: 3984.084473, avg loss: 2.471516, ppl: 11.840387 +epoch: 2, batch: 27785, sum loss: 4772.175781, avg loss: 2.603478, ppl: 13.510653 +epoch: 2, batch: 27786, sum loss: 4278.567383, avg loss: 2.454715, ppl: 11.643109 +epoch: 2, batch: 27787, sum loss: 4447.565430, avg loss: 2.593333, ppl: 13.374268 +epoch: 2, batch: 27788, sum loss: 3583.736816, avg loss: 2.613958, ppl: 13.652987 +epoch: 2, batch: 27789, sum loss: 3934.445068, avg loss: 2.350326, ppl: 10.488987 +epoch: 2, batch: 27790, sum loss: 3805.947754, avg loss: 2.445982, ppl: 11.541875 +epoch: 2, batch: 27791, sum loss: 4654.149902, avg loss: 2.627978, ppl: 13.845750 +epoch: 2, batch: 27792, sum loss: 3412.003906, avg loss: 2.514373, ppl: 12.358855 +epoch: 2, batch: 27793, sum loss: 4437.497070, avg loss: 2.591996, ppl: 13.356403 +epoch: 2, batch: 27794, sum loss: 4361.401367, avg loss: 2.556507, ppl: 12.890713 +epoch: 2, batch: 27795, sum loss: 4037.101318, avg loss: 2.558366, ppl: 12.914695 +epoch: 2, batch: 27796, sum loss: 3713.989258, avg loss: 2.302535, ppl: 9.999502 +epoch: 2, batch: 27797, sum loss: 3597.730713, avg loss: 2.371609, ppl: 10.714616 +epoch: 2, batch: 27798, sum loss: 3246.727295, avg loss: 2.160164, ppl: 8.672563 +epoch: 2, batch: 27799, sum loss: 4206.480957, avg loss: 2.563364, ppl: 12.979410 +epoch: 2, batch: 27800, sum loss: 4302.632812, avg loss: 2.719743, ppl: 15.176414 +epoch: 2, batch: 27801, sum loss: 4403.293945, avg loss: 2.607042, ppl: 13.558886 +epoch: 2, batch: 27802, sum loss: 4344.524902, avg loss: 2.525887, ppl: 12.501974 +epoch: 2, batch: 27803, sum loss: 4105.558594, avg loss: 2.514120, ppl: 12.355735 +epoch: 2, batch: 27804, sum loss: 4760.081543, avg loss: 2.553692, ppl: 12.854473 +epoch: 2, batch: 27805, sum loss: 3906.098145, avg loss: 2.561376, ppl: 12.953628 +epoch: 2, batch: 27806, sum loss: 4417.062988, avg loss: 2.335835, ppl: 10.338083 +epoch: 2, batch: 27807, sum loss: 3395.189697, avg loss: 2.236620, ppl: 9.361639 +epoch: 2, 
batch: 27808, sum loss: 3952.239990, avg loss: 2.353925, ppl: 10.526806 +epoch: 2, batch: 27809, sum loss: 4708.598633, avg loss: 2.781216, ppl: 16.138636 +epoch: 2, batch: 27810, sum loss: 4367.854492, avg loss: 2.555795, ppl: 12.881542 +epoch: 2, batch: 27811, sum loss: 4116.314941, avg loss: 2.408610, ppl: 11.118500 +epoch: 2, batch: 27812, sum loss: 4770.789062, avg loss: 2.454110, ppl: 11.636072 +epoch: 2, batch: 27813, sum loss: 3961.575195, avg loss: 2.405328, ppl: 11.082063 +epoch: 2, batch: 27814, sum loss: 3854.973633, avg loss: 2.397372, ppl: 10.994241 +epoch: 2, batch: 27815, sum loss: 4851.071289, avg loss: 2.671295, ppl: 14.458676 +epoch: 2, batch: 27816, sum loss: 4375.048828, avg loss: 2.418490, ppl: 11.228895 +epoch: 2, batch: 27817, sum loss: 4624.512695, avg loss: 2.598041, ppl: 13.437386 +epoch: 2, batch: 27818, sum loss: 5073.007324, avg loss: 2.694109, ppl: 14.792333 +epoch: 2, batch: 27819, sum loss: 3770.971680, avg loss: 2.631523, ppl: 13.894911 +epoch: 2, batch: 27820, sum loss: 3738.131348, avg loss: 2.525764, ppl: 12.500448 +epoch: 2, batch: 27821, sum loss: 3440.709229, avg loss: 2.304561, ppl: 10.019775 +epoch: 2, batch: 27822, sum loss: 3510.399170, avg loss: 2.491412, ppl: 12.078315 +epoch: 2, batch: 27823, sum loss: 3534.509277, avg loss: 2.257030, ppl: 9.554672 +epoch: 2, batch: 27824, sum loss: 4907.424316, avg loss: 2.507626, ppl: 12.275754 +epoch: 2, batch: 27825, sum loss: 4723.219727, avg loss: 2.560011, ppl: 12.935955 +epoch: 2, batch: 27826, sum loss: 4367.780273, avg loss: 2.540884, ppl: 12.690891 +epoch: 2, batch: 27827, sum loss: 4731.157227, avg loss: 2.558765, ppl: 12.919857 +epoch: 2, batch: 27828, sum loss: 4982.324219, avg loss: 2.658658, ppl: 14.277110 +epoch: 2, batch: 27829, sum loss: 4299.094727, avg loss: 2.482156, ppl: 11.967041 +epoch: 2, batch: 27830, sum loss: 4267.607422, avg loss: 2.711313, ppl: 15.049026 +epoch: 2, batch: 27831, sum loss: 4004.697021, avg loss: 2.622591, ppl: 13.771366 +epoch: 2, batch: 
27832, sum loss: 3604.378662, avg loss: 2.460327, ppl: 11.708635 +epoch: 2, batch: 27833, sum loss: 4396.641113, avg loss: 2.491015, ppl: 12.073522 +epoch: 2, batch: 27834, sum loss: 5150.024902, avg loss: 2.552044, ppl: 12.833310 +epoch: 2, batch: 27835, sum loss: 4298.963867, avg loss: 2.383018, ppl: 10.837556 +epoch: 2, batch: 27836, sum loss: 4546.863281, avg loss: 2.457764, ppl: 11.678668 +epoch: 2, batch: 27837, sum loss: 4279.879883, avg loss: 2.752335, ppl: 15.679193 +epoch: 2, batch: 27838, sum loss: 4738.493652, avg loss: 2.628116, ppl: 13.847661 +epoch: 2, batch: 27839, sum loss: 3707.499268, avg loss: 2.421619, ppl: 11.264086 +epoch: 2, batch: 27840, sum loss: 4306.104980, avg loss: 2.708242, ppl: 15.002880 +epoch: 2, batch: 27841, sum loss: 5016.783203, avg loss: 2.753448, ppl: 15.696669 +epoch: 2, batch: 27842, sum loss: 3952.837402, avg loss: 2.224444, ppl: 9.248341 +epoch: 2, batch: 27843, sum loss: 4528.466309, avg loss: 2.564250, ppl: 12.990917 +epoch: 2, batch: 27844, sum loss: 3789.597168, avg loss: 2.296726, ppl: 9.941575 +epoch: 2, batch: 27845, sum loss: 3690.083252, avg loss: 2.272219, ppl: 9.700900 +epoch: 2, batch: 27846, sum loss: 4124.204102, avg loss: 2.407591, ppl: 11.107176 +epoch: 2, batch: 27847, sum loss: 4585.538086, avg loss: 2.621806, ppl: 13.760549 +epoch: 2, batch: 27848, sum loss: 4008.493408, avg loss: 2.460708, ppl: 11.713100 +epoch: 2, batch: 27849, sum loss: 3922.452393, avg loss: 2.575478, ppl: 13.137590 +epoch: 2, batch: 27850, sum loss: 4341.225586, avg loss: 2.721771, ppl: 15.207237 +epoch: 2, batch: 27851, sum loss: 2923.393066, avg loss: 2.181637, ppl: 8.860796 +epoch: 2, batch: 27852, sum loss: 4555.988770, avg loss: 2.870819, ppl: 17.651463 +epoch: 2, batch: 27853, sum loss: 4219.393555, avg loss: 2.322176, ppl: 10.197841 +epoch: 2, batch: 27854, sum loss: 4774.476562, avg loss: 2.772635, ppl: 16.000732 +epoch: 2, batch: 27855, sum loss: 4381.394531, avg loss: 2.595613, ppl: 13.404802 +epoch: 2, batch: 27856, sum 
loss: 5610.450195, avg loss: 2.832130, ppl: 16.981596 +epoch: 2, batch: 27857, sum loss: 4264.160156, avg loss: 2.341659, ppl: 10.398469 +epoch: 2, batch: 27858, sum loss: 4334.439453, avg loss: 2.413385, ppl: 11.171715 +epoch: 2, batch: 27859, sum loss: 4803.427734, avg loss: 2.676004, ppl: 14.526934 +epoch: 2, batch: 27860, sum loss: 3572.280762, avg loss: 2.158478, ppl: 8.657948 +epoch: 2, batch: 27861, sum loss: 5109.534180, avg loss: 2.816722, ppl: 16.721952 +epoch: 2, batch: 27862, sum loss: 4186.981445, avg loss: 2.438545, ppl: 11.456357 +epoch: 2, batch: 27863, sum loss: 4063.072510, avg loss: 2.475973, ppl: 11.893278 +epoch: 2, batch: 27864, sum loss: 4226.920898, avg loss: 2.561770, ppl: 12.958736 +epoch: 2, batch: 27865, sum loss: 4425.583496, avg loss: 2.586548, ppl: 13.283837 +epoch: 2, batch: 27866, sum loss: 4225.843750, avg loss: 2.321892, ppl: 10.194947 +epoch: 2, batch: 27867, sum loss: 4213.332520, avg loss: 2.527494, ppl: 12.522086 +epoch: 2, batch: 27868, sum loss: 3804.294922, avg loss: 2.358521, ppl: 10.575304 +epoch: 2, batch: 27869, sum loss: 3488.981689, avg loss: 2.293874, ppl: 9.913263 +epoch: 2, batch: 27870, sum loss: 3447.885742, avg loss: 2.284881, ppl: 9.824520 +epoch: 2, batch: 27871, sum loss: 4301.494141, avg loss: 2.581929, ppl: 13.222623 +epoch: 2, batch: 27872, sum loss: 4226.229980, avg loss: 2.493351, ppl: 12.101761 +epoch: 2, batch: 27873, sum loss: 4297.131836, avg loss: 2.590194, ppl: 13.332357 +epoch: 2, batch: 27874, sum loss: 3256.822510, avg loss: 2.205025, ppl: 9.070482 +epoch: 2, batch: 27875, sum loss: 3892.979492, avg loss: 2.381027, ppl: 10.816008 +epoch: 2, batch: 27876, sum loss: 4350.589844, avg loss: 2.566720, ppl: 13.023036 +epoch: 2, batch: 27877, sum loss: 5332.669922, avg loss: 2.967541, ppl: 19.444038 +epoch: 2, batch: 27878, sum loss: 3951.362793, avg loss: 2.437608, ppl: 11.445630 +epoch: 2, batch: 27879, sum loss: 4492.763184, avg loss: 2.346090, ppl: 10.444654 +epoch: 2, batch: 27880, sum loss: 
3940.896729, avg loss: 2.483237, ppl: 11.979979 +epoch: 2, batch: 27881, sum loss: 4371.812988, avg loss: 2.454696, ppl: 11.642890 +epoch: 2, batch: 27882, sum loss: 4544.109863, avg loss: 2.514726, ppl: 12.363220 +epoch: 2, batch: 27883, sum loss: 3880.160889, avg loss: 2.503330, ppl: 12.223126 +epoch: 2, batch: 27884, sum loss: 4448.045410, avg loss: 2.457484, ppl: 11.675397 +epoch: 2, batch: 27885, sum loss: 4326.551758, avg loss: 2.664133, ppl: 14.355495 +epoch: 2, batch: 27886, sum loss: 4020.357422, avg loss: 2.455930, ppl: 11.657269 +epoch: 2, batch: 27887, sum loss: 4622.292480, avg loss: 2.409954, ppl: 11.133452 +epoch: 2, batch: 27888, sum loss: 3716.994385, avg loss: 2.311564, ppl: 10.090191 +epoch: 2, batch: 27889, sum loss: 3436.889160, avg loss: 2.167017, ppl: 8.732199 +epoch: 2, batch: 27890, sum loss: 3388.927734, avg loss: 2.189230, ppl: 8.928335 +epoch: 2, batch: 27891, sum loss: 4665.084473, avg loss: 2.430998, ppl: 11.370219 +epoch: 2, batch: 27892, sum loss: 4435.725586, avg loss: 2.486393, ppl: 12.017855 +epoch: 2, batch: 27893, sum loss: 4478.048828, avg loss: 2.483665, ppl: 11.985115 +epoch: 2, batch: 27894, sum loss: 5243.826660, avg loss: 2.589544, ppl: 13.323695 +epoch: 2, batch: 27895, sum loss: 4078.847900, avg loss: 2.274873, ppl: 9.726688 +epoch: 2, batch: 27896, sum loss: 3845.906738, avg loss: 2.508745, ppl: 12.289502 +epoch: 2, batch: 27897, sum loss: 3599.127930, avg loss: 2.210767, ppl: 9.122706 +epoch: 2, batch: 27898, sum loss: 3790.117188, avg loss: 2.304023, ppl: 10.014385 +epoch: 2, batch: 27899, sum loss: 4302.446777, avg loss: 2.623443, ppl: 13.783099 +epoch: 2, batch: 27900, sum loss: 4112.569336, avg loss: 2.478945, ppl: 11.928670 +epoch: 2, batch: 27901, sum loss: 3113.905273, avg loss: 2.039231, ppl: 7.684696 +epoch: 2, batch: 27902, sum loss: 4194.301270, avg loss: 2.576352, ppl: 13.149084 +epoch: 2, batch: 27903, sum loss: 3895.696533, avg loss: 2.296991, ppl: 9.944214 +epoch: 2, batch: 27904, sum loss: 4549.909180, 
avg loss: 2.660766, ppl: 14.307240 +epoch: 2, batch: 27905, sum loss: 4684.884766, avg loss: 2.569877, ppl: 13.064213 +epoch: 2, batch: 27906, sum loss: 4159.817383, avg loss: 2.609672, ppl: 13.594592 +epoch: 2, batch: 27907, sum loss: 5102.085938, avg loss: 2.837645, ppl: 17.075506 +epoch: 2, batch: 27908, sum loss: 4452.614258, avg loss: 2.521299, ppl: 12.444754 +epoch: 2, batch: 27909, sum loss: 3640.934082, avg loss: 2.419225, ppl: 11.237152 +epoch: 2, batch: 27910, sum loss: 4746.847656, avg loss: 2.547959, ppl: 12.780990 +epoch: 2, batch: 27911, sum loss: 4933.862793, avg loss: 2.742558, ppl: 15.526659 +epoch: 2, batch: 27912, sum loss: 4460.255371, avg loss: 2.584157, ppl: 13.252116 +epoch: 2, batch: 27913, sum loss: 3763.450195, avg loss: 2.404760, ppl: 11.075776 +epoch: 2, batch: 27914, sum loss: 4327.649902, avg loss: 2.345610, ppl: 10.439635 +epoch: 2, batch: 27915, sum loss: 3898.330078, avg loss: 2.365492, ppl: 10.649273 +epoch: 2, batch: 27916, sum loss: 4783.572754, avg loss: 2.553963, ppl: 12.857962 +epoch: 2, batch: 27917, sum loss: 3847.421387, avg loss: 2.647916, ppl: 14.124567 +epoch: 2, batch: 27918, sum loss: 4602.434082, avg loss: 2.724946, ppl: 15.255594 +epoch: 2, batch: 27919, sum loss: 3861.925049, avg loss: 2.350533, ppl: 10.491155 +epoch: 2, batch: 27920, sum loss: 4618.146973, avg loss: 2.504418, ppl: 12.236437 +epoch: 2, batch: 27921, sum loss: 4333.672852, avg loss: 2.426469, ppl: 11.318840 +epoch: 2, batch: 27922, sum loss: 4085.222168, avg loss: 2.453587, ppl: 11.629990 +epoch: 2, batch: 27923, sum loss: 3852.039062, avg loss: 2.264573, ppl: 9.627014 +epoch: 2, batch: 27924, sum loss: 4372.031250, avg loss: 2.567253, ppl: 13.029977 +epoch: 2, batch: 27925, sum loss: 3897.377441, avg loss: 2.322633, ppl: 10.202497 +epoch: 2, batch: 27926, sum loss: 2660.083496, avg loss: 1.860198, ppl: 6.425011 +epoch: 2, batch: 27927, sum loss: 4000.180908, avg loss: 2.346147, ppl: 10.445250 +epoch: 2, batch: 27928, sum loss: 4558.052734, avg loss: 
2.505801, ppl: 12.253375 +epoch: 2, batch: 27929, sum loss: 3739.403076, avg loss: 2.259458, ppl: 9.577897 +epoch: 2, batch: 27930, sum loss: 4176.614258, avg loss: 2.390735, ppl: 10.921520 +epoch: 2, batch: 27931, sum loss: 3632.732910, avg loss: 2.396262, ppl: 10.982048 +epoch: 2, batch: 27932, sum loss: 3669.624512, avg loss: 2.461184, ppl: 11.718673 +epoch: 2, batch: 27933, sum loss: 3901.163818, avg loss: 2.534869, ppl: 12.614780 +epoch: 2, batch: 27934, sum loss: 4018.212158, avg loss: 2.430860, ppl: 11.368658 +epoch: 2, batch: 27935, sum loss: 3781.709473, avg loss: 2.312972, ppl: 10.104411 +epoch: 2, batch: 27936, sum loss: 3381.988281, avg loss: 2.429589, ppl: 11.354218 +epoch: 2, batch: 27937, sum loss: 4949.692383, avg loss: 2.681307, ppl: 14.604166 +epoch: 2, batch: 27938, sum loss: 4441.452148, avg loss: 2.763816, ppl: 15.860249 +epoch: 2, batch: 27939, sum loss: 4250.708008, avg loss: 2.461325, ppl: 11.720330 +epoch: 2, batch: 27940, sum loss: 4407.548828, avg loss: 2.498610, ppl: 12.165578 +epoch: 2, batch: 27941, sum loss: 4106.560059, avg loss: 2.461967, ppl: 11.727852 +epoch: 2, batch: 27942, sum loss: 5181.562500, avg loss: 2.714281, ppl: 15.093755 +epoch: 2, batch: 27943, sum loss: 4353.891602, avg loss: 2.515246, ppl: 12.369657 +epoch: 2, batch: 27944, sum loss: 3039.415527, avg loss: 1.990449, ppl: 7.318819 +epoch: 2, batch: 27945, sum loss: 3689.000732, avg loss: 2.423785, ppl: 11.288507 +epoch: 2, batch: 27946, sum loss: 4845.759766, avg loss: 2.518586, ppl: 12.411037 +epoch: 2, batch: 27947, sum loss: 4339.375488, avg loss: 2.729167, ppl: 15.320120 +epoch: 2, batch: 27948, sum loss: 4618.605469, avg loss: 2.463256, ppl: 11.742989 +epoch: 2, batch: 27949, sum loss: 4363.291992, avg loss: 2.597198, ppl: 13.426063 +epoch: 2, batch: 27950, sum loss: 5026.628906, avg loss: 2.686600, ppl: 14.681676 +epoch: 2, batch: 27951, sum loss: 3956.412109, avg loss: 2.330043, ppl: 10.278379 +epoch: 2, batch: 27952, sum loss: 4273.524414, avg loss: 2.395473, 
ppl: 10.973392 +epoch: 2, batch: 27953, sum loss: 4071.041748, avg loss: 2.467298, ppl: 11.790546 +epoch: 2, batch: 27954, sum loss: 4033.215576, avg loss: 2.457779, ppl: 11.678846 +epoch: 2, batch: 27955, sum loss: 4139.267578, avg loss: 2.388498, ppl: 10.897118 +epoch: 2, batch: 27956, sum loss: 3894.502686, avg loss: 2.276156, ppl: 9.739170 +epoch: 2, batch: 27957, sum loss: 5353.178711, avg loss: 2.783764, ppl: 16.179810 +epoch: 2, batch: 27958, sum loss: 4972.544922, avg loss: 2.604790, ppl: 13.528390 +epoch: 2, batch: 27959, sum loss: 4264.469727, avg loss: 2.217613, ppl: 9.185379 +epoch: 2, batch: 27960, sum loss: 4441.185059, avg loss: 2.526271, ppl: 12.506785 +epoch: 2, batch: 27961, sum loss: 3743.835693, avg loss: 2.291209, ppl: 9.886886 +epoch: 2, batch: 27962, sum loss: 4967.285156, avg loss: 2.563099, ppl: 12.975964 +epoch: 2, batch: 27963, sum loss: 4424.403809, avg loss: 2.558938, ppl: 12.922087 +epoch: 2, batch: 27964, sum loss: 4052.562012, avg loss: 2.257695, ppl: 9.561023 +epoch: 2, batch: 27965, sum loss: 3688.483398, avg loss: 2.478819, ppl: 11.927175 +epoch: 2, batch: 27966, sum loss: 4765.804199, avg loss: 2.573328, ppl: 13.109384 +epoch: 2, batch: 27967, sum loss: 3973.524658, avg loss: 2.344262, ppl: 10.425580 +epoch: 2, batch: 27968, sum loss: 4180.329590, avg loss: 2.494230, ppl: 12.112407 +epoch: 2, batch: 27969, sum loss: 4576.773438, avg loss: 2.506448, ppl: 12.261298 +epoch: 2, batch: 27970, sum loss: 3908.386475, avg loss: 2.373034, ppl: 10.729893 +epoch: 2, batch: 27971, sum loss: 4745.030762, avg loss: 2.607160, ppl: 13.560482 +epoch: 2, batch: 27972, sum loss: 4768.893555, avg loss: 2.659729, ppl: 14.292412 +epoch: 2, batch: 27973, sum loss: 4606.860840, avg loss: 2.479473, ppl: 11.934972 +epoch: 2, batch: 27974, sum loss: 4161.814941, avg loss: 2.469920, ppl: 11.821500 +epoch: 2, batch: 27975, sum loss: 4265.519043, avg loss: 2.542026, ppl: 12.705380 +epoch: 2, batch: 27976, sum loss: 3719.717041, avg loss: 2.346825, ppl: 
10.452327 +epoch: 2, batch: 27977, sum loss: 4453.702148, avg loss: 2.646288, ppl: 14.101592 +epoch: 2, batch: 27978, sum loss: 4479.592285, avg loss: 2.893794, ppl: 18.061699 +epoch: 2, batch: 27979, sum loss: 5085.188965, avg loss: 2.610467, ppl: 13.605399 +epoch: 2, batch: 27980, sum loss: 3186.061768, avg loss: 2.142610, ppl: 8.521653 +epoch: 2, batch: 27981, sum loss: 4061.454590, avg loss: 2.485590, ppl: 12.008208 +epoch: 2, batch: 27982, sum loss: 4681.709961, avg loss: 2.686007, ppl: 14.672966 +epoch: 2, batch: 27983, sum loss: 5206.998535, avg loss: 2.593127, ppl: 13.371516 +epoch: 2, batch: 27984, sum loss: 5093.273438, avg loss: 2.648608, ppl: 14.134350 +epoch: 2, batch: 27985, sum loss: 4552.794922, avg loss: 2.622578, ppl: 13.771175 +epoch: 2, batch: 27986, sum loss: 3934.858887, avg loss: 2.344970, ppl: 10.432955 +epoch: 2, batch: 27987, sum loss: 4981.402344, avg loss: 2.825526, ppl: 16.869816 +epoch: 2, batch: 27988, sum loss: 3670.927734, avg loss: 2.372933, ppl: 10.728818 +epoch: 2, batch: 27989, sum loss: 4969.597656, avg loss: 2.739580, ppl: 15.480480 +epoch: 2, batch: 27990, sum loss: 3652.134521, avg loss: 2.423447, ppl: 11.284690 +epoch: 2, batch: 27991, sum loss: 3945.581543, avg loss: 2.478380, ppl: 11.921941 +epoch: 2, batch: 27992, sum loss: 3280.436279, avg loss: 2.240735, ppl: 9.400238 +epoch: 2, batch: 27993, sum loss: 4168.217285, avg loss: 2.558758, ppl: 12.919765 +epoch: 2, batch: 27994, sum loss: 3719.169678, avg loss: 2.507869, ppl: 12.278736 +epoch: 2, batch: 27995, sum loss: 4349.053223, avg loss: 2.714765, ppl: 15.101058 +epoch: 2, batch: 27996, sum loss: 4081.936035, avg loss: 2.495071, ppl: 12.122593 +epoch: 2, batch: 27997, sum loss: 3868.081787, avg loss: 2.278022, ppl: 9.757364 +epoch: 2, batch: 27998, sum loss: 3269.216553, avg loss: 2.198532, ppl: 9.011771 +epoch: 2, batch: 27999, sum loss: 4623.642578, avg loss: 2.446372, ppl: 11.546378 +epoch: 2, batch: 28000, sum loss: 3818.666748, avg loss: 2.595967, ppl: 13.409542 
+epoch: 2, batch: 28001, sum loss: 4232.219727, avg loss: 2.546462, ppl: 12.761873 +epoch: 2, batch: 28002, sum loss: 4417.543457, avg loss: 2.534448, ppl: 12.609473 +epoch: 2, batch: 28003, sum loss: 3799.105957, avg loss: 2.452618, ppl: 11.618729 +epoch: 2, batch: 28004, sum loss: 4655.724609, avg loss: 2.689615, ppl: 14.726012 +epoch: 2, batch: 28005, sum loss: 4462.012695, avg loss: 2.509568, ppl: 12.299621 +epoch: 2, batch: 28006, sum loss: 4497.919434, avg loss: 2.534039, ppl: 12.604313 +epoch: 2, batch: 28007, sum loss: 4381.510742, avg loss: 2.483850, ppl: 11.987324 +epoch: 2, batch: 28008, sum loss: 4964.490234, avg loss: 2.592423, ppl: 13.362108 +epoch: 2, batch: 28009, sum loss: 3733.757324, avg loss: 2.287842, ppl: 9.853646 +epoch: 2, batch: 28010, sum loss: 4096.020508, avg loss: 2.393934, ppl: 10.956510 +epoch: 2, batch: 28011, sum loss: 4282.539062, avg loss: 2.501483, ppl: 12.200574 +epoch: 2, batch: 28012, sum loss: 3819.311523, avg loss: 2.522663, ppl: 12.461737 +epoch: 2, batch: 28013, sum loss: 4206.786621, avg loss: 2.378059, ppl: 10.783953 +epoch: 2, batch: 28014, sum loss: 4148.344238, avg loss: 2.472196, ppl: 11.848433 +epoch: 2, batch: 28015, sum loss: 2972.293945, avg loss: 2.118527, ppl: 8.318878 +epoch: 2, batch: 28016, sum loss: 3865.144043, avg loss: 2.260318, ppl: 9.586138 +epoch: 2, batch: 28017, sum loss: 3419.904785, avg loss: 2.192247, ppl: 8.955310 +epoch: 2, batch: 28018, sum loss: 3640.912598, avg loss: 2.260033, ppl: 9.583402 +epoch: 2, batch: 28019, sum loss: 3875.922119, avg loss: 2.411899, ppl: 11.155128 +epoch: 2, batch: 28020, sum loss: 3485.694336, avg loss: 2.410577, ppl: 11.140388 +epoch: 2, batch: 28021, sum loss: 3973.022461, avg loss: 2.265121, ppl: 9.632290 +epoch: 2, batch: 28022, sum loss: 4740.737305, avg loss: 2.484663, ppl: 11.997077 +epoch: 2, batch: 28023, sum loss: 3751.401855, avg loss: 2.415584, ppl: 11.196308 +epoch: 2, batch: 28024, sum loss: 3809.973145, avg loss: 2.480451, ppl: 11.946655 +epoch: 2, 
batch: 28025, sum loss: 4315.848633, avg loss: 2.609340, ppl: 13.590081 +epoch: 2, batch: 28026, sum loss: 5034.025391, avg loss: 2.574949, ppl: 13.130648 +epoch: 2, batch: 28027, sum loss: 3306.131104, avg loss: 2.353118, ppl: 10.518316 +epoch: 2, batch: 28028, sum loss: 3767.358398, avg loss: 2.341429, ppl: 10.396077 +epoch: 2, batch: 28029, sum loss: 4035.339600, avg loss: 2.433860, ppl: 11.402810 +epoch: 2, batch: 28030, sum loss: 4071.889893, avg loss: 2.292731, ppl: 9.901941 +epoch: 2, batch: 28031, sum loss: 4478.526367, avg loss: 2.929056, ppl: 18.709959 +epoch: 2, batch: 28032, sum loss: 3556.800537, avg loss: 2.406496, ppl: 11.095014 +epoch: 2, batch: 28033, sum loss: 3368.525635, avg loss: 2.163472, ppl: 8.701296 +epoch: 2, batch: 28034, sum loss: 4876.207520, avg loss: 2.521307, ppl: 12.444848 +epoch: 2, batch: 28035, sum loss: 4360.911133, avg loss: 2.606642, ppl: 13.553455 +epoch: 2, batch: 28036, sum loss: 4747.767578, avg loss: 2.532143, ppl: 12.580433 +epoch: 2, batch: 28037, sum loss: 3817.894531, avg loss: 2.217128, ppl: 9.180925 +epoch: 2, batch: 28038, sum loss: 3407.624512, avg loss: 2.338795, ppl: 10.368736 +epoch: 2, batch: 28039, sum loss: 4384.442383, avg loss: 2.482697, ppl: 11.973508 +epoch: 2, batch: 28040, sum loss: 4409.506836, avg loss: 2.515406, ppl: 12.371633 +epoch: 2, batch: 28041, sum loss: 4175.075195, avg loss: 2.356137, ppl: 10.550120 +epoch: 2, batch: 28042, sum loss: 4175.975098, avg loss: 2.318698, ppl: 10.162434 +epoch: 2, batch: 28043, sum loss: 4520.418945, avg loss: 2.641975, ppl: 14.040906 +epoch: 2, batch: 28044, sum loss: 4439.378906, avg loss: 2.554303, ppl: 12.862333 +epoch: 2, batch: 28045, sum loss: 3712.345703, avg loss: 2.390435, ppl: 10.918244 +epoch: 2, batch: 28046, sum loss: 3617.555176, avg loss: 2.304175, ppl: 10.015916 +epoch: 2, batch: 28047, sum loss: 4691.986328, avg loss: 2.555548, ppl: 12.878354 +epoch: 2, batch: 28048, sum loss: 2994.376953, avg loss: 2.050943, ppl: 7.775231 +epoch: 2, batch: 
28049, sum loss: 4606.635254, avg loss: 2.748589, ppl: 15.620580 +epoch: 2, batch: 28050, sum loss: 4625.009766, avg loss: 2.498655, ppl: 12.166115 +epoch: 2, batch: 28051, sum loss: 3893.082520, avg loss: 2.359444, ppl: 10.585064 +epoch: 2, batch: 28052, sum loss: 5478.697266, avg loss: 2.596539, ppl: 13.417221 +epoch: 2, batch: 28053, sum loss: 4419.757812, avg loss: 2.574116, ppl: 13.119720 +epoch: 2, batch: 28054, sum loss: 4421.045898, avg loss: 2.537914, ppl: 12.653246 +epoch: 2, batch: 28055, sum loss: 3879.835693, avg loss: 2.275563, ppl: 9.733402 +epoch: 2, batch: 28056, sum loss: 4149.115723, avg loss: 2.272243, ppl: 9.701137 +epoch: 2, batch: 28057, sum loss: 4217.884766, avg loss: 2.703772, ppl: 14.935968 +epoch: 2, batch: 28058, sum loss: 4891.763672, avg loss: 2.808131, ppl: 16.578899 +epoch: 2, batch: 28059, sum loss: 4493.821289, avg loss: 2.524619, ppl: 12.486133 +epoch: 2, batch: 28060, sum loss: 3942.714844, avg loss: 2.382305, ppl: 10.829839 +epoch: 2, batch: 28061, sum loss: 4104.606445, avg loss: 2.505865, ppl: 12.254149 +epoch: 2, batch: 28062, sum loss: 3553.051270, avg loss: 2.505678, ppl: 12.251863 +epoch: 2, batch: 28063, sum loss: 3893.886963, avg loss: 2.477027, ppl: 11.905821 +epoch: 2, batch: 28064, sum loss: 4478.241211, avg loss: 2.249242, ppl: 9.480550 +epoch: 2, batch: 28065, sum loss: 4314.442383, avg loss: 2.251797, ppl: 9.504798 +epoch: 2, batch: 28066, sum loss: 3465.555664, avg loss: 2.324317, ppl: 10.219692 +epoch: 2, batch: 28067, sum loss: 4166.907715, avg loss: 2.484739, ppl: 11.997992 +epoch: 2, batch: 28068, sum loss: 4299.278320, avg loss: 2.596183, ppl: 13.412439 +epoch: 2, batch: 28069, sum loss: 3646.318848, avg loss: 2.450483, ppl: 11.593946 +epoch: 2, batch: 28070, sum loss: 3806.813965, avg loss: 2.278165, ppl: 9.758759 +epoch: 2, batch: 28071, sum loss: 4673.449219, avg loss: 2.479283, ppl: 11.932710 +epoch: 2, batch: 28072, sum loss: 4032.442627, avg loss: 2.490700, ppl: 12.069716 +epoch: 2, batch: 28073, sum 
loss: 5002.453125, avg loss: 2.690938, ppl: 14.745494 +epoch: 2, batch: 28074, sum loss: 4402.828613, avg loss: 2.523111, ppl: 12.467320 +epoch: 2, batch: 28075, sum loss: 3637.879395, avg loss: 2.204775, ppl: 9.068214 +epoch: 2, batch: 28076, sum loss: 3732.627930, avg loss: 2.248571, ppl: 9.474189 +epoch: 2, batch: 28077, sum loss: 3881.180908, avg loss: 2.356515, ppl: 10.554111 +epoch: 2, batch: 28078, sum loss: 3378.143311, avg loss: 2.426827, ppl: 11.322900 +epoch: 2, batch: 28079, sum loss: 3992.421875, avg loss: 2.394974, ppl: 10.967913 +epoch: 2, batch: 28080, sum loss: 3810.359375, avg loss: 2.299553, ppl: 9.969724 +epoch: 2, batch: 28081, sum loss: 4088.129639, avg loss: 2.334740, ppl: 10.326777 +epoch: 2, batch: 28082, sum loss: 3083.007324, avg loss: 2.340932, ppl: 10.390915 +epoch: 2, batch: 28083, sum loss: 4173.508789, avg loss: 2.466613, ppl: 11.782470 +epoch: 2, batch: 28084, sum loss: 5562.366211, avg loss: 2.974527, ppl: 19.580366 +epoch: 2, batch: 28085, sum loss: 4427.237793, avg loss: 2.445988, ppl: 11.541946 +epoch: 2, batch: 28086, sum loss: 4260.900879, avg loss: 2.448794, ppl: 11.574375 +epoch: 2, batch: 28087, sum loss: 3096.649170, avg loss: 2.232624, ppl: 9.324299 +epoch: 2, batch: 28088, sum loss: 4249.301758, avg loss: 2.466223, ppl: 11.777875 +epoch: 2, batch: 28089, sum loss: 4404.880859, avg loss: 2.793203, ppl: 16.333250 +epoch: 2, batch: 28090, sum loss: 5013.555664, avg loss: 2.987816, ppl: 19.842306 +epoch: 2, batch: 28091, sum loss: 4151.395020, avg loss: 2.549997, ppl: 12.807063 +epoch: 2, batch: 28092, sum loss: 3612.301514, avg loss: 2.465735, ppl: 11.772128 +epoch: 2, batch: 28093, sum loss: 4678.298828, avg loss: 2.509817, ppl: 12.302677 +epoch: 2, batch: 28094, sum loss: 4610.332031, avg loss: 2.500180, ppl: 12.184687 +epoch: 2, batch: 28095, sum loss: 3938.974365, avg loss: 2.395970, ppl: 10.978841 +epoch: 2, batch: 28096, sum loss: 4612.094727, avg loss: 2.531336, ppl: 12.570292 +epoch: 2, batch: 28097, sum loss: 
4039.139893, avg loss: 2.356558, ppl: 10.554556 +epoch: 2, batch: 28098, sum loss: 4358.571289, avg loss: 2.560853, ppl: 12.946854 +epoch: 2, batch: 28099, sum loss: 3965.167969, avg loss: 2.317457, ppl: 10.149828 +epoch: 2, batch: 28100, sum loss: 4100.681152, avg loss: 2.306345, ppl: 10.037670 +epoch: 2, batch: 28101, sum loss: 3867.329590, avg loss: 2.322721, ppl: 10.203395 +epoch: 2, batch: 28102, sum loss: 4215.041504, avg loss: 2.488218, ppl: 12.039803 +epoch: 2, batch: 28103, sum loss: 4249.324707, avg loss: 2.544506, ppl: 12.736933 +epoch: 2, batch: 28104, sum loss: 4226.821289, avg loss: 2.571059, ppl: 13.079672 +epoch: 2, batch: 28105, sum loss: 3909.128662, avg loss: 2.386525, ppl: 10.875640 +epoch: 2, batch: 28106, sum loss: 3850.179443, avg loss: 2.498494, ppl: 12.164163 +epoch: 2, batch: 28107, sum loss: 3597.192871, avg loss: 2.433825, ppl: 11.402411 +epoch: 2, batch: 28108, sum loss: 4396.810059, avg loss: 2.426496, ppl: 11.319145 +epoch: 2, batch: 28109, sum loss: 4551.108398, avg loss: 2.579993, ppl: 13.197052 +epoch: 2, batch: 28110, sum loss: 4362.204590, avg loss: 2.415396, ppl: 11.194200 +epoch: 2, batch: 28111, sum loss: 4073.425537, avg loss: 2.426102, ppl: 11.314693 +epoch: 2, batch: 28112, sum loss: 3789.557129, avg loss: 2.531434, ppl: 12.571522 +epoch: 2, batch: 28113, sum loss: 4094.598145, avg loss: 2.453324, ppl: 11.626934 +epoch: 2, batch: 28114, sum loss: 4960.993652, avg loss: 2.705013, ppl: 14.954508 +epoch: 2, batch: 28115, sum loss: 4610.753906, avg loss: 2.588857, ppl: 13.314544 +epoch: 2, batch: 28116, sum loss: 4378.569824, avg loss: 2.399216, ppl: 11.014543 +epoch: 2, batch: 28117, sum loss: 4455.433594, avg loss: 2.585858, ppl: 13.274675 +epoch: 2, batch: 28118, sum loss: 4149.214355, avg loss: 2.526927, ppl: 12.514992 +epoch: 2, batch: 28119, sum loss: 4860.690918, avg loss: 2.718507, ppl: 15.157680 +epoch: 2, batch: 28120, sum loss: 5038.857910, avg loss: 2.745972, ppl: 15.579745 +epoch: 2, batch: 28121, sum loss: 
4618.560059, avg loss: 2.584533, ppl: 13.257093 +epoch: 2, batch: 28122, sum loss: 3836.135010, avg loss: 2.385656, ppl: 10.866190 +epoch: 2, batch: 28123, sum loss: 4529.889160, avg loss: 2.451239, ppl: 11.602710 +epoch: 2, batch: 28124, sum loss: 3984.607910, avg loss: 2.541204, ppl: 12.694946 +epoch: 2, batch: 28125, sum loss: 4827.271484, avg loss: 2.661120, ppl: 14.312309 +epoch: 2, batch: 28126, sum loss: 4616.533691, avg loss: 2.563317, ppl: 12.978794 +epoch: 2, batch: 28127, sum loss: 3498.657959, avg loss: 2.122972, ppl: 8.355934 +epoch: 2, batch: 28128, sum loss: 4261.613281, avg loss: 2.512744, ppl: 12.338737 +epoch: 2, batch: 28129, sum loss: 5062.709473, avg loss: 2.780181, ppl: 16.121937 +epoch: 2, batch: 28130, sum loss: 3978.220703, avg loss: 2.250125, ppl: 9.488921 +epoch: 2, batch: 28131, sum loss: 3129.964844, avg loss: 2.012839, ppl: 7.484538 +epoch: 2, batch: 28132, sum loss: 5108.155762, avg loss: 2.681447, ppl: 14.606207 +epoch: 2, batch: 28133, sum loss: 3713.982422, avg loss: 2.502683, ppl: 12.215228 +epoch: 2, batch: 28134, sum loss: 5273.734375, avg loss: 2.692054, ppl: 14.761970 +epoch: 2, batch: 28135, sum loss: 4806.208984, avg loss: 2.946787, ppl: 19.044659 +epoch: 2, batch: 28136, sum loss: 4244.522461, avg loss: 2.452064, ppl: 11.612288 +epoch: 2, batch: 28137, sum loss: 4450.901367, avg loss: 2.644624, ppl: 14.078144 +epoch: 2, batch: 28138, sum loss: 4193.573242, avg loss: 2.439543, ppl: 11.467793 +epoch: 2, batch: 28139, sum loss: 3800.451172, avg loss: 2.464625, ppl: 11.759068 +epoch: 2, batch: 28140, sum loss: 4439.016602, avg loss: 2.505088, ppl: 12.244640 +epoch: 2, batch: 28141, sum loss: 4220.315918, avg loss: 2.374967, ppl: 10.750654 +epoch: 2, batch: 28142, sum loss: 4590.155273, avg loss: 2.657878, ppl: 14.265986 +epoch: 2, batch: 28143, sum loss: 3529.832764, avg loss: 2.442791, ppl: 11.505107 +epoch: 2, batch: 28144, sum loss: 5391.571777, avg loss: 2.712058, ppl: 15.060239 +epoch: 2, batch: 28145, sum loss: 
4086.341797, avg loss: 2.331056, ppl: 10.288804 +epoch: 2, batch: 28146, sum loss: 4027.518311, avg loss: 2.329392, ppl: 10.271691 +epoch: 2, batch: 28147, sum loss: 4397.831543, avg loss: 2.428400, ppl: 11.340720 +epoch: 2, batch: 28148, sum loss: 4871.981445, avg loss: 2.760329, ppl: 15.805050 +epoch: 2, batch: 28149, sum loss: 4148.510742, avg loss: 2.423195, ppl: 11.281852 +epoch: 2, batch: 28150, sum loss: 5173.124023, avg loss: 2.615330, ppl: 13.671734 +epoch: 2, batch: 28151, sum loss: 4471.074219, avg loss: 2.576988, ppl: 13.157448 +epoch: 2, batch: 28152, sum loss: 4336.175293, avg loss: 2.671704, ppl: 14.464593 +epoch: 2, batch: 28153, sum loss: 3208.583008, avg loss: 2.264349, ppl: 9.624861 +epoch: 2, batch: 28154, sum loss: 4304.442383, avg loss: 2.665289, ppl: 14.372108 +epoch: 2, batch: 28155, sum loss: 4596.831543, avg loss: 2.823606, ppl: 16.837465 +epoch: 2, batch: 28156, sum loss: 5450.302246, avg loss: 2.891407, ppl: 18.018644 +epoch: 2, batch: 28157, sum loss: 2815.888672, avg loss: 1.933990, ppl: 6.917051 +epoch: 2, batch: 28158, sum loss: 3351.041016, avg loss: 2.089178, ppl: 8.078271 +epoch: 2, batch: 28159, sum loss: 5006.558594, avg loss: 2.687364, ppl: 14.692889 +epoch: 2, batch: 28160, sum loss: 4441.361816, avg loss: 2.645242, ppl: 14.086857 +epoch: 2, batch: 28161, sum loss: 4612.109375, avg loss: 2.722615, ppl: 15.220067 +epoch: 2, batch: 28162, sum loss: 4955.594238, avg loss: 2.867821, ppl: 17.598625 +epoch: 2, batch: 28163, sum loss: 4352.897949, avg loss: 2.562035, ppl: 12.962173 +epoch: 2, batch: 28164, sum loss: 5152.542969, avg loss: 2.911041, ppl: 18.375923 +epoch: 2, batch: 28165, sum loss: 4335.948242, avg loss: 2.718463, ppl: 15.157007 +epoch: 2, batch: 28166, sum loss: 4158.023438, avg loss: 2.650110, ppl: 14.155602 +epoch: 2, batch: 28167, sum loss: 4872.400879, avg loss: 2.514139, ppl: 12.355965 +epoch: 2, batch: 28168, sum loss: 3965.772217, avg loss: 2.342453, ppl: 10.406731 +epoch: 2, batch: 28169, sum loss: 
4567.862305, avg loss: 2.564774, ppl: 12.997721 +epoch: 2, batch: 28170, sum loss: 4312.514648, avg loss: 2.743330, ppl: 15.538643 +epoch: 2, batch: 28171, sum loss: 5006.278809, avg loss: 2.723764, ppl: 15.237572 +epoch: 2, batch: 28172, sum loss: 3874.857178, avg loss: 2.303720, ppl: 10.011355 +epoch: 2, batch: 28173, sum loss: 4179.121094, avg loss: 2.397660, ppl: 10.997410 +epoch: 2, batch: 28174, sum loss: 4834.962891, avg loss: 2.600841, ppl: 13.475060 +epoch: 2, batch: 28175, sum loss: 3773.237793, avg loss: 2.439068, ppl: 11.462351 +epoch: 2, batch: 28176, sum loss: 3377.928223, avg loss: 2.315235, ppl: 10.127306 +epoch: 2, batch: 28177, sum loss: 4448.454102, avg loss: 2.514672, ppl: 12.362551 +epoch: 2, batch: 28178, sum loss: 3549.167236, avg loss: 2.262057, ppl: 9.602820 +epoch: 2, batch: 28179, sum loss: 4228.854492, avg loss: 2.584874, ppl: 13.261623 +epoch: 2, batch: 28180, sum loss: 3926.408203, avg loss: 2.554592, ppl: 12.866054 +epoch: 2, batch: 28181, sum loss: 3556.839355, avg loss: 2.406522, ppl: 11.095304 +epoch: 2, batch: 28182, sum loss: 2919.988770, avg loss: 2.119005, ppl: 8.322852 +epoch: 2, batch: 28183, sum loss: 3250.231201, avg loss: 2.084818, ppl: 8.043129 +epoch: 2, batch: 28184, sum loss: 5752.852539, avg loss: 2.876426, ppl: 17.750727 +epoch: 2, batch: 28185, sum loss: 4274.585938, avg loss: 2.612828, ppl: 13.637561 +epoch: 2, batch: 28186, sum loss: 4028.427979, avg loss: 2.422386, ppl: 11.272726 +epoch: 2, batch: 28187, sum loss: 5637.875000, avg loss: 3.006867, ppl: 20.223932 +epoch: 2, batch: 28188, sum loss: 4126.297852, avg loss: 2.570902, ppl: 13.077617 +epoch: 2, batch: 28189, sum loss: 4199.206543, avg loss: 2.593704, ppl: 13.379236 +epoch: 2, batch: 28190, sum loss: 4464.292480, avg loss: 2.674831, ppl: 14.509896 +epoch: 2, batch: 28191, sum loss: 3676.499512, avg loss: 2.331325, ppl: 10.291570 +epoch: 2, batch: 28192, sum loss: 3414.602783, avg loss: 2.341977, ppl: 10.401782 +epoch: 2, batch: 28193, sum loss: 
4283.712891, avg loss: 2.546797, ppl: 12.766151 +epoch: 2, batch: 28194, sum loss: 4223.459473, avg loss: 2.550398, ppl: 12.812203 +epoch: 2, batch: 28195, sum loss: 4105.929199, avg loss: 2.603633, ppl: 13.512740 +epoch: 2, batch: 28196, sum loss: 4234.380371, avg loss: 2.521966, ppl: 12.453053 +epoch: 2, batch: 28197, sum loss: 4310.021484, avg loss: 2.607394, ppl: 13.563651 +epoch: 2, batch: 28198, sum loss: 3421.321289, avg loss: 2.249389, ppl: 9.481944 +epoch: 2, batch: 28199, sum loss: 4180.894043, avg loss: 2.406963, ppl: 11.100195 +epoch: 2, batch: 28200, sum loss: 4381.632324, avg loss: 2.445107, ppl: 11.531789 +epoch: 2, batch: 28201, sum loss: 3925.504150, avg loss: 2.572414, ppl: 13.097408 +epoch: 2, batch: 28202, sum loss: 4376.988770, avg loss: 2.443880, ppl: 11.517638 +epoch: 2, batch: 28203, sum loss: 3675.632812, avg loss: 2.347147, ppl: 10.455699 +epoch: 2, batch: 28204, sum loss: 3448.852539, avg loss: 2.263026, ppl: 9.612136 +epoch: 2, batch: 28205, sum loss: 4106.401367, avg loss: 2.438481, ppl: 11.455622 +epoch: 2, batch: 28206, sum loss: 4058.265137, avg loss: 2.488207, ppl: 12.039665 +epoch: 2, batch: 28207, sum loss: 4358.024414, avg loss: 2.393204, ppl: 10.948517 +epoch: 2, batch: 28208, sum loss: 4885.725098, avg loss: 2.626734, ppl: 13.828532 +epoch: 2, batch: 28209, sum loss: 3499.247559, avg loss: 2.342201, ppl: 10.404106 +epoch: 2, batch: 28210, sum loss: 4437.561035, avg loss: 2.660408, ppl: 14.302127 +epoch: 2, batch: 28211, sum loss: 5184.337402, avg loss: 2.715735, ppl: 15.115712 +epoch: 2, batch: 28212, sum loss: 4194.050781, avg loss: 2.685052, ppl: 14.658959 +epoch: 2, batch: 28213, sum loss: 4330.979492, avg loss: 2.446881, ppl: 11.552259 +epoch: 2, batch: 28214, sum loss: 3838.638916, avg loss: 2.334939, ppl: 10.328825 +epoch: 2, batch: 28215, sum loss: 3934.983887, avg loss: 2.484207, ppl: 11.991606 +epoch: 2, batch: 28216, sum loss: 4416.818848, avg loss: 2.453788, ppl: 11.632330 +epoch: 2, batch: 28217, sum loss: 
4597.156250, avg loss: 2.516232, ppl: 12.381857 +epoch: 2, batch: 28218, sum loss: 4361.519531, avg loss: 2.531352, ppl: 12.570491 +epoch: 2, batch: 28219, sum loss: 2785.991455, avg loss: 1.968899, ppl: 7.162783 +epoch: 2, batch: 28220, sum loss: 4237.069336, avg loss: 2.601025, ppl: 13.477540 +epoch: 2, batch: 28221, sum loss: 5232.241699, avg loss: 2.816061, ppl: 16.710897 +epoch: 2, batch: 28222, sum loss: 4623.703125, avg loss: 2.551713, ppl: 12.829055 +epoch: 2, batch: 28223, sum loss: 4152.208984, avg loss: 2.498321, ppl: 12.162054 +epoch: 2, batch: 28224, sum loss: 3380.425049, avg loss: 2.507734, ppl: 12.277074 +epoch: 2, batch: 28225, sum loss: 3614.529541, avg loss: 2.342534, ppl: 10.407574 +epoch: 2, batch: 28226, sum loss: 3918.277588, avg loss: 2.386284, ppl: 10.873011 +epoch: 2, batch: 28227, sum loss: 5732.903809, avg loss: 2.743016, ppl: 15.533768 +epoch: 2, batch: 28228, sum loss: 3774.835449, avg loss: 2.276740, ppl: 9.744864 +epoch: 2, batch: 28229, sum loss: 3524.549316, avg loss: 2.058732, ppl: 7.836028 +epoch: 2, batch: 28230, sum loss: 4164.656250, avg loss: 2.390733, ppl: 10.921492 +epoch: 2, batch: 28231, sum loss: 4546.153320, avg loss: 2.433701, ppl: 11.400997 +epoch: 2, batch: 28232, sum loss: 4638.370117, avg loss: 2.626484, ppl: 13.825071 +epoch: 2, batch: 28233, sum loss: 4399.122070, avg loss: 2.433143, ppl: 11.394635 +epoch: 2, batch: 28234, sum loss: 4009.694824, avg loss: 2.447921, ppl: 11.564280 +epoch: 2, batch: 28235, sum loss: 4104.368652, avg loss: 2.535126, ppl: 12.618017 +epoch: 2, batch: 28236, sum loss: 4714.823242, avg loss: 2.425321, ppl: 11.305857 +epoch: 2, batch: 28237, sum loss: 4833.752930, avg loss: 2.768473, ppl: 15.934278 +epoch: 2, batch: 28238, sum loss: 4168.572266, avg loss: 2.349815, ppl: 10.483631 +epoch: 2, batch: 28239, sum loss: 4284.471680, avg loss: 2.463756, ppl: 11.748858 +epoch: 2, batch: 28240, sum loss: 5108.005371, avg loss: 2.670154, ppl: 14.442198 +epoch: 2, batch: 28241, sum loss: 
3770.876221, avg loss: 2.346532, ppl: 10.449265 +epoch: 2, batch: 28242, sum loss: 2916.911133, avg loss: 2.199782, ppl: 9.023047 +epoch: 2, batch: 28243, sum loss: 3718.180664, avg loss: 2.283895, ppl: 9.814833 +epoch: 2, batch: 28244, sum loss: 4385.682129, avg loss: 2.667690, ppl: 14.406649 +epoch: 2, batch: 28245, sum loss: 4421.946289, avg loss: 2.630545, ppl: 13.881331 +epoch: 2, batch: 28246, sum loss: 4699.495605, avg loss: 2.646112, ppl: 14.099121 +epoch: 2, batch: 28247, sum loss: 4521.594238, avg loss: 2.492610, ppl: 12.092797 +epoch: 2, batch: 28248, sum loss: 4213.690430, avg loss: 2.372574, ppl: 10.724958 +epoch: 2, batch: 28249, sum loss: 4656.997070, avg loss: 2.635539, ppl: 13.950828 +epoch: 2, batch: 28250, sum loss: 4173.796875, avg loss: 2.468242, ppl: 11.801681 +epoch: 2, batch: 28251, sum loss: 4176.162109, avg loss: 2.327850, ppl: 10.255864 +epoch: 2, batch: 28252, sum loss: 3806.236572, avg loss: 2.336548, ppl: 10.345461 +epoch: 2, batch: 28253, sum loss: 3404.778809, avg loss: 2.375980, ppl: 10.761551 +epoch: 2, batch: 28254, sum loss: 3138.423828, avg loss: 2.079804, ppl: 8.002898 +epoch: 2, batch: 28255, sum loss: 4377.889160, avg loss: 2.318797, ppl: 10.163442 +epoch: 2, batch: 28256, sum loss: 4001.631592, avg loss: 2.345622, ppl: 10.439768 +epoch: 2, batch: 28257, sum loss: 4104.895020, avg loss: 2.480299, ppl: 11.944839 +epoch: 2, batch: 28258, sum loss: 3202.591797, avg loss: 2.260121, ppl: 9.584252 +epoch: 2, batch: 28259, sum loss: 3647.222168, avg loss: 2.416980, ppl: 11.211946 +epoch: 2, batch: 28260, sum loss: 3557.885010, avg loss: 2.276318, ppl: 9.740749 +epoch: 2, batch: 28261, sum loss: 4189.215332, avg loss: 2.538918, ppl: 12.665965 +epoch: 2, batch: 28262, sum loss: 4594.887207, avg loss: 2.722090, ppl: 15.212079 +epoch: 2, batch: 28263, sum loss: 3442.824707, avg loss: 2.323094, ppl: 10.207203 +epoch: 2, batch: 28264, sum loss: 4127.450195, avg loss: 2.419373, ppl: 11.238808 +epoch: 2, batch: 28265, sum loss: 4243.979980, 
avg loss: 2.420981, ppl: 11.256899 +epoch: 2, batch: 28266, sum loss: 4634.172363, avg loss: 2.455841, ppl: 11.656233 +epoch: 2, batch: 28267, sum loss: 4342.171875, avg loss: 2.408304, ppl: 11.115093 +epoch: 2, batch: 28268, sum loss: 4079.038574, avg loss: 2.460216, ppl: 11.707343 +epoch: 2, batch: 28269, sum loss: 4113.219727, avg loss: 2.306910, ppl: 10.043339 +epoch: 2, batch: 28270, sum loss: 3758.741211, avg loss: 2.225424, ppl: 9.257407 +epoch: 2, batch: 28271, sum loss: 5255.709473, avg loss: 2.764708, ppl: 15.874401 +epoch: 2, batch: 28272, sum loss: 2989.193848, avg loss: 2.060092, ppl: 7.846694 +epoch: 2, batch: 28273, sum loss: 4059.198975, avg loss: 2.411883, ppl: 11.154944 +epoch: 2, batch: 28274, sum loss: 4859.047852, avg loss: 2.576378, ppl: 13.149420 +epoch: 2, batch: 28275, sum loss: 3982.352051, avg loss: 2.391803, ppl: 10.933189 +epoch: 2, batch: 28276, sum loss: 4723.083008, avg loss: 2.462504, ppl: 11.734159 +epoch: 2, batch: 28277, sum loss: 3857.838623, avg loss: 2.468227, ppl: 11.801503 +epoch: 2, batch: 28278, sum loss: 3774.045898, avg loss: 2.628166, ppl: 13.848345 +epoch: 2, batch: 28279, sum loss: 4689.589844, avg loss: 2.616959, ppl: 13.694012 +epoch: 2, batch: 28280, sum loss: 4180.402344, avg loss: 2.424828, ppl: 11.300281 +epoch: 2, batch: 28281, sum loss: 3947.345703, avg loss: 2.323335, ppl: 10.209666 +epoch: 2, batch: 28282, sum loss: 2917.211914, avg loss: 2.200009, ppl: 9.025096 +epoch: 2, batch: 28283, sum loss: 4776.514648, avg loss: 2.987189, ppl: 19.829853 +epoch: 2, batch: 28284, sum loss: 4456.662598, avg loss: 2.414227, ppl: 11.181122 +epoch: 2, batch: 28285, sum loss: 3980.592041, avg loss: 2.551662, ppl: 12.828403 +epoch: 2, batch: 28286, sum loss: 4450.689941, avg loss: 2.636665, ppl: 13.966545 +epoch: 2, batch: 28287, sum loss: 4650.973633, avg loss: 2.562520, ppl: 12.968457 +epoch: 2, batch: 28288, sum loss: 4290.748047, avg loss: 2.461703, ppl: 11.724760 +epoch: 2, batch: 28289, sum loss: 5241.566895, avg loss: 
2.510329, ppl: 12.308979 +epoch: 2, batch: 28290, sum loss: 3622.500488, avg loss: 2.589350, ppl: 13.321110 +epoch: 2, batch: 28291, sum loss: 3114.876953, avg loss: 1.946798, ppl: 7.006218 +epoch: 2, batch: 28292, sum loss: 4995.688965, avg loss: 2.427449, ppl: 11.329937 +epoch: 2, batch: 28293, sum loss: 3198.205811, avg loss: 2.038372, ppl: 7.678099 +epoch: 2, batch: 28294, sum loss: 4074.969238, avg loss: 2.390011, ppl: 10.913617 +epoch: 2, batch: 28295, sum loss: 3766.710938, avg loss: 2.394603, ppl: 10.963847 +epoch: 2, batch: 28296, sum loss: 5483.251465, avg loss: 2.682608, ppl: 14.623186 +epoch: 2, batch: 28297, sum loss: 3965.566895, avg loss: 2.610643, ppl: 13.607800 +epoch: 2, batch: 28298, sum loss: 4663.804688, avg loss: 2.738582, ppl: 15.465034 +epoch: 2, batch: 28299, sum loss: 4283.520508, avg loss: 2.315417, ppl: 10.129142 +epoch: 2, batch: 28300, sum loss: 4383.923340, avg loss: 2.483809, ppl: 11.986838 +epoch: 2, batch: 28301, sum loss: 4842.858887, avg loss: 2.722237, ppl: 15.214314 +epoch: 2, batch: 28302, sum loss: 3515.343750, avg loss: 2.470375, ppl: 11.826881 +epoch: 2, batch: 28303, sum loss: 4762.416504, avg loss: 2.675515, ppl: 14.519825 +epoch: 2, batch: 28304, sum loss: 4733.346680, avg loss: 2.532556, ppl: 12.585629 +epoch: 2, batch: 28305, sum loss: 4371.613770, avg loss: 2.359209, ppl: 10.582573 +epoch: 2, batch: 28306, sum loss: 4446.831543, avg loss: 2.728117, ppl: 15.304049 +epoch: 2, batch: 28307, sum loss: 3207.185303, avg loss: 2.103072, ppl: 8.191298 +epoch: 2, batch: 28308, sum loss: 3932.563965, avg loss: 2.421530, ppl: 11.263073 +epoch: 2, batch: 28309, sum loss: 4371.734863, avg loss: 2.603773, ppl: 13.514631 +epoch: 2, batch: 28310, sum loss: 4808.043945, avg loss: 2.587752, ppl: 13.299845 +epoch: 2, batch: 28311, sum loss: 3834.572266, avg loss: 2.396608, ppl: 10.985846 +epoch: 2, batch: 28312, sum loss: 4299.919434, avg loss: 2.293290, ppl: 9.907483 +epoch: 2, batch: 28313, sum loss: 3888.812988, avg loss: 2.377025, 
ppl: 10.772807 +epoch: 2, batch: 28314, sum loss: 4357.186523, avg loss: 2.442369, ppl: 11.500256 +epoch: 2, batch: 28315, sum loss: 4252.097168, avg loss: 2.510093, ppl: 12.306071 +epoch: 2, batch: 28316, sum loss: 3861.742676, avg loss: 2.386738, ppl: 10.877955 +epoch: 2, batch: 28317, sum loss: 3682.196045, avg loss: 2.304253, ppl: 10.016691 +epoch: 2, batch: 28318, sum loss: 4032.671875, avg loss: 2.448495, ppl: 11.570924 +epoch: 2, batch: 28319, sum loss: 3125.742676, avg loss: 2.474856, ppl: 11.879992 +epoch: 2, batch: 28320, sum loss: 3792.787109, avg loss: 2.402018, ppl: 11.045447 +epoch: 2, batch: 28321, sum loss: 4022.532715, avg loss: 2.280347, ppl: 9.780077 +epoch: 2, batch: 28322, sum loss: 4671.052734, avg loss: 2.426521, ppl: 11.319431 +epoch: 2, batch: 28323, sum loss: 3717.373779, avg loss: 2.373802, ppl: 10.738141 +epoch: 2, batch: 28324, sum loss: 4258.442871, avg loss: 2.548440, ppl: 12.787140 +epoch: 2, batch: 28325, sum loss: 4530.238281, avg loss: 2.217444, ppl: 9.183826 +epoch: 2, batch: 28326, sum loss: 3411.243896, avg loss: 2.331677, ppl: 10.295197 +epoch: 2, batch: 28327, sum loss: 4782.803223, avg loss: 2.614983, ppl: 13.666982 +epoch: 2, batch: 28328, sum loss: 4167.971191, avg loss: 2.671777, ppl: 14.465645 +epoch: 2, batch: 28329, sum loss: 3752.606201, avg loss: 2.390195, ppl: 10.915624 +epoch: 2, batch: 28330, sum loss: 4554.122559, avg loss: 2.539946, ppl: 12.678985 +epoch: 2, batch: 28331, sum loss: 3651.425293, avg loss: 2.156778, ppl: 8.643245 +epoch: 2, batch: 28332, sum loss: 3626.983887, avg loss: 2.367483, ppl: 10.670500 +epoch: 2, batch: 28333, sum loss: 3661.269531, avg loss: 2.324615, ppl: 10.222749 +epoch: 2, batch: 28334, sum loss: 3642.183350, avg loss: 2.420055, ppl: 11.246482 +epoch: 2, batch: 28335, sum loss: 3596.162598, avg loss: 2.402246, ppl: 11.047962 +epoch: 2, batch: 28336, sum loss: 4079.383057, avg loss: 2.378649, ppl: 10.790316 +epoch: 2, batch: 28337, sum loss: 4311.784668, avg loss: 2.461064, ppl: 
11.717276 +epoch: 2, batch: 28338, sum loss: 4039.232422, avg loss: 2.499525, ppl: 12.176709 +epoch: 2, batch: 28339, sum loss: 3970.495361, avg loss: 2.743950, ppl: 15.548278 +epoch: 2, batch: 28340, sum loss: 3872.052246, avg loss: 2.317207, ppl: 10.147292 +epoch: 2, batch: 28341, sum loss: 5384.796875, avg loss: 2.535215, ppl: 12.619145 +epoch: 2, batch: 28342, sum loss: 3634.349121, avg loss: 2.364573, ppl: 10.639498 +epoch: 2, batch: 28343, sum loss: 5460.297852, avg loss: 2.993584, ppl: 19.957088 +epoch: 2, batch: 28344, sum loss: 3641.934082, avg loss: 2.319703, ppl: 10.172656 +epoch: 2, batch: 28345, sum loss: 4538.462891, avg loss: 2.587493, ppl: 13.296395 +epoch: 2, batch: 28346, sum loss: 4735.962891, avg loss: 2.651715, ppl: 14.178334 +epoch: 2, batch: 28347, sum loss: 4103.086914, avg loss: 2.524976, ppl: 12.490602 +epoch: 2, batch: 28348, sum loss: 5141.413574, avg loss: 2.464724, ppl: 11.760231 +epoch: 2, batch: 28349, sum loss: 4113.151855, avg loss: 2.501917, ppl: 12.205872 +epoch: 2, batch: 28350, sum loss: 3840.412109, avg loss: 2.372089, ppl: 10.719764 +epoch: 2, batch: 28351, sum loss: 4017.111572, avg loss: 2.583352, ppl: 13.241444 +epoch: 2, batch: 28352, sum loss: 3664.130371, avg loss: 2.311754, ppl: 10.092113 +epoch: 2, batch: 28353, sum loss: 4346.704102, avg loss: 2.620075, ppl: 13.736751 +epoch: 2, batch: 28354, sum loss: 5577.747070, avg loss: 2.702397, ppl: 14.915440 +epoch: 2, batch: 28355, sum loss: 4188.463379, avg loss: 2.518619, ppl: 12.411446 +epoch: 2, batch: 28356, sum loss: 4170.722168, avg loss: 2.479621, ppl: 11.936736 +epoch: 2, batch: 28357, sum loss: 3796.080566, avg loss: 2.381481, ppl: 10.820916 +epoch: 2, batch: 28358, sum loss: 3573.963135, avg loss: 2.276410, ppl: 9.741641 +epoch: 2, batch: 28359, sum loss: 5258.734863, avg loss: 2.571508, ppl: 13.085547 +epoch: 2, batch: 28360, sum loss: 4459.661133, avg loss: 2.662484, ppl: 14.331851 +epoch: 2, batch: 28361, sum loss: 4580.668945, avg loss: 2.446939, ppl: 
11.552926 +epoch: 2, batch: 28362, sum loss: 4679.574219, avg loss: 2.562746, ppl: 12.971389 +epoch: 2, batch: 28363, sum loss: 4121.087402, avg loss: 2.458883, ppl: 11.691742 +epoch: 2, batch: 28364, sum loss: 3958.232910, avg loss: 2.626565, ppl: 13.826192 +epoch: 2, batch: 28365, sum loss: 4227.782227, avg loss: 2.492796, ppl: 12.095049 +epoch: 2, batch: 28366, sum loss: 4465.539062, avg loss: 2.587218, ppl: 13.292744 +epoch: 2, batch: 28367, sum loss: 3551.255127, avg loss: 2.282298, ppl: 9.799170 +epoch: 2, batch: 28368, sum loss: 4325.372070, avg loss: 2.547333, ppl: 12.772999 +epoch: 2, batch: 28369, sum loss: 4211.069336, avg loss: 2.764983, ppl: 15.878773 +epoch: 2, batch: 28370, sum loss: 4389.923828, avg loss: 2.466249, ppl: 11.778190 +epoch: 2, batch: 28371, sum loss: 4510.703613, avg loss: 2.786105, ppl: 16.217728 +epoch: 2, batch: 28372, sum loss: 3919.904541, avg loss: 2.424183, ppl: 11.293003 +epoch: 2, batch: 28373, sum loss: 4770.003418, avg loss: 2.479212, ppl: 11.931856 +epoch: 2, batch: 28374, sum loss: 4020.841309, avg loss: 2.465261, ppl: 11.766559 +epoch: 2, batch: 28375, sum loss: 4760.321289, avg loss: 2.864213, ppl: 17.535244 +epoch: 2, batch: 28376, sum loss: 4327.708496, avg loss: 2.551715, ppl: 12.829085 +epoch: 2, batch: 28377, sum loss: 3733.720459, avg loss: 2.408852, ppl: 11.121188 +epoch: 2, batch: 28378, sum loss: 4356.338867, avg loss: 2.500768, ppl: 12.191859 +epoch: 2, batch: 28379, sum loss: 3747.479492, avg loss: 2.483419, ppl: 11.982163 +epoch: 2, batch: 28380, sum loss: 4794.315918, avg loss: 2.591522, ppl: 13.350078 +epoch: 2, batch: 28381, sum loss: 4896.052734, avg loss: 2.579585, ppl: 13.191667 +epoch: 2, batch: 28382, sum loss: 4352.729492, avg loss: 2.484435, ppl: 11.994340 +epoch: 2, batch: 28383, sum loss: 4912.531738, avg loss: 2.622815, ppl: 13.774440 +epoch: 2, batch: 28384, sum loss: 3677.906738, avg loss: 2.220958, ppl: 9.216158 +epoch: 2, batch: 28385, sum loss: 4703.750000, avg loss: 2.678673, ppl: 14.565756 
+epoch: 2, batch: 28386, sum loss: 4345.475098, avg loss: 2.794518, ppl: 16.354744 +epoch: 2, batch: 28387, sum loss: 4018.074219, avg loss: 2.528681, ppl: 12.536960 +epoch: 2, batch: 28388, sum loss: 5904.783203, avg loss: 2.817168, ppl: 16.729399 +epoch: 2, batch: 28389, sum loss: 4567.459961, avg loss: 2.718726, ppl: 15.160997 +epoch: 2, batch: 28390, sum loss: 3695.178711, avg loss: 2.322551, ppl: 10.201666 +epoch: 2, batch: 28391, sum loss: 4540.320801, avg loss: 2.741740, ppl: 15.513948 +epoch: 2, batch: 28392, sum loss: 4427.545898, avg loss: 2.667196, ppl: 14.399540 +epoch: 2, batch: 28393, sum loss: 4983.321777, avg loss: 2.580695, ppl: 13.206312 +epoch: 2, batch: 28394, sum loss: 4305.210938, avg loss: 2.558058, ppl: 12.910714 +epoch: 2, batch: 28395, sum loss: 4310.067383, avg loss: 2.519034, ppl: 12.416601 +epoch: 2, batch: 28396, sum loss: 3439.583740, avg loss: 2.270352, ppl: 9.682812 +epoch: 2, batch: 28397, sum loss: 4266.982910, avg loss: 2.492397, ppl: 12.090217 +epoch: 2, batch: 28398, sum loss: 3963.582520, avg loss: 2.572085, ppl: 13.093091 +epoch: 2, batch: 28399, sum loss: 4004.047607, avg loss: 2.360877, ppl: 10.600246 +epoch: 2, batch: 28400, sum loss: 4609.112305, avg loss: 2.644356, ppl: 14.074375 +epoch: 2, batch: 28401, sum loss: 4140.801758, avg loss: 2.289001, ppl: 9.865072 +epoch: 2, batch: 28402, sum loss: 3674.006348, avg loss: 2.180419, ppl: 8.850016 +epoch: 2, batch: 28403, sum loss: 5221.698242, avg loss: 2.698552, ppl: 14.858200 +epoch: 2, batch: 28404, sum loss: 4077.388184, avg loss: 2.427017, ppl: 11.325048 +epoch: 2, batch: 28405, sum loss: 3665.516113, avg loss: 2.361802, ppl: 10.610052 +epoch: 2, batch: 28406, sum loss: 4517.287109, avg loss: 2.568100, ppl: 13.041019 +epoch: 2, batch: 28407, sum loss: 3682.376953, avg loss: 2.292887, ppl: 9.903490 +epoch: 2, batch: 28408, sum loss: 4437.181641, avg loss: 2.548640, ppl: 12.789695 +epoch: 2, batch: 28409, sum loss: 3119.698975, avg loss: 2.107905, ppl: 8.230977 +epoch: 2, 
batch: 28410, sum loss: 4644.916992, avg loss: 2.649696, ppl: 14.149738 +epoch: 2, batch: 28411, sum loss: 4499.554688, avg loss: 2.479094, ppl: 11.930445 +epoch: 2, batch: 28412, sum loss: 4006.733887, avg loss: 2.504209, ppl: 12.233872 +epoch: 2, batch: 28413, sum loss: 3679.827393, avg loss: 2.366449, ppl: 10.659470 +epoch: 2, batch: 28414, sum loss: 4464.653809, avg loss: 2.454455, ppl: 11.640090 +epoch: 2, batch: 28415, sum loss: 4210.453613, avg loss: 2.341743, ppl: 10.399344 +epoch: 2, batch: 28416, sum loss: 4351.884277, avg loss: 2.533111, ppl: 12.592619 +epoch: 2, batch: 28417, sum loss: 4559.144531, avg loss: 2.639922, ppl: 14.012106 +epoch: 2, batch: 28418, sum loss: 3515.612793, avg loss: 2.340621, ppl: 10.387685 +epoch: 2, batch: 28419, sum loss: 4529.395020, avg loss: 2.654979, ppl: 14.224694 +epoch: 2, batch: 28420, sum loss: 3990.963623, avg loss: 2.349008, ppl: 10.475169 +epoch: 2, batch: 28421, sum loss: 5006.166016, avg loss: 2.761261, ppl: 15.819778 +epoch: 2, batch: 28422, sum loss: 4201.882812, avg loss: 2.420439, ppl: 11.250803 +epoch: 2, batch: 28423, sum loss: 4019.332275, avg loss: 2.553578, ppl: 12.853011 +epoch: 2, batch: 28424, sum loss: 4687.828613, avg loss: 2.675701, ppl: 14.522532 +epoch: 2, batch: 28425, sum loss: 4480.240234, avg loss: 2.404852, ppl: 11.076796 +epoch: 2, batch: 28426, sum loss: 4380.342285, avg loss: 2.430823, ppl: 11.368230 +epoch: 2, batch: 28427, sum loss: 4114.613281, avg loss: 2.364720, ppl: 10.641063 +epoch: 2, batch: 28428, sum loss: 4841.549316, avg loss: 2.450177, ppl: 11.590398 +epoch: 2, batch: 28429, sum loss: 3979.868652, avg loss: 2.518904, ppl: 12.414985 +epoch: 2, batch: 28430, sum loss: 4400.208008, avg loss: 2.498698, ppl: 12.166649 +epoch: 2, batch: 28431, sum loss: 3230.530518, avg loss: 2.172515, ppl: 8.780342 +epoch: 2, batch: 28432, sum loss: 4515.118164, avg loss: 2.759852, ppl: 15.797511 +epoch: 2, batch: 28433, sum loss: 4330.567383, avg loss: 2.506115, ppl: 12.257223 +epoch: 2, batch: 
28434, sum loss: 5188.438965, avg loss: 2.770122, ppl: 15.960586 +epoch: 2, batch: 28435, sum loss: 3114.261963, avg loss: 2.006612, ppl: 7.438076 +epoch: 2, batch: 28436, sum loss: 5032.782715, avg loss: 2.587549, ppl: 13.297140 +epoch: 2, batch: 28437, sum loss: 4371.208984, avg loss: 2.466822, ppl: 11.784940 +epoch: 2, batch: 28438, sum loss: 4231.629395, avg loss: 2.520327, ppl: 12.432663 +epoch: 2, batch: 28439, sum loss: 4499.389648, avg loss: 2.614404, ppl: 13.659076 +epoch: 2, batch: 28440, sum loss: 3911.199707, avg loss: 2.528248, ppl: 12.531532 +epoch: 2, batch: 28441, sum loss: 4462.635254, avg loss: 2.514161, ppl: 12.356233 +epoch: 2, batch: 28442, sum loss: 3112.187500, avg loss: 2.338233, ppl: 10.362905 +epoch: 2, batch: 28443, sum loss: 3824.617920, avg loss: 2.227500, ppl: 9.276648 +epoch: 2, batch: 28444, sum loss: 4666.803711, avg loss: 2.615921, ppl: 13.679816 +epoch: 2, batch: 28445, sum loss: 3203.570312, avg loss: 2.298114, ppl: 9.955385 +epoch: 2, batch: 28446, sum loss: 4902.758301, avg loss: 2.476140, ppl: 11.895266 +epoch: 2, batch: 28447, sum loss: 3696.247314, avg loss: 2.246959, ppl: 9.458925 +epoch: 2, batch: 28448, sum loss: 3991.111328, avg loss: 2.441047, ppl: 11.485056 +epoch: 2, batch: 28449, sum loss: 4077.357422, avg loss: 2.502982, ppl: 12.218875 +epoch: 2, batch: 28450, sum loss: 3815.957520, avg loss: 2.352625, ppl: 10.513129 +epoch: 2, batch: 28451, sum loss: 5662.934570, avg loss: 2.852864, ppl: 17.337362 +epoch: 2, batch: 28452, sum loss: 3953.857666, avg loss: 2.376117, ppl: 10.763023 +epoch: 2, batch: 28453, sum loss: 4159.398438, avg loss: 2.688687, ppl: 14.712343 +epoch: 2, batch: 28454, sum loss: 4591.593262, avg loss: 2.579547, ppl: 13.191157 +epoch: 2, batch: 28455, sum loss: 4581.803711, avg loss: 2.495536, ppl: 12.128228 +epoch: 2, batch: 28456, sum loss: 4939.935059, avg loss: 2.709784, ppl: 15.026023 +epoch: 2, batch: 28457, sum loss: 3857.922852, avg loss: 2.333892, ppl: 10.318017 +epoch: 2, batch: 28458, sum 
loss: 4266.556152, avg loss: 2.551768, ppl: 12.829771 +epoch: 2, batch: 28459, sum loss: 3387.381348, avg loss: 2.206763, ppl: 9.086257 +epoch: 2, batch: 28460, sum loss: 4173.810059, avg loss: 2.430874, ppl: 11.368810 +epoch: 2, batch: 28461, sum loss: 3760.210205, avg loss: 2.726766, ppl: 15.283379 +epoch: 2, batch: 28462, sum loss: 4188.047852, avg loss: 2.422237, ppl: 11.271047 +epoch: 2, batch: 28463, sum loss: 5137.771973, avg loss: 2.860675, ppl: 17.473314 +epoch: 2, batch: 28464, sum loss: 3832.723633, avg loss: 2.416598, ppl: 11.207662 +epoch: 2, batch: 28465, sum loss: 3871.823730, avg loss: 2.335238, ppl: 10.331914 +epoch: 2, batch: 28466, sum loss: 4160.937500, avg loss: 2.532524, ppl: 12.585236 +epoch: 2, batch: 28467, sum loss: 4030.460938, avg loss: 2.393385, ppl: 10.950503 +epoch: 2, batch: 28468, sum loss: 4211.413574, avg loss: 2.310156, ppl: 10.075993 +epoch: 2, batch: 28469, sum loss: 4800.962891, avg loss: 2.768721, ppl: 15.938241 +epoch: 2, batch: 28470, sum loss: 3926.112793, avg loss: 2.365128, ppl: 10.645404 +epoch: 2, batch: 28471, sum loss: 3072.575439, avg loss: 2.481886, ppl: 11.963812 +epoch: 2, batch: 28472, sum loss: 4583.884277, avg loss: 2.439534, ppl: 11.467695 +epoch: 2, batch: 28473, sum loss: 3696.760254, avg loss: 2.165648, ppl: 8.720246 +epoch: 2, batch: 28474, sum loss: 4327.722168, avg loss: 2.388368, ppl: 10.895694 +epoch: 2, batch: 28475, sum loss: 3264.348633, avg loss: 2.311862, ppl: 10.093196 +epoch: 2, batch: 28476, sum loss: 4134.549316, avg loss: 2.691764, ppl: 14.757684 +epoch: 2, batch: 28477, sum loss: 3843.143066, avg loss: 2.362104, ppl: 10.613257 +epoch: 2, batch: 28478, sum loss: 4093.075928, avg loss: 2.503410, ppl: 12.224112 +epoch: 2, batch: 28479, sum loss: 4472.980469, avg loss: 2.680036, ppl: 14.585623 +epoch: 2, batch: 28480, sum loss: 4216.404297, avg loss: 2.610777, ppl: 13.609617 +epoch: 2, batch: 28481, sum loss: 4024.906250, avg loss: 2.451222, ppl: 11.602515 +epoch: 2, batch: 28482, sum loss: 
4671.786133, avg loss: 2.576827, ppl: 13.155324 +epoch: 2, batch: 28483, sum loss: 3732.852295, avg loss: 2.493555, ppl: 12.104234 +epoch: 2, batch: 28484, sum loss: 5325.621094, avg loss: 2.800011, ppl: 16.444830 +epoch: 2, batch: 28485, sum loss: 3753.006348, avg loss: 2.528980, ppl: 12.540709 +epoch: 2, batch: 28486, sum loss: 4203.525879, avg loss: 2.481420, ppl: 11.958237 +epoch: 2, batch: 28487, sum loss: 4796.252441, avg loss: 2.782049, ppl: 16.152086 +epoch: 2, batch: 28488, sum loss: 3826.118896, avg loss: 2.476453, ppl: 11.898979 +epoch: 2, batch: 28489, sum loss: 4550.258301, avg loss: 2.478354, ppl: 11.921628 +epoch: 2, batch: 28490, sum loss: 3833.970215, avg loss: 2.394735, ppl: 10.965290 +epoch: 2, batch: 28491, sum loss: 4869.697754, avg loss: 2.728122, ppl: 15.304119 +epoch: 2, batch: 28492, sum loss: 3516.130615, avg loss: 2.400089, ppl: 11.024158 +epoch: 2, batch: 28493, sum loss: 5048.511719, avg loss: 2.698296, ppl: 14.854400 +epoch: 2, batch: 28494, sum loss: 4237.995605, avg loss: 2.424483, ppl: 11.296386 +epoch: 2, batch: 28495, sum loss: 4113.230469, avg loss: 2.316008, ppl: 10.135135 +epoch: 2, batch: 28496, sum loss: 5031.669922, avg loss: 2.733118, ppl: 15.380767 +epoch: 2, batch: 28497, sum loss: 5149.150879, avg loss: 2.615110, ppl: 13.668715 +epoch: 2, batch: 28498, sum loss: 3784.734863, avg loss: 2.513104, ppl: 12.343186 +epoch: 2, batch: 28499, sum loss: 4033.883057, avg loss: 2.530667, ppl: 12.561880 +epoch: 2, batch: 28500, sum loss: 4120.006836, avg loss: 2.639338, ppl: 14.003934 +epoch: 2, batch: 28501, sum loss: 5414.279785, avg loss: 2.731725, ppl: 15.359366 +epoch: 2, batch: 28502, sum loss: 3564.665771, avg loss: 2.485820, ppl: 12.010963 +epoch: 2, batch: 28503, sum loss: 4530.661621, avg loss: 2.649510, ppl: 14.147100 +epoch: 2, batch: 28504, sum loss: 4444.913574, avg loss: 2.591786, ppl: 13.353601 +epoch: 2, batch: 28505, sum loss: 3799.134033, avg loss: 2.438469, ppl: 11.455485 +epoch: 2, batch: 28506, sum loss: 
3983.353760, avg loss: 2.355620, ppl: 10.544668 +epoch: 2, batch: 28507, sum loss: 3744.966064, avg loss: 2.370232, ppl: 10.699870 +epoch: 2, batch: 28508, sum loss: 5310.531738, avg loss: 2.532443, ppl: 12.584207 +epoch: 2, batch: 28509, sum loss: 4853.505371, avg loss: 2.541102, ppl: 12.693657 +epoch: 2, batch: 28510, sum loss: 4040.022949, avg loss: 2.473988, ppl: 11.869693 +epoch: 2, batch: 28511, sum loss: 4511.595215, avg loss: 2.604847, ppl: 13.529158 +epoch: 2, batch: 28512, sum loss: 3674.412354, avg loss: 2.296508, ppl: 9.939409 +epoch: 2, batch: 28513, sum loss: 4202.417480, avg loss: 2.651367, ppl: 14.173407 +epoch: 2, batch: 28514, sum loss: 4216.260742, avg loss: 2.533811, ppl: 12.601434 +epoch: 2, batch: 28515, sum loss: 4293.910645, avg loss: 2.551343, ppl: 12.824318 +epoch: 2, batch: 28516, sum loss: 5299.932617, avg loss: 2.640724, ppl: 14.023352 +epoch: 2, batch: 28517, sum loss: 3032.827637, avg loss: 1.984835, ppl: 7.277846 +epoch: 2, batch: 28518, sum loss: 5775.716309, avg loss: 2.730835, ppl: 15.345698 +epoch: 2, batch: 28519, sum loss: 3855.010986, avg loss: 2.344897, ppl: 10.432201 +epoch: 2, batch: 28520, sum loss: 4182.208008, avg loss: 2.567347, ppl: 13.031204 +epoch: 2, batch: 28521, sum loss: 4870.755371, avg loss: 2.455018, ppl: 11.646641 +epoch: 2, batch: 28522, sum loss: 3268.252197, avg loss: 2.092351, ppl: 8.103945 +epoch: 2, batch: 28523, sum loss: 3701.343994, avg loss: 2.354545, ppl: 10.533331 +epoch: 2, batch: 28524, sum loss: 4232.097168, avg loss: 2.583698, ppl: 13.246029 +epoch: 2, batch: 28525, sum loss: 4392.180176, avg loss: 2.556566, ppl: 12.891472 +epoch: 2, batch: 28526, sum loss: 4648.496582, avg loss: 2.526357, ppl: 12.507854 +epoch: 2, batch: 28527, sum loss: 4922.541016, avg loss: 2.629563, ppl: 13.867703 +epoch: 2, batch: 28528, sum loss: 3513.145752, avg loss: 2.098654, ppl: 8.155182 +epoch: 2, batch: 28529, sum loss: 3973.262451, avg loss: 2.376353, ppl: 10.765572 +epoch: 2, batch: 28530, sum loss: 
3918.928467, avg loss: 2.752057, ppl: 15.674835 +epoch: 2, batch: 28531, sum loss: 4102.654297, avg loss: 2.393614, ppl: 10.953008 +epoch: 2, batch: 28532, sum loss: 3735.557129, avg loss: 2.604991, ppl: 13.531103 +epoch: 2, batch: 28533, sum loss: 4741.701172, avg loss: 2.516827, ppl: 12.389218 +epoch: 2, batch: 28534, sum loss: 4028.905273, avg loss: 2.516493, ppl: 12.385087 +epoch: 2, batch: 28535, sum loss: 4587.368164, avg loss: 2.645541, ppl: 14.091065 +epoch: 2, batch: 28536, sum loss: 3546.866699, avg loss: 2.273633, ppl: 9.714625 +epoch: 2, batch: 28537, sum loss: 3634.292480, avg loss: 2.447335, ppl: 11.557505 +epoch: 2, batch: 28538, sum loss: 4363.128906, avg loss: 2.448445, ppl: 11.570342 +epoch: 2, batch: 28539, sum loss: 2624.753906, avg loss: 2.041022, ppl: 7.698472 +epoch: 2, batch: 28540, sum loss: 4468.037109, avg loss: 2.351599, ppl: 10.502347 +epoch: 2, batch: 28541, sum loss: 4630.668945, avg loss: 2.617676, ppl: 13.703842 +epoch: 2, batch: 28542, sum loss: 4758.362793, avg loss: 2.475735, ppl: 11.890446 +epoch: 2, batch: 28543, sum loss: 4675.338379, avg loss: 2.450387, ppl: 11.592833 +epoch: 2, batch: 28544, sum loss: 4154.014648, avg loss: 2.474100, ppl: 11.871022 +epoch: 2, batch: 28545, sum loss: 3973.083496, avg loss: 2.563280, ppl: 12.978314 +epoch: 2, batch: 28546, sum loss: 4586.952148, avg loss: 2.512022, ppl: 12.329836 +epoch: 2, batch: 28547, sum loss: 4261.831543, avg loss: 2.462063, ppl: 11.728987 +epoch: 2, batch: 28548, sum loss: 3772.501953, avg loss: 2.432303, ppl: 11.385071 +epoch: 2, batch: 28549, sum loss: 4316.011719, avg loss: 2.538831, ppl: 12.664851 +epoch: 2, batch: 28550, sum loss: 4676.518066, avg loss: 2.460031, ppl: 11.705169 +epoch: 2, batch: 28551, sum loss: 3872.791504, avg loss: 2.286181, ppl: 9.837301 +epoch: 2, batch: 28552, sum loss: 5713.122070, avg loss: 2.846598, ppl: 17.229071 +epoch: 2, batch: 28553, sum loss: 3878.139648, avg loss: 2.588878, ppl: 13.314826 +epoch: 2, batch: 28554, sum loss: 
4361.428711, avg loss: 2.708962, ppl: 15.013682 +epoch: 2, batch: 28555, sum loss: 3603.384766, avg loss: 2.270564, ppl: 9.684860 +epoch: 2, batch: 28556, sum loss: 4263.938965, avg loss: 2.551729, ppl: 12.829266 +epoch: 2, batch: 28557, sum loss: 3801.549072, avg loss: 2.499375, ppl: 12.174881 +epoch: 2, batch: 28558, sum loss: 4417.192871, avg loss: 2.516919, ppl: 12.390362 +epoch: 2, batch: 28559, sum loss: 4056.076660, avg loss: 2.308524, ppl: 10.059565 +epoch: 2, batch: 28560, sum loss: 3221.216797, avg loss: 2.140343, ppl: 8.502357 +epoch: 2, batch: 28561, sum loss: 4192.464844, avg loss: 2.368624, ppl: 10.682685 +epoch: 2, batch: 28562, sum loss: 3810.183838, avg loss: 2.583175, ppl: 13.239111 +epoch: 2, batch: 28563, sum loss: 3598.758301, avg loss: 2.487048, ppl: 12.025723 +epoch: 2, batch: 28564, sum loss: 4231.951660, avg loss: 2.571052, ppl: 13.079577 +epoch: 2, batch: 28565, sum loss: 3103.966309, avg loss: 2.117303, ppl: 8.308700 +epoch: 2, batch: 28566, sum loss: 5725.558594, avg loss: 2.655639, ppl: 14.234084 +epoch: 2, batch: 28567, sum loss: 4934.634766, avg loss: 2.887440, ppl: 17.947306 +epoch: 2, batch: 28568, sum loss: 2987.223145, avg loss: 2.271652, ppl: 9.695409 +epoch: 2, batch: 28569, sum loss: 3627.447754, avg loss: 2.543792, ppl: 12.727847 +epoch: 2, batch: 28570, sum loss: 4734.819824, avg loss: 2.574671, ppl: 13.126998 +epoch: 2, batch: 28571, sum loss: 4053.116699, avg loss: 2.431384, ppl: 11.374609 +epoch: 2, batch: 28572, sum loss: 4375.521484, avg loss: 2.356231, ppl: 10.551111 +epoch: 2, batch: 28573, sum loss: 4248.608887, avg loss: 2.333119, ppl: 10.310045 +epoch: 2, batch: 28574, sum loss: 3841.962646, avg loss: 2.496402, ppl: 12.138741 +epoch: 2, batch: 28575, sum loss: 4369.538086, avg loss: 2.441083, ppl: 11.485470 +epoch: 2, batch: 28576, sum loss: 3794.240234, avg loss: 2.468601, ppl: 11.805924 +epoch: 2, batch: 28577, sum loss: 3180.553467, avg loss: 2.099375, ppl: 8.161070 +epoch: 2, batch: 28578, sum loss: 3179.101318, 
avg loss: 2.120815, ppl: 8.337929 +epoch: 2, batch: 28579, sum loss: 4518.716309, avg loss: 2.484176, ppl: 11.991238 +epoch: 2, batch: 28580, sum loss: 3431.420410, avg loss: 2.110345, ppl: 8.251084 +epoch: 2, batch: 28581, sum loss: 3188.323730, avg loss: 2.157188, ppl: 8.646789 +epoch: 2, batch: 28582, sum loss: 3746.557373, avg loss: 2.324167, ppl: 10.218165 +epoch: 2, batch: 28583, sum loss: 4626.375977, avg loss: 2.479301, ppl: 11.932920 +epoch: 2, batch: 28584, sum loss: 3997.101562, avg loss: 2.429849, ppl: 11.357166 +epoch: 2, batch: 28585, sum loss: 3793.475098, avg loss: 2.376864, ppl: 10.771074 +epoch: 2, batch: 28586, sum loss: 4580.465820, avg loss: 2.496167, ppl: 12.135884 +epoch: 2, batch: 28587, sum loss: 3227.717773, avg loss: 2.175012, ppl: 8.802290 +epoch: 2, batch: 28588, sum loss: 4135.567383, avg loss: 2.576677, ppl: 13.153361 +epoch: 2, batch: 28589, sum loss: 4411.822754, avg loss: 2.718313, ppl: 15.154741 +epoch: 2, batch: 28590, sum loss: 5555.997070, avg loss: 2.613357, ppl: 13.644780 +epoch: 2, batch: 28591, sum loss: 4194.636230, avg loss: 2.562392, ppl: 12.966803 +epoch: 2, batch: 28592, sum loss: 4222.935547, avg loss: 2.513652, ppl: 12.349954 +epoch: 2, batch: 28593, sum loss: 2863.884521, avg loss: 2.031124, ppl: 7.622648 +epoch: 2, batch: 28594, sum loss: 3301.856934, avg loss: 2.278714, ppl: 9.764120 +epoch: 2, batch: 28595, sum loss: 3989.038330, avg loss: 2.568602, ppl: 13.047569 +epoch: 2, batch: 28596, sum loss: 3770.673340, avg loss: 2.488893, ppl: 12.047935 +epoch: 2, batch: 28597, sum loss: 4011.978027, avg loss: 2.490365, ppl: 12.065680 +epoch: 2, batch: 28598, sum loss: 3677.370361, avg loss: 2.234125, ppl: 9.338311 +epoch: 2, batch: 28599, sum loss: 4345.143555, avg loss: 2.662466, ppl: 14.331581 +epoch: 2, batch: 28600, sum loss: 5098.152832, avg loss: 2.670588, ppl: 14.448466 +epoch: 2, batch: 28601, sum loss: 3980.498779, avg loss: 2.362314, ppl: 10.615484 +epoch: 2, batch: 28602, sum loss: 3137.561523, avg loss: 
2.288521, ppl: 9.860339 +epoch: 2, batch: 28603, sum loss: 3962.373535, avg loss: 2.583034, ppl: 13.237233 +epoch: 2, batch: 28604, sum loss: 4379.174316, avg loss: 2.450573, ppl: 11.594991 +epoch: 2, batch: 28605, sum loss: 4328.566895, avg loss: 2.477714, ppl: 11.914001 +epoch: 2, batch: 28606, sum loss: 4512.293945, avg loss: 2.449671, ppl: 11.584536 +epoch: 2, batch: 28607, sum loss: 4044.602051, avg loss: 2.493589, ppl: 12.104647 +epoch: 2, batch: 28608, sum loss: 4599.893555, avg loss: 2.506754, ppl: 12.265055 +epoch: 2, batch: 28609, sum loss: 4192.538086, avg loss: 2.278553, ppl: 9.762547 +epoch: 2, batch: 28610, sum loss: 4419.251465, avg loss: 2.696310, ppl: 14.824921 +epoch: 2, batch: 28611, sum loss: 3654.722168, avg loss: 2.379376, ppl: 10.798167 +epoch: 2, batch: 28612, sum loss: 3620.785156, avg loss: 2.165541, ppl: 8.719321 +epoch: 2, batch: 28613, sum loss: 3956.916016, avg loss: 2.448587, ppl: 11.571980 +epoch: 2, batch: 28614, sum loss: 3372.297119, avg loss: 2.314548, ppl: 10.120353 +epoch: 2, batch: 28615, sum loss: 3808.373291, avg loss: 2.361050, ppl: 10.602077 +epoch: 2, batch: 28616, sum loss: 4384.333984, avg loss: 2.665248, ppl: 14.371519 +epoch: 2, batch: 28617, sum loss: 4432.459961, avg loss: 2.538637, ppl: 12.662399 +epoch: 2, batch: 28618, sum loss: 3846.502441, avg loss: 2.352601, ppl: 10.512877 +epoch: 2, batch: 28619, sum loss: 5172.568359, avg loss: 2.729588, ppl: 15.326565 +epoch: 2, batch: 28620, sum loss: 3719.541016, avg loss: 2.339334, ppl: 10.374325 +epoch: 2, batch: 28621, sum loss: 4395.467773, avg loss: 2.613239, ppl: 13.643167 +epoch: 2, batch: 28622, sum loss: 4308.715820, avg loss: 2.608182, ppl: 13.574347 +epoch: 2, batch: 28623, sum loss: 4187.944824, avg loss: 2.439106, ppl: 11.462789 +epoch: 2, batch: 28624, sum loss: 5457.026855, avg loss: 2.552398, ppl: 12.837852 +epoch: 2, batch: 28625, sum loss: 4553.098145, avg loss: 2.438724, ppl: 11.458414 +epoch: 2, batch: 28626, sum loss: 5072.461914, avg loss: 2.586671, 
ppl: 13.285472 +epoch: 2, batch: 28627, sum loss: 4913.476074, avg loss: 2.621919, ppl: 13.762107 +epoch: 2, batch: 28628, sum loss: 3258.214111, avg loss: 2.149218, ppl: 8.578146 +epoch: 2, batch: 28629, sum loss: 4445.482910, avg loss: 2.627354, ppl: 13.837107 +epoch: 2, batch: 28630, sum loss: 3464.642578, avg loss: 2.575943, ppl: 13.143700 +epoch: 2, batch: 28631, sum loss: 4470.350586, avg loss: 2.500196, ppl: 12.184882 +epoch: 2, batch: 28632, sum loss: 4603.529785, avg loss: 2.439602, ppl: 11.468480 +epoch: 2, batch: 28633, sum loss: 4041.903320, avg loss: 2.657399, ppl: 14.259149 +epoch: 2, batch: 28634, sum loss: 3688.128418, avg loss: 2.429597, ppl: 11.354307 +epoch: 2, batch: 28635, sum loss: 2796.384277, avg loss: 1.962375, ppl: 7.116207 +epoch: 2, batch: 28636, sum loss: 4318.647461, avg loss: 2.483409, ppl: 11.982038 +epoch: 2, batch: 28637, sum loss: 3485.956055, avg loss: 2.161163, ppl: 8.681229 +epoch: 2, batch: 28638, sum loss: 5018.650879, avg loss: 2.736451, ppl: 15.432115 +epoch: 2, batch: 28639, sum loss: 3508.651367, avg loss: 2.257819, ppl: 9.562215 +epoch: 2, batch: 28640, sum loss: 3336.241699, avg loss: 2.152414, ppl: 8.605608 +epoch: 2, batch: 28641, sum loss: 3836.059570, avg loss: 2.484495, ppl: 11.995057 +epoch: 2, batch: 28642, sum loss: 4164.662598, avg loss: 2.459931, ppl: 11.704000 +epoch: 2, batch: 28643, sum loss: 4039.141846, avg loss: 2.487156, ppl: 12.027024 +epoch: 2, batch: 28644, sum loss: 3649.723145, avg loss: 2.365342, ppl: 10.647684 +epoch: 2, batch: 28645, sum loss: 4112.076172, avg loss: 2.586211, ppl: 13.279366 +epoch: 2, batch: 28646, sum loss: 4025.150391, avg loss: 2.565424, ppl: 13.006171 +epoch: 2, batch: 28647, sum loss: 4428.934082, avg loss: 2.631571, ppl: 13.895583 +epoch: 2, batch: 28648, sum loss: 4548.089844, avg loss: 2.598908, ppl: 13.449049 +epoch: 2, batch: 28649, sum loss: 4167.358887, avg loss: 2.464435, ppl: 11.756833 +epoch: 2, batch: 28650, sum loss: 4282.875977, avg loss: 2.650295, ppl: 
14.158208 +epoch: 2, batch: 28651, sum loss: 4759.734863, avg loss: 2.549403, ppl: 12.799457 +epoch: 2, batch: 28652, sum loss: 4256.731445, avg loss: 2.595568, ppl: 13.404198 +epoch: 2, batch: 28653, sum loss: 5137.148438, avg loss: 2.581482, ppl: 13.216707 +epoch: 2, batch: 28654, sum loss: 3854.572998, avg loss: 2.275427, ppl: 9.732070 +epoch: 2, batch: 28655, sum loss: 3855.600342, avg loss: 2.655372, ppl: 14.230281 +epoch: 2, batch: 28656, sum loss: 4388.569824, avg loss: 2.494923, ppl: 12.120805 +epoch: 2, batch: 28657, sum loss: 4023.586914, avg loss: 2.709486, ppl: 15.021553 +epoch: 2, batch: 28658, sum loss: 3780.196045, avg loss: 2.423203, ppl: 11.281935 +epoch: 2, batch: 28659, sum loss: 3836.125000, avg loss: 2.423326, ppl: 11.283322 +epoch: 2, batch: 28660, sum loss: 3550.069580, avg loss: 2.300758, ppl: 9.981747 +epoch: 2, batch: 28661, sum loss: 4760.648438, avg loss: 2.474349, ppl: 11.873981 +epoch: 2, batch: 28662, sum loss: 3434.915527, avg loss: 2.296067, ppl: 9.935026 +epoch: 2, batch: 28663, sum loss: 4008.160645, avg loss: 2.388654, ppl: 10.898809 +epoch: 2, batch: 28664, sum loss: 3852.216309, avg loss: 2.292986, ppl: 9.904468 +epoch: 2, batch: 28665, sum loss: 3959.311035, avg loss: 2.331750, ppl: 10.295943 +epoch: 2, batch: 28666, sum loss: 4141.884766, avg loss: 2.383133, ppl: 10.838804 +epoch: 2, batch: 28667, sum loss: 3375.788818, avg loss: 2.079968, ppl: 8.004216 +epoch: 2, batch: 28668, sum loss: 4663.200195, avg loss: 2.592107, ppl: 13.357884 +epoch: 2, batch: 28669, sum loss: 4017.970459, avg loss: 2.442535, ppl: 11.502164 +epoch: 2, batch: 28670, sum loss: 3857.966064, avg loss: 2.328284, ppl: 10.260315 +epoch: 2, batch: 28671, sum loss: 3539.403320, avg loss: 2.511997, ppl: 12.329524 +epoch: 2, batch: 28672, sum loss: 4480.104492, avg loss: 2.476564, ppl: 11.900310 +epoch: 2, batch: 28673, sum loss: 4383.950684, avg loss: 2.453246, ppl: 11.626025 +epoch: 2, batch: 28674, sum loss: 4772.596191, avg loss: 2.747609, ppl: 15.605269 
+epoch: 2, batch: 28675, sum loss: 5080.862305, avg loss: 2.499194, ppl: 12.172684 +epoch: 2, batch: 28676, sum loss: 3805.869629, avg loss: 2.396643, ppl: 10.986238 +epoch: 2, batch: 28677, sum loss: 4744.836914, avg loss: 2.428269, ppl: 11.339233 +epoch: 2, batch: 28678, sum loss: 4480.058594, avg loss: 2.679461, ppl: 14.577234 +epoch: 2, batch: 28679, sum loss: 4278.437988, avg loss: 2.631266, ppl: 13.891340 +epoch: 2, batch: 28680, sum loss: 4218.889160, avg loss: 2.543031, ppl: 12.718167 +epoch: 2, batch: 28681, sum loss: 3718.912354, avg loss: 2.314196, ppl: 10.116782 +epoch: 2, batch: 28682, sum loss: 3611.656738, avg loss: 2.337642, ppl: 10.356786 +epoch: 2, batch: 28683, sum loss: 3331.810547, avg loss: 2.186227, ppl: 8.901567 +epoch: 2, batch: 28684, sum loss: 3484.143311, avg loss: 2.275731, ppl: 9.735029 +epoch: 2, batch: 28685, sum loss: 4753.272461, avg loss: 2.594581, ppl: 13.390977 +epoch: 2, batch: 28686, sum loss: 4126.692871, avg loss: 2.331465, ppl: 10.293010 +epoch: 2, batch: 28687, sum loss: 4663.286621, avg loss: 2.676973, ppl: 14.541005 +epoch: 2, batch: 28688, sum loss: 4043.388428, avg loss: 2.372880, ppl: 10.728251 +epoch: 2, batch: 28689, sum loss: 3938.963867, avg loss: 2.618992, ppl: 13.721883 +epoch: 2, batch: 28690, sum loss: 4455.419434, avg loss: 2.494636, ppl: 12.117320 +epoch: 2, batch: 28691, sum loss: 3949.601807, avg loss: 2.302975, ppl: 10.003897 +epoch: 2, batch: 28692, sum loss: 3370.044434, avg loss: 2.297235, ppl: 9.946645 +epoch: 2, batch: 28693, sum loss: 5020.045410, avg loss: 2.591660, ppl: 13.351917 +epoch: 2, batch: 28694, sum loss: 3685.734863, avg loss: 2.287855, ppl: 9.853783 +epoch: 2, batch: 28695, sum loss: 3688.248779, avg loss: 2.319653, ppl: 10.172147 +epoch: 2, batch: 28696, sum loss: 5262.284180, avg loss: 2.729401, ppl: 15.323700 +epoch: 2, batch: 28697, sum loss: 4757.899414, avg loss: 2.496274, ppl: 12.137192 +epoch: 2, batch: 28698, sum loss: 3422.742188, avg loss: 2.370320, ppl: 10.700817 +epoch: 2, 
batch: 28699, sum loss: 4120.757812, avg loss: 2.682785, ppl: 14.625770 +epoch: 2, batch: 28700, sum loss: 3498.935547, avg loss: 2.362549, ppl: 10.617986 +epoch: 2, batch: 28701, sum loss: 3419.208496, avg loss: 2.371157, ppl: 10.709778 +epoch: 2, batch: 28702, sum loss: 5086.424805, avg loss: 2.634089, ppl: 13.930609 +epoch: 2, batch: 28703, sum loss: 4092.423340, avg loss: 2.405893, ppl: 11.088324 +epoch: 2, batch: 28704, sum loss: 4138.925781, avg loss: 2.528360, ppl: 12.532940 +epoch: 2, batch: 28705, sum loss: 3440.843994, avg loss: 2.419721, ppl: 11.242726 +epoch: 2, batch: 28706, sum loss: 4095.773682, avg loss: 2.446699, ppl: 11.550156 +epoch: 2, batch: 28707, sum loss: 3419.608154, avg loss: 2.112173, ppl: 8.266185 +epoch: 2, batch: 28708, sum loss: 4497.888672, avg loss: 2.579065, ppl: 13.184799 +epoch: 2, batch: 28709, sum loss: 3814.666504, avg loss: 2.439045, ppl: 11.462089 +epoch: 2, batch: 28710, sum loss: 3875.203369, avg loss: 2.342928, ppl: 10.411681 +epoch: 2, batch: 28711, sum loss: 4142.327637, avg loss: 2.545991, ppl: 12.755865 +epoch: 2, batch: 28712, sum loss: 4300.706055, avg loss: 2.352684, ppl: 10.513748 +epoch: 2, batch: 28713, sum loss: 3047.531006, avg loss: 2.027632, ppl: 7.596077 +epoch: 2, batch: 28714, sum loss: 4580.555664, avg loss: 2.634017, ppl: 13.929616 +epoch: 2, batch: 28715, sum loss: 3992.470459, avg loss: 2.501548, ppl: 12.201365 +epoch: 2, batch: 28716, sum loss: 4292.645020, avg loss: 2.607925, ppl: 13.570868 +epoch: 2, batch: 28717, sum loss: 4087.875488, avg loss: 2.486542, ppl: 12.019643 +epoch: 2, batch: 28718, sum loss: 3673.984863, avg loss: 2.350598, ppl: 10.491843 +epoch: 2, batch: 28719, sum loss: 3452.100830, avg loss: 2.124370, ppl: 8.367621 +epoch: 2, batch: 28720, sum loss: 4698.491211, avg loss: 2.577340, ppl: 13.162086 +epoch: 2, batch: 28721, sum loss: 4352.564941, avg loss: 2.502913, ppl: 12.218027 +epoch: 2, batch: 28722, sum loss: 3274.456787, avg loss: 2.199098, ppl: 9.016875 +epoch: 2, batch: 
28723, sum loss: 3621.947998, avg loss: 2.408210, ppl: 11.114050 +epoch: 2, batch: 28724, sum loss: 4010.558594, avg loss: 2.381567, ppl: 10.821847 +epoch: 2, batch: 28725, sum loss: 4775.992676, avg loss: 2.612688, ppl: 13.635649 +epoch: 2, batch: 28726, sum loss: 5035.675293, avg loss: 2.650356, ppl: 14.159072 +epoch: 2, batch: 28727, sum loss: 3921.895264, avg loss: 2.298883, ppl: 9.963052 +epoch: 2, batch: 28728, sum loss: 3885.606689, avg loss: 2.497177, ppl: 12.148147 +epoch: 2, batch: 28729, sum loss: 4399.476562, avg loss: 2.485580, ppl: 12.008082 +epoch: 2, batch: 28730, sum loss: 4040.436035, avg loss: 2.495637, ppl: 12.129457 +epoch: 2, batch: 28731, sum loss: 3958.070557, avg loss: 2.413458, ppl: 11.172524 +epoch: 2, batch: 28732, sum loss: 4709.062012, avg loss: 2.654488, ppl: 14.217706 +epoch: 2, batch: 28733, sum loss: 4101.111328, avg loss: 2.533114, ppl: 12.592658 +epoch: 2, batch: 28734, sum loss: 3601.100586, avg loss: 2.279178, ppl: 9.768644 +epoch: 2, batch: 28735, sum loss: 4760.899902, avg loss: 2.586040, ppl: 13.277094 +epoch: 2, batch: 28736, sum loss: 4505.388672, avg loss: 2.689785, ppl: 14.728502 +epoch: 2, batch: 28737, sum loss: 3788.274414, avg loss: 2.367671, ppl: 10.672512 +epoch: 2, batch: 28738, sum loss: 4440.604980, avg loss: 2.586258, ppl: 13.279984 +epoch: 2, batch: 28739, sum loss: 3881.348877, avg loss: 2.197819, ppl: 9.005354 +epoch: 2, batch: 28740, sum loss: 3782.687012, avg loss: 2.298109, ppl: 9.955338 +epoch: 2, batch: 28741, sum loss: 5394.814453, avg loss: 2.683987, ppl: 14.643366 +epoch: 2, batch: 28742, sum loss: 3872.681152, avg loss: 2.342820, ppl: 10.410550 +epoch: 2, batch: 28743, sum loss: 3888.973877, avg loss: 2.364118, ppl: 10.634653 +epoch: 2, batch: 28744, sum loss: 4401.968750, avg loss: 2.712242, ppl: 15.063007 +epoch: 2, batch: 28745, sum loss: 4751.299805, avg loss: 2.544885, ppl: 12.741758 +epoch: 2, batch: 28746, sum loss: 4126.268066, avg loss: 2.427217, ppl: 11.327312 +epoch: 2, batch: 28747, sum 
loss: 4392.568359, avg loss: 2.494360, ppl: 12.113980 +epoch: 2, batch: 28748, sum loss: 4986.400391, avg loss: 2.745815, ppl: 15.577305 +epoch: 2, batch: 28749, sum loss: 4598.851562, avg loss: 2.532407, ppl: 12.583759 +epoch: 2, batch: 28750, sum loss: 3475.350586, avg loss: 2.360972, ppl: 10.601250 +epoch: 2, batch: 28751, sum loss: 4962.592285, avg loss: 2.659481, ppl: 14.288876 +epoch: 2, batch: 28752, sum loss: 3958.865723, avg loss: 2.451310, ppl: 11.603539 +epoch: 2, batch: 28753, sum loss: 4193.229004, avg loss: 2.648913, ppl: 14.138657 +epoch: 2, batch: 28754, sum loss: 3735.261475, avg loss: 2.395934, ppl: 10.978451 +epoch: 2, batch: 28755, sum loss: 3564.030029, avg loss: 2.347846, ppl: 10.463006 +epoch: 2, batch: 28756, sum loss: 3892.624512, avg loss: 2.487300, ppl: 12.028753 +epoch: 2, batch: 28757, sum loss: 3578.209961, avg loss: 2.369675, ppl: 10.693920 +epoch: 2, batch: 28758, sum loss: 3425.294189, avg loss: 2.349310, ppl: 10.478339 +epoch: 2, batch: 28759, sum loss: 4727.327637, avg loss: 2.719981, ppl: 15.180040 +epoch: 2, batch: 28760, sum loss: 3873.688477, avg loss: 2.265315, ppl: 9.634157 +epoch: 2, batch: 28761, sum loss: 4941.230957, avg loss: 2.593822, ppl: 13.380816 +epoch: 2, batch: 28762, sum loss: 4249.242188, avg loss: 2.428138, ppl: 11.337754 +epoch: 2, batch: 28763, sum loss: 4190.338867, avg loss: 2.473636, ppl: 11.865508 +epoch: 2, batch: 28764, sum loss: 3884.011719, avg loss: 2.307791, ppl: 10.052190 +epoch: 2, batch: 28765, sum loss: 3794.372559, avg loss: 2.575949, ppl: 13.143785 +epoch: 2, batch: 28766, sum loss: 3903.649902, avg loss: 2.375928, ppl: 10.760994 +epoch: 2, batch: 28767, sum loss: 4357.894043, avg loss: 2.382665, ppl: 10.833735 +epoch: 2, batch: 28768, sum loss: 4496.519531, avg loss: 2.687699, ppl: 14.697811 +epoch: 2, batch: 28769, sum loss: 4106.653809, avg loss: 2.582801, ppl: 13.234157 +epoch: 2, batch: 28770, sum loss: 4165.915527, avg loss: 2.574731, ppl: 13.127789 +epoch: 2, batch: 28771, sum loss: 
4163.660645, avg loss: 2.631896, ppl: 13.900106 +epoch: 2, batch: 28772, sum loss: 5260.535156, avg loss: 2.745582, ppl: 15.573677 +epoch: 2, batch: 28773, sum loss: 4135.103516, avg loss: 2.316585, ppl: 10.140980 +epoch: 2, batch: 28774, sum loss: 4464.593262, avg loss: 2.451726, ppl: 11.608368 +epoch: 2, batch: 28775, sum loss: 3456.262451, avg loss: 2.140101, ppl: 8.500294 +epoch: 2, batch: 28776, sum loss: 4499.978027, avg loss: 2.633106, ppl: 13.916928 +epoch: 2, batch: 28777, sum loss: 4885.425781, avg loss: 2.659459, ppl: 14.288555 +epoch: 2, batch: 28778, sum loss: 4226.553711, avg loss: 2.374468, ppl: 10.745298 +epoch: 2, batch: 28779, sum loss: 4296.690430, avg loss: 2.462287, ppl: 11.731607 +epoch: 2, batch: 28780, sum loss: 4499.567871, avg loss: 2.683105, ppl: 14.630457 +epoch: 2, batch: 28781, sum loss: 3482.837402, avg loss: 2.353269, ppl: 10.519899 +epoch: 2, batch: 28782, sum loss: 3993.782959, avg loss: 2.496114, ppl: 12.135248 +epoch: 2, batch: 28783, sum loss: 4325.631836, avg loss: 2.445241, ppl: 11.533334 +epoch: 2, batch: 28784, sum loss: 5759.789551, avg loss: 2.852793, ppl: 17.336138 +epoch: 2, batch: 28785, sum loss: 4949.861816, avg loss: 2.665515, ppl: 14.375354 +epoch: 2, batch: 28786, sum loss: 4252.518555, avg loss: 2.618546, ppl: 13.715763 +epoch: 2, batch: 28787, sum loss: 3851.586670, avg loss: 2.178499, ppl: 8.833042 +epoch: 2, batch: 28788, sum loss: 4208.044922, avg loss: 2.602378, ppl: 13.495791 +epoch: 2, batch: 28789, sum loss: 4449.531738, avg loss: 2.529580, ppl: 12.548239 +epoch: 2, batch: 28790, sum loss: 3918.190430, avg loss: 2.344818, ppl: 10.431373 +epoch: 2, batch: 28791, sum loss: 4111.245117, avg loss: 2.571135, ppl: 13.080663 +epoch: 2, batch: 28792, sum loss: 3976.394043, avg loss: 2.399755, ppl: 11.020476 +epoch: 2, batch: 28793, sum loss: 4141.106445, avg loss: 2.454716, ppl: 11.643131 +epoch: 2, batch: 28794, sum loss: 3858.358154, avg loss: 2.313164, ppl: 10.106356 +epoch: 2, batch: 28795, sum loss: 
4674.885254, avg loss: 2.638197, ppl: 13.987963 +epoch: 2, batch: 28796, sum loss: 4051.769775, avg loss: 2.416082, ppl: 11.201886 +epoch: 2, batch: 28797, sum loss: 4694.096191, avg loss: 2.662562, ppl: 14.332958 +epoch: 2, batch: 28798, sum loss: 3471.909668, avg loss: 2.366673, ppl: 10.661863 +epoch: 2, batch: 28799, sum loss: 3715.524902, avg loss: 2.190758, ppl: 8.941986 +epoch: 2, batch: 28800, sum loss: 3500.855469, avg loss: 2.396205, ppl: 10.981422 +epoch: 2, batch: 28801, sum loss: 3880.967041, avg loss: 2.300514, ppl: 9.979310 +epoch: 2, batch: 28802, sum loss: 4031.754883, avg loss: 2.530920, ppl: 12.565055 +epoch: 2, batch: 28803, sum loss: 4627.205078, avg loss: 2.564970, ppl: 13.000262 +epoch: 2, batch: 28804, sum loss: 3644.684570, avg loss: 2.374387, ppl: 10.744428 +epoch: 2, batch: 28805, sum loss: 4605.295410, avg loss: 2.469327, ppl: 11.814495 +epoch: 2, batch: 28806, sum loss: 3969.497803, avg loss: 2.371265, ppl: 10.710935 +epoch: 2, batch: 28807, sum loss: 3926.584961, avg loss: 2.344230, ppl: 10.425241 +epoch: 2, batch: 28808, sum loss: 4315.705566, avg loss: 2.590459, ppl: 13.335896 +epoch: 2, batch: 28809, sum loss: 3730.072021, avg loss: 2.434773, ppl: 11.413227 +epoch: 2, batch: 28810, sum loss: 4576.172852, avg loss: 2.460308, ppl: 11.708418 +epoch: 2, batch: 28811, sum loss: 4303.185547, avg loss: 2.506223, ppl: 12.258548 +epoch: 2, batch: 28812, sum loss: 4024.043945, avg loss: 2.383912, ppl: 10.847258 +epoch: 2, batch: 28813, sum loss: 4372.520020, avg loss: 2.410430, ppl: 11.138752 +epoch: 2, batch: 28814, sum loss: 3974.405029, avg loss: 2.457888, ppl: 11.680119 +epoch: 2, batch: 28815, sum loss: 4401.157715, avg loss: 2.464254, ppl: 11.754711 +epoch: 2, batch: 28816, sum loss: 4425.563965, avg loss: 2.276525, ppl: 9.742764 +epoch: 2, batch: 28817, sum loss: 4352.382812, avg loss: 2.442415, ppl: 11.500779 +epoch: 2, batch: 28818, sum loss: 4056.695801, avg loss: 2.522821, ppl: 12.463704 +epoch: 2, batch: 28819, sum loss: 
4500.549805, avg loss: 2.583553, ppl: 13.244115 +epoch: 2, batch: 28820, sum loss: 5214.927734, avg loss: 2.770950, ppl: 15.973800 +epoch: 2, batch: 28821, sum loss: 3925.213379, avg loss: 2.498545, ppl: 12.164778 +epoch: 2, batch: 28822, sum loss: 4042.480469, avg loss: 2.454451, ppl: 11.640037 +epoch: 2, batch: 28823, sum loss: 3662.856934, avg loss: 2.550736, ppl: 12.816533 +epoch: 2, batch: 28824, sum loss: 4653.680664, avg loss: 2.523688, ppl: 12.474519 +epoch: 2, batch: 28825, sum loss: 3391.908936, avg loss: 2.148137, ppl: 8.568883 +epoch: 2, batch: 28826, sum loss: 4388.216797, avg loss: 2.320580, ppl: 10.181578 +epoch: 2, batch: 28827, sum loss: 4529.065918, avg loss: 2.542990, ppl: 12.717646 +epoch: 2, batch: 28828, sum loss: 4174.903320, avg loss: 2.468896, ppl: 11.809403 +epoch: 2, batch: 28829, sum loss: 4457.504883, avg loss: 2.433136, ppl: 11.394559 +epoch: 2, batch: 28830, sum loss: 3805.275635, avg loss: 2.455017, ppl: 11.646627 +epoch: 2, batch: 28831, sum loss: 4094.086426, avg loss: 2.406870, ppl: 11.099171 +epoch: 2, batch: 28832, sum loss: 4350.437500, avg loss: 2.445440, ppl: 11.535622 +epoch: 2, batch: 28833, sum loss: 3751.087646, avg loss: 2.562218, ppl: 12.964543 +epoch: 2, batch: 28834, sum loss: 4943.315430, avg loss: 2.615511, ppl: 13.674201 +epoch: 2, batch: 28835, sum loss: 4207.219727, avg loss: 2.379649, ppl: 10.801113 +epoch: 2, batch: 28836, sum loss: 4480.736328, avg loss: 2.590021, ppl: 13.330050 +epoch: 2, batch: 28837, sum loss: 3486.580322, avg loss: 2.401226, ppl: 11.036699 +epoch: 2, batch: 28838, sum loss: 4470.038086, avg loss: 2.529733, ppl: 12.550154 +epoch: 2, batch: 28839, sum loss: 4439.021484, avg loss: 2.347447, ppl: 10.458831 +epoch: 2, batch: 28840, sum loss: 5022.633301, avg loss: 3.094660, ppl: 22.079731 +epoch: 2, batch: 28841, sum loss: 4697.595215, avg loss: 2.652510, ppl: 14.189609 +epoch: 2, batch: 28842, sum loss: 3728.397461, avg loss: 2.427342, ppl: 11.328732 +epoch: 2, batch: 28843, sum loss: 
3054.889404, avg loss: 2.096698, ppl: 8.139252 +epoch: 2, batch: 28844, sum loss: 4502.212402, avg loss: 2.546500, ppl: 12.762362 +epoch: 2, batch: 28845, sum loss: 4324.059570, avg loss: 2.473718, ppl: 11.866489 +epoch: 2, batch: 28846, sum loss: 4519.878906, avg loss: 2.494414, ppl: 12.114636 +epoch: 2, batch: 28847, sum loss: 4052.314209, avg loss: 2.510728, ppl: 12.313887 +epoch: 2, batch: 28848, sum loss: 3826.291260, avg loss: 2.463806, ppl: 11.749450 +epoch: 2, batch: 28849, sum loss: 4062.862793, avg loss: 2.358017, ppl: 10.569967 +epoch: 2, batch: 28850, sum loss: 3997.060059, avg loss: 2.265907, ppl: 9.639865 +epoch: 2, batch: 28851, sum loss: 5087.194336, avg loss: 2.727718, ppl: 15.297939 +epoch: 2, batch: 28852, sum loss: 4948.729980, avg loss: 2.523575, ppl: 12.473106 +epoch: 2, batch: 28853, sum loss: 3327.160889, avg loss: 2.331578, ppl: 10.294168 +epoch: 2, batch: 28854, sum loss: 3988.394043, avg loss: 2.530707, ppl: 12.562383 +epoch: 2, batch: 28855, sum loss: 3656.113281, avg loss: 2.306696, ppl: 10.041193 +epoch: 2, batch: 28856, sum loss: 4551.360352, avg loss: 2.655403, ppl: 14.230719 +epoch: 2, batch: 28857, sum loss: 3760.804199, avg loss: 2.333005, ppl: 10.308872 +epoch: 2, batch: 28858, sum loss: 3425.526611, avg loss: 2.162580, ppl: 8.693539 +epoch: 2, batch: 28859, sum loss: 3109.822266, avg loss: 2.158100, ppl: 8.654679 +epoch: 2, batch: 28860, sum loss: 4152.930664, avg loss: 2.563538, ppl: 12.981660 +epoch: 2, batch: 28861, sum loss: 4133.320312, avg loss: 2.425658, ppl: 11.309663 +epoch: 2, batch: 28862, sum loss: 5073.092285, avg loss: 2.846853, ppl: 17.233467 +epoch: 2, batch: 28863, sum loss: 4202.781250, avg loss: 2.515130, ppl: 12.368211 +epoch: 2, batch: 28864, sum loss: 3756.103516, avg loss: 2.325761, ppl: 10.234462 +epoch: 2, batch: 28865, sum loss: 4870.255859, avg loss: 2.696709, ppl: 14.830838 +epoch: 2, batch: 28866, sum loss: 3228.005615, avg loss: 2.182559, ppl: 8.868978 +epoch: 2, batch: 28867, sum loss: 4293.343262, 
avg loss: 2.532946, ppl: 12.590542 +epoch: 2, batch: 28868, sum loss: 4586.288086, avg loss: 2.599936, ppl: 13.462883 +epoch: 2, batch: 28869, sum loss: 3457.783691, avg loss: 2.326907, ppl: 10.246201 +epoch: 2, batch: 28870, sum loss: 4518.808594, avg loss: 2.616565, ppl: 13.688629 +epoch: 2, batch: 28871, sum loss: 4770.437012, avg loss: 2.524041, ppl: 12.478918 +epoch: 2, batch: 28872, sum loss: 4205.694824, avg loss: 2.366739, ppl: 10.662562 +epoch: 2, batch: 28873, sum loss: 3850.275879, avg loss: 2.352032, ppl: 10.506895 +epoch: 2, batch: 28874, sum loss: 4052.884766, avg loss: 2.412431, ppl: 11.161066 +epoch: 2, batch: 28875, sum loss: 4342.212891, avg loss: 2.347142, ppl: 10.455647 +epoch: 2, batch: 28876, sum loss: 4343.587891, avg loss: 2.510744, ppl: 12.314093 +epoch: 2, batch: 28877, sum loss: 5393.523438, avg loss: 2.896629, ppl: 18.112986 +epoch: 2, batch: 28878, sum loss: 3545.939453, avg loss: 2.442107, ppl: 11.497242 +epoch: 2, batch: 28879, sum loss: 3018.427979, avg loss: 2.200021, ppl: 9.025199 +epoch: 2, batch: 28880, sum loss: 5207.062500, avg loss: 2.766771, ppl: 15.907181 +epoch: 2, batch: 28881, sum loss: 4212.618652, avg loss: 2.440683, ppl: 11.480878 +epoch: 2, batch: 28882, sum loss: 4370.748535, avg loss: 2.686385, ppl: 14.678515 +epoch: 2, batch: 28883, sum loss: 4635.064453, avg loss: 2.623127, ppl: 13.778735 +epoch: 2, batch: 28884, sum loss: 4561.035645, avg loss: 2.572496, ppl: 13.098480 +epoch: 2, batch: 28885, sum loss: 4179.163086, avg loss: 2.636696, ppl: 13.966978 +epoch: 2, batch: 28886, sum loss: 3035.354248, avg loss: 2.106422, ppl: 8.218783 +epoch: 2, batch: 28887, sum loss: 4279.479980, avg loss: 2.489517, ppl: 12.055454 +epoch: 2, batch: 28888, sum loss: 3126.315186, avg loss: 2.262167, ppl: 9.603880 +epoch: 2, batch: 28889, sum loss: 4708.672363, avg loss: 2.614476, ppl: 13.660063 +epoch: 2, batch: 28890, sum loss: 3986.073242, avg loss: 2.297449, ppl: 9.948767 +epoch: 2, batch: 28891, sum loss: 3704.000977, avg loss: 
2.262676, ppl: 9.608770 +epoch: 2, batch: 28892, sum loss: 4197.617188, avg loss: 2.404134, ppl: 11.068835 +epoch: 2, batch: 28893, sum loss: 3951.472168, avg loss: 2.352067, ppl: 10.507263 +epoch: 2, batch: 28894, sum loss: 4588.171875, avg loss: 2.618820, ppl: 13.719522 +epoch: 2, batch: 28895, sum loss: 4663.447754, avg loss: 2.417547, ppl: 11.218305 +epoch: 2, batch: 28896, sum loss: 2926.366211, avg loss: 2.063728, ppl: 7.875275 +epoch: 2, batch: 28897, sum loss: 3263.749023, avg loss: 2.306536, ppl: 10.039592 +epoch: 2, batch: 28898, sum loss: 2759.216309, avg loss: 2.052988, ppl: 7.791149 +epoch: 2, batch: 28899, sum loss: 3821.177979, avg loss: 2.444772, ppl: 11.527916 +epoch: 2, batch: 28900, sum loss: 3486.321777, avg loss: 2.202351, ppl: 9.046257 +epoch: 2, batch: 28901, sum loss: 3823.527344, avg loss: 2.219110, ppl: 9.199144 +epoch: 2, batch: 28902, sum loss: 4449.937988, avg loss: 2.644051, ppl: 14.070087 +epoch: 2, batch: 28903, sum loss: 3866.069092, avg loss: 2.446879, ppl: 11.552238 +epoch: 2, batch: 28904, sum loss: 4846.117188, avg loss: 2.473771, ppl: 11.867112 +epoch: 2, batch: 28905, sum loss: 3571.065430, avg loss: 2.469617, ppl: 11.817916 +epoch: 2, batch: 28906, sum loss: 4697.692383, avg loss: 2.620018, ppl: 13.735974 +epoch: 2, batch: 28907, sum loss: 3780.893066, avg loss: 2.415906, ppl: 11.199912 +epoch: 2, batch: 28908, sum loss: 3650.811523, avg loss: 2.341765, ppl: 10.399575 +epoch: 2, batch: 28909, sum loss: 4505.856934, avg loss: 2.570369, ppl: 13.070650 +epoch: 2, batch: 28910, sum loss: 3678.843262, avg loss: 2.409197, ppl: 11.125020 +epoch: 2, batch: 28911, sum loss: 3911.883301, avg loss: 2.217621, ppl: 9.185452 +epoch: 2, batch: 28912, sum loss: 4752.944824, avg loss: 2.664207, ppl: 14.356560 +epoch: 2, batch: 28913, sum loss: 4145.906250, avg loss: 2.347625, ppl: 10.460699 +epoch: 2, batch: 28914, sum loss: 4260.678711, avg loss: 2.602736, ppl: 13.500626 +epoch: 2, batch: 28915, sum loss: 3767.966797, avg loss: 2.287776, 
ppl: 9.852998 +epoch: 2, batch: 28916, sum loss: 4461.569336, avg loss: 2.439349, ppl: 11.465576 +epoch: 2, batch: 28917, sum loss: 4144.610840, avg loss: 2.394345, ppl: 10.961014 +epoch: 2, batch: 28918, sum loss: 4323.252441, avg loss: 2.507687, ppl: 12.276503 +epoch: 2, batch: 28919, sum loss: 5375.099121, avg loss: 2.750818, ppl: 15.655437 +epoch: 2, batch: 28920, sum loss: 3918.129883, avg loss: 2.339182, ppl: 10.372749 +epoch: 2, batch: 28921, sum loss: 4385.523438, avg loss: 2.585804, ppl: 13.273957 +epoch: 2, batch: 28922, sum loss: 3608.856934, avg loss: 2.197842, ppl: 9.005562 +epoch: 2, batch: 28923, sum loss: 4926.114258, avg loss: 2.732177, ppl: 15.366296 +epoch: 2, batch: 28924, sum loss: 3705.393799, avg loss: 2.156807, ppl: 8.643493 +epoch: 2, batch: 28925, sum loss: 4159.514648, avg loss: 2.352667, ppl: 10.513571 +epoch: 2, batch: 28926, sum loss: 3723.186035, avg loss: 2.505509, ppl: 12.249788 +epoch: 2, batch: 28927, sum loss: 3713.665771, avg loss: 2.104060, ppl: 8.199391 +epoch: 2, batch: 28928, sum loss: 3644.720459, avg loss: 2.170768, ppl: 8.765018 +epoch: 2, batch: 28929, sum loss: 4864.954102, avg loss: 2.612757, ppl: 13.636601 +epoch: 2, batch: 28930, sum loss: 4822.225098, avg loss: 2.622200, ppl: 13.765972 +epoch: 2, batch: 28931, sum loss: 4801.580566, avg loss: 2.592646, ppl: 13.365090 +epoch: 2, batch: 28932, sum loss: 4505.692383, avg loss: 2.608971, ppl: 13.585060 +epoch: 2, batch: 28933, sum loss: 4457.171387, avg loss: 2.378427, ppl: 10.787915 +epoch: 2, batch: 28934, sum loss: 3417.234863, avg loss: 2.391347, ppl: 10.928206 +epoch: 2, batch: 28935, sum loss: 3147.575439, avg loss: 2.285821, ppl: 9.833756 +epoch: 2, batch: 28936, sum loss: 4536.493164, avg loss: 2.583425, ppl: 13.242410 +epoch: 2, batch: 28937, sum loss: 3358.090820, avg loss: 2.128068, ppl: 8.398623 +epoch: 2, batch: 28938, sum loss: 4083.838867, avg loss: 2.430856, ppl: 11.368615 +epoch: 2, batch: 28939, sum loss: 4844.139160, avg loss: 2.676320, ppl: 14.531520 
+epoch: 2, batch: 28940, sum loss: 5796.776367, avg loss: 2.789594, ppl: 16.274410 +epoch: 2, batch: 28941, sum loss: 4657.895508, avg loss: 2.402215, ppl: 11.047626 +epoch: 2, batch: 28942, sum loss: 3977.672852, avg loss: 2.604894, ppl: 13.529790 +epoch: 2, batch: 28943, sum loss: 3947.582275, avg loss: 2.372345, ppl: 10.722509 +epoch: 2, batch: 28944, sum loss: 5266.570801, avg loss: 2.700805, ppl: 14.891721 +epoch: 2, batch: 28945, sum loss: 4344.480957, avg loss: 2.508361, ppl: 12.284777 +epoch: 2, batch: 28946, sum loss: 4066.802246, avg loss: 2.429392, ppl: 11.351979 +epoch: 2, batch: 28947, sum loss: 3921.061035, avg loss: 2.489563, ppl: 12.056001 +epoch: 2, batch: 28948, sum loss: 4157.212402, avg loss: 2.664880, ppl: 14.366222 +epoch: 2, batch: 28949, sum loss: 3713.407227, avg loss: 2.368244, ppl: 10.678629 +epoch: 2, batch: 28950, sum loss: 4131.628906, avg loss: 2.621592, ppl: 13.757606 +epoch: 2, batch: 28951, sum loss: 4033.189209, avg loss: 2.422336, ppl: 11.272159 +epoch: 2, batch: 28952, sum loss: 4587.630371, avg loss: 2.469123, ppl: 11.812081 +epoch: 2, batch: 28953, sum loss: 4874.928711, avg loss: 2.601349, ppl: 13.481917 +epoch: 2, batch: 28954, sum loss: 3704.943604, avg loss: 2.181946, ppl: 8.863534 +epoch: 2, batch: 28955, sum loss: 4313.686035, avg loss: 2.457941, ppl: 11.680734 +epoch: 2, batch: 28956, sum loss: 3511.724609, avg loss: 2.382446, ppl: 10.831360 +epoch: 2, batch: 28957, sum loss: 4735.938965, avg loss: 2.436183, ppl: 11.429329 +epoch: 2, batch: 28958, sum loss: 4697.960938, avg loss: 2.586983, ppl: 13.289613 +epoch: 2, batch: 28959, sum loss: 4164.933594, avg loss: 2.471771, ppl: 11.843400 +epoch: 2, batch: 28960, sum loss: 3555.786133, avg loss: 2.081842, ppl: 8.019226 +epoch: 2, batch: 28961, sum loss: 4542.950195, avg loss: 2.689728, ppl: 14.727666 +epoch: 2, batch: 28962, sum loss: 3839.626465, avg loss: 2.336961, ppl: 10.349731 +epoch: 2, batch: 28963, sum loss: 3527.770020, avg loss: 2.255607, ppl: 9.541086 +epoch: 2, 
batch: 28964, sum loss: 4134.624023, avg loss: 2.527277, ppl: 12.519363 +epoch: 2, batch: 28965, sum loss: 3448.153320, avg loss: 2.271511, ppl: 9.694036 +epoch: 2, batch: 28966, sum loss: 4686.597656, avg loss: 2.357443, ppl: 10.563909 +epoch: 2, batch: 28967, sum loss: 3823.410400, avg loss: 2.318624, ppl: 10.161677 +epoch: 2, batch: 28968, sum loss: 4050.662598, avg loss: 2.488122, ppl: 12.038646 +epoch: 2, batch: 28969, sum loss: 3720.926758, avg loss: 2.257844, ppl: 9.562450 +epoch: 2, batch: 28970, sum loss: 4100.099121, avg loss: 2.420365, ppl: 11.249969 +epoch: 2, batch: 28971, sum loss: 4762.614258, avg loss: 2.502687, ppl: 12.215278 +epoch: 2, batch: 28972, sum loss: 3399.848633, avg loss: 2.173816, ppl: 8.791771 +epoch: 2, batch: 28973, sum loss: 4279.067871, avg loss: 2.527506, ppl: 12.522238 +epoch: 2, batch: 28974, sum loss: 3745.851807, avg loss: 2.375302, ppl: 10.754264 +epoch: 2, batch: 28975, sum loss: 3678.507812, avg loss: 2.383997, ppl: 10.848182 +epoch: 2, batch: 28976, sum loss: 3301.499023, avg loss: 2.156433, ppl: 8.640264 +epoch: 2, batch: 28977, sum loss: 3812.157959, avg loss: 2.361932, ppl: 10.611431 +epoch: 2, batch: 28978, sum loss: 3582.438477, avg loss: 2.374048, ppl: 10.740786 +epoch: 2, batch: 28979, sum loss: 4182.180664, avg loss: 2.769656, ppl: 15.953148 +epoch: 2, batch: 28980, sum loss: 4390.811035, avg loss: 2.591978, ppl: 13.356165 +epoch: 2, batch: 28981, sum loss: 4773.291504, avg loss: 2.606932, ppl: 13.557388 +epoch: 2, batch: 28982, sum loss: 3872.079590, avg loss: 2.365351, ppl: 10.647775 +epoch: 2, batch: 28983, sum loss: 3460.909424, avg loss: 2.324318, ppl: 10.219707 +epoch: 2, batch: 28984, sum loss: 3706.340820, avg loss: 2.133760, ppl: 8.446566 +epoch: 2, batch: 28985, sum loss: 3603.754883, avg loss: 2.223168, ppl: 9.236543 +epoch: 2, batch: 28986, sum loss: 3472.349365, avg loss: 2.110851, ppl: 8.255260 +epoch: 2, batch: 28987, sum loss: 4318.829102, avg loss: 2.383460, ppl: 10.842350 +epoch: 2, batch: 28988, 
sum loss: 4577.436523, avg loss: 2.498601, ppl: 12.165460 +epoch: 2, batch: 28989, sum loss: 4349.534180, avg loss: 2.732119, ppl: 15.365417 +epoch: 2, batch: 28990, sum loss: 5524.270996, avg loss: 2.886244, ppl: 17.925850 +epoch: 2, batch: 28991, sum loss: 3390.593750, avg loss: 2.352945, ppl: 10.516497 +epoch: 2, batch: 28992, sum loss: 4551.273438, avg loss: 2.678796, ppl: 14.567537 +epoch: 2, batch: 28993, sum loss: 5041.736816, avg loss: 2.710611, ppl: 15.038466 +epoch: 2, batch: 28994, sum loss: 3960.451660, avg loss: 2.294584, ppl: 9.920306 +epoch: 2, batch: 28995, sum loss: 4518.701660, avg loss: 2.585070, ppl: 13.264213 +epoch: 2, batch: 28996, sum loss: 3806.786377, avg loss: 2.427797, ppl: 11.333890 +epoch: 2, batch: 28997, sum loss: 3528.305176, avg loss: 2.342832, ppl: 10.410679 +epoch: 2, batch: 28998, sum loss: 4208.319824, avg loss: 2.577048, ppl: 13.158241 +epoch: 2, batch: 28999, sum loss: 4550.078125, avg loss: 2.518029, ppl: 12.404120 +epoch: 2, batch: 29000, sum loss: 3725.060547, avg loss: 2.263099, ppl: 9.612833 +epoch: 2, batch: 29001, sum loss: 4579.998047, avg loss: 2.724567, ppl: 15.249816 +epoch: 2, batch: 29002, sum loss: 5097.168945, avg loss: 2.482790, ppl: 11.974632 +epoch: 2, batch: 29003, sum loss: 4246.493652, avg loss: 2.460309, ppl: 11.708429 +epoch: 2, batch: 29004, sum loss: 4875.398926, avg loss: 2.532675, ppl: 12.587126 +epoch: 2, batch: 29005, sum loss: 3832.974121, avg loss: 2.276113, ppl: 9.738752 +epoch: 2, batch: 29006, sum loss: 3959.392334, avg loss: 2.477717, ppl: 11.914039 +epoch: 2, batch: 29007, sum loss: 4231.950684, avg loss: 2.362898, ppl: 10.621690 +epoch: 2, batch: 29008, sum loss: 4290.995117, avg loss: 2.610094, ppl: 13.600333 +epoch: 2, batch: 29009, sum loss: 3485.842285, avg loss: 2.451366, ppl: 11.604187 +epoch: 2, batch: 29010, sum loss: 4363.229492, avg loss: 2.614278, ppl: 13.657350 +epoch: 2, batch: 29011, sum loss: 4646.157227, avg loss: 2.358455, ppl: 10.574606 +epoch: 2, batch: 29012, sum loss: 
4189.394043, avg loss: 2.520694, ppl: 12.437231 +epoch: 2, batch: 29013, sum loss: 5194.781250, avg loss: 2.926637, ppl: 18.664759 +epoch: 2, batch: 29014, sum loss: 4850.111816, avg loss: 2.581220, ppl: 13.213247 +epoch: 2, batch: 29015, sum loss: 3662.340088, avg loss: 2.338659, ppl: 10.367325 +epoch: 2, batch: 29016, sum loss: 4562.130371, avg loss: 2.608422, ppl: 13.577609 +epoch: 2, batch: 29017, sum loss: 4375.545898, avg loss: 2.445805, ppl: 11.539839 +epoch: 2, batch: 29018, sum loss: 3990.603027, avg loss: 2.556440, ppl: 12.889849 +epoch: 2, batch: 29019, sum loss: 4672.972656, avg loss: 2.591776, ppl: 13.353471 +epoch: 2, batch: 29020, sum loss: 4365.175293, avg loss: 2.560220, ppl: 12.938666 +epoch: 2, batch: 29021, sum loss: 4475.638672, avg loss: 2.493392, ppl: 12.102258 +epoch: 2, batch: 29022, sum loss: 3847.962158, avg loss: 2.336346, ppl: 10.343374 +epoch: 2, batch: 29023, sum loss: 4114.406250, avg loss: 2.524176, ppl: 12.480602 +epoch: 2, batch: 29024, sum loss: 4421.266113, avg loss: 2.456259, ppl: 11.661106 +epoch: 2, batch: 29025, sum loss: 4236.577148, avg loss: 2.450305, ppl: 11.591882 +epoch: 2, batch: 29026, sum loss: 4123.941406, avg loss: 2.374175, ppl: 10.742146 +epoch: 2, batch: 29027, sum loss: 3838.260498, avg loss: 2.397414, ppl: 10.994712 +epoch: 2, batch: 29028, sum loss: 3717.084229, avg loss: 2.256882, ppl: 9.553253 +epoch: 2, batch: 29029, sum loss: 4509.282715, avg loss: 2.457375, ppl: 11.674125 +epoch: 2, batch: 29030, sum loss: 3920.889648, avg loss: 2.436849, ppl: 11.436942 +epoch: 2, batch: 29031, sum loss: 4163.532715, avg loss: 2.406666, ppl: 11.096906 +epoch: 2, batch: 29032, sum loss: 4664.042969, avg loss: 2.751648, ppl: 15.668427 +epoch: 2, batch: 29033, sum loss: 3901.207275, avg loss: 2.319386, ppl: 10.169429 +epoch: 2, batch: 29034, sum loss: 5156.601562, avg loss: 2.754595, ppl: 15.714676 +epoch: 2, batch: 29035, sum loss: 4472.288086, avg loss: 2.487368, ppl: 12.029573 +epoch: 2, batch: 29036, sum loss: 
4563.746582, avg loss: 2.545313, ppl: 12.747222 +epoch: 2, batch: 29037, sum loss: 3896.634277, avg loss: 2.326349, ppl: 10.240485 +epoch: 2, batch: 29038, sum loss: 3295.922363, avg loss: 2.329274, ppl: 10.270479 +epoch: 2, batch: 29039, sum loss: 3870.741211, avg loss: 2.249123, ppl: 9.479422 +epoch: 2, batch: 29040, sum loss: 4578.007812, avg loss: 2.604100, ppl: 13.519053 +epoch: 2, batch: 29041, sum loss: 3828.304199, avg loss: 2.358783, ppl: 10.578065 +epoch: 2, batch: 29042, sum loss: 4005.518555, avg loss: 2.480197, ppl: 11.943620 +epoch: 2, batch: 29043, sum loss: 3954.005127, avg loss: 2.515270, ppl: 12.369954 +epoch: 2, batch: 29044, sum loss: 4630.289551, avg loss: 2.610084, ppl: 13.600197 +epoch: 2, batch: 29045, sum loss: 3732.908203, avg loss: 2.320018, ppl: 10.175853 +epoch: 2, batch: 29046, sum loss: 5076.276367, avg loss: 2.743933, ppl: 15.548018 +epoch: 2, batch: 29047, sum loss: 4555.579102, avg loss: 2.594293, ppl: 13.387124 +epoch: 2, batch: 29048, sum loss: 3900.458740, avg loss: 2.361053, ppl: 10.602104 +epoch: 2, batch: 29049, sum loss: 4423.902832, avg loss: 2.356901, ppl: 10.558180 +epoch: 2, batch: 29050, sum loss: 4007.492920, avg loss: 2.347682, ppl: 10.461289 +epoch: 2, batch: 29051, sum loss: 4150.586914, avg loss: 2.648747, ppl: 14.136321 +epoch: 2, batch: 29052, sum loss: 4002.563965, avg loss: 2.296365, ppl: 9.937990 +epoch: 2, batch: 29053, sum loss: 4460.126953, avg loss: 2.427941, ppl: 11.335516 +epoch: 2, batch: 29054, sum loss: 5260.478516, avg loss: 2.906342, ppl: 18.289768 +epoch: 2, batch: 29055, sum loss: 3402.488281, avg loss: 2.306772, ppl: 10.041954 +epoch: 2, batch: 29056, sum loss: 3897.004150, avg loss: 2.558768, ppl: 12.919893 +epoch: 2, batch: 29057, sum loss: 4393.552734, avg loss: 2.563333, ppl: 12.979005 +epoch: 2, batch: 29058, sum loss: 3811.812500, avg loss: 2.438780, ppl: 11.459047 +epoch: 2, batch: 29059, sum loss: 4835.624023, avg loss: 2.609619, ppl: 13.593869 +epoch: 2, batch: 29060, sum loss: 
3956.358398, avg loss: 2.564069, ppl: 12.988561 +epoch: 2, batch: 29061, sum loss: 3609.650146, avg loss: 2.167958, ppl: 8.740418 +epoch: 2, batch: 29062, sum loss: 3998.027588, avg loss: 2.484790, ppl: 11.998604 +epoch: 2, batch: 29063, sum loss: 3640.926758, avg loss: 2.582218, ppl: 13.226438 +epoch: 2, batch: 29064, sum loss: 4268.108887, avg loss: 2.437527, ppl: 11.444699 +epoch: 2, batch: 29065, sum loss: 4635.976074, avg loss: 2.434861, ppl: 11.414237 +epoch: 2, batch: 29066, sum loss: 3938.330078, avg loss: 2.555698, ppl: 12.880282 +epoch: 2, batch: 29067, sum loss: 3966.482178, avg loss: 2.430443, ppl: 11.363911 +epoch: 2, batch: 29068, sum loss: 4775.102539, avg loss: 2.705441, ppl: 14.960906 +epoch: 2, batch: 29069, sum loss: 4243.404297, avg loss: 2.438738, ppl: 11.458572 +epoch: 2, batch: 29070, sum loss: 3751.758789, avg loss: 2.368535, ppl: 10.681727 +epoch: 2, batch: 29071, sum loss: 4608.572266, avg loss: 2.511484, ppl: 12.323200 +epoch: 2, batch: 29072, sum loss: 4181.566406, avg loss: 2.435391, ppl: 11.420282 +epoch: 2, batch: 29073, sum loss: 4797.551270, avg loss: 2.716620, ppl: 15.129103 +epoch: 2, batch: 29074, sum loss: 4441.069824, avg loss: 2.457703, ppl: 11.677958 +epoch: 2, batch: 29075, sum loss: 4380.718750, avg loss: 2.677701, ppl: 14.551600 +epoch: 2, batch: 29076, sum loss: 4039.656494, avg loss: 2.581250, ppl: 13.213648 +epoch: 2, batch: 29077, sum loss: 4569.714355, avg loss: 2.642981, ppl: 14.055043 +epoch: 2, batch: 29078, sum loss: 4505.817871, avg loss: 2.514407, ppl: 12.359282 +epoch: 2, batch: 29079, sum loss: 3104.697754, avg loss: 2.071179, ppl: 7.934175 +epoch: 2, batch: 29080, sum loss: 4457.011719, avg loss: 2.530955, ppl: 12.565498 +epoch: 2, batch: 29081, sum loss: 3848.070312, avg loss: 2.387140, ppl: 10.882329 +epoch: 2, batch: 29082, sum loss: 3522.319580, avg loss: 2.263701, ppl: 9.618626 +epoch: 2, batch: 29083, sum loss: 3249.191650, avg loss: 2.301127, ppl: 9.985432 +epoch: 2, batch: 29084, sum loss: 
4425.022461, avg loss: 2.613717, ppl: 13.649691 +epoch: 2, batch: 29085, sum loss: 4436.765625, avg loss: 2.477256, ppl: 11.908544 +epoch: 2, batch: 29086, sum loss: 4518.053711, avg loss: 2.645230, ppl: 14.086689 +epoch: 2, batch: 29087, sum loss: 4107.687988, avg loss: 2.303807, ppl: 10.012229 +epoch: 2, batch: 29088, sum loss: 4818.118164, avg loss: 2.862815, ppl: 17.510754 +epoch: 2, batch: 29089, sum loss: 4787.803711, avg loss: 2.589402, ppl: 13.321799 +epoch: 2, batch: 29090, sum loss: 4068.516357, avg loss: 2.271645, ppl: 9.695337 +epoch: 2, batch: 29091, sum loss: 4401.319336, avg loss: 2.582934, ppl: 13.235914 +epoch: 2, batch: 29092, sum loss: 3072.934814, avg loss: 2.182482, ppl: 8.868292 +epoch: 2, batch: 29093, sum loss: 4177.427246, avg loss: 2.551880, ppl: 12.831203 +epoch: 2, batch: 29094, sum loss: 3612.190430, avg loss: 2.455602, ppl: 11.653446 +epoch: 2, batch: 29095, sum loss: 4032.913818, avg loss: 2.586859, ppl: 13.287974 +epoch: 2, batch: 29096, sum loss: 4210.359863, avg loss: 2.497248, ppl: 12.149013 +epoch: 2, batch: 29097, sum loss: 3635.740723, avg loss: 2.192847, ppl: 8.960690 +epoch: 2, batch: 29098, sum loss: 4023.558105, avg loss: 2.403559, ppl: 11.062483 +epoch: 2, batch: 29099, sum loss: 4566.070312, avg loss: 2.435238, ppl: 11.418532 +epoch: 2, batch: 29100, sum loss: 4502.291016, avg loss: 2.546545, ppl: 12.762929 +epoch: 2, batch: 29101, sum loss: 3419.477051, avg loss: 2.382911, ppl: 10.836399 +epoch: 2, batch: 29102, sum loss: 4884.952637, avg loss: 2.704846, ppl: 14.952020 +epoch: 2, batch: 29103, sum loss: 4874.043945, avg loss: 2.647498, ppl: 14.118671 +epoch: 2, batch: 29104, sum loss: 4132.412109, avg loss: 2.350632, ppl: 10.492203 +epoch: 2, batch: 29105, sum loss: 4525.263672, avg loss: 2.579968, ppl: 13.196712 +epoch: 2, batch: 29106, sum loss: 3410.374512, avg loss: 2.446467, ppl: 11.547474 +epoch: 2, batch: 29107, sum loss: 4337.775391, avg loss: 2.436953, ppl: 11.438131 +epoch: 2, batch: 29108, sum loss: 
4718.726562, avg loss: 2.667454, ppl: 14.403258 +epoch: 2, batch: 29109, sum loss: 3788.198975, avg loss: 2.349999, ppl: 10.485563 +epoch: 2, batch: 29110, sum loss: 4018.977539, avg loss: 2.415251, ppl: 11.192579 +epoch: 2, batch: 29111, sum loss: 4184.570801, avg loss: 2.258268, ppl: 9.566506 +epoch: 2, batch: 29112, sum loss: 5402.086914, avg loss: 2.786017, ppl: 16.216301 +epoch: 2, batch: 29113, sum loss: 4494.228516, avg loss: 2.315419, ppl: 10.129166 +epoch: 2, batch: 29114, sum loss: 3769.030029, avg loss: 2.316552, ppl: 10.140648 +epoch: 2, batch: 29115, sum loss: 3935.874268, avg loss: 2.326167, ppl: 10.238620 +epoch: 2, batch: 29116, sum loss: 3696.901123, avg loss: 2.141889, ppl: 8.515512 +epoch: 2, batch: 29117, sum loss: 4628.532715, avg loss: 2.609094, ppl: 13.586735 +epoch: 2, batch: 29118, sum loss: 3465.110107, avg loss: 2.491093, ppl: 12.074466 +epoch: 2, batch: 29119, sum loss: 3618.416260, avg loss: 2.331454, ppl: 10.292894 +epoch: 2, batch: 29120, sum loss: 4211.242676, avg loss: 2.527757, ppl: 12.525376 +epoch: 2, batch: 29121, sum loss: 4385.245117, avg loss: 2.534824, ppl: 12.614206 +epoch: 2, batch: 29122, sum loss: 4257.163086, avg loss: 2.526506, ppl: 12.509723 +epoch: 2, batch: 29123, sum loss: 4211.705078, avg loss: 2.711980, ppl: 15.059065 +epoch: 2, batch: 29124, sum loss: 5204.252930, avg loss: 2.689536, ppl: 14.724847 +epoch: 2, batch: 29125, sum loss: 3766.590088, avg loss: 2.592285, ppl: 13.360267 +epoch: 2, batch: 29126, sum loss: 3883.025391, avg loss: 2.370589, ppl: 10.703697 +epoch: 2, batch: 29127, sum loss: 3923.136475, avg loss: 2.357654, ppl: 10.566137 +epoch: 2, batch: 29128, sum loss: 4194.152832, avg loss: 2.531173, ppl: 12.568233 +epoch: 2, batch: 29129, sum loss: 3685.951416, avg loss: 2.346246, ppl: 10.446276 +epoch: 2, batch: 29130, sum loss: 4303.931152, avg loss: 2.393733, ppl: 10.954306 +epoch: 2, batch: 29131, sum loss: 3662.617676, avg loss: 2.401716, ppl: 11.042113 +epoch: 2, batch: 29132, sum loss: 
4508.542480, avg loss: 2.609110, ppl: 13.586958 +epoch: 2, batch: 29133, sum loss: 3307.443604, avg loss: 2.190360, ppl: 8.938431 +epoch: 2, batch: 29134, sum loss: 4218.022461, avg loss: 2.388461, ppl: 10.896712 +epoch: 2, batch: 29135, sum loss: 4154.848145, avg loss: 2.424065, ppl: 11.291671 +epoch: 2, batch: 29136, sum loss: 3291.705078, avg loss: 2.277997, ppl: 9.757113 +epoch: 2, batch: 29137, sum loss: 4332.334961, avg loss: 2.514414, ppl: 12.359362 +epoch: 2, batch: 29138, sum loss: 3470.551514, avg loss: 2.181365, ppl: 8.858390 +epoch: 2, batch: 29139, sum loss: 4854.992676, avg loss: 2.648659, ppl: 14.135077 +epoch: 2, batch: 29140, sum loss: 3905.136963, avg loss: 2.372501, ppl: 10.724184 +epoch: 2, batch: 29141, sum loss: 3901.331543, avg loss: 2.523500, ppl: 12.472178 +epoch: 2, batch: 29142, sum loss: 4876.201660, avg loss: 2.529150, ppl: 12.542847 +epoch: 2, batch: 29143, sum loss: 3312.654297, avg loss: 2.305257, ppl: 10.026755 +epoch: 2, batch: 29144, sum loss: 3476.156006, avg loss: 2.273484, ppl: 9.713178 +epoch: 2, batch: 29145, sum loss: 3523.214355, avg loss: 2.259919, ppl: 9.582316 +epoch: 2, batch: 29146, sum loss: 4825.405273, avg loss: 2.755800, ppl: 15.733619 +epoch: 2, batch: 29147, sum loss: 4058.472168, avg loss: 2.401463, ppl: 11.039312 +epoch: 2, batch: 29148, sum loss: 4817.145020, avg loss: 2.527358, ppl: 12.520388 +epoch: 2, batch: 29149, sum loss: 3844.790527, avg loss: 2.338680, ppl: 10.367545 +epoch: 2, batch: 29150, sum loss: 4351.241211, avg loss: 2.702634, ppl: 14.918982 +epoch: 2, batch: 29151, sum loss: 4810.044434, avg loss: 2.605658, ppl: 13.540130 +epoch: 2, batch: 29152, sum loss: 3214.038574, avg loss: 2.078938, ppl: 7.995975 +epoch: 2, batch: 29153, sum loss: 3587.518066, avg loss: 2.357108, ppl: 10.560366 +epoch: 2, batch: 29154, sum loss: 4710.493652, avg loss: 2.367082, ppl: 10.666224 +epoch: 2, batch: 29155, sum loss: 3516.463135, avg loss: 2.264303, ppl: 9.624418 +epoch: 2, batch: 29156, sum loss: 3728.805420, 
avg loss: 2.471044, ppl: 11.834797 +epoch: 2, batch: 29157, sum loss: 4261.888672, avg loss: 2.498176, ppl: 12.160295 +epoch: 2, batch: 29158, sum loss: 5035.005859, avg loss: 2.513732, ppl: 12.350944 +epoch: 2, batch: 29159, sum loss: 4158.759277, avg loss: 2.440587, ppl: 11.479773 +epoch: 2, batch: 29160, sum loss: 4261.964355, avg loss: 2.302520, ppl: 9.999350 +epoch: 2, batch: 29161, sum loss: 3346.538330, avg loss: 2.363374, ppl: 10.626751 +epoch: 2, batch: 29162, sum loss: 3283.351318, avg loss: 2.328618, ppl: 10.263748 +epoch: 2, batch: 29163, sum loss: 3883.673340, avg loss: 2.494331, ppl: 12.113628 +epoch: 2, batch: 29164, sum loss: 4368.914551, avg loss: 2.560911, ppl: 12.947610 +epoch: 2, batch: 29165, sum loss: 3187.180664, avg loss: 2.146250, ppl: 8.552721 +epoch: 2, batch: 29166, sum loss: 3999.772705, avg loss: 2.470520, ppl: 11.828602 +epoch: 2, batch: 29167, sum loss: 4519.939941, avg loss: 2.491698, ppl: 12.081774 +epoch: 2, batch: 29168, sum loss: 3708.217041, avg loss: 2.514045, ppl: 12.354810 +epoch: 2, batch: 29169, sum loss: 3801.124023, avg loss: 2.257200, ppl: 9.556290 +epoch: 2, batch: 29170, sum loss: 4010.088379, avg loss: 2.398378, ppl: 11.005316 +epoch: 2, batch: 29171, sum loss: 4920.975098, avg loss: 2.743019, ppl: 15.533805 +epoch: 2, batch: 29172, sum loss: 4009.462402, avg loss: 2.395139, ppl: 10.969723 +epoch: 2, batch: 29173, sum loss: 4350.693359, avg loss: 2.474797, ppl: 11.879295 +epoch: 2, batch: 29174, sum loss: 4286.467773, avg loss: 2.464904, ppl: 11.762351 +epoch: 2, batch: 29175, sum loss: 4574.458496, avg loss: 2.804696, ppl: 16.522045 +epoch: 2, batch: 29176, sum loss: 4125.135742, avg loss: 2.560606, ppl: 12.943656 +epoch: 2, batch: 29177, sum loss: 4619.366211, avg loss: 2.479531, ppl: 11.935663 +epoch: 2, batch: 29178, sum loss: 4176.585938, avg loss: 2.497958, ppl: 12.157645 +epoch: 2, batch: 29179, sum loss: 3475.389404, avg loss: 2.445735, ppl: 11.539027 +epoch: 2, batch: 29180, sum loss: 4941.824219, avg loss: 
2.556557, ppl: 12.891356 +epoch: 2, batch: 29181, sum loss: 3894.782227, avg loss: 2.413124, ppl: 11.168799 +epoch: 2, batch: 29182, sum loss: 3636.676270, avg loss: 2.164688, ppl: 8.711886 +epoch: 2, batch: 29183, sum loss: 4684.697754, avg loss: 2.602610, ppl: 13.498922 +epoch: 2, batch: 29184, sum loss: 3826.791016, avg loss: 2.315058, ppl: 10.125510 +epoch: 2, batch: 29185, sum loss: 4386.089844, avg loss: 2.731065, ppl: 15.349218 +epoch: 2, batch: 29186, sum loss: 3608.609375, avg loss: 2.399341, ppl: 11.015919 +epoch: 2, batch: 29187, sum loss: 3696.201660, avg loss: 2.384646, ppl: 10.855224 +epoch: 2, batch: 29188, sum loss: 5003.587891, avg loss: 2.516895, ppl: 12.390069 +epoch: 2, batch: 29189, sum loss: 4389.158691, avg loss: 2.661709, ppl: 14.320746 +epoch: 2, batch: 29190, sum loss: 4271.146484, avg loss: 2.659493, ppl: 14.289049 +epoch: 2, batch: 29191, sum loss: 4555.055664, avg loss: 2.500031, ppl: 12.182866 +epoch: 2, batch: 29192, sum loss: 4258.339355, avg loss: 2.797858, ppl: 16.409456 +epoch: 2, batch: 29193, sum loss: 3875.768066, avg loss: 2.451466, ppl: 11.605349 +epoch: 2, batch: 29194, sum loss: 4013.916016, avg loss: 2.447510, ppl: 11.559525 +epoch: 2, batch: 29195, sum loss: 3577.674072, avg loss: 2.303718, ppl: 10.011336 +epoch: 2, batch: 29196, sum loss: 4248.757812, avg loss: 2.657134, ppl: 14.255379 +epoch: 2, batch: 29197, sum loss: 4849.164062, avg loss: 2.605677, ppl: 13.540384 +epoch: 2, batch: 29198, sum loss: 4384.370605, avg loss: 2.422304, ppl: 11.271804 +epoch: 2, batch: 29199, sum loss: 3781.032715, avg loss: 2.405237, ppl: 11.081059 +epoch: 2, batch: 29200, sum loss: 4174.753418, avg loss: 2.607591, ppl: 13.566332 +epoch: 2, batch: 29201, sum loss: 4068.699463, avg loss: 2.400413, ppl: 11.027725 +epoch: 2, batch: 29202, sum loss: 4138.346680, avg loss: 2.557693, ppl: 12.906003 +epoch: 2, batch: 29203, sum loss: 4234.297852, avg loss: 2.396320, ppl: 10.982690 +epoch: 2, batch: 29204, sum loss: 3951.786621, avg loss: 
2.330063, ppl: 10.278587 +epoch: 2, batch: 29205, sum loss: 4462.059082, avg loss: 2.501154, ppl: 12.196566 +epoch: 2, batch: 29206, sum loss: 3259.273438, avg loss: 2.138631, ppl: 8.487809 +epoch: 2, batch: 29207, sum loss: 4273.080566, avg loss: 2.180143, ppl: 8.847572 +epoch: 2, batch: 29208, sum loss: 3720.479980, avg loss: 2.359214, ppl: 10.582628 +epoch: 2, batch: 29209, sum loss: 3377.989258, avg loss: 2.225289, ppl: 9.256161 +epoch: 2, batch: 29210, sum loss: 4390.525391, avg loss: 2.613408, ppl: 13.645476 +epoch: 2, batch: 29211, sum loss: 4596.070312, avg loss: 2.757091, ppl: 15.753942 +epoch: 2, batch: 29212, sum loss: 3625.857666, avg loss: 2.480067, ppl: 11.942064 +epoch: 2, batch: 29213, sum loss: 4077.456543, avg loss: 2.481714, ppl: 11.961752 +epoch: 2, batch: 29214, sum loss: 4435.034180, avg loss: 2.638331, ppl: 13.989831 +epoch: 2, batch: 29215, sum loss: 3581.450684, avg loss: 2.368685, ppl: 10.683330 +epoch: 2, batch: 29216, sum loss: 3944.173096, avg loss: 2.571169, ppl: 13.081106 +epoch: 2, batch: 29217, sum loss: 3276.090576, avg loss: 2.191365, ppl: 8.947416 +epoch: 2, batch: 29218, sum loss: 4100.817871, avg loss: 2.283306, ppl: 9.809057 +epoch: 2, batch: 29219, sum loss: 3535.595703, avg loss: 2.304821, ppl: 10.022387 +epoch: 2, batch: 29220, sum loss: 3746.994629, avg loss: 2.234344, ppl: 9.340350 +epoch: 2, batch: 29221, sum loss: 3611.307373, avg loss: 2.415590, ppl: 11.196375 +epoch: 2, batch: 29222, sum loss: 3116.067383, avg loss: 2.089918, ppl: 8.084251 +epoch: 2, batch: 29223, sum loss: 4793.975098, avg loss: 2.796952, ppl: 16.394596 +epoch: 2, batch: 29224, sum loss: 4614.467285, avg loss: 2.604101, ppl: 13.519069 +epoch: 2, batch: 29225, sum loss: 4239.359863, avg loss: 2.482061, ppl: 11.965897 +epoch: 2, batch: 29226, sum loss: 3863.430908, avg loss: 2.248796, ppl: 9.476317 +epoch: 2, batch: 29227, sum loss: 4490.324707, avg loss: 2.638264, ppl: 13.988894 +epoch: 2, batch: 29228, sum loss: 3846.393555, avg loss: 2.281372, ppl: 
9.790106 +epoch: 2, batch: 29229, sum loss: 4540.732422, avg loss: 2.621670, ppl: 13.758681 +epoch: 2, batch: 29230, sum loss: 3792.627686, avg loss: 2.545388, ppl: 12.748170 +epoch: 2, batch: 29231, sum loss: 4556.919434, avg loss: 2.728694, ppl: 15.312882 +epoch: 2, batch: 29232, sum loss: 3503.111328, avg loss: 2.173146, ppl: 8.785881 +epoch: 2, batch: 29233, sum loss: 3683.686768, avg loss: 2.288004, ppl: 9.855248 +epoch: 2, batch: 29234, sum loss: 3979.167480, avg loss: 2.471533, ppl: 11.840580 +epoch: 2, batch: 29235, sum loss: 4201.789551, avg loss: 2.582538, ppl: 13.230677 +epoch: 2, batch: 29236, sum loss: 4292.918945, avg loss: 2.439158, ppl: 11.463389 +epoch: 2, batch: 29237, sum loss: 3911.912842, avg loss: 2.346678, ppl: 10.450797 +epoch: 2, batch: 29238, sum loss: 3328.851807, avg loss: 2.210393, ppl: 9.119299 +epoch: 2, batch: 29239, sum loss: 4002.231689, avg loss: 2.444858, ppl: 11.528908 +epoch: 2, batch: 29240, sum loss: 4105.031250, avg loss: 2.366012, ppl: 10.654819 +epoch: 2, batch: 29241, sum loss: 4134.206055, avg loss: 2.582265, ppl: 13.227062 +epoch: 2, batch: 29242, sum loss: 4023.860840, avg loss: 2.175060, ppl: 8.802713 +epoch: 2, batch: 29243, sum loss: 3847.922852, avg loss: 2.418556, ppl: 11.229634 +epoch: 2, batch: 29244, sum loss: 4122.098633, avg loss: 2.538238, ppl: 12.657350 +epoch: 2, batch: 29245, sum loss: 4581.177734, avg loss: 2.553611, ppl: 12.853432 +epoch: 2, batch: 29246, sum loss: 4452.568848, avg loss: 2.379780, ppl: 10.802529 +epoch: 2, batch: 29247, sum loss: 3642.351807, avg loss: 2.308208, ppl: 10.056385 +epoch: 2, batch: 29248, sum loss: 3596.365234, avg loss: 2.264714, ppl: 9.628368 +epoch: 2, batch: 29249, sum loss: 4178.272461, avg loss: 2.488548, ppl: 12.043779 +epoch: 2, batch: 29250, sum loss: 3925.374023, avg loss: 2.366108, ppl: 10.655844 +epoch: 2, batch: 29251, sum loss: 4179.842773, avg loss: 2.480619, ppl: 11.948658 +epoch: 2, batch: 29252, sum loss: 4377.242676, avg loss: 2.561289, ppl: 12.952500 
+epoch: 2, batch: 29253, sum loss: 4400.087402, avg loss: 2.598989, ppl: 13.450127 +epoch: 2, batch: 29254, sum loss: 4428.484375, avg loss: 2.504799, ppl: 12.241096 +epoch: 2, batch: 29255, sum loss: 4393.239258, avg loss: 2.451585, ppl: 11.606724 +epoch: 2, batch: 29256, sum loss: 3545.815430, avg loss: 2.384543, ppl: 10.854100 +epoch: 2, batch: 29257, sum loss: 3865.853760, avg loss: 2.376062, ppl: 10.762441 +epoch: 2, batch: 29258, sum loss: 4034.876221, avg loss: 2.308282, ppl: 10.057128 +epoch: 2, batch: 29259, sum loss: 3854.764160, avg loss: 2.398733, ppl: 11.009220 +epoch: 2, batch: 29260, sum loss: 4379.959473, avg loss: 2.587100, ppl: 13.291165 +epoch: 2, batch: 29261, sum loss: 3747.047607, avg loss: 2.179783, ppl: 8.844390 +epoch: 2, batch: 29262, sum loss: 4546.214355, avg loss: 2.737035, ppl: 15.441128 +epoch: 2, batch: 29263, sum loss: 5452.056641, avg loss: 2.717875, ppl: 15.148095 +epoch: 2, batch: 29264, sum loss: 4988.424805, avg loss: 2.559479, ppl: 12.929083 +epoch: 2, batch: 29265, sum loss: 4279.701172, avg loss: 2.567307, ppl: 13.030688 +epoch: 2, batch: 29266, sum loss: 5037.708984, avg loss: 2.528970, ppl: 12.540588 +epoch: 2, batch: 29267, sum loss: 4221.704590, avg loss: 2.440292, ppl: 11.476387 +epoch: 2, batch: 29268, sum loss: 3836.967529, avg loss: 2.319811, ppl: 10.173753 +epoch: 2, batch: 29269, sum loss: 3239.455566, avg loss: 2.117291, ppl: 8.308600 +epoch: 2, batch: 29270, sum loss: 3332.260010, avg loss: 2.299696, ppl: 9.971155 +epoch: 2, batch: 29271, sum loss: 4434.829590, avg loss: 2.491477, ppl: 12.079107 +epoch: 2, batch: 29272, sum loss: 4354.715332, avg loss: 2.520090, ppl: 12.429713 +epoch: 2, batch: 29273, sum loss: 4821.535156, avg loss: 2.720957, ppl: 15.194854 +epoch: 2, batch: 29274, sum loss: 4062.414551, avg loss: 2.540597, ppl: 12.687243 +epoch: 2, batch: 29275, sum loss: 4360.948730, avg loss: 2.474999, ppl: 11.881701 +epoch: 2, batch: 29276, sum loss: 3889.270508, avg loss: 2.299983, ppl: 9.974010 +epoch: 2, 
batch: 29277, sum loss: 4146.264160, avg loss: 2.440415, ppl: 11.477799 +epoch: 2, batch: 29278, sum loss: 3791.831299, avg loss: 2.195617, ppl: 8.985547 +epoch: 2, batch: 29279, sum loss: 3504.599609, avg loss: 2.299606, ppl: 9.970254 +epoch: 2, batch: 29280, sum loss: 4272.655273, avg loss: 2.438730, ppl: 11.458482 +epoch: 2, batch: 29281, sum loss: 4072.124023, avg loss: 2.496704, ppl: 12.142405 +epoch: 2, batch: 29282, sum loss: 3876.704102, avg loss: 2.363844, ppl: 10.631741 +epoch: 2, batch: 29283, sum loss: 5026.815918, avg loss: 2.696790, ppl: 14.832041 +epoch: 2, batch: 29284, sum loss: 4953.646484, avg loss: 2.693663, ppl: 14.785743 +epoch: 2, batch: 29285, sum loss: 4254.689453, avg loss: 2.245219, ppl: 9.442481 +epoch: 2, batch: 29286, sum loss: 3944.928955, avg loss: 2.306976, ppl: 10.044006 +epoch: 2, batch: 29287, sum loss: 4817.648926, avg loss: 2.648515, ppl: 14.133036 +epoch: 2, batch: 29288, sum loss: 3555.863281, avg loss: 2.324094, ppl: 10.217417 +epoch: 2, batch: 29289, sum loss: 4538.940918, avg loss: 2.659016, ppl: 14.282234 +epoch: 2, batch: 29290, sum loss: 4168.349609, avg loss: 2.399741, ppl: 11.020318 +epoch: 2, batch: 29291, sum loss: 5261.204102, avg loss: 2.608431, ppl: 13.577725 +epoch: 2, batch: 29292, sum loss: 3854.172363, avg loss: 2.389444, ppl: 10.907424 +epoch: 2, batch: 29293, sum loss: 3895.674561, avg loss: 2.325776, ppl: 10.234620 +epoch: 2, batch: 29294, sum loss: 4721.794434, avg loss: 2.687418, ppl: 14.693694 +epoch: 2, batch: 29295, sum loss: 3201.366211, avg loss: 2.200252, ppl: 9.027285 +epoch: 2, batch: 29296, sum loss: 4633.212402, avg loss: 2.658183, ppl: 14.270331 +epoch: 2, batch: 29297, sum loss: 4294.701660, avg loss: 2.571678, ppl: 13.087763 +epoch: 2, batch: 29298, sum loss: 4552.727051, avg loss: 2.861551, ppl: 17.488623 +epoch: 2, batch: 29299, sum loss: 4317.392090, avg loss: 2.681610, ppl: 14.608592 +epoch: 2, batch: 29300, sum loss: 4690.287598, avg loss: 2.667968, ppl: 14.410654 +epoch: 2, batch: 
29301, sum loss: 4250.571777, avg loss: 2.358808, ppl: 10.578335 +epoch: 2, batch: 29302, sum loss: 3973.421875, avg loss: 2.491174, ppl: 12.075438 +epoch: 2, batch: 29303, sum loss: 4042.587158, avg loss: 2.380794, ppl: 10.813480 +epoch: 2, batch: 29304, sum loss: 4654.916992, avg loss: 2.616592, ppl: 13.688991 +epoch: 2, batch: 29305, sum loss: 4841.604004, avg loss: 2.814886, ppl: 16.691275 +epoch: 2, batch: 29306, sum loss: 4688.089355, avg loss: 2.457070, ppl: 11.670566 +epoch: 2, batch: 29307, sum loss: 4299.000000, avg loss: 2.698682, ppl: 14.860130 +epoch: 2, batch: 29308, sum loss: 3733.136230, avg loss: 2.340524, ppl: 10.386680 +epoch: 2, batch: 29309, sum loss: 4711.674316, avg loss: 2.507544, ppl: 12.274741 +epoch: 2, batch: 29310, sum loss: 4814.896973, avg loss: 2.576189, ppl: 13.146937 +epoch: 2, batch: 29311, sum loss: 4157.345215, avg loss: 2.389279, ppl: 10.905627 +epoch: 2, batch: 29312, sum loss: 4617.816406, avg loss: 2.526158, ppl: 12.505366 +epoch: 2, batch: 29313, sum loss: 5462.669434, avg loss: 2.754750, ppl: 15.717112 +epoch: 2, batch: 29314, sum loss: 4888.811035, avg loss: 2.583938, ppl: 13.249212 +epoch: 2, batch: 29315, sum loss: 4425.154785, avg loss: 2.430068, ppl: 11.359660 +epoch: 2, batch: 29316, sum loss: 4242.143555, avg loss: 2.638149, ppl: 13.987289 +epoch: 2, batch: 29317, sum loss: 3758.702881, avg loss: 2.139273, ppl: 8.493262 +epoch: 2, batch: 29318, sum loss: 3218.555176, avg loss: 2.302257, ppl: 9.996720 +epoch: 2, batch: 29319, sum loss: 3951.408691, avg loss: 2.343659, ppl: 10.419291 +epoch: 2, batch: 29320, sum loss: 3880.615234, avg loss: 2.222575, ppl: 9.231068 +epoch: 2, batch: 29321, sum loss: 4313.527344, avg loss: 2.288343, ppl: 9.858593 +epoch: 2, batch: 29322, sum loss: 4179.026367, avg loss: 2.357037, ppl: 10.559618 +epoch: 2, batch: 29323, sum loss: 4011.138672, avg loss: 2.337493, ppl: 10.355247 +epoch: 2, batch: 29324, sum loss: 4615.523438, avg loss: 2.501639, ppl: 12.202474 +epoch: 2, batch: 29325, sum 
loss: 4337.834473, avg loss: 2.577442, ppl: 13.163419 +epoch: 2, batch: 29326, sum loss: 3710.463867, avg loss: 2.272176, ppl: 9.700489 +epoch: 2, batch: 29327, sum loss: 3660.585938, avg loss: 2.496989, ppl: 12.145867 +epoch: 2, batch: 29328, sum loss: 4133.198730, avg loss: 2.531046, ppl: 12.566648 +epoch: 2, batch: 29329, sum loss: 5038.862305, avg loss: 2.664655, ppl: 14.362993 +epoch: 2, batch: 29330, sum loss: 3911.897705, avg loss: 2.486902, ppl: 12.023973 +epoch: 2, batch: 29331, sum loss: 4394.383789, avg loss: 2.565315, ppl: 13.004748 +epoch: 2, batch: 29332, sum loss: 4815.398438, avg loss: 2.562745, ppl: 12.971379 +epoch: 2, batch: 29333, sum loss: 4578.805176, avg loss: 2.592755, ppl: 13.366544 +epoch: 2, batch: 29334, sum loss: 3984.802246, avg loss: 2.453696, ppl: 11.631257 +epoch: 2, batch: 29335, sum loss: 5288.364258, avg loss: 2.636273, ppl: 13.961079 +epoch: 2, batch: 29336, sum loss: 4726.939453, avg loss: 2.591524, ppl: 13.350101 +epoch: 2, batch: 29337, sum loss: 4309.626953, avg loss: 2.486801, ppl: 12.022758 +epoch: 2, batch: 29338, sum loss: 3867.171875, avg loss: 2.390094, ppl: 10.914517 +epoch: 2, batch: 29339, sum loss: 3477.955078, avg loss: 2.142917, ppl: 8.524270 +epoch: 2, batch: 29340, sum loss: 3814.310303, avg loss: 2.435703, ppl: 11.423841 +epoch: 2, batch: 29341, sum loss: 4501.597656, avg loss: 2.579712, ppl: 13.193340 +epoch: 2, batch: 29342, sum loss: 3925.148193, avg loss: 2.498503, ppl: 12.164273 +epoch: 2, batch: 29343, sum loss: 4079.485352, avg loss: 2.543320, ppl: 12.721840 +epoch: 2, batch: 29344, sum loss: 3797.583496, avg loss: 2.205333, ppl: 9.073273 +epoch: 2, batch: 29345, sum loss: 4896.124023, avg loss: 2.380225, ppl: 10.807339 +epoch: 2, batch: 29346, sum loss: 3639.742920, avg loss: 2.175579, ppl: 8.807280 +epoch: 2, batch: 29347, sum loss: 4276.979980, avg loss: 2.678134, ppl: 14.557902 +epoch: 2, batch: 29348, sum loss: 4430.306641, avg loss: 2.560871, ppl: 12.947088 +epoch: 2, batch: 29349, sum loss: 
4272.792969, avg loss: 2.347688, ppl: 10.461360 +epoch: 2, batch: 29350, sum loss: 3761.828613, avg loss: 2.502880, ppl: 12.217628 +epoch: 2, batch: 29351, sum loss: 5154.737305, avg loss: 2.556913, ppl: 12.895951 +epoch: 2, batch: 29352, sum loss: 4511.426270, avg loss: 2.633641, ppl: 13.924370 +epoch: 2, batch: 29353, sum loss: 3376.529297, avg loss: 2.130302, ppl: 8.417412 +epoch: 2, batch: 29354, sum loss: 4982.129883, avg loss: 2.743464, ppl: 15.540717 +epoch: 2, batch: 29355, sum loss: 3489.405762, avg loss: 2.225386, ppl: 9.257059 +epoch: 2, batch: 29356, sum loss: 5090.068359, avg loss: 2.729259, ppl: 15.321533 +epoch: 2, batch: 29357, sum loss: 4070.667236, avg loss: 2.563393, ppl: 12.979778 +epoch: 2, batch: 29358, sum loss: 3605.656250, avg loss: 2.227088, ppl: 9.272828 +epoch: 2, batch: 29359, sum loss: 4076.739990, avg loss: 2.379883, ppl: 10.803642 +epoch: 2, batch: 29360, sum loss: 4424.424316, avg loss: 2.889892, ppl: 17.991364 +epoch: 2, batch: 29361, sum loss: 4661.814453, avg loss: 2.671527, ppl: 14.462034 +epoch: 2, batch: 29362, sum loss: 3912.697266, avg loss: 2.532490, ppl: 12.584806 +epoch: 2, batch: 29363, sum loss: 2856.014404, avg loss: 2.066581, ppl: 7.897771 +epoch: 2, batch: 29364, sum loss: 3036.546631, avg loss: 1.983375, ppl: 7.267227 +epoch: 2, batch: 29365, sum loss: 3585.233398, avg loss: 2.204941, ppl: 9.069713 +epoch: 2, batch: 29366, sum loss: 3783.444580, avg loss: 2.529041, ppl: 12.541468 +epoch: 2, batch: 29367, sum loss: 3306.517578, avg loss: 2.258550, ppl: 9.569205 +epoch: 2, batch: 29368, sum loss: 3766.839355, avg loss: 2.648973, ppl: 14.139506 +epoch: 2, batch: 29369, sum loss: 3939.996094, avg loss: 2.414213, ppl: 11.180972 +epoch: 2, batch: 29370, sum loss: 4243.765625, avg loss: 2.649042, ppl: 14.140488 +epoch: 2, batch: 29371, sum loss: 3623.249023, avg loss: 2.249068, ppl: 9.478902 +epoch: 2, batch: 29372, sum loss: 3861.948242, avg loss: 2.369293, ppl: 10.689837 +epoch: 2, batch: 29373, sum loss: 4734.289062, 
avg loss: 2.714615, ppl: 15.098798 +epoch: 2, batch: 29374, sum loss: 4822.013184, avg loss: 2.508852, ppl: 12.290812 +epoch: 2, batch: 29375, sum loss: 3677.772461, avg loss: 2.527679, ppl: 12.524400 +epoch: 2, batch: 29376, sum loss: 4497.312012, avg loss: 2.571362, ppl: 13.083632 +epoch: 2, batch: 29377, sum loss: 4704.376953, avg loss: 2.741478, ppl: 15.509898 +epoch: 2, batch: 29378, sum loss: 3498.291260, avg loss: 2.378172, ppl: 10.785172 +epoch: 2, batch: 29379, sum loss: 4010.311279, avg loss: 2.347957, ppl: 10.464173 +epoch: 2, batch: 29380, sum loss: 4011.553467, avg loss: 2.500969, ppl: 12.194301 +epoch: 2, batch: 29381, sum loss: 4082.012695, avg loss: 2.401184, ppl: 11.036237 +epoch: 2, batch: 29382, sum loss: 3864.797363, avg loss: 2.504730, ppl: 12.240248 +epoch: 2, batch: 29383, sum loss: 3443.057617, avg loss: 2.145207, ppl: 8.543811 +epoch: 2, batch: 29384, sum loss: 4061.819336, avg loss: 2.416311, ppl: 11.204453 +epoch: 2, batch: 29385, sum loss: 4078.247314, avg loss: 2.439143, ppl: 11.463215 +epoch: 2, batch: 29386, sum loss: 4147.411621, avg loss: 2.435356, ppl: 11.419885 +epoch: 2, batch: 29387, sum loss: 4053.798584, avg loss: 2.439109, ppl: 11.462818 +epoch: 2, batch: 29388, sum loss: 3688.366211, avg loss: 2.490457, ppl: 12.066784 +epoch: 2, batch: 29389, sum loss: 4685.374023, avg loss: 2.520373, ppl: 12.433238 +epoch: 2, batch: 29390, sum loss: 5338.895508, avg loss: 2.622247, ppl: 13.766629 +epoch: 2, batch: 29391, sum loss: 3851.577637, avg loss: 2.549026, ppl: 12.794630 +epoch: 2, batch: 29392, sum loss: 4030.795166, avg loss: 2.605556, ppl: 13.538751 +epoch: 2, batch: 29393, sum loss: 3637.170166, avg loss: 2.303464, ppl: 10.008795 +epoch: 2, batch: 29394, sum loss: 3950.467773, avg loss: 2.472133, ppl: 11.847688 +epoch: 2, batch: 29395, sum loss: 4104.273926, avg loss: 2.501081, ppl: 12.195670 +epoch: 2, batch: 29396, sum loss: 4658.160156, avg loss: 2.513848, ppl: 12.352368 +epoch: 2, batch: 29397, sum loss: 3599.677490, avg 
loss: 2.275397, ppl: 9.731778 +epoch: 2, batch: 29398, sum loss: 4719.719238, avg loss: 2.584731, ppl: 13.259727 +epoch: 2, batch: 29399, sum loss: 3854.742676, avg loss: 2.324935, ppl: 10.226017 +epoch: 2, batch: 29400, sum loss: 4950.755371, avg loss: 2.792304, ppl: 16.318579 +epoch: 2, batch: 29401, sum loss: 4151.305664, avg loss: 2.311418, ppl: 10.088717 +epoch: 2, batch: 29402, sum loss: 4697.719238, avg loss: 2.866210, ppl: 17.570309 +epoch: 2, batch: 29403, sum loss: 4168.437500, avg loss: 2.506577, ppl: 12.262882 +epoch: 2, batch: 29404, sum loss: 4293.113770, avg loss: 2.500357, ppl: 12.186849 +epoch: 2, batch: 29405, sum loss: 3988.736816, avg loss: 2.479016, ppl: 11.929521 +epoch: 2, batch: 29406, sum loss: 3796.993164, avg loss: 2.302603, ppl: 10.000184 +epoch: 2, batch: 29407, sum loss: 4017.174805, avg loss: 2.449497, ppl: 11.582517 +epoch: 2, batch: 29408, sum loss: 4804.692871, avg loss: 2.538137, ppl: 12.656067 +epoch: 2, batch: 29409, sum loss: 4206.829102, avg loss: 2.529663, ppl: 12.549274 +epoch: 2, batch: 29410, sum loss: 4103.025879, avg loss: 2.554811, ppl: 12.868861 +epoch: 2, batch: 29411, sum loss: 4191.350586, avg loss: 2.425550, ppl: 11.308450 +epoch: 2, batch: 29412, sum loss: 3184.494385, avg loss: 2.077296, ppl: 7.982852 +epoch: 2, batch: 29413, sum loss: 4799.909668, avg loss: 2.739675, ppl: 15.481946 +epoch: 2, batch: 29414, sum loss: 3469.319336, avg loss: 2.395939, ppl: 10.978500 +epoch: 2, batch: 29415, sum loss: 4784.915039, avg loss: 2.620435, ppl: 13.741707 +epoch: 2, batch: 29416, sum loss: 5419.536133, avg loss: 2.858405, ppl: 17.433704 +epoch: 2, batch: 29417, sum loss: 5330.196777, avg loss: 2.754624, ppl: 15.715125 +epoch: 2, batch: 29418, sum loss: 3341.789551, avg loss: 2.505090, ppl: 12.244658 +epoch: 2, batch: 29419, sum loss: 4173.009766, avg loss: 2.473628, ppl: 11.865415 +epoch: 2, batch: 29420, sum loss: 3833.381836, avg loss: 2.378028, ppl: 10.783621 +epoch: 2, batch: 29421, sum loss: 4523.206543, avg loss: 
2.389438, ppl: 10.907364 +epoch: 2, batch: 29422, sum loss: 4249.889160, avg loss: 2.543321, ppl: 12.721848 +epoch: 2, batch: 29423, sum loss: 4929.977539, avg loss: 2.698400, ppl: 14.855950 +epoch: 2, batch: 29424, sum loss: 3824.588623, avg loss: 2.180495, ppl: 8.850689 +epoch: 2, batch: 29425, sum loss: 5447.195801, avg loss: 2.879067, ppl: 17.797668 +epoch: 2, batch: 29426, sum loss: 3667.616455, avg loss: 2.287970, ppl: 9.854915 +epoch: 2, batch: 29427, sum loss: 3621.919678, avg loss: 2.305487, ppl: 10.029060 +epoch: 2, batch: 29428, sum loss: 4250.568359, avg loss: 2.703924, ppl: 14.938233 +epoch: 2, batch: 29429, sum loss: 4571.406738, avg loss: 2.622723, ppl: 13.773182 +epoch: 2, batch: 29430, sum loss: 5006.113770, avg loss: 2.575162, ppl: 13.133441 +epoch: 2, batch: 29431, sum loss: 3992.317871, avg loss: 2.358132, ppl: 10.571190 +epoch: 2, batch: 29432, sum loss: 3682.253174, avg loss: 2.315882, ppl: 10.133862 +epoch: 2, batch: 29433, sum loss: 3448.233154, avg loss: 2.416421, ppl: 11.205687 +epoch: 2, batch: 29434, sum loss: 4281.928223, avg loss: 2.410996, ppl: 11.145053 +epoch: 2, batch: 29435, sum loss: 4654.435547, avg loss: 2.494338, ppl: 12.113715 +epoch: 2, batch: 29436, sum loss: 3906.386230, avg loss: 2.380491, ppl: 10.810212 +epoch: 2, batch: 29437, sum loss: 3862.177734, avg loss: 2.430571, ppl: 11.365374 +epoch: 2, batch: 29438, sum loss: 5435.590820, avg loss: 2.757783, ppl: 15.764856 +epoch: 2, batch: 29439, sum loss: 3298.945312, avg loss: 2.109300, ppl: 8.242471 +epoch: 2, batch: 29440, sum loss: 3942.151855, avg loss: 2.253946, ppl: 9.525249 +epoch: 2, batch: 29441, sum loss: 3888.797607, avg loss: 2.301064, ppl: 9.984798 +epoch: 2, batch: 29442, sum loss: 4527.613281, avg loss: 2.599089, ppl: 13.451480 +epoch: 2, batch: 29443, sum loss: 4804.602539, avg loss: 2.806427, ppl: 16.550669 +epoch: 2, batch: 29444, sum loss: 4020.191406, avg loss: 2.522077, ppl: 12.454442 +epoch: 2, batch: 29445, sum loss: 4815.519043, avg loss: 2.573768, 
ppl: 13.115145 +epoch: 2, batch: 29446, sum loss: 4308.575195, avg loss: 2.466271, ppl: 11.778440 +epoch: 2, batch: 29447, sum loss: 3452.920898, avg loss: 2.279156, ppl: 9.768430 +epoch: 2, batch: 29448, sum loss: 3456.672607, avg loss: 2.265185, ppl: 9.632910 +epoch: 2, batch: 29449, sum loss: 4795.431641, avg loss: 2.621887, ppl: 13.761670 +epoch: 2, batch: 29450, sum loss: 3706.578857, avg loss: 2.295095, ppl: 9.925381 +epoch: 2, batch: 29451, sum loss: 3492.285889, avg loss: 2.366047, ppl: 10.655193 +epoch: 2, batch: 29452, sum loss: 4113.109375, avg loss: 2.399714, ppl: 11.020021 +epoch: 2, batch: 29453, sum loss: 2956.618652, avg loss: 2.058927, ppl: 7.837553 +epoch: 2, batch: 29454, sum loss: 3638.856445, avg loss: 2.362894, ppl: 10.621644 +epoch: 2, batch: 29455, sum loss: 4214.895020, avg loss: 2.460534, ppl: 11.711064 +epoch: 2, batch: 29456, sum loss: 4635.494141, avg loss: 2.494884, ppl: 12.120325 +epoch: 2, batch: 29457, sum loss: 3451.659424, avg loss: 2.137250, ppl: 8.476100 +epoch: 2, batch: 29458, sum loss: 3533.959961, avg loss: 2.174745, ppl: 8.799937 +epoch: 2, batch: 29459, sum loss: 4928.033203, avg loss: 2.827328, ppl: 16.900246 +epoch: 2, batch: 29460, sum loss: 3627.966553, avg loss: 2.130338, ppl: 8.417715 +epoch: 2, batch: 29461, sum loss: 4455.365723, avg loss: 2.507240, ppl: 12.271016 +epoch: 2, batch: 29462, sum loss: 4580.549316, avg loss: 2.452114, ppl: 11.612871 +epoch: 2, batch: 29463, sum loss: 4026.709717, avg loss: 2.365869, ppl: 10.653296 +epoch: 2, batch: 29464, sum loss: 3557.037354, avg loss: 2.238538, ppl: 9.379611 +epoch: 2, batch: 29465, sum loss: 3746.602295, avg loss: 2.185882, ppl: 8.898497 +epoch: 2, batch: 29466, sum loss: 4379.877441, avg loss: 2.407849, ppl: 11.110039 +epoch: 2, batch: 29467, sum loss: 3905.227051, avg loss: 2.445352, ppl: 11.534610 +epoch: 2, batch: 29468, sum loss: 3542.065674, avg loss: 2.220731, ppl: 9.214062 +epoch: 2, batch: 29469, sum loss: 4373.955078, avg loss: 2.272184, ppl: 9.700567 
+epoch: 2, batch: 29470, sum loss: 3995.084229, avg loss: 2.422731, ppl: 11.276618 +epoch: 2, batch: 29471, sum loss: 4188.618164, avg loss: 2.506654, ppl: 12.263824 +epoch: 2, batch: 29472, sum loss: 3574.801758, avg loss: 2.276944, ppl: 9.746848 +epoch: 2, batch: 29473, sum loss: 4431.749023, avg loss: 2.671338, ppl: 14.459297 +epoch: 2, batch: 29474, sum loss: 4538.979492, avg loss: 2.514670, ppl: 12.362530 +epoch: 2, batch: 29475, sum loss: 4229.469238, avg loss: 2.446194, ppl: 11.544325 +epoch: 2, batch: 29476, sum loss: 4620.508789, avg loss: 2.578409, ppl: 13.176158 +epoch: 2, batch: 29477, sum loss: 4119.862305, avg loss: 2.399454, ppl: 11.017158 +epoch: 2, batch: 29478, sum loss: 3968.442139, avg loss: 2.455719, ppl: 11.654810 +epoch: 2, batch: 29479, sum loss: 3732.733643, avg loss: 2.352069, ppl: 10.507288 +epoch: 2, batch: 29480, sum loss: 4804.348145, avg loss: 2.432581, ppl: 11.388242 +epoch: 2, batch: 29481, sum loss: 4359.882812, avg loss: 2.664965, ppl: 14.367449 +epoch: 2, batch: 29482, sum loss: 4687.736328, avg loss: 2.818843, ppl: 16.757458 +epoch: 2, batch: 29483, sum loss: 3702.378906, avg loss: 2.359706, ppl: 10.587840 +epoch: 2, batch: 29484, sum loss: 4278.742676, avg loss: 2.691033, ppl: 14.746903 +epoch: 2, batch: 29485, sum loss: 3579.543701, avg loss: 2.372130, ppl: 10.720199 +epoch: 2, batch: 29486, sum loss: 3223.826660, avg loss: 2.179734, ppl: 8.843953 +epoch: 2, batch: 29487, sum loss: 4067.270996, avg loss: 2.314895, ppl: 10.123861 +epoch: 2, batch: 29488, sum loss: 3979.622559, avg loss: 2.474890, ppl: 11.880394 +epoch: 2, batch: 29489, sum loss: 4896.727051, avg loss: 2.428932, ppl: 11.346760 +epoch: 2, batch: 29490, sum loss: 5077.979004, avg loss: 2.774852, ppl: 16.036255 +epoch: 2, batch: 29491, sum loss: 4478.825684, avg loss: 2.647060, ppl: 14.112489 +epoch: 2, batch: 29492, sum loss: 4821.941406, avg loss: 2.570331, ppl: 13.070154 +epoch: 2, batch: 29493, sum loss: 3626.399902, avg loss: 2.507884, ppl: 12.278917 +epoch: 
2, batch: 29494, sum loss: 4064.126221, avg loss: 2.652824, ppl: 14.194065 +epoch: 2, batch: 29495, sum loss: 4976.552246, avg loss: 2.679888, ppl: 14.583460 +epoch: 2, batch: 29496, sum loss: 4086.360352, avg loss: 2.460181, ppl: 11.706927 +epoch: 2, batch: 29497, sum loss: 3395.082031, avg loss: 2.219008, ppl: 9.198201 +epoch: 2, batch: 29498, sum loss: 4251.012695, avg loss: 2.246835, ppl: 9.457759 +epoch: 2, batch: 29499, sum loss: 4354.030273, avg loss: 2.558185, ppl: 12.912355 +epoch: 2, batch: 29500, sum loss: 4385.758789, avg loss: 2.605917, ppl: 13.543642 +epoch: 2, batch: 29501, sum loss: 4442.357422, avg loss: 2.474851, ppl: 11.879935 +epoch: 2, batch: 29502, sum loss: 4816.077148, avg loss: 2.727110, ppl: 15.288645 +epoch: 2, batch: 29503, sum loss: 3524.883301, avg loss: 2.293353, ppl: 9.908102 +epoch: 2, batch: 29504, sum loss: 5760.550781, avg loss: 2.700680, ppl: 14.889857 +epoch: 2, batch: 29505, sum loss: 4343.467285, avg loss: 2.557990, ppl: 12.909846 +epoch: 2, batch: 29506, sum loss: 3699.610840, avg loss: 2.296469, ppl: 9.939023 +epoch: 2, batch: 29507, sum loss: 4249.682129, avg loss: 2.371475, ppl: 10.713178 +epoch: 2, batch: 29508, sum loss: 4070.719238, avg loss: 2.412993, ppl: 11.167337 +epoch: 2, batch: 29509, sum loss: 5651.553223, avg loss: 2.701507, ppl: 14.902177 +epoch: 2, batch: 29510, sum loss: 5044.906738, avg loss: 2.459730, ppl: 11.701653 +epoch: 2, batch: 29511, sum loss: 4733.512695, avg loss: 2.653314, ppl: 14.201028 +epoch: 2, batch: 29512, sum loss: 4441.658203, avg loss: 2.404796, ppl: 11.076172 +epoch: 2, batch: 29513, sum loss: 3418.338135, avg loss: 2.305016, ppl: 10.024337 +epoch: 2, batch: 29514, sum loss: 3509.693604, avg loss: 2.487380, ppl: 12.029720 +epoch: 2, batch: 29515, sum loss: 3824.681885, avg loss: 2.343555, ppl: 10.418210 +epoch: 2, batch: 29516, sum loss: 4566.032715, avg loss: 2.630203, ppl: 13.876590 +epoch: 2, batch: 29517, sum loss: 3467.314941, avg loss: 2.157632, ppl: 8.650632 +epoch: 2, batch: 
29518, sum loss: 5212.020020, avg loss: 2.766465, ppl: 15.902323 +epoch: 2, batch: 29519, sum loss: 3871.642578, avg loss: 2.403254, ppl: 11.059107 +epoch: 2, batch: 29520, sum loss: 3684.513428, avg loss: 2.403466, ppl: 11.061449 +epoch: 2, batch: 29521, sum loss: 3867.950684, avg loss: 2.389099, ppl: 10.903662 +epoch: 2, batch: 29522, sum loss: 4247.605957, avg loss: 2.452428, ppl: 11.616522 +epoch: 2, batch: 29523, sum loss: 4681.277344, avg loss: 2.691936, ppl: 14.760231 +epoch: 2, batch: 29524, sum loss: 4453.556152, avg loss: 2.630571, ppl: 13.881693 +epoch: 2, batch: 29525, sum loss: 4487.198242, avg loss: 2.501225, ppl: 12.197433 +epoch: 2, batch: 29526, sum loss: 3764.597656, avg loss: 2.210568, ppl: 9.120897 +epoch: 2, batch: 29527, sum loss: 3206.794922, avg loss: 2.302078, ppl: 9.994933 +epoch: 2, batch: 29528, sum loss: 3501.456543, avg loss: 2.106773, ppl: 8.221666 +epoch: 2, batch: 29529, sum loss: 3710.748291, avg loss: 2.090562, ppl: 8.089463 +epoch: 2, batch: 29530, sum loss: 4084.643066, avg loss: 2.385890, ppl: 10.868727 +epoch: 2, batch: 29531, sum loss: 4245.111816, avg loss: 2.353166, ppl: 10.518821 +epoch: 2, batch: 29532, sum loss: 3632.607422, avg loss: 2.377361, ppl: 10.776424 +epoch: 2, batch: 29533, sum loss: 5121.272461, avg loss: 2.721186, ppl: 15.198339 +epoch: 2, batch: 29534, sum loss: 4117.846680, avg loss: 2.506297, ppl: 12.259454 +epoch: 2, batch: 29535, sum loss: 4441.357422, avg loss: 2.580684, ppl: 13.206170 +epoch: 2, batch: 29536, sum loss: 4885.031250, avg loss: 2.507716, ppl: 12.276863 +epoch: 2, batch: 29537, sum loss: 3582.580078, avg loss: 2.340026, ppl: 10.381511 +epoch: 2, batch: 29538, sum loss: 5031.200684, avg loss: 2.857013, ppl: 17.409454 +epoch: 2, batch: 29539, sum loss: 4206.059082, avg loss: 2.328936, ppl: 10.267015 +epoch: 2, batch: 29540, sum loss: 4764.951660, avg loss: 2.591056, ppl: 13.343854 +epoch: 2, batch: 29541, sum loss: 4247.004883, avg loss: 2.501181, ppl: 12.196889 +epoch: 2, batch: 29542, sum 
loss: 4397.907715, avg loss: 2.458305, ppl: 11.684990 +epoch: 2, batch: 29543, sum loss: 4721.473633, avg loss: 2.566018, ppl: 13.013901 +epoch: 2, batch: 29544, sum loss: 3978.192871, avg loss: 2.530657, ppl: 12.561757 +epoch: 2, batch: 29545, sum loss: 2909.183350, avg loss: 2.179164, ppl: 8.838911 +epoch: 2, batch: 29546, sum loss: 3710.862549, avg loss: 2.094166, ppl: 8.118670 +epoch: 2, batch: 29547, sum loss: 4328.717285, avg loss: 2.451142, ppl: 11.601592 +epoch: 2, batch: 29548, sum loss: 3704.373291, avg loss: 2.497892, ppl: 12.156837 +epoch: 2, batch: 29549, sum loss: 4987.981934, avg loss: 2.792823, ppl: 16.327047 +epoch: 2, batch: 29550, sum loss: 3809.536621, avg loss: 2.514546, ppl: 12.360991 +epoch: 2, batch: 29551, sum loss: 4757.995605, avg loss: 2.442503, ppl: 11.501794 +epoch: 2, batch: 29552, sum loss: 4452.596191, avg loss: 2.495850, ppl: 12.132043 +epoch: 2, batch: 29553, sum loss: 4177.011230, avg loss: 2.537674, ppl: 12.650211 +epoch: 2, batch: 29554, sum loss: 4464.428711, avg loss: 2.421057, ppl: 11.257750 +epoch: 2, batch: 29555, sum loss: 3958.705566, avg loss: 2.639137, ppl: 14.001116 +epoch: 2, batch: 29556, sum loss: 3556.923096, avg loss: 2.315705, ppl: 10.132067 +epoch: 2, batch: 29557, sum loss: 5258.530762, avg loss: 2.709186, ppl: 15.017052 +epoch: 2, batch: 29558, sum loss: 3947.575195, avg loss: 2.376626, ppl: 10.768503 +epoch: 2, batch: 29559, sum loss: 4320.945801, avg loss: 2.487591, ppl: 12.032259 +epoch: 2, batch: 29560, sum loss: 3877.098633, avg loss: 2.436894, ppl: 11.437463 +epoch: 2, batch: 29561, sum loss: 4474.921875, avg loss: 2.443977, ppl: 11.518762 +epoch: 2, batch: 29562, sum loss: 3855.398193, avg loss: 2.385766, ppl: 10.867387 +epoch: 2, batch: 29563, sum loss: 3988.062256, avg loss: 2.620278, ppl: 13.739538 +epoch: 2, batch: 29564, sum loss: 4127.097168, avg loss: 2.667807, ppl: 14.408335 +epoch: 2, batch: 29565, sum loss: 4044.977295, avg loss: 2.442619, ppl: 11.503129 +epoch: 2, batch: 29566, sum loss: 
3212.782959, avg loss: 2.170799, ppl: 8.765287 +epoch: 2, batch: 29567, sum loss: 4228.996582, avg loss: 2.586542, ppl: 13.283758 +epoch: 2, batch: 29568, sum loss: 3344.697021, avg loss: 2.149548, ppl: 8.580981 +epoch: 2, batch: 29569, sum loss: 4057.983887, avg loss: 2.371703, ppl: 10.715627 +epoch: 2, batch: 29570, sum loss: 4168.875488, avg loss: 2.420950, ppl: 11.256544 +epoch: 2, batch: 29571, sum loss: 4014.137207, avg loss: 2.476334, ppl: 11.897566 +epoch: 2, batch: 29572, sum loss: 3388.736328, avg loss: 2.176452, ppl: 8.814979 +epoch: 2, batch: 29573, sum loss: 4430.010742, avg loss: 2.377891, ppl: 10.782138 +epoch: 2, batch: 29574, sum loss: 4741.615723, avg loss: 2.657856, ppl: 14.265677 +epoch: 2, batch: 29575, sum loss: 4085.323242, avg loss: 2.377953, ppl: 10.782808 +epoch: 2, batch: 29576, sum loss: 4560.360352, avg loss: 2.407793, ppl: 11.109419 +epoch: 2, batch: 29577, sum loss: 3856.305908, avg loss: 2.481535, ppl: 11.959614 +epoch: 2, batch: 29578, sum loss: 2995.860840, avg loss: 1.950430, ppl: 7.031713 +epoch: 2, batch: 29579, sum loss: 3166.876221, avg loss: 2.082102, ppl: 8.021309 +epoch: 2, batch: 29580, sum loss: 5048.206543, avg loss: 2.679515, ppl: 14.578027 +epoch: 2, batch: 29581, sum loss: 3952.341309, avg loss: 2.252046, ppl: 9.507171 +epoch: 2, batch: 29582, sum loss: 3801.925537, avg loss: 2.376203, ppl: 10.763958 +epoch: 2, batch: 29583, sum loss: 4090.825684, avg loss: 2.330955, ppl: 10.287760 +epoch: 2, batch: 29584, sum loss: 3266.031982, avg loss: 2.129095, ppl: 8.407255 +epoch: 2, batch: 29585, sum loss: 4812.124023, avg loss: 2.515486, ppl: 12.372615 +epoch: 2, batch: 29586, sum loss: 4252.314941, avg loss: 2.353245, ppl: 10.519656 +epoch: 2, batch: 29587, sum loss: 3675.837402, avg loss: 2.522881, ppl: 12.464453 +epoch: 2, batch: 29588, sum loss: 4438.976562, avg loss: 2.486822, ppl: 12.023002 +epoch: 2, batch: 29589, sum loss: 4047.669922, avg loss: 2.532960, ppl: 12.590719 +epoch: 2, batch: 29590, sum loss: 5001.764160, 
avg loss: 2.575574, ppl: 13.138856 +epoch: 2, batch: 29591, sum loss: 4987.513184, avg loss: 2.661426, ppl: 14.316694 +epoch: 2, batch: 29592, sum loss: 5463.445312, avg loss: 2.858946, ppl: 17.443125 +epoch: 2, batch: 29593, sum loss: 3652.745605, avg loss: 2.407874, ppl: 11.110319 +epoch: 2, batch: 29594, sum loss: 4704.723633, avg loss: 2.849621, ppl: 17.281227 +epoch: 2, batch: 29595, sum loss: 4733.211914, avg loss: 2.546106, ppl: 12.757334 +epoch: 2, batch: 29596, sum loss: 4011.233154, avg loss: 2.469971, ppl: 11.822106 +epoch: 2, batch: 29597, sum loss: 4147.086426, avg loss: 2.364359, ppl: 10.637222 +epoch: 2, batch: 29598, sum loss: 3825.758789, avg loss: 2.301901, ppl: 9.993157 +epoch: 2, batch: 29599, sum loss: 3462.166748, avg loss: 2.220761, ppl: 9.214343 +epoch: 2, batch: 29600, sum loss: 3948.117676, avg loss: 2.618115, ppl: 13.709859 +epoch: 2, batch: 29601, sum loss: 3464.144775, avg loss: 2.295656, ppl: 9.930948 +epoch: 2, batch: 29602, sum loss: 4250.001465, avg loss: 2.504421, ppl: 12.236466 +epoch: 2, batch: 29603, sum loss: 4173.611816, avg loss: 2.246293, ppl: 9.452626 +epoch: 2, batch: 29604, sum loss: 3489.482666, avg loss: 2.285188, ppl: 9.827538 +epoch: 2, batch: 29605, sum loss: 4396.920898, avg loss: 2.377999, ppl: 10.783307 +epoch: 2, batch: 29606, sum loss: 3903.781006, avg loss: 2.528356, ppl: 12.532883 +epoch: 2, batch: 29607, sum loss: 3706.383301, avg loss: 2.207494, ppl: 9.092906 +epoch: 2, batch: 29608, sum loss: 3529.380127, avg loss: 2.337338, ppl: 10.353638 +epoch: 2, batch: 29609, sum loss: 3714.613281, avg loss: 2.155899, ppl: 8.635647 +epoch: 2, batch: 29610, sum loss: 3835.510742, avg loss: 2.545130, ppl: 12.744885 +epoch: 2, batch: 29611, sum loss: 3713.147949, avg loss: 2.242239, ppl: 9.414387 +epoch: 2, batch: 29612, sum loss: 3923.104004, avg loss: 2.387769, ppl: 10.889168 +epoch: 2, batch: 29613, sum loss: 4937.575195, avg loss: 2.749207, ppl: 15.630229 +epoch: 2, batch: 29614, sum loss: 4492.865234, avg loss: 
2.415519, ppl: 11.195580 +epoch: 2, batch: 29615, sum loss: 3985.659668, avg loss: 2.455736, ppl: 11.655007 +epoch: 2, batch: 29616, sum loss: 3876.779297, avg loss: 2.481933, ppl: 11.964371 +epoch: 2, batch: 29617, sum loss: 4764.340332, avg loss: 2.666111, ppl: 14.383921 +epoch: 2, batch: 29618, sum loss: 2883.105225, avg loss: 2.169379, ppl: 8.752851 +epoch: 2, batch: 29619, sum loss: 4428.297852, avg loss: 2.349230, ppl: 10.477494 +epoch: 2, batch: 29620, sum loss: 4592.432617, avg loss: 2.471708, ppl: 11.842652 +epoch: 2, batch: 29621, sum loss: 4349.847656, avg loss: 2.751327, ppl: 15.663401 +epoch: 2, batch: 29622, sum loss: 3827.494629, avg loss: 2.491858, ppl: 12.083713 +epoch: 2, batch: 29623, sum loss: 3647.334961, avg loss: 2.336538, ppl: 10.345355 +epoch: 2, batch: 29624, sum loss: 3439.250000, avg loss: 2.176740, ppl: 8.817518 +epoch: 2, batch: 29625, sum loss: 4873.883789, avg loss: 2.526638, ppl: 12.511367 +epoch: 2, batch: 29626, sum loss: 4305.896484, avg loss: 2.640035, ppl: 14.013690 +epoch: 2, batch: 29627, sum loss: 4131.943359, avg loss: 2.547437, ppl: 12.774323 +epoch: 2, batch: 29628, sum loss: 4437.183105, avg loss: 2.204264, ppl: 9.063578 +epoch: 2, batch: 29629, sum loss: 4029.222656, avg loss: 2.424322, ppl: 11.294565 +epoch: 2, batch: 29630, sum loss: 3990.231201, avg loss: 2.267177, ppl: 9.652111 +epoch: 2, batch: 29631, sum loss: 4262.965332, avg loss: 2.348741, ppl: 10.472380 +epoch: 2, batch: 29632, sum loss: 4034.200439, avg loss: 2.461379, ppl: 11.720964 +epoch: 2, batch: 29633, sum loss: 5128.940430, avg loss: 2.886292, ppl: 17.926710 +epoch: 2, batch: 29634, sum loss: 3706.338135, avg loss: 2.248991, ppl: 9.478172 +epoch: 2, batch: 29635, sum loss: 5503.151367, avg loss: 2.768185, ppl: 15.929690 +epoch: 2, batch: 29636, sum loss: 4359.716797, avg loss: 2.579714, ppl: 13.193365 +epoch: 2, batch: 29637, sum loss: 4756.332031, avg loss: 2.725692, ppl: 15.266969 +epoch: 2, batch: 29638, sum loss: 4397.169922, avg loss: 2.571444, 
ppl: 13.084709 +epoch: 2, batch: 29639, sum loss: 3623.016357, avg loss: 2.306185, ppl: 10.036064 +epoch: 2, batch: 29640, sum loss: 4414.671387, avg loss: 2.475980, ppl: 11.893352 +epoch: 2, batch: 29641, sum loss: 4387.568359, avg loss: 2.533238, ppl: 12.594222 +epoch: 2, batch: 29642, sum loss: 3579.368164, avg loss: 2.262559, ppl: 9.607641 +epoch: 2, batch: 29643, sum loss: 3928.471924, avg loss: 2.624230, ppl: 13.793944 +epoch: 2, batch: 29644, sum loss: 4289.478027, avg loss: 2.323661, ppl: 10.212995 +epoch: 2, batch: 29645, sum loss: 4524.785156, avg loss: 2.459122, ppl: 11.694541 +epoch: 2, batch: 29646, sum loss: 4182.201172, avg loss: 2.418856, ppl: 11.232998 +epoch: 2, batch: 29647, sum loss: 4126.802734, avg loss: 2.362222, ppl: 10.614515 +epoch: 2, batch: 29648, sum loss: 3633.722900, avg loss: 2.365705, ppl: 10.651546 +epoch: 2, batch: 29649, sum loss: 3224.936035, avg loss: 2.261526, ppl: 9.597723 +epoch: 2, batch: 29650, sum loss: 4039.567383, avg loss: 2.455664, ppl: 11.654168 +epoch: 2, batch: 29651, sum loss: 4322.810059, avg loss: 2.504525, ppl: 12.237744 +epoch: 2, batch: 29652, sum loss: 4074.667480, avg loss: 2.596984, ppl: 13.423188 +epoch: 2, batch: 29653, sum loss: 3925.372070, avg loss: 2.351931, ppl: 10.505833 +epoch: 2, batch: 29654, sum loss: 4678.365234, avg loss: 2.723146, ppl: 15.228158 +epoch: 2, batch: 29655, sum loss: 4554.923828, avg loss: 2.827389, ppl: 16.901278 +epoch: 2, batch: 29656, sum loss: 4156.836914, avg loss: 2.386244, ppl: 10.872578 +epoch: 2, batch: 29657, sum loss: 3697.779785, avg loss: 2.391837, ppl: 10.933559 +epoch: 2, batch: 29658, sum loss: 3711.599121, avg loss: 2.467818, ppl: 11.796684 +epoch: 2, batch: 29659, sum loss: 3623.552002, avg loss: 2.319816, ppl: 10.173799 +epoch: 2, batch: 29660, sum loss: 4979.846680, avg loss: 2.757390, ppl: 15.758663 +epoch: 2, batch: 29661, sum loss: 4669.587891, avg loss: 2.400816, ppl: 11.032180 +epoch: 2, batch: 29662, sum loss: 4621.767090, avg loss: 2.720287, ppl: 
15.184677 +epoch: 2, batch: 29663, sum loss: 5304.632324, avg loss: 2.797802, ppl: 16.408541 +epoch: 2, batch: 29664, sum loss: 5523.191895, avg loss: 2.832406, ppl: 16.986282 +epoch: 2, batch: 29665, sum loss: 3992.156006, avg loss: 2.585593, ppl: 13.271163 +epoch: 2, batch: 29666, sum loss: 3940.956543, avg loss: 2.338847, ppl: 10.369270 +epoch: 2, batch: 29667, sum loss: 5156.702637, avg loss: 2.708352, ppl: 15.004533 +epoch: 2, batch: 29668, sum loss: 4647.712402, avg loss: 2.595037, ppl: 13.397089 +epoch: 2, batch: 29669, sum loss: 4278.619141, avg loss: 2.509454, ppl: 12.298217 +epoch: 2, batch: 29670, sum loss: 4070.371582, avg loss: 2.405657, ppl: 11.085711 +epoch: 2, batch: 29671, sum loss: 3934.906250, avg loss: 2.352006, ppl: 10.506627 +epoch: 2, batch: 29672, sum loss: 3336.533691, avg loss: 2.017251, ppl: 7.517634 +epoch: 2, batch: 29673, sum loss: 4016.596436, avg loss: 2.464170, ppl: 11.753716 +epoch: 2, batch: 29674, sum loss: 3241.548584, avg loss: 2.052912, ppl: 7.790556 +epoch: 2, batch: 29675, sum loss: 3978.185059, avg loss: 2.385003, ppl: 10.859097 +epoch: 2, batch: 29676, sum loss: 5616.537109, avg loss: 2.826642, ppl: 16.888645 +epoch: 2, batch: 29677, sum loss: 4385.598145, avg loss: 2.516120, ppl: 12.380472 +epoch: 2, batch: 29678, sum loss: 3920.563477, avg loss: 2.462666, ppl: 11.736053 +epoch: 2, batch: 29679, sum loss: 4747.208984, avg loss: 2.473793, ppl: 11.867375 +epoch: 2, batch: 29680, sum loss: 5647.843750, avg loss: 2.872759, ppl: 17.685743 +epoch: 2, batch: 29681, sum loss: 4414.950684, avg loss: 2.537328, ppl: 12.645836 +epoch: 2, batch: 29682, sum loss: 4356.483887, avg loss: 2.462682, ppl: 11.736243 +epoch: 2, batch: 29683, sum loss: 3906.319336, avg loss: 2.516958, ppl: 12.390852 +epoch: 2, batch: 29684, sum loss: 3508.122314, avg loss: 2.370353, ppl: 10.701169 +epoch: 2, batch: 29685, sum loss: 4377.440430, avg loss: 2.508562, ppl: 12.287247 +epoch: 2, batch: 29686, sum loss: 4422.196777, avg loss: 2.660768, ppl: 14.307277 
+epoch: 2, batch: 29687, sum loss: 4469.633789, avg loss: 2.506805, ppl: 12.265684 +epoch: 2, batch: 29688, sum loss: 4242.450195, avg loss: 2.514790, ppl: 12.364010 +epoch: 2, batch: 29689, sum loss: 4672.687012, avg loss: 2.573066, ppl: 13.105940 +epoch: 2, batch: 29690, sum loss: 3097.718750, avg loss: 2.211077, ppl: 9.125539 +epoch: 2, batch: 29691, sum loss: 4369.244141, avg loss: 2.546180, ppl: 12.758268 +epoch: 2, batch: 29692, sum loss: 4422.765625, avg loss: 2.466685, ppl: 11.783319 +epoch: 2, batch: 29693, sum loss: 4651.753418, avg loss: 2.500943, ppl: 12.193984 +epoch: 2, batch: 29694, sum loss: 4217.912109, avg loss: 2.465174, ppl: 11.765526 +epoch: 2, batch: 29695, sum loss: 4569.398926, avg loss: 2.615569, ppl: 13.674994 +epoch: 2, batch: 29696, sum loss: 4075.462891, avg loss: 2.250394, ppl: 9.491473 +epoch: 2, batch: 29697, sum loss: 4767.491211, avg loss: 2.642734, ppl: 14.051562 +epoch: 2, batch: 29698, sum loss: 4176.645020, avg loss: 2.351715, ppl: 10.503564 +epoch: 2, batch: 29699, sum loss: 3819.755859, avg loss: 2.437624, ppl: 11.445807 +epoch: 2, batch: 29700, sum loss: 4797.714844, avg loss: 2.509265, ppl: 12.295889 +epoch: 2, batch: 29701, sum loss: 4167.352539, avg loss: 2.473206, ppl: 11.860414 +epoch: 2, batch: 29702, sum loss: 3964.136230, avg loss: 2.429005, ppl: 11.347588 +epoch: 2, batch: 29703, sum loss: 4161.045410, avg loss: 2.488664, ppl: 12.045169 +epoch: 2, batch: 29704, sum loss: 3756.278809, avg loss: 2.230569, ppl: 9.305162 +epoch: 2, batch: 29705, sum loss: 3618.885986, avg loss: 2.339293, ppl: 10.373899 +epoch: 2, batch: 29706, sum loss: 3322.221191, avg loss: 2.333020, ppl: 10.309032 +epoch: 2, batch: 29707, sum loss: 4001.997803, avg loss: 2.248313, ppl: 9.471747 +epoch: 2, batch: 29708, sum loss: 3283.741699, avg loss: 2.181888, ppl: 8.863027 +epoch: 2, batch: 29709, sum loss: 4007.783691, avg loss: 2.360297, ppl: 10.594094 +epoch: 2, batch: 29710, sum loss: 3957.827881, avg loss: 2.268096, ppl: 9.660991 +epoch: 2, 
batch: 29711, sum loss: 3873.956543, avg loss: 2.553696, ppl: 12.854523 +epoch: 2, batch: 29712, sum loss: 5377.713379, avg loss: 2.690202, ppl: 14.734652 +epoch: 2, batch: 29713, sum loss: 5174.281250, avg loss: 2.844575, ppl: 17.194244 +epoch: 2, batch: 29714, sum loss: 3527.416260, avg loss: 2.109699, ppl: 8.245757 +epoch: 2, batch: 29715, sum loss: 3336.489502, avg loss: 2.227296, ppl: 9.274752 +epoch: 2, batch: 29716, sum loss: 4957.080566, avg loss: 2.645187, ppl: 14.086078 +epoch: 2, batch: 29717, sum loss: 4383.437500, avg loss: 2.609189, ppl: 13.588027 +epoch: 2, batch: 29718, sum loss: 4917.942871, avg loss: 2.661225, ppl: 14.313807 +epoch: 2, batch: 29719, sum loss: 4467.929688, avg loss: 2.650018, ppl: 14.154289 +epoch: 2, batch: 29720, sum loss: 3662.225098, avg loss: 2.518724, ppl: 12.412750 +epoch: 2, batch: 29721, sum loss: 3812.090576, avg loss: 2.279959, ppl: 9.776277 +epoch: 2, batch: 29722, sum loss: 3837.725830, avg loss: 2.551679, ppl: 12.828629 +epoch: 2, batch: 29723, sum loss: 4261.171387, avg loss: 2.536412, ppl: 12.634252 +epoch: 2, batch: 29724, sum loss: 3849.152832, avg loss: 2.649107, ppl: 14.141408 +epoch: 2, batch: 29725, sum loss: 4334.155273, avg loss: 2.374880, ppl: 10.749719 +epoch: 2, batch: 29726, sum loss: 4508.447266, avg loss: 2.518686, ppl: 12.412271 +epoch: 2, batch: 29727, sum loss: 3618.923828, avg loss: 2.306516, ppl: 10.039388 +epoch: 2, batch: 29728, sum loss: 4418.262695, avg loss: 2.299981, ppl: 9.973989 +epoch: 2, batch: 29729, sum loss: 5473.089844, avg loss: 2.746156, ppl: 15.582624 +epoch: 2, batch: 29730, sum loss: 5208.546387, avg loss: 2.660136, ppl: 14.298237 +epoch: 2, batch: 29731, sum loss: 5015.022949, avg loss: 2.817429, ppl: 16.733767 +epoch: 2, batch: 29732, sum loss: 4573.880859, avg loss: 2.556669, ppl: 12.892799 +epoch: 2, batch: 29733, sum loss: 4483.364746, avg loss: 2.550264, ppl: 12.810490 +epoch: 2, batch: 29734, sum loss: 4595.691406, avg loss: 2.700171, ppl: 14.882279 +epoch: 2, batch: 
29735, sum loss: 3062.231934, avg loss: 1.996240, ppl: 7.361324 +epoch: 2, batch: 29736, sum loss: 4087.467285, avg loss: 2.537224, ppl: 12.644519 +epoch: 2, batch: 29737, sum loss: 4193.061523, avg loss: 2.346425, ppl: 10.448152 +epoch: 2, batch: 29738, sum loss: 4009.010498, avg loss: 2.345822, ppl: 10.441855 +epoch: 2, batch: 29739, sum loss: 4559.671387, avg loss: 2.605526, ppl: 13.538351 +epoch: 2, batch: 29740, sum loss: 3181.863770, avg loss: 2.304029, ppl: 10.014447 +epoch: 2, batch: 29741, sum loss: 5035.874512, avg loss: 2.738377, ppl: 15.461864 +epoch: 2, batch: 29742, sum loss: 4842.816895, avg loss: 2.762588, ppl: 15.840790 +epoch: 2, batch: 29743, sum loss: 4227.054688, avg loss: 2.661873, ppl: 14.323096 +epoch: 2, batch: 29744, sum loss: 5182.907227, avg loss: 2.693819, ppl: 14.788041 +epoch: 2, batch: 29745, sum loss: 4728.941895, avg loss: 2.581300, ppl: 13.214309 +epoch: 2, batch: 29746, sum loss: 4763.522461, avg loss: 2.561034, ppl: 12.949196 +epoch: 2, batch: 29747, sum loss: 4048.812744, avg loss: 2.397166, ppl: 10.991979 +epoch: 2, batch: 29748, sum loss: 3002.245117, avg loss: 2.281341, ppl: 9.789803 +epoch: 2, batch: 29749, sum loss: 4555.495117, avg loss: 2.578096, ppl: 13.172030 +epoch: 2, batch: 29750, sum loss: 4188.380859, avg loss: 2.475402, ppl: 11.886489 +epoch: 2, batch: 29751, sum loss: 3593.436768, avg loss: 2.238901, ppl: 9.383018 +epoch: 2, batch: 29752, sum loss: 3826.867432, avg loss: 2.365184, ppl: 10.645996 +epoch: 2, batch: 29753, sum loss: 4129.896484, avg loss: 2.270422, ppl: 9.683481 +epoch: 2, batch: 29754, sum loss: 4275.250000, avg loss: 2.501609, ppl: 12.202112 +epoch: 2, batch: 29755, sum loss: 3696.394043, avg loss: 2.212085, ppl: 9.134745 +epoch: 2, batch: 29756, sum loss: 4384.203125, avg loss: 2.534221, ppl: 12.606611 +epoch: 2, batch: 29757, sum loss: 3974.665527, avg loss: 2.426536, ppl: 11.319598 +epoch: 2, batch: 29758, sum loss: 4419.812500, avg loss: 2.703249, ppl: 14.928158 +epoch: 2, batch: 29759, sum 
loss: 4264.898926, avg loss: 2.469542, ppl: 11.817034 +epoch: 2, batch: 29760, sum loss: 4763.165527, avg loss: 2.534947, ppl: 12.615765 +epoch: 2, batch: 29761, sum loss: 4659.558594, avg loss: 2.793500, ppl: 16.338110 +epoch: 2, batch: 29762, sum loss: 3964.232910, avg loss: 2.521777, ppl: 12.450698 +epoch: 2, batch: 29763, sum loss: 4486.576172, avg loss: 2.541970, ppl: 12.704669 +epoch: 2, batch: 29764, sum loss: 4227.149414, avg loss: 2.327725, ppl: 10.254590 +epoch: 2, batch: 29765, sum loss: 4151.772461, avg loss: 2.360303, ppl: 10.594157 +epoch: 2, batch: 29766, sum loss: 3681.022461, avg loss: 2.460577, ppl: 11.711562 +epoch: 2, batch: 29767, sum loss: 4802.564453, avg loss: 2.810161, ppl: 16.612587 +epoch: 2, batch: 29768, sum loss: 2844.690430, avg loss: 2.143700, ppl: 8.530947 +epoch: 2, batch: 29769, sum loss: 4102.340820, avg loss: 2.400434, ppl: 11.027956 +epoch: 2, batch: 29770, sum loss: 3677.521484, avg loss: 2.179918, ppl: 8.845581 +epoch: 2, batch: 29771, sum loss: 4646.193359, avg loss: 2.633897, ppl: 13.927936 +epoch: 2, batch: 29772, sum loss: 4582.488281, avg loss: 2.466355, ppl: 11.779436 +epoch: 2, batch: 29773, sum loss: 3567.305664, avg loss: 2.456822, ppl: 11.667674 +epoch: 2, batch: 29774, sum loss: 3882.963867, avg loss: 2.364777, ppl: 10.641666 +epoch: 2, batch: 29775, sum loss: 4045.428223, avg loss: 2.273990, ppl: 9.718098 +epoch: 2, batch: 29776, sum loss: 4758.322266, avg loss: 2.471856, ppl: 11.844405 +epoch: 2, batch: 29777, sum loss: 4609.321777, avg loss: 2.535381, ppl: 12.621234 +epoch: 2, batch: 29778, sum loss: 4009.577148, avg loss: 2.520162, ppl: 12.430606 +epoch: 2, batch: 29779, sum loss: 4177.236328, avg loss: 2.625541, ppl: 13.812051 +epoch: 2, batch: 29780, sum loss: 3921.608887, avg loss: 2.410331, ppl: 11.137650 +epoch: 2, batch: 29781, sum loss: 4056.678955, avg loss: 2.393321, ppl: 10.949796 +epoch: 2, batch: 29782, sum loss: 3118.655518, avg loss: 2.218105, ppl: 9.189898 +epoch: 2, batch: 29783, sum loss: 
3571.580566, avg loss: 2.276342, ppl: 9.740982 +epoch: 2, batch: 29784, sum loss: 3569.089355, avg loss: 2.388949, ppl: 10.902026 +epoch: 2, batch: 29785, sum loss: 4596.768555, avg loss: 2.723204, ppl: 15.229040 +epoch: 2, batch: 29786, sum loss: 4186.289062, avg loss: 2.361133, ppl: 10.602962 +epoch: 2, batch: 29787, sum loss: 3966.376465, avg loss: 2.290056, ppl: 9.875488 +epoch: 2, batch: 29788, sum loss: 4225.074219, avg loss: 2.401975, ppl: 11.044971 +epoch: 2, batch: 29789, sum loss: 3876.270996, avg loss: 2.410616, ppl: 11.140823 +epoch: 2, batch: 29790, sum loss: 4789.857422, avg loss: 2.553229, ppl: 12.848523 +epoch: 2, batch: 29791, sum loss: 3923.782715, avg loss: 2.328655, ppl: 10.264122 +epoch: 2, batch: 29792, sum loss: 4058.400635, avg loss: 2.344541, ppl: 10.428486 +epoch: 2, batch: 29793, sum loss: 4317.731445, avg loss: 2.449082, ppl: 11.577712 +epoch: 2, batch: 29794, sum loss: 4710.884766, avg loss: 2.643594, ppl: 14.063662 +epoch: 2, batch: 29795, sum loss: 4522.487305, avg loss: 2.781358, ppl: 16.140919 +epoch: 2, batch: 29796, sum loss: 3843.750977, avg loss: 2.363930, ppl: 10.632661 +epoch: 2, batch: 29797, sum loss: 3988.620117, avg loss: 2.377008, ppl: 10.772628 +epoch: 2, batch: 29798, sum loss: 4338.796387, avg loss: 2.443016, ppl: 11.507696 +epoch: 2, batch: 29799, sum loss: 4473.081055, avg loss: 2.710958, ppl: 15.043684 +epoch: 2, batch: 29800, sum loss: 4769.056641, avg loss: 2.805328, ppl: 16.532492 +epoch: 2, batch: 29801, sum loss: 4492.751953, avg loss: 2.687053, ppl: 14.688321 +epoch: 2, batch: 29802, sum loss: 3772.836670, avg loss: 2.300510, ppl: 9.979272 +epoch: 2, batch: 29803, sum loss: 4162.935059, avg loss: 2.388374, ppl: 10.895759 +epoch: 2, batch: 29804, sum loss: 4767.622070, avg loss: 2.522551, ppl: 12.460346 +epoch: 2, batch: 29805, sum loss: 3993.719727, avg loss: 2.488299, ppl: 12.040776 +epoch: 2, batch: 29806, sum loss: 3492.826172, avg loss: 2.285881, ppl: 9.834347 +epoch: 2, batch: 29807, sum loss: 
4360.092773, avg loss: 2.586057, ppl: 13.277322 +epoch: 2, batch: 29808, sum loss: 4369.044922, avg loss: 2.582178, ppl: 13.225911 +epoch: 2, batch: 29809, sum loss: 3919.483398, avg loss: 2.460442, ppl: 11.709981 +epoch: 2, batch: 29810, sum loss: 4659.545410, avg loss: 2.462762, ppl: 11.737184 +epoch: 2, batch: 29811, sum loss: 4988.850586, avg loss: 2.765438, ppl: 15.886002 +epoch: 2, batch: 29812, sum loss: 3653.247559, avg loss: 2.166814, ppl: 8.730421 +epoch: 2, batch: 29813, sum loss: 4319.000000, avg loss: 2.590882, ppl: 13.341532 +epoch: 2, batch: 29814, sum loss: 4120.668945, avg loss: 2.433945, ppl: 11.403784 +epoch: 2, batch: 29815, sum loss: 4144.235352, avg loss: 2.357358, ppl: 10.563006 +epoch: 2, batch: 29816, sum loss: 4459.838867, avg loss: 2.580925, ppl: 13.209354 +epoch: 2, batch: 29817, sum loss: 4097.177734, avg loss: 2.567154, ppl: 13.028691 +epoch: 2, batch: 29818, sum loss: 4159.551270, avg loss: 2.398818, ppl: 11.010160 +epoch: 2, batch: 29819, sum loss: 4009.288818, avg loss: 2.459686, ppl: 11.701140 +epoch: 2, batch: 29820, sum loss: 4427.510742, avg loss: 2.480398, ppl: 11.946020 +epoch: 2, batch: 29821, sum loss: 3397.953857, avg loss: 2.348275, ppl: 10.467497 +epoch: 2, batch: 29822, sum loss: 4503.203613, avg loss: 2.501780, ppl: 12.204196 +epoch: 2, batch: 29823, sum loss: 4394.543457, avg loss: 2.411934, ppl: 11.155514 +epoch: 2, batch: 29824, sum loss: 3852.127197, avg loss: 2.643876, ppl: 14.067622 +epoch: 2, batch: 29825, sum loss: 4643.655762, avg loss: 2.548659, ppl: 12.789936 +epoch: 2, batch: 29826, sum loss: 4366.448242, avg loss: 2.582169, ppl: 13.225798 +epoch: 2, batch: 29827, sum loss: 4356.417480, avg loss: 2.649889, ppl: 14.152468 +epoch: 2, batch: 29828, sum loss: 4381.733887, avg loss: 2.544561, ppl: 12.737634 +epoch: 2, batch: 29829, sum loss: 5124.357910, avg loss: 2.824894, ppl: 16.859161 +epoch: 2, batch: 29830, sum loss: 3660.596680, avg loss: 2.531533, ppl: 12.572765 +epoch: 2, batch: 29831, sum loss: 
3640.372559, avg loss: 2.471400, ppl: 11.839013 +epoch: 2, batch: 29832, sum loss: 3195.353027, avg loss: 2.228280, ppl: 9.283880 +epoch: 2, batch: 29833, sum loss: 3971.497803, avg loss: 2.447010, ppl: 11.553753 +epoch: 2, batch: 29834, sum loss: 4055.558105, avg loss: 2.549062, ppl: 12.795099 +epoch: 2, batch: 29835, sum loss: 4443.721680, avg loss: 2.448332, ppl: 11.569029 +epoch: 2, batch: 29836, sum loss: 4867.121094, avg loss: 2.523132, ppl: 12.467579 +epoch: 2, batch: 29837, sum loss: 3952.287842, avg loss: 2.561431, ppl: 12.954341 +epoch: 2, batch: 29838, sum loss: 4352.429199, avg loss: 2.763447, ppl: 15.854400 +epoch: 2, batch: 29839, sum loss: 3109.917969, avg loss: 2.345338, ppl: 10.436798 +epoch: 2, batch: 29840, sum loss: 3786.860840, avg loss: 2.404356, ppl: 11.071298 +epoch: 2, batch: 29841, sum loss: 4266.007812, avg loss: 2.576092, ppl: 13.145658 +epoch: 2, batch: 29842, sum loss: 4630.917969, avg loss: 2.418234, ppl: 11.226015 +epoch: 2, batch: 29843, sum loss: 3651.034180, avg loss: 2.358549, ppl: 10.575599 +epoch: 2, batch: 29844, sum loss: 4300.128906, avg loss: 2.588880, ppl: 13.314845 +epoch: 2, batch: 29845, sum loss: 4706.824219, avg loss: 2.569227, ppl: 13.055732 +epoch: 2, batch: 29846, sum loss: 3403.677734, avg loss: 2.336086, ppl: 10.340687 +epoch: 2, batch: 29847, sum loss: 4482.427734, avg loss: 2.543943, ppl: 12.729768 +epoch: 2, batch: 29848, sum loss: 3553.911133, avg loss: 2.319785, ppl: 10.173491 +epoch: 2, batch: 29849, sum loss: 3769.791748, avg loss: 2.454292, ppl: 11.638186 +epoch: 2, batch: 29850, sum loss: 5089.277344, avg loss: 2.825806, ppl: 16.874546 +epoch: 2, batch: 29851, sum loss: 4287.283203, avg loss: 2.349196, ppl: 10.477145 +epoch: 2, batch: 29852, sum loss: 5008.302246, avg loss: 2.823169, ppl: 16.830105 +epoch: 2, batch: 29853, sum loss: 4346.850586, avg loss: 2.295064, ppl: 9.925069 +epoch: 2, batch: 29854, sum loss: 3932.344971, avg loss: 2.408050, ppl: 11.112267 +epoch: 2, batch: 29855, sum loss: 
4331.855957, avg loss: 2.508313, ppl: 12.284185 +epoch: 2, batch: 29856, sum loss: 4183.408203, avg loss: 2.532330, ppl: 12.582785 +epoch: 2, batch: 29857, sum loss: 4064.403076, avg loss: 2.251747, ppl: 9.504324 +epoch: 2, batch: 29858, sum loss: 4778.920898, avg loss: 2.660869, ppl: 14.308720 +epoch: 2, batch: 29859, sum loss: 4369.450195, avg loss: 2.646548, ppl: 14.105260 +epoch: 2, batch: 29860, sum loss: 3968.409180, avg loss: 2.396382, ppl: 10.983370 +epoch: 2, batch: 29861, sum loss: 3051.303223, avg loss: 2.012733, ppl: 7.483742 +epoch: 2, batch: 29862, sum loss: 3727.598389, avg loss: 2.465343, ppl: 11.767518 +epoch: 2, batch: 29863, sum loss: 3765.129883, avg loss: 2.305652, ppl: 10.030718 +epoch: 2, batch: 29864, sum loss: 5282.722168, avg loss: 2.720248, ppl: 15.184094 +epoch: 2, batch: 29865, sum loss: 3557.954346, avg loss: 2.370389, ppl: 10.701559 +epoch: 2, batch: 29866, sum loss: 3441.639160, avg loss: 2.196324, ppl: 8.991898 +epoch: 2, batch: 29867, sum loss: 3302.395020, avg loss: 2.428232, ppl: 11.338814 +epoch: 2, batch: 29868, sum loss: 4326.623535, avg loss: 2.606400, ppl: 13.550179 +epoch: 2, batch: 29869, sum loss: 4587.492676, avg loss: 2.365906, ppl: 10.653691 +epoch: 2, batch: 29870, sum loss: 4387.444336, avg loss: 2.409360, ppl: 11.126837 +epoch: 2, batch: 29871, sum loss: 3806.569092, avg loss: 2.407697, ppl: 11.108349 +epoch: 2, batch: 29872, sum loss: 3701.240967, avg loss: 2.167003, ppl: 8.732074 +epoch: 2, batch: 29873, sum loss: 4592.407227, avg loss: 2.520531, ppl: 12.435197 +epoch: 2, batch: 29874, sum loss: 3914.767334, avg loss: 2.504650, ppl: 12.239270 +epoch: 2, batch: 29875, sum loss: 4448.332520, avg loss: 2.476800, ppl: 11.903113 +epoch: 2, batch: 29876, sum loss: 4494.837402, avg loss: 2.870266, ppl: 17.641718 +epoch: 2, batch: 29877, sum loss: 3827.023438, avg loss: 2.384438, ppl: 10.852964 +epoch: 2, batch: 29878, sum loss: 4328.755371, avg loss: 2.572047, ppl: 13.092598 +epoch: 2, batch: 29879, sum loss: 
3501.418457, avg loss: 2.348369, ppl: 10.468486 +epoch: 2, batch: 29880, sum loss: 3174.674805, avg loss: 2.279020, ppl: 9.767102 +epoch: 2, batch: 29881, sum loss: 4171.027344, avg loss: 2.598771, ppl: 13.447199 +epoch: 2, batch: 29882, sum loss: 4343.834961, avg loss: 2.503651, ppl: 12.227058 +epoch: 2, batch: 29883, sum loss: 4289.796875, avg loss: 2.310068, ppl: 10.075111 +epoch: 2, batch: 29884, sum loss: 3764.308838, avg loss: 2.528079, ppl: 12.529408 +epoch: 2, batch: 29885, sum loss: 5385.194824, avg loss: 2.633347, ppl: 13.920283 +epoch: 2, batch: 29886, sum loss: 4386.246094, avg loss: 2.462800, ppl: 11.737628 +epoch: 2, batch: 29887, sum loss: 4057.592773, avg loss: 2.315978, ppl: 10.134828 +epoch: 2, batch: 29888, sum loss: 3894.309814, avg loss: 2.438516, ppl: 11.456026 +epoch: 2, batch: 29889, sum loss: 4725.069336, avg loss: 2.538995, ppl: 12.666935 +epoch: 2, batch: 29890, sum loss: 4663.174805, avg loss: 2.381601, ppl: 10.822216 +epoch: 2, batch: 29891, sum loss: 5111.511719, avg loss: 2.723235, ppl: 15.229508 +epoch: 2, batch: 29892, sum loss: 4563.434082, avg loss: 2.562288, ppl: 12.965443 +epoch: 2, batch: 29893, sum loss: 3738.723145, avg loss: 2.246829, ppl: 9.457696 +epoch: 2, batch: 29894, sum loss: 3274.283203, avg loss: 2.209368, ppl: 9.109957 +epoch: 2, batch: 29895, sum loss: 3843.829590, avg loss: 2.481491, ppl: 11.959084 +epoch: 2, batch: 29896, sum loss: 4214.876953, avg loss: 2.261200, ppl: 9.594598 +epoch: 2, batch: 29897, sum loss: 4359.566895, avg loss: 2.522897, ppl: 12.464661 +epoch: 2, batch: 29898, sum loss: 4448.423340, avg loss: 2.282413, ppl: 9.800303 +epoch: 2, batch: 29899, sum loss: 4019.980469, avg loss: 2.294510, ppl: 9.919571 +epoch: 2, batch: 29900, sum loss: 3995.099609, avg loss: 2.403790, ppl: 11.065039 +epoch: 2, batch: 29901, sum loss: 4586.131348, avg loss: 2.575032, ppl: 13.131734 +epoch: 2, batch: 29902, sum loss: 3260.455322, avg loss: 1.994162, ppl: 7.346046 +epoch: 2, batch: 29903, sum loss: 3944.006592, 
avg loss: 2.572737, ppl: 13.101641 +epoch: 2, batch: 29904, sum loss: 4074.603027, avg loss: 2.508992, ppl: 12.292532 +epoch: 2, batch: 29905, sum loss: 3471.728271, avg loss: 2.390997, ppl: 10.924385 +epoch: 2, batch: 29906, sum loss: 3262.360352, avg loss: 2.402327, ppl: 11.048861 +epoch: 2, batch: 29907, sum loss: 4367.206055, avg loss: 2.304594, ppl: 10.020112 +epoch: 2, batch: 29908, sum loss: 4320.418945, avg loss: 2.750108, ppl: 15.644318 +epoch: 2, batch: 29909, sum loss: 3258.605469, avg loss: 2.203249, ppl: 9.054383 +epoch: 2, batch: 29910, sum loss: 4456.055176, avg loss: 2.412591, ppl: 11.162847 +epoch: 2, batch: 29911, sum loss: 4196.987305, avg loss: 2.570108, ppl: 13.067241 +epoch: 2, batch: 29912, sum loss: 4209.710938, avg loss: 2.636012, ppl: 13.957428 +epoch: 2, batch: 29913, sum loss: 3974.372803, avg loss: 2.523411, ppl: 12.471066 +epoch: 2, batch: 29914, sum loss: 3723.616211, avg loss: 2.514258, ppl: 12.357438 +epoch: 2, batch: 29915, sum loss: 3941.084961, avg loss: 2.343095, ppl: 10.413412 +epoch: 2, batch: 29916, sum loss: 4782.607910, avg loss: 2.592199, ppl: 13.359120 +epoch: 2, batch: 29917, sum loss: 3700.193848, avg loss: 2.013163, ppl: 7.486962 +epoch: 2, batch: 29918, sum loss: 3972.963623, avg loss: 2.558251, ppl: 12.913211 +epoch: 2, batch: 29919, sum loss: 3724.921631, avg loss: 2.322270, ppl: 10.198803 +epoch: 2, batch: 29920, sum loss: 4448.218262, avg loss: 2.392802, ppl: 10.944111 +epoch: 2, batch: 29921, sum loss: 3442.548096, avg loss: 2.181589, ppl: 8.860377 +epoch: 2, batch: 29922, sum loss: 3707.895996, avg loss: 2.376857, ppl: 10.770991 +epoch: 2, batch: 29923, sum loss: 4084.093994, avg loss: 2.313934, ppl: 10.114139 +epoch: 2, batch: 29924, sum loss: 4452.436523, avg loss: 2.427719, ppl: 11.333000 +epoch: 2, batch: 29925, sum loss: 5075.307129, avg loss: 2.854504, ppl: 17.365816 +epoch: 2, batch: 29926, sum loss: 4469.898926, avg loss: 2.604836, ppl: 13.529010 +epoch: 2, batch: 29927, sum loss: 4064.759033, avg loss: 
2.484572, ppl: 11.995981 +epoch: 2, batch: 29928, sum loss: 4387.682617, avg loss: 2.646371, ppl: 14.102762 +epoch: 2, batch: 29929, sum loss: 4950.869141, avg loss: 2.699492, ppl: 14.872178 +epoch: 2, batch: 29930, sum loss: 3558.843750, avg loss: 2.294548, ppl: 9.919950 +epoch: 2, batch: 29931, sum loss: 4622.342285, avg loss: 2.589548, ppl: 13.323743 +epoch: 2, batch: 29932, sum loss: 4328.734375, avg loss: 2.451152, ppl: 11.601703 +epoch: 2, batch: 29933, sum loss: 5591.069336, avg loss: 2.796933, ppl: 16.394291 +epoch: 2, batch: 29934, sum loss: 4508.157227, avg loss: 2.518523, ppl: 12.410259 +epoch: 2, batch: 29935, sum loss: 4554.065918, avg loss: 2.678863, ppl: 14.568513 +epoch: 2, batch: 29936, sum loss: 4405.557129, avg loss: 2.539226, ppl: 12.669862 +epoch: 2, batch: 29937, sum loss: 4096.619629, avg loss: 2.488833, ppl: 12.047214 +epoch: 2, batch: 29938, sum loss: 4183.603516, avg loss: 2.549423, ppl: 12.799719 +epoch: 2, batch: 29939, sum loss: 4431.637695, avg loss: 2.374940, ppl: 10.750364 +epoch: 2, batch: 29940, sum loss: 4759.384766, avg loss: 2.503622, ppl: 12.226694 +epoch: 2, batch: 29941, sum loss: 4531.746582, avg loss: 2.520437, ppl: 12.434035 +epoch: 2, batch: 29942, sum loss: 2892.525146, avg loss: 2.102126, ppl: 8.183549 +epoch: 2, batch: 29943, sum loss: 4819.269043, avg loss: 2.523178, ppl: 12.468153 +epoch: 2, batch: 29944, sum loss: 3817.592285, avg loss: 2.556994, ppl: 12.896994 +epoch: 2, batch: 29945, sum loss: 3523.225098, avg loss: 2.370945, ppl: 10.707511 +epoch: 2, batch: 29946, sum loss: 4208.740723, avg loss: 2.471369, ppl: 11.838637 +epoch: 2, batch: 29947, sum loss: 4510.148926, avg loss: 2.328420, ppl: 10.261712 +epoch: 2, batch: 29948, sum loss: 3629.862305, avg loss: 2.392790, ppl: 10.943983 +epoch: 2, batch: 29949, sum loss: 4297.272949, avg loss: 2.604408, ppl: 13.523214 +epoch: 2, batch: 29950, sum loss: 4008.942383, avg loss: 2.399128, ppl: 11.013565 +epoch: 2, batch: 29951, sum loss: 3169.982666, avg loss: 2.183184, 
ppl: 8.874515 +epoch: 2, batch: 29952, sum loss: 4132.118652, avg loss: 2.502798, ppl: 12.216623 +epoch: 2, batch: 29953, sum loss: 5004.452637, avg loss: 2.745174, ppl: 15.567328 +epoch: 2, batch: 29954, sum loss: 3338.180176, avg loss: 2.483765, ppl: 11.986309 +epoch: 2, batch: 29955, sum loss: 3886.645264, avg loss: 2.591097, ppl: 13.344400 +epoch: 2, batch: 29956, sum loss: 3760.397461, avg loss: 2.277648, ppl: 9.753717 +epoch: 2, batch: 29957, sum loss: 4111.164551, avg loss: 2.522187, ppl: 12.455805 +epoch: 2, batch: 29958, sum loss: 3730.531738, avg loss: 2.392900, ppl: 10.945194 +epoch: 2, batch: 29959, sum loss: 3951.612305, avg loss: 2.469758, ppl: 11.819581 +epoch: 2, batch: 29960, sum loss: 3251.328125, avg loss: 2.219337, ppl: 9.201224 +epoch: 2, batch: 29961, sum loss: 4699.577637, avg loss: 2.586449, ppl: 13.282520 +epoch: 2, batch: 29962, sum loss: 3824.227051, avg loss: 2.529251, ppl: 12.544106 +epoch: 2, batch: 29963, sum loss: 4689.041016, avg loss: 2.389929, ppl: 10.912720 +epoch: 2, batch: 29964, sum loss: 3627.851318, avg loss: 2.229779, ppl: 9.297815 +epoch: 2, batch: 29965, sum loss: 4304.923340, avg loss: 2.574715, ppl: 13.127577 +epoch: 2, batch: 29966, sum loss: 3574.187256, avg loss: 2.198147, ppl: 9.008307 +epoch: 2, batch: 29967, sum loss: 4486.068359, avg loss: 2.597608, ppl: 13.431566 +epoch: 2, batch: 29968, sum loss: 3163.164307, avg loss: 1.959829, ppl: 7.098114 +epoch: 2, batch: 29969, sum loss: 4461.455078, avg loss: 2.356817, ppl: 10.557297 +epoch: 2, batch: 29970, sum loss: 3463.865234, avg loss: 2.454901, ppl: 11.645281 +epoch: 2, batch: 29971, sum loss: 3729.565186, avg loss: 2.258974, ppl: 9.573258 +epoch: 2, batch: 29972, sum loss: 3234.025391, avg loss: 2.234987, ppl: 9.346356 +epoch: 2, batch: 29973, sum loss: 3966.352295, avg loss: 2.430363, ppl: 11.363006 +epoch: 2, batch: 29974, sum loss: 4101.833008, avg loss: 2.533560, ppl: 12.598270 +epoch: 2, batch: 29975, sum loss: 3554.974121, avg loss: 2.223248, ppl: 9.237288 
+epoch: 2, batch: 29976, sum loss: 4344.227539, avg loss: 2.573595, ppl: 13.112875 +epoch: 2, batch: 29977, sum loss: 4006.435547, avg loss: 2.383364, ppl: 10.841316 +epoch: 2, batch: 29978, sum loss: 4431.734863, avg loss: 2.515173, ppl: 12.368751 +epoch: 2, batch: 29979, sum loss: 3871.247803, avg loss: 2.505662, ppl: 12.251669 +epoch: 2, batch: 29980, sum loss: 3949.894043, avg loss: 2.356739, ppl: 10.556469 +epoch: 2, batch: 29981, sum loss: 4324.591797, avg loss: 2.646629, ppl: 14.106407 +epoch: 2, batch: 29982, sum loss: 3712.785400, avg loss: 2.351352, ppl: 10.499761 +epoch: 2, batch: 29983, sum loss: 3877.670410, avg loss: 2.485686, ppl: 12.009359 +epoch: 2, batch: 29984, sum loss: 3695.696045, avg loss: 2.289775, ppl: 9.872713 +epoch: 2, batch: 29985, sum loss: 3972.323975, avg loss: 2.329809, ppl: 10.275978 +epoch: 2, batch: 29986, sum loss: 4610.614258, avg loss: 2.764157, ppl: 15.865664 +epoch: 2, batch: 29987, sum loss: 3809.697998, avg loss: 2.324404, ppl: 10.220584 +epoch: 2, batch: 29988, sum loss: 3993.574219, avg loss: 2.564916, ppl: 12.999568 +epoch: 2, batch: 29989, sum loss: 4753.410156, avg loss: 2.667458, ppl: 14.403310 +epoch: 2, batch: 29990, sum loss: 4453.659180, avg loss: 2.610586, ppl: 13.607018 +epoch: 2, batch: 29991, sum loss: 4010.472656, avg loss: 2.458904, ppl: 11.691993 +epoch: 2, batch: 29992, sum loss: 3596.456055, avg loss: 2.355243, ppl: 10.540689 +epoch: 2, batch: 29993, sum loss: 2983.372559, avg loss: 2.233063, ppl: 9.328400 +epoch: 2, batch: 29994, sum loss: 4245.810547, avg loss: 2.653631, ppl: 14.205532 +epoch: 2, batch: 29995, sum loss: 3417.840332, avg loss: 2.190923, ppl: 8.943468 +epoch: 2, batch: 29996, sum loss: 2355.042236, avg loss: 1.758807, ppl: 5.805506 +epoch: 2, batch: 29997, sum loss: 3840.431152, avg loss: 2.375035, ppl: 10.751388 +epoch: 2, batch: 29998, sum loss: 4415.367188, avg loss: 2.507307, ppl: 12.271832 +epoch: 2, batch: 29999, sum loss: 4902.115234, avg loss: 2.644075, ppl: 14.070423 +epoch: 2, 
batch: 30000, sum loss: 4093.835938, avg loss: 2.494720, ppl: 12.118342 +epoch: 2, batch: 30001, sum loss: 4962.726074, avg loss: 2.438686, ppl: 11.457973 +epoch: 2, batch: 30002, sum loss: 4628.760742, avg loss: 2.643496, ppl: 14.062277 +epoch: 2, batch: 30003, sum loss: 4344.428223, avg loss: 2.374005, ppl: 10.740317 +epoch: 2, batch: 30004, sum loss: 3944.196777, avg loss: 2.363210, ppl: 10.625000 +epoch: 2, batch: 30005, sum loss: 4600.225586, avg loss: 2.597530, ppl: 13.430522 +epoch: 2, batch: 30006, sum loss: 3897.716309, avg loss: 2.559236, ppl: 12.925939 +epoch: 2, batch: 30007, sum loss: 4383.116211, avg loss: 2.351457, ppl: 10.500859 +epoch: 2, batch: 30008, sum loss: 3883.708252, avg loss: 2.180634, ppl: 8.851912 +epoch: 2, batch: 30009, sum loss: 3385.663086, avg loss: 2.121343, ppl: 8.342331 +epoch: 2, batch: 30010, sum loss: 4369.146484, avg loss: 2.718822, ppl: 15.162447 +epoch: 2, batch: 30011, sum loss: 3322.340576, avg loss: 2.357942, ppl: 10.569181 +epoch: 2, batch: 30012, sum loss: 3201.989502, avg loss: 2.211319, ppl: 9.127746 +epoch: 2, batch: 30013, sum loss: 3970.151123, avg loss: 2.306886, ppl: 10.043103 +epoch: 2, batch: 30014, sum loss: 3869.068115, avg loss: 2.418168, ppl: 11.225271 +epoch: 2, batch: 30015, sum loss: 3528.168457, avg loss: 2.091386, ppl: 8.096131 +epoch: 2, batch: 30016, sum loss: 3506.587158, avg loss: 2.153923, ppl: 8.618606 +epoch: 2, batch: 30017, sum loss: 4462.616699, avg loss: 2.537019, ppl: 12.641932 +epoch: 2, batch: 30018, sum loss: 4716.284180, avg loss: 2.572987, ppl: 13.104905 +epoch: 2, batch: 30019, sum loss: 4584.934570, avg loss: 2.704976, ppl: 14.953959 +epoch: 2, batch: 30020, sum loss: 3529.036133, avg loss: 2.324793, ppl: 10.224566 +epoch: 2, batch: 30021, sum loss: 3602.027832, avg loss: 2.335945, ppl: 10.339230 +epoch: 2, batch: 30022, sum loss: 4009.419922, avg loss: 2.348811, ppl: 10.473106 +epoch: 2, batch: 30023, sum loss: 3937.953369, avg loss: 2.338452, ppl: 10.365182 +epoch: 2, batch: 
30024, sum loss: 4256.815918, avg loss: 2.570541, ppl: 13.072894 +epoch: 2, batch: 30025, sum loss: 4754.313965, avg loss: 2.641286, ppl: 14.031231 +epoch: 2, batch: 30026, sum loss: 3846.752197, avg loss: 2.377474, ppl: 10.777639 +epoch: 2, batch: 30027, sum loss: 4510.557129, avg loss: 2.577461, ppl: 13.163676 +epoch: 2, batch: 30028, sum loss: 3970.539795, avg loss: 2.393333, ppl: 10.949931 +epoch: 2, batch: 30029, sum loss: 4722.232422, avg loss: 2.551179, ppl: 12.822214 +epoch: 2, batch: 30030, sum loss: 4264.424805, avg loss: 2.460718, ppl: 11.713221 +epoch: 2, batch: 30031, sum loss: 3998.027344, avg loss: 2.530397, ppl: 12.558493 +epoch: 2, batch: 30032, sum loss: 4746.060547, avg loss: 2.569605, ppl: 13.060669 +epoch: 2, batch: 30033, sum loss: 4698.616211, avg loss: 2.457435, ppl: 11.674829 +epoch: 2, batch: 30034, sum loss: 3803.401367, avg loss: 2.269333, ppl: 9.672943 +epoch: 2, batch: 30035, sum loss: 4411.223145, avg loss: 2.688131, ppl: 14.704169 +epoch: 2, batch: 30036, sum loss: 3929.579102, avg loss: 2.303387, ppl: 10.008026 +epoch: 2, batch: 30037, sum loss: 3082.236328, avg loss: 2.281448, ppl: 9.790846 +epoch: 2, batch: 30038, sum loss: 4089.115723, avg loss: 2.476751, ppl: 11.902532 +epoch: 2, batch: 30039, sum loss: 3827.934814, avg loss: 2.354203, ppl: 10.529738 +epoch: 2, batch: 30040, sum loss: 4503.035156, avg loss: 2.347776, ppl: 10.462280 +epoch: 2, batch: 30041, sum loss: 4952.352051, avg loss: 2.771322, ppl: 15.979742 +epoch: 2, batch: 30042, sum loss: 3820.328857, avg loss: 2.313949, ppl: 10.114284 +epoch: 2, batch: 30043, sum loss: 4411.220703, avg loss: 2.348893, ppl: 10.473966 +epoch: 2, batch: 30044, sum loss: 4311.748047, avg loss: 2.426420, ppl: 11.318287 +epoch: 2, batch: 30045, sum loss: 4234.212402, avg loss: 2.412657, ppl: 11.163578 +epoch: 2, batch: 30046, sum loss: 5068.448730, avg loss: 2.626139, ppl: 13.820312 +epoch: 2, batch: 30047, sum loss: 3992.471191, avg loss: 2.268450, ppl: 9.664405 +epoch: 2, batch: 30048, sum 
loss: 5663.312500, avg loss: 2.974429, ppl: 19.578438 +epoch: 2, batch: 30049, sum loss: 4717.099609, avg loss: 2.346816, ppl: 10.452235 +epoch: 2, batch: 30050, sum loss: 4113.735352, avg loss: 2.447195, ppl: 11.555890 +epoch: 2, batch: 30051, sum loss: 4201.049316, avg loss: 2.403347, ppl: 11.060127 +epoch: 2, batch: 30052, sum loss: 3243.812988, avg loss: 2.345490, ppl: 10.438389 +epoch: 2, batch: 30053, sum loss: 4140.287598, avg loss: 2.398776, ppl: 11.009693 +epoch: 2, batch: 30054, sum loss: 4393.064453, avg loss: 2.619597, ppl: 13.730192 +epoch: 2, batch: 30055, sum loss: 4966.865234, avg loss: 2.588257, ppl: 13.306556 +epoch: 2, batch: 30056, sum loss: 5278.474609, avg loss: 2.727894, ppl: 15.300628 +epoch: 2, batch: 30057, sum loss: 4322.344727, avg loss: 2.491265, ppl: 12.076544 +epoch: 2, batch: 30058, sum loss: 3946.801270, avg loss: 2.328496, ppl: 10.262497 +epoch: 2, batch: 30059, sum loss: 4302.980957, avg loss: 2.511956, ppl: 12.329024 +epoch: 2, batch: 30060, sum loss: 3823.293945, avg loss: 2.290769, ppl: 9.882538 +epoch: 2, batch: 30061, sum loss: 3984.367676, avg loss: 2.640403, ppl: 14.018849 +epoch: 2, batch: 30062, sum loss: 4542.036621, avg loss: 2.390546, ppl: 10.919453 +epoch: 2, batch: 30063, sum loss: 3307.985352, avg loss: 2.419887, ppl: 11.244587 +epoch: 2, batch: 30064, sum loss: 4188.020508, avg loss: 2.375508, ppl: 10.756474 +epoch: 2, batch: 30065, sum loss: 4891.240723, avg loss: 2.639633, ppl: 14.008067 +epoch: 2, batch: 30066, sum loss: 4472.220215, avg loss: 2.459967, ppl: 11.704427 +epoch: 2, batch: 30067, sum loss: 5372.809082, avg loss: 2.811517, ppl: 16.635134 +epoch: 2, batch: 30068, sum loss: 4017.618164, avg loss: 2.500074, ppl: 12.183392 +epoch: 2, batch: 30069, sum loss: 4050.333984, avg loss: 2.400909, ppl: 11.033206 +epoch: 2, batch: 30070, sum loss: 4320.884766, avg loss: 2.445323, ppl: 11.534269 +epoch: 2, batch: 30071, sum loss: 5209.597168, avg loss: 2.700672, ppl: 14.889740 +epoch: 2, batch: 30072, sum loss: 
4807.208984, avg loss: 2.641324, ppl: 14.031767 +epoch: 2, batch: 30073, sum loss: 3089.140381, avg loss: 2.090081, ppl: 8.085574 +epoch: 2, batch: 30074, sum loss: 4902.934082, avg loss: 2.541697, ppl: 12.701210 +epoch: 2, batch: 30075, sum loss: 3876.495850, avg loss: 2.497742, ppl: 12.155022 +epoch: 2, batch: 30076, sum loss: 3911.397949, avg loss: 2.464649, ppl: 11.759356 +epoch: 2, batch: 30077, sum loss: 3439.636475, avg loss: 2.068332, ppl: 7.911617 +epoch: 2, batch: 30078, sum loss: 3163.165039, avg loss: 2.166551, ppl: 8.728132 +epoch: 2, batch: 30079, sum loss: 4886.140625, avg loss: 2.692089, ppl: 14.762477 +epoch: 2, batch: 30080, sum loss: 3393.910889, avg loss: 2.158976, ppl: 8.662266 +epoch: 2, batch: 30081, sum loss: 4528.268066, avg loss: 2.470414, ppl: 11.827338 +epoch: 2, batch: 30082, sum loss: 4889.405273, avg loss: 2.793946, ppl: 16.345388 +epoch: 2, batch: 30083, sum loss: 4728.230469, avg loss: 2.629716, ppl: 13.869836 +epoch: 2, batch: 30084, sum loss: 4023.245361, avg loss: 2.638194, ppl: 13.987913 +epoch: 2, batch: 30085, sum loss: 4607.059570, avg loss: 2.483590, ppl: 11.984212 +epoch: 2, batch: 30086, sum loss: 4139.304199, avg loss: 2.406572, ppl: 11.095860 +epoch: 2, batch: 30087, sum loss: 4828.266602, avg loss: 2.748017, ppl: 15.611649 +epoch: 2, batch: 30088, sum loss: 5595.987793, avg loss: 2.820558, ppl: 16.786221 +epoch: 2, batch: 30089, sum loss: 4378.901367, avg loss: 2.392842, ppl: 10.944557 +epoch: 2, batch: 30090, sum loss: 4271.230469, avg loss: 2.376867, ppl: 10.771108 +epoch: 2, batch: 30091, sum loss: 4451.703125, avg loss: 2.664095, ppl: 14.354959 +epoch: 2, batch: 30092, sum loss: 4673.322266, avg loss: 2.577674, ppl: 13.166473 +epoch: 2, batch: 30093, sum loss: 4095.229980, avg loss: 2.392073, ppl: 10.936146 +epoch: 2, batch: 30094, sum loss: 4449.874023, avg loss: 2.560342, ppl: 12.940240 +epoch: 2, batch: 30095, sum loss: 4452.392578, avg loss: 2.634552, ppl: 13.937064 +epoch: 2, batch: 30096, sum loss: 
3647.350098, avg loss: 2.236266, ppl: 9.358323 +epoch: 2, batch: 30097, sum loss: 4821.691895, avg loss: 2.649282, ppl: 14.143872 +epoch: 2, batch: 30098, sum loss: 4675.694824, avg loss: 2.440342, ppl: 11.476962 +epoch: 2, batch: 30099, sum loss: 3443.364746, avg loss: 2.292520, ppl: 9.899852 +epoch: 2, batch: 30100, sum loss: 4137.349609, avg loss: 2.429448, ppl: 11.352612 +epoch: 2, batch: 30101, sum loss: 3674.217773, avg loss: 2.190947, ppl: 8.943677 +epoch: 2, batch: 30102, sum loss: 4291.820801, avg loss: 2.451069, ppl: 11.600738 +epoch: 2, batch: 30103, sum loss: 3357.498535, avg loss: 2.310735, ppl: 10.081837 +epoch: 2, batch: 30104, sum loss: 4348.630371, avg loss: 2.493481, ppl: 12.103331 +epoch: 2, batch: 30105, sum loss: 3559.422852, avg loss: 2.249951, ppl: 9.487272 +epoch: 2, batch: 30106, sum loss: 5276.143555, avg loss: 2.807953, ppl: 16.575954 +epoch: 2, batch: 30107, sum loss: 3912.959961, avg loss: 2.371491, ppl: 10.713353 +epoch: 2, batch: 30108, sum loss: 4656.111328, avg loss: 2.495236, ppl: 12.124599 +epoch: 2, batch: 30109, sum loss: 3536.584229, avg loss: 2.437343, ppl: 11.442594 +epoch: 2, batch: 30110, sum loss: 4030.103027, avg loss: 2.416129, ppl: 11.202410 +epoch: 2, batch: 30111, sum loss: 4606.319824, avg loss: 2.761583, ppl: 15.824868 +epoch: 2, batch: 30112, sum loss: 4498.130859, avg loss: 2.706457, ppl: 14.976116 +epoch: 2, batch: 30113, sum loss: 4069.229492, avg loss: 2.368585, ppl: 10.682270 +epoch: 2, batch: 30114, sum loss: 4032.510010, avg loss: 2.578331, ppl: 13.175134 +epoch: 2, batch: 30115, sum loss: 5023.651855, avg loss: 2.628808, ppl: 13.857239 +epoch: 2, batch: 30116, sum loss: 4325.069824, avg loss: 2.388222, ppl: 10.894107 +epoch: 2, batch: 30117, sum loss: 3499.111328, avg loss: 2.162615, ppl: 8.693843 +epoch: 2, batch: 30118, sum loss: 3902.327637, avg loss: 2.245298, ppl: 9.443229 +epoch: 2, batch: 30119, sum loss: 3007.760254, avg loss: 2.280334, ppl: 9.779944 +epoch: 2, batch: 30120, sum loss: 4338.044922, 
avg loss: 2.476053, ppl: 11.894228 +epoch: 2, batch: 30121, sum loss: 5194.257324, avg loss: 2.563799, ppl: 12.985059 +epoch: 2, batch: 30122, sum loss: 3918.321289, avg loss: 2.392137, ppl: 10.936844 +epoch: 2, batch: 30123, sum loss: 3944.722656, avg loss: 2.424537, ppl: 11.297003 +epoch: 2, batch: 30124, sum loss: 3707.712646, avg loss: 2.498459, ppl: 12.163731 +epoch: 2, batch: 30125, sum loss: 4654.587891, avg loss: 2.580148, ppl: 13.199098 +epoch: 2, batch: 30126, sum loss: 4213.037109, avg loss: 2.597433, ppl: 13.429225 +epoch: 2, batch: 30127, sum loss: 3727.192383, avg loss: 2.293657, ppl: 9.911115 +epoch: 2, batch: 30128, sum loss: 4543.951660, avg loss: 2.514638, ppl: 12.362135 +epoch: 2, batch: 30129, sum loss: 3405.529785, avg loss: 1.960581, ppl: 7.103456 +epoch: 2, batch: 30130, sum loss: 4029.382080, avg loss: 2.394166, ppl: 10.959060 +epoch: 2, batch: 30131, sum loss: 3573.965820, avg loss: 2.427966, ppl: 11.335800 +epoch: 2, batch: 30132, sum loss: 4056.316895, avg loss: 2.524155, ppl: 12.480344 +epoch: 2, batch: 30133, sum loss: 3818.831055, avg loss: 2.478151, ppl: 11.919209 +epoch: 2, batch: 30134, sum loss: 4628.566406, avg loss: 2.594488, ppl: 13.389732 +epoch: 2, batch: 30135, sum loss: 4039.219727, avg loss: 2.662637, ppl: 14.334031 +epoch: 2, batch: 30136, sum loss: 4135.665527, avg loss: 2.514082, ppl: 12.355264 +epoch: 2, batch: 30137, sum loss: 3551.990479, avg loss: 2.404868, ppl: 11.076972 +epoch: 2, batch: 30138, sum loss: 4455.480957, avg loss: 2.553284, ppl: 12.849234 +epoch: 2, batch: 30139, sum loss: 3875.463379, avg loss: 2.451273, ppl: 11.603113 +epoch: 2, batch: 30140, sum loss: 3756.985840, avg loss: 2.364371, ppl: 10.637349 +epoch: 2, batch: 30141, sum loss: 3739.564697, avg loss: 2.267777, ppl: 9.657909 +epoch: 2, batch: 30142, sum loss: 5117.781250, avg loss: 2.745591, ppl: 15.573811 +epoch: 2, batch: 30143, sum loss: 4607.362305, avg loss: 2.667842, ppl: 14.408836 +epoch: 2, batch: 30144, sum loss: 4022.336182, avg loss: 
2.603454, ppl: 13.510321 +epoch: 2, batch: 30145, sum loss: 4131.275391, avg loss: 2.454709, ppl: 11.643045 +epoch: 2, batch: 30146, sum loss: 5011.781738, avg loss: 2.637780, ppl: 13.982128 +epoch: 2, batch: 30147, sum loss: 3641.396240, avg loss: 2.271613, ppl: 9.695030 +epoch: 2, batch: 30148, sum loss: 4733.253906, avg loss: 2.631047, ppl: 13.888307 +epoch: 2, batch: 30149, sum loss: 3483.518066, avg loss: 2.399117, ppl: 11.013450 +epoch: 2, batch: 30150, sum loss: 4590.054199, avg loss: 2.541558, ppl: 12.699445 +epoch: 2, batch: 30151, sum loss: 4592.127930, avg loss: 2.584203, ppl: 13.252716 +epoch: 2, batch: 30152, sum loss: 3950.118652, avg loss: 2.424873, ppl: 11.300798 +epoch: 2, batch: 30153, sum loss: 4432.264160, avg loss: 2.419358, ppl: 11.238645 +epoch: 2, batch: 30154, sum loss: 4705.997559, avg loss: 2.717089, ppl: 15.136192 +epoch: 2, batch: 30155, sum loss: 3767.104004, avg loss: 2.421018, ppl: 11.257312 +epoch: 2, batch: 30156, sum loss: 4522.474121, avg loss: 2.543574, ppl: 12.725067 +epoch: 2, batch: 30157, sum loss: 4006.276367, avg loss: 2.309093, ppl: 10.065291 +epoch: 2, batch: 30158, sum loss: 3138.143311, avg loss: 2.134791, ppl: 8.455282 +epoch: 2, batch: 30159, sum loss: 4297.835938, avg loss: 2.478567, ppl: 11.924170 +epoch: 2, batch: 30160, sum loss: 4514.324707, avg loss: 2.604919, ppl: 13.530128 +epoch: 2, batch: 30161, sum loss: 4404.767090, avg loss: 2.798454, ppl: 16.419249 +epoch: 2, batch: 30162, sum loss: 4700.450195, avg loss: 2.639220, ppl: 14.002275 +epoch: 2, batch: 30163, sum loss: 3838.777832, avg loss: 2.411293, ppl: 11.148362 +epoch: 2, batch: 30164, sum loss: 3828.949707, avg loss: 2.489564, ppl: 12.056021 +epoch: 2, batch: 30165, sum loss: 3278.182617, avg loss: 2.305332, ppl: 10.027511 +epoch: 2, batch: 30166, sum loss: 4072.390625, avg loss: 2.343148, ppl: 10.413965 +epoch: 2, batch: 30167, sum loss: 4874.201172, avg loss: 2.646146, ppl: 14.099598 +epoch: 2, batch: 30168, sum loss: 4391.812500, avg loss: 2.511042, 
ppl: 12.317760 +epoch: 2, batch: 30169, sum loss: 4092.357422, avg loss: 2.506036, ppl: 12.256253 +epoch: 2, batch: 30170, sum loss: 3521.834473, avg loss: 2.042828, ppl: 7.712386 +epoch: 2, batch: 30171, sum loss: 3610.125000, avg loss: 2.358018, ppl: 10.569978 +epoch: 2, batch: 30172, sum loss: 3616.288330, avg loss: 2.539528, ppl: 12.673693 +epoch: 2, batch: 30173, sum loss: 3870.305908, avg loss: 2.303754, ppl: 10.011692 +epoch: 2, batch: 30174, sum loss: 4129.844238, avg loss: 2.417942, ppl: 11.222734 +epoch: 2, batch: 30175, sum loss: 4086.027344, avg loss: 2.470392, ppl: 11.827076 +epoch: 2, batch: 30176, sum loss: 5159.408203, avg loss: 2.503352, ppl: 12.223397 +epoch: 2, batch: 30177, sum loss: 3522.870361, avg loss: 2.480895, ppl: 11.951954 +epoch: 2, batch: 30178, sum loss: 3667.013916, avg loss: 2.423671, ppl: 11.287219 +epoch: 2, batch: 30179, sum loss: 3968.398926, avg loss: 2.393486, ppl: 10.951600 +epoch: 2, batch: 30180, sum loss: 3407.050781, avg loss: 2.416348, ppl: 11.204866 +epoch: 2, batch: 30181, sum loss: 4462.210938, avg loss: 2.643490, ppl: 14.062196 +epoch: 2, batch: 30182, sum loss: 4203.627441, avg loss: 2.330170, ppl: 10.279693 +epoch: 2, batch: 30183, sum loss: 4360.838379, avg loss: 2.477749, ppl: 11.914416 +epoch: 2, batch: 30184, sum loss: 4202.980957, avg loss: 2.538032, ppl: 12.654742 +epoch: 2, batch: 30185, sum loss: 4629.806152, avg loss: 2.517567, ppl: 12.398399 +epoch: 2, batch: 30186, sum loss: 4669.805664, avg loss: 2.776341, ppl: 16.060148 +epoch: 2, batch: 30187, sum loss: 4748.768555, avg loss: 2.672351, ppl: 14.473963 +epoch: 2, batch: 30188, sum loss: 3752.954102, avg loss: 2.422824, ppl: 11.277661 +epoch: 2, batch: 30189, sum loss: 4410.094727, avg loss: 2.634465, ppl: 13.935862 +epoch: 2, batch: 30190, sum loss: 4235.788086, avg loss: 2.731004, ppl: 15.348296 +epoch: 2, batch: 30191, sum loss: 4690.760742, avg loss: 2.563257, ppl: 12.978021 +epoch: 2, batch: 30192, sum loss: 4646.411621, avg loss: 2.641508, ppl: 
14.034347 +epoch: 2, batch: 30193, sum loss: 3177.352539, avg loss: 2.106998, ppl: 8.223515 +epoch: 2, batch: 30194, sum loss: 4072.335449, avg loss: 2.615501, ppl: 13.674071 +epoch: 2, batch: 30195, sum loss: 3455.037109, avg loss: 2.520085, ppl: 12.429657 +epoch: 2, batch: 30196, sum loss: 5036.517090, avg loss: 2.926506, ppl: 18.662312 +epoch: 2, batch: 30197, sum loss: 4809.558105, avg loss: 2.561000, ppl: 12.948761 +epoch: 2, batch: 30198, sum loss: 3871.636963, avg loss: 2.455065, ppl: 11.647188 +epoch: 2, batch: 30199, sum loss: 4048.852539, avg loss: 2.362224, ppl: 10.614535 +epoch: 2, batch: 30200, sum loss: 3675.557617, avg loss: 2.234381, ppl: 9.340702 +epoch: 2, batch: 30201, sum loss: 4519.795898, avg loss: 2.520801, ppl: 12.438554 +epoch: 2, batch: 30202, sum loss: 4224.195312, avg loss: 2.635181, ppl: 13.945836 +epoch: 2, batch: 30203, sum loss: 4172.135254, avg loss: 2.432732, ppl: 11.389956 +epoch: 2, batch: 30204, sum loss: 4596.496094, avg loss: 2.485936, ppl: 12.012361 +epoch: 2, batch: 30205, sum loss: 4357.661133, avg loss: 2.471731, ppl: 11.842926 +epoch: 2, batch: 30206, sum loss: 4002.863281, avg loss: 2.308456, ppl: 10.058884 +epoch: 2, batch: 30207, sum loss: 3611.546631, avg loss: 2.374455, ppl: 10.745160 +epoch: 2, batch: 30208, sum loss: 4632.789551, avg loss: 2.575203, ppl: 13.133979 +epoch: 2, batch: 30209, sum loss: 4130.219727, avg loss: 2.645881, ppl: 14.095854 +epoch: 2, batch: 30210, sum loss: 3808.625000, avg loss: 2.372975, ppl: 10.729265 +epoch: 2, batch: 30211, sum loss: 3769.145020, avg loss: 2.234230, ppl: 9.339284 +epoch: 2, batch: 30212, sum loss: 4215.739746, avg loss: 2.411751, ppl: 11.153469 +epoch: 2, batch: 30213, sum loss: 4345.660156, avg loss: 2.498942, ppl: 12.169614 +epoch: 2, batch: 30214, sum loss: 3920.074951, avg loss: 2.616872, ppl: 13.692830 +epoch: 2, batch: 30215, sum loss: 3781.645020, avg loss: 2.335791, ppl: 10.337630 +epoch: 2, batch: 30216, sum loss: 3586.656738, avg loss: 2.453254, ppl: 11.626114 
+epoch: 2, batch: 30217, sum loss: 3749.585449, avg loss: 2.306018, ppl: 10.034389 +epoch: 2, batch: 30218, sum loss: 5229.391602, avg loss: 2.753761, ppl: 15.701571 +epoch: 2, batch: 30219, sum loss: 3797.423340, avg loss: 2.536689, ppl: 12.637755 +epoch: 2, batch: 30220, sum loss: 4817.379883, avg loss: 2.467920, ppl: 11.797882 +epoch: 2, batch: 30221, sum loss: 6030.906250, avg loss: 2.921951, ppl: 18.577490 +epoch: 2, batch: 30222, sum loss: 4772.981445, avg loss: 2.530743, ppl: 12.562839 +epoch: 2, batch: 30223, sum loss: 4024.513672, avg loss: 2.377149, ppl: 10.774146 +epoch: 2, batch: 30224, sum loss: 3624.402344, avg loss: 2.351981, ppl: 10.506359 +epoch: 2, batch: 30225, sum loss: 3663.665283, avg loss: 2.380549, ppl: 10.810839 +epoch: 2, batch: 30226, sum loss: 4035.327881, avg loss: 2.370933, ppl: 10.707376 +epoch: 2, batch: 30227, sum loss: 3811.267090, avg loss: 2.410669, ppl: 11.141408 +epoch: 2, batch: 30228, sum loss: 3665.588623, avg loss: 2.607104, ppl: 13.559729 +epoch: 2, batch: 30229, sum loss: 3625.555664, avg loss: 2.172292, ppl: 8.778383 +epoch: 2, batch: 30230, sum loss: 3981.750488, avg loss: 2.694013, ppl: 14.790908 +epoch: 2, batch: 30231, sum loss: 3514.227783, avg loss: 2.255602, ppl: 9.541034 +epoch: 2, batch: 30232, sum loss: 4158.066895, avg loss: 2.391068, ppl: 10.925156 +epoch: 2, batch: 30233, sum loss: 4967.893066, avg loss: 2.521773, ppl: 12.450653 +epoch: 2, batch: 30234, sum loss: 4619.906738, avg loss: 2.578073, ppl: 13.171732 +epoch: 2, batch: 30235, sum loss: 4918.506836, avg loss: 2.755466, ppl: 15.728373 +epoch: 2, batch: 30236, sum loss: 3541.405273, avg loss: 2.150216, ppl: 8.586710 +epoch: 2, batch: 30237, sum loss: 4290.293457, avg loss: 2.403526, ppl: 11.062111 +epoch: 2, batch: 30238, sum loss: 4801.537109, avg loss: 2.515211, ppl: 12.369214 +epoch: 2, batch: 30239, sum loss: 3946.693848, avg loss: 2.452886, ppl: 11.621840 +epoch: 2, batch: 30240, sum loss: 2774.027100, avg loss: 1.795487, ppl: 6.022407 +epoch: 2, 
batch: 30241, sum loss: 3395.332764, avg loss: 2.376020, ppl: 10.761987 +epoch: 2, batch: 30242, sum loss: 4007.017822, avg loss: 2.655413, ppl: 14.230861 +epoch: 2, batch: 30243, sum loss: 4194.008301, avg loss: 2.634427, ppl: 13.935329 +epoch: 2, batch: 30244, sum loss: 5475.246094, avg loss: 2.877166, ppl: 17.763851 +epoch: 2, batch: 30245, sum loss: 4388.328613, avg loss: 2.624599, ppl: 13.799033 +epoch: 2, batch: 30246, sum loss: 4899.816895, avg loss: 2.580209, ppl: 13.199897 +epoch: 2, batch: 30247, sum loss: 4475.551270, avg loss: 2.479530, ppl: 11.935652 +epoch: 2, batch: 30248, sum loss: 3950.030029, avg loss: 2.512742, ppl: 12.338714 +epoch: 2, batch: 30249, sum loss: 3317.901855, avg loss: 2.204586, ppl: 9.066498 +epoch: 2, batch: 30250, sum loss: 3579.327393, avg loss: 2.213561, ppl: 9.148232 +epoch: 2, batch: 30251, sum loss: 4356.619629, avg loss: 2.604076, ppl: 13.518733 +epoch: 2, batch: 30252, sum loss: 3862.982666, avg loss: 2.452687, ppl: 11.619530 +epoch: 2, batch: 30253, sum loss: 4467.519043, avg loss: 2.597395, ppl: 13.428707 +epoch: 2, batch: 30254, sum loss: 4110.416992, avg loss: 2.521728, ppl: 12.450095 +epoch: 2, batch: 30255, sum loss: 3320.229492, avg loss: 2.213486, ppl: 9.147551 +epoch: 2, batch: 30256, sum loss: 3944.627930, avg loss: 2.444007, ppl: 11.519110 +epoch: 2, batch: 30257, sum loss: 3781.787354, avg loss: 2.351858, ppl: 10.505067 +epoch: 2, batch: 30258, sum loss: 4039.304443, avg loss: 2.518270, ppl: 12.407111 +epoch: 2, batch: 30259, sum loss: 3794.773193, avg loss: 2.305452, ppl: 10.028707 +epoch: 2, batch: 30260, sum loss: 4402.316895, avg loss: 2.494231, ppl: 12.112410 +epoch: 2, batch: 30261, sum loss: 4349.234375, avg loss: 2.453037, ppl: 11.623594 +epoch: 2, batch: 30262, sum loss: 3322.382812, avg loss: 2.082999, ppl: 8.028508 +epoch: 2, batch: 30263, sum loss: 4430.755859, avg loss: 2.516045, ppl: 12.379542 +epoch: 2, batch: 30264, sum loss: 4786.896973, avg loss: 2.760609, ppl: 15.809474 +epoch: 2, batch: 
30265, sum loss: 3986.991699, avg loss: 2.510700, ppl: 12.313549 +epoch: 2, batch: 30266, sum loss: 3134.365234, avg loss: 2.112106, ppl: 8.265629 +epoch: 2, batch: 30267, sum loss: 3998.559814, avg loss: 2.438146, ppl: 11.451791 +epoch: 2, batch: 30268, sum loss: 4548.716797, avg loss: 2.485638, ppl: 12.008776 +epoch: 2, batch: 30269, sum loss: 4129.230469, avg loss: 2.436124, ppl: 11.428658 +epoch: 2, batch: 30270, sum loss: 4098.161621, avg loss: 2.572606, ppl: 13.099919 +epoch: 2, batch: 30271, sum loss: 5240.549316, avg loss: 2.672386, ppl: 14.474470 +epoch: 2, batch: 30272, sum loss: 4049.031494, avg loss: 2.464413, ppl: 11.756584 +epoch: 2, batch: 30273, sum loss: 3935.940186, avg loss: 2.349815, ppl: 10.483631 +epoch: 2, batch: 30274, sum loss: 3741.818359, avg loss: 2.360769, ppl: 10.599094 +epoch: 2, batch: 30275, sum loss: 4963.766602, avg loss: 2.601555, ppl: 13.484686 +epoch: 2, batch: 30276, sum loss: 4483.716309, avg loss: 2.527461, ppl: 12.521674 +epoch: 2, batch: 30277, sum loss: 4278.351074, avg loss: 2.626366, ppl: 13.823449 +epoch: 2, batch: 30278, sum loss: 3330.793213, avg loss: 2.301861, ppl: 9.992764 +epoch: 2, batch: 30279, sum loss: 4180.706543, avg loss: 2.509428, ppl: 12.297888 +epoch: 2, batch: 30280, sum loss: 3615.149902, avg loss: 2.521025, ppl: 12.441341 +epoch: 2, batch: 30281, sum loss: 3965.460449, avg loss: 2.290849, ppl: 9.883329 +epoch: 2, batch: 30282, sum loss: 3771.449219, avg loss: 2.504282, ppl: 12.234777 +epoch: 2, batch: 30283, sum loss: 4906.473633, avg loss: 2.625186, ppl: 13.807148 +epoch: 2, batch: 30284, sum loss: 3398.236816, avg loss: 2.253473, ppl: 9.520740 +epoch: 2, batch: 30285, sum loss: 3830.852539, avg loss: 2.482730, ppl: 11.973910 +epoch: 2, batch: 30286, sum loss: 4207.131836, avg loss: 2.570026, ppl: 13.066160 +epoch: 2, batch: 30287, sum loss: 4084.151123, avg loss: 2.228124, ppl: 9.282435 +epoch: 2, batch: 30288, sum loss: 4314.762695, avg loss: 2.520305, ppl: 12.432390 +epoch: 2, batch: 30289, sum 
loss: 4495.694824, avg loss: 2.501778, ppl: 12.204172 +epoch: 2, batch: 30290, sum loss: 3598.850586, avg loss: 2.397635, ppl: 10.997143 +epoch: 2, batch: 30291, sum loss: 3165.374756, avg loss: 2.137323, ppl: 8.476712 +epoch: 2, batch: 30292, sum loss: 3530.976562, avg loss: 2.350850, ppl: 10.494485 +epoch: 2, batch: 30293, sum loss: 4476.667969, avg loss: 2.327960, ppl: 10.257001 +epoch: 2, batch: 30294, sum loss: 3980.364990, avg loss: 2.512857, ppl: 12.340132 +epoch: 2, batch: 30295, sum loss: 4311.563477, avg loss: 2.654904, ppl: 14.223616 +epoch: 2, batch: 30296, sum loss: 3500.625977, avg loss: 2.412561, ppl: 11.162511 +epoch: 2, batch: 30297, sum loss: 3937.234131, avg loss: 2.275858, ppl: 9.736268 +epoch: 2, batch: 30298, sum loss: 4385.073242, avg loss: 2.397525, ppl: 10.995929 +epoch: 2, batch: 30299, sum loss: 3507.031006, avg loss: 2.212638, ppl: 9.139794 +epoch: 2, batch: 30300, sum loss: 3805.308594, avg loss: 2.439301, ppl: 11.465018 +epoch: 2, batch: 30301, sum loss: 4049.865967, avg loss: 2.432352, ppl: 11.385631 +epoch: 2, batch: 30302, sum loss: 4856.886230, avg loss: 2.403209, ppl: 11.058612 +epoch: 2, batch: 30303, sum loss: 4922.515137, avg loss: 2.611414, ppl: 13.618293 +epoch: 2, batch: 30304, sum loss: 4294.202148, avg loss: 2.389651, ppl: 10.909684 +epoch: 2, batch: 30305, sum loss: 4191.592773, avg loss: 2.399309, ppl: 11.015561 +epoch: 2, batch: 30306, sum loss: 3597.870117, avg loss: 2.282912, ppl: 9.805197 +epoch: 2, batch: 30307, sum loss: 3179.509766, avg loss: 2.254972, ppl: 9.535022 +epoch: 2, batch: 30308, sum loss: 4016.708008, avg loss: 2.386636, ppl: 10.876841 +epoch: 2, batch: 30309, sum loss: 3222.885254, avg loss: 2.264853, ppl: 9.629704 +epoch: 2, batch: 30310, sum loss: 4027.036133, avg loss: 2.520048, ppl: 12.429189 +epoch: 2, batch: 30311, sum loss: 4741.212402, avg loss: 2.935735, ppl: 18.835346 +epoch: 2, batch: 30312, sum loss: 4041.765869, avg loss: 2.476572, ppl: 11.900403 +epoch: 2, batch: 30313, sum loss: 
4234.386230, avg loss: 2.343324, ppl: 10.415798 +epoch: 2, batch: 30314, sum loss: 3951.257812, avg loss: 2.243758, ppl: 9.428698 +epoch: 2, batch: 30315, sum loss: 4831.675781, avg loss: 2.789651, ppl: 16.275341 +epoch: 2, batch: 30316, sum loss: 4165.112305, avg loss: 2.611356, ppl: 13.617500 +epoch: 2, batch: 30317, sum loss: 4251.985840, avg loss: 2.336256, ppl: 10.342443 +epoch: 2, batch: 30318, sum loss: 4644.131836, avg loss: 2.568657, ppl: 13.048287 +epoch: 2, batch: 30319, sum loss: 3993.358643, avg loss: 2.381251, ppl: 10.818432 +epoch: 2, batch: 30320, sum loss: 4881.931641, avg loss: 2.542673, ppl: 12.713608 +epoch: 2, batch: 30321, sum loss: 4097.108887, avg loss: 2.457774, ppl: 11.678782 +epoch: 2, batch: 30322, sum loss: 3736.331055, avg loss: 2.480964, ppl: 11.952778 +epoch: 2, batch: 30323, sum loss: 4178.720215, avg loss: 2.474080, ppl: 11.870776 +epoch: 2, batch: 30324, sum loss: 3855.762939, avg loss: 2.270767, ppl: 9.686832 +epoch: 2, batch: 30325, sum loss: 5066.404785, avg loss: 2.825658, ppl: 16.872044 +epoch: 2, batch: 30326, sum loss: 4244.400391, avg loss: 2.662735, ppl: 14.335449 +epoch: 2, batch: 30327, sum loss: 4048.979492, avg loss: 2.433281, ppl: 11.396212 +epoch: 2, batch: 30328, sum loss: 3644.354004, avg loss: 2.368001, ppl: 10.676032 +epoch: 2, batch: 30329, sum loss: 4541.853516, avg loss: 2.571831, ppl: 13.089769 +epoch: 2, batch: 30330, sum loss: 3888.764893, avg loss: 2.563457, ppl: 12.980618 +epoch: 2, batch: 30331, sum loss: 3913.461182, avg loss: 2.295285, ppl: 9.927267 +epoch: 2, batch: 30332, sum loss: 3557.319336, avg loss: 2.340342, ppl: 10.384786 +epoch: 2, batch: 30333, sum loss: 4025.571533, avg loss: 2.241410, ppl: 9.406581 +epoch: 2, batch: 30334, sum loss: 3483.383057, avg loss: 2.227227, ppl: 9.274113 +epoch: 2, batch: 30335, sum loss: 4731.918945, avg loss: 2.645008, ppl: 14.083555 +epoch: 2, batch: 30336, sum loss: 3696.890625, avg loss: 2.345743, ppl: 10.441025 +epoch: 2, batch: 30337, sum loss: 3358.468262, 
avg loss: 2.388669, ppl: 10.898975 +epoch: 2, batch: 30338, sum loss: 4483.591309, avg loss: 2.582714, ppl: 13.233002 +epoch: 2, batch: 30339, sum loss: 4356.743164, avg loss: 2.502437, ppl: 12.212220 +epoch: 2, batch: 30340, sum loss: 3620.957764, avg loss: 2.485215, ppl: 12.003697 +epoch: 2, batch: 30341, sum loss: 3841.616455, avg loss: 2.297618, ppl: 9.950449 +epoch: 2, batch: 30342, sum loss: 4143.889648, avg loss: 2.343829, ppl: 10.421064 +epoch: 2, batch: 30343, sum loss: 3608.106689, avg loss: 2.267823, ppl: 9.658354 +epoch: 2, batch: 30344, sum loss: 4550.122559, avg loss: 2.868930, ppl: 17.618151 +epoch: 2, batch: 30345, sum loss: 4302.388184, avg loss: 2.821238, ppl: 16.797634 +epoch: 2, batch: 30346, sum loss: 4237.748047, avg loss: 2.489864, ppl: 12.059631 +epoch: 2, batch: 30347, sum loss: 4508.288086, avg loss: 2.917986, ppl: 18.503981 +epoch: 2, batch: 30348, sum loss: 3553.726318, avg loss: 2.336441, ppl: 10.344354 +epoch: 2, batch: 30349, sum loss: 3436.895508, avg loss: 2.314408, ppl: 10.118927 +epoch: 2, batch: 30350, sum loss: 3727.709961, avg loss: 2.347425, ppl: 10.458599 +epoch: 2, batch: 30351, sum loss: 4242.461914, avg loss: 2.287041, ppl: 9.845765 +epoch: 2, batch: 30352, sum loss: 4118.994141, avg loss: 2.339009, ppl: 10.370950 +epoch: 2, batch: 30353, sum loss: 3634.432617, avg loss: 2.278641, ppl: 9.763405 +epoch: 2, batch: 30354, sum loss: 3971.305176, avg loss: 2.336062, ppl: 10.340435 +epoch: 2, batch: 30355, sum loss: 4851.734375, avg loss: 2.636812, ppl: 13.968600 +epoch: 2, batch: 30356, sum loss: 4967.562500, avg loss: 2.767444, ppl: 15.917902 +epoch: 2, batch: 30357, sum loss: 4876.362305, avg loss: 2.826877, ppl: 16.892620 +epoch: 2, batch: 30358, sum loss: 4066.252197, avg loss: 2.608244, ppl: 13.575191 +epoch: 2, batch: 30359, sum loss: 3012.318359, avg loss: 2.109467, ppl: 8.243844 +epoch: 2, batch: 30360, sum loss: 3595.738525, avg loss: 2.233378, ppl: 9.331333 +epoch: 2, batch: 30361, sum loss: 3446.425781, avg loss: 
2.254039, ppl: 9.526137 +epoch: 2, batch: 30362, sum loss: 3882.283447, avg loss: 2.320552, ppl: 10.181293 +epoch: 2, batch: 30363, sum loss: 5555.923828, avg loss: 2.827442, ppl: 16.902172 +epoch: 2, batch: 30364, sum loss: 3919.353027, avg loss: 2.256392, ppl: 9.548578 +epoch: 2, batch: 30365, sum loss: 3632.386475, avg loss: 2.246374, ppl: 9.453395 +epoch: 2, batch: 30366, sum loss: 4881.575684, avg loss: 2.744000, ppl: 15.549053 +epoch: 2, batch: 30367, sum loss: 5006.567383, avg loss: 2.674448, ppl: 14.504349 +epoch: 2, batch: 30368, sum loss: 3983.358887, avg loss: 2.282727, ppl: 9.803378 +epoch: 2, batch: 30369, sum loss: 3773.276855, avg loss: 2.353884, ppl: 10.526380 +epoch: 2, batch: 30370, sum loss: 3493.263184, avg loss: 2.335069, ppl: 10.330173 +epoch: 2, batch: 30371, sum loss: 5411.775879, avg loss: 2.861859, ppl: 17.494024 +epoch: 2, batch: 30372, sum loss: 3930.416992, avg loss: 2.392220, ppl: 10.937744 +epoch: 2, batch: 30373, sum loss: 4492.563965, avg loss: 2.483452, ppl: 11.982555 +epoch: 2, batch: 30374, sum loss: 3645.627930, avg loss: 2.618986, ppl: 13.721798 +epoch: 2, batch: 30375, sum loss: 4067.981934, avg loss: 2.483505, ppl: 11.983195 +epoch: 2, batch: 30376, sum loss: 3779.035889, avg loss: 2.322702, ppl: 10.203203 +epoch: 2, batch: 30377, sum loss: 4431.226562, avg loss: 2.549613, ppl: 12.802142 +epoch: 2, batch: 30378, sum loss: 4408.149414, avg loss: 2.534876, ppl: 12.614868 +epoch: 2, batch: 30379, sum loss: 4391.769043, avg loss: 2.568286, ppl: 13.043448 +epoch: 2, batch: 30380, sum loss: 4197.101562, avg loss: 2.410742, ppl: 11.142223 +epoch: 2, batch: 30381, sum loss: 4228.974121, avg loss: 2.454425, ppl: 11.639737 +epoch: 2, batch: 30382, sum loss: 3816.072021, avg loss: 2.463571, ppl: 11.746688 +epoch: 2, batch: 30383, sum loss: 4971.416016, avg loss: 2.754247, ppl: 15.709210 +epoch: 2, batch: 30384, sum loss: 4358.262695, avg loss: 2.350735, ppl: 10.493279 +epoch: 2, batch: 30385, sum loss: 3889.786621, avg loss: 2.469706, 
ppl: 11.818967 +epoch: 2, batch: 30386, sum loss: 3319.445801, avg loss: 2.255058, ppl: 9.535850 +epoch: 2, batch: 30387, sum loss: 3771.906006, avg loss: 2.488065, ppl: 12.037957 +epoch: 2, batch: 30388, sum loss: 3434.459473, avg loss: 2.306554, ppl: 10.039771 +epoch: 2, batch: 30389, sum loss: 4243.155273, avg loss: 2.484283, ppl: 11.992516 +epoch: 2, batch: 30390, sum loss: 4780.377930, avg loss: 2.679584, ppl: 14.579031 +epoch: 2, batch: 30391, sum loss: 3687.490234, avg loss: 2.564319, ppl: 12.991803 +epoch: 2, batch: 30392, sum loss: 3992.678223, avg loss: 2.289380, ppl: 9.868814 +epoch: 2, batch: 30393, sum loss: 3903.813232, avg loss: 2.539891, ppl: 12.678295 +epoch: 2, batch: 30394, sum loss: 4429.412598, avg loss: 2.531093, ppl: 12.567233 +epoch: 2, batch: 30395, sum loss: 4558.916016, avg loss: 2.676991, ppl: 14.541276 +epoch: 2, batch: 30396, sum loss: 3678.907471, avg loss: 2.474047, ppl: 11.870386 +epoch: 2, batch: 30397, sum loss: 4236.103027, avg loss: 2.381171, ppl: 10.817560 +epoch: 2, batch: 30398, sum loss: 3542.438965, avg loss: 2.444747, ppl: 11.527637 +epoch: 2, batch: 30399, sum loss: 3738.578613, avg loss: 2.466081, ppl: 11.776204 +epoch: 2, batch: 30400, sum loss: 3540.129883, avg loss: 2.281012, ppl: 9.786575 +epoch: 2, batch: 30401, sum loss: 4432.362305, avg loss: 2.639882, ppl: 14.011551 +epoch: 2, batch: 30402, sum loss: 3487.904785, avg loss: 2.237271, ppl: 9.367728 +epoch: 2, batch: 30403, sum loss: 3407.195557, avg loss: 2.314671, ppl: 10.121593 +epoch: 2, batch: 30404, sum loss: 3968.230957, avg loss: 2.238145, ppl: 9.375924 +epoch: 2, batch: 30405, sum loss: 4021.288086, avg loss: 2.473117, ppl: 11.859353 +epoch: 2, batch: 30406, sum loss: 4060.568359, avg loss: 2.561873, ppl: 12.960065 +epoch: 2, batch: 30407, sum loss: 3273.028320, avg loss: 2.220508, ppl: 9.212006 +epoch: 2, batch: 30408, sum loss: 4556.268066, avg loss: 2.796972, ppl: 16.394932 +epoch: 2, batch: 30409, sum loss: 4143.525879, avg loss: 2.441677, ppl: 
11.492298 +epoch: 2, batch: 30410, sum loss: 5090.999023, avg loss: 2.576416, ppl: 13.149931 +epoch: 2, batch: 30411, sum loss: 3766.783203, avg loss: 2.399225, ppl: 11.014637 +epoch: 2, batch: 30412, sum loss: 3907.245850, avg loss: 2.272976, ppl: 9.708251 +epoch: 2, batch: 30413, sum loss: 4704.090820, avg loss: 2.490255, ppl: 12.064348 +epoch: 2, batch: 30414, sum loss: 4267.374023, avg loss: 2.683883, ppl: 14.641836 +epoch: 2, batch: 30415, sum loss: 3711.455078, avg loss: 2.371537, ppl: 10.713844 +epoch: 2, batch: 30416, sum loss: 3919.892822, avg loss: 2.499932, ppl: 12.181661 +epoch: 2, batch: 30417, sum loss: 3622.085938, avg loss: 2.205899, ppl: 9.078409 +epoch: 2, batch: 30418, sum loss: 4407.104492, avg loss: 2.603133, ppl: 13.505989 +epoch: 2, batch: 30419, sum loss: 4438.187500, avg loss: 2.475286, ppl: 11.885106 +epoch: 2, batch: 30420, sum loss: 4599.630371, avg loss: 2.613426, ppl: 13.645724 +epoch: 2, batch: 30421, sum loss: 4005.575195, avg loss: 2.523992, ppl: 12.478312 +epoch: 2, batch: 30422, sum loss: 4792.331543, avg loss: 2.523608, ppl: 12.473519 +epoch: 2, batch: 30423, sum loss: 4618.063965, avg loss: 2.446008, ppl: 11.542183 +epoch: 2, batch: 30424, sum loss: 5104.527344, avg loss: 2.792411, ppl: 16.320322 +epoch: 2, batch: 30425, sum loss: 4429.618164, avg loss: 2.476030, ppl: 11.893956 +epoch: 2, batch: 30426, sum loss: 4725.303223, avg loss: 2.483081, ppl: 11.978113 +epoch: 2, batch: 30427, sum loss: 3652.447754, avg loss: 2.338315, ppl: 10.363756 +epoch: 2, batch: 30428, sum loss: 4381.629883, avg loss: 2.642720, ppl: 14.051374 +epoch: 2, batch: 30429, sum loss: 5490.248047, avg loss: 2.862486, ppl: 17.504988 +epoch: 2, batch: 30430, sum loss: 3937.630859, avg loss: 2.369212, ppl: 10.688971 +epoch: 2, batch: 30431, sum loss: 3888.026123, avg loss: 2.335151, ppl: 10.331019 +epoch: 2, batch: 30432, sum loss: 3595.211670, avg loss: 2.212438, ppl: 9.137966 +epoch: 2, batch: 30433, sum loss: 4516.620117, avg loss: 2.478935, ppl: 11.928556 
+epoch: 2, batch: 30434, sum loss: 5069.766602, avg loss: 2.756806, ppl: 15.749465 +epoch: 2, batch: 30435, sum loss: 3460.876221, avg loss: 2.210010, ppl: 9.115810 +epoch: 2, batch: 30436, sum loss: 3697.128906, avg loss: 2.231218, ppl: 9.311203 +epoch: 2, batch: 30437, sum loss: 5479.626465, avg loss: 2.784363, ppl: 16.189507 +epoch: 2, batch: 30438, sum loss: 3564.812988, avg loss: 2.431660, ppl: 11.377750 +epoch: 2, batch: 30439, sum loss: 3605.705322, avg loss: 2.178674, ppl: 8.834583 +epoch: 2, batch: 30440, sum loss: 4465.703125, avg loss: 2.599362, ppl: 13.455146 +epoch: 2, batch: 30441, sum loss: 4104.595703, avg loss: 2.390563, ppl: 10.919635 +epoch: 2, batch: 30442, sum loss: 3887.550537, avg loss: 2.423660, ppl: 11.287095 +epoch: 2, batch: 30443, sum loss: 3270.373535, avg loss: 2.217202, ppl: 9.181608 +epoch: 2, batch: 30444, sum loss: 5132.419922, avg loss: 2.807670, ppl: 16.571257 +epoch: 2, batch: 30445, sum loss: 3703.930908, avg loss: 2.435195, ppl: 11.418042 +epoch: 2, batch: 30446, sum loss: 4359.360352, avg loss: 2.648457, ppl: 14.132220 +epoch: 2, batch: 30447, sum loss: 4095.057861, avg loss: 2.456543, ppl: 11.664423 +epoch: 2, batch: 30448, sum loss: 4713.371094, avg loss: 2.475510, ppl: 11.887770 +epoch: 2, batch: 30449, sum loss: 4318.762695, avg loss: 2.348430, ppl: 10.469122 +epoch: 2, batch: 30450, sum loss: 3852.958496, avg loss: 2.754081, ppl: 15.706593 +epoch: 2, batch: 30451, sum loss: 4774.614258, avg loss: 2.680861, ppl: 14.597663 +epoch: 2, batch: 30452, sum loss: 4958.505859, avg loss: 2.765480, ppl: 15.886668 +epoch: 2, batch: 30453, sum loss: 4292.295898, avg loss: 2.595100, ppl: 13.397932 +epoch: 2, batch: 30454, sum loss: 3778.612061, avg loss: 2.440964, ppl: 11.484106 +epoch: 2, batch: 30455, sum loss: 3911.742188, avg loss: 2.329805, ppl: 10.275934 +epoch: 2, batch: 30456, sum loss: 4588.400391, avg loss: 2.413677, ppl: 11.174978 +epoch: 2, batch: 30457, sum loss: 3927.415039, avg loss: 2.388939, ppl: 10.901917 +epoch: 2, 
batch: 30458, sum loss: 4223.053711, avg loss: 2.413173, ppl: 11.169351 +epoch: 2, batch: 30459, sum loss: 4134.770508, avg loss: 2.484838, ppl: 11.999179 +epoch: 2, batch: 30460, sum loss: 5172.626953, avg loss: 2.577293, ppl: 13.161464 +epoch: 2, batch: 30461, sum loss: 3884.776123, avg loss: 2.417409, ppl: 11.216759 +epoch: 2, batch: 30462, sum loss: 4246.750977, avg loss: 2.637733, ppl: 13.981478 +epoch: 2, batch: 30463, sum loss: 4396.616699, avg loss: 2.499498, ppl: 12.176378 +epoch: 2, batch: 30464, sum loss: 3937.898193, avg loss: 2.209820, ppl: 9.114072 +epoch: 2, batch: 30465, sum loss: 4676.129395, avg loss: 2.721845, ppl: 15.208354 +epoch: 2, batch: 30466, sum loss: 3747.996338, avg loss: 2.394886, ppl: 10.966946 +epoch: 2, batch: 30467, sum loss: 3638.859619, avg loss: 2.364431, ppl: 10.637985 +epoch: 2, batch: 30468, sum loss: 4288.926270, avg loss: 2.351385, ppl: 10.500104 +epoch: 2, batch: 30469, sum loss: 3860.479492, avg loss: 2.418847, ppl: 11.232899 +epoch: 2, batch: 30470, sum loss: 3403.083984, avg loss: 2.444744, ppl: 11.527602 +epoch: 2, batch: 30471, sum loss: 4779.088379, avg loss: 2.637466, ppl: 13.977738 +epoch: 2, batch: 30472, sum loss: 4204.299805, avg loss: 2.326674, ppl: 10.243812 +epoch: 2, batch: 30473, sum loss: 3502.981934, avg loss: 2.493226, ppl: 12.100244 +epoch: 2, batch: 30474, sum loss: 5518.007812, avg loss: 2.555816, ppl: 12.881812 +epoch: 2, batch: 30475, sum loss: 3945.623535, avg loss: 2.449177, ppl: 11.578811 +epoch: 2, batch: 30476, sum loss: 4058.419189, avg loss: 2.500566, ppl: 12.189394 +epoch: 2, batch: 30477, sum loss: 4212.063965, avg loss: 2.565203, ppl: 13.003304 +epoch: 2, batch: 30478, sum loss: 4194.335938, avg loss: 2.520635, ppl: 12.436487 +epoch: 2, batch: 30479, sum loss: 3817.284180, avg loss: 2.366574, ppl: 10.660809 +epoch: 2, batch: 30480, sum loss: 3264.711426, avg loss: 2.338619, ppl: 10.366905 +epoch: 2, batch: 30481, sum loss: 4164.465332, avg loss: 2.422609, ppl: 11.275242 +epoch: 2, batch: 
30482, sum loss: 4842.840332, avg loss: 2.693460, ppl: 14.782728 +epoch: 2, batch: 30483, sum loss: 3364.628174, avg loss: 2.399878, ppl: 11.021827 +epoch: 2, batch: 30484, sum loss: 4234.622070, avg loss: 2.331840, ppl: 10.296873 +epoch: 2, batch: 30485, sum loss: 4305.987793, avg loss: 2.286770, ppl: 9.843092 +epoch: 2, batch: 30486, sum loss: 3320.772949, avg loss: 2.340221, ppl: 10.383528 +epoch: 2, batch: 30487, sum loss: 4789.972656, avg loss: 2.738692, ppl: 15.466745 +epoch: 2, batch: 30488, sum loss: 4729.171387, avg loss: 2.725747, ppl: 15.267817 +epoch: 2, batch: 30489, sum loss: 4190.423828, avg loss: 2.446249, ppl: 11.544955 +epoch: 2, batch: 30490, sum loss: 3566.493408, avg loss: 2.408166, ppl: 11.113557 +epoch: 2, batch: 30491, sum loss: 4031.820312, avg loss: 2.515172, ppl: 12.368733 +epoch: 2, batch: 30492, sum loss: 4420.474609, avg loss: 2.397221, ppl: 10.992581 +epoch: 2, batch: 30493, sum loss: 2941.229980, avg loss: 2.354868, ppl: 10.536737 +epoch: 2, batch: 30494, sum loss: 4104.504395, avg loss: 2.384953, ppl: 10.858553 +epoch: 2, batch: 30495, sum loss: 3833.341797, avg loss: 2.287197, ppl: 9.847296 +epoch: 2, batch: 30496, sum loss: 4620.264160, avg loss: 2.538607, ppl: 12.662019 +epoch: 2, batch: 30497, sum loss: 3749.338867, avg loss: 2.340411, ppl: 10.385506 +epoch: 2, batch: 30498, sum loss: 4095.893555, avg loss: 2.320620, ppl: 10.181981 +epoch: 2, batch: 30499, sum loss: 5136.611328, avg loss: 2.716347, ppl: 15.124962 +epoch: 2, batch: 30500, sum loss: 3700.011230, avg loss: 2.347723, ppl: 10.461719 +epoch: 2, batch: 30501, sum loss: 3639.528076, avg loss: 2.130871, ppl: 8.422200 +epoch: 2, batch: 30502, sum loss: 3342.617188, avg loss: 2.036939, ppl: 7.667105 +epoch: 2, batch: 30503, sum loss: 4130.082031, avg loss: 2.404006, ppl: 11.067421 +epoch: 2, batch: 30504, sum loss: 3229.149902, avg loss: 2.270851, ppl: 9.687640 +epoch: 2, batch: 30505, sum loss: 4316.709961, avg loss: 2.819536, ppl: 16.769075 +epoch: 2, batch: 30506, sum 
loss: 3447.155518, avg loss: 2.348199, ppl: 10.466699 +epoch: 2, batch: 30507, sum loss: 3865.207031, avg loss: 2.689775, ppl: 14.728365 +epoch: 2, batch: 30508, sum loss: 3400.695312, avg loss: 2.337248, ppl: 10.352703 +epoch: 2, batch: 30509, sum loss: 4165.323242, avg loss: 2.533652, ppl: 12.599430 +epoch: 2, batch: 30510, sum loss: 4548.308105, avg loss: 2.686538, ppl: 14.680759 +epoch: 2, batch: 30511, sum loss: 4029.532959, avg loss: 2.491981, ppl: 12.085191 +epoch: 2, batch: 30512, sum loss: 4695.977539, avg loss: 2.472869, ppl: 11.856410 +epoch: 2, batch: 30513, sum loss: 3918.077393, avg loss: 2.462651, ppl: 11.735880 +epoch: 2, batch: 30514, sum loss: 3916.714844, avg loss: 2.563295, ppl: 12.978513 +epoch: 2, batch: 30515, sum loss: 4402.121582, avg loss: 2.557886, ppl: 12.908499 +epoch: 2, batch: 30516, sum loss: 4197.357910, avg loss: 2.508881, ppl: 12.291170 +epoch: 2, batch: 30517, sum loss: 4743.114258, avg loss: 2.559695, ppl: 12.931870 +epoch: 2, batch: 30518, sum loss: 5028.528809, avg loss: 2.605455, ppl: 13.537389 +epoch: 2, batch: 30519, sum loss: 4457.891602, avg loss: 2.670996, ppl: 14.454354 +epoch: 2, batch: 30520, sum loss: 3989.743896, avg loss: 2.404909, ppl: 11.077421 +epoch: 2, batch: 30521, sum loss: 4008.806396, avg loss: 2.426638, ppl: 11.320762 +epoch: 2, batch: 30522, sum loss: 4752.995605, avg loss: 2.498946, ppl: 12.169662 +epoch: 2, batch: 30523, sum loss: 3828.329834, avg loss: 2.394202, ppl: 10.959455 +epoch: 2, batch: 30524, sum loss: 3767.237061, avg loss: 2.468701, ppl: 11.807096 +epoch: 2, batch: 30525, sum loss: 5007.506836, avg loss: 2.531601, ppl: 12.573617 +epoch: 2, batch: 30526, sum loss: 4203.016113, avg loss: 2.515270, ppl: 12.369948 +epoch: 2, batch: 30527, sum loss: 3916.708984, avg loss: 2.472670, ppl: 11.854053 +epoch: 2, batch: 30528, sum loss: 3785.329346, avg loss: 2.136190, ppl: 8.467120 +epoch: 2, batch: 30529, sum loss: 3232.011230, avg loss: 2.122135, ppl: 8.348941 +epoch: 2, batch: 30530, sum loss: 
4590.457031, avg loss: 2.580358, ppl: 13.201864 +epoch: 2, batch: 30531, sum loss: 3929.873535, avg loss: 2.364545, ppl: 10.639195 +epoch: 2, batch: 30532, sum loss: 3781.892334, avg loss: 2.357788, ppl: 10.567554 +epoch: 2, batch: 30533, sum loss: 4259.945312, avg loss: 2.453886, ppl: 11.633461 +epoch: 2, batch: 30534, sum loss: 4783.813965, avg loss: 2.412412, ppl: 11.160853 +epoch: 2, batch: 30535, sum loss: 4601.899902, avg loss: 2.689597, ppl: 14.725739 +epoch: 2, batch: 30536, sum loss: 3906.954346, avg loss: 2.339494, ppl: 10.375982 +epoch: 2, batch: 30537, sum loss: 3830.875488, avg loss: 2.360367, ppl: 10.594837 +epoch: 2, batch: 30538, sum loss: 3939.376953, avg loss: 2.568042, ppl: 13.040270 +epoch: 2, batch: 30539, sum loss: 3927.580566, avg loss: 2.310342, ppl: 10.077867 +epoch: 2, batch: 30540, sum loss: 4084.273926, avg loss: 2.584984, ppl: 13.263071 +epoch: 2, batch: 30541, sum loss: 3963.594971, avg loss: 2.224240, ppl: 9.246449 +epoch: 2, batch: 30542, sum loss: 3968.124023, avg loss: 2.578378, ppl: 13.175753 +epoch: 2, batch: 30543, sum loss: 4375.091797, avg loss: 2.460682, ppl: 11.712793 +epoch: 2, batch: 30544, sum loss: 4462.312988, avg loss: 2.570457, ppl: 13.071794 +epoch: 2, batch: 30545, sum loss: 4497.314453, avg loss: 2.561113, ppl: 12.950225 +epoch: 2, batch: 30546, sum loss: 4420.879883, avg loss: 2.406576, ppl: 11.095902 +epoch: 2, batch: 30547, sum loss: 4836.056641, avg loss: 2.522722, ppl: 12.462467 +epoch: 2, batch: 30548, sum loss: 3270.404785, avg loss: 2.200811, ppl: 9.032332 +epoch: 2, batch: 30549, sum loss: 3855.512207, avg loss: 2.322598, ppl: 10.202143 +epoch: 2, batch: 30550, sum loss: 4884.455566, avg loss: 2.569414, ppl: 13.058166 +epoch: 2, batch: 30551, sum loss: 4073.174316, avg loss: 2.275516, ppl: 9.732943 +epoch: 2, batch: 30552, sum loss: 5010.287109, avg loss: 2.598697, ppl: 13.446202 +epoch: 2, batch: 30553, sum loss: 3850.761963, avg loss: 2.396243, ppl: 10.981838 +epoch: 2, batch: 30554, sum loss: 
4458.431152, avg loss: 2.463222, ppl: 11.742582 +epoch: 2, batch: 30555, sum loss: 3747.242920, avg loss: 2.233160, ppl: 9.329303 +epoch: 2, batch: 30556, sum loss: 4547.437988, avg loss: 2.569174, ppl: 13.055037 +epoch: 2, batch: 30557, sum loss: 4181.619141, avg loss: 2.658372, ppl: 14.273032 +epoch: 2, batch: 30558, sum loss: 4264.643066, avg loss: 2.476564, ppl: 11.900304 +epoch: 2, batch: 30559, sum loss: 3725.189697, avg loss: 2.245443, ppl: 9.444597 +epoch: 2, batch: 30560, sum loss: 3600.074951, avg loss: 2.170027, ppl: 8.758521 +epoch: 2, batch: 30561, sum loss: 4912.034668, avg loss: 2.678318, ppl: 14.560575 +epoch: 2, batch: 30562, sum loss: 4412.812988, avg loss: 2.447484, ppl: 11.559222 +epoch: 2, batch: 30563, sum loss: 4854.995117, avg loss: 2.947781, ppl: 19.063602 +epoch: 2, batch: 30564, sum loss: 3246.020264, avg loss: 2.260460, ppl: 9.587495 +epoch: 2, batch: 30565, sum loss: 3717.325439, avg loss: 2.288993, ppl: 9.865003 +epoch: 2, batch: 30566, sum loss: 3881.947021, avg loss: 2.435349, ppl: 11.419808 +epoch: 2, batch: 30567, sum loss: 4564.776855, avg loss: 2.611428, ppl: 13.618491 +epoch: 2, batch: 30568, sum loss: 3551.558838, avg loss: 2.295772, ppl: 9.932099 +epoch: 2, batch: 30569, sum loss: 4412.214844, avg loss: 2.407100, ppl: 11.101725 +epoch: 2, batch: 30570, sum loss: 4301.233398, avg loss: 2.490581, ppl: 12.068286 +epoch: 2, batch: 30571, sum loss: 3841.159424, avg loss: 2.201237, ppl: 9.036188 +epoch: 2, batch: 30572, sum loss: 4074.348145, avg loss: 2.445587, ppl: 11.537322 +epoch: 2, batch: 30573, sum loss: 3875.671631, avg loss: 2.423810, ppl: 11.288782 +epoch: 2, batch: 30574, sum loss: 4292.457031, avg loss: 2.511678, ppl: 12.325592 +epoch: 2, batch: 30575, sum loss: 4709.840332, avg loss: 2.884164, ppl: 17.888613 +epoch: 2, batch: 30576, sum loss: 4264.544922, avg loss: 2.326539, ppl: 10.242427 +epoch: 2, batch: 30577, sum loss: 4307.049805, avg loss: 2.471056, ppl: 11.834932 +epoch: 2, batch: 30578, sum loss: 4475.946777, 
avg loss: 2.471534, ppl: 11.840590 +epoch: 2, batch: 30579, sum loss: 3803.317383, avg loss: 2.402601, ppl: 11.051885 +epoch: 2, batch: 30580, sum loss: 4385.863281, avg loss: 2.316885, ppl: 10.144027 +epoch: 2, batch: 30581, sum loss: 4101.376465, avg loss: 2.561759, ppl: 12.958594 +epoch: 2, batch: 30582, sum loss: 4680.325195, avg loss: 2.286432, ppl: 9.839763 +epoch: 2, batch: 30583, sum loss: 3945.524902, avg loss: 2.462874, ppl: 11.738504 +epoch: 2, batch: 30584, sum loss: 3778.592529, avg loss: 2.251843, ppl: 9.505238 +epoch: 2, batch: 30585, sum loss: 5173.458496, avg loss: 2.615499, ppl: 13.674045 +epoch: 2, batch: 30586, sum loss: 3351.739502, avg loss: 2.186392, ppl: 8.903036 +epoch: 2, batch: 30587, sum loss: 4754.948730, avg loss: 2.729592, ppl: 15.326627 +epoch: 2, batch: 30588, sum loss: 4235.316406, avg loss: 2.436891, ppl: 11.437425 +epoch: 2, batch: 30589, sum loss: 4577.260742, avg loss: 2.400242, ppl: 11.025840 +epoch: 2, batch: 30590, sum loss: 3947.355225, avg loss: 2.337096, ppl: 10.351135 +epoch: 2, batch: 30591, sum loss: 3921.150635, avg loss: 2.631645, ppl: 13.896607 +epoch: 2, batch: 30592, sum loss: 4781.525391, avg loss: 2.558333, ppl: 12.914276 +epoch: 2, batch: 30593, sum loss: 3472.443848, avg loss: 2.248992, ppl: 9.478179 +epoch: 2, batch: 30594, sum loss: 4152.373047, avg loss: 2.438270, ppl: 11.453205 +epoch: 2, batch: 30595, sum loss: 4278.026367, avg loss: 2.485779, ppl: 12.010476 +epoch: 2, batch: 30596, sum loss: 4760.595703, avg loss: 2.361407, ppl: 10.605862 +epoch: 2, batch: 30597, sum loss: 4115.979492, avg loss: 2.330679, ppl: 10.284925 +epoch: 2, batch: 30598, sum loss: 4169.215820, avg loss: 2.560943, ppl: 12.948027 +epoch: 2, batch: 30599, sum loss: 4487.173340, avg loss: 2.691766, ppl: 14.757709 +epoch: 2, batch: 30600, sum loss: 3366.802246, avg loss: 2.362668, ppl: 10.619246 +epoch: 2, batch: 30601, sum loss: 4603.352051, avg loss: 2.600764, ppl: 13.474029 +epoch: 2, batch: 30602, sum loss: 3567.306641, avg loss: 
2.405467, ppl: 11.083600 +epoch: 2, batch: 30603, sum loss: 4579.913086, avg loss: 2.583143, ppl: 13.238688 +epoch: 2, batch: 30604, sum loss: 4654.511719, avg loss: 2.670403, ppl: 14.445783 +epoch: 2, batch: 30605, sum loss: 4153.815430, avg loss: 2.518991, ppl: 12.416057 +epoch: 2, batch: 30606, sum loss: 5105.391602, avg loss: 2.497745, ppl: 12.155057 +epoch: 2, batch: 30607, sum loss: 4998.810059, avg loss: 2.722663, ppl: 15.220807 +epoch: 2, batch: 30608, sum loss: 4440.152832, avg loss: 2.661962, ppl: 14.324369 +epoch: 2, batch: 30609, sum loss: 4052.396484, avg loss: 2.531166, ppl: 12.568150 +epoch: 2, batch: 30610, sum loss: 3562.055420, avg loss: 2.456590, ppl: 11.664966 +epoch: 2, batch: 30611, sum loss: 4179.430664, avg loss: 2.507157, ppl: 12.269995 +epoch: 2, batch: 30612, sum loss: 2950.303955, avg loss: 2.223289, ppl: 9.237659 +epoch: 2, batch: 30613, sum loss: 3610.833740, avg loss: 2.319097, ppl: 10.166488 +epoch: 2, batch: 30614, sum loss: 3815.144531, avg loss: 2.430028, ppl: 11.359205 +epoch: 2, batch: 30615, sum loss: 4596.268555, avg loss: 2.521266, ppl: 12.444347 +epoch: 2, batch: 30616, sum loss: 4545.836426, avg loss: 2.562478, ppl: 12.967913 +epoch: 2, batch: 30617, sum loss: 4414.758301, avg loss: 2.630965, ppl: 13.887157 +epoch: 2, batch: 30618, sum loss: 4230.789062, avg loss: 2.445543, ppl: 11.536810 +epoch: 2, batch: 30619, sum loss: 3664.620361, avg loss: 2.412522, ppl: 11.162072 +epoch: 2, batch: 30620, sum loss: 4346.038574, avg loss: 2.398476, ppl: 11.006392 +epoch: 2, batch: 30621, sum loss: 3869.487061, avg loss: 2.221290, ppl: 9.219214 +epoch: 2, batch: 30622, sum loss: 4074.265625, avg loss: 2.635360, ppl: 13.948326 +epoch: 2, batch: 30623, sum loss: 3836.569336, avg loss: 2.480006, ppl: 11.941339 +epoch: 2, batch: 30624, sum loss: 4847.104980, avg loss: 2.675003, ppl: 14.512391 +epoch: 2, batch: 30625, sum loss: 4425.291504, avg loss: 2.729976, ppl: 15.332521 +epoch: 2, batch: 30626, sum loss: 3944.277344, avg loss: 2.413878, 
ppl: 11.177227 +epoch: 2, batch: 30627, sum loss: 4439.547852, avg loss: 2.554400, ppl: 12.863585 +epoch: 2, batch: 30628, sum loss: 4762.866211, avg loss: 2.677272, ppl: 14.545354 +epoch: 2, batch: 30629, sum loss: 3912.701660, avg loss: 2.482679, ppl: 11.973294 +epoch: 2, batch: 30630, sum loss: 4550.298340, avg loss: 2.554912, ppl: 12.870168 +epoch: 2, batch: 30631, sum loss: 4522.379883, avg loss: 2.468548, ppl: 11.805294 +epoch: 2, batch: 30632, sum loss: 3900.358887, avg loss: 2.324409, ppl: 10.220640 +epoch: 2, batch: 30633, sum loss: 3957.318359, avg loss: 2.517378, ppl: 12.396052 +epoch: 2, batch: 30634, sum loss: 4143.375977, avg loss: 2.467764, ppl: 11.796043 +epoch: 2, batch: 30635, sum loss: 4925.200195, avg loss: 2.555890, ppl: 12.882761 +epoch: 2, batch: 30636, sum loss: 3741.081543, avg loss: 2.351403, ppl: 10.500286 +epoch: 2, batch: 30637, sum loss: 3625.510742, avg loss: 2.240736, ppl: 9.400248 +epoch: 2, batch: 30638, sum loss: 3237.054199, avg loss: 2.193126, ppl: 8.963190 +epoch: 2, batch: 30639, sum loss: 4116.935059, avg loss: 2.587640, ppl: 13.298347 +epoch: 2, batch: 30640, sum loss: 3875.657959, avg loss: 2.357456, ppl: 10.564045 +epoch: 2, batch: 30641, sum loss: 3959.627686, avg loss: 2.559553, ppl: 12.930032 +epoch: 2, batch: 30642, sum loss: 3757.808838, avg loss: 2.206582, ppl: 9.084611 +epoch: 2, batch: 30643, sum loss: 4968.091797, avg loss: 2.843785, ppl: 17.180664 +epoch: 2, batch: 30644, sum loss: 4973.097168, avg loss: 2.764368, ppl: 15.869001 +epoch: 2, batch: 30645, sum loss: 4467.365723, avg loss: 2.563033, ppl: 12.975106 +epoch: 2, batch: 30646, sum loss: 4126.066895, avg loss: 2.442905, ppl: 11.506420 +epoch: 2, batch: 30647, sum loss: 4514.169434, avg loss: 2.421765, ppl: 11.265721 +epoch: 2, batch: 30648, sum loss: 4181.947266, avg loss: 2.473062, ppl: 11.858701 +epoch: 2, batch: 30649, sum loss: 5245.172852, avg loss: 2.798918, ppl: 16.426867 +epoch: 2, batch: 30650, sum loss: 4133.860840, avg loss: 2.519111, ppl: 
12.417548 +epoch: 2, batch: 30651, sum loss: 3545.826660, avg loss: 2.373378, ppl: 10.733589 +epoch: 2, batch: 30652, sum loss: 4314.603027, avg loss: 2.418499, ppl: 11.228997 +epoch: 2, batch: 30653, sum loss: 3880.576172, avg loss: 2.536324, ppl: 12.633149 +epoch: 2, batch: 30654, sum loss: 4861.413086, avg loss: 2.513657, ppl: 12.350013 +epoch: 2, batch: 30655, sum loss: 5202.718262, avg loss: 2.650392, ppl: 14.159582 +epoch: 2, batch: 30656, sum loss: 5103.965332, avg loss: 2.792104, ppl: 16.315304 +epoch: 2, batch: 30657, sum loss: 4256.639160, avg loss: 2.153080, ppl: 8.611341 +epoch: 2, batch: 30658, sum loss: 3889.000244, avg loss: 2.371342, ppl: 10.711755 +epoch: 2, batch: 30659, sum loss: 3648.808838, avg loss: 2.263529, ppl: 9.616968 +epoch: 2, batch: 30660, sum loss: 4279.512695, avg loss: 2.517360, ppl: 12.395834 +epoch: 2, batch: 30661, sum loss: 4218.843750, avg loss: 2.416291, ppl: 11.204226 +epoch: 2, batch: 30662, sum loss: 4175.917480, avg loss: 2.393076, ppl: 10.947115 +epoch: 2, batch: 30663, sum loss: 4273.987305, avg loss: 2.669574, ppl: 14.433816 +epoch: 2, batch: 30664, sum loss: 4547.339844, avg loss: 2.454042, ppl: 11.635282 +epoch: 2, batch: 30665, sum loss: 4461.967773, avg loss: 2.459740, ppl: 11.701765 +epoch: 2, batch: 30666, sum loss: 3971.936279, avg loss: 2.430806, ppl: 11.368037 +epoch: 2, batch: 30667, sum loss: 4655.372559, avg loss: 2.615378, ppl: 13.672382 +epoch: 2, batch: 30668, sum loss: 6174.805664, avg loss: 2.996024, ppl: 20.005838 +epoch: 2, batch: 30669, sum loss: 3819.009033, avg loss: 2.211354, ppl: 9.128072 +epoch: 2, batch: 30670, sum loss: 5196.632812, avg loss: 2.775979, ppl: 16.054340 +epoch: 2, batch: 30671, sum loss: 4396.108398, avg loss: 2.582907, ppl: 13.235564 +epoch: 2, batch: 30672, sum loss: 4232.798340, avg loss: 2.421510, ppl: 11.262848 +epoch: 2, batch: 30673, sum loss: 3060.187988, avg loss: 2.158102, ppl: 8.654692 +epoch: 2, batch: 30674, sum loss: 4036.200684, avg loss: 2.441743, ppl: 11.493052 
+epoch: 2, batch: 30675, sum loss: 4244.943848, avg loss: 2.833741, ppl: 17.008968 +epoch: 2, batch: 30676, sum loss: 4288.775391, avg loss: 2.440965, ppl: 11.484117 +epoch: 2, batch: 30677, sum loss: 5076.928711, avg loss: 2.575814, ppl: 13.142004 +epoch: 2, batch: 30678, sum loss: 4566.130859, avg loss: 2.461526, ppl: 11.722689 +epoch: 2, batch: 30679, sum loss: 4594.314453, avg loss: 2.529909, ppl: 12.552360 +epoch: 2, batch: 30680, sum loss: 3880.105957, avg loss: 2.495245, ppl: 12.124706 +epoch: 2, batch: 30681, sum loss: 4832.049805, avg loss: 2.470373, ppl: 11.826859 +epoch: 2, batch: 30682, sum loss: 4176.461426, avg loss: 2.440948, ppl: 11.483920 +epoch: 2, batch: 30683, sum loss: 4673.345215, avg loss: 2.369850, ppl: 10.695792 +epoch: 2, batch: 30684, sum loss: 3730.850098, avg loss: 2.154070, ppl: 8.619873 +epoch: 2, batch: 30685, sum loss: 4865.513184, avg loss: 2.683681, ppl: 14.638880 +epoch: 2, batch: 30686, sum loss: 3977.187500, avg loss: 2.528409, ppl: 12.533549 +epoch: 2, batch: 30687, sum loss: 4233.476562, avg loss: 2.419129, ppl: 11.236073 +epoch: 2, batch: 30688, sum loss: 4272.953125, avg loss: 2.709545, ppl: 15.022445 +epoch: 2, batch: 30689, sum loss: 5759.881836, avg loss: 2.777185, ppl: 16.073709 +epoch: 2, batch: 30690, sum loss: 3282.731689, avg loss: 2.305289, ppl: 10.027076 +epoch: 2, batch: 30691, sum loss: 4304.069336, avg loss: 2.495113, ppl: 12.123102 +epoch: 2, batch: 30692, sum loss: 4051.570068, avg loss: 2.482580, ppl: 11.972109 +epoch: 2, batch: 30693, sum loss: 4223.556641, avg loss: 2.573770, ppl: 13.115176 +epoch: 2, batch: 30694, sum loss: 3863.672119, avg loss: 2.310809, ppl: 10.082577 +epoch: 2, batch: 30695, sum loss: 4669.605469, avg loss: 2.796171, ppl: 16.381800 +epoch: 2, batch: 30696, sum loss: 4592.099609, avg loss: 2.616581, ppl: 13.688841 +epoch: 2, batch: 30697, sum loss: 4943.086426, avg loss: 2.758419, ppl: 15.774880 +epoch: 2, batch: 30698, sum loss: 4520.987305, avg loss: 2.450400, ppl: 11.592978 +epoch: 
2, batch: 30699, sum loss: 4913.115723, avg loss: 2.807495, ppl: 16.568356 +epoch: 2, batch: 30700, sum loss: 4037.438477, avg loss: 2.373568, ppl: 10.735624 +epoch: 2, batch: 30701, sum loss: 4753.092773, avg loss: 2.504264, ppl: 12.234552 +epoch: 2, batch: 30702, sum loss: 4405.162109, avg loss: 2.505780, ppl: 12.253119 +epoch: 2, batch: 30703, sum loss: 4502.974609, avg loss: 2.636402, ppl: 13.962873 +epoch: 2, batch: 30704, sum loss: 3625.151123, avg loss: 2.331287, ppl: 10.291177 +epoch: 2, batch: 30705, sum loss: 4451.817383, avg loss: 2.419466, ppl: 11.239853 +epoch: 2, batch: 30706, sum loss: 5067.675781, avg loss: 2.880998, ppl: 17.832067 +epoch: 2, batch: 30707, sum loss: 4409.330078, avg loss: 2.466068, ppl: 11.776055 +epoch: 2, batch: 30708, sum loss: 4545.121094, avg loss: 2.433148, ppl: 11.394701 +epoch: 2, batch: 30709, sum loss: 4371.737305, avg loss: 2.570098, ppl: 13.067107 +epoch: 2, batch: 30710, sum loss: 4310.375000, avg loss: 2.166018, ppl: 8.723474 +epoch: 2, batch: 30711, sum loss: 3371.641846, avg loss: 2.338171, ppl: 10.362262 +epoch: 2, batch: 30712, sum loss: 3942.099365, avg loss: 2.499746, ppl: 12.179401 +epoch: 2, batch: 30713, sum loss: 4655.553711, avg loss: 2.724139, ppl: 15.243287 +epoch: 2, batch: 30714, sum loss: 4542.123535, avg loss: 2.673410, ppl: 14.489296 +epoch: 2, batch: 30715, sum loss: 3499.592041, avg loss: 2.195478, ppl: 8.984294 +epoch: 2, batch: 30716, sum loss: 4027.360840, avg loss: 2.373224, ppl: 10.731934 +epoch: 2, batch: 30717, sum loss: 4101.373535, avg loss: 2.405498, ppl: 11.083946 +epoch: 2, batch: 30718, sum loss: 4183.907227, avg loss: 2.517393, ppl: 12.396239 +epoch: 2, batch: 30719, sum loss: 3960.640625, avg loss: 2.476948, ppl: 11.904881 +epoch: 2, batch: 30720, sum loss: 3552.243164, avg loss: 2.116951, ppl: 8.305771 +epoch: 2, batch: 30721, sum loss: 4071.491455, avg loss: 2.409167, ppl: 11.124685 +epoch: 2, batch: 30722, sum loss: 4263.689453, avg loss: 2.688329, ppl: 14.707076 +epoch: 2, batch: 
30723, sum loss: 4922.536133, avg loss: 2.629560, ppl: 13.867666 +epoch: 2, batch: 30724, sum loss: 3843.801270, avg loss: 2.292070, ppl: 9.895399 +epoch: 2, batch: 30725, sum loss: 4576.553223, avg loss: 2.402390, ppl: 11.049556 +epoch: 2, batch: 30726, sum loss: 3786.620117, avg loss: 2.521052, ppl: 12.441677 +epoch: 2, batch: 30727, sum loss: 4042.461426, avg loss: 2.472453, ppl: 11.851487 +epoch: 2, batch: 30728, sum loss: 3543.590088, avg loss: 2.538388, ppl: 12.659254 +epoch: 2, batch: 30729, sum loss: 3830.312988, avg loss: 2.263778, ppl: 9.619367 +epoch: 2, batch: 30730, sum loss: 3061.296143, avg loss: 2.042226, ppl: 7.707745 +epoch: 2, batch: 30731, sum loss: 4083.249023, avg loss: 2.393464, ppl: 10.951360 +epoch: 2, batch: 30732, sum loss: 3021.942627, avg loss: 2.191401, ppl: 8.947744 +epoch: 2, batch: 30733, sum loss: 5301.370117, avg loss: 2.693786, ppl: 14.787555 +epoch: 2, batch: 30734, sum loss: 4138.840820, avg loss: 2.441794, ppl: 11.493641 +epoch: 2, batch: 30735, sum loss: 4922.711426, avg loss: 2.633874, ppl: 13.927627 +epoch: 2, batch: 30736, sum loss: 4018.626953, avg loss: 2.439968, ppl: 11.472670 +epoch: 2, batch: 30737, sum loss: 4710.173828, avg loss: 2.490838, ppl: 12.071383 +epoch: 2, batch: 30738, sum loss: 4049.243164, avg loss: 2.452600, ppl: 11.618521 +epoch: 2, batch: 30739, sum loss: 4694.912109, avg loss: 2.503953, ppl: 12.230749 +epoch: 2, batch: 30740, sum loss: 3486.572754, avg loss: 2.194193, ppl: 8.972758 +epoch: 2, batch: 30741, sum loss: 3700.699219, avg loss: 2.251034, ppl: 9.497547 +epoch: 2, batch: 30742, sum loss: 4222.849121, avg loss: 2.426925, ppl: 11.324004 +epoch: 2, batch: 30743, sum loss: 4758.088379, avg loss: 2.601470, ppl: 13.483544 +epoch: 2, batch: 30744, sum loss: 4102.278320, avg loss: 2.484724, ppl: 11.997804 +epoch: 2, batch: 30745, sum loss: 4598.379395, avg loss: 2.725773, ppl: 15.268217 +epoch: 2, batch: 30746, sum loss: 3982.440918, avg loss: 2.390421, ppl: 10.918086 +epoch: 2, batch: 30747, sum 
loss: 4055.383301, avg loss: 2.321341, ppl: 10.189332 +epoch: 2, batch: 30748, sum loss: 4124.414062, avg loss: 2.300287, ppl: 9.977043 +epoch: 2, batch: 30749, sum loss: 4358.155273, avg loss: 2.389339, ppl: 10.906287 +epoch: 2, batch: 30750, sum loss: 5138.313965, avg loss: 2.865764, ppl: 17.562460 +epoch: 2, batch: 30751, sum loss: 4257.260742, avg loss: 2.427172, ppl: 11.326809 +epoch: 2, batch: 30752, sum loss: 4177.840332, avg loss: 2.645877, ppl: 14.095807 +epoch: 2, batch: 30753, sum loss: 3788.007812, avg loss: 2.319662, ppl: 10.172234 +epoch: 2, batch: 30754, sum loss: 4273.894043, avg loss: 2.421470, ppl: 11.262400 +epoch: 2, batch: 30755, sum loss: 4791.939453, avg loss: 2.659234, ppl: 14.285343 +epoch: 2, batch: 30756, sum loss: 5032.184082, avg loss: 2.541507, ppl: 12.698793 +epoch: 2, batch: 30757, sum loss: 3723.649170, avg loss: 2.629696, ppl: 13.869548 +epoch: 2, batch: 30758, sum loss: 3845.029297, avg loss: 2.504905, ppl: 12.242395 +epoch: 2, batch: 30759, sum loss: 4300.484375, avg loss: 2.329623, ppl: 10.274070 +epoch: 2, batch: 30760, sum loss: 4271.146484, avg loss: 2.453272, ppl: 11.626328 +epoch: 2, batch: 30761, sum loss: 4177.938477, avg loss: 2.331439, ppl: 10.292743 +epoch: 2, batch: 30762, sum loss: 4271.805664, avg loss: 2.557967, ppl: 12.909551 +epoch: 2, batch: 30763, sum loss: 4192.104004, avg loss: 2.379174, ppl: 10.795979 +epoch: 2, batch: 30764, sum loss: 4886.124023, avg loss: 2.581154, ppl: 13.212372 +epoch: 2, batch: 30765, sum loss: 3529.739258, avg loss: 2.304007, ppl: 10.014232 +epoch: 2, batch: 30766, sum loss: 4619.946289, avg loss: 2.599857, ppl: 13.461814 +epoch: 2, batch: 30767, sum loss: 3810.445312, avg loss: 2.337697, ppl: 10.357351 +epoch: 2, batch: 30768, sum loss: 3893.451416, avg loss: 2.611302, ppl: 13.616770 +epoch: 2, batch: 30769, sum loss: 4377.235840, avg loss: 2.624242, ppl: 13.794115 +epoch: 2, batch: 30770, sum loss: 4258.570312, avg loss: 2.554631, ppl: 12.866554 +epoch: 2, batch: 30771, sum loss: 
4809.951660, avg loss: 2.591569, ppl: 13.350698 +epoch: 2, batch: 30772, sum loss: 4760.778809, avg loss: 2.725116, ppl: 15.258191 +epoch: 2, batch: 30773, sum loss: 3908.426758, avg loss: 2.244932, ppl: 9.439775 +epoch: 2, batch: 30774, sum loss: 4977.337891, avg loss: 2.496157, ppl: 12.135772 +epoch: 2, batch: 30775, sum loss: 4217.065430, avg loss: 2.426390, ppl: 11.317947 +epoch: 2, batch: 30776, sum loss: 5429.992188, avg loss: 2.753546, ppl: 15.698195 +epoch: 2, batch: 30777, sum loss: 4268.763184, avg loss: 2.464644, ppl: 11.759295 +epoch: 2, batch: 30778, sum loss: 3796.401123, avg loss: 2.441416, ppl: 11.489293 +epoch: 2, batch: 30779, sum loss: 4141.050293, avg loss: 2.246907, ppl: 9.458440 +epoch: 2, batch: 30780, sum loss: 3103.082520, avg loss: 2.163935, ppl: 8.705323 +epoch: 2, batch: 30781, sum loss: 4649.709961, avg loss: 2.591812, ppl: 13.353943 +epoch: 2, batch: 30782, sum loss: 4589.111328, avg loss: 2.605969, ppl: 13.544343 +epoch: 2, batch: 30783, sum loss: 4056.111328, avg loss: 2.557447, ppl: 12.902837 +epoch: 2, batch: 30784, sum loss: 4464.072754, avg loss: 2.420864, ppl: 11.255578 +epoch: 2, batch: 30785, sum loss: 4390.870605, avg loss: 2.539543, ppl: 12.673882 +epoch: 2, batch: 30786, sum loss: 4330.184570, avg loss: 2.648431, ppl: 14.131845 +epoch: 2, batch: 30787, sum loss: 3968.665039, avg loss: 2.390762, ppl: 10.921814 +epoch: 2, batch: 30788, sum loss: 4586.712402, avg loss: 2.553849, ppl: 12.856490 +epoch: 2, batch: 30789, sum loss: 3803.253418, avg loss: 2.472857, ppl: 11.856266 +epoch: 2, batch: 30790, sum loss: 3585.090820, avg loss: 2.358613, ppl: 10.576267 +epoch: 2, batch: 30791, sum loss: 5779.900391, avg loss: 2.999429, ppl: 20.074081 +epoch: 2, batch: 30792, sum loss: 4309.117188, avg loss: 2.517008, ppl: 12.391460 +epoch: 2, batch: 30793, sum loss: 4408.276367, avg loss: 2.676549, ppl: 14.534846 +epoch: 2, batch: 30794, sum loss: 2787.434570, avg loss: 2.311306, ppl: 10.087585 +epoch: 2, batch: 30795, sum loss: 
3560.618164, avg loss: 2.250707, ppl: 9.494443 +epoch: 2, batch: 30796, sum loss: 4173.918457, avg loss: 2.436613, ppl: 11.434248 +epoch: 2, batch: 30797, sum loss: 4178.041504, avg loss: 2.386089, ppl: 10.870893 +epoch: 2, batch: 30798, sum loss: 4233.776367, avg loss: 2.624784, ppl: 13.801593 +epoch: 2, batch: 30799, sum loss: 3503.574219, avg loss: 2.264754, ppl: 9.628754 +epoch: 2, batch: 30800, sum loss: 5100.067871, avg loss: 2.718586, ppl: 15.158879 +epoch: 2, batch: 30801, sum loss: 4074.195557, avg loss: 2.703514, ppl: 14.932113 +epoch: 2, batch: 30802, sum loss: 3251.113281, avg loss: 2.240602, ppl: 9.398986 +epoch: 2, batch: 30803, sum loss: 3841.178955, avg loss: 2.560786, ppl: 12.945989 +epoch: 2, batch: 30804, sum loss: 4670.535156, avg loss: 2.773477, ppl: 16.014219 +epoch: 2, batch: 30805, sum loss: 4497.975586, avg loss: 2.505836, ppl: 12.253799 +epoch: 2, batch: 30806, sum loss: 4104.603027, avg loss: 2.365766, ppl: 10.652191 +epoch: 2, batch: 30807, sum loss: 4117.706543, avg loss: 2.467170, ppl: 11.789037 +epoch: 2, batch: 30808, sum loss: 3916.392334, avg loss: 2.443164, ppl: 11.509400 +epoch: 2, batch: 30809, sum loss: 3652.137695, avg loss: 2.305642, ppl: 10.030620 +epoch: 2, batch: 30810, sum loss: 4038.668945, avg loss: 2.612334, ppl: 13.630835 +epoch: 2, batch: 30811, sum loss: 4461.752930, avg loss: 2.467784, ppl: 11.796273 +epoch: 2, batch: 30812, sum loss: 4128.400879, avg loss: 2.441396, ppl: 11.489071 +epoch: 2, batch: 30813, sum loss: 3588.126709, avg loss: 2.230035, ppl: 9.300195 +epoch: 2, batch: 30814, sum loss: 3823.428711, avg loss: 2.303270, ppl: 10.006855 +epoch: 2, batch: 30815, sum loss: 4621.198730, avg loss: 2.530777, ppl: 12.563264 +epoch: 2, batch: 30816, sum loss: 3744.367920, avg loss: 2.322809, ppl: 10.204298 +epoch: 2, batch: 30817, sum loss: 4078.652588, avg loss: 2.468918, ppl: 11.809663 +epoch: 2, batch: 30818, sum loss: 5250.712891, avg loss: 2.688537, ppl: 14.710141 +epoch: 2, batch: 30819, sum loss: 
4517.726562, avg loss: 2.535200, ppl: 12.618956 +epoch: 2, batch: 30820, sum loss: 4033.612305, avg loss: 2.368533, ppl: 10.681715 +epoch: 2, batch: 30821, sum loss: 4003.502686, avg loss: 2.679721, ppl: 14.581019 +epoch: 2, batch: 30822, sum loss: 4620.741699, avg loss: 2.530527, ppl: 12.560122 +epoch: 2, batch: 30823, sum loss: 4407.462891, avg loss: 2.585022, ppl: 13.263584 +epoch: 2, batch: 30824, sum loss: 4618.858887, avg loss: 2.560343, ppl: 12.940255 +epoch: 2, batch: 30825, sum loss: 4670.938477, avg loss: 2.729947, ppl: 15.332068 +epoch: 2, batch: 30826, sum loss: 4172.788086, avg loss: 2.333774, ppl: 10.316805 +epoch: 2, batch: 30827, sum loss: 4850.972168, avg loss: 2.723735, ppl: 15.237128 +epoch: 2, batch: 30828, sum loss: 4064.472168, avg loss: 2.565955, ppl: 13.013076 +epoch: 2, batch: 30829, sum loss: 3370.997559, avg loss: 2.191806, ppl: 8.951364 +epoch: 2, batch: 30830, sum loss: 3256.820557, avg loss: 2.337990, ppl: 10.360394 +epoch: 2, batch: 30831, sum loss: 4367.589844, avg loss: 2.514444, ppl: 12.359739 +epoch: 2, batch: 30832, sum loss: 3128.055908, avg loss: 2.202856, ppl: 9.050829 +epoch: 2, batch: 30833, sum loss: 3699.036133, avg loss: 2.338202, ppl: 10.362591 +epoch: 2, batch: 30834, sum loss: 4699.296387, avg loss: 2.597732, ppl: 13.433234 +epoch: 2, batch: 30835, sum loss: 4494.233887, avg loss: 2.487124, ppl: 12.026640 +epoch: 2, batch: 30836, sum loss: 4250.741211, avg loss: 2.441551, ppl: 11.490854 +epoch: 2, batch: 30837, sum loss: 4667.735840, avg loss: 2.665755, ppl: 14.378795 +epoch: 2, batch: 30838, sum loss: 3752.734375, avg loss: 2.475419, ppl: 11.886682 +epoch: 2, batch: 30839, sum loss: 4029.739990, avg loss: 2.434888, ppl: 11.414544 +epoch: 2, batch: 30840, sum loss: 4180.603516, avg loss: 2.356597, ppl: 10.554974 +epoch: 2, batch: 30841, sum loss: 3938.667969, avg loss: 2.431277, ppl: 11.373392 +epoch: 2, batch: 30842, sum loss: 4743.034180, avg loss: 2.733737, ppl: 15.390297 +epoch: 2, batch: 30843, sum loss: 
4446.894531, avg loss: 2.625085, ppl: 13.805753 +epoch: 2, batch: 30844, sum loss: 5265.737793, avg loss: 2.674321, ppl: 14.502499 +epoch: 2, batch: 30845, sum loss: 4059.690918, avg loss: 2.577581, ppl: 13.165258 +epoch: 2, batch: 30846, sum loss: 4089.955566, avg loss: 2.660999, ppl: 14.310579 +epoch: 2, batch: 30847, sum loss: 3400.250244, avg loss: 2.288190, ppl: 9.857080 +epoch: 2, batch: 30848, sum loss: 4088.512695, avg loss: 2.285362, ppl: 9.829244 +epoch: 2, batch: 30849, sum loss: 3595.842285, avg loss: 2.479891, ppl: 11.939966 +epoch: 2, batch: 30850, sum loss: 3948.239258, avg loss: 2.592409, ppl: 13.361927 +epoch: 2, batch: 30851, sum loss: 4333.477051, avg loss: 2.515077, ppl: 12.367557 +epoch: 2, batch: 30852, sum loss: 4489.443848, avg loss: 2.640850, ppl: 14.025114 +epoch: 2, batch: 30853, sum loss: 4220.317383, avg loss: 2.573364, ppl: 13.109856 +epoch: 2, batch: 30854, sum loss: 3619.015381, avg loss: 2.253434, ppl: 9.520374 +epoch: 2, batch: 30855, sum loss: 3892.437744, avg loss: 2.314172, ppl: 10.116546 +epoch: 2, batch: 30856, sum loss: 4012.558105, avg loss: 2.504718, ppl: 12.240105 +epoch: 2, batch: 30857, sum loss: 3912.334473, avg loss: 2.287915, ppl: 9.854367 +epoch: 2, batch: 30858, sum loss: 4704.864258, avg loss: 2.624018, ppl: 13.791027 +epoch: 2, batch: 30859, sum loss: 3913.469482, avg loss: 2.487902, ppl: 12.035995 +epoch: 2, batch: 30860, sum loss: 5163.448730, avg loss: 2.629047, ppl: 13.860560 +epoch: 2, batch: 30861, sum loss: 4399.221680, avg loss: 2.568139, ppl: 13.041527 +epoch: 2, batch: 30862, sum loss: 4467.151367, avg loss: 2.712296, ppl: 15.063819 +epoch: 2, batch: 30863, sum loss: 4560.765137, avg loss: 2.622637, ppl: 13.771990 +epoch: 2, batch: 30864, sum loss: 5138.521973, avg loss: 2.734711, ppl: 15.405293 +epoch: 2, batch: 30865, sum loss: 4216.110352, avg loss: 2.352740, ppl: 10.514342 +epoch: 2, batch: 30866, sum loss: 3929.905762, avg loss: 2.239263, ppl: 9.386407 +epoch: 2, batch: 30867, sum loss: 4580.652832, 
avg loss: 2.488133, ppl: 12.038778 +epoch: 2, batch: 30868, sum loss: 5143.429688, avg loss: 2.542476, ppl: 12.711107 +epoch: 2, batch: 30869, sum loss: 3775.166992, avg loss: 2.403034, ppl: 11.056677 +epoch: 2, batch: 30870, sum loss: 4711.606934, avg loss: 2.549571, ppl: 12.801608 +epoch: 2, batch: 30871, sum loss: 4471.550293, avg loss: 2.716616, ppl: 15.129045 +epoch: 2, batch: 30872, sum loss: 4124.306641, avg loss: 2.447660, ppl: 11.561258 +epoch: 2, batch: 30873, sum loss: 3806.639404, avg loss: 2.272621, ppl: 9.704801 +epoch: 2, batch: 30874, sum loss: 4047.805908, avg loss: 2.552211, ppl: 12.835446 +epoch: 2, batch: 30875, sum loss: 3562.210938, avg loss: 2.337409, ppl: 10.354371 +epoch: 2, batch: 30876, sum loss: 4348.641602, avg loss: 2.528280, ppl: 12.531933 +epoch: 2, batch: 30877, sum loss: 3830.478760, avg loss: 2.477671, ppl: 11.913482 +epoch: 2, batch: 30878, sum loss: 3902.010254, avg loss: 2.483775, ppl: 11.986426 +epoch: 2, batch: 30879, sum loss: 3210.003418, avg loss: 2.230718, ppl: 9.306547 +epoch: 2, batch: 30880, sum loss: 3874.593506, avg loss: 2.456939, ppl: 11.669044 +epoch: 2, batch: 30881, sum loss: 4886.099121, avg loss: 2.506978, ppl: 12.267796 +epoch: 2, batch: 30882, sum loss: 4421.247559, avg loss: 2.530766, ppl: 12.563120 +epoch: 2, batch: 30883, sum loss: 4090.238281, avg loss: 2.288886, ppl: 9.863939 +epoch: 2, batch: 30884, sum loss: 4563.535645, avg loss: 2.585573, ppl: 13.270887 +epoch: 2, batch: 30885, sum loss: 4406.276367, avg loss: 2.563279, ppl: 12.978302 +epoch: 2, batch: 30886, sum loss: 4387.755859, avg loss: 2.610206, ppl: 13.601848 +epoch: 2, batch: 30887, sum loss: 4068.913086, avg loss: 2.521012, ppl: 12.441178 +epoch: 2, batch: 30888, sum loss: 5368.092773, avg loss: 2.740221, ppl: 15.490409 +epoch: 2, batch: 30889, sum loss: 5173.898438, avg loss: 2.690535, ppl: 14.739553 +epoch: 2, batch: 30890, sum loss: 3451.696289, avg loss: 2.215466, ppl: 9.165682 +epoch: 2, batch: 30891, sum loss: 3753.073242, avg loss: 
2.284281, ppl: 9.818620 +epoch: 2, batch: 30892, sum loss: 3446.510254, avg loss: 2.467080, ppl: 11.787971 +epoch: 2, batch: 30893, sum loss: 3874.591553, avg loss: 2.216586, ppl: 9.175947 +epoch: 2, batch: 30894, sum loss: 4031.434570, avg loss: 2.408265, ppl: 11.114656 +epoch: 2, batch: 30895, sum loss: 4720.610352, avg loss: 2.625479, ppl: 13.811181 +epoch: 2, batch: 30896, sum loss: 2893.458984, avg loss: 2.059402, ppl: 7.841276 +epoch: 2, batch: 30897, sum loss: 5011.459961, avg loss: 2.573940, ppl: 13.117399 +epoch: 2, batch: 30898, sum loss: 4615.604980, avg loss: 2.713465, ppl: 15.081450 +epoch: 2, batch: 30899, sum loss: 4463.552734, avg loss: 2.316322, ppl: 10.138320 +epoch: 2, batch: 30900, sum loss: 3808.422607, avg loss: 2.404307, ppl: 11.070757 +epoch: 2, batch: 30901, sum loss: 3640.752197, avg loss: 2.359529, ppl: 10.585959 +epoch: 2, batch: 30902, sum loss: 3413.218506, avg loss: 2.142636, ppl: 8.521868 +epoch: 2, batch: 30903, sum loss: 3416.786377, avg loss: 2.287006, ppl: 9.845413 +epoch: 2, batch: 30904, sum loss: 3847.509521, avg loss: 2.415260, ppl: 11.192683 +epoch: 2, batch: 30905, sum loss: 4416.957031, avg loss: 2.632275, ppl: 13.905363 +epoch: 2, batch: 30906, sum loss: 4873.087402, avg loss: 2.431680, ppl: 11.377986 +epoch: 2, batch: 30907, sum loss: 3604.260742, avg loss: 2.572634, ppl: 13.100288 +epoch: 2, batch: 30908, sum loss: 4681.196289, avg loss: 2.527644, ppl: 12.523961 +epoch: 2, batch: 30909, sum loss: 4179.340820, avg loss: 2.468601, ppl: 11.805916 +epoch: 2, batch: 30910, sum loss: 4436.522949, avg loss: 2.650253, ppl: 14.157618 +epoch: 2, batch: 30911, sum loss: 4025.046143, avg loss: 2.481533, ppl: 11.959582 +epoch: 2, batch: 30912, sum loss: 4864.833008, avg loss: 2.734588, ppl: 15.403402 +epoch: 2, batch: 30913, sum loss: 3993.347168, avg loss: 2.424619, ppl: 11.297921 +epoch: 2, batch: 30914, sum loss: 3066.513428, avg loss: 2.165617, ppl: 8.719978 +epoch: 2, batch: 30915, sum loss: 4269.709473, avg loss: 2.361565, 
ppl: 10.607538 +epoch: 2, batch: 30916, sum loss: 5208.128906, avg loss: 2.845972, ppl: 17.218292 +epoch: 2, batch: 30917, sum loss: 4211.815918, avg loss: 2.316730, ppl: 10.142459 +epoch: 2, batch: 30918, sum loss: 4107.345215, avg loss: 2.416085, ppl: 11.201923 +epoch: 2, batch: 30919, sum loss: 4725.384277, avg loss: 2.656203, ppl: 14.242103 +epoch: 2, batch: 30920, sum loss: 3960.695312, avg loss: 2.366007, ppl: 10.654761 +epoch: 2, batch: 30921, sum loss: 3510.296631, avg loss: 2.424238, ppl: 11.293623 +epoch: 2, batch: 30922, sum loss: 4800.291992, avg loss: 2.590552, ppl: 13.337127 +epoch: 2, batch: 30923, sum loss: 4151.341309, avg loss: 2.372195, ppl: 10.720899 +epoch: 2, batch: 30924, sum loss: 3806.927246, avg loss: 2.278233, ppl: 9.759420 +epoch: 2, batch: 30925, sum loss: 4097.854492, avg loss: 2.337624, ppl: 10.356598 +epoch: 2, batch: 30926, sum loss: 5358.649414, avg loss: 2.876355, ppl: 17.749456 +epoch: 2, batch: 30927, sum loss: 4388.323730, avg loss: 2.396682, ppl: 10.986657 +epoch: 2, batch: 30928, sum loss: 4316.335938, avg loss: 2.518282, ppl: 12.407268 +epoch: 2, batch: 30929, sum loss: 4891.415527, avg loss: 2.824143, ppl: 16.846500 +epoch: 2, batch: 30930, sum loss: 5040.645020, avg loss: 2.732057, ppl: 15.364457 +epoch: 2, batch: 30931, sum loss: 4313.871582, avg loss: 2.438593, ppl: 11.456914 +epoch: 2, batch: 30932, sum loss: 3588.684326, avg loss: 2.652390, ppl: 14.187914 +epoch: 2, batch: 30933, sum loss: 3977.705811, avg loss: 2.449326, ppl: 11.580542 +epoch: 2, batch: 30934, sum loss: 4258.330566, avg loss: 2.641644, ppl: 14.036263 +epoch: 2, batch: 30935, sum loss: 3927.499023, avg loss: 2.286088, ppl: 9.836382 +epoch: 2, batch: 30936, sum loss: 4172.707520, avg loss: 2.357462, ppl: 10.564105 +epoch: 2, batch: 30937, sum loss: 3696.282715, avg loss: 2.311621, ppl: 10.090773 +epoch: 2, batch: 30938, sum loss: 4086.668457, avg loss: 2.306246, ppl: 10.036679 +epoch: 2, batch: 30939, sum loss: 4176.024414, avg loss: 2.491662, ppl: 
12.081333 +epoch: 2, batch: 30940, sum loss: 4785.082520, avg loss: 2.558868, ppl: 12.921178 +epoch: 2, batch: 30941, sum loss: 4147.701172, avg loss: 2.471812, ppl: 11.843891 +epoch: 2, batch: 30942, sum loss: 3393.590332, avg loss: 2.089649, ppl: 8.082080 +epoch: 2, batch: 30943, sum loss: 4005.963379, avg loss: 2.341300, ppl: 10.394736 +epoch: 2, batch: 30944, sum loss: 3933.582520, avg loss: 2.638218, ppl: 13.988250 +epoch: 2, batch: 30945, sum loss: 3773.946289, avg loss: 2.319574, ppl: 10.171337 +epoch: 2, batch: 30946, sum loss: 3970.872070, avg loss: 2.434624, ppl: 11.411530 +epoch: 2, batch: 30947, sum loss: 4142.417480, avg loss: 2.563377, ppl: 12.979577 +epoch: 2, batch: 30948, sum loss: 4629.660645, avg loss: 2.731363, ppl: 15.353805 +epoch: 2, batch: 30949, sum loss: 4191.883789, avg loss: 2.579621, ppl: 13.192135 +epoch: 2, batch: 30950, sum loss: 3974.073486, avg loss: 2.444080, ppl: 11.519942 +epoch: 2, batch: 30951, sum loss: 3809.014648, avg loss: 2.377662, ppl: 10.779670 +epoch: 2, batch: 30952, sum loss: 4131.198730, avg loss: 2.542276, ppl: 12.708565 +epoch: 2, batch: 30953, sum loss: 4297.410156, avg loss: 2.404818, ppl: 11.076418 +epoch: 2, batch: 30954, sum loss: 4120.396484, avg loss: 2.471743, ppl: 11.843076 +epoch: 2, batch: 30955, sum loss: 3916.429199, avg loss: 2.428041, ppl: 11.336646 +epoch: 2, batch: 30956, sum loss: 3410.147461, avg loss: 2.095973, ppl: 8.133348 +epoch: 2, batch: 30957, sum loss: 4784.771484, avg loss: 2.658206, ppl: 14.270671 +epoch: 2, batch: 30958, sum loss: 4682.739746, avg loss: 2.708352, ppl: 15.004521 +epoch: 2, batch: 30959, sum loss: 3961.933105, avg loss: 2.608251, ppl: 13.575285 +epoch: 2, batch: 30960, sum loss: 4039.735840, avg loss: 2.403174, ppl: 11.058221 +epoch: 2, batch: 30961, sum loss: 4284.327637, avg loss: 2.580920, ppl: 13.209288 +epoch: 2, batch: 30962, sum loss: 4642.359375, avg loss: 2.516184, ppl: 12.381258 +epoch: 2, batch: 30963, sum loss: 4109.908203, avg loss: 2.474358, ppl: 11.874080 
+epoch: 2, batch: 30964, sum loss: 4129.967285, avg loss: 2.592572, ppl: 13.364100 +epoch: 2, batch: 30965, sum loss: 4305.125000, avg loss: 2.462886, ppl: 11.738645 +epoch: 2, batch: 30966, sum loss: 4575.246094, avg loss: 2.658481, ppl: 14.274591 +epoch: 2, batch: 30967, sum loss: 4662.765625, avg loss: 2.653822, ppl: 14.208241 +epoch: 2, batch: 30968, sum loss: 3889.564453, avg loss: 2.537224, ppl: 12.644522 +epoch: 2, batch: 30969, sum loss: 4717.768555, avg loss: 2.526925, ppl: 12.514961 +epoch: 2, batch: 30970, sum loss: 5119.756836, avg loss: 2.567581, ppl: 13.034259 +epoch: 2, batch: 30971, sum loss: 4707.297852, avg loss: 2.596414, ppl: 13.415542 +epoch: 2, batch: 30972, sum loss: 4578.051270, avg loss: 2.559000, ppl: 12.922888 +epoch: 2, batch: 30973, sum loss: 4187.935547, avg loss: 2.489855, ppl: 12.059523 +epoch: 2, batch: 30974, sum loss: 3943.407959, avg loss: 2.343082, ppl: 10.413285 +epoch: 2, batch: 30975, sum loss: 3900.445312, avg loss: 2.409169, ppl: 11.124718 +epoch: 2, batch: 30976, sum loss: 4781.303711, avg loss: 2.901277, ppl: 18.197361 +epoch: 2, batch: 30977, sum loss: 3257.016113, avg loss: 2.380860, ppl: 10.814198 +epoch: 2, batch: 30978, sum loss: 4252.732422, avg loss: 2.548072, ppl: 12.782440 +epoch: 2, batch: 30979, sum loss: 4398.975098, avg loss: 2.556057, ppl: 12.884917 +epoch: 2, batch: 30980, sum loss: 4157.574219, avg loss: 2.411586, ppl: 11.151634 +epoch: 2, batch: 30981, sum loss: 4431.750488, avg loss: 2.462084, ppl: 11.729225 +epoch: 2, batch: 30982, sum loss: 4168.680176, avg loss: 2.468135, ppl: 11.800420 +epoch: 2, batch: 30983, sum loss: 4473.196777, avg loss: 2.398497, ppl: 11.006619 +epoch: 2, batch: 30984, sum loss: 4121.489746, avg loss: 2.497873, ppl: 12.156605 +epoch: 2, batch: 30985, sum loss: 4856.812988, avg loss: 2.562962, ppl: 12.974191 +epoch: 2, batch: 30986, sum loss: 4635.585938, avg loss: 2.544230, ppl: 12.733414 +epoch: 2, batch: 30987, sum loss: 3235.942871, avg loss: 2.180554, ppl: 8.851212 +epoch: 
2, batch: 30988, sum loss: 4092.252441, avg loss: 2.480153, ppl: 11.943092 +epoch: 2, batch: 30989, sum loss: 3888.485107, avg loss: 2.299518, ppl: 9.969378 +epoch: 2, batch: 30990, sum loss: 4489.407715, avg loss: 2.740786, ppl: 15.499164 +epoch: 2, batch: 30991, sum loss: 3487.367188, avg loss: 2.264524, ppl: 9.626543 +epoch: 2, batch: 30992, sum loss: 3425.983643, avg loss: 2.344958, ppl: 10.432836 +epoch: 2, batch: 30993, sum loss: 4514.785645, avg loss: 2.573994, ppl: 13.118113 +epoch: 2, batch: 30994, sum loss: 4203.838867, avg loss: 2.625758, ppl: 13.815047 +epoch: 2, batch: 30995, sum loss: 4824.578613, avg loss: 2.681811, ppl: 14.611536 +epoch: 2, batch: 30996, sum loss: 4207.890625, avg loss: 2.437943, ppl: 11.449471 +epoch: 2, batch: 30997, sum loss: 4263.159668, avg loss: 2.482912, ppl: 11.976086 +epoch: 2, batch: 30998, sum loss: 4726.666504, avg loss: 2.534406, ppl: 12.608935 +epoch: 2, batch: 30999, sum loss: 4341.539062, avg loss: 2.570479, ppl: 13.072086 +epoch: 2, batch: 31000, sum loss: 4212.368164, avg loss: 2.517853, ppl: 12.401941 +epoch: 2, batch: 31001, sum loss: 4583.960449, avg loss: 2.610456, ppl: 13.605253 +epoch: 2, batch: 31002, sum loss: 3749.166992, avg loss: 2.349102, ppl: 10.476158 +epoch: 2, batch: 31003, sum loss: 4184.913086, avg loss: 2.431675, ppl: 11.377927 +epoch: 2, batch: 31004, sum loss: 4079.570801, avg loss: 2.347279, ppl: 10.457078 +epoch: 2, batch: 31005, sum loss: 4840.821777, avg loss: 2.588675, ppl: 13.312119 +epoch: 2, batch: 31006, sum loss: 4310.356934, avg loss: 2.422910, ppl: 11.278632 +epoch: 2, batch: 31007, sum loss: 4909.593750, avg loss: 2.534638, ppl: 12.611864 +epoch: 2, batch: 31008, sum loss: 3915.076172, avg loss: 2.397475, ppl: 10.995373 +epoch: 2, batch: 31009, sum loss: 4050.954590, avg loss: 2.485248, ppl: 12.004098 +epoch: 2, batch: 31010, sum loss: 4304.906250, avg loss: 2.580879, ppl: 13.208746 +epoch: 2, batch: 31011, sum loss: 4455.430664, avg loss: 2.464287, ppl: 11.755095 +epoch: 2, batch: 
31012, sum loss: 3527.347656, avg loss: 2.419306, ppl: 11.238052 +epoch: 2, batch: 31013, sum loss: 3861.028809, avg loss: 2.508791, ppl: 12.290059 +epoch: 2, batch: 31014, sum loss: 3683.309082, avg loss: 2.410543, ppl: 11.140005 +epoch: 2, batch: 31015, sum loss: 3977.099121, avg loss: 2.557620, ppl: 12.905067 +epoch: 2, batch: 31016, sum loss: 3692.495361, avg loss: 2.151804, ppl: 8.600359 +epoch: 2, batch: 31017, sum loss: 4430.722168, avg loss: 2.576001, ppl: 13.144470 +epoch: 2, batch: 31018, sum loss: 4221.055664, avg loss: 2.380742, ppl: 10.812923 +epoch: 2, batch: 31019, sum loss: 3870.584717, avg loss: 2.279496, ppl: 9.771758 +epoch: 2, batch: 31020, sum loss: 4520.811523, avg loss: 2.700604, ppl: 14.888728 +epoch: 2, batch: 31021, sum loss: 3680.428711, avg loss: 2.285980, ppl: 9.835324 +epoch: 2, batch: 31022, sum loss: 4837.490234, avg loss: 2.810860, ppl: 16.624208 +epoch: 2, batch: 31023, sum loss: 4543.151367, avg loss: 2.674015, ppl: 14.498063 +epoch: 2, batch: 31024, sum loss: 4013.310059, avg loss: 2.541678, ppl: 12.700968 +epoch: 2, batch: 31025, sum loss: 3621.155762, avg loss: 2.177484, ppl: 8.824078 +epoch: 2, batch: 31026, sum loss: 3704.027100, avg loss: 2.224641, ppl: 9.250160 +epoch: 2, batch: 31027, sum loss: 3893.162109, avg loss: 2.506866, ppl: 12.266420 +epoch: 2, batch: 31028, sum loss: 4292.217285, avg loss: 2.400569, ppl: 11.029449 +epoch: 2, batch: 31029, sum loss: 4267.878418, avg loss: 2.416692, ppl: 11.208719 +epoch: 2, batch: 31030, sum loss: 3703.626709, avg loss: 2.336673, ppl: 10.346756 +epoch: 2, batch: 31031, sum loss: 3694.471680, avg loss: 2.675215, ppl: 14.515467 +epoch: 2, batch: 31032, sum loss: 4444.050293, avg loss: 2.640553, ppl: 14.020951 +epoch: 2, batch: 31033, sum loss: 2994.968018, avg loss: 2.186108, ppl: 8.900506 +epoch: 2, batch: 31034, sum loss: 4647.720703, avg loss: 2.552290, ppl: 12.836472 +epoch: 2, batch: 31035, sum loss: 4468.422363, avg loss: 2.590390, ppl: 13.334970 +epoch: 2, batch: 31036, sum 
loss: 4678.948730, avg loss: 2.672158, ppl: 14.471168 +epoch: 2, batch: 31037, sum loss: 4073.856445, avg loss: 2.487092, ppl: 12.026250 +epoch: 2, batch: 31038, sum loss: 4093.718750, avg loss: 2.350011, ppl: 10.485682 +epoch: 2, batch: 31039, sum loss: 3622.907959, avg loss: 2.314957, ppl: 10.124489 +epoch: 2, batch: 31040, sum loss: 4025.791992, avg loss: 2.231592, ppl: 9.314683 +epoch: 2, batch: 31041, sum loss: 4355.756836, avg loss: 2.375004, ppl: 10.751052 +epoch: 2, batch: 31042, sum loss: 4328.756836, avg loss: 2.821875, ppl: 16.808342 +epoch: 2, batch: 31043, sum loss: 3270.536133, avg loss: 2.200899, ppl: 9.033132 +epoch: 2, batch: 31044, sum loss: 3449.877197, avg loss: 2.200177, ppl: 9.026608 +epoch: 2, batch: 31045, sum loss: 4898.229980, avg loss: 2.709198, ppl: 15.017227 +epoch: 2, batch: 31046, sum loss: 5100.215820, avg loss: 2.666082, ppl: 14.383510 +epoch: 2, batch: 31047, sum loss: 4305.610352, avg loss: 2.541682, ppl: 12.701022 +epoch: 2, batch: 31048, sum loss: 4346.772461, avg loss: 2.409519, ppl: 11.128609 +epoch: 2, batch: 31049, sum loss: 4637.198242, avg loss: 2.658944, ppl: 14.281199 +epoch: 2, batch: 31050, sum loss: 3672.109863, avg loss: 2.166436, ppl: 8.727129 +epoch: 2, batch: 31051, sum loss: 3844.950684, avg loss: 2.406102, ppl: 11.090645 +epoch: 2, batch: 31052, sum loss: 4871.673828, avg loss: 2.515061, ppl: 12.367368 +epoch: 2, batch: 31053, sum loss: 3368.904541, avg loss: 2.063016, ppl: 7.869667 +epoch: 2, batch: 31054, sum loss: 4416.146484, avg loss: 2.748069, ppl: 15.612453 +epoch: 2, batch: 31055, sum loss: 3946.559326, avg loss: 2.775358, ppl: 16.044373 +epoch: 2, batch: 31056, sum loss: 4086.480225, avg loss: 2.442606, ppl: 11.502981 +epoch: 2, batch: 31057, sum loss: 4279.620605, avg loss: 2.541342, ppl: 12.696705 +epoch: 2, batch: 31058, sum loss: 3661.916504, avg loss: 2.317669, ppl: 10.151979 +epoch: 2, batch: 31059, sum loss: 4789.302734, avg loss: 2.495728, ppl: 12.130565 +epoch: 2, batch: 31060, sum loss: 
4300.104980, avg loss: 2.404980, ppl: 11.078214 +epoch: 2, batch: 31061, sum loss: 4132.738281, avg loss: 2.310083, ppl: 10.075260 +epoch: 2, batch: 31062, sum loss: 4606.732422, avg loss: 2.621931, ppl: 13.762271 +epoch: 2, batch: 31063, sum loss: 4131.182617, avg loss: 2.545399, ppl: 12.748313 +epoch: 2, batch: 31064, sum loss: 4439.901367, avg loss: 2.644372, ppl: 14.074610 +epoch: 2, batch: 31065, sum loss: 4861.818848, avg loss: 2.513867, ppl: 12.352604 +epoch: 2, batch: 31066, sum loss: 5010.891602, avg loss: 2.865004, ppl: 17.549120 +epoch: 2, batch: 31067, sum loss: 3657.462158, avg loss: 2.146398, ppl: 8.553992 +epoch: 2, batch: 31068, sum loss: 5470.610352, avg loss: 2.776960, ppl: 16.070087 +epoch: 2, batch: 31069, sum loss: 5013.340820, avg loss: 2.560440, ppl: 12.941508 +epoch: 2, batch: 31070, sum loss: 4253.948730, avg loss: 2.532112, ppl: 12.580051 +epoch: 2, batch: 31071, sum loss: 3810.965332, avg loss: 2.192730, ppl: 8.959643 +epoch: 2, batch: 31072, sum loss: 3676.223145, avg loss: 2.343036, ppl: 10.412799 +epoch: 2, batch: 31073, sum loss: 4353.544434, avg loss: 2.251057, ppl: 9.497771 +epoch: 2, batch: 31074, sum loss: 3613.550293, avg loss: 2.264129, ppl: 9.622741 +epoch: 2, batch: 31075, sum loss: 4782.785156, avg loss: 2.697567, ppl: 14.843566 +epoch: 2, batch: 31076, sum loss: 4396.011719, avg loss: 2.455872, ppl: 11.656597 +epoch: 2, batch: 31077, sum loss: 5466.684570, avg loss: 2.658893, ppl: 14.280477 +epoch: 2, batch: 31078, sum loss: 3809.520020, avg loss: 2.389912, ppl: 10.912535 +epoch: 2, batch: 31079, sum loss: 3201.397949, avg loss: 2.102034, ppl: 8.182797 +epoch: 2, batch: 31080, sum loss: 3752.075195, avg loss: 2.147725, ppl: 8.565347 +epoch: 2, batch: 31081, sum loss: 3685.010254, avg loss: 2.363701, ppl: 10.630225 +epoch: 2, batch: 31082, sum loss: 3617.562256, avg loss: 2.150750, ppl: 8.591303 +epoch: 2, batch: 31083, sum loss: 3954.106934, avg loss: 2.393527, ppl: 10.952057 +epoch: 2, batch: 31084, sum loss: 4215.509766, 
avg loss: 2.482632, ppl: 11.972740 +epoch: 2, batch: 31085, sum loss: 4443.372559, avg loss: 2.453547, ppl: 11.629519 +epoch: 2, batch: 31086, sum loss: 5235.716309, avg loss: 2.661777, ppl: 14.321720 +epoch: 2, batch: 31087, sum loss: 5206.706055, avg loss: 2.693588, ppl: 14.784632 +epoch: 2, batch: 31088, sum loss: 4163.380371, avg loss: 2.532470, ppl: 12.584548 +epoch: 2, batch: 31089, sum loss: 4478.099121, avg loss: 2.652902, ppl: 14.195178 +epoch: 2, batch: 31090, sum loss: 3970.750488, avg loss: 2.386268, ppl: 10.872845 +epoch: 2, batch: 31091, sum loss: 4048.600098, avg loss: 2.483804, ppl: 11.986773 +epoch: 2, batch: 31092, sum loss: 4446.390625, avg loss: 2.367620, ppl: 10.671963 +epoch: 2, batch: 31093, sum loss: 4579.567383, avg loss: 2.521788, ppl: 12.450840 +epoch: 2, batch: 31094, sum loss: 3923.813965, avg loss: 2.388201, ppl: 10.893876 +epoch: 2, batch: 31095, sum loss: 4566.366211, avg loss: 2.601918, ppl: 13.489586 +epoch: 2, batch: 31096, sum loss: 4517.153320, avg loss: 2.680803, ppl: 14.596810 +epoch: 2, batch: 31097, sum loss: 3679.873779, avg loss: 2.214124, ppl: 9.153387 +epoch: 2, batch: 31098, sum loss: 4046.188721, avg loss: 2.370351, ppl: 10.701146 +epoch: 2, batch: 31099, sum loss: 3201.026123, avg loss: 2.338222, ppl: 10.362798 +epoch: 2, batch: 31100, sum loss: 5274.854492, avg loss: 2.805773, ppl: 16.539864 +epoch: 2, batch: 31101, sum loss: 4509.999023, avg loss: 2.464480, ppl: 11.757372 +epoch: 2, batch: 31102, sum loss: 5017.659180, avg loss: 2.793797, ppl: 16.342957 +epoch: 2, batch: 31103, sum loss: 4189.209961, avg loss: 2.511517, ppl: 12.323608 +epoch: 2, batch: 31104, sum loss: 3402.757080, avg loss: 2.222572, ppl: 9.231039 +epoch: 2, batch: 31105, sum loss: 4066.529053, avg loss: 2.549548, ppl: 12.801315 +epoch: 2, batch: 31106, sum loss: 4155.535645, avg loss: 2.672370, ppl: 14.474238 +epoch: 2, batch: 31107, sum loss: 4433.389160, avg loss: 2.574558, ppl: 13.125515 +epoch: 2, batch: 31108, sum loss: 3892.107422, avg loss: 
2.266807, ppl: 9.648542 +epoch: 2, batch: 31109, sum loss: 3778.896484, avg loss: 2.473100, ppl: 11.859150 +epoch: 2, batch: 31110, sum loss: 4396.645508, avg loss: 2.687436, ppl: 14.693957 +epoch: 2, batch: 31111, sum loss: 4435.465332, avg loss: 2.471011, ppl: 11.834410 +epoch: 2, batch: 31112, sum loss: 3839.904541, avg loss: 2.436488, ppl: 11.432815 +epoch: 2, batch: 31113, sum loss: 3780.263428, avg loss: 2.461109, ppl: 11.717801 +epoch: 2, batch: 31114, sum loss: 4070.216553, avg loss: 2.540709, ppl: 12.688670 +epoch: 2, batch: 31115, sum loss: 4359.582520, avg loss: 2.491190, ppl: 12.075637 +epoch: 2, batch: 31116, sum loss: 3850.681152, avg loss: 2.168176, ppl: 8.742327 +epoch: 2, batch: 31117, sum loss: 3354.281006, avg loss: 2.397628, ppl: 10.997056 +epoch: 2, batch: 31118, sum loss: 4085.398193, avg loss: 2.477500, ppl: 11.911454 +epoch: 2, batch: 31119, sum loss: 4240.720703, avg loss: 2.449867, ppl: 11.586811 +epoch: 2, batch: 31120, sum loss: 4006.613525, avg loss: 2.285575, ppl: 9.831342 +epoch: 2, batch: 31121, sum loss: 4774.573242, avg loss: 2.576672, ppl: 13.153292 +epoch: 2, batch: 31122, sum loss: 4691.354492, avg loss: 2.426981, ppl: 11.324644 +epoch: 2, batch: 31123, sum loss: 3782.515625, avg loss: 2.162673, ppl: 8.694348 +epoch: 2, batch: 31124, sum loss: 4745.282715, avg loss: 2.653961, ppl: 14.210217 +epoch: 2, batch: 31125, sum loss: 4375.654297, avg loss: 2.538083, ppl: 12.655382 +epoch: 2, batch: 31126, sum loss: 3757.831055, avg loss: 2.234144, ppl: 9.338489 +epoch: 2, batch: 31127, sum loss: 3514.479980, avg loss: 2.072217, ppl: 7.942412 +epoch: 2, batch: 31128, sum loss: 4709.437988, avg loss: 2.648728, ppl: 14.136045 +epoch: 2, batch: 31129, sum loss: 4002.072510, avg loss: 2.583649, ppl: 13.245384 +epoch: 2, batch: 31130, sum loss: 4119.790039, avg loss: 2.373151, ppl: 10.731151 +epoch: 2, batch: 31131, sum loss: 4630.651367, avg loss: 2.486923, ppl: 12.024226 +epoch: 2, batch: 31132, sum loss: 3647.370361, avg loss: 2.552394, 
ppl: 12.837796 +epoch: 2, batch: 31133, sum loss: 4306.379395, avg loss: 2.381847, ppl: 10.824877 +epoch: 2, batch: 31134, sum loss: 5332.967773, avg loss: 2.856437, ppl: 17.399422 +epoch: 2, batch: 31135, sum loss: 4656.845703, avg loss: 2.488961, ppl: 12.048751 +epoch: 2, batch: 31136, sum loss: 3746.013428, avg loss: 2.248507, ppl: 9.473586 +epoch: 2, batch: 31137, sum loss: 4217.530273, avg loss: 2.523956, ppl: 12.477862 +epoch: 2, batch: 31138, sum loss: 3716.015381, avg loss: 2.383589, ppl: 10.843754 +epoch: 2, batch: 31139, sum loss: 3185.880859, avg loss: 2.256289, ppl: 9.547597 +epoch: 2, batch: 31140, sum loss: 4282.932129, avg loss: 2.637274, ppl: 13.975049 +epoch: 2, batch: 31141, sum loss: 3701.637695, avg loss: 2.280738, ppl: 9.783897 +epoch: 2, batch: 31142, sum loss: 4058.641357, avg loss: 2.446438, ppl: 11.547146 +epoch: 2, batch: 31143, sum loss: 4351.371582, avg loss: 2.611868, ppl: 13.624473 +epoch: 2, batch: 31144, sum loss: 3770.614990, avg loss: 2.623949, ppl: 13.790077 +epoch: 2, batch: 31145, sum loss: 4433.428711, avg loss: 2.469877, ppl: 11.820990 +epoch: 2, batch: 31146, sum loss: 4741.964844, avg loss: 2.327916, ppl: 10.256544 +epoch: 2, batch: 31147, sum loss: 4664.032227, avg loss: 2.624666, ppl: 13.799971 +epoch: 2, batch: 31148, sum loss: 4382.647461, avg loss: 2.285009, ppl: 9.825776 +epoch: 2, batch: 31149, sum loss: 4188.662598, avg loss: 2.547848, ppl: 12.779575 +epoch: 2, batch: 31150, sum loss: 4525.642578, avg loss: 2.615978, ppl: 13.680593 +epoch: 2, batch: 31151, sum loss: 4225.316895, avg loss: 2.396663, ppl: 10.986453 +epoch: 2, batch: 31152, sum loss: 4694.271484, avg loss: 2.650633, ppl: 14.163002 +epoch: 2, batch: 31153, sum loss: 4386.476074, avg loss: 2.385251, ppl: 10.861787 +epoch: 2, batch: 31154, sum loss: 2985.447754, avg loss: 2.106879, ppl: 8.222541 +epoch: 2, batch: 31155, sum loss: 4637.747559, avg loss: 2.520515, ppl: 12.434999 +epoch: 2, batch: 31156, sum loss: 4806.508301, avg loss: 2.709418, ppl: 
15.020535 +epoch: 2, batch: 31157, sum loss: 4535.767090, avg loss: 2.600784, ppl: 13.474296 +epoch: 2, batch: 31158, sum loss: 3748.559326, avg loss: 2.365022, ppl: 10.644270 +epoch: 2, batch: 31159, sum loss: 3709.793945, avg loss: 2.241567, ppl: 9.408066 +epoch: 2, batch: 31160, sum loss: 4385.606934, avg loss: 2.661169, ppl: 14.313015 +epoch: 2, batch: 31161, sum loss: 4658.716309, avg loss: 2.410096, ppl: 11.135035 +epoch: 2, batch: 31162, sum loss: 5799.861816, avg loss: 2.624372, ppl: 13.795904 +epoch: 2, batch: 31163, sum loss: 4334.615723, avg loss: 2.515738, ppl: 12.375733 +epoch: 2, batch: 31164, sum loss: 5243.264648, avg loss: 2.709697, ppl: 15.024730 +epoch: 2, batch: 31165, sum loss: 4060.526367, avg loss: 2.425643, ppl: 11.309502 +epoch: 2, batch: 31166, sum loss: 3964.466797, avg loss: 2.184279, ppl: 8.884242 +epoch: 2, batch: 31167, sum loss: 4019.404785, avg loss: 2.521584, ppl: 12.448297 +epoch: 2, batch: 31168, sum loss: 4003.237793, avg loss: 2.392850, ppl: 10.944641 +epoch: 2, batch: 31169, sum loss: 3899.267822, avg loss: 2.346130, ppl: 10.445068 +epoch: 2, batch: 31170, sum loss: 3490.730713, avg loss: 2.361794, ppl: 10.609963 +epoch: 2, batch: 31171, sum loss: 4662.089355, avg loss: 2.619151, ppl: 13.724072 +epoch: 2, batch: 31172, sum loss: 4139.599121, avg loss: 2.477319, ppl: 11.909287 +epoch: 2, batch: 31173, sum loss: 3552.156494, avg loss: 2.210427, ppl: 9.119613 +epoch: 2, batch: 31174, sum loss: 4722.358887, avg loss: 2.517249, ppl: 12.394451 +epoch: 2, batch: 31175, sum loss: 4371.348145, avg loss: 2.400521, ppl: 11.028919 +epoch: 2, batch: 31176, sum loss: 4020.147217, avg loss: 2.430561, ppl: 11.365252 +epoch: 2, batch: 31177, sum loss: 4572.909180, avg loss: 2.371841, ppl: 10.717104 +epoch: 2, batch: 31178, sum loss: 3777.880127, avg loss: 2.252761, ppl: 9.513967 +epoch: 2, batch: 31179, sum loss: 4485.945801, avg loss: 2.570743, ppl: 13.075531 +epoch: 2, batch: 31180, sum loss: 5063.719238, avg loss: 2.531860, ppl: 12.576873 
+epoch: 2, batch: 31181, sum loss: 5187.793945, avg loss: 2.654961, ppl: 14.224433 +epoch: 2, batch: 31182, sum loss: 3999.346191, avg loss: 2.417984, ppl: 11.223216 +epoch: 2, batch: 31183, sum loss: 3827.005615, avg loss: 2.532763, ppl: 12.588245 +epoch: 2, batch: 31184, sum loss: 4214.549805, avg loss: 2.440388, ppl: 11.477492 +epoch: 2, batch: 31185, sum loss: 4202.515625, avg loss: 2.759367, ppl: 15.789841 +epoch: 2, batch: 31186, sum loss: 3974.751465, avg loss: 2.371570, ppl: 10.714202 +epoch: 2, batch: 31187, sum loss: 3606.801758, avg loss: 2.404535, ppl: 11.073276 +epoch: 2, batch: 31188, sum loss: 3548.265381, avg loss: 2.295126, ppl: 9.925691 +epoch: 2, batch: 31189, sum loss: 4196.748047, avg loss: 2.520570, ppl: 12.435678 +epoch: 2, batch: 31190, sum loss: 4403.979980, avg loss: 2.495173, ppl: 12.123828 +epoch: 2, batch: 31191, sum loss: 3437.719482, avg loss: 2.284199, ppl: 9.817819 +epoch: 2, batch: 31192, sum loss: 5238.135742, avg loss: 2.599571, ppl: 13.457967 +epoch: 2, batch: 31193, sum loss: 4478.082520, avg loss: 2.718933, ppl: 15.164131 +epoch: 2, batch: 31194, sum loss: 4017.419189, avg loss: 2.415766, ppl: 11.198348 +epoch: 2, batch: 31195, sum loss: 4109.104980, avg loss: 2.615598, ppl: 13.675398 +epoch: 2, batch: 31196, sum loss: 3808.716064, avg loss: 2.215658, ppl: 9.167441 +epoch: 2, batch: 31197, sum loss: 4391.784180, avg loss: 2.508158, ppl: 12.282282 +epoch: 2, batch: 31198, sum loss: 3640.642334, avg loss: 2.254268, ppl: 9.528313 +epoch: 2, batch: 31199, sum loss: 4137.190430, avg loss: 2.383174, ppl: 10.839254 +epoch: 2, batch: 31200, sum loss: 4798.006836, avg loss: 2.805852, ppl: 16.541162 +epoch: 2, batch: 31201, sum loss: 4491.385742, avg loss: 2.368875, ppl: 10.685360 +epoch: 2, batch: 31202, sum loss: 4105.397461, avg loss: 2.349970, ppl: 10.485253 +epoch: 2, batch: 31203, sum loss: 3853.401611, avg loss: 2.290964, ppl: 9.884463 +epoch: 2, batch: 31204, sum loss: 4646.429688, avg loss: 2.611821, ppl: 13.623837 +epoch: 2, 
batch: 31205, sum loss: 4551.591309, avg loss: 2.481784, ppl: 11.962583 +epoch: 2, batch: 31206, sum loss: 4190.688965, avg loss: 2.642301, ppl: 14.045483 +epoch: 2, batch: 31207, sum loss: 4102.901855, avg loss: 2.393758, ppl: 10.954587 +epoch: 2, batch: 31208, sum loss: 4213.825195, avg loss: 2.432925, ppl: 11.392150 +epoch: 2, batch: 31209, sum loss: 4463.354004, avg loss: 2.546123, ppl: 12.757550 +epoch: 2, batch: 31210, sum loss: 3728.544922, avg loss: 2.144074, ppl: 8.534137 +epoch: 2, batch: 31211, sum loss: 3647.090088, avg loss: 2.262463, ppl: 9.606720 +epoch: 2, batch: 31212, sum loss: 4380.255859, avg loss: 2.341131, ppl: 10.392984 +epoch: 2, batch: 31213, sum loss: 3941.911377, avg loss: 2.437793, ppl: 11.447751 +epoch: 2, batch: 31214, sum loss: 3185.795410, avg loss: 2.391738, ppl: 10.932483 +epoch: 2, batch: 31215, sum loss: 4777.588379, avg loss: 2.648330, ppl: 14.130427 +epoch: 2, batch: 31216, sum loss: 5191.013672, avg loss: 2.582594, ppl: 13.231415 +epoch: 2, batch: 31217, sum loss: 4586.600586, avg loss: 2.586915, ppl: 13.288716 +epoch: 2, batch: 31218, sum loss: 3846.419434, avg loss: 2.496054, ppl: 12.134519 +epoch: 2, batch: 31219, sum loss: 5035.233398, avg loss: 2.743997, ppl: 15.549005 +epoch: 2, batch: 31220, sum loss: 3912.011719, avg loss: 2.325809, ppl: 10.234962 +epoch: 2, batch: 31221, sum loss: 4095.422607, avg loss: 2.416178, ppl: 11.202965 +epoch: 2, batch: 31222, sum loss: 4838.634277, avg loss: 2.719862, ppl: 15.178224 +epoch: 2, batch: 31223, sum loss: 4587.282227, avg loss: 2.768426, ppl: 15.933537 +epoch: 2, batch: 31224, sum loss: 3854.236328, avg loss: 2.464345, ppl: 11.755785 +epoch: 2, batch: 31225, sum loss: 4704.316406, avg loss: 2.487740, ppl: 12.034049 +epoch: 2, batch: 31226, sum loss: 4535.688965, avg loss: 2.577096, ppl: 13.158869 +epoch: 2, batch: 31227, sum loss: 4911.184570, avg loss: 2.648967, ppl: 14.139422 +epoch: 2, batch: 31228, sum loss: 4098.353027, avg loss: 2.281934, ppl: 9.795605 +epoch: 2, batch: 
31229, sum loss: 3601.415771, avg loss: 2.331014, ppl: 10.288365 +epoch: 2, batch: 31230, sum loss: 3798.689453, avg loss: 2.395138, ppl: 10.969715 +epoch: 2, batch: 31231, sum loss: 4283.538086, avg loss: 2.695745, ppl: 14.816545 +epoch: 2, batch: 31232, sum loss: 3903.131836, avg loss: 2.425812, ppl: 11.311414 +epoch: 2, batch: 31233, sum loss: 4141.731445, avg loss: 2.333370, ppl: 10.312634 +epoch: 2, batch: 31234, sum loss: 3774.493164, avg loss: 2.137312, ppl: 8.476622 +epoch: 2, batch: 31235, sum loss: 4342.300781, avg loss: 2.511452, ppl: 12.322812 +epoch: 2, batch: 31236, sum loss: 3858.011475, avg loss: 2.355318, ppl: 10.541484 +epoch: 2, batch: 31237, sum loss: 3747.922852, avg loss: 2.352745, ppl: 10.514393 +epoch: 2, batch: 31238, sum loss: 4628.017090, avg loss: 2.830591, ppl: 16.955486 +epoch: 2, batch: 31239, sum loss: 4386.280762, avg loss: 2.400810, ppl: 11.032114 +epoch: 2, batch: 31240, sum loss: 3854.749023, avg loss: 2.197690, ppl: 9.004194 +epoch: 2, batch: 31241, sum loss: 4083.511230, avg loss: 2.445216, ppl: 11.533045 +epoch: 2, batch: 31242, sum loss: 3810.602539, avg loss: 2.360968, ppl: 10.601210 +epoch: 2, batch: 31243, sum loss: 4010.252930, avg loss: 2.381386, ppl: 10.819884 +epoch: 2, batch: 31244, sum loss: 4368.152344, avg loss: 2.592375, ppl: 13.361471 +epoch: 2, batch: 31245, sum loss: 4430.772949, avg loss: 2.454722, ppl: 11.643195 +epoch: 2, batch: 31246, sum loss: 4280.016113, avg loss: 2.522107, ppl: 12.454816 +epoch: 2, batch: 31247, sum loss: 4235.067871, avg loss: 2.552783, ppl: 12.842802 +epoch: 2, batch: 31248, sum loss: 4110.481445, avg loss: 2.365064, ppl: 10.644722 +epoch: 2, batch: 31249, sum loss: 4255.141602, avg loss: 2.466749, ppl: 11.784074 +epoch: 2, batch: 31250, sum loss: 3684.870605, avg loss: 2.311713, ppl: 10.091697 +epoch: 2, batch: 31251, sum loss: 3861.887939, avg loss: 2.263709, ppl: 9.618699 +epoch: 2, batch: 31252, sum loss: 4459.787598, avg loss: 2.642054, ppl: 14.042021 +epoch: 2, batch: 31253, sum 
loss: 3764.379883, avg loss: 2.441232, ppl: 11.487186 +epoch: 2, batch: 31254, sum loss: 4538.715820, avg loss: 2.528533, ppl: 12.535097 +epoch: 2, batch: 31255, sum loss: 4206.453125, avg loss: 2.534008, ppl: 12.603922 +epoch: 2, batch: 31256, sum loss: 4891.893555, avg loss: 2.655751, ppl: 14.235676 +epoch: 2, batch: 31257, sum loss: 3634.521973, avg loss: 2.325350, ppl: 10.230261 +epoch: 2, batch: 31258, sum loss: 4355.426270, avg loss: 2.370945, ppl: 10.707508 +epoch: 2, batch: 31259, sum loss: 4656.054688, avg loss: 2.593902, ppl: 13.381890 +epoch: 2, batch: 31260, sum loss: 3934.034424, avg loss: 2.381377, ppl: 10.819791 +epoch: 2, batch: 31261, sum loss: 4887.909668, avg loss: 2.764655, ppl: 15.873561 +epoch: 2, batch: 31262, sum loss: 4617.336914, avg loss: 2.576639, ppl: 13.152856 +epoch: 2, batch: 31263, sum loss: 3561.157227, avg loss: 2.199603, ppl: 9.021432 +epoch: 2, batch: 31264, sum loss: 4049.109375, avg loss: 2.393091, ppl: 10.947277 +epoch: 2, batch: 31265, sum loss: 4450.117676, avg loss: 2.418542, ppl: 11.229476 +epoch: 2, batch: 31266, sum loss: 4724.913086, avg loss: 2.498632, ppl: 12.165839 +epoch: 2, batch: 31267, sum loss: 3671.895264, avg loss: 2.452836, ppl: 11.621256 +epoch: 2, batch: 31268, sum loss: 4520.815430, avg loss: 2.601160, ppl: 13.479360 +epoch: 2, batch: 31269, sum loss: 3333.333496, avg loss: 2.111041, ppl: 8.256831 +epoch: 2, batch: 31270, sum loss: 3561.116943, avg loss: 2.183395, ppl: 8.876390 +epoch: 2, batch: 31271, sum loss: 4437.212891, avg loss: 2.375382, ppl: 10.755117 +epoch: 2, batch: 31272, sum loss: 3768.028564, avg loss: 2.331701, ppl: 10.295437 +epoch: 2, batch: 31273, sum loss: 4205.497559, avg loss: 2.468015, ppl: 11.799002 +epoch: 2, batch: 31274, sum loss: 5113.788086, avg loss: 2.574919, ppl: 13.130247 +epoch: 2, batch: 31275, sum loss: 4667.611816, avg loss: 2.540889, ppl: 12.690943 +epoch: 2, batch: 31276, sum loss: 4092.405762, avg loss: 2.507602, ppl: 12.275455 +epoch: 2, batch: 31277, sum loss: 
4372.383789, avg loss: 2.635554, ppl: 13.951036 +epoch: 2, batch: 31278, sum loss: 4392.773438, avg loss: 2.382198, ppl: 10.828679 +epoch: 2, batch: 31279, sum loss: 4682.742188, avg loss: 2.531212, ppl: 12.568731 +epoch: 2, batch: 31280, sum loss: 3776.197510, avg loss: 2.269350, ppl: 9.673107 +epoch: 2, batch: 31281, sum loss: 3623.284668, avg loss: 2.409099, ppl: 11.123933 +epoch: 2, batch: 31282, sum loss: 4240.734375, avg loss: 2.702826, ppl: 14.921845 +epoch: 2, batch: 31283, sum loss: 4257.075195, avg loss: 2.470734, ppl: 11.831132 +epoch: 2, batch: 31284, sum loss: 3637.084473, avg loss: 2.341973, ppl: 10.401742 +epoch: 2, batch: 31285, sum loss: 4526.389648, avg loss: 2.492505, ppl: 12.091531 +epoch: 2, batch: 31286, sum loss: 3825.954590, avg loss: 2.310359, ppl: 10.078042 +epoch: 2, batch: 31287, sum loss: 3876.960938, avg loss: 2.237138, ppl: 9.366488 +epoch: 2, batch: 31288, sum loss: 4555.181152, avg loss: 2.483741, ppl: 11.986021 +epoch: 2, batch: 31289, sum loss: 3631.719727, avg loss: 2.379895, ppl: 10.803768 +epoch: 2, batch: 31290, sum loss: 3512.155029, avg loss: 2.294027, ppl: 9.914783 +epoch: 2, batch: 31291, sum loss: 4711.636719, avg loss: 2.797884, ppl: 16.409891 +epoch: 2, batch: 31292, sum loss: 4533.049316, avg loss: 2.577060, ppl: 13.158401 +epoch: 2, batch: 31293, sum loss: 4121.242676, avg loss: 2.352308, ppl: 10.509793 +epoch: 2, batch: 31294, sum loss: 4748.172852, avg loss: 2.470433, ppl: 11.827573 +epoch: 2, batch: 31295, sum loss: 4729.522949, avg loss: 2.518383, ppl: 12.408513 +epoch: 2, batch: 31296, sum loss: 3266.216309, avg loss: 2.261923, ppl: 9.601531 +epoch: 2, batch: 31297, sum loss: 4959.823242, avg loss: 2.646651, ppl: 14.106709 +epoch: 2, batch: 31298, sum loss: 4457.312988, avg loss: 2.617330, ppl: 13.699100 +epoch: 2, batch: 31299, sum loss: 3531.790527, avg loss: 2.098509, ppl: 8.154002 +epoch: 2, batch: 31300, sum loss: 5728.677734, avg loss: 2.674453, ppl: 14.504411 +epoch: 2, batch: 31301, sum loss: 4684.537598, 
avg loss: 2.664697, ppl: 14.363599 +epoch: 2, batch: 31302, sum loss: 3809.663818, avg loss: 2.477025, ppl: 11.905787 +epoch: 2, batch: 31303, sum loss: 3575.145996, avg loss: 2.174663, ppl: 8.799220 +epoch: 2, batch: 31304, sum loss: 3787.441406, avg loss: 2.278846, ppl: 9.765403 +epoch: 2, batch: 31305, sum loss: 3966.093018, avg loss: 2.545631, ppl: 12.751270 +epoch: 2, batch: 31306, sum loss: 4379.698242, avg loss: 2.643149, ppl: 14.057403 +epoch: 2, batch: 31307, sum loss: 3900.087891, avg loss: 2.242719, ppl: 9.418903 +epoch: 2, batch: 31308, sum loss: 4472.041992, avg loss: 2.438409, ppl: 11.454800 +epoch: 2, batch: 31309, sum loss: 4365.963867, avg loss: 2.401520, ppl: 11.039948 +epoch: 2, batch: 31310, sum loss: 3862.486084, avg loss: 2.387198, ppl: 10.882955 +epoch: 2, batch: 31311, sum loss: 4565.339844, avg loss: 2.440053, ppl: 11.473654 +epoch: 2, batch: 31312, sum loss: 4099.006836, avg loss: 2.339616, ppl: 10.377249 +epoch: 2, batch: 31313, sum loss: 3116.914551, avg loss: 2.154053, ppl: 8.619723 +epoch: 2, batch: 31314, sum loss: 4077.593262, avg loss: 2.550090, ppl: 12.808251 +epoch: 2, batch: 31315, sum loss: 4047.913818, avg loss: 2.343899, ppl: 10.421795 +epoch: 2, batch: 31316, sum loss: 4219.532227, avg loss: 2.405663, ppl: 11.085773 +epoch: 2, batch: 31317, sum loss: 3505.371826, avg loss: 2.280658, ppl: 9.783120 +epoch: 2, batch: 31318, sum loss: 3960.247559, avg loss: 2.358694, ppl: 10.577129 +epoch: 2, batch: 31319, sum loss: 4250.153809, avg loss: 2.478224, ppl: 11.920074 +epoch: 2, batch: 31320, sum loss: 3081.937500, avg loss: 2.170379, ppl: 8.761601 +epoch: 2, batch: 31321, sum loss: 4737.562500, avg loss: 2.760817, ppl: 15.812761 +epoch: 2, batch: 31322, sum loss: 3754.677246, avg loss: 2.209934, ppl: 9.115112 +epoch: 2, batch: 31323, sum loss: 4095.174316, avg loss: 2.403271, ppl: 11.059297 +epoch: 2, batch: 31324, sum loss: 4982.674805, avg loss: 2.665957, ppl: 14.381713 +epoch: 2, batch: 31325, sum loss: 4416.285645, avg loss: 
2.458956, ppl: 11.692604 +epoch: 2, batch: 31326, sum loss: 4698.725586, avg loss: 2.556434, ppl: 12.889770 +epoch: 2, batch: 31327, sum loss: 3717.872559, avg loss: 2.273928, ppl: 9.717498 +epoch: 2, batch: 31328, sum loss: 4910.231445, avg loss: 2.580258, ppl: 13.200545 +epoch: 2, batch: 31329, sum loss: 4024.459473, avg loss: 2.526340, ppl: 12.507645 +epoch: 2, batch: 31330, sum loss: 4777.062988, avg loss: 2.624760, ppl: 13.801260 +epoch: 2, batch: 31331, sum loss: 4041.271484, avg loss: 2.305346, ppl: 10.027647 +epoch: 2, batch: 31332, sum loss: 5022.551758, avg loss: 2.574347, ppl: 13.122752 +epoch: 2, batch: 31333, sum loss: 4720.864746, avg loss: 2.467781, ppl: 11.796240 +epoch: 2, batch: 31334, sum loss: 4358.692871, avg loss: 2.486419, ppl: 12.018165 +epoch: 2, batch: 31335, sum loss: 4075.848389, avg loss: 2.471709, ppl: 11.842669 +epoch: 2, batch: 31336, sum loss: 4092.365479, avg loss: 2.590105, ppl: 13.331169 +epoch: 2, batch: 31337, sum loss: 3935.826660, avg loss: 2.386796, ppl: 10.878583 +epoch: 2, batch: 31338, sum loss: 3879.549561, avg loss: 2.567538, ppl: 13.033693 +epoch: 2, batch: 31339, sum loss: 3592.288818, avg loss: 2.404477, ppl: 11.072639 +epoch: 2, batch: 31340, sum loss: 4360.410156, avg loss: 2.673458, ppl: 14.489990 +epoch: 2, batch: 31341, sum loss: 4629.004883, avg loss: 2.758644, ppl: 15.778435 +epoch: 2, batch: 31342, sum loss: 4211.836914, avg loss: 2.343816, ppl: 10.420925 +epoch: 2, batch: 31343, sum loss: 4935.271484, avg loss: 2.627940, ppl: 13.845219 +epoch: 2, batch: 31344, sum loss: 4145.256836, avg loss: 2.539986, ppl: 12.679492 +epoch: 2, batch: 31345, sum loss: 3457.480225, avg loss: 2.237851, ppl: 9.373170 +epoch: 2, batch: 31346, sum loss: 3728.082764, avg loss: 2.422406, ppl: 11.272949 +epoch: 2, batch: 31347, sum loss: 3867.837891, avg loss: 2.457330, ppl: 11.673604 +epoch: 2, batch: 31348, sum loss: 3730.490479, avg loss: 2.353622, ppl: 10.523614 +epoch: 2, batch: 31349, sum loss: 4151.066406, avg loss: 2.524980, 
ppl: 12.490641 +epoch: 2, batch: 31350, sum loss: 4818.463867, avg loss: 2.700933, ppl: 14.893617 +epoch: 2, batch: 31351, sum loss: 4096.978027, avg loss: 2.186221, ppl: 8.901509 +epoch: 2, batch: 31352, sum loss: 4264.616211, avg loss: 2.625995, ppl: 13.818318 +epoch: 2, batch: 31353, sum loss: 4204.727539, avg loss: 2.493907, ppl: 12.108495 +epoch: 2, batch: 31354, sum loss: 4178.448242, avg loss: 2.606643, ppl: 13.553472 +epoch: 2, batch: 31355, sum loss: 4500.583496, avg loss: 2.482396, ppl: 11.969909 +epoch: 2, batch: 31356, sum loss: 5061.636719, avg loss: 2.582468, ppl: 13.229743 +epoch: 2, batch: 31357, sum loss: 5171.350586, avg loss: 2.495826, ppl: 12.131744 +epoch: 2, batch: 31358, sum loss: 4256.232422, avg loss: 2.524456, ppl: 12.484099 +epoch: 2, batch: 31359, sum loss: 4317.422852, avg loss: 2.517448, ppl: 12.396915 +epoch: 2, batch: 31360, sum loss: 3940.364258, avg loss: 2.442879, ppl: 11.506122 +epoch: 2, batch: 31361, sum loss: 4269.727539, avg loss: 2.365500, ppl: 10.649362 +epoch: 2, batch: 31362, sum loss: 4066.871582, avg loss: 2.280915, ppl: 9.785633 +epoch: 2, batch: 31363, sum loss: 3206.574463, avg loss: 2.154956, ppl: 8.627509 +epoch: 2, batch: 31364, sum loss: 4756.894043, avg loss: 2.516875, ppl: 12.389821 +epoch: 2, batch: 31365, sum loss: 4568.551270, avg loss: 2.693721, ppl: 14.786599 +epoch: 2, batch: 31366, sum loss: 4075.335938, avg loss: 2.430135, ppl: 11.360413 +epoch: 2, batch: 31367, sum loss: 4270.141602, avg loss: 2.553913, ppl: 12.857311 +epoch: 2, batch: 31368, sum loss: 3595.572021, avg loss: 2.535664, ppl: 12.624815 +epoch: 2, batch: 31369, sum loss: 4313.831055, avg loss: 2.515353, ppl: 12.370977 +epoch: 2, batch: 31370, sum loss: 3665.138428, avg loss: 2.433691, ppl: 11.400886 +epoch: 2, batch: 31371, sum loss: 4064.301514, avg loss: 2.513483, ppl: 12.347860 +epoch: 2, batch: 31372, sum loss: 3029.853516, avg loss: 2.151885, ppl: 8.601052 +epoch: 2, batch: 31373, sum loss: 4441.499023, avg loss: 2.657989, ppl: 
14.267565 +epoch: 2, batch: 31374, sum loss: 3902.601807, avg loss: 2.413483, ppl: 11.172808 +epoch: 2, batch: 31375, sum loss: 4422.651367, avg loss: 2.580310, ppl: 13.201231 +epoch: 2, batch: 31376, sum loss: 4408.815430, avg loss: 2.572238, ppl: 13.095095 +epoch: 2, batch: 31377, sum loss: 3619.369141, avg loss: 2.323087, ppl: 10.207132 +epoch: 2, batch: 31378, sum loss: 4358.940918, avg loss: 2.455741, ppl: 11.655068 +epoch: 2, batch: 31379, sum loss: 5146.493652, avg loss: 2.608461, ppl: 13.578140 +epoch: 2, batch: 31380, sum loss: 3862.632812, avg loss: 2.554651, ppl: 12.866815 +epoch: 2, batch: 31381, sum loss: 3246.215820, avg loss: 2.130063, ppl: 8.415396 +epoch: 2, batch: 31382, sum loss: 3597.867188, avg loss: 2.051235, ppl: 7.777504 +epoch: 2, batch: 31383, sum loss: 3975.374756, avg loss: 2.410779, ppl: 11.142638 +epoch: 2, batch: 31384, sum loss: 3812.281494, avg loss: 2.286911, ppl: 9.844486 +epoch: 2, batch: 31385, sum loss: 4501.965332, avg loss: 2.687741, ppl: 14.698428 +epoch: 2, batch: 31386, sum loss: 4497.969238, avg loss: 2.461943, ppl: 11.727572 +epoch: 2, batch: 31387, sum loss: 4709.471680, avg loss: 2.674317, ppl: 14.502436 +epoch: 2, batch: 31388, sum loss: 3497.769775, avg loss: 2.541984, ppl: 12.704850 +epoch: 2, batch: 31389, sum loss: 3396.667480, avg loss: 2.261430, ppl: 9.596799 +epoch: 2, batch: 31390, sum loss: 3999.222900, avg loss: 2.487079, ppl: 12.026095 +epoch: 2, batch: 31391, sum loss: 4178.771484, avg loss: 2.368918, ppl: 10.685826 +epoch: 2, batch: 31392, sum loss: 3611.235840, avg loss: 2.204662, ppl: 9.067183 +epoch: 2, batch: 31393, sum loss: 4214.514160, avg loss: 2.409671, ppl: 11.130297 +epoch: 2, batch: 31394, sum loss: 3132.622803, avg loss: 2.080095, ppl: 8.005228 +epoch: 2, batch: 31395, sum loss: 4527.346191, avg loss: 2.522198, ppl: 12.455950 +epoch: 2, batch: 31396, sum loss: 4405.340332, avg loss: 2.649032, ppl: 14.140346 +epoch: 2, batch: 31397, sum loss: 3486.363525, avg loss: 2.149423, ppl: 8.579904 
+epoch: 2, batch: 31398, sum loss: 4506.928711, avg loss: 2.623358, ppl: 13.781922 +epoch: 2, batch: 31399, sum loss: 4662.833008, avg loss: 2.684417, ppl: 14.649665 +epoch: 2, batch: 31400, sum loss: 5268.125000, avg loss: 2.709941, ppl: 15.028391 +epoch: 2, batch: 31401, sum loss: 4309.029785, avg loss: 2.479304, ppl: 11.932951 +epoch: 2, batch: 31402, sum loss: 2497.215576, avg loss: 2.086229, ppl: 8.054482 +epoch: 2, batch: 31403, sum loss: 4015.877441, avg loss: 2.561146, ppl: 12.950654 +epoch: 2, batch: 31404, sum loss: 4587.786621, avg loss: 2.675094, ppl: 14.513716 +epoch: 2, batch: 31405, sum loss: 4152.991211, avg loss: 2.597243, ppl: 13.426664 +epoch: 2, batch: 31406, sum loss: 4607.103027, avg loss: 2.695789, ppl: 14.817203 +epoch: 2, batch: 31407, sum loss: 4145.771484, avg loss: 2.367659, ppl: 10.672382 +epoch: 2, batch: 31408, sum loss: 3217.551758, avg loss: 2.240635, ppl: 9.399297 +epoch: 2, batch: 31409, sum loss: 3746.151855, avg loss: 2.418432, ppl: 11.228245 +epoch: 2, batch: 31410, sum loss: 4419.701172, avg loss: 2.683486, ppl: 14.636022 +epoch: 2, batch: 31411, sum loss: 4846.530762, avg loss: 2.598676, ppl: 13.445924 +epoch: 2, batch: 31412, sum loss: 3745.041016, avg loss: 2.441357, ppl: 11.488616 +epoch: 2, batch: 31413, sum loss: 4862.242188, avg loss: 2.609899, ppl: 13.597678 +epoch: 2, batch: 31414, sum loss: 3388.058838, avg loss: 2.234867, ppl: 9.345242 +epoch: 2, batch: 31415, sum loss: 4026.811768, avg loss: 2.545393, ppl: 12.748237 +epoch: 2, batch: 31416, sum loss: 3746.271484, avg loss: 2.464652, ppl: 11.759393 +epoch: 2, batch: 31417, sum loss: 3542.742920, avg loss: 2.318549, ppl: 10.160922 +epoch: 2, batch: 31418, sum loss: 3867.689697, avg loss: 2.345476, ppl: 10.438239 +epoch: 2, batch: 31419, sum loss: 4048.006836, avg loss: 2.311826, ppl: 10.092834 +epoch: 2, batch: 31420, sum loss: 4550.426758, avg loss: 2.550688, ppl: 12.815915 +epoch: 2, batch: 31421, sum loss: 4877.239746, avg loss: 2.696097, ppl: 14.821774 +epoch: 2, 
batch: 31422, sum loss: 3243.528809, avg loss: 2.298745, ppl: 9.961670 +epoch: 2, batch: 31423, sum loss: 4279.932617, avg loss: 2.547579, ppl: 12.776136 +epoch: 2, batch: 31424, sum loss: 3703.435791, avg loss: 2.459121, ppl: 11.694525 +epoch: 2, batch: 31425, sum loss: 4859.066406, avg loss: 2.616621, ppl: 13.689396 +epoch: 2, batch: 31426, sum loss: 3820.222656, avg loss: 2.399637, ppl: 11.019178 +epoch: 2, batch: 31427, sum loss: 4251.756348, avg loss: 2.581516, ppl: 13.217155 +epoch: 2, batch: 31428, sum loss: 4149.669922, avg loss: 2.569455, ppl: 13.058707 +epoch: 2, batch: 31429, sum loss: 2960.323242, avg loss: 2.126669, ppl: 8.386883 +epoch: 2, batch: 31430, sum loss: 4178.891602, avg loss: 2.474181, ppl: 11.871979 +epoch: 2, batch: 31431, sum loss: 3366.294922, avg loss: 2.113179, ppl: 8.274508 +epoch: 2, batch: 31432, sum loss: 4849.963867, avg loss: 2.686960, ppl: 14.686966 +epoch: 2, batch: 31433, sum loss: 4982.815430, avg loss: 2.663183, ppl: 14.341869 +epoch: 2, batch: 31434, sum loss: 4017.445557, avg loss: 2.414330, ppl: 11.182279 +epoch: 2, batch: 31435, sum loss: 4671.370117, avg loss: 2.538788, ppl: 12.664313 +epoch: 2, batch: 31436, sum loss: 4446.376465, avg loss: 2.594152, ppl: 13.385232 +epoch: 2, batch: 31437, sum loss: 3673.602051, avg loss: 2.256512, ppl: 9.549724 +epoch: 2, batch: 31438, sum loss: 5150.832520, avg loss: 2.744184, ppl: 15.551911 +epoch: 2, batch: 31439, sum loss: 4559.504883, avg loss: 2.479339, ppl: 11.933378 +epoch: 2, batch: 31440, sum loss: 4170.689453, avg loss: 2.348361, ppl: 10.468401 +epoch: 2, batch: 31441, sum loss: 3509.132080, avg loss: 2.513705, ppl: 12.350605 +epoch: 2, batch: 31442, sum loss: 3745.816162, avg loss: 2.204718, ppl: 9.067698 +epoch: 2, batch: 31443, sum loss: 3837.412109, avg loss: 2.358582, ppl: 10.575939 +epoch: 2, batch: 31444, sum loss: 3737.997314, avg loss: 2.156952, ppl: 8.644746 +epoch: 2, batch: 31445, sum loss: 4437.401367, avg loss: 2.581385, ppl: 13.215434 +epoch: 2, batch: 31446, 
sum loss: 3583.808838, avg loss: 2.478429, ppl: 11.922523 +epoch: 2, batch: 31447, sum loss: 3984.388672, avg loss: 2.459499, ppl: 11.698953 +epoch: 2, batch: 31448, sum loss: 4288.763184, avg loss: 2.530244, ppl: 12.556568 +epoch: 2, batch: 31449, sum loss: 4573.532227, avg loss: 2.549349, ppl: 12.798770 +epoch: 2, batch: 31450, sum loss: 4156.768555, avg loss: 2.589887, ppl: 13.328264 +epoch: 2, batch: 31451, sum loss: 4447.888184, avg loss: 2.496009, ppl: 12.133972 +epoch: 2, batch: 31452, sum loss: 4440.954590, avg loss: 2.595532, ppl: 13.403712 +epoch: 2, batch: 31453, sum loss: 3442.003174, avg loss: 2.235067, ppl: 9.347107 +epoch: 2, batch: 31454, sum loss: 4536.760254, avg loss: 2.514834, ppl: 12.364555 +epoch: 2, batch: 31455, sum loss: 3737.868652, avg loss: 2.377779, ppl: 10.780931 +epoch: 2, batch: 31456, sum loss: 4505.790039, avg loss: 2.486639, ppl: 12.020806 +epoch: 2, batch: 31457, sum loss: 3519.637939, avg loss: 2.330886, ppl: 10.287053 +epoch: 2, batch: 31458, sum loss: 4351.897949, avg loss: 2.503969, ppl: 12.230942 +epoch: 2, batch: 31459, sum loss: 3323.373047, avg loss: 2.116798, ppl: 8.304505 +epoch: 2, batch: 31460, sum loss: 4104.689453, avg loss: 2.498289, ppl: 12.161672 +epoch: 2, batch: 31461, sum loss: 3653.501709, avg loss: 2.163115, ppl: 8.698193 +epoch: 2, batch: 31462, sum loss: 4624.136230, avg loss: 2.560430, ppl: 12.941379 +epoch: 2, batch: 31463, sum loss: 4274.071777, avg loss: 2.551685, ppl: 12.828700 +epoch: 2, batch: 31464, sum loss: 5194.363770, avg loss: 2.691380, ppl: 14.752024 +epoch: 2, batch: 31465, sum loss: 4151.371582, avg loss: 2.776837, ppl: 16.068119 +epoch: 2, batch: 31466, sum loss: 4974.760254, avg loss: 2.793240, ppl: 16.333857 +epoch: 2, batch: 31467, sum loss: 3922.288330, avg loss: 2.457574, ppl: 11.676452 +epoch: 2, batch: 31468, sum loss: 3549.184570, avg loss: 2.306163, ppl: 10.035842 +epoch: 2, batch: 31469, sum loss: 4435.696777, avg loss: 2.473897, ppl: 11.868606 +epoch: 2, batch: 31470, sum loss: 
3298.266846, avg loss: 2.287286, ppl: 9.848176 +epoch: 2, batch: 31471, sum loss: 4637.174316, avg loss: 2.570496, ppl: 13.072302 +epoch: 2, batch: 31472, sum loss: 3414.030273, avg loss: 2.174542, ppl: 8.798152 +epoch: 2, batch: 31473, sum loss: 4230.537598, avg loss: 2.481254, ppl: 11.956246 +epoch: 2, batch: 31474, sum loss: 3520.760742, avg loss: 2.274393, ppl: 9.722019 +epoch: 2, batch: 31475, sum loss: 4701.520508, avg loss: 2.666773, ppl: 14.393444 +epoch: 2, batch: 31476, sum loss: 5561.439453, avg loss: 2.675055, ppl: 14.513148 +epoch: 2, batch: 31477, sum loss: 4357.757812, avg loss: 2.527702, ppl: 12.524690 +epoch: 2, batch: 31478, sum loss: 4536.963867, avg loss: 2.541717, ppl: 12.701455 +epoch: 2, batch: 31479, sum loss: 3592.537598, avg loss: 2.472497, ppl: 11.852001 +epoch: 2, batch: 31480, sum loss: 3911.569824, avg loss: 2.509025, ppl: 12.292940 +epoch: 2, batch: 31481, sum loss: 4416.483887, avg loss: 2.600992, ppl: 13.477097 +epoch: 2, batch: 31482, sum loss: 4283.586914, avg loss: 2.621534, ppl: 13.756815 +epoch: 2, batch: 31483, sum loss: 4636.289062, avg loss: 2.837386, ppl: 17.071089 +epoch: 2, batch: 31484, sum loss: 4367.763672, avg loss: 2.507327, ppl: 12.272084 +epoch: 2, batch: 31485, sum loss: 4311.379883, avg loss: 2.427579, ppl: 11.331414 +epoch: 2, batch: 31486, sum loss: 3570.190918, avg loss: 2.375376, ppl: 10.755061 +epoch: 2, batch: 31487, sum loss: 3985.763184, avg loss: 2.486440, ppl: 12.018414 +epoch: 2, batch: 31488, sum loss: 4167.265625, avg loss: 2.495369, ppl: 12.126204 +epoch: 2, batch: 31489, sum loss: 3512.313477, avg loss: 2.368384, ppl: 10.680120 +epoch: 2, batch: 31490, sum loss: 3238.133545, avg loss: 2.126155, ppl: 8.382571 +epoch: 2, batch: 31491, sum loss: 4081.786133, avg loss: 2.431082, ppl: 11.371174 +epoch: 2, batch: 31492, sum loss: 3593.040039, avg loss: 2.292942, ppl: 9.904033 +epoch: 2, batch: 31493, sum loss: 3868.825439, avg loss: 2.327813, ppl: 10.255489 +epoch: 2, batch: 31494, sum loss: 3917.300781, 
avg loss: 2.232080, ppl: 9.319232 +epoch: 2, batch: 31495, sum loss: 3077.590820, avg loss: 2.059967, ppl: 7.845711 +epoch: 2, batch: 31496, sum loss: 3661.506836, avg loss: 2.345616, ppl: 10.439705 +epoch: 2, batch: 31497, sum loss: 5243.934082, avg loss: 2.923040, ppl: 18.597742 +epoch: 2, batch: 31498, sum loss: 3937.246094, avg loss: 2.360459, ppl: 10.595818 +epoch: 2, batch: 31499, sum loss: 4573.593750, avg loss: 2.484299, ppl: 11.992707 +epoch: 2, batch: 31500, sum loss: 3515.081543, avg loss: 2.086102, ppl: 8.053460 +epoch: 2, batch: 31501, sum loss: 3707.350342, avg loss: 2.413640, ppl: 11.174558 +epoch: 2, batch: 31502, sum loss: 4536.271973, avg loss: 2.764334, ppl: 15.868464 +epoch: 2, batch: 31503, sum loss: 4360.490723, avg loss: 2.529287, ppl: 12.544559 +epoch: 2, batch: 31504, sum loss: 3845.176025, avg loss: 2.350352, ppl: 10.489264 +epoch: 2, batch: 31505, sum loss: 3573.954590, avg loss: 2.351286, ppl: 10.499063 +epoch: 2, batch: 31506, sum loss: 4298.411621, avg loss: 2.418915, ppl: 11.233662 +epoch: 2, batch: 31507, sum loss: 4647.875488, avg loss: 2.567887, ppl: 13.038246 +epoch: 2, batch: 31508, sum loss: 3567.674072, avg loss: 2.286971, ppl: 9.845068 +epoch: 2, batch: 31509, sum loss: 4884.932617, avg loss: 2.735125, ppl: 15.411667 +epoch: 2, batch: 31510, sum loss: 3664.464600, avg loss: 2.425192, ppl: 11.304396 +epoch: 2, batch: 31511, sum loss: 4602.011230, avg loss: 2.470215, ppl: 11.824993 +epoch: 2, batch: 31512, sum loss: 4567.112305, avg loss: 2.499788, ppl: 12.179909 +epoch: 2, batch: 31513, sum loss: 4642.561035, avg loss: 2.517658, ppl: 12.399520 +epoch: 2, batch: 31514, sum loss: 4921.711914, avg loss: 2.548789, ppl: 12.791607 +epoch: 2, batch: 31515, sum loss: 4380.421387, avg loss: 2.608947, ppl: 13.584733 +epoch: 2, batch: 31516, sum loss: 3738.502930, avg loss: 2.363150, ppl: 10.624362 +epoch: 2, batch: 31517, sum loss: 4050.785645, avg loss: 2.565412, ppl: 13.006017 +epoch: 2, batch: 31518, sum loss: 3568.328857, avg loss: 
2.388440, ppl: 10.896479 +epoch: 2, batch: 31519, sum loss: 4675.100586, avg loss: 2.504071, ppl: 12.232190 +epoch: 2, batch: 31520, sum loss: 3322.618896, avg loss: 2.081841, ppl: 8.019222 +epoch: 2, batch: 31521, sum loss: 4676.531250, avg loss: 2.615510, ppl: 13.674185 +epoch: 2, batch: 31522, sum loss: 4296.249023, avg loss: 2.361874, ppl: 10.610819 +epoch: 2, batch: 31523, sum loss: 4673.705078, avg loss: 2.670689, ppl: 14.449917 +epoch: 2, batch: 31524, sum loss: 3964.325684, avg loss: 2.549406, ppl: 12.799497 +epoch: 2, batch: 31525, sum loss: 4201.291992, avg loss: 2.502259, ppl: 12.210040 +epoch: 2, batch: 31526, sum loss: 3820.537842, avg loss: 2.424199, ppl: 11.293181 +epoch: 2, batch: 31527, sum loss: 3997.215820, avg loss: 2.384974, ppl: 10.858778 +epoch: 2, batch: 31528, sum loss: 4512.388672, avg loss: 2.634202, ppl: 13.932194 +epoch: 2, batch: 31529, sum loss: 3748.730957, avg loss: 2.299835, ppl: 9.972536 +epoch: 2, batch: 31530, sum loss: 3720.763184, avg loss: 2.324025, ppl: 10.216711 +epoch: 2, batch: 31531, sum loss: 4119.494141, avg loss: 2.298825, ppl: 9.962470 +epoch: 2, batch: 31532, sum loss: 4418.416016, avg loss: 2.475303, ppl: 11.885310 +epoch: 2, batch: 31533, sum loss: 4253.379883, avg loss: 2.537816, ppl: 12.652013 +epoch: 2, batch: 31534, sum loss: 4512.807617, avg loss: 2.340668, ppl: 10.388173 +epoch: 2, batch: 31535, sum loss: 4301.165039, avg loss: 2.545068, ppl: 12.744095 +epoch: 2, batch: 31536, sum loss: 5111.112793, avg loss: 2.648245, ppl: 14.129222 +epoch: 2, batch: 31537, sum loss: 5147.083008, avg loss: 2.586474, ppl: 13.282853 +epoch: 2, batch: 31538, sum loss: 4153.538086, avg loss: 2.589488, ppl: 13.322946 +epoch: 2, batch: 31539, sum loss: 3770.506836, avg loss: 2.503657, ppl: 12.227122 +epoch: 2, batch: 31540, sum loss: 3838.099365, avg loss: 2.510202, ppl: 12.307421 +epoch: 2, batch: 31541, sum loss: 4179.523926, avg loss: 2.301500, ppl: 9.989154 +epoch: 2, batch: 31542, sum loss: 4116.384277, avg loss: 2.563128, 
ppl: 12.976350 +epoch: 2, batch: 31543, sum loss: 3692.365234, avg loss: 2.369939, ppl: 10.696743 +epoch: 2, batch: 31544, sum loss: 3963.725342, avg loss: 2.542480, ppl: 12.711149 +epoch: 2, batch: 31545, sum loss: 4199.753906, avg loss: 2.426201, ppl: 11.315810 +epoch: 2, batch: 31546, sum loss: 5084.198730, avg loss: 2.558731, ppl: 12.919414 +epoch: 2, batch: 31547, sum loss: 3628.718750, avg loss: 2.201892, ppl: 9.042109 +epoch: 2, batch: 31548, sum loss: 3524.555176, avg loss: 2.327976, ppl: 10.257157 +epoch: 2, batch: 31549, sum loss: 4189.633301, avg loss: 2.495315, ppl: 12.125547 +epoch: 2, batch: 31550, sum loss: 4456.387207, avg loss: 2.670094, ppl: 14.441330 +epoch: 2, batch: 31551, sum loss: 3960.000244, avg loss: 2.274555, ppl: 9.723591 +epoch: 2, batch: 31552, sum loss: 4082.143066, avg loss: 2.431294, ppl: 11.373592 +epoch: 2, batch: 31553, sum loss: 3613.060547, avg loss: 2.316065, ppl: 10.135708 +epoch: 2, batch: 31554, sum loss: 4093.275391, avg loss: 2.427803, ppl: 11.333952 +epoch: 2, batch: 31555, sum loss: 5393.894531, avg loss: 2.796213, ppl: 16.382483 +epoch: 2, batch: 31556, sum loss: 3512.195068, avg loss: 2.303079, ppl: 10.004937 +epoch: 2, batch: 31557, sum loss: 4918.620117, avg loss: 2.605201, ppl: 13.533949 +epoch: 2, batch: 31558, sum loss: 3815.188477, avg loss: 2.387477, ppl: 10.885996 +epoch: 2, batch: 31559, sum loss: 4826.614746, avg loss: 2.799661, ppl: 16.439068 +epoch: 2, batch: 31560, sum loss: 4089.024902, avg loss: 2.568483, ppl: 13.046017 +epoch: 2, batch: 31561, sum loss: 4610.030273, avg loss: 2.700662, ppl: 14.889587 +epoch: 2, batch: 31562, sum loss: 4596.708008, avg loss: 2.459448, ppl: 11.698351 +epoch: 2, batch: 31563, sum loss: 4051.635254, avg loss: 2.365228, ppl: 10.646462 +epoch: 2, batch: 31564, sum loss: 4078.726074, avg loss: 2.520844, ppl: 12.439094 +epoch: 2, batch: 31565, sum loss: 4298.261719, avg loss: 2.414754, ppl: 11.187017 +epoch: 2, batch: 31566, sum loss: 4355.812500, avg loss: 2.641487, ppl: 
14.034061 +epoch: 2, batch: 31567, sum loss: 4124.884277, avg loss: 2.266420, ppl: 9.644812 +epoch: 2, batch: 31568, sum loss: 4233.339355, avg loss: 2.389018, ppl: 10.902781 +epoch: 2, batch: 31569, sum loss: 4028.575928, avg loss: 2.395111, ppl: 10.969411 +epoch: 2, batch: 31570, sum loss: 3976.831055, avg loss: 2.453320, ppl: 11.626879 +epoch: 2, batch: 31571, sum loss: 3473.102539, avg loss: 2.387012, ppl: 10.880933 +epoch: 2, batch: 31572, sum loss: 4259.813477, avg loss: 2.559984, ppl: 12.935613 +epoch: 2, batch: 31573, sum loss: 3860.877441, avg loss: 2.341345, ppl: 10.395205 +epoch: 2, batch: 31574, sum loss: 3821.483398, avg loss: 2.561316, ppl: 12.952852 +epoch: 2, batch: 31575, sum loss: 4663.623535, avg loss: 2.519516, ppl: 12.422580 +epoch: 2, batch: 31576, sum loss: 4576.305176, avg loss: 2.610556, ppl: 13.606619 +epoch: 2, batch: 31577, sum loss: 3614.378174, avg loss: 2.059475, ppl: 7.841849 +epoch: 2, batch: 31578, sum loss: 3292.963135, avg loss: 2.162156, ppl: 8.689850 +epoch: 2, batch: 31579, sum loss: 4026.639893, avg loss: 2.559847, ppl: 12.933843 +epoch: 2, batch: 31580, sum loss: 4869.510742, avg loss: 2.488253, ppl: 12.040222 +epoch: 2, batch: 31581, sum loss: 3793.668945, avg loss: 2.184035, ppl: 8.882073 +epoch: 2, batch: 31582, sum loss: 4474.664551, avg loss: 2.502609, ppl: 12.214317 +epoch: 2, batch: 31583, sum loss: 3966.757324, avg loss: 2.528207, ppl: 12.531022 +epoch: 2, batch: 31584, sum loss: 4621.794922, avg loss: 2.476846, ppl: 11.903661 +epoch: 2, batch: 31585, sum loss: 3863.004395, avg loss: 2.342634, ppl: 10.408622 +epoch: 2, batch: 31586, sum loss: 3972.582031, avg loss: 2.388805, ppl: 10.900456 +epoch: 2, batch: 31587, sum loss: 4959.197754, avg loss: 2.467263, ppl: 11.790130 +epoch: 2, batch: 31588, sum loss: 3980.186523, avg loss: 2.557961, ppl: 12.909462 +epoch: 2, batch: 31589, sum loss: 3100.437500, avg loss: 2.069718, ppl: 7.922588 +epoch: 2, batch: 31590, sum loss: 4578.784668, avg loss: 2.550855, ppl: 12.818057 
+epoch: 2, batch: 31591, sum loss: 3872.800781, avg loss: 2.474633, ppl: 11.877347 +epoch: 2, batch: 31592, sum loss: 3764.209473, avg loss: 2.499475, ppl: 12.176103 +epoch: 2, batch: 31593, sum loss: 3887.025146, avg loss: 2.467952, ppl: 11.798265 +epoch: 2, batch: 31594, sum loss: 4206.482910, avg loss: 2.582248, ppl: 13.226845 +epoch: 2, batch: 31595, sum loss: 3581.657715, avg loss: 2.254033, ppl: 9.526073 +epoch: 2, batch: 31596, sum loss: 3824.771240, avg loss: 2.339309, ppl: 10.374070 +epoch: 2, batch: 31597, sum loss: 4330.580078, avg loss: 2.453587, ppl: 11.629984 +epoch: 2, batch: 31598, sum loss: 4517.940430, avg loss: 2.522580, ppl: 12.460700 +epoch: 2, batch: 31599, sum loss: 4162.688965, avg loss: 2.629620, ppl: 13.868504 +epoch: 2, batch: 31600, sum loss: 3615.664551, avg loss: 2.167665, ppl: 8.737855 +epoch: 2, batch: 31601, sum loss: 3251.358398, avg loss: 2.089562, ppl: 8.081374 +epoch: 2, batch: 31602, sum loss: 4006.884766, avg loss: 2.493394, ppl: 12.102286 +epoch: 2, batch: 31603, sum loss: 4627.187500, avg loss: 2.535445, ppl: 12.622049 +epoch: 2, batch: 31604, sum loss: 4846.923340, avg loss: 2.942880, ppl: 18.970402 +epoch: 2, batch: 31605, sum loss: 3931.976562, avg loss: 2.476056, ppl: 11.894259 +epoch: 2, batch: 31606, sum loss: 3970.928223, avg loss: 2.358033, ppl: 10.570144 +epoch: 2, batch: 31607, sum loss: 4168.558105, avg loss: 2.439180, ppl: 11.463633 +epoch: 2, batch: 31608, sum loss: 3912.374023, avg loss: 2.599584, ppl: 13.458139 +epoch: 2, batch: 31609, sum loss: 3871.354736, avg loss: 2.425661, ppl: 11.309701 +epoch: 2, batch: 31610, sum loss: 4688.154297, avg loss: 2.722505, ppl: 15.218402 +epoch: 2, batch: 31611, sum loss: 4915.187988, avg loss: 2.831330, ppl: 16.968006 +epoch: 2, batch: 31612, sum loss: 2787.855957, avg loss: 2.151124, ppl: 8.594509 +epoch: 2, batch: 31613, sum loss: 5089.915039, avg loss: 2.604869, ppl: 13.529458 +epoch: 2, batch: 31614, sum loss: 4403.003418, avg loss: 2.458405, ppl: 11.686157 +epoch: 2, 
batch: 31615, sum loss: 4574.327148, avg loss: 2.574185, ppl: 13.120622 +epoch: 2, batch: 31616, sum loss: 4887.639160, avg loss: 2.540353, ppl: 12.684149 +epoch: 2, batch: 31617, sum loss: 3912.492188, avg loss: 2.130987, ppl: 8.423176 +epoch: 2, batch: 31618, sum loss: 4602.461426, avg loss: 2.576966, ppl: 13.157163 +epoch: 2, batch: 31619, sum loss: 3859.691162, avg loss: 2.467833, ppl: 11.796859 +epoch: 2, batch: 31620, sum loss: 3681.496582, avg loss: 2.438077, ppl: 11.451002 +epoch: 2, batch: 31621, sum loss: 4263.307617, avg loss: 2.256912, ppl: 9.553547 +epoch: 2, batch: 31622, sum loss: 4533.178223, avg loss: 2.543871, ppl: 12.728851 +epoch: 2, batch: 31623, sum loss: 3346.965088, avg loss: 2.353703, ppl: 10.524465 +epoch: 2, batch: 31624, sum loss: 4611.197754, avg loss: 2.647071, ppl: 14.112640 +epoch: 2, batch: 31625, sum loss: 4102.525879, avg loss: 2.439076, ppl: 11.462444 +epoch: 2, batch: 31626, sum loss: 3558.895508, avg loss: 2.504501, ppl: 12.237447 +epoch: 2, batch: 31627, sum loss: 2961.180664, avg loss: 2.085339, ppl: 8.047316 +epoch: 2, batch: 31628, sum loss: 4369.067871, avg loss: 2.558002, ppl: 12.910001 +epoch: 2, batch: 31629, sum loss: 4762.379883, avg loss: 2.672492, ppl: 14.475992 +epoch: 2, batch: 31630, sum loss: 4007.644043, avg loss: 2.379836, ppl: 10.803135 +epoch: 2, batch: 31631, sum loss: 4204.021484, avg loss: 2.409181, ppl: 11.124850 +epoch: 2, batch: 31632, sum loss: 4234.790039, avg loss: 2.397956, ppl: 11.000667 +epoch: 2, batch: 31633, sum loss: 3695.213623, avg loss: 2.216685, ppl: 9.176857 +epoch: 2, batch: 31634, sum loss: 4183.319336, avg loss: 2.574350, ppl: 13.122789 +epoch: 2, batch: 31635, sum loss: 4367.819824, avg loss: 2.521836, ppl: 12.451434 +epoch: 2, batch: 31636, sum loss: 3938.709473, avg loss: 2.508732, ppl: 12.289338 +epoch: 2, batch: 31637, sum loss: 4655.660645, avg loss: 2.429886, ppl: 11.357583 +epoch: 2, batch: 31638, sum loss: 4153.147461, avg loss: 2.655465, ppl: 14.231604 +epoch: 2, batch: 
31639, sum loss: 5088.681641, avg loss: 2.594942, ppl: 13.395815 +epoch: 2, batch: 31640, sum loss: 3969.684570, avg loss: 2.128517, ppl: 8.402398 +epoch: 2, batch: 31641, sum loss: 4889.457031, avg loss: 2.470670, ppl: 11.830376 +epoch: 2, batch: 31642, sum loss: 4058.531738, avg loss: 2.423004, ppl: 11.279695 +epoch: 2, batch: 31643, sum loss: 3751.334961, avg loss: 2.471235, ppl: 11.837060 +epoch: 2, batch: 31644, sum loss: 2977.429932, avg loss: 2.060505, ppl: 7.849934 +epoch: 2, batch: 31645, sum loss: 4524.337891, avg loss: 2.597209, ppl: 13.426209 +epoch: 2, batch: 31646, sum loss: 3953.104980, avg loss: 2.319897, ppl: 10.174630 +epoch: 2, batch: 31647, sum loss: 3620.516113, avg loss: 2.307531, ppl: 10.049581 +epoch: 2, batch: 31648, sum loss: 3487.748291, avg loss: 2.408666, ppl: 11.119120 +epoch: 2, batch: 31649, sum loss: 5024.019531, avg loss: 2.745366, ppl: 15.570310 +epoch: 2, batch: 31650, sum loss: 3727.648193, avg loss: 2.442758, ppl: 11.504725 +epoch: 2, batch: 31651, sum loss: 3739.641846, avg loss: 2.254154, ppl: 9.527232 +epoch: 2, batch: 31652, sum loss: 3272.289795, avg loss: 2.126244, ppl: 8.383323 +epoch: 2, batch: 31653, sum loss: 4639.587402, avg loss: 2.694302, ppl: 14.795182 +epoch: 2, batch: 31654, sum loss: 4264.380371, avg loss: 2.582908, ppl: 13.235567 +epoch: 2, batch: 31655, sum loss: 2951.705811, avg loss: 2.046953, ppl: 7.744266 +epoch: 2, batch: 31656, sum loss: 4763.068359, avg loss: 2.533547, ppl: 12.598111 +epoch: 2, batch: 31657, sum loss: 4544.814453, avg loss: 2.554702, ppl: 12.867462 +epoch: 2, batch: 31658, sum loss: 5145.115723, avg loss: 2.953568, ppl: 19.174246 +epoch: 2, batch: 31659, sum loss: 3635.494873, avg loss: 2.446497, ppl: 11.547826 +epoch: 2, batch: 31660, sum loss: 3369.602295, avg loss: 2.212477, ppl: 9.138324 +epoch: 2, batch: 31661, sum loss: 4846.598633, avg loss: 2.517714, ppl: 12.400211 +epoch: 2, batch: 31662, sum loss: 4716.223633, avg loss: 2.764492, ppl: 15.870976 +epoch: 2, batch: 31663, sum 
loss: 3010.734619, avg loss: 2.196014, ppl: 8.989109 +epoch: 2, batch: 31664, sum loss: 3878.162109, avg loss: 2.281272, ppl: 9.789124 +epoch: 2, batch: 31665, sum loss: 4324.706055, avg loss: 2.598982, ppl: 13.450040 +epoch: 2, batch: 31666, sum loss: 3817.161377, avg loss: 2.309232, ppl: 10.066695 +epoch: 2, batch: 31667, sum loss: 4119.558105, avg loss: 2.455040, ppl: 11.646905 +epoch: 2, batch: 31668, sum loss: 4292.858887, avg loss: 2.676346, ppl: 14.531898 +epoch: 2, batch: 31669, sum loss: 3992.271240, avg loss: 2.413707, ppl: 11.175311 +epoch: 2, batch: 31670, sum loss: 3567.963135, avg loss: 2.333527, ppl: 10.314260 +epoch: 2, batch: 31671, sum loss: 3719.409180, avg loss: 2.413634, ppl: 11.174491 +epoch: 2, batch: 31672, sum loss: 3841.929688, avg loss: 2.375962, ppl: 10.761355 +epoch: 2, batch: 31673, sum loss: 4256.904297, avg loss: 2.493793, ppl: 12.107109 +epoch: 2, batch: 31674, sum loss: 4783.413086, avg loss: 2.522897, ppl: 12.464658 +epoch: 2, batch: 31675, sum loss: 4701.519531, avg loss: 2.644274, ppl: 14.073228 +epoch: 2, batch: 31676, sum loss: 3958.535889, avg loss: 2.300137, ppl: 9.975550 +epoch: 2, batch: 31677, sum loss: 3551.611084, avg loss: 2.322833, ppl: 10.204541 +epoch: 2, batch: 31678, sum loss: 4211.583984, avg loss: 2.402501, ppl: 11.050778 +epoch: 2, batch: 31679, sum loss: 4119.127930, avg loss: 2.423017, ppl: 11.279834 +epoch: 2, batch: 31680, sum loss: 3751.970215, avg loss: 2.222731, ppl: 9.232513 +epoch: 2, batch: 31681, sum loss: 4626.555664, avg loss: 2.680507, ppl: 14.592492 +epoch: 2, batch: 31682, sum loss: 3996.414795, avg loss: 2.680359, ppl: 14.590328 +epoch: 2, batch: 31683, sum loss: 3733.952148, avg loss: 2.453320, ppl: 11.626879 +epoch: 2, batch: 31684, sum loss: 4476.896484, avg loss: 2.404348, ppl: 11.071214 +epoch: 2, batch: 31685, sum loss: 4086.259766, avg loss: 2.344383, ppl: 10.426837 +epoch: 2, batch: 31686, sum loss: 4203.759766, avg loss: 2.383084, ppl: 10.838275 +epoch: 2, batch: 31687, sum loss: 
4757.192383, avg loss: 2.637025, ppl: 13.971571 +epoch: 2, batch: 31688, sum loss: 4632.114746, avg loss: 2.387688, ppl: 10.888293 +epoch: 2, batch: 31689, sum loss: 4280.909180, avg loss: 2.577308, ppl: 13.161665 +epoch: 2, batch: 31690, sum loss: 4874.847168, avg loss: 2.793609, ppl: 16.339878 +epoch: 2, batch: 31691, sum loss: 4141.107910, avg loss: 2.413233, ppl: 11.170016 +epoch: 2, batch: 31692, sum loss: 3803.585938, avg loss: 2.465059, ppl: 11.764176 +epoch: 2, batch: 31693, sum loss: 3632.794434, avg loss: 2.279043, ppl: 9.767328 +epoch: 2, batch: 31694, sum loss: 4218.771484, avg loss: 2.588203, ppl: 13.305845 +epoch: 2, batch: 31695, sum loss: 3332.218506, avg loss: 2.051859, ppl: 7.782352 +epoch: 2, batch: 31696, sum loss: 4128.811523, avg loss: 2.545506, ppl: 12.749684 +epoch: 2, batch: 31697, sum loss: 3973.682617, avg loss: 2.686736, ppl: 14.683668 +epoch: 2, batch: 31698, sum loss: 4941.255371, avg loss: 2.714976, ppl: 15.104241 +epoch: 2, batch: 31699, sum loss: 4157.617188, avg loss: 2.556960, ppl: 12.896553 +epoch: 2, batch: 31700, sum loss: 4308.035645, avg loss: 2.520793, ppl: 12.438459 +epoch: 2, batch: 31701, sum loss: 4804.956543, avg loss: 2.694872, ppl: 14.803622 +epoch: 2, batch: 31702, sum loss: 3661.325439, avg loss: 2.332054, ppl: 10.299078 +epoch: 2, batch: 31703, sum loss: 3800.195312, avg loss: 2.213276, ppl: 9.145630 +epoch: 2, batch: 31704, sum loss: 4632.763184, avg loss: 2.645782, ppl: 14.094456 +epoch: 2, batch: 31705, sum loss: 4167.049316, avg loss: 2.403142, ppl: 11.057871 +epoch: 2, batch: 31706, sum loss: 3875.890869, avg loss: 2.492534, ppl: 12.091883 +epoch: 2, batch: 31707, sum loss: 5359.539062, avg loss: 2.571756, ppl: 13.088787 +epoch: 2, batch: 31708, sum loss: 3606.514648, avg loss: 2.418856, ppl: 11.233006 +epoch: 2, batch: 31709, sum loss: 4941.881836, avg loss: 2.760828, ppl: 15.812927 +epoch: 2, batch: 31710, sum loss: 2876.237793, avg loss: 2.005745, ppl: 7.431626 +epoch: 2, batch: 31711, sum loss: 
4014.309570, avg loss: 2.727113, ppl: 15.288677 +epoch: 2, batch: 31712, sum loss: 5571.520020, avg loss: 2.643036, ppl: 14.055814 +epoch: 2, batch: 31713, sum loss: 3372.166992, avg loss: 2.193993, ppl: 8.970962 +epoch: 2, batch: 31714, sum loss: 3913.423828, avg loss: 2.552788, ppl: 12.842860 +epoch: 2, batch: 31715, sum loss: 4469.867188, avg loss: 2.447901, ppl: 11.564049 +epoch: 2, batch: 31716, sum loss: 3865.031250, avg loss: 2.214918, ppl: 9.160655 +epoch: 2, batch: 31717, sum loss: 4055.057617, avg loss: 2.318501, ppl: 10.160427 +epoch: 2, batch: 31718, sum loss: 4205.321289, avg loss: 2.485415, ppl: 12.006096 +epoch: 2, batch: 31719, sum loss: 3610.355957, avg loss: 2.253655, ppl: 9.522481 +epoch: 2, batch: 31720, sum loss: 4666.305664, avg loss: 2.651310, ppl: 14.172592 +epoch: 2, batch: 31721, sum loss: 3627.091797, avg loss: 2.271191, ppl: 9.690935 +epoch: 2, batch: 31722, sum loss: 4588.659180, avg loss: 2.517092, ppl: 12.392512 +epoch: 2, batch: 31723, sum loss: 3706.695801, avg loss: 2.516426, ppl: 12.384260 +epoch: 2, batch: 31724, sum loss: 4826.979980, avg loss: 2.497144, ppl: 12.147756 +epoch: 2, batch: 31725, sum loss: 5094.866699, avg loss: 2.685750, ppl: 14.669192 +epoch: 2, batch: 31726, sum loss: 4025.499023, avg loss: 2.318836, ppl: 10.163834 +epoch: 2, batch: 31727, sum loss: 4778.541504, avg loss: 2.735284, ppl: 15.414122 +epoch: 2, batch: 31728, sum loss: 4129.615723, avg loss: 2.585858, ppl: 13.274678 +epoch: 2, batch: 31729, sum loss: 4159.688965, avg loss: 2.481915, ppl: 11.964151 +epoch: 2, batch: 31730, sum loss: 3860.119141, avg loss: 2.608189, ppl: 13.574440 +epoch: 2, batch: 31731, sum loss: 4302.935059, avg loss: 2.541604, ppl: 12.700023 +epoch: 2, batch: 31732, sum loss: 4973.538574, avg loss: 2.719267, ppl: 15.169194 +epoch: 2, batch: 31733, sum loss: 4406.022461, avg loss: 2.628892, ppl: 13.858402 +epoch: 2, batch: 31734, sum loss: 4436.987305, avg loss: 2.641064, ppl: 14.028121 +epoch: 2, batch: 31735, sum loss: 
3537.041748, avg loss: 2.194195, ppl: 8.972771 +epoch: 2, batch: 31736, sum loss: 4370.316406, avg loss: 2.635896, ppl: 13.955817 +epoch: 2, batch: 31737, sum loss: 4523.841309, avg loss: 2.428256, ppl: 11.339089 +epoch: 2, batch: 31738, sum loss: 3842.488525, avg loss: 2.330193, ppl: 10.279925 +epoch: 2, batch: 31739, sum loss: 3611.659424, avg loss: 2.268630, ppl: 9.666152 +epoch: 2, batch: 31740, sum loss: 4439.154297, avg loss: 2.579404, ppl: 13.189274 +epoch: 2, batch: 31741, sum loss: 4300.283691, avg loss: 2.478550, ppl: 11.923959 +epoch: 2, batch: 31742, sum loss: 3516.873535, avg loss: 2.273351, ppl: 9.711888 +epoch: 2, batch: 31743, sum loss: 4201.060059, avg loss: 2.401978, ppl: 11.045005 +epoch: 2, batch: 31744, sum loss: 5064.145508, avg loss: 2.837056, ppl: 17.065458 +epoch: 2, batch: 31745, sum loss: 4377.332031, avg loss: 2.555360, ppl: 12.875936 +epoch: 2, batch: 31746, sum loss: 3599.538574, avg loss: 2.460382, ppl: 11.709284 +epoch: 2, batch: 31747, sum loss: 3886.078857, avg loss: 2.427282, ppl: 11.328054 +epoch: 2, batch: 31748, sum loss: 3728.093750, avg loss: 2.273228, ppl: 9.710695 +epoch: 2, batch: 31749, sum loss: 4508.461914, avg loss: 2.631910, ppl: 13.900295 +epoch: 2, batch: 31750, sum loss: 4229.080078, avg loss: 2.530868, ppl: 12.564408 +epoch: 2, batch: 31751, sum loss: 3954.283691, avg loss: 2.222756, ppl: 9.232745 +epoch: 2, batch: 31752, sum loss: 4361.867188, avg loss: 2.276549, ppl: 9.742995 +epoch: 2, batch: 31753, sum loss: 4262.534668, avg loss: 2.469603, ppl: 11.817752 +epoch: 2, batch: 31754, sum loss: 3676.849609, avg loss: 2.359981, ppl: 10.590746 +epoch: 2, batch: 31755, sum loss: 4905.361328, avg loss: 2.628811, ppl: 13.857283 +epoch: 2, batch: 31756, sum loss: 4181.309570, avg loss: 2.332019, ppl: 10.298712 +epoch: 2, batch: 31757, sum loss: 4469.087402, avg loss: 2.435470, ppl: 11.421186 +epoch: 2, batch: 31758, sum loss: 3685.258789, avg loss: 2.314861, ppl: 10.123516 +epoch: 2, batch: 31759, sum loss: 4064.013672, 
avg loss: 2.373840, ppl: 10.738545 +epoch: 2, batch: 31760, sum loss: 4369.947266, avg loss: 2.581186, ppl: 13.212797 +epoch: 2, batch: 31761, sum loss: 3519.263428, avg loss: 2.251608, ppl: 9.503006 +epoch: 2, batch: 31762, sum loss: 4951.553223, avg loss: 2.560265, ppl: 12.939250 +epoch: 2, batch: 31763, sum loss: 4878.667969, avg loss: 2.462730, ppl: 11.736806 +epoch: 2, batch: 31764, sum loss: 4148.432129, avg loss: 2.488561, ppl: 12.043937 +epoch: 2, batch: 31765, sum loss: 4213.158691, avg loss: 2.369606, ppl: 10.693173 +epoch: 2, batch: 31766, sum loss: 5015.553711, avg loss: 2.427664, ppl: 11.332379 +epoch: 2, batch: 31767, sum loss: 4884.596680, avg loss: 2.619087, ppl: 13.723185 +epoch: 2, batch: 31768, sum loss: 4028.988037, avg loss: 2.363043, ppl: 10.623227 +epoch: 2, batch: 31769, sum loss: 4478.432617, avg loss: 2.620499, ppl: 13.742578 +epoch: 2, batch: 31770, sum loss: 4256.808105, avg loss: 2.260652, ppl: 9.589340 +epoch: 2, batch: 31771, sum loss: 4377.230469, avg loss: 2.630547, ppl: 13.881365 +epoch: 2, batch: 31772, sum loss: 3786.045410, avg loss: 2.237616, ppl: 9.370960 +epoch: 2, batch: 31773, sum loss: 3590.237549, avg loss: 2.243898, ppl: 9.430021 +epoch: 2, batch: 31774, sum loss: 4405.570801, avg loss: 2.818663, ppl: 16.754440 +epoch: 2, batch: 31775, sum loss: 3883.637207, avg loss: 2.544979, ppl: 12.742955 +epoch: 2, batch: 31776, sum loss: 4170.995117, avg loss: 2.450643, ppl: 11.595804 +epoch: 2, batch: 31777, sum loss: 3483.548584, avg loss: 2.331692, ppl: 10.295351 +epoch: 2, batch: 31778, sum loss: 4890.879883, avg loss: 2.688774, ppl: 14.713628 +epoch: 2, batch: 31779, sum loss: 4024.475586, avg loss: 2.287934, ppl: 9.854555 +epoch: 2, batch: 31780, sum loss: 4242.839844, avg loss: 2.663427, ppl: 14.345371 +epoch: 2, batch: 31781, sum loss: 4107.728516, avg loss: 2.613059, ppl: 13.640712 +epoch: 2, batch: 31782, sum loss: 3522.863281, avg loss: 2.460100, ppl: 11.705984 +epoch: 2, batch: 31783, sum loss: 4517.043457, avg loss: 
2.680738, ppl: 14.595860 +epoch: 2, batch: 31784, sum loss: 4294.958008, avg loss: 2.516086, ppl: 12.380041 +epoch: 2, batch: 31785, sum loss: 4205.707031, avg loss: 2.414298, ppl: 11.181916 +epoch: 2, batch: 31786, sum loss: 4060.879150, avg loss: 2.523853, ppl: 12.476574 +epoch: 2, batch: 31787, sum loss: 4167.262695, avg loss: 2.429891, ppl: 11.357640 +epoch: 2, batch: 31788, sum loss: 3895.512695, avg loss: 2.406123, ppl: 11.090875 +epoch: 2, batch: 31789, sum loss: 4851.533203, avg loss: 2.649663, ppl: 14.149273 +epoch: 2, batch: 31790, sum loss: 3585.714844, avg loss: 2.406520, ppl: 11.095284 +epoch: 2, batch: 31791, sum loss: 4659.626953, avg loss: 2.473263, ppl: 11.861084 +epoch: 2, batch: 31792, sum loss: 3270.824707, avg loss: 2.167545, ppl: 8.736805 +epoch: 2, batch: 31793, sum loss: 4431.847656, avg loss: 2.815659, ppl: 16.704182 +epoch: 2, batch: 31794, sum loss: 4246.435547, avg loss: 2.508231, ppl: 12.283187 +epoch: 2, batch: 31795, sum loss: 3488.110840, avg loss: 2.352064, ppl: 10.507236 +epoch: 2, batch: 31796, sum loss: 3771.483643, avg loss: 2.377985, ppl: 10.783151 +epoch: 2, batch: 31797, sum loss: 4308.479980, avg loss: 2.638383, ppl: 13.990564 +epoch: 2, batch: 31798, sum loss: 3807.844727, avg loss: 2.433128, ppl: 11.394464 +epoch: 2, batch: 31799, sum loss: 4019.435791, avg loss: 2.628800, ppl: 13.857137 +epoch: 2, batch: 31800, sum loss: 4557.992188, avg loss: 2.591241, ppl: 13.346319 +epoch: 2, batch: 31801, sum loss: 4467.907227, avg loss: 2.503029, ppl: 12.219455 +epoch: 2, batch: 31802, sum loss: 4772.799316, avg loss: 2.332747, ppl: 10.306209 +epoch: 2, batch: 31803, sum loss: 3539.693115, avg loss: 2.330278, ppl: 10.280804 +epoch: 2, batch: 31804, sum loss: 4598.798828, avg loss: 2.440976, ppl: 11.484245 +epoch: 2, batch: 31805, sum loss: 5229.537109, avg loss: 2.626588, ppl: 13.826515 +epoch: 2, batch: 31806, sum loss: 3154.082031, avg loss: 2.033580, ppl: 7.641390 +epoch: 2, batch: 31807, sum loss: 4950.414062, avg loss: 2.567642, 
ppl: 13.035054 +epoch: 2, batch: 31808, sum loss: 4026.369629, avg loss: 2.614526, ppl: 13.660737 +epoch: 2, batch: 31809, sum loss: 3726.080078, avg loss: 2.444934, ppl: 11.529793 +epoch: 2, batch: 31810, sum loss: 4569.957520, avg loss: 2.606935, ppl: 13.557437 +epoch: 2, batch: 31811, sum loss: 4319.236328, avg loss: 2.503905, ppl: 12.230164 +epoch: 2, batch: 31812, sum loss: 4516.959473, avg loss: 2.592973, ppl: 13.369463 +epoch: 2, batch: 31813, sum loss: 4041.409912, avg loss: 2.567605, ppl: 13.034566 +epoch: 2, batch: 31814, sum loss: 5197.910156, avg loss: 2.781118, ppl: 16.137054 +epoch: 2, batch: 31815, sum loss: 4307.067871, avg loss: 2.627863, ppl: 13.844155 +epoch: 2, batch: 31816, sum loss: 3965.614014, avg loss: 2.232891, ppl: 9.326790 +epoch: 2, batch: 31817, sum loss: 5092.431152, avg loss: 2.488969, ppl: 12.048851 +epoch: 2, batch: 31818, sum loss: 4183.395020, avg loss: 2.318955, ppl: 10.165049 +epoch: 2, batch: 31819, sum loss: 4584.127930, avg loss: 2.630022, ppl: 13.874072 +epoch: 2, batch: 31820, sum loss: 3431.447266, avg loss: 2.216697, ppl: 9.176971 +epoch: 2, batch: 31821, sum loss: 5205.936523, avg loss: 2.983345, ppl: 19.753775 +epoch: 2, batch: 31822, sum loss: 3766.299316, avg loss: 2.306368, ppl: 10.037902 +epoch: 2, batch: 31823, sum loss: 4400.756836, avg loss: 2.532081, ppl: 12.579659 +epoch: 2, batch: 31824, sum loss: 4173.520508, avg loss: 2.576247, ppl: 13.147708 +epoch: 2, batch: 31825, sum loss: 4047.317871, avg loss: 2.417753, ppl: 11.220615 +epoch: 2, batch: 31826, sum loss: 3096.979492, avg loss: 1.962598, ppl: 7.117794 +epoch: 2, batch: 31827, sum loss: 4582.868164, avg loss: 2.855370, ppl: 17.380859 +epoch: 2, batch: 31828, sum loss: 4144.706543, avg loss: 2.369758, ppl: 10.694802 +epoch: 2, batch: 31829, sum loss: 3696.629883, avg loss: 2.375726, ppl: 10.758823 +epoch: 2, batch: 31830, sum loss: 3390.800049, avg loss: 2.314539, ppl: 10.120258 +epoch: 2, batch: 31831, sum loss: 3283.347168, avg loss: 2.158677, ppl: 
8.659670 +epoch: 2, batch: 31832, sum loss: 4233.830078, avg loss: 2.781754, ppl: 16.147327 +epoch: 2, batch: 31833, sum loss: 4921.666016, avg loss: 2.547446, ppl: 12.774440 +epoch: 2, batch: 31834, sum loss: 4538.119629, avg loss: 2.482560, ppl: 11.971872 +epoch: 2, batch: 31835, sum loss: 4641.586914, avg loss: 2.525347, ppl: 12.495224 +epoch: 2, batch: 31836, sum loss: 3881.398193, avg loss: 2.098053, ppl: 8.150288 +epoch: 2, batch: 31837, sum loss: 4557.354004, avg loss: 2.550282, ppl: 12.810716 +epoch: 2, batch: 31838, sum loss: 4027.348633, avg loss: 2.391537, ppl: 10.930283 +epoch: 2, batch: 31839, sum loss: 4320.774902, avg loss: 2.503346, ppl: 12.223322 +epoch: 2, batch: 31840, sum loss: 3071.645508, avg loss: 2.214597, ppl: 9.157713 +epoch: 2, batch: 31841, sum loss: 3133.542725, avg loss: 2.255970, ppl: 9.544549 +epoch: 2, batch: 31842, sum loss: 4459.180176, avg loss: 2.545194, ppl: 12.745706 +epoch: 2, batch: 31843, sum loss: 4228.645020, avg loss: 2.441481, ppl: 11.490044 +epoch: 2, batch: 31844, sum loss: 3952.403564, avg loss: 2.453385, ppl: 11.627644 +epoch: 2, batch: 31845, sum loss: 3896.988770, avg loss: 2.292346, ppl: 9.898136 +epoch: 2, batch: 31846, sum loss: 5037.144531, avg loss: 2.389537, ppl: 10.908445 +epoch: 2, batch: 31847, sum loss: 2828.890869, avg loss: 2.133402, ppl: 8.443542 +epoch: 2, batch: 31848, sum loss: 3448.588867, avg loss: 2.245175, ppl: 9.442069 +epoch: 2, batch: 31849, sum loss: 4563.067871, avg loss: 2.442756, ppl: 11.504701 +epoch: 2, batch: 31850, sum loss: 4416.101562, avg loss: 2.526374, ppl: 12.508071 +epoch: 2, batch: 31851, sum loss: 4591.592285, avg loss: 2.761030, ppl: 15.816121 +epoch: 2, batch: 31852, sum loss: 4892.448242, avg loss: 2.550807, ppl: 12.817447 +epoch: 2, batch: 31853, sum loss: 4303.112793, avg loss: 2.331047, ppl: 10.288709 +epoch: 2, batch: 31854, sum loss: 4744.035645, avg loss: 2.707783, ppl: 14.995993 +epoch: 2, batch: 31855, sum loss: 4429.517090, avg loss: 2.678064, ppl: 14.556878 
+epoch: 2, batch: 31856, sum loss: 5122.064941, avg loss: 2.667742, ppl: 14.407404 +epoch: 2, batch: 31857, sum loss: 4187.816895, avg loss: 2.384862, ppl: 10.857561 +epoch: 2, batch: 31858, sum loss: 4791.236328, avg loss: 2.519052, ppl: 12.416818 +epoch: 2, batch: 31859, sum loss: 3809.972900, avg loss: 2.302099, ppl: 9.995135 +epoch: 2, batch: 31860, sum loss: 4507.195312, avg loss: 2.556549, ppl: 12.891248 +epoch: 2, batch: 31861, sum loss: 3841.243408, avg loss: 2.497557, ppl: 12.152773 +epoch: 2, batch: 31862, sum loss: 3776.878418, avg loss: 2.327097, ppl: 10.248147 +epoch: 2, batch: 31863, sum loss: 5146.628418, avg loss: 2.648805, ppl: 14.137140 +epoch: 2, batch: 31864, sum loss: 3959.700195, avg loss: 2.354162, ppl: 10.529299 +epoch: 2, batch: 31865, sum loss: 4269.825684, avg loss: 2.479573, ppl: 11.936172 +epoch: 2, batch: 31866, sum loss: 4671.446777, avg loss: 2.664830, ppl: 14.365507 +epoch: 2, batch: 31867, sum loss: 3729.942383, avg loss: 2.377274, ppl: 10.775484 +epoch: 2, batch: 31868, sum loss: 5686.225586, avg loss: 2.685983, ppl: 14.672613 +epoch: 2, batch: 31869, sum loss: 4607.756836, avg loss: 2.741081, ppl: 15.503732 +epoch: 2, batch: 31870, sum loss: 3980.762451, avg loss: 2.427294, ppl: 11.328190 +epoch: 2, batch: 31871, sum loss: 4032.368164, avg loss: 2.649388, ppl: 14.145377 +epoch: 2, batch: 31872, sum loss: 3759.961182, avg loss: 2.416427, ppl: 11.205753 +epoch: 2, batch: 31873, sum loss: 4258.457520, avg loss: 2.440377, ppl: 11.477365 +epoch: 2, batch: 31874, sum loss: 3440.096191, avg loss: 2.202366, ppl: 9.046395 +epoch: 2, batch: 31875, sum loss: 4787.842773, avg loss: 2.457825, ppl: 11.679380 +epoch: 2, batch: 31876, sum loss: 3813.196777, avg loss: 2.319463, ppl: 10.170209 +epoch: 2, batch: 31877, sum loss: 3724.265625, avg loss: 2.296095, ppl: 9.935306 +epoch: 2, batch: 31878, sum loss: 4544.284668, avg loss: 2.608659, ppl: 13.580831 +epoch: 2, batch: 31879, sum loss: 4087.678711, avg loss: 2.295159, ppl: 9.926018 +epoch: 2, 
batch: 31880, sum loss: 4799.798828, avg loss: 2.535551, ppl: 12.623388 +epoch: 2, batch: 31881, sum loss: 4301.642578, avg loss: 2.578923, ppl: 13.182926 +epoch: 2, batch: 31882, sum loss: 4438.657227, avg loss: 2.534927, ppl: 12.615512 +epoch: 2, batch: 31883, sum loss: 4006.092285, avg loss: 2.345487, ppl: 10.438356 +epoch: 2, batch: 31884, sum loss: 4150.529297, avg loss: 2.323925, ppl: 10.215688 +epoch: 2, batch: 31885, sum loss: 3923.956055, avg loss: 2.469450, ppl: 11.815947 +epoch: 2, batch: 31886, sum loss: 4317.893066, avg loss: 2.565593, ppl: 13.008370 +epoch: 2, batch: 31887, sum loss: 2990.664062, avg loss: 2.347460, ppl: 10.458970 +epoch: 2, batch: 31888, sum loss: 4097.050293, avg loss: 2.376479, ppl: 10.766930 +epoch: 2, batch: 31889, sum loss: 4210.458984, avg loss: 2.309632, ppl: 10.070719 +epoch: 2, batch: 31890, sum loss: 4759.062012, avg loss: 2.562769, ppl: 12.971685 +epoch: 2, batch: 31891, sum loss: 4348.773438, avg loss: 2.613446, ppl: 13.645990 +epoch: 2, batch: 31892, sum loss: 3697.098877, avg loss: 2.309244, ppl: 10.066808 +epoch: 2, batch: 31893, sum loss: 4055.916504, avg loss: 2.522336, ppl: 12.457664 +epoch: 2, batch: 31894, sum loss: 3899.528809, avg loss: 2.370534, ppl: 10.703108 +epoch: 2, batch: 31895, sum loss: 4782.336914, avg loss: 2.567008, ppl: 13.026796 +epoch: 2, batch: 31896, sum loss: 4005.588379, avg loss: 2.339713, ppl: 10.378256 +epoch: 2, batch: 31897, sum loss: 4057.633789, avg loss: 2.289861, ppl: 9.873568 +epoch: 2, batch: 31898, sum loss: 4872.760742, avg loss: 2.656903, ppl: 14.252086 +epoch: 2, batch: 31899, sum loss: 3539.580322, avg loss: 2.266057, ppl: 9.641306 +epoch: 2, batch: 31900, sum loss: 3107.262451, avg loss: 2.049645, ppl: 7.765147 +epoch: 2, batch: 31901, sum loss: 5562.210938, avg loss: 2.674140, ppl: 14.499874 +epoch: 2, batch: 31902, sum loss: 4056.127930, avg loss: 2.385958, ppl: 10.869468 +epoch: 2, batch: 31903, sum loss: 3301.919678, avg loss: 2.142712, ppl: 8.522522 +epoch: 2, batch: 
31904, sum loss: 4714.731934, avg loss: 2.796401, ppl: 16.385569 +epoch: 2, batch: 31905, sum loss: 3725.279297, avg loss: 2.285447, ppl: 9.830083 +epoch: 2, batch: 31906, sum loss: 3975.468262, avg loss: 2.458546, ppl: 11.687801 +epoch: 2, batch: 31907, sum loss: 4378.954102, avg loss: 2.455947, ppl: 11.657473 +epoch: 2, batch: 31908, sum loss: 4923.736816, avg loss: 2.711309, ppl: 15.048958 +epoch: 2, batch: 31909, sum loss: 4220.602539, avg loss: 2.459559, ppl: 11.699648 +epoch: 2, batch: 31910, sum loss: 4555.914551, avg loss: 2.565267, ppl: 13.004134 +epoch: 2, batch: 31911, sum loss: 3659.292480, avg loss: 2.396393, ppl: 10.983491 +epoch: 2, batch: 31912, sum loss: 4336.782715, avg loss: 2.652467, ppl: 14.188993 +epoch: 2, batch: 31913, sum loss: 4013.810303, avg loss: 2.419415, ppl: 11.239287 +epoch: 2, batch: 31914, sum loss: 3662.625000, avg loss: 2.197135, ppl: 8.999198 +epoch: 2, batch: 31915, sum loss: 3680.595947, avg loss: 2.426233, ppl: 11.316177 +epoch: 2, batch: 31916, sum loss: 4339.048340, avg loss: 2.390660, ppl: 10.920702 +epoch: 2, batch: 31917, sum loss: 4319.076172, avg loss: 2.646493, ppl: 14.104486 +epoch: 2, batch: 31918, sum loss: 5076.549805, avg loss: 2.618128, ppl: 13.710032 +epoch: 2, batch: 31919, sum loss: 3983.909180, avg loss: 2.283042, ppl: 9.806471 +epoch: 2, batch: 31920, sum loss: 4744.489746, avg loss: 2.777804, ppl: 16.083664 +epoch: 2, batch: 31921, sum loss: 3340.329590, avg loss: 2.280088, ppl: 9.777545 +epoch: 2, batch: 31922, sum loss: 4311.551758, avg loss: 2.622598, ppl: 13.771461 +epoch: 2, batch: 31923, sum loss: 5238.811523, avg loss: 2.592188, ppl: 13.358968 +epoch: 2, batch: 31924, sum loss: 3936.945068, avg loss: 2.413823, ppl: 11.176606 +epoch: 2, batch: 31925, sum loss: 4610.224121, avg loss: 2.750731, ppl: 15.654063 +epoch: 2, batch: 31926, sum loss: 3315.653320, avg loss: 2.450594, ppl: 11.595229 +epoch: 2, batch: 31927, sum loss: 5672.108398, avg loss: 2.821945, ppl: 16.809509 +epoch: 2, batch: 31928, sum 
loss: 4169.622070, avg loss: 2.431266, ppl: 11.373275 +epoch: 2, batch: 31929, sum loss: 3941.919678, avg loss: 2.396304, ppl: 10.982506 +epoch: 2, batch: 31930, sum loss: 3917.600586, avg loss: 2.482637, ppl: 11.972792 +epoch: 2, batch: 31931, sum loss: 4132.142578, avg loss: 2.286742, ppl: 9.842814 +epoch: 2, batch: 31932, sum loss: 4415.464844, avg loss: 2.826802, ppl: 16.891356 +epoch: 2, batch: 31933, sum loss: 4274.489258, avg loss: 2.530781, ppl: 12.563314 +epoch: 2, batch: 31934, sum loss: 5080.791992, avg loss: 2.392087, ppl: 10.936289 +epoch: 2, batch: 31935, sum loss: 4832.883789, avg loss: 2.753780, ppl: 15.701871 +epoch: 2, batch: 31936, sum loss: 3452.201416, avg loss: 2.340476, ppl: 10.386174 +epoch: 2, batch: 31937, sum loss: 4625.386230, avg loss: 2.478771, ppl: 11.926595 +epoch: 2, batch: 31938, sum loss: 4299.219727, avg loss: 2.508296, ppl: 12.283983 +epoch: 2, batch: 31939, sum loss: 3421.635742, avg loss: 2.214651, ppl: 9.158214 +epoch: 2, batch: 31940, sum loss: 4777.167480, avg loss: 2.806796, ppl: 16.556791 +epoch: 2, batch: 31941, sum loss: 3540.161865, avg loss: 2.342927, ppl: 10.411662 +epoch: 2, batch: 31942, sum loss: 4976.046875, avg loss: 2.606625, ppl: 13.553232 +epoch: 2, batch: 31943, sum loss: 4174.333008, avg loss: 2.537588, ppl: 12.649129 +epoch: 2, batch: 31944, sum loss: 3899.677246, avg loss: 2.315723, ppl: 10.132245 +epoch: 2, batch: 31945, sum loss: 3886.594971, avg loss: 2.530335, ppl: 12.557717 +epoch: 2, batch: 31946, sum loss: 4261.666504, avg loss: 2.719634, ppl: 15.174771 +epoch: 2, batch: 31947, sum loss: 4065.823242, avg loss: 2.414384, ppl: 11.182883 +epoch: 2, batch: 31948, sum loss: 4230.961426, avg loss: 2.467033, ppl: 11.787420 +epoch: 2, batch: 31949, sum loss: 3157.684082, avg loss: 2.191314, ppl: 8.946966 +epoch: 2, batch: 31950, sum loss: 4441.829102, avg loss: 2.593012, ppl: 13.369976 +epoch: 2, batch: 31951, sum loss: 4921.344727, avg loss: 2.650159, ppl: 14.156284 +epoch: 2, batch: 31952, sum loss: 
3706.958984, avg loss: 2.319749, ppl: 10.173121 +epoch: 2, batch: 31953, sum loss: 4096.410156, avg loss: 2.351556, ppl: 10.501896 +epoch: 2, batch: 31954, sum loss: 3978.930908, avg loss: 2.545701, ppl: 12.752168 +epoch: 2, batch: 31955, sum loss: 3674.044922, avg loss: 2.015384, ppl: 7.503608 +epoch: 2, batch: 31956, sum loss: 3778.215332, avg loss: 2.394306, ppl: 10.960590 +epoch: 2, batch: 31957, sum loss: 4376.782227, avg loss: 2.463017, ppl: 11.740184 +epoch: 2, batch: 31958, sum loss: 4002.435547, avg loss: 2.336506, ppl: 10.345032 +epoch: 2, batch: 31959, sum loss: 3919.678223, avg loss: 2.298932, ppl: 9.963531 +epoch: 2, batch: 31960, sum loss: 4954.975586, avg loss: 2.559388, ppl: 12.927905 +epoch: 2, batch: 31961, sum loss: 3958.516602, avg loss: 2.532640, ppl: 12.586694 +epoch: 2, batch: 31962, sum loss: 4062.010254, avg loss: 2.476835, ppl: 11.903536 +epoch: 2, batch: 31963, sum loss: 4574.934082, avg loss: 2.565863, ppl: 13.011888 +epoch: 2, batch: 31964, sum loss: 4450.676270, avg loss: 2.568192, ppl: 13.042216 +epoch: 2, batch: 31965, sum loss: 3573.947021, avg loss: 2.363722, ppl: 10.630441 +epoch: 2, batch: 31966, sum loss: 4099.434082, avg loss: 2.525837, ppl: 12.501360 +epoch: 2, batch: 31967, sum loss: 4349.578613, avg loss: 2.474163, ppl: 11.871767 +epoch: 2, batch: 31968, sum loss: 4052.582520, avg loss: 2.531282, ppl: 12.569613 +epoch: 2, batch: 31969, sum loss: 4394.044922, avg loss: 2.428991, ppl: 11.347430 +epoch: 2, batch: 31970, sum loss: 4952.496582, avg loss: 2.481211, ppl: 11.955730 +epoch: 2, batch: 31971, sum loss: 4099.042480, avg loss: 2.516294, ppl: 12.382619 +epoch: 2, batch: 31972, sum loss: 3346.830322, avg loss: 2.360247, ppl: 10.593569 +epoch: 2, batch: 31973, sum loss: 4878.111816, avg loss: 2.826252, ppl: 16.882076 +epoch: 2, batch: 31974, sum loss: 4417.858398, avg loss: 2.455730, ppl: 11.654938 +epoch: 2, batch: 31975, sum loss: 4231.233398, avg loss: 2.445800, ppl: 11.539773 +epoch: 2, batch: 31976, sum loss: 
3849.322266, avg loss: 2.427063, ppl: 11.325573 +epoch: 2, batch: 31977, sum loss: 3885.481689, avg loss: 2.463844, ppl: 11.749889 +epoch: 2, batch: 31978, sum loss: 3515.673096, avg loss: 2.329803, ppl: 10.275919 +epoch: 2, batch: 31979, sum loss: 4098.333008, avg loss: 2.543968, ppl: 12.730089 +epoch: 2, batch: 31980, sum loss: 4640.016113, avg loss: 2.731028, ppl: 15.348655 +epoch: 2, batch: 31981, sum loss: 4757.715820, avg loss: 2.498801, ppl: 12.167890 +epoch: 2, batch: 31982, sum loss: 3619.900391, avg loss: 2.296891, ppl: 9.943221 +epoch: 2, batch: 31983, sum loss: 4187.555664, avg loss: 2.351238, ppl: 10.498564 +epoch: 2, batch: 31984, sum loss: 3869.388184, avg loss: 2.438178, ppl: 11.452154 +epoch: 2, batch: 31985, sum loss: 4197.816406, avg loss: 2.503170, ppl: 12.221177 +epoch: 2, batch: 31986, sum loss: 3698.498047, avg loss: 2.417319, ppl: 11.215750 +epoch: 2, batch: 31987, sum loss: 4686.524902, avg loss: 2.687227, ppl: 14.690888 +epoch: 2, batch: 31988, sum loss: 5061.410645, avg loss: 2.640277, ppl: 14.017084 +epoch: 2, batch: 31989, sum loss: 4619.394531, avg loss: 2.641163, ppl: 14.029515 +epoch: 2, batch: 31990, sum loss: 4977.094238, avg loss: 2.622284, ppl: 13.767128 +epoch: 2, batch: 31991, sum loss: 3853.392578, avg loss: 2.248187, ppl: 9.470551 +epoch: 2, batch: 31992, sum loss: 5265.691895, avg loss: 2.635481, ppl: 13.950026 +epoch: 2, batch: 31993, sum loss: 3946.512207, avg loss: 2.329700, ppl: 10.274861 +epoch: 2, batch: 31994, sum loss: 3649.790039, avg loss: 2.168622, ppl: 8.746220 +epoch: 2, batch: 31995, sum loss: 3241.867188, avg loss: 2.392522, ppl: 10.941051 +epoch: 2, batch: 31996, sum loss: 4600.361328, avg loss: 2.818849, ppl: 16.757549 +epoch: 2, batch: 31997, sum loss: 3869.647461, avg loss: 2.493330, ppl: 12.101504 +epoch: 2, batch: 31998, sum loss: 3587.257324, avg loss: 2.420552, ppl: 11.252064 +epoch: 2, batch: 31999, sum loss: 3547.605225, avg loss: 2.424884, ppl: 11.300920 +epoch: 2, batch: 32000, sum loss: 
3058.781250, avg loss: 2.037829, ppl: 7.673930 +epoch: 2, batch: 32001, sum loss: 3643.513672, avg loss: 2.397048, ppl: 10.990689 +epoch: 2, batch: 32002, sum loss: 4499.886719, avg loss: 2.584656, ppl: 13.258734 +epoch: 2, batch: 32003, sum loss: 3970.361816, avg loss: 2.268778, ppl: 9.667581 +epoch: 2, batch: 32004, sum loss: 3642.948730, avg loss: 2.364016, ppl: 10.633571 +epoch: 2, batch: 32005, sum loss: 3890.897461, avg loss: 2.433332, ppl: 11.396790 +epoch: 2, batch: 32006, sum loss: 4469.522461, avg loss: 2.362327, ppl: 10.615623 +epoch: 2, batch: 32007, sum loss: 4416.254883, avg loss: 2.427848, ppl: 11.334460 +epoch: 2, batch: 32008, sum loss: 3560.606934, avg loss: 2.394490, ppl: 10.962608 +epoch: 2, batch: 32009, sum loss: 3853.286133, avg loss: 2.711672, ppl: 15.054426 +epoch: 2, batch: 32010, sum loss: 4566.568359, avg loss: 2.627485, ppl: 13.838919 +epoch: 2, batch: 32011, sum loss: 4795.562012, avg loss: 2.712422, ppl: 15.065722 +epoch: 2, batch: 32012, sum loss: 4493.761719, avg loss: 2.526004, ppl: 12.503447 +epoch: 2, batch: 32013, sum loss: 3709.568848, avg loss: 2.370332, ppl: 10.700939 +epoch: 2, batch: 32014, sum loss: 4181.413574, avg loss: 2.349109, ppl: 10.476228 +epoch: 2, batch: 32015, sum loss: 3435.244141, avg loss: 2.287113, ppl: 9.846472 +epoch: 2, batch: 32016, sum loss: 5479.729004, avg loss: 2.621880, ppl: 13.761568 +epoch: 2, batch: 32017, sum loss: 4303.787109, avg loss: 2.584857, ppl: 13.261395 +epoch: 2, batch: 32018, sum loss: 4175.214355, avg loss: 2.489692, ppl: 12.057568 +epoch: 2, batch: 32019, sum loss: 3341.281738, avg loss: 2.361330, ppl: 10.605044 +epoch: 2, batch: 32020, sum loss: 3184.576660, avg loss: 1.898972, ppl: 6.679027 +epoch: 2, batch: 32021, sum loss: 3975.265381, avg loss: 2.308517, ppl: 10.059490 +epoch: 2, batch: 32022, sum loss: 3758.260254, avg loss: 2.418443, ppl: 11.228363 +epoch: 2, batch: 32023, sum loss: 4215.777344, avg loss: 2.500461, ppl: 12.188112 +epoch: 2, batch: 32024, sum loss: 
4213.994629, avg loss: 2.281535, ppl: 9.791698 +epoch: 2, batch: 32025, sum loss: 4501.663574, avg loss: 2.498149, ppl: 12.159961 +epoch: 2, batch: 32026, sum loss: 3928.384033, avg loss: 2.392439, ppl: 10.940141 +epoch: 2, batch: 32027, sum loss: 4059.447266, avg loss: 2.524532, ppl: 12.485049 +epoch: 2, batch: 32028, sum loss: 3938.537354, avg loss: 2.264829, ppl: 9.629478 +epoch: 2, batch: 32029, sum loss: 4144.512695, avg loss: 2.209229, ppl: 9.108686 +epoch: 2, batch: 32030, sum loss: 4164.432617, avg loss: 2.487714, ppl: 12.033733 +epoch: 2, batch: 32031, sum loss: 4057.705322, avg loss: 2.577958, ppl: 13.170212 +epoch: 2, batch: 32032, sum loss: 4873.028809, avg loss: 2.478652, ppl: 11.925173 +epoch: 2, batch: 32033, sum loss: 4233.283203, avg loss: 2.280864, ppl: 9.785129 +epoch: 2, batch: 32034, sum loss: 3343.912842, avg loss: 2.213046, ppl: 9.143528 +epoch: 2, batch: 32035, sum loss: 4543.064453, avg loss: 2.490715, ppl: 12.069906 +epoch: 2, batch: 32036, sum loss: 4509.983398, avg loss: 2.684514, ppl: 14.651080 +epoch: 2, batch: 32037, sum loss: 4150.602539, avg loss: 2.351616, ppl: 10.502529 +epoch: 2, batch: 32038, sum loss: 3326.468994, avg loss: 2.447733, ppl: 11.562105 +epoch: 2, batch: 32039, sum loss: 3848.920654, avg loss: 2.364202, ppl: 10.635546 +epoch: 2, batch: 32040, sum loss: 3822.651367, avg loss: 2.442589, ppl: 11.502781 +epoch: 2, batch: 32041, sum loss: 4240.574219, avg loss: 2.293442, ppl: 9.908986 +epoch: 2, batch: 32042, sum loss: 4571.558594, avg loss: 2.515993, ppl: 12.378890 +epoch: 2, batch: 32043, sum loss: 4113.826172, avg loss: 2.735257, ppl: 15.413699 +epoch: 2, batch: 32044, sum loss: 3927.112305, avg loss: 2.228781, ppl: 9.288539 +epoch: 2, batch: 32045, sum loss: 4537.803711, avg loss: 2.569538, ppl: 13.059785 +epoch: 2, batch: 32046, sum loss: 3857.588623, avg loss: 2.485560, ppl: 12.007842 +epoch: 2, batch: 32047, sum loss: 3416.143555, avg loss: 2.099658, ppl: 8.163378 +epoch: 2, batch: 32048, sum loss: 4000.670898, 
avg loss: 2.382770, ppl: 10.834874 +epoch: 2, batch: 32049, sum loss: 4094.781494, avg loss: 2.404452, ppl: 11.072359 +epoch: 2, batch: 32050, sum loss: 4489.586426, avg loss: 2.586167, ppl: 13.278781 +epoch: 2, batch: 32051, sum loss: 4403.305176, avg loss: 2.590180, ppl: 13.332167 +epoch: 2, batch: 32052, sum loss: 4846.266113, avg loss: 2.465039, ppl: 11.763938 +epoch: 2, batch: 32053, sum loss: 3616.614502, avg loss: 2.609390, ppl: 13.590758 +epoch: 2, batch: 32054, sum loss: 4605.806641, avg loss: 2.685601, ppl: 14.667021 +epoch: 2, batch: 32055, sum loss: 4145.536133, avg loss: 2.281528, ppl: 9.791631 +epoch: 2, batch: 32056, sum loss: 4167.524902, avg loss: 2.530373, ppl: 12.558193 +epoch: 2, batch: 32057, sum loss: 3687.958984, avg loss: 2.606331, ppl: 13.549253 +epoch: 2, batch: 32058, sum loss: 3860.077637, avg loss: 2.471241, ppl: 11.837122 +epoch: 2, batch: 32059, sum loss: 3942.113281, avg loss: 2.541659, ppl: 12.700726 +epoch: 2, batch: 32060, sum loss: 5069.055664, avg loss: 2.888351, ppl: 17.963657 +epoch: 2, batch: 32061, sum loss: 4029.736328, avg loss: 2.528065, ppl: 12.529244 +epoch: 2, batch: 32062, sum loss: 4123.232422, avg loss: 2.263026, ppl: 9.612127 +epoch: 2, batch: 32063, sum loss: 4137.815430, avg loss: 2.395956, ppl: 10.978686 +epoch: 2, batch: 32064, sum loss: 4815.109375, avg loss: 2.634086, ppl: 13.930576 +epoch: 2, batch: 32065, sum loss: 4580.372070, avg loss: 2.539009, ppl: 12.667109 +epoch: 2, batch: 32066, sum loss: 4766.208984, avg loss: 2.556979, ppl: 12.896799 +epoch: 2, batch: 32067, sum loss: 4044.692871, avg loss: 2.327211, ppl: 10.249318 +epoch: 2, batch: 32068, sum loss: 4063.054688, avg loss: 2.534657, ppl: 12.612098 +epoch: 2, batch: 32069, sum loss: 4335.415527, avg loss: 2.544258, ppl: 12.733774 +epoch: 2, batch: 32070, sum loss: 4234.880859, avg loss: 2.466442, ppl: 11.780456 +epoch: 2, batch: 32071, sum loss: 4732.811035, avg loss: 2.590482, ppl: 13.336202 +epoch: 2, batch: 32072, sum loss: 4101.807129, avg loss: 
2.440099, ppl: 11.474182 +epoch: 2, batch: 32073, sum loss: 4066.805176, avg loss: 2.305445, ppl: 10.028642 +epoch: 2, batch: 32074, sum loss: 3654.409668, avg loss: 2.408971, ppl: 11.122514 +epoch: 2, batch: 32075, sum loss: 4101.594727, avg loss: 2.383262, ppl: 10.840210 +epoch: 2, batch: 32076, sum loss: 3509.294922, avg loss: 2.274333, ppl: 9.721428 +epoch: 2, batch: 32077, sum loss: 3772.164795, avg loss: 2.314212, ppl: 10.116943 +epoch: 2, batch: 32078, sum loss: 3589.504883, avg loss: 2.629674, ppl: 13.869247 +epoch: 2, batch: 32079, sum loss: 3654.081299, avg loss: 2.371240, ppl: 10.710670 +epoch: 2, batch: 32080, sum loss: 3455.442139, avg loss: 2.339501, ppl: 10.376054 +epoch: 2, batch: 32081, sum loss: 4225.714844, avg loss: 2.427177, ppl: 11.326860 +epoch: 2, batch: 32082, sum loss: 4382.687988, avg loss: 2.295803, ppl: 9.932409 +epoch: 2, batch: 32083, sum loss: 3746.618652, avg loss: 2.119128, ppl: 8.323877 +epoch: 2, batch: 32084, sum loss: 4334.637695, avg loss: 2.440675, ppl: 11.480783 +epoch: 2, batch: 32085, sum loss: 4933.241211, avg loss: 2.713554, ppl: 15.082783 +epoch: 2, batch: 32086, sum loss: 3249.651367, avg loss: 2.221225, ppl: 9.218613 +epoch: 2, batch: 32087, sum loss: 3262.263184, avg loss: 2.169058, ppl: 8.750036 +epoch: 2, batch: 32088, sum loss: 4157.673340, avg loss: 2.371747, ppl: 10.716103 +epoch: 2, batch: 32089, sum loss: 3890.581299, avg loss: 2.342313, ppl: 10.405272 +epoch: 2, batch: 32090, sum loss: 3807.184570, avg loss: 2.184271, ppl: 8.884171 +epoch: 2, batch: 32091, sum loss: 3775.807861, avg loss: 2.472697, ppl: 11.854372 +epoch: 2, batch: 32092, sum loss: 3148.908691, avg loss: 2.383731, ppl: 10.845293 +epoch: 2, batch: 32093, sum loss: 3842.068115, avg loss: 2.371647, ppl: 10.715027 +epoch: 2, batch: 32094, sum loss: 5168.075195, avg loss: 2.672221, ppl: 14.472075 +epoch: 2, batch: 32095, sum loss: 3867.177734, avg loss: 2.225074, ppl: 9.254164 +epoch: 2, batch: 32096, sum loss: 4997.141602, avg loss: 2.673698, ppl: 
14.493466 +epoch: 2, batch: 32097, sum loss: 4035.610840, avg loss: 2.422335, ppl: 11.272154 +epoch: 2, batch: 32098, sum loss: 3895.965576, avg loss: 2.590403, ppl: 13.335139 +epoch: 2, batch: 32099, sum loss: 5473.459961, avg loss: 2.804027, ppl: 16.511000 +epoch: 2, batch: 32100, sum loss: 5158.489746, avg loss: 2.705029, ppl: 14.954747 +epoch: 2, batch: 32101, sum loss: 4788.128906, avg loss: 2.452935, ppl: 11.622408 +epoch: 2, batch: 32102, sum loss: 4440.336426, avg loss: 2.633651, ppl: 13.924522 +epoch: 2, batch: 32103, sum loss: 3517.763672, avg loss: 2.174143, ppl: 8.794645 +epoch: 2, batch: 32104, sum loss: 3382.884766, avg loss: 2.449591, ppl: 11.583605 +epoch: 2, batch: 32105, sum loss: 4680.288086, avg loss: 2.610311, ppl: 13.603285 +epoch: 2, batch: 32106, sum loss: 3891.663818, avg loss: 2.563679, ppl: 12.983489 +epoch: 2, batch: 32107, sum loss: 3996.418213, avg loss: 2.519810, ppl: 12.426232 +epoch: 2, batch: 32108, sum loss: 4854.962402, avg loss: 2.654436, ppl: 14.216960 +epoch: 2, batch: 32109, sum loss: 3494.015381, avg loss: 2.155469, ppl: 8.631937 +epoch: 2, batch: 32110, sum loss: 5427.996094, avg loss: 2.742797, ppl: 15.530369 +epoch: 2, batch: 32111, sum loss: 4015.647461, avg loss: 2.357984, ppl: 10.569625 +epoch: 2, batch: 32112, sum loss: 3986.014648, avg loss: 2.733892, ppl: 15.392678 +epoch: 2, batch: 32113, sum loss: 4045.306396, avg loss: 2.568448, ppl: 13.045566 +epoch: 2, batch: 32114, sum loss: 4365.781250, avg loss: 2.474933, ppl: 11.880907 +epoch: 2, batch: 32115, sum loss: 4573.030273, avg loss: 2.432463, ppl: 11.386892 +epoch: 2, batch: 32116, sum loss: 4349.248535, avg loss: 2.651981, ppl: 14.182104 +epoch: 2, batch: 32117, sum loss: 3430.611816, avg loss: 2.103379, ppl: 8.193814 +epoch: 2, batch: 32118, sum loss: 4831.256836, avg loss: 2.504539, ppl: 12.237922 +epoch: 2, batch: 32119, sum loss: 3388.846680, avg loss: 2.269824, ppl: 9.677693 +epoch: 2, batch: 32120, sum loss: 4008.461670, avg loss: 2.469785, ppl: 11.819911 
+epoch: 2, batch: 32121, sum loss: 3727.520996, avg loss: 2.331157, ppl: 10.289845 +epoch: 2, batch: 32122, sum loss: 5155.837891, avg loss: 2.634562, ppl: 13.937207 +epoch: 2, batch: 32123, sum loss: 4793.666992, avg loss: 2.742373, ppl: 15.523772 +epoch: 2, batch: 32124, sum loss: 4329.573242, avg loss: 2.396001, ppl: 10.979178 +epoch: 2, batch: 32125, sum loss: 3736.226074, avg loss: 2.346876, ppl: 10.452861 +epoch: 2, batch: 32126, sum loss: 4242.005859, avg loss: 2.686514, ppl: 14.680412 +epoch: 2, batch: 32127, sum loss: 3753.606934, avg loss: 2.230307, ppl: 9.302723 +epoch: 2, batch: 32128, sum loss: 3685.342773, avg loss: 2.124117, ppl: 8.365506 +epoch: 2, batch: 32129, sum loss: 3943.062500, avg loss: 2.417574, ppl: 11.218606 +epoch: 2, batch: 32130, sum loss: 4557.380859, avg loss: 2.580623, ppl: 13.205368 +epoch: 2, batch: 32131, sum loss: 4142.986328, avg loss: 2.422799, ppl: 11.277382 +epoch: 2, batch: 32132, sum loss: 4401.957031, avg loss: 2.532772, ppl: 12.588347 +epoch: 2, batch: 32133, sum loss: 5142.116211, avg loss: 2.597028, ppl: 13.423786 +epoch: 2, batch: 32134, sum loss: 3199.201416, avg loss: 2.517074, ppl: 12.392288 +epoch: 2, batch: 32135, sum loss: 4292.954590, avg loss: 2.600215, ppl: 13.466633 +epoch: 2, batch: 32136, sum loss: 4553.004395, avg loss: 2.489341, ppl: 12.053330 +epoch: 2, batch: 32137, sum loss: 4633.535645, avg loss: 2.643203, ppl: 14.058167 +epoch: 2, batch: 32138, sum loss: 4258.339355, avg loss: 2.583944, ppl: 13.249289 +epoch: 2, batch: 32139, sum loss: 5202.645996, avg loss: 2.705484, ppl: 14.961558 +epoch: 2, batch: 32140, sum loss: 3903.653076, avg loss: 2.578371, ppl: 13.175652 +epoch: 2, batch: 32141, sum loss: 4820.329590, avg loss: 2.461864, ppl: 11.726649 +epoch: 2, batch: 32142, sum loss: 3446.799561, avg loss: 2.239636, ppl: 9.389913 +epoch: 2, batch: 32143, sum loss: 4020.657715, avg loss: 2.593973, ppl: 13.382835 +epoch: 2, batch: 32144, sum loss: 3714.614502, avg loss: 2.372040, ppl: 10.719235 +epoch: 2, 
batch: 32145, sum loss: 4166.883301, avg loss: 2.486207, ppl: 12.015617 +epoch: 2, batch: 32146, sum loss: 4934.982910, avg loss: 2.537266, ppl: 12.645059 +epoch: 2, batch: 32147, sum loss: 4921.064941, avg loss: 2.823330, ppl: 16.832817 +epoch: 2, batch: 32148, sum loss: 4255.762207, avg loss: 2.369578, ppl: 10.692881 +epoch: 2, batch: 32149, sum loss: 4468.286621, avg loss: 2.525883, ppl: 12.501929 +epoch: 2, batch: 32150, sum loss: 4517.867188, avg loss: 2.651330, ppl: 14.172883 +epoch: 2, batch: 32151, sum loss: 4562.682617, avg loss: 2.623739, ppl: 13.787181 +epoch: 2, batch: 32152, sum loss: 4193.029785, avg loss: 2.432152, ppl: 11.383353 +epoch: 2, batch: 32153, sum loss: 5010.493652, avg loss: 2.623295, ppl: 13.781058 +epoch: 2, batch: 32154, sum loss: 3462.586670, avg loss: 2.257227, ppl: 9.556554 +epoch: 2, batch: 32155, sum loss: 3788.871826, avg loss: 2.274233, ppl: 9.720457 +epoch: 2, batch: 32156, sum loss: 3949.967773, avg loss: 2.385246, ppl: 10.861737 +epoch: 2, batch: 32157, sum loss: 3579.394531, avg loss: 2.334895, ppl: 10.328380 +epoch: 2, batch: 32158, sum loss: 4764.864746, avg loss: 2.615184, ppl: 13.669729 +epoch: 2, batch: 32159, sum loss: 4123.973633, avg loss: 2.472406, ppl: 11.850930 +epoch: 2, batch: 32160, sum loss: 3952.450928, avg loss: 2.361082, ppl: 10.602415 +epoch: 2, batch: 32161, sum loss: 3932.036621, avg loss: 2.304828, ppl: 10.022453 +epoch: 2, batch: 32162, sum loss: 4396.635742, avg loss: 2.658184, ppl: 14.270348 +epoch: 2, batch: 32163, sum loss: 4033.919922, avg loss: 2.449253, ppl: 11.579695 +epoch: 2, batch: 32164, sum loss: 3820.456055, avg loss: 2.407345, ppl: 11.104438 +epoch: 2, batch: 32165, sum loss: 4439.599609, avg loss: 2.542726, ppl: 12.714283 +epoch: 2, batch: 32166, sum loss: 4480.800781, avg loss: 2.637317, ppl: 13.975653 +epoch: 2, batch: 32167, sum loss: 4235.586914, avg loss: 2.592159, ppl: 13.358576 +epoch: 2, batch: 32168, sum loss: 3438.375000, avg loss: 2.167954, ppl: 8.740382 +epoch: 2, batch: 
32169, sum loss: 4501.569824, avg loss: 2.644871, ppl: 14.081622 +epoch: 2, batch: 32170, sum loss: 4410.225098, avg loss: 2.470714, ppl: 11.830895 +epoch: 2, batch: 32171, sum loss: 3870.723877, avg loss: 2.425266, ppl: 11.305231 +epoch: 2, batch: 32172, sum loss: 4003.385986, avg loss: 2.348027, ppl: 10.464902 +epoch: 2, batch: 32173, sum loss: 4378.016602, avg loss: 2.587480, ppl: 13.296227 +epoch: 2, batch: 32174, sum loss: 4181.498047, avg loss: 2.379908, ppl: 10.803907 +epoch: 2, batch: 32175, sum loss: 4299.173340, avg loss: 2.569739, ppl: 13.062416 +epoch: 2, batch: 32176, sum loss: 5164.186523, avg loss: 2.630762, ppl: 13.884351 +epoch: 2, batch: 32177, sum loss: 4962.211914, avg loss: 2.556523, ppl: 12.890922 +epoch: 2, batch: 32178, sum loss: 4486.492188, avg loss: 2.526178, ppl: 12.505620 +epoch: 2, batch: 32179, sum loss: 3565.915527, avg loss: 2.155935, ppl: 8.635958 +epoch: 2, batch: 32180, sum loss: 3625.120117, avg loss: 2.384948, ppl: 10.858493 +epoch: 2, batch: 32181, sum loss: 3830.768066, avg loss: 2.419942, ppl: 11.245206 +epoch: 2, batch: 32182, sum loss: 4927.826172, avg loss: 2.632386, ppl: 13.906912 +epoch: 2, batch: 32183, sum loss: 4529.381836, avg loss: 2.370163, ppl: 10.699139 +epoch: 2, batch: 32184, sum loss: 3337.899902, avg loss: 2.269137, ppl: 9.671048 +epoch: 2, batch: 32185, sum loss: 3975.891846, avg loss: 2.532415, ppl: 12.583861 +epoch: 2, batch: 32186, sum loss: 5075.314453, avg loss: 2.573689, ppl: 13.114110 +epoch: 2, batch: 32187, sum loss: 4414.504883, avg loss: 2.496892, ppl: 12.144690 +epoch: 2, batch: 32188, sum loss: 4460.602051, avg loss: 2.866711, ppl: 17.579103 +epoch: 2, batch: 32189, sum loss: 4616.824707, avg loss: 2.647262, ppl: 14.115335 +epoch: 2, batch: 32190, sum loss: 3692.878662, avg loss: 2.186429, ppl: 8.903362 +epoch: 2, batch: 32191, sum loss: 5027.401855, avg loss: 2.711651, ppl: 15.054114 +epoch: 2, batch: 32192, sum loss: 3969.473145, avg loss: 2.326772, ppl: 10.244818 +epoch: 2, batch: 32193, sum 
loss: 3608.952637, avg loss: 2.304567, ppl: 10.019842 +epoch: 2, batch: 32194, sum loss: 4044.352539, avg loss: 2.384642, ppl: 10.855175 +epoch: 2, batch: 32195, sum loss: 4115.869629, avg loss: 2.204536, ppl: 9.066048 +epoch: 2, batch: 32196, sum loss: 4300.982422, avg loss: 2.529990, ppl: 12.553377 +epoch: 2, batch: 32197, sum loss: 4258.128418, avg loss: 2.247033, ppl: 9.459631 +epoch: 2, batch: 32198, sum loss: 3888.220459, avg loss: 2.495649, ppl: 12.129599 +epoch: 2, batch: 32199, sum loss: 4982.788574, avg loss: 2.643389, ppl: 14.060778 +epoch: 2, batch: 32200, sum loss: 4127.401855, avg loss: 2.305811, ppl: 10.032310 +epoch: 2, batch: 32201, sum loss: 4367.135254, avg loss: 2.539032, ppl: 12.667406 +epoch: 2, batch: 32202, sum loss: 4156.562500, avg loss: 2.248006, ppl: 9.468833 +epoch: 2, batch: 32203, sum loss: 3429.631348, avg loss: 2.519935, ppl: 12.427788 +epoch: 2, batch: 32204, sum loss: 4082.950928, avg loss: 2.523455, ppl: 12.471617 +epoch: 2, batch: 32205, sum loss: 4591.383301, avg loss: 2.413977, ppl: 11.178326 +epoch: 2, batch: 32206, sum loss: 3710.130615, avg loss: 2.311608, ppl: 10.090635 +epoch: 2, batch: 32207, sum loss: 3984.485840, avg loss: 2.363277, ppl: 10.625720 +epoch: 2, batch: 32208, sum loss: 3978.687988, avg loss: 2.348694, ppl: 10.471886 +epoch: 2, batch: 32209, sum loss: 4650.949219, avg loss: 2.475226, ppl: 11.884392 +epoch: 2, batch: 32210, sum loss: 4172.008301, avg loss: 2.594532, ppl: 13.390326 +epoch: 2, batch: 32211, sum loss: 4271.057617, avg loss: 2.565200, ppl: 13.003257 +epoch: 2, batch: 32212, sum loss: 4150.559082, avg loss: 2.529286, ppl: 12.544551 +epoch: 2, batch: 32213, sum loss: 4604.479980, avg loss: 2.517485, ppl: 12.397380 +epoch: 2, batch: 32214, sum loss: 4817.796387, avg loss: 2.639889, ppl: 14.011642 +epoch: 2, batch: 32215, sum loss: 4109.831055, avg loss: 2.471336, ppl: 11.838248 +epoch: 2, batch: 32216, sum loss: 3982.043457, avg loss: 2.391618, ppl: 10.931164 +epoch: 2, batch: 32217, sum loss: 
4517.030273, avg loss: 2.621608, ppl: 13.757826 +epoch: 2, batch: 32218, sum loss: 3259.445312, avg loss: 2.161436, ppl: 8.683597 +epoch: 2, batch: 32219, sum loss: 4759.503418, avg loss: 2.571315, ppl: 13.083014 +epoch: 2, batch: 32220, sum loss: 5186.432129, avg loss: 2.865432, ppl: 17.556641 +epoch: 2, batch: 32221, sum loss: 4428.214355, avg loss: 2.460119, ppl: 11.706207 +epoch: 2, batch: 32222, sum loss: 4263.601074, avg loss: 2.513916, ppl: 12.353208 +epoch: 2, batch: 32223, sum loss: 5551.901367, avg loss: 2.766269, ppl: 15.899199 +epoch: 2, batch: 32224, sum loss: 3940.067383, avg loss: 2.272242, ppl: 9.701125 +epoch: 2, batch: 32225, sum loss: 4420.500488, avg loss: 2.416895, ppl: 11.210994 +epoch: 2, batch: 32226, sum loss: 4219.602539, avg loss: 2.535819, ppl: 12.626765 +epoch: 2, batch: 32227, sum loss: 4189.447266, avg loss: 2.435725, ppl: 11.424101 +epoch: 2, batch: 32228, sum loss: 4339.092773, avg loss: 2.519798, ppl: 12.426090 +epoch: 2, batch: 32229, sum loss: 3939.531494, avg loss: 2.605510, ppl: 13.538135 +epoch: 2, batch: 32230, sum loss: 4184.898438, avg loss: 2.591268, ppl: 13.346688 +epoch: 2, batch: 32231, sum loss: 4557.561523, avg loss: 2.969095, ppl: 19.474297 +epoch: 2, batch: 32232, sum loss: 4396.771973, avg loss: 2.417137, ppl: 11.213707 +epoch: 2, batch: 32233, sum loss: 3946.483154, avg loss: 2.317371, ppl: 10.148959 +epoch: 2, batch: 32234, sum loss: 4278.916016, avg loss: 2.307937, ppl: 10.053666 +epoch: 2, batch: 32235, sum loss: 4617.128906, avg loss: 2.503866, ppl: 12.229682 +epoch: 2, batch: 32236, sum loss: 3711.542969, avg loss: 2.355040, ppl: 10.538548 +epoch: 2, batch: 32237, sum loss: 2876.920898, avg loss: 2.006221, ppl: 7.435167 +epoch: 2, batch: 32238, sum loss: 5091.534180, avg loss: 2.792943, ppl: 16.328999 +epoch: 2, batch: 32239, sum loss: 3677.182129, avg loss: 2.384684, ppl: 10.855627 +epoch: 2, batch: 32240, sum loss: 4546.939941, avg loss: 2.501067, ppl: 12.195501 +epoch: 2, batch: 32241, sum loss: 
3767.875977, avg loss: 2.631198, ppl: 13.890407 +epoch: 2, batch: 32242, sum loss: 4335.160156, avg loss: 2.423231, ppl: 11.282252 +epoch: 2, batch: 32243, sum loss: 3848.945801, avg loss: 2.345488, ppl: 10.438364 +epoch: 2, batch: 32244, sum loss: 4735.750000, avg loss: 2.540638, ppl: 12.687769 +epoch: 2, batch: 32245, sum loss: 3871.522217, avg loss: 2.353509, ppl: 10.522428 +epoch: 2, batch: 32246, sum loss: 3625.416992, avg loss: 2.397763, ppl: 10.998543 +epoch: 2, batch: 32247, sum loss: 4854.355469, avg loss: 2.537562, ppl: 12.648791 +epoch: 2, batch: 32248, sum loss: 4259.646973, avg loss: 2.668952, ppl: 14.424840 +epoch: 2, batch: 32249, sum loss: 4266.062500, avg loss: 2.483156, ppl: 11.979013 +epoch: 2, batch: 32250, sum loss: 3976.295898, avg loss: 2.337622, ppl: 10.356584 +epoch: 2, batch: 32251, sum loss: 4471.347656, avg loss: 2.405243, ppl: 11.081127 +epoch: 2, batch: 32252, sum loss: 4141.929199, avg loss: 2.317811, ppl: 10.153429 +epoch: 2, batch: 32253, sum loss: 4308.699219, avg loss: 2.628859, ppl: 13.857943 +epoch: 2, batch: 32254, sum loss: 4438.170410, avg loss: 2.433208, ppl: 11.395374 +epoch: 2, batch: 32255, sum loss: 4339.059082, avg loss: 2.593580, ppl: 13.377578 +epoch: 2, batch: 32256, sum loss: 4043.552002, avg loss: 2.665492, ppl: 14.375025 +epoch: 2, batch: 32257, sum loss: 4275.302246, avg loss: 2.618066, ppl: 13.709188 +epoch: 2, batch: 32258, sum loss: 5012.805176, avg loss: 2.643885, ppl: 14.067746 +epoch: 2, batch: 32259, sum loss: 4085.912598, avg loss: 2.531544, ppl: 12.572909 +epoch: 2, batch: 32260, sum loss: 3558.722168, avg loss: 2.363029, ppl: 10.623085 +epoch: 2, batch: 32261, sum loss: 3952.818848, avg loss: 2.300826, ppl: 9.982423 +epoch: 2, batch: 32262, sum loss: 4076.114990, avg loss: 2.387882, ppl: 10.890407 +epoch: 2, batch: 32263, sum loss: 4295.833984, avg loss: 2.515125, ppl: 12.368158 +epoch: 2, batch: 32264, sum loss: 4058.485352, avg loss: 2.333804, ppl: 10.317115 +epoch: 2, batch: 32265, sum loss: 
4733.787109, avg loss: 2.524687, ppl: 12.486981 +epoch: 2, batch: 32266, sum loss: 3611.101807, avg loss: 2.163632, ppl: 8.702690 +epoch: 2, batch: 32267, sum loss: 3476.546875, avg loss: 2.294750, ppl: 9.921960 +epoch: 2, batch: 32268, sum loss: 3953.249756, avg loss: 2.516391, ppl: 12.383821 +epoch: 2, batch: 32269, sum loss: 3937.543457, avg loss: 2.553530, ppl: 12.852395 +epoch: 2, batch: 32270, sum loss: 4746.864746, avg loss: 2.654846, ppl: 14.222795 +epoch: 2, batch: 32271, sum loss: 4766.854492, avg loss: 2.629263, ppl: 13.863554 +epoch: 2, batch: 32272, sum loss: 4571.915039, avg loss: 2.548448, ppl: 12.787237 +epoch: 2, batch: 32273, sum loss: 3358.114258, avg loss: 2.172131, ppl: 8.776966 +epoch: 2, batch: 32274, sum loss: 5014.767090, avg loss: 2.932612, ppl: 18.776615 +epoch: 2, batch: 32275, sum loss: 4437.343750, avg loss: 2.293201, ppl: 9.906595 +epoch: 2, batch: 32276, sum loss: 4169.065918, avg loss: 2.452392, ppl: 11.616097 +epoch: 2, batch: 32277, sum loss: 4242.610352, avg loss: 2.472384, ppl: 11.850662 +epoch: 2, batch: 32278, sum loss: 4289.456055, avg loss: 2.490973, ppl: 12.073020 +epoch: 2, batch: 32279, sum loss: 4571.494629, avg loss: 2.714664, ppl: 15.099543 +epoch: 2, batch: 32280, sum loss: 4127.201172, avg loss: 2.595724, ppl: 13.406288 +epoch: 2, batch: 32281, sum loss: 4644.357910, avg loss: 2.646358, ppl: 14.102580 +epoch: 2, batch: 32282, sum loss: 3766.896240, avg loss: 2.405425, ppl: 11.083143 +epoch: 2, batch: 32283, sum loss: 4782.171875, avg loss: 2.560049, ppl: 12.936452 +epoch: 2, batch: 32284, sum loss: 4049.370361, avg loss: 2.615872, ppl: 13.679145 +epoch: 2, batch: 32285, sum loss: 4732.153809, avg loss: 2.676558, ppl: 14.534974 +epoch: 2, batch: 32286, sum loss: 3432.398682, avg loss: 2.196033, ppl: 8.989278 +epoch: 2, batch: 32287, sum loss: 4757.503906, avg loss: 2.589823, ppl: 13.327406 +epoch: 2, batch: 32288, sum loss: 4365.388184, avg loss: 2.694684, ppl: 14.800845 +epoch: 2, batch: 32289, sum loss: 3975.367920, 
avg loss: 2.339828, ppl: 10.379454 +epoch: 2, batch: 32290, sum loss: 3567.236816, avg loss: 2.354612, ppl: 10.534039 +epoch: 2, batch: 32291, sum loss: 4247.476562, avg loss: 2.453770, ppl: 11.632122 +epoch: 2, batch: 32292, sum loss: 5131.805664, avg loss: 2.748691, ppl: 15.622163 +epoch: 2, batch: 32293, sum loss: 3861.321289, avg loss: 2.413326, ppl: 11.171052 +epoch: 2, batch: 32294, sum loss: 4335.998047, avg loss: 2.624696, ppl: 13.800382 +epoch: 2, batch: 32295, sum loss: 4165.968750, avg loss: 2.434815, ppl: 11.413712 +epoch: 2, batch: 32296, sum loss: 4732.056641, avg loss: 2.652498, ppl: 14.189443 +epoch: 2, batch: 32297, sum loss: 3980.634277, avg loss: 2.200461, ppl: 9.029179 +epoch: 2, batch: 32298, sum loss: 3864.476807, avg loss: 2.444324, ppl: 11.522760 +epoch: 2, batch: 32299, sum loss: 3696.257324, avg loss: 2.064948, ppl: 7.884889 +epoch: 2, batch: 32300, sum loss: 4332.884277, avg loss: 2.442437, ppl: 11.501040 +epoch: 2, batch: 32301, sum loss: 4313.621582, avg loss: 2.357170, ppl: 10.561025 +epoch: 2, batch: 32302, sum loss: 4027.106689, avg loss: 2.361939, ppl: 10.611512 +epoch: 2, batch: 32303, sum loss: 4856.022949, avg loss: 2.732708, ppl: 15.374472 +epoch: 2, batch: 32304, sum loss: 4410.817871, avg loss: 2.462768, ppl: 11.737256 +epoch: 2, batch: 32305, sum loss: 4219.409180, avg loss: 2.498170, ppl: 12.160222 +epoch: 2, batch: 32306, sum loss: 4434.609863, avg loss: 2.380359, ppl: 10.808787 +epoch: 2, batch: 32307, sum loss: 4750.943848, avg loss: 2.771846, ppl: 15.988118 +epoch: 2, batch: 32308, sum loss: 4733.372559, avg loss: 2.680279, ppl: 14.589160 +epoch: 2, batch: 32309, sum loss: 4814.106445, avg loss: 2.760382, ppl: 15.805879 +epoch: 2, batch: 32310, sum loss: 4884.445801, avg loss: 2.707564, ppl: 14.992710 +epoch: 2, batch: 32311, sum loss: 4449.018555, avg loss: 2.500854, ppl: 12.192897 +epoch: 2, batch: 32312, sum loss: 4042.992676, avg loss: 2.484937, ppl: 12.000363 +epoch: 2, batch: 32313, sum loss: 3657.768555, avg loss: 
2.164360, ppl: 8.709026 +epoch: 2, batch: 32314, sum loss: 4211.177734, avg loss: 2.442679, ppl: 11.503815 +epoch: 2, batch: 32315, sum loss: 3768.790771, avg loss: 2.232696, ppl: 9.324973 +epoch: 2, batch: 32316, sum loss: 5131.902832, avg loss: 2.875016, ppl: 17.725702 +epoch: 2, batch: 32317, sum loss: 2915.400879, avg loss: 2.205296, ppl: 9.072935 +epoch: 2, batch: 32318, sum loss: 4281.121094, avg loss: 2.551324, ppl: 12.824067 +epoch: 2, batch: 32319, sum loss: 3091.019775, avg loss: 2.192213, ppl: 8.955007 +epoch: 2, batch: 32320, sum loss: 4876.842285, avg loss: 2.558679, ppl: 12.918738 +epoch: 2, batch: 32321, sum loss: 4215.475098, avg loss: 2.573550, ppl: 13.112290 +epoch: 2, batch: 32322, sum loss: 3851.398926, avg loss: 2.240488, ppl: 9.397917 +epoch: 2, batch: 32323, sum loss: 4969.262207, avg loss: 2.573414, ppl: 13.110505 +epoch: 2, batch: 32324, sum loss: 3711.184082, avg loss: 2.534962, ppl: 12.615948 +epoch: 2, batch: 32325, sum loss: 3330.126709, avg loss: 2.256183, ppl: 9.546584 +epoch: 2, batch: 32326, sum loss: 3766.815918, avg loss: 2.270534, ppl: 9.684571 +epoch: 2, batch: 32327, sum loss: 4532.211914, avg loss: 2.576585, ppl: 13.152142 +epoch: 2, batch: 32328, sum loss: 4534.781250, avg loss: 2.645730, ppl: 14.093730 +epoch: 2, batch: 32329, sum loss: 4347.199707, avg loss: 2.623536, ppl: 13.784384 +epoch: 2, batch: 32330, sum loss: 4883.554688, avg loss: 2.680326, ppl: 14.589855 +epoch: 2, batch: 32331, sum loss: 3519.358154, avg loss: 2.335340, ppl: 10.332977 +epoch: 2, batch: 32332, sum loss: 4829.618164, avg loss: 2.914676, ppl: 18.442835 +epoch: 2, batch: 32333, sum loss: 5213.896484, avg loss: 2.855365, ppl: 17.380781 +epoch: 2, batch: 32334, sum loss: 4805.472656, avg loss: 2.659365, ppl: 14.287213 +epoch: 2, batch: 32335, sum loss: 5327.048828, avg loss: 2.667526, ppl: 14.404285 +epoch: 2, batch: 32336, sum loss: 4501.624512, avg loss: 2.546168, ppl: 12.758119 +epoch: 2, batch: 32337, sum loss: 4164.756836, avg loss: 2.502859, ppl: 
12.217372 +epoch: 2, batch: 32338, sum loss: 4470.402832, avg loss: 2.440176, ppl: 11.475063 +epoch: 2, batch: 32339, sum loss: 3990.866943, avg loss: 2.605005, ppl: 13.531287 +epoch: 2, batch: 32340, sum loss: 4515.962402, avg loss: 2.582025, ppl: 13.223893 +epoch: 2, batch: 32341, sum loss: 4006.088379, avg loss: 2.426462, ppl: 11.318765 +epoch: 2, batch: 32342, sum loss: 3838.649658, avg loss: 2.320828, ppl: 10.184105 +epoch: 2, batch: 32343, sum loss: 4375.318848, avg loss: 2.578267, ppl: 13.174286 +epoch: 2, batch: 32344, sum loss: 4277.261230, avg loss: 2.496942, ppl: 12.145294 +epoch: 2, batch: 32345, sum loss: 4276.529785, avg loss: 2.618818, ppl: 13.719495 +epoch: 2, batch: 32346, sum loss: 3960.404297, avg loss: 2.509762, ppl: 12.301999 +epoch: 2, batch: 32347, sum loss: 4713.484375, avg loss: 2.754813, ppl: 15.718097 +epoch: 2, batch: 32348, sum loss: 3776.944336, avg loss: 2.421118, ppl: 11.258442 +epoch: 2, batch: 32349, sum loss: 4981.255859, avg loss: 2.714581, ppl: 15.098283 +epoch: 2, batch: 32350, sum loss: 4247.804688, avg loss: 2.445483, ppl: 11.536125 +epoch: 2, batch: 32351, sum loss: 4457.054688, avg loss: 2.684973, ppl: 14.657803 +epoch: 2, batch: 32352, sum loss: 4261.307129, avg loss: 2.645132, ppl: 14.085301 +epoch: 2, batch: 32353, sum loss: 3640.798340, avg loss: 2.302845, ppl: 10.002602 +epoch: 2, batch: 32354, sum loss: 4997.191895, avg loss: 2.517477, ppl: 12.397280 +epoch: 2, batch: 32355, sum loss: 4464.796387, avg loss: 2.582300, ppl: 13.227526 +epoch: 2, batch: 32356, sum loss: 4738.247559, avg loss: 2.590622, ppl: 13.338068 +epoch: 2, batch: 32357, sum loss: 3108.861816, avg loss: 2.289294, ppl: 9.867973 +epoch: 2, batch: 32358, sum loss: 3788.674805, avg loss: 2.461777, ppl: 11.725629 +epoch: 2, batch: 32359, sum loss: 4261.278320, avg loss: 2.533459, ppl: 12.597006 +epoch: 2, batch: 32360, sum loss: 5133.779297, avg loss: 2.736556, ppl: 15.433744 +epoch: 2, batch: 32361, sum loss: 5427.863281, avg loss: 2.819669, ppl: 
16.771299 +epoch: 2, batch: 32362, sum loss: 3875.798828, avg loss: 2.440680, ppl: 11.480840 +epoch: 2, batch: 32363, sum loss: 3658.260742, avg loss: 2.303691, ppl: 10.011062 +epoch: 2, batch: 32364, sum loss: 3351.580078, avg loss: 2.356948, ppl: 10.558676 +epoch: 2, batch: 32365, sum loss: 3541.647461, avg loss: 2.319350, ppl: 10.169063 +epoch: 2, batch: 32366, sum loss: 4438.444824, avg loss: 2.689967, ppl: 14.731185 +epoch: 2, batch: 32367, sum loss: 4591.134766, avg loss: 2.493827, ppl: 12.107518 +epoch: 2, batch: 32368, sum loss: 4251.212891, avg loss: 2.509571, ppl: 12.299647 +epoch: 2, batch: 32369, sum loss: 4327.882812, avg loss: 2.500221, ppl: 12.185186 +epoch: 2, batch: 32370, sum loss: 3926.832764, avg loss: 2.397334, ppl: 10.993824 +epoch: 2, batch: 32371, sum loss: 4110.917480, avg loss: 2.506657, ppl: 12.263862 +epoch: 2, batch: 32372, sum loss: 5341.732422, avg loss: 2.871899, ppl: 17.670546 +epoch: 2, batch: 32373, sum loss: 3888.409180, avg loss: 2.471970, ppl: 11.845761 +epoch: 2, batch: 32374, sum loss: 3871.614990, avg loss: 2.207306, ppl: 9.091193 +epoch: 2, batch: 32375, sum loss: 4619.205078, avg loss: 2.725195, ppl: 15.259384 +epoch: 2, batch: 32376, sum loss: 4525.038086, avg loss: 2.504171, ppl: 12.233418 +epoch: 2, batch: 32377, sum loss: 3656.366943, avg loss: 2.483945, ppl: 11.988464 +epoch: 2, batch: 32378, sum loss: 4252.798340, avg loss: 2.466820, ppl: 11.784917 +epoch: 2, batch: 32379, sum loss: 3878.025146, avg loss: 2.516564, ppl: 12.385967 +epoch: 2, batch: 32380, sum loss: 5164.182129, avg loss: 2.785427, ppl: 16.206738 +epoch: 2, batch: 32381, sum loss: 4192.194824, avg loss: 2.638260, ppl: 13.988840 +epoch: 2, batch: 32382, sum loss: 3611.729248, avg loss: 2.496012, ppl: 12.134007 +epoch: 2, batch: 32383, sum loss: 4088.952148, avg loss: 2.370407, ppl: 10.701748 +epoch: 2, batch: 32384, sum loss: 3696.403809, avg loss: 2.318949, ppl: 10.164980 +epoch: 2, batch: 32385, sum loss: 3262.291260, avg loss: 2.274959, ppl: 9.727521 
+epoch: 2, batch: 32386, sum loss: 4381.022949, avg loss: 2.490633, ppl: 12.068911 +epoch: 2, batch: 32387, sum loss: 4709.555664, avg loss: 2.497113, ppl: 12.147380 +epoch: 2, batch: 32388, sum loss: 4622.699219, avg loss: 2.486659, ppl: 12.021048 +epoch: 2, batch: 32389, sum loss: 3924.949463, avg loss: 2.433323, ppl: 11.396687 +epoch: 2, batch: 32390, sum loss: 4219.971191, avg loss: 2.404542, ppl: 11.073358 +epoch: 2, batch: 32391, sum loss: 4729.371094, avg loss: 2.514286, ppl: 12.357777 +epoch: 2, batch: 32392, sum loss: 4496.000488, avg loss: 2.451472, ppl: 11.605421 +epoch: 2, batch: 32393, sum loss: 4102.933594, avg loss: 2.543666, ppl: 12.726245 +epoch: 2, batch: 32394, sum loss: 5126.182617, avg loss: 2.889618, ppl: 17.986437 +epoch: 2, batch: 32395, sum loss: 4347.657227, avg loss: 2.516005, ppl: 12.379046 +epoch: 2, batch: 32396, sum loss: 4252.101074, avg loss: 2.371501, ppl: 10.713461 +epoch: 2, batch: 32397, sum loss: 4184.739746, avg loss: 2.496861, ppl: 12.144319 +epoch: 2, batch: 32398, sum loss: 4149.731445, avg loss: 2.487849, ppl: 12.035357 +epoch: 2, batch: 32399, sum loss: 5053.246582, avg loss: 2.734441, ppl: 15.401129 +epoch: 2, batch: 32400, sum loss: 4818.954102, avg loss: 2.695164, ppl: 14.807954 +epoch: 2, batch: 32401, sum loss: 3589.970947, avg loss: 2.437183, ppl: 11.440771 +epoch: 2, batch: 32402, sum loss: 4087.910156, avg loss: 2.256021, ppl: 9.545034 +epoch: 2, batch: 32403, sum loss: 4308.264648, avg loss: 2.576714, ppl: 13.153838 +epoch: 2, batch: 32404, sum loss: 4440.644043, avg loss: 2.468396, ppl: 11.803495 +epoch: 2, batch: 32405, sum loss: 4872.369141, avg loss: 2.609732, ppl: 13.595403 +epoch: 2, batch: 32406, sum loss: 3863.998535, avg loss: 2.419536, ppl: 11.240641 +epoch: 2, batch: 32407, sum loss: 3700.453369, avg loss: 2.491888, ppl: 12.084064 +epoch: 2, batch: 32408, sum loss: 4951.441406, avg loss: 2.826165, ppl: 16.880608 +epoch: 2, batch: 32409, sum loss: 4272.378906, avg loss: 2.522065, ppl: 12.454293 +epoch: 
2, batch: 32410, sum loss: 4824.666504, avg loss: 2.747532, ppl: 15.604079 +epoch: 2, batch: 32411, sum loss: 4124.858398, avg loss: 2.326485, ppl: 10.241882 +epoch: 2, batch: 32412, sum loss: 4687.258301, avg loss: 2.454062, ppl: 11.635514 +epoch: 2, batch: 32413, sum loss: 3895.223389, avg loss: 2.407431, ppl: 11.105393 +epoch: 2, batch: 32414, sum loss: 4136.487305, avg loss: 2.522248, ppl: 12.456571 +epoch: 2, batch: 32415, sum loss: 5079.695801, avg loss: 2.684829, ppl: 14.655688 +epoch: 2, batch: 32416, sum loss: 4273.387207, avg loss: 2.561983, ppl: 12.961493 +epoch: 2, batch: 32417, sum loss: 3379.402588, avg loss: 2.381538, ppl: 10.821535 +epoch: 2, batch: 32418, sum loss: 4315.733398, avg loss: 2.339151, ppl: 10.372426 +epoch: 2, batch: 32419, sum loss: 4017.130371, avg loss: 2.595046, ppl: 13.397198 +epoch: 2, batch: 32420, sum loss: 4735.992676, avg loss: 2.496570, ppl: 12.140775 +epoch: 2, batch: 32421, sum loss: 5046.820312, avg loss: 2.486119, ppl: 12.014552 +epoch: 2, batch: 32422, sum loss: 3994.281982, avg loss: 2.401853, ppl: 11.043625 +epoch: 2, batch: 32423, sum loss: 4332.729492, avg loss: 2.613226, ppl: 13.642998 +epoch: 2, batch: 32424, sum loss: 4089.555664, avg loss: 2.544839, ppl: 12.741173 +epoch: 2, batch: 32425, sum loss: 5187.138184, avg loss: 2.797809, ppl: 16.408657 +epoch: 2, batch: 32426, sum loss: 4144.681641, avg loss: 2.311590, ppl: 10.090460 +epoch: 2, batch: 32427, sum loss: 4110.972656, avg loss: 2.615123, ppl: 13.668892 +epoch: 2, batch: 32428, sum loss: 4544.687012, avg loss: 2.631550, ppl: 13.895292 +epoch: 2, batch: 32429, sum loss: 4224.967773, avg loss: 2.557487, ppl: 12.903347 +epoch: 2, batch: 32430, sum loss: 4809.234863, avg loss: 2.328927, ppl: 10.266922 +epoch: 2, batch: 32431, sum loss: 3628.498535, avg loss: 2.264980, ppl: 9.630935 +epoch: 2, batch: 32432, sum loss: 5299.078125, avg loss: 2.685797, ppl: 14.669895 +epoch: 2, batch: 32433, sum loss: 4016.198486, avg loss: 2.466952, ppl: 11.786470 +epoch: 2, 
batch: 32434, sum loss: 5233.285645, avg loss: 2.770400, ppl: 15.965019 +epoch: 2, batch: 32435, sum loss: 4038.102539, avg loss: 2.526973, ppl: 12.515561 +epoch: 2, batch: 32436, sum loss: 4728.940918, avg loss: 2.565893, ppl: 13.012276 +epoch: 2, batch: 32437, sum loss: 4087.040527, avg loss: 2.338124, ppl: 10.361780 +epoch: 2, batch: 32438, sum loss: 4269.845703, avg loss: 2.566013, ppl: 13.013836 +epoch: 2, batch: 32439, sum loss: 3926.000244, avg loss: 2.338297, ppl: 10.363568 +epoch: 2, batch: 32440, sum loss: 4663.090820, avg loss: 2.455551, ppl: 11.652848 +epoch: 2, batch: 32441, sum loss: 3817.278564, avg loss: 2.422131, ppl: 11.269851 +epoch: 2, batch: 32442, sum loss: 4008.906006, avg loss: 2.336192, ppl: 10.341784 +epoch: 2, batch: 32443, sum loss: 3920.080566, avg loss: 2.152708, ppl: 8.608134 +epoch: 2, batch: 32444, sum loss: 4407.120117, avg loss: 2.595477, ppl: 13.402981 +epoch: 2, batch: 32445, sum loss: 4223.631348, avg loss: 2.391637, ppl: 10.931375 +epoch: 2, batch: 32446, sum loss: 4413.241211, avg loss: 2.499004, ppl: 12.170368 +epoch: 2, batch: 32447, sum loss: 4463.287109, avg loss: 2.800055, ppl: 16.445543 +epoch: 2, batch: 32448, sum loss: 4018.617920, avg loss: 2.372266, ppl: 10.721656 +epoch: 2, batch: 32449, sum loss: 4739.625977, avg loss: 2.454493, ppl: 11.640531 +epoch: 2, batch: 32450, sum loss: 4916.660156, avg loss: 2.728446, ppl: 15.309082 +epoch: 2, batch: 32451, sum loss: 4257.622070, avg loss: 2.729245, ppl: 15.321314 +epoch: 2, batch: 32452, sum loss: 3677.157471, avg loss: 2.207177, ppl: 9.090023 +epoch: 2, batch: 32453, sum loss: 3791.595215, avg loss: 2.296545, ppl: 9.939779 +epoch: 2, batch: 32454, sum loss: 3941.602295, avg loss: 2.360241, ppl: 10.593504 +epoch: 2, batch: 32455, sum loss: 3571.092041, avg loss: 2.140943, ppl: 8.507453 +epoch: 2, batch: 32456, sum loss: 3922.632812, avg loss: 2.602942, ppl: 13.503400 +epoch: 2, batch: 32457, sum loss: 4229.966797, avg loss: 2.482375, ppl: 11.969658 +epoch: 2, batch: 
32458, sum loss: 5527.700195, avg loss: 2.847862, ppl: 17.250860 +epoch: 2, batch: 32459, sum loss: 3964.424072, avg loss: 2.445666, ppl: 11.538227 +epoch: 2, batch: 32460, sum loss: 4905.282715, avg loss: 2.734271, ppl: 15.398519 +epoch: 2, batch: 32461, sum loss: 3295.406738, avg loss: 2.131570, ppl: 8.428085 +epoch: 2, batch: 32462, sum loss: 3666.513672, avg loss: 2.420141, ppl: 11.247448 +epoch: 2, batch: 32463, sum loss: 3075.015381, avg loss: 2.329557, ppl: 10.273392 +epoch: 2, batch: 32464, sum loss: 3440.649414, avg loss: 2.301438, ppl: 9.988532 +epoch: 2, batch: 32465, sum loss: 3851.864258, avg loss: 2.227799, ppl: 9.279419 +epoch: 2, batch: 32466, sum loss: 4461.587891, avg loss: 2.643121, ppl: 14.057007 +epoch: 2, batch: 32467, sum loss: 3767.683105, avg loss: 2.332931, ppl: 10.308108 +epoch: 2, batch: 32468, sum loss: 4385.793457, avg loss: 2.666136, ppl: 14.384277 +epoch: 2, batch: 32469, sum loss: 4487.830078, avg loss: 2.534065, ppl: 12.604646 +epoch: 2, batch: 32470, sum loss: 4177.056641, avg loss: 2.425701, ppl: 11.310151 +epoch: 2, batch: 32471, sum loss: 4419.563965, avg loss: 2.492704, ppl: 12.093933 +epoch: 2, batch: 32472, sum loss: 4639.818848, avg loss: 2.560606, ppl: 12.943665 +epoch: 2, batch: 32473, sum loss: 3615.531738, avg loss: 2.178031, ppl: 8.828907 +epoch: 2, batch: 32474, sum loss: 5092.422852, avg loss: 2.785789, ppl: 16.212608 +epoch: 2, batch: 32475, sum loss: 4508.926758, avg loss: 2.573588, ppl: 13.112794 +epoch: 2, batch: 32476, sum loss: 4308.797852, avg loss: 2.441245, ppl: 11.487337 +epoch: 2, batch: 32477, sum loss: 5090.276367, avg loss: 2.735237, ppl: 15.413402 +epoch: 2, batch: 32478, sum loss: 4301.150879, avg loss: 2.497765, ppl: 12.155292 +epoch: 2, batch: 32479, sum loss: 4507.622070, avg loss: 2.497298, ppl: 12.149615 +epoch: 2, batch: 32480, sum loss: 4166.107422, avg loss: 2.361739, ppl: 10.609385 +epoch: 2, batch: 32481, sum loss: 4217.694336, avg loss: 2.469376, ppl: 11.815073 +epoch: 2, batch: 32482, sum 
loss: 4241.355957, avg loss: 2.423632, ppl: 11.286777 +epoch: 2, batch: 32483, sum loss: 3712.179199, avg loss: 2.327385, ppl: 10.251101 +epoch: 2, batch: 32484, sum loss: 4483.871094, avg loss: 2.854151, ppl: 17.359694 +epoch: 2, batch: 32485, sum loss: 3512.335205, avg loss: 2.321438, ppl: 10.190316 +epoch: 2, batch: 32486, sum loss: 4412.568848, avg loss: 2.514284, ppl: 12.357759 +epoch: 2, batch: 32487, sum loss: 4127.009277, avg loss: 2.424800, ppl: 11.299966 +epoch: 2, batch: 32488, sum loss: 4816.119141, avg loss: 2.851462, ppl: 17.313072 +epoch: 2, batch: 32489, sum loss: 3631.339355, avg loss: 2.329275, ppl: 10.270492 +epoch: 2, batch: 32490, sum loss: 4192.583984, avg loss: 2.483758, ppl: 11.986230 +epoch: 2, batch: 32491, sum loss: 3870.882080, avg loss: 2.377692, ppl: 10.779991 +epoch: 2, batch: 32492, sum loss: 4867.830078, avg loss: 2.736273, ppl: 15.429377 +epoch: 2, batch: 32493, sum loss: 4380.570312, avg loss: 2.539461, ppl: 12.672840 +epoch: 2, batch: 32494, sum loss: 4138.184570, avg loss: 2.491381, ppl: 12.077950 +epoch: 2, batch: 32495, sum loss: 3412.812500, avg loss: 2.104077, ppl: 8.199529 +epoch: 2, batch: 32496, sum loss: 4498.120117, avg loss: 2.462025, ppl: 11.728539 +epoch: 2, batch: 32497, sum loss: 3384.643555, avg loss: 2.288468, ppl: 9.859818 +epoch: 2, batch: 32498, sum loss: 4959.642578, avg loss: 2.691070, ppl: 14.747452 +epoch: 2, batch: 32499, sum loss: 4385.696777, avg loss: 2.619891, ppl: 13.734222 +epoch: 2, batch: 32500, sum loss: 4135.204590, avg loss: 2.473209, ppl: 11.860442 +epoch: 2, batch: 32501, sum loss: 3687.551025, avg loss: 2.322135, ppl: 10.197427 +epoch: 2, batch: 32502, sum loss: 4032.670654, avg loss: 2.416220, ppl: 11.203427 +epoch: 2, batch: 32503, sum loss: 3893.402832, avg loss: 2.422777, ppl: 11.277135 +epoch: 2, batch: 32504, sum loss: 3773.283203, avg loss: 2.379119, ppl: 10.795392 +epoch: 2, batch: 32505, sum loss: 3650.032227, avg loss: 2.267101, ppl: 9.651379 +epoch: 2, batch: 32506, sum loss: 
4322.703613, avg loss: 2.498672, ppl: 12.166332 +epoch: 2, batch: 32507, sum loss: 4303.846191, avg loss: 2.360859, ppl: 10.600055 +epoch: 2, batch: 32508, sum loss: 4008.964355, avg loss: 2.390557, ppl: 10.919575 +epoch: 2, batch: 32509, sum loss: 5277.762695, avg loss: 2.620538, ppl: 13.743109 +epoch: 2, batch: 32510, sum loss: 4520.662109, avg loss: 2.656088, ppl: 14.240473 +epoch: 2, batch: 32511, sum loss: 4986.014648, avg loss: 2.669173, ppl: 14.428029 +epoch: 2, batch: 32512, sum loss: 4067.592529, avg loss: 2.348495, ppl: 10.469796 +epoch: 2, batch: 32513, sum loss: 3900.878418, avg loss: 2.664534, ppl: 14.361260 +epoch: 2, batch: 32514, sum loss: 4371.279297, avg loss: 2.571341, ppl: 13.083355 +epoch: 2, batch: 32515, sum loss: 4305.443359, avg loss: 2.380013, ppl: 10.805043 +epoch: 2, batch: 32516, sum loss: 5650.592773, avg loss: 2.735040, ppl: 15.410363 +epoch: 2, batch: 32517, sum loss: 3889.734375, avg loss: 2.276030, ppl: 9.737940 +epoch: 2, batch: 32518, sum loss: 3805.801514, avg loss: 2.428718, ppl: 11.344333 +epoch: 2, batch: 32519, sum loss: 4194.472656, avg loss: 2.298341, ppl: 9.957652 +epoch: 2, batch: 32520, sum loss: 3674.988525, avg loss: 2.325942, ppl: 10.236319 +epoch: 2, batch: 32521, sum loss: 3601.751221, avg loss: 2.417283, ppl: 11.215343 +epoch: 2, batch: 32522, sum loss: 4735.700684, avg loss: 2.523016, ppl: 12.466135 +epoch: 2, batch: 32523, sum loss: 4161.316406, avg loss: 2.511356, ppl: 12.321623 +epoch: 2, batch: 32524, sum loss: 4454.524902, avg loss: 2.614158, ppl: 13.655712 +epoch: 2, batch: 32525, sum loss: 4041.287598, avg loss: 2.465703, ppl: 11.771758 +epoch: 2, batch: 32526, sum loss: 4609.587402, avg loss: 2.570880, ppl: 13.077327 +epoch: 2, batch: 32527, sum loss: 4167.225586, avg loss: 2.407409, ppl: 11.105152 +epoch: 2, batch: 32528, sum loss: 4515.292480, avg loss: 2.637437, ppl: 13.977335 +epoch: 2, batch: 32529, sum loss: 4742.625488, avg loss: 2.631868, ppl: 13.899706 +epoch: 2, batch: 32530, sum loss: 
4542.202637, avg loss: 2.600001, ppl: 13.463756 +epoch: 2, batch: 32531, sum loss: 4744.342773, avg loss: 2.672869, ppl: 14.481457 +epoch: 2, batch: 32532, sum loss: 3999.192383, avg loss: 2.632780, ppl: 13.912387 +epoch: 2, batch: 32533, sum loss: 4044.007324, avg loss: 2.479465, ppl: 11.934877 +epoch: 2, batch: 32534, sum loss: 4429.895508, avg loss: 2.565081, ppl: 13.001716 +epoch: 2, batch: 32535, sum loss: 4036.170410, avg loss: 2.533691, ppl: 12.599932 +epoch: 2, batch: 32536, sum loss: 3701.185059, avg loss: 2.398694, ppl: 11.008793 +epoch: 2, batch: 32537, sum loss: 2874.998779, avg loss: 1.973232, ppl: 7.193888 +epoch: 2, batch: 32538, sum loss: 4849.178223, avg loss: 2.642604, ppl: 14.049743 +epoch: 2, batch: 32539, sum loss: 3780.263184, avg loss: 2.231560, ppl: 9.314387 +epoch: 2, batch: 32540, sum loss: 4939.090820, avg loss: 2.625779, ppl: 13.815334 +epoch: 2, batch: 32541, sum loss: 3880.161865, avg loss: 2.278427, ppl: 9.761317 +epoch: 2, batch: 32542, sum loss: 3387.619629, avg loss: 2.378946, ppl: 10.793524 +epoch: 2, batch: 32543, sum loss: 4088.694824, avg loss: 2.646405, ppl: 14.103239 +epoch: 2, batch: 32544, sum loss: 3422.168457, avg loss: 2.189487, ppl: 8.930633 +epoch: 2, batch: 32545, sum loss: 4309.973633, avg loss: 2.282825, ppl: 9.804338 +epoch: 2, batch: 32546, sum loss: 4149.813477, avg loss: 2.375394, ppl: 10.755251 +epoch: 2, batch: 32547, sum loss: 4301.882812, avg loss: 2.616717, ppl: 13.690704 +epoch: 2, batch: 32548, sum loss: 5378.028809, avg loss: 2.693054, ppl: 14.776734 +epoch: 2, batch: 32549, sum loss: 3876.874756, avg loss: 2.520725, ppl: 12.437608 +epoch: 2, batch: 32550, sum loss: 4024.387451, avg loss: 2.491881, ppl: 12.083983 +epoch: 2, batch: 32551, sum loss: 4455.408203, avg loss: 2.786371, ppl: 16.222052 +epoch: 2, batch: 32552, sum loss: 5280.613770, avg loss: 2.779271, ppl: 16.107269 +epoch: 2, batch: 32553, sum loss: 4282.821777, avg loss: 2.485677, ppl: 12.009251 +epoch: 2, batch: 32554, sum loss: 4434.392578, 
avg loss: 2.682633, ppl: 14.623549 +epoch: 2, batch: 32555, sum loss: 4580.610352, avg loss: 2.599665, ppl: 13.459234 +epoch: 2, batch: 32556, sum loss: 5134.119629, avg loss: 2.574784, ppl: 13.128485 +epoch: 2, batch: 32557, sum loss: 3755.671631, avg loss: 2.517206, ppl: 12.393922 +epoch: 2, batch: 32558, sum loss: 3376.367920, avg loss: 2.215465, ppl: 9.165667 +epoch: 2, batch: 32559, sum loss: 4093.205811, avg loss: 2.473236, ppl: 11.860770 +epoch: 2, batch: 32560, sum loss: 4467.975586, avg loss: 2.406018, ppl: 11.089714 +epoch: 2, batch: 32561, sum loss: 4988.532227, avg loss: 2.762199, ppl: 15.834631 +epoch: 2, batch: 32562, sum loss: 4214.098145, avg loss: 2.670531, ppl: 14.447639 +epoch: 2, batch: 32563, sum loss: 4549.657227, avg loss: 2.530399, ppl: 12.558514 +epoch: 2, batch: 32564, sum loss: 2946.473145, avg loss: 2.177733, ppl: 8.826276 +epoch: 2, batch: 32565, sum loss: 3986.578125, avg loss: 2.173707, ppl: 8.790807 +epoch: 2, batch: 32566, sum loss: 2622.382324, avg loss: 1.983648, ppl: 7.269211 +epoch: 2, batch: 32567, sum loss: 3830.765869, avg loss: 2.280218, ppl: 9.778811 +epoch: 2, batch: 32568, sum loss: 4491.426758, avg loss: 2.591706, ppl: 13.352532 +epoch: 2, batch: 32569, sum loss: 5027.589844, avg loss: 2.646100, ppl: 14.098946 +epoch: 2, batch: 32570, sum loss: 3737.577637, avg loss: 2.397420, ppl: 10.994773 +epoch: 2, batch: 32571, sum loss: 4432.723145, avg loss: 2.347841, ppl: 10.462951 +epoch: 2, batch: 32572, sum loss: 4365.145508, avg loss: 2.658432, ppl: 14.273893 +epoch: 2, batch: 32573, sum loss: 4530.423828, avg loss: 2.458179, ppl: 11.683517 +epoch: 2, batch: 32574, sum loss: 3802.738770, avg loss: 2.394672, ppl: 10.964601 +epoch: 2, batch: 32575, sum loss: 4107.991211, avg loss: 2.494226, ppl: 12.112360 +epoch: 2, batch: 32576, sum loss: 4188.986816, avg loss: 2.673253, ppl: 14.487017 +epoch: 2, batch: 32577, sum loss: 4143.276367, avg loss: 2.415905, ppl: 11.199896 +epoch: 2, batch: 32578, sum loss: 3277.592041, avg loss: 
2.091635, ppl: 8.098146 +epoch: 2, batch: 32579, sum loss: 3375.511719, avg loss: 2.065797, ppl: 7.891582 +epoch: 2, batch: 32580, sum loss: 3957.691406, avg loss: 2.394248, ppl: 10.959948 +epoch: 2, batch: 32581, sum loss: 4111.376465, avg loss: 2.438539, ppl: 11.456288 +epoch: 2, batch: 32582, sum loss: 5020.478516, avg loss: 2.575925, ppl: 13.143474 +epoch: 2, batch: 32583, sum loss: 4128.730957, avg loss: 2.514453, ppl: 12.359842 +epoch: 2, batch: 32584, sum loss: 4587.815430, avg loss: 2.606713, ppl: 13.554428 +epoch: 2, batch: 32585, sum loss: 3861.812256, avg loss: 2.466036, ppl: 11.775674 +epoch: 2, batch: 32586, sum loss: 4303.067871, avg loss: 2.329761, ppl: 10.275483 +epoch: 2, batch: 32587, sum loss: 3764.955566, avg loss: 2.221213, ppl: 9.218503 +epoch: 2, batch: 32588, sum loss: 4362.080078, avg loss: 2.539046, ppl: 12.667575 +epoch: 2, batch: 32589, sum loss: 5841.323730, avg loss: 2.761855, ppl: 15.829181 +epoch: 2, batch: 32590, sum loss: 4968.875977, avg loss: 2.600144, ppl: 13.465682 +epoch: 2, batch: 32591, sum loss: 3404.681641, avg loss: 2.102953, ppl: 8.190324 +epoch: 2, batch: 32592, sum loss: 4773.267578, avg loss: 2.456648, ppl: 11.665647 +epoch: 2, batch: 32593, sum loss: 5107.770020, avg loss: 2.743163, ppl: 15.536054 +epoch: 2, batch: 32594, sum loss: 3486.353516, avg loss: 2.434605, ppl: 11.411306 +epoch: 2, batch: 32595, sum loss: 3858.601807, avg loss: 2.543574, ppl: 12.725067 +epoch: 2, batch: 32596, sum loss: 5080.577637, avg loss: 2.586852, ppl: 13.287880 +epoch: 2, batch: 32597, sum loss: 3348.228271, avg loss: 2.313910, ppl: 10.113895 +epoch: 2, batch: 32598, sum loss: 4394.110840, avg loss: 2.468602, ppl: 11.805927 +epoch: 2, batch: 32599, sum loss: 4959.911133, avg loss: 2.702949, ppl: 14.923677 +epoch: 2, batch: 32600, sum loss: 3805.506348, avg loss: 2.216369, ppl: 9.173964 +epoch: 2, batch: 32601, sum loss: 4712.644531, avg loss: 2.517438, ppl: 12.396800 +epoch: 2, batch: 32602, sum loss: 3928.630615, avg loss: 2.284088, 
ppl: 9.816726 +epoch: 2, batch: 32603, sum loss: 4662.782715, avg loss: 2.690584, ppl: 14.740284 +epoch: 2, batch: 32604, sum loss: 5126.924805, avg loss: 2.681446, ppl: 14.606199 +epoch: 2, batch: 32605, sum loss: 4247.781738, avg loss: 2.328828, ppl: 10.265899 +epoch: 2, batch: 32606, sum loss: 5173.446289, avg loss: 2.765070, ppl: 15.880155 +epoch: 2, batch: 32607, sum loss: 3405.094971, avg loss: 2.194005, ppl: 8.971066 +epoch: 2, batch: 32608, sum loss: 4141.648926, avg loss: 2.220723, ppl: 9.213991 +epoch: 2, batch: 32609, sum loss: 3865.013428, avg loss: 2.397651, ppl: 10.997313 +epoch: 2, batch: 32610, sum loss: 4894.914062, avg loss: 2.614805, ppl: 13.664548 +epoch: 2, batch: 32611, sum loss: 4208.847168, avg loss: 2.488970, ppl: 12.048854 +epoch: 2, batch: 32612, sum loss: 3553.930176, avg loss: 2.278160, ppl: 9.758711 +epoch: 2, batch: 32613, sum loss: 4803.163086, avg loss: 2.631870, ppl: 13.899742 +epoch: 2, batch: 32614, sum loss: 3919.471191, avg loss: 2.359706, ppl: 10.587835 +epoch: 2, batch: 32615, sum loss: 4033.709229, avg loss: 2.427021, ppl: 11.325097 +epoch: 2, batch: 32616, sum loss: 4556.294434, avg loss: 2.531275, ppl: 12.569519 +epoch: 2, batch: 32617, sum loss: 4270.150879, avg loss: 2.564655, ppl: 12.996178 +epoch: 2, batch: 32618, sum loss: 4002.459961, avg loss: 2.593947, ppl: 13.382487 +epoch: 2, batch: 32619, sum loss: 5081.202637, avg loss: 2.826030, ppl: 16.878325 +epoch: 2, batch: 32620, sum loss: 4120.688477, avg loss: 2.562617, ppl: 12.969715 +epoch: 2, batch: 32621, sum loss: 3932.688477, avg loss: 2.352087, ppl: 10.507471 +epoch: 2, batch: 32622, sum loss: 4564.289551, avg loss: 2.541364, ppl: 12.696978 +epoch: 2, batch: 32623, sum loss: 4046.209229, avg loss: 2.440416, ppl: 11.477810 +epoch: 2, batch: 32624, sum loss: 4214.014160, avg loss: 2.699561, ppl: 14.873196 +epoch: 2, batch: 32625, sum loss: 4397.940918, avg loss: 2.353098, ppl: 10.518108 +epoch: 2, batch: 32626, sum loss: 4548.503906, avg loss: 2.549610, ppl: 
12.802109 +epoch: 2, batch: 32627, sum loss: 4814.093262, avg loss: 2.626347, ppl: 13.823175 +epoch: 2, batch: 32628, sum loss: 3876.527588, avg loss: 2.376780, ppl: 10.770163 +epoch: 2, batch: 32629, sum loss: 4687.311523, avg loss: 2.686138, ppl: 14.674897 +epoch: 2, batch: 32630, sum loss: 4254.050293, avg loss: 2.489204, ppl: 12.051684 +epoch: 2, batch: 32631, sum loss: 4327.520996, avg loss: 2.591330, ppl: 13.347512 +epoch: 2, batch: 32632, sum loss: 3787.067627, avg loss: 2.166515, ppl: 8.727814 +epoch: 2, batch: 32633, sum loss: 5044.248535, avg loss: 2.794598, ppl: 16.356047 +epoch: 2, batch: 32634, sum loss: 3417.885498, avg loss: 2.310944, ppl: 10.083936 +epoch: 2, batch: 32635, sum loss: 4666.423340, avg loss: 2.562561, ppl: 12.968986 +epoch: 2, batch: 32636, sum loss: 3565.231934, avg loss: 2.208942, ppl: 9.106074 +epoch: 2, batch: 32637, sum loss: 5008.051270, avg loss: 2.649763, ppl: 14.150680 +epoch: 2, batch: 32638, sum loss: 4678.494629, avg loss: 2.676485, ppl: 14.533911 +epoch: 2, batch: 32639, sum loss: 3142.472412, avg loss: 2.055247, ppl: 7.808765 +epoch: 2, batch: 32640, sum loss: 4811.252441, avg loss: 2.656683, ppl: 14.248946 +epoch: 2, batch: 32641, sum loss: 4224.636230, avg loss: 2.630533, ppl: 13.881166 +epoch: 2, batch: 32642, sum loss: 3460.225586, avg loss: 2.370018, ppl: 10.697579 +epoch: 2, batch: 32643, sum loss: 4172.687012, avg loss: 2.569389, ppl: 13.057838 +epoch: 2, batch: 32644, sum loss: 4583.745117, avg loss: 2.550776, ppl: 12.817052 +epoch: 2, batch: 32645, sum loss: 4530.573242, avg loss: 2.546696, ppl: 12.764864 +epoch: 2, batch: 32646, sum loss: 4313.144043, avg loss: 2.377698, ppl: 10.780058 +epoch: 2, batch: 32647, sum loss: 4366.256836, avg loss: 2.482238, ppl: 11.968022 +epoch: 2, batch: 32648, sum loss: 4899.579102, avg loss: 2.791783, ppl: 16.310072 +epoch: 2, batch: 32649, sum loss: 3843.305420, avg loss: 2.350646, ppl: 10.492341 +epoch: 2, batch: 32650, sum loss: 4888.698242, avg loss: 2.626920, ppl: 13.831107 
+epoch: 2, batch: 32651, sum loss: 4511.679199, avg loss: 2.470799, ppl: 11.831900 +epoch: 2, batch: 32652, sum loss: 4319.969238, avg loss: 2.283282, ppl: 9.808818 +epoch: 2, batch: 32653, sum loss: 5136.608398, avg loss: 2.619382, ppl: 13.727242 +epoch: 2, batch: 32654, sum loss: 5351.891113, avg loss: 2.663958, ppl: 14.352983 +epoch: 2, batch: 32655, sum loss: 4481.615234, avg loss: 2.610143, ppl: 13.600995 +epoch: 2, batch: 32656, sum loss: 4596.960938, avg loss: 2.588379, ppl: 13.308180 +epoch: 2, batch: 32657, sum loss: 3888.233887, avg loss: 2.516656, ppl: 12.387110 +epoch: 2, batch: 32658, sum loss: 3977.142334, avg loss: 2.254616, ppl: 9.531631 +epoch: 2, batch: 32659, sum loss: 4094.552002, avg loss: 2.213271, ppl: 9.145586 +epoch: 2, batch: 32660, sum loss: 4707.956543, avg loss: 2.558672, ppl: 12.918650 +epoch: 2, batch: 32661, sum loss: 4973.936035, avg loss: 2.707641, ppl: 14.993861 +epoch: 2, batch: 32662, sum loss: 4910.715332, avg loss: 2.584587, ppl: 13.257813 +epoch: 2, batch: 32663, sum loss: 4245.715332, avg loss: 2.415083, ppl: 11.190696 +epoch: 2, batch: 32664, sum loss: 4321.100098, avg loss: 2.534370, ppl: 12.608481 +epoch: 2, batch: 32665, sum loss: 3969.238770, avg loss: 2.510587, ppl: 12.312160 +epoch: 2, batch: 32666, sum loss: 4930.877930, avg loss: 2.546941, ppl: 12.767987 +epoch: 2, batch: 32667, sum loss: 4184.781738, avg loss: 2.384491, ppl: 10.853537 +epoch: 2, batch: 32668, sum loss: 4344.842285, avg loss: 2.718925, ppl: 15.164016 +epoch: 2, batch: 32669, sum loss: 3750.447266, avg loss: 2.411863, ppl: 11.154727 +epoch: 2, batch: 32670, sum loss: 4367.034180, avg loss: 2.664450, ppl: 14.360052 +epoch: 2, batch: 32671, sum loss: 4542.895508, avg loss: 2.584127, ppl: 13.251715 +epoch: 2, batch: 32672, sum loss: 3915.079346, avg loss: 2.485765, ppl: 12.010299 +epoch: 2, batch: 32673, sum loss: 4666.588867, avg loss: 2.623153, ppl: 13.779097 +epoch: 2, batch: 32674, sum loss: 4766.235840, avg loss: 2.595989, ppl: 13.409843 +epoch: 2, 
batch: 32675, sum loss: 3933.529297, avg loss: 2.361062, ppl: 10.602203 +epoch: 2, batch: 32676, sum loss: 4015.349365, avg loss: 2.512734, ppl: 12.338623 +epoch: 2, batch: 32677, sum loss: 4292.354004, avg loss: 2.541358, ppl: 12.696904 +epoch: 2, batch: 32678, sum loss: 3409.027344, avg loss: 2.196538, ppl: 8.993827 +epoch: 2, batch: 32679, sum loss: 4627.261719, avg loss: 2.553676, ppl: 12.854274 +epoch: 2, batch: 32680, sum loss: 4725.164551, avg loss: 2.612031, ppl: 13.626705 +epoch: 2, batch: 32681, sum loss: 3435.017822, avg loss: 2.231980, ppl: 9.318301 +epoch: 2, batch: 32682, sum loss: 3852.645020, avg loss: 2.346313, ppl: 10.446976 +epoch: 2, batch: 32683, sum loss: 3746.338623, avg loss: 2.274644, ppl: 9.724456 +epoch: 2, batch: 32684, sum loss: 4492.231934, avg loss: 2.762750, ppl: 15.843354 +epoch: 2, batch: 32685, sum loss: 4424.763672, avg loss: 2.439230, ppl: 11.464215 +epoch: 2, batch: 32686, sum loss: 3300.297363, avg loss: 2.152836, ppl: 8.609239 +epoch: 2, batch: 32687, sum loss: 4702.977539, avg loss: 2.551805, ppl: 12.830248 +epoch: 2, batch: 32688, sum loss: 4821.625977, avg loss: 2.866603, ppl: 17.577206 +epoch: 2, batch: 32689, sum loss: 4197.223633, avg loss: 2.537620, ppl: 12.649530 +epoch: 2, batch: 32690, sum loss: 4263.989258, avg loss: 2.471878, ppl: 11.844671 +epoch: 2, batch: 32691, sum loss: 4374.939453, avg loss: 2.561440, ppl: 12.954455 +epoch: 2, batch: 32692, sum loss: 3958.050293, avg loss: 2.489340, ppl: 12.053316 +epoch: 2, batch: 32693, sum loss: 3045.592773, avg loss: 2.030395, ppl: 7.617096 +epoch: 2, batch: 32694, sum loss: 4665.018555, avg loss: 2.477439, ppl: 11.910727 +epoch: 2, batch: 32695, sum loss: 4286.276855, avg loss: 2.443715, ppl: 11.515746 +epoch: 2, batch: 32696, sum loss: 4358.192871, avg loss: 2.556125, ppl: 12.885787 +epoch: 2, batch: 32697, sum loss: 4086.371338, avg loss: 2.386899, ppl: 10.879704 +epoch: 2, batch: 32698, sum loss: 4062.679199, avg loss: 2.517149, ppl: 12.393219 +epoch: 2, batch: 
32699, sum loss: 4241.135254, avg loss: 2.362750, ppl: 10.620112 +epoch: 2, batch: 32700, sum loss: 3378.842285, avg loss: 2.396342, ppl: 10.982930 +epoch: 2, batch: 32701, sum loss: 4557.482422, avg loss: 2.434553, ppl: 11.410713 +epoch: 2, batch: 32702, sum loss: 4018.583984, avg loss: 2.471454, ppl: 11.839648 +epoch: 2, batch: 32703, sum loss: 3883.979980, avg loss: 2.481776, ppl: 11.962494 +epoch: 2, batch: 32704, sum loss: 3654.216309, avg loss: 2.464070, ppl: 11.752551 +epoch: 2, batch: 32705, sum loss: 3978.645264, avg loss: 2.485100, ppl: 12.002324 +epoch: 2, batch: 32706, sum loss: 4544.911133, avg loss: 2.670336, ppl: 14.444815 +epoch: 2, batch: 32707, sum loss: 4141.208496, avg loss: 2.427438, ppl: 11.329813 +epoch: 2, batch: 32708, sum loss: 4662.307129, avg loss: 2.464221, ppl: 11.754328 +epoch: 2, batch: 32709, sum loss: 4483.618652, avg loss: 2.492284, ppl: 12.088854 +epoch: 2, batch: 32710, sum loss: 5310.232422, avg loss: 2.717622, ppl: 15.144259 +epoch: 2, batch: 32711, sum loss: 4815.460449, avg loss: 2.666368, ppl: 14.387618 +epoch: 2, batch: 32712, sum loss: 4620.550293, avg loss: 2.626805, ppl: 13.829518 +epoch: 2, batch: 32713, sum loss: 4773.136719, avg loss: 2.596919, ppl: 13.422318 +epoch: 2, batch: 32714, sum loss: 4173.483887, avg loss: 2.313461, ppl: 10.109353 +epoch: 2, batch: 32715, sum loss: 4480.222168, avg loss: 2.552833, ppl: 12.843439 +epoch: 2, batch: 32716, sum loss: 5214.099609, avg loss: 2.761705, ppl: 15.826811 +epoch: 2, batch: 32717, sum loss: 3423.911621, avg loss: 2.227659, ppl: 9.278120 +epoch: 2, batch: 32718, sum loss: 4369.367676, avg loss: 2.391553, ppl: 10.930457 +epoch: 2, batch: 32719, sum loss: 5392.755859, avg loss: 2.860878, ppl: 17.476873 +epoch: 2, batch: 32720, sum loss: 4232.598633, avg loss: 2.632213, ppl: 13.904509 +epoch: 2, batch: 32721, sum loss: 3491.328613, avg loss: 2.422851, ppl: 11.277970 +epoch: 2, batch: 32722, sum loss: 4244.422363, avg loss: 2.626499, ppl: 13.825282 +epoch: 2, batch: 32723, 
sum loss: 3744.214600, avg loss: 2.437640, ppl: 11.445993 +epoch: 2, batch: 32724, sum loss: 5237.138184, avg loss: 2.917626, ppl: 18.497320 +epoch: 2, batch: 32725, sum loss: 3990.485352, avg loss: 2.592908, ppl: 13.368589 +epoch: 2, batch: 32726, sum loss: 4902.804688, avg loss: 2.682060, ppl: 14.615163 +epoch: 2, batch: 32727, sum loss: 3925.861816, avg loss: 2.374992, ppl: 10.750926 +epoch: 2, batch: 32728, sum loss: 4299.041992, avg loss: 2.712329, ppl: 15.064325 +epoch: 2, batch: 32729, sum loss: 4326.451172, avg loss: 2.541981, ppl: 12.704811 +epoch: 2, batch: 32730, sum loss: 4170.372070, avg loss: 2.381709, ppl: 10.823382 +epoch: 2, batch: 32731, sum loss: 3990.004395, avg loss: 2.665334, ppl: 14.372743 +epoch: 2, batch: 32732, sum loss: 5143.511719, avg loss: 2.687310, ppl: 14.692104 +epoch: 2, batch: 32733, sum loss: 4992.572266, avg loss: 2.812717, ppl: 16.655104 +epoch: 2, batch: 32734, sum loss: 4024.008789, avg loss: 2.717089, ppl: 15.136195 +epoch: 2, batch: 32735, sum loss: 4245.923340, avg loss: 2.347111, ppl: 10.455320 +epoch: 2, batch: 32736, sum loss: 3804.636719, avg loss: 2.352899, ppl: 10.516007 +epoch: 2, batch: 32737, sum loss: 3768.208740, avg loss: 2.450071, ppl: 11.589165 +epoch: 2, batch: 32738, sum loss: 3433.854004, avg loss: 2.206847, ppl: 9.087019 +epoch: 2, batch: 32739, sum loss: 5049.753418, avg loss: 2.575091, ppl: 13.132514 +epoch: 2, batch: 32740, sum loss: 3732.810791, avg loss: 2.308479, ppl: 10.059116 +epoch: 2, batch: 32741, sum loss: 3901.097900, avg loss: 2.194093, ppl: 8.971864 +epoch: 2, batch: 32742, sum loss: 4613.505859, avg loss: 2.367115, ppl: 10.666570 +epoch: 2, batch: 32743, sum loss: 4889.905762, avg loss: 2.607950, ppl: 13.571198 +epoch: 2, batch: 32744, sum loss: 4210.782715, avg loss: 2.425566, ppl: 11.308631 +epoch: 2, batch: 32745, sum loss: 3378.786865, avg loss: 2.339880, ppl: 10.379991 +epoch: 2, batch: 32746, sum loss: 4237.729492, avg loss: 2.531499, ppl: 12.572340 +epoch: 2, batch: 32747, sum loss: 
4046.444092, avg loss: 2.754557, ppl: 15.714072 +epoch: 2, batch: 32748, sum loss: 3663.449219, avg loss: 2.323050, ppl: 10.206753 +epoch: 2, batch: 32749, sum loss: 4325.638672, avg loss: 2.500369, ppl: 12.186991 +epoch: 2, batch: 32750, sum loss: 4506.164062, avg loss: 2.749337, ppl: 15.632271 +epoch: 2, batch: 32751, sum loss: 4134.305176, avg loss: 2.450685, ppl: 11.596285 +epoch: 2, batch: 32752, sum loss: 4288.114258, avg loss: 2.441979, ppl: 11.495765 +epoch: 2, batch: 32753, sum loss: 4832.389160, avg loss: 2.622023, ppl: 13.763544 +epoch: 2, batch: 32754, sum loss: 3583.475098, avg loss: 2.263724, ppl: 9.618842 +epoch: 2, batch: 32755, sum loss: 3871.647217, avg loss: 2.186136, ppl: 8.900756 +epoch: 2, batch: 32756, sum loss: 4778.344727, avg loss: 2.632697, ppl: 13.911236 +epoch: 2, batch: 32757, sum loss: 3655.182861, avg loss: 2.264673, ppl: 9.627980 +epoch: 2, batch: 32758, sum loss: 4668.086914, avg loss: 2.669003, ppl: 14.425583 +epoch: 2, batch: 32759, sum loss: 5638.760742, avg loss: 2.762744, ppl: 15.843256 +epoch: 2, batch: 32760, sum loss: 5032.583984, avg loss: 2.765156, ppl: 15.881521 +epoch: 2, batch: 32761, sum loss: 4336.392578, avg loss: 2.395797, ppl: 10.976946 +epoch: 2, batch: 32762, sum loss: 4511.897461, avg loss: 2.550536, ppl: 12.813966 +epoch: 2, batch: 32763, sum loss: 3725.495850, avg loss: 2.434965, ppl: 11.415415 +epoch: 2, batch: 32764, sum loss: 4402.924805, avg loss: 2.799062, ppl: 16.429230 +epoch: 2, batch: 32765, sum loss: 4708.304199, avg loss: 2.687388, ppl: 14.693254 +epoch: 2, batch: 32766, sum loss: 4165.710938, avg loss: 2.417708, ppl: 11.220115 +epoch: 2, batch: 32767, sum loss: 3145.536621, avg loss: 2.202757, ppl: 9.049929 +epoch: 2, batch: 32768, sum loss: 4121.890625, avg loss: 2.417531, ppl: 11.218130 +epoch: 2, batch: 32769, sum loss: 3668.498291, avg loss: 2.453845, ppl: 11.632990 +epoch: 2, batch: 32770, sum loss: 4726.628906, avg loss: 2.584270, ppl: 13.253604 +epoch: 2, batch: 32771, sum loss: 
3901.789551, avg loss: 2.367591, ppl: 10.671650 +epoch: 2, batch: 32772, sum loss: 4524.113770, avg loss: 2.369887, ppl: 10.696182 +epoch: 2, batch: 32773, sum loss: 4507.922363, avg loss: 2.619362, ppl: 13.726965 +epoch: 2, batch: 32774, sum loss: 4526.973633, avg loss: 2.789263, ppl: 16.269022 +epoch: 2, batch: 32775, sum loss: 4439.950195, avg loss: 2.438193, ppl: 11.452332 +epoch: 2, batch: 32776, sum loss: 4344.976074, avg loss: 2.499986, ppl: 12.182325 +epoch: 2, batch: 32777, sum loss: 4020.805664, avg loss: 2.616009, ppl: 13.681013 +epoch: 2, batch: 32778, sum loss: 3822.391602, avg loss: 2.414650, ppl: 11.185857 +epoch: 2, batch: 32779, sum loss: 3921.489014, avg loss: 2.248560, ppl: 9.474085 +epoch: 2, batch: 32780, sum loss: 3977.779541, avg loss: 2.334378, ppl: 10.323034 +epoch: 2, batch: 32781, sum loss: 4084.955078, avg loss: 2.593622, ppl: 13.378142 +epoch: 2, batch: 32782, sum loss: 5026.875000, avg loss: 2.731997, ppl: 15.363542 +epoch: 2, batch: 32783, sum loss: 4422.600098, avg loss: 2.587829, ppl: 13.300869 +epoch: 2, batch: 32784, sum loss: 3696.912842, avg loss: 2.506382, ppl: 12.260486 +epoch: 2, batch: 32785, sum loss: 4234.512695, avg loss: 2.372276, ppl: 10.721768 +epoch: 2, batch: 32786, sum loss: 3892.331055, avg loss: 2.411605, ppl: 11.151850 +epoch: 2, batch: 32787, sum loss: 5027.540039, avg loss: 2.588847, ppl: 13.314407 +epoch: 2, batch: 32788, sum loss: 4886.503906, avg loss: 2.713217, ppl: 15.077703 +epoch: 2, batch: 32789, sum loss: 3775.474121, avg loss: 2.421728, ppl: 11.265310 +epoch: 2, batch: 32790, sum loss: 3814.416748, avg loss: 2.320205, ppl: 10.177758 +epoch: 2, batch: 32791, sum loss: 4661.699219, avg loss: 2.645686, ppl: 14.093115 +epoch: 2, batch: 32792, sum loss: 4295.845215, avg loss: 2.518080, ppl: 12.404762 +epoch: 2, batch: 32793, sum loss: 5256.006348, avg loss: 2.878426, ppl: 17.786261 +epoch: 2, batch: 32794, sum loss: 4996.263672, avg loss: 2.687608, ppl: 14.696480 +epoch: 2, batch: 32795, sum loss: 
4630.054688, avg loss: 2.712393, ppl: 15.065285 +epoch: 2, batch: 32796, sum loss: 4268.976562, avg loss: 2.443604, ppl: 11.514467 +epoch: 2, batch: 32797, sum loss: 4116.433594, avg loss: 2.459041, ppl: 11.693588 +epoch: 2, batch: 32798, sum loss: 3743.438721, avg loss: 2.270127, ppl: 9.680626 +epoch: 2, batch: 32799, sum loss: 3653.471680, avg loss: 2.402020, ppl: 11.045460 +epoch: 2, batch: 32800, sum loss: 4758.569824, avg loss: 2.643650, ppl: 14.064446 +epoch: 2, batch: 32801, sum loss: 4022.296875, avg loss: 2.152112, ppl: 8.603006 +epoch: 2, batch: 32802, sum loss: 4040.723633, avg loss: 2.326266, ppl: 10.239634 +epoch: 2, batch: 32803, sum loss: 4253.813477, avg loss: 2.539590, ppl: 12.674478 +epoch: 2, batch: 32804, sum loss: 4785.349609, avg loss: 2.523919, ppl: 12.477395 +epoch: 2, batch: 32805, sum loss: 4975.672363, avg loss: 2.634025, ppl: 13.929719 +epoch: 2, batch: 32806, sum loss: 4302.059570, avg loss: 2.709106, ppl: 15.015841 +epoch: 2, batch: 32807, sum loss: 3962.877930, avg loss: 2.434200, ppl: 11.406691 +epoch: 2, batch: 32808, sum loss: 5153.675293, avg loss: 2.713889, ppl: 15.087840 +epoch: 2, batch: 32809, sum loss: 3657.752441, avg loss: 2.373623, ppl: 10.736216 +epoch: 2, batch: 32810, sum loss: 3018.822021, avg loss: 2.528327, ppl: 12.532516 +epoch: 2, batch: 32811, sum loss: 3871.158936, avg loss: 2.494304, ppl: 12.113297 +epoch: 2, batch: 32812, sum loss: 4127.178711, avg loss: 2.569850, ppl: 13.063861 +epoch: 2, batch: 32813, sum loss: 4736.013184, avg loss: 2.718722, ppl: 15.160929 +epoch: 2, batch: 32814, sum loss: 4155.378906, avg loss: 2.519939, ppl: 12.427835 +epoch: 2, batch: 32815, sum loss: 4193.370117, avg loss: 2.337442, ppl: 10.354712 +epoch: 2, batch: 32816, sum loss: 4378.690918, avg loss: 2.513600, ppl: 12.349306 +epoch: 2, batch: 32817, sum loss: 4913.746582, avg loss: 2.448304, ppl: 11.568714 +epoch: 2, batch: 32818, sum loss: 3611.874756, avg loss: 2.307907, ppl: 10.053362 +epoch: 2, batch: 32819, sum loss: 
3749.328613, avg loss: 2.320129, ppl: 10.176989 +epoch: 2, batch: 32820, sum loss: 4311.138672, avg loss: 2.606493, ppl: 13.551436 +epoch: 2, batch: 32821, sum loss: 4408.904785, avg loss: 2.746981, ppl: 15.595480 +epoch: 2, batch: 32822, sum loss: 4513.990234, avg loss: 2.543093, ppl: 12.718949 +epoch: 2, batch: 32823, sum loss: 3558.705078, avg loss: 2.324432, ppl: 10.220872 +epoch: 2, batch: 32824, sum loss: 4652.007324, avg loss: 2.506469, ppl: 12.261564 +epoch: 2, batch: 32825, sum loss: 4021.539062, avg loss: 2.443219, ppl: 11.510036 +epoch: 2, batch: 32826, sum loss: 4871.031738, avg loss: 2.706129, ppl: 14.971208 +epoch: 2, batch: 32827, sum loss: 3888.220703, avg loss: 2.542983, ppl: 12.717548 +epoch: 2, batch: 32828, sum loss: 3883.180176, avg loss: 2.366350, ppl: 10.658415 +epoch: 2, batch: 32829, sum loss: 4577.374023, avg loss: 2.406611, ppl: 11.096292 +epoch: 2, batch: 32830, sum loss: 3837.215820, avg loss: 2.422485, ppl: 11.273836 +epoch: 2, batch: 32831, sum loss: 4232.021484, avg loss: 2.504155, ppl: 12.233213 +epoch: 2, batch: 32832, sum loss: 3672.919434, avg loss: 2.362006, ppl: 10.612220 +epoch: 2, batch: 32833, sum loss: 4817.949219, avg loss: 2.740585, ppl: 15.496053 +epoch: 2, batch: 32834, sum loss: 4538.691895, avg loss: 2.525705, ppl: 12.499705 +epoch: 2, batch: 32835, sum loss: 4400.323730, avg loss: 2.614571, ppl: 13.661359 +epoch: 2, batch: 32836, sum loss: 4847.170410, avg loss: 2.740062, ppl: 15.487952 +epoch: 2, batch: 32837, sum loss: 4892.513184, avg loss: 2.709033, ppl: 15.014746 +epoch: 2, batch: 32838, sum loss: 4774.610840, avg loss: 2.422431, ppl: 11.273226 +epoch: 2, batch: 32839, sum loss: 3854.880371, avg loss: 2.400299, ppl: 11.026474 +epoch: 2, batch: 32840, sum loss: 4331.172363, avg loss: 2.590414, ppl: 13.335295 +epoch: 2, batch: 32841, sum loss: 4087.733887, avg loss: 2.498615, ppl: 12.165633 +epoch: 2, batch: 32842, sum loss: 4969.548340, avg loss: 2.664637, ppl: 14.362736 +epoch: 2, batch: 32843, sum loss: 
4272.654297, avg loss: 2.325887, ppl: 10.235755 +epoch: 2, batch: 32844, sum loss: 4751.134766, avg loss: 2.537999, ppl: 12.654329 +epoch: 2, batch: 32845, sum loss: 4601.075195, avg loss: 2.501944, ppl: 12.206203 +epoch: 2, batch: 32846, sum loss: 4819.806641, avg loss: 2.623738, ppl: 13.787165 +epoch: 2, batch: 32847, sum loss: 4538.636719, avg loss: 2.535551, ppl: 12.623385 +epoch: 2, batch: 32848, sum loss: 3551.195557, avg loss: 2.376971, ppl: 10.772229 +epoch: 2, batch: 32849, sum loss: 5213.801758, avg loss: 2.647944, ppl: 14.124968 +epoch: 2, batch: 32850, sum loss: 5026.810547, avg loss: 2.424896, ppl: 11.301060 +epoch: 2, batch: 32851, sum loss: 4847.393555, avg loss: 2.841379, ppl: 17.139391 +epoch: 2, batch: 32852, sum loss: 4376.436523, avg loss: 2.499393, ppl: 12.175098 +epoch: 2, batch: 32853, sum loss: 3648.582764, avg loss: 2.376927, ppl: 10.771749 +epoch: 2, batch: 32854, sum loss: 3451.661377, avg loss: 2.226878, ppl: 9.270881 +epoch: 2, batch: 32855, sum loss: 5053.072266, avg loss: 2.395957, ppl: 10.978694 +epoch: 2, batch: 32856, sum loss: 4026.972412, avg loss: 2.408476, ppl: 11.117010 +epoch: 2, batch: 32857, sum loss: 4280.544434, avg loss: 2.408860, ppl: 11.121278 +epoch: 2, batch: 32858, sum loss: 4047.426270, avg loss: 2.469449, ppl: 11.815929 +epoch: 2, batch: 32859, sum loss: 4167.480469, avg loss: 2.445704, ppl: 11.538675 +epoch: 2, batch: 32860, sum loss: 4588.518066, avg loss: 2.811592, ppl: 16.636383 +epoch: 2, batch: 32861, sum loss: 3772.328125, avg loss: 2.327161, ppl: 10.248805 +epoch: 2, batch: 32862, sum loss: 5163.531250, avg loss: 2.622413, ppl: 13.768910 +epoch: 2, batch: 32863, sum loss: 4594.753906, avg loss: 2.718789, ppl: 15.161955 +epoch: 2, batch: 32864, sum loss: 4715.385254, avg loss: 2.600874, ppl: 13.475516 +epoch: 2, batch: 32865, sum loss: 4450.008301, avg loss: 2.650392, ppl: 14.159589 +epoch: 2, batch: 32866, sum loss: 4003.528564, avg loss: 2.614976, ppl: 13.666894 +epoch: 2, batch: 32867, sum loss: 
4470.741211, avg loss: 2.452409, ppl: 11.616294 +epoch: 2, batch: 32868, sum loss: 4663.016602, avg loss: 2.679895, ppl: 14.583557 +epoch: 2, batch: 32869, sum loss: 3946.348145, avg loss: 2.380186, ppl: 10.806911 +epoch: 2, batch: 32870, sum loss: 4227.014648, avg loss: 2.522085, ppl: 12.454539 +epoch: 2, batch: 32871, sum loss: 4357.666016, avg loss: 2.666870, ppl: 14.394848 +epoch: 2, batch: 32872, sum loss: 4011.720459, avg loss: 2.496404, ppl: 12.138761 +epoch: 2, batch: 32873, sum loss: 4462.161621, avg loss: 2.529570, ppl: 12.548111 +epoch: 2, batch: 32874, sum loss: 3954.605469, avg loss: 2.291197, ppl: 9.886761 +epoch: 2, batch: 32875, sum loss: 3530.655762, avg loss: 2.280785, ppl: 9.784361 +epoch: 2, batch: 32876, sum loss: 4293.214844, avg loss: 2.630646, ppl: 13.882742 +epoch: 2, batch: 32877, sum loss: 3430.156738, avg loss: 2.441393, ppl: 11.489030 +epoch: 2, batch: 32878, sum loss: 4727.456055, avg loss: 2.590387, ppl: 13.334929 +epoch: 2, batch: 32879, sum loss: 4446.628418, avg loss: 2.583747, ppl: 13.246679 +epoch: 2, batch: 32880, sum loss: 3485.344482, avg loss: 2.228481, ppl: 9.285750 +epoch: 2, batch: 32881, sum loss: 4374.525391, avg loss: 2.555213, ppl: 12.874047 +epoch: 2, batch: 32882, sum loss: 4478.739746, avg loss: 2.492343, ppl: 12.089565 +epoch: 2, batch: 32883, sum loss: 4663.268555, avg loss: 2.582098, ppl: 13.224852 +epoch: 2, batch: 32884, sum loss: 4275.313965, avg loss: 2.602139, ppl: 13.492564 +epoch: 2, batch: 32885, sum loss: 4668.461914, avg loss: 2.562273, ppl: 12.965258 +epoch: 2, batch: 32886, sum loss: 5044.480957, avg loss: 2.707719, ppl: 14.995034 +epoch: 2, batch: 32887, sum loss: 4141.762695, avg loss: 2.511681, ppl: 12.325636 +epoch: 2, batch: 32888, sum loss: 3703.612793, avg loss: 2.348518, ppl: 10.470040 +epoch: 2, batch: 32889, sum loss: 3076.477295, avg loss: 2.042814, ppl: 7.712279 +epoch: 2, batch: 32890, sum loss: 3047.975586, avg loss: 2.486114, ppl: 12.014494 +epoch: 2, batch: 32891, sum loss: 
3920.554688, avg loss: 2.403774, ppl: 11.064852 +epoch: 2, batch: 32892, sum loss: 4607.901855, avg loss: 2.515230, ppl: 12.369458 +epoch: 2, batch: 32893, sum loss: 4379.872559, avg loss: 2.339676, ppl: 10.377871 +epoch: 2, batch: 32894, sum loss: 3619.689697, avg loss: 2.318828, ppl: 10.163752 +epoch: 2, batch: 32895, sum loss: 4969.067871, avg loss: 2.564019, ppl: 12.987907 +epoch: 2, batch: 32896, sum loss: 4071.460938, avg loss: 2.625055, ppl: 13.805338 +epoch: 2, batch: 32897, sum loss: 3987.334961, avg loss: 2.462838, ppl: 11.738079 +epoch: 2, batch: 32898, sum loss: 4226.593750, avg loss: 2.505391, ppl: 12.248343 +epoch: 2, batch: 32899, sum loss: 3616.167236, avg loss: 2.470060, ppl: 11.823152 +epoch: 2, batch: 32900, sum loss: 4558.949219, avg loss: 2.726645, ppl: 15.281524 +epoch: 2, batch: 32901, sum loss: 4681.840820, avg loss: 2.412077, ppl: 11.157107 +epoch: 2, batch: 32902, sum loss: 4447.433105, avg loss: 2.483212, ppl: 11.979684 +epoch: 2, batch: 32903, sum loss: 3119.625488, avg loss: 2.267170, ppl: 9.652044 +epoch: 2, batch: 32904, sum loss: 4042.052246, avg loss: 2.492017, ppl: 12.085631 +epoch: 2, batch: 32905, sum loss: 3611.127930, avg loss: 2.383583, ppl: 10.843684 +epoch: 2, batch: 32906, sum loss: 3705.906250, avg loss: 2.349972, ppl: 10.485279 +epoch: 2, batch: 32907, sum loss: 4540.909180, avg loss: 2.599261, ppl: 13.453793 +epoch: 2, batch: 32908, sum loss: 4212.516113, avg loss: 2.513435, ppl: 12.347266 +epoch: 2, batch: 32909, sum loss: 4681.230957, avg loss: 2.632864, ppl: 13.913555 +epoch: 2, batch: 32910, sum loss: 4321.124512, avg loss: 2.596830, ppl: 13.421121 +epoch: 2, batch: 32911, sum loss: 4241.662109, avg loss: 2.439139, ppl: 11.463162 +epoch: 2, batch: 32912, sum loss: 4586.417969, avg loss: 2.674296, ppl: 14.502139 +epoch: 2, batch: 32913, sum loss: 5010.314453, avg loss: 2.708278, ppl: 15.003420 +epoch: 2, batch: 32914, sum loss: 3899.651611, avg loss: 2.414645, ppl: 11.185799 +epoch: 2, batch: 32915, sum loss: 
4219.097656, avg loss: 2.417821, ppl: 11.221380 +epoch: 2, batch: 32916, sum loss: 4349.667969, avg loss: 2.238635, ppl: 9.380521 +epoch: 2, batch: 32917, sum loss: 4687.216309, avg loss: 2.720381, ppl: 15.186108 +epoch: 2, batch: 32918, sum loss: 5026.090332, avg loss: 2.749502, ppl: 15.634851 +epoch: 2, batch: 32919, sum loss: 4274.562988, avg loss: 2.593788, ppl: 13.380363 +epoch: 2, batch: 32920, sum loss: 4763.934570, avg loss: 2.639299, ppl: 14.003383 +epoch: 2, batch: 32921, sum loss: 4066.578369, avg loss: 2.603443, ppl: 13.510179 +epoch: 2, batch: 32922, sum loss: 3840.001953, avg loss: 2.456815, ppl: 11.667591 +epoch: 2, batch: 32923, sum loss: 5002.227051, avg loss: 2.648082, ppl: 14.126918 +epoch: 2, batch: 32924, sum loss: 4738.510254, avg loss: 2.525858, ppl: 12.501622 +epoch: 2, batch: 32925, sum loss: 4550.373535, avg loss: 2.644029, ppl: 14.069772 +epoch: 2, batch: 32926, sum loss: 3769.795898, avg loss: 2.141929, ppl: 8.515852 +epoch: 2, batch: 32927, sum loss: 4085.254883, avg loss: 2.523320, ppl: 12.469928 +epoch: 2, batch: 32928, sum loss: 5109.166016, avg loss: 2.805692, ppl: 16.538525 +epoch: 2, batch: 32929, sum loss: 3761.871094, avg loss: 2.455529, ppl: 11.652601 +epoch: 2, batch: 32930, sum loss: 5011.517090, avg loss: 2.692916, ppl: 14.774702 +epoch: 2, batch: 32931, sum loss: 4504.056152, avg loss: 2.499476, ppl: 12.176114 +epoch: 2, batch: 32932, sum loss: 2929.051025, avg loss: 2.054033, ppl: 7.799293 +epoch: 2, batch: 32933, sum loss: 3837.070312, avg loss: 2.246528, ppl: 9.454853 +epoch: 2, batch: 32934, sum loss: 3759.380859, avg loss: 2.329232, ppl: 10.270053 +epoch: 2, batch: 32935, sum loss: 4228.051758, avg loss: 2.527228, ppl: 12.518751 +epoch: 2, batch: 32936, sum loss: 2536.043457, avg loss: 2.023977, ppl: 7.568367 +epoch: 2, batch: 32937, sum loss: 3920.264648, avg loss: 2.460932, ppl: 11.715726 +epoch: 2, batch: 32938, sum loss: 4424.327148, avg loss: 2.710985, ppl: 15.044086 +epoch: 2, batch: 32939, sum loss: 5371.355957, 
avg loss: 2.627865, ppl: 13.844179 +epoch: 2, batch: 32940, sum loss: 4790.851074, avg loss: 2.595261, ppl: 13.400079 +epoch: 2, batch: 32941, sum loss: 4035.216797, avg loss: 2.492413, ppl: 12.090416 +epoch: 2, batch: 32942, sum loss: 4224.331055, avg loss: 2.622180, ppl: 13.765694 +epoch: 2, batch: 32943, sum loss: 3815.101074, avg loss: 2.565636, ppl: 13.008931 +epoch: 2, batch: 32944, sum loss: 4988.516113, avg loss: 2.670512, ppl: 14.447360 +epoch: 2, batch: 32945, sum loss: 4338.407715, avg loss: 2.459415, ppl: 11.697963 +epoch: 2, batch: 32946, sum loss: 4762.555664, avg loss: 2.564650, ppl: 12.996113 +epoch: 2, batch: 32947, sum loss: 3868.427490, avg loss: 2.396795, ppl: 10.987906 +epoch: 2, batch: 32948, sum loss: 4415.299316, avg loss: 2.527361, ppl: 12.520417 +epoch: 2, batch: 32949, sum loss: 4014.043945, avg loss: 2.653036, ppl: 14.197081 +epoch: 2, batch: 32950, sum loss: 3911.421631, avg loss: 2.353443, ppl: 10.521730 +epoch: 2, batch: 32951, sum loss: 5000.131348, avg loss: 2.580047, ppl: 13.197763 +epoch: 2, batch: 32952, sum loss: 4683.685059, avg loss: 2.667247, ppl: 14.400268 +epoch: 2, batch: 32953, sum loss: 4710.885742, avg loss: 2.517844, ppl: 12.401829 +epoch: 2, batch: 32954, sum loss: 4116.439453, avg loss: 2.450262, ppl: 11.591378 +epoch: 2, batch: 32955, sum loss: 3620.066406, avg loss: 2.524454, ppl: 12.484073 +epoch: 2, batch: 32956, sum loss: 5396.448242, avg loss: 2.704987, ppl: 14.954116 +epoch: 2, batch: 32957, sum loss: 4601.420410, avg loss: 2.843894, ppl: 17.182545 +epoch: 2, batch: 32958, sum loss: 3964.492188, avg loss: 2.564355, ppl: 12.992271 +epoch: 2, batch: 32959, sum loss: 4748.605957, avg loss: 2.559895, ppl: 12.934463 +epoch: 2, batch: 32960, sum loss: 4077.534668, avg loss: 2.450442, ppl: 11.593465 +epoch: 2, batch: 32961, sum loss: 4249.222656, avg loss: 2.314391, ppl: 10.118763 +epoch: 2, batch: 32962, sum loss: 3924.748047, avg loss: 2.331995, ppl: 10.298469 +epoch: 2, batch: 32963, sum loss: 4291.075195, avg 
loss: 2.379964, ppl: 10.804515 +epoch: 2, batch: 32964, sum loss: 4331.360352, avg loss: 2.607682, ppl: 13.567568 +epoch: 2, batch: 32965, sum loss: 4287.252930, avg loss: 2.524884, ppl: 12.489447 +epoch: 2, batch: 32966, sum loss: 4537.888184, avg loss: 2.582748, ppl: 13.233453 +epoch: 2, batch: 32967, sum loss: 4254.265625, avg loss: 2.605184, ppl: 13.533716 +epoch: 2, batch: 32968, sum loss: 3966.101807, avg loss: 2.297857, ppl: 9.952833 +epoch: 2, batch: 32969, sum loss: 4072.769775, avg loss: 2.380345, ppl: 10.808630 +epoch: 2, batch: 32970, sum loss: 4680.894531, avg loss: 2.588990, ppl: 13.316319 +epoch: 2, batch: 32971, sum loss: 4306.059570, avg loss: 2.531487, ppl: 12.572186 +epoch: 2, batch: 32972, sum loss: 4315.609375, avg loss: 2.398893, ppl: 11.010984 +epoch: 2, batch: 32973, sum loss: 4566.034180, avg loss: 2.457500, ppl: 11.675580 +epoch: 2, batch: 32974, sum loss: 3664.578613, avg loss: 2.234499, ppl: 9.341803 +epoch: 2, batch: 32975, sum loss: 4542.563477, avg loss: 2.583938, ppl: 13.249212 +epoch: 2, batch: 32976, sum loss: 4146.243164, avg loss: 2.666394, ppl: 14.388000 +epoch: 2, batch: 32977, sum loss: 3987.036377, avg loss: 2.594038, ppl: 13.383706 +epoch: 2, batch: 32978, sum loss: 3366.232178, avg loss: 2.117127, ppl: 8.307238 +epoch: 2, batch: 32979, sum loss: 4513.604980, avg loss: 2.592536, ppl: 13.363619 +epoch: 2, batch: 32980, sum loss: 4013.433105, avg loss: 2.463740, ppl: 11.748673 +epoch: 2, batch: 32981, sum loss: 3727.618408, avg loss: 2.380344, ppl: 10.808617 +epoch: 2, batch: 32982, sum loss: 4296.940918, avg loss: 2.590079, ppl: 13.330822 +epoch: 2, batch: 32983, sum loss: 4835.072266, avg loss: 2.666891, ppl: 14.395140 +epoch: 2, batch: 32984, sum loss: 4370.169434, avg loss: 2.687681, ppl: 14.697552 +epoch: 2, batch: 32985, sum loss: 3226.106201, avg loss: 2.431128, ppl: 11.371697 +epoch: 2, batch: 32986, sum loss: 4783.092773, avg loss: 2.577097, ppl: 13.158888 +epoch: 2, batch: 32987, sum loss: 3782.144531, avg loss: 
2.494818, ppl: 12.119533 +epoch: 2, batch: 32988, sum loss: 4840.893555, avg loss: 2.591485, ppl: 13.349578 +epoch: 2, batch: 32989, sum loss: 3908.358398, avg loss: 2.576373, ppl: 13.149364 +epoch: 2, batch: 32990, sum loss: 4916.453125, avg loss: 2.413575, ppl: 11.173841 +epoch: 2, batch: 32991, sum loss: 4375.600098, avg loss: 2.418795, ppl: 11.232317 +epoch: 2, batch: 32992, sum loss: 4711.128906, avg loss: 2.607155, ppl: 13.560414 +epoch: 2, batch: 32993, sum loss: 3752.533691, avg loss: 2.227023, ppl: 9.272221 +epoch: 2, batch: 32994, sum loss: 3069.167480, avg loss: 2.112297, ppl: 8.267210 +epoch: 2, batch: 32995, sum loss: 4383.688477, avg loss: 2.528079, ppl: 12.529408 +epoch: 2, batch: 32996, sum loss: 4393.850586, avg loss: 2.434266, ppl: 11.407446 +epoch: 2, batch: 32997, sum loss: 5301.436523, avg loss: 2.718685, ppl: 15.160379 +epoch: 2, batch: 32998, sum loss: 4226.387695, avg loss: 2.702294, ppl: 14.913903 +epoch: 2, batch: 32999, sum loss: 3846.260742, avg loss: 2.403913, ppl: 11.066395 +epoch: 2, batch: 33000, sum loss: 3867.980469, avg loss: 2.408456, ppl: 11.116784 +epoch: 2, batch: 33001, sum loss: 4176.097656, avg loss: 2.347441, ppl: 10.458771 +epoch: 2, batch: 33002, sum loss: 4100.652344, avg loss: 2.615212, ppl: 13.670114 +epoch: 2, batch: 33003, sum loss: 4949.708008, avg loss: 2.609229, ppl: 13.588574 +epoch: 2, batch: 33004, sum loss: 4058.846191, avg loss: 2.595170, ppl: 13.398869 +epoch: 2, batch: 33005, sum loss: 4573.343262, avg loss: 2.654291, ppl: 14.214903 +epoch: 2, batch: 33006, sum loss: 4138.962891, avg loss: 2.433253, ppl: 11.395887 +epoch: 2, batch: 33007, sum loss: 4769.852539, avg loss: 2.681199, ppl: 14.602586 +epoch: 2, batch: 33008, sum loss: 5151.793457, avg loss: 2.727260, ppl: 15.290927 +epoch: 2, batch: 33009, sum loss: 4117.400391, avg loss: 2.339432, ppl: 10.375341 +epoch: 2, batch: 33010, sum loss: 3773.485352, avg loss: 2.282810, ppl: 9.804193 +epoch: 2, batch: 33011, sum loss: 3633.381348, avg loss: 2.116122, 
ppl: 8.298892 +epoch: 2, batch: 33012, sum loss: 5211.018555, avg loss: 2.734008, ppl: 15.394459 +epoch: 2, batch: 33013, sum loss: 4599.456543, avg loss: 2.721572, ppl: 15.204204 +epoch: 2, batch: 33014, sum loss: 4648.091309, avg loss: 2.436106, ppl: 11.428456 +epoch: 2, batch: 33015, sum loss: 3500.547363, avg loss: 2.304508, ppl: 10.019245 +epoch: 2, batch: 33016, sum loss: 3632.987061, avg loss: 2.510703, ppl: 12.313581 +epoch: 2, batch: 33017, sum loss: 4524.702148, avg loss: 2.332321, ppl: 10.301821 +epoch: 2, batch: 33018, sum loss: 4228.274902, avg loss: 2.619749, ppl: 13.732277 +epoch: 2, batch: 33019, sum loss: 4430.768555, avg loss: 2.606335, ppl: 13.549297 +epoch: 2, batch: 33020, sum loss: 3591.597900, avg loss: 2.198040, ppl: 9.007346 +epoch: 2, batch: 33021, sum loss: 5200.344238, avg loss: 2.709924, ppl: 15.028133 +epoch: 2, batch: 33022, sum loss: 5414.933105, avg loss: 2.685979, ppl: 14.672557 +epoch: 2, batch: 33023, sum loss: 3826.979980, avg loss: 2.553022, ppl: 12.845867 +epoch: 2, batch: 33024, sum loss: 4725.096680, avg loss: 2.476466, ppl: 11.899135 +epoch: 2, batch: 33025, sum loss: 3713.666260, avg loss: 2.475777, ppl: 11.890947 +epoch: 2, batch: 33026, sum loss: 4162.829590, avg loss: 2.474928, ppl: 11.880857 +epoch: 2, batch: 33027, sum loss: 4300.446289, avg loss: 2.666117, ppl: 14.384004 +epoch: 2, batch: 33028, sum loss: 4896.888184, avg loss: 2.644108, ppl: 14.070889 +epoch: 2, batch: 33029, sum loss: 3401.577148, avg loss: 2.468488, ppl: 11.804590 +epoch: 2, batch: 33030, sum loss: 4003.071777, avg loss: 2.382781, ppl: 10.834993 +epoch: 2, batch: 33031, sum loss: 4085.700684, avg loss: 2.494323, ppl: 12.113524 +epoch: 2, batch: 33032, sum loss: 4026.307861, avg loss: 2.455066, ppl: 11.647199 +epoch: 2, batch: 33033, sum loss: 4771.802734, avg loss: 2.611824, ppl: 13.623882 +epoch: 2, batch: 33034, sum loss: 4469.510742, avg loss: 2.567209, ppl: 13.029408 +epoch: 2, batch: 33035, sum loss: 4520.112793, avg loss: 2.338393, ppl: 
10.364565 +epoch: 2, batch: 33036, sum loss: 4568.920898, avg loss: 2.492592, ppl: 12.092578 +epoch: 2, batch: 33037, sum loss: 4144.229004, avg loss: 2.514702, ppl: 12.362926 +epoch: 2, batch: 33038, sum loss: 4066.089111, avg loss: 2.353061, ppl: 10.517715 +epoch: 2, batch: 33039, sum loss: 4054.981445, avg loss: 2.297440, ppl: 9.948679 +epoch: 2, batch: 33040, sum loss: 3064.479492, avg loss: 1.986053, ppl: 7.286716 +epoch: 2, batch: 33041, sum loss: 3891.072510, avg loss: 2.436489, ppl: 11.432828 +epoch: 2, batch: 33042, sum loss: 3978.885986, avg loss: 2.273649, ppl: 9.714787 +epoch: 2, batch: 33043, sum loss: 3740.398682, avg loss: 2.126435, ppl: 8.384919 +epoch: 2, batch: 33044, sum loss: 4727.416992, avg loss: 2.726307, ppl: 15.276373 +epoch: 2, batch: 33045, sum loss: 4044.270752, avg loss: 2.431913, ppl: 11.380629 +epoch: 2, batch: 33046, sum loss: 4875.532227, avg loss: 2.628319, ppl: 13.850471 +epoch: 2, batch: 33047, sum loss: 4541.791992, avg loss: 2.467025, ppl: 11.787322 +epoch: 2, batch: 33048, sum loss: 4937.039062, avg loss: 2.739755, ppl: 15.483197 +epoch: 2, batch: 33049, sum loss: 5086.112305, avg loss: 2.749250, ppl: 15.630903 +epoch: 2, batch: 33050, sum loss: 4167.849121, avg loss: 2.595174, ppl: 13.398913 +epoch: 2, batch: 33051, sum loss: 4331.962891, avg loss: 2.702410, ppl: 14.915631 +epoch: 2, batch: 33052, sum loss: 4360.125488, avg loss: 2.827578, ppl: 16.904470 +epoch: 2, batch: 33053, sum loss: 3916.027100, avg loss: 2.360474, ppl: 10.595977 +epoch: 2, batch: 33054, sum loss: 4099.723633, avg loss: 2.413022, ppl: 11.167654 +epoch: 2, batch: 33055, sum loss: 4141.820312, avg loss: 2.471253, ppl: 11.837272 +epoch: 2, batch: 33056, sum loss: 4541.711914, avg loss: 2.642066, ppl: 14.042192 +epoch: 2, batch: 33057, sum loss: 4749.417969, avg loss: 2.629800, ppl: 13.870990 +epoch: 2, batch: 33058, sum loss: 4171.935547, avg loss: 2.660673, ppl: 14.305916 +epoch: 2, batch: 33059, sum loss: 5083.916992, avg loss: 2.511817, ppl: 12.327305 
+epoch: 2, batch: 33060, sum loss: 4249.951660, avg loss: 2.574168, ppl: 13.120399 +epoch: 2, batch: 33061, sum loss: 4363.353027, avg loss: 2.433549, ppl: 11.399265 +epoch: 2, batch: 33062, sum loss: 4201.566406, avg loss: 2.514403, ppl: 12.359223 +epoch: 2, batch: 33063, sum loss: 3888.036621, avg loss: 2.485957, ppl: 12.012609 +epoch: 2, batch: 33064, sum loss: 4609.366211, avg loss: 2.690815, ppl: 14.743687 +epoch: 2, batch: 33065, sum loss: 4272.406738, avg loss: 2.670254, ppl: 14.443641 +epoch: 2, batch: 33066, sum loss: 3625.980469, avg loss: 2.360664, ppl: 10.597990 +epoch: 2, batch: 33067, sum loss: 4129.091309, avg loss: 2.385379, ppl: 10.863180 +epoch: 2, batch: 33068, sum loss: 5133.911133, avg loss: 2.634126, ppl: 13.931130 +epoch: 2, batch: 33069, sum loss: 4152.635254, avg loss: 2.392071, ppl: 10.936119 +epoch: 2, batch: 33070, sum loss: 3180.201660, avg loss: 2.111688, ppl: 8.262174 +epoch: 2, batch: 33071, sum loss: 4576.133789, avg loss: 2.575202, ppl: 13.133970 +epoch: 2, batch: 33072, sum loss: 4486.809570, avg loss: 2.439810, ppl: 11.470856 +epoch: 2, batch: 33073, sum loss: 4191.783203, avg loss: 2.486230, ppl: 12.015886 +epoch: 2, batch: 33074, sum loss: 3938.524658, avg loss: 2.423707, ppl: 11.287630 +epoch: 2, batch: 33075, sum loss: 5311.157227, avg loss: 2.678344, ppl: 14.560967 +epoch: 2, batch: 33076, sum loss: 4600.054688, avg loss: 2.500030, ppl: 12.182854 +epoch: 2, batch: 33077, sum loss: 3693.210449, avg loss: 2.391976, ppl: 10.935079 +epoch: 2, batch: 33078, sum loss: 4960.241211, avg loss: 2.796077, ppl: 16.380264 +epoch: 2, batch: 33079, sum loss: 3497.070068, avg loss: 2.380579, ppl: 10.811155 +epoch: 2, batch: 33080, sum loss: 4306.341797, avg loss: 2.459362, ppl: 11.697341 +epoch: 2, batch: 33081, sum loss: 3875.113525, avg loss: 2.496852, ppl: 12.144200 +epoch: 2, batch: 33082, sum loss: 5083.740234, avg loss: 2.740561, ppl: 15.495672 +epoch: 2, batch: 33083, sum loss: 4297.974121, avg loss: 2.671208, ppl: 14.457425 +epoch: 
2, batch: 33084, sum loss: 4065.523926, avg loss: 2.469942, ppl: 11.821756 +epoch: 2, batch: 33085, sum loss: 4871.619629, avg loss: 2.706455, ppl: 14.976098 +epoch: 2, batch: 33086, sum loss: 4299.455078, avg loss: 2.623218, ppl: 13.780001 +epoch: 2, batch: 33087, sum loss: 4398.323730, avg loss: 2.743808, ppl: 15.546065 +epoch: 2, batch: 33088, sum loss: 4709.090820, avg loss: 2.675620, ppl: 14.521344 +epoch: 2, batch: 33089, sum loss: 5289.386230, avg loss: 2.801582, ppl: 16.470676 +epoch: 2, batch: 33090, sum loss: 4867.740234, avg loss: 2.684909, ppl: 14.656873 +epoch: 2, batch: 33091, sum loss: 4603.098633, avg loss: 2.574440, ppl: 13.123965 +epoch: 2, batch: 33092, sum loss: 3803.623535, avg loss: 2.573494, ppl: 13.111552 +epoch: 2, batch: 33093, sum loss: 4252.197266, avg loss: 2.708406, ppl: 15.005338 +epoch: 2, batch: 33094, sum loss: 3710.047852, avg loss: 2.415396, ppl: 11.194200 +epoch: 2, batch: 33095, sum loss: 3900.187256, avg loss: 2.381067, ppl: 10.816433 +epoch: 2, batch: 33096, sum loss: 3014.715088, avg loss: 2.266703, ppl: 9.647542 +epoch: 2, batch: 33097, sum loss: 5512.691895, avg loss: 2.880194, ppl: 17.817734 +epoch: 2, batch: 33098, sum loss: 4488.838379, avg loss: 2.681504, ppl: 14.607053 +epoch: 2, batch: 33099, sum loss: 5281.727539, avg loss: 2.878326, ppl: 17.784472 +epoch: 2, batch: 33100, sum loss: 4049.173828, avg loss: 2.561147, ppl: 12.950666 +epoch: 2, batch: 33101, sum loss: 3977.762207, avg loss: 2.443343, ppl: 11.511458 +epoch: 2, batch: 33102, sum loss: 3565.112793, avg loss: 2.175175, ppl: 8.803729 +epoch: 2, batch: 33103, sum loss: 4504.887207, avg loss: 2.660890, ppl: 14.309023 +epoch: 2, batch: 33104, sum loss: 3745.924316, avg loss: 2.485683, ppl: 12.009319 +epoch: 2, batch: 33105, sum loss: 3734.637695, avg loss: 2.335608, ppl: 10.335745 +epoch: 2, batch: 33106, sum loss: 5221.457031, avg loss: 2.902422, ppl: 18.218216 +epoch: 2, batch: 33107, sum loss: 3920.514648, avg loss: 2.530997, ppl: 12.566031 +epoch: 2, batch: 
33108, sum loss: 4222.316895, avg loss: 2.603155, ppl: 13.506279 +epoch: 2, batch: 33109, sum loss: 4995.977539, avg loss: 2.840237, ppl: 17.119829 +epoch: 2, batch: 33110, sum loss: 4032.217773, avg loss: 2.499825, ppl: 12.180362 +epoch: 2, batch: 33111, sum loss: 4137.137695, avg loss: 2.504321, ppl: 12.235247 +epoch: 2, batch: 33112, sum loss: 4866.461914, avg loss: 2.750968, ppl: 15.657777 +epoch: 2, batch: 33113, sum loss: 3234.743164, avg loss: 2.328829, ppl: 10.265911 +epoch: 2, batch: 33114, sum loss: 5180.499512, avg loss: 2.668985, ppl: 14.425315 +epoch: 2, batch: 33115, sum loss: 4489.765625, avg loss: 2.628668, ppl: 13.855307 +epoch: 2, batch: 33116, sum loss: 3504.153564, avg loss: 2.290297, ppl: 9.877867 +epoch: 2, batch: 33117, sum loss: 4755.850586, avg loss: 2.517655, ppl: 12.399487 +epoch: 2, batch: 33118, sum loss: 4256.922852, avg loss: 2.473517, ppl: 11.864099 +epoch: 2, batch: 33119, sum loss: 4324.018066, avg loss: 2.495106, ppl: 12.123013 +epoch: 2, batch: 33120, sum loss: 3472.447754, avg loss: 2.351014, ppl: 10.496209 +epoch: 2, batch: 33121, sum loss: 4787.233398, avg loss: 2.723113, ppl: 15.227656 +epoch: 2, batch: 33122, sum loss: 5411.728027, avg loss: 2.711287, ppl: 15.048624 +epoch: 2, batch: 33123, sum loss: 4669.573730, avg loss: 2.656185, ppl: 14.241855 +epoch: 2, batch: 33124, sum loss: 4489.105957, avg loss: 2.416096, ppl: 11.202038 +epoch: 2, batch: 33125, sum loss: 4371.635742, avg loss: 2.415268, ppl: 11.192775 +epoch: 2, batch: 33126, sum loss: 4736.273438, avg loss: 2.728268, ppl: 15.306355 +epoch: 2, batch: 33127, sum loss: 4433.740234, avg loss: 2.353365, ppl: 10.520918 +epoch: 2, batch: 33128, sum loss: 4152.687012, avg loss: 2.422805, ppl: 11.277444 +epoch: 2, batch: 33129, sum loss: 3619.257812, avg loss: 2.229980, ppl: 9.299683 +epoch: 2, batch: 33130, sum loss: 4209.163574, avg loss: 2.543301, ppl: 12.721601 +epoch: 2, batch: 33131, sum loss: 4651.892578, avg loss: 2.728383, ppl: 15.308111 +epoch: 2, batch: 33132, 
sum loss: 5457.792480, avg loss: 2.980772, ppl: 19.703012 +epoch: 2, batch: 33133, sum loss: 3605.316162, avg loss: 2.393968, ppl: 10.956888 +epoch: 2, batch: 33134, sum loss: 3495.101074, avg loss: 2.265134, ppl: 9.632412 +epoch: 2, batch: 33135, sum loss: 3537.296387, avg loss: 2.230326, ppl: 9.302896 +epoch: 2, batch: 33136, sum loss: 4220.580566, avg loss: 2.379132, ppl: 10.795528 +epoch: 2, batch: 33137, sum loss: 4433.690918, avg loss: 2.506326, ppl: 12.259808 +epoch: 2, batch: 33138, sum loss: 4193.903320, avg loss: 2.490442, ppl: 12.066603 +epoch: 2, batch: 33139, sum loss: 4331.463379, avg loss: 2.368214, ppl: 10.678306 +epoch: 2, batch: 33140, sum loss: 4218.144531, avg loss: 2.428408, ppl: 11.340812 +epoch: 2, batch: 33141, sum loss: 4170.891113, avg loss: 2.436268, ppl: 11.430305 +epoch: 2, batch: 33142, sum loss: 4823.188477, avg loss: 2.611364, ppl: 13.617608 +epoch: 2, batch: 33143, sum loss: 4563.105469, avg loss: 2.511340, ppl: 12.321434 +epoch: 2, batch: 33144, sum loss: 4569.922852, avg loss: 2.605429, ppl: 13.537034 +epoch: 2, batch: 33145, sum loss: 4058.340820, avg loss: 2.409941, ppl: 11.133307 +epoch: 2, batch: 33146, sum loss: 4254.044922, avg loss: 2.561135, ppl: 12.950505 +epoch: 2, batch: 33147, sum loss: 3822.494873, avg loss: 2.466126, ppl: 11.776732 +epoch: 2, batch: 33148, sum loss: 3540.565918, avg loss: 2.366688, ppl: 10.662026 +epoch: 2, batch: 33149, sum loss: 3634.993164, avg loss: 2.250770, ppl: 9.495043 +epoch: 2, batch: 33150, sum loss: 3861.844482, avg loss: 2.422738, ppl: 11.276693 +epoch: 2, batch: 33151, sum loss: 5017.050293, avg loss: 2.746059, ppl: 15.581108 +epoch: 2, batch: 33152, sum loss: 5164.080078, avg loss: 2.782371, ppl: 16.157282 +epoch: 2, batch: 33153, sum loss: 3846.195801, avg loss: 2.385978, ppl: 10.869682 +epoch: 2, batch: 33154, sum loss: 4390.941406, avg loss: 2.166227, ppl: 8.725298 +epoch: 2, batch: 33155, sum loss: 3956.537598, avg loss: 2.376299, ppl: 10.764987 +epoch: 2, batch: 33156, sum loss: 
5105.113770, avg loss: 2.815838, ppl: 16.707169 +epoch: 2, batch: 33157, sum loss: 4247.915039, avg loss: 2.476918, ppl: 11.904521 +epoch: 2, batch: 33158, sum loss: 4158.829102, avg loss: 2.548302, ppl: 12.785378 +epoch: 2, batch: 33159, sum loss: 4730.679688, avg loss: 2.540644, ppl: 12.687844 +epoch: 2, batch: 33160, sum loss: 4959.872559, avg loss: 2.702928, ppl: 14.923365 +epoch: 2, batch: 33161, sum loss: 3958.219727, avg loss: 2.517951, ppl: 12.403163 +epoch: 2, batch: 33162, sum loss: 4218.997559, avg loss: 2.386311, ppl: 10.873309 +epoch: 2, batch: 33163, sum loss: 4515.998047, avg loss: 2.554298, ppl: 12.862263 +epoch: 2, batch: 33164, sum loss: 4305.510254, avg loss: 2.670912, ppl: 14.453145 +epoch: 2, batch: 33165, sum loss: 4387.683594, avg loss: 2.632084, ppl: 13.902708 +epoch: 2, batch: 33166, sum loss: 3969.557861, avg loss: 2.438303, ppl: 11.453590 +epoch: 2, batch: 33167, sum loss: 4033.801758, avg loss: 2.322281, ppl: 10.198911 +epoch: 2, batch: 33168, sum loss: 3823.538574, avg loss: 2.494154, ppl: 12.111488 +epoch: 2, batch: 33169, sum loss: 4902.062988, avg loss: 2.418383, ppl: 11.227694 +epoch: 2, batch: 33170, sum loss: 3965.383545, avg loss: 2.338080, ppl: 10.361323 +epoch: 2, batch: 33171, sum loss: 4458.221191, avg loss: 2.687294, ppl: 14.691870 +epoch: 2, batch: 33172, sum loss: 4665.256836, avg loss: 2.696680, ppl: 14.830418 +epoch: 2, batch: 33173, sum loss: 3914.920410, avg loss: 2.476230, ppl: 11.896336 +epoch: 2, batch: 33174, sum loss: 3905.529297, avg loss: 2.340042, ppl: 10.381669 +epoch: 2, batch: 33175, sum loss: 3800.813232, avg loss: 2.313337, ppl: 10.108103 +epoch: 2, batch: 33176, sum loss: 4501.049316, avg loss: 2.731219, ppl: 15.351594 +epoch: 2, batch: 33177, sum loss: 4324.787109, avg loss: 2.697933, ppl: 14.849010 +epoch: 2, batch: 33178, sum loss: 4050.117432, avg loss: 2.324981, ppl: 10.226488 +epoch: 2, batch: 33179, sum loss: 4120.749512, avg loss: 2.390226, ppl: 10.915962 +epoch: 2, batch: 33180, sum loss: 
4346.429688, avg loss: 2.661623, ppl: 14.319504 +epoch: 2, batch: 33181, sum loss: 4891.587891, avg loss: 2.648396, ppl: 14.131361 +epoch: 2, batch: 33182, sum loss: 3569.654297, avg loss: 2.162117, ppl: 8.689509 +epoch: 2, batch: 33183, sum loss: 4920.139648, avg loss: 2.853909, ppl: 17.355501 +epoch: 2, batch: 33184, sum loss: 3573.267822, avg loss: 2.240293, ppl: 9.396087 +epoch: 2, batch: 33185, sum loss: 4332.173828, avg loss: 2.511405, ppl: 12.322233 +epoch: 2, batch: 33186, sum loss: 4406.476074, avg loss: 2.695092, ppl: 14.806888 +epoch: 2, batch: 33187, sum loss: 3577.594727, avg loss: 2.438715, ppl: 11.458304 +epoch: 2, batch: 33188, sum loss: 4266.176758, avg loss: 2.678077, ppl: 14.557073 +epoch: 2, batch: 33189, sum loss: 3761.663086, avg loss: 2.496127, ppl: 12.135401 +epoch: 2, batch: 33190, sum loss: 4157.778320, avg loss: 2.390902, ppl: 10.923340 +epoch: 2, batch: 33191, sum loss: 4204.392090, avg loss: 2.426077, ppl: 11.314413 +epoch: 2, batch: 33192, sum loss: 5121.784668, avg loss: 2.794209, ppl: 16.349688 +epoch: 2, batch: 33193, sum loss: 4997.833008, avg loss: 2.714738, ppl: 15.100658 +epoch: 2, batch: 33194, sum loss: 4970.743164, avg loss: 2.830719, ppl: 16.957657 +epoch: 2, batch: 33195, sum loss: 4919.926270, avg loss: 2.607274, ppl: 13.562031 +epoch: 2, batch: 33196, sum loss: 5066.939453, avg loss: 2.758269, ppl: 15.772514 +epoch: 2, batch: 33197, sum loss: 4085.229492, avg loss: 2.339765, ppl: 10.378798 +epoch: 2, batch: 33198, sum loss: 3330.862549, avg loss: 2.460016, ppl: 11.705005 +epoch: 2, batch: 33199, sum loss: 5166.745605, avg loss: 2.841994, ppl: 17.149933 +epoch: 2, batch: 33200, sum loss: 4124.494629, avg loss: 2.466803, ppl: 11.784712 +epoch: 2, batch: 33201, sum loss: 4170.416992, avg loss: 2.437415, ppl: 11.443422 +epoch: 2, batch: 33202, sum loss: 3949.413818, avg loss: 2.194119, ppl: 8.972091 +epoch: 2, batch: 33203, sum loss: 3798.625000, avg loss: 2.563175, ppl: 12.976954 +epoch: 2, batch: 33204, sum loss: 
3885.882324, avg loss: 2.300700, ppl: 9.981166 +epoch: 2, batch: 33205, sum loss: 3572.327148, avg loss: 2.284097, ppl: 9.816814 +epoch: 2, batch: 33206, sum loss: 4583.049805, avg loss: 2.593690, ppl: 13.379048 +epoch: 2, batch: 33207, sum loss: 4355.042480, avg loss: 2.581531, ppl: 13.217356 +epoch: 2, batch: 33208, sum loss: 3605.867188, avg loss: 2.616740, ppl: 13.691011 +epoch: 2, batch: 33209, sum loss: 4268.460938, avg loss: 2.453138, ppl: 11.624772 +epoch: 2, batch: 33210, sum loss: 3804.127686, avg loss: 2.416854, ppl: 11.210531 +epoch: 2, batch: 33211, sum loss: 4982.821289, avg loss: 2.520395, ppl: 12.433511 +epoch: 2, batch: 33212, sum loss: 4213.257812, avg loss: 2.342000, ppl: 10.402018 +epoch: 2, batch: 33213, sum loss: 5026.725586, avg loss: 2.769546, ppl: 15.951387 +epoch: 2, batch: 33214, sum loss: 3689.248291, avg loss: 2.280129, ppl: 9.777939 +epoch: 2, batch: 33215, sum loss: 3608.499023, avg loss: 2.360039, ppl: 10.591362 +epoch: 2, batch: 33216, sum loss: 4358.748047, avg loss: 2.685612, ppl: 14.667171 +epoch: 2, batch: 33217, sum loss: 5250.775391, avg loss: 2.709378, ppl: 15.019938 +epoch: 2, batch: 33218, sum loss: 4158.265137, avg loss: 2.353291, ppl: 10.520135 +epoch: 2, batch: 33219, sum loss: 3139.581299, avg loss: 2.223500, ppl: 9.239609 +epoch: 2, batch: 33220, sum loss: 3913.316895, avg loss: 2.323822, ppl: 10.214645 +epoch: 2, batch: 33221, sum loss: 3766.162354, avg loss: 2.458331, ppl: 11.685288 +epoch: 2, batch: 33222, sum loss: 4229.818848, avg loss: 2.645290, ppl: 14.087528 +epoch: 2, batch: 33223, sum loss: 4550.064941, avg loss: 2.396032, ppl: 10.979524 +epoch: 2, batch: 33224, sum loss: 4223.295898, avg loss: 2.506407, ppl: 12.260798 +epoch: 2, batch: 33225, sum loss: 5019.140137, avg loss: 2.627822, ppl: 13.843588 +epoch: 2, batch: 33226, sum loss: 4417.705566, avg loss: 2.629587, ppl: 13.868037 +epoch: 2, batch: 33227, sum loss: 4650.420410, avg loss: 2.420833, ppl: 11.255232 +epoch: 2, batch: 33228, sum loss: 
3556.030029, avg loss: 2.327245, ppl: 10.249662 +epoch: 2, batch: 33229, sum loss: 4606.284180, avg loss: 2.687447, ppl: 14.694115 +epoch: 2, batch: 33230, sum loss: 4387.994629, avg loss: 2.410986, ppl: 11.144947 +epoch: 2, batch: 33231, sum loss: 5877.388672, avg loss: 3.035841, ppl: 20.818483 +epoch: 2, batch: 33232, sum loss: 3601.221436, avg loss: 2.671529, ppl: 14.462069 +epoch: 2, batch: 33233, sum loss: 3338.622803, avg loss: 2.286728, ppl: 9.842679 +epoch: 2, batch: 33234, sum loss: 4448.190430, avg loss: 2.523080, ppl: 12.466937 +epoch: 2, batch: 33235, sum loss: 4286.043457, avg loss: 2.499151, ppl: 12.172150 +epoch: 2, batch: 33236, sum loss: 4435.184082, avg loss: 2.580096, ppl: 13.198399 +epoch: 2, batch: 33237, sum loss: 3932.242188, avg loss: 2.412419, ppl: 11.160922 +epoch: 2, batch: 33238, sum loss: 4170.904297, avg loss: 2.530889, ppl: 12.564666 +epoch: 2, batch: 33239, sum loss: 3147.900391, avg loss: 2.201329, ppl: 9.037016 +epoch: 2, batch: 33240, sum loss: 4285.497559, avg loss: 2.632370, ppl: 13.906683 +epoch: 2, batch: 33241, sum loss: 3959.743164, avg loss: 2.485714, ppl: 12.009697 +epoch: 2, batch: 33242, sum loss: 5184.790039, avg loss: 2.618581, ppl: 13.716241 +epoch: 2, batch: 33243, sum loss: 4594.936523, avg loss: 2.659107, ppl: 14.283525 +epoch: 2, batch: 33244, sum loss: 3745.933105, avg loss: 2.418291, ppl: 11.226661 +epoch: 2, batch: 33245, sum loss: 3814.640625, avg loss: 2.271972, ppl: 9.698504 +epoch: 2, batch: 33246, sum loss: 3867.590820, avg loss: 2.446294, ppl: 11.545478 +epoch: 2, batch: 33247, sum loss: 4948.966797, avg loss: 2.589726, ppl: 13.326122 +epoch: 2, batch: 33248, sum loss: 4646.443848, avg loss: 2.428878, ppl: 11.346146 +epoch: 2, batch: 33249, sum loss: 4265.515625, avg loss: 2.662619, ppl: 14.333778 +epoch: 2, batch: 33250, sum loss: 4332.396973, avg loss: 2.449066, ppl: 11.577524 +epoch: 2, batch: 33251, sum loss: 4076.883545, avg loss: 2.578674, ppl: 13.179648 +epoch: 2, batch: 33252, sum loss: 
3729.377197, avg loss: 2.399857, ppl: 11.021595 +epoch: 2, batch: 33253, sum loss: 3966.892578, avg loss: 2.509103, ppl: 12.293901 +epoch: 2, batch: 33254, sum loss: 4555.287109, avg loss: 2.522307, ppl: 12.457308 +epoch: 2, batch: 33255, sum loss: 4620.097656, avg loss: 2.290579, ppl: 9.880658 +epoch: 2, batch: 33256, sum loss: 4262.333008, avg loss: 2.621361, ppl: 13.754431 +epoch: 2, batch: 33257, sum loss: 4316.565430, avg loss: 2.503808, ppl: 12.228979 +epoch: 2, batch: 33258, sum loss: 3421.368896, avg loss: 2.242050, ppl: 9.412611 +epoch: 2, batch: 33259, sum loss: 3266.914551, avg loss: 2.419937, ppl: 11.245147 +epoch: 2, batch: 33260, sum loss: 4067.853027, avg loss: 2.512571, ppl: 12.336611 +epoch: 2, batch: 33261, sum loss: 4624.745117, avg loss: 2.621738, ppl: 13.759613 +epoch: 2, batch: 33262, sum loss: 3965.818115, avg loss: 2.352205, ppl: 10.508718 +epoch: 2, batch: 33263, sum loss: 4208.331055, avg loss: 2.378932, ppl: 10.793372 +epoch: 2, batch: 33264, sum loss: 4993.515625, avg loss: 2.571326, ppl: 13.083168 +epoch: 2, batch: 33265, sum loss: 5003.313477, avg loss: 2.589707, ppl: 13.325865 +epoch: 2, batch: 33266, sum loss: 4417.590820, avg loss: 2.408719, ppl: 11.119708 +epoch: 2, batch: 33267, sum loss: 4116.955078, avg loss: 2.744637, ppl: 15.558962 +epoch: 2, batch: 33268, sum loss: 4233.894531, avg loss: 2.440285, ppl: 11.476311 +epoch: 2, batch: 33269, sum loss: 3210.816406, avg loss: 2.222018, ppl: 9.225932 +epoch: 2, batch: 33270, sum loss: 5463.304199, avg loss: 2.684670, ppl: 14.653368 +epoch: 2, batch: 33271, sum loss: 4580.165039, avg loss: 2.369460, ppl: 10.691613 +epoch: 2, batch: 33272, sum loss: 3948.409912, avg loss: 2.553952, ppl: 12.857821 +epoch: 2, batch: 33273, sum loss: 5205.350586, avg loss: 2.639630, ppl: 14.008020 +epoch: 2, batch: 33274, sum loss: 3969.358643, avg loss: 2.473121, ppl: 11.859399 +epoch: 2, batch: 33275, sum loss: 4177.682617, avg loss: 2.521233, ppl: 12.443926 +epoch: 2, batch: 33276, sum loss: 
4576.029297, avg loss: 2.419899, ppl: 11.244726 +epoch: 2, batch: 33277, sum loss: 4032.670654, avg loss: 2.325646, ppl: 10.233291 +epoch: 2, batch: 33278, sum loss: 5065.416992, avg loss: 2.541604, ppl: 12.700026 +epoch: 2, batch: 33279, sum loss: 4366.787109, avg loss: 2.588493, ppl: 13.309697 +epoch: 2, batch: 33280, sum loss: 4016.685547, avg loss: 2.351689, ppl: 10.503299 +epoch: 2, batch: 33281, sum loss: 4110.619141, avg loss: 2.406686, ppl: 11.097119 +epoch: 2, batch: 33282, sum loss: 4466.119141, avg loss: 2.539010, ppl: 12.667128 +epoch: 2, batch: 33283, sum loss: 3615.531738, avg loss: 2.278218, ppl: 9.759274 +epoch: 2, batch: 33284, sum loss: 4006.186035, avg loss: 2.432414, ppl: 11.386336 +epoch: 2, batch: 33285, sum loss: 5337.227051, avg loss: 2.795824, ppl: 16.376110 +epoch: 2, batch: 33286, sum loss: 3716.419189, avg loss: 2.349190, ppl: 10.477082 +epoch: 2, batch: 33287, sum loss: 3936.227539, avg loss: 2.458606, ppl: 11.688503 +epoch: 2, batch: 33288, sum loss: 4768.354980, avg loss: 2.525612, ppl: 12.498537 +epoch: 2, batch: 33289, sum loss: 3929.246094, avg loss: 2.331897, ppl: 10.297455 +epoch: 2, batch: 33290, sum loss: 4580.846680, avg loss: 2.413513, ppl: 11.173141 +epoch: 2, batch: 33291, sum loss: 5301.174805, avg loss: 2.860861, ppl: 17.476561 +epoch: 2, batch: 33292, sum loss: 4663.545898, avg loss: 2.569447, ppl: 13.058599 +epoch: 2, batch: 33293, sum loss: 4129.860352, avg loss: 2.544584, ppl: 12.737931 +epoch: 2, batch: 33294, sum loss: 4837.768555, avg loss: 2.767602, ppl: 15.920414 +epoch: 2, batch: 33295, sum loss: 4376.569824, avg loss: 2.394185, ppl: 10.959261 +epoch: 2, batch: 33296, sum loss: 4393.673828, avg loss: 2.322238, ppl: 10.198470 +epoch: 2, batch: 33297, sum loss: 4439.646484, avg loss: 2.639504, ppl: 14.006261 +epoch: 2, batch: 33298, sum loss: 4950.304199, avg loss: 2.583666, ppl: 13.245608 +epoch: 2, batch: 33299, sum loss: 5633.391602, avg loss: 2.765533, ppl: 15.887513 +epoch: 2, batch: 33300, sum loss: 
4227.003906, avg loss: 2.454706, ppl: 11.643009 +epoch: 2, batch: 33301, sum loss: 3617.117188, avg loss: 2.262112, ppl: 9.603349 +epoch: 2, batch: 33302, sum loss: 5926.779297, avg loss: 2.849413, ppl: 17.277639 +epoch: 2, batch: 33303, sum loss: 3781.508545, avg loss: 2.519326, ppl: 12.420225 +epoch: 2, batch: 33304, sum loss: 3814.690186, avg loss: 2.239982, ppl: 9.393166 +epoch: 2, batch: 33305, sum loss: 4057.546387, avg loss: 2.506205, ppl: 12.258326 +epoch: 2, batch: 33306, sum loss: 4602.264648, avg loss: 2.691383, ppl: 14.752062 +epoch: 2, batch: 33307, sum loss: 4145.046875, avg loss: 2.563418, ppl: 12.980109 +epoch: 2, batch: 33308, sum loss: 3978.675049, avg loss: 2.437914, ppl: 11.449129 +epoch: 2, batch: 33309, sum loss: 4333.974609, avg loss: 2.570566, ppl: 13.073224 +epoch: 2, batch: 33310, sum loss: 4698.460449, avg loss: 2.515236, ppl: 12.369523 +epoch: 2, batch: 33311, sum loss: 3905.444580, avg loss: 2.440903, ppl: 11.483402 +epoch: 2, batch: 33312, sum loss: 4145.038574, avg loss: 2.353798, ppl: 10.525472 +epoch: 2, batch: 33313, sum loss: 3960.516602, avg loss: 2.361668, ppl: 10.608628 +epoch: 2, batch: 33314, sum loss: 4016.026855, avg loss: 2.290945, ppl: 9.884276 +epoch: 2, batch: 33315, sum loss: 4431.442871, avg loss: 2.603668, ppl: 13.513213 +epoch: 2, batch: 33316, sum loss: 4476.345215, avg loss: 2.568184, ppl: 13.042124 +epoch: 2, batch: 33317, sum loss: 4122.967285, avg loss: 2.385977, ppl: 10.869672 +epoch: 2, batch: 33318, sum loss: 4638.510742, avg loss: 2.605905, ppl: 13.543477 +epoch: 2, batch: 33319, sum loss: 3662.833008, avg loss: 2.331530, ppl: 10.293674 +epoch: 2, batch: 33320, sum loss: 3946.468994, avg loss: 2.310579, ppl: 10.080257 +epoch: 2, batch: 33321, sum loss: 4321.972656, avg loss: 2.386512, ppl: 10.875492 +epoch: 2, batch: 33322, sum loss: 4789.512207, avg loss: 2.715143, ppl: 15.106770 +epoch: 2, batch: 33323, sum loss: 3728.746582, avg loss: 2.362957, ppl: 10.622317 +epoch: 2, batch: 33324, sum loss: 
3508.901855, avg loss: 2.336153, ppl: 10.341377 +epoch: 2, batch: 33325, sum loss: 3188.163818, avg loss: 2.209400, ppl: 9.110246 +epoch: 2, batch: 33326, sum loss: 4400.401855, avg loss: 2.507351, ppl: 12.272383 +epoch: 2, batch: 33327, sum loss: 4917.808594, avg loss: 2.538879, ppl: 12.665464 +epoch: 2, batch: 33328, sum loss: 4592.054199, avg loss: 2.643670, ppl: 14.064724 +epoch: 2, batch: 33329, sum loss: 4308.752930, avg loss: 2.698030, ppl: 14.850440 +epoch: 2, batch: 33330, sum loss: 4164.607910, avg loss: 2.196523, ppl: 8.993690 +epoch: 2, batch: 33331, sum loss: 3954.570312, avg loss: 2.613728, ppl: 13.649843 +epoch: 2, batch: 33332, sum loss: 3759.638672, avg loss: 2.493129, ppl: 12.099075 +epoch: 2, batch: 33333, sum loss: 4826.041504, avg loss: 2.656049, ppl: 14.239920 +epoch: 2, batch: 33334, sum loss: 4307.243652, avg loss: 2.502756, ppl: 12.216120 +epoch: 2, batch: 33335, sum loss: 3974.672852, avg loss: 2.132335, ppl: 8.434540 +epoch: 2, batch: 33336, sum loss: 3828.593994, avg loss: 2.429311, ppl: 11.351056 +epoch: 2, batch: 33337, sum loss: 4862.960449, avg loss: 2.695654, ppl: 14.815210 +epoch: 2, batch: 33338, sum loss: 4683.516602, avg loss: 2.751772, ppl: 15.670382 +epoch: 2, batch: 33339, sum loss: 3270.096191, avg loss: 2.274059, ppl: 9.718765 +epoch: 2, batch: 33340, sum loss: 4478.333008, avg loss: 2.472851, ppl: 11.856201 +epoch: 2, batch: 33341, sum loss: 4793.521973, avg loss: 2.696019, ppl: 14.820616 +epoch: 2, batch: 33342, sum loss: 3431.572998, avg loss: 2.094977, ppl: 8.125257 +epoch: 2, batch: 33343, sum loss: 3571.832275, avg loss: 2.185944, ppl: 8.899044 +epoch: 2, batch: 33344, sum loss: 3269.643066, avg loss: 2.286464, ppl: 9.840079 +epoch: 2, batch: 33345, sum loss: 4711.314453, avg loss: 2.611593, ppl: 13.620735 +epoch: 2, batch: 33346, sum loss: 3641.291748, avg loss: 2.197521, ppl: 9.002666 +epoch: 2, batch: 33347, sum loss: 3920.892090, avg loss: 2.430807, ppl: 11.368056 +epoch: 2, batch: 33348, sum loss: 4315.014160, 
avg loss: 2.417375, ppl: 11.216378 +epoch: 2, batch: 33349, sum loss: 4610.724609, avg loss: 2.503108, ppl: 12.220416 +epoch: 2, batch: 33350, sum loss: 3621.951172, avg loss: 2.408212, ppl: 11.114074 +epoch: 2, batch: 33351, sum loss: 4698.002930, avg loss: 2.683040, ppl: 14.629501 +epoch: 2, batch: 33352, sum loss: 4800.131348, avg loss: 2.455310, ppl: 11.650045 +epoch: 2, batch: 33353, sum loss: 3851.114258, avg loss: 2.299173, ppl: 9.965936 +epoch: 2, batch: 33354, sum loss: 4165.371582, avg loss: 2.627995, ppl: 13.845975 +epoch: 2, batch: 33355, sum loss: 3843.463379, avg loss: 2.256878, ppl: 9.553219 +epoch: 2, batch: 33356, sum loss: 4581.048340, avg loss: 2.744787, ppl: 15.561292 +epoch: 2, batch: 33357, sum loss: 3459.199707, avg loss: 2.257963, ppl: 9.563590 +epoch: 2, batch: 33358, sum loss: 4451.709473, avg loss: 2.675306, ppl: 14.516796 +epoch: 2, batch: 33359, sum loss: 4411.950195, avg loss: 2.487007, ppl: 12.025227 +epoch: 2, batch: 33360, sum loss: 3754.309326, avg loss: 2.466695, ppl: 11.783436 +epoch: 2, batch: 33361, sum loss: 3772.119629, avg loss: 2.350230, ppl: 10.487984 +epoch: 2, batch: 33362, sum loss: 4411.828613, avg loss: 2.548717, ppl: 12.790677 +epoch: 2, batch: 33363, sum loss: 4086.622559, avg loss: 2.527287, ppl: 12.519492 +epoch: 2, batch: 33364, sum loss: 4146.773926, avg loss: 2.550291, ppl: 12.810835 +epoch: 2, batch: 33365, sum loss: 3438.802002, avg loss: 2.230092, ppl: 9.300722 +epoch: 2, batch: 33366, sum loss: 3701.610840, avg loss: 2.464454, ppl: 11.757066 +epoch: 2, batch: 33367, sum loss: 3876.680176, avg loss: 2.518960, ppl: 12.415684 +epoch: 2, batch: 33368, sum loss: 3984.502441, avg loss: 2.328757, ppl: 10.265172 +epoch: 2, batch: 33369, sum loss: 4067.160400, avg loss: 2.431058, ppl: 11.370911 +epoch: 2, batch: 33370, sum loss: 4234.644043, avg loss: 2.498315, ppl: 12.161984 +epoch: 2, batch: 33371, sum loss: 4913.281738, avg loss: 2.652960, ppl: 14.195994 +epoch: 2, batch: 33372, sum loss: 3940.546143, avg loss: 
2.287026, ppl: 9.845614 +epoch: 2, batch: 33373, sum loss: 3637.580811, avg loss: 2.234386, ppl: 9.340747 +epoch: 2, batch: 33374, sum loss: 4356.661621, avg loss: 2.600992, ppl: 13.477103 +epoch: 2, batch: 33375, sum loss: 4042.982910, avg loss: 2.247350, ppl: 9.462628 +epoch: 2, batch: 33376, sum loss: 4500.234863, avg loss: 2.457802, ppl: 11.679111 +epoch: 2, batch: 33377, sum loss: 4347.870117, avg loss: 2.577279, ppl: 13.161282 +epoch: 2, batch: 33378, sum loss: 3724.638916, avg loss: 2.393727, ppl: 10.954243 +epoch: 2, batch: 33379, sum loss: 4698.068359, avg loss: 2.589895, ppl: 13.328366 +epoch: 2, batch: 33380, sum loss: 3733.284912, avg loss: 2.280565, ppl: 9.782206 +epoch: 2, batch: 33381, sum loss: 4763.306641, avg loss: 2.611462, ppl: 13.618945 +epoch: 2, batch: 33382, sum loss: 4187.110352, avg loss: 2.333952, ppl: 10.318645 +epoch: 2, batch: 33383, sum loss: 3851.656250, avg loss: 2.433137, ppl: 11.394573 +epoch: 2, batch: 33384, sum loss: 4706.897461, avg loss: 2.559488, ppl: 12.929191 +epoch: 2, batch: 33385, sum loss: 4492.636230, avg loss: 2.766402, ppl: 15.901311 +epoch: 2, batch: 33386, sum loss: 3758.825928, avg loss: 2.166470, ppl: 8.727425 +epoch: 2, batch: 33387, sum loss: 5136.001953, avg loss: 2.672218, ppl: 14.472027 +epoch: 2, batch: 33388, sum loss: 3708.326660, avg loss: 2.417423, ppl: 11.216913 +epoch: 2, batch: 33389, sum loss: 4872.680664, avg loss: 2.732855, ppl: 15.376730 +epoch: 2, batch: 33390, sum loss: 4395.017578, avg loss: 2.609868, ppl: 13.597257 +epoch: 2, batch: 33391, sum loss: 3074.225586, avg loss: 2.131918, ppl: 8.431022 +epoch: 2, batch: 33392, sum loss: 4009.084717, avg loss: 2.764886, ppl: 15.877233 +epoch: 2, batch: 33393, sum loss: 4137.909668, avg loss: 2.297562, ppl: 9.949896 +epoch: 2, batch: 33394, sum loss: 4477.753418, avg loss: 2.670097, ppl: 14.441375 +epoch: 2, batch: 33395, sum loss: 4952.011230, avg loss: 2.668109, ppl: 14.412695 +epoch: 2, batch: 33396, sum loss: 5235.436523, avg loss: 2.580304, ppl: 
13.201150 +epoch: 2, batch: 33397, sum loss: 4443.013672, avg loss: 2.566732, ppl: 13.023197 +epoch: 2, batch: 33398, sum loss: 4353.491211, avg loss: 2.605321, ppl: 13.535568 +epoch: 2, batch: 33399, sum loss: 5364.181641, avg loss: 2.603971, ppl: 13.517315 +epoch: 2, batch: 33400, sum loss: 4164.667480, avg loss: 2.293319, ppl: 9.907767 +epoch: 2, batch: 33401, sum loss: 3479.867188, avg loss: 2.298459, ppl: 9.958827 +epoch: 2, batch: 33402, sum loss: 4072.689453, avg loss: 2.373362, ppl: 10.733420 +epoch: 2, batch: 33403, sum loss: 3744.317871, avg loss: 2.210341, ppl: 9.118825 +epoch: 2, batch: 33404, sum loss: 4646.907227, avg loss: 2.616502, ppl: 13.687757 +epoch: 2, batch: 33405, sum loss: 4588.687988, avg loss: 2.503376, ppl: 12.223692 +epoch: 2, batch: 33406, sum loss: 4653.517090, avg loss: 2.522231, ppl: 12.456360 +epoch: 2, batch: 33407, sum loss: 4772.473145, avg loss: 2.509187, ppl: 12.294927 +epoch: 2, batch: 33408, sum loss: 4558.458008, avg loss: 2.519877, ppl: 12.427073 +epoch: 2, batch: 33409, sum loss: 5324.354004, avg loss: 2.582131, ppl: 13.225290 +epoch: 2, batch: 33410, sum loss: 4267.711914, avg loss: 2.597512, ppl: 13.430279 +epoch: 2, batch: 33411, sum loss: 3323.176514, avg loss: 2.237829, ppl: 9.372962 +epoch: 2, batch: 33412, sum loss: 3910.363525, avg loss: 2.374234, ppl: 10.742781 +epoch: 2, batch: 33413, sum loss: 3714.495361, avg loss: 2.241699, ppl: 9.409304 +epoch: 2, batch: 33414, sum loss: 4169.668457, avg loss: 2.420005, ppl: 11.245914 +epoch: 2, batch: 33415, sum loss: 5397.929199, avg loss: 2.685537, ppl: 14.666077 +epoch: 2, batch: 33416, sum loss: 4823.580566, avg loss: 2.746914, ppl: 15.594432 +epoch: 2, batch: 33417, sum loss: 4266.063477, avg loss: 2.242936, ppl: 9.420947 +epoch: 2, batch: 33418, sum loss: 4244.286133, avg loss: 2.442052, ppl: 11.496606 +epoch: 2, batch: 33419, sum loss: 4570.250488, avg loss: 2.655578, ppl: 14.233216 +epoch: 2, batch: 33420, sum loss: 3254.499268, avg loss: 2.198986, ppl: 9.015867 
+epoch: 2, batch: 33421, sum loss: 5075.556641, avg loss: 2.698329, ppl: 14.854888 +epoch: 2, batch: 33422, sum loss: 4392.846680, avg loss: 2.725091, ppl: 15.257802 +epoch: 2, batch: 33423, sum loss: 3534.562500, avg loss: 2.298155, ppl: 9.955798 +epoch: 2, batch: 33424, sum loss: 4160.739258, avg loss: 2.353359, ppl: 10.520855 +epoch: 2, batch: 33425, sum loss: 4250.191895, avg loss: 2.554202, ppl: 12.861031 +epoch: 2, batch: 33426, sum loss: 4408.845703, avg loss: 2.835271, ppl: 17.035009 +epoch: 2, batch: 33427, sum loss: 5314.207031, avg loss: 2.737871, ppl: 15.454043 +epoch: 2, batch: 33428, sum loss: 4465.747559, avg loss: 2.685356, ppl: 14.663426 +epoch: 2, batch: 33429, sum loss: 4016.876709, avg loss: 2.438905, ppl: 11.460485 +epoch: 2, batch: 33430, sum loss: 3850.334473, avg loss: 2.465003, ppl: 11.763517 +epoch: 2, batch: 33431, sum loss: 4084.230957, avg loss: 2.568699, ppl: 13.048832 +epoch: 2, batch: 33432, sum loss: 5901.428711, avg loss: 2.725833, ppl: 15.269131 +epoch: 2, batch: 33433, sum loss: 5141.818359, avg loss: 2.720539, ppl: 15.188504 +epoch: 2, batch: 33434, sum loss: 4141.966309, avg loss: 2.446525, ppl: 11.548145 +epoch: 2, batch: 33435, sum loss: 4183.748047, avg loss: 2.386622, ppl: 10.876690 +epoch: 2, batch: 33436, sum loss: 4334.782715, avg loss: 2.531999, ppl: 12.578627 +epoch: 2, batch: 33437, sum loss: 4000.867432, avg loss: 2.430661, ppl: 11.366389 +epoch: 2, batch: 33438, sum loss: 3151.624268, avg loss: 2.130916, ppl: 8.422575 +epoch: 2, batch: 33439, sum loss: 4413.372070, avg loss: 2.628572, ppl: 13.853969 +epoch: 2, batch: 33440, sum loss: 3889.402588, avg loss: 2.387601, ppl: 10.887348 +epoch: 2, batch: 33441, sum loss: 3981.466064, avg loss: 2.537582, ppl: 12.649048 +epoch: 2, batch: 33442, sum loss: 3740.590088, avg loss: 2.615797, ppl: 13.678118 +epoch: 2, batch: 33443, sum loss: 3699.366455, avg loss: 2.489479, ppl: 12.054997 +epoch: 2, batch: 33444, sum loss: 4138.248535, avg loss: 2.495928, ppl: 12.132988 +epoch: 
2, batch: 33445, sum loss: 4153.043945, avg loss: 2.280639, ppl: 9.782931 +epoch: 2, batch: 33446, sum loss: 3258.655518, avg loss: 2.278780, ppl: 9.764762 +epoch: 2, batch: 33447, sum loss: 4726.754883, avg loss: 2.455457, ppl: 11.651757 +epoch: 2, batch: 33448, sum loss: 4186.027832, avg loss: 2.356998, ppl: 10.559202 +epoch: 2, batch: 33449, sum loss: 3631.792480, avg loss: 2.308832, ppl: 10.062661 +epoch: 2, batch: 33450, sum loss: 4630.733398, avg loss: 2.545758, ppl: 12.752888 +epoch: 2, batch: 33451, sum loss: 4776.764160, avg loss: 2.503545, ppl: 12.225759 +epoch: 2, batch: 33452, sum loss: 4924.083496, avg loss: 2.464506, ppl: 11.757674 +epoch: 2, batch: 33453, sum loss: 5337.158203, avg loss: 2.684687, ppl: 14.653616 +epoch: 2, batch: 33454, sum loss: 3862.990234, avg loss: 2.216288, ppl: 9.173218 +epoch: 2, batch: 33455, sum loss: 4356.779785, avg loss: 2.579503, ppl: 13.190576 +epoch: 2, batch: 33456, sum loss: 4346.566406, avg loss: 2.480917, ppl: 11.952219 +epoch: 2, batch: 33457, sum loss: 4021.294678, avg loss: 2.261696, ppl: 9.599352 +epoch: 2, batch: 33458, sum loss: 4526.102539, avg loss: 2.596731, ppl: 13.419800 +epoch: 2, batch: 33459, sum loss: 3558.440918, avg loss: 2.306184, ppl: 10.036050 +epoch: 2, batch: 33460, sum loss: 3254.253662, avg loss: 2.006322, ppl: 7.435915 +epoch: 2, batch: 33461, sum loss: 4065.571289, avg loss: 2.586241, ppl: 13.279762 +epoch: 2, batch: 33462, sum loss: 4148.706543, avg loss: 2.683510, ppl: 14.636378 +epoch: 2, batch: 33463, sum loss: 3796.101807, avg loss: 2.054168, ppl: 7.800344 +epoch: 2, batch: 33464, sum loss: 4308.897949, avg loss: 2.463635, ppl: 11.747438 +epoch: 2, batch: 33465, sum loss: 3543.494141, avg loss: 2.270015, ppl: 9.679550 +epoch: 2, batch: 33466, sum loss: 4056.875488, avg loss: 2.440960, ppl: 11.484059 +epoch: 2, batch: 33467, sum loss: 3848.034180, avg loss: 2.385638, ppl: 10.865993 +epoch: 2, batch: 33468, sum loss: 3583.155762, avg loss: 2.279361, ppl: 9.770437 +epoch: 2, batch: 
33469, sum loss: 4732.647461, avg loss: 2.800383, ppl: 16.450949 +epoch: 2, batch: 33470, sum loss: 4087.431396, avg loss: 2.434444, ppl: 11.409472 +epoch: 2, batch: 33471, sum loss: 4625.773926, avg loss: 2.582788, ppl: 13.233986 +epoch: 2, batch: 33472, sum loss: 5266.042969, avg loss: 2.790696, ppl: 16.292351 +epoch: 2, batch: 33473, sum loss: 3685.266602, avg loss: 2.328027, ppl: 10.257681 +epoch: 2, batch: 33474, sum loss: 4455.234863, avg loss: 2.493136, ppl: 12.099165 +epoch: 2, batch: 33475, sum loss: 4452.765625, avg loss: 2.567915, ppl: 13.038616 +epoch: 2, batch: 33476, sum loss: 3292.262451, avg loss: 2.276807, ppl: 9.745512 +epoch: 2, batch: 33477, sum loss: 4216.315918, avg loss: 2.414843, ppl: 11.188015 +epoch: 2, batch: 33478, sum loss: 3319.133545, avg loss: 2.340715, ppl: 10.388661 +epoch: 2, batch: 33479, sum loss: 3933.045166, avg loss: 2.613319, ppl: 13.644263 +epoch: 2, batch: 33480, sum loss: 4573.975586, avg loss: 2.572540, ppl: 13.099051 +epoch: 2, batch: 33481, sum loss: 4154.186523, avg loss: 2.443639, ppl: 11.514871 +epoch: 2, batch: 33482, sum loss: 4027.065918, avg loss: 2.437691, ppl: 11.446586 +epoch: 2, batch: 33483, sum loss: 4030.098877, avg loss: 2.472453, ppl: 11.851487 +epoch: 2, batch: 33484, sum loss: 4328.062012, avg loss: 2.380672, ppl: 10.812168 +epoch: 2, batch: 33485, sum loss: 4348.016602, avg loss: 2.656088, ppl: 14.240477 +epoch: 2, batch: 33486, sum loss: 3847.505127, avg loss: 2.376470, ppl: 10.766829 +epoch: 2, batch: 33487, sum loss: 4290.804199, avg loss: 2.573968, ppl: 13.117768 +epoch: 2, batch: 33488, sum loss: 3842.993652, avg loss: 2.309491, ppl: 10.069302 +epoch: 2, batch: 33489, sum loss: 4343.013672, avg loss: 2.337467, ppl: 10.354974 +epoch: 2, batch: 33490, sum loss: 4224.384766, avg loss: 2.443253, ppl: 11.510427 +epoch: 2, batch: 33491, sum loss: 5134.810547, avg loss: 2.753250, ppl: 15.693547 +epoch: 2, batch: 33492, sum loss: 3676.486572, avg loss: 2.378064, ppl: 10.784001 +epoch: 2, batch: 33493, 
sum loss: 3302.934082, avg loss: 2.218223, ppl: 9.190985 +epoch: 2, batch: 33494, sum loss: 4623.750488, avg loss: 2.540522, ppl: 12.686296 +epoch: 2, batch: 33495, sum loss: 4204.719727, avg loss: 2.490948, ppl: 12.072712 +epoch: 2, batch: 33496, sum loss: 4511.090820, avg loss: 2.557308, ppl: 12.901037 +epoch: 2, batch: 33497, sum loss: 4521.971680, avg loss: 2.639796, ppl: 14.010352 +epoch: 2, batch: 33498, sum loss: 4530.879395, avg loss: 2.747653, ppl: 15.605958 +epoch: 2, batch: 33499, sum loss: 3781.924561, avg loss: 2.243134, ppl: 9.422821 +epoch: 2, batch: 33500, sum loss: 4128.547852, avg loss: 2.292364, ppl: 9.898311 +epoch: 2, batch: 33501, sum loss: 4009.778809, avg loss: 2.303147, ppl: 10.005621 +epoch: 2, batch: 33502, sum loss: 3812.003174, avg loss: 2.277182, ppl: 9.749172 +epoch: 2, batch: 33503, sum loss: 4033.524414, avg loss: 2.536808, ppl: 12.639259 +epoch: 2, batch: 33504, sum loss: 3974.330322, avg loss: 2.391294, ppl: 10.927623 +epoch: 2, batch: 33505, sum loss: 4543.140137, avg loss: 2.605011, ppl: 13.531381 +epoch: 2, batch: 33506, sum loss: 3858.806885, avg loss: 2.355804, ppl: 10.546602 +epoch: 2, batch: 33507, sum loss: 3874.465820, avg loss: 2.581256, ppl: 13.213729 +epoch: 2, batch: 33508, sum loss: 3832.574463, avg loss: 2.233435, ppl: 9.331867 +epoch: 2, batch: 33509, sum loss: 3547.495605, avg loss: 2.175043, ppl: 8.802567 +epoch: 2, batch: 33510, sum loss: 3149.759766, avg loss: 2.148540, ppl: 8.572334 +epoch: 2, batch: 33511, sum loss: 5291.513672, avg loss: 2.741717, ppl: 15.513600 +epoch: 2, batch: 33512, sum loss: 3841.052734, avg loss: 2.434127, ppl: 11.405858 +epoch: 2, batch: 33513, sum loss: 4262.295410, avg loss: 2.441177, ppl: 11.486557 +epoch: 2, batch: 33514, sum loss: 4314.503418, avg loss: 2.488179, ppl: 12.039338 +epoch: 2, batch: 33515, sum loss: 3651.532471, avg loss: 2.237459, ppl: 9.369491 +epoch: 2, batch: 33516, sum loss: 3875.626709, avg loss: 2.306921, ppl: 10.043451 +epoch: 2, batch: 33517, sum loss: 
4946.399414, avg loss: 2.587029, ppl: 13.290227 +epoch: 2, batch: 33518, sum loss: 3709.913330, avg loss: 2.623701, ppl: 13.786655 +epoch: 2, batch: 33519, sum loss: 3873.407471, avg loss: 2.339014, ppl: 10.371006 +epoch: 2, batch: 33520, sum loss: 3935.229004, avg loss: 2.429154, ppl: 11.349276 +epoch: 2, batch: 33521, sum loss: 4727.300781, avg loss: 2.462136, ppl: 11.729840 +epoch: 2, batch: 33522, sum loss: 5192.214844, avg loss: 2.642349, ppl: 14.046153 +epoch: 2, batch: 33523, sum loss: 5045.745605, avg loss: 2.752725, ppl: 15.685322 +epoch: 2, batch: 33524, sum loss: 4622.350098, avg loss: 2.528638, ppl: 12.536419 +epoch: 2, batch: 33525, sum loss: 4749.093262, avg loss: 2.785392, ppl: 16.206171 +epoch: 2, batch: 33526, sum loss: 4008.659424, avg loss: 2.516422, ppl: 12.384201 +epoch: 2, batch: 33527, sum loss: 4356.325195, avg loss: 2.541613, ppl: 12.700144 +epoch: 2, batch: 33528, sum loss: 4357.077637, avg loss: 2.540570, ppl: 12.686901 +epoch: 2, batch: 33529, sum loss: 3368.279541, avg loss: 2.469413, ppl: 11.815510 +epoch: 2, batch: 33530, sum loss: 4499.371582, avg loss: 2.581395, ppl: 13.215563 +epoch: 2, batch: 33531, sum loss: 3022.180176, avg loss: 2.212430, ppl: 9.137898 +epoch: 2, batch: 33532, sum loss: 4120.560059, avg loss: 2.583423, ppl: 13.242394 +epoch: 2, batch: 33533, sum loss: 4130.836426, avg loss: 2.489956, ppl: 12.060741 +epoch: 2, batch: 33534, sum loss: 3809.271484, avg loss: 2.370424, ppl: 10.701929 +epoch: 2, batch: 33535, sum loss: 4537.860840, avg loss: 2.552228, ppl: 12.835667 +epoch: 2, batch: 33536, sum loss: 4156.007324, avg loss: 2.587800, ppl: 13.300482 +epoch: 2, batch: 33537, sum loss: 5845.109375, avg loss: 2.795366, ppl: 16.368612 +epoch: 2, batch: 33538, sum loss: 3619.534668, avg loss: 2.462269, ppl: 11.731395 +epoch: 2, batch: 33539, sum loss: 3991.927246, avg loss: 2.308807, ppl: 10.062414 +epoch: 2, batch: 33540, sum loss: 4506.294434, avg loss: 2.446414, ppl: 11.546865 +epoch: 2, batch: 33541, sum loss: 
3724.148682, avg loss: 2.388806, ppl: 10.900472 +epoch: 2, batch: 33542, sum loss: 4276.990234, avg loss: 2.332056, ppl: 10.299090 +epoch: 2, batch: 33543, sum loss: 4130.164551, avg loss: 2.582967, ppl: 13.236353 +epoch: 2, batch: 33544, sum loss: 4574.655762, avg loss: 2.632138, ppl: 13.903464 +epoch: 2, batch: 33545, sum loss: 4098.513184, avg loss: 2.473454, ppl: 11.863352 +epoch: 2, batch: 33546, sum loss: 3988.608398, avg loss: 2.301563, ppl: 9.989782 +epoch: 2, batch: 33547, sum loss: 4101.277344, avg loss: 2.502305, ppl: 12.210602 +epoch: 2, batch: 33548, sum loss: 4896.928711, avg loss: 2.799845, ppl: 16.442099 +epoch: 2, batch: 33549, sum loss: 5195.800293, avg loss: 2.562032, ppl: 12.962127 +epoch: 2, batch: 33550, sum loss: 4716.973145, avg loss: 2.574767, ppl: 13.128260 +epoch: 2, batch: 33551, sum loss: 3757.672607, avg loss: 2.247412, ppl: 9.463213 +epoch: 2, batch: 33552, sum loss: 4093.765625, avg loss: 2.399628, ppl: 11.019078 +epoch: 2, batch: 33553, sum loss: 4413.486816, avg loss: 2.296299, ppl: 9.937339 +epoch: 2, batch: 33554, sum loss: 4042.115479, avg loss: 2.359670, ppl: 10.587461 +epoch: 2, batch: 33555, sum loss: 4912.491699, avg loss: 2.656837, ppl: 14.251142 +epoch: 2, batch: 33556, sum loss: 3772.609131, avg loss: 2.381698, ppl: 10.823261 +epoch: 2, batch: 33557, sum loss: 4333.771484, avg loss: 2.550778, ppl: 12.817070 +epoch: 2, batch: 33558, sum loss: 4681.917969, avg loss: 2.657161, ppl: 14.255763 +epoch: 2, batch: 33559, sum loss: 4439.347656, avg loss: 2.584021, ppl: 13.250309 +epoch: 2, batch: 33560, sum loss: 4135.383789, avg loss: 2.269695, ppl: 9.676447 +epoch: 2, batch: 33561, sum loss: 4439.093262, avg loss: 2.481327, ppl: 11.957116 +epoch: 2, batch: 33562, sum loss: 4459.299316, avg loss: 2.601691, ppl: 13.486531 +epoch: 2, batch: 33563, sum loss: 4201.894531, avg loss: 2.603404, ppl: 13.509650 +epoch: 2, batch: 33564, sum loss: 4478.063965, avg loss: 2.483674, ppl: 11.985215 +epoch: 2, batch: 33565, sum loss: 
5439.690430, avg loss: 2.575611, ppl: 13.139341 +epoch: 2, batch: 33566, sum loss: 5646.473633, avg loss: 2.997067, ppl: 20.026707 +epoch: 2, batch: 33567, sum loss: 3818.176514, avg loss: 2.190578, ppl: 8.940374 +epoch: 2, batch: 33568, sum loss: 3795.883789, avg loss: 2.447378, ppl: 11.558003 +epoch: 2, batch: 33569, sum loss: 4097.025391, avg loss: 2.472556, ppl: 11.852705 +epoch: 2, batch: 33570, sum loss: 3713.393555, avg loss: 2.052733, ppl: 7.789159 +epoch: 2, batch: 33571, sum loss: 4219.115234, avg loss: 2.435979, ppl: 11.426996 +epoch: 2, batch: 33572, sum loss: 3920.334717, avg loss: 2.539077, ppl: 12.667974 +epoch: 2, batch: 33573, sum loss: 3711.924805, avg loss: 2.454977, ppl: 11.646163 +epoch: 2, batch: 33574, sum loss: 4041.647705, avg loss: 2.532361, ppl: 12.583178 +epoch: 2, batch: 33575, sum loss: 3140.148438, avg loss: 2.251003, ppl: 9.497252 +epoch: 2, batch: 33576, sum loss: 3727.540039, avg loss: 2.284032, ppl: 9.816178 +epoch: 2, batch: 33577, sum loss: 4112.572754, avg loss: 2.535495, ppl: 12.622675 +epoch: 2, batch: 33578, sum loss: 4392.982422, avg loss: 2.417712, ppl: 11.220158 +epoch: 2, batch: 33579, sum loss: 3982.976074, avg loss: 2.379317, ppl: 10.797523 +epoch: 2, batch: 33580, sum loss: 3388.067139, avg loss: 2.260218, ppl: 9.585182 +epoch: 2, batch: 33581, sum loss: 4380.420898, avg loss: 2.576718, ppl: 13.153900 +epoch: 2, batch: 33582, sum loss: 4875.015625, avg loss: 2.662488, ppl: 14.331905 +epoch: 2, batch: 33583, sum loss: 4318.418945, avg loss: 2.477578, ppl: 11.912379 +epoch: 2, batch: 33584, sum loss: 4586.643555, avg loss: 2.617947, ppl: 13.707558 +epoch: 2, batch: 33585, sum loss: 4366.990234, avg loss: 2.667679, ppl: 14.406490 +epoch: 2, batch: 33586, sum loss: 3744.633057, avg loss: 2.428426, ppl: 11.341018 +epoch: 2, batch: 33587, sum loss: 4832.758301, avg loss: 2.528916, ppl: 12.539904 +epoch: 2, batch: 33588, sum loss: 3298.510498, avg loss: 2.317997, ppl: 10.155317 +epoch: 2, batch: 33589, sum loss: 3739.725586, 
avg loss: 2.518334, ppl: 12.407904 +epoch: 2, batch: 33590, sum loss: 5015.062012, avg loss: 2.728543, ppl: 15.310560 +epoch: 2, batch: 33591, sum loss: 4643.346680, avg loss: 2.508561, ppl: 12.287238 +epoch: 2, batch: 33592, sum loss: 4879.992676, avg loss: 2.738492, ppl: 15.463649 +epoch: 2, batch: 33593, sum loss: 4588.604492, avg loss: 2.656980, ppl: 14.253180 +epoch: 2, batch: 33594, sum loss: 5068.326660, avg loss: 2.544341, ppl: 12.734831 +epoch: 2, batch: 33595, sum loss: 3587.357666, avg loss: 2.347747, ppl: 10.461973 +epoch: 2, batch: 33596, sum loss: 3880.192383, avg loss: 2.539393, ppl: 12.671976 +epoch: 2, batch: 33597, sum loss: 3969.731445, avg loss: 2.451965, ppl: 11.611141 +epoch: 2, batch: 33598, sum loss: 5287.656250, avg loss: 2.686817, ppl: 14.684866 +epoch: 2, batch: 33599, sum loss: 3941.334961, avg loss: 2.529740, ppl: 12.550243 +epoch: 2, batch: 33600, sum loss: 3793.602539, avg loss: 2.413233, ppl: 11.170018 +epoch: 2, batch: 33601, sum loss: 3797.268555, avg loss: 2.305567, ppl: 10.029861 +epoch: 2, batch: 33602, sum loss: 4783.683594, avg loss: 2.434445, ppl: 11.409481 +epoch: 2, batch: 33603, sum loss: 3553.672363, avg loss: 2.427372, ppl: 11.329067 +epoch: 2, batch: 33604, sum loss: 3976.994385, avg loss: 2.267385, ppl: 9.654127 +epoch: 2, batch: 33605, sum loss: 4991.316406, avg loss: 2.524692, ppl: 12.487049 +epoch: 2, batch: 33606, sum loss: 5377.088867, avg loss: 2.936695, ppl: 18.853439 +epoch: 2, batch: 33607, sum loss: 4476.512695, avg loss: 2.460974, ppl: 11.716223 +epoch: 2, batch: 33608, sum loss: 4338.509277, avg loss: 2.473494, ppl: 11.863831 +epoch: 2, batch: 33609, sum loss: 4492.709961, avg loss: 2.645884, ppl: 14.095894 +epoch: 2, batch: 33610, sum loss: 5053.677734, avg loss: 2.748058, ppl: 15.612277 +epoch: 2, batch: 33611, sum loss: 4906.436523, avg loss: 2.598748, ppl: 13.446895 +epoch: 2, batch: 33612, sum loss: 4116.765137, avg loss: 2.450455, ppl: 11.593626 +epoch: 2, batch: 33613, sum loss: 4587.953125, avg 
loss: 2.584762, ppl: 13.260134 +epoch: 2, batch: 33614, sum loss: 4107.193848, avg loss: 2.532179, ppl: 12.580886 +epoch: 2, batch: 33615, sum loss: 3865.880859, avg loss: 2.408648, ppl: 11.118923 +epoch: 2, batch: 33616, sum loss: 3853.592529, avg loss: 2.465510, ppl: 11.769487 +epoch: 2, batch: 33617, sum loss: 4502.505371, avg loss: 2.539484, ppl: 12.673133 +epoch: 2, batch: 33618, sum loss: 4822.567871, avg loss: 2.671783, ppl: 14.465734 +epoch: 2, batch: 33619, sum loss: 3956.938965, avg loss: 2.448601, ppl: 11.572144 +epoch: 2, batch: 33620, sum loss: 4152.370117, avg loss: 2.661776, ppl: 14.321699 +epoch: 2, batch: 33621, sum loss: 5062.199707, avg loss: 2.746717, ppl: 15.591365 +epoch: 2, batch: 33622, sum loss: 4314.997559, avg loss: 2.511640, ppl: 12.325128 +epoch: 2, batch: 33623, sum loss: 4726.472168, avg loss: 2.631666, ppl: 13.896905 +epoch: 2, batch: 33624, sum loss: 4499.271973, avg loss: 2.635777, ppl: 13.954157 +epoch: 2, batch: 33625, sum loss: 4850.466309, avg loss: 2.575925, ppl: 13.143464 +epoch: 2, batch: 33626, sum loss: 4147.536133, avg loss: 2.435429, ppl: 11.420721 +epoch: 2, batch: 33627, sum loss: 4798.013672, avg loss: 2.627609, ppl: 13.840637 +epoch: 2, batch: 33628, sum loss: 4412.096191, avg loss: 2.618455, ppl: 13.714514 +epoch: 2, batch: 33629, sum loss: 4827.830566, avg loss: 2.619550, ppl: 13.729544 +epoch: 2, batch: 33630, sum loss: 3171.889648, avg loss: 2.241618, ppl: 9.408544 +epoch: 2, batch: 33631, sum loss: 4150.657227, avg loss: 2.308486, ppl: 10.059178 +epoch: 2, batch: 33632, sum loss: 4757.541992, avg loss: 2.631384, ppl: 13.892983 +epoch: 2, batch: 33633, sum loss: 4690.782227, avg loss: 2.467534, ppl: 11.793329 +epoch: 2, batch: 33634, sum loss: 4518.290527, avg loss: 2.614752, ppl: 13.663821 +epoch: 2, batch: 33635, sum loss: 4516.591797, avg loss: 2.530304, ppl: 12.557320 +epoch: 2, batch: 33636, sum loss: 4201.737793, avg loss: 2.501034, ppl: 12.195104 +epoch: 2, batch: 33637, sum loss: 4359.963379, avg loss: 
2.560166, ppl: 12.937969 +epoch: 2, batch: 33638, sum loss: 3884.639893, avg loss: 2.258512, ppl: 9.568836 +epoch: 2, batch: 33639, sum loss: 3858.886719, avg loss: 2.357292, ppl: 10.562309 +epoch: 2, batch: 33640, sum loss: 4259.609863, avg loss: 2.653962, ppl: 14.210234 +epoch: 2, batch: 33641, sum loss: 4944.043457, avg loss: 2.462173, ppl: 11.730276 +epoch: 2, batch: 33642, sum loss: 4189.831543, avg loss: 2.488024, ppl: 12.037460 +epoch: 2, batch: 33643, sum loss: 4238.434570, avg loss: 2.286103, ppl: 9.836528 +epoch: 2, batch: 33644, sum loss: 4811.288574, avg loss: 2.675911, ppl: 14.525579 +epoch: 2, batch: 33645, sum loss: 4237.647461, avg loss: 2.501563, ppl: 12.201548 +epoch: 2, batch: 33646, sum loss: 4293.839355, avg loss: 2.412269, ppl: 11.159257 +epoch: 2, batch: 33647, sum loss: 4198.354492, avg loss: 2.623971, ppl: 13.790383 +epoch: 2, batch: 33648, sum loss: 4762.589844, avg loss: 2.601087, ppl: 13.478379 +epoch: 2, batch: 33649, sum loss: 4427.679688, avg loss: 2.633956, ppl: 13.928760 +epoch: 2, batch: 33650, sum loss: 4240.505859, avg loss: 2.417620, ppl: 11.219128 +epoch: 2, batch: 33651, sum loss: 3878.397461, avg loss: 2.719774, ppl: 15.176888 +epoch: 2, batch: 33652, sum loss: 4573.378906, avg loss: 2.482833, ppl: 11.975146 +epoch: 2, batch: 33653, sum loss: 3717.310547, avg loss: 2.326227, ppl: 10.239236 +epoch: 2, batch: 33654, sum loss: 3298.180908, avg loss: 2.182780, ppl: 8.870935 +epoch: 2, batch: 33655, sum loss: 3682.457764, avg loss: 2.291511, ppl: 9.889868 +epoch: 2, batch: 33656, sum loss: 3855.166504, avg loss: 2.578707, ppl: 13.180082 +epoch: 2, batch: 33657, sum loss: 4818.557617, avg loss: 2.460959, ppl: 11.716042 +epoch: 2, batch: 33658, sum loss: 4080.134277, avg loss: 2.401492, ppl: 11.039631 +epoch: 2, batch: 33659, sum loss: 4425.385254, avg loss: 2.470902, ppl: 11.833113 +epoch: 2, batch: 33660, sum loss: 4338.763672, avg loss: 2.521071, ppl: 12.441917 +epoch: 2, batch: 33661, sum loss: 3606.678223, avg loss: 2.064498, 
ppl: 7.881342 +epoch: 2, batch: 33662, sum loss: 3847.548828, avg loss: 2.511455, ppl: 12.322845 +epoch: 2, batch: 33663, sum loss: 4558.454102, avg loss: 2.829581, ppl: 16.938354 +epoch: 2, batch: 33664, sum loss: 4835.681152, avg loss: 2.682019, ppl: 14.614577 +epoch: 2, batch: 33665, sum loss: 3497.130371, avg loss: 2.221811, ppl: 9.224019 +epoch: 2, batch: 33666, sum loss: 4059.152588, avg loss: 2.530644, ppl: 12.561592 +epoch: 2, batch: 33667, sum loss: 4280.205566, avg loss: 2.698742, ppl: 14.861031 +epoch: 2, batch: 33668, sum loss: 4386.784180, avg loss: 2.240442, ppl: 9.397480 +epoch: 2, batch: 33669, sum loss: 4818.025879, avg loss: 2.709801, ppl: 15.026284 +epoch: 2, batch: 33670, sum loss: 5320.528320, avg loss: 2.800278, ppl: 16.449223 +epoch: 2, batch: 33671, sum loss: 4382.251465, avg loss: 2.482862, ppl: 11.975490 +epoch: 2, batch: 33672, sum loss: 4521.716797, avg loss: 2.691498, ppl: 14.753765 +epoch: 2, batch: 33673, sum loss: 3778.492188, avg loss: 2.225260, ppl: 9.255894 +epoch: 2, batch: 33674, sum loss: 5082.847168, avg loss: 2.685075, ppl: 14.659302 +epoch: 2, batch: 33675, sum loss: 4404.140625, avg loss: 2.569510, ppl: 13.059426 +epoch: 2, batch: 33676, sum loss: 3879.156982, avg loss: 2.173197, ppl: 8.786331 +epoch: 2, batch: 33677, sum loss: 3989.292236, avg loss: 2.247488, ppl: 9.463937 +epoch: 2, batch: 33678, sum loss: 3904.725342, avg loss: 2.512693, ppl: 12.338117 +epoch: 2, batch: 33679, sum loss: 4379.198730, avg loss: 2.427494, ppl: 11.330450 +epoch: 2, batch: 33680, sum loss: 3516.933350, avg loss: 2.248679, ppl: 9.475208 +epoch: 2, batch: 33681, sum loss: 4084.482910, avg loss: 2.340678, ppl: 10.388275 +epoch: 2, batch: 33682, sum loss: 4010.620117, avg loss: 2.544810, ppl: 12.740802 +epoch: 2, batch: 33683, sum loss: 4619.639160, avg loss: 2.423735, ppl: 11.287940 +epoch: 2, batch: 33684, sum loss: 4736.781738, avg loss: 2.559039, ppl: 12.923396 +epoch: 2, batch: 33685, sum loss: 3353.503906, avg loss: 2.278196, ppl: 9.759055 
+epoch: 2, batch: 33686, sum loss: 4770.543457, avg loss: 2.443926, ppl: 11.518173 +epoch: 2, batch: 33687, sum loss: 4356.462891, avg loss: 2.574742, ppl: 13.127925 +epoch: 2, batch: 33688, sum loss: 3817.276855, avg loss: 2.260081, ppl: 9.583866 +epoch: 2, batch: 33689, sum loss: 3967.593750, avg loss: 2.455194, ppl: 11.648693 +epoch: 2, batch: 33690, sum loss: 4115.048340, avg loss: 2.381394, ppl: 10.819972 +epoch: 2, batch: 33691, sum loss: 4391.068359, avg loss: 2.596729, ppl: 13.419767 +epoch: 2, batch: 33692, sum loss: 4962.660156, avg loss: 2.607809, ppl: 13.569285 +epoch: 2, batch: 33693, sum loss: 3240.246826, avg loss: 2.242385, ppl: 9.415765 +epoch: 2, batch: 33694, sum loss: 4380.144043, avg loss: 2.596410, ppl: 13.415494 +epoch: 2, batch: 33695, sum loss: 5043.343262, avg loss: 2.549718, ppl: 12.803497 +epoch: 2, batch: 33696, sum loss: 4525.656250, avg loss: 2.713223, ppl: 15.077796 +epoch: 2, batch: 33697, sum loss: 4702.021973, avg loss: 2.693025, ppl: 14.776312 +epoch: 2, batch: 33698, sum loss: 3898.292725, avg loss: 2.511787, ppl: 12.326935 +epoch: 2, batch: 33699, sum loss: 4560.599609, avg loss: 2.685866, ppl: 14.670896 +epoch: 2, batch: 33700, sum loss: 3628.772949, avg loss: 2.223513, ppl: 9.239732 +epoch: 2, batch: 33701, sum loss: 3430.231445, avg loss: 2.225978, ppl: 9.262534 +epoch: 2, batch: 33702, sum loss: 4316.723633, avg loss: 2.522925, ppl: 12.464999 +epoch: 2, batch: 33703, sum loss: 4872.140625, avg loss: 2.606817, ppl: 13.555831 +epoch: 2, batch: 33704, sum loss: 5212.664062, avg loss: 2.845341, ppl: 17.207420 +epoch: 2, batch: 33705, sum loss: 4389.332031, avg loss: 2.611143, ppl: 13.614608 +epoch: 2, batch: 33706, sum loss: 4881.740234, avg loss: 2.795957, ppl: 16.378292 +epoch: 2, batch: 33707, sum loss: 4593.319336, avg loss: 2.551844, ppl: 12.830744 +epoch: 2, batch: 33708, sum loss: 3260.028320, avg loss: 2.335264, ppl: 10.332187 +epoch: 2, batch: 33709, sum loss: 3634.814941, avg loss: 2.263272, ppl: 9.614497 +epoch: 2, 
batch: 33710, sum loss: 5677.153809, avg loss: 2.685503, ppl: 14.665580 +epoch: 2, batch: 33711, sum loss: 4473.519531, avg loss: 2.569512, ppl: 13.059445 +epoch: 2, batch: 33712, sum loss: 4695.308594, avg loss: 2.715621, ppl: 15.113993 +epoch: 2, batch: 33713, sum loss: 4696.555664, avg loss: 2.544180, ppl: 12.732779 +epoch: 2, batch: 33714, sum loss: 3988.836914, avg loss: 2.266385, ppl: 9.644469 +epoch: 2, batch: 33715, sum loss: 3922.785645, avg loss: 2.364548, ppl: 10.639231 +epoch: 2, batch: 33716, sum loss: 3883.276855, avg loss: 2.411973, ppl: 11.155952 +epoch: 2, batch: 33717, sum loss: 3578.410645, avg loss: 2.416213, ppl: 11.203347 +epoch: 2, batch: 33718, sum loss: 4609.520508, avg loss: 2.487599, ppl: 12.032348 +epoch: 2, batch: 33719, sum loss: 3698.623291, avg loss: 2.425327, ppl: 11.305923 +epoch: 2, batch: 33720, sum loss: 3591.518311, avg loss: 2.062906, ppl: 7.868800 +epoch: 2, batch: 33721, sum loss: 4263.196289, avg loss: 2.462852, ppl: 11.738235 +epoch: 2, batch: 33722, sum loss: 4095.388184, avg loss: 2.531142, ppl: 12.567853 +epoch: 2, batch: 33723, sum loss: 4959.908203, avg loss: 2.465163, ppl: 11.765403 +epoch: 2, batch: 33724, sum loss: 3493.330078, avg loss: 2.229311, ppl: 9.293461 +epoch: 2, batch: 33725, sum loss: 4117.840332, avg loss: 2.435151, ppl: 11.417543 +epoch: 2, batch: 33726, sum loss: 4644.673340, avg loss: 2.449722, ppl: 11.585129 +epoch: 2, batch: 33727, sum loss: 4243.146484, avg loss: 2.456946, ppl: 11.669124 +epoch: 2, batch: 33728, sum loss: 5043.685547, avg loss: 2.704389, ppl: 14.945184 +epoch: 2, batch: 33729, sum loss: 4212.502930, avg loss: 2.613215, ppl: 13.642845 +epoch: 2, batch: 33730, sum loss: 4232.731934, avg loss: 2.445252, ppl: 11.533461 +epoch: 2, batch: 33731, sum loss: 3443.325684, avg loss: 2.411293, ppl: 11.148362 +epoch: 2, batch: 33732, sum loss: 3786.546875, avg loss: 2.287944, ppl: 9.854654 +epoch: 2, batch: 33733, sum loss: 5269.090820, avg loss: 2.792311, ppl: 16.318687 +epoch: 2, batch: 
33734, sum loss: 4667.706055, avg loss: 2.693425, ppl: 14.782221 +epoch: 2, batch: 33735, sum loss: 4183.463867, avg loss: 2.391917, ppl: 10.934441 +epoch: 2, batch: 33736, sum loss: 4124.659180, avg loss: 2.533574, ppl: 12.598457 +epoch: 2, batch: 33737, sum loss: 4894.012207, avg loss: 2.964272, ppl: 19.380581 +epoch: 2, batch: 33738, sum loss: 3585.124023, avg loss: 2.192736, ppl: 8.959697 +epoch: 2, batch: 33739, sum loss: 3382.874756, avg loss: 2.075383, ppl: 7.967599 +epoch: 2, batch: 33740, sum loss: 3933.555176, avg loss: 2.320682, ppl: 10.182612 +epoch: 2, batch: 33741, sum loss: 3616.277344, avg loss: 2.209088, ppl: 9.107409 +epoch: 2, batch: 33742, sum loss: 3600.478760, avg loss: 2.309480, ppl: 10.069184 +epoch: 2, batch: 33743, sum loss: 4127.614258, avg loss: 2.362687, ppl: 10.619446 +epoch: 2, batch: 33744, sum loss: 4413.958984, avg loss: 2.544069, ppl: 12.731364 +epoch: 2, batch: 33745, sum loss: 4360.201660, avg loss: 2.527653, ppl: 12.524080 +epoch: 2, batch: 33746, sum loss: 3900.812256, avg loss: 2.534641, ppl: 12.611899 +epoch: 2, batch: 33747, sum loss: 3855.511719, avg loss: 2.336674, ppl: 10.346764 +epoch: 2, batch: 33748, sum loss: 4966.037109, avg loss: 2.721116, ppl: 15.197278 +epoch: 2, batch: 33749, sum loss: 4451.431152, avg loss: 2.534984, ppl: 12.616224 +epoch: 2, batch: 33750, sum loss: 4669.813965, avg loss: 2.558802, ppl: 12.920331 +epoch: 2, batch: 33751, sum loss: 4295.354492, avg loss: 2.289635, ppl: 9.871331 +epoch: 2, batch: 33752, sum loss: 3906.513184, avg loss: 2.500969, ppl: 12.194301 +epoch: 2, batch: 33753, sum loss: 5144.538574, avg loss: 2.693476, ppl: 14.782969 +epoch: 2, batch: 33754, sum loss: 4900.425293, avg loss: 2.667624, ppl: 14.405704 +epoch: 2, batch: 33755, sum loss: 3534.735352, avg loss: 2.254295, ppl: 9.528577 +epoch: 2, batch: 33756, sum loss: 4418.640137, avg loss: 2.676342, ppl: 14.531836 +epoch: 2, batch: 33757, sum loss: 4579.484375, avg loss: 2.527309, ppl: 12.519773 +epoch: 2, batch: 33758, sum 
loss: 3800.027344, avg loss: 2.434355, ppl: 11.408453 +epoch: 2, batch: 33759, sum loss: 4365.488770, avg loss: 2.312229, ppl: 10.096908 +epoch: 2, batch: 33760, sum loss: 4843.060547, avg loss: 2.595423, ppl: 13.402262 +epoch: 2, batch: 33761, sum loss: 4386.943359, avg loss: 2.642737, ppl: 14.051609 +epoch: 2, batch: 33762, sum loss: 3496.725586, avg loss: 2.244368, ppl: 9.434454 +epoch: 2, batch: 33763, sum loss: 4007.622559, avg loss: 2.399774, ppl: 11.020686 +epoch: 2, batch: 33764, sum loss: 5206.662598, avg loss: 2.879791, ppl: 17.810555 +epoch: 2, batch: 33765, sum loss: 3735.979736, avg loss: 2.526017, ppl: 12.503611 +epoch: 2, batch: 33766, sum loss: 4025.078857, avg loss: 2.627336, ppl: 13.836860 +epoch: 2, batch: 33767, sum loss: 3835.769531, avg loss: 2.321894, ppl: 10.194969 +epoch: 2, batch: 33768, sum loss: 4057.416504, avg loss: 2.407962, ppl: 11.111297 +epoch: 2, batch: 33769, sum loss: 3779.163574, avg loss: 2.381326, ppl: 10.819237 +epoch: 2, batch: 33770, sum loss: 4701.881836, avg loss: 2.662447, ppl: 14.331318 +epoch: 2, batch: 33771, sum loss: 4461.361328, avg loss: 2.407642, ppl: 11.107742 +epoch: 2, batch: 33772, sum loss: 3788.885254, avg loss: 2.354807, ppl: 10.536099 +epoch: 2, batch: 33773, sum loss: 4601.151855, avg loss: 2.518419, ppl: 12.408966 +epoch: 2, batch: 33774, sum loss: 3925.381592, avg loss: 2.380462, ppl: 10.809896 +epoch: 2, batch: 33775, sum loss: 3793.157715, avg loss: 2.547453, ppl: 12.774528 +epoch: 2, batch: 33776, sum loss: 5080.808594, avg loss: 2.811737, ppl: 16.638792 +epoch: 2, batch: 33777, sum loss: 4376.702148, avg loss: 2.623922, ppl: 13.789702 +epoch: 2, batch: 33778, sum loss: 5102.243164, avg loss: 2.609843, ppl: 13.596916 +epoch: 2, batch: 33779, sum loss: 3300.184570, avg loss: 2.257308, ppl: 9.557328 +epoch: 2, batch: 33780, sum loss: 4320.060547, avg loss: 2.574529, ppl: 13.125139 +epoch: 2, batch: 33781, sum loss: 4276.588867, avg loss: 2.505325, ppl: 12.247534 +epoch: 2, batch: 33782, sum loss: 
4997.010254, avg loss: 2.702547, ppl: 14.917684 +epoch: 2, batch: 33783, sum loss: 3955.387695, avg loss: 2.323965, ppl: 10.216097 +epoch: 2, batch: 33784, sum loss: 4177.435547, avg loss: 2.583448, ppl: 13.242723 +epoch: 2, batch: 33785, sum loss: 3835.665771, avg loss: 2.395794, ppl: 10.976910 +epoch: 2, batch: 33786, sum loss: 4333.482910, avg loss: 2.652070, ppl: 14.183372 +epoch: 2, batch: 33787, sum loss: 4064.315186, avg loss: 2.189825, ppl: 8.933650 +epoch: 2, batch: 33788, sum loss: 4582.312012, avg loss: 2.482292, ppl: 11.968670 +epoch: 2, batch: 33789, sum loss: 4001.647461, avg loss: 2.282743, ppl: 9.803530 +epoch: 2, batch: 33790, sum loss: 4023.905029, avg loss: 2.462610, ppl: 11.735404 +epoch: 2, batch: 33791, sum loss: 4407.084473, avg loss: 2.690528, ppl: 14.739451 +epoch: 2, batch: 33792, sum loss: 4738.652344, avg loss: 2.555907, ppl: 12.882982 +epoch: 2, batch: 33793, sum loss: 5096.000000, avg loss: 2.641783, ppl: 14.038215 +epoch: 2, batch: 33794, sum loss: 4935.703613, avg loss: 2.791688, ppl: 16.308521 +epoch: 2, batch: 33795, sum loss: 4352.761230, avg loss: 2.696878, ppl: 14.833352 +epoch: 2, batch: 33796, sum loss: 4931.091309, avg loss: 2.565604, ppl: 13.008519 +epoch: 2, batch: 33797, sum loss: 5114.354980, avg loss: 2.876465, ppl: 17.751417 +epoch: 2, batch: 33798, sum loss: 4499.568848, avg loss: 2.469577, ppl: 11.817445 +epoch: 2, batch: 33799, sum loss: 4811.674316, avg loss: 2.825411, ppl: 16.867870 +epoch: 2, batch: 33800, sum loss: 3885.022217, avg loss: 2.512951, ppl: 12.341294 +epoch: 2, batch: 33801, sum loss: 3643.887207, avg loss: 2.303342, ppl: 10.007570 +epoch: 2, batch: 33802, sum loss: 4022.702148, avg loss: 2.511050, ppl: 12.317857 +epoch: 2, batch: 33803, sum loss: 3657.520020, avg loss: 2.344564, ppl: 10.428727 +epoch: 2, batch: 33804, sum loss: 3685.523926, avg loss: 2.204261, ppl: 9.063550 +epoch: 2, batch: 33805, sum loss: 3837.365723, avg loss: 2.341285, ppl: 10.394583 +epoch: 2, batch: 33806, sum loss: 
4110.416992, avg loss: 2.598241, ppl: 13.440071 +epoch: 2, batch: 33807, sum loss: 3886.891357, avg loss: 2.320532, ppl: 10.181092 +epoch: 2, batch: 33808, sum loss: 3377.446777, avg loss: 2.203162, ppl: 9.053594 +epoch: 2, batch: 33809, sum loss: 4526.657715, avg loss: 2.604521, ppl: 13.524746 +epoch: 2, batch: 33810, sum loss: 5008.158203, avg loss: 2.548681, ppl: 12.790222 +epoch: 2, batch: 33811, sum loss: 4841.993652, avg loss: 2.564615, ppl: 12.995657 +epoch: 2, batch: 33812, sum loss: 4218.204590, avg loss: 2.420083, ppl: 11.246794 +epoch: 2, batch: 33813, sum loss: 2985.183350, avg loss: 2.222772, ppl: 9.232892 +epoch: 2, batch: 33814, sum loss: 4715.208496, avg loss: 2.641573, ppl: 14.035270 +epoch: 2, batch: 33815, sum loss: 4464.571289, avg loss: 2.600216, ppl: 13.466648 +epoch: 2, batch: 33816, sum loss: 4834.181152, avg loss: 2.592054, ppl: 13.357181 +epoch: 2, batch: 33817, sum loss: 4876.621094, avg loss: 2.753597, ppl: 15.699008 +epoch: 2, batch: 33818, sum loss: 4233.939941, avg loss: 2.490553, ppl: 12.067949 +epoch: 2, batch: 33819, sum loss: 5561.368164, avg loss: 2.939412, ppl: 18.904734 +epoch: 2, batch: 33820, sum loss: 5196.025391, avg loss: 2.688063, ppl: 14.703167 +epoch: 2, batch: 33821, sum loss: 4674.663086, avg loss: 2.503837, ppl: 12.229323 +epoch: 2, batch: 33822, sum loss: 5329.946777, avg loss: 2.860948, ppl: 17.478094 +epoch: 2, batch: 33823, sum loss: 4248.493164, avg loss: 2.503532, ppl: 12.225595 +epoch: 2, batch: 33824, sum loss: 4739.114258, avg loss: 2.461877, ppl: 11.726806 +epoch: 2, batch: 33825, sum loss: 5430.421875, avg loss: 2.600777, ppl: 13.474202 +epoch: 2, batch: 33826, sum loss: 4024.954102, avg loss: 2.420297, ppl: 11.249202 +epoch: 2, batch: 33827, sum loss: 4079.025391, avg loss: 2.536707, ppl: 12.637987 +epoch: 2, batch: 33828, sum loss: 3888.316162, avg loss: 2.464079, ppl: 11.752649 +epoch: 2, batch: 33829, sum loss: 3425.046875, avg loss: 2.162277, ppl: 8.690905 +epoch: 2, batch: 33830, sum loss: 
4152.406250, avg loss: 2.407192, ppl: 11.102743 +epoch: 2, batch: 33831, sum loss: 3562.239258, avg loss: 2.156319, ppl: 8.639279 +epoch: 2, batch: 33832, sum loss: 3516.628174, avg loss: 2.315094, ppl: 10.125877 +epoch: 2, batch: 33833, sum loss: 3044.265137, avg loss: 2.062510, ppl: 7.865690 +epoch: 2, batch: 33834, sum loss: 3903.920898, avg loss: 2.478680, ppl: 11.925511 +epoch: 2, batch: 33835, sum loss: 4936.997070, avg loss: 2.660020, ppl: 14.296574 +epoch: 2, batch: 33836, sum loss: 4178.148438, avg loss: 2.599968, ppl: 13.463306 +epoch: 2, batch: 33837, sum loss: 4083.797607, avg loss: 2.523979, ppl: 12.478145 +epoch: 2, batch: 33838, sum loss: 3128.376709, avg loss: 2.239353, ppl: 9.387259 +epoch: 2, batch: 33839, sum loss: 4278.328125, avg loss: 2.551179, ppl: 12.822218 +epoch: 2, batch: 33840, sum loss: 4209.107910, avg loss: 2.483249, ppl: 11.980130 +epoch: 2, batch: 33841, sum loss: 4562.501465, avg loss: 2.449008, ppl: 11.576854 +epoch: 2, batch: 33842, sum loss: 3649.433594, avg loss: 2.351439, ppl: 10.500671 +epoch: 2, batch: 33843, sum loss: 4626.851074, avg loss: 2.716883, ppl: 15.133075 +epoch: 2, batch: 33844, sum loss: 4210.388184, avg loss: 2.615148, ppl: 13.669237 +epoch: 2, batch: 33845, sum loss: 4565.523438, avg loss: 2.537812, ppl: 12.651955 +epoch: 2, batch: 33846, sum loss: 4422.814941, avg loss: 2.375304, ppl: 10.754276 +epoch: 2, batch: 33847, sum loss: 4757.315918, avg loss: 2.527798, ppl: 12.525893 +epoch: 2, batch: 33848, sum loss: 4054.293457, avg loss: 2.339465, ppl: 10.375686 +epoch: 2, batch: 33849, sum loss: 3298.992676, avg loss: 2.295750, ppl: 9.931881 +epoch: 2, batch: 33850, sum loss: 3237.472900, avg loss: 2.368305, ppl: 10.679276 +epoch: 2, batch: 33851, sum loss: 4782.779297, avg loss: 2.633689, ppl: 13.925044 +epoch: 2, batch: 33852, sum loss: 4814.564453, avg loss: 2.695725, ppl: 14.816257 +epoch: 2, batch: 33853, sum loss: 3464.556641, avg loss: 2.285328, ppl: 9.828906 +epoch: 2, batch: 33854, sum loss: 3909.154541, 
avg loss: 2.332431, ppl: 10.302960 +epoch: 2, batch: 33855, sum loss: 3850.675293, avg loss: 2.460495, ppl: 11.710609 +epoch: 2, batch: 33856, sum loss: 3715.677246, avg loss: 2.276763, ppl: 9.745087 +epoch: 2, batch: 33857, sum loss: 4965.556641, avg loss: 2.592980, ppl: 13.369553 +epoch: 2, batch: 33858, sum loss: 3713.707764, avg loss: 2.289586, ppl: 9.870846 +epoch: 2, batch: 33859, sum loss: 3847.738525, avg loss: 2.455481, ppl: 11.652038 +epoch: 2, batch: 33860, sum loss: 3526.356934, avg loss: 2.300298, ppl: 9.977158 +epoch: 2, batch: 33861, sum loss: 4233.348633, avg loss: 2.660810, ppl: 14.307874 +epoch: 2, batch: 33862, sum loss: 4329.152832, avg loss: 2.508200, ppl: 12.282797 +epoch: 2, batch: 33863, sum loss: 4446.094727, avg loss: 2.766705, ppl: 15.906137 +epoch: 2, batch: 33864, sum loss: 4765.546387, avg loss: 3.006654, ppl: 20.219627 +epoch: 2, batch: 33865, sum loss: 4396.017090, avg loss: 2.538116, ppl: 12.655805 +epoch: 2, batch: 33866, sum loss: 3681.057617, avg loss: 2.244547, ppl: 9.436144 +epoch: 2, batch: 33867, sum loss: 4172.954590, avg loss: 2.522947, ppl: 12.465281 +epoch: 2, batch: 33868, sum loss: 3925.536621, avg loss: 2.226623, ppl: 9.268514 +epoch: 2, batch: 33869, sum loss: 5169.811523, avg loss: 2.554255, ppl: 12.861712 +epoch: 2, batch: 33870, sum loss: 4307.013184, avg loss: 2.530560, ppl: 12.560538 +epoch: 2, batch: 33871, sum loss: 4939.515625, avg loss: 2.732033, ppl: 15.364091 +epoch: 2, batch: 33872, sum loss: 5048.153809, avg loss: 2.680910, ppl: 14.598373 +epoch: 2, batch: 33873, sum loss: 4442.012695, avg loss: 2.752177, ppl: 15.676716 +epoch: 2, batch: 33874, sum loss: 3994.192871, avg loss: 2.390301, ppl: 10.916780 +epoch: 2, batch: 33875, sum loss: 4111.148438, avg loss: 2.435514, ppl: 11.421693 +epoch: 2, batch: 33876, sum loss: 4506.675293, avg loss: 2.564983, ppl: 13.000439 +epoch: 2, batch: 33877, sum loss: 3913.123779, avg loss: 2.567667, ppl: 13.035371 +epoch: 2, batch: 33878, sum loss: 4771.323242, avg loss: 
2.515194, ppl: 12.369010 +epoch: 2, batch: 33879, sum loss: 4094.510254, avg loss: 2.425658, ppl: 11.309666 +epoch: 2, batch: 33880, sum loss: 4026.670898, avg loss: 2.535687, ppl: 12.625104 +epoch: 2, batch: 33881, sum loss: 4499.542969, avg loss: 2.549316, ppl: 12.798348 +epoch: 2, batch: 33882, sum loss: 4197.974121, avg loss: 2.519792, ppl: 12.426016 +epoch: 2, batch: 33883, sum loss: 3857.397461, avg loss: 2.487039, ppl: 12.025614 +epoch: 2, batch: 33884, sum loss: 3760.656250, avg loss: 2.399908, ppl: 11.022165 +epoch: 2, batch: 33885, sum loss: 4490.025879, avg loss: 2.630361, ppl: 13.878778 +epoch: 2, batch: 33886, sum loss: 4815.532227, avg loss: 2.645897, ppl: 14.096083 +epoch: 2, batch: 33887, sum loss: 4297.671875, avg loss: 2.373093, ppl: 10.730535 +epoch: 2, batch: 33888, sum loss: 4383.507324, avg loss: 2.464029, ppl: 11.752064 +epoch: 2, batch: 33889, sum loss: 4899.425293, avg loss: 2.815762, ppl: 16.705894 +epoch: 2, batch: 33890, sum loss: 3850.069336, avg loss: 2.488733, ppl: 12.046000 +epoch: 2, batch: 33891, sum loss: 4540.371094, avg loss: 2.606413, ppl: 13.550354 +epoch: 2, batch: 33892, sum loss: 4453.998535, avg loss: 2.582028, ppl: 13.223934 +epoch: 2, batch: 33893, sum loss: 4530.226074, avg loss: 2.646160, ppl: 14.099790 +epoch: 2, batch: 33894, sum loss: 4237.102539, avg loss: 2.474943, ppl: 11.881029 +epoch: 2, batch: 33895, sum loss: 3515.065430, avg loss: 2.224725, ppl: 9.250938 +epoch: 2, batch: 33896, sum loss: 4640.509277, avg loss: 2.531647, ppl: 12.574202 +epoch: 2, batch: 33897, sum loss: 4076.463135, avg loss: 2.283733, ppl: 9.813244 +epoch: 2, batch: 33898, sum loss: 3583.729980, avg loss: 2.457977, ppl: 11.681151 +epoch: 2, batch: 33899, sum loss: 5033.389160, avg loss: 2.636663, ppl: 13.966516 +epoch: 2, batch: 33900, sum loss: 4616.728516, avg loss: 2.475458, ppl: 11.887146 +epoch: 2, batch: 33901, sum loss: 4320.047363, avg loss: 2.342759, ppl: 10.409917 +epoch: 2, batch: 33902, sum loss: 4136.412598, avg loss: 2.400704, 
ppl: 11.030938 +epoch: 2, batch: 33903, sum loss: 4679.070801, avg loss: 2.537457, ppl: 12.647468 +epoch: 2, batch: 33904, sum loss: 3744.830078, avg loss: 2.353759, ppl: 10.525057 +epoch: 2, batch: 33905, sum loss: 4063.257324, avg loss: 2.441861, ppl: 11.494416 +epoch: 2, batch: 33906, sum loss: 3419.260010, avg loss: 2.311873, ppl: 10.093308 +epoch: 2, batch: 33907, sum loss: 4139.803223, avg loss: 2.451038, ppl: 11.600383 +epoch: 2, batch: 33908, sum loss: 4110.819824, avg loss: 2.498979, ppl: 12.170057 +epoch: 2, batch: 33909, sum loss: 3299.245850, avg loss: 2.299126, ppl: 9.965467 +epoch: 2, batch: 33910, sum loss: 4540.859375, avg loss: 2.471889, ppl: 11.844798 +epoch: 2, batch: 33911, sum loss: 5258.526855, avg loss: 2.537899, ppl: 12.653059 +epoch: 2, batch: 33912, sum loss: 4577.573242, avg loss: 2.396635, ppl: 10.986149 +epoch: 2, batch: 33913, sum loss: 5234.982910, avg loss: 2.647943, ppl: 14.124950 +epoch: 2, batch: 33914, sum loss: 4112.529785, avg loss: 2.616113, ppl: 13.682439 +epoch: 2, batch: 33915, sum loss: 3768.264648, avg loss: 2.384978, ppl: 10.858819 +epoch: 2, batch: 33916, sum loss: 3565.210938, avg loss: 2.479285, ppl: 11.932724 +epoch: 2, batch: 33917, sum loss: 4487.914062, avg loss: 2.391004, ppl: 10.924455 +epoch: 2, batch: 33918, sum loss: 3941.961426, avg loss: 2.488612, ppl: 12.044546 +epoch: 2, batch: 33919, sum loss: 3734.111816, avg loss: 2.348498, ppl: 10.469831 +epoch: 2, batch: 33920, sum loss: 4747.344727, avg loss: 2.578677, ppl: 13.179692 +epoch: 2, batch: 33921, sum loss: 5638.708496, avg loss: 2.758664, ppl: 15.778743 +epoch: 2, batch: 33922, sum loss: 4199.384766, avg loss: 2.626257, ppl: 13.821934 +epoch: 2, batch: 33923, sum loss: 5056.083008, avg loss: 2.588880, ppl: 13.314855 +epoch: 2, batch: 33924, sum loss: 5610.545410, avg loss: 2.747574, ppl: 15.604723 +epoch: 2, batch: 33925, sum loss: 4116.820801, avg loss: 2.356509, ppl: 10.554040 +epoch: 2, batch: 33926, sum loss: 4609.967285, avg loss: 2.591325, ppl: 
13.347445 +epoch: 2, batch: 33927, sum loss: 4885.021484, avg loss: 2.528479, ppl: 12.534428 +epoch: 2, batch: 33928, sum loss: 3669.418945, avg loss: 2.184178, ppl: 8.883345 +epoch: 2, batch: 33929, sum loss: 3865.784668, avg loss: 2.551673, ppl: 12.828550 +epoch: 2, batch: 33930, sum loss: 4127.127930, avg loss: 2.483230, ppl: 11.979896 +epoch: 2, batch: 33931, sum loss: 4629.518066, avg loss: 2.436589, ppl: 11.433968 +epoch: 2, batch: 33932, sum loss: 4436.746094, avg loss: 2.623741, ppl: 13.787208 +epoch: 2, batch: 33933, sum loss: 4399.842773, avg loss: 2.741335, ppl: 15.507672 +epoch: 2, batch: 33934, sum loss: 4051.236816, avg loss: 2.415764, ppl: 11.198326 +epoch: 2, batch: 33935, sum loss: 4442.972656, avg loss: 2.577131, ppl: 13.159324 +epoch: 2, batch: 33936, sum loss: 3965.797363, avg loss: 2.424082, ppl: 11.291854 +epoch: 2, batch: 33937, sum loss: 3710.238037, avg loss: 2.277617, ppl: 9.753410 +epoch: 2, batch: 33938, sum loss: 5603.814941, avg loss: 2.726917, ppl: 15.285692 +epoch: 2, batch: 33939, sum loss: 3388.635498, avg loss: 2.291167, ppl: 9.886464 +epoch: 2, batch: 33940, sum loss: 3640.807861, avg loss: 2.203879, ppl: 9.060088 +epoch: 2, batch: 33941, sum loss: 3777.972412, avg loss: 2.314934, ppl: 10.124257 +epoch: 2, batch: 33942, sum loss: 4355.583984, avg loss: 2.673778, ppl: 14.494624 +epoch: 2, batch: 33943, sum loss: 4144.688477, avg loss: 2.436619, ppl: 11.434311 +epoch: 2, batch: 33944, sum loss: 4229.457031, avg loss: 2.516036, ppl: 12.379431 +epoch: 2, batch: 33945, sum loss: 3776.477783, avg loss: 2.414628, ppl: 11.185606 +epoch: 2, batch: 33946, sum loss: 4691.137207, avg loss: 2.714778, ppl: 15.101264 +epoch: 2, batch: 33947, sum loss: 3506.117188, avg loss: 2.112119, ppl: 8.265736 +epoch: 2, batch: 33948, sum loss: 4118.835449, avg loss: 2.481226, ppl: 11.955915 +epoch: 2, batch: 33949, sum loss: 4625.697754, avg loss: 2.729025, ppl: 15.317946 +epoch: 2, batch: 33950, sum loss: 3493.212891, avg loss: 2.256598, ppl: 9.550538 
+epoch: 2, batch: 33951, sum loss: 4280.527832, avg loss: 2.595833, ppl: 13.407745 +epoch: 2, batch: 33952, sum loss: 3600.435303, avg loss: 2.557127, ppl: 12.898712 +epoch: 2, batch: 33953, sum loss: 4273.772461, avg loss: 2.566830, ppl: 13.024476 +epoch: 2, batch: 33954, sum loss: 4083.300293, avg loss: 2.494380, ppl: 12.114223 +epoch: 2, batch: 33955, sum loss: 4384.261719, avg loss: 2.622166, ppl: 13.765510 +epoch: 2, batch: 33956, sum loss: 4472.976562, avg loss: 2.468530, ppl: 11.805082 +epoch: 2, batch: 33957, sum loss: 3943.721924, avg loss: 2.477212, ppl: 11.908021 +epoch: 2, batch: 33958, sum loss: 4550.235352, avg loss: 2.253707, ppl: 9.522976 +epoch: 2, batch: 33959, sum loss: 4564.897949, avg loss: 2.452928, ppl: 11.622322 +epoch: 2, batch: 33960, sum loss: 4710.642090, avg loss: 2.690258, ppl: 14.735481 +epoch: 2, batch: 33961, sum loss: 4279.387207, avg loss: 2.493815, ppl: 12.107383 +epoch: 2, batch: 33962, sum loss: 3722.422363, avg loss: 2.396924, ppl: 10.989319 +epoch: 2, batch: 33963, sum loss: 4144.022461, avg loss: 2.547033, ppl: 12.769159 +epoch: 2, batch: 33964, sum loss: 4908.648926, avg loss: 2.563263, ppl: 12.978098 +epoch: 2, batch: 33965, sum loss: 3897.939941, avg loss: 2.254448, ppl: 9.530028 +epoch: 2, batch: 33966, sum loss: 3929.078125, avg loss: 2.265904, ppl: 9.639837 +epoch: 2, batch: 33967, sum loss: 4829.705078, avg loss: 2.759831, ppl: 15.797180 +epoch: 2, batch: 33968, sum loss: 5013.158691, avg loss: 2.693799, ppl: 14.787742 +epoch: 2, batch: 33969, sum loss: 3401.339844, avg loss: 2.362042, ppl: 10.612597 +epoch: 2, batch: 33970, sum loss: 3718.257080, avg loss: 2.335589, ppl: 10.335543 +epoch: 2, batch: 33971, sum loss: 4838.680176, avg loss: 2.707711, ppl: 14.994920 +epoch: 2, batch: 33972, sum loss: 4682.726074, avg loss: 2.618974, ppl: 13.721641 +epoch: 2, batch: 33973, sum loss: 4273.159180, avg loss: 2.353061, ppl: 10.517717 +epoch: 2, batch: 33974, sum loss: 4134.537598, avg loss: 2.351841, ppl: 10.504896 +epoch: 2, 
batch: 33975, sum loss: 3621.796143, avg loss: 2.257978, ppl: 9.563729 +epoch: 2, batch: 33976, sum loss: 4196.237793, avg loss: 2.466924, ppl: 11.786137 +epoch: 2, batch: 33977, sum loss: 4556.602539, avg loss: 2.534262, ppl: 12.607120 +epoch: 2, batch: 33978, sum loss: 4256.004395, avg loss: 2.429226, ppl: 11.350099 +epoch: 2, batch: 33979, sum loss: 4339.635254, avg loss: 2.638076, ppl: 13.986269 +epoch: 2, batch: 33980, sum loss: 4876.114258, avg loss: 2.685085, ppl: 14.659445 +epoch: 2, batch: 33981, sum loss: 4264.731934, avg loss: 2.378546, ppl: 10.789199 +epoch: 2, batch: 33982, sum loss: 4750.347656, avg loss: 2.447371, ppl: 11.557923 +epoch: 2, batch: 33983, sum loss: 5348.451660, avg loss: 2.759779, ppl: 15.796355 +epoch: 2, batch: 33984, sum loss: 3793.736572, avg loss: 2.071948, ppl: 7.940276 +epoch: 2, batch: 33985, sum loss: 4786.930664, avg loss: 2.597358, ppl: 13.428214 +epoch: 2, batch: 33986, sum loss: 3660.552246, avg loss: 2.502086, ppl: 12.207938 +epoch: 2, batch: 33987, sum loss: 3937.759766, avg loss: 2.336949, ppl: 10.349615 +epoch: 2, batch: 33988, sum loss: 4730.061523, avg loss: 2.660327, ppl: 14.300967 +epoch: 2, batch: 33989, sum loss: 4245.083984, avg loss: 2.446734, ppl: 11.550566 +epoch: 2, batch: 33990, sum loss: 4536.335938, avg loss: 2.537101, ppl: 12.642961 +epoch: 2, batch: 33991, sum loss: 4366.208008, avg loss: 2.375521, ppl: 10.756618 +epoch: 2, batch: 33992, sum loss: 3930.061279, avg loss: 2.439517, ppl: 11.467498 +epoch: 2, batch: 33993, sum loss: 4924.632812, avg loss: 2.528046, ppl: 12.528996 +epoch: 2, batch: 33994, sum loss: 3896.736084, avg loss: 2.297604, ppl: 9.950312 +epoch: 2, batch: 33995, sum loss: 4350.825195, avg loss: 2.489031, ppl: 12.049589 +epoch: 2, batch: 33996, sum loss: 3562.456055, avg loss: 2.342180, ppl: 10.403895 +epoch: 2, batch: 33997, sum loss: 4310.677734, avg loss: 2.323816, ppl: 10.214575 +epoch: 2, batch: 33998, sum loss: 4346.869141, avg loss: 2.388390, ppl: 10.895935 +epoch: 2, batch: 
33999, sum loss: 4620.961914, avg loss: 2.532034, ppl: 12.579065 +epoch: 2, batch: 34000, sum loss: 4291.952637, avg loss: 2.488089, ppl: 12.038244 +epoch: 2, batch: 34001, sum loss: 4312.640625, avg loss: 2.359213, ppl: 10.582616 +epoch: 2, batch: 34002, sum loss: 4034.899902, avg loss: 2.413218, ppl: 11.169846 +epoch: 2, batch: 34003, sum loss: 4537.959961, avg loss: 2.485192, ppl: 12.003423 +epoch: 2, batch: 34004, sum loss: 4518.656250, avg loss: 2.650238, ppl: 14.157412 +epoch: 2, batch: 34005, sum loss: 4430.403809, avg loss: 2.482019, ppl: 11.965398 +epoch: 2, batch: 34006, sum loss: 4683.716309, avg loss: 2.393314, ppl: 10.949725 +epoch: 2, batch: 34007, sum loss: 4582.253418, avg loss: 2.760394, ppl: 15.806064 +epoch: 2, batch: 34008, sum loss: 3546.165283, avg loss: 2.283429, ppl: 9.810264 +epoch: 2, batch: 34009, sum loss: 4429.726074, avg loss: 2.542897, ppl: 12.716454 +epoch: 2, batch: 34010, sum loss: 4888.949219, avg loss: 2.541034, ppl: 12.692788 +epoch: 2, batch: 34011, sum loss: 4586.507324, avg loss: 2.468518, ppl: 11.804945 +epoch: 2, batch: 34012, sum loss: 4561.507812, avg loss: 2.566971, ppl: 13.026309 +epoch: 2, batch: 34013, sum loss: 3270.296875, avg loss: 2.239929, ppl: 9.392669 +epoch: 2, batch: 34014, sum loss: 4116.157227, avg loss: 2.575818, ppl: 13.142064 +epoch: 2, batch: 34015, sum loss: 4496.092773, avg loss: 2.513188, ppl: 12.344216 +epoch: 2, batch: 34016, sum loss: 4137.452637, avg loss: 2.388829, ppl: 10.900727 +epoch: 2, batch: 34017, sum loss: 2889.450684, avg loss: 2.063893, ppl: 7.876576 +epoch: 2, batch: 34018, sum loss: 3203.162109, avg loss: 2.152663, ppl: 8.607748 +epoch: 2, batch: 34019, sum loss: 4285.014648, avg loss: 2.587569, ppl: 13.297409 +epoch: 2, batch: 34020, sum loss: 4314.051270, avg loss: 2.414131, ppl: 11.180047 +epoch: 2, batch: 34021, sum loss: 3642.116455, avg loss: 2.423231, ppl: 11.282255 +epoch: 2, batch: 34022, sum loss: 3816.983643, avg loss: 2.393093, ppl: 10.947303 +epoch: 2, batch: 34023, sum 
loss: 4422.564941, avg loss: 2.626226, ppl: 13.821515 +epoch: 2, batch: 34024, sum loss: 4257.380859, avg loss: 2.387763, ppl: 10.889106 +epoch: 2, batch: 34025, sum loss: 4592.423828, avg loss: 2.545689, ppl: 12.752015 +epoch: 2, batch: 34026, sum loss: 4492.459961, avg loss: 2.487520, ppl: 12.031395 +epoch: 2, batch: 34027, sum loss: 4506.498535, avg loss: 2.391984, ppl: 10.935173 +epoch: 2, batch: 34028, sum loss: 3912.261230, avg loss: 2.395751, ppl: 10.976436 +epoch: 2, batch: 34029, sum loss: 4118.244629, avg loss: 2.408330, ppl: 11.115385 +epoch: 2, batch: 34030, sum loss: 4106.032715, avg loss: 2.482487, ppl: 11.970996 +epoch: 2, batch: 34031, sum loss: 4448.636719, avg loss: 2.562579, ppl: 12.969217 +epoch: 2, batch: 34032, sum loss: 3784.834229, avg loss: 2.265012, ppl: 9.631236 +epoch: 2, batch: 34033, sum loss: 4880.525391, avg loss: 2.829290, ppl: 16.933435 +epoch: 2, batch: 34034, sum loss: 3207.574707, avg loss: 2.221312, ppl: 9.219419 +epoch: 2, batch: 34035, sum loss: 4409.227051, avg loss: 2.656161, ppl: 14.241508 +epoch: 2, batch: 34036, sum loss: 4126.330566, avg loss: 2.585420, ppl: 13.268863 +epoch: 2, batch: 34037, sum loss: 3313.705566, avg loss: 2.317277, ppl: 10.148001 +epoch: 2, batch: 34038, sum loss: 4570.192383, avg loss: 2.498739, ppl: 12.167136 +epoch: 2, batch: 34039, sum loss: 3939.102783, avg loss: 2.380123, ppl: 10.806229 +epoch: 2, batch: 34040, sum loss: 4862.051270, avg loss: 2.622466, ppl: 13.769632 +epoch: 2, batch: 34041, sum loss: 3774.676270, avg loss: 2.278018, ppl: 9.757324 +epoch: 2, batch: 34042, sum loss: 4231.303711, avg loss: 2.473001, ppl: 11.857974 +epoch: 2, batch: 34043, sum loss: 4142.240234, avg loss: 2.595389, ppl: 13.401795 +epoch: 2, batch: 34044, sum loss: 4270.419922, avg loss: 2.411304, ppl: 11.148492 +epoch: 2, batch: 34045, sum loss: 3752.319580, avg loss: 2.309120, ppl: 10.065560 +epoch: 2, batch: 34046, sum loss: 3843.563477, avg loss: 2.346498, ppl: 10.448911 +epoch: 2, batch: 34047, sum loss: 
3688.952637, avg loss: 2.436561, ppl: 11.433648 +epoch: 2, batch: 34048, sum loss: 3618.472168, avg loss: 2.325496, ppl: 10.231756 +epoch: 2, batch: 34049, sum loss: 3684.112305, avg loss: 2.292540, ppl: 9.900055 +epoch: 2, batch: 34050, sum loss: 3921.611816, avg loss: 2.335683, ppl: 10.336518 +epoch: 2, batch: 34051, sum loss: 4145.586914, avg loss: 2.458830, ppl: 11.691124 +epoch: 2, batch: 34052, sum loss: 3514.500244, avg loss: 2.425466, ppl: 11.307498 +epoch: 2, batch: 34053, sum loss: 3221.075195, avg loss: 2.192699, ppl: 8.959364 +epoch: 2, batch: 34054, sum loss: 3808.210449, avg loss: 2.396608, ppl: 10.985850 +epoch: 2, batch: 34055, sum loss: 3560.011230, avg loss: 2.208444, ppl: 9.101541 +epoch: 2, batch: 34056, sum loss: 3860.509033, avg loss: 2.493869, ppl: 12.108032 +epoch: 2, batch: 34057, sum loss: 3970.665527, avg loss: 2.438984, ppl: 11.461387 +epoch: 2, batch: 34058, sum loss: 3309.121094, avg loss: 2.351898, ppl: 10.505495 +epoch: 2, batch: 34059, sum loss: 4621.917969, avg loss: 2.624599, ppl: 13.799033 +epoch: 2, batch: 34060, sum loss: 3943.629883, avg loss: 2.372822, ppl: 10.727621 +epoch: 2, batch: 34061, sum loss: 4294.009766, avg loss: 2.469241, ppl: 11.813476 +epoch: 2, batch: 34062, sum loss: 3534.284912, avg loss: 2.468076, ppl: 11.799725 +epoch: 2, batch: 34063, sum loss: 4603.765137, avg loss: 2.442316, ppl: 11.499641 +epoch: 2, batch: 34064, sum loss: 4872.355957, avg loss: 2.510230, ppl: 12.307755 +epoch: 2, batch: 34065, sum loss: 3523.780762, avg loss: 2.341383, ppl: 10.395601 +epoch: 2, batch: 34066, sum loss: 4156.569824, avg loss: 2.372472, ppl: 10.723865 +epoch: 2, batch: 34067, sum loss: 4707.115234, avg loss: 2.623810, ppl: 13.788157 +epoch: 2, batch: 34068, sum loss: 4467.945312, avg loss: 2.258819, ppl: 9.571781 +epoch: 2, batch: 34069, sum loss: 3907.549805, avg loss: 2.378302, ppl: 10.786568 +epoch: 2, batch: 34070, sum loss: 4825.523438, avg loss: 2.697330, ppl: 14.840055 +epoch: 2, batch: 34071, sum loss: 
3440.696777, avg loss: 2.283143, ppl: 9.807460 +epoch: 2, batch: 34072, sum loss: 4095.340576, avg loss: 2.387954, ppl: 10.891183 +epoch: 2, batch: 34073, sum loss: 5213.606934, avg loss: 2.683277, ppl: 14.632965 +epoch: 2, batch: 34074, sum loss: 3387.009766, avg loss: 2.268593, ppl: 9.665794 +epoch: 2, batch: 34075, sum loss: 4066.907227, avg loss: 2.519769, ppl: 12.425729 +epoch: 2, batch: 34076, sum loss: 4106.885254, avg loss: 2.446030, ppl: 11.542437 +epoch: 2, batch: 34077, sum loss: 4190.280762, avg loss: 2.437627, ppl: 11.445848 +epoch: 2, batch: 34078, sum loss: 5043.062988, avg loss: 2.798592, ppl: 16.421511 +epoch: 2, batch: 34079, sum loss: 3560.264404, avg loss: 2.430215, ppl: 11.361320 +epoch: 2, batch: 34080, sum loss: 4595.071777, avg loss: 2.534513, ppl: 12.610288 +epoch: 2, batch: 34081, sum loss: 4263.669922, avg loss: 2.350425, ppl: 10.490022 +epoch: 2, batch: 34082, sum loss: 5392.228516, avg loss: 2.886632, ppl: 17.932810 +epoch: 2, batch: 34083, sum loss: 4738.990723, avg loss: 2.660860, ppl: 14.308583 +epoch: 2, batch: 34084, sum loss: 4660.413086, avg loss: 2.473680, ppl: 11.866034 +epoch: 2, batch: 34085, sum loss: 4162.552246, avg loss: 2.468892, ppl: 11.809359 +epoch: 2, batch: 34086, sum loss: 3133.112793, avg loss: 2.263810, ppl: 9.619667 +epoch: 2, batch: 34087, sum loss: 4390.170898, avg loss: 2.523087, ppl: 12.467020 +epoch: 2, batch: 34088, sum loss: 3274.291748, avg loss: 2.367528, ppl: 10.670985 +epoch: 2, batch: 34089, sum loss: 4700.002930, avg loss: 2.528243, ppl: 12.531464 +epoch: 2, batch: 34090, sum loss: 4241.048828, avg loss: 2.322590, ppl: 10.202063 +epoch: 2, batch: 34091, sum loss: 4796.518066, avg loss: 2.645625, ppl: 14.092252 +epoch: 2, batch: 34092, sum loss: 4900.077148, avg loss: 2.696795, ppl: 14.832126 +epoch: 2, batch: 34093, sum loss: 4745.566406, avg loss: 2.419973, ppl: 11.245555 +epoch: 2, batch: 34094, sum loss: 4160.132812, avg loss: 2.583933, ppl: 13.249149 +epoch: 2, batch: 34095, sum loss: 
3282.994385, avg loss: 2.203352, ppl: 9.055316 +epoch: 2, batch: 34096, sum loss: 3949.566650, avg loss: 2.258186, ppl: 9.565718 +epoch: 2, batch: 34097, sum loss: 3876.325684, avg loss: 2.325330, ppl: 10.230058 +epoch: 2, batch: 34098, sum loss: 3549.582031, avg loss: 2.442933, ppl: 11.506744 +epoch: 2, batch: 34099, sum loss: 4544.523926, avg loss: 2.346166, ppl: 10.445446 +epoch: 2, batch: 34100, sum loss: 5074.921875, avg loss: 2.565683, ppl: 13.009542 +epoch: 2, batch: 34101, sum loss: 4653.970703, avg loss: 2.412634, ppl: 11.163325 +epoch: 2, batch: 34102, sum loss: 3582.881104, avg loss: 2.335646, ppl: 10.336137 +epoch: 2, batch: 34103, sum loss: 4729.675293, avg loss: 2.551065, ppl: 12.820753 +epoch: 2, batch: 34104, sum loss: 4086.352539, avg loss: 2.337730, ppl: 10.357702 +epoch: 2, batch: 34105, sum loss: 3379.962891, avg loss: 2.214917, ppl: 9.160646 +epoch: 2, batch: 34106, sum loss: 3711.802490, avg loss: 2.223968, ppl: 9.243938 +epoch: 2, batch: 34107, sum loss: 4623.114746, avg loss: 2.572685, ppl: 13.100953 +epoch: 2, batch: 34108, sum loss: 3975.646484, avg loss: 2.362238, ppl: 10.614679 +epoch: 2, batch: 34109, sum loss: 4149.396973, avg loss: 2.466942, ppl: 11.786353 +epoch: 2, batch: 34110, sum loss: 3189.425293, avg loss: 2.141991, ppl: 8.516380 +epoch: 2, batch: 34111, sum loss: 3636.345215, avg loss: 2.492355, ppl: 12.089709 +epoch: 2, batch: 34112, sum loss: 3862.857178, avg loss: 2.341125, ppl: 10.392927 +epoch: 2, batch: 34113, sum loss: 3216.742676, avg loss: 2.095598, ppl: 8.130300 +epoch: 2, batch: 34114, sum loss: 4097.026855, avg loss: 2.333159, ppl: 10.310461 +epoch: 2, batch: 34115, sum loss: 3694.546631, avg loss: 2.335364, ppl: 10.333224 +epoch: 2, batch: 34116, sum loss: 4461.523926, avg loss: 2.613664, ppl: 13.648965 +epoch: 2, batch: 34117, sum loss: 4042.185303, avg loss: 2.478348, ppl: 11.921551 +epoch: 2, batch: 34118, sum loss: 4621.609375, avg loss: 2.579023, ppl: 13.184253 +epoch: 2, batch: 34119, sum loss: 4340.874512, 
avg loss: 2.577717, ppl: 13.167038 +epoch: 2, batch: 34120, sum loss: 3917.862793, avg loss: 2.598052, ppl: 13.437540 +epoch: 2, batch: 34121, sum loss: 5511.666016, avg loss: 2.900877, ppl: 18.190090 +epoch: 2, batch: 34122, sum loss: 3631.302490, avg loss: 2.292489, ppl: 9.899548 +epoch: 2, batch: 34123, sum loss: 4138.770508, avg loss: 2.505309, ppl: 12.247344 +epoch: 2, batch: 34124, sum loss: 4262.431152, avg loss: 2.553883, ppl: 12.856935 +epoch: 2, batch: 34125, sum loss: 3935.621582, avg loss: 2.331529, ppl: 10.293673 +epoch: 2, batch: 34126, sum loss: 4381.721191, avg loss: 2.676678, ppl: 14.536717 +epoch: 2, batch: 34127, sum loss: 3356.727539, avg loss: 2.392536, ppl: 10.941202 +epoch: 2, batch: 34128, sum loss: 4241.891602, avg loss: 2.529452, ppl: 12.546633 +epoch: 2, batch: 34129, sum loss: 3356.170898, avg loss: 2.199326, ppl: 9.018930 +epoch: 2, batch: 34130, sum loss: 4273.404785, avg loss: 2.432217, ppl: 11.384089 +epoch: 2, batch: 34131, sum loss: 4369.366211, avg loss: 2.503935, ppl: 12.230522 +epoch: 2, batch: 34132, sum loss: 4410.456055, avg loss: 2.254834, ppl: 9.533714 +epoch: 2, batch: 34133, sum loss: 4572.061523, avg loss: 2.364044, ppl: 10.633870 +epoch: 2, batch: 34134, sum loss: 3999.548584, avg loss: 2.560530, ppl: 12.942680 +epoch: 2, batch: 34135, sum loss: 4017.289062, avg loss: 2.223181, ppl: 9.236670 +epoch: 2, batch: 34136, sum loss: 5012.194336, avg loss: 2.602386, ppl: 13.495894 +epoch: 2, batch: 34137, sum loss: 3747.081055, avg loss: 2.229079, ppl: 9.291301 +epoch: 2, batch: 34138, sum loss: 3571.334717, avg loss: 2.351109, ppl: 10.497205 +epoch: 2, batch: 34139, sum loss: 3565.746826, avg loss: 2.407662, ppl: 11.107957 +epoch: 2, batch: 34140, sum loss: 4309.424805, avg loss: 2.512784, ppl: 12.339234 +epoch: 2, batch: 34141, sum loss: 4563.460449, avg loss: 2.528233, ppl: 12.531342 +epoch: 2, batch: 34142, sum loss: 3549.954102, avg loss: 2.213188, ppl: 9.144828 +epoch: 2, batch: 34143, sum loss: 3645.284424, avg loss: 
2.456391, ppl: 11.662646 +epoch: 2, batch: 34144, sum loss: 4877.464844, avg loss: 2.688790, ppl: 14.713862 +epoch: 2, batch: 34145, sum loss: 3709.761719, avg loss: 2.336122, ppl: 10.341057 +epoch: 2, batch: 34146, sum loss: 4199.906250, avg loss: 2.427691, ppl: 11.332690 +epoch: 2, batch: 34147, sum loss: 5000.686523, avg loss: 2.657113, ppl: 14.255073 +epoch: 2, batch: 34148, sum loss: 3955.818848, avg loss: 2.651353, ppl: 14.173203 +epoch: 2, batch: 34149, sum loss: 4348.057617, avg loss: 2.498884, ppl: 12.168902 +epoch: 2, batch: 34150, sum loss: 3529.282959, avg loss: 2.185315, ppl: 8.893447 +epoch: 2, batch: 34151, sum loss: 4392.226562, avg loss: 2.502693, ppl: 12.215348 +epoch: 2, batch: 34152, sum loss: 4555.838867, avg loss: 2.510104, ppl: 12.306212 +epoch: 2, batch: 34153, sum loss: 5382.928223, avg loss: 2.812397, ppl: 16.649784 +epoch: 2, batch: 34154, sum loss: 4953.667480, avg loss: 2.798682, ppl: 16.422991 +epoch: 2, batch: 34155, sum loss: 3816.350098, avg loss: 2.474935, ppl: 11.880939 +epoch: 2, batch: 34156, sum loss: 3632.238770, avg loss: 2.108090, ppl: 8.232502 +epoch: 2, batch: 34157, sum loss: 4863.369629, avg loss: 2.672181, ppl: 14.471499 +epoch: 2, batch: 34158, sum loss: 3644.519531, avg loss: 2.269315, ppl: 9.672771 +epoch: 2, batch: 34159, sum loss: 4045.410400, avg loss: 2.303765, ppl: 10.011802 +epoch: 2, batch: 34160, sum loss: 4573.225098, avg loss: 2.462695, ppl: 11.736400 +epoch: 2, batch: 34161, sum loss: 3980.854248, avg loss: 2.517934, ppl: 12.402949 +epoch: 2, batch: 34162, sum loss: 3895.326416, avg loss: 2.434579, ppl: 11.411013 +epoch: 2, batch: 34163, sum loss: 2742.347656, avg loss: 2.216934, ppl: 9.179147 +epoch: 2, batch: 34164, sum loss: 4403.884277, avg loss: 2.656142, ppl: 14.241247 +epoch: 2, batch: 34165, sum loss: 4494.395996, avg loss: 2.547843, ppl: 12.779514 +epoch: 2, batch: 34166, sum loss: 4197.418945, avg loss: 2.202213, ppl: 9.045011 +epoch: 2, batch: 34167, sum loss: 3354.158936, avg loss: 2.243585, 
ppl: 9.427063 +epoch: 2, batch: 34168, sum loss: 3749.520996, avg loss: 2.503018, ppl: 12.219316 +epoch: 2, batch: 34169, sum loss: 4402.867676, avg loss: 2.574776, ppl: 13.128382 +epoch: 2, batch: 34170, sum loss: 4222.656738, avg loss: 2.440842, ppl: 11.482704 +epoch: 2, batch: 34171, sum loss: 3922.348145, avg loss: 2.367138, ppl: 10.666821 +epoch: 2, batch: 34172, sum loss: 4883.986328, avg loss: 2.781313, ppl: 16.140205 +epoch: 2, batch: 34173, sum loss: 4533.685547, avg loss: 2.679483, ppl: 14.577558 +epoch: 2, batch: 34174, sum loss: 4142.731934, avg loss: 2.465912, ppl: 11.774214 +epoch: 2, batch: 34175, sum loss: 4646.724121, avg loss: 2.614926, ppl: 13.666209 +epoch: 2, batch: 34176, sum loss: 4919.974609, avg loss: 2.494916, ppl: 12.120715 +epoch: 2, batch: 34177, sum loss: 3804.873047, avg loss: 2.465893, ppl: 11.773994 +epoch: 2, batch: 34178, sum loss: 4763.393066, avg loss: 2.507049, ppl: 12.268673 +epoch: 2, batch: 34179, sum loss: 4433.570801, avg loss: 2.667612, ppl: 14.405525 +epoch: 2, batch: 34180, sum loss: 3490.740234, avg loss: 2.293522, ppl: 9.909778 +epoch: 2, batch: 34181, sum loss: 4721.972656, avg loss: 2.795721, ppl: 16.374432 +epoch: 2, batch: 34182, sum loss: 4687.617676, avg loss: 2.481534, ppl: 11.959597 +epoch: 2, batch: 34183, sum loss: 4534.634766, avg loss: 2.448507, ppl: 11.571056 +epoch: 2, batch: 34184, sum loss: 4518.453125, avg loss: 2.560030, ppl: 12.936209 +epoch: 2, batch: 34185, sum loss: 4293.406738, avg loss: 2.351263, ppl: 10.498824 +epoch: 2, batch: 34186, sum loss: 3288.612793, avg loss: 2.170702, ppl: 8.764430 +epoch: 2, batch: 34187, sum loss: 4279.003906, avg loss: 2.656117, ppl: 14.240880 +epoch: 2, batch: 34188, sum loss: 4305.667969, avg loss: 2.674328, ppl: 14.502599 +epoch: 2, batch: 34189, sum loss: 4834.254883, avg loss: 2.537667, ppl: 12.650118 +epoch: 2, batch: 34190, sum loss: 3795.641113, avg loss: 2.492213, ppl: 12.088003 +epoch: 2, batch: 34191, sum loss: 4773.357422, avg loss: 2.591399, ppl: 
13.348439 +epoch: 2, batch: 34192, sum loss: 4685.186035, avg loss: 2.498766, ppl: 12.167469 +epoch: 2, batch: 34193, sum loss: 3139.764160, avg loss: 2.204890, ppl: 9.069259 +epoch: 2, batch: 34194, sum loss: 4190.536621, avg loss: 2.507802, ppl: 12.277911 +epoch: 2, batch: 34195, sum loss: 4719.590820, avg loss: 2.723364, ppl: 15.231481 +epoch: 2, batch: 34196, sum loss: 4515.136719, avg loss: 2.471339, ppl: 11.838290 +epoch: 2, batch: 34197, sum loss: 4258.252441, avg loss: 2.462841, ppl: 11.738115 +epoch: 2, batch: 34198, sum loss: 5362.679199, avg loss: 2.869277, ppl: 17.624275 +epoch: 2, batch: 34199, sum loss: 3706.950928, avg loss: 2.352126, ppl: 10.507887 +epoch: 2, batch: 34200, sum loss: 4681.957031, avg loss: 2.599643, ppl: 13.458932 +epoch: 2, batch: 34201, sum loss: 4468.942871, avg loss: 2.537730, ppl: 12.650924 +epoch: 2, batch: 34202, sum loss: 3800.556396, avg loss: 2.416120, ppl: 11.202308 +epoch: 2, batch: 34203, sum loss: 4814.231934, avg loss: 2.639382, ppl: 14.004541 +epoch: 2, batch: 34204, sum loss: 4583.298828, avg loss: 2.702417, ppl: 14.915738 +epoch: 2, batch: 34205, sum loss: 5639.069824, avg loss: 2.791619, ppl: 16.307398 +epoch: 2, batch: 34206, sum loss: 5104.944336, avg loss: 2.530959, ppl: 12.565553 +epoch: 2, batch: 34207, sum loss: 4382.154297, avg loss: 2.602230, ppl: 13.493791 +epoch: 2, batch: 34208, sum loss: 3861.352295, avg loss: 2.402833, ppl: 11.054449 +epoch: 2, batch: 34209, sum loss: 3784.722168, avg loss: 2.317650, ppl: 10.151788 +epoch: 2, batch: 34210, sum loss: 4875.307617, avg loss: 2.801901, ppl: 16.475935 +epoch: 2, batch: 34211, sum loss: 4218.288574, avg loss: 2.297543, ppl: 9.949704 +epoch: 2, batch: 34212, sum loss: 5039.732910, avg loss: 2.583154, ppl: 13.238824 +epoch: 2, batch: 34213, sum loss: 4533.428223, avg loss: 2.474579, ppl: 11.876704 +epoch: 2, batch: 34214, sum loss: 4023.797363, avg loss: 2.403702, ppl: 11.064063 +epoch: 2, batch: 34215, sum loss: 4291.801758, avg loss: 2.542537, ppl: 12.711877 
+epoch: 2, batch: 34216, sum loss: 4184.410156, avg loss: 2.508639, ppl: 12.288198 +epoch: 2, batch: 34217, sum loss: 3561.824219, avg loss: 2.305388, ppl: 10.028069 +epoch: 2, batch: 34218, sum loss: 3437.603027, avg loss: 2.192349, ppl: 8.956224 +epoch: 2, batch: 34219, sum loss: 3395.522217, avg loss: 2.322519, ppl: 10.201338 +epoch: 2, batch: 34220, sum loss: 4122.014648, avg loss: 2.332776, ppl: 10.306511 +epoch: 2, batch: 34221, sum loss: 4700.190430, avg loss: 2.645014, ppl: 14.083646 +epoch: 2, batch: 34222, sum loss: 4141.988281, avg loss: 2.267098, ppl: 9.651354 +epoch: 2, batch: 34223, sum loss: 4324.578125, avg loss: 2.759782, ppl: 15.796400 +epoch: 2, batch: 34224, sum loss: 3447.062744, avg loss: 2.545836, ppl: 12.753892 +epoch: 2, batch: 34225, sum loss: 3858.026123, avg loss: 2.374170, ppl: 10.742092 +epoch: 2, batch: 34226, sum loss: 3810.048828, avg loss: 2.367961, ppl: 10.675599 +epoch: 2, batch: 34227, sum loss: 5712.936035, avg loss: 2.637551, ppl: 13.978931 +epoch: 2, batch: 34228, sum loss: 3839.073486, avg loss: 2.449951, ppl: 11.587784 +epoch: 2, batch: 34229, sum loss: 4243.118164, avg loss: 2.404033, ppl: 11.067722 +epoch: 2, batch: 34230, sum loss: 4357.871582, avg loss: 2.341683, ppl: 10.398722 +epoch: 2, batch: 34231, sum loss: 3807.124268, avg loss: 2.254070, ppl: 9.526430 +epoch: 2, batch: 34232, sum loss: 4172.625977, avg loss: 2.349452, ppl: 10.479820 +epoch: 2, batch: 34233, sum loss: 4160.416016, avg loss: 2.543042, ppl: 12.718298 +epoch: 2, batch: 34234, sum loss: 4151.614258, avg loss: 2.398391, ppl: 11.005449 +epoch: 2, batch: 34235, sum loss: 5861.964355, avg loss: 2.917852, ppl: 18.501503 +epoch: 2, batch: 34236, sum loss: 3926.202637, avg loss: 2.319080, ppl: 10.166318 +epoch: 2, batch: 34237, sum loss: 4580.854492, avg loss: 2.488243, ppl: 12.040098 +epoch: 2, batch: 34238, sum loss: 4163.753418, avg loss: 2.506775, ppl: 12.265312 +epoch: 2, batch: 34239, sum loss: 3983.124512, avg loss: 2.400919, ppl: 11.033311 +epoch: 2, 
batch: 34240, sum loss: 3387.549805, avg loss: 2.419678, ppl: 11.242244 +epoch: 2, batch: 34241, sum loss: 4332.556641, avg loss: 2.502921, ppl: 12.218132 +epoch: 2, batch: 34242, sum loss: 4083.622559, avg loss: 2.404960, ppl: 11.077991 +epoch: 2, batch: 34243, sum loss: 3339.542480, avg loss: 2.336979, ppl: 10.349918 +epoch: 2, batch: 34244, sum loss: 3899.824219, avg loss: 2.472939, ppl: 11.857242 +epoch: 2, batch: 34245, sum loss: 4416.477539, avg loss: 2.633559, ppl: 13.923228 +epoch: 2, batch: 34246, sum loss: 4141.548340, avg loss: 2.452071, ppl: 11.612373 +epoch: 2, batch: 34247, sum loss: 3181.042725, avg loss: 2.096930, ppl: 8.141136 +epoch: 2, batch: 34248, sum loss: 5020.971191, avg loss: 2.722869, ppl: 15.223943 +epoch: 2, batch: 34249, sum loss: 4181.873047, avg loss: 2.534468, ppl: 12.609726 +epoch: 2, batch: 34250, sum loss: 5043.662109, avg loss: 2.702927, ppl: 14.923350 +epoch: 2, batch: 34251, sum loss: 4814.512207, avg loss: 2.692680, ppl: 14.771212 +epoch: 2, batch: 34252, sum loss: 4466.610352, avg loss: 2.532092, ppl: 12.579797 +epoch: 2, batch: 34253, sum loss: 4490.869141, avg loss: 2.652610, ppl: 14.191033 +epoch: 2, batch: 34254, sum loss: 3935.763916, avg loss: 2.372371, ppl: 10.722788 +epoch: 2, batch: 34255, sum loss: 3466.728027, avg loss: 2.321988, ppl: 10.195922 +epoch: 2, batch: 34256, sum loss: 4924.454102, avg loss: 2.640458, ppl: 14.019621 +epoch: 2, batch: 34257, sum loss: 3834.873535, avg loss: 2.335490, ppl: 10.334517 +epoch: 2, batch: 34258, sum loss: 4528.883789, avg loss: 2.453350, ppl: 11.627231 +epoch: 2, batch: 34259, sum loss: 5015.632324, avg loss: 2.539561, ppl: 12.674103 +epoch: 2, batch: 34260, sum loss: 4097.115234, avg loss: 2.451894, ppl: 11.610319 +epoch: 2, batch: 34261, sum loss: 4083.398438, avg loss: 2.380990, ppl: 10.815608 +epoch: 2, batch: 34262, sum loss: 5176.924316, avg loss: 2.637251, ppl: 13.974739 +epoch: 2, batch: 34263, sum loss: 4189.724609, avg loss: 2.594257, ppl: 13.386633 +epoch: 2, batch: 
34264, sum loss: 4784.093750, avg loss: 2.650467, ppl: 14.160656 +epoch: 2, batch: 34265, sum loss: 4554.440918, avg loss: 2.686986, ppl: 14.687338 +epoch: 2, batch: 34266, sum loss: 4415.043945, avg loss: 2.425848, ppl: 11.311823 +epoch: 2, batch: 34267, sum loss: 3717.871094, avg loss: 2.351594, ppl: 10.502302 +epoch: 2, batch: 34268, sum loss: 4183.988281, avg loss: 2.224343, ppl: 9.247401 +epoch: 2, batch: 34269, sum loss: 4013.613770, avg loss: 2.119120, ppl: 8.323812 +epoch: 2, batch: 34270, sum loss: 3186.386230, avg loss: 2.101838, ppl: 8.181192 +epoch: 2, batch: 34271, sum loss: 4821.845215, avg loss: 2.687762, ppl: 14.698747 +epoch: 2, batch: 34272, sum loss: 5390.680664, avg loss: 2.791652, ppl: 16.307945 +epoch: 2, batch: 34273, sum loss: 3694.053467, avg loss: 2.381724, ppl: 10.823543 +epoch: 2, batch: 34274, sum loss: 4580.746582, avg loss: 2.843418, ppl: 17.174370 +epoch: 2, batch: 34275, sum loss: 4145.272461, avg loss: 2.206106, ppl: 9.080286 +epoch: 2, batch: 34276, sum loss: 3894.547852, avg loss: 2.139862, ppl: 8.498261 +epoch: 2, batch: 34277, sum loss: 4793.407227, avg loss: 2.689903, ppl: 14.730251 +epoch: 2, batch: 34278, sum loss: 3935.116211, avg loss: 2.380590, ppl: 10.811284 +epoch: 2, batch: 34279, sum loss: 4390.169922, avg loss: 2.414835, ppl: 11.187924 +epoch: 2, batch: 34280, sum loss: 3813.224121, avg loss: 2.398254, ppl: 11.003948 +epoch: 2, batch: 34281, sum loss: 3498.421387, avg loss: 2.168891, ppl: 8.748578 +epoch: 2, batch: 34282, sum loss: 3463.876221, avg loss: 2.437633, ppl: 11.445914 +epoch: 2, batch: 34283, sum loss: 4373.842773, avg loss: 2.364239, ppl: 10.635946 +epoch: 2, batch: 34284, sum loss: 3549.765869, avg loss: 2.080754, ppl: 8.010503 +epoch: 2, batch: 34285, sum loss: 4255.733398, avg loss: 2.686700, ppl: 14.683146 +epoch: 2, batch: 34286, sum loss: 3744.121094, avg loss: 2.482839, ppl: 11.975212 +epoch: 2, batch: 34287, sum loss: 4527.991211, avg loss: 2.549545, ppl: 12.801272 +epoch: 2, batch: 34288, sum 
loss: 4134.658203, avg loss: 2.420760, ppl: 11.254409 +epoch: 2, batch: 34289, sum loss: 4540.820801, avg loss: 2.477262, ppl: 11.908611 +epoch: 2, batch: 34290, sum loss: 3622.921875, avg loss: 2.377245, ppl: 10.775181 +epoch: 2, batch: 34291, sum loss: 4276.254883, avg loss: 2.442179, ppl: 11.498065 +epoch: 2, batch: 34292, sum loss: 4552.198730, avg loss: 2.574773, ppl: 13.128338 +epoch: 2, batch: 34293, sum loss: 4840.943848, avg loss: 2.555937, ppl: 12.883360 +epoch: 2, batch: 34294, sum loss: 3912.295654, avg loss: 2.229228, ppl: 9.292692 +epoch: 2, batch: 34295, sum loss: 3503.222900, avg loss: 2.399468, ppl: 11.017310 +epoch: 2, batch: 34296, sum loss: 4215.517578, avg loss: 2.376278, ppl: 10.764764 +epoch: 2, batch: 34297, sum loss: 4106.230957, avg loss: 2.425417, ppl: 11.306946 +epoch: 2, batch: 34298, sum loss: 5581.827148, avg loss: 2.629217, ppl: 13.862906 +epoch: 2, batch: 34299, sum loss: 4780.830566, avg loss: 2.693425, ppl: 14.782225 +epoch: 2, batch: 34300, sum loss: 4955.358887, avg loss: 2.626051, ppl: 13.819093 +epoch: 2, batch: 34301, sum loss: 4457.256836, avg loss: 2.589923, ppl: 13.328741 +epoch: 2, batch: 34302, sum loss: 4217.451172, avg loss: 2.242132, ppl: 9.413383 +epoch: 2, batch: 34303, sum loss: 4865.069824, avg loss: 2.848401, ppl: 17.260166 +epoch: 2, batch: 34304, sum loss: 2946.745605, avg loss: 2.015558, ppl: 7.504914 +epoch: 2, batch: 34305, sum loss: 3487.521240, avg loss: 2.410174, ppl: 11.135895 +epoch: 2, batch: 34306, sum loss: 3619.047607, avg loss: 2.240896, ppl: 9.401756 +epoch: 2, batch: 34307, sum loss: 3168.622803, avg loss: 2.158462, ppl: 8.657814 +epoch: 2, batch: 34308, sum loss: 4240.292969, avg loss: 2.295773, ppl: 9.932114 +epoch: 2, batch: 34309, sum loss: 4426.997070, avg loss: 2.392972, ppl: 10.945971 +epoch: 2, batch: 34310, sum loss: 4691.373047, avg loss: 2.653492, ppl: 14.203547 +epoch: 2, batch: 34311, sum loss: 4022.967285, avg loss: 2.462036, ppl: 11.728671 +epoch: 2, batch: 34312, sum loss: 
4264.611816, avg loss: 2.453747, ppl: 11.631847 +epoch: 2, batch: 34313, sum loss: 4479.273926, avg loss: 2.250891, ppl: 9.496198 +epoch: 2, batch: 34314, sum loss: 3890.841064, avg loss: 2.352383, ppl: 10.510586 +epoch: 2, batch: 34315, sum loss: 3754.719971, avg loss: 2.423964, ppl: 11.290524 +epoch: 2, batch: 34316, sum loss: 4512.347168, avg loss: 2.505468, ppl: 12.249286 +epoch: 2, batch: 34317, sum loss: 5449.029297, avg loss: 2.972738, ppl: 19.545366 +epoch: 2, batch: 34318, sum loss: 3766.300781, avg loss: 2.081980, ppl: 8.020329 +epoch: 2, batch: 34319, sum loss: 5028.498047, avg loss: 2.671890, ppl: 14.467294 +epoch: 2, batch: 34320, sum loss: 5644.647949, avg loss: 2.935334, ppl: 18.827795 +epoch: 2, batch: 34321, sum loss: 4205.914551, avg loss: 2.615618, ppl: 13.675672 +epoch: 2, batch: 34322, sum loss: 4705.377930, avg loss: 2.398256, ppl: 11.003966 +epoch: 2, batch: 34323, sum loss: 4577.083008, avg loss: 2.520420, ppl: 12.433819 +epoch: 2, batch: 34324, sum loss: 4072.940430, avg loss: 2.431606, ppl: 11.377143 +epoch: 2, batch: 34325, sum loss: 4517.616211, avg loss: 2.868328, ppl: 17.607548 +epoch: 2, batch: 34326, sum loss: 4744.970703, avg loss: 2.477792, ppl: 11.914922 +epoch: 2, batch: 34327, sum loss: 4402.320312, avg loss: 2.640864, ppl: 14.025314 +epoch: 2, batch: 34328, sum loss: 3794.067871, avg loss: 2.383209, ppl: 10.839626 +epoch: 2, batch: 34329, sum loss: 4144.809570, avg loss: 2.486388, ppl: 12.017795 +epoch: 2, batch: 34330, sum loss: 4050.489258, avg loss: 2.423991, ppl: 11.290833 +epoch: 2, batch: 34331, sum loss: 3863.942627, avg loss: 2.394017, ppl: 10.957417 +epoch: 2, batch: 34332, sum loss: 3397.679688, avg loss: 2.272695, ppl: 9.705525 +epoch: 2, batch: 34333, sum loss: 4505.357422, avg loss: 2.586313, ppl: 13.280715 +epoch: 2, batch: 34334, sum loss: 3553.208008, avg loss: 2.213837, ppl: 9.150758 +epoch: 2, batch: 34335, sum loss: 4035.078857, avg loss: 2.446985, ppl: 11.553463 +epoch: 2, batch: 34336, sum loss: 
3832.500488, avg loss: 2.386364, ppl: 10.873881 +epoch: 2, batch: 34337, sum loss: 4300.166016, avg loss: 2.553543, ppl: 12.852558 +epoch: 2, batch: 34338, sum loss: 4747.743164, avg loss: 2.773214, ppl: 16.010014 +epoch: 2, batch: 34339, sum loss: 5924.390137, avg loss: 2.758096, ppl: 15.769789 +epoch: 2, batch: 34340, sum loss: 3296.754639, avg loss: 2.184728, ppl: 8.888232 +epoch: 2, batch: 34341, sum loss: 3750.869141, avg loss: 2.505590, ppl: 12.250791 +epoch: 2, batch: 34342, sum loss: 4710.106934, avg loss: 2.705403, ppl: 14.960349 +epoch: 2, batch: 34343, sum loss: 4665.987793, avg loss: 2.762574, ppl: 15.840568 +epoch: 2, batch: 34344, sum loss: 3551.387695, avg loss: 2.442495, ppl: 11.501703 +epoch: 2, batch: 34345, sum loss: 5183.597656, avg loss: 2.598294, ppl: 13.440795 +epoch: 2, batch: 34346, sum loss: 3724.558105, avg loss: 2.402941, ppl: 11.055640 +epoch: 2, batch: 34347, sum loss: 4670.760254, avg loss: 2.690530, ppl: 14.739487 +epoch: 2, batch: 34348, sum loss: 4317.357422, avg loss: 2.799843, ppl: 16.442059 +epoch: 2, batch: 34349, sum loss: 3928.801514, avg loss: 2.515238, ppl: 12.369550 +epoch: 2, batch: 34350, sum loss: 4535.511719, avg loss: 2.663248, ppl: 14.342803 +epoch: 2, batch: 34351, sum loss: 3577.869141, avg loss: 2.382070, ppl: 10.827291 +epoch: 2, batch: 34352, sum loss: 4785.212402, avg loss: 2.791839, ppl: 16.310989 +epoch: 2, batch: 34353, sum loss: 3966.074463, avg loss: 2.317986, ppl: 10.155206 +epoch: 2, batch: 34354, sum loss: 3692.134521, avg loss: 2.297533, ppl: 9.949602 +epoch: 2, batch: 34355, sum loss: 4040.428711, avg loss: 2.351821, ppl: 10.504683 +epoch: 2, batch: 34356, sum loss: 3592.282227, avg loss: 2.272158, ppl: 9.700313 +epoch: 2, batch: 34357, sum loss: 4750.058105, avg loss: 2.655147, ppl: 14.227078 +epoch: 2, batch: 34358, sum loss: 3275.808594, avg loss: 2.303663, ppl: 10.010782 +epoch: 2, batch: 34359, sum loss: 4274.810059, avg loss: 2.556705, ppl: 12.893261 +epoch: 2, batch: 34360, sum loss: 
4024.642090, avg loss: 2.516974, ppl: 12.391050 +epoch: 2, batch: 34361, sum loss: 5116.336914, avg loss: 2.476446, ppl: 11.898896 +epoch: 2, batch: 34362, sum loss: 4656.861328, avg loss: 2.661064, ppl: 14.311503 +epoch: 2, batch: 34363, sum loss: 3462.983643, avg loss: 2.235625, ppl: 9.352327 +epoch: 2, batch: 34364, sum loss: 3611.912109, avg loss: 2.345397, ppl: 10.437421 +epoch: 2, batch: 34365, sum loss: 3798.256348, avg loss: 2.411591, ppl: 11.151692 +epoch: 2, batch: 34366, sum loss: 4919.679199, avg loss: 2.612681, ppl: 13.635564 +epoch: 2, batch: 34367, sum loss: 4322.523926, avg loss: 2.485638, ppl: 12.008776 +epoch: 2, batch: 34368, sum loss: 4896.954590, avg loss: 2.526809, ppl: 12.513506 +epoch: 2, batch: 34369, sum loss: 5222.280273, avg loss: 2.721355, ppl: 15.200905 +epoch: 2, batch: 34370, sum loss: 4832.946777, avg loss: 2.488644, ppl: 12.044933 +epoch: 2, batch: 34371, sum loss: 3818.207275, avg loss: 2.453861, ppl: 11.633173 +epoch: 2, batch: 34372, sum loss: 5220.047852, avg loss: 2.624458, ppl: 13.797098 +epoch: 2, batch: 34373, sum loss: 3926.868652, avg loss: 2.272493, ppl: 9.703566 +epoch: 2, batch: 34374, sum loss: 3431.221436, avg loss: 2.315264, ppl: 10.127599 +epoch: 2, batch: 34375, sum loss: 4251.199219, avg loss: 2.518483, ppl: 12.409759 +epoch: 2, batch: 34376, sum loss: 5151.199219, avg loss: 2.719746, ppl: 15.176473 +epoch: 2, batch: 34377, sum loss: 4448.223633, avg loss: 2.605872, ppl: 13.543032 +epoch: 2, batch: 34378, sum loss: 4244.766113, avg loss: 2.564813, ppl: 12.998232 +epoch: 2, batch: 34379, sum loss: 4031.318848, avg loss: 2.319516, ppl: 10.170750 +epoch: 2, batch: 34380, sum loss: 4037.701416, avg loss: 2.350234, ppl: 10.488022 +epoch: 2, batch: 34381, sum loss: 3960.056152, avg loss: 2.447501, ppl: 11.559421 +epoch: 2, batch: 34382, sum loss: 3576.794678, avg loss: 2.334723, ppl: 10.326594 +epoch: 2, batch: 34383, sum loss: 4475.481934, avg loss: 2.626456, ppl: 13.824696 +epoch: 2, batch: 34384, sum loss: 
4036.675049, avg loss: 2.556476, ppl: 12.890307 +epoch: 2, batch: 34385, sum loss: 4460.604492, avg loss: 2.563566, ppl: 12.982025 +epoch: 2, batch: 34386, sum loss: 4701.137695, avg loss: 2.465201, ppl: 11.765842 +epoch: 2, batch: 34387, sum loss: 4844.104004, avg loss: 2.520346, ppl: 12.432894 +epoch: 2, batch: 34388, sum loss: 4690.325195, avg loss: 2.592772, ppl: 13.366779 +epoch: 2, batch: 34389, sum loss: 4094.001953, avg loss: 2.457384, ppl: 11.674233 +epoch: 2, batch: 34390, sum loss: 4148.288086, avg loss: 2.491464, ppl: 12.078951 +epoch: 2, batch: 34391, sum loss: 4061.301758, avg loss: 2.151113, ppl: 8.594421 +epoch: 2, batch: 34392, sum loss: 3738.777344, avg loss: 2.378357, ppl: 10.787167 +epoch: 2, batch: 34393, sum loss: 4531.670898, avg loss: 2.518994, ppl: 12.416104 +epoch: 2, batch: 34394, sum loss: 3549.197754, avg loss: 2.227996, ppl: 9.281249 +epoch: 2, batch: 34395, sum loss: 4773.586914, avg loss: 2.475927, ppl: 11.892725 +epoch: 2, batch: 34396, sum loss: 3711.718750, avg loss: 2.456465, ppl: 11.663508 +epoch: 2, batch: 34397, sum loss: 3620.772217, avg loss: 2.108778, ppl: 8.238170 +epoch: 2, batch: 34398, sum loss: 4125.000977, avg loss: 2.412281, ppl: 11.159389 +epoch: 2, batch: 34399, sum loss: 4493.070312, avg loss: 2.604679, ppl: 13.526877 +epoch: 2, batch: 34400, sum loss: 4145.603516, avg loss: 2.342149, ppl: 10.403570 +epoch: 2, batch: 34401, sum loss: 4070.336914, avg loss: 2.440250, ppl: 11.475908 +epoch: 2, batch: 34402, sum loss: 4190.174316, avg loss: 2.473538, ppl: 11.864353 +epoch: 2, batch: 34403, sum loss: 4307.271484, avg loss: 2.520346, ppl: 12.432899 +epoch: 2, batch: 34404, sum loss: 3643.959473, avg loss: 2.280325, ppl: 9.779860 +epoch: 2, batch: 34405, sum loss: 3621.704102, avg loss: 2.392143, ppl: 10.936904 +epoch: 2, batch: 34406, sum loss: 4364.801758, avg loss: 2.525927, ppl: 12.502481 +epoch: 2, batch: 34407, sum loss: 3709.133301, avg loss: 2.430625, ppl: 11.365980 +epoch: 2, batch: 34408, sum loss: 
3673.126221, avg loss: 2.256220, ppl: 9.546934 +epoch: 2, batch: 34409, sum loss: 3767.075439, avg loss: 2.488161, ppl: 12.039114 +epoch: 2, batch: 34410, sum loss: 3269.387939, avg loss: 2.230142, ppl: 9.301186 +epoch: 2, batch: 34411, sum loss: 3338.410156, avg loss: 2.271027, ppl: 9.689350 +epoch: 2, batch: 34412, sum loss: 4589.879395, avg loss: 2.342971, ppl: 10.412121 +epoch: 2, batch: 34413, sum loss: 4356.506836, avg loss: 2.570210, ppl: 13.068575 +epoch: 2, batch: 34414, sum loss: 4038.424805, avg loss: 2.463956, ppl: 11.751212 +epoch: 2, batch: 34415, sum loss: 4418.741211, avg loss: 2.485231, ppl: 12.003898 +epoch: 2, batch: 34416, sum loss: 3717.002686, avg loss: 2.255463, ppl: 9.539708 +epoch: 2, batch: 34417, sum loss: 3560.097168, avg loss: 2.552041, ppl: 12.833270 +epoch: 2, batch: 34418, sum loss: 4031.390625, avg loss: 2.624603, ppl: 13.799099 +epoch: 2, batch: 34419, sum loss: 3462.072510, avg loss: 2.209364, ppl: 9.109918 +epoch: 2, batch: 34420, sum loss: 5257.027832, avg loss: 2.827879, ppl: 16.909565 +epoch: 2, batch: 34421, sum loss: 4592.357422, avg loss: 2.712556, ppl: 15.067741 +epoch: 2, batch: 34422, sum loss: 4714.713867, avg loss: 2.752314, ppl: 15.678868 +epoch: 2, batch: 34423, sum loss: 3789.356934, avg loss: 2.416682, ppl: 11.208605 +epoch: 2, batch: 34424, sum loss: 3607.698486, avg loss: 2.201158, ppl: 9.035473 +epoch: 2, batch: 34425, sum loss: 4329.874512, avg loss: 2.759640, ppl: 15.794148 +epoch: 2, batch: 34426, sum loss: 4024.654053, avg loss: 2.353599, ppl: 10.523374 +epoch: 2, batch: 34427, sum loss: 3857.260254, avg loss: 2.168218, ppl: 8.742692 +epoch: 2, batch: 34428, sum loss: 3957.695312, avg loss: 2.434007, ppl: 11.404488 +epoch: 2, batch: 34429, sum loss: 4295.427246, avg loss: 2.549215, ppl: 12.797055 +epoch: 2, batch: 34430, sum loss: 4019.555420, avg loss: 2.625445, ppl: 13.810714 +epoch: 2, batch: 34431, sum loss: 4561.991211, avg loss: 2.524621, ppl: 12.486168 +epoch: 2, batch: 34432, sum loss: 4159.068848, 
avg loss: 2.506973, ppl: 12.267742 +epoch: 2, batch: 34433, sum loss: 4782.784180, avg loss: 2.607843, ppl: 13.569748 +epoch: 2, batch: 34434, sum loss: 3770.497803, avg loss: 2.346296, ppl: 10.446804 +epoch: 2, batch: 34435, sum loss: 3577.410645, avg loss: 2.409031, ppl: 11.123174 +epoch: 2, batch: 34436, sum loss: 4300.394043, avg loss: 2.339714, ppl: 10.378266 +epoch: 2, batch: 34437, sum loss: 4098.907227, avg loss: 2.514667, ppl: 12.362492 +epoch: 2, batch: 34438, sum loss: 4284.771973, avg loss: 2.458274, ppl: 11.684628 +epoch: 2, batch: 34439, sum loss: 4243.687988, avg loss: 2.217183, ppl: 9.181429 +epoch: 2, batch: 34440, sum loss: 4159.156738, avg loss: 2.559481, ppl: 12.929104 +epoch: 2, batch: 34441, sum loss: 5540.446777, avg loss: 2.537997, ppl: 12.654296 +epoch: 2, batch: 34442, sum loss: 3646.885254, avg loss: 2.348284, ppl: 10.467594 +epoch: 2, batch: 34443, sum loss: 4419.063965, avg loss: 2.525179, ppl: 12.493136 +epoch: 2, batch: 34444, sum loss: 3262.765625, avg loss: 2.218060, ppl: 9.189482 +epoch: 2, batch: 34445, sum loss: 3751.966553, avg loss: 2.284998, ppl: 9.825666 +epoch: 2, batch: 34446, sum loss: 4688.893555, avg loss: 2.552474, ppl: 12.838822 +epoch: 2, batch: 34447, sum loss: 5234.347656, avg loss: 2.614559, ppl: 13.661193 +epoch: 2, batch: 34448, sum loss: 4266.650391, avg loss: 2.565635, ppl: 13.008913 +epoch: 2, batch: 34449, sum loss: 4195.406250, avg loss: 2.383754, ppl: 10.845535 +epoch: 2, batch: 34450, sum loss: 4330.987793, avg loss: 2.464990, ppl: 11.763366 +epoch: 2, batch: 34451, sum loss: 4669.594238, avg loss: 2.530945, ppl: 12.565378 +epoch: 2, batch: 34452, sum loss: 3990.406738, avg loss: 2.263418, ppl: 9.615905 +epoch: 2, batch: 34453, sum loss: 4550.169922, avg loss: 2.660918, ppl: 14.309419 +epoch: 2, batch: 34454, sum loss: 4705.889160, avg loss: 2.425716, ppl: 11.310327 +epoch: 2, batch: 34455, sum loss: 4156.755371, avg loss: 2.323508, ppl: 10.211432 +epoch: 2, batch: 34456, sum loss: 4385.156250, avg loss: 
2.451177, ppl: 11.601998 +epoch: 2, batch: 34457, sum loss: 3467.584229, avg loss: 2.232830, ppl: 9.326218 +epoch: 2, batch: 34458, sum loss: 3163.129639, avg loss: 2.257766, ppl: 9.561700 +epoch: 2, batch: 34459, sum loss: 3492.930176, avg loss: 2.402290, ppl: 11.048452 +epoch: 2, batch: 34460, sum loss: 4102.036133, avg loss: 2.440236, ppl: 11.475744 +epoch: 2, batch: 34461, sum loss: 4095.935547, avg loss: 2.564769, ppl: 12.997653 +epoch: 2, batch: 34462, sum loss: 3840.454102, avg loss: 2.122971, ppl: 8.355925 +epoch: 2, batch: 34463, sum loss: 3899.985352, avg loss: 2.394098, ppl: 10.958307 +epoch: 2, batch: 34464, sum loss: 4448.625488, avg loss: 2.440277, ppl: 11.476223 +epoch: 2, batch: 34465, sum loss: 5518.002930, avg loss: 2.870969, ppl: 17.654121 +epoch: 2, batch: 34466, sum loss: 3672.519775, avg loss: 2.285327, ppl: 9.828897 +epoch: 2, batch: 34467, sum loss: 4605.172363, avg loss: 2.552756, ppl: 12.842453 +epoch: 2, batch: 34468, sum loss: 4992.396973, avg loss: 2.719170, ppl: 15.167733 +epoch: 2, batch: 34469, sum loss: 4647.804199, avg loss: 2.447501, ppl: 11.559422 +epoch: 2, batch: 34470, sum loss: 4123.662109, avg loss: 2.451642, ppl: 11.607390 +epoch: 2, batch: 34471, sum loss: 4482.567871, avg loss: 2.378020, ppl: 10.783531 +epoch: 2, batch: 34472, sum loss: 3173.664307, avg loss: 2.278295, ppl: 9.760021 +epoch: 2, batch: 34473, sum loss: 3426.875000, avg loss: 2.332794, ppl: 10.306700 +epoch: 2, batch: 34474, sum loss: 4093.649902, avg loss: 2.380029, ppl: 10.805216 +epoch: 2, batch: 34475, sum loss: 3916.821533, avg loss: 2.651877, ppl: 14.180626 +epoch: 2, batch: 34476, sum loss: 4374.640625, avg loss: 2.320764, ppl: 10.183455 +epoch: 2, batch: 34477, sum loss: 4273.129883, avg loss: 2.415562, ppl: 11.196065 +epoch: 2, batch: 34478, sum loss: 3631.004395, avg loss: 2.433649, ppl: 11.400407 +epoch: 2, batch: 34479, sum loss: 3992.255371, avg loss: 2.443241, ppl: 11.510283 +epoch: 2, batch: 34480, sum loss: 3851.035645, avg loss: 2.482937, 
ppl: 11.976389 +epoch: 2, batch: 34481, sum loss: 4135.237793, avg loss: 2.418268, ppl: 11.226396 +epoch: 2, batch: 34482, sum loss: 4095.779297, avg loss: 2.375742, ppl: 10.758995 +epoch: 2, batch: 34483, sum loss: 3781.684082, avg loss: 2.193552, ppl: 8.967011 +epoch: 2, batch: 34484, sum loss: 4603.433105, avg loss: 2.439551, ppl: 11.467892 +epoch: 2, batch: 34485, sum loss: 4334.712402, avg loss: 2.460109, ppl: 11.706090 +epoch: 2, batch: 34486, sum loss: 4089.093994, avg loss: 2.356827, ppl: 10.557395 +epoch: 2, batch: 34487, sum loss: 4987.614258, avg loss: 2.728454, ppl: 15.309202 +epoch: 2, batch: 34488, sum loss: 3197.486572, avg loss: 2.254927, ppl: 9.534598 +epoch: 2, batch: 34489, sum loss: 3543.224854, avg loss: 2.448670, ppl: 11.572941 +epoch: 2, batch: 34490, sum loss: 4075.704102, avg loss: 2.574671, ppl: 13.126995 +epoch: 2, batch: 34491, sum loss: 4437.366211, avg loss: 2.422143, ppl: 11.269987 +epoch: 2, batch: 34492, sum loss: 3237.617432, avg loss: 2.278408, ppl: 9.761127 +epoch: 2, batch: 34493, sum loss: 3999.595947, avg loss: 2.352704, ppl: 10.513957 +epoch: 2, batch: 34494, sum loss: 3618.651611, avg loss: 2.209189, ppl: 9.108326 +epoch: 2, batch: 34495, sum loss: 4344.751465, avg loss: 2.419127, ppl: 11.236043 +epoch: 2, batch: 34496, sum loss: 3647.844727, avg loss: 2.588960, ppl: 13.315918 +epoch: 2, batch: 34497, sum loss: 4743.778809, avg loss: 2.716941, ppl: 15.133959 +epoch: 2, batch: 34498, sum loss: 3926.215576, avg loss: 2.337033, ppl: 10.350484 +epoch: 2, batch: 34499, sum loss: 4161.001953, avg loss: 2.586080, ppl: 13.277616 +epoch: 2, batch: 34500, sum loss: 4975.033691, avg loss: 2.674749, ppl: 14.508713 +epoch: 2, batch: 34501, sum loss: 4357.738281, avg loss: 2.345392, ppl: 10.437364 +epoch: 2, batch: 34502, sum loss: 4105.285156, avg loss: 2.381256, ppl: 10.818483 +epoch: 2, batch: 34503, sum loss: 3868.330078, avg loss: 2.410174, ppl: 11.135902 +epoch: 2, batch: 34504, sum loss: 4768.412598, avg loss: 2.555419, ppl: 
12.876697 +epoch: 2, batch: 34505, sum loss: 3852.835938, avg loss: 2.619195, ppl: 13.724670 +epoch: 2, batch: 34506, sum loss: 4280.486816, avg loss: 2.472840, ppl: 11.856074 +epoch: 2, batch: 34507, sum loss: 3816.366943, avg loss: 2.263563, ppl: 9.617293 +epoch: 2, batch: 34508, sum loss: 3401.039062, avg loss: 2.291805, ppl: 9.892780 +epoch: 2, batch: 34509, sum loss: 3390.439453, avg loss: 2.300163, ppl: 9.975804 +epoch: 2, batch: 34510, sum loss: 4714.980957, avg loss: 2.706648, ppl: 14.978983 +epoch: 2, batch: 34511, sum loss: 4739.671875, avg loss: 2.497193, ppl: 12.148344 +epoch: 2, batch: 34512, sum loss: 4440.328613, avg loss: 2.627413, ppl: 13.837929 +epoch: 2, batch: 34513, sum loss: 4181.806152, avg loss: 2.424236, ppl: 11.293593 +epoch: 2, batch: 34514, sum loss: 4437.754395, avg loss: 2.625890, ppl: 13.816869 +epoch: 2, batch: 34515, sum loss: 4117.081055, avg loss: 2.421813, ppl: 11.266261 +epoch: 2, batch: 34516, sum loss: 4456.729492, avg loss: 2.621606, ppl: 13.757796 +epoch: 2, batch: 34517, sum loss: 4642.825684, avg loss: 2.512352, ppl: 12.333899 +epoch: 2, batch: 34518, sum loss: 6370.308594, avg loss: 3.034925, ppl: 20.799427 +epoch: 2, batch: 34519, sum loss: 4221.613281, avg loss: 2.663478, ppl: 14.346104 +epoch: 2, batch: 34520, sum loss: 3452.066895, avg loss: 2.323060, ppl: 10.206858 +epoch: 2, batch: 34521, sum loss: 3739.971680, avg loss: 2.371574, ppl: 10.714240 +epoch: 2, batch: 34522, sum loss: 4134.497559, avg loss: 2.463944, ppl: 11.751063 +epoch: 2, batch: 34523, sum loss: 4349.546387, avg loss: 2.739009, ppl: 15.471647 +epoch: 2, batch: 34524, sum loss: 3870.257812, avg loss: 2.287386, ppl: 9.849162 +epoch: 2, batch: 34525, sum loss: 3884.925293, avg loss: 2.333288, ppl: 10.311795 +epoch: 2, batch: 34526, sum loss: 3835.518555, avg loss: 2.386757, ppl: 10.878160 +epoch: 2, batch: 34527, sum loss: 3597.506348, avg loss: 2.462359, ppl: 11.732455 +epoch: 2, batch: 34528, sum loss: 4778.546875, avg loss: 2.622693, ppl: 13.772764 
+epoch: 2, batch: 34529, sum loss: 4527.594727, avg loss: 2.556519, ppl: 12.890863 +epoch: 2, batch: 34530, sum loss: 3964.872559, avg loss: 2.259187, ppl: 9.575296 +epoch: 2, batch: 34531, sum loss: 4840.411133, avg loss: 2.624952, ppl: 13.803909 +epoch: 2, batch: 34532, sum loss: 3624.878662, avg loss: 2.227952, ppl: 9.280844 +epoch: 2, batch: 34533, sum loss: 4157.663086, avg loss: 2.367690, ppl: 10.672709 +epoch: 2, batch: 34534, sum loss: 3864.445068, avg loss: 2.527433, ppl: 12.521324 +epoch: 2, batch: 34535, sum loss: 3875.539551, avg loss: 2.270380, ppl: 9.683084 +epoch: 2, batch: 34536, sum loss: 3445.209229, avg loss: 2.359732, ppl: 10.588118 +epoch: 2, batch: 34537, sum loss: 4394.442383, avg loss: 2.372809, ppl: 10.727483 +epoch: 2, batch: 34538, sum loss: 4066.808105, avg loss: 2.319913, ppl: 10.174793 +epoch: 2, batch: 34539, sum loss: 4533.028320, avg loss: 2.412469, ppl: 11.161481 +epoch: 2, batch: 34540, sum loss: 3579.948242, avg loss: 2.341366, ppl: 10.395425 +epoch: 2, batch: 34541, sum loss: 4549.184570, avg loss: 2.557158, ppl: 12.899109 +epoch: 2, batch: 34542, sum loss: 5292.933105, avg loss: 2.731132, ppl: 15.350250 +epoch: 2, batch: 34543, sum loss: 5739.227539, avg loss: 2.794171, ppl: 16.349075 +epoch: 2, batch: 34544, sum loss: 4581.900879, avg loss: 2.685757, ppl: 14.669297 +epoch: 2, batch: 34545, sum loss: 3323.723145, avg loss: 2.141574, ppl: 8.512830 +epoch: 2, batch: 34546, sum loss: 4901.646484, avg loss: 2.548958, ppl: 12.793767 +epoch: 2, batch: 34547, sum loss: 4135.366211, avg loss: 2.495695, ppl: 12.130157 +epoch: 2, batch: 34548, sum loss: 3185.977783, avg loss: 2.079620, ppl: 8.001427 +epoch: 2, batch: 34549, sum loss: 4019.894531, avg loss: 2.466193, ppl: 11.777524 +epoch: 2, batch: 34550, sum loss: 3808.760742, avg loss: 2.504116, ppl: 12.232744 +epoch: 2, batch: 34551, sum loss: 4339.032715, avg loss: 2.425395, ppl: 11.306700 +epoch: 2, batch: 34552, sum loss: 4816.372559, avg loss: 2.352893, ppl: 10.515952 +epoch: 2, 
batch: 34553, sum loss: 4229.938965, avg loss: 2.552769, ppl: 12.842621 +epoch: 2, batch: 34554, sum loss: 4476.323730, avg loss: 2.520452, ppl: 12.434222 +epoch: 2, batch: 34555, sum loss: 3937.016357, avg loss: 2.310456, ppl: 10.079015 +epoch: 2, batch: 34556, sum loss: 4155.191895, avg loss: 2.541402, ppl: 12.697455 +epoch: 2, batch: 34557, sum loss: 4880.612305, avg loss: 2.598835, ppl: 13.448062 +epoch: 2, batch: 34558, sum loss: 3961.155762, avg loss: 2.458818, ppl: 11.690987 +epoch: 2, batch: 34559, sum loss: 3917.537598, avg loss: 2.256646, ppl: 9.551003 +epoch: 2, batch: 34560, sum loss: 3912.769531, avg loss: 2.490624, ppl: 12.068801 +epoch: 2, batch: 34561, sum loss: 4265.479980, avg loss: 2.633013, ppl: 13.915628 +epoch: 2, batch: 34562, sum loss: 3763.020996, avg loss: 2.354832, ppl: 10.536355 +epoch: 2, batch: 34563, sum loss: 3725.964355, avg loss: 2.432092, ppl: 11.382667 +epoch: 2, batch: 34564, sum loss: 3555.732178, avg loss: 2.244780, ppl: 9.438342 +epoch: 2, batch: 34565, sum loss: 4115.336914, avg loss: 2.285029, ppl: 9.825968 +epoch: 2, batch: 34566, sum loss: 4274.046387, avg loss: 2.438133, ppl: 11.451638 +epoch: 2, batch: 34567, sum loss: 3614.740967, avg loss: 2.197411, ppl: 9.001677 +epoch: 2, batch: 34568, sum loss: 4368.904297, avg loss: 2.529765, ppl: 12.550558 +epoch: 2, batch: 34569, sum loss: 3628.633789, avg loss: 2.335028, ppl: 10.329751 +epoch: 2, batch: 34570, sum loss: 3515.631348, avg loss: 2.297798, ppl: 9.952248 +epoch: 2, batch: 34571, sum loss: 3905.133301, avg loss: 2.554044, ppl: 12.859004 +epoch: 2, batch: 34572, sum loss: 2735.682129, avg loss: 1.976649, ppl: 7.218513 +epoch: 2, batch: 34573, sum loss: 4673.143066, avg loss: 2.726454, ppl: 15.278621 +epoch: 2, batch: 34574, sum loss: 3996.388672, avg loss: 2.438309, ppl: 11.453655 +epoch: 2, batch: 34575, sum loss: 4273.556641, avg loss: 2.521272, ppl: 12.444421 +epoch: 2, batch: 34576, sum loss: 4337.925293, avg loss: 2.452191, ppl: 11.613763 +epoch: 2, batch: 34577, 
sum loss: 4017.570312, avg loss: 2.644878, ppl: 14.081733 +epoch: 2, batch: 34578, sum loss: 4171.435059, avg loss: 2.334323, ppl: 10.322469 +epoch: 2, batch: 34579, sum loss: 4351.589844, avg loss: 2.325810, ppl: 10.234964 +epoch: 2, batch: 34580, sum loss: 3626.528809, avg loss: 2.348788, ppl: 10.472872 +epoch: 2, batch: 34581, sum loss: 4598.189453, avg loss: 2.478808, ppl: 11.927044 +epoch: 2, batch: 34582, sum loss: 3761.594482, avg loss: 2.216614, ppl: 9.176212 +epoch: 2, batch: 34583, sum loss: 4994.742188, avg loss: 2.815525, ppl: 16.701946 +epoch: 2, batch: 34584, sum loss: 4286.483398, avg loss: 2.508182, ppl: 12.282583 +epoch: 2, batch: 34585, sum loss: 4041.129883, avg loss: 2.371555, ppl: 10.714041 +epoch: 2, batch: 34586, sum loss: 4419.957520, avg loss: 2.474780, ppl: 11.879097 +epoch: 2, batch: 34587, sum loss: 4108.547363, avg loss: 2.674836, ppl: 14.509965 +epoch: 2, batch: 34588, sum loss: 3805.991943, avg loss: 2.408856, ppl: 11.121227 +epoch: 2, batch: 34589, sum loss: 5163.845703, avg loss: 2.779249, ppl: 16.106928 +epoch: 2, batch: 34590, sum loss: 4017.211914, avg loss: 2.548992, ppl: 12.794206 +epoch: 2, batch: 34591, sum loss: 4336.459961, avg loss: 2.344033, ppl: 10.423183 +epoch: 2, batch: 34592, sum loss: 3474.228271, avg loss: 2.137987, ppl: 8.482343 +epoch: 2, batch: 34593, sum loss: 3654.322998, avg loss: 2.150867, ppl: 8.592304 +epoch: 2, batch: 34594, sum loss: 3838.642578, avg loss: 2.372461, ppl: 10.723755 +epoch: 2, batch: 34595, sum loss: 4614.311523, avg loss: 2.574951, ppl: 13.130670 +epoch: 2, batch: 34596, sum loss: 4564.276855, avg loss: 2.606669, ppl: 13.553823 +epoch: 2, batch: 34597, sum loss: 4281.715332, avg loss: 2.496627, ppl: 12.141470 +epoch: 2, batch: 34598, sum loss: 4185.760742, avg loss: 2.378273, ppl: 10.786259 +epoch: 2, batch: 34599, sum loss: 4248.875977, avg loss: 2.667217, ppl: 14.399832 +epoch: 2, batch: 34600, sum loss: 3945.548828, avg loss: 2.511489, ppl: 12.323265 +epoch: 2, batch: 34601, sum loss: 
3904.974854, avg loss: 2.661878, ppl: 14.323157 +epoch: 2, batch: 34602, sum loss: 3775.669434, avg loss: 2.616541, ppl: 13.688299 +epoch: 2, batch: 34603, sum loss: 4363.518555, avg loss: 2.285762, ppl: 9.833172 +epoch: 2, batch: 34604, sum loss: 3427.890137, avg loss: 2.170925, ppl: 8.766386 +epoch: 2, batch: 34605, sum loss: 4009.655518, avg loss: 2.336629, ppl: 10.346302 +epoch: 2, batch: 34606, sum loss: 3142.223389, avg loss: 2.089244, ppl: 8.078806 +epoch: 2, batch: 34607, sum loss: 4513.490234, avg loss: 2.461009, ppl: 11.716625 +epoch: 2, batch: 34608, sum loss: 4629.365234, avg loss: 2.654452, ppl: 14.217197 +epoch: 2, batch: 34609, sum loss: 5058.917969, avg loss: 2.730123, ppl: 15.334774 +epoch: 2, batch: 34610, sum loss: 3807.625244, avg loss: 2.324557, ppl: 10.222154 +epoch: 2, batch: 34611, sum loss: 4185.832031, avg loss: 2.442142, ppl: 11.497648 +epoch: 2, batch: 34612, sum loss: 3330.599854, avg loss: 2.199868, ppl: 9.023822 +epoch: 2, batch: 34613, sum loss: 4426.097168, avg loss: 2.551065, ppl: 12.820747 +epoch: 2, batch: 34614, sum loss: 4732.829102, avg loss: 2.740492, ppl: 15.494601 +epoch: 2, batch: 34615, sum loss: 5335.703125, avg loss: 2.766046, ppl: 15.895658 +epoch: 2, batch: 34616, sum loss: 4550.678223, avg loss: 2.865667, ppl: 17.560757 +epoch: 2, batch: 34617, sum loss: 4218.262207, avg loss: 2.375148, ppl: 10.752600 +epoch: 2, batch: 34618, sum loss: 4078.795898, avg loss: 2.500795, ppl: 12.192179 +epoch: 2, batch: 34619, sum loss: 3831.510742, avg loss: 2.288836, ppl: 9.863447 +epoch: 2, batch: 34620, sum loss: 4024.289795, avg loss: 2.269763, ppl: 9.677107 +epoch: 2, batch: 34621, sum loss: 4510.602051, avg loss: 2.616359, ppl: 13.685800 +epoch: 2, batch: 34622, sum loss: 4327.815430, avg loss: 2.629292, ppl: 13.863957 +epoch: 2, batch: 34623, sum loss: 3686.092773, avg loss: 2.231291, ppl: 9.311880 +epoch: 2, batch: 34624, sum loss: 4549.122070, avg loss: 2.506403, ppl: 12.260755 +epoch: 2, batch: 34625, sum loss: 3687.420410, 
avg loss: 2.330860, ppl: 10.286783 +epoch: 2, batch: 34626, sum loss: 4440.371582, avg loss: 2.469617, ppl: 11.817922 +epoch: 2, batch: 34627, sum loss: 4627.591797, avg loss: 2.621865, ppl: 13.761365 +epoch: 2, batch: 34628, sum loss: 4228.805176, avg loss: 2.487533, ppl: 12.031553 +epoch: 2, batch: 34629, sum loss: 3775.873535, avg loss: 2.469505, ppl: 11.816600 +epoch: 2, batch: 34630, sum loss: 4113.935059, avg loss: 2.451689, ppl: 11.607942 +epoch: 2, batch: 34631, sum loss: 4655.203125, avg loss: 2.545218, ppl: 12.746003 +epoch: 2, batch: 34632, sum loss: 4017.795898, avg loss: 2.211225, ppl: 9.126890 +epoch: 2, batch: 34633, sum loss: 4910.797363, avg loss: 2.468978, ppl: 11.810369 +epoch: 2, batch: 34634, sum loss: 4020.803467, avg loss: 2.288448, ppl: 9.859625 +epoch: 2, batch: 34635, sum loss: 3593.933105, avg loss: 2.293512, ppl: 9.909680 +epoch: 2, batch: 34636, sum loss: 4014.640625, avg loss: 2.443482, ppl: 11.513056 +epoch: 2, batch: 34637, sum loss: 3966.549561, avg loss: 2.411276, ppl: 11.148178 +epoch: 2, batch: 34638, sum loss: 3763.728516, avg loss: 2.220489, ppl: 9.211832 +epoch: 2, batch: 34639, sum loss: 4697.930176, avg loss: 2.645231, ppl: 14.086699 +epoch: 2, batch: 34640, sum loss: 4298.304199, avg loss: 2.287549, ppl: 9.850761 +epoch: 2, batch: 34641, sum loss: 4042.109863, avg loss: 2.499759, ppl: 12.179555 +epoch: 2, batch: 34642, sum loss: 3949.682129, avg loss: 2.533472, ppl: 12.597162 +epoch: 2, batch: 34643, sum loss: 3792.827637, avg loss: 2.271154, ppl: 9.690581 +epoch: 2, batch: 34644, sum loss: 3690.025146, avg loss: 2.304825, ppl: 10.022428 +epoch: 2, batch: 34645, sum loss: 4275.864746, avg loss: 2.162805, ppl: 8.695491 +epoch: 2, batch: 34646, sum loss: 4028.848145, avg loss: 2.308795, ppl: 10.062297 +epoch: 2, batch: 34647, sum loss: 4367.405273, avg loss: 2.427685, ppl: 11.332617 +epoch: 2, batch: 34648, sum loss: 3853.542725, avg loss: 2.411479, ppl: 11.150435 +epoch: 2, batch: 34649, sum loss: 4285.389648, avg loss: 
2.500227, ppl: 12.185266 +epoch: 2, batch: 34650, sum loss: 4873.598145, avg loss: 2.685178, ppl: 14.660811 +epoch: 2, batch: 34651, sum loss: 3764.813477, avg loss: 2.390358, ppl: 10.917398 +epoch: 2, batch: 34652, sum loss: 3880.746338, avg loss: 2.336391, ppl: 10.343843 +epoch: 2, batch: 34653, sum loss: 4518.295898, avg loss: 2.529841, ppl: 12.551510 +epoch: 2, batch: 34654, sum loss: 3929.111084, avg loss: 2.303113, ppl: 10.005282 +epoch: 2, batch: 34655, sum loss: 4404.995605, avg loss: 2.399235, ppl: 11.014747 +epoch: 2, batch: 34656, sum loss: 4468.141602, avg loss: 2.572333, ppl: 13.096337 +epoch: 2, batch: 34657, sum loss: 3686.888184, avg loss: 2.381711, ppl: 10.823403 +epoch: 2, batch: 34658, sum loss: 4149.152344, avg loss: 2.287295, ppl: 9.848258 +epoch: 2, batch: 34659, sum loss: 4937.379395, avg loss: 2.417913, ppl: 11.222419 +epoch: 2, batch: 34660, sum loss: 4034.153320, avg loss: 2.349536, ppl: 10.480705 +epoch: 2, batch: 34661, sum loss: 3140.199951, avg loss: 2.216090, ppl: 9.171405 +epoch: 2, batch: 34662, sum loss: 4658.315430, avg loss: 2.589391, ppl: 13.321663 +epoch: 2, batch: 34663, sum loss: 5396.862793, avg loss: 2.777593, ppl: 16.080267 +epoch: 2, batch: 34664, sum loss: 4544.422363, avg loss: 2.543046, ppl: 12.718346 +epoch: 2, batch: 34665, sum loss: 4009.369385, avg loss: 2.449218, ppl: 11.579286 +epoch: 2, batch: 34666, sum loss: 4165.459473, avg loss: 2.605040, ppl: 13.531771 +epoch: 2, batch: 34667, sum loss: 4076.513184, avg loss: 2.423611, ppl: 11.286538 +epoch: 2, batch: 34668, sum loss: 3832.948242, avg loss: 2.401597, ppl: 11.040790 +epoch: 2, batch: 34669, sum loss: 4777.890625, avg loss: 2.522646, ppl: 12.461523 +epoch: 2, batch: 34670, sum loss: 3354.560059, avg loss: 2.224509, ppl: 9.248942 +epoch: 2, batch: 34671, sum loss: 4198.420898, avg loss: 2.274334, ppl: 9.721444 +epoch: 2, batch: 34672, sum loss: 3786.861816, avg loss: 2.263516, ppl: 9.616839 +epoch: 2, batch: 34673, sum loss: 4103.712402, avg loss: 2.391441, 
ppl: 10.929230 +epoch: 2, batch: 34674, sum loss: 3876.272217, avg loss: 2.256270, ppl: 9.547413 +epoch: 2, batch: 34675, sum loss: 3685.468018, avg loss: 2.241769, ppl: 9.409962 +epoch: 2, batch: 34676, sum loss: 4262.215332, avg loss: 2.586296, ppl: 13.280487 +epoch: 2, batch: 34677, sum loss: 2956.382568, avg loss: 1.931014, ppl: 6.896501 +epoch: 2, batch: 34678, sum loss: 4560.188477, avg loss: 2.605822, ppl: 13.542351 +epoch: 2, batch: 34679, sum loss: 3773.988281, avg loss: 2.354328, ppl: 10.531052 +epoch: 2, batch: 34680, sum loss: 3413.285156, avg loss: 2.277041, ppl: 9.747798 +epoch: 2, batch: 34681, sum loss: 4462.094727, avg loss: 2.592734, ppl: 13.366260 +epoch: 2, batch: 34682, sum loss: 4468.074219, avg loss: 2.486408, ppl: 12.018023 +epoch: 2, batch: 34683, sum loss: 3718.044678, avg loss: 2.357669, ppl: 10.566297 +epoch: 2, batch: 34684, sum loss: 4505.058105, avg loss: 2.611628, ppl: 13.621209 +epoch: 2, batch: 34685, sum loss: 5144.554688, avg loss: 2.645015, ppl: 14.083663 +epoch: 2, batch: 34686, sum loss: 4438.615234, avg loss: 2.728098, ppl: 15.303750 +epoch: 2, batch: 34687, sum loss: 4267.836914, avg loss: 2.605517, ppl: 13.538221 +epoch: 2, batch: 34688, sum loss: 4083.286621, avg loss: 2.263463, ppl: 9.616328 +epoch: 2, batch: 34689, sum loss: 5600.356445, avg loss: 2.906257, ppl: 18.288212 +epoch: 2, batch: 34690, sum loss: 4607.427246, avg loss: 2.637337, ppl: 13.975932 +epoch: 2, batch: 34691, sum loss: 3727.718994, avg loss: 2.255123, ppl: 9.536469 +epoch: 2, batch: 34692, sum loss: 4239.594727, avg loss: 2.449217, ppl: 11.579278 +epoch: 2, batch: 34693, sum loss: 4911.733398, avg loss: 2.689887, ppl: 14.730012 +epoch: 2, batch: 34694, sum loss: 4433.711914, avg loss: 2.574745, ppl: 13.127975 +epoch: 2, batch: 34695, sum loss: 3322.305664, avg loss: 2.250885, ppl: 9.496132 +epoch: 2, batch: 34696, sum loss: 3805.390381, avg loss: 2.357739, ppl: 10.567030 +epoch: 2, batch: 34697, sum loss: 3642.480713, avg loss: 2.449550, ppl: 11.583132 
+epoch: 2, batch: 34698, sum loss: 4374.301758, avg loss: 2.531425, ppl: 12.571404 +epoch: 2, batch: 34699, sum loss: 3598.381592, avg loss: 2.236409, ppl: 9.359657 +epoch: 2, batch: 34700, sum loss: 4474.321289, avg loss: 2.604378, ppl: 13.522811 +epoch: 2, batch: 34701, sum loss: 3488.662598, avg loss: 2.295173, ppl: 9.926152 +epoch: 2, batch: 34702, sum loss: 4120.317871, avg loss: 2.365280, ppl: 10.647018 +epoch: 2, batch: 34703, sum loss: 4044.419678, avg loss: 2.407393, ppl: 11.104970 +epoch: 2, batch: 34704, sum loss: 5004.881348, avg loss: 2.777404, ppl: 16.077227 +epoch: 2, batch: 34705, sum loss: 4084.509033, avg loss: 2.367831, ppl: 10.674220 +epoch: 2, batch: 34706, sum loss: 3412.382080, avg loss: 2.215832, ppl: 9.169039 +epoch: 2, batch: 34707, sum loss: 4969.557129, avg loss: 2.499777, ppl: 12.179778 +epoch: 2, batch: 34708, sum loss: 3868.281250, avg loss: 2.414657, ppl: 11.185937 +epoch: 2, batch: 34709, sum loss: 4251.361816, avg loss: 2.605001, ppl: 13.531241 +epoch: 2, batch: 34710, sum loss: 4813.066895, avg loss: 2.582117, ppl: 13.225110 +epoch: 2, batch: 34711, sum loss: 4435.418945, avg loss: 2.486222, ppl: 12.015789 +epoch: 2, batch: 34712, sum loss: 4560.613770, avg loss: 2.566468, ppl: 13.019757 +epoch: 2, batch: 34713, sum loss: 4068.089355, avg loss: 2.307481, ppl: 10.049082 +epoch: 2, batch: 34714, sum loss: 4784.189453, avg loss: 2.557023, ppl: 12.897362 +epoch: 2, batch: 34715, sum loss: 4895.862305, avg loss: 2.605568, ppl: 13.538916 +epoch: 2, batch: 34716, sum loss: 5085.734863, avg loss: 2.855550, ppl: 17.384001 +epoch: 2, batch: 34717, sum loss: 4277.862793, avg loss: 2.619634, ppl: 13.730700 +epoch: 2, batch: 34718, sum loss: 3903.526123, avg loss: 2.529829, ppl: 12.551360 +epoch: 2, batch: 34719, sum loss: 4513.167969, avg loss: 2.815451, ppl: 16.700705 +epoch: 2, batch: 34720, sum loss: 4401.537598, avg loss: 2.463088, ppl: 11.741010 +epoch: 2, batch: 34721, sum loss: 4606.844238, avg loss: 2.524298, ppl: 12.482132 +epoch: 2, 
batch: 34722, sum loss: 3779.911865, avg loss: 2.437080, ppl: 11.439592 +epoch: 2, batch: 34723, sum loss: 4105.341797, avg loss: 2.498686, ppl: 12.166500 +epoch: 2, batch: 34724, sum loss: 3199.909668, avg loss: 2.079214, ppl: 7.998177 +epoch: 2, batch: 34725, sum loss: 4130.254395, avg loss: 2.471726, ppl: 11.842872 +epoch: 2, batch: 34726, sum loss: 4417.075684, avg loss: 2.556178, ppl: 12.886472 +epoch: 2, batch: 34727, sum loss: 4187.103516, avg loss: 2.490841, ppl: 12.071423 +epoch: 2, batch: 34728, sum loss: 4660.613770, avg loss: 2.539844, ppl: 12.677693 +epoch: 2, batch: 34729, sum loss: 3821.312500, avg loss: 2.160154, ppl: 8.672474 +epoch: 2, batch: 34730, sum loss: 3883.702881, avg loss: 2.336765, ppl: 10.347706 +epoch: 2, batch: 34731, sum loss: 4685.943359, avg loss: 2.620774, ppl: 13.746356 +epoch: 2, batch: 34732, sum loss: 4107.570312, avg loss: 2.580132, ppl: 13.198880 +epoch: 2, batch: 34733, sum loss: 4480.500977, avg loss: 2.416667, ppl: 11.208439 +epoch: 2, batch: 34734, sum loss: 4195.052734, avg loss: 2.589539, ppl: 13.323626 +epoch: 2, batch: 34735, sum loss: 4161.605469, avg loss: 2.488999, ppl: 12.049205 +epoch: 2, batch: 34736, sum loss: 4855.234375, avg loss: 2.592223, ppl: 13.359442 +epoch: 2, batch: 34737, sum loss: 4608.037109, avg loss: 2.578644, ppl: 13.179258 +epoch: 2, batch: 34738, sum loss: 3786.176025, avg loss: 2.311463, ppl: 10.089170 +epoch: 2, batch: 34739, sum loss: 4123.206543, avg loss: 2.524927, ppl: 12.489988 +epoch: 2, batch: 34740, sum loss: 4427.837402, avg loss: 2.581829, ppl: 13.221302 +epoch: 2, batch: 34741, sum loss: 3724.139893, avg loss: 2.298852, ppl: 9.962738 +epoch: 2, batch: 34742, sum loss: 4639.692383, avg loss: 2.718039, ppl: 15.150579 +epoch: 2, batch: 34743, sum loss: 4449.722656, avg loss: 2.593079, ppl: 13.370872 +epoch: 2, batch: 34744, sum loss: 4177.333496, avg loss: 2.341555, ppl: 10.397390 +epoch: 2, batch: 34745, sum loss: 4802.554688, avg loss: 2.501331, ppl: 12.198715 +epoch: 2, batch: 
34746, sum loss: 4095.109375, avg loss: 2.810645, ppl: 16.620628 +epoch: 2, batch: 34747, sum loss: 4336.393066, avg loss: 2.587347, ppl: 13.294452 +epoch: 2, batch: 34748, sum loss: 4014.420410, avg loss: 2.369788, ppl: 10.695122 +epoch: 2, batch: 34749, sum loss: 4382.838867, avg loss: 2.637087, ppl: 13.972447 +epoch: 2, batch: 34750, sum loss: 3123.909424, avg loss: 2.263703, ppl: 9.618638 +epoch: 2, batch: 34751, sum loss: 4040.190430, avg loss: 2.518822, ppl: 12.413964 +epoch: 2, batch: 34752, sum loss: 4401.802246, avg loss: 2.477098, ppl: 11.906655 +epoch: 2, batch: 34753, sum loss: 3727.619629, avg loss: 2.345890, ppl: 10.442565 +epoch: 2, batch: 34754, sum loss: 4259.909180, avg loss: 2.549317, ppl: 12.798364 +epoch: 2, batch: 34755, sum loss: 3572.580811, avg loss: 2.396097, ppl: 10.980239 +epoch: 2, batch: 34756, sum loss: 4253.738281, avg loss: 2.542581, ppl: 12.712443 +epoch: 2, batch: 34757, sum loss: 3897.564941, avg loss: 2.402938, ppl: 11.055606 +epoch: 2, batch: 34758, sum loss: 4192.035645, avg loss: 2.465904, ppl: 11.774116 +epoch: 2, batch: 34759, sum loss: 4535.446289, avg loss: 2.377068, ppl: 10.773273 +epoch: 2, batch: 34760, sum loss: 4684.633789, avg loss: 2.379194, ppl: 10.796203 +epoch: 2, batch: 34761, sum loss: 3670.380371, avg loss: 2.246255, ppl: 9.452270 +epoch: 2, batch: 34762, sum loss: 3371.377197, avg loss: 2.443027, ppl: 11.507822 +epoch: 2, batch: 34763, sum loss: 3831.255859, avg loss: 2.314958, ppl: 10.124501 +epoch: 2, batch: 34764, sum loss: 3837.244141, avg loss: 2.434799, ppl: 11.413529 +epoch: 2, batch: 34765, sum loss: 4276.061035, avg loss: 2.302671, ppl: 10.000863 +epoch: 2, batch: 34766, sum loss: 4575.265137, avg loss: 2.467780, ppl: 11.796234 +epoch: 2, batch: 34767, sum loss: 3940.884521, avg loss: 2.434147, ppl: 11.406089 +epoch: 2, batch: 34768, sum loss: 4156.693848, avg loss: 2.462497, ppl: 11.734069 +epoch: 2, batch: 34769, sum loss: 4233.489258, avg loss: 2.323540, ppl: 10.211758 +epoch: 2, batch: 34770, 
sum loss: 6037.536133, avg loss: 2.868188, ppl: 17.605091 +epoch: 2, batch: 34771, sum loss: 3897.328369, avg loss: 2.534024, ppl: 12.604117 +epoch: 2, batch: 34772, sum loss: 4011.996582, avg loss: 2.528038, ppl: 12.528904 +epoch: 2, batch: 34773, sum loss: 3792.092285, avg loss: 2.246500, ppl: 9.454589 +epoch: 2, batch: 34774, sum loss: 4425.094238, avg loss: 2.581735, ppl: 13.220060 +epoch: 2, batch: 34775, sum loss: 4361.623535, avg loss: 2.509565, ppl: 12.299577 +epoch: 2, batch: 34776, sum loss: 4369.011230, avg loss: 2.540123, ppl: 12.681228 +epoch: 2, batch: 34777, sum loss: 4691.110840, avg loss: 2.364471, ppl: 10.638412 +epoch: 2, batch: 34778, sum loss: 3894.259766, avg loss: 2.285364, ppl: 9.829260 +epoch: 2, batch: 34779, sum loss: 4811.874023, avg loss: 2.609476, ppl: 13.591928 +epoch: 2, batch: 34780, sum loss: 3991.074219, avg loss: 2.398482, ppl: 11.006456 +epoch: 2, batch: 34781, sum loss: 3512.434570, avg loss: 2.397566, ppl: 10.996382 +epoch: 2, batch: 34782, sum loss: 3758.004395, avg loss: 2.549528, ppl: 12.801062 +epoch: 2, batch: 34783, sum loss: 3397.377930, avg loss: 2.395894, ppl: 10.978011 +epoch: 2, batch: 34784, sum loss: 3986.043457, avg loss: 2.481970, ppl: 11.964808 +epoch: 2, batch: 34785, sum loss: 3295.522217, avg loss: 2.582698, ppl: 13.232790 +epoch: 2, batch: 34786, sum loss: 5154.444336, avg loss: 2.617798, ppl: 13.705512 +epoch: 2, batch: 34787, sum loss: 4456.327148, avg loss: 2.327064, ppl: 10.247808 +epoch: 2, batch: 34788, sum loss: 4711.135254, avg loss: 2.661658, ppl: 14.320016 +epoch: 2, batch: 34789, sum loss: 5107.291504, avg loss: 2.617781, ppl: 13.705283 +epoch: 2, batch: 34790, sum loss: 4049.693359, avg loss: 2.268736, ppl: 9.667172 +epoch: 2, batch: 34791, sum loss: 4016.168945, avg loss: 2.447391, ppl: 11.558152 +epoch: 2, batch: 34792, sum loss: 3924.067871, avg loss: 2.460231, ppl: 11.707511 +epoch: 2, batch: 34793, sum loss: 3664.998779, avg loss: 2.242961, ppl: 9.421190 +epoch: 2, batch: 34794, sum loss: 
3343.341064, avg loss: 2.145919, ppl: 8.549891 +epoch: 2, batch: 34795, sum loss: 4723.912598, avg loss: 2.448892, ppl: 11.575513 +epoch: 2, batch: 34796, sum loss: 5011.850098, avg loss: 2.779728, ppl: 16.114643 +epoch: 2, batch: 34797, sum loss: 3873.722168, avg loss: 2.300310, ppl: 9.977276 +epoch: 2, batch: 34798, sum loss: 4335.785645, avg loss: 2.378380, ppl: 10.787409 +epoch: 2, batch: 34799, sum loss: 4435.423828, avg loss: 2.563829, ppl: 12.985439 +epoch: 2, batch: 34800, sum loss: 4627.829102, avg loss: 2.599904, ppl: 13.462446 +epoch: 2, batch: 34801, sum loss: 4299.687012, avg loss: 2.438847, ppl: 11.459818 +epoch: 2, batch: 34802, sum loss: 4927.429199, avg loss: 2.429699, ppl: 11.355463 +epoch: 2, batch: 34803, sum loss: 3831.514893, avg loss: 2.170830, ppl: 8.765556 +epoch: 2, batch: 34804, sum loss: 4560.577637, avg loss: 2.508569, ppl: 12.287329 +epoch: 2, batch: 34805, sum loss: 3909.141113, avg loss: 2.483571, ppl: 11.983984 +epoch: 2, batch: 34806, sum loss: 4703.625977, avg loss: 2.603003, ppl: 13.504225 +epoch: 2, batch: 34807, sum loss: 4036.215820, avg loss: 2.419794, ppl: 11.243539 +epoch: 2, batch: 34808, sum loss: 3888.548828, avg loss: 2.368178, ppl: 10.677923 +epoch: 2, batch: 34809, sum loss: 4873.866699, avg loss: 2.584235, ppl: 13.253146 +epoch: 2, batch: 34810, sum loss: 4606.708984, avg loss: 2.443877, ppl: 11.517613 +epoch: 2, batch: 34811, sum loss: 4089.355713, avg loss: 2.562253, ppl: 12.964994 +epoch: 2, batch: 34812, sum loss: 5046.919922, avg loss: 2.821084, ppl: 16.795052 +epoch: 2, batch: 34813, sum loss: 3602.043457, avg loss: 2.160794, ppl: 8.678023 +epoch: 2, batch: 34814, sum loss: 3674.888184, avg loss: 2.459764, ppl: 11.702055 +epoch: 2, batch: 34815, sum loss: 3813.942139, avg loss: 2.494403, ppl: 12.114500 +epoch: 2, batch: 34816, sum loss: 4882.805176, avg loss: 2.760206, ppl: 15.803105 +epoch: 2, batch: 34817, sum loss: 4076.821045, avg loss: 2.703462, ppl: 14.931336 +epoch: 2, batch: 34818, sum loss: 
3907.914795, avg loss: 2.457808, ppl: 11.679183 +epoch: 2, batch: 34819, sum loss: 5331.115234, avg loss: 2.752254, ppl: 15.677923 +epoch: 2, batch: 34820, sum loss: 4050.295654, avg loss: 2.475731, ppl: 11.890394 +epoch: 2, batch: 34821, sum loss: 4470.973145, avg loss: 2.547563, ppl: 12.775932 +epoch: 2, batch: 34822, sum loss: 3600.125488, avg loss: 2.660847, ppl: 14.308399 +epoch: 2, batch: 34823, sum loss: 3769.985840, avg loss: 2.190579, ppl: 8.940386 +epoch: 2, batch: 34824, sum loss: 3811.637207, avg loss: 2.270183, ppl: 9.681170 +epoch: 2, batch: 34825, sum loss: 4239.644531, avg loss: 2.492442, ppl: 12.090770 +epoch: 2, batch: 34826, sum loss: 4264.397949, avg loss: 2.315091, ppl: 10.125848 +epoch: 2, batch: 34827, sum loss: 3738.484863, avg loss: 2.402625, ppl: 11.052154 +epoch: 2, batch: 34828, sum loss: 4243.069824, avg loss: 2.562240, ppl: 12.964828 +epoch: 2, batch: 34829, sum loss: 3599.255371, avg loss: 2.265107, ppl: 9.632156 +epoch: 2, batch: 34830, sum loss: 4328.797852, avg loss: 2.344961, ppl: 10.432865 +epoch: 2, batch: 34831, sum loss: 4189.448242, avg loss: 2.568638, ppl: 13.048038 +epoch: 2, batch: 34832, sum loss: 4532.098145, avg loss: 2.480623, ppl: 11.948707 +epoch: 2, batch: 34833, sum loss: 3629.257324, avg loss: 2.447240, ppl: 11.556411 +epoch: 2, batch: 34834, sum loss: 3656.874023, avg loss: 2.210927, ppl: 9.124175 +epoch: 2, batch: 34835, sum loss: 3487.728027, avg loss: 2.387220, ppl: 10.883196 +epoch: 2, batch: 34836, sum loss: 5015.863281, avg loss: 2.656707, ppl: 14.249293 +epoch: 2, batch: 34837, sum loss: 4203.161621, avg loss: 2.599358, ppl: 13.455095 +epoch: 2, batch: 34838, sum loss: 4453.184570, avg loss: 2.649128, ppl: 14.141705 +epoch: 2, batch: 34839, sum loss: 4535.369141, avg loss: 2.582784, ppl: 13.233935 +epoch: 2, batch: 34840, sum loss: 4885.975586, avg loss: 2.645358, ppl: 14.088486 +epoch: 2, batch: 34841, sum loss: 4514.760742, avg loss: 2.582815, ppl: 13.234343 +epoch: 2, batch: 34842, sum loss: 
5353.742188, avg loss: 2.844709, ppl: 17.196552 +epoch: 2, batch: 34843, sum loss: 3859.227539, avg loss: 2.358941, ppl: 10.579742 +epoch: 2, batch: 34844, sum loss: 4383.003418, avg loss: 2.621414, ppl: 13.755156 +epoch: 2, batch: 34845, sum loss: 3693.273926, avg loss: 2.189255, ppl: 8.928563 +epoch: 2, batch: 34846, sum loss: 4148.604492, avg loss: 2.385627, ppl: 10.865869 +epoch: 2, batch: 34847, sum loss: 4422.333008, avg loss: 2.495673, ppl: 12.129899 +epoch: 2, batch: 34848, sum loss: 4043.904785, avg loss: 2.422951, ppl: 11.279092 +epoch: 2, batch: 34849, sum loss: 4143.883789, avg loss: 2.376080, ppl: 10.762631 +epoch: 2, batch: 34850, sum loss: 4363.864258, avg loss: 2.337367, ppl: 10.353939 +epoch: 2, batch: 34851, sum loss: 3794.902344, avg loss: 2.237560, ppl: 9.370442 +epoch: 2, batch: 34852, sum loss: 5211.946777, avg loss: 2.704695, ppl: 14.949753 +epoch: 2, batch: 34853, sum loss: 3891.077637, avg loss: 2.240114, ppl: 9.394400 +epoch: 2, batch: 34854, sum loss: 4782.416992, avg loss: 2.539786, ppl: 12.676956 +epoch: 2, batch: 34855, sum loss: 4651.469727, avg loss: 2.458493, ppl: 11.687191 +epoch: 2, batch: 34856, sum loss: 4029.886475, avg loss: 2.358038, ppl: 10.570189 +epoch: 2, batch: 34857, sum loss: 3469.802490, avg loss: 2.342878, ppl: 10.411158 +epoch: 2, batch: 34858, sum loss: 4617.514648, avg loss: 2.458740, ppl: 11.690079 +epoch: 2, batch: 34859, sum loss: 4163.830078, avg loss: 2.583021, ppl: 13.237069 +epoch: 2, batch: 34860, sum loss: 4169.884277, avg loss: 2.662761, ppl: 14.335818 +epoch: 2, batch: 34861, sum loss: 4600.646973, avg loss: 2.486836, ppl: 12.023177 +epoch: 2, batch: 34862, sum loss: 3710.670410, avg loss: 2.465562, ppl: 11.770094 +epoch: 2, batch: 34863, sum loss: 4899.114258, avg loss: 2.610077, ppl: 13.600097 +epoch: 2, batch: 34864, sum loss: 4414.732422, avg loss: 2.460832, ppl: 11.714553 +epoch: 2, batch: 34865, sum loss: 4045.975586, avg loss: 2.649624, ppl: 14.148716 +epoch: 2, batch: 34866, sum loss: 
3486.564453, avg loss: 2.262534, ppl: 9.607400 +epoch: 2, batch: 34867, sum loss: 3815.971436, avg loss: 2.236794, ppl: 9.363269 +epoch: 2, batch: 34868, sum loss: 3343.932129, avg loss: 2.188437, ppl: 8.921260 +epoch: 2, batch: 34869, sum loss: 5483.379883, avg loss: 2.761017, ppl: 15.815921 +epoch: 2, batch: 34870, sum loss: 4703.216309, avg loss: 2.649699, ppl: 14.149782 +epoch: 2, batch: 34871, sum loss: 4854.748535, avg loss: 2.624188, ppl: 13.793375 +epoch: 2, batch: 34872, sum loss: 3334.754883, avg loss: 2.290354, ppl: 9.878432 +epoch: 2, batch: 34873, sum loss: 4277.832520, avg loss: 2.326173, ppl: 10.238687 +epoch: 2, batch: 34874, sum loss: 4088.852539, avg loss: 2.442564, ppl: 11.502501 +epoch: 2, batch: 34875, sum loss: 3596.780273, avg loss: 2.340130, ppl: 10.382589 +epoch: 2, batch: 34876, sum loss: 3980.491211, avg loss: 2.240006, ppl: 9.393390 +epoch: 2, batch: 34877, sum loss: 3523.679443, avg loss: 2.138155, ppl: 8.483770 +epoch: 2, batch: 34878, sum loss: 4378.193848, avg loss: 2.489024, ppl: 12.049515 +epoch: 2, batch: 34879, sum loss: 3553.807617, avg loss: 2.265014, ppl: 9.631264 +epoch: 2, batch: 34880, sum loss: 4438.562500, avg loss: 2.467239, ppl: 11.789846 +epoch: 2, batch: 34881, sum loss: 4260.726074, avg loss: 2.395012, ppl: 10.968328 +epoch: 2, batch: 34882, sum loss: 3870.175781, avg loss: 2.506591, ppl: 12.263049 +epoch: 2, batch: 34883, sum loss: 4145.635742, avg loss: 2.498876, ppl: 12.168813 +epoch: 2, batch: 34884, sum loss: 4613.712891, avg loss: 2.502013, ppl: 12.207047 +epoch: 2, batch: 34885, sum loss: 3681.512207, avg loss: 2.303825, ppl: 10.012405 +epoch: 2, batch: 34886, sum loss: 5044.149902, avg loss: 2.570922, ppl: 13.077882 +epoch: 2, batch: 34887, sum loss: 4349.675293, avg loss: 2.482692, ppl: 11.973448 +epoch: 2, batch: 34888, sum loss: 3342.467041, avg loss: 2.343946, ppl: 10.422281 +epoch: 2, batch: 34889, sum loss: 3909.315674, avg loss: 2.320069, ppl: 10.176373 +epoch: 2, batch: 34890, sum loss: 3871.206055, 
avg loss: 2.414976, ppl: 11.189498 +epoch: 2, batch: 34891, sum loss: 4487.590332, avg loss: 2.504236, ppl: 12.234205 +epoch: 2, batch: 34892, sum loss: 3986.114014, avg loss: 2.268705, ppl: 9.666871 +epoch: 2, batch: 34893, sum loss: 3356.123047, avg loss: 2.350226, ppl: 10.487942 +epoch: 2, batch: 34894, sum loss: 4881.722656, avg loss: 2.765849, ppl: 15.892521 +epoch: 2, batch: 34895, sum loss: 3539.931152, avg loss: 2.294188, ppl: 9.916376 +epoch: 2, batch: 34896, sum loss: 3689.537354, avg loss: 2.406743, ppl: 11.097760 +epoch: 2, batch: 34897, sum loss: 3309.222900, avg loss: 2.323893, ppl: 10.215362 +epoch: 2, batch: 34898, sum loss: 3925.575195, avg loss: 2.313244, ppl: 10.107161 +epoch: 2, batch: 34899, sum loss: 5555.229004, avg loss: 2.630317, ppl: 13.878165 +epoch: 2, batch: 34900, sum loss: 4261.200684, avg loss: 2.470261, ppl: 11.825537 +epoch: 2, batch: 34901, sum loss: 3490.944580, avg loss: 2.347643, ppl: 10.460881 +epoch: 2, batch: 34902, sum loss: 3035.419922, avg loss: 2.146690, ppl: 8.556491 +epoch: 2, batch: 34903, sum loss: 4548.753906, avg loss: 2.444252, ppl: 11.521934 +epoch: 2, batch: 34904, sum loss: 4299.774902, avg loss: 2.526307, ppl: 12.507233 +epoch: 2, batch: 34905, sum loss: 4107.374512, avg loss: 2.611172, ppl: 13.615005 +epoch: 2, batch: 34906, sum loss: 4718.277832, avg loss: 2.527198, ppl: 12.518375 +epoch: 2, batch: 34907, sum loss: 3379.743896, avg loss: 2.350309, ppl: 10.488807 +epoch: 2, batch: 34908, sum loss: 4277.378906, avg loss: 2.620943, ppl: 13.748683 +epoch: 2, batch: 34909, sum loss: 4434.726562, avg loss: 2.550159, ppl: 12.809143 +epoch: 2, batch: 34910, sum loss: 4559.721191, avg loss: 2.516402, ppl: 12.383965 +epoch: 2, batch: 34911, sum loss: 3409.818604, avg loss: 2.227184, ppl: 9.273715 +epoch: 2, batch: 34912, sum loss: 3667.464111, avg loss: 2.302237, ppl: 9.996522 +epoch: 2, batch: 34913, sum loss: 3668.555664, avg loss: 2.480430, ppl: 11.946396 +epoch: 2, batch: 34914, sum loss: 3639.651123, avg loss: 
2.259250, ppl: 9.575902 +epoch: 2, batch: 34915, sum loss: 4249.088379, avg loss: 2.541321, ppl: 12.696432 +epoch: 2, batch: 34916, sum loss: 4592.328613, avg loss: 2.514966, ppl: 12.366195 +epoch: 2, batch: 34917, sum loss: 3781.962402, avg loss: 2.564042, ppl: 12.988214 +epoch: 2, batch: 34918, sum loss: 5130.340820, avg loss: 2.733266, ppl: 15.383052 +epoch: 2, batch: 34919, sum loss: 3196.626465, avg loss: 1.974445, ppl: 7.202622 +epoch: 2, batch: 34920, sum loss: 4477.162109, avg loss: 2.559841, ppl: 12.933763 +epoch: 2, batch: 34921, sum loss: 4326.018555, avg loss: 2.293753, ppl: 9.912072 +epoch: 2, batch: 34922, sum loss: 4244.019043, avg loss: 2.573693, ppl: 13.114160 +epoch: 2, batch: 34923, sum loss: 5505.467773, avg loss: 2.899141, ppl: 18.158533 +epoch: 2, batch: 34924, sum loss: 4434.071777, avg loss: 2.666309, ppl: 14.386767 +epoch: 2, batch: 34925, sum loss: 4078.010254, avg loss: 2.336969, ppl: 10.349815 +epoch: 2, batch: 34926, sum loss: 3558.501465, avg loss: 2.315226, ppl: 10.127208 +epoch: 2, batch: 34927, sum loss: 3442.521484, avg loss: 2.123702, ppl: 8.362039 +epoch: 2, batch: 34928, sum loss: 4777.699219, avg loss: 2.563143, ppl: 12.976542 +epoch: 2, batch: 34929, sum loss: 4021.219727, avg loss: 2.474597, ppl: 11.876917 +epoch: 2, batch: 34930, sum loss: 3884.775879, avg loss: 2.219872, ppl: 9.206152 +epoch: 2, batch: 34931, sum loss: 3818.387695, avg loss: 2.260739, ppl: 9.590172 +epoch: 2, batch: 34932, sum loss: 3758.342773, avg loss: 2.492270, ppl: 12.088683 +epoch: 2, batch: 34933, sum loss: 4137.583008, avg loss: 2.421055, ppl: 11.257730 +epoch: 2, batch: 34934, sum loss: 3718.260742, avg loss: 2.503879, ppl: 12.229842 +epoch: 2, batch: 34935, sum loss: 4621.409668, avg loss: 2.574602, ppl: 13.126087 +epoch: 2, batch: 34936, sum loss: 3918.102539, avg loss: 2.238916, ppl: 9.383151 +epoch: 2, batch: 34937, sum loss: 4447.994141, avg loss: 2.518683, ppl: 12.412238 +epoch: 2, batch: 34938, sum loss: 4748.078125, avg loss: 2.688606, ppl: 
14.711151 +epoch: 2, batch: 34939, sum loss: 4096.476562, avg loss: 2.369275, ppl: 10.689640 +epoch: 2, batch: 34940, sum loss: 4855.774414, avg loss: 2.633283, ppl: 13.919397 +epoch: 2, batch: 34941, sum loss: 3935.139160, avg loss: 2.320247, ppl: 10.178190 +epoch: 2, batch: 34942, sum loss: 3710.246582, avg loss: 2.346772, ppl: 10.451777 +epoch: 2, batch: 34943, sum loss: 4108.143066, avg loss: 2.443868, ppl: 11.517509 +epoch: 2, batch: 34944, sum loss: 3936.701172, avg loss: 2.346067, ppl: 10.444415 +epoch: 2, batch: 34945, sum loss: 4425.702148, avg loss: 2.373031, ppl: 10.729861 +epoch: 2, batch: 34946, sum loss: 3715.466797, avg loss: 2.508755, ppl: 12.289625 +epoch: 2, batch: 34947, sum loss: 4361.155273, avg loss: 2.535555, ppl: 12.623440 +epoch: 2, batch: 34948, sum loss: 3600.502441, avg loss: 2.254541, ppl: 9.530921 +epoch: 2, batch: 34949, sum loss: 3590.262695, avg loss: 2.349648, ppl: 10.481884 +epoch: 2, batch: 34950, sum loss: 5184.876953, avg loss: 2.573140, ppl: 13.106917 +epoch: 2, batch: 34951, sum loss: 3662.148682, avg loss: 2.472754, ppl: 11.855051 +epoch: 2, batch: 34952, sum loss: 4509.918945, avg loss: 2.546538, ppl: 12.762844 +epoch: 2, batch: 34953, sum loss: 4318.974609, avg loss: 2.525716, ppl: 12.499843 +epoch: 2, batch: 34954, sum loss: 4236.541992, avg loss: 2.607103, ppl: 13.559707 +epoch: 2, batch: 34955, sum loss: 4873.204102, avg loss: 2.550081, ppl: 12.808135 +epoch: 2, batch: 34956, sum loss: 3309.800293, avg loss: 2.295285, ppl: 9.927260 +epoch: 2, batch: 34957, sum loss: 3221.777832, avg loss: 2.208210, ppl: 9.099412 +epoch: 2, batch: 34958, sum loss: 3956.052734, avg loss: 2.359006, ppl: 10.580426 +epoch: 2, batch: 34959, sum loss: 4942.733398, avg loss: 2.654529, ppl: 14.218292 +epoch: 2, batch: 34960, sum loss: 4445.164062, avg loss: 2.475036, ppl: 11.882131 +epoch: 2, batch: 34961, sum loss: 3981.572510, avg loss: 2.371395, ppl: 10.712327 +epoch: 2, batch: 34962, sum loss: 3934.592529, avg loss: 2.217921, ppl: 9.188211 
+epoch: 2, batch: 34963, sum loss: 3195.278320, avg loss: 2.153152, ppl: 8.611964 +epoch: 2, batch: 34964, sum loss: 3368.155029, avg loss: 2.352064, ppl: 10.507231 +epoch: 2, batch: 34965, sum loss: 3819.230469, avg loss: 2.330220, ppl: 10.280200 +epoch: 2, batch: 34966, sum loss: 3267.748779, avg loss: 2.010922, ppl: 7.470203 +epoch: 2, batch: 34967, sum loss: 4885.334961, avg loss: 2.594442, ppl: 13.389119 +epoch: 2, batch: 34968, sum loss: 5184.185547, avg loss: 2.536294, ppl: 12.632770 +epoch: 2, batch: 34969, sum loss: 4002.449219, avg loss: 2.484450, ppl: 11.994526 +epoch: 2, batch: 34970, sum loss: 4992.101074, avg loss: 2.955655, ppl: 19.214302 +epoch: 2, batch: 34971, sum loss: 4674.921387, avg loss: 2.407272, ppl: 11.103625 +epoch: 2, batch: 34972, sum loss: 4462.973145, avg loss: 2.310028, ppl: 10.074702 +epoch: 2, batch: 34973, sum loss: 4911.238281, avg loss: 2.686673, ppl: 14.682748 +epoch: 2, batch: 34974, sum loss: 5322.838867, avg loss: 2.729661, ppl: 15.327686 +epoch: 2, batch: 34975, sum loss: 4397.801758, avg loss: 2.381052, ppl: 10.816271 +epoch: 2, batch: 34976, sum loss: 4509.575684, avg loss: 2.513699, ppl: 12.350528 +epoch: 2, batch: 34977, sum loss: 4411.515137, avg loss: 2.369235, ppl: 10.689210 +epoch: 2, batch: 34978, sum loss: 3978.466309, avg loss: 2.247721, ppl: 9.466140 +epoch: 2, batch: 34979, sum loss: 4200.872070, avg loss: 2.532171, ppl: 12.580792 +epoch: 2, batch: 34980, sum loss: 5041.303711, avg loss: 2.613429, ppl: 13.645757 +epoch: 2, batch: 34981, sum loss: 4090.547119, avg loss: 2.308435, ppl: 10.058673 +epoch: 2, batch: 34982, sum loss: 3819.861572, avg loss: 2.353581, ppl: 10.523183 +epoch: 2, batch: 34983, sum loss: 3660.892090, avg loss: 2.337734, ppl: 10.357744 +epoch: 2, batch: 34984, sum loss: 3584.190918, avg loss: 2.350289, ppl: 10.488602 +epoch: 2, batch: 34985, sum loss: 3258.937988, avg loss: 2.260013, ppl: 9.583210 +epoch: 2, batch: 34986, sum loss: 5066.450195, avg loss: 2.649817, ppl: 14.151448 +epoch: 2, 
batch: 34987, sum loss: 4160.934570, avg loss: 2.537155, ppl: 12.643650 +epoch: 2, batch: 34988, sum loss: 3760.477051, avg loss: 2.600607, ppl: 13.471909 +epoch: 2, batch: 34989, sum loss: 4280.617188, avg loss: 2.493079, ppl: 12.098475 +epoch: 2, batch: 34990, sum loss: 3871.409668, avg loss: 2.431790, ppl: 11.379231 +epoch: 2, batch: 34991, sum loss: 4371.438965, avg loss: 2.577499, ppl: 13.164179 +epoch: 2, batch: 34992, sum loss: 3665.549561, avg loss: 2.468384, ppl: 11.803352 +epoch: 2, batch: 34993, sum loss: 4258.449219, avg loss: 2.321946, ppl: 10.195497 +epoch: 2, batch: 34994, sum loss: 3655.371582, avg loss: 2.192784, ppl: 8.960126 +epoch: 2, batch: 34995, sum loss: 3867.258301, avg loss: 2.310190, ppl: 10.076341 +epoch: 2, batch: 34996, sum loss: 4403.473633, avg loss: 2.287519, ppl: 9.850466 +epoch: 2, batch: 34997, sum loss: 4039.868408, avg loss: 2.406116, ppl: 11.090798 +epoch: 2, batch: 34998, sum loss: 4630.653320, avg loss: 2.589851, ppl: 13.327785 +epoch: 2, batch: 34999, sum loss: 5371.832031, avg loss: 2.708942, ppl: 15.013382 +epoch: 2, batch: 35000, sum loss: 4603.974121, avg loss: 2.758523, ppl: 15.776520 +epoch: 2, batch: 35001, sum loss: 4410.318848, avg loss: 2.666456, ppl: 14.388891 +epoch: 2, batch: 35002, sum loss: 2659.536865, avg loss: 2.027086, ppl: 7.591931 +epoch: 2, batch: 35003, sum loss: 3126.267334, avg loss: 2.109492, ppl: 8.244055 +epoch: 2, batch: 35004, sum loss: 4367.816406, avg loss: 2.524749, ppl: 12.487764 +epoch: 2, batch: 35005, sum loss: 4642.381348, avg loss: 2.633228, ppl: 13.918631 +epoch: 2, batch: 35006, sum loss: 4926.988281, avg loss: 2.849617, ppl: 17.281166 +epoch: 2, batch: 35007, sum loss: 3650.595459, avg loss: 2.505556, ppl: 12.250370 +epoch: 2, batch: 35008, sum loss: 3263.819336, avg loss: 2.036069, ppl: 7.660440 +epoch: 2, batch: 35009, sum loss: 4323.063477, avg loss: 2.487378, ppl: 12.029697 +epoch: 2, batch: 35010, sum loss: 3555.038574, avg loss: 2.338841, ppl: 10.369214 +epoch: 2, batch: 
35011, sum loss: 4074.910156, avg loss: 2.444457, ppl: 11.524291 +epoch: 2, batch: 35012, sum loss: 3242.530518, avg loss: 2.041896, ppl: 7.705204 +epoch: 2, batch: 35013, sum loss: 4331.559570, avg loss: 2.401086, ppl: 11.035158 +epoch: 2, batch: 35014, sum loss: 4777.983398, avg loss: 2.812233, ppl: 16.647049 +epoch: 2, batch: 35015, sum loss: 4896.596680, avg loss: 2.403827, ppl: 11.065448 +epoch: 2, batch: 35016, sum loss: 4487.975586, avg loss: 2.574857, ppl: 13.129439 +epoch: 2, batch: 35017, sum loss: 5041.507812, avg loss: 2.400718, ppl: 11.031094 +epoch: 2, batch: 35018, sum loss: 3820.235596, avg loss: 2.264514, ppl: 9.626450 +epoch: 2, batch: 35019, sum loss: 3809.716553, avg loss: 2.360419, ppl: 10.595393 +epoch: 2, batch: 35020, sum loss: 4288.539551, avg loss: 2.578797, ppl: 13.181273 +epoch: 2, batch: 35021, sum loss: 3443.252686, avg loss: 2.243161, ppl: 9.423075 +epoch: 2, batch: 35022, sum loss: 3808.249512, avg loss: 2.347873, ppl: 10.463288 +epoch: 2, batch: 35023, sum loss: 3891.458496, avg loss: 2.352756, ppl: 10.514508 +epoch: 2, batch: 35024, sum loss: 3811.511719, avg loss: 2.294709, ppl: 9.921548 +epoch: 2, batch: 35025, sum loss: 4725.059570, avg loss: 2.429337, ppl: 11.351349 +epoch: 2, batch: 35026, sum loss: 5243.067383, avg loss: 2.668228, ppl: 14.414399 +epoch: 2, batch: 35027, sum loss: 4094.546387, avg loss: 2.427117, ppl: 11.326182 +epoch: 2, batch: 35028, sum loss: 3936.145020, avg loss: 2.507099, ppl: 12.269281 +epoch: 2, batch: 35029, sum loss: 4271.338867, avg loss: 2.657958, ppl: 14.267130 +epoch: 2, batch: 35030, sum loss: 4429.166504, avg loss: 2.620809, ppl: 13.746835 +epoch: 2, batch: 35031, sum loss: 3744.653809, avg loss: 2.283325, ppl: 9.809246 +epoch: 2, batch: 35032, sum loss: 5106.321289, avg loss: 2.693208, ppl: 14.779004 +epoch: 2, batch: 35033, sum loss: 3885.229004, avg loss: 2.241909, ppl: 9.411283 +epoch: 2, batch: 35034, sum loss: 4463.835938, avg loss: 2.215303, ppl: 9.164187 +epoch: 2, batch: 35035, sum 
loss: 4298.071289, avg loss: 2.628790, ppl: 13.856988 +epoch: 2, batch: 35036, sum loss: 4350.957031, avg loss: 2.462341, ppl: 11.732248 +epoch: 2, batch: 35037, sum loss: 3771.927002, avg loss: 2.385786, ppl: 10.867597 +epoch: 2, batch: 35038, sum loss: 4691.431641, avg loss: 2.725991, ppl: 15.271548 +epoch: 2, batch: 35039, sum loss: 3995.649414, avg loss: 2.469499, ppl: 11.816524 +epoch: 2, batch: 35040, sum loss: 3699.741211, avg loss: 2.587232, ppl: 13.292921 +epoch: 2, batch: 35041, sum loss: 4859.161133, avg loss: 2.605448, ppl: 13.537292 +epoch: 2, batch: 35042, sum loss: 4211.714844, avg loss: 2.387594, ppl: 10.887262 +epoch: 2, batch: 35043, sum loss: 3926.143555, avg loss: 2.489628, ppl: 12.056791 +epoch: 2, batch: 35044, sum loss: 4617.145508, avg loss: 2.417354, ppl: 11.216138 +epoch: 2, batch: 35045, sum loss: 4420.663086, avg loss: 2.479340, ppl: 11.933387 +epoch: 2, batch: 35046, sum loss: 4119.403809, avg loss: 2.370198, ppl: 10.699508 +epoch: 2, batch: 35047, sum loss: 5049.975098, avg loss: 2.725297, ppl: 15.260942 +epoch: 2, batch: 35048, sum loss: 4567.684570, avg loss: 2.531976, ppl: 12.578336 +epoch: 2, batch: 35049, sum loss: 5262.033691, avg loss: 2.785619, ppl: 16.209845 +epoch: 2, batch: 35050, sum loss: 3750.006348, avg loss: 2.234807, ppl: 9.344678 +epoch: 2, batch: 35051, sum loss: 4903.048828, avg loss: 2.669052, ppl: 14.426291 +epoch: 2, batch: 35052, sum loss: 3866.351562, avg loss: 2.404447, ppl: 11.072309 +epoch: 2, batch: 35053, sum loss: 3878.843994, avg loss: 2.445677, ppl: 11.538362 +epoch: 2, batch: 35054, sum loss: 3828.523193, avg loss: 2.262721, ppl: 9.609197 +epoch: 2, batch: 35055, sum loss: 4175.595215, avg loss: 2.249782, ppl: 9.485666 +epoch: 2, batch: 35056, sum loss: 3588.260986, avg loss: 2.303120, ppl: 10.005352 +epoch: 2, batch: 35057, sum loss: 4433.328125, avg loss: 2.516077, ppl: 12.379938 +epoch: 2, batch: 35058, sum loss: 3536.004883, avg loss: 2.185417, ppl: 8.894359 +epoch: 2, batch: 35059, sum loss: 
3743.943848, avg loss: 2.417007, ppl: 11.212251 +epoch: 2, batch: 35060, sum loss: 4065.494385, avg loss: 2.504926, ppl: 12.242650 +epoch: 2, batch: 35061, sum loss: 4136.763184, avg loss: 2.341122, ppl: 10.392895 +epoch: 2, batch: 35062, sum loss: 4443.296387, avg loss: 2.500448, ppl: 12.187956 +epoch: 2, batch: 35063, sum loss: 4061.616211, avg loss: 2.362779, ppl: 10.620421 +epoch: 2, batch: 35064, sum loss: 4385.405762, avg loss: 2.564565, ppl: 12.995001 +epoch: 2, batch: 35065, sum loss: 3918.163818, avg loss: 2.347612, ppl: 10.460557 +epoch: 2, batch: 35066, sum loss: 5047.777344, avg loss: 2.520108, ppl: 12.429945 +epoch: 2, batch: 35067, sum loss: 4403.433594, avg loss: 2.649479, ppl: 14.146662 +epoch: 2, batch: 35068, sum loss: 4026.814941, avg loss: 2.613118, ppl: 13.641521 +epoch: 2, batch: 35069, sum loss: 4219.592285, avg loss: 2.622494, ppl: 13.770020 +epoch: 2, batch: 35070, sum loss: 3929.681152, avg loss: 2.183156, ppl: 8.874271 +epoch: 2, batch: 35071, sum loss: 4922.820312, avg loss: 2.640998, ppl: 14.027198 +epoch: 2, batch: 35072, sum loss: 6046.444336, avg loss: 3.091229, ppl: 22.004108 +epoch: 2, batch: 35073, sum loss: 3335.678711, avg loss: 2.345766, ppl: 10.441263 +epoch: 2, batch: 35074, sum loss: 3920.813477, avg loss: 2.261138, ppl: 9.594001 +epoch: 2, batch: 35075, sum loss: 4397.746094, avg loss: 2.408404, ppl: 11.116209 +epoch: 2, batch: 35076, sum loss: 4125.523926, avg loss: 2.452749, ppl: 11.620247 +epoch: 2, batch: 35077, sum loss: 4333.860352, avg loss: 2.499343, ppl: 12.174489 +epoch: 2, batch: 35078, sum loss: 4756.428223, avg loss: 2.456833, ppl: 11.667797 +epoch: 2, batch: 35079, sum loss: 4549.884277, avg loss: 2.693833, ppl: 14.788256 +epoch: 2, batch: 35080, sum loss: 4312.643066, avg loss: 2.383993, ppl: 10.848132 +epoch: 2, batch: 35081, sum loss: 4572.291016, avg loss: 2.557210, ppl: 12.899774 +epoch: 2, batch: 35082, sum loss: 3993.980225, avg loss: 2.381622, ppl: 10.822443 +epoch: 2, batch: 35083, sum loss: 
5664.484375, avg loss: 2.753760, ppl: 15.701557 +epoch: 2, batch: 35084, sum loss: 4021.003662, avg loss: 2.423751, ppl: 11.288126 +epoch: 2, batch: 35085, sum loss: 5250.956055, avg loss: 2.633378, ppl: 13.920718 +epoch: 2, batch: 35086, sum loss: 3521.770508, avg loss: 2.464500, ppl: 11.757602 +epoch: 2, batch: 35087, sum loss: 4532.842773, avg loss: 2.533730, ppl: 12.600415 +epoch: 2, batch: 35088, sum loss: 3692.906738, avg loss: 2.415243, ppl: 11.192492 +epoch: 2, batch: 35089, sum loss: 5542.624023, avg loss: 2.938825, ppl: 18.893635 +epoch: 2, batch: 35090, sum loss: 5008.406250, avg loss: 2.518052, ppl: 12.404413 +epoch: 2, batch: 35091, sum loss: 3289.972168, avg loss: 2.083580, ppl: 8.033173 +epoch: 2, batch: 35092, sum loss: 4837.906738, avg loss: 2.605227, ppl: 13.534297 +epoch: 2, batch: 35093, sum loss: 3539.708008, avg loss: 2.155730, ppl: 8.634189 +epoch: 2, batch: 35094, sum loss: 4034.340820, avg loss: 2.355132, ppl: 10.539518 +epoch: 2, batch: 35095, sum loss: 3625.220703, avg loss: 2.420040, ppl: 11.246313 +epoch: 2, batch: 35096, sum loss: 3918.367920, avg loss: 2.264952, ppl: 9.630667 +epoch: 2, batch: 35097, sum loss: 4126.635254, avg loss: 2.539468, ppl: 12.672925 +epoch: 2, batch: 35098, sum loss: 4961.761719, avg loss: 2.640640, ppl: 14.022168 +epoch: 2, batch: 35099, sum loss: 3480.625977, avg loss: 2.302001, ppl: 9.994165 +epoch: 2, batch: 35100, sum loss: 3255.412842, avg loss: 2.331958, ppl: 10.298083 +epoch: 2, batch: 35101, sum loss: 4405.717773, avg loss: 2.564446, ppl: 12.993455 +epoch: 2, batch: 35102, sum loss: 3596.077148, avg loss: 2.281775, ppl: 9.794047 +epoch: 2, batch: 35103, sum loss: 3679.062256, avg loss: 2.442936, ppl: 11.506780 +epoch: 2, batch: 35104, sum loss: 3145.818359, avg loss: 2.168035, ppl: 8.741089 +epoch: 2, batch: 35105, sum loss: 4890.131348, avg loss: 2.873168, ppl: 17.692972 +epoch: 2, batch: 35106, sum loss: 4447.544922, avg loss: 2.493019, ppl: 12.097739 +epoch: 2, batch: 35107, sum loss: 4878.534180, 
avg loss: 2.677571, ppl: 14.549706 +epoch: 2, batch: 35108, sum loss: 4399.618164, avg loss: 2.398919, ppl: 11.011271 +epoch: 2, batch: 35109, sum loss: 4375.557617, avg loss: 2.753655, ppl: 15.699909 +epoch: 2, batch: 35110, sum loss: 3448.426514, avg loss: 2.160668, ppl: 8.676933 +epoch: 2, batch: 35111, sum loss: 3844.717773, avg loss: 2.161168, ppl: 8.681271 +epoch: 2, batch: 35112, sum loss: 4494.427246, avg loss: 2.610004, ppl: 13.599108 +epoch: 2, batch: 35113, sum loss: 3667.638428, avg loss: 2.193564, ppl: 8.967113 +epoch: 2, batch: 35114, sum loss: 4194.234375, avg loss: 2.538883, ppl: 12.665515 +epoch: 2, batch: 35115, sum loss: 4630.065430, avg loss: 2.615856, ppl: 13.678923 +epoch: 2, batch: 35116, sum loss: 3848.438477, avg loss: 2.526880, ppl: 12.514403 +epoch: 2, batch: 35117, sum loss: 4493.280273, avg loss: 2.564658, ppl: 12.996212 +epoch: 2, batch: 35118, sum loss: 5089.787109, avg loss: 2.704457, ppl: 14.946192 +epoch: 2, batch: 35119, sum loss: 4198.435059, avg loss: 2.544506, ppl: 12.736936 +epoch: 2, batch: 35120, sum loss: 4488.142578, avg loss: 2.329083, ppl: 10.268520 +epoch: 2, batch: 35121, sum loss: 3963.073730, avg loss: 2.484686, ppl: 11.997349 +epoch: 2, batch: 35122, sum loss: 4869.775879, avg loss: 2.681595, ppl: 14.608370 +epoch: 2, batch: 35123, sum loss: 4299.389648, avg loss: 2.569868, ppl: 13.064104 +epoch: 2, batch: 35124, sum loss: 4622.581055, avg loss: 2.685985, ppl: 14.672651 +epoch: 2, batch: 35125, sum loss: 3641.515137, avg loss: 2.249237, ppl: 9.480502 +epoch: 2, batch: 35126, sum loss: 4560.958496, avg loss: 2.636392, ppl: 13.962737 +epoch: 2, batch: 35127, sum loss: 4291.740723, avg loss: 2.455229, ppl: 11.649104 +epoch: 2, batch: 35128, sum loss: 3825.813721, avg loss: 2.247834, ppl: 9.467210 +epoch: 2, batch: 35129, sum loss: 5037.103516, avg loss: 2.769161, ppl: 15.945247 +epoch: 2, batch: 35130, sum loss: 5061.124512, avg loss: 2.595448, ppl: 13.402594 +epoch: 2, batch: 35131, sum loss: 3964.553711, avg loss: 
2.300960, ppl: 9.983761 +epoch: 2, batch: 35132, sum loss: 4728.278809, avg loss: 2.666824, ppl: 14.394182 +epoch: 2, batch: 35133, sum loss: 3930.402588, avg loss: 2.366287, ppl: 10.657746 +epoch: 2, batch: 35134, sum loss: 4441.864258, avg loss: 2.615939, ppl: 13.680055 +epoch: 2, batch: 35135, sum loss: 3904.603760, avg loss: 2.580703, ppl: 13.206423 +epoch: 2, batch: 35136, sum loss: 4462.369629, avg loss: 2.501328, ppl: 12.198689 +epoch: 2, batch: 35137, sum loss: 4630.156738, avg loss: 2.532908, ppl: 12.590071 +epoch: 2, batch: 35138, sum loss: 4140.008789, avg loss: 2.321934, ppl: 10.195377 +epoch: 2, batch: 35139, sum loss: 3975.533447, avg loss: 2.328959, ppl: 10.267250 +epoch: 2, batch: 35140, sum loss: 4322.907227, avg loss: 2.488720, ppl: 12.045853 +epoch: 2, batch: 35141, sum loss: 4568.476074, avg loss: 2.688921, ppl: 14.715785 +epoch: 2, batch: 35142, sum loss: 3291.365479, avg loss: 2.246666, ppl: 9.456154 +epoch: 2, batch: 35143, sum loss: 3289.233643, avg loss: 2.099064, ppl: 8.158533 +epoch: 2, batch: 35144, sum loss: 4890.642578, avg loss: 2.749096, ppl: 15.628504 +epoch: 2, batch: 35145, sum loss: 5290.800293, avg loss: 2.724408, ppl: 15.247387 +epoch: 2, batch: 35146, sum loss: 4301.246094, avg loss: 2.539106, ppl: 12.668342 +epoch: 2, batch: 35147, sum loss: 4560.621582, avg loss: 2.454586, ppl: 11.641616 +epoch: 2, batch: 35148, sum loss: 3210.417725, avg loss: 2.365820, ppl: 10.652772 +epoch: 2, batch: 35149, sum loss: 4744.277344, avg loss: 2.657859, ppl: 14.265708 +epoch: 2, batch: 35150, sum loss: 3498.645996, avg loss: 2.330877, ppl: 10.286958 +epoch: 2, batch: 35151, sum loss: 3555.469238, avg loss: 2.021302, ppl: 7.548142 +epoch: 2, batch: 35152, sum loss: 4426.058105, avg loss: 2.688978, ppl: 14.716631 +epoch: 2, batch: 35153, sum loss: 4314.852051, avg loss: 2.351418, ppl: 10.500449 +epoch: 2, batch: 35154, sum loss: 4851.531250, avg loss: 2.704309, ppl: 14.943994 +epoch: 2, batch: 35155, sum loss: 3998.737549, avg loss: 2.405979, 
ppl: 11.089286 +epoch: 2, batch: 35156, sum loss: 3558.461914, avg loss: 2.375475, ppl: 10.756123 +epoch: 2, batch: 35157, sum loss: 4013.039062, avg loss: 2.341330, ppl: 10.395051 +epoch: 2, batch: 35158, sum loss: 4292.517578, avg loss: 2.440317, ppl: 11.476680 +epoch: 2, batch: 35159, sum loss: 4251.838867, avg loss: 2.732544, ppl: 15.371946 +epoch: 2, batch: 35160, sum loss: 3609.488037, avg loss: 2.299037, ppl: 9.964581 +epoch: 2, batch: 35161, sum loss: 5196.173828, avg loss: 2.742044, ppl: 15.518676 +epoch: 2, batch: 35162, sum loss: 3891.934570, avg loss: 2.469501, ppl: 11.816555 +epoch: 2, batch: 35163, sum loss: 3649.045898, avg loss: 2.351190, ppl: 10.498051 +epoch: 2, batch: 35164, sum loss: 3876.411865, avg loss: 2.291023, ppl: 9.885050 +epoch: 2, batch: 35165, sum loss: 4343.108887, avg loss: 2.755780, ppl: 15.733304 +epoch: 2, batch: 35166, sum loss: 4322.425781, avg loss: 2.417464, ppl: 11.217376 +epoch: 2, batch: 35167, sum loss: 3426.019531, avg loss: 2.335392, ppl: 10.333505 +epoch: 2, batch: 35168, sum loss: 3875.951416, avg loss: 2.305741, ppl: 10.031612 +epoch: 2, batch: 35169, sum loss: 4075.729248, avg loss: 2.453780, ppl: 11.632238 +epoch: 2, batch: 35170, sum loss: 4790.579590, avg loss: 2.729675, ppl: 15.327902 +epoch: 2, batch: 35171, sum loss: 4235.200684, avg loss: 2.465193, ppl: 11.765747 +epoch: 2, batch: 35172, sum loss: 4810.208496, avg loss: 2.553189, ppl: 12.848015 +epoch: 2, batch: 35173, sum loss: 3921.849609, avg loss: 2.387005, ppl: 10.880857 +epoch: 2, batch: 35174, sum loss: 4057.676758, avg loss: 2.457709, ppl: 11.678021 +epoch: 2, batch: 35175, sum loss: 3931.010986, avg loss: 2.328798, ppl: 10.265595 +epoch: 2, batch: 35176, sum loss: 4314.060547, avg loss: 2.525796, ppl: 12.500847 +epoch: 2, batch: 35177, sum loss: 4071.970947, avg loss: 2.395277, ppl: 10.971239 +epoch: 2, batch: 35178, sum loss: 3568.654541, avg loss: 2.205596, ppl: 9.075661 +epoch: 2, batch: 35179, sum loss: 4266.105469, avg loss: 2.598115, ppl: 
13.438389 +epoch: 2, batch: 35180, sum loss: 3833.653320, avg loss: 2.408074, ppl: 11.112534 +epoch: 2, batch: 35181, sum loss: 4592.114258, avg loss: 2.640664, ppl: 14.022510 +epoch: 2, batch: 35182, sum loss: 5272.846191, avg loss: 2.781037, ppl: 16.135746 +epoch: 2, batch: 35183, sum loss: 3694.284912, avg loss: 2.327842, ppl: 10.255786 +epoch: 2, batch: 35184, sum loss: 4994.841309, avg loss: 2.784193, ppl: 16.186743 +epoch: 2, batch: 35185, sum loss: 4349.510742, avg loss: 2.403045, ppl: 11.056789 +epoch: 2, batch: 35186, sum loss: 3718.279785, avg loss: 2.400439, ppl: 11.028014 +epoch: 2, batch: 35187, sum loss: 4363.439941, avg loss: 2.565220, ppl: 13.003524 +epoch: 2, batch: 35188, sum loss: 4275.993164, avg loss: 2.496202, ppl: 12.136307 +epoch: 2, batch: 35189, sum loss: 4262.478516, avg loss: 2.366729, ppl: 10.662456 +epoch: 2, batch: 35190, sum loss: 3166.687744, avg loss: 2.064334, ppl: 7.880045 +epoch: 2, batch: 35191, sum loss: 4188.122070, avg loss: 2.362167, ppl: 10.613928 +epoch: 2, batch: 35192, sum loss: 4120.731934, avg loss: 2.395774, ppl: 10.976695 +epoch: 2, batch: 35193, sum loss: 4475.878418, avg loss: 2.500491, ppl: 12.188473 +epoch: 2, batch: 35194, sum loss: 5896.749023, avg loss: 2.695041, ppl: 14.806122 +epoch: 2, batch: 35195, sum loss: 5108.393555, avg loss: 2.677355, ppl: 14.546568 +epoch: 2, batch: 35196, sum loss: 4193.875488, avg loss: 2.558801, ppl: 12.920321 +epoch: 2, batch: 35197, sum loss: 4830.989258, avg loss: 2.694361, ppl: 14.796061 +epoch: 2, batch: 35198, sum loss: 4749.333496, avg loss: 2.674174, ppl: 14.500372 +epoch: 2, batch: 35199, sum loss: 3684.047852, avg loss: 2.339078, ppl: 10.371669 +epoch: 2, batch: 35200, sum loss: 3840.790771, avg loss: 2.250024, ppl: 9.487962 +epoch: 2, batch: 35201, sum loss: 4063.027344, avg loss: 2.387208, ppl: 10.883060 +epoch: 2, batch: 35202, sum loss: 4190.358887, avg loss: 2.578682, ppl: 13.179761 +epoch: 2, batch: 35203, sum loss: 4725.383301, avg loss: 2.531003, ppl: 12.566107 
+epoch: 2, batch: 35204, sum loss: 4664.669922, avg loss: 2.811736, ppl: 16.638777 +epoch: 2, batch: 35205, sum loss: 4536.879883, avg loss: 2.634657, ppl: 13.938533 +epoch: 2, batch: 35206, sum loss: 4085.903809, avg loss: 2.430639, ppl: 11.366140 +epoch: 2, batch: 35207, sum loss: 4133.269043, avg loss: 2.557716, ppl: 12.906304 +epoch: 2, batch: 35208, sum loss: 4547.078125, avg loss: 2.460540, ppl: 11.711135 +epoch: 2, batch: 35209, sum loss: 3759.830811, avg loss: 2.422572, ppl: 11.274817 +epoch: 2, batch: 35210, sum loss: 3868.586670, avg loss: 2.483047, ppl: 11.977702 +epoch: 2, batch: 35211, sum loss: 4185.744629, avg loss: 2.437824, ppl: 11.448108 +epoch: 2, batch: 35212, sum loss: 4101.096680, avg loss: 2.394102, ppl: 10.958351 +epoch: 2, batch: 35213, sum loss: 3951.019043, avg loss: 2.320035, ppl: 10.176028 +epoch: 2, batch: 35214, sum loss: 4370.468750, avg loss: 2.653594, ppl: 14.204993 +epoch: 2, batch: 35215, sum loss: 4094.488770, avg loss: 2.415627, ppl: 11.196794 +epoch: 2, batch: 35216, sum loss: 4960.677246, avg loss: 2.737681, ppl: 15.451107 +epoch: 2, batch: 35217, sum loss: 3655.966064, avg loss: 2.222472, ppl: 9.230117 +epoch: 2, batch: 35218, sum loss: 3116.187500, avg loss: 2.016950, ppl: 7.515367 +epoch: 2, batch: 35219, sum loss: 4821.165527, avg loss: 2.751807, ppl: 15.670923 +epoch: 2, batch: 35220, sum loss: 4519.624512, avg loss: 2.572353, ppl: 13.096606 +epoch: 2, batch: 35221, sum loss: 3105.293701, avg loss: 2.096755, ppl: 8.139712 +epoch: 2, batch: 35222, sum loss: 3396.748047, avg loss: 2.187217, ppl: 8.910383 +epoch: 2, batch: 35223, sum loss: 4085.080811, avg loss: 2.408656, ppl: 11.119008 +epoch: 2, batch: 35224, sum loss: 3830.740967, avg loss: 2.302128, ppl: 9.995431 +epoch: 2, batch: 35225, sum loss: 4853.970215, avg loss: 2.650994, ppl: 14.168119 +epoch: 2, batch: 35226, sum loss: 4064.449951, avg loss: 2.429438, ppl: 11.352502 +epoch: 2, batch: 35227, sum loss: 4475.858887, avg loss: 2.489354, ppl: 12.053489 +epoch: 2, 
batch: 35228, sum loss: 5481.329102, avg loss: 2.845965, ppl: 17.218172 +epoch: 2, batch: 35229, sum loss: 4348.614258, avg loss: 2.355696, ppl: 10.545463 +epoch: 2, batch: 35230, sum loss: 4338.127441, avg loss: 2.573029, ppl: 13.105465 +epoch: 2, batch: 35231, sum loss: 4767.520508, avg loss: 2.716536, ppl: 15.127826 +epoch: 2, batch: 35232, sum loss: 4091.124268, avg loss: 2.519165, ppl: 12.418223 +epoch: 2, batch: 35233, sum loss: 4175.917480, avg loss: 2.416619, ppl: 11.207899 +epoch: 2, batch: 35234, sum loss: 3511.891357, avg loss: 2.215704, ppl: 9.167863 +epoch: 2, batch: 35235, sum loss: 4066.928223, avg loss: 2.545012, ppl: 12.743375 +epoch: 2, batch: 35236, sum loss: 4186.094727, avg loss: 2.398908, ppl: 11.011147 +epoch: 2, batch: 35237, sum loss: 4541.282227, avg loss: 2.462734, ppl: 11.736861 +epoch: 2, batch: 35238, sum loss: 4618.115234, avg loss: 2.594447, ppl: 13.389180 +epoch: 2, batch: 35239, sum loss: 4142.199219, avg loss: 2.587258, ppl: 13.293266 +epoch: 2, batch: 35240, sum loss: 3715.519043, avg loss: 2.503719, ppl: 12.227886 +epoch: 2, batch: 35241, sum loss: 4878.602051, avg loss: 2.451559, ppl: 11.606425 +epoch: 2, batch: 35242, sum loss: 3625.244141, avg loss: 2.237805, ppl: 9.372737 +epoch: 2, batch: 35243, sum loss: 3960.883057, avg loss: 2.469378, ppl: 11.815101 +epoch: 2, batch: 35244, sum loss: 3460.218994, avg loss: 2.252747, ppl: 9.513833 +epoch: 2, batch: 35245, sum loss: 4221.927246, avg loss: 2.378551, ppl: 10.789253 +epoch: 2, batch: 35246, sum loss: 4784.952637, avg loss: 2.534403, ppl: 12.608899 +epoch: 2, batch: 35247, sum loss: 4840.614746, avg loss: 2.669948, ppl: 14.439213 +epoch: 2, batch: 35248, sum loss: 4695.381836, avg loss: 2.703156, ppl: 14.926766 +epoch: 2, batch: 35249, sum loss: 3576.340332, avg loss: 2.382638, ppl: 10.833448 +epoch: 2, batch: 35250, sum loss: 4842.060547, avg loss: 2.675172, ppl: 14.514840 +epoch: 2, batch: 35251, sum loss: 4268.282715, avg loss: 2.465790, ppl: 11.772779 +epoch: 2, batch: 
35252, sum loss: 4102.871094, avg loss: 2.467150, ppl: 11.788806 +epoch: 2, batch: 35253, sum loss: 5121.100586, avg loss: 2.721095, ppl: 15.196952 +epoch: 2, batch: 35254, sum loss: 3546.899170, avg loss: 2.204412, ppl: 9.064921 +epoch: 2, batch: 35255, sum loss: 3624.994629, avg loss: 2.373932, ppl: 10.739541 +epoch: 2, batch: 35256, sum loss: 4028.517822, avg loss: 2.415179, ppl: 11.191769 +epoch: 2, batch: 35257, sum loss: 4285.565430, avg loss: 2.233228, ppl: 9.329939 +epoch: 2, batch: 35258, sum loss: 4550.441406, avg loss: 2.528023, ppl: 12.528712 +epoch: 2, batch: 35259, sum loss: 4069.059814, avg loss: 2.497888, ppl: 12.156793 +epoch: 2, batch: 35260, sum loss: 4411.364258, avg loss: 2.693141, ppl: 14.778013 +epoch: 2, batch: 35261, sum loss: 4531.089844, avg loss: 2.545556, ppl: 12.750317 +epoch: 2, batch: 35262, sum loss: 4724.064941, avg loss: 2.519501, ppl: 12.422401 +epoch: 2, batch: 35263, sum loss: 4739.294434, avg loss: 2.425432, ppl: 11.307116 +epoch: 2, batch: 35264, sum loss: 4550.066895, avg loss: 2.619498, ppl: 13.728824 +epoch: 2, batch: 35265, sum loss: 5346.041016, avg loss: 2.812226, ppl: 16.646927 +epoch: 2, batch: 35266, sum loss: 3725.532959, avg loss: 2.377494, ppl: 10.777861 +epoch: 2, batch: 35267, sum loss: 4510.176270, avg loss: 2.607038, ppl: 13.558833 +epoch: 2, batch: 35268, sum loss: 4595.495605, avg loss: 2.466718, ppl: 11.783709 +epoch: 2, batch: 35269, sum loss: 4139.126953, avg loss: 2.444848, ppl: 11.528792 +epoch: 2, batch: 35270, sum loss: 4178.562988, avg loss: 2.642987, ppl: 14.055127 +epoch: 2, batch: 35271, sum loss: 3682.077637, avg loss: 2.583914, ppl: 13.248894 +epoch: 2, batch: 35272, sum loss: 4793.805664, avg loss: 2.779018, ppl: 16.103199 +epoch: 2, batch: 35273, sum loss: 4399.166992, avg loss: 2.252518, ppl: 9.511653 +epoch: 2, batch: 35274, sum loss: 4815.328125, avg loss: 2.740653, ppl: 15.497106 +epoch: 2, batch: 35275, sum loss: 4793.524902, avg loss: 2.598116, ppl: 13.438401 +epoch: 2, batch: 35276, sum 
loss: 4485.551758, avg loss: 2.414183, ppl: 11.180631 +epoch: 2, batch: 35277, sum loss: 4134.940918, avg loss: 2.476012, ppl: 11.893744 +epoch: 2, batch: 35278, sum loss: 4041.838135, avg loss: 2.428989, ppl: 11.347408 +epoch: 2, batch: 35279, sum loss: 4061.717773, avg loss: 2.275472, ppl: 9.732513 +epoch: 2, batch: 35280, sum loss: 4204.329102, avg loss: 2.626065, ppl: 13.819278 +epoch: 2, batch: 35281, sum loss: 3935.645752, avg loss: 2.252802, ppl: 9.514361 +epoch: 2, batch: 35282, sum loss: 3510.068848, avg loss: 2.260186, ppl: 9.584873 +epoch: 2, batch: 35283, sum loss: 4385.873047, avg loss: 2.436596, ppl: 11.434054 +epoch: 2, batch: 35284, sum loss: 4661.900391, avg loss: 2.723073, ppl: 15.227036 +epoch: 2, batch: 35285, sum loss: 4190.993652, avg loss: 2.511081, ppl: 12.318235 +epoch: 2, batch: 35286, sum loss: 4643.205566, avg loss: 2.330927, ppl: 10.287470 +epoch: 2, batch: 35287, sum loss: 3859.325684, avg loss: 2.329104, ppl: 10.268739 +epoch: 2, batch: 35288, sum loss: 4037.040283, avg loss: 2.276955, ppl: 9.746952 +epoch: 2, batch: 35289, sum loss: 4034.577637, avg loss: 2.487409, ppl: 12.030067 +epoch: 2, batch: 35290, sum loss: 3612.121094, avg loss: 2.174666, ppl: 8.799250 +epoch: 2, batch: 35291, sum loss: 4241.652344, avg loss: 2.580081, ppl: 13.198200 +epoch: 2, batch: 35292, sum loss: 4173.766602, avg loss: 2.364740, ppl: 10.641275 +epoch: 2, batch: 35293, sum loss: 4059.636230, avg loss: 2.281976, ppl: 9.796022 +epoch: 2, batch: 35294, sum loss: 4017.307129, avg loss: 2.407015, ppl: 11.100772 +epoch: 2, batch: 35295, sum loss: 4173.178223, avg loss: 2.520035, ppl: 12.429032 +epoch: 2, batch: 35296, sum loss: 3856.474365, avg loss: 2.518925, ppl: 12.415245 +epoch: 2, batch: 35297, sum loss: 3378.058594, avg loss: 2.069889, ppl: 7.923942 +epoch: 2, batch: 35298, sum loss: 4947.170410, avg loss: 2.607892, ppl: 13.570408 +epoch: 2, batch: 35299, sum loss: 3842.047852, avg loss: 2.489986, ppl: 12.061104 +epoch: 2, batch: 35300, sum loss: 
4562.586914, avg loss: 2.428200, ppl: 11.338449 +epoch: 2, batch: 35301, sum loss: 4350.176270, avg loss: 2.561941, ppl: 12.960955 +epoch: 2, batch: 35302, sum loss: 4527.697266, avg loss: 2.528027, ppl: 12.528767 +epoch: 2, batch: 35303, sum loss: 3616.018311, avg loss: 2.394714, ppl: 10.965063 +epoch: 2, batch: 35304, sum loss: 4292.062012, avg loss: 2.389790, ppl: 10.911198 +epoch: 2, batch: 35305, sum loss: 4413.180176, avg loss: 2.465464, ppl: 11.768937 +epoch: 2, batch: 35306, sum loss: 4335.449707, avg loss: 2.597633, ppl: 13.431909 +epoch: 2, batch: 35307, sum loss: 4187.788086, avg loss: 2.433346, ppl: 11.396950 +epoch: 2, batch: 35308, sum loss: 4118.155762, avg loss: 2.581916, ppl: 13.222446 +epoch: 2, batch: 35309, sum loss: 4445.671875, avg loss: 2.388862, ppl: 10.901080 +epoch: 2, batch: 35310, sum loss: 3888.660156, avg loss: 2.327146, ppl: 10.248646 +epoch: 2, batch: 35311, sum loss: 4179.752441, avg loss: 2.524005, ppl: 12.478476 +epoch: 2, batch: 35312, sum loss: 4315.809570, avg loss: 2.609317, ppl: 13.589760 +epoch: 2, batch: 35313, sum loss: 4032.014648, avg loss: 2.470597, ppl: 11.829510 +epoch: 2, batch: 35314, sum loss: 3940.477051, avg loss: 2.439924, ppl: 11.472169 +epoch: 2, batch: 35315, sum loss: 3757.634766, avg loss: 2.394923, ppl: 10.967359 +epoch: 2, batch: 35316, sum loss: 4104.105469, avg loss: 2.487337, ppl: 12.029195 +epoch: 2, batch: 35317, sum loss: 4327.162598, avg loss: 2.542399, ppl: 12.710122 +epoch: 2, batch: 35318, sum loss: 3569.557617, avg loss: 2.468574, ppl: 11.805598 +epoch: 2, batch: 35319, sum loss: 4605.217773, avg loss: 2.616601, ppl: 13.689115 +epoch: 2, batch: 35320, sum loss: 3556.725830, avg loss: 2.296143, ppl: 9.935789 +epoch: 2, batch: 35321, sum loss: 3794.457520, avg loss: 2.262646, ppl: 9.608479 +epoch: 2, batch: 35322, sum loss: 3905.157227, avg loss: 2.406135, ppl: 11.091012 +epoch: 2, batch: 35323, sum loss: 4486.001465, avg loss: 2.545971, ppl: 12.755612 +epoch: 2, batch: 35324, sum loss: 
4094.844727, avg loss: 2.408732, ppl: 11.119857 +epoch: 2, batch: 35325, sum loss: 3622.537109, avg loss: 2.133414, ppl: 8.443646 +epoch: 2, batch: 35326, sum loss: 4381.273438, avg loss: 2.479498, ppl: 11.935276 +epoch: 2, batch: 35327, sum loss: 5222.270508, avg loss: 2.712868, ppl: 15.072437 +epoch: 2, batch: 35328, sum loss: 4394.593750, avg loss: 2.453709, ppl: 11.631412 +epoch: 2, batch: 35329, sum loss: 4380.372070, avg loss: 2.441679, ppl: 11.492320 +epoch: 2, batch: 35330, sum loss: 4015.052246, avg loss: 2.420164, ppl: 11.247705 +epoch: 2, batch: 35331, sum loss: 4047.187012, avg loss: 2.464791, ppl: 11.761027 +epoch: 2, batch: 35332, sum loss: 4100.225098, avg loss: 2.392197, ppl: 10.937493 +epoch: 2, batch: 35333, sum loss: 4159.938477, avg loss: 2.411559, ppl: 11.151328 +epoch: 2, batch: 35334, sum loss: 3906.306396, avg loss: 2.544825, ppl: 12.740999 +epoch: 2, batch: 35335, sum loss: 4122.847168, avg loss: 2.402592, ppl: 11.051783 +epoch: 2, batch: 35336, sum loss: 4554.261230, avg loss: 2.623422, ppl: 13.782813 +epoch: 2, batch: 35337, sum loss: 4080.670166, avg loss: 2.383569, ppl: 10.843534 +epoch: 2, batch: 35338, sum loss: 5190.529297, avg loss: 2.750678, ppl: 15.653238 +epoch: 2, batch: 35339, sum loss: 4641.029785, avg loss: 2.463392, ppl: 11.744579 +epoch: 2, batch: 35340, sum loss: 4269.295410, avg loss: 2.515790, ppl: 12.376382 +epoch: 2, batch: 35341, sum loss: 4759.419434, avg loss: 2.684388, ppl: 14.649232 +epoch: 2, batch: 35342, sum loss: 3670.305908, avg loss: 2.298250, ppl: 9.956747 +epoch: 2, batch: 35343, sum loss: 4065.549316, avg loss: 2.471458, ppl: 11.839702 +epoch: 2, batch: 35344, sum loss: 3879.824707, avg loss: 2.417336, ppl: 11.215942 +epoch: 2, batch: 35345, sum loss: 3838.682373, avg loss: 2.293120, ppl: 9.905795 +epoch: 2, batch: 35346, sum loss: 4423.521973, avg loss: 2.698915, ppl: 14.863596 +epoch: 2, batch: 35347, sum loss: 3906.616943, avg loss: 2.555014, ppl: 12.871485 +epoch: 2, batch: 35348, sum loss: 
3649.281250, avg loss: 2.296590, ppl: 9.940227 +epoch: 2, batch: 35349, sum loss: 3674.725098, avg loss: 2.370790, ppl: 10.705852 +epoch: 2, batch: 35350, sum loss: 3840.532227, avg loss: 2.528329, ppl: 12.532548 +epoch: 2, batch: 35351, sum loss: 4643.950195, avg loss: 2.725323, ppl: 15.261342 +epoch: 2, batch: 35352, sum loss: 3593.519043, avg loss: 2.257236, ppl: 9.556634 +epoch: 2, batch: 35353, sum loss: 3838.295166, avg loss: 2.349018, ppl: 10.475276 +epoch: 2, batch: 35354, sum loss: 4651.739258, avg loss: 2.494230, ppl: 12.112404 +epoch: 2, batch: 35355, sum loss: 4673.809570, avg loss: 2.361703, ppl: 10.609000 +epoch: 2, batch: 35356, sum loss: 3871.518799, avg loss: 2.458107, ppl: 11.682675 +epoch: 2, batch: 35357, sum loss: 4202.848145, avg loss: 2.488365, ppl: 12.041572 +epoch: 2, batch: 35358, sum loss: 4037.292480, avg loss: 2.396019, ppl: 10.979383 +epoch: 2, batch: 35359, sum loss: 4413.343750, avg loss: 2.355039, ppl: 10.538544 +epoch: 2, batch: 35360, sum loss: 5430.693848, avg loss: 2.782118, ppl: 16.153194 +epoch: 2, batch: 35361, sum loss: 5128.644531, avg loss: 2.482403, ppl: 11.969995 +epoch: 2, batch: 35362, sum loss: 4911.105957, avg loss: 2.616466, ppl: 13.687262 +epoch: 2, batch: 35363, sum loss: 4157.531250, avg loss: 2.561634, ppl: 12.956966 +epoch: 2, batch: 35364, sum loss: 4164.328613, avg loss: 2.401573, ppl: 11.040534 +epoch: 2, batch: 35365, sum loss: 4207.035645, avg loss: 2.617944, ppl: 13.707509 +epoch: 2, batch: 35366, sum loss: 4272.393066, avg loss: 2.638909, ppl: 13.997918 +epoch: 2, batch: 35367, sum loss: 3498.710938, avg loss: 2.416237, ppl: 11.203619 +epoch: 2, batch: 35368, sum loss: 4786.717285, avg loss: 2.439713, ppl: 11.469748 +epoch: 2, batch: 35369, sum loss: 5692.233398, avg loss: 2.817937, ppl: 16.742283 +epoch: 2, batch: 35370, sum loss: 3926.458252, avg loss: 2.352582, ppl: 10.512673 +epoch: 2, batch: 35371, sum loss: 3654.219971, avg loss: 2.298252, ppl: 9.956759 +epoch: 2, batch: 35372, sum loss: 
4504.303223, avg loss: 2.461368, ppl: 11.720836 +epoch: 2, batch: 35373, sum loss: 3926.924805, avg loss: 2.331903, ppl: 10.297522 +epoch: 2, batch: 35374, sum loss: 4159.782715, avg loss: 2.606380, ppl: 13.549914 +epoch: 2, batch: 35375, sum loss: 4635.054688, avg loss: 2.626093, ppl: 13.819676 +epoch: 2, batch: 35376, sum loss: 3898.805908, avg loss: 2.347264, ppl: 10.456923 +epoch: 2, batch: 35377, sum loss: 4371.325684, avg loss: 2.432569, ppl: 11.388096 +epoch: 2, batch: 35378, sum loss: 3606.885742, avg loss: 2.259953, ppl: 9.582644 +epoch: 2, batch: 35379, sum loss: 4572.608887, avg loss: 2.650788, ppl: 14.165194 +epoch: 2, batch: 35380, sum loss: 3753.772217, avg loss: 2.378816, ppl: 10.792121 +epoch: 2, batch: 35381, sum loss: 4449.000000, avg loss: 2.565744, ppl: 13.010333 +epoch: 2, batch: 35382, sum loss: 5045.882812, avg loss: 2.488108, ppl: 12.038477 +epoch: 2, batch: 35383, sum loss: 4920.622070, avg loss: 2.688865, ppl: 14.714961 +epoch: 2, batch: 35384, sum loss: 3817.294189, avg loss: 2.313512, ppl: 10.109864 +epoch: 2, batch: 35385, sum loss: 4740.909180, avg loss: 2.465371, ppl: 11.767852 +epoch: 2, batch: 35386, sum loss: 4819.994629, avg loss: 2.601184, ppl: 13.479693 +epoch: 2, batch: 35387, sum loss: 3838.522949, avg loss: 2.173569, ppl: 8.789598 +epoch: 2, batch: 35388, sum loss: 3717.138184, avg loss: 2.431091, ppl: 11.371283 +epoch: 2, batch: 35389, sum loss: 4184.436523, avg loss: 2.387015, ppl: 10.880961 +epoch: 2, batch: 35390, sum loss: 3223.096191, avg loss: 2.290758, ppl: 9.882424 +epoch: 2, batch: 35391, sum loss: 5534.331543, avg loss: 2.942228, ppl: 18.958044 +epoch: 2, batch: 35392, sum loss: 4317.917969, avg loss: 2.442261, ppl: 11.499016 +epoch: 2, batch: 35393, sum loss: 3907.709961, avg loss: 2.479511, ppl: 11.935430 +epoch: 2, batch: 35394, sum loss: 4567.172852, avg loss: 2.574505, ppl: 13.124823 +epoch: 2, batch: 35395, sum loss: 3640.243652, avg loss: 2.471313, ppl: 11.837977 +epoch: 2, batch: 35396, sum loss: 
4545.497070, avg loss: 2.678549, ppl: 14.563943 +epoch: 2, batch: 35397, sum loss: 4155.368652, avg loss: 2.330549, ppl: 10.283586 +epoch: 2, batch: 35398, sum loss: 4166.394043, avg loss: 2.468243, ppl: 11.801695 +epoch: 2, batch: 35399, sum loss: 4131.295410, avg loss: 2.364794, ppl: 10.641847 +epoch: 2, batch: 35400, sum loss: 4552.120605, avg loss: 2.455297, ppl: 11.649893 +epoch: 2, batch: 35401, sum loss: 3753.473389, avg loss: 2.599358, ppl: 13.455101 +epoch: 2, batch: 35402, sum loss: 4200.406250, avg loss: 2.536477, ppl: 12.635080 +epoch: 2, batch: 35403, sum loss: 4682.255371, avg loss: 2.443766, ppl: 11.516328 +epoch: 2, batch: 35404, sum loss: 4762.061035, avg loss: 2.752636, ppl: 15.683927 +epoch: 2, batch: 35405, sum loss: 4385.102539, avg loss: 2.782425, ppl: 16.158163 +epoch: 2, batch: 35406, sum loss: 3947.592773, avg loss: 2.253192, ppl: 9.518073 +epoch: 2, batch: 35407, sum loss: 2740.450439, avg loss: 1.913723, ppl: 6.778274 +epoch: 2, batch: 35408, sum loss: 3520.749512, avg loss: 2.229734, ppl: 9.297390 +epoch: 2, batch: 35409, sum loss: 3595.004395, avg loss: 2.322354, ppl: 10.199659 +epoch: 2, batch: 35410, sum loss: 4828.008789, avg loss: 2.549107, ppl: 12.795673 +epoch: 2, batch: 35411, sum loss: 4938.871582, avg loss: 2.725646, ppl: 15.266280 +epoch: 2, batch: 35412, sum loss: 4279.436523, avg loss: 2.553363, ppl: 12.850247 +epoch: 2, batch: 35413, sum loss: 3600.799805, avg loss: 2.306726, ppl: 10.041499 +epoch: 2, batch: 35414, sum loss: 4494.229004, avg loss: 2.434577, ppl: 11.410991 +epoch: 2, batch: 35415, sum loss: 3493.820557, avg loss: 2.202913, ppl: 9.051344 +epoch: 2, batch: 35416, sum loss: 3706.896240, avg loss: 2.283978, ppl: 9.815649 +epoch: 2, batch: 35417, sum loss: 4798.098145, avg loss: 2.440538, ppl: 11.479217 +epoch: 2, batch: 35418, sum loss: 4015.745605, avg loss: 2.424967, ppl: 11.301857 +epoch: 2, batch: 35419, sum loss: 4744.859863, avg loss: 2.536002, ppl: 12.629081 +epoch: 2, batch: 35420, sum loss: 4038.026367, 
avg loss: 2.463713, ppl: 11.748357 +epoch: 2, batch: 35421, sum loss: 4786.079102, avg loss: 2.790717, ppl: 16.292692 +epoch: 2, batch: 35422, sum loss: 4609.921387, avg loss: 2.538503, ppl: 12.660703 +epoch: 2, batch: 35423, sum loss: 3925.183350, avg loss: 2.313013, ppl: 10.104828 +epoch: 2, batch: 35424, sum loss: 4138.262695, avg loss: 2.615842, ppl: 13.678734 +epoch: 2, batch: 35425, sum loss: 5373.775391, avg loss: 2.835765, ppl: 17.043440 +epoch: 2, batch: 35426, sum loss: 3047.065674, avg loss: 2.005968, ppl: 7.433287 +epoch: 2, batch: 35427, sum loss: 3504.751953, avg loss: 2.236600, ppl: 9.361447 +epoch: 2, batch: 35428, sum loss: 3658.901367, avg loss: 2.274022, ppl: 9.718409 +epoch: 2, batch: 35429, sum loss: 4298.846191, avg loss: 2.524278, ppl: 12.481885 +epoch: 2, batch: 35430, sum loss: 4170.710449, avg loss: 2.601815, ppl: 13.488203 +epoch: 2, batch: 35431, sum loss: 4035.377441, avg loss: 2.622078, ppl: 13.764292 +epoch: 2, batch: 35432, sum loss: 4626.891602, avg loss: 2.428814, ppl: 11.345423 +epoch: 2, batch: 35433, sum loss: 4333.003418, avg loss: 2.645301, ppl: 14.087687 +epoch: 2, batch: 35434, sum loss: 4277.563965, avg loss: 2.691985, ppl: 14.760945 +epoch: 2, batch: 35435, sum loss: 4070.957031, avg loss: 2.476251, ppl: 11.896582 +epoch: 2, batch: 35436, sum loss: 4400.620117, avg loss: 2.573462, ppl: 13.111136 +epoch: 2, batch: 35437, sum loss: 4086.114014, avg loss: 2.483960, ppl: 11.988642 +epoch: 2, batch: 35438, sum loss: 3214.949219, avg loss: 2.167869, ppl: 8.739638 +epoch: 2, batch: 35439, sum loss: 4235.779297, avg loss: 2.439965, ppl: 11.472639 +epoch: 2, batch: 35440, sum loss: 4035.198730, avg loss: 2.347411, ppl: 10.458454 +epoch: 2, batch: 35441, sum loss: 4655.837402, avg loss: 2.716358, ppl: 15.125135 +epoch: 2, batch: 35442, sum loss: 3651.975830, avg loss: 2.394738, ppl: 10.965327 +epoch: 2, batch: 35443, sum loss: 4024.959229, avg loss: 2.415942, ppl: 11.200315 +epoch: 2, batch: 35444, sum loss: 4008.347900, avg loss: 
2.618124, ppl: 13.709983 +epoch: 2, batch: 35445, sum loss: 4641.098633, avg loss: 2.525081, ppl: 12.491903 +epoch: 2, batch: 35446, sum loss: 4082.247559, avg loss: 2.372020, ppl: 10.719028 +epoch: 2, batch: 35447, sum loss: 3733.132812, avg loss: 2.274913, ppl: 9.727076 +epoch: 2, batch: 35448, sum loss: 4687.911133, avg loss: 2.471224, ppl: 11.836922 +epoch: 2, batch: 35449, sum loss: 3721.738525, avg loss: 2.323183, ppl: 10.208111 +epoch: 2, batch: 35450, sum loss: 3952.766602, avg loss: 2.456660, ppl: 11.665786 +epoch: 2, batch: 35451, sum loss: 4181.194824, avg loss: 2.375679, ppl: 10.758313 +epoch: 2, batch: 35452, sum loss: 4086.865723, avg loss: 2.391378, ppl: 10.928548 +epoch: 2, batch: 35453, sum loss: 4034.768555, avg loss: 2.523307, ppl: 12.469770 +epoch: 2, batch: 35454, sum loss: 4409.823730, avg loss: 2.464966, ppl: 11.763080 +epoch: 2, batch: 35455, sum loss: 4340.008789, avg loss: 2.472939, ppl: 11.857250 +epoch: 2, batch: 35456, sum loss: 3851.172363, avg loss: 2.449855, ppl: 11.586668 +epoch: 2, batch: 35457, sum loss: 4348.520508, avg loss: 2.341691, ppl: 10.398807 +epoch: 2, batch: 35458, sum loss: 4420.644531, avg loss: 2.631336, ppl: 13.892321 +epoch: 2, batch: 35459, sum loss: 3922.674316, avg loss: 2.623862, ppl: 13.788877 +epoch: 2, batch: 35460, sum loss: 4849.376953, avg loss: 2.674781, ppl: 14.509167 +epoch: 2, batch: 35461, sum loss: 3586.057129, avg loss: 2.298755, ppl: 9.961769 +epoch: 2, batch: 35462, sum loss: 4603.405762, avg loss: 2.439537, ppl: 11.467725 +epoch: 2, batch: 35463, sum loss: 4243.474609, avg loss: 2.537963, ppl: 12.653873 +epoch: 2, batch: 35464, sum loss: 4337.603516, avg loss: 2.516011, ppl: 12.379124 +epoch: 2, batch: 35465, sum loss: 4027.623047, avg loss: 2.355335, ppl: 10.541659 +epoch: 2, batch: 35466, sum loss: 4474.635742, avg loss: 2.554016, ppl: 12.858639 +epoch: 2, batch: 35467, sum loss: 3598.354980, avg loss: 2.283220, ppl: 9.808212 +epoch: 2, batch: 35468, sum loss: 4437.477539, avg loss: 2.553209, 
ppl: 12.848271 +epoch: 2, batch: 35469, sum loss: 3857.002930, avg loss: 2.323496, ppl: 10.211309 +epoch: 2, batch: 35470, sum loss: 3456.899658, avg loss: 2.165977, ppl: 8.723123 +epoch: 2, batch: 35471, sum loss: 4029.325684, avg loss: 2.540558, ppl: 12.686752 +epoch: 2, batch: 35472, sum loss: 4566.932617, avg loss: 2.465946, ppl: 11.774618 +epoch: 2, batch: 35473, sum loss: 4178.584961, avg loss: 2.628041, ppl: 13.846615 +epoch: 2, batch: 35474, sum loss: 4216.448730, avg loss: 2.591548, ppl: 13.350422 +epoch: 2, batch: 35475, sum loss: 4279.066406, avg loss: 2.508245, ppl: 12.283357 +epoch: 2, batch: 35476, sum loss: 3770.399658, avg loss: 2.245622, ppl: 9.446291 +epoch: 2, batch: 35477, sum loss: 4588.382812, avg loss: 2.501844, ppl: 12.204984 +epoch: 2, batch: 35478, sum loss: 4570.500977, avg loss: 2.522352, ppl: 12.457857 +epoch: 2, batch: 35479, sum loss: 3442.984863, avg loss: 2.493110, ppl: 12.098845 +epoch: 2, batch: 35480, sum loss: 5292.308105, avg loss: 2.690548, ppl: 14.739750 +epoch: 2, batch: 35481, sum loss: 4111.319336, avg loss: 2.540988, ppl: 12.692210 +epoch: 2, batch: 35482, sum loss: 4003.742676, avg loss: 2.472973, ppl: 11.857643 +epoch: 2, batch: 35483, sum loss: 4187.209961, avg loss: 2.658546, ppl: 14.275517 +epoch: 2, batch: 35484, sum loss: 4988.169922, avg loss: 2.861830, ppl: 17.493511 +epoch: 2, batch: 35485, sum loss: 3504.188965, avg loss: 2.230547, ppl: 9.304954 +epoch: 2, batch: 35486, sum loss: 4254.866211, avg loss: 2.299928, ppl: 9.973461 +epoch: 2, batch: 35487, sum loss: 3644.541504, avg loss: 2.311060, ppl: 10.085109 +epoch: 2, batch: 35488, sum loss: 4154.230469, avg loss: 2.301513, ppl: 9.989282 +epoch: 2, batch: 35489, sum loss: 4189.564453, avg loss: 2.364314, ppl: 10.636741 +epoch: 2, batch: 35490, sum loss: 3944.884521, avg loss: 2.417209, ppl: 11.214512 +epoch: 2, batch: 35491, sum loss: 3192.031982, avg loss: 2.116732, ppl: 8.303957 +epoch: 2, batch: 35492, sum loss: 4595.143555, avg loss: 2.362542, ppl: 
10.617905 +epoch: 2, batch: 35493, sum loss: 4662.892578, avg loss: 2.438751, ppl: 11.458722 +epoch: 2, batch: 35494, sum loss: 4944.955078, avg loss: 2.548946, ppl: 12.793611 +epoch: 2, batch: 35495, sum loss: 4337.049805, avg loss: 2.612681, ppl: 13.635554 +epoch: 2, batch: 35496, sum loss: 3135.105713, avg loss: 2.001983, ppl: 7.403724 +epoch: 2, batch: 35497, sum loss: 4603.366211, avg loss: 2.561695, ppl: 12.957767 +epoch: 2, batch: 35498, sum loss: 4809.730469, avg loss: 2.732801, ppl: 15.375897 +epoch: 2, batch: 35499, sum loss: 4494.049316, avg loss: 2.445076, ppl: 11.531423 +epoch: 2, batch: 35500, sum loss: 4025.845703, avg loss: 2.390645, ppl: 10.920533 +epoch: 2, batch: 35501, sum loss: 4238.053711, avg loss: 2.468290, ppl: 11.802246 +epoch: 2, batch: 35502, sum loss: 4653.821289, avg loss: 2.586893, ppl: 13.288424 +epoch: 2, batch: 35503, sum loss: 4699.745605, avg loss: 2.437628, ppl: 11.445854 +epoch: 2, batch: 35504, sum loss: 4327.541504, avg loss: 2.381696, ppl: 10.823243 +epoch: 2, batch: 35505, sum loss: 3738.039307, avg loss: 2.364351, ppl: 10.637134 +epoch: 2, batch: 35506, sum loss: 3376.328613, avg loss: 2.053728, ppl: 7.796913 +epoch: 2, batch: 35507, sum loss: 4028.705811, avg loss: 2.553046, ppl: 12.846168 +epoch: 2, batch: 35508, sum loss: 3719.309570, avg loss: 2.443699, ppl: 11.515556 +epoch: 2, batch: 35509, sum loss: 4520.333496, avg loss: 2.394244, ppl: 10.959914 +epoch: 2, batch: 35510, sum loss: 4822.699219, avg loss: 2.692741, ppl: 14.772110 +epoch: 2, batch: 35511, sum loss: 4870.530762, avg loss: 2.786345, ppl: 16.221621 +epoch: 2, batch: 35512, sum loss: 3476.240234, avg loss: 2.392457, ppl: 10.940344 +epoch: 2, batch: 35513, sum loss: 4099.183105, avg loss: 2.497979, ppl: 12.157894 +epoch: 2, batch: 35514, sum loss: 3525.417236, avg loss: 2.299685, ppl: 9.971044 +epoch: 2, batch: 35515, sum loss: 3954.437744, avg loss: 2.151489, ppl: 8.597655 +epoch: 2, batch: 35516, sum loss: 4656.422852, avg loss: 2.465020, ppl: 11.763720 
+epoch: 2, batch: 35517, sum loss: 4972.605957, avg loss: 2.568495, ppl: 13.046172 +epoch: 2, batch: 35518, sum loss: 4424.675781, avg loss: 2.535631, ppl: 12.624391 +epoch: 2, batch: 35519, sum loss: 4216.522461, avg loss: 2.440117, ppl: 11.474384 +epoch: 2, batch: 35520, sum loss: 5044.984375, avg loss: 2.607227, ppl: 13.561391 +epoch: 2, batch: 35521, sum loss: 4523.992188, avg loss: 2.351347, ppl: 10.499705 +epoch: 2, batch: 35522, sum loss: 4104.530762, avg loss: 2.389133, ppl: 10.904038 +epoch: 2, batch: 35523, sum loss: 3437.123291, avg loss: 2.341365, ppl: 10.395412 +epoch: 2, batch: 35524, sum loss: 3580.349365, avg loss: 2.363267, ppl: 10.625608 +epoch: 2, batch: 35525, sum loss: 4703.996094, avg loss: 2.496813, ppl: 12.143734 +epoch: 2, batch: 35526, sum loss: 4279.553711, avg loss: 2.515904, ppl: 12.377798 +epoch: 2, batch: 35527, sum loss: 5350.257812, avg loss: 2.552604, ppl: 12.840496 +epoch: 2, batch: 35528, sum loss: 4602.155762, avg loss: 2.523112, ppl: 12.467333 +epoch: 2, batch: 35529, sum loss: 4387.789062, avg loss: 2.294869, ppl: 9.923133 +epoch: 2, batch: 35530, sum loss: 4876.757812, avg loss: 2.585768, ppl: 13.273476 +epoch: 2, batch: 35531, sum loss: 4346.551270, avg loss: 2.518280, ppl: 12.407238 +epoch: 2, batch: 35532, sum loss: 4528.898926, avg loss: 2.523063, ppl: 12.466729 +epoch: 2, batch: 35533, sum loss: 3995.249023, avg loss: 2.422831, ppl: 11.277745 +epoch: 2, batch: 35534, sum loss: 4564.812012, avg loss: 2.590699, ppl: 13.339098 +epoch: 2, batch: 35535, sum loss: 4541.393555, avg loss: 2.516007, ppl: 12.379073 +epoch: 2, batch: 35536, sum loss: 3736.319336, avg loss: 2.494205, ppl: 12.112101 +epoch: 2, batch: 35537, sum loss: 3570.221436, avg loss: 2.158538, ppl: 8.658468 +epoch: 2, batch: 35538, sum loss: 4669.138184, avg loss: 2.435649, ppl: 11.423226 +epoch: 2, batch: 35539, sum loss: 3876.509766, avg loss: 2.480173, ppl: 11.943326 +epoch: 2, batch: 35540, sum loss: 3631.121826, avg loss: 2.360937, ppl: 10.600883 +epoch: 
2, batch: 35541, sum loss: 3548.079346, avg loss: 2.518154, ppl: 12.405680 +epoch: 2, batch: 35542, sum loss: 3603.383301, avg loss: 2.479961, ppl: 11.940798 +epoch: 2, batch: 35543, sum loss: 4829.601562, avg loss: 2.614836, ppl: 13.664975 +epoch: 2, batch: 35544, sum loss: 4264.195801, avg loss: 2.509827, ppl: 12.302800 +epoch: 2, batch: 35545, sum loss: 5017.931152, avg loss: 2.502709, ppl: 12.215537 +epoch: 2, batch: 35546, sum loss: 4392.908691, avg loss: 2.617943, ppl: 13.707502 +epoch: 2, batch: 35547, sum loss: 3735.020264, avg loss: 2.284416, ppl: 9.819949 +epoch: 2, batch: 35548, sum loss: 4021.768799, avg loss: 2.430072, ppl: 11.359698 +epoch: 2, batch: 35549, sum loss: 4457.277344, avg loss: 2.429034, ppl: 11.347915 +epoch: 2, batch: 35550, sum loss: 4292.331055, avg loss: 2.633332, ppl: 13.920074 +epoch: 2, batch: 35551, sum loss: 4290.735840, avg loss: 2.529915, ppl: 12.552441 +epoch: 2, batch: 35552, sum loss: 4106.777832, avg loss: 2.579634, ppl: 13.192311 +epoch: 2, batch: 35553, sum loss: 4908.343262, avg loss: 2.745158, ppl: 15.567080 +epoch: 2, batch: 35554, sum loss: 4597.894531, avg loss: 2.492084, ppl: 12.086435 +epoch: 2, batch: 35555, sum loss: 3144.775146, avg loss: 2.100718, ppl: 8.172035 +epoch: 2, batch: 35556, sum loss: 3374.943115, avg loss: 2.136040, ppl: 8.465846 +epoch: 2, batch: 35557, sum loss: 3849.777344, avg loss: 2.398615, ppl: 11.007921 +epoch: 2, batch: 35558, sum loss: 3924.494629, avg loss: 2.597283, ppl: 13.427205 +epoch: 2, batch: 35559, sum loss: 5086.576660, avg loss: 2.698449, ppl: 14.856673 +epoch: 2, batch: 35560, sum loss: 3630.590820, avg loss: 2.255025, ppl: 9.535536 +epoch: 2, batch: 35561, sum loss: 4771.331055, avg loss: 2.598764, ppl: 13.447109 +epoch: 2, batch: 35562, sum loss: 4259.197266, avg loss: 2.353148, ppl: 10.518627 +epoch: 2, batch: 35563, sum loss: 4034.545898, avg loss: 2.487390, ppl: 12.029832 +epoch: 2, batch: 35564, sum loss: 3815.026611, avg loss: 2.414574, ppl: 11.185003 +epoch: 2, batch: 
35565, sum loss: 4437.031738, avg loss: 2.679367, ppl: 14.575865 +epoch: 2, batch: 35566, sum loss: 4074.495605, avg loss: 2.546560, ppl: 12.763121 +epoch: 2, batch: 35567, sum loss: 4179.501465, avg loss: 2.507199, ppl: 12.270515 +epoch: 2, batch: 35568, sum loss: 4199.320312, avg loss: 2.385977, ppl: 10.869680 +epoch: 2, batch: 35569, sum loss: 4005.075439, avg loss: 2.425848, ppl: 11.311823 +epoch: 2, batch: 35570, sum loss: 2907.854248, avg loss: 2.201252, ppl: 9.036322 +epoch: 2, batch: 35571, sum loss: 4312.210449, avg loss: 2.414452, ppl: 11.183636 +epoch: 2, batch: 35572, sum loss: 3889.545898, avg loss: 2.615700, ppl: 13.676786 +epoch: 2, batch: 35573, sum loss: 4294.598633, avg loss: 2.472423, ppl: 11.851129 +epoch: 2, batch: 35574, sum loss: 4669.161133, avg loss: 2.736906, ppl: 15.439136 +epoch: 2, batch: 35575, sum loss: 4117.249512, avg loss: 2.251093, ppl: 9.498116 +epoch: 2, batch: 35576, sum loss: 4032.652100, avg loss: 2.644362, ppl: 14.074462 +epoch: 2, batch: 35577, sum loss: 4222.802734, avg loss: 2.368370, ppl: 10.679965 +epoch: 2, batch: 35578, sum loss: 4683.911133, avg loss: 2.287066, ppl: 9.846007 +epoch: 2, batch: 35579, sum loss: 3880.664062, avg loss: 2.323751, ppl: 10.213915 +epoch: 2, batch: 35580, sum loss: 4166.510254, avg loss: 2.383587, ppl: 10.843731 +epoch: 2, batch: 35581, sum loss: 4501.899902, avg loss: 2.422982, ppl: 11.279439 +epoch: 2, batch: 35582, sum loss: 4410.203613, avg loss: 2.589667, ppl: 13.325337 +epoch: 2, batch: 35583, sum loss: 4717.369629, avg loss: 2.600535, ppl: 13.470939 +epoch: 2, batch: 35584, sum loss: 4120.863281, avg loss: 2.540606, ppl: 12.687361 +epoch: 2, batch: 35585, sum loss: 4395.149414, avg loss: 2.441750, ppl: 11.493134 +epoch: 2, batch: 35586, sum loss: 4128.947754, avg loss: 2.484325, ppl: 11.993021 +epoch: 2, batch: 35587, sum loss: 3875.844482, avg loss: 2.484516, ppl: 11.995309 +epoch: 2, batch: 35588, sum loss: 3998.261230, avg loss: 2.458955, ppl: 11.692590 +epoch: 2, batch: 35589, sum 
loss: 4374.604492, avg loss: 2.517034, ppl: 12.391783 +epoch: 2, batch: 35590, sum loss: 5015.491699, avg loss: 2.900805, ppl: 18.188782 +epoch: 2, batch: 35591, sum loss: 4193.391602, avg loss: 2.604591, ppl: 13.525691 +epoch: 2, batch: 35592, sum loss: 4272.158203, avg loss: 2.589187, ppl: 13.318937 +epoch: 2, batch: 35593, sum loss: 4776.551758, avg loss: 2.588917, ppl: 13.315340 +epoch: 2, batch: 35594, sum loss: 4768.750488, avg loss: 2.524484, ppl: 12.484453 +epoch: 2, batch: 35595, sum loss: 4467.120117, avg loss: 2.508209, ppl: 12.282911 +epoch: 2, batch: 35596, sum loss: 4235.362305, avg loss: 2.370096, ppl: 10.698424 +epoch: 2, batch: 35597, sum loss: 2939.402344, avg loss: 2.075849, ppl: 7.971312 +epoch: 2, batch: 35598, sum loss: 4871.982910, avg loss: 2.503588, ppl: 12.226289 +epoch: 2, batch: 35599, sum loss: 3926.146729, avg loss: 2.308140, ppl: 10.055706 +epoch: 2, batch: 35600, sum loss: 4643.394531, avg loss: 2.556935, ppl: 12.896234 +epoch: 2, batch: 35601, sum loss: 4137.289062, avg loss: 2.401212, ppl: 11.036550 +epoch: 2, batch: 35602, sum loss: 3983.869141, avg loss: 2.263562, ppl: 9.617285 +epoch: 2, batch: 35603, sum loss: 3509.821533, avg loss: 2.161220, ppl: 8.681724 +epoch: 2, batch: 35604, sum loss: 4433.300781, avg loss: 2.507523, ppl: 12.274489 +epoch: 2, batch: 35605, sum loss: 4095.524414, avg loss: 2.361894, ppl: 10.611032 +epoch: 2, batch: 35606, sum loss: 5194.614258, avg loss: 2.581816, ppl: 13.221131 +epoch: 2, batch: 35607, sum loss: 3655.481689, avg loss: 2.373689, ppl: 10.736933 +epoch: 2, batch: 35608, sum loss: 3651.518799, avg loss: 2.366506, ppl: 10.660082 +epoch: 2, batch: 35609, sum loss: 5129.670898, avg loss: 2.577724, ppl: 13.167135 +epoch: 2, batch: 35610, sum loss: 3713.272461, avg loss: 2.304949, ppl: 10.023665 +epoch: 2, batch: 35611, sum loss: 3333.555664, avg loss: 2.281695, ppl: 9.793262 +epoch: 2, batch: 35612, sum loss: 3666.679443, avg loss: 2.304638, ppl: 10.020554 +epoch: 2, batch: 35613, sum loss: 
3680.902100, avg loss: 2.561519, ppl: 12.955478 +epoch: 2, batch: 35614, sum loss: 4881.476562, avg loss: 2.614610, ppl: 13.661883 +epoch: 2, batch: 35615, sum loss: 4127.959961, avg loss: 2.319079, ppl: 10.166304 +epoch: 2, batch: 35616, sum loss: 3825.337891, avg loss: 2.213737, ppl: 9.149848 +epoch: 2, batch: 35617, sum loss: 4875.007324, avg loss: 2.720428, ppl: 15.186824 +epoch: 2, batch: 35618, sum loss: 3617.951904, avg loss: 2.403955, ppl: 11.066859 +epoch: 2, batch: 35619, sum loss: 3819.730469, avg loss: 2.357858, ppl: 10.568295 +epoch: 2, batch: 35620, sum loss: 4200.415039, avg loss: 2.396130, ppl: 10.980595 +epoch: 2, batch: 35621, sum loss: 3986.169922, avg loss: 2.394096, ppl: 10.958289 +epoch: 2, batch: 35622, sum loss: 3838.355957, avg loss: 2.344750, ppl: 10.430667 +epoch: 2, batch: 35623, sum loss: 3561.940186, avg loss: 2.254393, ppl: 9.529504 +epoch: 2, batch: 35624, sum loss: 4261.455078, avg loss: 2.479032, ppl: 11.929706 +epoch: 2, batch: 35625, sum loss: 3739.342773, avg loss: 2.476386, ppl: 11.898188 +epoch: 2, batch: 35626, sum loss: 4223.972656, avg loss: 2.417844, ppl: 11.221635 +epoch: 2, batch: 35627, sum loss: 4855.293945, avg loss: 2.643056, ppl: 14.056096 +epoch: 2, batch: 35628, sum loss: 4551.310547, avg loss: 2.409376, ppl: 11.127012 +epoch: 2, batch: 35629, sum loss: 4705.247559, avg loss: 2.311025, ppl: 10.084760 +epoch: 2, batch: 35630, sum loss: 3529.081299, avg loss: 2.236427, ppl: 9.359825 +epoch: 2, batch: 35631, sum loss: 4011.578125, avg loss: 2.488572, ppl: 12.044066 +epoch: 2, batch: 35632, sum loss: 3770.258057, avg loss: 2.387750, ppl: 10.888970 +epoch: 2, batch: 35633, sum loss: 3881.162598, avg loss: 2.365120, ppl: 10.645321 +epoch: 2, batch: 35634, sum loss: 3630.165039, avg loss: 2.498393, ppl: 12.162933 +epoch: 2, batch: 35635, sum loss: 4093.855713, avg loss: 2.584505, ppl: 13.256723 +epoch: 2, batch: 35636, sum loss: 4612.206055, avg loss: 2.509361, ppl: 12.297073 +epoch: 2, batch: 35637, sum loss: 
5091.577148, avg loss: 2.587184, ppl: 13.292284 +epoch: 2, batch: 35638, sum loss: 4497.562500, avg loss: 2.529563, ppl: 12.548018 +epoch: 2, batch: 35639, sum loss: 5058.957520, avg loss: 2.619864, ppl: 13.733859 +epoch: 2, batch: 35640, sum loss: 3816.578613, avg loss: 2.403387, ppl: 11.060575 +epoch: 2, batch: 35641, sum loss: 4522.413574, avg loss: 2.602079, ppl: 13.491760 +epoch: 2, batch: 35642, sum loss: 3059.091064, avg loss: 2.239452, ppl: 9.388182 +epoch: 2, batch: 35643, sum loss: 4456.212891, avg loss: 2.657253, ppl: 14.257068 +epoch: 2, batch: 35644, sum loss: 4296.961426, avg loss: 2.647542, ppl: 14.119297 +epoch: 2, batch: 35645, sum loss: 3932.184570, avg loss: 2.556687, ppl: 12.893030 +epoch: 2, batch: 35646, sum loss: 4508.170898, avg loss: 2.529838, ppl: 12.551471 +epoch: 2, batch: 35647, sum loss: 5422.453125, avg loss: 2.680402, ppl: 14.590955 +epoch: 2, batch: 35648, sum loss: 4395.582031, avg loss: 2.455632, ppl: 11.653802 +epoch: 2, batch: 35649, sum loss: 3799.355469, avg loss: 2.312450, ppl: 10.099136 +epoch: 2, batch: 35650, sum loss: 4001.671631, avg loss: 2.239324, ppl: 9.386982 +epoch: 2, batch: 35651, sum loss: 3999.662109, avg loss: 2.525039, ppl: 12.491385 +epoch: 2, batch: 35652, sum loss: 4021.635498, avg loss: 2.275968, ppl: 9.737341 +epoch: 2, batch: 35653, sum loss: 4757.288574, avg loss: 2.522423, ppl: 12.458742 +epoch: 2, batch: 35654, sum loss: 3163.284424, avg loss: 2.175574, ppl: 8.807238 +epoch: 2, batch: 35655, sum loss: 3612.413818, avg loss: 2.311205, ppl: 10.086574 +epoch: 2, batch: 35656, sum loss: 4356.553223, avg loss: 2.757312, ppl: 15.757435 +epoch: 2, batch: 35657, sum loss: 4247.679688, avg loss: 2.719385, ppl: 15.170992 +epoch: 2, batch: 35658, sum loss: 3959.510010, avg loss: 2.094979, ppl: 8.125269 +epoch: 2, batch: 35659, sum loss: 3662.664795, avg loss: 2.366063, ppl: 10.655355 +epoch: 2, batch: 35660, sum loss: 5080.026367, avg loss: 2.628053, ppl: 13.846786 +epoch: 2, batch: 35661, sum loss: 3435.789062, 
avg loss: 2.245614, ppl: 9.446212 +epoch: 2, batch: 35662, sum loss: 4404.486816, avg loss: 2.631115, ppl: 13.889254 +epoch: 2, batch: 35663, sum loss: 3840.120605, avg loss: 2.376312, ppl: 10.765131 +epoch: 2, batch: 35664, sum loss: 4523.238770, avg loss: 2.799034, ppl: 16.428766 +epoch: 2, batch: 35665, sum loss: 4566.854004, avg loss: 2.859646, ppl: 17.455339 +epoch: 2, batch: 35666, sum loss: 3947.889648, avg loss: 2.107790, ppl: 8.230029 +epoch: 2, batch: 35667, sum loss: 4077.068359, avg loss: 2.434071, ppl: 11.405216 +epoch: 2, batch: 35668, sum loss: 4641.355957, avg loss: 2.512916, ppl: 12.340868 +epoch: 2, batch: 35669, sum loss: 3498.631592, avg loss: 2.360750, ppl: 10.598900 +epoch: 2, batch: 35670, sum loss: 3428.176758, avg loss: 2.280889, ppl: 9.785378 +epoch: 2, batch: 35671, sum loss: 3737.854248, avg loss: 2.359756, ppl: 10.588372 +epoch: 2, batch: 35672, sum loss: 3891.127441, avg loss: 2.318908, ppl: 10.164568 +epoch: 2, batch: 35673, sum loss: 5130.982910, avg loss: 2.803816, ppl: 16.507517 +epoch: 2, batch: 35674, sum loss: 5056.009277, avg loss: 2.649900, ppl: 14.152623 +epoch: 2, batch: 35675, sum loss: 3820.287598, avg loss: 2.480706, ppl: 11.949700 +epoch: 2, batch: 35676, sum loss: 4272.204590, avg loss: 2.547528, ppl: 12.775484 +epoch: 2, batch: 35677, sum loss: 4830.362793, avg loss: 2.565248, ppl: 13.003886 +epoch: 2, batch: 35678, sum loss: 4057.582520, avg loss: 2.558375, ppl: 12.914812 +epoch: 2, batch: 35679, sum loss: 3666.254395, avg loss: 2.445800, ppl: 11.539782 +epoch: 2, batch: 35680, sum loss: 4442.419922, avg loss: 2.424902, ppl: 11.301119 +epoch: 2, batch: 35681, sum loss: 3923.983154, avg loss: 2.296070, ppl: 9.935060 +epoch: 2, batch: 35682, sum loss: 4866.094238, avg loss: 2.709407, ppl: 15.020360 +epoch: 2, batch: 35683, sum loss: 4385.339355, avg loss: 2.605668, ppl: 13.540268 +epoch: 2, batch: 35684, sum loss: 3287.931152, avg loss: 2.221575, ppl: 9.221847 +epoch: 2, batch: 35685, sum loss: 4301.473633, avg loss: 
2.291675, ppl: 9.891491 +epoch: 2, batch: 35686, sum loss: 3784.212158, avg loss: 2.565567, ppl: 13.008039 +epoch: 2, batch: 35687, sum loss: 4958.331543, avg loss: 2.457052, ppl: 11.670362 +epoch: 2, batch: 35688, sum loss: 4369.389648, avg loss: 2.583909, ppl: 13.248824 +epoch: 2, batch: 35689, sum loss: 4270.939941, avg loss: 2.460219, ppl: 11.707374 +epoch: 2, batch: 35690, sum loss: 3559.138184, avg loss: 2.311129, ppl: 10.085801 +epoch: 2, batch: 35691, sum loss: 5043.552734, avg loss: 2.830277, ppl: 16.950151 +epoch: 2, batch: 35692, sum loss: 4328.834961, avg loss: 2.687048, ppl: 14.688258 +epoch: 2, batch: 35693, sum loss: 4579.147949, avg loss: 2.472542, ppl: 11.852538 +epoch: 2, batch: 35694, sum loss: 4105.906250, avg loss: 2.389934, ppl: 10.912772 +epoch: 2, batch: 35695, sum loss: 3761.915527, avg loss: 2.380959, ppl: 10.815272 +epoch: 2, batch: 35696, sum loss: 4083.622559, avg loss: 2.348259, ppl: 10.467333 +epoch: 2, batch: 35697, sum loss: 4040.857422, avg loss: 2.482099, ppl: 11.966356 +epoch: 2, batch: 35698, sum loss: 3647.529053, avg loss: 2.493185, ppl: 12.099751 +epoch: 2, batch: 35699, sum loss: 4154.963867, avg loss: 2.677168, ppl: 14.543841 +epoch: 2, batch: 35700, sum loss: 4105.819824, avg loss: 2.395461, ppl: 10.973254 +epoch: 2, batch: 35701, sum loss: 4318.855957, avg loss: 2.263551, ppl: 9.617182 +epoch: 2, batch: 35702, sum loss: 4679.123047, avg loss: 2.654069, ppl: 14.211744 +epoch: 2, batch: 35703, sum loss: 3605.483887, avg loss: 2.286293, ppl: 9.838399 +epoch: 2, batch: 35704, sum loss: 3887.903320, avg loss: 2.244748, ppl: 9.438036 +epoch: 2, batch: 35705, sum loss: 4067.287598, avg loss: 2.722415, ppl: 15.217023 +epoch: 2, batch: 35706, sum loss: 4240.560059, avg loss: 2.363746, ppl: 10.630699 +epoch: 2, batch: 35707, sum loss: 3758.558105, avg loss: 2.265556, ppl: 9.636484 +epoch: 2, batch: 35708, sum loss: 4479.719727, avg loss: 2.394292, ppl: 10.960434 +epoch: 2, batch: 35709, sum loss: 4712.820801, avg loss: 2.508154, 
ppl: 12.282235 +epoch: 2, batch: 35710, sum loss: 3856.311279, avg loss: 2.532049, ppl: 12.579260 +epoch: 2, batch: 35711, sum loss: 3531.844727, avg loss: 2.405889, ppl: 11.088281 +epoch: 2, batch: 35712, sum loss: 4462.249512, avg loss: 2.564511, ppl: 12.994307 +epoch: 2, batch: 35713, sum loss: 3853.110596, avg loss: 2.294884, ppl: 9.923286 +epoch: 2, batch: 35714, sum loss: 3976.885498, avg loss: 2.490223, ppl: 12.063962 +epoch: 2, batch: 35715, sum loss: 3928.302734, avg loss: 2.408524, ppl: 11.117540 +epoch: 2, batch: 35716, sum loss: 4540.848145, avg loss: 2.461164, ppl: 11.718447 +epoch: 2, batch: 35717, sum loss: 4266.203613, avg loss: 2.400790, ppl: 11.031885 +epoch: 2, batch: 35718, sum loss: 3212.265625, avg loss: 2.213829, ppl: 9.150685 +epoch: 2, batch: 35719, sum loss: 4440.887207, avg loss: 2.482329, ppl: 11.969112 +epoch: 2, batch: 35720, sum loss: 4599.371094, avg loss: 2.586823, ppl: 13.287490 +epoch: 2, batch: 35721, sum loss: 3857.588135, avg loss: 2.385645, ppl: 10.866071 +epoch: 2, batch: 35722, sum loss: 3963.217529, avg loss: 2.342327, ppl: 10.405423 +epoch: 2, batch: 35723, sum loss: 5104.346680, avg loss: 2.778632, ppl: 16.096985 +epoch: 2, batch: 35724, sum loss: 3783.535400, avg loss: 2.387089, ppl: 10.881766 +epoch: 2, batch: 35725, sum loss: 4547.361328, avg loss: 2.890884, ppl: 18.009230 +epoch: 2, batch: 35726, sum loss: 3969.259277, avg loss: 2.565778, ppl: 13.010783 +epoch: 2, batch: 35727, sum loss: 3654.047119, avg loss: 2.251415, ppl: 9.501173 +epoch: 2, batch: 35728, sum loss: 4378.962402, avg loss: 2.392876, ppl: 10.944922 +epoch: 2, batch: 35729, sum loss: 3109.446533, avg loss: 2.141492, ppl: 8.512130 +epoch: 2, batch: 35730, sum loss: 3867.328125, avg loss: 2.322720, ppl: 10.203385 +epoch: 2, batch: 35731, sum loss: 3967.301758, avg loss: 2.400062, ppl: 11.023855 +epoch: 2, batch: 35732, sum loss: 4112.830078, avg loss: 2.462773, ppl: 11.737309 +epoch: 2, batch: 35733, sum loss: 4045.423340, avg loss: 2.504906, ppl: 
12.242411 +epoch: 2, batch: 35734, sum loss: 4988.892578, avg loss: 2.832988, ppl: 16.996174 +epoch: 2, batch: 35735, sum loss: 5038.819336, avg loss: 2.780805, ppl: 16.132008 +epoch: 2, batch: 35736, sum loss: 4596.988281, avg loss: 2.545398, ppl: 12.748298 +epoch: 2, batch: 35737, sum loss: 3531.100098, avg loss: 2.511451, ppl: 12.322798 +epoch: 2, batch: 35738, sum loss: 4210.987305, avg loss: 2.435505, ppl: 11.421581 +epoch: 2, batch: 35739, sum loss: 3515.790283, avg loss: 2.414691, ppl: 11.186316 +epoch: 2, batch: 35740, sum loss: 3495.927246, avg loss: 2.143426, ppl: 8.528605 +epoch: 2, batch: 35741, sum loss: 4399.514160, avg loss: 2.434706, ppl: 11.412462 +epoch: 2, batch: 35742, sum loss: 4332.064453, avg loss: 2.378948, ppl: 10.793545 +epoch: 2, batch: 35743, sum loss: 4239.674316, avg loss: 2.323109, ppl: 10.207361 +epoch: 2, batch: 35744, sum loss: 3665.000488, avg loss: 2.287766, ppl: 9.852897 +epoch: 2, batch: 35745, sum loss: 4638.166504, avg loss: 2.469737, ppl: 11.819342 +epoch: 2, batch: 35746, sum loss: 3813.419922, avg loss: 2.201744, ppl: 9.040763 +epoch: 2, batch: 35747, sum loss: 5001.904297, avg loss: 2.671957, ppl: 14.468263 +epoch: 2, batch: 35748, sum loss: 3560.194336, avg loss: 2.313317, ppl: 10.107893 +epoch: 2, batch: 35749, sum loss: 4951.144531, avg loss: 2.390702, ppl: 10.921164 +epoch: 2, batch: 35750, sum loss: 4298.357910, avg loss: 2.730850, ppl: 15.345925 +epoch: 2, batch: 35751, sum loss: 3909.385254, avg loss: 2.231385, ppl: 9.312751 +epoch: 2, batch: 35752, sum loss: 4407.684570, avg loss: 2.491625, ppl: 12.080893 +epoch: 2, batch: 35753, sum loss: 3832.963379, avg loss: 2.286971, ppl: 9.845073 +epoch: 2, batch: 35754, sum loss: 3348.921631, avg loss: 2.246091, ppl: 9.450722 +epoch: 2, batch: 35755, sum loss: 5186.390137, avg loss: 2.894191, ppl: 18.068878 +epoch: 2, batch: 35756, sum loss: 4562.362793, avg loss: 2.649456, ppl: 14.146338 +epoch: 2, batch: 35757, sum loss: 4158.971191, avg loss: 2.412396, ppl: 11.160675 
+epoch: 2, batch: 35758, sum loss: 4208.491211, avg loss: 2.490232, ppl: 12.064069 +epoch: 2, batch: 35759, sum loss: 4710.630371, avg loss: 2.454732, ppl: 11.643310 +epoch: 2, batch: 35760, sum loss: 3535.390381, avg loss: 2.285320, ppl: 9.828834 +epoch: 2, batch: 35761, sum loss: 4124.271973, avg loss: 2.477040, ppl: 11.905974 +epoch: 2, batch: 35762, sum loss: 4321.950684, avg loss: 2.545319, ppl: 12.747289 +epoch: 2, batch: 35763, sum loss: 3906.631592, avg loss: 2.480401, ppl: 11.946052 +epoch: 2, batch: 35764, sum loss: 3653.051514, avg loss: 2.254970, ppl: 9.535008 +epoch: 2, batch: 35765, sum loss: 3559.104248, avg loss: 2.107226, ppl: 8.225389 +epoch: 2, batch: 35766, sum loss: 3437.118896, avg loss: 2.328671, ppl: 10.264296 +epoch: 2, batch: 35767, sum loss: 4052.365723, avg loss: 2.161262, ppl: 8.682086 +epoch: 2, batch: 35768, sum loss: 5050.723633, avg loss: 2.678008, ppl: 14.556073 +epoch: 2, batch: 35769, sum loss: 3441.122314, avg loss: 2.361786, ppl: 10.609885 +epoch: 2, batch: 35770, sum loss: 4037.751465, avg loss: 2.362640, ppl: 10.618947 +epoch: 2, batch: 35771, sum loss: 4107.316406, avg loss: 2.636275, ppl: 13.961102 +epoch: 2, batch: 35772, sum loss: 4663.792969, avg loss: 2.538810, ppl: 12.664585 +epoch: 2, batch: 35773, sum loss: 4633.300781, avg loss: 2.512636, ppl: 12.337408 +epoch: 2, batch: 35774, sum loss: 3810.291992, avg loss: 2.139412, ppl: 8.494439 +epoch: 2, batch: 35775, sum loss: 4748.357422, avg loss: 2.722682, ppl: 15.221090 +epoch: 2, batch: 35776, sum loss: 4374.203125, avg loss: 2.509583, ppl: 12.299800 +epoch: 2, batch: 35777, sum loss: 4242.074707, avg loss: 2.511590, ppl: 12.324505 +epoch: 2, batch: 35778, sum loss: 3907.250244, avg loss: 2.275626, ppl: 9.734013 +epoch: 2, batch: 35779, sum loss: 4712.151367, avg loss: 2.580587, ppl: 13.204886 +epoch: 2, batch: 35780, sum loss: 6204.492188, avg loss: 2.914275, ppl: 18.435450 +epoch: 2, batch: 35781, sum loss: 4549.693359, avg loss: 2.587994, ppl: 13.303057 +epoch: 2, 
batch: 35782, sum loss: 4696.164062, avg loss: 2.408289, ppl: 11.114929 +epoch: 2, batch: 35783, sum loss: 4596.270020, avg loss: 2.664505, ppl: 14.360832 +epoch: 2, batch: 35784, sum loss: 3805.825928, avg loss: 2.528788, ppl: 12.538301 +epoch: 2, batch: 35785, sum loss: 4519.134766, avg loss: 2.638140, ppl: 13.987169 +epoch: 2, batch: 35786, sum loss: 3583.801270, avg loss: 2.310639, ppl: 10.080866 +epoch: 2, batch: 35787, sum loss: 4345.369141, avg loss: 2.383636, ppl: 10.844266 +epoch: 2, batch: 35788, sum loss: 4651.049805, avg loss: 2.601258, ppl: 13.480690 +epoch: 2, batch: 35789, sum loss: 4202.515137, avg loss: 2.488168, ppl: 12.039197 +epoch: 2, batch: 35790, sum loss: 4189.055176, avg loss: 2.427031, ppl: 11.325206 +epoch: 2, batch: 35791, sum loss: 4202.455078, avg loss: 2.467678, ppl: 11.795022 +epoch: 2, batch: 35792, sum loss: 4324.072266, avg loss: 2.399596, ppl: 11.018726 +epoch: 2, batch: 35793, sum loss: 4593.029297, avg loss: 2.550266, ppl: 12.810512 +epoch: 2, batch: 35794, sum loss: 3829.878662, avg loss: 2.274275, ppl: 9.720867 +epoch: 2, batch: 35795, sum loss: 5891.781250, avg loss: 2.726414, ppl: 15.278005 +epoch: 2, batch: 35796, sum loss: 3263.216309, avg loss: 2.139814, ppl: 8.497856 +epoch: 2, batch: 35797, sum loss: 4305.375488, avg loss: 2.474354, ppl: 11.874031 +epoch: 2, batch: 35798, sum loss: 4359.752930, avg loss: 2.557040, ppl: 12.897584 +epoch: 2, batch: 35799, sum loss: 3837.303711, avg loss: 2.345540, ppl: 10.438911 +epoch: 2, batch: 35800, sum loss: 3986.294922, avg loss: 2.377039, ppl: 10.772959 +epoch: 2, batch: 35801, sum loss: 3725.104980, avg loss: 2.318049, ppl: 10.155843 +epoch: 2, batch: 35802, sum loss: 4265.779297, avg loss: 2.488786, ppl: 12.046642 +epoch: 2, batch: 35803, sum loss: 4814.212402, avg loss: 2.662728, ppl: 14.335343 +epoch: 2, batch: 35804, sum loss: 3496.162109, avg loss: 2.236828, ppl: 9.363584 +epoch: 2, batch: 35805, sum loss: 4012.415527, avg loss: 2.560572, ppl: 12.943214 +epoch: 2, batch: 
35806, sum loss: 4039.380615, avg loss: 2.530940, ppl: 12.565315 +epoch: 2, batch: 35807, sum loss: 3274.944580, avg loss: 2.206836, ppl: 9.086920 +epoch: 2, batch: 35808, sum loss: 3865.516357, avg loss: 2.476308, ppl: 11.897257 +epoch: 2, batch: 35809, sum loss: 4424.915527, avg loss: 2.439314, ppl: 11.465175 +epoch: 2, batch: 35810, sum loss: 3783.911133, avg loss: 2.452308, ppl: 11.615123 +epoch: 2, batch: 35811, sum loss: 3511.309082, avg loss: 2.499152, ppl: 12.172173 +epoch: 2, batch: 35812, sum loss: 4124.473633, avg loss: 2.268687, ppl: 9.666702 +epoch: 2, batch: 35813, sum loss: 3927.963623, avg loss: 2.451912, ppl: 11.610530 +epoch: 2, batch: 35814, sum loss: 4288.204590, avg loss: 2.485916, ppl: 12.012117 +epoch: 2, batch: 35815, sum loss: 3864.969482, avg loss: 2.336741, ppl: 10.347459 +epoch: 2, batch: 35816, sum loss: 3985.705566, avg loss: 2.172047, ppl: 8.776228 +epoch: 2, batch: 35817, sum loss: 4953.218750, avg loss: 2.566435, ppl: 13.019322 +epoch: 2, batch: 35818, sum loss: 3947.166016, avg loss: 2.485621, ppl: 12.008575 +epoch: 2, batch: 35819, sum loss: 3710.754639, avg loss: 2.310557, ppl: 10.080036 +epoch: 2, batch: 35820, sum loss: 3789.949219, avg loss: 2.338032, ppl: 10.360822 +epoch: 2, batch: 35821, sum loss: 4555.471680, avg loss: 2.410302, ppl: 11.137328 +epoch: 2, batch: 35822, sum loss: 3934.389648, avg loss: 2.603832, ppl: 13.515424 +epoch: 2, batch: 35823, sum loss: 3382.492432, avg loss: 2.119356, ppl: 8.325775 +epoch: 2, batch: 35824, sum loss: 4170.454590, avg loss: 2.460445, ppl: 11.710023 +epoch: 2, batch: 35825, sum loss: 4082.803467, avg loss: 2.331698, ppl: 10.295410 +epoch: 2, batch: 35826, sum loss: 5092.416504, avg loss: 2.633101, ppl: 13.916852 +epoch: 2, batch: 35827, sum loss: 3592.929932, avg loss: 2.439192, ppl: 11.463778 +epoch: 2, batch: 35828, sum loss: 3262.022217, avg loss: 2.213041, ppl: 9.143477 +epoch: 2, batch: 35829, sum loss: 3823.845215, avg loss: 2.284256, ppl: 9.818383 +epoch: 2, batch: 35830, sum 
loss: 4431.451172, avg loss: 2.667942, ppl: 14.410279 +epoch: 2, batch: 35831, sum loss: 4566.391602, avg loss: 2.703607, ppl: 14.933494 +epoch: 2, batch: 35832, sum loss: 3205.590088, avg loss: 2.404794, ppl: 11.076145 +epoch: 2, batch: 35833, sum loss: 4680.276855, avg loss: 2.446564, ppl: 11.548597 +epoch: 2, batch: 35834, sum loss: 4317.295898, avg loss: 2.814404, ppl: 16.683229 +epoch: 2, batch: 35835, sum loss: 4025.274414, avg loss: 2.401715, ppl: 11.042098 +epoch: 2, batch: 35836, sum loss: 4097.758301, avg loss: 2.598452, ppl: 13.442910 +epoch: 2, batch: 35837, sum loss: 4433.820801, avg loss: 2.489512, ppl: 12.055391 +epoch: 2, batch: 35838, sum loss: 3908.030029, avg loss: 2.495549, ppl: 12.128392 +epoch: 2, batch: 35839, sum loss: 3877.677979, avg loss: 2.354388, ppl: 10.531687 +epoch: 2, batch: 35840, sum loss: 3992.667725, avg loss: 2.478379, ppl: 11.921918 +epoch: 2, batch: 35841, sum loss: 4558.704102, avg loss: 2.683169, ppl: 14.631389 +epoch: 2, batch: 35842, sum loss: 3912.573730, avg loss: 2.553899, ppl: 12.857140 +epoch: 2, batch: 35843, sum loss: 4250.221680, avg loss: 2.626837, ppl: 13.829950 +epoch: 2, batch: 35844, sum loss: 4241.689941, avg loss: 2.666053, ppl: 14.383084 +epoch: 2, batch: 35845, sum loss: 3898.994385, avg loss: 2.337527, ppl: 10.355591 +epoch: 2, batch: 35846, sum loss: 3759.633301, avg loss: 2.323630, ppl: 10.212678 +epoch: 2, batch: 35847, sum loss: 4310.862305, avg loss: 2.570580, ppl: 13.073401 +epoch: 2, batch: 35848, sum loss: 4188.518555, avg loss: 2.476948, ppl: 11.904873 +epoch: 2, batch: 35849, sum loss: 3231.461914, avg loss: 2.228595, ppl: 9.286805 +epoch: 2, batch: 35850, sum loss: 5103.933105, avg loss: 2.604047, ppl: 13.518340 +epoch: 2, batch: 35851, sum loss: 4180.941895, avg loss: 2.418127, ppl: 11.224819 +epoch: 2, batch: 35852, sum loss: 4527.614258, avg loss: 2.543604, ppl: 12.725447 +epoch: 2, batch: 35853, sum loss: 4362.721191, avg loss: 2.691376, ppl: 14.751967 +epoch: 2, batch: 35854, sum loss: 
3673.364014, avg loss: 2.284430, ppl: 9.820089 +epoch: 2, batch: 35855, sum loss: 3935.819580, avg loss: 2.330266, ppl: 10.280678 +epoch: 2, batch: 35856, sum loss: 5044.659180, avg loss: 2.753635, ppl: 15.699595 +epoch: 2, batch: 35857, sum loss: 4159.959473, avg loss: 2.389408, ppl: 10.907036 +epoch: 2, batch: 35858, sum loss: 3878.501709, avg loss: 2.451644, ppl: 11.607418 +epoch: 2, batch: 35859, sum loss: 4111.511230, avg loss: 2.380725, ppl: 10.812736 +epoch: 2, batch: 35860, sum loss: 4670.700684, avg loss: 2.629899, ppl: 13.872369 +epoch: 2, batch: 35861, sum loss: 4140.662598, avg loss: 2.498891, ppl: 12.168992 +epoch: 2, batch: 35862, sum loss: 3703.737549, avg loss: 2.196760, ppl: 8.995821 +epoch: 2, batch: 35863, sum loss: 3862.879883, avg loss: 2.355415, ppl: 10.542500 +epoch: 2, batch: 35864, sum loss: 3926.970703, avg loss: 2.391578, ppl: 10.930729 +epoch: 2, batch: 35865, sum loss: 4669.098145, avg loss: 2.627517, ppl: 13.839367 +epoch: 2, batch: 35866, sum loss: 4399.250977, avg loss: 2.505268, ppl: 12.246845 +epoch: 2, batch: 35867, sum loss: 4270.802734, avg loss: 2.546692, ppl: 12.764810 +epoch: 2, batch: 35868, sum loss: 3974.422852, avg loss: 2.479365, ppl: 11.933688 +epoch: 2, batch: 35869, sum loss: 4654.722168, avg loss: 2.626818, ppl: 13.829700 +epoch: 2, batch: 35870, sum loss: 3549.326172, avg loss: 2.407955, ppl: 11.111220 +epoch: 2, batch: 35871, sum loss: 4057.316650, avg loss: 2.632911, ppl: 13.914222 +epoch: 2, batch: 35872, sum loss: 4473.966797, avg loss: 2.507829, ppl: 12.278244 +epoch: 2, batch: 35873, sum loss: 3826.022949, avg loss: 2.330099, ppl: 10.278963 +epoch: 2, batch: 35874, sum loss: 3981.750977, avg loss: 2.494831, ppl: 12.119689 +epoch: 2, batch: 35875, sum loss: 4468.251465, avg loss: 2.468647, ppl: 11.806468 +epoch: 2, batch: 35876, sum loss: 4562.279297, avg loss: 2.626528, ppl: 13.825687 +epoch: 2, batch: 35877, sum loss: 4038.854980, avg loss: 2.459717, ppl: 11.701497 +epoch: 2, batch: 35878, sum loss: 
4301.722656, avg loss: 2.668562, ppl: 14.419226 +epoch: 2, batch: 35879, sum loss: 3463.721191, avg loss: 2.290821, ppl: 9.883049 +epoch: 2, batch: 35880, sum loss: 3566.101807, avg loss: 2.315650, ppl: 10.131511 +epoch: 2, batch: 35881, sum loss: 4270.588379, avg loss: 2.217336, ppl: 9.182833 +epoch: 2, batch: 35882, sum loss: 4335.024902, avg loss: 2.481411, ppl: 11.958125 +epoch: 2, batch: 35883, sum loss: 4167.691895, avg loss: 2.423077, ppl: 11.280512 +epoch: 2, batch: 35884, sum loss: 4158.758789, avg loss: 2.609008, ppl: 13.585568 +epoch: 2, batch: 35885, sum loss: 4929.385742, avg loss: 2.470870, ppl: 11.832737 +epoch: 2, batch: 35886, sum loss: 3691.044189, avg loss: 2.449266, ppl: 11.579846 +epoch: 2, batch: 35887, sum loss: 4305.851074, avg loss: 2.386835, ppl: 10.879011 +epoch: 2, batch: 35888, sum loss: 4789.216797, avg loss: 2.555612, ppl: 12.879177 +epoch: 2, batch: 35889, sum loss: 4232.569824, avg loss: 2.492680, ppl: 12.093638 +epoch: 2, batch: 35890, sum loss: 4349.147949, avg loss: 2.647077, ppl: 14.112728 +epoch: 2, batch: 35891, sum loss: 4590.011719, avg loss: 2.381947, ppl: 10.825961 +epoch: 2, batch: 35892, sum loss: 3682.708984, avg loss: 2.438880, ppl: 11.460200 +epoch: 2, batch: 35893, sum loss: 4182.670898, avg loss: 2.401074, ppl: 11.035021 +epoch: 2, batch: 35894, sum loss: 4190.209473, avg loss: 2.398517, ppl: 11.006843 +epoch: 2, batch: 35895, sum loss: 4465.077637, avg loss: 2.421409, ppl: 11.261712 +epoch: 2, batch: 35896, sum loss: 3967.001465, avg loss: 2.366946, ppl: 10.664772 +epoch: 2, batch: 35897, sum loss: 3827.294922, avg loss: 2.339422, ppl: 10.375243 +epoch: 2, batch: 35898, sum loss: 3883.030029, avg loss: 2.258889, ppl: 9.572448 +epoch: 2, batch: 35899, sum loss: 4224.985840, avg loss: 2.523887, ppl: 12.476996 +epoch: 2, batch: 35900, sum loss: 4398.587891, avg loss: 2.569268, ppl: 13.056270 +epoch: 2, batch: 35901, sum loss: 4594.339844, avg loss: 2.559521, ppl: 12.929622 +epoch: 2, batch: 35902, sum loss: 
3472.255859, avg loss: 2.370141, ppl: 10.698896 +epoch: 2, batch: 35903, sum loss: 3617.320312, avg loss: 2.381383, ppl: 10.819853 +epoch: 2, batch: 35904, sum loss: 4310.060547, avg loss: 2.342424, ppl: 10.406433 +epoch: 2, batch: 35905, sum loss: 3135.162842, avg loss: 2.108381, ppl: 8.234899 +epoch: 2, batch: 35906, sum loss: 4136.728027, avg loss: 2.480053, ppl: 11.941896 +epoch: 2, batch: 35907, sum loss: 4045.065430, avg loss: 2.429469, ppl: 11.352851 +epoch: 2, batch: 35908, sum loss: 3964.925293, avg loss: 2.495233, ppl: 12.124559 +epoch: 2, batch: 35909, sum loss: 4946.016113, avg loss: 2.750843, ppl: 15.655825 +epoch: 2, batch: 35910, sum loss: 4068.819336, avg loss: 2.337059, ppl: 10.350748 +epoch: 2, batch: 35911, sum loss: 4670.430664, avg loss: 2.682614, ppl: 14.623266 +epoch: 2, batch: 35912, sum loss: 4727.536133, avg loss: 2.467399, ppl: 11.791736 +epoch: 2, batch: 35913, sum loss: 2995.834473, avg loss: 2.042150, ppl: 7.707164 +epoch: 2, batch: 35914, sum loss: 4057.179199, avg loss: 2.371233, ppl: 10.710590 +epoch: 2, batch: 35915, sum loss: 5542.825684, avg loss: 2.750782, ppl: 15.654869 +epoch: 2, batch: 35916, sum loss: 4634.828613, avg loss: 2.793748, ppl: 16.342161 +epoch: 2, batch: 35917, sum loss: 3857.418945, avg loss: 2.471120, ppl: 11.835700 +epoch: 2, batch: 35918, sum loss: 4946.610840, avg loss: 2.650917, ppl: 14.167021 +epoch: 2, batch: 35919, sum loss: 4625.629883, avg loss: 2.738680, ppl: 15.466550 +epoch: 2, batch: 35920, sum loss: 4983.475586, avg loss: 2.570127, ppl: 13.067481 +epoch: 2, batch: 35921, sum loss: 4271.025879, avg loss: 2.437800, ppl: 11.447832 +epoch: 2, batch: 35922, sum loss: 4568.066406, avg loss: 2.533592, ppl: 12.598680 +epoch: 2, batch: 35923, sum loss: 4665.940430, avg loss: 2.709605, ppl: 15.023343 +epoch: 2, batch: 35924, sum loss: 4601.543945, avg loss: 2.392899, ppl: 10.945173 +epoch: 2, batch: 35925, sum loss: 3885.605713, avg loss: 2.344964, ppl: 10.432899 +epoch: 2, batch: 35926, sum loss: 
4161.162598, avg loss: 2.574977, ppl: 13.131014 +epoch: 2, batch: 35927, sum loss: 4404.690918, avg loss: 2.452501, ppl: 11.617360 +epoch: 2, batch: 35928, sum loss: 3095.603516, avg loss: 2.259565, ppl: 9.578918 +epoch: 2, batch: 35929, sum loss: 6221.058594, avg loss: 2.940009, ppl: 18.916014 +epoch: 2, batch: 35930, sum loss: 4021.724365, avg loss: 2.431514, ppl: 11.376096 +epoch: 2, batch: 35931, sum loss: 5782.588379, avg loss: 2.692080, ppl: 14.762354 +epoch: 2, batch: 35932, sum loss: 3870.760986, avg loss: 2.283635, ppl: 9.812283 +epoch: 2, batch: 35933, sum loss: 4720.221680, avg loss: 2.525533, ppl: 12.497558 +epoch: 2, batch: 35934, sum loss: 3296.430664, avg loss: 2.245525, ppl: 9.445372 +epoch: 2, batch: 35935, sum loss: 5344.412109, avg loss: 2.764828, ppl: 15.876308 +epoch: 2, batch: 35936, sum loss: 4377.439941, avg loss: 2.713850, ppl: 15.087250 +epoch: 2, batch: 35937, sum loss: 3791.492188, avg loss: 2.593360, ppl: 13.374634 +epoch: 2, batch: 35938, sum loss: 3853.200195, avg loss: 2.144241, ppl: 8.535557 +epoch: 2, batch: 35939, sum loss: 4046.929199, avg loss: 2.515183, ppl: 12.368869 +epoch: 2, batch: 35940, sum loss: 4510.530273, avg loss: 2.500294, ppl: 12.186076 +epoch: 2, batch: 35941, sum loss: 3608.207031, avg loss: 2.471375, ppl: 11.838711 +epoch: 2, batch: 35942, sum loss: 3875.337891, avg loss: 2.514820, ppl: 12.364385 +epoch: 2, batch: 35943, sum loss: 4523.081543, avg loss: 2.376816, ppl: 10.770557 +epoch: 2, batch: 35944, sum loss: 3695.530273, avg loss: 2.343393, ppl: 10.416518 +epoch: 2, batch: 35945, sum loss: 4036.535156, avg loss: 2.529157, ppl: 12.542933 +epoch: 2, batch: 35946, sum loss: 3496.035645, avg loss: 2.329138, ppl: 10.269084 +epoch: 2, batch: 35947, sum loss: 4794.380859, avg loss: 2.378165, ppl: 10.785097 +epoch: 2, batch: 35948, sum loss: 3456.598633, avg loss: 2.280078, ppl: 9.777447 +epoch: 2, batch: 35949, sum loss: 3701.268066, avg loss: 2.178498, ppl: 8.833030 +epoch: 2, batch: 35950, sum loss: 3855.746094, 
avg loss: 2.319944, ppl: 10.175101 +epoch: 2, batch: 35951, sum loss: 4621.858398, avg loss: 2.511879, ppl: 12.328078 +epoch: 2, batch: 35952, sum loss: 4391.290039, avg loss: 2.486574, ppl: 12.020027 +epoch: 2, batch: 35953, sum loss: 4157.351074, avg loss: 2.439760, ppl: 11.470287 +epoch: 2, batch: 35954, sum loss: 3736.611816, avg loss: 2.355997, ppl: 10.548644 +epoch: 2, batch: 35955, sum loss: 3909.920410, avg loss: 2.416514, ppl: 11.206729 +epoch: 2, batch: 35956, sum loss: 3765.056641, avg loss: 2.266741, ppl: 9.647905 +epoch: 2, batch: 35957, sum loss: 4413.624512, avg loss: 2.514886, ppl: 12.365195 +epoch: 2, batch: 35958, sum loss: 4552.111328, avg loss: 2.541659, ppl: 12.700723 +epoch: 2, batch: 35959, sum loss: 4261.113281, avg loss: 2.447509, ppl: 11.559517 +epoch: 2, batch: 35960, sum loss: 4741.980957, avg loss: 2.658061, ppl: 14.268600 +epoch: 2, batch: 35961, sum loss: 4260.712891, avg loss: 2.563606, ppl: 12.982545 +epoch: 2, batch: 35962, sum loss: 4988.501953, avg loss: 2.519445, ppl: 12.421703 +epoch: 2, batch: 35963, sum loss: 4196.028809, avg loss: 2.539969, ppl: 12.679280 +epoch: 2, batch: 35964, sum loss: 4260.651855, avg loss: 2.469943, ppl: 11.821776 +epoch: 2, batch: 35965, sum loss: 3666.446777, avg loss: 2.310301, ppl: 10.077454 +epoch: 2, batch: 35966, sum loss: 4669.077148, avg loss: 2.598262, ppl: 13.440363 +epoch: 2, batch: 35967, sum loss: 3816.016602, avg loss: 2.271439, ppl: 9.693336 +epoch: 2, batch: 35968, sum loss: 3635.858154, avg loss: 2.396742, ppl: 10.987325 +epoch: 2, batch: 35969, sum loss: 3512.628662, avg loss: 2.243058, ppl: 9.422097 +epoch: 2, batch: 35970, sum loss: 3380.064697, avg loss: 2.216436, ppl: 9.174574 +epoch: 2, batch: 35971, sum loss: 4176.376953, avg loss: 2.496340, ppl: 12.137988 +epoch: 2, batch: 35972, sum loss: 3385.918457, avg loss: 2.301780, ppl: 9.991952 +epoch: 2, batch: 35973, sum loss: 3402.487549, avg loss: 2.195153, ppl: 8.981378 +epoch: 2, batch: 35974, sum loss: 4266.528320, avg loss: 
2.489223, ppl: 12.051908 +epoch: 2, batch: 35975, sum loss: 3441.055420, avg loss: 2.207220, ppl: 9.090406 +epoch: 2, batch: 35976, sum loss: 3970.325928, avg loss: 2.367517, ppl: 10.670863 +epoch: 2, batch: 35977, sum loss: 3655.009277, avg loss: 2.359593, ppl: 10.586641 +epoch: 2, batch: 35978, sum loss: 5020.106934, avg loss: 2.565205, ppl: 13.003328 +epoch: 2, batch: 35979, sum loss: 3817.509521, avg loss: 2.334868, ppl: 10.328098 +epoch: 2, batch: 35980, sum loss: 4787.541992, avg loss: 2.624749, ppl: 13.801109 +epoch: 2, batch: 35981, sum loss: 4774.085449, avg loss: 2.520637, ppl: 12.436511 +epoch: 2, batch: 35982, sum loss: 3825.863281, avg loss: 2.314497, ppl: 10.119829 +epoch: 2, batch: 35983, sum loss: 4621.258789, avg loss: 2.734473, ppl: 15.401621 +epoch: 2, batch: 35984, sum loss: 4548.963867, avg loss: 2.510466, ppl: 12.310661 +epoch: 2, batch: 35985, sum loss: 4475.653809, avg loss: 2.602124, ppl: 13.492369 +epoch: 2, batch: 35986, sum loss: 3904.142090, avg loss: 2.337810, ppl: 10.358522 +epoch: 2, batch: 35987, sum loss: 4169.476562, avg loss: 2.492216, ppl: 12.088029 +epoch: 2, batch: 35988, sum loss: 4541.062500, avg loss: 2.521412, ppl: 12.446154 +epoch: 2, batch: 35989, sum loss: 3115.740234, avg loss: 2.209745, ppl: 9.113392 +epoch: 2, batch: 35990, sum loss: 4280.830566, avg loss: 2.509279, ppl: 12.296064 +epoch: 2, batch: 35991, sum loss: 4758.945801, avg loss: 2.639460, ppl: 14.005633 +epoch: 2, batch: 35992, sum loss: 3942.622803, avg loss: 2.393821, ppl: 10.955272 +epoch: 2, batch: 35993, sum loss: 3629.170166, avg loss: 2.080946, ppl: 8.012047 +epoch: 2, batch: 35994, sum loss: 4208.806152, avg loss: 2.245894, ppl: 9.448863 +epoch: 2, batch: 35995, sum loss: 3093.868164, avg loss: 2.141085, ppl: 8.508666 +epoch: 2, batch: 35996, sum loss: 4225.741211, avg loss: 2.435586, ppl: 11.422507 +epoch: 2, batch: 35997, sum loss: 4117.180664, avg loss: 2.466855, ppl: 11.785321 +epoch: 2, batch: 35998, sum loss: 4807.517578, avg loss: 2.660496, 
ppl: 14.303389 +epoch: 2, batch: 35999, sum loss: 4372.314941, avg loss: 2.656327, ppl: 14.243882 +epoch: 2, batch: 36000, sum loss: 3865.368164, avg loss: 2.495396, ppl: 12.126534 +epoch: 2, batch: 36001, sum loss: 4106.279297, avg loss: 2.499257, ppl: 12.173444 +epoch: 2, batch: 36002, sum loss: 3846.546143, avg loss: 2.325602, ppl: 10.232841 +epoch: 2, batch: 36003, sum loss: 4026.227783, avg loss: 2.383794, ppl: 10.845973 +epoch: 2, batch: 36004, sum loss: 3987.055176, avg loss: 2.266660, ppl: 9.647127 +epoch: 2, batch: 36005, sum loss: 4463.855469, avg loss: 2.497961, ppl: 12.157674 +epoch: 2, batch: 36006, sum loss: 3715.151855, avg loss: 2.434569, ppl: 11.410898 +epoch: 2, batch: 36007, sum loss: 3571.973877, avg loss: 2.333099, ppl: 10.309838 +epoch: 2, batch: 36008, sum loss: 4861.776367, avg loss: 2.633682, ppl: 13.924944 +epoch: 2, batch: 36009, sum loss: 3253.394775, avg loss: 2.154566, ppl: 8.624146 +epoch: 2, batch: 36010, sum loss: 4933.780762, avg loss: 2.757843, ppl: 15.765796 +epoch: 2, batch: 36011, sum loss: 4494.827148, avg loss: 2.480589, ppl: 11.948298 +epoch: 2, batch: 36012, sum loss: 4565.139648, avg loss: 2.446484, ppl: 11.547675 +epoch: 2, batch: 36013, sum loss: 4066.834473, avg loss: 2.530700, ppl: 12.562293 +epoch: 2, batch: 36014, sum loss: 3507.482910, avg loss: 2.193548, ppl: 8.966969 +epoch: 2, batch: 36015, sum loss: 4786.250488, avg loss: 2.659028, ppl: 14.282401 +epoch: 2, batch: 36016, sum loss: 4532.476562, avg loss: 2.681939, ppl: 14.613400 +epoch: 2, batch: 36017, sum loss: 3863.227295, avg loss: 2.370078, ppl: 10.698228 +epoch: 2, batch: 36018, sum loss: 4202.637207, avg loss: 2.341302, ppl: 10.394763 +epoch: 2, batch: 36019, sum loss: 5321.405273, avg loss: 2.734535, ppl: 15.402583 +epoch: 2, batch: 36020, sum loss: 3763.526367, avg loss: 2.252260, ppl: 9.509202 +epoch: 2, batch: 36021, sum loss: 3611.991211, avg loss: 2.193073, ppl: 8.962711 +epoch: 2, batch: 36022, sum loss: 3369.177979, avg loss: 2.354422, ppl: 
10.532041 +epoch: 2, batch: 36023, sum loss: 3805.205566, avg loss: 2.131768, ppl: 8.429757 +epoch: 2, batch: 36024, sum loss: 4314.545410, avg loss: 2.549968, ppl: 12.806691 +epoch: 2, batch: 36025, sum loss: 4808.338867, avg loss: 2.782604, ppl: 16.161041 +epoch: 2, batch: 36026, sum loss: 4089.378906, avg loss: 2.373406, ppl: 10.733891 +epoch: 2, batch: 36027, sum loss: 4365.976562, avg loss: 2.606553, ppl: 13.552259 +epoch: 2, batch: 36028, sum loss: 3760.705078, avg loss: 2.434114, ppl: 11.405704 +epoch: 2, batch: 36029, sum loss: 4299.549316, avg loss: 2.168204, ppl: 8.742571 +epoch: 2, batch: 36030, sum loss: 3458.542969, avg loss: 2.084715, ppl: 8.042302 +epoch: 2, batch: 36031, sum loss: 4654.409668, avg loss: 2.601682, ppl: 13.486405 +epoch: 2, batch: 36032, sum loss: 3693.739258, avg loss: 2.428494, ppl: 11.341788 +epoch: 2, batch: 36033, sum loss: 3866.760254, avg loss: 2.388363, ppl: 10.895647 +epoch: 2, batch: 36034, sum loss: 4723.489258, avg loss: 2.634406, ppl: 13.935027 +epoch: 2, batch: 36035, sum loss: 4423.136230, avg loss: 2.708595, ppl: 15.008179 +epoch: 2, batch: 36036, sum loss: 4701.404297, avg loss: 2.639755, ppl: 14.009778 +epoch: 2, batch: 36037, sum loss: 4171.870117, avg loss: 2.501121, ppl: 12.196161 +epoch: 2, batch: 36038, sum loss: 4320.883301, avg loss: 2.467666, ppl: 11.794888 +epoch: 2, batch: 36039, sum loss: 4767.041016, avg loss: 2.627917, ppl: 13.844898 +epoch: 2, batch: 36040, sum loss: 4155.105957, avg loss: 2.580811, ppl: 13.207846 +epoch: 2, batch: 36041, sum loss: 4783.100586, avg loss: 2.520074, ppl: 12.429518 +epoch: 2, batch: 36042, sum loss: 3972.013184, avg loss: 2.476318, ppl: 11.897373 +epoch: 2, batch: 36043, sum loss: 4468.426270, avg loss: 2.315247, ppl: 10.127422 +epoch: 2, batch: 36044, sum loss: 5304.112793, avg loss: 2.692443, ppl: 14.767712 +epoch: 2, batch: 36045, sum loss: 4368.592285, avg loss: 2.642827, ppl: 14.052869 +epoch: 2, batch: 36046, sum loss: 4220.903809, avg loss: 2.536601, ppl: 12.636643 
+epoch: 2, batch: 36047, sum loss: 3807.339600, avg loss: 2.577752, ppl: 13.167506 +epoch: 2, batch: 36048, sum loss: 4841.497559, avg loss: 2.550842, ppl: 12.817889 +epoch: 2, batch: 36049, sum loss: 4667.887695, avg loss: 2.490869, ppl: 12.071756 +epoch: 2, batch: 36050, sum loss: 3541.365479, avg loss: 2.216124, ppl: 9.171708 +epoch: 2, batch: 36051, sum loss: 4793.019531, avg loss: 2.626312, ppl: 13.822698 +epoch: 2, batch: 36052, sum loss: 4971.249023, avg loss: 2.548052, ppl: 12.782178 +epoch: 2, batch: 36053, sum loss: 3877.732910, avg loss: 2.254496, ppl: 9.530487 +epoch: 2, batch: 36054, sum loss: 3686.569336, avg loss: 2.327380, ppl: 10.251045 +epoch: 2, batch: 36055, sum loss: 4008.416504, avg loss: 2.314328, ppl: 10.118123 +epoch: 2, batch: 36056, sum loss: 3138.964355, avg loss: 2.227796, ppl: 9.279390 +epoch: 2, batch: 36057, sum loss: 3666.449707, avg loss: 2.391683, ppl: 10.931875 +epoch: 2, batch: 36058, sum loss: 4232.519043, avg loss: 2.607837, ppl: 13.569664 +epoch: 2, batch: 36059, sum loss: 3988.354492, avg loss: 2.486505, ppl: 12.019198 +epoch: 2, batch: 36060, sum loss: 4231.891602, avg loss: 2.473344, ppl: 11.862048 +epoch: 2, batch: 36061, sum loss: 5052.473633, avg loss: 2.556920, ppl: 12.896034 +epoch: 2, batch: 36062, sum loss: 4483.637207, avg loss: 2.645214, ppl: 14.086453 +epoch: 2, batch: 36063, sum loss: 3811.625732, avg loss: 2.294778, ppl: 9.922230 +epoch: 2, batch: 36064, sum loss: 3634.627686, avg loss: 2.278764, ppl: 9.764600 +epoch: 2, batch: 36065, sum loss: 4952.762207, avg loss: 2.493838, ppl: 12.107657 +epoch: 2, batch: 36066, sum loss: 4329.837891, avg loss: 2.375117, ppl: 10.752269 +epoch: 2, batch: 36067, sum loss: 4226.208008, avg loss: 2.724828, ppl: 15.253786 +epoch: 2, batch: 36068, sum loss: 4757.487305, avg loss: 2.466297, ppl: 11.778751 +epoch: 2, batch: 36069, sum loss: 4559.487793, avg loss: 2.620395, ppl: 13.741153 +epoch: 2, batch: 36070, sum loss: 4136.755371, avg loss: 2.516274, ppl: 12.382380 +epoch: 2, 
batch: 36071, sum loss: 4697.455078, avg loss: 2.834915, ppl: 17.028959 +epoch: 2, batch: 36072, sum loss: 4663.087891, avg loss: 2.631540, ppl: 13.895146 +epoch: 2, batch: 36073, sum loss: 3759.708252, avg loss: 2.540344, ppl: 12.684028 +epoch: 2, batch: 36074, sum loss: 4370.255859, avg loss: 2.279737, ppl: 9.774112 +epoch: 2, batch: 36075, sum loss: 4561.537109, avg loss: 2.425060, ppl: 11.302902 +epoch: 2, batch: 36076, sum loss: 4168.349121, avg loss: 2.608479, ppl: 13.578380 +epoch: 2, batch: 36077, sum loss: 4757.871094, avg loss: 2.611345, ppl: 13.617358 +epoch: 2, batch: 36078, sum loss: 4916.658203, avg loss: 2.602784, ppl: 13.501269 +epoch: 2, batch: 36079, sum loss: 3710.341797, avg loss: 2.455554, ppl: 11.652884 +epoch: 2, batch: 36080, sum loss: 4410.407227, avg loss: 2.504490, ppl: 12.237318 +epoch: 2, batch: 36081, sum loss: 4396.633789, avg loss: 2.705621, ppl: 14.963603 +epoch: 2, batch: 36082, sum loss: 4239.757812, avg loss: 2.707380, ppl: 14.989955 +epoch: 2, batch: 36083, sum loss: 3953.863037, avg loss: 2.434645, ppl: 11.411763 +epoch: 2, batch: 36084, sum loss: 4681.416992, avg loss: 2.443328, ppl: 11.511291 +epoch: 2, batch: 36085, sum loss: 4950.265137, avg loss: 2.728922, ppl: 15.316373 +epoch: 2, batch: 36086, sum loss: 4058.585693, avg loss: 2.465727, ppl: 11.772033 +epoch: 2, batch: 36087, sum loss: 3822.708252, avg loss: 2.330920, ppl: 10.287399 +epoch: 2, batch: 36088, sum loss: 3623.674072, avg loss: 2.300745, ppl: 9.981619 +epoch: 2, batch: 36089, sum loss: 3225.255127, avg loss: 2.150170, ppl: 8.586319 +epoch: 2, batch: 36090, sum loss: 3382.022949, avg loss: 2.174934, ppl: 8.801607 +epoch: 2, batch: 36091, sum loss: 4678.757812, avg loss: 2.373799, ppl: 10.738108 +epoch: 2, batch: 36092, sum loss: 3616.406250, avg loss: 2.492354, ppl: 12.089706 +epoch: 2, batch: 36093, sum loss: 3036.913574, avg loss: 2.259608, ppl: 9.579336 +epoch: 2, batch: 36094, sum loss: 3499.425293, avg loss: 2.337625, ppl: 10.356613 +epoch: 2, batch: 
36095, sum loss: 4050.258301, avg loss: 2.374125, ppl: 10.741615 +epoch: 2, batch: 36096, sum loss: 4087.769775, avg loss: 2.562865, ppl: 12.972932 +epoch: 2, batch: 36097, sum loss: 4686.906738, avg loss: 2.661503, ppl: 14.317790 +epoch: 2, batch: 36098, sum loss: 4371.848145, avg loss: 2.463013, ppl: 11.740130 +epoch: 2, batch: 36099, sum loss: 4864.139160, avg loss: 2.866317, ppl: 17.572172 +epoch: 2, batch: 36100, sum loss: 4820.411133, avg loss: 2.538394, ppl: 12.659329 +epoch: 2, batch: 36101, sum loss: 4639.611816, avg loss: 2.714811, ppl: 15.101753 +epoch: 2, batch: 36102, sum loss: 4086.377686, avg loss: 2.284169, ppl: 9.817522 +epoch: 2, batch: 36103, sum loss: 4732.814941, avg loss: 2.373528, ppl: 10.735199 +epoch: 2, batch: 36104, sum loss: 5895.594238, avg loss: 2.718116, ppl: 15.151753 +epoch: 2, batch: 36105, sum loss: 4341.716309, avg loss: 2.376418, ppl: 10.766273 +epoch: 2, batch: 36106, sum loss: 4051.348145, avg loss: 2.410082, ppl: 11.134875 +epoch: 2, batch: 36107, sum loss: 3808.295410, avg loss: 2.260116, ppl: 9.584200 +epoch: 2, batch: 36108, sum loss: 4380.971191, avg loss: 2.419090, ppl: 11.235628 +epoch: 2, batch: 36109, sum loss: 4144.476562, avg loss: 2.442237, ppl: 11.498739 +epoch: 2, batch: 36110, sum loss: 3769.855225, avg loss: 2.342980, ppl: 10.412220 +epoch: 2, batch: 36111, sum loss: 5034.059570, avg loss: 2.500775, ppl: 12.191937 +epoch: 2, batch: 36112, sum loss: 3945.763672, avg loss: 2.414788, ppl: 11.187399 +epoch: 2, batch: 36113, sum loss: 4351.769043, avg loss: 2.492422, ppl: 12.090528 +epoch: 2, batch: 36114, sum loss: 4502.443848, avg loss: 2.445651, ppl: 11.538062 +epoch: 2, batch: 36115, sum loss: 4905.967773, avg loss: 2.556523, ppl: 12.890919 +epoch: 2, batch: 36116, sum loss: 4035.840820, avg loss: 2.415225, ppl: 11.192288 +epoch: 2, batch: 36117, sum loss: 3624.402100, avg loss: 2.327811, ppl: 10.255470 +epoch: 2, batch: 36118, sum loss: 3555.613281, avg loss: 2.350042, ppl: 10.486009 +epoch: 2, batch: 36119, 
sum loss: 4519.709961, avg loss: 2.660218, ppl: 14.299406 +epoch: 2, batch: 36120, sum loss: 4187.459473, avg loss: 2.323784, ppl: 10.214255 +epoch: 2, batch: 36121, sum loss: 5260.020996, avg loss: 2.562114, ppl: 12.963199 +epoch: 2, batch: 36122, sum loss: 4111.919434, avg loss: 2.664886, ppl: 14.366315 +epoch: 2, batch: 36123, sum loss: 3706.546875, avg loss: 2.369915, ppl: 10.696481 +epoch: 2, batch: 36124, sum loss: 3443.109375, avg loss: 2.047033, ppl: 7.744886 +epoch: 2, batch: 36125, sum loss: 4321.825195, avg loss: 2.518546, ppl: 12.410540 +epoch: 2, batch: 36126, sum loss: 4594.499023, avg loss: 2.563895, ppl: 12.986298 +epoch: 2, batch: 36127, sum loss: 4321.961914, avg loss: 2.475351, ppl: 11.885874 +epoch: 2, batch: 36128, sum loss: 4961.910645, avg loss: 2.482197, ppl: 11.967523 +epoch: 2, batch: 36129, sum loss: 4020.149170, avg loss: 2.266149, ppl: 9.642200 +epoch: 2, batch: 36130, sum loss: 4362.953613, avg loss: 2.442863, ppl: 11.505938 +epoch: 2, batch: 36131, sum loss: 3152.099609, avg loss: 2.091639, ppl: 8.098176 +epoch: 2, batch: 36132, sum loss: 4320.341797, avg loss: 2.778355, ppl: 16.092529 +epoch: 2, batch: 36133, sum loss: 3402.307373, avg loss: 2.259168, ppl: 9.575123 +epoch: 2, batch: 36134, sum loss: 3994.421631, avg loss: 2.444566, ppl: 11.525552 +epoch: 2, batch: 36135, sum loss: 4609.220215, avg loss: 2.582196, ppl: 13.226154 +epoch: 2, batch: 36136, sum loss: 3587.937500, avg loss: 2.517851, ppl: 12.401915 +epoch: 2, batch: 36137, sum loss: 4387.174805, avg loss: 2.534474, ppl: 12.609798 +epoch: 2, batch: 36138, sum loss: 3252.738770, avg loss: 2.167048, ppl: 8.732468 +epoch: 2, batch: 36139, sum loss: 3974.114502, avg loss: 2.477628, ppl: 11.912968 +epoch: 2, batch: 36140, sum loss: 4645.213867, avg loss: 2.484072, ppl: 11.989985 +epoch: 2, batch: 36141, sum loss: 3882.723145, avg loss: 2.446581, ppl: 11.548790 +epoch: 2, batch: 36142, sum loss: 4370.750488, avg loss: 2.455478, ppl: 11.651999 +epoch: 2, batch: 36143, sum loss: 
3669.451416, avg loss: 2.422080, ppl: 11.269279 +epoch: 2, batch: 36144, sum loss: 4627.367188, avg loss: 2.471884, ppl: 11.844745 +epoch: 2, batch: 36145, sum loss: 4102.077637, avg loss: 2.410151, ppl: 11.135645 +epoch: 2, batch: 36146, sum loss: 4900.886230, avg loss: 2.948788, ppl: 19.082819 +epoch: 2, batch: 36147, sum loss: 3821.494629, avg loss: 2.345914, ppl: 10.442817 +epoch: 2, batch: 36148, sum loss: 5349.860352, avg loss: 2.763357, ppl: 15.852979 +epoch: 2, batch: 36149, sum loss: 3477.631104, avg loss: 2.400022, ppl: 11.023414 +epoch: 2, batch: 36150, sum loss: 4083.939453, avg loss: 2.550868, ppl: 12.818226 +epoch: 2, batch: 36151, sum loss: 4690.745117, avg loss: 2.466217, ppl: 11.777810 +epoch: 2, batch: 36152, sum loss: 3659.527100, avg loss: 2.385611, ppl: 10.865698 +epoch: 2, batch: 36153, sum loss: 3679.486328, avg loss: 2.283977, ppl: 9.815638 +epoch: 2, batch: 36154, sum loss: 5014.851562, avg loss: 2.622830, ppl: 13.774652 +epoch: 2, batch: 36155, sum loss: 4205.412109, avg loss: 2.506205, ppl: 12.258323 +epoch: 2, batch: 36156, sum loss: 3795.357910, avg loss: 2.300217, ppl: 9.976346 +epoch: 2, batch: 36157, sum loss: 4609.828125, avg loss: 2.687946, ppl: 14.701453 +epoch: 2, batch: 36158, sum loss: 4368.799316, avg loss: 2.369197, ppl: 10.688807 +epoch: 2, batch: 36159, sum loss: 4545.265137, avg loss: 2.807452, ppl: 16.567654 +epoch: 2, batch: 36160, sum loss: 4233.502930, avg loss: 2.536551, ppl: 12.636011 +epoch: 2, batch: 36161, sum loss: 3879.117432, avg loss: 2.422934, ppl: 11.278904 +epoch: 2, batch: 36162, sum loss: 4524.934570, avg loss: 2.544958, ppl: 12.742688 +epoch: 2, batch: 36163, sum loss: 4613.046387, avg loss: 2.429198, ppl: 11.349773 +epoch: 2, batch: 36164, sum loss: 3647.782471, avg loss: 2.351891, ppl: 10.505412 +epoch: 2, batch: 36165, sum loss: 3653.104492, avg loss: 2.252222, ppl: 9.508843 +epoch: 2, batch: 36166, sum loss: 4885.496582, avg loss: 2.619569, ppl: 13.729809 +epoch: 2, batch: 36167, sum loss: 
4949.025391, avg loss: 2.716260, ppl: 15.123653 +epoch: 2, batch: 36168, sum loss: 4117.626953, avg loss: 2.297783, ppl: 9.952093 +epoch: 2, batch: 36169, sum loss: 4696.633789, avg loss: 2.717959, ppl: 15.149377 +epoch: 2, batch: 36170, sum loss: 4562.333008, avg loss: 2.631103, ppl: 13.889082 +epoch: 2, batch: 36171, sum loss: 4776.228516, avg loss: 2.460705, ppl: 11.713067 +epoch: 2, batch: 36172, sum loss: 4978.791016, avg loss: 2.608063, ppl: 13.572728 +epoch: 2, batch: 36173, sum loss: 4106.284180, avg loss: 2.398530, ppl: 11.006989 +epoch: 2, batch: 36174, sum loss: 4910.965332, avg loss: 2.673362, ppl: 14.488595 +epoch: 2, batch: 36175, sum loss: 4062.915771, avg loss: 2.363535, ppl: 10.628453 +epoch: 2, batch: 36176, sum loss: 5494.311523, avg loss: 2.667141, ppl: 14.398750 +epoch: 2, batch: 36177, sum loss: 3879.848389, avg loss: 2.377358, ppl: 10.776396 +epoch: 2, batch: 36178, sum loss: 4475.401855, avg loss: 2.388155, ppl: 10.893372 +epoch: 2, batch: 36179, sum loss: 3652.425049, avg loss: 2.329353, ppl: 10.271290 +epoch: 2, batch: 36180, sum loss: 5713.283203, avg loss: 2.772093, ppl: 15.992067 +epoch: 2, batch: 36181, sum loss: 4689.452148, avg loss: 2.506388, ppl: 12.260567 +epoch: 2, batch: 36182, sum loss: 3583.672607, avg loss: 2.241196, ppl: 9.404573 +epoch: 2, batch: 36183, sum loss: 4364.049805, avg loss: 2.425820, ppl: 11.311497 +epoch: 2, batch: 36184, sum loss: 3483.242188, avg loss: 2.193478, ppl: 8.966341 +epoch: 2, batch: 36185, sum loss: 3660.082764, avg loss: 2.496646, ppl: 12.141702 +epoch: 2, batch: 36186, sum loss: 4463.781738, avg loss: 2.360540, ppl: 10.596676 +epoch: 2, batch: 36187, sum loss: 5112.652832, avg loss: 2.766587, ppl: 15.904261 +epoch: 2, batch: 36188, sum loss: 3797.035889, avg loss: 2.265535, ppl: 9.636275 +epoch: 2, batch: 36189, sum loss: 4802.541992, avg loss: 2.576471, ppl: 13.150649 +epoch: 2, batch: 36190, sum loss: 3499.500977, avg loss: 2.411786, ppl: 11.153860 +epoch: 2, batch: 36191, sum loss: 
4087.190918, avg loss: 2.472590, ppl: 11.853104 +epoch: 2, batch: 36192, sum loss: 4348.679688, avg loss: 2.473652, ppl: 11.865697 +epoch: 2, batch: 36193, sum loss: 3770.731934, avg loss: 2.334819, ppl: 10.327586 +epoch: 2, batch: 36194, sum loss: 3898.827148, avg loss: 2.502456, ppl: 12.212456 +epoch: 2, batch: 36195, sum loss: 4345.921387, avg loss: 2.462278, ppl: 11.731510 +epoch: 2, batch: 36196, sum loss: 4180.447266, avg loss: 2.629212, ppl: 13.862844 +epoch: 2, batch: 36197, sum loss: 4171.440918, avg loss: 2.299582, ppl: 9.970010 +epoch: 2, batch: 36198, sum loss: 4510.926758, avg loss: 2.501901, ppl: 12.205671 +epoch: 2, batch: 36199, sum loss: 3956.612305, avg loss: 2.528187, ppl: 12.530765 +epoch: 2, batch: 36200, sum loss: 3508.722168, avg loss: 2.395032, ppl: 10.968551 +epoch: 2, batch: 36201, sum loss: 3281.327637, avg loss: 2.190472, ppl: 8.939435 +epoch: 2, batch: 36202, sum loss: 4369.787109, avg loss: 2.460466, ppl: 11.710263 +epoch: 2, batch: 36203, sum loss: 4475.942383, avg loss: 2.678601, ppl: 14.564707 +epoch: 2, batch: 36204, sum loss: 4208.762695, avg loss: 2.421613, ppl: 11.264008 +epoch: 2, batch: 36205, sum loss: 3591.465820, avg loss: 2.361253, ppl: 10.604231 +epoch: 2, batch: 36206, sum loss: 4281.864258, avg loss: 2.514307, ppl: 12.358036 +epoch: 2, batch: 36207, sum loss: 4056.263184, avg loss: 2.427447, ppl: 11.329915 +epoch: 2, batch: 36208, sum loss: 4167.331055, avg loss: 2.371845, ppl: 10.717142 +epoch: 2, batch: 36209, sum loss: 3881.967285, avg loss: 2.430787, ppl: 11.367828 +epoch: 2, batch: 36210, sum loss: 4342.872070, avg loss: 2.589667, ppl: 13.325337 +epoch: 2, batch: 36211, sum loss: 3948.030762, avg loss: 2.365507, ppl: 10.649435 +epoch: 2, batch: 36212, sum loss: 4452.382324, avg loss: 2.393754, ppl: 10.954540 +epoch: 2, batch: 36213, sum loss: 4481.939453, avg loss: 2.610332, ppl: 13.603563 +epoch: 2, batch: 36214, sum loss: 4204.838867, avg loss: 2.453232, ppl: 11.625855 +epoch: 2, batch: 36215, sum loss: 
3909.028564, avg loss: 2.470941, ppl: 11.833575 +epoch: 2, batch: 36216, sum loss: 4361.466797, avg loss: 2.262172, ppl: 9.603924 +epoch: 2, batch: 36217, sum loss: 4556.997070, avg loss: 2.655593, ppl: 14.233419 +epoch: 2, batch: 36218, sum loss: 5131.890625, avg loss: 2.818172, ppl: 16.746206 +epoch: 2, batch: 36219, sum loss: 3987.932861, avg loss: 2.566237, ppl: 13.016756 +epoch: 2, batch: 36220, sum loss: 4420.951172, avg loss: 2.443865, ppl: 11.517467 +epoch: 2, batch: 36221, sum loss: 3883.899658, avg loss: 2.287338, ppl: 9.848685 +epoch: 2, batch: 36222, sum loss: 2880.623291, avg loss: 2.045897, ppl: 7.736096 +epoch: 2, batch: 36223, sum loss: 4533.746094, avg loss: 2.593676, ppl: 13.378866 +epoch: 2, batch: 36224, sum loss: 4356.548340, avg loss: 2.486614, ppl: 12.020512 +epoch: 2, batch: 36225, sum loss: 3921.279541, avg loss: 2.372220, ppl: 10.721165 +epoch: 2, batch: 36226, sum loss: 4562.329102, avg loss: 2.706008, ppl: 14.969394 +epoch: 2, batch: 36227, sum loss: 3639.531738, avg loss: 2.324094, ppl: 10.217422 +epoch: 2, batch: 36228, sum loss: 4677.542969, avg loss: 2.520228, ppl: 12.431430 +epoch: 2, batch: 36229, sum loss: 5182.635742, avg loss: 2.703514, ppl: 14.932109 +epoch: 2, batch: 36230, sum loss: 3516.102783, avg loss: 2.387035, ppl: 10.881187 +epoch: 2, batch: 36231, sum loss: 4469.948242, avg loss: 2.523968, ppl: 12.478017 +epoch: 2, batch: 36232, sum loss: 3350.925537, avg loss: 2.335140, ppl: 10.330904 +epoch: 2, batch: 36233, sum loss: 4891.555664, avg loss: 2.549013, ppl: 12.794465 +epoch: 2, batch: 36234, sum loss: 4471.609375, avg loss: 2.628812, ppl: 13.857299 +epoch: 2, batch: 36235, sum loss: 4919.365234, avg loss: 2.555514, ppl: 12.877921 +epoch: 2, batch: 36236, sum loss: 3835.596680, avg loss: 2.422992, ppl: 11.279557 +epoch: 2, batch: 36237, sum loss: 4662.974609, avg loss: 2.488247, ppl: 12.040148 +epoch: 2, batch: 36238, sum loss: 3223.292480, avg loss: 2.169107, ppl: 8.750463 +epoch: 2, batch: 36239, sum loss: 
4182.612793, avg loss: 2.575500, ppl: 13.137891 +epoch: 2, batch: 36240, sum loss: 4702.068848, avg loss: 2.600702, ppl: 13.473190 +epoch: 2, batch: 36241, sum loss: 3803.103271, avg loss: 2.273224, ppl: 9.710656 +epoch: 2, batch: 36242, sum loss: 4340.639648, avg loss: 2.480366, ppl: 11.945630 +epoch: 2, batch: 36243, sum loss: 5187.824219, avg loss: 2.686600, ppl: 14.681669 +epoch: 2, batch: 36244, sum loss: 3991.795410, avg loss: 2.572033, ppl: 13.092417 +epoch: 2, batch: 36245, sum loss: 4614.203125, avg loss: 2.566298, ppl: 13.017538 +epoch: 2, batch: 36246, sum loss: 3707.555420, avg loss: 2.238862, ppl: 9.382648 +epoch: 2, batch: 36247, sum loss: 4316.718262, avg loss: 2.586410, ppl: 13.282004 +epoch: 2, batch: 36248, sum loss: 4261.506836, avg loss: 2.418562, ppl: 11.229704 +epoch: 2, batch: 36249, sum loss: 3424.219238, avg loss: 2.243918, ppl: 9.430210 +epoch: 2, batch: 36250, sum loss: 4117.684570, avg loss: 2.470117, ppl: 11.823826 +epoch: 2, batch: 36251, sum loss: 3628.526855, avg loss: 2.320030, ppl: 10.175980 +epoch: 2, batch: 36252, sum loss: 3756.180176, avg loss: 2.344682, ppl: 10.429953 +epoch: 2, batch: 36253, sum loss: 3330.270996, avg loss: 2.229097, ppl: 9.291473 +epoch: 2, batch: 36254, sum loss: 3386.529297, avg loss: 2.466518, ppl: 11.781352 +epoch: 2, batch: 36255, sum loss: 3959.490234, avg loss: 2.345670, ppl: 10.440263 +epoch: 2, batch: 36256, sum loss: 4027.789062, avg loss: 2.605297, ppl: 13.535243 +epoch: 2, batch: 36257, sum loss: 3149.056641, avg loss: 2.337830, ppl: 10.358732 +epoch: 2, batch: 36258, sum loss: 4024.733154, avg loss: 2.233481, ppl: 9.332299 +epoch: 2, batch: 36259, sum loss: 4401.396484, avg loss: 2.482457, ppl: 11.970642 +epoch: 2, batch: 36260, sum loss: 4427.647461, avg loss: 2.571224, ppl: 13.081823 +epoch: 2, batch: 36261, sum loss: 4116.360352, avg loss: 2.208348, ppl: 9.100668 +epoch: 2, batch: 36262, sum loss: 5100.406738, avg loss: 2.828845, ppl: 16.925892 +epoch: 2, batch: 36263, sum loss: 2778.857666, 
avg loss: 2.055368, ppl: 7.809713 +epoch: 2, batch: 36264, sum loss: 4576.937988, avg loss: 2.486115, ppl: 12.014511 +epoch: 2, batch: 36265, sum loss: 3848.423828, avg loss: 2.258465, ppl: 9.568389 +epoch: 2, batch: 36266, sum loss: 3560.218262, avg loss: 2.407179, ppl: 11.102600 +epoch: 2, batch: 36267, sum loss: 4768.191406, avg loss: 2.390071, ppl: 10.914268 +epoch: 2, batch: 36268, sum loss: 4872.162109, avg loss: 2.584701, ppl: 13.259328 +epoch: 2, batch: 36269, sum loss: 3798.163330, avg loss: 2.254103, ppl: 9.526744 +epoch: 2, batch: 36270, sum loss: 4494.193848, avg loss: 2.562254, ppl: 12.965010 +epoch: 2, batch: 36271, sum loss: 3685.769775, avg loss: 2.380988, ppl: 10.815587 +epoch: 2, batch: 36272, sum loss: 3800.951904, avg loss: 2.278748, ppl: 9.764450 +epoch: 2, batch: 36273, sum loss: 5451.199219, avg loss: 2.648785, ppl: 14.136850 +epoch: 2, batch: 36274, sum loss: 4394.554688, avg loss: 2.522707, ppl: 12.462280 +epoch: 2, batch: 36275, sum loss: 4118.340820, avg loss: 2.423980, ppl: 11.290702 +epoch: 2, batch: 36276, sum loss: 4410.589355, avg loss: 2.834569, ppl: 17.023062 +epoch: 2, batch: 36277, sum loss: 3881.252441, avg loss: 2.492776, ppl: 12.094807 +epoch: 2, batch: 36278, sum loss: 3200.448975, avg loss: 2.210255, ppl: 9.118040 +epoch: 2, batch: 36279, sum loss: 4187.723145, avg loss: 2.398467, ppl: 11.006291 +epoch: 2, batch: 36280, sum loss: 4312.241699, avg loss: 2.553133, ppl: 12.847292 +epoch: 2, batch: 36281, sum loss: 4410.226562, avg loss: 2.559621, ppl: 12.930914 +epoch: 2, batch: 36282, sum loss: 4806.350098, avg loss: 2.670195, ppl: 14.442780 +epoch: 2, batch: 36283, sum loss: 4047.995361, avg loss: 2.435617, ppl: 11.422864 +epoch: 2, batch: 36284, sum loss: 3678.256104, avg loss: 2.288896, ppl: 9.864045 +epoch: 2, batch: 36285, sum loss: 3949.465820, avg loss: 2.409680, ppl: 11.130401 +epoch: 2, batch: 36286, sum loss: 4234.870117, avg loss: 2.270708, ppl: 9.686255 +epoch: 2, batch: 36287, sum loss: 3825.153809, avg loss: 
2.452022, ppl: 11.611800 +epoch: 2, batch: 36288, sum loss: 3875.150146, avg loss: 2.368674, ppl: 10.683215 +epoch: 2, batch: 36289, sum loss: 4534.649414, avg loss: 2.658059, ppl: 14.268572 +epoch: 2, batch: 36290, sum loss: 4212.904297, avg loss: 2.384213, ppl: 10.850519 +epoch: 2, batch: 36291, sum loss: 4351.542969, avg loss: 2.682825, ppl: 14.626359 +epoch: 2, batch: 36292, sum loss: 3613.371094, avg loss: 2.316264, ppl: 10.137726 +epoch: 2, batch: 36293, sum loss: 4599.215820, avg loss: 2.466068, ppl: 11.776047 +epoch: 2, batch: 36294, sum loss: 4339.421875, avg loss: 2.551100, ppl: 12.821206 +epoch: 2, batch: 36295, sum loss: 4004.749512, avg loss: 2.352967, ppl: 10.516725 +epoch: 2, batch: 36296, sum loss: 3641.333496, avg loss: 2.437305, ppl: 11.442163 +epoch: 2, batch: 36297, sum loss: 3910.005127, avg loss: 2.552223, ppl: 12.835602 +epoch: 2, batch: 36298, sum loss: 3984.635498, avg loss: 2.471858, ppl: 11.844437 +epoch: 2, batch: 36299, sum loss: 3899.047852, avg loss: 2.553404, ppl: 12.850775 +epoch: 2, batch: 36300, sum loss: 3900.992188, avg loss: 2.634026, ppl: 13.929736 +epoch: 2, batch: 36301, sum loss: 4536.673828, avg loss: 2.476350, ppl: 11.897762 +epoch: 2, batch: 36302, sum loss: 4242.607422, avg loss: 2.607626, ppl: 13.566805 +epoch: 2, batch: 36303, sum loss: 3473.677246, avg loss: 2.286819, ppl: 9.843571 +epoch: 2, batch: 36304, sum loss: 4595.750488, avg loss: 2.462889, ppl: 11.738672 +epoch: 2, batch: 36305, sum loss: 4142.726562, avg loss: 2.495619, ppl: 12.129234 +epoch: 2, batch: 36306, sum loss: 3979.165527, avg loss: 2.468465, ppl: 11.804312 +epoch: 2, batch: 36307, sum loss: 5541.485840, avg loss: 2.893726, ppl: 18.060484 +epoch: 2, batch: 36308, sum loss: 4060.691895, avg loss: 2.441787, ppl: 11.493562 +epoch: 2, batch: 36309, sum loss: 5045.717773, avg loss: 2.736290, ppl: 15.429627 +epoch: 2, batch: 36310, sum loss: 4619.402832, avg loss: 2.413481, ppl: 11.172786 +epoch: 2, batch: 36311, sum loss: 3977.264404, avg loss: 
2.345085, ppl: 10.434161 +epoch: 2, batch: 36312, sum loss: 5685.901855, avg loss: 2.805082, ppl: 16.528437 +epoch: 2, batch: 36313, sum loss: 3161.876221, avg loss: 2.063888, ppl: 7.876533 +epoch: 2, batch: 36314, sum loss: 5092.153320, avg loss: 2.692836, ppl: 14.773519 +epoch: 2, batch: 36315, sum loss: 5131.717773, avg loss: 2.747172, ppl: 15.598459 +epoch: 2, batch: 36316, sum loss: 3871.224121, avg loss: 2.572242, ppl: 13.095151 +epoch: 2, batch: 36317, sum loss: 4327.373535, avg loss: 2.339121, ppl: 10.372114 +epoch: 2, batch: 36318, sum loss: 4108.470703, avg loss: 2.545521, ppl: 12.749869 +epoch: 2, batch: 36319, sum loss: 3465.483643, avg loss: 2.304178, ppl: 10.015942 +epoch: 2, batch: 36320, sum loss: 3965.973389, avg loss: 2.316573, ppl: 10.140863 +epoch: 2, batch: 36321, sum loss: 4288.086426, avg loss: 2.335559, ppl: 10.335235 +epoch: 2, batch: 36322, sum loss: 5000.531738, avg loss: 2.647185, ppl: 14.114245 +epoch: 2, batch: 36323, sum loss: 4816.469727, avg loss: 2.653702, ppl: 14.206541 +epoch: 2, batch: 36324, sum loss: 3472.989258, avg loss: 2.195315, ppl: 8.982834 +epoch: 2, batch: 36325, sum loss: 4146.010742, avg loss: 2.503630, ppl: 12.226793 +epoch: 2, batch: 36326, sum loss: 3851.864746, avg loss: 2.544164, ppl: 12.732585 +epoch: 2, batch: 36327, sum loss: 3815.504395, avg loss: 2.265739, ppl: 9.638245 +epoch: 2, batch: 36328, sum loss: 4032.456543, avg loss: 2.301630, ppl: 9.990458 +epoch: 2, batch: 36329, sum loss: 3244.748779, avg loss: 2.199830, ppl: 9.023476 +epoch: 2, batch: 36330, sum loss: 3755.116211, avg loss: 2.291102, ppl: 9.885825 +epoch: 2, batch: 36331, sum loss: 4745.770020, avg loss: 2.704142, ppl: 14.941496 +epoch: 2, batch: 36332, sum loss: 3700.831543, avg loss: 2.372328, ppl: 10.722325 +epoch: 2, batch: 36333, sum loss: 4983.732422, avg loss: 2.574242, ppl: 13.121366 +epoch: 2, batch: 36334, sum loss: 4503.098145, avg loss: 2.615040, ppl: 13.667757 +epoch: 2, batch: 36335, sum loss: 4198.464355, avg loss: 2.612610, 
ppl: 13.634592 +epoch: 2, batch: 36336, sum loss: 5151.851562, avg loss: 2.780276, ppl: 16.123472 +epoch: 2, batch: 36337, sum loss: 5195.166504, avg loss: 2.509742, ppl: 12.301759 +epoch: 2, batch: 36338, sum loss: 4208.462891, avg loss: 2.488742, ppl: 12.046114 +epoch: 2, batch: 36339, sum loss: 3648.939941, avg loss: 2.259406, ppl: 9.577394 +epoch: 2, batch: 36340, sum loss: 4667.048828, avg loss: 2.598580, ppl: 13.444629 +epoch: 2, batch: 36341, sum loss: 5520.917969, avg loss: 2.760459, ppl: 15.807099 +epoch: 2, batch: 36342, sum loss: 4818.292480, avg loss: 2.518710, ppl: 12.412576 +epoch: 2, batch: 36343, sum loss: 3811.194336, avg loss: 2.457250, ppl: 11.672664 +epoch: 2, batch: 36344, sum loss: 3708.471680, avg loss: 2.173782, ppl: 8.791468 +epoch: 2, batch: 36345, sum loss: 3973.227051, avg loss: 2.384890, ppl: 10.857869 +epoch: 2, batch: 36346, sum loss: 4189.903809, avg loss: 2.570493, ppl: 13.072270 +epoch: 2, batch: 36347, sum loss: 3701.137939, avg loss: 2.230945, ppl: 9.308660 +epoch: 2, batch: 36348, sum loss: 4199.855957, avg loss: 2.516391, ppl: 12.383821 +epoch: 2, batch: 36349, sum loss: 5380.550781, avg loss: 2.994185, ppl: 19.969082 +epoch: 2, batch: 36350, sum loss: 4005.026367, avg loss: 2.443579, ppl: 11.514178 +epoch: 2, batch: 36351, sum loss: 4320.208008, avg loss: 2.535333, ppl: 12.620638 +epoch: 2, batch: 36352, sum loss: 4880.653809, avg loss: 2.636766, ppl: 13.967957 +epoch: 2, batch: 36353, sum loss: 4494.377441, avg loss: 2.540632, ppl: 12.687684 +epoch: 2, batch: 36354, sum loss: 3938.524170, avg loss: 2.518238, ppl: 12.406714 +epoch: 2, batch: 36355, sum loss: 4992.829102, avg loss: 2.565688, ppl: 13.009607 +epoch: 2, batch: 36356, sum loss: 3958.487305, avg loss: 2.667444, ppl: 14.403111 +epoch: 2, batch: 36357, sum loss: 4792.812988, avg loss: 2.729393, ppl: 15.323576 +epoch: 2, batch: 36358, sum loss: 4249.009766, avg loss: 2.626088, ppl: 13.819597 +epoch: 2, batch: 36359, sum loss: 4450.797363, avg loss: 2.483704, ppl: 
11.985578 +epoch: 2, batch: 36360, sum loss: 3841.230225, avg loss: 2.432698, ppl: 11.389570 +epoch: 2, batch: 36361, sum loss: 4266.787598, avg loss: 2.547336, ppl: 12.773032 +epoch: 2, batch: 36362, sum loss: 4708.891113, avg loss: 2.567552, ppl: 13.033883 +epoch: 2, batch: 36363, sum loss: 4104.137207, avg loss: 2.555503, ppl: 12.877771 +epoch: 2, batch: 36364, sum loss: 4223.730469, avg loss: 2.536775, ppl: 12.638846 +epoch: 2, batch: 36365, sum loss: 3774.369141, avg loss: 2.470137, ppl: 11.824065 +epoch: 2, batch: 36366, sum loss: 4708.159180, avg loss: 2.508343, ppl: 12.284554 +epoch: 2, batch: 36367, sum loss: 4324.767578, avg loss: 2.499866, ppl: 12.180856 +epoch: 2, batch: 36368, sum loss: 4416.941895, avg loss: 2.605865, ppl: 13.542941 +epoch: 2, batch: 36369, sum loss: 4802.919922, avg loss: 2.747666, ppl: 15.606167 +epoch: 2, batch: 36370, sum loss: 4402.369141, avg loss: 2.518518, ppl: 12.410191 +epoch: 2, batch: 36371, sum loss: 3772.778809, avg loss: 2.362416, ppl: 10.616573 +epoch: 2, batch: 36372, sum loss: 4952.347656, avg loss: 2.768221, ppl: 15.930271 +epoch: 2, batch: 36373, sum loss: 4906.736328, avg loss: 2.679812, ppl: 14.582357 +epoch: 2, batch: 36374, sum loss: 4685.704102, avg loss: 2.686757, ppl: 14.683976 +epoch: 2, batch: 36375, sum loss: 3581.883789, avg loss: 2.285823, ppl: 9.833773 +epoch: 2, batch: 36376, sum loss: 4703.240234, avg loss: 2.821380, ppl: 16.800013 +epoch: 2, batch: 36377, sum loss: 3938.630371, avg loss: 2.627505, ppl: 13.839202 +epoch: 2, batch: 36378, sum loss: 3615.734131, avg loss: 2.392941, ppl: 10.945641 +epoch: 2, batch: 36379, sum loss: 4371.654297, avg loss: 2.336534, ppl: 10.345315 +epoch: 2, batch: 36380, sum loss: 3794.754395, avg loss: 2.355527, ppl: 10.543688 +epoch: 2, batch: 36381, sum loss: 4141.589844, avg loss: 2.463765, ppl: 11.748968 +epoch: 2, batch: 36382, sum loss: 4163.254883, avg loss: 2.552578, ppl: 12.840166 +epoch: 2, batch: 36383, sum loss: 4377.053711, avg loss: 2.550731, ppl: 
12.816465 +epoch: 2, batch: 36384, sum loss: 4693.480469, avg loss: 2.595952, ppl: 13.409341 +epoch: 2, batch: 36385, sum loss: 4518.768555, avg loss: 2.504861, ppl: 12.241853 +epoch: 2, batch: 36386, sum loss: 4095.743652, avg loss: 2.317908, ppl: 10.154412 +epoch: 2, batch: 36387, sum loss: 4248.839355, avg loss: 2.590756, ppl: 13.339849 +epoch: 2, batch: 36388, sum loss: 3923.270996, avg loss: 2.340854, ppl: 10.390105 +epoch: 2, batch: 36389, sum loss: 4929.719727, avg loss: 2.427238, ppl: 11.327549 +epoch: 2, batch: 36390, sum loss: 4385.766602, avg loss: 2.319284, ppl: 10.168393 +epoch: 2, batch: 36391, sum loss: 4176.247559, avg loss: 2.320138, ppl: 10.177073 +epoch: 2, batch: 36392, sum loss: 4979.895508, avg loss: 2.865302, ppl: 17.554359 +epoch: 2, batch: 36393, sum loss: 4025.684570, avg loss: 2.495775, ppl: 12.131128 +epoch: 2, batch: 36394, sum loss: 3387.988525, avg loss: 2.246677, ppl: 9.456258 +epoch: 2, batch: 36395, sum loss: 3987.832764, avg loss: 2.463146, ppl: 11.741690 +epoch: 2, batch: 36396, sum loss: 4035.219238, avg loss: 2.429391, ppl: 11.351971 +epoch: 2, batch: 36397, sum loss: 4092.639160, avg loss: 2.305712, ppl: 10.031318 +epoch: 2, batch: 36398, sum loss: 5117.210449, avg loss: 2.622865, ppl: 13.775139 +epoch: 2, batch: 36399, sum loss: 3686.010986, avg loss: 2.387313, ppl: 10.884208 +epoch: 2, batch: 36400, sum loss: 4054.302246, avg loss: 2.524472, ppl: 12.484302 +epoch: 2, batch: 36401, sum loss: 3976.200928, avg loss: 2.402538, ppl: 11.051192 +epoch: 2, batch: 36402, sum loss: 2891.521973, avg loss: 2.132391, ppl: 8.435009 +epoch: 2, batch: 36403, sum loss: 3971.555908, avg loss: 2.545869, ppl: 12.754312 +epoch: 2, batch: 36404, sum loss: 4577.872559, avg loss: 2.502938, ppl: 12.218333 +epoch: 2, batch: 36405, sum loss: 4056.322021, avg loss: 2.659883, ppl: 14.294621 +epoch: 2, batch: 36406, sum loss: 4999.759277, avg loss: 2.512442, ppl: 12.335013 +epoch: 2, batch: 36407, sum loss: 3738.231201, avg loss: 2.397839, ppl: 10.999384 
+epoch: 2, batch: 36408, sum loss: 4504.763184, avg loss: 2.353586, ppl: 10.523235 +epoch: 2, batch: 36409, sum loss: 5020.326660, avg loss: 2.558780, ppl: 12.920045 +epoch: 2, batch: 36410, sum loss: 4640.715332, avg loss: 2.725024, ppl: 15.256776 +epoch: 2, batch: 36411, sum loss: 4422.224121, avg loss: 2.495612, ppl: 12.129153 +epoch: 2, batch: 36412, sum loss: 3679.941162, avg loss: 2.263186, ppl: 9.613674 +epoch: 2, batch: 36413, sum loss: 4517.073242, avg loss: 2.774615, ppl: 16.032454 +epoch: 2, batch: 36414, sum loss: 4244.909180, avg loss: 2.485310, ppl: 12.004839 +epoch: 2, batch: 36415, sum loss: 4235.045410, avg loss: 2.411757, ppl: 11.153543 +epoch: 2, batch: 36416, sum loss: 3851.450928, avg loss: 2.373044, ppl: 10.730007 +epoch: 2, batch: 36417, sum loss: 4284.847656, avg loss: 2.535413, ppl: 12.621640 +epoch: 2, batch: 36418, sum loss: 3498.826904, avg loss: 2.315570, ppl: 10.130699 +epoch: 2, batch: 36419, sum loss: 5216.683105, avg loss: 2.572329, ppl: 13.096291 +epoch: 2, batch: 36420, sum loss: 4501.540039, avg loss: 2.327580, ppl: 10.253101 +epoch: 2, batch: 36421, sum loss: 5363.294922, avg loss: 2.918006, ppl: 18.504353 +epoch: 2, batch: 36422, sum loss: 3822.373779, avg loss: 2.472428, ppl: 11.851188 +epoch: 2, batch: 36423, sum loss: 3599.615234, avg loss: 2.491083, ppl: 12.074347 +epoch: 2, batch: 36424, sum loss: 5301.355469, avg loss: 2.708920, ppl: 15.013045 +epoch: 2, batch: 36425, sum loss: 4318.324219, avg loss: 2.407093, ppl: 11.101640 +epoch: 2, batch: 36426, sum loss: 3731.910645, avg loss: 2.358983, ppl: 10.580181 +epoch: 2, batch: 36427, sum loss: 4835.039062, avg loss: 2.617780, ppl: 13.705264 +epoch: 2, batch: 36428, sum loss: 3812.251953, avg loss: 2.435944, ppl: 11.426596 +epoch: 2, batch: 36429, sum loss: 4622.737793, avg loss: 2.579653, ppl: 13.192556 +epoch: 2, batch: 36430, sum loss: 3930.662354, avg loss: 2.341074, ppl: 10.392387 +epoch: 2, batch: 36431, sum loss: 5279.171875, avg loss: 2.652850, ppl: 14.194437 +epoch: 
2, batch: 36432, sum loss: 3806.003906, avg loss: 2.158822, ppl: 8.660932 +epoch: 2, batch: 36433, sum loss: 5032.932617, avg loss: 2.707333, ppl: 14.989251 +epoch: 2, batch: 36434, sum loss: 3726.424805, avg loss: 2.177922, ppl: 8.827945 +epoch: 2, batch: 36435, sum loss: 4312.273926, avg loss: 2.557695, ppl: 12.906036 +epoch: 2, batch: 36436, sum loss: 4544.550781, avg loss: 2.614816, ppl: 13.664708 +epoch: 2, batch: 36437, sum loss: 3813.208984, avg loss: 2.411897, ppl: 11.155099 +epoch: 2, batch: 36438, sum loss: 4014.097656, avg loss: 2.282034, ppl: 9.796586 +epoch: 2, batch: 36439, sum loss: 4510.372559, avg loss: 2.664130, ppl: 14.355461 +epoch: 2, batch: 36440, sum loss: 4271.459961, avg loss: 2.446426, ppl: 11.547006 +epoch: 2, batch: 36441, sum loss: 4194.130371, avg loss: 2.511455, ppl: 12.322850 +epoch: 2, batch: 36442, sum loss: 4568.932617, avg loss: 2.496685, ppl: 12.142170 +epoch: 2, batch: 36443, sum loss: 4812.227539, avg loss: 2.625329, ppl: 13.809114 +epoch: 2, batch: 36444, sum loss: 4328.885254, avg loss: 2.550905, ppl: 12.818696 +epoch: 2, batch: 36445, sum loss: 3890.364258, avg loss: 2.394070, ppl: 10.958007 +epoch: 2, batch: 36446, sum loss: 4261.418945, avg loss: 2.439278, ppl: 11.464762 +epoch: 2, batch: 36447, sum loss: 3360.098389, avg loss: 2.114599, ppl: 8.286266 +epoch: 2, batch: 36448, sum loss: 4029.134521, avg loss: 2.440421, ppl: 11.477870 +epoch: 2, batch: 36449, sum loss: 4496.343262, avg loss: 2.705381, ppl: 14.960018 +epoch: 2, batch: 36450, sum loss: 4747.700195, avg loss: 2.558028, ppl: 12.910336 +epoch: 2, batch: 36451, sum loss: 3412.281250, avg loss: 2.233168, ppl: 9.329378 +epoch: 2, batch: 36452, sum loss: 3780.529297, avg loss: 2.276056, ppl: 9.738197 +epoch: 2, batch: 36453, sum loss: 3893.577393, avg loss: 2.458066, ppl: 11.682201 +epoch: 2, batch: 36454, sum loss: 3824.946289, avg loss: 2.404114, ppl: 11.068624 +epoch: 2, batch: 36455, sum loss: 3989.219727, avg loss: 2.320663, ppl: 10.182425 +epoch: 2, batch: 
36456, sum loss: 5051.752930, avg loss: 2.697145, ppl: 14.837314 +epoch: 2, batch: 36457, sum loss: 4556.409180, avg loss: 2.580073, ppl: 13.198103 +epoch: 2, batch: 36458, sum loss: 4321.684570, avg loss: 2.413001, ppl: 11.167422 +epoch: 2, batch: 36459, sum loss: 3678.573242, avg loss: 2.322332, ppl: 10.199429 +epoch: 2, batch: 36460, sum loss: 4442.301758, avg loss: 2.547191, ppl: 12.771184 +epoch: 2, batch: 36461, sum loss: 4108.608398, avg loss: 2.351808, ppl: 10.504543 +epoch: 2, batch: 36462, sum loss: 3888.720947, avg loss: 2.561740, ppl: 12.958342 +epoch: 2, batch: 36463, sum loss: 4110.881348, avg loss: 2.470482, ppl: 11.828142 +epoch: 2, batch: 36464, sum loss: 2735.167236, avg loss: 2.277408, ppl: 9.751373 +epoch: 2, batch: 36465, sum loss: 3794.339844, avg loss: 2.302391, ppl: 9.998057 +epoch: 2, batch: 36466, sum loss: 3686.375244, avg loss: 2.434858, ppl: 11.414202 +epoch: 2, batch: 36467, sum loss: 5149.178711, avg loss: 2.572017, ppl: 13.092207 +epoch: 2, batch: 36468, sum loss: 3741.539307, avg loss: 2.369563, ppl: 10.692715 +epoch: 2, batch: 36469, sum loss: 2633.201660, avg loss: 2.096498, ppl: 8.137624 +epoch: 2, batch: 36470, sum loss: 3623.911133, avg loss: 2.177831, ppl: 8.827141 +epoch: 2, batch: 36471, sum loss: 4038.246094, avg loss: 2.590280, ppl: 13.333502 +epoch: 2, batch: 36472, sum loss: 4064.630615, avg loss: 2.704345, ppl: 14.944525 +epoch: 2, batch: 36473, sum loss: 5227.709961, avg loss: 2.669924, ppl: 14.438866 +epoch: 2, batch: 36474, sum loss: 4271.329590, avg loss: 2.554623, ppl: 12.866449 +epoch: 2, batch: 36475, sum loss: 4118.971680, avg loss: 2.470889, ppl: 11.832960 +epoch: 2, batch: 36476, sum loss: 4001.953369, avg loss: 2.412268, ppl: 11.159246 +epoch: 2, batch: 36477, sum loss: 4917.266602, avg loss: 2.798672, ppl: 16.422823 +epoch: 2, batch: 36478, sum loss: 4168.522461, avg loss: 2.379294, ppl: 10.797274 +epoch: 2, batch: 36479, sum loss: 5268.120117, avg loss: 2.850714, ppl: 17.300137 +epoch: 2, batch: 36480, sum 
loss: 4249.038086, avg loss: 2.594040, ppl: 13.383735 +epoch: 2, batch: 36481, sum loss: 3653.322266, avg loss: 2.204781, ppl: 9.068266 +epoch: 2, batch: 36482, sum loss: 3779.607422, avg loss: 2.253791, ppl: 9.523770 +epoch: 2, batch: 36483, sum loss: 4284.985352, avg loss: 2.547554, ppl: 12.775813 +epoch: 2, batch: 36484, sum loss: 4113.200684, avg loss: 2.479325, ppl: 11.933210 +epoch: 2, batch: 36485, sum loss: 5653.688965, avg loss: 2.905287, ppl: 18.270491 +epoch: 2, batch: 36486, sum loss: 4908.658691, avg loss: 2.605445, ppl: 13.537253 +epoch: 2, batch: 36487, sum loss: 4710.278320, avg loss: 2.508135, ppl: 12.282006 +epoch: 2, batch: 36488, sum loss: 4108.094727, avg loss: 2.400991, ppl: 11.034102 +epoch: 2, batch: 36489, sum loss: 4259.077148, avg loss: 2.490688, ppl: 12.069581 +epoch: 2, batch: 36490, sum loss: 3300.327881, avg loss: 2.005059, ppl: 7.426536 +epoch: 2, batch: 36491, sum loss: 3751.739258, avg loss: 2.230523, ppl: 9.304728 +epoch: 2, batch: 36492, sum loss: 3758.199707, avg loss: 2.351815, ppl: 10.504613 +epoch: 2, batch: 36493, sum loss: 3905.263184, avg loss: 2.462335, ppl: 11.732173 +epoch: 2, batch: 36494, sum loss: 3806.080322, avg loss: 2.302529, ppl: 9.999437 +epoch: 2, batch: 36495, sum loss: 3522.247803, avg loss: 2.387965, ppl: 10.891305 +epoch: 2, batch: 36496, sum loss: 3344.390137, avg loss: 2.173093, ppl: 8.785416 +epoch: 2, batch: 36497, sum loss: 4660.051758, avg loss: 2.726771, ppl: 15.283459 +epoch: 2, batch: 36498, sum loss: 3657.027344, avg loss: 2.439645, ppl: 11.468966 +epoch: 2, batch: 36499, sum loss: 4298.157715, avg loss: 2.698153, ppl: 14.852275 +epoch: 2, batch: 36500, sum loss: 3447.084961, avg loss: 2.329112, ppl: 10.268814 +epoch: 2, batch: 36501, sum loss: 4773.752930, avg loss: 2.556911, ppl: 12.895920 +epoch: 2, batch: 36502, sum loss: 4182.147949, avg loss: 2.520885, ppl: 12.439598 +epoch: 2, batch: 36503, sum loss: 4134.748047, avg loss: 2.381767, ppl: 10.824015 +epoch: 2, batch: 36504, sum loss: 
4541.939941, avg loss: 2.561726, ppl: 12.958165 +epoch: 2, batch: 36505, sum loss: 3832.438232, avg loss: 2.242503, ppl: 9.416876 +epoch: 2, batch: 36506, sum loss: 4928.754883, avg loss: 2.632882, ppl: 13.913814 +epoch: 2, batch: 36507, sum loss: 4792.035645, avg loss: 2.632987, ppl: 13.915270 +epoch: 2, batch: 36508, sum loss: 3646.716797, avg loss: 2.411850, ppl: 11.154578 +epoch: 2, batch: 36509, sum loss: 4648.801758, avg loss: 2.843304, ppl: 17.172403 +epoch: 2, batch: 36510, sum loss: 3409.171875, avg loss: 2.352776, ppl: 10.514716 +epoch: 2, batch: 36511, sum loss: 3519.598877, avg loss: 2.492634, ppl: 12.093085 +epoch: 2, batch: 36512, sum loss: 3859.646973, avg loss: 2.290592, ppl: 9.880782 +epoch: 2, batch: 36513, sum loss: 4669.706543, avg loss: 2.412038, ppl: 11.156679 +epoch: 2, batch: 36514, sum loss: 3266.933105, avg loss: 2.225431, ppl: 9.257474 +epoch: 2, batch: 36515, sum loss: 3747.820312, avg loss: 2.432070, ppl: 11.382422 +epoch: 2, batch: 36516, sum loss: 4762.419922, avg loss: 2.923523, ppl: 18.606731 +epoch: 2, batch: 36517, sum loss: 4254.009766, avg loss: 2.267596, ppl: 9.656157 +epoch: 2, batch: 36518, sum loss: 4106.989258, avg loss: 2.300834, ppl: 9.982509 +epoch: 2, batch: 36519, sum loss: 4092.393066, avg loss: 2.433052, ppl: 11.393600 +epoch: 2, batch: 36520, sum loss: 3974.753418, avg loss: 2.467259, ppl: 11.790082 +epoch: 2, batch: 36521, sum loss: 4932.138184, avg loss: 2.654541, ppl: 14.218465 +epoch: 2, batch: 36522, sum loss: 3847.255127, avg loss: 2.493361, ppl: 12.101876 +epoch: 2, batch: 36523, sum loss: 4236.311035, avg loss: 2.524619, ppl: 12.486138 +epoch: 2, batch: 36524, sum loss: 3953.240967, avg loss: 2.350322, ppl: 10.488942 +epoch: 2, batch: 36525, sum loss: 4324.915039, avg loss: 2.563672, ppl: 12.983409 +epoch: 2, batch: 36526, sum loss: 5066.906250, avg loss: 2.680903, ppl: 14.598266 +epoch: 2, batch: 36527, sum loss: 4503.579102, avg loss: 2.677514, ppl: 14.548884 +epoch: 2, batch: 36528, sum loss: 3240.553955, 
avg loss: 2.334693, ppl: 10.326289 +epoch: 2, batch: 36529, sum loss: 4565.153320, avg loss: 2.485114, ppl: 12.002484 +epoch: 2, batch: 36530, sum loss: 3883.914062, avg loss: 2.379849, ppl: 10.803276 +epoch: 2, batch: 36531, sum loss: 3169.070801, avg loss: 2.233313, ppl: 9.330727 +epoch: 2, batch: 36532, sum loss: 3451.055908, avg loss: 2.265959, ppl: 9.640368 +epoch: 2, batch: 36533, sum loss: 4383.454102, avg loss: 2.631125, ppl: 13.889383 +epoch: 2, batch: 36534, sum loss: 4354.635742, avg loss: 2.271589, ppl: 9.694794 +epoch: 2, batch: 36535, sum loss: 5001.023438, avg loss: 2.542462, ppl: 12.710931 +epoch: 2, batch: 36536, sum loss: 4166.102539, avg loss: 2.348423, ppl: 10.469047 +epoch: 2, batch: 36537, sum loss: 4915.376953, avg loss: 2.820067, ppl: 16.777977 +epoch: 2, batch: 36538, sum loss: 5283.053223, avg loss: 2.817628, ppl: 16.737110 +epoch: 2, batch: 36539, sum loss: 4601.105957, avg loss: 2.581990, ppl: 13.223427 +epoch: 2, batch: 36540, sum loss: 4156.728027, avg loss: 2.526886, ppl: 12.514478 +epoch: 2, batch: 36541, sum loss: 3633.047363, avg loss: 2.357591, ppl: 10.565465 +epoch: 2, batch: 36542, sum loss: 3413.168701, avg loss: 2.422405, ppl: 11.272938 +epoch: 2, batch: 36543, sum loss: 3596.266846, avg loss: 2.415223, ppl: 11.192265 +epoch: 2, batch: 36544, sum loss: 3543.265625, avg loss: 2.119178, ppl: 8.324292 +epoch: 2, batch: 36545, sum loss: 4747.244629, avg loss: 2.548172, ppl: 12.783713 +epoch: 2, batch: 36546, sum loss: 3827.407471, avg loss: 2.311236, ppl: 10.086888 +epoch: 2, batch: 36547, sum loss: 3716.639648, avg loss: 2.379411, ppl: 10.798538 +epoch: 2, batch: 36548, sum loss: 3878.722168, avg loss: 2.392796, ppl: 10.944049 +epoch: 2, batch: 36549, sum loss: 4777.446289, avg loss: 2.782438, ppl: 16.158371 +epoch: 2, batch: 36550, sum loss: 4033.317627, avg loss: 2.442955, ppl: 11.506989 +epoch: 2, batch: 36551, sum loss: 3511.386719, avg loss: 2.305573, ppl: 10.029919 +epoch: 2, batch: 36552, sum loss: 3653.487305, avg loss: 
2.417927, ppl: 11.222569 +epoch: 2, batch: 36553, sum loss: 3908.372803, avg loss: 2.201900, ppl: 9.042177 +epoch: 2, batch: 36554, sum loss: 3614.780762, avg loss: 2.227222, ppl: 9.274064 +epoch: 2, batch: 36555, sum loss: 3851.878174, avg loss: 2.306514, ppl: 10.039367 +epoch: 2, batch: 36556, sum loss: 4943.018555, avg loss: 2.787941, ppl: 16.247528 +epoch: 2, batch: 36557, sum loss: 4485.357422, avg loss: 2.615369, ppl: 13.672256 +epoch: 2, batch: 36558, sum loss: 4562.301270, avg loss: 2.598122, ppl: 13.438472 +epoch: 2, batch: 36559, sum loss: 4004.741211, avg loss: 2.430061, ppl: 11.359579 +epoch: 2, batch: 36560, sum loss: 3179.622559, avg loss: 2.277667, ppl: 9.753894 +epoch: 2, batch: 36561, sum loss: 4081.928955, avg loss: 2.356772, ppl: 10.556818 +epoch: 2, batch: 36562, sum loss: 3591.141113, avg loss: 2.416649, ppl: 11.208241 +epoch: 2, batch: 36563, sum loss: 4359.073242, avg loss: 2.419020, ppl: 11.234840 +epoch: 2, batch: 36564, sum loss: 4403.124512, avg loss: 2.549580, ppl: 12.801724 +epoch: 2, batch: 36565, sum loss: 4155.915527, avg loss: 2.384346, ppl: 10.851966 +epoch: 2, batch: 36566, sum loss: 4425.913086, avg loss: 2.456112, ppl: 11.659388 +epoch: 2, batch: 36567, sum loss: 4508.749512, avg loss: 2.776324, ppl: 16.059870 +epoch: 2, batch: 36568, sum loss: 4948.426270, avg loss: 2.430465, ppl: 11.364162 +epoch: 2, batch: 36569, sum loss: 3605.454102, avg loss: 2.348830, ppl: 10.473309 +epoch: 2, batch: 36570, sum loss: 3408.475098, avg loss: 2.216174, ppl: 9.172168 +epoch: 2, batch: 36571, sum loss: 3803.556885, avg loss: 2.384675, ppl: 10.855536 +epoch: 2, batch: 36572, sum loss: 4668.565430, avg loss: 2.569381, ppl: 13.057742 +epoch: 2, batch: 36573, sum loss: 4517.914062, avg loss: 2.423774, ppl: 11.288376 +epoch: 2, batch: 36574, sum loss: 4516.750000, avg loss: 2.550395, ppl: 12.812167 +epoch: 2, batch: 36575, sum loss: 4425.967773, avg loss: 2.682405, ppl: 14.620209 +epoch: 2, batch: 36576, sum loss: 3699.369629, avg loss: 2.168446, 
ppl: 8.744687 +epoch: 2, batch: 36577, sum loss: 3783.373535, avg loss: 2.463134, ppl: 11.741549 +epoch: 2, batch: 36578, sum loss: 3649.171631, avg loss: 2.416670, ppl: 11.208474 +epoch: 2, batch: 36579, sum loss: 3303.911133, avg loss: 2.190922, ppl: 8.943460 +epoch: 2, batch: 36580, sum loss: 5417.206543, avg loss: 2.716753, ppl: 15.131119 +epoch: 2, batch: 36581, sum loss: 3561.058594, avg loss: 2.338187, ppl: 10.362432 +epoch: 2, batch: 36582, sum loss: 4225.401855, avg loss: 2.632649, ppl: 13.910569 +epoch: 2, batch: 36583, sum loss: 3529.575439, avg loss: 2.346792, ppl: 10.451988 +epoch: 2, batch: 36584, sum loss: 4803.446289, avg loss: 2.718419, ppl: 15.156343 +epoch: 2, batch: 36585, sum loss: 4159.160156, avg loss: 2.634047, ppl: 13.930031 +epoch: 2, batch: 36586, sum loss: 4077.475098, avg loss: 2.592165, ppl: 13.358659 +epoch: 2, batch: 36587, sum loss: 4103.646973, avg loss: 2.340928, ppl: 10.390876 +epoch: 2, batch: 36588, sum loss: 3500.184326, avg loss: 2.261101, ppl: 9.593646 +epoch: 2, batch: 36589, sum loss: 3201.165771, avg loss: 2.084092, ppl: 8.037293 +epoch: 2, batch: 36590, sum loss: 3877.804932, avg loss: 2.556233, ppl: 12.887176 +epoch: 2, batch: 36591, sum loss: 3416.255371, avg loss: 2.055509, ppl: 7.810810 +epoch: 2, batch: 36592, sum loss: 4344.338379, avg loss: 2.471182, ppl: 11.836431 +epoch: 2, batch: 36593, sum loss: 4556.822266, avg loss: 2.599442, ppl: 13.456231 +epoch: 2, batch: 36594, sum loss: 3082.630615, avg loss: 2.427268, ppl: 11.327895 +epoch: 2, batch: 36595, sum loss: 3862.844482, avg loss: 2.511602, ppl: 12.324663 +epoch: 2, batch: 36596, sum loss: 5058.402344, avg loss: 2.531733, ppl: 12.575277 +epoch: 2, batch: 36597, sum loss: 3696.165039, avg loss: 2.267586, ppl: 9.656063 +epoch: 2, batch: 36598, sum loss: 4101.560059, avg loss: 2.522485, ppl: 12.459515 +epoch: 2, batch: 36599, sum loss: 4612.196289, avg loss: 2.523083, ppl: 12.466979 +epoch: 2, batch: 36600, sum loss: 4145.083496, avg loss: 2.433989, ppl: 
11.404284 +epoch: 2, batch: 36601, sum loss: 3778.963135, avg loss: 2.419311, ppl: 11.238109 +epoch: 2, batch: 36602, sum loss: 5064.799316, avg loss: 2.684048, ppl: 14.644259 +epoch: 2, batch: 36603, sum loss: 5335.895020, avg loss: 2.888952, ppl: 17.974472 +epoch: 2, batch: 36604, sum loss: 5123.924316, avg loss: 2.511728, ppl: 12.326206 +epoch: 2, batch: 36605, sum loss: 3371.202637, avg loss: 2.439365, ppl: 11.465759 +epoch: 2, batch: 36606, sum loss: 4602.988281, avg loss: 2.693381, ppl: 14.781569 +epoch: 2, batch: 36607, sum loss: 3792.587646, avg loss: 2.386776, ppl: 10.878370 +epoch: 2, batch: 36608, sum loss: 4049.174561, avg loss: 2.549858, ppl: 12.805286 +epoch: 2, batch: 36609, sum loss: 4176.514648, avg loss: 2.500907, ppl: 12.193548 +epoch: 2, batch: 36610, sum loss: 4412.865234, avg loss: 2.555220, ppl: 12.874133 +epoch: 2, batch: 36611, sum loss: 3505.774414, avg loss: 2.453306, ppl: 11.626723 +epoch: 2, batch: 36612, sum loss: 4717.907227, avg loss: 2.483109, ppl: 11.978451 +epoch: 2, batch: 36613, sum loss: 4196.962891, avg loss: 2.401009, ppl: 11.034300 +epoch: 2, batch: 36614, sum loss: 4154.119141, avg loss: 2.523766, ppl: 12.475494 +epoch: 2, batch: 36615, sum loss: 4557.427734, avg loss: 2.508216, ppl: 12.282993 +epoch: 2, batch: 36616, sum loss: 4044.421875, avg loss: 2.241919, ppl: 9.411375 +epoch: 2, batch: 36617, sum loss: 4717.632812, avg loss: 2.590683, ppl: 13.338872 +epoch: 2, batch: 36618, sum loss: 4063.940430, avg loss: 2.497812, ppl: 12.155869 +epoch: 2, batch: 36619, sum loss: 4308.342773, avg loss: 2.562964, ppl: 12.974218 +epoch: 2, batch: 36620, sum loss: 4274.628418, avg loss: 2.397436, ppl: 10.994950 +epoch: 2, batch: 36621, sum loss: 3418.604736, avg loss: 2.314560, ppl: 10.120468 +epoch: 2, batch: 36622, sum loss: 5027.707031, avg loss: 2.802512, ppl: 16.486013 +epoch: 2, batch: 36623, sum loss: 3591.787842, avg loss: 2.440073, ppl: 11.473881 +epoch: 2, batch: 36624, sum loss: 4078.551758, avg loss: 2.272174, ppl: 9.700463 
+epoch: 2, batch: 36625, sum loss: 4300.945312, avg loss: 2.648365, ppl: 14.130919 +epoch: 2, batch: 36626, sum loss: 4226.982910, avg loss: 2.582152, ppl: 13.225571 +epoch: 2, batch: 36627, sum loss: 4090.656006, avg loss: 2.368649, ppl: 10.682945 +epoch: 2, batch: 36628, sum loss: 4769.433594, avg loss: 2.477628, ppl: 11.912971 +epoch: 2, batch: 36629, sum loss: 4210.390625, avg loss: 2.491355, ppl: 12.077636 +epoch: 2, batch: 36630, sum loss: 4161.218262, avg loss: 2.475442, ppl: 11.886962 +epoch: 2, batch: 36631, sum loss: 4916.905273, avg loss: 2.600162, ppl: 13.465914 +epoch: 2, batch: 36632, sum loss: 4450.821777, avg loss: 2.604343, ppl: 13.522334 +epoch: 2, batch: 36633, sum loss: 4330.980957, avg loss: 2.653787, ppl: 14.207747 +epoch: 2, batch: 36634, sum loss: 3835.807373, avg loss: 2.407914, ppl: 11.110762 +epoch: 2, batch: 36635, sum loss: 4503.196289, avg loss: 2.517158, ppl: 12.393328 +epoch: 2, batch: 36636, sum loss: 3274.019531, avg loss: 2.163926, ppl: 8.705244 +epoch: 2, batch: 36637, sum loss: 4368.851562, avg loss: 2.608270, ppl: 13.575541 +epoch: 2, batch: 36638, sum loss: 3688.629883, avg loss: 2.316978, ppl: 10.144975 +epoch: 2, batch: 36639, sum loss: 3790.821777, avg loss: 2.439396, ppl: 11.466118 +epoch: 2, batch: 36640, sum loss: 3575.196289, avg loss: 2.226150, ppl: 9.264127 +epoch: 2, batch: 36641, sum loss: 5066.226074, avg loss: 2.570384, ppl: 13.070837 +epoch: 2, batch: 36642, sum loss: 4554.009277, avg loss: 2.568533, ppl: 13.046674 +epoch: 2, batch: 36643, sum loss: 4226.973633, avg loss: 2.495262, ppl: 12.124908 +epoch: 2, batch: 36644, sum loss: 4017.602295, avg loss: 2.426088, ppl: 11.314537 +epoch: 2, batch: 36645, sum loss: 4005.771484, avg loss: 2.350805, ppl: 10.494012 +epoch: 2, batch: 36646, sum loss: 3686.362793, avg loss: 2.336098, ppl: 10.340810 +epoch: 2, batch: 36647, sum loss: 4277.768066, avg loss: 2.478429, ppl: 11.922515 +epoch: 2, batch: 36648, sum loss: 3658.279297, avg loss: 2.536948, ppl: 12.641034 +epoch: 
2, batch: 36649, sum loss: 3898.920166, avg loss: 2.360121, ppl: 10.592236 +epoch: 2, batch: 36650, sum loss: 3927.666992, avg loss: 2.307678, ppl: 10.051054 +epoch: 2, batch: 36651, sum loss: 4121.456543, avg loss: 2.506969, ppl: 12.267687 +epoch: 2, batch: 36652, sum loss: 4534.691895, avg loss: 2.636449, ppl: 13.963529 +epoch: 2, batch: 36653, sum loss: 4095.109863, avg loss: 2.305805, ppl: 10.032253 +epoch: 2, batch: 36654, sum loss: 3500.683594, avg loss: 2.252692, ppl: 9.513313 +epoch: 2, batch: 36655, sum loss: 4092.095459, avg loss: 2.287365, ppl: 9.848948 +epoch: 2, batch: 36656, sum loss: 4291.384766, avg loss: 2.543797, ppl: 12.727901 +epoch: 2, batch: 36657, sum loss: 3481.638672, avg loss: 2.206362, ppl: 9.082609 +epoch: 2, batch: 36658, sum loss: 3887.156738, avg loss: 2.246911, ppl: 9.458476 +epoch: 2, batch: 36659, sum loss: 4066.258301, avg loss: 2.456954, ppl: 11.669210 +epoch: 2, batch: 36660, sum loss: 3453.135742, avg loss: 2.270306, ppl: 9.682366 +epoch: 2, batch: 36661, sum loss: 4551.262695, avg loss: 2.694649, ppl: 14.800330 +epoch: 2, batch: 36662, sum loss: 4199.348145, avg loss: 2.555903, ppl: 12.882924 +epoch: 2, batch: 36663, sum loss: 4287.091797, avg loss: 2.708207, ppl: 15.002351 +epoch: 2, batch: 36664, sum loss: 3877.103027, avg loss: 2.187981, ppl: 8.917194 +epoch: 2, batch: 36665, sum loss: 4148.223633, avg loss: 2.434404, ppl: 11.409013 +epoch: 2, batch: 36666, sum loss: 4409.416992, avg loss: 2.750728, ppl: 15.654022 +epoch: 2, batch: 36667, sum loss: 4219.321289, avg loss: 2.532606, ppl: 12.586259 +epoch: 2, batch: 36668, sum loss: 3773.253418, avg loss: 2.302168, ppl: 9.995829 +epoch: 2, batch: 36669, sum loss: 4323.429688, avg loss: 2.562792, ppl: 12.971982 +epoch: 2, batch: 36670, sum loss: 3434.423340, avg loss: 2.408432, ppl: 11.116511 +epoch: 2, batch: 36671, sum loss: 4091.108643, avg loss: 2.389666, ppl: 10.909850 +epoch: 2, batch: 36672, sum loss: 4170.659180, avg loss: 2.203201, ppl: 9.053947 +epoch: 2, batch: 
36673, sum loss: 4038.317627, avg loss: 2.549443, ppl: 12.799973 +epoch: 2, batch: 36674, sum loss: 3638.128174, avg loss: 2.461521, ppl: 11.722630 +epoch: 2, batch: 36675, sum loss: 4338.075684, avg loss: 2.476071, ppl: 11.894435 +epoch: 2, batch: 36676, sum loss: 3765.721924, avg loss: 2.267141, ppl: 9.651771 +epoch: 2, batch: 36677, sum loss: 3698.808350, avg loss: 2.241702, ppl: 9.409333 +epoch: 2, batch: 36678, sum loss: 4196.394531, avg loss: 2.361505, ppl: 10.606903 +epoch: 2, batch: 36679, sum loss: 3849.756836, avg loss: 2.480514, ppl: 11.947399 +epoch: 2, batch: 36680, sum loss: 4526.064453, avg loss: 2.539879, ppl: 12.678138 +epoch: 2, batch: 36681, sum loss: 4122.359863, avg loss: 2.527504, ppl: 12.522217 +epoch: 2, batch: 36682, sum loss: 3882.472412, avg loss: 2.348743, ppl: 10.472397 +epoch: 2, batch: 36683, sum loss: 3441.784912, avg loss: 2.188039, ppl: 8.917705 +epoch: 2, batch: 36684, sum loss: 4806.958008, avg loss: 2.556892, ppl: 12.895680 +epoch: 2, batch: 36685, sum loss: 5117.775879, avg loss: 2.745588, ppl: 15.573766 +epoch: 2, batch: 36686, sum loss: 4003.573486, avg loss: 2.370381, ppl: 10.701470 +epoch: 2, batch: 36687, sum loss: 4258.954590, avg loss: 2.617673, ppl: 13.703803 +epoch: 2, batch: 36688, sum loss: 3933.704346, avg loss: 2.344281, ppl: 10.425778 +epoch: 2, batch: 36689, sum loss: 4431.799316, avg loss: 2.403362, ppl: 11.060299 +epoch: 2, batch: 36690, sum loss: 3801.056641, avg loss: 2.180755, ppl: 8.852991 +epoch: 2, batch: 36691, sum loss: 3260.535400, avg loss: 2.194169, ppl: 8.972542 +epoch: 2, batch: 36692, sum loss: 4099.118164, avg loss: 2.665226, ppl: 14.371203 +epoch: 2, batch: 36693, sum loss: 4392.031250, avg loss: 2.489814, ppl: 12.059028 +epoch: 2, batch: 36694, sum loss: 4563.144531, avg loss: 2.539313, ppl: 12.670958 +epoch: 2, batch: 36695, sum loss: 3837.483643, avg loss: 2.433408, ppl: 11.397654 +epoch: 2, batch: 36696, sum loss: 4430.394531, avg loss: 2.667305, ppl: 14.401113 +epoch: 2, batch: 36697, sum 
loss: 4993.345215, avg loss: 2.603413, ppl: 13.509764 +epoch: 2, batch: 36698, sum loss: 5293.680664, avg loss: 2.710538, ppl: 15.037362 +epoch: 2, batch: 36699, sum loss: 5196.652344, avg loss: 2.651353, ppl: 14.173203 +epoch: 2, batch: 36700, sum loss: 4400.202148, avg loss: 2.546413, ppl: 12.761250 +epoch: 2, batch: 36701, sum loss: 4383.173828, avg loss: 2.285284, ppl: 9.828473 +epoch: 2, batch: 36702, sum loss: 3931.474121, avg loss: 2.419369, ppl: 11.238763 +epoch: 2, batch: 36703, sum loss: 4590.812500, avg loss: 2.665977, ppl: 14.381994 +epoch: 2, batch: 36704, sum loss: 4054.427979, avg loss: 2.328793, ppl: 10.265539 +epoch: 2, batch: 36705, sum loss: 3446.862061, avg loss: 2.225218, ppl: 9.255496 +epoch: 2, batch: 36706, sum loss: 2823.457275, avg loss: 2.205826, ppl: 9.077747 +epoch: 2, batch: 36707, sum loss: 4713.365234, avg loss: 2.527274, ppl: 12.519327 +epoch: 2, batch: 36708, sum loss: 3933.063721, avg loss: 2.479864, ppl: 11.939636 +epoch: 2, batch: 36709, sum loss: 4031.971680, avg loss: 2.496577, ppl: 12.140865 +epoch: 2, batch: 36710, sum loss: 4890.906250, avg loss: 2.674088, ppl: 14.499117 +epoch: 2, batch: 36711, sum loss: 4043.468262, avg loss: 2.450587, ppl: 11.595149 +epoch: 2, batch: 36712, sum loss: 3626.044922, avg loss: 2.425448, ppl: 11.307293 +epoch: 2, batch: 36713, sum loss: 3996.221680, avg loss: 2.343825, ppl: 10.421021 +epoch: 2, batch: 36714, sum loss: 3762.318115, avg loss: 2.244820, ppl: 9.438715 +epoch: 2, batch: 36715, sum loss: 4416.691406, avg loss: 2.655858, ppl: 14.237193 +epoch: 2, batch: 36716, sum loss: 4004.350342, avg loss: 2.321363, ppl: 10.189550 +epoch: 2, batch: 36717, sum loss: 3607.405273, avg loss: 2.373293, ppl: 10.732678 +epoch: 2, batch: 36718, sum loss: 5220.627930, avg loss: 2.778408, ppl: 16.093376 +epoch: 2, batch: 36719, sum loss: 4089.961182, avg loss: 2.524668, ppl: 12.486743 +epoch: 2, batch: 36720, sum loss: 3377.724609, avg loss: 2.140510, ppl: 8.503772 +epoch: 2, batch: 36721, sum loss: 
3443.727051, avg loss: 2.218896, ppl: 9.197175 +epoch: 2, batch: 36722, sum loss: 4621.300781, avg loss: 2.537782, ppl: 12.651578 +epoch: 2, batch: 36723, sum loss: 4353.840332, avg loss: 2.503646, ppl: 12.226995 +epoch: 2, batch: 36724, sum loss: 4235.606934, avg loss: 2.622667, ppl: 13.772404 +epoch: 2, batch: 36725, sum loss: 4046.397949, avg loss: 2.268160, ppl: 9.661611 +epoch: 2, batch: 36726, sum loss: 4245.232422, avg loss: 2.401150, ppl: 11.035857 +epoch: 2, batch: 36727, sum loss: 4478.588379, avg loss: 2.547547, ppl: 12.775731 +epoch: 2, batch: 36728, sum loss: 4043.421875, avg loss: 2.459502, ppl: 11.698987 +epoch: 2, batch: 36729, sum loss: 4092.417480, avg loss: 2.390431, ppl: 10.918196 +epoch: 2, batch: 36730, sum loss: 4652.852539, avg loss: 2.592119, ppl: 13.358041 +epoch: 2, batch: 36731, sum loss: 3894.123291, avg loss: 2.427758, ppl: 11.333441 +epoch: 2, batch: 36732, sum loss: 4027.782959, avg loss: 2.372075, ppl: 10.719611 +epoch: 2, batch: 36733, sum loss: 4161.907715, avg loss: 2.490669, ppl: 12.069348 +epoch: 2, batch: 36734, sum loss: 3581.194092, avg loss: 2.426283, ppl: 11.316744 +epoch: 2, batch: 36735, sum loss: 4127.752441, avg loss: 2.479131, ppl: 11.930889 +epoch: 2, batch: 36736, sum loss: 3925.746826, avg loss: 2.498884, ppl: 12.168908 +epoch: 2, batch: 36737, sum loss: 4732.733398, avg loss: 2.547219, ppl: 12.771541 +epoch: 2, batch: 36738, sum loss: 4226.686523, avg loss: 2.529436, ppl: 12.546423 +epoch: 2, batch: 36739, sum loss: 4661.877441, avg loss: 2.600043, ppl: 13.464321 +epoch: 2, batch: 36740, sum loss: 4009.008301, avg loss: 2.470122, ppl: 11.823891 +epoch: 2, batch: 36741, sum loss: 4480.714355, avg loss: 2.667092, ppl: 14.398040 +epoch: 2, batch: 36742, sum loss: 5412.253418, avg loss: 2.670081, ppl: 14.441134 +epoch: 2, batch: 36743, sum loss: 4172.532227, avg loss: 2.302722, ppl: 10.001369 +epoch: 2, batch: 36744, sum loss: 3552.507324, avg loss: 2.363611, ppl: 10.629264 +epoch: 2, batch: 36745, sum loss: 
5097.048828, avg loss: 2.764126, ppl: 15.865172 +epoch: 2, batch: 36746, sum loss: 4792.103027, avg loss: 2.361805, ppl: 10.610090 +epoch: 2, batch: 36747, sum loss: 3643.077393, avg loss: 2.211947, ppl: 9.133486 +epoch: 2, batch: 36748, sum loss: 4398.249023, avg loss: 2.527729, ppl: 12.525033 +epoch: 2, batch: 36749, sum loss: 4710.827637, avg loss: 2.588367, ppl: 13.308022 +epoch: 2, batch: 36750, sum loss: 4556.130859, avg loss: 2.594608, ppl: 13.391332 +epoch: 2, batch: 36751, sum loss: 3740.951660, avg loss: 2.346896, ppl: 10.453070 +epoch: 2, batch: 36752, sum loss: 3427.662598, avg loss: 2.166664, ppl: 8.729115 +epoch: 2, batch: 36753, sum loss: 3879.451904, avg loss: 2.299616, ppl: 9.970352 +epoch: 2, batch: 36754, sum loss: 4633.882812, avg loss: 2.607700, ppl: 13.567807 +epoch: 2, batch: 36755, sum loss: 3384.193359, avg loss: 2.408679, ppl: 11.119258 +epoch: 2, batch: 36756, sum loss: 3130.988281, avg loss: 2.234824, ppl: 9.344836 +epoch: 2, batch: 36757, sum loss: 4753.258789, avg loss: 2.521623, ppl: 12.448780 +epoch: 2, batch: 36758, sum loss: 4565.447266, avg loss: 2.573533, ppl: 13.112065 +epoch: 2, batch: 36759, sum loss: 4165.031250, avg loss: 2.448578, ppl: 11.571878 +epoch: 2, batch: 36760, sum loss: 4144.885742, avg loss: 2.568083, ppl: 13.040799 +epoch: 2, batch: 36761, sum loss: 3232.910889, avg loss: 2.294472, ppl: 9.919197 +epoch: 2, batch: 36762, sum loss: 4482.552734, avg loss: 2.588079, ppl: 13.304190 +epoch: 2, batch: 36763, sum loss: 3290.246582, avg loss: 2.071944, ppl: 7.940242 +epoch: 2, batch: 36764, sum loss: 3689.324951, avg loss: 2.419230, ppl: 11.237198 +epoch: 2, batch: 36765, sum loss: 5431.145996, avg loss: 2.500528, ppl: 12.188924 +epoch: 2, batch: 36766, sum loss: 4618.665039, avg loss: 2.648317, ppl: 14.130239 +epoch: 2, batch: 36767, sum loss: 4619.915039, avg loss: 2.351102, ppl: 10.497129 +epoch: 2, batch: 36768, sum loss: 3707.898682, avg loss: 2.305907, ppl: 10.033275 +epoch: 2, batch: 36769, sum loss: 3925.927734, 
avg loss: 2.495822, ppl: 12.131699 +epoch: 2, batch: 36770, sum loss: 3167.058105, avg loss: 1.986862, ppl: 7.292614 +epoch: 2, batch: 36771, sum loss: 3489.994141, avg loss: 2.525321, ppl: 12.494908 +epoch: 2, batch: 36772, sum loss: 3863.877930, avg loss: 2.464208, ppl: 11.754168 +epoch: 2, batch: 36773, sum loss: 5376.743164, avg loss: 2.736256, ppl: 15.429112 +epoch: 2, batch: 36774, sum loss: 5041.562988, avg loss: 2.623082, ppl: 13.778121 +epoch: 2, batch: 36775, sum loss: 4429.517090, avg loss: 2.548629, ppl: 12.789557 +epoch: 2, batch: 36776, sum loss: 3816.551270, avg loss: 2.282626, ppl: 9.802392 +epoch: 2, batch: 36777, sum loss: 4140.290039, avg loss: 2.288718, ppl: 9.862284 +epoch: 2, batch: 36778, sum loss: 3690.850342, avg loss: 2.452392, ppl: 11.616103 +epoch: 2, batch: 36779, sum loss: 4242.163574, avg loss: 2.364640, ppl: 10.640205 +epoch: 2, batch: 36780, sum loss: 4269.083496, avg loss: 2.447869, ppl: 11.563676 +epoch: 2, batch: 36781, sum loss: 4738.352051, avg loss: 2.659008, ppl: 14.282115 +epoch: 2, batch: 36782, sum loss: 4566.949707, avg loss: 2.594858, ppl: 13.394681 +epoch: 2, batch: 36783, sum loss: 3287.470215, avg loss: 2.240947, ppl: 9.402236 +epoch: 2, batch: 36784, sum loss: 3890.539307, avg loss: 2.306188, ppl: 10.036095 +epoch: 2, batch: 36785, sum loss: 3642.666992, avg loss: 2.332053, ppl: 10.299066 +epoch: 2, batch: 36786, sum loss: 3808.620605, avg loss: 2.191381, ppl: 8.947563 +epoch: 2, batch: 36787, sum loss: 4462.458008, avg loss: 2.557282, ppl: 12.900711 +epoch: 2, batch: 36788, sum loss: 4116.530273, avg loss: 2.345601, ppl: 10.439548 +epoch: 2, batch: 36789, sum loss: 4705.250000, avg loss: 2.458333, ppl: 11.685319 +epoch: 2, batch: 36790, sum loss: 4992.459961, avg loss: 2.613853, ppl: 13.651555 +epoch: 2, batch: 36791, sum loss: 4379.931152, avg loss: 2.641695, ppl: 14.036983 +epoch: 2, batch: 36792, sum loss: 5465.287598, avg loss: 2.663396, ppl: 14.344916 +epoch: 2, batch: 36793, sum loss: 4297.452148, avg loss: 
2.527913, ppl: 12.527335 +epoch: 2, batch: 36794, sum loss: 4680.234863, avg loss: 2.549147, ppl: 12.796188 +epoch: 2, batch: 36795, sum loss: 4226.875488, avg loss: 2.461780, ppl: 11.725660 +epoch: 2, batch: 36796, sum loss: 4228.634277, avg loss: 2.417744, ppl: 11.220516 +epoch: 2, batch: 36797, sum loss: 3735.508545, avg loss: 2.171807, ppl: 8.774127 +epoch: 2, batch: 36798, sum loss: 3804.372070, avg loss: 2.521122, ppl: 12.442543 +epoch: 2, batch: 36799, sum loss: 3933.770020, avg loss: 2.259489, ppl: 9.578192 +epoch: 2, batch: 36800, sum loss: 3677.249023, avg loss: 2.398727, ppl: 11.009157 +epoch: 2, batch: 36801, sum loss: 3741.417725, avg loss: 2.463079, ppl: 11.740911 +epoch: 2, batch: 36802, sum loss: 3273.142090, avg loss: 2.187929, ppl: 8.916729 +epoch: 2, batch: 36803, sum loss: 4735.989746, avg loss: 2.740735, ppl: 15.498369 +epoch: 2, batch: 36804, sum loss: 4119.894043, avg loss: 2.403672, ppl: 11.063730 +epoch: 2, batch: 36805, sum loss: 4478.247070, avg loss: 2.554619, ppl: 12.866398 +epoch: 2, batch: 36806, sum loss: 4772.707520, avg loss: 2.613750, ppl: 13.650146 +epoch: 2, batch: 36807, sum loss: 4845.992188, avg loss: 2.649531, ppl: 14.147404 +epoch: 2, batch: 36808, sum loss: 3422.997559, avg loss: 2.298857, ppl: 9.962786 +epoch: 2, batch: 36809, sum loss: 3275.760986, avg loss: 2.248292, ppl: 9.471542 +epoch: 2, batch: 36810, sum loss: 4089.788330, avg loss: 2.152520, ppl: 8.606524 +epoch: 2, batch: 36811, sum loss: 4200.021973, avg loss: 2.266607, ppl: 9.646610 +epoch: 2, batch: 36812, sum loss: 3581.215820, avg loss: 2.418107, ppl: 11.224586 +epoch: 2, batch: 36813, sum loss: 4743.392090, avg loss: 2.466662, ppl: 11.783054 +epoch: 2, batch: 36814, sum loss: 4197.108887, avg loss: 2.384721, ppl: 10.856031 +epoch: 2, batch: 36815, sum loss: 5272.432129, avg loss: 2.539707, ppl: 12.675959 +epoch: 2, batch: 36816, sum loss: 4627.378418, avg loss: 2.510786, ppl: 12.314603 +epoch: 2, batch: 36817, sum loss: 4472.586914, avg loss: 2.535480, ppl: 
12.622492 +epoch: 2, batch: 36818, sum loss: 3759.102051, avg loss: 2.356804, ppl: 10.557156 +epoch: 2, batch: 36819, sum loss: 4183.908203, avg loss: 2.377220, ppl: 10.774911 +epoch: 2, batch: 36820, sum loss: 4829.093262, avg loss: 2.648982, ppl: 14.139631 +epoch: 2, batch: 36821, sum loss: 3131.235596, avg loss: 2.427315, ppl: 11.328419 +epoch: 2, batch: 36822, sum loss: 4128.589355, avg loss: 2.491605, ppl: 12.080647 +epoch: 2, batch: 36823, sum loss: 4658.451172, avg loss: 2.548387, ppl: 12.786460 +epoch: 2, batch: 36824, sum loss: 3455.491699, avg loss: 2.416428, ppl: 11.205759 +epoch: 2, batch: 36825, sum loss: 4040.782471, avg loss: 2.488167, ppl: 12.039183 +epoch: 2, batch: 36826, sum loss: 3581.453857, avg loss: 2.387636, ppl: 10.887724 +epoch: 2, batch: 36827, sum loss: 3621.402588, avg loss: 2.238197, ppl: 9.376409 +epoch: 2, batch: 36828, sum loss: 4724.474121, avg loss: 2.664678, ppl: 14.363325 +epoch: 2, batch: 36829, sum loss: 3755.881348, avg loss: 2.380153, ppl: 10.806556 +epoch: 2, batch: 36830, sum loss: 4076.476318, avg loss: 2.279908, ppl: 9.775785 +epoch: 2, batch: 36831, sum loss: 4713.941895, avg loss: 2.486256, ppl: 12.016208 +epoch: 2, batch: 36832, sum loss: 3976.137695, avg loss: 2.388071, ppl: 10.892460 +epoch: 2, batch: 36833, sum loss: 4449.841309, avg loss: 2.508366, ppl: 12.284839 +epoch: 2, batch: 36834, sum loss: 4430.911621, avg loss: 2.526175, ppl: 12.505584 +epoch: 2, batch: 36835, sum loss: 3574.981934, avg loss: 2.304953, ppl: 10.023705 +epoch: 2, batch: 36836, sum loss: 3465.596924, avg loss: 2.020756, ppl: 7.544028 +epoch: 2, batch: 36837, sum loss: 5664.189453, avg loss: 2.760326, ppl: 15.804996 +epoch: 2, batch: 36838, sum loss: 4332.151367, avg loss: 2.367296, ppl: 10.668506 +epoch: 2, batch: 36839, sum loss: 3705.419922, avg loss: 2.376793, ppl: 10.770306 +epoch: 2, batch: 36840, sum loss: 2982.135986, avg loss: 2.243895, ppl: 9.429988 +epoch: 2, batch: 36841, sum loss: 3612.515869, avg loss: 2.220354, ppl: 9.210589 
+epoch: 2, batch: 36842, sum loss: 5106.010742, avg loss: 2.793222, ppl: 16.333569 +epoch: 2, batch: 36843, sum loss: 5142.157715, avg loss: 2.600990, ppl: 13.477077 +epoch: 2, batch: 36844, sum loss: 3574.290771, avg loss: 2.286814, ppl: 9.843529 +epoch: 2, batch: 36845, sum loss: 3979.471436, avg loss: 2.427987, ppl: 11.336043 +epoch: 2, batch: 36846, sum loss: 4486.393066, avg loss: 2.510573, ppl: 12.311978 +epoch: 2, batch: 36847, sum loss: 4592.288086, avg loss: 2.545614, ppl: 12.751058 +epoch: 2, batch: 36848, sum loss: 4508.707520, avg loss: 2.586751, ppl: 13.286536 +epoch: 2, batch: 36849, sum loss: 4513.621094, avg loss: 2.493713, ppl: 12.106148 +epoch: 2, batch: 36850, sum loss: 3479.274658, avg loss: 2.282989, ppl: 9.805943 +epoch: 2, batch: 36851, sum loss: 3473.340576, avg loss: 2.522397, ppl: 12.458425 +epoch: 2, batch: 36852, sum loss: 3179.175781, avg loss: 2.152455, ppl: 8.605959 +epoch: 2, batch: 36853, sum loss: 4505.526367, avg loss: 2.706022, ppl: 14.969608 +epoch: 2, batch: 36854, sum loss: 3562.302002, avg loss: 2.271876, ppl: 9.697579 +epoch: 2, batch: 36855, sum loss: 3943.320312, avg loss: 2.579019, ppl: 13.184202 +epoch: 2, batch: 36856, sum loss: 4125.861328, avg loss: 2.502038, ppl: 12.207353 +epoch: 2, batch: 36857, sum loss: 4307.224609, avg loss: 2.499840, ppl: 12.180548 +epoch: 2, batch: 36858, sum loss: 4346.044434, avg loss: 2.544522, ppl: 12.737142 +epoch: 2, batch: 36859, sum loss: 3954.086670, avg loss: 2.572600, ppl: 13.099845 +epoch: 2, batch: 36860, sum loss: 4750.277344, avg loss: 2.590118, ppl: 13.331350 +epoch: 2, batch: 36861, sum loss: 3168.072266, avg loss: 2.143486, ppl: 8.529119 +epoch: 2, batch: 36862, sum loss: 4543.924805, avg loss: 2.752226, ppl: 15.677489 +epoch: 2, batch: 36863, sum loss: 3260.924072, avg loss: 2.075700, ppl: 7.970122 +epoch: 2, batch: 36864, sum loss: 4484.958496, avg loss: 2.562833, ppl: 12.972521 +epoch: 2, batch: 36865, sum loss: 4477.262207, avg loss: 2.601547, ppl: 13.484579 +epoch: 2, 
batch: 36866, sum loss: 4305.116211, avg loss: 2.330870, ppl: 10.286886 +epoch: 2, batch: 36867, sum loss: 3832.155029, avg loss: 2.416239, ppl: 11.203644 +epoch: 2, batch: 36868, sum loss: 3795.557129, avg loss: 2.286480, ppl: 9.840240 +epoch: 2, batch: 36869, sum loss: 3588.069336, avg loss: 2.011250, ppl: 7.472651 +epoch: 2, batch: 36870, sum loss: 3857.350342, avg loss: 2.455347, ppl: 11.650479 +epoch: 2, batch: 36871, sum loss: 3942.635742, avg loss: 2.306984, ppl: 10.044087 +epoch: 2, batch: 36872, sum loss: 3924.662109, avg loss: 2.382915, ppl: 10.836451 +epoch: 2, batch: 36873, sum loss: 4007.526123, avg loss: 2.342213, ppl: 10.404235 +epoch: 2, batch: 36874, sum loss: 3901.410156, avg loss: 2.204187, ppl: 9.062878 +epoch: 2, batch: 36875, sum loss: 3986.933594, avg loss: 2.345255, ppl: 10.435935 +epoch: 2, batch: 36876, sum loss: 3714.257080, avg loss: 2.260655, ppl: 9.589372 +epoch: 2, batch: 36877, sum loss: 4825.203125, avg loss: 2.733826, ppl: 15.391665 +epoch: 2, batch: 36878, sum loss: 4619.190430, avg loss: 2.639537, ppl: 14.006721 +epoch: 2, batch: 36879, sum loss: 3491.394043, avg loss: 2.391366, ppl: 10.928410 +epoch: 2, batch: 36880, sum loss: 3670.480957, avg loss: 2.363478, ppl: 10.627851 +epoch: 2, batch: 36881, sum loss: 4683.538086, avg loss: 2.617964, ppl: 13.707789 +epoch: 2, batch: 36882, sum loss: 4042.221680, avg loss: 2.383385, ppl: 10.841544 +epoch: 2, batch: 36883, sum loss: 4024.107666, avg loss: 2.542077, ppl: 12.706032 +epoch: 2, batch: 36884, sum loss: 3307.761719, avg loss: 2.248648, ppl: 9.474921 +epoch: 2, batch: 36885, sum loss: 3934.127686, avg loss: 2.278013, ppl: 9.757269 +epoch: 2, batch: 36886, sum loss: 4092.768311, avg loss: 2.529523, ppl: 12.547522 +epoch: 2, batch: 36887, sum loss: 3998.350830, avg loss: 2.330041, ppl: 10.278367 +epoch: 2, batch: 36888, sum loss: 4038.894043, avg loss: 2.419949, ppl: 11.245281 +epoch: 2, batch: 36889, sum loss: 3411.493164, avg loss: 2.159173, ppl: 8.663970 +epoch: 2, batch: 36890, 
sum loss: 3338.759766, avg loss: 2.158216, ppl: 8.655680 +epoch: 2, batch: 36891, sum loss: 4658.382812, avg loss: 2.603903, ppl: 13.516391 +epoch: 2, batch: 36892, sum loss: 4123.862305, avg loss: 2.528426, ppl: 12.533758 +epoch: 2, batch: 36893, sum loss: 3973.557373, avg loss: 2.518097, ppl: 12.404969 +epoch: 2, batch: 36894, sum loss: 4792.158203, avg loss: 2.576429, ppl: 13.150097 +epoch: 2, batch: 36895, sum loss: 3740.744385, avg loss: 2.264373, ppl: 9.625091 +epoch: 2, batch: 36896, sum loss: 4040.045166, avg loss: 2.422090, ppl: 11.269383 +epoch: 2, batch: 36897, sum loss: 4863.503906, avg loss: 2.638906, ppl: 13.997882 +epoch: 2, batch: 36898, sum loss: 3378.784424, avg loss: 2.322189, ppl: 10.197969 +epoch: 2, batch: 36899, sum loss: 4168.104004, avg loss: 2.388598, ppl: 10.898206 +epoch: 2, batch: 36900, sum loss: 4198.667969, avg loss: 2.445351, ppl: 11.534599 +epoch: 2, batch: 36901, sum loss: 4851.208984, avg loss: 2.765798, ppl: 15.891710 +epoch: 2, batch: 36902, sum loss: 4255.836914, avg loss: 2.554524, ppl: 12.865173 +epoch: 2, batch: 36903, sum loss: 4993.024414, avg loss: 2.619635, ppl: 13.730709 +epoch: 2, batch: 36904, sum loss: 3794.662109, avg loss: 2.448169, ppl: 11.567150 +epoch: 2, batch: 36905, sum loss: 4375.129395, avg loss: 2.604244, ppl: 13.520996 +epoch: 2, batch: 36906, sum loss: 3442.960205, avg loss: 2.014605, ppl: 7.497767 +epoch: 2, batch: 36907, sum loss: 3616.582520, avg loss: 2.201206, ppl: 9.035909 +epoch: 2, batch: 36908, sum loss: 4936.004883, avg loss: 2.550907, ppl: 12.818724 +epoch: 2, batch: 36909, sum loss: 4361.394043, avg loss: 2.614745, ppl: 13.663727 +epoch: 2, batch: 36910, sum loss: 4853.568359, avg loss: 2.583059, ppl: 13.237575 +epoch: 2, batch: 36911, sum loss: 4086.696045, avg loss: 2.366356, ppl: 10.658479 +epoch: 2, batch: 36912, sum loss: 3875.737793, avg loss: 2.329170, ppl: 10.269409 +epoch: 2, batch: 36913, sum loss: 4308.902832, avg loss: 2.428919, ppl: 11.346611 +epoch: 2, batch: 36914, sum loss: 
3933.239746, avg loss: 2.327361, ppl: 10.250853 +epoch: 2, batch: 36915, sum loss: 3818.971680, avg loss: 2.289551, ppl: 9.870510 +epoch: 2, batch: 36916, sum loss: 3100.859863, avg loss: 2.208590, ppl: 9.102868 +epoch: 2, batch: 36917, sum loss: 4766.169922, avg loss: 2.500614, ppl: 12.189973 +epoch: 2, batch: 36918, sum loss: 4077.123047, avg loss: 2.462031, ppl: 11.728607 +epoch: 2, batch: 36919, sum loss: 3769.270996, avg loss: 2.219830, ppl: 9.205764 +epoch: 2, batch: 36920, sum loss: 3750.080566, avg loss: 2.290825, ppl: 9.883089 +epoch: 2, batch: 36921, sum loss: 3649.459961, avg loss: 2.226638, ppl: 9.268653 +epoch: 2, batch: 36922, sum loss: 3191.344238, avg loss: 2.282793, ppl: 9.804023 +epoch: 2, batch: 36923, sum loss: 4466.342773, avg loss: 2.537695, ppl: 12.650475 +epoch: 2, batch: 36924, sum loss: 4719.787109, avg loss: 2.567893, ppl: 13.038321 +epoch: 2, batch: 36925, sum loss: 3969.602783, avg loss: 2.255456, ppl: 9.539642 +epoch: 2, batch: 36926, sum loss: 3906.514160, avg loss: 2.322541, ppl: 10.201564 +epoch: 2, batch: 36927, sum loss: 3810.664062, avg loss: 2.080057, ppl: 8.004925 +epoch: 2, batch: 36928, sum loss: 3035.357178, avg loss: 2.183710, ppl: 8.879188 +epoch: 2, batch: 36929, sum loss: 4379.781738, avg loss: 2.399880, ppl: 11.021858 +epoch: 2, batch: 36930, sum loss: 4039.545898, avg loss: 2.411669, ppl: 11.152562 +epoch: 2, batch: 36931, sum loss: 4001.552246, avg loss: 2.477741, ppl: 11.914323 +epoch: 2, batch: 36932, sum loss: 4022.356445, avg loss: 2.439270, ppl: 11.464668 +epoch: 2, batch: 36933, sum loss: 3909.871094, avg loss: 2.213970, ppl: 9.151978 +epoch: 2, batch: 36934, sum loss: 3725.368164, avg loss: 2.468766, ppl: 11.807870 +epoch: 2, batch: 36935, sum loss: 3824.548096, avg loss: 2.374021, ppl: 10.740497 +epoch: 2, batch: 36936, sum loss: 4705.237793, avg loss: 2.748386, ppl: 15.617411 +epoch: 2, batch: 36937, sum loss: 3057.361328, avg loss: 2.043691, ppl: 7.719045 +epoch: 2, batch: 36938, sum loss: 5334.648438, avg 
loss: 2.562271, ppl: 12.965229 +epoch: 2, batch: 36939, sum loss: 4275.265625, avg loss: 2.193569, ppl: 8.967159 +epoch: 2, batch: 36940, sum loss: 3829.424805, avg loss: 2.126277, ppl: 8.383596 +epoch: 2, batch: 36941, sum loss: 4457.574219, avg loss: 2.550100, ppl: 12.808383 +epoch: 2, batch: 36942, sum loss: 3645.818604, avg loss: 2.350624, ppl: 10.492118 +epoch: 2, batch: 36943, sum loss: 4412.429199, avg loss: 2.595547, ppl: 13.403913 +epoch: 2, batch: 36944, sum loss: 3872.666504, avg loss: 2.511457, ppl: 12.322871 +epoch: 2, batch: 36945, sum loss: 4614.253906, avg loss: 2.480782, ppl: 11.950603 +epoch: 2, batch: 36946, sum loss: 3699.625977, avg loss: 2.347478, ppl: 10.459163 +epoch: 2, batch: 36947, sum loss: 3314.595947, avg loss: 2.273386, ppl: 9.712226 +epoch: 2, batch: 36948, sum loss: 3575.615479, avg loss: 2.287662, ppl: 9.851875 +epoch: 2, batch: 36949, sum loss: 4011.862549, avg loss: 2.388014, ppl: 10.891837 +epoch: 2, batch: 36950, sum loss: 4112.722168, avg loss: 2.446593, ppl: 11.548927 +epoch: 2, batch: 36951, sum loss: 3674.851562, avg loss: 2.342162, ppl: 10.403702 +epoch: 2, batch: 36952, sum loss: 4415.604004, avg loss: 2.357503, ppl: 10.564543 +epoch: 2, batch: 36953, sum loss: 4149.945801, avg loss: 2.492460, ppl: 12.090983 +epoch: 2, batch: 36954, sum loss: 4070.514648, avg loss: 2.401484, ppl: 11.039542 +epoch: 2, batch: 36955, sum loss: 3535.334473, avg loss: 2.493184, ppl: 12.099739 +epoch: 2, batch: 36956, sum loss: 3610.739258, avg loss: 2.434753, ppl: 11.413004 +epoch: 2, batch: 36957, sum loss: 3901.770752, avg loss: 2.514028, ppl: 12.354589 +epoch: 2, batch: 36958, sum loss: 3357.358154, avg loss: 2.210242, ppl: 9.117925 +epoch: 2, batch: 36959, sum loss: 4015.716797, avg loss: 2.442650, ppl: 11.503486 +epoch: 2, batch: 36960, sum loss: 3029.094727, avg loss: 2.423276, ppl: 11.282761 +epoch: 2, batch: 36961, sum loss: 4060.866943, avg loss: 2.392968, ppl: 10.945935 +epoch: 2, batch: 36962, sum loss: 4403.248535, avg loss: 
2.534973, ppl: 12.616095 +epoch: 2, batch: 36963, sum loss: 4146.162109, avg loss: 2.564108, ppl: 12.989062 +epoch: 2, batch: 36964, sum loss: 3329.953125, avg loss: 2.050464, ppl: 7.771504 +epoch: 2, batch: 36965, sum loss: 4344.155762, avg loss: 2.509622, ppl: 12.300278 +epoch: 2, batch: 36966, sum loss: 4536.116699, avg loss: 2.500616, ppl: 12.189996 +epoch: 2, batch: 36967, sum loss: 4771.305176, avg loss: 2.472179, ppl: 11.848235 +epoch: 2, batch: 36968, sum loss: 3526.265137, avg loss: 2.398820, ppl: 11.010176 +epoch: 2, batch: 36969, sum loss: 4416.285156, avg loss: 2.499313, ppl: 12.174123 +epoch: 2, batch: 36970, sum loss: 3647.564697, avg loss: 2.284010, ppl: 9.815968 +epoch: 2, batch: 36971, sum loss: 4360.078613, avg loss: 2.474506, ppl: 11.875835 +epoch: 2, batch: 36972, sum loss: 4435.579102, avg loss: 2.317439, ppl: 10.149651 +epoch: 2, batch: 36973, sum loss: 3945.649902, avg loss: 2.231703, ppl: 9.315713 +epoch: 2, batch: 36974, sum loss: 4322.111328, avg loss: 2.577288, ppl: 13.161391 +epoch: 2, batch: 36975, sum loss: 5004.572266, avg loss: 2.631216, ppl: 13.890648 +epoch: 2, batch: 36976, sum loss: 4247.685059, avg loss: 2.445415, ppl: 11.535331 +epoch: 2, batch: 36977, sum loss: 3955.072754, avg loss: 2.629703, ppl: 13.869644 +epoch: 2, batch: 36978, sum loss: 3993.377930, avg loss: 2.551679, ppl: 12.828627 +epoch: 2, batch: 36979, sum loss: 4387.285645, avg loss: 2.847038, ppl: 17.236652 +epoch: 2, batch: 36980, sum loss: 4230.588867, avg loss: 2.518208, ppl: 12.406342 +epoch: 2, batch: 36981, sum loss: 3906.729492, avg loss: 2.382152, ppl: 10.828181 +epoch: 2, batch: 36982, sum loss: 4235.577148, avg loss: 2.609721, ppl: 13.595257 +epoch: 2, batch: 36983, sum loss: 4416.586914, avg loss: 2.657393, ppl: 14.259067 +epoch: 2, batch: 36984, sum loss: 4237.631836, avg loss: 2.549718, ppl: 12.803497 +epoch: 2, batch: 36985, sum loss: 4359.635742, avg loss: 2.755775, ppl: 15.733225 +epoch: 2, batch: 36986, sum loss: 3891.750977, avg loss: 2.429308, 
ppl: 11.351021 +epoch: 2, batch: 36987, sum loss: 3794.119385, avg loss: 2.649525, ppl: 14.147316 +epoch: 2, batch: 36988, sum loss: 3676.693604, avg loss: 2.436510, ppl: 11.433070 +epoch: 2, batch: 36989, sum loss: 5097.642578, avg loss: 2.620896, ppl: 13.748037 +epoch: 2, batch: 36990, sum loss: 4025.085205, avg loss: 2.454320, ppl: 11.638519 +epoch: 2, batch: 36991, sum loss: 4563.964844, avg loss: 2.627499, ppl: 13.839109 +epoch: 2, batch: 36992, sum loss: 4233.185059, avg loss: 2.405219, ppl: 11.080853 +epoch: 2, batch: 36993, sum loss: 3671.960693, avg loss: 2.393716, ppl: 10.954125 +epoch: 2, batch: 36994, sum loss: 4628.545410, avg loss: 2.675460, ppl: 14.519022 +epoch: 2, batch: 36995, sum loss: 3989.922852, avg loss: 2.525268, ppl: 12.494238 +epoch: 2, batch: 36996, sum loss: 4466.624512, avg loss: 2.618186, ppl: 13.710823 +epoch: 2, batch: 36997, sum loss: 4393.053711, avg loss: 2.604063, ppl: 13.518550 +epoch: 2, batch: 36998, sum loss: 4059.391602, avg loss: 2.461729, ppl: 11.725070 +epoch: 2, batch: 36999, sum loss: 4067.038574, avg loss: 2.370069, ppl: 10.698133 +epoch: 2, batch: 37000, sum loss: 3142.202393, avg loss: 2.201964, ppl: 9.042755 +epoch: 2, batch: 37001, sum loss: 4218.850098, avg loss: 2.594619, ppl: 13.391481 +epoch: 2, batch: 37002, sum loss: 4520.010254, avg loss: 2.652588, ppl: 14.190719 +epoch: 2, batch: 37003, sum loss: 3982.639160, avg loss: 2.541569, ppl: 12.699587 +epoch: 2, batch: 37004, sum loss: 4087.636719, avg loss: 2.587112, ppl: 13.291330 +epoch: 2, batch: 37005, sum loss: 4356.281738, avg loss: 2.659513, ppl: 14.289322 +epoch: 2, batch: 37006, sum loss: 3597.922607, avg loss: 2.384309, ppl: 10.851564 +epoch: 2, batch: 37007, sum loss: 4024.893066, avg loss: 2.346876, ppl: 10.452868 +epoch: 2, batch: 37008, sum loss: 4060.842773, avg loss: 2.506693, ppl: 12.264306 +epoch: 2, batch: 37009, sum loss: 4226.867676, avg loss: 2.484931, ppl: 12.000292 +epoch: 2, batch: 37010, sum loss: 3978.906494, avg loss: 2.291997, ppl: 
9.894675 +epoch: 2, batch: 37011, sum loss: 4064.722900, avg loss: 2.582416, ppl: 13.229062 +epoch: 2, batch: 37012, sum loss: 4090.446533, avg loss: 2.383710, ppl: 10.845065 +epoch: 2, batch: 37013, sum loss: 4870.230957, avg loss: 2.580938, ppl: 13.209527 +epoch: 2, batch: 37014, sum loss: 4560.069336, avg loss: 2.691895, ppl: 14.759612 +epoch: 2, batch: 37015, sum loss: 4511.284180, avg loss: 2.484187, ppl: 11.991369 +epoch: 2, batch: 37016, sum loss: 4976.618652, avg loss: 2.631739, ppl: 13.897920 +epoch: 2, batch: 37017, sum loss: 3356.871826, avg loss: 2.192601, ppl: 8.958484 +epoch: 2, batch: 37018, sum loss: 5099.183105, avg loss: 2.609613, ppl: 13.593785 +epoch: 2, batch: 37019, sum loss: 4492.172852, avg loss: 2.407381, ppl: 11.104837 +epoch: 2, batch: 37020, sum loss: 4693.737305, avg loss: 2.409516, ppl: 11.128574 +epoch: 2, batch: 37021, sum loss: 3679.623047, avg loss: 2.378554, ppl: 10.789291 +epoch: 2, batch: 37022, sum loss: 4180.998047, avg loss: 2.512619, ppl: 12.337199 +epoch: 2, batch: 37023, sum loss: 4062.800781, avg loss: 2.371746, ppl: 10.716084 +epoch: 2, batch: 37024, sum loss: 4131.608887, avg loss: 2.531623, ppl: 12.573898 +epoch: 2, batch: 37025, sum loss: 4458.641113, avg loss: 2.619648, ppl: 13.730889 +epoch: 2, batch: 37026, sum loss: 4283.620605, avg loss: 2.768986, ppl: 15.942452 +epoch: 2, batch: 37027, sum loss: 3678.977539, avg loss: 2.379675, ppl: 10.801391 +epoch: 2, batch: 37028, sum loss: 3917.019775, avg loss: 2.253751, ppl: 9.523394 +epoch: 2, batch: 37029, sum loss: 4025.098633, avg loss: 2.674484, ppl: 14.504867 +epoch: 2, batch: 37030, sum loss: 3460.346191, avg loss: 2.358791, ppl: 10.578151 +epoch: 2, batch: 37031, sum loss: 4191.653809, avg loss: 2.562136, ppl: 12.963474 +epoch: 2, batch: 37032, sum loss: 4448.814453, avg loss: 2.416521, ppl: 11.206799 +epoch: 2, batch: 37033, sum loss: 4414.342285, avg loss: 2.512432, ppl: 12.334888 +epoch: 2, batch: 37034, sum loss: 3939.496338, avg loss: 2.449935, ppl: 11.587599 
+epoch: 2, batch: 37035, sum loss: 3610.137207, avg loss: 2.408364, ppl: 11.115759 +epoch: 2, batch: 37036, sum loss: 4046.830322, avg loss: 2.575958, ppl: 13.143907 +epoch: 2, batch: 37037, sum loss: 3936.972168, avg loss: 2.354649, ppl: 10.534427 +epoch: 2, batch: 37038, sum loss: 5495.444336, avg loss: 2.680705, ppl: 14.595373 +epoch: 2, batch: 37039, sum loss: 3884.875488, avg loss: 2.291962, ppl: 9.894330 +epoch: 2, batch: 37040, sum loss: 4479.680664, avg loss: 2.684051, ppl: 14.644295 +epoch: 2, batch: 37041, sum loss: 4010.021973, avg loss: 2.414222, ppl: 11.181063 +epoch: 2, batch: 37042, sum loss: 4150.771484, avg loss: 2.431618, ppl: 11.377273 +epoch: 2, batch: 37043, sum loss: 4314.427734, avg loss: 2.206868, ppl: 9.087214 +epoch: 2, batch: 37044, sum loss: 4414.563965, avg loss: 2.481486, ppl: 11.959023 +epoch: 2, batch: 37045, sum loss: 4198.805176, avg loss: 2.654112, ppl: 14.212358 +epoch: 2, batch: 37046, sum loss: 3842.771484, avg loss: 2.392759, ppl: 10.943649 +epoch: 2, batch: 37047, sum loss: 4275.426270, avg loss: 2.362114, ppl: 10.613366 +epoch: 2, batch: 37048, sum loss: 3645.494873, avg loss: 2.204048, ppl: 9.061618 +epoch: 2, batch: 37049, sum loss: 4340.787109, avg loss: 2.501895, ppl: 12.205598 +epoch: 2, batch: 37050, sum loss: 4207.657715, avg loss: 2.279338, ppl: 9.770209 +epoch: 2, batch: 37051, sum loss: 3335.743652, avg loss: 2.025345, ppl: 7.578726 +epoch: 2, batch: 37052, sum loss: 3789.308105, avg loss: 2.197975, ppl: 9.006754 +epoch: 2, batch: 37053, sum loss: 4528.510742, avg loss: 2.375924, ppl: 10.760948 +epoch: 2, batch: 37054, sum loss: 4275.285645, avg loss: 2.629327, ppl: 13.864433 +epoch: 2, batch: 37055, sum loss: 3725.492188, avg loss: 2.428613, ppl: 11.343138 +epoch: 2, batch: 37056, sum loss: 4045.685791, avg loss: 2.632196, ppl: 13.904276 +epoch: 2, batch: 37057, sum loss: 3523.406982, avg loss: 2.380681, ppl: 10.812259 +epoch: 2, batch: 37058, sum loss: 4240.690918, avg loss: 2.471265, ppl: 11.837413 +epoch: 2, 
batch: 37059, sum loss: 4450.022461, avg loss: 2.575244, ppl: 13.134527 +epoch: 2, batch: 37060, sum loss: 2668.638672, avg loss: 2.318539, ppl: 10.160820 +epoch: 2, batch: 37061, sum loss: 4229.875488, avg loss: 2.306366, ppl: 10.037880 +epoch: 2, batch: 37062, sum loss: 3732.130127, avg loss: 2.295283, ppl: 9.927243 +epoch: 2, batch: 37063, sum loss: 3387.890625, avg loss: 2.379137, ppl: 10.795580 +epoch: 2, batch: 37064, sum loss: 3874.292969, avg loss: 2.439731, ppl: 11.469956 +epoch: 2, batch: 37065, sum loss: 4543.529785, avg loss: 2.707705, ppl: 14.994830 +epoch: 2, batch: 37066, sum loss: 3979.398438, avg loss: 2.424984, ppl: 11.302046 +epoch: 2, batch: 37067, sum loss: 3906.889160, avg loss: 2.457163, ppl: 11.671650 +epoch: 2, batch: 37068, sum loss: 4144.998535, avg loss: 2.498492, ppl: 12.164140 +epoch: 2, batch: 37069, sum loss: 3602.740479, avg loss: 2.310930, ppl: 10.083803 +epoch: 2, batch: 37070, sum loss: 4196.145020, avg loss: 2.512662, ppl: 12.337726 +epoch: 2, batch: 37071, sum loss: 3857.610840, avg loss: 2.321066, ppl: 10.186524 +epoch: 2, batch: 37072, sum loss: 3676.568115, avg loss: 2.172913, ppl: 8.783831 +epoch: 2, batch: 37073, sum loss: 4142.643066, avg loss: 2.712929, ppl: 15.073364 +epoch: 2, batch: 37074, sum loss: 4043.965088, avg loss: 2.538584, ppl: 12.661736 +epoch: 2, batch: 37075, sum loss: 4166.259766, avg loss: 2.409636, ppl: 11.129904 +epoch: 2, batch: 37076, sum loss: 4321.243164, avg loss: 2.516740, ppl: 12.388149 +epoch: 2, batch: 37077, sum loss: 5566.257812, avg loss: 2.746057, ppl: 15.581075 +epoch: 2, batch: 37078, sum loss: 4112.792480, avg loss: 2.501698, ppl: 12.203203 +epoch: 2, batch: 37079, sum loss: 4349.095703, avg loss: 2.441940, ppl: 11.495323 +epoch: 2, batch: 37080, sum loss: 4053.439209, avg loss: 2.399905, ppl: 11.022129 +epoch: 2, batch: 37081, sum loss: 4197.184570, avg loss: 2.307413, ppl: 10.048397 +epoch: 2, batch: 37082, sum loss: 3798.300537, avg loss: 2.520438, ppl: 12.434047 +epoch: 2, batch: 
37083, sum loss: 4141.312012, avg loss: 2.591559, ppl: 13.350574 +epoch: 2, batch: 37084, sum loss: 4186.148438, avg loss: 2.354414, ppl: 10.531958 +epoch: 2, batch: 37085, sum loss: 5576.606445, avg loss: 2.985335, ppl: 19.793139 +epoch: 2, batch: 37086, sum loss: 4144.603027, avg loss: 2.383326, ppl: 10.840896 +epoch: 2, batch: 37087, sum loss: 4169.406250, avg loss: 2.394834, ppl: 10.966380 +epoch: 2, batch: 37088, sum loss: 4323.068359, avg loss: 2.593322, ppl: 13.374130 +epoch: 2, batch: 37089, sum loss: 4575.852051, avg loss: 2.468097, ppl: 11.799970 +epoch: 2, batch: 37090, sum loss: 5114.310547, avg loss: 2.577777, ppl: 13.167839 +epoch: 2, batch: 37091, sum loss: 3623.956299, avg loss: 2.295096, ppl: 9.925385 +epoch: 2, batch: 37092, sum loss: 4653.158203, avg loss: 2.600983, ppl: 13.476974 +epoch: 2, batch: 37093, sum loss: 4055.377441, avg loss: 2.574843, ppl: 13.129251 +epoch: 2, batch: 37094, sum loss: 4139.469727, avg loss: 2.427842, ppl: 11.334393 +epoch: 2, batch: 37095, sum loss: 4478.883301, avg loss: 2.565225, ppl: 13.003589 +epoch: 2, batch: 37096, sum loss: 3972.138428, avg loss: 2.630555, ppl: 13.881477 +epoch: 2, batch: 37097, sum loss: 3971.766113, avg loss: 2.451708, ppl: 11.608151 +epoch: 2, batch: 37098, sum loss: 4479.250000, avg loss: 2.682186, ppl: 14.617006 +epoch: 2, batch: 37099, sum loss: 3682.989502, avg loss: 2.169016, ppl: 8.749673 +epoch: 2, batch: 37100, sum loss: 3665.643066, avg loss: 2.254393, ppl: 9.529508 +epoch: 2, batch: 37101, sum loss: 4197.825684, avg loss: 2.604110, ppl: 13.519191 +epoch: 2, batch: 37102, sum loss: 4818.803711, avg loss: 2.541563, ppl: 12.699509 +epoch: 2, batch: 37103, sum loss: 4884.671387, avg loss: 2.653271, ppl: 14.200408 +epoch: 2, batch: 37104, sum loss: 4445.523926, avg loss: 2.534506, ppl: 12.610201 +epoch: 2, batch: 37105, sum loss: 3873.041504, avg loss: 2.465335, ppl: 11.767428 +epoch: 2, batch: 37106, sum loss: 4331.745117, avg loss: 2.572295, ppl: 13.095851 +epoch: 2, batch: 37107, sum 
loss: 4311.695312, avg loss: 2.352262, ppl: 10.509310 +epoch: 2, batch: 37108, sum loss: 4653.057617, avg loss: 2.528836, ppl: 12.538897 +epoch: 2, batch: 37109, sum loss: 3505.875977, avg loss: 2.419514, ppl: 11.240397 +epoch: 2, batch: 37110, sum loss: 4285.375977, avg loss: 2.323957, ppl: 10.216017 +epoch: 2, batch: 37111, sum loss: 4465.979492, avg loss: 2.403649, ppl: 11.063472 +epoch: 2, batch: 37112, sum loss: 3628.113037, avg loss: 2.225836, ppl: 9.261225 +epoch: 2, batch: 37113, sum loss: 4252.938477, avg loss: 2.609165, ppl: 13.587696 +epoch: 2, batch: 37114, sum loss: 3863.300781, avg loss: 2.392137, ppl: 10.936839 +epoch: 2, batch: 37115, sum loss: 4643.038086, avg loss: 2.509750, ppl: 12.301859 +epoch: 2, batch: 37116, sum loss: 4612.395508, avg loss: 2.444301, ppl: 11.522489 +epoch: 2, batch: 37117, sum loss: 3602.340088, avg loss: 2.250056, ppl: 9.488270 +epoch: 2, batch: 37118, sum loss: 4062.843750, avg loss: 2.336310, ppl: 10.343004 +epoch: 2, batch: 37119, sum loss: 4088.867432, avg loss: 2.599407, ppl: 13.455756 +epoch: 2, batch: 37120, sum loss: 4416.363770, avg loss: 2.527970, ppl: 12.528049 +epoch: 2, batch: 37121, sum loss: 4174.608398, avg loss: 2.746453, ppl: 15.587247 +epoch: 2, batch: 37122, sum loss: 3912.564697, avg loss: 2.401820, ppl: 11.043256 +epoch: 2, batch: 37123, sum loss: 4348.843262, avg loss: 2.571759, ppl: 13.088821 +epoch: 2, batch: 37124, sum loss: 3805.218750, avg loss: 2.230492, ppl: 9.304440 +epoch: 2, batch: 37125, sum loss: 5037.727051, avg loss: 2.647255, ppl: 14.115245 +epoch: 2, batch: 37126, sum loss: 4622.888184, avg loss: 2.498858, ppl: 12.168595 +epoch: 2, batch: 37127, sum loss: 3924.161133, avg loss: 2.423818, ppl: 11.288877 +epoch: 2, batch: 37128, sum loss: 4650.952148, avg loss: 2.653139, ppl: 14.198536 +epoch: 2, batch: 37129, sum loss: 3691.946289, avg loss: 2.244344, ppl: 9.434227 +epoch: 2, batch: 37130, sum loss: 3905.163086, avg loss: 2.439203, ppl: 11.463895 +epoch: 2, batch: 37131, sum loss: 
2892.202393, avg loss: 2.065859, ppl: 7.892073 +epoch: 2, batch: 37132, sum loss: 4403.996094, avg loss: 2.569426, ppl: 13.058328 +epoch: 2, batch: 37133, sum loss: 4449.405273, avg loss: 2.339330, ppl: 10.374283 +epoch: 2, batch: 37134, sum loss: 4525.482910, avg loss: 2.390641, ppl: 10.920489 +epoch: 2, batch: 37135, sum loss: 4015.105713, avg loss: 2.557392, ppl: 12.902126 +epoch: 2, batch: 37136, sum loss: 2848.222168, avg loss: 2.167597, ppl: 8.737262 +epoch: 2, batch: 37137, sum loss: 4601.128906, avg loss: 2.592185, ppl: 13.358932 +epoch: 2, batch: 37138, sum loss: 3795.509277, avg loss: 2.508598, ppl: 12.287695 +epoch: 2, batch: 37139, sum loss: 4365.369141, avg loss: 2.800109, ppl: 16.446434 +epoch: 2, batch: 37140, sum loss: 5239.975098, avg loss: 2.814165, ppl: 16.679245 +epoch: 2, batch: 37141, sum loss: 4277.029297, avg loss: 2.258199, ppl: 9.565847 +epoch: 2, batch: 37142, sum loss: 4385.515137, avg loss: 2.581233, ppl: 13.213424 +epoch: 2, batch: 37143, sum loss: 4077.888916, avg loss: 2.391724, ppl: 10.932323 +epoch: 2, batch: 37144, sum loss: 3541.899414, avg loss: 2.339432, ppl: 10.375340 +epoch: 2, batch: 37145, sum loss: 3909.527832, avg loss: 2.303788, ppl: 10.012035 +epoch: 2, batch: 37146, sum loss: 3477.596436, avg loss: 2.337094, ppl: 10.351115 +epoch: 2, batch: 37147, sum loss: 3775.938477, avg loss: 2.556492, ppl: 12.890519 +epoch: 2, batch: 37148, sum loss: 4771.826172, avg loss: 2.490515, ppl: 12.067487 +epoch: 2, batch: 37149, sum loss: 3257.975586, avg loss: 2.111455, ppl: 8.260255 +epoch: 2, batch: 37150, sum loss: 4315.181641, avg loss: 2.417469, ppl: 11.217429 +epoch: 2, batch: 37151, sum loss: 4228.401367, avg loss: 2.507949, ppl: 12.279714 +epoch: 2, batch: 37152, sum loss: 3639.968994, avg loss: 2.276403, ppl: 9.741579 +epoch: 2, batch: 37153, sum loss: 3451.884766, avg loss: 2.217010, ppl: 9.179845 +epoch: 2, batch: 37154, sum loss: 4104.662598, avg loss: 2.549480, ppl: 12.800443 +epoch: 2, batch: 37155, sum loss: 4037.029297, 
avg loss: 2.626564, ppl: 13.826185 +epoch: 2, batch: 37156, sum loss: 4320.413574, avg loss: 2.389609, ppl: 10.909231 +epoch: 2, batch: 37157, sum loss: 3901.002441, avg loss: 2.344352, ppl: 10.426519 +epoch: 2, batch: 37158, sum loss: 4380.598633, avg loss: 2.779568, ppl: 16.112055 +epoch: 2, batch: 37159, sum loss: 4239.742188, avg loss: 2.504278, ppl: 12.234718 +epoch: 2, batch: 37160, sum loss: 4090.621826, avg loss: 2.467203, ppl: 11.789422 +epoch: 2, batch: 37161, sum loss: 4057.515137, avg loss: 2.393814, ppl: 10.955201 +epoch: 2, batch: 37162, sum loss: 4069.618896, avg loss: 2.422392, ppl: 11.272796 +epoch: 2, batch: 37163, sum loss: 3605.120850, avg loss: 2.439189, ppl: 11.463737 +epoch: 2, batch: 37164, sum loss: 3694.338867, avg loss: 2.247165, ppl: 9.460874 +epoch: 2, batch: 37165, sum loss: 4044.461182, avg loss: 2.449704, ppl: 11.584919 +epoch: 2, batch: 37166, sum loss: 3596.776367, avg loss: 2.256447, ppl: 9.549099 +epoch: 2, batch: 37167, sum loss: 3522.741699, avg loss: 2.162518, ppl: 8.692997 +epoch: 2, batch: 37168, sum loss: 4257.974121, avg loss: 2.516533, ppl: 12.385583 +epoch: 2, batch: 37169, sum loss: 4245.623047, avg loss: 2.543813, ppl: 12.728105 +epoch: 2, batch: 37170, sum loss: 4273.815918, avg loss: 2.527390, ppl: 12.520782 +epoch: 2, batch: 37171, sum loss: 4553.579102, avg loss: 2.647430, ppl: 14.117705 +epoch: 2, batch: 37172, sum loss: 3906.791992, avg loss: 2.455558, ppl: 11.652929 +epoch: 2, batch: 37173, sum loss: 4593.774902, avg loss: 2.647709, ppl: 14.121647 +epoch: 2, batch: 37174, sum loss: 4611.087891, avg loss: 2.608082, ppl: 13.572988 +epoch: 2, batch: 37175, sum loss: 3930.721924, avg loss: 2.420395, ppl: 11.250304 +epoch: 2, batch: 37176, sum loss: 4092.225586, avg loss: 2.408608, ppl: 11.118479 +epoch: 2, batch: 37177, sum loss: 4339.665039, avg loss: 2.673854, ppl: 14.495727 +epoch: 2, batch: 37178, sum loss: 4501.911133, avg loss: 2.513630, ppl: 12.349674 +epoch: 2, batch: 37179, sum loss: 4809.654785, avg loss: 
2.666106, ppl: 14.383845 +epoch: 2, batch: 37180, sum loss: 5041.008789, avg loss: 2.557589, ppl: 12.904671 +epoch: 2, batch: 37181, sum loss: 4963.884277, avg loss: 2.696298, ppl: 14.824747 +epoch: 2, batch: 37182, sum loss: 5215.612793, avg loss: 2.805601, ppl: 16.537014 +epoch: 2, batch: 37183, sum loss: 3209.546387, avg loss: 2.093638, ppl: 8.114379 +epoch: 2, batch: 37184, sum loss: 4351.105469, avg loss: 2.418624, ppl: 11.230400 +epoch: 2, batch: 37185, sum loss: 4983.338867, avg loss: 2.457268, ppl: 11.672874 +epoch: 2, batch: 37186, sum loss: 4115.998535, avg loss: 2.624999, ppl: 13.804561 +epoch: 2, batch: 37187, sum loss: 3764.405762, avg loss: 2.501266, ppl: 12.197932 +epoch: 2, batch: 37188, sum loss: 3975.428223, avg loss: 2.349544, ppl: 10.480787 +epoch: 2, batch: 37189, sum loss: 4356.319824, avg loss: 2.416151, ppl: 11.202655 +epoch: 2, batch: 37190, sum loss: 4590.208984, avg loss: 2.651767, ppl: 14.179071 +epoch: 2, batch: 37191, sum loss: 5385.009766, avg loss: 2.744653, ppl: 15.559217 +epoch: 2, batch: 37192, sum loss: 2857.334961, avg loss: 2.045336, ppl: 7.731758 +epoch: 2, batch: 37193, sum loss: 4813.500000, avg loss: 2.561735, ppl: 12.958282 +epoch: 2, batch: 37194, sum loss: 4280.588867, avg loss: 2.528405, ppl: 12.533496 +epoch: 2, batch: 37195, sum loss: 4605.804199, avg loss: 2.738290, ppl: 15.460529 +epoch: 2, batch: 37196, sum loss: 4282.437500, avg loss: 2.513168, ppl: 12.343969 +epoch: 2, batch: 37197, sum loss: 4481.710938, avg loss: 2.491223, ppl: 12.076040 +epoch: 2, batch: 37198, sum loss: 4448.624512, avg loss: 2.507680, ppl: 12.276415 +epoch: 2, batch: 37199, sum loss: 3230.937500, avg loss: 2.389747, ppl: 10.910729 +epoch: 2, batch: 37200, sum loss: 3987.418213, avg loss: 2.476657, ppl: 11.901413 +epoch: 2, batch: 37201, sum loss: 4416.306152, avg loss: 2.448063, ppl: 11.565927 +epoch: 2, batch: 37202, sum loss: 4023.203613, avg loss: 2.435353, ppl: 11.419852 +epoch: 2, batch: 37203, sum loss: 4119.791504, avg loss: 2.288773, 
ppl: 9.862829 +epoch: 2, batch: 37204, sum loss: 4468.490234, avg loss: 2.432493, ppl: 11.387240 +epoch: 2, batch: 37205, sum loss: 4157.486328, avg loss: 2.697915, ppl: 14.848734 +epoch: 2, batch: 37206, sum loss: 3957.871582, avg loss: 2.565050, ppl: 13.001304 +epoch: 2, batch: 37207, sum loss: 3847.677979, avg loss: 2.383939, ppl: 10.847550 +epoch: 2, batch: 37208, sum loss: 3308.263916, avg loss: 2.208454, ppl: 9.101633 +epoch: 2, batch: 37209, sum loss: 4222.990234, avg loss: 2.473925, ppl: 11.868942 +epoch: 2, batch: 37210, sum loss: 4904.823242, avg loss: 2.687574, ppl: 14.695986 +epoch: 2, batch: 37211, sum loss: 4033.277832, avg loss: 2.483545, ppl: 11.983675 +epoch: 2, batch: 37212, sum loss: 3894.749023, avg loss: 2.214184, ppl: 9.153934 +epoch: 2, batch: 37213, sum loss: 5041.553711, avg loss: 2.656245, ppl: 14.242714 +epoch: 2, batch: 37214, sum loss: 4035.987061, avg loss: 2.483684, ppl: 11.985341 +epoch: 2, batch: 37215, sum loss: 4540.486328, avg loss: 2.765217, ppl: 15.882487 +epoch: 2, batch: 37216, sum loss: 4101.613770, avg loss: 2.333114, ppl: 10.309994 +epoch: 2, batch: 37217, sum loss: 4117.154297, avg loss: 2.670009, ppl: 14.440104 +epoch: 2, batch: 37218, sum loss: 5032.208496, avg loss: 2.676707, ppl: 14.537137 +epoch: 2, batch: 37219, sum loss: 3502.353760, avg loss: 2.372869, ppl: 10.728122 +epoch: 2, batch: 37220, sum loss: 3145.920654, avg loss: 2.166612, ppl: 8.728662 +epoch: 2, batch: 37221, sum loss: 3701.688965, avg loss: 2.372878, ppl: 10.728219 +epoch: 2, batch: 37222, sum loss: 3595.350342, avg loss: 2.230366, ppl: 9.303272 +epoch: 2, batch: 37223, sum loss: 3958.546387, avg loss: 2.286855, ppl: 9.843932 +epoch: 2, batch: 37224, sum loss: 3808.638184, avg loss: 2.396877, ppl: 10.988808 +epoch: 2, batch: 37225, sum loss: 4587.595215, avg loss: 2.687519, ppl: 14.695173 +epoch: 2, batch: 37226, sum loss: 3762.974121, avg loss: 2.229250, ppl: 9.292895 +epoch: 2, batch: 37227, sum loss: 3478.883789, avg loss: 2.134284, ppl: 8.450997 
+epoch: 2, batch: 37228, sum loss: 3536.856201, avg loss: 2.286268, ppl: 9.838151 +epoch: 2, batch: 37229, sum loss: 3847.458252, avg loss: 2.319143, ppl: 10.166958 +epoch: 2, batch: 37230, sum loss: 3584.863281, avg loss: 2.278998, ppl: 9.766886 +epoch: 2, batch: 37231, sum loss: 3908.975098, avg loss: 2.466230, ppl: 11.777965 +epoch: 2, batch: 37232, sum loss: 4349.053711, avg loss: 2.425574, ppl: 11.308717 +epoch: 2, batch: 37233, sum loss: 5318.707520, avg loss: 2.775944, ppl: 16.053766 +epoch: 2, batch: 37234, sum loss: 4208.232910, avg loss: 2.516886, ppl: 12.389951 +epoch: 2, batch: 37235, sum loss: 3637.320068, avg loss: 2.300645, ppl: 9.980619 +epoch: 2, batch: 37236, sum loss: 4562.268066, avg loss: 2.491681, ppl: 12.081573 +epoch: 2, batch: 37237, sum loss: 4893.843262, avg loss: 2.930445, ppl: 18.735970 +epoch: 2, batch: 37238, sum loss: 5474.754395, avg loss: 2.829330, ppl: 16.934114 +epoch: 2, batch: 37239, sum loss: 4785.310059, avg loss: 2.523898, ppl: 12.477137 +epoch: 2, batch: 37240, sum loss: 2820.139160, avg loss: 2.053998, ppl: 7.799017 +epoch: 2, batch: 37241, sum loss: 4439.028809, avg loss: 2.346210, ppl: 10.445902 +epoch: 2, batch: 37242, sum loss: 4527.820312, avg loss: 2.451446, ppl: 11.605116 +epoch: 2, batch: 37243, sum loss: 4882.328613, avg loss: 2.657773, ppl: 14.264487 +epoch: 2, batch: 37244, sum loss: 4420.622070, avg loss: 2.556751, ppl: 12.893854 +epoch: 2, batch: 37245, sum loss: 3803.794434, avg loss: 2.144191, ppl: 8.535131 +epoch: 2, batch: 37246, sum loss: 4727.653809, avg loss: 2.606204, ppl: 13.547527 +epoch: 2, batch: 37247, sum loss: 4166.836426, avg loss: 2.347513, ppl: 10.459529 +epoch: 2, batch: 37248, sum loss: 4090.449951, avg loss: 2.336065, ppl: 10.340469 +epoch: 2, batch: 37249, sum loss: 4515.932617, avg loss: 2.396992, ppl: 10.990068 +epoch: 2, batch: 37250, sum loss: 3837.044434, avg loss: 2.319858, ppl: 10.174226 +epoch: 2, batch: 37251, sum loss: 3776.471680, avg loss: 2.447487, ppl: 11.559258 +epoch: 2, 
batch: 37252, sum loss: 4854.396484, avg loss: 2.422354, ppl: 11.272358 +epoch: 2, batch: 37253, sum loss: 3895.225098, avg loss: 2.423911, ppl: 11.289929 +epoch: 2, batch: 37254, sum loss: 4024.830078, avg loss: 2.313121, ppl: 10.105914 +epoch: 2, batch: 37255, sum loss: 4577.305176, avg loss: 2.544361, ppl: 12.735086 +epoch: 2, batch: 37256, sum loss: 4955.006348, avg loss: 2.768160, ppl: 15.929295 +epoch: 2, batch: 37257, sum loss: 4577.460449, avg loss: 2.577399, ppl: 13.162857 +epoch: 2, batch: 37258, sum loss: 3762.850342, avg loss: 2.321314, ppl: 10.189054 +epoch: 2, batch: 37259, sum loss: 4090.595215, avg loss: 2.593910, ppl: 13.381987 +epoch: 2, batch: 37260, sum loss: 4046.414795, avg loss: 2.246760, ppl: 9.457045 +epoch: 2, batch: 37261, sum loss: 3585.046631, avg loss: 2.249088, ppl: 9.479090 +epoch: 2, batch: 37262, sum loss: 5410.185059, avg loss: 2.572603, ppl: 13.099885 +epoch: 2, batch: 37263, sum loss: 3701.658447, avg loss: 2.275143, ppl: 9.729314 +epoch: 2, batch: 37264, sum loss: 4447.758789, avg loss: 2.588917, ppl: 13.315340 +epoch: 2, batch: 37265, sum loss: 3385.791748, avg loss: 2.207165, ppl: 9.089914 +epoch: 2, batch: 37266, sum loss: 4403.084473, avg loss: 2.642908, ppl: 14.054011 +epoch: 2, batch: 37267, sum loss: 5201.828125, avg loss: 2.747928, ppl: 15.610256 +epoch: 2, batch: 37268, sum loss: 4854.718262, avg loss: 2.476897, ppl: 11.904268 +epoch: 2, batch: 37269, sum loss: 3472.122070, avg loss: 2.370049, ppl: 10.697919 +epoch: 2, batch: 37270, sum loss: 4878.372070, avg loss: 2.716243, ppl: 15.123394 +epoch: 2, batch: 37271, sum loss: 4172.583496, avg loss: 2.447263, ppl: 11.556676 +epoch: 2, batch: 37272, sum loss: 4170.022461, avg loss: 2.569330, ppl: 13.057073 +epoch: 2, batch: 37273, sum loss: 4081.596191, avg loss: 2.436774, ppl: 11.436089 +epoch: 2, batch: 37274, sum loss: 4060.188477, avg loss: 2.379946, ppl: 10.804322 +epoch: 2, batch: 37275, sum loss: 4450.089355, avg loss: 2.497244, ppl: 12.148970 +epoch: 2, batch: 
37276, sum loss: 3657.107910, avg loss: 2.285692, ppl: 9.832493 +epoch: 2, batch: 37277, sum loss: 4188.736328, avg loss: 2.425441, ppl: 11.307213 +epoch: 2, batch: 37278, sum loss: 4210.185059, avg loss: 2.695381, ppl: 14.811160 +epoch: 2, batch: 37279, sum loss: 3833.427002, avg loss: 2.292720, ppl: 9.901830 +epoch: 2, batch: 37280, sum loss: 4549.572266, avg loss: 2.652812, ppl: 14.193892 +epoch: 2, batch: 37281, sum loss: 4168.414062, avg loss: 2.487121, ppl: 12.026597 +epoch: 2, batch: 37282, sum loss: 4928.187012, avg loss: 2.542924, ppl: 12.716803 +epoch: 2, batch: 37283, sum loss: 4117.400391, avg loss: 2.381377, ppl: 10.819789 +epoch: 2, batch: 37284, sum loss: 3707.257568, avg loss: 2.292676, ppl: 9.901402 +epoch: 2, batch: 37285, sum loss: 4166.586914, avg loss: 2.591161, ppl: 13.345257 +epoch: 2, batch: 37286, sum loss: 3520.272217, avg loss: 2.416110, ppl: 11.202196 +epoch: 2, batch: 37287, sum loss: 4124.972656, avg loss: 2.489422, ppl: 12.054308 +epoch: 2, batch: 37288, sum loss: 4507.715820, avg loss: 2.498734, ppl: 12.167078 +epoch: 2, batch: 37289, sum loss: 4949.116211, avg loss: 2.714820, ppl: 15.101887 +epoch: 2, batch: 37290, sum loss: 4213.923828, avg loss: 2.462843, ppl: 11.738135 +epoch: 2, batch: 37291, sum loss: 4373.154785, avg loss: 2.632844, ppl: 13.913289 +epoch: 2, batch: 37292, sum loss: 3388.458008, avg loss: 2.181879, ppl: 8.862945 +epoch: 2, batch: 37293, sum loss: 4069.597412, avg loss: 2.399527, ppl: 11.017962 +epoch: 2, batch: 37294, sum loss: 4021.255371, avg loss: 2.380850, ppl: 10.814089 +epoch: 2, batch: 37295, sum loss: 4533.704102, avg loss: 2.734442, ppl: 15.401140 +epoch: 2, batch: 37296, sum loss: 3511.317383, avg loss: 2.200073, ppl: 9.025677 +epoch: 2, batch: 37297, sum loss: 3482.458496, avg loss: 2.343512, ppl: 10.417758 +epoch: 2, batch: 37298, sum loss: 3189.677246, avg loss: 2.142161, ppl: 8.517822 +epoch: 2, batch: 37299, sum loss: 4663.445312, avg loss: 2.435220, ppl: 11.418325 +epoch: 2, batch: 37300, sum 
loss: 4435.933594, avg loss: 2.618615, ppl: 13.716712 +epoch: 2, batch: 37301, sum loss: 3582.857666, avg loss: 2.145424, ppl: 8.545663 +epoch: 2, batch: 37302, sum loss: 3542.821777, avg loss: 2.263784, ppl: 9.619420 +epoch: 2, batch: 37303, sum loss: 3908.820801, avg loss: 2.513711, ppl: 12.350681 +epoch: 2, batch: 37304, sum loss: 4115.169922, avg loss: 2.500103, ppl: 12.183752 +epoch: 2, batch: 37305, sum loss: 4121.322754, avg loss: 2.456092, ppl: 11.659160 +epoch: 2, batch: 37306, sum loss: 3980.787842, avg loss: 2.184845, ppl: 8.889273 +epoch: 2, batch: 37307, sum loss: 3701.281494, avg loss: 2.250019, ppl: 9.487917 +epoch: 2, batch: 37308, sum loss: 4450.091797, avg loss: 2.517020, ppl: 12.391621 +epoch: 2, batch: 37309, sum loss: 3441.212158, avg loss: 2.360228, ppl: 10.593365 +epoch: 2, batch: 37310, sum loss: 4168.701172, avg loss: 2.447857, ppl: 11.563544 +epoch: 2, batch: 37311, sum loss: 4234.804688, avg loss: 2.730371, ppl: 15.338570 +epoch: 2, batch: 37312, sum loss: 3401.303223, avg loss: 2.334456, ppl: 10.323847 +epoch: 2, batch: 37313, sum loss: 4256.197266, avg loss: 2.542531, ppl: 12.711810 +epoch: 2, batch: 37314, sum loss: 3445.437988, avg loss: 2.144019, ppl: 8.533663 +epoch: 2, batch: 37315, sum loss: 4588.893555, avg loss: 2.640330, ppl: 14.017830 +epoch: 2, batch: 37316, sum loss: 4777.108887, avg loss: 2.388555, ppl: 10.897731 +epoch: 2, batch: 37317, sum loss: 4216.666016, avg loss: 2.630484, ppl: 13.880487 +epoch: 2, batch: 37318, sum loss: 4388.206055, avg loss: 2.695458, ppl: 14.812304 +epoch: 2, batch: 37319, sum loss: 3647.427734, avg loss: 2.477872, ppl: 11.915882 +epoch: 2, batch: 37320, sum loss: 4290.038086, avg loss: 2.382031, ppl: 10.826870 +epoch: 2, batch: 37321, sum loss: 4406.802246, avg loss: 2.602955, ppl: 13.503580 +epoch: 2, batch: 37322, sum loss: 5586.927734, avg loss: 3.008577, ppl: 20.258554 +epoch: 2, batch: 37323, sum loss: 4975.793945, avg loss: 2.884518, ppl: 17.894947 +epoch: 2, batch: 37324, sum loss: 
4812.208984, avg loss: 2.527421, ppl: 12.521170 +epoch: 2, batch: 37325, sum loss: 3788.959961, avg loss: 2.260716, ppl: 9.589952 +epoch: 2, batch: 37326, sum loss: 3909.922363, avg loss: 2.201533, ppl: 9.038858 +epoch: 2, batch: 37327, sum loss: 4428.186523, avg loss: 2.747014, ppl: 15.595993 +epoch: 2, batch: 37328, sum loss: 4454.763672, avg loss: 2.669122, ppl: 14.427289 +epoch: 2, batch: 37329, sum loss: 3760.813721, avg loss: 2.196737, ppl: 8.995611 +epoch: 2, batch: 37330, sum loss: 4339.919434, avg loss: 2.484213, ppl: 11.991674 +epoch: 2, batch: 37331, sum loss: 4215.508789, avg loss: 2.431089, ppl: 11.371261 +epoch: 2, batch: 37332, sum loss: 4266.064453, avg loss: 2.569918, ppl: 13.064758 +epoch: 2, batch: 37333, sum loss: 5375.307617, avg loss: 2.793819, ppl: 16.343315 +epoch: 2, batch: 37334, sum loss: 4565.019043, avg loss: 2.513777, ppl: 12.351494 +epoch: 2, batch: 37335, sum loss: 3934.545898, avg loss: 2.410874, ppl: 11.143695 +epoch: 2, batch: 37336, sum loss: 3833.027344, avg loss: 2.395642, ppl: 10.975243 +epoch: 2, batch: 37337, sum loss: 3334.364502, avg loss: 2.083978, ppl: 8.036371 +epoch: 2, batch: 37338, sum loss: 3312.476562, avg loss: 2.232127, ppl: 9.319668 +epoch: 2, batch: 37339, sum loss: 4083.020508, avg loss: 2.485101, ppl: 12.002330 +epoch: 2, batch: 37340, sum loss: 4889.363770, avg loss: 2.602110, ppl: 13.492172 +epoch: 2, batch: 37341, sum loss: 4568.660156, avg loss: 2.570996, ppl: 13.078845 +epoch: 2, batch: 37342, sum loss: 4204.250000, avg loss: 2.435834, ppl: 11.425345 +epoch: 2, batch: 37343, sum loss: 4783.079102, avg loss: 2.557796, ppl: 12.907345 +epoch: 2, batch: 37344, sum loss: 5006.731445, avg loss: 2.582120, ppl: 13.225151 +epoch: 2, batch: 37345, sum loss: 3472.165527, avg loss: 2.147289, ppl: 8.561612 +epoch: 2, batch: 37346, sum loss: 4379.964844, avg loss: 2.456514, ppl: 11.664083 +epoch: 2, batch: 37347, sum loss: 3694.466553, avg loss: 2.122037, ppl: 8.348126 +epoch: 2, batch: 37348, sum loss: 5390.010254, 
avg loss: 2.926173, ppl: 18.656096 +epoch: 2, batch: 37349, sum loss: 4207.917969, avg loss: 2.295645, ppl: 9.930844 +epoch: 2, batch: 37350, sum loss: 4155.788086, avg loss: 2.482550, ppl: 11.971752 +epoch: 2, batch: 37351, sum loss: 3753.185791, avg loss: 2.432395, ppl: 11.386122 +epoch: 2, batch: 37352, sum loss: 4023.116455, avg loss: 2.301554, ppl: 9.989697 +epoch: 2, batch: 37353, sum loss: 3893.991211, avg loss: 2.319232, ppl: 10.167867 +epoch: 2, batch: 37354, sum loss: 4038.067139, avg loss: 2.416557, ppl: 11.207210 +epoch: 2, batch: 37355, sum loss: 4464.006836, avg loss: 2.469030, ppl: 11.810989 +epoch: 2, batch: 37356, sum loss: 4425.417969, avg loss: 2.432885, ppl: 11.391702 +epoch: 2, batch: 37357, sum loss: 4339.932617, avg loss: 2.371548, ppl: 10.713964 +epoch: 2, batch: 37358, sum loss: 3939.802734, avg loss: 2.454706, ppl: 11.643006 +epoch: 2, batch: 37359, sum loss: 5225.862793, avg loss: 2.654069, ppl: 14.211755 +epoch: 2, batch: 37360, sum loss: 4889.162109, avg loss: 2.670214, ppl: 14.443066 +epoch: 2, batch: 37361, sum loss: 4512.974609, avg loss: 2.503036, ppl: 12.219543 +epoch: 2, batch: 37362, sum loss: 3982.452393, avg loss: 2.310007, ppl: 10.074498 +epoch: 2, batch: 37363, sum loss: 4679.869141, avg loss: 2.463089, ppl: 11.741026 +epoch: 2, batch: 37364, sum loss: 3949.359375, avg loss: 2.412559, ppl: 11.162493 +epoch: 2, batch: 37365, sum loss: 5457.876953, avg loss: 2.700583, ppl: 14.888402 +epoch: 2, batch: 37366, sum loss: 5306.592285, avg loss: 2.672000, ppl: 14.468881 +epoch: 2, batch: 37367, sum loss: 4037.911621, avg loss: 2.489465, ppl: 12.054819 +epoch: 2, batch: 37368, sum loss: 4356.961914, avg loss: 2.411158, ppl: 11.146857 +epoch: 2, batch: 37369, sum loss: 4200.193848, avg loss: 2.336036, ppl: 10.340172 +epoch: 2, batch: 37370, sum loss: 4174.970703, avg loss: 2.575552, ppl: 13.138574 +epoch: 2, batch: 37371, sum loss: 4047.291748, avg loss: 2.510727, ppl: 12.313878 +epoch: 2, batch: 37372, sum loss: 4335.716797, avg loss: 
2.506195, ppl: 12.258194 +epoch: 2, batch: 37373, sum loss: 4312.987793, avg loss: 2.394774, ppl: 10.965716 +epoch: 2, batch: 37374, sum loss: 4450.503906, avg loss: 2.697275, ppl: 14.839242 +epoch: 2, batch: 37375, sum loss: 3868.382568, avg loss: 2.465508, ppl: 11.769463 +epoch: 2, batch: 37376, sum loss: 3984.069580, avg loss: 2.388531, ppl: 10.897473 +epoch: 2, batch: 37377, sum loss: 4589.052734, avg loss: 2.520073, ppl: 12.429503 +epoch: 2, batch: 37378, sum loss: 4720.864258, avg loss: 2.616887, ppl: 13.693032 +epoch: 2, batch: 37379, sum loss: 3992.314209, avg loss: 2.374964, ppl: 10.750624 +epoch: 2, batch: 37380, sum loss: 4723.495117, avg loss: 2.791664, ppl: 16.308132 +epoch: 2, batch: 37381, sum loss: 3803.578857, avg loss: 2.384689, ppl: 10.855684 +epoch: 2, batch: 37382, sum loss: 3980.024658, avg loss: 2.447740, ppl: 11.562182 +epoch: 2, batch: 37383, sum loss: 3901.781250, avg loss: 2.354726, ppl: 10.535243 +epoch: 2, batch: 37384, sum loss: 3791.641602, avg loss: 2.259619, ppl: 9.579443 +epoch: 2, batch: 37385, sum loss: 4263.264648, avg loss: 2.590076, ppl: 13.330781 +epoch: 2, batch: 37386, sum loss: 5303.398438, avg loss: 2.568232, ppl: 13.042742 +epoch: 2, batch: 37387, sum loss: 4210.398438, avg loss: 2.416991, ppl: 11.212071 +epoch: 2, batch: 37388, sum loss: 3877.255371, avg loss: 2.440060, ppl: 11.473728 +epoch: 2, batch: 37389, sum loss: 3528.790283, avg loss: 2.568261, ppl: 13.043121 +epoch: 2, batch: 37390, sum loss: 4769.507324, avg loss: 2.920702, ppl: 18.554316 +epoch: 2, batch: 37391, sum loss: 5151.787109, avg loss: 2.775747, ppl: 16.050617 +epoch: 2, batch: 37392, sum loss: 4594.833496, avg loss: 2.666764, ppl: 14.393311 +epoch: 2, batch: 37393, sum loss: 4452.631348, avg loss: 2.626921, ppl: 13.831120 +epoch: 2, batch: 37394, sum loss: 4694.475586, avg loss: 2.497061, ppl: 12.146749 +epoch: 2, batch: 37395, sum loss: 4671.554688, avg loss: 2.692539, ppl: 14.769123 +epoch: 2, batch: 37396, sum loss: 3646.876953, avg loss: 
2.434497, ppl: 11.410079 +epoch: 2, batch: 37397, sum loss: 3259.922363, avg loss: 2.157460, ppl: 8.649143 +epoch: 2, batch: 37398, sum loss: 4425.644043, avg loss: 2.355319, ppl: 10.541489 +epoch: 2, batch: 37399, sum loss: 4583.927246, avg loss: 2.504878, ppl: 12.242069 +epoch: 2, batch: 37400, sum loss: 3358.509521, avg loss: 2.162595, ppl: 8.693666 +epoch: 2, batch: 37401, sum loss: 4458.838867, avg loss: 2.536313, ppl: 12.633011 +epoch: 2, batch: 37402, sum loss: 3678.468750, avg loss: 2.290454, ppl: 9.879418 +epoch: 2, batch: 37403, sum loss: 3861.105469, avg loss: 2.208870, ppl: 9.105425 +epoch: 2, batch: 37404, sum loss: 5297.365234, avg loss: 2.530992, ppl: 12.565960 +epoch: 2, batch: 37405, sum loss: 3595.031250, avg loss: 2.501761, ppl: 12.203972 +epoch: 2, batch: 37406, sum loss: 4069.123779, avg loss: 2.473631, ppl: 11.865457 +epoch: 2, batch: 37407, sum loss: 4026.411133, avg loss: 2.524396, ppl: 12.483350 +epoch: 2, batch: 37408, sum loss: 4021.013916, avg loss: 2.442900, ppl: 11.506366 +epoch: 2, batch: 37409, sum loss: 3608.491455, avg loss: 2.232977, ppl: 9.327597 +epoch: 2, batch: 37410, sum loss: 3951.361816, avg loss: 2.491401, ppl: 12.078185 +epoch: 2, batch: 37411, sum loss: 4094.660400, avg loss: 2.568796, ppl: 13.050098 +epoch: 2, batch: 37412, sum loss: 3618.685059, avg loss: 2.283082, ppl: 9.806859 +epoch: 2, batch: 37413, sum loss: 4122.708008, avg loss: 2.250386, ppl: 9.491404 +epoch: 2, batch: 37414, sum loss: 3456.982910, avg loss: 2.156571, ppl: 8.641453 +epoch: 2, batch: 37415, sum loss: 4716.652832, avg loss: 2.491628, ppl: 12.080933 +epoch: 2, batch: 37416, sum loss: 3811.155518, avg loss: 2.330982, ppl: 10.288039 +epoch: 2, batch: 37417, sum loss: 4049.704590, avg loss: 2.284097, ppl: 9.816822 +epoch: 2, batch: 37418, sum loss: 4544.230957, avg loss: 2.469691, ppl: 11.818789 +epoch: 2, batch: 37419, sum loss: 4504.711426, avg loss: 2.575593, ppl: 13.139100 +epoch: 2, batch: 37420, sum loss: 4543.348145, avg loss: 2.366327, ppl: 
10.658175 +epoch: 2, batch: 37421, sum loss: 3746.783447, avg loss: 2.309977, ppl: 10.074196 +epoch: 2, batch: 37422, sum loss: 4271.093262, avg loss: 2.628365, ppl: 13.851106 +epoch: 2, batch: 37423, sum loss: 4517.991699, avg loss: 2.673368, ppl: 14.488681 +epoch: 2, batch: 37424, sum loss: 4094.521973, avg loss: 2.480026, ppl: 11.941569 +epoch: 2, batch: 37425, sum loss: 4316.045898, avg loss: 2.647881, ppl: 14.124075 +epoch: 2, batch: 37426, sum loss: 4567.666992, avg loss: 2.610096, ppl: 13.600349 +epoch: 2, batch: 37427, sum loss: 4301.240723, avg loss: 2.466308, ppl: 11.778874 +epoch: 2, batch: 37428, sum loss: 4489.052246, avg loss: 2.485632, ppl: 12.008713 +epoch: 2, batch: 37429, sum loss: 4006.484863, avg loss: 2.423766, ppl: 11.288287 +epoch: 2, batch: 37430, sum loss: 5066.092285, avg loss: 2.667768, ppl: 14.407782 +epoch: 2, batch: 37431, sum loss: 3898.099609, avg loss: 2.451635, ppl: 11.607307 +epoch: 2, batch: 37432, sum loss: 4824.920410, avg loss: 2.526136, ppl: 12.505098 +epoch: 2, batch: 37433, sum loss: 4016.124268, avg loss: 2.362426, ppl: 10.616679 +epoch: 2, batch: 37434, sum loss: 4134.233887, avg loss: 2.357032, ppl: 10.559563 +epoch: 2, batch: 37435, sum loss: 4273.665039, avg loss: 2.359837, ppl: 10.589229 +epoch: 2, batch: 37436, sum loss: 3863.929199, avg loss: 2.241258, ppl: 9.405159 +epoch: 2, batch: 37437, sum loss: 5113.500488, avg loss: 2.698417, ppl: 14.856198 +epoch: 2, batch: 37438, sum loss: 4472.683594, avg loss: 2.488973, ppl: 12.048891 +epoch: 2, batch: 37439, sum loss: 4822.345703, avg loss: 2.694048, ppl: 14.791426 +epoch: 2, batch: 37440, sum loss: 3364.024414, avg loss: 2.224884, ppl: 9.252410 +epoch: 2, batch: 37441, sum loss: 3925.404785, avg loss: 2.475034, ppl: 11.882116 +epoch: 2, batch: 37442, sum loss: 4259.892090, avg loss: 2.558494, ppl: 12.916348 +epoch: 2, batch: 37443, sum loss: 3731.708252, avg loss: 2.453457, ppl: 11.628478 +epoch: 2, batch: 37444, sum loss: 4497.222168, avg loss: 2.519452, ppl: 12.421792 
+epoch: 2, batch: 37445, sum loss: 4846.878906, avg loss: 2.578127, ppl: 13.172441 +epoch: 2, batch: 37446, sum loss: 3816.117188, avg loss: 2.240820, ppl: 9.401041 +epoch: 2, batch: 37447, sum loss: 3278.554199, avg loss: 2.037635, ppl: 7.672440 +epoch: 2, batch: 37448, sum loss: 3888.160645, avg loss: 2.528063, ppl: 12.529211 +epoch: 2, batch: 37449, sum loss: 4311.739746, avg loss: 2.520012, ppl: 12.428741 +epoch: 2, batch: 37450, sum loss: 4293.935059, avg loss: 2.515486, ppl: 12.372623 +epoch: 2, batch: 37451, sum loss: 5525.187500, avg loss: 2.837795, ppl: 17.078075 +epoch: 2, batch: 37452, sum loss: 3189.335449, avg loss: 2.297792, ppl: 9.952184 +epoch: 2, batch: 37453, sum loss: 3981.564209, avg loss: 2.382744, ppl: 10.834588 +epoch: 2, batch: 37454, sum loss: 4263.894043, avg loss: 2.464679, ppl: 11.759701 +epoch: 2, batch: 37455, sum loss: 4189.542969, avg loss: 2.499727, ppl: 12.179174 +epoch: 2, batch: 37456, sum loss: 4329.949219, avg loss: 2.472844, ppl: 11.856113 +epoch: 2, batch: 37457, sum loss: 4855.108398, avg loss: 2.618721, ppl: 13.718163 +epoch: 2, batch: 37458, sum loss: 4574.720215, avg loss: 2.442456, ppl: 11.501256 +epoch: 2, batch: 37459, sum loss: 4381.831543, avg loss: 2.404957, ppl: 11.077954 +epoch: 2, batch: 37460, sum loss: 3498.229004, avg loss: 2.265692, ppl: 9.637796 +epoch: 2, batch: 37461, sum loss: 4762.663086, avg loss: 2.674151, ppl: 14.500037 +epoch: 2, batch: 37462, sum loss: 5075.591797, avg loss: 2.752490, ppl: 15.681635 +epoch: 2, batch: 37463, sum loss: 4541.999023, avg loss: 2.623916, ppl: 13.789620 +epoch: 2, batch: 37464, sum loss: 4005.387207, avg loss: 2.385579, ppl: 10.865353 +epoch: 2, batch: 37465, sum loss: 4817.462402, avg loss: 2.644052, ppl: 14.070097 +epoch: 2, batch: 37466, sum loss: 4941.497070, avg loss: 2.672524, ppl: 14.476461 +epoch: 2, batch: 37467, sum loss: 3996.211914, avg loss: 2.361827, ppl: 10.610323 +epoch: 2, batch: 37468, sum loss: 4575.506836, avg loss: 2.459950, ppl: 11.704226 +epoch: 2, 
batch: 37469, sum loss: 3381.139160, avg loss: 2.114533, ppl: 8.285719 +epoch: 2, batch: 37470, sum loss: 3846.441895, avg loss: 2.453088, ppl: 11.624185 +epoch: 2, batch: 37471, sum loss: 2850.569336, avg loss: 1.875375, ppl: 6.523263 +epoch: 2, batch: 37472, sum loss: 4563.664551, avg loss: 2.500638, ppl: 12.190269 +epoch: 2, batch: 37473, sum loss: 3620.914795, avg loss: 2.430144, ppl: 11.360521 +epoch: 2, batch: 37474, sum loss: 4011.576172, avg loss: 2.470182, ppl: 11.824604 +epoch: 2, batch: 37475, sum loss: 3703.989502, avg loss: 2.424077, ppl: 11.291802 +epoch: 2, batch: 37476, sum loss: 5646.373047, avg loss: 2.786956, ppl: 16.231541 +epoch: 2, batch: 37477, sum loss: 3627.490479, avg loss: 2.285753, ppl: 9.833093 +epoch: 2, batch: 37478, sum loss: 3967.368164, avg loss: 2.303930, ppl: 10.013461 +epoch: 2, batch: 37479, sum loss: 3770.381348, avg loss: 2.413816, ppl: 11.176535 +epoch: 2, batch: 37480, sum loss: 3991.841553, avg loss: 2.383189, ppl: 10.839417 +epoch: 2, batch: 37481, sum loss: 3435.270508, avg loss: 2.170101, ppl: 8.759172 +epoch: 2, batch: 37482, sum loss: 3566.555908, avg loss: 2.301004, ppl: 9.984200 +epoch: 2, batch: 37483, sum loss: 4654.934082, avg loss: 2.532608, ppl: 12.586292 +epoch: 2, batch: 37484, sum loss: 3267.794189, avg loss: 2.094740, ppl: 8.123328 +epoch: 2, batch: 37485, sum loss: 4667.206543, avg loss: 2.409503, ppl: 11.128424 +epoch: 2, batch: 37486, sum loss: 3940.133545, avg loss: 2.478071, ppl: 11.918258 +epoch: 2, batch: 37487, sum loss: 3418.967285, avg loss: 2.224442, ppl: 9.248321 +epoch: 2, batch: 37488, sum loss: 4231.705078, avg loss: 2.374694, ppl: 10.747727 +epoch: 2, batch: 37489, sum loss: 3402.340820, avg loss: 2.241331, ppl: 9.405846 +epoch: 2, batch: 37490, sum loss: 4667.338867, avg loss: 2.518801, ppl: 12.413707 +epoch: 2, batch: 37491, sum loss: 4283.868164, avg loss: 2.569807, ppl: 13.063300 +epoch: 2, batch: 37492, sum loss: 4236.000000, avg loss: 2.567273, ppl: 13.030238 +epoch: 2, batch: 37493, 
sum loss: 4962.532715, avg loss: 2.740217, ppl: 15.490346 +epoch: 2, batch: 37494, sum loss: 3979.302246, avg loss: 2.550835, ppl: 12.817801 +epoch: 2, batch: 37495, sum loss: 4161.911621, avg loss: 2.354023, ppl: 10.527833 +epoch: 2, batch: 37496, sum loss: 4444.094727, avg loss: 2.507954, ppl: 12.279781 +epoch: 2, batch: 37497, sum loss: 3911.112549, avg loss: 2.263375, ppl: 9.615489 +epoch: 2, batch: 37498, sum loss: 3925.485352, avg loss: 2.503498, ppl: 12.225186 +epoch: 2, batch: 37499, sum loss: 4029.017090, avg loss: 2.456718, ppl: 11.666456 +epoch: 2, batch: 37500, sum loss: 4743.047852, avg loss: 2.645314, ppl: 14.087868 +epoch: 2, batch: 37501, sum loss: 5243.817383, avg loss: 2.667252, ppl: 14.400344 +epoch: 2, batch: 37502, sum loss: 3666.135986, avg loss: 2.108186, ppl: 8.233296 +epoch: 2, batch: 37503, sum loss: 3636.958496, avg loss: 2.358598, ppl: 10.576116 +epoch: 2, batch: 37504, sum loss: 4694.989746, avg loss: 2.562767, ppl: 12.971663 +epoch: 2, batch: 37505, sum loss: 4840.832031, avg loss: 2.427699, ppl: 11.332776 +epoch: 2, batch: 37506, sum loss: 4285.361328, avg loss: 2.479954, ppl: 11.940721 +epoch: 2, batch: 37507, sum loss: 4320.884277, avg loss: 2.407178, ppl: 11.102585 +epoch: 2, batch: 37508, sum loss: 4009.441895, avg loss: 2.461290, ppl: 11.719925 +epoch: 2, batch: 37509, sum loss: 4825.031738, avg loss: 2.769823, ppl: 15.955811 +epoch: 2, batch: 37510, sum loss: 4141.835449, avg loss: 2.365412, ppl: 10.648420 +epoch: 2, batch: 37511, sum loss: 3847.338623, avg loss: 2.248591, ppl: 9.474374 +epoch: 2, batch: 37512, sum loss: 4426.662109, avg loss: 2.619327, ppl: 13.726477 +epoch: 2, batch: 37513, sum loss: 4114.306152, avg loss: 2.595777, ppl: 13.406998 +epoch: 2, batch: 37514, sum loss: 4245.395996, avg loss: 2.349416, ppl: 10.479453 +epoch: 2, batch: 37515, sum loss: 3853.661133, avg loss: 2.372944, ppl: 10.728933 +epoch: 2, batch: 37516, sum loss: 4127.715820, avg loss: 2.283029, ppl: 9.806335 +epoch: 2, batch: 37517, sum loss: 
4231.106934, avg loss: 2.529054, ppl: 12.541636 +epoch: 2, batch: 37518, sum loss: 4227.019043, avg loss: 2.499716, ppl: 12.179032 +epoch: 2, batch: 37519, sum loss: 4088.281250, avg loss: 2.517415, ppl: 12.396505 +epoch: 2, batch: 37520, sum loss: 4750.217773, avg loss: 2.542943, ppl: 12.717042 +epoch: 2, batch: 37521, sum loss: 4546.827637, avg loss: 2.576106, ppl: 13.145852 +epoch: 2, batch: 37522, sum loss: 3524.581787, avg loss: 2.146518, ppl: 8.555014 +epoch: 2, batch: 37523, sum loss: 3724.357178, avg loss: 2.310395, ppl: 10.078407 +epoch: 2, batch: 37524, sum loss: 3689.001953, avg loss: 2.415849, ppl: 11.199280 +epoch: 2, batch: 37525, sum loss: 3892.891602, avg loss: 2.470109, ppl: 11.823732 +epoch: 2, batch: 37526, sum loss: 4216.114746, avg loss: 2.384680, ppl: 10.855591 +epoch: 2, batch: 37527, sum loss: 5102.597656, avg loss: 2.667327, ppl: 14.401428 +epoch: 2, batch: 37528, sum loss: 4357.930176, avg loss: 2.483151, ppl: 11.978951 +epoch: 2, batch: 37529, sum loss: 4324.259766, avg loss: 2.536223, ppl: 12.631869 +epoch: 2, batch: 37530, sum loss: 4090.916016, avg loss: 2.430728, ppl: 11.367159 +epoch: 2, batch: 37531, sum loss: 4835.928711, avg loss: 2.590214, ppl: 13.332619 +epoch: 2, batch: 37532, sum loss: 4450.737793, avg loss: 2.446805, ppl: 11.551378 +epoch: 2, batch: 37533, sum loss: 4847.932129, avg loss: 2.341996, ppl: 10.401980 +epoch: 2, batch: 37534, sum loss: 4414.278809, avg loss: 2.529673, ppl: 12.549397 +epoch: 2, batch: 37535, sum loss: 5258.092285, avg loss: 2.789439, ppl: 16.271889 +epoch: 2, batch: 37536, sum loss: 4648.288086, avg loss: 2.497737, ppl: 12.154955 +epoch: 2, batch: 37537, sum loss: 4310.990234, avg loss: 2.439723, ppl: 11.469861 +epoch: 2, batch: 37538, sum loss: 3705.633301, avg loss: 2.144464, ppl: 8.537462 +epoch: 2, batch: 37539, sum loss: 3852.288330, avg loss: 2.370639, ppl: 10.704228 +epoch: 2, batch: 37540, sum loss: 3133.730225, avg loss: 2.126004, ppl: 8.381310 +epoch: 2, batch: 37541, sum loss: 
4415.166992, avg loss: 2.512901, ppl: 12.340676 +epoch: 2, batch: 37542, sum loss: 5014.029297, avg loss: 2.717631, ppl: 15.144404 +epoch: 2, batch: 37543, sum loss: 3795.203125, avg loss: 2.443788, ppl: 11.516586 +epoch: 2, batch: 37544, sum loss: 5040.627930, avg loss: 2.536803, ppl: 12.639199 +epoch: 2, batch: 37545, sum loss: 3688.472168, avg loss: 2.202073, ppl: 9.043742 +epoch: 2, batch: 37546, sum loss: 4246.518555, avg loss: 2.487709, ppl: 12.033670 +epoch: 2, batch: 37547, sum loss: 3936.977783, avg loss: 2.187210, ppl: 8.910317 +epoch: 2, batch: 37548, sum loss: 4379.909180, avg loss: 2.409191, ppl: 11.124956 +epoch: 2, batch: 37549, sum loss: 3142.572021, avg loss: 2.155399, ppl: 8.631334 +epoch: 2, batch: 37550, sum loss: 4117.476074, avg loss: 2.324944, ppl: 10.226108 +epoch: 2, batch: 37551, sum loss: 4432.192383, avg loss: 2.572369, ppl: 13.096819 +epoch: 2, batch: 37552, sum loss: 3868.313965, avg loss: 2.350130, ppl: 10.486934 +epoch: 2, batch: 37553, sum loss: 4154.084961, avg loss: 2.283719, ppl: 9.813108 +epoch: 2, batch: 37554, sum loss: 4022.699951, avg loss: 2.458863, ppl: 11.691514 +epoch: 2, batch: 37555, sum loss: 3587.043457, avg loss: 2.162172, ppl: 8.689993 +epoch: 2, batch: 37556, sum loss: 3822.621582, avg loss: 2.593366, ppl: 13.374717 +epoch: 2, batch: 37557, sum loss: 3725.465820, avg loss: 2.285562, ppl: 9.831208 +epoch: 2, batch: 37558, sum loss: 4051.003174, avg loss: 2.492925, ppl: 12.096606 +epoch: 2, batch: 37559, sum loss: 4360.723145, avg loss: 2.606529, ppl: 13.551933 +epoch: 2, batch: 37560, sum loss: 3748.521973, avg loss: 2.389115, ppl: 10.903844 +epoch: 2, batch: 37561, sum loss: 3436.376221, avg loss: 2.232863, ppl: 9.326530 +epoch: 2, batch: 37562, sum loss: 4607.978516, avg loss: 2.905409, ppl: 18.272713 +epoch: 2, batch: 37563, sum loss: 4709.164551, avg loss: 2.950604, ppl: 19.117506 +epoch: 2, batch: 37564, sum loss: 4366.363770, avg loss: 2.507963, ppl: 12.279893 +epoch: 2, batch: 37565, sum loss: 4462.913574, 
avg loss: 2.522846, ppl: 12.464013 +epoch: 2, batch: 37566, sum loss: 4981.418945, avg loss: 2.711714, ppl: 15.055061 +epoch: 2, batch: 37567, sum loss: 4677.071289, avg loss: 2.720809, ppl: 15.192616 +epoch: 2, batch: 37568, sum loss: 4524.229492, avg loss: 2.499574, ppl: 12.177311 +epoch: 2, batch: 37569, sum loss: 4283.050293, avg loss: 2.529859, ppl: 12.551731 +epoch: 2, batch: 37570, sum loss: 4062.169678, avg loss: 2.389512, ppl: 10.908168 +epoch: 2, batch: 37571, sum loss: 3581.437988, avg loss: 2.504502, ppl: 12.237467 +epoch: 2, batch: 37572, sum loss: 4190.142578, avg loss: 2.433300, ppl: 11.396428 +epoch: 2, batch: 37573, sum loss: 4067.785156, avg loss: 2.563192, ppl: 12.977173 +epoch: 2, batch: 37574, sum loss: 4766.926758, avg loss: 2.437079, ppl: 11.439576 +epoch: 2, batch: 37575, sum loss: 3981.887939, avg loss: 2.494917, ppl: 12.120729 +epoch: 2, batch: 37576, sum loss: 3134.320068, avg loss: 2.224500, ppl: 9.248855 +epoch: 2, batch: 37577, sum loss: 3338.530029, avg loss: 2.449399, ppl: 11.581379 +epoch: 2, batch: 37578, sum loss: 4073.463867, avg loss: 2.390531, ppl: 10.919286 +epoch: 2, batch: 37579, sum loss: 4173.248535, avg loss: 2.555572, ppl: 12.878658 +epoch: 2, batch: 37580, sum loss: 3546.176270, avg loss: 2.188998, ppl: 8.926262 +epoch: 2, batch: 37581, sum loss: 4382.626953, avg loss: 2.766810, ppl: 15.907806 +epoch: 2, batch: 37582, sum loss: 3735.043457, avg loss: 2.405051, ppl: 11.078990 +epoch: 2, batch: 37583, sum loss: 3905.466309, avg loss: 2.277240, ppl: 9.749732 +epoch: 2, batch: 37584, sum loss: 3486.852539, avg loss: 2.126130, ppl: 8.382361 +epoch: 2, batch: 37585, sum loss: 3624.840576, avg loss: 2.248660, ppl: 9.475034 +epoch: 2, batch: 37586, sum loss: 4549.687500, avg loss: 2.529009, ppl: 12.541073 +epoch: 2, batch: 37587, sum loss: 4491.755371, avg loss: 2.354169, ppl: 10.529379 +epoch: 2, batch: 37588, sum loss: 4355.883301, avg loss: 2.454019, ppl: 11.635009 +epoch: 2, batch: 37589, sum loss: 4225.546387, avg loss: 
2.403610, ppl: 11.063042 +epoch: 2, batch: 37590, sum loss: 3147.993652, avg loss: 2.023132, ppl: 7.561973 +epoch: 2, batch: 37591, sum loss: 3692.634033, avg loss: 2.486622, ppl: 12.020603 +epoch: 2, batch: 37592, sum loss: 3137.800293, avg loss: 2.317430, ppl: 10.149556 +epoch: 2, batch: 37593, sum loss: 4088.180420, avg loss: 2.558311, ppl: 12.913984 +epoch: 2, batch: 37594, sum loss: 3875.710449, avg loss: 2.346072, ppl: 10.444460 +epoch: 2, batch: 37595, sum loss: 4100.496094, avg loss: 2.642072, ppl: 14.042275 +epoch: 2, batch: 37596, sum loss: 4243.807617, avg loss: 2.592430, ppl: 13.362201 +epoch: 2, batch: 37597, sum loss: 3986.811768, avg loss: 2.262663, ppl: 9.608642 +epoch: 2, batch: 37598, sum loss: 5020.640137, avg loss: 2.520402, ppl: 12.433594 +epoch: 2, batch: 37599, sum loss: 4449.728027, avg loss: 2.525385, ppl: 12.495704 +epoch: 2, batch: 37600, sum loss: 3256.250488, avg loss: 2.046669, ppl: 7.742069 +epoch: 2, batch: 37601, sum loss: 4748.576660, avg loss: 2.741673, ppl: 15.512909 +epoch: 2, batch: 37602, sum loss: 4541.611328, avg loss: 2.477693, ppl: 11.913749 +epoch: 2, batch: 37603, sum loss: 3842.267578, avg loss: 2.318810, ppl: 10.163568 +epoch: 2, batch: 37604, sum loss: 4666.901367, avg loss: 2.569880, ppl: 13.064251 +epoch: 2, batch: 37605, sum loss: 2988.451416, avg loss: 2.100106, ppl: 8.167040 +epoch: 2, batch: 37606, sum loss: 3825.729736, avg loss: 2.331340, ppl: 10.291726 +epoch: 2, batch: 37607, sum loss: 4322.104004, avg loss: 2.621045, ppl: 13.750090 +epoch: 2, batch: 37608, sum loss: 4239.000977, avg loss: 2.671078, ppl: 14.455546 +epoch: 2, batch: 37609, sum loss: 4122.286621, avg loss: 2.334251, ppl: 10.321723 +epoch: 2, batch: 37610, sum loss: 4871.168457, avg loss: 2.753628, ppl: 15.699494 +epoch: 2, batch: 37611, sum loss: 3680.526123, avg loss: 2.189486, ppl: 8.930622 +epoch: 2, batch: 37612, sum loss: 4220.408203, avg loss: 2.439542, ppl: 11.467791 +epoch: 2, batch: 37613, sum loss: 4993.632324, avg loss: 2.568741, 
ppl: 13.049386 +epoch: 2, batch: 37614, sum loss: 4194.711914, avg loss: 2.434540, ppl: 11.410566 +epoch: 2, batch: 37615, sum loss: 4490.134766, avg loss: 2.344718, ppl: 10.430330 +epoch: 2, batch: 37616, sum loss: 4955.201172, avg loss: 2.539826, ppl: 12.677470 +epoch: 2, batch: 37617, sum loss: 5005.427246, avg loss: 2.438104, ppl: 11.451307 +epoch: 2, batch: 37618, sum loss: 3815.080566, avg loss: 2.362279, ppl: 10.615115 +epoch: 2, batch: 37619, sum loss: 4005.064941, avg loss: 2.421442, ppl: 11.262088 +epoch: 2, batch: 37620, sum loss: 3916.450195, avg loss: 2.360729, ppl: 10.598680 +epoch: 2, batch: 37621, sum loss: 4098.605469, avg loss: 2.484003, ppl: 11.989164 +epoch: 2, batch: 37622, sum loss: 3886.721191, avg loss: 2.452190, ppl: 11.613752 +epoch: 2, batch: 37623, sum loss: 4545.877441, avg loss: 2.423176, ppl: 11.281628 +epoch: 2, batch: 37624, sum loss: 3657.172363, avg loss: 2.287162, ppl: 9.846953 +epoch: 2, batch: 37625, sum loss: 4059.827881, avg loss: 2.561406, ppl: 12.954014 +epoch: 2, batch: 37626, sum loss: 3494.419434, avg loss: 2.314185, ppl: 10.116676 +epoch: 2, batch: 37627, sum loss: 3176.659668, avg loss: 2.301927, ppl: 9.993424 +epoch: 2, batch: 37628, sum loss: 4588.471680, avg loss: 2.500530, ppl: 12.188950 +epoch: 2, batch: 37629, sum loss: 3874.014648, avg loss: 2.366533, ppl: 10.660369 +epoch: 2, batch: 37630, sum loss: 4012.072998, avg loss: 2.421287, ppl: 11.260345 +epoch: 2, batch: 37631, sum loss: 4975.485840, avg loss: 2.744339, ppl: 15.554325 +epoch: 2, batch: 37632, sum loss: 5200.459961, avg loss: 2.812580, ppl: 16.652821 +epoch: 2, batch: 37633, sum loss: 4946.715820, avg loss: 2.525123, ppl: 12.492434 +epoch: 2, batch: 37634, sum loss: 3926.158447, avg loss: 2.513546, ppl: 12.348638 +epoch: 2, batch: 37635, sum loss: 4452.074219, avg loss: 2.466523, ppl: 11.781414 +epoch: 2, batch: 37636, sum loss: 5259.959473, avg loss: 2.793393, ppl: 16.336357 +epoch: 2, batch: 37637, sum loss: 5018.112793, avg loss: 2.524202, ppl: 
12.480927 +epoch: 2, batch: 37638, sum loss: 4122.633789, avg loss: 2.513801, ppl: 12.351791 +epoch: 2, batch: 37639, sum loss: 3884.320312, avg loss: 2.720112, ppl: 15.182028 +epoch: 2, batch: 37640, sum loss: 4301.743164, avg loss: 2.663618, ppl: 14.348108 +epoch: 2, batch: 37641, sum loss: 4019.152344, avg loss: 2.438806, ppl: 11.459351 +epoch: 2, batch: 37642, sum loss: 4167.853516, avg loss: 2.420356, ppl: 11.249866 +epoch: 2, batch: 37643, sum loss: 3907.061279, avg loss: 2.324248, ppl: 10.218993 +epoch: 2, batch: 37644, sum loss: 4059.614258, avg loss: 2.317132, ppl: 10.146527 +epoch: 2, batch: 37645, sum loss: 4161.210938, avg loss: 2.381918, ppl: 10.825646 +epoch: 2, batch: 37646, sum loss: 3717.122803, avg loss: 2.404349, ppl: 11.071217 +epoch: 2, batch: 37647, sum loss: 3742.945312, avg loss: 2.433645, ppl: 11.400355 +epoch: 2, batch: 37648, sum loss: 4123.570801, avg loss: 2.475132, ppl: 11.883281 +epoch: 2, batch: 37649, sum loss: 4549.276367, avg loss: 2.629639, ppl: 13.868768 +epoch: 2, batch: 37650, sum loss: 3935.235352, avg loss: 2.411296, ppl: 11.148404 +epoch: 2, batch: 37651, sum loss: 4340.286133, avg loss: 2.443855, ppl: 11.517352 +epoch: 2, batch: 37652, sum loss: 4148.237305, avg loss: 2.612240, ppl: 13.629551 +epoch: 2, batch: 37653, sum loss: 3738.655518, avg loss: 2.295062, ppl: 9.925047 +epoch: 2, batch: 37654, sum loss: 4075.217285, avg loss: 2.378994, ppl: 10.794041 +epoch: 2, batch: 37655, sum loss: 3526.080078, avg loss: 2.374465, ppl: 10.745257 +epoch: 2, batch: 37656, sum loss: 3550.834961, avg loss: 2.267455, ppl: 9.654801 +epoch: 2, batch: 37657, sum loss: 4188.863281, avg loss: 2.632849, ppl: 13.913359 +epoch: 2, batch: 37658, sum loss: 4770.076172, avg loss: 2.778146, ppl: 16.089161 +epoch: 2, batch: 37659, sum loss: 4283.814941, avg loss: 2.605727, ppl: 13.541062 +epoch: 2, batch: 37660, sum loss: 4160.448730, avg loss: 2.402107, ppl: 11.046421 +epoch: 2, batch: 37661, sum loss: 4951.633301, avg loss: 2.769370, ppl: 15.948581 
+epoch: 2, batch: 37662, sum loss: 3506.752441, avg loss: 2.216658, ppl: 9.176608 +epoch: 2, batch: 37663, sum loss: 4082.734863, avg loss: 2.497086, ppl: 12.147040 +epoch: 2, batch: 37664, sum loss: 4407.448242, avg loss: 2.588049, ppl: 13.303796 +epoch: 2, batch: 37665, sum loss: 4673.438477, avg loss: 2.493830, ppl: 12.107565 +epoch: 2, batch: 37666, sum loss: 4493.156738, avg loss: 2.476933, ppl: 11.904699 +epoch: 2, batch: 37667, sum loss: 3996.831055, avg loss: 2.515312, ppl: 12.370470 +epoch: 2, batch: 37668, sum loss: 4586.368652, avg loss: 2.536708, ppl: 12.638002 +epoch: 2, batch: 37669, sum loss: 4811.448242, avg loss: 2.581249, ppl: 13.213632 +epoch: 2, batch: 37670, sum loss: 3750.055176, avg loss: 2.475284, ppl: 11.885083 +epoch: 2, batch: 37671, sum loss: 3880.660889, avg loss: 2.181372, ppl: 8.858451 +epoch: 2, batch: 37672, sum loss: 4562.208984, avg loss: 2.510847, ppl: 12.315358 +epoch: 2, batch: 37673, sum loss: 3940.080322, avg loss: 2.398101, ppl: 11.002264 +epoch: 2, batch: 37674, sum loss: 3696.599121, avg loss: 2.475954, ppl: 11.893045 diff --git a/fluid/neural_machine_translation/transformer_nist_base/train.py b/fluid/neural_machine_translation/transformer_nist_base/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d5889a4d8f3c88a1b41fc09fd6f0056473800ae8 --- /dev/null +++ b/fluid/neural_machine_translation/transformer_nist_base/train.py @@ -0,0 +1,217 @@ +import os +import time +import numpy as np + +import paddle +import paddle.fluid as fluid + +from model import transformer, position_encoding_init +from optim import LearningRateScheduler +from config import TrainTaskConfig, ModelHyperParams, pos_enc_param_names, \ + encoder_input_data_names, decoder_input_data_names, label_data_names +import nist_data_provider + + +def pad_batch_data(insts, + pad_idx, + n_head, + is_target=False, + is_label=False, + return_attn_bias=True, + return_max_len=True): + """ + Pad the instances to the max sequence length in batch, 
def pad_batch_data(insts,
                   pad_idx,
                   n_head,
                   is_target=False,
                   is_label=False,
                   return_attn_bias=True,
                   return_max_len=True):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and attention bias.

    Args:
        insts: non-empty sequence of token-id sequences (one per example).
        pad_idx: token id appended to short instances.
        n_head: number of attention heads; the bias is tiled once per head.
        is_target: build a causal (upper-triangular) self-attention bias
            instead of a padding-only bias.
        is_label: emit per-token loss weights (1 for real tokens, 0 for
            padding) instead of position indices.
        return_attn_bias: append the self-attention bias to the result.
        return_max_len: append the padded length to the result.

    Returns:
        A list, in this order: padded token ids (int64, shape [-1, 1]);
        label weights (float32, shape [-1, 1]) if ``is_label`` else position
        indices (int64, shape [-1, 1], 1-based, 0 on padding); optionally the
        attention bias (float32, shape [batch, n_head, max_len, max_len]);
        optionally ``max_len``.

    Raises:
        ValueError: if ``insts`` is empty (raised by ``max``).
    """
    return_list = []
    max_len = max(len(inst) for inst in insts)
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array(
        [list(inst) + [pad_idx] * (max_len - len(inst)) for inst in insts])
    return_list += [inst_data.astype("int64").reshape([-1, 1])]
    if is_label:  # label weight
        inst_weight = np.array(
            [[1.] * len(inst) + [0.] * (max_len - len(inst))
             for inst in insts])
        return_list += [inst_weight.astype("float32").reshape([-1, 1])]
    else:  # position data
        # list(...) is required: on Python 3 a range object cannot be
        # concatenated with a list.
        inst_pos = np.array([
            list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, 1])]
    if return_attn_bias:
        if is_target:
            # Causal mask: position i may not attend to positions > i.
            # Padding is appended after the real tokens, so real positions
            # never see it under this mask; no separate padding mask is built.
            slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, max_len))
            slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape(
                [-1, 1, max_len, max_len])
            slf_attn_bias_data = np.tile(slf_attn_bias_data,
                                         [1, n_head, 1, 1]) * -1e9
        else:
            # This is used to avoid attention on paddings.
            slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] *
                                           (max_len - len(inst))
                                           for inst in insts])
            slf_attn_bias_data = np.tile(
                slf_attn_bias_data.reshape([-1, 1, 1, max_len]),
                [1, n_head, max_len, 1])
        return_list += [slf_attn_bias_data.astype("float32")]
    if return_max_len:
        return_list += [max_len]
    return return_list if len(return_list) > 1 else return_list[0]
def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
                        n_head, d_model):
    """
    Put all padded data needed by training into a dict.

    Each instance is indexed as (source ids, target ids, label ids). The
    returned dict maps the names in `input_data_names`, in order, to the
    padded source/target/label arrays and the shape tensors built below.
    """

    def _softmax_shapes(attn_bias):
        # Shape pair for one attention bias: a 2-D shape keeping only the
        # last axis, followed by the bias' own full shape.
        flattened = np.array([-1, attn_bias.shape[-1]], dtype="int32")
        restored = np.array(attn_bias.shape, dtype="int32")
        return flattened, restored

    num_insts = len(insts)
    src_seqs = [inst[0] for inst in insts]
    trg_seqs = [inst[1] for inst in insts]
    lbl_seqs = [inst[2] for inst in insts]

    src_word, src_pos, src_slf_attn_bias, src_max_len = pad_batch_data(
        src_seqs, src_pad_idx, n_head, is_target=False)
    trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = pad_batch_data(
        trg_seqs, trg_pad_idx, n_head, is_target=True)

    # Keep a single query row of the source bias and repeat it once per
    # target position to get the decoder-to-encoder attention bias.
    first_query_row = src_slf_attn_bias[:, :, ::src_max_len, :]
    trg_src_attn_bias = np.tile(first_query_row,
                                [1, 1, trg_max_len, 1]).astype("float32")

    # These shape tensors are used in reshape_op.
    src_data_shape = np.array(
        [num_insts, src_max_len, d_model], dtype="int32")
    trg_data_shape = np.array(
        [num_insts, trg_max_len, d_model], dtype="int32")
    (src_slf_attn_pre_softmax_shape,
     src_slf_attn_post_softmax_shape) = _softmax_shapes(src_slf_attn_bias)
    (trg_slf_attn_pre_softmax_shape,
     trg_slf_attn_post_softmax_shape) = _softmax_shapes(trg_slf_attn_bias)
    (trg_src_attn_pre_softmax_shape,
     trg_src_attn_post_softmax_shape) = _softmax_shapes(trg_src_attn_bias)

    lbl_word, lbl_weight = pad_batch_data(
        lbl_seqs,
        trg_pad_idx,
        n_head,
        is_target=False,
        is_label=True,
        return_attn_bias=False,
        return_max_len=False)

    # The order here must line up with the order of `input_data_names`.
    ordered_values = [
        src_word,
        src_pos,
        src_slf_attn_bias,
        src_data_shape,
        src_slf_attn_pre_softmax_shape,
        src_slf_attn_post_softmax_shape,
        trg_word,
        trg_pos,
        trg_slf_attn_bias,
        trg_src_attn_bias,
        trg_data_shape,
        trg_slf_attn_pre_softmax_shape,
        trg_slf_attn_post_softmax_shape,
        trg_src_attn_pre_softmax_shape,
        trg_src_attn_post_softmax_shape,
        lbl_word,
        lbl_weight,
    ]
    return dict(zip(input_data_names, ordered_values))
def main():
    """
    Build the Transformer training program, train it for
    `TrainTaskConfig.pass_num` passes and save an inference model after
    every pass.
    """
    place = fluid.CUDAPlace(0) if TrainTaskConfig.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    sum_cost, avg_cost, predict, token_num = transformer(
        ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
        ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
        ModelHyperParams.n_head, ModelHyperParams.d_key,
        ModelHyperParams.d_value, ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout)

    lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
                                         TrainTaskConfig.warmup_steps, place,
                                         TrainTaskConfig.learning_rate)
    optimizer = fluid.optimizer.Adam(
        learning_rate=lr_scheduler.learning_rate,
        beta1=TrainTaskConfig.beta1,
        beta2=TrainTaskConfig.beta2,
        epsilon=TrainTaskConfig.eps)
    optimizer.minimize(avg_cost if TrainTaskConfig.use_avg_cost else sum_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            nist_data_provider.train("data", ModelHyperParams.src_vocab_size,
                                     ModelHyperParams.trg_vocab_size),
            buf_size=100000),
        batch_size=TrainTaskConfig.batch_size)

    # Program to do validation (currently disabled; kept for reference):
    # test_program = fluid.default_main_program().clone()
    # with fluid.program_guard(test_program):
    #     test_program = fluid.io.get_inference_program([avg_cost])
    # val_data = paddle.batch(
    #     nist_data_provider.train("data", ModelHyperParams.src_vocab_size,
    #                              ModelHyperParams.trg_vocab_size),
    #     batch_size=TrainTaskConfig.batch_size)

    def test(exe):
        # NOTE: depends on `val_data` and `test_program`, which are only
        # defined in the disabled validation setup above. This helper is not
        # called anywhere in main(); calling it as-is raises NameError.
        test_total_cost = 0
        test_total_token = 0
        for batch_id, data in enumerate(val_data()):
            data_input = prepare_batch_input(
                data, encoder_input_data_names + decoder_input_data_names[:-1]
                + label_data_names, ModelHyperParams.eos_idx,
                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
                ModelHyperParams.d_model)
            test_sum_cost, test_token_num = exe.run(
                test_program,
                feed=data_input,
                fetch_list=[sum_cost, token_num],
                use_program_cache=True)
            test_total_cost += test_sum_cost
            test_total_token += test_token_num
        test_avg_cost = test_total_cost / test_total_token
        # Clamp the exponent so the perplexity cannot overflow.
        test_ppl = np.exp([min(test_avg_cost, 100)])
        return test_avg_cost, test_ppl

    # Initialize the parameters.
    exe.run(fluid.framework.default_startup_program())
    # The position encoding tables are filled in from Python after the
    # startup program has created the variables.
    for pos_enc_param_name in pos_enc_param_names:
        pos_enc_param = fluid.global_scope().find_var(
            pos_enc_param_name).get_tensor()
        pos_enc_param.set(
            position_encoding_init(ModelHyperParams.max_length + 1,
                                   ModelHyperParams.d_model), place)

    # `range` instead of the Python 2-only `xrange`; pass_num is small, so
    # the materialised list on Python 2 is negligible.
    for pass_id in range(TrainTaskConfig.pass_num):
        pass_start_time = time.time()
        for batch_id, data in enumerate(train_data()):
            # Skip the incomplete batch at the end of the reader.
            if len(data) != TrainTaskConfig.batch_size:
                continue
            # eos_idx doubles as the padding index for source and target.
            data_input = prepare_batch_input(
                data, encoder_input_data_names + decoder_input_data_names[:-1]
                + label_data_names, ModelHyperParams.eos_idx,
                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
                ModelHyperParams.d_model)
            lr_scheduler.update_learning_rate(data_input)
            outs = exe.run(fluid.framework.default_main_program(),
                           feed=data_input,
                           fetch_list=[sum_cost, avg_cost],
                           use_program_cache=True)
            sum_cost_val, avg_cost_val = np.array(outs[0]), np.array(outs[1])
            print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
                  (pass_id, batch_id, sum_cost_val, avg_cost_val,
                   np.exp([min(avg_cost_val[0], 100)])))
        # Validate and save the model for inference.
        #val_avg_cost, val_ppl = test(exe)
        pass_end_time = time.time()
        time_consumed = pass_end_time - pass_start_time
        print("pass_id = " + str(pass_id) + " time_consumed = " +
              str(time_consumed))
        #print("epoch: %d, val avg loss: %f, val ppl: %f, "
        #      "consumed %fs" % (pass_id, val_avg_cost, val_ppl, time_consumed))
        # The last decoder input name is dropped from the feed list here, the
        # same way it is dropped when building the training feed above.
        fluid.io.save_inference_model(
            os.path.join(TrainTaskConfig.model_dir,
                         "pass_" + str(pass_id) + ".infer.model"),
            encoder_input_data_names + decoder_input_data_names[:-1],
            [predict], exe)


if __name__ == "__main__":
    main()